Adding upstream version 6.6.15.upstream/6.6.15

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
commit: ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree: b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/net/ethernet/intel
parent: Initial commit. (diff)
download: linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
324 files changed, 354187 insertions, 0 deletions
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
new file mode 100644
index 0000000000..9bc0a95198
--- /dev/null
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -0,0 +1,359 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Intel network device configuration
+#
+
+config NET_VENDOR_INTEL
+	bool "Intel devices"
+	default y
+	help
+	  If you have a network (Ethernet) card belonging to this class, say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about Intel cards. If you say Y, you will be asked for
+	  your specific card in the following questions.
+
+if NET_VENDOR_INTEL
+
+config E100
+	tristate "Intel(R) PRO/100+ support"
+	depends on PCI
+	select MII
+	help
+	  This driver supports Intel(R) PRO/100 family of adapters.
+	  To verify that your adapter is supported, find the board ID number
+	  on the adapter. Look for a label that has a barcode and a number
+	  in the format 123456-001 (six digits hyphen three digits).
+
+	  Use the above information and the Adapter & Driver ID Guide that
+	  can be located at:
+
+	  <http://support.intel.com>
+
+	  to identify the adapter.
+
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/ethernet/intel/e100.rst>.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called e100.
+
+config E1000
+	tristate "Intel(R) PRO/1000 Gigabit Ethernet support"
+	depends on PCI
+	help
+	  This driver supports Intel(R) PRO/1000 gigabit ethernet family of
+	  adapters.  For more information on how to identify your adapter, go
+	  to the Adapter & Driver ID Guide that can be located at:
+
+	  <http://support.intel.com>
+
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/ethernet/intel/e1000.rst>.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called e1000.
+
+config E1000E
+	tristate "Intel(R) PRO/1000 PCI-Express Gigabit Ethernet support"
+	depends on PCI && (!SPARC32 || BROKEN)
+	depends on PTP_1588_CLOCK_OPTIONAL
+	select CRC32
+	help
+	  This driver supports the PCI-Express Intel(R) PRO/1000 gigabit
+	  ethernet family of adapters. For PCI or PCI-X e1000 adapters,
+	  use the regular e1000 driver For more information on how to
+	  identify your adapter, go to the Adapter & Driver ID Guide that
+	  can be located at:
+
+	  <http://support.intel.com>
+
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/ethernet/intel/e1000e.rst>.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called e1000e.
+
+config E1000E_HWTS
+	bool "Support HW cross-timestamp on PCH devices"
+	default y
+	depends on E1000E && X86
+	help
+	 Say Y to enable hardware supported cross-timestamping on PCH
+	 devices. The cross-timestamp is available through the PTP clock
+	 driver precise cross-timestamp ioctl (PTP_SYS_OFFSET_PRECISE).
+
+config IGB
+	tristate "Intel(R) 82575/82576 PCI-Express Gigabit Ethernet support"
+	depends on PCI
+	depends on PTP_1588_CLOCK_OPTIONAL
+	select I2C
+	select I2C_ALGOBIT
+	help
+	  This driver supports Intel(R) 82575/82576 gigabit ethernet family of
+	  adapters.  For more information on how to identify your adapter, go
+	  to the Adapter & Driver ID Guide that can be located at:
+
+	  <http://support.intel.com>
+
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/ethernet/intel/igb.rst>.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called igb.
+
+config IGB_HWMON
+	bool "Intel(R) PCI-Express Gigabit adapters HWMON support"
+	default y
+	depends on IGB && HWMON && !(IGB=y && HWMON=m)
+	help
+	  Say Y if you want to expose thermal sensor data on Intel devices.
+
+	  Some of our devices contain thermal sensors, both external and internal.
+	  This data is available via the hwmon sysfs interface and exposes
+	  the onboard sensors.
+
+config IGB_DCA
+	bool "Direct Cache Access (DCA) Support"
+	default y
+	depends on IGB && DCA && !(IGB=y && DCA=m)
+	help
+	  Say Y here if you want to use Direct Cache Access (DCA) in the
+	  driver.  DCA is a method for warming the CPU cache before data
+	  is used, with the intent of lessening the impact of cache misses.
+
+config IGBVF
+	tristate "Intel(R) 82576 Virtual Function Ethernet support"
+	depends on PCI
+	help
+	  This driver supports Intel(R) 82576 virtual functions.  For more
+	  information on how to identify your adapter, go to the Adapter &
+	  Driver ID Guide that can be located at:
+
+	  <http://support.intel.com>
+
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/ethernet/intel/igbvf.rst>.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called igbvf.
+
+config IXGBE
+	tristate "Intel(R) 10GbE PCI Express adapters support"
+	depends on PCI
+	depends on PTP_1588_CLOCK_OPTIONAL
+	select MDIO
+	select PHYLIB
+	help
+	  This driver supports Intel(R) 10GbE PCI Express family of
+	  adapters.  For more information on how to identify your adapter, go
+	  to the Adapter & Driver ID Guide that can be located at:
+
+	  <http://support.intel.com>
+
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst>.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called ixgbe.
+
+config IXGBE_HWMON
+	bool "Intel(R) 10GbE PCI Express adapters HWMON support"
+	default y
+	depends on IXGBE && HWMON && !(IXGBE=y && HWMON=m)
+	help
+	  Say Y if you want to expose the thermal sensor data on some of
+	  our cards, via a hwmon sysfs interface.
+
+config IXGBE_DCA
+	bool "Direct Cache Access (DCA) Support"
+	default y
+	depends on IXGBE && DCA && !(IXGBE=y && DCA=m)
+	help
+	  Say Y here if you want to use Direct Cache Access (DCA) in the
+	  driver.  DCA is a method for warming the CPU cache before data
+	  is used, with the intent of lessening the impact of cache misses.
+
+config IXGBE_DCB
+	bool "Data Center Bridging (DCB) Support"
+	default n
+	depends on IXGBE && DCB
+	help
+	  Say Y here if you want to use Data Center Bridging (DCB) in the
+	  driver.
+
+	  If unsure, say N.
+
+config IXGBE_IPSEC
+	bool "IPSec XFRM cryptography-offload acceleration"
+	depends on IXGBE
+	depends on XFRM_OFFLOAD
+	default y
+	select XFRM_ALGO
+	help
+	  Enable support for IPSec offload in ixgbe.ko
+
+config IXGBEVF
+	tristate "Intel(R) 10GbE PCI Express Virtual Function Ethernet support"
+	depends on PCI_MSI
+	help
+	  This driver supports Intel(R) PCI Express virtual functions for the
+	  Intel(R) ixgbe driver.  For more information on how to identify your
+	  adapter, go to the Adapter & Driver ID Guide that can be located at:
+
+	  <http://support.intel.com>
+
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/ethernet/intel/ixgbevf.rst>.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called ixgbevf.  MSI-X interrupt support is required
+	  for this driver to work correctly.
+
+config IXGBEVF_IPSEC
+	bool "IPSec XFRM cryptography-offload acceleration"
+	depends on IXGBEVF
+	depends on XFRM_OFFLOAD
+	default y
+	select XFRM_ALGO
+	help
+	  Enable support for IPSec offload in ixgbevf.ko
+
+config I40E
+	tristate "Intel(R) Ethernet Controller XL710 Family support"
+	depends on PTP_1588_CLOCK_OPTIONAL
+	depends on PCI
+	select AUXILIARY_BUS
+	help
+	  This driver supports Intel(R) Ethernet Controller XL710 Family of
+	  devices.  For more information on how to identify your adapter, go
+	  to the Adapter & Driver ID Guide that can be located at:
+
+	  <http://support.intel.com>
+
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/ethernet/intel/i40e.rst>.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called i40e.
+
+config I40E_DCB
+	bool "Data Center Bridging (DCB) Support"
+	default n
+	depends on I40E && DCB
+	help
+	  Say Y here if you want to use Data Center Bridging (DCB) in the
+	  driver.
+
+	  If unsure, say N.
+
+# this is here to allow seamless migration from I40EVF --> IAVF name
+# so that CONFIG_IAVF symbol will always mirror the state of CONFIG_I40EVF
+config IAVF
+	tristate
+config I40EVF
+	tristate "Intel(R) Ethernet Adaptive Virtual Function support"
+	select IAVF
+	depends on PCI_MSI
+	help
+	  This driver supports virtual functions for Intel XL710,
+	  X710, X722, XXV710, and all devices advertising support for
+	  Intel Ethernet Adaptive Virtual Function devices. For more
+	  information on how to identify your adapter, go to the Adapter
+	  & Driver ID Guide that can be located at:
+
+	  <https://support.intel.com>
+
+	  This driver was formerly named i40evf.
+
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/ethernet/intel/iavf.rst>.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called iavf.  MSI-X interrupt support is required
+	  for this driver to work correctly.
+
+config ICE
+	tristate "Intel(R) Ethernet Connection E800 Series Support"
+	default n
+	depends on PCI_MSI
+	depends on PTP_1588_CLOCK_OPTIONAL
+	depends on GNSS || GNSS = n
+	select AUXILIARY_BUS
+	select DIMLIB
+	select NET_DEVLINK
+	select PLDMFW
+	help
+	  This driver supports Intel(R) Ethernet Connection E800 Series of
+	  devices.  For more information on how to identify your adapter, go
+	  to the Adapter & Driver ID Guide that can be located at:
+
+	  <http://support.intel.com>
+
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/ethernet/intel/ice.rst>.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called ice.
+
+config ICE_SWITCHDEV
+	bool "Switchdev Support"
+	default y
+	depends on ICE && NET_SWITCHDEV
+	help
+	  Switchdev support provides internal SRIOV packet steering and switching.
+
+	  To enable it on running kernel use devlink tool:
+	  #devlink dev eswitch set pci/0000:XX:XX.X mode switchdev
+
+	  Say Y here if you want to use Switchdev in the driver.
+
+	  If unsure, say N.
+
+config ICE_HWTS
+	bool "Support HW cross-timestamp on platforms with PTM support"
+	default y
+	depends on ICE && X86
+	help
+	  Say Y to enable hardware supported cross-timestamping on platforms
+	  with PCIe PTM support. The cross-timestamp is available through
+	  the PTP clock driver precise cross-timestamp ioctl
+	  (PTP_SYS_OFFSET_PRECISE).
+
+config FM10K
+	tristate "Intel(R) FM10000 Ethernet Switch Host Interface Support"
+	default n
+	depends on PCI_MSI
+	depends on PTP_1588_CLOCK_OPTIONAL
+	help
+	  This driver supports Intel(R) FM10000 Ethernet Switch Host
+	  Interface.  For more information on how to identify your adapter,
+	  go to the Adapter & Driver ID Guide that can be located at:
+
+	  <http://support.intel.com>
+
+	  More specific information on configuring the driver is in
+	  <file:Documentation/networking/device_drivers/ethernet/intel/fm10k.rst>.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called fm10k.  MSI-X interrupt support is required
+
+config IGC
+	tristate "Intel(R) Ethernet Controller I225-LM/I225-V support"
+	default n
+	depends on PCI
+	depends on PTP_1588_CLOCK_OPTIONAL
+	help
+	  This driver supports Intel(R) Ethernet Controller I225-LM/I225-V
+	  family of adapters.
+
+	  For more information on how to identify your adapter, go
+	  to the Adapter & Driver ID Guide that can be located at:
+
+	  <http://support.intel.com>
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called igc.
+
+endif # NET_VENDOR_INTEL
diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile
new file mode 100644
index 0000000000..d80d041320
--- /dev/null
+++ b/drivers/net/ethernet/intel/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the Intel network device drivers.
+#
+
+obj-$(CONFIG_E100) += e100.o
+obj-$(CONFIG_E1000) += e1000/
+obj-$(CONFIG_E1000E) += e1000e/
+obj-$(CONFIG_IGB) += igb/
+obj-$(CONFIG_IGC) += igc/
+obj-$(CONFIG_IGBVF) += igbvf/
+obj-$(CONFIG_IXGBE) += ixgbe/
+obj-$(CONFIG_IXGBEVF) += ixgbevf/
+obj-$(CONFIG_I40E) += i40e/
+obj-$(CONFIG_IAVF) += iavf/
+obj-$(CONFIG_FM10K) += fm10k/
+obj-$(CONFIG_ICE) += ice/
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
new file mode 100644
index 0000000000..d3fdc29093
--- /dev/null
+++ b/drivers/net/ethernet/intel/e100.c
@@ -0,0 +1,3196 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
+
+/*
+ *	e100.c: Intel(R) PRO/100 ethernet driver
+ *
+ *	(Re)written 2003 by scott.feldman@intel.com.  Based loosely on
+ *	original e100 driver, but better described as a munging of
+ *	e100, e1000, eepro100, tg3, 8139cp, and other drivers.
+ *
+ *	References:
+ *		Intel 8255x 10/100 Mbps Ethernet Controller Family,
+ *		Open Source Software Developers Manual,
+ *		http://sourceforge.net/projects/e1000
+ *
+ *
+ *	                      Theory of Operation
+ *
+ *	I.   General
+ *
+ *	The driver supports Intel(R) 10/100 Mbps PCI Fast Ethernet
+ *	controller family, which includes the 82557, 82558, 82559, 82550,
+ *	82551, and 82562 devices.  82558 and greater controllers
+ *	integrate the Intel 82555 PHY.  The controllers are used in
+ *	server and client network interface cards, as well as in
+ *	LAN-On-Motherboard (LOM), CardBus, MiniPCI, and ICHx
+ *	configurations.  8255x supports a 32-bit linear addressing
+ *	mode and operates at 33Mhz PCI clock rate.
+ *
+ *	II.  Driver Operation
+ *
+ *	Memory-mapped mode is used exclusively to access the device's
+ *	shared-memory structure, the Control/Status Registers (CSR). All
+ *	setup, configuration, and control of the device, including queuing
+ *	of Tx, Rx, and configuration commands is through the CSR.
+ *	cmd_lock serializes accesses to the CSR command register.  cb_lock
+ *	protects the shared Command Block List (CBL).
+ *
+ *	8255x is highly MII-compliant and all access to the PHY go
+ *	through the Management Data Interface (MDI).  Consequently, the
+ *	driver leverages the mii.c library shared with other MII-compliant
+ *	devices.
+ *
+ *	Big- and Little-Endian byte order as well as 32- and 64-bit
+ *	archs are supported.  Weak-ordered memory and non-cache-coherent
+ *	archs are supported.
+ *
+ *	III. Transmit
+ *
+ *	A Tx skb is mapped and hangs off of a TCB.  TCBs are linked
+ *	together in a fixed-size ring (CBL) thus forming the flexible mode
+ *	memory structure.  A TCB marked with the suspend-bit indicates
+ *	the end of the ring.  The last TCB processed suspends the
+ *	controller, and the controller can be restarted by issue a CU
+ *	resume command to continue from the suspend point, or a CU start
+ *	command to start at a given position in the ring.
+ *
+ *	Non-Tx commands (config, multicast setup, etc) are linked
+ *	into the CBL ring along with Tx commands.  The common structure
+ *	used for both Tx and non-Tx commands is the Command Block (CB).
+ *
+ *	cb_to_use is the next CB to use for queuing a command; cb_to_clean
+ *	is the next CB to check for completion; cb_to_send is the first
+ *	CB to start on in case of a previous failure to resume.  CB clean
+ *	up happens in interrupt context in response to a CU interrupt.
+ *	cbs_avail keeps track of number of free CB resources available.
+ *
+ * 	Hardware padding of short packets to minimum packet size is
+ * 	enabled.  82557 pads with 7Eh, while the later controllers pad
+ * 	with 00h.
+ *
+ *	IV.  Receive
+ *
+ *	The Receive Frame Area (RFA) comprises a ring of Receive Frame
+ *	Descriptors (RFD) + data buffer, thus forming the simplified mode
+ *	memory structure.  Rx skbs are allocated to contain both the RFD
+ *	and the data buffer, but the RFD is pulled off before the skb is
+ *	indicated.  The data buffer is aligned such that encapsulated
+ *	protocol headers are u32-aligned.  Since the RFD is part of the
+ *	mapped shared memory, and completion status is contained within
+ *	the RFD, the RFD must be dma_sync'ed to maintain a consistent
+ *	view from software and hardware.
+ *
+ *	In order to keep updates to the RFD link field from colliding with
+ *	hardware writes to mark packets complete, we use the feature that
+ *	hardware will not write to a size 0 descriptor and mark the previous
+ *	packet as end-of-list (EL).   After updating the link, we remove EL
+ *	and only then restore the size such that hardware may use the
+ *	previous-to-end RFD.
+ *
+ *	Under typical operation, the  receive unit (RU) is start once,
+ *	and the controller happily fills RFDs as frames arrive.  If
+ *	replacement RFDs cannot be allocated, or the RU goes non-active,
+ *	the RU must be restarted.  Frame arrival generates an interrupt,
+ *	and Rx indication and re-allocation happen in the same context,
+ *	therefore no locking is required.  A software-generated interrupt
+ *	is generated from the watchdog to recover from a failed allocation
+ *	scenario where all Rx resources have been indicated and none re-
+ *	placed.
+ *
+ *	V.   Miscellaneous
+ *
+ * 	VLAN offloading of tagging, stripping and filtering is not
+ * 	supported, but driver will accommodate the extra 4-byte VLAN tag
+ * 	for processing by upper layers.  Tx/Rx Checksum offloading is not
+ * 	supported.  Tx Scatter/Gather is not supported.  Jumbo Frames is
+ * 	not supported (hardware limitation).
+ *
+ * 	MagicPacket(tm) WoL support is enabled/disabled via ethtool.
+ *
+ * 	Thanks to JC (jchapman@katalix.com) for helping with
+ * 	testing/troubleshooting the development driver.
+ *
+ * 	TODO:
+ * 	o several entry points race with dev->close
+ * 	o check for tx-no-resources/stop Q races with tx clean/wake Q
+ *
+ *	FIXES:
+ * 2005/12/02 - Michael O'Donnell <Michael.ODonnell at stratus dot com>
+ *	- Stratus87247: protect MDI control register manipulations
+ * 2009/06/01 - Andreas Mohr <andi at lisas dot de>
+ *      - add clean lowlevel I/O emulation for cards with MII-lacking PHYs
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/hardirq.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/skbuff.h>
+#include <linux/ethtool.h>
+#include <linux/string.h>
+#include <linux/firmware.h>
+#include <linux/rtnetlink.h>
+#include <asm/unaligned.h>
+
+
+#define DRV_NAME		"e100"
+#define DRV_DESCRIPTION		"Intel(R) PRO/100 Network Driver"
+#define DRV_COPYRIGHT		"Copyright(c) 1999-2006 Intel Corporation"
+
+#define E100_WATCHDOG_PERIOD	(2 * HZ)
+#define E100_NAPI_WEIGHT	16
+
+#define FIRMWARE_D101M		"e100/d101m_ucode.bin"
+#define FIRMWARE_D101S		"e100/d101s_ucode.bin"
+#define FIRMWARE_D102E		"e100/d102e_ucode.bin"
+
+MODULE_DESCRIPTION(DRV_DESCRIPTION);
+MODULE_AUTHOR(DRV_COPYRIGHT);
+MODULE_LICENSE("GPL v2");
+MODULE_FIRMWARE(FIRMWARE_D101M);
+MODULE_FIRMWARE(FIRMWARE_D101S);
+MODULE_FIRMWARE(FIRMWARE_D102E);
+
+static int debug = 3;
+static int eeprom_bad_csum_allow = 0;
+static int use_io = 0;
+module_param(debug, int, 0);
+module_param(eeprom_bad_csum_allow, int, 0);
+module_param(use_io, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+MODULE_PARM_DESC(eeprom_bad_csum_allow, "Allow bad eeprom checksums");
+MODULE_PARM_DESC(use_io, "Force use of i/o access mode");
+
+#define INTEL_8255X_ETHERNET_DEVICE(device_id, ich) {\
+	PCI_VENDOR_ID_INTEL, device_id, PCI_ANY_ID, PCI_ANY_ID, \
+	PCI_CLASS_NETWORK_ETHERNET << 8, 0xFFFF00, ich }
+static const struct pci_device_id e100_id_table[] = {
+	INTEL_8255X_ETHERNET_DEVICE(0x1029, 0),
+	INTEL_8255X_ETHERNET_DEVICE(0x1030, 0),
+	INTEL_8255X_ETHERNET_DEVICE(0x1031, 3),
+	INTEL_8255X_ETHERNET_DEVICE(0x1032, 3),
+	INTEL_8255X_ETHERNET_DEVICE(0x1033, 3),
+	INTEL_8255X_ETHERNET_DEVICE(0x1034, 3),
+	INTEL_8255X_ETHERNET_DEVICE(0x1038, 3),
+	INTEL_8255X_ETHERNET_DEVICE(0x1039, 4),
+	INTEL_8255X_ETHERNET_DEVICE(0x103A, 4),
+	INTEL_8255X_ETHERNET_DEVICE(0x103B, 4),
+	INTEL_8255X_ETHERNET_DEVICE(0x103C, 4),
+	INTEL_8255X_ETHERNET_DEVICE(0x103D, 4),
+	INTEL_8255X_ETHERNET_DEVICE(0x103E, 4),
+	INTEL_8255X_ETHERNET_DEVICE(0x1050, 5),
+	INTEL_8255X_ETHERNET_DEVICE(0x1051, 5),
+	INTEL_8255X_ETHERNET_DEVICE(0x1052, 5),
+	INTEL_8255X_ETHERNET_DEVICE(0x1053, 5),
+	INTEL_8255X_ETHERNET_DEVICE(0x1054, 5),
+	INTEL_8255X_ETHERNET_DEVICE(0x1055, 5),
+	INTEL_8255X_ETHERNET_DEVICE(0x1056, 5),
+	INTEL_8255X_ETHERNET_DEVICE(0x1057, 5),
+	INTEL_8255X_ETHERNET_DEVICE(0x1059, 0),
+	INTEL_8255X_ETHERNET_DEVICE(0x1064, 6),
+	INTEL_8255X_ETHERNET_DEVICE(0x1065, 6),
+	INTEL_8255X_ETHERNET_DEVICE(0x1066, 6),
+	INTEL_8255X_ETHERNET_DEVICE(0x1067, 6),
+	INTEL_8255X_ETHERNET_DEVICE(0x1068, 6),
+	INTEL_8255X_ETHERNET_DEVICE(0x1069, 6),
+	INTEL_8255X_ETHERNET_DEVICE(0x106A, 6),
+	INTEL_8255X_ETHERNET_DEVICE(0x106B, 6),
+	INTEL_8255X_ETHERNET_DEVICE(0x1091, 7),
+	INTEL_8255X_ETHERNET_DEVICE(0x1092, 7),
+	INTEL_8255X_ETHERNET_DEVICE(0x1093, 7),
+	INTEL_8255X_ETHERNET_DEVICE(0x1094, 7),
+	INTEL_8255X_ETHERNET_DEVICE(0x1095, 7),
+	INTEL_8255X_ETHERNET_DEVICE(0x10fe, 7),
+	INTEL_8255X_ETHERNET_DEVICE(0x1209, 0),
+	INTEL_8255X_ETHERNET_DEVICE(0x1229, 0),
+	INTEL_8255X_ETHERNET_DEVICE(0x2449, 2),
+	INTEL_8255X_ETHERNET_DEVICE(0x2459, 2),
+	INTEL_8255X_ETHERNET_DEVICE(0x245D, 2),
+	INTEL_8255X_ETHERNET_DEVICE(0x27DC, 7),
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, e100_id_table);
+
+enum mac {
+	mac_82557_D100_A  = 0,
+	mac_82557_D100_B  = 1,
+	mac_82557_D100_C  = 2,
+	mac_82558_D101_A4 = 4,
+	mac_82558_D101_B0 = 5,
+	mac_82559_D101M   = 8,
+	mac_82559_D101S   = 9,
+	mac_82550_D102    = 12,
+	mac_82550_D102_C  = 13,
+	mac_82551_E       = 14,
+	mac_82551_F       = 15,
+	mac_82551_10      = 16,
+	mac_unknown       = 0xFF,
+};
+
+enum phy {
+	phy_100a     = 0x000003E0,
+	phy_100c     = 0x035002A8,
+	phy_82555_tx = 0x015002A8,
+	phy_nsc_tx   = 0x5C002000,
+	phy_82562_et = 0x033002A8,
+	phy_82562_em = 0x032002A8,
+	phy_82562_ek = 0x031002A8,
+	phy_82562_eh = 0x017002A8,
+	phy_82552_v  = 0xd061004d,
+	phy_unknown  = 0xFFFFFFFF,
+};
+
+/* CSR (Control/Status Registers) */
+struct csr {
+	struct {
+		u8 status;
+		u8 stat_ack;
+		u8 cmd_lo;
+		u8 cmd_hi;
+		u32 gen_ptr;
+	} scb;
+	u32 port;
+	u16 flash_ctrl;
+	u8 eeprom_ctrl_lo;
+	u8 eeprom_ctrl_hi;
+	u32 mdi_ctrl;
+	u32 rx_dma_count;
+};
+
+enum scb_status {
+	rus_no_res       = 0x08,
+	rus_ready        = 0x10,
+	rus_mask         = 0x3C,
+};
+
+enum ru_state  {
+	RU_SUSPENDED = 0,
+	RU_RUNNING	 = 1,
+	RU_UNINITIALIZED = -1,
+};
+
+enum scb_stat_ack {
+	stat_ack_not_ours    = 0x00,
+	stat_ack_sw_gen      = 0x04,
+	stat_ack_rnr         = 0x10,
+	stat_ack_cu_idle     = 0x20,
+	stat_ack_frame_rx    = 0x40,
+	stat_ack_cu_cmd_done = 0x80,
+	stat_ack_not_present = 0xFF,
+	stat_ack_rx = (stat_ack_sw_gen | stat_ack_rnr | stat_ack_frame_rx),
+	stat_ack_tx = (stat_ack_cu_idle | stat_ack_cu_cmd_done),
+};
+
+enum scb_cmd_hi {
+	irq_mask_none = 0x00,
+	irq_mask_all  = 0x01,
+	irq_sw_gen    = 0x02,
+};
+
+enum scb_cmd_lo {
+	cuc_nop        = 0x00,
+	ruc_start      = 0x01,
+	ruc_load_base  = 0x06,
+	cuc_start      = 0x10,
+	cuc_resume     = 0x20,
+	cuc_dump_addr  = 0x40,
+	cuc_dump_stats = 0x50,
+	cuc_load_base  = 0x60,
+	cuc_dump_reset = 0x70,
+};
+
+enum cuc_dump {
+	cuc_dump_complete       = 0x0000A005,
+	cuc_dump_reset_complete = 0x0000A007,
+};
+
+enum port {
+	software_reset  = 0x0000,
+	selftest        = 0x0001,
+	selective_reset = 0x0002,
+};
+
+enum eeprom_ctrl_lo {
+	eesk = 0x01,
+	eecs = 0x02,
+	eedi = 0x04,
+	eedo = 0x08,
+};
+
+enum mdi_ctrl {
+	mdi_write = 0x04000000,
+	mdi_read  = 0x08000000,
+	mdi_ready = 0x10000000,
+};
+
+enum eeprom_op {
+	op_write = 0x05,
+	op_read  = 0x06,
+	op_ewds  = 0x10,
+	op_ewen  = 0x13,
+};
+
+enum eeprom_offsets {
+	eeprom_cnfg_mdix  = 0x03,
+	eeprom_phy_iface  = 0x06,
+	eeprom_id         = 0x0A,
+	eeprom_config_asf = 0x0D,
+	eeprom_smbus_addr = 0x90,
+};
+
+enum eeprom_cnfg_mdix {
+	eeprom_mdix_enabled = 0x0080,
+};
+
+enum eeprom_phy_iface {
+	NoSuchPhy = 0,
+	I82553AB,
+	I82553C,
+	I82503,
+	DP83840,
+	S80C240,
+	S80C24,
+	I82555,
+	DP83840A = 10,
+};
+
+enum eeprom_id {
+	eeprom_id_wol = 0x0020,
+};
+
+enum eeprom_config_asf {
+	eeprom_asf = 0x8000,
+	eeprom_gcl = 0x4000,
+};
+
+enum cb_status {
+	cb_complete = 0x8000,
+	cb_ok       = 0x2000,
+};
+
+/*
+ * cb_command - Command Block flags
+ * @cb_tx_nc:  0: controller does CRC (normal),  1: CRC from skb memory
+ */
+enum cb_command {
+	cb_nop    = 0x0000,
+	cb_iaaddr = 0x0001,
+	cb_config = 0x0002,
+	cb_multi  = 0x0003,
+	cb_tx     = 0x0004,
+	cb_ucode  = 0x0005,
+	cb_dump   = 0x0006,
+	cb_tx_sf  = 0x0008,
+	cb_tx_nc  = 0x0010,
+	cb_cid    = 0x1f00,
+	cb_i      = 0x2000,
+	cb_s      = 0x4000,
+	cb_el     = 0x8000,
+};
+
+struct rfd {
+	__le16 status;
+	__le16 command;
+	__le32 link;
+	__le32 rbd;
+	__le16 actual_size;
+	__le16 size;
+};
+
+struct rx {
+	struct rx *next, *prev;
+	struct sk_buff *skb;
+	dma_addr_t dma_addr;
+};
+
+#if defined(__BIG_ENDIAN_BITFIELD)
+#define X(a,b)	b,a
+#else
+#define X(a,b)	a,b
+#endif
+struct config {
+/*0*/	u8 X(byte_count:6, pad0:2);
+/*1*/	u8 X(X(rx_fifo_limit:4, tx_fifo_limit:3), pad1:1);
+/*2*/	u8 adaptive_ifs;
+/*3*/	u8 X(X(X(X(mwi_enable:1, type_enable:1), read_align_enable:1),
+	   term_write_cache_line:1), pad3:4);
+/*4*/	u8 X(rx_dma_max_count:7, pad4:1);
+/*5*/	u8 X(tx_dma_max_count:7, dma_max_count_enable:1);
+/*6*/	u8 X(X(X(X(X(X(X(late_scb_update:1, direct_rx_dma:1),
+	   tno_intr:1), cna_intr:1), standard_tcb:1), standard_stat_counter:1),
+	   rx_save_overruns : 1), rx_save_bad_frames : 1);
+/*7*/	u8 X(X(X(X(X(rx_discard_short_frames:1, tx_underrun_retry:2),
+	   pad7:2), rx_extended_rfd:1), tx_two_frames_in_fifo:1),
+	   tx_dynamic_tbd:1);
+/*8*/	u8 X(X(mii_mode:1, pad8:6), csma_disabled:1);
+/*9*/	u8 X(X(X(X(X(rx_tcpudp_checksum:1, pad9:3), vlan_arp_tco:1),
+	   link_status_wake:1), arp_wake:1), mcmatch_wake:1);
+/*10*/	u8 X(X(X(pad10:3, no_source_addr_insertion:1), preamble_length:2),
+	   loopback:2);
+/*11*/	u8 X(linear_priority:3, pad11:5);
+/*12*/	u8 X(X(linear_priority_mode:1, pad12:3), ifs:4);
+/*13*/	u8 ip_addr_lo;
+/*14*/	u8 ip_addr_hi;
+/*15*/	u8 X(X(X(X(X(X(X(promiscuous_mode:1, broadcast_disabled:1),
+	   wait_after_win:1), pad15_1:1), ignore_ul_bit:1), crc_16_bit:1),
+	   pad15_2:1), crs_or_cdt:1);
+/*16*/	u8 fc_delay_lo;
+/*17*/	u8 fc_delay_hi;
+/*18*/	u8 X(X(X(X(X(rx_stripping:1, tx_padding:1), rx_crc_transfer:1),
+	   rx_long_ok:1), fc_priority_threshold:3), pad18:1);
+/*19*/	u8 X(X(X(X(X(X(X(addr_wake:1, magic_packet_disable:1),
+	   fc_disable:1), fc_restop:1), fc_restart:1), fc_reject:1),
+	   full_duplex_force:1), full_duplex_pin:1);
+/*20*/	u8 X(X(X(pad20_1:5, fc_priority_location:1), multi_ia:1), pad20_2:1);
+/*21*/	u8 X(X(pad21_1:3, multicast_all:1), pad21_2:4);
+/*22*/	u8 X(X(rx_d102_mode:1, rx_vlan_drop:1), pad22:6);
+	u8 pad_d102[9];
+};
+
+#define E100_MAX_MULTICAST_ADDRS	64
+struct multi {
+	__le16 count;
+	u8 addr[E100_MAX_MULTICAST_ADDRS * ETH_ALEN + 2/*pad*/];
+};
+
+/* Important: keep total struct u32-aligned */
+#define UCODE_SIZE			134
+struct cb {
+	__le16 status;
+	__le16 command;
+	__le32 link;
+	union {
+		u8 iaaddr[ETH_ALEN];
+		__le32 ucode[UCODE_SIZE];
+		struct config config;
+		struct multi multi;
+		struct {
+			u32 tbd_array;
+			u16 tcb_byte_count;
+			u8 threshold;
+			u8 tbd_count;
+			struct {
+				__le32 buf_addr;
+				__le16 size;
+				u16 eol;
+			} tbd;
+		} tcb;
+		__le32 dump_buffer_addr;
+	} u;
+	struct cb *next, *prev;
+	dma_addr_t dma_addr;
+	struct sk_buff *skb;
+};
+
+enum loopback {
+	lb_none = 0, lb_mac = 1, lb_phy = 3,
+};
+
+struct stats {
+	__le32 tx_good_frames, tx_max_collisions, tx_late_collisions,
+		tx_underruns, tx_lost_crs, tx_deferred, tx_single_collisions,
+		tx_multiple_collisions, tx_total_collisions;
+	__le32 rx_good_frames, rx_crc_errors, rx_alignment_errors,
+		rx_resource_errors, rx_overrun_errors, rx_cdt_errors,
+		rx_short_frame_errors;
+	__le32 fc_xmt_pause, fc_rcv_pause, fc_rcv_unsupported;
+	__le16 xmt_tco_frames, rcv_tco_frames;
+	__le32 complete;
+};
+
+struct mem {
+	struct {
+		u32 signature;
+		u32 result;
+	} selftest;
+	struct stats stats;
+	u8 dump_buf[596];
+};
+
+struct param_range {
+	u32 min;
+	u32 max;
+	u32 count;
+};
+
+struct params {
+	struct param_range rfds;
+	struct param_range cbs;
+};
+
+struct nic {
+	/* Begin: frequently used values: keep adjacent for cache effect */
+	u32 msg_enable				____cacheline_aligned;
+	struct net_device *netdev;
+	struct pci_dev *pdev;
+	u16 (*mdio_ctrl)(struct nic *nic, u32 addr, u32 dir, u32 reg, u16 data);
+
+	struct rx *rxs				____cacheline_aligned;
+	struct rx *rx_to_use;
+	struct rx *rx_to_clean;
+	struct rfd blank_rfd;
+	enum ru_state ru_running;
+
+	spinlock_t cb_lock			____cacheline_aligned;
+	spinlock_t cmd_lock;
+	struct csr __iomem *csr;
+	enum scb_cmd_lo cuc_cmd;
+	unsigned int cbs_avail;
+	struct napi_struct napi;
+	struct cb *cbs;
+	struct cb *cb_to_use;
+	struct cb *cb_to_send;
+	struct cb *cb_to_clean;
+	__le16 tx_command;
+	/* End: frequently used values: keep adjacent for cache effect */
+
+	enum {
+		ich                = (1 << 0),
+		promiscuous        = (1 << 1),
+		multicast_all      = (1 << 2),
+		wol_magic          = (1 << 3),
+		ich_10h_workaround = (1 << 4),
+	} flags					____cacheline_aligned;
+
+	enum mac mac;
+	enum phy phy;
+	struct params params;
+	struct timer_list watchdog;
+	struct mii_if_info mii;
+	struct work_struct tx_timeout_task;
+	enum loopback loopback;
+
+	struct mem *mem;
+	dma_addr_t dma_addr;
+
+	struct dma_pool *cbs_pool;
+	dma_addr_t cbs_dma_addr;
+	u8 adaptive_ifs;
+	u8 tx_threshold;
+	u32 tx_frames;
+	u32 tx_collisions;
+	u32 tx_deferred;
+	u32 tx_single_collisions;
+	u32 tx_multiple_collisions;
+	u32 tx_fc_pause;
+	u32 tx_tco_frames;
+
+	u32 rx_fc_pause;
+	u32 rx_fc_unsupported;
+	u32 rx_tco_frames;
+	u32 rx_short_frame_errors;
+	u32 rx_over_length_errors;
+
+	u16 eeprom_wc;
+	__le16 eeprom[256];
+	spinlock_t mdio_lock;
+	const struct firmware *fw;
+};
+
+static inline void e100_write_flush(struct nic *nic)
+{
+	/* Flush previous PCI writes through intermediate bridges
+	 * by doing a benign read */
+	(void)ioread8(&nic->csr->scb.status);
+}
+
+static void e100_enable_irq(struct nic *nic)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&nic->cmd_lock, flags);
+	iowrite8(irq_mask_none, &nic->csr->scb.cmd_hi);
+	e100_write_flush(nic);
+	spin_unlock_irqrestore(&nic->cmd_lock, flags);
+}
+
+static void e100_disable_irq(struct nic *nic)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&nic->cmd_lock, flags);
+	iowrite8(irq_mask_all, &nic->csr->scb.cmd_hi);
+	e100_write_flush(nic);
+	spin_unlock_irqrestore(&nic->cmd_lock, flags);
+}
+
+static void e100_hw_reset(struct nic *nic)
+{
+	/* Put CU and RU into idle with a selective reset to get
+	 * device off of PCI bus */
+	iowrite32(selective_reset, &nic->csr->port);
+	e100_write_flush(nic); udelay(20);
+
+	/* Now fully reset device */
+	iowrite32(software_reset, &nic->csr->port);
+	e100_write_flush(nic); udelay(20);
+
+	/* Mask off our interrupt line - it's unmasked after reset */
+	e100_disable_irq(nic);
+}
+
+static int e100_self_test(struct nic *nic)
+{
+	u32 dma_addr = nic->dma_addr + offsetof(struct mem, selftest);
+
+	/* Passing the self-test is a pretty good indication
+	 * that the device can DMA to/from host memory */
+
+	nic->mem->selftest.signature = 0;
+	nic->mem->selftest.result = 0xFFFFFFFF;
+
+	iowrite32(selftest | dma_addr, &nic->csr->port);
+	e100_write_flush(nic);
+	/* Wait 10 msec for self-test to complete */
+	msleep(10);
+
+	/* Interrupts are enabled after self-test */
+	e100_disable_irq(nic);
+
+	/* Check results of self-test */
+	if (nic->mem->selftest.result != 0) {
+		netif_err(nic, hw, nic->netdev,
+			  "Self-test failed: result=0x%08X\n",
+			  nic->mem->selftest.result);
+		return -ETIMEDOUT;
+	}
+	if (nic->mem->selftest.signature == 0) {
+		netif_err(nic, hw, nic->netdev, "Self-test failed: timed out\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static void e100_eeprom_write(struct nic *nic, u16 addr_len, u16 addr, __le16 data)
+{
+	u32 cmd_addr_data[3];
+	u8 ctrl;
+	int i, j;
+
+	/* Three cmds: write/erase enable, write data, write/erase disable */
+	cmd_addr_data[0] = op_ewen << (addr_len - 2);
+	cmd_addr_data[1] = (((op_write << addr_len) | addr) << 16) |
+		le16_to_cpu(data);
+	cmd_addr_data[2] = op_ewds << (addr_len - 2);
+
+	/* Bit-bang cmds to write word to eeprom */
+	for (j = 0; j < 3; j++) {
+
+		/* Chip select */
+		iowrite8(eecs | eesk, &nic->csr->eeprom_ctrl_lo);
+		e100_write_flush(nic); udelay(4);
+
+		for (i = 31; i >= 0; i--) {
+			ctrl = (cmd_addr_data[j] & (1 << i)) ?
+				eecs | eedi : eecs;
+			iowrite8(ctrl, &nic->csr->eeprom_ctrl_lo);
+			e100_write_flush(nic); udelay(4);
+
+			iowrite8(ctrl | eesk, &nic->csr->eeprom_ctrl_lo);
+			e100_write_flush(nic); udelay(4);
+		}
+		/* Wait 10 msec for cmd to complete */
+		msleep(10);
+
+		/* Chip deselect */
+		iowrite8(0, &nic->csr->eeprom_ctrl_lo);
+		e100_write_flush(nic); udelay(4);
+	}
+};
+
+/* General technique stolen from the eepro100 driver - very clever */
+static __le16 e100_eeprom_read(struct nic *nic, u16 *addr_len, u16 addr)
+{
+	u32 cmd_addr_data;
+	u16 data = 0;
+	u8 ctrl;
+	int i;
+
+	cmd_addr_data = ((op_read << *addr_len) | addr) << 16;
+
+	/* Chip select */
+	iowrite8(eecs | eesk, &nic->csr->eeprom_ctrl_lo);
+	e100_write_flush(nic); udelay(4);
+
+	/* Bit-bang to read word from eeprom */
+	for (i = 31; i >= 0; i--) {
+		ctrl = (cmd_addr_data & (1 << i)) ? eecs | eedi : eecs;
+		iowrite8(ctrl, &nic->csr->eeprom_ctrl_lo);
+		e100_write_flush(nic); udelay(4);
+
+		iowrite8(ctrl | eesk, &nic->csr->eeprom_ctrl_lo);
+		e100_write_flush(nic); udelay(4);
+
+		/* Eeprom drives a dummy zero to EEDO after receiving
+		 * complete address.  Use this to adjust addr_len. */
+		ctrl = ioread8(&nic->csr->eeprom_ctrl_lo);
+		if (!(ctrl & eedo) && i > 16) {
+			*addr_len -= (i - 16);
+			i = 17;
+		}
+
+		data = (data << 1) | (ctrl & eedo ? 1 : 0);
+	}
+
+	/* Chip deselect */
+	iowrite8(0, &nic->csr->eeprom_ctrl_lo);
+	e100_write_flush(nic); udelay(4);
+
+	return cpu_to_le16(data);
+};
+
+/* Load entire EEPROM image into driver cache and validate checksum */
+static int e100_eeprom_load(struct nic *nic)
+{
+	u16 addr, addr_len = 8, checksum = 0;
+
+	/* Try reading with an 8-bit addr len to discover actual addr len */
+	e100_eeprom_read(nic, &addr_len, 0);
+	nic->eeprom_wc = 1 << addr_len;
+
+	for (addr = 0; addr < nic->eeprom_wc; addr++) {
+		nic->eeprom[addr] = e100_eeprom_read(nic, &addr_len, addr);
+		if (addr < nic->eeprom_wc - 1)
+			checksum += le16_to_cpu(nic->eeprom[addr]);
+	}
+
+	/* The checksum, stored in the last word, is calculated such that
+	 * the sum of words should be 0xBABA */
+	if (cpu_to_le16(0xBABA - checksum) != nic->eeprom[nic->eeprom_wc - 1]) {
+		netif_err(nic, probe, nic->netdev, "EEPROM corrupted\n");
+		if (!eeprom_bad_csum_allow)
+			return -EAGAIN;
+	}
+
+	return 0;
+}
+
+/* Save (portion of) driver EEPROM cache to device and update checksum */
+static int e100_eeprom_save(struct nic *nic, u16 start, u16 count)
+{
+	u16 addr, addr_len = 8, checksum = 0;
+
+	/* Try reading with an 8-bit addr len to discover actual addr len */
+	e100_eeprom_read(nic, &addr_len, 0);
+	nic->eeprom_wc = 1 << addr_len;
+
+	if (start + count >= nic->eeprom_wc)
+		return -EINVAL;
+
+	for (addr = start; addr < start + count; addr++)
+		e100_eeprom_write(nic, addr_len, addr, nic->eeprom[addr]);
+
+	/* The checksum, stored in the last word, is calculated such that
+	 * the sum of words should be 0xBABA */
+	for (addr = 0; addr < nic->eeprom_wc - 1; addr++)
+		checksum += le16_to_cpu(nic->eeprom[addr]);
+	nic->eeprom[nic->eeprom_wc - 1] = cpu_to_le16(0xBABA - checksum);
+	e100_eeprom_write(nic, addr_len, nic->eeprom_wc - 1,
+		nic->eeprom[nic->eeprom_wc - 1]);
+
+	return 0;
+}
+
+#define E100_WAIT_SCB_TIMEOUT 20000 /* we might have to wait 100ms!!! */
+#define E100_WAIT_SCB_FAST 20       /* delay like the old code */
+static int e100_exec_cmd(struct nic *nic, u8 cmd, dma_addr_t dma_addr)
+{
+	unsigned long flags;
+	unsigned int i;
+	int err = 0;
+
+	spin_lock_irqsave(&nic->cmd_lock, flags);
+
+	/* Previous command is accepted when SCB clears */
+	for (i = 0; i < E100_WAIT_SCB_TIMEOUT; i++) {
+		if (likely(!ioread8(&nic->csr->scb.cmd_lo)))
+			break;
+		cpu_relax();
+		if (unlikely(i > E100_WAIT_SCB_FAST))
+			udelay(5);
+	}
+	if (unlikely(i == E100_WAIT_SCB_TIMEOUT)) {
+		err = -EAGAIN;
+		goto err_unlock;
+	}
+
+	if (unlikely(cmd != cuc_resume))
+		iowrite32(dma_addr, &nic->csr->scb.gen_ptr);
+	iowrite8(cmd, &nic->csr->scb.cmd_lo);
+
+err_unlock:
+	spin_unlock_irqrestore(&nic->cmd_lock, flags);
+
+	return err;
+}
+
+static int e100_exec_cb(struct nic *nic, struct sk_buff *skb,
+	int (*cb_prepare)(struct nic *, struct cb *, struct sk_buff *))
+{
+	struct cb *cb;
+	unsigned long flags;
+	int err;
+
+	spin_lock_irqsave(&nic->cb_lock, flags);
+
+	if (unlikely(!nic->cbs_avail)) {
+		err = -ENOMEM;
+		goto err_unlock;
+	}
+
+	cb = nic->cb_to_use;
+	nic->cb_to_use = cb->next;
+	nic->cbs_avail--;
+	cb->skb = skb;
+
+	err = cb_prepare(nic, cb, skb);
+	if (err)
+		goto err_unlock;
+
+	if (unlikely(!nic->cbs_avail))
+		err = -ENOSPC;
+
+
+	/* Order is important otherwise we'll be in a race with h/w:
+	 * set S-bit in current first, then clear S-bit in previous. */
+	cb->command |= cpu_to_le16(cb_s);
+	dma_wmb();
+	cb->prev->command &= cpu_to_le16(~cb_s);
+
+	while (nic->cb_to_send != nic->cb_to_use) {
+		if (unlikely(e100_exec_cmd(nic, nic->cuc_cmd,
+			nic->cb_to_send->dma_addr))) {
+			/* Ok, here's where things get sticky.  It's
+			 * possible that we can't schedule the command
+			 * because the controller is too busy, so
+			 * let's just queue the command and try again
+			 * when another command is scheduled. */
+			if (err == -ENOSPC) {
+				//request a reset
+				schedule_work(&nic->tx_timeout_task);
+			}
+			break;
+		} else {
+			nic->cuc_cmd = cuc_resume;
+			nic->cb_to_send = nic->cb_to_send->next;
+		}
+	}
+
+err_unlock:
+	spin_unlock_irqrestore(&nic->cb_lock, flags);
+
+	return err;
+}
+
+static int mdio_read(struct net_device *netdev, int addr, int reg)
+{
+	struct nic *nic = netdev_priv(netdev);
+	return nic->mdio_ctrl(nic, addr, mdi_read, reg, 0);
+}
+
+static void mdio_write(struct net_device *netdev, int addr, int reg, int data)
+{
+	struct nic *nic = netdev_priv(netdev);
+
+	nic->mdio_ctrl(nic, addr, mdi_write, reg, data);
+}
+
+/* the standard mdio_ctrl() function for usual MII-compliant hardware */
+static u16 mdio_ctrl_hw(struct nic *nic, u32 addr, u32 dir, u32 reg, u16 data)
+{
+	u32 data_out = 0;
+	unsigned int i;
+	unsigned long flags;
+
+
+	/*
+	 * Stratus87247: we shouldn't be writing the MDI control
+	 * register until the Ready bit shows True.  Also, since
+	 * manipulation of the MDI control registers is a multi-step
+	 * procedure it should be done under lock.
+	 */
+	spin_lock_irqsave(&nic->mdio_lock, flags);
+	for (i = 100; i; --i) {
+		if (ioread32(&nic->csr->mdi_ctrl) & mdi_ready)
+			break;
+		udelay(20);
+	}
+	if (unlikely(!i)) {
+		netdev_err(nic->netdev, "e100.mdio_ctrl won't go Ready\n");
+		spin_unlock_irqrestore(&nic->mdio_lock, flags);
+		return 0;		/* No way to indicate timeout error */
+	}
+	iowrite32((reg << 16) | (addr << 21) | dir | data, &nic->csr->mdi_ctrl);
+
+	for (i = 0; i < 100; i++) {
+		udelay(20);
+		if ((data_out = ioread32(&nic->csr->mdi_ctrl)) & mdi_ready)
+			break;
+	}
+	spin_unlock_irqrestore(&nic->mdio_lock, flags);
+	netif_printk(nic, hw, KERN_DEBUG, nic->netdev,
+		     "%s:addr=%d, reg=%d, data_in=0x%04X, data_out=0x%04X\n",
+		     dir == mdi_read ? "READ" : "WRITE",
+		     addr, reg, data, data_out);
+	return (u16)data_out;
+}
+
+/* slightly tweaked mdio_ctrl() function for phy_82552_v specifics */
+static u16 mdio_ctrl_phy_82552_v(struct nic *nic,
+				 u32 addr,
+				 u32 dir,
+				 u32 reg,
+				 u16 data)
+{
+	if ((reg == MII_BMCR) && (dir == mdi_write)) {
+		if (data & (BMCR_ANRESTART | BMCR_ANENABLE)) {
+			u16 advert = mdio_read(nic->netdev, nic->mii.phy_id,
+							MII_ADVERTISE);
+
+			/*
+			 * Workaround Si issue where sometimes the part will not
+			 * autoneg to 100Mbps even when advertised.
+			 */
+			if (advert & ADVERTISE_100FULL)
+				data |= BMCR_SPEED100 | BMCR_FULLDPLX;
+			else if (advert & ADVERTISE_100HALF)
+				data |= BMCR_SPEED100;
+		}
+	}
+	return mdio_ctrl_hw(nic, addr, dir, reg, data);
+}
+
+/* Fully software-emulated mdio_ctrl() function for cards without
+ * MII-compliant PHYs.
+ * For now, this is mainly geared towards 80c24 support; in case of further
+ * requirements for other types (i82503, ...?) either extend this mechanism
+ * or split it, whichever is cleaner.
+ */
+static u16 mdio_ctrl_phy_mii_emulated(struct nic *nic,
+				      u32 addr,
+				      u32 dir,
+				      u32 reg,
+				      u16 data)
+{
+	/* might need to allocate a netdev_priv'ed register array eventually
+	 * to be able to record state changes, but for now
+	 * some fully hardcoded register handling ought to be ok I guess. */
+
+	if (dir == mdi_read) {
+		switch (reg) {
+		case MII_BMCR:
+			/* Auto-negotiation, right? */
+			return  BMCR_ANENABLE |
+				BMCR_FULLDPLX;
+		case MII_BMSR:
+			return	BMSR_LSTATUS /* for mii_link_ok() */ |
+				BMSR_ANEGCAPABLE |
+				BMSR_10FULL;
+		case MII_ADVERTISE:
+			/* 80c24 is a "combo card" PHY, right? */
+			return	ADVERTISE_10HALF |
+				ADVERTISE_10FULL;
+		default:
+			netif_printk(nic, hw, KERN_DEBUG, nic->netdev,
+				     "%s:addr=%d, reg=%d, data=0x%04X: unimplemented emulation!\n",
+				     dir == mdi_read ? "READ" : "WRITE",
+				     addr, reg, data);
+			return 0xFFFF;
+		}
+	} else {
+		switch (reg) {
+		default:
+			netif_printk(nic, hw, KERN_DEBUG, nic->netdev,
+				     "%s:addr=%d, reg=%d, data=0x%04X: unimplemented emulation!\n",
+				     dir == mdi_read ? "READ" : "WRITE",
+				     addr, reg, data);
+			return 0xFFFF;
+		}
+	}
+}
+static inline int e100_phy_supports_mii(struct nic *nic)
+{
+	/* for now, just check it by comparing whether we
+	   are using MII software emulation.
+	*/
+	return (nic->mdio_ctrl != mdio_ctrl_phy_mii_emulated);
+}
+
+static void e100_get_defaults(struct nic *nic)
+{
+	struct param_range rfds = { .min = 16, .max = 256, .count = 256 };
+	struct param_range cbs  = { .min = 64, .max = 256, .count = 128 };
+
+	/* MAC type is encoded as rev ID; exception: ICH is treated as 82559 */
+	nic->mac = (nic->flags & ich) ? mac_82559_D101M : nic->pdev->revision;
+	if (nic->mac == mac_unknown)
+		nic->mac = mac_82557_D100_A;
+
+	nic->params.rfds = rfds;
+	nic->params.cbs = cbs;
+
+	/* Quadwords to DMA into FIFO before starting frame transmit */
+	nic->tx_threshold = 0xE0;
+
+	/* no interrupt for every tx completion, delay = 256us if not 557 */
+	nic->tx_command = cpu_to_le16(cb_tx | cb_tx_sf |
+		((nic->mac >= mac_82558_D101_A4) ? cb_cid : cb_i));
+
+	/* Template for a freshly allocated RFD */
+	nic->blank_rfd.command = 0;
+	nic->blank_rfd.rbd = cpu_to_le32(0xFFFFFFFF);
+	nic->blank_rfd.size = cpu_to_le16(VLAN_ETH_FRAME_LEN + ETH_FCS_LEN);
+
+	/* MII setup */
+	nic->mii.phy_id_mask = 0x1F;
+	nic->mii.reg_num_mask = 0x1F;
+	nic->mii.dev = nic->netdev;
+	nic->mii.mdio_read = mdio_read;
+	nic->mii.mdio_write = mdio_write;
+}
+
+static int e100_configure(struct nic *nic, struct cb *cb, struct sk_buff *skb)
+{
+	struct config *config = &cb->u.config;
+	u8 *c = (u8 *)config;
+	struct net_device *netdev = nic->netdev;
+
+	cb->command = cpu_to_le16(cb_config);
+
+	memset(config, 0, sizeof(struct config));
+
+	config->byte_count = 0x16;		/* bytes in this struct */
+	config->rx_fifo_limit = 0x8;		/* bytes in FIFO before DMA */
+	config->direct_rx_dma = 0x1;		/* reserved */
+	config->standard_tcb = 0x1;		/* 1=standard, 0=extended */
+	config->standard_stat_counter = 0x1;	/* 1=standard, 0=extended */
+	config->rx_discard_short_frames = 0x1;	/* 1=discard, 0=pass */
+	config->tx_underrun_retry = 0x3;	/* # of underrun retries */
+	if (e100_phy_supports_mii(nic))
+		config->mii_mode = 1;           /* 1=MII mode, 0=i82503 mode */
+	config->pad10 = 0x6;
+	config->no_source_addr_insertion = 0x1;	/* 1=no, 0=yes */
+	config->preamble_length = 0x2;		/* 0=1, 1=3, 2=7, 3=15 bytes */
+	config->ifs = 0x6;			/* x16 = inter frame spacing */
+	config->ip_addr_hi = 0xF2;		/* ARP IP filter - not used */
+	config->pad15_1 = 0x1;
+	config->pad15_2 = 0x1;
+	config->crs_or_cdt = 0x0;		/* 0=CRS only, 1=CRS or CDT */
+	config->fc_delay_hi = 0x40;		/* time delay for fc frame */
+	config->tx_padding = 0x1;		/* 1=pad short frames */
+	config->fc_priority_threshold = 0x7;	/* 7=priority fc disabled */
+	config->pad18 = 0x1;
+	config->full_duplex_pin = 0x1;		/* 1=examine FDX# pin */
+	config->pad20_1 = 0x1F;
+	config->fc_priority_location = 0x1;	/* 1=byte#31, 0=byte#19 */
+	config->pad21_1 = 0x5;
+
+	config->adaptive_ifs = nic->adaptive_ifs;
+	config->loopback = nic->loopback;
+
+	if (nic->mii.force_media && nic->mii.full_duplex)
+		config->full_duplex_force = 0x1;	/* 1=force, 0=auto */
+
+	if (nic->flags & promiscuous || nic->loopback) {
+		config->rx_save_bad_frames = 0x1;	/* 1=save, 0=discard */
+		config->rx_discard_short_frames = 0x0;	/* 1=discard, 0=save */
+		config->promiscuous_mode = 0x1;		/* 1=on, 0=off */
+	}
+
+	if (unlikely(netdev->features & NETIF_F_RXFCS))
+		config->rx_crc_transfer = 0x1;	/* 1=save, 0=discard */
+
+	if (nic->flags & multicast_all)
+		config->multicast_all = 0x1;		/* 1=accept, 0=no */
+
+	/* disable WoL when up */
+	if (netif_running(nic->netdev) || !(nic->flags & wol_magic))
+		config->magic_packet_disable = 0x1;	/* 1=off, 0=on */
+
+	if (nic->mac >= mac_82558_D101_A4) {
+		config->fc_disable = 0x1;	/* 1=Tx fc off, 0=Tx fc on */
+		config->mwi_enable = 0x1;	/* 1=enable, 0=disable */
+		config->standard_tcb = 0x0;	/* 1=standard, 0=extended */
+		config->rx_long_ok = 0x1;	/* 1=VLANs ok, 0=standard */
+		if (nic->mac >= mac_82559_D101M) {
+			config->tno_intr = 0x1;		/* TCO stats enable */
+			/* Enable TCO in extended config */
+			if (nic->mac >= mac_82551_10) {
+				config->byte_count = 0x20; /* extended bytes */
+				config->rx_d102_mode = 0x1; /* GMRC for TCO */
+			}
+		} else {
+			config->standard_stat_counter = 0x0;
+		}
+	}
+
+	if (netdev->features & NETIF_F_RXALL) {
+		config->rx_save_overruns = 0x1; /* 1=save, 0=discard */
+		config->rx_save_bad_frames = 0x1;       /* 1=save, 0=discard */
+		config->rx_discard_short_frames = 0x0;  /* 1=discard, 0=save */
+	}
+
+	netif_printk(nic, hw, KERN_DEBUG, nic->netdev, "[00-07]=%8ph\n",
+		     c + 0);
+	netif_printk(nic, hw, KERN_DEBUG, nic->netdev, "[08-15]=%8ph\n",
+		     c + 8);
+	netif_printk(nic, hw, KERN_DEBUG, nic->netdev, "[16-23]=%8ph\n",
+		     c + 16);
+	return 0;
+}
+
+/*************************************************************************
+*  CPUSaver parameters
+*
+*  All CPUSaver parameters are 16-bit literals that are part of a
+*  "move immediate value" instruction.  By changing the value of
+*  the literal in the instruction before the code is loaded, the
+*  driver can change the algorithm.
+*
+*  INTDELAY - This loads the dead-man timer with its initial value.
+*    When this timer expires the interrupt is asserted, and the
+*    timer is reset each time a new packet is received.  (see
+*    BUNDLEMAX below to set the limit on number of chained packets)
+*    The current default is 0x600 or 1536.  Experiments show that
+*    the value should probably stay within the 0x200 - 0x1000.
+*
+*  BUNDLEMAX -
+*    This sets the maximum number of frames that will be bundled.  In
+*    some situations, such as the TCP windowing algorithm, it may be
+*    better to limit the growth of the bundle size than let it go as
+*    high as it can, because that could cause too much added latency.
+*    The default is six, because this is the number of packets in the
+*    default TCP window size.  A value of 1 would make CPUSaver indicate
+*    an interrupt for every frame received.  If you do not want to put
+*    a limit on the bundle size, set this value to xFFFF.
+*
+*  BUNDLESMALL -
+*    This contains a bit-mask describing the minimum size frame that
+*    will be bundled.  The default masks the lower 7 bits, which means
+*    that any frame less than 128 bytes in length will not be bundled,
+*    but will instead immediately generate an interrupt.  This does
+*    not affect the current bundle in any way.  Any frame that is 128
+*    bytes or large will be bundled normally.  This feature is meant
+*    to provide immediate indication of ACK frames in a TCP environment.
+*    Customers were seeing poor performance when a machine with CPUSaver
+*    enabled was sending but not receiving.  The delay introduced when
+*    the ACKs were received was enough to reduce total throughput, because
+*    the sender would sit idle until the ACK was finally seen.
+*
+*    The current default is 0xFF80, which masks out the lower 7 bits.
+*    This means that any frame which is x7F (127) bytes or smaller
+*    will cause an immediate interrupt.  Because this value must be a
+*    bit mask, there are only a few valid values that can be used.  To
+*    turn this feature off, the driver can write the value xFFFF to the
+*    lower word of this instruction (in the same way that the other
+*    parameters are used).  Likewise, a value of 0xF800 (2047) would
+*    cause an interrupt to be generated for every frame, because all
+*    standard Ethernet frames are <= 2047 bytes in length.
+*************************************************************************/
+
+/* if you wish to disable the ucode functionality, while maintaining the
+ * workarounds it provides, set the following defines to:
+ * BUNDLESMALL 0
+ * BUNDLEMAX 1
+ * INTDELAY 1
+ */
+#define BUNDLESMALL 1
+#define BUNDLEMAX (u16)6
+#define INTDELAY (u16)1536 /* 0x600 */
+
+/* Initialize firmware */
+static const struct firmware *e100_request_firmware(struct nic *nic)
+{
+	const char *fw_name;
+	const struct firmware *fw = nic->fw;
+	u8 timer, bundle, min_size;
+	int err = 0;
+	bool required = false;
+
+	/* do not load u-code for ICH devices */
+	if (nic->flags & ich)
+		return NULL;
+
+	/* Search for ucode match against h/w revision
+	 *
+	 * Based on comments in the source code for the FreeBSD fxp
+	 * driver, the FIRMWARE_D102E ucode includes both CPUSaver and
+	 *
+	 *    "fixes for bugs in the B-step hardware (specifically, bugs
+	 *     with Inline Receive)."
+	 *
+	 * So we must fail if it cannot be loaded.
+	 *
+	 * The other microcode files are only required for the optional
+	 * CPUSaver feature.  Nice to have, but no reason to fail.
+	 */
+	if (nic->mac == mac_82559_D101M) {
+		fw_name = FIRMWARE_D101M;
+	} else if (nic->mac == mac_82559_D101S) {
+		fw_name = FIRMWARE_D101S;
+	} else if (nic->mac == mac_82551_F || nic->mac == mac_82551_10) {
+		fw_name = FIRMWARE_D102E;
+		required = true;
+	} else { /* No ucode on other devices */
+		return NULL;
+	}
+
+	/* If the firmware has not previously been loaded, request a pointer
+	 * to it. If it was previously loaded, we are reinitializing the
+	 * adapter, possibly in a resume from hibernate, in which case
+	 * request_firmware() cannot be used.
+	 */
+	if (!fw)
+		err = request_firmware(&fw, fw_name, &nic->pdev->dev);
+
+	if (err) {
+		if (required) {
+			netif_err(nic, probe, nic->netdev,
+				  "Failed to load firmware \"%s\": %d\n",
+				  fw_name, err);
+			return ERR_PTR(err);
+		} else {
+			netif_info(nic, probe, nic->netdev,
+				   "CPUSaver disabled. Needs \"%s\": %d\n",
+				   fw_name, err);
+			return NULL;
+		}
+	}
+
+	/* Firmware should be precisely UCODE_SIZE (words) plus three bytes
+	   indicating the offsets for BUNDLESMALL, BUNDLEMAX, INTDELAY */
+	if (fw->size != UCODE_SIZE * 4 + 3) {
+		netif_err(nic, probe, nic->netdev,
+			  "Firmware \"%s\" has wrong size %zu\n",
+			  fw_name, fw->size);
+		release_firmware(fw);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Read timer, bundle and min_size from end of firmware blob */
+	timer = fw->data[UCODE_SIZE * 4];
+	bundle = fw->data[UCODE_SIZE * 4 + 1];
+	min_size = fw->data[UCODE_SIZE * 4 + 2];
+
+	if (timer >= UCODE_SIZE || bundle >= UCODE_SIZE ||
+	    min_size >= UCODE_SIZE) {
+		netif_err(nic, probe, nic->netdev,
+			  "\"%s\" has bogus offset values (0x%x,0x%x,0x%x)\n",
+			  fw_name, timer, bundle, min_size);
+		release_firmware(fw);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* OK, firmware is validated and ready to use. Save a pointer
+	 * to it in the nic */
+	nic->fw = fw;
+	return fw;
+}
+
+static int e100_setup_ucode(struct nic *nic, struct cb *cb,
+			     struct sk_buff *skb)
+{
+	const struct firmware *fw = (void *)skb;
+	u8 timer, bundle, min_size;
+
+	/* It's not a real skb; we just abused the fact that e100_exec_cb
+	   will pass it through to here... */
+	cb->skb = NULL;
+
+	/* firmware is stored as little endian already */
+	memcpy(cb->u.ucode, fw->data, UCODE_SIZE * 4);
+
+	/* Read timer, bundle and min_size from end of firmware blob */
+	timer = fw->data[UCODE_SIZE * 4];
+	bundle = fw->data[UCODE_SIZE * 4 + 1];
+	min_size = fw->data[UCODE_SIZE * 4 + 2];
+
+	/* Insert user-tunable settings in cb->u.ucode */
+	cb->u.ucode[timer] &= cpu_to_le32(0xFFFF0000);
+	cb->u.ucode[timer] |= cpu_to_le32(INTDELAY);
+	cb->u.ucode[bundle] &= cpu_to_le32(0xFFFF0000);
+	cb->u.ucode[bundle] |= cpu_to_le32(BUNDLEMAX);
+	cb->u.ucode[min_size] &= cpu_to_le32(0xFFFF0000);
+	cb->u.ucode[min_size] |= cpu_to_le32((BUNDLESMALL) ? 0xFFFF : 0xFF80);
+
+	cb->command = cpu_to_le16(cb_ucode | cb_el);
+	return 0;
+}
+
+static inline int e100_load_ucode_wait(struct nic *nic)
+{
+	const struct firmware *fw;
+	int err = 0, counter = 50;
+	struct cb *cb = nic->cb_to_clean;
+
+	fw = e100_request_firmware(nic);
+	/* If it's NULL, then no ucode is required */
+	if (IS_ERR_OR_NULL(fw))
+		return PTR_ERR_OR_ZERO(fw);
+
+	if ((err = e100_exec_cb(nic, (void *)fw, e100_setup_ucode)))
+		netif_err(nic, probe, nic->netdev,
+			  "ucode cmd failed with error %d\n", err);
+
+	/* must restart cuc */
+	nic->cuc_cmd = cuc_start;
+
+	/* wait for completion */
+	e100_write_flush(nic);
+	udelay(10);
+
+	/* wait for possibly (ouch) 500ms */
+	while (!(cb->status & cpu_to_le16(cb_complete))) {
+		msleep(10);
+		if (!--counter) break;
+	}
+
+	/* ack any interrupts, something could have been set */
+	iowrite8(~0, &nic->csr->scb.stat_ack);
+
+	/* if the command failed, or is not OK, notify and return */
+	if (!counter || !(cb->status & cpu_to_le16(cb_ok))) {
+		netif_err(nic, probe, nic->netdev, "ucode load failed\n");
+		err = -EPERM;
+	}
+
+	return err;
+}
+
+static int e100_setup_iaaddr(struct nic *nic, struct cb *cb,
+	struct sk_buff *skb)
+{
+	cb->command = cpu_to_le16(cb_iaaddr);
+	memcpy(cb->u.iaaddr, nic->netdev->dev_addr, ETH_ALEN);
+	return 0;
+}
+
+static int e100_dump(struct nic *nic, struct cb *cb, struct sk_buff *skb)
+{
+	cb->command = cpu_to_le16(cb_dump);
+	cb->u.dump_buffer_addr = cpu_to_le32(nic->dma_addr +
+		offsetof(struct mem, dump_buf));
+	return 0;
+}
+
+static int e100_phy_check_without_mii(struct nic *nic)
+{
+	u8 phy_type;
+	int without_mii;
+
+	phy_type = (le16_to_cpu(nic->eeprom[eeprom_phy_iface]) >> 8) & 0x0f;
+
+	switch (phy_type) {
+	case NoSuchPhy: /* Non-MII PHY; UNTESTED! */
+	case I82503: /* Non-MII PHY; UNTESTED! */
+	case S80C24: /* Non-MII PHY; tested and working */
+		/* paragraph from the FreeBSD driver, "FXP_PHY_80C24":
+		 * The Seeq 80c24 AutoDUPLEX(tm) Ethernet Interface Adapter
+		 * doesn't have a programming interface of any sort.  The
+		 * media is sensed automatically based on how the link partner
+		 * is configured.  This is, in essence, manual configuration.
+		 */
+		netif_info(nic, probe, nic->netdev,
+			   "found MII-less i82503 or 80c24 or other PHY\n");
+
+		nic->mdio_ctrl = mdio_ctrl_phy_mii_emulated;
+		nic->mii.phy_id = 0; /* is this ok for an MII-less PHY? */
+
+		/* these might be needed for certain MII-less cards...
+		 * nic->flags |= ich;
+		 * nic->flags |= ich_10h_workaround; */
+
+		without_mii = 1;
+		break;
+	default:
+		without_mii = 0;
+		break;
+	}
+	return without_mii;
+}
+
+#define NCONFIG_AUTO_SWITCH	0x0080
+#define MII_NSC_CONG		MII_RESV1
+#define NSC_CONG_ENABLE		0x0100
+#define NSC_CONG_TXREADY	0x0400
+static int e100_phy_init(struct nic *nic)
+{
+	struct net_device *netdev = nic->netdev;
+	u32 addr;
+	u16 bmcr, stat, id_lo, id_hi, cong;
+
+	/* Discover phy addr by searching addrs in order {1,0,2,..., 31} */
+	for (addr = 0; addr < 32; addr++) {
+		nic->mii.phy_id = (addr == 0) ? 1 : (addr == 1) ? 0 : addr;
+		bmcr = mdio_read(netdev, nic->mii.phy_id, MII_BMCR);
+		stat = mdio_read(netdev, nic->mii.phy_id, MII_BMSR);
+		stat = mdio_read(netdev, nic->mii.phy_id, MII_BMSR);
+		if (!((bmcr == 0xFFFF) || ((stat == 0) && (bmcr == 0))))
+			break;
+	}
+	if (addr == 32) {
+		/* uhoh, no PHY detected: check whether we seem to be some
+		 * weird, rare variant which is *known* to not have any MII.
+		 * But do this AFTER MII checking only, since this does
+		 * lookup of EEPROM values which may easily be unreliable. */
+		if (e100_phy_check_without_mii(nic))
+			return 0; /* simply return and hope for the best */
+		else {
+			/* for unknown cases log a fatal error */
+			netif_err(nic, hw, nic->netdev,
+				  "Failed to locate any known PHY, aborting\n");
+			return -EAGAIN;
+		}
+	} else
+		netif_printk(nic, hw, KERN_DEBUG, nic->netdev,
+			     "phy_addr = %d\n", nic->mii.phy_id);
+
+	/* Get phy ID */
+	id_lo = mdio_read(netdev, nic->mii.phy_id, MII_PHYSID1);
+	id_hi = mdio_read(netdev, nic->mii.phy_id, MII_PHYSID2);
+	nic->phy = (u32)id_hi << 16 | (u32)id_lo;
+	netif_printk(nic, hw, KERN_DEBUG, nic->netdev,
+		     "phy ID = 0x%08X\n", nic->phy);
+
+	/* Select the phy and isolate the rest */
+	for (addr = 0; addr < 32; addr++) {
+		if (addr != nic->mii.phy_id) {
+			mdio_write(netdev, addr, MII_BMCR, BMCR_ISOLATE);
+		} else if (nic->phy != phy_82552_v) {
+			bmcr = mdio_read(netdev, addr, MII_BMCR);
+			mdio_write(netdev, addr, MII_BMCR,
+				bmcr & ~BMCR_ISOLATE);
+		}
+	}
+	/*
+	 * Workaround for 82552:
+	 * Clear the ISOLATE bit on selected phy_id last (mirrored on all
+	 * other phy_id's) using bmcr value from addr discovery loop above.
+	 */
+	if (nic->phy == phy_82552_v)
+		mdio_write(netdev, nic->mii.phy_id, MII_BMCR,
+			bmcr & ~BMCR_ISOLATE);
+
+	/* Handle National tx phys */
+#define NCS_PHY_MODEL_MASK	0xFFF0FFFF
+	if ((nic->phy & NCS_PHY_MODEL_MASK) == phy_nsc_tx) {
+		/* Disable congestion control */
+		cong = mdio_read(netdev, nic->mii.phy_id, MII_NSC_CONG);
+		cong |= NSC_CONG_TXREADY;
+		cong &= ~NSC_CONG_ENABLE;
+		mdio_write(netdev, nic->mii.phy_id, MII_NSC_CONG, cong);
+	}
+
+	if (nic->phy == phy_82552_v) {
+		u16 advert = mdio_read(netdev, nic->mii.phy_id, MII_ADVERTISE);
+
+		/* assign special tweaked mdio_ctrl() function */
+		nic->mdio_ctrl = mdio_ctrl_phy_82552_v;
+
+		/* Workaround Si not advertising flow-control during autoneg */
+		advert |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
+		mdio_write(netdev, nic->mii.phy_id, MII_ADVERTISE, advert);
+
+		/* Reset for the above changes to take effect */
+		bmcr = mdio_read(netdev, nic->mii.phy_id, MII_BMCR);
+		bmcr |= BMCR_RESET;
+		mdio_write(netdev, nic->mii.phy_id, MII_BMCR, bmcr);
+	} else if ((nic->mac >= mac_82550_D102) || ((nic->flags & ich) &&
+	   (mdio_read(netdev, nic->mii.phy_id, MII_TPISTATUS) & 0x8000) &&
+	   (le16_to_cpu(nic->eeprom[eeprom_cnfg_mdix]) & eeprom_mdix_enabled))) {
+		/* enable/disable MDI/MDI-X auto-switching. */
+		mdio_write(netdev, nic->mii.phy_id, MII_NCONFIG,
+				nic->mii.force_media ? 0 : NCONFIG_AUTO_SWITCH);
+	}
+
+	return 0;
+}
+
+static int e100_hw_init(struct nic *nic)
+{
+	int err = 0;
+
+	e100_hw_reset(nic);
+
+	netif_err(nic, hw, nic->netdev, "e100_hw_init\n");
+	if ((err = e100_self_test(nic)))
+		return err;
+
+	if ((err = e100_phy_init(nic)))
+		return err;
+	if ((err = e100_exec_cmd(nic, cuc_load_base, 0)))
+		return err;
+	if ((err = e100_exec_cmd(nic, ruc_load_base, 0)))
+		return err;
+	if ((err = e100_load_ucode_wait(nic)))
+		return err;
+	if ((err = e100_exec_cb(nic, NULL, e100_configure)))
+		return err;
+	if ((err = e100_exec_cb(nic, NULL, e100_setup_iaaddr)))
+		return err;
+	if ((err = e100_exec_cmd(nic, cuc_dump_addr,
+		nic->dma_addr + offsetof(struct mem, stats))))
+		return err;
+	if ((err = e100_exec_cmd(nic, cuc_dump_reset, 0)))
+		return err;
+
+	e100_disable_irq(nic);
+
+	return 0;
+}
+
+static int e100_multi(struct nic *nic, struct cb *cb, struct sk_buff *skb)
+{
+	struct net_device *netdev = nic->netdev;
+	struct netdev_hw_addr *ha;
+	u16 i, count = min(netdev_mc_count(netdev), E100_MAX_MULTICAST_ADDRS);
+
+	cb->command = cpu_to_le16(cb_multi);
+	cb->u.multi.count = cpu_to_le16(count * ETH_ALEN);
+	i = 0;
+	netdev_for_each_mc_addr(ha, netdev) {
+		if (i == count)
+			break;
+		memcpy(&cb->u.multi.addr[i++ * ETH_ALEN], &ha->addr,
+			ETH_ALEN);
+	}
+	return 0;
+}
+
+static void e100_set_multicast_list(struct net_device *netdev)
+{
+	struct nic *nic = netdev_priv(netdev);
+
+	netif_printk(nic, hw, KERN_DEBUG, nic->netdev,
+		     "mc_count=%d, flags=0x%04X\n",
+		     netdev_mc_count(netdev), netdev->flags);
+
+	if (netdev->flags & IFF_PROMISC)
+		nic->flags |= promiscuous;
+	else
+		nic->flags &= ~promiscuous;
+
+	if (netdev->flags & IFF_ALLMULTI ||
+		netdev_mc_count(netdev) > E100_MAX_MULTICAST_ADDRS)
+		nic->flags |= multicast_all;
+	else
+		nic->flags &= ~multicast_all;
+
+	e100_exec_cb(nic, NULL, e100_configure);
+	e100_exec_cb(nic, NULL, e100_multi);
+}
+
+static void e100_update_stats(struct nic *nic)
+{
+	struct net_device *dev = nic->netdev;
+	struct net_device_stats *ns = &dev->stats;
+	struct stats *s = &nic->mem->stats;
+	__le32 *complete = (nic->mac < mac_82558_D101_A4) ? &s->fc_xmt_pause :
+		(nic->mac < mac_82559_D101M) ? (__le32 *)&s->xmt_tco_frames :
+		&s->complete;
+
+	/* Device's stats reporting may take several microseconds to
+	 * complete, so we're always waiting for results of the
+	 * previous command. */
+
+	if (*complete == cpu_to_le32(cuc_dump_reset_complete)) {
+		*complete = 0;
+		nic->tx_frames = le32_to_cpu(s->tx_good_frames);
+		nic->tx_collisions = le32_to_cpu(s->tx_total_collisions);
+		ns->tx_aborted_errors += le32_to_cpu(s->tx_max_collisions);
+		ns->tx_window_errors += le32_to_cpu(s->tx_late_collisions);
+		ns->tx_carrier_errors += le32_to_cpu(s->tx_lost_crs);
+		ns->tx_fifo_errors += le32_to_cpu(s->tx_underruns);
+		ns->collisions += nic->tx_collisions;
+		ns->tx_errors += le32_to_cpu(s->tx_max_collisions) +
+			le32_to_cpu(s->tx_lost_crs);
+		nic->rx_short_frame_errors +=
+			le32_to_cpu(s->rx_short_frame_errors);
+		ns->rx_length_errors = nic->rx_short_frame_errors +
+			nic->rx_over_length_errors;
+		ns->rx_crc_errors += le32_to_cpu(s->rx_crc_errors);
+		ns->rx_frame_errors += le32_to_cpu(s->rx_alignment_errors);
+		ns->rx_over_errors += le32_to_cpu(s->rx_overrun_errors);
+		ns->rx_fifo_errors += le32_to_cpu(s->rx_overrun_errors);
+		ns->rx_missed_errors += le32_to_cpu(s->rx_resource_errors);
+		ns->rx_errors += le32_to_cpu(s->rx_crc_errors) +
+			le32_to_cpu(s->rx_alignment_errors) +
+			le32_to_cpu(s->rx_short_frame_errors) +
+			le32_to_cpu(s->rx_cdt_errors);
+		nic->tx_deferred += le32_to_cpu(s->tx_deferred);
+		nic->tx_single_collisions +=
+			le32_to_cpu(s->tx_single_collisions);
+		nic->tx_multiple_collisions +=
+			le32_to_cpu(s->tx_multiple_collisions);
+		if (nic->mac >= mac_82558_D101_A4) {
+			nic->tx_fc_pause += le32_to_cpu(s->fc_xmt_pause);
+			nic->rx_fc_pause += le32_to_cpu(s->fc_rcv_pause);
+			nic->rx_fc_unsupported +=
+				le32_to_cpu(s->fc_rcv_unsupported);
+			if (nic->mac >= mac_82559_D101M) {
+				nic->tx_tco_frames +=
+					le16_to_cpu(s->xmt_tco_frames);
+				nic->rx_tco_frames +=
+					le16_to_cpu(s->rcv_tco_frames);
+			}
+		}
+	}
+
+
+	if (e100_exec_cmd(nic, cuc_dump_reset, 0))
+		netif_printk(nic, tx_err, KERN_DEBUG, nic->netdev,
+			     "exec cuc_dump_reset failed\n");
+}
+
+static void e100_adjust_adaptive_ifs(struct nic *nic, int speed, int duplex)
+{
+	/* Adjust inter-frame-spacing (IFS) between two transmits if
+	 * we're getting collisions on a half-duplex connection. */
+
+	if (duplex == DUPLEX_HALF) {
+		u32 prev = nic->adaptive_ifs;
+		u32 min_frames = (speed == SPEED_100) ? 1000 : 100;
+
+		if ((nic->tx_frames / 32 < nic->tx_collisions) &&
+		   (nic->tx_frames > min_frames)) {
+			if (nic->adaptive_ifs < 60)
+				nic->adaptive_ifs += 5;
+		} else if (nic->tx_frames < min_frames) {
+			if (nic->adaptive_ifs >= 5)
+				nic->adaptive_ifs -= 5;
+		}
+		if (nic->adaptive_ifs != prev)
+			e100_exec_cb(nic, NULL, e100_configure);
+	}
+}
+
+static void e100_watchdog(struct timer_list *t)
+{
+	struct nic *nic = from_timer(nic, t, watchdog);
+	struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
+	u32 speed;
+
+	netif_printk(nic, timer, KERN_DEBUG, nic->netdev,
+		     "right now = %ld\n", jiffies);
+
+	/* mii library handles link maintenance tasks */
+
+	mii_ethtool_gset(&nic->mii, &cmd);
+	speed = ethtool_cmd_speed(&cmd);
+
+	if (mii_link_ok(&nic->mii) && !netif_carrier_ok(nic->netdev)) {
+		netdev_info(nic->netdev, "NIC Link is Up %u Mbps %s Duplex\n",
+			    speed == SPEED_100 ? 100 : 10,
+			    cmd.duplex == DUPLEX_FULL ? "Full" : "Half");
+	} else if (!mii_link_ok(&nic->mii) && netif_carrier_ok(nic->netdev)) {
+		netdev_info(nic->netdev, "NIC Link is Down\n");
+	}
+
+	mii_check_link(&nic->mii);
+
+	/* Software generated interrupt to recover from (rare) Rx
+	 * allocation failure.
+	 * Unfortunately have to use a spinlock to not re-enable interrupts
+	 * accidentally, due to hardware that shares a register between the
+	 * interrupt mask bit and the SW Interrupt generation bit */
+	spin_lock_irq(&nic->cmd_lock);
+	iowrite8(ioread8(&nic->csr->scb.cmd_hi) | irq_sw_gen,&nic->csr->scb.cmd_hi);
+	e100_write_flush(nic);
+	spin_unlock_irq(&nic->cmd_lock);
+
+	e100_update_stats(nic);
+	e100_adjust_adaptive_ifs(nic, speed, cmd.duplex);
+
+	if (nic->mac <= mac_82557_D100_C)
+		/* Issue a multicast command to workaround a 557 lock up */
+		e100_set_multicast_list(nic->netdev);
+
+	if (nic->flags & ich && speed == SPEED_10 && cmd.duplex == DUPLEX_HALF)
+		/* Need SW workaround for ICH[x] 10Mbps/half duplex Tx hang. */
+		nic->flags |= ich_10h_workaround;
+	else
+		nic->flags &= ~ich_10h_workaround;
+
+	mod_timer(&nic->watchdog,
+		  round_jiffies(jiffies + E100_WATCHDOG_PERIOD));
+}
+
+static int e100_xmit_prepare(struct nic *nic, struct cb *cb,
+	struct sk_buff *skb)
+{
+	dma_addr_t dma_addr;
+	cb->command = nic->tx_command;
+
+	dma_addr = dma_map_single(&nic->pdev->dev, skb->data, skb->len,
+				  DMA_TO_DEVICE);
+	/* If we can't map the skb, have the upper layer try later */
+	if (dma_mapping_error(&nic->pdev->dev, dma_addr))
+		return -ENOMEM;
+
+	/*
+	 * Use the last 4 bytes of the SKB payload packet as the CRC, used for
+	 * testing, ie sending frames with bad CRC.
+	 */
+	if (unlikely(skb->no_fcs))
+		cb->command |= cpu_to_le16(cb_tx_nc);
+	else
+		cb->command &= ~cpu_to_le16(cb_tx_nc);
+
+	/* interrupt every 16 packets regardless of delay */
+	if ((nic->cbs_avail & ~15) == nic->cbs_avail)
+		cb->command |= cpu_to_le16(cb_i);
+	cb->u.tcb.tbd_array = cb->dma_addr + offsetof(struct cb, u.tcb.tbd);
+	cb->u.tcb.tcb_byte_count = 0;
+	cb->u.tcb.threshold = nic->tx_threshold;
+	cb->u.tcb.tbd_count = 1;
+	cb->u.tcb.tbd.buf_addr = cpu_to_le32(dma_addr);
+	cb->u.tcb.tbd.size = cpu_to_le16(skb->len);
+	skb_tx_timestamp(skb);
+	return 0;
+}
+
+static netdev_tx_t e100_xmit_frame(struct sk_buff *skb,
+				   struct net_device *netdev)
+{
+	struct nic *nic = netdev_priv(netdev);
+	int err;
+
+	if (nic->flags & ich_10h_workaround) {
+		/* SW workaround for ICH[x] 10Mbps/half duplex Tx hang.
+		   Issue a NOP command followed by a 1us delay before
+		   issuing the Tx command. */
+		if (e100_exec_cmd(nic, cuc_nop, 0))
+			netif_printk(nic, tx_err, KERN_DEBUG, nic->netdev,
+				     "exec cuc_nop failed\n");
+		udelay(1);
+	}
+
+	err = e100_exec_cb(nic, skb, e100_xmit_prepare);
+
+	switch (err) {
+	case -ENOSPC:
+		/* We queued the skb, but now we're out of space. */
+		netif_printk(nic, tx_err, KERN_DEBUG, nic->netdev,
+			     "No space for CB\n");
+		netif_stop_queue(netdev);
+		break;
+	case -ENOMEM:
+		/* This is a hard error - log it. */
+		netif_printk(nic, tx_err, KERN_DEBUG, nic->netdev,
+			     "Out of Tx resources, returning skb\n");
+		netif_stop_queue(netdev);
+		return NETDEV_TX_BUSY;
+	}
+
+	return NETDEV_TX_OK;
+}
+
+static int e100_tx_clean(struct nic *nic)
+{
+	struct net_device *dev = nic->netdev;
+	struct cb *cb;
+	int tx_cleaned = 0;
+
+	spin_lock(&nic->cb_lock);
+
+	/* Clean CBs marked complete */
+	for (cb = nic->cb_to_clean;
+	    cb->status & cpu_to_le16(cb_complete);
+	    cb = nic->cb_to_clean = cb->next) {
+		dma_rmb(); /* read skb after status */
+		netif_printk(nic, tx_done, KERN_DEBUG, nic->netdev,
+			     "cb[%d]->status = 0x%04X\n",
+			     (int)(((void*)cb - (void*)nic->cbs)/sizeof(struct cb)),
+			     cb->status);
+
+		if (likely(cb->skb != NULL)) {
+			dev->stats.tx_packets++;
+			dev->stats.tx_bytes += cb->skb->len;
+
+			dma_unmap_single(&nic->pdev->dev,
+					 le32_to_cpu(cb->u.tcb.tbd.buf_addr),
+					 le16_to_cpu(cb->u.tcb.tbd.size),
+					 DMA_TO_DEVICE);
+			dev_kfree_skb_any(cb->skb);
+			cb->skb = NULL;
+			tx_cleaned = 1;
+		}
+		cb->status = 0;
+		nic->cbs_avail++;
+	}
+
+	spin_unlock(&nic->cb_lock);
+
+	/* Recover from running out of Tx resources in xmit_frame */
+	if (unlikely(tx_cleaned && netif_queue_stopped(nic->netdev)))
+		netif_wake_queue(nic->netdev);
+
+	return tx_cleaned;
+}
+
+static void e100_clean_cbs(struct nic *nic)
+{
+	if (nic->cbs) {
+		while (nic->cbs_avail != nic->params.cbs.count) {
+			struct cb *cb = nic->cb_to_clean;
+			if (cb->skb) {
+				dma_unmap_single(&nic->pdev->dev,
+						 le32_to_cpu(cb->u.tcb.tbd.buf_addr),
+						 le16_to_cpu(cb->u.tcb.tbd.size),
+						 DMA_TO_DEVICE);
+				dev_kfree_skb(cb->skb);
+			}
+			nic->cb_to_clean = nic->cb_to_clean->next;
+			nic->cbs_avail++;
+		}
+		dma_pool_free(nic->cbs_pool, nic->cbs, nic->cbs_dma_addr);
+		nic->cbs = NULL;
+		nic->cbs_avail = 0;
+	}
+	nic->cuc_cmd = cuc_start;
+	nic->cb_to_use = nic->cb_to_send = nic->cb_to_clean =
+		nic->cbs;
+}
+
+static int e100_alloc_cbs(struct nic *nic)
+{
+	struct cb *cb;
+	unsigned int i, count = nic->params.cbs.count;
+
+	nic->cuc_cmd = cuc_start;
+	nic->cb_to_use = nic->cb_to_send = nic->cb_to_clean = NULL;
+	nic->cbs_avail = 0;
+
+	nic->cbs = dma_pool_zalloc(nic->cbs_pool, GFP_KERNEL,
+				   &nic->cbs_dma_addr);
+	if (!nic->cbs)
+		return -ENOMEM;
+
+	for (cb = nic->cbs, i = 0; i < count; cb++, i++) {
+		cb->next = (i + 1 < count) ? cb + 1 : nic->cbs;
+		cb->prev = (i == 0) ? nic->cbs + count - 1 : cb - 1;
+
+		cb->dma_addr = nic->cbs_dma_addr + i * sizeof(struct cb);
+		cb->link = cpu_to_le32(nic->cbs_dma_addr +
+			((i+1) % count) * sizeof(struct cb));
+	}
+
+	nic->cb_to_use = nic->cb_to_send = nic->cb_to_clean = nic->cbs;
+	nic->cbs_avail = count;
+
+	return 0;
+}
+
+static inline void e100_start_receiver(struct nic *nic, struct rx *rx)
+{
+	if (!nic->rxs) return;
+	if (RU_SUSPENDED != nic->ru_running) return;
+
+	/* handle init time starts */
+	if (!rx) rx = nic->rxs;
+
+	/* (Re)start RU if suspended or idle and RFA is non-NULL */
+	if (rx->skb) {
+		e100_exec_cmd(nic, ruc_start, rx->dma_addr);
+		nic->ru_running = RU_RUNNING;
+	}
+}
+
+#define RFD_BUF_LEN (sizeof(struct rfd) + VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+static int e100_rx_alloc_skb(struct nic *nic, struct rx *rx)
+{
+	if (!(rx->skb = netdev_alloc_skb_ip_align(nic->netdev, RFD_BUF_LEN)))
+		return -ENOMEM;
+
+	/* Init, and map the RFD. */
+	skb_copy_to_linear_data(rx->skb, &nic->blank_rfd, sizeof(struct rfd));
+	rx->dma_addr = dma_map_single(&nic->pdev->dev, rx->skb->data,
+				      RFD_BUF_LEN, DMA_BIDIRECTIONAL);
+
+	if (dma_mapping_error(&nic->pdev->dev, rx->dma_addr)) {
+		dev_kfree_skb_any(rx->skb);
+		rx->skb = NULL;
+		rx->dma_addr = 0;
+		return -ENOMEM;
+	}
+
+	/* Link the RFD to end of RFA by linking previous RFD to
+	 * this one.  We are safe to touch the previous RFD because
+	 * it is protected by the before last buffer's el bit being set */
+	if (rx->prev->skb) {
+		struct rfd *prev_rfd = (struct rfd *)rx->prev->skb->data;
+		put_unaligned_le32(rx->dma_addr, &prev_rfd->link);
+		dma_sync_single_for_device(&nic->pdev->dev,
+					   rx->prev->dma_addr,
+					   sizeof(struct rfd),
+					   DMA_BIDIRECTIONAL);
+	}
+
+	return 0;
+}
+
+static int e100_rx_indicate(struct nic *nic, struct rx *rx,
+	unsigned int *work_done, unsigned int work_to_do)
+{
+	struct net_device *dev = nic->netdev;
+	struct sk_buff *skb = rx->skb;
+	struct rfd *rfd = (struct rfd *)skb->data;
+	u16 rfd_status, actual_size;
+	u16 fcs_pad = 0;
+
+	if (unlikely(work_done && *work_done >= work_to_do))
+		return -EAGAIN;
+
+	/* Need to sync before taking a peek at cb_complete bit */
+	dma_sync_single_for_cpu(&nic->pdev->dev, rx->dma_addr,
+				sizeof(struct rfd), DMA_BIDIRECTIONAL);
+	rfd_status = le16_to_cpu(rfd->status);
+
+	netif_printk(nic, rx_status, KERN_DEBUG, nic->netdev,
+		     "status=0x%04X\n", rfd_status);
+	dma_rmb(); /* read size after status bit */
+
+	/* If data isn't ready, nothing to indicate */
+	if (unlikely(!(rfd_status & cb_complete))) {
+		/* If the next buffer has the el bit, but we think the receiver
+		 * is still running, check to see if it really stopped while
+		 * we had interrupts off.
+		 * This allows for a fast restart without re-enabling
+		 * interrupts */
+		if ((le16_to_cpu(rfd->command) & cb_el) &&
+		    (RU_RUNNING == nic->ru_running))
+
+			if (ioread8(&nic->csr->scb.status) & rus_no_res)
+				nic->ru_running = RU_SUSPENDED;
+		dma_sync_single_for_device(&nic->pdev->dev, rx->dma_addr,
+					   sizeof(struct rfd),
+					   DMA_FROM_DEVICE);
+		return -ENODATA;
+	}
+
+	/* Get actual data size */
+	if (unlikely(dev->features & NETIF_F_RXFCS))
+		fcs_pad = 4;
+	actual_size = le16_to_cpu(rfd->actual_size) & 0x3FFF;
+	if (unlikely(actual_size > RFD_BUF_LEN - sizeof(struct rfd)))
+		actual_size = RFD_BUF_LEN - sizeof(struct rfd);
+
+	/* Get data */
+	dma_unmap_single(&nic->pdev->dev, rx->dma_addr, RFD_BUF_LEN,
+			 DMA_BIDIRECTIONAL);
+
+	/* If this buffer has the el bit, but we think the receiver
+	 * is still running, check to see if it really stopped while
+	 * we had interrupts off.
+	 * This allows for a fast restart without re-enabling interrupts.
+	 * This can happen when the RU sees the size change but also sees
+	 * the el bit set. */
+	if ((le16_to_cpu(rfd->command) & cb_el) &&
+	    (RU_RUNNING == nic->ru_running)) {
+
+	    if (ioread8(&nic->csr->scb.status) & rus_no_res)
+		nic->ru_running = RU_SUSPENDED;
+	}
+
+	/* Pull off the RFD and put the actual data (minus eth hdr) */
+	skb_reserve(skb, sizeof(struct rfd));
+	skb_put(skb, actual_size);
+	skb->protocol = eth_type_trans(skb, nic->netdev);
+
+	/* If we are receiving all frames, then don't bother
+	 * checking for errors.
+	 */
+	if (unlikely(dev->features & NETIF_F_RXALL)) {
+		if (actual_size > ETH_DATA_LEN + VLAN_ETH_HLEN + fcs_pad)
+			/* Received oversized frame, but keep it. */
+			nic->rx_over_length_errors++;
+		goto process_skb;
+	}
+
+	if (unlikely(!(rfd_status & cb_ok))) {
+		/* Don't indicate if hardware indicates errors */
+		dev_kfree_skb_any(skb);
+	} else if (actual_size > ETH_DATA_LEN + VLAN_ETH_HLEN + fcs_pad) {
+		/* Don't indicate oversized frames */
+		nic->rx_over_length_errors++;
+		dev_kfree_skb_any(skb);
+	} else {
+process_skb:
+		dev->stats.rx_packets++;
+		dev->stats.rx_bytes += (actual_size - fcs_pad);
+		netif_receive_skb(skb);
+		if (work_done)
+			(*work_done)++;
+	}
+
+	rx->skb = NULL;
+
+	return 0;
+}
+
+static void e100_rx_clean(struct nic *nic, unsigned int *work_done,
+	unsigned int work_to_do)
+{
+	struct rx *rx;
+	int restart_required = 0, err = 0;
+	struct rx *old_before_last_rx, *new_before_last_rx;
+	struct rfd *old_before_last_rfd, *new_before_last_rfd;
+
+	/* Indicate newly arrived packets */
+	for (rx = nic->rx_to_clean; rx->skb; rx = nic->rx_to_clean = rx->next) {
+		err = e100_rx_indicate(nic, rx, work_done, work_to_do);
+		/* Hit quota or no more to clean */
+		if (-EAGAIN == err || -ENODATA == err)
+			break;
+	}
+
+
+	/* On EAGAIN, hit quota so have more work to do, restart once
+	 * cleanup is complete.
+	 * Else, are we already rnr? then pay attention!!! this ensures that
+	 * the state machine progression never allows a start with a
+	 * partially cleaned list, avoiding a race between hardware
+	 * and rx_to_clean when in NAPI mode */
+	if (-EAGAIN != err && RU_SUSPENDED == nic->ru_running)
+		restart_required = 1;
+
+	old_before_last_rx = nic->rx_to_use->prev->prev;
+	old_before_last_rfd = (struct rfd *)old_before_last_rx->skb->data;
+
+	/* Alloc new skbs to refill list */
+	for (rx = nic->rx_to_use; !rx->skb; rx = nic->rx_to_use = rx->next) {
+		if (unlikely(e100_rx_alloc_skb(nic, rx)))
+			break; /* Better luck next time (see watchdog) */
+	}
+
+	new_before_last_rx = nic->rx_to_use->prev->prev;
+	if (new_before_last_rx != old_before_last_rx) {
+		/* Set the el-bit on the buffer that is before the last buffer.
+		 * This lets us update the next pointer on the last buffer
+		 * without worrying about hardware touching it.
+		 * We set the size to 0 to prevent hardware from touching this
+		 * buffer.
+		 * When the hardware hits the before last buffer with el-bit
+		 * and size of 0, it will RNR interrupt, the RUS will go into
+		 * the No Resources state.  It will not complete nor write to
+		 * this buffer. */
+		new_before_last_rfd =
+			(struct rfd *)new_before_last_rx->skb->data;
+		new_before_last_rfd->size = 0;
+		new_before_last_rfd->command |= cpu_to_le16(cb_el);
+		dma_sync_single_for_device(&nic->pdev->dev,
+					   new_before_last_rx->dma_addr,
+					   sizeof(struct rfd),
+					   DMA_BIDIRECTIONAL);
+
+		/* Now that we have a new stopping point, we can clear the old
+		 * stopping point.  We must sync twice to get the proper
+		 * ordering on the hardware side of things. */
+		old_before_last_rfd->command &= ~cpu_to_le16(cb_el);
+		dma_sync_single_for_device(&nic->pdev->dev,
+					   old_before_last_rx->dma_addr,
+					   sizeof(struct rfd),
+					   DMA_BIDIRECTIONAL);
+		old_before_last_rfd->size = cpu_to_le16(VLAN_ETH_FRAME_LEN
+							+ ETH_FCS_LEN);
+		dma_sync_single_for_device(&nic->pdev->dev,
+					   old_before_last_rx->dma_addr,
+					   sizeof(struct rfd),
+					   DMA_BIDIRECTIONAL);
+	}
+
+	if (restart_required) {
+		// ack the rnr?
+		iowrite8(stat_ack_rnr, &nic->csr->scb.stat_ack);
+		e100_start_receiver(nic, nic->rx_to_clean);
+		if (work_done)
+			(*work_done)++;
+	}
+}
+
+static void e100_rx_clean_list(struct nic *nic)
+{
+	struct rx *rx;
+	unsigned int i, count = nic->params.rfds.count;
+
+	nic->ru_running = RU_UNINITIALIZED;
+
+	if (nic->rxs) {
+		for (rx = nic->rxs, i = 0; i < count; rx++, i++) {
+			if (rx->skb) {
+				dma_unmap_single(&nic->pdev->dev,
+						 rx->dma_addr, RFD_BUF_LEN,
+						 DMA_BIDIRECTIONAL);
+				dev_kfree_skb(rx->skb);
+			}
+		}
+		kfree(nic->rxs);
+		nic->rxs = NULL;
+	}
+
+	nic->rx_to_use = nic->rx_to_clean = NULL;
+}
+
+static int e100_rx_alloc_list(struct nic *nic)
+{
+	struct rx *rx;
+	unsigned int i, count = nic->params.rfds.count;
+	struct rfd *before_last;
+
+	nic->rx_to_use = nic->rx_to_clean = NULL;
+	nic->ru_running = RU_UNINITIALIZED;
+
+	if (!(nic->rxs = kcalloc(count, sizeof(struct rx), GFP_KERNEL)))
+		return -ENOMEM;
+
+	for (rx = nic->rxs, i = 0; i < count; rx++, i++) {
+		rx->next = (i + 1 < count) ? rx + 1 : nic->rxs;
+		rx->prev = (i == 0) ? nic->rxs + count - 1 : rx - 1;
+		if (e100_rx_alloc_skb(nic, rx)) {
+			e100_rx_clean_list(nic);
+			return -ENOMEM;
+		}
+	}
+	/* Set the el-bit on the buffer that is before the last buffer.
+	 * This lets us update the next pointer on the last buffer without
+	 * worrying about hardware touching it.
+	 * We set the size to 0 to prevent hardware from touching this buffer.
+	 * When the hardware hits the before last buffer with el-bit and size
+	 * of 0, it will RNR interrupt, the RU will go into the No Resources
+	 * state.  It will not complete nor write to this buffer. */
+	rx = nic->rxs->prev->prev;
+	before_last = (struct rfd *)rx->skb->data;
+	before_last->command |= cpu_to_le16(cb_el);
+	before_last->size = 0;
+	dma_sync_single_for_device(&nic->pdev->dev, rx->dma_addr,
+				   sizeof(struct rfd), DMA_BIDIRECTIONAL);
+
+	nic->rx_to_use = nic->rx_to_clean = nic->rxs;
+	nic->ru_running = RU_SUSPENDED;
+
+	return 0;
+}
+
+static irqreturn_t e100_intr(int irq, void *dev_id)
+{
+	struct net_device *netdev = dev_id;
+	struct nic *nic = netdev_priv(netdev);
+	u8 stat_ack = ioread8(&nic->csr->scb.stat_ack);
+
+	netif_printk(nic, intr, KERN_DEBUG, nic->netdev,
+		     "stat_ack = 0x%02X\n", stat_ack);
+
+	if (stat_ack == stat_ack_not_ours ||	/* Not our interrupt */
+	   stat_ack == stat_ack_not_present)	/* Hardware is ejected */
+		return IRQ_NONE;
+
+	/* Ack interrupt(s) */
+	iowrite8(stat_ack, &nic->csr->scb.stat_ack);
+
+	/* We hit Receive No Resource (RNR); restart RU after cleaning */
+	if (stat_ack & stat_ack_rnr)
+		nic->ru_running = RU_SUSPENDED;
+
+	if (likely(napi_schedule_prep(&nic->napi))) {
+		e100_disable_irq(nic);
+		__napi_schedule(&nic->napi);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int e100_poll(struct napi_struct *napi, int budget)
+{
+	struct nic *nic = container_of(napi, struct nic, napi);
+	unsigned int work_done = 0;
+
+	e100_rx_clean(nic, &work_done, budget);
+	e100_tx_clean(nic);
+
+	/* If budget fully consumed, continue polling */
+	if (work_done == budget)
+		return budget;
+
+	/* only re-enable interrupt if stack agrees polling is really done */
+	if (likely(napi_complete_done(napi, work_done)))
+		e100_enable_irq(nic);
+
+	return work_done;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void e100_netpoll(struct net_device *netdev)
+{
+	struct nic *nic = netdev_priv(netdev);
+
+	e100_disable_irq(nic);
+	e100_intr(nic->pdev->irq, netdev);
+	e100_tx_clean(nic);
+	e100_enable_irq(nic);
+}
+#endif
+
+static int e100_set_mac_address(struct net_device *netdev, void *p)
+{
+	struct nic *nic = netdev_priv(netdev);
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	eth_hw_addr_set(netdev, addr->sa_data);
+	e100_exec_cb(nic, NULL, e100_setup_iaaddr);
+
+	return 0;
+}
+
+static int e100_asf(struct nic *nic)
+{
+	/* ASF can be enabled from eeprom */
+	return (nic->pdev->device >= 0x1050) && (nic->pdev->device <= 0x1057) &&
+	   (le16_to_cpu(nic->eeprom[eeprom_config_asf]) & eeprom_asf) &&
+	   !(le16_to_cpu(nic->eeprom[eeprom_config_asf]) & eeprom_gcl) &&
+	   ((le16_to_cpu(nic->eeprom[eeprom_smbus_addr]) & 0xFF) != 0xFE);
+}
+
+static int e100_up(struct nic *nic)
+{
+	int err;
+
+	if ((err = e100_rx_alloc_list(nic)))
+		return err;
+	if ((err = e100_alloc_cbs(nic)))
+		goto err_rx_clean_list;
+	if ((err = e100_hw_init(nic)))
+		goto err_clean_cbs;
+	e100_set_multicast_list(nic->netdev);
+	e100_start_receiver(nic, NULL);
+	mod_timer(&nic->watchdog, jiffies);
+	if ((err = request_irq(nic->pdev->irq, e100_intr, IRQF_SHARED,
+		nic->netdev->name, nic->netdev)))
+		goto err_no_irq;
+	netif_wake_queue(nic->netdev);
+	napi_enable(&nic->napi);
+	/* enable ints _after_ enabling poll, preventing a race between
+	 * disable ints+schedule */
+	e100_enable_irq(nic);
+	return 0;
+
+err_no_irq:
+	del_timer_sync(&nic->watchdog);
+err_clean_cbs:
+	e100_clean_cbs(nic);
+err_rx_clean_list:
+	e100_rx_clean_list(nic);
+	return err;
+}
+
+static void e100_down(struct nic *nic)
+{
+	/* wait here for poll to complete */
+	napi_disable(&nic->napi);
+	netif_stop_queue(nic->netdev);
+	e100_hw_reset(nic);
+	free_irq(nic->pdev->irq, nic->netdev);
+	del_timer_sync(&nic->watchdog);
+	netif_carrier_off(nic->netdev);
+	e100_clean_cbs(nic);
+	e100_rx_clean_list(nic);
+}
+
+static void e100_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+{
+	struct nic *nic = netdev_priv(netdev);
+
+	/* Reset outside of interrupt context, to avoid request_irq
+	 * in interrupt context */
+	schedule_work(&nic->tx_timeout_task);
+}
+
+static void e100_tx_timeout_task(struct work_struct *work)
+{
+	struct nic *nic = container_of(work, struct nic, tx_timeout_task);
+	struct net_device *netdev = nic->netdev;
+
+	netif_printk(nic, tx_err, KERN_DEBUG, nic->netdev,
+		     "scb.status=0x%02X\n", ioread8(&nic->csr->scb.status));
+
+	rtnl_lock();
+	if (netif_running(netdev)) {
+		e100_down(netdev_priv(netdev));
+		e100_up(netdev_priv(netdev));
+	}
+	rtnl_unlock();
+}
+
+static int e100_loopback_test(struct nic *nic, enum loopback loopback_mode)
+{
+	int err;
+	struct sk_buff *skb;
+
+	/* Use driver resources to perform internal MAC or PHY
+	 * loopback test.  A single packet is prepared and transmitted
+	 * in loopback mode, and the test passes if the received
+	 * packet compares byte-for-byte to the transmitted packet. */
+
+	if ((err = e100_rx_alloc_list(nic)))
+		return err;
+	if ((err = e100_alloc_cbs(nic)))
+		goto err_clean_rx;
+
+	/* ICH PHY loopback is broken so do MAC loopback instead */
+	if (nic->flags & ich && loopback_mode == lb_phy)
+		loopback_mode = lb_mac;
+
+	nic->loopback = loopback_mode;
+	if ((err = e100_hw_init(nic)))
+		goto err_loopback_none;
+
+	if (loopback_mode == lb_phy)
+		mdio_write(nic->netdev, nic->mii.phy_id, MII_BMCR,
+			BMCR_LOOPBACK);
+
+	e100_start_receiver(nic, NULL);
+
+	if (!(skb = netdev_alloc_skb(nic->netdev, ETH_DATA_LEN))) {
+		err = -ENOMEM;
+		goto err_loopback_none;
+	}
+	skb_put(skb, ETH_DATA_LEN);
+	memset(skb->data, 0xFF, ETH_DATA_LEN);
+	e100_xmit_frame(skb, nic->netdev);
+
+	msleep(10);
+
+	dma_sync_single_for_cpu(&nic->pdev->dev, nic->rx_to_clean->dma_addr,
+				RFD_BUF_LEN, DMA_BIDIRECTIONAL);
+
+	if (memcmp(nic->rx_to_clean->skb->data + sizeof(struct rfd),
+	   skb->data, ETH_DATA_LEN))
+		err = -EAGAIN;
+
+err_loopback_none:
+	mdio_write(nic->netdev, nic->mii.phy_id, MII_BMCR, 0);
+	nic->loopback = lb_none;
+	e100_clean_cbs(nic);
+	e100_hw_reset(nic);
+err_clean_rx:
+	e100_rx_clean_list(nic);
+	return err;
+}
+
+#define MII_LED_CONTROL	0x1B
+#define E100_82552_LED_OVERRIDE 0x19
+#define E100_82552_LED_ON       0x000F /* LEDTX and LED_RX both on */
+#define E100_82552_LED_OFF      0x000A /* LEDTX and LED_RX both off */
+
+static int e100_get_link_ksettings(struct net_device *netdev,
+				   struct ethtool_link_ksettings *cmd)
+{
+	struct nic *nic = netdev_priv(netdev);
+
+	mii_ethtool_get_link_ksettings(&nic->mii, cmd);
+
+	return 0;
+}
+
+static int e100_set_link_ksettings(struct net_device *netdev,
+				   const struct ethtool_link_ksettings *cmd)
+{
+	struct nic *nic = netdev_priv(netdev);
+	int err;
+
+	mdio_write(netdev, nic->mii.phy_id, MII_BMCR, BMCR_RESET);
+	err = mii_ethtool_set_link_ksettings(&nic->mii, cmd);
+	e100_exec_cb(nic, NULL, e100_configure);
+
+	return err;
+}
+
+static void e100_get_drvinfo(struct net_device *netdev,
+	struct ethtool_drvinfo *info)
+{
+	struct nic *nic = netdev_priv(netdev);
+	strscpy(info->driver, DRV_NAME, sizeof(info->driver));
+	strscpy(info->bus_info, pci_name(nic->pdev),
+		sizeof(info->bus_info));
+}
+
+#define E100_PHY_REGS 0x1D
+static int e100_get_regs_len(struct net_device *netdev)
+{
+	struct nic *nic = netdev_priv(netdev);
+
+	/* We know the number of registers, and the size of the dump buffer.
+	 * Calculate the total size in bytes.
+	 */
+	return (1 + E100_PHY_REGS) * sizeof(u32) + sizeof(nic->mem->dump_buf);
+}
+
+static void e100_get_regs(struct net_device *netdev,
+	struct ethtool_regs *regs, void *p)
+{
+	struct nic *nic = netdev_priv(netdev);
+	u32 *buff = p;
+	int i;
+
+	regs->version = (1 << 24) | nic->pdev->revision;
+	buff[0] = ioread8(&nic->csr->scb.cmd_hi) << 24 |
+		ioread8(&nic->csr->scb.cmd_lo) << 16 |
+		ioread16(&nic->csr->scb.status);
+	for (i = 0; i < E100_PHY_REGS; i++)
+		/* Note that we read the registers in reverse order. This
+		 * ordering is the ABI apparently used by ethtool and other
+		 * applications.
+		 */
+		buff[1 + i] = mdio_read(netdev, nic->mii.phy_id,
+					E100_PHY_REGS - 1 - i);
+	memset(nic->mem->dump_buf, 0, sizeof(nic->mem->dump_buf));
+	e100_exec_cb(nic, NULL, e100_dump);
+	msleep(10);
+	memcpy(&buff[1 + E100_PHY_REGS], nic->mem->dump_buf,
+	       sizeof(nic->mem->dump_buf));
+}
+
+static void e100_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+	struct nic *nic = netdev_priv(netdev);
+	wol->supported = (nic->mac >= mac_82558_D101_A4) ?  WAKE_MAGIC : 0;
+	wol->wolopts = (nic->flags & wol_magic) ? WAKE_MAGIC : 0;
+}
+
+static int e100_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+	struct nic *nic = netdev_priv(netdev);
+
+	if ((wol->wolopts && wol->wolopts != WAKE_MAGIC) ||
+	    !device_can_wakeup(&nic->pdev->dev))
+		return -EOPNOTSUPP;
+
+	if (wol->wolopts)
+		nic->flags |= wol_magic;
+	else
+		nic->flags &= ~wol_magic;
+
+	device_set_wakeup_enable(&nic->pdev->dev, wol->wolopts);
+
+	e100_exec_cb(nic, NULL, e100_configure);
+
+	return 0;
+}
+
+static u32 e100_get_msglevel(struct net_device *netdev)
+{
+	struct nic *nic = netdev_priv(netdev);
+	return nic->msg_enable;
+}
+
+static void e100_set_msglevel(struct net_device *netdev, u32 value)
+{
+	struct nic *nic = netdev_priv(netdev);
+	nic->msg_enable = value;
+}
+
+static int e100_nway_reset(struct net_device *netdev)
+{
+	struct nic *nic = netdev_priv(netdev);
+	return mii_nway_restart(&nic->mii);
+}
+
+static u32 e100_get_link(struct net_device *netdev)
+{
+	struct nic *nic = netdev_priv(netdev);
+	return mii_link_ok(&nic->mii);
+}
+
+static int e100_get_eeprom_len(struct net_device *netdev)
+{
+	struct nic *nic = netdev_priv(netdev);
+	return nic->eeprom_wc << 1;
+}
+
+#define E100_EEPROM_MAGIC	0x1234
+static int e100_get_eeprom(struct net_device *netdev,
+	struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct nic *nic = netdev_priv(netdev);
+
+	eeprom->magic = E100_EEPROM_MAGIC;
+	memcpy(bytes, &((u8 *)nic->eeprom)[eeprom->offset], eeprom->len);
+
+	return 0;
+}
+
+static int e100_set_eeprom(struct net_device *netdev,
+	struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct nic *nic = netdev_priv(netdev);
+
+	if (eeprom->magic != E100_EEPROM_MAGIC)
+		return -EINVAL;
+
+	memcpy(&((u8 *)nic->eeprom)[eeprom->offset], bytes, eeprom->len);
+
+	return e100_eeprom_save(nic, eeprom->offset >> 1,
+		(eeprom->len >> 1) + 1);
+}
+
+static void e100_get_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ring,
+			       struct kernel_ethtool_ringparam *kernel_ring,
+			       struct netlink_ext_ack *extack)
+{
+	struct nic *nic = netdev_priv(netdev);
+	struct param_range *rfds = &nic->params.rfds;
+	struct param_range *cbs = &nic->params.cbs;
+
+	ring->rx_max_pending = rfds->max;
+	ring->tx_max_pending = cbs->max;
+	ring->rx_pending = rfds->count;
+	ring->tx_pending = cbs->count;
+}
+
+static int e100_set_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ring,
+			      struct kernel_ethtool_ringparam *kernel_ring,
+			      struct netlink_ext_ack *extack)
+{
+	struct nic *nic = netdev_priv(netdev);
+	struct param_range *rfds = &nic->params.rfds;
+	struct param_range *cbs = &nic->params.cbs;
+
+	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+		return -EINVAL;
+
+	if (netif_running(netdev))
+		e100_down(nic);
+	rfds->count = max(ring->rx_pending, rfds->min);
+	rfds->count = min(rfds->count, rfds->max);
+	cbs->count = max(ring->tx_pending, cbs->min);
+	cbs->count = min(cbs->count, cbs->max);
+	netif_info(nic, drv, nic->netdev, "Ring Param settings: rx: %d, tx %d\n",
+		   rfds->count, cbs->count);
+	if (netif_running(netdev))
+		e100_up(nic);
+
+	return 0;
+}
+
+static const char e100_gstrings_test[][ETH_GSTRING_LEN] = {
+	"Link test     (on/offline)",
+	"Eeprom test   (on/offline)",
+	"Self test        (offline)",
+	"Mac loopback     (offline)",
+	"Phy loopback     (offline)",
+};
+#define E100_TEST_LEN	ARRAY_SIZE(e100_gstrings_test)
+
+static void e100_diag_test(struct net_device *netdev,
+	struct ethtool_test *test, u64 *data)
+{
+	struct ethtool_cmd cmd;
+	struct nic *nic = netdev_priv(netdev);
+	int i;
+
+	memset(data, 0, E100_TEST_LEN * sizeof(u64));
+	data[0] = !mii_link_ok(&nic->mii);
+	data[1] = e100_eeprom_load(nic);
+	if (test->flags & ETH_TEST_FL_OFFLINE) {
+
+		/* save speed, duplex & autoneg settings */
+		mii_ethtool_gset(&nic->mii, &cmd);
+
+		if (netif_running(netdev))
+			e100_down(nic);
+		data[2] = e100_self_test(nic);
+		data[3] = e100_loopback_test(nic, lb_mac);
+		data[4] = e100_loopback_test(nic, lb_phy);
+
+		/* restore speed, duplex & autoneg settings */
+		mii_ethtool_sset(&nic->mii, &cmd);
+
+		if (netif_running(netdev))
+			e100_up(nic);
+	}
+	for (i = 0; i < E100_TEST_LEN; i++)
+		test->flags |= data[i] ? ETH_TEST_FL_FAILED : 0;
+
+	msleep_interruptible(4 * 1000);
+}
+
+static int e100_set_phys_id(struct net_device *netdev,
+			    enum ethtool_phys_id_state state)
+{
+	struct nic *nic = netdev_priv(netdev);
+	enum led_state {
+		led_on     = 0x01,
+		led_off    = 0x04,
+		led_on_559 = 0x05,
+		led_on_557 = 0x07,
+	};
+	u16 led_reg = (nic->phy == phy_82552_v) ? E100_82552_LED_OVERRIDE :
+		MII_LED_CONTROL;
+	u16 leds = 0;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		return 2;
+
+	case ETHTOOL_ID_ON:
+		leds = (nic->phy == phy_82552_v) ? E100_82552_LED_ON :
+		       (nic->mac < mac_82559_D101M) ? led_on_557 : led_on_559;
+		break;
+
+	case ETHTOOL_ID_OFF:
+		leds = (nic->phy == phy_82552_v) ? E100_82552_LED_OFF : led_off;
+		break;
+
+	case ETHTOOL_ID_INACTIVE:
+		break;
+	}
+
+	mdio_write(netdev, nic->mii.phy_id, led_reg, leds);
+	return 0;
+}
+
+static const char e100_gstrings_stats[][ETH_GSTRING_LEN] = {
+	"rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors",
+	"tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions",
+	"rx_length_errors", "rx_over_errors", "rx_crc_errors",
+	"rx_frame_errors", "rx_fifo_errors", "rx_missed_errors",
+	"tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors",
+	"tx_heartbeat_errors", "tx_window_errors",
+	/* device-specific stats */
+	"tx_deferred", "tx_single_collisions", "tx_multi_collisions",
+	"tx_flow_control_pause", "rx_flow_control_pause",
+	"rx_flow_control_unsupported", "tx_tco_packets", "rx_tco_packets",
+	"rx_short_frame_errors", "rx_over_length_errors",
+};
+#define E100_NET_STATS_LEN	21
+#define E100_STATS_LEN	ARRAY_SIZE(e100_gstrings_stats)
+
+static int e100_get_sset_count(struct net_device *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_TEST:
+		return E100_TEST_LEN;
+	case ETH_SS_STATS:
+		return E100_STATS_LEN;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void e100_get_ethtool_stats(struct net_device *netdev,
+	struct ethtool_stats *stats, u64 *data)
+{
+	struct nic *nic = netdev_priv(netdev);
+	int i;
+
+	for (i = 0; i < E100_NET_STATS_LEN; i++)
+		data[i] = ((unsigned long *)&netdev->stats)[i];
+
+	data[i++] = nic->tx_deferred;
+	data[i++] = nic->tx_single_collisions;
+	data[i++] = nic->tx_multiple_collisions;
+	data[i++] = nic->tx_fc_pause;
+	data[i++] = nic->rx_fc_pause;
+	data[i++] = nic->rx_fc_unsupported;
+	data[i++] = nic->tx_tco_frames;
+	data[i++] = nic->rx_tco_frames;
+	data[i++] = nic->rx_short_frame_errors;
+	data[i++] = nic->rx_over_length_errors;
+}
+
+static void e100_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+	switch (stringset) {
+	case ETH_SS_TEST:
+		memcpy(data, e100_gstrings_test, sizeof(e100_gstrings_test));
+		break;
+	case ETH_SS_STATS:
+		memcpy(data, e100_gstrings_stats, sizeof(e100_gstrings_stats));
+		break;
+	}
+}
+
+static const struct ethtool_ops e100_ethtool_ops = {
+	.get_drvinfo		= e100_get_drvinfo,
+	.get_regs_len		= e100_get_regs_len,
+	.get_regs		= e100_get_regs,
+	.get_wol		= e100_get_wol,
+	.set_wol		= e100_set_wol,
+	.get_msglevel		= e100_get_msglevel,
+	.set_msglevel		= e100_set_msglevel,
+	.nway_reset		= e100_nway_reset,
+	.get_link		= e100_get_link,
+	.get_eeprom_len		= e100_get_eeprom_len,
+	.get_eeprom		= e100_get_eeprom,
+	.set_eeprom		= e100_set_eeprom,
+	.get_ringparam		= e100_get_ringparam,
+	.set_ringparam		= e100_set_ringparam,
+	.self_test		= e100_diag_test,
+	.get_strings		= e100_get_strings,
+	.set_phys_id		= e100_set_phys_id,
+	.get_ethtool_stats	= e100_get_ethtool_stats,
+	.get_sset_count		= e100_get_sset_count,
+	.get_ts_info		= ethtool_op_get_ts_info,
+	.get_link_ksettings	= e100_get_link_ksettings,
+	.set_link_ksettings	= e100_set_link_ksettings,
+};
+
+static int e100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	struct nic *nic = netdev_priv(netdev);
+
+	return generic_mii_ioctl(&nic->mii, if_mii(ifr), cmd, NULL);
+}
+
+static int e100_alloc(struct nic *nic)
+{
+	nic->mem = dma_alloc_coherent(&nic->pdev->dev, sizeof(struct mem),
+				      &nic->dma_addr, GFP_KERNEL);
+	return nic->mem ? 0 : -ENOMEM;
+}
+
+static void e100_free(struct nic *nic)
+{
+	if (nic->mem) {
+		dma_free_coherent(&nic->pdev->dev, sizeof(struct mem),
+				  nic->mem, nic->dma_addr);
+		nic->mem = NULL;
+	}
+}
+
+static int e100_open(struct net_device *netdev)
+{
+	struct nic *nic = netdev_priv(netdev);
+	int err = 0;
+
+	netif_carrier_off(netdev);
+	if ((err = e100_up(nic)))
+		netif_err(nic, ifup, nic->netdev, "Cannot open interface, aborting\n");
+	return err;
+}
+
+static int e100_close(struct net_device *netdev)
+{
+	e100_down(netdev_priv(netdev));
+	return 0;
+}
+
+static int e100_set_features(struct net_device *netdev,
+			     netdev_features_t features)
+{
+	struct nic *nic = netdev_priv(netdev);
+	netdev_features_t changed = features ^ netdev->features;
+
+	if (!(changed & (NETIF_F_RXFCS | NETIF_F_RXALL)))
+		return 0;
+
+	netdev->features = features;
+	e100_exec_cb(nic, NULL, e100_configure);
+	return 1;
+}
+
+static const struct net_device_ops e100_netdev_ops = {
+	.ndo_open		= e100_open,
+	.ndo_stop		= e100_close,
+	.ndo_start_xmit		= e100_xmit_frame,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_set_rx_mode	= e100_set_multicast_list,
+	.ndo_set_mac_address	= e100_set_mac_address,
+	.ndo_eth_ioctl		= e100_do_ioctl,
+	.ndo_tx_timeout		= e100_tx_timeout,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= e100_netpoll,
+#endif
+	.ndo_set_features	= e100_set_features,
+};
+
+static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct net_device *netdev;
+	struct nic *nic;
+	int err;
+
+	if (!(netdev = alloc_etherdev(sizeof(struct nic))))
+		return -ENOMEM;
+
+	netdev->hw_features |= NETIF_F_RXFCS;
+	netdev->priv_flags |= IFF_SUPP_NOFCS;
+	netdev->hw_features |= NETIF_F_RXALL;
+
+	netdev->netdev_ops = &e100_netdev_ops;
+	netdev->ethtool_ops = &e100_ethtool_ops;
+	netdev->watchdog_timeo = E100_WATCHDOG_PERIOD;
+	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
+
+	nic = netdev_priv(netdev);
+	netif_napi_add_weight(netdev, &nic->napi, e100_poll, E100_NAPI_WEIGHT);
+	nic->netdev = netdev;
+	nic->pdev = pdev;
+	nic->msg_enable = (1 << debug) - 1;
+	nic->mdio_ctrl = mdio_ctrl_hw;
+	pci_set_drvdata(pdev, netdev);
+
+	if ((err = pci_enable_device(pdev))) {
+		netif_err(nic, probe, nic->netdev, "Cannot enable PCI device, aborting\n");
+		goto err_out_free_dev;
+	}
+
+	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+		netif_err(nic, probe, nic->netdev, "Cannot find proper PCI device base address, aborting\n");
+		err = -ENODEV;
+		goto err_out_disable_pdev;
+	}
+
+	if ((err = pci_request_regions(pdev, DRV_NAME))) {
+		netif_err(nic, probe, nic->netdev, "Cannot obtain PCI resources, aborting\n");
+		goto err_out_disable_pdev;
+	}
+
+	if ((err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))) {
+		netif_err(nic, probe, nic->netdev, "No usable DMA configuration, aborting\n");
+		goto err_out_free_res;
+	}
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	if (use_io)
+		netif_info(nic, probe, nic->netdev, "using i/o access mode\n");
+
+	nic->csr = pci_iomap(pdev, (use_io ? 1 : 0), sizeof(struct csr));
+	if (!nic->csr) {
+		netif_err(nic, probe, nic->netdev, "Cannot map device registers, aborting\n");
+		err = -ENOMEM;
+		goto err_out_free_res;
+	}
+
+	if (ent->driver_data)
+		nic->flags |= ich;
+	else
+		nic->flags &= ~ich;
+
+	e100_get_defaults(nic);
+
+	/* D100 MAC doesn't allow rx of vlan packets with normal MTU */
+	if (nic->mac < mac_82558_D101_A4)
+		netdev->features |= NETIF_F_VLAN_CHALLENGED;
+
+	/* locks must be initialized before calling hw_reset */
+	spin_lock_init(&nic->cb_lock);
+	spin_lock_init(&nic->cmd_lock);
+	spin_lock_init(&nic->mdio_lock);
+
+	/* Reset the device before pci_set_master() in case device is in some
+	 * funky state and has an interrupt pending - hint: we don't have the
+	 * interrupt handler registered yet. */
+	e100_hw_reset(nic);
+
+	pci_set_master(pdev);
+
+	timer_setup(&nic->watchdog, e100_watchdog, 0);
+
+	INIT_WORK(&nic->tx_timeout_task, e100_tx_timeout_task);
+
+	if ((err = e100_alloc(nic))) {
+		netif_err(nic, probe, nic->netdev, "Cannot alloc driver memory, aborting\n");
+		goto err_out_iounmap;
+	}
+
+	if ((err = e100_eeprom_load(nic)))
+		goto err_out_free;
+
+	e100_phy_init(nic);
+
+	eth_hw_addr_set(netdev, (u8 *)nic->eeprom);
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		if (!eeprom_bad_csum_allow) {
+			netif_err(nic, probe, nic->netdev, "Invalid MAC address from EEPROM, aborting\n");
+			err = -EAGAIN;
+			goto err_out_free;
+		} else {
+			netif_err(nic, probe, nic->netdev, "Invalid MAC address from EEPROM, you MUST configure one.\n");
+		}
+	}
+
+	/* Wol magic packet can be enabled from eeprom */
+	if ((nic->mac >= mac_82558_D101_A4) &&
+	   (le16_to_cpu(nic->eeprom[eeprom_id]) & eeprom_id_wol)) {
+		nic->flags |= wol_magic;
+		device_set_wakeup_enable(&pdev->dev, true);
+	}
+
+	/* ack any pending wake events, disable PME */
+	pci_pme_active(pdev, false);
+
+	strcpy(netdev->name, "eth%d");
+	if ((err = register_netdev(netdev))) {
+		netif_err(nic, probe, nic->netdev, "Cannot register net device, aborting\n");
+		goto err_out_free;
+	}
+	nic->cbs_pool = dma_pool_create(netdev->name,
+			   &nic->pdev->dev,
+			   nic->params.cbs.max * sizeof(struct cb),
+			   sizeof(u32),
+			   0);
+	if (!nic->cbs_pool) {
+		netif_err(nic, probe, nic->netdev, "Cannot create DMA pool, aborting\n");
+		err = -ENOMEM;
+		goto err_out_pool;
+	}
+	netif_info(nic, probe, nic->netdev,
+		   "addr 0x%llx, irq %d, MAC addr %pM\n",
+		   (unsigned long long)pci_resource_start(pdev, use_io ? 1 : 0),
+		   pdev->irq, netdev->dev_addr);
+
+	return 0;
+
+err_out_pool:
+	unregister_netdev(netdev);
+err_out_free:
+	e100_free(nic);
+err_out_iounmap:
+	pci_iounmap(pdev, nic->csr);
+err_out_free_res:
+	pci_release_regions(pdev);
+err_out_disable_pdev:
+	pci_disable_device(pdev);
+err_out_free_dev:
+	free_netdev(netdev);
+	return err;
+}
+
+static void e100_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+
+	if (netdev) {
+		struct nic *nic = netdev_priv(netdev);
+		unregister_netdev(netdev);
+		e100_free(nic);
+		pci_iounmap(pdev, nic->csr);
+		dma_pool_destroy(nic->cbs_pool);
+		free_netdev(netdev);
+		pci_release_regions(pdev);
+		pci_disable_device(pdev);
+	}
+}
+
+#define E100_82552_SMARTSPEED   0x14   /* SmartSpeed Ctrl register */
+#define E100_82552_REV_ANEG     0x0200 /* Reverse auto-negotiation */
+#define E100_82552_ANEG_NOW     0x0400 /* Auto-negotiate now */
+static void __e100_shutdown(struct pci_dev *pdev, bool *enable_wake)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct nic *nic = netdev_priv(netdev);
+
+	netif_device_detach(netdev);
+
+	if (netif_running(netdev))
+		e100_down(nic);
+
+	if ((nic->flags & wol_magic) | e100_asf(nic)) {
+		/* enable reverse auto-negotiation */
+		if (nic->phy == phy_82552_v) {
+			u16 smartspeed = mdio_read(netdev, nic->mii.phy_id,
+			                           E100_82552_SMARTSPEED);
+
+			mdio_write(netdev, nic->mii.phy_id,
+			           E100_82552_SMARTSPEED, smartspeed |
+			           E100_82552_REV_ANEG | E100_82552_ANEG_NOW);
+		}
+		*enable_wake = true;
+	} else {
+		*enable_wake = false;
+	}
+
+	pci_disable_device(pdev);
+}
+
+static int __e100_power_off(struct pci_dev *pdev, bool wake)
+{
+	if (wake)
+		return pci_prepare_to_sleep(pdev);
+
+	pci_wake_from_d3(pdev, false);
+	pci_set_power_state(pdev, PCI_D3hot);
+
+	return 0;
+}
+
+static int __maybe_unused e100_suspend(struct device *dev_d)
+{
+	bool wake;
+
+	__e100_shutdown(to_pci_dev(dev_d), &wake);
+
+	return 0;
+}
+
+static int __maybe_unused e100_resume(struct device *dev_d)
+{
+	struct net_device *netdev = dev_get_drvdata(dev_d);
+	struct nic *nic = netdev_priv(netdev);
+	int err;
+
+	err = pci_enable_device(to_pci_dev(dev_d));
+	if (err) {
+		netdev_err(netdev, "Resume cannot enable PCI device, aborting\n");
+		return err;
+	}
+	pci_set_master(to_pci_dev(dev_d));
+
+	/* disable reverse auto-negotiation */
+	if (nic->phy == phy_82552_v) {
+		u16 smartspeed = mdio_read(netdev, nic->mii.phy_id,
+		                           E100_82552_SMARTSPEED);
+
+		mdio_write(netdev, nic->mii.phy_id,
+		           E100_82552_SMARTSPEED,
+		           smartspeed & ~(E100_82552_REV_ANEG));
+	}
+
+	if (netif_running(netdev))
+		e100_up(nic);
+
+	netif_device_attach(netdev);
+
+	return 0;
+}
+
+static void e100_shutdown(struct pci_dev *pdev)
+{
+	bool wake;
+	__e100_shutdown(pdev, &wake);
+	if (system_state == SYSTEM_POWER_OFF)
+		__e100_power_off(pdev, wake);
+}
+
+/* ------------------ PCI Error Recovery infrastructure  -------------- */
+/**
+ * e100_io_error_detected - called when PCI error is detected.
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ */
+static pci_ers_result_t e100_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct nic *nic = netdev_priv(netdev);
+
+	netif_device_detach(netdev);
+
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	if (netif_running(netdev))
+		e100_down(nic);
+	pci_disable_device(pdev);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * e100_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch.
+ */
+static pci_ers_result_t e100_io_slot_reset(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct nic *nic = netdev_priv(netdev);
+
+	if (pci_enable_device(pdev)) {
+		pr_err("Cannot re-enable PCI device after reset\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+	pci_set_master(pdev);
+
+	/* Only one device per card can do a reset */
+	if (0 != PCI_FUNC(pdev->devfn))
+		return PCI_ERS_RESULT_RECOVERED;
+	e100_hw_reset(nic);
+	e100_phy_init(nic);
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * e100_io_resume - resume normal operations
+ * @pdev: Pointer to PCI device
+ *
+ * Resume normal operations after an error recovery
+ * sequence has been completed.
+ */
+static void e100_io_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct nic *nic = netdev_priv(netdev);
+
+	/* ack any pending wake events, disable PME */
+	pci_enable_wake(pdev, PCI_D0, 0);
+
+	netif_device_attach(netdev);
+	if (netif_running(netdev)) {
+		e100_open(netdev);
+		mod_timer(&nic->watchdog, jiffies);
+	}
+}
+
+static const struct pci_error_handlers e100_err_handler = {
+	.error_detected = e100_io_error_detected,
+	.slot_reset = e100_io_slot_reset,
+	.resume = e100_io_resume,
+};
+
+static SIMPLE_DEV_PM_OPS(e100_pm_ops, e100_suspend, e100_resume);
+
+static struct pci_driver e100_driver = {
+	.name =         DRV_NAME,
+	.id_table =     e100_id_table,
+	.probe =        e100_probe,
+	.remove =       e100_remove,
+
+	/* Power Management hooks */
+	.driver.pm =	&e100_pm_ops,
+
+	.shutdown =     e100_shutdown,
+	.err_handler = &e100_err_handler,
+};
+
+static int __init e100_init_module(void)
+{
+	if (((1 << debug) - 1) & NETIF_MSG_DRV) {
+		pr_info("%s\n", DRV_DESCRIPTION);
+		pr_info("%s\n", DRV_COPYRIGHT);
+	}
+	return pci_register_driver(&e100_driver);
+}
+
+static void __exit e100_cleanup_module(void)
+{
+	pci_unregister_driver(&e100_driver);
+}
+
+module_init(e100_init_module);
+module_exit(e100_cleanup_module);
diff --git a/drivers/net/ethernet/intel/e1000/Makefile b/drivers/net/ethernet/intel/e1000/Makefile
new file mode 100644
index 0000000000..314c52d44b
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 1999 - 2006 Intel Corporation.
+
+#
+# Makefile for the Intel(R) PRO/1000 ethernet driver
+#
+
+obj-$(CONFIG_E1000) += e1000.o
+
+e1000-objs := e1000_main.o e1000_hw.o e1000_ethtool.o e1000_param.o
diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h
new file mode 100644
index 0000000000..75f3fd1d8d
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000/e1000.h
@@ -0,0 +1,351 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
+
+/* Linux PRO/1000 Ethernet Driver main header file */
+
+#ifndef _E1000_H_
+#define _E1000_H_
+
+#include <linux/stddef.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/delay.h>
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/interrupt.h>
+#include <linux/string.h>
+#include <linux/pagemap.h>
+#include <linux/dma-mapping.h>
+#include <linux/bitops.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <linux/capability.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <net/pkt_sched.h>
+#include <linux/list.h>
+#include <linux/reboot.h>
+#include <net/checksum.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+
+#define BAR_0		0
+#define BAR_1		1
+
+#define INTEL_E1000_ETHERNET_DEVICE(device_id) {\
+	PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id)}
+
+struct e1000_adapter;
+
+#include "e1000_hw.h"
+
+#define E1000_MAX_INTR			10
+
+/*
+ * Count for polling __E1000_RESET condition every 10-20msec.
+ */
+#define E1000_CHECK_RESET_COUNT	50
+
+/* TX/RX descriptor defines */
+#define E1000_DEFAULT_TXD		256
+#define E1000_MAX_TXD			256
+#define E1000_MIN_TXD			48
+#define E1000_MAX_82544_TXD		4096
+
+#define E1000_DEFAULT_RXD		256
+#define E1000_MAX_RXD			256
+#define E1000_MIN_RXD			48
+#define E1000_MAX_82544_RXD		4096
+
+#define E1000_MIN_ITR_USECS		10 /* 100000 irq/sec */
+#define E1000_MAX_ITR_USECS		10000 /* 100    irq/sec */
+
+/* this is the size past which hardware will drop packets when setting LPE=0 */
+#define MAXIMUM_ETHERNET_VLAN_SIZE	1522
+
+/* Supported Rx Buffer Sizes */
+#define E1000_RXBUFFER_128		128    /* Used for packet split */
+#define E1000_RXBUFFER_256		256    /* Used for packet split */
+#define E1000_RXBUFFER_512		512
+#define E1000_RXBUFFER_1024		1024
+#define E1000_RXBUFFER_2048		2048
+#define E1000_RXBUFFER_4096		4096
+#define E1000_RXBUFFER_8192		8192
+#define E1000_RXBUFFER_16384		16384
+
+/* SmartSpeed delimiters */
+#define E1000_SMARTSPEED_DOWNSHIFT	3
+#define E1000_SMARTSPEED_MAX		15
+
+/* Packet Buffer allocations */
+#define E1000_PBA_BYTES_SHIFT		0xA
+#define E1000_TX_HEAD_ADDR_SHIFT	7
+#define E1000_PBA_TX_MASK		0xFFFF0000
+
+/* Flow Control Watermarks */
+#define E1000_FC_HIGH_DIFF	0x1638 /* High: 5688 bytes below Rx FIFO size */
+#define E1000_FC_LOW_DIFF	0x1640 /* Low:  5696 bytes below Rx FIFO size */
+
+#define E1000_FC_PAUSE_TIME	0xFFFF /* pause for the max or until send xon */
+
+/* How many Tx Descriptors do we need to call netif_wake_queue ? */
+#define E1000_TX_QUEUE_WAKE	16
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define E1000_RX_BUFFER_WRITE	16 /* Must be power of 2 */
+
+#define AUTO_ALL_MODES		0
+#define E1000_EEPROM_82544_APM	0x0004
+#define E1000_EEPROM_APME	0x0400
+
+#ifndef E1000_MASTER_SLAVE
+/* Switch to override PHY master/slave setting */
+#define E1000_MASTER_SLAVE	e1000_ms_hw_default
+#endif
+
+#define E1000_MNG_VLAN_NONE	(-1)
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer
+ */
+struct e1000_tx_buffer {
+	struct sk_buff *skb;
+	dma_addr_t dma;
+	unsigned long time_stamp;
+	u16 length;
+	u16 next_to_watch;
+	bool mapped_as_page;
+	unsigned short segs;
+	unsigned int bytecount;
+};
+
+struct e1000_rx_buffer {
+	union {
+		struct page *page; /* jumbo: alloc_page */
+		u8 *data; /* else, netdev_alloc_frag */
+	} rxbuf;
+	dma_addr_t dma;
+};
+
+struct e1000_tx_ring {
+	/* pointer to the descriptor ring memory */
+	void *desc;
+	/* physical address of the descriptor ring */
+	dma_addr_t dma;
+	/* length of descriptor ring in bytes */
+	unsigned int size;
+	/* number of descriptors in the ring */
+	unsigned int count;
+	/* next descriptor to associate a buffer with */
+	unsigned int next_to_use;
+	/* next descriptor to check for DD status bit */
+	unsigned int next_to_clean;
+	/* array of buffer information structs */
+	struct e1000_tx_buffer *buffer_info;
+
+	u16 tdh;
+	u16 tdt;
+	bool last_tx_tso;
+};
+
+struct e1000_rx_ring {
+	/* pointer to the descriptor ring memory */
+	void *desc;
+	/* physical address of the descriptor ring */
+	dma_addr_t dma;
+	/* length of descriptor ring in bytes */
+	unsigned int size;
+	/* number of descriptors in the ring */
+	unsigned int count;
+	/* next descriptor to associate a buffer with */
+	unsigned int next_to_use;
+	/* next descriptor to check for DD status bit */
+	unsigned int next_to_clean;
+	/* array of buffer information structs */
+	struct e1000_rx_buffer *buffer_info;
+	struct sk_buff *rx_skb_top;
+
+	/* cpu for rx queue */
+	int cpu;
+
+	u16 rdh;
+	u16 rdt;
+};
+
+#define E1000_DESC_UNUSED(R)						\
+({									\
+	unsigned int clean = smp_load_acquire(&(R)->next_to_clean);	\
+	unsigned int use = READ_ONCE((R)->next_to_use);			\
+	(clean > use ? 0 : (R)->count) + clean - use - 1;		\
+})
+
+#define E1000_RX_DESC_EXT(R, i)						\
+	(&(((union e1000_rx_desc_extended *)((R).desc))[i]))
+#define E1000_GET_DESC(R, i, type)	(&(((struct type *)((R).desc))[i]))
+#define E1000_RX_DESC(R, i)		E1000_GET_DESC(R, i, e1000_rx_desc)
+#define E1000_TX_DESC(R, i)		E1000_GET_DESC(R, i, e1000_tx_desc)
+#define E1000_CONTEXT_DESC(R, i)	E1000_GET_DESC(R, i, e1000_context_desc)
+
+/* board specific private data structure */
+
+struct e1000_adapter {
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+	u16 mng_vlan_id;
+	u32 bd_number;
+	u32 rx_buffer_len;
+	u32 wol;
+	u32 smartspeed;
+	u32 en_mng_pt;
+	u16 link_speed;
+	u16 link_duplex;
+	spinlock_t stats_lock;
+	unsigned int total_tx_bytes;
+	unsigned int total_tx_packets;
+	unsigned int total_rx_bytes;
+	unsigned int total_rx_packets;
+	/* Interrupt Throttle Rate */
+	u32 itr;
+	u32 itr_setting;
+	u16 tx_itr;
+	u16 rx_itr;
+
+	u8 fc_autoneg;
+
+	/* TX */
+	struct e1000_tx_ring *tx_ring;      /* One per active queue */
+	unsigned int restart_queue;
+	u32 txd_cmd;
+	u32 tx_int_delay;
+	u32 tx_abs_int_delay;
+	u32 gotcl;
+	u64 gotcl_old;
+	u64 tpt_old;
+	u64 colc_old;
+	u32 tx_timeout_count;
+	u32 tx_fifo_head;
+	u32 tx_head_addr;
+	u32 tx_fifo_size;
+	u8  tx_timeout_factor;
+	atomic_t tx_fifo_stall;
+	bool pcix_82544;
+	bool detect_tx_hung;
+	bool dump_buffers;
+
+	/* RX */
+	bool (*clean_rx)(struct e1000_adapter *adapter,
+			 struct e1000_rx_ring *rx_ring,
+			 int *work_done, int work_to_do);
+	void (*alloc_rx_buf)(struct e1000_adapter *adapter,
+			     struct e1000_rx_ring *rx_ring,
+			     int cleaned_count);
+	struct e1000_rx_ring *rx_ring;      /* One per active queue */
+	struct napi_struct napi;
+
+	int num_tx_queues;
+	int num_rx_queues;
+
+	u64 hw_csum_err;
+	u64 hw_csum_good;
+	u32 alloc_rx_buff_failed;
+	u32 rx_int_delay;
+	u32 rx_abs_int_delay;
+	bool rx_csum;
+	u32 gorcl;
+	u64 gorcl_old;
+
+	/* OS defined structs */
+	struct net_device *netdev;
+	struct pci_dev *pdev;
+
+	/* structs defined in e1000_hw.h */
+	struct e1000_hw hw;
+	struct e1000_hw_stats stats;
+	struct e1000_phy_info phy_info;
+	struct e1000_phy_stats phy_stats;
+
+	u32 test_icr;
+	struct e1000_tx_ring test_tx_ring;
+	struct e1000_rx_ring test_rx_ring;
+
+	int msg_enable;
+
+	/* to not mess up cache alignment, always add to the bottom */
+	bool tso_force;
+	bool smart_power_down;	/* phy smart power down */
+	bool quad_port_a;
+	unsigned long flags;
+	u32 eeprom_wol;
+
+	/* for ioport free */
+	int bars;
+	int need_ioport;
+
+	bool discarding;
+
+	struct work_struct reset_task;
+	struct delayed_work watchdog_task;
+	struct delayed_work fifo_stall_task;
+	struct delayed_work phy_info_task;
+};
+
+enum e1000_state_t {
+	__E1000_TESTING,
+	__E1000_RESETTING,
+	__E1000_DOWN,
+	__E1000_DISABLED
+};
+
+#undef pr_fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+struct net_device *e1000_get_hw_dev(struct e1000_hw *hw);
+#define e_dbg(format, arg...) \
+	netdev_dbg(e1000_get_hw_dev(hw), format, ## arg)
+#define e_err(msglvl, format, arg...) \
+	netif_err(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_info(msglvl, format, arg...) \
+	netif_info(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_warn(msglvl, format, arg...) \
+	netif_warn(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_notice(msglvl, format, arg...) \
+	netif_notice(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_dev_info(format, arg...) \
+	dev_info(&adapter->pdev->dev, format, ## arg)
+#define e_dev_warn(format, arg...) \
+	dev_warn(&adapter->pdev->dev, format, ## arg)
+#define e_dev_err(format, arg...) \
+	dev_err(&adapter->pdev->dev, format, ## arg)
+
+extern char e1000_driver_name[];
+
+int e1000_open(struct net_device *netdev);
+int e1000_close(struct net_device *netdev);
+int e1000_up(struct e1000_adapter *adapter);
+void e1000_down(struct e1000_adapter *adapter);
+void e1000_reinit_locked(struct e1000_adapter *adapter);
+void e1000_reset(struct e1000_adapter *adapter);
+int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx);
+int e1000_setup_all_rx_resources(struct e1000_adapter *adapter);
+int e1000_setup_all_tx_resources(struct e1000_adapter *adapter);
+void e1000_free_all_rx_resources(struct e1000_adapter *adapter);
+void e1000_free_all_tx_resources(struct e1000_adapter *adapter);
+void e1000_update_stats(struct e1000_adapter *adapter);
+bool e1000_has_link(struct e1000_adapter *adapter);
+void e1000_power_up_phy(struct e1000_adapter *);
+void e1000_set_ethtool_ops(struct net_device *netdev);
+void e1000_check_options(struct e1000_adapter *adapter);
+
+#endif /* _E1000_H_ */
diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
new file mode 100644
index 0000000000..d06d29c6c0
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -0,0 +1,1893 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
+
+/* ethtool support for e1000 */
+
+#include "e1000.h"
+#include <linux/jiffies.h>
+#include <linux/uaccess.h>
+
+enum {NETDEV_STATS, E1000_STATS};
+
+struct e1000_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int type;
+	int sizeof_stat;
+	int stat_offset;
+};
+
+#define E1000_STAT(m)		E1000_STATS, \
+				sizeof(((struct e1000_adapter *)0)->m), \
+				offsetof(struct e1000_adapter, m)
+#define E1000_NETDEV_STAT(m)	NETDEV_STATS, \
+				sizeof(((struct net_device *)0)->m), \
+				offsetof(struct net_device, m)
+
+static const struct e1000_stats e1000_gstrings_stats[] = {
+	{ "rx_packets", E1000_STAT(stats.gprc) },
+	{ "tx_packets", E1000_STAT(stats.gptc) },
+	{ "rx_bytes", E1000_STAT(stats.gorcl) },
+	{ "tx_bytes", E1000_STAT(stats.gotcl) },
+	{ "rx_broadcast", E1000_STAT(stats.bprc) },
+	{ "tx_broadcast", E1000_STAT(stats.bptc) },
+	{ "rx_multicast", E1000_STAT(stats.mprc) },
+	{ "tx_multicast", E1000_STAT(stats.mptc) },
+	{ "rx_errors", E1000_STAT(stats.rxerrc) },
+	{ "tx_errors", E1000_STAT(stats.txerrc) },
+	{ "tx_dropped", E1000_NETDEV_STAT(stats.tx_dropped) },
+	{ "multicast", E1000_STAT(stats.mprc) },
+	{ "collisions", E1000_STAT(stats.colc) },
+	{ "rx_length_errors", E1000_STAT(stats.rlerrc) },
+	{ "rx_over_errors", E1000_NETDEV_STAT(stats.rx_over_errors) },
+	{ "rx_crc_errors", E1000_STAT(stats.crcerrs) },
+	{ "rx_frame_errors", E1000_NETDEV_STAT(stats.rx_frame_errors) },
+	{ "rx_no_buffer_count", E1000_STAT(stats.rnbc) },
+	{ "rx_missed_errors", E1000_STAT(stats.mpc) },
+	{ "tx_aborted_errors", E1000_STAT(stats.ecol) },
+	{ "tx_carrier_errors", E1000_STAT(stats.tncrs) },
+	{ "tx_fifo_errors", E1000_NETDEV_STAT(stats.tx_fifo_errors) },
+	{ "tx_heartbeat_errors", E1000_NETDEV_STAT(stats.tx_heartbeat_errors) },
+	{ "tx_window_errors", E1000_STAT(stats.latecol) },
+	{ "tx_abort_late_coll", E1000_STAT(stats.latecol) },
+	{ "tx_deferred_ok", E1000_STAT(stats.dc) },
+	{ "tx_single_coll_ok", E1000_STAT(stats.scc) },
+	{ "tx_multi_coll_ok", E1000_STAT(stats.mcc) },
+	{ "tx_timeout_count", E1000_STAT(tx_timeout_count) },
+	{ "tx_restart_queue", E1000_STAT(restart_queue) },
+	{ "rx_long_length_errors", E1000_STAT(stats.roc) },
+	{ "rx_short_length_errors", E1000_STAT(stats.ruc) },
+	{ "rx_align_errors", E1000_STAT(stats.algnerrc) },
+	{ "tx_tcp_seg_good", E1000_STAT(stats.tsctc) },
+	{ "tx_tcp_seg_failed", E1000_STAT(stats.tsctfc) },
+	{ "rx_flow_control_xon", E1000_STAT(stats.xonrxc) },
+	{ "rx_flow_control_xoff", E1000_STAT(stats.xoffrxc) },
+	{ "tx_flow_control_xon", E1000_STAT(stats.xontxc) },
+	{ "tx_flow_control_xoff", E1000_STAT(stats.xofftxc) },
+	{ "rx_long_byte_count", E1000_STAT(stats.gorcl) },
+	{ "rx_csum_offload_good", E1000_STAT(hw_csum_good) },
+	{ "rx_csum_offload_errors", E1000_STAT(hw_csum_err) },
+	{ "alloc_rx_buff_failed", E1000_STAT(alloc_rx_buff_failed) },
+	{ "tx_smbus", E1000_STAT(stats.mgptc) },
+	{ "rx_smbus", E1000_STAT(stats.mgprc) },
+	{ "dropped_smbus", E1000_STAT(stats.mgpdc) },
+};
+
+#define E1000_QUEUE_STATS_LEN 0
+#define E1000_GLOBAL_STATS_LEN ARRAY_SIZE(e1000_gstrings_stats)
+#define E1000_STATS_LEN (E1000_GLOBAL_STATS_LEN + E1000_QUEUE_STATS_LEN)
+static const char e1000_gstrings_test[][ETH_GSTRING_LEN] = {
+	"Register test  (offline)", "Eeprom test    (offline)",
+	"Interrupt test (offline)", "Loopback test  (offline)",
+	"Link test   (on/offline)"
+};
+
+#define E1000_TEST_LEN	ARRAY_SIZE(e1000_gstrings_test)
+
+static int e1000_get_link_ksettings(struct net_device *netdev,
+				    struct ethtool_link_ksettings *cmd)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 supported, advertising;
+
+	if (hw->media_type == e1000_media_type_copper) {
+		supported = (SUPPORTED_10baseT_Half |
+			     SUPPORTED_10baseT_Full |
+			     SUPPORTED_100baseT_Half |
+			     SUPPORTED_100baseT_Full |
+			     SUPPORTED_1000baseT_Full|
+			     SUPPORTED_Autoneg |
+			     SUPPORTED_TP);
+		advertising = ADVERTISED_TP;
+
+		if (hw->autoneg == 1) {
+			advertising |= ADVERTISED_Autoneg;
+			/* the e1000 autoneg seems to match ethtool nicely */
+			advertising |= hw->autoneg_advertised;
+		}
+
+		cmd->base.port = PORT_TP;
+		cmd->base.phy_address = hw->phy_addr;
+	} else {
+		supported   = (SUPPORTED_1000baseT_Full |
+			       SUPPORTED_FIBRE |
+			       SUPPORTED_Autoneg);
+
+		advertising = (ADVERTISED_1000baseT_Full |
+			       ADVERTISED_FIBRE |
+			       ADVERTISED_Autoneg);
+
+		cmd->base.port = PORT_FIBRE;
+	}
+
+	if (er32(STATUS) & E1000_STATUS_LU) {
+		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
+					   &adapter->link_duplex);
+		cmd->base.speed = adapter->link_speed;
+
+		/* unfortunately FULL_DUPLEX != DUPLEX_FULL
+		 * and HALF_DUPLEX != DUPLEX_HALF
+		 */
+		if (adapter->link_duplex == FULL_DUPLEX)
+			cmd->base.duplex = DUPLEX_FULL;
+		else
+			cmd->base.duplex = DUPLEX_HALF;
+	} else {
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
+	}
+
+	cmd->base.autoneg = ((hw->media_type == e1000_media_type_fiber) ||
+			 hw->autoneg) ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+	/* MDI-X => 1; MDI => 0 */
+	if ((hw->media_type == e1000_media_type_copper) &&
+	    netif_carrier_ok(netdev))
+		cmd->base.eth_tp_mdix = (!!adapter->phy_info.mdix_mode ?
+				     ETH_TP_MDI_X : ETH_TP_MDI);
+	else
+		cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
+
+	if (hw->mdix == AUTO_ALL_MODES)
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+	else
+		cmd->base.eth_tp_mdix_ctrl = hw->mdix;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
+
+	return 0;
+}
+
+static int e1000_set_link_ksettings(struct net_device *netdev,
+				    const struct ethtool_link_ksettings *cmd)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 advertising;
+
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
+
+	/* MDI setting is only allowed when autoneg enabled because
+	 * some hardware doesn't allow MDI setting when speed or
+	 * duplex is forced.
+	 */
+	if (cmd->base.eth_tp_mdix_ctrl) {
+		if (hw->media_type != e1000_media_type_copper)
+			return -EOPNOTSUPP;
+
+		if ((cmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
+		    (cmd->base.autoneg != AUTONEG_ENABLE)) {
+			e_err(drv, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
+			return -EINVAL;
+		}
+	}
+
+	while (test_and_set_bit(__E1000_RESETTING, &adapter->flags))
+		msleep(1);
+
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
+		hw->autoneg = 1;
+		if (hw->media_type == e1000_media_type_fiber)
+			hw->autoneg_advertised = ADVERTISED_1000baseT_Full |
+						 ADVERTISED_FIBRE |
+						 ADVERTISED_Autoneg;
+		else
+			hw->autoneg_advertised = advertising |
+						 ADVERTISED_TP |
+						 ADVERTISED_Autoneg;
+	} else {
+		u32 speed = cmd->base.speed;
+		/* calling this overrides forced MDI setting */
+		if (e1000_set_spd_dplx(adapter, speed, cmd->base.duplex)) {
+			clear_bit(__E1000_RESETTING, &adapter->flags);
+			return -EINVAL;
+		}
+	}
+
+	/* MDI-X => 2; MDI => 1; Auto => 3 */
+	if (cmd->base.eth_tp_mdix_ctrl) {
+		if (cmd->base.eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+			hw->mdix = AUTO_ALL_MODES;
+		else
+			hw->mdix = cmd->base.eth_tp_mdix_ctrl;
+	}
+
+	/* reset the link */
+
+	if (netif_running(adapter->netdev)) {
+		e1000_down(adapter);
+		e1000_up(adapter);
+	} else {
+		e1000_reset(adapter);
+	}
+	clear_bit(__E1000_RESETTING, &adapter->flags);
+	return 0;
+}
+
+static u32 e1000_get_link(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	/* If the link is not reported up to netdev, interrupts are disabled,
+	 * and so the physical link state may have changed since we last
+	 * looked. Set get_link_status to make sure that the true link
+	 * state is interrogated, rather than pulling a cached and possibly
+	 * stale link state from the driver.
+	 */
+	if (!netif_carrier_ok(netdev))
+		adapter->hw.get_link_status = 1;
+
+	return e1000_has_link(adapter);
+}
+
+static void e1000_get_pauseparam(struct net_device *netdev,
+				 struct ethtool_pauseparam *pause)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	pause->autoneg =
+		(adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+	if (hw->fc == E1000_FC_RX_PAUSE) {
+		pause->rx_pause = 1;
+	} else if (hw->fc == E1000_FC_TX_PAUSE) {
+		pause->tx_pause = 1;
+	} else if (hw->fc == E1000_FC_FULL) {
+		pause->rx_pause = 1;
+		pause->tx_pause = 1;
+	}
+}
+
+static int e1000_set_pauseparam(struct net_device *netdev,
+				struct ethtool_pauseparam *pause)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	int retval = 0;
+
+	adapter->fc_autoneg = pause->autoneg;
+
+	while (test_and_set_bit(__E1000_RESETTING, &adapter->flags))
+		msleep(1);
+
+	if (pause->rx_pause && pause->tx_pause)
+		hw->fc = E1000_FC_FULL;
+	else if (pause->rx_pause && !pause->tx_pause)
+		hw->fc = E1000_FC_RX_PAUSE;
+	else if (!pause->rx_pause && pause->tx_pause)
+		hw->fc = E1000_FC_TX_PAUSE;
+	else if (!pause->rx_pause && !pause->tx_pause)
+		hw->fc = E1000_FC_NONE;
+
+	hw->original_fc = hw->fc;
+
+	if (adapter->fc_autoneg == AUTONEG_ENABLE) {
+		if (netif_running(adapter->netdev)) {
+			e1000_down(adapter);
+			e1000_up(adapter);
+		} else {
+			e1000_reset(adapter);
+		}
+	} else
+		retval = ((hw->media_type == e1000_media_type_fiber) ?
+			  e1000_setup_link(hw) : e1000_force_mac_fc(hw));
+
+	clear_bit(__E1000_RESETTING, &adapter->flags);
+	return retval;
+}
+
+static u32 e1000_get_msglevel(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->msg_enable;
+}
+
+static void e1000_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	adapter->msg_enable = data;
+}
+
+static int e1000_get_regs_len(struct net_device *netdev)
+{
+#define E1000_REGS_LEN 32
+	return E1000_REGS_LEN * sizeof(u32);
+}
+
+static void e1000_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
+			   void *p)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 *regs_buff = p;
+	u16 phy_data;
+
+	memset(p, 0, E1000_REGS_LEN * sizeof(u32));
+
+	regs->version = (1 << 24) | (hw->revision_id << 16) | hw->device_id;
+
+	regs_buff[0]  = er32(CTRL);
+	regs_buff[1]  = er32(STATUS);
+
+	regs_buff[2]  = er32(RCTL);
+	regs_buff[3]  = er32(RDLEN);
+	regs_buff[4]  = er32(RDH);
+	regs_buff[5]  = er32(RDT);
+	regs_buff[6]  = er32(RDTR);
+
+	regs_buff[7]  = er32(TCTL);
+	regs_buff[8]  = er32(TDLEN);
+	regs_buff[9]  = er32(TDH);
+	regs_buff[10] = er32(TDT);
+	regs_buff[11] = er32(TIDV);
+
+	regs_buff[12] = hw->phy_type;  /* PHY type (IGP=1, M88=0) */
+	if (hw->phy_type == e1000_phy_igp) {
+		e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT,
+				    IGP01E1000_PHY_AGC_A);
+		e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_A &
+				   IGP01E1000_PHY_PAGE_SELECT, &phy_data);
+		regs_buff[13] = (u32)phy_data; /* cable length */
+		e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT,
+				    IGP01E1000_PHY_AGC_B);
+		e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_B &
+				   IGP01E1000_PHY_PAGE_SELECT, &phy_data);
+		regs_buff[14] = (u32)phy_data; /* cable length */
+		e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT,
+				    IGP01E1000_PHY_AGC_C);
+		e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_C &
+				   IGP01E1000_PHY_PAGE_SELECT, &phy_data);
+		regs_buff[15] = (u32)phy_data; /* cable length */
+		e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT,
+				    IGP01E1000_PHY_AGC_D);
+		e1000_read_phy_reg(hw, IGP01E1000_PHY_AGC_D &
+				   IGP01E1000_PHY_PAGE_SELECT, &phy_data);
+		regs_buff[16] = (u32)phy_data; /* cable length */
+		regs_buff[17] = 0; /* extended 10bt distance (not needed) */
+		e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, 0x0);
+		e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS &
+				   IGP01E1000_PHY_PAGE_SELECT, &phy_data);
+		regs_buff[18] = (u32)phy_data; /* cable polarity */
+		e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT,
+				    IGP01E1000_PHY_PCS_INIT_REG);
+		e1000_read_phy_reg(hw, IGP01E1000_PHY_PCS_INIT_REG &
+				   IGP01E1000_PHY_PAGE_SELECT, &phy_data);
+		regs_buff[19] = (u32)phy_data; /* cable polarity */
+		regs_buff[20] = 0; /* polarity correction enabled (always) */
+		regs_buff[22] = 0; /* phy receive errors (unavailable) */
+		regs_buff[23] = regs_buff[18]; /* mdix mode */
+		e1000_write_phy_reg(hw, IGP01E1000_PHY_PAGE_SELECT, 0x0);
+	} else {
+		e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
+		regs_buff[13] = (u32)phy_data; /* cable length */
+		regs_buff[14] = 0;  /* Dummy (to align w/ IGP phy reg dump) */
+		regs_buff[15] = 0;  /* Dummy (to align w/ IGP phy reg dump) */
+		regs_buff[16] = 0;  /* Dummy (to align w/ IGP phy reg dump) */
+		e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+		regs_buff[17] = (u32)phy_data; /* extended 10bt distance */
+		regs_buff[18] = regs_buff[13]; /* cable polarity */
+		regs_buff[19] = 0;  /* Dummy (to align w/ IGP phy reg dump) */
+		regs_buff[20] = regs_buff[17]; /* polarity correction */
+		/* phy receive errors */
+		regs_buff[22] = adapter->phy_stats.receive_errors;
+		regs_buff[23] = regs_buff[13]; /* mdix mode */
+	}
+	regs_buff[21] = adapter->phy_stats.idle_errors;  /* phy idle errors */
+	e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data);
+	regs_buff[24] = (u32)phy_data;  /* phy local receiver status */
+	regs_buff[25] = regs_buff[24];  /* phy remote receiver status */
+	if (hw->mac_type >= e1000_82540 &&
+	    hw->media_type == e1000_media_type_copper) {
+		regs_buff[26] = er32(MANC);
+	}
+}
+
+static int e1000_get_eeprom_len(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	return hw->eeprom.word_size * 2;
+}
+
+static int e1000_get_eeprom(struct net_device *netdev,
+			    struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u16 *eeprom_buff;
+	int first_word, last_word;
+	int ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EINVAL;
+
+	eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+
+	eeprom_buff = kmalloc_array(last_word - first_word + 1, sizeof(u16),
+				    GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	if (hw->eeprom.type == e1000_eeprom_spi)
+		ret_val = e1000_read_eeprom(hw, first_word,
+					    last_word - first_word + 1,
+					    eeprom_buff);
+	else {
+		for (i = 0; i < last_word - first_word + 1; i++) {
+			ret_val = e1000_read_eeprom(hw, first_word + i, 1,
+						    &eeprom_buff[i]);
+			if (ret_val)
+				break;
+		}
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1),
+	       eeprom->len);
+	kfree(eeprom_buff);
+
+	return ret_val;
+}
+
+static int e1000_set_eeprom(struct net_device *netdev,
+			    struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u16 *eeprom_buff;
+	void *ptr;
+	int max_len, first_word, last_word, ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EOPNOTSUPP;
+
+	if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+		return -EFAULT;
+
+	max_len = hw->eeprom.word_size * 2;
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+	eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	ptr = (void *)eeprom_buff;
+
+	if (eeprom->offset & 1) {
+		/* need read/modify/write of first changed EEPROM word
+		 * only the second byte of the word is being modified
+		 */
+		ret_val = e1000_read_eeprom(hw, first_word, 1,
+					    &eeprom_buff[0]);
+		ptr++;
+	}
+	if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) {
+		/* need read/modify/write of last changed EEPROM word
+		 * only the first byte of the word is being modified
+		 */
+		ret_val = e1000_read_eeprom(hw, last_word, 1,
+					    &eeprom_buff[last_word - first_word]);
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(ptr, bytes, eeprom->len);
+
+	for (i = 0; i < last_word - first_word + 1; i++)
+		cpu_to_le16s(&eeprom_buff[i]);
+
+	ret_val = e1000_write_eeprom(hw, first_word,
+				     last_word - first_word + 1, eeprom_buff);
+
+	/* Update the checksum over the first part of the EEPROM if needed */
+	if ((ret_val == 0) && (first_word <= EEPROM_CHECKSUM_REG))
+		e1000_update_eeprom_checksum(hw);
+
+	kfree(eeprom_buff);
+	return ret_val;
+}
+
+static void e1000_get_drvinfo(struct net_device *netdev,
+			      struct ethtool_drvinfo *drvinfo)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	strscpy(drvinfo->driver,  e1000_driver_name,
+		sizeof(drvinfo->driver));
+
+	strscpy(drvinfo->bus_info, pci_name(adapter->pdev),
+		sizeof(drvinfo->bus_info));
+}
+
+static void e1000_get_ringparam(struct net_device *netdev,
+				struct ethtool_ringparam *ring,
+				struct kernel_ethtool_ringparam *kernel_ring,
+				struct netlink_ext_ack *extack)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	e1000_mac_type mac_type = hw->mac_type;
+	struct e1000_tx_ring *txdr = adapter->tx_ring;
+	struct e1000_rx_ring *rxdr = adapter->rx_ring;
+
+	ring->rx_max_pending = (mac_type < e1000_82544) ? E1000_MAX_RXD :
+		E1000_MAX_82544_RXD;
+	ring->tx_max_pending = (mac_type < e1000_82544) ? E1000_MAX_TXD :
+		E1000_MAX_82544_TXD;
+	ring->rx_pending = rxdr->count;
+	ring->tx_pending = txdr->count;
+}
+
+static int e1000_set_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ring,
+			       struct kernel_ethtool_ringparam *kernel_ring,
+			       struct netlink_ext_ack *extack)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	e1000_mac_type mac_type = hw->mac_type;
+	struct e1000_tx_ring *txdr, *tx_old;
+	struct e1000_rx_ring *rxdr, *rx_old;
+	int i, err;
+
+	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+		return -EINVAL;
+
+	while (test_and_set_bit(__E1000_RESETTING, &adapter->flags))
+		msleep(1);
+
+	if (netif_running(adapter->netdev))
+		e1000_down(adapter);
+
+	tx_old = adapter->tx_ring;
+	rx_old = adapter->rx_ring;
+
+	err = -ENOMEM;
+	txdr = kcalloc(adapter->num_tx_queues, sizeof(struct e1000_tx_ring),
+		       GFP_KERNEL);
+	if (!txdr)
+		goto err_alloc_tx;
+
+	rxdr = kcalloc(adapter->num_rx_queues, sizeof(struct e1000_rx_ring),
+		       GFP_KERNEL);
+	if (!rxdr)
+		goto err_alloc_rx;
+
+	adapter->tx_ring = txdr;
+	adapter->rx_ring = rxdr;
+
+	rxdr->count = max(ring->rx_pending, (u32)E1000_MIN_RXD);
+	rxdr->count = min(rxdr->count, (u32)(mac_type < e1000_82544 ?
+			  E1000_MAX_RXD : E1000_MAX_82544_RXD));
+	rxdr->count = ALIGN(rxdr->count, REQ_RX_DESCRIPTOR_MULTIPLE);
+	txdr->count = max(ring->tx_pending, (u32)E1000_MIN_TXD);
+	txdr->count = min(txdr->count, (u32)(mac_type < e1000_82544 ?
+			  E1000_MAX_TXD : E1000_MAX_82544_TXD));
+	txdr->count = ALIGN(txdr->count, REQ_TX_DESCRIPTOR_MULTIPLE);
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		txdr[i].count = txdr->count;
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		rxdr[i].count = rxdr->count;
+
+	err = 0;
+	if (netif_running(adapter->netdev)) {
+		/* Try to get new resources before deleting old */
+		err = e1000_setup_all_rx_resources(adapter);
+		if (err)
+			goto err_setup_rx;
+		err = e1000_setup_all_tx_resources(adapter);
+		if (err)
+			goto err_setup_tx;
+
+		/* save the new, restore the old in order to free it,
+		 * then restore the new back again
+		 */
+
+		adapter->rx_ring = rx_old;
+		adapter->tx_ring = tx_old;
+		e1000_free_all_rx_resources(adapter);
+		e1000_free_all_tx_resources(adapter);
+		adapter->rx_ring = rxdr;
+		adapter->tx_ring = txdr;
+		err = e1000_up(adapter);
+	}
+	kfree(tx_old);
+	kfree(rx_old);
+
+	clear_bit(__E1000_RESETTING, &adapter->flags);
+	return err;
+
+err_setup_tx:
+	e1000_free_all_rx_resources(adapter);
+err_setup_rx:
+	adapter->rx_ring = rx_old;
+	adapter->tx_ring = tx_old;
+	kfree(rxdr);
+err_alloc_rx:
+	kfree(txdr);
+err_alloc_tx:
+	if (netif_running(adapter->netdev))
+		e1000_up(adapter);
+	clear_bit(__E1000_RESETTING, &adapter->flags);
+	return err;
+}
+
+static bool reg_pattern_test(struct e1000_adapter *adapter, u64 *data, int reg,
+			     u32 mask, u32 write)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	static const u32 test[] = {
+		0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF
+	};
+	u8 __iomem *address = hw->hw_addr + reg;
+	u32 read;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(test); i++) {
+		writel(write & test[i], address);
+		read = readl(address);
+		if (read != (write & test[i] & mask)) {
+			e_err(drv, "pattern test reg %04X failed: "
+			      "got 0x%08X expected 0x%08X\n",
+			      reg, read, (write & test[i] & mask));
+			*data = reg;
+			return true;
+		}
+	}
+	return false;
+}
+
+static bool reg_set_and_check(struct e1000_adapter *adapter, u64 *data, int reg,
+			      u32 mask, u32 write)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u8 __iomem *address = hw->hw_addr + reg;
+	u32 read;
+
+	writel(write & mask, address);
+	read = readl(address);
+	if ((read & mask) != (write & mask)) {
+		e_err(drv, "set/check reg %04X test failed: "
+		      "got 0x%08X expected 0x%08X\n",
+		      reg, (read & mask), (write & mask));
+		*data = reg;
+		return true;
+	}
+	return false;
+}
+
+#define REG_PATTERN_TEST(reg, mask, write)			     \
+	do {							     \
+		if (reg_pattern_test(adapter, data,		     \
+			     (hw->mac_type >= e1000_82543)   \
+			     ? E1000_##reg : E1000_82542_##reg,	     \
+			     mask, write))			     \
+			return 1;				     \
+	} while (0)
+
+#define REG_SET_AND_CHECK(reg, mask, write)			     \
+	do {							     \
+		if (reg_set_and_check(adapter, data,		     \
+			      (hw->mac_type >= e1000_82543)  \
+			      ? E1000_##reg : E1000_82542_##reg,     \
+			      mask, write))			     \
+			return 1;				     \
+	} while (0)
+
+static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data)
+{
+	u32 value, before, after;
+	u32 i, toggle;
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* The status register is Read Only, so a write should fail.
+	 * Some bits that get toggled are ignored.
+	 */
+
+	/* there are several bits on newer hardware that are r/w */
+	toggle = 0xFFFFF833;
+
+	before = er32(STATUS);
+	value = (er32(STATUS) & toggle);
+	ew32(STATUS, toggle);
+	after = er32(STATUS) & toggle;
+	if (value != after) {
+		e_err(drv, "failed STATUS register test got: "
+		      "0x%08X expected: 0x%08X\n", after, value);
+		*data = 1;
+		return 1;
+	}
+	/* restore previous status */
+	ew32(STATUS, before);
+
+	REG_PATTERN_TEST(FCAL, 0xFFFFFFFF, 0xFFFFFFFF);
+	REG_PATTERN_TEST(FCAH, 0x0000FFFF, 0xFFFFFFFF);
+	REG_PATTERN_TEST(FCT, 0x0000FFFF, 0xFFFFFFFF);
+	REG_PATTERN_TEST(VET, 0x0000FFFF, 0xFFFFFFFF);
+
+	REG_PATTERN_TEST(RDTR, 0x0000FFFF, 0xFFFFFFFF);
+	REG_PATTERN_TEST(RDBAH, 0xFFFFFFFF, 0xFFFFFFFF);
+	REG_PATTERN_TEST(RDLEN, 0x000FFF80, 0x000FFFFF);
+	REG_PATTERN_TEST(RDH, 0x0000FFFF, 0x0000FFFF);
+	REG_PATTERN_TEST(RDT, 0x0000FFFF, 0x0000FFFF);
+	REG_PATTERN_TEST(FCRTH, 0x0000FFF8, 0x0000FFF8);
+	REG_PATTERN_TEST(FCTTV, 0x0000FFFF, 0x0000FFFF);
+	REG_PATTERN_TEST(TIPG, 0x3FFFFFFF, 0x3FFFFFFF);
+	REG_PATTERN_TEST(TDBAH, 0xFFFFFFFF, 0xFFFFFFFF);
+	REG_PATTERN_TEST(TDLEN, 0x000FFF80, 0x000FFFFF);
+
+	REG_SET_AND_CHECK(RCTL, 0xFFFFFFFF, 0x00000000);
+
+	before = 0x06DFB3FE;
+	REG_SET_AND_CHECK(RCTL, before, 0x003FFFFB);
+	REG_SET_AND_CHECK(TCTL, 0xFFFFFFFF, 0x00000000);
+
+	if (hw->mac_type >= e1000_82543) {
+		REG_SET_AND_CHECK(RCTL, before, 0xFFFFFFFF);
+		REG_PATTERN_TEST(RDBAL, 0xFFFFFFF0, 0xFFFFFFFF);
+		REG_PATTERN_TEST(TXCW, 0xC000FFFF, 0x0000FFFF);
+		REG_PATTERN_TEST(TDBAL, 0xFFFFFFF0, 0xFFFFFFFF);
+		REG_PATTERN_TEST(TIDV, 0x0000FFFF, 0x0000FFFF);
+		value = E1000_RAR_ENTRIES;
+		for (i = 0; i < value; i++) {
+			REG_PATTERN_TEST(RA + (((i << 1) + 1) << 2),
+					 0x8003FFFF, 0xFFFFFFFF);
+		}
+	} else {
+		REG_SET_AND_CHECK(RCTL, 0xFFFFFFFF, 0x01FFFFFF);
+		REG_PATTERN_TEST(RDBAL, 0xFFFFF000, 0xFFFFFFFF);
+		REG_PATTERN_TEST(TXCW, 0x0000FFFF, 0x0000FFFF);
+		REG_PATTERN_TEST(TDBAL, 0xFFFFF000, 0xFFFFFFFF);
+	}
+
+	value = E1000_MC_TBL_SIZE;
+	for (i = 0; i < value; i++)
+		REG_PATTERN_TEST(MTA + (i << 2), 0xFFFFFFFF, 0xFFFFFFFF);
+
+	*data = 0;
+	return 0;
+}
+
+static int e1000_eeprom_test(struct e1000_adapter *adapter, u64 *data)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u16 temp;
+	u16 checksum = 0;
+	u16 i;
+
+	*data = 0;
+	/* Read and add up the contents of the EEPROM */
+	for (i = 0; i < (EEPROM_CHECKSUM_REG + 1); i++) {
+		if ((e1000_read_eeprom(hw, i, 1, &temp)) < 0) {
+			*data = 1;
+			break;
+		}
+		checksum += temp;
+	}
+
+	/* If Checksum is not Correct return error else test passed */
+	if ((checksum != (u16)EEPROM_SUM) && !(*data))
+		*data = 2;
+
+	return *data;
+}
+
+static irqreturn_t e1000_test_intr(int irq, void *data)
+{
+	struct net_device *netdev = (struct net_device *)data;
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	adapter->test_icr |= er32(ICR);
+
+	return IRQ_HANDLED;
+}
+
+static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
+{
+	struct net_device *netdev = adapter->netdev;
+	u32 mask, i = 0;
+	bool shared_int = true;
+	u32 irq = adapter->pdev->irq;
+	struct e1000_hw *hw = &adapter->hw;
+
+	*data = 0;
+
+	/* NOTE: we don't test MSI interrupts here, yet
+	 * Hook up test interrupt handler just for this test
+	 */
+	if (!request_irq(irq, e1000_test_intr, IRQF_PROBE_SHARED, netdev->name,
+			 netdev))
+		shared_int = false;
+	else if (request_irq(irq, e1000_test_intr, IRQF_SHARED,
+			     netdev->name, netdev)) {
+		*data = 1;
+		return -1;
+	}
+	e_info(hw, "testing %s interrupt\n", (shared_int ?
+	       "shared" : "unshared"));
+
+	/* Disable all the interrupts */
+	ew32(IMC, 0xFFFFFFFF);
+	E1000_WRITE_FLUSH();
+	msleep(10);
+
+	/* Test each interrupt */
+	for (; i < 10; i++) {
+		/* Interrupt to test */
+		mask = 1 << i;
+
+		if (!shared_int) {
+			/* Disable the interrupt to be reported in
+			 * the cause register and then force the same
+			 * interrupt and see if one gets posted.  If
+			 * an interrupt was posted to the bus, the
+			 * test failed.
+			 */
+			adapter->test_icr = 0;
+			ew32(IMC, mask);
+			ew32(ICS, mask);
+			E1000_WRITE_FLUSH();
+			msleep(10);
+
+			if (adapter->test_icr & mask) {
+				*data = 3;
+				break;
+			}
+		}
+
+		/* Enable the interrupt to be reported in
+		 * the cause register and then force the same
+		 * interrupt and see if one gets posted.  If
+		 * an interrupt was not posted to the bus, the
+		 * test failed.
+		 */
+		adapter->test_icr = 0;
+		ew32(IMS, mask);
+		ew32(ICS, mask);
+		E1000_WRITE_FLUSH();
+		msleep(10);
+
+		if (!(adapter->test_icr & mask)) {
+			*data = 4;
+			break;
+		}
+
+		if (!shared_int) {
+			/* Disable the other interrupts to be reported in
+			 * the cause register and then force the other
+			 * interrupts and see if any get posted.  If
+			 * an interrupt was posted to the bus, the
+			 * test failed.
+			 */
+			adapter->test_icr = 0;
+			ew32(IMC, ~mask & 0x00007FFF);
+			ew32(ICS, ~mask & 0x00007FFF);
+			E1000_WRITE_FLUSH();
+			msleep(10);
+
+			if (adapter->test_icr) {
+				*data = 5;
+				break;
+			}
+		}
+	}
+
+	/* Disable all the interrupts */
+	ew32(IMC, 0xFFFFFFFF);
+	E1000_WRITE_FLUSH();
+	msleep(10);
+
+	/* Unhook test interrupt handler */
+	free_irq(irq, netdev);
+
+	return *data;
+}
+
+static void e1000_free_desc_rings(struct e1000_adapter *adapter)
+{
+	struct e1000_tx_ring *txdr = &adapter->test_tx_ring;
+	struct e1000_rx_ring *rxdr = &adapter->test_rx_ring;
+	struct pci_dev *pdev = adapter->pdev;
+	int i;
+
+	if (txdr->desc && txdr->buffer_info) {
+		for (i = 0; i < txdr->count; i++) {
+			if (txdr->buffer_info[i].dma)
+				dma_unmap_single(&pdev->dev,
+						 txdr->buffer_info[i].dma,
+						 txdr->buffer_info[i].length,
+						 DMA_TO_DEVICE);
+			dev_kfree_skb(txdr->buffer_info[i].skb);
+		}
+	}
+
+	if (rxdr->desc && rxdr->buffer_info) {
+		for (i = 0; i < rxdr->count; i++) {
+			if (rxdr->buffer_info[i].dma)
+				dma_unmap_single(&pdev->dev,
+						 rxdr->buffer_info[i].dma,
+						 E1000_RXBUFFER_2048,
+						 DMA_FROM_DEVICE);
+			kfree(rxdr->buffer_info[i].rxbuf.data);
+		}
+	}
+
+	if (txdr->desc) {
+		dma_free_coherent(&pdev->dev, txdr->size, txdr->desc,
+				  txdr->dma);
+		txdr->desc = NULL;
+	}
+	if (rxdr->desc) {
+		dma_free_coherent(&pdev->dev, rxdr->size, rxdr->desc,
+				  rxdr->dma);
+		rxdr->desc = NULL;
+	}
+
+	kfree(txdr->buffer_info);
+	txdr->buffer_info = NULL;
+	kfree(rxdr->buffer_info);
+	rxdr->buffer_info = NULL;
+}
+
+static int e1000_setup_desc_rings(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_tx_ring *txdr = &adapter->test_tx_ring;
+	struct e1000_rx_ring *rxdr = &adapter->test_rx_ring;
+	struct pci_dev *pdev = adapter->pdev;
+	u32 rctl;
+	int i, ret_val;
+
+	/* Setup Tx descriptor ring and Tx buffers */
+
+	if (!txdr->count)
+		txdr->count = E1000_DEFAULT_TXD;
+
+	txdr->buffer_info = kcalloc(txdr->count, sizeof(struct e1000_tx_buffer),
+				    GFP_KERNEL);
+	if (!txdr->buffer_info) {
+		ret_val = 1;
+		goto err_nomem;
+	}
+
+	txdr->size = txdr->count * sizeof(struct e1000_tx_desc);
+	txdr->size = ALIGN(txdr->size, 4096);
+	txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma,
+					GFP_KERNEL);
+	if (!txdr->desc) {
+		ret_val = 2;
+		goto err_nomem;
+	}
+	txdr->next_to_use = txdr->next_to_clean = 0;
+
+	ew32(TDBAL, ((u64)txdr->dma & 0x00000000FFFFFFFF));
+	ew32(TDBAH, ((u64)txdr->dma >> 32));
+	ew32(TDLEN, txdr->count * sizeof(struct e1000_tx_desc));
+	ew32(TDH, 0);
+	ew32(TDT, 0);
+	ew32(TCTL, E1000_TCTL_PSP | E1000_TCTL_EN |
+	     E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT |
+	     E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT);
+
+	for (i = 0; i < txdr->count; i++) {
+		struct e1000_tx_desc *tx_desc = E1000_TX_DESC(*txdr, i);
+		struct sk_buff *skb;
+		unsigned int size = 1024;
+
+		skb = alloc_skb(size, GFP_KERNEL);
+		if (!skb) {
+			ret_val = 3;
+			goto err_nomem;
+		}
+		skb_put(skb, size);
+		txdr->buffer_info[i].skb = skb;
+		txdr->buffer_info[i].length = skb->len;
+		txdr->buffer_info[i].dma =
+			dma_map_single(&pdev->dev, skb->data, skb->len,
+				       DMA_TO_DEVICE);
+		if (dma_mapping_error(&pdev->dev, txdr->buffer_info[i].dma)) {
+			ret_val = 4;
+			goto err_nomem;
+		}
+		tx_desc->buffer_addr = cpu_to_le64(txdr->buffer_info[i].dma);
+		tx_desc->lower.data = cpu_to_le32(skb->len);
+		tx_desc->lower.data |= cpu_to_le32(E1000_TXD_CMD_EOP |
+						   E1000_TXD_CMD_IFCS |
+						   E1000_TXD_CMD_RPS);
+		tx_desc->upper.data = 0;
+	}
+
+	/* Setup Rx descriptor ring and Rx buffers */
+
+	if (!rxdr->count)
+		rxdr->count = E1000_DEFAULT_RXD;
+
+	rxdr->buffer_info = kcalloc(rxdr->count, sizeof(struct e1000_rx_buffer),
+				    GFP_KERNEL);
+	if (!rxdr->buffer_info) {
+		ret_val = 5;
+		goto err_nomem;
+	}
+
+	rxdr->size = rxdr->count * sizeof(struct e1000_rx_desc);
+	rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma,
+					GFP_KERNEL);
+	if (!rxdr->desc) {
+		ret_val = 6;
+		goto err_nomem;
+	}
+	rxdr->next_to_use = rxdr->next_to_clean = 0;
+
+	rctl = er32(RCTL);
+	ew32(RCTL, rctl & ~E1000_RCTL_EN);
+	ew32(RDBAL, ((u64)rxdr->dma & 0xFFFFFFFF));
+	ew32(RDBAH, ((u64)rxdr->dma >> 32));
+	ew32(RDLEN, rxdr->size);
+	ew32(RDH, 0);
+	ew32(RDT, 0);
+	rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_SZ_2048 |
+		E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
+		(hw->mc_filter_type << E1000_RCTL_MO_SHIFT);
+	ew32(RCTL, rctl);
+
+	for (i = 0; i < rxdr->count; i++) {
+		struct e1000_rx_desc *rx_desc = E1000_RX_DESC(*rxdr, i);
+		u8 *buf;
+
+		buf = kzalloc(E1000_RXBUFFER_2048 + NET_SKB_PAD + NET_IP_ALIGN,
+			      GFP_KERNEL);
+		if (!buf) {
+			ret_val = 7;
+			goto err_nomem;
+		}
+		rxdr->buffer_info[i].rxbuf.data = buf;
+
+		rxdr->buffer_info[i].dma =
+			dma_map_single(&pdev->dev,
+				       buf + NET_SKB_PAD + NET_IP_ALIGN,
+				       E1000_RXBUFFER_2048, DMA_FROM_DEVICE);
+		if (dma_mapping_error(&pdev->dev, rxdr->buffer_info[i].dma)) {
+			ret_val = 8;
+			goto err_nomem;
+		}
+		rx_desc->buffer_addr = cpu_to_le64(rxdr->buffer_info[i].dma);
+	}
+
+	return 0;
+
+err_nomem:
+	e1000_free_desc_rings(adapter);
+	return ret_val;
+}
+
+static void e1000_phy_disable_receiver(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* Write out to PHY registers 29 and 30 to disable the Receiver. */
+	e1000_write_phy_reg(hw, 29, 0x001F);
+	e1000_write_phy_reg(hw, 30, 0x8FFC);
+	e1000_write_phy_reg(hw, 29, 0x001A);
+	e1000_write_phy_reg(hw, 30, 0x8FF0);
+}
+
+static void e1000_phy_reset_clk_and_crs(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u16 phy_reg;
+
+	/* Because we reset the PHY above, we need to re-force TX_CLK in the
+	 * Extended PHY Specific Control Register to 25MHz clock.  This
+	 * value defaults back to a 2.5MHz clock when the PHY is reset.
+	 */
+	e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_reg);
+	phy_reg |= M88E1000_EPSCR_TX_CLK_25;
+	e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_reg);
+
+	/* In addition, because of the s/w reset above, we need to enable
+	 * CRS on TX.  This must be set for both full and half duplex
+	 * operation.
+	 */
+	e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_reg);
+	phy_reg |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+	e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_reg);
+}
+
+static int e1000_nonintegrated_phy_loopback(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl_reg;
+	u16 phy_reg;
+
+	/* Setup the Device Control Register for PHY loopback test. */
+
+	ctrl_reg = er32(CTRL);
+	ctrl_reg |= (E1000_CTRL_ILOS |		/* Invert Loss-Of-Signal */
+		     E1000_CTRL_FRCSPD |	/* Set the Force Speed Bit */
+		     E1000_CTRL_FRCDPX |	/* Set the Force Duplex Bit */
+		     E1000_CTRL_SPD_1000 |	/* Force Speed to 1000 */
+		     E1000_CTRL_FD);		/* Force Duplex to FULL */
+
+	ew32(CTRL, ctrl_reg);
+
+	/* Read the PHY Specific Control Register (0x10) */
+	e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_reg);
+
+	/* Clear Auto-Crossover bits in PHY Specific Control Register
+	 * (bits 6:5).
+	 */
+	phy_reg &= ~M88E1000_PSCR_AUTO_X_MODE;
+	e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_reg);
+
+	/* Perform software reset on the PHY */
+	e1000_phy_reset(hw);
+
+	/* Have to setup TX_CLK and TX_CRS after software reset */
+	e1000_phy_reset_clk_and_crs(adapter);
+
+	e1000_write_phy_reg(hw, PHY_CTRL, 0x8100);
+
+	/* Wait for reset to complete. */
+	udelay(500);
+
+	/* Have to setup TX_CLK and TX_CRS after software reset */
+	e1000_phy_reset_clk_and_crs(adapter);
+
+	/* Write out to PHY registers 29 and 30 to disable the Receiver. */
+	e1000_phy_disable_receiver(adapter);
+
+	/* Set the loopback bit in the PHY control register. */
+	e1000_read_phy_reg(hw, PHY_CTRL, &phy_reg);
+	phy_reg |= MII_CR_LOOPBACK;
+	e1000_write_phy_reg(hw, PHY_CTRL, phy_reg);
+
+	/* Setup TX_CLK and TX_CRS one more time. */
+	e1000_phy_reset_clk_and_crs(adapter);
+
+	/* Check Phy Configuration */
+	e1000_read_phy_reg(hw, PHY_CTRL, &phy_reg);
+	if (phy_reg != 0x4100)
+		return 9;
+
+	e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_reg);
+	if (phy_reg != 0x0070)
+		return 10;
+
+	e1000_read_phy_reg(hw, 29, &phy_reg);
+	if (phy_reg != 0x001A)
+		return 11;
+
+	return 0;
+}
+
+static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl_reg = 0;
+	u32 stat_reg = 0;
+
+	hw->autoneg = false;
+
+	if (hw->phy_type == e1000_phy_m88) {
+		/* Auto-MDI/MDIX Off */
+		e1000_write_phy_reg(hw,
+				    M88E1000_PHY_SPEC_CTRL, 0x0808);
+		/* reset to update Auto-MDI/MDIX */
+		e1000_write_phy_reg(hw, PHY_CTRL, 0x9140);
+		/* autoneg off */
+		e1000_write_phy_reg(hw, PHY_CTRL, 0x8140);
+	}
+
+	ctrl_reg = er32(CTRL);
+
+	/* force 1000, set loopback */
+	e1000_write_phy_reg(hw, PHY_CTRL, 0x4140);
+
+	/* Now set up the MAC to the same speed/duplex as the PHY. */
+	ctrl_reg = er32(CTRL);
+	ctrl_reg &= ~E1000_CTRL_SPD_SEL; /* Clear the speed sel bits */
+	ctrl_reg |= (E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */
+			E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */
+			E1000_CTRL_SPD_1000 |/* Force Speed to 1000 */
+			E1000_CTRL_FD); /* Force Duplex to FULL */
+
+	if (hw->media_type == e1000_media_type_copper &&
+	    hw->phy_type == e1000_phy_m88)
+		ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */
+	else {
+		/* Set the ILOS bit on the fiber Nic is half
+		 * duplex link is detected.
+		 */
+		stat_reg = er32(STATUS);
+		if ((stat_reg & E1000_STATUS_FD) == 0)
+			ctrl_reg |= (E1000_CTRL_ILOS | E1000_CTRL_SLU);
+	}
+
+	ew32(CTRL, ctrl_reg);
+
+	/* Disable the receiver on the PHY so when a cable is plugged in, the
+	 * PHY does not begin to autoneg when a cable is reconnected to the NIC.
+	 */
+	if (hw->phy_type == e1000_phy_m88)
+		e1000_phy_disable_receiver(adapter);
+
+	udelay(500);
+
+	return 0;
+}
+
+static int e1000_set_phy_loopback(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u16 phy_reg = 0;
+	u16 count = 0;
+
+	switch (hw->mac_type) {
+	case e1000_82543:
+		if (hw->media_type == e1000_media_type_copper) {
+			/* Attempt to setup Loopback mode on Non-integrated PHY.
+			 * Some PHY registers get corrupted at random, so
+			 * attempt this 10 times.
+			 */
+			while (e1000_nonintegrated_phy_loopback(adapter) &&
+			       count++ < 10);
+			if (count < 11)
+				return 0;
+		}
+		break;
+
+	case e1000_82544:
+	case e1000_82540:
+	case e1000_82545:
+	case e1000_82545_rev_3:
+	case e1000_82546:
+	case e1000_82546_rev_3:
+	case e1000_82541:
+	case e1000_82541_rev_2:
+	case e1000_82547:
+	case e1000_82547_rev_2:
+		return e1000_integrated_phy_loopback(adapter);
+	default:
+		/* Default PHY loopback work is to read the MII
+		 * control register and assert bit 14 (loopback mode).
+		 */
+		e1000_read_phy_reg(hw, PHY_CTRL, &phy_reg);
+		phy_reg |= MII_CR_LOOPBACK;
+		e1000_write_phy_reg(hw, PHY_CTRL, phy_reg);
+		return 0;
+	}
+
+	return 8;
+}
+
+static int e1000_setup_loopback_test(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl;
+
+	if (hw->media_type == e1000_media_type_fiber ||
+	    hw->media_type == e1000_media_type_internal_serdes) {
+		switch (hw->mac_type) {
+		case e1000_82545:
+		case e1000_82546:
+		case e1000_82545_rev_3:
+		case e1000_82546_rev_3:
+			return e1000_set_phy_loopback(adapter);
+		default:
+			rctl = er32(RCTL);
+			rctl |= E1000_RCTL_LBM_TCVR;
+			ew32(RCTL, rctl);
+			return 0;
+		}
+	} else if (hw->media_type == e1000_media_type_copper) {
+		return e1000_set_phy_loopback(adapter);
+	}
+
+	return 7;
+}
+
+static void e1000_loopback_cleanup(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl;
+	u16 phy_reg;
+
+	rctl = er32(RCTL);
+	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
+	ew32(RCTL, rctl);
+
+	switch (hw->mac_type) {
+	case e1000_82545:
+	case e1000_82546:
+	case e1000_82545_rev_3:
+	case e1000_82546_rev_3:
+	default:
+		hw->autoneg = true;
+		e1000_read_phy_reg(hw, PHY_CTRL, &phy_reg);
+		if (phy_reg & MII_CR_LOOPBACK) {
+			phy_reg &= ~MII_CR_LOOPBACK;
+			e1000_write_phy_reg(hw, PHY_CTRL, phy_reg);
+			e1000_phy_reset(hw);
+		}
+		break;
+	}
+}
+
+static void e1000_create_lbtest_frame(struct sk_buff *skb,
+				      unsigned int frame_size)
+{
+	memset(skb->data, 0xFF, frame_size);
+	frame_size &= ~1;
+	memset(&skb->data[frame_size / 2], 0xAA, frame_size / 2 - 1);
+	skb->data[frame_size / 2 + 10] = 0xBE;
+	skb->data[frame_size / 2 + 12] = 0xAF;
+}
+
+static int e1000_check_lbtest_frame(const unsigned char *data,
+				    unsigned int frame_size)
+{
+	frame_size &= ~1;
+	if (*(data + 3) == 0xFF) {
+		if ((*(data + frame_size / 2 + 10) == 0xBE) &&
+		    (*(data + frame_size / 2 + 12) == 0xAF)) {
+			return 0;
+		}
+	}
+	return 13;
+}
+
+static int e1000_run_loopback_test(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_tx_ring *txdr = &adapter->test_tx_ring;
+	struct e1000_rx_ring *rxdr = &adapter->test_rx_ring;
+	struct pci_dev *pdev = adapter->pdev;
+	int i, j, k, l, lc, good_cnt, ret_val = 0;
+	unsigned long time;
+
+	ew32(RDT, rxdr->count - 1);
+
+	/* Calculate the loop count based on the largest descriptor ring
+	 * The idea is to wrap the largest ring a number of times using 64
+	 * send/receive pairs during each loop
+	 */
+
+	if (rxdr->count <= txdr->count)
+		lc = ((txdr->count / 64) * 2) + 1;
+	else
+		lc = ((rxdr->count / 64) * 2) + 1;
+
+	k = l = 0;
+	for (j = 0; j <= lc; j++) { /* loop count loop */
+		for (i = 0; i < 64; i++) { /* send the packets */
+			e1000_create_lbtest_frame(txdr->buffer_info[i].skb,
+						  1024);
+			dma_sync_single_for_device(&pdev->dev,
+						   txdr->buffer_info[k].dma,
+						   txdr->buffer_info[k].length,
+						   DMA_TO_DEVICE);
+			if (unlikely(++k == txdr->count))
+				k = 0;
+		}
+		ew32(TDT, k);
+		E1000_WRITE_FLUSH();
+		msleep(200);
+		time = jiffies; /* set the start time for the receive */
+		good_cnt = 0;
+		do { /* receive the sent packets */
+			dma_sync_single_for_cpu(&pdev->dev,
+						rxdr->buffer_info[l].dma,
+						E1000_RXBUFFER_2048,
+						DMA_FROM_DEVICE);
+
+			ret_val = e1000_check_lbtest_frame(
+					rxdr->buffer_info[l].rxbuf.data +
+					NET_SKB_PAD + NET_IP_ALIGN,
+					1024);
+			if (!ret_val)
+				good_cnt++;
+			if (unlikely(++l == rxdr->count))
+				l = 0;
+			/* time + 20 msecs (200 msecs on 2.4) is more than
+			 * enough time to complete the receives, if it's
+			 * exceeded, break and error off
+			 */
+		} while (good_cnt < 64 && time_after(time + 20, jiffies));
+
+		if (good_cnt != 64) {
+			ret_val = 13; /* ret_val is the same as mis-compare */
+			break;
+		}
+		if (time_after_eq(jiffies, time + 2)) {
+			ret_val = 14; /* error code for time out error */
+			break;
+		}
+	} /* end loop count loop */
+	return ret_val;
+}
+
+static int e1000_loopback_test(struct e1000_adapter *adapter, u64 *data)
+{
+	*data = e1000_setup_desc_rings(adapter);
+	if (*data)
+		goto out;
+	*data = e1000_setup_loopback_test(adapter);
+	if (*data)
+		goto err_loopback;
+	*data = e1000_run_loopback_test(adapter);
+	e1000_loopback_cleanup(adapter);
+
+err_loopback:
+	e1000_free_desc_rings(adapter);
+out:
+	return *data;
+}
+
+static int e1000_link_test(struct e1000_adapter *adapter, u64 *data)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	*data = 0;
+	if (hw->media_type == e1000_media_type_internal_serdes) {
+		int i = 0;
+
+		hw->serdes_has_link = false;
+
+		/* On some blade server designs, link establishment
+		 * could take as long as 2-3 minutes
+		 */
+		do {
+			e1000_check_for_link(hw);
+			if (hw->serdes_has_link)
+				return *data;
+			msleep(20);
+		} while (i++ < 3750);
+
+		*data = 1;
+	} else {
+		e1000_check_for_link(hw);
+		if (hw->autoneg)  /* if auto_neg is set wait for it */
+			msleep(4000);
+
+		if (!(er32(STATUS) & E1000_STATUS_LU))
+			*data = 1;
+	}
+	return *data;
+}
+
+static int e1000_get_sset_count(struct net_device *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_TEST:
+		return E1000_TEST_LEN;
+	case ETH_SS_STATS:
+		return E1000_STATS_LEN;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void e1000_diag_test(struct net_device *netdev,
+			    struct ethtool_test *eth_test, u64 *data)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	bool if_running = netif_running(netdev);
+
+	set_bit(__E1000_TESTING, &adapter->flags);
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		/* Offline tests */
+
+		/* save speed, duplex, autoneg settings */
+		u16 autoneg_advertised = hw->autoneg_advertised;
+		u8 forced_speed_duplex = hw->forced_speed_duplex;
+		u8 autoneg = hw->autoneg;
+
+		e_info(hw, "offline testing starting\n");
+
+		/* Link test performed before hardware reset so autoneg doesn't
+		 * interfere with test result
+		 */
+		if (e1000_link_test(adapter, &data[4]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		if (if_running)
+			/* indicate we're in test mode */
+			e1000_close(netdev);
+		else
+			e1000_reset(adapter);
+
+		if (e1000_reg_test(adapter, &data[0]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		e1000_reset(adapter);
+		if (e1000_eeprom_test(adapter, &data[1]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		e1000_reset(adapter);
+		if (e1000_intr_test(adapter, &data[2]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		e1000_reset(adapter);
+		/* make sure the phy is powered up */
+		e1000_power_up_phy(adapter);
+		if (e1000_loopback_test(adapter, &data[3]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* restore speed, duplex, autoneg settings */
+		hw->autoneg_advertised = autoneg_advertised;
+		hw->forced_speed_duplex = forced_speed_duplex;
+		hw->autoneg = autoneg;
+
+		e1000_reset(adapter);
+		clear_bit(__E1000_TESTING, &adapter->flags);
+		if (if_running)
+			e1000_open(netdev);
+	} else {
+		e_info(hw, "online testing starting\n");
+		/* Online tests */
+		if (e1000_link_test(adapter, &data[4]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* Online tests aren't run; pass by default */
+		data[0] = 0;
+		data[1] = 0;
+		data[2] = 0;
+		data[3] = 0;
+
+		clear_bit(__E1000_TESTING, &adapter->flags);
+	}
+	msleep_interruptible(4 * 1000);
+}
+
+static int e1000_wol_exclusion(struct e1000_adapter *adapter,
+			       struct ethtool_wolinfo *wol)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int retval = 1; /* fail by default */
+
+	switch (hw->device_id) {
+	case E1000_DEV_ID_82542:
+	case E1000_DEV_ID_82543GC_FIBER:
+	case E1000_DEV_ID_82543GC_COPPER:
+	case E1000_DEV_ID_82544EI_FIBER:
+	case E1000_DEV_ID_82546EB_QUAD_COPPER:
+	case E1000_DEV_ID_82545EM_FIBER:
+	case E1000_DEV_ID_82545EM_COPPER:
+	case E1000_DEV_ID_82546GB_QUAD_COPPER:
+	case E1000_DEV_ID_82546GB_PCIE:
+		/* these don't support WoL at all */
+		wol->supported = 0;
+		break;
+	case E1000_DEV_ID_82546EB_FIBER:
+	case E1000_DEV_ID_82546GB_FIBER:
+		/* Wake events not supported on port B */
+		if (er32(STATUS) & E1000_STATUS_FUNC_1) {
+			wol->supported = 0;
+			break;
+		}
+		/* return success for non excluded adapter ports */
+		retval = 0;
+		break;
+	case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
+		/* quad port adapters only support WoL on port A */
+		if (!adapter->quad_port_a) {
+			wol->supported = 0;
+			break;
+		}
+		/* return success for non excluded adapter ports */
+		retval = 0;
+		break;
+	default:
+		/* dual port cards only support WoL on port A from now on
+		 * unless it was enabled in the eeprom for port B
+		 * so exclude FUNC_1 ports from having WoL enabled
+		 */
+		if (er32(STATUS) & E1000_STATUS_FUNC_1 &&
+		    !adapter->eeprom_wol) {
+			wol->supported = 0;
+			break;
+		}
+
+		retval = 0;
+	}
+
+	return retval;
+}
+
+static void e1000_get_wol(struct net_device *netdev,
+			  struct ethtool_wolinfo *wol)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	wol->supported = WAKE_UCAST | WAKE_MCAST | WAKE_BCAST | WAKE_MAGIC;
+	wol->wolopts = 0;
+
+	/* this function will set ->supported = 0 and return 1 if wol is not
+	 * supported by this hardware
+	 */
+	if (e1000_wol_exclusion(adapter, wol) ||
+	    !device_can_wakeup(&adapter->pdev->dev))
+		return;
+
+	/* apply any specific unsupported masks here */
+	switch (hw->device_id) {
+	case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
+		/* KSP3 does not support UCAST wake-ups */
+		wol->supported &= ~WAKE_UCAST;
+
+		if (adapter->wol & E1000_WUFC_EX)
+			e_err(drv, "Interface does not support directed "
+			      "(unicast) frame wake-up packets\n");
+		break;
+	default:
+		break;
+	}
+
+	if (adapter->wol & E1000_WUFC_EX)
+		wol->wolopts |= WAKE_UCAST;
+	if (adapter->wol & E1000_WUFC_MC)
+		wol->wolopts |= WAKE_MCAST;
+	if (adapter->wol & E1000_WUFC_BC)
+		wol->wolopts |= WAKE_BCAST;
+	if (adapter->wol & E1000_WUFC_MAG)
+		wol->wolopts |= WAKE_MAGIC;
+}
+
+static int e1000_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (wol->wolopts & (WAKE_PHY | WAKE_ARP | WAKE_MAGICSECURE))
+		return -EOPNOTSUPP;
+
+	if (e1000_wol_exclusion(adapter, wol) ||
+	    !device_can_wakeup(&adapter->pdev->dev))
+		return wol->wolopts ? -EOPNOTSUPP : 0;
+
+	switch (hw->device_id) {
+	case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
+		if (wol->wolopts & WAKE_UCAST) {
+			e_err(drv, "Interface does not support directed "
+			      "(unicast) frame wake-up packets\n");
+			return -EOPNOTSUPP;
+		}
+		break;
+	default:
+		break;
+	}
+
+	/* these settings will always override what we currently have */
+	adapter->wol = 0;
+
+	if (wol->wolopts & WAKE_UCAST)
+		adapter->wol |= E1000_WUFC_EX;
+	if (wol->wolopts & WAKE_MCAST)
+		adapter->wol |= E1000_WUFC_MC;
+	if (wol->wolopts & WAKE_BCAST)
+		adapter->wol |= E1000_WUFC_BC;
+	if (wol->wolopts & WAKE_MAGIC)
+		adapter->wol |= E1000_WUFC_MAG;
+
+	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+
+	return 0;
+}
+
+static int e1000_set_phys_id(struct net_device *netdev,
+			     enum ethtool_phys_id_state state)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		e1000_setup_led(hw);
+		return 2;
+
+	case ETHTOOL_ID_ON:
+		e1000_led_on(hw);
+		break;
+
+	case ETHTOOL_ID_OFF:
+		e1000_led_off(hw);
+		break;
+
+	case ETHTOOL_ID_INACTIVE:
+		e1000_cleanup_led(hw);
+	}
+
+	return 0;
+}
+
+static int e1000_get_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *ec,
+			      struct kernel_ethtool_coalesce *kernel_coal,
+			      struct netlink_ext_ack *extack)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	if (adapter->hw.mac_type < e1000_82545)
+		return -EOPNOTSUPP;
+
+	if (adapter->itr_setting <= 4)
+		ec->rx_coalesce_usecs = adapter->itr_setting;
+	else
+		ec->rx_coalesce_usecs = 1000000 / adapter->itr_setting;
+
+	return 0;
+}
+
+static int e1000_set_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *ec,
+			      struct kernel_ethtool_coalesce *kernel_coal,
+			      struct netlink_ext_ack *extack)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (hw->mac_type < e1000_82545)
+		return -EOPNOTSUPP;
+
+	if ((ec->rx_coalesce_usecs > E1000_MAX_ITR_USECS) ||
+	    ((ec->rx_coalesce_usecs > 4) &&
+	     (ec->rx_coalesce_usecs < E1000_MIN_ITR_USECS)) ||
+	    (ec->rx_coalesce_usecs == 2))
+		return -EINVAL;
+
+	if (ec->rx_coalesce_usecs == 4) {
+		adapter->itr = adapter->itr_setting = 4;
+	} else if (ec->rx_coalesce_usecs <= 3) {
+		adapter->itr = 20000;
+		adapter->itr_setting = ec->rx_coalesce_usecs;
+	} else {
+		adapter->itr = (1000000 / ec->rx_coalesce_usecs);
+		adapter->itr_setting = adapter->itr & ~3;
+	}
+
+	if (adapter->itr_setting != 0)
+		ew32(ITR, 1000000000 / (adapter->itr * 256));
+	else
+		ew32(ITR, 0);
+
+	return 0;
+}
+
+static int e1000_nway_reset(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	if (netif_running(netdev))
+		e1000_reinit_locked(adapter);
+	return 0;
+}
+
+static void e1000_get_ethtool_stats(struct net_device *netdev,
+				    struct ethtool_stats *stats, u64 *data)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	int i;
+	const struct e1000_stats *stat = e1000_gstrings_stats;
+
+	e1000_update_stats(adapter);
+	for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++, stat++) {
+		char *p;
+
+		switch (stat->type) {
+		case NETDEV_STATS:
+			p = (char *)netdev + stat->stat_offset;
+			break;
+		case E1000_STATS:
+			p = (char *)adapter + stat->stat_offset;
+			break;
+		default:
+			netdev_WARN_ONCE(netdev, "Invalid E1000 stat type: %u index %d\n",
+					 stat->type, i);
+			continue;
+		}
+
+		if (stat->sizeof_stat == sizeof(u64))
+			data[i] = *(u64 *)p;
+		else
+			data[i] = *(u32 *)p;
+	}
+/* BUG_ON(i != E1000_STATS_LEN); */
+}
+
+static void e1000_get_strings(struct net_device *netdev, u32 stringset,
+			      u8 *data)
+{
+	u8 *p = data;
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_TEST:
+		memcpy(data, e1000_gstrings_test, sizeof(e1000_gstrings_test));
+		break;
+	case ETH_SS_STATS:
+		for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) {
+			memcpy(p, e1000_gstrings_stats[i].stat_string,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		/* BUG_ON(p - data != E1000_STATS_LEN * ETH_GSTRING_LEN); */
+		break;
+	}
+}
+
+static const struct ethtool_ops e1000_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS,
+	.get_drvinfo		= e1000_get_drvinfo,
+	.get_regs_len		= e1000_get_regs_len,
+	.get_regs		= e1000_get_regs,
+	.get_wol		= e1000_get_wol,
+	.set_wol		= e1000_set_wol,
+	.get_msglevel		= e1000_get_msglevel,
+	.set_msglevel		= e1000_set_msglevel,
+	.nway_reset		= e1000_nway_reset,
+	.get_link		= e1000_get_link,
+	.get_eeprom_len		= e1000_get_eeprom_len,
+	.get_eeprom		= e1000_get_eeprom,
+	.set_eeprom		= e1000_set_eeprom,
+	.get_ringparam		= e1000_get_ringparam,
+	.set_ringparam		= e1000_set_ringparam,
+	.get_pauseparam		= e1000_get_pauseparam,
+	.set_pauseparam		= e1000_set_pauseparam,
+	.self_test		= e1000_diag_test,
+	.get_strings		= e1000_get_strings,
+	.set_phys_id		= e1000_set_phys_id,
+	.get_ethtool_stats	= e1000_get_ethtool_stats,
+	.get_sset_count		= e1000_get_sset_count,
+	.get_coalesce		= e1000_get_coalesce,
+	.set_coalesce		= e1000_set_coalesce,
+	.get_ts_info		= ethtool_op_get_ts_info,
+	.get_link_ksettings	= e1000_get_link_ksettings,
+	.set_link_ksettings	= e1000_set_link_ksettings,
+};
+
+void e1000_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &e1000_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c
new file mode 100644
index 0000000000..4542e2bc28
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c
@@ -0,0 +1,5636 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
+
+/* e1000_hw.c
+ * Shared functions for accessing and configuring the MAC
+ */
+
+#include "e1000.h"
+
+static s32 e1000_check_downshift(struct e1000_hw *hw);
+static s32 e1000_check_polarity(struct e1000_hw *hw,
+				e1000_rev_polarity *polarity);
+static void e1000_clear_hw_cntrs(struct e1000_hw *hw);
+static void e1000_clear_vfta(struct e1000_hw *hw);
+static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw,
+					      bool link_up);
+static s32 e1000_config_fc_after_link_up(struct e1000_hw *hw);
+static s32 e1000_detect_gig_phy(struct e1000_hw *hw);
+static s32 e1000_get_auto_rd_done(struct e1000_hw *hw);
+static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length,
+				  u16 *max_length);
+static s32 e1000_get_phy_cfg_done(struct e1000_hw *hw);
+static s32 e1000_id_led_init(struct e1000_hw *hw);
+static void e1000_init_rx_addrs(struct e1000_hw *hw);
+static s32 e1000_phy_igp_get_info(struct e1000_hw *hw,
+				  struct e1000_phy_info *phy_info);
+static s32 e1000_phy_m88_get_info(struct e1000_hw *hw,
+				  struct e1000_phy_info *phy_info);
+static s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active);
+static s32 e1000_wait_autoneg(struct e1000_hw *hw);
+static void e1000_write_reg_io(struct e1000_hw *hw, u32 offset, u32 value);
+static s32 e1000_set_phy_type(struct e1000_hw *hw);
+static void e1000_phy_init_script(struct e1000_hw *hw);
+static s32 e1000_setup_copper_link(struct e1000_hw *hw);
+static s32 e1000_setup_fiber_serdes_link(struct e1000_hw *hw);
+static s32 e1000_adjust_serdes_amplitude(struct e1000_hw *hw);
+static s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw);
+static s32 e1000_config_mac_to_phy(struct e1000_hw *hw);
+static void e1000_raise_mdi_clk(struct e1000_hw *hw, u32 *ctrl);
+static void e1000_lower_mdi_clk(struct e1000_hw *hw, u32 *ctrl);
+static void e1000_shift_out_mdi_bits(struct e1000_hw *hw, u32 data, u16 count);
+static u16 e1000_shift_in_mdi_bits(struct e1000_hw *hw);
+static s32 e1000_phy_reset_dsp(struct e1000_hw *hw);
+static s32 e1000_write_eeprom_spi(struct e1000_hw *hw, u16 offset,
+				  u16 words, u16 *data);
+static s32 e1000_write_eeprom_microwire(struct e1000_hw *hw, u16 offset,
+					u16 words, u16 *data);
+static s32 e1000_spi_eeprom_ready(struct e1000_hw *hw);
+static void e1000_raise_ee_clk(struct e1000_hw *hw, u32 *eecd);
+static void e1000_lower_ee_clk(struct e1000_hw *hw, u32 *eecd);
+static void e1000_shift_out_ee_bits(struct e1000_hw *hw, u16 data, u16 count);
+static s32 e1000_write_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr,
+				  u16 phy_data);
+static s32 e1000_read_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr,
+				 u16 *phy_data);
+static u16 e1000_shift_in_ee_bits(struct e1000_hw *hw, u16 count);
+static s32 e1000_acquire_eeprom(struct e1000_hw *hw);
+static void e1000_release_eeprom(struct e1000_hw *hw);
+static void e1000_standby_eeprom(struct e1000_hw *hw);
+static s32 e1000_set_vco_speed(struct e1000_hw *hw);
+static s32 e1000_polarity_reversal_workaround(struct e1000_hw *hw);
+static s32 e1000_set_phy_mode(struct e1000_hw *hw);
+static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words,
+				u16 *data);
+static s32 e1000_do_write_eeprom(struct e1000_hw *hw, u16 offset, u16 words,
+				 u16 *data);
+
+/* IGP cable length table */
+static const
+u16 e1000_igp_cable_length_table[IGP01E1000_AGC_LENGTH_TABLE_SIZE] = {
+	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+	5, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 25, 25, 25,
+	25, 25, 25, 25, 30, 30, 30, 30, 40, 40, 40, 40, 40, 40, 40, 40,
+	40, 50, 50, 50, 50, 50, 50, 50, 60, 60, 60, 60, 60, 60, 60, 60,
+	60, 70, 70, 70, 70, 70, 70, 80, 80, 80, 80, 80, 80, 90, 90, 90,
+	90, 90, 90, 90, 90, 90, 100, 100, 100, 100, 100, 100, 100, 100, 100,
+	    100,
+	100, 100, 100, 100, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110,
+	    110, 110,
+	110, 110, 110, 110, 110, 110, 120, 120, 120, 120, 120, 120, 120, 120,
+	    120, 120
+};
+
+static DEFINE_MUTEX(e1000_eeprom_lock);
+static DEFINE_SPINLOCK(e1000_phy_lock);
+
+/**
+ * e1000_set_phy_type - Set the phy type member in the hw struct.
+ * @hw: Struct containing variables accessed by shared code
+ */
+static s32 e1000_set_phy_type(struct e1000_hw *hw)
+{
+	if (hw->mac_type == e1000_undefined)
+		return -E1000_ERR_PHY_TYPE;
+
+	switch (hw->phy_id) {
+	case M88E1000_E_PHY_ID:
+	case M88E1000_I_PHY_ID:
+	case M88E1011_I_PHY_ID:
+	case M88E1111_I_PHY_ID:
+	case M88E1118_E_PHY_ID:
+		hw->phy_type = e1000_phy_m88;
+		break;
+	case IGP01E1000_I_PHY_ID:
+		if (hw->mac_type == e1000_82541 ||
+		    hw->mac_type == e1000_82541_rev_2 ||
+		    hw->mac_type == e1000_82547 ||
+		    hw->mac_type == e1000_82547_rev_2)
+			hw->phy_type = e1000_phy_igp;
+		break;
+	case RTL8211B_PHY_ID:
+		hw->phy_type = e1000_phy_8211;
+		break;
+	case RTL8201N_PHY_ID:
+		hw->phy_type = e1000_phy_8201;
+		break;
+	default:
+		/* Should never have loaded on this device */
+		hw->phy_type = e1000_phy_undefined;
+		return -E1000_ERR_PHY_TYPE;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_phy_init_script - IGP phy init script - initializes the GbE PHY
+ * @hw: Struct containing variables accessed by shared code
+ */
+static void e1000_phy_init_script(struct e1000_hw *hw)
+{
+	u16 phy_saved_data;
+
+	if (hw->phy_init_script) {
+		msleep(20);
+
+		/* Save off the current value of register 0x2F5B to be restored
+		 * at the end of this routine.
+		 */
+		e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data);
+
+		/* Disabled the PHY transmitter */
+		e1000_write_phy_reg(hw, 0x2F5B, 0x0003);
+		msleep(20);
+
+		e1000_write_phy_reg(hw, 0x0000, 0x0140);
+		msleep(5);
+
+		switch (hw->mac_type) {
+		case e1000_82541:
+		case e1000_82547:
+			e1000_write_phy_reg(hw, 0x1F95, 0x0001);
+			e1000_write_phy_reg(hw, 0x1F71, 0xBD21);
+			e1000_write_phy_reg(hw, 0x1F79, 0x0018);
+			e1000_write_phy_reg(hw, 0x1F30, 0x1600);
+			e1000_write_phy_reg(hw, 0x1F31, 0x0014);
+			e1000_write_phy_reg(hw, 0x1F32, 0x161C);
+			e1000_write_phy_reg(hw, 0x1F94, 0x0003);
+			e1000_write_phy_reg(hw, 0x1F96, 0x003F);
+			e1000_write_phy_reg(hw, 0x2010, 0x0008);
+			break;
+
+		case e1000_82541_rev_2:
+		case e1000_82547_rev_2:
+			e1000_write_phy_reg(hw, 0x1F73, 0x0099);
+			break;
+		default:
+			break;
+		}
+
+		e1000_write_phy_reg(hw, 0x0000, 0x3300);
+		msleep(20);
+
+		/* Now enable the transmitter */
+		e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data);
+
+		if (hw->mac_type == e1000_82547) {
+			u16 fused, fine, coarse;
+
+			/* Move to analog registers page */
+			e1000_read_phy_reg(hw,
+					   IGP01E1000_ANALOG_SPARE_FUSE_STATUS,
+					   &fused);
+
+			if (!(fused & IGP01E1000_ANALOG_SPARE_FUSE_ENABLED)) {
+				e1000_read_phy_reg(hw,
+						   IGP01E1000_ANALOG_FUSE_STATUS,
+						   &fused);
+
+				fine = fused & IGP01E1000_ANALOG_FUSE_FINE_MASK;
+				coarse =
+				    fused & IGP01E1000_ANALOG_FUSE_COARSE_MASK;
+
+				if (coarse >
+				    IGP01E1000_ANALOG_FUSE_COARSE_THRESH) {
+					coarse -=
+					    IGP01E1000_ANALOG_FUSE_COARSE_10;
+					fine -= IGP01E1000_ANALOG_FUSE_FINE_1;
+				} else if (coarse ==
+					   IGP01E1000_ANALOG_FUSE_COARSE_THRESH)
+					fine -= IGP01E1000_ANALOG_FUSE_FINE_10;
+
+				fused =
+				    (fused & IGP01E1000_ANALOG_FUSE_POLY_MASK) |
+				    (fine & IGP01E1000_ANALOG_FUSE_FINE_MASK) |
+				    (coarse &
+				     IGP01E1000_ANALOG_FUSE_COARSE_MASK);
+
+				e1000_write_phy_reg(hw,
+						    IGP01E1000_ANALOG_FUSE_CONTROL,
+						    fused);
+				e1000_write_phy_reg(hw,
+						    IGP01E1000_ANALOG_FUSE_BYPASS,
+						    IGP01E1000_ANALOG_FUSE_ENABLE_SW_CONTROL);
+			}
+		}
+	}
+}
+
+/**
+ * e1000_set_mac_type - Set the mac type member in the hw struct.
+ * @hw: Struct containing variables accessed by shared code
+ */
+s32 e1000_set_mac_type(struct e1000_hw *hw)
+{
+	switch (hw->device_id) {
+	case E1000_DEV_ID_82542:
+		switch (hw->revision_id) {
+		case E1000_82542_2_0_REV_ID:
+			hw->mac_type = e1000_82542_rev2_0;
+			break;
+		case E1000_82542_2_1_REV_ID:
+			hw->mac_type = e1000_82542_rev2_1;
+			break;
+		default:
+			/* Invalid 82542 revision ID */
+			return -E1000_ERR_MAC_TYPE;
+		}
+		break;
+	case E1000_DEV_ID_82543GC_FIBER:
+	case E1000_DEV_ID_82543GC_COPPER:
+		hw->mac_type = e1000_82543;
+		break;
+	case E1000_DEV_ID_82544EI_COPPER:
+	case E1000_DEV_ID_82544EI_FIBER:
+	case E1000_DEV_ID_82544GC_COPPER:
+	case E1000_DEV_ID_82544GC_LOM:
+		hw->mac_type = e1000_82544;
+		break;
+	case E1000_DEV_ID_82540EM:
+	case E1000_DEV_ID_82540EM_LOM:
+	case E1000_DEV_ID_82540EP:
+	case E1000_DEV_ID_82540EP_LOM:
+	case E1000_DEV_ID_82540EP_LP:
+		hw->mac_type = e1000_82540;
+		break;
+	case E1000_DEV_ID_82545EM_COPPER:
+	case E1000_DEV_ID_82545EM_FIBER:
+		hw->mac_type = e1000_82545;
+		break;
+	case E1000_DEV_ID_82545GM_COPPER:
+	case E1000_DEV_ID_82545GM_FIBER:
+	case E1000_DEV_ID_82545GM_SERDES:
+		hw->mac_type = e1000_82545_rev_3;
+		break;
+	case E1000_DEV_ID_82546EB_COPPER:
+	case E1000_DEV_ID_82546EB_FIBER:
+	case E1000_DEV_ID_82546EB_QUAD_COPPER:
+		hw->mac_type = e1000_82546;
+		break;
+	case E1000_DEV_ID_82546GB_COPPER:
+	case E1000_DEV_ID_82546GB_FIBER:
+	case E1000_DEV_ID_82546GB_SERDES:
+	case E1000_DEV_ID_82546GB_PCIE:
+	case E1000_DEV_ID_82546GB_QUAD_COPPER:
+	case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
+		hw->mac_type = e1000_82546_rev_3;
+		break;
+	case E1000_DEV_ID_82541EI:
+	case E1000_DEV_ID_82541EI_MOBILE:
+	case E1000_DEV_ID_82541ER_LOM:
+		hw->mac_type = e1000_82541;
+		break;
+	case E1000_DEV_ID_82541ER:
+	case E1000_DEV_ID_82541GI:
+	case E1000_DEV_ID_82541GI_LF:
+	case E1000_DEV_ID_82541GI_MOBILE:
+		hw->mac_type = e1000_82541_rev_2;
+		break;
+	case E1000_DEV_ID_82547EI:
+	case E1000_DEV_ID_82547EI_MOBILE:
+		hw->mac_type = e1000_82547;
+		break;
+	case E1000_DEV_ID_82547GI:
+		hw->mac_type = e1000_82547_rev_2;
+		break;
+	case E1000_DEV_ID_INTEL_CE4100_GBE:
+		hw->mac_type = e1000_ce4100;
+		break;
+	default:
+		/* Should never have loaded on this device */
+		return -E1000_ERR_MAC_TYPE;
+	}
+
+	switch (hw->mac_type) {
+	case e1000_82541:
+	case e1000_82547:
+	case e1000_82541_rev_2:
+	case e1000_82547_rev_2:
+		hw->asf_firmware_present = true;
+		break;
+	default:
+		break;
+	}
+
+	/* The 82543 chip does not count tx_carrier_errors properly in
+	 * FD mode
+	 */
+	if (hw->mac_type == e1000_82543)
+		hw->bad_tx_carr_stats_fd = true;
+
+	if (hw->mac_type > e1000_82544)
+		hw->has_smbus = true;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_set_media_type - Set media type and TBI compatibility.
+ * @hw: Struct containing variables accessed by shared code
+ */
+void e1000_set_media_type(struct e1000_hw *hw)
+{
+	u32 status;
+
+	if (hw->mac_type != e1000_82543) {
+		/* tbi_compatibility is only valid on 82543 */
+		hw->tbi_compatibility_en = false;
+	}
+
+	switch (hw->device_id) {
+	case E1000_DEV_ID_82545GM_SERDES:
+	case E1000_DEV_ID_82546GB_SERDES:
+		hw->media_type = e1000_media_type_internal_serdes;
+		break;
+	default:
+		switch (hw->mac_type) {
+		case e1000_82542_rev2_0:
+		case e1000_82542_rev2_1:
+			hw->media_type = e1000_media_type_fiber;
+			break;
+		case e1000_ce4100:
+			hw->media_type = e1000_media_type_copper;
+			break;
+		default:
+			status = er32(STATUS);
+			if (status & E1000_STATUS_TBIMODE) {
+				hw->media_type = e1000_media_type_fiber;
+				/* tbi_compatibility not valid on fiber */
+				hw->tbi_compatibility_en = false;
+			} else {
+				hw->media_type = e1000_media_type_copper;
+			}
+			break;
+		}
+	}
+}
+
+/**
+ * e1000_reset_hw - reset the hardware completely
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Reset the transmit and receive units; mask and clear all interrupts.
+ */
+s32 e1000_reset_hw(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	u32 ctrl_ext;
+	u32 manc;
+	u32 led_ctrl;
+	s32 ret_val;
+
+	/* For 82542 (rev 2.0), disable MWI before issuing a device reset */
+	if (hw->mac_type == e1000_82542_rev2_0) {
+		e_dbg("Disabling MWI on 82542 rev 2.0\n");
+		e1000_pci_clear_mwi(hw);
+	}
+
+	/* Clear interrupt mask to stop board from generating interrupts */
+	e_dbg("Masking off all interrupts\n");
+	ew32(IMC, 0xffffffff);
+
+	/* Disable the Transmit and Receive units.  Then delay to allow
+	 * any pending transactions to complete before we hit the MAC with
+	 * the global reset.
+	 */
+	ew32(RCTL, 0);
+	ew32(TCTL, E1000_TCTL_PSP);
+	E1000_WRITE_FLUSH();
+
+	/* The tbi_compatibility_on Flag must be cleared when Rctl is cleared. */
+	hw->tbi_compatibility_on = false;
+
+	/* Delay to allow any outstanding PCI transactions to complete before
+	 * resetting the device
+	 */
+	msleep(10);
+
+	ctrl = er32(CTRL);
+
+	/* Must reset the PHY before resetting the MAC */
+	if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) {
+		ew32(CTRL, (ctrl | E1000_CTRL_PHY_RST));
+		E1000_WRITE_FLUSH();
+		msleep(5);
+	}
+
+	/* Issue a global reset to the MAC.  This will reset the chip's
+	 * transmit, receive, DMA, and link units.  It will not effect
+	 * the current PCI configuration.  The global reset bit is self-
+	 * clearing, and should clear within a microsecond.
+	 */
+	e_dbg("Issuing a global reset to MAC\n");
+
+	switch (hw->mac_type) {
+	case e1000_82544:
+	case e1000_82540:
+	case e1000_82545:
+	case e1000_82546:
+	case e1000_82541:
+	case e1000_82541_rev_2:
+		/* These controllers can't ack the 64-bit write when issuing the
+		 * reset, so use IO-mapping as a workaround to issue the reset
+		 */
+		E1000_WRITE_REG_IO(hw, CTRL, (ctrl | E1000_CTRL_RST));
+		break;
+	case e1000_82545_rev_3:
+	case e1000_82546_rev_3:
+		/* Reset is performed on a shadow of the control register */
+		ew32(CTRL_DUP, (ctrl | E1000_CTRL_RST));
+		break;
+	case e1000_ce4100:
+	default:
+		ew32(CTRL, (ctrl | E1000_CTRL_RST));
+		break;
+	}
+
+	/* After MAC reset, force reload of EEPROM to restore power-on settings
+	 * to device.  Later controllers reload the EEPROM automatically, so
+	 * just wait for reload to complete.
+	 */
+	switch (hw->mac_type) {
+	case e1000_82542_rev2_0:
+	case e1000_82542_rev2_1:
+	case e1000_82543:
+	case e1000_82544:
+		/* Wait for reset to complete */
+		udelay(10);
+		ctrl_ext = er32(CTRL_EXT);
+		ctrl_ext |= E1000_CTRL_EXT_EE_RST;
+		ew32(CTRL_EXT, ctrl_ext);
+		E1000_WRITE_FLUSH();
+		/* Wait for EEPROM reload */
+		msleep(2);
+		break;
+	case e1000_82541:
+	case e1000_82541_rev_2:
+	case e1000_82547:
+	case e1000_82547_rev_2:
+		/* Wait for EEPROM reload */
+		msleep(20);
+		break;
+	default:
+		/* Auto read done will delay 5ms or poll based on mac type */
+		ret_val = e1000_get_auto_rd_done(hw);
+		if (ret_val)
+			return ret_val;
+		break;
+	}
+
+	/* Disable HW ARPs on ASF enabled adapters */
+	if (hw->mac_type >= e1000_82540) {
+		manc = er32(MANC);
+		manc &= ~(E1000_MANC_ARP_EN);
+		ew32(MANC, manc);
+	}
+
+	if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) {
+		e1000_phy_init_script(hw);
+
+		/* Configure activity LED after PHY reset */
+		led_ctrl = er32(LEDCTL);
+		led_ctrl &= IGP_ACTIVITY_LED_MASK;
+		led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE);
+		ew32(LEDCTL, led_ctrl);
+	}
+
+	/* Clear interrupt mask to stop board from generating interrupts */
+	e_dbg("Masking off all interrupts\n");
+	ew32(IMC, 0xffffffff);
+
+	/* Clear any pending interrupt events. */
+	er32(ICR);
+
+	/* If MWI was previously enabled, reenable it. */
+	if (hw->mac_type == e1000_82542_rev2_0) {
+		if (hw->pci_cmd_word & PCI_COMMAND_INVALIDATE)
+			e1000_pci_set_mwi(hw);
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_init_hw - Performs basic configuration of the adapter.
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Assumes that the controller has previously been reset and is in a
+ * post-reset uninitialized state. Initializes the receive address registers,
+ * multicast table, and VLAN filter table. Calls routines to setup link
+ * configuration and flow control settings. Clears all on-chip counters. Leaves
+ * the transmit and receive units disabled and uninitialized.
+ */
+s32 e1000_init_hw(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	u32 i;
+	s32 ret_val;
+	u32 mta_size;
+	u32 ctrl_ext;
+
+	/* Initialize Identification LED */
+	ret_val = e1000_id_led_init(hw);
+	if (ret_val) {
+		e_dbg("Error Initializing Identification LED\n");
+		return ret_val;
+	}
+
+	/* Set the media type and TBI compatibility */
+	e1000_set_media_type(hw);
+
+	/* Disabling VLAN filtering. */
+	e_dbg("Initializing the IEEE VLAN\n");
+	if (hw->mac_type < e1000_82545_rev_3)
+		ew32(VET, 0);
+	e1000_clear_vfta(hw);
+
+	/* For 82542 (rev 2.0), disable MWI and put the receiver into reset */
+	if (hw->mac_type == e1000_82542_rev2_0) {
+		e_dbg("Disabling MWI on 82542 rev 2.0\n");
+		e1000_pci_clear_mwi(hw);
+		ew32(RCTL, E1000_RCTL_RST);
+		E1000_WRITE_FLUSH();
+		msleep(5);
+	}
+
+	/* Setup the receive address. This involves initializing all of the
+	 * Receive Address Registers (RARs 0 - 15).
+	 */
+	e1000_init_rx_addrs(hw);
+
+	/* For 82542 (rev 2.0), take the receiver out of reset and enable MWI */
+	if (hw->mac_type == e1000_82542_rev2_0) {
+		ew32(RCTL, 0);
+		E1000_WRITE_FLUSH();
+		msleep(1);
+		if (hw->pci_cmd_word & PCI_COMMAND_INVALIDATE)
+			e1000_pci_set_mwi(hw);
+	}
+
+	/* Zero out the Multicast HASH table */
+	e_dbg("Zeroing the MTA\n");
+	mta_size = E1000_MC_TBL_SIZE;
+	for (i = 0; i < mta_size; i++) {
+		E1000_WRITE_REG_ARRAY(hw, MTA, i, 0);
+		/* use write flush to prevent Memory Write Block (MWB) from
+		 * occurring when accessing our register space
+		 */
+		E1000_WRITE_FLUSH();
+	}
+
+	/* Set the PCI priority bit correctly in the CTRL register.  This
+	 * determines if the adapter gives priority to receives, or if it
+	 * gives equal priority to transmits and receives.  Valid only on
+	 * 82542 and 82543 silicon.
+	 */
+	if (hw->dma_fairness && hw->mac_type <= e1000_82543) {
+		ctrl = er32(CTRL);
+		ew32(CTRL, ctrl | E1000_CTRL_PRIOR);
+	}
+
+	switch (hw->mac_type) {
+	case e1000_82545_rev_3:
+	case e1000_82546_rev_3:
+		break;
+	default:
+		/* Workaround for PCI-X problem when BIOS sets MMRBC
+		 * incorrectly.
+		 */
+		if (hw->bus_type == e1000_bus_type_pcix &&
+		    e1000_pcix_get_mmrbc(hw) > 2048)
+			e1000_pcix_set_mmrbc(hw, 2048);
+		break;
+	}
+
+	/* Call a subroutine to configure the link and setup flow control. */
+	ret_val = e1000_setup_link(hw);
+
+	/* Set the transmit descriptor write-back policy */
+	if (hw->mac_type > e1000_82544) {
+		ctrl = er32(TXDCTL);
+		ctrl =
+		    (ctrl & ~E1000_TXDCTL_WTHRESH) |
+		    E1000_TXDCTL_FULL_TX_DESC_WB;
+		ew32(TXDCTL, ctrl);
+	}
+
+	/* Clear all of the statistics registers (clear on read).  It is
+	 * important that we do this after we have tried to establish link
+	 * because the symbol error count will increment wildly if there
+	 * is no link.
+	 */
+	e1000_clear_hw_cntrs(hw);
+
+	if (hw->device_id == E1000_DEV_ID_82546GB_QUAD_COPPER ||
+	    hw->device_id == E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3) {
+		ctrl_ext = er32(CTRL_EXT);
+		/* Relaxed ordering must be disabled to avoid a parity
+		 * error crash in a PCI slot.
+		 */
+		ctrl_ext |= E1000_CTRL_EXT_RO_DIS;
+		ew32(CTRL_EXT, ctrl_ext);
+	}
+
+	return ret_val;
+}
+
+/**
+ * e1000_adjust_serdes_amplitude - Adjust SERDES output amplitude based on EEPROM setting.
+ * @hw: Struct containing variables accessed by shared code.
+ */
+static s32 e1000_adjust_serdes_amplitude(struct e1000_hw *hw)
+{
+	u16 eeprom_data;
+	s32 ret_val;
+
+	if (hw->media_type != e1000_media_type_internal_serdes)
+		return E1000_SUCCESS;
+
+	switch (hw->mac_type) {
+	case e1000_82545_rev_3:
+	case e1000_82546_rev_3:
+		break;
+	default:
+		return E1000_SUCCESS;
+	}
+
+	ret_val = e1000_read_eeprom(hw, EEPROM_SERDES_AMPLITUDE, 1,
+				    &eeprom_data);
+	if (ret_val)
+		return ret_val;
+
+	if (eeprom_data != EEPROM_RESERVED_WORD) {
+		/* Adjust SERDES output amplitude only. */
+		eeprom_data &= EEPROM_SERDES_AMPLITUDE_MASK;
+		ret_val =
+		    e1000_write_phy_reg(hw, M88E1000_PHY_EXT_CTRL, eeprom_data);
+		if (ret_val)
+			return ret_val;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_setup_link - Configures flow control and link settings.
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Determines which flow control settings to use. Calls the appropriate media-
+ * specific link configuration function. Configures the flow control settings.
+ * Assuming the adapter has a valid link partner, a valid link should be
+ * established. Assumes the hardware has previously been reset and the
+ * transmitter and receiver are not enabled.
+ */
+s32 e1000_setup_link(struct e1000_hw *hw)
+{
+	u32 ctrl_ext;
+	s32 ret_val;
+	u16 eeprom_data;
+
+	/* Read and store word 0x0F of the EEPROM. This word contains bits
+	 * that determine the hardware's default PAUSE (flow control) mode,
+	 * a bit that determines whether the HW defaults to enabling or
+	 * disabling auto-negotiation, and the direction of the
+	 * SW defined pins. If there is no SW over-ride of the flow
+	 * control setting, then the variable hw->fc will
+	 * be initialized based on a value in the EEPROM.
+	 */
+	if (hw->fc == E1000_FC_DEFAULT) {
+		ret_val = e1000_read_eeprom(hw, EEPROM_INIT_CONTROL2_REG,
+					    1, &eeprom_data);
+		if (ret_val) {
+			e_dbg("EEPROM Read Error\n");
+			return -E1000_ERR_EEPROM;
+		}
+		if ((eeprom_data & EEPROM_WORD0F_PAUSE_MASK) == 0)
+			hw->fc = E1000_FC_NONE;
+		else if ((eeprom_data & EEPROM_WORD0F_PAUSE_MASK) ==
+			 EEPROM_WORD0F_ASM_DIR)
+			hw->fc = E1000_FC_TX_PAUSE;
+		else
+			hw->fc = E1000_FC_FULL;
+	}
+
+	/* We want to save off the original Flow Control configuration just
+	 * in case we get disconnected and then reconnected into a different
+	 * hub or switch with different Flow Control capabilities.
+	 */
+	if (hw->mac_type == e1000_82542_rev2_0)
+		hw->fc &= (~E1000_FC_TX_PAUSE);
+
+	if ((hw->mac_type < e1000_82543) && (hw->report_tx_early == 1))
+		hw->fc &= (~E1000_FC_RX_PAUSE);
+
+	hw->original_fc = hw->fc;
+
+	e_dbg("After fix-ups FlowControl is now = %x\n", hw->fc);
+
+	/* Take the 4 bits from EEPROM word 0x0F that determine the initial
+	 * polarity value for the SW controlled pins, and setup the
+	 * Extended Device Control reg with that info.
+	 * This is needed because one of the SW controlled pins is used for
+	 * signal detection.  So this should be done before e1000_setup_pcs_link()
+	 * or e1000_phy_setup() is called.
+	 */
+	if (hw->mac_type == e1000_82543) {
+		ret_val = e1000_read_eeprom(hw, EEPROM_INIT_CONTROL2_REG,
+					    1, &eeprom_data);
+		if (ret_val) {
+			e_dbg("EEPROM Read Error\n");
+			return -E1000_ERR_EEPROM;
+		}
+		ctrl_ext = ((eeprom_data & EEPROM_WORD0F_SWPDIO_EXT) <<
+			    SWDPIO__EXT_SHIFT);
+		ew32(CTRL_EXT, ctrl_ext);
+	}
+
+	/* Call the necessary subroutine to configure the link. */
+	ret_val = (hw->media_type == e1000_media_type_copper) ?
+	    e1000_setup_copper_link(hw) : e1000_setup_fiber_serdes_link(hw);
+
+	/* Initialize the flow control address, type, and PAUSE timer
+	 * registers to their default values.  This is done even if flow
+	 * control is disabled, because it does not hurt anything to
+	 * initialize these registers.
+	 */
+	e_dbg("Initializing the Flow Control address, type and timer regs\n");
+
+	ew32(FCT, FLOW_CONTROL_TYPE);
+	ew32(FCAH, FLOW_CONTROL_ADDRESS_HIGH);
+	ew32(FCAL, FLOW_CONTROL_ADDRESS_LOW);
+
+	ew32(FCTTV, hw->fc_pause_time);
+
+	/* Set the flow control receive threshold registers.  Normally,
+	 * these registers will be set to a default threshold that may be
+	 * adjusted later by the driver's runtime code.  However, if the
+	 * ability to transmit pause frames in not enabled, then these
+	 * registers will be set to 0.
+	 */
+	if (!(hw->fc & E1000_FC_TX_PAUSE)) {
+		ew32(FCRTL, 0);
+		ew32(FCRTH, 0);
+	} else {
+		/* We need to set up the Receive Threshold high and low water
+		 * marks as well as (optionally) enabling the transmission of
+		 * XON frames.
+		 */
+		if (hw->fc_send_xon) {
+			ew32(FCRTL, (hw->fc_low_water | E1000_FCRTL_XONE));
+			ew32(FCRTH, hw->fc_high_water);
+		} else {
+			ew32(FCRTL, hw->fc_low_water);
+			ew32(FCRTH, hw->fc_high_water);
+		}
+	}
+	return ret_val;
+}
+
+/**
+ * e1000_setup_fiber_serdes_link - prepare fiber or serdes link
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Manipulates Physical Coding Sublayer functions in order to configure
+ * link. Assumes the hardware has been previously reset and the transmitter
+ * and receiver are not enabled.
+ */
+static s32 e1000_setup_fiber_serdes_link(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	u32 status;
+	u32 txcw = 0;
+	u32 i;
+	u32 signal = 0;
+	s32 ret_val;
+
+	/* On adapters with a MAC newer than 82544, SWDP 1 will be
+	 * set when the optics detect a signal. On older adapters, it will be
+	 * cleared when there is a signal.  This applies to fiber media only.
+	 * If we're on serdes media, adjust the output amplitude to value
+	 * set in the EEPROM.
+	 */
+	ctrl = er32(CTRL);
+	if (hw->media_type == e1000_media_type_fiber)
+		signal = (hw->mac_type > e1000_82544) ? E1000_CTRL_SWDPIN1 : 0;
+
+	ret_val = e1000_adjust_serdes_amplitude(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Take the link out of reset */
+	ctrl &= ~(E1000_CTRL_LRST);
+
+	/* Adjust VCO speed to improve BER performance */
+	ret_val = e1000_set_vco_speed(hw);
+	if (ret_val)
+		return ret_val;
+
+	e1000_config_collision_dist(hw);
+
+	/* Check for a software override of the flow control settings, and setup
+	 * the device accordingly.  If auto-negotiation is enabled, then
+	 * software will have to set the "PAUSE" bits to the correct value in
+	 * the Tranmsit Config Word Register (TXCW) and re-start
+	 * auto-negotiation.  However, if auto-negotiation is disabled, then
+	 * software will have to manually configure the two flow control enable
+	 * bits in the CTRL register.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *  0:  Flow control is completely disabled
+	 *  1:  Rx flow control is enabled (we can receive pause frames, but
+	 *      not send pause frames).
+	 *  2:  Tx flow control is enabled (we can send pause frames but we do
+	 *      not support receiving pause frames).
+	 *  3:  Both Rx and TX flow control (symmetric) are enabled.
+	 */
+	switch (hw->fc) {
+	case E1000_FC_NONE:
+		/* Flow ctrl is completely disabled by a software over-ride */
+		txcw = (E1000_TXCW_ANE | E1000_TXCW_FD);
+		break;
+	case E1000_FC_RX_PAUSE:
+		/* Rx Flow control is enabled and Tx Flow control is disabled by
+		 * a software over-ride. Since there really isn't a way to
+		 * advertise that we are capable of Rx Pause ONLY, we will
+		 * advertise that we support both symmetric and asymmetric Rx
+		 * PAUSE. Later, we will disable the adapter's ability to send
+		 * PAUSE frames.
+		 */
+		txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK);
+		break;
+	case E1000_FC_TX_PAUSE:
+		/* Tx Flow control is enabled, and Rx Flow control is disabled,
+		 * by a software over-ride.
+		 */
+		txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR);
+		break;
+	case E1000_FC_FULL:
+		/* Flow control (both Rx and Tx) is enabled by a software
+		 * over-ride.
+		 */
+		txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK);
+		break;
+	default:
+		e_dbg("Flow control param set incorrectly\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	/* Since auto-negotiation is enabled, take the link out of reset (the
+	 * link will be in reset, because we previously reset the chip). This
+	 * will restart auto-negotiation.  If auto-negotiation is successful
+	 * then the link-up status bit will be set and the flow control enable
+	 * bits (RFCE and TFCE) will be set according to their negotiated value.
+	 */
+	e_dbg("Auto-negotiation enabled\n");
+
+	ew32(TXCW, txcw);
+	ew32(CTRL, ctrl);
+	E1000_WRITE_FLUSH();
+
+	hw->txcw = txcw;
+	msleep(1);
+
+	/* If we have a signal (the cable is plugged in) then poll for a
+	 * "Link-Up" indication in the Device Status Register.  Time-out if a
+	 * link isn't seen in 500 milliseconds seconds (Auto-negotiation should
+	 * complete in less than 500 milliseconds even if the other end is doing
+	 * it in SW). For internal serdes, we just assume a signal is present,
+	 * then poll.
+	 */
+	if (hw->media_type == e1000_media_type_internal_serdes ||
+	    (er32(CTRL) & E1000_CTRL_SWDPIN1) == signal) {
+		e_dbg("Looking for Link\n");
+		for (i = 0; i < (LINK_UP_TIMEOUT / 10); i++) {
+			msleep(10);
+			status = er32(STATUS);
+			if (status & E1000_STATUS_LU)
+				break;
+		}
+		if (i == (LINK_UP_TIMEOUT / 10)) {
+			e_dbg("Never got a valid link from auto-neg!!!\n");
+			hw->autoneg_failed = 1;
+			/* AutoNeg failed to achieve a link, so we'll call
+			 * e1000_check_for_link. This routine will force the
+			 * link up if we detect a signal. This will allow us to
+			 * communicate with non-autonegotiating link partners.
+			 */
+			ret_val = e1000_check_for_link(hw);
+			if (ret_val) {
+				e_dbg("Error while checking for link\n");
+				return ret_val;
+			}
+			hw->autoneg_failed = 0;
+		} else {
+			hw->autoneg_failed = 0;
+			e_dbg("Valid Link Found\n");
+		}
+	} else {
+		e_dbg("No Signal Detected\n");
+	}
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_copper_link_rtl_setup - Copper link setup for e1000_phy_rtl series.
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Commits changes to PHY configuration by calling e1000_phy_reset().
+ */
+static s32 e1000_copper_link_rtl_setup(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	/* SW reset the PHY so all changes take effect */
+	ret_val = e1000_phy_reset(hw);
+	if (ret_val) {
+		e_dbg("Error Resetting the PHY\n");
+		return ret_val;
+	}
+
+	return E1000_SUCCESS;
+}
+
+static s32 gbe_dhg_phy_setup(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u32 ctrl_aux;
+
+	switch (hw->phy_type) {
+	case e1000_phy_8211:
+		ret_val = e1000_copper_link_rtl_setup(hw);
+		if (ret_val) {
+			e_dbg("e1000_copper_link_rtl_setup failed!\n");
+			return ret_val;
+		}
+		break;
+	case e1000_phy_8201:
+		/* Set RMII mode */
+		ctrl_aux = er32(CTL_AUX);
+		ctrl_aux |= E1000_CTL_AUX_RMII;
+		ew32(CTL_AUX, ctrl_aux);
+		E1000_WRITE_FLUSH();
+
+		/* Disable the J/K bits required for receive */
+		ctrl_aux = er32(CTL_AUX);
+		ctrl_aux |= 0x4;
+		ctrl_aux &= ~0x2;
+		ew32(CTL_AUX, ctrl_aux);
+		E1000_WRITE_FLUSH();
+		ret_val = e1000_copper_link_rtl_setup(hw);
+
+		if (ret_val) {
+			e_dbg("e1000_copper_link_rtl_setup failed!\n");
+			return ret_val;
+		}
+		break;
+	default:
+		e_dbg("Error Resetting the PHY\n");
+		return E1000_ERR_PHY_TYPE;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_copper_link_preconfig - early configuration for copper
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Make sure we have a valid PHY and change PHY mode before link setup.
+ */
+static s32 e1000_copper_link_preconfig(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 ret_val;
+	u16 phy_data;
+
+	ctrl = er32(CTRL);
+	/* With 82543, we need to force speed and duplex on the MAC equal to
+	 * what the PHY speed and duplex configuration is. In addition, we need
+	 * to perform a hardware reset on the PHY to take it out of reset.
+	 */
+	if (hw->mac_type > e1000_82543) {
+		ctrl |= E1000_CTRL_SLU;
+		ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+		ew32(CTRL, ctrl);
+	} else {
+		ctrl |=
+		    (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX | E1000_CTRL_SLU);
+		ew32(CTRL, ctrl);
+		ret_val = e1000_phy_hw_reset(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Make sure we have a valid PHY */
+	ret_val = e1000_detect_gig_phy(hw);
+	if (ret_val) {
+		e_dbg("Error, did not detect valid phy.\n");
+		return ret_val;
+	}
+	e_dbg("Phy ID = %x\n", hw->phy_id);
+
+	/* Set PHY to class A mode (if necessary) */
+	ret_val = e1000_set_phy_mode(hw);
+	if (ret_val)
+		return ret_val;
+
+	if ((hw->mac_type == e1000_82545_rev_3) ||
+	    (hw->mac_type == e1000_82546_rev_3)) {
+		ret_val =
+		    e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+		phy_data |= 0x00000008;
+		ret_val =
+		    e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+	}
+
+	if (hw->mac_type <= e1000_82543 ||
+	    hw->mac_type == e1000_82541 || hw->mac_type == e1000_82547 ||
+	    hw->mac_type == e1000_82541_rev_2 ||
+	    hw->mac_type == e1000_82547_rev_2)
+		hw->phy_reset_disable = false;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_copper_link_igp_setup - Copper link setup for e1000_phy_igp series.
+ * @hw: Struct containing variables accessed by shared code
+ */
+static s32 e1000_copper_link_igp_setup(struct e1000_hw *hw)
+{
+	u32 led_ctrl;
+	s32 ret_val;
+	u16 phy_data;
+
+	if (hw->phy_reset_disable)
+		return E1000_SUCCESS;
+
+	ret_val = e1000_phy_reset(hw);
+	if (ret_val) {
+		e_dbg("Error Resetting the PHY\n");
+		return ret_val;
+	}
+
+	/* Wait 15ms for MAC to configure PHY from eeprom settings */
+	msleep(15);
+	/* Configure activity LED after PHY reset */
+	led_ctrl = er32(LEDCTL);
+	led_ctrl &= IGP_ACTIVITY_LED_MASK;
+	led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE);
+	ew32(LEDCTL, led_ctrl);
+
+	/* The NVM settings will configure LPLU in D3 for IGP2 and IGP3 PHYs */
+	if (hw->phy_type == e1000_phy_igp) {
+		/* disable lplu d3 during driver init */
+		ret_val = e1000_set_d3_lplu_state(hw, false);
+		if (ret_val) {
+			e_dbg("Error Disabling LPLU D3\n");
+			return ret_val;
+		}
+	}
+
+	/* Configure mdi-mdix settings */
+	ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) {
+		hw->dsp_config_state = e1000_dsp_config_disabled;
+		/* Force MDI for earlier revs of the IGP PHY */
+		phy_data &=
+		    ~(IGP01E1000_PSCR_AUTO_MDIX |
+		      IGP01E1000_PSCR_FORCE_MDI_MDIX);
+		hw->mdix = 1;
+
+	} else {
+		hw->dsp_config_state = e1000_dsp_config_enabled;
+		phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX;
+
+		switch (hw->mdix) {
+		case 1:
+			phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX;
+			break;
+		case 2:
+			phy_data |= IGP01E1000_PSCR_FORCE_MDI_MDIX;
+			break;
+		case 0:
+		default:
+			phy_data |= IGP01E1000_PSCR_AUTO_MDIX;
+			break;
+		}
+	}
+	ret_val = e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* set auto-master slave resolution settings */
+	if (hw->autoneg) {
+		e1000_ms_type phy_ms_setting = hw->master_slave;
+
+		if (hw->ffe_config_state == e1000_ffe_config_active)
+			hw->ffe_config_state = e1000_ffe_config_enabled;
+
+		if (hw->dsp_config_state == e1000_dsp_config_activated)
+			hw->dsp_config_state = e1000_dsp_config_enabled;
+
+		/* when autonegotiation advertisement is only 1000Mbps then we
+		 * should disable SmartSpeed and enable Auto MasterSlave
+		 * resolution as hardware default.
+		 */
+		if (hw->autoneg_advertised == ADVERTISE_1000_FULL) {
+			/* Disable SmartSpeed */
+			ret_val =
+			    e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+					       &phy_data);
+			if (ret_val)
+				return ret_val;
+			phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val =
+			    e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+						phy_data);
+			if (ret_val)
+				return ret_val;
+			/* Set auto Master/Slave resolution process */
+			ret_val =
+			    e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_data);
+			if (ret_val)
+				return ret_val;
+			phy_data &= ~CR_1000T_MS_ENABLE;
+			ret_val =
+			    e1000_write_phy_reg(hw, PHY_1000T_CTRL, phy_data);
+			if (ret_val)
+				return ret_val;
+		}
+
+		ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		/* load defaults for future use */
+		hw->original_master_slave = (phy_data & CR_1000T_MS_ENABLE) ?
+		    ((phy_data & CR_1000T_MS_VALUE) ?
+		     e1000_ms_force_master :
+		     e1000_ms_force_slave) : e1000_ms_auto;
+
+		switch (phy_ms_setting) {
+		case e1000_ms_force_master:
+			phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE);
+			break;
+		case e1000_ms_force_slave:
+			phy_data |= CR_1000T_MS_ENABLE;
+			phy_data &= ~(CR_1000T_MS_VALUE);
+			break;
+		case e1000_ms_auto:
+			phy_data &= ~CR_1000T_MS_ENABLE;
+			break;
+		default:
+			break;
+		}
+		ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL, phy_data);
+		if (ret_val)
+			return ret_val;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_copper_link_mgp_setup - Copper link setup for e1000_phy_m88 series.
+ * @hw: Struct containing variables accessed by shared code
+ */
+static s32 e1000_copper_link_mgp_setup(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 phy_data;
+
+	if (hw->phy_reset_disable)
+		return E1000_SUCCESS;
+
+	/* Enable CRS on TX. This must be set for half-duplex operation. */
+	ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+
+	/* Options:
+	 *   MDI/MDI-X = 0 (default)
+	 *   0 - Auto for all speeds
+	 *   1 - MDI mode
+	 *   2 - MDI-X mode
+	 *   3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes)
+	 */
+	phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+
+	switch (hw->mdix) {
+	case 1:
+		phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE;
+		break;
+	case 2:
+		phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE;
+		break;
+	case 3:
+		phy_data |= M88E1000_PSCR_AUTO_X_1000T;
+		break;
+	case 0:
+	default:
+		phy_data |= M88E1000_PSCR_AUTO_X_MODE;
+		break;
+	}
+
+	/* Options:
+	 *   disable_polarity_correction = 0 (default)
+	 *       Automatic Correction for Reversed Cable Polarity
+	 *   0 - Disabled
+	 *   1 - Enabled
+	 */
+	phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL;
+	if (hw->disable_polarity_correction == 1)
+		phy_data |= M88E1000_PSCR_POLARITY_REVERSAL;
+	ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	if (hw->phy_revision < M88E1011_I_REV_4) {
+		/* Force TX_CLK in the Extended PHY Specific Control Register
+		 * to 25MHz clock.
+		 */
+		ret_val =
+		    e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL,
+				       &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		phy_data |= M88E1000_EPSCR_TX_CLK_25;
+
+		if ((hw->phy_revision == E1000_REVISION_2) &&
+		    (hw->phy_id == M88E1111_I_PHY_ID)) {
+			/* Vidalia Phy, set the downshift counter to 5x */
+			phy_data &= ~(M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK);
+			phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X;
+			ret_val = e1000_write_phy_reg(hw,
+						      M88E1000_EXT_PHY_SPEC_CTRL,
+						      phy_data);
+			if (ret_val)
+				return ret_val;
+		} else {
+			/* Configure Master and Slave downshift values */
+			phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK |
+				      M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK);
+			phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X |
+				     M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X);
+			ret_val = e1000_write_phy_reg(hw,
+						      M88E1000_EXT_PHY_SPEC_CTRL,
+						      phy_data);
+			if (ret_val)
+				return ret_val;
+		}
+	}
+
+	/* SW Reset the PHY so all changes take effect */
+	ret_val = e1000_phy_reset(hw);
+	if (ret_val) {
+		e_dbg("Error Resetting the PHY\n");
+		return ret_val;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_copper_link_autoneg - setup auto-neg
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Setup auto-negotiation and flow control advertisements,
+ * and then perform auto-negotiation.
+ */
+static s32 e1000_copper_link_autoneg(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 phy_data;
+
+	/* Perform some bounds checking on the hw->autoneg_advertised
+	 * parameter.  If this variable is zero, then set it to the default.
+	 */
+	hw->autoneg_advertised &= AUTONEG_ADVERTISE_SPEED_DEFAULT;
+
+	/* If autoneg_advertised is zero, we assume it was not defaulted
+	 * by the calling code so we set to advertise full capability.
+	 */
+	if (hw->autoneg_advertised == 0)
+		hw->autoneg_advertised = AUTONEG_ADVERTISE_SPEED_DEFAULT;
+
+	/* IFE/RTL8201N PHY only supports 10/100 */
+	if (hw->phy_type == e1000_phy_8201)
+		hw->autoneg_advertised &= AUTONEG_ADVERTISE_10_100_ALL;
+
+	e_dbg("Reconfiguring auto-neg advertisement params\n");
+	ret_val = e1000_phy_setup_autoneg(hw);
+	if (ret_val) {
+		e_dbg("Error Setting up Auto-Negotiation\n");
+		return ret_val;
+	}
+	e_dbg("Restarting Auto-Neg\n");
+
+	/* Restart auto-negotiation by setting the Auto Neg Enable bit and
+	 * the Auto Neg Restart bit in the PHY control register.
+	 */
+	ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_data |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
+	ret_val = e1000_write_phy_reg(hw, PHY_CTRL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Does the user want to wait for Auto-Neg to complete here, or
+	 * check at a later time (for example, callback routine).
+	 */
+	if (hw->wait_autoneg_complete) {
+		ret_val = e1000_wait_autoneg(hw);
+		if (ret_val) {
+			e_dbg
+			    ("Error while waiting for autoneg to complete\n");
+			return ret_val;
+		}
+	}
+
+	hw->get_link_status = true;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_copper_link_postconfig - post link setup
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Config the MAC and the PHY after link is up.
+ *   1) Set up the MAC to the current PHY speed/duplex
+ *      if we are on 82543.  If we
+ *      are on newer silicon, we only need to configure
+ *      collision distance in the Transmit Control Register.
+ *   2) Set up flow control on the MAC to that established with
+ *      the link partner.
+ *   3) Config DSP to improve Gigabit link quality for some PHY revisions.
+ */
+static s32 e1000_copper_link_postconfig(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	if ((hw->mac_type >= e1000_82544) && (hw->mac_type != e1000_ce4100)) {
+		e1000_config_collision_dist(hw);
+	} else {
+		ret_val = e1000_config_mac_to_phy(hw);
+		if (ret_val) {
+			e_dbg("Error configuring MAC to PHY settings\n");
+			return ret_val;
+		}
+	}
+	ret_val = e1000_config_fc_after_link_up(hw);
+	if (ret_val) {
+		e_dbg("Error Configuring Flow Control\n");
+		return ret_val;
+	}
+
+	/* Config DSP to improve Giga link quality */
+	if (hw->phy_type == e1000_phy_igp) {
+		ret_val = e1000_config_dsp_after_link_change(hw, true);
+		if (ret_val) {
+			e_dbg("Error Configuring DSP after link up\n");
+			return ret_val;
+		}
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_setup_copper_link - phy/speed/duplex setting
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Detects which PHY is present and sets up the speed and duplex
+ */
+static s32 e1000_setup_copper_link(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 i;
+	u16 phy_data;
+
+	/* Check if it is a valid PHY and set PHY mode if necessary. */
+	ret_val = e1000_copper_link_preconfig(hw);
+	if (ret_val)
+		return ret_val;
+
+	if (hw->phy_type == e1000_phy_igp) {
+		ret_val = e1000_copper_link_igp_setup(hw);
+		if (ret_val)
+			return ret_val;
+	} else if (hw->phy_type == e1000_phy_m88) {
+		ret_val = e1000_copper_link_mgp_setup(hw);
+		if (ret_val)
+			return ret_val;
+	} else {
+		ret_val = gbe_dhg_phy_setup(hw);
+		if (ret_val) {
+			e_dbg("gbe_dhg_phy_setup failed!\n");
+			return ret_val;
+		}
+	}
+
+	if (hw->autoneg) {
+		/* Setup autoneg and flow control advertisement
+		 * and perform autonegotiation
+		 */
+		ret_val = e1000_copper_link_autoneg(hw);
+		if (ret_val)
+			return ret_val;
+	} else {
+		/* PHY will be set to 10H, 10F, 100H,or 100F
+		 * depending on value from forced_speed_duplex.
+		 */
+		e_dbg("Forcing speed and duplex\n");
+		ret_val = e1000_phy_force_speed_duplex(hw);
+		if (ret_val) {
+			e_dbg("Error Forcing Speed and Duplex\n");
+			return ret_val;
+		}
+	}
+
+	/* Check link status. Wait up to 100 microseconds for link to become
+	 * valid.
+	 */
+	for (i = 0; i < 10; i++) {
+		ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data);
+		if (ret_val)
+			return ret_val;
+		ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		if (phy_data & MII_SR_LINK_STATUS) {
+			/* Config the MAC and PHY after link is up */
+			ret_val = e1000_copper_link_postconfig(hw);
+			if (ret_val)
+				return ret_val;
+
+			e_dbg("Valid link established!!!\n");
+			return E1000_SUCCESS;
+		}
+		udelay(10);
+	}
+
+	e_dbg("Unable to establish link!!!\n");
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_phy_setup_autoneg - phy settings
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Configures PHY autoneg and flow control advertisement settings
+ */
+s32 e1000_phy_setup_autoneg(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 mii_autoneg_adv_reg;
+	u16 mii_1000t_ctrl_reg;
+
+	/* Read the MII Auto-Neg Advertisement Register (Address 4). */
+	ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg);
+	if (ret_val)
+		return ret_val;
+
+	/* Read the MII 1000Base-T Control Register (Address 9). */
+	ret_val = e1000_read_phy_reg(hw, PHY_1000T_CTRL, &mii_1000t_ctrl_reg);
+	if (ret_val)
+		return ret_val;
+	else if (hw->phy_type == e1000_phy_8201)
+		mii_1000t_ctrl_reg &= ~REG9_SPEED_MASK;
+
+	/* Need to parse both autoneg_advertised and fc and set up
+	 * the appropriate PHY registers.  First we will parse for
+	 * autoneg_advertised software override.  Since we can advertise
+	 * a plethora of combinations, we need to check each bit
+	 * individually.
+	 */
+
+	/* First we clear all the 10/100 mb speed bits in the Auto-Neg
+	 * Advertisement Register (Address 4) and the 1000 mb speed bits in
+	 * the  1000Base-T Control Register (Address 9).
+	 */
+	mii_autoneg_adv_reg &= ~REG4_SPEED_MASK;
+	mii_1000t_ctrl_reg &= ~REG9_SPEED_MASK;
+
+	e_dbg("autoneg_advertised %x\n", hw->autoneg_advertised);
+
+	/* Do we want to advertise 10 Mb Half Duplex? */
+	if (hw->autoneg_advertised & ADVERTISE_10_HALF) {
+		e_dbg("Advertise 10mb Half duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS;
+	}
+
+	/* Do we want to advertise 10 Mb Full Duplex? */
+	if (hw->autoneg_advertised & ADVERTISE_10_FULL) {
+		e_dbg("Advertise 10mb Full duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS;
+	}
+
+	/* Do we want to advertise 100 Mb Half Duplex? */
+	if (hw->autoneg_advertised & ADVERTISE_100_HALF) {
+		e_dbg("Advertise 100mb Half duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS;
+	}
+
+	/* Do we want to advertise 100 Mb Full Duplex? */
+	if (hw->autoneg_advertised & ADVERTISE_100_FULL) {
+		e_dbg("Advertise 100mb Full duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS;
+	}
+
+	/* We do not allow the Phy to advertise 1000 Mb Half Duplex */
+	if (hw->autoneg_advertised & ADVERTISE_1000_HALF) {
+		e_dbg
+		    ("Advertise 1000mb Half duplex requested, request denied!\n");
+	}
+
+	/* Do we want to advertise 1000 Mb Full Duplex? */
+	if (hw->autoneg_advertised & ADVERTISE_1000_FULL) {
+		e_dbg("Advertise 1000mb Full duplex\n");
+		mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS;
+	}
+
+	/* Check for a software override of the flow control settings, and
+	 * setup the PHY advertisement registers accordingly.  If
+	 * auto-negotiation is enabled, then software will have to set the
+	 * "PAUSE" bits to the correct value in the Auto-Negotiation
+	 * Advertisement Register (PHY_AUTONEG_ADV) and re-start
+	 * auto-negotiation.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *      0:  Flow control is completely disabled
+	 *      1:  Rx flow control is enabled (we can receive pause frames
+	 *          but not send pause frames).
+	 *      2:  Tx flow control is enabled (we can send pause frames
+	 *          but we do not support receiving pause frames).
+	 *      3:  Both Rx and TX flow control (symmetric) are enabled.
+	 *  other:  No software override.  The flow control configuration
+	 *          in the EEPROM is used.
+	 */
+	switch (hw->fc) {
+	case E1000_FC_NONE:	/* 0 */
+		/* Flow control (RX & TX) is completely disabled by a
+		 * software over-ride.
+		 */
+		mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+		break;
+	case E1000_FC_RX_PAUSE:	/* 1 */
+		/* RX Flow control is enabled, and TX Flow control is
+		 * disabled, by a software over-ride.
+		 */
+		/* Since there really isn't a way to advertise that we are
+		 * capable of RX Pause ONLY, we will advertise that we
+		 * support both symmetric and asymmetric RX PAUSE.  Later
+		 * (in e1000_config_fc_after_link_up) we will disable the
+		 * hw's ability to send PAUSE frames.
+		 */
+		mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+		break;
+	case E1000_FC_TX_PAUSE:	/* 2 */
+		/* TX Flow control is enabled, and RX Flow control is
+		 * disabled, by a software over-ride.
+		 */
+		mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR;
+		mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE;
+		break;
+	case E1000_FC_FULL:	/* 3 */
+		/* Flow control (both RX and TX) is enabled by a software
+		 * over-ride.
+		 */
+		mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+		break;
+	default:
+		e_dbg("Flow control param set incorrectly\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	ret_val = e1000_write_phy_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg);
+	if (ret_val)
+		return ret_val;
+
+	e_dbg("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg);
+
+	if (hw->phy_type == e1000_phy_8201) {
+		mii_1000t_ctrl_reg = 0;
+	} else {
+		ret_val = e1000_write_phy_reg(hw, PHY_1000T_CTRL,
+					      mii_1000t_ctrl_reg);
+		if (ret_val)
+			return ret_val;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_phy_force_speed_duplex - force link settings
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Force PHY speed and duplex settings to hw->forced_speed_duplex
+ */
+static s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 ret_val;
+	u16 mii_ctrl_reg;
+	u16 mii_status_reg;
+	u16 phy_data;
+	u16 i;
+
+	/* Turn off Flow control if we are forcing speed and duplex. */
+	hw->fc = E1000_FC_NONE;
+
+	e_dbg("hw->fc = %d\n", hw->fc);
+
+	/* Read the Device Control Register. */
+	ctrl = er32(CTRL);
+
+	/* Set the bits to Force Speed and Duplex in the Device Ctrl Reg. */
+	ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+	ctrl &= ~(DEVICE_SPEED_MASK);
+
+	/* Clear the Auto Speed Detect Enable bit. */
+	ctrl &= ~E1000_CTRL_ASDE;
+
+	/* Read the MII Control Register. */
+	ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &mii_ctrl_reg);
+	if (ret_val)
+		return ret_val;
+
+	/* We need to disable autoneg in order to force link and duplex. */
+
+	mii_ctrl_reg &= ~MII_CR_AUTO_NEG_EN;
+
+	/* Are we forcing Full or Half Duplex? */
+	if (hw->forced_speed_duplex == e1000_100_full ||
+	    hw->forced_speed_duplex == e1000_10_full) {
+		/* We want to force full duplex so we SET the full duplex bits
+		 * in the Device and MII Control Registers.
+		 */
+		ctrl |= E1000_CTRL_FD;
+		mii_ctrl_reg |= MII_CR_FULL_DUPLEX;
+		e_dbg("Full Duplex\n");
+	} else {
+		/* We want to force half duplex so we CLEAR the full duplex bits
+		 * in the Device and MII Control Registers.
+		 */
+		ctrl &= ~E1000_CTRL_FD;
+		mii_ctrl_reg &= ~MII_CR_FULL_DUPLEX;
+		e_dbg("Half Duplex\n");
+	}
+
+	/* Are we forcing 100Mbps??? */
+	if (hw->forced_speed_duplex == e1000_100_full ||
+	    hw->forced_speed_duplex == e1000_100_half) {
+		/* Set the 100Mb bit and turn off the 1000Mb and 10Mb bits. */
+		ctrl |= E1000_CTRL_SPD_100;
+		mii_ctrl_reg |= MII_CR_SPEED_100;
+		mii_ctrl_reg &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_10);
+		e_dbg("Forcing 100mb ");
+	} else {
+		/* Set the 10Mb bit and turn off the 1000Mb and 100Mb bits. */
+		ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100);
+		mii_ctrl_reg |= MII_CR_SPEED_10;
+		mii_ctrl_reg &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100);
+		e_dbg("Forcing 10mb ");
+	}
+
+	e1000_config_collision_dist(hw);
+
+	/* Write the configured values back to the Device Control Reg. */
+	ew32(CTRL, ctrl);
+
+	if (hw->phy_type == e1000_phy_m88) {
+		ret_val =
+		    e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		/* Clear Auto-Crossover to force MDI manually. M88E1000 requires
+		 * MDI forced whenever speed are duplex are forced.
+		 */
+		phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+		ret_val =
+		    e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+		if (ret_val)
+			return ret_val;
+
+		e_dbg("M88E1000 PSCR: %x\n", phy_data);
+
+		/* Need to reset the PHY or these changes will be ignored */
+		mii_ctrl_reg |= MII_CR_RESET;
+
+		/* Disable MDI-X support for 10/100 */
+	} else {
+		/* Clear Auto-Crossover to force MDI manually.  IGP requires MDI
+		 * forced whenever speed or duplex are forced.
+		 */
+		ret_val =
+		    e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX;
+		phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX;
+
+		ret_val =
+		    e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Write back the modified PHY MII control register. */
+	ret_val = e1000_write_phy_reg(hw, PHY_CTRL, mii_ctrl_reg);
+	if (ret_val)
+		return ret_val;
+
+	udelay(1);
+
+	/* The wait_autoneg_complete flag may be a little misleading here.
+	 * Since we are forcing speed and duplex, Auto-Neg is not enabled.
+	 * But we do want to delay for a period while forcing only so we
+	 * don't generate false No Link messages.  So we will wait here
+	 * only if the user has set wait_autoneg_complete to 1, which is
+	 * the default.
+	 */
+	if (hw->wait_autoneg_complete) {
+		/* We will wait for autoneg to complete. */
+		e_dbg("Waiting for forced speed/duplex link.\n");
+		mii_status_reg = 0;
+
+		/* Wait for autoneg to complete or 4.5 seconds to expire */
+		for (i = PHY_FORCE_TIME; i > 0; i--) {
+			/* Read the MII Status Register and wait for Auto-Neg
+			 * Complete bit to be set.
+			 */
+			ret_val =
+			    e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg);
+			if (ret_val)
+				return ret_val;
+
+			ret_val =
+			    e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg);
+			if (ret_val)
+				return ret_val;
+
+			if (mii_status_reg & MII_SR_LINK_STATUS)
+				break;
+			msleep(100);
+		}
+		if ((i == 0) && (hw->phy_type == e1000_phy_m88)) {
+			/* We didn't get link.  Reset the DSP and wait again
+			 * for link.
+			 */
+			ret_val = e1000_phy_reset_dsp(hw);
+			if (ret_val) {
+				e_dbg("Error Resetting PHY DSP\n");
+				return ret_val;
+			}
+		}
+		/* This loop will early-out if the link condition has been
+		 * met
+		 */
+		for (i = PHY_FORCE_TIME; i > 0; i--) {
+			if (mii_status_reg & MII_SR_LINK_STATUS)
+				break;
+			msleep(100);
+			/* Read the MII Status Register and wait for Auto-Neg
+			 * Complete bit to be set.
+			 */
+			ret_val =
+			    e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg);
+			if (ret_val)
+				return ret_val;
+
+			ret_val =
+			    e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg);
+			if (ret_val)
+				return ret_val;
+		}
+	}
+
+	if (hw->phy_type == e1000_phy_m88) {
+		/* Because we reset the PHY above, we need to re-force TX_CLK in
+		 * the Extended PHY Specific Control Register to 25MHz clock.
+		 * This value defaults back to a 2.5MHz clock when the PHY is
+		 * reset.
+		 */
+		ret_val =
+		    e1000_read_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL,
+				       &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		phy_data |= M88E1000_EPSCR_TX_CLK_25;
+		ret_val =
+		    e1000_write_phy_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL,
+					phy_data);
+		if (ret_val)
+			return ret_val;
+
+		/* In addition, because of the s/w reset above, we need to
+		 * enable CRS on Tx.  This must be set for both full and half
+		 * duplex operation.
+		 */
+		ret_val =
+		    e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+		ret_val =
+		    e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+		if (ret_val)
+			return ret_val;
+
+		if ((hw->mac_type == e1000_82544 ||
+		     hw->mac_type == e1000_82543) &&
+		    (!hw->autoneg) &&
+		    (hw->forced_speed_duplex == e1000_10_full ||
+		     hw->forced_speed_duplex == e1000_10_half)) {
+			ret_val = e1000_polarity_reversal_workaround(hw);
+			if (ret_val)
+				return ret_val;
+		}
+	}
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_config_collision_dist - set collision distance register
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Sets the collision distance in the Transmit Control register.
+ * Link should have been established previously. Reads the speed and duplex
+ * information from the Device Status register.
+ */
+void e1000_config_collision_dist(struct e1000_hw *hw)
+{
+	u32 tctl, coll_dist;
+
+	if (hw->mac_type < e1000_82543)
+		coll_dist = E1000_COLLISION_DISTANCE_82542;
+	else
+		coll_dist = E1000_COLLISION_DISTANCE;
+
+	tctl = er32(TCTL);
+
+	tctl &= ~E1000_TCTL_COLD;
+	tctl |= coll_dist << E1000_COLD_SHIFT;
+
+	ew32(TCTL, tctl);
+	E1000_WRITE_FLUSH();
+}
+
+/**
+ * e1000_config_mac_to_phy - sync phy and mac settings
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Sets MAC speed and duplex settings to reflect the those in the PHY
+ * The contents of the PHY register containing the needed information need to
+ * be passed in.
+ */
+static s32 e1000_config_mac_to_phy(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 ret_val;
+	u16 phy_data;
+
+	/* 82544 or newer MAC, Auto Speed Detection takes care of
+	 * MAC speed/duplex configuration.
+	 */
+	if ((hw->mac_type >= e1000_82544) && (hw->mac_type != e1000_ce4100))
+		return E1000_SUCCESS;
+
+	/* Read the Device Control Register and set the bits to Force Speed
+	 * and Duplex.
+	 */
+	ctrl = er32(CTRL);
+	ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+	ctrl &= ~(E1000_CTRL_SPD_SEL | E1000_CTRL_ILOS);
+
+	switch (hw->phy_type) {
+	case e1000_phy_8201:
+		ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		if (phy_data & RTL_PHY_CTRL_FD)
+			ctrl |= E1000_CTRL_FD;
+		else
+			ctrl &= ~E1000_CTRL_FD;
+
+		if (phy_data & RTL_PHY_CTRL_SPD_100)
+			ctrl |= E1000_CTRL_SPD_100;
+		else
+			ctrl |= E1000_CTRL_SPD_10;
+
+		e1000_config_collision_dist(hw);
+		break;
+	default:
+		/* Set up duplex in the Device Control and Transmit Control
+		 * registers depending on negotiated values.
+		 */
+		ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS,
+					     &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		if (phy_data & M88E1000_PSSR_DPLX)
+			ctrl |= E1000_CTRL_FD;
+		else
+			ctrl &= ~E1000_CTRL_FD;
+
+		e1000_config_collision_dist(hw);
+
+		/* Set up speed in the Device Control register depending on
+		 * negotiated values.
+		 */
+		if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS)
+			ctrl |= E1000_CTRL_SPD_1000;
+		else if ((phy_data & M88E1000_PSSR_SPEED) ==
+			 M88E1000_PSSR_100MBS)
+			ctrl |= E1000_CTRL_SPD_100;
+	}
+
+	/* Write the configured values back to the Device Control Reg. */
+	ew32(CTRL, ctrl);
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_force_mac_fc - force flow control settings
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Forces the MAC's flow control settings.
+ * Sets the TFCE and RFCE bits in the device control register to reflect
+ * the adapter settings. TFCE and RFCE need to be explicitly set by
+ * software when a Copper PHY is used because autonegotiation is managed
+ * by the PHY rather than the MAC. Software must also configure these
+ * bits when link is forced on a fiber connection.
+ */
+s32 e1000_force_mac_fc(struct e1000_hw *hw)
+{
+	u32 ctrl;
+
+	/* Get the current configuration of the Device Control Register */
+	ctrl = er32(CTRL);
+
+	/* Because we didn't get link via the internal auto-negotiation
+	 * mechanism (we either forced link or we got link via PHY
+	 * auto-neg), we have to manually enable/disable transmit an
+	 * receive flow control.
+	 *
+	 * The "Case" statement below enables/disable flow control
+	 * according to the "hw->fc" parameter.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *      0:  Flow control is completely disabled
+	 *      1:  Rx flow control is enabled (we can receive pause
+	 *          frames but not send pause frames).
+	 *      2:  Tx flow control is enabled (we can send pause frames
+	 *          but we do not receive pause frames).
+	 *      3:  Both Rx and TX flow control (symmetric) is enabled.
+	 *  other:  No other values should be possible at this point.
+	 */
+
+	switch (hw->fc) {
+	case E1000_FC_NONE:
+		ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE));
+		break;
+	case E1000_FC_RX_PAUSE:
+		ctrl &= (~E1000_CTRL_TFCE);
+		ctrl |= E1000_CTRL_RFCE;
+		break;
+	case E1000_FC_TX_PAUSE:
+		ctrl &= (~E1000_CTRL_RFCE);
+		ctrl |= E1000_CTRL_TFCE;
+		break;
+	case E1000_FC_FULL:
+		ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE);
+		break;
+	default:
+		e_dbg("Flow control param set incorrectly\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	/* Disable TX Flow Control for 82542 (rev 2.0) */
+	if (hw->mac_type == e1000_82542_rev2_0)
+		ctrl &= (~E1000_CTRL_TFCE);
+
+	ew32(CTRL, ctrl);
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_config_fc_after_link_up - configure flow control after autoneg
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Configures flow control settings after link is established
+ * Should be called immediately after a valid link has been established.
+ * Forces MAC flow control settings if link was forced. When in MII/GMII mode
+ * and autonegotiation is enabled, the MAC flow control settings will be set
+ * based on the flow control negotiated by the PHY. In TBI mode, the TFCE
+ * and RFCE bits will be automatically set to the negotiated flow control mode.
+ */
+static s32 e1000_config_fc_after_link_up(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 mii_status_reg;
+	u16 mii_nway_adv_reg;
+	u16 mii_nway_lp_ability_reg;
+	u16 speed;
+	u16 duplex;
+
+	/* Check for the case where we have fiber media and auto-neg failed
+	 * so we had to force link.  In this case, we need to force the
+	 * configuration of the MAC to match the "fc" parameter.
+	 */
+	if (((hw->media_type == e1000_media_type_fiber) &&
+	     (hw->autoneg_failed)) ||
+	    ((hw->media_type == e1000_media_type_internal_serdes) &&
+	     (hw->autoneg_failed)) ||
+	    ((hw->media_type == e1000_media_type_copper) &&
+	     (!hw->autoneg))) {
+		ret_val = e1000_force_mac_fc(hw);
+		if (ret_val) {
+			e_dbg("Error forcing flow control settings\n");
+			return ret_val;
+		}
+	}
+
+	/* Check for the case where we have copper media and auto-neg is
+	 * enabled.  In this case, we need to check and see if Auto-Neg
+	 * has completed, and if so, how the PHY and link partner has
+	 * flow control configured.
+	 */
+	if ((hw->media_type == e1000_media_type_copper) && hw->autoneg) {
+		/* Read the MII Status Register and check to see if AutoNeg
+		 * has completed.  We read this twice because this reg has
+		 * some "sticky" (latched) bits.
+		 */
+		ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg);
+		if (ret_val)
+			return ret_val;
+		ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg);
+		if (ret_val)
+			return ret_val;
+
+		if (mii_status_reg & MII_SR_AUTONEG_COMPLETE) {
+			/* The AutoNeg process has completed, so we now need to
+			 * read both the Auto Negotiation Advertisement Register
+			 * (Address 4) and the Auto_Negotiation Base Page
+			 * Ability Register (Address 5) to determine how flow
+			 * control was negotiated.
+			 */
+			ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_ADV,
+						     &mii_nway_adv_reg);
+			if (ret_val)
+				return ret_val;
+			ret_val = e1000_read_phy_reg(hw, PHY_LP_ABILITY,
+						     &mii_nway_lp_ability_reg);
+			if (ret_val)
+				return ret_val;
+
+			/* Two bits in the Auto Negotiation Advertisement
+			 * Register (Address 4) and two bits in the Auto
+			 * Negotiation Base Page Ability Register (Address 5)
+			 * determine flow control for both the PHY and the link
+			 * partner.  The following table, taken out of the IEEE
+			 * 802.3ab/D6.0 dated March 25, 1999, describes these
+			 * PAUSE resolution bits and how flow control is
+			 * determined based upon these settings.
+			 * NOTE:  DC = Don't Care
+			 *
+			 *   LOCAL DEVICE  |   LINK PARTNER
+			 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
+			 *-------|---------|-------|---------|------------------
+			 *   0   |    0    |  DC   |   DC    | E1000_FC_NONE
+			 *   0   |    1    |   0   |   DC    | E1000_FC_NONE
+			 *   0   |    1    |   1   |    0    | E1000_FC_NONE
+			 *   0   |    1    |   1   |    1    | E1000_FC_TX_PAUSE
+			 *   1   |    0    |   0   |   DC    | E1000_FC_NONE
+			 *   1   |   DC    |   1   |   DC    | E1000_FC_FULL
+			 *   1   |    1    |   0   |    0    | E1000_FC_NONE
+			 *   1   |    1    |   0   |    1    | E1000_FC_RX_PAUSE
+			 *
+			 */
+			/* Are both PAUSE bits set to 1?  If so, this implies
+			 * Symmetric Flow Control is enabled at both ends.  The
+			 * ASM_DIR bits are irrelevant per the spec.
+			 *
+			 * For Symmetric Flow Control:
+			 *
+			 *   LOCAL DEVICE  |   LINK PARTNER
+			 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+			 *-------|---------|-------|---------|------------------
+			 *   1   |   DC    |   1   |   DC    | E1000_FC_FULL
+			 *
+			 */
+			if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+			    (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) {
+				/* Now we need to check if the user selected Rx
+				 * ONLY of pause frames.  In this case, we had
+				 * to advertise FULL flow control because we
+				 * could not advertise Rx ONLY. Hence, we must
+				 * now check to see if we need to turn OFF the
+				 * TRANSMISSION of PAUSE frames.
+				 */
+				if (hw->original_fc == E1000_FC_FULL) {
+					hw->fc = E1000_FC_FULL;
+					e_dbg("Flow Control = FULL.\n");
+				} else {
+					hw->fc = E1000_FC_RX_PAUSE;
+					e_dbg
+					    ("Flow Control = RX PAUSE frames only.\n");
+				}
+			}
+			/* For receiving PAUSE frames ONLY.
+			 *
+			 *   LOCAL DEVICE  |   LINK PARTNER
+			 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+			 *-------|---------|-------|---------|------------------
+			 *   0   |    1    |   1   |    1    | E1000_FC_TX_PAUSE
+			 *
+			 */
+			else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+				 (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+				 (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+				 (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+				hw->fc = E1000_FC_TX_PAUSE;
+				e_dbg
+				    ("Flow Control = TX PAUSE frames only.\n");
+			}
+			/* For transmitting PAUSE frames ONLY.
+			 *
+			 *   LOCAL DEVICE  |   LINK PARTNER
+			 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+			 *-------|---------|-------|---------|------------------
+			 *   1   |    1    |   0   |    1    | E1000_FC_RX_PAUSE
+			 *
+			 */
+			else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+				 (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+				 !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+				 (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+				hw->fc = E1000_FC_RX_PAUSE;
+				e_dbg
+				    ("Flow Control = RX PAUSE frames only.\n");
+			}
+			/* Per the IEEE spec, at this point flow control should
+			 * be disabled.  However, we want to consider that we
+			 * could be connected to a legacy switch that doesn't
+			 * advertise desired flow control, but can be forced on
+			 * the link partner.  So if we advertised no flow
+			 * control, that is what we will resolve to.  If we
+			 * advertised some kind of receive capability (Rx Pause
+			 * Only or Full Flow Control) and the link partner
+			 * advertised none, we will configure ourselves to
+			 * enable Rx Flow Control only.  We can do this safely
+			 * for two reasons:  If the link partner really
+			 * didn't want flow control enabled, and we enable Rx,
+			 * no harm done since we won't be receiving any PAUSE
+			 * frames anyway.  If the intent on the link partner was
+			 * to have flow control enabled, then by us enabling Rx
+			 * only, we can at least receive pause frames and
+			 * process them. This is a good idea because in most
+			 * cases, since we are predominantly a server NIC, more
+			 * times than not we will be asked to delay transmission
+			 * of packets than asking our link partner to pause
+			 * transmission of frames.
+			 */
+			else if ((hw->original_fc == E1000_FC_NONE ||
+				  hw->original_fc == E1000_FC_TX_PAUSE) ||
+				 hw->fc_strict_ieee) {
+				hw->fc = E1000_FC_NONE;
+				e_dbg("Flow Control = NONE.\n");
+			} else {
+				hw->fc = E1000_FC_RX_PAUSE;
+				e_dbg
+				    ("Flow Control = RX PAUSE frames only.\n");
+			}
+
+			/* Now we need to do one last check...  If we auto-
+			 * negotiated to HALF DUPLEX, flow control should not be
+			 * enabled per IEEE 802.3 spec.
+			 */
+			ret_val =
+			    e1000_get_speed_and_duplex(hw, &speed, &duplex);
+			if (ret_val) {
+				e_dbg
+				    ("Error getting link speed and duplex\n");
+				return ret_val;
+			}
+
+			if (duplex == HALF_DUPLEX)
+				hw->fc = E1000_FC_NONE;
+
+			/* Now we call a subroutine to actually force the MAC
+			 * controller to use the correct flow control settings.
+			 */
+			ret_val = e1000_force_mac_fc(hw);
+			if (ret_val) {
+				e_dbg
+				    ("Error forcing flow control settings\n");
+				return ret_val;
+			}
+		} else {
+			e_dbg
+			    ("Copper PHY and Auto Neg has not completed.\n");
+		}
+	}
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_check_for_serdes_link_generic - Check for link (Serdes)
+ * @hw: pointer to the HW structure
+ *
+ * Checks for link up on the hardware.  If link is not up and we have
+ * a signal, then we need to force link up.
+ */
+static s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw)
+{
+	u32 rxcw;
+	u32 ctrl;
+	u32 status;
+	s32 ret_val = E1000_SUCCESS;
+
+	ctrl = er32(CTRL);
+	status = er32(STATUS);
+	rxcw = er32(RXCW);
+
+	/* If we don't have link (auto-negotiation failed or link partner
+	 * cannot auto-negotiate), and our link partner is not trying to
+	 * auto-negotiate with us (we are receiving idles or data),
+	 * we need to force link up. We also need to give auto-negotiation
+	 * time to complete.
+	 */
+	/* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */
+	if ((!(status & E1000_STATUS_LU)) && (!(rxcw & E1000_RXCW_C))) {
+		if (hw->autoneg_failed == 0) {
+			hw->autoneg_failed = 1;
+			goto out;
+		}
+		e_dbg("NOT RXing /C/, disable AutoNeg and force link.\n");
+
+		/* Disable auto-negotiation in the TXCW register */
+		ew32(TXCW, (hw->txcw & ~E1000_TXCW_ANE));
+
+		/* Force link-up and also force full-duplex. */
+		ctrl = er32(CTRL);
+		ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD);
+		ew32(CTRL, ctrl);
+
+		/* Configure Flow Control after forcing link up. */
+		ret_val = e1000_config_fc_after_link_up(hw);
+		if (ret_val) {
+			e_dbg("Error configuring flow control\n");
+			goto out;
+		}
+	} else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) {
+		/* If we are forcing link and we are receiving /C/ ordered
+		 * sets, re-enable auto-negotiation in the TXCW register
+		 * and disable forced link in the Device Control register
+		 * in an attempt to auto-negotiate with our link partner.
+		 */
+		e_dbg("RXing /C/, enable AutoNeg and stop forcing link.\n");
+		ew32(TXCW, hw->txcw);
+		ew32(CTRL, (ctrl & ~E1000_CTRL_SLU));
+
+		hw->serdes_has_link = true;
+	} else if (!(E1000_TXCW_ANE & er32(TXCW))) {
+		/* If we force link for non-auto-negotiation switch, check
+		 * link status based on MAC synchronization for internal
+		 * serdes media type.
+		 */
+		/* SYNCH bit and IV bit are sticky. */
+		udelay(10);
+		rxcw = er32(RXCW);
+		if (rxcw & E1000_RXCW_SYNCH) {
+			if (!(rxcw & E1000_RXCW_IV)) {
+				hw->serdes_has_link = true;
+				e_dbg("SERDES: Link up - forced.\n");
+			}
+		} else {
+			hw->serdes_has_link = false;
+			e_dbg("SERDES: Link down - force failed.\n");
+		}
+	}
+
+	if (E1000_TXCW_ANE & er32(TXCW)) {
+		status = er32(STATUS);
+		if (status & E1000_STATUS_LU) {
+			/* SYNCH bit and IV bit are sticky, so reread rxcw. */
+			udelay(10);
+			rxcw = er32(RXCW);
+			if (rxcw & E1000_RXCW_SYNCH) {
+				if (!(rxcw & E1000_RXCW_IV)) {
+					hw->serdes_has_link = true;
+					e_dbg("SERDES: Link up - autoneg "
+						 "completed successfully.\n");
+				} else {
+					hw->serdes_has_link = false;
+					e_dbg("SERDES: Link down - invalid"
+						 "codewords detected in autoneg.\n");
+				}
+			} else {
+				hw->serdes_has_link = false;
+				e_dbg("SERDES: Link down - no sync.\n");
+			}
+		} else {
+			hw->serdes_has_link = false;
+			e_dbg("SERDES: Link down - autoneg failed\n");
+		}
+	}
+
+      out:
+	return ret_val;
+}
+
+/**
+ * e1000_check_for_link
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Checks to see if the link status of the hardware has changed.
+ * Called by any function that needs to check the link status of the adapter.
+ */
+s32 e1000_check_for_link(struct e1000_hw *hw)
+{
+	u32 status;
+	u32 rctl;
+	u32 icr;
+	s32 ret_val;
+	u16 phy_data;
+
+	er32(CTRL);
+	status = er32(STATUS);
+
+	/* On adapters with a MAC newer than 82544, SW Definable pin 1 will be
+	 * set when the optics detect a signal. On older adapters, it will be
+	 * cleared when there is a signal.  This applies to fiber media only.
+	 */
+	if ((hw->media_type == e1000_media_type_fiber) ||
+	    (hw->media_type == e1000_media_type_internal_serdes)) {
+		er32(RXCW);
+
+		if (hw->media_type == e1000_media_type_fiber) {
+			if (status & E1000_STATUS_LU)
+				hw->get_link_status = false;
+		}
+	}
+
+	/* If we have a copper PHY then we only want to go out to the PHY
+	 * registers to see if Auto-Neg has completed and/or if our link
+	 * status has changed.  The get_link_status flag will be set if we
+	 * receive a Link Status Change interrupt or we have Rx Sequence
+	 * Errors.
+	 */
+	if ((hw->media_type == e1000_media_type_copper) && hw->get_link_status) {
+		/* First we want to see if the MII Status Register reports
+		 * link.  If so, then we want to get the current speed/duplex
+		 * of the PHY.
+		 * Read the register twice since the link bit is sticky.
+		 */
+		ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data);
+		if (ret_val)
+			return ret_val;
+		ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		if (phy_data & MII_SR_LINK_STATUS) {
+			hw->get_link_status = false;
+			/* Check if there was DownShift, must be checked
+			 * immediately after link-up
+			 */
+			e1000_check_downshift(hw);
+
+			/* If we are on 82544 or 82543 silicon and speed/duplex
+			 * are forced to 10H or 10F, then we will implement the
+			 * polarity reversal workaround.  We disable interrupts
+			 * first, and upon returning, place the devices
+			 * interrupt state to its previous value except for the
+			 * link status change interrupt which will
+			 * happen due to the execution of this workaround.
+			 */
+
+			if ((hw->mac_type == e1000_82544 ||
+			     hw->mac_type == e1000_82543) &&
+			    (!hw->autoneg) &&
+			    (hw->forced_speed_duplex == e1000_10_full ||
+			     hw->forced_speed_duplex == e1000_10_half)) {
+				ew32(IMC, 0xffffffff);
+				ret_val =
+				    e1000_polarity_reversal_workaround(hw);
+				icr = er32(ICR);
+				ew32(ICS, (icr & ~E1000_ICS_LSC));
+				ew32(IMS, IMS_ENABLE_MASK);
+			}
+
+		} else {
+			/* No link detected */
+			e1000_config_dsp_after_link_change(hw, false);
+			return 0;
+		}
+
+		/* If we are forcing speed/duplex, then we simply return since
+		 * we have already determined whether we have link or not.
+		 */
+		if (!hw->autoneg)
+			return -E1000_ERR_CONFIG;
+
+		/* optimize the dsp settings for the igp phy */
+		e1000_config_dsp_after_link_change(hw, true);
+
+		/* We have a M88E1000 PHY and Auto-Neg is enabled.  If we
+		 * have Si on board that is 82544 or newer, Auto
+		 * Speed Detection takes care of MAC speed/duplex
+		 * configuration.  So we only need to configure Collision
+		 * Distance in the MAC.  Otherwise, we need to force
+		 * speed/duplex on the MAC to the current PHY speed/duplex
+		 * settings.
+		 */
+		if ((hw->mac_type >= e1000_82544) &&
+		    (hw->mac_type != e1000_ce4100))
+			e1000_config_collision_dist(hw);
+		else {
+			ret_val = e1000_config_mac_to_phy(hw);
+			if (ret_val) {
+				e_dbg
+				    ("Error configuring MAC to PHY settings\n");
+				return ret_val;
+			}
+		}
+
+		/* Configure Flow Control now that Auto-Neg has completed.
+		 * First, we need to restore the desired flow control settings
+		 * because we may have had to re-autoneg with a different link
+		 * partner.
+		 */
+		ret_val = e1000_config_fc_after_link_up(hw);
+		if (ret_val) {
+			e_dbg("Error configuring flow control\n");
+			return ret_val;
+		}
+
+		/* At this point we know that we are on copper and we have
+		 * auto-negotiated link.  These are conditions for checking the
+		 * link partner capability register.  We use the link speed to
+		 * determine if TBI compatibility needs to be turned on or off.
+		 * If the link is not at gigabit speed, then TBI compatibility
+		 * is not needed.  If we are at gigabit speed, we turn on TBI
+		 * compatibility.
+		 */
+		if (hw->tbi_compatibility_en) {
+			u16 speed, duplex;
+
+			ret_val =
+			    e1000_get_speed_and_duplex(hw, &speed, &duplex);
+
+			if (ret_val) {
+				e_dbg
+				    ("Error getting link speed and duplex\n");
+				return ret_val;
+			}
+			if (speed != SPEED_1000) {
+				/* If link speed is not set to gigabit speed, we
+				 * do not need to enable TBI compatibility.
+				 */
+				if (hw->tbi_compatibility_on) {
+					/* If we previously were in the mode,
+					 * turn it off.
+					 */
+					rctl = er32(RCTL);
+					rctl &= ~E1000_RCTL_SBP;
+					ew32(RCTL, rctl);
+					hw->tbi_compatibility_on = false;
+				}
+			} else {
+				/* If TBI compatibility is was previously off,
+				 * turn it on. For compatibility with a TBI link
+				 * partner, we will store bad packets. Some
+				 * frames have an additional byte on the end and
+				 * will look like CRC errors to the hardware.
+				 */
+				if (!hw->tbi_compatibility_on) {
+					hw->tbi_compatibility_on = true;
+					rctl = er32(RCTL);
+					rctl |= E1000_RCTL_SBP;
+					ew32(RCTL, rctl);
+				}
+			}
+		}
+	}
+
+	if ((hw->media_type == e1000_media_type_fiber) ||
+	    (hw->media_type == e1000_media_type_internal_serdes))
+		e1000_check_for_serdes_link_generic(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_get_speed_and_duplex
+ * @hw: Struct containing variables accessed by shared code
+ * @speed: Speed of the connection
+ * @duplex: Duplex setting of the connection
+ *
+ * Detects the current speed and duplex settings of the hardware.
+ */
+s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex)
+{
+	u32 status;
+	s32 ret_val;
+	u16 phy_data;
+
+	if (hw->mac_type >= e1000_82543) {
+		status = er32(STATUS);
+		if (status & E1000_STATUS_SPEED_1000) {
+			*speed = SPEED_1000;
+			e_dbg("1000 Mbs, ");
+		} else if (status & E1000_STATUS_SPEED_100) {
+			*speed = SPEED_100;
+			e_dbg("100 Mbs, ");
+		} else {
+			*speed = SPEED_10;
+			e_dbg("10 Mbs, ");
+		}
+
+		if (status & E1000_STATUS_FD) {
+			*duplex = FULL_DUPLEX;
+			e_dbg("Full Duplex\n");
+		} else {
+			*duplex = HALF_DUPLEX;
+			e_dbg(" Half Duplex\n");
+		}
+	} else {
+		e_dbg("1000 Mbs, Full Duplex\n");
+		*speed = SPEED_1000;
+		*duplex = FULL_DUPLEX;
+	}
+
+	/* IGP01 PHY may advertise full duplex operation after speed downgrade
+	 * even if it is operating at half duplex.  Here we set the duplex
+	 * settings to match the duplex in the link partner's capabilities.
+	 */
+	if (hw->phy_type == e1000_phy_igp && hw->speed_downgraded) {
+		ret_val = e1000_read_phy_reg(hw, PHY_AUTONEG_EXP, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		if (!(phy_data & NWAY_ER_LP_NWAY_CAPS))
+			*duplex = HALF_DUPLEX;
+		else {
+			ret_val =
+			    e1000_read_phy_reg(hw, PHY_LP_ABILITY, &phy_data);
+			if (ret_val)
+				return ret_val;
+			if ((*speed == SPEED_100 &&
+			     !(phy_data & NWAY_LPAR_100TX_FD_CAPS)) ||
+			    (*speed == SPEED_10 &&
+			     !(phy_data & NWAY_LPAR_10T_FD_CAPS)))
+				*duplex = HALF_DUPLEX;
+		}
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_wait_autoneg
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Blocks until autoneg completes or times out (~4.5 seconds)
+ */
+static s32 e1000_wait_autoneg(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 i;
+	u16 phy_data;
+
+	e_dbg("Waiting for Auto-Neg to complete.\n");
+
+	/* We will wait for autoneg to complete or 4.5 seconds to expire. */
+	for (i = PHY_AUTO_NEG_TIME; i > 0; i--) {
+		/* Read the MII Status Register and wait for Auto-Neg
+		 * Complete bit to be set.
+		 */
+		ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data);
+		if (ret_val)
+			return ret_val;
+		ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data);
+		if (ret_val)
+			return ret_val;
+		if (phy_data & MII_SR_AUTONEG_COMPLETE)
+			return E1000_SUCCESS;
+
+		msleep(100);
+	}
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_raise_mdi_clk - Raises the Management Data Clock
+ * @hw: Struct containing variables accessed by shared code
+ * @ctrl: Device control register's current value
+ */
+static void e1000_raise_mdi_clk(struct e1000_hw *hw, u32 *ctrl)
+{
+	/* Raise the clock input to the Management Data Clock (by setting the
+	 * MDC bit), and then delay 10 microseconds.
+	 */
+	ew32(CTRL, (*ctrl | E1000_CTRL_MDC));
+	E1000_WRITE_FLUSH();
+	udelay(10);
+}
+
+/**
+ * e1000_lower_mdi_clk - Lowers the Management Data Clock
+ * @hw: Struct containing variables accessed by shared code
+ * @ctrl: Device control register's current value
+ */
+static void e1000_lower_mdi_clk(struct e1000_hw *hw, u32 *ctrl)
+{
+	/* Lower the clock input to the Management Data Clock (by clearing the
+	 * MDC bit), and then delay 10 microseconds.
+	 */
+	ew32(CTRL, (*ctrl & ~E1000_CTRL_MDC));
+	E1000_WRITE_FLUSH();
+	udelay(10);
+}
+
+/**
+ * e1000_shift_out_mdi_bits - Shifts data bits out to the PHY
+ * @hw: Struct containing variables accessed by shared code
+ * @data: Data to send out to the PHY
+ * @count: Number of bits to shift out
+ *
+ * Bits are shifted out in MSB to LSB order.
+ */
+static void e1000_shift_out_mdi_bits(struct e1000_hw *hw, u32 data, u16 count)
+{
+	u32 ctrl;
+	u32 mask;
+
+	/* We need to shift "count" number of bits out to the PHY. So, the value
+	 * in the "data" parameter will be shifted out to the PHY one bit at a
+	 * time. In order to do this, "data" must be broken down into bits.
+	 */
+	mask = 0x01;
+	mask <<= (count - 1);
+
+	ctrl = er32(CTRL);
+
+	/* Set MDIO_DIR and MDC_DIR direction bits to be used as output pins. */
+	ctrl |= (E1000_CTRL_MDIO_DIR | E1000_CTRL_MDC_DIR);
+
+	while (mask) {
+		/* A "1" is shifted out to the PHY by setting the MDIO bit to
+		 * "1" and then raising and lowering the Management Data Clock.
+		 * A "0" is shifted out to the PHY by setting the MDIO bit to
+		 * "0" and then raising and lowering the clock.
+		 */
+		if (data & mask)
+			ctrl |= E1000_CTRL_MDIO;
+		else
+			ctrl &= ~E1000_CTRL_MDIO;
+
+		ew32(CTRL, ctrl);
+		E1000_WRITE_FLUSH();
+
+		udelay(10);
+
+		e1000_raise_mdi_clk(hw, &ctrl);
+		e1000_lower_mdi_clk(hw, &ctrl);
+
+		mask = mask >> 1;
+	}
+}
+
+/**
+ * e1000_shift_in_mdi_bits - Shifts data bits in from the PHY
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Bits are shifted in MSB to LSB order.
+ */
+static u16 e1000_shift_in_mdi_bits(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	u16 data = 0;
+	u8 i;
+
+	/* In order to read a register from the PHY, we need to shift in a total
+	 * of 18 bits from the PHY. The first two bit (turnaround) times are
+	 * used to avoid contention on the MDIO pin when a read operation is
+	 * performed. These two bits are ignored by us and thrown away. Bits are
+	 * "shifted in" by raising the input to the Management Data Clock
+	 * (setting the MDC bit), and then reading the value of the MDIO bit.
+	 */
+	ctrl = er32(CTRL);
+
+	/* Clear MDIO_DIR (SWDPIO1) to indicate this bit is to be used as
+	 * input.
+	 */
+	ctrl &= ~E1000_CTRL_MDIO_DIR;
+	ctrl &= ~E1000_CTRL_MDIO;
+
+	ew32(CTRL, ctrl);
+	E1000_WRITE_FLUSH();
+
+	/* Raise and Lower the clock before reading in the data. This accounts
+	 * for the turnaround bits. The first clock occurred when we clocked out
+	 * the last bit of the Register Address.
+	 */
+	e1000_raise_mdi_clk(hw, &ctrl);
+	e1000_lower_mdi_clk(hw, &ctrl);
+
+	for (data = 0, i = 0; i < 16; i++) {
+		data = data << 1;
+		e1000_raise_mdi_clk(hw, &ctrl);
+		ctrl = er32(CTRL);
+		/* Check to see if we shifted in a "1". */
+		if (ctrl & E1000_CTRL_MDIO)
+			data |= 1;
+		e1000_lower_mdi_clk(hw, &ctrl);
+	}
+
+	e1000_raise_mdi_clk(hw, &ctrl);
+	e1000_lower_mdi_clk(hw, &ctrl);
+
+	return data;
+}
+
+/**
+ * e1000_read_phy_reg - read a phy register
+ * @hw: Struct containing variables accessed by shared code
+ * @reg_addr: address of the PHY register to read
+ * @phy_data: pointer to the value on the PHY register
+ *
+ * Reads the value from a PHY register, if the value is on a specific non zero
+ * page, sets the page first.
+ */
+s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 reg_addr, u16 *phy_data)
+{
+	u32 ret_val;
+	unsigned long flags;
+
+	spin_lock_irqsave(&e1000_phy_lock, flags);
+
+	if ((hw->phy_type == e1000_phy_igp) &&
+	    (reg_addr > MAX_PHY_MULTI_PAGE_REG)) {
+		ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT,
+						 (u16) reg_addr);
+		if (ret_val)
+			goto out;
+	}
+
+	ret_val = e1000_read_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr,
+					phy_data);
+out:
+	spin_unlock_irqrestore(&e1000_phy_lock, flags);
+
+	return ret_val;
+}
+
+static s32 e1000_read_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr,
+				 u16 *phy_data)
+{
+	u32 i;
+	u32 mdic = 0;
+	const u32 phy_addr = (hw->mac_type == e1000_ce4100) ? hw->phy_addr : 1;
+
+	if (reg_addr > MAX_PHY_REG_ADDRESS) {
+		e_dbg("PHY Address %d is out of range\n", reg_addr);
+		return -E1000_ERR_PARAM;
+	}
+
+	if (hw->mac_type > e1000_82543) {
+		/* Set up Op-code, Phy Address, and register address in the MDI
+		 * Control register.  The MAC will take care of interfacing with
+		 * the PHY to retrieve the desired data.
+		 */
+		if (hw->mac_type == e1000_ce4100) {
+			mdic = ((reg_addr << E1000_MDIC_REG_SHIFT) |
+				(phy_addr << E1000_MDIC_PHY_SHIFT) |
+				(INTEL_CE_GBE_MDIC_OP_READ) |
+				(INTEL_CE_GBE_MDIC_GO));
+
+			writel(mdic, E1000_MDIO_CMD);
+
+			/* Poll the ready bit to see if the MDI read
+			 * completed
+			 */
+			for (i = 0; i < 64; i++) {
+				udelay(50);
+				mdic = readl(E1000_MDIO_CMD);
+				if (!(mdic & INTEL_CE_GBE_MDIC_GO))
+					break;
+			}
+
+			if (mdic & INTEL_CE_GBE_MDIC_GO) {
+				e_dbg("MDI Read did not complete\n");
+				return -E1000_ERR_PHY;
+			}
+
+			mdic = readl(E1000_MDIO_STS);
+			if (mdic & INTEL_CE_GBE_MDIC_READ_ERROR) {
+				e_dbg("MDI Read Error\n");
+				return -E1000_ERR_PHY;
+			}
+			*phy_data = (u16)mdic;
+		} else {
+			mdic = ((reg_addr << E1000_MDIC_REG_SHIFT) |
+				(phy_addr << E1000_MDIC_PHY_SHIFT) |
+				(E1000_MDIC_OP_READ));
+
+			ew32(MDIC, mdic);
+
+			/* Poll the ready bit to see if the MDI read
+			 * completed
+			 */
+			for (i = 0; i < 64; i++) {
+				udelay(50);
+				mdic = er32(MDIC);
+				if (mdic & E1000_MDIC_READY)
+					break;
+			}
+			if (!(mdic & E1000_MDIC_READY)) {
+				e_dbg("MDI Read did not complete\n");
+				return -E1000_ERR_PHY;
+			}
+			if (mdic & E1000_MDIC_ERROR) {
+				e_dbg("MDI Error\n");
+				return -E1000_ERR_PHY;
+			}
+			*phy_data = (u16)mdic;
+		}
+	} else {
+		/* We must first send a preamble through the MDIO pin to signal
+		 * the beginning of an MII instruction.  This is done by sending
+		 * 32 consecutive "1" bits.
+		 */
+		e1000_shift_out_mdi_bits(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE);
+
+		/* Now combine the next few fields that are required for a read
+		 * operation.  We use this method instead of calling the
+		 * e1000_shift_out_mdi_bits routine five different times. The
+		 * format of a MII read instruction consists of a shift out of
+		 * 14 bits and is defined as follows:
+		 *    <Preamble><SOF><Op Code><Phy Addr><Reg Addr>
+		 * followed by a shift in of 18 bits.  This first two bits
+		 * shifted in are TurnAround bits used to avoid contention on
+		 * the MDIO pin when a READ operation is performed.  These two
+		 * bits are thrown away followed by a shift in of 16 bits which
+		 * contains the desired data.
+		 */
+		mdic = ((reg_addr) | (phy_addr << 5) |
+			(PHY_OP_READ << 10) | (PHY_SOF << 12));
+
+		e1000_shift_out_mdi_bits(hw, mdic, 14);
+
+		/* Now that we've shifted out the read command to the MII, we
+		 * need to "shift in" the 16-bit value (18 total bits) of the
+		 * requested PHY register address.
+		 */
+		*phy_data = e1000_shift_in_mdi_bits(hw);
+	}
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_write_phy_reg - write a phy register
+ *
+ * @hw: Struct containing variables accessed by shared code
+ * @reg_addr: address of the PHY register to write
+ * @phy_data: data to write to the PHY
+ *
+ * Writes a value to a PHY register
+ */
+s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 reg_addr, u16 phy_data)
+{
+	u32 ret_val;
+	unsigned long flags;
+
+	spin_lock_irqsave(&e1000_phy_lock, flags);
+
+	if ((hw->phy_type == e1000_phy_igp) &&
+	    (reg_addr > MAX_PHY_MULTI_PAGE_REG)) {
+		ret_val = e1000_write_phy_reg_ex(hw, IGP01E1000_PHY_PAGE_SELECT,
+						 (u16)reg_addr);
+		if (ret_val) {
+			spin_unlock_irqrestore(&e1000_phy_lock, flags);
+			return ret_val;
+		}
+	}
+
+	ret_val = e1000_write_phy_reg_ex(hw, MAX_PHY_REG_ADDRESS & reg_addr,
+					 phy_data);
+	spin_unlock_irqrestore(&e1000_phy_lock, flags);
+
+	return ret_val;
+}
+
+static s32 e1000_write_phy_reg_ex(struct e1000_hw *hw, u32 reg_addr,
+				  u16 phy_data)
+{
+	u32 i;
+	u32 mdic = 0;
+	const u32 phy_addr = (hw->mac_type == e1000_ce4100) ? hw->phy_addr : 1;
+
+	if (reg_addr > MAX_PHY_REG_ADDRESS) {
+		e_dbg("PHY Address %d is out of range\n", reg_addr);
+		return -E1000_ERR_PARAM;
+	}
+
+	if (hw->mac_type > e1000_82543) {
+		/* Set up Op-code, Phy Address, register address, and data
+		 * intended for the PHY register in the MDI Control register.
+		 * The MAC will take care of interfacing with the PHY to send
+		 * the desired data.
+		 */
+		if (hw->mac_type == e1000_ce4100) {
+			mdic = (((u32)phy_data) |
+				(reg_addr << E1000_MDIC_REG_SHIFT) |
+				(phy_addr << E1000_MDIC_PHY_SHIFT) |
+				(INTEL_CE_GBE_MDIC_OP_WRITE) |
+				(INTEL_CE_GBE_MDIC_GO));
+
+			writel(mdic, E1000_MDIO_CMD);
+
+			/* Poll the ready bit to see if the MDI read
+			 * completed
+			 */
+			for (i = 0; i < 640; i++) {
+				udelay(5);
+				mdic = readl(E1000_MDIO_CMD);
+				if (!(mdic & INTEL_CE_GBE_MDIC_GO))
+					break;
+			}
+			if (mdic & INTEL_CE_GBE_MDIC_GO) {
+				e_dbg("MDI Write did not complete\n");
+				return -E1000_ERR_PHY;
+			}
+		} else {
+			mdic = (((u32)phy_data) |
+				(reg_addr << E1000_MDIC_REG_SHIFT) |
+				(phy_addr << E1000_MDIC_PHY_SHIFT) |
+				(E1000_MDIC_OP_WRITE));
+
+			ew32(MDIC, mdic);
+
+			/* Poll the ready bit to see if the MDI read
+			 * completed
+			 */
+			for (i = 0; i < 641; i++) {
+				udelay(5);
+				mdic = er32(MDIC);
+				if (mdic & E1000_MDIC_READY)
+					break;
+			}
+			if (!(mdic & E1000_MDIC_READY)) {
+				e_dbg("MDI Write did not complete\n");
+				return -E1000_ERR_PHY;
+			}
+		}
+	} else {
+		/* We'll need to use the SW defined pins to shift the write
+		 * command out to the PHY. We first send a preamble to the PHY
+		 * to signal the beginning of the MII instruction.  This is done
+		 * by sending 32 consecutive "1" bits.
+		 */
+		e1000_shift_out_mdi_bits(hw, PHY_PREAMBLE, PHY_PREAMBLE_SIZE);
+
+		/* Now combine the remaining required fields that will indicate
+		 * a write operation. We use this method instead of calling the
+		 * e1000_shift_out_mdi_bits routine for each field in the
+		 * command. The format of a MII write instruction is as follows:
+		 * <Preamble><SOF><OpCode><PhyAddr><RegAddr><Turnaround><Data>.
+		 */
+		mdic = ((PHY_TURNAROUND) | (reg_addr << 2) | (phy_addr << 7) |
+			(PHY_OP_WRITE << 12) | (PHY_SOF << 14));
+		mdic <<= 16;
+		mdic |= (u32)phy_data;
+
+		e1000_shift_out_mdi_bits(hw, mdic, 32);
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_phy_hw_reset - reset the phy, hardware style
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Returns the PHY to the power-on reset state
+ */
+s32 e1000_phy_hw_reset(struct e1000_hw *hw)
+{
+	u32 ctrl, ctrl_ext;
+	u32 led_ctrl;
+
+	e_dbg("Resetting Phy...\n");
+
+	if (hw->mac_type > e1000_82543) {
+		/* Read the device control register and assert the
+		 * E1000_CTRL_PHY_RST bit. Then, take it out of reset.
+		 * For e1000 hardware, we delay for 10ms between the assert
+		 * and de-assert.
+		 */
+		ctrl = er32(CTRL);
+		ew32(CTRL, ctrl | E1000_CTRL_PHY_RST);
+		E1000_WRITE_FLUSH();
+
+		msleep(10);
+
+		ew32(CTRL, ctrl);
+		E1000_WRITE_FLUSH();
+
+	} else {
+		/* Read the Extended Device Control Register, assert the
+		 * PHY_RESET_DIR bit to put the PHY into reset. Then, take it
+		 * out of reset.
+		 */
+		ctrl_ext = er32(CTRL_EXT);
+		ctrl_ext |= E1000_CTRL_EXT_SDP4_DIR;
+		ctrl_ext &= ~E1000_CTRL_EXT_SDP4_DATA;
+		ew32(CTRL_EXT, ctrl_ext);
+		E1000_WRITE_FLUSH();
+		msleep(10);
+		ctrl_ext |= E1000_CTRL_EXT_SDP4_DATA;
+		ew32(CTRL_EXT, ctrl_ext);
+		E1000_WRITE_FLUSH();
+	}
+	udelay(150);
+
+	if ((hw->mac_type == e1000_82541) || (hw->mac_type == e1000_82547)) {
+		/* Configure activity LED after PHY reset */
+		led_ctrl = er32(LEDCTL);
+		led_ctrl &= IGP_ACTIVITY_LED_MASK;
+		led_ctrl |= (IGP_ACTIVITY_LED_ENABLE | IGP_LED3_MODE);
+		ew32(LEDCTL, led_ctrl);
+	}
+
+	/* Wait for FW to finish PHY configuration. */
+	return e1000_get_phy_cfg_done(hw);
+}
+
+/**
+ * e1000_phy_reset - reset the phy to commit settings
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Resets the PHY
+ * Sets bit 15 of the MII Control register
+ */
+s32 e1000_phy_reset(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 phy_data;
+
+	switch (hw->phy_type) {
+	case e1000_phy_igp:
+		ret_val = e1000_phy_hw_reset(hw);
+		if (ret_val)
+			return ret_val;
+		break;
+	default:
+		ret_val = e1000_read_phy_reg(hw, PHY_CTRL, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		phy_data |= MII_CR_RESET;
+		ret_val = e1000_write_phy_reg(hw, PHY_CTRL, phy_data);
+		if (ret_val)
+			return ret_val;
+
+		udelay(1);
+		break;
+	}
+
+	if (hw->phy_type == e1000_phy_igp)
+		e1000_phy_init_script(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_detect_gig_phy - check the phy type
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Probes the expected PHY address for known PHY IDs
+ */
+static s32 e1000_detect_gig_phy(struct e1000_hw *hw)
+{
+	s32 phy_init_status, ret_val;
+	u16 phy_id_high, phy_id_low;
+	bool match = false;
+
+	if (hw->phy_id != 0)
+		return E1000_SUCCESS;
+
+	/* Read the PHY ID Registers to identify which PHY is onboard. */
+	ret_val = e1000_read_phy_reg(hw, PHY_ID1, &phy_id_high);
+	if (ret_val)
+		return ret_val;
+
+	hw->phy_id = (u32)(phy_id_high << 16);
+	udelay(20);
+	ret_val = e1000_read_phy_reg(hw, PHY_ID2, &phy_id_low);
+	if (ret_val)
+		return ret_val;
+
+	hw->phy_id |= (u32)(phy_id_low & PHY_REVISION_MASK);
+	hw->phy_revision = (u32)phy_id_low & ~PHY_REVISION_MASK;
+
+	switch (hw->mac_type) {
+	case e1000_82543:
+		if (hw->phy_id == M88E1000_E_PHY_ID)
+			match = true;
+		break;
+	case e1000_82544:
+		if (hw->phy_id == M88E1000_I_PHY_ID)
+			match = true;
+		break;
+	case e1000_82540:
+	case e1000_82545:
+	case e1000_82545_rev_3:
+	case e1000_82546:
+	case e1000_82546_rev_3:
+		if (hw->phy_id == M88E1011_I_PHY_ID)
+			match = true;
+		break;
+	case e1000_ce4100:
+		if ((hw->phy_id == RTL8211B_PHY_ID) ||
+		    (hw->phy_id == RTL8201N_PHY_ID) ||
+		    (hw->phy_id == M88E1118_E_PHY_ID))
+			match = true;
+		break;
+	case e1000_82541:
+	case e1000_82541_rev_2:
+	case e1000_82547:
+	case e1000_82547_rev_2:
+		if (hw->phy_id == IGP01E1000_I_PHY_ID)
+			match = true;
+		break;
+	default:
+		e_dbg("Invalid MAC type %d\n", hw->mac_type);
+		return -E1000_ERR_CONFIG;
+	}
+	phy_init_status = e1000_set_phy_type(hw);
+
+	if ((match) && (phy_init_status == E1000_SUCCESS)) {
+		e_dbg("PHY ID 0x%X detected\n", hw->phy_id);
+		return E1000_SUCCESS;
+	}
+	e_dbg("Invalid PHY ID 0x%X\n", hw->phy_id);
+	return -E1000_ERR_PHY;
+}
+
+/**
+ * e1000_phy_reset_dsp - reset DSP
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Resets the PHY's DSP
+ */
+static s32 e1000_phy_reset_dsp(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	do {
+		ret_val = e1000_write_phy_reg(hw, 29, 0x001d);
+		if (ret_val)
+			break;
+		ret_val = e1000_write_phy_reg(hw, 30, 0x00c1);
+		if (ret_val)
+			break;
+		ret_val = e1000_write_phy_reg(hw, 30, 0x0000);
+		if (ret_val)
+			break;
+		ret_val = E1000_SUCCESS;
+	} while (0);
+
+	return ret_val;
+}
+
+/**
+ * e1000_phy_igp_get_info - get igp specific registers
+ * @hw: Struct containing variables accessed by shared code
+ * @phy_info: PHY information structure
+ *
+ * Get PHY information from various PHY registers for igp PHY only.
+ */
+static s32 e1000_phy_igp_get_info(struct e1000_hw *hw,
+				  struct e1000_phy_info *phy_info)
+{
+	s32 ret_val;
+	u16 phy_data, min_length, max_length, average;
+	e1000_rev_polarity polarity;
+
+	/* The downshift status is checked only once, after link is established,
+	 * and it stored in the hw->speed_downgraded parameter.
+	 */
+	phy_info->downshift = (e1000_downshift) hw->speed_downgraded;
+
+	/* IGP01E1000 does not need to support it. */
+	phy_info->extended_10bt_distance = e1000_10bt_ext_dist_enable_normal;
+
+	/* IGP01E1000 always correct polarity reversal */
+	phy_info->polarity_correction = e1000_polarity_reversal_enabled;
+
+	/* Check polarity status */
+	ret_val = e1000_check_polarity(hw, &polarity);
+	if (ret_val)
+		return ret_val;
+
+	phy_info->cable_polarity = polarity;
+
+	ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_info->mdix_mode =
+	    (e1000_auto_x_mode) ((phy_data & IGP01E1000_PSSR_MDIX) >>
+				 IGP01E1000_PSSR_MDIX_SHIFT);
+
+	if ((phy_data & IGP01E1000_PSSR_SPEED_MASK) ==
+	    IGP01E1000_PSSR_SPEED_1000MBPS) {
+		/* Local/Remote Receiver Information are only valid @ 1000
+		 * Mbps
+		 */
+		ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		phy_info->local_rx = ((phy_data & SR_1000T_LOCAL_RX_STATUS) >>
+				      SR_1000T_LOCAL_RX_STATUS_SHIFT) ?
+		    e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
+		phy_info->remote_rx = ((phy_data & SR_1000T_REMOTE_RX_STATUS) >>
+				       SR_1000T_REMOTE_RX_STATUS_SHIFT) ?
+		    e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
+
+		/* Get cable length */
+		ret_val = e1000_get_cable_length(hw, &min_length, &max_length);
+		if (ret_val)
+			return ret_val;
+
+		/* Translate to old method */
+		average = (max_length + min_length) / 2;
+
+		if (average <= e1000_igp_cable_length_50)
+			phy_info->cable_length = e1000_cable_length_50;
+		else if (average <= e1000_igp_cable_length_80)
+			phy_info->cable_length = e1000_cable_length_50_80;
+		else if (average <= e1000_igp_cable_length_110)
+			phy_info->cable_length = e1000_cable_length_80_110;
+		else if (average <= e1000_igp_cable_length_140)
+			phy_info->cable_length = e1000_cable_length_110_140;
+		else
+			phy_info->cable_length = e1000_cable_length_140;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_phy_m88_get_info - get m88 specific registers
+ * @hw: Struct containing variables accessed by shared code
+ * @phy_info: PHY information structure
+ *
+ * Get PHY information from various PHY registers for m88 PHY only.
+ */
+static s32 e1000_phy_m88_get_info(struct e1000_hw *hw,
+				  struct e1000_phy_info *phy_info)
+{
+	s32 ret_val;
+	u16 phy_data;
+	e1000_rev_polarity polarity;
+
+	/* The downshift status is checked only once, after link is established,
+	 * and it stored in the hw->speed_downgraded parameter.
+	 */
+	phy_info->downshift = (e1000_downshift) hw->speed_downgraded;
+
+	ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_info->extended_10bt_distance =
+	    ((phy_data & M88E1000_PSCR_10BT_EXT_DIST_ENABLE) >>
+	     M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT) ?
+	    e1000_10bt_ext_dist_enable_lower :
+	    e1000_10bt_ext_dist_enable_normal;
+
+	phy_info->polarity_correction =
+	    ((phy_data & M88E1000_PSCR_POLARITY_REVERSAL) >>
+	     M88E1000_PSCR_POLARITY_REVERSAL_SHIFT) ?
+	    e1000_polarity_reversal_disabled : e1000_polarity_reversal_enabled;
+
+	/* Check polarity status */
+	ret_val = e1000_check_polarity(hw, &polarity);
+	if (ret_val)
+		return ret_val;
+	phy_info->cable_polarity = polarity;
+
+	ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_info->mdix_mode =
+	    (e1000_auto_x_mode) ((phy_data & M88E1000_PSSR_MDIX) >>
+				 M88E1000_PSSR_MDIX_SHIFT);
+
+	if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) {
+		/* Cable Length Estimation and Local/Remote Receiver Information
+		 * are only valid at 1000 Mbps.
+		 */
+		phy_info->cable_length =
+		    (e1000_cable_length) ((phy_data &
+					   M88E1000_PSSR_CABLE_LENGTH) >>
+					  M88E1000_PSSR_CABLE_LENGTH_SHIFT);
+
+		ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		phy_info->local_rx = ((phy_data & SR_1000T_LOCAL_RX_STATUS) >>
+				      SR_1000T_LOCAL_RX_STATUS_SHIFT) ?
+		    e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
+		phy_info->remote_rx = ((phy_data & SR_1000T_REMOTE_RX_STATUS) >>
+				       SR_1000T_REMOTE_RX_STATUS_SHIFT) ?
+		    e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_phy_get_info - request phy info
+ * @hw: Struct containing variables accessed by shared code
+ * @phy_info: PHY information structure
+ *
+ * Get PHY information from various PHY registers
+ */
+s32 e1000_phy_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info)
+{
+	s32 ret_val;
+	u16 phy_data;
+
+	phy_info->cable_length = e1000_cable_length_undefined;
+	phy_info->extended_10bt_distance = e1000_10bt_ext_dist_enable_undefined;
+	phy_info->cable_polarity = e1000_rev_polarity_undefined;
+	phy_info->downshift = e1000_downshift_undefined;
+	phy_info->polarity_correction = e1000_polarity_reversal_undefined;
+	phy_info->mdix_mode = e1000_auto_x_mode_undefined;
+	phy_info->local_rx = e1000_1000t_rx_status_undefined;
+	phy_info->remote_rx = e1000_1000t_rx_status_undefined;
+
+	if (hw->media_type != e1000_media_type_copper) {
+		e_dbg("PHY info is only valid for copper media\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	if ((phy_data & MII_SR_LINK_STATUS) != MII_SR_LINK_STATUS) {
+		e_dbg("PHY info is only valid if link is up\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	if (hw->phy_type == e1000_phy_igp)
+		return e1000_phy_igp_get_info(hw, phy_info);
+	else if ((hw->phy_type == e1000_phy_8211) ||
+		 (hw->phy_type == e1000_phy_8201))
+		return E1000_SUCCESS;
+	else
+		return e1000_phy_m88_get_info(hw, phy_info);
+}
+
+s32 e1000_validate_mdi_setting(struct e1000_hw *hw)
+{
+	if (!hw->autoneg && (hw->mdix == 0 || hw->mdix == 3)) {
+		e_dbg("Invalid MDI setting detected\n");
+		hw->mdix = 1;
+		return -E1000_ERR_CONFIG;
+	}
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_init_eeprom_params - initialize sw eeprom vars
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Sets up eeprom variables in the hw struct.  Must be called after mac_type
+ * is configured.
+ */
+s32 e1000_init_eeprom_params(struct e1000_hw *hw)
+{
+	struct e1000_eeprom_info *eeprom = &hw->eeprom;
+	u32 eecd = er32(EECD);
+	s32 ret_val = E1000_SUCCESS;
+	u16 eeprom_size;
+
+	switch (hw->mac_type) {
+	case e1000_82542_rev2_0:
+	case e1000_82542_rev2_1:
+	case e1000_82543:
+	case e1000_82544:
+		eeprom->type = e1000_eeprom_microwire;
+		eeprom->word_size = 64;
+		eeprom->opcode_bits = 3;
+		eeprom->address_bits = 6;
+		eeprom->delay_usec = 50;
+		break;
+	case e1000_82540:
+	case e1000_82545:
+	case e1000_82545_rev_3:
+	case e1000_82546:
+	case e1000_82546_rev_3:
+		eeprom->type = e1000_eeprom_microwire;
+		eeprom->opcode_bits = 3;
+		eeprom->delay_usec = 50;
+		if (eecd & E1000_EECD_SIZE) {
+			eeprom->word_size = 256;
+			eeprom->address_bits = 8;
+		} else {
+			eeprom->word_size = 64;
+			eeprom->address_bits = 6;
+		}
+		break;
+	case e1000_82541:
+	case e1000_82541_rev_2:
+	case e1000_82547:
+	case e1000_82547_rev_2:
+		if (eecd & E1000_EECD_TYPE) {
+			eeprom->type = e1000_eeprom_spi;
+			eeprom->opcode_bits = 8;
+			eeprom->delay_usec = 1;
+			if (eecd & E1000_EECD_ADDR_BITS) {
+				eeprom->page_size = 32;
+				eeprom->address_bits = 16;
+			} else {
+				eeprom->page_size = 8;
+				eeprom->address_bits = 8;
+			}
+		} else {
+			eeprom->type = e1000_eeprom_microwire;
+			eeprom->opcode_bits = 3;
+			eeprom->delay_usec = 50;
+			if (eecd & E1000_EECD_ADDR_BITS) {
+				eeprom->word_size = 256;
+				eeprom->address_bits = 8;
+			} else {
+				eeprom->word_size = 64;
+				eeprom->address_bits = 6;
+			}
+		}
+		break;
+	default:
+		break;
+	}
+
+	if (eeprom->type == e1000_eeprom_spi) {
+		/* eeprom_size will be an enum [0..8] that maps to eeprom sizes
+		 * 128B to 32KB (incremented by powers of 2).
+		 */
+		/* Set to default value for initial eeprom read. */
+		eeprom->word_size = 64;
+		ret_val = e1000_read_eeprom(hw, EEPROM_CFG, 1, &eeprom_size);
+		if (ret_val)
+			return ret_val;
+		eeprom_size =
+		    (eeprom_size & EEPROM_SIZE_MASK) >> EEPROM_SIZE_SHIFT;
+		/* 256B eeprom size was not supported in earlier hardware, so we
+		 * bump eeprom_size up one to ensure that "1" (which maps to
+		 * 256B) is never the result used in the shifting logic below.
+		 */
+		if (eeprom_size)
+			eeprom_size++;
+
+		eeprom->word_size = 1 << (eeprom_size + EEPROM_WORD_SIZE_SHIFT);
+	}
+	return ret_val;
+}
+
+/**
+ * e1000_raise_ee_clk - Raises the EEPROM's clock input.
+ * @hw: Struct containing variables accessed by shared code
+ * @eecd: EECD's current value
+ */
+static void e1000_raise_ee_clk(struct e1000_hw *hw, u32 *eecd)
+{
+	/* Raise the clock input to the EEPROM (by setting the SK bit), and then
+	 * wait <delay> microseconds.
+	 */
+	*eecd = *eecd | E1000_EECD_SK;
+	ew32(EECD, *eecd);
+	E1000_WRITE_FLUSH();
+	udelay(hw->eeprom.delay_usec);
+}
+
+/**
+ * e1000_lower_ee_clk - Lowers the EEPROM's clock input.
+ * @hw: Struct containing variables accessed by shared code
+ * @eecd: EECD's current value
+ */
+static void e1000_lower_ee_clk(struct e1000_hw *hw, u32 *eecd)
+{
+	/* Lower the clock input to the EEPROM (by clearing the SK bit), and
+	 * then wait 50 microseconds.
+	 */
+	*eecd = *eecd & ~E1000_EECD_SK;
+	ew32(EECD, *eecd);
+	E1000_WRITE_FLUSH();
+	udelay(hw->eeprom.delay_usec);
+}
+
+/**
+ * e1000_shift_out_ee_bits - Shift data bits out to the EEPROM.
+ * @hw: Struct containing variables accessed by shared code
+ * @data: data to send to the EEPROM
+ * @count: number of bits to shift out
+ */
+static void e1000_shift_out_ee_bits(struct e1000_hw *hw, u16 data, u16 count)
+{
+	struct e1000_eeprom_info *eeprom = &hw->eeprom;
+	u32 eecd;
+	u32 mask;
+
+	/* We need to shift "count" bits out to the EEPROM. So, value in the
+	 * "data" parameter will be shifted out to the EEPROM one bit at a time.
+	 * In order to do this, "data" must be broken down into bits.
+	 */
+	mask = 0x01 << (count - 1);
+	eecd = er32(EECD);
+	if (eeprom->type == e1000_eeprom_microwire)
+		eecd &= ~E1000_EECD_DO;
+	else if (eeprom->type == e1000_eeprom_spi)
+		eecd |= E1000_EECD_DO;
+
+	do {
+		/* A "1" is shifted out to the EEPROM by setting bit "DI" to a
+		 * "1", and then raising and then lowering the clock (the SK bit
+		 * controls the clock input to the EEPROM).  A "0" is shifted
+		 * out to the EEPROM by setting "DI" to "0" and then raising and
+		 * then lowering the clock.
+		 */
+		eecd &= ~E1000_EECD_DI;
+
+		if (data & mask)
+			eecd |= E1000_EECD_DI;
+
+		ew32(EECD, eecd);
+		E1000_WRITE_FLUSH();
+
+		udelay(eeprom->delay_usec);
+
+		e1000_raise_ee_clk(hw, &eecd);
+		e1000_lower_ee_clk(hw, &eecd);
+
+		mask = mask >> 1;
+
+	} while (mask);
+
+	/* We leave the "DI" bit set to "0" when we leave this routine. */
+	eecd &= ~E1000_EECD_DI;
+	ew32(EECD, eecd);
+}
+
+/**
+ * e1000_shift_in_ee_bits - Shift data bits in from the EEPROM
+ * @hw: Struct containing variables accessed by shared code
+ * @count: number of bits to shift in
+ */
+static u16 e1000_shift_in_ee_bits(struct e1000_hw *hw, u16 count)
+{
+	u32 eecd;
+	u32 i;
+	u16 data;
+
+	/* In order to read a register from the EEPROM, we need to shift 'count'
+	 * bits in from the EEPROM. Bits are "shifted in" by raising the clock
+	 * input to the EEPROM (setting the SK bit), and then reading the value
+	 * of the "DO" bit.  During this "shifting in" process the "DI" bit
+	 * should always be clear.
+	 */
+
+	eecd = er32(EECD);
+
+	eecd &= ~(E1000_EECD_DO | E1000_EECD_DI);
+	data = 0;
+
+	for (i = 0; i < count; i++) {
+		data = data << 1;
+		e1000_raise_ee_clk(hw, &eecd);
+
+		eecd = er32(EECD);
+
+		eecd &= ~(E1000_EECD_DI);
+		if (eecd & E1000_EECD_DO)
+			data |= 1;
+
+		e1000_lower_ee_clk(hw, &eecd);
+	}
+
+	return data;
+}
+
+/**
+ * e1000_acquire_eeprom - Prepares EEPROM for access
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Lowers EEPROM clock. Clears input pin. Sets the chip select pin. This
+ * function should be called before issuing a command to the EEPROM.
+ */
+static s32 e1000_acquire_eeprom(struct e1000_hw *hw)
+{
+	struct e1000_eeprom_info *eeprom = &hw->eeprom;
+	u32 eecd, i = 0;
+
+	eecd = er32(EECD);
+
+	/* Request EEPROM Access */
+	if (hw->mac_type > e1000_82544) {
+		eecd |= E1000_EECD_REQ;
+		ew32(EECD, eecd);
+		eecd = er32(EECD);
+		while ((!(eecd & E1000_EECD_GNT)) &&
+		       (i < E1000_EEPROM_GRANT_ATTEMPTS)) {
+			i++;
+			udelay(5);
+			eecd = er32(EECD);
+		}
+		if (!(eecd & E1000_EECD_GNT)) {
+			eecd &= ~E1000_EECD_REQ;
+			ew32(EECD, eecd);
+			e_dbg("Could not acquire EEPROM grant\n");
+			return -E1000_ERR_EEPROM;
+		}
+	}
+
+	/* Setup EEPROM for Read/Write */
+
+	if (eeprom->type == e1000_eeprom_microwire) {
+		/* Clear SK and DI */
+		eecd &= ~(E1000_EECD_DI | E1000_EECD_SK);
+		ew32(EECD, eecd);
+
+		/* Set CS */
+		eecd |= E1000_EECD_CS;
+		ew32(EECD, eecd);
+	} else if (eeprom->type == e1000_eeprom_spi) {
+		/* Clear SK and CS */
+		eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
+		ew32(EECD, eecd);
+		E1000_WRITE_FLUSH();
+		udelay(1);
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_standby_eeprom - Returns EEPROM to a "standby" state
+ * @hw: Struct containing variables accessed by shared code
+ */
+static void e1000_standby_eeprom(struct e1000_hw *hw)
+{
+	struct e1000_eeprom_info *eeprom = &hw->eeprom;
+	u32 eecd;
+
+	eecd = er32(EECD);
+
+	if (eeprom->type == e1000_eeprom_microwire) {
+		eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
+		ew32(EECD, eecd);
+		E1000_WRITE_FLUSH();
+		udelay(eeprom->delay_usec);
+
+		/* Clock high */
+		eecd |= E1000_EECD_SK;
+		ew32(EECD, eecd);
+		E1000_WRITE_FLUSH();
+		udelay(eeprom->delay_usec);
+
+		/* Select EEPROM */
+		eecd |= E1000_EECD_CS;
+		ew32(EECD, eecd);
+		E1000_WRITE_FLUSH();
+		udelay(eeprom->delay_usec);
+
+		/* Clock low */
+		eecd &= ~E1000_EECD_SK;
+		ew32(EECD, eecd);
+		E1000_WRITE_FLUSH();
+		udelay(eeprom->delay_usec);
+	} else if (eeprom->type == e1000_eeprom_spi) {
+		/* Toggle CS to flush commands */
+		eecd |= E1000_EECD_CS;
+		ew32(EECD, eecd);
+		E1000_WRITE_FLUSH();
+		udelay(eeprom->delay_usec);
+		eecd &= ~E1000_EECD_CS;
+		ew32(EECD, eecd);
+		E1000_WRITE_FLUSH();
+		udelay(eeprom->delay_usec);
+	}
+}
+
+/**
+ * e1000_release_eeprom - drop chip select
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Terminates a command by inverting the EEPROM's chip select pin
+ */
+static void e1000_release_eeprom(struct e1000_hw *hw)
+{
+	u32 eecd;
+
+	eecd = er32(EECD);
+
+	if (hw->eeprom.type == e1000_eeprom_spi) {
+		eecd |= E1000_EECD_CS;	/* Pull CS high */
+		eecd &= ~E1000_EECD_SK;	/* Lower SCK */
+
+		ew32(EECD, eecd);
+		E1000_WRITE_FLUSH();
+
+		udelay(hw->eeprom.delay_usec);
+	} else if (hw->eeprom.type == e1000_eeprom_microwire) {
+		/* cleanup eeprom */
+
+		/* CS on Microwire is active-high */
+		eecd &= ~(E1000_EECD_CS | E1000_EECD_DI);
+
+		ew32(EECD, eecd);
+
+		/* Rising edge of clock */
+		eecd |= E1000_EECD_SK;
+		ew32(EECD, eecd);
+		E1000_WRITE_FLUSH();
+		udelay(hw->eeprom.delay_usec);
+
+		/* Falling edge of clock */
+		eecd &= ~E1000_EECD_SK;
+		ew32(EECD, eecd);
+		E1000_WRITE_FLUSH();
+		udelay(hw->eeprom.delay_usec);
+	}
+
+	/* Stop requesting EEPROM access */
+	if (hw->mac_type > e1000_82544) {
+		eecd &= ~E1000_EECD_REQ;
+		ew32(EECD, eecd);
+	}
+}
+
+/**
+ * e1000_spi_eeprom_ready - Reads a 16 bit word from the EEPROM.
+ * @hw: Struct containing variables accessed by shared code
+ */
+static s32 e1000_spi_eeprom_ready(struct e1000_hw *hw)
+{
+	u16 retry_count = 0;
+	u8 spi_stat_reg;
+
+	/* Read "Status Register" repeatedly until the LSB is cleared.  The
+	 * EEPROM will signal that the command has been completed by clearing
+	 * bit 0 of the internal status register.  If it's not cleared within
+	 * 5 milliseconds, then error out.
+	 */
+	retry_count = 0;
+	do {
+		e1000_shift_out_ee_bits(hw, EEPROM_RDSR_OPCODE_SPI,
+					hw->eeprom.opcode_bits);
+		spi_stat_reg = (u8)e1000_shift_in_ee_bits(hw, 8);
+		if (!(spi_stat_reg & EEPROM_STATUS_RDY_SPI))
+			break;
+
+		udelay(5);
+		retry_count += 5;
+
+		e1000_standby_eeprom(hw);
+	} while (retry_count < EEPROM_MAX_RETRY_SPI);
+
+	/* ATMEL SPI write time could vary from 0-20mSec on 3.3V devices (and
+	 * only 0-5mSec on 5V devices)
+	 */
+	if (retry_count >= EEPROM_MAX_RETRY_SPI) {
+		e_dbg("SPI EEPROM Status error\n");
+		return -E1000_ERR_EEPROM;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_read_eeprom - Reads a 16 bit word from the EEPROM.
+ * @hw: Struct containing variables accessed by shared code
+ * @offset: offset of  word in the EEPROM to read
+ * @data: word read from the EEPROM
+ * @words: number of words to read
+ */
+s32 e1000_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+	s32 ret;
+
+	mutex_lock(&e1000_eeprom_lock);
+	ret = e1000_do_read_eeprom(hw, offset, words, data);
+	mutex_unlock(&e1000_eeprom_lock);
+	return ret;
+}
+
+static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words,
+				u16 *data)
+{
+	struct e1000_eeprom_info *eeprom = &hw->eeprom;
+	u32 i = 0;
+
+	if (hw->mac_type == e1000_ce4100) {
+		GBE_CONFIG_FLASH_READ(GBE_CONFIG_BASE_VIRT, offset, words,
+				      data);
+		return E1000_SUCCESS;
+	}
+
+	/* A check for invalid values:  offset too large, too many words, and
+	 * not enough words.
+	 */
+	if ((offset >= eeprom->word_size) ||
+	    (words > eeprom->word_size - offset) ||
+	    (words == 0)) {
+		e_dbg("\"words\" parameter out of bounds. Words = %d,"
+		      "size = %d\n", offset, eeprom->word_size);
+		return -E1000_ERR_EEPROM;
+	}
+
+	/* EEPROM's that don't use EERD to read require us to bit-bang the SPI
+	 * directly. In this case, we need to acquire the EEPROM so that
+	 * FW or other port software does not interrupt.
+	 */
+	/* Prepare the EEPROM for bit-bang reading */
+	if (e1000_acquire_eeprom(hw) != E1000_SUCCESS)
+		return -E1000_ERR_EEPROM;
+
+	/* Set up the SPI or Microwire EEPROM for bit-bang reading.  We have
+	 * acquired the EEPROM at this point, so any returns should release it
+	 */
+	if (eeprom->type == e1000_eeprom_spi) {
+		u16 word_in;
+		u8 read_opcode = EEPROM_READ_OPCODE_SPI;
+
+		if (e1000_spi_eeprom_ready(hw)) {
+			e1000_release_eeprom(hw);
+			return -E1000_ERR_EEPROM;
+		}
+
+		e1000_standby_eeprom(hw);
+
+		/* Some SPI eeproms use the 8th address bit embedded in the
+		 * opcode
+		 */
+		if ((eeprom->address_bits == 8) && (offset >= 128))
+			read_opcode |= EEPROM_A8_OPCODE_SPI;
+
+		/* Send the READ command (opcode + addr)  */
+		e1000_shift_out_ee_bits(hw, read_opcode, eeprom->opcode_bits);
+		e1000_shift_out_ee_bits(hw, (u16)(offset * 2),
+					eeprom->address_bits);
+
+		/* Read the data.  The address of the eeprom internally
+		 * increments with each byte (spi) being read, saving on the
+		 * overhead of eeprom setup and tear-down.  The address counter
+		 * will roll over if reading beyond the size of the eeprom, thus
+		 * allowing the entire memory to be read starting from any
+		 * offset.
+		 */
+		for (i = 0; i < words; i++) {
+			word_in = e1000_shift_in_ee_bits(hw, 16);
+			data[i] = (word_in >> 8) | (word_in << 8);
+		}
+	} else if (eeprom->type == e1000_eeprom_microwire) {
+		for (i = 0; i < words; i++) {
+			/* Send the READ command (opcode + addr)  */
+			e1000_shift_out_ee_bits(hw,
+						EEPROM_READ_OPCODE_MICROWIRE,
+						eeprom->opcode_bits);
+			e1000_shift_out_ee_bits(hw, (u16)(offset + i),
+						eeprom->address_bits);
+
+			/* Read the data.  For microwire, each word requires the
+			 * overhead of eeprom setup and tear-down.
+			 */
+			data[i] = e1000_shift_in_ee_bits(hw, 16);
+			e1000_standby_eeprom(hw);
+			cond_resched();
+		}
+	}
+
+	/* End this read operation */
+	e1000_release_eeprom(hw);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_validate_eeprom_checksum - Verifies that the EEPROM has a valid checksum
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Reads the first 64 16 bit words of the EEPROM and sums the values read.
+ * If the sum of the 64 16 bit words is 0xBABA, the EEPROM's checksum is
+ * valid.
+ */
+s32 e1000_validate_eeprom_checksum(struct e1000_hw *hw)
+{
+	u16 checksum = 0;
+	u16 i, eeprom_data;
+
+	for (i = 0; i < (EEPROM_CHECKSUM_REG + 1); i++) {
+		if (e1000_read_eeprom(hw, i, 1, &eeprom_data) < 0) {
+			e_dbg("EEPROM Read Error\n");
+			return -E1000_ERR_EEPROM;
+		}
+		checksum += eeprom_data;
+	}
+
+#ifdef CONFIG_PARISC
+	/* This is a signature and not a checksum on HP c8000 */
+	if ((hw->subsystem_vendor_id == 0x103C) && (eeprom_data == 0x16d6))
+		return E1000_SUCCESS;
+
+#endif
+	if (checksum == (u16)EEPROM_SUM)
+		return E1000_SUCCESS;
+	else {
+		e_dbg("EEPROM Checksum Invalid\n");
+		return -E1000_ERR_EEPROM;
+	}
+}
+
+/**
+ * e1000_update_eeprom_checksum - Calculates/writes the EEPROM checksum
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Sums the first 63 16 bit words of the EEPROM. Subtracts the sum from 0xBABA.
+ * Writes the difference to word offset 63 of the EEPROM.
+ */
+s32 e1000_update_eeprom_checksum(struct e1000_hw *hw)
+{
+	u16 checksum = 0;
+	u16 i, eeprom_data;
+
+	for (i = 0; i < EEPROM_CHECKSUM_REG; i++) {
+		if (e1000_read_eeprom(hw, i, 1, &eeprom_data) < 0) {
+			e_dbg("EEPROM Read Error\n");
+			return -E1000_ERR_EEPROM;
+		}
+		checksum += eeprom_data;
+	}
+	checksum = (u16)EEPROM_SUM - checksum;
+	if (e1000_write_eeprom(hw, EEPROM_CHECKSUM_REG, 1, &checksum) < 0) {
+		e_dbg("EEPROM Write Error\n");
+		return -E1000_ERR_EEPROM;
+	}
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_write_eeprom - write words to the different EEPROM types.
+ * @hw: Struct containing variables accessed by shared code
+ * @offset: offset within the EEPROM to be written to
+ * @words: number of words to write
+ * @data: 16 bit word to be written to the EEPROM
+ *
+ * If e1000_update_eeprom_checksum is not called after this function, the
+ * EEPROM will most likely contain an invalid checksum.
+ */
+s32 e1000_write_eeprom(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+	s32 ret;
+
+	mutex_lock(&e1000_eeprom_lock);
+	ret = e1000_do_write_eeprom(hw, offset, words, data);
+	mutex_unlock(&e1000_eeprom_lock);
+	return ret;
+}
+
+static s32 e1000_do_write_eeprom(struct e1000_hw *hw, u16 offset, u16 words,
+				 u16 *data)
+{
+	struct e1000_eeprom_info *eeprom = &hw->eeprom;
+	s32 status = 0;
+
+	if (hw->mac_type == e1000_ce4100) {
+		GBE_CONFIG_FLASH_WRITE(GBE_CONFIG_BASE_VIRT, offset, words,
+				       data);
+		return E1000_SUCCESS;
+	}
+
+	/* A check for invalid values:  offset too large, too many words, and
+	 * not enough words.
+	 */
+	if ((offset >= eeprom->word_size) ||
+	    (words > eeprom->word_size - offset) ||
+	    (words == 0)) {
+		e_dbg("\"words\" parameter out of bounds\n");
+		return -E1000_ERR_EEPROM;
+	}
+
+	/* Prepare the EEPROM for writing  */
+	if (e1000_acquire_eeprom(hw) != E1000_SUCCESS)
+		return -E1000_ERR_EEPROM;
+
+	if (eeprom->type == e1000_eeprom_microwire) {
+		status = e1000_write_eeprom_microwire(hw, offset, words, data);
+	} else {
+		status = e1000_write_eeprom_spi(hw, offset, words, data);
+		msleep(10);
+	}
+
+	/* Done with writing */
+	e1000_release_eeprom(hw);
+
+	return status;
+}
+
+/**
+ * e1000_write_eeprom_spi - Writes a 16 bit word to a given offset in an SPI EEPROM.
+ * @hw: Struct containing variables accessed by shared code
+ * @offset: offset within the EEPROM to be written to
+ * @words: number of words to write
+ * @data: pointer to array of 8 bit words to be written to the EEPROM
+ */
+static s32 e1000_write_eeprom_spi(struct e1000_hw *hw, u16 offset, u16 words,
+				  u16 *data)
+{
+	struct e1000_eeprom_info *eeprom = &hw->eeprom;
+	u16 widx = 0;
+
+	while (widx < words) {
+		u8 write_opcode = EEPROM_WRITE_OPCODE_SPI;
+
+		if (e1000_spi_eeprom_ready(hw))
+			return -E1000_ERR_EEPROM;
+
+		e1000_standby_eeprom(hw);
+		cond_resched();
+
+		/*  Send the WRITE ENABLE command (8 bit opcode )  */
+		e1000_shift_out_ee_bits(hw, EEPROM_WREN_OPCODE_SPI,
+					eeprom->opcode_bits);
+
+		e1000_standby_eeprom(hw);
+
+		/* Some SPI eeproms use the 8th address bit embedded in the
+		 * opcode
+		 */
+		if ((eeprom->address_bits == 8) && (offset >= 128))
+			write_opcode |= EEPROM_A8_OPCODE_SPI;
+
+		/* Send the Write command (8-bit opcode + addr) */
+		e1000_shift_out_ee_bits(hw, write_opcode, eeprom->opcode_bits);
+
+		e1000_shift_out_ee_bits(hw, (u16)((offset + widx) * 2),
+					eeprom->address_bits);
+
+		/* Send the data */
+
+		/* Loop to allow for up to whole page write (32 bytes) of
+		 * eeprom
+		 */
+		while (widx < words) {
+			u16 word_out = data[widx];
+
+			word_out = (word_out >> 8) | (word_out << 8);
+			e1000_shift_out_ee_bits(hw, word_out, 16);
+			widx++;
+
+			/* Some larger eeprom sizes are capable of a 32-byte
+			 * PAGE WRITE operation, while the smaller eeproms are
+			 * capable of an 8-byte PAGE WRITE operation.  Break the
+			 * inner loop to pass new address
+			 */
+			if ((((offset + widx) * 2) % eeprom->page_size) == 0) {
+				e1000_standby_eeprom(hw);
+				break;
+			}
+		}
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_write_eeprom_microwire - Writes a 16 bit word to a given offset in a Microwire EEPROM.
+ * @hw: Struct containing variables accessed by shared code
+ * @offset: offset within the EEPROM to be written to
+ * @words: number of words to write
+ * @data: pointer to array of 8 bit words to be written to the EEPROM
+ */
+static s32 e1000_write_eeprom_microwire(struct e1000_hw *hw, u16 offset,
+					u16 words, u16 *data)
+{
+	struct e1000_eeprom_info *eeprom = &hw->eeprom;
+	u32 eecd;
+	u16 words_written = 0;
+	u16 i = 0;
+
+	/* Send the write enable command to the EEPROM (3-bit opcode plus
+	 * 6/8-bit dummy address beginning with 11).  It's less work to include
+	 * the 11 of the dummy address as part of the opcode than it is to shift
+	 * it over the correct number of bits for the address.  This puts the
+	 * EEPROM into write/erase mode.
+	 */
+	e1000_shift_out_ee_bits(hw, EEPROM_EWEN_OPCODE_MICROWIRE,
+				(u16)(eeprom->opcode_bits + 2));
+
+	e1000_shift_out_ee_bits(hw, 0, (u16)(eeprom->address_bits - 2));
+
+	/* Prepare the EEPROM */
+	e1000_standby_eeprom(hw);
+
+	while (words_written < words) {
+		/* Send the Write command (3-bit opcode + addr) */
+		e1000_shift_out_ee_bits(hw, EEPROM_WRITE_OPCODE_MICROWIRE,
+					eeprom->opcode_bits);
+
+		e1000_shift_out_ee_bits(hw, (u16)(offset + words_written),
+					eeprom->address_bits);
+
+		/* Send the data */
+		e1000_shift_out_ee_bits(hw, data[words_written], 16);
+
+		/* Toggle the CS line.  This in effect tells the EEPROM to
+		 * execute the previous command.
+		 */
+		e1000_standby_eeprom(hw);
+
+		/* Read DO repeatedly until it is high (equal to '1').  The
+		 * EEPROM will signal that the command has been completed by
+		 * raising the DO signal. If DO does not go high in 10
+		 * milliseconds, then error out.
+		 */
+		for (i = 0; i < 200; i++) {
+			eecd = er32(EECD);
+			if (eecd & E1000_EECD_DO)
+				break;
+			udelay(50);
+		}
+		if (i == 200) {
+			e_dbg("EEPROM Write did not complete\n");
+			return -E1000_ERR_EEPROM;
+		}
+
+		/* Recover from write */
+		e1000_standby_eeprom(hw);
+		cond_resched();
+
+		words_written++;
+	}
+
+	/* Send the write disable command to the EEPROM (3-bit opcode plus
+	 * 6/8-bit dummy address beginning with 10).  It's less work to include
+	 * the 10 of the dummy address as part of the opcode than it is to shift
+	 * it over the correct number of bits for the address.  This takes the
+	 * EEPROM out of write/erase mode.
+	 */
+	e1000_shift_out_ee_bits(hw, EEPROM_EWDS_OPCODE_MICROWIRE,
+				(u16)(eeprom->opcode_bits + 2));
+
+	e1000_shift_out_ee_bits(hw, 0, (u16)(eeprom->address_bits - 2));
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_read_mac_addr - read the adapters MAC from eeprom
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Reads the adapter's MAC address from the EEPROM and inverts the LSB for the
+ * second function of dual function devices
+ */
+s32 e1000_read_mac_addr(struct e1000_hw *hw)
+{
+	u16 offset;
+	u16 eeprom_data, i;
+
+	for (i = 0; i < NODE_ADDRESS_SIZE; i += 2) {
+		offset = i >> 1;
+		if (e1000_read_eeprom(hw, offset, 1, &eeprom_data) < 0) {
+			e_dbg("EEPROM Read Error\n");
+			return -E1000_ERR_EEPROM;
+		}
+		hw->perm_mac_addr[i] = (u8)(eeprom_data & 0x00FF);
+		hw->perm_mac_addr[i + 1] = (u8)(eeprom_data >> 8);
+	}
+
+	switch (hw->mac_type) {
+	default:
+		break;
+	case e1000_82546:
+	case e1000_82546_rev_3:
+		if (er32(STATUS) & E1000_STATUS_FUNC_1)
+			hw->perm_mac_addr[5] ^= 0x01;
+		break;
+	}
+
+	for (i = 0; i < NODE_ADDRESS_SIZE; i++)
+		hw->mac_addr[i] = hw->perm_mac_addr[i];
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_init_rx_addrs - Initializes receive address filters.
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Places the MAC address in receive address register 0 and clears the rest
+ * of the receive address registers. Clears the multicast table. Assumes
+ * the receiver is in reset when the routine is called.
+ */
+static void e1000_init_rx_addrs(struct e1000_hw *hw)
+{
+	u32 i;
+	u32 rar_num;
+
+	/* Setup the receive address. */
+	e_dbg("Programming MAC Address into RAR[0]\n");
+
+	e1000_rar_set(hw, hw->mac_addr, 0);
+
+	rar_num = E1000_RAR_ENTRIES;
+
+	/* Zero out the following 14 receive addresses. RAR[15] is for
+	 * manageability
+	 */
+	e_dbg("Clearing RAR[1-14]\n");
+	for (i = 1; i < rar_num; i++) {
+		E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0);
+		E1000_WRITE_FLUSH();
+		E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0);
+		E1000_WRITE_FLUSH();
+	}
+}
+
+/**
+ * e1000_hash_mc_addr - Hashes an address to determine its location in the multicast table
+ * @hw: Struct containing variables accessed by shared code
+ * @mc_addr: the multicast address to hash
+ */
+u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
+{
+	u32 hash_value = 0;
+
+	/* The portion of the address that is used for the hash table is
+	 * determined by the mc_filter_type setting.
+	 */
+	switch (hw->mc_filter_type) {
+		/* [0] [1] [2] [3] [4] [5]
+		 * 01  AA  00  12  34  56
+		 * LSB                 MSB
+		 */
+	case 0:
+		/* [47:36] i.e. 0x563 for above example address */
+		hash_value = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4));
+		break;
+	case 1:
+		/* [46:35] i.e. 0xAC6 for above example address */
+		hash_value = ((mc_addr[4] >> 3) | (((u16)mc_addr[5]) << 5));
+		break;
+	case 2:
+		/* [45:34] i.e. 0x5D8 for above example address */
+		hash_value = ((mc_addr[4] >> 2) | (((u16)mc_addr[5]) << 6));
+		break;
+	case 3:
+		/* [43:32] i.e. 0x634 for above example address */
+		hash_value = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8));
+		break;
+	}
+
+	hash_value &= 0xFFF;
+	return hash_value;
+}
+
+/**
+ * e1000_rar_set - Puts an ethernet address into a receive address register.
+ * @hw: Struct containing variables accessed by shared code
+ * @addr: Address to put into receive address register
+ * @index: Receive address register to write
+ */
+void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index)
+{
+	u32 rar_low, rar_high;
+
+	/* HW expects these in little endian so we reverse the byte order
+	 * from network order (big endian) to little endian
+	 */
+	rar_low = ((u32)addr[0] | ((u32)addr[1] << 8) |
+		   ((u32)addr[2] << 16) | ((u32)addr[3] << 24));
+	rar_high = ((u32)addr[4] | ((u32)addr[5] << 8));
+
+	/* Disable Rx and flush all Rx frames before enabling RSS to avoid Rx
+	 * unit hang.
+	 *
+	 * Description:
+	 * If there are any Rx frames queued up or otherwise present in the HW
+	 * before RSS is enabled, and then we enable RSS, the HW Rx unit will
+	 * hang.  To work around this issue, we have to disable receives and
+	 * flush out all Rx frames before we enable RSS. To do so, we modify we
+	 * redirect all Rx traffic to manageability and then reset the HW.
+	 * This flushes away Rx frames, and (since the redirections to
+	 * manageability persists across resets) keeps new ones from coming in
+	 * while we work.  Then, we clear the Address Valid AV bit for all MAC
+	 * addresses and undo the re-direction to manageability.
+	 * Now, frames are coming in again, but the MAC won't accept them, so
+	 * far so good.  We now proceed to initialize RSS (if necessary) and
+	 * configure the Rx unit.  Last, we re-enable the AV bits and continue
+	 * on our merry way.
+	 */
+	switch (hw->mac_type) {
+	default:
+		/* Indicate to hardware the Address is Valid. */
+		rar_high |= E1000_RAH_AV;
+		break;
+	}
+
+	E1000_WRITE_REG_ARRAY(hw, RA, (index << 1), rar_low);
+	E1000_WRITE_FLUSH();
+	E1000_WRITE_REG_ARRAY(hw, RA, ((index << 1) + 1), rar_high);
+	E1000_WRITE_FLUSH();
+}
+
+/**
+ * e1000_write_vfta - Writes a value to the specified offset in the VLAN filter table.
+ * @hw: Struct containing variables accessed by shared code
+ * @offset: Offset in VLAN filter table to write
+ * @value: Value to write into VLAN filter table
+ */
+void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value)
+{
+	u32 temp;
+
+	if ((hw->mac_type == e1000_82544) && ((offset & 0x1) == 1)) {
+		temp = E1000_READ_REG_ARRAY(hw, VFTA, (offset - 1));
+		E1000_WRITE_REG_ARRAY(hw, VFTA, offset, value);
+		E1000_WRITE_FLUSH();
+		E1000_WRITE_REG_ARRAY(hw, VFTA, (offset - 1), temp);
+		E1000_WRITE_FLUSH();
+	} else {
+		E1000_WRITE_REG_ARRAY(hw, VFTA, offset, value);
+		E1000_WRITE_FLUSH();
+	}
+}
+
+/**
+ * e1000_clear_vfta - Clears the VLAN filter table
+ * @hw: Struct containing variables accessed by shared code
+ */
+static void e1000_clear_vfta(struct e1000_hw *hw)
+{
+	u32 offset;
+
+	for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) {
+		E1000_WRITE_REG_ARRAY(hw, VFTA, offset, 0);
+		E1000_WRITE_FLUSH();
+	}
+}
+
+static s32 e1000_id_led_init(struct e1000_hw *hw)
+{
+	u32 ledctl;
+	const u32 ledctl_mask = 0x000000FF;
+	const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON;
+	const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF;
+	u16 eeprom_data, i, temp;
+	const u16 led_mask = 0x0F;
+
+	if (hw->mac_type < e1000_82540) {
+		/* Nothing to do */
+		return E1000_SUCCESS;
+	}
+
+	ledctl = er32(LEDCTL);
+	hw->ledctl_default = ledctl;
+	hw->ledctl_mode1 = hw->ledctl_default;
+	hw->ledctl_mode2 = hw->ledctl_default;
+
+	if (e1000_read_eeprom(hw, EEPROM_ID_LED_SETTINGS, 1, &eeprom_data) < 0) {
+		e_dbg("EEPROM Read Error\n");
+		return -E1000_ERR_EEPROM;
+	}
+
+	if ((eeprom_data == ID_LED_RESERVED_0000) ||
+	    (eeprom_data == ID_LED_RESERVED_FFFF)) {
+		eeprom_data = ID_LED_DEFAULT;
+	}
+
+	for (i = 0; i < 4; i++) {
+		temp = (eeprom_data >> (i << 2)) & led_mask;
+		switch (temp) {
+		case ID_LED_ON1_DEF2:
+		case ID_LED_ON1_ON2:
+		case ID_LED_ON1_OFF2:
+			hw->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
+			hw->ledctl_mode1 |= ledctl_on << (i << 3);
+			break;
+		case ID_LED_OFF1_DEF2:
+		case ID_LED_OFF1_ON2:
+		case ID_LED_OFF1_OFF2:
+			hw->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
+			hw->ledctl_mode1 |= ledctl_off << (i << 3);
+			break;
+		default:
+			/* Do nothing */
+			break;
+		}
+		switch (temp) {
+		case ID_LED_DEF1_ON2:
+		case ID_LED_ON1_ON2:
+		case ID_LED_OFF1_ON2:
+			hw->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
+			hw->ledctl_mode2 |= ledctl_on << (i << 3);
+			break;
+		case ID_LED_DEF1_OFF2:
+		case ID_LED_ON1_OFF2:
+		case ID_LED_OFF1_OFF2:
+			hw->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
+			hw->ledctl_mode2 |= ledctl_off << (i << 3);
+			break;
+		default:
+			/* Do nothing */
+			break;
+		}
+	}
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_setup_led
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Prepares SW controlable LED for use and saves the current state of the LED.
+ */
+s32 e1000_setup_led(struct e1000_hw *hw)
+{
+	u32 ledctl;
+	s32 ret_val = E1000_SUCCESS;
+
+	switch (hw->mac_type) {
+	case e1000_82542_rev2_0:
+	case e1000_82542_rev2_1:
+	case e1000_82543:
+	case e1000_82544:
+		/* No setup necessary */
+		break;
+	case e1000_82541:
+	case e1000_82547:
+	case e1000_82541_rev_2:
+	case e1000_82547_rev_2:
+		/* Turn off PHY Smart Power Down (if enabled) */
+		ret_val = e1000_read_phy_reg(hw, IGP01E1000_GMII_FIFO,
+					     &hw->phy_spd_default);
+		if (ret_val)
+			return ret_val;
+		ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO,
+					      (u16)(hw->phy_spd_default &
+						     ~IGP01E1000_GMII_SPD));
+		if (ret_val)
+			return ret_val;
+		fallthrough;
+	default:
+		if (hw->media_type == e1000_media_type_fiber) {
+			ledctl = er32(LEDCTL);
+			/* Save current LEDCTL settings */
+			hw->ledctl_default = ledctl;
+			/* Turn off LED0 */
+			ledctl &= ~(E1000_LEDCTL_LED0_IVRT |
+				    E1000_LEDCTL_LED0_BLINK |
+				    E1000_LEDCTL_LED0_MODE_MASK);
+			ledctl |= (E1000_LEDCTL_MODE_LED_OFF <<
+				   E1000_LEDCTL_LED0_MODE_SHIFT);
+			ew32(LEDCTL, ledctl);
+		} else if (hw->media_type == e1000_media_type_copper)
+			ew32(LEDCTL, hw->ledctl_mode1);
+		break;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_cleanup_led - Restores the saved state of the SW controlable LED.
+ * @hw: Struct containing variables accessed by shared code
+ */
+s32 e1000_cleanup_led(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_SUCCESS;
+
+	switch (hw->mac_type) {
+	case e1000_82542_rev2_0:
+	case e1000_82542_rev2_1:
+	case e1000_82543:
+	case e1000_82544:
+		/* No cleanup necessary */
+		break;
+	case e1000_82541:
+	case e1000_82547:
+	case e1000_82541_rev_2:
+	case e1000_82547_rev_2:
+		/* Turn on PHY Smart Power Down (if previously enabled) */
+		ret_val = e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO,
+					      hw->phy_spd_default);
+		if (ret_val)
+			return ret_val;
+		fallthrough;
+	default:
+		/* Restore LEDCTL settings */
+		ew32(LEDCTL, hw->ledctl_default);
+		break;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_led_on - Turns on the software controllable LED
+ * @hw: Struct containing variables accessed by shared code
+ */
+s32 e1000_led_on(struct e1000_hw *hw)
+{
+	u32 ctrl = er32(CTRL);
+
+	switch (hw->mac_type) {
+	case e1000_82542_rev2_0:
+	case e1000_82542_rev2_1:
+	case e1000_82543:
+		/* Set SW Defineable Pin 0 to turn on the LED */
+		ctrl |= E1000_CTRL_SWDPIN0;
+		ctrl |= E1000_CTRL_SWDPIO0;
+		break;
+	case e1000_82544:
+		if (hw->media_type == e1000_media_type_fiber) {
+			/* Set SW Defineable Pin 0 to turn on the LED */
+			ctrl |= E1000_CTRL_SWDPIN0;
+			ctrl |= E1000_CTRL_SWDPIO0;
+		} else {
+			/* Clear SW Defineable Pin 0 to turn on the LED */
+			ctrl &= ~E1000_CTRL_SWDPIN0;
+			ctrl |= E1000_CTRL_SWDPIO0;
+		}
+		break;
+	default:
+		if (hw->media_type == e1000_media_type_fiber) {
+			/* Clear SW Defineable Pin 0 to turn on the LED */
+			ctrl &= ~E1000_CTRL_SWDPIN0;
+			ctrl |= E1000_CTRL_SWDPIO0;
+		} else if (hw->media_type == e1000_media_type_copper) {
+			ew32(LEDCTL, hw->ledctl_mode2);
+			return E1000_SUCCESS;
+		}
+		break;
+	}
+
+	ew32(CTRL, ctrl);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_led_off - Turns off the software controllable LED
+ * @hw: Struct containing variables accessed by shared code
+ */
+s32 e1000_led_off(struct e1000_hw *hw)
+{
+	u32 ctrl = er32(CTRL);
+
+	switch (hw->mac_type) {
+	case e1000_82542_rev2_0:
+	case e1000_82542_rev2_1:
+	case e1000_82543:
+		/* Clear SW Defineable Pin 0 to turn off the LED */
+		ctrl &= ~E1000_CTRL_SWDPIN0;
+		ctrl |= E1000_CTRL_SWDPIO0;
+		break;
+	case e1000_82544:
+		if (hw->media_type == e1000_media_type_fiber) {
+			/* Clear SW Defineable Pin 0 to turn off the LED */
+			ctrl &= ~E1000_CTRL_SWDPIN0;
+			ctrl |= E1000_CTRL_SWDPIO0;
+		} else {
+			/* Set SW Defineable Pin 0 to turn off the LED */
+			ctrl |= E1000_CTRL_SWDPIN0;
+			ctrl |= E1000_CTRL_SWDPIO0;
+		}
+		break;
+	default:
+		if (hw->media_type == e1000_media_type_fiber) {
+			/* Set SW Defineable Pin 0 to turn off the LED */
+			ctrl |= E1000_CTRL_SWDPIN0;
+			ctrl |= E1000_CTRL_SWDPIO0;
+		} else if (hw->media_type == e1000_media_type_copper) {
+			ew32(LEDCTL, hw->ledctl_mode1);
+			return E1000_SUCCESS;
+		}
+		break;
+	}
+
+	ew32(CTRL, ctrl);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_clear_hw_cntrs - Clears all hardware statistics counters.
+ * @hw: Struct containing variables accessed by shared code
+ */
+static void e1000_clear_hw_cntrs(struct e1000_hw *hw)
+{
+	er32(CRCERRS);
+	er32(SYMERRS);
+	er32(MPC);
+	er32(SCC);
+	er32(ECOL);
+	er32(MCC);
+	er32(LATECOL);
+	er32(COLC);
+	er32(DC);
+	er32(SEC);
+	er32(RLEC);
+	er32(XONRXC);
+	er32(XONTXC);
+	er32(XOFFRXC);
+	er32(XOFFTXC);
+	er32(FCRUC);
+
+	er32(PRC64);
+	er32(PRC127);
+	er32(PRC255);
+	er32(PRC511);
+	er32(PRC1023);
+	er32(PRC1522);
+
+	er32(GPRC);
+	er32(BPRC);
+	er32(MPRC);
+	er32(GPTC);
+	er32(GORCL);
+	er32(GORCH);
+	er32(GOTCL);
+	er32(GOTCH);
+	er32(RNBC);
+	er32(RUC);
+	er32(RFC);
+	er32(ROC);
+	er32(RJC);
+	er32(TORL);
+	er32(TORH);
+	er32(TOTL);
+	er32(TOTH);
+	er32(TPR);
+	er32(TPT);
+
+	er32(PTC64);
+	er32(PTC127);
+	er32(PTC255);
+	er32(PTC511);
+	er32(PTC1023);
+	er32(PTC1522);
+
+	er32(MPTC);
+	er32(BPTC);
+
+	if (hw->mac_type < e1000_82543)
+		return;
+
+	er32(ALGNERRC);
+	er32(RXERRC);
+	er32(TNCRS);
+	er32(CEXTERR);
+	er32(TSCTC);
+	er32(TSCTFC);
+
+	if (hw->mac_type <= e1000_82544)
+		return;
+
+	er32(MGTPRC);
+	er32(MGTPDC);
+	er32(MGTPTC);
+}
+
+/**
+ * e1000_reset_adaptive - Resets Adaptive IFS to its default state.
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Call this after e1000_init_hw. You may override the IFS defaults by setting
+ * hw->ifs_params_forced to true. However, you must initialize hw->
+ * current_ifs_val, ifs_min_val, ifs_max_val, ifs_step_size, and ifs_ratio
+ * before calling this function.
+ */
+void e1000_reset_adaptive(struct e1000_hw *hw)
+{
+	if (hw->adaptive_ifs) {
+		if (!hw->ifs_params_forced) {
+			hw->current_ifs_val = 0;
+			hw->ifs_min_val = IFS_MIN;
+			hw->ifs_max_val = IFS_MAX;
+			hw->ifs_step_size = IFS_STEP;
+			hw->ifs_ratio = IFS_RATIO;
+		}
+		hw->in_ifs_mode = false;
+		ew32(AIT, 0);
+	} else {
+		e_dbg("Not in Adaptive IFS mode!\n");
+	}
+}
+
+/**
+ * e1000_update_adaptive - update adaptive IFS
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Called during the callback/watchdog routine to update IFS value based on
+ * the ratio of transmits to collisions.
+ */
+void e1000_update_adaptive(struct e1000_hw *hw)
+{
+	if (hw->adaptive_ifs) {
+		if ((hw->collision_delta * hw->ifs_ratio) > hw->tx_packet_delta) {
+			if (hw->tx_packet_delta > MIN_NUM_XMITS) {
+				hw->in_ifs_mode = true;
+				if (hw->current_ifs_val < hw->ifs_max_val) {
+					if (hw->current_ifs_val == 0)
+						hw->current_ifs_val =
+						    hw->ifs_min_val;
+					else
+						hw->current_ifs_val +=
+						    hw->ifs_step_size;
+					ew32(AIT, hw->current_ifs_val);
+				}
+			}
+		} else {
+			if (hw->in_ifs_mode &&
+			    (hw->tx_packet_delta <= MIN_NUM_XMITS)) {
+				hw->current_ifs_val = 0;
+				hw->in_ifs_mode = false;
+				ew32(AIT, 0);
+			}
+		}
+	} else {
+		e_dbg("Not in Adaptive IFS mode!\n");
+	}
+}
+
+/**
+ * e1000_get_bus_info
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Gets the current PCI bus type, speed, and width of the hardware
+ */
+void e1000_get_bus_info(struct e1000_hw *hw)
+{
+	u32 status;
+
+	switch (hw->mac_type) {
+	case e1000_82542_rev2_0:
+	case e1000_82542_rev2_1:
+		hw->bus_type = e1000_bus_type_pci;
+		hw->bus_speed = e1000_bus_speed_unknown;
+		hw->bus_width = e1000_bus_width_unknown;
+		break;
+	default:
+		status = er32(STATUS);
+		hw->bus_type = (status & E1000_STATUS_PCIX_MODE) ?
+		    e1000_bus_type_pcix : e1000_bus_type_pci;
+
+		if (hw->device_id == E1000_DEV_ID_82546EB_QUAD_COPPER) {
+			hw->bus_speed = (hw->bus_type == e1000_bus_type_pci) ?
+			    e1000_bus_speed_66 : e1000_bus_speed_120;
+		} else if (hw->bus_type == e1000_bus_type_pci) {
+			hw->bus_speed = (status & E1000_STATUS_PCI66) ?
+			    e1000_bus_speed_66 : e1000_bus_speed_33;
+		} else {
+			switch (status & E1000_STATUS_PCIX_SPEED) {
+			case E1000_STATUS_PCIX_SPEED_66:
+				hw->bus_speed = e1000_bus_speed_66;
+				break;
+			case E1000_STATUS_PCIX_SPEED_100:
+				hw->bus_speed = e1000_bus_speed_100;
+				break;
+			case E1000_STATUS_PCIX_SPEED_133:
+				hw->bus_speed = e1000_bus_speed_133;
+				break;
+			default:
+				hw->bus_speed = e1000_bus_speed_reserved;
+				break;
+			}
+		}
+		hw->bus_width = (status & E1000_STATUS_BUS64) ?
+		    e1000_bus_width_64 : e1000_bus_width_32;
+		break;
+	}
+}
+
+/**
+ * e1000_write_reg_io
+ * @hw: Struct containing variables accessed by shared code
+ * @offset: offset to write to
+ * @value: value to write
+ *
+ * Writes a value to one of the devices registers using port I/O (as opposed to
+ * memory mapped I/O). Only 82544 and newer devices support port I/O.
+ */
+static void e1000_write_reg_io(struct e1000_hw *hw, u32 offset, u32 value)
+{
+	unsigned long io_addr = hw->io_base;
+	unsigned long io_data = hw->io_base + 4;
+
+	e1000_io_write(hw, io_addr, offset);
+	e1000_io_write(hw, io_data, value);
+}
+
+/**
+ * e1000_get_cable_length - Estimates the cable length.
+ * @hw: Struct containing variables accessed by shared code
+ * @min_length: The estimated minimum length
+ * @max_length: The estimated maximum length
+ *
+ * returns: - E1000_ERR_XXX
+ *            E1000_SUCCESS
+ *
+ * This function always returns a ranged length (minimum & maximum).
+ * So for M88 phy's, this function interprets the one value returned from the
+ * register to the minimum and maximum range.
+ * For IGP phy's, the function calculates the range by the AGC registers.
+ */
+static s32 e1000_get_cable_length(struct e1000_hw *hw, u16 *min_length,
+				  u16 *max_length)
+{
+	s32 ret_val;
+	u16 agc_value = 0;
+	u16 i, phy_data;
+	u16 cable_length;
+
+	*min_length = *max_length = 0;
+
+	/* Use old method for Phy older than IGP */
+	if (hw->phy_type == e1000_phy_m88) {
+		ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS,
+					     &phy_data);
+		if (ret_val)
+			return ret_val;
+		cable_length = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+		    M88E1000_PSSR_CABLE_LENGTH_SHIFT;
+
+		/* Convert the enum value to ranged values */
+		switch (cable_length) {
+		case e1000_cable_length_50:
+			*min_length = 0;
+			*max_length = e1000_igp_cable_length_50;
+			break;
+		case e1000_cable_length_50_80:
+			*min_length = e1000_igp_cable_length_50;
+			*max_length = e1000_igp_cable_length_80;
+			break;
+		case e1000_cable_length_80_110:
+			*min_length = e1000_igp_cable_length_80;
+			*max_length = e1000_igp_cable_length_110;
+			break;
+		case e1000_cable_length_110_140:
+			*min_length = e1000_igp_cable_length_110;
+			*max_length = e1000_igp_cable_length_140;
+			break;
+		case e1000_cable_length_140:
+			*min_length = e1000_igp_cable_length_140;
+			*max_length = e1000_igp_cable_length_170;
+			break;
+		default:
+			return -E1000_ERR_PHY;
+		}
+	} else if (hw->phy_type == e1000_phy_igp) {	/* For IGP PHY */
+		u16 cur_agc_value;
+		u16 min_agc_value = IGP01E1000_AGC_LENGTH_TABLE_SIZE;
+		static const u16 agc_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = {
+		       IGP01E1000_PHY_AGC_A,
+		       IGP01E1000_PHY_AGC_B,
+		       IGP01E1000_PHY_AGC_C,
+		       IGP01E1000_PHY_AGC_D
+		};
+		/* Read the AGC registers for all channels */
+		for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) {
+			ret_val =
+			    e1000_read_phy_reg(hw, agc_reg_array[i], &phy_data);
+			if (ret_val)
+				return ret_val;
+
+			cur_agc_value = phy_data >> IGP01E1000_AGC_LENGTH_SHIFT;
+
+			/* Value bound check. */
+			if ((cur_agc_value >=
+			     IGP01E1000_AGC_LENGTH_TABLE_SIZE - 1) ||
+			    (cur_agc_value == 0))
+				return -E1000_ERR_PHY;
+
+			agc_value += cur_agc_value;
+
+			/* Update minimal AGC value. */
+			if (min_agc_value > cur_agc_value)
+				min_agc_value = cur_agc_value;
+		}
+
+		/* Remove the minimal AGC result for length < 50m */
+		if (agc_value <
+		    IGP01E1000_PHY_CHANNEL_NUM * e1000_igp_cable_length_50) {
+			agc_value -= min_agc_value;
+
+			/* Get the average length of the remaining 3 channels */
+			agc_value /= (IGP01E1000_PHY_CHANNEL_NUM - 1);
+		} else {
+			/* Get the average length of all the 4 channels. */
+			agc_value /= IGP01E1000_PHY_CHANNEL_NUM;
+		}
+
+		/* Set the range of the calculated length. */
+		*min_length = ((e1000_igp_cable_length_table[agc_value] -
+				IGP01E1000_AGC_RANGE) > 0) ?
+		    (e1000_igp_cable_length_table[agc_value] -
+		     IGP01E1000_AGC_RANGE) : 0;
+		*max_length = e1000_igp_cable_length_table[agc_value] +
+		    IGP01E1000_AGC_RANGE;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_check_polarity - Check the cable polarity
+ * @hw: Struct containing variables accessed by shared code
+ * @polarity: output parameter : 0 - Polarity is not reversed
+ *                               1 - Polarity is reversed.
+ *
+ * returns: - E1000_ERR_XXX
+ *            E1000_SUCCESS
+ *
+ * For phy's older than IGP, this function simply reads the polarity bit in the
+ * Phy Status register.  For IGP phy's, this bit is valid only if link speed is
+ * 10 Mbps.  If the link speed is 100 Mbps there is no polarity so this bit will
+ * return 0.  If the link speed is 1000 Mbps the polarity status is in the
+ * IGP01E1000_PHY_PCS_INIT_REG.
+ */
+static s32 e1000_check_polarity(struct e1000_hw *hw,
+				e1000_rev_polarity *polarity)
+{
+	s32 ret_val;
+	u16 phy_data;
+
+	if (hw->phy_type == e1000_phy_m88) {
+		/* return the Polarity bit in the Status register. */
+		ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS,
+					     &phy_data);
+		if (ret_val)
+			return ret_val;
+		*polarity = ((phy_data & M88E1000_PSSR_REV_POLARITY) >>
+			     M88E1000_PSSR_REV_POLARITY_SHIFT) ?
+		    e1000_rev_polarity_reversed : e1000_rev_polarity_normal;
+
+	} else if (hw->phy_type == e1000_phy_igp) {
+		/* Read the Status register to check the speed */
+		ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_STATUS,
+					     &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		/* If speed is 1000 Mbps, must read the
+		 * IGP01E1000_PHY_PCS_INIT_REG to find the polarity status
+		 */
+		if ((phy_data & IGP01E1000_PSSR_SPEED_MASK) ==
+		    IGP01E1000_PSSR_SPEED_1000MBPS) {
+			/* Read the GIG initialization PCS register (0x00B4) */
+			ret_val =
+			    e1000_read_phy_reg(hw, IGP01E1000_PHY_PCS_INIT_REG,
+					       &phy_data);
+			if (ret_val)
+				return ret_val;
+
+			/* Check the polarity bits */
+			*polarity = (phy_data & IGP01E1000_PHY_POLARITY_MASK) ?
+			    e1000_rev_polarity_reversed :
+			    e1000_rev_polarity_normal;
+		} else {
+			/* For 10 Mbps, read the polarity bit in the status
+			 * register. (for 100 Mbps this bit is always 0)
+			 */
+			*polarity =
+			    (phy_data & IGP01E1000_PSSR_POLARITY_REVERSED) ?
+			    e1000_rev_polarity_reversed :
+			    e1000_rev_polarity_normal;
+		}
+	}
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_check_downshift - Check if Downshift occurred
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * returns: - E1000_ERR_XXX
+ *            E1000_SUCCESS
+ *
+ * For phy's older than IGP, this function reads the Downshift bit in the Phy
+ * Specific Status register.  For IGP phy's, it reads the Downgrade bit in the
+ * Link Health register.  In IGP this bit is latched high, so the driver must
+ * read it immediately after link is established.
+ */
+static s32 e1000_check_downshift(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 phy_data;
+
+	if (hw->phy_type == e1000_phy_igp) {
+		ret_val = e1000_read_phy_reg(hw, IGP01E1000_PHY_LINK_HEALTH,
+					     &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		hw->speed_downgraded =
+		    (phy_data & IGP01E1000_PLHR_SS_DOWNGRADE) ? 1 : 0;
+	} else if (hw->phy_type == e1000_phy_m88) {
+		ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_SPEC_STATUS,
+					     &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		hw->speed_downgraded = (phy_data & M88E1000_PSSR_DOWNSHIFT) >>
+		    M88E1000_PSSR_DOWNSHIFT_SHIFT;
+	}
+
+	return E1000_SUCCESS;
+}
+
+static const u16 dsp_reg_array[IGP01E1000_PHY_CHANNEL_NUM] = {
+	IGP01E1000_PHY_AGC_PARAM_A,
+	IGP01E1000_PHY_AGC_PARAM_B,
+	IGP01E1000_PHY_AGC_PARAM_C,
+	IGP01E1000_PHY_AGC_PARAM_D
+};
+
+static s32 e1000_1000Mb_check_cable_length(struct e1000_hw *hw)
+{
+	u16 min_length, max_length;
+	u16 phy_data, i;
+	s32 ret_val;
+
+	ret_val = e1000_get_cable_length(hw, &min_length, &max_length);
+	if (ret_val)
+		return ret_val;
+
+	if (hw->dsp_config_state != e1000_dsp_config_enabled)
+		return 0;
+
+	if (min_length >= e1000_igp_cable_length_50) {
+		for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) {
+			ret_val = e1000_read_phy_reg(hw, dsp_reg_array[i],
+						     &phy_data);
+			if (ret_val)
+				return ret_val;
+
+			phy_data &= ~IGP01E1000_PHY_EDAC_MU_INDEX;
+
+			ret_val = e1000_write_phy_reg(hw, dsp_reg_array[i],
+						      phy_data);
+			if (ret_val)
+				return ret_val;
+		}
+		hw->dsp_config_state = e1000_dsp_config_activated;
+	} else {
+		u16 ffe_idle_err_timeout = FFE_IDLE_ERR_COUNT_TIMEOUT_20;
+		u32 idle_errs = 0;
+
+		/* clear previous idle error counts */
+		ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		for (i = 0; i < ffe_idle_err_timeout; i++) {
+			udelay(1000);
+			ret_val = e1000_read_phy_reg(hw, PHY_1000T_STATUS,
+						     &phy_data);
+			if (ret_val)
+				return ret_val;
+
+			idle_errs += (phy_data & SR_1000T_IDLE_ERROR_CNT);
+			if (idle_errs > SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT) {
+				hw->ffe_config_state = e1000_ffe_config_active;
+
+				ret_val = e1000_write_phy_reg(hw,
+							      IGP01E1000_PHY_DSP_FFE,
+							      IGP01E1000_PHY_DSP_FFE_CM_CP);
+				if (ret_val)
+					return ret_val;
+				break;
+			}
+
+			if (idle_errs)
+				ffe_idle_err_timeout =
+					    FFE_IDLE_ERR_COUNT_TIMEOUT_100;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * e1000_config_dsp_after_link_change
+ * @hw: Struct containing variables accessed by shared code
+ * @link_up: was link up at the time this was called
+ *
+ * returns: - E1000_ERR_PHY if fail to read/write the PHY
+ *            E1000_SUCCESS at any other case.
+ *
+ * 82541_rev_2 & 82547_rev_2 have the capability to configure the DSP when a
+ * gigabit link is achieved to improve link quality.
+ */
+
+static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw, bool link_up)
+{
+	s32 ret_val;
+	u16 phy_data, phy_saved_data, speed, duplex, i;
+
+	if (hw->phy_type != e1000_phy_igp)
+		return E1000_SUCCESS;
+
+	if (link_up) {
+		ret_val = e1000_get_speed_and_duplex(hw, &speed, &duplex);
+		if (ret_val) {
+			e_dbg("Error getting link speed and duplex\n");
+			return ret_val;
+		}
+
+		if (speed == SPEED_1000) {
+			ret_val = e1000_1000Mb_check_cable_length(hw);
+			if (ret_val)
+				return ret_val;
+		}
+	} else {
+		if (hw->dsp_config_state == e1000_dsp_config_activated) {
+			/* Save off the current value of register 0x2F5B to be
+			 * restored at the end of the routines.
+			 */
+			ret_val =
+			    e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data);
+
+			if (ret_val)
+				return ret_val;
+
+			/* Disable the PHY transmitter */
+			ret_val = e1000_write_phy_reg(hw, 0x2F5B, 0x0003);
+
+			if (ret_val)
+				return ret_val;
+
+			msleep(20);
+
+			ret_val = e1000_write_phy_reg(hw, 0x0000,
+						      IGP01E1000_IEEE_FORCE_GIGA);
+			if (ret_val)
+				return ret_val;
+			for (i = 0; i < IGP01E1000_PHY_CHANNEL_NUM; i++) {
+				ret_val =
+				    e1000_read_phy_reg(hw, dsp_reg_array[i],
+						       &phy_data);
+				if (ret_val)
+					return ret_val;
+
+				phy_data &= ~IGP01E1000_PHY_EDAC_MU_INDEX;
+				phy_data |= IGP01E1000_PHY_EDAC_SIGN_EXT_9_BITS;
+
+				ret_val =
+				    e1000_write_phy_reg(hw, dsp_reg_array[i],
+							phy_data);
+				if (ret_val)
+					return ret_val;
+			}
+
+			ret_val = e1000_write_phy_reg(hw, 0x0000,
+						      IGP01E1000_IEEE_RESTART_AUTONEG);
+			if (ret_val)
+				return ret_val;
+
+			msleep(20);
+
+			/* Now enable the transmitter */
+			ret_val =
+			    e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data);
+
+			if (ret_val)
+				return ret_val;
+
+			hw->dsp_config_state = e1000_dsp_config_enabled;
+		}
+
+		if (hw->ffe_config_state == e1000_ffe_config_active) {
+			/* Save off the current value of register 0x2F5B to be
+			 * restored at the end of the routines.
+			 */
+			ret_val =
+			    e1000_read_phy_reg(hw, 0x2F5B, &phy_saved_data);
+
+			if (ret_val)
+				return ret_val;
+
+			/* Disable the PHY transmitter */
+			ret_val = e1000_write_phy_reg(hw, 0x2F5B, 0x0003);
+
+			if (ret_val)
+				return ret_val;
+
+			msleep(20);
+
+			ret_val = e1000_write_phy_reg(hw, 0x0000,
+						      IGP01E1000_IEEE_FORCE_GIGA);
+			if (ret_val)
+				return ret_val;
+			ret_val =
+			    e1000_write_phy_reg(hw, IGP01E1000_PHY_DSP_FFE,
+						IGP01E1000_PHY_DSP_FFE_DEFAULT);
+			if (ret_val)
+				return ret_val;
+
+			ret_val = e1000_write_phy_reg(hw, 0x0000,
+						      IGP01E1000_IEEE_RESTART_AUTONEG);
+			if (ret_val)
+				return ret_val;
+
+			msleep(20);
+
+			/* Now enable the transmitter */
+			ret_val =
+			    e1000_write_phy_reg(hw, 0x2F5B, phy_saved_data);
+
+			if (ret_val)
+				return ret_val;
+
+			hw->ffe_config_state = e1000_ffe_config_enabled;
+		}
+	}
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_set_phy_mode - Set PHY to class A mode
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Assumes the following operations will follow to enable the new class mode.
+ *  1. Do a PHY soft reset
+ *  2. Restart auto-negotiation or force link.
+ */
+static s32 e1000_set_phy_mode(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 eeprom_data;
+
+	if ((hw->mac_type == e1000_82545_rev_3) &&
+	    (hw->media_type == e1000_media_type_copper)) {
+		ret_val =
+		    e1000_read_eeprom(hw, EEPROM_PHY_CLASS_WORD, 1,
+				      &eeprom_data);
+		if (ret_val)
+			return ret_val;
+
+		if ((eeprom_data != EEPROM_RESERVED_WORD) &&
+		    (eeprom_data & EEPROM_PHY_CLASS_A)) {
+			ret_val =
+			    e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT,
+						0x000B);
+			if (ret_val)
+				return ret_val;
+			ret_val =
+			    e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL,
+						0x8104);
+			if (ret_val)
+				return ret_val;
+
+			hw->phy_reset_disable = false;
+		}
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_set_d3_lplu_state - set d3 link power state
+ * @hw: Struct containing variables accessed by shared code
+ * @active: true to enable lplu false to disable lplu.
+ *
+ * This function sets the lplu state according to the active flag.  When
+ * activating lplu this function also disables smart speed and vise versa.
+ * lplu will not be activated unless the device autonegotiation advertisement
+ * meets standards of either 10 or 10/100 or 10/100/1000 at all duplexes.
+ *
+ * returns: - E1000_ERR_PHY if fail to read/write the PHY
+ *            E1000_SUCCESS at any other case.
+ */
+static s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active)
+{
+	s32 ret_val;
+	u16 phy_data;
+
+	if (hw->phy_type != e1000_phy_igp)
+		return E1000_SUCCESS;
+
+	/* During driver activity LPLU should not be used or it will attain link
+	 * from the lowest speeds starting from 10Mbps. The capability is used
+	 * for Dx transitions and states
+	 */
+	if (hw->mac_type == e1000_82541_rev_2 ||
+	    hw->mac_type == e1000_82547_rev_2) {
+		ret_val =
+		    e1000_read_phy_reg(hw, IGP01E1000_GMII_FIFO, &phy_data);
+		if (ret_val)
+			return ret_val;
+	}
+
+	if (!active) {
+		if (hw->mac_type == e1000_82541_rev_2 ||
+		    hw->mac_type == e1000_82547_rev_2) {
+			phy_data &= ~IGP01E1000_GMII_FLEX_SPD;
+			ret_val =
+			    e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO,
+						phy_data);
+			if (ret_val)
+				return ret_val;
+		}
+
+		/* LPLU and SmartSpeed are mutually exclusive.  LPLU is used
+		 * during Dx states where the power conservation is most
+		 * important.  During driver activity we should enable
+		 * SmartSpeed, so performance is maintained.
+		 */
+		if (hw->smart_speed == e1000_smart_speed_on) {
+			ret_val =
+			    e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+					       &phy_data);
+			if (ret_val)
+				return ret_val;
+
+			phy_data |= IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val =
+			    e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+						phy_data);
+			if (ret_val)
+				return ret_val;
+		} else if (hw->smart_speed == e1000_smart_speed_off) {
+			ret_val =
+			    e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+					       &phy_data);
+			if (ret_val)
+				return ret_val;
+
+			phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val =
+			    e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+						phy_data);
+			if (ret_val)
+				return ret_val;
+		}
+	} else if ((hw->autoneg_advertised == AUTONEG_ADVERTISE_SPEED_DEFAULT) ||
+		   (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_ALL) ||
+		   (hw->autoneg_advertised == AUTONEG_ADVERTISE_10_100_ALL)) {
+		if (hw->mac_type == e1000_82541_rev_2 ||
+		    hw->mac_type == e1000_82547_rev_2) {
+			phy_data |= IGP01E1000_GMII_FLEX_SPD;
+			ret_val =
+			    e1000_write_phy_reg(hw, IGP01E1000_GMII_FIFO,
+						phy_data);
+			if (ret_val)
+				return ret_val;
+		}
+
+		/* When LPLU is enabled we should disable SmartSpeed */
+		ret_val =
+		    e1000_read_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+				       &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		phy_data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+		ret_val =
+		    e1000_write_phy_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+					phy_data);
+		if (ret_val)
+			return ret_val;
+	}
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_set_vco_speed
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Change VCO speed register to improve Bit Error Rate performance of SERDES.
+ */
+static s32 e1000_set_vco_speed(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 default_page = 0;
+	u16 phy_data;
+
+	switch (hw->mac_type) {
+	case e1000_82545_rev_3:
+	case e1000_82546_rev_3:
+		break;
+	default:
+		return E1000_SUCCESS;
+	}
+
+	/* Set PHY register 30, page 5, bit 8 to 0 */
+
+	ret_val =
+	    e1000_read_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, &default_page);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0005);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_data &= ~M88E1000_PHY_VCO_REG_BIT8;
+	ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Set PHY register 30, page 4, bit 11 to 1 */
+
+	ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0004);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1000_read_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_data |= M88E1000_PHY_VCO_REG_BIT11;
+	ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	ret_val =
+	    e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, default_page);
+	if (ret_val)
+		return ret_val;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_enable_mng_pass_thru - check for bmc pass through
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Verifies the hardware needs to allow ARPs to be processed by the host
+ * returns: - true/false
+ */
+u32 e1000_enable_mng_pass_thru(struct e1000_hw *hw)
+{
+	u32 manc;
+
+	if (hw->asf_firmware_present) {
+		manc = er32(MANC);
+
+		if (!(manc & E1000_MANC_RCV_TCO_EN) ||
+		    !(manc & E1000_MANC_EN_MAC_ADDR_FILTER))
+			return false;
+		if ((manc & E1000_MANC_SMBUS_EN) && !(manc & E1000_MANC_ASF_EN))
+			return true;
+	}
+	return false;
+}
+
+static s32 e1000_polarity_reversal_workaround(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 mii_status_reg;
+	u16 i;
+
+	/* Polarity reversal workaround for forced 10F/10H links. */
+
+	/* Disable the transmitter on the PHY */
+
+	ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019);
+	if (ret_val)
+		return ret_val;
+	ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFFF);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000);
+	if (ret_val)
+		return ret_val;
+
+	/* This loop will early-out if the NO link condition has been met. */
+	for (i = PHY_FORCE_TIME; i > 0; i--) {
+		/* Read the MII Status Register and wait for Link Status bit
+		 * to be clear.
+		 */
+
+		ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg);
+		if (ret_val)
+			return ret_val;
+
+		if ((mii_status_reg & ~MII_SR_LINK_STATUS) == 0)
+			break;
+		msleep(100);
+	}
+
+	/* Recommended delay time after link has been lost */
+	msleep(1000);
+
+	/* Now we will re-enable th transmitter on the PHY */
+
+	ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019);
+	if (ret_val)
+		return ret_val;
+	msleep(50);
+	ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFF0);
+	if (ret_val)
+		return ret_val;
+	msleep(50);
+	ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFF00);
+	if (ret_val)
+		return ret_val;
+	msleep(50);
+	ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0x0000);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0000);
+	if (ret_val)
+		return ret_val;
+
+	/* This loop will early-out if the link condition has been met. */
+	for (i = PHY_FORCE_TIME; i > 0; i--) {
+		/* Read the MII Status Register and wait for Link Status bit
+		 * to be set.
+		 */
+
+		ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = e1000_read_phy_reg(hw, PHY_STATUS, &mii_status_reg);
+		if (ret_val)
+			return ret_val;
+
+		if (mii_status_reg & MII_SR_LINK_STATUS)
+			break;
+		msleep(100);
+	}
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_get_auto_rd_done
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Check for EEPROM Auto Read bit done.
+ * returns: - E1000_ERR_RESET if fail to reset MAC
+ *            E1000_SUCCESS at any other case.
+ */
+static s32 e1000_get_auto_rd_done(struct e1000_hw *hw)
+{
+	msleep(5);
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_get_phy_cfg_done
+ * @hw: Struct containing variables accessed by shared code
+ *
+ * Checks if the PHY configuration is done
+ * returns: - E1000_ERR_RESET if fail to reset MAC
+ *            E1000_SUCCESS at any other case.
+ */
+static s32 e1000_get_phy_cfg_done(struct e1000_hw *hw)
+{
+	msleep(10);
+	return E1000_SUCCESS;
+}
diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.h b/drivers/net/ethernet/intel/e1000/e1000_hw.h
new file mode 100644
index 0000000000..95cdd17134
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.h
@@ -0,0 +1,3082 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
+
+/* e1000_hw.h
+ * Structures, enums, and macros for the MAC
+ */
+
+#ifndef _E1000_HW_H_
+#define _E1000_HW_H_
+
+#include "e1000_osdep.h"
+
+
+/* Forward declarations of structures used by the shared code */
+struct e1000_hw;
+struct e1000_hw_stats;
+
+/* Enumerated types specific to the e1000 hardware */
+/* Media Access Controllers */
+typedef enum {
+	e1000_undefined = 0,
+	e1000_82542_rev2_0,
+	e1000_82542_rev2_1,
+	e1000_82543,
+	e1000_82544,
+	e1000_82540,
+	e1000_82545,
+	e1000_82545_rev_3,
+	e1000_82546,
+	e1000_ce4100,
+	e1000_82546_rev_3,
+	e1000_82541,
+	e1000_82541_rev_2,
+	e1000_82547,
+	e1000_82547_rev_2,
+	e1000_num_macs
+} e1000_mac_type;
+
+typedef enum {
+	e1000_eeprom_uninitialized = 0,
+	e1000_eeprom_spi,
+	e1000_eeprom_microwire,
+	e1000_eeprom_flash,
+	e1000_eeprom_none,	/* No NVM support */
+	e1000_num_eeprom_types
+} e1000_eeprom_type;
+
+/* Media Types */
+typedef enum {
+	e1000_media_type_copper = 0,
+	e1000_media_type_fiber = 1,
+	e1000_media_type_internal_serdes = 2,
+	e1000_num_media_types
+} e1000_media_type;
+
+typedef enum {
+	e1000_10_half = 0,
+	e1000_10_full = 1,
+	e1000_100_half = 2,
+	e1000_100_full = 3
+} e1000_speed_duplex_type;
+
+/* Flow Control Settings */
+typedef enum {
+	E1000_FC_NONE = 0,
+	E1000_FC_RX_PAUSE = 1,
+	E1000_FC_TX_PAUSE = 2,
+	E1000_FC_FULL = 3,
+	E1000_FC_DEFAULT = 0xFF
+} e1000_fc_type;
+
+struct e1000_shadow_ram {
+	u16 eeprom_word;
+	bool modified;
+};
+
+/* PCI bus types */
+typedef enum {
+	e1000_bus_type_unknown = 0,
+	e1000_bus_type_pci,
+	e1000_bus_type_pcix,
+	e1000_bus_type_reserved
+} e1000_bus_type;
+
+/* PCI bus speeds */
+typedef enum {
+	e1000_bus_speed_unknown = 0,
+	e1000_bus_speed_33,
+	e1000_bus_speed_66,
+	e1000_bus_speed_100,
+	e1000_bus_speed_120,
+	e1000_bus_speed_133,
+	e1000_bus_speed_reserved
+} e1000_bus_speed;
+
+/* PCI bus widths */
+typedef enum {
+	e1000_bus_width_unknown = 0,
+	e1000_bus_width_32,
+	e1000_bus_width_64,
+	e1000_bus_width_reserved
+} e1000_bus_width;
+
+/* PHY status info structure and supporting enums */
+typedef enum {
+	e1000_cable_length_50 = 0,
+	e1000_cable_length_50_80,
+	e1000_cable_length_80_110,
+	e1000_cable_length_110_140,
+	e1000_cable_length_140,
+	e1000_cable_length_undefined = 0xFF
+} e1000_cable_length;
+
+typedef enum {
+	e1000_gg_cable_length_60 = 0,
+	e1000_gg_cable_length_60_115 = 1,
+	e1000_gg_cable_length_115_150 = 2,
+	e1000_gg_cable_length_150 = 4
+} e1000_gg_cable_length;
+
+typedef enum {
+	e1000_igp_cable_length_10 = 10,
+	e1000_igp_cable_length_20 = 20,
+	e1000_igp_cable_length_30 = 30,
+	e1000_igp_cable_length_40 = 40,
+	e1000_igp_cable_length_50 = 50,
+	e1000_igp_cable_length_60 = 60,
+	e1000_igp_cable_length_70 = 70,
+	e1000_igp_cable_length_80 = 80,
+	e1000_igp_cable_length_90 = 90,
+	e1000_igp_cable_length_100 = 100,
+	e1000_igp_cable_length_110 = 110,
+	e1000_igp_cable_length_115 = 115,
+	e1000_igp_cable_length_120 = 120,
+	e1000_igp_cable_length_130 = 130,
+	e1000_igp_cable_length_140 = 140,
+	e1000_igp_cable_length_150 = 150,
+	e1000_igp_cable_length_160 = 160,
+	e1000_igp_cable_length_170 = 170,
+	e1000_igp_cable_length_180 = 180
+} e1000_igp_cable_length;
+
+typedef enum {
+	e1000_10bt_ext_dist_enable_normal = 0,
+	e1000_10bt_ext_dist_enable_lower,
+	e1000_10bt_ext_dist_enable_undefined = 0xFF
+} e1000_10bt_ext_dist_enable;
+
+typedef enum {
+	e1000_rev_polarity_normal = 0,
+	e1000_rev_polarity_reversed,
+	e1000_rev_polarity_undefined = 0xFF
+} e1000_rev_polarity;
+
+typedef enum {
+	e1000_downshift_normal = 0,
+	e1000_downshift_activated,
+	e1000_downshift_undefined = 0xFF
+} e1000_downshift;
+
+typedef enum {
+	e1000_smart_speed_default = 0,
+	e1000_smart_speed_on,
+	e1000_smart_speed_off
+} e1000_smart_speed;
+
+typedef enum {
+	e1000_polarity_reversal_enabled = 0,
+	e1000_polarity_reversal_disabled,
+	e1000_polarity_reversal_undefined = 0xFF
+} e1000_polarity_reversal;
+
+typedef enum {
+	e1000_auto_x_mode_manual_mdi = 0,
+	e1000_auto_x_mode_manual_mdix,
+	e1000_auto_x_mode_auto1,
+	e1000_auto_x_mode_auto2,
+	e1000_auto_x_mode_undefined = 0xFF
+} e1000_auto_x_mode;
+
+typedef enum {
+	e1000_1000t_rx_status_not_ok = 0,
+	e1000_1000t_rx_status_ok,
+	e1000_1000t_rx_status_undefined = 0xFF
+} e1000_1000t_rx_status;
+
+typedef enum {
+	e1000_phy_m88 = 0,
+	e1000_phy_igp,
+	e1000_phy_8211,
+	e1000_phy_8201,
+	e1000_phy_undefined = 0xFF
+} e1000_phy_type;
+
+typedef enum {
+	e1000_ms_hw_default = 0,
+	e1000_ms_force_master,
+	e1000_ms_force_slave,
+	e1000_ms_auto
+} e1000_ms_type;
+
+typedef enum {
+	e1000_ffe_config_enabled = 0,
+	e1000_ffe_config_active,
+	e1000_ffe_config_blocked
+} e1000_ffe_config;
+
+typedef enum {
+	e1000_dsp_config_disabled = 0,
+	e1000_dsp_config_enabled,
+	e1000_dsp_config_activated,
+	e1000_dsp_config_undefined = 0xFF
+} e1000_dsp_config;
+
+struct e1000_phy_info {
+	e1000_cable_length cable_length;
+	e1000_10bt_ext_dist_enable extended_10bt_distance;
+	e1000_rev_polarity cable_polarity;
+	e1000_downshift downshift;
+	e1000_polarity_reversal polarity_correction;
+	e1000_auto_x_mode mdix_mode;
+	e1000_1000t_rx_status local_rx;
+	e1000_1000t_rx_status remote_rx;
+};
+
+struct e1000_phy_stats {
+	u32 idle_errors;
+	u32 receive_errors;
+};
+
+struct e1000_eeprom_info {
+	e1000_eeprom_type type;
+	u16 word_size;
+	u16 opcode_bits;
+	u16 address_bits;
+	u16 delay_usec;
+	u16 page_size;
+};
+
+/* Flex ASF Information */
+#define E1000_HOST_IF_MAX_SIZE  2048
+
+typedef enum {
+	e1000_byte_align = 0,
+	e1000_word_align = 1,
+	e1000_dword_align = 2
+} e1000_align_type;
+
+/* Error Codes */
+#define E1000_SUCCESS      0
+#define E1000_ERR_EEPROM   1
+#define E1000_ERR_PHY      2
+#define E1000_ERR_CONFIG   3
+#define E1000_ERR_PARAM    4
+#define E1000_ERR_MAC_TYPE 5
+#define E1000_ERR_PHY_TYPE 6
+#define E1000_ERR_RESET   9
+#define E1000_ERR_MASTER_REQUESTS_PENDING 10
+#define E1000_ERR_HOST_INTERFACE_COMMAND 11
+#define E1000_BLK_PHY_RESET   12
+
+#define E1000_BYTE_SWAP_WORD(_value) ((((_value) & 0x00ff) << 8) | \
+                                     (((_value) & 0xff00) >> 8))
+
+/* Function prototypes */
+/* Initialization */
+s32 e1000_reset_hw(struct e1000_hw *hw);
+s32 e1000_init_hw(struct e1000_hw *hw);
+s32 e1000_set_mac_type(struct e1000_hw *hw);
+void e1000_set_media_type(struct e1000_hw *hw);
+
+/* Link Configuration */
+s32 e1000_setup_link(struct e1000_hw *hw);
+s32 e1000_phy_setup_autoneg(struct e1000_hw *hw);
+void e1000_config_collision_dist(struct e1000_hw *hw);
+s32 e1000_check_for_link(struct e1000_hw *hw);
+s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 * speed, u16 * duplex);
+s32 e1000_force_mac_fc(struct e1000_hw *hw);
+
+/* PHY */
+s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 reg_addr, u16 * phy_data);
+s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 reg_addr, u16 data);
+s32 e1000_phy_hw_reset(struct e1000_hw *hw);
+s32 e1000_phy_reset(struct e1000_hw *hw);
+s32 e1000_phy_get_info(struct e1000_hw *hw, struct e1000_phy_info *phy_info);
+s32 e1000_validate_mdi_setting(struct e1000_hw *hw);
+
+/* EEPROM Functions */
+s32 e1000_init_eeprom_params(struct e1000_hw *hw);
+
+/* MNG HOST IF functions */
+u32 e1000_enable_mng_pass_thru(struct e1000_hw *hw);
+
+#define E1000_MNG_DHCP_TX_PAYLOAD_CMD   64
+#define E1000_HI_MAX_MNG_DATA_LENGTH    0x6F8	/* Host Interface data length */
+
+#define E1000_MNG_DHCP_COMMAND_TIMEOUT  10	/* Time in ms to process MNG command */
+#define E1000_MNG_DHCP_COOKIE_OFFSET    0x6F0	/* Cookie offset */
+#define E1000_MNG_DHCP_COOKIE_LENGTH    0x10	/* Cookie length */
+#define E1000_MNG_IAMT_MODE             0x3
+#define E1000_MNG_ICH_IAMT_MODE         0x2
+#define E1000_IAMT_SIGNATURE            0x544D4149	/* Intel(R) Active Management Technology signature */
+
+#define E1000_MNG_DHCP_COOKIE_STATUS_PARSING_SUPPORT 0x1	/* DHCP parsing enabled */
+#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT    0x2	/* DHCP parsing enabled */
+#define E1000_VFTA_ENTRY_SHIFT                       0x5
+#define E1000_VFTA_ENTRY_MASK                        0x7F
+#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK              0x1F
+
+struct e1000_host_mng_command_header {
+	u8 command_id;
+	u8 checksum;
+	u16 reserved1;
+	u16 reserved2;
+	u16 command_length;
+};
+
+struct e1000_host_mng_command_info {
+	struct e1000_host_mng_command_header command_header;	/* Command Head/Command Result Head has 4 bytes */
+	u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH];	/* Command data can length 0..0x658 */
+};
+#ifdef __BIG_ENDIAN
+struct e1000_host_mng_dhcp_cookie {
+	u32 signature;
+	u16 vlan_id;
+	u8 reserved0;
+	u8 status;
+	u32 reserved1;
+	u8 checksum;
+	u8 reserved3;
+	u16 reserved2;
+};
+#else
+struct e1000_host_mng_dhcp_cookie {
+	u32 signature;
+	u8 status;
+	u8 reserved0;
+	u16 vlan_id;
+	u32 reserved1;
+	u16 reserved2;
+	u8 reserved3;
+	u8 checksum;
+};
+#endif
+
+s32 e1000_read_eeprom(struct e1000_hw *hw, u16 reg, u16 words, u16 * data);
+s32 e1000_validate_eeprom_checksum(struct e1000_hw *hw);
+s32 e1000_update_eeprom_checksum(struct e1000_hw *hw);
+s32 e1000_write_eeprom(struct e1000_hw *hw, u16 reg, u16 words, u16 * data);
+s32 e1000_read_mac_addr(struct e1000_hw *hw);
+
+/* Filters (multicast, vlan, receive) */
+u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 * mc_addr);
+void e1000_rar_set(struct e1000_hw *hw, u8 * mc_addr, u32 rar_index);
+void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value);
+
+/* LED functions */
+s32 e1000_setup_led(struct e1000_hw *hw);
+s32 e1000_cleanup_led(struct e1000_hw *hw);
+s32 e1000_led_on(struct e1000_hw *hw);
+s32 e1000_led_off(struct e1000_hw *hw);
+
+/* Adaptive IFS Functions */
+
+/* Everything else */
+void e1000_reset_adaptive(struct e1000_hw *hw);
+void e1000_update_adaptive(struct e1000_hw *hw);
+void e1000_get_bus_info(struct e1000_hw *hw);
+void e1000_pci_set_mwi(struct e1000_hw *hw);
+void e1000_pci_clear_mwi(struct e1000_hw *hw);
+void e1000_pcix_set_mmrbc(struct e1000_hw *hw, int mmrbc);
+int e1000_pcix_get_mmrbc(struct e1000_hw *hw);
+/* Port I/O is only supported on 82544 and newer */
+void e1000_io_write(struct e1000_hw *hw, unsigned long port, u32 value);
+
+#define E1000_READ_REG_IO(a, reg) \
+    e1000_read_reg_io((a), E1000_##reg)
+#define E1000_WRITE_REG_IO(a, reg, val) \
+    e1000_write_reg_io((a), E1000_##reg, val)
+
+/* PCI Device IDs */
+#define E1000_DEV_ID_82542               0x1000
+#define E1000_DEV_ID_82543GC_FIBER       0x1001
+#define E1000_DEV_ID_82543GC_COPPER      0x1004
+#define E1000_DEV_ID_82544EI_COPPER      0x1008
+#define E1000_DEV_ID_82544EI_FIBER       0x1009
+#define E1000_DEV_ID_82544GC_COPPER      0x100C
+#define E1000_DEV_ID_82544GC_LOM         0x100D
+#define E1000_DEV_ID_82540EM             0x100E
+#define E1000_DEV_ID_82540EM_LOM         0x1015
+#define E1000_DEV_ID_82540EP_LOM         0x1016
+#define E1000_DEV_ID_82540EP             0x1017
+#define E1000_DEV_ID_82540EP_LP          0x101E
+#define E1000_DEV_ID_82545EM_COPPER      0x100F
+#define E1000_DEV_ID_82545EM_FIBER       0x1011
+#define E1000_DEV_ID_82545GM_COPPER      0x1026
+#define E1000_DEV_ID_82545GM_FIBER       0x1027
+#define E1000_DEV_ID_82545GM_SERDES      0x1028
+#define E1000_DEV_ID_82546EB_COPPER      0x1010
+#define E1000_DEV_ID_82546EB_FIBER       0x1012
+#define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D
+#define E1000_DEV_ID_82541EI             0x1013
+#define E1000_DEV_ID_82541EI_MOBILE      0x1018
+#define E1000_DEV_ID_82541ER_LOM         0x1014
+#define E1000_DEV_ID_82541ER             0x1078
+#define E1000_DEV_ID_82547GI             0x1075
+#define E1000_DEV_ID_82541GI             0x1076
+#define E1000_DEV_ID_82541GI_MOBILE      0x1077
+#define E1000_DEV_ID_82541GI_LF          0x107C
+#define E1000_DEV_ID_82546GB_COPPER      0x1079
+#define E1000_DEV_ID_82546GB_FIBER       0x107A
+#define E1000_DEV_ID_82546GB_SERDES      0x107B
+#define E1000_DEV_ID_82546GB_PCIE        0x108A
+#define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099
+#define E1000_DEV_ID_82547EI             0x1019
+#define E1000_DEV_ID_82547EI_MOBILE      0x101A
+#define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5
+#define E1000_DEV_ID_INTEL_CE4100_GBE    0x2E6E
+
+#define NODE_ADDRESS_SIZE 6
+
+/* MAC decode size is 128K - This is the size of BAR0 */
+#define MAC_DECODE_SIZE (128 * 1024)
+
+#define E1000_82542_2_0_REV_ID 2
+#define E1000_82542_2_1_REV_ID 3
+#define E1000_REVISION_0       0
+#define E1000_REVISION_1       1
+#define E1000_REVISION_2       2
+#define E1000_REVISION_3       3
+
+#define SPEED_10    10
+#define SPEED_100   100
+#define SPEED_1000  1000
+#define HALF_DUPLEX 1
+#define FULL_DUPLEX 2
+
+/* The sizes (in bytes) of a ethernet packet */
+#define ENET_HEADER_SIZE             14
+#define MINIMUM_ETHERNET_FRAME_SIZE  64	/* With FCS */
+#define ETHERNET_FCS_SIZE            4
+#define MINIMUM_ETHERNET_PACKET_SIZE \
+    (MINIMUM_ETHERNET_FRAME_SIZE - ETHERNET_FCS_SIZE)
+#define CRC_LENGTH                   ETHERNET_FCS_SIZE
+#define MAX_JUMBO_FRAME_SIZE         0x3F00
+
+/* 802.1q VLAN Packet Sizes */
+#define VLAN_TAG_SIZE  4	/* 802.3ac tag (not DMAed) */
+
+/* Ethertype field values */
+#define ETHERNET_IEEE_VLAN_TYPE 0x8100	/* 802.3ac packet */
+#define ETHERNET_IP_TYPE        0x0800	/* IP packets */
+#define ETHERNET_ARP_TYPE       0x0806	/* Address Resolution Protocol (ARP) */
+
+/* Packet Header defines */
+#define IP_PROTOCOL_TCP    6
+#define IP_PROTOCOL_UDP    0x11
+
+/* This defines the bits that are set in the Interrupt Mask
+ * Set/Read Register.  Each bit is documented below:
+ *   o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0)
+ *   o RXSEQ  = Receive Sequence Error
+ */
+#define POLL_IMS_ENABLE_MASK ( \
+    E1000_IMS_RXDMT0 |         \
+    E1000_IMS_RXSEQ)
+
+/* This defines the bits that are set in the Interrupt Mask
+ * Set/Read Register.  Each bit is documented below:
+ *   o RXT0   = Receiver Timer Interrupt (ring 0)
+ *   o TXDW   = Transmit Descriptor Written Back
+ *   o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0)
+ *   o RXSEQ  = Receive Sequence Error
+ *   o LSC    = Link Status Change
+ */
+#define IMS_ENABLE_MASK ( \
+    E1000_IMS_RXT0   |    \
+    E1000_IMS_TXDW   |    \
+    E1000_IMS_RXDMT0 |    \
+    E1000_IMS_RXSEQ  |    \
+    E1000_IMS_LSC)
+
+/* Number of high/low register pairs in the RAR. The RAR (Receive Address
+ * Registers) holds the directed and multicast addresses that we monitor. We
+ * reserve one of these spots for our directed address, allowing us room for
+ * E1000_RAR_ENTRIES - 1 multicast addresses.
+ */
+#define E1000_RAR_ENTRIES 15
+
+#define MIN_NUMBER_OF_DESCRIPTORS  8
+#define MAX_NUMBER_OF_DESCRIPTORS  0xFFF8
+
+/* Receive Descriptor */
+struct e1000_rx_desc {
+	__le64 buffer_addr;	/* Address of the descriptor's data buffer */
+	__le16 length;		/* Length of data DMAed into data buffer */
+	__le16 csum;		/* Packet checksum */
+	u8 status;		/* Descriptor status */
+	u8 errors;		/* Descriptor Errors */
+	__le16 special;
+};
+
+/* Receive Descriptor - Extended */
+union e1000_rx_desc_extended {
+	struct {
+		__le64 buffer_addr;
+		__le64 reserved;
+	} read;
+	struct {
+		struct {
+			__le32 mrq;	/* Multiple Rx Queues */
+			union {
+				__le32 rss;	/* RSS Hash */
+				struct {
+					__le16 ip_id;	/* IP id */
+					__le16 csum;	/* Packet Checksum */
+				} csum_ip;
+			} hi_dword;
+		} lower;
+		struct {
+			__le32 status_error;	/* ext status/error */
+			__le16 length;
+			__le16 vlan;	/* VLAN tag */
+		} upper;
+	} wb;			/* writeback */
+};
+
+#define MAX_PS_BUFFERS 4
+/* Receive Descriptor - Packet Split */
+union e1000_rx_desc_packet_split {
+	struct {
+		/* one buffer for protocol header(s), three data buffers */
+		__le64 buffer_addr[MAX_PS_BUFFERS];
+	} read;
+	struct {
+		struct {
+			__le32 mrq;	/* Multiple Rx Queues */
+			union {
+				__le32 rss;	/* RSS Hash */
+				struct {
+					__le16 ip_id;	/* IP id */
+					__le16 csum;	/* Packet Checksum */
+				} csum_ip;
+			} hi_dword;
+		} lower;
+		struct {
+			__le32 status_error;	/* ext status/error */
+			__le16 length0;	/* length of buffer 0 */
+			__le16 vlan;	/* VLAN tag */
+		} middle;
+		struct {
+			__le16 header_status;
+			__le16 length[3];	/* length of buffers 1-3 */
+		} upper;
+		__le64 reserved;
+	} wb;			/* writeback */
+};
+
+/* Receive Descriptor bit definitions */
+#define E1000_RXD_STAT_DD       0x01	/* Descriptor Done */
+#define E1000_RXD_STAT_EOP      0x02	/* End of Packet */
+#define E1000_RXD_STAT_IXSM     0x04	/* Ignore checksum */
+#define E1000_RXD_STAT_VP       0x08	/* IEEE VLAN Packet */
+#define E1000_RXD_STAT_UDPCS    0x10	/* UDP xsum calculated */
+#define E1000_RXD_STAT_TCPCS    0x20	/* TCP xsum calculated */
+#define E1000_RXD_STAT_IPCS     0x40	/* IP xsum calculated */
+#define E1000_RXD_STAT_PIF      0x80	/* passed in-exact filter */
+#define E1000_RXD_STAT_IPIDV    0x200	/* IP identification valid */
+#define E1000_RXD_STAT_UDPV     0x400	/* Valid UDP checksum */
+#define E1000_RXD_STAT_ACK      0x8000	/* ACK Packet indication */
+#define E1000_RXD_ERR_CE        0x01	/* CRC Error */
+#define E1000_RXD_ERR_SE        0x02	/* Symbol Error */
+#define E1000_RXD_ERR_SEQ       0x04	/* Sequence Error */
+#define E1000_RXD_ERR_CXE       0x10	/* Carrier Extension Error */
+#define E1000_RXD_ERR_TCPE      0x20	/* TCP/UDP Checksum Error */
+#define E1000_RXD_ERR_IPE       0x40	/* IP Checksum Error */
+#define E1000_RXD_ERR_RXE       0x80	/* Rx Data Error */
+#define E1000_RXD_SPC_VLAN_MASK 0x0FFF	/* VLAN ID is in lower 12 bits */
+#define E1000_RXD_SPC_PRI_MASK  0xE000	/* Priority is in upper 3 bits */
+#define E1000_RXD_SPC_PRI_SHIFT 13
+#define E1000_RXD_SPC_CFI_MASK  0x1000	/* CFI is bit 12 */
+#define E1000_RXD_SPC_CFI_SHIFT 12
+
+#define E1000_RXDEXT_STATERR_CE    0x01000000
+#define E1000_RXDEXT_STATERR_SE    0x02000000
+#define E1000_RXDEXT_STATERR_SEQ   0x04000000
+#define E1000_RXDEXT_STATERR_CXE   0x10000000
+#define E1000_RXDEXT_STATERR_TCPE  0x20000000
+#define E1000_RXDEXT_STATERR_IPE   0x40000000
+#define E1000_RXDEXT_STATERR_RXE   0x80000000
+
+#define E1000_RXDPS_HDRSTAT_HDRSP        0x00008000
+#define E1000_RXDPS_HDRSTAT_HDRLEN_MASK  0x000003FF
+
+/* mask to determine if packets should be dropped due to frame errors */
+#define E1000_RXD_ERR_FRAME_ERR_MASK ( \
+    E1000_RXD_ERR_CE  |                \
+    E1000_RXD_ERR_SE  |                \
+    E1000_RXD_ERR_SEQ |                \
+    E1000_RXD_ERR_CXE |                \
+    E1000_RXD_ERR_RXE)
+
+/* Same mask, but for extended and packet split descriptors */
+#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \
+    E1000_RXDEXT_STATERR_CE  |            \
+    E1000_RXDEXT_STATERR_SE  |            \
+    E1000_RXDEXT_STATERR_SEQ |            \
+    E1000_RXDEXT_STATERR_CXE |            \
+    E1000_RXDEXT_STATERR_RXE)
+
+/* Transmit Descriptor */
+struct e1000_tx_desc {
+	__le64 buffer_addr;	/* Address of the descriptor's data buffer */
+	union {
+		__le32 data;
+		struct {
+			__le16 length;	/* Data buffer length */
+			u8 cso;	/* Checksum offset */
+			u8 cmd;	/* Descriptor control */
+		} flags;
+	} lower;
+	union {
+		__le32 data;
+		struct {
+			u8 status;	/* Descriptor status */
+			u8 css;	/* Checksum start */
+			__le16 special;
+		} fields;
+	} upper;
+};
+
+/* Transmit Descriptor bit definitions */
+#define E1000_TXD_DTYP_D     0x00100000	/* Data Descriptor */
+#define E1000_TXD_DTYP_C     0x00000000	/* Context Descriptor */
+#define E1000_TXD_POPTS_IXSM 0x01	/* Insert IP checksum */
+#define E1000_TXD_POPTS_TXSM 0x02	/* Insert TCP/UDP checksum */
+#define E1000_TXD_CMD_EOP    0x01000000	/* End of Packet */
+#define E1000_TXD_CMD_IFCS   0x02000000	/* Insert FCS (Ethernet CRC) */
+#define E1000_TXD_CMD_IC     0x04000000	/* Insert Checksum */
+#define E1000_TXD_CMD_RS     0x08000000	/* Report Status */
+#define E1000_TXD_CMD_RPS    0x10000000	/* Report Packet Sent */
+#define E1000_TXD_CMD_DEXT   0x20000000	/* Descriptor extension (0 = legacy) */
+#define E1000_TXD_CMD_VLE    0x40000000	/* Add VLAN tag */
+#define E1000_TXD_CMD_IDE    0x80000000	/* Enable Tidv register */
+#define E1000_TXD_STAT_DD    0x00000001	/* Descriptor Done */
+#define E1000_TXD_STAT_EC    0x00000002	/* Excess Collisions */
+#define E1000_TXD_STAT_LC    0x00000004	/* Late Collisions */
+#define E1000_TXD_STAT_TU    0x00000008	/* Transmit underrun */
+#define E1000_TXD_CMD_TCP    0x01000000	/* TCP packet */
+#define E1000_TXD_CMD_IP     0x02000000	/* IP packet */
+#define E1000_TXD_CMD_TSE    0x04000000	/* TCP Seg enable */
+#define E1000_TXD_STAT_TC    0x00000004	/* Tx Underrun */
+
+/* Offload Context Descriptor */
+struct e1000_context_desc {
+	union {
+		__le32 ip_config;
+		struct {
+			u8 ipcss;	/* IP checksum start */
+			u8 ipcso;	/* IP checksum offset */
+			__le16 ipcse;	/* IP checksum end */
+		} ip_fields;
+	} lower_setup;
+	union {
+		__le32 tcp_config;
+		struct {
+			u8 tucss;	/* TCP checksum start */
+			u8 tucso;	/* TCP checksum offset */
+			__le16 tucse;	/* TCP checksum end */
+		} tcp_fields;
+	} upper_setup;
+	__le32 cmd_and_length;	/* */
+	union {
+		__le32 data;
+		struct {
+			u8 status;	/* Descriptor status */
+			u8 hdr_len;	/* Header length */
+			__le16 mss;	/* Maximum segment size */
+		} fields;
+	} tcp_seg_setup;
+};
+
+/* Offload data descriptor */
+struct e1000_data_desc {
+	__le64 buffer_addr;	/* Address of the descriptor's buffer address */
+	union {
+		__le32 data;
+		struct {
+			__le16 length;	/* Data buffer length */
+			u8 typ_len_ext;	/* */
+			u8 cmd;	/* */
+		} flags;
+	} lower;
+	union {
+		__le32 data;
+		struct {
+			u8 status;	/* Descriptor status */
+			u8 popts;	/* Packet Options */
+			__le16 special;	/* */
+		} fields;
+	} upper;
+};
+
+/* Filters */
+#define E1000_NUM_UNICAST          16	/* Unicast filter entries */
+#define E1000_MC_TBL_SIZE          128	/* Multicast Filter Table (4096 bits) */
+#define E1000_VLAN_FILTER_TBL_SIZE 128	/* VLAN Filter Table (4096 bits) */
+
+/* Receive Address Register */
+struct e1000_rar {
+	volatile __le32 low;	/* receive address low */
+	volatile __le32 high;	/* receive address high */
+};
+
+/* Number of entries in the Multicast Table Array (MTA). */
+#define E1000_NUM_MTA_REGISTERS 128
+
+/* IPv4 Address Table Entry */
+struct e1000_ipv4_at_entry {
+	volatile u32 ipv4_addr;	/* IP Address (RW) */
+	volatile u32 reserved;
+};
+
+/* Four wakeup IP addresses are supported */
+#define E1000_WAKEUP_IP_ADDRESS_COUNT_MAX 4
+#define E1000_IP4AT_SIZE                  E1000_WAKEUP_IP_ADDRESS_COUNT_MAX
+#define E1000_IP6AT_SIZE                  1
+
+/* IPv6 Address Table Entry */
+struct e1000_ipv6_at_entry {
+	volatile u8 ipv6_addr[16];
+};
+
+/* Flexible Filter Length Table Entry */
+struct e1000_fflt_entry {
+	volatile u32 length;	/* Flexible Filter Length (RW) */
+	volatile u32 reserved;
+};
+
+/* Flexible Filter Mask Table Entry */
+struct e1000_ffmt_entry {
+	volatile u32 mask;	/* Flexible Filter Mask (RW) */
+	volatile u32 reserved;
+};
+
+/* Flexible Filter Value Table Entry */
+struct e1000_ffvt_entry {
+	volatile u32 value;	/* Flexible Filter Value (RW) */
+	volatile u32 reserved;
+};
+
+/* Four Flexible Filters are supported */
+#define E1000_FLEXIBLE_FILTER_COUNT_MAX 4
+
+/* Each Flexible Filter is at most 128 (0x80) bytes in length */
+#define E1000_FLEXIBLE_FILTER_SIZE_MAX  128
+
+#define E1000_FFLT_SIZE E1000_FLEXIBLE_FILTER_COUNT_MAX
+#define E1000_FFMT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX
+#define E1000_FFVT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX
+
+#define E1000_DISABLE_SERDES_LOOPBACK   0x0400
+
+/* Register Set. (82543, 82544)
+ *
+ * Registers are defined to be 32 bits and  should be accessed as 32 bit values.
+ * These registers are physically located on the NIC, but are mapped into the
+ * host memory address space.
+ *
+ * RW - register is both readable and writable
+ * RO - register is read only
+ * WO - register is write only
+ * R/clr - register is read only and is cleared when read
+ * A - register array
+ */
+#define E1000_CTRL     0x00000	/* Device Control - RW */
+#define E1000_CTRL_DUP 0x00004	/* Device Control Duplicate (Shadow) - RW */
+#define E1000_STATUS   0x00008	/* Device Status - RO */
+#define E1000_EECD     0x00010	/* EEPROM/Flash Control - RW */
+#define E1000_EERD     0x00014	/* EEPROM Read - RW */
+#define E1000_CTRL_EXT 0x00018	/* Extended Device Control - RW */
+#define E1000_FLA      0x0001C	/* Flash Access - RW */
+#define E1000_MDIC     0x00020	/* MDI Control - RW */
+
+#define INTEL_CE_GBE_MDIO_RCOMP_BASE    (hw->ce4100_gbe_mdio_base_virt)
+#define E1000_MDIO_STS  (INTEL_CE_GBE_MDIO_RCOMP_BASE + 0)
+#define E1000_MDIO_CMD  (INTEL_CE_GBE_MDIO_RCOMP_BASE + 4)
+#define E1000_MDIO_DRV  (INTEL_CE_GBE_MDIO_RCOMP_BASE + 8)
+#define E1000_MDC_CMD   (INTEL_CE_GBE_MDIO_RCOMP_BASE + 0xC)
+#define E1000_RCOMP_CTL (INTEL_CE_GBE_MDIO_RCOMP_BASE + 0x20)
+#define E1000_RCOMP_STS (INTEL_CE_GBE_MDIO_RCOMP_BASE + 0x24)
+
+#define E1000_SCTL     0x00024	/* SerDes Control - RW */
+#define E1000_FEXTNVM  0x00028	/* Future Extended NVM register */
+#define E1000_FCAL     0x00028	/* Flow Control Address Low - RW */
+#define E1000_FCAH     0x0002C	/* Flow Control Address High -RW */
+#define E1000_FCT      0x00030	/* Flow Control Type - RW */
+#define E1000_VET      0x00038	/* VLAN Ether Type - RW */
+#define E1000_ICR      0x000C0	/* Interrupt Cause Read - R/clr */
+#define E1000_ITR      0x000C4	/* Interrupt Throttling Rate - RW */
+#define E1000_ICS      0x000C8	/* Interrupt Cause Set - WO */
+#define E1000_IMS      0x000D0	/* Interrupt Mask Set - RW */
+#define E1000_IMC      0x000D8	/* Interrupt Mask Clear - WO */
+#define E1000_IAM      0x000E0	/* Interrupt Acknowledge Auto Mask */
+
+/* Auxiliary Control Register. This register is CE4100 specific,
+ * RMII/RGMII function is switched by this register - RW
+ * Following are bits definitions of the Auxiliary Control Register
+ */
+#define E1000_CTL_AUX  0x000E0
+#define E1000_CTL_AUX_END_SEL_SHIFT     10
+#define E1000_CTL_AUX_ENDIANESS_SHIFT   8
+#define E1000_CTL_AUX_RGMII_RMII_SHIFT  0
+
+/* descriptor and packet transfer use CTL_AUX.ENDIANESS */
+#define E1000_CTL_AUX_DES_PKT   (0x0 << E1000_CTL_AUX_END_SEL_SHIFT)
+/* descriptor use CTL_AUX.ENDIANESS, packet use default */
+#define E1000_CTL_AUX_DES       (0x1 << E1000_CTL_AUX_END_SEL_SHIFT)
+/* descriptor use default, packet use CTL_AUX.ENDIANESS */
+#define E1000_CTL_AUX_PKT       (0x2 << E1000_CTL_AUX_END_SEL_SHIFT)
+/* all use CTL_AUX.ENDIANESS */
+#define E1000_CTL_AUX_ALL       (0x3 << E1000_CTL_AUX_END_SEL_SHIFT)
+
+#define E1000_CTL_AUX_RGMII     (0x0 << E1000_CTL_AUX_RGMII_RMII_SHIFT)
+#define E1000_CTL_AUX_RMII      (0x1 << E1000_CTL_AUX_RGMII_RMII_SHIFT)
+
+/* LW little endian, Byte big endian */
+#define E1000_CTL_AUX_LWLE_BBE  (0x0 << E1000_CTL_AUX_ENDIANESS_SHIFT)
+#define E1000_CTL_AUX_LWLE_BLE  (0x1 << E1000_CTL_AUX_ENDIANESS_SHIFT)
+#define E1000_CTL_AUX_LWBE_BBE  (0x2 << E1000_CTL_AUX_ENDIANESS_SHIFT)
+#define E1000_CTL_AUX_LWBE_BLE  (0x3 << E1000_CTL_AUX_ENDIANESS_SHIFT)
+
+#define E1000_RCTL     0x00100	/* RX Control - RW */
+#define E1000_RDTR1    0x02820	/* RX Delay Timer (1) - RW */
+#define E1000_RDBAL1   0x02900	/* RX Descriptor Base Address Low (1) - RW */
+#define E1000_RDBAH1   0x02904	/* RX Descriptor Base Address High (1) - RW */
+#define E1000_RDLEN1   0x02908	/* RX Descriptor Length (1) - RW */
+#define E1000_RDH1     0x02910	/* RX Descriptor Head (1) - RW */
+#define E1000_RDT1     0x02918	/* RX Descriptor Tail (1) - RW */
+#define E1000_FCTTV    0x00170	/* Flow Control Transmit Timer Value - RW */
+#define E1000_TXCW     0x00178	/* TX Configuration Word - RW */
+#define E1000_RXCW     0x00180	/* RX Configuration Word - RO */
+#define E1000_TCTL     0x00400	/* TX Control - RW */
+#define E1000_TCTL_EXT 0x00404	/* Extended TX Control - RW */
+#define E1000_TIPG     0x00410	/* TX Inter-packet gap -RW */
+#define E1000_TBT      0x00448	/* TX Burst Timer - RW */
+#define E1000_AIT      0x00458	/* Adaptive Interframe Spacing Throttle - RW */
+#define E1000_LEDCTL   0x00E00	/* LED Control - RW */
+#define E1000_EXTCNF_CTRL  0x00F00	/* Extended Configuration Control */
+#define E1000_EXTCNF_SIZE  0x00F08	/* Extended Configuration Size */
+#define E1000_PHY_CTRL     0x00F10	/* PHY Control Register in CSR */
+#define FEXTNVM_SW_CONFIG  0x0001
+#define E1000_PBA      0x01000	/* Packet Buffer Allocation - RW */
+#define E1000_PBS      0x01008	/* Packet Buffer Size */
+#define E1000_EEMNGCTL 0x01010	/* MNG EEprom Control */
+#define E1000_FLASH_UPDATES 1000
+#define E1000_EEARBC   0x01024	/* EEPROM Auto Read Bus Control */
+#define E1000_FLASHT   0x01028	/* FLASH Timer Register */
+#define E1000_EEWR     0x0102C	/* EEPROM Write Register - RW */
+#define E1000_FLSWCTL  0x01030	/* FLASH control register */
+#define E1000_FLSWDATA 0x01034	/* FLASH data register */
+#define E1000_FLSWCNT  0x01038	/* FLASH Access Counter */
+#define E1000_FLOP     0x0103C	/* FLASH Opcode Register */
+#define E1000_ERT      0x02008	/* Early Rx Threshold - RW */
+#define E1000_FCRTL    0x02160	/* Flow Control Receive Threshold Low - RW */
+#define E1000_FCRTH    0x02168	/* Flow Control Receive Threshold High - RW */
+#define E1000_PSRCTL   0x02170	/* Packet Split Receive Control - RW */
+#define E1000_RDFH     0x02410  /* RX Data FIFO Head - RW */
+#define E1000_RDFT     0x02418  /* RX Data FIFO Tail - RW */
+#define E1000_RDFHS    0x02420  /* RX Data FIFO Head Saved - RW */
+#define E1000_RDFTS    0x02428  /* RX Data FIFO Tail Saved - RW */
+#define E1000_RDFPC    0x02430  /* RX Data FIFO Packet Count - RW */
+#define E1000_RDBAL    0x02800	/* RX Descriptor Base Address Low - RW */
+#define E1000_RDBAH    0x02804	/* RX Descriptor Base Address High - RW */
+#define E1000_RDLEN    0x02808	/* RX Descriptor Length - RW */
+#define E1000_RDH      0x02810	/* RX Descriptor Head - RW */
+#define E1000_RDT      0x02818	/* RX Descriptor Tail - RW */
+#define E1000_RDTR     0x02820	/* RX Delay Timer - RW */
+#define E1000_RDBAL0   E1000_RDBAL	/* RX Desc Base Address Low (0) - RW */
+#define E1000_RDBAH0   E1000_RDBAH	/* RX Desc Base Address High (0) - RW */
+#define E1000_RDLEN0   E1000_RDLEN	/* RX Desc Length (0) - RW */
+#define E1000_RDH0     E1000_RDH	/* RX Desc Head (0) - RW */
+#define E1000_RDT0     E1000_RDT	/* RX Desc Tail (0) - RW */
+#define E1000_RDTR0    E1000_RDTR	/* RX Delay Timer (0) - RW */
+#define E1000_RXDCTL   0x02828	/* RX Descriptor Control queue 0 - RW */
+#define E1000_RXDCTL1  0x02928	/* RX Descriptor Control queue 1 - RW */
+#define E1000_RADV     0x0282C	/* RX Interrupt Absolute Delay Timer - RW */
+#define E1000_RSRPD    0x02C00	/* RX Small Packet Detect - RW */
+#define E1000_RAID     0x02C08	/* Receive Ack Interrupt Delay - RW */
+#define E1000_TXDMAC   0x03000	/* TX DMA Control - RW */
+#define E1000_KABGTXD  0x03004	/* AFE Band Gap Transmit Ref Data */
+#define E1000_TDFH     0x03410	/* TX Data FIFO Head - RW */
+#define E1000_TDFT     0x03418	/* TX Data FIFO Tail - RW */
+#define E1000_TDFHS    0x03420	/* TX Data FIFO Head Saved - RW */
+#define E1000_TDFTS    0x03428	/* TX Data FIFO Tail Saved - RW */
+#define E1000_TDFPC    0x03430	/* TX Data FIFO Packet Count - RW */
+#define E1000_TDBAL    0x03800	/* TX Descriptor Base Address Low - RW */
+#define E1000_TDBAH    0x03804	/* TX Descriptor Base Address High - RW */
+#define E1000_TDLEN    0x03808	/* TX Descriptor Length - RW */
+#define E1000_TDH      0x03810	/* TX Descriptor Head - RW */
+#define E1000_TDT      0x03818	/* TX Descripotr Tail - RW */
+#define E1000_TIDV     0x03820	/* TX Interrupt Delay Value - RW */
+#define E1000_TXDCTL   0x03828	/* TX Descriptor Control - RW */
+#define E1000_TADV     0x0382C	/* TX Interrupt Absolute Delay Val - RW */
+#define E1000_TSPMT    0x03830	/* TCP Segmentation PAD & Min Threshold - RW */
+#define E1000_TARC0    0x03840	/* TX Arbitration Count (0) */
+#define E1000_TDBAL1   0x03900	/* TX Desc Base Address Low (1) - RW */
+#define E1000_TDBAH1   0x03904	/* TX Desc Base Address High (1) - RW */
+#define E1000_TDLEN1   0x03908	/* TX Desc Length (1) - RW */
+#define E1000_TDH1     0x03910	/* TX Desc Head (1) - RW */
+#define E1000_TDT1     0x03918	/* TX Desc Tail (1) - RW */
+#define E1000_TXDCTL1  0x03928	/* TX Descriptor Control (1) - RW */
+#define E1000_TARC1    0x03940	/* TX Arbitration Count (1) */
+#define E1000_CRCERRS  0x04000	/* CRC Error Count - R/clr */
+#define E1000_ALGNERRC 0x04004	/* Alignment Error Count - R/clr */
+#define E1000_SYMERRS  0x04008	/* Symbol Error Count - R/clr */
+#define E1000_RXERRC   0x0400C	/* Receive Error Count - R/clr */
+#define E1000_MPC      0x04010	/* Missed Packet Count - R/clr */
+#define E1000_SCC      0x04014	/* Single Collision Count - R/clr */
+#define E1000_ECOL     0x04018	/* Excessive Collision Count - R/clr */
+#define E1000_MCC      0x0401C	/* Multiple Collision Count - R/clr */
+#define E1000_LATECOL  0x04020	/* Late Collision Count - R/clr */
+#define E1000_COLC     0x04028	/* Collision Count - R/clr */
+#define E1000_DC       0x04030	/* Defer Count - R/clr */
+#define E1000_TNCRS    0x04034	/* TX-No CRS - R/clr */
+#define E1000_SEC      0x04038	/* Sequence Error Count - R/clr */
+#define E1000_CEXTERR  0x0403C	/* Carrier Extension Error Count - R/clr */
+#define E1000_RLEC     0x04040	/* Receive Length Error Count - R/clr */
+#define E1000_XONRXC   0x04048	/* XON RX Count - R/clr */
+#define E1000_XONTXC   0x0404C	/* XON TX Count - R/clr */
+#define E1000_XOFFRXC  0x04050	/* XOFF RX Count - R/clr */
+#define E1000_XOFFTXC  0x04054	/* XOFF TX Count - R/clr */
+#define E1000_FCRUC    0x04058	/* Flow Control RX Unsupported Count- R/clr */
+#define E1000_PRC64    0x0405C	/* Packets RX (64 bytes) - R/clr */
+#define E1000_PRC127   0x04060	/* Packets RX (65-127 bytes) - R/clr */
+#define E1000_PRC255   0x04064	/* Packets RX (128-255 bytes) - R/clr */
+#define E1000_PRC511   0x04068	/* Packets RX (255-511 bytes) - R/clr */
+#define E1000_PRC1023  0x0406C	/* Packets RX (512-1023 bytes) - R/clr */
+#define E1000_PRC1522  0x04070	/* Packets RX (1024-1522 bytes) - R/clr */
+#define E1000_GPRC     0x04074	/* Good Packets RX Count - R/clr */
+#define E1000_BPRC     0x04078	/* Broadcast Packets RX Count - R/clr */
+#define E1000_MPRC     0x0407C	/* Multicast Packets RX Count - R/clr */
+#define E1000_GPTC     0x04080	/* Good Packets TX Count - R/clr */
+#define E1000_GORCL    0x04088	/* Good Octets RX Count Low - R/clr */
+#define E1000_GORCH    0x0408C	/* Good Octets RX Count High - R/clr */
+#define E1000_GOTCL    0x04090	/* Good Octets TX Count Low - R/clr */
+#define E1000_GOTCH    0x04094	/* Good Octets TX Count High - R/clr */
+#define E1000_RNBC     0x040A0	/* RX No Buffers Count - R/clr */
+#define E1000_RUC      0x040A4	/* RX Undersize Count - R/clr */
+#define E1000_RFC      0x040A8	/* RX Fragment Count - R/clr */
+#define E1000_ROC      0x040AC	/* RX Oversize Count - R/clr */
+#define E1000_RJC      0x040B0	/* RX Jabber Count - R/clr */
+#define E1000_MGTPRC   0x040B4	/* Management Packets RX Count - R/clr */
+#define E1000_MGTPDC   0x040B8	/* Management Packets Dropped Count - R/clr */
+#define E1000_MGTPTC   0x040BC	/* Management Packets TX Count - R/clr */
+#define E1000_TORL     0x040C0	/* Total Octets RX Low - R/clr */
+#define E1000_TORH     0x040C4	/* Total Octets RX High - R/clr */
+#define E1000_TOTL     0x040C8	/* Total Octets TX Low - R/clr */
+#define E1000_TOTH     0x040CC	/* Total Octets TX High - R/clr */
+#define E1000_TPR      0x040D0	/* Total Packets RX - R/clr */
+#define E1000_TPT      0x040D4	/* Total Packets TX - R/clr */
+#define E1000_PTC64    0x040D8	/* Packets TX (64 bytes) - R/clr */
+#define E1000_PTC127   0x040DC	/* Packets TX (65-127 bytes) - R/clr */
+#define E1000_PTC255   0x040E0	/* Packets TX (128-255 bytes) - R/clr */
+#define E1000_PTC511   0x040E4	/* Packets TX (256-511 bytes) - R/clr */
+#define E1000_PTC1023  0x040E8	/* Packets TX (512-1023 bytes) - R/clr */
+#define E1000_PTC1522  0x040EC	/* Packets TX (1024-1522 Bytes) - R/clr */
+#define E1000_MPTC     0x040F0	/* Multicast Packets TX Count - R/clr */
+#define E1000_BPTC     0x040F4	/* Broadcast Packets TX Count - R/clr */
+#define E1000_TSCTC    0x040F8	/* TCP Segmentation Context TX - R/clr */
+#define E1000_TSCTFC   0x040FC	/* TCP Segmentation Context TX Fail - R/clr */
+#define E1000_IAC      0x04100	/* Interrupt Assertion Count */
+#define E1000_ICRXPTC  0x04104	/* Interrupt Cause Rx Packet Timer Expire Count */
+#define E1000_ICRXATC  0x04108	/* Interrupt Cause Rx Absolute Timer Expire Count */
+#define E1000_ICTXPTC  0x0410C	/* Interrupt Cause Tx Packet Timer Expire Count */
+#define E1000_ICTXATC  0x04110	/* Interrupt Cause Tx Absolute Timer Expire Count */
+#define E1000_ICTXQEC  0x04118	/* Interrupt Cause Tx Queue Empty Count */
+#define E1000_ICTXQMTC 0x0411C	/* Interrupt Cause Tx Queue Minimum Threshold Count */
+#define E1000_ICRXDMTC 0x04120	/* Interrupt Cause Rx Descriptor Minimum Threshold Count */
+#define E1000_ICRXOC   0x04124	/* Interrupt Cause Receiver Overrun Count */
+#define E1000_RXCSUM   0x05000	/* RX Checksum Control - RW */
+#define E1000_RFCTL    0x05008	/* Receive Filter Control */
+#define E1000_MTA      0x05200	/* Multicast Table Array - RW Array */
+#define E1000_RA       0x05400	/* Receive Address - RW Array */
+#define E1000_VFTA     0x05600	/* VLAN Filter Table Array - RW Array */
+#define E1000_WUC      0x05800	/* Wakeup Control - RW */
+#define E1000_WUFC     0x05808	/* Wakeup Filter Control - RW */
+#define E1000_WUS      0x05810	/* Wakeup Status - RO */
+#define E1000_MANC     0x05820	/* Management Control - RW */
+#define E1000_IPAV     0x05838	/* IP Address Valid - RW */
+#define E1000_IP4AT    0x05840	/* IPv4 Address Table - RW Array */
+#define E1000_IP6AT    0x05880	/* IPv6 Address Table - RW Array */
+#define E1000_WUPL     0x05900	/* Wakeup Packet Length - RW */
+#define E1000_WUPM     0x05A00	/* Wakeup Packet Memory - RO A */
+#define E1000_FFLT     0x05F00	/* Flexible Filter Length Table - RW Array */
+#define E1000_HOST_IF  0x08800	/* Host Interface */
+#define E1000_FFMT     0x09000	/* Flexible Filter Mask Table - RW Array */
+#define E1000_FFVT     0x09800	/* Flexible Filter Value Table - RW Array */
+
+#define E1000_KUMCTRLSTA 0x00034	/* MAC-PHY interface - RW */
+#define E1000_MDPHYA     0x0003C	/* PHY address - RW */
+#define E1000_MANC2H     0x05860	/* Management Control To Host - RW */
+#define E1000_SW_FW_SYNC 0x05B5C	/* Software-Firmware Synchronization - RW */
+
+#define E1000_GCR       0x05B00	/* PCI-Ex Control */
+#define E1000_GSCL_1    0x05B10	/* PCI-Ex Statistic Control #1 */
+#define E1000_GSCL_2    0x05B14	/* PCI-Ex Statistic Control #2 */
+#define E1000_GSCL_3    0x05B18	/* PCI-Ex Statistic Control #3 */
+#define E1000_GSCL_4    0x05B1C	/* PCI-Ex Statistic Control #4 */
+#define E1000_FACTPS    0x05B30	/* Function Active and Power State to MNG */
+#define E1000_SWSM      0x05B50	/* SW Semaphore */
+#define E1000_FWSM      0x05B54	/* FW Semaphore */
+#define E1000_FFLT_DBG  0x05F04	/* Debug Register */
+#define E1000_HICR      0x08F00	/* Host Interface Control */
+
+/* RSS registers */
+#define E1000_CPUVEC    0x02C10	/* CPU Vector Register - RW */
+#define E1000_MRQC      0x05818	/* Multiple Receive Control - RW */
+#define E1000_RETA      0x05C00	/* Redirection Table - RW Array */
+#define E1000_RSSRK     0x05C80	/* RSS Random Key - RW Array */
+#define E1000_RSSIM     0x05864	/* RSS Interrupt Mask */
+#define E1000_RSSIR     0x05868	/* RSS Interrupt Request */
+/* Register Set (82542)
+ *
+ * Some of the 82542 registers are located at different offsets than they are
+ * in more current versions of the 8254x. Despite the difference in location,
+ * the registers function in the same manner.
+ */
+#define E1000_82542_CTL_AUX  E1000_CTL_AUX
+#define E1000_82542_CTRL     E1000_CTRL
+#define E1000_82542_CTRL_DUP E1000_CTRL_DUP
+#define E1000_82542_STATUS   E1000_STATUS
+#define E1000_82542_EECD     E1000_EECD
+#define E1000_82542_EERD     E1000_EERD
+#define E1000_82542_CTRL_EXT E1000_CTRL_EXT
+#define E1000_82542_FLA      E1000_FLA
+#define E1000_82542_MDIC     E1000_MDIC
+#define E1000_82542_SCTL     E1000_SCTL
+#define E1000_82542_FEXTNVM  E1000_FEXTNVM
+#define E1000_82542_FCAL     E1000_FCAL
+#define E1000_82542_FCAH     E1000_FCAH
+#define E1000_82542_FCT      E1000_FCT
+#define E1000_82542_VET      E1000_VET
+#define E1000_82542_RA       0x00040
+#define E1000_82542_ICR      E1000_ICR
+#define E1000_82542_ITR      E1000_ITR
+#define E1000_82542_ICS      E1000_ICS
+#define E1000_82542_IMS      E1000_IMS
+#define E1000_82542_IMC      E1000_IMC
+#define E1000_82542_RCTL     E1000_RCTL
+#define E1000_82542_RDTR     0x00108
+#define E1000_82542_RDFH     E1000_RDFH
+#define E1000_82542_RDFT     E1000_RDFT
+#define E1000_82542_RDFHS    E1000_RDFHS
+#define E1000_82542_RDFTS    E1000_RDFTS
+#define E1000_82542_RDFPC    E1000_RDFPC
+#define E1000_82542_RDBAL    0x00110
+#define E1000_82542_RDBAH    0x00114
+#define E1000_82542_RDLEN    0x00118
+#define E1000_82542_RDH      0x00120
+#define E1000_82542_RDT      0x00128
+#define E1000_82542_RDTR0    E1000_82542_RDTR
+#define E1000_82542_RDBAL0   E1000_82542_RDBAL
+#define E1000_82542_RDBAH0   E1000_82542_RDBAH
+#define E1000_82542_RDLEN0   E1000_82542_RDLEN
+#define E1000_82542_RDH0     E1000_82542_RDH
+#define E1000_82542_RDT0     E1000_82542_RDT
+#define E1000_82542_SRRCTL(_n) (0x280C + ((_n) << 8))	/* Split and Replication
+							 * RX Control - RW */
+#define E1000_82542_DCA_RXCTRL(_n) (0x02814 + ((_n) << 8))
+#define E1000_82542_RDBAH3   0x02B04	/* RX Desc Base High Queue 3 - RW */
+#define E1000_82542_RDBAL3   0x02B00	/* RX Desc Low Queue 3 - RW */
+#define E1000_82542_RDLEN3   0x02B08	/* RX Desc Length Queue 3 - RW */
+#define E1000_82542_RDH3     0x02B10	/* RX Desc Head Queue 3 - RW */
+#define E1000_82542_RDT3     0x02B18	/* RX Desc Tail Queue 3 - RW */
+#define E1000_82542_RDBAL2   0x02A00	/* RX Desc Base Low Queue 2 - RW */
+#define E1000_82542_RDBAH2   0x02A04	/* RX Desc Base High Queue 2 - RW */
+#define E1000_82542_RDLEN2   0x02A08	/* RX Desc Length Queue 2 - RW */
+#define E1000_82542_RDH2     0x02A10	/* RX Desc Head Queue 2 - RW */
+#define E1000_82542_RDT2     0x02A18	/* RX Desc Tail Queue 2 - RW */
+#define E1000_82542_RDTR1    0x00130
+#define E1000_82542_RDBAL1   0x00138
+#define E1000_82542_RDBAH1   0x0013C
+#define E1000_82542_RDLEN1   0x00140
+#define E1000_82542_RDH1     0x00148
+#define E1000_82542_RDT1     0x00150
+#define E1000_82542_FCRTH    0x00160
+#define E1000_82542_FCRTL    0x00168
+#define E1000_82542_FCTTV    E1000_FCTTV
+#define E1000_82542_TXCW     E1000_TXCW
+#define E1000_82542_RXCW     E1000_RXCW
+#define E1000_82542_MTA      0x00200
+#define E1000_82542_TCTL     E1000_TCTL
+#define E1000_82542_TCTL_EXT E1000_TCTL_EXT
+#define E1000_82542_TIPG     E1000_TIPG
+#define E1000_82542_TDBAL    0x00420
+#define E1000_82542_TDBAH    0x00424
+#define E1000_82542_TDLEN    0x00428
+#define E1000_82542_TDH      0x00430
+#define E1000_82542_TDT      0x00438
+#define E1000_82542_TIDV     0x00440
+#define E1000_82542_TBT      E1000_TBT
+#define E1000_82542_AIT      E1000_AIT
+#define E1000_82542_VFTA     0x00600
+#define E1000_82542_LEDCTL   E1000_LEDCTL
+#define E1000_82542_PBA      E1000_PBA
+#define E1000_82542_PBS      E1000_PBS
+#define E1000_82542_EEMNGCTL E1000_EEMNGCTL
+#define E1000_82542_EEARBC   E1000_EEARBC
+#define E1000_82542_FLASHT   E1000_FLASHT
+#define E1000_82542_EEWR     E1000_EEWR
+#define E1000_82542_FLSWCTL  E1000_FLSWCTL
+#define E1000_82542_FLSWDATA E1000_FLSWDATA
+#define E1000_82542_FLSWCNT  E1000_FLSWCNT
+#define E1000_82542_FLOP     E1000_FLOP
+#define E1000_82542_EXTCNF_CTRL  E1000_EXTCNF_CTRL
+#define E1000_82542_EXTCNF_SIZE  E1000_EXTCNF_SIZE
+#define E1000_82542_PHY_CTRL E1000_PHY_CTRL
+#define E1000_82542_ERT      E1000_ERT
+#define E1000_82542_RXDCTL   E1000_RXDCTL
+#define E1000_82542_RXDCTL1  E1000_RXDCTL1
+#define E1000_82542_RADV     E1000_RADV
+#define E1000_82542_RSRPD    E1000_RSRPD
+#define E1000_82542_TXDMAC   E1000_TXDMAC
+#define E1000_82542_KABGTXD  E1000_KABGTXD
+#define E1000_82542_TDFHS    E1000_TDFHS
+#define E1000_82542_TDFTS    E1000_TDFTS
+#define E1000_82542_TDFPC    E1000_TDFPC
+#define E1000_82542_TXDCTL   E1000_TXDCTL
+#define E1000_82542_TADV     E1000_TADV
+#define E1000_82542_TSPMT    E1000_TSPMT
+#define E1000_82542_CRCERRS  E1000_CRCERRS
+#define E1000_82542_ALGNERRC E1000_ALGNERRC
+#define E1000_82542_SYMERRS  E1000_SYMERRS
+#define E1000_82542_RXERRC   E1000_RXERRC
+#define E1000_82542_MPC      E1000_MPC
+#define E1000_82542_SCC      E1000_SCC
+#define E1000_82542_ECOL     E1000_ECOL
+#define E1000_82542_MCC      E1000_MCC
+#define E1000_82542_LATECOL  E1000_LATECOL
+#define E1000_82542_COLC     E1000_COLC
+#define E1000_82542_DC       E1000_DC
+#define E1000_82542_TNCRS    E1000_TNCRS
+#define E1000_82542_SEC      E1000_SEC
+#define E1000_82542_CEXTERR  E1000_CEXTERR
+#define E1000_82542_RLEC     E1000_RLEC
+#define E1000_82542_XONRXC   E1000_XONRXC
+#define E1000_82542_XONTXC   E1000_XONTXC
+#define E1000_82542_XOFFRXC  E1000_XOFFRXC
+#define E1000_82542_XOFFTXC  E1000_XOFFTXC
+#define E1000_82542_FCRUC    E1000_FCRUC
+#define E1000_82542_PRC64    E1000_PRC64
+#define E1000_82542_PRC127   E1000_PRC127
+#define E1000_82542_PRC255   E1000_PRC255
+#define E1000_82542_PRC511   E1000_PRC511
+#define E1000_82542_PRC1023  E1000_PRC1023
+#define E1000_82542_PRC1522  E1000_PRC1522
+#define E1000_82542_GPRC     E1000_GPRC
+#define E1000_82542_BPRC     E1000_BPRC
+#define E1000_82542_MPRC     E1000_MPRC
+#define E1000_82542_GPTC     E1000_GPTC
+#define E1000_82542_GORCL    E1000_GORCL
+#define E1000_82542_GORCH    E1000_GORCH
+#define E1000_82542_GOTCL    E1000_GOTCL
+#define E1000_82542_GOTCH    E1000_GOTCH
+#define E1000_82542_RNBC     E1000_RNBC
+#define E1000_82542_RUC      E1000_RUC
+#define E1000_82542_RFC      E1000_RFC
+#define E1000_82542_ROC      E1000_ROC
+#define E1000_82542_RJC      E1000_RJC
+#define E1000_82542_MGTPRC   E1000_MGTPRC
+#define E1000_82542_MGTPDC   E1000_MGTPDC
+#define E1000_82542_MGTPTC   E1000_MGTPTC
+#define E1000_82542_TORL     E1000_TORL
+#define E1000_82542_TORH     E1000_TORH
+#define E1000_82542_TOTL     E1000_TOTL
+#define E1000_82542_TOTH     E1000_TOTH
+#define E1000_82542_TPR      E1000_TPR
+#define E1000_82542_TPT      E1000_TPT
+#define E1000_82542_PTC64    E1000_PTC64
+#define E1000_82542_PTC127   E1000_PTC127
+#define E1000_82542_PTC255   E1000_PTC255
+#define E1000_82542_PTC511   E1000_PTC511
+#define E1000_82542_PTC1023  E1000_PTC1023
+#define E1000_82542_PTC1522  E1000_PTC1522
+#define E1000_82542_MPTC     E1000_MPTC
+#define E1000_82542_BPTC     E1000_BPTC
+#define E1000_82542_TSCTC    E1000_TSCTC
+#define E1000_82542_TSCTFC   E1000_TSCTFC
+#define E1000_82542_RXCSUM   E1000_RXCSUM
+#define E1000_82542_WUC      E1000_WUC
+#define E1000_82542_WUFC     E1000_WUFC
+#define E1000_82542_WUS      E1000_WUS
+#define E1000_82542_MANC     E1000_MANC
+#define E1000_82542_IPAV     E1000_IPAV
+#define E1000_82542_IP4AT    E1000_IP4AT
+#define E1000_82542_IP6AT    E1000_IP6AT
+#define E1000_82542_WUPL     E1000_WUPL
+#define E1000_82542_WUPM     E1000_WUPM
+#define E1000_82542_FFLT     E1000_FFLT
+#define E1000_82542_TDFH     0x08010
+#define E1000_82542_TDFT     0x08018
+#define E1000_82542_FFMT     E1000_FFMT
+#define E1000_82542_FFVT     E1000_FFVT
+#define E1000_82542_HOST_IF  E1000_HOST_IF
+#define E1000_82542_IAM         E1000_IAM
+#define E1000_82542_EEMNGCTL    E1000_EEMNGCTL
+#define E1000_82542_PSRCTL      E1000_PSRCTL
+#define E1000_82542_RAID        E1000_RAID
+#define E1000_82542_TARC0       E1000_TARC0
+#define E1000_82542_TDBAL1      E1000_TDBAL1
+#define E1000_82542_TDBAH1      E1000_TDBAH1
+#define E1000_82542_TDLEN1      E1000_TDLEN1
+#define E1000_82542_TDH1        E1000_TDH1
+#define E1000_82542_TDT1        E1000_TDT1
+#define E1000_82542_TXDCTL1     E1000_TXDCTL1
+#define E1000_82542_TARC1       E1000_TARC1
+#define E1000_82542_RFCTL       E1000_RFCTL
+#define E1000_82542_GCR         E1000_GCR
+#define E1000_82542_GSCL_1      E1000_GSCL_1
+#define E1000_82542_GSCL_2      E1000_GSCL_2
+#define E1000_82542_GSCL_3      E1000_GSCL_3
+#define E1000_82542_GSCL_4      E1000_GSCL_4
+#define E1000_82542_FACTPS      E1000_FACTPS
+#define E1000_82542_SWSM        E1000_SWSM
+#define E1000_82542_FWSM        E1000_FWSM
+#define E1000_82542_FFLT_DBG    E1000_FFLT_DBG
+#define E1000_82542_IAC         E1000_IAC
+#define E1000_82542_ICRXPTC     E1000_ICRXPTC
+#define E1000_82542_ICRXATC     E1000_ICRXATC
+#define E1000_82542_ICTXPTC     E1000_ICTXPTC
+#define E1000_82542_ICTXATC     E1000_ICTXATC
+#define E1000_82542_ICTXQEC     E1000_ICTXQEC
+#define E1000_82542_ICTXQMTC    E1000_ICTXQMTC
+#define E1000_82542_ICRXDMTC    E1000_ICRXDMTC
+#define E1000_82542_ICRXOC      E1000_ICRXOC
+#define E1000_82542_HICR        E1000_HICR
+
+#define E1000_82542_CPUVEC      E1000_CPUVEC
+#define E1000_82542_MRQC        E1000_MRQC
+#define E1000_82542_RETA        E1000_RETA
+#define E1000_82542_RSSRK       E1000_RSSRK
+#define E1000_82542_RSSIM       E1000_RSSIM
+#define E1000_82542_RSSIR       E1000_RSSIR
+#define E1000_82542_KUMCTRLSTA E1000_KUMCTRLSTA
+#define E1000_82542_SW_FW_SYNC E1000_SW_FW_SYNC
+
+/* Statistics counters collected by the MAC */
+struct e1000_hw_stats {
+	u64 crcerrs;
+	u64 algnerrc;
+	u64 symerrs;
+	u64 rxerrc;
+	u64 txerrc;
+	u64 mpc;
+	u64 scc;
+	u64 ecol;
+	u64 mcc;
+	u64 latecol;
+	u64 colc;
+	u64 dc;
+	u64 tncrs;
+	u64 sec;
+	u64 cexterr;
+	u64 rlec;
+	u64 xonrxc;
+	u64 xontxc;
+	u64 xoffrxc;
+	u64 xofftxc;
+	u64 fcruc;
+	u64 prc64;
+	u64 prc127;
+	u64 prc255;
+	u64 prc511;
+	u64 prc1023;
+	u64 prc1522;
+	u64 gprc;
+	u64 bprc;
+	u64 mprc;
+	u64 gptc;
+	u64 gorcl;
+	u64 gorch;
+	u64 gotcl;
+	u64 gotch;
+	u64 rnbc;
+	u64 ruc;
+	u64 rfc;
+	u64 roc;
+	u64 rlerrc;
+	u64 rjc;
+	u64 mgprc;
+	u64 mgpdc;
+	u64 mgptc;
+	u64 torl;
+	u64 torh;
+	u64 totl;
+	u64 toth;
+	u64 tpr;
+	u64 tpt;
+	u64 ptc64;
+	u64 ptc127;
+	u64 ptc255;
+	u64 ptc511;
+	u64 ptc1023;
+	u64 ptc1522;
+	u64 mptc;
+	u64 bptc;
+	u64 tsctc;
+	u64 tsctfc;
+	u64 iac;
+	u64 icrxptc;
+	u64 icrxatc;
+	u64 ictxptc;
+	u64 ictxatc;
+	u64 ictxqec;
+	u64 ictxqmtc;
+	u64 icrxdmtc;
+	u64 icrxoc;
+};
+
+/* Structure containing variables used by the shared code (e1000_hw.c) */
+struct e1000_hw {
+	u8 __iomem *hw_addr;
+	u8 __iomem *flash_address;
+	void __iomem *ce4100_gbe_mdio_base_virt;
+	e1000_mac_type mac_type;
+	e1000_phy_type phy_type;
+	u32 phy_init_script;
+	e1000_media_type media_type;
+	void *back;
+	struct e1000_shadow_ram *eeprom_shadow_ram;
+	u32 flash_bank_size;
+	u32 flash_base_addr;
+	e1000_fc_type fc;
+	e1000_bus_speed bus_speed;
+	e1000_bus_width bus_width;
+	e1000_bus_type bus_type;
+	struct e1000_eeprom_info eeprom;
+	e1000_ms_type master_slave;
+	e1000_ms_type original_master_slave;
+	e1000_ffe_config ffe_config_state;
+	u32 asf_firmware_present;
+	u32 eeprom_semaphore_present;
+	unsigned long io_base;
+	u32 phy_id;
+	u32 phy_revision;
+	u32 phy_addr;
+	u32 original_fc;
+	u32 txcw;
+	u32 autoneg_failed;
+	u32 max_frame_size;
+	u32 min_frame_size;
+	u32 mc_filter_type;
+	u32 num_mc_addrs;
+	u32 collision_delta;
+	u32 tx_packet_delta;
+	u32 ledctl_default;
+	u32 ledctl_mode1;
+	u32 ledctl_mode2;
+	bool tx_pkt_filtering;
+	struct e1000_host_mng_dhcp_cookie mng_cookie;
+	u16 phy_spd_default;
+	u16 autoneg_advertised;
+	u16 pci_cmd_word;
+	u16 fc_high_water;
+	u16 fc_low_water;
+	u16 fc_pause_time;
+	u16 current_ifs_val;
+	u16 ifs_min_val;
+	u16 ifs_max_val;
+	u16 ifs_step_size;
+	u16 ifs_ratio;
+	u16 device_id;
+	u16 vendor_id;
+	u16 subsystem_id;
+	u16 subsystem_vendor_id;
+	u8 revision_id;
+	u8 autoneg;
+	u8 mdix;
+	u8 forced_speed_duplex;
+	u8 wait_autoneg_complete;
+	u8 dma_fairness;
+	u8 mac_addr[NODE_ADDRESS_SIZE];
+	u8 perm_mac_addr[NODE_ADDRESS_SIZE];
+	bool disable_polarity_correction;
+	bool speed_downgraded;
+	e1000_smart_speed smart_speed;
+	e1000_dsp_config dsp_config_state;
+	bool get_link_status;
+	bool serdes_has_link;
+	bool tbi_compatibility_en;
+	bool tbi_compatibility_on;
+	bool laa_is_present;
+	bool phy_reset_disable;
+	bool initialize_hw_bits_disable;
+	bool fc_send_xon;
+	bool fc_strict_ieee;
+	bool report_tx_early;
+	bool adaptive_ifs;
+	bool ifs_params_forced;
+	bool in_ifs_mode;
+	bool mng_reg_access_disabled;
+	bool leave_av_bit_off;
+	bool bad_tx_carr_stats_fd;
+	bool has_smbus;
+};
+
+#define E1000_EEPROM_SWDPIN0   0x0001	/* SWDPIN 0 EEPROM Value */
+#define E1000_EEPROM_LED_LOGIC 0x0020	/* Led Logic Word */
+#define E1000_EEPROM_RW_REG_DATA   16	/* Offset to data in EEPROM read/write registers */
+#define E1000_EEPROM_RW_REG_DONE   2	/* Offset to READ/WRITE done bit */
+#define E1000_EEPROM_RW_REG_START  1	/* First bit for telling part to start operation */
+#define E1000_EEPROM_RW_ADDR_SHIFT 2	/* Shift to the address bits */
+#define E1000_EEPROM_POLL_WRITE    1	/* Flag for polling for write complete */
+#define E1000_EEPROM_POLL_READ     0	/* Flag for polling for read complete */
+/* Register Bit Masks */
+/* Device Control */
+#define E1000_CTRL_FD       0x00000001	/* Full duplex.0=half; 1=full */
+#define E1000_CTRL_BEM      0x00000002	/* Endian Mode.0=little,1=big */
+#define E1000_CTRL_PRIOR    0x00000004	/* Priority on PCI. 0=rx,1=fair */
+#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004	/*Blocks new Master requests */
+#define E1000_CTRL_LRST     0x00000008	/* Link reset. 0=normal,1=reset */
+#define E1000_CTRL_TME      0x00000010	/* Test mode. 0=normal,1=test */
+#define E1000_CTRL_SLE      0x00000020	/* Serial Link on 0=dis,1=en */
+#define E1000_CTRL_ASDE     0x00000020	/* Auto-speed detect enable */
+#define E1000_CTRL_SLU      0x00000040	/* Set link up (Force Link) */
+#define E1000_CTRL_ILOS     0x00000080	/* Invert Loss-Of Signal */
+#define E1000_CTRL_SPD_SEL  0x00000300	/* Speed Select Mask */
+#define E1000_CTRL_SPD_10   0x00000000	/* Force 10Mb */
+#define E1000_CTRL_SPD_100  0x00000100	/* Force 100Mb */
+#define E1000_CTRL_SPD_1000 0x00000200	/* Force 1Gb */
+#define E1000_CTRL_BEM32    0x00000400	/* Big Endian 32 mode */
+#define E1000_CTRL_FRCSPD   0x00000800	/* Force Speed */
+#define E1000_CTRL_FRCDPX   0x00001000	/* Force Duplex */
+#define E1000_CTRL_D_UD_EN  0x00002000	/* Dock/Undock enable */
+#define E1000_CTRL_D_UD_POLARITY 0x00004000	/* Defined polarity of Dock/Undock indication in SDP[0] */
+#define E1000_CTRL_FORCE_PHY_RESET 0x00008000	/* Reset both PHY ports, through PHYRST_N pin */
+#define E1000_CTRL_EXT_LINK_EN 0x00010000	/* enable link status from external LINK_0 and LINK_1 pins */
+#define E1000_CTRL_SWDPIN0  0x00040000	/* SWDPIN 0 value */
+#define E1000_CTRL_SWDPIN1  0x00080000	/* SWDPIN 1 value */
+#define E1000_CTRL_SWDPIN2  0x00100000	/* SWDPIN 2 value */
+#define E1000_CTRL_SWDPIN3  0x00200000	/* SWDPIN 3 value */
+#define E1000_CTRL_SWDPIO0  0x00400000	/* SWDPIN 0 Input or output */
+#define E1000_CTRL_SWDPIO1  0x00800000	/* SWDPIN 1 input or output */
+#define E1000_CTRL_SWDPIO2  0x01000000	/* SWDPIN 2 input or output */
+#define E1000_CTRL_SWDPIO3  0x02000000	/* SWDPIN 3 input or output */
+#define E1000_CTRL_RST      0x04000000	/* Global reset */
+#define E1000_CTRL_RFCE     0x08000000	/* Receive Flow Control enable */
+#define E1000_CTRL_TFCE     0x10000000	/* Transmit flow control enable */
+#define E1000_CTRL_RTE      0x20000000	/* Routing tag enable */
+#define E1000_CTRL_VME      0x40000000	/* IEEE VLAN mode enable */
+#define E1000_CTRL_PHY_RST  0x80000000	/* PHY Reset */
+#define E1000_CTRL_SW2FW_INT 0x02000000	/* Initiate an interrupt to manageability engine */
+
+/* Device Status */
+#define E1000_STATUS_FD         0x00000001	/* Full duplex.0=half,1=full */
+#define E1000_STATUS_LU         0x00000002	/* Link up.0=no,1=link */
+#define E1000_STATUS_FUNC_MASK  0x0000000C	/* PCI Function Mask */
+#define E1000_STATUS_FUNC_SHIFT 2
+#define E1000_STATUS_FUNC_0     0x00000000	/* Function 0 */
+#define E1000_STATUS_FUNC_1     0x00000004	/* Function 1 */
+#define E1000_STATUS_TXOFF      0x00000010	/* transmission paused */
+#define E1000_STATUS_TBIMODE    0x00000020	/* TBI mode */
+#define E1000_STATUS_SPEED_MASK 0x000000C0
+#define E1000_STATUS_SPEED_10   0x00000000	/* Speed 10Mb/s */
+#define E1000_STATUS_SPEED_100  0x00000040	/* Speed 100Mb/s */
+#define E1000_STATUS_SPEED_1000 0x00000080	/* Speed 1000Mb/s */
+#define E1000_STATUS_LAN_INIT_DONE 0x00000200	/* Lan Init Completion
+						   by EEPROM/Flash */
+#define E1000_STATUS_ASDV       0x00000300	/* Auto speed detect value */
+#define E1000_STATUS_DOCK_CI    0x00000800	/* Change in Dock/Undock state. Clear on write '0'. */
+#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000	/* Status of Master requests. */
+#define E1000_STATUS_MTXCKOK    0x00000400	/* MTX clock running OK */
+#define E1000_STATUS_PCI66      0x00000800	/* In 66Mhz slot */
+#define E1000_STATUS_BUS64      0x00001000	/* In 64 bit slot */
+#define E1000_STATUS_PCIX_MODE  0x00002000	/* PCI-X mode */
+#define E1000_STATUS_PCIX_SPEED 0x0000C000	/* PCI-X bus speed */
+#define E1000_STATUS_BMC_SKU_0  0x00100000	/* BMC USB redirect disabled */
+#define E1000_STATUS_BMC_SKU_1  0x00200000	/* BMC SRAM disabled */
+#define E1000_STATUS_BMC_SKU_2  0x00400000	/* BMC SDRAM disabled */
+#define E1000_STATUS_BMC_CRYPTO 0x00800000	/* BMC crypto disabled */
+#define E1000_STATUS_BMC_LITE   0x01000000	/* BMC external code execution disabled */
+#define E1000_STATUS_RGMII_ENABLE 0x02000000	/* RGMII disabled */
+#define E1000_STATUS_FUSE_8       0x04000000
+#define E1000_STATUS_FUSE_9       0x08000000
+#define E1000_STATUS_SERDES0_DIS  0x10000000	/* SERDES disabled on port 0 */
+#define E1000_STATUS_SERDES1_DIS  0x20000000	/* SERDES disabled on port 1 */
+
+/* Constants used to interpret the masked PCI-X bus speed. */
+#define E1000_STATUS_PCIX_SPEED_66  0x00000000	/* PCI-X bus speed  50-66 MHz */
+#define E1000_STATUS_PCIX_SPEED_100 0x00004000	/* PCI-X bus speed  66-100 MHz */
+#define E1000_STATUS_PCIX_SPEED_133 0x00008000	/* PCI-X bus speed 100-133 MHz */
+
+/* EEPROM/Flash Control */
+#define E1000_EECD_SK        0x00000001	/* EEPROM Clock */
+#define E1000_EECD_CS        0x00000002	/* EEPROM Chip Select */
+#define E1000_EECD_DI        0x00000004	/* EEPROM Data In */
+#define E1000_EECD_DO        0x00000008	/* EEPROM Data Out */
+#define E1000_EECD_FWE_MASK  0x00000030
+#define E1000_EECD_FWE_DIS   0x00000010	/* Disable FLASH writes */
+#define E1000_EECD_FWE_EN    0x00000020	/* Enable FLASH writes */
+#define E1000_EECD_FWE_SHIFT 4
+#define E1000_EECD_REQ       0x00000040	/* EEPROM Access Request */
+#define E1000_EECD_GNT       0x00000080	/* EEPROM Access Grant */
+#define E1000_EECD_PRES      0x00000100	/* EEPROM Present */
+#define E1000_EECD_SIZE      0x00000200	/* EEPROM Size (0=64 word 1=256 word) */
+#define E1000_EECD_ADDR_BITS 0x00000400	/* EEPROM Addressing bits based on type
+					 * (0-small, 1-large) */
+#define E1000_EECD_TYPE      0x00002000	/* EEPROM Type (1-SPI, 0-Microwire) */
+#ifndef E1000_EEPROM_GRANT_ATTEMPTS
+#define E1000_EEPROM_GRANT_ATTEMPTS 1000	/* EEPROM # attempts to gain grant */
+#endif
+#define E1000_EECD_AUTO_RD          0x00000200	/* EEPROM Auto Read done */
+#define E1000_EECD_SIZE_EX_MASK     0x00007800	/* EEprom Size */
+#define E1000_EECD_SIZE_EX_SHIFT    11
+#define E1000_EECD_NVADDS    0x00018000	/* NVM Address Size */
+#define E1000_EECD_SELSHAD   0x00020000	/* Select Shadow RAM */
+#define E1000_EECD_INITSRAM  0x00040000	/* Initialize Shadow RAM */
+#define E1000_EECD_FLUPD     0x00080000	/* Update FLASH */
+#define E1000_EECD_AUPDEN    0x00100000	/* Enable Autonomous FLASH update */
+#define E1000_EECD_SHADV     0x00200000	/* Shadow RAM Data Valid */
+#define E1000_EECD_SEC1VAL   0x00400000	/* Sector One Valid */
+#define E1000_EECD_SECVAL_SHIFT      22
+#define E1000_STM_OPCODE     0xDB00
+#define E1000_HICR_FW_RESET  0xC0
+
+#define E1000_SHADOW_RAM_WORDS     2048
+#define E1000_ICH_NVM_SIG_WORD     0x13
+#define E1000_ICH_NVM_SIG_MASK     0xC0
+
+/* EEPROM Read */
+#define E1000_EERD_START      0x00000001	/* Start Read */
+#define E1000_EERD_DONE       0x00000010	/* Read Done */
+#define E1000_EERD_ADDR_SHIFT 8
+#define E1000_EERD_ADDR_MASK  0x0000FF00	/* Read Address */
+#define E1000_EERD_DATA_SHIFT 16
+#define E1000_EERD_DATA_MASK  0xFFFF0000	/* Read Data */
+
+/* SPI EEPROM Status Register */
+#define EEPROM_STATUS_RDY_SPI  0x01
+#define EEPROM_STATUS_WEN_SPI  0x02
+#define EEPROM_STATUS_BP0_SPI  0x04
+#define EEPROM_STATUS_BP1_SPI  0x08
+#define EEPROM_STATUS_WPEN_SPI 0x80
+
+/* Extended Device Control */
+#define E1000_CTRL_EXT_GPI0_EN   0x00000001	/* Maps SDP4 to GPI0 */
+#define E1000_CTRL_EXT_GPI1_EN   0x00000002	/* Maps SDP5 to GPI1 */
+#define E1000_CTRL_EXT_PHYINT_EN E1000_CTRL_EXT_GPI1_EN
+#define E1000_CTRL_EXT_GPI2_EN   0x00000004	/* Maps SDP6 to GPI2 */
+#define E1000_CTRL_EXT_GPI3_EN   0x00000008	/* Maps SDP7 to GPI3 */
+#define E1000_CTRL_EXT_SDP4_DATA 0x00000010	/* Value of SW Defineable Pin 4 */
+#define E1000_CTRL_EXT_SDP5_DATA 0x00000020	/* Value of SW Defineable Pin 5 */
+#define E1000_CTRL_EXT_PHY_INT   E1000_CTRL_EXT_SDP5_DATA
+#define E1000_CTRL_EXT_SDP6_DATA 0x00000040	/* Value of SW Defineable Pin 6 */
+#define E1000_CTRL_EXT_SDP7_DATA 0x00000080	/* Value of SW Defineable Pin 7 */
+#define E1000_CTRL_EXT_SDP4_DIR  0x00000100	/* Direction of SDP4 0=in 1=out */
+#define E1000_CTRL_EXT_SDP5_DIR  0x00000200	/* Direction of SDP5 0=in 1=out */
+#define E1000_CTRL_EXT_SDP6_DIR  0x00000400	/* Direction of SDP6 0=in 1=out */
+#define E1000_CTRL_EXT_SDP7_DIR  0x00000800	/* Direction of SDP7 0=in 1=out */
+#define E1000_CTRL_EXT_ASDCHK    0x00001000	/* Initiate an ASD sequence */
+#define E1000_CTRL_EXT_EE_RST    0x00002000	/* Reinitialize from EEPROM */
+#define E1000_CTRL_EXT_IPS       0x00004000	/* Invert Power State */
+#define E1000_CTRL_EXT_SPD_BYPS  0x00008000	/* Speed Select Bypass */
+#define E1000_CTRL_EXT_RO_DIS    0x00020000	/* Relaxed Ordering disable */
+#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000
+#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000
+#define E1000_CTRL_EXT_LINK_MODE_TBI  0x00C00000
+#define E1000_CTRL_EXT_LINK_MODE_KMRN 0x00000000
+#define E1000_CTRL_EXT_LINK_MODE_SERDES  0x00C00000
+#define E1000_CTRL_EXT_LINK_MODE_SGMII   0x00800000
+#define E1000_CTRL_EXT_WR_WMARK_MASK  0x03000000
+#define E1000_CTRL_EXT_WR_WMARK_256   0x00000000
+#define E1000_CTRL_EXT_WR_WMARK_320   0x01000000
+#define E1000_CTRL_EXT_WR_WMARK_384   0x02000000
+#define E1000_CTRL_EXT_WR_WMARK_448   0x03000000
+#define E1000_CTRL_EXT_DRV_LOAD       0x10000000	/* Driver loaded bit for FW */
+#define E1000_CTRL_EXT_IAME           0x08000000	/* Interrupt acknowledge Auto-mask */
+#define E1000_CTRL_EXT_INT_TIMER_CLR  0x20000000	/* Clear Interrupt timers after IMS clear */
+#define E1000_CRTL_EXT_PB_PAREN       0x01000000	/* packet buffer parity error detection enabled */
+#define E1000_CTRL_EXT_DF_PAREN       0x02000000	/* descriptor FIFO parity error detection enable */
+#define E1000_CTRL_EXT_GHOST_PAREN    0x40000000
+
+/* MDI Control */
+#define E1000_MDIC_DATA_MASK 0x0000FFFF
+#define E1000_MDIC_REG_MASK  0x001F0000
+#define E1000_MDIC_REG_SHIFT 16
+#define E1000_MDIC_PHY_MASK  0x03E00000
+#define E1000_MDIC_PHY_SHIFT 21
+#define E1000_MDIC_OP_WRITE  0x04000000
+#define E1000_MDIC_OP_READ   0x08000000
+#define E1000_MDIC_READY     0x10000000
+#define E1000_MDIC_INT_EN    0x20000000
+#define E1000_MDIC_ERROR     0x40000000
+
+#define INTEL_CE_GBE_MDIC_OP_WRITE      0x04000000
+#define INTEL_CE_GBE_MDIC_OP_READ       0x00000000
+#define INTEL_CE_GBE_MDIC_GO            0x80000000
+#define INTEL_CE_GBE_MDIC_READ_ERROR    0x80000000
+
+#define E1000_KUMCTRLSTA_MASK           0x0000FFFF
+#define E1000_KUMCTRLSTA_OFFSET         0x001F0000
+#define E1000_KUMCTRLSTA_OFFSET_SHIFT   16
+#define E1000_KUMCTRLSTA_REN            0x00200000
+
+#define E1000_KUMCTRLSTA_OFFSET_FIFO_CTRL      0x00000000
+#define E1000_KUMCTRLSTA_OFFSET_CTRL           0x00000001
+#define E1000_KUMCTRLSTA_OFFSET_INB_CTRL       0x00000002
+#define E1000_KUMCTRLSTA_OFFSET_DIAG           0x00000003
+#define E1000_KUMCTRLSTA_OFFSET_TIMEOUTS       0x00000004
+#define E1000_KUMCTRLSTA_OFFSET_INB_PARAM      0x00000009
+#define E1000_KUMCTRLSTA_OFFSET_HD_CTRL        0x00000010
+#define E1000_KUMCTRLSTA_OFFSET_M2P_SERDES     0x0000001E
+#define E1000_KUMCTRLSTA_OFFSET_M2P_MODES      0x0000001F
+
+/* FIFO Control */
+#define E1000_KUMCTRLSTA_FIFO_CTRL_RX_BYPASS   0x00000008
+#define E1000_KUMCTRLSTA_FIFO_CTRL_TX_BYPASS   0x00000800
+
+/* In-Band Control */
+#define E1000_KUMCTRLSTA_INB_CTRL_LINK_STATUS_TX_TIMEOUT_DEFAULT    0x00000500
+#define E1000_KUMCTRLSTA_INB_CTRL_DIS_PADDING  0x00000010
+
+/* Half-Duplex Control */
+#define E1000_KUMCTRLSTA_HD_CTRL_10_100_DEFAULT 0x00000004
+#define E1000_KUMCTRLSTA_HD_CTRL_1000_DEFAULT  0x00000000
+
+#define E1000_KUMCTRLSTA_OFFSET_K0S_CTRL       0x0000001E
+
+#define E1000_KUMCTRLSTA_DIAG_FELPBK           0x2000
+#define E1000_KUMCTRLSTA_DIAG_NELPBK           0x1000
+
+#define E1000_KUMCTRLSTA_K0S_100_EN            0x2000
+#define E1000_KUMCTRLSTA_K0S_GBE_EN            0x1000
+#define E1000_KUMCTRLSTA_K0S_ENTRY_LATENCY_MASK   0x0003
+
+#define E1000_KABGTXD_BGSQLBIAS                0x00050000
+
+#define E1000_PHY_CTRL_SPD_EN                  0x00000001
+#define E1000_PHY_CTRL_D0A_LPLU                0x00000002
+#define E1000_PHY_CTRL_NOND0A_LPLU             0x00000004
+#define E1000_PHY_CTRL_NOND0A_GBE_DISABLE      0x00000008
+#define E1000_PHY_CTRL_GBE_DISABLE             0x00000040
+#define E1000_PHY_CTRL_B2B_EN                  0x00000080
+
+/* LED Control */
+#define E1000_LEDCTL_LED0_MODE_MASK       0x0000000F
+#define E1000_LEDCTL_LED0_MODE_SHIFT      0
+#define E1000_LEDCTL_LED0_BLINK_RATE      0x0000020
+#define E1000_LEDCTL_LED0_IVRT            0x00000040
+#define E1000_LEDCTL_LED0_BLINK           0x00000080
+#define E1000_LEDCTL_LED1_MODE_MASK       0x00000F00
+#define E1000_LEDCTL_LED1_MODE_SHIFT      8
+#define E1000_LEDCTL_LED1_BLINK_RATE      0x0002000
+#define E1000_LEDCTL_LED1_IVRT            0x00004000
+#define E1000_LEDCTL_LED1_BLINK           0x00008000
+#define E1000_LEDCTL_LED2_MODE_MASK       0x000F0000
+#define E1000_LEDCTL_LED2_MODE_SHIFT      16
+#define E1000_LEDCTL_LED2_BLINK_RATE      0x00200000
+#define E1000_LEDCTL_LED2_IVRT            0x00400000
+#define E1000_LEDCTL_LED2_BLINK           0x00800000
+#define E1000_LEDCTL_LED3_MODE_MASK       0x0F000000
+#define E1000_LEDCTL_LED3_MODE_SHIFT      24
+#define E1000_LEDCTL_LED3_BLINK_RATE      0x20000000
+#define E1000_LEDCTL_LED3_IVRT            0x40000000
+#define E1000_LEDCTL_LED3_BLINK           0x80000000
+
+#define E1000_LEDCTL_MODE_LINK_10_1000  0x0
+#define E1000_LEDCTL_MODE_LINK_100_1000 0x1
+#define E1000_LEDCTL_MODE_LINK_UP       0x2
+#define E1000_LEDCTL_MODE_ACTIVITY      0x3
+#define E1000_LEDCTL_MODE_LINK_ACTIVITY 0x4
+#define E1000_LEDCTL_MODE_LINK_10       0x5
+#define E1000_LEDCTL_MODE_LINK_100      0x6
+#define E1000_LEDCTL_MODE_LINK_1000     0x7
+#define E1000_LEDCTL_MODE_PCIX_MODE     0x8
+#define E1000_LEDCTL_MODE_FULL_DUPLEX   0x9
+#define E1000_LEDCTL_MODE_COLLISION     0xA
+#define E1000_LEDCTL_MODE_BUS_SPEED     0xB
+#define E1000_LEDCTL_MODE_BUS_SIZE      0xC
+#define E1000_LEDCTL_MODE_PAUSED        0xD
+#define E1000_LEDCTL_MODE_LED_ON        0xE
+#define E1000_LEDCTL_MODE_LED_OFF       0xF
+
+/* Receive Address */
+#define E1000_RAH_AV  0x80000000	/* Receive descriptor valid */
+
+/* Interrupt Cause Read */
+#define E1000_ICR_TXDW          0x00000001	/* Transmit desc written back */
+#define E1000_ICR_TXQE          0x00000002	/* Transmit Queue empty */
+#define E1000_ICR_LSC           0x00000004	/* Link Status Change */
+#define E1000_ICR_RXSEQ         0x00000008	/* rx sequence error */
+#define E1000_ICR_RXDMT0        0x00000010	/* rx desc min. threshold (0) */
+#define E1000_ICR_RXO           0x00000040	/* rx overrun */
+#define E1000_ICR_RXT0          0x00000080	/* rx timer intr (ring 0) */
+#define E1000_ICR_MDAC          0x00000200	/* MDIO access complete */
+#define E1000_ICR_RXCFG         0x00000400	/* RX /c/ ordered set */
+#define E1000_ICR_GPI_EN0       0x00000800	/* GP Int 0 */
+#define E1000_ICR_GPI_EN1       0x00001000	/* GP Int 1 */
+#define E1000_ICR_GPI_EN2       0x00002000	/* GP Int 2 */
+#define E1000_ICR_GPI_EN3       0x00004000	/* GP Int 3 */
+#define E1000_ICR_TXD_LOW       0x00008000
+#define E1000_ICR_SRPD          0x00010000
+#define E1000_ICR_ACK           0x00020000	/* Receive Ack frame */
+#define E1000_ICR_MNG           0x00040000	/* Manageability event */
+#define E1000_ICR_DOCK          0x00080000	/* Dock/Undock */
+#define E1000_ICR_INT_ASSERTED  0x80000000	/* If this bit asserted, the driver should claim the interrupt */
+#define E1000_ICR_RXD_FIFO_PAR0 0x00100000	/* queue 0 Rx descriptor FIFO parity error */
+#define E1000_ICR_TXD_FIFO_PAR0 0x00200000	/* queue 0 Tx descriptor FIFO parity error */
+#define E1000_ICR_HOST_ARB_PAR  0x00400000	/* host arb read buffer parity error */
+#define E1000_ICR_PB_PAR        0x00800000	/* packet buffer parity error */
+#define E1000_ICR_RXD_FIFO_PAR1 0x01000000	/* queue 1 Rx descriptor FIFO parity error */
+#define E1000_ICR_TXD_FIFO_PAR1 0x02000000	/* queue 1 Tx descriptor FIFO parity error */
+#define E1000_ICR_ALL_PARITY    0x03F00000	/* all parity error bits */
+#define E1000_ICR_DSW           0x00000020	/* FW changed the status of DISSW bit in the FWSM */
+#define E1000_ICR_PHYINT        0x00001000	/* LAN connected device generates an interrupt */
+#define E1000_ICR_EPRST         0x00100000	/* ME hardware reset occurs */
+
+/* Interrupt Cause Set */
+#define E1000_ICS_TXDW      E1000_ICR_TXDW	/* Transmit desc written back */
+#define E1000_ICS_TXQE      E1000_ICR_TXQE	/* Transmit Queue empty */
+#define E1000_ICS_LSC       E1000_ICR_LSC	/* Link Status Change */
+#define E1000_ICS_RXSEQ     E1000_ICR_RXSEQ	/* rx sequence error */
+#define E1000_ICS_RXDMT0    E1000_ICR_RXDMT0	/* rx desc min. threshold */
+#define E1000_ICS_RXO       E1000_ICR_RXO	/* rx overrun */
+#define E1000_ICS_RXT0      E1000_ICR_RXT0	/* rx timer intr */
+#define E1000_ICS_MDAC      E1000_ICR_MDAC	/* MDIO access complete */
+#define E1000_ICS_RXCFG     E1000_ICR_RXCFG	/* RX /c/ ordered set */
+#define E1000_ICS_GPI_EN0   E1000_ICR_GPI_EN0	/* GP Int 0 */
+#define E1000_ICS_GPI_EN1   E1000_ICR_GPI_EN1	/* GP Int 1 */
+#define E1000_ICS_GPI_EN2   E1000_ICR_GPI_EN2	/* GP Int 2 */
+#define E1000_ICS_GPI_EN3   E1000_ICR_GPI_EN3	/* GP Int 3 */
+#define E1000_ICS_TXD_LOW   E1000_ICR_TXD_LOW
+#define E1000_ICS_SRPD      E1000_ICR_SRPD
+#define E1000_ICS_ACK       E1000_ICR_ACK	/* Receive Ack frame */
+#define E1000_ICS_MNG       E1000_ICR_MNG	/* Manageability event */
+#define E1000_ICS_DOCK      E1000_ICR_DOCK	/* Dock/Undock */
+#define E1000_ICS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0	/* queue 0 Rx descriptor FIFO parity error */
+#define E1000_ICS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0	/* queue 0 Tx descriptor FIFO parity error */
+#define E1000_ICS_HOST_ARB_PAR  E1000_ICR_HOST_ARB_PAR	/* host arb read buffer parity error */
+#define E1000_ICS_PB_PAR        E1000_ICR_PB_PAR	/* packet buffer parity error */
+#define E1000_ICS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1	/* queue 1 Rx descriptor FIFO parity error */
+#define E1000_ICS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1	/* queue 1 Tx descriptor FIFO parity error */
+#define E1000_ICS_DSW       E1000_ICR_DSW
+#define E1000_ICS_PHYINT    E1000_ICR_PHYINT
+#define E1000_ICS_EPRST     E1000_ICR_EPRST
+
+/* Interrupt Mask Set */
+#define E1000_IMS_TXDW      E1000_ICR_TXDW	/* Transmit desc written back */
+#define E1000_IMS_TXQE      E1000_ICR_TXQE	/* Transmit Queue empty */
+#define E1000_IMS_LSC       E1000_ICR_LSC	/* Link Status Change */
+#define E1000_IMS_RXSEQ     E1000_ICR_RXSEQ	/* rx sequence error */
+#define E1000_IMS_RXDMT0    E1000_ICR_RXDMT0	/* rx desc min. threshold */
+#define E1000_IMS_RXO       E1000_ICR_RXO	/* rx overrun */
+#define E1000_IMS_RXT0      E1000_ICR_RXT0	/* rx timer intr */
+#define E1000_IMS_MDAC      E1000_ICR_MDAC	/* MDIO access complete */
+#define E1000_IMS_RXCFG     E1000_ICR_RXCFG	/* RX /c/ ordered set */
+#define E1000_IMS_GPI_EN0   E1000_ICR_GPI_EN0	/* GP Int 0 */
+#define E1000_IMS_GPI_EN1   E1000_ICR_GPI_EN1	/* GP Int 1 */
+#define E1000_IMS_GPI_EN2   E1000_ICR_GPI_EN2	/* GP Int 2 */
+#define E1000_IMS_GPI_EN3   E1000_ICR_GPI_EN3	/* GP Int 3 */
+#define E1000_IMS_TXD_LOW   E1000_ICR_TXD_LOW
+#define E1000_IMS_SRPD      E1000_ICR_SRPD
+#define E1000_IMS_ACK       E1000_ICR_ACK	/* Receive Ack frame */
+#define E1000_IMS_MNG       E1000_ICR_MNG	/* Manageability event */
+#define E1000_IMS_DOCK      E1000_ICR_DOCK	/* Dock/Undock */
+#define E1000_IMS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0	/* queue 0 Rx descriptor FIFO parity error */
+#define E1000_IMS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0	/* queue 0 Tx descriptor FIFO parity error */
+#define E1000_IMS_HOST_ARB_PAR  E1000_ICR_HOST_ARB_PAR	/* host arb read buffer parity error */
+#define E1000_IMS_PB_PAR        E1000_ICR_PB_PAR	/* packet buffer parity error */
+#define E1000_IMS_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1	/* queue 1 Rx descriptor FIFO parity error */
+#define E1000_IMS_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1	/* queue 1 Tx descriptor FIFO parity error */
+#define E1000_IMS_DSW       E1000_ICR_DSW
+#define E1000_IMS_PHYINT    E1000_ICR_PHYINT
+#define E1000_IMS_EPRST     E1000_ICR_EPRST
+
+/* Interrupt Mask Clear */
+#define E1000_IMC_TXDW      E1000_ICR_TXDW	/* Transmit desc written back */
+#define E1000_IMC_TXQE      E1000_ICR_TXQE	/* Transmit Queue empty */
+#define E1000_IMC_LSC       E1000_ICR_LSC	/* Link Status Change */
+#define E1000_IMC_RXSEQ     E1000_ICR_RXSEQ	/* rx sequence error */
+#define E1000_IMC_RXDMT0    E1000_ICR_RXDMT0	/* rx desc min. threshold */
+#define E1000_IMC_RXO       E1000_ICR_RXO	/* rx overrun */
+#define E1000_IMC_RXT0      E1000_ICR_RXT0	/* rx timer intr */
+#define E1000_IMC_MDAC      E1000_ICR_MDAC	/* MDIO access complete */
+#define E1000_IMC_RXCFG     E1000_ICR_RXCFG	/* RX /c/ ordered set */
+#define E1000_IMC_GPI_EN0   E1000_ICR_GPI_EN0	/* GP Int 0 */
+#define E1000_IMC_GPI_EN1   E1000_ICR_GPI_EN1	/* GP Int 1 */
+#define E1000_IMC_GPI_EN2   E1000_ICR_GPI_EN2	/* GP Int 2 */
+#define E1000_IMC_GPI_EN3   E1000_ICR_GPI_EN3	/* GP Int 3 */
+#define E1000_IMC_TXD_LOW   E1000_ICR_TXD_LOW
+#define E1000_IMC_SRPD      E1000_ICR_SRPD
+#define E1000_IMC_ACK       E1000_ICR_ACK	/* Receive Ack frame */
+#define E1000_IMC_MNG       E1000_ICR_MNG	/* Manageability event */
+#define E1000_IMC_DOCK      E1000_ICR_DOCK	/* Dock/Undock */
+#define E1000_IMC_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0	/* queue 0 Rx descriptor FIFO parity error */
+#define E1000_IMC_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0	/* queue 0 Tx descriptor FIFO parity error */
+#define E1000_IMC_HOST_ARB_PAR  E1000_ICR_HOST_ARB_PAR	/* host arb read buffer parity error */
+#define E1000_IMC_PB_PAR        E1000_ICR_PB_PAR	/* packet buffer parity error */
+#define E1000_IMC_RXD_FIFO_PAR1 E1000_ICR_RXD_FIFO_PAR1	/* queue 1 Rx descriptor FIFO parity error */
+#define E1000_IMC_TXD_FIFO_PAR1 E1000_ICR_TXD_FIFO_PAR1	/* queue 1 Tx descriptor FIFO parity error */
+#define E1000_IMC_DSW       E1000_ICR_DSW
+#define E1000_IMC_PHYINT    E1000_ICR_PHYINT
+#define E1000_IMC_EPRST     E1000_ICR_EPRST
+
+/* Receive Control */
+#define E1000_RCTL_RST            0x00000001	/* Software reset */
+#define E1000_RCTL_EN             0x00000002	/* enable */
+#define E1000_RCTL_SBP            0x00000004	/* store bad packet */
+#define E1000_RCTL_UPE            0x00000008	/* unicast promiscuous enable */
+#define E1000_RCTL_MPE            0x00000010	/* multicast promiscuous enab */
+#define E1000_RCTL_LPE            0x00000020	/* long packet enable */
+#define E1000_RCTL_LBM_NO         0x00000000	/* no loopback mode */
+#define E1000_RCTL_LBM_MAC        0x00000040	/* MAC loopback mode */
+#define E1000_RCTL_LBM_SLP        0x00000080	/* serial link loopback mode */
+#define E1000_RCTL_LBM_TCVR       0x000000C0	/* tcvr loopback mode */
+#define E1000_RCTL_DTYP_MASK      0x00000C00	/* Descriptor type mask */
+#define E1000_RCTL_DTYP_PS        0x00000400	/* Packet Split descriptor */
+#define E1000_RCTL_RDMTS_HALF     0x00000000	/* rx desc min threshold size */
+#define E1000_RCTL_RDMTS_QUAT     0x00000100	/* rx desc min threshold size */
+#define E1000_RCTL_RDMTS_EIGTH    0x00000200	/* rx desc min threshold size */
+#define E1000_RCTL_MO_SHIFT       12	/* multicast offset shift */
+#define E1000_RCTL_MO_0           0x00000000	/* multicast offset 11:0 */
+#define E1000_RCTL_MO_1           0x00001000	/* multicast offset 12:1 */
+#define E1000_RCTL_MO_2           0x00002000	/* multicast offset 13:2 */
+#define E1000_RCTL_MO_3           0x00003000	/* multicast offset 15:4 */
+#define E1000_RCTL_MDR            0x00004000	/* multicast desc ring 0 */
+#define E1000_RCTL_BAM            0x00008000	/* broadcast enable */
+/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */
+#define E1000_RCTL_SZ_2048        0x00000000	/* rx buffer size 2048 */
+#define E1000_RCTL_SZ_1024        0x00010000	/* rx buffer size 1024 */
+#define E1000_RCTL_SZ_512         0x00020000	/* rx buffer size 512 */
+#define E1000_RCTL_SZ_256         0x00030000	/* rx buffer size 256 */
+/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */
+#define E1000_RCTL_SZ_16384       0x00010000	/* rx buffer size 16384 */
+#define E1000_RCTL_SZ_8192        0x00020000	/* rx buffer size 8192 */
+#define E1000_RCTL_SZ_4096        0x00030000	/* rx buffer size 4096 */
+#define E1000_RCTL_VFE            0x00040000	/* vlan filter enable */
+#define E1000_RCTL_CFIEN          0x00080000	/* canonical form enable */
+#define E1000_RCTL_CFI            0x00100000	/* canonical form indicator */
+#define E1000_RCTL_DPF            0x00400000	/* discard pause frames */
+#define E1000_RCTL_PMCF           0x00800000	/* pass MAC control frames */
+#define E1000_RCTL_BSEX           0x02000000	/* Buffer size extension */
+#define E1000_RCTL_SECRC          0x04000000	/* Strip Ethernet CRC */
+#define E1000_RCTL_FLXBUF_MASK    0x78000000	/* Flexible buffer size */
+#define E1000_RCTL_FLXBUF_SHIFT   27	/* Flexible buffer shift */
+
+/* Use byte values for the following shift parameters
+ * Usage:
+ *     psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) &
+ *                  E1000_PSRCTL_BSIZE0_MASK) |
+ *                ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) &
+ *                  E1000_PSRCTL_BSIZE1_MASK) |
+ *                ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) &
+ *                  E1000_PSRCTL_BSIZE2_MASK) |
+ *                ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |;
+ *                  E1000_PSRCTL_BSIZE3_MASK))
+ * where value0 = [128..16256],  default=256
+ *       value1 = [1024..64512], default=4096
+ *       value2 = [0..64512],    default=4096
+ *       value3 = [0..64512],    default=0
+ */
+
+#define E1000_PSRCTL_BSIZE0_MASK   0x0000007F
+#define E1000_PSRCTL_BSIZE1_MASK   0x00003F00
+#define E1000_PSRCTL_BSIZE2_MASK   0x003F0000
+#define E1000_PSRCTL_BSIZE3_MASK   0x3F000000
+
+#define E1000_PSRCTL_BSIZE0_SHIFT  7	/* Shift _right_ 7 */
+#define E1000_PSRCTL_BSIZE1_SHIFT  2	/* Shift _right_ 2 */
+#define E1000_PSRCTL_BSIZE2_SHIFT  6	/* Shift _left_ 6 */
+#define E1000_PSRCTL_BSIZE3_SHIFT 14	/* Shift _left_ 14 */
+
+/* SW_W_SYNC definitions */
+#define E1000_SWFW_EEP_SM     0x0001
+#define E1000_SWFW_PHY0_SM    0x0002
+#define E1000_SWFW_PHY1_SM    0x0004
+#define E1000_SWFW_MAC_CSR_SM 0x0008
+
+/* Receive Descriptor */
+#define E1000_RDT_DELAY 0x0000ffff	/* Delay timer (1=1024us) */
+#define E1000_RDT_FPDB  0x80000000	/* Flush descriptor block */
+#define E1000_RDLEN_LEN 0x0007ff80	/* descriptor length */
+#define E1000_RDH_RDH   0x0000ffff	/* receive descriptor head */
+#define E1000_RDT_RDT   0x0000ffff	/* receive descriptor tail */
+
+/* Flow Control */
+#define E1000_FCRTH_RTH  0x0000FFF8	/* Mask Bits[15:3] for RTH */
+#define E1000_FCRTH_XFCE 0x80000000	/* External Flow Control Enable */
+#define E1000_FCRTL_RTL  0x0000FFF8	/* Mask Bits[15:3] for RTL */
+#define E1000_FCRTL_XONE 0x80000000	/* Enable XON frame transmission */
+
+/* Header split receive */
+#define E1000_RFCTL_ISCSI_DIS           0x00000001
+#define E1000_RFCTL_ISCSI_DWC_MASK      0x0000003E
+#define E1000_RFCTL_ISCSI_DWC_SHIFT     1
+#define E1000_RFCTL_NFSW_DIS            0x00000040
+#define E1000_RFCTL_NFSR_DIS            0x00000080
+#define E1000_RFCTL_NFS_VER_MASK        0x00000300
+#define E1000_RFCTL_NFS_VER_SHIFT       8
+#define E1000_RFCTL_IPV6_DIS            0x00000400
+#define E1000_RFCTL_IPV6_XSUM_DIS       0x00000800
+#define E1000_RFCTL_ACK_DIS             0x00001000
+#define E1000_RFCTL_ACKD_DIS            0x00002000
+#define E1000_RFCTL_IPFRSP_DIS          0x00004000
+#define E1000_RFCTL_EXTEN               0x00008000
+#define E1000_RFCTL_IPV6_EX_DIS         0x00010000
+#define E1000_RFCTL_NEW_IPV6_EXT_DIS    0x00020000
+
+/* Receive Descriptor Control */
+#define E1000_RXDCTL_PTHRESH 0x0000003F	/* RXDCTL Prefetch Threshold */
+#define E1000_RXDCTL_HTHRESH 0x00003F00	/* RXDCTL Host Threshold */
+#define E1000_RXDCTL_WTHRESH 0x003F0000	/* RXDCTL Writeback Threshold */
+#define E1000_RXDCTL_GRAN    0x01000000	/* RXDCTL Granularity */
+
+/* Transmit Descriptor Control */
+#define E1000_TXDCTL_PTHRESH 0x0000003F	/* TXDCTL Prefetch Threshold */
+#define E1000_TXDCTL_HTHRESH 0x00003F00	/* TXDCTL Host Threshold */
+#define E1000_TXDCTL_WTHRESH 0x003F0000	/* TXDCTL Writeback Threshold */
+#define E1000_TXDCTL_GRAN    0x01000000	/* TXDCTL Granularity */
+#define E1000_TXDCTL_LWTHRESH 0xFE000000	/* TXDCTL Low Threshold */
+#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000	/* GRAN=1, WTHRESH=1 */
+#define E1000_TXDCTL_COUNT_DESC 0x00400000	/* Enable the counting of desc.
+						   still to be processed. */
+/* Transmit Configuration Word */
+#define E1000_TXCW_FD         0x00000020	/* TXCW full duplex */
+#define E1000_TXCW_HD         0x00000040	/* TXCW half duplex */
+#define E1000_TXCW_PAUSE      0x00000080	/* TXCW sym pause request */
+#define E1000_TXCW_ASM_DIR    0x00000100	/* TXCW astm pause direction */
+#define E1000_TXCW_PAUSE_MASK 0x00000180	/* TXCW pause request mask */
+#define E1000_TXCW_RF         0x00003000	/* TXCW remote fault */
+#define E1000_TXCW_NP         0x00008000	/* TXCW next page */
+#define E1000_TXCW_CW         0x0000ffff	/* TxConfigWord mask */
+#define E1000_TXCW_TXC        0x40000000	/* Transmit Config control */
+#define E1000_TXCW_ANE        0x80000000	/* Auto-neg enable */
+
+/* Receive Configuration Word */
+#define E1000_RXCW_CW    0x0000ffff	/* RxConfigWord mask */
+#define E1000_RXCW_NC    0x04000000	/* Receive config no carrier */
+#define E1000_RXCW_IV    0x08000000	/* Receive config invalid */
+#define E1000_RXCW_CC    0x10000000	/* Receive config change */
+#define E1000_RXCW_C     0x20000000	/* Receive config */
+#define E1000_RXCW_SYNCH 0x40000000	/* Receive config synch */
+#define E1000_RXCW_ANC   0x80000000	/* Auto-neg complete */
+
+/* Transmit Control */
+#define E1000_TCTL_RST    0x00000001	/* software reset */
+#define E1000_TCTL_EN     0x00000002	/* enable tx */
+#define E1000_TCTL_BCE    0x00000004	/* busy check enable */
+#define E1000_TCTL_PSP    0x00000008	/* pad short packets */
+#define E1000_TCTL_CT     0x00000ff0	/* collision threshold */
+#define E1000_TCTL_COLD   0x003ff000	/* collision distance */
+#define E1000_TCTL_SWXOFF 0x00400000	/* SW Xoff transmission */
+#define E1000_TCTL_PBE    0x00800000	/* Packet Burst Enable */
+#define E1000_TCTL_RTLC   0x01000000	/* Re-transmit on late collision */
+#define E1000_TCTL_NRTU   0x02000000	/* No Re-transmit on underrun */
+#define E1000_TCTL_MULR   0x10000000	/* Multiple request support */
+/* Extended Transmit Control */
+#define E1000_TCTL_EXT_BST_MASK  0x000003FF	/* Backoff Slot Time */
+#define E1000_TCTL_EXT_GCEX_MASK 0x000FFC00	/* Gigabit Carry Extend Padding */
+
+/* Receive Checksum Control */
+#define E1000_RXCSUM_PCSS_MASK 0x000000FF	/* Packet Checksum Start */
+#define E1000_RXCSUM_IPOFL     0x00000100	/* IPv4 checksum offload */
+#define E1000_RXCSUM_TUOFL     0x00000200	/* TCP / UDP checksum offload */
+#define E1000_RXCSUM_IPV6OFL   0x00000400	/* IPv6 checksum offload */
+#define E1000_RXCSUM_IPPCSE    0x00001000	/* IP payload checksum enable */
+#define E1000_RXCSUM_PCSD      0x00002000	/* packet checksum disabled */
+
+/* Multiple Receive Queue Control */
+#define E1000_MRQC_ENABLE_MASK              0x00000003
+#define E1000_MRQC_ENABLE_RSS_2Q            0x00000001
+#define E1000_MRQC_ENABLE_RSS_INT           0x00000004
+#define E1000_MRQC_RSS_FIELD_MASK           0xFFFF0000
+#define E1000_MRQC_RSS_FIELD_IPV4_TCP       0x00010000
+#define E1000_MRQC_RSS_FIELD_IPV4           0x00020000
+#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX    0x00040000
+#define E1000_MRQC_RSS_FIELD_IPV6_EX        0x00080000
+#define E1000_MRQC_RSS_FIELD_IPV6           0x00100000
+#define E1000_MRQC_RSS_FIELD_IPV6_TCP       0x00200000
+
+/* Definitions for power management and wakeup registers */
+/* Wake Up Control */
+#define E1000_WUC_APME       0x00000001	/* APM Enable */
+#define E1000_WUC_PME_EN     0x00000002	/* PME Enable */
+#define E1000_WUC_PME_STATUS 0x00000004	/* PME Status */
+#define E1000_WUC_APMPME     0x00000008	/* Assert PME on APM Wakeup */
+#define E1000_WUC_SPM        0x80000000	/* Enable SPM */
+
+/* Wake Up Filter Control */
+#define E1000_WUFC_LNKC 0x00000001	/* Link Status Change Wakeup Enable */
+#define E1000_WUFC_MAG  0x00000002	/* Magic Packet Wakeup Enable */
+#define E1000_WUFC_EX   0x00000004	/* Directed Exact Wakeup Enable */
+#define E1000_WUFC_MC   0x00000008	/* Directed Multicast Wakeup Enable */
+#define E1000_WUFC_BC   0x00000010	/* Broadcast Wakeup Enable */
+#define E1000_WUFC_ARP  0x00000020	/* ARP Request Packet Wakeup Enable */
+#define E1000_WUFC_IPV4 0x00000040	/* Directed IPv4 Packet Wakeup Enable */
+#define E1000_WUFC_IPV6 0x00000080	/* Directed IPv6 Packet Wakeup Enable */
+#define E1000_WUFC_IGNORE_TCO      0x00008000	/* Ignore WakeOn TCO packets */
+#define E1000_WUFC_FLX0 0x00010000	/* Flexible Filter 0 Enable */
+#define E1000_WUFC_FLX1 0x00020000	/* Flexible Filter 1 Enable */
+#define E1000_WUFC_FLX2 0x00040000	/* Flexible Filter 2 Enable */
+#define E1000_WUFC_FLX3 0x00080000	/* Flexible Filter 3 Enable */
+#define E1000_WUFC_ALL_FILTERS 0x000F00FF	/* Mask for all wakeup filters */
+#define E1000_WUFC_FLX_OFFSET 16	/* Offset to the Flexible Filters bits */
+#define E1000_WUFC_FLX_FILTERS 0x000F0000	/* Mask for the 4 flexible filters */
+
+/* Wake Up Status */
+#define E1000_WUS_LNKC 0x00000001	/* Link Status Changed */
+#define E1000_WUS_MAG  0x00000002	/* Magic Packet Received */
+#define E1000_WUS_EX   0x00000004	/* Directed Exact Received */
+#define E1000_WUS_MC   0x00000008	/* Directed Multicast Received */
+#define E1000_WUS_BC   0x00000010	/* Broadcast Received */
+#define E1000_WUS_ARP  0x00000020	/* ARP Request Packet Received */
+#define E1000_WUS_IPV4 0x00000040	/* Directed IPv4 Packet Wakeup Received */
+#define E1000_WUS_IPV6 0x00000080	/* Directed IPv6 Packet Wakeup Received */
+#define E1000_WUS_FLX0 0x00010000	/* Flexible Filter 0 Match */
+#define E1000_WUS_FLX1 0x00020000	/* Flexible Filter 1 Match */
+#define E1000_WUS_FLX2 0x00040000	/* Flexible Filter 2 Match */
+#define E1000_WUS_FLX3 0x00080000	/* Flexible Filter 3 Match */
+#define E1000_WUS_FLX_FILTERS 0x000F0000	/* Mask for the 4 flexible filters */
+
+/* Management Control */
+#define E1000_MANC_SMBUS_EN      0x00000001	/* SMBus Enabled - RO */
+#define E1000_MANC_ASF_EN        0x00000002	/* ASF Enabled - RO */
+#define E1000_MANC_R_ON_FORCE    0x00000004	/* Reset on Force TCO - RO */
+#define E1000_MANC_RMCP_EN       0x00000100	/* Enable RCMP 026Fh Filtering */
+#define E1000_MANC_0298_EN       0x00000200	/* Enable RCMP 0298h Filtering */
+#define E1000_MANC_IPV4_EN       0x00000400	/* Enable IPv4 */
+#define E1000_MANC_IPV6_EN       0x00000800	/* Enable IPv6 */
+#define E1000_MANC_SNAP_EN       0x00001000	/* Accept LLC/SNAP */
+#define E1000_MANC_ARP_EN        0x00002000	/* Enable ARP Request Filtering */
+#define E1000_MANC_NEIGHBOR_EN   0x00004000	/* Enable Neighbor Discovery
+						 * Filtering */
+#define E1000_MANC_ARP_RES_EN    0x00008000	/* Enable ARP response Filtering */
+#define E1000_MANC_TCO_RESET     0x00010000	/* TCO Reset Occurred */
+#define E1000_MANC_RCV_TCO_EN    0x00020000	/* Receive TCO Packets Enabled */
+#define E1000_MANC_REPORT_STATUS 0x00040000	/* Status Reporting Enabled */
+#define E1000_MANC_RCV_ALL       0x00080000	/* Receive All Enabled */
+#define E1000_MANC_BLK_PHY_RST_ON_IDE   0x00040000	/* Block phy resets */
+#define E1000_MANC_EN_MAC_ADDR_FILTER   0x00100000	/* Enable MAC address
+							 * filtering */
+#define E1000_MANC_EN_MNG2HOST   0x00200000	/* Enable MNG packets to host
+						 * memory */
+#define E1000_MANC_EN_IP_ADDR_FILTER    0x00400000	/* Enable IP address
+							 * filtering */
+#define E1000_MANC_EN_XSUM_FILTER   0x00800000	/* Enable checksum filtering */
+#define E1000_MANC_BR_EN         0x01000000	/* Enable broadcast filtering */
+#define E1000_MANC_SMB_REQ       0x01000000	/* SMBus Request */
+#define E1000_MANC_SMB_GNT       0x02000000	/* SMBus Grant */
+#define E1000_MANC_SMB_CLK_IN    0x04000000	/* SMBus Clock In */
+#define E1000_MANC_SMB_DATA_IN   0x08000000	/* SMBus Data In */
+#define E1000_MANC_SMB_DATA_OUT  0x10000000	/* SMBus Data Out */
+#define E1000_MANC_SMB_CLK_OUT   0x20000000	/* SMBus Clock Out */
+
+#define E1000_MANC_SMB_DATA_OUT_SHIFT  28	/* SMBus Data Out Shift */
+#define E1000_MANC_SMB_CLK_OUT_SHIFT   29	/* SMBus Clock Out Shift */
+
+/* SW Semaphore Register */
+#define E1000_SWSM_SMBI         0x00000001	/* Driver Semaphore bit */
+#define E1000_SWSM_SWESMBI      0x00000002	/* FW Semaphore bit */
+#define E1000_SWSM_WMNG         0x00000004	/* Wake MNG Clock */
+#define E1000_SWSM_DRV_LOAD     0x00000008	/* Driver Loaded Bit */
+
+/* FW Semaphore Register */
+#define E1000_FWSM_MODE_MASK    0x0000000E	/* FW mode */
+#define E1000_FWSM_MODE_SHIFT            1
+#define E1000_FWSM_FW_VALID     0x00008000	/* FW established a valid mode */
+
+#define E1000_FWSM_RSPCIPHY        0x00000040	/* Reset PHY on PCI reset */
+#define E1000_FWSM_DISSW           0x10000000	/* FW disable SW Write Access */
+#define E1000_FWSM_SKUSEL_MASK     0x60000000	/* LAN SKU select */
+#define E1000_FWSM_SKUEL_SHIFT     29
+#define E1000_FWSM_SKUSEL_EMB      0x0	/* Embedded SKU */
+#define E1000_FWSM_SKUSEL_CONS     0x1	/* Consumer SKU */
+#define E1000_FWSM_SKUSEL_PERF_100 0x2	/* Perf & Corp 10/100 SKU */
+#define E1000_FWSM_SKUSEL_PERF_GBE 0x3	/* Perf & Copr GbE SKU */
+
+/* FFLT Debug Register */
+#define E1000_FFLT_DBG_INVC     0x00100000	/* Invalid /C/ code handling */
+
+typedef enum {
+	e1000_mng_mode_none = 0,
+	e1000_mng_mode_asf,
+	e1000_mng_mode_pt,
+	e1000_mng_mode_ipmi,
+	e1000_mng_mode_host_interface_only
+} e1000_mng_mode;
+
+/* Host Interface Control Register */
+#define E1000_HICR_EN           0x00000001	/* Enable Bit - RO */
+#define E1000_HICR_C            0x00000002	/* Driver sets this bit when done
+						 * to put command in RAM */
+#define E1000_HICR_SV           0x00000004	/* Status Validity */
+#define E1000_HICR_FWR          0x00000080	/* FW reset. Set by the Host */
+
+/* Host Interface Command Interface - Address range 0x8800-0x8EFF */
+#define E1000_HI_MAX_DATA_LENGTH         252	/* Host Interface data length */
+#define E1000_HI_MAX_BLOCK_BYTE_LENGTH  1792	/* Number of bytes in range */
+#define E1000_HI_MAX_BLOCK_DWORD_LENGTH  448	/* Number of dwords in range */
+#define E1000_HI_COMMAND_TIMEOUT         500	/* Time in ms to process HI command */
+
+struct e1000_host_command_header {
+	u8 command_id;
+	u8 command_length;
+	u8 command_options;	/* I/F bits for command, status for return */
+	u8 checksum;
+};
+struct e1000_host_command_info {
+	struct e1000_host_command_header command_header;	/* Command Head/Command Result Head has 4 bytes */
+	u8 command_data[E1000_HI_MAX_DATA_LENGTH];	/* Command data can length 0..252 */
+};
+
+/* Host SMB register #0 */
+#define E1000_HSMC0R_CLKIN      0x00000001	/* SMB Clock in */
+#define E1000_HSMC0R_DATAIN     0x00000002	/* SMB Data in */
+#define E1000_HSMC0R_DATAOUT    0x00000004	/* SMB Data out */
+#define E1000_HSMC0R_CLKOUT     0x00000008	/* SMB Clock out */
+
+/* Host SMB register #1 */
+#define E1000_HSMC1R_CLKIN      E1000_HSMC0R_CLKIN
+#define E1000_HSMC1R_DATAIN     E1000_HSMC0R_DATAIN
+#define E1000_HSMC1R_DATAOUT    E1000_HSMC0R_DATAOUT
+#define E1000_HSMC1R_CLKOUT     E1000_HSMC0R_CLKOUT
+
+/* FW Status Register */
+#define E1000_FWSTS_FWS_MASK    0x000000FF	/* FW Status */
+
+/* Wake Up Packet Length */
+#define E1000_WUPL_LENGTH_MASK 0x0FFF	/* Only the lower 12 bits are valid */
+
+#define E1000_MDALIGN          4096
+
+/* PCI-Ex registers*/
+
+/* PCI-Ex Control Register */
+#define E1000_GCR_RXD_NO_SNOOP          0x00000001
+#define E1000_GCR_RXDSCW_NO_SNOOP       0x00000002
+#define E1000_GCR_RXDSCR_NO_SNOOP       0x00000004
+#define E1000_GCR_TXD_NO_SNOOP          0x00000008
+#define E1000_GCR_TXDSCW_NO_SNOOP       0x00000010
+#define E1000_GCR_TXDSCR_NO_SNOOP       0x00000020
+
+#define PCI_EX_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP         | \
+                             E1000_GCR_RXDSCW_NO_SNOOP      | \
+                             E1000_GCR_RXDSCR_NO_SNOOP      | \
+                             E1000_GCR_TXD_NO_SNOOP         | \
+                             E1000_GCR_TXDSCW_NO_SNOOP      | \
+                             E1000_GCR_TXDSCR_NO_SNOOP)
+
+#define PCI_EX_82566_SNOOP_ALL PCI_EX_NO_SNOOP_ALL
+
+#define E1000_GCR_L1_ACT_WITHOUT_L0S_RX 0x08000000
+/* Function Active and Power State to MNG */
+#define E1000_FACTPS_FUNC0_POWER_STATE_MASK         0x00000003
+#define E1000_FACTPS_LAN0_VALID                     0x00000004
+#define E1000_FACTPS_FUNC0_AUX_EN                   0x00000008
+#define E1000_FACTPS_FUNC1_POWER_STATE_MASK         0x000000C0
+#define E1000_FACTPS_FUNC1_POWER_STATE_SHIFT        6
+#define E1000_FACTPS_LAN1_VALID                     0x00000100
+#define E1000_FACTPS_FUNC1_AUX_EN                   0x00000200
+#define E1000_FACTPS_FUNC2_POWER_STATE_MASK         0x00003000
+#define E1000_FACTPS_FUNC2_POWER_STATE_SHIFT        12
+#define E1000_FACTPS_IDE_ENABLE                     0x00004000
+#define E1000_FACTPS_FUNC2_AUX_EN                   0x00008000
+#define E1000_FACTPS_FUNC3_POWER_STATE_MASK         0x000C0000
+#define E1000_FACTPS_FUNC3_POWER_STATE_SHIFT        18
+#define E1000_FACTPS_SP_ENABLE                      0x00100000
+#define E1000_FACTPS_FUNC3_AUX_EN                   0x00200000
+#define E1000_FACTPS_FUNC4_POWER_STATE_MASK         0x03000000
+#define E1000_FACTPS_FUNC4_POWER_STATE_SHIFT        24
+#define E1000_FACTPS_IPMI_ENABLE                    0x04000000
+#define E1000_FACTPS_FUNC4_AUX_EN                   0x08000000
+#define E1000_FACTPS_MNGCG                          0x20000000
+#define E1000_FACTPS_LAN_FUNC_SEL                   0x40000000
+#define E1000_FACTPS_PM_STATE_CHANGED               0x80000000
+
+/* PCI-Ex Config Space */
+#define PCI_EX_LINK_STATUS           0x12
+#define PCI_EX_LINK_WIDTH_MASK       0x3F0
+#define PCI_EX_LINK_WIDTH_SHIFT      4
+
+/* EEPROM Commands - Microwire */
+#define EEPROM_READ_OPCODE_MICROWIRE  0x6	/* EEPROM read opcode */
+#define EEPROM_WRITE_OPCODE_MICROWIRE 0x5	/* EEPROM write opcode */
+#define EEPROM_ERASE_OPCODE_MICROWIRE 0x7	/* EEPROM erase opcode */
+#define EEPROM_EWEN_OPCODE_MICROWIRE  0x13	/* EEPROM erase/write enable */
+#define EEPROM_EWDS_OPCODE_MICROWIRE  0x10	/* EEPROM erase/write disable */
+
+/* EEPROM Commands - SPI */
+#define EEPROM_MAX_RETRY_SPI        5000	/* Max wait of 5ms, for RDY signal */
+#define EEPROM_READ_OPCODE_SPI      0x03	/* EEPROM read opcode */
+#define EEPROM_WRITE_OPCODE_SPI     0x02	/* EEPROM write opcode */
+#define EEPROM_A8_OPCODE_SPI        0x08	/* opcode bit-3 = address bit-8 */
+#define EEPROM_WREN_OPCODE_SPI      0x06	/* EEPROM set Write Enable latch */
+#define EEPROM_WRDI_OPCODE_SPI      0x04	/* EEPROM reset Write Enable latch */
+#define EEPROM_RDSR_OPCODE_SPI      0x05	/* EEPROM read Status register */
+#define EEPROM_WRSR_OPCODE_SPI      0x01	/* EEPROM write Status register */
+#define EEPROM_ERASE4K_OPCODE_SPI   0x20	/* EEPROM ERASE 4KB */
+#define EEPROM_ERASE64K_OPCODE_SPI  0xD8	/* EEPROM ERASE 64KB */
+#define EEPROM_ERASE256_OPCODE_SPI  0xDB	/* EEPROM ERASE 256B */
+
+/* EEPROM Size definitions */
+#define EEPROM_WORD_SIZE_SHIFT  6
+#define EEPROM_SIZE_SHIFT       10
+#define EEPROM_SIZE_MASK        0x1C00
+
+/* EEPROM Word Offsets */
+#define EEPROM_COMPAT                 0x0003
+#define EEPROM_ID_LED_SETTINGS        0x0004
+#define EEPROM_VERSION                0x0005
+#define EEPROM_SERDES_AMPLITUDE       0x0006	/* For SERDES output amplitude adjustment. */
+#define EEPROM_PHY_CLASS_WORD         0x0007
+#define EEPROM_INIT_CONTROL1_REG      0x000A
+#define EEPROM_INIT_CONTROL2_REG      0x000F
+#define EEPROM_SWDEF_PINS_CTRL_PORT_1 0x0010
+#define EEPROM_INIT_CONTROL3_PORT_B   0x0014
+#define EEPROM_INIT_3GIO_3            0x001A
+#define EEPROM_SWDEF_PINS_CTRL_PORT_0 0x0020
+#define EEPROM_INIT_CONTROL3_PORT_A   0x0024
+#define EEPROM_CFG                    0x0012
+#define EEPROM_FLASH_VERSION          0x0032
+#define EEPROM_CHECKSUM_REG           0x003F
+
+#define E1000_EEPROM_CFG_DONE         0x00040000	/* MNG config cycle done */
+#define E1000_EEPROM_CFG_DONE_PORT_1  0x00080000	/* ...for second port */
+
+/* Word definitions for ID LED Settings */
+#define ID_LED_RESERVED_0000 0x0000
+#define ID_LED_RESERVED_FFFF 0xFFFF
+#define ID_LED_DEFAULT       ((ID_LED_OFF1_ON2 << 12) | \
+                              (ID_LED_OFF1_OFF2 << 8) | \
+                              (ID_LED_DEF1_DEF2 << 4) | \
+                              (ID_LED_DEF1_DEF2))
+#define ID_LED_DEF1_DEF2     0x1
+#define ID_LED_DEF1_ON2      0x2
+#define ID_LED_DEF1_OFF2     0x3
+#define ID_LED_ON1_DEF2      0x4
+#define ID_LED_ON1_ON2       0x5
+#define ID_LED_ON1_OFF2      0x6
+#define ID_LED_OFF1_DEF2     0x7
+#define ID_LED_OFF1_ON2      0x8
+#define ID_LED_OFF1_OFF2     0x9
+
+#define IGP_ACTIVITY_LED_MASK   0xFFFFF0FF
+#define IGP_ACTIVITY_LED_ENABLE 0x0300
+#define IGP_LED3_MODE           0x07000000
+
+/* Mask bits for SERDES amplitude adjustment in Word 6 of the EEPROM */
+#define EEPROM_SERDES_AMPLITUDE_MASK  0x000F
+
+/* Mask bit for PHY class in Word 7 of the EEPROM */
+#define EEPROM_PHY_CLASS_A   0x8000
+
+/* Mask bits for fields in Word 0x0a of the EEPROM */
+#define EEPROM_WORD0A_ILOS   0x0010
+#define EEPROM_WORD0A_SWDPIO 0x01E0
+#define EEPROM_WORD0A_LRST   0x0200
+#define EEPROM_WORD0A_FD     0x0400
+#define EEPROM_WORD0A_66MHZ  0x0800
+
+/* Mask bits for fields in Word 0x0f of the EEPROM */
+#define EEPROM_WORD0F_PAUSE_MASK 0x3000
+#define EEPROM_WORD0F_PAUSE      0x1000
+#define EEPROM_WORD0F_ASM_DIR    0x2000
+#define EEPROM_WORD0F_ANE        0x0800
+#define EEPROM_WORD0F_SWPDIO_EXT 0x00F0
+#define EEPROM_WORD0F_LPLU       0x0001
+
+/* Mask bits for fields in Word 0x10/0x20 of the EEPROM */
+#define EEPROM_WORD1020_GIGA_DISABLE         0x0010
+#define EEPROM_WORD1020_GIGA_DISABLE_NON_D0A 0x0008
+
+/* Mask bits for fields in Word 0x1a of the EEPROM */
+#define EEPROM_WORD1A_ASPM_MASK  0x000C
+
+/* For checksumming, the sum of all words in the EEPROM should equal 0xBABA. */
+#define EEPROM_SUM 0xBABA
+
+/* EEPROM Map defines (WORD OFFSETS)*/
+#define EEPROM_NODE_ADDRESS_BYTE_0 0
+#define EEPROM_PBA_BYTE_1          8
+
+#define EEPROM_RESERVED_WORD          0xFFFF
+
+/* EEPROM Map Sizes (Byte Counts) */
+#define PBA_SIZE 4
+
+/* Collision related configuration parameters */
+#define E1000_COLLISION_THRESHOLD       15
+#define E1000_CT_SHIFT                  4
+/* Collision distance is a 0-based value that applies to
+ * half-duplex-capable hardware only. */
+#define E1000_COLLISION_DISTANCE        63
+#define E1000_COLLISION_DISTANCE_82542  64
+#define E1000_FDX_COLLISION_DISTANCE    E1000_COLLISION_DISTANCE
+#define E1000_HDX_COLLISION_DISTANCE    E1000_COLLISION_DISTANCE
+#define E1000_COLD_SHIFT                12
+
+/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define REQ_TX_DESCRIPTOR_MULTIPLE  8
+#define REQ_RX_DESCRIPTOR_MULTIPLE  8
+
+/* Default values for the transmit IPG register */
+#define DEFAULT_82542_TIPG_IPGT        10
+#define DEFAULT_82543_TIPG_IPGT_FIBER  9
+#define DEFAULT_82543_TIPG_IPGT_COPPER 8
+
+#define E1000_TIPG_IPGT_MASK  0x000003FF
+#define E1000_TIPG_IPGR1_MASK 0x000FFC00
+#define E1000_TIPG_IPGR2_MASK 0x3FF00000
+
+#define DEFAULT_82542_TIPG_IPGR1 2
+#define DEFAULT_82543_TIPG_IPGR1 8
+#define E1000_TIPG_IPGR1_SHIFT  10
+
+#define DEFAULT_82542_TIPG_IPGR2 10
+#define DEFAULT_82543_TIPG_IPGR2 6
+#define E1000_TIPG_IPGR2_SHIFT  20
+
+#define E1000_TXDMAC_DPP 0x00000001
+
+/* Adaptive IFS defines */
+#define TX_THRESHOLD_START     8
+#define TX_THRESHOLD_INCREMENT 10
+#define TX_THRESHOLD_DECREMENT 1
+#define TX_THRESHOLD_STOP      190
+#define TX_THRESHOLD_DISABLE   0
+#define TX_THRESHOLD_TIMER_MS  10000
+#define MIN_NUM_XMITS          1000
+#define IFS_MAX                80
+#define IFS_STEP               10
+#define IFS_MIN                40
+#define IFS_RATIO              4
+
+/* Extended Configuration Control and Size */
+#define E1000_EXTCNF_CTRL_PCIE_WRITE_ENABLE 0x00000001
+#define E1000_EXTCNF_CTRL_PHY_WRITE_ENABLE  0x00000002
+#define E1000_EXTCNF_CTRL_D_UD_ENABLE       0x00000004
+#define E1000_EXTCNF_CTRL_D_UD_LATENCY      0x00000008
+#define E1000_EXTCNF_CTRL_D_UD_OWNER        0x00000010
+#define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP 0x00000020
+#define E1000_EXTCNF_CTRL_MDIO_HW_OWNERSHIP 0x00000040
+#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER   0x0FFF0000
+
+#define E1000_EXTCNF_SIZE_EXT_PHY_LENGTH    0x000000FF
+#define E1000_EXTCNF_SIZE_EXT_DOCK_LENGTH   0x0000FF00
+#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH   0x00FF0000
+#define E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE  0x00000001
+#define E1000_EXTCNF_CTRL_SWFLAG            0x00000020
+
+/* PBA constants */
+#define E1000_PBA_8K 0x0008	/* 8KB, default Rx allocation */
+#define E1000_PBA_12K 0x000C	/* 12KB, default Rx allocation */
+#define E1000_PBA_16K 0x0010	/* 16KB, default TX allocation */
+#define E1000_PBA_20K 0x0014
+#define E1000_PBA_22K 0x0016
+#define E1000_PBA_24K 0x0018
+#define E1000_PBA_30K 0x001E
+#define E1000_PBA_32K 0x0020
+#define E1000_PBA_34K 0x0022
+#define E1000_PBA_38K 0x0026
+#define E1000_PBA_40K 0x0028
+#define E1000_PBA_48K 0x0030	/* 48KB, default RX allocation */
+
+#define E1000_PBS_16K E1000_PBA_16K
+
+/* Flow Control Constants */
+#define FLOW_CONTROL_ADDRESS_LOW  0x00C28001
+#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100
+#define FLOW_CONTROL_TYPE         0x8808
+
+/* The historical defaults for the flow control values are given below. */
+#define FC_DEFAULT_HI_THRESH        (0x8000)	/* 32KB */
+#define FC_DEFAULT_LO_THRESH        (0x4000)	/* 16KB */
+#define FC_DEFAULT_TX_TIMER         (0x100)	/* ~130 us */
+
+/* PCIX Config space */
+#define PCIX_COMMAND_REGISTER    0xE6
+#define PCIX_STATUS_REGISTER_LO  0xE8
+#define PCIX_STATUS_REGISTER_HI  0xEA
+
+#define PCIX_COMMAND_MMRBC_MASK      0x000C
+#define PCIX_COMMAND_MMRBC_SHIFT     0x2
+#define PCIX_STATUS_HI_MMRBC_MASK    0x0060
+#define PCIX_STATUS_HI_MMRBC_SHIFT   0x5
+#define PCIX_STATUS_HI_MMRBC_4K      0x3
+#define PCIX_STATUS_HI_MMRBC_2K      0x2
+
+/* Number of bits required to shift right the "pause" bits from the
+ * EEPROM (bits 13:12) to the "pause" (bits 8:7) field in the TXCW register.
+ */
+#define PAUSE_SHIFT 5
+
+/* Number of bits required to shift left the "SWDPIO" bits from the
+ * EEPROM (bits 8:5) to the "SWDPIO" (bits 25:22) field in the CTRL register.
+ */
+#define SWDPIO_SHIFT 17
+
+/* Number of bits required to shift left the "SWDPIO_EXT" bits from the
+ * EEPROM word F (bits 7:4) to the bits 11:8 of The Extended CTRL register.
+ */
+#define SWDPIO__EXT_SHIFT 4
+
+/* Number of bits required to shift left the "ILOS" bit from the EEPROM
+ * (bit 4) to the "ILOS" (bit 7) field in the CTRL register.
+ */
+#define ILOS_SHIFT  3
+
+#define RECEIVE_BUFFER_ALIGN_SIZE  (256)
+
+/* Number of milliseconds we wait for auto-negotiation to complete */
+#define LINK_UP_TIMEOUT             500
+
+/* Number of milliseconds we wait for Eeprom auto read bit done after MAC reset */
+#define AUTO_READ_DONE_TIMEOUT      10
+/* Number of milliseconds we wait for PHY configuration done after MAC reset */
+#define PHY_CFG_TIMEOUT             100
+
+#define E1000_TX_BUFFER_SIZE ((u32)1514)
+
+/* The carrier extension symbol, as received by the NIC. */
+#define CARRIER_EXTENSION   0x0F
+
+/* TBI_ACCEPT macro definition:
+ *
+ * This macro requires:
+ *      adapter = a pointer to struct e1000_hw
+ *      status = the 8 bit status field of the RX descriptor with EOP set
+ *      error = the 8 bit error field of the RX descriptor with EOP set
+ *      length = the sum of all the length fields of the RX descriptors that
+ *               make up the current frame
+ *      last_byte = the last byte of the frame DMAed by the hardware
+ *      max_frame_length = the maximum frame length we want to accept.
+ *      min_frame_length = the minimum frame length we want to accept.
+ *
+ * This macro is a conditional that should be used in the interrupt
+ * handler's Rx processing routine when RxErrors have been detected.
+ *
+ * Typical use:
+ *  ...
+ *  if (TBI_ACCEPT) {
+ *      accept_frame = true;
+ *      e1000_tbi_adjust_stats(adapter, MacAddress);
+ *      frame_length--;
+ *  } else {
+ *      accept_frame = false;
+ *  }
+ *  ...
+ */
+
+#define TBI_ACCEPT(adapter, status, errors, length, last_byte) \
+    ((adapter)->tbi_compatibility_on && \
+     (((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \
+     ((last_byte) == CARRIER_EXTENSION) && \
+     (((status) & E1000_RXD_STAT_VP) ? \
+          (((length) > ((adapter)->min_frame_size - VLAN_TAG_SIZE)) && \
+           ((length) <= ((adapter)->max_frame_size + 1))) : \
+          (((length) > (adapter)->min_frame_size) && \
+           ((length) <= ((adapter)->max_frame_size + VLAN_TAG_SIZE + 1)))))
+
+/* Structures, enums, and macros for the PHY */
+
+/* Bit definitions for the Management Data IO (MDIO) and Management Data
+ * Clock (MDC) pins in the Device Control Register.
+ */
+#define E1000_CTRL_PHY_RESET_DIR  E1000_CTRL_SWDPIO0
+#define E1000_CTRL_PHY_RESET      E1000_CTRL_SWDPIN0
+#define E1000_CTRL_MDIO_DIR       E1000_CTRL_SWDPIO2
+#define E1000_CTRL_MDIO           E1000_CTRL_SWDPIN2
+#define E1000_CTRL_MDC_DIR        E1000_CTRL_SWDPIO3
+#define E1000_CTRL_MDC            E1000_CTRL_SWDPIN3
+#define E1000_CTRL_PHY_RESET_DIR4 E1000_CTRL_EXT_SDP4_DIR
+#define E1000_CTRL_PHY_RESET4     E1000_CTRL_EXT_SDP4_DATA
+
+/* PHY 1000 MII Register/Bit Definitions */
+/* PHY Registers defined by IEEE */
+#define PHY_CTRL         0x00	/* Control Register */
+#define PHY_STATUS       0x01	/* Status Register */
+#define PHY_ID1          0x02	/* Phy Id Reg (word 1) */
+#define PHY_ID2          0x03	/* Phy Id Reg (word 2) */
+#define PHY_AUTONEG_ADV  0x04	/* Autoneg Advertisement */
+#define PHY_LP_ABILITY   0x05	/* Link Partner Ability (Base Page) */
+#define PHY_AUTONEG_EXP  0x06	/* Autoneg Expansion Reg */
+#define PHY_NEXT_PAGE_TX 0x07	/* Next Page TX */
+#define PHY_LP_NEXT_PAGE 0x08	/* Link Partner Next Page */
+#define PHY_1000T_CTRL   0x09	/* 1000Base-T Control Reg */
+#define PHY_1000T_STATUS 0x0A	/* 1000Base-T Status Reg */
+#define PHY_EXT_STATUS   0x0F	/* Extended Status Reg */
+
+#define MAX_PHY_REG_ADDRESS        0x1F	/* 5 bit address bus (0-0x1F) */
+#define MAX_PHY_MULTI_PAGE_REG     0xF	/* Registers equal on all pages */
+
+/* M88E1000 Specific Registers */
+#define M88E1000_PHY_SPEC_CTRL     0x10	/* PHY Specific Control Register */
+#define M88E1000_PHY_SPEC_STATUS   0x11	/* PHY Specific Status Register */
+#define M88E1000_INT_ENABLE        0x12	/* Interrupt Enable Register */
+#define M88E1000_INT_STATUS        0x13	/* Interrupt Status Register */
+#define M88E1000_EXT_PHY_SPEC_CTRL 0x14	/* Extended PHY Specific Control */
+#define M88E1000_RX_ERR_CNTR       0x15	/* Receive Error Counter */
+
+#define M88E1000_PHY_EXT_CTRL      0x1A	/* PHY extend control register */
+#define M88E1000_PHY_PAGE_SELECT   0x1D	/* Reg 29 for page number setting */
+#define M88E1000_PHY_GEN_CONTROL   0x1E	/* Its meaning depends on reg 29 */
+#define M88E1000_PHY_VCO_REG_BIT8  0x100	/* Bits 8 & 11 are adjusted for */
+#define M88E1000_PHY_VCO_REG_BIT11 0x800	/* improved BER performance */
+
+#define IGP01E1000_IEEE_REGS_PAGE  0x0000
+#define IGP01E1000_IEEE_RESTART_AUTONEG 0x3300
+#define IGP01E1000_IEEE_FORCE_GIGA      0x0140
+
+/* IGP01E1000 Specific Registers */
+#define IGP01E1000_PHY_PORT_CONFIG 0x10	/* PHY Specific Port Config Register */
+#define IGP01E1000_PHY_PORT_STATUS 0x11	/* PHY Specific Status Register */
+#define IGP01E1000_PHY_PORT_CTRL   0x12	/* PHY Specific Control Register */
+#define IGP01E1000_PHY_LINK_HEALTH 0x13	/* PHY Link Health Register */
+#define IGP01E1000_GMII_FIFO       0x14	/* GMII FIFO Register */
+#define IGP01E1000_PHY_CHANNEL_QUALITY 0x15	/* PHY Channel Quality Register */
+#define IGP02E1000_PHY_POWER_MGMT      0x19
+#define IGP01E1000_PHY_PAGE_SELECT     0x1F	/* PHY Page Select Core Register */
+
+/* IGP01E1000 AGC Registers - stores the cable length values*/
+#define IGP01E1000_PHY_AGC_A        0x1172
+#define IGP01E1000_PHY_AGC_B        0x1272
+#define IGP01E1000_PHY_AGC_C        0x1472
+#define IGP01E1000_PHY_AGC_D        0x1872
+
+/* IGP02E1000 AGC Registers for cable length values */
+#define IGP02E1000_PHY_AGC_A        0x11B1
+#define IGP02E1000_PHY_AGC_B        0x12B1
+#define IGP02E1000_PHY_AGC_C        0x14B1
+#define IGP02E1000_PHY_AGC_D        0x18B1
+
+/* IGP01E1000 DSP Reset Register */
+#define IGP01E1000_PHY_DSP_RESET   0x1F33
+#define IGP01E1000_PHY_DSP_SET     0x1F71
+#define IGP01E1000_PHY_DSP_FFE     0x1F35
+
+#define IGP01E1000_PHY_CHANNEL_NUM    4
+#define IGP02E1000_PHY_CHANNEL_NUM    4
+
+#define IGP01E1000_PHY_AGC_PARAM_A    0x1171
+#define IGP01E1000_PHY_AGC_PARAM_B    0x1271
+#define IGP01E1000_PHY_AGC_PARAM_C    0x1471
+#define IGP01E1000_PHY_AGC_PARAM_D    0x1871
+
+#define IGP01E1000_PHY_EDAC_MU_INDEX        0xC000
+#define IGP01E1000_PHY_EDAC_SIGN_EXT_9_BITS 0x8000
+
+#define IGP01E1000_PHY_ANALOG_TX_STATE      0x2890
+#define IGP01E1000_PHY_ANALOG_CLASS_A       0x2000
+#define IGP01E1000_PHY_FORCE_ANALOG_ENABLE  0x0004
+#define IGP01E1000_PHY_DSP_FFE_CM_CP        0x0069
+
+#define IGP01E1000_PHY_DSP_FFE_DEFAULT      0x002A
+/* IGP01E1000 PCS Initialization register - stores the polarity status when
+ * speed = 1000 Mbps. */
+#define IGP01E1000_PHY_PCS_INIT_REG  0x00B4
+#define IGP01E1000_PHY_PCS_CTRL_REG  0x00B5
+
+#define IGP01E1000_ANALOG_REGS_PAGE  0x20C0
+
+/* PHY Control Register */
+#define MII_CR_SPEED_SELECT_MSB 0x0040	/* bits 6,13: 10=1000, 01=100, 00=10 */
+#define MII_CR_COLL_TEST_ENABLE 0x0080	/* Collision test enable */
+#define MII_CR_FULL_DUPLEX      0x0100	/* FDX =1, half duplex =0 */
+#define MII_CR_RESTART_AUTO_NEG 0x0200	/* Restart auto negotiation */
+#define MII_CR_ISOLATE          0x0400	/* Isolate PHY from MII */
+#define MII_CR_POWER_DOWN       0x0800	/* Power down */
+#define MII_CR_AUTO_NEG_EN      0x1000	/* Auto Neg Enable */
+#define MII_CR_SPEED_SELECT_LSB 0x2000	/* bits 6,13: 10=1000, 01=100, 00=10 */
+#define MII_CR_LOOPBACK         0x4000	/* 0 = normal, 1 = loopback */
+#define MII_CR_RESET            0x8000	/* 0 = normal, 1 = PHY reset */
+
+/* PHY Status Register */
+#define MII_SR_EXTENDED_CAPS     0x0001	/* Extended register capabilities */
+#define MII_SR_JABBER_DETECT     0x0002	/* Jabber Detected */
+#define MII_SR_LINK_STATUS       0x0004	/* Link Status 1 = link */
+#define MII_SR_AUTONEG_CAPS      0x0008	/* Auto Neg Capable */
+#define MII_SR_REMOTE_FAULT      0x0010	/* Remote Fault Detect */
+#define MII_SR_AUTONEG_COMPLETE  0x0020	/* Auto Neg Complete */
+#define MII_SR_PREAMBLE_SUPPRESS 0x0040	/* Preamble may be suppressed */
+#define MII_SR_EXTENDED_STATUS   0x0100	/* Ext. status info in Reg 0x0F */
+#define MII_SR_100T2_HD_CAPS     0x0200	/* 100T2 Half Duplex Capable */
+#define MII_SR_100T2_FD_CAPS     0x0400	/* 100T2 Full Duplex Capable */
+#define MII_SR_10T_HD_CAPS       0x0800	/* 10T   Half Duplex Capable */
+#define MII_SR_10T_FD_CAPS       0x1000	/* 10T   Full Duplex Capable */
+#define MII_SR_100X_HD_CAPS      0x2000	/* 100X  Half Duplex Capable */
+#define MII_SR_100X_FD_CAPS      0x4000	/* 100X  Full Duplex Capable */
+#define MII_SR_100T4_CAPS        0x8000	/* 100T4 Capable */
+
+/* Autoneg Advertisement Register */
+#define NWAY_AR_SELECTOR_FIELD 0x0001	/* indicates IEEE 802.3 CSMA/CD */
+#define NWAY_AR_10T_HD_CAPS    0x0020	/* 10T   Half Duplex Capable */
+#define NWAY_AR_10T_FD_CAPS    0x0040	/* 10T   Full Duplex Capable */
+#define NWAY_AR_100TX_HD_CAPS  0x0080	/* 100TX Half Duplex Capable */
+#define NWAY_AR_100TX_FD_CAPS  0x0100	/* 100TX Full Duplex Capable */
+#define NWAY_AR_100T4_CAPS     0x0200	/* 100T4 Capable */
+#define NWAY_AR_PAUSE          0x0400	/* Pause operation desired */
+#define NWAY_AR_ASM_DIR        0x0800	/* Asymmetric Pause Direction bit */
+#define NWAY_AR_REMOTE_FAULT   0x2000	/* Remote Fault detected */
+#define NWAY_AR_NEXT_PAGE      0x8000	/* Next Page ability supported */
+
+/* Link Partner Ability Register (Base Page) */
+#define NWAY_LPAR_SELECTOR_FIELD 0x0000	/* LP protocol selector field */
+#define NWAY_LPAR_10T_HD_CAPS    0x0020	/* LP is 10T   Half Duplex Capable */
+#define NWAY_LPAR_10T_FD_CAPS    0x0040	/* LP is 10T   Full Duplex Capable */
+#define NWAY_LPAR_100TX_HD_CAPS  0x0080	/* LP is 100TX Half Duplex Capable */
+#define NWAY_LPAR_100TX_FD_CAPS  0x0100	/* LP is 100TX Full Duplex Capable */
+#define NWAY_LPAR_100T4_CAPS     0x0200	/* LP is 100T4 Capable */
+#define NWAY_LPAR_PAUSE          0x0400	/* LP Pause operation desired */
+#define NWAY_LPAR_ASM_DIR        0x0800	/* LP Asymmetric Pause Direction bit */
+#define NWAY_LPAR_REMOTE_FAULT   0x2000	/* LP has detected Remote Fault */
+#define NWAY_LPAR_ACKNOWLEDGE    0x4000	/* LP has rx'd link code word */
+#define NWAY_LPAR_NEXT_PAGE      0x8000	/* Next Page ability supported */
+
+/* Autoneg Expansion Register */
+#define NWAY_ER_LP_NWAY_CAPS      0x0001	/* LP has Auto Neg Capability */
+#define NWAY_ER_PAGE_RXD          0x0002	/* LP is 10T   Half Duplex Capable */
+#define NWAY_ER_NEXT_PAGE_CAPS    0x0004	/* LP is 10T   Full Duplex Capable */
+#define NWAY_ER_LP_NEXT_PAGE_CAPS 0x0008	/* LP is 100TX Half Duplex Capable */
+#define NWAY_ER_PAR_DETECT_FAULT  0x0010	/* LP is 100TX Full Duplex Capable */
+
+/* Next Page TX Register */
+#define NPTX_MSG_CODE_FIELD 0x0001	/* NP msg code or unformatted data */
+#define NPTX_TOGGLE         0x0800	/* Toggles between exchanges
+					 * of different NP
+					 */
+#define NPTX_ACKNOWLDGE2    0x1000	/* 1 = will comply with msg
+					 * 0 = cannot comply with msg
+					 */
+#define NPTX_MSG_PAGE       0x2000	/* formatted(1)/unformatted(0) pg */
+#define NPTX_NEXT_PAGE      0x8000	/* 1 = addition NP will follow
+					 * 0 = sending last NP
+					 */
+
+/* Link Partner Next Page Register */
+#define LP_RNPR_MSG_CODE_FIELD 0x0001	/* NP msg code or unformatted data */
+#define LP_RNPR_TOGGLE         0x0800	/* Toggles between exchanges
+					 * of different NP
+					 */
+#define LP_RNPR_ACKNOWLDGE2    0x1000	/* 1 = will comply with msg
+					 * 0 = cannot comply with msg
+					 */
+#define LP_RNPR_MSG_PAGE       0x2000	/* formatted(1)/unformatted(0) pg */
+#define LP_RNPR_ACKNOWLDGE     0x4000	/* 1 = ACK / 0 = NO ACK */
+#define LP_RNPR_NEXT_PAGE      0x8000	/* 1 = addition NP will follow
+					 * 0 = sending last NP
+					 */
+
+/* 1000BASE-T Control Register */
+#define CR_1000T_ASYM_PAUSE      0x0080	/* Advertise asymmetric pause bit */
+#define CR_1000T_HD_CAPS         0x0100	/* Advertise 1000T HD capability */
+#define CR_1000T_FD_CAPS         0x0200	/* Advertise 1000T FD capability  */
+#define CR_1000T_REPEATER_DTE    0x0400	/* 1=Repeater/switch device port */
+					/* 0=DTE device */
+#define CR_1000T_MS_VALUE        0x0800	/* 1=Configure PHY as Master */
+					/* 0=Configure PHY as Slave */
+#define CR_1000T_MS_ENABLE       0x1000	/* 1=Master/Slave manual config value */
+					/* 0=Automatic Master/Slave config */
+#define CR_1000T_TEST_MODE_NORMAL 0x0000	/* Normal Operation */
+#define CR_1000T_TEST_MODE_1     0x2000	/* Transmit Waveform test */
+#define CR_1000T_TEST_MODE_2     0x4000	/* Master Transmit Jitter test */
+#define CR_1000T_TEST_MODE_3     0x6000	/* Slave Transmit Jitter test */
+#define CR_1000T_TEST_MODE_4     0x8000	/* Transmitter Distortion test */
+
+/* 1000BASE-T Status Register */
+#define SR_1000T_IDLE_ERROR_CNT   0x00FF	/* Num idle errors since last read */
+#define SR_1000T_ASYM_PAUSE_DIR   0x0100	/* LP asymmetric pause direction bit */
+#define SR_1000T_LP_HD_CAPS       0x0400	/* LP is 1000T HD capable */
+#define SR_1000T_LP_FD_CAPS       0x0800	/* LP is 1000T FD capable */
+#define SR_1000T_REMOTE_RX_STATUS 0x1000	/* Remote receiver OK */
+#define SR_1000T_LOCAL_RX_STATUS  0x2000	/* Local receiver OK */
+#define SR_1000T_MS_CONFIG_RES    0x4000	/* 1=Local TX is Master, 0=Slave */
+#define SR_1000T_MS_CONFIG_FAULT  0x8000	/* Master/Slave config fault */
+#define SR_1000T_REMOTE_RX_STATUS_SHIFT          12
+#define SR_1000T_LOCAL_RX_STATUS_SHIFT           13
+#define SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT    5
+#define FFE_IDLE_ERR_COUNT_TIMEOUT_20            20
+#define FFE_IDLE_ERR_COUNT_TIMEOUT_100           100
+
+/* Extended Status Register */
+#define IEEE_ESR_1000T_HD_CAPS 0x1000	/* 1000T HD capable */
+#define IEEE_ESR_1000T_FD_CAPS 0x2000	/* 1000T FD capable */
+#define IEEE_ESR_1000X_HD_CAPS 0x4000	/* 1000X HD capable */
+#define IEEE_ESR_1000X_FD_CAPS 0x8000	/* 1000X FD capable */
+
+#define PHY_TX_POLARITY_MASK   0x0100	/* register 10h bit 8 (polarity bit) */
+#define PHY_TX_NORMAL_POLARITY 0	/* register 10h bit 8 (normal polarity) */
+
+#define AUTO_POLARITY_DISABLE  0x0010	/* register 11h bit 4 */
+				      /* (0=enable, 1=disable) */
+
+/* M88E1000 PHY Specific Control Register */
+#define M88E1000_PSCR_JABBER_DISABLE    0x0001	/* 1=Jabber Function disabled */
+#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002	/* 1=Polarity Reversal enabled */
+#define M88E1000_PSCR_SQE_TEST          0x0004	/* 1=SQE Test enabled */
+#define M88E1000_PSCR_CLK125_DISABLE    0x0010	/* 1=CLK125 low,
+						 * 0=CLK125 toggling
+						 */
+#define M88E1000_PSCR_MDI_MANUAL_MODE  0x0000	/* MDI Crossover Mode bits 6:5 */
+					       /* Manual MDI configuration */
+#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020	/* Manual MDIX configuration */
+#define M88E1000_PSCR_AUTO_X_1000T     0x0040	/* 1000BASE-T: Auto crossover,
+						 *  100BASE-TX/10BASE-T:
+						 *  MDI Mode
+						 */
+#define M88E1000_PSCR_AUTO_X_MODE      0x0060	/* Auto crossover enabled
+						 * all speeds.
+						 */
+#define M88E1000_PSCR_10BT_EXT_DIST_ENABLE 0x0080
+					/* 1=Enable Extended 10BASE-T distance
+					 * (Lower 10BASE-T RX Threshold)
+					 * 0=Normal 10BASE-T RX Threshold */
+#define M88E1000_PSCR_MII_5BIT_ENABLE      0x0100
+					/* 1=5-Bit interface in 100BASE-TX
+					 * 0=MII interface in 100BASE-TX */
+#define M88E1000_PSCR_SCRAMBLER_DISABLE    0x0200	/* 1=Scrambler disable */
+#define M88E1000_PSCR_FORCE_LINK_GOOD      0x0400	/* 1=Force link good */
+#define M88E1000_PSCR_ASSERT_CRS_ON_TX     0x0800	/* 1=Assert CRS on Transmit */
+
+#define M88E1000_PSCR_POLARITY_REVERSAL_SHIFT    1
+#define M88E1000_PSCR_AUTO_X_MODE_SHIFT          5
+#define M88E1000_PSCR_10BT_EXT_DIST_ENABLE_SHIFT 7
+
+/* M88E1000 PHY Specific Status Register */
+#define M88E1000_PSSR_JABBER             0x0001	/* 1=Jabber */
+#define M88E1000_PSSR_REV_POLARITY       0x0002	/* 1=Polarity reversed */
+#define M88E1000_PSSR_DOWNSHIFT          0x0020	/* 1=Downshifted */
+#define M88E1000_PSSR_MDIX               0x0040	/* 1=MDIX; 0=MDI */
+#define M88E1000_PSSR_CABLE_LENGTH       0x0380	/* 0=<50M;1=50-80M;2=80-110M;
+						 * 3=110-140M;4=>140M */
+#define M88E1000_PSSR_LINK               0x0400	/* 1=Link up, 0=Link down */
+#define M88E1000_PSSR_SPD_DPLX_RESOLVED  0x0800	/* 1=Speed & Duplex resolved */
+#define M88E1000_PSSR_PAGE_RCVD          0x1000	/* 1=Page received */
+#define M88E1000_PSSR_DPLX               0x2000	/* 1=Duplex 0=Half Duplex */
+#define M88E1000_PSSR_SPEED              0xC000	/* Speed, bits 14:15 */
+#define M88E1000_PSSR_10MBS              0x0000	/* 00=10Mbs */
+#define M88E1000_PSSR_100MBS             0x4000	/* 01=100Mbs */
+#define M88E1000_PSSR_1000MBS            0x8000	/* 10=1000Mbs */
+
+#define M88E1000_PSSR_REV_POLARITY_SHIFT 1
+#define M88E1000_PSSR_DOWNSHIFT_SHIFT    5
+#define M88E1000_PSSR_MDIX_SHIFT         6
+#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7
+
+/* M88E1000 Extended PHY Specific Control Register */
+#define M88E1000_EPSCR_FIBER_LOOPBACK 0x4000	/* 1=Fiber loopback */
+#define M88E1000_EPSCR_DOWN_NO_IDLE   0x8000	/* 1=Lost lock detect enabled.
+						 * Will assert lost lock and bring
+						 * link down if idle not seen
+						 * within 1ms in 1000BASE-T
+						 */
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the master */
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X   0x0000
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_2X   0x0400
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_3X   0x0800
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_4X   0x0C00
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the slave */
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK  0x0300
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_DIS   0x0000
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X    0x0100
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_2X    0x0200
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_3X    0x0300
+#define M88E1000_EPSCR_TX_CLK_2_5     0x0060	/* 2.5 MHz TX_CLK */
+#define M88E1000_EPSCR_TX_CLK_25      0x0070	/* 25  MHz TX_CLK */
+#define M88E1000_EPSCR_TX_CLK_0       0x0000	/* NO  TX_CLK */
+
+/* M88EC018 Rev 2 specific DownShift settings */
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK  0x0E00
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_1X    0x0000
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_2X    0x0200
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_3X    0x0400
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_4X    0x0600
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X    0x0800
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_6X    0x0A00
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_7X    0x0C00
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_8X    0x0E00
+
+/* IGP01E1000 Specific Port Config Register - R/W */
+#define IGP01E1000_PSCFR_AUTO_MDIX_PAR_DETECT  0x0010
+#define IGP01E1000_PSCFR_PRE_EN                0x0020
+#define IGP01E1000_PSCFR_SMART_SPEED           0x0080
+#define IGP01E1000_PSCFR_DISABLE_TPLOOPBACK    0x0100
+#define IGP01E1000_PSCFR_DISABLE_JABBER        0x0400
+#define IGP01E1000_PSCFR_DISABLE_TRANSMIT      0x2000
+
+/* IGP01E1000 Specific Port Status Register - R/O */
+#define IGP01E1000_PSSR_AUTONEG_FAILED         0x0001	/* RO LH SC */
+#define IGP01E1000_PSSR_POLARITY_REVERSED      0x0002
+#define IGP01E1000_PSSR_CABLE_LENGTH           0x007C
+#define IGP01E1000_PSSR_FULL_DUPLEX            0x0200
+#define IGP01E1000_PSSR_LINK_UP                0x0400
+#define IGP01E1000_PSSR_MDIX                   0x0800
+#define IGP01E1000_PSSR_SPEED_MASK             0xC000	/* speed bits mask */
+#define IGP01E1000_PSSR_SPEED_10MBPS           0x4000
+#define IGP01E1000_PSSR_SPEED_100MBPS          0x8000
+#define IGP01E1000_PSSR_SPEED_1000MBPS         0xC000
+#define IGP01E1000_PSSR_CABLE_LENGTH_SHIFT     0x0002	/* shift right 2 */
+#define IGP01E1000_PSSR_MDIX_SHIFT             0x000B	/* shift right 11 */
+
+/* IGP01E1000 Specific Port Control Register - R/W */
+#define IGP01E1000_PSCR_TP_LOOPBACK            0x0010
+#define IGP01E1000_PSCR_CORRECT_NC_SCMBLR      0x0200
+#define IGP01E1000_PSCR_TEN_CRS_SELECT         0x0400
+#define IGP01E1000_PSCR_FLIP_CHIP              0x0800
+#define IGP01E1000_PSCR_AUTO_MDIX              0x1000
+#define IGP01E1000_PSCR_FORCE_MDI_MDIX         0x2000	/* 0-MDI, 1-MDIX */
+
+/* IGP01E1000 Specific Port Link Health Register */
+#define IGP01E1000_PLHR_SS_DOWNGRADE           0x8000
+#define IGP01E1000_PLHR_GIG_SCRAMBLER_ERROR    0x4000
+#define IGP01E1000_PLHR_MASTER_FAULT           0x2000
+#define IGP01E1000_PLHR_MASTER_RESOLUTION      0x1000
+#define IGP01E1000_PLHR_GIG_REM_RCVR_NOK       0x0800	/* LH */
+#define IGP01E1000_PLHR_IDLE_ERROR_CNT_OFLOW   0x0400	/* LH */
+#define IGP01E1000_PLHR_DATA_ERR_1             0x0200	/* LH */
+#define IGP01E1000_PLHR_DATA_ERR_0             0x0100
+#define IGP01E1000_PLHR_AUTONEG_FAULT          0x0040
+#define IGP01E1000_PLHR_AUTONEG_ACTIVE         0x0010
+#define IGP01E1000_PLHR_VALID_CHANNEL_D        0x0008
+#define IGP01E1000_PLHR_VALID_CHANNEL_C        0x0004
+#define IGP01E1000_PLHR_VALID_CHANNEL_B        0x0002
+#define IGP01E1000_PLHR_VALID_CHANNEL_A        0x0001
+
+/* IGP01E1000 Channel Quality Register */
+#define IGP01E1000_MSE_CHANNEL_D        0x000F
+#define IGP01E1000_MSE_CHANNEL_C        0x00F0
+#define IGP01E1000_MSE_CHANNEL_B        0x0F00
+#define IGP01E1000_MSE_CHANNEL_A        0xF000
+
+#define IGP02E1000_PM_SPD                         0x0001	/* Smart Power Down */
+#define IGP02E1000_PM_D3_LPLU                     0x0004	/* Enable LPLU in non-D0a modes */
+#define IGP02E1000_PM_D0_LPLU                     0x0002	/* Enable LPLU in D0a mode */
+
+/* IGP01E1000 DSP reset macros */
+#define DSP_RESET_ENABLE     0x0
+#define DSP_RESET_DISABLE    0x2
+#define E1000_MAX_DSP_RESETS 10
+
+/* IGP01E1000 & IGP02E1000 AGC Registers */
+
+#define IGP01E1000_AGC_LENGTH_SHIFT 7	/* Coarse - 13:11, Fine - 10:7 */
+#define IGP02E1000_AGC_LENGTH_SHIFT 9	/* Coarse - 15:13, Fine - 12:9 */
+
+/* IGP02E1000 AGC Register Length 9-bit mask */
+#define IGP02E1000_AGC_LENGTH_MASK  0x7F
+
+/* 7 bits (3 Coarse + 4 Fine) --> 128 optional values */
+#define IGP01E1000_AGC_LENGTH_TABLE_SIZE 128
+#define IGP02E1000_AGC_LENGTH_TABLE_SIZE 113
+
+/* The precision error of the cable length is +/- 10 meters */
+#define IGP01E1000_AGC_RANGE    10
+#define IGP02E1000_AGC_RANGE    15
+
+/* IGP01E1000 PCS Initialization register */
+/* bits 3:6 in the PCS registers stores the channels polarity */
+#define IGP01E1000_PHY_POLARITY_MASK    0x0078
+
+/* IGP01E1000 GMII FIFO Register */
+#define IGP01E1000_GMII_FLEX_SPD               0x10	/* Enable flexible speed
+							 * on Link-Up */
+#define IGP01E1000_GMII_SPD                    0x20	/* Enable SPD */
+
+/* IGP01E1000 Analog Register */
+#define IGP01E1000_ANALOG_SPARE_FUSE_STATUS       0x20D1
+#define IGP01E1000_ANALOG_FUSE_STATUS             0x20D0
+#define IGP01E1000_ANALOG_FUSE_CONTROL            0x20DC
+#define IGP01E1000_ANALOG_FUSE_BYPASS             0x20DE
+
+#define IGP01E1000_ANALOG_FUSE_POLY_MASK            0xF000
+#define IGP01E1000_ANALOG_FUSE_FINE_MASK            0x0F80
+#define IGP01E1000_ANALOG_FUSE_COARSE_MASK          0x0070
+#define IGP01E1000_ANALOG_SPARE_FUSE_ENABLED        0x0100
+#define IGP01E1000_ANALOG_FUSE_ENABLE_SW_CONTROL    0x0002
+
+#define IGP01E1000_ANALOG_FUSE_COARSE_THRESH        0x0040
+#define IGP01E1000_ANALOG_FUSE_COARSE_10            0x0010
+#define IGP01E1000_ANALOG_FUSE_FINE_1               0x0080
+#define IGP01E1000_ANALOG_FUSE_FINE_10              0x0500
+
+/* Bit definitions for valid PHY IDs. */
+/* I = Integrated
+ * E = External
+ */
+#define M88_VENDOR         0x0141
+#define M88E1000_E_PHY_ID  0x01410C50
+#define M88E1000_I_PHY_ID  0x01410C30
+#define M88E1011_I_PHY_ID  0x01410C20
+#define IGP01E1000_I_PHY_ID  0x02A80380
+#define M88E1000_12_PHY_ID M88E1000_E_PHY_ID
+#define M88E1000_14_PHY_ID M88E1000_E_PHY_ID
+#define M88E1011_I_REV_4   0x04
+#define M88E1111_I_PHY_ID  0x01410CC0
+#define M88E1118_E_PHY_ID  0x01410E40
+#define L1LXT971A_PHY_ID   0x001378E0
+
+#define RTL8211B_PHY_ID    0x001CC910
+#define RTL8201N_PHY_ID    0x8200
+#define RTL_PHY_CTRL_FD    0x0100 /* Full duplex.0=half; 1=full */
+#define RTL_PHY_CTRL_SPD_100    0x200000 /* Force 100Mb */
+
+/* Bits...
+ * 15-5: page
+ * 4-0: register offset
+ */
+#define PHY_PAGE_SHIFT        5
+#define PHY_REG(page, reg)    \
+        (((page) << PHY_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS))
+
+#define IGP3_PHY_PORT_CTRL           \
+        PHY_REG(769, 17)	/* Port General Configuration */
+#define IGP3_PHY_RATE_ADAPT_CTRL \
+        PHY_REG(769, 25)	/* Rate Adapter Control Register */
+
+#define IGP3_KMRN_FIFO_CTRL_STATS \
+        PHY_REG(770, 16)	/* KMRN FIFO's control/status register */
+#define IGP3_KMRN_POWER_MNG_CTRL \
+        PHY_REG(770, 17)	/* KMRN Power Management Control Register */
+#define IGP3_KMRN_INBAND_CTRL \
+        PHY_REG(770, 18)	/* KMRN Inband Control Register */
+#define IGP3_KMRN_DIAG \
+        PHY_REG(770, 19)	/* KMRN Diagnostic register */
+#define IGP3_KMRN_DIAG_PCS_LOCK_LOSS 0x0002	/* RX PCS is not synced */
+#define IGP3_KMRN_ACK_TIMEOUT \
+        PHY_REG(770, 20)	/* KMRN Acknowledge Timeouts register */
+
+#define IGP3_VR_CTRL \
+        PHY_REG(776, 18)	/* Voltage regulator control register */
+#define IGP3_VR_CTRL_MODE_SHUT       0x0200	/* Enter powerdown, shutdown VRs */
+#define IGP3_VR_CTRL_MODE_MASK       0x0300	/* Shutdown VR Mask */
+
+#define IGP3_CAPABILITY \
+        PHY_REG(776, 19)	/* IGP3 Capability Register */
+
+/* Capabilities for SKU Control  */
+#define IGP3_CAP_INITIATE_TEAM       0x0001	/* Able to initiate a team */
+#define IGP3_CAP_WFM                 0x0002	/* Support WoL and PXE */
+#define IGP3_CAP_ASF                 0x0004	/* Support ASF */
+#define IGP3_CAP_LPLU                0x0008	/* Support Low Power Link Up */
+#define IGP3_CAP_DC_AUTO_SPEED       0x0010	/* Support AC/DC Auto Link Speed */
+#define IGP3_CAP_SPD                 0x0020	/* Support Smart Power Down */
+#define IGP3_CAP_MULT_QUEUE          0x0040	/* Support 2 tx & 2 rx queues */
+#define IGP3_CAP_RSS                 0x0080	/* Support RSS */
+#define IGP3_CAP_8021PQ              0x0100	/* Support 802.1Q & 802.1p */
+#define IGP3_CAP_AMT_CB              0x0200	/* Support active manageability and circuit breaker */
+
+#define IGP3_PPC_JORDAN_EN           0x0001
+#define IGP3_PPC_JORDAN_GIGA_SPEED   0x0002
+
+#define IGP3_KMRN_PMC_EE_IDLE_LINK_DIS         0x0001
+#define IGP3_KMRN_PMC_K0S_ENTRY_LATENCY_MASK   0x001E
+#define IGP3_KMRN_PMC_K0S_MODE1_EN_GIGA        0x0020
+#define IGP3_KMRN_PMC_K0S_MODE1_EN_100         0x0040
+
+#define IGP3E1000_PHY_MISC_CTRL                0x1B	/* Misc. Ctrl register */
+#define IGP3_PHY_MISC_DUPLEX_MANUAL_SET        0x1000	/* Duplex Manual Set */
+
+#define IGP3_KMRN_EXT_CTRL  PHY_REG(770, 18)
+#define IGP3_KMRN_EC_DIS_INBAND    0x0080
+
+#define IGP03E1000_E_PHY_ID  0x02A80390
+#define IFE_E_PHY_ID         0x02A80330	/* 10/100 PHY */
+#define IFE_PLUS_E_PHY_ID    0x02A80320
+#define IFE_C_E_PHY_ID       0x02A80310
+
+#define IFE_PHY_EXTENDED_STATUS_CONTROL   0x10	/* 100BaseTx Extended Status, Control and Address */
+#define IFE_PHY_SPECIAL_CONTROL           0x11	/* 100BaseTx PHY special control register */
+#define IFE_PHY_RCV_FALSE_CARRIER         0x13	/* 100BaseTx Receive False Carrier Counter */
+#define IFE_PHY_RCV_DISCONNECT            0x14	/* 100BaseTx Receive Disconnect Counter */
+#define IFE_PHY_RCV_ERROT_FRAME           0x15	/* 100BaseTx Receive Error Frame Counter */
+#define IFE_PHY_RCV_SYMBOL_ERR            0x16	/* Receive Symbol Error Counter */
+#define IFE_PHY_PREM_EOF_ERR              0x17	/* 100BaseTx Receive Premature End Of Frame Error Counter */
+#define IFE_PHY_RCV_EOF_ERR               0x18	/* 10BaseT Receive End Of Frame Error Counter */
+#define IFE_PHY_TX_JABBER_DETECT          0x19	/* 10BaseT Transmit Jabber Detect Counter */
+#define IFE_PHY_EQUALIZER                 0x1A	/* PHY Equalizer Control and Status */
+#define IFE_PHY_SPECIAL_CONTROL_LED       0x1B	/* PHY special control and LED configuration */
+#define IFE_PHY_MDIX_CONTROL              0x1C	/* MDI/MDI-X Control register */
+#define IFE_PHY_HWI_CONTROL               0x1D	/* Hardware Integrity Control (HWI) */
+
+#define IFE_PESC_REDUCED_POWER_DOWN_DISABLE  0x2000	/* Default 1 = Disable auto reduced power down */
+#define IFE_PESC_100BTX_POWER_DOWN           0x0400	/* Indicates the power state of 100BASE-TX */
+#define IFE_PESC_10BTX_POWER_DOWN            0x0200	/* Indicates the power state of 10BASE-T */
+#define IFE_PESC_POLARITY_REVERSED           0x0100	/* Indicates 10BASE-T polarity */
+#define IFE_PESC_PHY_ADDR_MASK               0x007C	/* Bit 6:2 for sampled PHY address */
+#define IFE_PESC_SPEED                       0x0002	/* Auto-negotiation speed result 1=100Mbs, 0=10Mbs */
+#define IFE_PESC_DUPLEX                      0x0001	/* Auto-negotiation duplex result 1=Full, 0=Half */
+#define IFE_PESC_POLARITY_REVERSED_SHIFT     8
+
+#define IFE_PSC_DISABLE_DYNAMIC_POWER_DOWN   0x0100	/* 1 = Dynamic Power Down disabled */
+#define IFE_PSC_FORCE_POLARITY               0x0020	/* 1=Reversed Polarity, 0=Normal */
+#define IFE_PSC_AUTO_POLARITY_DISABLE        0x0010	/* 1=Auto Polarity Disabled, 0=Enabled */
+#define IFE_PSC_JABBER_FUNC_DISABLE          0x0001	/* 1=Jabber Disabled, 0=Normal Jabber Operation */
+#define IFE_PSC_FORCE_POLARITY_SHIFT         5
+#define IFE_PSC_AUTO_POLARITY_DISABLE_SHIFT  4
+
+#define IFE_PMC_AUTO_MDIX                    0x0080	/* 1=enable MDI/MDI-X feature, default 0=disabled */
+#define IFE_PMC_FORCE_MDIX                   0x0040	/* 1=force MDIX-X, 0=force MDI */
+#define IFE_PMC_MDIX_STATUS                  0x0020	/* 1=MDI-X, 0=MDI */
+#define IFE_PMC_AUTO_MDIX_COMPLETE           0x0010	/* Resolution algorithm is completed */
+#define IFE_PMC_MDIX_MODE_SHIFT              6
+#define IFE_PHC_MDIX_RESET_ALL_MASK          0x0000	/* Disable auto MDI-X */
+
+#define IFE_PHC_HWI_ENABLE                   0x8000	/* Enable the HWI feature */
+#define IFE_PHC_ABILITY_CHECK                0x4000	/* 1= Test Passed, 0=failed */
+#define IFE_PHC_TEST_EXEC                    0x2000	/* PHY launch test pulses on the wire */
+#define IFE_PHC_HIGHZ                        0x0200	/* 1 = Open Circuit */
+#define IFE_PHC_LOWZ                         0x0400	/* 1 = Short Circuit */
+#define IFE_PHC_LOW_HIGH_Z_MASK              0x0600	/* Mask for indication type of problem on the line */
+#define IFE_PHC_DISTANCE_MASK                0x01FF	/* Mask for distance to the cable problem, in 80cm granularity */
+#define IFE_PHC_RESET_ALL_MASK               0x0000	/* Disable HWI */
+#define IFE_PSCL_PROBE_MODE                  0x0020	/* LED Probe mode */
+#define IFE_PSCL_PROBE_LEDS_OFF              0x0006	/* Force LEDs 0 and 2 off */
+#define IFE_PSCL_PROBE_LEDS_ON               0x0007	/* Force LEDs 0 and 2 on */
+
+#define ICH_FLASH_COMMAND_TIMEOUT            5000	/* 5000 uSecs - adjusted */
+#define ICH_FLASH_ERASE_TIMEOUT              3000000	/* Up to 3 seconds - worst case */
+#define ICH_FLASH_CYCLE_REPEAT_COUNT         10	/* 10 cycles */
+#define ICH_FLASH_SEG_SIZE_256               256
+#define ICH_FLASH_SEG_SIZE_4K                4096
+#define ICH_FLASH_SEG_SIZE_64K               65536
+
+#define ICH_CYCLE_READ                       0x0
+#define ICH_CYCLE_RESERVED                   0x1
+#define ICH_CYCLE_WRITE                      0x2
+#define ICH_CYCLE_ERASE                      0x3
+
+#define ICH_FLASH_GFPREG   0x0000
+#define ICH_FLASH_HSFSTS   0x0004
+#define ICH_FLASH_HSFCTL   0x0006
+#define ICH_FLASH_FADDR    0x0008
+#define ICH_FLASH_FDATA0   0x0010
+#define ICH_FLASH_FRACC    0x0050
+#define ICH_FLASH_FREG0    0x0054
+#define ICH_FLASH_FREG1    0x0058
+#define ICH_FLASH_FREG2    0x005C
+#define ICH_FLASH_FREG3    0x0060
+#define ICH_FLASH_FPR0     0x0074
+#define ICH_FLASH_FPR1     0x0078
+#define ICH_FLASH_SSFSTS   0x0090
+#define ICH_FLASH_SSFCTL   0x0092
+#define ICH_FLASH_PREOP    0x0094
+#define ICH_FLASH_OPTYPE   0x0096
+#define ICH_FLASH_OPMENU   0x0098
+
+#define ICH_FLASH_REG_MAPSIZE      0x00A0
+#define ICH_FLASH_SECTOR_SIZE      4096
+#define ICH_GFPREG_BASE_MASK       0x1FFF
+#define ICH_FLASH_LINEAR_ADDR_MASK 0x00FFFFFF
+
+/* Miscellaneous PHY bit definitions. */
+#define PHY_PREAMBLE        0xFFFFFFFF
+#define PHY_SOF             0x01
+#define PHY_OP_READ         0x02
+#define PHY_OP_WRITE        0x01
+#define PHY_TURNAROUND      0x02
+#define PHY_PREAMBLE_SIZE   32
+#define MII_CR_SPEED_1000   0x0040
+#define MII_CR_SPEED_100    0x2000
+#define MII_CR_SPEED_10     0x0000
+#define E1000_PHY_ADDRESS   0x01
+#define PHY_AUTO_NEG_TIME   45	/* 4.5 Seconds */
+#define PHY_FORCE_TIME      20	/* 2.0 Seconds */
+#define PHY_REVISION_MASK   0xFFFFFFF0
+#define DEVICE_SPEED_MASK   0x00000300	/* Device Ctrl Reg Speed Mask */
+#define REG4_SPEED_MASK     0x01E0
+#define REG9_SPEED_MASK     0x0300
+#define ADVERTISE_10_HALF   0x0001
+#define ADVERTISE_10_FULL   0x0002
+#define ADVERTISE_100_HALF  0x0004
+#define ADVERTISE_100_FULL  0x0008
+#define ADVERTISE_1000_HALF 0x0010
+#define ADVERTISE_1000_FULL 0x0020
+#define AUTONEG_ADVERTISE_SPEED_DEFAULT 0x002F	/* Everything but 1000-Half */
+#define AUTONEG_ADVERTISE_10_100_ALL    0x000F	/* All 10/100 speeds */
+#define AUTONEG_ADVERTISE_10_ALL        0x0003	/* 10Mbps Full & Half speeds */
+
+#endif /* _E1000_HW_H_ */
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
new file mode 100644
index 0000000000..da6e303ad9
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -0,0 +1,5313 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
+
+#include "e1000.h"
+#include <net/ip6_checksum.h>
+#include <linux/io.h>
+#include <linux/prefetch.h>
+#include <linux/bitops.h>
+#include <linux/if_vlan.h>
+
+char e1000_driver_name[] = "e1000";
+static char e1000_driver_string[] = "Intel(R) PRO/1000 Network Driver";
+static const char e1000_copyright[] = "Copyright (c) 1999-2006 Intel Corporation.";
+
+/* e1000_pci_tbl - PCI Device ID Table
+ *
+ * Last entry must be all 0s
+ *
+ * Macro expands to...
+ *   {PCI_DEVICE(PCI_VENDOR_ID_INTEL, device_id)}
+ */
+static const struct pci_device_id e1000_pci_tbl[] = {
+	INTEL_E1000_ETHERNET_DEVICE(0x1000),
+	INTEL_E1000_ETHERNET_DEVICE(0x1001),
+	INTEL_E1000_ETHERNET_DEVICE(0x1004),
+	INTEL_E1000_ETHERNET_DEVICE(0x1008),
+	INTEL_E1000_ETHERNET_DEVICE(0x1009),
+	INTEL_E1000_ETHERNET_DEVICE(0x100C),
+	INTEL_E1000_ETHERNET_DEVICE(0x100D),
+	INTEL_E1000_ETHERNET_DEVICE(0x100E),
+	INTEL_E1000_ETHERNET_DEVICE(0x100F),
+	INTEL_E1000_ETHERNET_DEVICE(0x1010),
+	INTEL_E1000_ETHERNET_DEVICE(0x1011),
+	INTEL_E1000_ETHERNET_DEVICE(0x1012),
+	INTEL_E1000_ETHERNET_DEVICE(0x1013),
+	INTEL_E1000_ETHERNET_DEVICE(0x1014),
+	INTEL_E1000_ETHERNET_DEVICE(0x1015),
+	INTEL_E1000_ETHERNET_DEVICE(0x1016),
+	INTEL_E1000_ETHERNET_DEVICE(0x1017),
+	INTEL_E1000_ETHERNET_DEVICE(0x1018),
+	INTEL_E1000_ETHERNET_DEVICE(0x1019),
+	INTEL_E1000_ETHERNET_DEVICE(0x101A),
+	INTEL_E1000_ETHERNET_DEVICE(0x101D),
+	INTEL_E1000_ETHERNET_DEVICE(0x101E),
+	INTEL_E1000_ETHERNET_DEVICE(0x1026),
+	INTEL_E1000_ETHERNET_DEVICE(0x1027),
+	INTEL_E1000_ETHERNET_DEVICE(0x1028),
+	INTEL_E1000_ETHERNET_DEVICE(0x1075),
+	INTEL_E1000_ETHERNET_DEVICE(0x1076),
+	INTEL_E1000_ETHERNET_DEVICE(0x1077),
+	INTEL_E1000_ETHERNET_DEVICE(0x1078),
+	INTEL_E1000_ETHERNET_DEVICE(0x1079),
+	INTEL_E1000_ETHERNET_DEVICE(0x107A),
+	INTEL_E1000_ETHERNET_DEVICE(0x107B),
+	INTEL_E1000_ETHERNET_DEVICE(0x107C),
+	INTEL_E1000_ETHERNET_DEVICE(0x108A),
+	INTEL_E1000_ETHERNET_DEVICE(0x1099),
+	INTEL_E1000_ETHERNET_DEVICE(0x10B5),
+	INTEL_E1000_ETHERNET_DEVICE(0x2E6E),
+	/* required last entry */
+	{0,}
+};
+
+MODULE_DEVICE_TABLE(pci, e1000_pci_tbl);
+
+int e1000_up(struct e1000_adapter *adapter);
+void e1000_down(struct e1000_adapter *adapter);
+void e1000_reinit_locked(struct e1000_adapter *adapter);
+void e1000_reset(struct e1000_adapter *adapter);
+int e1000_setup_all_tx_resources(struct e1000_adapter *adapter);
+int e1000_setup_all_rx_resources(struct e1000_adapter *adapter);
+void e1000_free_all_tx_resources(struct e1000_adapter *adapter);
+void e1000_free_all_rx_resources(struct e1000_adapter *adapter);
+static int e1000_setup_tx_resources(struct e1000_adapter *adapter,
+				    struct e1000_tx_ring *txdr);
+static int e1000_setup_rx_resources(struct e1000_adapter *adapter,
+				    struct e1000_rx_ring *rxdr);
+static void e1000_free_tx_resources(struct e1000_adapter *adapter,
+				    struct e1000_tx_ring *tx_ring);
+static void e1000_free_rx_resources(struct e1000_adapter *adapter,
+				    struct e1000_rx_ring *rx_ring);
+void e1000_update_stats(struct e1000_adapter *adapter);
+
+static int e1000_init_module(void);
+static void e1000_exit_module(void);
+static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
+static void e1000_remove(struct pci_dev *pdev);
+static int e1000_alloc_queues(struct e1000_adapter *adapter);
+static int e1000_sw_init(struct e1000_adapter *adapter);
+int e1000_open(struct net_device *netdev);
+int e1000_close(struct net_device *netdev);
+static void e1000_configure_tx(struct e1000_adapter *adapter);
+static void e1000_configure_rx(struct e1000_adapter *adapter);
+static void e1000_setup_rctl(struct e1000_adapter *adapter);
+static void e1000_clean_all_tx_rings(struct e1000_adapter *adapter);
+static void e1000_clean_all_rx_rings(struct e1000_adapter *adapter);
+static void e1000_clean_tx_ring(struct e1000_adapter *adapter,
+				struct e1000_tx_ring *tx_ring);
+static void e1000_clean_rx_ring(struct e1000_adapter *adapter,
+				struct e1000_rx_ring *rx_ring);
+static void e1000_set_rx_mode(struct net_device *netdev);
+static void e1000_update_phy_info_task(struct work_struct *work);
+static void e1000_watchdog(struct work_struct *work);
+static void e1000_82547_tx_fifo_stall_task(struct work_struct *work);
+static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
+				    struct net_device *netdev);
+static int e1000_change_mtu(struct net_device *netdev, int new_mtu);
+static int e1000_set_mac(struct net_device *netdev, void *p);
+static irqreturn_t e1000_intr(int irq, void *data);
+static bool e1000_clean_tx_irq(struct e1000_adapter *adapter,
+			       struct e1000_tx_ring *tx_ring);
+static int e1000_clean(struct napi_struct *napi, int budget);
+static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
+			       struct e1000_rx_ring *rx_ring,
+			       int *work_done, int work_to_do);
+static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter,
+				     struct e1000_rx_ring *rx_ring,
+				     int *work_done, int work_to_do);
+static void e1000_alloc_dummy_rx_buffers(struct e1000_adapter *adapter,
+					 struct e1000_rx_ring *rx_ring,
+					 int cleaned_count)
+{
+}
+static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
+				   struct e1000_rx_ring *rx_ring,
+				   int cleaned_count);
+static void e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter,
+					 struct e1000_rx_ring *rx_ring,
+					 int cleaned_count);
+static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
+static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
+			   int cmd);
+static void e1000_enter_82542_rst(struct e1000_adapter *adapter);
+static void e1000_leave_82542_rst(struct e1000_adapter *adapter);
+static void e1000_tx_timeout(struct net_device *dev, unsigned int txqueue);
+static void e1000_reset_task(struct work_struct *work);
+static void e1000_smartspeed(struct e1000_adapter *adapter);
+static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter,
+				       struct sk_buff *skb);
+
+static bool e1000_vlan_used(struct e1000_adapter *adapter);
+static void e1000_vlan_mode(struct net_device *netdev,
+			    netdev_features_t features);
+static void e1000_vlan_filter_on_off(struct e1000_adapter *adapter,
+				     bool filter_on);
+static int e1000_vlan_rx_add_vid(struct net_device *netdev,
+				 __be16 proto, u16 vid);
+static int e1000_vlan_rx_kill_vid(struct net_device *netdev,
+				  __be16 proto, u16 vid);
+static void e1000_restore_vlan(struct e1000_adapter *adapter);
+
+static int __maybe_unused e1000_suspend(struct device *dev);
+static int __maybe_unused e1000_resume(struct device *dev);
+static void e1000_shutdown(struct pci_dev *pdev);
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/* for netdump / net console */
+static void e1000_netpoll (struct net_device *netdev);
+#endif
+
+#define COPYBREAK_DEFAULT 256
+static unsigned int copybreak __read_mostly = COPYBREAK_DEFAULT;
+module_param(copybreak, uint, 0644);
+MODULE_PARM_DESC(copybreak,
+	"Maximum size of packet that is copied to a new buffer on receive");
+
+static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
+						pci_channel_state_t state);
+static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev);
+static void e1000_io_resume(struct pci_dev *pdev);
+
+static const struct pci_error_handlers e1000_err_handler = {
+	.error_detected = e1000_io_error_detected,
+	.slot_reset = e1000_io_slot_reset,
+	.resume = e1000_io_resume,
+};
+
+static SIMPLE_DEV_PM_OPS(e1000_pm_ops, e1000_suspend, e1000_resume);
+
+static struct pci_driver e1000_driver = {
+	.name     = e1000_driver_name,
+	.id_table = e1000_pci_tbl,
+	.probe    = e1000_probe,
+	.remove   = e1000_remove,
+	.driver = {
+		.pm = &e1000_pm_ops,
+	},
+	.shutdown = e1000_shutdown,
+	.err_handler = &e1000_err_handler
+};
+
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver");
+MODULE_LICENSE("GPL v2");
+
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+/**
+ * e1000_get_hw_dev - helper function for getting netdev
+ * @hw: pointer to HW struct
+ *
+ * return device used by hardware layer to print debugging information
+ *
+ **/
+struct net_device *e1000_get_hw_dev(struct e1000_hw *hw)
+{
+	struct e1000_adapter *adapter = hw->back;
+	return adapter->netdev;
+}
+
+/**
+ * e1000_init_module - Driver Registration Routine
+ *
+ * e1000_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ **/
+static int __init e1000_init_module(void)
+{
+	int ret;
+	pr_info("%s\n", e1000_driver_string);
+
+	pr_info("%s\n", e1000_copyright);
+
+	ret = pci_register_driver(&e1000_driver);
+	if (copybreak != COPYBREAK_DEFAULT) {
+		if (copybreak == 0)
+			pr_info("copybreak disabled\n");
+		else
+			pr_info("copybreak enabled for "
+				   "packets <= %u bytes\n", copybreak);
+	}
+	return ret;
+}
+
+module_init(e1000_init_module);
+
+/**
+ * e1000_exit_module - Driver Exit Cleanup Routine
+ *
+ * e1000_exit_module is called just before the driver is removed
+ * from memory.
+ **/
+static void __exit e1000_exit_module(void)
+{
+	pci_unregister_driver(&e1000_driver);
+}
+
+module_exit(e1000_exit_module);
+
+static int e1000_request_irq(struct e1000_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	irq_handler_t handler = e1000_intr;
+	int irq_flags = IRQF_SHARED;
+	int err;
+
+	err = request_irq(adapter->pdev->irq, handler, irq_flags, netdev->name,
+			  netdev);
+	if (err) {
+		e_err(probe, "Unable to allocate interrupt Error: %d\n", err);
+	}
+
+	return err;
+}
+
+static void e1000_free_irq(struct e1000_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	free_irq(adapter->pdev->irq, netdev);
+}
+
+/**
+ * e1000_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ **/
+static void e1000_irq_disable(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	ew32(IMC, ~0);
+	E1000_WRITE_FLUSH();
+	synchronize_irq(adapter->pdev->irq);
+}
+
+/**
+ * e1000_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ **/
+static void e1000_irq_enable(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	ew32(IMS, IMS_ENABLE_MASK);
+	E1000_WRITE_FLUSH();
+}
+
+static void e1000_update_mng_vlan(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	u16 vid = hw->mng_cookie.vlan_id;
+	u16 old_vid = adapter->mng_vlan_id;
+
+	if (!e1000_vlan_used(adapter))
+		return;
+
+	if (!test_bit(vid, adapter->active_vlans)) {
+		if (hw->mng_cookie.status &
+		    E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) {
+			e1000_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid);
+			adapter->mng_vlan_id = vid;
+		} else {
+			adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
+		}
+		if ((old_vid != (u16)E1000_MNG_VLAN_NONE) &&
+		    (vid != old_vid) &&
+		    !test_bit(old_vid, adapter->active_vlans))
+			e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q),
+					       old_vid);
+	} else {
+		adapter->mng_vlan_id = vid;
+	}
+}
+
+static void e1000_init_manageability(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (adapter->en_mng_pt) {
+		u32 manc = er32(MANC);
+
+		/* disable hardware interception of ARP */
+		manc &= ~(E1000_MANC_ARP_EN);
+
+		ew32(MANC, manc);
+	}
+}
+
+static void e1000_release_manageability(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (adapter->en_mng_pt) {
+		u32 manc = er32(MANC);
+
+		/* re-enable hardware interception of ARP */
+		manc |= E1000_MANC_ARP_EN;
+
+		ew32(MANC, manc);
+	}
+}
+
+/**
+ * e1000_configure - configure the hardware for RX and TX
+ * @adapter: private board structure
+ **/
+static void e1000_configure(struct e1000_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int i;
+
+	e1000_set_rx_mode(netdev);
+
+	e1000_restore_vlan(adapter);
+	e1000_init_manageability(adapter);
+
+	e1000_configure_tx(adapter);
+	e1000_setup_rctl(adapter);
+	e1000_configure_rx(adapter);
+	/* call E1000_DESC_UNUSED which always leaves
+	 * at least 1 descriptor unused to make sure
+	 * next_to_use != next_to_clean
+	 */
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct e1000_rx_ring *ring = &adapter->rx_ring[i];
+		adapter->alloc_rx_buf(adapter, ring,
+				      E1000_DESC_UNUSED(ring));
+	}
+}
+
+int e1000_up(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* hardware has been reset, we need to reload some things */
+	e1000_configure(adapter);
+
+	clear_bit(__E1000_DOWN, &adapter->flags);
+
+	napi_enable(&adapter->napi);
+
+	e1000_irq_enable(adapter);
+
+	netif_wake_queue(adapter->netdev);
+
+	/* fire a link change interrupt to start the watchdog */
+	ew32(ICS, E1000_ICS_LSC);
+	return 0;
+}
+
+/**
+ * e1000_power_up_phy - restore link in case the phy was powered down
+ * @adapter: address of board private structure
+ *
+ * The phy may be powered down to save power and turn off link when the
+ * driver is unloaded and wake on lan is not enabled (among others)
+ * *** this routine MUST be followed by a call to e1000_reset ***
+ **/
+void e1000_power_up_phy(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u16 mii_reg = 0;
+
+	/* Just clear the power down bit to wake the phy back up */
+	if (hw->media_type == e1000_media_type_copper) {
+		/* according to the manual, the phy will retain its
+		 * settings across a power-down/up cycle
+		 */
+		e1000_read_phy_reg(hw, PHY_CTRL, &mii_reg);
+		mii_reg &= ~MII_CR_POWER_DOWN;
+		e1000_write_phy_reg(hw, PHY_CTRL, mii_reg);
+	}
+}
+
+static void e1000_power_down_phy(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* Power down the PHY so no link is implied when interface is down *
+	 * The PHY cannot be powered down if any of the following is true *
+	 * (a) WoL is enabled
+	 * (b) AMT is active
+	 * (c) SoL/IDER session is active
+	 */
+	if (!adapter->wol && hw->mac_type >= e1000_82540 &&
+	   hw->media_type == e1000_media_type_copper) {
+		u16 mii_reg = 0;
+
+		switch (hw->mac_type) {
+		case e1000_82540:
+		case e1000_82545:
+		case e1000_82545_rev_3:
+		case e1000_82546:
+		case e1000_ce4100:
+		case e1000_82546_rev_3:
+		case e1000_82541:
+		case e1000_82541_rev_2:
+		case e1000_82547:
+		case e1000_82547_rev_2:
+			if (er32(MANC) & E1000_MANC_SMBUS_EN)
+				goto out;
+			break;
+		default:
+			goto out;
+		}
+		e1000_read_phy_reg(hw, PHY_CTRL, &mii_reg);
+		mii_reg |= MII_CR_POWER_DOWN;
+		e1000_write_phy_reg(hw, PHY_CTRL, mii_reg);
+		msleep(1);
+	}
+out:
+	return;
+}
+
+static void e1000_down_and_stop(struct e1000_adapter *adapter)
+{
+	set_bit(__E1000_DOWN, &adapter->flags);
+
+	cancel_delayed_work_sync(&adapter->watchdog_task);
+
+	/*
+	 * Since the watchdog task can reschedule other tasks, we should cancel
+	 * it first, otherwise we can run into the situation when a work is
+	 * still running after the adapter has been turned down.
+	 */
+
+	cancel_delayed_work_sync(&adapter->phy_info_task);
+	cancel_delayed_work_sync(&adapter->fifo_stall_task);
+
+	/* Only kill reset task if adapter is not resetting */
+	if (!test_bit(__E1000_RESETTING, &adapter->flags))
+		cancel_work_sync(&adapter->reset_task);
+}
+
+void e1000_down(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	u32 rctl, tctl;
+
+	/* disable receives in the hardware */
+	rctl = er32(RCTL);
+	ew32(RCTL, rctl & ~E1000_RCTL_EN);
+	/* flush and sleep below */
+
+	netif_tx_disable(netdev);
+
+	/* disable transmits in the hardware */
+	tctl = er32(TCTL);
+	tctl &= ~E1000_TCTL_EN;
+	ew32(TCTL, tctl);
+	/* flush both disables and wait for them to finish */
+	E1000_WRITE_FLUSH();
+	msleep(10);
+
+	/* Set the carrier off after transmits have been disabled in the
+	 * hardware, to avoid race conditions with e1000_watchdog() (which
+	 * may be running concurrently to us, checking for the carrier
+	 * bit to decide whether it should enable transmits again). Such
+	 * a race condition would result into transmission being disabled
+	 * in the hardware until the next IFF_DOWN+IFF_UP cycle.
+	 */
+	netif_carrier_off(netdev);
+
+	napi_disable(&adapter->napi);
+
+	e1000_irq_disable(adapter);
+
+	/* Setting DOWN must be after irq_disable to prevent
+	 * a screaming interrupt.  Setting DOWN also prevents
+	 * tasks from rescheduling.
+	 */
+	e1000_down_and_stop(adapter);
+
+	adapter->link_speed = 0;
+	adapter->link_duplex = 0;
+
+	e1000_reset(adapter);
+	e1000_clean_all_tx_rings(adapter);
+	e1000_clean_all_rx_rings(adapter);
+}
+
+void e1000_reinit_locked(struct e1000_adapter *adapter)
+{
+	while (test_and_set_bit(__E1000_RESETTING, &adapter->flags))
+		msleep(1);
+
+	/* only run the task if not already down */
+	if (!test_bit(__E1000_DOWN, &adapter->flags)) {
+		e1000_down(adapter);
+		e1000_up(adapter);
+	}
+
+	clear_bit(__E1000_RESETTING, &adapter->flags);
+}
+
+void e1000_reset(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
+	bool legacy_pba_adjust = false;
+	u16 hwm;
+
+	/* Repartition Pba for greater than 9k mtu
+	 * To take effect CTRL.RST is required.
+	 */
+
+	switch (hw->mac_type) {
+	case e1000_82542_rev2_0:
+	case e1000_82542_rev2_1:
+	case e1000_82543:
+	case e1000_82544:
+	case e1000_82540:
+	case e1000_82541:
+	case e1000_82541_rev_2:
+		legacy_pba_adjust = true;
+		pba = E1000_PBA_48K;
+		break;
+	case e1000_82545:
+	case e1000_82545_rev_3:
+	case e1000_82546:
+	case e1000_ce4100:
+	case e1000_82546_rev_3:
+		pba = E1000_PBA_48K;
+		break;
+	case e1000_82547:
+	case e1000_82547_rev_2:
+		legacy_pba_adjust = true;
+		pba = E1000_PBA_30K;
+		break;
+	case e1000_undefined:
+	case e1000_num_macs:
+		break;
+	}
+
+	if (legacy_pba_adjust) {
+		if (hw->max_frame_size > E1000_RXBUFFER_8192)
+			pba -= 8; /* allocate more FIFO for Tx */
+
+		if (hw->mac_type == e1000_82547) {
+			adapter->tx_fifo_head = 0;
+			adapter->tx_head_addr = pba << E1000_TX_HEAD_ADDR_SHIFT;
+			adapter->tx_fifo_size =
+				(E1000_PBA_40K - pba) << E1000_PBA_BYTES_SHIFT;
+			atomic_set(&adapter->tx_fifo_stall, 0);
+		}
+	} else if (hw->max_frame_size >  ETH_FRAME_LEN + ETH_FCS_LEN) {
+		/* adjust PBA for jumbo frames */
+		ew32(PBA, pba);
+
+		/* To maintain wire speed transmits, the Tx FIFO should be
+		 * large enough to accommodate two full transmit packets,
+		 * rounded up to the next 1KB and expressed in KB.  Likewise,
+		 * the Rx FIFO should be large enough to accommodate at least
+		 * one full receive packet and is similarly rounded up and
+		 * expressed in KB.
+		 */
+		pba = er32(PBA);
+		/* upper 16 bits has Tx packet buffer allocation size in KB */
+		tx_space = pba >> 16;
+		/* lower 16 bits has Rx packet buffer allocation size in KB */
+		pba &= 0xffff;
+		/* the Tx fifo also stores 16 bytes of information about the Tx
+		 * but don't include ethernet FCS because hardware appends it
+		 */
+		min_tx_space = (hw->max_frame_size +
+				sizeof(struct e1000_tx_desc) -
+				ETH_FCS_LEN) * 2;
+		min_tx_space = ALIGN(min_tx_space, 1024);
+		min_tx_space >>= 10;
+		/* software strips receive CRC, so leave room for it */
+		min_rx_space = hw->max_frame_size;
+		min_rx_space = ALIGN(min_rx_space, 1024);
+		min_rx_space >>= 10;
+
+		/* If current Tx allocation is less than the min Tx FIFO size,
+		 * and the min Tx FIFO size is less than the current Rx FIFO
+		 * allocation, take space away from current Rx allocation
+		 */
+		if (tx_space < min_tx_space &&
+		    ((min_tx_space - tx_space) < pba)) {
+			pba = pba - (min_tx_space - tx_space);
+
+			/* PCI/PCIx hardware has PBA alignment constraints */
+			switch (hw->mac_type) {
+			case e1000_82545 ... e1000_82546_rev_3:
+				pba &= ~(E1000_PBA_8K - 1);
+				break;
+			default:
+				break;
+			}
+
+			/* if short on Rx space, Rx wins and must trump Tx
+			 * adjustment or use Early Receive if available
+			 */
+			if (pba < min_rx_space)
+				pba = min_rx_space;
+		}
+	}
+
+	ew32(PBA, pba);
+
+	/* flow control settings:
+	 * The high water mark must be low enough to fit one full frame
+	 * (or the size used for early receive) above it in the Rx FIFO.
+	 * Set it to the lower of:
+	 * - 90% of the Rx FIFO size, and
+	 * - the full Rx FIFO size minus the early receive size (for parts
+	 *   with ERT support assuming ERT set to E1000_ERT_2048), or
+	 * - the full Rx FIFO size minus one full frame
+	 */
+	hwm = min(((pba << 10) * 9 / 10),
+		  ((pba << 10) - hw->max_frame_size));
+
+	hw->fc_high_water = hwm & 0xFFF8;	/* 8-byte granularity */
+	hw->fc_low_water = hw->fc_high_water - 8;
+	hw->fc_pause_time = E1000_FC_PAUSE_TIME;
+	hw->fc_send_xon = 1;
+	hw->fc = hw->original_fc;
+
+	/* Allow time for pending master requests to run */
+	e1000_reset_hw(hw);
+	if (hw->mac_type >= e1000_82544)
+		ew32(WUC, 0);
+
+	if (e1000_init_hw(hw))
+		e_dev_err("Hardware Error\n");
+	e1000_update_mng_vlan(adapter);
+
+	/* if (adapter->hwflags & HWFLAGS_PHY_PWR_BIT) { */
+	if (hw->mac_type >= e1000_82544 &&
+	    hw->autoneg == 1 &&
+	    hw->autoneg_advertised == ADVERTISE_1000_FULL) {
+		u32 ctrl = er32(CTRL);
+		/* clear phy power management bit if we are in gig only mode,
+		 * which if enabled will attempt negotiation to 100Mb, which
+		 * can cause a loss of link at power off or driver unload
+		 */
+		ctrl &= ~E1000_CTRL_SWDPIN3;
+		ew32(CTRL, ctrl);
+	}
+
+	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
+	ew32(VET, ETHERNET_IEEE_VLAN_TYPE);
+
+	e1000_reset_adaptive(hw);
+	e1000_phy_get_info(hw, &adapter->phy_info);
+
+	e1000_release_manageability(adapter);
+}
+
+/* Dump the eeprom for users having checksum issues */
+static void e1000_dump_eeprom(struct e1000_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct ethtool_eeprom eeprom;
+	const struct ethtool_ops *ops = netdev->ethtool_ops;
+	u8 *data;
+	int i;
+	u16 csum_old, csum_new = 0;
+
+	eeprom.len = ops->get_eeprom_len(netdev);
+	eeprom.offset = 0;
+
+	data = kmalloc(eeprom.len, GFP_KERNEL);
+	if (!data)
+		return;
+
+	ops->get_eeprom(netdev, &eeprom, data);
+
+	csum_old = (data[EEPROM_CHECKSUM_REG * 2]) +
+		   (data[EEPROM_CHECKSUM_REG * 2 + 1] << 8);
+	for (i = 0; i < EEPROM_CHECKSUM_REG * 2; i += 2)
+		csum_new += data[i] + (data[i + 1] << 8);
+	csum_new = EEPROM_SUM - csum_new;
+
+	pr_err("/*********************/\n");
+	pr_err("Current EEPROM Checksum : 0x%04x\n", csum_old);
+	pr_err("Calculated              : 0x%04x\n", csum_new);
+
+	pr_err("Offset    Values\n");
+	pr_err("========  ======\n");
+	print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, data, 128, 0);
+
+	pr_err("Include this output when contacting your support provider.\n");
+	pr_err("This is not a software error! Something bad happened to\n");
+	pr_err("your hardware or EEPROM image. Ignoring this problem could\n");
+	pr_err("result in further problems, possibly loss of data,\n");
+	pr_err("corruption or system hangs!\n");
+	pr_err("The MAC Address will be reset to 00:00:00:00:00:00,\n");
+	pr_err("which is invalid and requires you to set the proper MAC\n");
+	pr_err("address manually before continuing to enable this network\n");
+	pr_err("device. Please inspect the EEPROM dump and report the\n");
+	pr_err("issue to your hardware vendor or Intel Customer Support.\n");
+	pr_err("/*********************/\n");
+
+	kfree(data);
+}
+
+/**
+ * e1000_is_need_ioport - determine if an adapter needs ioport resources or not
+ * @pdev: PCI device information struct
+ *
+ * Return true if an adapter needs ioport resources
+ **/
+static int e1000_is_need_ioport(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case E1000_DEV_ID_82540EM:
+	case E1000_DEV_ID_82540EM_LOM:
+	case E1000_DEV_ID_82540EP:
+	case E1000_DEV_ID_82540EP_LOM:
+	case E1000_DEV_ID_82540EP_LP:
+	case E1000_DEV_ID_82541EI:
+	case E1000_DEV_ID_82541EI_MOBILE:
+	case E1000_DEV_ID_82541ER:
+	case E1000_DEV_ID_82541ER_LOM:
+	case E1000_DEV_ID_82541GI:
+	case E1000_DEV_ID_82541GI_LF:
+	case E1000_DEV_ID_82541GI_MOBILE:
+	case E1000_DEV_ID_82544EI_COPPER:
+	case E1000_DEV_ID_82544EI_FIBER:
+	case E1000_DEV_ID_82544GC_COPPER:
+	case E1000_DEV_ID_82544GC_LOM:
+	case E1000_DEV_ID_82545EM_COPPER:
+	case E1000_DEV_ID_82545EM_FIBER:
+	case E1000_DEV_ID_82546EB_COPPER:
+	case E1000_DEV_ID_82546EB_FIBER:
+	case E1000_DEV_ID_82546EB_QUAD_COPPER:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static netdev_features_t e1000_fix_features(struct net_device *netdev,
+	netdev_features_t features)
+{
+	/* Since there is no support for separate Rx/Tx vlan accel
+	 * enable/disable make sure Tx flag is always in same state as Rx.
+	 */
+	if (features & NETIF_F_HW_VLAN_CTAG_RX)
+		features |= NETIF_F_HW_VLAN_CTAG_TX;
+	else
+		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
+
+	return features;
+}
+
+static int e1000_set_features(struct net_device *netdev,
+	netdev_features_t features)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	netdev_features_t changed = features ^ netdev->features;
+
+	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
+		e1000_vlan_mode(netdev, features);
+
+	if (!(changed & (NETIF_F_RXCSUM | NETIF_F_RXALL)))
+		return 0;
+
+	netdev->features = features;
+	adapter->rx_csum = !!(features & NETIF_F_RXCSUM);
+
+	if (netif_running(netdev))
+		e1000_reinit_locked(adapter);
+	else
+		e1000_reset(adapter);
+
+	return 1;
+}
+
+static const struct net_device_ops e1000_netdev_ops = {
+	.ndo_open		= e1000_open,
+	.ndo_stop		= e1000_close,
+	.ndo_start_xmit		= e1000_xmit_frame,
+	.ndo_set_rx_mode	= e1000_set_rx_mode,
+	.ndo_set_mac_address	= e1000_set_mac,
+	.ndo_tx_timeout		= e1000_tx_timeout,
+	.ndo_change_mtu		= e1000_change_mtu,
+	.ndo_eth_ioctl		= e1000_ioctl,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_vlan_rx_add_vid	= e1000_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= e1000_vlan_rx_kill_vid,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= e1000_netpoll,
+#endif
+	.ndo_fix_features	= e1000_fix_features,
+	.ndo_set_features	= e1000_set_features,
+};
+
+/**
+ * e1000_init_hw_struct - initialize members of hw struct
+ * @adapter: board private struct
+ * @hw: structure used by e1000_hw.c
+ *
+ * Factors out initialization of the e1000_hw struct to its own function
+ * that can be called very early at init (just after struct allocation).
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ * Returns negative error codes if MAC type setup fails.
+ */
+static int e1000_init_hw_struct(struct e1000_adapter *adapter,
+				struct e1000_hw *hw)
+{
+	struct pci_dev *pdev = adapter->pdev;
+
+	/* PCI config space info */
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_id = pdev->subsystem_device;
+	hw->revision_id = pdev->revision;
+
+	pci_read_config_word(pdev, PCI_COMMAND, &hw->pci_cmd_word);
+
+	hw->max_frame_size = adapter->netdev->mtu +
+			     ENET_HEADER_SIZE + ETHERNET_FCS_SIZE;
+	hw->min_frame_size = MINIMUM_ETHERNET_FRAME_SIZE;
+
+	/* identify the MAC */
+	if (e1000_set_mac_type(hw)) {
+		e_err(probe, "Unknown MAC Type\n");
+		return -EIO;
+	}
+
+	switch (hw->mac_type) {
+	default:
+		break;
+	case e1000_82541:
+	case e1000_82547:
+	case e1000_82541_rev_2:
+	case e1000_82547_rev_2:
+		hw->phy_init_script = 1;
+		break;
+	}
+
+	e1000_set_media_type(hw);
+	e1000_get_bus_info(hw);
+
+	hw->wait_autoneg_complete = false;
+	hw->tbi_compatibility_en = true;
+	hw->adaptive_ifs = true;
+
+	/* Copper options */
+
+	if (hw->media_type == e1000_media_type_copper) {
+		hw->mdix = AUTO_ALL_MODES;
+		hw->disable_polarity_correction = false;
+		hw->master_slave = E1000_MASTER_SLAVE;
+	}
+
+	return 0;
+}
+
+/**
+ * e1000_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in e1000_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * e1000_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct net_device *netdev;
+	struct e1000_adapter *adapter = NULL;
+	struct e1000_hw *hw;
+
+	static int cards_found;
+	static int global_quad_port_a; /* global ksp3 port a indication */
+	int i, err, pci_using_dac;
+	u16 eeprom_data = 0;
+	u16 tmp = 0;
+	u16 eeprom_apme_mask = E1000_EEPROM_APME;
+	int bars, need_ioport;
+	bool disable_dev = false;
+
+	/* do not allocate ioport bars when not needed */
+	need_ioport = e1000_is_need_ioport(pdev);
+	if (need_ioport) {
+		bars = pci_select_bars(pdev, IORESOURCE_MEM | IORESOURCE_IO);
+		err = pci_enable_device(pdev);
+	} else {
+		bars = pci_select_bars(pdev, IORESOURCE_MEM);
+		err = pci_enable_device_mem(pdev);
+	}
+	if (err)
+		return err;
+
+	err = pci_request_selected_regions(pdev, bars, e1000_driver_name);
+	if (err)
+		goto err_pci_reg;
+
+	pci_set_master(pdev);
+	err = pci_save_state(pdev);
+	if (err)
+		goto err_alloc_etherdev;
+
+	err = -ENOMEM;
+	netdev = alloc_etherdev(sizeof(struct e1000_adapter));
+	if (!netdev)
+		goto err_alloc_etherdev;
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	pci_set_drvdata(pdev, netdev);
+	adapter = netdev_priv(netdev);
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+	adapter->bars = bars;
+	adapter->need_ioport = need_ioport;
+
+	hw = &adapter->hw;
+	hw->back = adapter;
+
+	err = -EIO;
+	hw->hw_addr = pci_ioremap_bar(pdev, BAR_0);
+	if (!hw->hw_addr)
+		goto err_ioremap;
+
+	if (adapter->need_ioport) {
+		for (i = BAR_1; i < PCI_STD_NUM_BARS; i++) {
+			if (pci_resource_len(pdev, i) == 0)
+				continue;
+			if (pci_resource_flags(pdev, i) & IORESOURCE_IO) {
+				hw->io_base = pci_resource_start(pdev, i);
+				break;
+			}
+		}
+	}
+
+	/* make ready for any if (hw->...) below */
+	err = e1000_init_hw_struct(adapter, hw);
+	if (err)
+		goto err_sw_init;
+
+	/* there is a workaround being applied below that limits
+	 * 64-bit DMA addresses to 64-bit hardware.  There are some
+	 * 32-bit adapters that Tx hang when given 64-bit DMA addresses
+	 */
+	pci_using_dac = 0;
+	if ((hw->bus_type == e1000_bus_type_pcix) &&
+	    !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
+		pci_using_dac = 1;
+	} else {
+		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (err) {
+			pr_err("No usable DMA config, aborting\n");
+			goto err_dma;
+		}
+	}
+
+	netdev->netdev_ops = &e1000_netdev_ops;
+	e1000_set_ethtool_ops(netdev);
+	netdev->watchdog_timeo = 5 * HZ;
+	netif_napi_add(netdev, &adapter->napi, e1000_clean);
+
+	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
+
+	adapter->bd_number = cards_found;
+
+	/* setup the private structure */
+
+	err = e1000_sw_init(adapter);
+	if (err)
+		goto err_sw_init;
+
+	err = -EIO;
+	if (hw->mac_type == e1000_ce4100) {
+		hw->ce4100_gbe_mdio_base_virt =
+					ioremap(pci_resource_start(pdev, BAR_1),
+						pci_resource_len(pdev, BAR_1));
+
+		if (!hw->ce4100_gbe_mdio_base_virt)
+			goto err_mdio_ioremap;
+	}
+
+	if (hw->mac_type >= e1000_82543) {
+		netdev->hw_features = NETIF_F_SG |
+				   NETIF_F_HW_CSUM |
+				   NETIF_F_HW_VLAN_CTAG_RX;
+		netdev->features = NETIF_F_HW_VLAN_CTAG_TX |
+				   NETIF_F_HW_VLAN_CTAG_FILTER;
+	}
+
+	if ((hw->mac_type >= e1000_82544) &&
+	   (hw->mac_type != e1000_82547))
+		netdev->hw_features |= NETIF_F_TSO;
+
+	netdev->priv_flags |= IFF_SUPP_NOFCS;
+
+	netdev->features |= netdev->hw_features;
+	netdev->hw_features |= (NETIF_F_RXCSUM |
+				NETIF_F_RXALL |
+				NETIF_F_RXFCS);
+
+	if (pci_using_dac) {
+		netdev->features |= NETIF_F_HIGHDMA;
+		netdev->vlan_features |= NETIF_F_HIGHDMA;
+	}
+
+	netdev->vlan_features |= (NETIF_F_TSO |
+				  NETIF_F_HW_CSUM |
+				  NETIF_F_SG);
+
+	/* Do not set IFF_UNICAST_FLT for VMWare's 82545EM */
+	if (hw->device_id != E1000_DEV_ID_82545EM_COPPER ||
+	    hw->subsystem_vendor_id != PCI_VENDOR_ID_VMWARE)
+		netdev->priv_flags |= IFF_UNICAST_FLT;
+
+	/* MTU range: 46 - 16110 */
+	netdev->min_mtu = ETH_ZLEN - ETH_HLEN;
+	netdev->max_mtu = MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);
+
+	adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw);
+
+	/* initialize eeprom parameters */
+	if (e1000_init_eeprom_params(hw)) {
+		e_err(probe, "EEPROM initialization failed\n");
+		goto err_eeprom;
+	}
+
+	/* before reading the EEPROM, reset the controller to
+	 * put the device in a known good starting state
+	 */
+
+	e1000_reset_hw(hw);
+
+	/* make sure the EEPROM is good */
+	if (e1000_validate_eeprom_checksum(hw) < 0) {
+		e_err(probe, "The EEPROM Checksum Is Not Valid\n");
+		e1000_dump_eeprom(adapter);
+		/* set MAC address to all zeroes to invalidate and temporary
+		 * disable this device for the user. This blocks regular
+		 * traffic while still permitting ethtool ioctls from reaching
+		 * the hardware as well as allowing the user to run the
+		 * interface after manually setting a hw addr using
+		 * `ip set address`
+		 */
+		memset(hw->mac_addr, 0, netdev->addr_len);
+	} else {
+		/* copy the MAC address out of the EEPROM */
+		if (e1000_read_mac_addr(hw))
+			e_err(probe, "EEPROM Read Error\n");
+	}
+	/* don't block initialization here due to bad MAC address */
+	eth_hw_addr_set(netdev, hw->mac_addr);
+
+	if (!is_valid_ether_addr(netdev->dev_addr))
+		e_err(probe, "Invalid MAC Address\n");
+
+
+	INIT_DELAYED_WORK(&adapter->watchdog_task, e1000_watchdog);
+	INIT_DELAYED_WORK(&adapter->fifo_stall_task,
+			  e1000_82547_tx_fifo_stall_task);
+	INIT_DELAYED_WORK(&adapter->phy_info_task, e1000_update_phy_info_task);
+	INIT_WORK(&adapter->reset_task, e1000_reset_task);
+
+	e1000_check_options(adapter);
+
+	/* Initial Wake on LAN setting
+	 * If APM wake is enabled in the EEPROM,
+	 * enable the ACPI Magic Packet filter
+	 */
+
+	switch (hw->mac_type) {
+	case e1000_82542_rev2_0:
+	case e1000_82542_rev2_1:
+	case e1000_82543:
+		break;
+	case e1000_82544:
+		e1000_read_eeprom(hw,
+			EEPROM_INIT_CONTROL2_REG, 1, &eeprom_data);
+		eeprom_apme_mask = E1000_EEPROM_82544_APM;
+		break;
+	case e1000_82546:
+	case e1000_82546_rev_3:
+		if (er32(STATUS) & E1000_STATUS_FUNC_1) {
+			e1000_read_eeprom(hw,
+				EEPROM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
+			break;
+		}
+		fallthrough;
+	default:
+		e1000_read_eeprom(hw,
+			EEPROM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
+		break;
+	}
+	if (eeprom_data & eeprom_apme_mask)
+		adapter->eeprom_wol |= E1000_WUFC_MAG;
+
+	/* now that we have the eeprom settings, apply the special cases
+	 * where the eeprom may be wrong or the board simply won't support
+	 * wake on lan on a particular port
+	 */
+	switch (pdev->device) {
+	case E1000_DEV_ID_82546GB_PCIE:
+		adapter->eeprom_wol = 0;
+		break;
+	case E1000_DEV_ID_82546EB_FIBER:
+	case E1000_DEV_ID_82546GB_FIBER:
+		/* Wake events only supported on port A for dual fiber
+		 * regardless of eeprom setting
+		 */
+		if (er32(STATUS) & E1000_STATUS_FUNC_1)
+			adapter->eeprom_wol = 0;
+		break;
+	case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
+		/* if quad port adapter, disable WoL on all but port A */
+		if (global_quad_port_a != 0)
+			adapter->eeprom_wol = 0;
+		else
+			adapter->quad_port_a = true;
+		/* Reset for multiple quad port adapters */
+		if (++global_quad_port_a == 4)
+			global_quad_port_a = 0;
+		break;
+	}
+
+	/* initialize the wol settings based on the eeprom settings */
+	adapter->wol = adapter->eeprom_wol;
+	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+
+	/* Auto detect PHY address */
+	if (hw->mac_type == e1000_ce4100) {
+		for (i = 0; i < 32; i++) {
+			hw->phy_addr = i;
+			e1000_read_phy_reg(hw, PHY_ID2, &tmp);
+
+			if (tmp != 0 && tmp != 0xFF)
+				break;
+		}
+
+		if (i >= 32)
+			goto err_eeprom;
+	}
+
+	/* reset the hardware with the new settings */
+	e1000_reset(adapter);
+
+	strcpy(netdev->name, "eth%d");
+	err = register_netdev(netdev);
+	if (err)
+		goto err_register;
+
+	e1000_vlan_filter_on_off(adapter, false);
+
+	/* print bus type/speed/width info */
+	e_info(probe, "(PCI%s:%dMHz:%d-bit) %pM\n",
+	       ((hw->bus_type == e1000_bus_type_pcix) ? "-X" : ""),
+	       ((hw->bus_speed == e1000_bus_speed_133) ? 133 :
+		(hw->bus_speed == e1000_bus_speed_120) ? 120 :
+		(hw->bus_speed == e1000_bus_speed_100) ? 100 :
+		(hw->bus_speed == e1000_bus_speed_66) ? 66 : 33),
+	       ((hw->bus_width == e1000_bus_width_64) ? 64 : 32),
+	       netdev->dev_addr);
+
+	/* carrier off reporting is important to ethtool even BEFORE open */
+	netif_carrier_off(netdev);
+
+	e_info(probe, "Intel(R) PRO/1000 Network Connection\n");
+
+	cards_found++;
+	return 0;
+
+err_register:
+err_eeprom:
+	e1000_phy_hw_reset(hw);
+
+	if (hw->flash_address)
+		iounmap(hw->flash_address);
+	kfree(adapter->tx_ring);
+	kfree(adapter->rx_ring);
+err_dma:
+err_sw_init:
+err_mdio_ioremap:
+	iounmap(hw->ce4100_gbe_mdio_base_virt);
+	iounmap(hw->hw_addr);
+err_ioremap:
+	disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags);
+	free_netdev(netdev);
+err_alloc_etherdev:
+	pci_release_selected_regions(pdev, bars);
+err_pci_reg:
+	if (!adapter || disable_dev)
+		pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * e1000_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * e1000_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device. That could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void e1000_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	bool disable_dev;
+
+	e1000_down_and_stop(adapter);
+	e1000_release_manageability(adapter);
+
+	unregister_netdev(netdev);
+
+	e1000_phy_hw_reset(hw);
+
+	kfree(adapter->tx_ring);
+	kfree(adapter->rx_ring);
+
+	if (hw->mac_type == e1000_ce4100)
+		iounmap(hw->ce4100_gbe_mdio_base_virt);
+	iounmap(hw->hw_addr);
+	if (hw->flash_address)
+		iounmap(hw->flash_address);
+	pci_release_selected_regions(pdev, adapter->bars);
+
+	disable_dev = !test_and_set_bit(__E1000_DISABLED, &adapter->flags);
+	free_netdev(netdev);
+
+	if (disable_dev)
+		pci_disable_device(pdev);
+}
+
+/**
+ * e1000_sw_init - Initialize general software structures (struct e1000_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * e1000_sw_init initializes the Adapter private data structure.
+ * e1000_init_hw_struct MUST be called before this function
+ **/
+static int e1000_sw_init(struct e1000_adapter *adapter)
+{
+	adapter->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
+
+	adapter->num_tx_queues = 1;
+	adapter->num_rx_queues = 1;
+
+	if (e1000_alloc_queues(adapter)) {
+		e_err(probe, "Unable to allocate memory for queues\n");
+		return -ENOMEM;
+	}
+
+	/* Explicitly disable IRQ since the NIC can be in any state. */
+	e1000_irq_disable(adapter);
+
+	spin_lock_init(&adapter->stats_lock);
+
+	set_bit(__E1000_DOWN, &adapter->flags);
+
+	return 0;
+}
+
+/**
+ * e1000_alloc_queues - Allocate memory for all rings
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one ring per queue at run-time since we don't know the
+ * number of queues at compile-time.
+ **/
+static int e1000_alloc_queues(struct e1000_adapter *adapter)
+{
+	adapter->tx_ring = kcalloc(adapter->num_tx_queues,
+				   sizeof(struct e1000_tx_ring), GFP_KERNEL);
+	if (!adapter->tx_ring)
+		return -ENOMEM;
+
+	adapter->rx_ring = kcalloc(adapter->num_rx_queues,
+				   sizeof(struct e1000_rx_ring), GFP_KERNEL);
+	if (!adapter->rx_ring) {
+		kfree(adapter->tx_ring);
+		return -ENOMEM;
+	}
+
+	return E1000_SUCCESS;
+}
+
+/**
+ * e1000_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog task is started,
+ * and the stack is notified that the interface is ready.
+ **/
+int e1000_open(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	int err;
+
+	/* disallow open during test */
+	if (test_bit(__E1000_TESTING, &adapter->flags))
+		return -EBUSY;
+
+	netif_carrier_off(netdev);
+
+	/* allocate transmit descriptors */
+	err = e1000_setup_all_tx_resources(adapter);
+	if (err)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	err = e1000_setup_all_rx_resources(adapter);
+	if (err)
+		goto err_setup_rx;
+
+	e1000_power_up_phy(adapter);
+
+	adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
+	if ((hw->mng_cookie.status &
+			  E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT)) {
+		e1000_update_mng_vlan(adapter);
+	}
+
+	/* before we allocate an interrupt, we must be ready to handle it.
+	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
+	 * as soon as we call pci_request_irq, so we have to setup our
+	 * clean_rx handler before we do so.
+	 */
+	e1000_configure(adapter);
+
+	err = e1000_request_irq(adapter);
+	if (err)
+		goto err_req_irq;
+
+	/* From here on the code is the same as e1000_up() */
+	clear_bit(__E1000_DOWN, &adapter->flags);
+
+	napi_enable(&adapter->napi);
+
+	e1000_irq_enable(adapter);
+
+	netif_start_queue(netdev);
+
+	/* fire a link status change interrupt to start the watchdog */
+	ew32(ICS, E1000_ICS_LSC);
+
+	return E1000_SUCCESS;
+
+err_req_irq:
+	e1000_power_down_phy(adapter);
+	e1000_free_all_rx_resources(adapter);
+err_setup_rx:
+	e1000_free_all_tx_resources(adapter);
+err_setup_tx:
+	e1000_reset(adapter);
+
+	return err;
+}
+
+/**
+ * e1000_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the drivers control, but
+ * needs to be disabled.  A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ **/
+int e1000_close(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	int count = E1000_CHECK_RESET_COUNT;
+
+	while (test_and_set_bit(__E1000_RESETTING, &adapter->flags) && count--)
+		usleep_range(10000, 20000);
+
+	WARN_ON(count < 0);
+
+	/* signal that we're down so that the reset task will no longer run */
+	set_bit(__E1000_DOWN, &adapter->flags);
+	clear_bit(__E1000_RESETTING, &adapter->flags);
+
+	e1000_down(adapter);
+	e1000_power_down_phy(adapter);
+	e1000_free_irq(adapter);
+
+	e1000_free_all_tx_resources(adapter);
+	e1000_free_all_rx_resources(adapter);
+
+	/* kill manageability vlan ID if supported, but not if a vlan with
+	 * the same ID is registered on the host OS (let 8021q kill it)
+	 */
+	if ((hw->mng_cookie.status &
+	     E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) &&
+	    !test_bit(adapter->mng_vlan_id, adapter->active_vlans)) {
+		e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q),
+				       adapter->mng_vlan_id);
+	}
+
+	return 0;
+}
+
+/**
+ * e1000_check_64k_bound - check that memory doesn't cross 64kB boundary
+ * @adapter: address of board private structure
+ * @start: address of beginning of memory
+ * @len: length of memory
+ **/
+static bool e1000_check_64k_bound(struct e1000_adapter *adapter, void *start,
+				  unsigned long len)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned long begin = (unsigned long)start;
+	unsigned long end = begin + len;
+
+	/* First rev 82545 and 82546 need to not allow any memory
+	 * write location to cross 64k boundary due to errata 23
+	 */
+	if (hw->mac_type == e1000_82545 ||
+	    hw->mac_type == e1000_ce4100 ||
+	    hw->mac_type == e1000_82546) {
+		return ((begin ^ (end - 1)) >> 16) == 0;
+	}
+
+	return true;
+}
+
+/**
+ * e1000_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @adapter: board private structure
+ * @txdr:    tx descriptor ring (for a specific queue) to setup
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int e1000_setup_tx_resources(struct e1000_adapter *adapter,
+				    struct e1000_tx_ring *txdr)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	int size;
+
+	size = sizeof(struct e1000_tx_buffer) * txdr->count;
+	txdr->buffer_info = vzalloc(size);
+	if (!txdr->buffer_info)
+		return -ENOMEM;
+
+	/* round up to nearest 4K */
+
+	txdr->size = txdr->count * sizeof(struct e1000_tx_desc);
+	txdr->size = ALIGN(txdr->size, 4096);
+
+	txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma,
+					GFP_KERNEL);
+	if (!txdr->desc) {
+setup_tx_desc_die:
+		vfree(txdr->buffer_info);
+		return -ENOMEM;
+	}
+
+	/* Fix for errata 23, can't cross 64kB boundary */
+	if (!e1000_check_64k_bound(adapter, txdr->desc, txdr->size)) {
+		void *olddesc = txdr->desc;
+		dma_addr_t olddma = txdr->dma;
+		e_err(tx_err, "txdr align check failed: %u bytes at %p\n",
+		      txdr->size, txdr->desc);
+		/* Try again, without freeing the previous */
+		txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size,
+						&txdr->dma, GFP_KERNEL);
+		/* Failed allocation, critical failure */
+		if (!txdr->desc) {
+			dma_free_coherent(&pdev->dev, txdr->size, olddesc,
+					  olddma);
+			goto setup_tx_desc_die;
+		}
+
+		if (!e1000_check_64k_bound(adapter, txdr->desc, txdr->size)) {
+			/* give up */
+			dma_free_coherent(&pdev->dev, txdr->size, txdr->desc,
+					  txdr->dma);
+			dma_free_coherent(&pdev->dev, txdr->size, olddesc,
+					  olddma);
+			e_err(probe, "Unable to allocate aligned memory "
+			      "for the transmit descriptor ring\n");
+			vfree(txdr->buffer_info);
+			return -ENOMEM;
+		} else {
+			/* Free old allocation, new allocation was successful */
+			dma_free_coherent(&pdev->dev, txdr->size, olddesc,
+					  olddma);
+		}
+	}
+	memset(txdr->desc, 0, txdr->size);
+
+	txdr->next_to_use = 0;
+	txdr->next_to_clean = 0;
+
+	return 0;
+}
+
+/**
+ * e1000_setup_all_tx_resources - wrapper to allocate Tx resources
+ * 				  (Descriptors) for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+int e1000_setup_all_tx_resources(struct e1000_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		err = e1000_setup_tx_resources(adapter, &adapter->tx_ring[i]);
+		if (err) {
+			e_err(probe, "Allocation for Tx Queue %u failed\n", i);
+			for (i-- ; i >= 0; i--)
+				e1000_free_tx_resources(adapter,
+							&adapter->tx_ring[i]);
+			break;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * e1000_configure_tx - Configure 8254x Transmit Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+static void e1000_configure_tx(struct e1000_adapter *adapter)
+{
+	u64 tdba;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 tdlen, tctl, tipg;
+	u32 ipgr1, ipgr2;
+
+	/* Setup the HW Tx Head and Tail descriptor pointers */
+
+	switch (adapter->num_tx_queues) {
+	case 1:
+	default:
+		tdba = adapter->tx_ring[0].dma;
+		tdlen = adapter->tx_ring[0].count *
+			sizeof(struct e1000_tx_desc);
+		ew32(TDLEN, tdlen);
+		ew32(TDBAH, (tdba >> 32));
+		ew32(TDBAL, (tdba & 0x00000000ffffffffULL));
+		ew32(TDT, 0);
+		ew32(TDH, 0);
+		adapter->tx_ring[0].tdh = ((hw->mac_type >= e1000_82543) ?
+					   E1000_TDH : E1000_82542_TDH);
+		adapter->tx_ring[0].tdt = ((hw->mac_type >= e1000_82543) ?
+					   E1000_TDT : E1000_82542_TDT);
+		break;
+	}
+
+	/* Set the default values for the Tx Inter Packet Gap timer */
+	if ((hw->media_type == e1000_media_type_fiber ||
+	     hw->media_type == e1000_media_type_internal_serdes))
+		tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
+	else
+		tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
+
+	switch (hw->mac_type) {
+	case e1000_82542_rev2_0:
+	case e1000_82542_rev2_1:
+		tipg = DEFAULT_82542_TIPG_IPGT;
+		ipgr1 = DEFAULT_82542_TIPG_IPGR1;
+		ipgr2 = DEFAULT_82542_TIPG_IPGR2;
+		break;
+	default:
+		ipgr1 = DEFAULT_82543_TIPG_IPGR1;
+		ipgr2 = DEFAULT_82543_TIPG_IPGR2;
+		break;
+	}
+	tipg |= ipgr1 << E1000_TIPG_IPGR1_SHIFT;
+	tipg |= ipgr2 << E1000_TIPG_IPGR2_SHIFT;
+	ew32(TIPG, tipg);
+
+	/* Set the Tx Interrupt Delay register */
+
+	ew32(TIDV, adapter->tx_int_delay);
+	if (hw->mac_type >= e1000_82540)
+		ew32(TADV, adapter->tx_abs_int_delay);
+
+	/* Program the Transmit Control Register */
+
+	tctl = er32(TCTL);
+	tctl &= ~E1000_TCTL_CT;
+	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
+		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
+
+	e1000_config_collision_dist(hw);
+
+	/* Setup Transmit Descriptor Settings for eop descriptor */
+	adapter->txd_cmd = E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS;
+
+	/* only set IDE if we are delaying interrupts using the timers */
+	if (adapter->tx_int_delay)
+		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
+
+	if (hw->mac_type < e1000_82543)
+		adapter->txd_cmd |= E1000_TXD_CMD_RPS;
+	else
+		adapter->txd_cmd |= E1000_TXD_CMD_RS;
+
+	/* Cache if we're 82544 running in PCI-X because we'll
+	 * need this to apply a workaround later in the send path.
+	 */
+	if (hw->mac_type == e1000_82544 &&
+	    hw->bus_type == e1000_bus_type_pcix)
+		adapter->pcix_82544 = true;
+
+	ew32(TCTL, tctl);
+
+}
+
+/**
+ * e1000_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @adapter: board private structure
+ * @rxdr:    rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int e1000_setup_rx_resources(struct e1000_adapter *adapter,
+				    struct e1000_rx_ring *rxdr)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	int size, desc_len;
+
+	size = sizeof(struct e1000_rx_buffer) * rxdr->count;
+	rxdr->buffer_info = vzalloc(size);
+	if (!rxdr->buffer_info)
+		return -ENOMEM;
+
+	desc_len = sizeof(struct e1000_rx_desc);
+
+	/* Round up to nearest 4K */
+
+	rxdr->size = rxdr->count * desc_len;
+	rxdr->size = ALIGN(rxdr->size, 4096);
+
+	rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma,
+					GFP_KERNEL);
+	if (!rxdr->desc) {
+setup_rx_desc_die:
+		vfree(rxdr->buffer_info);
+		return -ENOMEM;
+	}
+
+	/* Fix for errata 23, can't cross 64kB boundary */
+	if (!e1000_check_64k_bound(adapter, rxdr->desc, rxdr->size)) {
+		void *olddesc = rxdr->desc;
+		dma_addr_t olddma = rxdr->dma;
+		e_err(rx_err, "rxdr align check failed: %u bytes at %p\n",
+		      rxdr->size, rxdr->desc);
+		/* Try again, without freeing the previous */
+		rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size,
+						&rxdr->dma, GFP_KERNEL);
+		/* Failed allocation, critical failure */
+		if (!rxdr->desc) {
+			dma_free_coherent(&pdev->dev, rxdr->size, olddesc,
+					  olddma);
+			goto setup_rx_desc_die;
+		}
+
+		if (!e1000_check_64k_bound(adapter, rxdr->desc, rxdr->size)) {
+			/* give up */
+			dma_free_coherent(&pdev->dev, rxdr->size, rxdr->desc,
+					  rxdr->dma);
+			dma_free_coherent(&pdev->dev, rxdr->size, olddesc,
+					  olddma);
+			e_err(probe, "Unable to allocate aligned memory for "
+			      "the Rx descriptor ring\n");
+			goto setup_rx_desc_die;
+		} else {
+			/* Free old allocation, new allocation was successful */
+			dma_free_coherent(&pdev->dev, rxdr->size, olddesc,
+					  olddma);
+		}
+	}
+	memset(rxdr->desc, 0, rxdr->size);
+
+	rxdr->next_to_clean = 0;
+	rxdr->next_to_use = 0;
+	rxdr->rx_skb_top = NULL;
+
+	return 0;
+}
+
+/**
+ * e1000_setup_all_rx_resources - wrapper to allocate Rx resources
+ * 				  (Descriptors) for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+int e1000_setup_all_rx_resources(struct e1000_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		err = e1000_setup_rx_resources(adapter, &adapter->rx_ring[i]);
+		if (err) {
+			e_err(probe, "Allocation for Rx Queue %u failed\n", i);
+			for (i-- ; i >= 0; i--)
+				e1000_free_rx_resources(adapter,
+							&adapter->rx_ring[i]);
+			break;
+		}
+	}
+
+	return err;
+}
+
+/**
+ * e1000_setup_rctl - configure the receive control registers
+ * @adapter: Board private structure
+ **/
+static void e1000_setup_rctl(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl;
+
+	rctl = er32(RCTL);
+
+	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
+
+	rctl |= E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
+		E1000_RCTL_RDMTS_HALF |
+		(hw->mc_filter_type << E1000_RCTL_MO_SHIFT);
+
+	if (hw->tbi_compatibility_on == 1)
+		rctl |= E1000_RCTL_SBP;
+	else
+		rctl &= ~E1000_RCTL_SBP;
+
+	if (adapter->netdev->mtu <= ETH_DATA_LEN)
+		rctl &= ~E1000_RCTL_LPE;
+	else
+		rctl |= E1000_RCTL_LPE;
+
+	/* Setup buffer sizes */
+	rctl &= ~E1000_RCTL_SZ_4096;
+	rctl |= E1000_RCTL_BSEX;
+	switch (adapter->rx_buffer_len) {
+	case E1000_RXBUFFER_2048:
+	default:
+		rctl |= E1000_RCTL_SZ_2048;
+		rctl &= ~E1000_RCTL_BSEX;
+		break;
+	case E1000_RXBUFFER_4096:
+		rctl |= E1000_RCTL_SZ_4096;
+		break;
+	case E1000_RXBUFFER_8192:
+		rctl |= E1000_RCTL_SZ_8192;
+		break;
+	case E1000_RXBUFFER_16384:
+		rctl |= E1000_RCTL_SZ_16384;
+		break;
+	}
+
+	/* This is useful for sniffing bad packets. */
+	if (adapter->netdev->features & NETIF_F_RXALL) {
+		/* UPE and MPE will be handled by normal PROMISC logic
+		 * in e1000e_set_rx_mode
+		 */
+		rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
+			 E1000_RCTL_BAM | /* RX All Bcast Pkts */
+			 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
+
+		rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
+			  E1000_RCTL_DPF | /* Allow filtered pause */
+			  E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
+		/* Do not mess with E1000_CTRL_VME, it affects transmit as well,
+		 * and that breaks VLANs.
+		 */
+	}
+
+	ew32(RCTL, rctl);
+}
+
+/**
+ * e1000_configure_rx - Configure 8254x Receive Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+static void e1000_configure_rx(struct e1000_adapter *adapter)
+{
+	u64 rdba;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rdlen, rctl, rxcsum;
+
+	if (adapter->netdev->mtu > ETH_DATA_LEN) {
+		rdlen = adapter->rx_ring[0].count *
+			sizeof(struct e1000_rx_desc);
+		adapter->clean_rx = e1000_clean_jumbo_rx_irq;
+		adapter->alloc_rx_buf = e1000_alloc_jumbo_rx_buffers;
+	} else {
+		rdlen = adapter->rx_ring[0].count *
+			sizeof(struct e1000_rx_desc);
+		adapter->clean_rx = e1000_clean_rx_irq;
+		adapter->alloc_rx_buf = e1000_alloc_rx_buffers;
+	}
+
+	/* disable receives while setting up the descriptors */
+	rctl = er32(RCTL);
+	ew32(RCTL, rctl & ~E1000_RCTL_EN);
+
+	/* set the Receive Delay Timer Register */
+	ew32(RDTR, adapter->rx_int_delay);
+
+	if (hw->mac_type >= e1000_82540) {
+		ew32(RADV, adapter->rx_abs_int_delay);
+		if (adapter->itr_setting != 0)
+			ew32(ITR, 1000000000 / (adapter->itr * 256));
+	}
+
+	/* Setup the HW Rx Head and Tail Descriptor Pointers and
+	 * the Base and Length of the Rx Descriptor Ring
+	 */
+	switch (adapter->num_rx_queues) {
+	case 1:
+	default:
+		rdba = adapter->rx_ring[0].dma;
+		ew32(RDLEN, rdlen);
+		ew32(RDBAH, (rdba >> 32));
+		ew32(RDBAL, (rdba & 0x00000000ffffffffULL));
+		ew32(RDT, 0);
+		ew32(RDH, 0);
+		adapter->rx_ring[0].rdh = ((hw->mac_type >= e1000_82543) ?
+					   E1000_RDH : E1000_82542_RDH);
+		adapter->rx_ring[0].rdt = ((hw->mac_type >= e1000_82543) ?
+					   E1000_RDT : E1000_82542_RDT);
+		break;
+	}
+
+	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
+	if (hw->mac_type >= e1000_82543) {
+		rxcsum = er32(RXCSUM);
+		if (adapter->rx_csum)
+			rxcsum |= E1000_RXCSUM_TUOFL;
+		else
+			/* don't need to clear IPPCSE as it defaults to 0 */
+			rxcsum &= ~E1000_RXCSUM_TUOFL;
+		ew32(RXCSUM, rxcsum);
+	}
+
+	/* Enable Receives */
+	ew32(RCTL, rctl | E1000_RCTL_EN);
+}
+
+/**
+ * e1000_free_tx_resources - Free Tx Resources per Queue
+ * @adapter: board private structure
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ **/
+static void e1000_free_tx_resources(struct e1000_adapter *adapter,
+				    struct e1000_tx_ring *tx_ring)
+{
+	struct pci_dev *pdev = adapter->pdev;
+
+	e1000_clean_tx_ring(adapter, tx_ring);
+
+	vfree(tx_ring->buffer_info);
+	tx_ring->buffer_info = NULL;
+
+	dma_free_coherent(&pdev->dev, tx_ring->size, tx_ring->desc,
+			  tx_ring->dma);
+
+	tx_ring->desc = NULL;
+}
+
+/**
+ * e1000_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ **/
+void e1000_free_all_tx_resources(struct e1000_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		e1000_free_tx_resources(adapter, &adapter->tx_ring[i]);
+}
+
+static void
+e1000_unmap_and_free_tx_resource(struct e1000_adapter *adapter,
+				 struct e1000_tx_buffer *buffer_info,
+				 int budget)
+{
+	if (buffer_info->dma) {
+		if (buffer_info->mapped_as_page)
+			dma_unmap_page(&adapter->pdev->dev, buffer_info->dma,
+				       buffer_info->length, DMA_TO_DEVICE);
+		else
+			dma_unmap_single(&adapter->pdev->dev, buffer_info->dma,
+					 buffer_info->length,
+					 DMA_TO_DEVICE);
+		buffer_info->dma = 0;
+	}
+	if (buffer_info->skb) {
+		napi_consume_skb(buffer_info->skb, budget);
+		buffer_info->skb = NULL;
+	}
+	buffer_info->time_stamp = 0;
+	/* buffer_info must be completely set up in the transmit path */
+}
+
+/**
+ * e1000_clean_tx_ring - Free Tx Buffers
+ * @adapter: board private structure
+ * @tx_ring: ring to be cleaned
+ **/
+static void e1000_clean_tx_ring(struct e1000_adapter *adapter,
+				struct e1000_tx_ring *tx_ring)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_tx_buffer *buffer_info;
+	unsigned long size;
+	unsigned int i;
+
+	/* Free all the Tx ring sk_buffs */
+
+	for (i = 0; i < tx_ring->count; i++) {
+		buffer_info = &tx_ring->buffer_info[i];
+		e1000_unmap_and_free_tx_resource(adapter, buffer_info, 0);
+	}
+
+	netdev_reset_queue(adapter->netdev);
+	size = sizeof(struct e1000_tx_buffer) * tx_ring->count;
+	memset(tx_ring->buffer_info, 0, size);
+
+	/* Zero out the descriptor ring */
+
+	memset(tx_ring->desc, 0, tx_ring->size);
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+	tx_ring->last_tx_tso = false;
+
+	writel(0, hw->hw_addr + tx_ring->tdh);
+	writel(0, hw->hw_addr + tx_ring->tdt);
+}
+
+/**
+ * e1000_clean_all_tx_rings - Free Tx Buffers for all queues
+ * @adapter: board private structure
+ **/
+static void e1000_clean_all_tx_rings(struct e1000_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		e1000_clean_tx_ring(adapter, &adapter->tx_ring[i]);
+}
+
+/**
+ * e1000_free_rx_resources - Free Rx Resources
+ * @adapter: board private structure
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+static void e1000_free_rx_resources(struct e1000_adapter *adapter,
+				    struct e1000_rx_ring *rx_ring)
+{
+	struct pci_dev *pdev = adapter->pdev;
+
+	e1000_clean_rx_ring(adapter, rx_ring);
+
+	vfree(rx_ring->buffer_info);
+	rx_ring->buffer_info = NULL;
+
+	dma_free_coherent(&pdev->dev, rx_ring->size, rx_ring->desc,
+			  rx_ring->dma);
+
+	rx_ring->desc = NULL;
+}
+
+/**
+ * e1000_free_all_rx_resources - Free Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ **/
+void e1000_free_all_rx_resources(struct e1000_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		e1000_free_rx_resources(adapter, &adapter->rx_ring[i]);
+}
+
+#define E1000_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN)
+static unsigned int e1000_frag_len(const struct e1000_adapter *a)
+{
+	return SKB_DATA_ALIGN(a->rx_buffer_len + E1000_HEADROOM) +
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+}
+
+static void *e1000_alloc_frag(const struct e1000_adapter *a)
+{
+	unsigned int len = e1000_frag_len(a);
+	u8 *data = netdev_alloc_frag(len);
+
+	if (likely(data))
+		data += E1000_HEADROOM;
+	return data;
+}
+
+/**
+ * e1000_clean_rx_ring - Free Rx Buffers per Queue
+ * @adapter: board private structure
+ * @rx_ring: ring to free buffers from
+ **/
+static void e1000_clean_rx_ring(struct e1000_adapter *adapter,
+				struct e1000_rx_ring *rx_ring)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_rx_buffer *buffer_info;
+	struct pci_dev *pdev = adapter->pdev;
+	unsigned long size;
+	unsigned int i;
+
+	/* Free all the Rx netfrags */
+	for (i = 0; i < rx_ring->count; i++) {
+		buffer_info = &rx_ring->buffer_info[i];
+		if (adapter->clean_rx == e1000_clean_rx_irq) {
+			if (buffer_info->dma)
+				dma_unmap_single(&pdev->dev, buffer_info->dma,
+						 adapter->rx_buffer_len,
+						 DMA_FROM_DEVICE);
+			if (buffer_info->rxbuf.data) {
+				skb_free_frag(buffer_info->rxbuf.data);
+				buffer_info->rxbuf.data = NULL;
+			}
+		} else if (adapter->clean_rx == e1000_clean_jumbo_rx_irq) {
+			if (buffer_info->dma)
+				dma_unmap_page(&pdev->dev, buffer_info->dma,
+					       adapter->rx_buffer_len,
+					       DMA_FROM_DEVICE);
+			if (buffer_info->rxbuf.page) {
+				put_page(buffer_info->rxbuf.page);
+				buffer_info->rxbuf.page = NULL;
+			}
+		}
+
+		buffer_info->dma = 0;
+	}
+
+	/* there also may be some cached data from a chained receive */
+	napi_free_frags(&adapter->napi);
+	rx_ring->rx_skb_top = NULL;
+
+	size = sizeof(struct e1000_rx_buffer) * rx_ring->count;
+	memset(rx_ring->buffer_info, 0, size);
+
+	/* Zero out the descriptor ring */
+	memset(rx_ring->desc, 0, rx_ring->size);
+
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+
+	writel(0, hw->hw_addr + rx_ring->rdh);
+	writel(0, hw->hw_addr + rx_ring->rdt);
+}
+
+/**
+ * e1000_clean_all_rx_rings - Free Rx Buffers for all queues
+ * @adapter: board private structure
+ **/
+static void e1000_clean_all_rx_rings(struct e1000_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		e1000_clean_rx_ring(adapter, &adapter->rx_ring[i]);
+}
+
+/* The 82542 2.0 (revision 2) needs to have the receive unit in reset
+ * and memory write and invalidate disabled for certain operations
+ */
+static void e1000_enter_82542_rst(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	u32 rctl;
+
+	e1000_pci_clear_mwi(hw);
+
+	rctl = er32(RCTL);
+	rctl |= E1000_RCTL_RST;
+	ew32(RCTL, rctl);
+	E1000_WRITE_FLUSH();
+	mdelay(5);
+
+	if (netif_running(netdev))
+		e1000_clean_all_rx_rings(adapter);
+}
+
+static void e1000_leave_82542_rst(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	u32 rctl;
+
+	rctl = er32(RCTL);
+	rctl &= ~E1000_RCTL_RST;
+	ew32(RCTL, rctl);
+	E1000_WRITE_FLUSH();
+	mdelay(5);
+
+	if (hw->pci_cmd_word & PCI_COMMAND_INVALIDATE)
+		e1000_pci_set_mwi(hw);
+
+	if (netif_running(netdev)) {
+		/* No need to loop, because 82542 supports only 1 queue */
+		struct e1000_rx_ring *ring = &adapter->rx_ring[0];
+		e1000_configure_rx(adapter);
+		adapter->alloc_rx_buf(adapter, ring, E1000_DESC_UNUSED(ring));
+	}
+}
+
+/**
+ * e1000_set_mac - Change the Ethernet Address of the NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int e1000_set_mac(struct net_device *netdev, void *p)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	/* 82542 2.0 needs to be in reset to write receive address registers */
+
+	if (hw->mac_type == e1000_82542_rev2_0)
+		e1000_enter_82542_rst(adapter);
+
+	eth_hw_addr_set(netdev, addr->sa_data);
+	memcpy(hw->mac_addr, addr->sa_data, netdev->addr_len);
+
+	e1000_rar_set(hw, hw->mac_addr, 0);
+
+	if (hw->mac_type == e1000_82542_rev2_0)
+		e1000_leave_82542_rst(adapter);
+
+	return 0;
+}
+
+/**
+ * e1000_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The set_rx_mode entry point is called whenever the unicast or multicast
+ * address lists or the network interface flags are updated. This routine is
+ * responsible for configuring the hardware for proper unicast, multicast,
+ * promiscuous mode, and all-multi behavior.
+ **/
+static void e1000_set_rx_mode(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct netdev_hw_addr *ha;
+	bool use_uc = false;
+	u32 rctl;
+	u32 hash_value;
+	int i, rar_entries = E1000_RAR_ENTRIES;
+	int mta_reg_count = E1000_NUM_MTA_REGISTERS;
+	u32 *mcarray = kcalloc(mta_reg_count, sizeof(u32), GFP_ATOMIC);
+
+	if (!mcarray)
+		return;
+
+	/* Check for Promiscuous and All Multicast modes */
+
+	rctl = er32(RCTL);
+
+	if (netdev->flags & IFF_PROMISC) {
+		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
+		rctl &= ~E1000_RCTL_VFE;
+	} else {
+		if (netdev->flags & IFF_ALLMULTI)
+			rctl |= E1000_RCTL_MPE;
+		else
+			rctl &= ~E1000_RCTL_MPE;
+		/* Enable VLAN filter if there is a VLAN */
+		if (e1000_vlan_used(adapter))
+			rctl |= E1000_RCTL_VFE;
+	}
+
+	if (netdev_uc_count(netdev) > rar_entries - 1) {
+		rctl |= E1000_RCTL_UPE;
+	} else if (!(netdev->flags & IFF_PROMISC)) {
+		rctl &= ~E1000_RCTL_UPE;
+		use_uc = true;
+	}
+
+	ew32(RCTL, rctl);
+
+	/* 82542 2.0 needs to be in reset to write receive address registers */
+
+	if (hw->mac_type == e1000_82542_rev2_0)
+		e1000_enter_82542_rst(adapter);
+
+	/* load the first 14 addresses into the exact filters 1-14. Unicast
+	 * addresses take precedence to avoid disabling unicast filtering
+	 * when possible.
+	 *
+	 * RAR 0 is used for the station MAC address
+	 * if there are not 14 addresses, go ahead and clear the filters
+	 */
+	i = 1;
+	if (use_uc)
+		netdev_for_each_uc_addr(ha, netdev) {
+			if (i == rar_entries)
+				break;
+			e1000_rar_set(hw, ha->addr, i++);
+		}
+
+	netdev_for_each_mc_addr(ha, netdev) {
+		if (i == rar_entries) {
+			/* load any remaining addresses into the hash table */
+			u32 hash_reg, hash_bit, mta;
+			hash_value = e1000_hash_mc_addr(hw, ha->addr);
+			hash_reg = (hash_value >> 5) & 0x7F;
+			hash_bit = hash_value & 0x1F;
+			mta = (1 << hash_bit);
+			mcarray[hash_reg] |= mta;
+		} else {
+			e1000_rar_set(hw, ha->addr, i++);
+		}
+	}
+
+	for (; i < rar_entries; i++) {
+		E1000_WRITE_REG_ARRAY(hw, RA, i << 1, 0);
+		E1000_WRITE_FLUSH();
+		E1000_WRITE_REG_ARRAY(hw, RA, (i << 1) + 1, 0);
+		E1000_WRITE_FLUSH();
+	}
+
+	/* write the hash table completely, write from bottom to avoid
+	 * both stupid write combining chipsets, and flushing each write
+	 */
+	for (i = mta_reg_count - 1; i >= 0 ; i--) {
+		/* If we are on an 82544 has an errata where writing odd
+		 * offsets overwrites the previous even offset, but writing
+		 * backwards over the range solves the issue by always
+		 * writing the odd offset first
+		 */
+		E1000_WRITE_REG_ARRAY(hw, MTA, i, mcarray[i]);
+	}
+	E1000_WRITE_FLUSH();
+
+	if (hw->mac_type == e1000_82542_rev2_0)
+		e1000_leave_82542_rst(adapter);
+
+	kfree(mcarray);
+}
+
+/**
+ * e1000_update_phy_info_task - get phy info
+ * @work: work struct contained inside adapter struct
+ *
+ * Need to wait a few seconds after link up to get diagnostic information from
+ * the phy
+ */
+static void e1000_update_phy_info_task(struct work_struct *work)
+{
+	struct e1000_adapter *adapter = container_of(work,
+						     struct e1000_adapter,
+						     phy_info_task.work);
+
+	e1000_phy_get_info(&adapter->hw, &adapter->phy_info);
+}
+
+/**
+ * e1000_82547_tx_fifo_stall_task - task to complete work
+ * @work: work struct contained inside adapter struct
+ **/
+static void e1000_82547_tx_fifo_stall_task(struct work_struct *work)
+{
+	struct e1000_adapter *adapter = container_of(work,
+						     struct e1000_adapter,
+						     fifo_stall_task.work);
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	u32 tctl;
+
+	if (atomic_read(&adapter->tx_fifo_stall)) {
+		if ((er32(TDT) == er32(TDH)) &&
+		   (er32(TDFT) == er32(TDFH)) &&
+		   (er32(TDFTS) == er32(TDFHS))) {
+			tctl = er32(TCTL);
+			ew32(TCTL, tctl & ~E1000_TCTL_EN);
+			ew32(TDFT, adapter->tx_head_addr);
+			ew32(TDFH, adapter->tx_head_addr);
+			ew32(TDFTS, adapter->tx_head_addr);
+			ew32(TDFHS, adapter->tx_head_addr);
+			ew32(TCTL, tctl);
+			E1000_WRITE_FLUSH();
+
+			adapter->tx_fifo_head = 0;
+			atomic_set(&adapter->tx_fifo_stall, 0);
+			netif_wake_queue(netdev);
+		} else if (!test_bit(__E1000_DOWN, &adapter->flags)) {
+			schedule_delayed_work(&adapter->fifo_stall_task, 1);
+		}
+	}
+}
+
+bool e1000_has_link(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	bool link_active = false;
+
+	/* get_link_status is set on LSC (link status) interrupt or rx
+	 * sequence error interrupt (except on intel ce4100).
+	 * get_link_status will stay false until the
+	 * e1000_check_for_link establishes link for copper adapters
+	 * ONLY
+	 */
+	switch (hw->media_type) {
+	case e1000_media_type_copper:
+		if (hw->mac_type == e1000_ce4100)
+			hw->get_link_status = 1;
+		if (hw->get_link_status) {
+			e1000_check_for_link(hw);
+			link_active = !hw->get_link_status;
+		} else {
+			link_active = true;
+		}
+		break;
+	case e1000_media_type_fiber:
+		e1000_check_for_link(hw);
+		link_active = !!(er32(STATUS) & E1000_STATUS_LU);
+		break;
+	case e1000_media_type_internal_serdes:
+		e1000_check_for_link(hw);
+		link_active = hw->serdes_has_link;
+		break;
+	default:
+		break;
+	}
+
+	return link_active;
+}
+
+/**
+ * e1000_watchdog - work function
+ * @work: work struct contained inside adapter struct
+ **/
+static void e1000_watchdog(struct work_struct *work)
+{
+	struct e1000_adapter *adapter = container_of(work,
+						     struct e1000_adapter,
+						     watchdog_task.work);
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_tx_ring *txdr = adapter->tx_ring;
+	u32 link, tctl;
+
+	link = e1000_has_link(adapter);
+	if ((netif_carrier_ok(netdev)) && link)
+		goto link_up;
+
+	if (link) {
+		if (!netif_carrier_ok(netdev)) {
+			u32 ctrl;
+			/* update snapshot of PHY registers on LSC */
+			e1000_get_speed_and_duplex(hw,
+						   &adapter->link_speed,
+						   &adapter->link_duplex);
+
+			ctrl = er32(CTRL);
+			pr_info("%s NIC Link is Up %d Mbps %s, "
+				"Flow Control: %s\n",
+				netdev->name,
+				adapter->link_speed,
+				adapter->link_duplex == FULL_DUPLEX ?
+				"Full Duplex" : "Half Duplex",
+				((ctrl & E1000_CTRL_TFCE) && (ctrl &
+				E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl &
+				E1000_CTRL_RFCE) ? "RX" : ((ctrl &
+				E1000_CTRL_TFCE) ? "TX" : "None")));
+
+			/* adjust timeout factor according to speed/duplex */
+			adapter->tx_timeout_factor = 1;
+			switch (adapter->link_speed) {
+			case SPEED_10:
+				adapter->tx_timeout_factor = 16;
+				break;
+			case SPEED_100:
+				/* maybe add some timeout factor ? */
+				break;
+			}
+
+			/* enable transmits in the hardware */
+			tctl = er32(TCTL);
+			tctl |= E1000_TCTL_EN;
+			ew32(TCTL, tctl);
+
+			netif_carrier_on(netdev);
+			if (!test_bit(__E1000_DOWN, &adapter->flags))
+				schedule_delayed_work(&adapter->phy_info_task,
+						      2 * HZ);
+			adapter->smartspeed = 0;
+		}
+	} else {
+		if (netif_carrier_ok(netdev)) {
+			adapter->link_speed = 0;
+			adapter->link_duplex = 0;
+			pr_info("%s NIC Link is Down\n",
+				netdev->name);
+			netif_carrier_off(netdev);
+
+			if (!test_bit(__E1000_DOWN, &adapter->flags))
+				schedule_delayed_work(&adapter->phy_info_task,
+						      2 * HZ);
+		}
+
+		e1000_smartspeed(adapter);
+	}
+
+link_up:
+	e1000_update_stats(adapter);
+
+	hw->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
+	adapter->tpt_old = adapter->stats.tpt;
+	hw->collision_delta = adapter->stats.colc - adapter->colc_old;
+	adapter->colc_old = adapter->stats.colc;
+
+	adapter->gorcl = adapter->stats.gorcl - adapter->gorcl_old;
+	adapter->gorcl_old = adapter->stats.gorcl;
+	adapter->gotcl = adapter->stats.gotcl - adapter->gotcl_old;
+	adapter->gotcl_old = adapter->stats.gotcl;
+
+	e1000_update_adaptive(hw);
+
+	if (!netif_carrier_ok(netdev)) {
+		if (E1000_DESC_UNUSED(txdr) + 1 < txdr->count) {
+			/* We've lost link, so the controller stops DMA,
+			 * but we've got queued Tx work that's never going
+			 * to get done, so reset controller to flush Tx.
+			 * (Do the reset outside of interrupt context).
+			 */
+			adapter->tx_timeout_count++;
+			schedule_work(&adapter->reset_task);
+			/* exit immediately since reset is imminent */
+			return;
+		}
+	}
+
+	/* Simple mode for Interrupt Throttle Rate (ITR) */
+	if (hw->mac_type >= e1000_82540 && adapter->itr_setting == 4) {
+		/* Symmetric Tx/Rx gets a reduced ITR=2000;
+		 * Total asymmetrical Tx or Rx gets ITR=8000;
+		 * everyone else is between 2000-8000.
+		 */
+		u32 goc = (adapter->gotcl + adapter->gorcl) / 10000;
+		u32 dif = (adapter->gotcl > adapter->gorcl ?
+			    adapter->gotcl - adapter->gorcl :
+			    adapter->gorcl - adapter->gotcl) / 10000;
+		u32 itr = goc > 0 ? (dif * 6000 / goc + 2000) : 8000;
+
+		ew32(ITR, 1000000000 / (itr * 256));
+	}
+
+	/* Cause software interrupt to ensure rx ring is cleaned */
+	ew32(ICS, E1000_ICS_RXDMT0);
+
+	/* Force detection of hung controller every watchdog period */
+	adapter->detect_tx_hung = true;
+
+	/* Reschedule the task */
+	if (!test_bit(__E1000_DOWN, &adapter->flags))
+		schedule_delayed_work(&adapter->watchdog_task, 2 * HZ);
+}
+
+enum latency_range {
+	lowest_latency = 0,
+	low_latency = 1,
+	bulk_latency = 2,
+	latency_invalid = 255
+};
+
+/**
+ * e1000_update_itr - update the dynamic ITR value based on statistics
+ * @adapter: pointer to adapter
+ * @itr_setting: current adapter->itr
+ * @packets: the number of packets during this measurement interval
+ * @bytes: the number of bytes during this measurement interval
+ *
+ *      Stores a new ITR value based on packets and byte
+ *      counts during the last interrupt.  The advantage of per interrupt
+ *      computation is faster updates and more accurate ITR for the current
+ *      traffic pattern.  Constants in this function were computed
+ *      based on theoretical maximum wire speed and thresholds were set based
+ *      on testing data as well as attempting to minimize response time
+ *      while increasing bulk throughput.
+ *      this functionality is controlled by the InterruptThrottleRate module
+ *      parameter (see e1000_param.c)
+ **/
+static unsigned int e1000_update_itr(struct e1000_adapter *adapter,
+				     u16 itr_setting, int packets, int bytes)
+{
+	unsigned int retval = itr_setting;
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (unlikely(hw->mac_type < e1000_82540))
+		goto update_itr_done;
+
+	if (packets == 0)
+		goto update_itr_done;
+
+	switch (itr_setting) {
+	case lowest_latency:
+		/* jumbo frames get bulk treatment*/
+		if (bytes/packets > 8000)
+			retval = bulk_latency;
+		else if ((packets < 5) && (bytes > 512))
+			retval = low_latency;
+		break;
+	case low_latency:  /* 50 usec aka 20000 ints/s */
+		if (bytes > 10000) {
+			/* jumbo frames need bulk latency setting */
+			if (bytes/packets > 8000)
+				retval = bulk_latency;
+			else if ((packets < 10) || ((bytes/packets) > 1200))
+				retval = bulk_latency;
+			else if ((packets > 35))
+				retval = lowest_latency;
+		} else if (bytes/packets > 2000)
+			retval = bulk_latency;
+		else if (packets <= 2 && bytes < 512)
+			retval = lowest_latency;
+		break;
+	case bulk_latency: /* 250 usec aka 4000 ints/s */
+		if (bytes > 25000) {
+			if (packets > 35)
+				retval = low_latency;
+		} else if (bytes < 6000) {
+			retval = low_latency;
+		}
+		break;
+	}
+
+update_itr_done:
+	return retval;
+}
+
+static void e1000_set_itr(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u16 current_itr;
+	u32 new_itr = adapter->itr;
+
+	if (unlikely(hw->mac_type < e1000_82540))
+		return;
+
+	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
+	if (unlikely(adapter->link_speed != SPEED_1000)) {
+		new_itr = 4000;
+		goto set_itr_now;
+	}
+
+	adapter->tx_itr = e1000_update_itr(adapter, adapter->tx_itr,
+					   adapter->total_tx_packets,
+					   adapter->total_tx_bytes);
+	/* conservative mode (itr 3) eliminates the lowest_latency setting */
+	if (adapter->itr_setting == 3 && adapter->tx_itr == lowest_latency)
+		adapter->tx_itr = low_latency;
+
+	adapter->rx_itr = e1000_update_itr(adapter, adapter->rx_itr,
+					   adapter->total_rx_packets,
+					   adapter->total_rx_bytes);
+	/* conservative mode (itr 3) eliminates the lowest_latency setting */
+	if (adapter->itr_setting == 3 && adapter->rx_itr == lowest_latency)
+		adapter->rx_itr = low_latency;
+
+	current_itr = max(adapter->rx_itr, adapter->tx_itr);
+
+	switch (current_itr) {
+	/* counts and packets in update_itr are dependent on these numbers */
+	case lowest_latency:
+		new_itr = 70000;
+		break;
+	case low_latency:
+		new_itr = 20000; /* aka hwitr = ~200 */
+		break;
+	case bulk_latency:
+		new_itr = 4000;
+		break;
+	default:
+		break;
+	}
+
+set_itr_now:
+	if (new_itr != adapter->itr) {
+		/* this attempts to bias the interrupt rate towards Bulk
+		 * by adding intermediate steps when interrupt rate is
+		 * increasing
+		 */
+		new_itr = new_itr > adapter->itr ?
+			  min(adapter->itr + (new_itr >> 2), new_itr) :
+			  new_itr;
+		adapter->itr = new_itr;
+		ew32(ITR, 1000000000 / (new_itr * 256));
+	}
+}
+
+#define E1000_TX_FLAGS_CSUM		0x00000001
+#define E1000_TX_FLAGS_VLAN		0x00000002
+#define E1000_TX_FLAGS_TSO		0x00000004
+#define E1000_TX_FLAGS_IPV4		0x00000008
+#define E1000_TX_FLAGS_NO_FCS		0x00000010
+#define E1000_TX_FLAGS_VLAN_MASK	0xffff0000
+#define E1000_TX_FLAGS_VLAN_SHIFT	16
+
+static int e1000_tso(struct e1000_adapter *adapter,
+		     struct e1000_tx_ring *tx_ring, struct sk_buff *skb,
+		     __be16 protocol)
+{
+	struct e1000_context_desc *context_desc;
+	struct e1000_tx_buffer *buffer_info;
+	unsigned int i;
+	u32 cmd_length = 0;
+	u16 ipcse = 0, tucse, mss;
+	u8 ipcss, ipcso, tucss, tucso, hdr_len;
+
+	if (skb_is_gso(skb)) {
+		int err;
+
+		err = skb_cow_head(skb, 0);
+		if (err < 0)
+			return err;
+
+		hdr_len = skb_tcp_all_headers(skb);
+		mss = skb_shinfo(skb)->gso_size;
+		if (protocol == htons(ETH_P_IP)) {
+			struct iphdr *iph = ip_hdr(skb);
+			iph->tot_len = 0;
+			iph->check = 0;
+			tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
+								 iph->daddr, 0,
+								 IPPROTO_TCP,
+								 0);
+			cmd_length = E1000_TXD_CMD_IP;
+			ipcse = skb_transport_offset(skb) - 1;
+		} else if (skb_is_gso_v6(skb)) {
+			tcp_v6_gso_csum_prep(skb);
+			ipcse = 0;
+		}
+		ipcss = skb_network_offset(skb);
+		ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data;
+		tucss = skb_transport_offset(skb);
+		tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data;
+		tucse = 0;
+
+		cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE |
+			       E1000_TXD_CMD_TCP | (skb->len - (hdr_len)));
+
+		i = tx_ring->next_to_use;
+		context_desc = E1000_CONTEXT_DESC(*tx_ring, i);
+		buffer_info = &tx_ring->buffer_info[i];
+
+		context_desc->lower_setup.ip_fields.ipcss  = ipcss;
+		context_desc->lower_setup.ip_fields.ipcso  = ipcso;
+		context_desc->lower_setup.ip_fields.ipcse  = cpu_to_le16(ipcse);
+		context_desc->upper_setup.tcp_fields.tucss = tucss;
+		context_desc->upper_setup.tcp_fields.tucso = tucso;
+		context_desc->upper_setup.tcp_fields.tucse = cpu_to_le16(tucse);
+		context_desc->tcp_seg_setup.fields.mss     = cpu_to_le16(mss);
+		context_desc->tcp_seg_setup.fields.hdr_len = hdr_len;
+		context_desc->cmd_and_length = cpu_to_le32(cmd_length);
+
+		buffer_info->time_stamp = jiffies;
+		buffer_info->next_to_watch = i;
+
+		if (++i == tx_ring->count)
+			i = 0;
+
+		tx_ring->next_to_use = i;
+
+		return true;
+	}
+	return false;
+}
+
+static bool e1000_tx_csum(struct e1000_adapter *adapter,
+			  struct e1000_tx_ring *tx_ring, struct sk_buff *skb,
+			  __be16 protocol)
+{
+	struct e1000_context_desc *context_desc;
+	struct e1000_tx_buffer *buffer_info;
+	unsigned int i;
+	u8 css;
+	u32 cmd_len = E1000_TXD_CMD_DEXT;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return false;
+
+	switch (protocol) {
+	case cpu_to_be16(ETH_P_IP):
+		if (ip_hdr(skb)->protocol == IPPROTO_TCP)
+			cmd_len |= E1000_TXD_CMD_TCP;
+		break;
+	case cpu_to_be16(ETH_P_IPV6):
+		/* XXX not handling all IPV6 headers */
+		if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
+			cmd_len |= E1000_TXD_CMD_TCP;
+		break;
+	default:
+		if (unlikely(net_ratelimit()))
+			e_warn(drv, "checksum_partial proto=%x!\n",
+			       skb->protocol);
+		break;
+	}
+
+	css = skb_checksum_start_offset(skb);
+
+	i = tx_ring->next_to_use;
+	buffer_info = &tx_ring->buffer_info[i];
+	context_desc = E1000_CONTEXT_DESC(*tx_ring, i);
+
+	context_desc->lower_setup.ip_config = 0;
+	context_desc->upper_setup.tcp_fields.tucss = css;
+	context_desc->upper_setup.tcp_fields.tucso =
+		css + skb->csum_offset;
+	context_desc->upper_setup.tcp_fields.tucse = 0;
+	context_desc->tcp_seg_setup.data = 0;
+	context_desc->cmd_and_length = cpu_to_le32(cmd_len);
+
+	buffer_info->time_stamp = jiffies;
+	buffer_info->next_to_watch = i;
+
+	if (unlikely(++i == tx_ring->count))
+		i = 0;
+
+	tx_ring->next_to_use = i;
+
+	return true;
+}
+
+#define E1000_MAX_TXD_PWR	12
+#define E1000_MAX_DATA_PER_TXD	(1<<E1000_MAX_TXD_PWR)
+
+static int e1000_tx_map(struct e1000_adapter *adapter,
+			struct e1000_tx_ring *tx_ring,
+			struct sk_buff *skb, unsigned int first,
+			unsigned int max_per_txd, unsigned int nr_frags,
+			unsigned int mss)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_tx_buffer *buffer_info;
+	unsigned int len = skb_headlen(skb);
+	unsigned int offset = 0, size, count = 0, i;
+	unsigned int f, bytecount, segs;
+
+	i = tx_ring->next_to_use;
+
+	while (len) {
+		buffer_info = &tx_ring->buffer_info[i];
+		size = min(len, max_per_txd);
+		/* Workaround for Controller erratum --
+		 * descriptor for non-tso packet in a linear SKB that follows a
+		 * tso gets written back prematurely before the data is fully
+		 * DMA'd to the controller
+		 */
+		if (!skb->data_len && tx_ring->last_tx_tso &&
+		    !skb_is_gso(skb)) {
+			tx_ring->last_tx_tso = false;
+			size -= 4;
+		}
+
+		/* Workaround for premature desc write-backs
+		 * in TSO mode.  Append 4-byte sentinel desc
+		 */
+		if (unlikely(mss && !nr_frags && size == len && size > 8))
+			size -= 4;
+		/* work-around for errata 10 and it applies
+		 * to all controllers in PCI-X mode
+		 * The fix is to make sure that the first descriptor of a
+		 * packet is smaller than 2048 - 16 - 16 (or 2016) bytes
+		 */
+		if (unlikely((hw->bus_type == e1000_bus_type_pcix) &&
+			     (size > 2015) && count == 0))
+			size = 2015;
+
+		/* Workaround for potential 82544 hang in PCI-X.  Avoid
+		 * terminating buffers within evenly-aligned dwords.
+		 */
+		if (unlikely(adapter->pcix_82544 &&
+		   !((unsigned long)(skb->data + offset + size - 1) & 4) &&
+		   size > 4))
+			size -= 4;
+
+		buffer_info->length = size;
+		/* set time_stamp *before* dma to help avoid a possible race */
+		buffer_info->time_stamp = jiffies;
+		buffer_info->mapped_as_page = false;
+		buffer_info->dma = dma_map_single(&pdev->dev,
+						  skb->data + offset,
+						  size, DMA_TO_DEVICE);
+		if (dma_mapping_error(&pdev->dev, buffer_info->dma))
+			goto dma_error;
+		buffer_info->next_to_watch = i;
+
+		len -= size;
+		offset += size;
+		count++;
+		if (len) {
+			i++;
+			if (unlikely(i == tx_ring->count))
+				i = 0;
+		}
+	}
+
+	for (f = 0; f < nr_frags; f++) {
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
+
+		len = skb_frag_size(frag);
+		offset = 0;
+
+		while (len) {
+			unsigned long bufend;
+			i++;
+			if (unlikely(i == tx_ring->count))
+				i = 0;
+
+			buffer_info = &tx_ring->buffer_info[i];
+			size = min(len, max_per_txd);
+			/* Workaround for premature desc write-backs
+			 * in TSO mode.  Append 4-byte sentinel desc
+			 */
+			if (unlikely(mss && f == (nr_frags-1) &&
+			    size == len && size > 8))
+				size -= 4;
+			/* Workaround for potential 82544 hang in PCI-X.
+			 * Avoid terminating buffers within evenly-aligned
+			 * dwords.
+			 */
+			bufend = (unsigned long)
+				page_to_phys(skb_frag_page(frag));
+			bufend += offset + size - 1;
+			if (unlikely(adapter->pcix_82544 &&
+				     !(bufend & 4) &&
+				     size > 4))
+				size -= 4;
+
+			buffer_info->length = size;
+			buffer_info->time_stamp = jiffies;
+			buffer_info->mapped_as_page = true;
+			buffer_info->dma = skb_frag_dma_map(&pdev->dev, frag,
+						offset, size, DMA_TO_DEVICE);
+			if (dma_mapping_error(&pdev->dev, buffer_info->dma))
+				goto dma_error;
+			buffer_info->next_to_watch = i;
+
+			len -= size;
+			offset += size;
+			count++;
+		}
+	}
+
+	segs = skb_shinfo(skb)->gso_segs ?: 1;
+	/* multiply data chunks by size of headers */
+	bytecount = ((segs - 1) * skb_headlen(skb)) + skb->len;
+
+	tx_ring->buffer_info[i].skb = skb;
+	tx_ring->buffer_info[i].segs = segs;
+	tx_ring->buffer_info[i].bytecount = bytecount;
+	tx_ring->buffer_info[first].next_to_watch = i;
+
+	return count;
+
+dma_error:
+	dev_err(&pdev->dev, "TX DMA map failed\n");
+	buffer_info->dma = 0;
+	if (count)
+		count--;
+
+	while (count--) {
+		if (i == 0)
+			i += tx_ring->count;
+		i--;
+		buffer_info = &tx_ring->buffer_info[i];
+		e1000_unmap_and_free_tx_resource(adapter, buffer_info, 0);
+	}
+
+	return 0;
+}
+
+static void e1000_tx_queue(struct e1000_adapter *adapter,
+			   struct e1000_tx_ring *tx_ring, int tx_flags,
+			   int count)
+{
+	struct e1000_tx_desc *tx_desc = NULL;
+	struct e1000_tx_buffer *buffer_info;
+	u32 txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS;
+	unsigned int i;
+
+	if (likely(tx_flags & E1000_TX_FLAGS_TSO)) {
+		txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D |
+			     E1000_TXD_CMD_TSE;
+		txd_upper |= E1000_TXD_POPTS_TXSM << 8;
+
+		if (likely(tx_flags & E1000_TX_FLAGS_IPV4))
+			txd_upper |= E1000_TXD_POPTS_IXSM << 8;
+	}
+
+	if (likely(tx_flags & E1000_TX_FLAGS_CSUM)) {
+		txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
+		txd_upper |= E1000_TXD_POPTS_TXSM << 8;
+	}
+
+	if (unlikely(tx_flags & E1000_TX_FLAGS_VLAN)) {
+		txd_lower |= E1000_TXD_CMD_VLE;
+		txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK);
+	}
+
+	if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS))
+		txd_lower &= ~(E1000_TXD_CMD_IFCS);
+
+	i = tx_ring->next_to_use;
+
+	while (count--) {
+		buffer_info = &tx_ring->buffer_info[i];
+		tx_desc = E1000_TX_DESC(*tx_ring, i);
+		tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
+		tx_desc->lower.data =
+			cpu_to_le32(txd_lower | buffer_info->length);
+		tx_desc->upper.data = cpu_to_le32(txd_upper);
+		if (unlikely(++i == tx_ring->count))
+			i = 0;
+	}
+
+	tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd);
+
+	/* txd_cmd re-enables FCS, so we'll re-disable it here as desired. */
+	if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS))
+		tx_desc->lower.data &= ~(cpu_to_le32(E1000_TXD_CMD_IFCS));
+
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.  (Only
+	 * applicable for weak-ordered memory model archs,
+	 * such as IA-64).
+	 */
+	dma_wmb();
+
+	tx_ring->next_to_use = i;
+}
+
+/* 82547 workaround to avoid controller hang in half-duplex environment.
+ * The workaround is to avoid queuing a large packet that would span
+ * the internal Tx FIFO ring boundary by notifying the stack to resend
+ * the packet at a later time.  This gives the Tx FIFO an opportunity to
+ * flush all packets.  When that occurs, we reset the Tx FIFO pointers
+ * to the beginning of the Tx FIFO.
+ */
+
+#define E1000_FIFO_HDR			0x10
+#define E1000_82547_PAD_LEN		0x3E0
+
+static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter,
+				       struct sk_buff *skb)
+{
+	u32 fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
+	u32 skb_fifo_len = skb->len + E1000_FIFO_HDR;
+
+	skb_fifo_len = ALIGN(skb_fifo_len, E1000_FIFO_HDR);
+
+	if (adapter->link_duplex != HALF_DUPLEX)
+		goto no_fifo_stall_required;
+
+	if (atomic_read(&adapter->tx_fifo_stall))
+		return 1;
+
+	if (skb_fifo_len >= (E1000_82547_PAD_LEN + fifo_space)) {
+		atomic_set(&adapter->tx_fifo_stall, 1);
+		return 1;
+	}
+
+no_fifo_stall_required:
+	adapter->tx_fifo_head += skb_fifo_len;
+	if (adapter->tx_fifo_head >= adapter->tx_fifo_size)
+		adapter->tx_fifo_head -= adapter->tx_fifo_size;
+	return 0;
+}
+
+static int __e1000_maybe_stop_tx(struct net_device *netdev, int size)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_tx_ring *tx_ring = adapter->tx_ring;
+
+	netif_stop_queue(netdev);
+	/* Herbert's original patch had:
+	 *  smp_mb__after_netif_stop_queue();
+	 * but since that doesn't exist yet, just open code it.
+	 */
+	smp_mb();
+
+	/* We need to check again in a case another CPU has just
+	 * made room available.
+	 */
+	if (likely(E1000_DESC_UNUSED(tx_ring) < size))
+		return -EBUSY;
+
+	/* A reprieve! */
+	netif_start_queue(netdev);
+	++adapter->restart_queue;
+	return 0;
+}
+
+static int e1000_maybe_stop_tx(struct net_device *netdev,
+			       struct e1000_tx_ring *tx_ring, int size)
+{
+	if (likely(E1000_DESC_UNUSED(tx_ring) >= size))
+		return 0;
+	return __e1000_maybe_stop_tx(netdev, size);
+}
+
+#define TXD_USE_COUNT(S, X) (((S) + ((1 << (X)) - 1)) >> (X))
+static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
+				    struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_tx_ring *tx_ring;
+	unsigned int first, max_per_txd = E1000_MAX_DATA_PER_TXD;
+	unsigned int max_txd_pwr = E1000_MAX_TXD_PWR;
+	unsigned int tx_flags = 0;
+	unsigned int len = skb_headlen(skb);
+	unsigned int nr_frags;
+	unsigned int mss;
+	int count = 0;
+	int tso;
+	unsigned int f;
+	__be16 protocol = vlan_get_protocol(skb);
+
+	/* This goes back to the question of how to logically map a Tx queue
+	 * to a flow.  Right now, performance is impacted slightly negatively
+	 * if using multiple Tx queues.  If the stack breaks away from a
+	 * single qdisc implementation, we can look at this again.
+	 */
+	tx_ring = adapter->tx_ring;
+
+	/* On PCI/PCI-X HW, if packet size is less than ETH_ZLEN,
+	 * packets may get corrupted during padding by HW.
+	 * To WA this issue, pad all small packets manually.
+	 */
+	if (eth_skb_pad(skb))
+		return NETDEV_TX_OK;
+
+	mss = skb_shinfo(skb)->gso_size;
+	/* The controller does a simple calculation to
+	 * make sure there is enough room in the FIFO before
+	 * initiating the DMA for each buffer.  The calc is:
+	 * 4 = ceil(buffer len/mss).  To make sure we don't
+	 * overrun the FIFO, adjust the max buffer len if mss
+	 * drops.
+	 */
+	if (mss) {
+		u8 hdr_len;
+		max_per_txd = min(mss << 2, max_per_txd);
+		max_txd_pwr = fls(max_per_txd) - 1;
+
+		hdr_len = skb_tcp_all_headers(skb);
+		if (skb->data_len && hdr_len == len) {
+			switch (hw->mac_type) {
+			case e1000_82544: {
+				unsigned int pull_size;
+
+				/* Make sure we have room to chop off 4 bytes,
+				 * and that the end alignment will work out to
+				 * this hardware's requirements
+				 * NOTE: this is a TSO only workaround
+				 * if end byte alignment not correct move us
+				 * into the next dword
+				 */
+				if ((unsigned long)(skb_tail_pointer(skb) - 1)
+				    & 4)
+					break;
+				pull_size = min((unsigned int)4, skb->data_len);
+				if (!__pskb_pull_tail(skb, pull_size)) {
+					e_err(drv, "__pskb_pull_tail "
+					      "failed.\n");
+					dev_kfree_skb_any(skb);
+					return NETDEV_TX_OK;
+				}
+				len = skb_headlen(skb);
+				break;
+			}
+			default:
+				/* do nothing */
+				break;
+			}
+		}
+	}
+
+	/* reserve a descriptor for the offload context */
+	if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL))
+		count++;
+	count++;
+
+	/* Controller Erratum workaround */
+	if (!skb->data_len && tx_ring->last_tx_tso && !skb_is_gso(skb))
+		count++;
+
+	count += TXD_USE_COUNT(len, max_txd_pwr);
+
+	if (adapter->pcix_82544)
+		count++;
+
+	/* work-around for errata 10 and it applies to all controllers
+	 * in PCI-X mode, so add one more descriptor to the count
+	 */
+	if (unlikely((hw->bus_type == e1000_bus_type_pcix) &&
+			(len > 2015)))
+		count++;
+
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	for (f = 0; f < nr_frags; f++)
+		count += TXD_USE_COUNT(skb_frag_size(&skb_shinfo(skb)->frags[f]),
+				       max_txd_pwr);
+	if (adapter->pcix_82544)
+		count += nr_frags;
+
+	/* need: count + 2 desc gap to keep tail from touching
+	 * head, otherwise try next time
+	 */
+	if (unlikely(e1000_maybe_stop_tx(netdev, tx_ring, count + 2)))
+		return NETDEV_TX_BUSY;
+
+	if (unlikely((hw->mac_type == e1000_82547) &&
+		     (e1000_82547_fifo_workaround(adapter, skb)))) {
+		netif_stop_queue(netdev);
+		if (!test_bit(__E1000_DOWN, &adapter->flags))
+			schedule_delayed_work(&adapter->fifo_stall_task, 1);
+		return NETDEV_TX_BUSY;
+	}
+
+	if (skb_vlan_tag_present(skb)) {
+		tx_flags |= E1000_TX_FLAGS_VLAN;
+		tx_flags |= (skb_vlan_tag_get(skb) <<
+			     E1000_TX_FLAGS_VLAN_SHIFT);
+	}
+
+	first = tx_ring->next_to_use;
+
+	tso = e1000_tso(adapter, tx_ring, skb, protocol);
+	if (tso < 0) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	if (likely(tso)) {
+		if (likely(hw->mac_type != e1000_82544))
+			tx_ring->last_tx_tso = true;
+		tx_flags |= E1000_TX_FLAGS_TSO;
+	} else if (likely(e1000_tx_csum(adapter, tx_ring, skb, protocol)))
+		tx_flags |= E1000_TX_FLAGS_CSUM;
+
+	if (protocol == htons(ETH_P_IP))
+		tx_flags |= E1000_TX_FLAGS_IPV4;
+
+	if (unlikely(skb->no_fcs))
+		tx_flags |= E1000_TX_FLAGS_NO_FCS;
+
+	count = e1000_tx_map(adapter, tx_ring, skb, first, max_per_txd,
+			     nr_frags, mss);
+
+	if (count) {
+		/* The descriptors needed is higher than other Intel drivers
+		 * due to a number of workarounds.  The breakdown is below:
+		 * Data descriptors: MAX_SKB_FRAGS + 1
+		 * Context Descriptor: 1
+		 * Keep head from touching tail: 2
+		 * Workarounds: 3
+		 */
+		int desc_needed = MAX_SKB_FRAGS + 7;
+
+		netdev_sent_queue(netdev, skb->len);
+		skb_tx_timestamp(skb);
+
+		e1000_tx_queue(adapter, tx_ring, tx_flags, count);
+
+		/* 82544 potentially requires twice as many data descriptors
+		 * in order to guarantee buffers don't end on evenly-aligned
+		 * dwords
+		 */
+		if (adapter->pcix_82544)
+			desc_needed += MAX_SKB_FRAGS + 1;
+
+		/* Make sure there is space in the ring for the next send. */
+		e1000_maybe_stop_tx(netdev, tx_ring, desc_needed);
+
+		if (!netdev_xmit_more() ||
+		    netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) {
+			writel(tx_ring->next_to_use, hw->hw_addr + tx_ring->tdt);
+		}
+	} else {
+		dev_kfree_skb_any(skb);
+		tx_ring->buffer_info[first].time_stamp = 0;
+		tx_ring->next_to_use = first;
+	}
+
+	return NETDEV_TX_OK;
+}
+
+#define NUM_REGS 38 /* 1 based count */
+static void e1000_regdump(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 regs[NUM_REGS];
+	u32 *regs_buff = regs;
+	int i = 0;
+
+	static const char * const reg_name[] = {
+		"CTRL",  "STATUS",
+		"RCTL", "RDLEN", "RDH", "RDT", "RDTR",
+		"TCTL", "TDBAL", "TDBAH", "TDLEN", "TDH", "TDT",
+		"TIDV", "TXDCTL", "TADV", "TARC0",
+		"TDBAL1", "TDBAH1", "TDLEN1", "TDH1", "TDT1",
+		"TXDCTL1", "TARC1",
+		"CTRL_EXT", "ERT", "RDBAL", "RDBAH",
+		"TDFH", "TDFT", "TDFHS", "TDFTS", "TDFPC",
+		"RDFH", "RDFT", "RDFHS", "RDFTS", "RDFPC"
+	};
+
+	regs_buff[0]  = er32(CTRL);
+	regs_buff[1]  = er32(STATUS);
+
+	regs_buff[2]  = er32(RCTL);
+	regs_buff[3]  = er32(RDLEN);
+	regs_buff[4]  = er32(RDH);
+	regs_buff[5]  = er32(RDT);
+	regs_buff[6]  = er32(RDTR);
+
+	regs_buff[7]  = er32(TCTL);
+	regs_buff[8]  = er32(TDBAL);
+	regs_buff[9]  = er32(TDBAH);
+	regs_buff[10] = er32(TDLEN);
+	regs_buff[11] = er32(TDH);
+	regs_buff[12] = er32(TDT);
+	regs_buff[13] = er32(TIDV);
+	regs_buff[14] = er32(TXDCTL);
+	regs_buff[15] = er32(TADV);
+	regs_buff[16] = er32(TARC0);
+
+	regs_buff[17] = er32(TDBAL1);
+	regs_buff[18] = er32(TDBAH1);
+	regs_buff[19] = er32(TDLEN1);
+	regs_buff[20] = er32(TDH1);
+	regs_buff[21] = er32(TDT1);
+	regs_buff[22] = er32(TXDCTL1);
+	regs_buff[23] = er32(TARC1);
+	regs_buff[24] = er32(CTRL_EXT);
+	regs_buff[25] = er32(ERT);
+	regs_buff[26] = er32(RDBAL0);
+	regs_buff[27] = er32(RDBAH0);
+	regs_buff[28] = er32(TDFH);
+	regs_buff[29] = er32(TDFT);
+	regs_buff[30] = er32(TDFHS);
+	regs_buff[31] = er32(TDFTS);
+	regs_buff[32] = er32(TDFPC);
+	regs_buff[33] = er32(RDFH);
+	regs_buff[34] = er32(RDFT);
+	regs_buff[35] = er32(RDFHS);
+	regs_buff[36] = er32(RDFTS);
+	regs_buff[37] = er32(RDFPC);
+
+	pr_info("Register dump\n");
+	for (i = 0; i < NUM_REGS; i++)
+		pr_info("%-15s  %08x\n", reg_name[i], regs_buff[i]);
+}
+
+/*
+ * e1000_dump: Print registers, tx ring and rx ring
+ */
+static void e1000_dump(struct e1000_adapter *adapter)
+{
+	/* this code doesn't handle multiple rings */
+	struct e1000_tx_ring *tx_ring = adapter->tx_ring;
+	struct e1000_rx_ring *rx_ring = adapter->rx_ring;
+	int i;
+
+	if (!netif_msg_hw(adapter))
+		return;
+
+	/* Print Registers */
+	e1000_regdump(adapter);
+
+	/* transmit dump */
+	pr_info("TX Desc ring0 dump\n");
+
+	/* Transmit Descriptor Formats - DEXT[29] is 0 (Legacy) or 1 (Extended)
+	 *
+	 * Legacy Transmit Descriptor
+	 *   +--------------------------------------------------------------+
+	 * 0 |         Buffer Address [63:0] (Reserved on Write Back)       |
+	 *   +--------------------------------------------------------------+
+	 * 8 | Special  |    CSS     | Status |  CMD    |  CSO   |  Length  |
+	 *   +--------------------------------------------------------------+
+	 *   63       48 47        36 35    32 31     24 23    16 15        0
+	 *
+	 * Extended Context Descriptor (DTYP=0x0) for TSO or checksum offload
+	 *   63      48 47    40 39       32 31             16 15    8 7      0
+	 *   +----------------------------------------------------------------+
+	 * 0 |  TUCSE  | TUCS0  |   TUCSS   |     IPCSE       | IPCS0 | IPCSS |
+	 *   +----------------------------------------------------------------+
+	 * 8 |   MSS   | HDRLEN | RSV | STA | TUCMD | DTYP |      PAYLEN      |
+	 *   +----------------------------------------------------------------+
+	 *   63      48 47    40 39 36 35 32 31   24 23  20 19                0
+	 *
+	 * Extended Data Descriptor (DTYP=0x1)
+	 *   +----------------------------------------------------------------+
+	 * 0 |                     Buffer Address [63:0]                      |
+	 *   +----------------------------------------------------------------+
+	 * 8 | VLAN tag |  POPTS  | Rsvd | Status | Command | DTYP |  DTALEN  |
+	 *   +----------------------------------------------------------------+
+	 *   63       48 47     40 39  36 35    32 31     24 23  20 19        0
+	 */
+	pr_info("Tc[desc]     [Ce CoCsIpceCoS] [MssHlRSCm0Plen] [bi->dma       ] leng  ntw timestmp         bi->skb\n");
+	pr_info("Td[desc]     [address 63:0  ] [VlaPoRSCm1Dlen] [bi->dma       ] leng  ntw timestmp         bi->skb\n");
+
+	if (!netif_msg_tx_done(adapter))
+		goto rx_ring_summary;
+
+	for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
+		struct e1000_tx_desc *tx_desc = E1000_TX_DESC(*tx_ring, i);
+		struct e1000_tx_buffer *buffer_info = &tx_ring->buffer_info[i];
+		struct my_u { __le64 a; __le64 b; };
+		struct my_u *u = (struct my_u *)tx_desc;
+		const char *type;
+
+		if (i == tx_ring->next_to_use && i == tx_ring->next_to_clean)
+			type = "NTC/U";
+		else if (i == tx_ring->next_to_use)
+			type = "NTU";
+		else if (i == tx_ring->next_to_clean)
+			type = "NTC";
+		else
+			type = "";
+
+		pr_info("T%c[0x%03X]    %016llX %016llX %016llX %04X  %3X %016llX %p %s\n",
+			((le64_to_cpu(u->b) & (1<<20)) ? 'd' : 'c'), i,
+			le64_to_cpu(u->a), le64_to_cpu(u->b),
+			(u64)buffer_info->dma, buffer_info->length,
+			buffer_info->next_to_watch,
+			(u64)buffer_info->time_stamp, buffer_info->skb, type);
+	}
+
+rx_ring_summary:
+	/* receive dump */
+	pr_info("\nRX Desc ring dump\n");
+
+	/* Legacy Receive Descriptor Format
+	 *
+	 * +-----------------------------------------------------+
+	 * |                Buffer Address [63:0]                |
+	 * +-----------------------------------------------------+
+	 * | VLAN Tag | Errors | Status 0 | Packet csum | Length |
+	 * +-----------------------------------------------------+
+	 * 63       48 47    40 39      32 31         16 15      0
+	 */
+	pr_info("R[desc]      [address 63:0  ] [vl er S cks ln] [bi->dma       ] [bi->skb]\n");
+
+	if (!netif_msg_rx_status(adapter))
+		goto exit;
+
+	for (i = 0; rx_ring->desc && (i < rx_ring->count); i++) {
+		struct e1000_rx_desc *rx_desc = E1000_RX_DESC(*rx_ring, i);
+		struct e1000_rx_buffer *buffer_info = &rx_ring->buffer_info[i];
+		struct my_u { __le64 a; __le64 b; };
+		struct my_u *u = (struct my_u *)rx_desc;
+		const char *type;
+
+		if (i == rx_ring->next_to_use)
+			type = "NTU";
+		else if (i == rx_ring->next_to_clean)
+			type = "NTC";
+		else
+			type = "";
+
+		pr_info("R[0x%03X]     %016llX %016llX %016llX %p %s\n",
+			i, le64_to_cpu(u->a), le64_to_cpu(u->b),
+			(u64)buffer_info->dma, buffer_info->rxbuf.data, type);
+	} /* for */
+
+	/* dump the descriptor caches */
+	/* rx */
+	pr_info("Rx descriptor cache in 64bit format\n");
+	for (i = 0x6000; i <= 0x63FF ; i += 0x10) {
+		pr_info("R%04X: %08X|%08X %08X|%08X\n",
+			i,
+			readl(adapter->hw.hw_addr + i+4),
+			readl(adapter->hw.hw_addr + i),
+			readl(adapter->hw.hw_addr + i+12),
+			readl(adapter->hw.hw_addr + i+8));
+	}
+	/* tx */
+	pr_info("Tx descriptor cache in 64bit format\n");
+	for (i = 0x7000; i <= 0x73FF ; i += 0x10) {
+		pr_info("T%04X: %08X|%08X %08X|%08X\n",
+			i,
+			readl(adapter->hw.hw_addr + i+4),
+			readl(adapter->hw.hw_addr + i),
+			readl(adapter->hw.hw_addr + i+12),
+			readl(adapter->hw.hw_addr + i+8));
+	}
+exit:
+	return;
+}
+
+/**
+ * e1000_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ * @txqueue: number of the Tx queue that hung (unused)
+ **/
+static void e1000_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	/* Do the reset outside of interrupt context */
+	adapter->tx_timeout_count++;
+	schedule_work(&adapter->reset_task);
+}
+
+static void e1000_reset_task(struct work_struct *work)
+{
+	struct e1000_adapter *adapter =
+		container_of(work, struct e1000_adapter, reset_task);
+
+	e_err(drv, "Reset adapter\n");
+	e1000_reinit_locked(adapter);
+}
+
+/**
+ * e1000_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
+
+	/* Adapter-specific max frame size limits. */
+	switch (hw->mac_type) {
+	case e1000_undefined ... e1000_82542_rev2_1:
+		if (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN)) {
+			e_err(probe, "Jumbo Frames not supported.\n");
+			return -EINVAL;
+		}
+		break;
+	default:
+		/* Capable of supporting up to MAX_JUMBO_FRAME_SIZE limit. */
+		break;
+	}
+
+	while (test_and_set_bit(__E1000_RESETTING, &adapter->flags))
+		msleep(1);
+	/* e1000_down has a dependency on max_frame_size */
+	hw->max_frame_size = max_frame;
+	if (netif_running(netdev)) {
+		/* prevent buffers from being reallocated */
+		adapter->alloc_rx_buf = e1000_alloc_dummy_rx_buffers;
+		e1000_down(adapter);
+	}
+
+	/* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
+	 * means we reserve 2 more, this pushes us to allocate from the next
+	 * larger slab size.
+	 * i.e. RXBUFFER_2048 --> size-4096 slab
+	 * however with the new *_jumbo_rx* routines, jumbo receives will use
+	 * fragmented skbs
+	 */
+
+	if (max_frame <= E1000_RXBUFFER_2048)
+		adapter->rx_buffer_len = E1000_RXBUFFER_2048;
+	else
+#if (PAGE_SIZE >= E1000_RXBUFFER_16384)
+		adapter->rx_buffer_len = E1000_RXBUFFER_16384;
+#elif (PAGE_SIZE >= E1000_RXBUFFER_4096)
+		adapter->rx_buffer_len = PAGE_SIZE;
+#endif
+
+	/* adjust allocation if LPE protects us, and we aren't using SBP */
+	if (!hw->tbi_compatibility_on &&
+	    ((max_frame == (ETH_FRAME_LEN + ETH_FCS_LEN)) ||
+	     (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE)))
+		adapter->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
+
+	netdev_dbg(netdev, "changing MTU from %d to %d\n",
+		   netdev->mtu, new_mtu);
+	netdev->mtu = new_mtu;
+
+	if (netif_running(netdev))
+		e1000_up(adapter);
+	else
+		e1000_reset(adapter);
+
+	clear_bit(__E1000_RESETTING, &adapter->flags);
+
+	return 0;
+}
+
+/**
+ * e1000_update_stats - Update the board statistics counters
+ * @adapter: board private structure
+ **/
+void e1000_update_stats(struct e1000_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	struct pci_dev *pdev = adapter->pdev;
+	unsigned long flags;
+	u16 phy_tmp;
+
+#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
+
+	/* Prevent stats update while adapter is being reset, or if the pci
+	 * connection is down.
+	 */
+	if (adapter->link_speed == 0)
+		return;
+	if (pci_channel_offline(pdev))
+		return;
+
+	spin_lock_irqsave(&adapter->stats_lock, flags);
+
+	/* these counters are modified from e1000_tbi_adjust_stats,
+	 * called from the interrupt context, so they must only
+	 * be written while holding adapter->stats_lock
+	 */
+
+	adapter->stats.crcerrs += er32(CRCERRS);
+	adapter->stats.gprc += er32(GPRC);
+	adapter->stats.gorcl += er32(GORCL);
+	adapter->stats.gorch += er32(GORCH);
+	adapter->stats.bprc += er32(BPRC);
+	adapter->stats.mprc += er32(MPRC);
+	adapter->stats.roc += er32(ROC);
+
+	adapter->stats.prc64 += er32(PRC64);
+	adapter->stats.prc127 += er32(PRC127);
+	adapter->stats.prc255 += er32(PRC255);
+	adapter->stats.prc511 += er32(PRC511);
+	adapter->stats.prc1023 += er32(PRC1023);
+	adapter->stats.prc1522 += er32(PRC1522);
+
+	adapter->stats.symerrs += er32(SYMERRS);
+	adapter->stats.mpc += er32(MPC);
+	adapter->stats.scc += er32(SCC);
+	adapter->stats.ecol += er32(ECOL);
+	adapter->stats.mcc += er32(MCC);
+	adapter->stats.latecol += er32(LATECOL);
+	adapter->stats.dc += er32(DC);
+	adapter->stats.sec += er32(SEC);
+	adapter->stats.rlec += er32(RLEC);
+	adapter->stats.xonrxc += er32(XONRXC);
+	adapter->stats.xontxc += er32(XONTXC);
+	adapter->stats.xoffrxc += er32(XOFFRXC);
+	adapter->stats.xofftxc += er32(XOFFTXC);
+	adapter->stats.fcruc += er32(FCRUC);
+	adapter->stats.gptc += er32(GPTC);
+	adapter->stats.gotcl += er32(GOTCL);
+	adapter->stats.gotch += er32(GOTCH);
+	adapter->stats.rnbc += er32(RNBC);
+	adapter->stats.ruc += er32(RUC);
+	adapter->stats.rfc += er32(RFC);
+	adapter->stats.rjc += er32(RJC);
+	adapter->stats.torl += er32(TORL);
+	adapter->stats.torh += er32(TORH);
+	adapter->stats.totl += er32(TOTL);
+	adapter->stats.toth += er32(TOTH);
+	adapter->stats.tpr += er32(TPR);
+
+	adapter->stats.ptc64 += er32(PTC64);
+	adapter->stats.ptc127 += er32(PTC127);
+	adapter->stats.ptc255 += er32(PTC255);
+	adapter->stats.ptc511 += er32(PTC511);
+	adapter->stats.ptc1023 += er32(PTC1023);
+	adapter->stats.ptc1522 += er32(PTC1522);
+
+	adapter->stats.mptc += er32(MPTC);
+	adapter->stats.bptc += er32(BPTC);
+
+	/* used for adaptive IFS */
+
+	hw->tx_packet_delta = er32(TPT);
+	adapter->stats.tpt += hw->tx_packet_delta;
+	hw->collision_delta = er32(COLC);
+	adapter->stats.colc += hw->collision_delta;
+
+	if (hw->mac_type >= e1000_82543) {
+		adapter->stats.algnerrc += er32(ALGNERRC);
+		adapter->stats.rxerrc += er32(RXERRC);
+		adapter->stats.tncrs += er32(TNCRS);
+		adapter->stats.cexterr += er32(CEXTERR);
+		adapter->stats.tsctc += er32(TSCTC);
+		adapter->stats.tsctfc += er32(TSCTFC);
+	}
+
+	/* Fill out the OS statistics structure */
+	netdev->stats.multicast = adapter->stats.mprc;
+	netdev->stats.collisions = adapter->stats.colc;
+
+	/* Rx Errors */
+
+	/* RLEC on some newer hardware can be incorrect so build
+	 * our own version based on RUC and ROC
+	 */
+	netdev->stats.rx_errors = adapter->stats.rxerrc +
+		adapter->stats.crcerrs + adapter->stats.algnerrc +
+		adapter->stats.ruc + adapter->stats.roc +
+		adapter->stats.cexterr;
+	adapter->stats.rlerrc = adapter->stats.ruc + adapter->stats.roc;
+	netdev->stats.rx_length_errors = adapter->stats.rlerrc;
+	netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
+	netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
+	netdev->stats.rx_missed_errors = adapter->stats.mpc;
+
+	/* Tx Errors */
+	adapter->stats.txerrc = adapter->stats.ecol + adapter->stats.latecol;
+	netdev->stats.tx_errors = adapter->stats.txerrc;
+	netdev->stats.tx_aborted_errors = adapter->stats.ecol;
+	netdev->stats.tx_window_errors = adapter->stats.latecol;
+	netdev->stats.tx_carrier_errors = adapter->stats.tncrs;
+	if (hw->bad_tx_carr_stats_fd &&
+	    adapter->link_duplex == FULL_DUPLEX) {
+		netdev->stats.tx_carrier_errors = 0;
+		adapter->stats.tncrs = 0;
+	}
+
+	/* Tx Dropped needs to be maintained elsewhere */
+
+	/* Phy Stats */
+	if (hw->media_type == e1000_media_type_copper) {
+		if ((adapter->link_speed == SPEED_1000) &&
+		   (!e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
+			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
+			adapter->phy_stats.idle_errors += phy_tmp;
+		}
+
+		if ((hw->mac_type <= e1000_82546) &&
+		   (hw->phy_type == e1000_phy_m88) &&
+		   !e1000_read_phy_reg(hw, M88E1000_RX_ERR_CNTR, &phy_tmp))
+			adapter->phy_stats.receive_errors += phy_tmp;
+	}
+
+	/* Management Stats */
+	if (hw->has_smbus) {
+		adapter->stats.mgptc += er32(MGTPTC);
+		adapter->stats.mgprc += er32(MGTPRC);
+		adapter->stats.mgpdc += er32(MGTPDC);
+	}
+
+	spin_unlock_irqrestore(&adapter->stats_lock, flags);
+}
+
+/**
+ * e1000_intr - Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ **/
+static irqreturn_t e1000_intr(int irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 icr = er32(ICR);
+
+	if (unlikely((!icr)))
+		return IRQ_NONE;  /* Not our interrupt */
+
+	/* we might have caused the interrupt, but the above
+	 * read cleared it, and just in case the driver is
+	 * down there is nothing to do so return handled
+	 */
+	if (unlikely(test_bit(__E1000_DOWN, &adapter->flags)))
+		return IRQ_HANDLED;
+
+	if (unlikely(icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))) {
+		hw->get_link_status = 1;
+		/* guard against interrupt when we're going down */
+		if (!test_bit(__E1000_DOWN, &adapter->flags))
+			schedule_delayed_work(&adapter->watchdog_task, 1);
+	}
+
+	/* disable interrupts, without the synchronize_irq bit */
+	ew32(IMC, ~0);
+	E1000_WRITE_FLUSH();
+
+	if (likely(napi_schedule_prep(&adapter->napi))) {
+		adapter->total_tx_bytes = 0;
+		adapter->total_tx_packets = 0;
+		adapter->total_rx_bytes = 0;
+		adapter->total_rx_packets = 0;
+		__napi_schedule(&adapter->napi);
+	} else {
+		/* this really should not happen! if it does it is basically a
+		 * bug, but not a hard error, so enable ints and continue
+		 */
+		if (!test_bit(__E1000_DOWN, &adapter->flags))
+			e1000_irq_enable(adapter);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * e1000_clean - NAPI Rx polling callback
+ * @napi: napi struct containing references to driver info
+ * @budget: budget given to driver for receive packets
+ **/
+static int e1000_clean(struct napi_struct *napi, int budget)
+{
+	struct e1000_adapter *adapter = container_of(napi, struct e1000_adapter,
+						     napi);
+	int tx_clean_complete = 0, work_done = 0;
+
+	tx_clean_complete = e1000_clean_tx_irq(adapter, &adapter->tx_ring[0]);
+
+	adapter->clean_rx(adapter, &adapter->rx_ring[0], &work_done, budget);
+
+	if (!tx_clean_complete || work_done == budget)
+		return budget;
+
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done))) {
+		if (likely(adapter->itr_setting & 3))
+			e1000_set_itr(adapter);
+		if (!test_bit(__E1000_DOWN, &adapter->flags))
+			e1000_irq_enable(adapter);
+	}
+
+	return work_done;
+}
+
+/**
+ * e1000_clean_tx_irq - Reclaim resources after transmit completes
+ * @adapter: board private structure
+ * @tx_ring: ring to clean
+ **/
+static bool e1000_clean_tx_irq(struct e1000_adapter *adapter,
+			       struct e1000_tx_ring *tx_ring)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_tx_desc *tx_desc, *eop_desc;
+	struct e1000_tx_buffer *buffer_info;
+	unsigned int i, eop;
+	unsigned int count = 0;
+	unsigned int total_tx_bytes = 0, total_tx_packets = 0;
+	unsigned int bytes_compl = 0, pkts_compl = 0;
+
+	i = tx_ring->next_to_clean;
+	eop = tx_ring->buffer_info[i].next_to_watch;
+	eop_desc = E1000_TX_DESC(*tx_ring, eop);
+
+	while ((eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) &&
+	       (count < tx_ring->count)) {
+		bool cleaned = false;
+		dma_rmb();	/* read buffer_info after eop_desc */
+		for ( ; !cleaned; count++) {
+			tx_desc = E1000_TX_DESC(*tx_ring, i);
+			buffer_info = &tx_ring->buffer_info[i];
+			cleaned = (i == eop);
+
+			if (cleaned) {
+				total_tx_packets += buffer_info->segs;
+				total_tx_bytes += buffer_info->bytecount;
+				if (buffer_info->skb) {
+					bytes_compl += buffer_info->skb->len;
+					pkts_compl++;
+				}
+
+			}
+			e1000_unmap_and_free_tx_resource(adapter, buffer_info,
+							 64);
+			tx_desc->upper.data = 0;
+
+			if (unlikely(++i == tx_ring->count))
+				i = 0;
+		}
+
+		eop = tx_ring->buffer_info[i].next_to_watch;
+		eop_desc = E1000_TX_DESC(*tx_ring, eop);
+	}
+
+	/* Synchronize with E1000_DESC_UNUSED called from e1000_xmit_frame,
+	 * which will reuse the cleaned buffers.
+	 */
+	smp_store_release(&tx_ring->next_to_clean, i);
+
+	netdev_completed_queue(netdev, pkts_compl, bytes_compl);
+
+#define TX_WAKE_THRESHOLD 32
+	if (unlikely(count && netif_carrier_ok(netdev) &&
+		     E1000_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD)) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+
+		if (netif_queue_stopped(netdev) &&
+		    !(test_bit(__E1000_DOWN, &adapter->flags))) {
+			netif_wake_queue(netdev);
+			++adapter->restart_queue;
+		}
+	}
+
+	if (adapter->detect_tx_hung) {
+		/* Detect a transmit hang in hardware, this serializes the
+		 * check with the clearing of time_stamp and movement of i
+		 */
+		adapter->detect_tx_hung = false;
+		if (tx_ring->buffer_info[eop].time_stamp &&
+		    time_after(jiffies, tx_ring->buffer_info[eop].time_stamp +
+			       (adapter->tx_timeout_factor * HZ)) &&
+		    !(er32(STATUS) & E1000_STATUS_TXOFF)) {
+
+			/* detected Tx unit hang */
+			e_err(drv, "Detected Tx Unit Hang\n"
+			      "  Tx Queue             <%lu>\n"
+			      "  TDH                  <%x>\n"
+			      "  TDT                  <%x>\n"
+			      "  next_to_use          <%x>\n"
+			      "  next_to_clean        <%x>\n"
+			      "buffer_info[next_to_clean]\n"
+			      "  time_stamp           <%lx>\n"
+			      "  next_to_watch        <%x>\n"
+			      "  jiffies              <%lx>\n"
+			      "  next_to_watch.status <%x>\n",
+				(unsigned long)(tx_ring - adapter->tx_ring),
+				readl(hw->hw_addr + tx_ring->tdh),
+				readl(hw->hw_addr + tx_ring->tdt),
+				tx_ring->next_to_use,
+				tx_ring->next_to_clean,
+				tx_ring->buffer_info[eop].time_stamp,
+				eop,
+				jiffies,
+				eop_desc->upper.fields.status);
+			e1000_dump(adapter);
+			netif_stop_queue(netdev);
+		}
+	}
+	adapter->total_tx_bytes += total_tx_bytes;
+	adapter->total_tx_packets += total_tx_packets;
+	netdev->stats.tx_bytes += total_tx_bytes;
+	netdev->stats.tx_packets += total_tx_packets;
+	return count < tx_ring->count;
+}
+
+/**
+ * e1000_rx_checksum - Receive Checksum Offload for 82543
+ * @adapter:     board private structure
+ * @status_err:  receive descriptor status and error fields
+ * @csum:        receive descriptor csum field
+ * @skb:         socket buffer with received data
+ **/
+static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err,
+			      u32 csum, struct sk_buff *skb)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u16 status = (u16)status_err;
+	u8 errors = (u8)(status_err >> 24);
+
+	skb_checksum_none_assert(skb);
+
+	/* 82543 or newer only */
+	if (unlikely(hw->mac_type < e1000_82543))
+		return;
+	/* Ignore Checksum bit is set */
+	if (unlikely(status & E1000_RXD_STAT_IXSM))
+		return;
+	/* TCP/UDP checksum error bit is set */
+	if (unlikely(errors & E1000_RXD_ERR_TCPE)) {
+		/* let the stack verify checksum errors */
+		adapter->hw_csum_err++;
+		return;
+	}
+	/* TCP/UDP Checksum has not been calculated */
+	if (!(status & E1000_RXD_STAT_TCPCS))
+		return;
+
+	/* It must be a TCP or UDP packet with a valid checksum */
+	if (likely(status & E1000_RXD_STAT_TCPCS)) {
+		/* TCP checksum is good */
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+	adapter->hw_csum_good++;
+}
+
+/**
+ * e1000_consume_page - helper function for jumbo Rx path
+ * @bi: software descriptor shadow data
+ * @skb: skb being modified
+ * @length: length of data being added
+ **/
+static void e1000_consume_page(struct e1000_rx_buffer *bi, struct sk_buff *skb,
+			       u16 length)
+{
+	bi->rxbuf.page = NULL;
+	skb->len += length;
+	skb->data_len += length;
+	skb->truesize += PAGE_SIZE;
+}
+
+/**
+ * e1000_receive_skb - helper function to handle rx indications
+ * @adapter: board private structure
+ * @status: descriptor status field as written by hardware
+ * @vlan: descriptor vlan field as written by hardware (no le/be conversion)
+ * @skb: pointer to sk_buff to be indicated to stack
+ */
+static void e1000_receive_skb(struct e1000_adapter *adapter, u8 status,
+			      __le16 vlan, struct sk_buff *skb)
+{
+	skb->protocol = eth_type_trans(skb, adapter->netdev);
+
+	if (status & E1000_RXD_STAT_VP) {
+		u16 vid = le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK;
+
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+	}
+	napi_gro_receive(&adapter->napi, skb);
+}
+
+/**
+ * e1000_tbi_adjust_stats
+ * @hw: Struct containing variables accessed by shared code
+ * @stats: point to stats struct
+ * @frame_len: The length of the frame in question
+ * @mac_addr: The Ethernet destination address of the frame in question
+ *
+ * Adjusts the statistic counters when a frame is accepted by TBI_ACCEPT
+ */
+static void e1000_tbi_adjust_stats(struct e1000_hw *hw,
+				   struct e1000_hw_stats *stats,
+				   u32 frame_len, const u8 *mac_addr)
+{
+	u64 carry_bit;
+
+	/* First adjust the frame length. */
+	frame_len--;
+	/* We need to adjust the statistics counters, since the hardware
+	 * counters overcount this packet as a CRC error and undercount
+	 * the packet as a good packet
+	 */
+	/* This packet should not be counted as a CRC error. */
+	stats->crcerrs--;
+	/* This packet does count as a Good Packet Received. */
+	stats->gprc++;
+
+	/* Adjust the Good Octets received counters */
+	carry_bit = 0x80000000 & stats->gorcl;
+	stats->gorcl += frame_len;
+	/* If the high bit of Gorcl (the low 32 bits of the Good Octets
+	 * Received Count) was one before the addition,
+	 * AND it is zero after, then we lost the carry out,
+	 * need to add one to Gorch (Good Octets Received Count High).
+	 * This could be simplified if all environments supported
+	 * 64-bit integers.
+	 */
+	if (carry_bit && ((stats->gorcl & 0x80000000) == 0))
+		stats->gorch++;
+	/* Is this a broadcast or multicast?  Check broadcast first,
+	 * since the test for a multicast frame will test positive on
+	 * a broadcast frame.
+	 */
+	if (is_broadcast_ether_addr(mac_addr))
+		stats->bprc++;
+	else if (is_multicast_ether_addr(mac_addr))
+		stats->mprc++;
+
+	if (frame_len == hw->max_frame_size) {
+		/* In this case, the hardware has overcounted the number of
+		 * oversize frames.
+		 */
+		if (stats->roc > 0)
+			stats->roc--;
+	}
+
+	/* Adjust the bin counters when the extra byte put the frame in the
+	 * wrong bin. Remember that the frame_len was adjusted above.
+	 */
+	if (frame_len == 64) {
+		stats->prc64++;
+		stats->prc127--;
+	} else if (frame_len == 127) {
+		stats->prc127++;
+		stats->prc255--;
+	} else if (frame_len == 255) {
+		stats->prc255++;
+		stats->prc511--;
+	} else if (frame_len == 511) {
+		stats->prc511++;
+		stats->prc1023--;
+	} else if (frame_len == 1023) {
+		stats->prc1023++;
+		stats->prc1522--;
+	} else if (frame_len == 1522) {
+		stats->prc1522++;
+	}
+}
+
+static bool e1000_tbi_should_accept(struct e1000_adapter *adapter,
+				    u8 status, u8 errors,
+				    u32 length, const u8 *data)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u8 last_byte = *(data + length - 1);
+
+	if (TBI_ACCEPT(hw, status, errors, length, last_byte)) {
+		unsigned long irq_flags;
+
+		spin_lock_irqsave(&adapter->stats_lock, irq_flags);
+		e1000_tbi_adjust_stats(hw, &adapter->stats, length, data);
+		spin_unlock_irqrestore(&adapter->stats_lock, irq_flags);
+
+		return true;
+	}
+
+	return false;
+}
+
+static struct sk_buff *e1000_alloc_rx_skb(struct e1000_adapter *adapter,
+					  unsigned int bufsz)
+{
+	struct sk_buff *skb = napi_alloc_skb(&adapter->napi, bufsz);
+
+	if (unlikely(!skb))
+		adapter->alloc_rx_buff_failed++;
+	return skb;
+}
+
+/**
+ * e1000_clean_jumbo_rx_irq - Send received data up the network stack; legacy
+ * @adapter: board private structure
+ * @rx_ring: ring to clean
+ * @work_done: amount of napi work completed this call
+ * @work_to_do: max amount of work allowed for this call to do
+ *
+ * the return value indicates whether actual cleaning was done, there
+ * is no guarantee that everything was cleaned
+ */
+static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter,
+				     struct e1000_rx_ring *rx_ring,
+				     int *work_done, int work_to_do)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_rx_desc *rx_desc, *next_rxd;
+	struct e1000_rx_buffer *buffer_info, *next_buffer;
+	u32 length;
+	unsigned int i;
+	int cleaned_count = 0;
+	bool cleaned = false;
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+
+	i = rx_ring->next_to_clean;
+	rx_desc = E1000_RX_DESC(*rx_ring, i);
+	buffer_info = &rx_ring->buffer_info[i];
+
+	while (rx_desc->status & E1000_RXD_STAT_DD) {
+		struct sk_buff *skb;
+		u8 status;
+
+		if (*work_done >= work_to_do)
+			break;
+		(*work_done)++;
+		dma_rmb(); /* read descriptor and rx_buffer_info after status DD */
+
+		status = rx_desc->status;
+
+		if (++i == rx_ring->count)
+			i = 0;
+
+		next_rxd = E1000_RX_DESC(*rx_ring, i);
+		prefetch(next_rxd);
+
+		next_buffer = &rx_ring->buffer_info[i];
+
+		cleaned = true;
+		cleaned_count++;
+		dma_unmap_page(&pdev->dev, buffer_info->dma,
+			       adapter->rx_buffer_len, DMA_FROM_DEVICE);
+		buffer_info->dma = 0;
+
+		length = le16_to_cpu(rx_desc->length);
+
+		/* errors is only valid for DD + EOP descriptors */
+		if (unlikely((status & E1000_RXD_STAT_EOP) &&
+		    (rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK))) {
+			u8 *mapped = page_address(buffer_info->rxbuf.page);
+
+			if (e1000_tbi_should_accept(adapter, status,
+						    rx_desc->errors,
+						    length, mapped)) {
+				length--;
+			} else if (netdev->features & NETIF_F_RXALL) {
+				goto process_skb;
+			} else {
+				/* an error means any chain goes out the window
+				 * too
+				 */
+				dev_kfree_skb(rx_ring->rx_skb_top);
+				rx_ring->rx_skb_top = NULL;
+				goto next_desc;
+			}
+		}
+
+#define rxtop rx_ring->rx_skb_top
+process_skb:
+		if (!(status & E1000_RXD_STAT_EOP)) {
+			/* this descriptor is only the beginning (or middle) */
+			if (!rxtop) {
+				/* this is the beginning of a chain */
+				rxtop = napi_get_frags(&adapter->napi);
+				if (!rxtop)
+					break;
+
+				skb_fill_page_desc(rxtop, 0,
+						   buffer_info->rxbuf.page,
+						   0, length);
+			} else {
+				/* this is the middle of a chain */
+				skb_fill_page_desc(rxtop,
+				    skb_shinfo(rxtop)->nr_frags,
+				    buffer_info->rxbuf.page, 0, length);
+			}
+			e1000_consume_page(buffer_info, rxtop, length);
+			goto next_desc;
+		} else {
+			if (rxtop) {
+				/* end of the chain */
+				skb_fill_page_desc(rxtop,
+				    skb_shinfo(rxtop)->nr_frags,
+				    buffer_info->rxbuf.page, 0, length);
+				skb = rxtop;
+				rxtop = NULL;
+				e1000_consume_page(buffer_info, skb, length);
+			} else {
+				struct page *p;
+				/* no chain, got EOP, this buf is the packet
+				 * copybreak to save the put_page/alloc_page
+				 */
+				p = buffer_info->rxbuf.page;
+				if (length <= copybreak) {
+					if (likely(!(netdev->features & NETIF_F_RXFCS)))
+						length -= 4;
+					skb = e1000_alloc_rx_skb(adapter,
+								 length);
+					if (!skb)
+						break;
+
+					memcpy(skb_tail_pointer(skb),
+					       page_address(p), length);
+
+					/* re-use the page, so don't erase
+					 * buffer_info->rxbuf.page
+					 */
+					skb_put(skb, length);
+					e1000_rx_checksum(adapter,
+							  status | rx_desc->errors << 24,
+							  le16_to_cpu(rx_desc->csum), skb);
+
+					total_rx_bytes += skb->len;
+					total_rx_packets++;
+
+					e1000_receive_skb(adapter, status,
+							  rx_desc->special, skb);
+					goto next_desc;
+				} else {
+					skb = napi_get_frags(&adapter->napi);
+					if (!skb) {
+						adapter->alloc_rx_buff_failed++;
+						break;
+					}
+					skb_fill_page_desc(skb, 0, p, 0,
+							   length);
+					e1000_consume_page(buffer_info, skb,
+							   length);
+				}
+			}
+		}
+
+		/* Receive Checksum Offload XXX recompute due to CRC strip? */
+		e1000_rx_checksum(adapter,
+				  (u32)(status) |
+				  ((u32)(rx_desc->errors) << 24),
+				  le16_to_cpu(rx_desc->csum), skb);
+
+		total_rx_bytes += (skb->len - 4); /* don't count FCS */
+		if (likely(!(netdev->features & NETIF_F_RXFCS)))
+			pskb_trim(skb, skb->len - 4);
+		total_rx_packets++;
+
+		if (status & E1000_RXD_STAT_VP) {
+			__le16 vlan = rx_desc->special;
+			u16 vid = le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK;
+
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+		}
+
+		napi_gro_frags(&adapter->napi);
+
+next_desc:
+		rx_desc->status = 0;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) {
+			adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		/* use prefetched values */
+		rx_desc = next_rxd;
+		buffer_info = next_buffer;
+	}
+	rx_ring->next_to_clean = i;
+
+	cleaned_count = E1000_DESC_UNUSED(rx_ring);
+	if (cleaned_count)
+		adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
+
+	adapter->total_rx_packets += total_rx_packets;
+	adapter->total_rx_bytes += total_rx_bytes;
+	netdev->stats.rx_bytes += total_rx_bytes;
+	netdev->stats.rx_packets += total_rx_packets;
+	return cleaned;
+}
+
+/* this should improve performance for small packets with large amounts
+ * of reassembly being done in the stack
+ */
+static struct sk_buff *e1000_copybreak(struct e1000_adapter *adapter,
+				       struct e1000_rx_buffer *buffer_info,
+				       u32 length, const void *data)
+{
+	struct sk_buff *skb;
+
+	if (length > copybreak)
+		return NULL;
+
+	skb = e1000_alloc_rx_skb(adapter, length);
+	if (!skb)
+		return NULL;
+
+	dma_sync_single_for_cpu(&adapter->pdev->dev, buffer_info->dma,
+				length, DMA_FROM_DEVICE);
+
+	skb_put_data(skb, data, length);
+
+	return skb;
+}
+
+/**
+ * e1000_clean_rx_irq - Send received data up the network stack; legacy
+ * @adapter: board private structure
+ * @rx_ring: ring to clean
+ * @work_done: amount of napi work completed this call
+ * @work_to_do: max amount of work allowed for this call to do
+ */
+static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
+			       struct e1000_rx_ring *rx_ring,
+			       int *work_done, int work_to_do)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_rx_desc *rx_desc, *next_rxd;
+	struct e1000_rx_buffer *buffer_info, *next_buffer;
+	u32 length;
+	unsigned int i;
+	int cleaned_count = 0;
+	bool cleaned = false;
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+
+	i = rx_ring->next_to_clean;
+	rx_desc = E1000_RX_DESC(*rx_ring, i);
+	buffer_info = &rx_ring->buffer_info[i];
+
+	while (rx_desc->status & E1000_RXD_STAT_DD) {
+		struct sk_buff *skb;
+		u8 *data;
+		u8 status;
+
+		if (*work_done >= work_to_do)
+			break;
+		(*work_done)++;
+		dma_rmb(); /* read descriptor and rx_buffer_info after status DD */
+
+		status = rx_desc->status;
+		length = le16_to_cpu(rx_desc->length);
+
+		data = buffer_info->rxbuf.data;
+		prefetch(data);
+		skb = e1000_copybreak(adapter, buffer_info, length, data);
+		if (!skb) {
+			unsigned int frag_len = e1000_frag_len(adapter);
+
+			skb = napi_build_skb(data - E1000_HEADROOM, frag_len);
+			if (!skb) {
+				adapter->alloc_rx_buff_failed++;
+				break;
+			}
+
+			skb_reserve(skb, E1000_HEADROOM);
+			dma_unmap_single(&pdev->dev, buffer_info->dma,
+					 adapter->rx_buffer_len,
+					 DMA_FROM_DEVICE);
+			buffer_info->dma = 0;
+			buffer_info->rxbuf.data = NULL;
+		}
+
+		if (++i == rx_ring->count)
+			i = 0;
+
+		next_rxd = E1000_RX_DESC(*rx_ring, i);
+		prefetch(next_rxd);
+
+		next_buffer = &rx_ring->buffer_info[i];
+
+		cleaned = true;
+		cleaned_count++;
+
+		/* !EOP means multiple descriptors were used to store a single
+		 * packet, if thats the case we need to toss it.  In fact, we
+		 * to toss every packet with the EOP bit clear and the next
+		 * frame that _does_ have the EOP bit set, as it is by
+		 * definition only a frame fragment
+		 */
+		if (unlikely(!(status & E1000_RXD_STAT_EOP)))
+			adapter->discarding = true;
+
+		if (adapter->discarding) {
+			/* All receives must fit into a single buffer */
+			netdev_dbg(netdev, "Receive packet consumed multiple buffers\n");
+			dev_kfree_skb(skb);
+			if (status & E1000_RXD_STAT_EOP)
+				adapter->discarding = false;
+			goto next_desc;
+		}
+
+		if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) {
+			if (e1000_tbi_should_accept(adapter, status,
+						    rx_desc->errors,
+						    length, data)) {
+				length--;
+			} else if (netdev->features & NETIF_F_RXALL) {
+				goto process_skb;
+			} else {
+				dev_kfree_skb(skb);
+				goto next_desc;
+			}
+		}
+
+process_skb:
+		total_rx_bytes += (length - 4); /* don't count FCS */
+		total_rx_packets++;
+
+		if (likely(!(netdev->features & NETIF_F_RXFCS)))
+			/* adjust length to remove Ethernet CRC, this must be
+			 * done after the TBI_ACCEPT workaround above
+			 */
+			length -= 4;
+
+		if (buffer_info->rxbuf.data == NULL)
+			skb_put(skb, length);
+		else /* copybreak skb */
+			skb_trim(skb, length);
+
+		/* Receive Checksum Offload */
+		e1000_rx_checksum(adapter,
+				  (u32)(status) |
+				  ((u32)(rx_desc->errors) << 24),
+				  le16_to_cpu(rx_desc->csum), skb);
+
+		e1000_receive_skb(adapter, status, rx_desc->special, skb);
+
+next_desc:
+		rx_desc->status = 0;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) {
+			adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		/* use prefetched values */
+		rx_desc = next_rxd;
+		buffer_info = next_buffer;
+	}
+	rx_ring->next_to_clean = i;
+
+	cleaned_count = E1000_DESC_UNUSED(rx_ring);
+	if (cleaned_count)
+		adapter->alloc_rx_buf(adapter, rx_ring, cleaned_count);
+
+	adapter->total_rx_packets += total_rx_packets;
+	adapter->total_rx_bytes += total_rx_bytes;
+	netdev->stats.rx_bytes += total_rx_bytes;
+	netdev->stats.rx_packets += total_rx_packets;
+	return cleaned;
+}
+
+/**
+ * e1000_alloc_jumbo_rx_buffers - Replace used jumbo receive buffers
+ * @adapter: address of board private structure
+ * @rx_ring: pointer to receive ring structure
+ * @cleaned_count: number of buffers to allocate this pass
+ **/
+static void
+e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter,
+			     struct e1000_rx_ring *rx_ring, int cleaned_count)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_rx_desc *rx_desc;
+	struct e1000_rx_buffer *buffer_info;
+	unsigned int i;
+
+	i = rx_ring->next_to_use;
+	buffer_info = &rx_ring->buffer_info[i];
+
+	while (cleaned_count--) {
+		/* allocate a new page if necessary */
+		if (!buffer_info->rxbuf.page) {
+			buffer_info->rxbuf.page = alloc_page(GFP_ATOMIC);
+			if (unlikely(!buffer_info->rxbuf.page)) {
+				adapter->alloc_rx_buff_failed++;
+				break;
+			}
+		}
+
+		if (!buffer_info->dma) {
+			buffer_info->dma = dma_map_page(&pdev->dev,
+							buffer_info->rxbuf.page, 0,
+							adapter->rx_buffer_len,
+							DMA_FROM_DEVICE);
+			if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
+				put_page(buffer_info->rxbuf.page);
+				buffer_info->rxbuf.page = NULL;
+				buffer_info->dma = 0;
+				adapter->alloc_rx_buff_failed++;
+				break;
+			}
+		}
+
+		rx_desc = E1000_RX_DESC(*rx_ring, i);
+		rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
+
+		if (unlikely(++i == rx_ring->count))
+			i = 0;
+		buffer_info = &rx_ring->buffer_info[i];
+	}
+
+	if (likely(rx_ring->next_to_use != i)) {
+		rx_ring->next_to_use = i;
+		if (unlikely(i-- == 0))
+			i = (rx_ring->count - 1);
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		dma_wmb();
+		writel(i, adapter->hw.hw_addr + rx_ring->rdt);
+	}
+}
+
+/**
+ * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended
+ * @adapter: address of board private structure
+ * @rx_ring: pointer to ring struct
+ * @cleaned_count: number of new Rx buffers to try to allocate
+ **/
+static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
+				   struct e1000_rx_ring *rx_ring,
+				   int cleaned_count)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_rx_desc *rx_desc;
+	struct e1000_rx_buffer *buffer_info;
+	unsigned int i;
+	unsigned int bufsz = adapter->rx_buffer_len;
+
+	i = rx_ring->next_to_use;
+	buffer_info = &rx_ring->buffer_info[i];
+
+	while (cleaned_count--) {
+		void *data;
+
+		if (buffer_info->rxbuf.data)
+			goto skip;
+
+		data = e1000_alloc_frag(adapter);
+		if (!data) {
+			/* Better luck next round */
+			adapter->alloc_rx_buff_failed++;
+			break;
+		}
+
+		/* Fix for errata 23, can't cross 64kB boundary */
+		if (!e1000_check_64k_bound(adapter, data, bufsz)) {
+			void *olddata = data;
+			e_err(rx_err, "skb align check failed: %u bytes at "
+			      "%p\n", bufsz, data);
+			/* Try again, without freeing the previous */
+			data = e1000_alloc_frag(adapter);
+			/* Failed allocation, critical failure */
+			if (!data) {
+				skb_free_frag(olddata);
+				adapter->alloc_rx_buff_failed++;
+				break;
+			}
+
+			if (!e1000_check_64k_bound(adapter, data, bufsz)) {
+				/* give up */
+				skb_free_frag(data);
+				skb_free_frag(olddata);
+				adapter->alloc_rx_buff_failed++;
+				break;
+			}
+
+			/* Use new allocation */
+			skb_free_frag(olddata);
+		}
+		buffer_info->dma = dma_map_single(&pdev->dev,
+						  data,
+						  adapter->rx_buffer_len,
+						  DMA_FROM_DEVICE);
+		if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
+			skb_free_frag(data);
+			buffer_info->dma = 0;
+			adapter->alloc_rx_buff_failed++;
+			break;
+		}
+
+		/* XXX if it was allocated cleanly it will never map to a
+		 * boundary crossing
+		 */
+
+		/* Fix for errata 23, can't cross 64kB boundary */
+		if (!e1000_check_64k_bound(adapter,
+					(void *)(unsigned long)buffer_info->dma,
+					adapter->rx_buffer_len)) {
+			e_err(rx_err, "dma align check failed: %u bytes at "
+			      "%p\n", adapter->rx_buffer_len,
+			      (void *)(unsigned long)buffer_info->dma);
+
+			dma_unmap_single(&pdev->dev, buffer_info->dma,
+					 adapter->rx_buffer_len,
+					 DMA_FROM_DEVICE);
+
+			skb_free_frag(data);
+			buffer_info->rxbuf.data = NULL;
+			buffer_info->dma = 0;
+
+			adapter->alloc_rx_buff_failed++;
+			break;
+		}
+		buffer_info->rxbuf.data = data;
+ skip:
+		rx_desc = E1000_RX_DESC(*rx_ring, i);
+		rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
+
+		if (unlikely(++i == rx_ring->count))
+			i = 0;
+		buffer_info = &rx_ring->buffer_info[i];
+	}
+
+	if (likely(rx_ring->next_to_use != i)) {
+		rx_ring->next_to_use = i;
+		if (unlikely(i-- == 0))
+			i = (rx_ring->count - 1);
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		dma_wmb();
+		writel(i, hw->hw_addr + rx_ring->rdt);
+	}
+}
+
+/**
+ * e1000_smartspeed - Workaround for SmartSpeed on 82541 and 82547 controllers.
+ * @adapter: address of board private structure
+ **/
+static void e1000_smartspeed(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u16 phy_status;
+	u16 phy_ctrl;
+
+	if ((hw->phy_type != e1000_phy_igp) || !hw->autoneg ||
+	   !(hw->autoneg_advertised & ADVERTISE_1000_FULL))
+		return;
+
+	if (adapter->smartspeed == 0) {
+		/* If Master/Slave config fault is asserted twice,
+		 * we assume back-to-back
+		 */
+		e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_status);
+		if (!(phy_status & SR_1000T_MS_CONFIG_FAULT))
+			return;
+		e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_status);
+		if (!(phy_status & SR_1000T_MS_CONFIG_FAULT))
+			return;
+		e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_ctrl);
+		if (phy_ctrl & CR_1000T_MS_ENABLE) {
+			phy_ctrl &= ~CR_1000T_MS_ENABLE;
+			e1000_write_phy_reg(hw, PHY_1000T_CTRL,
+					    phy_ctrl);
+			adapter->smartspeed++;
+			if (!e1000_phy_setup_autoneg(hw) &&
+			   !e1000_read_phy_reg(hw, PHY_CTRL,
+					       &phy_ctrl)) {
+				phy_ctrl |= (MII_CR_AUTO_NEG_EN |
+					     MII_CR_RESTART_AUTO_NEG);
+				e1000_write_phy_reg(hw, PHY_CTRL,
+						    phy_ctrl);
+			}
+		}
+		return;
+	} else if (adapter->smartspeed == E1000_SMARTSPEED_DOWNSHIFT) {
+		/* If still no link, perhaps using 2/3 pair cable */
+		e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_ctrl);
+		phy_ctrl |= CR_1000T_MS_ENABLE;
+		e1000_write_phy_reg(hw, PHY_1000T_CTRL, phy_ctrl);
+		if (!e1000_phy_setup_autoneg(hw) &&
+		   !e1000_read_phy_reg(hw, PHY_CTRL, &phy_ctrl)) {
+			phy_ctrl |= (MII_CR_AUTO_NEG_EN |
+				     MII_CR_RESTART_AUTO_NEG);
+			e1000_write_phy_reg(hw, PHY_CTRL, phy_ctrl);
+		}
+	}
+	/* Restart process after E1000_SMARTSPEED_MAX iterations */
+	if (adapter->smartspeed++ == E1000_SMARTSPEED_MAX)
+		adapter->smartspeed = 0;
+}
+
+/**
+ * e1000_ioctl - handle ioctl calls
+ * @netdev: pointer to our netdev
+ * @ifr: pointer to interface request structure
+ * @cmd: ioctl data
+ **/
+static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	switch (cmd) {
+	case SIOCGMIIPHY:
+	case SIOCGMIIREG:
+	case SIOCSMIIREG:
+		return e1000_mii_ioctl(netdev, ifr, cmd);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * e1000_mii_ioctl -
+ * @netdev: pointer to our netdev
+ * @ifr: pointer to interface request structure
+ * @cmd: ioctl data
+ **/
+static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
+			   int cmd)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct mii_ioctl_data *data = if_mii(ifr);
+	int retval;
+	u16 mii_reg;
+	unsigned long flags;
+
+	if (hw->media_type != e1000_media_type_copper)
+		return -EOPNOTSUPP;
+
+	switch (cmd) {
+	case SIOCGMIIPHY:
+		data->phy_id = hw->phy_addr;
+		break;
+	case SIOCGMIIREG:
+		spin_lock_irqsave(&adapter->stats_lock, flags);
+		if (e1000_read_phy_reg(hw, data->reg_num & 0x1F,
+				   &data->val_out)) {
+			spin_unlock_irqrestore(&adapter->stats_lock, flags);
+			return -EIO;
+		}
+		spin_unlock_irqrestore(&adapter->stats_lock, flags);
+		break;
+	case SIOCSMIIREG:
+		if (data->reg_num & ~(0x1F))
+			return -EFAULT;
+		mii_reg = data->val_in;
+		spin_lock_irqsave(&adapter->stats_lock, flags);
+		if (e1000_write_phy_reg(hw, data->reg_num,
+					mii_reg)) {
+			spin_unlock_irqrestore(&adapter->stats_lock, flags);
+			return -EIO;
+		}
+		spin_unlock_irqrestore(&adapter->stats_lock, flags);
+		if (hw->media_type == e1000_media_type_copper) {
+			switch (data->reg_num) {
+			case PHY_CTRL:
+				if (mii_reg & MII_CR_POWER_DOWN)
+					break;
+				if (mii_reg & MII_CR_AUTO_NEG_EN) {
+					hw->autoneg = 1;
+					hw->autoneg_advertised = 0x2F;
+				} else {
+					u32 speed;
+					if (mii_reg & 0x40)
+						speed = SPEED_1000;
+					else if (mii_reg & 0x2000)
+						speed = SPEED_100;
+					else
+						speed = SPEED_10;
+					retval = e1000_set_spd_dplx(
+						adapter, speed,
+						((mii_reg & 0x100)
+						 ? DUPLEX_FULL :
+						 DUPLEX_HALF));
+					if (retval)
+						return retval;
+				}
+				if (netif_running(adapter->netdev))
+					e1000_reinit_locked(adapter);
+				else
+					e1000_reset(adapter);
+				break;
+			case M88E1000_PHY_SPEC_CTRL:
+			case M88E1000_EXT_PHY_SPEC_CTRL:
+				if (e1000_phy_reset(hw))
+					return -EIO;
+				break;
+			}
+		} else {
+			switch (data->reg_num) {
+			case PHY_CTRL:
+				if (mii_reg & MII_CR_POWER_DOWN)
+					break;
+				if (netif_running(adapter->netdev))
+					e1000_reinit_locked(adapter);
+				else
+					e1000_reset(adapter);
+				break;
+			}
+		}
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return E1000_SUCCESS;
+}
+
+void e1000_pci_set_mwi(struct e1000_hw *hw)
+{
+	struct e1000_adapter *adapter = hw->back;
+	int ret_val = pci_set_mwi(adapter->pdev);
+
+	if (ret_val)
+		e_err(probe, "Error in setting MWI\n");
+}
+
+void e1000_pci_clear_mwi(struct e1000_hw *hw)
+{
+	struct e1000_adapter *adapter = hw->back;
+
+	pci_clear_mwi(adapter->pdev);
+}
+
+int e1000_pcix_get_mmrbc(struct e1000_hw *hw)
+{
+	struct e1000_adapter *adapter = hw->back;
+	return pcix_get_mmrbc(adapter->pdev);
+}
+
+void e1000_pcix_set_mmrbc(struct e1000_hw *hw, int mmrbc)
+{
+	struct e1000_adapter *adapter = hw->back;
+	pcix_set_mmrbc(adapter->pdev, mmrbc);
+}
+
+void e1000_io_write(struct e1000_hw *hw, unsigned long port, u32 value)
+{
+	outl(value, port);
+}
+
+static bool e1000_vlan_used(struct e1000_adapter *adapter)
+{
+	u16 vid;
+
+	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
+		return true;
+	return false;
+}
+
+static void __e1000_vlan_mode(struct e1000_adapter *adapter,
+			      netdev_features_t features)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl;
+
+	ctrl = er32(CTRL);
+	if (features & NETIF_F_HW_VLAN_CTAG_RX) {
+		/* enable VLAN tag insert/strip */
+		ctrl |= E1000_CTRL_VME;
+	} else {
+		/* disable VLAN tag insert/strip */
+		ctrl &= ~E1000_CTRL_VME;
+	}
+	ew32(CTRL, ctrl);
+}
+static void e1000_vlan_filter_on_off(struct e1000_adapter *adapter,
+				     bool filter_on)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl;
+
+	if (!test_bit(__E1000_DOWN, &adapter->flags))
+		e1000_irq_disable(adapter);
+
+	__e1000_vlan_mode(adapter, adapter->netdev->features);
+	if (filter_on) {
+		/* enable VLAN receive filtering */
+		rctl = er32(RCTL);
+		rctl &= ~E1000_RCTL_CFIEN;
+		if (!(adapter->netdev->flags & IFF_PROMISC))
+			rctl |= E1000_RCTL_VFE;
+		ew32(RCTL, rctl);
+		e1000_update_mng_vlan(adapter);
+	} else {
+		/* disable VLAN receive filtering */
+		rctl = er32(RCTL);
+		rctl &= ~E1000_RCTL_VFE;
+		ew32(RCTL, rctl);
+	}
+
+	if (!test_bit(__E1000_DOWN, &adapter->flags))
+		e1000_irq_enable(adapter);
+}
+
+static void e1000_vlan_mode(struct net_device *netdev,
+			    netdev_features_t features)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	if (!test_bit(__E1000_DOWN, &adapter->flags))
+		e1000_irq_disable(adapter);
+
+	__e1000_vlan_mode(adapter, features);
+
+	if (!test_bit(__E1000_DOWN, &adapter->flags))
+		e1000_irq_enable(adapter);
+}
+
+static int e1000_vlan_rx_add_vid(struct net_device *netdev,
+				 __be16 proto, u16 vid)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 vfta, index;
+
+	if ((hw->mng_cookie.status &
+	     E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) &&
+	    (vid == adapter->mng_vlan_id))
+		return 0;
+
+	if (!e1000_vlan_used(adapter))
+		e1000_vlan_filter_on_off(adapter, true);
+
+	/* add VID to filter table */
+	index = (vid >> 5) & 0x7F;
+	vfta = E1000_READ_REG_ARRAY(hw, VFTA, index);
+	vfta |= (1 << (vid & 0x1F));
+	e1000_write_vfta(hw, index, vfta);
+
+	set_bit(vid, adapter->active_vlans);
+
+	return 0;
+}
+
+static int e1000_vlan_rx_kill_vid(struct net_device *netdev,
+				  __be16 proto, u16 vid)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 vfta, index;
+
+	if (!test_bit(__E1000_DOWN, &adapter->flags))
+		e1000_irq_disable(adapter);
+	if (!test_bit(__E1000_DOWN, &adapter->flags))
+		e1000_irq_enable(adapter);
+
+	/* remove VID from filter table */
+	index = (vid >> 5) & 0x7F;
+	vfta = E1000_READ_REG_ARRAY(hw, VFTA, index);
+	vfta &= ~(1 << (vid & 0x1F));
+	e1000_write_vfta(hw, index, vfta);
+
+	clear_bit(vid, adapter->active_vlans);
+
+	if (!e1000_vlan_used(adapter))
+		e1000_vlan_filter_on_off(adapter, false);
+
+	return 0;
+}
+
+static void e1000_restore_vlan(struct e1000_adapter *adapter)
+{
+	u16 vid;
+
+	if (!e1000_vlan_used(adapter))
+		return;
+
+	e1000_vlan_filter_on_off(adapter, true);
+	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
+		e1000_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid);
+}
+
+int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	hw->autoneg = 0;
+
+	/* Make sure dplx is at most 1 bit and lsb of speed is not set
+	 * for the switch() below to work
+	 */
+	if ((spd & 1) || (dplx & ~1))
+		goto err_inval;
+
+	/* Fiber NICs only allow 1000 gbps Full duplex */
+	if ((hw->media_type == e1000_media_type_fiber) &&
+	    spd != SPEED_1000 &&
+	    dplx != DUPLEX_FULL)
+		goto err_inval;
+
+	switch (spd + dplx) {
+	case SPEED_10 + DUPLEX_HALF:
+		hw->forced_speed_duplex = e1000_10_half;
+		break;
+	case SPEED_10 + DUPLEX_FULL:
+		hw->forced_speed_duplex = e1000_10_full;
+		break;
+	case SPEED_100 + DUPLEX_HALF:
+		hw->forced_speed_duplex = e1000_100_half;
+		break;
+	case SPEED_100 + DUPLEX_FULL:
+		hw->forced_speed_duplex = e1000_100_full;
+		break;
+	case SPEED_1000 + DUPLEX_FULL:
+		hw->autoneg = 1;
+		hw->autoneg_advertised = ADVERTISE_1000_FULL;
+		break;
+	case SPEED_1000 + DUPLEX_HALF: /* not supported */
+	default:
+		goto err_inval;
+	}
+
+	/* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
+	hw->mdix = AUTO_ALL_MODES;
+
+	return 0;
+
+err_inval:
+	e_err(probe, "Unsupported Speed/Duplex configuration\n");
+	return -EINVAL;
+}
+
+static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl, ctrl_ext, rctl, status;
+	u32 wufc = adapter->wol;
+
+	netif_device_detach(netdev);
+
+	if (netif_running(netdev)) {
+		int count = E1000_CHECK_RESET_COUNT;
+
+		while (test_bit(__E1000_RESETTING, &adapter->flags) && count--)
+			usleep_range(10000, 20000);
+
+		WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags));
+		e1000_down(adapter);
+	}
+
+	status = er32(STATUS);
+	if (status & E1000_STATUS_LU)
+		wufc &= ~E1000_WUFC_LNKC;
+
+	if (wufc) {
+		e1000_setup_rctl(adapter);
+		e1000_set_rx_mode(netdev);
+
+		rctl = er32(RCTL);
+
+		/* turn on all-multi mode if wake on multicast is enabled */
+		if (wufc & E1000_WUFC_MC)
+			rctl |= E1000_RCTL_MPE;
+
+		/* enable receives in the hardware */
+		ew32(RCTL, rctl | E1000_RCTL_EN);
+
+		if (hw->mac_type >= e1000_82540) {
+			ctrl = er32(CTRL);
+			/* advertise wake from D3Cold */
+			#define E1000_CTRL_ADVD3WUC 0x00100000
+			/* phy power management enable */
+			#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
+			ctrl |= E1000_CTRL_ADVD3WUC |
+				E1000_CTRL_EN_PHY_PWR_MGMT;
+			ew32(CTRL, ctrl);
+		}
+
+		if (hw->media_type == e1000_media_type_fiber ||
+		    hw->media_type == e1000_media_type_internal_serdes) {
+			/* keep the laser running in D3 */
+			ctrl_ext = er32(CTRL_EXT);
+			ctrl_ext |= E1000_CTRL_EXT_SDP7_DATA;
+			ew32(CTRL_EXT, ctrl_ext);
+		}
+
+		ew32(WUC, E1000_WUC_PME_EN);
+		ew32(WUFC, wufc);
+	} else {
+		ew32(WUC, 0);
+		ew32(WUFC, 0);
+	}
+
+	e1000_release_manageability(adapter);
+
+	*enable_wake = !!wufc;
+
+	/* make sure adapter isn't asleep if manageability is enabled */
+	if (adapter->en_mng_pt)
+		*enable_wake = true;
+
+	if (netif_running(netdev))
+		e1000_free_irq(adapter);
+
+	if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags))
+		pci_disable_device(pdev);
+
+	return 0;
+}
+
+static int __maybe_unused e1000_suspend(struct device *dev)
+{
+	int retval;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	bool wake;
+
+	retval = __e1000_shutdown(pdev, &wake);
+	device_set_wakeup_enable(dev, wake);
+
+	return retval;
+}
+
+static int __maybe_unused e1000_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 err;
+
+	if (adapter->need_ioport)
+		err = pci_enable_device(pdev);
+	else
+		err = pci_enable_device_mem(pdev);
+	if (err) {
+		pr_err("Cannot enable PCI device from suspend\n");
+		return err;
+	}
+
+	/* flush memory to make sure state is correct */
+	smp_mb__before_atomic();
+	clear_bit(__E1000_DISABLED, &adapter->flags);
+	pci_set_master(pdev);
+
+	pci_enable_wake(pdev, PCI_D3hot, 0);
+	pci_enable_wake(pdev, PCI_D3cold, 0);
+
+	if (netif_running(netdev)) {
+		err = e1000_request_irq(adapter);
+		if (err)
+			return err;
+	}
+
+	e1000_power_up_phy(adapter);
+	e1000_reset(adapter);
+	ew32(WUS, ~0);
+
+	e1000_init_manageability(adapter);
+
+	if (netif_running(netdev))
+		e1000_up(adapter);
+
+	netif_device_attach(netdev);
+
+	return 0;
+}
+
+static void e1000_shutdown(struct pci_dev *pdev)
+{
+	bool wake;
+
+	__e1000_shutdown(pdev, &wake);
+
+	if (system_state == SYSTEM_POWER_OFF) {
+		pci_wake_from_d3(pdev, wake);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/* Polling 'interrupt' - used by things like netconsole to send skbs
+ * without having to re-enable interrupts. It's not called while
+ * the interrupt routine is executing.
+ */
+static void e1000_netpoll(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	if (disable_hardirq(adapter->pdev->irq))
+		e1000_intr(adapter->pdev->irq, netdev);
+	enable_irq(adapter->pdev->irq);
+}
+#endif
+
+/**
+ * e1000_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
+						pci_channel_state_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	netif_device_detach(netdev);
+
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	if (netif_running(netdev))
+		e1000_down(adapter);
+
+	if (!test_and_set_bit(__E1000_DISABLED, &adapter->flags))
+		pci_disable_device(pdev);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * e1000_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot. Implementation
+ * resembles the first-half of the e1000_resume routine.
+ */
+static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	int err;
+
+	if (adapter->need_ioport)
+		err = pci_enable_device(pdev);
+	else
+		err = pci_enable_device_mem(pdev);
+	if (err) {
+		pr_err("Cannot re-enable PCI device after reset.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	/* flush memory to make sure state is correct */
+	smp_mb__before_atomic();
+	clear_bit(__E1000_DISABLED, &adapter->flags);
+	pci_set_master(pdev);
+
+	pci_enable_wake(pdev, PCI_D3hot, 0);
+	pci_enable_wake(pdev, PCI_D3cold, 0);
+
+	e1000_reset(adapter);
+	ew32(WUS, ~0);
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * e1000_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells us that
+ * its OK to resume normal operation. Implementation resembles the
+ * second-half of the e1000_resume routine.
+ */
+static void e1000_io_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	e1000_init_manageability(adapter);
+
+	if (netif_running(netdev)) {
+		if (e1000_up(adapter)) {
+			pr_info("can't bring device back up after reset\n");
+			return;
+		}
+	}
+
+	netif_device_attach(netdev);
+}
+
+/* e1000_main.c */
diff --git a/drivers/net/ethernet/intel/e1000/e1000_osdep.h b/drivers/net/ethernet/intel/e1000/e1000_osdep.h
new file mode 100644
index 0000000000..e966bb2907
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000/e1000_osdep.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
+
+/* glue for the OS independent part of e1000
+ * includes register access macros
+ */
+
+#ifndef _E1000_OSDEP_H_
+#define _E1000_OSDEP_H_
+
+#include <asm/io.h>
+
+#define CONFIG_RAM_BASE         0x60000
+#define GBE_CONFIG_OFFSET       0x0
+
+#define GBE_CONFIG_RAM_BASE \
+	((unsigned int)(CONFIG_RAM_BASE + GBE_CONFIG_OFFSET))
+
+#define GBE_CONFIG_BASE_VIRT \
+	((void __iomem *)phys_to_virt(GBE_CONFIG_RAM_BASE))
+
+#define GBE_CONFIG_FLASH_WRITE(base, offset, count, data) \
+	(iowrite16_rep(base + offset, data, count))
+
+#define GBE_CONFIG_FLASH_READ(base, offset, count, data) \
+	(ioread16_rep(base + (offset << 1), data, count))
+
+#define er32(reg)							\
+	(readl(hw->hw_addr + ((hw->mac_type >= e1000_82543)		\
+			       ? E1000_##reg : E1000_82542_##reg)))
+
+#define ew32(reg, value)						\
+	(writel((value), (hw->hw_addr + ((hw->mac_type >= e1000_82543)	\
+					 ? E1000_##reg : E1000_82542_##reg))))
+
+#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) ( \
+    writel((value), ((a)->hw_addr + \
+        (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \
+        ((offset) << 2))))
+
+#define E1000_READ_REG_ARRAY(a, reg, offset) ( \
+    readl((a)->hw_addr + \
+        (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \
+        ((offset) << 2)))
+
+#define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY
+#define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY
+
+#define E1000_WRITE_REG_ARRAY_WORD(a, reg, offset, value) ( \
+    writew((value), ((a)->hw_addr + \
+        (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \
+        ((offset) << 1))))
+
+#define E1000_READ_REG_ARRAY_WORD(a, reg, offset) ( \
+    readw((a)->hw_addr + \
+        (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \
+        ((offset) << 1)))
+
+#define E1000_WRITE_REG_ARRAY_BYTE(a, reg, offset, value) ( \
+    writeb((value), ((a)->hw_addr + \
+        (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \
+        (offset))))
+
+#define E1000_READ_REG_ARRAY_BYTE(a, reg, offset) ( \
+    readb((a)->hw_addr + \
+        (((a)->mac_type >= e1000_82543) ? E1000_##reg : E1000_82542_##reg) + \
+        (offset)))
+
+#define E1000_WRITE_FLUSH() er32(STATUS)
+
+#define E1000_WRITE_ICH_FLASH_REG(a, reg, value) ( \
+    writel((value), ((a)->flash_address + reg)))
+
+#define E1000_READ_ICH_FLASH_REG(a, reg) ( \
+    readl((a)->flash_address + reg))
+
+#define E1000_WRITE_ICH_FLASH_REG16(a, reg, value) ( \
+    writew((value), ((a)->flash_address + reg)))
+
+#define E1000_READ_ICH_FLASH_REG16(a, reg) ( \
+    readw((a)->flash_address + reg))
+
+#endif /* _E1000_OSDEP_H_ */
diff --git a/drivers/net/ethernet/intel/e1000/e1000_param.c b/drivers/net/ethernet/intel/e1000/e1000_param.c
new file mode 100644
index 0000000000..f4154ca7fc
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000/e1000_param.c
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2006 Intel Corporation. */
+
+#include "e1000.h"
+
+/* This is the only thing that needs to be changed to adjust the
+ * maximum number of ports that the driver can manage.
+ */
+
+#define E1000_MAX_NIC 32
+
+#define OPTION_UNSET   -1
+#define OPTION_DISABLED 0
+#define OPTION_ENABLED  1
+
+/* All parameters are treated the same, as an integer array of values.
+ * This macro just reduces the need to repeat the same declaration code
+ * over and over (plus this helps to avoid typo bugs).
+ */
+
+#define E1000_PARAM_INIT { [0 ... E1000_MAX_NIC] = OPTION_UNSET }
+#define E1000_PARAM(X, desc) \
+	static int X[E1000_MAX_NIC+1] = E1000_PARAM_INIT; \
+	static unsigned int num_##X; \
+	module_param_array_named(X, X, int, &num_##X, 0); \
+	MODULE_PARM_DESC(X, desc);
+
+/* Transmit Descriptor Count
+ *
+ * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers
+ * Valid Range: 80-4096 for 82544 and newer
+ *
+ * Default Value: 256
+ */
+E1000_PARAM(TxDescriptors, "Number of transmit descriptors");
+
+/* Receive Descriptor Count
+ *
+ * Valid Range: 80-256 for 82542 and 82543 gigabit ethernet controllers
+ * Valid Range: 80-4096 for 82544 and newer
+ *
+ * Default Value: 256
+ */
+E1000_PARAM(RxDescriptors, "Number of receive descriptors");
+
+/* User Specified Speed Override
+ *
+ * Valid Range: 0, 10, 100, 1000
+ *  - 0    - auto-negotiate at all supported speeds
+ *  - 10   - only link at 10 Mbps
+ *  - 100  - only link at 100 Mbps
+ *  - 1000 - only link at 1000 Mbps
+ *
+ * Default Value: 0
+ */
+E1000_PARAM(Speed, "Speed setting");
+
+/* User Specified Duplex Override
+ *
+ * Valid Range: 0-2
+ *  - 0 - auto-negotiate for duplex
+ *  - 1 - only link at half duplex
+ *  - 2 - only link at full duplex
+ *
+ * Default Value: 0
+ */
+E1000_PARAM(Duplex, "Duplex setting");
+
+/* Auto-negotiation Advertisement Override
+ *
+ * Valid Range: 0x01-0x0F, 0x20-0x2F (copper); 0x20 (fiber)
+ *
+ * The AutoNeg value is a bit mask describing which speed and duplex
+ * combinations should be advertised during auto-negotiation.
+ * The supported speed and duplex modes are listed below
+ *
+ * Bit           7     6     5      4      3     2     1      0
+ * Speed (Mbps)  N/A   N/A   1000   N/A    100   100   10     10
+ * Duplex                    Full          Full  Half  Full   Half
+ *
+ * Default Value: 0x2F (copper); 0x20 (fiber)
+ */
+E1000_PARAM(AutoNeg, "Advertised auto-negotiation setting");
+#define AUTONEG_ADV_DEFAULT  0x2F
+
+/* User Specified Flow Control Override
+ *
+ * Valid Range: 0-3
+ *  - 0 - No Flow Control
+ *  - 1 - Rx only, respond to PAUSE frames but do not generate them
+ *  - 2 - Tx only, generate PAUSE frames but ignore them on receive
+ *  - 3 - Full Flow Control Support
+ *
+ * Default Value: Read flow control settings from the EEPROM
+ */
+E1000_PARAM(FlowControl, "Flow Control setting");
+
+/* XsumRX - Receive Checksum Offload Enable/Disable
+ *
+ * Valid Range: 0, 1
+ *  - 0 - disables all checksum offload
+ *  - 1 - enables receive IP/TCP/UDP checksum offload
+ *        on 82543 and newer -based NICs
+ *
+ * Default Value: 1
+ */
+E1000_PARAM(XsumRX, "Disable or enable Receive Checksum offload");
+
+/* Transmit Interrupt Delay in units of 1.024 microseconds
+ *  Tx interrupt delay needs to typically be set to something non zero
+ *
+ * Valid Range: 0-65535
+ */
+E1000_PARAM(TxIntDelay, "Transmit Interrupt Delay");
+#define DEFAULT_TIDV                   8
+#define MAX_TXDELAY               0xFFFF
+#define MIN_TXDELAY                    0
+
+/* Transmit Absolute Interrupt Delay in units of 1.024 microseconds
+ *
+ * Valid Range: 0-65535
+ */
+E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay");
+#define DEFAULT_TADV                  32
+#define MAX_TXABSDELAY            0xFFFF
+#define MIN_TXABSDELAY                 0
+
+/* Receive Interrupt Delay in units of 1.024 microseconds
+ *   hardware will likely hang if you set this to anything but zero.
+ *
+ * Valid Range: 0-65535
+ */
+E1000_PARAM(RxIntDelay, "Receive Interrupt Delay");
+#define DEFAULT_RDTR                   0
+#define MAX_RXDELAY               0xFFFF
+#define MIN_RXDELAY                    0
+
+/* Receive Absolute Interrupt Delay in units of 1.024 microseconds
+ *
+ * Valid Range: 0-65535
+ */
+E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay");
+#define DEFAULT_RADV                   8
+#define MAX_RXABSDELAY            0xFFFF
+#define MIN_RXABSDELAY                 0
+
+/* Interrupt Throttle Rate (interrupts/sec)
+ *
+ * Valid Range: 100-100000 (0=off, 1=dynamic, 3=dynamic conservative)
+ */
+E1000_PARAM(InterruptThrottleRate, "Interrupt Throttling Rate");
+#define DEFAULT_ITR                    3
+#define MAX_ITR                   100000
+#define MIN_ITR                      100
+
+/* Enable Smart Power Down of the PHY
+ *
+ * Valid Range: 0, 1
+ *
+ * Default Value: 0 (disabled)
+ */
+E1000_PARAM(SmartPowerDownEnable, "Enable PHY smart power down");
+
+struct e1000_option {
+	enum { enable_option, range_option, list_option } type;
+	const char *name;
+	const char *err;
+	int def;
+	union {
+		struct { /* range_option info */
+			int min;
+			int max;
+		} r;
+		struct { /* list_option info */
+			int nr;
+			const struct e1000_opt_list { int i; char *str; } *p;
+		} l;
+	} arg;
+};
+
+static int e1000_validate_option(unsigned int *value,
+				 const struct e1000_option *opt,
+				 struct e1000_adapter *adapter)
+{
+	if (*value == OPTION_UNSET) {
+		*value = opt->def;
+		return 0;
+	}
+
+	switch (opt->type) {
+	case enable_option:
+		switch (*value) {
+		case OPTION_ENABLED:
+			e_dev_info("%s Enabled\n", opt->name);
+			return 0;
+		case OPTION_DISABLED:
+			e_dev_info("%s Disabled\n", opt->name);
+			return 0;
+		}
+		break;
+	case range_option:
+		if (*value >= opt->arg.r.min && *value <= opt->arg.r.max) {
+			e_dev_info("%s set to %i\n", opt->name, *value);
+			return 0;
+		}
+		break;
+	case list_option: {
+		int i;
+		const struct e1000_opt_list *ent;
+
+		for (i = 0; i < opt->arg.l.nr; i++) {
+			ent = &opt->arg.l.p[i];
+			if (*value == ent->i) {
+				if (ent->str[0] != '\0')
+					e_dev_info("%s\n", ent->str);
+				return 0;
+			}
+		}
+	}
+		break;
+	default:
+		BUG();
+	}
+
+	e_dev_info("Invalid %s value specified (%i) %s\n",
+	       opt->name, *value, opt->err);
+	*value = opt->def;
+	return -1;
+}
+
+static void e1000_check_fiber_options(struct e1000_adapter *adapter);
+static void e1000_check_copper_options(struct e1000_adapter *adapter);
+
+/**
+ * e1000_check_options - Range Checking for Command Line Parameters
+ * @adapter: board private structure
+ *
+ * This routine checks all command line parameters for valid user
+ * input.  If an invalid value is given, or if no user specified
+ * value exists, a default value is used.  The final value is stored
+ * in a variable in the adapter structure.
+ **/
+void e1000_check_options(struct e1000_adapter *adapter)
+{
+	struct e1000_option opt;
+	int bd = adapter->bd_number;
+
+	if (bd >= E1000_MAX_NIC) {
+		e_dev_warn("Warning: no configuration for board #%i "
+			   "using defaults for all values\n", bd);
+	}
+
+	{ /* Transmit Descriptor Count */
+		struct e1000_tx_ring *tx_ring = adapter->tx_ring;
+		int i;
+		e1000_mac_type mac_type = adapter->hw.mac_type;
+
+		opt = (struct e1000_option) {
+			.type = range_option,
+			.name = "Transmit Descriptors",
+			.err  = "using default of "
+				__MODULE_STRING(E1000_DEFAULT_TXD),
+			.def  = E1000_DEFAULT_TXD,
+			.arg  = { .r = {
+				.min = E1000_MIN_TXD,
+				.max = mac_type < e1000_82544 ? E1000_MAX_TXD : E1000_MAX_82544_TXD
+				}}
+		};
+
+		if (num_TxDescriptors > bd) {
+			tx_ring->count = TxDescriptors[bd];
+			e1000_validate_option(&tx_ring->count, &opt, adapter);
+			tx_ring->count = ALIGN(tx_ring->count,
+						REQ_TX_DESCRIPTOR_MULTIPLE);
+		} else {
+			tx_ring->count = opt.def;
+		}
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			tx_ring[i].count = tx_ring->count;
+	}
+	{ /* Receive Descriptor Count */
+		struct e1000_rx_ring *rx_ring = adapter->rx_ring;
+		int i;
+		e1000_mac_type mac_type = adapter->hw.mac_type;
+
+		opt = (struct e1000_option) {
+			.type = range_option,
+			.name = "Receive Descriptors",
+			.err  = "using default of "
+				__MODULE_STRING(E1000_DEFAULT_RXD),
+			.def  = E1000_DEFAULT_RXD,
+			.arg  = { .r = {
+				.min = E1000_MIN_RXD,
+				.max = mac_type < e1000_82544 ? E1000_MAX_RXD :
+				       E1000_MAX_82544_RXD
+			}}
+		};
+
+		if (num_RxDescriptors > bd) {
+			rx_ring->count = RxDescriptors[bd];
+			e1000_validate_option(&rx_ring->count, &opt, adapter);
+			rx_ring->count = ALIGN(rx_ring->count,
+						REQ_RX_DESCRIPTOR_MULTIPLE);
+		} else {
+			rx_ring->count = opt.def;
+		}
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			rx_ring[i].count = rx_ring->count;
+	}
+	{ /* Checksum Offload Enable/Disable */
+		opt = (struct e1000_option) {
+			.type = enable_option,
+			.name = "Checksum Offload",
+			.err  = "defaulting to Enabled",
+			.def  = OPTION_ENABLED
+		};
+
+		if (num_XsumRX > bd) {
+			unsigned int rx_csum = XsumRX[bd];
+			e1000_validate_option(&rx_csum, &opt, adapter);
+			adapter->rx_csum = rx_csum;
+		} else {
+			adapter->rx_csum = opt.def;
+		}
+	}
+	{ /* Flow Control */
+
+		static const struct e1000_opt_list fc_list[] = {
+		       { E1000_FC_NONE, "Flow Control Disabled" },
+		       { E1000_FC_RX_PAUSE, "Flow Control Receive Only" },
+		       { E1000_FC_TX_PAUSE, "Flow Control Transmit Only" },
+		       { E1000_FC_FULL, "Flow Control Enabled" },
+		       { E1000_FC_DEFAULT, "Flow Control Hardware Default" }
+		};
+
+		opt = (struct e1000_option) {
+			.type = list_option,
+			.name = "Flow Control",
+			.err  = "reading default settings from EEPROM",
+			.def  = E1000_FC_DEFAULT,
+			.arg  = { .l = { .nr = ARRAY_SIZE(fc_list),
+					 .p = fc_list }}
+		};
+
+		if (num_FlowControl > bd) {
+			unsigned int fc = FlowControl[bd];
+			e1000_validate_option(&fc, &opt, adapter);
+			adapter->hw.fc = adapter->hw.original_fc = fc;
+		} else {
+			adapter->hw.fc = adapter->hw.original_fc = opt.def;
+		}
+	}
+	{ /* Transmit Interrupt Delay */
+		opt = (struct e1000_option) {
+			.type = range_option,
+			.name = "Transmit Interrupt Delay",
+			.err  = "using default of " __MODULE_STRING(DEFAULT_TIDV),
+			.def  = DEFAULT_TIDV,
+			.arg  = { .r = { .min = MIN_TXDELAY,
+					 .max = MAX_TXDELAY }}
+		};
+
+		if (num_TxIntDelay > bd) {
+			adapter->tx_int_delay = TxIntDelay[bd];
+			e1000_validate_option(&adapter->tx_int_delay, &opt,
+			                      adapter);
+		} else {
+			adapter->tx_int_delay = opt.def;
+		}
+	}
+	{ /* Transmit Absolute Interrupt Delay */
+		opt = (struct e1000_option) {
+			.type = range_option,
+			.name = "Transmit Absolute Interrupt Delay",
+			.err  = "using default of " __MODULE_STRING(DEFAULT_TADV),
+			.def  = DEFAULT_TADV,
+			.arg  = { .r = { .min = MIN_TXABSDELAY,
+					 .max = MAX_TXABSDELAY }}
+		};
+
+		if (num_TxAbsIntDelay > bd) {
+			adapter->tx_abs_int_delay = TxAbsIntDelay[bd];
+			e1000_validate_option(&adapter->tx_abs_int_delay, &opt,
+					      adapter);
+		} else {
+			adapter->tx_abs_int_delay = opt.def;
+		}
+	}
+	{ /* Receive Interrupt Delay */
+		opt = (struct e1000_option) {
+			.type = range_option,
+			.name = "Receive Interrupt Delay",
+			.err  = "using default of " __MODULE_STRING(DEFAULT_RDTR),
+			.def  = DEFAULT_RDTR,
+			.arg  = { .r = { .min = MIN_RXDELAY,
+					 .max = MAX_RXDELAY }}
+		};
+
+		if (num_RxIntDelay > bd) {
+			adapter->rx_int_delay = RxIntDelay[bd];
+			e1000_validate_option(&adapter->rx_int_delay, &opt,
+					      adapter);
+		} else {
+			adapter->rx_int_delay = opt.def;
+		}
+	}
+	{ /* Receive Absolute Interrupt Delay */
+		opt = (struct e1000_option) {
+			.type = range_option,
+			.name = "Receive Absolute Interrupt Delay",
+			.err  = "using default of " __MODULE_STRING(DEFAULT_RADV),
+			.def  = DEFAULT_RADV,
+			.arg  = { .r = { .min = MIN_RXABSDELAY,
+					 .max = MAX_RXABSDELAY }}
+		};
+
+		if (num_RxAbsIntDelay > bd) {
+			adapter->rx_abs_int_delay = RxAbsIntDelay[bd];
+			e1000_validate_option(&adapter->rx_abs_int_delay, &opt,
+					      adapter);
+		} else {
+			adapter->rx_abs_int_delay = opt.def;
+		}
+	}
+	{ /* Interrupt Throttling Rate */
+		opt = (struct e1000_option) {
+			.type = range_option,
+			.name = "Interrupt Throttling Rate (ints/sec)",
+			.err  = "using default of " __MODULE_STRING(DEFAULT_ITR),
+			.def  = DEFAULT_ITR,
+			.arg  = { .r = { .min = MIN_ITR,
+					 .max = MAX_ITR }}
+		};
+
+		if (num_InterruptThrottleRate > bd) {
+			adapter->itr = InterruptThrottleRate[bd];
+			switch (adapter->itr) {
+			case 0:
+				e_dev_info("%s turned off\n", opt.name);
+				break;
+			case 1:
+				e_dev_info("%s set to dynamic mode\n",
+					   opt.name);
+				adapter->itr_setting = adapter->itr;
+				adapter->itr = 20000;
+				break;
+			case 3:
+				e_dev_info("%s set to dynamic conservative "
+					   "mode\n", opt.name);
+				adapter->itr_setting = adapter->itr;
+				adapter->itr = 20000;
+				break;
+			case 4:
+				e_dev_info("%s set to simplified "
+					   "(2000-8000) ints mode\n", opt.name);
+				adapter->itr_setting = adapter->itr;
+				break;
+			default:
+				e1000_validate_option(&adapter->itr, &opt,
+						      adapter);
+				/* save the setting, because the dynamic bits
+				 * change itr.
+				 * clear the lower two bits because they are
+				 * used as control
+				 */
+				adapter->itr_setting = adapter->itr & ~3;
+				break;
+			}
+		} else {
+			adapter->itr_setting = opt.def;
+			adapter->itr = 20000;
+		}
+	}
+	{ /* Smart Power Down */
+		opt = (struct e1000_option) {
+			.type = enable_option,
+			.name = "PHY Smart Power Down",
+			.err  = "defaulting to Disabled",
+			.def  = OPTION_DISABLED
+		};
+
+		if (num_SmartPowerDownEnable > bd) {
+			unsigned int spd = SmartPowerDownEnable[bd];
+			e1000_validate_option(&spd, &opt, adapter);
+			adapter->smart_power_down = spd;
+		} else {
+			adapter->smart_power_down = opt.def;
+		}
+	}
+
+	switch (adapter->hw.media_type) {
+	case e1000_media_type_fiber:
+	case e1000_media_type_internal_serdes:
+		e1000_check_fiber_options(adapter);
+		break;
+	case e1000_media_type_copper:
+		e1000_check_copper_options(adapter);
+		break;
+	default:
+		BUG();
+	}
+}
+
+/**
+ * e1000_check_fiber_options - Range Checking for Link Options, Fiber Version
+ * @adapter: board private structure
+ *
+ * Handles speed and duplex options on fiber adapters
+ **/
+static void e1000_check_fiber_options(struct e1000_adapter *adapter)
+{
+	int bd = adapter->bd_number;
+	if (num_Speed > bd) {
+		e_dev_info("Speed not valid for fiber adapters, parameter "
+			   "ignored\n");
+	}
+
+	if (num_Duplex > bd) {
+		e_dev_info("Duplex not valid for fiber adapters, parameter "
+			   "ignored\n");
+	}
+
+	if ((num_AutoNeg > bd) && (AutoNeg[bd] != 0x20)) {
+		e_dev_info("AutoNeg other than 1000/Full is not valid for fiber"
+			   "adapters, parameter ignored\n");
+	}
+}
+
+/**
+ * e1000_check_copper_options - Range Checking for Link Options, Copper Version
+ * @adapter: board private structure
+ *
+ * Handles speed and duplex options on copper adapters
+ **/
+static void e1000_check_copper_options(struct e1000_adapter *adapter)
+{
+	struct e1000_option opt;
+	unsigned int speed, dplx, an;
+	int bd = adapter->bd_number;
+
+	{ /* Speed */
+		static const struct e1000_opt_list speed_list[] = {
+			{          0, "" },
+			{   SPEED_10, "" },
+			{  SPEED_100, "" },
+			{ SPEED_1000, "" }};
+
+		opt = (struct e1000_option) {
+			.type = list_option,
+			.name = "Speed",
+			.err  = "parameter ignored",
+			.def  = 0,
+			.arg  = { .l = { .nr = ARRAY_SIZE(speed_list),
+					 .p = speed_list }}
+		};
+
+		if (num_Speed > bd) {
+			speed = Speed[bd];
+			e1000_validate_option(&speed, &opt, adapter);
+		} else {
+			speed = opt.def;
+		}
+	}
+	{ /* Duplex */
+		static const struct e1000_opt_list dplx_list[] = {
+			{           0, "" },
+			{ HALF_DUPLEX, "" },
+			{ FULL_DUPLEX, "" }};
+
+		opt = (struct e1000_option) {
+			.type = list_option,
+			.name = "Duplex",
+			.err  = "parameter ignored",
+			.def  = 0,
+			.arg  = { .l = { .nr = ARRAY_SIZE(dplx_list),
+					 .p = dplx_list }}
+		};
+
+		if (num_Duplex > bd) {
+			dplx = Duplex[bd];
+			e1000_validate_option(&dplx, &opt, adapter);
+		} else {
+			dplx = opt.def;
+		}
+	}
+
+	if ((num_AutoNeg > bd) && (speed != 0 || dplx != 0)) {
+		e_dev_info("AutoNeg specified along with Speed or Duplex, "
+			   "parameter ignored\n");
+		adapter->hw.autoneg_advertised = AUTONEG_ADV_DEFAULT;
+	} else { /* Autoneg */
+		static const struct e1000_opt_list an_list[] =
+			#define AA "AutoNeg advertising "
+			{{ 0x01, AA "10/HD" },
+			 { 0x02, AA "10/FD" },
+			 { 0x03, AA "10/FD, 10/HD" },
+			 { 0x04, AA "100/HD" },
+			 { 0x05, AA "100/HD, 10/HD" },
+			 { 0x06, AA "100/HD, 10/FD" },
+			 { 0x07, AA "100/HD, 10/FD, 10/HD" },
+			 { 0x08, AA "100/FD" },
+			 { 0x09, AA "100/FD, 10/HD" },
+			 { 0x0a, AA "100/FD, 10/FD" },
+			 { 0x0b, AA "100/FD, 10/FD, 10/HD" },
+			 { 0x0c, AA "100/FD, 100/HD" },
+			 { 0x0d, AA "100/FD, 100/HD, 10/HD" },
+			 { 0x0e, AA "100/FD, 100/HD, 10/FD" },
+			 { 0x0f, AA "100/FD, 100/HD, 10/FD, 10/HD" },
+			 { 0x20, AA "1000/FD" },
+			 { 0x21, AA "1000/FD, 10/HD" },
+			 { 0x22, AA "1000/FD, 10/FD" },
+			 { 0x23, AA "1000/FD, 10/FD, 10/HD" },
+			 { 0x24, AA "1000/FD, 100/HD" },
+			 { 0x25, AA "1000/FD, 100/HD, 10/HD" },
+			 { 0x26, AA "1000/FD, 100/HD, 10/FD" },
+			 { 0x27, AA "1000/FD, 100/HD, 10/FD, 10/HD" },
+			 { 0x28, AA "1000/FD, 100/FD" },
+			 { 0x29, AA "1000/FD, 100/FD, 10/HD" },
+			 { 0x2a, AA "1000/FD, 100/FD, 10/FD" },
+			 { 0x2b, AA "1000/FD, 100/FD, 10/FD, 10/HD" },
+			 { 0x2c, AA "1000/FD, 100/FD, 100/HD" },
+			 { 0x2d, AA "1000/FD, 100/FD, 100/HD, 10/HD" },
+			 { 0x2e, AA "1000/FD, 100/FD, 100/HD, 10/FD" },
+			 { 0x2f, AA "1000/FD, 100/FD, 100/HD, 10/FD, 10/HD" }};
+
+		opt = (struct e1000_option) {
+			.type = list_option,
+			.name = "AutoNeg",
+			.err  = "parameter ignored",
+			.def  = AUTONEG_ADV_DEFAULT,
+			.arg  = { .l = { .nr = ARRAY_SIZE(an_list),
+					 .p = an_list }}
+		};
+
+		if (num_AutoNeg > bd) {
+			an = AutoNeg[bd];
+			e1000_validate_option(&an, &opt, adapter);
+		} else {
+			an = opt.def;
+		}
+		adapter->hw.autoneg_advertised = an;
+	}
+
+	switch (speed + dplx) {
+	case 0:
+		adapter->hw.autoneg = adapter->fc_autoneg = 1;
+		if ((num_Speed > bd) && (speed != 0 || dplx != 0))
+			e_dev_info("Speed and duplex autonegotiation "
+				   "enabled\n");
+		break;
+	case HALF_DUPLEX:
+		e_dev_info("Half Duplex specified without Speed\n");
+		e_dev_info("Using Autonegotiation at Half Duplex only\n");
+		adapter->hw.autoneg = adapter->fc_autoneg = 1;
+		adapter->hw.autoneg_advertised = ADVERTISE_10_HALF |
+						 ADVERTISE_100_HALF;
+		break;
+	case FULL_DUPLEX:
+		e_dev_info("Full Duplex specified without Speed\n");
+		e_dev_info("Using Autonegotiation at Full Duplex only\n");
+		adapter->hw.autoneg = adapter->fc_autoneg = 1;
+		adapter->hw.autoneg_advertised = ADVERTISE_10_FULL |
+						 ADVERTISE_100_FULL |
+						 ADVERTISE_1000_FULL;
+		break;
+	case SPEED_10:
+		e_dev_info("10 Mbps Speed specified without Duplex\n");
+		e_dev_info("Using Autonegotiation at 10 Mbps only\n");
+		adapter->hw.autoneg = adapter->fc_autoneg = 1;
+		adapter->hw.autoneg_advertised = ADVERTISE_10_HALF |
+						 ADVERTISE_10_FULL;
+		break;
+	case SPEED_10 + HALF_DUPLEX:
+		e_dev_info("Forcing to 10 Mbps Half Duplex\n");
+		adapter->hw.autoneg = adapter->fc_autoneg = 0;
+		adapter->hw.forced_speed_duplex = e1000_10_half;
+		adapter->hw.autoneg_advertised = 0;
+		break;
+	case SPEED_10 + FULL_DUPLEX:
+		e_dev_info("Forcing to 10 Mbps Full Duplex\n");
+		adapter->hw.autoneg = adapter->fc_autoneg = 0;
+		adapter->hw.forced_speed_duplex = e1000_10_full;
+		adapter->hw.autoneg_advertised = 0;
+		break;
+	case SPEED_100:
+		e_dev_info("100 Mbps Speed specified without Duplex\n");
+		e_dev_info("Using Autonegotiation at 100 Mbps only\n");
+		adapter->hw.autoneg = adapter->fc_autoneg = 1;
+		adapter->hw.autoneg_advertised = ADVERTISE_100_HALF |
+						 ADVERTISE_100_FULL;
+		break;
+	case SPEED_100 + HALF_DUPLEX:
+		e_dev_info("Forcing to 100 Mbps Half Duplex\n");
+		adapter->hw.autoneg = adapter->fc_autoneg = 0;
+		adapter->hw.forced_speed_duplex = e1000_100_half;
+		adapter->hw.autoneg_advertised = 0;
+		break;
+	case SPEED_100 + FULL_DUPLEX:
+		e_dev_info("Forcing to 100 Mbps Full Duplex\n");
+		adapter->hw.autoneg = adapter->fc_autoneg = 0;
+		adapter->hw.forced_speed_duplex = e1000_100_full;
+		adapter->hw.autoneg_advertised = 0;
+		break;
+	case SPEED_1000:
+		e_dev_info("1000 Mbps Speed specified without Duplex\n");
+		goto full_duplex_only;
+	case SPEED_1000 + HALF_DUPLEX:
+		e_dev_info("Half Duplex is not supported at 1000 Mbps\n");
+		fallthrough;
+	case SPEED_1000 + FULL_DUPLEX:
+full_duplex_only:
+		e_dev_info("Using Autonegotiation at 1000 Mbps Full Duplex "
+			   "only\n");
+		adapter->hw.autoneg = adapter->fc_autoneg = 1;
+		adapter->hw.autoneg_advertised = ADVERTISE_1000_FULL;
+		break;
+	default:
+		BUG();
+	}
+
+	/* Speed, AutoNeg and MDI/MDI-X must all play nice */
+	if (e1000_validate_mdi_setting(&(adapter->hw)) < 0) {
+		e_dev_info("Speed, AutoNeg and MDI-X specs are incompatible. "
+			   "Setting MDI-X to a compatible value.\n");
+	}
+}
+
diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
new file mode 100644
index 0000000000..be9c695dde
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c
@@ -0,0 +1,1412 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+/* 80003ES2LAN Gigabit Ethernet Controller (Copper)
+ * 80003ES2LAN Gigabit Ethernet Controller (Serdes)
+ */
+
+#include "e1000.h"
+
+/* A table for the GG82563 cable length where the range is defined
+ * with a lower bound at "index" and the upper bound at
+ * "index + 5".
+ */
+static const u16 e1000_gg82563_cable_length_table[] = {
+	0, 60, 115, 150, 150, 60, 115, 150, 180, 180, 0xFF
+};
+
+#define GG82563_CABLE_LENGTH_TABLE_SIZE \
+		ARRAY_SIZE(e1000_gg82563_cable_length_table)
+
+static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw);
+static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask);
+static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask);
+static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw);
+static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw);
+static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw);
+static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex);
+static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset,
+					   u16 *data);
+static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset,
+					    u16 data);
+static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw);
+
+/**
+ *  e1000_init_phy_params_80003es2lan - Init ESB2 PHY func ptrs.
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_init_phy_params_80003es2lan(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+
+	if (hw->phy.media_type != e1000_media_type_copper) {
+		phy->type = e1000_phy_none;
+		return 0;
+	} else {
+		phy->ops.power_up = e1000_power_up_phy_copper;
+		phy->ops.power_down = e1000_power_down_phy_copper_80003es2lan;
+	}
+
+	phy->addr = 1;
+	phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT;
+	phy->reset_delay_us = 100;
+	phy->type = e1000_phy_gg82563;
+
+	/* This can only be done after all function pointers are setup. */
+	ret_val = e1000e_get_phy_id(hw);
+
+	/* Verify phy id */
+	if (phy->id != GG82563_E_PHY_ID)
+		return -E1000_ERR_PHY;
+
+	return ret_val;
+}
+
+/**
+ *  e1000_init_nvm_params_80003es2lan - Init ESB2 NVM func ptrs.
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_init_nvm_params_80003es2lan(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 eecd = er32(EECD);
+	u16 size;
+
+	nvm->opcode_bits = 8;
+	nvm->delay_usec = 1;
+	switch (nvm->override) {
+	case e1000_nvm_override_spi_large:
+		nvm->page_size = 32;
+		nvm->address_bits = 16;
+		break;
+	case e1000_nvm_override_spi_small:
+		nvm->page_size = 8;
+		nvm->address_bits = 8;
+		break;
+	default:
+		nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8;
+		nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 16 : 8;
+		break;
+	}
+
+	nvm->type = e1000_nvm_eeprom_spi;
+
+	size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >>
+		     E1000_EECD_SIZE_EX_SHIFT);
+
+	/* Added to a constant, "size" becomes the left-shift value
+	 * for setting word_size.
+	 */
+	size += NVM_WORD_SIZE_BASE_SHIFT;
+
+	/* EEPROM access above 16k is unsupported */
+	if (size > 14)
+		size = 14;
+	nvm->word_size = BIT(size);
+
+	return 0;
+}
+
+/**
+ *  e1000_init_mac_params_80003es2lan - Init ESB2 MAC func ptrs.
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_init_mac_params_80003es2lan(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+
+	/* Set media type and media-dependent function pointers */
+	switch (hw->adapter->pdev->device) {
+	case E1000_DEV_ID_80003ES2LAN_SERDES_DPT:
+		hw->phy.media_type = e1000_media_type_internal_serdes;
+		mac->ops.check_for_link = e1000e_check_for_serdes_link;
+		mac->ops.setup_physical_interface =
+		    e1000e_setup_fiber_serdes_link;
+		break;
+	default:
+		hw->phy.media_type = e1000_media_type_copper;
+		mac->ops.check_for_link = e1000e_check_for_copper_link;
+		mac->ops.setup_physical_interface =
+		    e1000_setup_copper_link_80003es2lan;
+		break;
+	}
+
+	/* Set mta register count */
+	mac->mta_reg_count = 128;
+	/* Set rar entry count */
+	mac->rar_entry_count = E1000_RAR_ENTRIES;
+	/* FWSM register */
+	mac->has_fwsm = true;
+	/* ARC supported; valid only if manageability features are enabled. */
+	mac->arc_subsystem_valid = !!(er32(FWSM) & E1000_FWSM_MODE_MASK);
+	/* Adaptive IFS not supported */
+	mac->adaptive_ifs = false;
+
+	/* set lan id for port to determine which phy lock to use */
+	hw->mac.ops.set_lan_id(hw);
+
+	return 0;
+}
+
+static s32 e1000_get_variants_80003es2lan(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	s32 rc;
+
+	rc = e1000_init_mac_params_80003es2lan(hw);
+	if (rc)
+		return rc;
+
+	rc = e1000_init_nvm_params_80003es2lan(hw);
+	if (rc)
+		return rc;
+
+	rc = e1000_init_phy_params_80003es2lan(hw);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
+/**
+ *  e1000_acquire_phy_80003es2lan - Acquire rights to access PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  A wrapper to acquire access rights to the correct PHY.
+ **/
+static s32 e1000_acquire_phy_80003es2lan(struct e1000_hw *hw)
+{
+	u16 mask;
+
+	mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM;
+	return e1000_acquire_swfw_sync_80003es2lan(hw, mask);
+}
+
+/**
+ *  e1000_release_phy_80003es2lan - Release rights to access PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  A wrapper to release access rights to the correct PHY.
+ **/
+static void e1000_release_phy_80003es2lan(struct e1000_hw *hw)
+{
+	u16 mask;
+
+	mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM;
+	e1000_release_swfw_sync_80003es2lan(hw, mask);
+}
+
+/**
+ *  e1000_acquire_mac_csr_80003es2lan - Acquire right to access Kumeran register
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire the semaphore to access the Kumeran interface.
+ *
+ **/
+static s32 e1000_acquire_mac_csr_80003es2lan(struct e1000_hw *hw)
+{
+	u16 mask;
+
+	mask = E1000_SWFW_CSR_SM;
+
+	return e1000_acquire_swfw_sync_80003es2lan(hw, mask);
+}
+
+/**
+ *  e1000_release_mac_csr_80003es2lan - Release right to access Kumeran Register
+ *  @hw: pointer to the HW structure
+ *
+ *  Release the semaphore used to access the Kumeran interface
+ **/
+static void e1000_release_mac_csr_80003es2lan(struct e1000_hw *hw)
+{
+	u16 mask;
+
+	mask = E1000_SWFW_CSR_SM;
+
+	e1000_release_swfw_sync_80003es2lan(hw, mask);
+}
+
+/**
+ *  e1000_acquire_nvm_80003es2lan - Acquire rights to access NVM
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire the semaphore to access the EEPROM.
+ **/
+static s32 e1000_acquire_nvm_80003es2lan(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	ret_val = e1000_acquire_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1000e_acquire_nvm(hw);
+
+	if (ret_val)
+		e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_release_nvm_80003es2lan - Relinquish rights to access NVM
+ *  @hw: pointer to the HW structure
+ *
+ *  Release the semaphore used to access the EEPROM.
+ **/
+static void e1000_release_nvm_80003es2lan(struct e1000_hw *hw)
+{
+	e1000e_release_nvm(hw);
+	e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM);
+}
+
+/**
+ *  e1000_acquire_swfw_sync_80003es2lan - Acquire SW/FW semaphore
+ *  @hw: pointer to the HW structure
+ *  @mask: specifies which semaphore to acquire
+ *
+ *  Acquire the SW/FW semaphore to access the PHY or NVM.  The mask
+ *  will also specify which port we're acquiring the lock for.
+ **/
+static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+	u32 swmask = mask;
+	u32 fwmask = mask << 16;
+	s32 i = 0;
+	s32 timeout = 50;
+
+	while (i < timeout) {
+		if (e1000e_get_hw_semaphore(hw))
+			return -E1000_ERR_SWFW_SYNC;
+
+		swfw_sync = er32(SW_FW_SYNC);
+		if (!(swfw_sync & (fwmask | swmask)))
+			break;
+
+		/* Firmware currently using resource (fwmask)
+		 * or other software thread using resource (swmask)
+		 */
+		e1000e_put_hw_semaphore(hw);
+		mdelay(5);
+		i++;
+	}
+
+	if (i == timeout) {
+		e_dbg("Driver can't access resource, SW_FW_SYNC timeout.\n");
+		return -E1000_ERR_SWFW_SYNC;
+	}
+
+	swfw_sync |= swmask;
+	ew32(SW_FW_SYNC, swfw_sync);
+
+	e1000e_put_hw_semaphore(hw);
+
+	return 0;
+}
+
+/**
+ *  e1000_release_swfw_sync_80003es2lan - Release SW/FW semaphore
+ *  @hw: pointer to the HW structure
+ *  @mask: specifies which semaphore to acquire
+ *
+ *  Release the SW/FW semaphore used to access the PHY or NVM.  The mask
+ *  will also specify which port we're releasing the lock for.
+ **/
+static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+
+	while (e1000e_get_hw_semaphore(hw) != 0)
+		; /* Empty */
+
+	swfw_sync = er32(SW_FW_SYNC);
+	swfw_sync &= ~mask;
+	ew32(SW_FW_SYNC, swfw_sync);
+
+	e1000e_put_hw_semaphore(hw);
+}
+
+/**
+ *  e1000_read_phy_reg_gg82563_80003es2lan - Read GG82563 PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: offset of the register to read
+ *  @data: pointer to the data returned from the operation
+ *
+ *  Read the GG82563 PHY register.
+ **/
+static s32 e1000_read_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw,
+						  u32 offset, u16 *data)
+{
+	s32 ret_val;
+	u32 page_select;
+	u16 temp;
+
+	ret_val = e1000_acquire_phy_80003es2lan(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Select Configuration Page */
+	if ((offset & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) {
+		page_select = GG82563_PHY_PAGE_SELECT;
+	} else {
+		/* Use Alternative Page Select register to access
+		 * registers 30 and 31
+		 */
+		page_select = GG82563_PHY_PAGE_SELECT_ALT;
+	}
+
+	temp = (u16)((u16)offset >> GG82563_PAGE_SHIFT);
+	ret_val = e1000e_write_phy_reg_mdic(hw, page_select, temp);
+	if (ret_val) {
+		e1000_release_phy_80003es2lan(hw);
+		return ret_val;
+	}
+
+	if (hw->dev_spec.e80003es2lan.mdic_wa_enable) {
+		/* The "ready" bit in the MDIC register may be incorrectly set
+		 * before the device has completed the "Page Select" MDI
+		 * transaction.  So we wait 200us after each MDI command...
+		 */
+		usleep_range(200, 400);
+
+		/* ...and verify the command was successful. */
+		ret_val = e1000e_read_phy_reg_mdic(hw, page_select, &temp);
+
+		if (((u16)offset >> GG82563_PAGE_SHIFT) != temp) {
+			e1000_release_phy_80003es2lan(hw);
+			return -E1000_ERR_PHY;
+		}
+
+		usleep_range(200, 400);
+
+		ret_val = e1000e_read_phy_reg_mdic(hw,
+						   MAX_PHY_REG_ADDRESS & offset,
+						   data);
+
+		usleep_range(200, 400);
+	} else {
+		ret_val = e1000e_read_phy_reg_mdic(hw,
+						   MAX_PHY_REG_ADDRESS & offset,
+						   data);
+	}
+
+	e1000_release_phy_80003es2lan(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_write_phy_reg_gg82563_80003es2lan - Write GG82563 PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: offset of the register to read
+ *  @data: value to write to the register
+ *
+ *  Write to the GG82563 PHY register.
+ **/
+static s32 e1000_write_phy_reg_gg82563_80003es2lan(struct e1000_hw *hw,
+						   u32 offset, u16 data)
+{
+	s32 ret_val;
+	u32 page_select;
+	u16 temp;
+
+	ret_val = e1000_acquire_phy_80003es2lan(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Select Configuration Page */
+	if ((offset & MAX_PHY_REG_ADDRESS) < GG82563_MIN_ALT_REG) {
+		page_select = GG82563_PHY_PAGE_SELECT;
+	} else {
+		/* Use Alternative Page Select register to access
+		 * registers 30 and 31
+		 */
+		page_select = GG82563_PHY_PAGE_SELECT_ALT;
+	}
+
+	temp = (u16)((u16)offset >> GG82563_PAGE_SHIFT);
+	ret_val = e1000e_write_phy_reg_mdic(hw, page_select, temp);
+	if (ret_val) {
+		e1000_release_phy_80003es2lan(hw);
+		return ret_val;
+	}
+
+	if (hw->dev_spec.e80003es2lan.mdic_wa_enable) {
+		/* The "ready" bit in the MDIC register may be incorrectly set
+		 * before the device has completed the "Page Select" MDI
+		 * transaction.  So we wait 200us after each MDI command...
+		 */
+		usleep_range(200, 400);
+
+		/* ...and verify the command was successful. */
+		ret_val = e1000e_read_phy_reg_mdic(hw, page_select, &temp);
+
+		if (((u16)offset >> GG82563_PAGE_SHIFT) != temp) {
+			e1000_release_phy_80003es2lan(hw);
+			return -E1000_ERR_PHY;
+		}
+
+		usleep_range(200, 400);
+
+		ret_val = e1000e_write_phy_reg_mdic(hw,
+						    MAX_PHY_REG_ADDRESS &
+						    offset, data);
+
+		usleep_range(200, 400);
+	} else {
+		ret_val = e1000e_write_phy_reg_mdic(hw,
+						    MAX_PHY_REG_ADDRESS &
+						    offset, data);
+	}
+
+	e1000_release_phy_80003es2lan(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_write_nvm_80003es2lan - Write to ESB2 NVM
+ *  @hw: pointer to the HW structure
+ *  @offset: offset of the register to read
+ *  @words: number of words to write
+ *  @data: buffer of data to write to the NVM
+ *
+ *  Write "words" of data to the ESB2 NVM.
+ **/
+static s32 e1000_write_nvm_80003es2lan(struct e1000_hw *hw, u16 offset,
+				       u16 words, u16 *data)
+{
+	return e1000e_write_nvm_spi(hw, offset, words, data);
+}
+
+/**
+ *  e1000_get_cfg_done_80003es2lan - Wait for configuration to complete
+ *  @hw: pointer to the HW structure
+ *
+ *  Wait a specific amount of time for manageability processes to complete.
+ *  This is a function pointer entry point called by the phy module.
+ **/
+static s32 e1000_get_cfg_done_80003es2lan(struct e1000_hw *hw)
+{
+	s32 timeout = PHY_CFG_TIMEOUT;
+	u32 mask = E1000_NVM_CFG_DONE_PORT_0;
+
+	if (hw->bus.func == 1)
+		mask = E1000_NVM_CFG_DONE_PORT_1;
+
+	while (timeout) {
+		if (er32(EEMNGCTL) & mask)
+			break;
+		usleep_range(1000, 2000);
+		timeout--;
+	}
+	if (!timeout) {
+		e_dbg("MNG configuration cycle has not completed.\n");
+		return -E1000_ERR_RESET;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_phy_force_speed_duplex_80003es2lan - Force PHY speed and duplex
+ *  @hw: pointer to the HW structure
+ *
+ *  Force the speed and duplex settings onto the PHY.  This is a
+ *  function pointer entry point called by the phy module.
+ **/
+static s32 e1000_phy_force_speed_duplex_80003es2lan(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 phy_data;
+	bool link;
+
+	/* Clear Auto-Crossover to force MDI manually.  M88E1000 requires MDI
+	 * forced whenever speed and duplex are forced.
+	 */
+	ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_data &= ~GG82563_PSCR_CROSSOVER_MODE_AUTO;
+	ret_val = e1e_wphy(hw, GG82563_PHY_SPEC_CTRL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	e_dbg("GG82563 PSCR: %X\n", phy_data);
+
+	ret_val = e1e_rphy(hw, MII_BMCR, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	e1000e_phy_force_speed_duplex_setup(hw, &phy_data);
+
+	/* Reset the phy to commit changes. */
+	phy_data |= BMCR_RESET;
+
+	ret_val = e1e_wphy(hw, MII_BMCR, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	udelay(1);
+
+	if (hw->phy.autoneg_wait_to_complete) {
+		e_dbg("Waiting for forced speed/duplex link on GG82563 phy.\n");
+
+		ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						      100000, &link);
+		if (ret_val)
+			return ret_val;
+
+		if (!link) {
+			/* We didn't get link.
+			 * Reset the DSP and cross our fingers.
+			 */
+			ret_val = e1000e_phy_reset_dsp(hw);
+			if (ret_val)
+				return ret_val;
+		}
+
+		/* Try once more */
+		ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						      100000, &link);
+		if (ret_val)
+			return ret_val;
+	}
+
+	ret_val = e1e_rphy(hw, GG82563_PHY_MAC_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Resetting the phy means we need to verify the TX_CLK corresponds
+	 * to the link speed.  10Mbps -> 2.5MHz, else 25MHz.
+	 */
+	phy_data &= ~GG82563_MSCR_TX_CLK_MASK;
+	if (hw->mac.forced_speed_duplex & E1000_ALL_10_SPEED)
+		phy_data |= GG82563_MSCR_TX_CLK_10MBPS_2_5;
+	else
+		phy_data |= GG82563_MSCR_TX_CLK_100MBPS_25;
+
+	/* In addition, we must re-enable CRS on Tx for both half and full
+	 * duplex.
+	 */
+	phy_data |= GG82563_MSCR_ASSERT_CRS_ON_TX;
+	ret_val = e1e_wphy(hw, GG82563_PHY_MAC_SPEC_CTRL, phy_data);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_get_cable_length_80003es2lan - Set approximate cable length
+ *  @hw: pointer to the HW structure
+ *
+ *  Find the approximate cable length as measured by the GG82563 PHY.
+ *  This is a function pointer entry point called by the phy module.
+ **/
+static s32 e1000_get_cable_length_80003es2lan(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, index;
+
+	ret_val = e1e_rphy(hw, GG82563_PHY_DSP_DISTANCE, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	index = phy_data & GG82563_DSPD_CABLE_LENGTH;
+
+	if (index >= GG82563_CABLE_LENGTH_TABLE_SIZE - 5)
+		return -E1000_ERR_PHY;
+
+	phy->min_cable_length = e1000_gg82563_cable_length_table[index];
+	phy->max_cable_length = e1000_gg82563_cable_length_table[index + 5];
+
+	phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2;
+
+	return 0;
+}
+
+/**
+ *  e1000_get_link_up_info_80003es2lan - Report speed and duplex
+ *  @hw: pointer to the HW structure
+ *  @speed: pointer to speed buffer
+ *  @duplex: pointer to duplex buffer
+ *
+ *  Retrieve the current speed and duplex configuration.
+ **/
+static s32 e1000_get_link_up_info_80003es2lan(struct e1000_hw *hw, u16 *speed,
+					      u16 *duplex)
+{
+	s32 ret_val;
+
+	if (hw->phy.media_type == e1000_media_type_copper) {
+		ret_val = e1000e_get_speed_and_duplex_copper(hw, speed, duplex);
+		hw->phy.ops.cfg_on_link_up(hw);
+	} else {
+		ret_val = e1000e_get_speed_and_duplex_fiber_serdes(hw,
+								   speed,
+								   duplex);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_reset_hw_80003es2lan - Reset the ESB2 controller
+ *  @hw: pointer to the HW structure
+ *
+ *  Perform a global reset to the ESB2 controller.
+ **/
+static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 ret_val;
+	u16 kum_reg_data;
+
+	/* Prevent the PCI-E bus from sticking if there is no TLP connection
+	 * on the last TLP read/write transaction when MAC is reset.
+	 */
+	ret_val = e1000e_disable_pcie_master(hw);
+	if (ret_val)
+		e_dbg("PCI-E Master disable polling has failed.\n");
+
+	e_dbg("Masking off all interrupts\n");
+	ew32(IMC, 0xffffffff);
+
+	ew32(RCTL, 0);
+	ew32(TCTL, E1000_TCTL_PSP);
+	e1e_flush();
+
+	usleep_range(10000, 11000);
+
+	ctrl = er32(CTRL);
+
+	ret_val = e1000_acquire_phy_80003es2lan(hw);
+	if (ret_val)
+		return ret_val;
+
+	e_dbg("Issuing a global reset to MAC\n");
+	ew32(CTRL, ctrl | E1000_CTRL_RST);
+	e1000_release_phy_80003es2lan(hw);
+
+	/* Disable IBIST slave mode (far-end loopback) */
+	ret_val =
+	    e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
+					    &kum_reg_data);
+	if (!ret_val) {
+		kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE;
+		ret_val = e1000_write_kmrn_reg_80003es2lan(hw,
+						 E1000_KMRNCTRLSTA_INBAND_PARAM,
+						 kum_reg_data);
+		if (ret_val)
+			e_dbg("Error disabling far-end loopback\n");
+	} else {
+		e_dbg("Error disabling far-end loopback\n");
+	}
+
+	ret_val = e1000e_get_auto_rd_done(hw);
+	if (ret_val)
+		/* We don't want to continue accessing MAC registers. */
+		return ret_val;
+
+	/* Clear any pending interrupt events. */
+	ew32(IMC, 0xffffffff);
+	er32(ICR);
+
+	return e1000_check_alt_mac_addr_generic(hw);
+}
+
+/**
+ *  e1000_init_hw_80003es2lan - Initialize the ESB2 controller
+ *  @hw: pointer to the HW structure
+ *
+ *  Initialize the hw bits, LED, VFTA, MTA, link and hw counters.
+ **/
+static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 reg_data;
+	s32 ret_val;
+	u16 kum_reg_data;
+	u16 i;
+
+	e1000_initialize_hw_bits_80003es2lan(hw);
+
+	/* Initialize identification LED */
+	ret_val = mac->ops.id_led_init(hw);
+	/* An error is not fatal and we should not stop init due to this */
+	if (ret_val)
+		e_dbg("Error initializing identification LED\n");
+
+	/* Disabling VLAN filtering */
+	e_dbg("Initializing the IEEE VLAN\n");
+	mac->ops.clear_vfta(hw);
+
+	/* Setup the receive address. */
+	e1000e_init_rx_addrs(hw, mac->rar_entry_count);
+
+	/* Zero out the Multicast HASH table */
+	e_dbg("Zeroing the MTA\n");
+	for (i = 0; i < mac->mta_reg_count; i++)
+		E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0);
+
+	/* Setup link and flow control */
+	ret_val = mac->ops.setup_link(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Disable IBIST slave mode (far-end loopback) */
+	ret_val =
+	    e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
+					    &kum_reg_data);
+	if (!ret_val) {
+		kum_reg_data |= E1000_KMRNCTRLSTA_IBIST_DISABLE;
+		ret_val = e1000_write_kmrn_reg_80003es2lan(hw,
+						 E1000_KMRNCTRLSTA_INBAND_PARAM,
+						 kum_reg_data);
+		if (ret_val)
+			e_dbg("Error disabling far-end loopback\n");
+	} else {
+		e_dbg("Error disabling far-end loopback\n");
+	}
+
+	/* Set the transmit descriptor write-back policy */
+	reg_data = er32(TXDCTL(0));
+	reg_data = ((reg_data & ~E1000_TXDCTL_WTHRESH) |
+		    E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC);
+	ew32(TXDCTL(0), reg_data);
+
+	/* ...for both queues. */
+	reg_data = er32(TXDCTL(1));
+	reg_data = ((reg_data & ~E1000_TXDCTL_WTHRESH) |
+		    E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC);
+	ew32(TXDCTL(1), reg_data);
+
+	/* Enable retransmit on late collisions */
+	reg_data = er32(TCTL);
+	reg_data |= E1000_TCTL_RTLC;
+	ew32(TCTL, reg_data);
+
+	/* Configure Gigabit Carry Extend Padding */
+	reg_data = er32(TCTL_EXT);
+	reg_data &= ~E1000_TCTL_EXT_GCEX_MASK;
+	reg_data |= DEFAULT_TCTL_EXT_GCEX_80003ES2LAN;
+	ew32(TCTL_EXT, reg_data);
+
+	/* Configure Transmit Inter-Packet Gap */
+	reg_data = er32(TIPG);
+	reg_data &= ~E1000_TIPG_IPGT_MASK;
+	reg_data |= DEFAULT_TIPG_IPGT_1000_80003ES2LAN;
+	ew32(TIPG, reg_data);
+
+	reg_data = E1000_READ_REG_ARRAY(hw, E1000_FFLT, 0x0001);
+	reg_data &= ~0x00100000;
+	E1000_WRITE_REG_ARRAY(hw, E1000_FFLT, 0x0001, reg_data);
+
+	/* default to true to enable the MDIC W/A */
+	hw->dev_spec.e80003es2lan.mdic_wa_enable = true;
+
+	ret_val =
+	    e1000_read_kmrn_reg_80003es2lan(hw, E1000_KMRNCTRLSTA_OFFSET >>
+					    E1000_KMRNCTRLSTA_OFFSET_SHIFT, &i);
+	if (!ret_val) {
+		if ((i & E1000_KMRNCTRLSTA_OPMODE_MASK) ==
+		    E1000_KMRNCTRLSTA_OPMODE_INBAND_MDIO)
+			hw->dev_spec.e80003es2lan.mdic_wa_enable = false;
+	}
+
+	/* Clear all of the statistics registers (clear on read).  It is
+	 * important that we do this after we have tried to establish link
+	 * because the symbol error count will increment wildly if there
+	 * is no link.
+	 */
+	e1000_clear_hw_cntrs_80003es2lan(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_initialize_hw_bits_80003es2lan - Init hw bits of ESB2
+ *  @hw: pointer to the HW structure
+ *
+ *  Initializes required hardware-dependent bits needed for normal operation.
+ **/
+static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw)
+{
+	u32 reg;
+
+	/* Transmit Descriptor Control 0 */
+	reg = er32(TXDCTL(0));
+	reg |= BIT(22);
+	ew32(TXDCTL(0), reg);
+
+	/* Transmit Descriptor Control 1 */
+	reg = er32(TXDCTL(1));
+	reg |= BIT(22);
+	ew32(TXDCTL(1), reg);
+
+	/* Transmit Arbitration Control 0 */
+	reg = er32(TARC(0));
+	reg &= ~(0xF << 27);	/* 30:27 */
+	if (hw->phy.media_type != e1000_media_type_copper)
+		reg &= ~BIT(20);
+	ew32(TARC(0), reg);
+
+	/* Transmit Arbitration Control 1 */
+	reg = er32(TARC(1));
+	if (er32(TCTL) & E1000_TCTL_MULR)
+		reg &= ~BIT(28);
+	else
+		reg |= BIT(28);
+	ew32(TARC(1), reg);
+
+	/* Disable IPv6 extension header parsing because some malformed
+	 * IPv6 headers can hang the Rx.
+	 */
+	reg = er32(RFCTL);
+	reg |= (E1000_RFCTL_IPV6_EX_DIS | E1000_RFCTL_NEW_IPV6_EXT_DIS);
+	ew32(RFCTL, reg);
+}
+
+/**
+ *  e1000_copper_link_setup_gg82563_80003es2lan - Configure GG82563 Link
+ *  @hw: pointer to the HW structure
+ *
+ *  Setup some GG82563 PHY registers for obtaining link
+ **/
+static s32 e1000_copper_link_setup_gg82563_80003es2lan(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u32 reg;
+	u16 data;
+
+	ret_val = e1e_rphy(hw, GG82563_PHY_MAC_SPEC_CTRL, &data);
+	if (ret_val)
+		return ret_val;
+
+	data |= GG82563_MSCR_ASSERT_CRS_ON_TX;
+	/* Use 25MHz for both link down and 1000Base-T for Tx clock. */
+	data |= GG82563_MSCR_TX_CLK_1000MBPS_25;
+
+	ret_val = e1e_wphy(hw, GG82563_PHY_MAC_SPEC_CTRL, data);
+	if (ret_val)
+		return ret_val;
+
+	/* Options:
+	 *   MDI/MDI-X = 0 (default)
+	 *   0 - Auto for all speeds
+	 *   1 - MDI mode
+	 *   2 - MDI-X mode
+	 *   3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes)
+	 */
+	ret_val = e1e_rphy(hw, GG82563_PHY_SPEC_CTRL, &data);
+	if (ret_val)
+		return ret_val;
+
+	data &= ~GG82563_PSCR_CROSSOVER_MODE_MASK;
+
+	switch (phy->mdix) {
+	case 1:
+		data |= GG82563_PSCR_CROSSOVER_MODE_MDI;
+		break;
+	case 2:
+		data |= GG82563_PSCR_CROSSOVER_MODE_MDIX;
+		break;
+	case 0:
+	default:
+		data |= GG82563_PSCR_CROSSOVER_MODE_AUTO;
+		break;
+	}
+
+	/* Options:
+	 *   disable_polarity_correction = 0 (default)
+	 *       Automatic Correction for Reversed Cable Polarity
+	 *   0 - Disabled
+	 *   1 - Enabled
+	 */
+	data &= ~GG82563_PSCR_POLARITY_REVERSAL_DISABLE;
+	if (phy->disable_polarity_correction)
+		data |= GG82563_PSCR_POLARITY_REVERSAL_DISABLE;
+
+	ret_val = e1e_wphy(hw, GG82563_PHY_SPEC_CTRL, data);
+	if (ret_val)
+		return ret_val;
+
+	/* SW Reset the PHY so all changes take effect */
+	ret_val = hw->phy.ops.commit(hw);
+	if (ret_val) {
+		e_dbg("Error Resetting the PHY\n");
+		return ret_val;
+	}
+
+	/* Bypass Rx and Tx FIFO's */
+	reg = E1000_KMRNCTRLSTA_OFFSET_FIFO_CTRL;
+	data = (E1000_KMRNCTRLSTA_FIFO_CTRL_RX_BYPASS |
+		E1000_KMRNCTRLSTA_FIFO_CTRL_TX_BYPASS);
+	ret_val = e1000_write_kmrn_reg_80003es2lan(hw, reg, data);
+	if (ret_val)
+		return ret_val;
+
+	reg = E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE;
+	ret_val = e1000_read_kmrn_reg_80003es2lan(hw, reg, &data);
+	if (ret_val)
+		return ret_val;
+	data |= E1000_KMRNCTRLSTA_OPMODE_E_IDLE;
+	ret_val = e1000_write_kmrn_reg_80003es2lan(hw, reg, data);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1e_rphy(hw, GG82563_PHY_SPEC_CTRL_2, &data);
+	if (ret_val)
+		return ret_val;
+
+	data &= ~GG82563_PSCR2_REVERSE_AUTO_NEG;
+	ret_val = e1e_wphy(hw, GG82563_PHY_SPEC_CTRL_2, data);
+	if (ret_val)
+		return ret_val;
+
+	reg = er32(CTRL_EXT);
+	reg &= ~E1000_CTRL_EXT_LINK_MODE_MASK;
+	ew32(CTRL_EXT, reg);
+
+	ret_val = e1e_rphy(hw, GG82563_PHY_PWR_MGMT_CTRL, &data);
+	if (ret_val)
+		return ret_val;
+
+	/* Do not init these registers when the HW is in IAMT mode, since the
+	 * firmware will have already initialized them.  We only initialize
+	 * them if the HW is not in IAMT mode.
+	 */
+	if (!hw->mac.ops.check_mng_mode(hw)) {
+		/* Enable Electrical Idle on the PHY */
+		data |= GG82563_PMCR_ENABLE_ELECTRICAL_IDLE;
+		ret_val = e1e_wphy(hw, GG82563_PHY_PWR_MGMT_CTRL, data);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = e1e_rphy(hw, GG82563_PHY_KMRN_MODE_CTRL, &data);
+		if (ret_val)
+			return ret_val;
+
+		data &= ~GG82563_KMCR_PASS_FALSE_CARRIER;
+		ret_val = e1e_wphy(hw, GG82563_PHY_KMRN_MODE_CTRL, data);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Workaround: Disable padding in Kumeran interface in the MAC
+	 * and in the PHY to avoid CRC errors.
+	 */
+	ret_val = e1e_rphy(hw, GG82563_PHY_INBAND_CTRL, &data);
+	if (ret_val)
+		return ret_val;
+
+	data |= GG82563_ICR_DIS_PADDING;
+	ret_val = e1e_wphy(hw, GG82563_PHY_INBAND_CTRL, data);
+	if (ret_val)
+		return ret_val;
+
+	return 0;
+}
+
+/**
+ *  e1000_setup_copper_link_80003es2lan - Setup Copper Link for ESB2
+ *  @hw: pointer to the HW structure
+ *
+ *  Essentially a wrapper for setting up all things "copper" related.
+ *  This is a function pointer entry point called by the mac module.
+ **/
+static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 ret_val;
+	u16 reg_data;
+
+	ctrl = er32(CTRL);
+	ctrl |= E1000_CTRL_SLU;
+	ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+	ew32(CTRL, ctrl);
+
+	/* Set the mac to wait the maximum time between each
+	 * iteration and increase the max iterations when
+	 * polling the phy; this fixes erroneous timeouts at 10Mbps.
+	 */
+	ret_val = e1000_write_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 4),
+						   0xFFFF);
+	if (ret_val)
+		return ret_val;
+	ret_val = e1000_read_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 9),
+						  &reg_data);
+	if (ret_val)
+		return ret_val;
+	reg_data |= 0x3F;
+	ret_val = e1000_write_kmrn_reg_80003es2lan(hw, GG82563_REG(0x34, 9),
+						   reg_data);
+	if (ret_val)
+		return ret_val;
+	ret_val =
+	    e1000_read_kmrn_reg_80003es2lan(hw,
+					    E1000_KMRNCTRLSTA_OFFSET_INB_CTRL,
+					    &reg_data);
+	if (ret_val)
+		return ret_val;
+	reg_data |= E1000_KMRNCTRLSTA_INB_CTRL_DIS_PADDING;
+	ret_val =
+	    e1000_write_kmrn_reg_80003es2lan(hw,
+					     E1000_KMRNCTRLSTA_OFFSET_INB_CTRL,
+					     reg_data);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1000_copper_link_setup_gg82563_80003es2lan(hw);
+	if (ret_val)
+		return ret_val;
+
+	return e1000e_setup_copper_link(hw);
+}
+
+/**
+ *  e1000_cfg_on_link_up_80003es2lan - es2 link configuration after link-up
+ *  @hw: pointer to the HW structure
+ *
+ *  Configure the KMRN interface by applying last minute quirks for
+ *  10/100 operation.
+ **/
+static s32 e1000_cfg_on_link_up_80003es2lan(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u16 speed;
+	u16 duplex;
+
+	if (hw->phy.media_type == e1000_media_type_copper) {
+		ret_val = e1000e_get_speed_and_duplex_copper(hw, &speed,
+							     &duplex);
+		if (ret_val)
+			return ret_val;
+
+		if (speed == SPEED_1000)
+			ret_val = e1000_cfg_kmrn_1000_80003es2lan(hw);
+		else
+			ret_val = e1000_cfg_kmrn_10_100_80003es2lan(hw, duplex);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_cfg_kmrn_10_100_80003es2lan - Apply "quirks" for 10/100 operation
+ *  @hw: pointer to the HW structure
+ *  @duplex: current duplex setting
+ *
+ *  Configure the KMRN interface by applying last minute quirks for
+ *  10/100 operation.
+ **/
+static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex)
+{
+	s32 ret_val;
+	u32 tipg;
+	u32 i = 0;
+	u16 reg_data, reg_data2;
+
+	reg_data = E1000_KMRNCTRLSTA_HD_CTRL_10_100_DEFAULT;
+	ret_val =
+	    e1000_write_kmrn_reg_80003es2lan(hw,
+					     E1000_KMRNCTRLSTA_OFFSET_HD_CTRL,
+					     reg_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Configure Transmit Inter-Packet Gap */
+	tipg = er32(TIPG);
+	tipg &= ~E1000_TIPG_IPGT_MASK;
+	tipg |= DEFAULT_TIPG_IPGT_10_100_80003ES2LAN;
+	ew32(TIPG, tipg);
+
+	do {
+		ret_val = e1e_rphy(hw, GG82563_PHY_KMRN_MODE_CTRL, &reg_data);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = e1e_rphy(hw, GG82563_PHY_KMRN_MODE_CTRL, &reg_data2);
+		if (ret_val)
+			return ret_val;
+		i++;
+	} while ((reg_data != reg_data2) && (i < GG82563_MAX_KMRN_RETRY));
+
+	if (duplex == HALF_DUPLEX)
+		reg_data |= GG82563_KMCR_PASS_FALSE_CARRIER;
+	else
+		reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER;
+
+	return e1e_wphy(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data);
+}
+
+/**
+ *  e1000_cfg_kmrn_1000_80003es2lan - Apply "quirks" for gigabit operation
+ *  @hw: pointer to the HW structure
+ *
+ *  Configure the KMRN interface by applying last minute quirks for
+ *  gigabit operation.
+ **/
+static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 reg_data, reg_data2;
+	u32 tipg;
+	u32 i = 0;
+
+	reg_data = E1000_KMRNCTRLSTA_HD_CTRL_1000_DEFAULT;
+	ret_val =
+	    e1000_write_kmrn_reg_80003es2lan(hw,
+					     E1000_KMRNCTRLSTA_OFFSET_HD_CTRL,
+					     reg_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Configure Transmit Inter-Packet Gap */
+	tipg = er32(TIPG);
+	tipg &= ~E1000_TIPG_IPGT_MASK;
+	tipg |= DEFAULT_TIPG_IPGT_1000_80003ES2LAN;
+	ew32(TIPG, tipg);
+
+	do {
+		ret_val = e1e_rphy(hw, GG82563_PHY_KMRN_MODE_CTRL, &reg_data);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = e1e_rphy(hw, GG82563_PHY_KMRN_MODE_CTRL, &reg_data2);
+		if (ret_val)
+			return ret_val;
+		i++;
+	} while ((reg_data != reg_data2) && (i < GG82563_MAX_KMRN_RETRY));
+
+	reg_data &= ~GG82563_KMCR_PASS_FALSE_CARRIER;
+
+	return e1e_wphy(hw, GG82563_PHY_KMRN_MODE_CTRL, reg_data);
+}
+
+/**
+ *  e1000_read_kmrn_reg_80003es2lan - Read kumeran register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Acquire semaphore, then read the PHY register at offset
+ *  using the kumeran interface.  The information retrieved is stored in data.
+ *  Release the semaphore before exiting.
+ **/
+static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset,
+					   u16 *data)
+{
+	u32 kmrnctrlsta;
+	s32 ret_val;
+
+	ret_val = e1000_acquire_mac_csr_80003es2lan(hw);
+	if (ret_val)
+		return ret_val;
+
+	kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) &
+		       E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN;
+	ew32(KMRNCTRLSTA, kmrnctrlsta);
+	e1e_flush();
+
+	udelay(2);
+
+	kmrnctrlsta = er32(KMRNCTRLSTA);
+	*data = (u16)kmrnctrlsta;
+
+	e1000_release_mac_csr_80003es2lan(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_write_kmrn_reg_80003es2lan - Write kumeran register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Acquire semaphore, then write the data to PHY register
+ *  at the offset using the kumeran interface.  Release semaphore
+ *  before exiting.
+ **/
+static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset,
+					    u16 data)
+{
+	u32 kmrnctrlsta;
+	s32 ret_val;
+
+	ret_val = e1000_acquire_mac_csr_80003es2lan(hw);
+	if (ret_val)
+		return ret_val;
+
+	kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) &
+		       E1000_KMRNCTRLSTA_OFFSET) | data;
+	ew32(KMRNCTRLSTA, kmrnctrlsta);
+	e1e_flush();
+
+	udelay(2);
+
+	e1000_release_mac_csr_80003es2lan(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_read_mac_addr_80003es2lan - Read device MAC address
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	/* If there's an alternate MAC address place it in RAR0
+	 * so that it will override the Si installed default perm
+	 * address.
+	 */
+	ret_val = e1000_check_alt_mac_addr_generic(hw);
+	if (ret_val)
+		return ret_val;
+
+	return e1000_read_mac_addr_generic(hw);
+}
+
+/**
+ * e1000_power_down_phy_copper_80003es2lan - Remove link during PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, or wake on lan is not enabled, remove the link.
+ **/
+static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw)
+{
+	/* If the management interface is not enabled, then power down */
+	if (!(hw->mac.ops.check_mng_mode(hw) ||
+	      hw->phy.ops.check_reset_block(hw)))
+		e1000_power_down_phy_copper(hw);
+}
+
+/**
+ *  e1000_clear_hw_cntrs_80003es2lan - Clear device specific hardware counters
+ *  @hw: pointer to the HW structure
+ *
+ *  Clears the hardware counters by reading the counter registers.
+ **/
+static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw)
+{
+	e1000e_clear_hw_cntrs_base(hw);
+
+	er32(PRC64);
+	er32(PRC127);
+	er32(PRC255);
+	er32(PRC511);
+	er32(PRC1023);
+	er32(PRC1522);
+	er32(PTC64);
+	er32(PTC127);
+	er32(PTC255);
+	er32(PTC511);
+	er32(PTC1023);
+	er32(PTC1522);
+
+	er32(ALGNERRC);
+	er32(RXERRC);
+	er32(TNCRS);
+	er32(CEXTERR);
+	er32(TSCTC);
+	er32(TSCTFC);
+
+	er32(MGTPRC);
+	er32(MGTPDC);
+	er32(MGTPTC);
+
+	er32(IAC);
+	er32(ICRXOC);
+
+	er32(ICRXPTC);
+	er32(ICRXATC);
+	er32(ICTXPTC);
+	er32(ICTXATC);
+	er32(ICTXQEC);
+	er32(ICTXQMTC);
+	er32(ICRXDMTC);
+}
+
+static const struct e1000_mac_operations es2_mac_ops = {
+	.read_mac_addr		= e1000_read_mac_addr_80003es2lan,
+	.id_led_init		= e1000e_id_led_init_generic,
+	.blink_led		= e1000e_blink_led_generic,
+	.check_mng_mode		= e1000e_check_mng_mode_generic,
+	/* check_for_link dependent on media type */
+	.cleanup_led		= e1000e_cleanup_led_generic,
+	.clear_hw_cntrs		= e1000_clear_hw_cntrs_80003es2lan,
+	.get_bus_info		= e1000e_get_bus_info_pcie,
+	.set_lan_id		= e1000_set_lan_id_multi_port_pcie,
+	.get_link_up_info	= e1000_get_link_up_info_80003es2lan,
+	.led_on			= e1000e_led_on_generic,
+	.led_off		= e1000e_led_off_generic,
+	.update_mc_addr_list	= e1000e_update_mc_addr_list_generic,
+	.write_vfta		= e1000_write_vfta_generic,
+	.clear_vfta		= e1000_clear_vfta_generic,
+	.reset_hw		= e1000_reset_hw_80003es2lan,
+	.init_hw		= e1000_init_hw_80003es2lan,
+	.setup_link		= e1000e_setup_link_generic,
+	/* setup_physical_interface dependent on media type */
+	.setup_led		= e1000e_setup_led_generic,
+	.config_collision_dist	= e1000e_config_collision_dist_generic,
+	.rar_set		= e1000e_rar_set_generic,
+	.rar_get_count		= e1000e_rar_get_count_generic,
+};
+
+static const struct e1000_phy_operations es2_phy_ops = {
+	.acquire		= e1000_acquire_phy_80003es2lan,
+	.check_polarity		= e1000_check_polarity_m88,
+	.check_reset_block	= e1000e_check_reset_block_generic,
+	.commit			= e1000e_phy_sw_reset,
+	.force_speed_duplex	= e1000_phy_force_speed_duplex_80003es2lan,
+	.get_cfg_done		= e1000_get_cfg_done_80003es2lan,
+	.get_cable_length	= e1000_get_cable_length_80003es2lan,
+	.get_info		= e1000e_get_phy_info_m88,
+	.read_reg		= e1000_read_phy_reg_gg82563_80003es2lan,
+	.release		= e1000_release_phy_80003es2lan,
+	.reset			= e1000e_phy_hw_reset_generic,
+	.set_d0_lplu_state	= NULL,
+	.set_d3_lplu_state	= e1000e_set_d3_lplu_state,
+	.write_reg		= e1000_write_phy_reg_gg82563_80003es2lan,
+	.cfg_on_link_up		= e1000_cfg_on_link_up_80003es2lan,
+};
+
+static const struct e1000_nvm_operations es2_nvm_ops = {
+	.acquire		= e1000_acquire_nvm_80003es2lan,
+	.read			= e1000e_read_nvm_eerd,
+	.release		= e1000_release_nvm_80003es2lan,
+	.reload			= e1000e_reload_nvm_generic,
+	.update			= e1000e_update_nvm_checksum_generic,
+	.valid_led_default	= e1000e_valid_led_default,
+	.validate		= e1000e_validate_nvm_checksum_generic,
+	.write			= e1000_write_nvm_80003es2lan,
+};
+
+const struct e1000_info e1000_es2_info = {
+	.mac			= e1000_80003es2lan,
+	.flags			= FLAG_HAS_HW_VLAN_FILTER
+				  | FLAG_HAS_JUMBO_FRAMES
+				  | FLAG_HAS_WOL
+				  | FLAG_APME_IN_CTRL3
+				  | FLAG_HAS_CTRLEXT_ON_LOAD
+				  | FLAG_RX_NEEDS_RESTART /* errata */
+				  | FLAG_TARC_SET_BIT_ZERO /* errata */
+				  | FLAG_APME_CHECK_PORT_B
+				  | FLAG_DISABLE_FC_PAUSE_TIME, /* errata */
+	.flags2			= FLAG2_DMA_BURST,
+	.pba			= 38,
+	.max_hw_frame_size	= DEFAULT_JUMBO,
+	.get_variants		= e1000_get_variants_80003es2lan,
+	.mac_ops		= &es2_mac_ops,
+	.phy_ops		= &es2_phy_ops,
+	.nvm_ops		= &es2_nvm_ops,
+};
diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.h b/drivers/net/ethernet/intel/e1000e/80003es2lan.h
new file mode 100644
index 0000000000..aa9d639c6c
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _E1000E_80003ES2LAN_H_
+#define _E1000E_80003ES2LAN_H_
+
+#define E1000_KMRNCTRLSTA_OFFSET_FIFO_CTRL	0x00
+#define E1000_KMRNCTRLSTA_OFFSET_INB_CTRL	0x02
+#define E1000_KMRNCTRLSTA_OFFSET_HD_CTRL	0x10
+#define E1000_KMRNCTRLSTA_OFFSET_MAC2PHY_OPMODE	0x1F
+
+#define E1000_KMRNCTRLSTA_FIFO_CTRL_RX_BYPASS	0x0008
+#define E1000_KMRNCTRLSTA_FIFO_CTRL_TX_BYPASS	0x0800
+#define E1000_KMRNCTRLSTA_INB_CTRL_DIS_PADDING	0x0010
+
+#define E1000_KMRNCTRLSTA_HD_CTRL_10_100_DEFAULT 0x0004
+#define E1000_KMRNCTRLSTA_HD_CTRL_1000_DEFAULT	0x0000
+#define E1000_KMRNCTRLSTA_OPMODE_E_IDLE		0x2000
+
+#define E1000_KMRNCTRLSTA_OPMODE_MASK		0x000C
+#define E1000_KMRNCTRLSTA_OPMODE_INBAND_MDIO	0x0004
+
+#define E1000_TCTL_EXT_GCEX_MASK 0x000FFC00	/* Gig Carry Extend Padding */
+#define DEFAULT_TCTL_EXT_GCEX_80003ES2LAN	0x00010000
+
+#define DEFAULT_TIPG_IPGT_1000_80003ES2LAN	0x8
+#define DEFAULT_TIPG_IPGT_10_100_80003ES2LAN	0x9
+
+/* GG82563 PHY Specific Status Register (Page 0, Register 16 */
+#define GG82563_PSCR_POLARITY_REVERSAL_DISABLE	0x0002	/* 1=Reversal Dis */
+#define GG82563_PSCR_CROSSOVER_MODE_MASK	0x0060
+#define GG82563_PSCR_CROSSOVER_MODE_MDI		0x0000	/* 00=Manual MDI */
+#define GG82563_PSCR_CROSSOVER_MODE_MDIX	0x0020	/* 01=Manual MDIX */
+#define GG82563_PSCR_CROSSOVER_MODE_AUTO	0x0060	/* 11=Auto crossover */
+
+/* PHY Specific Control Register 2 (Page 0, Register 26) */
+#define GG82563_PSCR2_REVERSE_AUTO_NEG		0x2000	/* 1=Reverse Auto-Neg */
+
+/* MAC Specific Control Register (Page 2, Register 21) */
+/* Tx clock speed for Link Down and 1000BASE-T for the following speeds */
+#define GG82563_MSCR_TX_CLK_MASK		0x0007
+#define GG82563_MSCR_TX_CLK_10MBPS_2_5		0x0004
+#define GG82563_MSCR_TX_CLK_100MBPS_25		0x0005
+#define GG82563_MSCR_TX_CLK_1000MBPS_25		0x0007
+
+#define GG82563_MSCR_ASSERT_CRS_ON_TX		0x0010	/* 1=Assert */
+
+/* DSP Distance Register (Page 5, Register 26)
+ * 0 = <50M
+ * 1 = 50-80M
+ * 2 = 80-100M
+ * 3 = 110-140M
+ * 4 = >140M
+ */
+#define GG82563_DSPD_CABLE_LENGTH		0x0007
+
+/* Kumeran Mode Control Register (Page 193, Register 16) */
+#define GG82563_KMCR_PASS_FALSE_CARRIER		0x0800
+
+/* Max number of times Kumeran read/write should be validated */
+#define GG82563_MAX_KMRN_RETRY			0x5
+
+/* Power Management Control Register (Page 193, Register 20) */
+/* 1=Enable SERDES Electrical Idle */
+#define GG82563_PMCR_ENABLE_ELECTRICAL_IDLE	0x0001
+
+/* In-Band Control Register (Page 194, Register 18) */
+#define GG82563_ICR_DIS_PADDING			0x0010	/* Disable Padding */
+
+#endif
diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
new file mode 100644
index 0000000000..0b1e890dd5
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -0,0 +1,2049 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+/* 82571EB Gigabit Ethernet Controller
+ * 82571EB Gigabit Ethernet Controller (Copper)
+ * 82571EB Gigabit Ethernet Controller (Fiber)
+ * 82571EB Dual Port Gigabit Mezzanine Adapter
+ * 82571EB Quad Port Gigabit Mezzanine Adapter
+ * 82571PT Gigabit PT Quad Port Server ExpressModule
+ * 82572EI Gigabit Ethernet Controller (Copper)
+ * 82572EI Gigabit Ethernet Controller (Fiber)
+ * 82572EI Gigabit Ethernet Controller
+ * 82573V Gigabit Ethernet Controller (Copper)
+ * 82573E Gigabit Ethernet Controller (Copper)
+ * 82573L Gigabit Ethernet Controller
+ * 82574L Gigabit Network Connection
+ * 82583V Gigabit Network Connection
+ */
+
+#include "e1000.h"
+
+static s32 e1000_get_phy_id_82571(struct e1000_hw *hw);
+static s32 e1000_setup_copper_link_82571(struct e1000_hw *hw);
+static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw);
+static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw);
+static s32 e1000_write_nvm_eewr_82571(struct e1000_hw *hw, u16 offset,
+				      u16 words, u16 *data);
+static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw);
+static void e1000_initialize_hw_bits_82571(struct e1000_hw *hw);
+static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw);
+static bool e1000_check_mng_mode_82574(struct e1000_hw *hw);
+static s32 e1000_led_on_82574(struct e1000_hw *hw);
+static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw);
+static void e1000_power_down_phy_copper_82571(struct e1000_hw *hw);
+static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw);
+static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw);
+static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw);
+static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active);
+static s32 e1000_set_d3_lplu_state_82574(struct e1000_hw *hw, bool active);
+
+/**
+ *  e1000_init_phy_params_82571 - Init PHY func ptrs.
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_init_phy_params_82571(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+
+	if (hw->phy.media_type != e1000_media_type_copper) {
+		phy->type = e1000_phy_none;
+		return 0;
+	}
+
+	phy->addr = 1;
+	phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT;
+	phy->reset_delay_us = 100;
+
+	phy->ops.power_up = e1000_power_up_phy_copper;
+	phy->ops.power_down = e1000_power_down_phy_copper_82571;
+
+	switch (hw->mac.type) {
+	case e1000_82571:
+	case e1000_82572:
+		phy->type = e1000_phy_igp_2;
+		break;
+	case e1000_82573:
+		phy->type = e1000_phy_m88;
+		break;
+	case e1000_82574:
+	case e1000_82583:
+		phy->type = e1000_phy_bm;
+		phy->ops.acquire = e1000_get_hw_semaphore_82574;
+		phy->ops.release = e1000_put_hw_semaphore_82574;
+		phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82574;
+		phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82574;
+		break;
+	default:
+		return -E1000_ERR_PHY;
+	}
+
+	/* This can only be done after all function pointers are setup. */
+	ret_val = e1000_get_phy_id_82571(hw);
+	if (ret_val) {
+		e_dbg("Error getting PHY ID\n");
+		return ret_val;
+	}
+
+	/* Verify phy id */
+	switch (hw->mac.type) {
+	case e1000_82571:
+	case e1000_82572:
+		if (phy->id != IGP01E1000_I_PHY_ID)
+			ret_val = -E1000_ERR_PHY;
+		break;
+	case e1000_82573:
+		if (phy->id != M88E1111_I_PHY_ID)
+			ret_val = -E1000_ERR_PHY;
+		break;
+	case e1000_82574:
+	case e1000_82583:
+		if (phy->id != BME1000_E_PHY_ID_R2)
+			ret_val = -E1000_ERR_PHY;
+		break;
+	default:
+		ret_val = -E1000_ERR_PHY;
+		break;
+	}
+
+	if (ret_val)
+		e_dbg("PHY ID unknown: type = 0x%08x\n", phy->id);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_init_nvm_params_82571 - Init NVM func ptrs.
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_init_nvm_params_82571(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 eecd = er32(EECD);
+	u16 size;
+
+	nvm->opcode_bits = 8;
+	nvm->delay_usec = 1;
+	switch (nvm->override) {
+	case e1000_nvm_override_spi_large:
+		nvm->page_size = 32;
+		nvm->address_bits = 16;
+		break;
+	case e1000_nvm_override_spi_small:
+		nvm->page_size = 8;
+		nvm->address_bits = 8;
+		break;
+	default:
+		nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8;
+		nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? 16 : 8;
+		break;
+	}
+
+	switch (hw->mac.type) {
+	case e1000_82573:
+	case e1000_82574:
+	case e1000_82583:
+		if (((eecd >> 15) & 0x3) == 0x3) {
+			nvm->type = e1000_nvm_flash_hw;
+			nvm->word_size = 2048;
+			/* Autonomous Flash update bit must be cleared due
+			 * to Flash update issue.
+			 */
+			eecd &= ~E1000_EECD_AUPDEN;
+			ew32(EECD, eecd);
+			break;
+		}
+		fallthrough;
+	default:
+		nvm->type = e1000_nvm_eeprom_spi;
+		size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >>
+			     E1000_EECD_SIZE_EX_SHIFT);
+		/* Added to a constant, "size" becomes the left-shift value
+		 * for setting word_size.
+		 */
+		size += NVM_WORD_SIZE_BASE_SHIFT;
+
+		/* EEPROM access above 16k is unsupported */
+		if (size > 14)
+			size = 14;
+		nvm->word_size = BIT(size);
+		break;
+	}
+
+	/* Function Pointers */
+	switch (hw->mac.type) {
+	case e1000_82574:
+	case e1000_82583:
+		nvm->ops.acquire = e1000_get_hw_semaphore_82574;
+		nvm->ops.release = e1000_put_hw_semaphore_82574;
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_init_mac_params_82571 - Init MAC func ptrs.
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_init_mac_params_82571(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 swsm = 0;
+	u32 swsm2 = 0;
+	bool force_clear_smbi = false;
+
+	/* Set media type and media-dependent function pointers */
+	switch (hw->adapter->pdev->device) {
+	case E1000_DEV_ID_82571EB_FIBER:
+	case E1000_DEV_ID_82572EI_FIBER:
+	case E1000_DEV_ID_82571EB_QUAD_FIBER:
+		hw->phy.media_type = e1000_media_type_fiber;
+		mac->ops.setup_physical_interface =
+		    e1000_setup_fiber_serdes_link_82571;
+		mac->ops.check_for_link = e1000e_check_for_fiber_link;
+		mac->ops.get_link_up_info =
+		    e1000e_get_speed_and_duplex_fiber_serdes;
+		break;
+	case E1000_DEV_ID_82571EB_SERDES:
+	case E1000_DEV_ID_82571EB_SERDES_DUAL:
+	case E1000_DEV_ID_82571EB_SERDES_QUAD:
+	case E1000_DEV_ID_82572EI_SERDES:
+		hw->phy.media_type = e1000_media_type_internal_serdes;
+		mac->ops.setup_physical_interface =
+		    e1000_setup_fiber_serdes_link_82571;
+		mac->ops.check_for_link = e1000_check_for_serdes_link_82571;
+		mac->ops.get_link_up_info =
+		    e1000e_get_speed_and_duplex_fiber_serdes;
+		break;
+	default:
+		hw->phy.media_type = e1000_media_type_copper;
+		mac->ops.setup_physical_interface =
+		    e1000_setup_copper_link_82571;
+		mac->ops.check_for_link = e1000e_check_for_copper_link;
+		mac->ops.get_link_up_info = e1000e_get_speed_and_duplex_copper;
+		break;
+	}
+
+	/* Set mta register count */
+	mac->mta_reg_count = 128;
+	/* Set rar entry count */
+	mac->rar_entry_count = E1000_RAR_ENTRIES;
+	/* Adaptive IFS supported */
+	mac->adaptive_ifs = true;
+
+	/* MAC-specific function pointers */
+	switch (hw->mac.type) {
+	case e1000_82573:
+		mac->ops.set_lan_id = e1000_set_lan_id_single_port;
+		mac->ops.check_mng_mode = e1000e_check_mng_mode_generic;
+		mac->ops.led_on = e1000e_led_on_generic;
+		mac->ops.blink_led = e1000e_blink_led_generic;
+
+		/* FWSM register */
+		mac->has_fwsm = true;
+		/* ARC supported; valid only if manageability features are
+		 * enabled.
+		 */
+		mac->arc_subsystem_valid = !!(er32(FWSM) &
+					      E1000_FWSM_MODE_MASK);
+		break;
+	case e1000_82574:
+	case e1000_82583:
+		mac->ops.set_lan_id = e1000_set_lan_id_single_port;
+		mac->ops.check_mng_mode = e1000_check_mng_mode_82574;
+		mac->ops.led_on = e1000_led_on_82574;
+		break;
+	default:
+		mac->ops.check_mng_mode = e1000e_check_mng_mode_generic;
+		mac->ops.led_on = e1000e_led_on_generic;
+		mac->ops.blink_led = e1000e_blink_led_generic;
+
+		/* FWSM register */
+		mac->has_fwsm = true;
+		break;
+	}
+
+	/* Ensure that the inter-port SWSM.SMBI lock bit is clear before
+	 * first NVM or PHY access. This should be done for single-port
+	 * devices, and for one port only on dual-port devices so that
+	 * for those devices we can still use the SMBI lock to synchronize
+	 * inter-port accesses to the PHY & NVM.
+	 */
+	switch (hw->mac.type) {
+	case e1000_82571:
+	case e1000_82572:
+		swsm2 = er32(SWSM2);
+
+		if (!(swsm2 & E1000_SWSM2_LOCK)) {
+			/* Only do this for the first interface on this card */
+			ew32(SWSM2, swsm2 | E1000_SWSM2_LOCK);
+			force_clear_smbi = true;
+		} else {
+			force_clear_smbi = false;
+		}
+		break;
+	default:
+		force_clear_smbi = true;
+		break;
+	}
+
+	if (force_clear_smbi) {
+		/* Make sure SWSM.SMBI is clear */
+		swsm = er32(SWSM);
+		if (swsm & E1000_SWSM_SMBI) {
+			/* This bit should not be set on a first interface, and
+			 * indicates that the bootagent or EFI code has
+			 * improperly left this bit enabled
+			 */
+			e_dbg("Please update your 82571 Bootagent\n");
+		}
+		ew32(SWSM, swsm & ~E1000_SWSM_SMBI);
+	}
+
+	/* Initialize device specific counter of SMBI acquisition timeouts. */
+	hw->dev_spec.e82571.smb_counter = 0;
+
+	return 0;
+}
+
+static s32 e1000_get_variants_82571(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	static int global_quad_port_a;	/* global port a indication */
+	struct pci_dev *pdev = adapter->pdev;
+	int is_port_b = er32(STATUS) & E1000_STATUS_FUNC_1;
+	s32 rc;
+
+	rc = e1000_init_mac_params_82571(hw);
+	if (rc)
+		return rc;
+
+	rc = e1000_init_nvm_params_82571(hw);
+	if (rc)
+		return rc;
+
+	rc = e1000_init_phy_params_82571(hw);
+	if (rc)
+		return rc;
+
+	/* tag quad port adapters first, it's used below */
+	switch (pdev->device) {
+	case E1000_DEV_ID_82571EB_QUAD_COPPER:
+	case E1000_DEV_ID_82571EB_QUAD_FIBER:
+	case E1000_DEV_ID_82571EB_QUAD_COPPER_LP:
+	case E1000_DEV_ID_82571PT_QUAD_COPPER:
+		adapter->flags |= FLAG_IS_QUAD_PORT;
+		/* mark the first port */
+		if (global_quad_port_a == 0)
+			adapter->flags |= FLAG_IS_QUAD_PORT_A;
+		/* Reset for multiple quad port adapters */
+		global_quad_port_a++;
+		if (global_quad_port_a == 4)
+			global_quad_port_a = 0;
+		break;
+	default:
+		break;
+	}
+
+	switch (adapter->hw.mac.type) {
+	case e1000_82571:
+		/* these dual ports don't have WoL on port B at all */
+		if (((pdev->device == E1000_DEV_ID_82571EB_FIBER) ||
+		     (pdev->device == E1000_DEV_ID_82571EB_SERDES) ||
+		     (pdev->device == E1000_DEV_ID_82571EB_COPPER)) &&
+		    (is_port_b))
+			adapter->flags &= ~FLAG_HAS_WOL;
+		/* quad ports only support WoL on port A */
+		if (adapter->flags & FLAG_IS_QUAD_PORT &&
+		    (!(adapter->flags & FLAG_IS_QUAD_PORT_A)))
+			adapter->flags &= ~FLAG_HAS_WOL;
+		/* Does not support WoL on any port */
+		if (pdev->device == E1000_DEV_ID_82571EB_SERDES_QUAD)
+			adapter->flags &= ~FLAG_HAS_WOL;
+		break;
+	case e1000_82573:
+		if (pdev->device == E1000_DEV_ID_82573L) {
+			adapter->flags |= FLAG_HAS_JUMBO_FRAMES;
+			adapter->max_hw_frame_size = DEFAULT_JUMBO;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_get_phy_id_82571 - Retrieve the PHY ID and revision
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the PHY registers and stores the PHY ID and possibly the PHY
+ *  revision in the hardware structure.
+ **/
+static s32 e1000_get_phy_id_82571(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_id = 0;
+
+	switch (hw->mac.type) {
+	case e1000_82571:
+	case e1000_82572:
+		/* The 82571 firmware may still be configuring the PHY.
+		 * In this case, we cannot access the PHY until the
+		 * configuration is done.  So we explicitly set the
+		 * PHY ID.
+		 */
+		phy->id = IGP01E1000_I_PHY_ID;
+		break;
+	case e1000_82573:
+		return e1000e_get_phy_id(hw);
+	case e1000_82574:
+	case e1000_82583:
+		ret_val = e1e_rphy(hw, MII_PHYSID1, &phy_id);
+		if (ret_val)
+			return ret_val;
+
+		phy->id = (u32)(phy_id << 16);
+		usleep_range(20, 40);
+		ret_val = e1e_rphy(hw, MII_PHYSID2, &phy_id);
+		if (ret_val)
+			return ret_val;
+
+		phy->id |= (u32)(phy_id);
+		phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK);
+		break;
+	default:
+		return -E1000_ERR_PHY;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_get_hw_semaphore_82571 - Acquire hardware semaphore
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire the HW semaphore to access the PHY or NVM
+ **/
+static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw)
+{
+	u32 swsm;
+	s32 sw_timeout = hw->nvm.word_size + 1;
+	s32 fw_timeout = hw->nvm.word_size + 1;
+	s32 i = 0;
+
+	/* If we have timedout 3 times on trying to acquire
+	 * the inter-port SMBI semaphore, there is old code
+	 * operating on the other port, and it is not
+	 * releasing SMBI. Modify the number of times that
+	 * we try for the semaphore to interwork with this
+	 * older code.
+	 */
+	if (hw->dev_spec.e82571.smb_counter > 2)
+		sw_timeout = 1;
+
+	/* Get the SW semaphore */
+	while (i < sw_timeout) {
+		swsm = er32(SWSM);
+		if (!(swsm & E1000_SWSM_SMBI))
+			break;
+
+		usleep_range(50, 100);
+		i++;
+	}
+
+	if (i == sw_timeout) {
+		e_dbg("Driver can't access device - SMBI bit is set.\n");
+		hw->dev_spec.e82571.smb_counter++;
+	}
+	/* Get the FW semaphore. */
+	for (i = 0; i < fw_timeout; i++) {
+		swsm = er32(SWSM);
+		ew32(SWSM, swsm | E1000_SWSM_SWESMBI);
+
+		/* Semaphore acquired if bit latched */
+		if (er32(SWSM) & E1000_SWSM_SWESMBI)
+			break;
+
+		usleep_range(50, 100);
+	}
+
+	if (i == fw_timeout) {
+		/* Release semaphores */
+		e1000_put_hw_semaphore_82571(hw);
+		e_dbg("Driver can't access the NVM\n");
+		return -E1000_ERR_NVM;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_put_hw_semaphore_82571 - Release hardware semaphore
+ *  @hw: pointer to the HW structure
+ *
+ *  Release hardware semaphore used to access the PHY or NVM
+ **/
+static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw)
+{
+	u32 swsm;
+
+	swsm = er32(SWSM);
+	swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI);
+	ew32(SWSM, swsm);
+}
+
+/**
+ *  e1000_get_hw_semaphore_82573 - Acquire hardware semaphore
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire the HW semaphore during reset.
+ *
+ **/
+static s32 e1000_get_hw_semaphore_82573(struct e1000_hw *hw)
+{
+	u32 extcnf_ctrl;
+	s32 i = 0;
+
+	extcnf_ctrl = er32(EXTCNF_CTRL);
+	do {
+		extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP;
+		ew32(EXTCNF_CTRL, extcnf_ctrl);
+		extcnf_ctrl = er32(EXTCNF_CTRL);
+
+		if (extcnf_ctrl & E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP)
+			break;
+
+		usleep_range(2000, 4000);
+		i++;
+	} while (i < MDIO_OWNERSHIP_TIMEOUT);
+
+	if (i == MDIO_OWNERSHIP_TIMEOUT) {
+		/* Release semaphores */
+		e1000_put_hw_semaphore_82573(hw);
+		e_dbg("Driver can't access the PHY\n");
+		return -E1000_ERR_PHY;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_put_hw_semaphore_82573 - Release hardware semaphore
+ *  @hw: pointer to the HW structure
+ *
+ *  Release hardware semaphore used during reset.
+ *
+ **/
+static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw)
+{
+	u32 extcnf_ctrl;
+
+	extcnf_ctrl = er32(EXTCNF_CTRL);
+	extcnf_ctrl &= ~E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP;
+	ew32(EXTCNF_CTRL, extcnf_ctrl);
+}
+
+static DEFINE_MUTEX(swflag_mutex);
+
+/**
+ *  e1000_get_hw_semaphore_82574 - Acquire hardware semaphore
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire the HW semaphore to access the PHY or NVM.
+ *
+ **/
+static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	mutex_lock(&swflag_mutex);
+	ret_val = e1000_get_hw_semaphore_82573(hw);
+	if (ret_val)
+		mutex_unlock(&swflag_mutex);
+	return ret_val;
+}
+
+/**
+ *  e1000_put_hw_semaphore_82574 - Release hardware semaphore
+ *  @hw: pointer to the HW structure
+ *
+ *  Release hardware semaphore used to access the PHY or NVM
+ *
+ **/
+static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw)
+{
+	e1000_put_hw_semaphore_82573(hw);
+	mutex_unlock(&swflag_mutex);
+}
+
+/**
+ *  e1000_set_d0_lplu_state_82574 - Set Low Power Linkup D0 state
+ *  @hw: pointer to the HW structure
+ *  @active: true to enable LPLU, false to disable
+ *
+ *  Sets the LPLU D0 state according to the active flag.
+ *  LPLU will not be activated unless the
+ *  device autonegotiation advertisement meets standards of
+ *  either 10 or 10/100 or 10/100/1000 at all duplexes.
+ *  This is a function pointer entry point only called by
+ *  PHY setup routines.
+ **/
+static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active)
+{
+	u32 data = er32(POEMB);
+
+	if (active)
+		data |= E1000_PHY_CTRL_D0A_LPLU;
+	else
+		data &= ~E1000_PHY_CTRL_D0A_LPLU;
+
+	ew32(POEMB, data);
+	return 0;
+}
+
+/**
+ *  e1000_set_d3_lplu_state_82574 - Sets low power link up state for D3
+ *  @hw: pointer to the HW structure
+ *  @active: boolean used to enable/disable lplu
+ *
+ *  The low power link up (lplu) state is set to the power management level D3
+ *  when active is true, else clear lplu for D3. LPLU
+ *  is used during Dx states where the power conservation is most important.
+ *  During driver activity, SmartSpeed should be enabled so performance is
+ *  maintained.
+ **/
+static s32 e1000_set_d3_lplu_state_82574(struct e1000_hw *hw, bool active)
+{
+	u32 data = er32(POEMB);
+
+	if (!active) {
+		data &= ~E1000_PHY_CTRL_NOND0A_LPLU;
+	} else if ((hw->phy.autoneg_advertised == E1000_ALL_SPEED_DUPLEX) ||
+		   (hw->phy.autoneg_advertised == E1000_ALL_NOT_GIG) ||
+		   (hw->phy.autoneg_advertised == E1000_ALL_10_SPEED)) {
+		data |= E1000_PHY_CTRL_NOND0A_LPLU;
+	}
+
+	ew32(POEMB, data);
+	return 0;
+}
+
+/**
+ *  e1000_acquire_nvm_82571 - Request for access to the EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  To gain access to the EEPROM, first we must obtain a hardware semaphore.
+ *  Then for non-82573 hardware, set the EEPROM access request bit and wait
+ *  for EEPROM access grant bit.  If the access grant bit is not set, release
+ *  hardware semaphore.
+ **/
+static s32 e1000_acquire_nvm_82571(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	ret_val = e1000_get_hw_semaphore_82571(hw);
+	if (ret_val)
+		return ret_val;
+
+	switch (hw->mac.type) {
+	case e1000_82573:
+		break;
+	default:
+		ret_val = e1000e_acquire_nvm(hw);
+		break;
+	}
+
+	if (ret_val)
+		e1000_put_hw_semaphore_82571(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_release_nvm_82571 - Release exclusive access to EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Stop any current commands to the EEPROM and clear the EEPROM request bit.
+ **/
+static void e1000_release_nvm_82571(struct e1000_hw *hw)
+{
+	e1000e_release_nvm(hw);
+	e1000_put_hw_semaphore_82571(hw);
+}
+
+/**
+ *  e1000_write_nvm_82571 - Write to EEPROM using appropriate interface
+ *  @hw: pointer to the HW structure
+ *  @offset: offset within the EEPROM to be written to
+ *  @words: number of words to write
+ *  @data: 16 bit word(s) to be written to the EEPROM
+ *
+ *  For non-82573 silicon, write data to EEPROM at offset using SPI interface.
+ *
+ *  If e1000e_update_nvm_checksum is not called after this function, the
+ *  EEPROM will most likely contain an invalid checksum.
+ **/
+static s32 e1000_write_nvm_82571(struct e1000_hw *hw, u16 offset, u16 words,
+				 u16 *data)
+{
+	s32 ret_val;
+
+	switch (hw->mac.type) {
+	case e1000_82573:
+	case e1000_82574:
+	case e1000_82583:
+		ret_val = e1000_write_nvm_eewr_82571(hw, offset, words, data);
+		break;
+	case e1000_82571:
+	case e1000_82572:
+		ret_val = e1000e_write_nvm_spi(hw, offset, words, data);
+		break;
+	default:
+		ret_val = -E1000_ERR_NVM;
+		break;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_update_nvm_checksum_82571 - Update EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  up to the checksum.  Then calculates the EEPROM checksum and writes the
+ *  value to the EEPROM.
+ **/
+static s32 e1000_update_nvm_checksum_82571(struct e1000_hw *hw)
+{
+	u32 eecd;
+	s32 ret_val;
+	u16 i;
+
+	ret_val = e1000e_update_nvm_checksum_generic(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* If our nvm is an EEPROM, then we're done
+	 * otherwise, commit the checksum to the flash NVM.
+	 */
+	if (hw->nvm.type != e1000_nvm_flash_hw)
+		return 0;
+
+	/* Check for pending operations. */
+	for (i = 0; i < E1000_FLASH_UPDATES; i++) {
+		usleep_range(1000, 2000);
+		if (!(er32(EECD) & E1000_EECD_FLUPD))
+			break;
+	}
+
+	if (i == E1000_FLASH_UPDATES)
+		return -E1000_ERR_NVM;
+
+	/* Reset the firmware if using STM opcode. */
+	if ((er32(FLOP) & 0xFF00) == E1000_STM_OPCODE) {
+		/* The enabling of and the actual reset must be done
+		 * in two write cycles.
+		 */
+		ew32(HICR, E1000_HICR_FW_RESET_ENABLE);
+		e1e_flush();
+		ew32(HICR, E1000_HICR_FW_RESET);
+	}
+
+	/* Commit the write to flash */
+	eecd = er32(EECD) | E1000_EECD_FLUPD;
+	ew32(EECD, eecd);
+
+	for (i = 0; i < E1000_FLASH_UPDATES; i++) {
+		usleep_range(1000, 2000);
+		if (!(er32(EECD) & E1000_EECD_FLUPD))
+			break;
+	}
+
+	if (i == E1000_FLASH_UPDATES)
+		return -E1000_ERR_NVM;
+
+	return 0;
+}
+
+/**
+ *  e1000_validate_nvm_checksum_82571 - Validate EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ **/
+static s32 e1000_validate_nvm_checksum_82571(struct e1000_hw *hw)
+{
+	if (hw->nvm.type == e1000_nvm_flash_hw)
+		e1000_fix_nvm_checksum_82571(hw);
+
+	return e1000e_validate_nvm_checksum_generic(hw);
+}
+
+/**
+ *  e1000_write_nvm_eewr_82571 - Write to EEPROM for 82573 silicon
+ *  @hw: pointer to the HW structure
+ *  @offset: offset within the EEPROM to be written to
+ *  @words: number of words to write
+ *  @data: 16 bit word(s) to be written to the EEPROM
+ *
+ *  After checking for invalid values, poll the EEPROM to ensure the previous
+ *  command has completed before trying to write the next word.  After write
+ *  poll for completion.
+ *
+ *  If e1000e_update_nvm_checksum is not called after this function, the
+ *  EEPROM will most likely contain an invalid checksum.
+ **/
+static s32 e1000_write_nvm_eewr_82571(struct e1000_hw *hw, u16 offset,
+				      u16 words, u16 *data)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 i, eewr = 0;
+	s32 ret_val = 0;
+
+	/* A check for invalid values:  offset too large, too many words,
+	 * and not enough words.
+	 */
+	if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
+	    (words == 0)) {
+		e_dbg("nvm parameter(s) out of bounds\n");
+		return -E1000_ERR_NVM;
+	}
+
+	for (i = 0; i < words; i++) {
+		eewr = ((data[i] << E1000_NVM_RW_REG_DATA) |
+			((offset + i) << E1000_NVM_RW_ADDR_SHIFT) |
+			E1000_NVM_RW_REG_START);
+
+		ret_val = e1000e_poll_eerd_eewr_done(hw, E1000_NVM_POLL_WRITE);
+		if (ret_val)
+			break;
+
+		ew32(EEWR, eewr);
+
+		ret_val = e1000e_poll_eerd_eewr_done(hw, E1000_NVM_POLL_WRITE);
+		if (ret_val)
+			break;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_get_cfg_done_82571 - Poll for configuration done
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the management control register for the config done bit to be set.
+ **/
+static s32 e1000_get_cfg_done_82571(struct e1000_hw *hw)
+{
+	s32 timeout = PHY_CFG_TIMEOUT;
+
+	while (timeout) {
+		if (er32(EEMNGCTL) & E1000_NVM_CFG_DONE_PORT_0)
+			break;
+		usleep_range(1000, 2000);
+		timeout--;
+	}
+	if (!timeout) {
+		e_dbg("MNG configuration cycle has not completed.\n");
+		return -E1000_ERR_RESET;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_set_d0_lplu_state_82571 - Set Low Power Linkup D0 state
+ *  @hw: pointer to the HW structure
+ *  @active: true to enable LPLU, false to disable
+ *
+ *  Sets the LPLU D0 state according to the active flag.  When activating LPLU
+ *  this function also disables smart speed and vice versa.  LPLU will not be
+ *  activated unless the device autonegotiation advertisement meets standards
+ *  of either 10 or 10/100 or 10/100/1000 at all duplexes.  This is a function
+ *  pointer entry point only called by PHY setup routines.
+ **/
+static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, bool active)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+
+	ret_val = e1e_rphy(hw, IGP02E1000_PHY_POWER_MGMT, &data);
+	if (ret_val)
+		return ret_val;
+
+	if (active) {
+		data |= IGP02E1000_PM_D0_LPLU;
+		ret_val = e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, data);
+		if (ret_val)
+			return ret_val;
+
+		/* When LPLU is enabled, we should disable SmartSpeed */
+		ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, &data);
+		if (ret_val)
+			return ret_val;
+		data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+		ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, data);
+		if (ret_val)
+			return ret_val;
+	} else {
+		data &= ~IGP02E1000_PM_D0_LPLU;
+		ret_val = e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, data);
+		if (ret_val)
+			return ret_val;
+		/* LPLU and SmartSpeed are mutually exclusive.  LPLU is used
+		 * during Dx states where the power conservation is most
+		 * important.  During driver activity we should enable
+		 * SmartSpeed, so performance is maintained.
+		 */
+		if (phy->smart_speed == e1000_smart_speed_on) {
+			ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   &data);
+			if (ret_val)
+				return ret_val;
+
+			data |= IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   data);
+			if (ret_val)
+				return ret_val;
+		} else if (phy->smart_speed == e1000_smart_speed_off) {
+			ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   &data);
+			if (ret_val)
+				return ret_val;
+
+			data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   data);
+			if (ret_val)
+				return ret_val;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_reset_hw_82571 - Reset hardware
+ *  @hw: pointer to the HW structure
+ *
+ *  This resets the hardware into a known state.
+ **/
+static s32 e1000_reset_hw_82571(struct e1000_hw *hw)
+{
+	u32 ctrl, ctrl_ext, eecd, tctl;
+	s32 ret_val;
+
+	/* Prevent the PCI-E bus from sticking if there is no TLP connection
+	 * on the last TLP read/write transaction when MAC is reset.
+	 */
+	ret_val = e1000e_disable_pcie_master(hw);
+	if (ret_val)
+		e_dbg("PCI-E Master disable polling has failed.\n");
+
+	e_dbg("Masking off all interrupts\n");
+	ew32(IMC, 0xffffffff);
+
+	ew32(RCTL, 0);
+	tctl = er32(TCTL);
+	tctl &= ~E1000_TCTL_EN;
+	ew32(TCTL, tctl);
+	e1e_flush();
+
+	usleep_range(10000, 11000);
+
+	/* Must acquire the MDIO ownership before MAC reset.
+	 * Ownership defaults to firmware after a reset.
+	 */
+	switch (hw->mac.type) {
+	case e1000_82573:
+		ret_val = e1000_get_hw_semaphore_82573(hw);
+		break;
+	case e1000_82574:
+	case e1000_82583:
+		ret_val = e1000_get_hw_semaphore_82574(hw);
+		break;
+	default:
+		break;
+	}
+
+	ctrl = er32(CTRL);
+
+	e_dbg("Issuing a global reset to MAC\n");
+	ew32(CTRL, ctrl | E1000_CTRL_RST);
+
+	/* Must release MDIO ownership and mutex after MAC reset. */
+	switch (hw->mac.type) {
+	case e1000_82573:
+		/* Release mutex only if the hw semaphore is acquired */
+		if (!ret_val)
+			e1000_put_hw_semaphore_82573(hw);
+		break;
+	case e1000_82574:
+	case e1000_82583:
+		/* Release mutex only if the hw semaphore is acquired */
+		if (!ret_val)
+			e1000_put_hw_semaphore_82574(hw);
+		break;
+	default:
+		break;
+	}
+
+	if (hw->nvm.type == e1000_nvm_flash_hw) {
+		usleep_range(10, 20);
+		ctrl_ext = er32(CTRL_EXT);
+		ctrl_ext |= E1000_CTRL_EXT_EE_RST;
+		ew32(CTRL_EXT, ctrl_ext);
+		e1e_flush();
+	}
+
+	ret_val = e1000e_get_auto_rd_done(hw);
+	if (ret_val)
+		/* We don't want to continue accessing MAC registers. */
+		return ret_val;
+
+	/* Phy configuration from NVM just starts after EECD_AUTO_RD is set.
+	 * Need to wait for Phy configuration completion before accessing
+	 * NVM and Phy.
+	 */
+
+	switch (hw->mac.type) {
+	case e1000_82571:
+	case e1000_82572:
+		/* REQ and GNT bits need to be cleared when using AUTO_RD
+		 * to access the EEPROM.
+		 */
+		eecd = er32(EECD);
+		eecd &= ~(E1000_EECD_REQ | E1000_EECD_GNT);
+		ew32(EECD, eecd);
+		break;
+	case e1000_82573:
+	case e1000_82574:
+	case e1000_82583:
+		msleep(25);
+		break;
+	default:
+		break;
+	}
+
+	/* Clear any pending interrupt events. */
+	ew32(IMC, 0xffffffff);
+	er32(ICR);
+
+	if (hw->mac.type == e1000_82571) {
+		/* Install any alternate MAC address into RAR0 */
+		ret_val = e1000_check_alt_mac_addr_generic(hw);
+		if (ret_val)
+			return ret_val;
+
+		e1000e_set_laa_state_82571(hw, true);
+	}
+
+	/* Reinitialize the 82571 serdes link state machine */
+	if (hw->phy.media_type == e1000_media_type_internal_serdes)
+		hw->mac.serdes_link_state = e1000_serdes_link_down;
+
+	return 0;
+}
+
+/**
+ *  e1000_init_hw_82571 - Initialize hardware
+ *  @hw: pointer to the HW structure
+ *
+ *  This inits the hardware readying it for operation.
+ **/
+static s32 e1000_init_hw_82571(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 reg_data;
+	s32 ret_val;
+	u16 i, rar_count = mac->rar_entry_count;
+
+	e1000_initialize_hw_bits_82571(hw);
+
+	/* Initialize identification LED */
+	ret_val = mac->ops.id_led_init(hw);
+	/* An error is not fatal and we should not stop init due to this */
+	if (ret_val)
+		e_dbg("Error initializing identification LED\n");
+
+	/* Disabling VLAN filtering */
+	e_dbg("Initializing the IEEE VLAN\n");
+	mac->ops.clear_vfta(hw);
+
+	/* Setup the receive address.
+	 * If, however, a locally administered address was assigned to the
+	 * 82571, we must reserve a RAR for it to work around an issue where
+	 * resetting one port will reload the MAC on the other port.
+	 */
+	if (e1000e_get_laa_state_82571(hw))
+		rar_count--;
+	e1000e_init_rx_addrs(hw, rar_count);
+
+	/* Zero out the Multicast HASH table */
+	e_dbg("Zeroing the MTA\n");
+	for (i = 0; i < mac->mta_reg_count; i++)
+		E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0);
+
+	/* Setup link and flow control */
+	ret_val = mac->ops.setup_link(hw);
+
+	/* Set the transmit descriptor write-back policy */
+	reg_data = er32(TXDCTL(0));
+	reg_data = ((reg_data & ~E1000_TXDCTL_WTHRESH) |
+		    E1000_TXDCTL_FULL_TX_DESC_WB | E1000_TXDCTL_COUNT_DESC);
+	ew32(TXDCTL(0), reg_data);
+
+	/* ...for both queues. */
+	switch (mac->type) {
+	case e1000_82573:
+		e1000e_enable_tx_pkt_filtering(hw);
+		fallthrough;
+	case e1000_82574:
+	case e1000_82583:
+		reg_data = er32(GCR);
+		reg_data |= E1000_GCR_L1_ACT_WITHOUT_L0S_RX;
+		ew32(GCR, reg_data);
+		break;
+	default:
+		reg_data = er32(TXDCTL(1));
+		reg_data = ((reg_data & ~E1000_TXDCTL_WTHRESH) |
+			    E1000_TXDCTL_FULL_TX_DESC_WB |
+			    E1000_TXDCTL_COUNT_DESC);
+		ew32(TXDCTL(1), reg_data);
+		break;
+	}
+
+	/* Clear all of the statistics registers (clear on read).  It is
+	 * important that we do this after we have tried to establish link
+	 * because the symbol error count will increment wildly if there
+	 * is no link.
+	 */
+	e1000_clear_hw_cntrs_82571(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_initialize_hw_bits_82571 - Initialize hardware-dependent bits
+ *  @hw: pointer to the HW structure
+ *
+ *  Initializes required hardware-dependent bits needed for normal operation.
+ **/
+static void e1000_initialize_hw_bits_82571(struct e1000_hw *hw)
+{
+	u32 reg;
+
+	/* Transmit Descriptor Control 0 */
+	reg = er32(TXDCTL(0));
+	reg |= BIT(22);
+	ew32(TXDCTL(0), reg);
+
+	/* Transmit Descriptor Control 1 */
+	reg = er32(TXDCTL(1));
+	reg |= BIT(22);
+	ew32(TXDCTL(1), reg);
+
+	/* Transmit Arbitration Control 0 */
+	reg = er32(TARC(0));
+	reg &= ~(0xF << 27);	/* 30:27 */
+	switch (hw->mac.type) {
+	case e1000_82571:
+	case e1000_82572:
+		reg |= BIT(23) | BIT(24) | BIT(25) | BIT(26);
+		break;
+	case e1000_82574:
+	case e1000_82583:
+		reg |= BIT(26);
+		break;
+	default:
+		break;
+	}
+	ew32(TARC(0), reg);
+
+	/* Transmit Arbitration Control 1 */
+	reg = er32(TARC(1));
+	switch (hw->mac.type) {
+	case e1000_82571:
+	case e1000_82572:
+		reg &= ~(BIT(29) | BIT(30));
+		reg |= BIT(22) | BIT(24) | BIT(25) | BIT(26);
+		if (er32(TCTL) & E1000_TCTL_MULR)
+			reg &= ~BIT(28);
+		else
+			reg |= BIT(28);
+		ew32(TARC(1), reg);
+		break;
+	default:
+		break;
+	}
+
+	/* Device Control */
+	switch (hw->mac.type) {
+	case e1000_82573:
+	case e1000_82574:
+	case e1000_82583:
+		reg = er32(CTRL);
+		reg &= ~BIT(29);
+		ew32(CTRL, reg);
+		break;
+	default:
+		break;
+	}
+
+	/* Extended Device Control */
+	switch (hw->mac.type) {
+	case e1000_82573:
+	case e1000_82574:
+	case e1000_82583:
+		reg = er32(CTRL_EXT);
+		reg &= ~BIT(23);
+		reg |= BIT(22);
+		ew32(CTRL_EXT, reg);
+		break;
+	default:
+		break;
+	}
+
+	if (hw->mac.type == e1000_82571) {
+		reg = er32(PBA_ECC);
+		reg |= E1000_PBA_ECC_CORR_EN;
+		ew32(PBA_ECC, reg);
+	}
+
+	/* Workaround for hardware errata.
+	 * Ensure that DMA Dynamic Clock gating is disabled on 82571 and 82572
+	 */
+	if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) {
+		reg = er32(CTRL_EXT);
+		reg &= ~E1000_CTRL_EXT_DMA_DYN_CLK_EN;
+		ew32(CTRL_EXT, reg);
+	}
+
+	/* Disable IPv6 extension header parsing because some malformed
+	 * IPv6 headers can hang the Rx.
+	 */
+	if (hw->mac.type <= e1000_82573) {
+		reg = er32(RFCTL);
+		reg |= (E1000_RFCTL_IPV6_EX_DIS | E1000_RFCTL_NEW_IPV6_EXT_DIS);
+		ew32(RFCTL, reg);
+	}
+
+	/* PCI-Ex Control Registers */
+	switch (hw->mac.type) {
+	case e1000_82574:
+	case e1000_82583:
+		reg = er32(GCR);
+		reg |= BIT(22);
+		ew32(GCR, reg);
+
+		/* Workaround for hardware errata.
+		 * apply workaround for hardware errata documented in errata
+		 * docs Fixes issue where some error prone or unreliable PCIe
+		 * completions are occurring, particularly with ASPM enabled.
+		 * Without fix, issue can cause Tx timeouts.
+		 */
+		reg = er32(GCR2);
+		reg |= 1;
+		ew32(GCR2, reg);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ *  e1000_clear_vfta_82571 - Clear VLAN filter table
+ *  @hw: pointer to the HW structure
+ *
+ *  Clears the register array which contains the VLAN filter table by
+ *  setting all the values to 0.
+ **/
+static void e1000_clear_vfta_82571(struct e1000_hw *hw)
+{
+	u32 offset;
+	u32 vfta_value = 0;
+	u32 vfta_offset = 0;
+	u32 vfta_bit_in_reg = 0;
+
+	switch (hw->mac.type) {
+	case e1000_82573:
+	case e1000_82574:
+	case e1000_82583:
+		if (hw->mng_cookie.vlan_id != 0) {
+			/* The VFTA is a 4096b bit-field, each identifying
+			 * a single VLAN ID.  The following operations
+			 * determine which 32b entry (i.e. offset) into the
+			 * array we want to set the VLAN ID (i.e. bit) of
+			 * the manageability unit.
+			 */
+			vfta_offset = (hw->mng_cookie.vlan_id >>
+				       E1000_VFTA_ENTRY_SHIFT) &
+			    E1000_VFTA_ENTRY_MASK;
+			vfta_bit_in_reg =
+			    BIT(hw->mng_cookie.vlan_id &
+				E1000_VFTA_ENTRY_BIT_SHIFT_MASK);
+		}
+		break;
+	default:
+		break;
+	}
+	for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) {
+		/* If the offset we want to clear is the same offset of the
+		 * manageability VLAN ID, then clear all bits except that of
+		 * the manageability unit.
+		 */
+		vfta_value = (offset == vfta_offset) ? vfta_bit_in_reg : 0;
+		E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, vfta_value);
+		e1e_flush();
+	}
+}
+
+/**
+ *  e1000_check_mng_mode_82574 - Check manageability is enabled
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the NVM Initialization Control Word 2 and returns true
+ *  (>0) if any manageability is enabled, else false (0).
+ **/
+static bool e1000_check_mng_mode_82574(struct e1000_hw *hw)
+{
+	u16 data;
+
+	e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &data);
+	return (data & E1000_NVM_INIT_CTRL2_MNGM) != 0;
+}
+
+/**
+ *  e1000_led_on_82574 - Turn LED on
+ *  @hw: pointer to the HW structure
+ *
+ *  Turn LED on.
+ **/
+static s32 e1000_led_on_82574(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	u32 i;
+
+	ctrl = hw->mac.ledctl_mode2;
+	if (!(E1000_STATUS_LU & er32(STATUS))) {
+		/* If no link, then turn LED on by setting the invert bit
+		 * for each LED that's "on" (0x0E) in ledctl_mode2.
+		 */
+		for (i = 0; i < 4; i++)
+			if (((hw->mac.ledctl_mode2 >> (i * 8)) & 0xFF) ==
+			    E1000_LEDCTL_MODE_LED_ON)
+				ctrl |= (E1000_LEDCTL_LED0_IVRT << (i * 8));
+	}
+	ew32(LEDCTL, ctrl);
+
+	return 0;
+}
+
+/**
+ *  e1000_check_phy_82574 - check 82574 phy hung state
+ *  @hw: pointer to the HW structure
+ *
+ *  Returns whether phy is hung or not
+ **/
+bool e1000_check_phy_82574(struct e1000_hw *hw)
+{
+	u16 status_1kbt = 0;
+	u16 receive_errors = 0;
+	s32 ret_val;
+
+	/* Read PHY Receive Error counter first, if its is max - all F's then
+	 * read the Base1000T status register If both are max then PHY is hung.
+	 */
+	ret_val = e1e_rphy(hw, E1000_RECEIVE_ERROR_COUNTER, &receive_errors);
+	if (ret_val)
+		return false;
+	if (receive_errors == E1000_RECEIVE_ERROR_MAX) {
+		ret_val = e1e_rphy(hw, E1000_BASE1000T_STATUS, &status_1kbt);
+		if (ret_val)
+			return false;
+		if ((status_1kbt & E1000_IDLE_ERROR_COUNT_MASK) ==
+		    E1000_IDLE_ERROR_COUNT_MASK)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ *  e1000_setup_link_82571 - Setup flow control and link settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Determines which flow control settings to use, then configures flow
+ *  control.  Calls the appropriate media-specific link configuration
+ *  function.  Assuming the adapter has a valid link partner, a valid link
+ *  should be established.  Assumes the hardware has previously been reset
+ *  and the transmitter and receiver are not enabled.
+ **/
+static s32 e1000_setup_link_82571(struct e1000_hw *hw)
+{
+	/* 82573 does not have a word in the NVM to determine
+	 * the default flow control setting, so we explicitly
+	 * set it to full.
+	 */
+	switch (hw->mac.type) {
+	case e1000_82573:
+	case e1000_82574:
+	case e1000_82583:
+		if (hw->fc.requested_mode == e1000_fc_default)
+			hw->fc.requested_mode = e1000_fc_full;
+		break;
+	default:
+		break;
+	}
+
+	return e1000e_setup_link_generic(hw);
+}
+
+/**
+ *  e1000_setup_copper_link_82571 - Configure copper link settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Configures the link for auto-neg or forced speed and duplex.  Then we check
+ *  for link, once link is established calls to configure collision distance
+ *  and flow control are called.
+ **/
+static s32 e1000_setup_copper_link_82571(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 ret_val;
+
+	ctrl = er32(CTRL);
+	ctrl |= E1000_CTRL_SLU;
+	ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+	ew32(CTRL, ctrl);
+
+	switch (hw->phy.type) {
+	case e1000_phy_m88:
+	case e1000_phy_bm:
+		ret_val = e1000e_copper_link_setup_m88(hw);
+		break;
+	case e1000_phy_igp_2:
+		ret_val = e1000e_copper_link_setup_igp(hw);
+		break;
+	default:
+		return -E1000_ERR_PHY;
+	}
+
+	if (ret_val)
+		return ret_val;
+
+	return e1000e_setup_copper_link(hw);
+}
+
+/**
+ *  e1000_setup_fiber_serdes_link_82571 - Setup link for fiber/serdes
+ *  @hw: pointer to the HW structure
+ *
+ *  Configures collision distance and flow control for fiber and serdes links.
+ *  Upon successful setup, poll for link.
+ **/
+static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw)
+{
+	switch (hw->mac.type) {
+	case e1000_82571:
+	case e1000_82572:
+		/* If SerDes loopback mode is entered, there is no form
+		 * of reset to take the adapter out of that mode.  So we
+		 * have to explicitly take the adapter out of loopback
+		 * mode.  This prevents drivers from twiddling their thumbs
+		 * if another tool failed to take it out of loopback mode.
+		 */
+		ew32(SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK);
+		break;
+	default:
+		break;
+	}
+
+	return e1000e_setup_fiber_serdes_link(hw);
+}
+
+/**
+ *  e1000_check_for_serdes_link_82571 - Check for link (Serdes)
+ *  @hw: pointer to the HW structure
+ *
+ *  Reports the link state as up or down.
+ *
+ *  If autonegotiation is supported by the link partner, the link state is
+ *  determined by the result of autonegotiation. This is the most likely case.
+ *  If autonegotiation is not supported by the link partner, and the link
+ *  has a valid signal, force the link up.
+ *
+ *  The link state is represented internally here by 4 states:
+ *
+ *  1) down
+ *  2) autoneg_progress
+ *  3) autoneg_complete (the link successfully autonegotiated)
+ *  4) forced_up (the link has been forced up, it did not autonegotiate)
+ *
+ **/
+static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 rxcw;
+	u32 ctrl;
+	u32 status;
+	u32 txcw;
+	u32 i;
+	s32 ret_val = 0;
+
+	ctrl = er32(CTRL);
+	status = er32(STATUS);
+	er32(RXCW);
+	/* SYNCH bit and IV bit are sticky */
+	usleep_range(10, 20);
+	rxcw = er32(RXCW);
+
+	if ((rxcw & E1000_RXCW_SYNCH) && !(rxcw & E1000_RXCW_IV)) {
+		/* Receiver is synchronized with no invalid bits.  */
+		switch (mac->serdes_link_state) {
+		case e1000_serdes_link_autoneg_complete:
+			if (!(status & E1000_STATUS_LU)) {
+				/* We have lost link, retry autoneg before
+				 * reporting link failure
+				 */
+				mac->serdes_link_state =
+				    e1000_serdes_link_autoneg_progress;
+				mac->serdes_has_link = false;
+				e_dbg("AN_UP     -> AN_PROG\n");
+			} else {
+				mac->serdes_has_link = true;
+			}
+			break;
+
+		case e1000_serdes_link_forced_up:
+			/* If we are receiving /C/ ordered sets, re-enable
+			 * auto-negotiation in the TXCW register and disable
+			 * forced link in the Device Control register in an
+			 * attempt to auto-negotiate with our link partner.
+			 */
+			if (rxcw & E1000_RXCW_C) {
+				/* Enable autoneg, and unforce link up */
+				ew32(TXCW, mac->txcw);
+				ew32(CTRL, (ctrl & ~E1000_CTRL_SLU));
+				mac->serdes_link_state =
+				    e1000_serdes_link_autoneg_progress;
+				mac->serdes_has_link = false;
+				e_dbg("FORCED_UP -> AN_PROG\n");
+			} else {
+				mac->serdes_has_link = true;
+			}
+			break;
+
+		case e1000_serdes_link_autoneg_progress:
+			if (rxcw & E1000_RXCW_C) {
+				/* We received /C/ ordered sets, meaning the
+				 * link partner has autonegotiated, and we can
+				 * trust the Link Up (LU) status bit.
+				 */
+				if (status & E1000_STATUS_LU) {
+					mac->serdes_link_state =
+					    e1000_serdes_link_autoneg_complete;
+					e_dbg("AN_PROG   -> AN_UP\n");
+					mac->serdes_has_link = true;
+				} else {
+					/* Autoneg completed, but failed. */
+					mac->serdes_link_state =
+					    e1000_serdes_link_down;
+					e_dbg("AN_PROG   -> DOWN\n");
+				}
+			} else {
+				/* The link partner did not autoneg.
+				 * Force link up and full duplex, and change
+				 * state to forced.
+				 */
+				ew32(TXCW, (mac->txcw & ~E1000_TXCW_ANE));
+				ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD);
+				ew32(CTRL, ctrl);
+
+				/* Configure Flow Control after link up. */
+				ret_val = e1000e_config_fc_after_link_up(hw);
+				if (ret_val) {
+					e_dbg("Error config flow control\n");
+					break;
+				}
+				mac->serdes_link_state =
+				    e1000_serdes_link_forced_up;
+				mac->serdes_has_link = true;
+				e_dbg("AN_PROG   -> FORCED_UP\n");
+			}
+			break;
+
+		case e1000_serdes_link_down:
+		default:
+			/* The link was down but the receiver has now gained
+			 * valid sync, so lets see if we can bring the link
+			 * up.
+			 */
+			ew32(TXCW, mac->txcw);
+			ew32(CTRL, (ctrl & ~E1000_CTRL_SLU));
+			mac->serdes_link_state =
+			    e1000_serdes_link_autoneg_progress;
+			mac->serdes_has_link = false;
+			e_dbg("DOWN      -> AN_PROG\n");
+			break;
+		}
+	} else {
+		if (!(rxcw & E1000_RXCW_SYNCH)) {
+			mac->serdes_has_link = false;
+			mac->serdes_link_state = e1000_serdes_link_down;
+			e_dbg("ANYSTATE  -> DOWN\n");
+		} else {
+			/* Check several times, if SYNCH bit and CONFIG
+			 * bit both are consistently 1 then simply ignore
+			 * the IV bit and restart Autoneg
+			 */
+			for (i = 0; i < AN_RETRY_COUNT; i++) {
+				usleep_range(10, 20);
+				rxcw = er32(RXCW);
+				if ((rxcw & E1000_RXCW_SYNCH) &&
+				    (rxcw & E1000_RXCW_C))
+					continue;
+
+				if (rxcw & E1000_RXCW_IV) {
+					mac->serdes_has_link = false;
+					mac->serdes_link_state =
+					    e1000_serdes_link_down;
+					e_dbg("ANYSTATE  -> DOWN\n");
+					break;
+				}
+			}
+
+			if (i == AN_RETRY_COUNT) {
+				txcw = er32(TXCW);
+				txcw |= E1000_TXCW_ANE;
+				ew32(TXCW, txcw);
+				mac->serdes_link_state =
+				    e1000_serdes_link_autoneg_progress;
+				mac->serdes_has_link = false;
+				e_dbg("ANYSTATE  -> AN_PROG\n");
+			}
+		}
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_valid_led_default_82571 - Verify a valid default LED config
+ *  @hw: pointer to the HW structure
+ *  @data: pointer to the NVM (EEPROM)
+ *
+ *  Read the EEPROM for the current default LED configuration.  If the
+ *  LED configuration is not valid, set to a valid LED configuration.
+ **/
+static s32 e1000_valid_led_default_82571(struct e1000_hw *hw, u16 *data)
+{
+	s32 ret_val;
+
+	ret_val = e1000_read_nvm(hw, NVM_ID_LED_SETTINGS, 1, data);
+	if (ret_val) {
+		e_dbg("NVM Read Error\n");
+		return ret_val;
+	}
+
+	switch (hw->mac.type) {
+	case e1000_82573:
+	case e1000_82574:
+	case e1000_82583:
+		if (*data == ID_LED_RESERVED_F746)
+			*data = ID_LED_DEFAULT_82573;
+		break;
+	default:
+		if (*data == ID_LED_RESERVED_0000 ||
+		    *data == ID_LED_RESERVED_FFFF)
+			*data = ID_LED_DEFAULT;
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_get_laa_state_82571 - Get locally administered address state
+ *  @hw: pointer to the HW structure
+ *
+ *  Retrieve and return the current locally administered address state.
+ **/
+bool e1000e_get_laa_state_82571(struct e1000_hw *hw)
+{
+	if (hw->mac.type != e1000_82571)
+		return false;
+
+	return hw->dev_spec.e82571.laa_is_present;
+}
+
+/**
+ *  e1000e_set_laa_state_82571 - Set locally administered address state
+ *  @hw: pointer to the HW structure
+ *  @state: enable/disable locally administered address
+ *
+ *  Enable/Disable the current locally administered address state.
+ **/
+void e1000e_set_laa_state_82571(struct e1000_hw *hw, bool state)
+{
+	if (hw->mac.type != e1000_82571)
+		return;
+
+	hw->dev_spec.e82571.laa_is_present = state;
+
+	/* If workaround is activated... */
+	if (state)
+		/* Hold a copy of the LAA in RAR[14] This is done so that
+		 * between the time RAR[0] gets clobbered and the time it
+		 * gets fixed, the actual LAA is in one of the RARs and no
+		 * incoming packets directed to this port are dropped.
+		 * Eventually the LAA will be in RAR[0] and RAR[14].
+		 */
+		hw->mac.ops.rar_set(hw, hw->mac.addr,
+				    hw->mac.rar_entry_count - 1);
+}
+
+/**
+ *  e1000_fix_nvm_checksum_82571 - Fix EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Verifies that the EEPROM has completed the update.  After updating the
+ *  EEPROM, we need to check bit 15 in work 0x23 for the checksum fix.  If
+ *  the checksum fix is not implemented, we need to set the bit and update
+ *  the checksum.  Otherwise, if bit 15 is set and the checksum is incorrect,
+ *  we need to return bad checksum.
+ **/
+static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	s32 ret_val;
+	u16 data;
+
+	if (nvm->type != e1000_nvm_flash_hw)
+		return 0;
+
+	/* Check bit 4 of word 10h.  If it is 0, firmware is done updating
+	 * 10h-12h.  Checksum may need to be fixed.
+	 */
+	ret_val = e1000_read_nvm(hw, 0x10, 1, &data);
+	if (ret_val)
+		return ret_val;
+
+	if (!(data & 0x10)) {
+		/* Read 0x23 and check bit 15.  This bit is a 1
+		 * when the checksum has already been fixed.  If
+		 * the checksum is still wrong and this bit is a
+		 * 1, we need to return bad checksum.  Otherwise,
+		 * we need to set this bit to a 1 and update the
+		 * checksum.
+		 */
+		ret_val = e1000_read_nvm(hw, 0x23, 1, &data);
+		if (ret_val)
+			return ret_val;
+
+		if (!(data & 0x8000)) {
+			data |= 0x8000;
+			ret_val = e1000_write_nvm(hw, 0x23, 1, &data);
+			if (ret_val)
+				return ret_val;
+			ret_val = e1000e_update_nvm_checksum(hw);
+			if (ret_val)
+				return ret_val;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_read_mac_addr_82571 - Read device MAC address
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_read_mac_addr_82571(struct e1000_hw *hw)
+{
+	if (hw->mac.type == e1000_82571) {
+		s32 ret_val;
+
+		/* If there's an alternate MAC address place it in RAR0
+		 * so that it will override the Si installed default perm
+		 * address.
+		 */
+		ret_val = e1000_check_alt_mac_addr_generic(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	return e1000_read_mac_addr_generic(hw);
+}
+
+/**
+ * e1000_power_down_phy_copper_82571 - Remove link during PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, or wake on lan is not enabled, remove the link.
+ **/
+static void e1000_power_down_phy_copper_82571(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	struct e1000_mac_info *mac = &hw->mac;
+
+	if (!phy->ops.check_reset_block)
+		return;
+
+	/* If the management interface is not enabled, then power down */
+	if (!(mac->ops.check_mng_mode(hw) || phy->ops.check_reset_block(hw)))
+		e1000_power_down_phy_copper(hw);
+}
+
+/**
+ *  e1000_clear_hw_cntrs_82571 - Clear device specific hardware counters
+ *  @hw: pointer to the HW structure
+ *
+ *  Clears the hardware counters by reading the counter registers.
+ **/
+static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw)
+{
+	e1000e_clear_hw_cntrs_base(hw);
+
+	er32(PRC64);
+	er32(PRC127);
+	er32(PRC255);
+	er32(PRC511);
+	er32(PRC1023);
+	er32(PRC1522);
+	er32(PTC64);
+	er32(PTC127);
+	er32(PTC255);
+	er32(PTC511);
+	er32(PTC1023);
+	er32(PTC1522);
+
+	er32(ALGNERRC);
+	er32(RXERRC);
+	er32(TNCRS);
+	er32(CEXTERR);
+	er32(TSCTC);
+	er32(TSCTFC);
+
+	er32(MGTPRC);
+	er32(MGTPDC);
+	er32(MGTPTC);
+
+	er32(IAC);
+	er32(ICRXOC);
+
+	er32(ICRXPTC);
+	er32(ICRXATC);
+	er32(ICTXPTC);
+	er32(ICTXATC);
+	er32(ICTXQEC);
+	er32(ICTXQMTC);
+	er32(ICRXDMTC);
+}
+
+static const struct e1000_mac_operations e82571_mac_ops = {
+	/* .check_mng_mode: mac type dependent */
+	/* .check_for_link: media type dependent */
+	.id_led_init		= e1000e_id_led_init_generic,
+	.cleanup_led		= e1000e_cleanup_led_generic,
+	.clear_hw_cntrs		= e1000_clear_hw_cntrs_82571,
+	.get_bus_info		= e1000e_get_bus_info_pcie,
+	.set_lan_id		= e1000_set_lan_id_multi_port_pcie,
+	/* .get_link_up_info: media type dependent */
+	/* .led_on: mac type dependent */
+	.led_off		= e1000e_led_off_generic,
+	.update_mc_addr_list	= e1000e_update_mc_addr_list_generic,
+	.write_vfta		= e1000_write_vfta_generic,
+	.clear_vfta		= e1000_clear_vfta_82571,
+	.reset_hw		= e1000_reset_hw_82571,
+	.init_hw		= e1000_init_hw_82571,
+	.setup_link		= e1000_setup_link_82571,
+	/* .setup_physical_interface: media type dependent */
+	.setup_led		= e1000e_setup_led_generic,
+	.config_collision_dist	= e1000e_config_collision_dist_generic,
+	.read_mac_addr		= e1000_read_mac_addr_82571,
+	.rar_set		= e1000e_rar_set_generic,
+	.rar_get_count		= e1000e_rar_get_count_generic,
+};
+
+static const struct e1000_phy_operations e82_phy_ops_igp = {
+	.acquire		= e1000_get_hw_semaphore_82571,
+	.check_polarity		= e1000_check_polarity_igp,
+	.check_reset_block	= e1000e_check_reset_block_generic,
+	.commit			= NULL,
+	.force_speed_duplex	= e1000e_phy_force_speed_duplex_igp,
+	.get_cfg_done		= e1000_get_cfg_done_82571,
+	.get_cable_length	= e1000e_get_cable_length_igp_2,
+	.get_info		= e1000e_get_phy_info_igp,
+	.read_reg		= e1000e_read_phy_reg_igp,
+	.release		= e1000_put_hw_semaphore_82571,
+	.reset			= e1000e_phy_hw_reset_generic,
+	.set_d0_lplu_state	= e1000_set_d0_lplu_state_82571,
+	.set_d3_lplu_state	= e1000e_set_d3_lplu_state,
+	.write_reg		= e1000e_write_phy_reg_igp,
+	.cfg_on_link_up		= NULL,
+};
+
+static const struct e1000_phy_operations e82_phy_ops_m88 = {
+	.acquire		= e1000_get_hw_semaphore_82571,
+	.check_polarity		= e1000_check_polarity_m88,
+	.check_reset_block	= e1000e_check_reset_block_generic,
+	.commit			= e1000e_phy_sw_reset,
+	.force_speed_duplex	= e1000e_phy_force_speed_duplex_m88,
+	.get_cfg_done		= e1000e_get_cfg_done_generic,
+	.get_cable_length	= e1000e_get_cable_length_m88,
+	.get_info		= e1000e_get_phy_info_m88,
+	.read_reg		= e1000e_read_phy_reg_m88,
+	.release		= e1000_put_hw_semaphore_82571,
+	.reset			= e1000e_phy_hw_reset_generic,
+	.set_d0_lplu_state	= e1000_set_d0_lplu_state_82571,
+	.set_d3_lplu_state	= e1000e_set_d3_lplu_state,
+	.write_reg		= e1000e_write_phy_reg_m88,
+	.cfg_on_link_up		= NULL,
+};
+
+static const struct e1000_phy_operations e82_phy_ops_bm = {
+	.acquire		= e1000_get_hw_semaphore_82571,
+	.check_polarity		= e1000_check_polarity_m88,
+	.check_reset_block	= e1000e_check_reset_block_generic,
+	.commit			= e1000e_phy_sw_reset,
+	.force_speed_duplex	= e1000e_phy_force_speed_duplex_m88,
+	.get_cfg_done		= e1000e_get_cfg_done_generic,
+	.get_cable_length	= e1000e_get_cable_length_m88,
+	.get_info		= e1000e_get_phy_info_m88,
+	.read_reg		= e1000e_read_phy_reg_bm2,
+	.release		= e1000_put_hw_semaphore_82571,
+	.reset			= e1000e_phy_hw_reset_generic,
+	.set_d0_lplu_state	= e1000_set_d0_lplu_state_82571,
+	.set_d3_lplu_state	= e1000e_set_d3_lplu_state,
+	.write_reg		= e1000e_write_phy_reg_bm2,
+	.cfg_on_link_up		= NULL,
+};
+
+static const struct e1000_nvm_operations e82571_nvm_ops = {
+	.acquire		= e1000_acquire_nvm_82571,
+	.read			= e1000e_read_nvm_eerd,
+	.release		= e1000_release_nvm_82571,
+	.reload			= e1000e_reload_nvm_generic,
+	.update			= e1000_update_nvm_checksum_82571,
+	.valid_led_default	= e1000_valid_led_default_82571,
+	.validate		= e1000_validate_nvm_checksum_82571,
+	.write			= e1000_write_nvm_82571,
+};
+
+const struct e1000_info e1000_82571_info = {
+	.mac			= e1000_82571,
+	.flags			= FLAG_HAS_HW_VLAN_FILTER
+				  | FLAG_HAS_JUMBO_FRAMES
+				  | FLAG_HAS_WOL
+				  | FLAG_APME_IN_CTRL3
+				  | FLAG_HAS_CTRLEXT_ON_LOAD
+				  | FLAG_HAS_SMART_POWER_DOWN
+				  | FLAG_RESET_OVERWRITES_LAA /* errata */
+				  | FLAG_TARC_SPEED_MODE_BIT /* errata */
+				  | FLAG_APME_CHECK_PORT_B,
+	.flags2			= FLAG2_DISABLE_ASPM_L1 /* errata 13 */
+				  | FLAG2_DMA_BURST,
+	.pba			= 38,
+	.max_hw_frame_size	= DEFAULT_JUMBO,
+	.get_variants		= e1000_get_variants_82571,
+	.mac_ops		= &e82571_mac_ops,
+	.phy_ops		= &e82_phy_ops_igp,
+	.nvm_ops		= &e82571_nvm_ops,
+};
+
+const struct e1000_info e1000_82572_info = {
+	.mac			= e1000_82572,
+	.flags			= FLAG_HAS_HW_VLAN_FILTER
+				  | FLAG_HAS_JUMBO_FRAMES
+				  | FLAG_HAS_WOL
+				  | FLAG_APME_IN_CTRL3
+				  | FLAG_HAS_CTRLEXT_ON_LOAD
+				  | FLAG_TARC_SPEED_MODE_BIT, /* errata */
+	.flags2			= FLAG2_DISABLE_ASPM_L1 /* errata 13 */
+				  | FLAG2_DMA_BURST,
+	.pba			= 38,
+	.max_hw_frame_size	= DEFAULT_JUMBO,
+	.get_variants		= e1000_get_variants_82571,
+	.mac_ops		= &e82571_mac_ops,
+	.phy_ops		= &e82_phy_ops_igp,
+	.nvm_ops		= &e82571_nvm_ops,
+};
+
+const struct e1000_info e1000_82573_info = {
+	.mac			= e1000_82573,
+	.flags			= FLAG_HAS_HW_VLAN_FILTER
+				  | FLAG_HAS_WOL
+				  | FLAG_APME_IN_CTRL3
+				  | FLAG_HAS_SMART_POWER_DOWN
+				  | FLAG_HAS_AMT
+				  | FLAG_HAS_SWSM_ON_LOAD,
+	.flags2			= FLAG2_DISABLE_ASPM_L1
+				  | FLAG2_DISABLE_ASPM_L0S,
+	.pba			= 20,
+	.max_hw_frame_size	= VLAN_ETH_FRAME_LEN + ETH_FCS_LEN,
+	.get_variants		= e1000_get_variants_82571,
+	.mac_ops		= &e82571_mac_ops,
+	.phy_ops		= &e82_phy_ops_m88,
+	.nvm_ops		= &e82571_nvm_ops,
+};
+
+const struct e1000_info e1000_82574_info = {
+	.mac			= e1000_82574,
+	.flags			= FLAG_HAS_HW_VLAN_FILTER
+				  | FLAG_HAS_MSIX
+				  | FLAG_HAS_JUMBO_FRAMES
+				  | FLAG_HAS_WOL
+				  | FLAG_HAS_HW_TIMESTAMP
+				  | FLAG_APME_IN_CTRL3
+				  | FLAG_HAS_SMART_POWER_DOWN
+				  | FLAG_HAS_AMT
+				  | FLAG_HAS_CTRLEXT_ON_LOAD,
+	.flags2			 = FLAG2_CHECK_PHY_HANG
+				  | FLAG2_DISABLE_ASPM_L0S
+				  | FLAG2_DISABLE_ASPM_L1
+				  | FLAG2_NO_DISABLE_RX
+				  | FLAG2_DMA_BURST
+				  | FLAG2_CHECK_SYSTIM_OVERFLOW,
+	.pba			= 32,
+	.max_hw_frame_size	= DEFAULT_JUMBO,
+	.get_variants		= e1000_get_variants_82571,
+	.mac_ops		= &e82571_mac_ops,
+	.phy_ops		= &e82_phy_ops_bm,
+	.nvm_ops		= &e82571_nvm_ops,
+};
+
+const struct e1000_info e1000_82583_info = {
+	.mac			= e1000_82583,
+	.flags			= FLAG_HAS_HW_VLAN_FILTER
+				  | FLAG_HAS_WOL
+				  | FLAG_HAS_HW_TIMESTAMP
+				  | FLAG_APME_IN_CTRL3
+				  | FLAG_HAS_SMART_POWER_DOWN
+				  | FLAG_HAS_AMT
+				  | FLAG_HAS_JUMBO_FRAMES
+				  | FLAG_HAS_CTRLEXT_ON_LOAD,
+	.flags2			= FLAG2_DISABLE_ASPM_L0S
+				  | FLAG2_DISABLE_ASPM_L1
+				  | FLAG2_NO_DISABLE_RX
+				  | FLAG2_CHECK_SYSTIM_OVERFLOW,
+	.pba			= 32,
+	.max_hw_frame_size	= DEFAULT_JUMBO,
+	.get_variants		= e1000_get_variants_82571,
+	.mac_ops		= &e82571_mac_ops,
+	.phy_ops		= &e82_phy_ops_bm,
+	.nvm_ops		= &e82571_nvm_ops,
+};
diff --git a/drivers/net/ethernet/intel/e1000e/82571.h b/drivers/net/ethernet/intel/e1000e/82571.h
new file mode 100644
index 0000000000..834c238d02
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/82571.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _E1000E_82571_H_
+#define _E1000E_82571_H_
+
+#define ID_LED_RESERVED_F746	0xF746
+#define ID_LED_DEFAULT_82573	((ID_LED_DEF1_DEF2 << 12) | \
+				 (ID_LED_OFF1_ON2  <<  8) | \
+				 (ID_LED_DEF1_DEF2 <<  4) | \
+				 (ID_LED_DEF1_DEF2))
+
+#define E1000_GCR_L1_ACT_WITHOUT_L0S_RX	0x08000000
+#define AN_RETRY_COUNT		5	/* Autoneg Retry Count value */
+
+/* Intr Throttling - RW */
+#define E1000_EITR_82574(_n)	(0x000E8 + (0x4 * (_n)))
+
+#define E1000_EIAC_82574	0x000DC	/* Ext. Interrupt Auto Clear - RW */
+#define E1000_EIAC_MASK_82574	0x01F00000
+
+#define E1000_IVAR_INT_ALLOC_VALID	0x8
+
+/* Manageability Operation Mode mask */
+#define E1000_NVM_INIT_CTRL2_MNGM	0x6000
+
+#define E1000_BASE1000T_STATUS		10
+#define E1000_IDLE_ERROR_COUNT_MASK	0xFF
+#define E1000_RECEIVE_ERROR_COUNTER	21
+#define E1000_RECEIVE_ERROR_MAX		0xFFFF
+bool e1000_check_phy_82574(struct e1000_hw *hw);
+bool e1000e_get_laa_state_82571(struct e1000_hw *hw);
+void e1000e_set_laa_state_82571(struct e1000_hw *hw, bool state);
+
+#endif
diff --git a/drivers/net/ethernet/intel/e1000e/Makefile b/drivers/net/ethernet/intel/e1000e/Makefile
new file mode 100644
index 0000000000..0baa15503c
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 1999 - 2018 Intel Corporation.
+
+#
+# Makefile for the Intel(R) PRO/1000 ethernet driver
+#
+
+ccflags-y += -I$(src)
+subdir-ccflags-y += -I$(src)
+
+obj-$(CONFIG_E1000E) += e1000e.o
+
+e1000e-objs := 82571.o ich8lan.o 80003es2lan.o \
+	       mac.o manage.o nvm.o phy.o \
+	       param.o ethtool.o netdev.o ptp.o
+
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
new file mode 100644
index 0000000000..63c3c79380
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -0,0 +1,811 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _E1000_DEFINES_H_
+#define _E1000_DEFINES_H_
+
+/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define REQ_TX_DESCRIPTOR_MULTIPLE  8
+#define REQ_RX_DESCRIPTOR_MULTIPLE  8
+
+/* Definitions for power management and wakeup registers */
+/* Wake Up Control */
+#define E1000_WUC_APME		0x00000001	/* APM Enable */
+#define E1000_WUC_PME_EN	0x00000002	/* PME Enable */
+#define E1000_WUC_PME_STATUS	0x00000004	/* PME Status */
+#define E1000_WUC_APMPME	0x00000008	/* Assert PME on APM Wakeup */
+#define E1000_WUC_PHY_WAKE	0x00000100	/* if PHY supports wakeup */
+
+/* Wake Up Filter Control */
+#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */
+#define E1000_WUFC_MAG  0x00000002 /* Magic Packet Wakeup Enable */
+#define E1000_WUFC_EX   0x00000004 /* Directed Exact Wakeup Enable */
+#define E1000_WUFC_MC   0x00000008 /* Directed Multicast Wakeup Enable */
+#define E1000_WUFC_BC   0x00000010 /* Broadcast Wakeup Enable */
+#define E1000_WUFC_ARP  0x00000020 /* ARP Request Packet Wakeup Enable */
+
+/* Wake Up Status */
+#define E1000_WUS_LNKC         E1000_WUFC_LNKC
+#define E1000_WUS_MAG          E1000_WUFC_MAG
+#define E1000_WUS_EX           E1000_WUFC_EX
+#define E1000_WUS_MC           E1000_WUFC_MC
+#define E1000_WUS_BC           E1000_WUFC_BC
+
+/* Extended Device Control */
+#define E1000_CTRL_EXT_LPCD  0x00000004     /* LCD Power Cycle Done */
+#define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* Value of SW Definable Pin 3 */
+#define E1000_CTRL_EXT_FORCE_SMBUS 0x00000800 /* Force SMBus mode */
+#define E1000_CTRL_EXT_EE_RST    0x00002000 /* Reinitialize from EEPROM */
+#define E1000_CTRL_EXT_SPD_BYPS  0x00008000 /* Speed Select Bypass */
+#define E1000_CTRL_EXT_RO_DIS    0x00020000 /* Relaxed Ordering disable */
+#define E1000_CTRL_EXT_DMA_DYN_CLK_EN 0x00080000 /* DMA Dynamic Clock Gating */
+#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000
+#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES  0x00C00000
+#define E1000_CTRL_EXT_EIAME          0x01000000
+#define E1000_CTRL_EXT_DRV_LOAD       0x10000000 /* Driver loaded bit for FW */
+#define E1000_CTRL_EXT_IAME		0x08000000 /* Int ACK Auto-mask */
+#define E1000_CTRL_EXT_PBA_CLR        0x80000000 /* PBA Clear */
+#define E1000_CTRL_EXT_LSECCK         0x00001000
+#define E1000_CTRL_EXT_PHYPDEN        0x00100000
+
+/* Receive Descriptor bit definitions */
+#define E1000_RXD_STAT_DD       0x01    /* Descriptor Done */
+#define E1000_RXD_STAT_EOP      0x02    /* End of Packet */
+#define E1000_RXD_STAT_IXSM     0x04    /* Ignore checksum */
+#define E1000_RXD_STAT_VP       0x08    /* IEEE VLAN Packet */
+#define E1000_RXD_STAT_UDPCS    0x10    /* UDP xsum calculated */
+#define E1000_RXD_STAT_TCPCS    0x20    /* TCP xsum calculated */
+#define E1000_RXD_ERR_CE        0x01    /* CRC Error */
+#define E1000_RXD_ERR_SE        0x02    /* Symbol Error */
+#define E1000_RXD_ERR_SEQ       0x04    /* Sequence Error */
+#define E1000_RXD_ERR_CXE       0x10    /* Carrier Extension Error */
+#define E1000_RXD_ERR_TCPE      0x20    /* TCP/UDP Checksum Error */
+#define E1000_RXD_ERR_IPE       0x40    /* IP Checksum Error */
+#define E1000_RXD_ERR_RXE       0x80    /* Rx Data Error */
+#define E1000_RXD_SPC_VLAN_MASK 0x0FFF  /* VLAN ID is in lower 12 bits */
+
+#define E1000_RXDEXT_STATERR_TST   0x00000100	/* Time Stamp taken */
+#define E1000_RXDEXT_STATERR_CE    0x01000000
+#define E1000_RXDEXT_STATERR_SE    0x02000000
+#define E1000_RXDEXT_STATERR_SEQ   0x04000000
+#define E1000_RXDEXT_STATERR_CXE   0x10000000
+#define E1000_RXDEXT_STATERR_RXE   0x80000000
+
+/* mask to determine if packets should be dropped due to frame errors */
+#define E1000_RXD_ERR_FRAME_ERR_MASK ( \
+	E1000_RXD_ERR_CE  |		\
+	E1000_RXD_ERR_SE  |		\
+	E1000_RXD_ERR_SEQ |		\
+	E1000_RXD_ERR_CXE |		\
+	E1000_RXD_ERR_RXE)
+
+/* Same mask, but for extended and packet split descriptors */
+#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \
+	E1000_RXDEXT_STATERR_CE  |	\
+	E1000_RXDEXT_STATERR_SE  |	\
+	E1000_RXDEXT_STATERR_SEQ |	\
+	E1000_RXDEXT_STATERR_CXE |	\
+	E1000_RXDEXT_STATERR_RXE)
+
+#define E1000_MRQC_RSS_FIELD_MASK              0xFFFF0000
+#define E1000_MRQC_RSS_FIELD_IPV4_TCP          0x00010000
+#define E1000_MRQC_RSS_FIELD_IPV4              0x00020000
+#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX       0x00040000
+#define E1000_MRQC_RSS_FIELD_IPV6              0x00100000
+#define E1000_MRQC_RSS_FIELD_IPV6_TCP          0x00200000
+
+#define E1000_RXDPS_HDRSTAT_HDRSP              0x00008000
+
+/* Management Control */
+#define E1000_MANC_SMBUS_EN      0x00000001 /* SMBus Enabled - RO */
+#define E1000_MANC_ASF_EN        0x00000002 /* ASF Enabled - RO */
+#define E1000_MANC_ARP_EN        0x00002000 /* Enable ARP Request Filtering */
+#define E1000_MANC_RCV_TCO_EN    0x00020000 /* Receive TCO Packets Enabled */
+#define E1000_MANC_BLK_PHY_RST_ON_IDE   0x00040000 /* Block phy resets */
+/* Enable MAC address filtering */
+#define E1000_MANC_EN_MAC_ADDR_FILTER   0x00100000
+/* Enable MNG packets to host memory */
+#define E1000_MANC_EN_MNG2HOST   0x00200000
+
+#define E1000_MANC2H_PORT_623    0x00000020 /* Port 0x26f */
+#define E1000_MANC2H_PORT_664    0x00000040 /* Port 0x298 */
+#define E1000_MDEF_PORT_623      0x00000800 /* Port 0x26f */
+#define E1000_MDEF_PORT_664      0x00000400 /* Port 0x298 */
+
+/* Receive Control */
+#define E1000_RCTL_EN             0x00000002    /* enable */
+#define E1000_RCTL_SBP            0x00000004    /* store bad packet */
+#define E1000_RCTL_UPE            0x00000008    /* unicast promiscuous enable */
+#define E1000_RCTL_MPE            0x00000010    /* multicast promiscuous enab */
+#define E1000_RCTL_LPE            0x00000020    /* long packet enable */
+#define E1000_RCTL_LBM_NO         0x00000000    /* no loopback mode */
+#define E1000_RCTL_LBM_MAC        0x00000040    /* MAC loopback mode */
+#define E1000_RCTL_LBM_TCVR       0x000000C0    /* tcvr loopback mode */
+#define E1000_RCTL_DTYP_PS        0x00000400    /* Packet Split descriptor */
+#define E1000_RCTL_RDMTS_HALF     0x00000000    /* Rx desc min threshold size */
+#define E1000_RCTL_RDMTS_HEX      0x00010000
+#define E1000_RCTL_MO_SHIFT       12            /* multicast offset shift */
+#define E1000_RCTL_MO_3           0x00003000    /* multicast offset 15:4 */
+#define E1000_RCTL_BAM            0x00008000    /* broadcast enable */
+/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */
+#define E1000_RCTL_SZ_2048        0x00000000    /* Rx buffer size 2048 */
+#define E1000_RCTL_SZ_1024        0x00010000    /* Rx buffer size 1024 */
+#define E1000_RCTL_SZ_512         0x00020000    /* Rx buffer size 512 */
+#define E1000_RCTL_SZ_256         0x00030000    /* Rx buffer size 256 */
+/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */
+#define E1000_RCTL_SZ_16384       0x00010000    /* Rx buffer size 16384 */
+#define E1000_RCTL_SZ_8192        0x00020000    /* Rx buffer size 8192 */
+#define E1000_RCTL_SZ_4096        0x00030000    /* Rx buffer size 4096 */
+#define E1000_RCTL_VFE            0x00040000    /* vlan filter enable */
+#define E1000_RCTL_CFIEN          0x00080000    /* canonical form enable */
+#define E1000_RCTL_CFI            0x00100000    /* canonical form indicator */
+#define E1000_RCTL_DPF            0x00400000    /* Discard Pause Frames */
+#define E1000_RCTL_PMCF           0x00800000    /* pass MAC control frames */
+#define E1000_RCTL_BSEX           0x02000000    /* Buffer size extension */
+#define E1000_RCTL_SECRC          0x04000000    /* Strip Ethernet CRC */
+
+/* Use byte values for the following shift parameters
+ * Usage:
+ *     psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) &
+ *                  E1000_PSRCTL_BSIZE0_MASK) |
+ *                ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) &
+ *                  E1000_PSRCTL_BSIZE1_MASK) |
+ *                ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) &
+ *                  E1000_PSRCTL_BSIZE2_MASK) |
+ *                ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |;
+ *                  E1000_PSRCTL_BSIZE3_MASK))
+ * where value0 = [128..16256],  default=256
+ *       value1 = [1024..64512], default=4096
+ *       value2 = [0..64512],    default=4096
+ *       value3 = [0..64512],    default=0
+ */
+
+#define E1000_PSRCTL_BSIZE0_MASK   0x0000007F
+#define E1000_PSRCTL_BSIZE1_MASK   0x00003F00
+#define E1000_PSRCTL_BSIZE2_MASK   0x003F0000
+#define E1000_PSRCTL_BSIZE3_MASK   0x3F000000
+
+#define E1000_PSRCTL_BSIZE0_SHIFT  7            /* Shift _right_ 7 */
+#define E1000_PSRCTL_BSIZE1_SHIFT  2            /* Shift _right_ 2 */
+#define E1000_PSRCTL_BSIZE2_SHIFT  6            /* Shift _left_ 6 */
+#define E1000_PSRCTL_BSIZE3_SHIFT 14            /* Shift _left_ 14 */
+
+/* SWFW_SYNC Definitions */
+#define E1000_SWFW_EEP_SM   0x1
+#define E1000_SWFW_PHY0_SM  0x2
+#define E1000_SWFW_PHY1_SM  0x4
+#define E1000_SWFW_CSR_SM   0x8
+
+/* Device Control */
+#define E1000_CTRL_FD       0x00000001  /* Full duplex.0=half; 1=full */
+#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master requests */
+#define E1000_CTRL_LRST     0x00000008  /* Link reset. 0=normal,1=reset */
+#define E1000_CTRL_ASDE     0x00000020  /* Auto-speed detect enable */
+#define E1000_CTRL_SLU      0x00000040  /* Set link up (Force Link) */
+#define E1000_CTRL_ILOS     0x00000080  /* Invert Loss-Of Signal */
+#define E1000_CTRL_SPD_SEL  0x00000300  /* Speed Select Mask */
+#define E1000_CTRL_SPD_10   0x00000000  /* Force 10Mb */
+#define E1000_CTRL_SPD_100  0x00000100  /* Force 100Mb */
+#define E1000_CTRL_SPD_1000 0x00000200  /* Force 1Gb */
+#define E1000_CTRL_FRCSPD   0x00000800  /* Force Speed */
+#define E1000_CTRL_FRCDPX   0x00001000  /* Force Duplex */
+#define E1000_CTRL_LANPHYPC_OVERRIDE 0x00010000 /* SW control of LANPHYPC */
+#define E1000_CTRL_LANPHYPC_VALUE    0x00020000 /* SW value of LANPHYPC */
+#define E1000_CTRL_MEHE     0x00080000  /* Memory Error Handling Enable */
+#define E1000_CTRL_SWDPIN0  0x00040000  /* SWDPIN 0 value */
+#define E1000_CTRL_SWDPIN1  0x00080000  /* SWDPIN 1 value */
+#define E1000_CTRL_ADVD3WUC 0x00100000  /* D3 WUC */
+#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 /* PHY PM enable */
+#define E1000_CTRL_SWDPIO0  0x00400000  /* SWDPIN 0 Input or output */
+#define E1000_CTRL_RST      0x04000000  /* Global reset */
+#define E1000_CTRL_RFCE     0x08000000  /* Receive Flow Control enable */
+#define E1000_CTRL_TFCE     0x10000000  /* Transmit flow control enable */
+#define E1000_CTRL_VME      0x40000000  /* IEEE VLAN mode enable */
+#define E1000_CTRL_PHY_RST  0x80000000  /* PHY Reset */
+
+#define E1000_PCS_LCTL_FORCE_FCTRL	0x80
+
+#define E1000_PCS_LSTS_AN_COMPLETE	0x10000
+
+/* Device Status */
+#define E1000_STATUS_FD         0x00000001      /* Full duplex.0=half,1=full */
+#define E1000_STATUS_LU         0x00000002      /* Link up.0=no,1=link */
+#define E1000_STATUS_FUNC_MASK  0x0000000C      /* PCI Function Mask */
+#define E1000_STATUS_FUNC_SHIFT 2
+#define E1000_STATUS_FUNC_1     0x00000004      /* Function 1 */
+#define E1000_STATUS_TXOFF      0x00000010      /* transmission paused */
+#define E1000_STATUS_SPEED_MASK 0x000000C0
+#define E1000_STATUS_SPEED_10   0x00000000      /* Speed 10Mb/s */
+#define E1000_STATUS_SPEED_100  0x00000040      /* Speed 100Mb/s */
+#define E1000_STATUS_SPEED_1000 0x00000080      /* Speed 1000Mb/s */
+#define E1000_STATUS_LAN_INIT_DONE 0x00000200   /* Lan Init Completion by NVM */
+#define E1000_STATUS_PHYRA      0x00000400      /* PHY Reset Asserted */
+#define E1000_STATUS_GIO_MASTER_ENABLE	0x00080000	/* Master Req status */
+
+/* PCIm function state */
+#define E1000_STATUS_PCIM_STATE	0x40000000
+
+#define HALF_DUPLEX 1
+#define FULL_DUPLEX 2
+
+#define ADVERTISE_10_HALF                 0x0001
+#define ADVERTISE_10_FULL                 0x0002
+#define ADVERTISE_100_HALF                0x0004
+#define ADVERTISE_100_FULL                0x0008
+#define ADVERTISE_1000_HALF               0x0010 /* Not used, just FYI */
+#define ADVERTISE_1000_FULL               0x0020
+
+/* 1000/H is not supported, nor spec-compliant. */
+#define E1000_ALL_SPEED_DUPLEX	( \
+	ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \
+	ADVERTISE_100_FULL | ADVERTISE_1000_FULL)
+#define E1000_ALL_NOT_GIG	( \
+	ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \
+	ADVERTISE_100_FULL)
+#define E1000_ALL_100_SPEED	(ADVERTISE_100_HALF | ADVERTISE_100_FULL)
+#define E1000_ALL_10_SPEED	(ADVERTISE_10_HALF | ADVERTISE_10_FULL)
+#define E1000_ALL_HALF_DUPLEX	(ADVERTISE_10_HALF | ADVERTISE_100_HALF)
+
+#define AUTONEG_ADVERTISE_SPEED_DEFAULT   E1000_ALL_SPEED_DUPLEX
+
+/* LED Control */
+#define E1000_PHY_LED0_MODE_MASK          0x00000007
+#define E1000_PHY_LED0_IVRT               0x00000008
+#define E1000_PHY_LED0_MASK               0x0000001F
+
+#define E1000_LEDCTL_LED0_MODE_MASK       0x0000000F
+#define E1000_LEDCTL_LED0_MODE_SHIFT      0
+#define E1000_LEDCTL_LED0_IVRT            0x00000040
+#define E1000_LEDCTL_LED0_BLINK           0x00000080
+
+#define E1000_LEDCTL_MODE_LINK_UP       0x2
+#define E1000_LEDCTL_MODE_LED_ON        0xE
+#define E1000_LEDCTL_MODE_LED_OFF       0xF
+
+/* Transmit Descriptor bit definitions */
+#define E1000_TXD_DTYP_D     0x00100000 /* Data Descriptor */
+#define E1000_TXD_POPTS_IXSM 0x01       /* Insert IP checksum */
+#define E1000_TXD_POPTS_TXSM 0x02       /* Insert TCP/UDP checksum */
+#define E1000_TXD_CMD_EOP    0x01000000 /* End of Packet */
+#define E1000_TXD_CMD_IFCS   0x02000000 /* Insert FCS (Ethernet CRC) */
+#define E1000_TXD_CMD_IC     0x04000000 /* Insert Checksum */
+#define E1000_TXD_CMD_RS     0x08000000 /* Report Status */
+#define E1000_TXD_CMD_RPS    0x10000000 /* Report Packet Sent */
+#define E1000_TXD_CMD_DEXT   0x20000000 /* Descriptor extension (0 = legacy) */
+#define E1000_TXD_CMD_VLE    0x40000000 /* Add VLAN tag */
+#define E1000_TXD_CMD_IDE    0x80000000 /* Enable Tidv register */
+#define E1000_TXD_STAT_DD    0x00000001 /* Descriptor Done */
+#define E1000_TXD_STAT_EC    0x00000002 /* Excess Collisions */
+#define E1000_TXD_STAT_LC    0x00000004 /* Late Collisions */
+#define E1000_TXD_STAT_TU    0x00000008 /* Transmit underrun */
+#define E1000_TXD_CMD_TCP    0x01000000 /* TCP packet */
+#define E1000_TXD_CMD_IP     0x02000000 /* IP packet */
+#define E1000_TXD_CMD_TSE    0x04000000 /* TCP Seg enable */
+#define E1000_TXD_STAT_TC    0x00000004 /* Tx Underrun */
+#define E1000_TXD_EXTCMD_TSTAMP	0x00000010 /* IEEE1588 Timestamp packet */
+
+/* Transmit Control */
+#define E1000_TCTL_EN     0x00000002    /* enable Tx */
+#define E1000_TCTL_PSP    0x00000008    /* pad short packets */
+#define E1000_TCTL_CT     0x00000ff0    /* collision threshold */
+#define E1000_TCTL_COLD   0x003ff000    /* collision distance */
+#define E1000_TCTL_RTLC   0x01000000    /* Re-transmit on late collision */
+#define E1000_TCTL_MULR   0x10000000    /* Multiple request support */
+
+/* SerDes Control */
+#define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400
+#define E1000_SCTL_ENABLE_SERDES_LOOPBACK	0x0410
+
+/* Receive Checksum Control */
+#define E1000_RXCSUM_TUOFL     0x00000200   /* TCP / UDP checksum offload */
+#define E1000_RXCSUM_IPPCSE    0x00001000   /* IP payload checksum enable */
+#define E1000_RXCSUM_PCSD      0x00002000   /* packet checksum disabled */
+
+/* Header split receive */
+#define E1000_RFCTL_NFSW_DIS            0x00000040
+#define E1000_RFCTL_NFSR_DIS            0x00000080
+#define E1000_RFCTL_ACK_DIS             0x00001000
+#define E1000_RFCTL_EXTEN               0x00008000
+#define E1000_RFCTL_IPV6_EX_DIS         0x00010000
+#define E1000_RFCTL_NEW_IPV6_EXT_DIS    0x00020000
+
+/* Collision related configuration parameters */
+#define E1000_COLLISION_THRESHOLD       15
+#define E1000_CT_SHIFT                  4
+#define E1000_COLLISION_DISTANCE        63
+#define E1000_COLD_SHIFT                12
+
+/* Default values for the transmit IPG register */
+#define DEFAULT_82543_TIPG_IPGT_COPPER 8
+
+#define E1000_TIPG_IPGT_MASK  0x000003FF
+
+#define DEFAULT_82543_TIPG_IPGR1 8
+#define E1000_TIPG_IPGR1_SHIFT  10
+
+#define DEFAULT_82543_TIPG_IPGR2 6
+#define DEFAULT_80003ES2LAN_TIPG_IPGR2 7
+#define E1000_TIPG_IPGR2_SHIFT  20
+
+#define MAX_JUMBO_FRAME_SIZE    0x3F00
+#define E1000_TX_PTR_GAP		0x1F
+
+/* Extended Configuration Control and Size */
+#define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP      0x00000020
+#define E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE       0x00000001
+#define E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE       0x00000008
+#define E1000_EXTCNF_CTRL_SWFLAG                 0x00000020
+#define E1000_EXTCNF_CTRL_GATE_PHY_CFG           0x00000080
+#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK   0x00FF0000
+#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT          16
+#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK   0x0FFF0000
+#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT          16
+
+#define E1000_PHY_CTRL_D0A_LPLU           0x00000002
+#define E1000_PHY_CTRL_NOND0A_LPLU        0x00000004
+#define E1000_PHY_CTRL_NOND0A_GBE_DISABLE 0x00000008
+#define E1000_PHY_CTRL_GBE_DISABLE        0x00000040
+
+#define E1000_KABGTXD_BGSQLBIAS           0x00050000
+
+/* Low Power IDLE Control */
+#define E1000_LPIC_LPIET_SHIFT		24	/* Low Power Idle Entry Time */
+
+/* PBA constants */
+#define E1000_PBA_8K  0x0008    /* 8KB */
+#define E1000_PBA_16K 0x0010    /* 16KB */
+
+#define E1000_PBA_RXA_MASK	0xFFFF
+
+#define E1000_PBS_16K E1000_PBA_16K
+
+/* Uncorrectable/correctable ECC Error counts and enable bits */
+#define E1000_PBECCSTS_CORR_ERR_CNT_MASK	0x000000FF
+#define E1000_PBECCSTS_UNCORR_ERR_CNT_MASK	0x0000FF00
+#define E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT	8
+#define E1000_PBECCSTS_ECC_ENABLE		0x00010000
+
+#define IFS_MAX       80
+#define IFS_MIN       40
+#define IFS_RATIO     4
+#define IFS_STEP      10
+#define MIN_NUM_XMITS 1000
+
+/* SW Semaphore Register */
+#define E1000_SWSM_SMBI         0x00000001 /* Driver Semaphore bit */
+#define E1000_SWSM_SWESMBI      0x00000002 /* FW Semaphore bit */
+#define E1000_SWSM_DRV_LOAD     0x00000008 /* Driver Loaded Bit */
+
+#define E1000_SWSM2_LOCK        0x00000002 /* Secondary driver semaphore bit */
+
+/* Interrupt Cause Read */
+#define E1000_ICR_TXDW          0x00000001 /* Transmit desc written back */
+#define E1000_ICR_LSC           0x00000004 /* Link Status Change */
+#define E1000_ICR_RXSEQ         0x00000008 /* Rx sequence error */
+#define E1000_ICR_RXDMT0        0x00000010 /* Rx desc min. threshold (0) */
+#define E1000_ICR_RXO           0x00000040 /* Receiver Overrun */
+#define E1000_ICR_RXT0          0x00000080 /* Rx timer intr (ring 0) */
+#define E1000_ICR_MDAC          0x00000200 /* MDIO Access Complete */
+#define E1000_ICR_SRPD          0x00010000 /* Small Receive Packet Detected */
+#define E1000_ICR_ACK           0x00020000 /* Receive ACK Frame Detected */
+#define E1000_ICR_MNG           0x00040000 /* Manageability Event Detected */
+#define E1000_ICR_ECCER         0x00400000 /* Uncorrectable ECC Error */
+/* If this bit asserted, the driver should claim the interrupt */
+#define E1000_ICR_INT_ASSERTED	0x80000000
+#define E1000_ICR_RXQ0          0x00100000 /* Rx Queue 0 Interrupt */
+#define E1000_ICR_RXQ1          0x00200000 /* Rx Queue 1 Interrupt */
+#define E1000_ICR_TXQ0          0x00400000 /* Tx Queue 0 Interrupt */
+#define E1000_ICR_TXQ1          0x00800000 /* Tx Queue 1 Interrupt */
+#define E1000_ICR_OTHER         0x01000000 /* Other Interrupt */
+
+/* PBA ECC Register */
+#define E1000_PBA_ECC_COUNTER_MASK  0xFFF00000 /* ECC counter mask */
+#define E1000_PBA_ECC_COUNTER_SHIFT 20         /* ECC counter shift value */
+#define E1000_PBA_ECC_CORR_EN       0x00000001 /* ECC correction enable */
+#define E1000_PBA_ECC_STAT_CLR      0x00000002 /* Clear ECC error counter */
+#define E1000_PBA_ECC_INT_EN        0x00000004 /* Enable ICR bit 5 for ECC */
+
+/* This defines the bits that are set in the Interrupt Mask
+ * Set/Read Register.  Each bit is documented below:
+ *   o RXT0   = Receiver Timer Interrupt (ring 0)
+ *   o TXDW   = Transmit Descriptor Written Back
+ *   o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0)
+ *   o RXSEQ  = Receive Sequence Error
+ *   o LSC    = Link Status Change
+ */
+#define IMS_ENABLE_MASK ( \
+	E1000_IMS_RXT0   |    \
+	E1000_IMS_TXDW   |    \
+	E1000_IMS_RXDMT0 |    \
+	E1000_IMS_RXSEQ  |    \
+	E1000_IMS_LSC)
+
+/* These are all of the events related to the OTHER interrupt.
+ */
+#define IMS_OTHER_MASK ( \
+	E1000_IMS_LSC  | \
+	E1000_IMS_RXO  | \
+	E1000_IMS_MDAC | \
+	E1000_IMS_SRPD | \
+	E1000_IMS_ACK  | \
+	E1000_IMS_MNG)
+
+/* Interrupt Mask Set */
+#define E1000_IMS_TXDW      E1000_ICR_TXDW      /* Transmit desc written back */
+#define E1000_IMS_LSC       E1000_ICR_LSC       /* Link Status Change */
+#define E1000_IMS_RXSEQ     E1000_ICR_RXSEQ     /* Rx sequence error */
+#define E1000_IMS_RXDMT0    E1000_ICR_RXDMT0    /* Rx desc min. threshold */
+#define E1000_IMS_RXO       E1000_ICR_RXO       /* Receiver Overrun */
+#define E1000_IMS_RXT0      E1000_ICR_RXT0      /* Rx timer intr */
+#define E1000_IMS_MDAC      E1000_ICR_MDAC      /* MDIO Access Complete */
+#define E1000_IMS_SRPD      E1000_ICR_SRPD      /* Small Receive Packet */
+#define E1000_IMS_ACK       E1000_ICR_ACK       /* Receive ACK Frame Detected */
+#define E1000_IMS_MNG       E1000_ICR_MNG       /* Manageability Event */
+#define E1000_IMS_ECCER     E1000_ICR_ECCER     /* Uncorrectable ECC Error */
+#define E1000_IMS_RXQ0      E1000_ICR_RXQ0      /* Rx Queue 0 Interrupt */
+#define E1000_IMS_RXQ1      E1000_ICR_RXQ1      /* Rx Queue 1 Interrupt */
+#define E1000_IMS_TXQ0      E1000_ICR_TXQ0      /* Tx Queue 0 Interrupt */
+#define E1000_IMS_TXQ1      E1000_ICR_TXQ1      /* Tx Queue 1 Interrupt */
+#define E1000_IMS_OTHER     E1000_ICR_OTHER     /* Other Interrupt */
+
+/* Interrupt Cause Set */
+#define E1000_ICS_LSC       E1000_ICR_LSC       /* Link Status Change */
+#define E1000_ICS_RXSEQ     E1000_ICR_RXSEQ     /* Rx sequence error */
+#define E1000_ICS_RXDMT0    E1000_ICR_RXDMT0    /* Rx desc min. threshold */
+#define E1000_ICS_OTHER     E1000_ICR_OTHER     /* Other Interrupt */
+
+/* Transmit Descriptor Control */
+#define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */
+#define E1000_TXDCTL_HTHRESH 0x00003F00 /* TXDCTL Host Threshold */
+#define E1000_TXDCTL_WTHRESH 0x003F0000 /* TXDCTL Writeback Threshold */
+#define E1000_TXDCTL_GRAN    0x01000000 /* TXDCTL Granularity */
+#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */
+#define E1000_TXDCTL_MAX_TX_DESC_PREFETCH 0x0100001F /* GRAN=1, PTHRESH=31 */
+/* Enable the counting of desc. still to be processed. */
+#define E1000_TXDCTL_COUNT_DESC 0x00400000
+
+/* Flow Control Constants */
+#define FLOW_CONTROL_ADDRESS_LOW  0x00C28001
+#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100
+#define FLOW_CONTROL_TYPE         0x8808
+
+/* 802.1q VLAN Packet Size */
+#define E1000_VLAN_FILTER_TBL_SIZE 128  /* VLAN Filter Table (4096 bits) */
+
+/* Receive Address
+ * Number of high/low register pairs in the RAR. The RAR (Receive Address
+ * Registers) holds the directed and multicast addresses that we monitor.
+ * Technically, we have 16 spots.  However, we reserve one of these spots
+ * (RAR[15]) for our directed address used by controllers with
+ * manageability enabled, allowing us room for 15 multicast addresses.
+ */
+#define E1000_RAR_ENTRIES     15
+#define E1000_RAH_AV  0x80000000        /* Receive descriptor valid */
+#define E1000_RAL_MAC_ADDR_LEN 4
+#define E1000_RAH_MAC_ADDR_LEN 2
+
+/* Error Codes */
+#define E1000_ERR_NVM      1
+#define E1000_ERR_PHY      2
+#define E1000_ERR_CONFIG   3
+#define E1000_ERR_PARAM    4
+#define E1000_ERR_MAC_INIT 5
+#define E1000_ERR_PHY_TYPE 6
+#define E1000_ERR_RESET   9
+#define E1000_ERR_MASTER_REQUESTS_PENDING 10
+#define E1000_ERR_HOST_INTERFACE_COMMAND 11
+#define E1000_BLK_PHY_RESET   12
+#define E1000_ERR_SWFW_SYNC 13
+#define E1000_NOT_IMPLEMENTED 14
+#define E1000_ERR_INVALID_ARGUMENT  16
+#define E1000_ERR_NO_SPACE          17
+#define E1000_ERR_NVM_PBA_SECTION   18
+
+/* Loop limit on how long we wait for auto-negotiation to complete */
+#define FIBER_LINK_UP_LIMIT               50
+#define COPPER_LINK_UP_LIMIT              10
+#define PHY_AUTO_NEG_LIMIT                45
+#define PHY_FORCE_LIMIT                   20
+/* Number of 100 microseconds we wait for PCI Express master disable */
+#define MASTER_DISABLE_TIMEOUT      800
+/* Number of milliseconds we wait for PHY configuration done after MAC reset */
+#define PHY_CFG_TIMEOUT             100
+/* Number of 2 milliseconds we wait for acquiring MDIO ownership. */
+#define MDIO_OWNERSHIP_TIMEOUT      10
+/* Number of milliseconds for NVM auto read done after MAC reset. */
+#define AUTO_READ_DONE_TIMEOUT      10
+
+/* Flow Control */
+#define E1000_FCRTH_RTH  0x0000FFF8     /* Mask Bits[15:3] for RTH */
+#define E1000_FCRTL_RTL  0x0000FFF8     /* Mask Bits[15:3] for RTL */
+#define E1000_FCRTL_XONE 0x80000000     /* Enable XON frame transmission */
+
+/* Transmit Configuration Word */
+#define E1000_TXCW_FD         0x00000020        /* TXCW full duplex */
+#define E1000_TXCW_PAUSE      0x00000080        /* TXCW sym pause request */
+#define E1000_TXCW_ASM_DIR    0x00000100        /* TXCW astm pause direction */
+#define E1000_TXCW_PAUSE_MASK 0x00000180        /* TXCW pause request mask */
+#define E1000_TXCW_ANE        0x80000000        /* Auto-neg enable */
+
+/* Receive Configuration Word */
+#define E1000_RXCW_CW         0x0000ffff        /* RxConfigWord mask */
+#define E1000_RXCW_IV         0x08000000        /* Receive config invalid */
+#define E1000_RXCW_C          0x20000000        /* Receive config */
+#define E1000_RXCW_SYNCH      0x40000000        /* Receive config synch */
+
+/* HH Time Sync */
+#define E1000_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK	0x0000F000 /* max delay */
+#define E1000_TSYNCTXCTL_SYNC_COMP		0x40000000 /* sync complete */
+#define E1000_TSYNCTXCTL_START_SYNC		0x80000000 /* initiate sync */
+
+#define E1000_TSYNCTXCTL_VALID		0x00000001 /* Tx timestamp valid */
+#define E1000_TSYNCTXCTL_ENABLED	0x00000010 /* enable Tx timestamping */
+
+#define E1000_TSYNCRXCTL_VALID		0x00000001 /* Rx timestamp valid */
+#define E1000_TSYNCRXCTL_TYPE_MASK	0x0000000E /* Rx type mask */
+#define E1000_TSYNCRXCTL_TYPE_L2_V2	0x00
+#define E1000_TSYNCRXCTL_TYPE_L4_V1	0x02
+#define E1000_TSYNCRXCTL_TYPE_L2_L4_V2	0x04
+#define E1000_TSYNCRXCTL_TYPE_ALL	0x08
+#define E1000_TSYNCRXCTL_TYPE_EVENT_V2	0x0A
+#define E1000_TSYNCRXCTL_ENABLED	0x00000010 /* enable Rx timestamping */
+#define E1000_TSYNCRXCTL_SYSCFI		0x00000020 /* Sys clock frequency */
+
+#define E1000_RXMTRL_PTP_V1_SYNC_MESSAGE	0x00000000
+#define E1000_RXMTRL_PTP_V1_DELAY_REQ_MESSAGE	0x00010000
+
+#define E1000_RXMTRL_PTP_V2_SYNC_MESSAGE	0x00000000
+#define E1000_RXMTRL_PTP_V2_DELAY_REQ_MESSAGE	0x01000000
+
+#define E1000_TIMINCA_INCPERIOD_SHIFT	24
+#define E1000_TIMINCA_INCVALUE_MASK	0x00FFFFFF
+
+/* PCI Express Control */
+#define E1000_GCR_RXD_NO_SNOOP          0x00000001
+#define E1000_GCR_RXDSCW_NO_SNOOP       0x00000002
+#define E1000_GCR_RXDSCR_NO_SNOOP       0x00000004
+#define E1000_GCR_TXD_NO_SNOOP          0x00000008
+#define E1000_GCR_TXDSCW_NO_SNOOP       0x00000010
+#define E1000_GCR_TXDSCR_NO_SNOOP       0x00000020
+
+#define PCIE_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP         | \
+			   E1000_GCR_RXDSCW_NO_SNOOP      | \
+			   E1000_GCR_RXDSCR_NO_SNOOP      | \
+			   E1000_GCR_TXD_NO_SNOOP         | \
+			   E1000_GCR_TXDSCW_NO_SNOOP      | \
+			   E1000_GCR_TXDSCR_NO_SNOOP)
+
+/* NVM Control */
+#define E1000_EECD_SK        0x00000001 /* NVM Clock */
+#define E1000_EECD_CS        0x00000002 /* NVM Chip Select */
+#define E1000_EECD_DI        0x00000004 /* NVM Data In */
+#define E1000_EECD_DO        0x00000008 /* NVM Data Out */
+#define E1000_EECD_REQ       0x00000040 /* NVM Access Request */
+#define E1000_EECD_GNT       0x00000080 /* NVM Access Grant */
+#define E1000_EECD_PRES      0x00000100 /* NVM Present */
+#define E1000_EECD_SIZE      0x00000200 /* NVM Size (0=64 word 1=256 word) */
+/* NVM Addressing bits based on type (0-small, 1-large) */
+#define E1000_EECD_ADDR_BITS 0x00000400
+#define E1000_NVM_GRANT_ATTEMPTS   1000 /* NVM # attempts to gain grant */
+#define E1000_EECD_AUTO_RD          0x00000200  /* NVM Auto Read done */
+#define E1000_EECD_SIZE_EX_MASK     0x00007800  /* NVM Size */
+#define E1000_EECD_SIZE_EX_SHIFT     11
+#define E1000_EECD_FLUPD     0x00080000 /* Update FLASH */
+#define E1000_EECD_AUPDEN    0x00100000 /* Enable Autonomous FLASH update */
+#define E1000_EECD_SEC1VAL   0x00400000 /* Sector One Valid */
+#define E1000_EECD_SEC1VAL_VALID_MASK (E1000_EECD_AUTO_RD | E1000_EECD_PRES)
+
+#define E1000_NVM_RW_REG_DATA	16	/* Offset to data in NVM r/w regs */
+#define E1000_NVM_RW_REG_DONE	2	/* Offset to READ/WRITE done bit */
+#define E1000_NVM_RW_REG_START	1	/* Start operation */
+#define E1000_NVM_RW_ADDR_SHIFT	2	/* Shift to the address bits */
+#define E1000_NVM_POLL_WRITE	1	/* Flag for polling write complete */
+#define E1000_NVM_POLL_READ	0	/* Flag for polling read complete */
+#define E1000_FLASH_UPDATES	2000
+
+/* NVM Word Offsets */
+#define NVM_COMPAT                 0x0003
+#define NVM_ID_LED_SETTINGS        0x0004
+#define NVM_FUTURE_INIT_WORD1      0x0019
+#define NVM_COMPAT_VALID_CSUM      0x0001
+#define NVM_FUTURE_INIT_WORD1_VALID_CSUM	0x0040
+
+#define NVM_INIT_CONTROL2_REG      0x000F
+#define NVM_INIT_CONTROL3_PORT_B   0x0014
+#define NVM_INIT_3GIO_3            0x001A
+#define NVM_INIT_CONTROL3_PORT_A   0x0024
+#define NVM_CFG                    0x0012
+#define NVM_ALT_MAC_ADDR_PTR       0x0037
+#define NVM_CHECKSUM_REG           0x003F
+
+#define E1000_NVM_CFG_DONE_PORT_0  0x40000 /* MNG config cycle done */
+#define E1000_NVM_CFG_DONE_PORT_1  0x80000 /* ...for second port */
+
+/* Mask bits for fields in Word 0x0f of the NVM */
+#define NVM_WORD0F_PAUSE_MASK       0x3000
+#define NVM_WORD0F_PAUSE            0x1000
+#define NVM_WORD0F_ASM_DIR          0x2000
+
+/* Mask bits for fields in Word 0x1a of the NVM */
+#define NVM_WORD1A_ASPM_MASK  0x000C
+
+/* Mask bits for fields in Word 0x03 of the EEPROM */
+#define NVM_COMPAT_LOM    0x0800
+
+/* length of string needed to store PBA number */
+#define E1000_PBANUM_LENGTH             11
+
+/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */
+#define NVM_SUM                    0xBABA
+
+/* PBA (printed board assembly) number words */
+#define NVM_PBA_OFFSET_0           8
+#define NVM_PBA_OFFSET_1           9
+#define NVM_PBA_PTR_GUARD          0xFAFA
+#define NVM_WORD_SIZE_BASE_SHIFT   6
+
+/* NVM Commands - SPI */
+#define NVM_MAX_RETRY_SPI          5000 /* Max wait of 5ms, for RDY signal */
+#define NVM_READ_OPCODE_SPI        0x03 /* NVM read opcode */
+#define NVM_WRITE_OPCODE_SPI       0x02 /* NVM write opcode */
+#define NVM_A8_OPCODE_SPI          0x08 /* opcode bit-3 = address bit-8 */
+#define NVM_WREN_OPCODE_SPI        0x06 /* NVM set Write Enable latch */
+#define NVM_RDSR_OPCODE_SPI        0x05 /* NVM read Status register */
+
+/* SPI NVM Status Register */
+#define NVM_STATUS_RDY_SPI         0x01
+
+/* Word definitions for ID LED Settings */
+#define ID_LED_RESERVED_0000 0x0000
+#define ID_LED_RESERVED_FFFF 0xFFFF
+#define ID_LED_DEFAULT       ((ID_LED_OFF1_ON2  << 12) | \
+			      (ID_LED_OFF1_OFF2 <<  8) | \
+			      (ID_LED_DEF1_DEF2 <<  4) | \
+			      (ID_LED_DEF1_DEF2))
+#define ID_LED_DEF1_DEF2     0x1
+#define ID_LED_DEF1_ON2      0x2
+#define ID_LED_DEF1_OFF2     0x3
+#define ID_LED_ON1_DEF2      0x4
+#define ID_LED_ON1_ON2       0x5
+#define ID_LED_ON1_OFF2      0x6
+#define ID_LED_OFF1_DEF2     0x7
+#define ID_LED_OFF1_ON2      0x8
+#define ID_LED_OFF1_OFF2     0x9
+
+#define IGP_ACTIVITY_LED_MASK   0xFFFFF0FF
+#define IGP_ACTIVITY_LED_ENABLE 0x0300
+#define IGP_LED3_MODE           0x07000000
+
+/* PCI/PCI-X/PCI-EX Config space */
+#define PCI_HEADER_TYPE_REGISTER     0x0E
+#define PCIE_LINK_STATUS             0x12
+
+#define PCI_HEADER_TYPE_MULTIFUNC    0x80
+#define PCIE_LINK_WIDTH_MASK         0x3F0
+#define PCIE_LINK_WIDTH_SHIFT        4
+
+#define PHY_REVISION_MASK      0xFFFFFFF0
+#define MAX_PHY_REG_ADDRESS    0x1F  /* 5 bit address bus (0-0x1F) */
+#define MAX_PHY_MULTI_PAGE_REG 0xF
+
+/* Bit definitions for valid PHY IDs.
+ * I = Integrated
+ * E = External
+ */
+#define M88E1000_E_PHY_ID    0x01410C50
+#define M88E1000_I_PHY_ID    0x01410C30
+#define M88E1011_I_PHY_ID    0x01410C20
+#define IGP01E1000_I_PHY_ID  0x02A80380
+#define M88E1111_I_PHY_ID    0x01410CC0
+#define GG82563_E_PHY_ID     0x01410CA0
+#define IGP03E1000_E_PHY_ID  0x02A80390
+#define IFE_E_PHY_ID         0x02A80330
+#define IFE_PLUS_E_PHY_ID    0x02A80320
+#define IFE_C_E_PHY_ID       0x02A80310
+#define BME1000_E_PHY_ID     0x01410CB0
+#define BME1000_E_PHY_ID_R2  0x01410CB1
+#define I82577_E_PHY_ID      0x01540050
+#define I82578_E_PHY_ID      0x004DD040
+#define I82579_E_PHY_ID      0x01540090
+#define I217_E_PHY_ID        0x015400A0
+
+/* M88E1000 Specific Registers */
+#define M88E1000_PHY_SPEC_CTRL     0x10  /* PHY Specific Control Register */
+#define M88E1000_PHY_SPEC_STATUS   0x11  /* PHY Specific Status Register */
+#define M88E1000_EXT_PHY_SPEC_CTRL 0x14  /* Extended PHY Specific Control */
+
+#define M88E1000_PHY_PAGE_SELECT   0x1D  /* Reg 29 for page number setting */
+#define M88E1000_PHY_GEN_CONTROL   0x1E  /* Its meaning depends on reg 29 */
+
+/* M88E1000 PHY Specific Control Register */
+#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reversal enabled */
+#define M88E1000_PSCR_MDI_MANUAL_MODE  0x0000  /* MDI Crossover Mode bits 6:5 */
+					       /* Manual MDI configuration */
+#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020  /* Manual MDIX configuration */
+/* 1000BASE-T: Auto crossover, 100BASE-TX/10BASE-T: MDI Mode */
+#define M88E1000_PSCR_AUTO_X_1000T     0x0040
+/* Auto crossover enabled all speeds */
+#define M88E1000_PSCR_AUTO_X_MODE      0x0060
+#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Transmit */
+
+/* M88E1000 PHY Specific Status Register */
+#define M88E1000_PSSR_REV_POLARITY       0x0002 /* 1=Polarity reversed */
+#define M88E1000_PSSR_DOWNSHIFT          0x0020 /* 1=Downshifted */
+#define M88E1000_PSSR_MDIX               0x0040 /* 1=MDIX; 0=MDI */
+/* 0=<50M; 1=50-80M; 2=80-110M; 3=110-140M; 4=>140M */
+#define M88E1000_PSSR_CABLE_LENGTH       0x0380
+#define M88E1000_PSSR_SPEED              0xC000 /* Speed, bits 14:15 */
+#define M88E1000_PSSR_1000MBS            0x8000 /* 10=1000Mbs */
+
+#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7
+
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the master
+ */
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X   0x0000
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the slave
+ */
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK  0x0300
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X    0x0100
+#define M88E1000_EPSCR_TX_CLK_25      0x0070 /* 25  MHz TX_CLK */
+
+/* M88EC018 Rev 2 specific DownShift settings */
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK  0x0E00
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X    0x0800
+
+#define I82578_EPSCR_DOWNSHIFT_ENABLE          0x0020
+#define I82578_EPSCR_DOWNSHIFT_COUNTER_MASK    0x001C
+
+/* BME1000 PHY Specific Control Register */
+#define BME1000_PSCR_ENABLE_DOWNSHIFT   0x0800 /* 1 = enable downshift */
+
+/* Bits...
+ * 15-5: page
+ * 4-0: register offset
+ */
+#define GG82563_PAGE_SHIFT        5
+#define GG82563_REG(page, reg)    \
+	(((page) << GG82563_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS))
+#define GG82563_MIN_ALT_REG       30
+
+/* GG82563 Specific Registers */
+#define GG82563_PHY_SPEC_CTRL           \
+	GG82563_REG(0, 16) /* PHY Specific Control */
+#define GG82563_PHY_PAGE_SELECT         \
+	GG82563_REG(0, 22) /* Page Select */
+#define GG82563_PHY_SPEC_CTRL_2         \
+	GG82563_REG(0, 26) /* PHY Specific Control 2 */
+#define GG82563_PHY_PAGE_SELECT_ALT     \
+	GG82563_REG(0, 29) /* Alternate Page Select */
+
+#define GG82563_PHY_MAC_SPEC_CTRL       \
+	GG82563_REG(2, 21) /* MAC Specific Control Register */
+
+#define GG82563_PHY_DSP_DISTANCE    \
+	GG82563_REG(5, 26) /* DSP Distance */
+
+/* Page 193 - Port Control Registers */
+#define GG82563_PHY_KMRN_MODE_CTRL   \
+	GG82563_REG(193, 16) /* Kumeran Mode Control */
+#define GG82563_PHY_PWR_MGMT_CTRL       \
+	GG82563_REG(193, 20) /* Power Management Control */
+
+/* Page 194 - KMRN Registers */
+#define GG82563_PHY_INBAND_CTRL         \
+	GG82563_REG(194, 18) /* Inband Control */
+
+/* MDI Control */
+#define E1000_MDIC_REG_MASK	0x001F0000
+#define E1000_MDIC_REG_SHIFT 16
+#define E1000_MDIC_PHY_SHIFT 21
+#define E1000_MDIC_OP_WRITE  0x04000000
+#define E1000_MDIC_OP_READ   0x08000000
+#define E1000_MDIC_READY     0x10000000
+#define E1000_MDIC_ERROR     0x40000000
+
+/* SerDes Control */
+#define E1000_GEN_POLL_TIMEOUT          640
+
+#endif /* _E1000_DEFINES_H_ */
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
new file mode 100644
index 0000000000..a187582d22
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -0,0 +1,598 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+/* Linux PRO/1000 Ethernet Driver main header file */
+
+#ifndef _E1000_H_
+#define _E1000_H_
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/io.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/crc32.h>
+#include <linux/if_vlan.h>
+#include <linux/timecounter.h>
+#include <linux/net_tstamp.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/ptp_classify.h>
+#include <linux/mii.h>
+#include <linux/mdio.h>
+#include <linux/mutex.h>
+#include <linux/pm_qos.h>
+#include "hw.h"
+
+struct e1000_info;
+
+#define e_dbg(format, arg...) \
+	netdev_dbg(hw->adapter->netdev, format, ## arg)
+#define e_err(format, arg...) \
+	netdev_err(adapter->netdev, format, ## arg)
+#define e_info(format, arg...) \
+	netdev_info(adapter->netdev, format, ## arg)
+#define e_warn(format, arg...) \
+	netdev_warn(adapter->netdev, format, ## arg)
+#define e_notice(format, arg...) \
+	netdev_notice(adapter->netdev, format, ## arg)
+
+/* Interrupt modes, as used by the IntMode parameter */
+#define E1000E_INT_MODE_LEGACY		0
+#define E1000E_INT_MODE_MSI		1
+#define E1000E_INT_MODE_MSIX		2
+
+/* Tx/Rx descriptor defines */
+#define E1000_DEFAULT_TXD		256
+#define E1000_MAX_TXD			4096
+#define E1000_MIN_TXD			64
+
+#define E1000_DEFAULT_RXD		256
+#define E1000_MAX_RXD			4096
+#define E1000_MIN_RXD			64
+
+#define E1000_MIN_ITR_USECS		10 /* 100000 irq/sec */
+#define E1000_MAX_ITR_USECS		10000 /* 100    irq/sec */
+
+#define E1000_FC_PAUSE_TIME		0x0680 /* 858 usec */
+
+/* How many Tx Descriptors do we need to call netif_wake_queue ? */
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define E1000_RX_BUFFER_WRITE		16 /* Must be power of 2 */
+
+#define AUTO_ALL_MODES			0
+#define E1000_EEPROM_APME		0x0400
+
+#define E1000_MNG_VLAN_NONE		(-1)
+
+#define DEFAULT_JUMBO			9234
+
+/* Time to wait before putting the device into D3 if there's no link (in ms). */
+#define LINK_TIMEOUT		100
+
+/* Count for polling __E1000_RESET condition every 10-20msec.
+ * Experimentation has shown the reset can take approximately 210msec.
+ */
+#define E1000_CHECK_RESET_COUNT		25
+
+#define PCICFG_DESC_RING_STATUS		0xe4
+#define FLUSH_DESC_REQUIRED		0x100
+
+/* in the case of WTHRESH, it appears at least the 82571/2 hardware
+ * writes back 4 descriptors when WTHRESH=5, and 3 descriptors when
+ * WTHRESH=4, so a setting of 5 gives the most efficient bus
+ * utilization but to avoid possible Tx stalls, set it to 1
+ */
+#define E1000_TXDCTL_DMA_BURST_ENABLE                          \
+	(E1000_TXDCTL_GRAN | /* set descriptor granularity */  \
+	 E1000_TXDCTL_COUNT_DESC |                             \
+	 (1u << 16) | /* wthresh must be +1 more than desired */\
+	 (1u << 8)  | /* hthresh */                             \
+	 0x1f)        /* pthresh */
+
+#define E1000_RXDCTL_DMA_BURST_ENABLE                          \
+	(0x01000000 | /* set descriptor granularity */         \
+	 (4u << 16) | /* set writeback threshold    */         \
+	 (4u << 8)  | /* set prefetch threshold     */         \
+	 0x20)        /* set hthresh                */
+
+#define E1000_TIDV_FPD BIT(31)
+#define E1000_RDTR_FPD BIT(31)
+
+enum e1000_boards {
+	board_82571,
+	board_82572,
+	board_82573,
+	board_82574,
+	board_82583,
+	board_80003es2lan,
+	board_ich8lan,
+	board_ich9lan,
+	board_ich10lan,
+	board_pchlan,
+	board_pch2lan,
+	board_pch_lpt,
+	board_pch_spt,
+	board_pch_cnp,
+	board_pch_tgp,
+	board_pch_adp,
+	board_pch_mtp
+};
+
+struct e1000_ps_page {
+	struct page *page;
+	u64 dma; /* must be u64 - written to hw */
+};
+
+/* wrappers around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer
+ */
+struct e1000_buffer {
+	dma_addr_t dma;
+	struct sk_buff *skb;
+	union {
+		/* Tx */
+		struct {
+			unsigned long time_stamp;
+			u16 length;
+			u16 next_to_watch;
+			unsigned int segs;
+			unsigned int bytecount;
+			u16 mapped_as_page;
+		};
+		/* Rx */
+		struct {
+			/* arrays of page information for packet split */
+			struct e1000_ps_page *ps_pages;
+			struct page *page;
+		};
+	};
+};
+
+struct e1000_ring {
+	struct e1000_adapter *adapter;	/* back pointer to adapter */
+	void *desc;			/* pointer to ring memory  */
+	dma_addr_t dma;			/* phys address of ring    */
+	unsigned int size;		/* length of ring in bytes */
+	unsigned int count;		/* number of desc. in ring */
+
+	u16 next_to_use;
+	u16 next_to_clean;
+
+	void __iomem *head;
+	void __iomem *tail;
+
+	/* array of buffer information structs */
+	struct e1000_buffer *buffer_info;
+
+	char name[IFNAMSIZ + 5];
+	u32 ims_val;
+	u32 itr_val;
+	void __iomem *itr_register;
+	int set_itr;
+
+	struct sk_buff *rx_skb_top;
+};
+
+/* PHY register snapshot values */
+struct e1000_phy_regs {
+	u16 bmcr;		/* basic mode control register    */
+	u16 bmsr;		/* basic mode status register     */
+	u16 advertise;		/* auto-negotiation advertisement */
+	u16 lpa;		/* link partner ability register  */
+	u16 expansion;		/* auto-negotiation expansion reg */
+	u16 ctrl1000;		/* 1000BASE-T control register    */
+	u16 stat1000;		/* 1000BASE-T status register     */
+	u16 estatus;		/* extended status register       */
+};
+
+/* board specific private data structure */
+struct e1000_adapter {
+	struct timer_list watchdog_timer;
+	struct timer_list phy_info_timer;
+	struct timer_list blink_timer;
+
+	struct work_struct reset_task;
+	struct work_struct watchdog_task;
+
+	const struct e1000_info *ei;
+
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+	u32 bd_number;
+	u32 rx_buffer_len;
+	u16 mng_vlan_id;
+	u16 link_speed;
+	u16 link_duplex;
+	u16 eeprom_vers;
+
+	/* track device up/down/testing state */
+	unsigned long state;
+
+	/* Interrupt Throttle Rate */
+	u32 itr;
+	u32 itr_setting;
+	u16 tx_itr;
+	u16 rx_itr;
+
+	/* Tx - one ring per active queue */
+	struct e1000_ring *tx_ring ____cacheline_aligned_in_smp;
+	u32 tx_fifo_limit;
+
+	struct napi_struct napi;
+
+	unsigned int uncorr_errors;	/* uncorrectable ECC errors */
+	unsigned int corr_errors;	/* correctable ECC errors */
+	unsigned int restart_queue;
+	u32 txd_cmd;
+
+	bool detect_tx_hung;
+	bool tx_hang_recheck;
+	u8 tx_timeout_factor;
+
+	u32 tx_int_delay;
+	u32 tx_abs_int_delay;
+
+	unsigned int total_tx_bytes;
+	unsigned int total_tx_packets;
+	unsigned int total_rx_bytes;
+	unsigned int total_rx_packets;
+
+	/* Tx stats */
+	u64 tpt_old;
+	u64 colc_old;
+	u32 gotc;
+	u64 gotc_old;
+	u32 tx_timeout_count;
+	u32 tx_fifo_head;
+	u32 tx_head_addr;
+	u32 tx_fifo_size;
+	u32 tx_dma_failed;
+	u32 tx_hwtstamp_timeouts;
+	u32 tx_hwtstamp_skipped;
+
+	/* Rx */
+	bool (*clean_rx)(struct e1000_ring *ring, int *work_done,
+			 int work_to_do) ____cacheline_aligned_in_smp;
+	void (*alloc_rx_buf)(struct e1000_ring *ring, int cleaned_count,
+			     gfp_t gfp);
+	struct e1000_ring *rx_ring;
+
+	u32 rx_int_delay;
+	u32 rx_abs_int_delay;
+
+	/* Rx stats */
+	u64 hw_csum_err;
+	u64 hw_csum_good;
+	u64 rx_hdr_split;
+	u32 gorc;
+	u64 gorc_old;
+	u32 alloc_rx_buff_failed;
+	u32 rx_dma_failed;
+	u32 rx_hwtstamp_cleared;
+
+	unsigned int rx_ps_pages;
+	u16 rx_ps_bsize0;
+	u32 max_frame_size;
+	u32 min_frame_size;
+
+	/* OS defined structs */
+	struct net_device *netdev;
+	struct pci_dev *pdev;
+
+	/* structs defined in e1000_hw.h */
+	struct e1000_hw hw;
+
+	spinlock_t stats64_lock;	/* protects statistics counters */
+	struct e1000_hw_stats stats;
+	struct e1000_phy_info phy_info;
+	struct e1000_phy_stats phy_stats;
+
+	/* Snapshot of PHY registers */
+	struct e1000_phy_regs phy_regs;
+
+	struct e1000_ring test_tx_ring;
+	struct e1000_ring test_rx_ring;
+	u32 test_icr;
+
+	u32 msg_enable;
+	unsigned int num_vectors;
+	struct msix_entry *msix_entries;
+	int int_mode;
+	u32 eiac_mask;
+
+	u32 eeprom_wol;
+	u32 wol;
+	u32 pba;
+	u32 max_hw_frame_size;
+
+	bool fc_autoneg;
+
+	unsigned int flags;
+	unsigned int flags2;
+	struct work_struct downshift_task;
+	struct work_struct update_phy_task;
+	struct work_struct print_hang_task;
+
+	int phy_hang_count;
+
+	u16 tx_ring_count;
+	u16 rx_ring_count;
+
+	struct hwtstamp_config hwtstamp_config;
+	struct delayed_work systim_overflow_work;
+	struct sk_buff *tx_hwtstamp_skb;
+	unsigned long tx_hwtstamp_start;
+	struct work_struct tx_hwtstamp_work;
+	spinlock_t systim_lock;	/* protects SYSTIML/H regsters */
+	struct cyclecounter cc;
+	struct timecounter tc;
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info ptp_clock_info;
+	struct pm_qos_request pm_qos_req;
+	long ptp_delta;
+
+	u16 eee_advert;
+};
+
+struct e1000_info {
+	enum e1000_mac_type	mac;
+	unsigned int		flags;
+	unsigned int		flags2;
+	u32			pba;
+	u32			max_hw_frame_size;
+	s32			(*get_variants)(struct e1000_adapter *);
+	const struct e1000_mac_operations *mac_ops;
+	const struct e1000_phy_operations *phy_ops;
+	const struct e1000_nvm_operations *nvm_ops;
+};
+
+s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca);
+
+/* The system time is maintained by a 64-bit counter comprised of the 32-bit
+ * SYSTIMH and SYSTIML registers.  How the counter increments (and therefore
+ * its resolution) is based on the contents of the TIMINCA register - it
+ * increments every incperiod (bits 31:24) clock ticks by incvalue (bits 23:0).
+ * For the best accuracy, the incperiod should be as small as possible.  The
+ * incvalue is scaled by a factor as large as possible (while still fitting
+ * in bits 23:0) so that relatively small clock corrections can be made.
+ *
+ * As a result, a shift of INCVALUE_SHIFT_n is used to fit a value of
+ * INCVALUE_n into the TIMINCA register allowing 32+8+(24-INCVALUE_SHIFT_n)
+ * bits to count nanoseconds leaving the rest for fractional nonseconds.
+ */
+#define INCVALUE_96MHZ		125
+#define INCVALUE_SHIFT_96MHZ	17
+#define INCPERIOD_SHIFT_96MHZ	2
+#define INCPERIOD_96MHZ		(12 >> INCPERIOD_SHIFT_96MHZ)
+
+#define INCVALUE_25MHZ		40
+#define INCVALUE_SHIFT_25MHZ	18
+#define INCPERIOD_25MHZ		1
+
+#define INCVALUE_24MHZ		125
+#define INCVALUE_SHIFT_24MHZ	14
+#define INCPERIOD_24MHZ		3
+
+#define INCVALUE_38400KHZ	26
+#define INCVALUE_SHIFT_38400KHZ	19
+#define INCPERIOD_38400KHZ	1
+
+/* Another drawback of scaling the incvalue by a large factor is the
+ * 64-bit SYSTIM register overflows more quickly.  This is dealt with
+ * by simply reading the clock before it overflows.
+ *
+ * Clock	ns bits	Overflows after
+ * ~~~~~~	~~~~~~~	~~~~~~~~~~~~~~~
+ * 96MHz	47-bit	2^(47-INCPERIOD_SHIFT_96MHz) / 10^9 / 3600 = 9.77 hrs
+ * 25MHz	46-bit	2^46 / 10^9 / 3600 = 19.55 hours
+ */
+#define E1000_SYSTIM_OVERFLOW_PERIOD	(HZ * 60 * 60 * 4)
+#define E1000_MAX_82574_SYSTIM_REREADS	50
+#define E1000_82574_SYSTIM_EPSILON	(1ULL << 35ULL)
+
+/* hardware capability, feature, and workaround flags */
+#define FLAG_HAS_AMT                      BIT(0)
+#define FLAG_HAS_FLASH                    BIT(1)
+#define FLAG_HAS_HW_VLAN_FILTER           BIT(2)
+#define FLAG_HAS_WOL                      BIT(3)
+/* reserved BIT(4) */
+#define FLAG_HAS_CTRLEXT_ON_LOAD          BIT(5)
+#define FLAG_HAS_SWSM_ON_LOAD             BIT(6)
+#define FLAG_HAS_JUMBO_FRAMES             BIT(7)
+#define FLAG_READ_ONLY_NVM                BIT(8)
+#define FLAG_IS_ICH                       BIT(9)
+#define FLAG_HAS_MSIX                     BIT(10)
+#define FLAG_HAS_SMART_POWER_DOWN         BIT(11)
+#define FLAG_IS_QUAD_PORT_A               BIT(12)
+#define FLAG_IS_QUAD_PORT                 BIT(13)
+#define FLAG_HAS_HW_TIMESTAMP             BIT(14)
+#define FLAG_APME_IN_WUC                  BIT(15)
+#define FLAG_APME_IN_CTRL3                BIT(16)
+#define FLAG_APME_CHECK_PORT_B            BIT(17)
+#define FLAG_DISABLE_FC_PAUSE_TIME        BIT(18)
+#define FLAG_NO_WAKE_UCAST                BIT(19)
+#define FLAG_MNG_PT_ENABLED               BIT(20)
+#define FLAG_RESET_OVERWRITES_LAA         BIT(21)
+#define FLAG_TARC_SPEED_MODE_BIT          BIT(22)
+#define FLAG_TARC_SET_BIT_ZERO            BIT(23)
+#define FLAG_RX_NEEDS_RESTART             BIT(24)
+#define FLAG_LSC_GIG_SPEED_DROP           BIT(25)
+#define FLAG_SMART_POWER_DOWN             BIT(26)
+#define FLAG_MSI_ENABLED                  BIT(27)
+/* reserved BIT(28) */
+#define FLAG_TSO_FORCE                    BIT(29)
+#define FLAG_RESTART_NOW                  BIT(30)
+#define FLAG_MSI_TEST_FAILED              BIT(31)
+
+#define FLAG2_CRC_STRIPPING               BIT(0)
+#define FLAG2_HAS_PHY_WAKEUP              BIT(1)
+#define FLAG2_IS_DISCARDING               BIT(2)
+#define FLAG2_DISABLE_ASPM_L1             BIT(3)
+#define FLAG2_HAS_PHY_STATS               BIT(4)
+#define FLAG2_HAS_EEE                     BIT(5)
+#define FLAG2_DMA_BURST                   BIT(6)
+#define FLAG2_DISABLE_ASPM_L0S            BIT(7)
+#define FLAG2_DISABLE_AIM                 BIT(8)
+#define FLAG2_CHECK_PHY_HANG              BIT(9)
+#define FLAG2_NO_DISABLE_RX               BIT(10)
+#define FLAG2_PCIM2PCI_ARBITER_WA         BIT(11)
+#define FLAG2_DFLT_CRC_STRIPPING          BIT(12)
+#define FLAG2_CHECK_RX_HWTSTAMP           BIT(13)
+#define FLAG2_CHECK_SYSTIM_OVERFLOW       BIT(14)
+#define FLAG2_ENABLE_S0IX_FLOWS           BIT(15)
+
+#define E1000_RX_DESC_PS(R, i)	    \
+	(&(((union e1000_rx_desc_packet_split *)((R).desc))[i]))
+#define E1000_RX_DESC_EXT(R, i)	    \
+	(&(((union e1000_rx_desc_extended *)((R).desc))[i]))
+#define E1000_GET_DESC(R, i, type)	(&(((struct type *)((R).desc))[i]))
+#define E1000_TX_DESC(R, i)		E1000_GET_DESC(R, i, e1000_tx_desc)
+#define E1000_CONTEXT_DESC(R, i)	E1000_GET_DESC(R, i, e1000_context_desc)
+
+enum e1000_state_t {
+	__E1000_TESTING,
+	__E1000_RESETTING,
+	__E1000_ACCESS_SHARED_RESOURCE,
+	__E1000_DOWN
+};
+
+enum latency_range {
+	lowest_latency = 0,
+	low_latency = 1,
+	bulk_latency = 2,
+	latency_invalid = 255
+};
+
+extern char e1000e_driver_name[];
+
+void e1000e_check_options(struct e1000_adapter *adapter);
+void e1000e_set_ethtool_ops(struct net_device *netdev);
+
+int e1000e_open(struct net_device *netdev);
+int e1000e_close(struct net_device *netdev);
+void e1000e_up(struct e1000_adapter *adapter);
+void e1000e_down(struct e1000_adapter *adapter, bool reset);
+void e1000e_reinit_locked(struct e1000_adapter *adapter);
+void e1000e_reset(struct e1000_adapter *adapter);
+void e1000e_power_up_phy(struct e1000_adapter *adapter);
+int e1000e_setup_rx_resources(struct e1000_ring *ring);
+int e1000e_setup_tx_resources(struct e1000_ring *ring);
+void e1000e_free_rx_resources(struct e1000_ring *ring);
+void e1000e_free_tx_resources(struct e1000_ring *ring);
+void e1000e_get_stats64(struct net_device *netdev,
+			struct rtnl_link_stats64 *stats);
+void e1000e_set_interrupt_capability(struct e1000_adapter *adapter);
+void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter);
+void e1000e_get_hw_control(struct e1000_adapter *adapter);
+void e1000e_release_hw_control(struct e1000_adapter *adapter);
+void e1000e_write_itr(struct e1000_adapter *adapter, u32 itr);
+
+extern unsigned int copybreak;
+
+extern const struct e1000_info e1000_82571_info;
+extern const struct e1000_info e1000_82572_info;
+extern const struct e1000_info e1000_82573_info;
+extern const struct e1000_info e1000_82574_info;
+extern const struct e1000_info e1000_82583_info;
+extern const struct e1000_info e1000_ich8_info;
+extern const struct e1000_info e1000_ich9_info;
+extern const struct e1000_info e1000_ich10_info;
+extern const struct e1000_info e1000_pch_info;
+extern const struct e1000_info e1000_pch2_info;
+extern const struct e1000_info e1000_pch_lpt_info;
+extern const struct e1000_info e1000_pch_spt_info;
+extern const struct e1000_info e1000_pch_cnp_info;
+extern const struct e1000_info e1000_pch_tgp_info;
+extern const struct e1000_info e1000_pch_adp_info;
+extern const struct e1000_info e1000_pch_mtp_info;
+extern const struct e1000_info e1000_es2_info;
+
+void e1000e_ptp_init(struct e1000_adapter *adapter);
+void e1000e_ptp_remove(struct e1000_adapter *adapter);
+
+u64 e1000e_read_systim(struct e1000_adapter *adapter,
+		       struct ptp_system_timestamp *sts);
+
+static inline s32 e1000_phy_hw_reset(struct e1000_hw *hw)
+{
+	return hw->phy.ops.reset(hw);
+}
+
+static inline s32 e1e_rphy(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	return hw->phy.ops.read_reg(hw, offset, data);
+}
+
+static inline s32 e1e_rphy_locked(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	return hw->phy.ops.read_reg_locked(hw, offset, data);
+}
+
+static inline s32 e1e_wphy(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	return hw->phy.ops.write_reg(hw, offset, data);
+}
+
+static inline s32 e1e_wphy_locked(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	return hw->phy.ops.write_reg_locked(hw, offset, data);
+}
+
+void e1000e_reload_nvm_generic(struct e1000_hw *hw);
+
+static inline s32 e1000e_read_mac_addr(struct e1000_hw *hw)
+{
+	if (hw->mac.ops.read_mac_addr)
+		return hw->mac.ops.read_mac_addr(hw);
+
+	return e1000_read_mac_addr_generic(hw);
+}
+
+static inline s32 e1000_validate_nvm_checksum(struct e1000_hw *hw)
+{
+	return hw->nvm.ops.validate(hw);
+}
+
+static inline s32 e1000e_update_nvm_checksum(struct e1000_hw *hw)
+{
+	return hw->nvm.ops.update(hw);
+}
+
+static inline s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words,
+				 u16 *data)
+{
+	return hw->nvm.ops.read(hw, offset, words, data);
+}
+
+static inline s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words,
+				  u16 *data)
+{
+	return hw->nvm.ops.write(hw, offset, words, data);
+}
+
+static inline s32 e1000_get_phy_info(struct e1000_hw *hw)
+{
+	return hw->phy.ops.get_info(hw);
+}
+
+static inline u32 __er32(struct e1000_hw *hw, unsigned long reg)
+{
+	return readl(hw->hw_addr + reg);
+}
+
+#define er32(reg)	__er32(hw, E1000_##reg)
+
+void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val);
+
+#define ew32(reg, val)	__ew32(hw, E1000_##reg, (val))
+
+#define e1e_flush()	er32(STATUS)
+
+#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) \
+	(__ew32((a), (reg + ((offset) << 2)), (value)))
+
+#define E1000_READ_REG_ARRAY(a, reg, offset) \
+	(readl((a)->hw_addr + reg + ((offset) << 2)))
+
+#endif /* _E1000_H_ */
diff --git a/drivers/net/ethernet/intel/e1000e/e1000e_trace.h b/drivers/net/ethernet/intel/e1000e/e1000e_trace.h
new file mode 100644
index 0000000000..19d3cf4d92
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/e1000e_trace.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2022, Intel Corporation. */
+/* Modeled on trace-events-sample.h */
+/* The trace subsystem name for e1000e will be "e1000e_trace".
+ *
+ * This file is named e1000e_trace.h.
+ *
+ * Since this include file's name is different from the trace
+ * subsystem name, we'll have to define TRACE_INCLUDE_FILE at the end
+ * of this file.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM e1000e_trace
+
+#if !defined(_TRACE_E1000E_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_E1000E_TRACE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(e1000e_trace_mac_register,
+	    TP_PROTO(uint32_t reg),
+	    TP_ARGS(reg),
+	    TP_STRUCT__entry(__field(uint32_t,	reg)),
+	    TP_fast_assign(__entry->reg = reg;),
+	    TP_printk("event: TraceHub e1000e mac register: 0x%08x",
+		      __entry->reg)
+);
+
+#endif
+/* This must be outside ifdef _E1000E_TRACE_H */
+/* This trace include file is not located in the .../include/trace
+ * with the kernel tracepoint definitions, because we're a loadable
+ * module.
+ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE e1000e_trace
+
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
new file mode 100644
index 0000000000..9835e6a90d
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -0,0 +1,2412 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+/* ethtool support for e1000 */
+
+#include <linux/netdevice.h>
+#include <linux/interrupt.h>
+#include <linux/ethtool.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/vmalloc.h>
+#include <linux/pm_runtime.h>
+
+#include "e1000.h"
+
+enum { NETDEV_STATS, E1000_STATS };
+
+struct e1000_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int type;
+	int sizeof_stat;
+	int stat_offset;
+};
+
+static const char e1000e_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define E1000E_PRIV_FLAGS_S0IX_ENABLED	BIT(0)
+	"s0ix-enabled",
+};
+
+#define E1000E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(e1000e_priv_flags_strings)
+
+#define E1000_STAT(str, m) { \
+		.stat_string = str, \
+		.type = E1000_STATS, \
+		.sizeof_stat = sizeof(((struct e1000_adapter *)0)->m), \
+		.stat_offset = offsetof(struct e1000_adapter, m) }
+#define E1000_NETDEV_STAT(str, m) { \
+		.stat_string = str, \
+		.type = NETDEV_STATS, \
+		.sizeof_stat = sizeof(((struct rtnl_link_stats64 *)0)->m), \
+		.stat_offset = offsetof(struct rtnl_link_stats64, m) }
+
+static const struct e1000_stats e1000_gstrings_stats[] = {
+	E1000_STAT("rx_packets", stats.gprc),
+	E1000_STAT("tx_packets", stats.gptc),
+	E1000_STAT("rx_bytes", stats.gorc),
+	E1000_STAT("tx_bytes", stats.gotc),
+	E1000_STAT("rx_broadcast", stats.bprc),
+	E1000_STAT("tx_broadcast", stats.bptc),
+	E1000_STAT("rx_multicast", stats.mprc),
+	E1000_STAT("tx_multicast", stats.mptc),
+	E1000_NETDEV_STAT("rx_errors", rx_errors),
+	E1000_NETDEV_STAT("tx_errors", tx_errors),
+	E1000_NETDEV_STAT("tx_dropped", tx_dropped),
+	E1000_STAT("multicast", stats.mprc),
+	E1000_STAT("collisions", stats.colc),
+	E1000_NETDEV_STAT("rx_length_errors", rx_length_errors),
+	E1000_NETDEV_STAT("rx_over_errors", rx_over_errors),
+	E1000_STAT("rx_crc_errors", stats.crcerrs),
+	E1000_NETDEV_STAT("rx_frame_errors", rx_frame_errors),
+	E1000_STAT("rx_no_buffer_count", stats.rnbc),
+	E1000_STAT("rx_missed_errors", stats.mpc),
+	E1000_STAT("tx_aborted_errors", stats.ecol),
+	E1000_STAT("tx_carrier_errors", stats.tncrs),
+	E1000_NETDEV_STAT("tx_fifo_errors", tx_fifo_errors),
+	E1000_NETDEV_STAT("tx_heartbeat_errors", tx_heartbeat_errors),
+	E1000_STAT("tx_window_errors", stats.latecol),
+	E1000_STAT("tx_abort_late_coll", stats.latecol),
+	E1000_STAT("tx_deferred_ok", stats.dc),
+	E1000_STAT("tx_single_coll_ok", stats.scc),
+	E1000_STAT("tx_multi_coll_ok", stats.mcc),
+	E1000_STAT("tx_timeout_count", tx_timeout_count),
+	E1000_STAT("tx_restart_queue", restart_queue),
+	E1000_STAT("rx_long_length_errors", stats.roc),
+	E1000_STAT("rx_short_length_errors", stats.ruc),
+	E1000_STAT("rx_align_errors", stats.algnerrc),
+	E1000_STAT("tx_tcp_seg_good", stats.tsctc),
+	E1000_STAT("tx_tcp_seg_failed", stats.tsctfc),
+	E1000_STAT("rx_flow_control_xon", stats.xonrxc),
+	E1000_STAT("rx_flow_control_xoff", stats.xoffrxc),
+	E1000_STAT("tx_flow_control_xon", stats.xontxc),
+	E1000_STAT("tx_flow_control_xoff", stats.xofftxc),
+	E1000_STAT("rx_csum_offload_good", hw_csum_good),
+	E1000_STAT("rx_csum_offload_errors", hw_csum_err),
+	E1000_STAT("rx_header_split", rx_hdr_split),
+	E1000_STAT("alloc_rx_buff_failed", alloc_rx_buff_failed),
+	E1000_STAT("tx_smbus", stats.mgptc),
+	E1000_STAT("rx_smbus", stats.mgprc),
+	E1000_STAT("dropped_smbus", stats.mgpdc),
+	E1000_STAT("rx_dma_failed", rx_dma_failed),
+	E1000_STAT("tx_dma_failed", tx_dma_failed),
+	E1000_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared),
+	E1000_STAT("uncorr_ecc_errors", uncorr_errors),
+	E1000_STAT("corr_ecc_errors", corr_errors),
+	E1000_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts),
+	E1000_STAT("tx_hwtstamp_skipped", tx_hwtstamp_skipped),
+};
+
+#define E1000_GLOBAL_STATS_LEN	ARRAY_SIZE(e1000_gstrings_stats)
+#define E1000_STATS_LEN (E1000_GLOBAL_STATS_LEN)
+static const char e1000_gstrings_test[][ETH_GSTRING_LEN] = {
+	"Register test  (offline)", "Eeprom test    (offline)",
+	"Interrupt test (offline)", "Loopback test  (offline)",
+	"Link test   (on/offline)"
+};
+
+#define E1000_TEST_LEN ARRAY_SIZE(e1000_gstrings_test)
+
+static int e1000_get_link_ksettings(struct net_device *netdev,
+				    struct ethtool_link_ksettings *cmd)
+{
+	u32 speed, supported, advertising, lp_advertising, lpa_t;
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (hw->phy.media_type == e1000_media_type_copper) {
+		supported = (SUPPORTED_10baseT_Half |
+			     SUPPORTED_10baseT_Full |
+			     SUPPORTED_100baseT_Half |
+			     SUPPORTED_100baseT_Full |
+			     SUPPORTED_1000baseT_Full |
+			     SUPPORTED_Asym_Pause |
+			     SUPPORTED_Autoneg |
+			     SUPPORTED_Pause |
+			     SUPPORTED_TP);
+		if (hw->phy.type == e1000_phy_ife)
+			supported &= ~SUPPORTED_1000baseT_Full;
+		advertising = ADVERTISED_TP;
+
+		if (hw->mac.autoneg == 1) {
+			advertising |= ADVERTISED_Autoneg;
+			/* the e1000 autoneg seems to match ethtool nicely */
+			advertising |= hw->phy.autoneg_advertised;
+		}
+
+		cmd->base.port = PORT_TP;
+		cmd->base.phy_address = hw->phy.addr;
+	} else {
+		supported   = (SUPPORTED_1000baseT_Full |
+			       SUPPORTED_FIBRE |
+			       SUPPORTED_Autoneg);
+
+		advertising = (ADVERTISED_1000baseT_Full |
+			       ADVERTISED_FIBRE |
+			       ADVERTISED_Autoneg);
+
+		cmd->base.port = PORT_FIBRE;
+	}
+
+	speed = SPEED_UNKNOWN;
+	cmd->base.duplex = DUPLEX_UNKNOWN;
+
+	if (netif_running(netdev)) {
+		if (netif_carrier_ok(netdev)) {
+			speed = adapter->link_speed;
+			cmd->base.duplex = adapter->link_duplex - 1;
+		}
+	} else if (!pm_runtime_suspended(netdev->dev.parent)) {
+		u32 status = er32(STATUS);
+
+		if (status & E1000_STATUS_LU) {
+			if (status & E1000_STATUS_SPEED_1000)
+				speed = SPEED_1000;
+			else if (status & E1000_STATUS_SPEED_100)
+				speed = SPEED_100;
+			else
+				speed = SPEED_10;
+
+			if (status & E1000_STATUS_FD)
+				cmd->base.duplex = DUPLEX_FULL;
+			else
+				cmd->base.duplex = DUPLEX_HALF;
+		}
+	}
+
+	cmd->base.speed = speed;
+	cmd->base.autoneg = ((hw->phy.media_type == e1000_media_type_fiber) ||
+			 hw->mac.autoneg) ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+	/* MDI-X => 2; MDI =>1; Invalid =>0 */
+	if ((hw->phy.media_type == e1000_media_type_copper) &&
+	    netif_carrier_ok(netdev))
+		cmd->base.eth_tp_mdix = hw->phy.is_mdix ?
+			ETH_TP_MDI_X : ETH_TP_MDI;
+	else
+		cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
+
+	if (hw->phy.mdix == AUTO_ALL_MODES)
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+	else
+		cmd->base.eth_tp_mdix_ctrl = hw->phy.mdix;
+
+	if (hw->phy.media_type != e1000_media_type_copper)
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
+
+	lpa_t = mii_stat1000_to_ethtool_lpa_t(adapter->phy_regs.stat1000);
+	lp_advertising = lpa_t |
+	mii_lpa_to_ethtool_lpa_t(adapter->phy_regs.lpa);
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising,
+						lp_advertising);
+
+	return 0;
+}
+
+static int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx)
+{
+	struct e1000_mac_info *mac = &adapter->hw.mac;
+
+	mac->autoneg = 0;
+
+	/* Make sure dplx is at most 1 bit and lsb of speed is not set
+	 * for the switch() below to work
+	 */
+	if ((spd & 1) || (dplx & ~1))
+		goto err_inval;
+
+	/* Fiber NICs only allow 1000 gbps Full duplex */
+	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) &&
+	    (spd != SPEED_1000) && (dplx != DUPLEX_FULL)) {
+		goto err_inval;
+	}
+
+	switch (spd + dplx) {
+	case SPEED_10 + DUPLEX_HALF:
+		mac->forced_speed_duplex = ADVERTISE_10_HALF;
+		break;
+	case SPEED_10 + DUPLEX_FULL:
+		mac->forced_speed_duplex = ADVERTISE_10_FULL;
+		break;
+	case SPEED_100 + DUPLEX_HALF:
+		mac->forced_speed_duplex = ADVERTISE_100_HALF;
+		break;
+	case SPEED_100 + DUPLEX_FULL:
+		mac->forced_speed_duplex = ADVERTISE_100_FULL;
+		break;
+	case SPEED_1000 + DUPLEX_FULL:
+		if (adapter->hw.phy.media_type == e1000_media_type_copper) {
+			mac->autoneg = 1;
+			adapter->hw.phy.autoneg_advertised =
+				ADVERTISE_1000_FULL;
+		} else {
+			mac->forced_speed_duplex = ADVERTISE_1000_FULL;
+		}
+		break;
+	case SPEED_1000 + DUPLEX_HALF:	/* not supported */
+	default:
+		goto err_inval;
+	}
+
+	/* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
+	adapter->hw.phy.mdix = AUTO_ALL_MODES;
+
+	return 0;
+
+err_inval:
+	e_err("Unsupported Speed/Duplex configuration\n");
+	return -EINVAL;
+}
+
+static int e1000_set_link_ksettings(struct net_device *netdev,
+				    const struct ethtool_link_ksettings *cmd)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	int ret_val = 0;
+	u32 advertising;
+
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
+
+	pm_runtime_get_sync(netdev->dev.parent);
+
+	/* When SoL/IDER sessions are active, autoneg/speed/duplex
+	 * cannot be changed
+	 */
+	if (hw->phy.ops.check_reset_block &&
+	    hw->phy.ops.check_reset_block(hw)) {
+		e_err("Cannot change link characteristics when SoL/IDER is active.\n");
+		ret_val = -EINVAL;
+		goto out;
+	}
+
+	/* MDI setting is only allowed when autoneg enabled because
+	 * some hardware doesn't allow MDI setting when speed or
+	 * duplex is forced.
+	 */
+	if (cmd->base.eth_tp_mdix_ctrl) {
+		if (hw->phy.media_type != e1000_media_type_copper) {
+			ret_val = -EOPNOTSUPP;
+			goto out;
+		}
+
+		if ((cmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
+		    (cmd->base.autoneg != AUTONEG_ENABLE)) {
+			e_err("forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
+			ret_val = -EINVAL;
+			goto out;
+		}
+	}
+
+	while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
+		hw->mac.autoneg = 1;
+		if (hw->phy.media_type == e1000_media_type_fiber)
+			hw->phy.autoneg_advertised = ADVERTISED_1000baseT_Full |
+			    ADVERTISED_FIBRE | ADVERTISED_Autoneg;
+		else
+			hw->phy.autoneg_advertised = advertising |
+			    ADVERTISED_TP | ADVERTISED_Autoneg;
+		advertising = hw->phy.autoneg_advertised;
+		if (adapter->fc_autoneg)
+			hw->fc.requested_mode = e1000_fc_default;
+	} else {
+		u32 speed = cmd->base.speed;
+		/* calling this overrides forced MDI setting */
+		if (e1000_set_spd_dplx(adapter, speed, cmd->base.duplex)) {
+			ret_val = -EINVAL;
+			goto out;
+		}
+	}
+
+	/* MDI-X => 2; MDI => 1; Auto => 3 */
+	if (cmd->base.eth_tp_mdix_ctrl) {
+		/* fix up the value for auto (3 => 0) as zero is mapped
+		 * internally to auto
+		 */
+		if (cmd->base.eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+			hw->phy.mdix = AUTO_ALL_MODES;
+		else
+			hw->phy.mdix = cmd->base.eth_tp_mdix_ctrl;
+	}
+
+	/* reset the link */
+	if (netif_running(adapter->netdev)) {
+		e1000e_down(adapter, true);
+		e1000e_up(adapter);
+	} else {
+		e1000e_reset(adapter);
+	}
+
+out:
+	pm_runtime_put_sync(netdev->dev.parent);
+	clear_bit(__E1000_RESETTING, &adapter->state);
+	return ret_val;
+}
+
+static void e1000_get_pauseparam(struct net_device *netdev,
+				 struct ethtool_pauseparam *pause)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	pause->autoneg =
+	    (adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+	if (hw->fc.current_mode == e1000_fc_rx_pause) {
+		pause->rx_pause = 1;
+	} else if (hw->fc.current_mode == e1000_fc_tx_pause) {
+		pause->tx_pause = 1;
+	} else if (hw->fc.current_mode == e1000_fc_full) {
+		pause->rx_pause = 1;
+		pause->tx_pause = 1;
+	}
+}
+
+static int e1000_set_pauseparam(struct net_device *netdev,
+				struct ethtool_pauseparam *pause)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	int retval = 0;
+
+	adapter->fc_autoneg = pause->autoneg;
+
+	while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	pm_runtime_get_sync(netdev->dev.parent);
+
+	if (adapter->fc_autoneg == AUTONEG_ENABLE) {
+		hw->fc.requested_mode = e1000_fc_default;
+		if (netif_running(adapter->netdev)) {
+			e1000e_down(adapter, true);
+			e1000e_up(adapter);
+		} else {
+			e1000e_reset(adapter);
+		}
+	} else {
+		if (pause->rx_pause && pause->tx_pause)
+			hw->fc.requested_mode = e1000_fc_full;
+		else if (pause->rx_pause && !pause->tx_pause)
+			hw->fc.requested_mode = e1000_fc_rx_pause;
+		else if (!pause->rx_pause && pause->tx_pause)
+			hw->fc.requested_mode = e1000_fc_tx_pause;
+		else if (!pause->rx_pause && !pause->tx_pause)
+			hw->fc.requested_mode = e1000_fc_none;
+
+		hw->fc.current_mode = hw->fc.requested_mode;
+
+		if (hw->phy.media_type == e1000_media_type_fiber) {
+			retval = hw->mac.ops.setup_link(hw);
+			/* implicit goto out */
+		} else {
+			retval = e1000e_force_mac_fc(hw);
+			if (retval)
+				goto out;
+			e1000e_set_fc_watermarks(hw);
+		}
+	}
+
+out:
+	pm_runtime_put_sync(netdev->dev.parent);
+	clear_bit(__E1000_RESETTING, &adapter->state);
+	return retval;
+}
+
+static u32 e1000_get_msglevel(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	return adapter->msg_enable;
+}
+
+static void e1000_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	adapter->msg_enable = data;
+}
+
+static int e1000_get_regs_len(struct net_device __always_unused *netdev)
+{
+#define E1000_REGS_LEN 32	/* overestimate */
+	return E1000_REGS_LEN * sizeof(u32);
+}
+
+static void e1000_get_regs(struct net_device *netdev,
+			   struct ethtool_regs *regs, void *p)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 *regs_buff = p;
+	u16 phy_data;
+
+	pm_runtime_get_sync(netdev->dev.parent);
+
+	memset(p, 0, E1000_REGS_LEN * sizeof(u32));
+
+	regs->version = (1u << 24) |
+			(adapter->pdev->revision << 16) |
+			adapter->pdev->device;
+
+	regs_buff[0] = er32(CTRL);
+	regs_buff[1] = er32(STATUS);
+
+	regs_buff[2] = er32(RCTL);
+	regs_buff[3] = er32(RDLEN(0));
+	regs_buff[4] = er32(RDH(0));
+	regs_buff[5] = er32(RDT(0));
+	regs_buff[6] = er32(RDTR);
+
+	regs_buff[7] = er32(TCTL);
+	regs_buff[8] = er32(TDLEN(0));
+	regs_buff[9] = er32(TDH(0));
+	regs_buff[10] = er32(TDT(0));
+	regs_buff[11] = er32(TIDV);
+
+	regs_buff[12] = adapter->hw.phy.type;	/* PHY type (IGP=1, M88=0) */
+
+	/* ethtool doesn't use anything past this point, so all this
+	 * code is likely legacy junk for apps that may or may not exist
+	 */
+	if (hw->phy.type == e1000_phy_m88) {
+		e1e_rphy(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
+		regs_buff[13] = (u32)phy_data; /* cable length */
+		regs_buff[14] = 0;  /* Dummy (to align w/ IGP phy reg dump) */
+		regs_buff[15] = 0;  /* Dummy (to align w/ IGP phy reg dump) */
+		regs_buff[16] = 0;  /* Dummy (to align w/ IGP phy reg dump) */
+		e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+		regs_buff[17] = (u32)phy_data; /* extended 10bt distance */
+		regs_buff[18] = regs_buff[13]; /* cable polarity */
+		regs_buff[19] = 0;  /* Dummy (to align w/ IGP phy reg dump) */
+		regs_buff[20] = regs_buff[17]; /* polarity correction */
+		/* phy receive errors */
+		regs_buff[22] = adapter->phy_stats.receive_errors;
+		regs_buff[23] = regs_buff[13]; /* mdix mode */
+	}
+	regs_buff[21] = 0;	/* was idle_errors */
+	e1e_rphy(hw, MII_STAT1000, &phy_data);
+	regs_buff[24] = (u32)phy_data;	/* phy local receiver status */
+	regs_buff[25] = regs_buff[24];	/* phy remote receiver status */
+
+	pm_runtime_put_sync(netdev->dev.parent);
+}
+
+static int e1000_get_eeprom_len(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	return adapter->hw.nvm.word_size * 2;
+}
+
+static int e1000_get_eeprom(struct net_device *netdev,
+			    struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u16 *eeprom_buff;
+	int first_word;
+	int last_word;
+	int ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EINVAL;
+
+	eeprom->magic = adapter->pdev->vendor | (adapter->pdev->device << 16);
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+
+	eeprom_buff = kmalloc_array(last_word - first_word + 1, sizeof(u16),
+				    GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	pm_runtime_get_sync(netdev->dev.parent);
+
+	if (hw->nvm.type == e1000_nvm_eeprom_spi) {
+		ret_val = e1000_read_nvm(hw, first_word,
+					 last_word - first_word + 1,
+					 eeprom_buff);
+	} else {
+		for (i = 0; i < last_word - first_word + 1; i++) {
+			ret_val = e1000_read_nvm(hw, first_word + i, 1,
+						 &eeprom_buff[i]);
+			if (ret_val)
+				break;
+		}
+	}
+
+	pm_runtime_put_sync(netdev->dev.parent);
+
+	if (ret_val) {
+		/* a read error occurred, throw away the result */
+		memset(eeprom_buff, 0xff, sizeof(u16) *
+		       (last_word - first_word + 1));
+	} else {
+		/* Device's eeprom is always little-endian, word addressable */
+		for (i = 0; i < last_word - first_word + 1; i++)
+			le16_to_cpus(&eeprom_buff[i]);
+	}
+
+	memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), eeprom->len);
+	kfree(eeprom_buff);
+
+	return ret_val;
+}
+
+static int e1000_set_eeprom(struct net_device *netdev,
+			    struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u16 *eeprom_buff;
+	void *ptr;
+	int max_len;
+	int first_word;
+	int last_word;
+	int ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EOPNOTSUPP;
+
+	if (eeprom->magic !=
+	    (adapter->pdev->vendor | (adapter->pdev->device << 16)))
+		return -EFAULT;
+
+	if (adapter->flags & FLAG_READ_ONLY_NVM)
+		return -EINVAL;
+
+	max_len = hw->nvm.word_size * 2;
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+	eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	ptr = (void *)eeprom_buff;
+
+	pm_runtime_get_sync(netdev->dev.parent);
+
+	if (eeprom->offset & 1) {
+		/* need read/modify/write of first changed EEPROM word */
+		/* only the second byte of the word is being modified */
+		ret_val = e1000_read_nvm(hw, first_word, 1, &eeprom_buff[0]);
+		ptr++;
+	}
+	if (((eeprom->offset + eeprom->len) & 1) && (!ret_val))
+		/* need read/modify/write of last changed EEPROM word */
+		/* only the first byte of the word is being modified */
+		ret_val = e1000_read_nvm(hw, last_word, 1,
+					 &eeprom_buff[last_word - first_word]);
+
+	if (ret_val)
+		goto out;
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(ptr, bytes, eeprom->len);
+
+	for (i = 0; i < last_word - first_word + 1; i++)
+		cpu_to_le16s(&eeprom_buff[i]);
+
+	ret_val = e1000_write_nvm(hw, first_word,
+				  last_word - first_word + 1, eeprom_buff);
+
+	if (ret_val)
+		goto out;
+
+	/* Update the checksum over the first part of the EEPROM if needed
+	 * and flush shadow RAM for applicable controllers
+	 */
+	if ((first_word <= NVM_CHECKSUM_REG) ||
+	    (hw->mac.type == e1000_82583) ||
+	    (hw->mac.type == e1000_82574) ||
+	    (hw->mac.type == e1000_82573))
+		ret_val = e1000e_update_nvm_checksum(hw);
+
+out:
+	pm_runtime_put_sync(netdev->dev.parent);
+	kfree(eeprom_buff);
+	return ret_val;
+}
+
+static void e1000_get_drvinfo(struct net_device *netdev,
+			      struct ethtool_drvinfo *drvinfo)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	strscpy(drvinfo->driver, e1000e_driver_name, sizeof(drvinfo->driver));
+
+	/* EEPROM image version # is reported as firmware version # for
+	 * PCI-E controllers
+	 */
+	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+		 "%d.%d-%d",
+		 (adapter->eeprom_vers & 0xF000) >> 12,
+		 (adapter->eeprom_vers & 0x0FF0) >> 4,
+		 (adapter->eeprom_vers & 0x000F));
+
+	strscpy(drvinfo->bus_info, pci_name(adapter->pdev),
+		sizeof(drvinfo->bus_info));
+}
+
+static void e1000_get_ringparam(struct net_device *netdev,
+				struct ethtool_ringparam *ring,
+				struct kernel_ethtool_ringparam *kernel_ring,
+				struct netlink_ext_ack *extack)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	ring->rx_max_pending = E1000_MAX_RXD;
+	ring->tx_max_pending = E1000_MAX_TXD;
+	ring->rx_pending = adapter->rx_ring_count;
+	ring->tx_pending = adapter->tx_ring_count;
+}
+
+static int e1000_set_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ring,
+			       struct kernel_ethtool_ringparam *kernel_ring,
+			       struct netlink_ext_ack *extack)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_ring *temp_tx = NULL, *temp_rx = NULL;
+	int err = 0, size = sizeof(struct e1000_ring);
+	bool set_tx = false, set_rx = false;
+	u16 new_rx_count, new_tx_count;
+
+	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+		return -EINVAL;
+
+	new_rx_count = clamp_t(u32, ring->rx_pending, E1000_MIN_RXD,
+			       E1000_MAX_RXD);
+	new_rx_count = ALIGN(new_rx_count, REQ_RX_DESCRIPTOR_MULTIPLE);
+
+	new_tx_count = clamp_t(u32, ring->tx_pending, E1000_MIN_TXD,
+			       E1000_MAX_TXD);
+	new_tx_count = ALIGN(new_tx_count, REQ_TX_DESCRIPTOR_MULTIPLE);
+
+	if ((new_tx_count == adapter->tx_ring_count) &&
+	    (new_rx_count == adapter->rx_ring_count))
+		/* nothing to do */
+		return 0;
+
+	while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (!netif_running(adapter->netdev)) {
+		/* Set counts now and allocate resources during open() */
+		adapter->tx_ring->count = new_tx_count;
+		adapter->rx_ring->count = new_rx_count;
+		adapter->tx_ring_count = new_tx_count;
+		adapter->rx_ring_count = new_rx_count;
+		goto clear_reset;
+	}
+
+	set_tx = (new_tx_count != adapter->tx_ring_count);
+	set_rx = (new_rx_count != adapter->rx_ring_count);
+
+	/* Allocate temporary storage for ring updates */
+	if (set_tx) {
+		temp_tx = vmalloc(size);
+		if (!temp_tx) {
+			err = -ENOMEM;
+			goto free_temp;
+		}
+	}
+	if (set_rx) {
+		temp_rx = vmalloc(size);
+		if (!temp_rx) {
+			err = -ENOMEM;
+			goto free_temp;
+		}
+	}
+
+	pm_runtime_get_sync(netdev->dev.parent);
+
+	e1000e_down(adapter, true);
+
+	/* We can't just free everything and then setup again, because the
+	 * ISRs in MSI-X mode get passed pointers to the Tx and Rx ring
+	 * structs.  First, attempt to allocate new resources...
+	 */
+	if (set_tx) {
+		memcpy(temp_tx, adapter->tx_ring, size);
+		temp_tx->count = new_tx_count;
+		err = e1000e_setup_tx_resources(temp_tx);
+		if (err)
+			goto err_setup;
+	}
+	if (set_rx) {
+		memcpy(temp_rx, adapter->rx_ring, size);
+		temp_rx->count = new_rx_count;
+		err = e1000e_setup_rx_resources(temp_rx);
+		if (err)
+			goto err_setup_rx;
+	}
+
+	/* ...then free the old resources and copy back any new ring data */
+	if (set_tx) {
+		e1000e_free_tx_resources(adapter->tx_ring);
+		memcpy(adapter->tx_ring, temp_tx, size);
+		adapter->tx_ring_count = new_tx_count;
+	}
+	if (set_rx) {
+		e1000e_free_rx_resources(adapter->rx_ring);
+		memcpy(adapter->rx_ring, temp_rx, size);
+		adapter->rx_ring_count = new_rx_count;
+	}
+
+err_setup_rx:
+	if (err && set_tx)
+		e1000e_free_tx_resources(temp_tx);
+err_setup:
+	e1000e_up(adapter);
+	pm_runtime_put_sync(netdev->dev.parent);
+free_temp:
+	vfree(temp_tx);
+	vfree(temp_rx);
+clear_reset:
+	clear_bit(__E1000_RESETTING, &adapter->state);
+	return err;
+}
+
+static bool reg_pattern_test(struct e1000_adapter *adapter, u64 *data,
+			     int reg, int offset, u32 mask, u32 write)
+{
+	u32 pat, val;
+	static const u32 test[] = {
+		0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF
+	};
+	for (pat = 0; pat < ARRAY_SIZE(test); pat++) {
+		E1000_WRITE_REG_ARRAY(&adapter->hw, reg, offset,
+				      (test[pat] & write));
+		val = E1000_READ_REG_ARRAY(&adapter->hw, reg, offset);
+		if (val != (test[pat] & write & mask)) {
+			e_err("pattern test failed (reg 0x%05X): got 0x%08X expected 0x%08X\n",
+			      reg + (offset << 2), val,
+			      (test[pat] & write & mask));
+			*data = reg;
+			return true;
+		}
+	}
+	return false;
+}
+
+static bool reg_set_and_check(struct e1000_adapter *adapter, u64 *data,
+			      int reg, u32 mask, u32 write)
+{
+	u32 val;
+
+	__ew32(&adapter->hw, reg, write & mask);
+	val = __er32(&adapter->hw, reg);
+	if ((write & mask) != (val & mask)) {
+		e_err("set/check test failed (reg 0x%05X): got 0x%08X expected 0x%08X\n",
+		      reg, (val & mask), (write & mask));
+		*data = reg;
+		return true;
+	}
+	return false;
+}
+
+#define REG_PATTERN_TEST_ARRAY(reg, offset, mask, write)                       \
+	do {                                                                   \
+		if (reg_pattern_test(adapter, data, reg, offset, mask, write)) \
+			return 1;                                              \
+	} while (0)
+#define REG_PATTERN_TEST(reg, mask, write)                                     \
+	REG_PATTERN_TEST_ARRAY(reg, 0, mask, write)
+
+#define REG_SET_AND_CHECK(reg, mask, write)                                    \
+	do {                                                                   \
+		if (reg_set_and_check(adapter, data, reg, mask, write))        \
+			return 1;                                              \
+	} while (0)
+
+static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_mac_info *mac = &adapter->hw.mac;
+	u32 value;
+	u32 before;
+	u32 after;
+	u32 i;
+	u32 toggle;
+	u32 mask;
+	u32 wlock_mac = 0;
+
+	/* The status register is Read Only, so a write should fail.
+	 * Some bits that get toggled are ignored.  There are several bits
+	 * on newer hardware that are r/w.
+	 */
+	switch (mac->type) {
+	case e1000_82571:
+	case e1000_82572:
+	case e1000_80003es2lan:
+		toggle = 0x7FFFF3FF;
+		break;
+	default:
+		toggle = 0x7FFFF033;
+		break;
+	}
+
+	before = er32(STATUS);
+	value = (er32(STATUS) & toggle);
+	ew32(STATUS, toggle);
+	after = er32(STATUS) & toggle;
+	if (value != after) {
+		e_err("failed STATUS register test got: 0x%08X expected: 0x%08X\n",
+		      after, value);
+		*data = 1;
+		return 1;
+	}
+	/* restore previous status */
+	ew32(STATUS, before);
+
+	if (!(adapter->flags & FLAG_IS_ICH)) {
+		REG_PATTERN_TEST(E1000_FCAL, 0xFFFFFFFF, 0xFFFFFFFF);
+		REG_PATTERN_TEST(E1000_FCAH, 0x0000FFFF, 0xFFFFFFFF);
+		REG_PATTERN_TEST(E1000_FCT, 0x0000FFFF, 0xFFFFFFFF);
+		REG_PATTERN_TEST(E1000_VET, 0x0000FFFF, 0xFFFFFFFF);
+	}
+
+	REG_PATTERN_TEST(E1000_RDTR, 0x0000FFFF, 0xFFFFFFFF);
+	REG_PATTERN_TEST(E1000_RDBAH(0), 0xFFFFFFFF, 0xFFFFFFFF);
+	REG_PATTERN_TEST(E1000_RDLEN(0), 0x000FFF80, 0x000FFFFF);
+	REG_PATTERN_TEST(E1000_RDH(0), 0x0000FFFF, 0x0000FFFF);
+	REG_PATTERN_TEST(E1000_RDT(0), 0x0000FFFF, 0x0000FFFF);
+	REG_PATTERN_TEST(E1000_FCRTH, 0x0000FFF8, 0x0000FFF8);
+	REG_PATTERN_TEST(E1000_FCTTV, 0x0000FFFF, 0x0000FFFF);
+	REG_PATTERN_TEST(E1000_TIPG, 0x3FFFFFFF, 0x3FFFFFFF);
+	REG_PATTERN_TEST(E1000_TDBAH(0), 0xFFFFFFFF, 0xFFFFFFFF);
+	REG_PATTERN_TEST(E1000_TDLEN(0), 0x000FFF80, 0x000FFFFF);
+
+	REG_SET_AND_CHECK(E1000_RCTL, 0xFFFFFFFF, 0x00000000);
+
+	before = ((adapter->flags & FLAG_IS_ICH) ? 0x06C3B33E : 0x06DFB3FE);
+	REG_SET_AND_CHECK(E1000_RCTL, before, 0x003FFFFB);
+	REG_SET_AND_CHECK(E1000_TCTL, 0xFFFFFFFF, 0x00000000);
+
+	REG_SET_AND_CHECK(E1000_RCTL, before, 0xFFFFFFFF);
+	REG_PATTERN_TEST(E1000_RDBAL(0), 0xFFFFFFF0, 0xFFFFFFFF);
+	if (!(adapter->flags & FLAG_IS_ICH))
+		REG_PATTERN_TEST(E1000_TXCW, 0xC000FFFF, 0x0000FFFF);
+	REG_PATTERN_TEST(E1000_TDBAL(0), 0xFFFFFFF0, 0xFFFFFFFF);
+	REG_PATTERN_TEST(E1000_TIDV, 0x0000FFFF, 0x0000FFFF);
+	mask = 0x8003FFFF;
+	switch (mac->type) {
+	case e1000_ich10lan:
+	case e1000_pchlan:
+	case e1000_pch2lan:
+	case e1000_pch_lpt:
+	case e1000_pch_spt:
+	case e1000_pch_cnp:
+	case e1000_pch_tgp:
+	case e1000_pch_adp:
+	case e1000_pch_mtp:
+	case e1000_pch_lnp:
+	case e1000_pch_ptp:
+	case e1000_pch_nvp:
+		mask |= BIT(18);
+		break;
+	default:
+		break;
+	}
+
+	if (mac->type >= e1000_pch_lpt)
+		wlock_mac = (er32(FWSM) & E1000_FWSM_WLOCK_MAC_MASK) >>
+		    E1000_FWSM_WLOCK_MAC_SHIFT;
+
+	for (i = 0; i < mac->rar_entry_count; i++) {
+		if (mac->type >= e1000_pch_lpt) {
+			/* Cannot test write-protected SHRAL[n] registers */
+			if ((wlock_mac == 1) || (wlock_mac && (i > wlock_mac)))
+				continue;
+
+			/* SHRAH[9] different than the others */
+			if (i == 10)
+				mask |= BIT(30);
+			else
+				mask &= ~BIT(30);
+		}
+		if (mac->type == e1000_pch2lan) {
+			/* SHRAH[0,1,2] different than previous */
+			if (i == 1)
+				mask &= 0xFFF4FFFF;
+			/* SHRAH[3] different than SHRAH[0,1,2] */
+			if (i == 4)
+				mask |= BIT(30);
+			/* RAR[1-6] owned by management engine - skipping */
+			if (i > 0)
+				i += 6;
+		}
+
+		REG_PATTERN_TEST_ARRAY(E1000_RA, ((i << 1) + 1), mask,
+				       0xFFFFFFFF);
+		/* reset index to actual value */
+		if ((mac->type == e1000_pch2lan) && (i > 6))
+			i -= 6;
+	}
+
+	for (i = 0; i < mac->mta_reg_count; i++)
+		REG_PATTERN_TEST_ARRAY(E1000_MTA, i, 0xFFFFFFFF, 0xFFFFFFFF);
+
+	*data = 0;
+
+	return 0;
+}
+
+static int e1000_eeprom_test(struct e1000_adapter *adapter, u64 *data)
+{
+	u16 temp;
+	u16 checksum = 0;
+	u16 i;
+
+	*data = 0;
+	/* Read and add up the contents of the EEPROM */
+	for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) {
+		if ((e1000_read_nvm(&adapter->hw, i, 1, &temp)) < 0) {
+			*data = 1;
+			return *data;
+		}
+		checksum += temp;
+	}
+
+	/* If Checksum is not Correct return error else test passed */
+	if ((checksum != (u16)NVM_SUM) && !(*data))
+		*data = 2;
+
+	return *data;
+}
+
+static irqreturn_t e1000_test_intr(int __always_unused irq, void *data)
+{
+	struct net_device *netdev = (struct net_device *)data;
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	adapter->test_icr |= er32(ICR);
+
+	return IRQ_HANDLED;
+}
+
+static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 mask;
+	u32 shared_int = 1;
+	u32 irq = adapter->pdev->irq;
+	int i;
+	int ret_val = 0;
+	int int_mode = E1000E_INT_MODE_LEGACY;
+
+	*data = 0;
+
+	/* NOTE: we don't test MSI/MSI-X interrupts here, yet */
+	if (adapter->int_mode == E1000E_INT_MODE_MSIX) {
+		int_mode = adapter->int_mode;
+		e1000e_reset_interrupt_capability(adapter);
+		adapter->int_mode = E1000E_INT_MODE_LEGACY;
+		e1000e_set_interrupt_capability(adapter);
+	}
+	/* Hook up test interrupt handler just for this test */
+	if (!request_irq(irq, e1000_test_intr, IRQF_PROBE_SHARED, netdev->name,
+			 netdev)) {
+		shared_int = 0;
+	} else if (request_irq(irq, e1000_test_intr, IRQF_SHARED, netdev->name,
+			       netdev)) {
+		*data = 1;
+		ret_val = -1;
+		goto out;
+	}
+	e_info("testing %s interrupt\n", (shared_int ? "shared" : "unshared"));
+
+	/* Disable all the interrupts */
+	ew32(IMC, 0xFFFFFFFF);
+	e1e_flush();
+	usleep_range(10000, 11000);
+
+	/* Test each interrupt */
+	for (i = 0; i < 10; i++) {
+		/* Interrupt to test */
+		mask = BIT(i);
+
+		if (adapter->flags & FLAG_IS_ICH) {
+			switch (mask) {
+			case E1000_ICR_RXSEQ:
+				continue;
+			case 0x00000100:
+				if (adapter->hw.mac.type == e1000_ich8lan ||
+				    adapter->hw.mac.type == e1000_ich9lan)
+					continue;
+				break;
+			default:
+				break;
+			}
+		}
+
+		if (!shared_int) {
+			/* Disable the interrupt to be reported in
+			 * the cause register and then force the same
+			 * interrupt and see if one gets posted.  If
+			 * an interrupt was posted to the bus, the
+			 * test failed.
+			 */
+			adapter->test_icr = 0;
+			ew32(IMC, mask);
+			ew32(ICS, mask);
+			e1e_flush();
+			usleep_range(10000, 11000);
+
+			if (adapter->test_icr & mask) {
+				*data = 3;
+				break;
+			}
+		}
+
+		/* Enable the interrupt to be reported in
+		 * the cause register and then force the same
+		 * interrupt and see if one gets posted.  If
+		 * an interrupt was not posted to the bus, the
+		 * test failed.
+		 */
+		adapter->test_icr = 0;
+		ew32(IMS, mask);
+		ew32(ICS, mask);
+		e1e_flush();
+		usleep_range(10000, 11000);
+
+		if (!(adapter->test_icr & mask)) {
+			*data = 4;
+			break;
+		}
+
+		if (!shared_int) {
+			/* Disable the other interrupts to be reported in
+			 * the cause register and then force the other
+			 * interrupts and see if any get posted.  If
+			 * an interrupt was posted to the bus, the
+			 * test failed.
+			 */
+			adapter->test_icr = 0;
+			ew32(IMC, ~mask & 0x00007FFF);
+			ew32(ICS, ~mask & 0x00007FFF);
+			e1e_flush();
+			usleep_range(10000, 11000);
+
+			if (adapter->test_icr) {
+				*data = 5;
+				break;
+			}
+		}
+	}
+
+	/* Disable all the interrupts */
+	ew32(IMC, 0xFFFFFFFF);
+	e1e_flush();
+	usleep_range(10000, 11000);
+
+	/* Unhook test interrupt handler */
+	free_irq(irq, netdev);
+
+out:
+	if (int_mode == E1000E_INT_MODE_MSIX) {
+		e1000e_reset_interrupt_capability(adapter);
+		adapter->int_mode = int_mode;
+		e1000e_set_interrupt_capability(adapter);
+	}
+
+	return ret_val;
+}
+
+static void e1000_free_desc_rings(struct e1000_adapter *adapter)
+{
+	struct e1000_ring *tx_ring = &adapter->test_tx_ring;
+	struct e1000_ring *rx_ring = &adapter->test_rx_ring;
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_buffer *buffer_info;
+	int i;
+
+	if (tx_ring->desc && tx_ring->buffer_info) {
+		for (i = 0; i < tx_ring->count; i++) {
+			buffer_info = &tx_ring->buffer_info[i];
+
+			if (buffer_info->dma)
+				dma_unmap_single(&pdev->dev,
+						 buffer_info->dma,
+						 buffer_info->length,
+						 DMA_TO_DEVICE);
+			dev_kfree_skb(buffer_info->skb);
+		}
+	}
+
+	if (rx_ring->desc && rx_ring->buffer_info) {
+		for (i = 0; i < rx_ring->count; i++) {
+			buffer_info = &rx_ring->buffer_info[i];
+
+			if (buffer_info->dma)
+				dma_unmap_single(&pdev->dev,
+						 buffer_info->dma,
+						 2048, DMA_FROM_DEVICE);
+			dev_kfree_skb(buffer_info->skb);
+		}
+	}
+
+	if (tx_ring->desc) {
+		dma_free_coherent(&pdev->dev, tx_ring->size, tx_ring->desc,
+				  tx_ring->dma);
+		tx_ring->desc = NULL;
+	}
+	if (rx_ring->desc) {
+		dma_free_coherent(&pdev->dev, rx_ring->size, rx_ring->desc,
+				  rx_ring->dma);
+		rx_ring->desc = NULL;
+	}
+
+	kfree(tx_ring->buffer_info);
+	tx_ring->buffer_info = NULL;
+	kfree(rx_ring->buffer_info);
+	rx_ring->buffer_info = NULL;
+}
+
+static int e1000_setup_desc_rings(struct e1000_adapter *adapter)
+{
+	struct e1000_ring *tx_ring = &adapter->test_tx_ring;
+	struct e1000_ring *rx_ring = &adapter->test_rx_ring;
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl;
+	int i;
+	int ret_val;
+
+	/* Setup Tx descriptor ring and Tx buffers */
+
+	if (!tx_ring->count)
+		tx_ring->count = E1000_DEFAULT_TXD;
+
+	tx_ring->buffer_info = kcalloc(tx_ring->count,
+				       sizeof(struct e1000_buffer), GFP_KERNEL);
+	if (!tx_ring->buffer_info) {
+		ret_val = 1;
+		goto err_nomem;
+	}
+
+	tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc);
+	tx_ring->size = ALIGN(tx_ring->size, 4096);
+	tx_ring->desc = dma_alloc_coherent(&pdev->dev, tx_ring->size,
+					   &tx_ring->dma, GFP_KERNEL);
+	if (!tx_ring->desc) {
+		ret_val = 2;
+		goto err_nomem;
+	}
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+	ew32(TDBAL(0), ((u64)tx_ring->dma & 0x00000000FFFFFFFF));
+	ew32(TDBAH(0), ((u64)tx_ring->dma >> 32));
+	ew32(TDLEN(0), tx_ring->count * sizeof(struct e1000_tx_desc));
+	ew32(TDH(0), 0);
+	ew32(TDT(0), 0);
+	ew32(TCTL, E1000_TCTL_PSP | E1000_TCTL_EN | E1000_TCTL_MULR |
+	     E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT |
+	     E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT);
+
+	for (i = 0; i < tx_ring->count; i++) {
+		struct e1000_tx_desc *tx_desc = E1000_TX_DESC(*tx_ring, i);
+		struct sk_buff *skb;
+		unsigned int skb_size = 1024;
+
+		skb = alloc_skb(skb_size, GFP_KERNEL);
+		if (!skb) {
+			ret_val = 3;
+			goto err_nomem;
+		}
+		skb_put(skb, skb_size);
+		tx_ring->buffer_info[i].skb = skb;
+		tx_ring->buffer_info[i].length = skb->len;
+		tx_ring->buffer_info[i].dma =
+		    dma_map_single(&pdev->dev, skb->data, skb->len,
+				   DMA_TO_DEVICE);
+		if (dma_mapping_error(&pdev->dev,
+				      tx_ring->buffer_info[i].dma)) {
+			ret_val = 4;
+			goto err_nomem;
+		}
+		tx_desc->buffer_addr = cpu_to_le64(tx_ring->buffer_info[i].dma);
+		tx_desc->lower.data = cpu_to_le32(skb->len);
+		tx_desc->lower.data |= cpu_to_le32(E1000_TXD_CMD_EOP |
+						   E1000_TXD_CMD_IFCS |
+						   E1000_TXD_CMD_RS);
+		tx_desc->upper.data = 0;
+	}
+
+	/* Setup Rx descriptor ring and Rx buffers */
+
+	if (!rx_ring->count)
+		rx_ring->count = E1000_DEFAULT_RXD;
+
+	rx_ring->buffer_info = kcalloc(rx_ring->count,
+				       sizeof(struct e1000_buffer), GFP_KERNEL);
+	if (!rx_ring->buffer_info) {
+		ret_val = 5;
+		goto err_nomem;
+	}
+
+	rx_ring->size = rx_ring->count * sizeof(union e1000_rx_desc_extended);
+	rx_ring->desc = dma_alloc_coherent(&pdev->dev, rx_ring->size,
+					   &rx_ring->dma, GFP_KERNEL);
+	if (!rx_ring->desc) {
+		ret_val = 6;
+		goto err_nomem;
+	}
+	rx_ring->next_to_use = 0;
+	rx_ring->next_to_clean = 0;
+
+	rctl = er32(RCTL);
+	if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX))
+		ew32(RCTL, rctl & ~E1000_RCTL_EN);
+	ew32(RDBAL(0), ((u64)rx_ring->dma & 0xFFFFFFFF));
+	ew32(RDBAH(0), ((u64)rx_ring->dma >> 32));
+	ew32(RDLEN(0), rx_ring->size);
+	ew32(RDH(0), 0);
+	ew32(RDT(0), 0);
+	rctl = E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_SZ_2048 |
+	    E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_LPE |
+	    E1000_RCTL_SBP | E1000_RCTL_SECRC |
+	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
+	    (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
+	ew32(RCTL, rctl);
+
+	for (i = 0; i < rx_ring->count; i++) {
+		union e1000_rx_desc_extended *rx_desc;
+		struct sk_buff *skb;
+
+		skb = alloc_skb(2048 + NET_IP_ALIGN, GFP_KERNEL);
+		if (!skb) {
+			ret_val = 7;
+			goto err_nomem;
+		}
+		skb_reserve(skb, NET_IP_ALIGN);
+		rx_ring->buffer_info[i].skb = skb;
+		rx_ring->buffer_info[i].dma =
+		    dma_map_single(&pdev->dev, skb->data, 2048,
+				   DMA_FROM_DEVICE);
+		if (dma_mapping_error(&pdev->dev,
+				      rx_ring->buffer_info[i].dma)) {
+			ret_val = 8;
+			goto err_nomem;
+		}
+		rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
+		rx_desc->read.buffer_addr =
+		    cpu_to_le64(rx_ring->buffer_info[i].dma);
+		memset(skb->data, 0x00, skb->len);
+	}
+
+	return 0;
+
+err_nomem:
+	e1000_free_desc_rings(adapter);
+	return ret_val;
+}
+
+static void e1000_phy_disable_receiver(struct e1000_adapter *adapter)
+{
+	/* Write out to PHY registers 29 and 30 to disable the Receiver. */
+	e1e_wphy(&adapter->hw, 29, 0x001F);
+	e1e_wphy(&adapter->hw, 30, 0x8FFC);
+	e1e_wphy(&adapter->hw, 29, 0x001A);
+	e1e_wphy(&adapter->hw, 30, 0x8FF0);
+}
+
+static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl_reg = 0;
+	u16 phy_reg = 0;
+	s32 ret_val = 0;
+
+	hw->mac.autoneg = 0;
+
+	if (hw->phy.type == e1000_phy_ife) {
+		/* force 100, set loopback */
+		e1e_wphy(hw, MII_BMCR, 0x6100);
+
+		/* Now set up the MAC to the same speed/duplex as the PHY. */
+		ctrl_reg = er32(CTRL);
+		ctrl_reg &= ~E1000_CTRL_SPD_SEL; /* Clear the speed sel bits */
+		ctrl_reg |= (E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */
+			     E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */
+			     E1000_CTRL_SPD_100 |/* Force Speed to 100 */
+			     E1000_CTRL_FD);	 /* Force Duplex to FULL */
+
+		ew32(CTRL, ctrl_reg);
+		e1e_flush();
+		usleep_range(500, 1000);
+
+		return 0;
+	}
+
+	/* Specific PHY configuration for loopback */
+	switch (hw->phy.type) {
+	case e1000_phy_m88:
+		/* Auto-MDI/MDIX Off */
+		e1e_wphy(hw, M88E1000_PHY_SPEC_CTRL, 0x0808);
+		/* reset to update Auto-MDI/MDIX */
+		e1e_wphy(hw, MII_BMCR, 0x9140);
+		/* autoneg off */
+		e1e_wphy(hw, MII_BMCR, 0x8140);
+		break;
+	case e1000_phy_gg82563:
+		e1e_wphy(hw, GG82563_PHY_KMRN_MODE_CTRL, 0x1CC);
+		break;
+	case e1000_phy_bm:
+		/* Set Default MAC Interface speed to 1GB */
+		e1e_rphy(hw, PHY_REG(2, 21), &phy_reg);
+		phy_reg &= ~0x0007;
+		phy_reg |= 0x006;
+		e1e_wphy(hw, PHY_REG(2, 21), phy_reg);
+		/* Assert SW reset for above settings to take effect */
+		hw->phy.ops.commit(hw);
+		usleep_range(1000, 2000);
+		/* Force Full Duplex */
+		e1e_rphy(hw, PHY_REG(769, 16), &phy_reg);
+		e1e_wphy(hw, PHY_REG(769, 16), phy_reg | 0x000C);
+		/* Set Link Up (in force link) */
+		e1e_rphy(hw, PHY_REG(776, 16), &phy_reg);
+		e1e_wphy(hw, PHY_REG(776, 16), phy_reg | 0x0040);
+		/* Force Link */
+		e1e_rphy(hw, PHY_REG(769, 16), &phy_reg);
+		e1e_wphy(hw, PHY_REG(769, 16), phy_reg | 0x0040);
+		/* Set Early Link Enable */
+		e1e_rphy(hw, PHY_REG(769, 20), &phy_reg);
+		e1e_wphy(hw, PHY_REG(769, 20), phy_reg | 0x0400);
+		break;
+	case e1000_phy_82577:
+	case e1000_phy_82578:
+		/* Workaround: K1 must be disabled for stable 1Gbps operation */
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val) {
+			e_err("Cannot setup 1Gbps loopback.\n");
+			return ret_val;
+		}
+		e1000_configure_k1_ich8lan(hw, false);
+		hw->phy.ops.release(hw);
+		break;
+	case e1000_phy_82579:
+		/* Disable PHY energy detect power down */
+		e1e_rphy(hw, PHY_REG(0, 21), &phy_reg);
+		e1e_wphy(hw, PHY_REG(0, 21), phy_reg & ~BIT(3));
+		/* Disable full chip energy detect */
+		e1e_rphy(hw, PHY_REG(776, 18), &phy_reg);
+		e1e_wphy(hw, PHY_REG(776, 18), phy_reg | 1);
+		/* Enable loopback on the PHY */
+		e1e_wphy(hw, I82577_PHY_LBK_CTRL, 0x8001);
+		break;
+	default:
+		break;
+	}
+
+	/* force 1000, set loopback */
+	e1e_wphy(hw, MII_BMCR, 0x4140);
+	msleep(250);
+
+	/* Now set up the MAC to the same speed/duplex as the PHY. */
+	ctrl_reg = er32(CTRL);
+	ctrl_reg &= ~E1000_CTRL_SPD_SEL; /* Clear the speed sel bits */
+	ctrl_reg |= (E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */
+		     E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */
+		     E1000_CTRL_SPD_1000 |/* Force Speed to 1000 */
+		     E1000_CTRL_FD);	 /* Force Duplex to FULL */
+
+	if (adapter->flags & FLAG_IS_ICH)
+		ctrl_reg |= E1000_CTRL_SLU;	/* Set Link Up */
+
+	if (hw->phy.media_type == e1000_media_type_copper &&
+	    hw->phy.type == e1000_phy_m88) {
+		ctrl_reg |= E1000_CTRL_ILOS;	/* Invert Loss of Signal */
+	} else {
+		/* Set the ILOS bit on the fiber Nic if half duplex link is
+		 * detected.
+		 */
+		if ((er32(STATUS) & E1000_STATUS_FD) == 0)
+			ctrl_reg |= (E1000_CTRL_ILOS | E1000_CTRL_SLU);
+	}
+
+	ew32(CTRL, ctrl_reg);
+
+	/* Disable the receiver on the PHY so when a cable is plugged in, the
+	 * PHY does not begin to autoneg when a cable is reconnected to the NIC.
+	 */
+	if (hw->phy.type == e1000_phy_m88)
+		e1000_phy_disable_receiver(adapter);
+
+	usleep_range(500, 1000);
+
+	return 0;
+}
+
+static int e1000_set_82571_fiber_loopback(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl = er32(CTRL);
+	int link;
+
+	/* special requirements for 82571/82572 fiber adapters */
+
+	/* jump through hoops to make sure link is up because serdes
+	 * link is hardwired up
+	 */
+	ctrl |= E1000_CTRL_SLU;
+	ew32(CTRL, ctrl);
+
+	/* disable autoneg */
+	ctrl = er32(TXCW);
+	ctrl &= ~BIT(31);
+	ew32(TXCW, ctrl);
+
+	link = (er32(STATUS) & E1000_STATUS_LU);
+
+	if (!link) {
+		/* set invert loss of signal */
+		ctrl = er32(CTRL);
+		ctrl |= E1000_CTRL_ILOS;
+		ew32(CTRL, ctrl);
+	}
+
+	/* special write to serdes control register to enable SerDes analog
+	 * loopback
+	 */
+	ew32(SCTL, E1000_SCTL_ENABLE_SERDES_LOOPBACK);
+	e1e_flush();
+	usleep_range(10000, 11000);
+
+	return 0;
+}
+
+/* only call this for fiber/serdes connections to es2lan */
+static int e1000_set_es2lan_mac_loopback(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrlext = er32(CTRL_EXT);
+	u32 ctrl = er32(CTRL);
+
+	/* save CTRL_EXT to restore later, reuse an empty variable (unused
+	 * on mac_type 80003es2lan)
+	 */
+	adapter->tx_fifo_head = ctrlext;
+
+	/* clear the serdes mode bits, putting the device into mac loopback */
+	ctrlext &= ~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
+	ew32(CTRL_EXT, ctrlext);
+
+	/* force speed to 1000/FD, link up */
+	ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100);
+	ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX |
+		 E1000_CTRL_SPD_1000 | E1000_CTRL_FD);
+	ew32(CTRL, ctrl);
+
+	/* set mac loopback */
+	ctrl = er32(RCTL);
+	ctrl |= E1000_RCTL_LBM_MAC;
+	ew32(RCTL, ctrl);
+
+	/* set testing mode parameters (no need to reset later) */
+#define KMRNCTRLSTA_OPMODE (0x1F << 16)
+#define KMRNCTRLSTA_OPMODE_1GB_FD_GMII 0x0582
+	ew32(KMRNCTRLSTA,
+	     (KMRNCTRLSTA_OPMODE | KMRNCTRLSTA_OPMODE_1GB_FD_GMII));
+
+	return 0;
+}
+
+static int e1000_setup_loopback_test(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl, fext_nvm11, tarc0;
+
+	if (hw->mac.type >= e1000_pch_spt) {
+		fext_nvm11 = er32(FEXTNVM11);
+		fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
+		ew32(FEXTNVM11, fext_nvm11);
+		tarc0 = er32(TARC(0));
+		/* clear bits 28 & 29 (control of MULR concurrent requests) */
+		tarc0 &= 0xcfffffff;
+		/* set bit 29 (value of MULR requests is now 2) */
+		tarc0 |= 0x20000000;
+		ew32(TARC(0), tarc0);
+	}
+	if (hw->phy.media_type == e1000_media_type_fiber ||
+	    hw->phy.media_type == e1000_media_type_internal_serdes) {
+		switch (hw->mac.type) {
+		case e1000_80003es2lan:
+			return e1000_set_es2lan_mac_loopback(adapter);
+		case e1000_82571:
+		case e1000_82572:
+			return e1000_set_82571_fiber_loopback(adapter);
+		default:
+			rctl = er32(RCTL);
+			rctl |= E1000_RCTL_LBM_TCVR;
+			ew32(RCTL, rctl);
+			return 0;
+		}
+	} else if (hw->phy.media_type == e1000_media_type_copper) {
+		return e1000_integrated_phy_loopback(adapter);
+	}
+
+	return 7;
+}
+
+static void e1000_loopback_cleanup(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl, fext_nvm11, tarc0;
+	u16 phy_reg;
+
+	rctl = er32(RCTL);
+	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
+	ew32(RCTL, rctl);
+
+	switch (hw->mac.type) {
+	case e1000_pch_spt:
+	case e1000_pch_cnp:
+	case e1000_pch_tgp:
+	case e1000_pch_adp:
+	case e1000_pch_mtp:
+	case e1000_pch_lnp:
+	case e1000_pch_ptp:
+	case e1000_pch_nvp:
+		fext_nvm11 = er32(FEXTNVM11);
+		fext_nvm11 &= ~E1000_FEXTNVM11_DISABLE_MULR_FIX;
+		ew32(FEXTNVM11, fext_nvm11);
+		tarc0 = er32(TARC(0));
+		/* clear bits 28 & 29 (control of MULR concurrent requests) */
+		/* set bit 29 (value of MULR requests is now 0) */
+		tarc0 &= 0xcfffffff;
+		ew32(TARC(0), tarc0);
+		fallthrough;
+	case e1000_80003es2lan:
+		if (hw->phy.media_type == e1000_media_type_fiber ||
+		    hw->phy.media_type == e1000_media_type_internal_serdes) {
+			/* restore CTRL_EXT, stealing space from tx_fifo_head */
+			ew32(CTRL_EXT, adapter->tx_fifo_head);
+			adapter->tx_fifo_head = 0;
+		}
+		fallthrough;
+	case e1000_82571:
+	case e1000_82572:
+		if (hw->phy.media_type == e1000_media_type_fiber ||
+		    hw->phy.media_type == e1000_media_type_internal_serdes) {
+			ew32(SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK);
+			e1e_flush();
+			usleep_range(10000, 11000);
+			break;
+		}
+		fallthrough;
+	default:
+		hw->mac.autoneg = 1;
+		if (hw->phy.type == e1000_phy_gg82563)
+			e1e_wphy(hw, GG82563_PHY_KMRN_MODE_CTRL, 0x180);
+		e1e_rphy(hw, MII_BMCR, &phy_reg);
+		if (phy_reg & BMCR_LOOPBACK) {
+			phy_reg &= ~BMCR_LOOPBACK;
+			e1e_wphy(hw, MII_BMCR, phy_reg);
+			if (hw->phy.ops.commit)
+				hw->phy.ops.commit(hw);
+		}
+		break;
+	}
+}
+
+static void e1000_create_lbtest_frame(struct sk_buff *skb,
+				      unsigned int frame_size)
+{
+	memset(skb->data, 0xFF, frame_size);
+	frame_size &= ~1;
+	memset(&skb->data[frame_size / 2], 0xAA, frame_size / 2 - 1);
+	skb->data[frame_size / 2 + 10] = 0xBE;
+	skb->data[frame_size / 2 + 12] = 0xAF;
+}
+
+static int e1000_check_lbtest_frame(struct sk_buff *skb,
+				    unsigned int frame_size)
+{
+	frame_size &= ~1;
+	if (*(skb->data + 3) == 0xFF)
+		if ((*(skb->data + frame_size / 2 + 10) == 0xBE) &&
+		    (*(skb->data + frame_size / 2 + 12) == 0xAF))
+			return 0;
+	return 13;
+}
+
+static int e1000_run_loopback_test(struct e1000_adapter *adapter)
+{
+	struct e1000_ring *tx_ring = &adapter->test_tx_ring;
+	struct e1000_ring *rx_ring = &adapter->test_rx_ring;
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_buffer *buffer_info;
+	int i, j, k, l;
+	int lc;
+	int good_cnt;
+	int ret_val = 0;
+	unsigned long time;
+
+	ew32(RDT(0), rx_ring->count - 1);
+
+	/* Calculate the loop count based on the largest descriptor ring
+	 * The idea is to wrap the largest ring a number of times using 64
+	 * send/receive pairs during each loop
+	 */
+
+	if (rx_ring->count <= tx_ring->count)
+		lc = ((tx_ring->count / 64) * 2) + 1;
+	else
+		lc = ((rx_ring->count / 64) * 2) + 1;
+
+	k = 0;
+	l = 0;
+	/* loop count loop */
+	for (j = 0; j <= lc; j++) {
+		/* send the packets */
+		for (i = 0; i < 64; i++) {
+			buffer_info = &tx_ring->buffer_info[k];
+
+			e1000_create_lbtest_frame(buffer_info->skb, 1024);
+			dma_sync_single_for_device(&pdev->dev,
+						   buffer_info->dma,
+						   buffer_info->length,
+						   DMA_TO_DEVICE);
+			k++;
+			if (k == tx_ring->count)
+				k = 0;
+		}
+		ew32(TDT(0), k);
+		e1e_flush();
+		msleep(200);
+		time = jiffies;	/* set the start time for the receive */
+		good_cnt = 0;
+		/* receive the sent packets */
+		do {
+			buffer_info = &rx_ring->buffer_info[l];
+
+			dma_sync_single_for_cpu(&pdev->dev,
+						buffer_info->dma, 2048,
+						DMA_FROM_DEVICE);
+
+			ret_val = e1000_check_lbtest_frame(buffer_info->skb,
+							   1024);
+			if (!ret_val)
+				good_cnt++;
+			l++;
+			if (l == rx_ring->count)
+				l = 0;
+			/* time + 20 msecs (200 msecs on 2.4) is more than
+			 * enough time to complete the receives, if it's
+			 * exceeded, break and error off
+			 */
+		} while ((good_cnt < 64) && !time_after(jiffies, time + 20));
+		if (good_cnt != 64) {
+			ret_val = 13;	/* ret_val is the same as mis-compare */
+			break;
+		}
+		if (time_after(jiffies, time + 20)) {
+			ret_val = 14;	/* error code for time out error */
+			break;
+		}
+	}
+	return ret_val;
+}
+
+static int e1000_loopback_test(struct e1000_adapter *adapter, u64 *data)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* PHY loopback cannot be performed if SoL/IDER sessions are active */
+	if (hw->phy.ops.check_reset_block &&
+	    hw->phy.ops.check_reset_block(hw)) {
+		e_err("Cannot do PHY loopback test when SoL/IDER is active.\n");
+		*data = 0;
+		goto out;
+	}
+
+	*data = e1000_setup_desc_rings(adapter);
+	if (*data)
+		goto out;
+
+	*data = e1000_setup_loopback_test(adapter);
+	if (*data)
+		goto err_loopback;
+
+	*data = e1000_run_loopback_test(adapter);
+	e1000_loopback_cleanup(adapter);
+
+err_loopback:
+	e1000_free_desc_rings(adapter);
+out:
+	return *data;
+}
+
+static int e1000_link_test(struct e1000_adapter *adapter, u64 *data)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	*data = 0;
+	if (hw->phy.media_type == e1000_media_type_internal_serdes) {
+		int i = 0;
+
+		hw->mac.serdes_has_link = false;
+
+		/* On some blade server designs, link establishment
+		 * could take as long as 2-3 minutes
+		 */
+		do {
+			hw->mac.ops.check_for_link(hw);
+			if (hw->mac.serdes_has_link)
+				return *data;
+			msleep(20);
+		} while (i++ < 3750);
+
+		*data = 1;
+	} else {
+		hw->mac.ops.check_for_link(hw);
+		if (hw->mac.autoneg)
+			/* On some Phy/switch combinations, link establishment
+			 * can take a few seconds more than expected.
+			 */
+			msleep_interruptible(5000);
+
+		if (!(er32(STATUS) & E1000_STATUS_LU))
+			*data = 1;
+	}
+	return *data;
+}
+
+static int e1000e_get_sset_count(struct net_device __always_unused *netdev,
+				 int sset)
+{
+	switch (sset) {
+	case ETH_SS_TEST:
+		return E1000_TEST_LEN;
+	case ETH_SS_STATS:
+		return E1000_STATS_LEN;
+	case ETH_SS_PRIV_FLAGS:
+		return E1000E_PRIV_FLAGS_STR_LEN;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void e1000_diag_test(struct net_device *netdev,
+			    struct ethtool_test *eth_test, u64 *data)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	u16 autoneg_advertised;
+	u8 forced_speed_duplex;
+	u8 autoneg;
+	bool if_running = netif_running(netdev);
+
+	pm_runtime_get_sync(netdev->dev.parent);
+
+	set_bit(__E1000_TESTING, &adapter->state);
+
+	if (!if_running) {
+		/* Get control of and reset hardware */
+		if (adapter->flags & FLAG_HAS_AMT)
+			e1000e_get_hw_control(adapter);
+
+		e1000e_power_up_phy(adapter);
+
+		adapter->hw.phy.autoneg_wait_to_complete = 1;
+		e1000e_reset(adapter);
+		adapter->hw.phy.autoneg_wait_to_complete = 0;
+	}
+
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		/* Offline tests */
+
+		/* save speed, duplex, autoneg settings */
+		autoneg_advertised = adapter->hw.phy.autoneg_advertised;
+		forced_speed_duplex = adapter->hw.mac.forced_speed_duplex;
+		autoneg = adapter->hw.mac.autoneg;
+
+		e_info("offline testing starting\n");
+
+		if (if_running)
+			/* indicate we're in test mode */
+			e1000e_close(netdev);
+
+		if (e1000_reg_test(adapter, &data[0]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		e1000e_reset(adapter);
+		if (e1000_eeprom_test(adapter, &data[1]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		e1000e_reset(adapter);
+		if (e1000_intr_test(adapter, &data[2]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		e1000e_reset(adapter);
+		if (e1000_loopback_test(adapter, &data[3]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* force this routine to wait until autoneg complete/timeout */
+		adapter->hw.phy.autoneg_wait_to_complete = 1;
+		e1000e_reset(adapter);
+		adapter->hw.phy.autoneg_wait_to_complete = 0;
+
+		if (e1000_link_test(adapter, &data[4]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* restore speed, duplex, autoneg settings */
+		adapter->hw.phy.autoneg_advertised = autoneg_advertised;
+		adapter->hw.mac.forced_speed_duplex = forced_speed_duplex;
+		adapter->hw.mac.autoneg = autoneg;
+		e1000e_reset(adapter);
+
+		clear_bit(__E1000_TESTING, &adapter->state);
+		if (if_running)
+			e1000e_open(netdev);
+	} else {
+		/* Online tests */
+
+		e_info("online testing starting\n");
+
+		/* register, eeprom, intr and loopback tests not run online */
+		data[0] = 0;
+		data[1] = 0;
+		data[2] = 0;
+		data[3] = 0;
+
+		if (e1000_link_test(adapter, &data[4]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		clear_bit(__E1000_TESTING, &adapter->state);
+	}
+
+	if (!if_running) {
+		e1000e_reset(adapter);
+
+		if (adapter->flags & FLAG_HAS_AMT)
+			e1000e_release_hw_control(adapter);
+	}
+
+	msleep_interruptible(4 * 1000);
+
+	pm_runtime_put_sync(netdev->dev.parent);
+}
+
+static void e1000_get_wol(struct net_device *netdev,
+			  struct ethtool_wolinfo *wol)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	wol->supported = 0;
+	wol->wolopts = 0;
+
+	if (!(adapter->flags & FLAG_HAS_WOL) ||
+	    !device_can_wakeup(&adapter->pdev->dev))
+		return;
+
+	wol->supported = WAKE_UCAST | WAKE_MCAST |
+	    WAKE_BCAST | WAKE_MAGIC | WAKE_PHY;
+
+	/* apply any specific unsupported masks here */
+	if (adapter->flags & FLAG_NO_WAKE_UCAST) {
+		wol->supported &= ~WAKE_UCAST;
+
+		if (adapter->wol & E1000_WUFC_EX)
+			e_err("Interface does not support directed (unicast) frame wake-up packets\n");
+	}
+
+	if (adapter->wol & E1000_WUFC_EX)
+		wol->wolopts |= WAKE_UCAST;
+	if (adapter->wol & E1000_WUFC_MC)
+		wol->wolopts |= WAKE_MCAST;
+	if (adapter->wol & E1000_WUFC_BC)
+		wol->wolopts |= WAKE_BCAST;
+	if (adapter->wol & E1000_WUFC_MAG)
+		wol->wolopts |= WAKE_MAGIC;
+	if (adapter->wol & E1000_WUFC_LNKC)
+		wol->wolopts |= WAKE_PHY;
+}
+
+static int e1000_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	if (!(adapter->flags & FLAG_HAS_WOL) ||
+	    !device_can_wakeup(&adapter->pdev->dev) ||
+	    (wol->wolopts & ~(WAKE_UCAST | WAKE_MCAST | WAKE_BCAST |
+			      WAKE_MAGIC | WAKE_PHY)))
+		return -EOPNOTSUPP;
+
+	/* these settings will always override what we currently have */
+	adapter->wol = 0;
+
+	if (wol->wolopts & WAKE_UCAST)
+		adapter->wol |= E1000_WUFC_EX;
+	if (wol->wolopts & WAKE_MCAST)
+		adapter->wol |= E1000_WUFC_MC;
+	if (wol->wolopts & WAKE_BCAST)
+		adapter->wol |= E1000_WUFC_BC;
+	if (wol->wolopts & WAKE_MAGIC)
+		adapter->wol |= E1000_WUFC_MAG;
+	if (wol->wolopts & WAKE_PHY)
+		adapter->wol |= E1000_WUFC_LNKC;
+
+	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+
+	return 0;
+}
+
+static int e1000_set_phys_id(struct net_device *netdev,
+			     enum ethtool_phys_id_state state)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		pm_runtime_get_sync(netdev->dev.parent);
+
+		if (!hw->mac.ops.blink_led)
+			return 2;	/* cycle on/off twice per second */
+
+		hw->mac.ops.blink_led(hw);
+		break;
+
+	case ETHTOOL_ID_INACTIVE:
+		if (hw->phy.type == e1000_phy_ife)
+			e1e_wphy(hw, IFE_PHY_SPECIAL_CONTROL_LED, 0);
+		hw->mac.ops.led_off(hw);
+		hw->mac.ops.cleanup_led(hw);
+		pm_runtime_put_sync(netdev->dev.parent);
+		break;
+
+	case ETHTOOL_ID_ON:
+		hw->mac.ops.led_on(hw);
+		break;
+
+	case ETHTOOL_ID_OFF:
+		hw->mac.ops.led_off(hw);
+		break;
+	}
+
+	return 0;
+}
+
+static int e1000_get_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *ec,
+			      struct kernel_ethtool_coalesce *kernel_coal,
+			      struct netlink_ext_ack *extack)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	if (adapter->itr_setting <= 4)
+		ec->rx_coalesce_usecs = adapter->itr_setting;
+	else
+		ec->rx_coalesce_usecs = 1000000 / adapter->itr_setting;
+
+	return 0;
+}
+
+static int e1000_set_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *ec,
+			      struct kernel_ethtool_coalesce *kernel_coal,
+			      struct netlink_ext_ack *extack)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	if ((ec->rx_coalesce_usecs > E1000_MAX_ITR_USECS) ||
+	    ((ec->rx_coalesce_usecs > 4) &&
+	     (ec->rx_coalesce_usecs < E1000_MIN_ITR_USECS)) ||
+	    (ec->rx_coalesce_usecs == 2))
+		return -EINVAL;
+
+	if (ec->rx_coalesce_usecs == 4) {
+		adapter->itr_setting = 4;
+		adapter->itr = adapter->itr_setting;
+	} else if (ec->rx_coalesce_usecs <= 3) {
+		adapter->itr = 20000;
+		adapter->itr_setting = ec->rx_coalesce_usecs;
+	} else {
+		adapter->itr = (1000000 / ec->rx_coalesce_usecs);
+		adapter->itr_setting = adapter->itr & ~3;
+	}
+
+	pm_runtime_get_sync(netdev->dev.parent);
+
+	if (adapter->itr_setting != 0)
+		e1000e_write_itr(adapter, adapter->itr);
+	else
+		e1000e_write_itr(adapter, 0);
+
+	pm_runtime_put_sync(netdev->dev.parent);
+
+	return 0;
+}
+
+static int e1000_nway_reset(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	if (!netif_running(netdev))
+		return -EAGAIN;
+
+	if (!adapter->hw.mac.autoneg)
+		return -EINVAL;
+
+	pm_runtime_get_sync(netdev->dev.parent);
+	e1000e_reinit_locked(adapter);
+	pm_runtime_put_sync(netdev->dev.parent);
+
+	return 0;
+}
+
+static void e1000_get_ethtool_stats(struct net_device *netdev,
+				    struct ethtool_stats __always_unused *stats,
+				    u64 *data)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct rtnl_link_stats64 net_stats;
+	int i;
+	char *p = NULL;
+
+	pm_runtime_get_sync(netdev->dev.parent);
+
+	dev_get_stats(netdev, &net_stats);
+
+	pm_runtime_put_sync(netdev->dev.parent);
+
+	for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) {
+		switch (e1000_gstrings_stats[i].type) {
+		case NETDEV_STATS:
+			p = (char *)&net_stats +
+			    e1000_gstrings_stats[i].stat_offset;
+			break;
+		case E1000_STATS:
+			p = (char *)adapter +
+			    e1000_gstrings_stats[i].stat_offset;
+			break;
+		default:
+			data[i] = 0;
+			continue;
+		}
+
+		data[i] = (e1000_gstrings_stats[i].sizeof_stat ==
+			   sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+}
+
+static void e1000_get_strings(struct net_device __always_unused *netdev,
+			      u32 stringset, u8 *data)
+{
+	u8 *p = data;
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_TEST:
+		memcpy(data, e1000_gstrings_test, sizeof(e1000_gstrings_test));
+		break;
+	case ETH_SS_STATS:
+		for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) {
+			memcpy(p, e1000_gstrings_stats[i].stat_string,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		memcpy(data, e1000e_priv_flags_strings,
+		       E1000E_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+		break;
+	}
+}
+
+static int e1000_get_rxnfc(struct net_device *netdev,
+			   struct ethtool_rxnfc *info,
+			   u32 __always_unused *rule_locs)
+{
+	info->data = 0;
+
+	switch (info->cmd) {
+	case ETHTOOL_GRXFH: {
+		struct e1000_adapter *adapter = netdev_priv(netdev);
+		struct e1000_hw *hw = &adapter->hw;
+		u32 mrqc;
+
+		pm_runtime_get_sync(netdev->dev.parent);
+		mrqc = er32(MRQC);
+		pm_runtime_put_sync(netdev->dev.parent);
+
+		if (!(mrqc & E1000_MRQC_RSS_FIELD_MASK))
+			return 0;
+
+		switch (info->flow_type) {
+		case TCP_V4_FLOW:
+			if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_TCP)
+				info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+			fallthrough;
+		case UDP_V4_FLOW:
+		case SCTP_V4_FLOW:
+		case AH_ESP_V4_FLOW:
+		case IPV4_FLOW:
+			if (mrqc & E1000_MRQC_RSS_FIELD_IPV4)
+				info->data |= RXH_IP_SRC | RXH_IP_DST;
+			break;
+		case TCP_V6_FLOW:
+			if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP)
+				info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+			fallthrough;
+		case UDP_V6_FLOW:
+		case SCTP_V6_FLOW:
+		case AH_ESP_V6_FLOW:
+		case IPV6_FLOW:
+			if (mrqc & E1000_MRQC_RSS_FIELD_IPV6)
+				info->data |= RXH_IP_SRC | RXH_IP_DST;
+			break;
+		default:
+			break;
+		}
+		return 0;
+	}
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int e1000e_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u16 cap_addr, lpa_addr, pcs_stat_addr, phy_data;
+	u32 ret_val;
+
+	if (!(adapter->flags2 & FLAG2_HAS_EEE))
+		return -EOPNOTSUPP;
+
+	switch (hw->phy.type) {
+	case e1000_phy_82579:
+		cap_addr = I82579_EEE_CAPABILITY;
+		lpa_addr = I82579_EEE_LP_ABILITY;
+		pcs_stat_addr = I82579_EEE_PCS_STATUS;
+		break;
+	case e1000_phy_i217:
+		cap_addr = I217_EEE_CAPABILITY;
+		lpa_addr = I217_EEE_LP_ABILITY;
+		pcs_stat_addr = I217_EEE_PCS_STATUS;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	pm_runtime_get_sync(netdev->dev.parent);
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val) {
+		pm_runtime_put_sync(netdev->dev.parent);
+		return -EBUSY;
+	}
+
+	/* EEE Capability */
+	ret_val = e1000_read_emi_reg_locked(hw, cap_addr, &phy_data);
+	if (ret_val)
+		goto release;
+	edata->supported = mmd_eee_cap_to_ethtool_sup_t(phy_data);
+
+	/* EEE Advertised */
+	edata->advertised = mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert);
+
+	/* EEE Link Partner Advertised */
+	ret_val = e1000_read_emi_reg_locked(hw, lpa_addr, &phy_data);
+	if (ret_val)
+		goto release;
+	edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data);
+
+	/* EEE PCS Status */
+	ret_val = e1000_read_emi_reg_locked(hw, pcs_stat_addr, &phy_data);
+	if (ret_val)
+		goto release;
+	if (hw->phy.type == e1000_phy_82579)
+		phy_data <<= 8;
+
+	/* Result of the EEE auto negotiation - there is no register that
+	 * has the status of the EEE negotiation so do a best-guess based
+	 * on whether Tx or Rx LPI indications have been received.
+	 */
+	if (phy_data & (E1000_EEE_TX_LPI_RCVD | E1000_EEE_RX_LPI_RCVD))
+		edata->eee_active = true;
+
+	edata->eee_enabled = !hw->dev_spec.ich8lan.eee_disable;
+	edata->tx_lpi_enabled = true;
+	edata->tx_lpi_timer = er32(LPIC) >> E1000_LPIC_LPIET_SHIFT;
+
+release:
+	hw->phy.ops.release(hw);
+	if (ret_val)
+		ret_val = -ENODATA;
+
+	pm_runtime_put_sync(netdev->dev.parent);
+
+	return ret_val;
+}
+
+static int e1000e_set_eee(struct net_device *netdev, struct ethtool_eee *edata)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct ethtool_eee eee_curr;
+	s32 ret_val;
+
+	ret_val = e1000e_get_eee(netdev, &eee_curr);
+	if (ret_val)
+		return ret_val;
+
+	if (eee_curr.tx_lpi_enabled != edata->tx_lpi_enabled) {
+		e_err("Setting EEE tx-lpi is not supported\n");
+		return -EINVAL;
+	}
+
+	if (eee_curr.tx_lpi_timer != edata->tx_lpi_timer) {
+		e_err("Setting EEE Tx LPI timer is not supported\n");
+		return -EINVAL;
+	}
+
+	if (edata->advertised & ~(ADVERTISE_100_FULL | ADVERTISE_1000_FULL)) {
+		e_err("EEE advertisement supports only 100TX and/or 1000T full-duplex\n");
+		return -EINVAL;
+	}
+
+	adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised);
+
+	hw->dev_spec.ich8lan.eee_disable = !edata->eee_enabled;
+
+	pm_runtime_get_sync(netdev->dev.parent);
+
+	/* reset the link */
+	if (netif_running(netdev))
+		e1000e_reinit_locked(adapter);
+	else
+		e1000e_reset(adapter);
+
+	pm_runtime_put_sync(netdev->dev.parent);
+
+	return 0;
+}
+
+static int e1000e_get_ts_info(struct net_device *netdev,
+			      struct ethtool_ts_info *info)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	ethtool_op_get_ts_info(netdev, info);
+
+	if (!(adapter->flags & FLAG_HAS_HW_TIMESTAMP))
+		return 0;
+
+	info->so_timestamping |= (SOF_TIMESTAMPING_TX_HARDWARE |
+				  SOF_TIMESTAMPING_RX_HARDWARE |
+				  SOF_TIMESTAMPING_RAW_HARDWARE);
+
+	info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON);
+
+	info->rx_filters = (BIT(HWTSTAMP_FILTER_NONE) |
+			    BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
+			    BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
+			    BIT(HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+			    BIT(HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) |
+			    BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+			    BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) |
+			    BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) |
+			    BIT(HWTSTAMP_FILTER_PTP_V2_SYNC) |
+			    BIT(HWTSTAMP_FILTER_PTP_V2_DELAY_REQ) |
+			    BIT(HWTSTAMP_FILTER_ALL));
+
+	if (adapter->ptp_clock)
+		info->phc_index = ptp_clock_index(adapter->ptp_clock);
+
+	return 0;
+}
+
+static u32 e1000e_get_priv_flags(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	u32 priv_flags = 0;
+
+	if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS)
+		priv_flags |= E1000E_PRIV_FLAGS_S0IX_ENABLED;
+
+	return priv_flags;
+}
+
+static int e1000e_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	unsigned int flags2 = adapter->flags2;
+
+	flags2 &= ~FLAG2_ENABLE_S0IX_FLOWS;
+	if (priv_flags & E1000E_PRIV_FLAGS_S0IX_ENABLED) {
+		struct e1000_hw *hw = &adapter->hw;
+
+		if (hw->mac.type < e1000_pch_cnp)
+			return -EINVAL;
+		flags2 |= FLAG2_ENABLE_S0IX_FLOWS;
+	}
+
+	if (flags2 != adapter->flags2)
+		adapter->flags2 = flags2;
+
+	return 0;
+}
+
+static const struct ethtool_ops e1000_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS,
+	.get_drvinfo		= e1000_get_drvinfo,
+	.get_regs_len		= e1000_get_regs_len,
+	.get_regs		= e1000_get_regs,
+	.get_wol		= e1000_get_wol,
+	.set_wol		= e1000_set_wol,
+	.get_msglevel		= e1000_get_msglevel,
+	.set_msglevel		= e1000_set_msglevel,
+	.nway_reset		= e1000_nway_reset,
+	.get_link		= ethtool_op_get_link,
+	.get_eeprom_len		= e1000_get_eeprom_len,
+	.get_eeprom		= e1000_get_eeprom,
+	.set_eeprom		= e1000_set_eeprom,
+	.get_ringparam		= e1000_get_ringparam,
+	.set_ringparam		= e1000_set_ringparam,
+	.get_pauseparam		= e1000_get_pauseparam,
+	.set_pauseparam		= e1000_set_pauseparam,
+	.self_test		= e1000_diag_test,
+	.get_strings		= e1000_get_strings,
+	.set_phys_id		= e1000_set_phys_id,
+	.get_ethtool_stats	= e1000_get_ethtool_stats,
+	.get_sset_count		= e1000e_get_sset_count,
+	.get_coalesce		= e1000_get_coalesce,
+	.set_coalesce		= e1000_set_coalesce,
+	.get_rxnfc		= e1000_get_rxnfc,
+	.get_ts_info		= e1000e_get_ts_info,
+	.get_eee		= e1000e_get_eee,
+	.set_eee		= e1000e_set_eee,
+	.get_link_ksettings	= e1000_get_link_ksettings,
+	.set_link_ksettings	= e1000_set_link_ksettings,
+	.get_priv_flags		= e1000e_get_priv_flags,
+	.set_priv_flags		= e1000e_set_priv_flags,
+};
+
+void e1000e_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &e1000_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h
new file mode 100644
index 0000000000..1fef6bb5a5
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/hw.h
@@ -0,0 +1,738 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _E1000E_HW_H_
+#define _E1000E_HW_H_
+
+#include "regs.h"
+#include "defines.h"
+
+struct e1000_hw;
+
+#define E1000_DEV_ID_82571EB_COPPER		0x105E
+#define E1000_DEV_ID_82571EB_FIBER		0x105F
+#define E1000_DEV_ID_82571EB_SERDES		0x1060
+#define E1000_DEV_ID_82571EB_QUAD_COPPER	0x10A4
+#define E1000_DEV_ID_82571PT_QUAD_COPPER	0x10D5
+#define E1000_DEV_ID_82571EB_QUAD_FIBER		0x10A5
+#define E1000_DEV_ID_82571EB_QUAD_COPPER_LP	0x10BC
+#define E1000_DEV_ID_82571EB_SERDES_DUAL	0x10D9
+#define E1000_DEV_ID_82571EB_SERDES_QUAD	0x10DA
+#define E1000_DEV_ID_82572EI_COPPER		0x107D
+#define E1000_DEV_ID_82572EI_FIBER		0x107E
+#define E1000_DEV_ID_82572EI_SERDES		0x107F
+#define E1000_DEV_ID_82572EI			0x10B9
+#define E1000_DEV_ID_82573E			0x108B
+#define E1000_DEV_ID_82573E_IAMT		0x108C
+#define E1000_DEV_ID_82573L			0x109A
+#define E1000_DEV_ID_82574L			0x10D3
+#define E1000_DEV_ID_82574LA			0x10F6
+#define E1000_DEV_ID_82583V			0x150C
+#define E1000_DEV_ID_80003ES2LAN_COPPER_DPT	0x1096
+#define E1000_DEV_ID_80003ES2LAN_SERDES_DPT	0x1098
+#define E1000_DEV_ID_80003ES2LAN_COPPER_SPT	0x10BA
+#define E1000_DEV_ID_80003ES2LAN_SERDES_SPT	0x10BB
+#define E1000_DEV_ID_ICH8_82567V_3		0x1501
+#define E1000_DEV_ID_ICH8_IGP_M_AMT		0x1049
+#define E1000_DEV_ID_ICH8_IGP_AMT		0x104A
+#define E1000_DEV_ID_ICH8_IGP_C			0x104B
+#define E1000_DEV_ID_ICH8_IFE			0x104C
+#define E1000_DEV_ID_ICH8_IFE_GT		0x10C4
+#define E1000_DEV_ID_ICH8_IFE_G			0x10C5
+#define E1000_DEV_ID_ICH8_IGP_M			0x104D
+#define E1000_DEV_ID_ICH9_IGP_AMT		0x10BD
+#define E1000_DEV_ID_ICH9_BM			0x10E5
+#define E1000_DEV_ID_ICH9_IGP_M_AMT		0x10F5
+#define E1000_DEV_ID_ICH9_IGP_M			0x10BF
+#define E1000_DEV_ID_ICH9_IGP_M_V		0x10CB
+#define E1000_DEV_ID_ICH9_IGP_C			0x294C
+#define E1000_DEV_ID_ICH9_IFE			0x10C0
+#define E1000_DEV_ID_ICH9_IFE_GT		0x10C3
+#define E1000_DEV_ID_ICH9_IFE_G			0x10C2
+#define E1000_DEV_ID_ICH10_R_BM_LM		0x10CC
+#define E1000_DEV_ID_ICH10_R_BM_LF		0x10CD
+#define E1000_DEV_ID_ICH10_R_BM_V		0x10CE
+#define E1000_DEV_ID_ICH10_D_BM_LM		0x10DE
+#define E1000_DEV_ID_ICH10_D_BM_LF		0x10DF
+#define E1000_DEV_ID_ICH10_D_BM_V		0x1525
+#define E1000_DEV_ID_PCH_M_HV_LM		0x10EA
+#define E1000_DEV_ID_PCH_M_HV_LC		0x10EB
+#define E1000_DEV_ID_PCH_D_HV_DM		0x10EF
+#define E1000_DEV_ID_PCH_D_HV_DC		0x10F0
+#define E1000_DEV_ID_PCH2_LV_LM			0x1502
+#define E1000_DEV_ID_PCH2_LV_V			0x1503
+#define E1000_DEV_ID_PCH_LPT_I217_LM		0x153A
+#define E1000_DEV_ID_PCH_LPT_I217_V		0x153B
+#define E1000_DEV_ID_PCH_LPTLP_I218_LM		0x155A
+#define E1000_DEV_ID_PCH_LPTLP_I218_V		0x1559
+#define E1000_DEV_ID_PCH_I218_LM2		0x15A0
+#define E1000_DEV_ID_PCH_I218_V2		0x15A1
+#define E1000_DEV_ID_PCH_I218_LM3		0x15A2	/* Wildcat Point PCH */
+#define E1000_DEV_ID_PCH_I218_V3		0x15A3	/* Wildcat Point PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_LM		0x156F	/* SPT PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_V		0x1570	/* SPT PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_LM2		0x15B7	/* SPT-H PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_V2		0x15B8	/* SPT-H PCH */
+#define E1000_DEV_ID_PCH_LBG_I219_LM3		0x15B9	/* LBG PCH */
+#define E1000_DEV_ID_PCH_SPT_I219_LM4		0x15D7
+#define E1000_DEV_ID_PCH_SPT_I219_V4		0x15D8
+#define E1000_DEV_ID_PCH_SPT_I219_LM5		0x15E3
+#define E1000_DEV_ID_PCH_SPT_I219_V5		0x15D6
+#define E1000_DEV_ID_PCH_CNP_I219_LM6		0x15BD
+#define E1000_DEV_ID_PCH_CNP_I219_V6		0x15BE
+#define E1000_DEV_ID_PCH_CNP_I219_LM7		0x15BB
+#define E1000_DEV_ID_PCH_CNP_I219_V7		0x15BC
+#define E1000_DEV_ID_PCH_ICP_I219_LM8		0x15DF
+#define E1000_DEV_ID_PCH_ICP_I219_V8		0x15E0
+#define E1000_DEV_ID_PCH_ICP_I219_LM9		0x15E1
+#define E1000_DEV_ID_PCH_ICP_I219_V9		0x15E2
+#define E1000_DEV_ID_PCH_CMP_I219_LM10		0x0D4E
+#define E1000_DEV_ID_PCH_CMP_I219_V10		0x0D4F
+#define E1000_DEV_ID_PCH_CMP_I219_LM11		0x0D4C
+#define E1000_DEV_ID_PCH_CMP_I219_V11		0x0D4D
+#define E1000_DEV_ID_PCH_CMP_I219_LM12		0x0D53
+#define E1000_DEV_ID_PCH_CMP_I219_V12		0x0D55
+#define E1000_DEV_ID_PCH_TGP_I219_LM13		0x15FB
+#define E1000_DEV_ID_PCH_TGP_I219_V13		0x15FC
+#define E1000_DEV_ID_PCH_TGP_I219_LM14		0x15F9
+#define E1000_DEV_ID_PCH_TGP_I219_V14		0x15FA
+#define E1000_DEV_ID_PCH_TGP_I219_LM15		0x15F4
+#define E1000_DEV_ID_PCH_TGP_I219_V15		0x15F5
+#define E1000_DEV_ID_PCH_RPL_I219_LM23		0x0DC5
+#define E1000_DEV_ID_PCH_RPL_I219_V23		0x0DC6
+#define E1000_DEV_ID_PCH_ADP_I219_LM16		0x1A1E
+#define E1000_DEV_ID_PCH_ADP_I219_V16		0x1A1F
+#define E1000_DEV_ID_PCH_ADP_I219_LM17		0x1A1C
+#define E1000_DEV_ID_PCH_ADP_I219_V17		0x1A1D
+#define E1000_DEV_ID_PCH_RPL_I219_LM22		0x0DC7
+#define E1000_DEV_ID_PCH_RPL_I219_V22		0x0DC8
+#define E1000_DEV_ID_PCH_MTP_I219_LM18		0x550A
+#define E1000_DEV_ID_PCH_MTP_I219_V18		0x550B
+#define E1000_DEV_ID_PCH_MTP_I219_LM19		0x550C
+#define E1000_DEV_ID_PCH_MTP_I219_V19		0x550D
+#define E1000_DEV_ID_PCH_LNP_I219_LM20		0x550E
+#define E1000_DEV_ID_PCH_LNP_I219_V20		0x550F
+#define E1000_DEV_ID_PCH_LNP_I219_LM21		0x5510
+#define E1000_DEV_ID_PCH_LNP_I219_V21		0x5511
+#define E1000_DEV_ID_PCH_ARL_I219_LM24		0x57A0
+#define E1000_DEV_ID_PCH_ARL_I219_V24		0x57A1
+#define E1000_DEV_ID_PCH_PTP_I219_LM25		0x57B3
+#define E1000_DEV_ID_PCH_PTP_I219_V25		0x57B4
+#define E1000_DEV_ID_PCH_PTP_I219_LM26		0x57B5
+#define E1000_DEV_ID_PCH_PTP_I219_V26		0x57B6
+#define E1000_DEV_ID_PCH_PTP_I219_LM27		0x57B7
+#define E1000_DEV_ID_PCH_PTP_I219_V27		0x57B8
+#define E1000_DEV_ID_PCH_NVL_I219_LM29		0x57B9
+#define E1000_DEV_ID_PCH_NVL_I219_V29		0x57BA
+
+#define E1000_REVISION_4	4
+
+#define E1000_FUNC_1		1
+
+#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0	0
+#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1	3
+
+enum e1000_mac_type {
+	e1000_82571,
+	e1000_82572,
+	e1000_82573,
+	e1000_82574,
+	e1000_82583,
+	e1000_80003es2lan,
+	e1000_ich8lan,
+	e1000_ich9lan,
+	e1000_ich10lan,
+	e1000_pchlan,
+	e1000_pch2lan,
+	e1000_pch_lpt,
+	e1000_pch_spt,
+	e1000_pch_cnp,
+	e1000_pch_tgp,
+	e1000_pch_adp,
+	e1000_pch_mtp,
+	e1000_pch_lnp,
+	e1000_pch_ptp,
+	e1000_pch_nvp,
+};
+
+enum e1000_media_type {
+	e1000_media_type_unknown = 0,
+	e1000_media_type_copper = 1,
+	e1000_media_type_fiber = 2,
+	e1000_media_type_internal_serdes = 3,
+	e1000_num_media_types
+};
+
+enum e1000_nvm_type {
+	e1000_nvm_unknown = 0,
+	e1000_nvm_none,
+	e1000_nvm_eeprom_spi,
+	e1000_nvm_flash_hw,
+	e1000_nvm_flash_sw
+};
+
+enum e1000_nvm_override {
+	e1000_nvm_override_none = 0,
+	e1000_nvm_override_spi_small,
+	e1000_nvm_override_spi_large
+};
+
+enum e1000_phy_type {
+	e1000_phy_unknown = 0,
+	e1000_phy_none,
+	e1000_phy_m88,
+	e1000_phy_igp,
+	e1000_phy_igp_2,
+	e1000_phy_gg82563,
+	e1000_phy_igp_3,
+	e1000_phy_ife,
+	e1000_phy_bm,
+	e1000_phy_82578,
+	e1000_phy_82577,
+	e1000_phy_82579,
+	e1000_phy_i217,
+};
+
+enum e1000_bus_width {
+	e1000_bus_width_unknown = 0,
+	e1000_bus_width_pcie_x1,
+	e1000_bus_width_pcie_x2,
+	e1000_bus_width_pcie_x4 = 4,
+	e1000_bus_width_pcie_x8 = 8,
+	e1000_bus_width_32,
+	e1000_bus_width_64,
+	e1000_bus_width_reserved
+};
+
+enum e1000_1000t_rx_status {
+	e1000_1000t_rx_status_not_ok = 0,
+	e1000_1000t_rx_status_ok,
+	e1000_1000t_rx_status_undefined = 0xFF
+};
+
+enum e1000_rev_polarity {
+	e1000_rev_polarity_normal = 0,
+	e1000_rev_polarity_reversed,
+	e1000_rev_polarity_undefined = 0xFF
+};
+
+enum e1000_fc_mode {
+	e1000_fc_none = 0,
+	e1000_fc_rx_pause,
+	e1000_fc_tx_pause,
+	e1000_fc_full,
+	e1000_fc_default = 0xFF
+};
+
+enum e1000_ms_type {
+	e1000_ms_hw_default = 0,
+	e1000_ms_force_master,
+	e1000_ms_force_slave,
+	e1000_ms_auto
+};
+
+enum e1000_smart_speed {
+	e1000_smart_speed_default = 0,
+	e1000_smart_speed_on,
+	e1000_smart_speed_off
+};
+
+enum e1000_serdes_link_state {
+	e1000_serdes_link_down = 0,
+	e1000_serdes_link_autoneg_progress,
+	e1000_serdes_link_autoneg_complete,
+	e1000_serdes_link_forced_up
+};
+
+/* Receive Descriptor - Extended */
+union e1000_rx_desc_extended {
+	struct {
+		__le64 buffer_addr;
+		__le64 reserved;
+	} read;
+	struct {
+		struct {
+			__le32 mrq;	      /* Multiple Rx Queues */
+			union {
+				__le32 rss;	    /* RSS Hash */
+				struct {
+					__le16 ip_id;  /* IP id */
+					__le16 csum;   /* Packet Checksum */
+				} csum_ip;
+			} hi_dword;
+		} lower;
+		struct {
+			__le32 status_error;     /* ext status/error */
+			__le16 length;
+			__le16 vlan;	     /* VLAN tag */
+		} upper;
+	} wb;  /* writeback */
+};
+
+#define MAX_PS_BUFFERS 4
+
+/* Number of packet split data buffers (not including the header buffer) */
+#define PS_PAGE_BUFFERS	(MAX_PS_BUFFERS - 1)
+
+/* Receive Descriptor - Packet Split */
+union e1000_rx_desc_packet_split {
+	struct {
+		/* one buffer for protocol header(s), three data buffers */
+		__le64 buffer_addr[MAX_PS_BUFFERS];
+	} read;
+	struct {
+		struct {
+			__le32 mrq;	      /* Multiple Rx Queues */
+			union {
+				__le32 rss;	      /* RSS Hash */
+				struct {
+					__le16 ip_id;    /* IP id */
+					__le16 csum;     /* Packet Checksum */
+				} csum_ip;
+			} hi_dword;
+		} lower;
+		struct {
+			__le32 status_error;     /* ext status/error */
+			__le16 length0;	  /* length of buffer 0 */
+			__le16 vlan;	     /* VLAN tag */
+		} middle;
+		struct {
+			__le16 header_status;
+			/* length of buffers 1-3 */
+			__le16 length[PS_PAGE_BUFFERS];
+		} upper;
+		__le64 reserved;
+	} wb; /* writeback */
+};
+
+/* Transmit Descriptor */
+struct e1000_tx_desc {
+	__le64 buffer_addr;      /* Address of the descriptor's data buffer */
+	union {
+		__le32 data;
+		struct {
+			__le16 length;    /* Data buffer length */
+			u8 cso;	/* Checksum offset */
+			u8 cmd;	/* Descriptor control */
+		} flags;
+	} lower;
+	union {
+		__le32 data;
+		struct {
+			u8 status;     /* Descriptor status */
+			u8 css;	/* Checksum start */
+			__le16 special;
+		} fields;
+	} upper;
+};
+
+/* Offload Context Descriptor */
+struct e1000_context_desc {
+	union {
+		__le32 ip_config;
+		struct {
+			u8 ipcss;      /* IP checksum start */
+			u8 ipcso;      /* IP checksum offset */
+			__le16 ipcse;     /* IP checksum end */
+		} ip_fields;
+	} lower_setup;
+	union {
+		__le32 tcp_config;
+		struct {
+			u8 tucss;      /* TCP checksum start */
+			u8 tucso;      /* TCP checksum offset */
+			__le16 tucse;     /* TCP checksum end */
+		} tcp_fields;
+	} upper_setup;
+	__le32 cmd_and_length;
+	union {
+		__le32 data;
+		struct {
+			u8 status;     /* Descriptor status */
+			u8 hdr_len;    /* Header length */
+			__le16 mss;       /* Maximum segment size */
+		} fields;
+	} tcp_seg_setup;
+};
+
+/* Offload data descriptor */
+struct e1000_data_desc {
+	__le64 buffer_addr;   /* Address of the descriptor's buffer address */
+	union {
+		__le32 data;
+		struct {
+			__le16 length;    /* Data buffer length */
+			u8 typ_len_ext;
+			u8 cmd;
+		} flags;
+	} lower;
+	union {
+		__le32 data;
+		struct {
+			u8 status;     /* Descriptor status */
+			u8 popts;      /* Packet Options */
+			__le16 special;
+		} fields;
+	} upper;
+};
+
+/* Statistics counters collected by the MAC */
+struct e1000_hw_stats {
+	u64 crcerrs;
+	u64 algnerrc;
+	u64 symerrs;
+	u64 rxerrc;
+	u64 mpc;
+	u64 scc;
+	u64 ecol;
+	u64 mcc;
+	u64 latecol;
+	u64 colc;
+	u64 dc;
+	u64 tncrs;
+	u64 sec;
+	u64 cexterr;
+	u64 rlec;
+	u64 xonrxc;
+	u64 xontxc;
+	u64 xoffrxc;
+	u64 xofftxc;
+	u64 fcruc;
+	u64 prc64;
+	u64 prc127;
+	u64 prc255;
+	u64 prc511;
+	u64 prc1023;
+	u64 prc1522;
+	u64 gprc;
+	u64 bprc;
+	u64 mprc;
+	u64 gptc;
+	u64 gorc;
+	u64 gotc;
+	u64 rnbc;
+	u64 ruc;
+	u64 rfc;
+	u64 roc;
+	u64 rjc;
+	u64 mgprc;
+	u64 mgpdc;
+	u64 mgptc;
+	u64 tor;
+	u64 tot;
+	u64 tpr;
+	u64 tpt;
+	u64 ptc64;
+	u64 ptc127;
+	u64 ptc255;
+	u64 ptc511;
+	u64 ptc1023;
+	u64 ptc1522;
+	u64 mptc;
+	u64 bptc;
+	u64 tsctc;
+	u64 tsctfc;
+	u64 iac;
+	u64 icrxptc;
+	u64 icrxatc;
+	u64 ictxptc;
+	u64 ictxatc;
+	u64 ictxqec;
+	u64 ictxqmtc;
+	u64 icrxdmtc;
+	u64 icrxoc;
+};
+
+struct e1000_phy_stats {
+	u32 idle_errors;
+	u32 receive_errors;
+};
+
+struct e1000_host_mng_dhcp_cookie {
+	u32 signature;
+	u8 status;
+	u8 reserved0;
+	u16 vlan_id;
+	u32 reserved1;
+	u16 reserved2;
+	u8 reserved3;
+	u8 checksum;
+};
+
+/* Host Interface "Rev 1" */
+struct e1000_host_command_header {
+	u8 command_id;
+	u8 command_length;
+	u8 command_options;
+	u8 checksum;
+};
+
+#define E1000_HI_MAX_DATA_LENGTH	252
+struct e1000_host_command_info {
+	struct e1000_host_command_header command_header;
+	u8 command_data[E1000_HI_MAX_DATA_LENGTH];
+};
+
+/* Host Interface "Rev 2" */
+struct e1000_host_mng_command_header {
+	u8 command_id;
+	u8 checksum;
+	u16 reserved1;
+	u16 reserved2;
+	u16 command_length;
+};
+
+#define E1000_HI_MAX_MNG_DATA_LENGTH	0x6F8
+struct e1000_host_mng_command_info {
+	struct e1000_host_mng_command_header command_header;
+	u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH];
+};
+
+#include "mac.h"
+#include "phy.h"
+#include "nvm.h"
+#include "manage.h"
+
+/* Function pointers for the MAC. */
+struct e1000_mac_operations {
+	s32  (*id_led_init)(struct e1000_hw *);
+	s32  (*blink_led)(struct e1000_hw *);
+	bool (*check_mng_mode)(struct e1000_hw *);
+	s32  (*check_for_link)(struct e1000_hw *);
+	s32  (*cleanup_led)(struct e1000_hw *);
+	void (*clear_hw_cntrs)(struct e1000_hw *);
+	void (*clear_vfta)(struct e1000_hw *);
+	s32  (*get_bus_info)(struct e1000_hw *);
+	void (*set_lan_id)(struct e1000_hw *);
+	s32  (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *);
+	s32  (*led_on)(struct e1000_hw *);
+	s32  (*led_off)(struct e1000_hw *);
+	void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32);
+	s32  (*reset_hw)(struct e1000_hw *);
+	s32  (*init_hw)(struct e1000_hw *);
+	s32  (*setup_link)(struct e1000_hw *);
+	s32  (*setup_physical_interface)(struct e1000_hw *);
+	s32  (*setup_led)(struct e1000_hw *);
+	void (*write_vfta)(struct e1000_hw *, u32, u32);
+	void (*config_collision_dist)(struct e1000_hw *);
+	int  (*rar_set)(struct e1000_hw *, u8 *, u32);
+	s32  (*read_mac_addr)(struct e1000_hw *);
+	u32  (*rar_get_count)(struct e1000_hw *);
+};
+
+/* When to use various PHY register access functions:
+ *
+ *                 Func   Caller
+ *   Function      Does   Does    When to use
+ *   ~~~~~~~~~~~~  ~~~~~  ~~~~~~  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *   X_reg         L,P,A  n/a     for simple PHY reg accesses
+ *   X_reg_locked  P,A    L       for multiple accesses of different regs
+ *                                on different pages
+ *   X_reg_page    A      L,P     for multiple accesses of different regs
+ *                                on the same page
+ *
+ * Where X=[read|write], L=locking, P=sets page, A=register access
+ *
+ */
+struct e1000_phy_operations {
+	s32  (*acquire)(struct e1000_hw *);
+	s32  (*cfg_on_link_up)(struct e1000_hw *);
+	s32  (*check_polarity)(struct e1000_hw *);
+	s32  (*check_reset_block)(struct e1000_hw *);
+	s32  (*commit)(struct e1000_hw *);
+	s32  (*force_speed_duplex)(struct e1000_hw *);
+	s32  (*get_cfg_done)(struct e1000_hw *hw);
+	s32  (*get_cable_length)(struct e1000_hw *);
+	s32  (*get_info)(struct e1000_hw *);
+	s32  (*set_page)(struct e1000_hw *, u16);
+	s32  (*read_reg)(struct e1000_hw *, u32, u16 *);
+	s32  (*read_reg_locked)(struct e1000_hw *, u32, u16 *);
+	s32  (*read_reg_page)(struct e1000_hw *, u32, u16 *);
+	void (*release)(struct e1000_hw *);
+	s32  (*reset)(struct e1000_hw *);
+	s32  (*set_d0_lplu_state)(struct e1000_hw *, bool);
+	s32  (*set_d3_lplu_state)(struct e1000_hw *, bool);
+	s32  (*write_reg)(struct e1000_hw *, u32, u16);
+	s32  (*write_reg_locked)(struct e1000_hw *, u32, u16);
+	s32  (*write_reg_page)(struct e1000_hw *, u32, u16);
+	void (*power_up)(struct e1000_hw *);
+	void (*power_down)(struct e1000_hw *);
+};
+
+/* Function pointers for the NVM. */
+struct e1000_nvm_operations {
+	s32  (*acquire)(struct e1000_hw *);
+	s32  (*read)(struct e1000_hw *, u16, u16, u16 *);
+	void (*release)(struct e1000_hw *);
+	void (*reload)(struct e1000_hw *);
+	s32  (*update)(struct e1000_hw *);
+	s32  (*valid_led_default)(struct e1000_hw *, u16 *);
+	s32  (*validate)(struct e1000_hw *);
+	s32  (*write)(struct e1000_hw *, u16, u16, u16 *);
+};
+
+struct e1000_mac_info {
+	struct e1000_mac_operations ops;
+	u8 addr[ETH_ALEN];
+	u8 perm_addr[ETH_ALEN];
+
+	enum e1000_mac_type type;
+
+	u32 collision_delta;
+	u32 ledctl_default;
+	u32 ledctl_mode1;
+	u32 ledctl_mode2;
+	u32 mc_filter_type;
+	u32 tx_packet_delta;
+	u32 txcw;
+
+	u16 current_ifs_val;
+	u16 ifs_max_val;
+	u16 ifs_min_val;
+	u16 ifs_ratio;
+	u16 ifs_step_size;
+	u16 mta_reg_count;
+
+	/* Maximum size of the MTA register table in all supported adapters */
+#define MAX_MTA_REG 128
+	u32 mta_shadow[MAX_MTA_REG];
+	u16 rar_entry_count;
+
+	u8 forced_speed_duplex;
+
+	bool adaptive_ifs;
+	bool has_fwsm;
+	bool arc_subsystem_valid;
+	bool autoneg;
+	bool autoneg_failed;
+	bool get_link_status;
+	bool in_ifs_mode;
+	bool serdes_has_link;
+	bool tx_pkt_filtering;
+	enum e1000_serdes_link_state serdes_link_state;
+};
+
+struct e1000_phy_info {
+	struct e1000_phy_operations ops;
+
+	enum e1000_phy_type type;
+
+	enum e1000_1000t_rx_status local_rx;
+	enum e1000_1000t_rx_status remote_rx;
+	enum e1000_ms_type ms_type;
+	enum e1000_ms_type original_ms_type;
+	enum e1000_rev_polarity cable_polarity;
+	enum e1000_smart_speed smart_speed;
+
+	u32 addr;
+	u32 id;
+	u32 reset_delay_us;	/* in usec */
+	u32 revision;
+
+	enum e1000_media_type media_type;
+
+	u16 autoneg_advertised;
+	u16 autoneg_mask;
+	u16 cable_length;
+	u16 max_cable_length;
+	u16 min_cable_length;
+
+	u8 mdix;
+
+	bool disable_polarity_correction;
+	bool is_mdix;
+	bool polarity_correction;
+	bool speed_downgraded;
+	bool autoneg_wait_to_complete;
+};
+
+struct e1000_nvm_info {
+	struct e1000_nvm_operations ops;
+
+	enum e1000_nvm_type type;
+	enum e1000_nvm_override override;
+
+	u32 flash_bank_size;
+	u32 flash_base_addr;
+
+	u16 word_size;
+	u16 delay_usec;
+	u16 address_bits;
+	u16 opcode_bits;
+	u16 page_size;
+};
+
+struct e1000_bus_info {
+	enum e1000_bus_width width;
+
+	u16 func;
+};
+
+struct e1000_fc_info {
+	u32 high_water;          /* Flow control high-water mark */
+	u32 low_water;           /* Flow control low-water mark */
+	u16 pause_time;          /* Flow control pause timer */
+	u16 refresh_time;        /* Flow control refresh timer */
+	bool send_xon;           /* Flow control send XON */
+	bool strict_ieee;        /* Strict IEEE mode */
+	enum e1000_fc_mode current_mode; /* FC mode in effect */
+	enum e1000_fc_mode requested_mode; /* FC mode requested by caller */
+};
+
+struct e1000_dev_spec_82571 {
+	bool laa_is_present;
+	u32 smb_counter;
+};
+
+struct e1000_dev_spec_80003es2lan {
+	bool mdic_wa_enable;
+};
+
+struct e1000_shadow_ram {
+	u16 value;
+	bool modified;
+};
+
+#define E1000_ICH8_SHADOW_RAM_WORDS		2048
+
+/* I218 PHY Ultra Low Power (ULP) states */
+enum e1000_ulp_state {
+	e1000_ulp_state_unknown,
+	e1000_ulp_state_off,
+	e1000_ulp_state_on,
+};
+
+struct e1000_dev_spec_ich8lan {
+	bool kmrn_lock_loss_workaround_enabled;
+	struct e1000_shadow_ram shadow_ram[E1000_ICH8_SHADOW_RAM_WORDS];
+	bool nvm_k1_enabled;
+	bool eee_disable;
+	u16 eee_lp_ability;
+	enum e1000_ulp_state ulp_state;
+};
+
+struct e1000_hw {
+	struct e1000_adapter *adapter;
+
+	void __iomem *hw_addr;
+	void __iomem *flash_address;
+
+	struct e1000_mac_info mac;
+	struct e1000_fc_info fc;
+	struct e1000_phy_info phy;
+	struct e1000_nvm_info nvm;
+	struct e1000_bus_info bus;
+	struct e1000_host_mng_dhcp_cookie mng_cookie;
+
+	union {
+		struct e1000_dev_spec_82571 e82571;
+		struct e1000_dev_spec_80003es2lan e80003es2lan;
+		struct e1000_dev_spec_ich8lan ich8lan;
+	} dev_spec;
+};
+
+#include "82571.h"
+#include "80003es2lan.h"
+#include "ich8lan.h"
+
+#endif /* _E1000E_HW_H_ */
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
new file mode 100644
index 0000000000..39e9fc601b
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -0,0 +1,6077 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+/* 82562G 10/100 Network Connection
+ * 82562G-2 10/100 Network Connection
+ * 82562GT 10/100 Network Connection
+ * 82562GT-2 10/100 Network Connection
+ * 82562V 10/100 Network Connection
+ * 82562V-2 10/100 Network Connection
+ * 82566DC-2 Gigabit Network Connection
+ * 82566DC Gigabit Network Connection
+ * 82566DM-2 Gigabit Network Connection
+ * 82566DM Gigabit Network Connection
+ * 82566MC Gigabit Network Connection
+ * 82566MM Gigabit Network Connection
+ * 82567LM Gigabit Network Connection
+ * 82567LF Gigabit Network Connection
+ * 82567V Gigabit Network Connection
+ * 82567LM-2 Gigabit Network Connection
+ * 82567LF-2 Gigabit Network Connection
+ * 82567V-2 Gigabit Network Connection
+ * 82567LF-3 Gigabit Network Connection
+ * 82567LM-3 Gigabit Network Connection
+ * 82567LM-4 Gigabit Network Connection
+ * 82577LM Gigabit Network Connection
+ * 82577LC Gigabit Network Connection
+ * 82578DM Gigabit Network Connection
+ * 82578DC Gigabit Network Connection
+ * 82579LM Gigabit Network Connection
+ * 82579V Gigabit Network Connection
+ * Ethernet Connection I217-LM
+ * Ethernet Connection I217-V
+ * Ethernet Connection I218-V
+ * Ethernet Connection I218-LM
+ * Ethernet Connection (2) I218-LM
+ * Ethernet Connection (2) I218-V
+ * Ethernet Connection (3) I218-LM
+ * Ethernet Connection (3) I218-V
+ */
+
+#include "e1000.h"
+
+/* ICH GbE Flash Hardware Sequencing Flash Status Register bit breakdown */
+/* Offset 04h HSFSTS */
+union ich8_hws_flash_status {
+	struct ich8_hsfsts {
+		u16 flcdone:1;	/* bit 0 Flash Cycle Done */
+		u16 flcerr:1;	/* bit 1 Flash Cycle Error */
+		u16 dael:1;	/* bit 2 Direct Access error Log */
+		u16 berasesz:2;	/* bit 4:3 Sector Erase Size */
+		u16 flcinprog:1;	/* bit 5 flash cycle in Progress */
+		u16 reserved1:2;	/* bit 13:6 Reserved */
+		u16 reserved2:6;	/* bit 13:6 Reserved */
+		u16 fldesvalid:1;	/* bit 14 Flash Descriptor Valid */
+		u16 flockdn:1;	/* bit 15 Flash Config Lock-Down */
+	} hsf_status;
+	u16 regval;
+};
+
+/* ICH GbE Flash Hardware Sequencing Flash control Register bit breakdown */
+/* Offset 06h FLCTL */
+union ich8_hws_flash_ctrl {
+	struct ich8_hsflctl {
+		u16 flcgo:1;	/* 0 Flash Cycle Go */
+		u16 flcycle:2;	/* 2:1 Flash Cycle */
+		u16 reserved:5;	/* 7:3 Reserved  */
+		u16 fldbcount:2;	/* 9:8 Flash Data Byte Count */
+		u16 flockdn:6;	/* 15:10 Reserved */
+	} hsf_ctrl;
+	u16 regval;
+};
+
+/* ICH Flash Region Access Permissions */
+union ich8_hws_flash_regacc {
+	struct ich8_flracc {
+		u32 grra:8;	/* 0:7 GbE region Read Access */
+		u32 grwa:8;	/* 8:15 GbE region Write Access */
+		u32 gmrag:8;	/* 23:16 GbE Master Read Access Grant */
+		u32 gmwag:8;	/* 31:24 GbE Master Write Access Grant */
+	} hsf_flregacc;
+	u16 regval;
+};
+
+/* ICH Flash Protected Region */
+union ich8_flash_protected_range {
+	struct ich8_pr {
+		u32 base:13;	/* 0:12 Protected Range Base */
+		u32 reserved1:2;	/* 13:14 Reserved */
+		u32 rpe:1;	/* 15 Read Protection Enable */
+		u32 limit:13;	/* 16:28 Protected Range Limit */
+		u32 reserved2:2;	/* 29:30 Reserved */
+		u32 wpe:1;	/* 31 Write Protection Enable */
+	} range;
+	u32 regval;
+};
+
+static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw);
+static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw);
+static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank);
+static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw,
+						u32 offset, u8 byte);
+static s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset,
+					 u8 *data);
+static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, u32 offset,
+					 u16 *data);
+static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset,
+					 u8 size, u16 *data);
+static s32 e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset,
+					   u32 *data);
+static s32 e1000_read_flash_dword_ich8lan(struct e1000_hw *hw,
+					  u32 offset, u32 *data);
+static s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw,
+					    u32 offset, u32 data);
+static s32 e1000_retry_write_flash_dword_ich8lan(struct e1000_hw *hw,
+						 u32 offset, u32 dword);
+static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw);
+static s32 e1000_cleanup_led_ich8lan(struct e1000_hw *hw);
+static s32 e1000_led_on_ich8lan(struct e1000_hw *hw);
+static s32 e1000_led_off_ich8lan(struct e1000_hw *hw);
+static s32 e1000_id_led_init_pchlan(struct e1000_hw *hw);
+static s32 e1000_setup_led_pchlan(struct e1000_hw *hw);
+static s32 e1000_cleanup_led_pchlan(struct e1000_hw *hw);
+static s32 e1000_led_on_pchlan(struct e1000_hw *hw);
+static s32 e1000_led_off_pchlan(struct e1000_hw *hw);
+static s32 e1000_set_lplu_state_pchlan(struct e1000_hw *hw, bool active);
+static void e1000_power_down_phy_copper_ich8lan(struct e1000_hw *hw);
+static void e1000_lan_init_done_ich8lan(struct e1000_hw *hw);
+static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link);
+static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw);
+static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw);
+static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw);
+static int e1000_rar_set_pch2lan(struct e1000_hw *hw, u8 *addr, u32 index);
+static int e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index);
+static u32 e1000_rar_get_count_pch_lpt(struct e1000_hw *hw);
+static s32 e1000_k1_workaround_lv(struct e1000_hw *hw);
+static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate);
+static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force);
+static s32 e1000_setup_copper_link_pch_lpt(struct e1000_hw *hw);
+static s32 e1000_oem_bits_config_ich8lan(struct e1000_hw *hw, bool d0_state);
+
+static inline u16 __er16flash(struct e1000_hw *hw, unsigned long reg)
+{
+	return readw(hw->flash_address + reg);
+}
+
+static inline u32 __er32flash(struct e1000_hw *hw, unsigned long reg)
+{
+	return readl(hw->flash_address + reg);
+}
+
+static inline void __ew16flash(struct e1000_hw *hw, unsigned long reg, u16 val)
+{
+	writew(val, hw->flash_address + reg);
+}
+
+static inline void __ew32flash(struct e1000_hw *hw, unsigned long reg, u32 val)
+{
+	writel(val, hw->flash_address + reg);
+}
+
+#define er16flash(reg)		__er16flash(hw, (reg))
+#define er32flash(reg)		__er32flash(hw, (reg))
+#define ew16flash(reg, val)	__ew16flash(hw, (reg), (val))
+#define ew32flash(reg, val)	__ew32flash(hw, (reg), (val))
+
+/**
+ *  e1000_phy_is_accessible_pchlan - Check if able to access PHY registers
+ *  @hw: pointer to the HW structure
+ *
+ *  Test access to the PHY registers by reading the PHY ID registers.  If
+ *  the PHY ID is already known (e.g. resume path) compare it with known ID,
+ *  otherwise assume the read PHY ID is correct if it is valid.
+ *
+ *  Assumes the sw/fw/hw semaphore is already acquired.
+ **/
+static bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw)
+{
+	u16 phy_reg = 0;
+	u32 phy_id = 0;
+	s32 ret_val = 0;
+	u16 retry_count;
+	u32 mac_reg = 0;
+
+	for (retry_count = 0; retry_count < 2; retry_count++) {
+		ret_val = e1e_rphy_locked(hw, MII_PHYSID1, &phy_reg);
+		if (ret_val || (phy_reg == 0xFFFF))
+			continue;
+		phy_id = (u32)(phy_reg << 16);
+
+		ret_val = e1e_rphy_locked(hw, MII_PHYSID2, &phy_reg);
+		if (ret_val || (phy_reg == 0xFFFF)) {
+			phy_id = 0;
+			continue;
+		}
+		phy_id |= (u32)(phy_reg & PHY_REVISION_MASK);
+		break;
+	}
+
+	if (hw->phy.id) {
+		if (hw->phy.id == phy_id)
+			goto out;
+	} else if (phy_id) {
+		hw->phy.id = phy_id;
+		hw->phy.revision = (u32)(phy_reg & ~PHY_REVISION_MASK);
+		goto out;
+	}
+
+	/* In case the PHY needs to be in mdio slow mode,
+	 * set slow mode and try to get the PHY id again.
+	 */
+	if (hw->mac.type < e1000_pch_lpt) {
+		hw->phy.ops.release(hw);
+		ret_val = e1000_set_mdio_slow_mode_hv(hw);
+		if (!ret_val)
+			ret_val = e1000e_get_phy_id(hw);
+		hw->phy.ops.acquire(hw);
+	}
+
+	if (ret_val)
+		return false;
+out:
+	if (hw->mac.type >= e1000_pch_lpt) {
+		/* Only unforce SMBus if ME is not active */
+		if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
+			/* Unforce SMBus mode in PHY */
+			e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg);
+			phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
+			e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg);
+
+			/* Unforce SMBus mode in MAC */
+			mac_reg = er32(CTRL_EXT);
+			mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
+			ew32(CTRL_EXT, mac_reg);
+		}
+	}
+
+	return true;
+}
+
+/**
+ *  e1000_toggle_lanphypc_pch_lpt - toggle the LANPHYPC pin value
+ *  @hw: pointer to the HW structure
+ *
+ *  Toggling the LANPHYPC pin value fully power-cycles the PHY and is
+ *  used to reset the PHY to a quiescent state when necessary.
+ **/
+static void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw)
+{
+	u32 mac_reg;
+
+	/* Set Phy Config Counter to 50msec */
+	mac_reg = er32(FEXTNVM3);
+	mac_reg &= ~E1000_FEXTNVM3_PHY_CFG_COUNTER_MASK;
+	mac_reg |= E1000_FEXTNVM3_PHY_CFG_COUNTER_50MSEC;
+	ew32(FEXTNVM3, mac_reg);
+
+	/* Toggle LANPHYPC Value bit */
+	mac_reg = er32(CTRL);
+	mac_reg |= E1000_CTRL_LANPHYPC_OVERRIDE;
+	mac_reg &= ~E1000_CTRL_LANPHYPC_VALUE;
+	ew32(CTRL, mac_reg);
+	e1e_flush();
+	usleep_range(10, 20);
+	mac_reg &= ~E1000_CTRL_LANPHYPC_OVERRIDE;
+	ew32(CTRL, mac_reg);
+	e1e_flush();
+
+	if (hw->mac.type < e1000_pch_lpt) {
+		msleep(50);
+	} else {
+		u16 count = 20;
+
+		do {
+			usleep_range(5000, 6000);
+		} while (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LPCD) && count--);
+
+		msleep(30);
+	}
+}
+
+/**
+ *  e1000_init_phy_workarounds_pchlan - PHY initialization workarounds
+ *  @hw: pointer to the HW structure
+ *
+ *  Workarounds/flow necessary for PHY initialization during driver load
+ *  and resume paths.
+ **/
+static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
+{
+	struct e1000_adapter *adapter = hw->adapter;
+	u32 mac_reg, fwsm = er32(FWSM);
+	s32 ret_val;
+
+	/* Gate automatic PHY configuration by hardware on managed and
+	 * non-managed 82579 and newer adapters.
+	 */
+	e1000_gate_hw_phy_config_ich8lan(hw, true);
+
+	/* It is not possible to be certain of the current state of ULP
+	 * so forcibly disable it.
+	 */
+	hw->dev_spec.ich8lan.ulp_state = e1000_ulp_state_unknown;
+	ret_val = e1000_disable_ulp_lpt_lp(hw, true);
+	if (ret_val)
+		e_warn("Failed to disable ULP\n");
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val) {
+		e_dbg("Failed to initialize PHY flow\n");
+		goto out;
+	}
+
+	/* The MAC-PHY interconnect may be in SMBus mode.  If the PHY is
+	 * inaccessible and resetting the PHY is not blocked, toggle the
+	 * LANPHYPC Value bit to force the interconnect to PCIe mode.
+	 */
+	switch (hw->mac.type) {
+	case e1000_pch_lpt:
+	case e1000_pch_spt:
+	case e1000_pch_cnp:
+	case e1000_pch_tgp:
+	case e1000_pch_adp:
+	case e1000_pch_mtp:
+	case e1000_pch_lnp:
+	case e1000_pch_ptp:
+	case e1000_pch_nvp:
+		if (e1000_phy_is_accessible_pchlan(hw))
+			break;
+
+		/* Before toggling LANPHYPC, see if PHY is accessible by
+		 * forcing MAC to SMBus mode first.
+		 */
+		mac_reg = er32(CTRL_EXT);
+		mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
+		ew32(CTRL_EXT, mac_reg);
+
+		/* Wait 50 milliseconds for MAC to finish any retries
+		 * that it might be trying to perform from previous
+		 * attempts to acknowledge any phy read requests.
+		 */
+		msleep(50);
+
+		fallthrough;
+	case e1000_pch2lan:
+		if (e1000_phy_is_accessible_pchlan(hw))
+			break;
+
+		fallthrough;
+	case e1000_pchlan:
+		if ((hw->mac.type == e1000_pchlan) &&
+		    (fwsm & E1000_ICH_FWSM_FW_VALID))
+			break;
+
+		if (hw->phy.ops.check_reset_block(hw)) {
+			e_dbg("Required LANPHYPC toggle blocked by ME\n");
+			ret_val = -E1000_ERR_PHY;
+			break;
+		}
+
+		/* Toggle LANPHYPC Value bit */
+		e1000_toggle_lanphypc_pch_lpt(hw);
+		if (hw->mac.type >= e1000_pch_lpt) {
+			if (e1000_phy_is_accessible_pchlan(hw))
+				break;
+
+			/* Toggling LANPHYPC brings the PHY out of SMBus mode
+			 * so ensure that the MAC is also out of SMBus mode
+			 */
+			mac_reg = er32(CTRL_EXT);
+			mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
+			ew32(CTRL_EXT, mac_reg);
+
+			if (e1000_phy_is_accessible_pchlan(hw))
+				break;
+
+			ret_val = -E1000_ERR_PHY;
+		}
+		break;
+	default:
+		break;
+	}
+
+	hw->phy.ops.release(hw);
+	if (!ret_val) {
+
+		/* Check to see if able to reset PHY.  Print error if not */
+		if (hw->phy.ops.check_reset_block(hw)) {
+			e_err("Reset blocked by ME\n");
+			goto out;
+		}
+
+		/* Reset the PHY before any access to it.  Doing so, ensures
+		 * that the PHY is in a known good state before we read/write
+		 * PHY registers.  The generic reset is sufficient here,
+		 * because we haven't determined the PHY type yet.
+		 */
+		ret_val = e1000e_phy_hw_reset_generic(hw);
+		if (ret_val)
+			goto out;
+
+		/* On a successful reset, possibly need to wait for the PHY
+		 * to quiesce to an accessible state before returning control
+		 * to the calling function.  If the PHY does not quiesce, then
+		 * return E1000E_BLK_PHY_RESET, as this is the condition that
+		 *  the PHY is in.
+		 */
+		ret_val = hw->phy.ops.check_reset_block(hw);
+		if (ret_val)
+			e_err("ME blocked access to PHY after reset\n");
+	}
+
+out:
+	/* Ungate automatic PHY configuration on non-managed 82579 */
+	if ((hw->mac.type == e1000_pch2lan) &&
+	    !(fwsm & E1000_ICH_FWSM_FW_VALID)) {
+		usleep_range(10000, 11000);
+		e1000_gate_hw_phy_config_ich8lan(hw, false);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_init_phy_params_pchlan - Initialize PHY function pointers
+ *  @hw: pointer to the HW structure
+ *
+ *  Initialize family-specific PHY parameters and function pointers.
+ **/
+static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+
+	phy->addr = 1;
+	phy->reset_delay_us = 100;
+
+	phy->ops.set_page = e1000_set_page_igp;
+	phy->ops.read_reg = e1000_read_phy_reg_hv;
+	phy->ops.read_reg_locked = e1000_read_phy_reg_hv_locked;
+	phy->ops.read_reg_page = e1000_read_phy_reg_page_hv;
+	phy->ops.set_d0_lplu_state = e1000_set_lplu_state_pchlan;
+	phy->ops.set_d3_lplu_state = e1000_set_lplu_state_pchlan;
+	phy->ops.write_reg = e1000_write_phy_reg_hv;
+	phy->ops.write_reg_locked = e1000_write_phy_reg_hv_locked;
+	phy->ops.write_reg_page = e1000_write_phy_reg_page_hv;
+	phy->ops.power_up = e1000_power_up_phy_copper;
+	phy->ops.power_down = e1000_power_down_phy_copper_ich8lan;
+	phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT;
+
+	phy->id = e1000_phy_unknown;
+
+	ret_val = e1000_init_phy_workarounds_pchlan(hw);
+	if (ret_val)
+		return ret_val;
+
+	if (phy->id == e1000_phy_unknown)
+		switch (hw->mac.type) {
+		default:
+			ret_val = e1000e_get_phy_id(hw);
+			if (ret_val)
+				return ret_val;
+			if ((phy->id != 0) && (phy->id != PHY_REVISION_MASK))
+				break;
+			fallthrough;
+		case e1000_pch2lan:
+		case e1000_pch_lpt:
+		case e1000_pch_spt:
+		case e1000_pch_cnp:
+		case e1000_pch_tgp:
+		case e1000_pch_adp:
+		case e1000_pch_mtp:
+		case e1000_pch_lnp:
+		case e1000_pch_ptp:
+		case e1000_pch_nvp:
+			/* In case the PHY needs to be in mdio slow mode,
+			 * set slow mode and try to get the PHY id again.
+			 */
+			ret_val = e1000_set_mdio_slow_mode_hv(hw);
+			if (ret_val)
+				return ret_val;
+			ret_val = e1000e_get_phy_id(hw);
+			if (ret_val)
+				return ret_val;
+			break;
+		}
+	phy->type = e1000e_get_phy_type_from_id(phy->id);
+
+	switch (phy->type) {
+	case e1000_phy_82577:
+	case e1000_phy_82579:
+	case e1000_phy_i217:
+		phy->ops.check_polarity = e1000_check_polarity_82577;
+		phy->ops.force_speed_duplex =
+		    e1000_phy_force_speed_duplex_82577;
+		phy->ops.get_cable_length = e1000_get_cable_length_82577;
+		phy->ops.get_info = e1000_get_phy_info_82577;
+		phy->ops.commit = e1000e_phy_sw_reset;
+		break;
+	case e1000_phy_82578:
+		phy->ops.check_polarity = e1000_check_polarity_m88;
+		phy->ops.force_speed_duplex = e1000e_phy_force_speed_duplex_m88;
+		phy->ops.get_cable_length = e1000e_get_cable_length_m88;
+		phy->ops.get_info = e1000e_get_phy_info_m88;
+		break;
+	default:
+		ret_val = -E1000_ERR_PHY;
+		break;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_init_phy_params_ich8lan - Initialize PHY function pointers
+ *  @hw: pointer to the HW structure
+ *
+ *  Initialize family-specific PHY parameters and function pointers.
+ **/
+static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 i = 0;
+
+	phy->addr = 1;
+	phy->reset_delay_us = 100;
+
+	phy->ops.power_up = e1000_power_up_phy_copper;
+	phy->ops.power_down = e1000_power_down_phy_copper_ich8lan;
+
+	/* We may need to do this twice - once for IGP and if that fails,
+	 * we'll set BM func pointers and try again
+	 */
+	ret_val = e1000e_determine_phy_address(hw);
+	if (ret_val) {
+		phy->ops.write_reg = e1000e_write_phy_reg_bm;
+		phy->ops.read_reg = e1000e_read_phy_reg_bm;
+		ret_val = e1000e_determine_phy_address(hw);
+		if (ret_val) {
+			e_dbg("Cannot determine PHY addr. Erroring out\n");
+			return ret_val;
+		}
+	}
+
+	phy->id = 0;
+	while ((e1000_phy_unknown == e1000e_get_phy_type_from_id(phy->id)) &&
+	       (i++ < 100)) {
+		usleep_range(1000, 1100);
+		ret_val = e1000e_get_phy_id(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Verify phy id */
+	switch (phy->id) {
+	case IGP03E1000_E_PHY_ID:
+		phy->type = e1000_phy_igp_3;
+		phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT;
+		phy->ops.read_reg_locked = e1000e_read_phy_reg_igp_locked;
+		phy->ops.write_reg_locked = e1000e_write_phy_reg_igp_locked;
+		phy->ops.get_info = e1000e_get_phy_info_igp;
+		phy->ops.check_polarity = e1000_check_polarity_igp;
+		phy->ops.force_speed_duplex = e1000e_phy_force_speed_duplex_igp;
+		break;
+	case IFE_E_PHY_ID:
+	case IFE_PLUS_E_PHY_ID:
+	case IFE_C_E_PHY_ID:
+		phy->type = e1000_phy_ife;
+		phy->autoneg_mask = E1000_ALL_NOT_GIG;
+		phy->ops.get_info = e1000_get_phy_info_ife;
+		phy->ops.check_polarity = e1000_check_polarity_ife;
+		phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_ife;
+		break;
+	case BME1000_E_PHY_ID:
+		phy->type = e1000_phy_bm;
+		phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT;
+		phy->ops.read_reg = e1000e_read_phy_reg_bm;
+		phy->ops.write_reg = e1000e_write_phy_reg_bm;
+		phy->ops.commit = e1000e_phy_sw_reset;
+		phy->ops.get_info = e1000e_get_phy_info_m88;
+		phy->ops.check_polarity = e1000_check_polarity_m88;
+		phy->ops.force_speed_duplex = e1000e_phy_force_speed_duplex_m88;
+		break;
+	default:
+		return -E1000_ERR_PHY;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_init_nvm_params_ich8lan - Initialize NVM function pointers
+ *  @hw: pointer to the HW structure
+ *
+ *  Initialize family-specific NVM parameters and function
+ *  pointers.
+ **/
+static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
+	u32 gfpreg, sector_base_addr, sector_end_addr;
+	u16 i;
+	u32 nvm_size;
+
+	nvm->type = e1000_nvm_flash_sw;
+
+	if (hw->mac.type >= e1000_pch_spt) {
+		/* in SPT, gfpreg doesn't exist. NVM size is taken from the
+		 * STRAP register. This is because in SPT the GbE Flash region
+		 * is no longer accessed through the flash registers. Instead,
+		 * the mechanism has changed, and the Flash region access
+		 * registers are now implemented in GbE memory space.
+		 */
+		nvm->flash_base_addr = 0;
+		nvm_size = (((er32(STRAP) >> 1) & 0x1F) + 1)
+		    * NVM_SIZE_MULTIPLIER;
+		nvm->flash_bank_size = nvm_size / 2;
+		/* Adjust to word count */
+		nvm->flash_bank_size /= sizeof(u16);
+		/* Set the base address for flash register access */
+		hw->flash_address = hw->hw_addr + E1000_FLASH_BASE_ADDR;
+	} else {
+		/* Can't read flash registers if register set isn't mapped. */
+		if (!hw->flash_address) {
+			e_dbg("ERROR: Flash registers not mapped\n");
+			return -E1000_ERR_CONFIG;
+		}
+
+		gfpreg = er32flash(ICH_FLASH_GFPREG);
+
+		/* sector_X_addr is a "sector"-aligned address (4096 bytes)
+		 * Add 1 to sector_end_addr since this sector is included in
+		 * the overall size.
+		 */
+		sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK;
+		sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1;
+
+		/* flash_base_addr is byte-aligned */
+		nvm->flash_base_addr = sector_base_addr
+		    << FLASH_SECTOR_ADDR_SHIFT;
+
+		/* find total size of the NVM, then cut in half since the total
+		 * size represents two separate NVM banks.
+		 */
+		nvm->flash_bank_size = ((sector_end_addr - sector_base_addr)
+					<< FLASH_SECTOR_ADDR_SHIFT);
+		nvm->flash_bank_size /= 2;
+		/* Adjust to word count */
+		nvm->flash_bank_size /= sizeof(u16);
+	}
+
+	nvm->word_size = E1000_ICH8_SHADOW_RAM_WORDS;
+
+	/* Clear shadow ram */
+	for (i = 0; i < nvm->word_size; i++) {
+		dev_spec->shadow_ram[i].modified = false;
+		dev_spec->shadow_ram[i].value = 0xFFFF;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_init_mac_params_ich8lan - Initialize MAC function pointers
+ *  @hw: pointer to the HW structure
+ *
+ *  Initialize family-specific MAC parameters and function
+ *  pointers.
+ **/
+static s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+
+	/* Set media type function pointer */
+	hw->phy.media_type = e1000_media_type_copper;
+
+	/* Set mta register count */
+	mac->mta_reg_count = 32;
+	/* Set rar entry count */
+	mac->rar_entry_count = E1000_ICH_RAR_ENTRIES;
+	if (mac->type == e1000_ich8lan)
+		mac->rar_entry_count--;
+	/* FWSM register */
+	mac->has_fwsm = true;
+	/* ARC subsystem not supported */
+	mac->arc_subsystem_valid = false;
+	/* Adaptive IFS supported */
+	mac->adaptive_ifs = true;
+
+	/* LED and other operations */
+	switch (mac->type) {
+	case e1000_ich8lan:
+	case e1000_ich9lan:
+	case e1000_ich10lan:
+		/* check management mode */
+		mac->ops.check_mng_mode = e1000_check_mng_mode_ich8lan;
+		/* ID LED init */
+		mac->ops.id_led_init = e1000e_id_led_init_generic;
+		/* blink LED */
+		mac->ops.blink_led = e1000e_blink_led_generic;
+		/* setup LED */
+		mac->ops.setup_led = e1000e_setup_led_generic;
+		/* cleanup LED */
+		mac->ops.cleanup_led = e1000_cleanup_led_ich8lan;
+		/* turn on/off LED */
+		mac->ops.led_on = e1000_led_on_ich8lan;
+		mac->ops.led_off = e1000_led_off_ich8lan;
+		break;
+	case e1000_pch2lan:
+		mac->rar_entry_count = E1000_PCH2_RAR_ENTRIES;
+		mac->ops.rar_set = e1000_rar_set_pch2lan;
+		fallthrough;
+	case e1000_pch_lpt:
+	case e1000_pch_spt:
+	case e1000_pch_cnp:
+	case e1000_pch_tgp:
+	case e1000_pch_adp:
+	case e1000_pch_mtp:
+	case e1000_pch_lnp:
+	case e1000_pch_ptp:
+	case e1000_pch_nvp:
+	case e1000_pchlan:
+		/* check management mode */
+		mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan;
+		/* ID LED init */
+		mac->ops.id_led_init = e1000_id_led_init_pchlan;
+		/* setup LED */
+		mac->ops.setup_led = e1000_setup_led_pchlan;
+		/* cleanup LED */
+		mac->ops.cleanup_led = e1000_cleanup_led_pchlan;
+		/* turn on/off LED */
+		mac->ops.led_on = e1000_led_on_pchlan;
+		mac->ops.led_off = e1000_led_off_pchlan;
+		break;
+	default:
+		break;
+	}
+
+	if (mac->type >= e1000_pch_lpt) {
+		mac->rar_entry_count = E1000_PCH_LPT_RAR_ENTRIES;
+		mac->ops.rar_set = e1000_rar_set_pch_lpt;
+		mac->ops.setup_physical_interface =
+		    e1000_setup_copper_link_pch_lpt;
+		mac->ops.rar_get_count = e1000_rar_get_count_pch_lpt;
+	}
+
+	/* Enable PCS Lock-loss workaround for ICH8 */
+	if (mac->type == e1000_ich8lan)
+		e1000e_set_kmrn_lock_loss_workaround_ich8lan(hw, true);
+
+	return 0;
+}
+
+/**
+ *  __e1000_access_emi_reg_locked - Read/write EMI register
+ *  @hw: pointer to the HW structure
+ *  @address: EMI address to program
+ *  @data: pointer to value to read/write from/to the EMI address
+ *  @read: boolean flag to indicate read or write
+ *
+ *  This helper function assumes the SW/FW/HW Semaphore is already acquired.
+ **/
+static s32 __e1000_access_emi_reg_locked(struct e1000_hw *hw, u16 address,
+					 u16 *data, bool read)
+{
+	s32 ret_val;
+
+	ret_val = e1e_wphy_locked(hw, I82579_EMI_ADDR, address);
+	if (ret_val)
+		return ret_val;
+
+	if (read)
+		ret_val = e1e_rphy_locked(hw, I82579_EMI_DATA, data);
+	else
+		ret_val = e1e_wphy_locked(hw, I82579_EMI_DATA, *data);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_read_emi_reg_locked - Read Extended Management Interface register
+ *  @hw: pointer to the HW structure
+ *  @addr: EMI address to program
+ *  @data: value to be read from the EMI address
+ *
+ *  Assumes the SW/FW/HW Semaphore is already acquired.
+ **/
+s32 e1000_read_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 *data)
+{
+	return __e1000_access_emi_reg_locked(hw, addr, data, true);
+}
+
+/**
+ *  e1000_write_emi_reg_locked - Write Extended Management Interface register
+ *  @hw: pointer to the HW structure
+ *  @addr: EMI address to program
+ *  @data: value to be written to the EMI address
+ *
+ *  Assumes the SW/FW/HW Semaphore is already acquired.
+ **/
+s32 e1000_write_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 data)
+{
+	return __e1000_access_emi_reg_locked(hw, addr, &data, false);
+}
+
+/**
+ *  e1000_set_eee_pchlan - Enable/disable EEE support
+ *  @hw: pointer to the HW structure
+ *
+ *  Enable/disable EEE based on setting in dev_spec structure, the duplex of
+ *  the link and the EEE capabilities of the link partner.  The LPI Control
+ *  register bits will remain set only if/when link is up.
+ *
+ *  EEE LPI must not be asserted earlier than one second after link is up.
+ *  On 82579, EEE LPI should not be enabled until such time otherwise there
+ *  can be link issues with some switches.  Other devices can have EEE LPI
+ *  enabled immediately upon link up since they have a timer in hardware which
+ *  prevents LPI from being asserted too early.
+ **/
+s32 e1000_set_eee_pchlan(struct e1000_hw *hw)
+{
+	struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
+	s32 ret_val;
+	u16 lpa, pcs_status, adv, adv_addr, lpi_ctrl, data;
+
+	switch (hw->phy.type) {
+	case e1000_phy_82579:
+		lpa = I82579_EEE_LP_ABILITY;
+		pcs_status = I82579_EEE_PCS_STATUS;
+		adv_addr = I82579_EEE_ADVERTISEMENT;
+		break;
+	case e1000_phy_i217:
+		lpa = I217_EEE_LP_ABILITY;
+		pcs_status = I217_EEE_PCS_STATUS;
+		adv_addr = I217_EEE_ADVERTISEMENT;
+		break;
+	default:
+		return 0;
+	}
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1e_rphy_locked(hw, I82579_LPI_CTRL, &lpi_ctrl);
+	if (ret_val)
+		goto release;
+
+	/* Clear bits that enable EEE in various speeds */
+	lpi_ctrl &= ~I82579_LPI_CTRL_ENABLE_MASK;
+
+	/* Enable EEE if not disabled by user */
+	if (!dev_spec->eee_disable) {
+		/* Save off link partner's EEE ability */
+		ret_val = e1000_read_emi_reg_locked(hw, lpa,
+						    &dev_spec->eee_lp_ability);
+		if (ret_val)
+			goto release;
+
+		/* Read EEE advertisement */
+		ret_val = e1000_read_emi_reg_locked(hw, adv_addr, &adv);
+		if (ret_val)
+			goto release;
+
+		/* Enable EEE only for speeds in which the link partner is
+		 * EEE capable and for which we advertise EEE.
+		 */
+		if (adv & dev_spec->eee_lp_ability & I82579_EEE_1000_SUPPORTED)
+			lpi_ctrl |= I82579_LPI_CTRL_1000_ENABLE;
+
+		if (adv & dev_spec->eee_lp_ability & I82579_EEE_100_SUPPORTED) {
+			e1e_rphy_locked(hw, MII_LPA, &data);
+			if (data & LPA_100FULL)
+				lpi_ctrl |= I82579_LPI_CTRL_100_ENABLE;
+			else
+				/* EEE is not supported in 100Half, so ignore
+				 * partner's EEE in 100 ability if full-duplex
+				 * is not advertised.
+				 */
+				dev_spec->eee_lp_ability &=
+				    ~I82579_EEE_100_SUPPORTED;
+		}
+	}
+
+	if (hw->phy.type == e1000_phy_82579) {
+		ret_val = e1000_read_emi_reg_locked(hw, I82579_LPI_PLL_SHUT,
+						    &data);
+		if (ret_val)
+			goto release;
+
+		data &= ~I82579_LPI_100_PLL_SHUT;
+		ret_val = e1000_write_emi_reg_locked(hw, I82579_LPI_PLL_SHUT,
+						     data);
+	}
+
+	/* R/Clr IEEE MMD 3.1 bits 11:10 - Tx/Rx LPI Received */
+	ret_val = e1000_read_emi_reg_locked(hw, pcs_status, &data);
+	if (ret_val)
+		goto release;
+
+	ret_val = e1e_wphy_locked(hw, I82579_LPI_CTRL, lpi_ctrl);
+release:
+	hw->phy.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_k1_workaround_lpt_lp - K1 workaround on Lynxpoint-LP
+ *  @hw:   pointer to the HW structure
+ *  @link: link up bool flag
+ *
+ *  When K1 is enabled for 1Gbps, the MAC can miss 2 DMA completion indications
+ *  preventing further DMA write requests.  Workaround the issue by disabling
+ *  the de-assertion of the clock request when in 1Gpbs mode.
+ *  Also, set appropriate Tx re-transmission timeouts for 10 and 100Half link
+ *  speeds in order to avoid Tx hangs.
+ **/
+static s32 e1000_k1_workaround_lpt_lp(struct e1000_hw *hw, bool link)
+{
+	u32 fextnvm6 = er32(FEXTNVM6);
+	u32 status = er32(STATUS);
+	s32 ret_val = 0;
+	u16 reg;
+
+	if (link && (status & E1000_STATUS_SPEED_1000)) {
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+
+		ret_val =
+		    e1000e_read_kmrn_reg_locked(hw, E1000_KMRNCTRLSTA_K1_CONFIG,
+						&reg);
+		if (ret_val)
+			goto release;
+
+		ret_val =
+		    e1000e_write_kmrn_reg_locked(hw,
+						 E1000_KMRNCTRLSTA_K1_CONFIG,
+						 reg &
+						 ~E1000_KMRNCTRLSTA_K1_ENABLE);
+		if (ret_val)
+			goto release;
+
+		usleep_range(10, 20);
+
+		ew32(FEXTNVM6, fextnvm6 | E1000_FEXTNVM6_REQ_PLL_CLK);
+
+		ret_val =
+		    e1000e_write_kmrn_reg_locked(hw,
+						 E1000_KMRNCTRLSTA_K1_CONFIG,
+						 reg);
+release:
+		hw->phy.ops.release(hw);
+	} else {
+		/* clear FEXTNVM6 bit 8 on link down or 10/100 */
+		fextnvm6 &= ~E1000_FEXTNVM6_REQ_PLL_CLK;
+
+		if ((hw->phy.revision > 5) || !link ||
+		    ((status & E1000_STATUS_SPEED_100) &&
+		     (status & E1000_STATUS_FD)))
+			goto update_fextnvm6;
+
+		ret_val = e1e_rphy(hw, I217_INBAND_CTRL, &reg);
+		if (ret_val)
+			return ret_val;
+
+		/* Clear link status transmit timeout */
+		reg &= ~I217_INBAND_CTRL_LINK_STAT_TX_TIMEOUT_MASK;
+
+		if (status & E1000_STATUS_SPEED_100) {
+			/* Set inband Tx timeout to 5x10us for 100Half */
+			reg |= 5 << I217_INBAND_CTRL_LINK_STAT_TX_TIMEOUT_SHIFT;
+
+			/* Do not extend the K1 entry latency for 100Half */
+			fextnvm6 &= ~E1000_FEXTNVM6_ENABLE_K1_ENTRY_CONDITION;
+		} else {
+			/* Set inband Tx timeout to 50x10us for 10Full/Half */
+			reg |= 50 <<
+			    I217_INBAND_CTRL_LINK_STAT_TX_TIMEOUT_SHIFT;
+
+			/* Extend the K1 entry latency for 10 Mbps */
+			fextnvm6 |= E1000_FEXTNVM6_ENABLE_K1_ENTRY_CONDITION;
+		}
+
+		ret_val = e1e_wphy(hw, I217_INBAND_CTRL, reg);
+		if (ret_val)
+			return ret_val;
+
+update_fextnvm6:
+		ew32(FEXTNVM6, fextnvm6);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_platform_pm_pch_lpt - Set platform power management values
+ *  @hw: pointer to the HW structure
+ *  @link: bool indicating link status
+ *
+ *  Set the Latency Tolerance Reporting (LTR) values for the "PCIe-like"
+ *  GbE MAC in the Lynx Point PCH based on Rx buffer size and link speed
+ *  when link is up (which must not exceed the maximum latency supported
+ *  by the platform), otherwise specify there is no LTR requirement.
+ *  Unlike true-PCIe devices which set the LTR maximum snoop/no-snoop
+ *  latencies in the LTR Extended Capability Structure in the PCIe Extended
+ *  Capability register set, on this device LTR is set by writing the
+ *  equivalent snoop/no-snoop latencies in the LTRV register in the MAC and
+ *  set the SEND bit to send an Intel On-chip System Fabric sideband (IOSF-SB)
+ *  message to the PMC.
+ **/
+static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link)
+{
+	u32 reg = link << (E1000_LTRV_REQ_SHIFT + E1000_LTRV_NOSNOOP_SHIFT) |
+	    link << E1000_LTRV_REQ_SHIFT | E1000_LTRV_SEND;
+	u32 max_ltr_enc_d = 0;	/* maximum LTR decoded by platform */
+	u32 lat_enc_d = 0;	/* latency decoded */
+	u16 lat_enc = 0;	/* latency encoded */
+
+	if (link) {
+		u16 speed, duplex, scale = 0;
+		u16 max_snoop, max_nosnoop;
+		u16 max_ltr_enc;	/* max LTR latency encoded */
+		u64 value;
+		u32 rxa;
+
+		if (!hw->adapter->max_frame_size) {
+			e_dbg("max_frame_size not set.\n");
+			return -E1000_ERR_CONFIG;
+		}
+
+		hw->mac.ops.get_link_up_info(hw, &speed, &duplex);
+		if (!speed) {
+			e_dbg("Speed not set.\n");
+			return -E1000_ERR_CONFIG;
+		}
+
+		/* Rx Packet Buffer Allocation size (KB) */
+		rxa = er32(PBA) & E1000_PBA_RXA_MASK;
+
+		/* Determine the maximum latency tolerated by the device.
+		 *
+		 * Per the PCIe spec, the tolerated latencies are encoded as
+		 * a 3-bit encoded scale (only 0-5 are valid) multiplied by
+		 * a 10-bit value (0-1023) to provide a range from 1 ns to
+		 * 2^25*(2^10-1) ns.  The scale is encoded as 0=2^0ns,
+		 * 1=2^5ns, 2=2^10ns,...5=2^25ns.
+		 */
+		rxa *= 512;
+		value = (rxa > hw->adapter->max_frame_size) ?
+			(rxa - hw->adapter->max_frame_size) * (16000 / speed) :
+			0;
+
+		while (value > PCI_LTR_VALUE_MASK) {
+			scale++;
+			value = DIV_ROUND_UP(value, BIT(5));
+		}
+		if (scale > E1000_LTRV_SCALE_MAX) {
+			e_dbg("Invalid LTR latency scale %d\n", scale);
+			return -E1000_ERR_CONFIG;
+		}
+		lat_enc = (u16)((scale << PCI_LTR_SCALE_SHIFT) | value);
+
+		/* Determine the maximum latency tolerated by the platform */
+		pci_read_config_word(hw->adapter->pdev, E1000_PCI_LTR_CAP_LPT,
+				     &max_snoop);
+		pci_read_config_word(hw->adapter->pdev,
+				     E1000_PCI_LTR_CAP_LPT + 2, &max_nosnoop);
+		max_ltr_enc = max_t(u16, max_snoop, max_nosnoop);
+
+		lat_enc_d = (lat_enc & E1000_LTRV_VALUE_MASK) *
+			     (1U << (E1000_LTRV_SCALE_FACTOR *
+			     ((lat_enc & E1000_LTRV_SCALE_MASK)
+			     >> E1000_LTRV_SCALE_SHIFT)));
+
+		max_ltr_enc_d = (max_ltr_enc & E1000_LTRV_VALUE_MASK) *
+				 (1U << (E1000_LTRV_SCALE_FACTOR *
+				 ((max_ltr_enc & E1000_LTRV_SCALE_MASK)
+				 >> E1000_LTRV_SCALE_SHIFT)));
+
+		if (lat_enc_d > max_ltr_enc_d)
+			lat_enc = max_ltr_enc;
+	}
+
+	/* Set Snoop and No-Snoop latencies the same */
+	reg |= lat_enc | (lat_enc << E1000_LTRV_NOSNOOP_SHIFT);
+	ew32(LTRV, reg);
+
+	return 0;
+}
+
+/**
+ *  e1000_enable_ulp_lpt_lp - configure Ultra Low Power mode for LynxPoint-LP
+ *  @hw: pointer to the HW structure
+ *  @to_sx: boolean indicating a system power state transition to Sx
+ *
+ *  When link is down, configure ULP mode to significantly reduce the power
+ *  to the PHY.  If on a Manageability Engine (ME) enabled system, tell the
+ *  ME firmware to start the ULP configuration.  If not on an ME enabled
+ *  system, configure the ULP mode by software.
+ */
+s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
+{
+	u32 mac_reg;
+	s32 ret_val = 0;
+	u16 phy_reg;
+	u16 oem_reg = 0;
+
+	if ((hw->mac.type < e1000_pch_lpt) ||
+	    (hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPT_I217_LM) ||
+	    (hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPT_I217_V) ||
+	    (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_LM2) ||
+	    (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V2) ||
+	    (hw->dev_spec.ich8lan.ulp_state == e1000_ulp_state_on))
+		return 0;
+
+	if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
+		/* Request ME configure ULP mode in the PHY */
+		mac_reg = er32(H2ME);
+		mac_reg |= E1000_H2ME_ULP | E1000_H2ME_ENFORCE_SETTINGS;
+		ew32(H2ME, mac_reg);
+
+		goto out;
+	}
+
+	if (!to_sx) {
+		int i = 0;
+
+		/* Poll up to 5 seconds for Cable Disconnected indication */
+		while (!(er32(FEXT) & E1000_FEXT_PHY_CABLE_DISCONNECTED)) {
+			/* Bail if link is re-acquired */
+			if (er32(STATUS) & E1000_STATUS_LU)
+				return -E1000_ERR_PHY;
+
+			if (i++ == 100)
+				break;
+
+			msleep(50);
+		}
+		e_dbg("CABLE_DISCONNECTED %s set after %dmsec\n",
+		      (er32(FEXT) &
+		       E1000_FEXT_PHY_CABLE_DISCONNECTED) ? "" : "not", i * 50);
+	}
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	/* Force SMBus mode in PHY */
+	ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
+	if (ret_val)
+		goto release;
+	phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
+	e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
+
+	/* Force SMBus mode in MAC */
+	mac_reg = er32(CTRL_EXT);
+	mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
+	ew32(CTRL_EXT, mac_reg);
+
+	/* Si workaround for ULP entry flow on i127/rev6 h/w.  Enable
+	 * LPLU and disable Gig speed when entering ULP
+	 */
+	if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6)) {
+		ret_val = e1000_read_phy_reg_hv_locked(hw, HV_OEM_BITS,
+						       &oem_reg);
+		if (ret_val)
+			goto release;
+
+		phy_reg = oem_reg;
+		phy_reg |= HV_OEM_BITS_LPLU | HV_OEM_BITS_GBE_DIS;
+
+		ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS,
+							phy_reg);
+
+		if (ret_val)
+			goto release;
+	}
+
+	/* Set Inband ULP Exit, Reset to SMBus mode and
+	 * Disable SMBus Release on PERST# in PHY
+	 */
+	ret_val = e1000_read_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, &phy_reg);
+	if (ret_val)
+		goto release;
+	phy_reg |= (I218_ULP_CONFIG1_RESET_TO_SMBUS |
+		    I218_ULP_CONFIG1_DISABLE_SMB_PERST);
+	if (to_sx) {
+		if (er32(WUFC) & E1000_WUFC_LNKC)
+			phy_reg |= I218_ULP_CONFIG1_WOL_HOST;
+		else
+			phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST;
+
+		phy_reg |= I218_ULP_CONFIG1_STICKY_ULP;
+		phy_reg &= ~I218_ULP_CONFIG1_INBAND_EXIT;
+	} else {
+		phy_reg |= I218_ULP_CONFIG1_INBAND_EXIT;
+		phy_reg &= ~I218_ULP_CONFIG1_STICKY_ULP;
+		phy_reg &= ~I218_ULP_CONFIG1_WOL_HOST;
+	}
+	e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg);
+
+	/* Set Disable SMBus Release on PERST# in MAC */
+	mac_reg = er32(FEXTNVM7);
+	mac_reg |= E1000_FEXTNVM7_DISABLE_SMB_PERST;
+	ew32(FEXTNVM7, mac_reg);
+
+	/* Commit ULP changes in PHY by starting auto ULP configuration */
+	phy_reg |= I218_ULP_CONFIG1_START;
+	e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg);
+
+	if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6) &&
+	    to_sx && (er32(STATUS) & E1000_STATUS_LU)) {
+		ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS,
+							oem_reg);
+		if (ret_val)
+			goto release;
+	}
+
+release:
+	hw->phy.ops.release(hw);
+out:
+	if (ret_val)
+		e_dbg("Error in ULP enable flow: %d\n", ret_val);
+	else
+		hw->dev_spec.ich8lan.ulp_state = e1000_ulp_state_on;
+
+	return ret_val;
+}
+
+/**
+ *  e1000_disable_ulp_lpt_lp - unconfigure Ultra Low Power mode for LynxPoint-LP
+ *  @hw: pointer to the HW structure
+ *  @force: boolean indicating whether or not to force disabling ULP
+ *
+ *  Un-configure ULP mode when link is up, the system is transitioned from
+ *  Sx or the driver is unloaded.  If on a Manageability Engine (ME) enabled
+ *  system, poll for an indication from ME that ULP has been un-configured.
+ *  If not on an ME enabled system, un-configure the ULP mode by software.
+ *
+ *  During nominal operation, this function is called when link is acquired
+ *  to disable ULP mode (force=false); otherwise, for example when unloading
+ *  the driver or during Sx->S0 transitions, this is called with force=true
+ *  to forcibly disable ULP.
+ */
+static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
+{
+	s32 ret_val = 0;
+	u32 mac_reg;
+	u16 phy_reg;
+	int i = 0;
+
+	if ((hw->mac.type < e1000_pch_lpt) ||
+	    (hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPT_I217_LM) ||
+	    (hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPT_I217_V) ||
+	    (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_LM2) ||
+	    (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V2) ||
+	    (hw->dev_spec.ich8lan.ulp_state == e1000_ulp_state_off))
+		return 0;
+
+	if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
+		struct e1000_adapter *adapter = hw->adapter;
+		bool firmware_bug = false;
+
+		if (force) {
+			/* Request ME un-configure ULP mode in the PHY */
+			mac_reg = er32(H2ME);
+			mac_reg &= ~E1000_H2ME_ULP;
+			mac_reg |= E1000_H2ME_ENFORCE_SETTINGS;
+			ew32(H2ME, mac_reg);
+		}
+
+		/* Poll up to 2.5 seconds for ME to clear ULP_CFG_DONE.
+		 * If this takes more than 1 second, show a warning indicating a
+		 * firmware bug
+		 */
+		while (er32(FWSM) & E1000_FWSM_ULP_CFG_DONE) {
+			if (i++ == 250) {
+				ret_val = -E1000_ERR_PHY;
+				goto out;
+			}
+			if (i > 100 && !firmware_bug)
+				firmware_bug = true;
+
+			usleep_range(10000, 11000);
+		}
+		if (firmware_bug)
+			e_warn("ULP_CONFIG_DONE took %d msec. This is a firmware bug\n",
+			       i * 10);
+		else
+			e_dbg("ULP_CONFIG_DONE cleared after %d msec\n",
+			      i * 10);
+
+		if (force) {
+			mac_reg = er32(H2ME);
+			mac_reg &= ~E1000_H2ME_ENFORCE_SETTINGS;
+			ew32(H2ME, mac_reg);
+		} else {
+			/* Clear H2ME.ULP after ME ULP configuration */
+			mac_reg = er32(H2ME);
+			mac_reg &= ~E1000_H2ME_ULP;
+			ew32(H2ME, mac_reg);
+		}
+
+		goto out;
+	}
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	if (force)
+		/* Toggle LANPHYPC Value bit */
+		e1000_toggle_lanphypc_pch_lpt(hw);
+
+	/* Unforce SMBus mode in PHY */
+	ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
+	if (ret_val) {
+		/* The MAC might be in PCIe mode, so temporarily force to
+		 * SMBus mode in order to access the PHY.
+		 */
+		mac_reg = er32(CTRL_EXT);
+		mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
+		ew32(CTRL_EXT, mac_reg);
+
+		msleep(50);
+
+		ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL,
+						       &phy_reg);
+		if (ret_val)
+			goto release;
+	}
+	phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
+	e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
+
+	/* Unforce SMBus mode in MAC */
+	mac_reg = er32(CTRL_EXT);
+	mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
+	ew32(CTRL_EXT, mac_reg);
+
+	/* When ULP mode was previously entered, K1 was disabled by the
+	 * hardware.  Re-Enable K1 in the PHY when exiting ULP.
+	 */
+	ret_val = e1000_read_phy_reg_hv_locked(hw, HV_PM_CTRL, &phy_reg);
+	if (ret_val)
+		goto release;
+	phy_reg |= HV_PM_CTRL_K1_ENABLE;
+	e1000_write_phy_reg_hv_locked(hw, HV_PM_CTRL, phy_reg);
+
+	/* Clear ULP enabled configuration */
+	ret_val = e1000_read_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, &phy_reg);
+	if (ret_val)
+		goto release;
+	phy_reg &= ~(I218_ULP_CONFIG1_IND |
+		     I218_ULP_CONFIG1_STICKY_ULP |
+		     I218_ULP_CONFIG1_RESET_TO_SMBUS |
+		     I218_ULP_CONFIG1_WOL_HOST |
+		     I218_ULP_CONFIG1_INBAND_EXIT |
+		     I218_ULP_CONFIG1_EN_ULP_LANPHYPC |
+		     I218_ULP_CONFIG1_DIS_CLR_STICKY_ON_PERST |
+		     I218_ULP_CONFIG1_DISABLE_SMB_PERST);
+	e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg);
+
+	/* Commit ULP changes by starting auto ULP configuration */
+	phy_reg |= I218_ULP_CONFIG1_START;
+	e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg);
+
+	/* Clear Disable SMBus Release on PERST# in MAC */
+	mac_reg = er32(FEXTNVM7);
+	mac_reg &= ~E1000_FEXTNVM7_DISABLE_SMB_PERST;
+	ew32(FEXTNVM7, mac_reg);
+
+release:
+	hw->phy.ops.release(hw);
+	if (force) {
+		e1000_phy_hw_reset(hw);
+		msleep(50);
+	}
+out:
+	if (ret_val)
+		e_dbg("Error in ULP disable flow: %d\n", ret_val);
+	else
+		hw->dev_spec.ich8lan.ulp_state = e1000_ulp_state_off;
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_for_copper_link_ich8lan - Check for link (Copper)
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks to see of the link status of the hardware has changed.  If a
+ *  change in link status has been detected, then we read the PHY registers
+ *  to get the current speed/duplex if link exists.
+ **/
+static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	s32 ret_val, tipg_reg = 0;
+	u16 emi_addr, emi_val = 0;
+	bool link;
+	u16 phy_reg;
+
+	/* We only want to go out to the PHY registers to see if Auto-Neg
+	 * has completed and/or if our link status has changed.  The
+	 * get_link_status flag is set upon receiving a Link Status
+	 * Change or Rx Sequence Error interrupt.
+	 */
+	if (!mac->get_link_status)
+		return 0;
+	mac->get_link_status = false;
+
+	/* First we want to see if the MII Status Register reports
+	 * link.  If so, then we want to get the current speed/duplex
+	 * of the PHY.
+	 */
+	ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link);
+	if (ret_val)
+		goto out;
+
+	if (hw->mac.type == e1000_pchlan) {
+		ret_val = e1000_k1_gig_workaround_hv(hw, link);
+		if (ret_val)
+			goto out;
+	}
+
+	/* When connected at 10Mbps half-duplex, some parts are excessively
+	 * aggressive resulting in many collisions. To avoid this, increase
+	 * the IPG and reduce Rx latency in the PHY.
+	 */
+	if ((hw->mac.type >= e1000_pch2lan) && link) {
+		u16 speed, duplex;
+
+		e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex);
+		tipg_reg = er32(TIPG);
+		tipg_reg &= ~E1000_TIPG_IPGT_MASK;
+
+		if (duplex == HALF_DUPLEX && speed == SPEED_10) {
+			tipg_reg |= 0xFF;
+			/* Reduce Rx latency in analog PHY */
+			emi_val = 0;
+		} else if (hw->mac.type >= e1000_pch_spt &&
+			   duplex == FULL_DUPLEX && speed != SPEED_1000) {
+			tipg_reg |= 0xC;
+			emi_val = 1;
+		} else {
+
+			/* Roll back the default values */
+			tipg_reg |= 0x08;
+			emi_val = 1;
+		}
+
+		ew32(TIPG, tipg_reg);
+
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			goto out;
+
+		if (hw->mac.type == e1000_pch2lan)
+			emi_addr = I82579_RX_CONFIG;
+		else
+			emi_addr = I217_RX_CONFIG;
+		ret_val = e1000_write_emi_reg_locked(hw, emi_addr, emi_val);
+
+		if (hw->mac.type >= e1000_pch_lpt) {
+			u16 phy_reg;
+
+			e1e_rphy_locked(hw, I217_PLL_CLOCK_GATE_REG, &phy_reg);
+			phy_reg &= ~I217_PLL_CLOCK_GATE_MASK;
+			if (speed == SPEED_100 || speed == SPEED_10)
+				phy_reg |= 0x3E8;
+			else
+				phy_reg |= 0xFA;
+			e1e_wphy_locked(hw, I217_PLL_CLOCK_GATE_REG, phy_reg);
+
+			if (speed == SPEED_1000) {
+				hw->phy.ops.read_reg_locked(hw, HV_PM_CTRL,
+							    &phy_reg);
+
+				phy_reg |= HV_PM_CTRL_K1_CLK_REQ;
+
+				hw->phy.ops.write_reg_locked(hw, HV_PM_CTRL,
+							     phy_reg);
+			}
+		}
+		hw->phy.ops.release(hw);
+
+		if (ret_val)
+			goto out;
+
+		if (hw->mac.type >= e1000_pch_spt) {
+			u16 data;
+			u16 ptr_gap;
+
+			if (speed == SPEED_1000) {
+				ret_val = hw->phy.ops.acquire(hw);
+				if (ret_val)
+					goto out;
+
+				ret_val = e1e_rphy_locked(hw,
+							  PHY_REG(776, 20),
+							  &data);
+				if (ret_val) {
+					hw->phy.ops.release(hw);
+					goto out;
+				}
+
+				ptr_gap = (data & (0x3FF << 2)) >> 2;
+				if (ptr_gap < 0x18) {
+					data &= ~(0x3FF << 2);
+					data |= (0x18 << 2);
+					ret_val =
+					    e1e_wphy_locked(hw,
+							    PHY_REG(776, 20),
+							    data);
+				}
+				hw->phy.ops.release(hw);
+				if (ret_val)
+					goto out;
+			} else {
+				ret_val = hw->phy.ops.acquire(hw);
+				if (ret_val)
+					goto out;
+
+				ret_val = e1e_wphy_locked(hw,
+							  PHY_REG(776, 20),
+							  0xC023);
+				hw->phy.ops.release(hw);
+				if (ret_val)
+					goto out;
+
+			}
+		}
+	}
+
+	/* I217 Packet Loss issue:
+	 * ensure that FEXTNVM4 Beacon Duration is set correctly
+	 * on power up.
+	 * Set the Beacon Duration for I217 to 8 usec
+	 */
+	if (hw->mac.type >= e1000_pch_lpt) {
+		u32 mac_reg;
+
+		mac_reg = er32(FEXTNVM4);
+		mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK;
+		mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC;
+		ew32(FEXTNVM4, mac_reg);
+	}
+
+	/* Work-around I218 hang issue */
+	if ((hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_LM) ||
+	    (hw->adapter->pdev->device == E1000_DEV_ID_PCH_LPTLP_I218_V) ||
+	    (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_LM3) ||
+	    (hw->adapter->pdev->device == E1000_DEV_ID_PCH_I218_V3)) {
+		ret_val = e1000_k1_workaround_lpt_lp(hw, link);
+		if (ret_val)
+			goto out;
+	}
+	if (hw->mac.type >= e1000_pch_lpt) {
+		/* Set platform power management values for
+		 * Latency Tolerance Reporting (LTR)
+		 */
+		ret_val = e1000_platform_pm_pch_lpt(hw, link);
+		if (ret_val)
+			goto out;
+	}
+
+	/* Clear link partner's EEE ability */
+	hw->dev_spec.ich8lan.eee_lp_ability = 0;
+
+	if (hw->mac.type >= e1000_pch_lpt) {
+		u32 fextnvm6 = er32(FEXTNVM6);
+
+		if (hw->mac.type == e1000_pch_spt) {
+			/* FEXTNVM6 K1-off workaround - for SPT only */
+			u32 pcieanacfg = er32(PCIEANACFG);
+
+			if (pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE)
+				fextnvm6 |= E1000_FEXTNVM6_K1_OFF_ENABLE;
+			else
+				fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE;
+		}
+
+		ew32(FEXTNVM6, fextnvm6);
+	}
+
+	if (!link)
+		goto out;
+
+	switch (hw->mac.type) {
+	case e1000_pch2lan:
+		ret_val = e1000_k1_workaround_lv(hw);
+		if (ret_val)
+			return ret_val;
+		fallthrough;
+	case e1000_pchlan:
+		if (hw->phy.type == e1000_phy_82578) {
+			ret_val = e1000_link_stall_workaround_hv(hw);
+			if (ret_val)
+				return ret_val;
+		}
+
+		/* Workaround for PCHx parts in half-duplex:
+		 * Set the number of preambles removed from the packet
+		 * when it is passed from the PHY to the MAC to prevent
+		 * the MAC from misinterpreting the packet type.
+		 */
+		e1e_rphy(hw, HV_KMRN_FIFO_CTRLSTA, &phy_reg);
+		phy_reg &= ~HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK;
+
+		if ((er32(STATUS) & E1000_STATUS_FD) != E1000_STATUS_FD)
+			phy_reg |= BIT(HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT);
+
+		e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, phy_reg);
+		break;
+	default:
+		break;
+	}
+
+	/* Check if there was DownShift, must be checked
+	 * immediately after link-up
+	 */
+	e1000e_check_downshift(hw);
+
+	/* Enable/Disable EEE after link up */
+	if (hw->phy.type > e1000_phy_82579) {
+		ret_val = e1000_set_eee_pchlan(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* If we are forcing speed/duplex, then we simply return since
+	 * we have already determined whether we have link or not.
+	 */
+	if (!mac->autoneg)
+		return -E1000_ERR_CONFIG;
+
+	/* Auto-Neg is enabled.  Auto Speed Detection takes care
+	 * of MAC speed/duplex configuration.  So we only need to
+	 * configure Collision Distance in the MAC.
+	 */
+	mac->ops.config_collision_dist(hw);
+
+	/* Configure Flow Control now that Auto-Neg has completed.
+	 * First, we need to restore the desired flow control
+	 * settings because we may have had to re-autoneg with a
+	 * different link partner.
+	 */
+	ret_val = e1000e_config_fc_after_link_up(hw);
+	if (ret_val)
+		e_dbg("Error configuring flow control\n");
+
+	return ret_val;
+
+out:
+	mac->get_link_status = true;
+	return ret_val;
+}
+
+static s32 e1000_get_variants_ich8lan(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	s32 rc;
+
+	rc = e1000_init_mac_params_ich8lan(hw);
+	if (rc)
+		return rc;
+
+	rc = e1000_init_nvm_params_ich8lan(hw);
+	if (rc)
+		return rc;
+
+	switch (hw->mac.type) {
+	case e1000_ich8lan:
+	case e1000_ich9lan:
+	case e1000_ich10lan:
+		rc = e1000_init_phy_params_ich8lan(hw);
+		break;
+	case e1000_pchlan:
+	case e1000_pch2lan:
+	case e1000_pch_lpt:
+	case e1000_pch_spt:
+	case e1000_pch_cnp:
+	case e1000_pch_tgp:
+	case e1000_pch_adp:
+	case e1000_pch_mtp:
+	case e1000_pch_lnp:
+	case e1000_pch_ptp:
+	case e1000_pch_nvp:
+		rc = e1000_init_phy_params_pchlan(hw);
+		break;
+	default:
+		break;
+	}
+	if (rc)
+		return rc;
+
+	/* Disable Jumbo Frame support on parts with Intel 10/100 PHY or
+	 * on parts with MACsec enabled in NVM (reflected in CTRL_EXT).
+	 */
+	if ((adapter->hw.phy.type == e1000_phy_ife) ||
+	    ((adapter->hw.mac.type >= e1000_pch2lan) &&
+	     (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LSECCK)))) {
+		adapter->flags &= ~FLAG_HAS_JUMBO_FRAMES;
+		adapter->max_hw_frame_size = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN;
+
+		hw->mac.ops.blink_led = NULL;
+	}
+
+	if ((adapter->hw.mac.type == e1000_ich8lan) &&
+	    (adapter->hw.phy.type != e1000_phy_ife))
+		adapter->flags |= FLAG_LSC_GIG_SPEED_DROP;
+
+	/* Enable workaround for 82579 w/ ME enabled */
+	if ((adapter->hw.mac.type == e1000_pch2lan) &&
+	    (er32(FWSM) & E1000_ICH_FWSM_FW_VALID))
+		adapter->flags2 |= FLAG2_PCIM2PCI_ARBITER_WA;
+
+	return 0;
+}
+
+static DEFINE_MUTEX(nvm_mutex);
+
+/**
+ *  e1000_acquire_nvm_ich8lan - Acquire NVM mutex
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquires the mutex for performing NVM operations.
+ **/
+static s32 e1000_acquire_nvm_ich8lan(struct e1000_hw __always_unused *hw)
+{
+	mutex_lock(&nvm_mutex);
+
+	return 0;
+}
+
+/**
+ *  e1000_release_nvm_ich8lan - Release NVM mutex
+ *  @hw: pointer to the HW structure
+ *
+ *  Releases the mutex used while performing NVM operations.
+ **/
+static void e1000_release_nvm_ich8lan(struct e1000_hw __always_unused *hw)
+{
+	mutex_unlock(&nvm_mutex);
+}
+
+/**
+ *  e1000_acquire_swflag_ich8lan - Acquire software control flag
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquires the software control flag for performing PHY and select
+ *  MAC CSR accesses.
+ **/
+static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw)
+{
+	u32 extcnf_ctrl, timeout = PHY_CFG_TIMEOUT;
+	s32 ret_val = 0;
+
+	if (test_and_set_bit(__E1000_ACCESS_SHARED_RESOURCE,
+			     &hw->adapter->state)) {
+		e_dbg("contention for Phy access\n");
+		return -E1000_ERR_PHY;
+	}
+
+	while (timeout) {
+		extcnf_ctrl = er32(EXTCNF_CTRL);
+		if (!(extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG))
+			break;
+
+		mdelay(1);
+		timeout--;
+	}
+
+	if (!timeout) {
+		e_dbg("SW has already locked the resource.\n");
+		ret_val = -E1000_ERR_CONFIG;
+		goto out;
+	}
+
+	timeout = SW_FLAG_TIMEOUT;
+
+	extcnf_ctrl |= E1000_EXTCNF_CTRL_SWFLAG;
+	ew32(EXTCNF_CTRL, extcnf_ctrl);
+
+	while (timeout) {
+		extcnf_ctrl = er32(EXTCNF_CTRL);
+		if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG)
+			break;
+
+		mdelay(1);
+		timeout--;
+	}
+
+	if (!timeout) {
+		e_dbg("Failed to acquire the semaphore, FW or HW has it: FWSM=0x%8.8x EXTCNF_CTRL=0x%8.8x)\n",
+		      er32(FWSM), extcnf_ctrl);
+		extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG;
+		ew32(EXTCNF_CTRL, extcnf_ctrl);
+		ret_val = -E1000_ERR_CONFIG;
+		goto out;
+	}
+
+out:
+	if (ret_val)
+		clear_bit(__E1000_ACCESS_SHARED_RESOURCE, &hw->adapter->state);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_release_swflag_ich8lan - Release software control flag
+ *  @hw: pointer to the HW structure
+ *
+ *  Releases the software control flag for performing PHY and select
+ *  MAC CSR accesses.
+ **/
+static void e1000_release_swflag_ich8lan(struct e1000_hw *hw)
+{
+	u32 extcnf_ctrl;
+
+	extcnf_ctrl = er32(EXTCNF_CTRL);
+
+	if (extcnf_ctrl & E1000_EXTCNF_CTRL_SWFLAG) {
+		extcnf_ctrl &= ~E1000_EXTCNF_CTRL_SWFLAG;
+		ew32(EXTCNF_CTRL, extcnf_ctrl);
+	} else {
+		e_dbg("Semaphore unexpectedly released by sw/fw/hw\n");
+	}
+
+	clear_bit(__E1000_ACCESS_SHARED_RESOURCE, &hw->adapter->state);
+}
+
+/**
+ *  e1000_check_mng_mode_ich8lan - Checks management mode
+ *  @hw: pointer to the HW structure
+ *
+ *  This checks if the adapter has any manageability enabled.
+ *  This is a function pointer entry point only called by read/write
+ *  routines for the PHY and NVM parts.
+ **/
+static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw)
+{
+	u32 fwsm;
+
+	fwsm = er32(FWSM);
+	return (fwsm & E1000_ICH_FWSM_FW_VALID) &&
+		((fwsm & E1000_FWSM_MODE_MASK) ==
+		 (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT));
+}
+
+/**
+ *  e1000_check_mng_mode_pchlan - Checks management mode
+ *  @hw: pointer to the HW structure
+ *
+ *  This checks if the adapter has iAMT enabled.
+ *  This is a function pointer entry point only called by read/write
+ *  routines for the PHY and NVM parts.
+ **/
+static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw)
+{
+	u32 fwsm;
+
+	fwsm = er32(FWSM);
+	return (fwsm & E1000_ICH_FWSM_FW_VALID) &&
+	    (fwsm & (E1000_ICH_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT));
+}
+
+/**
+ *  e1000_rar_set_pch2lan - Set receive address register
+ *  @hw: pointer to the HW structure
+ *  @addr: pointer to the receive address
+ *  @index: receive address array register
+ *
+ *  Sets the receive address array register at index to the address passed
+ *  in by addr.  For 82579, RAR[0] is the base address register that is to
+ *  contain the MAC address but RAR[1-6] are reserved for manageability (ME).
+ *  Use SHRA[0-3] in place of those reserved for ME.
+ **/
+static int e1000_rar_set_pch2lan(struct e1000_hw *hw, u8 *addr, u32 index)
+{
+	u32 rar_low, rar_high;
+
+	/* HW expects these in little endian so we reverse the byte order
+	 * from network order (big endian) to little endian
+	 */
+	rar_low = ((u32)addr[0] |
+		   ((u32)addr[1] << 8) |
+		   ((u32)addr[2] << 16) | ((u32)addr[3] << 24));
+
+	rar_high = ((u32)addr[4] | ((u32)addr[5] << 8));
+
+	/* If MAC address zero, no need to set the AV bit */
+	if (rar_low || rar_high)
+		rar_high |= E1000_RAH_AV;
+
+	if (index == 0) {
+		ew32(RAL(index), rar_low);
+		e1e_flush();
+		ew32(RAH(index), rar_high);
+		e1e_flush();
+		return 0;
+	}
+
+	/* RAR[1-6] are owned by manageability.  Skip those and program the
+	 * next address into the SHRA register array.
+	 */
+	if (index < (u32)(hw->mac.rar_entry_count)) {
+		s32 ret_val;
+
+		ret_val = e1000_acquire_swflag_ich8lan(hw);
+		if (ret_val)
+			goto out;
+
+		ew32(SHRAL(index - 1), rar_low);
+		e1e_flush();
+		ew32(SHRAH(index - 1), rar_high);
+		e1e_flush();
+
+		e1000_release_swflag_ich8lan(hw);
+
+		/* verify the register updates */
+		if ((er32(SHRAL(index - 1)) == rar_low) &&
+		    (er32(SHRAH(index - 1)) == rar_high))
+			return 0;
+
+		e_dbg("SHRA[%d] might be locked by ME - FWSM=0x%8.8x\n",
+		      (index - 1), er32(FWSM));
+	}
+
+out:
+	e_dbg("Failed to write receive address at index %d\n", index);
+	return -E1000_ERR_CONFIG;
+}
+
+/**
+ *  e1000_rar_get_count_pch_lpt - Get the number of available SHRA
+ *  @hw: pointer to the HW structure
+ *
+ *  Get the number of available receive registers that the Host can
+ *  program. SHRA[0-10] are the shared receive address registers
+ *  that are shared between the Host and manageability engine (ME).
+ *  ME can reserve any number of addresses and the host needs to be
+ *  able to tell how many available registers it has access to.
+ **/
+static u32 e1000_rar_get_count_pch_lpt(struct e1000_hw *hw)
+{
+	u32 wlock_mac;
+	u32 num_entries;
+
+	wlock_mac = er32(FWSM) & E1000_FWSM_WLOCK_MAC_MASK;
+	wlock_mac >>= E1000_FWSM_WLOCK_MAC_SHIFT;
+
+	switch (wlock_mac) {
+	case 0:
+		/* All SHRA[0..10] and RAR[0] available */
+		num_entries = hw->mac.rar_entry_count;
+		break;
+	case 1:
+		/* Only RAR[0] available */
+		num_entries = 1;
+		break;
+	default:
+		/* SHRA[0..(wlock_mac - 1)] available + RAR[0] */
+		num_entries = wlock_mac + 1;
+		break;
+	}
+
+	return num_entries;
+}
+
+/**
+ *  e1000_rar_set_pch_lpt - Set receive address registers
+ *  @hw: pointer to the HW structure
+ *  @addr: pointer to the receive address
+ *  @index: receive address array register
+ *
+ *  Sets the receive address register array at index to the address passed
+ *  in by addr. For LPT, RAR[0] is the base address register that is to
+ *  contain the MAC address. SHRA[0-10] are the shared receive address
+ *  registers that are shared between the Host and manageability engine (ME).
+ **/
+static int e1000_rar_set_pch_lpt(struct e1000_hw *hw, u8 *addr, u32 index)
+{
+	u32 rar_low, rar_high;
+	u32 wlock_mac;
+
+	/* HW expects these in little endian so we reverse the byte order
+	 * from network order (big endian) to little endian
+	 */
+	rar_low = ((u32)addr[0] | ((u32)addr[1] << 8) |
+		   ((u32)addr[2] << 16) | ((u32)addr[3] << 24));
+
+	rar_high = ((u32)addr[4] | ((u32)addr[5] << 8));
+
+	/* If MAC address zero, no need to set the AV bit */
+	if (rar_low || rar_high)
+		rar_high |= E1000_RAH_AV;
+
+	if (index == 0) {
+		ew32(RAL(index), rar_low);
+		e1e_flush();
+		ew32(RAH(index), rar_high);
+		e1e_flush();
+		return 0;
+	}
+
+	/* The manageability engine (ME) can lock certain SHRAR registers that
+	 * it is using - those registers are unavailable for use.
+	 */
+	if (index < hw->mac.rar_entry_count) {
+		wlock_mac = er32(FWSM) & E1000_FWSM_WLOCK_MAC_MASK;
+		wlock_mac >>= E1000_FWSM_WLOCK_MAC_SHIFT;
+
+		/* Check if all SHRAR registers are locked */
+		if (wlock_mac == 1)
+			goto out;
+
+		if ((wlock_mac == 0) || (index <= wlock_mac)) {
+			s32 ret_val;
+
+			ret_val = e1000_acquire_swflag_ich8lan(hw);
+
+			if (ret_val)
+				goto out;
+
+			ew32(SHRAL_PCH_LPT(index - 1), rar_low);
+			e1e_flush();
+			ew32(SHRAH_PCH_LPT(index - 1), rar_high);
+			e1e_flush();
+
+			e1000_release_swflag_ich8lan(hw);
+
+			/* verify the register updates */
+			if ((er32(SHRAL_PCH_LPT(index - 1)) == rar_low) &&
+			    (er32(SHRAH_PCH_LPT(index - 1)) == rar_high))
+				return 0;
+		}
+	}
+
+out:
+	e_dbg("Failed to write receive address at index %d\n", index);
+	return -E1000_ERR_CONFIG;
+}
+
+/**
+ *  e1000_check_reset_block_ich8lan - Check if PHY reset is blocked
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks if firmware is blocking the reset of the PHY.
+ *  This is a function pointer entry point only called by
+ *  reset routines.
+ **/
+static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw)
+{
+	bool blocked = false;
+	int i = 0;
+
+	while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) &&
+	       (i++ < 30))
+		usleep_range(10000, 11000);
+	return blocked ? E1000_BLK_PHY_RESET : 0;
+}
+
+/**
+ *  e1000_write_smbus_addr - Write SMBus address to PHY needed during Sx states
+ *  @hw: pointer to the HW structure
+ *
+ *  Assumes semaphore already acquired.
+ *
+ **/
+static s32 e1000_write_smbus_addr(struct e1000_hw *hw)
+{
+	u16 phy_data;
+	u32 strap = er32(STRAP);
+	u32 freq = (strap & E1000_STRAP_SMT_FREQ_MASK) >>
+	    E1000_STRAP_SMT_FREQ_SHIFT;
+	s32 ret_val;
+
+	strap &= E1000_STRAP_SMBUS_ADDRESS_MASK;
+
+	ret_val = e1000_read_phy_reg_hv_locked(hw, HV_SMB_ADDR, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_data &= ~HV_SMB_ADDR_MASK;
+	phy_data |= (strap >> E1000_STRAP_SMBUS_ADDRESS_SHIFT);
+	phy_data |= HV_SMB_ADDR_PEC_EN | HV_SMB_ADDR_VALID;
+
+	if (hw->phy.type == e1000_phy_i217) {
+		/* Restore SMBus frequency */
+		if (freq--) {
+			phy_data &= ~HV_SMB_ADDR_FREQ_MASK;
+			phy_data |= (freq & BIT(0)) <<
+			    HV_SMB_ADDR_FREQ_LOW_SHIFT;
+			phy_data |= (freq & BIT(1)) <<
+			    (HV_SMB_ADDR_FREQ_HIGH_SHIFT - 1);
+		} else {
+			e_dbg("Unsupported SMB frequency in PHY\n");
+		}
+	}
+
+	return e1000_write_phy_reg_hv_locked(hw, HV_SMB_ADDR, phy_data);
+}
+
+/**
+ *  e1000_sw_lcd_config_ich8lan - SW-based LCD Configuration
+ *  @hw:   pointer to the HW structure
+ *
+ *  SW should configure the LCD from the NVM extended configuration region
+ *  as a workaround for certain parts.
+ **/
+static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	u32 i, data, cnf_size, cnf_base_addr, sw_cfg_mask;
+	s32 ret_val = 0;
+	u16 word_addr, reg_data, reg_addr, phy_page = 0;
+
+	/* Initialize the PHY from the NVM on ICH platforms.  This
+	 * is needed due to an issue where the NVM configuration is
+	 * not properly autoloaded after power transitions.
+	 * Therefore, after each PHY reset, we will load the
+	 * configuration data out of the NVM manually.
+	 */
+	switch (hw->mac.type) {
+	case e1000_ich8lan:
+		if (phy->type != e1000_phy_igp_3)
+			return ret_val;
+
+		if ((hw->adapter->pdev->device == E1000_DEV_ID_ICH8_IGP_AMT) ||
+		    (hw->adapter->pdev->device == E1000_DEV_ID_ICH8_IGP_C)) {
+			sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG;
+			break;
+		}
+		fallthrough;
+	case e1000_pchlan:
+	case e1000_pch2lan:
+	case e1000_pch_lpt:
+	case e1000_pch_spt:
+	case e1000_pch_cnp:
+	case e1000_pch_tgp:
+	case e1000_pch_adp:
+	case e1000_pch_mtp:
+	case e1000_pch_lnp:
+	case e1000_pch_ptp:
+	case e1000_pch_nvp:
+		sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M;
+		break;
+	default:
+		return ret_val;
+	}
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	data = er32(FEXTNVM);
+	if (!(data & sw_cfg_mask))
+		goto release;
+
+	/* Make sure HW does not configure LCD from PHY
+	 * extended configuration before SW configuration
+	 */
+	data = er32(EXTCNF_CTRL);
+	if ((hw->mac.type < e1000_pch2lan) &&
+	    (data & E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE))
+		goto release;
+
+	cnf_size = er32(EXTCNF_SIZE);
+	cnf_size &= E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK;
+	cnf_size >>= E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT;
+	if (!cnf_size)
+		goto release;
+
+	cnf_base_addr = data & E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK;
+	cnf_base_addr >>= E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT;
+
+	if (((hw->mac.type == e1000_pchlan) &&
+	     !(data & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE)) ||
+	    (hw->mac.type > e1000_pchlan)) {
+		/* HW configures the SMBus address and LEDs when the
+		 * OEM and LCD Write Enable bits are set in the NVM.
+		 * When both NVM bits are cleared, SW will configure
+		 * them instead.
+		 */
+		ret_val = e1000_write_smbus_addr(hw);
+		if (ret_val)
+			goto release;
+
+		data = er32(LEDCTL);
+		ret_val = e1000_write_phy_reg_hv_locked(hw, HV_LED_CONFIG,
+							(u16)data);
+		if (ret_val)
+			goto release;
+	}
+
+	/* Configure LCD from extended configuration region. */
+
+	/* cnf_base_addr is in DWORD */
+	word_addr = (u16)(cnf_base_addr << 1);
+
+	for (i = 0; i < cnf_size; i++) {
+		ret_val = e1000_read_nvm(hw, (word_addr + i * 2), 1, &reg_data);
+		if (ret_val)
+			goto release;
+
+		ret_val = e1000_read_nvm(hw, (word_addr + i * 2 + 1),
+					 1, &reg_addr);
+		if (ret_val)
+			goto release;
+
+		/* Save off the PHY page for future writes. */
+		if (reg_addr == IGP01E1000_PHY_PAGE_SELECT) {
+			phy_page = reg_data;
+			continue;
+		}
+
+		reg_addr &= PHY_REG_MASK;
+		reg_addr |= phy_page;
+
+		ret_val = e1e_wphy_locked(hw, (u32)reg_addr, reg_data);
+		if (ret_val)
+			goto release;
+	}
+
+release:
+	hw->phy.ops.release(hw);
+	return ret_val;
+}
+
+/**
+ *  e1000_k1_gig_workaround_hv - K1 Si workaround
+ *  @hw:   pointer to the HW structure
+ *  @link: link up bool flag
+ *
+ *  If K1 is enabled for 1Gbps, the MAC might stall when transitioning
+ *  from a lower speed.  This workaround disables K1 whenever link is at 1Gig
+ *  If link is down, the function will restore the default K1 setting located
+ *  in the NVM.
+ **/
+static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link)
+{
+	s32 ret_val = 0;
+	u16 status_reg = 0;
+	bool k1_enable = hw->dev_spec.ich8lan.nvm_k1_enabled;
+
+	if (hw->mac.type != e1000_pchlan)
+		return 0;
+
+	/* Wrap the whole flow with the sw flag */
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Disable K1 when link is 1Gbps, otherwise use the NVM setting */
+	if (link) {
+		if (hw->phy.type == e1000_phy_82578) {
+			ret_val = e1e_rphy_locked(hw, BM_CS_STATUS,
+						  &status_reg);
+			if (ret_val)
+				goto release;
+
+			status_reg &= (BM_CS_STATUS_LINK_UP |
+				       BM_CS_STATUS_RESOLVED |
+				       BM_CS_STATUS_SPEED_MASK);
+
+			if (status_reg == (BM_CS_STATUS_LINK_UP |
+					   BM_CS_STATUS_RESOLVED |
+					   BM_CS_STATUS_SPEED_1000))
+				k1_enable = false;
+		}
+
+		if (hw->phy.type == e1000_phy_82577) {
+			ret_val = e1e_rphy_locked(hw, HV_M_STATUS, &status_reg);
+			if (ret_val)
+				goto release;
+
+			status_reg &= (HV_M_STATUS_LINK_UP |
+				       HV_M_STATUS_AUTONEG_COMPLETE |
+				       HV_M_STATUS_SPEED_MASK);
+
+			if (status_reg == (HV_M_STATUS_LINK_UP |
+					   HV_M_STATUS_AUTONEG_COMPLETE |
+					   HV_M_STATUS_SPEED_1000))
+				k1_enable = false;
+		}
+
+		/* Link stall fix for link up */
+		ret_val = e1e_wphy_locked(hw, PHY_REG(770, 19), 0x0100);
+		if (ret_val)
+			goto release;
+
+	} else {
+		/* Link stall fix for link down */
+		ret_val = e1e_wphy_locked(hw, PHY_REG(770, 19), 0x4100);
+		if (ret_val)
+			goto release;
+	}
+
+	ret_val = e1000_configure_k1_ich8lan(hw, k1_enable);
+
+release:
+	hw->phy.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_configure_k1_ich8lan - Configure K1 power state
+ *  @hw: pointer to the HW structure
+ *  @k1_enable: K1 state to configure
+ *
+ *  Configure the K1 power state based on the provided parameter.
+ *  Assumes semaphore already acquired.
+ *
+ *  Success returns 0, Failure returns -E1000_ERR_PHY (-2)
+ **/
+s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable)
+{
+	s32 ret_val;
+	u32 ctrl_reg = 0;
+	u32 ctrl_ext = 0;
+	u32 reg = 0;
+	u16 kmrn_reg = 0;
+
+	ret_val = e1000e_read_kmrn_reg_locked(hw, E1000_KMRNCTRLSTA_K1_CONFIG,
+					      &kmrn_reg);
+	if (ret_val)
+		return ret_val;
+
+	if (k1_enable)
+		kmrn_reg |= E1000_KMRNCTRLSTA_K1_ENABLE;
+	else
+		kmrn_reg &= ~E1000_KMRNCTRLSTA_K1_ENABLE;
+
+	ret_val = e1000e_write_kmrn_reg_locked(hw, E1000_KMRNCTRLSTA_K1_CONFIG,
+					       kmrn_reg);
+	if (ret_val)
+		return ret_val;
+
+	usleep_range(20, 40);
+	ctrl_ext = er32(CTRL_EXT);
+	ctrl_reg = er32(CTRL);
+
+	reg = ctrl_reg & ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100);
+	reg |= E1000_CTRL_FRCSPD;
+	ew32(CTRL, reg);
+
+	ew32(CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_SPD_BYPS);
+	e1e_flush();
+	usleep_range(20, 40);
+	ew32(CTRL, ctrl_reg);
+	ew32(CTRL_EXT, ctrl_ext);
+	e1e_flush();
+	usleep_range(20, 40);
+
+	return 0;
+}
+
+/**
+ *  e1000_oem_bits_config_ich8lan - SW-based LCD Configuration
+ *  @hw:       pointer to the HW structure
+ *  @d0_state: boolean if entering d0 or d3 device state
+ *
+ *  SW will configure Gbe Disable and LPLU based on the NVM. The four bits are
+ *  collectively called OEM bits.  The OEM Write Enable bit and SW Config bit
+ *  in NVM determines whether HW should configure LPLU and Gbe Disable.
+ **/
+static s32 e1000_oem_bits_config_ich8lan(struct e1000_hw *hw, bool d0_state)
+{
+	s32 ret_val = 0;
+	u32 mac_reg;
+	u16 oem_reg;
+
+	if (hw->mac.type < e1000_pchlan)
+		return ret_val;
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	if (hw->mac.type == e1000_pchlan) {
+		mac_reg = er32(EXTCNF_CTRL);
+		if (mac_reg & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE)
+			goto release;
+	}
+
+	mac_reg = er32(FEXTNVM);
+	if (!(mac_reg & E1000_FEXTNVM_SW_CONFIG_ICH8M))
+		goto release;
+
+	mac_reg = er32(PHY_CTRL);
+
+	ret_val = e1e_rphy_locked(hw, HV_OEM_BITS, &oem_reg);
+	if (ret_val)
+		goto release;
+
+	oem_reg &= ~(HV_OEM_BITS_GBE_DIS | HV_OEM_BITS_LPLU);
+
+	if (d0_state) {
+		if (mac_reg & E1000_PHY_CTRL_GBE_DISABLE)
+			oem_reg |= HV_OEM_BITS_GBE_DIS;
+
+		if (mac_reg & E1000_PHY_CTRL_D0A_LPLU)
+			oem_reg |= HV_OEM_BITS_LPLU;
+	} else {
+		if (mac_reg & (E1000_PHY_CTRL_GBE_DISABLE |
+			       E1000_PHY_CTRL_NOND0A_GBE_DISABLE))
+			oem_reg |= HV_OEM_BITS_GBE_DIS;
+
+		if (mac_reg & (E1000_PHY_CTRL_D0A_LPLU |
+			       E1000_PHY_CTRL_NOND0A_LPLU))
+			oem_reg |= HV_OEM_BITS_LPLU;
+	}
+
+	/* Set Restart auto-neg to activate the bits */
+	if ((d0_state || (hw->mac.type != e1000_pchlan)) &&
+	    !hw->phy.ops.check_reset_block(hw))
+		oem_reg |= HV_OEM_BITS_RESTART_AN;
+
+	ret_val = e1e_wphy_locked(hw, HV_OEM_BITS, oem_reg);
+
+release:
+	hw->phy.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_set_mdio_slow_mode_hv - Set slow MDIO access mode
+ *  @hw:   pointer to the HW structure
+ **/
+static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 data;
+
+	ret_val = e1e_rphy(hw, HV_KMRN_MODE_CTRL, &data);
+	if (ret_val)
+		return ret_val;
+
+	data |= HV_KMRN_MDIO_SLOW;
+
+	ret_val = e1e_wphy(hw, HV_KMRN_MODE_CTRL, data);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_hv_phy_workarounds_ich8lan - apply PHY workarounds
+ *  @hw: pointer to the HW structure
+ *
+ *  A series of PHY workarounds to be done after every PHY reset.
+ **/
+static s32 e1000_hv_phy_workarounds_ich8lan(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u16 phy_data;
+
+	if (hw->mac.type != e1000_pchlan)
+		return 0;
+
+	/* Set MDIO slow mode before any other MDIO access */
+	if (hw->phy.type == e1000_phy_82577) {
+		ret_val = e1000_set_mdio_slow_mode_hv(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	if (((hw->phy.type == e1000_phy_82577) &&
+	     ((hw->phy.revision == 1) || (hw->phy.revision == 2))) ||
+	    ((hw->phy.type == e1000_phy_82578) && (hw->phy.revision == 1))) {
+		/* Disable generation of early preamble */
+		ret_val = e1e_wphy(hw, PHY_REG(769, 25), 0x4431);
+		if (ret_val)
+			return ret_val;
+
+		/* Preamble tuning for SSC */
+		ret_val = e1e_wphy(hw, HV_KMRN_FIFO_CTRLSTA, 0xA204);
+		if (ret_val)
+			return ret_val;
+	}
+
+	if (hw->phy.type == e1000_phy_82578) {
+		/* Return registers to default by doing a soft reset then
+		 * writing 0x3140 to the control register.
+		 */
+		if (hw->phy.revision < 2) {
+			e1000e_phy_sw_reset(hw);
+			ret_val = e1e_wphy(hw, MII_BMCR, 0x3140);
+			if (ret_val)
+				return ret_val;
+		}
+	}
+
+	/* Select page 0 */
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	hw->phy.addr = 1;
+	ret_val = e1000e_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, 0);
+	hw->phy.ops.release(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Configure the K1 Si workaround during phy reset assuming there is
+	 * link so that it disables K1 if link is in 1Gbps.
+	 */
+	ret_val = e1000_k1_gig_workaround_hv(hw, true);
+	if (ret_val)
+		return ret_val;
+
+	/* Workaround for link disconnects on a busy hub in half duplex */
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+	ret_val = e1e_rphy_locked(hw, BM_PORT_GEN_CFG, &phy_data);
+	if (ret_val)
+		goto release;
+	ret_val = e1e_wphy_locked(hw, BM_PORT_GEN_CFG, phy_data & 0x00FF);
+	if (ret_val)
+		goto release;
+
+	/* set MSE higher to enable link to stay up when noise is high */
+	ret_val = e1000_write_emi_reg_locked(hw, I82577_MSE_THRESHOLD, 0x0034);
+release:
+	hw->phy.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_copy_rx_addrs_to_phy_ich8lan - Copy Rx addresses from MAC to PHY
+ *  @hw:   pointer to the HW structure
+ **/
+void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw)
+{
+	u32 mac_reg;
+	u16 i, phy_reg = 0;
+	s32 ret_val;
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return;
+	ret_val = e1000_enable_phy_wakeup_reg_access_bm(hw, &phy_reg);
+	if (ret_val)
+		goto release;
+
+	/* Copy both RAL/H (rar_entry_count) and SHRAL/H to PHY */
+	for (i = 0; i < (hw->mac.rar_entry_count); i++) {
+		mac_reg = er32(RAL(i));
+		hw->phy.ops.write_reg_page(hw, BM_RAR_L(i),
+					   (u16)(mac_reg & 0xFFFF));
+		hw->phy.ops.write_reg_page(hw, BM_RAR_M(i),
+					   (u16)((mac_reg >> 16) & 0xFFFF));
+
+		mac_reg = er32(RAH(i));
+		hw->phy.ops.write_reg_page(hw, BM_RAR_H(i),
+					   (u16)(mac_reg & 0xFFFF));
+		hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i),
+					   (u16)((mac_reg & E1000_RAH_AV)
+						 >> 16));
+	}
+
+	e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg);
+
+release:
+	hw->phy.ops.release(hw);
+}
+
+/**
+ *  e1000_lv_jumbo_workaround_ich8lan - required for jumbo frame operation
+ *  with 82579 PHY
+ *  @hw: pointer to the HW structure
+ *  @enable: flag to enable/disable workaround when enabling/disabling jumbos
+ **/
+s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable)
+{
+	s32 ret_val = 0;
+	u16 phy_reg, data;
+	u32 mac_reg;
+	u16 i;
+
+	if (hw->mac.type < e1000_pch2lan)
+		return 0;
+
+	/* disable Rx path while enabling/disabling workaround */
+	e1e_rphy(hw, PHY_REG(769, 20), &phy_reg);
+	ret_val = e1e_wphy(hw, PHY_REG(769, 20), phy_reg | BIT(14));
+	if (ret_val)
+		return ret_val;
+
+	if (enable) {
+		/* Write Rx addresses (rar_entry_count for RAL/H, and
+		 * SHRAL/H) and initial CRC values to the MAC
+		 */
+		for (i = 0; i < hw->mac.rar_entry_count; i++) {
+			u8 mac_addr[ETH_ALEN] = { 0 };
+			u32 addr_high, addr_low;
+
+			addr_high = er32(RAH(i));
+			if (!(addr_high & E1000_RAH_AV))
+				continue;
+			addr_low = er32(RAL(i));
+			mac_addr[0] = (addr_low & 0xFF);
+			mac_addr[1] = ((addr_low >> 8) & 0xFF);
+			mac_addr[2] = ((addr_low >> 16) & 0xFF);
+			mac_addr[3] = ((addr_low >> 24) & 0xFF);
+			mac_addr[4] = (addr_high & 0xFF);
+			mac_addr[5] = ((addr_high >> 8) & 0xFF);
+
+			ew32(PCH_RAICC(i), ~ether_crc_le(ETH_ALEN, mac_addr));
+		}
+
+		/* Write Rx addresses to the PHY */
+		e1000_copy_rx_addrs_to_phy_ich8lan(hw);
+
+		/* Enable jumbo frame workaround in the MAC */
+		mac_reg = er32(FFLT_DBG);
+		mac_reg &= ~BIT(14);
+		mac_reg |= (7 << 15);
+		ew32(FFLT_DBG, mac_reg);
+
+		mac_reg = er32(RCTL);
+		mac_reg |= E1000_RCTL_SECRC;
+		ew32(RCTL, mac_reg);
+
+		ret_val = e1000e_read_kmrn_reg(hw,
+					       E1000_KMRNCTRLSTA_CTRL_OFFSET,
+					       &data);
+		if (ret_val)
+			return ret_val;
+		ret_val = e1000e_write_kmrn_reg(hw,
+						E1000_KMRNCTRLSTA_CTRL_OFFSET,
+						data | BIT(0));
+		if (ret_val)
+			return ret_val;
+		ret_val = e1000e_read_kmrn_reg(hw,
+					       E1000_KMRNCTRLSTA_HD_CTRL,
+					       &data);
+		if (ret_val)
+			return ret_val;
+		data &= ~(0xF << 8);
+		data |= (0xB << 8);
+		ret_val = e1000e_write_kmrn_reg(hw,
+						E1000_KMRNCTRLSTA_HD_CTRL,
+						data);
+		if (ret_val)
+			return ret_val;
+
+		/* Enable jumbo frame workaround in the PHY */
+		e1e_rphy(hw, PHY_REG(769, 23), &data);
+		data &= ~(0x7F << 5);
+		data |= (0x37 << 5);
+		ret_val = e1e_wphy(hw, PHY_REG(769, 23), data);
+		if (ret_val)
+			return ret_val;
+		e1e_rphy(hw, PHY_REG(769, 16), &data);
+		data &= ~BIT(13);
+		ret_val = e1e_wphy(hw, PHY_REG(769, 16), data);
+		if (ret_val)
+			return ret_val;
+		e1e_rphy(hw, PHY_REG(776, 20), &data);
+		data &= ~(0x3FF << 2);
+		data |= (E1000_TX_PTR_GAP << 2);
+		ret_val = e1e_wphy(hw, PHY_REG(776, 20), data);
+		if (ret_val)
+			return ret_val;
+		ret_val = e1e_wphy(hw, PHY_REG(776, 23), 0xF100);
+		if (ret_val)
+			return ret_val;
+		e1e_rphy(hw, HV_PM_CTRL, &data);
+		ret_val = e1e_wphy(hw, HV_PM_CTRL, data | BIT(10));
+		if (ret_val)
+			return ret_val;
+	} else {
+		/* Write MAC register values back to h/w defaults */
+		mac_reg = er32(FFLT_DBG);
+		mac_reg &= ~(0xF << 14);
+		ew32(FFLT_DBG, mac_reg);
+
+		mac_reg = er32(RCTL);
+		mac_reg &= ~E1000_RCTL_SECRC;
+		ew32(RCTL, mac_reg);
+
+		ret_val = e1000e_read_kmrn_reg(hw,
+					       E1000_KMRNCTRLSTA_CTRL_OFFSET,
+					       &data);
+		if (ret_val)
+			return ret_val;
+		ret_val = e1000e_write_kmrn_reg(hw,
+						E1000_KMRNCTRLSTA_CTRL_OFFSET,
+						data & ~BIT(0));
+		if (ret_val)
+			return ret_val;
+		ret_val = e1000e_read_kmrn_reg(hw,
+					       E1000_KMRNCTRLSTA_HD_CTRL,
+					       &data);
+		if (ret_val)
+			return ret_val;
+		data &= ~(0xF << 8);
+		data |= (0xB << 8);
+		ret_val = e1000e_write_kmrn_reg(hw,
+						E1000_KMRNCTRLSTA_HD_CTRL,
+						data);
+		if (ret_val)
+			return ret_val;
+
+		/* Write PHY register values back to h/w defaults */
+		e1e_rphy(hw, PHY_REG(769, 23), &data);
+		data &= ~(0x7F << 5);
+		ret_val = e1e_wphy(hw, PHY_REG(769, 23), data);
+		if (ret_val)
+			return ret_val;
+		e1e_rphy(hw, PHY_REG(769, 16), &data);
+		data |= BIT(13);
+		ret_val = e1e_wphy(hw, PHY_REG(769, 16), data);
+		if (ret_val)
+			return ret_val;
+		e1e_rphy(hw, PHY_REG(776, 20), &data);
+		data &= ~(0x3FF << 2);
+		data |= (0x8 << 2);
+		ret_val = e1e_wphy(hw, PHY_REG(776, 20), data);
+		if (ret_val)
+			return ret_val;
+		ret_val = e1e_wphy(hw, PHY_REG(776, 23), 0x7E00);
+		if (ret_val)
+			return ret_val;
+		e1e_rphy(hw, HV_PM_CTRL, &data);
+		ret_val = e1e_wphy(hw, HV_PM_CTRL, data & ~BIT(10));
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* re-enable Rx path after enabling/disabling workaround */
+	return e1e_wphy(hw, PHY_REG(769, 20), phy_reg & ~BIT(14));
+}
+
+/**
+ *  e1000_lv_phy_workarounds_ich8lan - apply ich8 specific workarounds
+ *  @hw: pointer to the HW structure
+ *
+ *  A series of PHY workarounds to be done after every PHY reset.
+ **/
+static s32 e1000_lv_phy_workarounds_ich8lan(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+
+	if (hw->mac.type != e1000_pch2lan)
+		return 0;
+
+	/* Set MDIO slow mode before any other MDIO access */
+	ret_val = e1000_set_mdio_slow_mode_hv(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+	/* set MSE higher to enable link to stay up when noise is high */
+	ret_val = e1000_write_emi_reg_locked(hw, I82579_MSE_THRESHOLD, 0x0034);
+	if (ret_val)
+		goto release;
+	/* drop link after 5 times MSE threshold was reached */
+	ret_val = e1000_write_emi_reg_locked(hw, I82579_MSE_LINK_DOWN, 0x0005);
+release:
+	hw->phy.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_k1_workaround_lv - K1 Si workaround
+ *  @hw:   pointer to the HW structure
+ *
+ *  Workaround to set the K1 beacon duration for 82579 parts in 10Mbps
+ *  Disable K1 in 1000Mbps and 100Mbps
+ **/
+static s32 e1000_k1_workaround_lv(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u16 status_reg = 0;
+
+	if (hw->mac.type != e1000_pch2lan)
+		return 0;
+
+	/* Set K1 beacon duration based on 10Mbs speed */
+	ret_val = e1e_rphy(hw, HV_M_STATUS, &status_reg);
+	if (ret_val)
+		return ret_val;
+
+	if ((status_reg & (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE))
+	    == (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE)) {
+		if (status_reg &
+		    (HV_M_STATUS_SPEED_1000 | HV_M_STATUS_SPEED_100)) {
+			u16 pm_phy_reg;
+
+			/* LV 1G/100 Packet drop issue wa  */
+			ret_val = e1e_rphy(hw, HV_PM_CTRL, &pm_phy_reg);
+			if (ret_val)
+				return ret_val;
+			pm_phy_reg &= ~HV_PM_CTRL_K1_ENABLE;
+			ret_val = e1e_wphy(hw, HV_PM_CTRL, pm_phy_reg);
+			if (ret_val)
+				return ret_val;
+		} else {
+			u32 mac_reg;
+
+			mac_reg = er32(FEXTNVM4);
+			mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK;
+			mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_16USEC;
+			ew32(FEXTNVM4, mac_reg);
+		}
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_gate_hw_phy_config_ich8lan - disable PHY config via hardware
+ *  @hw:   pointer to the HW structure
+ *  @gate: boolean set to true to gate, false to ungate
+ *
+ *  Gate/ungate the automatic PHY configuration via hardware; perform
+ *  the configuration via software instead.
+ **/
+static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate)
+{
+	u32 extcnf_ctrl;
+
+	if (hw->mac.type < e1000_pch2lan)
+		return;
+
+	extcnf_ctrl = er32(EXTCNF_CTRL);
+
+	if (gate)
+		extcnf_ctrl |= E1000_EXTCNF_CTRL_GATE_PHY_CFG;
+	else
+		extcnf_ctrl &= ~E1000_EXTCNF_CTRL_GATE_PHY_CFG;
+
+	ew32(EXTCNF_CTRL, extcnf_ctrl);
+}
+
+/**
+ *  e1000_lan_init_done_ich8lan - Check for PHY config completion
+ *  @hw: pointer to the HW structure
+ *
+ *  Check the appropriate indication the MAC has finished configuring the
+ *  PHY after a software reset.
+ **/
+static void e1000_lan_init_done_ich8lan(struct e1000_hw *hw)
+{
+	u32 data, loop = E1000_ICH8_LAN_INIT_TIMEOUT;
+
+	/* Wait for basic configuration completes before proceeding */
+	do {
+		data = er32(STATUS);
+		data &= E1000_STATUS_LAN_INIT_DONE;
+		usleep_range(100, 200);
+	} while ((!data) && --loop);
+
+	/* If basic configuration is incomplete before the above loop
+	 * count reaches 0, loading the configuration from NVM will
+	 * leave the PHY in a bad state possibly resulting in no link.
+	 */
+	if (loop == 0)
+		e_dbg("LAN_INIT_DONE not set, increase timeout\n");
+
+	/* Clear the Init Done bit for the next init event */
+	data = er32(STATUS);
+	data &= ~E1000_STATUS_LAN_INIT_DONE;
+	ew32(STATUS, data);
+}
+
+/**
+ *  e1000_post_phy_reset_ich8lan - Perform steps required after a PHY reset
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u16 reg;
+
+	if (hw->phy.ops.check_reset_block(hw))
+		return 0;
+
+	/* Allow time for h/w to get to quiescent state after reset */
+	usleep_range(10000, 11000);
+
+	/* Perform any necessary post-reset workarounds */
+	switch (hw->mac.type) {
+	case e1000_pchlan:
+		ret_val = e1000_hv_phy_workarounds_ich8lan(hw);
+		if (ret_val)
+			return ret_val;
+		break;
+	case e1000_pch2lan:
+		ret_val = e1000_lv_phy_workarounds_ich8lan(hw);
+		if (ret_val)
+			return ret_val;
+		break;
+	default:
+		break;
+	}
+
+	/* Clear the host wakeup bit after lcd reset */
+	if (hw->mac.type >= e1000_pchlan) {
+		e1e_rphy(hw, BM_PORT_GEN_CFG, &reg);
+		reg &= ~BM_WUC_HOST_WU_BIT;
+		e1e_wphy(hw, BM_PORT_GEN_CFG, reg);
+	}
+
+	/* Configure the LCD with the extended configuration region in NVM */
+	ret_val = e1000_sw_lcd_config_ich8lan(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Configure the LCD with the OEM bits in NVM */
+	ret_val = e1000_oem_bits_config_ich8lan(hw, true);
+
+	if (hw->mac.type == e1000_pch2lan) {
+		/* Ungate automatic PHY configuration on non-managed 82579 */
+		if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
+			usleep_range(10000, 11000);
+			e1000_gate_hw_phy_config_ich8lan(hw, false);
+		}
+
+		/* Set EEE LPI Update Timer to 200usec */
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+		ret_val = e1000_write_emi_reg_locked(hw,
+						     I82579_LPI_UPDATE_TIMER,
+						     0x1387);
+		hw->phy.ops.release(hw);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_phy_hw_reset_ich8lan - Performs a PHY reset
+ *  @hw: pointer to the HW structure
+ *
+ *  Resets the PHY
+ *  This is a function pointer entry point called by drivers
+ *  or other shared routines.
+ **/
+static s32 e1000_phy_hw_reset_ich8lan(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+
+	/* Gate automatic PHY configuration by hardware on non-managed 82579 */
+	if ((hw->mac.type == e1000_pch2lan) &&
+	    !(er32(FWSM) & E1000_ICH_FWSM_FW_VALID))
+		e1000_gate_hw_phy_config_ich8lan(hw, true);
+
+	ret_val = e1000e_phy_hw_reset_generic(hw);
+	if (ret_val)
+		return ret_val;
+
+	return e1000_post_phy_reset_ich8lan(hw);
+}
+
+/**
+ *  e1000_set_lplu_state_pchlan - Set Low Power Link Up state
+ *  @hw: pointer to the HW structure
+ *  @active: true to enable LPLU, false to disable
+ *
+ *  Sets the LPLU state according to the active flag.  For PCH, if OEM write
+ *  bit are disabled in the NVM, writing the LPLU bits in the MAC will not set
+ *  the phy speed. This function will manually set the LPLU bit and restart
+ *  auto-neg as hw would do. D3 and D0 LPLU will call the same function
+ *  since it configures the same bit.
+ **/
+static s32 e1000_set_lplu_state_pchlan(struct e1000_hw *hw, bool active)
+{
+	s32 ret_val;
+	u16 oem_reg;
+
+	ret_val = e1e_rphy(hw, HV_OEM_BITS, &oem_reg);
+	if (ret_val)
+		return ret_val;
+
+	if (active)
+		oem_reg |= HV_OEM_BITS_LPLU;
+	else
+		oem_reg &= ~HV_OEM_BITS_LPLU;
+
+	if (!hw->phy.ops.check_reset_block(hw))
+		oem_reg |= HV_OEM_BITS_RESTART_AN;
+
+	return e1e_wphy(hw, HV_OEM_BITS, oem_reg);
+}
+
+/**
+ *  e1000_set_d0_lplu_state_ich8lan - Set Low Power Linkup D0 state
+ *  @hw: pointer to the HW structure
+ *  @active: true to enable LPLU, false to disable
+ *
+ *  Sets the LPLU D0 state according to the active flag.  When
+ *  activating LPLU this function also disables smart speed
+ *  and vice versa.  LPLU will not be activated unless the
+ *  device autonegotiation advertisement meets standards of
+ *  either 10 or 10/100 or 10/100/1000 at all duplexes.
+ *  This is a function pointer entry point only called by
+ *  PHY setup routines.
+ **/
+static s32 e1000_set_d0_lplu_state_ich8lan(struct e1000_hw *hw, bool active)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	u32 phy_ctrl;
+	s32 ret_val = 0;
+	u16 data;
+
+	if (phy->type == e1000_phy_ife)
+		return 0;
+
+	phy_ctrl = er32(PHY_CTRL);
+
+	if (active) {
+		phy_ctrl |= E1000_PHY_CTRL_D0A_LPLU;
+		ew32(PHY_CTRL, phy_ctrl);
+
+		if (phy->type != e1000_phy_igp_3)
+			return 0;
+
+		/* Call gig speed drop workaround on LPLU before accessing
+		 * any PHY registers
+		 */
+		if (hw->mac.type == e1000_ich8lan)
+			e1000e_gig_downshift_workaround_ich8lan(hw);
+
+		/* When LPLU is enabled, we should disable SmartSpeed */
+		ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, &data);
+		if (ret_val)
+			return ret_val;
+		data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+		ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, data);
+		if (ret_val)
+			return ret_val;
+	} else {
+		phy_ctrl &= ~E1000_PHY_CTRL_D0A_LPLU;
+		ew32(PHY_CTRL, phy_ctrl);
+
+		if (phy->type != e1000_phy_igp_3)
+			return 0;
+
+		/* LPLU and SmartSpeed are mutually exclusive.  LPLU is used
+		 * during Dx states where the power conservation is most
+		 * important.  During driver activity we should enable
+		 * SmartSpeed, so performance is maintained.
+		 */
+		if (phy->smart_speed == e1000_smart_speed_on) {
+			ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   &data);
+			if (ret_val)
+				return ret_val;
+
+			data |= IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   data);
+			if (ret_val)
+				return ret_val;
+		} else if (phy->smart_speed == e1000_smart_speed_off) {
+			ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   &data);
+			if (ret_val)
+				return ret_val;
+
+			data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   data);
+			if (ret_val)
+				return ret_val;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_set_d3_lplu_state_ich8lan - Set Low Power Linkup D3 state
+ *  @hw: pointer to the HW structure
+ *  @active: true to enable LPLU, false to disable
+ *
+ *  Sets the LPLU D3 state according to the active flag.  When
+ *  activating LPLU this function also disables smart speed
+ *  and vice versa.  LPLU will not be activated unless the
+ *  device autonegotiation advertisement meets standards of
+ *  either 10 or 10/100 or 10/100/1000 at all duplexes.
+ *  This is a function pointer entry point only called by
+ *  PHY setup routines.
+ **/
+static s32 e1000_set_d3_lplu_state_ich8lan(struct e1000_hw *hw, bool active)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	u32 phy_ctrl;
+	s32 ret_val = 0;
+	u16 data;
+
+	phy_ctrl = er32(PHY_CTRL);
+
+	if (!active) {
+		phy_ctrl &= ~E1000_PHY_CTRL_NOND0A_LPLU;
+		ew32(PHY_CTRL, phy_ctrl);
+
+		if (phy->type != e1000_phy_igp_3)
+			return 0;
+
+		/* LPLU and SmartSpeed are mutually exclusive.  LPLU is used
+		 * during Dx states where the power conservation is most
+		 * important.  During driver activity we should enable
+		 * SmartSpeed, so performance is maintained.
+		 */
+		if (phy->smart_speed == e1000_smart_speed_on) {
+			ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   &data);
+			if (ret_val)
+				return ret_val;
+
+			data |= IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   data);
+			if (ret_val)
+				return ret_val;
+		} else if (phy->smart_speed == e1000_smart_speed_off) {
+			ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   &data);
+			if (ret_val)
+				return ret_val;
+
+			data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   data);
+			if (ret_val)
+				return ret_val;
+		}
+	} else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) ||
+		   (phy->autoneg_advertised == E1000_ALL_NOT_GIG) ||
+		   (phy->autoneg_advertised == E1000_ALL_10_SPEED)) {
+		phy_ctrl |= E1000_PHY_CTRL_NOND0A_LPLU;
+		ew32(PHY_CTRL, phy_ctrl);
+
+		if (phy->type != e1000_phy_igp_3)
+			return 0;
+
+		/* Call gig speed drop workaround on LPLU before accessing
+		 * any PHY registers
+		 */
+		if (hw->mac.type == e1000_ich8lan)
+			e1000e_gig_downshift_workaround_ich8lan(hw);
+
+		/* When LPLU is enabled, we should disable SmartSpeed */
+		ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, &data);
+		if (ret_val)
+			return ret_val;
+
+		data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+		ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, data);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_valid_nvm_bank_detect_ich8lan - finds out the valid bank 0 or 1
+ *  @hw: pointer to the HW structure
+ *  @bank:  pointer to the variable that returns the active bank
+ *
+ *  Reads signature byte from the NVM using the flash access registers.
+ *  Word 0x13 bits 15:14 = 10b indicate a valid signature for that bank.
+ **/
+static s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank)
+{
+	u32 eecd;
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 bank1_offset = nvm->flash_bank_size * sizeof(u16);
+	u32 act_offset = E1000_ICH_NVM_SIG_WORD * 2 + 1;
+	u32 nvm_dword = 0;
+	u8 sig_byte = 0;
+	s32 ret_val;
+
+	switch (hw->mac.type) {
+	case e1000_pch_spt:
+	case e1000_pch_cnp:
+	case e1000_pch_tgp:
+	case e1000_pch_adp:
+	case e1000_pch_mtp:
+	case e1000_pch_lnp:
+	case e1000_pch_ptp:
+	case e1000_pch_nvp:
+		bank1_offset = nvm->flash_bank_size;
+		act_offset = E1000_ICH_NVM_SIG_WORD;
+
+		/* set bank to 0 in case flash read fails */
+		*bank = 0;
+
+		/* Check bank 0 */
+		ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset,
+							 &nvm_dword);
+		if (ret_val)
+			return ret_val;
+		sig_byte = (u8)((nvm_dword & 0xFF00) >> 8);
+		if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) ==
+		    E1000_ICH_NVM_SIG_VALUE) {
+			*bank = 0;
+			return 0;
+		}
+
+		/* Check bank 1 */
+		ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset +
+							 bank1_offset,
+							 &nvm_dword);
+		if (ret_val)
+			return ret_val;
+		sig_byte = (u8)((nvm_dword & 0xFF00) >> 8);
+		if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) ==
+		    E1000_ICH_NVM_SIG_VALUE) {
+			*bank = 1;
+			return 0;
+		}
+
+		e_dbg("ERROR: No valid NVM bank present\n");
+		return -E1000_ERR_NVM;
+	case e1000_ich8lan:
+	case e1000_ich9lan:
+		eecd = er32(EECD);
+		if ((eecd & E1000_EECD_SEC1VAL_VALID_MASK) ==
+		    E1000_EECD_SEC1VAL_VALID_MASK) {
+			if (eecd & E1000_EECD_SEC1VAL)
+				*bank = 1;
+			else
+				*bank = 0;
+
+			return 0;
+		}
+		e_dbg("Unable to determine valid NVM bank via EEC - reading flash signature\n");
+		fallthrough;
+	default:
+		/* set bank to 0 in case flash read fails */
+		*bank = 0;
+
+		/* Check bank 0 */
+		ret_val = e1000_read_flash_byte_ich8lan(hw, act_offset,
+							&sig_byte);
+		if (ret_val)
+			return ret_val;
+		if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) ==
+		    E1000_ICH_NVM_SIG_VALUE) {
+			*bank = 0;
+			return 0;
+		}
+
+		/* Check bank 1 */
+		ret_val = e1000_read_flash_byte_ich8lan(hw, act_offset +
+							bank1_offset,
+							&sig_byte);
+		if (ret_val)
+			return ret_val;
+		if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) ==
+		    E1000_ICH_NVM_SIG_VALUE) {
+			*bank = 1;
+			return 0;
+		}
+
+		e_dbg("ERROR: No valid NVM bank present\n");
+		return -E1000_ERR_NVM;
+	}
+}
+
+/**
+ *  e1000_read_nvm_spt - NVM access for SPT
+ *  @hw: pointer to the HW structure
+ *  @offset: The offset (in bytes) of the word(s) to read.
+ *  @words: Size of data to read in words.
+ *  @data: pointer to the word(s) to read at offset.
+ *
+ *  Reads a word(s) from the NVM
+ **/
+static s32 e1000_read_nvm_spt(struct e1000_hw *hw, u16 offset, u16 words,
+			      u16 *data)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
+	u32 act_offset;
+	s32 ret_val = 0;
+	u32 bank = 0;
+	u32 dword = 0;
+	u16 offset_to_read;
+	u16 i;
+
+	if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) ||
+	    (words == 0)) {
+		e_dbg("nvm parameter(s) out of bounds\n");
+		ret_val = -E1000_ERR_NVM;
+		goto out;
+	}
+
+	nvm->ops.acquire(hw);
+
+	ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank);
+	if (ret_val) {
+		e_dbg("Could not detect valid bank, assuming bank 0\n");
+		bank = 0;
+	}
+
+	act_offset = (bank) ? nvm->flash_bank_size : 0;
+	act_offset += offset;
+
+	ret_val = 0;
+
+	for (i = 0; i < words; i += 2) {
+		if (words - i == 1) {
+			if (dev_spec->shadow_ram[offset + i].modified) {
+				data[i] =
+				    dev_spec->shadow_ram[offset + i].value;
+			} else {
+				offset_to_read = act_offset + i -
+				    ((act_offset + i) % 2);
+				ret_val =
+				  e1000_read_flash_dword_ich8lan(hw,
+								 offset_to_read,
+								 &dword);
+				if (ret_val)
+					break;
+				if ((act_offset + i) % 2 == 0)
+					data[i] = (u16)(dword & 0xFFFF);
+				else
+					data[i] = (u16)((dword >> 16) & 0xFFFF);
+			}
+		} else {
+			offset_to_read = act_offset + i;
+			if (!(dev_spec->shadow_ram[offset + i].modified) ||
+			    !(dev_spec->shadow_ram[offset + i + 1].modified)) {
+				ret_val =
+				  e1000_read_flash_dword_ich8lan(hw,
+								 offset_to_read,
+								 &dword);
+				if (ret_val)
+					break;
+			}
+			if (dev_spec->shadow_ram[offset + i].modified)
+				data[i] =
+				    dev_spec->shadow_ram[offset + i].value;
+			else
+				data[i] = (u16)(dword & 0xFFFF);
+			if (dev_spec->shadow_ram[offset + i].modified)
+				data[i + 1] =
+				    dev_spec->shadow_ram[offset + i + 1].value;
+			else
+				data[i + 1] = (u16)(dword >> 16 & 0xFFFF);
+		}
+	}
+
+	nvm->ops.release(hw);
+
+out:
+	if (ret_val)
+		e_dbg("NVM read error: %d\n", ret_val);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_read_nvm_ich8lan - Read word(s) from the NVM
+ *  @hw: pointer to the HW structure
+ *  @offset: The offset (in bytes) of the word(s) to read.
+ *  @words: Size of data to read in words
+ *  @data: Pointer to the word(s) to read at offset.
+ *
+ *  Reads a word(s) from the NVM using the flash access registers.
+ **/
+static s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words,
+				  u16 *data)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
+	u32 act_offset;
+	s32 ret_val = 0;
+	u32 bank = 0;
+	u16 i, word;
+
+	if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) ||
+	    (words == 0)) {
+		e_dbg("nvm parameter(s) out of bounds\n");
+		ret_val = -E1000_ERR_NVM;
+		goto out;
+	}
+
+	nvm->ops.acquire(hw);
+
+	ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank);
+	if (ret_val) {
+		e_dbg("Could not detect valid bank, assuming bank 0\n");
+		bank = 0;
+	}
+
+	act_offset = (bank) ? nvm->flash_bank_size : 0;
+	act_offset += offset;
+
+	ret_val = 0;
+	for (i = 0; i < words; i++) {
+		if (dev_spec->shadow_ram[offset + i].modified) {
+			data[i] = dev_spec->shadow_ram[offset + i].value;
+		} else {
+			ret_val = e1000_read_flash_word_ich8lan(hw,
+								act_offset + i,
+								&word);
+			if (ret_val)
+				break;
+			data[i] = word;
+		}
+	}
+
+	nvm->ops.release(hw);
+
+out:
+	if (ret_val)
+		e_dbg("NVM read error: %d\n", ret_val);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_flash_cycle_init_ich8lan - Initialize flash
+ *  @hw: pointer to the HW structure
+ *
+ *  This function does initial flash setup so that a new read/write/erase cycle
+ *  can be started.
+ **/
+static s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw)
+{
+	union ich8_hws_flash_status hsfsts;
+	s32 ret_val = -E1000_ERR_NVM;
+
+	hsfsts.regval = er16flash(ICH_FLASH_HSFSTS);
+
+	/* Check if the flash descriptor is valid */
+	if (!hsfsts.hsf_status.fldesvalid) {
+		e_dbg("Flash descriptor invalid.  SW Sequencing must be used.\n");
+		return -E1000_ERR_NVM;
+	}
+
+	/* Clear FCERR and DAEL in hw status by writing 1 */
+	hsfsts.hsf_status.flcerr = 1;
+	hsfsts.hsf_status.dael = 1;
+	if (hw->mac.type >= e1000_pch_spt)
+		ew32flash(ICH_FLASH_HSFSTS, hsfsts.regval & 0xFFFF);
+	else
+		ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval);
+
+	/* Either we should have a hardware SPI cycle in progress
+	 * bit to check against, in order to start a new cycle or
+	 * FDONE bit should be changed in the hardware so that it
+	 * is 1 after hardware reset, which can then be used as an
+	 * indication whether a cycle is in progress or has been
+	 * completed.
+	 */
+
+	if (!hsfsts.hsf_status.flcinprog) {
+		/* There is no cycle running at present,
+		 * so we can start a cycle.
+		 * Begin by setting Flash Cycle Done.
+		 */
+		hsfsts.hsf_status.flcdone = 1;
+		if (hw->mac.type >= e1000_pch_spt)
+			ew32flash(ICH_FLASH_HSFSTS, hsfsts.regval & 0xFFFF);
+		else
+			ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval);
+		ret_val = 0;
+	} else {
+		s32 i;
+
+		/* Otherwise poll for sometime so the current
+		 * cycle has a chance to end before giving up.
+		 */
+		for (i = 0; i < ICH_FLASH_READ_COMMAND_TIMEOUT; i++) {
+			hsfsts.regval = er16flash(ICH_FLASH_HSFSTS);
+			if (!hsfsts.hsf_status.flcinprog) {
+				ret_val = 0;
+				break;
+			}
+			udelay(1);
+		}
+		if (!ret_val) {
+			/* Successful in waiting for previous cycle to timeout,
+			 * now set the Flash Cycle Done.
+			 */
+			hsfsts.hsf_status.flcdone = 1;
+			if (hw->mac.type >= e1000_pch_spt)
+				ew32flash(ICH_FLASH_HSFSTS,
+					  hsfsts.regval & 0xFFFF);
+			else
+				ew16flash(ICH_FLASH_HSFSTS, hsfsts.regval);
+		} else {
+			e_dbg("Flash controller busy, cannot get access\n");
+		}
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_flash_cycle_ich8lan - Starts flash cycle (read/write/erase)
+ *  @hw: pointer to the HW structure
+ *  @timeout: maximum time to wait for completion
+ *
+ *  This function starts a flash cycle and waits for its completion.
+ **/
+static s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout)
+{
+	union ich8_hws_flash_ctrl hsflctl;
+	union ich8_hws_flash_status hsfsts;
+	u32 i = 0;
+
+	/* Start a cycle by writing 1 in Flash Cycle Go in Hw Flash Control */
+	if (hw->mac.type >= e1000_pch_spt)
+		hsflctl.regval = er32flash(ICH_FLASH_HSFSTS) >> 16;
+	else
+		hsflctl.regval = er16flash(ICH_FLASH_HSFCTL);
+	hsflctl.hsf_ctrl.flcgo = 1;
+
+	if (hw->mac.type >= e1000_pch_spt)
+		ew32flash(ICH_FLASH_HSFSTS, hsflctl.regval << 16);
+	else
+		ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval);
+
+	/* wait till FDONE bit is set to 1 */
+	do {
+		hsfsts.regval = er16flash(ICH_FLASH_HSFSTS);
+		if (hsfsts.hsf_status.flcdone)
+			break;
+		udelay(1);
+	} while (i++ < timeout);
+
+	if (hsfsts.hsf_status.flcdone && !hsfsts.hsf_status.flcerr)
+		return 0;
+
+	return -E1000_ERR_NVM;
+}
+
+/**
+ *  e1000_read_flash_dword_ich8lan - Read dword from flash
+ *  @hw: pointer to the HW structure
+ *  @offset: offset to data location
+ *  @data: pointer to the location for storing the data
+ *
+ *  Reads the flash dword at offset into data.  Offset is converted
+ *  to bytes before read.
+ **/
+static s32 e1000_read_flash_dword_ich8lan(struct e1000_hw *hw, u32 offset,
+					  u32 *data)
+{
+	/* Must convert word offset into bytes. */
+	offset <<= 1;
+	return e1000_read_flash_data32_ich8lan(hw, offset, data);
+}
+
+/**
+ *  e1000_read_flash_word_ich8lan - Read word from flash
+ *  @hw: pointer to the HW structure
+ *  @offset: offset to data location
+ *  @data: pointer to the location for storing the data
+ *
+ *  Reads the flash word at offset into data.  Offset is converted
+ *  to bytes before read.
+ **/
+static s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, u32 offset,
+					 u16 *data)
+{
+	/* Must convert offset into bytes. */
+	offset <<= 1;
+
+	return e1000_read_flash_data_ich8lan(hw, offset, 2, data);
+}
+
+/**
+ *  e1000_read_flash_byte_ich8lan - Read byte from flash
+ *  @hw: pointer to the HW structure
+ *  @offset: The offset of the byte to read.
+ *  @data: Pointer to a byte to store the value read.
+ *
+ *  Reads a single byte from the NVM using the flash access registers.
+ **/
+static s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset,
+					 u8 *data)
+{
+	s32 ret_val;
+	u16 word = 0;
+
+	/* In SPT, only 32 bits access is supported,
+	 * so this function should not be called.
+	 */
+	if (hw->mac.type >= e1000_pch_spt)
+		return -E1000_ERR_NVM;
+	else
+		ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word);
+
+	if (ret_val)
+		return ret_val;
+
+	*data = (u8)word;
+
+	return 0;
+}
+
+/**
+ *  e1000_read_flash_data_ich8lan - Read byte or word from NVM
+ *  @hw: pointer to the HW structure
+ *  @offset: The offset (in bytes) of the byte or word to read.
+ *  @size: Size of data to read, 1=byte 2=word
+ *  @data: Pointer to the word to store the value read.
+ *
+ *  Reads a byte or word from the NVM using the flash access registers.
+ **/
+static s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset,
+					 u8 size, u16 *data)
+{
+	union ich8_hws_flash_status hsfsts;
+	union ich8_hws_flash_ctrl hsflctl;
+	u32 flash_linear_addr;
+	u32 flash_data = 0;
+	s32 ret_val = -E1000_ERR_NVM;
+	u8 count = 0;
+
+	if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK)
+		return -E1000_ERR_NVM;
+
+	flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) +
+			     hw->nvm.flash_base_addr);
+
+	do {
+		udelay(1);
+		/* Steps */
+		ret_val = e1000_flash_cycle_init_ich8lan(hw);
+		if (ret_val)
+			break;
+
+		hsflctl.regval = er16flash(ICH_FLASH_HSFCTL);
+		/* 0b/1b corresponds to 1 or 2 byte size, respectively. */
+		hsflctl.hsf_ctrl.fldbcount = size - 1;
+		hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_READ;
+		ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval);
+
+		ew32flash(ICH_FLASH_FADDR, flash_linear_addr);
+
+		ret_val =
+		    e1000_flash_cycle_ich8lan(hw,
+					      ICH_FLASH_READ_COMMAND_TIMEOUT);
+
+		/* Check if FCERR is set to 1, if set to 1, clear it
+		 * and try the whole sequence a few more times, else
+		 * read in (shift in) the Flash Data0, the order is
+		 * least significant byte first msb to lsb
+		 */
+		if (!ret_val) {
+			flash_data = er32flash(ICH_FLASH_FDATA0);
+			if (size == 1)
+				*data = (u8)(flash_data & 0x000000FF);
+			else if (size == 2)
+				*data = (u16)(flash_data & 0x0000FFFF);
+			break;
+		} else {
+			/* If we've gotten here, then things are probably
+			 * completely hosed, but if the error condition is
+			 * detected, it won't hurt to give it another try...
+			 * ICH_FLASH_CYCLE_REPEAT_COUNT times.
+			 */
+			hsfsts.regval = er16flash(ICH_FLASH_HSFSTS);
+			if (hsfsts.hsf_status.flcerr) {
+				/* Repeat for some time before giving up. */
+				continue;
+			} else if (!hsfsts.hsf_status.flcdone) {
+				e_dbg("Timeout error - flash cycle did not complete.\n");
+				break;
+			}
+		}
+	} while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_read_flash_data32_ich8lan - Read dword from NVM
+ *  @hw: pointer to the HW structure
+ *  @offset: The offset (in bytes) of the dword to read.
+ *  @data: Pointer to the dword to store the value read.
+ *
+ *  Reads a byte or word from the NVM using the flash access registers.
+ **/
+
+static s32 e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset,
+					   u32 *data)
+{
+	union ich8_hws_flash_status hsfsts;
+	union ich8_hws_flash_ctrl hsflctl;
+	u32 flash_linear_addr;
+	s32 ret_val = -E1000_ERR_NVM;
+	u8 count = 0;
+
+	if (offset > ICH_FLASH_LINEAR_ADDR_MASK || hw->mac.type < e1000_pch_spt)
+		return -E1000_ERR_NVM;
+	flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) +
+			     hw->nvm.flash_base_addr);
+
+	do {
+		udelay(1);
+		/* Steps */
+		ret_val = e1000_flash_cycle_init_ich8lan(hw);
+		if (ret_val)
+			break;
+		/* In SPT, This register is in Lan memory space, not flash.
+		 * Therefore, only 32 bit access is supported
+		 */
+		hsflctl.regval = er32flash(ICH_FLASH_HSFSTS) >> 16;
+
+		/* 0b/1b corresponds to 1 or 2 byte size, respectively. */
+		hsflctl.hsf_ctrl.fldbcount = sizeof(u32) - 1;
+		hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_READ;
+		/* In SPT, This register is in Lan memory space, not flash.
+		 * Therefore, only 32 bit access is supported
+		 */
+		ew32flash(ICH_FLASH_HSFSTS, (u32)hsflctl.regval << 16);
+		ew32flash(ICH_FLASH_FADDR, flash_linear_addr);
+
+		ret_val =
+		   e1000_flash_cycle_ich8lan(hw,
+					     ICH_FLASH_READ_COMMAND_TIMEOUT);
+
+		/* Check if FCERR is set to 1, if set to 1, clear it
+		 * and try the whole sequence a few more times, else
+		 * read in (shift in) the Flash Data0, the order is
+		 * least significant byte first msb to lsb
+		 */
+		if (!ret_val) {
+			*data = er32flash(ICH_FLASH_FDATA0);
+			break;
+		} else {
+			/* If we've gotten here, then things are probably
+			 * completely hosed, but if the error condition is
+			 * detected, it won't hurt to give it another try...
+			 * ICH_FLASH_CYCLE_REPEAT_COUNT times.
+			 */
+			hsfsts.regval = er16flash(ICH_FLASH_HSFSTS);
+			if (hsfsts.hsf_status.flcerr) {
+				/* Repeat for some time before giving up. */
+				continue;
+			} else if (!hsfsts.hsf_status.flcdone) {
+				e_dbg("Timeout error - flash cycle did not complete.\n");
+				break;
+			}
+		}
+	} while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_write_nvm_ich8lan - Write word(s) to the NVM
+ *  @hw: pointer to the HW structure
+ *  @offset: The offset (in bytes) of the word(s) to write.
+ *  @words: Size of data to write in words
+ *  @data: Pointer to the word(s) to write at offset.
+ *
+ *  Writes a byte or word to the NVM using the flash access registers.
+ **/
+static s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words,
+				   u16 *data)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
+	u16 i;
+
+	if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) ||
+	    (words == 0)) {
+		e_dbg("nvm parameter(s) out of bounds\n");
+		return -E1000_ERR_NVM;
+	}
+
+	nvm->ops.acquire(hw);
+
+	for (i = 0; i < words; i++) {
+		dev_spec->shadow_ram[offset + i].modified = true;
+		dev_spec->shadow_ram[offset + i].value = data[i];
+	}
+
+	nvm->ops.release(hw);
+
+	return 0;
+}
+
+/**
+ *  e1000_update_nvm_checksum_spt - Update the checksum for NVM
+ *  @hw: pointer to the HW structure
+ *
+ *  The NVM checksum is updated by calling the generic update_nvm_checksum,
+ *  which writes the checksum to the shadow ram.  The changes in the shadow
+ *  ram are then committed to the EEPROM by processing each bank at a time
+ *  checking for the modified bit and writing only the pending changes.
+ *  After a successful commit, the shadow ram is cleared and is ready for
+ *  future writes.
+ **/
+static s32 e1000_update_nvm_checksum_spt(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
+	u32 i, act_offset, new_bank_offset, old_bank_offset, bank;
+	s32 ret_val;
+	u32 dword = 0;
+
+	ret_val = e1000e_update_nvm_checksum_generic(hw);
+	if (ret_val)
+		goto out;
+
+	if (nvm->type != e1000_nvm_flash_sw)
+		goto out;
+
+	nvm->ops.acquire(hw);
+
+	/* We're writing to the opposite bank so if we're on bank 1,
+	 * write to bank 0 etc.  We also need to erase the segment that
+	 * is going to be written
+	 */
+	ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank);
+	if (ret_val) {
+		e_dbg("Could not detect valid bank, assuming bank 0\n");
+		bank = 0;
+	}
+
+	if (bank == 0) {
+		new_bank_offset = nvm->flash_bank_size;
+		old_bank_offset = 0;
+		ret_val = e1000_erase_flash_bank_ich8lan(hw, 1);
+		if (ret_val)
+			goto release;
+	} else {
+		old_bank_offset = nvm->flash_bank_size;
+		new_bank_offset = 0;
+		ret_val = e1000_erase_flash_bank_ich8lan(hw, 0);
+		if (ret_val)
+			goto release;
+	}
+	for (i = 0; i < E1000_ICH8_SHADOW_RAM_WORDS; i += 2) {
+		/* Determine whether to write the value stored
+		 * in the other NVM bank or a modified value stored
+		 * in the shadow RAM
+		 */
+		ret_val = e1000_read_flash_dword_ich8lan(hw,
+							 i + old_bank_offset,
+							 &dword);
+
+		if (dev_spec->shadow_ram[i].modified) {
+			dword &= 0xffff0000;
+			dword |= (dev_spec->shadow_ram[i].value & 0xffff);
+		}
+		if (dev_spec->shadow_ram[i + 1].modified) {
+			dword &= 0x0000ffff;
+			dword |= ((dev_spec->shadow_ram[i + 1].value & 0xffff)
+				  << 16);
+		}
+		if (ret_val)
+			break;
+
+		/* If the word is 0x13, then make sure the signature bits
+		 * (15:14) are 11b until the commit has completed.
+		 * This will allow us to write 10b which indicates the
+		 * signature is valid.  We want to do this after the write
+		 * has completed so that we don't mark the segment valid
+		 * while the write is still in progress
+		 */
+		if (i == E1000_ICH_NVM_SIG_WORD - 1)
+			dword |= E1000_ICH_NVM_SIG_MASK << 16;
+
+		/* Convert offset to bytes. */
+		act_offset = (i + new_bank_offset) << 1;
+
+		usleep_range(100, 200);
+
+		/* Write the data to the new bank. Offset in words */
+		act_offset = i + new_bank_offset;
+		ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset,
+								dword);
+		if (ret_val)
+			break;
+	}
+
+	/* Don't bother writing the segment valid bits if sector
+	 * programming failed.
+	 */
+	if (ret_val) {
+		/* Possibly read-only, see e1000e_write_protect_nvm_ich8lan() */
+		e_dbg("Flash commit failed.\n");
+		goto release;
+	}
+
+	/* Finally validate the new segment by setting bit 15:14
+	 * to 10b in word 0x13 , this can be done without an
+	 * erase as well since these bits are 11 to start with
+	 * and we need to change bit 14 to 0b
+	 */
+	act_offset = new_bank_offset + E1000_ICH_NVM_SIG_WORD;
+
+	/*offset in words but we read dword */
+	--act_offset;
+	ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &dword);
+
+	if (ret_val)
+		goto release;
+
+	dword &= 0xBFFFFFFF;
+	ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, dword);
+
+	if (ret_val)
+		goto release;
+
+	/* offset in words but we read dword */
+	act_offset = old_bank_offset + E1000_ICH_NVM_SIG_WORD - 1;
+	ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &dword);
+
+	if (ret_val)
+		goto release;
+
+	dword &= 0x00FFFFFF;
+	ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, dword);
+
+	if (ret_val)
+		goto release;
+
+	/* Great!  Everything worked, we can now clear the cached entries. */
+	for (i = 0; i < E1000_ICH8_SHADOW_RAM_WORDS; i++) {
+		dev_spec->shadow_ram[i].modified = false;
+		dev_spec->shadow_ram[i].value = 0xFFFF;
+	}
+
+release:
+	nvm->ops.release(hw);
+
+	/* Reload the EEPROM, or else modifications will not appear
+	 * until after the next adapter reset.
+	 */
+	if (!ret_val) {
+		nvm->ops.reload(hw);
+		usleep_range(10000, 11000);
+	}
+
+out:
+	if (ret_val)
+		e_dbg("NVM update error: %d\n", ret_val);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_update_nvm_checksum_ich8lan - Update the checksum for NVM
+ *  @hw: pointer to the HW structure
+ *
+ *  The NVM checksum is updated by calling the generic update_nvm_checksum,
+ *  which writes the checksum to the shadow ram.  The changes in the shadow
+ *  ram are then committed to the EEPROM by processing each bank at a time
+ *  checking for the modified bit and writing only the pending changes.
+ *  After a successful commit, the shadow ram is cleared and is ready for
+ *  future writes.
+ **/
+static s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
+	u32 i, act_offset, new_bank_offset, old_bank_offset, bank;
+	s32 ret_val;
+	u16 data = 0;
+
+	ret_val = e1000e_update_nvm_checksum_generic(hw);
+	if (ret_val)
+		goto out;
+
+	if (nvm->type != e1000_nvm_flash_sw)
+		goto out;
+
+	nvm->ops.acquire(hw);
+
+	/* We're writing to the opposite bank so if we're on bank 1,
+	 * write to bank 0 etc.  We also need to erase the segment that
+	 * is going to be written
+	 */
+	ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank);
+	if (ret_val) {
+		e_dbg("Could not detect valid bank, assuming bank 0\n");
+		bank = 0;
+	}
+
+	if (bank == 0) {
+		new_bank_offset = nvm->flash_bank_size;
+		old_bank_offset = 0;
+		ret_val = e1000_erase_flash_bank_ich8lan(hw, 1);
+		if (ret_val)
+			goto release;
+	} else {
+		old_bank_offset = nvm->flash_bank_size;
+		new_bank_offset = 0;
+		ret_val = e1000_erase_flash_bank_ich8lan(hw, 0);
+		if (ret_val)
+			goto release;
+	}
+	for (i = 0; i < E1000_ICH8_SHADOW_RAM_WORDS; i++) {
+		if (dev_spec->shadow_ram[i].modified) {
+			data = dev_spec->shadow_ram[i].value;
+		} else {
+			ret_val = e1000_read_flash_word_ich8lan(hw, i +
+								old_bank_offset,
+								&data);
+			if (ret_val)
+				break;
+		}
+
+		/* If the word is 0x13, then make sure the signature bits
+		 * (15:14) are 11b until the commit has completed.
+		 * This will allow us to write 10b which indicates the
+		 * signature is valid.  We want to do this after the write
+		 * has completed so that we don't mark the segment valid
+		 * while the write is still in progress
+		 */
+		if (i == E1000_ICH_NVM_SIG_WORD)
+			data |= E1000_ICH_NVM_SIG_MASK;
+
+		/* Convert offset to bytes. */
+		act_offset = (i + new_bank_offset) << 1;
+
+		usleep_range(100, 200);
+		/* Write the bytes to the new bank. */
+		ret_val = e1000_retry_write_flash_byte_ich8lan(hw,
+							       act_offset,
+							       (u8)data);
+		if (ret_val)
+			break;
+
+		usleep_range(100, 200);
+		ret_val = e1000_retry_write_flash_byte_ich8lan(hw,
+							       act_offset + 1,
+							       (u8)(data >> 8));
+		if (ret_val)
+			break;
+	}
+
+	/* Don't bother writing the segment valid bits if sector
+	 * programming failed.
+	 */
+	if (ret_val) {
+		/* Possibly read-only, see e1000e_write_protect_nvm_ich8lan() */
+		e_dbg("Flash commit failed.\n");
+		goto release;
+	}
+
+	/* Finally validate the new segment by setting bit 15:14
+	 * to 10b in word 0x13 , this can be done without an
+	 * erase as well since these bits are 11 to start with
+	 * and we need to change bit 14 to 0b
+	 */
+	act_offset = new_bank_offset + E1000_ICH_NVM_SIG_WORD;
+	ret_val = e1000_read_flash_word_ich8lan(hw, act_offset, &data);
+	if (ret_val)
+		goto release;
+
+	data &= 0xBFFF;
+	ret_val = e1000_retry_write_flash_byte_ich8lan(hw,
+						       act_offset * 2 + 1,
+						       (u8)(data >> 8));
+	if (ret_val)
+		goto release;
+
+	/* And invalidate the previously valid segment by setting
+	 * its signature word (0x13) high_byte to 0b. This can be
+	 * done without an erase because flash erase sets all bits
+	 * to 1's. We can write 1's to 0's without an erase
+	 */
+	act_offset = (old_bank_offset + E1000_ICH_NVM_SIG_WORD) * 2 + 1;
+	ret_val = e1000_retry_write_flash_byte_ich8lan(hw, act_offset, 0);
+	if (ret_val)
+		goto release;
+
+	/* Great!  Everything worked, we can now clear the cached entries. */
+	for (i = 0; i < E1000_ICH8_SHADOW_RAM_WORDS; i++) {
+		dev_spec->shadow_ram[i].modified = false;
+		dev_spec->shadow_ram[i].value = 0xFFFF;
+	}
+
+release:
+	nvm->ops.release(hw);
+
+	/* Reload the EEPROM, or else modifications will not appear
+	 * until after the next adapter reset.
+	 */
+	if (!ret_val) {
+		nvm->ops.reload(hw);
+		usleep_range(10000, 11000);
+	}
+
+out:
+	if (ret_val)
+		e_dbg("NVM update error: %d\n", ret_val);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_validate_nvm_checksum_ich8lan - Validate EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Check to see if checksum needs to be fixed by reading bit 6 in word 0x19.
+ *  If the bit is 0, that the EEPROM had been modified, but the checksum was not
+ *  calculated, in which case we need to calculate the checksum and set bit 6.
+ **/
+static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 data;
+	u16 word;
+	u16 valid_csum_mask;
+
+	/* Read NVM and check Invalid Image CSUM bit.  If this bit is 0,
+	 * the checksum needs to be fixed.  This bit is an indication that
+	 * the NVM was prepared by OEM software and did not calculate
+	 * the checksum...a likely scenario.
+	 */
+	switch (hw->mac.type) {
+	case e1000_pch_lpt:
+	case e1000_pch_spt:
+	case e1000_pch_cnp:
+	case e1000_pch_tgp:
+	case e1000_pch_adp:
+	case e1000_pch_mtp:
+	case e1000_pch_lnp:
+	case e1000_pch_ptp:
+	case e1000_pch_nvp:
+		word = NVM_COMPAT;
+		valid_csum_mask = NVM_COMPAT_VALID_CSUM;
+		break;
+	default:
+		word = NVM_FUTURE_INIT_WORD1;
+		valid_csum_mask = NVM_FUTURE_INIT_WORD1_VALID_CSUM;
+		break;
+	}
+
+	ret_val = e1000_read_nvm(hw, word, 1, &data);
+	if (ret_val)
+		return ret_val;
+
+	if (!(data & valid_csum_mask)) {
+		e_dbg("NVM Checksum valid bit not set\n");
+
+		if (hw->mac.type < e1000_pch_tgp) {
+			data |= valid_csum_mask;
+			ret_val = e1000_write_nvm(hw, word, 1, &data);
+			if (ret_val)
+				return ret_val;
+			ret_val = e1000e_update_nvm_checksum(hw);
+			if (ret_val)
+				return ret_val;
+		}
+	}
+
+	return e1000e_validate_nvm_checksum_generic(hw);
+}
+
+/**
+ *  e1000e_write_protect_nvm_ich8lan - Make the NVM read-only
+ *  @hw: pointer to the HW structure
+ *
+ *  To prevent malicious write/erase of the NVM, set it to be read-only
+ *  so that the hardware ignores all write/erase cycles of the NVM via
+ *  the flash control registers.  The shadow-ram copy of the NVM will
+ *  still be updated, however any updates to this copy will not stick
+ *  across driver reloads.
+ **/
+void e1000e_write_protect_nvm_ich8lan(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	union ich8_flash_protected_range pr0;
+	union ich8_hws_flash_status hsfsts;
+	u32 gfpreg;
+
+	nvm->ops.acquire(hw);
+
+	gfpreg = er32flash(ICH_FLASH_GFPREG);
+
+	/* Write-protect GbE Sector of NVM */
+	pr0.regval = er32flash(ICH_FLASH_PR0);
+	pr0.range.base = gfpreg & FLASH_GFPREG_BASE_MASK;
+	pr0.range.limit = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK);
+	pr0.range.wpe = true;
+	ew32flash(ICH_FLASH_PR0, pr0.regval);
+
+	/* Lock down a subset of GbE Flash Control Registers, e.g.
+	 * PR0 to prevent the write-protection from being lifted.
+	 * Once FLOCKDN is set, the registers protected by it cannot
+	 * be written until FLOCKDN is cleared by a hardware reset.
+	 */
+	hsfsts.regval = er16flash(ICH_FLASH_HSFSTS);
+	hsfsts.hsf_status.flockdn = true;
+	ew32flash(ICH_FLASH_HSFSTS, hsfsts.regval);
+
+	nvm->ops.release(hw);
+}
+
+/**
+ *  e1000_write_flash_data_ich8lan - Writes bytes to the NVM
+ *  @hw: pointer to the HW structure
+ *  @offset: The offset (in bytes) of the byte/word to read.
+ *  @size: Size of data to read, 1=byte 2=word
+ *  @data: The byte(s) to write to the NVM.
+ *
+ *  Writes one/two bytes to the NVM using the flash access registers.
+ **/
+static s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset,
+					  u8 size, u16 data)
+{
+	union ich8_hws_flash_status hsfsts;
+	union ich8_hws_flash_ctrl hsflctl;
+	u32 flash_linear_addr;
+	u32 flash_data = 0;
+	s32 ret_val;
+	u8 count = 0;
+
+	if (hw->mac.type >= e1000_pch_spt) {
+		if (size != 4 || offset > ICH_FLASH_LINEAR_ADDR_MASK)
+			return -E1000_ERR_NVM;
+	} else {
+		if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK)
+			return -E1000_ERR_NVM;
+	}
+
+	flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) +
+			     hw->nvm.flash_base_addr);
+
+	do {
+		udelay(1);
+		/* Steps */
+		ret_val = e1000_flash_cycle_init_ich8lan(hw);
+		if (ret_val)
+			break;
+		/* In SPT, This register is in Lan memory space, not
+		 * flash.  Therefore, only 32 bit access is supported
+		 */
+		if (hw->mac.type >= e1000_pch_spt)
+			hsflctl.regval = er32flash(ICH_FLASH_HSFSTS) >> 16;
+		else
+			hsflctl.regval = er16flash(ICH_FLASH_HSFCTL);
+
+		/* 0b/1b corresponds to 1 or 2 byte size, respectively. */
+		hsflctl.hsf_ctrl.fldbcount = size - 1;
+		hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE;
+		/* In SPT, This register is in Lan memory space,
+		 * not flash.  Therefore, only 32 bit access is
+		 * supported
+		 */
+		if (hw->mac.type >= e1000_pch_spt)
+			ew32flash(ICH_FLASH_HSFSTS, hsflctl.regval << 16);
+		else
+			ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval);
+
+		ew32flash(ICH_FLASH_FADDR, flash_linear_addr);
+
+		if (size == 1)
+			flash_data = (u32)data & 0x00FF;
+		else
+			flash_data = (u32)data;
+
+		ew32flash(ICH_FLASH_FDATA0, flash_data);
+
+		/* check if FCERR is set to 1 , if set to 1, clear it
+		 * and try the whole sequence a few more times else done
+		 */
+		ret_val =
+		    e1000_flash_cycle_ich8lan(hw,
+					      ICH_FLASH_WRITE_COMMAND_TIMEOUT);
+		if (!ret_val)
+			break;
+
+		/* If we're here, then things are most likely
+		 * completely hosed, but if the error condition
+		 * is detected, it won't hurt to give it another
+		 * try...ICH_FLASH_CYCLE_REPEAT_COUNT times.
+		 */
+		hsfsts.regval = er16flash(ICH_FLASH_HSFSTS);
+		if (hsfsts.hsf_status.flcerr)
+			/* Repeat for some time before giving up. */
+			continue;
+		if (!hsfsts.hsf_status.flcdone) {
+			e_dbg("Timeout error - flash cycle did not complete.\n");
+			break;
+		}
+	} while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT);
+
+	return ret_val;
+}
+
+/**
+*  e1000_write_flash_data32_ich8lan - Writes 4 bytes to the NVM
+*  @hw: pointer to the HW structure
+*  @offset: The offset (in bytes) of the dwords to read.
+*  @data: The 4 bytes to write to the NVM.
+*
+*  Writes one/two/four bytes to the NVM using the flash access registers.
+**/
+static s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset,
+					    u32 data)
+{
+	union ich8_hws_flash_status hsfsts;
+	union ich8_hws_flash_ctrl hsflctl;
+	u32 flash_linear_addr;
+	s32 ret_val;
+	u8 count = 0;
+
+	if (hw->mac.type >= e1000_pch_spt) {
+		if (offset > ICH_FLASH_LINEAR_ADDR_MASK)
+			return -E1000_ERR_NVM;
+	}
+	flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) +
+			     hw->nvm.flash_base_addr);
+	do {
+		udelay(1);
+		/* Steps */
+		ret_val = e1000_flash_cycle_init_ich8lan(hw);
+		if (ret_val)
+			break;
+
+		/* In SPT, This register is in Lan memory space, not
+		 * flash.  Therefore, only 32 bit access is supported
+		 */
+		if (hw->mac.type >= e1000_pch_spt)
+			hsflctl.regval = er32flash(ICH_FLASH_HSFSTS)
+			    >> 16;
+		else
+			hsflctl.regval = er16flash(ICH_FLASH_HSFCTL);
+
+		hsflctl.hsf_ctrl.fldbcount = sizeof(u32) - 1;
+		hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE;
+
+		/* In SPT, This register is in Lan memory space,
+		 * not flash.  Therefore, only 32 bit access is
+		 * supported
+		 */
+		if (hw->mac.type >= e1000_pch_spt)
+			ew32flash(ICH_FLASH_HSFSTS, hsflctl.regval << 16);
+		else
+			ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval);
+
+		ew32flash(ICH_FLASH_FADDR, flash_linear_addr);
+
+		ew32flash(ICH_FLASH_FDATA0, data);
+
+		/* check if FCERR is set to 1 , if set to 1, clear it
+		 * and try the whole sequence a few more times else done
+		 */
+		ret_val =
+		   e1000_flash_cycle_ich8lan(hw,
+					     ICH_FLASH_WRITE_COMMAND_TIMEOUT);
+
+		if (!ret_val)
+			break;
+
+		/* If we're here, then things are most likely
+		 * completely hosed, but if the error condition
+		 * is detected, it won't hurt to give it another
+		 * try...ICH_FLASH_CYCLE_REPEAT_COUNT times.
+		 */
+		hsfsts.regval = er16flash(ICH_FLASH_HSFSTS);
+
+		if (hsfsts.hsf_status.flcerr)
+			/* Repeat for some time before giving up. */
+			continue;
+		if (!hsfsts.hsf_status.flcdone) {
+			e_dbg("Timeout error - flash cycle did not complete.\n");
+			break;
+		}
+	} while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_write_flash_byte_ich8lan - Write a single byte to NVM
+ *  @hw: pointer to the HW structure
+ *  @offset: The index of the byte to read.
+ *  @data: The byte to write to the NVM.
+ *
+ *  Writes a single byte to the NVM using the flash access registers.
+ **/
+static s32 e1000_write_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset,
+					  u8 data)
+{
+	u16 word = (u16)data;
+
+	return e1000_write_flash_data_ich8lan(hw, offset, 1, word);
+}
+
+/**
+*  e1000_retry_write_flash_dword_ich8lan - Writes a dword to NVM
+*  @hw: pointer to the HW structure
+*  @offset: The offset of the word to write.
+*  @dword: The dword to write to the NVM.
+*
+*  Writes a single dword to the NVM using the flash access registers.
+*  Goes through a retry algorithm before giving up.
+**/
+static s32 e1000_retry_write_flash_dword_ich8lan(struct e1000_hw *hw,
+						 u32 offset, u32 dword)
+{
+	s32 ret_val;
+	u16 program_retries;
+
+	/* Must convert word offset into bytes. */
+	offset <<= 1;
+	ret_val = e1000_write_flash_data32_ich8lan(hw, offset, dword);
+
+	if (!ret_val)
+		return ret_val;
+	for (program_retries = 0; program_retries < 100; program_retries++) {
+		e_dbg("Retrying Byte %8.8X at offset %u\n", dword, offset);
+		usleep_range(100, 200);
+		ret_val = e1000_write_flash_data32_ich8lan(hw, offset, dword);
+		if (!ret_val)
+			break;
+	}
+	if (program_retries == 100)
+		return -E1000_ERR_NVM;
+
+	return 0;
+}
+
+/**
+ *  e1000_retry_write_flash_byte_ich8lan - Writes a single byte to NVM
+ *  @hw: pointer to the HW structure
+ *  @offset: The offset of the byte to write.
+ *  @byte: The byte to write to the NVM.
+ *
+ *  Writes a single byte to the NVM using the flash access registers.
+ *  Goes through a retry algorithm before giving up.
+ **/
+static s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw,
+						u32 offset, u8 byte)
+{
+	s32 ret_val;
+	u16 program_retries;
+
+	ret_val = e1000_write_flash_byte_ich8lan(hw, offset, byte);
+	if (!ret_val)
+		return ret_val;
+
+	for (program_retries = 0; program_retries < 100; program_retries++) {
+		e_dbg("Retrying Byte %2.2X at offset %u\n", byte, offset);
+		usleep_range(100, 200);
+		ret_val = e1000_write_flash_byte_ich8lan(hw, offset, byte);
+		if (!ret_val)
+			break;
+	}
+	if (program_retries == 100)
+		return -E1000_ERR_NVM;
+
+	return 0;
+}
+
+/**
+ *  e1000_erase_flash_bank_ich8lan - Erase a bank (4k) from NVM
+ *  @hw: pointer to the HW structure
+ *  @bank: 0 for first bank, 1 for second bank, etc.
+ *
+ *  Erases the bank specified. Each bank is a 4k block. Banks are 0 based.
+ *  bank N is 4096 * N + flash_reg_addr.
+ **/
+static s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	union ich8_hws_flash_status hsfsts;
+	union ich8_hws_flash_ctrl hsflctl;
+	u32 flash_linear_addr;
+	/* bank size is in 16bit words - adjust to bytes */
+	u32 flash_bank_size = nvm->flash_bank_size * 2;
+	s32 ret_val;
+	s32 count = 0;
+	s32 j, iteration, sector_size;
+
+	hsfsts.regval = er16flash(ICH_FLASH_HSFSTS);
+
+	/* Determine HW Sector size: Read BERASE bits of hw flash status
+	 * register
+	 * 00: The Hw sector is 256 bytes, hence we need to erase 16
+	 *     consecutive sectors.  The start index for the nth Hw sector
+	 *     can be calculated as = bank * 4096 + n * 256
+	 * 01: The Hw sector is 4K bytes, hence we need to erase 1 sector.
+	 *     The start index for the nth Hw sector can be calculated
+	 *     as = bank * 4096
+	 * 10: The Hw sector is 8K bytes, nth sector = bank * 8192
+	 *     (ich9 only, otherwise error condition)
+	 * 11: The Hw sector is 64K bytes, nth sector = bank * 65536
+	 */
+	switch (hsfsts.hsf_status.berasesz) {
+	case 0:
+		/* Hw sector size 256 */
+		sector_size = ICH_FLASH_SEG_SIZE_256;
+		iteration = flash_bank_size / ICH_FLASH_SEG_SIZE_256;
+		break;
+	case 1:
+		sector_size = ICH_FLASH_SEG_SIZE_4K;
+		iteration = 1;
+		break;
+	case 2:
+		sector_size = ICH_FLASH_SEG_SIZE_8K;
+		iteration = 1;
+		break;
+	case 3:
+		sector_size = ICH_FLASH_SEG_SIZE_64K;
+		iteration = 1;
+		break;
+	default:
+		return -E1000_ERR_NVM;
+	}
+
+	/* Start with the base address, then add the sector offset. */
+	flash_linear_addr = hw->nvm.flash_base_addr;
+	flash_linear_addr += (bank) ? flash_bank_size : 0;
+
+	for (j = 0; j < iteration; j++) {
+		do {
+			u32 timeout = ICH_FLASH_ERASE_COMMAND_TIMEOUT;
+
+			/* Steps */
+			ret_val = e1000_flash_cycle_init_ich8lan(hw);
+			if (ret_val)
+				return ret_val;
+
+			/* Write a value 11 (block Erase) in Flash
+			 * Cycle field in hw flash control
+			 */
+			if (hw->mac.type >= e1000_pch_spt)
+				hsflctl.regval =
+				    er32flash(ICH_FLASH_HSFSTS) >> 16;
+			else
+				hsflctl.regval = er16flash(ICH_FLASH_HSFCTL);
+
+			hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_ERASE;
+			if (hw->mac.type >= e1000_pch_spt)
+				ew32flash(ICH_FLASH_HSFSTS,
+					  hsflctl.regval << 16);
+			else
+				ew16flash(ICH_FLASH_HSFCTL, hsflctl.regval);
+
+			/* Write the last 24 bits of an index within the
+			 * block into Flash Linear address field in Flash
+			 * Address.
+			 */
+			flash_linear_addr += (j * sector_size);
+			ew32flash(ICH_FLASH_FADDR, flash_linear_addr);
+
+			ret_val = e1000_flash_cycle_ich8lan(hw, timeout);
+			if (!ret_val)
+				break;
+
+			/* Check if FCERR is set to 1.  If 1,
+			 * clear it and try the whole sequence
+			 * a few more times else Done
+			 */
+			hsfsts.regval = er16flash(ICH_FLASH_HSFSTS);
+			if (hsfsts.hsf_status.flcerr)
+				/* repeat for some time before giving up */
+				continue;
+			else if (!hsfsts.hsf_status.flcdone)
+				return ret_val;
+		} while (++count < ICH_FLASH_CYCLE_REPEAT_COUNT);
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_valid_led_default_ich8lan - Set the default LED settings
+ *  @hw: pointer to the HW structure
+ *  @data: Pointer to the LED settings
+ *
+ *  Reads the LED default settings from the NVM to data.  If the NVM LED
+ *  settings is all 0's or F's, set the LED default to a valid LED default
+ *  setting.
+ **/
+static s32 e1000_valid_led_default_ich8lan(struct e1000_hw *hw, u16 *data)
+{
+	s32 ret_val;
+
+	ret_val = e1000_read_nvm(hw, NVM_ID_LED_SETTINGS, 1, data);
+	if (ret_val) {
+		e_dbg("NVM Read Error\n");
+		return ret_val;
+	}
+
+	if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF)
+		*data = ID_LED_DEFAULT_ICH8LAN;
+
+	return 0;
+}
+
+/**
+ *  e1000_id_led_init_pchlan - store LED configurations
+ *  @hw: pointer to the HW structure
+ *
+ *  PCH does not control LEDs via the LEDCTL register, rather it uses
+ *  the PHY LED configuration register.
+ *
+ *  PCH also does not have an "always on" or "always off" mode which
+ *  complicates the ID feature.  Instead of using the "on" mode to indicate
+ *  in ledctl_mode2 the LEDs to use for ID (see e1000e_id_led_init_generic()),
+ *  use "link_up" mode.  The LEDs will still ID on request if there is no
+ *  link based on logic in e1000_led_[on|off]_pchlan().
+ **/
+static s32 e1000_id_led_init_pchlan(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	s32 ret_val;
+	const u32 ledctl_on = E1000_LEDCTL_MODE_LINK_UP;
+	const u32 ledctl_off = E1000_LEDCTL_MODE_LINK_UP | E1000_PHY_LED0_IVRT;
+	u16 data, i, temp, shift;
+
+	/* Get default ID LED modes */
+	ret_val = hw->nvm.ops.valid_led_default(hw, &data);
+	if (ret_val)
+		return ret_val;
+
+	mac->ledctl_default = er32(LEDCTL);
+	mac->ledctl_mode1 = mac->ledctl_default;
+	mac->ledctl_mode2 = mac->ledctl_default;
+
+	for (i = 0; i < 4; i++) {
+		temp = (data >> (i << 2)) & E1000_LEDCTL_LED0_MODE_MASK;
+		shift = (i * 5);
+		switch (temp) {
+		case ID_LED_ON1_DEF2:
+		case ID_LED_ON1_ON2:
+		case ID_LED_ON1_OFF2:
+			mac->ledctl_mode1 &= ~(E1000_PHY_LED0_MASK << shift);
+			mac->ledctl_mode1 |= (ledctl_on << shift);
+			break;
+		case ID_LED_OFF1_DEF2:
+		case ID_LED_OFF1_ON2:
+		case ID_LED_OFF1_OFF2:
+			mac->ledctl_mode1 &= ~(E1000_PHY_LED0_MASK << shift);
+			mac->ledctl_mode1 |= (ledctl_off << shift);
+			break;
+		default:
+			/* Do nothing */
+			break;
+		}
+		switch (temp) {
+		case ID_LED_DEF1_ON2:
+		case ID_LED_ON1_ON2:
+		case ID_LED_OFF1_ON2:
+			mac->ledctl_mode2 &= ~(E1000_PHY_LED0_MASK << shift);
+			mac->ledctl_mode2 |= (ledctl_on << shift);
+			break;
+		case ID_LED_DEF1_OFF2:
+		case ID_LED_ON1_OFF2:
+		case ID_LED_OFF1_OFF2:
+			mac->ledctl_mode2 &= ~(E1000_PHY_LED0_MASK << shift);
+			mac->ledctl_mode2 |= (ledctl_off << shift);
+			break;
+		default:
+			/* Do nothing */
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_get_bus_info_ich8lan - Get/Set the bus type and width
+ *  @hw: pointer to the HW structure
+ *
+ *  ICH8 use the PCI Express bus, but does not contain a PCI Express Capability
+ *  register, so the bus width is hard coded.
+ **/
+static s32 e1000_get_bus_info_ich8lan(struct e1000_hw *hw)
+{
+	struct e1000_bus_info *bus = &hw->bus;
+	s32 ret_val;
+
+	ret_val = e1000e_get_bus_info_pcie(hw);
+
+	/* ICH devices are "PCI Express"-ish.  They have
+	 * a configuration space, but do not contain
+	 * PCI Express Capability registers, so bus width
+	 * must be hardcoded.
+	 */
+	if (bus->width == e1000_bus_width_unknown)
+		bus->width = e1000_bus_width_pcie_x1;
+
+	return ret_val;
+}
+
+/**
+ *  e1000_reset_hw_ich8lan - Reset the hardware
+ *  @hw: pointer to the HW structure
+ *
+ *  Does a full reset of the hardware which includes a reset of the PHY and
+ *  MAC.
+ **/
+static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw)
+{
+	struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
+	u16 kum_cfg;
+	u32 ctrl, reg;
+	s32 ret_val;
+
+	/* Prevent the PCI-E bus from sticking if there is no TLP connection
+	 * on the last TLP read/write transaction when MAC is reset.
+	 */
+	ret_val = e1000e_disable_pcie_master(hw);
+	if (ret_val)
+		e_dbg("PCI-E Master disable polling has failed.\n");
+
+	e_dbg("Masking off all interrupts\n");
+	ew32(IMC, 0xffffffff);
+
+	/* Disable the Transmit and Receive units.  Then delay to allow
+	 * any pending transactions to complete before we hit the MAC
+	 * with the global reset.
+	 */
+	ew32(RCTL, 0);
+	ew32(TCTL, E1000_TCTL_PSP);
+	e1e_flush();
+
+	usleep_range(10000, 11000);
+
+	/* Workaround for ICH8 bit corruption issue in FIFO memory */
+	if (hw->mac.type == e1000_ich8lan) {
+		/* Set Tx and Rx buffer allocation to 8k apiece. */
+		ew32(PBA, E1000_PBA_8K);
+		/* Set Packet Buffer Size to 16k. */
+		ew32(PBS, E1000_PBS_16K);
+	}
+
+	if (hw->mac.type == e1000_pchlan) {
+		/* Save the NVM K1 bit setting */
+		ret_val = e1000_read_nvm(hw, E1000_NVM_K1_CONFIG, 1, &kum_cfg);
+		if (ret_val)
+			return ret_val;
+
+		if (kum_cfg & E1000_NVM_K1_ENABLE)
+			dev_spec->nvm_k1_enabled = true;
+		else
+			dev_spec->nvm_k1_enabled = false;
+	}
+
+	ctrl = er32(CTRL);
+
+	if (!hw->phy.ops.check_reset_block(hw)) {
+		/* Full-chip reset requires MAC and PHY reset at the same
+		 * time to make sure the interface between MAC and the
+		 * external PHY is reset.
+		 */
+		ctrl |= E1000_CTRL_PHY_RST;
+
+		/* Gate automatic PHY configuration by hardware on
+		 * non-managed 82579
+		 */
+		if ((hw->mac.type == e1000_pch2lan) &&
+		    !(er32(FWSM) & E1000_ICH_FWSM_FW_VALID))
+			e1000_gate_hw_phy_config_ich8lan(hw, true);
+	}
+	ret_val = e1000_acquire_swflag_ich8lan(hw);
+	e_dbg("Issuing a global reset to ich8lan\n");
+	ew32(CTRL, (ctrl | E1000_CTRL_RST));
+	/* cannot issue a flush here because it hangs the hardware */
+	msleep(20);
+
+	/* Set Phy Config Counter to 50msec */
+	if (hw->mac.type == e1000_pch2lan) {
+		reg = er32(FEXTNVM3);
+		reg &= ~E1000_FEXTNVM3_PHY_CFG_COUNTER_MASK;
+		reg |= E1000_FEXTNVM3_PHY_CFG_COUNTER_50MSEC;
+		ew32(FEXTNVM3, reg);
+	}
+
+	if (!ret_val)
+		clear_bit(__E1000_ACCESS_SHARED_RESOURCE, &hw->adapter->state);
+
+	if (ctrl & E1000_CTRL_PHY_RST) {
+		ret_val = hw->phy.ops.get_cfg_done(hw);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = e1000_post_phy_reset_ich8lan(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* For PCH, this write will make sure that any noise
+	 * will be detected as a CRC error and be dropped rather than show up
+	 * as a bad packet to the DMA engine.
+	 */
+	if (hw->mac.type == e1000_pchlan)
+		ew32(CRC_OFFSET, 0x65656565);
+
+	ew32(IMC, 0xffffffff);
+	er32(ICR);
+
+	reg = er32(KABGTXD);
+	reg |= E1000_KABGTXD_BGSQLBIAS;
+	ew32(KABGTXD, reg);
+
+	return 0;
+}
+
+/**
+ *  e1000_init_hw_ich8lan - Initialize the hardware
+ *  @hw: pointer to the HW structure
+ *
+ *  Prepares the hardware for transmit and receive by doing the following:
+ *   - initialize hardware bits
+ *   - initialize LED identification
+ *   - setup receive address registers
+ *   - setup flow control
+ *   - setup transmit descriptors
+ *   - clear statistics
+ **/
+static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 ctrl_ext, txdctl, snoop, fflt_dbg;
+	s32 ret_val;
+	u16 i;
+
+	e1000_initialize_hw_bits_ich8lan(hw);
+
+	/* Initialize identification LED */
+	ret_val = mac->ops.id_led_init(hw);
+	/* An error is not fatal and we should not stop init due to this */
+	if (ret_val)
+		e_dbg("Error initializing identification LED\n");
+
+	/* Setup the receive address. */
+	e1000e_init_rx_addrs(hw, mac->rar_entry_count);
+
+	/* Zero out the Multicast HASH table */
+	e_dbg("Zeroing the MTA\n");
+	for (i = 0; i < mac->mta_reg_count; i++)
+		E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0);
+
+	/* The 82578 Rx buffer will stall if wakeup is enabled in host and
+	 * the ME.  Disable wakeup by clearing the host wakeup bit.
+	 * Reset the phy after disabling host wakeup to reset the Rx buffer.
+	 */
+	if (hw->phy.type == e1000_phy_82578) {
+		e1e_rphy(hw, BM_PORT_GEN_CFG, &i);
+		i &= ~BM_WUC_HOST_WU_BIT;
+		e1e_wphy(hw, BM_PORT_GEN_CFG, i);
+		ret_val = e1000_phy_hw_reset_ich8lan(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Setup link and flow control */
+	ret_val = mac->ops.setup_link(hw);
+
+	/* Set the transmit descriptor write-back policy for both queues */
+	txdctl = er32(TXDCTL(0));
+	txdctl = ((txdctl & ~E1000_TXDCTL_WTHRESH) |
+		  E1000_TXDCTL_FULL_TX_DESC_WB);
+	txdctl = ((txdctl & ~E1000_TXDCTL_PTHRESH) |
+		  E1000_TXDCTL_MAX_TX_DESC_PREFETCH);
+	ew32(TXDCTL(0), txdctl);
+	txdctl = er32(TXDCTL(1));
+	txdctl = ((txdctl & ~E1000_TXDCTL_WTHRESH) |
+		  E1000_TXDCTL_FULL_TX_DESC_WB);
+	txdctl = ((txdctl & ~E1000_TXDCTL_PTHRESH) |
+		  E1000_TXDCTL_MAX_TX_DESC_PREFETCH);
+	ew32(TXDCTL(1), txdctl);
+
+	/* ICH8 has opposite polarity of no_snoop bits.
+	 * By default, we should use snoop behavior.
+	 */
+	if (mac->type == e1000_ich8lan)
+		snoop = PCIE_ICH8_SNOOP_ALL;
+	else
+		snoop = (u32)~(PCIE_NO_SNOOP_ALL);
+	e1000e_set_pcie_no_snoop(hw, snoop);
+
+	/* Enable workaround for packet loss issue on TGP PCH
+	 * Do not gate DMA clock from the modPHY block
+	 */
+	if (mac->type >= e1000_pch_tgp) {
+		fflt_dbg = er32(FFLT_DBG);
+		fflt_dbg |= E1000_FFLT_DBG_DONT_GATE_WAKE_DMA_CLK;
+		ew32(FFLT_DBG, fflt_dbg);
+	}
+
+	ctrl_ext = er32(CTRL_EXT);
+	ctrl_ext |= E1000_CTRL_EXT_RO_DIS;
+	ew32(CTRL_EXT, ctrl_ext);
+
+	/* Clear all of the statistics registers (clear on read).  It is
+	 * important that we do this after we have tried to establish link
+	 * because the symbol error count will increment wildly if there
+	 * is no link.
+	 */
+	e1000_clear_hw_cntrs_ich8lan(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_initialize_hw_bits_ich8lan - Initialize required hardware bits
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets/Clears required hardware bits necessary for correctly setting up the
+ *  hardware for transmit and receive.
+ **/
+static void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw)
+{
+	u32 reg;
+
+	/* Extended Device Control */
+	reg = er32(CTRL_EXT);
+	reg |= BIT(22);
+	/* Enable PHY low-power state when MAC is at D3 w/o WoL */
+	if (hw->mac.type >= e1000_pchlan)
+		reg |= E1000_CTRL_EXT_PHYPDEN;
+	ew32(CTRL_EXT, reg);
+
+	/* Transmit Descriptor Control 0 */
+	reg = er32(TXDCTL(0));
+	reg |= BIT(22);
+	ew32(TXDCTL(0), reg);
+
+	/* Transmit Descriptor Control 1 */
+	reg = er32(TXDCTL(1));
+	reg |= BIT(22);
+	ew32(TXDCTL(1), reg);
+
+	/* Transmit Arbitration Control 0 */
+	reg = er32(TARC(0));
+	if (hw->mac.type == e1000_ich8lan)
+		reg |= BIT(28) | BIT(29);
+	reg |= BIT(23) | BIT(24) | BIT(26) | BIT(27);
+	ew32(TARC(0), reg);
+
+	/* Transmit Arbitration Control 1 */
+	reg = er32(TARC(1));
+	if (er32(TCTL) & E1000_TCTL_MULR)
+		reg &= ~BIT(28);
+	else
+		reg |= BIT(28);
+	reg |= BIT(24) | BIT(26) | BIT(30);
+	ew32(TARC(1), reg);
+
+	/* Device Status */
+	if (hw->mac.type == e1000_ich8lan) {
+		reg = er32(STATUS);
+		reg &= ~BIT(31);
+		ew32(STATUS, reg);
+	}
+
+	/* work-around descriptor data corruption issue during nfs v2 udp
+	 * traffic, just disable the nfs filtering capability
+	 */
+	reg = er32(RFCTL);
+	reg |= (E1000_RFCTL_NFSW_DIS | E1000_RFCTL_NFSR_DIS);
+
+	/* Disable IPv6 extension header parsing because some malformed
+	 * IPv6 headers can hang the Rx.
+	 */
+	if (hw->mac.type == e1000_ich8lan)
+		reg |= (E1000_RFCTL_IPV6_EX_DIS | E1000_RFCTL_NEW_IPV6_EXT_DIS);
+	ew32(RFCTL, reg);
+
+	/* Enable ECC on Lynxpoint */
+	if (hw->mac.type >= e1000_pch_lpt) {
+		reg = er32(PBECCSTS);
+		reg |= E1000_PBECCSTS_ECC_ENABLE;
+		ew32(PBECCSTS, reg);
+
+		reg = er32(CTRL);
+		reg |= E1000_CTRL_MEHE;
+		ew32(CTRL, reg);
+	}
+}
+
+/**
+ *  e1000_setup_link_ich8lan - Setup flow control and link settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Determines which flow control settings to use, then configures flow
+ *  control.  Calls the appropriate media-specific link configuration
+ *  function.  Assuming the adapter has a valid link partner, a valid link
+ *  should be established.  Assumes the hardware has previously been reset
+ *  and the transmitter and receiver are not enabled.
+ **/
+static s32 e1000_setup_link_ich8lan(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	if (hw->phy.ops.check_reset_block(hw))
+		return 0;
+
+	/* ICH parts do not have a word in the NVM to determine
+	 * the default flow control setting, so we explicitly
+	 * set it to full.
+	 */
+	if (hw->fc.requested_mode == e1000_fc_default) {
+		/* Workaround h/w hang when Tx flow control enabled */
+		if (hw->mac.type == e1000_pchlan)
+			hw->fc.requested_mode = e1000_fc_rx_pause;
+		else
+			hw->fc.requested_mode = e1000_fc_full;
+	}
+
+	/* Save off the requested flow control mode for use later.  Depending
+	 * on the link partner's capabilities, we may or may not use this mode.
+	 */
+	hw->fc.current_mode = hw->fc.requested_mode;
+
+	e_dbg("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode);
+
+	/* Continue to configure the copper link. */
+	ret_val = hw->mac.ops.setup_physical_interface(hw);
+	if (ret_val)
+		return ret_val;
+
+	ew32(FCTTV, hw->fc.pause_time);
+	if ((hw->phy.type == e1000_phy_82578) ||
+	    (hw->phy.type == e1000_phy_82579) ||
+	    (hw->phy.type == e1000_phy_i217) ||
+	    (hw->phy.type == e1000_phy_82577)) {
+		ew32(FCRTV_PCH, hw->fc.refresh_time);
+
+		ret_val = e1e_wphy(hw, PHY_REG(BM_PORT_CTRL_PAGE, 27),
+				   hw->fc.pause_time);
+		if (ret_val)
+			return ret_val;
+	}
+
+	return e1000e_set_fc_watermarks(hw);
+}
+
+/**
+ *  e1000_setup_copper_link_ich8lan - Configure MAC/PHY interface
+ *  @hw: pointer to the HW structure
+ *
+ *  Configures the kumeran interface to the PHY to wait the appropriate time
+ *  when polling the PHY, then call the generic setup_copper_link to finish
+ *  configuring the copper link.
+ **/
+static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 ret_val;
+	u16 reg_data;
+
+	ctrl = er32(CTRL);
+	ctrl |= E1000_CTRL_SLU;
+	ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+	ew32(CTRL, ctrl);
+
+	/* Set the mac to wait the maximum time between each iteration
+	 * and increase the max iterations when polling the phy;
+	 * this fixes erroneous timeouts at 10Mbps.
+	 */
+	ret_val = e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_TIMEOUTS, 0xFFFF);
+	if (ret_val)
+		return ret_val;
+	ret_val = e1000e_read_kmrn_reg(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
+				       &reg_data);
+	if (ret_val)
+		return ret_val;
+	reg_data |= 0x3F;
+	ret_val = e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_INBAND_PARAM,
+					reg_data);
+	if (ret_val)
+		return ret_val;
+
+	switch (hw->phy.type) {
+	case e1000_phy_igp_3:
+		ret_val = e1000e_copper_link_setup_igp(hw);
+		if (ret_val)
+			return ret_val;
+		break;
+	case e1000_phy_bm:
+	case e1000_phy_82578:
+		ret_val = e1000e_copper_link_setup_m88(hw);
+		if (ret_val)
+			return ret_val;
+		break;
+	case e1000_phy_82577:
+	case e1000_phy_82579:
+		ret_val = e1000_copper_link_setup_82577(hw);
+		if (ret_val)
+			return ret_val;
+		break;
+	case e1000_phy_ife:
+		ret_val = e1e_rphy(hw, IFE_PHY_MDIX_CONTROL, &reg_data);
+		if (ret_val)
+			return ret_val;
+
+		reg_data &= ~IFE_PMC_AUTO_MDIX;
+
+		switch (hw->phy.mdix) {
+		case 1:
+			reg_data &= ~IFE_PMC_FORCE_MDIX;
+			break;
+		case 2:
+			reg_data |= IFE_PMC_FORCE_MDIX;
+			break;
+		case 0:
+		default:
+			reg_data |= IFE_PMC_AUTO_MDIX;
+			break;
+		}
+		ret_val = e1e_wphy(hw, IFE_PHY_MDIX_CONTROL, reg_data);
+		if (ret_val)
+			return ret_val;
+		break;
+	default:
+		break;
+	}
+
+	return e1000e_setup_copper_link(hw);
+}
+
+/**
+ *  e1000_setup_copper_link_pch_lpt - Configure MAC/PHY interface
+ *  @hw: pointer to the HW structure
+ *
+ *  Calls the PHY specific link setup function and then calls the
+ *  generic setup_copper_link to finish configuring the link for
+ *  Lynxpoint PCH devices
+ **/
+static s32 e1000_setup_copper_link_pch_lpt(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 ret_val;
+
+	ctrl = er32(CTRL);
+	ctrl |= E1000_CTRL_SLU;
+	ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+	ew32(CTRL, ctrl);
+
+	ret_val = e1000_copper_link_setup_82577(hw);
+	if (ret_val)
+		return ret_val;
+
+	return e1000e_setup_copper_link(hw);
+}
+
+/**
+ *  e1000_get_link_up_info_ich8lan - Get current link speed and duplex
+ *  @hw: pointer to the HW structure
+ *  @speed: pointer to store current link speed
+ *  @duplex: pointer to store the current link duplex
+ *
+ *  Calls the generic get_speed_and_duplex to retrieve the current link
+ *  information and then calls the Kumeran lock loss workaround for links at
+ *  gigabit speeds.
+ **/
+static s32 e1000_get_link_up_info_ich8lan(struct e1000_hw *hw, u16 *speed,
+					  u16 *duplex)
+{
+	s32 ret_val;
+
+	ret_val = e1000e_get_speed_and_duplex_copper(hw, speed, duplex);
+	if (ret_val)
+		return ret_val;
+
+	if ((hw->mac.type == e1000_ich8lan) &&
+	    (hw->phy.type == e1000_phy_igp_3) && (*speed == SPEED_1000)) {
+		ret_val = e1000_kmrn_lock_loss_workaround_ich8lan(hw);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_kmrn_lock_loss_workaround_ich8lan - Kumeran workaround
+ *  @hw: pointer to the HW structure
+ *
+ *  Work-around for 82566 Kumeran PCS lock loss:
+ *  On link status change (i.e. PCI reset, speed change) and link is up and
+ *  speed is gigabit-
+ *    0) if workaround is optionally disabled do nothing
+ *    1) wait 1ms for Kumeran link to come up
+ *    2) check Kumeran Diagnostic register PCS lock loss bit
+ *    3) if not set the link is locked (all is good), otherwise...
+ *    4) reset the PHY
+ *    5) repeat up to 10 times
+ *  Note: this is only called for IGP3 copper when speed is 1gb.
+ **/
+static s32 e1000_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw)
+{
+	struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
+	u32 phy_ctrl;
+	s32 ret_val;
+	u16 i, data;
+	bool link;
+
+	if (!dev_spec->kmrn_lock_loss_workaround_enabled)
+		return 0;
+
+	/* Make sure link is up before proceeding.  If not just return.
+	 * Attempting this while link is negotiating fouled up link
+	 * stability
+	 */
+	ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link);
+	if (!link)
+		return 0;
+
+	for (i = 0; i < 10; i++) {
+		/* read once to clear */
+		ret_val = e1e_rphy(hw, IGP3_KMRN_DIAG, &data);
+		if (ret_val)
+			return ret_val;
+		/* and again to get new status */
+		ret_val = e1e_rphy(hw, IGP3_KMRN_DIAG, &data);
+		if (ret_val)
+			return ret_val;
+
+		/* check for PCS lock */
+		if (!(data & IGP3_KMRN_DIAG_PCS_LOCK_LOSS))
+			return 0;
+
+		/* Issue PHY reset */
+		e1000_phy_hw_reset(hw);
+		mdelay(5);
+	}
+	/* Disable GigE link negotiation */
+	phy_ctrl = er32(PHY_CTRL);
+	phy_ctrl |= (E1000_PHY_CTRL_GBE_DISABLE |
+		     E1000_PHY_CTRL_NOND0A_GBE_DISABLE);
+	ew32(PHY_CTRL, phy_ctrl);
+
+	/* Call gig speed drop workaround on Gig disable before accessing
+	 * any PHY registers
+	 */
+	e1000e_gig_downshift_workaround_ich8lan(hw);
+
+	/* unable to acquire PCS lock */
+	return -E1000_ERR_PHY;
+}
+
+/**
+ *  e1000e_set_kmrn_lock_loss_workaround_ich8lan - Set Kumeran workaround state
+ *  @hw: pointer to the HW structure
+ *  @state: boolean value used to set the current Kumeran workaround state
+ *
+ *  If ICH8, set the current Kumeran workaround state (enabled - true
+ *  /disabled - false).
+ **/
+void e1000e_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw,
+						  bool state)
+{
+	struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
+
+	if (hw->mac.type != e1000_ich8lan) {
+		e_dbg("Workaround applies to ICH8 only.\n");
+		return;
+	}
+
+	dev_spec->kmrn_lock_loss_workaround_enabled = state;
+}
+
+/**
+ *  e1000e_igp3_phy_powerdown_workaround_ich8lan - Power down workaround on D3
+ *  @hw: pointer to the HW structure
+ *
+ *  Workaround for 82566 power-down on D3 entry:
+ *    1) disable gigabit link
+ *    2) write VR power-down enable
+ *    3) read it back
+ *  Continue if successful, else issue LCD reset and repeat
+ **/
+void e1000e_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw)
+{
+	u32 reg;
+	u16 data;
+	u8 retry = 0;
+
+	if (hw->phy.type != e1000_phy_igp_3)
+		return;
+
+	/* Try the workaround twice (if needed) */
+	do {
+		/* Disable link */
+		reg = er32(PHY_CTRL);
+		reg |= (E1000_PHY_CTRL_GBE_DISABLE |
+			E1000_PHY_CTRL_NOND0A_GBE_DISABLE);
+		ew32(PHY_CTRL, reg);
+
+		/* Call gig speed drop workaround on Gig disable before
+		 * accessing any PHY registers
+		 */
+		if (hw->mac.type == e1000_ich8lan)
+			e1000e_gig_downshift_workaround_ich8lan(hw);
+
+		/* Write VR power-down enable */
+		e1e_rphy(hw, IGP3_VR_CTRL, &data);
+		data &= ~IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK;
+		e1e_wphy(hw, IGP3_VR_CTRL, data | IGP3_VR_CTRL_MODE_SHUTDOWN);
+
+		/* Read it back and test */
+		e1e_rphy(hw, IGP3_VR_CTRL, &data);
+		data &= IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK;
+		if ((data == IGP3_VR_CTRL_MODE_SHUTDOWN) || retry)
+			break;
+
+		/* Issue PHY reset and repeat at most one more time */
+		reg = er32(CTRL);
+		ew32(CTRL, reg | E1000_CTRL_PHY_RST);
+		retry++;
+	} while (retry);
+}
+
+/**
+ *  e1000e_gig_downshift_workaround_ich8lan - WoL from S5 stops working
+ *  @hw: pointer to the HW structure
+ *
+ *  Steps to take when dropping from 1Gb/s (eg. link cable removal (LSC),
+ *  LPLU, Gig disable, MDIC PHY reset):
+ *    1) Set Kumeran Near-end loopback
+ *    2) Clear Kumeran Near-end loopback
+ *  Should only be called for ICH8[m] devices with any 1G Phy.
+ **/
+void e1000e_gig_downshift_workaround_ich8lan(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 reg_data;
+
+	if ((hw->mac.type != e1000_ich8lan) || (hw->phy.type == e1000_phy_ife))
+		return;
+
+	ret_val = e1000e_read_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET,
+				       &reg_data);
+	if (ret_val)
+		return;
+	reg_data |= E1000_KMRNCTRLSTA_DIAG_NELPBK;
+	ret_val = e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET,
+					reg_data);
+	if (ret_val)
+		return;
+	reg_data &= ~E1000_KMRNCTRLSTA_DIAG_NELPBK;
+	e1000e_write_kmrn_reg(hw, E1000_KMRNCTRLSTA_DIAG_OFFSET, reg_data);
+}
+
+/**
+ *  e1000_suspend_workarounds_ich8lan - workarounds needed during S0->Sx
+ *  @hw: pointer to the HW structure
+ *
+ *  During S0 to Sx transition, it is possible the link remains at gig
+ *  instead of negotiating to a lower speed.  Before going to Sx, set
+ *  'Gig Disable' to force link speed negotiation to a lower speed based on
+ *  the LPLU setting in the NVM or custom setting.  For PCH and newer parts,
+ *  the OEM bits PHY register (LED, GbE disable and LPLU configurations) also
+ *  needs to be written.
+ *  Parts that support (and are linked to a partner which support) EEE in
+ *  100Mbps should disable LPLU since 100Mbps w/ EEE requires less power
+ *  than 10Mbps w/o EEE.
+ **/
+void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw)
+{
+	struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan;
+	u32 phy_ctrl;
+	s32 ret_val;
+
+	phy_ctrl = er32(PHY_CTRL);
+	phy_ctrl |= E1000_PHY_CTRL_GBE_DISABLE;
+
+	if (hw->phy.type == e1000_phy_i217) {
+		u16 phy_reg, device_id = hw->adapter->pdev->device;
+
+		if ((device_id == E1000_DEV_ID_PCH_LPTLP_I218_LM) ||
+		    (device_id == E1000_DEV_ID_PCH_LPTLP_I218_V) ||
+		    (device_id == E1000_DEV_ID_PCH_I218_LM3) ||
+		    (device_id == E1000_DEV_ID_PCH_I218_V3) ||
+		    (hw->mac.type >= e1000_pch_spt)) {
+			u32 fextnvm6 = er32(FEXTNVM6);
+
+			ew32(FEXTNVM6, fextnvm6 & ~E1000_FEXTNVM6_REQ_PLL_CLK);
+		}
+
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			goto out;
+
+		if (!dev_spec->eee_disable) {
+			u16 eee_advert;
+
+			ret_val =
+			    e1000_read_emi_reg_locked(hw,
+						      I217_EEE_ADVERTISEMENT,
+						      &eee_advert);
+			if (ret_val)
+				goto release;
+
+			/* Disable LPLU if both link partners support 100BaseT
+			 * EEE and 100Full is advertised on both ends of the
+			 * link, and enable Auto Enable LPI since there will
+			 * be no driver to enable LPI while in Sx.
+			 */
+			if ((eee_advert & I82579_EEE_100_SUPPORTED) &&
+			    (dev_spec->eee_lp_ability &
+			     I82579_EEE_100_SUPPORTED) &&
+			    (hw->phy.autoneg_advertised & ADVERTISE_100_FULL)) {
+				phy_ctrl &= ~(E1000_PHY_CTRL_D0A_LPLU |
+					      E1000_PHY_CTRL_NOND0A_LPLU);
+
+				/* Set Auto Enable LPI after link up */
+				e1e_rphy_locked(hw,
+						I217_LPI_GPIO_CTRL, &phy_reg);
+				phy_reg |= I217_LPI_GPIO_CTRL_AUTO_EN_LPI;
+				e1e_wphy_locked(hw,
+						I217_LPI_GPIO_CTRL, phy_reg);
+			}
+		}
+
+		/* For i217 Intel Rapid Start Technology support,
+		 * when the system is going into Sx and no manageability engine
+		 * is present, the driver must configure proxy to reset only on
+		 * power good.  LPI (Low Power Idle) state must also reset only
+		 * on power good, as well as the MTA (Multicast table array).
+		 * The SMBus release must also be disabled on LCD reset.
+		 */
+		if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
+			/* Enable proxy to reset only on power good. */
+			e1e_rphy_locked(hw, I217_PROXY_CTRL, &phy_reg);
+			phy_reg |= I217_PROXY_CTRL_AUTO_DISABLE;
+			e1e_wphy_locked(hw, I217_PROXY_CTRL, phy_reg);
+
+			/* Set bit enable LPI (EEE) to reset only on
+			 * power good.
+			 */
+			e1e_rphy_locked(hw, I217_SxCTRL, &phy_reg);
+			phy_reg |= I217_SxCTRL_ENABLE_LPI_RESET;
+			e1e_wphy_locked(hw, I217_SxCTRL, phy_reg);
+
+			/* Disable the SMB release on LCD reset. */
+			e1e_rphy_locked(hw, I217_MEMPWR, &phy_reg);
+			phy_reg &= ~I217_MEMPWR_DISABLE_SMB_RELEASE;
+			e1e_wphy_locked(hw, I217_MEMPWR, phy_reg);
+		}
+
+		/* Enable MTA to reset for Intel Rapid Start Technology
+		 * Support
+		 */
+		e1e_rphy_locked(hw, I217_CGFREG, &phy_reg);
+		phy_reg |= I217_CGFREG_ENABLE_MTA_RESET;
+		e1e_wphy_locked(hw, I217_CGFREG, phy_reg);
+
+release:
+		hw->phy.ops.release(hw);
+	}
+out:
+	ew32(PHY_CTRL, phy_ctrl);
+
+	if (hw->mac.type == e1000_ich8lan)
+		e1000e_gig_downshift_workaround_ich8lan(hw);
+
+	if (hw->mac.type >= e1000_pchlan) {
+		e1000_oem_bits_config_ich8lan(hw, false);
+
+		/* Reset PHY to activate OEM bits on 82577/8 */
+		if (hw->mac.type == e1000_pchlan)
+			e1000e_phy_hw_reset_generic(hw);
+
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return;
+		e1000_write_smbus_addr(hw);
+		hw->phy.ops.release(hw);
+	}
+}
+
+/**
+ *  e1000_resume_workarounds_pchlan - workarounds needed during Sx->S0
+ *  @hw: pointer to the HW structure
+ *
+ *  During Sx to S0 transitions on non-managed devices or managed devices
+ *  on which PHY resets are not blocked, if the PHY registers cannot be
+ *  accessed properly by the s/w toggle the LANPHYPC value to power cycle
+ *  the PHY.
+ *  On i217, setup Intel Rapid Start Technology.
+ **/
+void e1000_resume_workarounds_pchlan(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	if (hw->mac.type < e1000_pch2lan)
+		return;
+
+	ret_val = e1000_init_phy_workarounds_pchlan(hw);
+	if (ret_val) {
+		e_dbg("Failed to init PHY flow ret_val=%d\n", ret_val);
+		return;
+	}
+
+	/* For i217 Intel Rapid Start Technology support when the system
+	 * is transitioning from Sx and no manageability engine is present
+	 * configure SMBus to restore on reset, disable proxy, and enable
+	 * the reset on MTA (Multicast table array).
+	 */
+	if (hw->phy.type == e1000_phy_i217) {
+		u16 phy_reg;
+
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val) {
+			e_dbg("Failed to setup iRST\n");
+			return;
+		}
+
+		/* Clear Auto Enable LPI after link up */
+		e1e_rphy_locked(hw, I217_LPI_GPIO_CTRL, &phy_reg);
+		phy_reg &= ~I217_LPI_GPIO_CTRL_AUTO_EN_LPI;
+		e1e_wphy_locked(hw, I217_LPI_GPIO_CTRL, phy_reg);
+
+		if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
+			/* Restore clear on SMB if no manageability engine
+			 * is present
+			 */
+			ret_val = e1e_rphy_locked(hw, I217_MEMPWR, &phy_reg);
+			if (ret_val)
+				goto release;
+			phy_reg |= I217_MEMPWR_DISABLE_SMB_RELEASE;
+			e1e_wphy_locked(hw, I217_MEMPWR, phy_reg);
+
+			/* Disable Proxy */
+			e1e_wphy_locked(hw, I217_PROXY_CTRL, 0);
+		}
+		/* Enable reset on MTA */
+		ret_val = e1e_rphy_locked(hw, I217_CGFREG, &phy_reg);
+		if (ret_val)
+			goto release;
+		phy_reg &= ~I217_CGFREG_ENABLE_MTA_RESET;
+		e1e_wphy_locked(hw, I217_CGFREG, phy_reg);
+release:
+		if (ret_val)
+			e_dbg("Error %d in resume workarounds\n", ret_val);
+		hw->phy.ops.release(hw);
+	}
+}
+
+/**
+ *  e1000_cleanup_led_ich8lan - Restore the default LED operation
+ *  @hw: pointer to the HW structure
+ *
+ *  Return the LED back to the default configuration.
+ **/
+static s32 e1000_cleanup_led_ich8lan(struct e1000_hw *hw)
+{
+	if (hw->phy.type == e1000_phy_ife)
+		return e1e_wphy(hw, IFE_PHY_SPECIAL_CONTROL_LED, 0);
+
+	ew32(LEDCTL, hw->mac.ledctl_default);
+	return 0;
+}
+
+/**
+ *  e1000_led_on_ich8lan - Turn LEDs on
+ *  @hw: pointer to the HW structure
+ *
+ *  Turn on the LEDs.
+ **/
+static s32 e1000_led_on_ich8lan(struct e1000_hw *hw)
+{
+	if (hw->phy.type == e1000_phy_ife)
+		return e1e_wphy(hw, IFE_PHY_SPECIAL_CONTROL_LED,
+				(IFE_PSCL_PROBE_MODE | IFE_PSCL_PROBE_LEDS_ON));
+
+	ew32(LEDCTL, hw->mac.ledctl_mode2);
+	return 0;
+}
+
+/**
+ *  e1000_led_off_ich8lan - Turn LEDs off
+ *  @hw: pointer to the HW structure
+ *
+ *  Turn off the LEDs.
+ **/
+static s32 e1000_led_off_ich8lan(struct e1000_hw *hw)
+{
+	if (hw->phy.type == e1000_phy_ife)
+		return e1e_wphy(hw, IFE_PHY_SPECIAL_CONTROL_LED,
+				(IFE_PSCL_PROBE_MODE |
+				 IFE_PSCL_PROBE_LEDS_OFF));
+
+	ew32(LEDCTL, hw->mac.ledctl_mode1);
+	return 0;
+}
+
+/**
+ *  e1000_setup_led_pchlan - Configures SW controllable LED
+ *  @hw: pointer to the HW structure
+ *
+ *  This prepares the SW controllable LED for use.
+ **/
+static s32 e1000_setup_led_pchlan(struct e1000_hw *hw)
+{
+	return e1e_wphy(hw, HV_LED_CONFIG, (u16)hw->mac.ledctl_mode1);
+}
+
+/**
+ *  e1000_cleanup_led_pchlan - Restore the default LED operation
+ *  @hw: pointer to the HW structure
+ *
+ *  Return the LED back to the default configuration.
+ **/
+static s32 e1000_cleanup_led_pchlan(struct e1000_hw *hw)
+{
+	return e1e_wphy(hw, HV_LED_CONFIG, (u16)hw->mac.ledctl_default);
+}
+
+/**
+ *  e1000_led_on_pchlan - Turn LEDs on
+ *  @hw: pointer to the HW structure
+ *
+ *  Turn on the LEDs.
+ **/
+static s32 e1000_led_on_pchlan(struct e1000_hw *hw)
+{
+	u16 data = (u16)hw->mac.ledctl_mode2;
+	u32 i, led;
+
+	/* If no link, then turn LED on by setting the invert bit
+	 * for each LED that's mode is "link_up" in ledctl_mode2.
+	 */
+	if (!(er32(STATUS) & E1000_STATUS_LU)) {
+		for (i = 0; i < 3; i++) {
+			led = (data >> (i * 5)) & E1000_PHY_LED0_MASK;
+			if ((led & E1000_PHY_LED0_MODE_MASK) !=
+			    E1000_LEDCTL_MODE_LINK_UP)
+				continue;
+			if (led & E1000_PHY_LED0_IVRT)
+				data &= ~(E1000_PHY_LED0_IVRT << (i * 5));
+			else
+				data |= (E1000_PHY_LED0_IVRT << (i * 5));
+		}
+	}
+
+	return e1e_wphy(hw, HV_LED_CONFIG, data);
+}
+
+/**
+ *  e1000_led_off_pchlan - Turn LEDs off
+ *  @hw: pointer to the HW structure
+ *
+ *  Turn off the LEDs.
+ **/
+static s32 e1000_led_off_pchlan(struct e1000_hw *hw)
+{
+	u16 data = (u16)hw->mac.ledctl_mode1;
+	u32 i, led;
+
+	/* If no link, then turn LED off by clearing the invert bit
+	 * for each LED that's mode is "link_up" in ledctl_mode1.
+	 */
+	if (!(er32(STATUS) & E1000_STATUS_LU)) {
+		for (i = 0; i < 3; i++) {
+			led = (data >> (i * 5)) & E1000_PHY_LED0_MASK;
+			if ((led & E1000_PHY_LED0_MODE_MASK) !=
+			    E1000_LEDCTL_MODE_LINK_UP)
+				continue;
+			if (led & E1000_PHY_LED0_IVRT)
+				data &= ~(E1000_PHY_LED0_IVRT << (i * 5));
+			else
+				data |= (E1000_PHY_LED0_IVRT << (i * 5));
+		}
+	}
+
+	return e1e_wphy(hw, HV_LED_CONFIG, data);
+}
+
+/**
+ *  e1000_get_cfg_done_ich8lan - Read config done bit after Full or PHY reset
+ *  @hw: pointer to the HW structure
+ *
+ *  Read appropriate register for the config done bit for completion status
+ *  and configure the PHY through s/w for EEPROM-less parts.
+ *
+ *  NOTE: some silicon which is EEPROM-less will fail trying to read the
+ *  config done bit, so only an error is logged and continues.  If we were
+ *  to return with error, EEPROM-less silicon would not be able to be reset
+ *  or change link.
+ **/
+static s32 e1000_get_cfg_done_ich8lan(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u32 bank = 0;
+	u32 status;
+
+	e1000e_get_cfg_done_generic(hw);
+
+	/* Wait for indication from h/w that it has completed basic config */
+	if (hw->mac.type >= e1000_ich10lan) {
+		e1000_lan_init_done_ich8lan(hw);
+	} else {
+		ret_val = e1000e_get_auto_rd_done(hw);
+		if (ret_val) {
+			/* When auto config read does not complete, do not
+			 * return with an error. This can happen in situations
+			 * where there is no eeprom and prevents getting link.
+			 */
+			e_dbg("Auto Read Done did not complete\n");
+			ret_val = 0;
+		}
+	}
+
+	/* Clear PHY Reset Asserted bit */
+	status = er32(STATUS);
+	if (status & E1000_STATUS_PHYRA)
+		ew32(STATUS, status & ~E1000_STATUS_PHYRA);
+	else
+		e_dbg("PHY Reset Asserted not set - needs delay\n");
+
+	/* If EEPROM is not marked present, init the IGP 3 PHY manually */
+	if (hw->mac.type <= e1000_ich9lan) {
+		if (!(er32(EECD) & E1000_EECD_PRES) &&
+		    (hw->phy.type == e1000_phy_igp_3)) {
+			e1000e_phy_init_script_igp3(hw);
+		}
+	} else {
+		if (e1000_valid_nvm_bank_detect_ich8lan(hw, &bank)) {
+			/* Maybe we should do a basic PHY config */
+			e_dbg("EEPROM not present\n");
+			ret_val = -E1000_ERR_CONFIG;
+		}
+	}
+
+	return ret_val;
+}
+
+/**
+ * e1000_power_down_phy_copper_ich8lan - Remove link during PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, or wake on lan is not enabled, remove the link.
+ **/
+static void e1000_power_down_phy_copper_ich8lan(struct e1000_hw *hw)
+{
+	/* If the management interface is not enabled, then power down */
+	if (!(hw->mac.ops.check_mng_mode(hw) ||
+	      hw->phy.ops.check_reset_block(hw)))
+		e1000_power_down_phy_copper(hw);
+}
+
+/**
+ *  e1000_clear_hw_cntrs_ich8lan - Clear statistical counters
+ *  @hw: pointer to the HW structure
+ *
+ *  Clears hardware counters specific to the silicon family and calls
+ *  clear_hw_cntrs_generic to clear all general purpose counters.
+ **/
+static void e1000_clear_hw_cntrs_ich8lan(struct e1000_hw *hw)
+{
+	u16 phy_data;
+	s32 ret_val;
+
+	e1000e_clear_hw_cntrs_base(hw);
+
+	er32(ALGNERRC);
+	er32(RXERRC);
+	er32(TNCRS);
+	er32(CEXTERR);
+	er32(TSCTC);
+	er32(TSCTFC);
+
+	er32(MGTPRC);
+	er32(MGTPDC);
+	er32(MGTPTC);
+
+	er32(IAC);
+	er32(ICRXOC);
+
+	/* Clear PHY statistics registers */
+	if ((hw->phy.type == e1000_phy_82578) ||
+	    (hw->phy.type == e1000_phy_82579) ||
+	    (hw->phy.type == e1000_phy_i217) ||
+	    (hw->phy.type == e1000_phy_82577)) {
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return;
+		ret_val = hw->phy.ops.set_page(hw,
+					       HV_STATS_PAGE << IGP_PAGE_SHIFT);
+		if (ret_val)
+			goto release;
+		hw->phy.ops.read_reg_page(hw, HV_SCC_UPPER, &phy_data);
+		hw->phy.ops.read_reg_page(hw, HV_SCC_LOWER, &phy_data);
+		hw->phy.ops.read_reg_page(hw, HV_ECOL_UPPER, &phy_data);
+		hw->phy.ops.read_reg_page(hw, HV_ECOL_LOWER, &phy_data);
+		hw->phy.ops.read_reg_page(hw, HV_MCC_UPPER, &phy_data);
+		hw->phy.ops.read_reg_page(hw, HV_MCC_LOWER, &phy_data);
+		hw->phy.ops.read_reg_page(hw, HV_LATECOL_UPPER, &phy_data);
+		hw->phy.ops.read_reg_page(hw, HV_LATECOL_LOWER, &phy_data);
+		hw->phy.ops.read_reg_page(hw, HV_COLC_UPPER, &phy_data);
+		hw->phy.ops.read_reg_page(hw, HV_COLC_LOWER, &phy_data);
+		hw->phy.ops.read_reg_page(hw, HV_DC_UPPER, &phy_data);
+		hw->phy.ops.read_reg_page(hw, HV_DC_LOWER, &phy_data);
+		hw->phy.ops.read_reg_page(hw, HV_TNCRS_UPPER, &phy_data);
+		hw->phy.ops.read_reg_page(hw, HV_TNCRS_LOWER, &phy_data);
+release:
+		hw->phy.ops.release(hw);
+	}
+}
+
+static const struct e1000_mac_operations ich8_mac_ops = {
+	/* check_mng_mode dependent on mac type */
+	.check_for_link		= e1000_check_for_copper_link_ich8lan,
+	/* cleanup_led dependent on mac type */
+	.clear_hw_cntrs		= e1000_clear_hw_cntrs_ich8lan,
+	.get_bus_info		= e1000_get_bus_info_ich8lan,
+	.set_lan_id		= e1000_set_lan_id_single_port,
+	.get_link_up_info	= e1000_get_link_up_info_ich8lan,
+	/* led_on dependent on mac type */
+	/* led_off dependent on mac type */
+	.update_mc_addr_list	= e1000e_update_mc_addr_list_generic,
+	.reset_hw		= e1000_reset_hw_ich8lan,
+	.init_hw		= e1000_init_hw_ich8lan,
+	.setup_link		= e1000_setup_link_ich8lan,
+	.setup_physical_interface = e1000_setup_copper_link_ich8lan,
+	/* id_led_init dependent on mac type */
+	.config_collision_dist	= e1000e_config_collision_dist_generic,
+	.rar_set		= e1000e_rar_set_generic,
+	.rar_get_count		= e1000e_rar_get_count_generic,
+};
+
+static const struct e1000_phy_operations ich8_phy_ops = {
+	.acquire		= e1000_acquire_swflag_ich8lan,
+	.check_reset_block	= e1000_check_reset_block_ich8lan,
+	.commit			= NULL,
+	.get_cfg_done		= e1000_get_cfg_done_ich8lan,
+	.get_cable_length	= e1000e_get_cable_length_igp_2,
+	.read_reg		= e1000e_read_phy_reg_igp,
+	.release		= e1000_release_swflag_ich8lan,
+	.reset			= e1000_phy_hw_reset_ich8lan,
+	.set_d0_lplu_state	= e1000_set_d0_lplu_state_ich8lan,
+	.set_d3_lplu_state	= e1000_set_d3_lplu_state_ich8lan,
+	.write_reg		= e1000e_write_phy_reg_igp,
+};
+
+static const struct e1000_nvm_operations ich8_nvm_ops = {
+	.acquire		= e1000_acquire_nvm_ich8lan,
+	.read			= e1000_read_nvm_ich8lan,
+	.release		= e1000_release_nvm_ich8lan,
+	.reload			= e1000e_reload_nvm_generic,
+	.update			= e1000_update_nvm_checksum_ich8lan,
+	.valid_led_default	= e1000_valid_led_default_ich8lan,
+	.validate		= e1000_validate_nvm_checksum_ich8lan,
+	.write			= e1000_write_nvm_ich8lan,
+};
+
+static const struct e1000_nvm_operations spt_nvm_ops = {
+	.acquire		= e1000_acquire_nvm_ich8lan,
+	.release		= e1000_release_nvm_ich8lan,
+	.read			= e1000_read_nvm_spt,
+	.update			= e1000_update_nvm_checksum_spt,
+	.reload			= e1000e_reload_nvm_generic,
+	.valid_led_default	= e1000_valid_led_default_ich8lan,
+	.validate		= e1000_validate_nvm_checksum_ich8lan,
+	.write			= e1000_write_nvm_ich8lan,
+};
+
+const struct e1000_info e1000_ich8_info = {
+	.mac			= e1000_ich8lan,
+	.flags			= FLAG_HAS_WOL
+				  | FLAG_IS_ICH
+				  | FLAG_HAS_CTRLEXT_ON_LOAD
+				  | FLAG_HAS_AMT
+				  | FLAG_HAS_FLASH
+				  | FLAG_APME_IN_WUC,
+	.pba			= 8,
+	.max_hw_frame_size	= VLAN_ETH_FRAME_LEN + ETH_FCS_LEN,
+	.get_variants		= e1000_get_variants_ich8lan,
+	.mac_ops		= &ich8_mac_ops,
+	.phy_ops		= &ich8_phy_ops,
+	.nvm_ops		= &ich8_nvm_ops,
+};
+
+const struct e1000_info e1000_ich9_info = {
+	.mac			= e1000_ich9lan,
+	.flags			= FLAG_HAS_JUMBO_FRAMES
+				  | FLAG_IS_ICH
+				  | FLAG_HAS_WOL
+				  | FLAG_HAS_CTRLEXT_ON_LOAD
+				  | FLAG_HAS_AMT
+				  | FLAG_HAS_FLASH
+				  | FLAG_APME_IN_WUC,
+	.pba			= 18,
+	.max_hw_frame_size	= DEFAULT_JUMBO,
+	.get_variants		= e1000_get_variants_ich8lan,
+	.mac_ops		= &ich8_mac_ops,
+	.phy_ops		= &ich8_phy_ops,
+	.nvm_ops		= &ich8_nvm_ops,
+};
+
+const struct e1000_info e1000_ich10_info = {
+	.mac			= e1000_ich10lan,
+	.flags			= FLAG_HAS_JUMBO_FRAMES
+				  | FLAG_IS_ICH
+				  | FLAG_HAS_WOL
+				  | FLAG_HAS_CTRLEXT_ON_LOAD
+				  | FLAG_HAS_AMT
+				  | FLAG_HAS_FLASH
+				  | FLAG_APME_IN_WUC,
+	.pba			= 18,
+	.max_hw_frame_size	= DEFAULT_JUMBO,
+	.get_variants		= e1000_get_variants_ich8lan,
+	.mac_ops		= &ich8_mac_ops,
+	.phy_ops		= &ich8_phy_ops,
+	.nvm_ops		= &ich8_nvm_ops,
+};
+
+const struct e1000_info e1000_pch_info = {
+	.mac			= e1000_pchlan,
+	.flags			= FLAG_IS_ICH
+				  | FLAG_HAS_WOL
+				  | FLAG_HAS_CTRLEXT_ON_LOAD
+				  | FLAG_HAS_AMT
+				  | FLAG_HAS_FLASH
+				  | FLAG_HAS_JUMBO_FRAMES
+				  | FLAG_DISABLE_FC_PAUSE_TIME /* errata */
+				  | FLAG_APME_IN_WUC,
+	.flags2			= FLAG2_HAS_PHY_STATS,
+	.pba			= 26,
+	.max_hw_frame_size	= 4096,
+	.get_variants		= e1000_get_variants_ich8lan,
+	.mac_ops		= &ich8_mac_ops,
+	.phy_ops		= &ich8_phy_ops,
+	.nvm_ops		= &ich8_nvm_ops,
+};
+
+const struct e1000_info e1000_pch2_info = {
+	.mac			= e1000_pch2lan,
+	.flags			= FLAG_IS_ICH
+				  | FLAG_HAS_WOL
+				  | FLAG_HAS_HW_TIMESTAMP
+				  | FLAG_HAS_CTRLEXT_ON_LOAD
+				  | FLAG_HAS_AMT
+				  | FLAG_HAS_FLASH
+				  | FLAG_HAS_JUMBO_FRAMES
+				  | FLAG_APME_IN_WUC,
+	.flags2			= FLAG2_HAS_PHY_STATS
+				  | FLAG2_HAS_EEE
+				  | FLAG2_CHECK_SYSTIM_OVERFLOW,
+	.pba			= 26,
+	.max_hw_frame_size	= 9022,
+	.get_variants		= e1000_get_variants_ich8lan,
+	.mac_ops		= &ich8_mac_ops,
+	.phy_ops		= &ich8_phy_ops,
+	.nvm_ops		= &ich8_nvm_ops,
+};
+
+const struct e1000_info e1000_pch_lpt_info = {
+	.mac			= e1000_pch_lpt,
+	.flags			= FLAG_IS_ICH
+				  | FLAG_HAS_WOL
+				  | FLAG_HAS_HW_TIMESTAMP
+				  | FLAG_HAS_CTRLEXT_ON_LOAD
+				  | FLAG_HAS_AMT
+				  | FLAG_HAS_FLASH
+				  | FLAG_HAS_JUMBO_FRAMES
+				  | FLAG_APME_IN_WUC,
+	.flags2			= FLAG2_HAS_PHY_STATS
+				  | FLAG2_HAS_EEE
+				  | FLAG2_CHECK_SYSTIM_OVERFLOW,
+	.pba			= 26,
+	.max_hw_frame_size	= 9022,
+	.get_variants		= e1000_get_variants_ich8lan,
+	.mac_ops		= &ich8_mac_ops,
+	.phy_ops		= &ich8_phy_ops,
+	.nvm_ops		= &ich8_nvm_ops,
+};
+
+const struct e1000_info e1000_pch_spt_info = {
+	.mac			= e1000_pch_spt,
+	.flags			= FLAG_IS_ICH
+				  | FLAG_HAS_WOL
+				  | FLAG_HAS_HW_TIMESTAMP
+				  | FLAG_HAS_CTRLEXT_ON_LOAD
+				  | FLAG_HAS_AMT
+				  | FLAG_HAS_FLASH
+				  | FLAG_HAS_JUMBO_FRAMES
+				  | FLAG_APME_IN_WUC,
+	.flags2			= FLAG2_HAS_PHY_STATS
+				  | FLAG2_HAS_EEE,
+	.pba			= 26,
+	.max_hw_frame_size	= 9022,
+	.get_variants		= e1000_get_variants_ich8lan,
+	.mac_ops		= &ich8_mac_ops,
+	.phy_ops		= &ich8_phy_ops,
+	.nvm_ops		= &spt_nvm_ops,
+};
+
+const struct e1000_info e1000_pch_cnp_info = {
+	.mac			= e1000_pch_cnp,
+	.flags			= FLAG_IS_ICH
+				  | FLAG_HAS_WOL
+				  | FLAG_HAS_HW_TIMESTAMP
+				  | FLAG_HAS_CTRLEXT_ON_LOAD
+				  | FLAG_HAS_AMT
+				  | FLAG_HAS_FLASH
+				  | FLAG_HAS_JUMBO_FRAMES
+				  | FLAG_APME_IN_WUC,
+	.flags2			= FLAG2_HAS_PHY_STATS
+				  | FLAG2_HAS_EEE,
+	.pba			= 26,
+	.max_hw_frame_size	= 9022,
+	.get_variants		= e1000_get_variants_ich8lan,
+	.mac_ops		= &ich8_mac_ops,
+	.phy_ops		= &ich8_phy_ops,
+	.nvm_ops		= &spt_nvm_ops,
+};
+
+const struct e1000_info e1000_pch_tgp_info = {
+	.mac			= e1000_pch_tgp,
+	.flags			= FLAG_IS_ICH
+				  | FLAG_HAS_WOL
+				  | FLAG_HAS_HW_TIMESTAMP
+				  | FLAG_HAS_CTRLEXT_ON_LOAD
+				  | FLAG_HAS_AMT
+				  | FLAG_HAS_FLASH
+				  | FLAG_HAS_JUMBO_FRAMES
+				  | FLAG_APME_IN_WUC,
+	.flags2			= FLAG2_HAS_PHY_STATS
+				  | FLAG2_HAS_EEE,
+	.pba			= 26,
+	.max_hw_frame_size	= 9022,
+	.get_variants		= e1000_get_variants_ich8lan,
+	.mac_ops		= &ich8_mac_ops,
+	.phy_ops		= &ich8_phy_ops,
+	.nvm_ops		= &spt_nvm_ops,
+};
+
+const struct e1000_info e1000_pch_adp_info = {
+	.mac			= e1000_pch_adp,
+	.flags			= FLAG_IS_ICH
+				  | FLAG_HAS_WOL
+				  | FLAG_HAS_HW_TIMESTAMP
+				  | FLAG_HAS_CTRLEXT_ON_LOAD
+				  | FLAG_HAS_AMT
+				  | FLAG_HAS_FLASH
+				  | FLAG_HAS_JUMBO_FRAMES
+				  | FLAG_APME_IN_WUC,
+	.flags2			= FLAG2_HAS_PHY_STATS
+				  | FLAG2_HAS_EEE,
+	.pba			= 26,
+	.max_hw_frame_size	= 9022,
+	.get_variants		= e1000_get_variants_ich8lan,
+	.mac_ops		= &ich8_mac_ops,
+	.phy_ops		= &ich8_phy_ops,
+	.nvm_ops		= &spt_nvm_ops,
+};
+
+const struct e1000_info e1000_pch_mtp_info = {
+	.mac			= e1000_pch_mtp,
+	.flags			= FLAG_IS_ICH
+				  | FLAG_HAS_WOL
+				  | FLAG_HAS_HW_TIMESTAMP
+				  | FLAG_HAS_CTRLEXT_ON_LOAD
+				  | FLAG_HAS_AMT
+				  | FLAG_HAS_FLASH
+				  | FLAG_HAS_JUMBO_FRAMES
+				  | FLAG_APME_IN_WUC,
+	.flags2			= FLAG2_HAS_PHY_STATS
+				  | FLAG2_HAS_EEE,
+	.pba			= 26,
+	.max_hw_frame_size	= 9022,
+	.get_variants		= e1000_get_variants_ich8lan,
+	.mac_ops		= &ich8_mac_ops,
+	.phy_ops		= &ich8_phy_ops,
+	.nvm_ops		= &spt_nvm_ops,
+};
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h
new file mode 100644
index 0000000000..2504b11c31
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _E1000E_ICH8LAN_H_
+#define _E1000E_ICH8LAN_H_
+
+#define ICH_FLASH_GFPREG		0x0000
+#define ICH_FLASH_HSFSTS		0x0004
+#define ICH_FLASH_HSFCTL		0x0006
+#define ICH_FLASH_FADDR			0x0008
+#define ICH_FLASH_FDATA0		0x0010
+#define ICH_FLASH_PR0			0x0074
+
+/* Requires up to 10 seconds when MNG might be accessing part. */
+#define ICH_FLASH_READ_COMMAND_TIMEOUT	10000000
+#define ICH_FLASH_WRITE_COMMAND_TIMEOUT	10000000
+#define ICH_FLASH_ERASE_COMMAND_TIMEOUT	10000000
+#define ICH_FLASH_LINEAR_ADDR_MASK	0x00FFFFFF
+#define ICH_FLASH_CYCLE_REPEAT_COUNT	10
+
+#define ICH_CYCLE_READ			0
+#define ICH_CYCLE_WRITE			2
+#define ICH_CYCLE_ERASE			3
+
+#define FLASH_GFPREG_BASE_MASK		0x1FFF
+#define FLASH_SECTOR_ADDR_SHIFT		12
+
+#define ICH_FLASH_SEG_SIZE_256		256
+#define ICH_FLASH_SEG_SIZE_4K		4096
+#define ICH_FLASH_SEG_SIZE_8K		8192
+#define ICH_FLASH_SEG_SIZE_64K		65536
+
+#define E1000_ICH_FWSM_RSPCIPHY	0x00000040	/* Reset PHY on PCI Reset */
+/* FW established a valid mode */
+#define E1000_ICH_FWSM_FW_VALID	0x00008000
+#define E1000_ICH_FWSM_PCIM2PCI	0x01000000	/* ME PCIm-to-PCI active */
+#define E1000_ICH_FWSM_PCIM2PCI_COUNT	2000
+
+#define E1000_ICH_MNG_IAMT_MODE		0x2
+
+#define E1000_FWSM_WLOCK_MAC_MASK	0x0380
+#define E1000_FWSM_WLOCK_MAC_SHIFT	7
+#define E1000_FWSM_ULP_CFG_DONE		0x00000400	/* Low power cfg done */
+#define E1000_EXFWSM_DPG_EXIT_DONE	0x00000001
+
+/* Shared Receive Address Registers */
+#define E1000_SHRAL_PCH_LPT(_i)		(0x05408 + ((_i) * 8))
+#define E1000_SHRAH_PCH_LPT(_i)		(0x0540C + ((_i) * 8))
+
+#define E1000_H2ME		0x05B50	/* Host to ME */
+#define E1000_H2ME_START_DPG	0x00000001	/* indicate the ME of DPG */
+#define E1000_H2ME_EXIT_DPG	0x00000002	/* indicate the ME exit DPG */
+#define E1000_H2ME_ULP		0x00000800	/* ULP Indication Bit */
+#define E1000_H2ME_ENFORCE_SETTINGS	0x00001000	/* Enforce Settings */
+
+#define ID_LED_DEFAULT_ICH8LAN	((ID_LED_DEF1_DEF2 << 12) | \
+				 (ID_LED_OFF1_OFF2 <<  8) | \
+				 (ID_LED_OFF1_ON2  <<  4) | \
+				 (ID_LED_DEF1_DEF2))
+
+#define E1000_ICH_NVM_SIG_WORD		0x13u
+#define E1000_ICH_NVM_SIG_MASK		0xC000u
+#define E1000_ICH_NVM_VALID_SIG_MASK	0xC0u
+#define E1000_ICH_NVM_SIG_VALUE		0x80u
+
+#define E1000_ICH8_LAN_INIT_TIMEOUT	1500
+
+/* FEXT register bit definition */
+#define E1000_FEXT_PHY_CABLE_DISCONNECTED	0x00000004
+
+#define E1000_FEXTNVM_SW_CONFIG		1
+#define E1000_FEXTNVM_SW_CONFIG_ICH8M	(1 << 27)	/* different on ICH8M */
+
+#define E1000_FEXTNVM3_PHY_CFG_COUNTER_MASK	0x0C000000
+#define E1000_FEXTNVM3_PHY_CFG_COUNTER_50MSEC	0x08000000
+
+#define E1000_FEXTNVM4_BEACON_DURATION_MASK	0x7
+#define E1000_FEXTNVM4_BEACON_DURATION_8USEC	0x7
+#define E1000_FEXTNVM4_BEACON_DURATION_16USEC	0x3
+
+#define E1000_FEXTNVM6_REQ_PLL_CLK	0x00000100
+#define E1000_FEXTNVM6_ENABLE_K1_ENTRY_CONDITION	0x00000200
+#define E1000_FEXTNVM6_K1_OFF_ENABLE	0x80000000
+/* bit for disabling packet buffer read */
+#define E1000_FEXTNVM7_DISABLE_PB_READ	0x00040000
+#define E1000_FEXTNVM7_SIDE_CLK_UNGATE	0x00000004
+#define E1000_FEXTNVM7_DISABLE_SMB_PERST	0x00000020
+#define E1000_FEXTNVM9_IOSFSB_CLKGATE_DIS	0x00000800
+#define E1000_FEXTNVM9_IOSFSB_CLKREQ_DIS	0x00001000
+#define E1000_FEXTNVM11_DISABLE_PB_READ		0x00000200
+#define E1000_FEXTNVM11_DISABLE_MULR_FIX	0x00002000
+
+/* bit24: RXDCTL thresholds granularity: 0 - cache lines, 1 - descriptors */
+#define E1000_RXDCTL_THRESH_UNIT_DESC	0x01000000
+
+#define K1_ENTRY_LATENCY	0
+#define K1_MIN_TIME		1
+#define NVM_SIZE_MULTIPLIER 4096	/*multiplier for NVMS field */
+#define E1000_FLASH_BASE_ADDR 0xE000	/*offset of NVM access regs */
+#define E1000_CTRL_EXT_NVMVS 0x3	/*NVM valid sector */
+#define E1000_TARC0_CB_MULTIQ_3_REQ	0x30000000
+#define E1000_TARC0_CB_MULTIQ_2_REQ	0x20000000
+#define PCIE_ICH8_SNOOP_ALL	PCIE_NO_SNOOP_ALL
+
+#define E1000_ICH_RAR_ENTRIES	7
+#define E1000_PCH2_RAR_ENTRIES	5	/* RAR[0], SHRA[0-3] */
+#define E1000_PCH_LPT_RAR_ENTRIES	12	/* RAR[0], SHRA[0-10] */
+
+#define PHY_PAGE_SHIFT		5
+#define PHY_REG(page, reg)	(((page) << PHY_PAGE_SHIFT) | \
+				 ((reg) & MAX_PHY_REG_ADDRESS))
+#define IGP3_KMRN_DIAG	PHY_REG(770, 19)	/* KMRN Diagnostic */
+#define IGP3_VR_CTRL	PHY_REG(776, 18)	/* Voltage Regulator Control */
+
+#define IGP3_KMRN_DIAG_PCS_LOCK_LOSS		0x0002
+#define IGP3_VR_CTRL_DEV_POWERDOWN_MODE_MASK	0x0300
+#define IGP3_VR_CTRL_MODE_SHUTDOWN		0x0200
+
+/* PHY Wakeup Registers and defines */
+#define BM_PORT_GEN_CFG		PHY_REG(BM_PORT_CTRL_PAGE, 17)
+#define BM_RCTL			PHY_REG(BM_WUC_PAGE, 0)
+#define BM_WUC			PHY_REG(BM_WUC_PAGE, 1)
+#define BM_WUFC			PHY_REG(BM_WUC_PAGE, 2)
+#define BM_WUS			PHY_REG(BM_WUC_PAGE, 3)
+#define BM_RAR_L(_i)		(BM_PHY_REG(BM_WUC_PAGE, 16 + ((_i) << 2)))
+#define BM_RAR_M(_i)		(BM_PHY_REG(BM_WUC_PAGE, 17 + ((_i) << 2)))
+#define BM_RAR_H(_i)		(BM_PHY_REG(BM_WUC_PAGE, 18 + ((_i) << 2)))
+#define BM_RAR_CTRL(_i)		(BM_PHY_REG(BM_WUC_PAGE, 19 + ((_i) << 2)))
+#define BM_MTA(_i)		(BM_PHY_REG(BM_WUC_PAGE, 128 + ((_i) << 1)))
+
+#define BM_RCTL_UPE		0x0001	/* Unicast Promiscuous Mode */
+#define BM_RCTL_MPE		0x0002	/* Multicast Promiscuous Mode */
+#define BM_RCTL_MO_SHIFT	3	/* Multicast Offset Shift */
+#define BM_RCTL_MO_MASK		(3 << 3)	/* Multicast Offset Mask */
+#define BM_RCTL_BAM		0x0020	/* Broadcast Accept Mode */
+#define BM_RCTL_PMCF		0x0040	/* Pass MAC Control Frames */
+#define BM_RCTL_RFCE		0x0080	/* Rx Flow Control Enable */
+
+#define HV_LED_CONFIG		PHY_REG(768, 30)	/* LED Configuration */
+#define HV_MUX_DATA_CTRL	PHY_REG(776, 16)
+#define HV_MUX_DATA_CTRL_GEN_TO_MAC	0x0400
+#define HV_MUX_DATA_CTRL_FORCE_SPEED	0x0004
+#define HV_STATS_PAGE	778
+/* Half-duplex collision counts */
+#define HV_SCC_UPPER	PHY_REG(HV_STATS_PAGE, 16)	/* Single Collision */
+#define HV_SCC_LOWER	PHY_REG(HV_STATS_PAGE, 17)
+#define HV_ECOL_UPPER	PHY_REG(HV_STATS_PAGE, 18)	/* Excessive Coll. */
+#define HV_ECOL_LOWER	PHY_REG(HV_STATS_PAGE, 19)
+#define HV_MCC_UPPER	PHY_REG(HV_STATS_PAGE, 20)	/* Multiple Collision */
+#define HV_MCC_LOWER	PHY_REG(HV_STATS_PAGE, 21)
+#define HV_LATECOL_UPPER PHY_REG(HV_STATS_PAGE, 23)	/* Late Collision */
+#define HV_LATECOL_LOWER PHY_REG(HV_STATS_PAGE, 24)
+#define HV_COLC_UPPER	PHY_REG(HV_STATS_PAGE, 25)	/* Collision */
+#define HV_COLC_LOWER	PHY_REG(HV_STATS_PAGE, 26)
+#define HV_DC_UPPER	PHY_REG(HV_STATS_PAGE, 27)	/* Defer Count */
+#define HV_DC_LOWER	PHY_REG(HV_STATS_PAGE, 28)
+#define HV_TNCRS_UPPER	PHY_REG(HV_STATS_PAGE, 29)	/* Tx with no CRS */
+#define HV_TNCRS_LOWER	PHY_REG(HV_STATS_PAGE, 30)
+
+#define E1000_FCRTV_PCH	0x05F40	/* PCH Flow Control Refresh Timer Value */
+
+#define E1000_NVM_K1_CONFIG	0x1B	/* NVM K1 Config Word */
+#define E1000_NVM_K1_ENABLE	0x1	/* NVM Enable K1 bit */
+
+/* SMBus Control Phy Register */
+#define CV_SMB_CTRL		PHY_REG(769, 23)
+#define CV_SMB_CTRL_FORCE_SMBUS	0x0001
+
+/* I218 Ultra Low Power Configuration 1 Register */
+#define I218_ULP_CONFIG1		PHY_REG(779, 16)
+#define I218_ULP_CONFIG1_START		0x0001	/* Start auto ULP config */
+#define I218_ULP_CONFIG1_IND		0x0004	/* Pwr up from ULP indication */
+#define I218_ULP_CONFIG1_STICKY_ULP	0x0010	/* Set sticky ULP mode */
+#define I218_ULP_CONFIG1_INBAND_EXIT	0x0020	/* Inband on ULP exit */
+#define I218_ULP_CONFIG1_WOL_HOST	0x0040	/* WoL Host on ULP exit */
+#define I218_ULP_CONFIG1_RESET_TO_SMBUS	0x0100	/* Reset to SMBus mode */
+/* enable ULP even if when phy powered down via lanphypc */
+#define I218_ULP_CONFIG1_EN_ULP_LANPHYPC	0x0400
+/* disable clear of sticky ULP on PERST */
+#define I218_ULP_CONFIG1_DIS_CLR_STICKY_ON_PERST	0x0800
+#define I218_ULP_CONFIG1_DISABLE_SMB_PERST	0x1000	/* Disable on PERST# */
+
+/* SMBus Address Phy Register */
+#define HV_SMB_ADDR		PHY_REG(768, 26)
+#define HV_SMB_ADDR_MASK	0x007F
+#define HV_SMB_ADDR_PEC_EN	0x0200
+#define HV_SMB_ADDR_VALID	0x0080
+#define HV_SMB_ADDR_FREQ_MASK		0x1100
+#define HV_SMB_ADDR_FREQ_LOW_SHIFT	8
+#define HV_SMB_ADDR_FREQ_HIGH_SHIFT	12
+
+/* Strapping Option Register - RO */
+#define E1000_STRAP			0x0000C
+#define E1000_STRAP_SMBUS_ADDRESS_MASK	0x00FE0000
+#define E1000_STRAP_SMBUS_ADDRESS_SHIFT	17
+#define E1000_STRAP_SMT_FREQ_MASK	0x00003000
+#define E1000_STRAP_SMT_FREQ_SHIFT	12
+
+/* OEM Bits Phy Register */
+#define HV_OEM_BITS		PHY_REG(768, 25)
+#define HV_OEM_BITS_LPLU	0x0004	/* Low Power Link Up */
+#define HV_OEM_BITS_GBE_DIS	0x0040	/* Gigabit Disable */
+#define HV_OEM_BITS_RESTART_AN	0x0400	/* Restart Auto-negotiation */
+
+/* KMRN Mode Control */
+#define HV_KMRN_MODE_CTRL	PHY_REG(769, 16)
+#define HV_KMRN_MDIO_SLOW	0x0400
+
+/* KMRN FIFO Control and Status */
+#define HV_KMRN_FIFO_CTRLSTA			PHY_REG(770, 16)
+#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_MASK	0x7000
+#define HV_KMRN_FIFO_CTRLSTA_PREAMBLE_SHIFT	12
+
+/* PHY Power Management Control */
+#define HV_PM_CTRL		PHY_REG(770, 17)
+#define HV_PM_CTRL_K1_CLK_REQ		0x200
+#define HV_PM_CTRL_K1_ENABLE		0x4000
+
+#define I217_PLL_CLOCK_GATE_REG	PHY_REG(772, 28)
+#define I217_PLL_CLOCK_GATE_MASK	0x07FF
+
+#define SW_FLAG_TIMEOUT		1000	/* SW Semaphore flag timeout in ms */
+
+/* Inband Control */
+#define I217_INBAND_CTRL				PHY_REG(770, 18)
+#define I217_INBAND_CTRL_LINK_STAT_TX_TIMEOUT_MASK	0x3F00
+#define I217_INBAND_CTRL_LINK_STAT_TX_TIMEOUT_SHIFT	8
+
+/* Low Power Idle GPIO Control */
+#define I217_LPI_GPIO_CTRL			PHY_REG(772, 18)
+#define I217_LPI_GPIO_CTRL_AUTO_EN_LPI		0x0800
+
+/* PHY Low Power Idle Control */
+#define I82579_LPI_CTRL				PHY_REG(772, 20)
+#define I82579_LPI_CTRL_100_ENABLE		0x2000
+#define I82579_LPI_CTRL_1000_ENABLE		0x4000
+#define I82579_LPI_CTRL_ENABLE_MASK		0x6000
+#define I82579_LPI_CTRL_FORCE_PLL_LOCK_COUNT	0x80
+
+/* Extended Management Interface (EMI) Registers */
+#define I82579_EMI_ADDR		0x10
+#define I82579_EMI_DATA		0x11
+#define I82579_LPI_UPDATE_TIMER	0x4805	/* in 40ns units + 40 ns base value */
+#define I82579_MSE_THRESHOLD	0x084F	/* 82579 Mean Square Error Threshold */
+#define I82577_MSE_THRESHOLD	0x0887	/* 82577 Mean Square Error Threshold */
+#define I82579_MSE_LINK_DOWN	0x2411	/* MSE count before dropping link */
+#define I82579_RX_CONFIG		0x3412	/* Receive configuration */
+#define I82579_LPI_PLL_SHUT		0x4412	/* LPI PLL Shut Enable */
+#define I82579_EEE_PCS_STATUS		0x182E	/* IEEE MMD Register 3.1 >> 8 */
+#define I82579_EEE_CAPABILITY		0x0410	/* IEEE MMD Register 3.20 */
+#define I82579_EEE_ADVERTISEMENT	0x040E	/* IEEE MMD Register 7.60 */
+#define I82579_EEE_LP_ABILITY		0x040F	/* IEEE MMD Register 7.61 */
+#define I82579_EEE_100_SUPPORTED	(1 << 1)	/* 100BaseTx EEE */
+#define I82579_EEE_1000_SUPPORTED	(1 << 2)	/* 1000BaseTx EEE */
+#define I82579_LPI_100_PLL_SHUT	(1 << 2)	/* 100M LPI PLL Shut Enabled */
+#define I217_EEE_PCS_STATUS	0x9401	/* IEEE MMD Register 3.1 */
+#define I217_EEE_CAPABILITY	0x8000	/* IEEE MMD Register 3.20 */
+#define I217_EEE_ADVERTISEMENT	0x8001	/* IEEE MMD Register 7.60 */
+#define I217_EEE_LP_ABILITY	0x8002	/* IEEE MMD Register 7.61 */
+#define I217_RX_CONFIG		0xB20C	/* Receive configuration */
+
+#define E1000_EEE_RX_LPI_RCVD	0x0400	/* Tx LP idle received */
+#define E1000_EEE_TX_LPI_RCVD	0x0800	/* Rx LP idle received */
+
+/* Intel Rapid Start Technology Support */
+#define I217_PROXY_CTRL		BM_PHY_REG(BM_WUC_PAGE, 70)
+#define I217_PROXY_CTRL_AUTO_DISABLE	0x0080
+#define I217_SxCTRL			PHY_REG(BM_PORT_CTRL_PAGE, 28)
+#define I217_SxCTRL_ENABLE_LPI_RESET	0x1000
+#define I217_CGFREG			PHY_REG(772, 29)
+#define I217_CGFREG_ENABLE_MTA_RESET	0x0002
+#define I217_MEMPWR			PHY_REG(772, 26)
+#define I217_MEMPWR_DISABLE_SMB_RELEASE	0x0010
+
+/* Receive Address Initial CRC Calculation */
+#define E1000_PCH_RAICC(_n)	(0x05F50 + ((_n) * 4))
+
+/* Latency Tolerance Reporting */
+#define E1000_LTRV			0x000F8
+#define E1000_LTRV_VALUE_MASK		0x000003FF
+#define E1000_LTRV_SCALE_MAX		5
+#define E1000_LTRV_SCALE_FACTOR		5
+#define E1000_LTRV_SCALE_SHIFT		10
+#define E1000_LTRV_SCALE_MASK		0x00001C00
+#define E1000_LTRV_REQ_SHIFT		15
+#define E1000_LTRV_NOSNOOP_SHIFT	16
+#define E1000_LTRV_SEND			(1 << 30)
+
+/* Proprietary Latency Tolerance Reporting PCI Capability */
+#define E1000_PCI_LTR_CAP_LPT		0xA8
+
+/* Don't gate wake DMA clock */
+#define E1000_FFLT_DBG_DONT_GATE_WAKE_DMA_CLK	0x1000
+
+void e1000e_write_protect_nvm_ich8lan(struct e1000_hw *hw);
+void e1000e_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw,
+						  bool state);
+void e1000e_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw);
+void e1000e_gig_downshift_workaround_ich8lan(struct e1000_hw *hw);
+void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw);
+void e1000_resume_workarounds_pchlan(struct e1000_hw *hw);
+s32 e1000_configure_k1_ich8lan(struct e1000_hw *hw, bool k1_enable);
+void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw);
+s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable);
+s32 e1000_read_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 *data);
+s32 e1000_write_emi_reg_locked(struct e1000_hw *hw, u16 addr, u16 data);
+s32 e1000_set_eee_pchlan(struct e1000_hw *hw);
+s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx);
+#endif /* _E1000E_ICH8LAN_H_ */
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
new file mode 100644
index 0000000000..5df7ad93f3
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -0,0 +1,1782 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "e1000.h"
+
+/**
+ *  e1000e_get_bus_info_pcie - Get PCIe bus information
+ *  @hw: pointer to the HW structure
+ *
+ *  Determines and stores the system bus information for a particular
+ *  network interface.  The following bus information is determined and stored:
+ *  bus speed, bus width, type (PCIe), and PCIe function.
+ **/
+s32 e1000e_get_bus_info_pcie(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	struct e1000_bus_info *bus = &hw->bus;
+	struct e1000_adapter *adapter = hw->adapter;
+	u16 pcie_link_status, cap_offset;
+
+	cap_offset = adapter->pdev->pcie_cap;
+	if (!cap_offset) {
+		bus->width = e1000_bus_width_unknown;
+	} else {
+		pci_read_config_word(adapter->pdev,
+				     cap_offset + PCIE_LINK_STATUS,
+				     &pcie_link_status);
+		bus->width = (enum e1000_bus_width)((pcie_link_status &
+						     PCIE_LINK_WIDTH_MASK) >>
+						    PCIE_LINK_WIDTH_SHIFT);
+	}
+
+	mac->ops.set_lan_id(hw);
+
+	return 0;
+}
+
+/**
+ *  e1000_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices
+ *
+ *  @hw: pointer to the HW structure
+ *
+ *  Determines the LAN function id by reading memory-mapped registers
+ *  and swaps the port value if requested.
+ **/
+void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw)
+{
+	struct e1000_bus_info *bus = &hw->bus;
+	u32 reg;
+
+	/* The status register reports the correct function number
+	 * for the device regardless of function swap state.
+	 */
+	reg = er32(STATUS);
+	bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT;
+}
+
+/**
+ *  e1000_set_lan_id_single_port - Set LAN id for a single port device
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets the LAN function id to zero for a single port device.
+ **/
+void e1000_set_lan_id_single_port(struct e1000_hw *hw)
+{
+	struct e1000_bus_info *bus = &hw->bus;
+
+	bus->func = 0;
+}
+
+/**
+ *  e1000_clear_vfta_generic - Clear VLAN filter table
+ *  @hw: pointer to the HW structure
+ *
+ *  Clears the register array which contains the VLAN filter table by
+ *  setting all the values to 0.
+ **/
+void e1000_clear_vfta_generic(struct e1000_hw *hw)
+{
+	u32 offset;
+
+	for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) {
+		E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0);
+		e1e_flush();
+	}
+}
+
+/**
+ *  e1000_write_vfta_generic - Write value to VLAN filter table
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset in VLAN filter table
+ *  @value: register value written to VLAN filter table
+ *
+ *  Writes value at the given offset in the register array which stores
+ *  the VLAN filter table.
+ **/
+void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value)
+{
+	E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value);
+	e1e_flush();
+}
+
+/**
+ *  e1000e_init_rx_addrs - Initialize receive address's
+ *  @hw: pointer to the HW structure
+ *  @rar_count: receive address registers
+ *
+ *  Setup the receive address registers by setting the base receive address
+ *  register to the devices MAC address and clearing all the other receive
+ *  address registers to 0.
+ **/
+void e1000e_init_rx_addrs(struct e1000_hw *hw, u16 rar_count)
+{
+	u32 i;
+	u8 mac_addr[ETH_ALEN] = { 0 };
+
+	/* Setup the receive address */
+	e_dbg("Programming MAC Address into RAR[0]\n");
+
+	hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
+
+	/* Zero out the other (rar_entry_count - 1) receive addresses */
+	e_dbg("Clearing RAR[1-%u]\n", rar_count - 1);
+	for (i = 1; i < rar_count; i++)
+		hw->mac.ops.rar_set(hw, mac_addr, i);
+}
+
+/**
+ *  e1000_check_alt_mac_addr_generic - Check for alternate MAC addr
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks the nvm for an alternate MAC address.  An alternate MAC address
+ *  can be setup by pre-boot software and must be treated like a permanent
+ *  address and must override the actual permanent MAC address. If an
+ *  alternate MAC address is found it is programmed into RAR0, replacing
+ *  the permanent address that was installed into RAR0 by the Si on reset.
+ *  This function will return SUCCESS unless it encounters an error while
+ *  reading the EEPROM.
+ **/
+s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw)
+{
+	u32 i;
+	s32 ret_val;
+	u16 offset, nvm_alt_mac_addr_offset, nvm_data;
+	u8 alt_mac_addr[ETH_ALEN];
+
+	ret_val = e1000_read_nvm(hw, NVM_COMPAT, 1, &nvm_data);
+	if (ret_val)
+		return ret_val;
+
+	/* not supported on 82573 */
+	if (hw->mac.type == e1000_82573)
+		return 0;
+
+	ret_val = e1000_read_nvm(hw, NVM_ALT_MAC_ADDR_PTR, 1,
+				 &nvm_alt_mac_addr_offset);
+	if (ret_val) {
+		e_dbg("NVM Read Error\n");
+		return ret_val;
+	}
+
+	if ((nvm_alt_mac_addr_offset == 0xFFFF) ||
+	    (nvm_alt_mac_addr_offset == 0x0000))
+		/* There is no Alternate MAC Address */
+		return 0;
+
+	if (hw->bus.func == E1000_FUNC_1)
+		nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1;
+	for (i = 0; i < ETH_ALEN; i += 2) {
+		offset = nvm_alt_mac_addr_offset + (i >> 1);
+		ret_val = e1000_read_nvm(hw, offset, 1, &nvm_data);
+		if (ret_val) {
+			e_dbg("NVM Read Error\n");
+			return ret_val;
+		}
+
+		alt_mac_addr[i] = (u8)(nvm_data & 0xFF);
+		alt_mac_addr[i + 1] = (u8)(nvm_data >> 8);
+	}
+
+	/* if multicast bit is set, the alternate address will not be used */
+	if (is_multicast_ether_addr(alt_mac_addr)) {
+		e_dbg("Ignoring Alternate Mac Address with MC bit set\n");
+		return 0;
+	}
+
+	/* We have a valid alternate MAC address, and we want to treat it the
+	 * same as the normal permanent MAC address stored by the HW into the
+	 * RAR. Do this by mapping this address into RAR0.
+	 */
+	hw->mac.ops.rar_set(hw, alt_mac_addr, 0);
+
+	return 0;
+}
+
+u32 e1000e_rar_get_count_generic(struct e1000_hw *hw)
+{
+	return hw->mac.rar_entry_count;
+}
+
+/**
+ *  e1000e_rar_set_generic - Set receive address register
+ *  @hw: pointer to the HW structure
+ *  @addr: pointer to the receive address
+ *  @index: receive address array register
+ *
+ *  Sets the receive address array register at index to the address passed
+ *  in by addr.
+ **/
+int e1000e_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index)
+{
+	u32 rar_low, rar_high;
+
+	/* HW expects these in little endian so we reverse the byte order
+	 * from network order (big endian) to little endian
+	 */
+	rar_low = ((u32)addr[0] | ((u32)addr[1] << 8) |
+		   ((u32)addr[2] << 16) | ((u32)addr[3] << 24));
+
+	rar_high = ((u32)addr[4] | ((u32)addr[5] << 8));
+
+	/* If MAC address zero, no need to set the AV bit */
+	if (rar_low || rar_high)
+		rar_high |= E1000_RAH_AV;
+
+	/* Some bridges will combine consecutive 32-bit writes into
+	 * a single burst write, which will malfunction on some parts.
+	 * The flushes avoid this.
+	 */
+	ew32(RAL(index), rar_low);
+	e1e_flush();
+	ew32(RAH(index), rar_high);
+	e1e_flush();
+
+	return 0;
+}
+
+/**
+ *  e1000_hash_mc_addr - Generate a multicast hash value
+ *  @hw: pointer to the HW structure
+ *  @mc_addr: pointer to a multicast address
+ *
+ *  Generates a multicast address hash value which is used to determine
+ *  the multicast filter table array address and new table value.
+ **/
+static u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
+{
+	u32 hash_value, hash_mask;
+	u8 bit_shift = 0;
+
+	/* Register count multiplied by bits per register */
+	hash_mask = (hw->mac.mta_reg_count * 32) - 1;
+
+	/* For a mc_filter_type of 0, bit_shift is the number of left-shifts
+	 * where 0xFF would still fall within the hash mask.
+	 */
+	while (hash_mask >> bit_shift != 0xFF)
+		bit_shift++;
+
+	/* The portion of the address that is used for the hash table
+	 * is determined by the mc_filter_type setting.
+	 * The algorithm is such that there is a total of 8 bits of shifting.
+	 * The bit_shift for a mc_filter_type of 0 represents the number of
+	 * left-shifts where the MSB of mc_addr[5] would still fall within
+	 * the hash_mask.  Case 0 does this exactly.  Since there are a total
+	 * of 8 bits of shifting, then mc_addr[4] will shift right the
+	 * remaining number of bits. Thus 8 - bit_shift.  The rest of the
+	 * cases are a variation of this algorithm...essentially raising the
+	 * number of bits to shift mc_addr[5] left, while still keeping the
+	 * 8-bit shifting total.
+	 *
+	 * For example, given the following Destination MAC Address and an
+	 * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask),
+	 * we can see that the bit_shift for case 0 is 4.  These are the hash
+	 * values resulting from each mc_filter_type...
+	 * [0] [1] [2] [3] [4] [5]
+	 * 01  AA  00  12  34  56
+	 * LSB           MSB
+	 *
+	 * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563
+	 * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6
+	 * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163
+	 * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634
+	 */
+	switch (hw->mac.mc_filter_type) {
+	default:
+	case 0:
+		break;
+	case 1:
+		bit_shift += 1;
+		break;
+	case 2:
+		bit_shift += 2;
+		break;
+	case 3:
+		bit_shift += 4;
+		break;
+	}
+
+	hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) |
+				   (((u16)mc_addr[5]) << bit_shift)));
+
+	return hash_value;
+}
+
+/**
+ *  e1000e_update_mc_addr_list_generic - Update Multicast addresses
+ *  @hw: pointer to the HW structure
+ *  @mc_addr_list: array of multicast addresses to program
+ *  @mc_addr_count: number of multicast addresses to program
+ *
+ *  Updates entire Multicast Table Array.
+ *  The caller must have a packed mc_addr_list of multicast addresses.
+ **/
+void e1000e_update_mc_addr_list_generic(struct e1000_hw *hw,
+					u8 *mc_addr_list, u32 mc_addr_count)
+{
+	u32 hash_value, hash_bit, hash_reg;
+	int i;
+
+	/* clear mta_shadow */
+	memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
+
+	/* update mta_shadow from mc_addr_list */
+	for (i = 0; (u32)i < mc_addr_count; i++) {
+		hash_value = e1000_hash_mc_addr(hw, mc_addr_list);
+
+		hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
+		hash_bit = hash_value & 0x1F;
+
+		hw->mac.mta_shadow[hash_reg] |= BIT(hash_bit);
+		mc_addr_list += (ETH_ALEN);
+	}
+
+	/* replace the entire MTA table */
+	for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
+		E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]);
+	e1e_flush();
+}
+
+/**
+ *  e1000e_clear_hw_cntrs_base - Clear base hardware counters
+ *  @hw: pointer to the HW structure
+ *
+ *  Clears the base hardware counters by reading the counter registers.
+ **/
+void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw)
+{
+	er32(CRCERRS);
+	er32(SYMERRS);
+	er32(MPC);
+	er32(SCC);
+	er32(ECOL);
+	er32(MCC);
+	er32(LATECOL);
+	er32(COLC);
+	er32(DC);
+	er32(SEC);
+	er32(RLEC);
+	er32(XONRXC);
+	er32(XONTXC);
+	er32(XOFFRXC);
+	er32(XOFFTXC);
+	er32(FCRUC);
+	er32(GPRC);
+	er32(BPRC);
+	er32(MPRC);
+	er32(GPTC);
+	er32(GORCL);
+	er32(GORCH);
+	er32(GOTCL);
+	er32(GOTCH);
+	er32(RNBC);
+	er32(RUC);
+	er32(RFC);
+	er32(ROC);
+	er32(RJC);
+	er32(TORL);
+	er32(TORH);
+	er32(TOTL);
+	er32(TOTH);
+	er32(TPR);
+	er32(TPT);
+	er32(MPTC);
+	er32(BPTC);
+}
+
+/**
+ *  e1000e_check_for_copper_link - Check for link (Copper)
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks to see of the link status of the hardware has changed.  If a
+ *  change in link status has been detected, then we read the PHY registers
+ *  to get the current speed/duplex if link exists.
+ **/
+s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	s32 ret_val;
+	bool link;
+
+	/* We only want to go out to the PHY registers to see if Auto-Neg
+	 * has completed and/or if our link status has changed.  The
+	 * get_link_status flag is set upon receiving a Link Status
+	 * Change or Rx Sequence Error interrupt.
+	 */
+	if (!mac->get_link_status)
+		return 0;
+	mac->get_link_status = false;
+
+	/* First we want to see if the MII Status Register reports
+	 * link.  If so, then we want to get the current speed/duplex
+	 * of the PHY.
+	 */
+	ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link);
+	if (ret_val || !link)
+		goto out;
+
+	/* Check if there was DownShift, must be checked
+	 * immediately after link-up
+	 */
+	e1000e_check_downshift(hw);
+
+	/* If we are forcing speed/duplex, then we simply return since
+	 * we have already determined whether we have link or not.
+	 */
+	if (!mac->autoneg)
+		return -E1000_ERR_CONFIG;
+
+	/* Auto-Neg is enabled.  Auto Speed Detection takes care
+	 * of MAC speed/duplex configuration.  So we only need to
+	 * configure Collision Distance in the MAC.
+	 */
+	mac->ops.config_collision_dist(hw);
+
+	/* Configure Flow Control now that Auto-Neg has completed.
+	 * First, we need to restore the desired flow control
+	 * settings because we may have had to re-autoneg with a
+	 * different link partner.
+	 */
+	ret_val = e1000e_config_fc_after_link_up(hw);
+	if (ret_val)
+		e_dbg("Error configuring flow control\n");
+
+	return ret_val;
+
+out:
+	mac->get_link_status = true;
+	return ret_val;
+}
+
+/**
+ *  e1000e_check_for_fiber_link - Check for link (Fiber)
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks for link up on the hardware.  If link is not up and we have
+ *  a signal, then we need to force link up.
+ **/
+s32 e1000e_check_for_fiber_link(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 rxcw;
+	u32 ctrl;
+	u32 status;
+	s32 ret_val;
+
+	ctrl = er32(CTRL);
+	status = er32(STATUS);
+	rxcw = er32(RXCW);
+
+	/* If we don't have link (auto-negotiation failed or link partner
+	 * cannot auto-negotiate), the cable is plugged in (we have signal),
+	 * and our link partner is not trying to auto-negotiate with us (we
+	 * are receiving idles or data), we need to force link up. We also
+	 * need to give auto-negotiation time to complete, in case the cable
+	 * was just plugged in. The autoneg_failed flag does this.
+	 */
+	/* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */
+	if ((ctrl & E1000_CTRL_SWDPIN1) && !(status & E1000_STATUS_LU) &&
+	    !(rxcw & E1000_RXCW_C)) {
+		if (!mac->autoneg_failed) {
+			mac->autoneg_failed = true;
+			return 0;
+		}
+		e_dbg("NOT Rx'ing /C/, disable AutoNeg and force link.\n");
+
+		/* Disable auto-negotiation in the TXCW register */
+		ew32(TXCW, (mac->txcw & ~E1000_TXCW_ANE));
+
+		/* Force link-up and also force full-duplex. */
+		ctrl = er32(CTRL);
+		ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD);
+		ew32(CTRL, ctrl);
+
+		/* Configure Flow Control after forcing link up. */
+		ret_val = e1000e_config_fc_after_link_up(hw);
+		if (ret_val) {
+			e_dbg("Error configuring flow control\n");
+			return ret_val;
+		}
+	} else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) {
+		/* If we are forcing link and we are receiving /C/ ordered
+		 * sets, re-enable auto-negotiation in the TXCW register
+		 * and disable forced link in the Device Control register
+		 * in an attempt to auto-negotiate with our link partner.
+		 */
+		e_dbg("Rx'ing /C/, enable AutoNeg and stop forcing link.\n");
+		ew32(TXCW, mac->txcw);
+		ew32(CTRL, (ctrl & ~E1000_CTRL_SLU));
+
+		mac->serdes_has_link = true;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_check_for_serdes_link - Check for link (Serdes)
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks for link up on the hardware.  If link is not up and we have
+ *  a signal, then we need to force link up.
+ **/
+s32 e1000e_check_for_serdes_link(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 rxcw;
+	u32 ctrl;
+	u32 status;
+	s32 ret_val;
+
+	ctrl = er32(CTRL);
+	status = er32(STATUS);
+	rxcw = er32(RXCW);
+
+	/* If we don't have link (auto-negotiation failed or link partner
+	 * cannot auto-negotiate), and our link partner is not trying to
+	 * auto-negotiate with us (we are receiving idles or data),
+	 * we need to force link up. We also need to give auto-negotiation
+	 * time to complete.
+	 */
+	/* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */
+	if (!(status & E1000_STATUS_LU) && !(rxcw & E1000_RXCW_C)) {
+		if (!mac->autoneg_failed) {
+			mac->autoneg_failed = true;
+			return 0;
+		}
+		e_dbg("NOT Rx'ing /C/, disable AutoNeg and force link.\n");
+
+		/* Disable auto-negotiation in the TXCW register */
+		ew32(TXCW, (mac->txcw & ~E1000_TXCW_ANE));
+
+		/* Force link-up and also force full-duplex. */
+		ctrl = er32(CTRL);
+		ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD);
+		ew32(CTRL, ctrl);
+
+		/* Configure Flow Control after forcing link up. */
+		ret_val = e1000e_config_fc_after_link_up(hw);
+		if (ret_val) {
+			e_dbg("Error configuring flow control\n");
+			return ret_val;
+		}
+	} else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) {
+		/* If we are forcing link and we are receiving /C/ ordered
+		 * sets, re-enable auto-negotiation in the TXCW register
+		 * and disable forced link in the Device Control register
+		 * in an attempt to auto-negotiate with our link partner.
+		 */
+		e_dbg("Rx'ing /C/, enable AutoNeg and stop forcing link.\n");
+		ew32(TXCW, mac->txcw);
+		ew32(CTRL, (ctrl & ~E1000_CTRL_SLU));
+
+		mac->serdes_has_link = true;
+	} else if (!(E1000_TXCW_ANE & er32(TXCW))) {
+		/* If we force link for non-auto-negotiation switch, check
+		 * link status based on MAC synchronization for internal
+		 * serdes media type.
+		 */
+		/* SYNCH bit and IV bit are sticky. */
+		usleep_range(10, 20);
+		rxcw = er32(RXCW);
+		if (rxcw & E1000_RXCW_SYNCH) {
+			if (!(rxcw & E1000_RXCW_IV)) {
+				mac->serdes_has_link = true;
+				e_dbg("SERDES: Link up - forced.\n");
+			}
+		} else {
+			mac->serdes_has_link = false;
+			e_dbg("SERDES: Link down - force failed.\n");
+		}
+	}
+
+	if (E1000_TXCW_ANE & er32(TXCW)) {
+		status = er32(STATUS);
+		if (status & E1000_STATUS_LU) {
+			/* SYNCH bit and IV bit are sticky, so reread rxcw. */
+			usleep_range(10, 20);
+			rxcw = er32(RXCW);
+			if (rxcw & E1000_RXCW_SYNCH) {
+				if (!(rxcw & E1000_RXCW_IV)) {
+					mac->serdes_has_link = true;
+					e_dbg("SERDES: Link up - autoneg completed successfully.\n");
+				} else {
+					mac->serdes_has_link = false;
+					e_dbg("SERDES: Link down - invalid codewords detected in autoneg.\n");
+				}
+			} else {
+				mac->serdes_has_link = false;
+				e_dbg("SERDES: Link down - no sync.\n");
+			}
+		} else {
+			mac->serdes_has_link = false;
+			e_dbg("SERDES: Link down - autoneg failed\n");
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_set_default_fc_generic - Set flow control default values
+ *  @hw: pointer to the HW structure
+ *
+ *  Read the EEPROM for the default values for flow control and store the
+ *  values.
+ **/
+static s32 e1000_set_default_fc_generic(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 nvm_data;
+
+	/* Read and store word 0x0F of the EEPROM. This word contains bits
+	 * that determine the hardware's default PAUSE (flow control) mode,
+	 * a bit that determines whether the HW defaults to enabling or
+	 * disabling auto-negotiation, and the direction of the
+	 * SW defined pins. If there is no SW over-ride of the flow
+	 * control setting, then the variable hw->fc will
+	 * be initialized based on a value in the EEPROM.
+	 */
+	ret_val = e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &nvm_data);
+
+	if (ret_val) {
+		e_dbg("NVM Read Error\n");
+		return ret_val;
+	}
+
+	if (!(nvm_data & NVM_WORD0F_PAUSE_MASK))
+		hw->fc.requested_mode = e1000_fc_none;
+	else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == NVM_WORD0F_ASM_DIR)
+		hw->fc.requested_mode = e1000_fc_tx_pause;
+	else
+		hw->fc.requested_mode = e1000_fc_full;
+
+	return 0;
+}
+
+/**
+ *  e1000e_setup_link_generic - Setup flow control and link settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Determines which flow control settings to use, then configures flow
+ *  control.  Calls the appropriate media-specific link configuration
+ *  function.  Assuming the adapter has a valid link partner, a valid link
+ *  should be established.  Assumes the hardware has previously been reset
+ *  and the transmitter and receiver are not enabled.
+ **/
+s32 e1000e_setup_link_generic(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	/* In the case of the phy reset being blocked, we already have a link.
+	 * We do not need to set it up again.
+	 */
+	if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw))
+		return 0;
+
+	/* If requested flow control is set to default, set flow control
+	 * based on the EEPROM flow control settings.
+	 */
+	if (hw->fc.requested_mode == e1000_fc_default) {
+		ret_val = e1000_set_default_fc_generic(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Save off the requested flow control mode for use later.  Depending
+	 * on the link partner's capabilities, we may or may not use this mode.
+	 */
+	hw->fc.current_mode = hw->fc.requested_mode;
+
+	e_dbg("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode);
+
+	/* Call the necessary media_type subroutine to configure the link. */
+	ret_val = hw->mac.ops.setup_physical_interface(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Initialize the flow control address, type, and PAUSE timer
+	 * registers to their default values.  This is done even if flow
+	 * control is disabled, because it does not hurt anything to
+	 * initialize these registers.
+	 */
+	e_dbg("Initializing the Flow Control address, type and timer regs\n");
+	ew32(FCT, FLOW_CONTROL_TYPE);
+	ew32(FCAH, FLOW_CONTROL_ADDRESS_HIGH);
+	ew32(FCAL, FLOW_CONTROL_ADDRESS_LOW);
+
+	ew32(FCTTV, hw->fc.pause_time);
+
+	return e1000e_set_fc_watermarks(hw);
+}
+
+/**
+ *  e1000_commit_fc_settings_generic - Configure flow control
+ *  @hw: pointer to the HW structure
+ *
+ *  Write the flow control settings to the Transmit Config Word Register (TXCW)
+ *  base on the flow control settings in e1000_mac_info.
+ **/
+static s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 txcw;
+
+	/* Check for a software override of the flow control settings, and
+	 * setup the device accordingly.  If auto-negotiation is enabled, then
+	 * software will have to set the "PAUSE" bits to the correct value in
+	 * the Transmit Config Word Register (TXCW) and re-start auto-
+	 * negotiation.  However, if auto-negotiation is disabled, then
+	 * software will have to manually configure the two flow control enable
+	 * bits in the CTRL register.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *      0:  Flow control is completely disabled
+	 *      1:  Rx flow control is enabled (we can receive pause frames,
+	 *          but not send pause frames).
+	 *      2:  Tx flow control is enabled (we can send pause frames but we
+	 *          do not support receiving pause frames).
+	 *      3:  Both Rx and Tx flow control (symmetric) are enabled.
+	 */
+	switch (hw->fc.current_mode) {
+	case e1000_fc_none:
+		/* Flow control completely disabled by a software over-ride. */
+		txcw = (E1000_TXCW_ANE | E1000_TXCW_FD);
+		break;
+	case e1000_fc_rx_pause:
+		/* Rx Flow control is enabled and Tx Flow control is disabled
+		 * by a software over-ride. Since there really isn't a way to
+		 * advertise that we are capable of Rx Pause ONLY, we will
+		 * advertise that we support both symmetric and asymmetric Rx
+		 * PAUSE.  Later, we will disable the adapter's ability to send
+		 * PAUSE frames.
+		 */
+		txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK);
+		break;
+	case e1000_fc_tx_pause:
+		/* Tx Flow control is enabled, and Rx Flow control is disabled,
+		 * by a software over-ride.
+		 */
+		txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR);
+		break;
+	case e1000_fc_full:
+		/* Flow control (both Rx and Tx) is enabled by a software
+		 * over-ride.
+		 */
+		txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK);
+		break;
+	default:
+		e_dbg("Flow control param set incorrectly\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	ew32(TXCW, txcw);
+	mac->txcw = txcw;
+
+	return 0;
+}
+
+/**
+ *  e1000_poll_fiber_serdes_link_generic - Poll for link up
+ *  @hw: pointer to the HW structure
+ *
+ *  Polls for link up by reading the status register, if link fails to come
+ *  up with auto-negotiation, then the link is forced if a signal is detected.
+ **/
+static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 i, status;
+	s32 ret_val;
+
+	/* If we have a signal (the cable is plugged in, or assumed true for
+	 * serdes media) then poll for a "Link-Up" indication in the Device
+	 * Status Register.  Time-out if a link isn't seen in 500 milliseconds
+	 * seconds (Auto-negotiation should complete in less than 500
+	 * milliseconds even if the other end is doing it in SW).
+	 */
+	for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) {
+		usleep_range(10000, 11000);
+		status = er32(STATUS);
+		if (status & E1000_STATUS_LU)
+			break;
+	}
+	if (i == FIBER_LINK_UP_LIMIT) {
+		e_dbg("Never got a valid link from auto-neg!!!\n");
+		mac->autoneg_failed = true;
+		/* AutoNeg failed to achieve a link, so we'll call
+		 * mac->check_for_link. This routine will force the
+		 * link up if we detect a signal. This will allow us to
+		 * communicate with non-autonegotiating link partners.
+		 */
+		ret_val = mac->ops.check_for_link(hw);
+		if (ret_val) {
+			e_dbg("Error while checking for link\n");
+			return ret_val;
+		}
+		mac->autoneg_failed = false;
+	} else {
+		mac->autoneg_failed = false;
+		e_dbg("Valid Link Found\n");
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_setup_fiber_serdes_link - Setup link for fiber/serdes
+ *  @hw: pointer to the HW structure
+ *
+ *  Configures collision distance and flow control for fiber and serdes
+ *  links.  Upon successful setup, poll for link.
+ **/
+s32 e1000e_setup_fiber_serdes_link(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 ret_val;
+
+	ctrl = er32(CTRL);
+
+	/* Take the link out of reset */
+	ctrl &= ~E1000_CTRL_LRST;
+
+	hw->mac.ops.config_collision_dist(hw);
+
+	ret_val = e1000_commit_fc_settings_generic(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Since auto-negotiation is enabled, take the link out of reset (the
+	 * link will be in reset, because we previously reset the chip). This
+	 * will restart auto-negotiation.  If auto-negotiation is successful
+	 * then the link-up status bit will be set and the flow control enable
+	 * bits (RFCE and TFCE) will be set according to their negotiated value.
+	 */
+	e_dbg("Auto-negotiation enabled\n");
+
+	ew32(CTRL, ctrl);
+	e1e_flush();
+	usleep_range(1000, 2000);
+
+	/* For these adapters, the SW definable pin 1 is set when the optics
+	 * detect a signal.  If we have a signal, then poll for a "Link-Up"
+	 * indication.
+	 */
+	if (hw->phy.media_type == e1000_media_type_internal_serdes ||
+	    (er32(CTRL) & E1000_CTRL_SWDPIN1)) {
+		ret_val = e1000_poll_fiber_serdes_link_generic(hw);
+	} else {
+		e_dbg("No signal detected\n");
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000e_config_collision_dist_generic - Configure collision distance
+ *  @hw: pointer to the HW structure
+ *
+ *  Configures the collision distance to the default value and is used
+ *  during link setup.
+ **/
+void e1000e_config_collision_dist_generic(struct e1000_hw *hw)
+{
+	u32 tctl;
+
+	tctl = er32(TCTL);
+
+	tctl &= ~E1000_TCTL_COLD;
+	tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT;
+
+	ew32(TCTL, tctl);
+	e1e_flush();
+}
+
+/**
+ *  e1000e_set_fc_watermarks - Set flow control high/low watermarks
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets the flow control high/low threshold (watermark) registers.  If
+ *  flow control XON frame transmission is enabled, then set XON frame
+ *  transmission as well.
+ **/
+s32 e1000e_set_fc_watermarks(struct e1000_hw *hw)
+{
+	u32 fcrtl = 0, fcrth = 0;
+
+	/* Set the flow control receive threshold registers.  Normally,
+	 * these registers will be set to a default threshold that may be
+	 * adjusted later by the driver's runtime code.  However, if the
+	 * ability to transmit pause frames is not enabled, then these
+	 * registers will be set to 0.
+	 */
+	if (hw->fc.current_mode & e1000_fc_tx_pause) {
+		/* We need to set up the Receive Threshold high and low water
+		 * marks as well as (optionally) enabling the transmission of
+		 * XON frames.
+		 */
+		fcrtl = hw->fc.low_water;
+		if (hw->fc.send_xon)
+			fcrtl |= E1000_FCRTL_XONE;
+
+		fcrth = hw->fc.high_water;
+	}
+	ew32(FCRTL, fcrtl);
+	ew32(FCRTH, fcrth);
+
+	return 0;
+}
+
+/**
+ *  e1000e_force_mac_fc - Force the MAC's flow control settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Force the MAC's flow control settings.  Sets the TFCE and RFCE bits in the
+ *  device control register to reflect the adapter settings.  TFCE and RFCE
+ *  need to be explicitly set by software when a copper PHY is used because
+ *  autonegotiation is managed by the PHY rather than the MAC.  Software must
+ *  also configure these bits when link is forced on a fiber connection.
+ **/
+s32 e1000e_force_mac_fc(struct e1000_hw *hw)
+{
+	u32 ctrl;
+
+	ctrl = er32(CTRL);
+
+	/* Because we didn't get link via the internal auto-negotiation
+	 * mechanism (we either forced link or we got link via PHY
+	 * auto-neg), we have to manually enable/disable transmit an
+	 * receive flow control.
+	 *
+	 * The "Case" statement below enables/disable flow control
+	 * according to the "hw->fc.current_mode" parameter.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *      0:  Flow control is completely disabled
+	 *      1:  Rx flow control is enabled (we can receive pause
+	 *          frames but not send pause frames).
+	 *      2:  Tx flow control is enabled (we can send pause frames
+	 *          but we do not receive pause frames).
+	 *      3:  Both Rx and Tx flow control (symmetric) is enabled.
+	 *  other:  No other values should be possible at this point.
+	 */
+	e_dbg("hw->fc.current_mode = %u\n", hw->fc.current_mode);
+
+	switch (hw->fc.current_mode) {
+	case e1000_fc_none:
+		ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE));
+		break;
+	case e1000_fc_rx_pause:
+		ctrl &= (~E1000_CTRL_TFCE);
+		ctrl |= E1000_CTRL_RFCE;
+		break;
+	case e1000_fc_tx_pause:
+		ctrl &= (~E1000_CTRL_RFCE);
+		ctrl |= E1000_CTRL_TFCE;
+		break;
+	case e1000_fc_full:
+		ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE);
+		break;
+	default:
+		e_dbg("Flow control param set incorrectly\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	ew32(CTRL, ctrl);
+
+	return 0;
+}
+
+/**
+ *  e1000e_config_fc_after_link_up - Configures flow control after link
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks the status of auto-negotiation after link up to ensure that the
+ *  speed and duplex were not forced.  If the link needed to be forced, then
+ *  flow control needs to be forced also.  If auto-negotiation is enabled
+ *  and did not fail, then we configure flow control based on our link
+ *  partner.
+ **/
+s32 e1000e_config_fc_after_link_up(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	s32 ret_val = 0;
+	u32 pcs_status_reg, pcs_adv_reg, pcs_lp_ability_reg, pcs_ctrl_reg;
+	u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg;
+	u16 speed, duplex;
+
+	/* Check for the case where we have fiber media and auto-neg failed
+	 * so we had to force link.  In this case, we need to force the
+	 * configuration of the MAC to match the "fc" parameter.
+	 */
+	if (mac->autoneg_failed) {
+		if (hw->phy.media_type == e1000_media_type_fiber ||
+		    hw->phy.media_type == e1000_media_type_internal_serdes)
+			ret_val = e1000e_force_mac_fc(hw);
+	} else {
+		if (hw->phy.media_type == e1000_media_type_copper)
+			ret_val = e1000e_force_mac_fc(hw);
+	}
+
+	if (ret_val) {
+		e_dbg("Error forcing flow control settings\n");
+		return ret_val;
+	}
+
+	/* Check for the case where we have copper media and auto-neg is
+	 * enabled.  In this case, we need to check and see if Auto-Neg
+	 * has completed, and if so, how the PHY and link partner has
+	 * flow control configured.
+	 */
+	if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) {
+		/* Read the MII Status Register and check to see if AutoNeg
+		 * has completed.  We read this twice because this reg has
+		 * some "sticky" (latched) bits.
+		 */
+		ret_val = e1e_rphy(hw, MII_BMSR, &mii_status_reg);
+		if (ret_val)
+			return ret_val;
+		ret_val = e1e_rphy(hw, MII_BMSR, &mii_status_reg);
+		if (ret_val)
+			return ret_val;
+
+		if (!(mii_status_reg & BMSR_ANEGCOMPLETE)) {
+			e_dbg("Copper PHY and Auto Neg has not completed.\n");
+			return ret_val;
+		}
+
+		/* The AutoNeg process has completed, so we now need to
+		 * read both the Auto Negotiation Advertisement
+		 * Register (Address 4) and the Auto_Negotiation Base
+		 * Page Ability Register (Address 5) to determine how
+		 * flow control was negotiated.
+		 */
+		ret_val = e1e_rphy(hw, MII_ADVERTISE, &mii_nway_adv_reg);
+		if (ret_val)
+			return ret_val;
+		ret_val = e1e_rphy(hw, MII_LPA, &mii_nway_lp_ability_reg);
+		if (ret_val)
+			return ret_val;
+
+		/* Two bits in the Auto Negotiation Advertisement Register
+		 * (Address 4) and two bits in the Auto Negotiation Base
+		 * Page Ability Register (Address 5) determine flow control
+		 * for both the PHY and the link partner.  The following
+		 * table, taken out of the IEEE 802.3ab/D6.0 dated March 25,
+		 * 1999, describes these PAUSE resolution bits and how flow
+		 * control is determined based upon these settings.
+		 * NOTE:  DC = Don't Care
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    0    |  DC   |   DC    | e1000_fc_none
+		 *   0   |    1    |   0   |   DC    | e1000_fc_none
+		 *   0   |    1    |   1   |    0    | e1000_fc_none
+		 *   0   |    1    |   1   |    1    | e1000_fc_tx_pause
+		 *   1   |    0    |   0   |   DC    | e1000_fc_none
+		 *   1   |   DC    |   1   |   DC    | e1000_fc_full
+		 *   1   |    1    |   0   |    0    | e1000_fc_none
+		 *   1   |    1    |   0   |    1    | e1000_fc_rx_pause
+		 *
+		 * Are both PAUSE bits set to 1?  If so, this implies
+		 * Symmetric Flow Control is enabled at both ends.  The
+		 * ASM_DIR bits are irrelevant per the spec.
+		 *
+		 * For Symmetric Flow Control:
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |   DC    |   1   |   DC    | E1000_fc_full
+		 *
+		 */
+		if ((mii_nway_adv_reg & ADVERTISE_PAUSE_CAP) &&
+		    (mii_nway_lp_ability_reg & LPA_PAUSE_CAP)) {
+			/* Now we need to check if the user selected Rx ONLY
+			 * of pause frames.  In this case, we had to advertise
+			 * FULL flow control because we could not advertise Rx
+			 * ONLY. Hence, we must now check to see if we need to
+			 * turn OFF the TRANSMISSION of PAUSE frames.
+			 */
+			if (hw->fc.requested_mode == e1000_fc_full) {
+				hw->fc.current_mode = e1000_fc_full;
+				e_dbg("Flow Control = FULL.\n");
+			} else {
+				hw->fc.current_mode = e1000_fc_rx_pause;
+				e_dbg("Flow Control = Rx PAUSE frames only.\n");
+			}
+		}
+		/* For receiving PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    1    |   1   |    1    | e1000_fc_tx_pause
+		 */
+		else if (!(mii_nway_adv_reg & ADVERTISE_PAUSE_CAP) &&
+			 (mii_nway_adv_reg & ADVERTISE_PAUSE_ASYM) &&
+			 (mii_nway_lp_ability_reg & LPA_PAUSE_CAP) &&
+			 (mii_nway_lp_ability_reg & LPA_PAUSE_ASYM)) {
+			hw->fc.current_mode = e1000_fc_tx_pause;
+			e_dbg("Flow Control = Tx PAUSE frames only.\n");
+		}
+		/* For transmitting PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |    1    |   0   |    1    | e1000_fc_rx_pause
+		 */
+		else if ((mii_nway_adv_reg & ADVERTISE_PAUSE_CAP) &&
+			 (mii_nway_adv_reg & ADVERTISE_PAUSE_ASYM) &&
+			 !(mii_nway_lp_ability_reg & LPA_PAUSE_CAP) &&
+			 (mii_nway_lp_ability_reg & LPA_PAUSE_ASYM)) {
+			hw->fc.current_mode = e1000_fc_rx_pause;
+			e_dbg("Flow Control = Rx PAUSE frames only.\n");
+		} else {
+			/* Per the IEEE spec, at this point flow control
+			 * should be disabled.
+			 */
+			hw->fc.current_mode = e1000_fc_none;
+			e_dbg("Flow Control = NONE.\n");
+		}
+
+		/* Now we need to do one last check...  If we auto-
+		 * negotiated to HALF DUPLEX, flow control should not be
+		 * enabled per IEEE 802.3 spec.
+		 */
+		ret_val = mac->ops.get_link_up_info(hw, &speed, &duplex);
+		if (ret_val) {
+			e_dbg("Error getting link speed and duplex\n");
+			return ret_val;
+		}
+
+		if (duplex == HALF_DUPLEX)
+			hw->fc.current_mode = e1000_fc_none;
+
+		/* Now we call a subroutine to actually force the MAC
+		 * controller to use the correct flow control settings.
+		 */
+		ret_val = e1000e_force_mac_fc(hw);
+		if (ret_val) {
+			e_dbg("Error forcing flow control settings\n");
+			return ret_val;
+		}
+	}
+
+	/* Check for the case where we have SerDes media and auto-neg is
+	 * enabled.  In this case, we need to check and see if Auto-Neg
+	 * has completed, and if so, how the PHY and link partner has
+	 * flow control configured.
+	 */
+	if ((hw->phy.media_type == e1000_media_type_internal_serdes) &&
+	    mac->autoneg) {
+		/* Read the PCS_LSTS and check to see if AutoNeg
+		 * has completed.
+		 */
+		pcs_status_reg = er32(PCS_LSTAT);
+
+		if (!(pcs_status_reg & E1000_PCS_LSTS_AN_COMPLETE)) {
+			e_dbg("PCS Auto Neg has not completed.\n");
+			return ret_val;
+		}
+
+		/* The AutoNeg process has completed, so we now need to
+		 * read both the Auto Negotiation Advertisement
+		 * Register (PCS_ANADV) and the Auto_Negotiation Base
+		 * Page Ability Register (PCS_LPAB) to determine how
+		 * flow control was negotiated.
+		 */
+		pcs_adv_reg = er32(PCS_ANADV);
+		pcs_lp_ability_reg = er32(PCS_LPAB);
+
+		/* Two bits in the Auto Negotiation Advertisement Register
+		 * (PCS_ANADV) and two bits in the Auto Negotiation Base
+		 * Page Ability Register (PCS_LPAB) determine flow control
+		 * for both the PHY and the link partner.  The following
+		 * table, taken out of the IEEE 802.3ab/D6.0 dated March 25,
+		 * 1999, describes these PAUSE resolution bits and how flow
+		 * control is determined based upon these settings.
+		 * NOTE:  DC = Don't Care
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    0    |  DC   |   DC    | e1000_fc_none
+		 *   0   |    1    |   0   |   DC    | e1000_fc_none
+		 *   0   |    1    |   1   |    0    | e1000_fc_none
+		 *   0   |    1    |   1   |    1    | e1000_fc_tx_pause
+		 *   1   |    0    |   0   |   DC    | e1000_fc_none
+		 *   1   |   DC    |   1   |   DC    | e1000_fc_full
+		 *   1   |    1    |   0   |    0    | e1000_fc_none
+		 *   1   |    1    |   0   |    1    | e1000_fc_rx_pause
+		 *
+		 * Are both PAUSE bits set to 1?  If so, this implies
+		 * Symmetric Flow Control is enabled at both ends.  The
+		 * ASM_DIR bits are irrelevant per the spec.
+		 *
+		 * For Symmetric Flow Control:
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |   DC    |   1   |   DC    | e1000_fc_full
+		 *
+		 */
+		if ((pcs_adv_reg & E1000_TXCW_PAUSE) &&
+		    (pcs_lp_ability_reg & E1000_TXCW_PAUSE)) {
+			/* Now we need to check if the user selected Rx ONLY
+			 * of pause frames.  In this case, we had to advertise
+			 * FULL flow control because we could not advertise Rx
+			 * ONLY. Hence, we must now check to see if we need to
+			 * turn OFF the TRANSMISSION of PAUSE frames.
+			 */
+			if (hw->fc.requested_mode == e1000_fc_full) {
+				hw->fc.current_mode = e1000_fc_full;
+				e_dbg("Flow Control = FULL.\n");
+			} else {
+				hw->fc.current_mode = e1000_fc_rx_pause;
+				e_dbg("Flow Control = Rx PAUSE frames only.\n");
+			}
+		}
+		/* For receiving PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    1    |   1   |    1    | e1000_fc_tx_pause
+		 */
+		else if (!(pcs_adv_reg & E1000_TXCW_PAUSE) &&
+			 (pcs_adv_reg & E1000_TXCW_ASM_DIR) &&
+			 (pcs_lp_ability_reg & E1000_TXCW_PAUSE) &&
+			 (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) {
+			hw->fc.current_mode = e1000_fc_tx_pause;
+			e_dbg("Flow Control = Tx PAUSE frames only.\n");
+		}
+		/* For transmitting PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |    1    |   0   |    1    | e1000_fc_rx_pause
+		 */
+		else if ((pcs_adv_reg & E1000_TXCW_PAUSE) &&
+			 (pcs_adv_reg & E1000_TXCW_ASM_DIR) &&
+			 !(pcs_lp_ability_reg & E1000_TXCW_PAUSE) &&
+			 (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) {
+			hw->fc.current_mode = e1000_fc_rx_pause;
+			e_dbg("Flow Control = Rx PAUSE frames only.\n");
+		} else {
+			/* Per the IEEE spec, at this point flow control
+			 * should be disabled.
+			 */
+			hw->fc.current_mode = e1000_fc_none;
+			e_dbg("Flow Control = NONE.\n");
+		}
+
+		/* Now we call a subroutine to actually force the MAC
+		 * controller to use the correct flow control settings.
+		 */
+		pcs_ctrl_reg = er32(PCS_LCTL);
+		pcs_ctrl_reg |= E1000_PCS_LCTL_FORCE_FCTRL;
+		ew32(PCS_LCTL, pcs_ctrl_reg);
+
+		ret_val = e1000e_force_mac_fc(hw);
+		if (ret_val) {
+			e_dbg("Error forcing flow control settings\n");
+			return ret_val;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_get_speed_and_duplex_copper - Retrieve current speed/duplex
+ *  @hw: pointer to the HW structure
+ *  @speed: stores the current speed
+ *  @duplex: stores the current duplex
+ *
+ *  Read the status register for the current speed/duplex and store the current
+ *  speed and duplex for copper connections.
+ **/
+s32 e1000e_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed,
+				       u16 *duplex)
+{
+	u32 status;
+
+	status = er32(STATUS);
+	if (status & E1000_STATUS_SPEED_1000)
+		*speed = SPEED_1000;
+	else if (status & E1000_STATUS_SPEED_100)
+		*speed = SPEED_100;
+	else
+		*speed = SPEED_10;
+
+	if (status & E1000_STATUS_FD)
+		*duplex = FULL_DUPLEX;
+	else
+		*duplex = HALF_DUPLEX;
+
+	e_dbg("%u Mbps, %s Duplex\n",
+	      *speed == SPEED_1000 ? 1000 : *speed == SPEED_100 ? 100 : 10,
+	      *duplex == FULL_DUPLEX ? "Full" : "Half");
+
+	return 0;
+}
+
+/**
+ *  e1000e_get_speed_and_duplex_fiber_serdes - Retrieve current speed/duplex
+ *  @hw: pointer to the HW structure
+ *  @speed: stores the current speed
+ *  @duplex: stores the current duplex
+ *
+ *  Sets the speed and duplex to gigabit full duplex (the only possible option)
+ *  for fiber/serdes links.
+ **/
+s32 e1000e_get_speed_and_duplex_fiber_serdes(struct e1000_hw __always_unused
+					     *hw, u16 *speed, u16 *duplex)
+{
+	*speed = SPEED_1000;
+	*duplex = FULL_DUPLEX;
+
+	return 0;
+}
+
+/**
+ *  e1000e_get_hw_semaphore - Acquire hardware semaphore
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire the HW semaphore to access the PHY or NVM
+ **/
+s32 e1000e_get_hw_semaphore(struct e1000_hw *hw)
+{
+	u32 swsm;
+	s32 timeout = hw->nvm.word_size + 1;
+	s32 i = 0;
+
+	/* Get the SW semaphore */
+	while (i < timeout) {
+		swsm = er32(SWSM);
+		if (!(swsm & E1000_SWSM_SMBI))
+			break;
+
+		udelay(100);
+		i++;
+	}
+
+	if (i == timeout) {
+		e_dbg("Driver can't access device - SMBI bit is set.\n");
+		return -E1000_ERR_NVM;
+	}
+
+	/* Get the FW semaphore. */
+	for (i = 0; i < timeout; i++) {
+		swsm = er32(SWSM);
+		ew32(SWSM, swsm | E1000_SWSM_SWESMBI);
+
+		/* Semaphore acquired if bit latched */
+		if (er32(SWSM) & E1000_SWSM_SWESMBI)
+			break;
+
+		udelay(100);
+	}
+
+	if (i == timeout) {
+		/* Release semaphores */
+		e1000e_put_hw_semaphore(hw);
+		e_dbg("Driver can't access the NVM\n");
+		return -E1000_ERR_NVM;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_put_hw_semaphore - Release hardware semaphore
+ *  @hw: pointer to the HW structure
+ *
+ *  Release hardware semaphore used to access the PHY or NVM
+ **/
+void e1000e_put_hw_semaphore(struct e1000_hw *hw)
+{
+	u32 swsm;
+
+	swsm = er32(SWSM);
+	swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI);
+	ew32(SWSM, swsm);
+}
+
+/**
+ *  e1000e_get_auto_rd_done - Check for auto read completion
+ *  @hw: pointer to the HW structure
+ *
+ *  Check EEPROM for Auto Read done bit.
+ **/
+s32 e1000e_get_auto_rd_done(struct e1000_hw *hw)
+{
+	s32 i = 0;
+
+	while (i < AUTO_READ_DONE_TIMEOUT) {
+		if (er32(EECD) & E1000_EECD_AUTO_RD)
+			break;
+		usleep_range(1000, 2000);
+		i++;
+	}
+
+	if (i == AUTO_READ_DONE_TIMEOUT) {
+		e_dbg("Auto read by HW from NVM has not completed.\n");
+		return -E1000_ERR_RESET;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_valid_led_default - Verify a valid default LED config
+ *  @hw: pointer to the HW structure
+ *  @data: pointer to the NVM (EEPROM)
+ *
+ *  Read the EEPROM for the current default LED configuration.  If the
+ *  LED configuration is not valid, set to a valid LED configuration.
+ **/
+s32 e1000e_valid_led_default(struct e1000_hw *hw, u16 *data)
+{
+	s32 ret_val;
+
+	ret_val = e1000_read_nvm(hw, NVM_ID_LED_SETTINGS, 1, data);
+	if (ret_val) {
+		e_dbg("NVM Read Error\n");
+		return ret_val;
+	}
+
+	if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF)
+		*data = ID_LED_DEFAULT;
+
+	return 0;
+}
+
+/**
+ *  e1000e_id_led_init_generic -
+ *  @hw: pointer to the HW structure
+ *
+ **/
+s32 e1000e_id_led_init_generic(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	s32 ret_val;
+	const u32 ledctl_mask = 0x000000FF;
+	const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON;
+	const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF;
+	u16 data, i, temp;
+	const u16 led_mask = 0x0F;
+
+	ret_val = hw->nvm.ops.valid_led_default(hw, &data);
+	if (ret_val)
+		return ret_val;
+
+	mac->ledctl_default = er32(LEDCTL);
+	mac->ledctl_mode1 = mac->ledctl_default;
+	mac->ledctl_mode2 = mac->ledctl_default;
+
+	for (i = 0; i < 4; i++) {
+		temp = (data >> (i << 2)) & led_mask;
+		switch (temp) {
+		case ID_LED_ON1_DEF2:
+		case ID_LED_ON1_ON2:
+		case ID_LED_ON1_OFF2:
+			mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
+			mac->ledctl_mode1 |= ledctl_on << (i << 3);
+			break;
+		case ID_LED_OFF1_DEF2:
+		case ID_LED_OFF1_ON2:
+		case ID_LED_OFF1_OFF2:
+			mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
+			mac->ledctl_mode1 |= ledctl_off << (i << 3);
+			break;
+		default:
+			/* Do nothing */
+			break;
+		}
+		switch (temp) {
+		case ID_LED_DEF1_ON2:
+		case ID_LED_ON1_ON2:
+		case ID_LED_OFF1_ON2:
+			mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
+			mac->ledctl_mode2 |= ledctl_on << (i << 3);
+			break;
+		case ID_LED_DEF1_OFF2:
+		case ID_LED_ON1_OFF2:
+		case ID_LED_OFF1_OFF2:
+			mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
+			mac->ledctl_mode2 |= ledctl_off << (i << 3);
+			break;
+		default:
+			/* Do nothing */
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_setup_led_generic - Configures SW controllable LED
+ *  @hw: pointer to the HW structure
+ *
+ *  This prepares the SW controllable LED for use and saves the current state
+ *  of the LED so it can be later restored.
+ **/
+s32 e1000e_setup_led_generic(struct e1000_hw *hw)
+{
+	u32 ledctl;
+
+	if (hw->mac.ops.setup_led != e1000e_setup_led_generic)
+		return -E1000_ERR_CONFIG;
+
+	if (hw->phy.media_type == e1000_media_type_fiber) {
+		ledctl = er32(LEDCTL);
+		hw->mac.ledctl_default = ledctl;
+		/* Turn off LED0 */
+		ledctl &= ~(E1000_LEDCTL_LED0_IVRT | E1000_LEDCTL_LED0_BLINK |
+			    E1000_LEDCTL_LED0_MODE_MASK);
+		ledctl |= (E1000_LEDCTL_MODE_LED_OFF <<
+			   E1000_LEDCTL_LED0_MODE_SHIFT);
+		ew32(LEDCTL, ledctl);
+	} else if (hw->phy.media_type == e1000_media_type_copper) {
+		ew32(LEDCTL, hw->mac.ledctl_mode1);
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_cleanup_led_generic - Set LED config to default operation
+ *  @hw: pointer to the HW structure
+ *
+ *  Remove the current LED configuration and set the LED configuration
+ *  to the default value, saved from the EEPROM.
+ **/
+s32 e1000e_cleanup_led_generic(struct e1000_hw *hw)
+{
+	ew32(LEDCTL, hw->mac.ledctl_default);
+	return 0;
+}
+
+/**
+ *  e1000e_blink_led_generic - Blink LED
+ *  @hw: pointer to the HW structure
+ *
+ *  Blink the LEDs which are set to be on.
+ **/
+s32 e1000e_blink_led_generic(struct e1000_hw *hw)
+{
+	u32 ledctl_blink = 0;
+	u32 i;
+
+	if (hw->phy.media_type == e1000_media_type_fiber) {
+		/* always blink LED0 for PCI-E fiber */
+		ledctl_blink = E1000_LEDCTL_LED0_BLINK |
+		    (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT);
+	} else {
+		/* Set the blink bit for each LED that's "on" (0x0E)
+		 * (or "off" if inverted) in ledctl_mode2.  The blink
+		 * logic in hardware only works when mode is set to "on"
+		 * so it must be changed accordingly when the mode is
+		 * "off" and inverted.
+		 */
+		ledctl_blink = hw->mac.ledctl_mode2;
+		for (i = 0; i < 32; i += 8) {
+			u32 mode = (hw->mac.ledctl_mode2 >> i) &
+			    E1000_LEDCTL_LED0_MODE_MASK;
+			u32 led_default = hw->mac.ledctl_default >> i;
+
+			if ((!(led_default & E1000_LEDCTL_LED0_IVRT) &&
+			     (mode == E1000_LEDCTL_MODE_LED_ON)) ||
+			    ((led_default & E1000_LEDCTL_LED0_IVRT) &&
+			     (mode == E1000_LEDCTL_MODE_LED_OFF))) {
+				ledctl_blink &=
+				    ~(E1000_LEDCTL_LED0_MODE_MASK << i);
+				ledctl_blink |= (E1000_LEDCTL_LED0_BLINK |
+						 E1000_LEDCTL_MODE_LED_ON) << i;
+			}
+		}
+	}
+
+	ew32(LEDCTL, ledctl_blink);
+
+	return 0;
+}
+
+/**
+ *  e1000e_led_on_generic - Turn LED on
+ *  @hw: pointer to the HW structure
+ *
+ *  Turn LED on.
+ **/
+s32 e1000e_led_on_generic(struct e1000_hw *hw)
+{
+	u32 ctrl;
+
+	switch (hw->phy.media_type) {
+	case e1000_media_type_fiber:
+		ctrl = er32(CTRL);
+		ctrl &= ~E1000_CTRL_SWDPIN0;
+		ctrl |= E1000_CTRL_SWDPIO0;
+		ew32(CTRL, ctrl);
+		break;
+	case e1000_media_type_copper:
+		ew32(LEDCTL, hw->mac.ledctl_mode2);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_led_off_generic - Turn LED off
+ *  @hw: pointer to the HW structure
+ *
+ *  Turn LED off.
+ **/
+s32 e1000e_led_off_generic(struct e1000_hw *hw)
+{
+	u32 ctrl;
+
+	switch (hw->phy.media_type) {
+	case e1000_media_type_fiber:
+		ctrl = er32(CTRL);
+		ctrl |= E1000_CTRL_SWDPIN0;
+		ctrl |= E1000_CTRL_SWDPIO0;
+		ew32(CTRL, ctrl);
+		break;
+	case e1000_media_type_copper:
+		ew32(LEDCTL, hw->mac.ledctl_mode1);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_set_pcie_no_snoop - Set PCI-express capabilities
+ *  @hw: pointer to the HW structure
+ *  @no_snoop: bitmap of snoop events
+ *
+ *  Set the PCI-express register to snoop for events enabled in 'no_snoop'.
+ **/
+void e1000e_set_pcie_no_snoop(struct e1000_hw *hw, u32 no_snoop)
+{
+	u32 gcr;
+
+	if (no_snoop) {
+		gcr = er32(GCR);
+		gcr &= ~(PCIE_NO_SNOOP_ALL);
+		gcr |= no_snoop;
+		ew32(GCR, gcr);
+	}
+}
+
+/**
+ *  e1000e_disable_pcie_master - Disables PCI-express master access
+ *  @hw: pointer to the HW structure
+ *
+ *  Returns 0 if successful, else returns -10
+ *  (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused
+ *  the master requests to be disabled.
+ *
+ *  Disables PCI-Express master access and verifies there are no pending
+ *  requests.
+ **/
+s32 e1000e_disable_pcie_master(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 timeout = MASTER_DISABLE_TIMEOUT;
+
+	ctrl = er32(CTRL);
+	ctrl |= E1000_CTRL_GIO_MASTER_DISABLE;
+	ew32(CTRL, ctrl);
+
+	while (timeout) {
+		if (!(er32(STATUS) & E1000_STATUS_GIO_MASTER_ENABLE))
+			break;
+		usleep_range(100, 200);
+		timeout--;
+	}
+
+	if (!timeout) {
+		e_dbg("Master requests are pending.\n");
+		return -E1000_ERR_MASTER_REQUESTS_PENDING;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_reset_adaptive - Reset Adaptive Interframe Spacing
+ *  @hw: pointer to the HW structure
+ *
+ *  Reset the Adaptive Interframe Spacing throttle to default values.
+ **/
+void e1000e_reset_adaptive(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+
+	if (!mac->adaptive_ifs) {
+		e_dbg("Not in Adaptive IFS mode!\n");
+		return;
+	}
+
+	mac->current_ifs_val = 0;
+	mac->ifs_min_val = IFS_MIN;
+	mac->ifs_max_val = IFS_MAX;
+	mac->ifs_step_size = IFS_STEP;
+	mac->ifs_ratio = IFS_RATIO;
+
+	mac->in_ifs_mode = false;
+	ew32(AIT, 0);
+}
+
+/**
+ *  e1000e_update_adaptive - Update Adaptive Interframe Spacing
+ *  @hw: pointer to the HW structure
+ *
+ *  Update the Adaptive Interframe Spacing Throttle value based on the
+ *  time between transmitted packets and time between collisions.
+ **/
+void e1000e_update_adaptive(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+
+	if (!mac->adaptive_ifs) {
+		e_dbg("Not in Adaptive IFS mode!\n");
+		return;
+	}
+
+	if ((mac->collision_delta * mac->ifs_ratio) > mac->tx_packet_delta) {
+		if (mac->tx_packet_delta > MIN_NUM_XMITS) {
+			mac->in_ifs_mode = true;
+			if (mac->current_ifs_val < mac->ifs_max_val) {
+				if (!mac->current_ifs_val)
+					mac->current_ifs_val = mac->ifs_min_val;
+				else
+					mac->current_ifs_val +=
+					    mac->ifs_step_size;
+				ew32(AIT, mac->current_ifs_val);
+			}
+		}
+	} else {
+		if (mac->in_ifs_mode &&
+		    (mac->tx_packet_delta <= MIN_NUM_XMITS)) {
+			mac->current_ifs_val = 0;
+			mac->in_ifs_mode = false;
+			ew32(AIT, 0);
+		}
+	}
+}
diff --git a/drivers/net/ethernet/intel/e1000e/mac.h b/drivers/net/ethernet/intel/e1000e/mac.h
new file mode 100644
index 0000000000..563176fd43
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/mac.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _E1000E_MAC_H_
+#define _E1000E_MAC_H_
+
+s32 e1000e_blink_led_generic(struct e1000_hw *hw);
+s32 e1000e_check_for_copper_link(struct e1000_hw *hw);
+s32 e1000e_check_for_fiber_link(struct e1000_hw *hw);
+s32 e1000e_check_for_serdes_link(struct e1000_hw *hw);
+s32 e1000e_cleanup_led_generic(struct e1000_hw *hw);
+s32 e1000e_config_fc_after_link_up(struct e1000_hw *hw);
+s32 e1000e_disable_pcie_master(struct e1000_hw *hw);
+s32 e1000e_force_mac_fc(struct e1000_hw *hw);
+s32 e1000e_get_auto_rd_done(struct e1000_hw *hw);
+s32 e1000e_get_bus_info_pcie(struct e1000_hw *hw);
+void e1000_set_lan_id_single_port(struct e1000_hw *hw);
+s32 e1000e_get_hw_semaphore(struct e1000_hw *hw);
+s32 e1000e_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed,
+				       u16 *duplex);
+s32 e1000e_get_speed_and_duplex_fiber_serdes(struct e1000_hw *hw,
+					     u16 *speed, u16 *duplex);
+s32 e1000e_id_led_init_generic(struct e1000_hw *hw);
+s32 e1000e_led_on_generic(struct e1000_hw *hw);
+s32 e1000e_led_off_generic(struct e1000_hw *hw);
+void e1000e_update_mc_addr_list_generic(struct e1000_hw *hw,
+					u8 *mc_addr_list, u32 mc_addr_count);
+s32 e1000e_set_fc_watermarks(struct e1000_hw *hw);
+s32 e1000e_setup_fiber_serdes_link(struct e1000_hw *hw);
+s32 e1000e_setup_led_generic(struct e1000_hw *hw);
+s32 e1000e_setup_link_generic(struct e1000_hw *hw);
+
+void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw);
+void e1000_clear_vfta_generic(struct e1000_hw *hw);
+void e1000e_init_rx_addrs(struct e1000_hw *hw, u16 rar_count);
+void e1000e_put_hw_semaphore(struct e1000_hw *hw);
+s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw);
+void e1000e_reset_adaptive(struct e1000_hw *hw);
+void e1000e_set_pcie_no_snoop(struct e1000_hw *hw, u32 no_snoop);
+void e1000e_update_adaptive(struct e1000_hw *hw);
+void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value);
+
+void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw);
+u32 e1000e_rar_get_count_generic(struct e1000_hw *hw);
+int e1000e_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index);
+void e1000e_config_collision_dist_generic(struct e1000_hw *hw);
+
+#endif
diff --git a/drivers/net/ethernet/intel/e1000e/manage.c b/drivers/net/ethernet/intel/e1000e/manage.c
new file mode 100644
index 0000000000..c4c9b20bc5
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/manage.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "e1000.h"
+
+/**
+ *  e1000_calculate_checksum - Calculate checksum for buffer
+ *  @buffer: pointer to EEPROM
+ *  @length: size of EEPROM to calculate a checksum for
+ *
+ *  Calculates the checksum for some buffer on a specified length.  The
+ *  checksum calculated is returned.
+ **/
+static u8 e1000_calculate_checksum(u8 *buffer, u32 length)
+{
+	u32 i;
+	u8 sum = 0;
+
+	if (!buffer)
+		return 0;
+
+	for (i = 0; i < length; i++)
+		sum += buffer[i];
+
+	return (u8)(0 - sum);
+}
+
+/**
+ *  e1000_mng_enable_host_if - Checks host interface is enabled
+ *  @hw: pointer to the HW structure
+ *
+ *  Returns 0 upon success, else -E1000_ERR_HOST_INTERFACE_COMMAND
+ *
+ *  This function checks whether the HOST IF is enabled for command operation
+ *  and also checks whether the previous command is completed.  It busy waits
+ *  in case of previous command is not completed.
+ **/
+static s32 e1000_mng_enable_host_if(struct e1000_hw *hw)
+{
+	u32 hicr;
+	u8 i;
+
+	if (!hw->mac.arc_subsystem_valid) {
+		e_dbg("ARC subsystem not valid.\n");
+		return -E1000_ERR_HOST_INTERFACE_COMMAND;
+	}
+
+	/* Check that the host interface is enabled. */
+	hicr = er32(HICR);
+	if (!(hicr & E1000_HICR_EN)) {
+		e_dbg("E1000_HOST_EN bit disabled.\n");
+		return -E1000_ERR_HOST_INTERFACE_COMMAND;
+	}
+	/* check the previous command is completed */
+	for (i = 0; i < E1000_MNG_DHCP_COMMAND_TIMEOUT; i++) {
+		hicr = er32(HICR);
+		if (!(hicr & E1000_HICR_C))
+			break;
+		mdelay(1);
+	}
+
+	if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) {
+		e_dbg("Previous command timeout failed.\n");
+		return -E1000_ERR_HOST_INTERFACE_COMMAND;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_check_mng_mode_generic - Generic check management mode
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the firmware semaphore register and returns true (>0) if
+ *  manageability is enabled, else false (0).
+ **/
+bool e1000e_check_mng_mode_generic(struct e1000_hw *hw)
+{
+	u32 fwsm = er32(FWSM);
+
+	return (fwsm & E1000_FWSM_MODE_MASK) ==
+	    (E1000_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT);
+}
+
+/**
+ *  e1000e_enable_tx_pkt_filtering - Enable packet filtering on Tx
+ *  @hw: pointer to the HW structure
+ *
+ *  Enables packet filtering on transmit packets if manageability is enabled
+ *  and host interface is enabled.
+ **/
+bool e1000e_enable_tx_pkt_filtering(struct e1000_hw *hw)
+{
+	struct e1000_host_mng_dhcp_cookie *hdr = &hw->mng_cookie;
+	u32 *buffer = (u32 *)&hw->mng_cookie;
+	u32 offset;
+	s32 ret_val, hdr_csum, csum;
+	u8 i, len;
+
+	hw->mac.tx_pkt_filtering = true;
+
+	/* No manageability, no filtering */
+	if (!hw->mac.ops.check_mng_mode(hw)) {
+		hw->mac.tx_pkt_filtering = false;
+		return hw->mac.tx_pkt_filtering;
+	}
+
+	/* If we can't read from the host interface for whatever
+	 * reason, disable filtering.
+	 */
+	ret_val = e1000_mng_enable_host_if(hw);
+	if (ret_val) {
+		hw->mac.tx_pkt_filtering = false;
+		return hw->mac.tx_pkt_filtering;
+	}
+
+	/* Read in the header.  Length and offset are in dwords. */
+	len = E1000_MNG_DHCP_COOKIE_LENGTH >> 2;
+	offset = E1000_MNG_DHCP_COOKIE_OFFSET >> 2;
+	for (i = 0; i < len; i++)
+		*(buffer + i) = E1000_READ_REG_ARRAY(hw, E1000_HOST_IF,
+						     offset + i);
+	hdr_csum = hdr->checksum;
+	hdr->checksum = 0;
+	csum = e1000_calculate_checksum((u8 *)hdr,
+					E1000_MNG_DHCP_COOKIE_LENGTH);
+	/* If either the checksums or signature don't match, then
+	 * the cookie area isn't considered valid, in which case we
+	 * take the safe route of assuming Tx filtering is enabled.
+	 */
+	if ((hdr_csum != csum) || (hdr->signature != E1000_IAMT_SIGNATURE)) {
+		hw->mac.tx_pkt_filtering = true;
+		return hw->mac.tx_pkt_filtering;
+	}
+
+	/* Cookie area is valid, make the final check for filtering. */
+	if (!(hdr->status & E1000_MNG_DHCP_COOKIE_STATUS_PARSING))
+		hw->mac.tx_pkt_filtering = false;
+
+	return hw->mac.tx_pkt_filtering;
+}
+
+/**
+ *  e1000_mng_write_cmd_header - Writes manageability command header
+ *  @hw: pointer to the HW structure
+ *  @hdr: pointer to the host interface command header
+ *
+ *  Writes the command header after does the checksum calculation.
+ **/
+static s32 e1000_mng_write_cmd_header(struct e1000_hw *hw,
+				      struct e1000_host_mng_command_header *hdr)
+{
+	u16 i, length = sizeof(struct e1000_host_mng_command_header);
+
+	/* Write the whole command header structure with new checksum. */
+
+	hdr->checksum = e1000_calculate_checksum((u8 *)hdr, length);
+
+	length >>= 2;
+	/* Write the relevant command block into the ram area. */
+	for (i = 0; i < length; i++) {
+		E1000_WRITE_REG_ARRAY(hw, E1000_HOST_IF, i, *((u32 *)hdr + i));
+		e1e_flush();
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_mng_host_if_write - Write to the manageability host interface
+ *  @hw: pointer to the HW structure
+ *  @buffer: pointer to the host interface buffer
+ *  @length: size of the buffer
+ *  @offset: location in the buffer to write to
+ *  @sum: sum of the data (not checksum)
+ *
+ *  This function writes the buffer content at the offset given on the host if.
+ *  It also does alignment considerations to do the writes in most efficient
+ *  way.  Also fills up the sum of the buffer in *buffer parameter.
+ **/
+static s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer,
+				   u16 length, u16 offset, u8 *sum)
+{
+	u8 *tmp;
+	u8 *bufptr = buffer;
+	u32 data = 0;
+	u16 remaining, i, j, prev_bytes;
+
+	/* sum = only sum of the data and it is not checksum */
+
+	if (length == 0 || offset + length > E1000_HI_MAX_MNG_DATA_LENGTH)
+		return -E1000_ERR_PARAM;
+
+	tmp = (u8 *)&data;
+	prev_bytes = offset & 0x3;
+	offset >>= 2;
+
+	if (prev_bytes) {
+		data = E1000_READ_REG_ARRAY(hw, E1000_HOST_IF, offset);
+		for (j = prev_bytes; j < sizeof(u32); j++) {
+			*(tmp + j) = *bufptr++;
+			*sum += *(tmp + j);
+		}
+		E1000_WRITE_REG_ARRAY(hw, E1000_HOST_IF, offset, data);
+		length -= j - prev_bytes;
+		offset++;
+	}
+
+	remaining = length & 0x3;
+	length -= remaining;
+
+	/* Calculate length in DWORDs */
+	length >>= 2;
+
+	/* The device driver writes the relevant command block into the
+	 * ram area.
+	 */
+	for (i = 0; i < length; i++) {
+		for (j = 0; j < sizeof(u32); j++) {
+			*(tmp + j) = *bufptr++;
+			*sum += *(tmp + j);
+		}
+
+		E1000_WRITE_REG_ARRAY(hw, E1000_HOST_IF, offset + i, data);
+	}
+	if (remaining) {
+		for (j = 0; j < sizeof(u32); j++) {
+			if (j < remaining)
+				*(tmp + j) = *bufptr++;
+			else
+				*(tmp + j) = 0;
+
+			*sum += *(tmp + j);
+		}
+		E1000_WRITE_REG_ARRAY(hw, E1000_HOST_IF, offset + i, data);
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_mng_write_dhcp_info - Writes DHCP info to host interface
+ *  @hw: pointer to the HW structure
+ *  @buffer: pointer to the host interface
+ *  @length: size of the buffer
+ *
+ *  Writes the DHCP information to the host interface.
+ **/
+s32 e1000e_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length)
+{
+	struct e1000_host_mng_command_header hdr;
+	s32 ret_val;
+	u32 hicr;
+
+	hdr.command_id = E1000_MNG_DHCP_TX_PAYLOAD_CMD;
+	hdr.command_length = length;
+	hdr.reserved1 = 0;
+	hdr.reserved2 = 0;
+	hdr.checksum = 0;
+
+	/* Enable the host interface */
+	ret_val = e1000_mng_enable_host_if(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Populate the host interface with the contents of "buffer". */
+	ret_val = e1000_mng_host_if_write(hw, buffer, length,
+					  sizeof(hdr), &(hdr.checksum));
+	if (ret_val)
+		return ret_val;
+
+	/* Write the manageability command header */
+	ret_val = e1000_mng_write_cmd_header(hw, &hdr);
+	if (ret_val)
+		return ret_val;
+
+	/* Tell the ARC a new command is pending. */
+	hicr = er32(HICR);
+	ew32(HICR, hicr | E1000_HICR_C);
+
+	return 0;
+}
+
+/**
+ *  e1000e_enable_mng_pass_thru - Check if management passthrough is needed
+ *  @hw: pointer to the HW structure
+ *
+ *  Verifies the hardware needs to leave interface enabled so that frames can
+ *  be directed to and from the management interface.
+ **/
+bool e1000e_enable_mng_pass_thru(struct e1000_hw *hw)
+{
+	u32 manc;
+	u32 fwsm, factps;
+
+	manc = er32(MANC);
+
+	if (!(manc & E1000_MANC_RCV_TCO_EN))
+		return false;
+
+	if (hw->mac.has_fwsm) {
+		fwsm = er32(FWSM);
+		factps = er32(FACTPS);
+
+		if (!(factps & E1000_FACTPS_MNGCG) &&
+		    ((fwsm & E1000_FWSM_MODE_MASK) ==
+		     (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT)))
+			return true;
+	} else if ((hw->mac.type == e1000_82574) ||
+		   (hw->mac.type == e1000_82583)) {
+		u16 data;
+		s32 ret_val;
+
+		factps = er32(FACTPS);
+		ret_val = e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &data);
+		if (ret_val)
+			return false;
+
+		if (!(factps & E1000_FACTPS_MNGCG) &&
+		    ((data & E1000_NVM_INIT_CTRL2_MNGM) ==
+		     (e1000_mng_mode_pt << 13)))
+			return true;
+	} else if ((manc & E1000_MANC_SMBUS_EN) &&
+		   !(manc & E1000_MANC_ASF_EN)) {
+		return true;
+	}
+
+	return false;
+}
diff --git a/drivers/net/ethernet/intel/e1000e/manage.h b/drivers/net/ethernet/intel/e1000e/manage.h
new file mode 100644
index 0000000000..d868aad806
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/manage.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _E1000E_MANAGE_H_
+#define _E1000E_MANAGE_H_
+
+bool e1000e_check_mng_mode_generic(struct e1000_hw *hw);
+bool e1000e_enable_tx_pkt_filtering(struct e1000_hw *hw);
+s32 e1000e_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length);
+bool e1000e_enable_mng_pass_thru(struct e1000_hw *hw);
+
+enum e1000_mng_mode {
+	e1000_mng_mode_none = 0,
+	e1000_mng_mode_asf,
+	e1000_mng_mode_pt,
+	e1000_mng_mode_ipmi,
+	e1000_mng_mode_host_if_only
+};
+
+#define E1000_FACTPS_MNGCG			0x20000000
+
+#define E1000_FWSM_MODE_MASK			0xE
+#define E1000_FWSM_MODE_SHIFT			1
+
+#define E1000_MNG_IAMT_MODE			0x3
+#define E1000_MNG_DHCP_COOKIE_LENGTH		0x10
+#define E1000_MNG_DHCP_COOKIE_OFFSET		0x6F0
+#define E1000_MNG_DHCP_COMMAND_TIMEOUT		10
+#define E1000_MNG_DHCP_TX_PAYLOAD_CMD		64
+#define E1000_MNG_DHCP_COOKIE_STATUS_PARSING	0x1
+#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN	0x2
+
+#define E1000_VFTA_ENTRY_SHIFT			5
+#define E1000_VFTA_ENTRY_MASK			0x7F
+#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK		0x1F
+
+#define E1000_HICR_EN			0x01	/* Enable bit - RO */
+/* Driver sets this bit when done to put command in RAM */
+#define E1000_HICR_C			0x02
+#define E1000_HICR_SV			0x04	/* Status Validity */
+#define E1000_HICR_FW_RESET_ENABLE	0x40
+#define E1000_HICR_FW_RESET		0x80
+
+/* Intel(R) Active Management Technology signature */
+#define E1000_IAMT_SIGNATURE		0x544D4149
+
+#endif
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
new file mode 100644
index 0000000000..f536c85672
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -0,0 +1,7983 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/interrupt.h>
+#include <linux/tcp.h>
+#include <linux/ipv6.h>
+#include <linux/slab.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/pm_qos.h>
+#include <linux/pm_runtime.h>
+#include <linux/prefetch.h>
+#include <linux/suspend.h>
+
+#include "e1000.h"
+#define CREATE_TRACE_POINTS
+#include "e1000e_trace.h"
+
+char e1000e_driver_name[] = "e1000e";
+
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+static const struct e1000_info *e1000_info_tbl[] = {
+	[board_82571]		= &e1000_82571_info,
+	[board_82572]		= &e1000_82572_info,
+	[board_82573]		= &e1000_82573_info,
+	[board_82574]		= &e1000_82574_info,
+	[board_82583]		= &e1000_82583_info,
+	[board_80003es2lan]	= &e1000_es2_info,
+	[board_ich8lan]		= &e1000_ich8_info,
+	[board_ich9lan]		= &e1000_ich9_info,
+	[board_ich10lan]	= &e1000_ich10_info,
+	[board_pchlan]		= &e1000_pch_info,
+	[board_pch2lan]		= &e1000_pch2_info,
+	[board_pch_lpt]		= &e1000_pch_lpt_info,
+	[board_pch_spt]		= &e1000_pch_spt_info,
+	[board_pch_cnp]		= &e1000_pch_cnp_info,
+	[board_pch_tgp]		= &e1000_pch_tgp_info,
+	[board_pch_adp]		= &e1000_pch_adp_info,
+	[board_pch_mtp]		= &e1000_pch_mtp_info,
+};
+
+struct e1000_reg_info {
+	u32 ofs;
+	char *name;
+};
+
+static const struct e1000_reg_info e1000_reg_info_tbl[] = {
+	/* General Registers */
+	{E1000_CTRL, "CTRL"},
+	{E1000_STATUS, "STATUS"},
+	{E1000_CTRL_EXT, "CTRL_EXT"},
+
+	/* Interrupt Registers */
+	{E1000_ICR, "ICR"},
+
+	/* Rx Registers */
+	{E1000_RCTL, "RCTL"},
+	{E1000_RDLEN(0), "RDLEN"},
+	{E1000_RDH(0), "RDH"},
+	{E1000_RDT(0), "RDT"},
+	{E1000_RDTR, "RDTR"},
+	{E1000_RXDCTL(0), "RXDCTL"},
+	{E1000_ERT, "ERT"},
+	{E1000_RDBAL(0), "RDBAL"},
+	{E1000_RDBAH(0), "RDBAH"},
+	{E1000_RDFH, "RDFH"},
+	{E1000_RDFT, "RDFT"},
+	{E1000_RDFHS, "RDFHS"},
+	{E1000_RDFTS, "RDFTS"},
+	{E1000_RDFPC, "RDFPC"},
+
+	/* Tx Registers */
+	{E1000_TCTL, "TCTL"},
+	{E1000_TDBAL(0), "TDBAL"},
+	{E1000_TDBAH(0), "TDBAH"},
+	{E1000_TDLEN(0), "TDLEN"},
+	{E1000_TDH(0), "TDH"},
+	{E1000_TDT(0), "TDT"},
+	{E1000_TIDV, "TIDV"},
+	{E1000_TXDCTL(0), "TXDCTL"},
+	{E1000_TADV, "TADV"},
+	{E1000_TARC(0), "TARC"},
+	{E1000_TDFH, "TDFH"},
+	{E1000_TDFT, "TDFT"},
+	{E1000_TDFHS, "TDFHS"},
+	{E1000_TDFTS, "TDFTS"},
+	{E1000_TDFPC, "TDFPC"},
+
+	/* List Terminator */
+	{0, NULL}
+};
+
+/**
+ * __ew32_prepare - prepare to write to MAC CSR register on certain parts
+ * @hw: pointer to the HW structure
+ *
+ * When updating the MAC CSR registers, the Manageability Engine (ME) could
+ * be accessing the registers at the same time.  Normally, this is handled in
+ * h/w by an arbiter but on some parts there is a bug that acknowledges Host
+ * accesses later than it should which could result in the register to have
+ * an incorrect value.  Workaround this by checking the FWSM register which
+ * has bit 24 set while ME is accessing MAC CSR registers, wait if it is set
+ * and try again a number of times.
+ **/
+static void __ew32_prepare(struct e1000_hw *hw)
+{
+	s32 i = E1000_ICH_FWSM_PCIM2PCI_COUNT;
+
+	while ((er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI) && --i)
+		udelay(50);
+}
+
+void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val)
+{
+	if (hw->adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+		__ew32_prepare(hw);
+
+	writel(val, hw->hw_addr + reg);
+}
+
+/**
+ * e1000_regdump - register printout routine
+ * @hw: pointer to the HW structure
+ * @reginfo: pointer to the register info table
+ **/
+static void e1000_regdump(struct e1000_hw *hw, struct e1000_reg_info *reginfo)
+{
+	int n = 0;
+	char rname[16];
+	u32 regs[8];
+
+	switch (reginfo->ofs) {
+	case E1000_RXDCTL(0):
+		for (n = 0; n < 2; n++)
+			regs[n] = __er32(hw, E1000_RXDCTL(n));
+		break;
+	case E1000_TXDCTL(0):
+		for (n = 0; n < 2; n++)
+			regs[n] = __er32(hw, E1000_TXDCTL(n));
+		break;
+	case E1000_TARC(0):
+		for (n = 0; n < 2; n++)
+			regs[n] = __er32(hw, E1000_TARC(n));
+		break;
+	default:
+		pr_info("%-15s %08x\n",
+			reginfo->name, __er32(hw, reginfo->ofs));
+		return;
+	}
+
+	snprintf(rname, 16, "%s%s", reginfo->name, "[0-1]");
+	pr_info("%-15s %08x %08x\n", rname, regs[0], regs[1]);
+}
+
+static void e1000e_dump_ps_pages(struct e1000_adapter *adapter,
+				 struct e1000_buffer *bi)
+{
+	int i;
+	struct e1000_ps_page *ps_page;
+
+	for (i = 0; i < adapter->rx_ps_pages; i++) {
+		ps_page = &bi->ps_pages[i];
+
+		if (ps_page->page) {
+			pr_info("packet dump for ps_page %d:\n", i);
+			print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS,
+				       16, 1, page_address(ps_page->page),
+				       PAGE_SIZE, true);
+		}
+	}
+}
+
+/**
+ * e1000e_dump - Print registers, Tx-ring and Rx-ring
+ * @adapter: board private structure
+ **/
+static void e1000e_dump(struct e1000_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_reg_info *reginfo;
+	struct e1000_ring *tx_ring = adapter->tx_ring;
+	struct e1000_tx_desc *tx_desc;
+	struct my_u0 {
+		__le64 a;
+		__le64 b;
+	} *u0;
+	struct e1000_buffer *buffer_info;
+	struct e1000_ring *rx_ring = adapter->rx_ring;
+	union e1000_rx_desc_packet_split *rx_desc_ps;
+	union e1000_rx_desc_extended *rx_desc;
+	struct my_u1 {
+		__le64 a;
+		__le64 b;
+		__le64 c;
+		__le64 d;
+	} *u1;
+	u32 staterr;
+	int i = 0;
+
+	if (!netif_msg_hw(adapter))
+		return;
+
+	/* Print netdevice Info */
+	if (netdev) {
+		dev_info(&adapter->pdev->dev, "Net device Info\n");
+		pr_info("Device Name     state            trans_start\n");
+		pr_info("%-15s %016lX %016lX\n", netdev->name,
+			netdev->state, dev_trans_start(netdev));
+	}
+
+	/* Print Registers */
+	dev_info(&adapter->pdev->dev, "Register Dump\n");
+	pr_info(" Register Name   Value\n");
+	for (reginfo = (struct e1000_reg_info *)e1000_reg_info_tbl;
+	     reginfo->name; reginfo++) {
+		e1000_regdump(hw, reginfo);
+	}
+
+	/* Print Tx Ring Summary */
+	if (!netdev || !netif_running(netdev))
+		return;
+
+	dev_info(&adapter->pdev->dev, "Tx Ring Summary\n");
+	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
+	buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
+	pr_info(" %5d %5X %5X %016llX %04X %3X %016llX\n",
+		0, tx_ring->next_to_use, tx_ring->next_to_clean,
+		(unsigned long long)buffer_info->dma,
+		buffer_info->length,
+		buffer_info->next_to_watch,
+		(unsigned long long)buffer_info->time_stamp);
+
+	/* Print Tx Ring */
+	if (!netif_msg_tx_done(adapter))
+		goto rx_ring_summary;
+
+	dev_info(&adapter->pdev->dev, "Tx Ring Dump\n");
+
+	/* Transmit Descriptor Formats - DEXT[29] is 0 (Legacy) or 1 (Extended)
+	 *
+	 * Legacy Transmit Descriptor
+	 *   +--------------------------------------------------------------+
+	 * 0 |         Buffer Address [63:0] (Reserved on Write Back)       |
+	 *   +--------------------------------------------------------------+
+	 * 8 | Special  |    CSS     | Status |  CMD    |  CSO   |  Length  |
+	 *   +--------------------------------------------------------------+
+	 *   63       48 47        36 35    32 31     24 23    16 15        0
+	 *
+	 * Extended Context Descriptor (DTYP=0x0) for TSO or checksum offload
+	 *   63      48 47    40 39       32 31             16 15    8 7      0
+	 *   +----------------------------------------------------------------+
+	 * 0 |  TUCSE  | TUCS0  |   TUCSS   |     IPCSE       | IPCS0 | IPCSS |
+	 *   +----------------------------------------------------------------+
+	 * 8 |   MSS   | HDRLEN | RSV | STA | TUCMD | DTYP |      PAYLEN      |
+	 *   +----------------------------------------------------------------+
+	 *   63      48 47    40 39 36 35 32 31   24 23  20 19                0
+	 *
+	 * Extended Data Descriptor (DTYP=0x1)
+	 *   +----------------------------------------------------------------+
+	 * 0 |                     Buffer Address [63:0]                      |
+	 *   +----------------------------------------------------------------+
+	 * 8 | VLAN tag |  POPTS  | Rsvd | Status | Command | DTYP |  DTALEN  |
+	 *   +----------------------------------------------------------------+
+	 *   63       48 47     40 39  36 35    32 31     24 23  20 19        0
+	 */
+	pr_info("Tl[desc]     [address 63:0  ] [SpeCssSCmCsLen] [bi->dma       ] leng  ntw timestamp        bi->skb <-- Legacy format\n");
+	pr_info("Tc[desc]     [Ce CoCsIpceCoS] [MssHlRSCm0Plen] [bi->dma       ] leng  ntw timestamp        bi->skb <-- Ext Context format\n");
+	pr_info("Td[desc]     [address 63:0  ] [VlaPoRSCm1Dlen] [bi->dma       ] leng  ntw timestamp        bi->skb <-- Ext Data format\n");
+	for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
+		const char *next_desc;
+		tx_desc = E1000_TX_DESC(*tx_ring, i);
+		buffer_info = &tx_ring->buffer_info[i];
+		u0 = (struct my_u0 *)tx_desc;
+		if (i == tx_ring->next_to_use && i == tx_ring->next_to_clean)
+			next_desc = " NTC/U";
+		else if (i == tx_ring->next_to_use)
+			next_desc = " NTU";
+		else if (i == tx_ring->next_to_clean)
+			next_desc = " NTC";
+		else
+			next_desc = "";
+		pr_info("T%c[0x%03X]    %016llX %016llX %016llX %04X  %3X %016llX %p%s\n",
+			(!(le64_to_cpu(u0->b) & BIT(29)) ? 'l' :
+			 ((le64_to_cpu(u0->b) & BIT(20)) ? 'd' : 'c')),
+			i,
+			(unsigned long long)le64_to_cpu(u0->a),
+			(unsigned long long)le64_to_cpu(u0->b),
+			(unsigned long long)buffer_info->dma,
+			buffer_info->length, buffer_info->next_to_watch,
+			(unsigned long long)buffer_info->time_stamp,
+			buffer_info->skb, next_desc);
+
+		if (netif_msg_pktdata(adapter) && buffer_info->skb)
+			print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS,
+				       16, 1, buffer_info->skb->data,
+				       buffer_info->skb->len, true);
+	}
+
+	/* Print Rx Ring Summary */
+rx_ring_summary:
+	dev_info(&adapter->pdev->dev, "Rx Ring Summary\n");
+	pr_info("Queue [NTU] [NTC]\n");
+	pr_info(" %5d %5X %5X\n",
+		0, rx_ring->next_to_use, rx_ring->next_to_clean);
+
+	/* Print Rx Ring */
+	if (!netif_msg_rx_status(adapter))
+		return;
+
+	dev_info(&adapter->pdev->dev, "Rx Ring Dump\n");
+	switch (adapter->rx_ps_pages) {
+	case 1:
+	case 2:
+	case 3:
+		/* [Extended] Packet Split Receive Descriptor Format
+		 *
+		 *    +-----------------------------------------------------+
+		 *  0 |                Buffer Address 0 [63:0]              |
+		 *    +-----------------------------------------------------+
+		 *  8 |                Buffer Address 1 [63:0]              |
+		 *    +-----------------------------------------------------+
+		 * 16 |                Buffer Address 2 [63:0]              |
+		 *    +-----------------------------------------------------+
+		 * 24 |                Buffer Address 3 [63:0]              |
+		 *    +-----------------------------------------------------+
+		 */
+		pr_info("R  [desc]      [buffer 0 63:0 ] [buffer 1 63:0 ] [buffer 2 63:0 ] [buffer 3 63:0 ] [bi->dma       ] [bi->skb] <-- Ext Pkt Split format\n");
+		/* [Extended] Receive Descriptor (Write-Back) Format
+		 *
+		 *   63       48 47    32 31     13 12    8 7    4 3        0
+		 *   +------------------------------------------------------+
+		 * 0 | Packet   | IP     |  Rsvd   | MRQ   | Rsvd | MRQ RSS |
+		 *   | Checksum | Ident  |         | Queue |      |  Type   |
+		 *   +------------------------------------------------------+
+		 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
+		 *   +------------------------------------------------------+
+		 *   63       48 47    32 31            20 19               0
+		 */
+		pr_info("RWB[desc]      [ck ipid mrqhsh] [vl   l0 ee  es] [ l3  l2  l1 hs] [reserved      ] ---------------- [bi->skb] <-- Ext Rx Write-Back format\n");
+		for (i = 0; i < rx_ring->count; i++) {
+			const char *next_desc;
+			buffer_info = &rx_ring->buffer_info[i];
+			rx_desc_ps = E1000_RX_DESC_PS(*rx_ring, i);
+			u1 = (struct my_u1 *)rx_desc_ps;
+			staterr =
+			    le32_to_cpu(rx_desc_ps->wb.middle.status_error);
+
+			if (i == rx_ring->next_to_use)
+				next_desc = " NTU";
+			else if (i == rx_ring->next_to_clean)
+				next_desc = " NTC";
+			else
+				next_desc = "";
+
+			if (staterr & E1000_RXD_STAT_DD) {
+				/* Descriptor Done */
+				pr_info("%s[0x%03X]     %016llX %016llX %016llX %016llX ---------------- %p%s\n",
+					"RWB", i,
+					(unsigned long long)le64_to_cpu(u1->a),
+					(unsigned long long)le64_to_cpu(u1->b),
+					(unsigned long long)le64_to_cpu(u1->c),
+					(unsigned long long)le64_to_cpu(u1->d),
+					buffer_info->skb, next_desc);
+			} else {
+				pr_info("%s[0x%03X]     %016llX %016llX %016llX %016llX %016llX %p%s\n",
+					"R  ", i,
+					(unsigned long long)le64_to_cpu(u1->a),
+					(unsigned long long)le64_to_cpu(u1->b),
+					(unsigned long long)le64_to_cpu(u1->c),
+					(unsigned long long)le64_to_cpu(u1->d),
+					(unsigned long long)buffer_info->dma,
+					buffer_info->skb, next_desc);
+
+				if (netif_msg_pktdata(adapter))
+					e1000e_dump_ps_pages(adapter,
+							     buffer_info);
+			}
+		}
+		break;
+	default:
+	case 0:
+		/* Extended Receive Descriptor (Read) Format
+		 *
+		 *   +-----------------------------------------------------+
+		 * 0 |                Buffer Address [63:0]                |
+		 *   +-----------------------------------------------------+
+		 * 8 |                      Reserved                       |
+		 *   +-----------------------------------------------------+
+		 */
+		pr_info("R  [desc]      [buf addr 63:0 ] [reserved 63:0 ] [bi->dma       ] [bi->skb] <-- Ext (Read) format\n");
+		/* Extended Receive Descriptor (Write-Back) Format
+		 *
+		 *   63       48 47    32 31    24 23            4 3        0
+		 *   +------------------------------------------------------+
+		 *   |     RSS Hash      |        |               |         |
+		 * 0 +-------------------+  Rsvd  |   Reserved    | MRQ RSS |
+		 *   | Packet   | IP     |        |               |  Type   |
+		 *   | Checksum | Ident  |        |               |         |
+		 *   +------------------------------------------------------+
+		 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
+		 *   +------------------------------------------------------+
+		 *   63       48 47    32 31            20 19               0
+		 */
+		pr_info("RWB[desc]      [cs ipid    mrq] [vt   ln xe  xs] [bi->skb] <-- Ext (Write-Back) format\n");
+
+		for (i = 0; i < rx_ring->count; i++) {
+			const char *next_desc;
+
+			buffer_info = &rx_ring->buffer_info[i];
+			rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
+			u1 = (struct my_u1 *)rx_desc;
+			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+
+			if (i == rx_ring->next_to_use)
+				next_desc = " NTU";
+			else if (i == rx_ring->next_to_clean)
+				next_desc = " NTC";
+			else
+				next_desc = "";
+
+			if (staterr & E1000_RXD_STAT_DD) {
+				/* Descriptor Done */
+				pr_info("%s[0x%03X]     %016llX %016llX ---------------- %p%s\n",
+					"RWB", i,
+					(unsigned long long)le64_to_cpu(u1->a),
+					(unsigned long long)le64_to_cpu(u1->b),
+					buffer_info->skb, next_desc);
+			} else {
+				pr_info("%s[0x%03X]     %016llX %016llX %016llX %p%s\n",
+					"R  ", i,
+					(unsigned long long)le64_to_cpu(u1->a),
+					(unsigned long long)le64_to_cpu(u1->b),
+					(unsigned long long)buffer_info->dma,
+					buffer_info->skb, next_desc);
+
+				if (netif_msg_pktdata(adapter) &&
+				    buffer_info->skb)
+					print_hex_dump(KERN_INFO, "",
+						       DUMP_PREFIX_ADDRESS, 16,
+						       1,
+						       buffer_info->skb->data,
+						       adapter->rx_buffer_len,
+						       true);
+			}
+		}
+	}
+}
+
+/**
+ * e1000_desc_unused - calculate if we have unused descriptors
+ * @ring: pointer to ring struct to perform calculation on
+ **/
+static int e1000_desc_unused(struct e1000_ring *ring)
+{
+	if (ring->next_to_clean > ring->next_to_use)
+		return ring->next_to_clean - ring->next_to_use - 1;
+
+	return ring->count + ring->next_to_clean - ring->next_to_use - 1;
+}
+
+/**
+ * e1000e_systim_to_hwtstamp - convert system time value to hw time stamp
+ * @adapter: board private structure
+ * @hwtstamps: time stamp structure to update
+ * @systim: unsigned 64bit system time value.
+ *
+ * Convert the system time value stored in the RX/TXSTMP registers into a
+ * hwtstamp which can be used by the upper level time stamping functions.
+ *
+ * The 'systim_lock' spinlock is used to protect the consistency of the
+ * system time value. This is needed because reading the 64 bit time
+ * value involves reading two 32 bit registers. The first read latches the
+ * value.
+ **/
+static void e1000e_systim_to_hwtstamp(struct e1000_adapter *adapter,
+				      struct skb_shared_hwtstamps *hwtstamps,
+				      u64 systim)
+{
+	u64 ns;
+	unsigned long flags;
+
+	spin_lock_irqsave(&adapter->systim_lock, flags);
+	ns = timecounter_cyc2time(&adapter->tc, systim);
+	spin_unlock_irqrestore(&adapter->systim_lock, flags);
+
+	memset(hwtstamps, 0, sizeof(*hwtstamps));
+	hwtstamps->hwtstamp = ns_to_ktime(ns);
+}
+
+/**
+ * e1000e_rx_hwtstamp - utility function which checks for Rx time stamp
+ * @adapter: board private structure
+ * @status: descriptor extended error and status field
+ * @skb: particular skb to include time stamp
+ *
+ * If the time stamp is valid, convert it into the timecounter ns value
+ * and store that result into the shhwtstamps structure which is passed
+ * up the network stack.
+ **/
+static void e1000e_rx_hwtstamp(struct e1000_adapter *adapter, u32 status,
+			       struct sk_buff *skb)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u64 rxstmp;
+
+	if (!(adapter->flags & FLAG_HAS_HW_TIMESTAMP) ||
+	    !(status & E1000_RXDEXT_STATERR_TST) ||
+	    !(er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
+		return;
+
+	/* The Rx time stamp registers contain the time stamp.  No other
+	 * received packet will be time stamped until the Rx time stamp
+	 * registers are read.  Because only one packet can be time stamped
+	 * at a time, the register values must belong to this packet and
+	 * therefore none of the other additional attributes need to be
+	 * compared.
+	 */
+	rxstmp = (u64)er32(RXSTMPL);
+	rxstmp |= (u64)er32(RXSTMPH) << 32;
+	e1000e_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), rxstmp);
+
+	adapter->flags2 &= ~FLAG2_CHECK_RX_HWTSTAMP;
+}
+
+/**
+ * e1000_receive_skb - helper function to handle Rx indications
+ * @adapter: board private structure
+ * @netdev: pointer to netdev struct
+ * @staterr: descriptor extended error and status field as written by hardware
+ * @vlan: descriptor vlan field as written by hardware (no le/be conversion)
+ * @skb: pointer to sk_buff to be indicated to stack
+ **/
+static void e1000_receive_skb(struct e1000_adapter *adapter,
+			      struct net_device *netdev, struct sk_buff *skb,
+			      u32 staterr, __le16 vlan)
+{
+	u16 tag = le16_to_cpu(vlan);
+
+	e1000e_rx_hwtstamp(adapter, staterr, skb);
+
+	skb->protocol = eth_type_trans(skb, netdev);
+
+	if (staterr & E1000_RXD_STAT_VP)
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tag);
+
+	napi_gro_receive(&adapter->napi, skb);
+}
+
+/**
+ * e1000_rx_checksum - Receive Checksum Offload
+ * @adapter: board private structure
+ * @status_err: receive descriptor status and error fields
+ * @skb: socket buffer with received data
+ **/
+static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err,
+			      struct sk_buff *skb)
+{
+	u16 status = (u16)status_err;
+	u8 errors = (u8)(status_err >> 24);
+
+	skb_checksum_none_assert(skb);
+
+	/* Rx checksum disabled */
+	if (!(adapter->netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* Ignore Checksum bit is set */
+	if (status & E1000_RXD_STAT_IXSM)
+		return;
+
+	/* TCP/UDP checksum error bit or IP checksum error bit is set */
+	if (errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) {
+		/* let the stack verify checksum errors */
+		adapter->hw_csum_err++;
+		return;
+	}
+
+	/* TCP/UDP Checksum has not been calculated */
+	if (!(status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)))
+		return;
+
+	/* It must be a TCP or UDP packet with a valid checksum */
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+	adapter->hw_csum_good++;
+}
+
+static void e1000e_update_rdt_wa(struct e1000_ring *rx_ring, unsigned int i)
+{
+	struct e1000_adapter *adapter = rx_ring->adapter;
+	struct e1000_hw *hw = &adapter->hw;
+
+	__ew32_prepare(hw);
+	writel(i, rx_ring->tail);
+
+	if (unlikely(i != readl(rx_ring->tail))) {
+		u32 rctl = er32(RCTL);
+
+		ew32(RCTL, rctl & ~E1000_RCTL_EN);
+		e_err("ME firmware caused invalid RDT - resetting\n");
+		schedule_work(&adapter->reset_task);
+	}
+}
+
+static void e1000e_update_tdt_wa(struct e1000_ring *tx_ring, unsigned int i)
+{
+	struct e1000_adapter *adapter = tx_ring->adapter;
+	struct e1000_hw *hw = &adapter->hw;
+
+	__ew32_prepare(hw);
+	writel(i, tx_ring->tail);
+
+	if (unlikely(i != readl(tx_ring->tail))) {
+		u32 tctl = er32(TCTL);
+
+		ew32(TCTL, tctl & ~E1000_TCTL_EN);
+		e_err("ME firmware caused invalid TDT - resetting\n");
+		schedule_work(&adapter->reset_task);
+	}
+}
+
+/**
+ * e1000_alloc_rx_buffers - Replace used receive buffers
+ * @rx_ring: Rx descriptor ring
+ * @cleaned_count: number to reallocate
+ * @gfp: flags for allocation
+ **/
+static void e1000_alloc_rx_buffers(struct e1000_ring *rx_ring,
+				   int cleaned_count, gfp_t gfp)
+{
+	struct e1000_adapter *adapter = rx_ring->adapter;
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	union e1000_rx_desc_extended *rx_desc;
+	struct e1000_buffer *buffer_info;
+	struct sk_buff *skb;
+	unsigned int i;
+	unsigned int bufsz = adapter->rx_buffer_len;
+
+	i = rx_ring->next_to_use;
+	buffer_info = &rx_ring->buffer_info[i];
+
+	while (cleaned_count--) {
+		skb = buffer_info->skb;
+		if (skb) {
+			skb_trim(skb, 0);
+			goto map_skb;
+		}
+
+		skb = __netdev_alloc_skb_ip_align(netdev, bufsz, gfp);
+		if (!skb) {
+			/* Better luck next round */
+			adapter->alloc_rx_buff_failed++;
+			break;
+		}
+
+		buffer_info->skb = skb;
+map_skb:
+		buffer_info->dma = dma_map_single(&pdev->dev, skb->data,
+						  adapter->rx_buffer_len,
+						  DMA_FROM_DEVICE);
+		if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
+			dev_err(&pdev->dev, "Rx DMA map failed\n");
+			adapter->rx_dma_failed++;
+			break;
+		}
+
+		rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
+		rx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
+
+		if (unlikely(!(i & (E1000_RX_BUFFER_WRITE - 1)))) {
+			/* Force memory writes to complete before letting h/w
+			 * know there are new descriptors to fetch.  (Only
+			 * applicable for weak-ordered memory model archs,
+			 * such as IA-64).
+			 */
+			wmb();
+			if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+				e1000e_update_rdt_wa(rx_ring, i);
+			else
+				writel(i, rx_ring->tail);
+		}
+		i++;
+		if (i == rx_ring->count)
+			i = 0;
+		buffer_info = &rx_ring->buffer_info[i];
+	}
+
+	rx_ring->next_to_use = i;
+}
+
+/**
+ * e1000_alloc_rx_buffers_ps - Replace used receive buffers; packet split
+ * @rx_ring: Rx descriptor ring
+ * @cleaned_count: number to reallocate
+ * @gfp: flags for allocation
+ **/
+static void e1000_alloc_rx_buffers_ps(struct e1000_ring *rx_ring,
+				      int cleaned_count, gfp_t gfp)
+{
+	struct e1000_adapter *adapter = rx_ring->adapter;
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	union e1000_rx_desc_packet_split *rx_desc;
+	struct e1000_buffer *buffer_info;
+	struct e1000_ps_page *ps_page;
+	struct sk_buff *skb;
+	unsigned int i, j;
+
+	i = rx_ring->next_to_use;
+	buffer_info = &rx_ring->buffer_info[i];
+
+	while (cleaned_count--) {
+		rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
+
+		for (j = 0; j < PS_PAGE_BUFFERS; j++) {
+			ps_page = &buffer_info->ps_pages[j];
+			if (j >= adapter->rx_ps_pages) {
+				/* all unused desc entries get hw null ptr */
+				rx_desc->read.buffer_addr[j + 1] =
+				    ~cpu_to_le64(0);
+				continue;
+			}
+			if (!ps_page->page) {
+				ps_page->page = alloc_page(gfp);
+				if (!ps_page->page) {
+					adapter->alloc_rx_buff_failed++;
+					goto no_buffers;
+				}
+				ps_page->dma = dma_map_page(&pdev->dev,
+							    ps_page->page,
+							    0, PAGE_SIZE,
+							    DMA_FROM_DEVICE);
+				if (dma_mapping_error(&pdev->dev,
+						      ps_page->dma)) {
+					dev_err(&adapter->pdev->dev,
+						"Rx DMA page map failed\n");
+					adapter->rx_dma_failed++;
+					goto no_buffers;
+				}
+			}
+			/* Refresh the desc even if buffer_addrs
+			 * didn't change because each write-back
+			 * erases this info.
+			 */
+			rx_desc->read.buffer_addr[j + 1] =
+			    cpu_to_le64(ps_page->dma);
+		}
+
+		skb = __netdev_alloc_skb_ip_align(netdev, adapter->rx_ps_bsize0,
+						  gfp);
+
+		if (!skb) {
+			adapter->alloc_rx_buff_failed++;
+			break;
+		}
+
+		buffer_info->skb = skb;
+		buffer_info->dma = dma_map_single(&pdev->dev, skb->data,
+						  adapter->rx_ps_bsize0,
+						  DMA_FROM_DEVICE);
+		if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
+			dev_err(&pdev->dev, "Rx DMA map failed\n");
+			adapter->rx_dma_failed++;
+			/* cleanup skb */
+			dev_kfree_skb_any(skb);
+			buffer_info->skb = NULL;
+			break;
+		}
+
+		rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma);
+
+		if (unlikely(!(i & (E1000_RX_BUFFER_WRITE - 1)))) {
+			/* Force memory writes to complete before letting h/w
+			 * know there are new descriptors to fetch.  (Only
+			 * applicable for weak-ordered memory model archs,
+			 * such as IA-64).
+			 */
+			wmb();
+			if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+				e1000e_update_rdt_wa(rx_ring, i << 1);
+			else
+				writel(i << 1, rx_ring->tail);
+		}
+
+		i++;
+		if (i == rx_ring->count)
+			i = 0;
+		buffer_info = &rx_ring->buffer_info[i];
+	}
+
+no_buffers:
+	rx_ring->next_to_use = i;
+}
+
+/**
+ * e1000_alloc_jumbo_rx_buffers - Replace used jumbo receive buffers
+ * @rx_ring: Rx descriptor ring
+ * @cleaned_count: number of buffers to allocate this pass
+ * @gfp: flags for allocation
+ **/
+
+static void e1000_alloc_jumbo_rx_buffers(struct e1000_ring *rx_ring,
+					 int cleaned_count, gfp_t gfp)
+{
+	struct e1000_adapter *adapter = rx_ring->adapter;
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	union e1000_rx_desc_extended *rx_desc;
+	struct e1000_buffer *buffer_info;
+	struct sk_buff *skb;
+	unsigned int i;
+	unsigned int bufsz = 256 - 16;	/* for skb_reserve */
+
+	i = rx_ring->next_to_use;
+	buffer_info = &rx_ring->buffer_info[i];
+
+	while (cleaned_count--) {
+		skb = buffer_info->skb;
+		if (skb) {
+			skb_trim(skb, 0);
+			goto check_page;
+		}
+
+		skb = __netdev_alloc_skb_ip_align(netdev, bufsz, gfp);
+		if (unlikely(!skb)) {
+			/* Better luck next round */
+			adapter->alloc_rx_buff_failed++;
+			break;
+		}
+
+		buffer_info->skb = skb;
+check_page:
+		/* allocate a new page if necessary */
+		if (!buffer_info->page) {
+			buffer_info->page = alloc_page(gfp);
+			if (unlikely(!buffer_info->page)) {
+				adapter->alloc_rx_buff_failed++;
+				break;
+			}
+		}
+
+		if (!buffer_info->dma) {
+			buffer_info->dma = dma_map_page(&pdev->dev,
+							buffer_info->page, 0,
+							PAGE_SIZE,
+							DMA_FROM_DEVICE);
+			if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
+				adapter->alloc_rx_buff_failed++;
+				break;
+			}
+		}
+
+		rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
+		rx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
+
+		if (unlikely(++i == rx_ring->count))
+			i = 0;
+		buffer_info = &rx_ring->buffer_info[i];
+	}
+
+	if (likely(rx_ring->next_to_use != i)) {
+		rx_ring->next_to_use = i;
+		if (unlikely(i-- == 0))
+			i = (rx_ring->count - 1);
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+			e1000e_update_rdt_wa(rx_ring, i);
+		else
+			writel(i, rx_ring->tail);
+	}
+}
+
+static inline void e1000_rx_hash(struct net_device *netdev, __le32 rss,
+				 struct sk_buff *skb)
+{
+	if (netdev->features & NETIF_F_RXHASH)
+		skb_set_hash(skb, le32_to_cpu(rss), PKT_HASH_TYPE_L3);
+}
+
+/**
+ * e1000_clean_rx_irq - Send received data up the network stack
+ * @rx_ring: Rx descriptor ring
+ * @work_done: output parameter for indicating completed work
+ * @work_to_do: how many packets we can clean
+ *
+ * the return value indicates whether actual cleaning was done, there
+ * is no guarantee that everything was cleaned
+ **/
+static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done,
+			       int work_to_do)
+{
+	struct e1000_adapter *adapter = rx_ring->adapter;
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_hw *hw = &adapter->hw;
+	union e1000_rx_desc_extended *rx_desc, *next_rxd;
+	struct e1000_buffer *buffer_info, *next_buffer;
+	u32 length, staterr;
+	unsigned int i;
+	int cleaned_count = 0;
+	bool cleaned = false;
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+
+	i = rx_ring->next_to_clean;
+	rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
+	staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+	buffer_info = &rx_ring->buffer_info[i];
+
+	while (staterr & E1000_RXD_STAT_DD) {
+		struct sk_buff *skb;
+
+		if (*work_done >= work_to_do)
+			break;
+		(*work_done)++;
+		dma_rmb();	/* read descriptor and rx_buffer_info after status DD */
+
+		skb = buffer_info->skb;
+		buffer_info->skb = NULL;
+
+		prefetch(skb->data - NET_IP_ALIGN);
+
+		i++;
+		if (i == rx_ring->count)
+			i = 0;
+		next_rxd = E1000_RX_DESC_EXT(*rx_ring, i);
+		prefetch(next_rxd);
+
+		next_buffer = &rx_ring->buffer_info[i];
+
+		cleaned = true;
+		cleaned_count++;
+		dma_unmap_single(&pdev->dev, buffer_info->dma,
+				 adapter->rx_buffer_len, DMA_FROM_DEVICE);
+		buffer_info->dma = 0;
+
+		length = le16_to_cpu(rx_desc->wb.upper.length);
+
+		/* !EOP means multiple descriptors were used to store a single
+		 * packet, if that's the case we need to toss it.  In fact, we
+		 * need to toss every packet with the EOP bit clear and the
+		 * next frame that _does_ have the EOP bit set, as it is by
+		 * definition only a frame fragment
+		 */
+		if (unlikely(!(staterr & E1000_RXD_STAT_EOP)))
+			adapter->flags2 |= FLAG2_IS_DISCARDING;
+
+		if (adapter->flags2 & FLAG2_IS_DISCARDING) {
+			/* All receives must fit into a single buffer */
+			e_dbg("Receive packet consumed multiple buffers\n");
+			/* recycle */
+			buffer_info->skb = skb;
+			if (staterr & E1000_RXD_STAT_EOP)
+				adapter->flags2 &= ~FLAG2_IS_DISCARDING;
+			goto next_desc;
+		}
+
+		if (unlikely((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) &&
+			     !(netdev->features & NETIF_F_RXALL))) {
+			/* recycle */
+			buffer_info->skb = skb;
+			goto next_desc;
+		}
+
+		/* adjust length to remove Ethernet CRC */
+		if (!(adapter->flags2 & FLAG2_CRC_STRIPPING)) {
+			/* If configured to store CRC, don't subtract FCS,
+			 * but keep the FCS bytes out of the total_rx_bytes
+			 * counter
+			 */
+			if (netdev->features & NETIF_F_RXFCS)
+				total_rx_bytes -= 4;
+			else
+				length -= 4;
+		}
+
+		total_rx_bytes += length;
+		total_rx_packets++;
+
+		/* code added for copybreak, this should improve
+		 * performance for small packets with large amounts
+		 * of reassembly being done in the stack
+		 */
+		if (length < copybreak) {
+			struct sk_buff *new_skb =
+				napi_alloc_skb(&adapter->napi, length);
+			if (new_skb) {
+				skb_copy_to_linear_data_offset(new_skb,
+							       -NET_IP_ALIGN,
+							       (skb->data -
+								NET_IP_ALIGN),
+							       (length +
+								NET_IP_ALIGN));
+				/* save the skb in buffer_info as good */
+				buffer_info->skb = skb;
+				skb = new_skb;
+			}
+			/* else just continue with the old one */
+		}
+		/* end copybreak code */
+		skb_put(skb, length);
+
+		/* Receive Checksum Offload */
+		e1000_rx_checksum(adapter, staterr, skb);
+
+		e1000_rx_hash(netdev, rx_desc->wb.lower.hi_dword.rss, skb);
+
+		e1000_receive_skb(adapter, netdev, skb, staterr,
+				  rx_desc->wb.upper.vlan);
+
+next_desc:
+		rx_desc->wb.upper.status_error &= cpu_to_le32(~0xFF);
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= E1000_RX_BUFFER_WRITE) {
+			adapter->alloc_rx_buf(rx_ring, cleaned_count,
+					      GFP_ATOMIC);
+			cleaned_count = 0;
+		}
+
+		/* use prefetched values */
+		rx_desc = next_rxd;
+		buffer_info = next_buffer;
+
+		staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+	}
+	rx_ring->next_to_clean = i;
+
+	cleaned_count = e1000_desc_unused(rx_ring);
+	if (cleaned_count)
+		adapter->alloc_rx_buf(rx_ring, cleaned_count, GFP_ATOMIC);
+
+	adapter->total_rx_bytes += total_rx_bytes;
+	adapter->total_rx_packets += total_rx_packets;
+	return cleaned;
+}
+
+static void e1000_put_txbuf(struct e1000_ring *tx_ring,
+			    struct e1000_buffer *buffer_info,
+			    bool drop)
+{
+	struct e1000_adapter *adapter = tx_ring->adapter;
+
+	if (buffer_info->dma) {
+		if (buffer_info->mapped_as_page)
+			dma_unmap_page(&adapter->pdev->dev, buffer_info->dma,
+				       buffer_info->length, DMA_TO_DEVICE);
+		else
+			dma_unmap_single(&adapter->pdev->dev, buffer_info->dma,
+					 buffer_info->length, DMA_TO_DEVICE);
+		buffer_info->dma = 0;
+	}
+	if (buffer_info->skb) {
+		if (drop)
+			dev_kfree_skb_any(buffer_info->skb);
+		else
+			dev_consume_skb_any(buffer_info->skb);
+		buffer_info->skb = NULL;
+	}
+	buffer_info->time_stamp = 0;
+}
+
+static void e1000_print_hw_hang(struct work_struct *work)
+{
+	struct e1000_adapter *adapter = container_of(work,
+						     struct e1000_adapter,
+						     print_hang_task);
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_ring *tx_ring = adapter->tx_ring;
+	unsigned int i = tx_ring->next_to_clean;
+	unsigned int eop = tx_ring->buffer_info[i].next_to_watch;
+	struct e1000_tx_desc *eop_desc = E1000_TX_DESC(*tx_ring, eop);
+	struct e1000_hw *hw = &adapter->hw;
+	u16 phy_status, phy_1000t_status, phy_ext_status;
+	u16 pci_status;
+
+	if (test_bit(__E1000_DOWN, &adapter->state))
+		return;
+
+	if (!adapter->tx_hang_recheck && (adapter->flags2 & FLAG2_DMA_BURST)) {
+		/* May be block on write-back, flush and detect again
+		 * flush pending descriptor writebacks to memory
+		 */
+		ew32(TIDV, adapter->tx_int_delay | E1000_TIDV_FPD);
+		/* execute the writes immediately */
+		e1e_flush();
+		/* Due to rare timing issues, write to TIDV again to ensure
+		 * the write is successful
+		 */
+		ew32(TIDV, adapter->tx_int_delay | E1000_TIDV_FPD);
+		/* execute the writes immediately */
+		e1e_flush();
+		adapter->tx_hang_recheck = true;
+		return;
+	}
+	adapter->tx_hang_recheck = false;
+
+	if (er32(TDH(0)) == er32(TDT(0))) {
+		e_dbg("false hang detected, ignoring\n");
+		return;
+	}
+
+	/* Real hang detected */
+	netif_stop_queue(netdev);
+
+	e1e_rphy(hw, MII_BMSR, &phy_status);
+	e1e_rphy(hw, MII_STAT1000, &phy_1000t_status);
+	e1e_rphy(hw, MII_ESTATUS, &phy_ext_status);
+
+	pci_read_config_word(adapter->pdev, PCI_STATUS, &pci_status);
+
+	/* detected Hardware unit hang */
+	e_err("Detected Hardware Unit Hang:\n"
+	      "  TDH                  <%x>\n"
+	      "  TDT                  <%x>\n"
+	      "  next_to_use          <%x>\n"
+	      "  next_to_clean        <%x>\n"
+	      "buffer_info[next_to_clean]:\n"
+	      "  time_stamp           <%lx>\n"
+	      "  next_to_watch        <%x>\n"
+	      "  jiffies              <%lx>\n"
+	      "  next_to_watch.status <%x>\n"
+	      "MAC Status             <%x>\n"
+	      "PHY Status             <%x>\n"
+	      "PHY 1000BASE-T Status  <%x>\n"
+	      "PHY Extended Status    <%x>\n"
+	      "PCI Status             <%x>\n",
+	      readl(tx_ring->head), readl(tx_ring->tail), tx_ring->next_to_use,
+	      tx_ring->next_to_clean, tx_ring->buffer_info[eop].time_stamp,
+	      eop, jiffies, eop_desc->upper.fields.status, er32(STATUS),
+	      phy_status, phy_1000t_status, phy_ext_status, pci_status);
+
+	e1000e_dump(adapter);
+
+	/* Suggest workaround for known h/w issue */
+	if ((hw->mac.type == e1000_pchlan) && (er32(CTRL) & E1000_CTRL_TFCE))
+		e_err("Try turning off Tx pause (flow control) via ethtool\n");
+}
+
+/**
+ * e1000e_tx_hwtstamp_work - check for Tx time stamp
+ * @work: pointer to work struct
+ *
+ * This work function polls the TSYNCTXCTL valid bit to determine when a
+ * timestamp has been taken for the current stored skb.  The timestamp must
+ * be for this skb because only one such packet is allowed in the queue.
+ */
+static void e1000e_tx_hwtstamp_work(struct work_struct *work)
+{
+	struct e1000_adapter *adapter = container_of(work, struct e1000_adapter,
+						     tx_hwtstamp_work);
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (er32(TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID) {
+		struct sk_buff *skb = adapter->tx_hwtstamp_skb;
+		struct skb_shared_hwtstamps shhwtstamps;
+		u64 txstmp;
+
+		txstmp = er32(TXSTMPL);
+		txstmp |= (u64)er32(TXSTMPH) << 32;
+
+		e1000e_systim_to_hwtstamp(adapter, &shhwtstamps, txstmp);
+
+		/* Clear the global tx_hwtstamp_skb pointer and force writes
+		 * prior to notifying the stack of a Tx timestamp.
+		 */
+		adapter->tx_hwtstamp_skb = NULL;
+		wmb(); /* force write prior to skb_tstamp_tx */
+
+		skb_tstamp_tx(skb, &shhwtstamps);
+		dev_consume_skb_any(skb);
+	} else if (time_after(jiffies, adapter->tx_hwtstamp_start
+			      + adapter->tx_timeout_factor * HZ)) {
+		dev_kfree_skb_any(adapter->tx_hwtstamp_skb);
+		adapter->tx_hwtstamp_skb = NULL;
+		adapter->tx_hwtstamp_timeouts++;
+		e_warn("clearing Tx timestamp hang\n");
+	} else {
+		/* reschedule to check later */
+		schedule_work(&adapter->tx_hwtstamp_work);
+	}
+}
+
+/**
+ * e1000_clean_tx_irq - Reclaim resources after transmit completes
+ * @tx_ring: Tx descriptor ring
+ *
+ * the return value indicates whether actual cleaning was done, there
+ * is no guarantee that everything was cleaned
+ **/
+static bool e1000_clean_tx_irq(struct e1000_ring *tx_ring)
+{
+	struct e1000_adapter *adapter = tx_ring->adapter;
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_tx_desc *tx_desc, *eop_desc;
+	struct e1000_buffer *buffer_info;
+	unsigned int i, eop;
+	unsigned int count = 0;
+	unsigned int total_tx_bytes = 0, total_tx_packets = 0;
+	unsigned int bytes_compl = 0, pkts_compl = 0;
+
+	i = tx_ring->next_to_clean;
+	eop = tx_ring->buffer_info[i].next_to_watch;
+	eop_desc = E1000_TX_DESC(*tx_ring, eop);
+
+	while ((eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) &&
+	       (count < tx_ring->count)) {
+		bool cleaned = false;
+
+		dma_rmb();		/* read buffer_info after eop_desc */
+		for (; !cleaned; count++) {
+			tx_desc = E1000_TX_DESC(*tx_ring, i);
+			buffer_info = &tx_ring->buffer_info[i];
+			cleaned = (i == eop);
+
+			if (cleaned) {
+				total_tx_packets += buffer_info->segs;
+				total_tx_bytes += buffer_info->bytecount;
+				if (buffer_info->skb) {
+					bytes_compl += buffer_info->skb->len;
+					pkts_compl++;
+				}
+			}
+
+			e1000_put_txbuf(tx_ring, buffer_info, false);
+			tx_desc->upper.data = 0;
+
+			i++;
+			if (i == tx_ring->count)
+				i = 0;
+		}
+
+		if (i == tx_ring->next_to_use)
+			break;
+		eop = tx_ring->buffer_info[i].next_to_watch;
+		eop_desc = E1000_TX_DESC(*tx_ring, eop);
+	}
+
+	tx_ring->next_to_clean = i;
+
+	netdev_completed_queue(netdev, pkts_compl, bytes_compl);
+
+#define TX_WAKE_THRESHOLD 32
+	if (count && netif_carrier_ok(netdev) &&
+	    e1000_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+
+		if (netif_queue_stopped(netdev) &&
+		    !(test_bit(__E1000_DOWN, &adapter->state))) {
+			netif_wake_queue(netdev);
+			++adapter->restart_queue;
+		}
+	}
+
+	if (adapter->detect_tx_hung) {
+		/* Detect a transmit hang in hardware, this serializes the
+		 * check with the clearing of time_stamp and movement of i
+		 */
+		adapter->detect_tx_hung = false;
+		if (tx_ring->buffer_info[i].time_stamp &&
+		    time_after(jiffies, tx_ring->buffer_info[i].time_stamp
+			       + (adapter->tx_timeout_factor * HZ)) &&
+		    !(er32(STATUS) & E1000_STATUS_TXOFF))
+			schedule_work(&adapter->print_hang_task);
+		else
+			adapter->tx_hang_recheck = false;
+	}
+	adapter->total_tx_bytes += total_tx_bytes;
+	adapter->total_tx_packets += total_tx_packets;
+	return count < tx_ring->count;
+}
+
+/**
+ * e1000_clean_rx_irq_ps - Send received data up the network stack; packet split
+ * @rx_ring: Rx descriptor ring
+ * @work_done: output parameter for indicating completed work
+ * @work_to_do: how many packets we can clean
+ *
+ * the return value indicates whether actual cleaning was done, there
+ * is no guarantee that everything was cleaned
+ **/
+static bool e1000_clean_rx_irq_ps(struct e1000_ring *rx_ring, int *work_done,
+				  int work_to_do)
+{
+	struct e1000_adapter *adapter = rx_ring->adapter;
+	struct e1000_hw *hw = &adapter->hw;
+	union e1000_rx_desc_packet_split *rx_desc, *next_rxd;
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_buffer *buffer_info, *next_buffer;
+	struct e1000_ps_page *ps_page;
+	struct sk_buff *skb;
+	unsigned int i, j;
+	u32 length, staterr;
+	int cleaned_count = 0;
+	bool cleaned = false;
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+
+	i = rx_ring->next_to_clean;
+	rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
+	staterr = le32_to_cpu(rx_desc->wb.middle.status_error);
+	buffer_info = &rx_ring->buffer_info[i];
+
+	while (staterr & E1000_RXD_STAT_DD) {
+		if (*work_done >= work_to_do)
+			break;
+		(*work_done)++;
+		skb = buffer_info->skb;
+		dma_rmb();	/* read descriptor and rx_buffer_info after status DD */
+
+		/* in the packet split case this is header only */
+		prefetch(skb->data - NET_IP_ALIGN);
+
+		i++;
+		if (i == rx_ring->count)
+			i = 0;
+		next_rxd = E1000_RX_DESC_PS(*rx_ring, i);
+		prefetch(next_rxd);
+
+		next_buffer = &rx_ring->buffer_info[i];
+
+		cleaned = true;
+		cleaned_count++;
+		dma_unmap_single(&pdev->dev, buffer_info->dma,
+				 adapter->rx_ps_bsize0, DMA_FROM_DEVICE);
+		buffer_info->dma = 0;
+
+		/* see !EOP comment in other Rx routine */
+		if (!(staterr & E1000_RXD_STAT_EOP))
+			adapter->flags2 |= FLAG2_IS_DISCARDING;
+
+		if (adapter->flags2 & FLAG2_IS_DISCARDING) {
+			e_dbg("Packet Split buffers didn't pick up the full packet\n");
+			dev_kfree_skb_irq(skb);
+			if (staterr & E1000_RXD_STAT_EOP)
+				adapter->flags2 &= ~FLAG2_IS_DISCARDING;
+			goto next_desc;
+		}
+
+		if (unlikely((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) &&
+			     !(netdev->features & NETIF_F_RXALL))) {
+			dev_kfree_skb_irq(skb);
+			goto next_desc;
+		}
+
+		length = le16_to_cpu(rx_desc->wb.middle.length0);
+
+		if (!length) {
+			e_dbg("Last part of the packet spanning multiple descriptors\n");
+			dev_kfree_skb_irq(skb);
+			goto next_desc;
+		}
+
+		/* Good Receive */
+		skb_put(skb, length);
+
+		{
+			/* this looks ugly, but it seems compiler issues make
+			 * it more efficient than reusing j
+			 */
+			int l1 = le16_to_cpu(rx_desc->wb.upper.length[0]);
+
+			/* page alloc/put takes too long and effects small
+			 * packet throughput, so unsplit small packets and
+			 * save the alloc/put
+			 */
+			if (l1 && (l1 <= copybreak) &&
+			    ((length + l1) <= adapter->rx_ps_bsize0)) {
+				ps_page = &buffer_info->ps_pages[0];
+
+				dma_sync_single_for_cpu(&pdev->dev,
+							ps_page->dma,
+							PAGE_SIZE,
+							DMA_FROM_DEVICE);
+				memcpy(skb_tail_pointer(skb),
+				       page_address(ps_page->page), l1);
+				dma_sync_single_for_device(&pdev->dev,
+							   ps_page->dma,
+							   PAGE_SIZE,
+							   DMA_FROM_DEVICE);
+
+				/* remove the CRC */
+				if (!(adapter->flags2 & FLAG2_CRC_STRIPPING)) {
+					if (!(netdev->features & NETIF_F_RXFCS))
+						l1 -= 4;
+				}
+
+				skb_put(skb, l1);
+				goto copydone;
+			}	/* if */
+		}
+
+		for (j = 0; j < PS_PAGE_BUFFERS; j++) {
+			length = le16_to_cpu(rx_desc->wb.upper.length[j]);
+			if (!length)
+				break;
+
+			ps_page = &buffer_info->ps_pages[j];
+			dma_unmap_page(&pdev->dev, ps_page->dma, PAGE_SIZE,
+				       DMA_FROM_DEVICE);
+			ps_page->dma = 0;
+			skb_fill_page_desc(skb, j, ps_page->page, 0, length);
+			ps_page->page = NULL;
+			skb->len += length;
+			skb->data_len += length;
+			skb->truesize += PAGE_SIZE;
+		}
+
+		/* strip the ethernet crc, problem is we're using pages now so
+		 * this whole operation can get a little cpu intensive
+		 */
+		if (!(adapter->flags2 & FLAG2_CRC_STRIPPING)) {
+			if (!(netdev->features & NETIF_F_RXFCS))
+				pskb_trim(skb, skb->len - 4);
+		}
+
+copydone:
+		total_rx_bytes += skb->len;
+		total_rx_packets++;
+
+		e1000_rx_checksum(adapter, staterr, skb);
+
+		e1000_rx_hash(netdev, rx_desc->wb.lower.hi_dword.rss, skb);
+
+		if (rx_desc->wb.upper.header_status &
+		    cpu_to_le16(E1000_RXDPS_HDRSTAT_HDRSP))
+			adapter->rx_hdr_split++;
+
+		e1000_receive_skb(adapter, netdev, skb, staterr,
+				  rx_desc->wb.middle.vlan);
+
+next_desc:
+		rx_desc->wb.middle.status_error &= cpu_to_le32(~0xFF);
+		buffer_info->skb = NULL;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= E1000_RX_BUFFER_WRITE) {
+			adapter->alloc_rx_buf(rx_ring, cleaned_count,
+					      GFP_ATOMIC);
+			cleaned_count = 0;
+		}
+
+		/* use prefetched values */
+		rx_desc = next_rxd;
+		buffer_info = next_buffer;
+
+		staterr = le32_to_cpu(rx_desc->wb.middle.status_error);
+	}
+	rx_ring->next_to_clean = i;
+
+	cleaned_count = e1000_desc_unused(rx_ring);
+	if (cleaned_count)
+		adapter->alloc_rx_buf(rx_ring, cleaned_count, GFP_ATOMIC);
+
+	adapter->total_rx_bytes += total_rx_bytes;
+	adapter->total_rx_packets += total_rx_packets;
+	return cleaned;
+}
+
+static void e1000_consume_page(struct e1000_buffer *bi, struct sk_buff *skb,
+			       u16 length)
+{
+	bi->page = NULL;
+	skb->len += length;
+	skb->data_len += length;
+	skb->truesize += PAGE_SIZE;
+}
+
+/**
+ * e1000_clean_jumbo_rx_irq - Send received data up the network stack; legacy
+ * @rx_ring: Rx descriptor ring
+ * @work_done: output parameter for indicating completed work
+ * @work_to_do: how many packets we can clean
+ *
+ * the return value indicates whether actual cleaning was done, there
+ * is no guarantee that everything was cleaned
+ **/
+static bool e1000_clean_jumbo_rx_irq(struct e1000_ring *rx_ring, int *work_done,
+				     int work_to_do)
+{
+	struct e1000_adapter *adapter = rx_ring->adapter;
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	union e1000_rx_desc_extended *rx_desc, *next_rxd;
+	struct e1000_buffer *buffer_info, *next_buffer;
+	u32 length, staterr;
+	unsigned int i;
+	int cleaned_count = 0;
+	bool cleaned = false;
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+	struct skb_shared_info *shinfo;
+
+	i = rx_ring->next_to_clean;
+	rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
+	staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+	buffer_info = &rx_ring->buffer_info[i];
+
+	while (staterr & E1000_RXD_STAT_DD) {
+		struct sk_buff *skb;
+
+		if (*work_done >= work_to_do)
+			break;
+		(*work_done)++;
+		dma_rmb();	/* read descriptor and rx_buffer_info after status DD */
+
+		skb = buffer_info->skb;
+		buffer_info->skb = NULL;
+
+		++i;
+		if (i == rx_ring->count)
+			i = 0;
+		next_rxd = E1000_RX_DESC_EXT(*rx_ring, i);
+		prefetch(next_rxd);
+
+		next_buffer = &rx_ring->buffer_info[i];
+
+		cleaned = true;
+		cleaned_count++;
+		dma_unmap_page(&pdev->dev, buffer_info->dma, PAGE_SIZE,
+			       DMA_FROM_DEVICE);
+		buffer_info->dma = 0;
+
+		length = le16_to_cpu(rx_desc->wb.upper.length);
+
+		/* errors is only valid for DD + EOP descriptors */
+		if (unlikely((staterr & E1000_RXD_STAT_EOP) &&
+			     ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) &&
+			      !(netdev->features & NETIF_F_RXALL)))) {
+			/* recycle both page and skb */
+			buffer_info->skb = skb;
+			/* an error means any chain goes out the window too */
+			if (rx_ring->rx_skb_top)
+				dev_kfree_skb_irq(rx_ring->rx_skb_top);
+			rx_ring->rx_skb_top = NULL;
+			goto next_desc;
+		}
+#define rxtop (rx_ring->rx_skb_top)
+		if (!(staterr & E1000_RXD_STAT_EOP)) {
+			/* this descriptor is only the beginning (or middle) */
+			if (!rxtop) {
+				/* this is the beginning of a chain */
+				rxtop = skb;
+				skb_fill_page_desc(rxtop, 0, buffer_info->page,
+						   0, length);
+			} else {
+				/* this is the middle of a chain */
+				shinfo = skb_shinfo(rxtop);
+				skb_fill_page_desc(rxtop, shinfo->nr_frags,
+						   buffer_info->page, 0,
+						   length);
+				/* re-use the skb, only consumed the page */
+				buffer_info->skb = skb;
+			}
+			e1000_consume_page(buffer_info, rxtop, length);
+			goto next_desc;
+		} else {
+			if (rxtop) {
+				/* end of the chain */
+				shinfo = skb_shinfo(rxtop);
+				skb_fill_page_desc(rxtop, shinfo->nr_frags,
+						   buffer_info->page, 0,
+						   length);
+				/* re-use the current skb, we only consumed the
+				 * page
+				 */
+				buffer_info->skb = skb;
+				skb = rxtop;
+				rxtop = NULL;
+				e1000_consume_page(buffer_info, skb, length);
+			} else {
+				/* no chain, got EOP, this buf is the packet
+				 * copybreak to save the put_page/alloc_page
+				 */
+				if (length <= copybreak &&
+				    skb_tailroom(skb) >= length) {
+					memcpy(skb_tail_pointer(skb),
+					       page_address(buffer_info->page),
+					       length);
+					/* re-use the page, so don't erase
+					 * buffer_info->page
+					 */
+					skb_put(skb, length);
+				} else {
+					skb_fill_page_desc(skb, 0,
+							   buffer_info->page, 0,
+							   length);
+					e1000_consume_page(buffer_info, skb,
+							   length);
+				}
+			}
+		}
+
+		/* Receive Checksum Offload */
+		e1000_rx_checksum(adapter, staterr, skb);
+
+		e1000_rx_hash(netdev, rx_desc->wb.lower.hi_dword.rss, skb);
+
+		/* probably a little skewed due to removing CRC */
+		total_rx_bytes += skb->len;
+		total_rx_packets++;
+
+		/* eth type trans needs skb->data to point to something */
+		if (!pskb_may_pull(skb, ETH_HLEN)) {
+			e_err("pskb_may_pull failed.\n");
+			dev_kfree_skb_irq(skb);
+			goto next_desc;
+		}
+
+		e1000_receive_skb(adapter, netdev, skb, staterr,
+				  rx_desc->wb.upper.vlan);
+
+next_desc:
+		rx_desc->wb.upper.status_error &= cpu_to_le32(~0xFF);
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) {
+			adapter->alloc_rx_buf(rx_ring, cleaned_count,
+					      GFP_ATOMIC);
+			cleaned_count = 0;
+		}
+
+		/* use prefetched values */
+		rx_desc = next_rxd;
+		buffer_info = next_buffer;
+
+		staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+	}
+	rx_ring->next_to_clean = i;
+
+	cleaned_count = e1000_desc_unused(rx_ring);
+	if (cleaned_count)
+		adapter->alloc_rx_buf(rx_ring, cleaned_count, GFP_ATOMIC);
+
+	adapter->total_rx_bytes += total_rx_bytes;
+	adapter->total_rx_packets += total_rx_packets;
+	return cleaned;
+}
+
+/**
+ * e1000_clean_rx_ring - Free Rx Buffers per Queue
+ * @rx_ring: Rx descriptor ring
+ **/
+static void e1000_clean_rx_ring(struct e1000_ring *rx_ring)
+{
+	struct e1000_adapter *adapter = rx_ring->adapter;
+	struct e1000_buffer *buffer_info;
+	struct e1000_ps_page *ps_page;
+	struct pci_dev *pdev = adapter->pdev;
+	unsigned int i, j;
+
+	/* Free all the Rx ring sk_buffs */
+	for (i = 0; i < rx_ring->count; i++) {
+		buffer_info = &rx_ring->buffer_info[i];
+		if (buffer_info->dma) {
+			if (adapter->clean_rx == e1000_clean_rx_irq)
+				dma_unmap_single(&pdev->dev, buffer_info->dma,
+						 adapter->rx_buffer_len,
+						 DMA_FROM_DEVICE);
+			else if (adapter->clean_rx == e1000_clean_jumbo_rx_irq)
+				dma_unmap_page(&pdev->dev, buffer_info->dma,
+					       PAGE_SIZE, DMA_FROM_DEVICE);
+			else if (adapter->clean_rx == e1000_clean_rx_irq_ps)
+				dma_unmap_single(&pdev->dev, buffer_info->dma,
+						 adapter->rx_ps_bsize0,
+						 DMA_FROM_DEVICE);
+			buffer_info->dma = 0;
+		}
+
+		if (buffer_info->page) {
+			put_page(buffer_info->page);
+			buffer_info->page = NULL;
+		}
+
+		if (buffer_info->skb) {
+			dev_kfree_skb(buffer_info->skb);
+			buffer_info->skb = NULL;
+		}
+
+		for (j = 0; j < PS_PAGE_BUFFERS; j++) {
+			ps_page = &buffer_info->ps_pages[j];
+			if (!ps_page->page)
+				break;
+			dma_unmap_page(&pdev->dev, ps_page->dma, PAGE_SIZE,
+				       DMA_FROM_DEVICE);
+			ps_page->dma = 0;
+			put_page(ps_page->page);
+			ps_page->page = NULL;
+		}
+	}
+
+	/* there also may be some cached data from a chained receive */
+	if (rx_ring->rx_skb_top) {
+		dev_kfree_skb(rx_ring->rx_skb_top);
+		rx_ring->rx_skb_top = NULL;
+	}
+
+	/* Zero out the descriptor ring */
+	memset(rx_ring->desc, 0, rx_ring->size);
+
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+	adapter->flags2 &= ~FLAG2_IS_DISCARDING;
+}
+
+static void e1000e_downshift_workaround(struct work_struct *work)
+{
+	struct e1000_adapter *adapter = container_of(work,
+						     struct e1000_adapter,
+						     downshift_task);
+
+	if (test_bit(__E1000_DOWN, &adapter->state))
+		return;
+
+	e1000e_gig_downshift_workaround_ich8lan(&adapter->hw);
+}
+
+/**
+ * e1000_intr_msi - Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ **/
+static irqreturn_t e1000_intr_msi(int __always_unused irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 icr = er32(ICR);
+
+	/* read ICR disables interrupts using IAM */
+	if (icr & E1000_ICR_LSC) {
+		hw->mac.get_link_status = true;
+		/* ICH8 workaround-- Call gig speed drop workaround on cable
+		 * disconnect (LSC) before accessing any PHY registers
+		 */
+		if ((adapter->flags & FLAG_LSC_GIG_SPEED_DROP) &&
+		    (!(er32(STATUS) & E1000_STATUS_LU)))
+			schedule_work(&adapter->downshift_task);
+
+		/* 80003ES2LAN workaround-- For packet buffer work-around on
+		 * link down event; disable receives here in the ISR and reset
+		 * adapter in watchdog
+		 */
+		if (netif_carrier_ok(netdev) &&
+		    adapter->flags & FLAG_RX_NEEDS_RESTART) {
+			/* disable receives */
+			u32 rctl = er32(RCTL);
+
+			ew32(RCTL, rctl & ~E1000_RCTL_EN);
+			adapter->flags |= FLAG_RESTART_NOW;
+		}
+		/* guard against interrupt when we're going down */
+		if (!test_bit(__E1000_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
+
+	/* Reset on uncorrectable ECC error */
+	if ((icr & E1000_ICR_ECCER) && (hw->mac.type >= e1000_pch_lpt)) {
+		u32 pbeccsts = er32(PBECCSTS);
+
+		adapter->corr_errors +=
+		    pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK;
+		adapter->uncorr_errors +=
+		    (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >>
+		    E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT;
+
+		/* Do the reset outside of interrupt context */
+		schedule_work(&adapter->reset_task);
+
+		/* return immediately since reset is imminent */
+		return IRQ_HANDLED;
+	}
+
+	if (napi_schedule_prep(&adapter->napi)) {
+		adapter->total_tx_bytes = 0;
+		adapter->total_tx_packets = 0;
+		adapter->total_rx_bytes = 0;
+		adapter->total_rx_packets = 0;
+		__napi_schedule(&adapter->napi);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * e1000_intr - Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ **/
+static irqreturn_t e1000_intr(int __always_unused irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl, icr = er32(ICR);
+
+	if (!icr || test_bit(__E1000_DOWN, &adapter->state))
+		return IRQ_NONE;	/* Not our interrupt */
+
+	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
+	 * not set, then the adapter didn't send an interrupt
+	 */
+	if (!(icr & E1000_ICR_INT_ASSERTED))
+		return IRQ_NONE;
+
+	/* Interrupt Auto-Mask...upon reading ICR,
+	 * interrupts are masked.  No need for the
+	 * IMC write
+	 */
+
+	if (icr & E1000_ICR_LSC) {
+		hw->mac.get_link_status = true;
+		/* ICH8 workaround-- Call gig speed drop workaround on cable
+		 * disconnect (LSC) before accessing any PHY registers
+		 */
+		if ((adapter->flags & FLAG_LSC_GIG_SPEED_DROP) &&
+		    (!(er32(STATUS) & E1000_STATUS_LU)))
+			schedule_work(&adapter->downshift_task);
+
+		/* 80003ES2LAN workaround--
+		 * For packet buffer work-around on link down event;
+		 * disable receives here in the ISR and
+		 * reset adapter in watchdog
+		 */
+		if (netif_carrier_ok(netdev) &&
+		    (adapter->flags & FLAG_RX_NEEDS_RESTART)) {
+			/* disable receives */
+			rctl = er32(RCTL);
+			ew32(RCTL, rctl & ~E1000_RCTL_EN);
+			adapter->flags |= FLAG_RESTART_NOW;
+		}
+		/* guard against interrupt when we're going down */
+		if (!test_bit(__E1000_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
+
+	/* Reset on uncorrectable ECC error */
+	if ((icr & E1000_ICR_ECCER) && (hw->mac.type >= e1000_pch_lpt)) {
+		u32 pbeccsts = er32(PBECCSTS);
+
+		adapter->corr_errors +=
+		    pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK;
+		adapter->uncorr_errors +=
+		    (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >>
+		    E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT;
+
+		/* Do the reset outside of interrupt context */
+		schedule_work(&adapter->reset_task);
+
+		/* return immediately since reset is imminent */
+		return IRQ_HANDLED;
+	}
+
+	if (napi_schedule_prep(&adapter->napi)) {
+		adapter->total_tx_bytes = 0;
+		adapter->total_tx_packets = 0;
+		adapter->total_rx_bytes = 0;
+		adapter->total_rx_packets = 0;
+		__napi_schedule(&adapter->napi);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 icr = er32(ICR);
+
+	if (icr & adapter->eiac_mask)
+		ew32(ICS, (icr & adapter->eiac_mask));
+
+	if (icr & E1000_ICR_LSC) {
+		hw->mac.get_link_status = true;
+		/* guard against interrupt when we're going down */
+		if (!test_bit(__E1000_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
+
+	if (!test_bit(__E1000_DOWN, &adapter->state))
+		ew32(IMS, E1000_IMS_OTHER | IMS_OTHER_MASK);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t e1000_intr_msix_tx(int __always_unused irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_ring *tx_ring = adapter->tx_ring;
+
+	adapter->total_tx_bytes = 0;
+	adapter->total_tx_packets = 0;
+
+	if (!e1000_clean_tx_irq(tx_ring))
+		/* Ring was not completely cleaned, so fire another interrupt */
+		ew32(ICS, tx_ring->ims_val);
+
+	if (!test_bit(__E1000_DOWN, &adapter->state))
+		ew32(IMS, adapter->tx_ring->ims_val);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t e1000_intr_msix_rx(int __always_unused irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_ring *rx_ring = adapter->rx_ring;
+
+	/* Write the ITR value calculated at the end of the
+	 * previous interrupt.
+	 */
+	if (rx_ring->set_itr) {
+		u32 itr = rx_ring->itr_val ?
+			  1000000000 / (rx_ring->itr_val * 256) : 0;
+
+		writel(itr, rx_ring->itr_register);
+		rx_ring->set_itr = 0;
+	}
+
+	if (napi_schedule_prep(&adapter->napi)) {
+		adapter->total_rx_bytes = 0;
+		adapter->total_rx_packets = 0;
+		__napi_schedule(&adapter->napi);
+	}
+	return IRQ_HANDLED;
+}
+
+/**
+ * e1000_configure_msix - Configure MSI-X hardware
+ * @adapter: board private structure
+ *
+ * e1000_configure_msix sets up the hardware to properly
+ * generate MSI-X interrupts.
+ **/
+static void e1000_configure_msix(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_ring *rx_ring = adapter->rx_ring;
+	struct e1000_ring *tx_ring = adapter->tx_ring;
+	int vector = 0;
+	u32 ctrl_ext, ivar = 0;
+
+	adapter->eiac_mask = 0;
+
+	/* Workaround issue with spurious interrupts on 82574 in MSI-X mode */
+	if (hw->mac.type == e1000_82574) {
+		u32 rfctl = er32(RFCTL);
+
+		rfctl |= E1000_RFCTL_ACK_DIS;
+		ew32(RFCTL, rfctl);
+	}
+
+	/* Configure Rx vector */
+	rx_ring->ims_val = E1000_IMS_RXQ0;
+	adapter->eiac_mask |= rx_ring->ims_val;
+	if (rx_ring->itr_val)
+		writel(1000000000 / (rx_ring->itr_val * 256),
+		       rx_ring->itr_register);
+	else
+		writel(1, rx_ring->itr_register);
+	ivar = E1000_IVAR_INT_ALLOC_VALID | vector;
+
+	/* Configure Tx vector */
+	tx_ring->ims_val = E1000_IMS_TXQ0;
+	vector++;
+	if (tx_ring->itr_val)
+		writel(1000000000 / (tx_ring->itr_val * 256),
+		       tx_ring->itr_register);
+	else
+		writel(1, tx_ring->itr_register);
+	adapter->eiac_mask |= tx_ring->ims_val;
+	ivar |= ((E1000_IVAR_INT_ALLOC_VALID | vector) << 8);
+
+	/* set vector for Other Causes, e.g. link changes */
+	vector++;
+	ivar |= ((E1000_IVAR_INT_ALLOC_VALID | vector) << 16);
+	if (rx_ring->itr_val)
+		writel(1000000000 / (rx_ring->itr_val * 256),
+		       hw->hw_addr + E1000_EITR_82574(vector));
+	else
+		writel(1, hw->hw_addr + E1000_EITR_82574(vector));
+
+	/* Cause Tx interrupts on every write back */
+	ivar |= BIT(31);
+
+	ew32(IVAR, ivar);
+
+	/* enable MSI-X PBA support */
+	ctrl_ext = er32(CTRL_EXT) & ~E1000_CTRL_EXT_IAME;
+	ctrl_ext |= E1000_CTRL_EXT_PBA_CLR | E1000_CTRL_EXT_EIAME;
+	ew32(CTRL_EXT, ctrl_ext);
+	e1e_flush();
+}
+
+void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter)
+{
+	if (adapter->msix_entries) {
+		pci_disable_msix(adapter->pdev);
+		kfree(adapter->msix_entries);
+		adapter->msix_entries = NULL;
+	} else if (adapter->flags & FLAG_MSI_ENABLED) {
+		pci_disable_msi(adapter->pdev);
+		adapter->flags &= ~FLAG_MSI_ENABLED;
+	}
+}
+
+/**
+ * e1000e_set_interrupt_capability - set MSI or MSI-X if supported
+ * @adapter: board private structure
+ *
+ * Attempt to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+void e1000e_set_interrupt_capability(struct e1000_adapter *adapter)
+{
+	int err;
+	int i;
+
+	switch (adapter->int_mode) {
+	case E1000E_INT_MODE_MSIX:
+		if (adapter->flags & FLAG_HAS_MSIX) {
+			adapter->num_vectors = 3; /* RxQ0, TxQ0 and other */
+			adapter->msix_entries = kcalloc(adapter->num_vectors,
+							sizeof(struct
+							       msix_entry),
+							GFP_KERNEL);
+			if (adapter->msix_entries) {
+				struct e1000_adapter *a = adapter;
+
+				for (i = 0; i < adapter->num_vectors; i++)
+					adapter->msix_entries[i].entry = i;
+
+				err = pci_enable_msix_range(a->pdev,
+							    a->msix_entries,
+							    a->num_vectors,
+							    a->num_vectors);
+				if (err > 0)
+					return;
+			}
+			/* MSI-X failed, so fall through and try MSI */
+			e_err("Failed to initialize MSI-X interrupts.  Falling back to MSI interrupts.\n");
+			e1000e_reset_interrupt_capability(adapter);
+		}
+		adapter->int_mode = E1000E_INT_MODE_MSI;
+		fallthrough;
+	case E1000E_INT_MODE_MSI:
+		if (!pci_enable_msi(adapter->pdev)) {
+			adapter->flags |= FLAG_MSI_ENABLED;
+		} else {
+			adapter->int_mode = E1000E_INT_MODE_LEGACY;
+			e_err("Failed to initialize MSI interrupts.  Falling back to legacy interrupts.\n");
+		}
+		fallthrough;
+	case E1000E_INT_MODE_LEGACY:
+		/* Don't do anything; this is the system default */
+		break;
+	}
+
+	/* store the number of vectors being used */
+	adapter->num_vectors = 1;
+}
+
+/**
+ * e1000_request_msix - Initialize MSI-X interrupts
+ * @adapter: board private structure
+ *
+ * e1000_request_msix allocates MSI-X vectors and requests interrupts from the
+ * kernel.
+ **/
+static int e1000_request_msix(struct e1000_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int err = 0, vector = 0;
+
+	if (strlen(netdev->name) < (IFNAMSIZ - 5))
+		snprintf(adapter->rx_ring->name,
+			 sizeof(adapter->rx_ring->name) - 1,
+			 "%.14s-rx-0", netdev->name);
+	else
+		memcpy(adapter->rx_ring->name, netdev->name, IFNAMSIZ);
+	err = request_irq(adapter->msix_entries[vector].vector,
+			  e1000_intr_msix_rx, 0, adapter->rx_ring->name,
+			  netdev);
+	if (err)
+		return err;
+	adapter->rx_ring->itr_register = adapter->hw.hw_addr +
+	    E1000_EITR_82574(vector);
+	adapter->rx_ring->itr_val = adapter->itr;
+	vector++;
+
+	if (strlen(netdev->name) < (IFNAMSIZ - 5))
+		snprintf(adapter->tx_ring->name,
+			 sizeof(adapter->tx_ring->name) - 1,
+			 "%.14s-tx-0", netdev->name);
+	else
+		memcpy(adapter->tx_ring->name, netdev->name, IFNAMSIZ);
+	err = request_irq(adapter->msix_entries[vector].vector,
+			  e1000_intr_msix_tx, 0, adapter->tx_ring->name,
+			  netdev);
+	if (err)
+		return err;
+	adapter->tx_ring->itr_register = adapter->hw.hw_addr +
+	    E1000_EITR_82574(vector);
+	adapter->tx_ring->itr_val = adapter->itr;
+	vector++;
+
+	err = request_irq(adapter->msix_entries[vector].vector,
+			  e1000_msix_other, 0, netdev->name, netdev);
+	if (err)
+		return err;
+
+	e1000_configure_msix(adapter);
+
+	return 0;
+}
+
+/**
+ * e1000_request_irq - initialize interrupts
+ * @adapter: board private structure
+ *
+ * Attempts to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static int e1000_request_irq(struct e1000_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int err;
+
+	if (adapter->msix_entries) {
+		err = e1000_request_msix(adapter);
+		if (!err)
+			return err;
+		/* fall back to MSI */
+		e1000e_reset_interrupt_capability(adapter);
+		adapter->int_mode = E1000E_INT_MODE_MSI;
+		e1000e_set_interrupt_capability(adapter);
+	}
+	if (adapter->flags & FLAG_MSI_ENABLED) {
+		err = request_irq(adapter->pdev->irq, e1000_intr_msi, 0,
+				  netdev->name, netdev);
+		if (!err)
+			return err;
+
+		/* fall back to legacy interrupt */
+		e1000e_reset_interrupt_capability(adapter);
+		adapter->int_mode = E1000E_INT_MODE_LEGACY;
+	}
+
+	err = request_irq(adapter->pdev->irq, e1000_intr, IRQF_SHARED,
+			  netdev->name, netdev);
+	if (err)
+		e_err("Unable to allocate interrupt, Error: %d\n", err);
+
+	return err;
+}
+
+static void e1000_free_irq(struct e1000_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	if (adapter->msix_entries) {
+		int vector = 0;
+
+		free_irq(adapter->msix_entries[vector].vector, netdev);
+		vector++;
+
+		free_irq(adapter->msix_entries[vector].vector, netdev);
+		vector++;
+
+		/* Other Causes interrupt vector */
+		free_irq(adapter->msix_entries[vector].vector, netdev);
+		return;
+	}
+
+	free_irq(adapter->pdev->irq, netdev);
+}
+
+/**
+ * e1000_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ **/
+static void e1000_irq_disable(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	ew32(IMC, ~0);
+	if (adapter->msix_entries)
+		ew32(EIAC_82574, 0);
+	e1e_flush();
+
+	if (adapter->msix_entries) {
+		int i;
+
+		for (i = 0; i < adapter->num_vectors; i++)
+			synchronize_irq(adapter->msix_entries[i].vector);
+	} else {
+		synchronize_irq(adapter->pdev->irq);
+	}
+}
+
+/**
+ * e1000_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ **/
+static void e1000_irq_enable(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (adapter->msix_entries) {
+		ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574);
+		ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER |
+		     IMS_OTHER_MASK);
+	} else if (hw->mac.type >= e1000_pch_lpt) {
+		ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER);
+	} else {
+		ew32(IMS, IMS_ENABLE_MASK);
+	}
+	e1e_flush();
+}
+
+/**
+ * e1000e_get_hw_control - get control of the h/w from f/w
+ * @adapter: address of board private structure
+ *
+ * e1000e_get_hw_control sets {CTRL_EXT|SWSM}:DRV_LOAD bit.
+ * For ASF and Pass Through versions of f/w this means that
+ * the driver is loaded. For AMT version (only with 82573)
+ * of the f/w this means that the network i/f is open.
+ **/
+void e1000e_get_hw_control(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl_ext;
+	u32 swsm;
+
+	/* Let firmware know the driver has taken over */
+	if (adapter->flags & FLAG_HAS_SWSM_ON_LOAD) {
+		swsm = er32(SWSM);
+		ew32(SWSM, swsm | E1000_SWSM_DRV_LOAD);
+	} else if (adapter->flags & FLAG_HAS_CTRLEXT_ON_LOAD) {
+		ctrl_ext = er32(CTRL_EXT);
+		ew32(CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
+	}
+}
+
+/**
+ * e1000e_release_hw_control - release control of the h/w to f/w
+ * @adapter: address of board private structure
+ *
+ * e1000e_release_hw_control resets {CTRL_EXT|SWSM}:DRV_LOAD bit.
+ * For ASF and Pass Through versions of f/w this means that the
+ * driver is no longer loaded. For AMT version (only with 82573) i
+ * of the f/w this means that the network i/f is closed.
+ *
+ **/
+void e1000e_release_hw_control(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl_ext;
+	u32 swsm;
+
+	/* Let firmware taken over control of h/w */
+	if (adapter->flags & FLAG_HAS_SWSM_ON_LOAD) {
+		swsm = er32(SWSM);
+		ew32(SWSM, swsm & ~E1000_SWSM_DRV_LOAD);
+	} else if (adapter->flags & FLAG_HAS_CTRLEXT_ON_LOAD) {
+		ctrl_ext = er32(CTRL_EXT);
+		ew32(CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
+	}
+}
+
+/**
+ * e1000_alloc_ring_dma - allocate memory for a ring structure
+ * @adapter: board private structure
+ * @ring: ring struct for which to allocate dma
+ **/
+static int e1000_alloc_ring_dma(struct e1000_adapter *adapter,
+				struct e1000_ring *ring)
+{
+	struct pci_dev *pdev = adapter->pdev;
+
+	ring->desc = dma_alloc_coherent(&pdev->dev, ring->size, &ring->dma,
+					GFP_KERNEL);
+	if (!ring->desc)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * e1000e_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @tx_ring: Tx descriptor ring
+ *
+ * Return 0 on success, negative on failure
+ **/
+int e1000e_setup_tx_resources(struct e1000_ring *tx_ring)
+{
+	struct e1000_adapter *adapter = tx_ring->adapter;
+	int err = -ENOMEM, size;
+
+	size = sizeof(struct e1000_buffer) * tx_ring->count;
+	tx_ring->buffer_info = vzalloc(size);
+	if (!tx_ring->buffer_info)
+		goto err;
+
+	/* round up to nearest 4K */
+	tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc);
+	tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+	err = e1000_alloc_ring_dma(adapter, tx_ring);
+	if (err)
+		goto err;
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+	return 0;
+err:
+	vfree(tx_ring->buffer_info);
+	e_err("Unable to allocate memory for the transmit descriptor ring\n");
+	return err;
+}
+
+/**
+ * e1000e_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @rx_ring: Rx descriptor ring
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int e1000e_setup_rx_resources(struct e1000_ring *rx_ring)
+{
+	struct e1000_adapter *adapter = rx_ring->adapter;
+	struct e1000_buffer *buffer_info;
+	int i, size, desc_len, err = -ENOMEM;
+
+	size = sizeof(struct e1000_buffer) * rx_ring->count;
+	rx_ring->buffer_info = vzalloc(size);
+	if (!rx_ring->buffer_info)
+		goto err;
+
+	for (i = 0; i < rx_ring->count; i++) {
+		buffer_info = &rx_ring->buffer_info[i];
+		buffer_info->ps_pages = kcalloc(PS_PAGE_BUFFERS,
+						sizeof(struct e1000_ps_page),
+						GFP_KERNEL);
+		if (!buffer_info->ps_pages)
+			goto err_pages;
+	}
+
+	desc_len = sizeof(union e1000_rx_desc_packet_split);
+
+	/* Round up to nearest 4K */
+	rx_ring->size = rx_ring->count * desc_len;
+	rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+	err = e1000_alloc_ring_dma(adapter, rx_ring);
+	if (err)
+		goto err_pages;
+
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+	rx_ring->rx_skb_top = NULL;
+
+	return 0;
+
+err_pages:
+	for (i = 0; i < rx_ring->count; i++) {
+		buffer_info = &rx_ring->buffer_info[i];
+		kfree(buffer_info->ps_pages);
+	}
+err:
+	vfree(rx_ring->buffer_info);
+	e_err("Unable to allocate memory for the receive descriptor ring\n");
+	return err;
+}
+
+/**
+ * e1000_clean_tx_ring - Free Tx Buffers
+ * @tx_ring: Tx descriptor ring
+ **/
+static void e1000_clean_tx_ring(struct e1000_ring *tx_ring)
+{
+	struct e1000_adapter *adapter = tx_ring->adapter;
+	struct e1000_buffer *buffer_info;
+	unsigned long size;
+	unsigned int i;
+
+	for (i = 0; i < tx_ring->count; i++) {
+		buffer_info = &tx_ring->buffer_info[i];
+		e1000_put_txbuf(tx_ring, buffer_info, false);
+	}
+
+	netdev_reset_queue(adapter->netdev);
+	size = sizeof(struct e1000_buffer) * tx_ring->count;
+	memset(tx_ring->buffer_info, 0, size);
+
+	memset(tx_ring->desc, 0, tx_ring->size);
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+}
+
+/**
+ * e1000e_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: Tx descriptor ring
+ *
+ * Free all transmit software resources
+ **/
+void e1000e_free_tx_resources(struct e1000_ring *tx_ring)
+{
+	struct e1000_adapter *adapter = tx_ring->adapter;
+	struct pci_dev *pdev = adapter->pdev;
+
+	e1000_clean_tx_ring(tx_ring);
+
+	vfree(tx_ring->buffer_info);
+	tx_ring->buffer_info = NULL;
+
+	dma_free_coherent(&pdev->dev, tx_ring->size, tx_ring->desc,
+			  tx_ring->dma);
+	tx_ring->desc = NULL;
+}
+
+/**
+ * e1000e_free_rx_resources - Free Rx Resources
+ * @rx_ring: Rx descriptor ring
+ *
+ * Free all receive software resources
+ **/
+void e1000e_free_rx_resources(struct e1000_ring *rx_ring)
+{
+	struct e1000_adapter *adapter = rx_ring->adapter;
+	struct pci_dev *pdev = adapter->pdev;
+	int i;
+
+	e1000_clean_rx_ring(rx_ring);
+
+	for (i = 0; i < rx_ring->count; i++)
+		kfree(rx_ring->buffer_info[i].ps_pages);
+
+	vfree(rx_ring->buffer_info);
+	rx_ring->buffer_info = NULL;
+
+	dma_free_coherent(&pdev->dev, rx_ring->size, rx_ring->desc,
+			  rx_ring->dma);
+	rx_ring->desc = NULL;
+}
+
+/**
+ * e1000_update_itr - update the dynamic ITR value based on statistics
+ * @itr_setting: current adapter->itr
+ * @packets: the number of packets during this measurement interval
+ * @bytes: the number of bytes during this measurement interval
+ *
+ *      Stores a new ITR value based on packets and byte
+ *      counts during the last interrupt.  The advantage of per interrupt
+ *      computation is faster updates and more accurate ITR for the current
+ *      traffic pattern.  Constants in this function were computed
+ *      based on theoretical maximum wire speed and thresholds were set based
+ *      on testing data as well as attempting to minimize response time
+ *      while increasing bulk throughput.  This functionality is controlled
+ *      by the InterruptThrottleRate module parameter.
+ **/
+static unsigned int e1000_update_itr(u16 itr_setting, int packets, int bytes)
+{
+	unsigned int retval = itr_setting;
+
+	if (packets == 0)
+		return itr_setting;
+
+	switch (itr_setting) {
+	case lowest_latency:
+		/* handle TSO and jumbo frames */
+		if (bytes / packets > 8000)
+			retval = bulk_latency;
+		else if ((packets < 5) && (bytes > 512))
+			retval = low_latency;
+		break;
+	case low_latency:	/* 50 usec aka 20000 ints/s */
+		if (bytes > 10000) {
+			/* this if handles the TSO accounting */
+			if (bytes / packets > 8000)
+				retval = bulk_latency;
+			else if ((packets < 10) || ((bytes / packets) > 1200))
+				retval = bulk_latency;
+			else if ((packets > 35))
+				retval = lowest_latency;
+		} else if (bytes / packets > 2000) {
+			retval = bulk_latency;
+		} else if (packets <= 2 && bytes < 512) {
+			retval = lowest_latency;
+		}
+		break;
+	case bulk_latency:	/* 250 usec aka 4000 ints/s */
+		if (bytes > 25000) {
+			if (packets > 35)
+				retval = low_latency;
+		} else if (bytes < 6000) {
+			retval = low_latency;
+		}
+		break;
+	}
+
+	return retval;
+}
+
+static void e1000_set_itr(struct e1000_adapter *adapter)
+{
+	u16 current_itr;
+	u32 new_itr = adapter->itr;
+
+	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
+	if (adapter->link_speed != SPEED_1000) {
+		new_itr = 4000;
+		goto set_itr_now;
+	}
+
+	if (adapter->flags2 & FLAG2_DISABLE_AIM) {
+		new_itr = 0;
+		goto set_itr_now;
+	}
+
+	adapter->tx_itr = e1000_update_itr(adapter->tx_itr,
+					   adapter->total_tx_packets,
+					   adapter->total_tx_bytes);
+	/* conservative mode (itr 3) eliminates the lowest_latency setting */
+	if (adapter->itr_setting == 3 && adapter->tx_itr == lowest_latency)
+		adapter->tx_itr = low_latency;
+
+	adapter->rx_itr = e1000_update_itr(adapter->rx_itr,
+					   adapter->total_rx_packets,
+					   adapter->total_rx_bytes);
+	/* conservative mode (itr 3) eliminates the lowest_latency setting */
+	if (adapter->itr_setting == 3 && adapter->rx_itr == lowest_latency)
+		adapter->rx_itr = low_latency;
+
+	current_itr = max(adapter->rx_itr, adapter->tx_itr);
+
+	/* counts and packets in update_itr are dependent on these numbers */
+	switch (current_itr) {
+	case lowest_latency:
+		new_itr = 70000;
+		break;
+	case low_latency:
+		new_itr = 20000;	/* aka hwitr = ~200 */
+		break;
+	case bulk_latency:
+		new_itr = 4000;
+		break;
+	default:
+		break;
+	}
+
+set_itr_now:
+	if (new_itr != adapter->itr) {
+		/* this attempts to bias the interrupt rate towards Bulk
+		 * by adding intermediate steps when interrupt rate is
+		 * increasing
+		 */
+		new_itr = new_itr > adapter->itr ?
+		    min(adapter->itr + (new_itr >> 2), new_itr) : new_itr;
+		adapter->itr = new_itr;
+		adapter->rx_ring->itr_val = new_itr;
+		if (adapter->msix_entries)
+			adapter->rx_ring->set_itr = 1;
+		else
+			e1000e_write_itr(adapter, new_itr);
+	}
+}
+
+/**
+ * e1000e_write_itr - write the ITR value to the appropriate registers
+ * @adapter: address of board private structure
+ * @itr: new ITR value to program
+ *
+ * e1000e_write_itr determines if the adapter is in MSI-X mode
+ * and, if so, writes the EITR registers with the ITR value.
+ * Otherwise, it writes the ITR value into the ITR register.
+ **/
+void e1000e_write_itr(struct e1000_adapter *adapter, u32 itr)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 new_itr = itr ? 1000000000 / (itr * 256) : 0;
+
+	if (adapter->msix_entries) {
+		int vector;
+
+		for (vector = 0; vector < adapter->num_vectors; vector++)
+			writel(new_itr, hw->hw_addr + E1000_EITR_82574(vector));
+	} else {
+		ew32(ITR, new_itr);
+	}
+}
+
+/**
+ * e1000_alloc_queues - Allocate memory for all rings
+ * @adapter: board private structure to initialize
+ **/
+static int e1000_alloc_queues(struct e1000_adapter *adapter)
+{
+	int size = sizeof(struct e1000_ring);
+
+	adapter->tx_ring = kzalloc(size, GFP_KERNEL);
+	if (!adapter->tx_ring)
+		goto err;
+	adapter->tx_ring->count = adapter->tx_ring_count;
+	adapter->tx_ring->adapter = adapter;
+
+	adapter->rx_ring = kzalloc(size, GFP_KERNEL);
+	if (!adapter->rx_ring)
+		goto err;
+	adapter->rx_ring->count = adapter->rx_ring_count;
+	adapter->rx_ring->adapter = adapter;
+
+	return 0;
+err:
+	e_err("Unable to allocate memory for queues\n");
+	kfree(adapter->rx_ring);
+	kfree(adapter->tx_ring);
+	return -ENOMEM;
+}
+
+/**
+ * e1000e_poll - NAPI Rx polling callback
+ * @napi: struct associated with this polling callback
+ * @budget: number of packets driver is allowed to process this poll
+ **/
+static int e1000e_poll(struct napi_struct *napi, int budget)
+{
+	struct e1000_adapter *adapter = container_of(napi, struct e1000_adapter,
+						     napi);
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *poll_dev = adapter->netdev;
+	int tx_cleaned = 1, work_done = 0;
+
+	adapter = netdev_priv(poll_dev);
+
+	if (!adapter->msix_entries ||
+	    (adapter->rx_ring->ims_val & adapter->tx_ring->ims_val))
+		tx_cleaned = e1000_clean_tx_irq(adapter->tx_ring);
+
+	adapter->clean_rx(adapter->rx_ring, &work_done, budget);
+
+	if (!tx_cleaned || work_done == budget)
+		return budget;
+
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done))) {
+		if (adapter->itr_setting & 3)
+			e1000_set_itr(adapter);
+		if (!test_bit(__E1000_DOWN, &adapter->state)) {
+			if (adapter->msix_entries)
+				ew32(IMS, adapter->rx_ring->ims_val);
+			else
+				e1000_irq_enable(adapter);
+		}
+	}
+
+	return work_done;
+}
+
+static int e1000_vlan_rx_add_vid(struct net_device *netdev,
+				 __always_unused __be16 proto, u16 vid)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 vfta, index;
+
+	/* don't update vlan cookie if already programmed */
+	if ((adapter->hw.mng_cookie.status &
+	     E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
+	    (vid == adapter->mng_vlan_id))
+		return 0;
+
+	/* add VID to filter table */
+	if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) {
+		index = (vid >> 5) & 0x7F;
+		vfta = E1000_READ_REG_ARRAY(hw, E1000_VFTA, index);
+		vfta |= BIT((vid & 0x1F));
+		hw->mac.ops.write_vfta(hw, index, vfta);
+	}
+
+	set_bit(vid, adapter->active_vlans);
+
+	return 0;
+}
+
+static int e1000_vlan_rx_kill_vid(struct net_device *netdev,
+				  __always_unused __be16 proto, u16 vid)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 vfta, index;
+
+	if ((adapter->hw.mng_cookie.status &
+	     E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
+	    (vid == adapter->mng_vlan_id)) {
+		/* release control to f/w */
+		e1000e_release_hw_control(adapter);
+		return 0;
+	}
+
+	/* remove VID from filter table */
+	if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) {
+		index = (vid >> 5) & 0x7F;
+		vfta = E1000_READ_REG_ARRAY(hw, E1000_VFTA, index);
+		vfta &= ~BIT((vid & 0x1F));
+		hw->mac.ops.write_vfta(hw, index, vfta);
+	}
+
+	clear_bit(vid, adapter->active_vlans);
+
+	return 0;
+}
+
+/**
+ * e1000e_vlan_filter_disable - helper to disable hw VLAN filtering
+ * @adapter: board private structure to initialize
+ **/
+static void e1000e_vlan_filter_disable(struct e1000_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl;
+
+	if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) {
+		/* disable VLAN receive filtering */
+		rctl = er32(RCTL);
+		rctl &= ~(E1000_RCTL_VFE | E1000_RCTL_CFIEN);
+		ew32(RCTL, rctl);
+
+		if (adapter->mng_vlan_id != (u16)E1000_MNG_VLAN_NONE) {
+			e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q),
+					       adapter->mng_vlan_id);
+			adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
+		}
+	}
+}
+
+/**
+ * e1000e_vlan_filter_enable - helper to enable HW VLAN filtering
+ * @adapter: board private structure to initialize
+ **/
+static void e1000e_vlan_filter_enable(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl;
+
+	if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) {
+		/* enable VLAN receive filtering */
+		rctl = er32(RCTL);
+		rctl |= E1000_RCTL_VFE;
+		rctl &= ~E1000_RCTL_CFIEN;
+		ew32(RCTL, rctl);
+	}
+}
+
+/**
+ * e1000e_vlan_strip_disable - helper to disable HW VLAN stripping
+ * @adapter: board private structure to initialize
+ **/
+static void e1000e_vlan_strip_disable(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl;
+
+	/* disable VLAN tag insert/strip */
+	ctrl = er32(CTRL);
+	ctrl &= ~E1000_CTRL_VME;
+	ew32(CTRL, ctrl);
+}
+
+/**
+ * e1000e_vlan_strip_enable - helper to enable HW VLAN stripping
+ * @adapter: board private structure to initialize
+ **/
+static void e1000e_vlan_strip_enable(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl;
+
+	/* enable VLAN tag insert/strip */
+	ctrl = er32(CTRL);
+	ctrl |= E1000_CTRL_VME;
+	ew32(CTRL, ctrl);
+}
+
+static void e1000_update_mng_vlan(struct e1000_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	u16 vid = adapter->hw.mng_cookie.vlan_id;
+	u16 old_vid = adapter->mng_vlan_id;
+
+	if (adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
+		e1000_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid);
+		adapter->mng_vlan_id = vid;
+	}
+
+	if ((old_vid != (u16)E1000_MNG_VLAN_NONE) && (vid != old_vid))
+		e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q), old_vid);
+}
+
+static void e1000_restore_vlan(struct e1000_adapter *adapter)
+{
+	u16 vid;
+
+	e1000_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0);
+
+	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
+	    e1000_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid);
+}
+
+static void e1000_init_manageability_pt(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 manc, manc2h, mdef, i, j;
+
+	if (!(adapter->flags & FLAG_MNG_PT_ENABLED))
+		return;
+
+	manc = er32(MANC);
+
+	/* enable receiving management packets to the host. this will probably
+	 * generate destination unreachable messages from the host OS, but
+	 * the packets will be handled on SMBUS
+	 */
+	manc |= E1000_MANC_EN_MNG2HOST;
+	manc2h = er32(MANC2H);
+
+	switch (hw->mac.type) {
+	default:
+		manc2h |= (E1000_MANC2H_PORT_623 | E1000_MANC2H_PORT_664);
+		break;
+	case e1000_82574:
+	case e1000_82583:
+		/* Check if IPMI pass-through decision filter already exists;
+		 * if so, enable it.
+		 */
+		for (i = 0, j = 0; i < 8; i++) {
+			mdef = er32(MDEF(i));
+
+			/* Ignore filters with anything other than IPMI ports */
+			if (mdef & ~(E1000_MDEF_PORT_623 | E1000_MDEF_PORT_664))
+				continue;
+
+			/* Enable this decision filter in MANC2H */
+			if (mdef)
+				manc2h |= BIT(i);
+
+			j |= mdef;
+		}
+
+		if (j == (E1000_MDEF_PORT_623 | E1000_MDEF_PORT_664))
+			break;
+
+		/* Create new decision filter in an empty filter */
+		for (i = 0, j = 0; i < 8; i++)
+			if (er32(MDEF(i)) == 0) {
+				ew32(MDEF(i), (E1000_MDEF_PORT_623 |
+					       E1000_MDEF_PORT_664));
+				manc2h |= BIT(1);
+				j++;
+				break;
+			}
+
+		if (!j)
+			e_warn("Unable to create IPMI pass-through filter\n");
+		break;
+	}
+
+	ew32(MANC2H, manc2h);
+	ew32(MANC, manc);
+}
+
+/**
+ * e1000_configure_tx - Configure Transmit Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+static void e1000_configure_tx(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_ring *tx_ring = adapter->tx_ring;
+	u64 tdba;
+	u32 tdlen, tctl, tarc;
+
+	/* Setup the HW Tx Head and Tail descriptor pointers */
+	tdba = tx_ring->dma;
+	tdlen = tx_ring->count * sizeof(struct e1000_tx_desc);
+	ew32(TDBAL(0), (tdba & DMA_BIT_MASK(32)));
+	ew32(TDBAH(0), (tdba >> 32));
+	ew32(TDLEN(0), tdlen);
+	ew32(TDH(0), 0);
+	ew32(TDT(0), 0);
+	tx_ring->head = adapter->hw.hw_addr + E1000_TDH(0);
+	tx_ring->tail = adapter->hw.hw_addr + E1000_TDT(0);
+
+	writel(0, tx_ring->head);
+	if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+		e1000e_update_tdt_wa(tx_ring, 0);
+	else
+		writel(0, tx_ring->tail);
+
+	/* Set the Tx Interrupt Delay register */
+	ew32(TIDV, adapter->tx_int_delay);
+	/* Tx irq moderation */
+	ew32(TADV, adapter->tx_abs_int_delay);
+
+	if (adapter->flags2 & FLAG2_DMA_BURST) {
+		u32 txdctl = er32(TXDCTL(0));
+
+		txdctl &= ~(E1000_TXDCTL_PTHRESH | E1000_TXDCTL_HTHRESH |
+			    E1000_TXDCTL_WTHRESH);
+		/* set up some performance related parameters to encourage the
+		 * hardware to use the bus more efficiently in bursts, depends
+		 * on the tx_int_delay to be enabled,
+		 * wthresh = 1 ==> burst write is disabled to avoid Tx stalls
+		 * hthresh = 1 ==> prefetch when one or more available
+		 * pthresh = 0x1f ==> prefetch if internal cache 31 or less
+		 * BEWARE: this seems to work but should be considered first if
+		 * there are Tx hangs or other Tx related bugs
+		 */
+		txdctl |= E1000_TXDCTL_DMA_BURST_ENABLE;
+		ew32(TXDCTL(0), txdctl);
+	}
+	/* erratum work around: set txdctl the same for both queues */
+	ew32(TXDCTL(1), er32(TXDCTL(0)));
+
+	/* Program the Transmit Control Register */
+	tctl = er32(TCTL);
+	tctl &= ~E1000_TCTL_CT;
+	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
+		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
+
+	if (adapter->flags & FLAG_TARC_SPEED_MODE_BIT) {
+		tarc = er32(TARC(0));
+		/* set the speed mode bit, we'll clear it if we're not at
+		 * gigabit link later
+		 */
+#define SPEED_MODE_BIT BIT(21)
+		tarc |= SPEED_MODE_BIT;
+		ew32(TARC(0), tarc);
+	}
+
+	/* errata: program both queues to unweighted RR */
+	if (adapter->flags & FLAG_TARC_SET_BIT_ZERO) {
+		tarc = er32(TARC(0));
+		tarc |= 1;
+		ew32(TARC(0), tarc);
+		tarc = er32(TARC(1));
+		tarc |= 1;
+		ew32(TARC(1), tarc);
+	}
+
+	/* Setup Transmit Descriptor Settings for eop descriptor */
+	adapter->txd_cmd = E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS;
+
+	/* only set IDE if we are delaying interrupts using the timers */
+	if (adapter->tx_int_delay)
+		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
+
+	/* enable Report Status bit */
+	adapter->txd_cmd |= E1000_TXD_CMD_RS;
+
+	ew32(TCTL, tctl);
+
+	hw->mac.ops.config_collision_dist(hw);
+
+	/* SPT and KBL Si errata workaround to avoid data corruption */
+	if (hw->mac.type == e1000_pch_spt) {
+		u32 reg_val;
+
+		reg_val = er32(IOSFPC);
+		reg_val |= E1000_RCTL_RDMTS_HEX;
+		ew32(IOSFPC, reg_val);
+
+		reg_val = er32(TARC(0));
+		/* SPT and KBL Si errata workaround to avoid Tx hang.
+		 * Dropping the number of outstanding requests from
+		 * 3 to 2 in order to avoid a buffer overrun.
+		 */
+		reg_val &= ~E1000_TARC0_CB_MULTIQ_3_REQ;
+		reg_val |= E1000_TARC0_CB_MULTIQ_2_REQ;
+		ew32(TARC(0), reg_val);
+	}
+}
+
+#define PAGE_USE_COUNT(S) (((S) >> PAGE_SHIFT) + \
+			   (((S) & (PAGE_SIZE - 1)) ? 1 : 0))
+
+/**
+ * e1000_setup_rctl - configure the receive control registers
+ * @adapter: Board private structure
+ **/
+static void e1000_setup_rctl(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl, rfctl;
+	u32 pages = 0;
+
+	/* Workaround Si errata on PCHx - configure jumbo frame flow.
+	 * If jumbo frames not set, program related MAC/PHY registers
+	 * to h/w defaults
+	 */
+	if (hw->mac.type >= e1000_pch2lan) {
+		s32 ret_val;
+
+		if (adapter->netdev->mtu > ETH_DATA_LEN)
+			ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, true);
+		else
+			ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, false);
+
+		if (ret_val)
+			e_dbg("failed to enable|disable jumbo frame workaround mode\n");
+	}
+
+	/* Program MC offset vector base */
+	rctl = er32(RCTL);
+	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
+	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
+	    E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
+	    (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
+
+	/* Do not Store bad packets */
+	rctl &= ~E1000_RCTL_SBP;
+
+	/* Enable Long Packet receive */
+	if (adapter->netdev->mtu <= ETH_DATA_LEN)
+		rctl &= ~E1000_RCTL_LPE;
+	else
+		rctl |= E1000_RCTL_LPE;
+
+	/* Some systems expect that the CRC is included in SMBUS traffic. The
+	 * hardware strips the CRC before sending to both SMBUS (BMC) and to
+	 * host memory when this is enabled
+	 */
+	if (adapter->flags2 & FLAG2_CRC_STRIPPING)
+		rctl |= E1000_RCTL_SECRC;
+
+	/* Workaround Si errata on 82577 PHY - configure IPG for jumbos */
+	if ((hw->phy.type == e1000_phy_82577) && (rctl & E1000_RCTL_LPE)) {
+		u16 phy_data;
+
+		e1e_rphy(hw, PHY_REG(770, 26), &phy_data);
+		phy_data &= 0xfff8;
+		phy_data |= BIT(2);
+		e1e_wphy(hw, PHY_REG(770, 26), phy_data);
+
+		e1e_rphy(hw, 22, &phy_data);
+		phy_data &= 0x0fff;
+		phy_data |= BIT(14);
+		e1e_wphy(hw, 0x10, 0x2823);
+		e1e_wphy(hw, 0x11, 0x0003);
+		e1e_wphy(hw, 22, phy_data);
+	}
+
+	/* Setup buffer sizes */
+	rctl &= ~E1000_RCTL_SZ_4096;
+	rctl |= E1000_RCTL_BSEX;
+	switch (adapter->rx_buffer_len) {
+	case 2048:
+	default:
+		rctl |= E1000_RCTL_SZ_2048;
+		rctl &= ~E1000_RCTL_BSEX;
+		break;
+	case 4096:
+		rctl |= E1000_RCTL_SZ_4096;
+		break;
+	case 8192:
+		rctl |= E1000_RCTL_SZ_8192;
+		break;
+	case 16384:
+		rctl |= E1000_RCTL_SZ_16384;
+		break;
+	}
+
+	/* Enable Extended Status in all Receive Descriptors */
+	rfctl = er32(RFCTL);
+	rfctl |= E1000_RFCTL_EXTEN;
+	ew32(RFCTL, rfctl);
+
+	/* 82571 and greater support packet-split where the protocol
+	 * header is placed in skb->data and the packet data is
+	 * placed in pages hanging off of skb_shinfo(skb)->nr_frags.
+	 * In the case of a non-split, skb->data is linearly filled,
+	 * followed by the page buffers.  Therefore, skb->data is
+	 * sized to hold the largest protocol header.
+	 *
+	 * allocations using alloc_page take too long for regular MTU
+	 * so only enable packet split for jumbo frames
+	 *
+	 * Using pages when the page size is greater than 16k wastes
+	 * a lot of memory, since we allocate 3 pages at all times
+	 * per packet.
+	 */
+	pages = PAGE_USE_COUNT(adapter->netdev->mtu);
+	if ((pages <= 3) && (PAGE_SIZE <= 16384) && (rctl & E1000_RCTL_LPE))
+		adapter->rx_ps_pages = pages;
+	else
+		adapter->rx_ps_pages = 0;
+
+	if (adapter->rx_ps_pages) {
+		u32 psrctl = 0;
+
+		/* Enable Packet split descriptors */
+		rctl |= E1000_RCTL_DTYP_PS;
+
+		psrctl |= adapter->rx_ps_bsize0 >> E1000_PSRCTL_BSIZE0_SHIFT;
+
+		switch (adapter->rx_ps_pages) {
+		case 3:
+			psrctl |= PAGE_SIZE << E1000_PSRCTL_BSIZE3_SHIFT;
+			fallthrough;
+		case 2:
+			psrctl |= PAGE_SIZE << E1000_PSRCTL_BSIZE2_SHIFT;
+			fallthrough;
+		case 1:
+			psrctl |= PAGE_SIZE >> E1000_PSRCTL_BSIZE1_SHIFT;
+			break;
+		}
+
+		ew32(PSRCTL, psrctl);
+	}
+
+	/* This is useful for sniffing bad packets. */
+	if (adapter->netdev->features & NETIF_F_RXALL) {
+		/* UPE and MPE will be handled by normal PROMISC logic
+		 * in e1000e_set_rx_mode
+		 */
+		rctl |= (E1000_RCTL_SBP |	/* Receive bad packets */
+			 E1000_RCTL_BAM |	/* RX All Bcast Pkts */
+			 E1000_RCTL_PMCF);	/* RX All MAC Ctrl Pkts */
+
+		rctl &= ~(E1000_RCTL_VFE |	/* Disable VLAN filter */
+			  E1000_RCTL_DPF |	/* Allow filtered pause */
+			  E1000_RCTL_CFIEN);	/* Dis VLAN CFIEN Filter */
+		/* Do not mess with E1000_CTRL_VME, it affects transmit as well,
+		 * and that breaks VLANs.
+		 */
+	}
+
+	ew32(RCTL, rctl);
+	/* just started the receive unit, no need to restart */
+	adapter->flags &= ~FLAG_RESTART_NOW;
+}
+
+/**
+ * e1000_configure_rx - Configure Receive Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+static void e1000_configure_rx(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_ring *rx_ring = adapter->rx_ring;
+	u64 rdba;
+	u32 rdlen, rctl, rxcsum, ctrl_ext;
+
+	if (adapter->rx_ps_pages) {
+		/* this is a 32 byte descriptor */
+		rdlen = rx_ring->count *
+		    sizeof(union e1000_rx_desc_packet_split);
+		adapter->clean_rx = e1000_clean_rx_irq_ps;
+		adapter->alloc_rx_buf = e1000_alloc_rx_buffers_ps;
+	} else if (adapter->netdev->mtu > ETH_FRAME_LEN + ETH_FCS_LEN) {
+		rdlen = rx_ring->count * sizeof(union e1000_rx_desc_extended);
+		adapter->clean_rx = e1000_clean_jumbo_rx_irq;
+		adapter->alloc_rx_buf = e1000_alloc_jumbo_rx_buffers;
+	} else {
+		rdlen = rx_ring->count * sizeof(union e1000_rx_desc_extended);
+		adapter->clean_rx = e1000_clean_rx_irq;
+		adapter->alloc_rx_buf = e1000_alloc_rx_buffers;
+	}
+
+	/* disable receives while setting up the descriptors */
+	rctl = er32(RCTL);
+	if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX))
+		ew32(RCTL, rctl & ~E1000_RCTL_EN);
+	e1e_flush();
+	usleep_range(10000, 11000);
+
+	if (adapter->flags2 & FLAG2_DMA_BURST) {
+		/* set the writeback threshold (only takes effect if the RDTR
+		 * is set). set GRAN=1 and write back up to 0x4 worth, and
+		 * enable prefetching of 0x20 Rx descriptors
+		 * granularity = 01
+		 * wthresh = 04,
+		 * hthresh = 04,
+		 * pthresh = 0x20
+		 */
+		ew32(RXDCTL(0), E1000_RXDCTL_DMA_BURST_ENABLE);
+		ew32(RXDCTL(1), E1000_RXDCTL_DMA_BURST_ENABLE);
+	}
+
+	/* set the Receive Delay Timer Register */
+	ew32(RDTR, adapter->rx_int_delay);
+
+	/* irq moderation */
+	ew32(RADV, adapter->rx_abs_int_delay);
+	if ((adapter->itr_setting != 0) && (adapter->itr != 0))
+		e1000e_write_itr(adapter, adapter->itr);
+
+	ctrl_ext = er32(CTRL_EXT);
+	/* Auto-Mask interrupts upon ICR access */
+	ctrl_ext |= E1000_CTRL_EXT_IAME;
+	ew32(IAM, 0xffffffff);
+	ew32(CTRL_EXT, ctrl_ext);
+	e1e_flush();
+
+	/* Setup the HW Rx Head and Tail Descriptor Pointers and
+	 * the Base and Length of the Rx Descriptor Ring
+	 */
+	rdba = rx_ring->dma;
+	ew32(RDBAL(0), (rdba & DMA_BIT_MASK(32)));
+	ew32(RDBAH(0), (rdba >> 32));
+	ew32(RDLEN(0), rdlen);
+	ew32(RDH(0), 0);
+	ew32(RDT(0), 0);
+	rx_ring->head = adapter->hw.hw_addr + E1000_RDH(0);
+	rx_ring->tail = adapter->hw.hw_addr + E1000_RDT(0);
+
+	writel(0, rx_ring->head);
+	if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+		e1000e_update_rdt_wa(rx_ring, 0);
+	else
+		writel(0, rx_ring->tail);
+
+	/* Enable Receive Checksum Offload for TCP and UDP */
+	rxcsum = er32(RXCSUM);
+	if (adapter->netdev->features & NETIF_F_RXCSUM)
+		rxcsum |= E1000_RXCSUM_TUOFL;
+	else
+		rxcsum &= ~E1000_RXCSUM_TUOFL;
+	ew32(RXCSUM, rxcsum);
+
+	/* With jumbo frames, excessive C-state transition latencies result
+	 * in dropped transactions.
+	 */
+	if (adapter->netdev->mtu > ETH_DATA_LEN) {
+		u32 lat =
+		    ((er32(PBA) & E1000_PBA_RXA_MASK) * 1024 -
+		     adapter->max_frame_size) * 8 / 1000;
+
+		if (adapter->flags & FLAG_IS_ICH) {
+			u32 rxdctl = er32(RXDCTL(0));
+
+			ew32(RXDCTL(0), rxdctl | 0x3 | BIT(8));
+		}
+
+		dev_info(&adapter->pdev->dev,
+			 "Some CPU C-states have been disabled in order to enable jumbo frames\n");
+		cpu_latency_qos_update_request(&adapter->pm_qos_req, lat);
+	} else {
+		cpu_latency_qos_update_request(&adapter->pm_qos_req,
+					       PM_QOS_DEFAULT_VALUE);
+	}
+
+	/* Enable Receives */
+	ew32(RCTL, rctl);
+}
+
+/**
+ * e1000e_write_mc_addr_list - write multicast addresses to MTA
+ * @netdev: network interface device structure
+ *
+ * Writes multicast address list to the MTA hash table.
+ * Returns: -ENOMEM on failure
+ *                0 on no addresses written
+ *                X on writing X addresses to MTA
+ */
+static int e1000e_write_mc_addr_list(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct netdev_hw_addr *ha;
+	u8 *mta_list;
+	int i;
+
+	if (netdev_mc_empty(netdev)) {
+		/* nothing to program, so clear mc list */
+		hw->mac.ops.update_mc_addr_list(hw, NULL, 0);
+		return 0;
+	}
+
+	mta_list = kcalloc(netdev_mc_count(netdev), ETH_ALEN, GFP_ATOMIC);
+	if (!mta_list)
+		return -ENOMEM;
+
+	/* update_mc_addr_list expects a packed array of only addresses. */
+	i = 0;
+	netdev_for_each_mc_addr(ha, netdev)
+	    memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
+
+	hw->mac.ops.update_mc_addr_list(hw, mta_list, i);
+	kfree(mta_list);
+
+	return netdev_mc_count(netdev);
+}
+
+/**
+ * e1000e_write_uc_addr_list - write unicast addresses to RAR table
+ * @netdev: network interface device structure
+ *
+ * Writes unicast address list to the RAR table.
+ * Returns: -ENOMEM on failure/insufficient address space
+ *                0 on no addresses written
+ *                X on writing X addresses to the RAR table
+ **/
+static int e1000e_write_uc_addr_list(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned int rar_entries;
+	int count = 0;
+
+	rar_entries = hw->mac.ops.rar_get_count(hw);
+
+	/* save a rar entry for our hardware address */
+	rar_entries--;
+
+	/* save a rar entry for the LAA workaround */
+	if (adapter->flags & FLAG_RESET_OVERWRITES_LAA)
+		rar_entries--;
+
+	/* return ENOMEM indicating insufficient memory for addresses */
+	if (netdev_uc_count(netdev) > rar_entries)
+		return -ENOMEM;
+
+	if (!netdev_uc_empty(netdev) && rar_entries) {
+		struct netdev_hw_addr *ha;
+
+		/* write the addresses in reverse order to avoid write
+		 * combining
+		 */
+		netdev_for_each_uc_addr(ha, netdev) {
+			int ret_val;
+
+			if (!rar_entries)
+				break;
+			ret_val = hw->mac.ops.rar_set(hw, ha->addr, rar_entries--);
+			if (ret_val < 0)
+				return -ENOMEM;
+			count++;
+		}
+	}
+
+	/* zero out the remaining RAR entries not used above */
+	for (; rar_entries > 0; rar_entries--) {
+		ew32(RAH(rar_entries), 0);
+		ew32(RAL(rar_entries), 0);
+	}
+	e1e_flush();
+
+	return count;
+}
+
+/**
+ * e1000e_set_rx_mode - secondary unicast, Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The ndo_set_rx_mode entry point is called whenever the unicast or multicast
+ * address list or the network interface flags are updated.  This routine is
+ * responsible for configuring the hardware for proper unicast, multicast,
+ * promiscuous mode, and all-multi behavior.
+ **/
+static void e1000e_set_rx_mode(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl;
+
+	if (pm_runtime_suspended(netdev->dev.parent))
+		return;
+
+	/* Check for Promiscuous and All Multicast modes */
+	rctl = er32(RCTL);
+
+	/* clear the affected bits */
+	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE);
+
+	if (netdev->flags & IFF_PROMISC) {
+		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
+		/* Do not hardware filter VLANs in promisc mode */
+		e1000e_vlan_filter_disable(adapter);
+	} else {
+		int count;
+
+		if (netdev->flags & IFF_ALLMULTI) {
+			rctl |= E1000_RCTL_MPE;
+		} else {
+			/* Write addresses to the MTA, if the attempt fails
+			 * then we should just turn on promiscuous mode so
+			 * that we can at least receive multicast traffic
+			 */
+			count = e1000e_write_mc_addr_list(netdev);
+			if (count < 0)
+				rctl |= E1000_RCTL_MPE;
+		}
+		e1000e_vlan_filter_enable(adapter);
+		/* Write addresses to available RAR registers, if there is not
+		 * sufficient space to store all the addresses then enable
+		 * unicast promiscuous mode
+		 */
+		count = e1000e_write_uc_addr_list(netdev);
+		if (count < 0)
+			rctl |= E1000_RCTL_UPE;
+	}
+
+	ew32(RCTL, rctl);
+
+	if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+		e1000e_vlan_strip_enable(adapter);
+	else
+		e1000e_vlan_strip_disable(adapter);
+}
+
+static void e1000e_setup_rss_hash(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 mrqc, rxcsum;
+	u32 rss_key[10];
+	int i;
+
+	netdev_rss_key_fill(rss_key, sizeof(rss_key));
+	for (i = 0; i < 10; i++)
+		ew32(RSSRK(i), rss_key[i]);
+
+	/* Direct all traffic to queue 0 */
+	for (i = 0; i < 32; i++)
+		ew32(RETA(i), 0);
+
+	/* Disable raw packet checksumming so that RSS hash is placed in
+	 * descriptor on writeback.
+	 */
+	rxcsum = er32(RXCSUM);
+	rxcsum |= E1000_RXCSUM_PCSD;
+
+	ew32(RXCSUM, rxcsum);
+
+	mrqc = (E1000_MRQC_RSS_FIELD_IPV4 |
+		E1000_MRQC_RSS_FIELD_IPV4_TCP |
+		E1000_MRQC_RSS_FIELD_IPV6 |
+		E1000_MRQC_RSS_FIELD_IPV6_TCP |
+		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
+
+	ew32(MRQC, mrqc);
+}
+
+/**
+ * e1000e_get_base_timinca - get default SYSTIM time increment attributes
+ * @adapter: board private structure
+ * @timinca: pointer to returned time increment attributes
+ *
+ * Get attributes for incrementing the System Time Register SYSTIML/H at
+ * the default base frequency, and set the cyclecounter shift value.
+ **/
+s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 incvalue, incperiod, shift;
+
+	/* Make sure clock is enabled on I217/I218/I219  before checking
+	 * the frequency
+	 */
+	if ((hw->mac.type >= e1000_pch_lpt) &&
+	    !(er32(TSYNCTXCTL) & E1000_TSYNCTXCTL_ENABLED) &&
+	    !(er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_ENABLED)) {
+		u32 fextnvm7 = er32(FEXTNVM7);
+
+		if (!(fextnvm7 & BIT(0))) {
+			ew32(FEXTNVM7, fextnvm7 | BIT(0));
+			e1e_flush();
+		}
+	}
+
+	switch (hw->mac.type) {
+	case e1000_pch2lan:
+		/* Stable 96MHz frequency */
+		incperiod = INCPERIOD_96MHZ;
+		incvalue = INCVALUE_96MHZ;
+		shift = INCVALUE_SHIFT_96MHZ;
+		adapter->cc.shift = shift + INCPERIOD_SHIFT_96MHZ;
+		break;
+	case e1000_pch_lpt:
+		if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) {
+			/* Stable 96MHz frequency */
+			incperiod = INCPERIOD_96MHZ;
+			incvalue = INCVALUE_96MHZ;
+			shift = INCVALUE_SHIFT_96MHZ;
+			adapter->cc.shift = shift + INCPERIOD_SHIFT_96MHZ;
+		} else {
+			/* Stable 25MHz frequency */
+			incperiod = INCPERIOD_25MHZ;
+			incvalue = INCVALUE_25MHZ;
+			shift = INCVALUE_SHIFT_25MHZ;
+			adapter->cc.shift = shift;
+		}
+		break;
+	case e1000_pch_spt:
+		/* Stable 24MHz frequency */
+		incperiod = INCPERIOD_24MHZ;
+		incvalue = INCVALUE_24MHZ;
+		shift = INCVALUE_SHIFT_24MHZ;
+		adapter->cc.shift = shift;
+		break;
+	case e1000_pch_cnp:
+	case e1000_pch_tgp:
+	case e1000_pch_adp:
+	case e1000_pch_mtp:
+	case e1000_pch_lnp:
+	case e1000_pch_ptp:
+	case e1000_pch_nvp:
+		if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) {
+			/* Stable 24MHz frequency */
+			incperiod = INCPERIOD_24MHZ;
+			incvalue = INCVALUE_24MHZ;
+			shift = INCVALUE_SHIFT_24MHZ;
+			adapter->cc.shift = shift;
+		} else {
+			/* Stable 38400KHz frequency */
+			incperiod = INCPERIOD_38400KHZ;
+			incvalue = INCVALUE_38400KHZ;
+			shift = INCVALUE_SHIFT_38400KHZ;
+			adapter->cc.shift = shift;
+		}
+		break;
+	case e1000_82574:
+	case e1000_82583:
+		/* Stable 25MHz frequency */
+		incperiod = INCPERIOD_25MHZ;
+		incvalue = INCVALUE_25MHZ;
+		shift = INCVALUE_SHIFT_25MHZ;
+		adapter->cc.shift = shift;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	*timinca = ((incperiod << E1000_TIMINCA_INCPERIOD_SHIFT) |
+		    ((incvalue << shift) & E1000_TIMINCA_INCVALUE_MASK));
+
+	return 0;
+}
+
+/**
+ * e1000e_config_hwtstamp - configure the hwtstamp registers and enable/disable
+ * @adapter: board private structure
+ * @config: timestamp configuration
+ *
+ * Outgoing time stamping can be enabled and disabled. Play nice and
+ * disable it when requested, although it shouldn't cause any overhead
+ * when no packet needs it. At most one packet in the queue may be
+ * marked for time stamping, otherwise it would be impossible to tell
+ * for sure to which packet the hardware time stamp belongs.
+ *
+ * Incoming time stamping has to be configured via the hardware filters.
+ * Not all combinations are supported, in particular event type has to be
+ * specified. Matching the kind of event packet is not supported, with the
+ * exception of "all V2 events regardless of level 2 or 4".
+ **/
+static int e1000e_config_hwtstamp(struct e1000_adapter *adapter,
+				  struct hwtstamp_config *config)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
+	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
+	u32 rxmtrl = 0;
+	u16 rxudp = 0;
+	bool is_l4 = false;
+	bool is_l2 = false;
+	u32 regval;
+
+	if (!(adapter->flags & FLAG_HAS_HW_TIMESTAMP))
+		return -EINVAL;
+
+	switch (config->tx_type) {
+	case HWTSTAMP_TX_OFF:
+		tsync_tx_ctl = 0;
+		break;
+	case HWTSTAMP_TX_ON:
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config->rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		tsync_rx_ctl = 0;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
+		rxmtrl = E1000_RXMTRL_PTP_V1_SYNC_MESSAGE;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
+		rxmtrl = E1000_RXMTRL_PTP_V1_DELAY_REQ_MESSAGE;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+		/* Also time stamps V2 L2 Path Delay Request/Response */
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_V2;
+		rxmtrl = E1000_RXMTRL_PTP_V2_SYNC_MESSAGE;
+		is_l2 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+		/* Also time stamps V2 L2 Path Delay Request/Response. */
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_V2;
+		rxmtrl = E1000_RXMTRL_PTP_V2_DELAY_REQ_MESSAGE;
+		is_l2 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+		/* Hardware cannot filter just V2 L4 Sync messages */
+		fallthrough;
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+		/* Also time stamps V2 Path Delay Request/Response. */
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
+		rxmtrl = E1000_RXMTRL_PTP_V2_SYNC_MESSAGE;
+		is_l2 = true;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		/* Hardware cannot filter just V2 L4 Delay Request messages */
+		fallthrough;
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		/* Also time stamps V2 Path Delay Request/Response. */
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
+		rxmtrl = E1000_RXMTRL_PTP_V2_DELAY_REQ_MESSAGE;
+		is_l2 = true;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+		/* Hardware cannot filter just V2 L4 or L2 Event messages */
+		fallthrough;
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
+		config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		is_l2 = true;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+		/* For V1, the hardware can only filter Sync messages or
+		 * Delay Request messages but not both so fall-through to
+		 * time stamp all packets.
+		 */
+		fallthrough;
+	case HWTSTAMP_FILTER_NTP_ALL:
+	case HWTSTAMP_FILTER_ALL:
+		is_l2 = true;
+		is_l4 = true;
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
+		config->rx_filter = HWTSTAMP_FILTER_ALL;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	adapter->hwtstamp_config = *config;
+
+	/* enable/disable Tx h/w time stamping */
+	regval = er32(TSYNCTXCTL);
+	regval &= ~E1000_TSYNCTXCTL_ENABLED;
+	regval |= tsync_tx_ctl;
+	ew32(TSYNCTXCTL, regval);
+	if ((er32(TSYNCTXCTL) & E1000_TSYNCTXCTL_ENABLED) !=
+	    (regval & E1000_TSYNCTXCTL_ENABLED)) {
+		e_err("Timesync Tx Control register not set as expected\n");
+		return -EAGAIN;
+	}
+
+	/* enable/disable Rx h/w time stamping */
+	regval = er32(TSYNCRXCTL);
+	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
+	regval |= tsync_rx_ctl;
+	ew32(TSYNCRXCTL, regval);
+	if ((er32(TSYNCRXCTL) & (E1000_TSYNCRXCTL_ENABLED |
+				 E1000_TSYNCRXCTL_TYPE_MASK)) !=
+	    (regval & (E1000_TSYNCRXCTL_ENABLED |
+		       E1000_TSYNCRXCTL_TYPE_MASK))) {
+		e_err("Timesync Rx Control register not set as expected\n");
+		return -EAGAIN;
+	}
+
+	/* L2: define ethertype filter for time stamped packets */
+	if (is_l2)
+		rxmtrl |= ETH_P_1588;
+
+	/* define which PTP packets get time stamped */
+	ew32(RXMTRL, rxmtrl);
+
+	/* Filter by destination port */
+	if (is_l4) {
+		rxudp = PTP_EV_PORT;
+		cpu_to_be16s(&rxudp);
+	}
+	ew32(RXUDP, rxudp);
+
+	e1e_flush();
+
+	/* Clear TSYNCRXCTL_VALID & TSYNCTXCTL_VALID bit */
+	er32(RXSTMPH);
+	er32(TXSTMPH);
+
+	return 0;
+}
+
+/**
+ * e1000_configure - configure the hardware for Rx and Tx
+ * @adapter: private board structure
+ **/
+static void e1000_configure(struct e1000_adapter *adapter)
+{
+	struct e1000_ring *rx_ring = adapter->rx_ring;
+
+	e1000e_set_rx_mode(adapter->netdev);
+
+	e1000_restore_vlan(adapter);
+	e1000_init_manageability_pt(adapter);
+
+	e1000_configure_tx(adapter);
+
+	if (adapter->netdev->features & NETIF_F_RXHASH)
+		e1000e_setup_rss_hash(adapter);
+	e1000_setup_rctl(adapter);
+	e1000_configure_rx(adapter);
+	adapter->alloc_rx_buf(rx_ring, e1000_desc_unused(rx_ring), GFP_KERNEL);
+}
+
+/**
+ * e1000e_power_up_phy - restore link in case the phy was powered down
+ * @adapter: address of board private structure
+ *
+ * The phy may be powered down to save power and turn off link when the
+ * driver is unloaded and wake on lan is not enabled (among others)
+ * *** this routine MUST be followed by a call to e1000e_reset ***
+ **/
+void e1000e_power_up_phy(struct e1000_adapter *adapter)
+{
+	if (adapter->hw.phy.ops.power_up)
+		adapter->hw.phy.ops.power_up(&adapter->hw);
+
+	adapter->hw.mac.ops.setup_link(&adapter->hw);
+}
+
+/**
+ * e1000_power_down_phy - Power down the PHY
+ * @adapter: board private structure
+ *
+ * Power down the PHY so no link is implied when interface is down.
+ * The PHY cannot be powered down if management or WoL is active.
+ */
+static void e1000_power_down_phy(struct e1000_adapter *adapter)
+{
+	if (adapter->hw.phy.ops.power_down)
+		adapter->hw.phy.ops.power_down(&adapter->hw);
+}
+
+/**
+ * e1000_flush_tx_ring - remove all descriptors from the tx_ring
+ * @adapter: board private structure
+ *
+ * We want to clear all pending descriptors from the TX ring.
+ * zeroing happens when the HW reads the regs. We  assign the ring itself as
+ * the data of the next descriptor. We don't care about the data we are about
+ * to reset the HW.
+ */
+static void e1000_flush_tx_ring(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_ring *tx_ring = adapter->tx_ring;
+	struct e1000_tx_desc *tx_desc = NULL;
+	u32 tdt, tctl, txd_lower = E1000_TXD_CMD_IFCS;
+	u16 size = 512;
+
+	tctl = er32(TCTL);
+	ew32(TCTL, tctl | E1000_TCTL_EN);
+	tdt = er32(TDT(0));
+	BUG_ON(tdt != tx_ring->next_to_use);
+	tx_desc =  E1000_TX_DESC(*tx_ring, tx_ring->next_to_use);
+	tx_desc->buffer_addr = cpu_to_le64(tx_ring->dma);
+
+	tx_desc->lower.data = cpu_to_le32(txd_lower | size);
+	tx_desc->upper.data = 0;
+	/* flush descriptors to memory before notifying the HW */
+	wmb();
+	tx_ring->next_to_use++;
+	if (tx_ring->next_to_use == tx_ring->count)
+		tx_ring->next_to_use = 0;
+	ew32(TDT(0), tx_ring->next_to_use);
+	usleep_range(200, 250);
+}
+
+/**
+ * e1000_flush_rx_ring - remove all descriptors from the rx_ring
+ * @adapter: board private structure
+ *
+ * Mark all descriptors in the RX ring as consumed and disable the rx ring
+ */
+static void e1000_flush_rx_ring(struct e1000_adapter *adapter)
+{
+	u32 rctl, rxdctl;
+	struct e1000_hw *hw = &adapter->hw;
+
+	rctl = er32(RCTL);
+	ew32(RCTL, rctl & ~E1000_RCTL_EN);
+	e1e_flush();
+	usleep_range(100, 150);
+
+	rxdctl = er32(RXDCTL(0));
+	/* zero the lower 14 bits (prefetch and host thresholds) */
+	rxdctl &= 0xffffc000;
+
+	/* update thresholds: prefetch threshold to 31, host threshold to 1
+	 * and make sure the granularity is "descriptors" and not "cache lines"
+	 */
+	rxdctl |= (0x1F | BIT(8) | E1000_RXDCTL_THRESH_UNIT_DESC);
+
+	ew32(RXDCTL(0), rxdctl);
+	/* momentarily enable the RX ring for the changes to take effect */
+	ew32(RCTL, rctl | E1000_RCTL_EN);
+	e1e_flush();
+	usleep_range(100, 150);
+	ew32(RCTL, rctl & ~E1000_RCTL_EN);
+}
+
+/**
+ * e1000_flush_desc_rings - remove all descriptors from the descriptor rings
+ * @adapter: board private structure
+ *
+ * In i219, the descriptor rings must be emptied before resetting the HW
+ * or before changing the device state to D3 during runtime (runtime PM).
+ *
+ * Failure to do this will cause the HW to enter a unit hang state which can
+ * only be released by PCI reset on the device
+ *
+ */
+
+static void e1000_flush_desc_rings(struct e1000_adapter *adapter)
+{
+	u16 hang_state;
+	u32 fext_nvm11, tdlen;
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* First, disable MULR fix in FEXTNVM11 */
+	fext_nvm11 = er32(FEXTNVM11);
+	fext_nvm11 |= E1000_FEXTNVM11_DISABLE_MULR_FIX;
+	ew32(FEXTNVM11, fext_nvm11);
+	/* do nothing if we're not in faulty state, or if the queue is empty */
+	tdlen = er32(TDLEN(0));
+	pci_read_config_word(adapter->pdev, PCICFG_DESC_RING_STATUS,
+			     &hang_state);
+	if (!(hang_state & FLUSH_DESC_REQUIRED) || !tdlen)
+		return;
+	e1000_flush_tx_ring(adapter);
+	/* recheck, maybe the fault is caused by the rx ring */
+	pci_read_config_word(adapter->pdev, PCICFG_DESC_RING_STATUS,
+			     &hang_state);
+	if (hang_state & FLUSH_DESC_REQUIRED)
+		e1000_flush_rx_ring(adapter);
+}
+
+/**
+ * e1000e_systim_reset - reset the timesync registers after a hardware reset
+ * @adapter: board private structure
+ *
+ * When the MAC is reset, all hardware bits for timesync will be reset to the
+ * default values. This function will restore the settings last in place.
+ * Since the clock SYSTIME registers are reset, we will simply restore the
+ * cyclecounter to the kernel real clock time.
+ **/
+static void e1000e_systim_reset(struct e1000_adapter *adapter)
+{
+	struct ptp_clock_info *info = &adapter->ptp_clock_info;
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned long flags;
+	u32 timinca;
+	s32 ret_val;
+
+	if (!(adapter->flags & FLAG_HAS_HW_TIMESTAMP))
+		return;
+
+	if (info->adjfine) {
+		/* restore the previous ptp frequency delta */
+		ret_val = info->adjfine(info, adapter->ptp_delta);
+	} else {
+		/* set the default base frequency if no adjustment possible */
+		ret_val = e1000e_get_base_timinca(adapter, &timinca);
+		if (!ret_val)
+			ew32(TIMINCA, timinca);
+	}
+
+	if (ret_val) {
+		dev_warn(&adapter->pdev->dev,
+			 "Failed to restore TIMINCA clock rate delta: %d\n",
+			 ret_val);
+		return;
+	}
+
+	/* reset the systim ns time counter */
+	spin_lock_irqsave(&adapter->systim_lock, flags);
+	timecounter_init(&adapter->tc, &adapter->cc,
+			 ktime_to_ns(ktime_get_real()));
+	spin_unlock_irqrestore(&adapter->systim_lock, flags);
+
+	/* restore the previous hwtstamp configuration settings */
+	e1000e_config_hwtstamp(adapter, &adapter->hwtstamp_config);
+}
+
+/**
+ * e1000e_reset - bring the hardware into a known good state
+ * @adapter: board private structure
+ *
+ * This function boots the hardware and enables some settings that
+ * require a configuration cycle of the hardware - those cannot be
+ * set/changed during runtime. After reset the device needs to be
+ * properly configured for Rx, Tx etc.
+ */
+void e1000e_reset(struct e1000_adapter *adapter)
+{
+	struct e1000_mac_info *mac = &adapter->hw.mac;
+	struct e1000_fc_info *fc = &adapter->hw.fc;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 tx_space, min_tx_space, min_rx_space;
+	u32 pba = adapter->pba;
+	u16 hwm;
+
+	/* reset Packet Buffer Allocation to default */
+	ew32(PBA, pba);
+
+	if (adapter->max_frame_size > (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)) {
+		/* To maintain wire speed transmits, the Tx FIFO should be
+		 * large enough to accommodate two full transmit packets,
+		 * rounded up to the next 1KB and expressed in KB.  Likewise,
+		 * the Rx FIFO should be large enough to accommodate at least
+		 * one full receive packet and is similarly rounded up and
+		 * expressed in KB.
+		 */
+		pba = er32(PBA);
+		/* upper 16 bits has Tx packet buffer allocation size in KB */
+		tx_space = pba >> 16;
+		/* lower 16 bits has Rx packet buffer allocation size in KB */
+		pba &= 0xffff;
+		/* the Tx fifo also stores 16 bytes of information about the Tx
+		 * but don't include ethernet FCS because hardware appends it
+		 */
+		min_tx_space = (adapter->max_frame_size +
+				sizeof(struct e1000_tx_desc) - ETH_FCS_LEN) * 2;
+		min_tx_space = ALIGN(min_tx_space, 1024);
+		min_tx_space >>= 10;
+		/* software strips receive CRC, so leave room for it */
+		min_rx_space = adapter->max_frame_size;
+		min_rx_space = ALIGN(min_rx_space, 1024);
+		min_rx_space >>= 10;
+
+		/* If current Tx allocation is less than the min Tx FIFO size,
+		 * and the min Tx FIFO size is less than the current Rx FIFO
+		 * allocation, take space away from current Rx allocation
+		 */
+		if ((tx_space < min_tx_space) &&
+		    ((min_tx_space - tx_space) < pba)) {
+			pba -= min_tx_space - tx_space;
+
+			/* if short on Rx space, Rx wins and must trump Tx
+			 * adjustment
+			 */
+			if (pba < min_rx_space)
+				pba = min_rx_space;
+		}
+
+		ew32(PBA, pba);
+	}
+
+	/* flow control settings
+	 *
+	 * The high water mark must be low enough to fit one full frame
+	 * (or the size used for early receive) above it in the Rx FIFO.
+	 * Set it to the lower of:
+	 * - 90% of the Rx FIFO size, and
+	 * - the full Rx FIFO size minus one full frame
+	 */
+	if (adapter->flags & FLAG_DISABLE_FC_PAUSE_TIME)
+		fc->pause_time = 0xFFFF;
+	else
+		fc->pause_time = E1000_FC_PAUSE_TIME;
+	fc->send_xon = true;
+	fc->current_mode = fc->requested_mode;
+
+	switch (hw->mac.type) {
+	case e1000_ich9lan:
+	case e1000_ich10lan:
+		if (adapter->netdev->mtu > ETH_DATA_LEN) {
+			pba = 14;
+			ew32(PBA, pba);
+			fc->high_water = 0x2800;
+			fc->low_water = fc->high_water - 8;
+			break;
+		}
+		fallthrough;
+	default:
+		hwm = min(((pba << 10) * 9 / 10),
+			  ((pba << 10) - adapter->max_frame_size));
+
+		fc->high_water = hwm & E1000_FCRTH_RTH;	/* 8-byte granularity */
+		fc->low_water = fc->high_water - 8;
+		break;
+	case e1000_pchlan:
+		/* Workaround PCH LOM adapter hangs with certain network
+		 * loads.  If hangs persist, try disabling Tx flow control.
+		 */
+		if (adapter->netdev->mtu > ETH_DATA_LEN) {
+			fc->high_water = 0x3500;
+			fc->low_water = 0x1500;
+		} else {
+			fc->high_water = 0x5000;
+			fc->low_water = 0x3000;
+		}
+		fc->refresh_time = 0x1000;
+		break;
+	case e1000_pch2lan:
+	case e1000_pch_lpt:
+	case e1000_pch_spt:
+	case e1000_pch_cnp:
+	case e1000_pch_tgp:
+	case e1000_pch_adp:
+	case e1000_pch_mtp:
+	case e1000_pch_lnp:
+	case e1000_pch_ptp:
+	case e1000_pch_nvp:
+		fc->refresh_time = 0xFFFF;
+		fc->pause_time = 0xFFFF;
+
+		if (adapter->netdev->mtu <= ETH_DATA_LEN) {
+			fc->high_water = 0x05C20;
+			fc->low_water = 0x05048;
+			break;
+		}
+
+		pba = 14;
+		ew32(PBA, pba);
+		fc->high_water = ((pba << 10) * 9 / 10) & E1000_FCRTH_RTH;
+		fc->low_water = ((pba << 10) * 8 / 10) & E1000_FCRTL_RTL;
+		break;
+	}
+
+	/* Alignment of Tx data is on an arbitrary byte boundary with the
+	 * maximum size per Tx descriptor limited only to the transmit
+	 * allocation of the packet buffer minus 96 bytes with an upper
+	 * limit of 24KB due to receive synchronization limitations.
+	 */
+	adapter->tx_fifo_limit = min_t(u32, ((er32(PBA) >> 16) << 10) - 96,
+				       24 << 10);
+
+	/* Disable Adaptive Interrupt Moderation if 2 full packets cannot
+	 * fit in receive buffer.
+	 */
+	if (adapter->itr_setting & 0x3) {
+		if ((adapter->max_frame_size * 2) > (pba << 10)) {
+			if (!(adapter->flags2 & FLAG2_DISABLE_AIM)) {
+				dev_info(&adapter->pdev->dev,
+					 "Interrupt Throttle Rate off\n");
+				adapter->flags2 |= FLAG2_DISABLE_AIM;
+				e1000e_write_itr(adapter, 0);
+			}
+		} else if (adapter->flags2 & FLAG2_DISABLE_AIM) {
+			dev_info(&adapter->pdev->dev,
+				 "Interrupt Throttle Rate on\n");
+			adapter->flags2 &= ~FLAG2_DISABLE_AIM;
+			adapter->itr = 20000;
+			e1000e_write_itr(adapter, adapter->itr);
+		}
+	}
+
+	if (hw->mac.type >= e1000_pch_spt)
+		e1000_flush_desc_rings(adapter);
+	/* Allow time for pending master requests to run */
+	mac->ops.reset_hw(hw);
+
+	/* For parts with AMT enabled, let the firmware know
+	 * that the network interface is in control
+	 */
+	if (adapter->flags & FLAG_HAS_AMT)
+		e1000e_get_hw_control(adapter);
+
+	ew32(WUC, 0);
+
+	if (mac->ops.init_hw(hw))
+		e_err("Hardware Error\n");
+
+	e1000_update_mng_vlan(adapter);
+
+	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
+	ew32(VET, ETH_P_8021Q);
+
+	e1000e_reset_adaptive(hw);
+
+	/* restore systim and hwtstamp settings */
+	e1000e_systim_reset(adapter);
+
+	/* Set EEE advertisement as appropriate */
+	if (adapter->flags2 & FLAG2_HAS_EEE) {
+		s32 ret_val;
+		u16 adv_addr;
+
+		switch (hw->phy.type) {
+		case e1000_phy_82579:
+			adv_addr = I82579_EEE_ADVERTISEMENT;
+			break;
+		case e1000_phy_i217:
+			adv_addr = I217_EEE_ADVERTISEMENT;
+			break;
+		default:
+			dev_err(&adapter->pdev->dev,
+				"Invalid PHY type setting EEE advertisement\n");
+			return;
+		}
+
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val) {
+			dev_err(&adapter->pdev->dev,
+				"EEE advertisement - unable to acquire PHY\n");
+			return;
+		}
+
+		e1000_write_emi_reg_locked(hw, adv_addr,
+					   hw->dev_spec.ich8lan.eee_disable ?
+					   0 : adapter->eee_advert);
+
+		hw->phy.ops.release(hw);
+	}
+
+	if (!netif_running(adapter->netdev) &&
+	    !test_bit(__E1000_TESTING, &adapter->state))
+		e1000_power_down_phy(adapter);
+
+	e1000_get_phy_info(hw);
+
+	if ((adapter->flags & FLAG_HAS_SMART_POWER_DOWN) &&
+	    !(adapter->flags & FLAG_SMART_POWER_DOWN)) {
+		u16 phy_data = 0;
+		/* speed up time to link by disabling smart power down, ignore
+		 * the return value of this function because there is nothing
+		 * different we would do if it failed
+		 */
+		e1e_rphy(hw, IGP02E1000_PHY_POWER_MGMT, &phy_data);
+		phy_data &= ~IGP02E1000_PM_SPD;
+		e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, phy_data);
+	}
+	if (hw->mac.type >= e1000_pch_spt && adapter->int_mode == 0) {
+		u32 reg;
+
+		/* Fextnvm7 @ 0xe4[2] = 1 */
+		reg = er32(FEXTNVM7);
+		reg |= E1000_FEXTNVM7_SIDE_CLK_UNGATE;
+		ew32(FEXTNVM7, reg);
+		/* Fextnvm9 @ 0x5bb4[13:12] = 11 */
+		reg = er32(FEXTNVM9);
+		reg |= E1000_FEXTNVM9_IOSFSB_CLKGATE_DIS |
+		       E1000_FEXTNVM9_IOSFSB_CLKREQ_DIS;
+		ew32(FEXTNVM9, reg);
+	}
+
+}
+
+/**
+ * e1000e_trigger_lsc - trigger an LSC interrupt
+ * @adapter: board private structure
+ *
+ * Fire a link status change interrupt to start the watchdog.
+ **/
+static void e1000e_trigger_lsc(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (adapter->msix_entries)
+		ew32(ICS, E1000_ICS_LSC | E1000_ICS_OTHER);
+	else
+		ew32(ICS, E1000_ICS_LSC);
+}
+
+void e1000e_up(struct e1000_adapter *adapter)
+{
+	/* hardware has been reset, we need to reload some things */
+	e1000_configure(adapter);
+
+	clear_bit(__E1000_DOWN, &adapter->state);
+
+	if (adapter->msix_entries)
+		e1000_configure_msix(adapter);
+	e1000_irq_enable(adapter);
+
+	/* Tx queue started by watchdog timer when link is up */
+
+	e1000e_trigger_lsc(adapter);
+}
+
+static void e1000e_flush_descriptors(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (!(adapter->flags2 & FLAG2_DMA_BURST))
+		return;
+
+	/* flush pending descriptor writebacks to memory */
+	ew32(TIDV, adapter->tx_int_delay | E1000_TIDV_FPD);
+	ew32(RDTR, adapter->rx_int_delay | E1000_RDTR_FPD);
+
+	/* execute the writes immediately */
+	e1e_flush();
+
+	/* due to rare timing issues, write to TIDV/RDTR again to ensure the
+	 * write is successful
+	 */
+	ew32(TIDV, adapter->tx_int_delay | E1000_TIDV_FPD);
+	ew32(RDTR, adapter->rx_int_delay | E1000_RDTR_FPD);
+
+	/* execute the writes immediately */
+	e1e_flush();
+}
+
+static void e1000e_update_stats(struct e1000_adapter *adapter);
+
+/**
+ * e1000e_down - quiesce the device and optionally reset the hardware
+ * @adapter: board private structure
+ * @reset: boolean flag to reset the hardware or not
+ */
+void e1000e_down(struct e1000_adapter *adapter, bool reset)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 tctl, rctl;
+
+	/* signal that we're down so the interrupt handler does not
+	 * reschedule our watchdog timer
+	 */
+	set_bit(__E1000_DOWN, &adapter->state);
+
+	netif_carrier_off(netdev);
+
+	/* disable receives in the hardware */
+	rctl = er32(RCTL);
+	if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX))
+		ew32(RCTL, rctl & ~E1000_RCTL_EN);
+	/* flush and sleep below */
+
+	netif_stop_queue(netdev);
+
+	/* disable transmits in the hardware */
+	tctl = er32(TCTL);
+	tctl &= ~E1000_TCTL_EN;
+	ew32(TCTL, tctl);
+
+	/* flush both disables and wait for them to finish */
+	e1e_flush();
+	usleep_range(10000, 11000);
+
+	e1000_irq_disable(adapter);
+
+	napi_synchronize(&adapter->napi);
+
+	del_timer_sync(&adapter->watchdog_timer);
+	del_timer_sync(&adapter->phy_info_timer);
+
+	spin_lock(&adapter->stats64_lock);
+	e1000e_update_stats(adapter);
+	spin_unlock(&adapter->stats64_lock);
+
+	e1000e_flush_descriptors(adapter);
+
+	adapter->link_speed = 0;
+	adapter->link_duplex = 0;
+
+	/* Disable Si errata workaround on PCHx for jumbo frame flow */
+	if ((hw->mac.type >= e1000_pch2lan) &&
+	    (adapter->netdev->mtu > ETH_DATA_LEN) &&
+	    e1000_lv_jumbo_workaround_ich8lan(hw, false))
+		e_dbg("failed to disable jumbo frame workaround mode\n");
+
+	if (!pci_channel_offline(adapter->pdev)) {
+		if (reset)
+			e1000e_reset(adapter);
+		else if (hw->mac.type >= e1000_pch_spt)
+			e1000_flush_desc_rings(adapter);
+	}
+	e1000_clean_tx_ring(adapter->tx_ring);
+	e1000_clean_rx_ring(adapter->rx_ring);
+}
+
+void e1000e_reinit_locked(struct e1000_adapter *adapter)
+{
+	might_sleep();
+	while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
+		usleep_range(1000, 1100);
+	e1000e_down(adapter, true);
+	e1000e_up(adapter);
+	clear_bit(__E1000_RESETTING, &adapter->state);
+}
+
+/**
+ * e1000e_sanitize_systim - sanitize raw cycle counter reads
+ * @hw: pointer to the HW structure
+ * @systim: PHC time value read, sanitized and returned
+ * @sts: structure to hold system time before and after reading SYSTIML,
+ * may be NULL
+ *
+ * Errata for 82574/82583 possible bad bits read from SYSTIMH/L:
+ * check to see that the time is incrementing at a reasonable
+ * rate and is a multiple of incvalue.
+ **/
+static u64 e1000e_sanitize_systim(struct e1000_hw *hw, u64 systim,
+				  struct ptp_system_timestamp *sts)
+{
+	u64 time_delta, rem, temp;
+	u64 systim_next;
+	u32 incvalue;
+	int i;
+
+	incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
+	for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
+		/* latch SYSTIMH on read of SYSTIML */
+		ptp_read_system_prets(sts);
+		systim_next = (u64)er32(SYSTIML);
+		ptp_read_system_postts(sts);
+		systim_next |= (u64)er32(SYSTIMH) << 32;
+
+		time_delta = systim_next - systim;
+		temp = time_delta;
+		/* VMWare users have seen incvalue of zero, don't div / 0 */
+		rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0);
+
+		systim = systim_next;
+
+		if ((time_delta < E1000_82574_SYSTIM_EPSILON) && (rem == 0))
+			break;
+	}
+
+	return systim;
+}
+
+/**
+ * e1000e_read_systim - read SYSTIM register
+ * @adapter: board private structure
+ * @sts: structure which will contain system time before and after reading
+ * SYSTIML, may be NULL
+ **/
+u64 e1000e_read_systim(struct e1000_adapter *adapter,
+		       struct ptp_system_timestamp *sts)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 systimel, systimel_2, systimeh;
+	u64 systim;
+	/* SYSTIMH latching upon SYSTIML read does not work well.
+	 * This means that if SYSTIML overflows after we read it but before
+	 * we read SYSTIMH, the value of SYSTIMH has been incremented and we
+	 * will experience a huge non linear increment in the systime value
+	 * to fix that we test for overflow and if true, we re-read systime.
+	 */
+	ptp_read_system_prets(sts);
+	systimel = er32(SYSTIML);
+	ptp_read_system_postts(sts);
+	systimeh = er32(SYSTIMH);
+	/* Is systimel is so large that overflow is possible? */
+	if (systimel >= (u32)0xffffffff - E1000_TIMINCA_INCVALUE_MASK) {
+		ptp_read_system_prets(sts);
+		systimel_2 = er32(SYSTIML);
+		ptp_read_system_postts(sts);
+		if (systimel > systimel_2) {
+			/* There was an overflow, read again SYSTIMH, and use
+			 * systimel_2
+			 */
+			systimeh = er32(SYSTIMH);
+			systimel = systimel_2;
+		}
+	}
+	systim = (u64)systimel;
+	systim |= (u64)systimeh << 32;
+
+	if (adapter->flags2 & FLAG2_CHECK_SYSTIM_OVERFLOW)
+		systim = e1000e_sanitize_systim(hw, systim, sts);
+
+	return systim;
+}
+
+/**
+ * e1000e_cyclecounter_read - read raw cycle counter (used by time counter)
+ * @cc: cyclecounter structure
+ **/
+static u64 e1000e_cyclecounter_read(const struct cyclecounter *cc)
+{
+	struct e1000_adapter *adapter = container_of(cc, struct e1000_adapter,
+						     cc);
+
+	return e1000e_read_systim(adapter, NULL);
+}
+
+/**
+ * e1000_sw_init - Initialize general software structures (struct e1000_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * e1000_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ **/
+static int e1000_sw_init(struct e1000_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	adapter->rx_buffer_len = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN;
+	adapter->rx_ps_bsize0 = 128;
+	adapter->max_frame_size = netdev->mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
+	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
+	adapter->tx_ring_count = E1000_DEFAULT_TXD;
+	adapter->rx_ring_count = E1000_DEFAULT_RXD;
+
+	spin_lock_init(&adapter->stats64_lock);
+
+	e1000e_set_interrupt_capability(adapter);
+
+	if (e1000_alloc_queues(adapter))
+		return -ENOMEM;
+
+	/* Setup hardware time stamping cyclecounter */
+	if (adapter->flags & FLAG_HAS_HW_TIMESTAMP) {
+		adapter->cc.read = e1000e_cyclecounter_read;
+		adapter->cc.mask = CYCLECOUNTER_MASK(64);
+		adapter->cc.mult = 1;
+		/* cc.shift set in e1000e_get_base_tininca() */
+
+		spin_lock_init(&adapter->systim_lock);
+		INIT_WORK(&adapter->tx_hwtstamp_work, e1000e_tx_hwtstamp_work);
+	}
+
+	/* Explicitly disable IRQ since the NIC can be in any state. */
+	e1000_irq_disable(adapter);
+
+	set_bit(__E1000_DOWN, &adapter->state);
+	return 0;
+}
+
+/**
+ * e1000_intr_msi_test - Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ **/
+static irqreturn_t e1000_intr_msi_test(int __always_unused irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 icr = er32(ICR);
+
+	e_dbg("icr is %08X\n", icr);
+	if (icr & E1000_ICR_RXSEQ) {
+		adapter->flags &= ~FLAG_MSI_TEST_FAILED;
+		/* Force memory writes to complete before acknowledging the
+		 * interrupt is handled.
+		 */
+		wmb();
+	}
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * e1000_test_msi_interrupt - Returns 0 for successful test
+ * @adapter: board private struct
+ *
+ * code flow taken from tg3.c
+ **/
+static int e1000_test_msi_interrupt(struct e1000_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	int err;
+
+	/* poll_enable hasn't been called yet, so don't need disable */
+	/* clear any pending events */
+	er32(ICR);
+
+	/* free the real vector and request a test handler */
+	e1000_free_irq(adapter);
+	e1000e_reset_interrupt_capability(adapter);
+
+	/* Assume that the test fails, if it succeeds then the test
+	 * MSI irq handler will unset this flag
+	 */
+	adapter->flags |= FLAG_MSI_TEST_FAILED;
+
+	err = pci_enable_msi(adapter->pdev);
+	if (err)
+		goto msi_test_failed;
+
+	err = request_irq(adapter->pdev->irq, e1000_intr_msi_test, 0,
+			  netdev->name, netdev);
+	if (err) {
+		pci_disable_msi(adapter->pdev);
+		goto msi_test_failed;
+	}
+
+	/* Force memory writes to complete before enabling and firing an
+	 * interrupt.
+	 */
+	wmb();
+
+	e1000_irq_enable(adapter);
+
+	/* fire an unusual interrupt on the test handler */
+	ew32(ICS, E1000_ICS_RXSEQ);
+	e1e_flush();
+	msleep(100);
+
+	e1000_irq_disable(adapter);
+
+	rmb();			/* read flags after interrupt has been fired */
+
+	if (adapter->flags & FLAG_MSI_TEST_FAILED) {
+		adapter->int_mode = E1000E_INT_MODE_LEGACY;
+		e_info("MSI interrupt test failed, using legacy interrupt.\n");
+	} else {
+		e_dbg("MSI interrupt test succeeded!\n");
+	}
+
+	free_irq(adapter->pdev->irq, netdev);
+	pci_disable_msi(adapter->pdev);
+
+msi_test_failed:
+	e1000e_set_interrupt_capability(adapter);
+	return e1000_request_irq(adapter);
+}
+
+/**
+ * e1000_test_msi - Returns 0 if MSI test succeeds or INTx mode is restored
+ * @adapter: board private struct
+ *
+ * code flow taken from tg3.c, called with e1000 interrupts disabled.
+ **/
+static int e1000_test_msi(struct e1000_adapter *adapter)
+{
+	int err;
+	u16 pci_cmd;
+
+	if (!(adapter->flags & FLAG_MSI_ENABLED))
+		return 0;
+
+	/* disable SERR in case the MSI write causes a master abort */
+	pci_read_config_word(adapter->pdev, PCI_COMMAND, &pci_cmd);
+	if (pci_cmd & PCI_COMMAND_SERR)
+		pci_write_config_word(adapter->pdev, PCI_COMMAND,
+				      pci_cmd & ~PCI_COMMAND_SERR);
+
+	err = e1000_test_msi_interrupt(adapter);
+
+	/* re-enable SERR */
+	if (pci_cmd & PCI_COMMAND_SERR) {
+		pci_read_config_word(adapter->pdev, PCI_COMMAND, &pci_cmd);
+		pci_cmd |= PCI_COMMAND_SERR;
+		pci_write_config_word(adapter->pdev, PCI_COMMAND, pci_cmd);
+	}
+
+	return err;
+}
+
+/**
+ * e1000e_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ **/
+int e1000e_open(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct pci_dev *pdev = adapter->pdev;
+	int err;
+
+	/* disallow open during test */
+	if (test_bit(__E1000_TESTING, &adapter->state))
+		return -EBUSY;
+
+	pm_runtime_get_sync(&pdev->dev);
+
+	netif_carrier_off(netdev);
+	netif_stop_queue(netdev);
+
+	/* allocate transmit descriptors */
+	err = e1000e_setup_tx_resources(adapter->tx_ring);
+	if (err)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	err = e1000e_setup_rx_resources(adapter->rx_ring);
+	if (err)
+		goto err_setup_rx;
+
+	/* If AMT is enabled, let the firmware know that the network
+	 * interface is now open and reset the part to a known state.
+	 */
+	if (adapter->flags & FLAG_HAS_AMT) {
+		e1000e_get_hw_control(adapter);
+		e1000e_reset(adapter);
+	}
+
+	e1000e_power_up_phy(adapter);
+
+	adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
+	if ((adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN))
+		e1000_update_mng_vlan(adapter);
+
+	/* DMA latency requirement to workaround jumbo issue */
+	cpu_latency_qos_add_request(&adapter->pm_qos_req, PM_QOS_DEFAULT_VALUE);
+
+	/* before we allocate an interrupt, we must be ready to handle it.
+	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
+	 * as soon as we call pci_request_irq, so we have to setup our
+	 * clean_rx handler before we do so.
+	 */
+	e1000_configure(adapter);
+
+	err = e1000_request_irq(adapter);
+	if (err)
+		goto err_req_irq;
+
+	/* Work around PCIe errata with MSI interrupts causing some chipsets to
+	 * ignore e1000e MSI messages, which means we need to test our MSI
+	 * interrupt now
+	 */
+	if (adapter->int_mode != E1000E_INT_MODE_LEGACY) {
+		err = e1000_test_msi(adapter);
+		if (err) {
+			e_err("Interrupt allocation failed\n");
+			goto err_req_irq;
+		}
+	}
+
+	/* From here on the code is the same as e1000e_up() */
+	clear_bit(__E1000_DOWN, &adapter->state);
+
+	napi_enable(&adapter->napi);
+
+	e1000_irq_enable(adapter);
+
+	adapter->tx_hang_recheck = false;
+
+	hw->mac.get_link_status = true;
+	pm_runtime_put(&pdev->dev);
+
+	e1000e_trigger_lsc(adapter);
+
+	return 0;
+
+err_req_irq:
+	cpu_latency_qos_remove_request(&adapter->pm_qos_req);
+	e1000e_release_hw_control(adapter);
+	e1000_power_down_phy(adapter);
+	e1000e_free_rx_resources(adapter->rx_ring);
+err_setup_rx:
+	e1000e_free_tx_resources(adapter->tx_ring);
+err_setup_tx:
+	e1000e_reset(adapter);
+	pm_runtime_put_sync(&pdev->dev);
+
+	return err;
+}
+
+/**
+ * e1000e_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the drivers control, but
+ * needs to be disabled.  A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ **/
+int e1000e_close(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct pci_dev *pdev = adapter->pdev;
+	int count = E1000_CHECK_RESET_COUNT;
+
+	while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
+		usleep_range(10000, 11000);
+
+	WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
+
+	pm_runtime_get_sync(&pdev->dev);
+
+	if (netif_device_present(netdev)) {
+		e1000e_down(adapter, true);
+		e1000_free_irq(adapter);
+
+		/* Link status message must follow this format */
+		netdev_info(netdev, "NIC Link is Down\n");
+	}
+
+	napi_disable(&adapter->napi);
+
+	e1000e_free_tx_resources(adapter->tx_ring);
+	e1000e_free_rx_resources(adapter->rx_ring);
+
+	/* kill manageability vlan ID if supported, but not if a vlan with
+	 * the same ID is registered on the host OS (let 8021q kill it)
+	 */
+	if (adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN)
+		e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q),
+				       adapter->mng_vlan_id);
+
+	/* If AMT is enabled, let the firmware know that the network
+	 * interface is now closed
+	 */
+	if ((adapter->flags & FLAG_HAS_AMT) &&
+	    !test_bit(__E1000_TESTING, &adapter->state))
+		e1000e_release_hw_control(adapter);
+
+	cpu_latency_qos_remove_request(&adapter->pm_qos_req);
+
+	pm_runtime_put_sync(&pdev->dev);
+
+	return 0;
+}
+
+/**
+ * e1000_set_mac - Change the Ethernet Address of the NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int e1000_set_mac(struct net_device *netdev, void *p)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	eth_hw_addr_set(netdev, addr->sa_data);
+	memcpy(adapter->hw.mac.addr, addr->sa_data, netdev->addr_len);
+
+	hw->mac.ops.rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
+
+	if (adapter->flags & FLAG_RESET_OVERWRITES_LAA) {
+		/* activate the work around */
+		e1000e_set_laa_state_82571(&adapter->hw, 1);
+
+		/* Hold a copy of the LAA in RAR[14] This is done so that
+		 * between the time RAR[0] gets clobbered  and the time it
+		 * gets fixed (in e1000_watchdog), the actual LAA is in one
+		 * of the RARs and no incoming packets directed to this port
+		 * are dropped. Eventually the LAA will be in RAR[0] and
+		 * RAR[14]
+		 */
+		hw->mac.ops.rar_set(&adapter->hw, adapter->hw.mac.addr,
+				    adapter->hw.mac.rar_entry_count - 1);
+	}
+
+	return 0;
+}
+
+/**
+ * e1000e_update_phy_task - work thread to update phy
+ * @work: pointer to our work struct
+ *
+ * this worker thread exists because we must acquire a
+ * semaphore to read the phy, which we could msleep while
+ * waiting for it, and we can't msleep in a timer.
+ **/
+static void e1000e_update_phy_task(struct work_struct *work)
+{
+	struct e1000_adapter *adapter = container_of(work,
+						     struct e1000_adapter,
+						     update_phy_task);
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (test_bit(__E1000_DOWN, &adapter->state))
+		return;
+
+	e1000_get_phy_info(hw);
+
+	/* Enable EEE on 82579 after link up */
+	if (hw->phy.type >= e1000_phy_82579)
+		e1000_set_eee_pchlan(hw);
+}
+
+/**
+ * e1000_update_phy_info - timre call-back to update PHY info
+ * @t: pointer to timer_list containing private info adapter
+ *
+ * Need to wait a few seconds after link up to get diagnostic information from
+ * the phy
+ **/
+static void e1000_update_phy_info(struct timer_list *t)
+{
+	struct e1000_adapter *adapter = from_timer(adapter, t, phy_info_timer);
+
+	if (test_bit(__E1000_DOWN, &adapter->state))
+		return;
+
+	schedule_work(&adapter->update_phy_task);
+}
+
+/**
+ * e1000e_update_phy_stats - Update the PHY statistics counters
+ * @adapter: board private structure
+ *
+ * Read/clear the upper 16-bit PHY registers and read/accumulate lower
+ **/
+static void e1000e_update_phy_stats(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	s32 ret_val;
+	u16 phy_data;
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return;
+
+	/* A page set is expensive so check if already on desired page.
+	 * If not, set to the page with the PHY status registers.
+	 */
+	hw->phy.addr = 1;
+	ret_val = e1000e_read_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
+					   &phy_data);
+	if (ret_val)
+		goto release;
+	if (phy_data != (HV_STATS_PAGE << IGP_PAGE_SHIFT)) {
+		ret_val = hw->phy.ops.set_page(hw,
+					       HV_STATS_PAGE << IGP_PAGE_SHIFT);
+		if (ret_val)
+			goto release;
+	}
+
+	/* Single Collision Count */
+	hw->phy.ops.read_reg_page(hw, HV_SCC_UPPER, &phy_data);
+	ret_val = hw->phy.ops.read_reg_page(hw, HV_SCC_LOWER, &phy_data);
+	if (!ret_val)
+		adapter->stats.scc += phy_data;
+
+	/* Excessive Collision Count */
+	hw->phy.ops.read_reg_page(hw, HV_ECOL_UPPER, &phy_data);
+	ret_val = hw->phy.ops.read_reg_page(hw, HV_ECOL_LOWER, &phy_data);
+	if (!ret_val)
+		adapter->stats.ecol += phy_data;
+
+	/* Multiple Collision Count */
+	hw->phy.ops.read_reg_page(hw, HV_MCC_UPPER, &phy_data);
+	ret_val = hw->phy.ops.read_reg_page(hw, HV_MCC_LOWER, &phy_data);
+	if (!ret_val)
+		adapter->stats.mcc += phy_data;
+
+	/* Late Collision Count */
+	hw->phy.ops.read_reg_page(hw, HV_LATECOL_UPPER, &phy_data);
+	ret_val = hw->phy.ops.read_reg_page(hw, HV_LATECOL_LOWER, &phy_data);
+	if (!ret_val)
+		adapter->stats.latecol += phy_data;
+
+	/* Collision Count - also used for adaptive IFS */
+	hw->phy.ops.read_reg_page(hw, HV_COLC_UPPER, &phy_data);
+	ret_val = hw->phy.ops.read_reg_page(hw, HV_COLC_LOWER, &phy_data);
+	if (!ret_val)
+		hw->mac.collision_delta = phy_data;
+
+	/* Defer Count */
+	hw->phy.ops.read_reg_page(hw, HV_DC_UPPER, &phy_data);
+	ret_val = hw->phy.ops.read_reg_page(hw, HV_DC_LOWER, &phy_data);
+	if (!ret_val)
+		adapter->stats.dc += phy_data;
+
+	/* Transmit with no CRS */
+	hw->phy.ops.read_reg_page(hw, HV_TNCRS_UPPER, &phy_data);
+	ret_val = hw->phy.ops.read_reg_page(hw, HV_TNCRS_LOWER, &phy_data);
+	if (!ret_val)
+		adapter->stats.tncrs += phy_data;
+
+release:
+	hw->phy.ops.release(hw);
+}
+
+/**
+ * e1000e_update_stats - Update the board statistics counters
+ * @adapter: board private structure
+ **/
+static void e1000e_update_stats(struct e1000_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	struct pci_dev *pdev = adapter->pdev;
+
+	/* Prevent stats update while adapter is being reset, or if the pci
+	 * connection is down.
+	 */
+	if (adapter->link_speed == 0)
+		return;
+	if (pci_channel_offline(pdev))
+		return;
+
+	adapter->stats.crcerrs += er32(CRCERRS);
+	adapter->stats.gprc += er32(GPRC);
+	adapter->stats.gorc += er32(GORCL);
+	er32(GORCH);		/* Clear gorc */
+	adapter->stats.bprc += er32(BPRC);
+	adapter->stats.mprc += er32(MPRC);
+	adapter->stats.roc += er32(ROC);
+
+	adapter->stats.mpc += er32(MPC);
+
+	/* Half-duplex statistics */
+	if (adapter->link_duplex == HALF_DUPLEX) {
+		if (adapter->flags2 & FLAG2_HAS_PHY_STATS) {
+			e1000e_update_phy_stats(adapter);
+		} else {
+			adapter->stats.scc += er32(SCC);
+			adapter->stats.ecol += er32(ECOL);
+			adapter->stats.mcc += er32(MCC);
+			adapter->stats.latecol += er32(LATECOL);
+			adapter->stats.dc += er32(DC);
+
+			hw->mac.collision_delta = er32(COLC);
+
+			if ((hw->mac.type != e1000_82574) &&
+			    (hw->mac.type != e1000_82583))
+				adapter->stats.tncrs += er32(TNCRS);
+		}
+		adapter->stats.colc += hw->mac.collision_delta;
+	}
+
+	adapter->stats.xonrxc += er32(XONRXC);
+	adapter->stats.xontxc += er32(XONTXC);
+	adapter->stats.xoffrxc += er32(XOFFRXC);
+	adapter->stats.xofftxc += er32(XOFFTXC);
+	adapter->stats.gptc += er32(GPTC);
+	adapter->stats.gotc += er32(GOTCL);
+	er32(GOTCH);		/* Clear gotc */
+	adapter->stats.rnbc += er32(RNBC);
+	adapter->stats.ruc += er32(RUC);
+
+	adapter->stats.mptc += er32(MPTC);
+	adapter->stats.bptc += er32(BPTC);
+
+	/* used for adaptive IFS */
+
+	hw->mac.tx_packet_delta = er32(TPT);
+	adapter->stats.tpt += hw->mac.tx_packet_delta;
+
+	adapter->stats.algnerrc += er32(ALGNERRC);
+	adapter->stats.rxerrc += er32(RXERRC);
+	adapter->stats.cexterr += er32(CEXTERR);
+	adapter->stats.tsctc += er32(TSCTC);
+	adapter->stats.tsctfc += er32(TSCTFC);
+
+	/* Fill out the OS statistics structure */
+	netdev->stats.multicast = adapter->stats.mprc;
+	netdev->stats.collisions = adapter->stats.colc;
+
+	/* Rx Errors */
+
+	/* RLEC on some newer hardware can be incorrect so build
+	 * our own version based on RUC and ROC
+	 */
+	netdev->stats.rx_errors = adapter->stats.rxerrc +
+	    adapter->stats.crcerrs + adapter->stats.algnerrc +
+	    adapter->stats.ruc + adapter->stats.roc + adapter->stats.cexterr;
+	netdev->stats.rx_length_errors = adapter->stats.ruc +
+	    adapter->stats.roc;
+	netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
+	netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
+	netdev->stats.rx_missed_errors = adapter->stats.mpc;
+
+	/* Tx Errors */
+	netdev->stats.tx_errors = adapter->stats.ecol + adapter->stats.latecol;
+	netdev->stats.tx_aborted_errors = adapter->stats.ecol;
+	netdev->stats.tx_window_errors = adapter->stats.latecol;
+	netdev->stats.tx_carrier_errors = adapter->stats.tncrs;
+
+	/* Tx Dropped needs to be maintained elsewhere */
+
+	/* Management Stats */
+	adapter->stats.mgptc += er32(MGTPTC);
+	adapter->stats.mgprc += er32(MGTPRC);
+	adapter->stats.mgpdc += er32(MGTPDC);
+
+	/* Correctable ECC Errors */
+	if (hw->mac.type >= e1000_pch_lpt) {
+		u32 pbeccsts = er32(PBECCSTS);
+
+		adapter->corr_errors +=
+		    pbeccsts & E1000_PBECCSTS_CORR_ERR_CNT_MASK;
+		adapter->uncorr_errors +=
+		    (pbeccsts & E1000_PBECCSTS_UNCORR_ERR_CNT_MASK) >>
+		    E1000_PBECCSTS_UNCORR_ERR_CNT_SHIFT;
+	}
+}
+
+/**
+ * e1000_phy_read_status - Update the PHY register status snapshot
+ * @adapter: board private structure
+ **/
+static void e1000_phy_read_status(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_phy_regs *phy = &adapter->phy_regs;
+
+	if (!pm_runtime_suspended((&adapter->pdev->dev)->parent) &&
+	    (er32(STATUS) & E1000_STATUS_LU) &&
+	    (adapter->hw.phy.media_type == e1000_media_type_copper)) {
+		int ret_val;
+
+		ret_val = e1e_rphy(hw, MII_BMCR, &phy->bmcr);
+		ret_val |= e1e_rphy(hw, MII_BMSR, &phy->bmsr);
+		ret_val |= e1e_rphy(hw, MII_ADVERTISE, &phy->advertise);
+		ret_val |= e1e_rphy(hw, MII_LPA, &phy->lpa);
+		ret_val |= e1e_rphy(hw, MII_EXPANSION, &phy->expansion);
+		ret_val |= e1e_rphy(hw, MII_CTRL1000, &phy->ctrl1000);
+		ret_val |= e1e_rphy(hw, MII_STAT1000, &phy->stat1000);
+		ret_val |= e1e_rphy(hw, MII_ESTATUS, &phy->estatus);
+		if (ret_val)
+			e_warn("Error reading PHY register\n");
+	} else {
+		/* Do not read PHY registers if link is not up
+		 * Set values to typical power-on defaults
+		 */
+		phy->bmcr = (BMCR_SPEED1000 | BMCR_ANENABLE | BMCR_FULLDPLX);
+		phy->bmsr = (BMSR_100FULL | BMSR_100HALF | BMSR_10FULL |
+			     BMSR_10HALF | BMSR_ESTATEN | BMSR_ANEGCAPABLE |
+			     BMSR_ERCAP);
+		phy->advertise = (ADVERTISE_PAUSE_ASYM | ADVERTISE_PAUSE_CAP |
+				  ADVERTISE_ALL | ADVERTISE_CSMA);
+		phy->lpa = 0;
+		phy->expansion = EXPANSION_ENABLENPAGE;
+		phy->ctrl1000 = ADVERTISE_1000FULL;
+		phy->stat1000 = 0;
+		phy->estatus = (ESTATUS_1000_TFULL | ESTATUS_1000_THALF);
+	}
+}
+
+static void e1000_print_link_info(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl = er32(CTRL);
+
+	/* Link status message must follow this format for user tools */
+	netdev_info(adapter->netdev,
+		    "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
+		    adapter->link_speed,
+		    adapter->link_duplex == FULL_DUPLEX ? "Full" : "Half",
+		    (ctrl & E1000_CTRL_TFCE) && (ctrl & E1000_CTRL_RFCE) ? "Rx/Tx" :
+		    (ctrl & E1000_CTRL_RFCE) ? "Rx" :
+		    (ctrl & E1000_CTRL_TFCE) ? "Tx" : "None");
+}
+
+static bool e1000e_has_link(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	bool link_active = false;
+	s32 ret_val = 0;
+
+	/* get_link_status is set on LSC (link status) interrupt or
+	 * Rx sequence error interrupt.  get_link_status will stay
+	 * true until the check_for_link establishes link
+	 * for copper adapters ONLY
+	 */
+	switch (hw->phy.media_type) {
+	case e1000_media_type_copper:
+		if (hw->mac.get_link_status) {
+			ret_val = hw->mac.ops.check_for_link(hw);
+			link_active = !hw->mac.get_link_status;
+		} else {
+			link_active = true;
+		}
+		break;
+	case e1000_media_type_fiber:
+		ret_val = hw->mac.ops.check_for_link(hw);
+		link_active = !!(er32(STATUS) & E1000_STATUS_LU);
+		break;
+	case e1000_media_type_internal_serdes:
+		ret_val = hw->mac.ops.check_for_link(hw);
+		link_active = hw->mac.serdes_has_link;
+		break;
+	default:
+	case e1000_media_type_unknown:
+		break;
+	}
+
+	if ((ret_val == -E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) &&
+	    (er32(CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) {
+		/* See e1000_kmrn_lock_loss_workaround_ich8lan() */
+		e_info("Gigabit has been disabled, downgrading speed\n");
+	}
+
+	return link_active;
+}
+
+static void e1000e_enable_receives(struct e1000_adapter *adapter)
+{
+	/* make sure the receive unit is started */
+	if ((adapter->flags & FLAG_RX_NEEDS_RESTART) &&
+	    (adapter->flags & FLAG_RESTART_NOW)) {
+		struct e1000_hw *hw = &adapter->hw;
+		u32 rctl = er32(RCTL);
+
+		ew32(RCTL, rctl | E1000_RCTL_EN);
+		adapter->flags &= ~FLAG_RESTART_NOW;
+	}
+}
+
+static void e1000e_check_82574_phy_workaround(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* With 82574 controllers, PHY needs to be checked periodically
+	 * for hung state and reset, if two calls return true
+	 */
+	if (e1000_check_phy_82574(hw))
+		adapter->phy_hang_count++;
+	else
+		adapter->phy_hang_count = 0;
+
+	if (adapter->phy_hang_count > 1) {
+		adapter->phy_hang_count = 0;
+		e_dbg("PHY appears hung - resetting\n");
+		schedule_work(&adapter->reset_task);
+	}
+}
+
+/**
+ * e1000_watchdog - Timer Call-back
+ * @t: pointer to timer_list containing private info adapter
+ **/
+static void e1000_watchdog(struct timer_list *t)
+{
+	struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer);
+
+	/* Do the rest outside of interrupt context */
+	schedule_work(&adapter->watchdog_task);
+
+	/* TODO: make this use queue_delayed_work() */
+}
+
+static void e1000_watchdog_task(struct work_struct *work)
+{
+	struct e1000_adapter *adapter = container_of(work,
+						     struct e1000_adapter,
+						     watchdog_task);
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_mac_info *mac = &adapter->hw.mac;
+	struct e1000_phy_info *phy = &adapter->hw.phy;
+	struct e1000_ring *tx_ring = adapter->tx_ring;
+	u32 dmoff_exit_timeout = 100, tries = 0;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 link, tctl, pcim_state;
+
+	if (test_bit(__E1000_DOWN, &adapter->state))
+		return;
+
+	link = e1000e_has_link(adapter);
+	if ((netif_carrier_ok(netdev)) && link) {
+		/* Cancel scheduled suspend requests. */
+		pm_runtime_resume(netdev->dev.parent);
+
+		e1000e_enable_receives(adapter);
+		goto link_up;
+	}
+
+	if ((e1000e_enable_tx_pkt_filtering(hw)) &&
+	    (adapter->mng_vlan_id != adapter->hw.mng_cookie.vlan_id))
+		e1000_update_mng_vlan(adapter);
+
+	if (link) {
+		if (!netif_carrier_ok(netdev)) {
+			bool txb2b = true;
+
+			/* Cancel scheduled suspend requests. */
+			pm_runtime_resume(netdev->dev.parent);
+
+			/* Checking if MAC is in DMoff state*/
+			if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) {
+				pcim_state = er32(STATUS);
+				while (pcim_state & E1000_STATUS_PCIM_STATE) {
+					if (tries++ == dmoff_exit_timeout) {
+						e_dbg("Error in exiting dmoff\n");
+						break;
+					}
+					usleep_range(10000, 20000);
+					pcim_state = er32(STATUS);
+
+					/* Checking if MAC exited DMoff state */
+					if (!(pcim_state & E1000_STATUS_PCIM_STATE))
+						e1000_phy_hw_reset(&adapter->hw);
+				}
+			}
+
+			/* update snapshot of PHY registers on LSC */
+			e1000_phy_read_status(adapter);
+			mac->ops.get_link_up_info(&adapter->hw,
+						  &adapter->link_speed,
+						  &adapter->link_duplex);
+			e1000_print_link_info(adapter);
+
+			/* check if SmartSpeed worked */
+			e1000e_check_downshift(hw);
+			if (phy->speed_downgraded)
+				netdev_warn(netdev,
+					    "Link Speed was downgraded by SmartSpeed\n");
+
+			/* On supported PHYs, check for duplex mismatch only
+			 * if link has autonegotiated at 10/100 half
+			 */
+			if ((hw->phy.type == e1000_phy_igp_3 ||
+			     hw->phy.type == e1000_phy_bm) &&
+			    hw->mac.autoneg &&
+			    (adapter->link_speed == SPEED_10 ||
+			     adapter->link_speed == SPEED_100) &&
+			    (adapter->link_duplex == HALF_DUPLEX)) {
+				u16 autoneg_exp;
+
+				e1e_rphy(hw, MII_EXPANSION, &autoneg_exp);
+
+				if (!(autoneg_exp & EXPANSION_NWAY))
+					e_info("Autonegotiated half duplex but link partner cannot autoneg.  Try forcing full duplex if link gets many collisions.\n");
+			}
+
+			/* adjust timeout factor according to speed/duplex */
+			adapter->tx_timeout_factor = 1;
+			switch (adapter->link_speed) {
+			case SPEED_10:
+				txb2b = false;
+				adapter->tx_timeout_factor = 16;
+				break;
+			case SPEED_100:
+				txb2b = false;
+				adapter->tx_timeout_factor = 10;
+				break;
+			}
+
+			/* workaround: re-program speed mode bit after
+			 * link-up event
+			 */
+			if ((adapter->flags & FLAG_TARC_SPEED_MODE_BIT) &&
+			    !txb2b) {
+				u32 tarc0;
+
+				tarc0 = er32(TARC(0));
+				tarc0 &= ~SPEED_MODE_BIT;
+				ew32(TARC(0), tarc0);
+			}
+
+			/* enable transmits in the hardware, need to do this
+			 * after setting TARC(0)
+			 */
+			tctl = er32(TCTL);
+			tctl |= E1000_TCTL_EN;
+			ew32(TCTL, tctl);
+
+			/* Perform any post-link-up configuration before
+			 * reporting link up.
+			 */
+			if (phy->ops.cfg_on_link_up)
+				phy->ops.cfg_on_link_up(hw);
+
+			netif_wake_queue(netdev);
+			netif_carrier_on(netdev);
+
+			if (!test_bit(__E1000_DOWN, &adapter->state))
+				mod_timer(&adapter->phy_info_timer,
+					  round_jiffies(jiffies + 2 * HZ));
+		}
+	} else {
+		if (netif_carrier_ok(netdev)) {
+			adapter->link_speed = 0;
+			adapter->link_duplex = 0;
+			/* Link status message must follow this format */
+			netdev_info(netdev, "NIC Link is Down\n");
+			netif_carrier_off(netdev);
+			netif_stop_queue(netdev);
+			if (!test_bit(__E1000_DOWN, &adapter->state))
+				mod_timer(&adapter->phy_info_timer,
+					  round_jiffies(jiffies + 2 * HZ));
+
+			/* 8000ES2LAN requires a Rx packet buffer work-around
+			 * on link down event; reset the controller to flush
+			 * the Rx packet buffer.
+			 */
+			if (adapter->flags & FLAG_RX_NEEDS_RESTART)
+				adapter->flags |= FLAG_RESTART_NOW;
+			else
+				pm_schedule_suspend(netdev->dev.parent,
+						    LINK_TIMEOUT);
+		}
+	}
+
+link_up:
+	spin_lock(&adapter->stats64_lock);
+	e1000e_update_stats(adapter);
+
+	mac->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
+	adapter->tpt_old = adapter->stats.tpt;
+	mac->collision_delta = adapter->stats.colc - adapter->colc_old;
+	adapter->colc_old = adapter->stats.colc;
+
+	adapter->gorc = adapter->stats.gorc - adapter->gorc_old;
+	adapter->gorc_old = adapter->stats.gorc;
+	adapter->gotc = adapter->stats.gotc - adapter->gotc_old;
+	adapter->gotc_old = adapter->stats.gotc;
+	spin_unlock(&adapter->stats64_lock);
+
+	/* If the link is lost the controller stops DMA, but
+	 * if there is queued Tx work it cannot be done.  So
+	 * reset the controller to flush the Tx packet buffers.
+	 */
+	if (!netif_carrier_ok(netdev) &&
+	    (e1000_desc_unused(tx_ring) + 1 < tx_ring->count))
+		adapter->flags |= FLAG_RESTART_NOW;
+
+	/* If reset is necessary, do it outside of interrupt context. */
+	if (adapter->flags & FLAG_RESTART_NOW) {
+		schedule_work(&adapter->reset_task);
+		/* return immediately since reset is imminent */
+		return;
+	}
+
+	e1000e_update_adaptive(&adapter->hw);
+
+	/* Simple mode for Interrupt Throttle Rate (ITR) */
+	if (adapter->itr_setting == 4) {
+		/* Symmetric Tx/Rx gets a reduced ITR=2000;
+		 * Total asymmetrical Tx or Rx gets ITR=8000;
+		 * everyone else is between 2000-8000.
+		 */
+		u32 goc = (adapter->gotc + adapter->gorc) / 10000;
+		u32 dif = (adapter->gotc > adapter->gorc ?
+			   adapter->gotc - adapter->gorc :
+			   adapter->gorc - adapter->gotc) / 10000;
+		u32 itr = goc > 0 ? (dif * 6000 / goc + 2000) : 8000;
+
+		e1000e_write_itr(adapter, itr);
+	}
+
+	/* Cause software interrupt to ensure Rx ring is cleaned */
+	if (adapter->msix_entries)
+		ew32(ICS, adapter->rx_ring->ims_val);
+	else
+		ew32(ICS, E1000_ICS_RXDMT0);
+
+	/* flush pending descriptors to memory before detecting Tx hang */
+	e1000e_flush_descriptors(adapter);
+
+	/* Force detection of hung controller every watchdog period */
+	adapter->detect_tx_hung = true;
+
+	/* With 82571 controllers, LAA may be overwritten due to controller
+	 * reset from the other port. Set the appropriate LAA in RAR[0]
+	 */
+	if (e1000e_get_laa_state_82571(hw))
+		hw->mac.ops.rar_set(hw, adapter->hw.mac.addr, 0);
+
+	if (adapter->flags2 & FLAG2_CHECK_PHY_HANG)
+		e1000e_check_82574_phy_workaround(adapter);
+
+	/* Clear valid timestamp stuck in RXSTMPL/H due to a Rx error */
+	if (adapter->hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE) {
+		if ((adapter->flags2 & FLAG2_CHECK_RX_HWTSTAMP) &&
+		    (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) {
+			er32(RXSTMPH);
+			adapter->rx_hwtstamp_cleared++;
+		} else {
+			adapter->flags2 |= FLAG2_CHECK_RX_HWTSTAMP;
+		}
+	}
+
+	/* Reset the timer */
+	if (!test_bit(__E1000_DOWN, &adapter->state))
+		mod_timer(&adapter->watchdog_timer,
+			  round_jiffies(jiffies + 2 * HZ));
+}
+
+#define E1000_TX_FLAGS_CSUM		0x00000001
+#define E1000_TX_FLAGS_VLAN		0x00000002
+#define E1000_TX_FLAGS_TSO		0x00000004
+#define E1000_TX_FLAGS_IPV4		0x00000008
+#define E1000_TX_FLAGS_NO_FCS		0x00000010
+#define E1000_TX_FLAGS_HWTSTAMP		0x00000020
+#define E1000_TX_FLAGS_VLAN_MASK	0xffff0000
+#define E1000_TX_FLAGS_VLAN_SHIFT	16
+
+static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb,
+		     __be16 protocol)
+{
+	struct e1000_context_desc *context_desc;
+	struct e1000_buffer *buffer_info;
+	unsigned int i;
+	u32 cmd_length = 0;
+	u16 ipcse = 0, mss;
+	u8 ipcss, ipcso, tucss, tucso, hdr_len;
+	int err;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	err = skb_cow_head(skb, 0);
+	if (err < 0)
+		return err;
+
+	hdr_len = skb_tcp_all_headers(skb);
+	mss = skb_shinfo(skb)->gso_size;
+	if (protocol == htons(ETH_P_IP)) {
+		struct iphdr *iph = ip_hdr(skb);
+		iph->tot_len = 0;
+		iph->check = 0;
+		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+							 0, IPPROTO_TCP, 0);
+		cmd_length = E1000_TXD_CMD_IP;
+		ipcse = skb_transport_offset(skb) - 1;
+	} else if (skb_is_gso_v6(skb)) {
+		tcp_v6_gso_csum_prep(skb);
+		ipcse = 0;
+	}
+	ipcss = skb_network_offset(skb);
+	ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data;
+	tucss = skb_transport_offset(skb);
+	tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data;
+
+	cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE |
+		       E1000_TXD_CMD_TCP | (skb->len - (hdr_len)));
+
+	i = tx_ring->next_to_use;
+	context_desc = E1000_CONTEXT_DESC(*tx_ring, i);
+	buffer_info = &tx_ring->buffer_info[i];
+
+	context_desc->lower_setup.ip_fields.ipcss = ipcss;
+	context_desc->lower_setup.ip_fields.ipcso = ipcso;
+	context_desc->lower_setup.ip_fields.ipcse = cpu_to_le16(ipcse);
+	context_desc->upper_setup.tcp_fields.tucss = tucss;
+	context_desc->upper_setup.tcp_fields.tucso = tucso;
+	context_desc->upper_setup.tcp_fields.tucse = 0;
+	context_desc->tcp_seg_setup.fields.mss = cpu_to_le16(mss);
+	context_desc->tcp_seg_setup.fields.hdr_len = hdr_len;
+	context_desc->cmd_and_length = cpu_to_le32(cmd_length);
+
+	buffer_info->time_stamp = jiffies;
+	buffer_info->next_to_watch = i;
+
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
+	tx_ring->next_to_use = i;
+
+	return 1;
+}
+
+static bool e1000_tx_csum(struct e1000_ring *tx_ring, struct sk_buff *skb,
+			  __be16 protocol)
+{
+	struct e1000_adapter *adapter = tx_ring->adapter;
+	struct e1000_context_desc *context_desc;
+	struct e1000_buffer *buffer_info;
+	unsigned int i;
+	u8 css;
+	u32 cmd_len = E1000_TXD_CMD_DEXT;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return false;
+
+	switch (protocol) {
+	case cpu_to_be16(ETH_P_IP):
+		if (ip_hdr(skb)->protocol == IPPROTO_TCP)
+			cmd_len |= E1000_TXD_CMD_TCP;
+		break;
+	case cpu_to_be16(ETH_P_IPV6):
+		/* XXX not handling all IPV6 headers */
+		if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
+			cmd_len |= E1000_TXD_CMD_TCP;
+		break;
+	default:
+		if (unlikely(net_ratelimit()))
+			e_warn("checksum_partial proto=%x!\n",
+			       be16_to_cpu(protocol));
+		break;
+	}
+
+	css = skb_checksum_start_offset(skb);
+
+	i = tx_ring->next_to_use;
+	buffer_info = &tx_ring->buffer_info[i];
+	context_desc = E1000_CONTEXT_DESC(*tx_ring, i);
+
+	context_desc->lower_setup.ip_config = 0;
+	context_desc->upper_setup.tcp_fields.tucss = css;
+	context_desc->upper_setup.tcp_fields.tucso = css + skb->csum_offset;
+	context_desc->upper_setup.tcp_fields.tucse = 0;
+	context_desc->tcp_seg_setup.data = 0;
+	context_desc->cmd_and_length = cpu_to_le32(cmd_len);
+
+	buffer_info->time_stamp = jiffies;
+	buffer_info->next_to_watch = i;
+
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
+	tx_ring->next_to_use = i;
+
+	return true;
+}
+
+static int e1000_tx_map(struct e1000_ring *tx_ring, struct sk_buff *skb,
+			unsigned int first, unsigned int max_per_txd,
+			unsigned int nr_frags)
+{
+	struct e1000_adapter *adapter = tx_ring->adapter;
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_buffer *buffer_info;
+	unsigned int len = skb_headlen(skb);
+	unsigned int offset = 0, size, count = 0, i;
+	unsigned int f, bytecount, segs;
+
+	i = tx_ring->next_to_use;
+
+	while (len) {
+		buffer_info = &tx_ring->buffer_info[i];
+		size = min(len, max_per_txd);
+
+		buffer_info->length = size;
+		buffer_info->time_stamp = jiffies;
+		buffer_info->next_to_watch = i;
+		buffer_info->dma = dma_map_single(&pdev->dev,
+						  skb->data + offset,
+						  size, DMA_TO_DEVICE);
+		buffer_info->mapped_as_page = false;
+		if (dma_mapping_error(&pdev->dev, buffer_info->dma))
+			goto dma_error;
+
+		len -= size;
+		offset += size;
+		count++;
+
+		if (len) {
+			i++;
+			if (i == tx_ring->count)
+				i = 0;
+		}
+	}
+
+	for (f = 0; f < nr_frags; f++) {
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
+
+		len = skb_frag_size(frag);
+		offset = 0;
+
+		while (len) {
+			i++;
+			if (i == tx_ring->count)
+				i = 0;
+
+			buffer_info = &tx_ring->buffer_info[i];
+			size = min(len, max_per_txd);
+
+			buffer_info->length = size;
+			buffer_info->time_stamp = jiffies;
+			buffer_info->next_to_watch = i;
+			buffer_info->dma = skb_frag_dma_map(&pdev->dev, frag,
+							    offset, size,
+							    DMA_TO_DEVICE);
+			buffer_info->mapped_as_page = true;
+			if (dma_mapping_error(&pdev->dev, buffer_info->dma))
+				goto dma_error;
+
+			len -= size;
+			offset += size;
+			count++;
+		}
+	}
+
+	segs = skb_shinfo(skb)->gso_segs ? : 1;
+	/* multiply data chunks by size of headers */
+	bytecount = ((segs - 1) * skb_headlen(skb)) + skb->len;
+
+	tx_ring->buffer_info[i].skb = skb;
+	tx_ring->buffer_info[i].segs = segs;
+	tx_ring->buffer_info[i].bytecount = bytecount;
+	tx_ring->buffer_info[first].next_to_watch = i;
+
+	return count;
+
+dma_error:
+	dev_err(&pdev->dev, "Tx DMA map failed\n");
+	buffer_info->dma = 0;
+	if (count)
+		count--;
+
+	while (count--) {
+		if (i == 0)
+			i += tx_ring->count;
+		i--;
+		buffer_info = &tx_ring->buffer_info[i];
+		e1000_put_txbuf(tx_ring, buffer_info, true);
+	}
+
+	return 0;
+}
+
+static void e1000_tx_queue(struct e1000_ring *tx_ring, int tx_flags, int count)
+{
+	struct e1000_adapter *adapter = tx_ring->adapter;
+	struct e1000_tx_desc *tx_desc = NULL;
+	struct e1000_buffer *buffer_info;
+	u32 txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS;
+	unsigned int i;
+
+	if (tx_flags & E1000_TX_FLAGS_TSO) {
+		txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D |
+		    E1000_TXD_CMD_TSE;
+		txd_upper |= E1000_TXD_POPTS_TXSM << 8;
+
+		if (tx_flags & E1000_TX_FLAGS_IPV4)
+			txd_upper |= E1000_TXD_POPTS_IXSM << 8;
+	}
+
+	if (tx_flags & E1000_TX_FLAGS_CSUM) {
+		txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
+		txd_upper |= E1000_TXD_POPTS_TXSM << 8;
+	}
+
+	if (tx_flags & E1000_TX_FLAGS_VLAN) {
+		txd_lower |= E1000_TXD_CMD_VLE;
+		txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK);
+	}
+
+	if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS))
+		txd_lower &= ~(E1000_TXD_CMD_IFCS);
+
+	if (unlikely(tx_flags & E1000_TX_FLAGS_HWTSTAMP)) {
+		txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
+		txd_upper |= E1000_TXD_EXTCMD_TSTAMP;
+	}
+
+	i = tx_ring->next_to_use;
+
+	do {
+		buffer_info = &tx_ring->buffer_info[i];
+		tx_desc = E1000_TX_DESC(*tx_ring, i);
+		tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
+		tx_desc->lower.data = cpu_to_le32(txd_lower |
+						  buffer_info->length);
+		tx_desc->upper.data = cpu_to_le32(txd_upper);
+
+		i++;
+		if (i == tx_ring->count)
+			i = 0;
+	} while (--count > 0);
+
+	tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd);
+
+	/* txd_cmd re-enables FCS, so we'll re-disable it here as desired. */
+	if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS))
+		tx_desc->lower.data &= ~(cpu_to_le32(E1000_TXD_CMD_IFCS));
+
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.  (Only
+	 * applicable for weak-ordered memory model archs,
+	 * such as IA-64).
+	 */
+	wmb();
+
+	tx_ring->next_to_use = i;
+}
+
+#define MINIMUM_DHCP_PACKET_SIZE 282
+static int e1000_transfer_dhcp_info(struct e1000_adapter *adapter,
+				    struct sk_buff *skb)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u16 length, offset;
+
+	if (skb_vlan_tag_present(skb) &&
+	    !((skb_vlan_tag_get(skb) == adapter->hw.mng_cookie.vlan_id) &&
+	      (adapter->hw.mng_cookie.status &
+	       E1000_MNG_DHCP_COOKIE_STATUS_VLAN)))
+		return 0;
+
+	if (skb->len <= MINIMUM_DHCP_PACKET_SIZE)
+		return 0;
+
+	if (((struct ethhdr *)skb->data)->h_proto != htons(ETH_P_IP))
+		return 0;
+
+	{
+		const struct iphdr *ip = (struct iphdr *)((u8 *)skb->data + 14);
+		struct udphdr *udp;
+
+		if (ip->protocol != IPPROTO_UDP)
+			return 0;
+
+		udp = (struct udphdr *)((u8 *)ip + (ip->ihl << 2));
+		if (ntohs(udp->dest) != 67)
+			return 0;
+
+		offset = (u8 *)udp + 8 - skb->data;
+		length = skb->len - offset;
+		return e1000e_mng_write_dhcp_info(hw, (u8 *)udp + 8, length);
+	}
+
+	return 0;
+}
+
+static int __e1000_maybe_stop_tx(struct e1000_ring *tx_ring, int size)
+{
+	struct e1000_adapter *adapter = tx_ring->adapter;
+
+	netif_stop_queue(adapter->netdev);
+	/* Herbert's original patch had:
+	 *  smp_mb__after_netif_stop_queue();
+	 * but since that doesn't exist yet, just open code it.
+	 */
+	smp_mb();
+
+	/* We need to check again in a case another CPU has just
+	 * made room available.
+	 */
+	if (e1000_desc_unused(tx_ring) < size)
+		return -EBUSY;
+
+	/* A reprieve! */
+	netif_start_queue(adapter->netdev);
+	++adapter->restart_queue;
+	return 0;
+}
+
+static int e1000_maybe_stop_tx(struct e1000_ring *tx_ring, int size)
+{
+	BUG_ON(size > tx_ring->count);
+
+	if (e1000_desc_unused(tx_ring) >= size)
+		return 0;
+	return __e1000_maybe_stop_tx(tx_ring, size);
+}
+
+static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
+				    struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_ring *tx_ring = adapter->tx_ring;
+	unsigned int first;
+	unsigned int tx_flags = 0;
+	unsigned int len = skb_headlen(skb);
+	unsigned int nr_frags;
+	unsigned int mss;
+	int count = 0;
+	int tso;
+	unsigned int f;
+	__be16 protocol = vlan_get_protocol(skb);
+
+	if (test_bit(__E1000_DOWN, &adapter->state)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	if (skb->len <= 0) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* The minimum packet size with TCTL.PSP set is 17 bytes so
+	 * pad skb in order to meet this minimum size requirement
+	 */
+	if (skb_put_padto(skb, 17))
+		return NETDEV_TX_OK;
+
+	mss = skb_shinfo(skb)->gso_size;
+	if (mss) {
+		u8 hdr_len;
+
+		/* TSO Workaround for 82571/2/3 Controllers -- if skb->data
+		 * points to just header, pull a few bytes of payload from
+		 * frags into skb->data
+		 */
+		hdr_len = skb_tcp_all_headers(skb);
+		/* we do this workaround for ES2LAN, but it is un-necessary,
+		 * avoiding it could save a lot of cycles
+		 */
+		if (skb->data_len && (hdr_len == len)) {
+			unsigned int pull_size;
+
+			pull_size = min_t(unsigned int, 4, skb->data_len);
+			if (!__pskb_pull_tail(skb, pull_size)) {
+				e_err("__pskb_pull_tail failed.\n");
+				dev_kfree_skb_any(skb);
+				return NETDEV_TX_OK;
+			}
+			len = skb_headlen(skb);
+		}
+	}
+
+	/* reserve a descriptor for the offload context */
+	if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL))
+		count++;
+	count++;
+
+	count += DIV_ROUND_UP(len, adapter->tx_fifo_limit);
+
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	for (f = 0; f < nr_frags; f++)
+		count += DIV_ROUND_UP(skb_frag_size(&skb_shinfo(skb)->frags[f]),
+				      adapter->tx_fifo_limit);
+
+	if (adapter->hw.mac.tx_pkt_filtering)
+		e1000_transfer_dhcp_info(adapter, skb);
+
+	/* need: count + 2 desc gap to keep tail from touching
+	 * head, otherwise try next time
+	 */
+	if (e1000_maybe_stop_tx(tx_ring, count + 2))
+		return NETDEV_TX_BUSY;
+
+	if (skb_vlan_tag_present(skb)) {
+		tx_flags |= E1000_TX_FLAGS_VLAN;
+		tx_flags |= (skb_vlan_tag_get(skb) <<
+			     E1000_TX_FLAGS_VLAN_SHIFT);
+	}
+
+	first = tx_ring->next_to_use;
+
+	tso = e1000_tso(tx_ring, skb, protocol);
+	if (tso < 0) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	if (tso)
+		tx_flags |= E1000_TX_FLAGS_TSO;
+	else if (e1000_tx_csum(tx_ring, skb, protocol))
+		tx_flags |= E1000_TX_FLAGS_CSUM;
+
+	/* Old method was to assume IPv4 packet by default if TSO was enabled.
+	 * 82571 hardware supports TSO capabilities for IPv6 as well...
+	 * no longer assume, we must.
+	 */
+	if (protocol == htons(ETH_P_IP))
+		tx_flags |= E1000_TX_FLAGS_IPV4;
+
+	if (unlikely(skb->no_fcs))
+		tx_flags |= E1000_TX_FLAGS_NO_FCS;
+
+	/* if count is 0 then mapping error has occurred */
+	count = e1000_tx_map(tx_ring, skb, first, adapter->tx_fifo_limit,
+			     nr_frags);
+	if (count) {
+		if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+		    (adapter->flags & FLAG_HAS_HW_TIMESTAMP)) {
+			if (!adapter->tx_hwtstamp_skb) {
+				skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+				tx_flags |= E1000_TX_FLAGS_HWTSTAMP;
+				adapter->tx_hwtstamp_skb = skb_get(skb);
+				adapter->tx_hwtstamp_start = jiffies;
+				schedule_work(&adapter->tx_hwtstamp_work);
+			} else {
+				adapter->tx_hwtstamp_skipped++;
+			}
+		}
+
+		skb_tx_timestamp(skb);
+
+		netdev_sent_queue(netdev, skb->len);
+		e1000_tx_queue(tx_ring, tx_flags, count);
+		/* Make sure there is space in the ring for the next send. */
+		e1000_maybe_stop_tx(tx_ring,
+				    ((MAX_SKB_FRAGS + 1) *
+				     DIV_ROUND_UP(PAGE_SIZE,
+						  adapter->tx_fifo_limit) + 4));
+
+		if (!netdev_xmit_more() ||
+		    netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) {
+			if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
+				e1000e_update_tdt_wa(tx_ring,
+						     tx_ring->next_to_use);
+			else
+				writel(tx_ring->next_to_use, tx_ring->tail);
+		}
+	} else {
+		dev_kfree_skb_any(skb);
+		tx_ring->buffer_info[first].time_stamp = 0;
+		tx_ring->next_to_use = first;
+	}
+
+	return NETDEV_TX_OK;
+}
+
+/**
+ * e1000_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ * @txqueue: index of the hung queue (unused)
+ **/
+static void e1000_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	/* Do the reset outside of interrupt context */
+	adapter->tx_timeout_count++;
+	schedule_work(&adapter->reset_task);
+}
+
+static void e1000_reset_task(struct work_struct *work)
+{
+	struct e1000_adapter *adapter;
+	adapter = container_of(work, struct e1000_adapter, reset_task);
+
+	rtnl_lock();
+	/* don't run the task if already down */
+	if (test_bit(__E1000_DOWN, &adapter->state)) {
+		rtnl_unlock();
+		return;
+	}
+
+	if (!(adapter->flags & FLAG_RESTART_NOW)) {
+		e1000e_dump(adapter);
+		e_err("Reset adapter unexpectedly\n");
+	}
+	e1000e_reinit_locked(adapter);
+	rtnl_unlock();
+}
+
+/**
+ * e1000e_get_stats64 - Get System Network Statistics
+ * @netdev: network interface device structure
+ * @stats: rtnl_link_stats64 pointer
+ *
+ * Returns the address of the device statistics structure.
+ **/
+void e1000e_get_stats64(struct net_device *netdev,
+			struct rtnl_link_stats64 *stats)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	spin_lock(&adapter->stats64_lock);
+	e1000e_update_stats(adapter);
+	/* Fill out the OS statistics structure */
+	stats->rx_bytes = adapter->stats.gorc;
+	stats->rx_packets = adapter->stats.gprc;
+	stats->tx_bytes = adapter->stats.gotc;
+	stats->tx_packets = adapter->stats.gptc;
+	stats->multicast = adapter->stats.mprc;
+	stats->collisions = adapter->stats.colc;
+
+	/* Rx Errors */
+
+	/* RLEC on some newer hardware can be incorrect so build
+	 * our own version based on RUC and ROC
+	 */
+	stats->rx_errors = adapter->stats.rxerrc +
+	    adapter->stats.crcerrs + adapter->stats.algnerrc +
+	    adapter->stats.ruc + adapter->stats.roc + adapter->stats.cexterr;
+	stats->rx_length_errors = adapter->stats.ruc + adapter->stats.roc;
+	stats->rx_crc_errors = adapter->stats.crcerrs;
+	stats->rx_frame_errors = adapter->stats.algnerrc;
+	stats->rx_missed_errors = adapter->stats.mpc;
+
+	/* Tx Errors */
+	stats->tx_errors = adapter->stats.ecol + adapter->stats.latecol;
+	stats->tx_aborted_errors = adapter->stats.ecol;
+	stats->tx_window_errors = adapter->stats.latecol;
+	stats->tx_carrier_errors = adapter->stats.tncrs;
+
+	/* Tx Dropped needs to be maintained elsewhere */
+
+	spin_unlock(&adapter->stats64_lock);
+}
+
+/**
+ * e1000_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	int max_frame = new_mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
+
+	/* Jumbo frame support */
+	if ((new_mtu > ETH_DATA_LEN) &&
+	    !(adapter->flags & FLAG_HAS_JUMBO_FRAMES)) {
+		e_err("Jumbo Frames not supported.\n");
+		return -EINVAL;
+	}
+
+	/* Jumbo frame workaround on 82579 and newer requires CRC be stripped */
+	if ((adapter->hw.mac.type >= e1000_pch2lan) &&
+	    !(adapter->flags2 & FLAG2_CRC_STRIPPING) &&
+	    (new_mtu > ETH_DATA_LEN)) {
+		e_err("Jumbo Frames not supported on this device when CRC stripping is disabled.\n");
+		return -EINVAL;
+	}
+
+	while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
+		usleep_range(1000, 1100);
+	/* e1000e_down -> e1000e_reset dependent on max_frame_size & mtu */
+	adapter->max_frame_size = max_frame;
+	netdev_dbg(netdev, "changing MTU from %d to %d\n",
+		   netdev->mtu, new_mtu);
+	netdev->mtu = new_mtu;
+
+	pm_runtime_get_sync(netdev->dev.parent);
+
+	if (netif_running(netdev))
+		e1000e_down(adapter, true);
+
+	/* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
+	 * means we reserve 2 more, this pushes us to allocate from the next
+	 * larger slab size.
+	 * i.e. RXBUFFER_2048 --> size-4096 slab
+	 * However with the new *_jumbo_rx* routines, jumbo receives will use
+	 * fragmented skbs
+	 */
+
+	if (max_frame <= 2048)
+		adapter->rx_buffer_len = 2048;
+	else
+		adapter->rx_buffer_len = 4096;
+
+	/* adjust allocation if LPE protects us, and we aren't using SBP */
+	if (max_frame <= (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN))
+		adapter->rx_buffer_len = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN;
+
+	if (netif_running(netdev))
+		e1000e_up(adapter);
+	else
+		e1000e_reset(adapter);
+
+	pm_runtime_put_sync(netdev->dev.parent);
+
+	clear_bit(__E1000_RESETTING, &adapter->state);
+
+	return 0;
+}
+
+static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
+			   int cmd)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct mii_ioctl_data *data = if_mii(ifr);
+
+	if (adapter->hw.phy.media_type != e1000_media_type_copper)
+		return -EOPNOTSUPP;
+
+	switch (cmd) {
+	case SIOCGMIIPHY:
+		data->phy_id = adapter->hw.phy.addr;
+		break;
+	case SIOCGMIIREG:
+		e1000_phy_read_status(adapter);
+
+		switch (data->reg_num & 0x1F) {
+		case MII_BMCR:
+			data->val_out = adapter->phy_regs.bmcr;
+			break;
+		case MII_BMSR:
+			data->val_out = adapter->phy_regs.bmsr;
+			break;
+		case MII_PHYSID1:
+			data->val_out = (adapter->hw.phy.id >> 16);
+			break;
+		case MII_PHYSID2:
+			data->val_out = (adapter->hw.phy.id & 0xFFFF);
+			break;
+		case MII_ADVERTISE:
+			data->val_out = adapter->phy_regs.advertise;
+			break;
+		case MII_LPA:
+			data->val_out = adapter->phy_regs.lpa;
+			break;
+		case MII_EXPANSION:
+			data->val_out = adapter->phy_regs.expansion;
+			break;
+		case MII_CTRL1000:
+			data->val_out = adapter->phy_regs.ctrl1000;
+			break;
+		case MII_STAT1000:
+			data->val_out = adapter->phy_regs.stat1000;
+			break;
+		case MII_ESTATUS:
+			data->val_out = adapter->phy_regs.estatus;
+			break;
+		default:
+			return -EIO;
+		}
+		break;
+	case SIOCSMIIREG:
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+/**
+ * e1000e_hwtstamp_set - control hardware time stamping
+ * @netdev: network interface device structure
+ * @ifr: interface request
+ *
+ * Outgoing time stamping can be enabled and disabled. Play nice and
+ * disable it when requested, although it shouldn't cause any overhead
+ * when no packet needs it. At most one packet in the queue may be
+ * marked for time stamping, otherwise it would be impossible to tell
+ * for sure to which packet the hardware time stamp belongs.
+ *
+ * Incoming time stamping has to be configured via the hardware filters.
+ * Not all combinations are supported, in particular event type has to be
+ * specified. Matching the kind of event packet is not supported, with the
+ * exception of "all V2 events regardless of level 2 or 4".
+ **/
+static int e1000e_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct hwtstamp_config config;
+	int ret_val;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	ret_val = e1000e_config_hwtstamp(adapter, &config);
+	if (ret_val)
+		return ret_val;
+
+	switch (config.rx_filter) {
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		/* With V2 type filters which specify a Sync or Delay Request,
+		 * Path Delay Request/Response messages are also time stamped
+		 * by hardware so notify the caller the requested packets plus
+		 * some others are time stamped.
+		 */
+		config.rx_filter = HWTSTAMP_FILTER_SOME;
+		break;
+	default:
+		break;
+	}
+
+	return copy_to_user(ifr->ifr_data, &config,
+			    sizeof(config)) ? -EFAULT : 0;
+}
+
+static int e1000e_hwtstamp_get(struct net_device *netdev, struct ifreq *ifr)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	return copy_to_user(ifr->ifr_data, &adapter->hwtstamp_config,
+			    sizeof(adapter->hwtstamp_config)) ? -EFAULT : 0;
+}
+
+static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	switch (cmd) {
+	case SIOCGMIIPHY:
+	case SIOCGMIIREG:
+	case SIOCSMIIREG:
+		return e1000_mii_ioctl(netdev, ifr, cmd);
+	case SIOCSHWTSTAMP:
+		return e1000e_hwtstamp_set(netdev, ifr);
+	case SIOCGHWTSTAMP:
+		return e1000e_hwtstamp_get(netdev, ifr);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int e1000_init_phy_wakeup(struct e1000_adapter *adapter, u32 wufc)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 i, mac_reg, wuc;
+	u16 phy_reg, wuc_enable;
+	int retval;
+
+	/* copy MAC RARs to PHY RARs */
+	e1000_copy_rx_addrs_to_phy_ich8lan(hw);
+
+	retval = hw->phy.ops.acquire(hw);
+	if (retval) {
+		e_err("Could not acquire PHY\n");
+		return retval;
+	}
+
+	/* Enable access to wakeup registers on and set page to BM_WUC_PAGE */
+	retval = e1000_enable_phy_wakeup_reg_access_bm(hw, &wuc_enable);
+	if (retval)
+		goto release;
+
+	/* copy MAC MTA to PHY MTA - only needed for pchlan */
+	for (i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
+		mac_reg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
+		hw->phy.ops.write_reg_page(hw, BM_MTA(i),
+					   (u16)(mac_reg & 0xFFFF));
+		hw->phy.ops.write_reg_page(hw, BM_MTA(i) + 1,
+					   (u16)((mac_reg >> 16) & 0xFFFF));
+	}
+
+	/* configure PHY Rx Control register */
+	hw->phy.ops.read_reg_page(&adapter->hw, BM_RCTL, &phy_reg);
+	mac_reg = er32(RCTL);
+	if (mac_reg & E1000_RCTL_UPE)
+		phy_reg |= BM_RCTL_UPE;
+	if (mac_reg & E1000_RCTL_MPE)
+		phy_reg |= BM_RCTL_MPE;
+	phy_reg &= ~(BM_RCTL_MO_MASK);
+	if (mac_reg & E1000_RCTL_MO_3)
+		phy_reg |= (((mac_reg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
+			    << BM_RCTL_MO_SHIFT);
+	if (mac_reg & E1000_RCTL_BAM)
+		phy_reg |= BM_RCTL_BAM;
+	if (mac_reg & E1000_RCTL_PMCF)
+		phy_reg |= BM_RCTL_PMCF;
+	mac_reg = er32(CTRL);
+	if (mac_reg & E1000_CTRL_RFCE)
+		phy_reg |= BM_RCTL_RFCE;
+	hw->phy.ops.write_reg_page(&adapter->hw, BM_RCTL, phy_reg);
+
+	wuc = E1000_WUC_PME_EN;
+	if (wufc & (E1000_WUFC_MAG | E1000_WUFC_LNKC))
+		wuc |= E1000_WUC_APME;
+
+	/* enable PHY wakeup in MAC register */
+	ew32(WUFC, wufc);
+	ew32(WUC, (E1000_WUC_PHY_WAKE | E1000_WUC_APMPME |
+		   E1000_WUC_PME_STATUS | wuc));
+
+	/* configure and enable PHY wakeup in PHY registers */
+	hw->phy.ops.write_reg_page(&adapter->hw, BM_WUFC, wufc);
+	hw->phy.ops.write_reg_page(&adapter->hw, BM_WUC, wuc);
+
+	/* activate PHY wakeup */
+	wuc_enable |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
+	retval = e1000_disable_phy_wakeup_reg_access_bm(hw, &wuc_enable);
+	if (retval)
+		e_err("Could not set PHY Host Wakeup bit\n");
+release:
+	hw->phy.ops.release(hw);
+
+	return retval;
+}
+
+static void e1000e_flush_lpic(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ret_val;
+
+	pm_runtime_get_sync(netdev->dev.parent);
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		goto fl_out;
+
+	pr_info("EEE TX LPI TIMER: %08X\n",
+		er32(LPIC) >> E1000_LPIC_LPIET_SHIFT);
+
+	hw->phy.ops.release(hw);
+
+fl_out:
+	pm_runtime_put_sync(netdev->dev.parent);
+}
+
+/* S0ix implementation */
+static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 mac_data;
+	u16 phy_data;
+
+	if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
+	    hw->mac.type >= e1000_pch_adp) {
+		/* Request ME configure the device for S0ix */
+		mac_data = er32(H2ME);
+		mac_data |= E1000_H2ME_START_DPG;
+		mac_data &= ~E1000_H2ME_EXIT_DPG;
+		trace_e1000e_trace_mac_register(mac_data);
+		ew32(H2ME, mac_data);
+	} else {
+		/* Request driver configure the device to S0ix */
+		/* Disable the periodic inband message,
+		 * don't request PCIe clock in K1 page770_17[10:9] = 10b
+		 */
+		e1e_rphy(hw, HV_PM_CTRL, &phy_data);
+		phy_data &= ~HV_PM_CTRL_K1_CLK_REQ;
+		phy_data |= BIT(10);
+		e1e_wphy(hw, HV_PM_CTRL, phy_data);
+
+		/* Make sure we don't exit K1 every time a new packet arrives
+		 * 772_29[5] = 1 CS_Mode_Stay_In_K1
+		 */
+		e1e_rphy(hw, I217_CGFREG, &phy_data);
+		phy_data |= BIT(5);
+		e1e_wphy(hw, I217_CGFREG, phy_data);
+
+		/* Change the MAC/PHY interface to SMBus
+		 * Force the SMBus in PHY page769_23[0] = 1
+		 * Force the SMBus in MAC CTRL_EXT[11] = 1
+		 */
+		e1e_rphy(hw, CV_SMB_CTRL, &phy_data);
+		phy_data |= CV_SMB_CTRL_FORCE_SMBUS;
+		e1e_wphy(hw, CV_SMB_CTRL, phy_data);
+		mac_data = er32(CTRL_EXT);
+		mac_data |= E1000_CTRL_EXT_FORCE_SMBUS;
+		ew32(CTRL_EXT, mac_data);
+
+		/* DFT control: PHY bit: page769_20[0] = 1
+		 * page769_20[7] - PHY PLL stop
+		 * page769_20[8] - PHY go to the electrical idle
+		 * page769_20[9] - PHY serdes disable
+		 * Gate PPW via EXTCNF_CTRL - set 0x0F00[7] = 1
+		 */
+		e1e_rphy(hw, I82579_DFT_CTRL, &phy_data);
+		phy_data |= BIT(0);
+		phy_data |= BIT(7);
+		phy_data |= BIT(8);
+		phy_data |= BIT(9);
+		e1e_wphy(hw, I82579_DFT_CTRL, phy_data);
+
+		mac_data = er32(EXTCNF_CTRL);
+		mac_data |= E1000_EXTCNF_CTRL_GATE_PHY_CFG;
+		ew32(EXTCNF_CTRL, mac_data);
+
+		/* Enable the Dynamic Power Gating in the MAC */
+		mac_data = er32(FEXTNVM7);
+		mac_data |= BIT(22);
+		ew32(FEXTNVM7, mac_data);
+
+		/* Disable disconnected cable conditioning for Power Gating */
+		mac_data = er32(DPGFR);
+		mac_data |= BIT(2);
+		ew32(DPGFR, mac_data);
+
+		/* Don't wake from dynamic Power Gating with clock request */
+		mac_data = er32(FEXTNVM12);
+		mac_data |= BIT(12);
+		ew32(FEXTNVM12, mac_data);
+
+		/* Ungate PGCB clock */
+		mac_data = er32(FEXTNVM9);
+		mac_data &= ~BIT(28);
+		ew32(FEXTNVM9, mac_data);
+
+		/* Enable K1 off to enable mPHY Power Gating */
+		mac_data = er32(FEXTNVM6);
+		mac_data |= BIT(31);
+		ew32(FEXTNVM6, mac_data);
+
+		/* Enable mPHY power gating for any link and speed */
+		mac_data = er32(FEXTNVM8);
+		mac_data |= BIT(9);
+		ew32(FEXTNVM8, mac_data);
+
+		/* Enable the Dynamic Clock Gating in the DMA and MAC */
+		mac_data = er32(CTRL_EXT);
+		mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN;
+		ew32(CTRL_EXT, mac_data);
+
+		/* No MAC DPG gating SLP_S0 in modern standby
+		 * Switch the logic of the lanphypc to use PMC counter
+		 */
+		mac_data = er32(FEXTNVM5);
+		mac_data |= BIT(7);
+		ew32(FEXTNVM5, mac_data);
+	}
+
+	/* Disable the time synchronization clock */
+	mac_data = er32(FEXTNVM7);
+	mac_data |= BIT(31);
+	mac_data &= ~BIT(0);
+	ew32(FEXTNVM7, mac_data);
+
+	/* Dynamic Power Gating Enable */
+	mac_data = er32(CTRL_EXT);
+	mac_data |= BIT(3);
+	ew32(CTRL_EXT, mac_data);
+
+	/* Check MAC Tx/Rx packet buffer pointers.
+	 * Reset MAC Tx/Rx packet buffer pointers to suppress any
+	 * pending traffic indication that would prevent power gating.
+	 */
+	mac_data = er32(TDFH);
+	if (mac_data)
+		ew32(TDFH, 0);
+	mac_data = er32(TDFT);
+	if (mac_data)
+		ew32(TDFT, 0);
+	mac_data = er32(TDFHS);
+	if (mac_data)
+		ew32(TDFHS, 0);
+	mac_data = er32(TDFTS);
+	if (mac_data)
+		ew32(TDFTS, 0);
+	mac_data = er32(TDFPC);
+	if (mac_data)
+		ew32(TDFPC, 0);
+	mac_data = er32(RDFH);
+	if (mac_data)
+		ew32(RDFH, 0);
+	mac_data = er32(RDFT);
+	if (mac_data)
+		ew32(RDFT, 0);
+	mac_data = er32(RDFHS);
+	if (mac_data)
+		ew32(RDFHS, 0);
+	mac_data = er32(RDFTS);
+	if (mac_data)
+		ew32(RDFTS, 0);
+	mac_data = er32(RDFPC);
+	if (mac_data)
+		ew32(RDFPC, 0);
+}
+
+static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	bool firmware_bug = false;
+	u32 mac_data;
+	u16 phy_data;
+	u32 i = 0;
+
+	if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
+	    hw->mac.type >= e1000_pch_adp) {
+		/* Keep the GPT clock enabled for CSME */
+		mac_data = er32(FEXTNVM);
+		mac_data |= BIT(3);
+		ew32(FEXTNVM, mac_data);
+		/* Request ME unconfigure the device from S0ix */
+		mac_data = er32(H2ME);
+		mac_data &= ~E1000_H2ME_START_DPG;
+		mac_data |= E1000_H2ME_EXIT_DPG;
+		trace_e1000e_trace_mac_register(mac_data);
+		ew32(H2ME, mac_data);
+
+		/* Poll up to 2.5 seconds for ME to unconfigure DPG.
+		 * If this takes more than 1 second, show a warning indicating a
+		 * firmware bug
+		 */
+		while (!(er32(EXFWSM) & E1000_EXFWSM_DPG_EXIT_DONE)) {
+			if (i > 100 && !firmware_bug)
+				firmware_bug = true;
+
+			if (i++ == 250) {
+				e_dbg("Timeout (firmware bug): %d msec\n",
+				      i * 10);
+				break;
+			}
+
+			usleep_range(10000, 11000);
+		}
+		if (firmware_bug)
+			e_warn("DPG_EXIT_DONE took %d msec. This is a firmware bug\n",
+			       i * 10);
+		else
+			e_dbg("DPG_EXIT_DONE cleared after %d msec\n", i * 10);
+	} else {
+		/* Request driver unconfigure the device from S0ix */
+
+		/* Disable the Dynamic Power Gating in the MAC */
+		mac_data = er32(FEXTNVM7);
+		mac_data &= 0xFFBFFFFF;
+		ew32(FEXTNVM7, mac_data);
+
+		/* Disable mPHY power gating for any link and speed */
+		mac_data = er32(FEXTNVM8);
+		mac_data &= ~BIT(9);
+		ew32(FEXTNVM8, mac_data);
+
+		/* Disable K1 off */
+		mac_data = er32(FEXTNVM6);
+		mac_data &= ~BIT(31);
+		ew32(FEXTNVM6, mac_data);
+
+		/* Disable Ungate PGCB clock */
+		mac_data = er32(FEXTNVM9);
+		mac_data |= BIT(28);
+		ew32(FEXTNVM9, mac_data);
+
+		/* Cancel not waking from dynamic
+		 * Power Gating with clock request
+		 */
+		mac_data = er32(FEXTNVM12);
+		mac_data &= ~BIT(12);
+		ew32(FEXTNVM12, mac_data);
+
+		/* Cancel disable disconnected cable conditioning
+		 * for Power Gating
+		 */
+		mac_data = er32(DPGFR);
+		mac_data &= ~BIT(2);
+		ew32(DPGFR, mac_data);
+
+		/* Disable the Dynamic Clock Gating in the DMA and MAC */
+		mac_data = er32(CTRL_EXT);
+		mac_data &= 0xFFF7FFFF;
+		ew32(CTRL_EXT, mac_data);
+
+		/* Revert the lanphypc logic to use the internal Gbe counter
+		 * and not the PMC counter
+		 */
+		mac_data = er32(FEXTNVM5);
+		mac_data &= 0xFFFFFF7F;
+		ew32(FEXTNVM5, mac_data);
+
+		/* Enable the periodic inband message,
+		 * Request PCIe clock in K1 page770_17[10:9] =01b
+		 */
+		e1e_rphy(hw, HV_PM_CTRL, &phy_data);
+		phy_data &= 0xFBFF;
+		phy_data |= HV_PM_CTRL_K1_CLK_REQ;
+		e1e_wphy(hw, HV_PM_CTRL, phy_data);
+
+		/* Return back configuration
+		 * 772_29[5] = 0 CS_Mode_Stay_In_K1
+		 */
+		e1e_rphy(hw, I217_CGFREG, &phy_data);
+		phy_data &= 0xFFDF;
+		e1e_wphy(hw, I217_CGFREG, phy_data);
+
+		/* Change the MAC/PHY interface to Kumeran
+		 * Unforce the SMBus in PHY page769_23[0] = 0
+		 * Unforce the SMBus in MAC CTRL_EXT[11] = 0
+		 */
+		e1e_rphy(hw, CV_SMB_CTRL, &phy_data);
+		phy_data &= ~CV_SMB_CTRL_FORCE_SMBUS;
+		e1e_wphy(hw, CV_SMB_CTRL, phy_data);
+		mac_data = er32(CTRL_EXT);
+		mac_data &= ~E1000_CTRL_EXT_FORCE_SMBUS;
+		ew32(CTRL_EXT, mac_data);
+	}
+
+	/* Disable Dynamic Power Gating */
+	mac_data = er32(CTRL_EXT);
+	mac_data &= 0xFFFFFFF7;
+	ew32(CTRL_EXT, mac_data);
+
+	/* Enable the time synchronization clock */
+	mac_data = er32(FEXTNVM7);
+	mac_data &= ~BIT(31);
+	mac_data |= BIT(0);
+	ew32(FEXTNVM7, mac_data);
+}
+
+static int e1000e_pm_freeze(struct device *dev)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	bool present;
+
+	rtnl_lock();
+
+	present = netif_device_present(netdev);
+	netif_device_detach(netdev);
+
+	if (present && netif_running(netdev)) {
+		int count = E1000_CHECK_RESET_COUNT;
+
+		while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
+			usleep_range(10000, 11000);
+
+		WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
+
+		/* Quiesce the device without resetting the hardware */
+		e1000e_down(adapter, false);
+		e1000_free_irq(adapter);
+	}
+	rtnl_unlock();
+
+	e1000e_reset_interrupt_capability(adapter);
+
+	/* Allow time for pending master requests to run */
+	e1000e_disable_pcie_master(&adapter->hw);
+
+	return 0;
+}
+
+static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl, ctrl_ext, rctl, status, wufc;
+	int retval = 0;
+
+	/* Runtime suspend should only enable wakeup for link changes */
+	if (runtime)
+		wufc = E1000_WUFC_LNKC;
+	else if (device_may_wakeup(&pdev->dev))
+		wufc = adapter->wol;
+	else
+		wufc = 0;
+
+	status = er32(STATUS);
+	if (status & E1000_STATUS_LU)
+		wufc &= ~E1000_WUFC_LNKC;
+
+	if (wufc) {
+		e1000_setup_rctl(adapter);
+		e1000e_set_rx_mode(netdev);
+
+		/* turn on all-multi mode if wake on multicast is enabled */
+		if (wufc & E1000_WUFC_MC) {
+			rctl = er32(RCTL);
+			rctl |= E1000_RCTL_MPE;
+			ew32(RCTL, rctl);
+		}
+
+		ctrl = er32(CTRL);
+		ctrl |= E1000_CTRL_ADVD3WUC;
+		if (!(adapter->flags2 & FLAG2_HAS_PHY_WAKEUP))
+			ctrl |= E1000_CTRL_EN_PHY_PWR_MGMT;
+		ew32(CTRL, ctrl);
+
+		if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
+		    adapter->hw.phy.media_type ==
+		    e1000_media_type_internal_serdes) {
+			/* keep the laser running in D3 */
+			ctrl_ext = er32(CTRL_EXT);
+			ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
+			ew32(CTRL_EXT, ctrl_ext);
+		}
+
+		if (!runtime)
+			e1000e_power_up_phy(adapter);
+
+		if (adapter->flags & FLAG_IS_ICH)
+			e1000_suspend_workarounds_ich8lan(&adapter->hw);
+
+		if (adapter->flags2 & FLAG2_HAS_PHY_WAKEUP) {
+			/* enable wakeup by the PHY */
+			retval = e1000_init_phy_wakeup(adapter, wufc);
+			if (retval)
+				return retval;
+		} else {
+			/* enable wakeup by the MAC */
+			ew32(WUFC, wufc);
+			ew32(WUC, E1000_WUC_PME_EN);
+		}
+	} else {
+		ew32(WUC, 0);
+		ew32(WUFC, 0);
+
+		e1000_power_down_phy(adapter);
+	}
+
+	if (adapter->hw.phy.type == e1000_phy_igp_3) {
+		e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
+	} else if (hw->mac.type >= e1000_pch_lpt) {
+		if (wufc && !(wufc & (E1000_WUFC_EX | E1000_WUFC_MC | E1000_WUFC_BC)))
+			/* ULP does not support wake from unicast, multicast
+			 * or broadcast.
+			 */
+			retval = e1000_enable_ulp_lpt_lp(hw, !runtime);
+
+		if (retval)
+			return retval;
+	}
+
+	/* Ensure that the appropriate bits are set in LPI_CTRL
+	 * for EEE in Sx
+	 */
+	if ((hw->phy.type >= e1000_phy_i217) &&
+	    adapter->eee_advert && hw->dev_spec.ich8lan.eee_lp_ability) {
+		u16 lpi_ctrl = 0;
+
+		retval = hw->phy.ops.acquire(hw);
+		if (!retval) {
+			retval = e1e_rphy_locked(hw, I82579_LPI_CTRL,
+						 &lpi_ctrl);
+			if (!retval) {
+				if (adapter->eee_advert &
+				    hw->dev_spec.ich8lan.eee_lp_ability &
+				    I82579_EEE_100_SUPPORTED)
+					lpi_ctrl |= I82579_LPI_CTRL_100_ENABLE;
+				if (adapter->eee_advert &
+				    hw->dev_spec.ich8lan.eee_lp_ability &
+				    I82579_EEE_1000_SUPPORTED)
+					lpi_ctrl |= I82579_LPI_CTRL_1000_ENABLE;
+
+				retval = e1e_wphy_locked(hw, I82579_LPI_CTRL,
+							 lpi_ctrl);
+			}
+		}
+		hw->phy.ops.release(hw);
+	}
+
+	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
+	 * would have already happened in close and is redundant.
+	 */
+	e1000e_release_hw_control(adapter);
+
+	pci_clear_master(pdev);
+
+	/* The pci-e switch on some quad port adapters will report a
+	 * correctable error when the MAC transitions from D0 to D3.  To
+	 * prevent this we need to mask off the correctable errors on the
+	 * downstream port of the pci-e switch.
+	 *
+	 * We don't have the associated upstream bridge while assigning
+	 * the PCI device into guest. For example, the KVM on power is
+	 * one of the cases.
+	 */
+	if (adapter->flags & FLAG_IS_QUAD_PORT) {
+		struct pci_dev *us_dev = pdev->bus->self;
+		u16 devctl;
+
+		if (!us_dev)
+			return 0;
+
+		pcie_capability_read_word(us_dev, PCI_EXP_DEVCTL, &devctl);
+		pcie_capability_write_word(us_dev, PCI_EXP_DEVCTL,
+					   (devctl & ~PCI_EXP_DEVCTL_CERE));
+
+		pci_save_state(pdev);
+		pci_prepare_to_sleep(pdev);
+
+		pcie_capability_write_word(us_dev, PCI_EXP_DEVCTL, devctl);
+	}
+
+	return 0;
+}
+
+/**
+ * __e1000e_disable_aspm - Disable ASPM states
+ * @pdev: pointer to PCI device struct
+ * @state: bit-mask of ASPM states to disable
+ * @locked: indication if this context holds pci_bus_sem locked.
+ *
+ * Some devices *must* have certain ASPM states disabled per hardware errata.
+ **/
+static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state, int locked)
+{
+	struct pci_dev *parent = pdev->bus->self;
+	u16 aspm_dis_mask = 0;
+	u16 pdev_aspmc, parent_aspmc;
+
+	switch (state) {
+	case PCIE_LINK_STATE_L0S:
+	case PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1:
+		aspm_dis_mask |= PCI_EXP_LNKCTL_ASPM_L0S;
+		fallthrough; /* can't have L1 without L0s */
+	case PCIE_LINK_STATE_L1:
+		aspm_dis_mask |= PCI_EXP_LNKCTL_ASPM_L1;
+		break;
+	default:
+		return;
+	}
+
+	pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &pdev_aspmc);
+	pdev_aspmc &= PCI_EXP_LNKCTL_ASPMC;
+
+	if (parent) {
+		pcie_capability_read_word(parent, PCI_EXP_LNKCTL,
+					  &parent_aspmc);
+		parent_aspmc &= PCI_EXP_LNKCTL_ASPMC;
+	}
+
+	/* Nothing to do if the ASPM states to be disabled already are */
+	if (!(pdev_aspmc & aspm_dis_mask) &&
+	    (!parent || !(parent_aspmc & aspm_dis_mask)))
+		return;
+
+	dev_info(&pdev->dev, "Disabling ASPM %s %s\n",
+		 (aspm_dis_mask & pdev_aspmc & PCI_EXP_LNKCTL_ASPM_L0S) ?
+		 "L0s" : "",
+		 (aspm_dis_mask & pdev_aspmc & PCI_EXP_LNKCTL_ASPM_L1) ?
+		 "L1" : "");
+
+#ifdef CONFIG_PCIEASPM
+	if (locked)
+		pci_disable_link_state_locked(pdev, state);
+	else
+		pci_disable_link_state(pdev, state);
+
+	/* Double-check ASPM control.  If not disabled by the above, the
+	 * BIOS is preventing that from happening (or CONFIG_PCIEASPM is
+	 * not enabled); override by writing PCI config space directly.
+	 */
+	pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &pdev_aspmc);
+	pdev_aspmc &= PCI_EXP_LNKCTL_ASPMC;
+
+	if (!(aspm_dis_mask & pdev_aspmc))
+		return;
+#endif
+
+	/* Both device and parent should have the same ASPM setting.
+	 * Disable ASPM in downstream component first and then upstream.
+	 */
+	pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, aspm_dis_mask);
+
+	if (parent)
+		pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
+					   aspm_dis_mask);
+}
+
+/**
+ * e1000e_disable_aspm - Disable ASPM states.
+ * @pdev: pointer to PCI device struct
+ * @state: bit-mask of ASPM states to disable
+ *
+ * This function acquires the pci_bus_sem!
+ * Some devices *must* have certain ASPM states disabled per hardware errata.
+ **/
+static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state)
+{
+	__e1000e_disable_aspm(pdev, state, 0);
+}
+
+/**
+ * e1000e_disable_aspm_locked - Disable ASPM states.
+ * @pdev: pointer to PCI device struct
+ * @state: bit-mask of ASPM states to disable
+ *
+ * This function must be called with pci_bus_sem acquired!
+ * Some devices *must* have certain ASPM states disabled per hardware errata.
+ **/
+static void e1000e_disable_aspm_locked(struct pci_dev *pdev, u16 state)
+{
+	__e1000e_disable_aspm(pdev, state, 1);
+}
+
+static int e1000e_pm_thaw(struct device *dev)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	int rc = 0;
+
+	e1000e_set_interrupt_capability(adapter);
+
+	rtnl_lock();
+	if (netif_running(netdev)) {
+		rc = e1000_request_irq(adapter);
+		if (rc)
+			goto err_irq;
+
+		e1000e_up(adapter);
+	}
+
+	netif_device_attach(netdev);
+err_irq:
+	rtnl_unlock();
+
+	return rc;
+}
+
+static int __e1000_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u16 aspm_disable_flag = 0;
+
+	if (adapter->flags2 & FLAG2_DISABLE_ASPM_L0S)
+		aspm_disable_flag = PCIE_LINK_STATE_L0S;
+	if (adapter->flags2 & FLAG2_DISABLE_ASPM_L1)
+		aspm_disable_flag |= PCIE_LINK_STATE_L1;
+	if (aspm_disable_flag)
+		e1000e_disable_aspm(pdev, aspm_disable_flag);
+
+	pci_set_master(pdev);
+
+	if (hw->mac.type >= e1000_pch2lan)
+		e1000_resume_workarounds_pchlan(&adapter->hw);
+
+	e1000e_power_up_phy(adapter);
+
+	/* report the system wakeup cause from S3/S4 */
+	if (adapter->flags2 & FLAG2_HAS_PHY_WAKEUP) {
+		u16 phy_data;
+
+		e1e_rphy(&adapter->hw, BM_WUS, &phy_data);
+		if (phy_data) {
+			e_info("PHY Wakeup cause - %s\n",
+			       phy_data & E1000_WUS_EX ? "Unicast Packet" :
+			       phy_data & E1000_WUS_MC ? "Multicast Packet" :
+			       phy_data & E1000_WUS_BC ? "Broadcast Packet" :
+			       phy_data & E1000_WUS_MAG ? "Magic Packet" :
+			       phy_data & E1000_WUS_LNKC ?
+			       "Link Status Change" : "other");
+		}
+		e1e_wphy(&adapter->hw, BM_WUS, ~0);
+	} else {
+		u32 wus = er32(WUS);
+
+		if (wus) {
+			e_info("MAC Wakeup cause - %s\n",
+			       wus & E1000_WUS_EX ? "Unicast Packet" :
+			       wus & E1000_WUS_MC ? "Multicast Packet" :
+			       wus & E1000_WUS_BC ? "Broadcast Packet" :
+			       wus & E1000_WUS_MAG ? "Magic Packet" :
+			       wus & E1000_WUS_LNKC ? "Link Status Change" :
+			       "other");
+		}
+		ew32(WUS, ~0);
+	}
+
+	e1000e_reset(adapter);
+
+	e1000_init_manageability_pt(adapter);
+
+	/* If the controller has AMT, do not set DRV_LOAD until the interface
+	 * is up.  For all other cases, let the f/w know that the h/w is now
+	 * under the control of the driver.
+	 */
+	if (!(adapter->flags & FLAG_HAS_AMT))
+		e1000e_get_hw_control(adapter);
+
+	return 0;
+}
+
+static __maybe_unused int e1000e_pm_prepare(struct device *dev)
+{
+	return pm_runtime_suspended(dev) &&
+		pm_suspend_via_firmware();
+}
+
+static __maybe_unused int e1000e_pm_suspend(struct device *dev)
+{
+	struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int rc;
+
+	e1000e_flush_lpic(pdev);
+
+	e1000e_pm_freeze(dev);
+
+	rc = __e1000_shutdown(pdev, false);
+	if (rc) {
+		e1000e_pm_thaw(dev);
+	} else {
+		/* Introduce S0ix implementation */
+		if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS)
+			e1000e_s0ix_entry_flow(adapter);
+	}
+
+	return rc;
+}
+
+static __maybe_unused int e1000e_pm_resume(struct device *dev)
+{
+	struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int rc;
+
+	/* Introduce S0ix implementation */
+	if (adapter->flags2 & FLAG2_ENABLE_S0IX_FLOWS)
+		e1000e_s0ix_exit_flow(adapter);
+
+	rc = __e1000_resume(pdev);
+	if (rc)
+		return rc;
+
+	return e1000e_pm_thaw(dev);
+}
+
+static __maybe_unused int e1000e_pm_runtime_idle(struct device *dev)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	u16 eee_lp;
+
+	eee_lp = adapter->hw.dev_spec.ich8lan.eee_lp_ability;
+
+	if (!e1000e_has_link(adapter)) {
+		adapter->hw.dev_spec.ich8lan.eee_lp_ability = eee_lp;
+		pm_schedule_suspend(dev, 5 * MSEC_PER_SEC);
+	}
+
+	return -EBUSY;
+}
+
+static __maybe_unused int e1000e_pm_runtime_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	int rc;
+
+	pdev->pme_poll = true;
+
+	rc = __e1000_resume(pdev);
+	if (rc)
+		return rc;
+
+	if (netdev->flags & IFF_UP)
+		e1000e_up(adapter);
+
+	return rc;
+}
+
+static __maybe_unused int e1000e_pm_runtime_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	if (netdev->flags & IFF_UP) {
+		int count = E1000_CHECK_RESET_COUNT;
+
+		while (test_bit(__E1000_RESETTING, &adapter->state) && count--)
+			usleep_range(10000, 11000);
+
+		WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
+
+		/* Down the device without resetting the hardware */
+		e1000e_down(adapter, false);
+	}
+
+	if (__e1000_shutdown(pdev, true)) {
+		e1000e_pm_runtime_resume(dev);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static void e1000_shutdown(struct pci_dev *pdev)
+{
+	e1000e_flush_lpic(pdev);
+
+	e1000e_pm_freeze(&pdev->dev);
+
+	__e1000_shutdown(pdev, false);
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+
+static irqreturn_t e1000_intr_msix(int __always_unused irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	if (adapter->msix_entries) {
+		int vector, msix_irq;
+
+		vector = 0;
+		msix_irq = adapter->msix_entries[vector].vector;
+		if (disable_hardirq(msix_irq))
+			e1000_intr_msix_rx(msix_irq, netdev);
+		enable_irq(msix_irq);
+
+		vector++;
+		msix_irq = adapter->msix_entries[vector].vector;
+		if (disable_hardirq(msix_irq))
+			e1000_intr_msix_tx(msix_irq, netdev);
+		enable_irq(msix_irq);
+
+		vector++;
+		msix_irq = adapter->msix_entries[vector].vector;
+		if (disable_hardirq(msix_irq))
+			e1000_msix_other(msix_irq, netdev);
+		enable_irq(msix_irq);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * e1000_netpoll
+ * @netdev: network interface device structure
+ *
+ * Polling 'interrupt' - used by things like netconsole to send skbs
+ * without having to re-enable interrupts. It's not called while
+ * the interrupt routine is executing.
+ */
+static void e1000_netpoll(struct net_device *netdev)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	switch (adapter->int_mode) {
+	case E1000E_INT_MODE_MSIX:
+		e1000_intr_msix(adapter->pdev->irq, netdev);
+		break;
+	case E1000E_INT_MODE_MSI:
+		if (disable_hardirq(adapter->pdev->irq))
+			e1000_intr_msi(adapter->pdev->irq, netdev);
+		enable_irq(adapter->pdev->irq);
+		break;
+	default:		/* E1000E_INT_MODE_LEGACY */
+		if (disable_hardirq(adapter->pdev->irq))
+			e1000_intr(adapter->pdev->irq, netdev);
+		enable_irq(adapter->pdev->irq);
+		break;
+	}
+}
+#endif
+
+/**
+ * e1000_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
+						pci_channel_state_t state)
+{
+	e1000e_pm_freeze(&pdev->dev);
+
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	pci_disable_device(pdev);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * e1000_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot. Implementation
+ * resembles the first-half of the e1000e_pm_resume routine.
+ */
+static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u16 aspm_disable_flag = 0;
+	int err;
+	pci_ers_result_t result;
+
+	if (adapter->flags2 & FLAG2_DISABLE_ASPM_L0S)
+		aspm_disable_flag = PCIE_LINK_STATE_L0S;
+	if (adapter->flags2 & FLAG2_DISABLE_ASPM_L1)
+		aspm_disable_flag |= PCIE_LINK_STATE_L1;
+	if (aspm_disable_flag)
+		e1000e_disable_aspm_locked(pdev, aspm_disable_flag);
+
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(&pdev->dev,
+			"Cannot re-enable PCI device after reset.\n");
+		result = PCI_ERS_RESULT_DISCONNECT;
+	} else {
+		pdev->state_saved = true;
+		pci_restore_state(pdev);
+		pci_set_master(pdev);
+
+		pci_enable_wake(pdev, PCI_D3hot, 0);
+		pci_enable_wake(pdev, PCI_D3cold, 0);
+
+		e1000e_reset(adapter);
+		ew32(WUS, ~0);
+		result = PCI_ERS_RESULT_RECOVERED;
+	}
+
+	return result;
+}
+
+/**
+ * e1000_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells us that
+ * its OK to resume normal operation. Implementation resembles the
+ * second-half of the e1000e_pm_resume routine.
+ */
+static void e1000_io_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	e1000_init_manageability_pt(adapter);
+
+	e1000e_pm_thaw(&pdev->dev);
+
+	/* If the controller has AMT, do not set DRV_LOAD until the interface
+	 * is up.  For all other cases, let the f/w know that the h/w is now
+	 * under the control of the driver.
+	 */
+	if (!(adapter->flags & FLAG_HAS_AMT))
+		e1000e_get_hw_control(adapter);
+}
+
+static void e1000_print_device_info(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	u32 ret_val;
+	u8 pba_str[E1000_PBANUM_LENGTH];
+
+	/* print bus type/speed/width info */
+	e_info("(PCI Express:2.5GT/s:%s) %pM\n",
+	       /* bus width */
+	       ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
+		"Width x1"),
+	       /* MAC address */
+	       netdev->dev_addr);
+	e_info("Intel(R) PRO/%s Network Connection\n",
+	       (hw->phy.type == e1000_phy_ife) ? "10/100" : "1000");
+	ret_val = e1000_read_pba_string_generic(hw, pba_str,
+						E1000_PBANUM_LENGTH);
+	if (ret_val)
+		strscpy((char *)pba_str, "Unknown", sizeof(pba_str));
+	e_info("MAC: %d, PHY: %d, PBA No: %s\n",
+	       hw->mac.type, hw->phy.type, pba_str);
+}
+
+static void e1000_eeprom_checks(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int ret_val;
+	u16 buf = 0;
+
+	if (hw->mac.type != e1000_82573)
+		return;
+
+	ret_val = e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &buf);
+	le16_to_cpus(&buf);
+	if (!ret_val && (!(buf & BIT(0)))) {
+		/* Deep Smart Power Down (DSPD) */
+		dev_warn(&adapter->pdev->dev,
+			 "Warning: detected DSPD enabled in EEPROM\n");
+	}
+}
+
+static netdev_features_t e1000_fix_features(struct net_device *netdev,
+					    netdev_features_t features)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* Jumbo frame workaround on 82579 and newer requires CRC be stripped */
+	if ((hw->mac.type >= e1000_pch2lan) && (netdev->mtu > ETH_DATA_LEN))
+		features &= ~NETIF_F_RXFCS;
+
+	/* Since there is no support for separate Rx/Tx vlan accel
+	 * enable/disable make sure Tx flag is always in same state as Rx.
+	 */
+	if (features & NETIF_F_HW_VLAN_CTAG_RX)
+		features |= NETIF_F_HW_VLAN_CTAG_TX;
+	else
+		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
+
+	return features;
+}
+
+static int e1000_set_features(struct net_device *netdev,
+			      netdev_features_t features)
+{
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+	netdev_features_t changed = features ^ netdev->features;
+
+	if (changed & (NETIF_F_TSO | NETIF_F_TSO6))
+		adapter->flags |= FLAG_TSO_FORCE;
+
+	if (!(changed & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX |
+			 NETIF_F_RXCSUM | NETIF_F_RXHASH | NETIF_F_RXFCS |
+			 NETIF_F_RXALL)))
+		return 0;
+
+	if (changed & NETIF_F_RXFCS) {
+		if (features & NETIF_F_RXFCS) {
+			adapter->flags2 &= ~FLAG2_CRC_STRIPPING;
+		} else {
+			/* We need to take it back to defaults, which might mean
+			 * stripping is still disabled at the adapter level.
+			 */
+			if (adapter->flags2 & FLAG2_DFLT_CRC_STRIPPING)
+				adapter->flags2 |= FLAG2_CRC_STRIPPING;
+			else
+				adapter->flags2 &= ~FLAG2_CRC_STRIPPING;
+		}
+	}
+
+	netdev->features = features;
+
+	if (netif_running(netdev))
+		e1000e_reinit_locked(adapter);
+	else
+		e1000e_reset(adapter);
+
+	return 1;
+}
+
+static const struct net_device_ops e1000e_netdev_ops = {
+	.ndo_open		= e1000e_open,
+	.ndo_stop		= e1000e_close,
+	.ndo_start_xmit		= e1000_xmit_frame,
+	.ndo_get_stats64	= e1000e_get_stats64,
+	.ndo_set_rx_mode	= e1000e_set_rx_mode,
+	.ndo_set_mac_address	= e1000_set_mac,
+	.ndo_change_mtu		= e1000_change_mtu,
+	.ndo_eth_ioctl		= e1000_ioctl,
+	.ndo_tx_timeout		= e1000_tx_timeout,
+	.ndo_validate_addr	= eth_validate_addr,
+
+	.ndo_vlan_rx_add_vid	= e1000_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= e1000_vlan_rx_kill_vid,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= e1000_netpoll,
+#endif
+	.ndo_set_features = e1000_set_features,
+	.ndo_fix_features = e1000_fix_features,
+	.ndo_features_check	= passthru_features_check,
+};
+
+/**
+ * e1000_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in e1000_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * e1000_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct net_device *netdev;
+	struct e1000_adapter *adapter;
+	struct e1000_hw *hw;
+	const struct e1000_info *ei = e1000_info_tbl[ent->driver_data];
+	resource_size_t mmio_start, mmio_len;
+	resource_size_t flash_start, flash_len;
+	static int cards_found;
+	u16 aspm_disable_flag = 0;
+	u16 eeprom_data = 0;
+	u16 eeprom_apme_mask = E1000_EEPROM_APME;
+	int bars, i, err;
+	s32 ret_val = 0;
+
+	if (ei->flags2 & FLAG2_DISABLE_ASPM_L0S)
+		aspm_disable_flag = PCIE_LINK_STATE_L0S;
+	if (ei->flags2 & FLAG2_DISABLE_ASPM_L1)
+		aspm_disable_flag |= PCIE_LINK_STATE_L1;
+	if (aspm_disable_flag)
+		e1000e_disable_aspm(pdev, aspm_disable_flag);
+
+	err = pci_enable_device_mem(pdev);
+	if (err)
+		return err;
+
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_err(&pdev->dev,
+			"No usable DMA configuration, aborting\n");
+		goto err_dma;
+	}
+
+	bars = pci_select_bars(pdev, IORESOURCE_MEM);
+	err = pci_request_selected_regions_exclusive(pdev, bars,
+						     e1000e_driver_name);
+	if (err)
+		goto err_pci_reg;
+
+	pci_set_master(pdev);
+	/* PCI config space info */
+	err = pci_save_state(pdev);
+	if (err)
+		goto err_alloc_etherdev;
+
+	err = -ENOMEM;
+	netdev = alloc_etherdev(sizeof(struct e1000_adapter));
+	if (!netdev)
+		goto err_alloc_etherdev;
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	netdev->irq = pdev->irq;
+
+	pci_set_drvdata(pdev, netdev);
+	adapter = netdev_priv(netdev);
+	hw = &adapter->hw;
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+	adapter->ei = ei;
+	adapter->pba = ei->pba;
+	adapter->flags = ei->flags;
+	adapter->flags2 = ei->flags2;
+	adapter->hw.adapter = adapter;
+	adapter->hw.mac.type = ei->mac;
+	adapter->max_hw_frame_size = ei->max_hw_frame_size;
+	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+
+	mmio_start = pci_resource_start(pdev, 0);
+	mmio_len = pci_resource_len(pdev, 0);
+
+	err = -EIO;
+	adapter->hw.hw_addr = ioremap(mmio_start, mmio_len);
+	if (!adapter->hw.hw_addr)
+		goto err_ioremap;
+
+	if ((adapter->flags & FLAG_HAS_FLASH) &&
+	    (pci_resource_flags(pdev, 1) & IORESOURCE_MEM) &&
+	    (hw->mac.type < e1000_pch_spt)) {
+		flash_start = pci_resource_start(pdev, 1);
+		flash_len = pci_resource_len(pdev, 1);
+		adapter->hw.flash_address = ioremap(flash_start, flash_len);
+		if (!adapter->hw.flash_address)
+			goto err_flashmap;
+	}
+
+	/* Set default EEE advertisement */
+	if (adapter->flags2 & FLAG2_HAS_EEE)
+		adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T;
+
+	/* construct the net_device struct */
+	netdev->netdev_ops = &e1000e_netdev_ops;
+	e1000e_set_ethtool_ops(netdev);
+	netdev->watchdog_timeo = 5 * HZ;
+	netif_napi_add(netdev, &adapter->napi, e1000e_poll);
+	strscpy(netdev->name, pci_name(pdev), sizeof(netdev->name));
+
+	netdev->mem_start = mmio_start;
+	netdev->mem_end = mmio_start + mmio_len;
+
+	adapter->bd_number = cards_found++;
+
+	e1000e_check_options(adapter);
+
+	/* setup adapter struct */
+	err = e1000_sw_init(adapter);
+	if (err)
+		goto err_sw_init;
+
+	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
+	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
+	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
+
+	err = ei->get_variants(adapter);
+	if (err)
+		goto err_hw_init;
+
+	if ((adapter->flags & FLAG_IS_ICH) &&
+	    (adapter->flags & FLAG_READ_ONLY_NVM) &&
+	    (hw->mac.type < e1000_pch_spt))
+		e1000e_write_protect_nvm_ich8lan(&adapter->hw);
+
+	hw->mac.ops.get_bus_info(&adapter->hw);
+
+	adapter->hw.phy.autoneg_wait_to_complete = 0;
+
+	/* Copper options */
+	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
+		adapter->hw.phy.mdix = AUTO_ALL_MODES;
+		adapter->hw.phy.disable_polarity_correction = 0;
+		adapter->hw.phy.ms_type = e1000_ms_hw_default;
+	}
+
+	if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw))
+		dev_info(&pdev->dev,
+			 "PHY reset is blocked due to SOL/IDER session.\n");
+
+	/* Set initial default active device features */
+	netdev->features = (NETIF_F_SG |
+			    NETIF_F_HW_VLAN_CTAG_RX |
+			    NETIF_F_HW_VLAN_CTAG_TX |
+			    NETIF_F_TSO |
+			    NETIF_F_TSO6 |
+			    NETIF_F_RXHASH |
+			    NETIF_F_RXCSUM |
+			    NETIF_F_HW_CSUM);
+
+	/* disable TSO for pcie and 10/100 speeds to avoid
+	 * some hardware issues and for i219 to fix transfer
+	 * speed being capped at 60%
+	 */
+	if (!(adapter->flags & FLAG_TSO_FORCE)) {
+		switch (adapter->link_speed) {
+		case SPEED_10:
+		case SPEED_100:
+			e_info("10/100 speed: disabling TSO\n");
+			netdev->features &= ~NETIF_F_TSO;
+			netdev->features &= ~NETIF_F_TSO6;
+			break;
+		case SPEED_1000:
+			netdev->features |= NETIF_F_TSO;
+			netdev->features |= NETIF_F_TSO6;
+			break;
+		default:
+			/* oops */
+			break;
+		}
+		if (hw->mac.type == e1000_pch_spt) {
+			netdev->features &= ~NETIF_F_TSO;
+			netdev->features &= ~NETIF_F_TSO6;
+		}
+	}
+
+	/* Set user-changeable features (subset of all device features) */
+	netdev->hw_features = netdev->features;
+	netdev->hw_features |= NETIF_F_RXFCS;
+	netdev->priv_flags |= IFF_SUPP_NOFCS;
+	netdev->hw_features |= NETIF_F_RXALL;
+
+	if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER)
+		netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+	netdev->vlan_features |= (NETIF_F_SG |
+				  NETIF_F_TSO |
+				  NETIF_F_TSO6 |
+				  NETIF_F_HW_CSUM);
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+
+	netdev->features |= NETIF_F_HIGHDMA;
+	netdev->vlan_features |= NETIF_F_HIGHDMA;
+
+	/* MTU range: 68 - max_hw_frame_size */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = adapter->max_hw_frame_size -
+			  (VLAN_ETH_HLEN + ETH_FCS_LEN);
+
+	if (e1000e_enable_mng_pass_thru(&adapter->hw))
+		adapter->flags |= FLAG_MNG_PT_ENABLED;
+
+	/* before reading the NVM, reset the controller to
+	 * put the device in a known good starting state
+	 */
+	adapter->hw.mac.ops.reset_hw(&adapter->hw);
+
+	/* systems with ASPM and others may see the checksum fail on the first
+	 * attempt. Let's give it a few tries
+	 */
+	for (i = 0;; i++) {
+		if (e1000_validate_nvm_checksum(&adapter->hw) >= 0)
+			break;
+		if (i == 2) {
+			dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
+			err = -EIO;
+			goto err_eeprom;
+		}
+	}
+
+	e1000_eeprom_checks(adapter);
+
+	/* copy the MAC address */
+	if (e1000e_read_mac_addr(&adapter->hw))
+		dev_err(&pdev->dev,
+			"NVM Read Error while reading MAC address\n");
+
+	eth_hw_addr_set(netdev, adapter->hw.mac.addr);
+
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		dev_err(&pdev->dev, "Invalid MAC Address: %pM\n",
+			netdev->dev_addr);
+		err = -EIO;
+		goto err_eeprom;
+	}
+
+	timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0);
+	timer_setup(&adapter->phy_info_timer, e1000_update_phy_info, 0);
+
+	INIT_WORK(&adapter->reset_task, e1000_reset_task);
+	INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task);
+	INIT_WORK(&adapter->downshift_task, e1000e_downshift_workaround);
+	INIT_WORK(&adapter->update_phy_task, e1000e_update_phy_task);
+	INIT_WORK(&adapter->print_hang_task, e1000_print_hw_hang);
+
+	/* Initialize link parameters. User can change them with ethtool */
+	adapter->hw.mac.autoneg = 1;
+	adapter->fc_autoneg = true;
+	adapter->hw.fc.requested_mode = e1000_fc_default;
+	adapter->hw.fc.current_mode = e1000_fc_default;
+	adapter->hw.phy.autoneg_advertised = 0x2f;
+
+	/* Initial Wake on LAN setting - If APM wake is enabled in
+	 * the EEPROM, enable the ACPI Magic Packet filter
+	 */
+	if (adapter->flags & FLAG_APME_IN_WUC) {
+		/* APME bit in EEPROM is mapped to WUC.APME */
+		eeprom_data = er32(WUC);
+		eeprom_apme_mask = E1000_WUC_APME;
+		if ((hw->mac.type > e1000_ich10lan) &&
+		    (eeprom_data & E1000_WUC_PHY_WAKE))
+			adapter->flags2 |= FLAG2_HAS_PHY_WAKEUP;
+	} else if (adapter->flags & FLAG_APME_IN_CTRL3) {
+		if (adapter->flags & FLAG_APME_CHECK_PORT_B &&
+		    (adapter->hw.bus.func == 1))
+			ret_val = e1000_read_nvm(&adapter->hw,
+					      NVM_INIT_CONTROL3_PORT_B,
+					      1, &eeprom_data);
+		else
+			ret_val = e1000_read_nvm(&adapter->hw,
+					      NVM_INIT_CONTROL3_PORT_A,
+					      1, &eeprom_data);
+	}
+
+	/* fetch WoL from EEPROM */
+	if (ret_val)
+		e_dbg("NVM read error getting WoL initial values: %d\n", ret_val);
+	else if (eeprom_data & eeprom_apme_mask)
+		adapter->eeprom_wol |= E1000_WUFC_MAG;
+
+	/* now that we have the eeprom settings, apply the special cases
+	 * where the eeprom may be wrong or the board simply won't support
+	 * wake on lan on a particular port
+	 */
+	if (!(adapter->flags & FLAG_HAS_WOL))
+		adapter->eeprom_wol = 0;
+
+	/* initialize the wol settings based on the eeprom settings */
+	adapter->wol = adapter->eeprom_wol;
+
+	/* make sure adapter isn't asleep if manageability is enabled */
+	if (adapter->wol || (adapter->flags & FLAG_MNG_PT_ENABLED) ||
+	    (hw->mac.ops.check_mng_mode(hw)))
+		device_wakeup_enable(&pdev->dev);
+
+	/* save off EEPROM version number */
+	ret_val = e1000_read_nvm(&adapter->hw, 5, 1, &adapter->eeprom_vers);
+
+	if (ret_val) {
+		e_dbg("NVM read error getting EEPROM version: %d\n", ret_val);
+		adapter->eeprom_vers = 0;
+	}
+
+	/* init PTP hardware clock */
+	e1000e_ptp_init(adapter);
+
+	/* reset the hardware with the new settings */
+	e1000e_reset(adapter);
+
+	/* If the controller has AMT, do not set DRV_LOAD until the interface
+	 * is up.  For all other cases, let the f/w know that the h/w is now
+	 * under the control of the driver.
+	 */
+	if (!(adapter->flags & FLAG_HAS_AMT))
+		e1000e_get_hw_control(adapter);
+
+	if (hw->mac.type >= e1000_pch_cnp)
+		adapter->flags2 |= FLAG2_ENABLE_S0IX_FLOWS;
+
+	strscpy(netdev->name, "eth%d", sizeof(netdev->name));
+	err = register_netdev(netdev);
+	if (err)
+		goto err_register;
+
+	/* carrier off reporting is important to ethtool even BEFORE open */
+	netif_carrier_off(netdev);
+
+	e1000_print_device_info(adapter);
+
+	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_SMART_PREPARE);
+
+	if (pci_dev_run_wake(pdev))
+		pm_runtime_put_noidle(&pdev->dev);
+
+	return 0;
+
+err_register:
+	if (!(adapter->flags & FLAG_HAS_AMT))
+		e1000e_release_hw_control(adapter);
+err_eeprom:
+	if (hw->phy.ops.check_reset_block && !hw->phy.ops.check_reset_block(hw))
+		e1000_phy_hw_reset(&adapter->hw);
+err_hw_init:
+	kfree(adapter->tx_ring);
+	kfree(adapter->rx_ring);
+err_sw_init:
+	if ((adapter->hw.flash_address) && (hw->mac.type < e1000_pch_spt))
+		iounmap(adapter->hw.flash_address);
+	e1000e_reset_interrupt_capability(adapter);
+err_flashmap:
+	iounmap(adapter->hw.hw_addr);
+err_ioremap:
+	free_netdev(netdev);
+err_alloc_etherdev:
+	pci_release_mem_regions(pdev);
+err_pci_reg:
+err_dma:
+	pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * e1000_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * e1000_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.  This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void e1000_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct e1000_adapter *adapter = netdev_priv(netdev);
+
+	e1000e_ptp_remove(adapter);
+
+	/* The timers may be rescheduled, so explicitly disable them
+	 * from being rescheduled.
+	 */
+	set_bit(__E1000_DOWN, &adapter->state);
+	del_timer_sync(&adapter->watchdog_timer);
+	del_timer_sync(&adapter->phy_info_timer);
+
+	cancel_work_sync(&adapter->reset_task);
+	cancel_work_sync(&adapter->watchdog_task);
+	cancel_work_sync(&adapter->downshift_task);
+	cancel_work_sync(&adapter->update_phy_task);
+	cancel_work_sync(&adapter->print_hang_task);
+
+	if (adapter->flags & FLAG_HAS_HW_TIMESTAMP) {
+		cancel_work_sync(&adapter->tx_hwtstamp_work);
+		if (adapter->tx_hwtstamp_skb) {
+			dev_consume_skb_any(adapter->tx_hwtstamp_skb);
+			adapter->tx_hwtstamp_skb = NULL;
+		}
+	}
+
+	unregister_netdev(netdev);
+
+	if (pci_dev_run_wake(pdev))
+		pm_runtime_get_noresume(&pdev->dev);
+
+	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
+	 * would have already happened in close and is redundant.
+	 */
+	e1000e_release_hw_control(adapter);
+
+	e1000e_reset_interrupt_capability(adapter);
+	kfree(adapter->tx_ring);
+	kfree(adapter->rx_ring);
+
+	iounmap(adapter->hw.hw_addr);
+	if ((adapter->hw.flash_address) &&
+	    (adapter->hw.mac.type < e1000_pch_spt))
+		iounmap(adapter->hw.flash_address);
+	pci_release_mem_regions(pdev);
+
+	free_netdev(netdev);
+
+	pci_disable_device(pdev);
+}
+
+/* PCI Error Recovery (ERS) */
+static const struct pci_error_handlers e1000_err_handler = {
+	.error_detected = e1000_io_error_detected,
+	.slot_reset = e1000_io_slot_reset,
+	.resume = e1000_io_resume,
+};
+
+static const struct pci_device_id e1000_pci_tbl[] = {
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_COPPER), board_82571 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_FIBER), board_82571 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER), board_82571 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER_LP),
+	  board_82571 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_FIBER), board_82571 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES), board_82571 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES_DUAL), board_82571 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES_QUAD), board_82571 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571PT_QUAD_COPPER), board_82571 },
+
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI), board_82572 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI_COPPER), board_82572 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI_FIBER), board_82572 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI_SERDES), board_82572 },
+
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82573E), board_82573 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82573E_IAMT), board_82573 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82573L), board_82573 },
+
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82574L), board_82574 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82574LA), board_82574 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82583V), board_82583 },
+
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_COPPER_DPT),
+	  board_80003es2lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_COPPER_SPT),
+	  board_80003es2lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_SERDES_DPT),
+	  board_80003es2lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_SERDES_SPT),
+	  board_80003es2lan },
+
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IFE), board_ich8lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IFE_G), board_ich8lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IFE_GT), board_ich8lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_AMT), board_ich8lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_C), board_ich8lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_M), board_ich8lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_M_AMT), board_ich8lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_82567V_3), board_ich8lan },
+
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE), board_ich9lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE_G), board_ich9lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE_GT), board_ich9lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_AMT), board_ich9lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_C), board_ich9lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_BM), board_ich9lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M), board_ich9lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M_AMT), board_ich9lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M_V), board_ich9lan },
+
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_LM), board_ich9lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_LF), board_ich9lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_V), board_ich9lan },
+
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_D_BM_LM), board_ich10lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_D_BM_LF), board_ich10lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_D_BM_V), board_ich10lan },
+
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_M_HV_LM), board_pchlan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_M_HV_LC), board_pchlan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_D_HV_DM), board_pchlan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_D_HV_DC), board_pchlan },
+
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH2_LV_LM), board_pch2lan },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH2_LV_V), board_pch2lan },
+
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPT_I217_LM), board_pch_lpt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPT_I217_V), board_pch_lpt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPTLP_I218_LM), board_pch_lpt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPTLP_I218_V), board_pch_lpt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_I218_LM2), board_pch_lpt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_I218_V2), board_pch_lpt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_I218_LM3), board_pch_lpt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_I218_V3), board_pch_lpt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_LM), board_pch_spt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V), board_pch_spt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_LM2), board_pch_spt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V2), board_pch_spt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LBG_I219_LM3), board_pch_spt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_LM4), board_pch_spt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V4), board_pch_spt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_LM5), board_pch_spt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_SPT_I219_V5), board_pch_spt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CNP_I219_LM6), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CNP_I219_V6), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CNP_I219_LM7), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CNP_I219_V7), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_LM8), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_V8), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_LM9), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ICP_I219_V9), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM10), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V10), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM11), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V11), board_pch_cnp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM12), board_pch_spt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V12), board_pch_spt },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM13), board_pch_tgp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V13), board_pch_tgp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM14), board_pch_tgp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_tgp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_tgp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V15), board_pch_tgp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM23), board_pch_adp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V23), board_pch_adp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_adp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_adp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_adp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_adp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_adp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_adp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ARL_I219_LM24), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ARL_I219_V24), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM25), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V25), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM26), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V26), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_LM27), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_PTP_I219_V27), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_LM29), board_pch_mtp },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_NVL_I219_V29), board_pch_mtp },
+
+	{ 0, 0, 0, 0, 0, 0, 0 }	/* terminate list */
+};
+MODULE_DEVICE_TABLE(pci, e1000_pci_tbl);
+
+static const struct dev_pm_ops e1000_pm_ops = {
+#ifdef CONFIG_PM_SLEEP
+	.prepare	= e1000e_pm_prepare,
+	.suspend	= e1000e_pm_suspend,
+	.resume		= e1000e_pm_resume,
+	.freeze		= e1000e_pm_freeze,
+	.thaw		= e1000e_pm_thaw,
+	.poweroff	= e1000e_pm_suspend,
+	.restore	= e1000e_pm_resume,
+#endif
+	SET_RUNTIME_PM_OPS(e1000e_pm_runtime_suspend, e1000e_pm_runtime_resume,
+			   e1000e_pm_runtime_idle)
+};
+
+/* PCI Device API Driver */
+static struct pci_driver e1000_driver = {
+	.name     = e1000e_driver_name,
+	.id_table = e1000_pci_tbl,
+	.probe    = e1000_probe,
+	.remove   = e1000_remove,
+	.driver   = {
+		.pm = &e1000_pm_ops,
+	},
+	.shutdown = e1000_shutdown,
+	.err_handler = &e1000_err_handler
+};
+
+/**
+ * e1000_init_module - Driver Registration Routine
+ *
+ * e1000_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ **/
+static int __init e1000_init_module(void)
+{
+	pr_info("Intel(R) PRO/1000 Network Driver\n");
+	pr_info("Copyright(c) 1999 - 2015 Intel Corporation.\n");
+
+	return pci_register_driver(&e1000_driver);
+}
+module_init(e1000_init_module);
+
+/**
+ * e1000_exit_module - Driver Exit Cleanup Routine
+ *
+ * e1000_exit_module is called just before the driver is removed
+ * from memory.
+ **/
+static void __exit e1000_exit_module(void)
+{
+	pci_unregister_driver(&e1000_driver);
+}
+module_exit(e1000_exit_module);
+
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver");
+MODULE_LICENSE("GPL v2");
+
+/* netdev.c */
diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c
new file mode 100644
index 0000000000..e609f4df86
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/nvm.c
@@ -0,0 +1,615 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "e1000.h"
+
+/**
+ *  e1000_raise_eec_clk - Raise EEPROM clock
+ *  @hw: pointer to the HW structure
+ *  @eecd: pointer to the EEPROM
+ *
+ *  Enable/Raise the EEPROM clock bit.
+ **/
+static void e1000_raise_eec_clk(struct e1000_hw *hw, u32 *eecd)
+{
+	*eecd = *eecd | E1000_EECD_SK;
+	ew32(EECD, *eecd);
+	e1e_flush();
+	udelay(hw->nvm.delay_usec);
+}
+
+/**
+ *  e1000_lower_eec_clk - Lower EEPROM clock
+ *  @hw: pointer to the HW structure
+ *  @eecd: pointer to the EEPROM
+ *
+ *  Clear/Lower the EEPROM clock bit.
+ **/
+static void e1000_lower_eec_clk(struct e1000_hw *hw, u32 *eecd)
+{
+	*eecd = *eecd & ~E1000_EECD_SK;
+	ew32(EECD, *eecd);
+	e1e_flush();
+	udelay(hw->nvm.delay_usec);
+}
+
+/**
+ *  e1000_shift_out_eec_bits - Shift data bits our to the EEPROM
+ *  @hw: pointer to the HW structure
+ *  @data: data to send to the EEPROM
+ *  @count: number of bits to shift out
+ *
+ *  We need to shift 'count' bits out to the EEPROM.  So, the value in the
+ *  "data" parameter will be shifted out to the EEPROM one bit at a time.
+ *  In order to do this, "data" must be broken down into bits.
+ **/
+static void e1000_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 eecd = er32(EECD);
+	u32 mask;
+
+	mask = BIT(count - 1);
+	if (nvm->type == e1000_nvm_eeprom_spi)
+		eecd |= E1000_EECD_DO;
+
+	do {
+		eecd &= ~E1000_EECD_DI;
+
+		if (data & mask)
+			eecd |= E1000_EECD_DI;
+
+		ew32(EECD, eecd);
+		e1e_flush();
+
+		udelay(nvm->delay_usec);
+
+		e1000_raise_eec_clk(hw, &eecd);
+		e1000_lower_eec_clk(hw, &eecd);
+
+		mask >>= 1;
+	} while (mask);
+
+	eecd &= ~E1000_EECD_DI;
+	ew32(EECD, eecd);
+}
+
+/**
+ *  e1000_shift_in_eec_bits - Shift data bits in from the EEPROM
+ *  @hw: pointer to the HW structure
+ *  @count: number of bits to shift in
+ *
+ *  In order to read a register from the EEPROM, we need to shift 'count' bits
+ *  in from the EEPROM.  Bits are "shifted in" by raising the clock input to
+ *  the EEPROM (setting the SK bit), and then reading the value of the data out
+ *  "DO" bit.  During this "shifting in" process the data in "DI" bit should
+ *  always be clear.
+ **/
+static u16 e1000_shift_in_eec_bits(struct e1000_hw *hw, u16 count)
+{
+	u32 eecd;
+	u32 i;
+	u16 data;
+
+	eecd = er32(EECD);
+	eecd &= ~(E1000_EECD_DO | E1000_EECD_DI);
+	data = 0;
+
+	for (i = 0; i < count; i++) {
+		data <<= 1;
+		e1000_raise_eec_clk(hw, &eecd);
+
+		eecd = er32(EECD);
+
+		eecd &= ~E1000_EECD_DI;
+		if (eecd & E1000_EECD_DO)
+			data |= 1;
+
+		e1000_lower_eec_clk(hw, &eecd);
+	}
+
+	return data;
+}
+
+/**
+ *  e1000e_poll_eerd_eewr_done - Poll for EEPROM read/write completion
+ *  @hw: pointer to the HW structure
+ *  @ee_reg: EEPROM flag for polling
+ *
+ *  Polls the EEPROM status bit for either read or write completion based
+ *  upon the value of 'ee_reg'.
+ **/
+s32 e1000e_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg)
+{
+	u32 attempts = 100000;
+	u32 i, reg = 0;
+
+	for (i = 0; i < attempts; i++) {
+		if (ee_reg == E1000_NVM_POLL_READ)
+			reg = er32(EERD);
+		else
+			reg = er32(EEWR);
+
+		if (reg & E1000_NVM_RW_REG_DONE)
+			return 0;
+
+		udelay(5);
+	}
+
+	return -E1000_ERR_NVM;
+}
+
+/**
+ *  e1000e_acquire_nvm - Generic request for access to EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ *  Return successful if access grant bit set, else clear the request for
+ *  EEPROM access and return -E1000_ERR_NVM (-1).
+ **/
+s32 e1000e_acquire_nvm(struct e1000_hw *hw)
+{
+	u32 eecd = er32(EECD);
+	s32 timeout = E1000_NVM_GRANT_ATTEMPTS;
+
+	ew32(EECD, eecd | E1000_EECD_REQ);
+	eecd = er32(EECD);
+
+	while (timeout) {
+		if (eecd & E1000_EECD_GNT)
+			break;
+		udelay(5);
+		eecd = er32(EECD);
+		timeout--;
+	}
+
+	if (!timeout) {
+		eecd &= ~E1000_EECD_REQ;
+		ew32(EECD, eecd);
+		e_dbg("Could not acquire NVM grant\n");
+		return -E1000_ERR_NVM;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_standby_nvm - Return EEPROM to standby state
+ *  @hw: pointer to the HW structure
+ *
+ *  Return the EEPROM to a standby state.
+ **/
+static void e1000_standby_nvm(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 eecd = er32(EECD);
+
+	if (nvm->type == e1000_nvm_eeprom_spi) {
+		/* Toggle CS to flush commands */
+		eecd |= E1000_EECD_CS;
+		ew32(EECD, eecd);
+		e1e_flush();
+		udelay(nvm->delay_usec);
+		eecd &= ~E1000_EECD_CS;
+		ew32(EECD, eecd);
+		e1e_flush();
+		udelay(nvm->delay_usec);
+	}
+}
+
+/**
+ *  e1000_stop_nvm - Terminate EEPROM command
+ *  @hw: pointer to the HW structure
+ *
+ *  Terminates the current command by inverting the EEPROM's chip select pin.
+ **/
+static void e1000_stop_nvm(struct e1000_hw *hw)
+{
+	u32 eecd;
+
+	eecd = er32(EECD);
+	if (hw->nvm.type == e1000_nvm_eeprom_spi) {
+		/* Pull CS high */
+		eecd |= E1000_EECD_CS;
+		e1000_lower_eec_clk(hw, &eecd);
+	}
+}
+
+/**
+ *  e1000e_release_nvm - Release exclusive access to EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Stop any current commands to the EEPROM and clear the EEPROM request bit.
+ **/
+void e1000e_release_nvm(struct e1000_hw *hw)
+{
+	u32 eecd;
+
+	e1000_stop_nvm(hw);
+
+	eecd = er32(EECD);
+	eecd &= ~E1000_EECD_REQ;
+	ew32(EECD, eecd);
+}
+
+/**
+ *  e1000_ready_nvm_eeprom - Prepares EEPROM for read/write
+ *  @hw: pointer to the HW structure
+ *
+ *  Setups the EEPROM for reading and writing.
+ **/
+static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 eecd = er32(EECD);
+	u8 spi_stat_reg;
+
+	if (nvm->type == e1000_nvm_eeprom_spi) {
+		u16 timeout = NVM_MAX_RETRY_SPI;
+
+		/* Clear SK and CS */
+		eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
+		ew32(EECD, eecd);
+		e1e_flush();
+		udelay(1);
+
+		/* Read "Status Register" repeatedly until the LSB is cleared.
+		 * The EEPROM will signal that the command has been completed
+		 * by clearing bit 0 of the internal status register.  If it's
+		 * not cleared within 'timeout', then error out.
+		 */
+		while (timeout) {
+			e1000_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI,
+						 hw->nvm.opcode_bits);
+			spi_stat_reg = (u8)e1000_shift_in_eec_bits(hw, 8);
+			if (!(spi_stat_reg & NVM_STATUS_RDY_SPI))
+				break;
+
+			udelay(5);
+			e1000_standby_nvm(hw);
+			timeout--;
+		}
+
+		if (!timeout) {
+			e_dbg("SPI NVM Status error\n");
+			return -E1000_ERR_NVM;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_read_nvm_eerd - Reads EEPROM using EERD register
+ *  @hw: pointer to the HW structure
+ *  @offset: offset of word in the EEPROM to read
+ *  @words: number of words to read
+ *  @data: word read from the EEPROM
+ *
+ *  Reads a 16 bit word from the EEPROM using the EERD register.
+ **/
+s32 e1000e_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 i, eerd = 0;
+	s32 ret_val = 0;
+
+	/* A check for invalid values:  offset too large, too many words,
+	 * too many words for the offset, and not enough words.
+	 */
+	if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
+	    (words == 0)) {
+		e_dbg("nvm parameter(s) out of bounds\n");
+		return -E1000_ERR_NVM;
+	}
+
+	for (i = 0; i < words; i++) {
+		eerd = ((offset + i) << E1000_NVM_RW_ADDR_SHIFT) +
+		    E1000_NVM_RW_REG_START;
+
+		ew32(EERD, eerd);
+		ret_val = e1000e_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ);
+		if (ret_val) {
+			e_dbg("NVM read error: %d\n", ret_val);
+			break;
+		}
+
+		data[i] = (er32(EERD) >> E1000_NVM_RW_REG_DATA);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000e_write_nvm_spi - Write to EEPROM using SPI
+ *  @hw: pointer to the HW structure
+ *  @offset: offset within the EEPROM to be written to
+ *  @words: number of words to write
+ *  @data: 16 bit word(s) to be written to the EEPROM
+ *
+ *  Writes data to EEPROM at offset using SPI interface.
+ *
+ *  If e1000e_update_nvm_checksum is not called after this function , the
+ *  EEPROM will most likely contain an invalid checksum.
+ **/
+s32 e1000e_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	s32 ret_val = -E1000_ERR_NVM;
+	u16 widx = 0;
+
+	/* A check for invalid values:  offset too large, too many words,
+	 * and not enough words.
+	 */
+	if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
+	    (words == 0)) {
+		e_dbg("nvm parameter(s) out of bounds\n");
+		return -E1000_ERR_NVM;
+	}
+
+	while (widx < words) {
+		u8 write_opcode = NVM_WRITE_OPCODE_SPI;
+
+		ret_val = nvm->ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = e1000_ready_nvm_eeprom(hw);
+		if (ret_val) {
+			nvm->ops.release(hw);
+			return ret_val;
+		}
+
+		e1000_standby_nvm(hw);
+
+		/* Send the WRITE ENABLE command (8 bit opcode) */
+		e1000_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI,
+					 nvm->opcode_bits);
+
+		e1000_standby_nvm(hw);
+
+		/* Some SPI eeproms use the 8th address bit embedded in the
+		 * opcode
+		 */
+		if ((nvm->address_bits == 8) && (offset >= 128))
+			write_opcode |= NVM_A8_OPCODE_SPI;
+
+		/* Send the Write command (8-bit opcode + addr) */
+		e1000_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits);
+		e1000_shift_out_eec_bits(hw, (u16)((offset + widx) * 2),
+					 nvm->address_bits);
+
+		/* Loop to allow for up to whole page write of eeprom */
+		while (widx < words) {
+			u16 word_out = data[widx];
+
+			word_out = (word_out >> 8) | (word_out << 8);
+			e1000_shift_out_eec_bits(hw, word_out, 16);
+			widx++;
+
+			if ((((offset + widx) * 2) % nvm->page_size) == 0) {
+				e1000_standby_nvm(hw);
+				break;
+			}
+		}
+		usleep_range(10000, 11000);
+		nvm->ops.release(hw);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_read_pba_string_generic - Read device part number
+ *  @hw: pointer to the HW structure
+ *  @pba_num: pointer to device part number
+ *  @pba_num_size: size of part number buffer
+ *
+ *  Reads the product board assembly (PBA) number from the EEPROM and stores
+ *  the value in pba_num.
+ **/
+s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num,
+				  u32 pba_num_size)
+{
+	s32 ret_val;
+	u16 nvm_data;
+	u16 pba_ptr;
+	u16 offset;
+	u16 length;
+
+	if (pba_num == NULL) {
+		e_dbg("PBA string buffer was null\n");
+		return -E1000_ERR_INVALID_ARGUMENT;
+	}
+
+	ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_0, 1, &nvm_data);
+	if (ret_val) {
+		e_dbg("NVM Read Error\n");
+		return ret_val;
+	}
+
+	ret_val = e1000_read_nvm(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr);
+	if (ret_val) {
+		e_dbg("NVM Read Error\n");
+		return ret_val;
+	}
+
+	/* if nvm_data is not ptr guard the PBA must be in legacy format which
+	 * means pba_ptr is actually our second data word for the PBA number
+	 * and we can decode it into an ascii string
+	 */
+	if (nvm_data != NVM_PBA_PTR_GUARD) {
+		e_dbg("NVM PBA number is not stored as string\n");
+
+		/* make sure callers buffer is big enough to store the PBA */
+		if (pba_num_size < E1000_PBANUM_LENGTH) {
+			e_dbg("PBA string buffer too small\n");
+			return E1000_ERR_NO_SPACE;
+		}
+
+		/* extract hex string from data and pba_ptr */
+		pba_num[0] = (nvm_data >> 12) & 0xF;
+		pba_num[1] = (nvm_data >> 8) & 0xF;
+		pba_num[2] = (nvm_data >> 4) & 0xF;
+		pba_num[3] = nvm_data & 0xF;
+		pba_num[4] = (pba_ptr >> 12) & 0xF;
+		pba_num[5] = (pba_ptr >> 8) & 0xF;
+		pba_num[6] = '-';
+		pba_num[7] = 0;
+		pba_num[8] = (pba_ptr >> 4) & 0xF;
+		pba_num[9] = pba_ptr & 0xF;
+
+		/* put a null character on the end of our string */
+		pba_num[10] = '\0';
+
+		/* switch all the data but the '-' to hex char */
+		for (offset = 0; offset < 10; offset++) {
+			if (pba_num[offset] < 0xA)
+				pba_num[offset] += '0';
+			else if (pba_num[offset] < 0x10)
+				pba_num[offset] += 'A' - 0xA;
+		}
+
+		return 0;
+	}
+
+	ret_val = e1000_read_nvm(hw, pba_ptr, 1, &length);
+	if (ret_val) {
+		e_dbg("NVM Read Error\n");
+		return ret_val;
+	}
+
+	if (length == 0xFFFF || length == 0) {
+		e_dbg("NVM PBA number section invalid length\n");
+		return -E1000_ERR_NVM_PBA_SECTION;
+	}
+	/* check if pba_num buffer is big enough */
+	if (pba_num_size < (((u32)length * 2) - 1)) {
+		e_dbg("PBA string buffer too small\n");
+		return -E1000_ERR_NO_SPACE;
+	}
+
+	/* trim pba length from start of string */
+	pba_ptr++;
+	length--;
+
+	for (offset = 0; offset < length; offset++) {
+		ret_val = e1000_read_nvm(hw, pba_ptr + offset, 1, &nvm_data);
+		if (ret_val) {
+			e_dbg("NVM Read Error\n");
+			return ret_val;
+		}
+		pba_num[offset * 2] = (u8)(nvm_data >> 8);
+		pba_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF);
+	}
+	pba_num[offset * 2] = '\0';
+
+	return 0;
+}
+
+/**
+ *  e1000_read_mac_addr_generic - Read device MAC address
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the device MAC address from the EEPROM and stores the value.
+ *  Since devices with two ports use the same EEPROM, we increment the
+ *  last bit in the MAC address for the second port.
+ **/
+s32 e1000_read_mac_addr_generic(struct e1000_hw *hw)
+{
+	u32 rar_high;
+	u32 rar_low;
+	u16 i;
+
+	rar_high = er32(RAH(0));
+	rar_low = er32(RAL(0));
+
+	for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++)
+		hw->mac.perm_addr[i] = (u8)(rar_low >> (i * 8));
+
+	for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++)
+		hw->mac.perm_addr[i + 4] = (u8)(rar_high >> (i * 8));
+
+	for (i = 0; i < ETH_ALEN; i++)
+		hw->mac.addr[i] = hw->mac.perm_addr[i];
+
+	return 0;
+}
+
+/**
+ *  e1000e_validate_nvm_checksum_generic - Validate EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ **/
+s32 e1000e_validate_nvm_checksum_generic(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 checksum = 0;
+	u16 i, nvm_data;
+
+	for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) {
+		ret_val = e1000_read_nvm(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			e_dbg("NVM Read Error\n");
+			return ret_val;
+		}
+		checksum += nvm_data;
+	}
+
+	if (checksum != (u16)NVM_SUM) {
+		e_dbg("NVM Checksum Invalid\n");
+		return -E1000_ERR_NVM;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_update_nvm_checksum_generic - Update EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  up to the checksum.  Then calculates the EEPROM checksum and writes the
+ *  value to the EEPROM.
+ **/
+s32 e1000e_update_nvm_checksum_generic(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 checksum = 0;
+	u16 i, nvm_data;
+
+	for (i = 0; i < NVM_CHECKSUM_REG; i++) {
+		ret_val = e1000_read_nvm(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			e_dbg("NVM Read Error while updating checksum.\n");
+			return ret_val;
+		}
+		checksum += nvm_data;
+	}
+	checksum = (u16)NVM_SUM - checksum;
+	ret_val = e1000_write_nvm(hw, NVM_CHECKSUM_REG, 1, &checksum);
+	if (ret_val)
+		e_dbg("NVM Write Error while updating checksum.\n");
+
+	return ret_val;
+}
+
+/**
+ *  e1000e_reload_nvm_generic - Reloads EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the
+ *  extended control register.
+ **/
+void e1000e_reload_nvm_generic(struct e1000_hw *hw)
+{
+	u32 ctrl_ext;
+
+	usleep_range(10, 20);
+	ctrl_ext = er32(CTRL_EXT);
+	ctrl_ext |= E1000_CTRL_EXT_EE_RST;
+	ew32(CTRL_EXT, ctrl_ext);
+	e1e_flush();
+}
diff --git a/drivers/net/ethernet/intel/e1000e/nvm.h b/drivers/net/ethernet/intel/e1000e/nvm.h
new file mode 100644
index 0000000000..6a30dfea41
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/nvm.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _E1000E_NVM_H_
+#define _E1000E_NVM_H_
+
+s32 e1000e_acquire_nvm(struct e1000_hw *hw);
+
+s32 e1000e_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg);
+s32 e1000_read_mac_addr_generic(struct e1000_hw *hw);
+s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num,
+				  u32 pba_num_size);
+s32 e1000e_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
+s32 e1000e_valid_led_default(struct e1000_hw *hw, u16 *data);
+s32 e1000e_validate_nvm_checksum_generic(struct e1000_hw *hw);
+s32 e1000e_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
+s32 e1000e_update_nvm_checksum_generic(struct e1000_hw *hw);
+void e1000e_release_nvm(struct e1000_hw *hw);
+
+#define E1000_STM_OPCODE	0xDB00
+
+#endif
diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c
new file mode 100644
index 0000000000..3132d8f2f2
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/param.c
@@ -0,0 +1,527 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "e1000.h"
+
+/* This is the only thing that needs to be changed to adjust the
+ * maximum number of ports that the driver can manage.
+ */
+#define E1000_MAX_NIC 32
+
+#define OPTION_UNSET   -1
+#define OPTION_DISABLED 0
+#define OPTION_ENABLED  1
+
+#define COPYBREAK_DEFAULT 256
+unsigned int copybreak = COPYBREAK_DEFAULT;
+module_param(copybreak, uint, 0644);
+MODULE_PARM_DESC(copybreak,
+		 "Maximum size of packet that is copied to a new buffer on receive");
+
+/* All parameters are treated the same, as an integer array of values.
+ * This macro just reduces the need to repeat the same declaration code
+ * over and over (plus this helps to avoid typo bugs).
+ */
+#define E1000_PARAM_INIT { [0 ... E1000_MAX_NIC] = OPTION_UNSET }
+#define E1000_PARAM(X, desc)					\
+	static int X[E1000_MAX_NIC+1] = E1000_PARAM_INIT;	\
+	static unsigned int num_##X;				\
+	module_param_array_named(X, X, int, &num_##X, 0);	\
+	MODULE_PARM_DESC(X, desc);
+
+/* Transmit Interrupt Delay in units of 1.024 microseconds
+ * Tx interrupt delay needs to typically be set to something non-zero
+ *
+ * Valid Range: 0-65535
+ */
+E1000_PARAM(TxIntDelay, "Transmit Interrupt Delay");
+#define DEFAULT_TIDV 8
+#define MAX_TXDELAY 0xFFFF
+#define MIN_TXDELAY 0
+
+/* Transmit Absolute Interrupt Delay in units of 1.024 microseconds
+ *
+ * Valid Range: 0-65535
+ */
+E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay");
+#define DEFAULT_TADV 32
+#define MAX_TXABSDELAY 0xFFFF
+#define MIN_TXABSDELAY 0
+
+/* Receive Interrupt Delay in units of 1.024 microseconds
+ * hardware will likely hang if you set this to anything but zero.
+ *
+ * Burst variant is used as default if device has FLAG2_DMA_BURST.
+ *
+ * Valid Range: 0-65535
+ */
+E1000_PARAM(RxIntDelay, "Receive Interrupt Delay");
+#define DEFAULT_RDTR	0
+#define BURST_RDTR	0x20
+#define MAX_RXDELAY 0xFFFF
+#define MIN_RXDELAY 0
+
+/* Receive Absolute Interrupt Delay in units of 1.024 microseconds
+ *
+ * Burst variant is used as default if device has FLAG2_DMA_BURST.
+ *
+ * Valid Range: 0-65535
+ */
+E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay");
+#define DEFAULT_RADV	8
+#define BURST_RADV	0x20
+#define MAX_RXABSDELAY 0xFFFF
+#define MIN_RXABSDELAY 0
+
+/* Interrupt Throttle Rate (interrupts/sec)
+ *
+ * Valid Range: 100-100000 or one of: 0=off, 1=dynamic, 3=dynamic conservative
+ */
+E1000_PARAM(InterruptThrottleRate, "Interrupt Throttling Rate");
+#define DEFAULT_ITR 3
+#define MAX_ITR 100000
+#define MIN_ITR 100
+
+/* IntMode (Interrupt Mode)
+ *
+ * Valid Range: varies depending on kernel configuration & hardware support
+ *
+ * legacy=0, MSI=1, MSI-X=2
+ *
+ * When MSI/MSI-X support is enabled in kernel-
+ *   Default Value: 2 (MSI-X) when supported by hardware, 1 (MSI) otherwise
+ * When MSI/MSI-X support is not enabled in kernel-
+ *   Default Value: 0 (legacy)
+ *
+ * When a mode is specified that is not allowed/supported, it will be
+ * demoted to the most advanced interrupt mode available.
+ */
+E1000_PARAM(IntMode, "Interrupt Mode");
+
+/* Enable Smart Power Down of the PHY
+ *
+ * Valid Range: 0, 1
+ *
+ * Default Value: 0 (disabled)
+ */
+E1000_PARAM(SmartPowerDownEnable, "Enable PHY smart power down");
+
+/* Enable Kumeran Lock Loss workaround
+ *
+ * Valid Range: 0, 1
+ *
+ * Default Value: 1 (enabled)
+ */
+E1000_PARAM(KumeranLockLoss, "Enable Kumeran lock loss workaround");
+
+/* Write Protect NVM
+ *
+ * Valid Range: 0, 1
+ *
+ * Default Value: 1 (enabled)
+ */
+E1000_PARAM(WriteProtectNVM,
+	    "Write-protect NVM [WARNING: disabling this can lead to corrupted NVM]");
+
+/* Enable CRC Stripping
+ *
+ * Valid Range: 0, 1
+ *
+ * Default Value: 1 (enabled)
+ */
+E1000_PARAM(CrcStripping,
+	    "Enable CRC Stripping, disable if your BMC needs the CRC");
+
+struct e1000_option {
+	enum { enable_option, range_option, list_option } type;
+	const char *name;
+	const char *err;
+	int def;
+	union {
+		/* range_option info */
+		struct {
+			int min;
+			int max;
+		} r;
+		/* list_option info */
+		struct {
+			int nr;
+			struct e1000_opt_list {
+				int i;
+				char *str;
+			} *p;
+		} l;
+	} arg;
+};
+
+static int e1000_validate_option(unsigned int *value,
+				 const struct e1000_option *opt,
+				 struct e1000_adapter *adapter)
+{
+	if (*value == OPTION_UNSET) {
+		*value = opt->def;
+		return 0;
+	}
+
+	switch (opt->type) {
+	case enable_option:
+		switch (*value) {
+		case OPTION_ENABLED:
+			dev_info(&adapter->pdev->dev, "%s Enabled\n",
+				 opt->name);
+			return 0;
+		case OPTION_DISABLED:
+			dev_info(&adapter->pdev->dev, "%s Disabled\n",
+				 opt->name);
+			return 0;
+		}
+		break;
+	case range_option:
+		if (*value >= opt->arg.r.min && *value <= opt->arg.r.max) {
+			dev_info(&adapter->pdev->dev, "%s set to %i\n",
+				 opt->name, *value);
+			return 0;
+		}
+		break;
+	case list_option: {
+		int i;
+		struct e1000_opt_list *ent;
+
+		for (i = 0; i < opt->arg.l.nr; i++) {
+			ent = &opt->arg.l.p[i];
+			if (*value == ent->i) {
+				if (ent->str[0] != '\0')
+					dev_info(&adapter->pdev->dev, "%s\n",
+						 ent->str);
+				return 0;
+			}
+		}
+	}
+		break;
+	default:
+		BUG();
+	}
+
+	dev_info(&adapter->pdev->dev, "Invalid %s value specified (%i) %s\n",
+		 opt->name, *value, opt->err);
+	*value = opt->def;
+	return -1;
+}
+
+/**
+ * e1000e_check_options - Range Checking for Command Line Parameters
+ * @adapter: board private structure
+ *
+ * This routine checks all command line parameters for valid user
+ * input.  If an invalid value is given, or if no user specified
+ * value exists, a default value is used.  The final value is stored
+ * in a variable in the adapter structure.
+ **/
+void e1000e_check_options(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int bd = adapter->bd_number;
+
+	if (bd >= E1000_MAX_NIC) {
+		dev_notice(&adapter->pdev->dev,
+			   "Warning: no configuration for board #%i\n", bd);
+		dev_notice(&adapter->pdev->dev,
+			   "Using defaults for all values\n");
+	}
+
+	/* Transmit Interrupt Delay */
+	{
+		static const struct e1000_option opt = {
+			.type = range_option,
+			.name = "Transmit Interrupt Delay",
+			.err  = "using default of "
+				__MODULE_STRING(DEFAULT_TIDV),
+			.def  = DEFAULT_TIDV,
+			.arg  = { .r = { .min = MIN_TXDELAY,
+					 .max = MAX_TXDELAY } }
+		};
+
+		if (num_TxIntDelay > bd) {
+			adapter->tx_int_delay = TxIntDelay[bd];
+			e1000_validate_option(&adapter->tx_int_delay, &opt,
+					      adapter);
+		} else {
+			adapter->tx_int_delay = opt.def;
+		}
+	}
+	/* Transmit Absolute Interrupt Delay */
+	{
+		static const struct e1000_option opt = {
+			.type = range_option,
+			.name = "Transmit Absolute Interrupt Delay",
+			.err  = "using default of "
+				__MODULE_STRING(DEFAULT_TADV),
+			.def  = DEFAULT_TADV,
+			.arg  = { .r = { .min = MIN_TXABSDELAY,
+					 .max = MAX_TXABSDELAY } }
+		};
+
+		if (num_TxAbsIntDelay > bd) {
+			adapter->tx_abs_int_delay = TxAbsIntDelay[bd];
+			e1000_validate_option(&adapter->tx_abs_int_delay, &opt,
+					      adapter);
+		} else {
+			adapter->tx_abs_int_delay = opt.def;
+		}
+	}
+	/* Receive Interrupt Delay */
+	{
+		static struct e1000_option opt = {
+			.type = range_option,
+			.name = "Receive Interrupt Delay",
+			.err  = "using default of "
+				__MODULE_STRING(DEFAULT_RDTR),
+			.def  = DEFAULT_RDTR,
+			.arg  = { .r = { .min = MIN_RXDELAY,
+					 .max = MAX_RXDELAY } }
+		};
+
+		if (adapter->flags2 & FLAG2_DMA_BURST)
+			opt.def = BURST_RDTR;
+
+		if (num_RxIntDelay > bd) {
+			adapter->rx_int_delay = RxIntDelay[bd];
+			e1000_validate_option(&adapter->rx_int_delay, &opt,
+					      adapter);
+		} else {
+			adapter->rx_int_delay = opt.def;
+		}
+	}
+	/* Receive Absolute Interrupt Delay */
+	{
+		static struct e1000_option opt = {
+			.type = range_option,
+			.name = "Receive Absolute Interrupt Delay",
+			.err  = "using default of "
+				__MODULE_STRING(DEFAULT_RADV),
+			.def  = DEFAULT_RADV,
+			.arg  = { .r = { .min = MIN_RXABSDELAY,
+					 .max = MAX_RXABSDELAY } }
+		};
+
+		if (adapter->flags2 & FLAG2_DMA_BURST)
+			opt.def = BURST_RADV;
+
+		if (num_RxAbsIntDelay > bd) {
+			adapter->rx_abs_int_delay = RxAbsIntDelay[bd];
+			e1000_validate_option(&adapter->rx_abs_int_delay, &opt,
+					      adapter);
+		} else {
+			adapter->rx_abs_int_delay = opt.def;
+		}
+	}
+	/* Interrupt Throttling Rate */
+	{
+		static const struct e1000_option opt = {
+			.type = range_option,
+			.name = "Interrupt Throttling Rate (ints/sec)",
+			.err  = "using default of "
+				__MODULE_STRING(DEFAULT_ITR),
+			.def  = DEFAULT_ITR,
+			.arg  = { .r = { .min = MIN_ITR,
+					 .max = MAX_ITR } }
+		};
+
+		if (num_InterruptThrottleRate > bd) {
+			adapter->itr = InterruptThrottleRate[bd];
+
+			/* Make sure a message is printed for non-special
+			 * values. And in case of an invalid option, display
+			 * warning, use default and go through itr/itr_setting
+			 * adjustment logic below
+			 */
+			if ((adapter->itr > 4) &&
+			    e1000_validate_option(&adapter->itr, &opt, adapter))
+				adapter->itr = opt.def;
+		} else {
+			/* If no option specified, use default value and go
+			 * through the logic below to adjust itr/itr_setting
+			 */
+			adapter->itr = opt.def;
+
+			/* Make sure a message is printed for non-special
+			 * default values
+			 */
+			if (adapter->itr > 4)
+				dev_info(&adapter->pdev->dev,
+					 "%s set to default %d\n", opt.name,
+					 adapter->itr);
+		}
+
+		adapter->itr_setting = adapter->itr;
+		switch (adapter->itr) {
+		case 0:
+			dev_info(&adapter->pdev->dev, "%s turned off\n",
+				 opt.name);
+			break;
+		case 1:
+			dev_info(&adapter->pdev->dev,
+				 "%s set to dynamic mode\n", opt.name);
+			adapter->itr = 20000;
+			break;
+		case 2:
+			dev_info(&adapter->pdev->dev,
+				 "%s Invalid mode - setting default\n",
+				 opt.name);
+			adapter->itr_setting = opt.def;
+			fallthrough;
+		case 3:
+			dev_info(&adapter->pdev->dev,
+				 "%s set to dynamic conservative mode\n",
+				 opt.name);
+			adapter->itr = 20000;
+			break;
+		case 4:
+			dev_info(&adapter->pdev->dev,
+				 "%s set to simplified (2000-8000 ints) mode\n",
+				 opt.name);
+			break;
+		default:
+			/* Save the setting, because the dynamic bits
+			 * change itr.
+			 *
+			 * Clear the lower two bits because
+			 * they are used as control.
+			 */
+			adapter->itr_setting &= ~3;
+			break;
+		}
+	}
+	/* Interrupt Mode */
+	{
+		static struct e1000_option opt = {
+			.type = range_option,
+			.name = "Interrupt Mode",
+#ifndef CONFIG_PCI_MSI
+			.err  = "defaulting to 0 (legacy)",
+			.def  = E1000E_INT_MODE_LEGACY,
+			.arg  = { .r = { .min = 0,
+					 .max = 0 } }
+#endif
+		};
+
+#ifdef CONFIG_PCI_MSI
+		if (adapter->flags & FLAG_HAS_MSIX) {
+			opt.err = kstrdup("defaulting to 2 (MSI-X)",
+					  GFP_KERNEL);
+			opt.def = E1000E_INT_MODE_MSIX;
+			opt.arg.r.max = E1000E_INT_MODE_MSIX;
+		} else {
+			opt.err = kstrdup("defaulting to 1 (MSI)", GFP_KERNEL);
+			opt.def = E1000E_INT_MODE_MSI;
+			opt.arg.r.max = E1000E_INT_MODE_MSI;
+		}
+
+		if (!opt.err) {
+			dev_err(&adapter->pdev->dev,
+				"Failed to allocate memory\n");
+			return;
+		}
+#endif
+
+		if (num_IntMode > bd) {
+			unsigned int int_mode = IntMode[bd];
+
+			e1000_validate_option(&int_mode, &opt, adapter);
+			adapter->int_mode = int_mode;
+		} else {
+			adapter->int_mode = opt.def;
+		}
+
+#ifdef CONFIG_PCI_MSI
+		kfree(opt.err);
+#endif
+	}
+	/* Smart Power Down */
+	{
+		static const struct e1000_option opt = {
+			.type = enable_option,
+			.name = "PHY Smart Power Down",
+			.err  = "defaulting to Disabled",
+			.def  = OPTION_DISABLED
+		};
+
+		if (num_SmartPowerDownEnable > bd) {
+			unsigned int spd = SmartPowerDownEnable[bd];
+
+			e1000_validate_option(&spd, &opt, adapter);
+			if ((adapter->flags & FLAG_HAS_SMART_POWER_DOWN) && spd)
+				adapter->flags |= FLAG_SMART_POWER_DOWN;
+		}
+	}
+	/* CRC Stripping */
+	{
+		static const struct e1000_option opt = {
+			.type = enable_option,
+			.name = "CRC Stripping",
+			.err  = "defaulting to Enabled",
+			.def  = OPTION_ENABLED
+		};
+
+		if (num_CrcStripping > bd) {
+			unsigned int crc_stripping = CrcStripping[bd];
+
+			e1000_validate_option(&crc_stripping, &opt, adapter);
+			if (crc_stripping == OPTION_ENABLED) {
+				adapter->flags2 |= FLAG2_CRC_STRIPPING;
+				adapter->flags2 |= FLAG2_DFLT_CRC_STRIPPING;
+			}
+		} else {
+			adapter->flags2 |= FLAG2_CRC_STRIPPING;
+			adapter->flags2 |= FLAG2_DFLT_CRC_STRIPPING;
+		}
+	}
+	/* Kumeran Lock Loss Workaround */
+	{
+		static const struct e1000_option opt = {
+			.type = enable_option,
+			.name = "Kumeran Lock Loss Workaround",
+			.err  = "defaulting to Enabled",
+			.def  = OPTION_ENABLED
+		};
+		bool enabled = opt.def;
+
+		if (num_KumeranLockLoss > bd) {
+			unsigned int kmrn_lock_loss = KumeranLockLoss[bd];
+
+			e1000_validate_option(&kmrn_lock_loss, &opt, adapter);
+			enabled = kmrn_lock_loss;
+		}
+
+		if (hw->mac.type == e1000_ich8lan)
+			e1000e_set_kmrn_lock_loss_workaround_ich8lan(hw,
+								     enabled);
+	}
+	/* Write-protect NVM */
+	{
+		static const struct e1000_option opt = {
+			.type = enable_option,
+			.name = "Write-protect NVM",
+			.err  = "defaulting to Enabled",
+			.def  = OPTION_ENABLED
+		};
+
+		if (adapter->flags & FLAG_IS_ICH) {
+			if (num_WriteProtectNVM > bd) {
+				unsigned int write_protect_nvm =
+				    WriteProtectNVM[bd];
+				e1000_validate_option(&write_protect_nvm, &opt,
+						      adapter);
+				if (write_protect_nvm)
+					adapter->flags |= FLAG_READ_ONLY_NVM;
+			} else {
+				if (opt.def)
+					adapter->flags |= FLAG_READ_ONLY_NVM;
+			}
+		}
+	}
+}
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
new file mode 100644
index 0000000000..08c3d477dd
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -0,0 +1,3246 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "e1000.h"
+#include <linux/ethtool.h>
+
+static s32 e1000_wait_autoneg(struct e1000_hw *hw);
+static s32 e1000_access_phy_wakeup_reg_bm(struct e1000_hw *hw, u32 offset,
+					  u16 *data, bool read, bool page_set);
+static u32 e1000_get_phy_addr_for_hv_page(u32 page);
+static s32 e1000_access_phy_debug_regs_hv(struct e1000_hw *hw, u32 offset,
+					  u16 *data, bool read);
+
+/* Cable length tables */
+static const u16 e1000_m88_cable_length_table[] = {
+	0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED
+};
+
+#define M88E1000_CABLE_LENGTH_TABLE_SIZE \
+		ARRAY_SIZE(e1000_m88_cable_length_table)
+
+static const u16 e1000_igp_2_cable_length_table[] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, 0, 0, 0, 3,
+	6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, 6, 10, 14, 18, 22,
+	26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, 21, 26, 31, 35, 40,
+	44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, 40, 45, 51, 56, 61,
+	66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, 60, 66, 72, 77, 82,
+	87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, 83, 89, 95,
+	100, 105, 109, 113, 116, 119, 122, 124, 104, 109, 114, 118, 121,
+	124
+};
+
+#define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \
+		ARRAY_SIZE(e1000_igp_2_cable_length_table)
+
+/**
+ *  e1000e_check_reset_block_generic - Check if PHY reset is blocked
+ *  @hw: pointer to the HW structure
+ *
+ *  Read the PHY management control register and check whether a PHY reset
+ *  is blocked.  If a reset is not blocked return 0, otherwise
+ *  return E1000_BLK_PHY_RESET (12).
+ **/
+s32 e1000e_check_reset_block_generic(struct e1000_hw *hw)
+{
+	u32 manc;
+
+	manc = er32(MANC);
+
+	return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? E1000_BLK_PHY_RESET : 0;
+}
+
+/**
+ *  e1000e_get_phy_id - Retrieve the PHY ID and revision
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the PHY registers and stores the PHY ID and possibly the PHY
+ *  revision in the hardware structure.
+ **/
+s32 e1000e_get_phy_id(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+	u16 phy_id;
+	u16 retry_count = 0;
+
+	if (!phy->ops.read_reg)
+		return 0;
+
+	while (retry_count < 2) {
+		ret_val = e1e_rphy(hw, MII_PHYSID1, &phy_id);
+		if (ret_val)
+			return ret_val;
+
+		phy->id = (u32)(phy_id << 16);
+		usleep_range(20, 40);
+		ret_val = e1e_rphy(hw, MII_PHYSID2, &phy_id);
+		if (ret_val)
+			return ret_val;
+
+		phy->id |= (u32)(phy_id & PHY_REVISION_MASK);
+		phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK);
+
+		if (phy->id != 0 && phy->id != PHY_REVISION_MASK)
+			return 0;
+
+		retry_count++;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_phy_reset_dsp - Reset PHY DSP
+ *  @hw: pointer to the HW structure
+ *
+ *  Reset the digital signal processor.
+ **/
+s32 e1000e_phy_reset_dsp(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	ret_val = e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0xC1);
+	if (ret_val)
+		return ret_val;
+
+	return e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0);
+}
+
+/**
+ *  e1000e_read_phy_reg_mdic - Read MDI control register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Reads the MDI control register in the PHY at offset and stores the
+ *  information read to data.
+ **/
+s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	u32 i, mdic = 0;
+
+	if (offset > MAX_PHY_REG_ADDRESS) {
+		e_dbg("PHY Address %d is out of range\n", offset);
+		return -E1000_ERR_PARAM;
+	}
+
+	/* Set up Op-code, Phy Address, and register offset in the MDI
+	 * Control register.  The MAC will take care of interfacing with the
+	 * PHY to retrieve the desired data.
+	 */
+	mdic = ((offset << E1000_MDIC_REG_SHIFT) |
+		(phy->addr << E1000_MDIC_PHY_SHIFT) |
+		(E1000_MDIC_OP_READ));
+
+	ew32(MDIC, mdic);
+
+	/* Poll the ready bit to see if the MDI read completed
+	 * Increasing the time out as testing showed failures with
+	 * the lower time out
+	 */
+	for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+		udelay(50);
+		mdic = er32(MDIC);
+		if (mdic & E1000_MDIC_READY)
+			break;
+	}
+	if (!(mdic & E1000_MDIC_READY)) {
+		e_dbg("MDI Read PHY Reg Address %d did not complete\n", offset);
+		return -E1000_ERR_PHY;
+	}
+	if (mdic & E1000_MDIC_ERROR) {
+		e_dbg("MDI Read PHY Reg Address %d Error\n", offset);
+		return -E1000_ERR_PHY;
+	}
+	if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) {
+		e_dbg("MDI Read offset error - requested %d, returned %d\n",
+		      offset,
+		      (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
+		return -E1000_ERR_PHY;
+	}
+	*data = (u16)mdic;
+
+	/* Allow some time after each MDIC transaction to avoid
+	 * reading duplicate data in the next MDIC transaction.
+	 */
+	if (hw->mac.type == e1000_pch2lan)
+		udelay(100);
+
+	return 0;
+}
+
+/**
+ *  e1000e_write_phy_reg_mdic - Write MDI control register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write to register at offset
+ *
+ *  Writes data to MDI control register in the PHY at offset.
+ **/
+s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	u32 i, mdic = 0;
+
+	if (offset > MAX_PHY_REG_ADDRESS) {
+		e_dbg("PHY Address %d is out of range\n", offset);
+		return -E1000_ERR_PARAM;
+	}
+
+	/* Set up Op-code, Phy Address, and register offset in the MDI
+	 * Control register.  The MAC will take care of interfacing with the
+	 * PHY to retrieve the desired data.
+	 */
+	mdic = (((u32)data) |
+		(offset << E1000_MDIC_REG_SHIFT) |
+		(phy->addr << E1000_MDIC_PHY_SHIFT) |
+		(E1000_MDIC_OP_WRITE));
+
+	ew32(MDIC, mdic);
+
+	/* Poll the ready bit to see if the MDI read completed
+	 * Increasing the time out as testing showed failures with
+	 * the lower time out
+	 */
+	for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+		udelay(50);
+		mdic = er32(MDIC);
+		if (mdic & E1000_MDIC_READY)
+			break;
+	}
+	if (!(mdic & E1000_MDIC_READY)) {
+		e_dbg("MDI Write PHY Reg Address %d did not complete\n", offset);
+		return -E1000_ERR_PHY;
+	}
+	if (mdic & E1000_MDIC_ERROR) {
+		e_dbg("MDI Write PHY Red Address %d Error\n", offset);
+		return -E1000_ERR_PHY;
+	}
+	if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) {
+		e_dbg("MDI Write offset error - requested %d, returned %d\n",
+		      offset,
+		      (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT);
+		return -E1000_ERR_PHY;
+	}
+
+	/* Allow some time after each MDIC transaction to avoid
+	 * reading duplicate data in the next MDIC transaction.
+	 */
+	if (hw->mac.type == e1000_pch2lan)
+		udelay(100);
+
+	return 0;
+}
+
+/**
+ *  e1000e_read_phy_reg_m88 - Read m88 PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Acquires semaphore, if necessary, then reads the PHY register at offset
+ *  and storing the retrieved information in data.  Release any acquired
+ *  semaphores before exiting.
+ **/
+s32 e1000e_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	s32 ret_val;
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
+					   data);
+
+	hw->phy.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000e_write_phy_reg_m88 - Write m88 PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Acquires semaphore, if necessary, then writes the data to PHY register
+ *  at the offset.  Release any acquired semaphores before exiting.
+ **/
+s32 e1000e_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	s32 ret_val;
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
+					    data);
+
+	hw->phy.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_set_page_igp - Set page as on IGP-like PHY(s)
+ *  @hw: pointer to the HW structure
+ *  @page: page to set (shifted left when necessary)
+ *
+ *  Sets PHY page required for PHY register access.  Assumes semaphore is
+ *  already acquired.  Note, this function sets phy.addr to 1 so the caller
+ *  must set it appropriately (if necessary) after this function returns.
+ **/
+s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page)
+{
+	e_dbg("Setting page 0x%x\n", page);
+
+	hw->phy.addr = 1;
+
+	return e1000e_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, page);
+}
+
+/**
+ *  __e1000e_read_phy_reg_igp - Read igp PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *  @locked: semaphore has already been acquired or not
+ *
+ *  Acquires semaphore, if necessary, then reads the PHY register at offset
+ *  and stores the retrieved information in data.  Release any acquired
+ *  semaphores before exiting.
+ **/
+static s32 __e1000e_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data,
+				     bool locked)
+{
+	s32 ret_val = 0;
+
+	if (!locked) {
+		if (!hw->phy.ops.acquire)
+			return 0;
+
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	if (offset > MAX_PHY_MULTI_PAGE_REG)
+		ret_val = e1000e_write_phy_reg_mdic(hw,
+						    IGP01E1000_PHY_PAGE_SELECT,
+						    (u16)offset);
+	if (!ret_val)
+		ret_val = e1000e_read_phy_reg_mdic(hw,
+						   MAX_PHY_REG_ADDRESS & offset,
+						   data);
+	if (!locked)
+		hw->phy.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000e_read_phy_reg_igp - Read igp PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Acquires semaphore then reads the PHY register at offset and stores the
+ *  retrieved information in data.
+ *  Release the acquired semaphore before exiting.
+ **/
+s32 e1000e_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	return __e1000e_read_phy_reg_igp(hw, offset, data, false);
+}
+
+/**
+ *  e1000e_read_phy_reg_igp_locked - Read igp PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Reads the PHY register at offset and stores the retrieved information
+ *  in data.  Assumes semaphore already acquired.
+ **/
+s32 e1000e_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	return __e1000e_read_phy_reg_igp(hw, offset, data, true);
+}
+
+/**
+ *  __e1000e_write_phy_reg_igp - Write igp PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *  @locked: semaphore has already been acquired or not
+ *
+ *  Acquires semaphore, if necessary, then writes the data to PHY register
+ *  at the offset.  Release any acquired semaphores before exiting.
+ **/
+static s32 __e1000e_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data,
+				      bool locked)
+{
+	s32 ret_val = 0;
+
+	if (!locked) {
+		if (!hw->phy.ops.acquire)
+			return 0;
+
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	if (offset > MAX_PHY_MULTI_PAGE_REG)
+		ret_val = e1000e_write_phy_reg_mdic(hw,
+						    IGP01E1000_PHY_PAGE_SELECT,
+						    (u16)offset);
+	if (!ret_val)
+		ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS &
+						    offset, data);
+	if (!locked)
+		hw->phy.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000e_write_phy_reg_igp - Write igp PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Acquires semaphore then writes the data to PHY register
+ *  at the offset.  Release any acquired semaphores before exiting.
+ **/
+s32 e1000e_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	return __e1000e_write_phy_reg_igp(hw, offset, data, false);
+}
+
+/**
+ *  e1000e_write_phy_reg_igp_locked - Write igp PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Writes the data to PHY register at the offset.
+ *  Assumes semaphore already acquired.
+ **/
+s32 e1000e_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	return __e1000e_write_phy_reg_igp(hw, offset, data, true);
+}
+
+/**
+ *  __e1000_read_kmrn_reg - Read kumeran register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *  @locked: semaphore has already been acquired or not
+ *
+ *  Acquires semaphore, if necessary.  Then reads the PHY register at offset
+ *  using the kumeran interface.  The information retrieved is stored in data.
+ *  Release any acquired semaphores before exiting.
+ **/
+static s32 __e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data,
+				 bool locked)
+{
+	u32 kmrnctrlsta;
+
+	if (!locked) {
+		s32 ret_val = 0;
+
+		if (!hw->phy.ops.acquire)
+			return 0;
+
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) &
+		       E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN;
+	ew32(KMRNCTRLSTA, kmrnctrlsta);
+	e1e_flush();
+
+	udelay(2);
+
+	kmrnctrlsta = er32(KMRNCTRLSTA);
+	*data = (u16)kmrnctrlsta;
+
+	if (!locked)
+		hw->phy.ops.release(hw);
+
+	return 0;
+}
+
+/**
+ *  e1000e_read_kmrn_reg -  Read kumeran register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Acquires semaphore then reads the PHY register at offset using the
+ *  kumeran interface.  The information retrieved is stored in data.
+ *  Release the acquired semaphore before exiting.
+ **/
+s32 e1000e_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	return __e1000_read_kmrn_reg(hw, offset, data, false);
+}
+
+/**
+ *  e1000e_read_kmrn_reg_locked -  Read kumeran register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Reads the PHY register at offset using the kumeran interface.  The
+ *  information retrieved is stored in data.
+ *  Assumes semaphore already acquired.
+ **/
+s32 e1000e_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	return __e1000_read_kmrn_reg(hw, offset, data, true);
+}
+
+/**
+ *  __e1000_write_kmrn_reg - Write kumeran register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *  @locked: semaphore has already been acquired or not
+ *
+ *  Acquires semaphore, if necessary.  Then write the data to PHY register
+ *  at the offset using the kumeran interface.  Release any acquired semaphores
+ *  before exiting.
+ **/
+static s32 __e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data,
+				  bool locked)
+{
+	u32 kmrnctrlsta;
+
+	if (!locked) {
+		s32 ret_val = 0;
+
+		if (!hw->phy.ops.acquire)
+			return 0;
+
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) &
+		       E1000_KMRNCTRLSTA_OFFSET) | data;
+	ew32(KMRNCTRLSTA, kmrnctrlsta);
+	e1e_flush();
+
+	udelay(2);
+
+	if (!locked)
+		hw->phy.ops.release(hw);
+
+	return 0;
+}
+
+/**
+ *  e1000e_write_kmrn_reg -  Write kumeran register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Acquires semaphore then writes the data to the PHY register at the offset
+ *  using the kumeran interface.  Release the acquired semaphore before exiting.
+ **/
+s32 e1000e_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	return __e1000_write_kmrn_reg(hw, offset, data, false);
+}
+
+/**
+ *  e1000e_write_kmrn_reg_locked -  Write kumeran register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Write the data to PHY register at the offset using the kumeran interface.
+ *  Assumes semaphore already acquired.
+ **/
+s32 e1000e_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	return __e1000_write_kmrn_reg(hw, offset, data, true);
+}
+
+/**
+ *  e1000_set_master_slave_mode - Setup PHY for Master/slave mode
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets up Master/slave mode
+ **/
+static s32 e1000_set_master_slave_mode(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 phy_data;
+
+	/* Resolve Master/Slave mode */
+	ret_val = e1e_rphy(hw, MII_CTRL1000, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* load defaults for future use */
+	hw->phy.original_ms_type = (phy_data & CTL1000_ENABLE_MASTER) ?
+	    ((phy_data & CTL1000_AS_MASTER) ?
+	     e1000_ms_force_master : e1000_ms_force_slave) : e1000_ms_auto;
+
+	switch (hw->phy.ms_type) {
+	case e1000_ms_force_master:
+		phy_data |= (CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER);
+		break;
+	case e1000_ms_force_slave:
+		phy_data |= CTL1000_ENABLE_MASTER;
+		phy_data &= ~(CTL1000_AS_MASTER);
+		break;
+	case e1000_ms_auto:
+		phy_data &= ~CTL1000_ENABLE_MASTER;
+		fallthrough;
+	default:
+		break;
+	}
+
+	return e1e_wphy(hw, MII_CTRL1000, phy_data);
+}
+
+/**
+ *  e1000_copper_link_setup_82577 - Setup 82577 PHY for copper link
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets up Carrier-sense on Transmit and downshift values.
+ **/
+s32 e1000_copper_link_setup_82577(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 phy_data;
+
+	/* Enable CRS on Tx. This must be set for half-duplex operation. */
+	ret_val = e1e_rphy(hw, I82577_CFG_REG, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_data |= I82577_CFG_ASSERT_CRS_ON_TX;
+
+	/* Enable downshift */
+	phy_data |= I82577_CFG_ENABLE_DOWNSHIFT;
+
+	ret_val = e1e_wphy(hw, I82577_CFG_REG, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Set MDI/MDIX mode */
+	ret_val = e1e_rphy(hw, I82577_PHY_CTRL_2, &phy_data);
+	if (ret_val)
+		return ret_val;
+	phy_data &= ~I82577_PHY_CTRL2_MDIX_CFG_MASK;
+	/* Options:
+	 *   0 - Auto (default)
+	 *   1 - MDI mode
+	 *   2 - MDI-X mode
+	 */
+	switch (hw->phy.mdix) {
+	case 1:
+		break;
+	case 2:
+		phy_data |= I82577_PHY_CTRL2_MANUAL_MDIX;
+		break;
+	case 0:
+	default:
+		phy_data |= I82577_PHY_CTRL2_AUTO_MDI_MDIX;
+		break;
+	}
+	ret_val = e1e_wphy(hw, I82577_PHY_CTRL_2, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	return e1000_set_master_slave_mode(hw);
+}
+
+/**
+ *  e1000e_copper_link_setup_m88 - Setup m88 PHY's for copper link
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets up MDI/MDI-X and polarity for m88 PHY's.  If necessary, transmit clock
+ *  and downshift values are set also.
+ **/
+s32 e1000e_copper_link_setup_m88(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+
+	/* Enable CRS on Tx. This must be set for half-duplex operation. */
+	ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* For BM PHY this bit is downshift enable */
+	if (phy->type != e1000_phy_bm)
+		phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+
+	/* Options:
+	 *   MDI/MDI-X = 0 (default)
+	 *   0 - Auto for all speeds
+	 *   1 - MDI mode
+	 *   2 - MDI-X mode
+	 *   3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes)
+	 */
+	phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+
+	switch (phy->mdix) {
+	case 1:
+		phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE;
+		break;
+	case 2:
+		phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE;
+		break;
+	case 3:
+		phy_data |= M88E1000_PSCR_AUTO_X_1000T;
+		break;
+	case 0:
+	default:
+		phy_data |= M88E1000_PSCR_AUTO_X_MODE;
+		break;
+	}
+
+	/* Options:
+	 *   disable_polarity_correction = 0 (default)
+	 *       Automatic Correction for Reversed Cable Polarity
+	 *   0 - Disabled
+	 *   1 - Enabled
+	 */
+	phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL;
+	if (phy->disable_polarity_correction)
+		phy_data |= M88E1000_PSCR_POLARITY_REVERSAL;
+
+	/* Enable downshift on BM (disabled by default) */
+	if (phy->type == e1000_phy_bm) {
+		/* For 82574/82583, first disable then enable downshift */
+		if (phy->id == BME1000_E_PHY_ID_R2) {
+			phy_data &= ~BME1000_PSCR_ENABLE_DOWNSHIFT;
+			ret_val = e1e_wphy(hw, M88E1000_PHY_SPEC_CTRL,
+					   phy_data);
+			if (ret_val)
+				return ret_val;
+			/* Commit the changes. */
+			ret_val = phy->ops.commit(hw);
+			if (ret_val) {
+				e_dbg("Error committing the PHY changes\n");
+				return ret_val;
+			}
+		}
+
+		phy_data |= BME1000_PSCR_ENABLE_DOWNSHIFT;
+	}
+
+	ret_val = e1e_wphy(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	if ((phy->type == e1000_phy_m88) &&
+	    (phy->revision < E1000_REVISION_4) &&
+	    (phy->id != BME1000_E_PHY_ID_R2)) {
+		/* Force TX_CLK in the Extended PHY Specific Control Register
+		 * to 25MHz clock.
+		 */
+		ret_val = e1e_rphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		phy_data |= M88E1000_EPSCR_TX_CLK_25;
+
+		if ((phy->revision == 2) && (phy->id == M88E1111_I_PHY_ID)) {
+			/* 82573L PHY - set the downshift counter to 5x. */
+			phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK;
+			phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X;
+		} else {
+			/* Configure Master and Slave downshift values */
+			phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK |
+				      M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK);
+			phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X |
+				     M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X);
+		}
+		ret_val = e1e_wphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data);
+		if (ret_val)
+			return ret_val;
+	}
+
+	if ((phy->type == e1000_phy_bm) && (phy->id == BME1000_E_PHY_ID_R2)) {
+		/* Set PHY page 0, register 29 to 0x0003 */
+		ret_val = e1e_wphy(hw, 29, 0x0003);
+		if (ret_val)
+			return ret_val;
+
+		/* Set PHY page 0, register 30 to 0x0000 */
+		ret_val = e1e_wphy(hw, 30, 0x0000);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Commit the changes. */
+	if (phy->ops.commit) {
+		ret_val = phy->ops.commit(hw);
+		if (ret_val) {
+			e_dbg("Error committing the PHY changes\n");
+			return ret_val;
+		}
+	}
+
+	if (phy->type == e1000_phy_82578) {
+		ret_val = e1e_rphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		/* 82578 PHY - set the downshift count to 1x. */
+		phy_data |= I82578_EPSCR_DOWNSHIFT_ENABLE;
+		phy_data &= ~I82578_EPSCR_DOWNSHIFT_COUNTER_MASK;
+		ret_val = e1e_wphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data);
+		if (ret_val)
+			return ret_val;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_copper_link_setup_igp - Setup igp PHY's for copper link
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets up LPLU, MDI/MDI-X, polarity, Smartspeed and Master/Slave config for
+ *  igp PHY's.
+ **/
+s32 e1000e_copper_link_setup_igp(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+
+	ret_val = e1000_phy_hw_reset(hw);
+	if (ret_val) {
+		e_dbg("Error resetting the PHY.\n");
+		return ret_val;
+	}
+
+	/* Wait 100ms for MAC to configure PHY from NVM settings, to avoid
+	 * timeout issues when LFS is enabled.
+	 */
+	msleep(100);
+
+	/* disable lplu d0 during driver init */
+	if (hw->phy.ops.set_d0_lplu_state) {
+		ret_val = hw->phy.ops.set_d0_lplu_state(hw, false);
+		if (ret_val) {
+			e_dbg("Error Disabling LPLU D0\n");
+			return ret_val;
+		}
+	}
+	/* Configure mdi-mdix settings */
+	ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CTRL, &data);
+	if (ret_val)
+		return ret_val;
+
+	data &= ~IGP01E1000_PSCR_AUTO_MDIX;
+
+	switch (phy->mdix) {
+	case 1:
+		data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX;
+		break;
+	case 2:
+		data |= IGP01E1000_PSCR_FORCE_MDI_MDIX;
+		break;
+	case 0:
+	default:
+		data |= IGP01E1000_PSCR_AUTO_MDIX;
+		break;
+	}
+	ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CTRL, data);
+	if (ret_val)
+		return ret_val;
+
+	/* set auto-master slave resolution settings */
+	if (hw->mac.autoneg) {
+		/* when autonegotiation advertisement is only 1000Mbps then we
+		 * should disable SmartSpeed and enable Auto MasterSlave
+		 * resolution as hardware default.
+		 */
+		if (phy->autoneg_advertised == ADVERTISE_1000_FULL) {
+			/* Disable SmartSpeed */
+			ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   &data);
+			if (ret_val)
+				return ret_val;
+
+			data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   data);
+			if (ret_val)
+				return ret_val;
+
+			/* Set auto Master/Slave resolution process */
+			ret_val = e1e_rphy(hw, MII_CTRL1000, &data);
+			if (ret_val)
+				return ret_val;
+
+			data &= ~CTL1000_ENABLE_MASTER;
+			ret_val = e1e_wphy(hw, MII_CTRL1000, data);
+			if (ret_val)
+				return ret_val;
+		}
+
+		ret_val = e1000_set_master_slave_mode(hw);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_phy_setup_autoneg - Configure PHY for auto-negotiation
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the MII auto-neg advertisement register and/or the 1000T control
+ *  register and if the PHY is already setup for auto-negotiation, then
+ *  return successful.  Otherwise, setup advertisement and flow control to
+ *  the appropriate values for the wanted auto-negotiation.
+ **/
+static s32 e1000_phy_setup_autoneg(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 mii_autoneg_adv_reg;
+	u16 mii_1000t_ctrl_reg = 0;
+
+	phy->autoneg_advertised &= phy->autoneg_mask;
+
+	/* Read the MII Auto-Neg Advertisement Register (Address 4). */
+	ret_val = e1e_rphy(hw, MII_ADVERTISE, &mii_autoneg_adv_reg);
+	if (ret_val)
+		return ret_val;
+
+	if (phy->autoneg_mask & ADVERTISE_1000_FULL) {
+		/* Read the MII 1000Base-T Control Register (Address 9). */
+		ret_val = e1e_rphy(hw, MII_CTRL1000, &mii_1000t_ctrl_reg);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Need to parse both autoneg_advertised and fc and set up
+	 * the appropriate PHY registers.  First we will parse for
+	 * autoneg_advertised software override.  Since we can advertise
+	 * a plethora of combinations, we need to check each bit
+	 * individually.
+	 */
+
+	/* First we clear all the 10/100 mb speed bits in the Auto-Neg
+	 * Advertisement Register (Address 4) and the 1000 mb speed bits in
+	 * the  1000Base-T Control Register (Address 9).
+	 */
+	mii_autoneg_adv_reg &= ~(ADVERTISE_100FULL |
+				 ADVERTISE_100HALF |
+				 ADVERTISE_10FULL | ADVERTISE_10HALF);
+	mii_1000t_ctrl_reg &= ~(ADVERTISE_1000HALF | ADVERTISE_1000FULL);
+
+	e_dbg("autoneg_advertised %x\n", phy->autoneg_advertised);
+
+	/* Do we want to advertise 10 Mb Half Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_10_HALF) {
+		e_dbg("Advertise 10mb Half duplex\n");
+		mii_autoneg_adv_reg |= ADVERTISE_10HALF;
+	}
+
+	/* Do we want to advertise 10 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_10_FULL) {
+		e_dbg("Advertise 10mb Full duplex\n");
+		mii_autoneg_adv_reg |= ADVERTISE_10FULL;
+	}
+
+	/* Do we want to advertise 100 Mb Half Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_100_HALF) {
+		e_dbg("Advertise 100mb Half duplex\n");
+		mii_autoneg_adv_reg |= ADVERTISE_100HALF;
+	}
+
+	/* Do we want to advertise 100 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_100_FULL) {
+		e_dbg("Advertise 100mb Full duplex\n");
+		mii_autoneg_adv_reg |= ADVERTISE_100FULL;
+	}
+
+	/* We do not allow the Phy to advertise 1000 Mb Half Duplex */
+	if (phy->autoneg_advertised & ADVERTISE_1000_HALF)
+		e_dbg("Advertise 1000mb Half duplex request denied!\n");
+
+	/* Do we want to advertise 1000 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_1000_FULL) {
+		e_dbg("Advertise 1000mb Full duplex\n");
+		mii_1000t_ctrl_reg |= ADVERTISE_1000FULL;
+	}
+
+	/* Check for a software override of the flow control settings, and
+	 * setup the PHY advertisement registers accordingly.  If
+	 * auto-negotiation is enabled, then software will have to set the
+	 * "PAUSE" bits to the correct value in the Auto-Negotiation
+	 * Advertisement Register (MII_ADVERTISE) and re-start auto-
+	 * negotiation.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *      0:  Flow control is completely disabled
+	 *      1:  Rx flow control is enabled (we can receive pause frames
+	 *          but not send pause frames).
+	 *      2:  Tx flow control is enabled (we can send pause frames
+	 *          but we do not support receiving pause frames).
+	 *      3:  Both Rx and Tx flow control (symmetric) are enabled.
+	 *  other:  No software override.  The flow control configuration
+	 *          in the EEPROM is used.
+	 */
+	switch (hw->fc.current_mode) {
+	case e1000_fc_none:
+		/* Flow control (Rx & Tx) is completely disabled by a
+		 * software over-ride.
+		 */
+		mii_autoneg_adv_reg &=
+		    ~(ADVERTISE_PAUSE_ASYM | ADVERTISE_PAUSE_CAP);
+		phy->autoneg_advertised &=
+		    ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
+		break;
+	case e1000_fc_rx_pause:
+		/* Rx Flow control is enabled, and Tx Flow control is
+		 * disabled, by a software over-ride.
+		 *
+		 * Since there really isn't a way to advertise that we are
+		 * capable of Rx Pause ONLY, we will advertise that we
+		 * support both symmetric and asymmetric Rx PAUSE.  Later
+		 * (in e1000e_config_fc_after_link_up) we will disable the
+		 * hw's ability to send PAUSE frames.
+		 */
+		mii_autoneg_adv_reg |=
+		    (ADVERTISE_PAUSE_ASYM | ADVERTISE_PAUSE_CAP);
+		phy->autoneg_advertised |=
+		    (ADVERTISED_Pause | ADVERTISED_Asym_Pause);
+		break;
+	case e1000_fc_tx_pause:
+		/* Tx Flow control is enabled, and Rx Flow control is
+		 * disabled, by a software over-ride.
+		 */
+		mii_autoneg_adv_reg |= ADVERTISE_PAUSE_ASYM;
+		mii_autoneg_adv_reg &= ~ADVERTISE_PAUSE_CAP;
+		phy->autoneg_advertised |= ADVERTISED_Asym_Pause;
+		phy->autoneg_advertised &= ~ADVERTISED_Pause;
+		break;
+	case e1000_fc_full:
+		/* Flow control (both Rx and Tx) is enabled by a software
+		 * over-ride.
+		 */
+		mii_autoneg_adv_reg |=
+		    (ADVERTISE_PAUSE_ASYM | ADVERTISE_PAUSE_CAP);
+		phy->autoneg_advertised |=
+		    (ADVERTISED_Pause | ADVERTISED_Asym_Pause);
+		break;
+	default:
+		e_dbg("Flow control param set incorrectly\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	ret_val = e1e_wphy(hw, MII_ADVERTISE, mii_autoneg_adv_reg);
+	if (ret_val)
+		return ret_val;
+
+	e_dbg("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg);
+
+	if (phy->autoneg_mask & ADVERTISE_1000_FULL)
+		ret_val = e1e_wphy(hw, MII_CTRL1000, mii_1000t_ctrl_reg);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_copper_link_autoneg - Setup/Enable autoneg for copper link
+ *  @hw: pointer to the HW structure
+ *
+ *  Performs initial bounds checking on autoneg advertisement parameter, then
+ *  configure to advertise the full capability.  Setup the PHY to autoneg
+ *  and restart the negotiation process between the link partner.  If
+ *  autoneg_wait_to_complete, then wait for autoneg to complete before exiting.
+ **/
+static s32 e1000_copper_link_autoneg(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_ctrl;
+
+	/* Perform some bounds checking on the autoneg advertisement
+	 * parameter.
+	 */
+	phy->autoneg_advertised &= phy->autoneg_mask;
+
+	/* If autoneg_advertised is zero, we assume it was not defaulted
+	 * by the calling code so we set to advertise full capability.
+	 */
+	if (!phy->autoneg_advertised)
+		phy->autoneg_advertised = phy->autoneg_mask;
+
+	e_dbg("Reconfiguring auto-neg advertisement params\n");
+	ret_val = e1000_phy_setup_autoneg(hw);
+	if (ret_val) {
+		e_dbg("Error Setting up Auto-Negotiation\n");
+		return ret_val;
+	}
+	e_dbg("Restarting Auto-Neg\n");
+
+	/* Restart auto-negotiation by setting the Auto Neg Enable bit and
+	 * the Auto Neg Restart bit in the PHY control register.
+	 */
+	ret_val = e1e_rphy(hw, MII_BMCR, &phy_ctrl);
+	if (ret_val)
+		return ret_val;
+
+	phy_ctrl |= (BMCR_ANENABLE | BMCR_ANRESTART);
+	ret_val = e1e_wphy(hw, MII_BMCR, phy_ctrl);
+	if (ret_val)
+		return ret_val;
+
+	/* Does the user want to wait for Auto-Neg to complete here, or
+	 * check at a later time (for example, callback routine).
+	 */
+	if (phy->autoneg_wait_to_complete) {
+		ret_val = e1000_wait_autoneg(hw);
+		if (ret_val) {
+			e_dbg("Error while waiting for autoneg to complete\n");
+			return ret_val;
+		}
+	}
+
+	hw->mac.get_link_status = true;
+
+	return ret_val;
+}
+
+/**
+ *  e1000e_setup_copper_link - Configure copper link settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Calls the appropriate function to configure the link for auto-neg or forced
+ *  speed and duplex.  Then we check for link, once link is established calls
+ *  to configure collision distance and flow control are called.  If link is
+ *  not established, we return -E1000_ERR_PHY (-2).
+ **/
+s32 e1000e_setup_copper_link(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	bool link;
+
+	if (hw->mac.autoneg) {
+		/* Setup autoneg and flow control advertisement and perform
+		 * autonegotiation.
+		 */
+		ret_val = e1000_copper_link_autoneg(hw);
+		if (ret_val)
+			return ret_val;
+	} else {
+		/* PHY will be set to 10H, 10F, 100H or 100F
+		 * depending on user settings.
+		 */
+		e_dbg("Forcing Speed and Duplex\n");
+		ret_val = hw->phy.ops.force_speed_duplex(hw);
+		if (ret_val) {
+			e_dbg("Error Forcing Speed and Duplex\n");
+			return ret_val;
+		}
+	}
+
+	/* Check link status. Wait up to 100 microseconds for link to become
+	 * valid.
+	 */
+	ret_val = e1000e_phy_has_link_generic(hw, COPPER_LINK_UP_LIMIT, 10,
+					      &link);
+	if (ret_val)
+		return ret_val;
+
+	if (link) {
+		e_dbg("Valid link established!!!\n");
+		hw->mac.ops.config_collision_dist(hw);
+		ret_val = e1000e_config_fc_after_link_up(hw);
+	} else {
+		e_dbg("Unable to establish link!!!\n");
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000e_phy_force_speed_duplex_igp - Force speed/duplex for igp PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Calls the PHY setup function to force speed and duplex.  Clears the
+ *  auto-crossover to force MDI manually.  Waits for link and returns
+ *  successful if link up is successful, else -E1000_ERR_PHY (-2).
+ **/
+s32 e1000e_phy_force_speed_duplex_igp(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+	bool link;
+
+	ret_val = e1e_rphy(hw, MII_BMCR, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	e1000e_phy_force_speed_duplex_setup(hw, &phy_data);
+
+	ret_val = e1e_wphy(hw, MII_BMCR, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Clear Auto-Crossover to force MDI manually.  IGP requires MDI
+	 * forced whenever speed and duplex are forced.
+	 */
+	ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX;
+	phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX;
+
+	ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CTRL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	e_dbg("IGP PSCR: %X\n", phy_data);
+
+	udelay(1);
+
+	if (phy->autoneg_wait_to_complete) {
+		e_dbg("Waiting for forced speed/duplex link on IGP phy.\n");
+
+		ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						      100000, &link);
+		if (ret_val)
+			return ret_val;
+
+		if (!link)
+			e_dbg("Link taking longer than expected.\n");
+
+		/* Try once more */
+		ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						      100000, &link);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000e_phy_force_speed_duplex_m88 - Force speed/duplex for m88 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Calls the PHY setup function to force speed and duplex.  Clears the
+ *  auto-crossover to force MDI manually.  Resets the PHY to commit the
+ *  changes.  If time expires while waiting for link up, we reset the DSP.
+ *  After reset, TX_CLK and CRS on Tx must be set.  Return successful upon
+ *  successful completion, else return corresponding error code.
+ **/
+s32 e1000e_phy_force_speed_duplex_m88(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+	bool link;
+
+	/* Clear Auto-Crossover to force MDI manually.  M88E1000 requires MDI
+	 * forced whenever speed and duplex are forced.
+	 */
+	ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+	ret_val = e1e_wphy(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	e_dbg("M88E1000 PSCR: %X\n", phy_data);
+
+	ret_val = e1e_rphy(hw, MII_BMCR, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	e1000e_phy_force_speed_duplex_setup(hw, &phy_data);
+
+	ret_val = e1e_wphy(hw, MII_BMCR, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Reset the phy to commit changes. */
+	if (hw->phy.ops.commit) {
+		ret_val = hw->phy.ops.commit(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	if (phy->autoneg_wait_to_complete) {
+		e_dbg("Waiting for forced speed/duplex link on M88 phy.\n");
+
+		ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						      100000, &link);
+		if (ret_val)
+			return ret_val;
+
+		if (!link) {
+			if (hw->phy.type != e1000_phy_m88) {
+				e_dbg("Link taking longer than expected.\n");
+			} else {
+				/* We didn't get link.
+				 * Reset the DSP and cross our fingers.
+				 */
+				ret_val = e1e_wphy(hw, M88E1000_PHY_PAGE_SELECT,
+						   0x001d);
+				if (ret_val)
+					return ret_val;
+				ret_val = e1000e_phy_reset_dsp(hw);
+				if (ret_val)
+					return ret_val;
+			}
+		}
+
+		/* Try once more */
+		ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						      100000, &link);
+		if (ret_val)
+			return ret_val;
+	}
+
+	if (hw->phy.type != e1000_phy_m88)
+		return 0;
+
+	ret_val = e1e_rphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Resetting the phy means we need to re-force TX_CLK in the
+	 * Extended PHY Specific Control Register to 25MHz clock from
+	 * the reset value of 2.5MHz.
+	 */
+	phy_data |= M88E1000_EPSCR_TX_CLK_25;
+	ret_val = e1e_wphy(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* In addition, we must re-enable CRS on Tx for both half and full
+	 * duplex.
+	 */
+	ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+	ret_val = e1e_wphy(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_phy_force_speed_duplex_ife - Force PHY speed & duplex
+ *  @hw: pointer to the HW structure
+ *
+ *  Forces the speed and duplex settings of the PHY.
+ *  This is a function pointer entry point only called by
+ *  PHY setup routines.
+ **/
+s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+	bool link;
+
+	ret_val = e1e_rphy(hw, MII_BMCR, &data);
+	if (ret_val)
+		return ret_val;
+
+	e1000e_phy_force_speed_duplex_setup(hw, &data);
+
+	ret_val = e1e_wphy(hw, MII_BMCR, data);
+	if (ret_val)
+		return ret_val;
+
+	/* Disable MDI-X support for 10/100 */
+	ret_val = e1e_rphy(hw, IFE_PHY_MDIX_CONTROL, &data);
+	if (ret_val)
+		return ret_val;
+
+	data &= ~IFE_PMC_AUTO_MDIX;
+	data &= ~IFE_PMC_FORCE_MDIX;
+
+	ret_val = e1e_wphy(hw, IFE_PHY_MDIX_CONTROL, data);
+	if (ret_val)
+		return ret_val;
+
+	e_dbg("IFE PMC: %X\n", data);
+
+	udelay(1);
+
+	if (phy->autoneg_wait_to_complete) {
+		e_dbg("Waiting for forced speed/duplex link on IFE phy.\n");
+
+		ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						      100000, &link);
+		if (ret_val)
+			return ret_val;
+
+		if (!link)
+			e_dbg("Link taking longer than expected.\n");
+
+		/* Try once more */
+		ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						      100000, &link);
+		if (ret_val)
+			return ret_val;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000e_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex
+ *  @hw: pointer to the HW structure
+ *  @phy_ctrl: pointer to current value of MII_BMCR
+ *
+ *  Forces speed and duplex on the PHY by doing the following: disable flow
+ *  control, force speed/duplex on the MAC, disable auto speed detection,
+ *  disable auto-negotiation, configure duplex, configure speed, configure
+ *  the collision distance, write configuration to CTRL register.  The
+ *  caller must write to the MII_BMCR register for these settings to
+ *  take affect.
+ **/
+void e1000e_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 ctrl;
+
+	/* Turn off flow control when forcing speed/duplex */
+	hw->fc.current_mode = e1000_fc_none;
+
+	/* Force speed/duplex on the mac */
+	ctrl = er32(CTRL);
+	ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+	ctrl &= ~E1000_CTRL_SPD_SEL;
+
+	/* Disable Auto Speed Detection */
+	ctrl &= ~E1000_CTRL_ASDE;
+
+	/* Disable autoneg on the phy */
+	*phy_ctrl &= ~BMCR_ANENABLE;
+
+	/* Forcing Full or Half Duplex? */
+	if (mac->forced_speed_duplex & E1000_ALL_HALF_DUPLEX) {
+		ctrl &= ~E1000_CTRL_FD;
+		*phy_ctrl &= ~BMCR_FULLDPLX;
+		e_dbg("Half Duplex\n");
+	} else {
+		ctrl |= E1000_CTRL_FD;
+		*phy_ctrl |= BMCR_FULLDPLX;
+		e_dbg("Full Duplex\n");
+	}
+
+	/* Forcing 10mb or 100mb? */
+	if (mac->forced_speed_duplex & E1000_ALL_100_SPEED) {
+		ctrl |= E1000_CTRL_SPD_100;
+		*phy_ctrl |= BMCR_SPEED100;
+		*phy_ctrl &= ~BMCR_SPEED1000;
+		e_dbg("Forcing 100mb\n");
+	} else {
+		ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100);
+		*phy_ctrl &= ~(BMCR_SPEED1000 | BMCR_SPEED100);
+		e_dbg("Forcing 10mb\n");
+	}
+
+	hw->mac.ops.config_collision_dist(hw);
+
+	ew32(CTRL, ctrl);
+}
+
+/**
+ *  e1000e_set_d3_lplu_state - Sets low power link up state for D3
+ *  @hw: pointer to the HW structure
+ *  @active: boolean used to enable/disable lplu
+ *
+ *  Success returns 0, Failure returns 1
+ *
+ *  The low power link up (lplu) state is set to the power management level D3
+ *  and SmartSpeed is disabled when active is true, else clear lplu for D3
+ *  and enable Smartspeed.  LPLU and Smartspeed are mutually exclusive.  LPLU
+ *  is used during Dx states where the power conservation is most important.
+ *  During driver activity, SmartSpeed should be enabled so performance is
+ *  maintained.
+ **/
+s32 e1000e_set_d3_lplu_state(struct e1000_hw *hw, bool active)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+
+	ret_val = e1e_rphy(hw, IGP02E1000_PHY_POWER_MGMT, &data);
+	if (ret_val)
+		return ret_val;
+
+	if (!active) {
+		data &= ~IGP02E1000_PM_D3_LPLU;
+		ret_val = e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, data);
+		if (ret_val)
+			return ret_val;
+		/* LPLU and SmartSpeed are mutually exclusive.  LPLU is used
+		 * during Dx states where the power conservation is most
+		 * important.  During driver activity we should enable
+		 * SmartSpeed, so performance is maintained.
+		 */
+		if (phy->smart_speed == e1000_smart_speed_on) {
+			ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   &data);
+			if (ret_val)
+				return ret_val;
+
+			data |= IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   data);
+			if (ret_val)
+				return ret_val;
+		} else if (phy->smart_speed == e1000_smart_speed_off) {
+			ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   &data);
+			if (ret_val)
+				return ret_val;
+
+			data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG,
+					   data);
+			if (ret_val)
+				return ret_val;
+		}
+	} else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) ||
+		   (phy->autoneg_advertised == E1000_ALL_NOT_GIG) ||
+		   (phy->autoneg_advertised == E1000_ALL_10_SPEED)) {
+		data |= IGP02E1000_PM_D3_LPLU;
+		ret_val = e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, data);
+		if (ret_val)
+			return ret_val;
+
+		/* When LPLU is enabled, we should disable SmartSpeed */
+		ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_CONFIG, &data);
+		if (ret_val)
+			return ret_val;
+
+		data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+		ret_val = e1e_wphy(hw, IGP01E1000_PHY_PORT_CONFIG, data);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000e_check_downshift - Checks whether a downshift in speed occurred
+ *  @hw: pointer to the HW structure
+ *
+ *  Success returns 0, Failure returns 1
+ *
+ *  A downshift is detected by querying the PHY link health.
+ **/
+s32 e1000e_check_downshift(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, offset, mask;
+
+	switch (phy->type) {
+	case e1000_phy_m88:
+	case e1000_phy_gg82563:
+	case e1000_phy_bm:
+	case e1000_phy_82578:
+		offset = M88E1000_PHY_SPEC_STATUS;
+		mask = M88E1000_PSSR_DOWNSHIFT;
+		break;
+	case e1000_phy_igp_2:
+	case e1000_phy_igp_3:
+		offset = IGP01E1000_PHY_LINK_HEALTH;
+		mask = IGP01E1000_PLHR_SS_DOWNGRADE;
+		break;
+	default:
+		/* speed downshift not supported */
+		phy->speed_downgraded = false;
+		return 0;
+	}
+
+	ret_val = e1e_rphy(hw, offset, &phy_data);
+
+	if (!ret_val)
+		phy->speed_downgraded = !!(phy_data & mask);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_polarity_m88 - Checks the polarity.
+ *  @hw: pointer to the HW structure
+ *
+ *  Success returns 0, Failure returns -E1000_ERR_PHY (-2)
+ *
+ *  Polarity is determined based on the PHY specific status register.
+ **/
+s32 e1000_check_polarity_m88(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+
+	ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_STATUS, &data);
+
+	if (!ret_val)
+		phy->cable_polarity = ((data & M88E1000_PSSR_REV_POLARITY)
+				       ? e1000_rev_polarity_reversed
+				       : e1000_rev_polarity_normal);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_polarity_igp - Checks the polarity.
+ *  @hw: pointer to the HW structure
+ *
+ *  Success returns 0, Failure returns -E1000_ERR_PHY (-2)
+ *
+ *  Polarity is determined based on the PHY port status register, and the
+ *  current speed (since there is no polarity at 100Mbps).
+ **/
+s32 e1000_check_polarity_igp(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data, offset, mask;
+
+	/* Polarity is determined based on the speed of
+	 * our connection.
+	 */
+	ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_STATUS, &data);
+	if (ret_val)
+		return ret_val;
+
+	if ((data & IGP01E1000_PSSR_SPEED_MASK) ==
+	    IGP01E1000_PSSR_SPEED_1000MBPS) {
+		offset = IGP01E1000_PHY_PCS_INIT_REG;
+		mask = IGP01E1000_PHY_POLARITY_MASK;
+	} else {
+		/* This really only applies to 10Mbps since
+		 * there is no polarity for 100Mbps (always 0).
+		 */
+		offset = IGP01E1000_PHY_PORT_STATUS;
+		mask = IGP01E1000_PSSR_POLARITY_REVERSED;
+	}
+
+	ret_val = e1e_rphy(hw, offset, &data);
+
+	if (!ret_val)
+		phy->cable_polarity = ((data & mask)
+				       ? e1000_rev_polarity_reversed
+				       : e1000_rev_polarity_normal);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_polarity_ife - Check cable polarity for IFE PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Polarity is determined on the polarity reversal feature being enabled.
+ **/
+s32 e1000_check_polarity_ife(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, offset, mask;
+
+	/* Polarity is determined based on the reversal feature being enabled.
+	 */
+	if (phy->polarity_correction) {
+		offset = IFE_PHY_EXTENDED_STATUS_CONTROL;
+		mask = IFE_PESC_POLARITY_REVERSED;
+	} else {
+		offset = IFE_PHY_SPECIAL_CONTROL;
+		mask = IFE_PSC_FORCE_POLARITY;
+	}
+
+	ret_val = e1e_rphy(hw, offset, &phy_data);
+
+	if (!ret_val)
+		phy->cable_polarity = ((phy_data & mask)
+				       ? e1000_rev_polarity_reversed
+				       : e1000_rev_polarity_normal);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_wait_autoneg - Wait for auto-neg completion
+ *  @hw: pointer to the HW structure
+ *
+ *  Waits for auto-negotiation to complete or for the auto-negotiation time
+ *  limit to expire, which ever happens first.
+ **/
+static s32 e1000_wait_autoneg(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u16 i, phy_status;
+
+	/* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */
+	for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) {
+		ret_val = e1e_rphy(hw, MII_BMSR, &phy_status);
+		if (ret_val)
+			break;
+		ret_val = e1e_rphy(hw, MII_BMSR, &phy_status);
+		if (ret_val)
+			break;
+		if (phy_status & BMSR_ANEGCOMPLETE)
+			break;
+		msleep(100);
+	}
+
+	/* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation
+	 * has completed.
+	 */
+	return ret_val;
+}
+
+/**
+ *  e1000e_phy_has_link_generic - Polls PHY for link
+ *  @hw: pointer to the HW structure
+ *  @iterations: number of times to poll for link
+ *  @usec_interval: delay between polling attempts
+ *  @success: pointer to whether polling was successful or not
+ *
+ *  Polls the PHY status register for link, 'iterations' number of times.
+ **/
+s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations,
+				u32 usec_interval, bool *success)
+{
+	s32 ret_val = 0;
+	u16 i, phy_status;
+
+	*success = false;
+	for (i = 0; i < iterations; i++) {
+		/* Some PHYs require the MII_BMSR register to be read
+		 * twice due to the link bit being sticky.  No harm doing
+		 * it across the board.
+		 */
+		ret_val = e1e_rphy(hw, MII_BMSR, &phy_status);
+		if (ret_val) {
+			/* If the first read fails, another entity may have
+			 * ownership of the resources, wait and try again to
+			 * see if they have relinquished the resources yet.
+			 */
+			if (usec_interval >= 1000)
+				msleep(usec_interval / 1000);
+			else
+				udelay(usec_interval);
+		}
+		ret_val = e1e_rphy(hw, MII_BMSR, &phy_status);
+		if (ret_val)
+			break;
+		if (phy_status & BMSR_LSTATUS) {
+			*success = true;
+			break;
+		}
+		if (usec_interval >= 1000)
+			msleep(usec_interval / 1000);
+		else
+			udelay(usec_interval);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000e_get_cable_length_m88 - Determine cable length for m88 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the PHY specific status register to retrieve the cable length
+ *  information.  The cable length is determined by averaging the minimum and
+ *  maximum values to get the "average" cable length.  The m88 PHY has four
+ *  possible cable length values, which are:
+ *	Register Value		Cable Length
+ *	0			< 50 meters
+ *	1			50 - 80 meters
+ *	2			80 - 110 meters
+ *	3			110 - 140 meters
+ *	4			> 140 meters
+ **/
+s32 e1000e_get_cable_length_m88(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, index;
+
+	ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	index = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+		 M88E1000_PSSR_CABLE_LENGTH_SHIFT);
+
+	if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1)
+		return -E1000_ERR_PHY;
+
+	phy->min_cable_length = e1000_m88_cable_length_table[index];
+	phy->max_cable_length = e1000_m88_cable_length_table[index + 1];
+
+	phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2;
+
+	return 0;
+}
+
+/**
+ *  e1000e_get_cable_length_igp_2 - Determine cable length for igp2 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  The automatic gain control (agc) normalizes the amplitude of the
+ *  received signal, adjusting for the attenuation produced by the
+ *  cable.  By reading the AGC registers, which represent the
+ *  combination of coarse and fine gain value, the value can be put
+ *  into a lookup table to obtain the approximate cable length
+ *  for each channel.
+ **/
+s32 e1000e_get_cable_length_igp_2(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, i, agc_value = 0;
+	u16 cur_agc_index, max_agc_index = 0;
+	u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1;
+	static const u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = {
+		IGP02E1000_PHY_AGC_A,
+		IGP02E1000_PHY_AGC_B,
+		IGP02E1000_PHY_AGC_C,
+		IGP02E1000_PHY_AGC_D
+	};
+
+	/* Read the AGC registers for all channels */
+	for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) {
+		ret_val = e1e_rphy(hw, agc_reg_array[i], &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		/* Getting bits 15:9, which represent the combination of
+		 * coarse and fine gain values.  The result is a number
+		 * that can be put into the lookup table to obtain the
+		 * approximate cable length.
+		 */
+		cur_agc_index = ((phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) &
+				 IGP02E1000_AGC_LENGTH_MASK);
+
+		/* Array index bound check. */
+		if ((cur_agc_index >= IGP02E1000_CABLE_LENGTH_TABLE_SIZE) ||
+		    (cur_agc_index == 0))
+			return -E1000_ERR_PHY;
+
+		/* Remove min & max AGC values from calculation. */
+		if (e1000_igp_2_cable_length_table[min_agc_index] >
+		    e1000_igp_2_cable_length_table[cur_agc_index])
+			min_agc_index = cur_agc_index;
+		if (e1000_igp_2_cable_length_table[max_agc_index] <
+		    e1000_igp_2_cable_length_table[cur_agc_index])
+			max_agc_index = cur_agc_index;
+
+		agc_value += e1000_igp_2_cable_length_table[cur_agc_index];
+	}
+
+	agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] +
+		      e1000_igp_2_cable_length_table[max_agc_index]);
+	agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2);
+
+	/* Calculate cable length with the error range of +/- 10 meters. */
+	phy->min_cable_length = (((agc_value - IGP02E1000_AGC_RANGE) > 0) ?
+				 (agc_value - IGP02E1000_AGC_RANGE) : 0);
+	phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE;
+
+	phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2;
+
+	return 0;
+}
+
+/**
+ *  e1000e_get_phy_info_m88 - Retrieve PHY information
+ *  @hw: pointer to the HW structure
+ *
+ *  Valid for only copper links.  Read the PHY status register (sticky read)
+ *  to verify that link is up.  Read the PHY special control register to
+ *  determine the polarity and 10base-T extended distance.  Read the PHY
+ *  special status register to determine MDI/MDIx and current speed.  If
+ *  speed is 1000, then determine cable length, local and remote receiver.
+ **/
+s32 e1000e_get_phy_info_m88(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+	bool link;
+
+	if (phy->media_type != e1000_media_type_copper) {
+		e_dbg("Phy info is only valid for copper media\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link);
+	if (ret_val)
+		return ret_val;
+
+	if (!link) {
+		e_dbg("Phy info is only valid if link is up\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy->polarity_correction = !!(phy_data &
+				      M88E1000_PSCR_POLARITY_REVERSAL);
+
+	ret_val = e1000_check_polarity_m88(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1e_rphy(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	phy->is_mdix = !!(phy_data & M88E1000_PSSR_MDIX);
+
+	if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) {
+		ret_val = hw->phy.ops.get_cable_length(hw);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = e1e_rphy(hw, MII_STAT1000, &phy_data);
+		if (ret_val)
+			return ret_val;
+
+		phy->local_rx = (phy_data & LPA_1000LOCALRXOK)
+		    ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
+
+		phy->remote_rx = (phy_data & LPA_1000REMRXOK)
+		    ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
+	} else {
+		/* Set values to "undefined" */
+		phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
+		phy->local_rx = e1000_1000t_rx_status_undefined;
+		phy->remote_rx = e1000_1000t_rx_status_undefined;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000e_get_phy_info_igp - Retrieve igp PHY information
+ *  @hw: pointer to the HW structure
+ *
+ *  Read PHY status to determine if link is up.  If link is up, then
+ *  set/determine 10base-T extended distance and polarity correction.  Read
+ *  PHY port status to determine MDI/MDIx and speed.  Based on the speed,
+ *  determine on the cable length, local and remote receiver.
+ **/
+s32 e1000e_get_phy_info_igp(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+	bool link;
+
+	ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link);
+	if (ret_val)
+		return ret_val;
+
+	if (!link) {
+		e_dbg("Phy info is only valid if link is up\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	phy->polarity_correction = true;
+
+	ret_val = e1000_check_polarity_igp(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1e_rphy(hw, IGP01E1000_PHY_PORT_STATUS, &data);
+	if (ret_val)
+		return ret_val;
+
+	phy->is_mdix = !!(data & IGP01E1000_PSSR_MDIX);
+
+	if ((data & IGP01E1000_PSSR_SPEED_MASK) ==
+	    IGP01E1000_PSSR_SPEED_1000MBPS) {
+		ret_val = phy->ops.get_cable_length(hw);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = e1e_rphy(hw, MII_STAT1000, &data);
+		if (ret_val)
+			return ret_val;
+
+		phy->local_rx = (data & LPA_1000LOCALRXOK)
+		    ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
+
+		phy->remote_rx = (data & LPA_1000REMRXOK)
+		    ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
+	} else {
+		phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
+		phy->local_rx = e1000_1000t_rx_status_undefined;
+		phy->remote_rx = e1000_1000t_rx_status_undefined;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_get_phy_info_ife - Retrieves various IFE PHY states
+ *  @hw: pointer to the HW structure
+ *
+ *  Populates "phy" structure with various feature states.
+ **/
+s32 e1000_get_phy_info_ife(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+	bool link;
+
+	ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link);
+	if (ret_val)
+		return ret_val;
+
+	if (!link) {
+		e_dbg("Phy info is only valid if link is up\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	ret_val = e1e_rphy(hw, IFE_PHY_SPECIAL_CONTROL, &data);
+	if (ret_val)
+		return ret_val;
+	phy->polarity_correction = !(data & IFE_PSC_AUTO_POLARITY_DISABLE);
+
+	if (phy->polarity_correction) {
+		ret_val = e1000_check_polarity_ife(hw);
+		if (ret_val)
+			return ret_val;
+	} else {
+		/* Polarity is forced */
+		phy->cable_polarity = ((data & IFE_PSC_FORCE_POLARITY)
+				       ? e1000_rev_polarity_reversed
+				       : e1000_rev_polarity_normal);
+	}
+
+	ret_val = e1e_rphy(hw, IFE_PHY_MDIX_CONTROL, &data);
+	if (ret_val)
+		return ret_val;
+
+	phy->is_mdix = !!(data & IFE_PMC_MDIX_STATUS);
+
+	/* The following parameters are undefined for 10/100 operation. */
+	phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
+	phy->local_rx = e1000_1000t_rx_status_undefined;
+	phy->remote_rx = e1000_1000t_rx_status_undefined;
+
+	return 0;
+}
+
+/**
+ *  e1000e_phy_sw_reset - PHY software reset
+ *  @hw: pointer to the HW structure
+ *
+ *  Does a software reset of the PHY by reading the PHY control register and
+ *  setting/write the control register reset bit to the PHY.
+ **/
+s32 e1000e_phy_sw_reset(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 phy_ctrl;
+
+	ret_val = e1e_rphy(hw, MII_BMCR, &phy_ctrl);
+	if (ret_val)
+		return ret_val;
+
+	phy_ctrl |= BMCR_RESET;
+	ret_val = e1e_wphy(hw, MII_BMCR, phy_ctrl);
+	if (ret_val)
+		return ret_val;
+
+	udelay(1);
+
+	return ret_val;
+}
+
+/**
+ *  e1000e_phy_hw_reset_generic - PHY hardware reset
+ *  @hw: pointer to the HW structure
+ *
+ *  Verify the reset block is not blocking us from resetting.  Acquire
+ *  semaphore (if necessary) and read/set/write the device control reset
+ *  bit in the PHY.  Wait the appropriate delay time for the device to
+ *  reset and release the semaphore (if necessary).
+ **/
+s32 e1000e_phy_hw_reset_generic(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u32 ctrl;
+
+	if (phy->ops.check_reset_block) {
+		ret_val = phy->ops.check_reset_block(hw);
+		if (ret_val)
+			return 0;
+	}
+
+	ret_val = phy->ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	ctrl = er32(CTRL);
+	ew32(CTRL, ctrl | E1000_CTRL_PHY_RST);
+	e1e_flush();
+
+	udelay(phy->reset_delay_us);
+
+	ew32(CTRL, ctrl);
+	e1e_flush();
+
+	usleep_range(150, 300);
+
+	phy->ops.release(hw);
+
+	return phy->ops.get_cfg_done(hw);
+}
+
+/**
+ *  e1000e_get_cfg_done_generic - Generic configuration done
+ *  @hw: pointer to the HW structure
+ *
+ *  Generic function to wait 10 milli-seconds for configuration to complete
+ *  and return success.
+ **/
+s32 e1000e_get_cfg_done_generic(struct e1000_hw __always_unused *hw)
+{
+	mdelay(10);
+
+	return 0;
+}
+
+/**
+ *  e1000e_phy_init_script_igp3 - Inits the IGP3 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Initializes a Intel Gigabit PHY3 when an EEPROM is not present.
+ **/
+s32 e1000e_phy_init_script_igp3(struct e1000_hw *hw)
+{
+	e_dbg("Running IGP 3 PHY init script\n");
+
+	/* PHY init IGP 3 */
+	/* Enable rise/fall, 10-mode work in class-A */
+	e1e_wphy(hw, 0x2F5B, 0x9018);
+	/* Remove all caps from Replica path filter */
+	e1e_wphy(hw, 0x2F52, 0x0000);
+	/* Bias trimming for ADC, AFE and Driver (Default) */
+	e1e_wphy(hw, 0x2FB1, 0x8B24);
+	/* Increase Hybrid poly bias */
+	e1e_wphy(hw, 0x2FB2, 0xF8F0);
+	/* Add 4% to Tx amplitude in Gig mode */
+	e1e_wphy(hw, 0x2010, 0x10B0);
+	/* Disable trimming (TTT) */
+	e1e_wphy(hw, 0x2011, 0x0000);
+	/* Poly DC correction to 94.6% + 2% for all channels */
+	e1e_wphy(hw, 0x20DD, 0x249A);
+	/* ABS DC correction to 95.9% */
+	e1e_wphy(hw, 0x20DE, 0x00D3);
+	/* BG temp curve trim */
+	e1e_wphy(hw, 0x28B4, 0x04CE);
+	/* Increasing ADC OPAMP stage 1 currents to max */
+	e1e_wphy(hw, 0x2F70, 0x29E4);
+	/* Force 1000 ( required for enabling PHY regs configuration) */
+	e1e_wphy(hw, 0x0000, 0x0140);
+	/* Set upd_freq to 6 */
+	e1e_wphy(hw, 0x1F30, 0x1606);
+	/* Disable NPDFE */
+	e1e_wphy(hw, 0x1F31, 0xB814);
+	/* Disable adaptive fixed FFE (Default) */
+	e1e_wphy(hw, 0x1F35, 0x002A);
+	/* Enable FFE hysteresis */
+	e1e_wphy(hw, 0x1F3E, 0x0067);
+	/* Fixed FFE for short cable lengths */
+	e1e_wphy(hw, 0x1F54, 0x0065);
+	/* Fixed FFE for medium cable lengths */
+	e1e_wphy(hw, 0x1F55, 0x002A);
+	/* Fixed FFE for long cable lengths */
+	e1e_wphy(hw, 0x1F56, 0x002A);
+	/* Enable Adaptive Clip Threshold */
+	e1e_wphy(hw, 0x1F72, 0x3FB0);
+	/* AHT reset limit to 1 */
+	e1e_wphy(hw, 0x1F76, 0xC0FF);
+	/* Set AHT master delay to 127 msec */
+	e1e_wphy(hw, 0x1F77, 0x1DEC);
+	/* Set scan bits for AHT */
+	e1e_wphy(hw, 0x1F78, 0xF9EF);
+	/* Set AHT Preset bits */
+	e1e_wphy(hw, 0x1F79, 0x0210);
+	/* Change integ_factor of channel A to 3 */
+	e1e_wphy(hw, 0x1895, 0x0003);
+	/* Change prop_factor of channels BCD to 8 */
+	e1e_wphy(hw, 0x1796, 0x0008);
+	/* Change cg_icount + enable integbp for channels BCD */
+	e1e_wphy(hw, 0x1798, 0xD008);
+	/* Change cg_icount + enable integbp + change prop_factor_master
+	 * to 8 for channel A
+	 */
+	e1e_wphy(hw, 0x1898, 0xD918);
+	/* Disable AHT in Slave mode on channel A */
+	e1e_wphy(hw, 0x187A, 0x0800);
+	/* Enable LPLU and disable AN to 1000 in non-D0a states,
+	 * Enable SPD+B2B
+	 */
+	e1e_wphy(hw, 0x0019, 0x008D);
+	/* Enable restart AN on an1000_dis change */
+	e1e_wphy(hw, 0x001B, 0x2080);
+	/* Enable wh_fifo read clock in 10/100 modes */
+	e1e_wphy(hw, 0x0014, 0x0045);
+	/* Restart AN, Speed selection is 1000 */
+	e1e_wphy(hw, 0x0000, 0x1340);
+
+	return 0;
+}
+
+/**
+ *  e1000e_get_phy_type_from_id - Get PHY type from id
+ *  @phy_id: phy_id read from the phy
+ *
+ *  Returns the phy type from the id.
+ **/
+enum e1000_phy_type e1000e_get_phy_type_from_id(u32 phy_id)
+{
+	enum e1000_phy_type phy_type = e1000_phy_unknown;
+
+	switch (phy_id) {
+	case M88E1000_I_PHY_ID:
+	case M88E1000_E_PHY_ID:
+	case M88E1111_I_PHY_ID:
+	case M88E1011_I_PHY_ID:
+		phy_type = e1000_phy_m88;
+		break;
+	case IGP01E1000_I_PHY_ID:	/* IGP 1 & 2 share this */
+		phy_type = e1000_phy_igp_2;
+		break;
+	case GG82563_E_PHY_ID:
+		phy_type = e1000_phy_gg82563;
+		break;
+	case IGP03E1000_E_PHY_ID:
+		phy_type = e1000_phy_igp_3;
+		break;
+	case IFE_E_PHY_ID:
+	case IFE_PLUS_E_PHY_ID:
+	case IFE_C_E_PHY_ID:
+		phy_type = e1000_phy_ife;
+		break;
+	case BME1000_E_PHY_ID:
+	case BME1000_E_PHY_ID_R2:
+		phy_type = e1000_phy_bm;
+		break;
+	case I82578_E_PHY_ID:
+		phy_type = e1000_phy_82578;
+		break;
+	case I82577_E_PHY_ID:
+		phy_type = e1000_phy_82577;
+		break;
+	case I82579_E_PHY_ID:
+		phy_type = e1000_phy_82579;
+		break;
+	case I217_E_PHY_ID:
+		phy_type = e1000_phy_i217;
+		break;
+	default:
+		phy_type = e1000_phy_unknown;
+		break;
+	}
+	return phy_type;
+}
+
+/**
+ *  e1000e_determine_phy_address - Determines PHY address.
+ *  @hw: pointer to the HW structure
+ *
+ *  This uses a trial and error method to loop through possible PHY
+ *  addresses. It tests each by reading the PHY ID registers and
+ *  checking for a match.
+ **/
+s32 e1000e_determine_phy_address(struct e1000_hw *hw)
+{
+	u32 phy_addr = 0;
+	u32 i;
+	enum e1000_phy_type phy_type = e1000_phy_unknown;
+
+	hw->phy.id = phy_type;
+
+	for (phy_addr = 0; phy_addr < E1000_MAX_PHY_ADDR; phy_addr++) {
+		hw->phy.addr = phy_addr;
+		i = 0;
+
+		do {
+			e1000e_get_phy_id(hw);
+			phy_type = e1000e_get_phy_type_from_id(hw->phy.id);
+
+			/* If phy_type is valid, break - we found our
+			 * PHY address
+			 */
+			if (phy_type != e1000_phy_unknown)
+				return 0;
+
+			usleep_range(1000, 2000);
+			i++;
+		} while (i < 10);
+	}
+
+	return -E1000_ERR_PHY_TYPE;
+}
+
+/**
+ *  e1000_get_phy_addr_for_bm_page - Retrieve PHY page address
+ *  @page: page to access
+ *  @reg: register to check
+ *
+ *  Returns the phy address for the page requested.
+ **/
+static u32 e1000_get_phy_addr_for_bm_page(u32 page, u32 reg)
+{
+	u32 phy_addr = 2;
+
+	if ((page >= 768) || (page == 0 && reg == 25) || (reg == 31))
+		phy_addr = 1;
+
+	return phy_addr;
+}
+
+/**
+ *  e1000e_write_phy_reg_bm - Write BM PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Acquires semaphore, if necessary, then writes the data to PHY register
+ *  at the offset.  Release any acquired semaphores before exiting.
+ **/
+s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	s32 ret_val;
+	u32 page = offset >> IGP_PAGE_SHIFT;
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Page 800 works differently than the rest so it has its own func */
+	if (page == BM_WUC_PAGE) {
+		ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data,
+							 false, false);
+		goto release;
+	}
+
+	hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset);
+
+	if (offset > MAX_PHY_MULTI_PAGE_REG) {
+		u32 page_shift, page_select;
+
+		/* Page select is register 31 for phy address 1 and 22 for
+		 * phy address 2 and 3. Page select is shifted only for
+		 * phy address 1.
+		 */
+		if (hw->phy.addr == 1) {
+			page_shift = IGP_PAGE_SHIFT;
+			page_select = IGP01E1000_PHY_PAGE_SELECT;
+		} else {
+			page_shift = 0;
+			page_select = BM_PHY_PAGE_SELECT;
+		}
+
+		/* Page is shifted left, PHY expects (page x 32) */
+		ret_val = e1000e_write_phy_reg_mdic(hw, page_select,
+						    (page << page_shift));
+		if (ret_val)
+			goto release;
+	}
+
+	ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
+					    data);
+
+release:
+	hw->phy.ops.release(hw);
+	return ret_val;
+}
+
+/**
+ *  e1000e_read_phy_reg_bm - Read BM PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Acquires semaphore, if necessary, then reads the PHY register at offset
+ *  and storing the retrieved information in data.  Release any acquired
+ *  semaphores before exiting.
+ **/
+s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	s32 ret_val;
+	u32 page = offset >> IGP_PAGE_SHIFT;
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Page 800 works differently than the rest so it has its own func */
+	if (page == BM_WUC_PAGE) {
+		ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, data,
+							 true, false);
+		goto release;
+	}
+
+	hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset);
+
+	if (offset > MAX_PHY_MULTI_PAGE_REG) {
+		u32 page_shift, page_select;
+
+		/* Page select is register 31 for phy address 1 and 22 for
+		 * phy address 2 and 3. Page select is shifted only for
+		 * phy address 1.
+		 */
+		if (hw->phy.addr == 1) {
+			page_shift = IGP_PAGE_SHIFT;
+			page_select = IGP01E1000_PHY_PAGE_SELECT;
+		} else {
+			page_shift = 0;
+			page_select = BM_PHY_PAGE_SELECT;
+		}
+
+		/* Page is shifted left, PHY expects (page x 32) */
+		ret_val = e1000e_write_phy_reg_mdic(hw, page_select,
+						    (page << page_shift));
+		if (ret_val)
+			goto release;
+	}
+
+	ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
+					   data);
+release:
+	hw->phy.ops.release(hw);
+	return ret_val;
+}
+
+/**
+ *  e1000e_read_phy_reg_bm2 - Read BM PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Acquires semaphore, if necessary, then reads the PHY register at offset
+ *  and storing the retrieved information in data.  Release any acquired
+ *  semaphores before exiting.
+ **/
+s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	s32 ret_val;
+	u16 page = (u16)(offset >> IGP_PAGE_SHIFT);
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Page 800 works differently than the rest so it has its own func */
+	if (page == BM_WUC_PAGE) {
+		ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, data,
+							 true, false);
+		goto release;
+	}
+
+	hw->phy.addr = 1;
+
+	if (offset > MAX_PHY_MULTI_PAGE_REG) {
+		/* Page is shifted left, PHY expects (page x 32) */
+		ret_val = e1000e_write_phy_reg_mdic(hw, BM_PHY_PAGE_SELECT,
+						    page);
+
+		if (ret_val)
+			goto release;
+	}
+
+	ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
+					   data);
+release:
+	hw->phy.ops.release(hw);
+	return ret_val;
+}
+
+/**
+ *  e1000e_write_phy_reg_bm2 - Write BM PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Acquires semaphore, if necessary, then writes the data to PHY register
+ *  at the offset.  Release any acquired semaphores before exiting.
+ **/
+s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	s32 ret_val;
+	u16 page = (u16)(offset >> IGP_PAGE_SHIFT);
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Page 800 works differently than the rest so it has its own func */
+	if (page == BM_WUC_PAGE) {
+		ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data,
+							 false, false);
+		goto release;
+	}
+
+	hw->phy.addr = 1;
+
+	if (offset > MAX_PHY_MULTI_PAGE_REG) {
+		/* Page is shifted left, PHY expects (page x 32) */
+		ret_val = e1000e_write_phy_reg_mdic(hw, BM_PHY_PAGE_SELECT,
+						    page);
+
+		if (ret_val)
+			goto release;
+	}
+
+	ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
+					    data);
+
+release:
+	hw->phy.ops.release(hw);
+	return ret_val;
+}
+
+/**
+ *  e1000_enable_phy_wakeup_reg_access_bm - enable access to BM wakeup registers
+ *  @hw: pointer to the HW structure
+ *  @phy_reg: pointer to store original contents of BM_WUC_ENABLE_REG
+ *
+ *  Assumes semaphore already acquired and phy_reg points to a valid memory
+ *  address to store contents of the BM_WUC_ENABLE_REG register.
+ **/
+s32 e1000_enable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg)
+{
+	s32 ret_val;
+	u16 temp;
+
+	/* All page select, port ctrl and wakeup registers use phy address 1 */
+	hw->phy.addr = 1;
+
+	/* Select Port Control Registers page */
+	ret_val = e1000_set_page_igp(hw, (BM_PORT_CTRL_PAGE << IGP_PAGE_SHIFT));
+	if (ret_val) {
+		e_dbg("Could not set Port Control page\n");
+		return ret_val;
+	}
+
+	ret_val = e1000e_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, phy_reg);
+	if (ret_val) {
+		e_dbg("Could not read PHY register %d.%d\n",
+		      BM_PORT_CTRL_PAGE, BM_WUC_ENABLE_REG);
+		return ret_val;
+	}
+
+	/* Enable both PHY wakeup mode and Wakeup register page writes.
+	 * Prevent a power state change by disabling ME and Host PHY wakeup.
+	 */
+	temp = *phy_reg;
+	temp |= BM_WUC_ENABLE_BIT;
+	temp &= ~(BM_WUC_ME_WU_BIT | BM_WUC_HOST_WU_BIT);
+
+	ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, temp);
+	if (ret_val) {
+		e_dbg("Could not write PHY register %d.%d\n",
+		      BM_PORT_CTRL_PAGE, BM_WUC_ENABLE_REG);
+		return ret_val;
+	}
+
+	/* Select Host Wakeup Registers page - caller now able to write
+	 * registers on the Wakeup registers page
+	 */
+	return e1000_set_page_igp(hw, (BM_WUC_PAGE << IGP_PAGE_SHIFT));
+}
+
+/**
+ *  e1000_disable_phy_wakeup_reg_access_bm - disable access to BM wakeup regs
+ *  @hw: pointer to the HW structure
+ *  @phy_reg: pointer to original contents of BM_WUC_ENABLE_REG
+ *
+ *  Restore BM_WUC_ENABLE_REG to its original value.
+ *
+ *  Assumes semaphore already acquired and *phy_reg is the contents of the
+ *  BM_WUC_ENABLE_REG before register(s) on BM_WUC_PAGE were accessed by
+ *  caller.
+ **/
+s32 e1000_disable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg)
+{
+	s32 ret_val;
+
+	/* Select Port Control Registers page */
+	ret_val = e1000_set_page_igp(hw, (BM_PORT_CTRL_PAGE << IGP_PAGE_SHIFT));
+	if (ret_val) {
+		e_dbg("Could not set Port Control page\n");
+		return ret_val;
+	}
+
+	/* Restore 769.17 to its original value */
+	ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, *phy_reg);
+	if (ret_val)
+		e_dbg("Could not restore PHY register %d.%d\n",
+		      BM_PORT_CTRL_PAGE, BM_WUC_ENABLE_REG);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_access_phy_wakeup_reg_bm - Read/write BM PHY wakeup register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read or written
+ *  @data: pointer to the data to read or write
+ *  @read: determines if operation is read or write
+ *  @page_set: BM_WUC_PAGE already set and access enabled
+ *
+ *  Read the PHY register at offset and store the retrieved information in
+ *  data, or write data to PHY register at offset.  Note the procedure to
+ *  access the PHY wakeup registers is different than reading the other PHY
+ *  registers. It works as such:
+ *  1) Set 769.17.2 (page 769, register 17, bit 2) = 1
+ *  2) Set page to 800 for host (801 if we were manageability)
+ *  3) Write the address using the address opcode (0x11)
+ *  4) Read or write the data using the data opcode (0x12)
+ *  5) Restore 769.17.2 to its original value
+ *
+ *  Steps 1 and 2 are done by e1000_enable_phy_wakeup_reg_access_bm() and
+ *  step 5 is done by e1000_disable_phy_wakeup_reg_access_bm().
+ *
+ *  Assumes semaphore is already acquired.  When page_set==true, assumes
+ *  the PHY page is set to BM_WUC_PAGE (i.e. a function in the call stack
+ *  is responsible for calls to e1000_[enable|disable]_phy_wakeup_reg_bm()).
+ **/
+static s32 e1000_access_phy_wakeup_reg_bm(struct e1000_hw *hw, u32 offset,
+					  u16 *data, bool read, bool page_set)
+{
+	s32 ret_val;
+	u16 reg = BM_PHY_REG_NUM(offset);
+	u16 page = BM_PHY_REG_PAGE(offset);
+	u16 phy_reg = 0;
+
+	/* Gig must be disabled for MDIO accesses to Host Wakeup reg page */
+	if ((hw->mac.type == e1000_pchlan) &&
+	    (!(er32(PHY_CTRL) & E1000_PHY_CTRL_GBE_DISABLE)))
+		e_dbg("Attempting to access page %d while gig enabled.\n",
+		      page);
+
+	if (!page_set) {
+		/* Enable access to PHY wakeup registers */
+		ret_val = e1000_enable_phy_wakeup_reg_access_bm(hw, &phy_reg);
+		if (ret_val) {
+			e_dbg("Could not enable PHY wakeup reg access\n");
+			return ret_val;
+		}
+	}
+
+	e_dbg("Accessing PHY page %d reg 0x%x\n", page, reg);
+
+	/* Write the Wakeup register page offset value using opcode 0x11 */
+	ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_ADDRESS_OPCODE, reg);
+	if (ret_val) {
+		e_dbg("Could not write address opcode to page %d\n", page);
+		return ret_val;
+	}
+
+	if (read) {
+		/* Read the Wakeup register page value using opcode 0x12 */
+		ret_val = e1000e_read_phy_reg_mdic(hw, BM_WUC_DATA_OPCODE,
+						   data);
+	} else {
+		/* Write the Wakeup register page value using opcode 0x12 */
+		ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_DATA_OPCODE,
+						    *data);
+	}
+
+	if (ret_val) {
+		e_dbg("Could not access PHY reg %d.%d\n", page, reg);
+		return ret_val;
+	}
+
+	if (!page_set)
+		ret_val = e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg);
+
+	return ret_val;
+}
+
+/**
+ * e1000_power_up_phy_copper - Restore copper link in case of PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, or wake on lan is not enabled, restore the link to previous
+ * settings.
+ **/
+void e1000_power_up_phy_copper(struct e1000_hw *hw)
+{
+	u16 mii_reg = 0;
+	int ret;
+
+	/* The PHY will retain its settings across a power down/up cycle */
+	ret = e1e_rphy(hw, MII_BMCR, &mii_reg);
+	if (ret) {
+		e_dbg("Error reading PHY register\n");
+		return;
+	}
+	mii_reg &= ~BMCR_PDOWN;
+	e1e_wphy(hw, MII_BMCR, mii_reg);
+}
+
+/**
+ * e1000_power_down_phy_copper - Restore copper link in case of PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, or wake on lan is not enabled, restore the link to previous
+ * settings.
+ **/
+void e1000_power_down_phy_copper(struct e1000_hw *hw)
+{
+	u16 mii_reg = 0;
+	int ret;
+
+	/* The PHY will retain its settings across a power down/up cycle */
+	ret = e1e_rphy(hw, MII_BMCR, &mii_reg);
+	if (ret) {
+		e_dbg("Error reading PHY register\n");
+		return;
+	}
+	mii_reg |= BMCR_PDOWN;
+	e1e_wphy(hw, MII_BMCR, mii_reg);
+	usleep_range(1000, 2000);
+}
+
+/**
+ *  __e1000_read_phy_reg_hv -  Read HV PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *  @locked: semaphore has already been acquired or not
+ *  @page_set: BM_WUC_PAGE already set and access enabled
+ *
+ *  Acquires semaphore, if necessary, then reads the PHY register at offset
+ *  and stores the retrieved information in data.  Release any acquired
+ *  semaphore before exiting.
+ **/
+static s32 __e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data,
+				   bool locked, bool page_set)
+{
+	s32 ret_val;
+	u16 page = BM_PHY_REG_PAGE(offset);
+	u16 reg = BM_PHY_REG_NUM(offset);
+	u32 phy_addr = hw->phy.addr = e1000_get_phy_addr_for_hv_page(page);
+
+	if (!locked) {
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Page 800 works differently than the rest so it has its own func */
+	if (page == BM_WUC_PAGE) {
+		ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, data,
+							 true, page_set);
+		goto out;
+	}
+
+	if (page > 0 && page < HV_INTC_FC_PAGE_START) {
+		ret_val = e1000_access_phy_debug_regs_hv(hw, offset,
+							 data, true);
+		goto out;
+	}
+
+	if (!page_set) {
+		if (page == HV_INTC_FC_PAGE_START)
+			page = 0;
+
+		if (reg > MAX_PHY_MULTI_PAGE_REG) {
+			/* Page is shifted left, PHY expects (page x 32) */
+			ret_val = e1000_set_page_igp(hw,
+						     (page << IGP_PAGE_SHIFT));
+
+			hw->phy.addr = phy_addr;
+
+			if (ret_val)
+				goto out;
+		}
+	}
+
+	e_dbg("reading PHY page %d (or 0x%x shifted) reg 0x%x\n", page,
+	      page << IGP_PAGE_SHIFT, reg);
+
+	ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & reg, data);
+out:
+	if (!locked)
+		hw->phy.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_read_phy_reg_hv -  Read HV PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Acquires semaphore then reads the PHY register at offset and stores
+ *  the retrieved information in data.  Release the acquired semaphore
+ *  before exiting.
+ **/
+s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	return __e1000_read_phy_reg_hv(hw, offset, data, false, false);
+}
+
+/**
+ *  e1000_read_phy_reg_hv_locked -  Read HV PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Reads the PHY register at offset and stores the retrieved information
+ *  in data.  Assumes semaphore already acquired.
+ **/
+s32 e1000_read_phy_reg_hv_locked(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	return __e1000_read_phy_reg_hv(hw, offset, data, true, false);
+}
+
+/**
+ *  e1000_read_phy_reg_page_hv - Read HV PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Reads the PHY register at offset and stores the retrieved information
+ *  in data.  Assumes semaphore already acquired and page already set.
+ **/
+s32 e1000_read_phy_reg_page_hv(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	return __e1000_read_phy_reg_hv(hw, offset, data, true, true);
+}
+
+/**
+ *  __e1000_write_phy_reg_hv - Write HV PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *  @locked: semaphore has already been acquired or not
+ *  @page_set: BM_WUC_PAGE already set and access enabled
+ *
+ *  Acquires semaphore, if necessary, then writes the data to PHY register
+ *  at the offset.  Release any acquired semaphores before exiting.
+ **/
+static s32 __e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data,
+				    bool locked, bool page_set)
+{
+	s32 ret_val;
+	u16 page = BM_PHY_REG_PAGE(offset);
+	u16 reg = BM_PHY_REG_NUM(offset);
+	u32 phy_addr = hw->phy.addr = e1000_get_phy_addr_for_hv_page(page);
+
+	if (!locked) {
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Page 800 works differently than the rest so it has its own func */
+	if (page == BM_WUC_PAGE) {
+		ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data,
+							 false, page_set);
+		goto out;
+	}
+
+	if (page > 0 && page < HV_INTC_FC_PAGE_START) {
+		ret_val = e1000_access_phy_debug_regs_hv(hw, offset,
+							 &data, false);
+		goto out;
+	}
+
+	if (!page_set) {
+		if (page == HV_INTC_FC_PAGE_START)
+			page = 0;
+
+		/* Workaround MDIO accesses being disabled after entering IEEE
+		 * Power Down (when bit 11 of the PHY Control register is set)
+		 */
+		if ((hw->phy.type == e1000_phy_82578) &&
+		    (hw->phy.revision >= 1) &&
+		    (hw->phy.addr == 2) &&
+		    !(MAX_PHY_REG_ADDRESS & reg) && (data & BIT(11))) {
+			u16 data2 = 0x7EFF;
+
+			ret_val = e1000_access_phy_debug_regs_hv(hw,
+								 BIT(6) | 0x3,
+								 &data2, false);
+			if (ret_val)
+				goto out;
+		}
+
+		if (reg > MAX_PHY_MULTI_PAGE_REG) {
+			/* Page is shifted left, PHY expects (page x 32) */
+			ret_val = e1000_set_page_igp(hw,
+						     (page << IGP_PAGE_SHIFT));
+
+			hw->phy.addr = phy_addr;
+
+			if (ret_val)
+				goto out;
+		}
+	}
+
+	e_dbg("writing PHY page %d (or 0x%x shifted) reg 0x%x\n", page,
+	      page << IGP_PAGE_SHIFT, reg);
+
+	ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & reg,
+					    data);
+
+out:
+	if (!locked)
+		hw->phy.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_write_phy_reg_hv - Write HV PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Acquires semaphore then writes the data to PHY register at the offset.
+ *  Release the acquired semaphores before exiting.
+ **/
+s32 e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	return __e1000_write_phy_reg_hv(hw, offset, data, false, false);
+}
+
+/**
+ *  e1000_write_phy_reg_hv_locked - Write HV PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Writes the data to PHY register at the offset.  Assumes semaphore
+ *  already acquired.
+ **/
+s32 e1000_write_phy_reg_hv_locked(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	return __e1000_write_phy_reg_hv(hw, offset, data, true, false);
+}
+
+/**
+ *  e1000_write_phy_reg_page_hv - Write HV PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Writes the data to PHY register at the offset.  Assumes semaphore
+ *  already acquired and page already set.
+ **/
+s32 e1000_write_phy_reg_page_hv(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	return __e1000_write_phy_reg_hv(hw, offset, data, true, true);
+}
+
+/**
+ *  e1000_get_phy_addr_for_hv_page - Get PHY address based on page
+ *  @page: page to be accessed
+ **/
+static u32 e1000_get_phy_addr_for_hv_page(u32 page)
+{
+	u32 phy_addr = 2;
+
+	if (page >= HV_INTC_FC_PAGE_START)
+		phy_addr = 1;
+
+	return phy_addr;
+}
+
+/**
+ *  e1000_access_phy_debug_regs_hv - Read HV PHY vendor specific high registers
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read or written
+ *  @data: pointer to the data to be read or written
+ *  @read: determines if operation is read or write
+ *
+ *  Reads the PHY register at offset and stores the retrieved information
+ *  in data.  Assumes semaphore already acquired.  Note that the procedure
+ *  to access these regs uses the address port and data port to read/write.
+ *  These accesses done with PHY address 2 and without using pages.
+ **/
+static s32 e1000_access_phy_debug_regs_hv(struct e1000_hw *hw, u32 offset,
+					  u16 *data, bool read)
+{
+	s32 ret_val;
+	u32 addr_reg;
+	u32 data_reg;
+
+	/* This takes care of the difference with desktop vs mobile phy */
+	addr_reg = ((hw->phy.type == e1000_phy_82578) ?
+		    I82578_ADDR_REG : I82577_ADDR_REG);
+	data_reg = addr_reg + 1;
+
+	/* All operations in this function are phy address 2 */
+	hw->phy.addr = 2;
+
+	/* masking with 0x3F to remove the page from offset */
+	ret_val = e1000e_write_phy_reg_mdic(hw, addr_reg, (u16)offset & 0x3F);
+	if (ret_val) {
+		e_dbg("Could not write the Address Offset port register\n");
+		return ret_val;
+	}
+
+	/* Read or write the data value next */
+	if (read)
+		ret_val = e1000e_read_phy_reg_mdic(hw, data_reg, data);
+	else
+		ret_val = e1000e_write_phy_reg_mdic(hw, data_reg, *data);
+
+	if (ret_val)
+		e_dbg("Could not access the Data port register\n");
+
+	return ret_val;
+}
+
+/**
+ *  e1000_link_stall_workaround_hv - Si workaround
+ *  @hw: pointer to the HW structure
+ *
+ *  This function works around a Si bug where the link partner can get
+ *  a link up indication before the PHY does.  If small packets are sent
+ *  by the link partner they can be placed in the packet buffer without
+ *  being properly accounted for by the PHY and will stall preventing
+ *  further packets from being received.  The workaround is to clear the
+ *  packet buffer after the PHY detects link up.
+ **/
+s32 e1000_link_stall_workaround_hv(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u16 data;
+
+	if (hw->phy.type != e1000_phy_82578)
+		return 0;
+
+	/* Do not apply workaround if in PHY loopback bit 14 set */
+	ret_val = e1e_rphy(hw, MII_BMCR, &data);
+	if (ret_val) {
+		e_dbg("Error reading PHY register\n");
+		return ret_val;
+	}
+	if (data & BMCR_LOOPBACK)
+		return 0;
+
+	/* check if link is up and at 1Gbps */
+	ret_val = e1e_rphy(hw, BM_CS_STATUS, &data);
+	if (ret_val)
+		return ret_val;
+
+	data &= (BM_CS_STATUS_LINK_UP | BM_CS_STATUS_RESOLVED |
+		 BM_CS_STATUS_SPEED_MASK);
+
+	if (data != (BM_CS_STATUS_LINK_UP | BM_CS_STATUS_RESOLVED |
+		     BM_CS_STATUS_SPEED_1000))
+		return 0;
+
+	msleep(200);
+
+	/* flush the packets in the fifo buffer */
+	ret_val = e1e_wphy(hw, HV_MUX_DATA_CTRL,
+			   (HV_MUX_DATA_CTRL_GEN_TO_MAC |
+			    HV_MUX_DATA_CTRL_FORCE_SPEED));
+	if (ret_val)
+		return ret_val;
+
+	return e1e_wphy(hw, HV_MUX_DATA_CTRL, HV_MUX_DATA_CTRL_GEN_TO_MAC);
+}
+
+/**
+ *  e1000_check_polarity_82577 - Checks the polarity.
+ *  @hw: pointer to the HW structure
+ *
+ *  Success returns 0, Failure returns -E1000_ERR_PHY (-2)
+ *
+ *  Polarity is determined based on the PHY specific status register.
+ **/
+s32 e1000_check_polarity_82577(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+
+	ret_val = e1e_rphy(hw, I82577_PHY_STATUS_2, &data);
+
+	if (!ret_val)
+		phy->cable_polarity = ((data & I82577_PHY_STATUS2_REV_POLARITY)
+				       ? e1000_rev_polarity_reversed
+				       : e1000_rev_polarity_normal);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_phy_force_speed_duplex_82577 - Force speed/duplex for I82577 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Calls the PHY setup function to force speed and duplex.
+ **/
+s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+	bool link;
+
+	ret_val = e1e_rphy(hw, MII_BMCR, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	e1000e_phy_force_speed_duplex_setup(hw, &phy_data);
+
+	ret_val = e1e_wphy(hw, MII_BMCR, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	udelay(1);
+
+	if (phy->autoneg_wait_to_complete) {
+		e_dbg("Waiting for forced speed/duplex link on 82577 phy\n");
+
+		ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						      100000, &link);
+		if (ret_val)
+			return ret_val;
+
+		if (!link)
+			e_dbg("Link taking longer than expected.\n");
+
+		/* Try once more */
+		ret_val = e1000e_phy_has_link_generic(hw, PHY_FORCE_LIMIT,
+						      100000, &link);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_get_phy_info_82577 - Retrieve I82577 PHY information
+ *  @hw: pointer to the HW structure
+ *
+ *  Read PHY status to determine if link is up.  If link is up, then
+ *  set/determine 10base-T extended distance and polarity correction.  Read
+ *  PHY port status to determine MDI/MDIx and speed.  Based on the speed,
+ *  determine on the cable length, local and remote receiver.
+ **/
+s32 e1000_get_phy_info_82577(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+	bool link;
+
+	ret_val = e1000e_phy_has_link_generic(hw, 1, 0, &link);
+	if (ret_val)
+		return ret_val;
+
+	if (!link) {
+		e_dbg("Phy info is only valid if link is up\n");
+		return -E1000_ERR_CONFIG;
+	}
+
+	phy->polarity_correction = true;
+
+	ret_val = e1000_check_polarity_82577(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = e1e_rphy(hw, I82577_PHY_STATUS_2, &data);
+	if (ret_val)
+		return ret_val;
+
+	phy->is_mdix = !!(data & I82577_PHY_STATUS2_MDIX);
+
+	if ((data & I82577_PHY_STATUS2_SPEED_MASK) ==
+	    I82577_PHY_STATUS2_SPEED_1000MBPS) {
+		ret_val = hw->phy.ops.get_cable_length(hw);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = e1e_rphy(hw, MII_STAT1000, &data);
+		if (ret_val)
+			return ret_val;
+
+		phy->local_rx = (data & LPA_1000LOCALRXOK)
+		    ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
+
+		phy->remote_rx = (data & LPA_1000REMRXOK)
+		    ? e1000_1000t_rx_status_ok : e1000_1000t_rx_status_not_ok;
+	} else {
+		phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
+		phy->local_rx = e1000_1000t_rx_status_undefined;
+		phy->remote_rx = e1000_1000t_rx_status_undefined;
+	}
+
+	return 0;
+}
+
+/**
+ *  e1000_get_cable_length_82577 - Determine cable length for 82577 PHY
+ *  @hw: pointer to the HW structure
+ *
+ * Reads the diagnostic status register and verifies result is valid before
+ * placing it in the phy_cable_length field.
+ **/
+s32 e1000_get_cable_length_82577(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, length;
+
+	ret_val = e1e_rphy(hw, I82577_PHY_DIAG_STATUS, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	length = ((phy_data & I82577_DSTATUS_CABLE_LENGTH) >>
+		  I82577_DSTATUS_CABLE_LENGTH_SHIFT);
+
+	if (length == E1000_CABLE_LENGTH_UNDEFINED)
+		return -E1000_ERR_PHY;
+
+	phy->cable_length = length;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/e1000e/phy.h b/drivers/net/ethernet/intel/e1000e/phy.h
new file mode 100644
index 0000000000..c48777d095
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/phy.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _E1000E_PHY_H_
+#define _E1000E_PHY_H_
+
+s32 e1000e_check_downshift(struct e1000_hw *hw);
+s32 e1000_check_polarity_m88(struct e1000_hw *hw);
+s32 e1000_check_polarity_igp(struct e1000_hw *hw);
+s32 e1000_check_polarity_ife(struct e1000_hw *hw);
+s32 e1000e_check_reset_block_generic(struct e1000_hw *hw);
+s32 e1000e_copper_link_setup_igp(struct e1000_hw *hw);
+s32 e1000e_copper_link_setup_m88(struct e1000_hw *hw);
+s32 e1000e_phy_force_speed_duplex_igp(struct e1000_hw *hw);
+s32 e1000e_phy_force_speed_duplex_m88(struct e1000_hw *hw);
+s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw);
+s32 e1000e_get_cable_length_m88(struct e1000_hw *hw);
+s32 e1000e_get_cable_length_igp_2(struct e1000_hw *hw);
+s32 e1000e_get_cfg_done_generic(struct e1000_hw *hw);
+s32 e1000e_get_phy_id(struct e1000_hw *hw);
+s32 e1000e_get_phy_info_igp(struct e1000_hw *hw);
+s32 e1000e_get_phy_info_m88(struct e1000_hw *hw);
+s32 e1000_get_phy_info_ife(struct e1000_hw *hw);
+s32 e1000e_phy_sw_reset(struct e1000_hw *hw);
+void e1000e_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl);
+s32 e1000e_phy_hw_reset_generic(struct e1000_hw *hw);
+s32 e1000e_phy_reset_dsp(struct e1000_hw *hw);
+s32 e1000e_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000e_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page);
+s32 e1000e_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000e_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000e_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000e_set_d3_lplu_state(struct e1000_hw *hw, bool active);
+s32 e1000e_setup_copper_link(struct e1000_hw *hw);
+s32 e1000e_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000e_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000e_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000e_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000e_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations,
+				u32 usec_interval, bool *success);
+s32 e1000e_phy_init_script_igp3(struct e1000_hw *hw);
+enum e1000_phy_type e1000e_get_phy_type_from_id(u32 phy_id);
+s32 e1000e_determine_phy_address(struct e1000_hw *hw);
+s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_enable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg);
+s32 e1000_disable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg);
+s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data);
+void e1000_power_up_phy_copper(struct e1000_hw *hw);
+void e1000_power_down_phy_copper(struct e1000_hw *hw);
+s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_read_phy_reg_hv_locked(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_read_phy_reg_page_hv(struct e1000_hw *hw, u32 offset, u16 *data);
+s32 e1000_write_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_write_phy_reg_hv_locked(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_write_phy_reg_page_hv(struct e1000_hw *hw, u32 offset, u16 data);
+s32 e1000_link_stall_workaround_hv(struct e1000_hw *hw);
+s32 e1000_copper_link_setup_82577(struct e1000_hw *hw);
+s32 e1000_check_polarity_82577(struct e1000_hw *hw);
+s32 e1000_get_phy_info_82577(struct e1000_hw *hw);
+s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw);
+s32 e1000_get_cable_length_82577(struct e1000_hw *hw);
+
+#define E1000_MAX_PHY_ADDR		8
+
+/* IGP01E1000 Specific Registers */
+#define IGP01E1000_PHY_PORT_CONFIG	0x10	/* Port Config */
+#define IGP01E1000_PHY_PORT_STATUS	0x11	/* Status */
+#define IGP01E1000_PHY_PORT_CTRL	0x12	/* Control */
+#define IGP01E1000_PHY_LINK_HEALTH	0x13	/* PHY Link Health */
+#define IGP02E1000_PHY_POWER_MGMT	0x19	/* Power Management */
+#define IGP01E1000_PHY_PAGE_SELECT	0x1F	/* Page Select */
+#define BM_PHY_PAGE_SELECT		22	/* Page Select for BM */
+#define IGP_PAGE_SHIFT			5
+#define PHY_REG_MASK			0x1F
+
+/* BM/HV Specific Registers */
+#define BM_PORT_CTRL_PAGE		769
+#define BM_WUC_PAGE			800
+#define BM_WUC_ADDRESS_OPCODE		0x11
+#define BM_WUC_DATA_OPCODE		0x12
+#define BM_WUC_ENABLE_PAGE		BM_PORT_CTRL_PAGE
+#define BM_WUC_ENABLE_REG		17
+#define BM_WUC_ENABLE_BIT		BIT(2)
+#define BM_WUC_HOST_WU_BIT		BIT(4)
+#define BM_WUC_ME_WU_BIT		BIT(5)
+
+#define PHY_UPPER_SHIFT			21
+#define BM_PHY_REG(page, reg) \
+	(((reg) & MAX_PHY_REG_ADDRESS) |\
+	 (((page) & 0xFFFF) << PHY_PAGE_SHIFT) |\
+	 (((reg) & ~MAX_PHY_REG_ADDRESS) << (PHY_UPPER_SHIFT - PHY_PAGE_SHIFT)))
+#define BM_PHY_REG_PAGE(offset) \
+	((u16)(((offset) >> PHY_PAGE_SHIFT) & 0xFFFF))
+#define BM_PHY_REG_NUM(offset) \
+	((u16)(((offset) & MAX_PHY_REG_ADDRESS) |\
+	 (((offset) >> (PHY_UPPER_SHIFT - PHY_PAGE_SHIFT)) &\
+		~MAX_PHY_REG_ADDRESS)))
+
+#define HV_INTC_FC_PAGE_START		768
+#define I82578_ADDR_REG			29
+#define I82577_ADDR_REG			16
+#define I82577_CFG_REG			22
+#define I82577_CFG_ASSERT_CRS_ON_TX	BIT(15)
+#define I82577_CFG_ENABLE_DOWNSHIFT	(3u << 10)	/* auto downshift */
+#define I82577_CTRL_REG			23
+
+/* 82577 specific PHY registers */
+#define I82577_PHY_CTRL_2		18
+#define I82577_PHY_LBK_CTRL		19
+#define I82577_PHY_STATUS_2		26
+#define I82577_PHY_DIAG_STATUS		31
+
+/* I82577 PHY Status 2 */
+#define I82577_PHY_STATUS2_REV_POLARITY		0x0400
+#define I82577_PHY_STATUS2_MDIX			0x0800
+#define I82577_PHY_STATUS2_SPEED_MASK		0x0300
+#define I82577_PHY_STATUS2_SPEED_1000MBPS	0x0200
+
+/* I82577 PHY Control 2 */
+#define I82577_PHY_CTRL2_MANUAL_MDIX		0x0200
+#define I82577_PHY_CTRL2_AUTO_MDI_MDIX		0x0400
+#define I82577_PHY_CTRL2_MDIX_CFG_MASK		0x0600
+
+/* I82577 PHY Diagnostics Status */
+#define I82577_DSTATUS_CABLE_LENGTH		0x03FC
+#define I82577_DSTATUS_CABLE_LENGTH_SHIFT	2
+
+/* BM PHY Copper Specific Control 1 */
+#define BM_CS_CTRL1			16
+
+/* BM PHY Copper Specific Status */
+#define BM_CS_STATUS			17
+#define BM_CS_STATUS_LINK_UP		0x0400
+#define BM_CS_STATUS_RESOLVED		0x0800
+#define BM_CS_STATUS_SPEED_MASK		0xC000
+#define BM_CS_STATUS_SPEED_1000		0x8000
+
+/* 82577 Mobile Phy Status Register */
+#define HV_M_STATUS			26
+#define HV_M_STATUS_AUTONEG_COMPLETE	0x1000
+#define HV_M_STATUS_SPEED_MASK		0x0300
+#define HV_M_STATUS_SPEED_1000		0x0200
+#define HV_M_STATUS_SPEED_100		0x0100
+#define HV_M_STATUS_LINK_UP		0x0040
+
+#define IGP01E1000_PHY_PCS_INIT_REG	0x00B4
+#define IGP01E1000_PHY_POLARITY_MASK	0x0078
+
+#define IGP01E1000_PSCR_AUTO_MDIX	0x1000
+#define IGP01E1000_PSCR_FORCE_MDI_MDIX	0x2000	/* 0=MDI, 1=MDIX */
+
+#define IGP01E1000_PSCFR_SMART_SPEED	0x0080
+
+#define IGP02E1000_PM_SPD		0x0001	/* Smart Power Down */
+#define IGP02E1000_PM_D0_LPLU		0x0002	/* For D0a states */
+#define IGP02E1000_PM_D3_LPLU		0x0004	/* For all other states */
+
+#define IGP01E1000_PLHR_SS_DOWNGRADE	0x8000
+
+#define IGP01E1000_PSSR_POLARITY_REVERSED	0x0002
+#define IGP01E1000_PSSR_MDIX		0x0800
+#define IGP01E1000_PSSR_SPEED_MASK	0xC000
+#define IGP01E1000_PSSR_SPEED_1000MBPS	0xC000
+
+#define IGP02E1000_PHY_CHANNEL_NUM	4
+#define IGP02E1000_PHY_AGC_A		0x11B1
+#define IGP02E1000_PHY_AGC_B		0x12B1
+#define IGP02E1000_PHY_AGC_C		0x14B1
+#define IGP02E1000_PHY_AGC_D		0x18B1
+
+#define IGP02E1000_AGC_LENGTH_SHIFT	9	/* Course=15:13, Fine=12:9 */
+#define IGP02E1000_AGC_LENGTH_MASK	0x7F
+#define IGP02E1000_AGC_RANGE		15
+
+#define E1000_CABLE_LENGTH_UNDEFINED	0xFF
+
+#define E1000_KMRNCTRLSTA_OFFSET	0x001F0000
+#define E1000_KMRNCTRLSTA_OFFSET_SHIFT	16
+#define E1000_KMRNCTRLSTA_REN		0x00200000
+#define E1000_KMRNCTRLSTA_CTRL_OFFSET	0x1	/* Kumeran Control */
+#define E1000_KMRNCTRLSTA_DIAG_OFFSET	0x3	/* Kumeran Diagnostic */
+#define E1000_KMRNCTRLSTA_TIMEOUTS	0x4	/* Kumeran Timeouts */
+#define E1000_KMRNCTRLSTA_INBAND_PARAM	0x9	/* Kumeran InBand Parameters */
+#define E1000_KMRNCTRLSTA_IBIST_DISABLE	0x0200	/* Kumeran IBIST Disable */
+#define E1000_KMRNCTRLSTA_DIAG_NELPBK	0x1000	/* Nearend Loopback mode */
+#define E1000_KMRNCTRLSTA_K1_CONFIG	0x7
+#define E1000_KMRNCTRLSTA_K1_ENABLE	0x0002	/* enable K1 */
+#define E1000_KMRNCTRLSTA_HD_CTRL	0x10	/* Kumeran HD Control */
+
+#define IFE_PHY_EXTENDED_STATUS_CONTROL	0x10
+#define IFE_PHY_SPECIAL_CONTROL		0x11	/* 100BaseTx PHY Special Ctrl */
+#define IFE_PHY_SPECIAL_CONTROL_LED	0x1B	/* PHY Special and LED Ctrl */
+#define IFE_PHY_MDIX_CONTROL		0x1C	/* MDI/MDI-X Control */
+
+/* IFE PHY Extended Status Control */
+#define IFE_PESC_POLARITY_REVERSED	0x0100
+
+/* IFE PHY Special Control */
+#define IFE_PSC_AUTO_POLARITY_DISABLE	0x0010
+#define IFE_PSC_FORCE_POLARITY		0x0020
+
+/* IFE PHY Special Control and LED Control */
+#define IFE_PSCL_PROBE_MODE		0x0020
+#define IFE_PSCL_PROBE_LEDS_OFF		0x0006	/* Force LEDs 0 and 2 off */
+#define IFE_PSCL_PROBE_LEDS_ON		0x0007	/* Force LEDs 0 and 2 on */
+
+/* IFE PHY MDIX Control */
+#define IFE_PMC_MDIX_STATUS		0x0020	/* 1=MDI-X, 0=MDI */
+#define IFE_PMC_FORCE_MDIX		0x0040	/* 1=force MDI-X, 0=force MDI */
+#define IFE_PMC_AUTO_MDIX		0x0080	/* 1=enable auto, 0=disable */
+
+#endif
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
new file mode 100644
index 0000000000..02d871bc11
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+/* PTP 1588 Hardware Clock (PHC)
+ * Derived from PTP Hardware Clock driver for Intel 82576 and 82580 (igb)
+ * Copyright (C) 2011 Richard Cochran <richardcochran@gmail.com>
+ */
+
+#include "e1000.h"
+
+#ifdef CONFIG_E1000E_HWTS
+#include <linux/clocksource.h>
+#include <linux/ktime.h>
+#include <asm/tsc.h>
+#endif
+
+/**
+ * e1000e_phc_adjfine - adjust the frequency of the hardware clock
+ * @ptp: ptp clock structure
+ * @delta: Desired frequency chance in scaled parts per million
+ *
+ * Adjust the frequency of the PHC cycle counter by the indicated delta from
+ * the base frequency.
+ *
+ * Scaled parts per million is ppm but with a 16 bit binary fractional field.
+ **/
+static int e1000e_phc_adjfine(struct ptp_clock_info *ptp, long delta)
+{
+	struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter,
+						     ptp_clock_info);
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned long flags;
+	u64 incvalue;
+	u32 timinca;
+	s32 ret_val;
+
+	/* Get the System Time Register SYSTIM base frequency */
+	ret_val = e1000e_get_base_timinca(adapter, &timinca);
+	if (ret_val)
+		return ret_val;
+
+	spin_lock_irqsave(&adapter->systim_lock, flags);
+
+	incvalue = timinca & E1000_TIMINCA_INCVALUE_MASK;
+	incvalue = adjust_by_scaled_ppm(incvalue, delta);
+
+	timinca &= ~E1000_TIMINCA_INCVALUE_MASK;
+	timinca |= incvalue;
+
+	ew32(TIMINCA, timinca);
+
+	adapter->ptp_delta = delta;
+
+	spin_unlock_irqrestore(&adapter->systim_lock, flags);
+
+	return 0;
+}
+
+/**
+ * e1000e_phc_adjtime - Shift the time of the hardware clock
+ * @ptp: ptp clock structure
+ * @delta: Desired change in nanoseconds
+ *
+ * Adjust the timer by resetting the timecounter structure.
+ **/
+static int e1000e_phc_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter,
+						     ptp_clock_info);
+	unsigned long flags;
+
+	spin_lock_irqsave(&adapter->systim_lock, flags);
+	timecounter_adjtime(&adapter->tc, delta);
+	spin_unlock_irqrestore(&adapter->systim_lock, flags);
+
+	return 0;
+}
+
+#ifdef CONFIG_E1000E_HWTS
+#define MAX_HW_WAIT_COUNT (3)
+
+/**
+ * e1000e_phc_get_syncdevicetime - Callback given to timekeeping code reads system/device registers
+ * @device: current device time
+ * @system: system counter value read synchronously with device time
+ * @ctx: context provided by timekeeping code
+ *
+ * Read device and system (ART) clock simultaneously and return the corrected
+ * clock values in ns.
+ **/
+static int e1000e_phc_get_syncdevicetime(ktime_t *device,
+					 struct system_counterval_t *system,
+					 void *ctx)
+{
+	struct e1000_adapter *adapter = (struct e1000_adapter *)ctx;
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned long flags;
+	int i;
+	u32 tsync_ctrl;
+	u64 dev_cycles;
+	u64 sys_cycles;
+
+	tsync_ctrl = er32(TSYNCTXCTL);
+	tsync_ctrl |= E1000_TSYNCTXCTL_START_SYNC |
+		E1000_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK;
+	ew32(TSYNCTXCTL, tsync_ctrl);
+	for (i = 0; i < MAX_HW_WAIT_COUNT; ++i) {
+		udelay(1);
+		tsync_ctrl = er32(TSYNCTXCTL);
+		if (tsync_ctrl & E1000_TSYNCTXCTL_SYNC_COMP)
+			break;
+	}
+
+	if (i == MAX_HW_WAIT_COUNT)
+		return -ETIMEDOUT;
+
+	dev_cycles = er32(SYSSTMPH);
+	dev_cycles <<= 32;
+	dev_cycles |= er32(SYSSTMPL);
+	spin_lock_irqsave(&adapter->systim_lock, flags);
+	*device = ns_to_ktime(timecounter_cyc2time(&adapter->tc, dev_cycles));
+	spin_unlock_irqrestore(&adapter->systim_lock, flags);
+
+	sys_cycles = er32(PLTSTMPH);
+	sys_cycles <<= 32;
+	sys_cycles |= er32(PLTSTMPL);
+	*system = convert_art_to_tsc(sys_cycles);
+
+	return 0;
+}
+
+/**
+ * e1000e_phc_getcrosststamp - Reads the current system/device cross timestamp
+ * @ptp: ptp clock structure
+ * @xtstamp: structure containing timestamp
+ *
+ * Read device and system (ART) clock simultaneously and return the scaled
+ * clock values in ns.
+ **/
+static int e1000e_phc_getcrosststamp(struct ptp_clock_info *ptp,
+				     struct system_device_crosststamp *xtstamp)
+{
+	struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter,
+						     ptp_clock_info);
+
+	return get_device_system_crosststamp(e1000e_phc_get_syncdevicetime,
+						adapter, NULL, xtstamp);
+}
+#endif/*CONFIG_E1000E_HWTS*/
+
+/**
+ * e1000e_phc_gettimex - Reads the current time from the hardware clock and
+ *                       system clock
+ * @ptp: ptp clock structure
+ * @ts: timespec structure to hold the current PHC time
+ * @sts: structure to hold the current system time
+ *
+ * Read the timecounter and return the correct value in ns after converting
+ * it into a struct timespec.
+ **/
+static int e1000e_phc_gettimex(struct ptp_clock_info *ptp,
+			       struct timespec64 *ts,
+			       struct ptp_system_timestamp *sts)
+{
+	struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter,
+						     ptp_clock_info);
+	unsigned long flags;
+	u64 cycles, ns;
+
+	spin_lock_irqsave(&adapter->systim_lock, flags);
+
+	/* NOTE: Non-monotonic SYSTIM readings may be returned */
+	cycles = e1000e_read_systim(adapter, sts);
+	ns = timecounter_cyc2time(&adapter->tc, cycles);
+
+	spin_unlock_irqrestore(&adapter->systim_lock, flags);
+
+	*ts = ns_to_timespec64(ns);
+
+	return 0;
+}
+
+/**
+ * e1000e_phc_settime - Set the current time on the hardware clock
+ * @ptp: ptp clock structure
+ * @ts: timespec containing the new time for the cycle counter
+ *
+ * Reset the timecounter to use a new base value instead of the kernel
+ * wall timer value.
+ **/
+static int e1000e_phc_settime(struct ptp_clock_info *ptp,
+			      const struct timespec64 *ts)
+{
+	struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter,
+						     ptp_clock_info);
+	unsigned long flags;
+	u64 ns;
+
+	ns = timespec64_to_ns(ts);
+
+	/* reset the timecounter */
+	spin_lock_irqsave(&adapter->systim_lock, flags);
+	timecounter_init(&adapter->tc, &adapter->cc, ns);
+	spin_unlock_irqrestore(&adapter->systim_lock, flags);
+
+	return 0;
+}
+
+/**
+ * e1000e_phc_enable - enable or disable an ancillary feature
+ * @ptp: ptp clock structure
+ * @request: Desired resource to enable or disable
+ * @on: Caller passes one to enable or zero to disable
+ *
+ * Enable (or disable) ancillary features of the PHC subsystem.
+ * Currently, no ancillary features are supported.
+ **/
+static int e1000e_phc_enable(struct ptp_clock_info __always_unused *ptp,
+			     struct ptp_clock_request __always_unused *request,
+			     int __always_unused on)
+{
+	return -EOPNOTSUPP;
+}
+
+static void e1000e_systim_overflow_work(struct work_struct *work)
+{
+	struct e1000_adapter *adapter = container_of(work, struct e1000_adapter,
+						     systim_overflow_work.work);
+	struct e1000_hw *hw = &adapter->hw;
+	struct timespec64 ts;
+	u64 ns;
+
+	/* Update the timecounter */
+	ns = timecounter_read(&adapter->tc);
+
+	ts = ns_to_timespec64(ns);
+	e_dbg("SYSTIM overflow check at %lld.%09lu\n",
+	      (long long) ts.tv_sec, ts.tv_nsec);
+
+	schedule_delayed_work(&adapter->systim_overflow_work,
+			      E1000_SYSTIM_OVERFLOW_PERIOD);
+}
+
+static const struct ptp_clock_info e1000e_ptp_clock_info = {
+	.owner		= THIS_MODULE,
+	.n_alarm	= 0,
+	.n_ext_ts	= 0,
+	.n_per_out	= 0,
+	.n_pins		= 0,
+	.pps		= 0,
+	.adjfine	= e1000e_phc_adjfine,
+	.adjtime	= e1000e_phc_adjtime,
+	.gettimex64	= e1000e_phc_gettimex,
+	.settime64	= e1000e_phc_settime,
+	.enable		= e1000e_phc_enable,
+};
+
+/**
+ * e1000e_ptp_init - initialize PTP for devices which support it
+ * @adapter: board private structure
+ *
+ * This function performs the required steps for enabling PTP support.
+ * If PTP support has already been loaded it simply calls the cyclecounter
+ * init routine and exits.
+ **/
+void e1000e_ptp_init(struct e1000_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	adapter->ptp_clock = NULL;
+
+	if (!(adapter->flags & FLAG_HAS_HW_TIMESTAMP))
+		return;
+
+	adapter->ptp_clock_info = e1000e_ptp_clock_info;
+
+	snprintf(adapter->ptp_clock_info.name,
+		 sizeof(adapter->ptp_clock_info.name), "%pm",
+		 adapter->netdev->perm_addr);
+
+	switch (hw->mac.type) {
+	case e1000_pch2lan:
+	case e1000_pch_lpt:
+	case e1000_pch_spt:
+	case e1000_pch_cnp:
+	case e1000_pch_tgp:
+	case e1000_pch_adp:
+	case e1000_pch_mtp:
+	case e1000_pch_lnp:
+	case e1000_pch_ptp:
+	case e1000_pch_nvp:
+		if ((hw->mac.type < e1000_pch_lpt) ||
+		    (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) {
+			adapter->ptp_clock_info.max_adj = 24000000 - 1;
+			break;
+		}
+		fallthrough;
+	case e1000_82574:
+	case e1000_82583:
+		adapter->ptp_clock_info.max_adj = 600000000 - 1;
+		break;
+	default:
+		break;
+	}
+
+#ifdef CONFIG_E1000E_HWTS
+	/* CPU must have ART and GBe must be from Sunrise Point or greater */
+	if (hw->mac.type >= e1000_pch_spt && boot_cpu_has(X86_FEATURE_ART))
+		adapter->ptp_clock_info.getcrosststamp =
+			e1000e_phc_getcrosststamp;
+#endif/*CONFIG_E1000E_HWTS*/
+
+	INIT_DELAYED_WORK(&adapter->systim_overflow_work,
+			  e1000e_systim_overflow_work);
+
+	schedule_delayed_work(&adapter->systim_overflow_work,
+			      E1000_SYSTIM_OVERFLOW_PERIOD);
+
+	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_clock_info,
+						&adapter->pdev->dev);
+	if (IS_ERR(adapter->ptp_clock)) {
+		adapter->ptp_clock = NULL;
+		e_err("ptp_clock_register failed\n");
+	} else if (adapter->ptp_clock) {
+		e_info("registered PHC clock\n");
+	}
+}
+
+/**
+ * e1000e_ptp_remove - disable PTP device and stop the overflow check
+ * @adapter: board private structure
+ *
+ * Stop the PTP support, and cancel the delayed work.
+ **/
+void e1000e_ptp_remove(struct e1000_adapter *adapter)
+{
+	if (!(adapter->flags & FLAG_HAS_HW_TIMESTAMP))
+		return;
+
+	cancel_delayed_work_sync(&adapter->systim_overflow_work);
+
+	if (adapter->ptp_clock) {
+		ptp_clock_unregister(adapter->ptp_clock);
+		adapter->ptp_clock = NULL;
+		e_info("removed PHC\n");
+	}
+}
diff --git a/drivers/net/ethernet/intel/e1000e/regs.h b/drivers/net/ethernet/intel/e1000e/regs.h
new file mode 100644
index 0000000000..6c0cd8cab3
--- /dev/null
+++ b/drivers/net/ethernet/intel/e1000e/regs.h
@@ -0,0 +1,245 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _E1000E_REGS_H_
+#define _E1000E_REGS_H_
+
+#define E1000_CTRL	0x00000	/* Device Control - RW */
+#define E1000_STATUS	0x00008	/* Device Status - RO */
+#define E1000_EECD	0x00010	/* EEPROM/Flash Control - RW */
+#define E1000_EERD	0x00014	/* EEPROM Read - RW */
+#define E1000_CTRL_EXT	0x00018	/* Extended Device Control - RW */
+#define E1000_FLA	0x0001C	/* Flash Access - RW */
+#define E1000_MDIC	0x00020	/* MDI Control - RW */
+#define E1000_SCTL	0x00024	/* SerDes Control - RW */
+#define E1000_FCAL	0x00028	/* Flow Control Address Low - RW */
+#define E1000_FCAH	0x0002C	/* Flow Control Address High -RW */
+#define E1000_FEXT	0x0002C	/* Future Extended - RW */
+#define E1000_FEXTNVM	0x00028	/* Future Extended NVM - RW */
+#define E1000_FEXTNVM3	0x0003C	/* Future Extended NVM 3 - RW */
+#define E1000_FEXTNVM4	0x00024	/* Future Extended NVM 4 - RW */
+#define E1000_FEXTNVM5	0x00014	/* Future Extended NVM 5 - RW */
+#define E1000_FEXTNVM6	0x00010	/* Future Extended NVM 6 - RW */
+#define E1000_FEXTNVM7	0x000E4	/* Future Extended NVM 7 - RW */
+#define E1000_FEXTNVM8	0x5BB0	/* Future Extended NVM 8 - RW */
+#define E1000_FEXTNVM9	0x5BB4	/* Future Extended NVM 9 - RW */
+#define E1000_FEXTNVM11	0x5BBC	/* Future Extended NVM 11 - RW */
+#define E1000_FEXTNVM12	0x5BC0	/* Future Extended NVM 12 - RW */
+#define E1000_PCIEANACFG	0x00F18	/* PCIE Analog Config */
+#define E1000_DPGFR	0x00FAC	/* Dynamic Power Gate Force Control Register */
+#define E1000_FCT	0x00030	/* Flow Control Type - RW */
+#define E1000_VET	0x00038	/* VLAN Ether Type - RW */
+#define E1000_ICR	0x000C0	/* Interrupt Cause Read - R/clr */
+#define E1000_ITR	0x000C4	/* Interrupt Throttling Rate - RW */
+#define E1000_ICS	0x000C8	/* Interrupt Cause Set - WO */
+#define E1000_IMS	0x000D0	/* Interrupt Mask Set - RW */
+#define E1000_IMC	0x000D8	/* Interrupt Mask Clear - WO */
+#define E1000_IAM	0x000E0	/* Interrupt Acknowledge Auto Mask */
+#define E1000_IVAR	0x000E4	/* Interrupt Vector Allocation Register - RW */
+#define E1000_SVCR	0x000F0
+#define E1000_SVT	0x000F4
+#define E1000_LPIC	0x000FC	/* Low Power IDLE control */
+#define E1000_RCTL	0x00100	/* Rx Control - RW */
+#define E1000_FCTTV	0x00170	/* Flow Control Transmit Timer Value - RW */
+#define E1000_TXCW	0x00178	/* Tx Configuration Word - RW */
+#define E1000_RXCW	0x00180	/* Rx Configuration Word - RO */
+#define E1000_PBA_ECC	0x01100	/* PBA ECC Register */
+#define E1000_TCTL	0x00400	/* Tx Control - RW */
+#define E1000_TCTL_EXT	0x00404	/* Extended Tx Control - RW */
+#define E1000_TIPG	0x00410	/* Tx Inter-packet gap -RW */
+#define E1000_AIT	0x00458	/* Adaptive Interframe Spacing Throttle - RW */
+#define E1000_LEDCTL	0x00E00	/* LED Control - RW */
+#define E1000_EXTCNF_CTRL	0x00F00	/* Extended Configuration Control */
+#define E1000_EXTCNF_SIZE	0x00F08	/* Extended Configuration Size */
+#define E1000_PHY_CTRL	0x00F10	/* PHY Control Register in CSR */
+#define E1000_POEMB	E1000_PHY_CTRL	/* PHY OEM Bits */
+#define E1000_PBA	0x01000	/* Packet Buffer Allocation - RW */
+#define E1000_PBS	0x01008	/* Packet Buffer Size */
+#define E1000_PBECCSTS	0x0100C	/* Packet Buffer ECC Status - RW */
+#define E1000_IOSFPC	0x00F28	/* TX corrupted data  */
+#define E1000_EEMNGCTL	0x01010	/* MNG EEprom Control */
+#define E1000_EEWR	0x0102C	/* EEPROM Write Register - RW */
+#define E1000_FLOP	0x0103C	/* FLASH Opcode Register */
+#define E1000_ERT	0x02008	/* Early Rx Threshold - RW */
+#define E1000_FCRTL	0x02160	/* Flow Control Receive Threshold Low - RW */
+#define E1000_FCRTH	0x02168	/* Flow Control Receive Threshold High - RW */
+#define E1000_PSRCTL	0x02170	/* Packet Split Receive Control - RW */
+#define E1000_RDFH	0x02410	/* Rx Data FIFO Head - RW */
+#define E1000_RDFT	0x02418	/* Rx Data FIFO Tail - RW */
+#define E1000_RDFHS	0x02420	/* Rx Data FIFO Head Saved - RW */
+#define E1000_RDFTS	0x02428	/* Rx Data FIFO Tail Saved - RW */
+#define E1000_RDFPC	0x02430	/* Rx Data FIFO Packet Count - RW */
+/* Split and Replication Rx Control - RW */
+#define E1000_RDTR	0x02820	/* Rx Delay Timer - RW */
+#define E1000_RADV	0x0282C	/* Rx Interrupt Absolute Delay Timer - RW */
+/* Convenience macros
+ *
+ * Note: "_n" is the queue number of the register to be written to.
+ *
+ * Example usage:
+ * E1000_RDBAL_REG(current_rx_queue)
+ */
+#define E1000_RDBAL(_n)	((_n) < 4 ? (0x02800 + ((_n) * 0x100)) : \
+			 (0x0C000 + ((_n) * 0x40)))
+#define E1000_RDBAH(_n)	((_n) < 4 ? (0x02804 + ((_n) * 0x100)) : \
+			 (0x0C004 + ((_n) * 0x40)))
+#define E1000_RDLEN(_n)	((_n) < 4 ? (0x02808 + ((_n) * 0x100)) : \
+			 (0x0C008 + ((_n) * 0x40)))
+#define E1000_RDH(_n)	((_n) < 4 ? (0x02810 + ((_n) * 0x100)) : \
+			 (0x0C010 + ((_n) * 0x40)))
+#define E1000_RDT(_n)	((_n) < 4 ? (0x02818 + ((_n) * 0x100)) : \
+			 (0x0C018 + ((_n) * 0x40)))
+#define E1000_RXDCTL(_n)	((_n) < 4 ? (0x02828 + ((_n) * 0x100)) : \
+				 (0x0C028 + ((_n) * 0x40)))
+#define E1000_TDBAL(_n)	((_n) < 4 ? (0x03800 + ((_n) * 0x100)) : \
+			 (0x0E000 + ((_n) * 0x40)))
+#define E1000_TDBAH(_n)	((_n) < 4 ? (0x03804 + ((_n) * 0x100)) : \
+			 (0x0E004 + ((_n) * 0x40)))
+#define E1000_TDLEN(_n)	((_n) < 4 ? (0x03808 + ((_n) * 0x100)) : \
+			 (0x0E008 + ((_n) * 0x40)))
+#define E1000_TDH(_n)	((_n) < 4 ? (0x03810 + ((_n) * 0x100)) : \
+			 (0x0E010 + ((_n) * 0x40)))
+#define E1000_TDT(_n)	((_n) < 4 ? (0x03818 + ((_n) * 0x100)) : \
+			 (0x0E018 + ((_n) * 0x40)))
+#define E1000_TXDCTL(_n)	((_n) < 4 ? (0x03828 + ((_n) * 0x100)) : \
+				 (0x0E028 + ((_n) * 0x40)))
+#define E1000_TARC(_n)		(0x03840 + ((_n) * 0x100))
+#define E1000_KABGTXD		0x03004	/* AFE Band Gap Transmit Ref Data */
+#define E1000_RAL(_i)		(((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \
+				 (0x054E0 + ((_i - 16) * 8)))
+#define E1000_RAH(_i)		(((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
+				 (0x054E4 + ((_i - 16) * 8)))
+#define E1000_SHRAL(_i)		(0x05438 + ((_i) * 8))
+#define E1000_SHRAH(_i)		(0x0543C + ((_i) * 8))
+#define E1000_TDFH		0x03410	/* Tx Data FIFO Head - RW */
+#define E1000_TDFT		0x03418	/* Tx Data FIFO Tail - RW */
+#define E1000_TDFHS		0x03420	/* Tx Data FIFO Head Saved - RW */
+#define E1000_TDFTS		0x03428	/* Tx Data FIFO Tail Saved - RW */
+#define E1000_TDFPC		0x03430	/* Tx Data FIFO Packet Count - RW */
+#define E1000_TIDV	0x03820	/* Tx Interrupt Delay Value - RW */
+#define E1000_TADV	0x0382C	/* Tx Interrupt Absolute Delay Val - RW */
+#define E1000_CRCERRS	0x04000	/* CRC Error Count - R/clr */
+#define E1000_ALGNERRC	0x04004	/* Alignment Error Count - R/clr */
+#define E1000_SYMERRS	0x04008	/* Symbol Error Count - R/clr */
+#define E1000_RXERRC	0x0400C	/* Receive Error Count - R/clr */
+#define E1000_MPC	0x04010	/* Missed Packet Count - R/clr */
+#define E1000_SCC	0x04014	/* Single Collision Count - R/clr */
+#define E1000_ECOL	0x04018	/* Excessive Collision Count - R/clr */
+#define E1000_MCC	0x0401C	/* Multiple Collision Count - R/clr */
+#define E1000_LATECOL	0x04020	/* Late Collision Count - R/clr */
+#define E1000_COLC	0x04028	/* Collision Count - R/clr */
+#define E1000_DC	0x04030	/* Defer Count - R/clr */
+#define E1000_TNCRS	0x04034	/* Tx-No CRS - R/clr */
+#define E1000_SEC	0x04038	/* Sequence Error Count - R/clr */
+#define E1000_CEXTERR	0x0403C	/* Carrier Extension Error Count - R/clr */
+#define E1000_RLEC	0x04040	/* Receive Length Error Count - R/clr */
+#define E1000_XONRXC	0x04048	/* XON Rx Count - R/clr */
+#define E1000_XONTXC	0x0404C	/* XON Tx Count - R/clr */
+#define E1000_XOFFRXC	0x04050	/* XOFF Rx Count - R/clr */
+#define E1000_XOFFTXC	0x04054	/* XOFF Tx Count - R/clr */
+#define E1000_FCRUC	0x04058	/* Flow Control Rx Unsupported Count- R/clr */
+#define E1000_PRC64	0x0405C	/* Packets Rx (64 bytes) - R/clr */
+#define E1000_PRC127	0x04060	/* Packets Rx (65-127 bytes) - R/clr */
+#define E1000_PRC255	0x04064	/* Packets Rx (128-255 bytes) - R/clr */
+#define E1000_PRC511	0x04068	/* Packets Rx (255-511 bytes) - R/clr */
+#define E1000_PRC1023	0x0406C	/* Packets Rx (512-1023 bytes) - R/clr */
+#define E1000_PRC1522	0x04070	/* Packets Rx (1024-1522 bytes) - R/clr */
+#define E1000_GPRC	0x04074	/* Good Packets Rx Count - R/clr */
+#define E1000_BPRC	0x04078	/* Broadcast Packets Rx Count - R/clr */
+#define E1000_MPRC	0x0407C	/* Multicast Packets Rx Count - R/clr */
+#define E1000_GPTC	0x04080	/* Good Packets Tx Count - R/clr */
+#define E1000_GORCL	0x04088	/* Good Octets Rx Count Low - R/clr */
+#define E1000_GORCH	0x0408C	/* Good Octets Rx Count High - R/clr */
+#define E1000_GOTCL	0x04090	/* Good Octets Tx Count Low - R/clr */
+#define E1000_GOTCH	0x04094	/* Good Octets Tx Count High - R/clr */
+#define E1000_RNBC	0x040A0	/* Rx No Buffers Count - R/clr */
+#define E1000_RUC	0x040A4	/* Rx Undersize Count - R/clr */
+#define E1000_RFC	0x040A8	/* Rx Fragment Count - R/clr */
+#define E1000_ROC	0x040AC	/* Rx Oversize Count - R/clr */
+#define E1000_RJC	0x040B0	/* Rx Jabber Count - R/clr */
+#define E1000_MGTPRC	0x040B4	/* Management Packets Rx Count - R/clr */
+#define E1000_MGTPDC	0x040B8	/* Management Packets Dropped Count - R/clr */
+#define E1000_MGTPTC	0x040BC	/* Management Packets Tx Count - R/clr */
+#define E1000_TORL	0x040C0	/* Total Octets Rx Low - R/clr */
+#define E1000_TORH	0x040C4	/* Total Octets Rx High - R/clr */
+#define E1000_TOTL	0x040C8	/* Total Octets Tx Low - R/clr */
+#define E1000_TOTH	0x040CC	/* Total Octets Tx High - R/clr */
+#define E1000_TPR	0x040D0	/* Total Packets Rx - R/clr */
+#define E1000_TPT	0x040D4	/* Total Packets Tx - R/clr */
+#define E1000_PTC64	0x040D8	/* Packets Tx (64 bytes) - R/clr */
+#define E1000_PTC127	0x040DC	/* Packets Tx (65-127 bytes) - R/clr */
+#define E1000_PTC255	0x040E0	/* Packets Tx (128-255 bytes) - R/clr */
+#define E1000_PTC511	0x040E4	/* Packets Tx (256-511 bytes) - R/clr */
+#define E1000_PTC1023	0x040E8	/* Packets Tx (512-1023 bytes) - R/clr */
+#define E1000_PTC1522	0x040EC	/* Packets Tx (1024-1522 Bytes) - R/clr */
+#define E1000_MPTC	0x040F0	/* Multicast Packets Tx Count - R/clr */
+#define E1000_BPTC	0x040F4	/* Broadcast Packets Tx Count - R/clr */
+#define E1000_TSCTC	0x040F8	/* TCP Segmentation Context Tx - R/clr */
+#define E1000_TSCTFC	0x040FC	/* TCP Segmentation Context Tx Fail - R/clr */
+#define E1000_IAC	0x04100	/* Interrupt Assertion Count */
+#define E1000_ICRXPTC	0x04104	/* Interrupt Cause Rx Pkt Timer Expire Count */
+#define E1000_ICRXATC	0x04108	/* Interrupt Cause Rx Abs Timer Expire Count */
+#define E1000_ICTXPTC	0x0410C	/* Interrupt Cause Tx Pkt Timer Expire Count */
+#define E1000_ICTXATC	0x04110	/* Interrupt Cause Tx Abs Timer Expire Count */
+#define E1000_ICTXQEC	0x04118	/* Interrupt Cause Tx Queue Empty Count */
+#define E1000_ICTXQMTC	0x0411C	/* Interrupt Cause Tx Queue Min Thresh Count */
+#define E1000_ICRXDMTC	0x04120	/* Interrupt Cause Rx Desc Min Thresh Count */
+#define E1000_ICRXOC	0x04124	/* Interrupt Cause Receiver Overrun Count */
+#define E1000_CRC_OFFSET	0x05F50	/* CRC Offset register */
+
+#define E1000_PCS_LCTL	0x04208	/* PCS Link Control - RW */
+#define E1000_PCS_LSTAT	0x0420C	/* PCS Link Status - RO */
+#define E1000_PCS_ANADV	0x04218	/* AN advertisement - RW */
+#define E1000_PCS_LPAB	0x0421C	/* Link Partner Ability - RW */
+#define E1000_RXCSUM	0x05000	/* Rx Checksum Control - RW */
+#define E1000_RFCTL	0x05008	/* Receive Filter Control */
+#define E1000_MTA	0x05200	/* Multicast Table Array - RW Array */
+#define E1000_RA	0x05400	/* Receive Address - RW Array */
+#define E1000_VFTA	0x05600	/* VLAN Filter Table Array - RW Array */
+#define E1000_WUC	0x05800	/* Wakeup Control - RW */
+#define E1000_WUFC	0x05808	/* Wakeup Filter Control - RW */
+#define E1000_WUS	0x05810	/* Wakeup Status - RO */
+#define E1000_MANC	0x05820	/* Management Control - RW */
+#define E1000_FFLT	0x05F00	/* Flexible Filter Length Table - RW Array */
+#define E1000_HOST_IF	0x08800	/* Host Interface */
+
+#define E1000_KMRNCTRLSTA	0x00034	/* MAC-PHY interface - RW */
+#define E1000_MANC2H		0x05860	/* Management Control To Host - RW */
+/* Management Decision Filters */
+#define E1000_MDEF(_n)		(0x05890 + (4 * (_n)))
+#define E1000_SW_FW_SYNC	0x05B5C	/* SW-FW Synchronization - RW */
+#define E1000_GCR	0x05B00	/* PCI-Ex Control */
+#define E1000_GCR2	0x05B64	/* PCI-Ex Control #2 */
+#define E1000_FACTPS	0x05B30	/* Function Active and Power State to MNG */
+#define E1000_SWSM	0x05B50	/* SW Semaphore */
+#define E1000_FWSM	0x05B54	/* FW Semaphore */
+#define E1000_EXFWSM	0x05B58	/* Extended FW Semaphore */
+/* Driver-only SW semaphore (not used by BOOT agents) */
+#define E1000_SWSM2	0x05B58
+#define E1000_FFLT_DBG	0x05F04	/* Debug Register */
+#define E1000_HICR	0x08F00	/* Host Interface Control */
+
+/* RSS registers */
+#define E1000_MRQC	0x05818	/* Multiple Receive Control - RW */
+#define E1000_RETA(_i)	(0x05C00 + ((_i) * 4))	/* Redirection Table - RW */
+#define E1000_RSSRK(_i)	(0x05C80 + ((_i) * 4))	/* RSS Random Key - RW */
+#define E1000_TSYNCRXCTL	0x0B620	/* Rx Time Sync Control register - RW */
+#define E1000_TSYNCTXCTL	0x0B614	/* Tx Time Sync Control register - RW */
+#define E1000_RXSTMPL	0x0B624	/* Rx timestamp Low - RO */
+#define E1000_RXSTMPH	0x0B628	/* Rx timestamp High - RO */
+#define E1000_TXSTMPL	0x0B618	/* Tx timestamp value Low - RO */
+#define E1000_TXSTMPH	0x0B61C	/* Tx timestamp value High - RO */
+#define E1000_SYSTIML	0x0B600	/* System time register Low - RO */
+#define E1000_SYSTIMH	0x0B604	/* System time register High - RO */
+#define E1000_TIMINCA	0x0B608	/* Increment attributes register - RW */
+#define E1000_SYSSTMPL  0x0B648 /* HH Timesync system stamp low register */
+#define E1000_SYSSTMPH  0x0B64C /* HH Timesync system stamp hi register */
+#define E1000_PLTSTMPL  0x0B640 /* HH Timesync platform stamp low register */
+#define E1000_PLTSTMPH  0x0B644 /* HH Timesync platform stamp hi register */
+#define E1000_RXMTRL	0x0B634	/* Time sync Rx EtherType and Msg Type - RW */
+#define E1000_RXUDP	0x0B638	/* Time Sync Rx UDP Port - RW */
+
+/* PHY registers */
+#define I82579_DFT_CTRL	PHY_REG(769, 20)
+
+#endif
diff --git a/drivers/net/ethernet/intel/fm10k/Makefile b/drivers/net/ethernet/intel/fm10k/Makefile
new file mode 100644
index 0000000000..26a9746ccb
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/Makefile
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2013 - 2018 Intel Corporation.
+
+#
+# Makefile for the Intel(R) Ethernet Switch Host Interface Driver
+#
+
+obj-$(CONFIG_FM10K) += fm10k.o
+
+fm10k-y := fm10k_main.o \
+	   fm10k_common.o \
+	   fm10k_pci.o \
+	   fm10k_netdev.o \
+	   fm10k_ethtool.o \
+	   fm10k_pf.o \
+	   fm10k_vf.o \
+	   fm10k_mbx.o \
+	   fm10k_iov.o \
+	   fm10k_tlv.o
+
+fm10k-$(CONFIG_DEBUG_FS) += fm10k_debugfs.o
+fm10k-$(CONFIG_DCB) += fm10k_dcbnl.o
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
new file mode 100644
index 0000000000..6119a41088
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -0,0 +1,565 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
+
+#ifndef _FM10K_H_
+#define _FM10K_H_
+
+#include <linux/types.h>
+#include <linux/etherdevice.h>
+#include <linux/cpumask.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_vlan.h>
+#include <linux/pci.h>
+
+#include "fm10k_pf.h"
+#include "fm10k_vf.h"
+
+#define FM10K_MAX_JUMBO_FRAME_SIZE	15342	/* Maximum supported size 15K */
+
+#define MAX_QUEUES	FM10K_MAX_QUEUES_PF
+
+#define FM10K_MIN_RXD		 128
+#define FM10K_MAX_RXD		4096
+#define FM10K_DEFAULT_RXD	 256
+
+#define FM10K_MIN_TXD		 128
+#define FM10K_MAX_TXD		4096
+#define FM10K_DEFAULT_TXD	 256
+#define FM10K_DEFAULT_TX_WORK	 256
+
+#define FM10K_RXBUFFER_256	  256
+#define FM10K_RX_HDR_LEN	FM10K_RXBUFFER_256
+#define FM10K_RXBUFFER_2048	 2048
+#define FM10K_RX_BUFSZ		FM10K_RXBUFFER_2048
+
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define FM10K_RX_BUFFER_WRITE	16	/* Must be power of 2 */
+
+#define FM10K_MAX_STATIONS	63
+struct fm10k_l2_accel {
+	int size;
+	u16 count;
+	u16 dglort;
+	struct rcu_head rcu;
+	struct net_device *macvlan[];
+};
+
+enum fm10k_ring_state_t {
+	__FM10K_TX_DETECT_HANG,
+	__FM10K_HANG_CHECK_ARMED,
+	__FM10K_TX_XPS_INIT_DONE,
+	/* This must be last and is used to calculate BITMAP size */
+	__FM10K_TX_STATE_SIZE__,
+};
+
+#define check_for_tx_hang(ring) \
+	test_bit(__FM10K_TX_DETECT_HANG, (ring)->state)
+#define set_check_for_tx_hang(ring) \
+	set_bit(__FM10K_TX_DETECT_HANG, (ring)->state)
+#define clear_check_for_tx_hang(ring) \
+	clear_bit(__FM10K_TX_DETECT_HANG, (ring)->state)
+
+struct fm10k_tx_buffer {
+	struct fm10k_tx_desc *next_to_watch;
+	struct sk_buff *skb;
+	unsigned int bytecount;
+	u16 gso_segs;
+	u16 tx_flags;
+	DEFINE_DMA_UNMAP_ADDR(dma);
+	DEFINE_DMA_UNMAP_LEN(len);
+};
+
+struct fm10k_rx_buffer {
+	dma_addr_t dma;
+	struct page *page;
+	u32 page_offset;
+};
+
+struct fm10k_queue_stats {
+	u64 packets;
+	u64 bytes;
+};
+
+struct fm10k_tx_queue_stats {
+	u64 restart_queue;
+	u64 csum_err;
+	u64 tx_busy;
+	u64 tx_done_old;
+	u64 csum_good;
+};
+
+struct fm10k_rx_queue_stats {
+	u64 alloc_failed;
+	u64 csum_err;
+	u64 errors;
+	u64 csum_good;
+	u64 switch_errors;
+	u64 drops;
+	u64 pp_errors;
+	u64 link_errors;
+	u64 length_errors;
+};
+
+struct fm10k_ring {
+	struct fm10k_q_vector *q_vector;/* backpointer to host q_vector */
+	struct net_device *netdev;	/* netdev ring belongs to */
+	struct device *dev;		/* device for DMA mapping */
+	struct fm10k_l2_accel __rcu *l2_accel;	/* L2 acceleration list */
+	void *desc;			/* descriptor ring memory */
+	union {
+		struct fm10k_tx_buffer *tx_buffer;
+		struct fm10k_rx_buffer *rx_buffer;
+	};
+	u32 __iomem *tail;
+	DECLARE_BITMAP(state, __FM10K_TX_STATE_SIZE__);
+	dma_addr_t dma;			/* phys. address of descriptor ring */
+	unsigned int size;		/* length in bytes */
+
+	u8 queue_index;			/* needed for queue management */
+	u8 reg_idx;			/* holds the special value that gets
+					 * the hardware register offset
+					 * associated with this ring, which is
+					 * different for DCB and RSS modes
+					 */
+	u8 qos_pc;			/* priority class of queue */
+	u16 vid;			/* default VLAN ID of queue */
+	u16 count;			/* amount of descriptors */
+
+	u16 next_to_alloc;
+	u16 next_to_use;
+	u16 next_to_clean;
+
+	struct fm10k_queue_stats stats;
+	struct u64_stats_sync syncp;
+	union {
+		/* Tx */
+		struct fm10k_tx_queue_stats tx_stats;
+		/* Rx */
+		struct {
+			struct fm10k_rx_queue_stats rx_stats;
+			struct sk_buff *skb;
+		};
+	};
+} ____cacheline_internodealigned_in_smp;
+
+struct fm10k_ring_container {
+	struct fm10k_ring *ring;	/* pointer to linked list of rings */
+	unsigned int total_bytes;	/* total bytes processed this int */
+	unsigned int total_packets;	/* total packets processed this int */
+	u16 work_limit;			/* total work allowed per interrupt */
+	u16 itr;			/* interrupt throttle rate value */
+	u8 itr_scale;			/* ITR adjustment based on PCI speed */
+	u8 count;			/* total number of rings in vector */
+};
+
+#define FM10K_ITR_MAX		0x0FFF	/* maximum value for ITR */
+#define FM10K_ITR_10K		100	/* 100us */
+#define FM10K_ITR_20K		50	/* 50us */
+#define FM10K_ITR_40K		25	/* 25us */
+#define FM10K_ITR_ADAPTIVE	0x8000	/* adaptive interrupt moderation flag */
+
+#define ITR_IS_ADAPTIVE(itr) (!!(itr & FM10K_ITR_ADAPTIVE))
+
+#define FM10K_TX_ITR_DEFAULT	FM10K_ITR_40K
+#define FM10K_RX_ITR_DEFAULT	FM10K_ITR_20K
+#define FM10K_ITR_ENABLE	(FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR)
+
+static inline struct netdev_queue *txring_txq(const struct fm10k_ring *ring)
+{
+	return &ring->netdev->_tx[ring->queue_index];
+}
+
+/* iterator for handling rings in ring container */
+#define fm10k_for_each_ring(pos, head) \
+	for (pos = &(head).ring[(head).count]; (--pos) >= (head).ring;)
+
+#define MAX_Q_VECTORS 256
+#define MIN_Q_VECTORS	1
+enum fm10k_non_q_vectors {
+	FM10K_MBX_VECTOR,
+	NON_Q_VECTORS
+};
+
+#define MIN_MSIX_COUNT(hw)	(MIN_Q_VECTORS + NON_Q_VECTORS)
+
+struct fm10k_q_vector {
+	struct fm10k_intfc *interface;
+	u32 __iomem *itr;	/* pointer to ITR register for this vector */
+	u16 v_idx;		/* index of q_vector within interface array */
+	struct fm10k_ring_container rx, tx;
+
+	struct napi_struct napi;
+	cpumask_t affinity_mask;
+	char name[IFNAMSIZ + 9];
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *dbg_q_vector;
+#endif /* CONFIG_DEBUG_FS */
+	struct rcu_head rcu;	/* to avoid race with update stats on free */
+
+	/* for dynamic allocation of rings associated with this q_vector */
+	struct fm10k_ring ring[] ____cacheline_internodealigned_in_smp;
+};
+
+enum fm10k_ring_f_enum {
+	RING_F_RSS,
+	RING_F_QOS,
+	RING_F_ARRAY_SIZE  /* must be last in enum set */
+};
+
+struct fm10k_ring_feature {
+	u16 limit;	/* upper limit on feature indices */
+	u16 indices;	/* current value of indices */
+	u16 mask;	/* Mask used for feature to ring mapping */
+	u16 offset;	/* offset to start of feature */
+};
+
+struct fm10k_iov_data {
+	unsigned int		num_vfs;
+	unsigned int		next_vf_mbx;
+	struct rcu_head		rcu;
+	struct fm10k_vf_info	vf_info[];
+};
+
+enum fm10k_macvlan_request_type {
+	FM10K_UC_MAC_REQUEST,
+	FM10K_MC_MAC_REQUEST,
+	FM10K_VLAN_REQUEST
+};
+
+struct fm10k_macvlan_request {
+	enum fm10k_macvlan_request_type type;
+	struct list_head list;
+	union {
+		struct fm10k_mac_request {
+			u8 addr[ETH_ALEN];
+			u16 glort;
+			u16 vid;
+		} mac;
+		struct fm10k_vlan_request {
+			u32 vid;
+			u8 vsi;
+		} vlan;
+	};
+	bool set;
+};
+
+/* one work queue for entire driver */
+extern struct workqueue_struct *fm10k_workqueue;
+
+/* The following enumeration contains flags which indicate or enable modified
+ * driver behaviors. To avoid race conditions, the flags are stored in
+ * a BITMAP in the fm10k_intfc structure. The BITMAP should be accessed using
+ * atomic *_bit() operations.
+ */
+enum fm10k_flags_t {
+	FM10K_FLAG_RESET_REQUESTED,
+	FM10K_FLAG_RSS_FIELD_IPV4_UDP,
+	FM10K_FLAG_RSS_FIELD_IPV6_UDP,
+	FM10K_FLAG_SWPRI_CONFIG,
+	/* __FM10K_FLAGS_SIZE__ is used to calculate the size of
+	 * interface->flags and must be the last value in this
+	 * enumeration.
+	 */
+	__FM10K_FLAGS_SIZE__
+};
+
+enum fm10k_state_t {
+	__FM10K_RESETTING,
+	__FM10K_RESET_DETACHED,
+	__FM10K_RESET_SUSPENDED,
+	__FM10K_DOWN,
+	__FM10K_SERVICE_SCHED,
+	__FM10K_SERVICE_REQUEST,
+	__FM10K_SERVICE_DISABLE,
+	__FM10K_MACVLAN_SCHED,
+	__FM10K_MACVLAN_REQUEST,
+	__FM10K_MACVLAN_DISABLE,
+	__FM10K_LINK_DOWN,
+	__FM10K_UPDATING_STATS,
+	/* This value must be last and determines the BITMAP size */
+	__FM10K_STATE_SIZE__,
+};
+
+struct fm10k_intfc {
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+	struct net_device *netdev;
+	struct fm10k_l2_accel *l2_accel; /* pointer to L2 acceleration list */
+	struct pci_dev *pdev;
+	DECLARE_BITMAP(state, __FM10K_STATE_SIZE__);
+
+	/* Access flag values using atomic *_bit() operations */
+	DECLARE_BITMAP(flags, __FM10K_FLAGS_SIZE__);
+
+	int xcast_mode;
+
+	/* Tx fast path data */
+	int num_tx_queues;
+	u16 tx_itr;
+
+	/* Rx fast path data */
+	int num_rx_queues;
+	u16 rx_itr;
+
+	/* TX */
+	struct fm10k_ring *tx_ring[MAX_QUEUES] ____cacheline_aligned_in_smp;
+
+	u64 restart_queue;
+	u64 tx_busy;
+	u64 tx_csum_errors;
+	u64 alloc_failed;
+	u64 rx_csum_errors;
+
+	u64 tx_bytes_nic;
+	u64 tx_packets_nic;
+	u64 rx_bytes_nic;
+	u64 rx_packets_nic;
+	u64 rx_drops_nic;
+	u64 rx_overrun_pf;
+	u64 rx_overrun_vf;
+
+	/* Debug Statistics */
+	u64 hw_sm_mbx_full;
+	u64 hw_csum_tx_good;
+	u64 hw_csum_rx_good;
+	u64 rx_switch_errors;
+	u64 rx_drops;
+	u64 rx_pp_errors;
+	u64 rx_link_errors;
+	u64 rx_length_errors;
+
+	u32 tx_timeout_count;
+
+	/* RX */
+	struct fm10k_ring *rx_ring[MAX_QUEUES];
+
+	/* Queueing vectors */
+	struct fm10k_q_vector *q_vector[MAX_Q_VECTORS];
+	struct msix_entry *msix_entries;
+	int num_q_vectors;	/* current number of q_vectors for device */
+	struct fm10k_ring_feature ring_feature[RING_F_ARRAY_SIZE];
+
+	/* SR-IOV information management structure */
+	struct fm10k_iov_data *iov_data;
+
+	struct fm10k_hw_stats stats;
+	struct fm10k_hw hw;
+	/* Mailbox lock */
+	spinlock_t mbx_lock;
+	u32 __iomem *uc_addr;
+	u32 __iomem *sw_addr;
+	u16 msg_enable;
+	u16 tx_ring_count;
+	u16 rx_ring_count;
+	struct timer_list service_timer;
+	struct work_struct service_task;
+	unsigned long next_stats_update;
+	unsigned long next_tx_hang_check;
+	unsigned long last_reset;
+	unsigned long link_down_event;
+	bool host_ready;
+	bool lport_map_failed;
+
+	u32 reta[FM10K_RETA_SIZE];
+	u32 rssrk[FM10K_RSSRK_SIZE];
+
+	/* UDP encapsulation port tracking information */
+	__be16 vxlan_port;
+	__be16 geneve_port;
+
+	/* MAC/VLAN update queue */
+	struct list_head macvlan_requests;
+	struct delayed_work macvlan_task;
+	/* MAC/VLAN update queue lock */
+	spinlock_t macvlan_lock;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *dbg_intfc;
+#endif /* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_DCB
+	u8 pfc_en;
+#endif
+	u8 rx_pause;
+
+	/* GLORT resources in use by PF */
+	u16 glort;
+	u16 glort_count;
+
+	/* VLAN ID for updating multicast/unicast lists */
+	u16 vid;
+};
+
+static inline void fm10k_mbx_lock(struct fm10k_intfc *interface)
+{
+	spin_lock(&interface->mbx_lock);
+}
+
+static inline void fm10k_mbx_unlock(struct fm10k_intfc *interface)
+{
+	spin_unlock(&interface->mbx_lock);
+}
+
+static inline int fm10k_mbx_trylock(struct fm10k_intfc *interface)
+{
+	return spin_trylock(&interface->mbx_lock);
+}
+
+/* fm10k_test_staterr - test bits in Rx descriptor status and error fields */
+static inline __le32 fm10k_test_staterr(union fm10k_rx_desc *rx_desc,
+					const u32 stat_err_bits)
+{
+	return rx_desc->d.staterr & cpu_to_le32(stat_err_bits);
+}
+
+/* fm10k_desc_unused - calculate if we have unused descriptors */
+static inline u16 fm10k_desc_unused(struct fm10k_ring *ring)
+{
+	s16 unused = ring->next_to_clean - ring->next_to_use - 1;
+
+	return likely(unused < 0) ? unused + ring->count : unused;
+}
+
+#define FM10K_TX_DESC(R, i)	\
+	(&(((struct fm10k_tx_desc *)((R)->desc))[i]))
+#define FM10K_RX_DESC(R, i)	\
+	 (&(((union fm10k_rx_desc *)((R)->desc))[i]))
+
+#define FM10K_MAX_TXD_PWR	14
+#define FM10K_MAX_DATA_PER_TXD	(1u << FM10K_MAX_TXD_PWR)
+
+/* Tx Descriptors needed, worst case */
+#define TXD_USE_COUNT(S)	DIV_ROUND_UP((S), FM10K_MAX_DATA_PER_TXD)
+#define DESC_NEEDED	(MAX_SKB_FRAGS + 4)
+
+enum fm10k_tx_flags {
+	/* Tx offload flags */
+	FM10K_TX_FLAGS_CSUM	= 0x01,
+};
+
+/* This structure is stored as little endian values as that is the native
+ * format of the Rx descriptor.  The ordering of these fields is reversed
+ * from the actual ftag header to allow for a single bswap to take care
+ * of placing all of the values in network order
+ */
+union fm10k_ftag_info {
+	__le64 ftag;
+	struct {
+		/* dglort and sglort combined into a single 32bit desc read */
+		__le32 glort;
+		/* upper 16 bits of VLAN are reserved 0 for swpri_type_user */
+		__le32 vlan;
+	} d;
+	struct {
+		__le16 dglort;
+		__le16 sglort;
+		__le16 vlan;
+		__le16 swpri_type_user;
+	} w;
+};
+
+struct fm10k_cb {
+	union {
+		__le64 tstamp;
+		unsigned long ts_tx_timeout;
+	};
+	union fm10k_ftag_info fi;
+};
+
+#define FM10K_CB(skb) ((struct fm10k_cb *)(skb)->cb)
+
+/* main */
+extern char fm10k_driver_name[];
+int fm10k_init_queueing_scheme(struct fm10k_intfc *interface);
+void fm10k_clear_queueing_scheme(struct fm10k_intfc *interface);
+__be16 fm10k_tx_encap_offload(struct sk_buff *skb);
+netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
+				  struct fm10k_ring *tx_ring);
+void fm10k_tx_timeout_reset(struct fm10k_intfc *interface);
+u64 fm10k_get_tx_pending(struct fm10k_ring *ring, bool in_sw);
+bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring);
+void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count);
+
+/* PCI */
+void fm10k_mbx_free_irq(struct fm10k_intfc *);
+int fm10k_mbx_request_irq(struct fm10k_intfc *);
+void fm10k_qv_free_irq(struct fm10k_intfc *interface);
+int fm10k_qv_request_irq(struct fm10k_intfc *interface);
+int fm10k_register_pci_driver(void);
+void fm10k_unregister_pci_driver(void);
+void fm10k_up(struct fm10k_intfc *interface);
+void fm10k_down(struct fm10k_intfc *interface);
+void fm10k_update_stats(struct fm10k_intfc *interface);
+void fm10k_service_event_schedule(struct fm10k_intfc *interface);
+void fm10k_macvlan_schedule(struct fm10k_intfc *interface);
+void fm10k_update_rx_drop_en(struct fm10k_intfc *interface);
+
+/* Netdev */
+struct net_device *fm10k_alloc_netdev(const struct fm10k_info *info);
+int fm10k_setup_rx_resources(struct fm10k_ring *);
+int fm10k_setup_tx_resources(struct fm10k_ring *);
+void fm10k_free_rx_resources(struct fm10k_ring *);
+void fm10k_free_tx_resources(struct fm10k_ring *);
+void fm10k_clean_all_rx_rings(struct fm10k_intfc *);
+void fm10k_clean_all_tx_rings(struct fm10k_intfc *);
+void fm10k_unmap_and_free_tx_resource(struct fm10k_ring *,
+				      struct fm10k_tx_buffer *);
+void fm10k_restore_rx_state(struct fm10k_intfc *);
+void fm10k_reset_rx_state(struct fm10k_intfc *);
+int fm10k_setup_tc(struct net_device *dev, u8 tc);
+int fm10k_open(struct net_device *netdev);
+int fm10k_close(struct net_device *netdev);
+int fm10k_queue_vlan_request(struct fm10k_intfc *interface, u32 vid,
+			     u8 vsi, bool set);
+int fm10k_queue_mac_request(struct fm10k_intfc *interface, u16 glort,
+			    const unsigned char *addr, u16 vid, bool set);
+void fm10k_clear_macvlan_queue(struct fm10k_intfc *interface,
+			       u16 glort, bool vlans);
+
+/* Ethtool */
+void fm10k_set_ethtool_ops(struct net_device *dev);
+void fm10k_write_reta(struct fm10k_intfc *interface, const u32 *indir);
+
+/* IOV */
+s32 fm10k_iov_event(struct fm10k_intfc *interface);
+s32 fm10k_iov_mbx(struct fm10k_intfc *interface);
+void fm10k_iov_suspend(struct pci_dev *pdev);
+int fm10k_iov_resume(struct pci_dev *pdev);
+void fm10k_iov_disable(struct pci_dev *pdev);
+int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs);
+void fm10k_iov_update_stats(struct fm10k_intfc *interface);
+s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid);
+int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac);
+int fm10k_ndo_set_vf_vlan(struct net_device *netdev,
+			  int vf_idx, u16 vid, u8 qos, __be16 vlan_proto);
+int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
+			int __always_unused min_rate, int max_rate);
+int fm10k_ndo_get_vf_config(struct net_device *netdev,
+			    int vf_idx, struct ifla_vf_info *ivi);
+int fm10k_ndo_get_vf_stats(struct net_device *netdev,
+			   int vf_idx, struct ifla_vf_stats *stats);
+
+/* DebugFS */
+#ifdef CONFIG_DEBUG_FS
+void fm10k_dbg_q_vector_init(struct fm10k_q_vector *q_vector);
+void fm10k_dbg_q_vector_exit(struct fm10k_q_vector *q_vector);
+void fm10k_dbg_intfc_init(struct fm10k_intfc *interface);
+void fm10k_dbg_intfc_exit(struct fm10k_intfc *interface);
+void fm10k_dbg_init(void);
+void fm10k_dbg_exit(void);
+#else
+static inline void fm10k_dbg_q_vector_init(struct fm10k_q_vector *q_vector) {}
+static inline void fm10k_dbg_q_vector_exit(struct fm10k_q_vector *q_vector) {}
+static inline void fm10k_dbg_intfc_init(struct fm10k_intfc *interface) {}
+static inline void fm10k_dbg_intfc_exit(struct fm10k_intfc *interface) {}
+static inline void fm10k_dbg_init(void) {}
+static inline void fm10k_dbg_exit(void) {}
+#endif /* CONFIG_DEBUG_FS */
+
+/* DCB */
+#ifdef CONFIG_DCB
+void fm10k_dcbnl_set_ops(struct net_device *dev);
+#else
+static inline void fm10k_dcbnl_set_ops(struct net_device *dev) {}
+#endif
+#endif /* _FM10K_H_ */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
new file mode 100644
index 0000000000..f51a63fca5
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
@@ -0,0 +1,524 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "fm10k_common.h"
+
+/**
+ *  fm10k_get_bus_info_generic - Generic set PCI bus info
+ *  @hw: pointer to hardware structure
+ *
+ *  Gets the PCI bus info (speed, width, type) then calls helper function to
+ *  store this data within the fm10k_hw structure.
+ **/
+s32 fm10k_get_bus_info_generic(struct fm10k_hw *hw)
+{
+	u16 link_cap, link_status, device_cap, device_control;
+
+	/* Get the maximum link width and speed from PCIe config space */
+	link_cap = fm10k_read_pci_cfg_word(hw, FM10K_PCIE_LINK_CAP);
+
+	switch (link_cap & FM10K_PCIE_LINK_WIDTH) {
+	case FM10K_PCIE_LINK_WIDTH_1:
+		hw->bus_caps.width = fm10k_bus_width_pcie_x1;
+		break;
+	case FM10K_PCIE_LINK_WIDTH_2:
+		hw->bus_caps.width = fm10k_bus_width_pcie_x2;
+		break;
+	case FM10K_PCIE_LINK_WIDTH_4:
+		hw->bus_caps.width = fm10k_bus_width_pcie_x4;
+		break;
+	case FM10K_PCIE_LINK_WIDTH_8:
+		hw->bus_caps.width = fm10k_bus_width_pcie_x8;
+		break;
+	default:
+		hw->bus_caps.width = fm10k_bus_width_unknown;
+		break;
+	}
+
+	switch (link_cap & FM10K_PCIE_LINK_SPEED) {
+	case FM10K_PCIE_LINK_SPEED_2500:
+		hw->bus_caps.speed = fm10k_bus_speed_2500;
+		break;
+	case FM10K_PCIE_LINK_SPEED_5000:
+		hw->bus_caps.speed = fm10k_bus_speed_5000;
+		break;
+	case FM10K_PCIE_LINK_SPEED_8000:
+		hw->bus_caps.speed = fm10k_bus_speed_8000;
+		break;
+	default:
+		hw->bus_caps.speed = fm10k_bus_speed_unknown;
+		break;
+	}
+
+	/* Get the PCIe maximum payload size for the PCIe function */
+	device_cap = fm10k_read_pci_cfg_word(hw, FM10K_PCIE_DEV_CAP);
+
+	switch (device_cap & FM10K_PCIE_DEV_CAP_PAYLOAD) {
+	case FM10K_PCIE_DEV_CAP_PAYLOAD_128:
+		hw->bus_caps.payload = fm10k_bus_payload_128;
+		break;
+	case FM10K_PCIE_DEV_CAP_PAYLOAD_256:
+		hw->bus_caps.payload = fm10k_bus_payload_256;
+		break;
+	case FM10K_PCIE_DEV_CAP_PAYLOAD_512:
+		hw->bus_caps.payload = fm10k_bus_payload_512;
+		break;
+	default:
+		hw->bus_caps.payload = fm10k_bus_payload_unknown;
+		break;
+	}
+
+	/* Get the negotiated link width and speed from PCIe config space */
+	link_status = fm10k_read_pci_cfg_word(hw, FM10K_PCIE_LINK_STATUS);
+
+	switch (link_status & FM10K_PCIE_LINK_WIDTH) {
+	case FM10K_PCIE_LINK_WIDTH_1:
+		hw->bus.width = fm10k_bus_width_pcie_x1;
+		break;
+	case FM10K_PCIE_LINK_WIDTH_2:
+		hw->bus.width = fm10k_bus_width_pcie_x2;
+		break;
+	case FM10K_PCIE_LINK_WIDTH_4:
+		hw->bus.width = fm10k_bus_width_pcie_x4;
+		break;
+	case FM10K_PCIE_LINK_WIDTH_8:
+		hw->bus.width = fm10k_bus_width_pcie_x8;
+		break;
+	default:
+		hw->bus.width = fm10k_bus_width_unknown;
+		break;
+	}
+
+	switch (link_status & FM10K_PCIE_LINK_SPEED) {
+	case FM10K_PCIE_LINK_SPEED_2500:
+		hw->bus.speed = fm10k_bus_speed_2500;
+		break;
+	case FM10K_PCIE_LINK_SPEED_5000:
+		hw->bus.speed = fm10k_bus_speed_5000;
+		break;
+	case FM10K_PCIE_LINK_SPEED_8000:
+		hw->bus.speed = fm10k_bus_speed_8000;
+		break;
+	default:
+		hw->bus.speed = fm10k_bus_speed_unknown;
+		break;
+	}
+
+	/* Get the negotiated PCIe maximum payload size for the PCIe function */
+	device_control = fm10k_read_pci_cfg_word(hw, FM10K_PCIE_DEV_CTRL);
+
+	switch (device_control & FM10K_PCIE_DEV_CTRL_PAYLOAD) {
+	case FM10K_PCIE_DEV_CTRL_PAYLOAD_128:
+		hw->bus.payload = fm10k_bus_payload_128;
+		break;
+	case FM10K_PCIE_DEV_CTRL_PAYLOAD_256:
+		hw->bus.payload = fm10k_bus_payload_256;
+		break;
+	case FM10K_PCIE_DEV_CTRL_PAYLOAD_512:
+		hw->bus.payload = fm10k_bus_payload_512;
+		break;
+	default:
+		hw->bus.payload = fm10k_bus_payload_unknown;
+		break;
+	}
+
+	return 0;
+}
+
+static u16 fm10k_get_pcie_msix_count_generic(struct fm10k_hw *hw)
+{
+	u16 msix_count;
+
+	/* read in value from MSI-X capability register */
+	msix_count = fm10k_read_pci_cfg_word(hw, FM10K_PCI_MSIX_MSG_CTRL);
+	msix_count &= FM10K_PCI_MSIX_MSG_CTRL_TBL_SZ_MASK;
+
+	/* MSI-X count is zero-based in HW */
+	msix_count++;
+
+	if (msix_count > FM10K_MAX_MSIX_VECTORS)
+		msix_count = FM10K_MAX_MSIX_VECTORS;
+
+	return msix_count;
+}
+
+/**
+ *  fm10k_get_invariants_generic - Inits constant values
+ *  @hw: pointer to the hardware structure
+ *
+ *  Initialize the common invariants for the device.
+ **/
+s32 fm10k_get_invariants_generic(struct fm10k_hw *hw)
+{
+	struct fm10k_mac_info *mac = &hw->mac;
+
+	/* initialize GLORT state to avoid any false hits */
+	mac->dglort_map = FM10K_DGLORTMAP_NONE;
+
+	/* record maximum number of MSI-X vectors */
+	mac->max_msix_vectors = fm10k_get_pcie_msix_count_generic(hw);
+
+	return 0;
+}
+
+/**
+ *  fm10k_start_hw_generic - Prepare hardware for Tx/Rx
+ *  @hw: pointer to hardware structure
+ *
+ *  This function sets the Tx ready flag to indicate that the Tx path has
+ *  been initialized.
+ **/
+s32 fm10k_start_hw_generic(struct fm10k_hw *hw)
+{
+	/* set flag indicating we are beginning Tx */
+	hw->mac.tx_ready = true;
+
+	return 0;
+}
+
+/**
+ *  fm10k_disable_queues_generic - Stop Tx/Rx queues
+ *  @hw: pointer to hardware structure
+ *  @q_cnt: number of queues to be disabled
+ *
+ **/
+s32 fm10k_disable_queues_generic(struct fm10k_hw *hw, u16 q_cnt)
+{
+	u32 reg;
+	u16 i, time;
+
+	/* clear tx_ready to prevent any false hits for reset */
+	hw->mac.tx_ready = false;
+
+	if (FM10K_REMOVED(hw->hw_addr))
+		return 0;
+
+	/* clear the enable bit for all rings */
+	for (i = 0; i < q_cnt; i++) {
+		reg = fm10k_read_reg(hw, FM10K_TXDCTL(i));
+		fm10k_write_reg(hw, FM10K_TXDCTL(i),
+				reg & ~FM10K_TXDCTL_ENABLE);
+		reg = fm10k_read_reg(hw, FM10K_RXQCTL(i));
+		fm10k_write_reg(hw, FM10K_RXQCTL(i),
+				reg & ~FM10K_RXQCTL_ENABLE);
+	}
+
+	fm10k_write_flush(hw);
+	udelay(1);
+
+	/* loop through all queues to verify that they are all disabled */
+	for (i = 0, time = FM10K_QUEUE_DISABLE_TIMEOUT; time;) {
+		/* if we are at end of rings all rings are disabled */
+		if (i == q_cnt)
+			return 0;
+
+		/* if queue enables cleared, then move to next ring pair */
+		reg = fm10k_read_reg(hw, FM10K_TXDCTL(i));
+		if (!~reg || !(reg & FM10K_TXDCTL_ENABLE)) {
+			reg = fm10k_read_reg(hw, FM10K_RXQCTL(i));
+			if (!~reg || !(reg & FM10K_RXQCTL_ENABLE)) {
+				i++;
+				continue;
+			}
+		}
+
+		/* decrement time and wait 1 usec */
+		time--;
+		if (time)
+			udelay(1);
+	}
+
+	return FM10K_ERR_REQUESTS_PENDING;
+}
+
+/**
+ *  fm10k_stop_hw_generic - Stop Tx/Rx units
+ *  @hw: pointer to hardware structure
+ *
+ **/
+s32 fm10k_stop_hw_generic(struct fm10k_hw *hw)
+{
+	return fm10k_disable_queues_generic(hw, hw->mac.max_queues);
+}
+
+/**
+ *  fm10k_read_hw_stats_32b - Reads value of 32-bit registers
+ *  @hw: pointer to the hardware structure
+ *  @addr: address of register containing a 32-bit value
+ *  @stat: pointer to structure holding hw stat information
+ *
+ *  Function reads the content of the register and returns the delta
+ *  between the base and the current value.
+ *  **/
+u32 fm10k_read_hw_stats_32b(struct fm10k_hw *hw, u32 addr,
+			    struct fm10k_hw_stat *stat)
+{
+	u32 delta = fm10k_read_reg(hw, addr) - stat->base_l;
+
+	if (FM10K_REMOVED(hw->hw_addr))
+		stat->base_h = 0;
+
+	return delta;
+}
+
+/**
+ *  fm10k_read_hw_stats_48b - Reads value of 48-bit registers
+ *  @hw: pointer to the hardware structure
+ *  @addr: address of register containing the lower 32-bit value
+ *  @stat: pointer to structure holding hw stat information
+ *
+ *  Function reads the content of 2 registers, combined to represent a 48-bit
+ *  statistical value. Extra processing is required to handle overflowing.
+ *  Finally, a delta value is returned representing the difference between the
+ *  values stored in registers and values stored in the statistic counters.
+ *  **/
+static u64 fm10k_read_hw_stats_48b(struct fm10k_hw *hw, u32 addr,
+				   struct fm10k_hw_stat *stat)
+{
+	u32 count_l;
+	u32 count_h;
+	u32 count_tmp;
+	u64 delta;
+
+	count_h = fm10k_read_reg(hw, addr + 1);
+
+	/* Check for overflow */
+	do {
+		count_tmp = count_h;
+		count_l = fm10k_read_reg(hw, addr);
+		count_h = fm10k_read_reg(hw, addr + 1);
+	} while (count_h != count_tmp);
+
+	delta = ((u64)(count_h - stat->base_h) << 32) + count_l;
+	delta -= stat->base_l;
+
+	return delta & FM10K_48_BIT_MASK;
+}
+
+/**
+ *  fm10k_update_hw_base_48b - Updates 48-bit statistic base value
+ *  @stat: pointer to the hardware statistic structure
+ *  @delta: value to be updated into the hardware statistic structure
+ *
+ *  Function receives a value and determines if an update is required based on
+ *  a delta calculation. Only the base value will be updated.
+ **/
+static void fm10k_update_hw_base_48b(struct fm10k_hw_stat *stat, u64 delta)
+{
+	if (!delta)
+		return;
+
+	/* update lower 32 bits */
+	delta += stat->base_l;
+	stat->base_l = (u32)delta;
+
+	/* update upper 32 bits */
+	stat->base_h += (u32)(delta >> 32);
+}
+
+/**
+ *  fm10k_update_hw_stats_tx_q - Updates TX queue statistics counters
+ *  @hw: pointer to the hardware structure
+ *  @q: pointer to the ring of hardware statistics queue
+ *  @idx: index pointing to the start of the ring iteration
+ *
+ *  Function updates the TX queue statistics counters that are related to the
+ *  hardware.
+ **/
+static void fm10k_update_hw_stats_tx_q(struct fm10k_hw *hw,
+				       struct fm10k_hw_stats_q *q,
+				       u32 idx)
+{
+	u32 id_tx, id_tx_prev, tx_packets;
+	u64 tx_bytes = 0;
+
+	/* Retrieve TX Owner Data */
+	id_tx = fm10k_read_reg(hw, FM10K_TXQCTL(idx));
+
+	/* Process TX Ring */
+	do {
+		tx_packets = fm10k_read_hw_stats_32b(hw, FM10K_QPTC(idx),
+						     &q->tx_packets);
+
+		if (tx_packets)
+			tx_bytes = fm10k_read_hw_stats_48b(hw,
+							   FM10K_QBTC_L(idx),
+							   &q->tx_bytes);
+
+		/* Re-Check Owner Data */
+		id_tx_prev = id_tx;
+		id_tx = fm10k_read_reg(hw, FM10K_TXQCTL(idx));
+	} while ((id_tx ^ id_tx_prev) & FM10K_TXQCTL_ID_MASK);
+
+	/* drop non-ID bits and set VALID ID bit */
+	id_tx &= FM10K_TXQCTL_ID_MASK;
+	id_tx |= FM10K_STAT_VALID;
+
+	/* update packet counts */
+	if (q->tx_stats_idx == id_tx) {
+		q->tx_packets.count += tx_packets;
+		q->tx_bytes.count += tx_bytes;
+	}
+
+	/* update bases and record ID */
+	fm10k_update_hw_base_32b(&q->tx_packets, tx_packets);
+	fm10k_update_hw_base_48b(&q->tx_bytes, tx_bytes);
+
+	q->tx_stats_idx = id_tx;
+}
+
+/**
+ *  fm10k_update_hw_stats_rx_q - Updates RX queue statistics counters
+ *  @hw: pointer to the hardware structure
+ *  @q: pointer to the ring of hardware statistics queue
+ *  @idx: index pointing to the start of the ring iteration
+ *
+ *  Function updates the RX queue statistics counters that are related to the
+ *  hardware.
+ **/
+static void fm10k_update_hw_stats_rx_q(struct fm10k_hw *hw,
+				       struct fm10k_hw_stats_q *q,
+				       u32 idx)
+{
+	u32 id_rx, id_rx_prev, rx_packets, rx_drops;
+	u64 rx_bytes = 0;
+
+	/* Retrieve RX Owner Data */
+	id_rx = fm10k_read_reg(hw, FM10K_RXQCTL(idx));
+
+	/* Process RX Ring */
+	do {
+		rx_drops = fm10k_read_hw_stats_32b(hw, FM10K_QPRDC(idx),
+						   &q->rx_drops);
+
+		rx_packets = fm10k_read_hw_stats_32b(hw, FM10K_QPRC(idx),
+						     &q->rx_packets);
+
+		if (rx_packets)
+			rx_bytes = fm10k_read_hw_stats_48b(hw,
+							   FM10K_QBRC_L(idx),
+							   &q->rx_bytes);
+
+		/* Re-Check Owner Data */
+		id_rx_prev = id_rx;
+		id_rx = fm10k_read_reg(hw, FM10K_RXQCTL(idx));
+	} while ((id_rx ^ id_rx_prev) & FM10K_RXQCTL_ID_MASK);
+
+	/* drop non-ID bits and set VALID ID bit */
+	id_rx &= FM10K_RXQCTL_ID_MASK;
+	id_rx |= FM10K_STAT_VALID;
+
+	/* update packet counts */
+	if (q->rx_stats_idx == id_rx) {
+		q->rx_drops.count += rx_drops;
+		q->rx_packets.count += rx_packets;
+		q->rx_bytes.count += rx_bytes;
+	}
+
+	/* update bases and record ID */
+	fm10k_update_hw_base_32b(&q->rx_drops, rx_drops);
+	fm10k_update_hw_base_32b(&q->rx_packets, rx_packets);
+	fm10k_update_hw_base_48b(&q->rx_bytes, rx_bytes);
+
+	q->rx_stats_idx = id_rx;
+}
+
+/**
+ *  fm10k_update_hw_stats_q - Updates queue statistics counters
+ *  @hw: pointer to the hardware structure
+ *  @q: pointer to the ring of hardware statistics queue
+ *  @idx: index pointing to the start of the ring iteration
+ *  @count: number of queues to iterate over
+ *
+ *  Function updates the queue statistics counters that are related to the
+ *  hardware.
+ **/
+void fm10k_update_hw_stats_q(struct fm10k_hw *hw, struct fm10k_hw_stats_q *q,
+			     u32 idx, u32 count)
+{
+	u32 i;
+
+	for (i = 0; i < count; i++, idx++, q++) {
+		fm10k_update_hw_stats_tx_q(hw, q, idx);
+		fm10k_update_hw_stats_rx_q(hw, q, idx);
+	}
+}
+
+/**
+ *  fm10k_unbind_hw_stats_q - Unbind the queue counters from their queues
+ *  @q: pointer to the ring of hardware statistics queue
+ *  @idx: index pointing to the start of the ring iteration
+ *  @count: number of queues to iterate over
+ *
+ *  Function invalidates the index values for the queues so any updates that
+ *  may have happened are ignored and the base for the queue stats is reset.
+ **/
+void fm10k_unbind_hw_stats_q(struct fm10k_hw_stats_q *q, u32 idx, u32 count)
+{
+	u32 i;
+
+	for (i = 0; i < count; i++, idx++, q++) {
+		q->rx_stats_idx = 0;
+		q->tx_stats_idx = 0;
+	}
+}
+
+/**
+ *  fm10k_get_host_state_generic - Returns the state of the host
+ *  @hw: pointer to hardware structure
+ *  @host_ready: pointer to boolean value that will record host state
+ *
+ *  This function will check the health of the mailbox and Tx queue 0
+ *  in order to determine if we should report that the link is up or not.
+ **/
+s32 fm10k_get_host_state_generic(struct fm10k_hw *hw, bool *host_ready)
+{
+	struct fm10k_mbx_info *mbx = &hw->mbx;
+	struct fm10k_mac_info *mac = &hw->mac;
+	s32 ret_val = 0;
+	u32 txdctl = fm10k_read_reg(hw, FM10K_TXDCTL(0));
+
+	/* process upstream mailbox in case interrupts were disabled */
+	mbx->ops.process(hw, mbx);
+
+	/* If Tx is no longer enabled link should come down */
+	if (!(~txdctl) || !(txdctl & FM10K_TXDCTL_ENABLE))
+		mac->get_host_state = true;
+
+	/* exit if not checking for link, or link cannot be changed */
+	if (!mac->get_host_state || !(~txdctl))
+		goto out;
+
+	/* if we somehow dropped the Tx enable we should reset */
+	if (mac->tx_ready && !(txdctl & FM10K_TXDCTL_ENABLE)) {
+		ret_val = FM10K_ERR_RESET_REQUESTED;
+		goto out;
+	}
+
+	/* if Mailbox timed out we should request reset */
+	if (!mbx->timeout) {
+		ret_val = FM10K_ERR_RESET_REQUESTED;
+		goto out;
+	}
+
+	/* verify Mailbox is still open */
+	if (mbx->state != FM10K_STATE_OPEN)
+		goto out;
+
+	/* interface cannot receive traffic without logical ports */
+	if (mac->dglort_map == FM10K_DGLORTMAP_NONE) {
+		if (mac->ops.request_lport_map)
+			ret_val = mac->ops.request_lport_map(hw);
+
+		goto out;
+	}
+
+	/* if we passed all the tests above then the switch is ready and we no
+	 * longer need to check for link
+	 */
+	mac->get_host_state = false;
+
+out:
+	*host_ready = !mac->get_host_state;
+	return ret_val;
+}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.h b/drivers/net/ethernet/intel/fm10k/fm10k_common.h
new file mode 100644
index 0000000000..4c48fb73b3
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _FM10K_COMMON_H_
+#define _FM10K_COMMON_H_
+
+#include "fm10k_type.h"
+
+#define FM10K_REMOVED(hw_addr) unlikely(!(hw_addr))
+
+/* PCI configuration read */
+u16 fm10k_read_pci_cfg_word(struct fm10k_hw *hw, u32 reg);
+
+/* read operations, indexed using DWORDS */
+u32 fm10k_read_reg(struct fm10k_hw *hw, int reg);
+
+/* write operations, indexed using DWORDS */
+#define fm10k_write_reg(hw, reg, val) \
+do { \
+	u32 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
+	if (!FM10K_REMOVED(hw_addr)) \
+		writel((val), &hw_addr[(reg)]); \
+} while (0)
+
+/* Switch register write operations, index using DWORDS */
+#define fm10k_write_sw_reg(hw, reg, val) \
+do { \
+	u32 __iomem *sw_addr = READ_ONCE((hw)->sw_addr); \
+	if (!FM10K_REMOVED(sw_addr)) \
+		writel((val), &sw_addr[(reg)]); \
+} while (0)
+
+/* read ctrl register which has no clear on read fields as PCIe flush */
+#define fm10k_write_flush(hw) fm10k_read_reg((hw), FM10K_CTRL)
+s32 fm10k_get_bus_info_generic(struct fm10k_hw *hw);
+s32 fm10k_get_invariants_generic(struct fm10k_hw *hw);
+s32 fm10k_disable_queues_generic(struct fm10k_hw *hw, u16 q_cnt);
+s32 fm10k_start_hw_generic(struct fm10k_hw *hw);
+s32 fm10k_stop_hw_generic(struct fm10k_hw *hw);
+u32 fm10k_read_hw_stats_32b(struct fm10k_hw *hw, u32 addr,
+			    struct fm10k_hw_stat *stat);
+#define fm10k_update_hw_base_32b(stat, delta) ((stat)->base_l += (delta))
+void fm10k_update_hw_stats_q(struct fm10k_hw *hw, struct fm10k_hw_stats_q *q,
+			     u32 idx, u32 count);
+#define fm10k_unbind_hw_stats_32b(s) ((s)->base_h = 0)
+void fm10k_unbind_hw_stats_q(struct fm10k_hw_stats_q *q, u32 idx, u32 count);
+s32 fm10k_get_host_state_generic(struct fm10k_hw *hw, bool *host_ready);
+#endif /* _FM10K_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
new file mode 100644
index 0000000000..86397c564d
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_dcbnl.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
+
+#include "fm10k.h"
+
+/**
+ * fm10k_dcbnl_ieee_getets - get the ETS configuration for the device
+ * @dev: netdev interface for the device
+ * @ets: ETS structure to push configuration to
+ **/
+static int fm10k_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets)
+{
+	int i;
+
+	/* we support 8 TCs in all modes */
+	ets->ets_cap = IEEE_8021QAZ_MAX_TCS;
+	ets->cbs = 0;
+
+	/* we only support strict priority and cannot do traffic shaping */
+	memset(ets->tc_tx_bw, 0, sizeof(ets->tc_tx_bw));
+	memset(ets->tc_rx_bw, 0, sizeof(ets->tc_rx_bw));
+	memset(ets->tc_tsa, IEEE_8021QAZ_TSA_STRICT, sizeof(ets->tc_tsa));
+
+	/* populate the prio map based on the netdev */
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+		ets->prio_tc[i] = netdev_get_prio_tc_map(dev, i);
+
+	return 0;
+}
+
+/**
+ * fm10k_dcbnl_ieee_setets - set the ETS configuration for the device
+ * @dev: netdev interface for the device
+ * @ets: ETS structure to pull configuration from
+ **/
+static int fm10k_dcbnl_ieee_setets(struct net_device *dev, struct ieee_ets *ets)
+{
+	u8 num_tc = 0;
+	int i;
+
+	/* verify type and determine num_tcs needed */
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		if (ets->tc_tx_bw[i] || ets->tc_rx_bw[i])
+			return -EINVAL;
+		if (ets->tc_tsa[i] != IEEE_8021QAZ_TSA_STRICT)
+			return -EINVAL;
+		if (ets->prio_tc[i] > num_tc)
+			num_tc = ets->prio_tc[i];
+	}
+
+	/* if requested TC is greater than 0 then num_tcs is max + 1 */
+	if (num_tc)
+		num_tc++;
+
+	if (num_tc > IEEE_8021QAZ_MAX_TCS)
+		return -EINVAL;
+
+	/* update TC hardware mapping if necessary */
+	if (num_tc != netdev_get_num_tc(dev)) {
+		int err = fm10k_setup_tc(dev, num_tc);
+		if (err)
+			return err;
+	}
+
+	/* update priority mapping */
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+		netdev_set_prio_tc_map(dev, i, ets->prio_tc[i]);
+
+	return 0;
+}
+
+/**
+ * fm10k_dcbnl_ieee_getpfc - get the PFC configuration for the device
+ * @dev: netdev interface for the device
+ * @pfc: PFC structure to push configuration to
+ **/
+static int fm10k_dcbnl_ieee_getpfc(struct net_device *dev, struct ieee_pfc *pfc)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+
+	/* record flow control max count and state of TCs */
+	pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS;
+	pfc->pfc_en = interface->pfc_en;
+
+	return 0;
+}
+
+/**
+ * fm10k_dcbnl_ieee_setpfc - set the PFC configuration for the device
+ * @dev: netdev interface for the device
+ * @pfc: PFC structure to pull configuration from
+ **/
+static int fm10k_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+
+	/* record PFC configuration to interface */
+	interface->pfc_en = pfc->pfc_en;
+
+	/* if we are running update the drop_en state for all queues */
+	if (netif_running(dev))
+		fm10k_update_rx_drop_en(interface);
+
+	return 0;
+}
+
+/**
+ * fm10k_dcbnl_getdcbx - get the DCBX configuration for the device
+ * @dev: netdev interface for the device
+ *
+ * Returns that we support only IEEE DCB for this interface
+ **/
+static u8 fm10k_dcbnl_getdcbx(struct net_device __always_unused *dev)
+{
+	return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
+}
+
+/**
+ * fm10k_dcbnl_setdcbx - get the DCBX configuration for the device
+ * @dev: netdev interface for the device
+ * @mode: new mode for this device
+ *
+ * Returns error on attempt to enable anything but IEEE DCB for this interface
+ **/
+static u8 fm10k_dcbnl_setdcbx(struct net_device __always_unused *dev, u8 mode)
+{
+	return (mode != (DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE)) ? 1 : 0;
+}
+
+static const struct dcbnl_rtnl_ops fm10k_dcbnl_ops = {
+	.ieee_getets	= fm10k_dcbnl_ieee_getets,
+	.ieee_setets	= fm10k_dcbnl_ieee_setets,
+	.ieee_getpfc	= fm10k_dcbnl_ieee_getpfc,
+	.ieee_setpfc	= fm10k_dcbnl_ieee_setpfc,
+
+	.getdcbx	= fm10k_dcbnl_getdcbx,
+	.setdcbx	= fm10k_dcbnl_setdcbx,
+};
+
+/**
+ * fm10k_dcbnl_set_ops - Configures dcbnl ops pointer for netdev
+ * @dev: netdev interface for the device
+ *
+ * Enables PF for DCB by assigning DCBNL ops pointer.
+ **/
+void fm10k_dcbnl_set_ops(struct net_device *dev)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	struct fm10k_hw *hw = &interface->hw;
+
+	if (hw->mac.type == fm10k_mac_pf)
+		dev->dcbnl_ops = &fm10k_dcbnl_ops;
+}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
new file mode 100644
index 0000000000..5c77054d67
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "fm10k.h"
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+static struct dentry *dbg_root;
+
+/* Descriptor Seq Functions */
+
+static void *fm10k_dbg_desc_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct fm10k_ring *ring = s->private;
+
+	return (*pos < ring->count) ? pos : NULL;
+}
+
+static void *fm10k_dbg_desc_seq_next(struct seq_file *s,
+				     void __always_unused *v,
+				     loff_t *pos)
+{
+	struct fm10k_ring *ring = s->private;
+
+	return (++(*pos) < ring->count) ? pos : NULL;
+}
+
+static void fm10k_dbg_desc_seq_stop(struct seq_file __always_unused *s,
+				    void __always_unused *v)
+{
+	/* Do nothing. */
+}
+
+static void fm10k_dbg_desc_break(struct seq_file *s, int i)
+{
+	while (i--)
+		seq_putc(s, '-');
+
+	seq_putc(s, '\n');
+}
+
+static int fm10k_dbg_tx_desc_seq_show(struct seq_file *s, void *v)
+{
+	struct fm10k_ring *ring = s->private;
+	int i = *(loff_t *)v;
+	static const char tx_desc_hdr[] =
+		"DES BUFFER_ADDRESS     LENGTH VLAN   MSS    HDRLEN FLAGS\n";
+
+	/* Generate header */
+	if (!i) {
+		seq_printf(s, tx_desc_hdr);
+		fm10k_dbg_desc_break(s, sizeof(tx_desc_hdr) - 1);
+	}
+
+	/* Validate descriptor allocation */
+	if (!ring->desc) {
+		seq_printf(s, "%03X Descriptor ring not allocated.\n", i);
+	} else {
+		struct fm10k_tx_desc *txd = FM10K_TX_DESC(ring, i);
+
+		seq_printf(s, "%03X %#018llx %#06x %#06x %#06x %#06x %#04x\n",
+			   i, txd->buffer_addr, txd->buflen, txd->vlan,
+			   txd->mss, txd->hdrlen, txd->flags);
+	}
+
+	return 0;
+}
+
+static int fm10k_dbg_rx_desc_seq_show(struct seq_file *s, void *v)
+{
+	struct fm10k_ring *ring = s->private;
+	int i = *(loff_t *)v;
+	static const char rx_desc_hdr[] =
+	"DES DATA       RSS        STATERR    LENGTH VLAN   DGLORT SGLORT TIMESTAMP\n";
+
+	/* Generate header */
+	if (!i) {
+		seq_printf(s, rx_desc_hdr);
+		fm10k_dbg_desc_break(s, sizeof(rx_desc_hdr) - 1);
+	}
+
+	/* Validate descriptor allocation */
+	if (!ring->desc) {
+		seq_printf(s, "%03X Descriptor ring not allocated.\n", i);
+	} else {
+		union fm10k_rx_desc *rxd = FM10K_RX_DESC(ring, i);
+
+		seq_printf(s,
+			   "%03X %#010x %#010x %#010x %#06x %#06x %#06x %#06x %#018llx\n",
+			   i, rxd->d.data, rxd->d.rss, rxd->d.staterr,
+			   rxd->w.length, rxd->w.vlan, rxd->w.dglort,
+			   rxd->w.sglort, rxd->q.timestamp);
+	}
+
+	return 0;
+}
+
+static const struct seq_operations fm10k_dbg_tx_desc_seq_ops = {
+	.start = fm10k_dbg_desc_seq_start,
+	.next  = fm10k_dbg_desc_seq_next,
+	.stop  = fm10k_dbg_desc_seq_stop,
+	.show  = fm10k_dbg_tx_desc_seq_show,
+};
+
+static const struct seq_operations fm10k_dbg_rx_desc_seq_ops = {
+	.start = fm10k_dbg_desc_seq_start,
+	.next  = fm10k_dbg_desc_seq_next,
+	.stop  = fm10k_dbg_desc_seq_stop,
+	.show  = fm10k_dbg_rx_desc_seq_show,
+};
+
+static int fm10k_dbg_desc_open(struct inode *inode, struct file *filep)
+{
+	struct fm10k_ring *ring = inode->i_private;
+	struct fm10k_q_vector *q_vector = ring->q_vector;
+	const struct seq_operations *desc_seq_ops;
+	int err;
+
+	if (ring < q_vector->rx.ring)
+		desc_seq_ops = &fm10k_dbg_tx_desc_seq_ops;
+	else
+		desc_seq_ops = &fm10k_dbg_rx_desc_seq_ops;
+
+	err = seq_open(filep, desc_seq_ops);
+	if (err)
+		return err;
+
+	((struct seq_file *)filep->private_data)->private = ring;
+
+	return 0;
+}
+
+static const struct file_operations fm10k_dbg_desc_fops = {
+	.owner   = THIS_MODULE,
+	.open    = fm10k_dbg_desc_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release,
+};
+
+/**
+ * fm10k_dbg_q_vector_init - setup debugfs for the q_vectors
+ * @q_vector: q_vector to allocate directories for
+ *
+ * A folder is created for each q_vector found. In each q_vector
+ * folder, a debugfs file is created for each tx and rx ring
+ * allocated to the q_vector.
+ **/
+void fm10k_dbg_q_vector_init(struct fm10k_q_vector *q_vector)
+{
+	struct fm10k_intfc *interface = q_vector->interface;
+	char name[16];
+	int i;
+
+	if (!interface->dbg_intfc)
+		return;
+
+	/* Generate a folder for each q_vector */
+	snprintf(name, sizeof(name), "q_vector.%03d", q_vector->v_idx);
+
+	q_vector->dbg_q_vector = debugfs_create_dir(name, interface->dbg_intfc);
+
+	/* Generate a file for each rx ring in the q_vector */
+	for (i = 0; i < q_vector->tx.count; i++) {
+		struct fm10k_ring *ring = &q_vector->tx.ring[i];
+
+		snprintf(name, sizeof(name), "tx_ring.%03d", ring->queue_index);
+
+		debugfs_create_file(name, 0600,
+				    q_vector->dbg_q_vector, ring,
+				    &fm10k_dbg_desc_fops);
+	}
+
+	/* Generate a file for each rx ring in the q_vector */
+	for (i = 0; i < q_vector->rx.count; i++) {
+		struct fm10k_ring *ring = &q_vector->rx.ring[i];
+
+		snprintf(name, sizeof(name), "rx_ring.%03d", ring->queue_index);
+
+		debugfs_create_file(name, 0600,
+				    q_vector->dbg_q_vector, ring,
+				    &fm10k_dbg_desc_fops);
+	}
+}
+
+/**
+ * fm10k_dbg_q_vector_exit - setup debugfs for the q_vectors
+ * @q_vector: q_vector to allocate directories for
+ **/
+void fm10k_dbg_q_vector_exit(struct fm10k_q_vector *q_vector)
+{
+	struct fm10k_intfc *interface = q_vector->interface;
+
+	if (interface->dbg_intfc)
+		debugfs_remove_recursive(q_vector->dbg_q_vector);
+	q_vector->dbg_q_vector = NULL;
+}
+
+/**
+ * fm10k_dbg_intfc_init - setup the debugfs directory for the intferface
+ * @interface: the interface that is starting up
+ **/
+
+void fm10k_dbg_intfc_init(struct fm10k_intfc *interface)
+{
+	const char *name = pci_name(interface->pdev);
+
+	if (dbg_root)
+		interface->dbg_intfc = debugfs_create_dir(name, dbg_root);
+}
+
+/**
+ * fm10k_dbg_intfc_exit - clean out the interface's debugfs entries
+ * @interface: the interface that is stopping
+ **/
+void fm10k_dbg_intfc_exit(struct fm10k_intfc *interface)
+{
+	if (dbg_root)
+		debugfs_remove_recursive(interface->dbg_intfc);
+	interface->dbg_intfc = NULL;
+}
+
+/**
+ * fm10k_dbg_init - start up debugfs for the driver
+ **/
+void fm10k_dbg_init(void)
+{
+	dbg_root = debugfs_create_dir(fm10k_driver_name, NULL);
+}
+
+/**
+ * fm10k_dbg_exit - clean out the driver's debugfs entries
+ **/
+void fm10k_dbg_exit(void)
+{
+	debugfs_remove_recursive(dbg_root);
+	dbg_root = NULL;
+}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
new file mode 100644
index 0000000000..d53369e300
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
@@ -0,0 +1,1195 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
+
+#include <linux/ethtool.h>
+#include <linux/vmalloc.h>
+
+#include "fm10k.h"
+
+struct fm10k_stats {
+	/* The stat_string is expected to be a format string formatted using
+	 * vsnprintf by fm10k_add_stat_strings. Every member of a stats array
+	 * should use the same format specifiers as they will be formatted
+	 * using the same variadic arguments.
+	 */
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+#define FM10K_STAT_FIELDS(_type, _name, _stat) { \
+	.stat_string = _name, \
+	.sizeof_stat = sizeof_field(_type, _stat), \
+	.stat_offset = offsetof(_type, _stat) \
+}
+
+/* netdevice statistics */
+#define FM10K_NETDEV_STAT(_net_stat) \
+	FM10K_STAT_FIELDS(struct net_device_stats, __stringify(_net_stat), \
+			  _net_stat)
+
+static const struct fm10k_stats fm10k_gstrings_net_stats[] = {
+	FM10K_NETDEV_STAT(tx_packets),
+	FM10K_NETDEV_STAT(tx_bytes),
+	FM10K_NETDEV_STAT(tx_errors),
+	FM10K_NETDEV_STAT(rx_packets),
+	FM10K_NETDEV_STAT(rx_bytes),
+	FM10K_NETDEV_STAT(rx_errors),
+	FM10K_NETDEV_STAT(rx_dropped),
+
+	/* detailed Rx errors */
+	FM10K_NETDEV_STAT(rx_length_errors),
+	FM10K_NETDEV_STAT(rx_crc_errors),
+	FM10K_NETDEV_STAT(rx_fifo_errors),
+};
+
+#define FM10K_NETDEV_STATS_LEN	ARRAY_SIZE(fm10k_gstrings_net_stats)
+
+/* General interface statistics */
+#define FM10K_STAT(_name, _stat) \
+	FM10K_STAT_FIELDS(struct fm10k_intfc, _name, _stat)
+
+static const struct fm10k_stats fm10k_gstrings_global_stats[] = {
+	FM10K_STAT("tx_restart_queue", restart_queue),
+	FM10K_STAT("tx_busy", tx_busy),
+	FM10K_STAT("tx_csum_errors", tx_csum_errors),
+	FM10K_STAT("rx_alloc_failed", alloc_failed),
+	FM10K_STAT("rx_csum_errors", rx_csum_errors),
+
+	FM10K_STAT("tx_packets_nic", tx_packets_nic),
+	FM10K_STAT("tx_bytes_nic", tx_bytes_nic),
+	FM10K_STAT("rx_packets_nic", rx_packets_nic),
+	FM10K_STAT("rx_bytes_nic", rx_bytes_nic),
+	FM10K_STAT("rx_drops_nic", rx_drops_nic),
+	FM10K_STAT("rx_overrun_pf", rx_overrun_pf),
+	FM10K_STAT("rx_overrun_vf", rx_overrun_vf),
+
+	FM10K_STAT("swapi_status", hw.swapi.status),
+	FM10K_STAT("mac_rules_used", hw.swapi.mac.used),
+	FM10K_STAT("mac_rules_avail", hw.swapi.mac.avail),
+
+	FM10K_STAT("reset_while_pending", hw.mac.reset_while_pending),
+
+	FM10K_STAT("tx_hang_count", tx_timeout_count),
+};
+
+static const struct fm10k_stats fm10k_gstrings_pf_stats[] = {
+	FM10K_STAT("timeout", stats.timeout.count),
+	FM10K_STAT("ur", stats.ur.count),
+	FM10K_STAT("ca", stats.ca.count),
+	FM10K_STAT("um", stats.um.count),
+	FM10K_STAT("xec", stats.xec.count),
+	FM10K_STAT("vlan_drop", stats.vlan_drop.count),
+	FM10K_STAT("loopback_drop", stats.loopback_drop.count),
+	FM10K_STAT("nodesc_drop", stats.nodesc_drop.count),
+};
+
+/* mailbox statistics */
+#define FM10K_MBX_STAT(_name, _stat) \
+	FM10K_STAT_FIELDS(struct fm10k_mbx_info, _name, _stat)
+
+static const struct fm10k_stats fm10k_gstrings_mbx_stats[] = {
+	FM10K_MBX_STAT("mbx_tx_busy", tx_busy),
+	FM10K_MBX_STAT("mbx_tx_dropped", tx_dropped),
+	FM10K_MBX_STAT("mbx_tx_messages", tx_messages),
+	FM10K_MBX_STAT("mbx_tx_dwords", tx_dwords),
+	FM10K_MBX_STAT("mbx_tx_mbmem_pulled", tx_mbmem_pulled),
+	FM10K_MBX_STAT("mbx_rx_messages", rx_messages),
+	FM10K_MBX_STAT("mbx_rx_dwords", rx_dwords),
+	FM10K_MBX_STAT("mbx_rx_parse_err", rx_parse_err),
+	FM10K_MBX_STAT("mbx_rx_mbmem_pushed", rx_mbmem_pushed),
+};
+
+/* per-queue ring statistics */
+#define FM10K_QUEUE_STAT(_name, _stat) \
+	FM10K_STAT_FIELDS(struct fm10k_ring, _name, _stat)
+
+static const struct fm10k_stats fm10k_gstrings_queue_stats[] = {
+	FM10K_QUEUE_STAT("%s_queue_%u_packets", stats.packets),
+	FM10K_QUEUE_STAT("%s_queue_%u_bytes", stats.bytes),
+};
+
+#define FM10K_GLOBAL_STATS_LEN ARRAY_SIZE(fm10k_gstrings_global_stats)
+#define FM10K_PF_STATS_LEN ARRAY_SIZE(fm10k_gstrings_pf_stats)
+#define FM10K_MBX_STATS_LEN ARRAY_SIZE(fm10k_gstrings_mbx_stats)
+#define FM10K_QUEUE_STATS_LEN ARRAY_SIZE(fm10k_gstrings_queue_stats)
+
+#define FM10K_STATIC_STATS_LEN (FM10K_GLOBAL_STATS_LEN + \
+				FM10K_NETDEV_STATS_LEN + \
+				FM10K_MBX_STATS_LEN)
+
+static const char fm10k_gstrings_test[][ETH_GSTRING_LEN] = {
+	"Mailbox test (on/offline)"
+};
+
+#define FM10K_TEST_LEN (sizeof(fm10k_gstrings_test) / ETH_GSTRING_LEN)
+
+enum fm10k_self_test_types {
+	FM10K_TEST_MBX,
+	FM10K_TEST_MAX = FM10K_TEST_LEN
+};
+
+enum {
+	FM10K_PRV_FLAG_LEN,
+};
+
+static const char fm10k_prv_flags[FM10K_PRV_FLAG_LEN][ETH_GSTRING_LEN] = {
+};
+
+static void __fm10k_add_stat_strings(u8 **p, const struct fm10k_stats stats[],
+				     const unsigned int size, ...)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++) {
+		va_list args;
+
+		va_start(args, size);
+		vsnprintf(*p, ETH_GSTRING_LEN, stats[i].stat_string, args);
+		*p += ETH_GSTRING_LEN;
+		va_end(args);
+	}
+}
+
+#define fm10k_add_stat_strings(p, stats, ...) \
+	__fm10k_add_stat_strings(p, stats, ARRAY_SIZE(stats), ## __VA_ARGS__)
+
+static void fm10k_get_stat_strings(struct net_device *dev, u8 *data)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	unsigned int i;
+
+	fm10k_add_stat_strings(&data, fm10k_gstrings_net_stats);
+
+	fm10k_add_stat_strings(&data, fm10k_gstrings_global_stats);
+
+	fm10k_add_stat_strings(&data, fm10k_gstrings_mbx_stats);
+
+	if (interface->hw.mac.type != fm10k_mac_vf)
+		fm10k_add_stat_strings(&data, fm10k_gstrings_pf_stats);
+
+	for (i = 0; i < interface->hw.mac.max_queues; i++) {
+		fm10k_add_stat_strings(&data, fm10k_gstrings_queue_stats,
+				       "tx", i);
+
+		fm10k_add_stat_strings(&data, fm10k_gstrings_queue_stats,
+				       "rx", i);
+	}
+}
+
+static void fm10k_get_strings(struct net_device *dev,
+			      u32 stringset, u8 *data)
+{
+	switch (stringset) {
+	case ETH_SS_TEST:
+		memcpy(data, fm10k_gstrings_test,
+		       FM10K_TEST_LEN * ETH_GSTRING_LEN);
+		break;
+	case ETH_SS_STATS:
+		fm10k_get_stat_strings(dev, data);
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		memcpy(data, fm10k_prv_flags,
+		       FM10K_PRV_FLAG_LEN * ETH_GSTRING_LEN);
+		break;
+	}
+}
+
+static int fm10k_get_sset_count(struct net_device *dev, int sset)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	struct fm10k_hw *hw = &interface->hw;
+	int stats_len = FM10K_STATIC_STATS_LEN;
+
+	switch (sset) {
+	case ETH_SS_TEST:
+		return FM10K_TEST_LEN;
+	case ETH_SS_STATS:
+		stats_len += hw->mac.max_queues * 2 * FM10K_QUEUE_STATS_LEN;
+
+		if (hw->mac.type != fm10k_mac_vf)
+			stats_len += FM10K_PF_STATS_LEN;
+
+		return stats_len;
+	case ETH_SS_PRIV_FLAGS:
+		return FM10K_PRV_FLAG_LEN;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void __fm10k_add_ethtool_stats(u64 **data, void *pointer,
+				      const struct fm10k_stats stats[],
+				      const unsigned int size)
+{
+	unsigned int i;
+
+	if (!pointer) {
+		/* memory is not zero allocated so we have to clear it */
+		for (i = 0; i < size; i++)
+			*((*data)++) = 0;
+		return;
+	}
+
+	for (i = 0; i < size; i++) {
+		char *p = (char *)pointer + stats[i].stat_offset;
+
+		switch (stats[i].sizeof_stat) {
+		case sizeof(u64):
+			*((*data)++) = *(u64 *)p;
+			break;
+		case sizeof(u32):
+			*((*data)++) = *(u32 *)p;
+			break;
+		case sizeof(u16):
+			*((*data)++) = *(u16 *)p;
+			break;
+		case sizeof(u8):
+			*((*data)++) = *(u8 *)p;
+			break;
+		default:
+			WARN_ONCE(1, "unexpected stat size for %s",
+				  stats[i].stat_string);
+			*((*data)++) = 0;
+		}
+	}
+}
+
+#define fm10k_add_ethtool_stats(data, pointer, stats) \
+	__fm10k_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats))
+
+static void fm10k_get_ethtool_stats(struct net_device *netdev,
+				    struct ethtool_stats __always_unused *stats,
+				    u64 *data)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	struct net_device_stats *net_stats = &netdev->stats;
+	int i;
+
+	fm10k_update_stats(interface);
+
+	fm10k_add_ethtool_stats(&data, net_stats, fm10k_gstrings_net_stats);
+
+	fm10k_add_ethtool_stats(&data, interface, fm10k_gstrings_global_stats);
+
+	fm10k_add_ethtool_stats(&data, &interface->hw.mbx,
+				fm10k_gstrings_mbx_stats);
+
+	if (interface->hw.mac.type != fm10k_mac_vf) {
+		fm10k_add_ethtool_stats(&data, interface,
+					fm10k_gstrings_pf_stats);
+	}
+
+	for (i = 0; i < interface->hw.mac.max_queues; i++) {
+		struct fm10k_ring *ring;
+
+		ring = interface->tx_ring[i];
+		fm10k_add_ethtool_stats(&data, ring,
+					fm10k_gstrings_queue_stats);
+
+		ring = interface->rx_ring[i];
+		fm10k_add_ethtool_stats(&data, ring,
+					fm10k_gstrings_queue_stats);
+	}
+}
+
+/* If function below adds more registers this define needs to be updated */
+#define FM10K_REGS_LEN_Q 29
+
+static void fm10k_get_reg_q(struct fm10k_hw *hw, u32 *buff, int i)
+{
+	int idx = 0;
+
+	buff[idx++] = fm10k_read_reg(hw, FM10K_RDBAL(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_RDBAH(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_RDLEN(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_TPH_RXCTRL(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_RDH(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_RDT(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_RXQCTL(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_RXDCTL(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_RXINT(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_SRRCTL(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_QPRC(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_QPRDC(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_QBRC_L(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_QBRC_H(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_TDBAL(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_TDBAH(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_TDLEN(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_TPH_TXCTRL(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_TDH(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_TDT(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_TXDCTL(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_TXQCTL(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_TXINT(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_QPTC(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_QBTC_L(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_QBTC_H(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_TQDLOC(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_TX_SGLORT(i));
+	buff[idx++] = fm10k_read_reg(hw, FM10K_PFVTCTL(i));
+
+	BUG_ON(idx != FM10K_REGS_LEN_Q);
+}
+
+/* If function above adds more registers this define needs to be updated */
+#define FM10K_REGS_LEN_VSI 43
+
+static void fm10k_get_reg_vsi(struct fm10k_hw *hw, u32 *buff, int i)
+{
+	int idx = 0, j;
+
+	buff[idx++] = fm10k_read_reg(hw, FM10K_MRQC(i));
+	for (j = 0; j < 10; j++)
+		buff[idx++] = fm10k_read_reg(hw, FM10K_RSSRK(i, j));
+	for (j = 0; j < 32; j++)
+		buff[idx++] = fm10k_read_reg(hw, FM10K_RETA(i, j));
+
+	BUG_ON(idx != FM10K_REGS_LEN_VSI);
+}
+
+static void fm10k_get_regs(struct net_device *netdev,
+			   struct ethtool_regs *regs, void *p)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	struct fm10k_hw *hw = &interface->hw;
+	u32 *buff = p;
+	u16 i;
+
+	regs->version = BIT(24) | (hw->revision_id << 16) | hw->device_id;
+
+	switch (hw->mac.type) {
+	case fm10k_mac_pf:
+		/* General PF Registers */
+		*(buff++) = fm10k_read_reg(hw, FM10K_CTRL);
+		*(buff++) = fm10k_read_reg(hw, FM10K_CTRL_EXT);
+		*(buff++) = fm10k_read_reg(hw, FM10K_GCR);
+		*(buff++) = fm10k_read_reg(hw, FM10K_GCR_EXT);
+
+		for (i = 0; i < 8; i++) {
+			*(buff++) = fm10k_read_reg(hw, FM10K_DGLORTMAP(i));
+			*(buff++) = fm10k_read_reg(hw, FM10K_DGLORTDEC(i));
+		}
+
+		for (i = 0; i < 65; i++) {
+			fm10k_get_reg_vsi(hw, buff, i);
+			buff += FM10K_REGS_LEN_VSI;
+		}
+
+		*(buff++) = fm10k_read_reg(hw, FM10K_DMA_CTRL);
+		*(buff++) = fm10k_read_reg(hw, FM10K_DMA_CTRL2);
+
+		for (i = 0; i < FM10K_MAX_QUEUES_PF; i++) {
+			fm10k_get_reg_q(hw, buff, i);
+			buff += FM10K_REGS_LEN_Q;
+		}
+
+		*(buff++) = fm10k_read_reg(hw, FM10K_TPH_CTRL);
+
+		for (i = 0; i < 8; i++)
+			*(buff++) = fm10k_read_reg(hw, FM10K_INT_MAP(i));
+
+		/* Interrupt Throttling Registers */
+		for (i = 0; i < 130; i++)
+			*(buff++) = fm10k_read_reg(hw, FM10K_ITR(i));
+
+		break;
+	case fm10k_mac_vf:
+		/* General VF registers */
+		*(buff++) = fm10k_read_reg(hw, FM10K_VFCTRL);
+		*(buff++) = fm10k_read_reg(hw, FM10K_VFINT_MAP);
+		*(buff++) = fm10k_read_reg(hw, FM10K_VFSYSTIME);
+
+		/* Interrupt Throttling Registers */
+		for (i = 0; i < 8; i++)
+			*(buff++) = fm10k_read_reg(hw, FM10K_VFITR(i));
+
+		fm10k_get_reg_vsi(hw, buff, 0);
+		buff += FM10K_REGS_LEN_VSI;
+
+		for (i = 0; i < FM10K_MAX_QUEUES_POOL; i++) {
+			if (i < hw->mac.max_queues)
+				fm10k_get_reg_q(hw, buff, i);
+			else
+				memset(buff, 0, sizeof(u32) * FM10K_REGS_LEN_Q);
+			buff += FM10K_REGS_LEN_Q;
+		}
+
+		break;
+	default:
+		return;
+	}
+}
+
+/* If function above adds more registers these define need to be updated */
+#define FM10K_REGS_LEN_PF \
+(162 + (65 * FM10K_REGS_LEN_VSI) + (FM10K_MAX_QUEUES_PF * FM10K_REGS_LEN_Q))
+#define FM10K_REGS_LEN_VF \
+(11 + FM10K_REGS_LEN_VSI + (FM10K_MAX_QUEUES_POOL * FM10K_REGS_LEN_Q))
+
+static int fm10k_get_regs_len(struct net_device *netdev)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	struct fm10k_hw *hw = &interface->hw;
+
+	switch (hw->mac.type) {
+	case fm10k_mac_pf:
+		return FM10K_REGS_LEN_PF * sizeof(u32);
+	case fm10k_mac_vf:
+		return FM10K_REGS_LEN_VF * sizeof(u32);
+	default:
+		return 0;
+	}
+}
+
+static void fm10k_get_drvinfo(struct net_device *dev,
+			      struct ethtool_drvinfo *info)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+
+	strncpy(info->driver, fm10k_driver_name,
+		sizeof(info->driver) - 1);
+	strncpy(info->bus_info, pci_name(interface->pdev),
+		sizeof(info->bus_info) - 1);
+}
+
+static void fm10k_get_pauseparam(struct net_device *dev,
+				 struct ethtool_pauseparam *pause)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+
+	/* record fixed values for autoneg and tx pause */
+	pause->autoneg = 0;
+	pause->tx_pause = 1;
+
+	pause->rx_pause = interface->rx_pause ? 1 : 0;
+}
+
+static int fm10k_set_pauseparam(struct net_device *dev,
+				struct ethtool_pauseparam *pause)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	struct fm10k_hw *hw = &interface->hw;
+
+	if (pause->autoneg || !pause->tx_pause)
+		return -EINVAL;
+
+	/* we can only support pause on the PF to avoid head-of-line blocking */
+	if (hw->mac.type == fm10k_mac_pf)
+		interface->rx_pause = pause->rx_pause ? ~0 : 0;
+	else if (pause->rx_pause)
+		return -EINVAL;
+
+	if (netif_running(dev))
+		fm10k_update_rx_drop_en(interface);
+
+	return 0;
+}
+
+static u32 fm10k_get_msglevel(struct net_device *netdev)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+
+	return interface->msg_enable;
+}
+
+static void fm10k_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+
+	interface->msg_enable = data;
+}
+
+static void fm10k_get_ringparam(struct net_device *netdev,
+				struct ethtool_ringparam *ring,
+				struct kernel_ethtool_ringparam *kernel_ring,
+				struct netlink_ext_ack *extack)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+
+	ring->rx_max_pending = FM10K_MAX_RXD;
+	ring->tx_max_pending = FM10K_MAX_TXD;
+	ring->rx_mini_max_pending = 0;
+	ring->rx_jumbo_max_pending = 0;
+	ring->rx_pending = interface->rx_ring_count;
+	ring->tx_pending = interface->tx_ring_count;
+	ring->rx_mini_pending = 0;
+	ring->rx_jumbo_pending = 0;
+}
+
+static int fm10k_set_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ring,
+			       struct kernel_ethtool_ringparam *kernel_ring,
+			       struct netlink_ext_ack *extack)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	struct fm10k_ring *temp_ring;
+	int i, err = 0;
+	u32 new_rx_count, new_tx_count;
+
+	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+		return -EINVAL;
+
+	new_tx_count = clamp_t(u32, ring->tx_pending,
+			       FM10K_MIN_TXD, FM10K_MAX_TXD);
+	new_tx_count = ALIGN(new_tx_count, FM10K_REQ_TX_DESCRIPTOR_MULTIPLE);
+
+	new_rx_count = clamp_t(u32, ring->rx_pending,
+			       FM10K_MIN_RXD, FM10K_MAX_RXD);
+	new_rx_count = ALIGN(new_rx_count, FM10K_REQ_RX_DESCRIPTOR_MULTIPLE);
+
+	if ((new_tx_count == interface->tx_ring_count) &&
+	    (new_rx_count == interface->rx_ring_count)) {
+		/* nothing to do */
+		return 0;
+	}
+
+	while (test_and_set_bit(__FM10K_RESETTING, interface->state))
+		usleep_range(1000, 2000);
+
+	if (!netif_running(interface->netdev)) {
+		for (i = 0; i < interface->num_tx_queues; i++)
+			interface->tx_ring[i]->count = new_tx_count;
+		for (i = 0; i < interface->num_rx_queues; i++)
+			interface->rx_ring[i]->count = new_rx_count;
+		interface->tx_ring_count = new_tx_count;
+		interface->rx_ring_count = new_rx_count;
+		goto clear_reset;
+	}
+
+	/* allocate temporary buffer to store rings in */
+	i = max_t(int, interface->num_tx_queues, interface->num_rx_queues);
+	temp_ring = vmalloc(array_size(i, sizeof(struct fm10k_ring)));
+
+	if (!temp_ring) {
+		err = -ENOMEM;
+		goto clear_reset;
+	}
+
+	fm10k_down(interface);
+
+	/* Setup new Tx resources and free the old Tx resources in that order.
+	 * We can then assign the new resources to the rings via a memcpy.
+	 * The advantage to this approach is that we are guaranteed to still
+	 * have resources even in the case of an allocation failure.
+	 */
+	if (new_tx_count != interface->tx_ring_count) {
+		for (i = 0; i < interface->num_tx_queues; i++) {
+			memcpy(&temp_ring[i], interface->tx_ring[i],
+			       sizeof(struct fm10k_ring));
+
+			temp_ring[i].count = new_tx_count;
+			err = fm10k_setup_tx_resources(&temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					fm10k_free_tx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+		}
+
+		for (i = 0; i < interface->num_tx_queues; i++) {
+			fm10k_free_tx_resources(interface->tx_ring[i]);
+
+			memcpy(interface->tx_ring[i], &temp_ring[i],
+			       sizeof(struct fm10k_ring));
+		}
+
+		interface->tx_ring_count = new_tx_count;
+	}
+
+	/* Repeat the process for the Rx rings if needed */
+	if (new_rx_count != interface->rx_ring_count) {
+		for (i = 0; i < interface->num_rx_queues; i++) {
+			memcpy(&temp_ring[i], interface->rx_ring[i],
+			       sizeof(struct fm10k_ring));
+
+			temp_ring[i].count = new_rx_count;
+			err = fm10k_setup_rx_resources(&temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					fm10k_free_rx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+		}
+
+		for (i = 0; i < interface->num_rx_queues; i++) {
+			fm10k_free_rx_resources(interface->rx_ring[i]);
+
+			memcpy(interface->rx_ring[i], &temp_ring[i],
+			       sizeof(struct fm10k_ring));
+		}
+
+		interface->rx_ring_count = new_rx_count;
+	}
+
+err_setup:
+	fm10k_up(interface);
+	vfree(temp_ring);
+clear_reset:
+	clear_bit(__FM10K_RESETTING, interface->state);
+	return err;
+}
+
+static int fm10k_get_coalesce(struct net_device *dev,
+			      struct ethtool_coalesce *ec,
+			      struct kernel_ethtool_coalesce *kernel_coal,
+			      struct netlink_ext_ack *extack)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+
+	ec->use_adaptive_tx_coalesce = ITR_IS_ADAPTIVE(interface->tx_itr);
+	ec->tx_coalesce_usecs = interface->tx_itr & ~FM10K_ITR_ADAPTIVE;
+
+	ec->use_adaptive_rx_coalesce = ITR_IS_ADAPTIVE(interface->rx_itr);
+	ec->rx_coalesce_usecs = interface->rx_itr & ~FM10K_ITR_ADAPTIVE;
+
+	return 0;
+}
+
+static int fm10k_set_coalesce(struct net_device *dev,
+			      struct ethtool_coalesce *ec,
+			      struct kernel_ethtool_coalesce *kernel_coal,
+			      struct netlink_ext_ack *extack)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	u16 tx_itr, rx_itr;
+	int i;
+
+	/* verify limits */
+	if ((ec->rx_coalesce_usecs > FM10K_ITR_MAX) ||
+	    (ec->tx_coalesce_usecs > FM10K_ITR_MAX))
+		return -EINVAL;
+
+	/* record settings */
+	tx_itr = ec->tx_coalesce_usecs;
+	rx_itr = ec->rx_coalesce_usecs;
+
+	/* set initial values for adaptive ITR */
+	if (ec->use_adaptive_tx_coalesce)
+		tx_itr = FM10K_ITR_ADAPTIVE | FM10K_TX_ITR_DEFAULT;
+
+	if (ec->use_adaptive_rx_coalesce)
+		rx_itr = FM10K_ITR_ADAPTIVE | FM10K_RX_ITR_DEFAULT;
+
+	/* update interface */
+	interface->tx_itr = tx_itr;
+	interface->rx_itr = rx_itr;
+
+	/* update q_vectors */
+	for (i = 0; i < interface->num_q_vectors; i++) {
+		struct fm10k_q_vector *qv = interface->q_vector[i];
+
+		qv->tx.itr = tx_itr;
+		qv->rx.itr = rx_itr;
+	}
+
+	return 0;
+}
+
+static int fm10k_get_rss_hash_opts(struct fm10k_intfc *interface,
+				   struct ethtool_rxnfc *cmd)
+{
+	cmd->data = 0;
+
+	/* Report default options for RSS on fm10k */
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		fallthrough;
+	case UDP_V4_FLOW:
+		if (test_bit(FM10K_FLAG_RSS_FIELD_IPV4_UDP,
+			     interface->flags))
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		fallthrough;
+	case SCTP_V4_FLOW:
+	case SCTP_V6_FLOW:
+	case AH_ESP_V4_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V4_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V4_FLOW:
+	case ESP_V6_FLOW:
+	case IPV4_FLOW:
+	case IPV6_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	case UDP_V6_FLOW:
+		if (test_bit(FM10K_FLAG_RSS_FIELD_IPV6_UDP,
+			     interface->flags))
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int fm10k_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+			   u32 __always_unused *rule_locs)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = interface->num_rx_queues;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXFH:
+		ret = fm10k_get_rss_hash_opts(interface, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static int fm10k_set_rss_hash_opt(struct fm10k_intfc *interface,
+				  struct ethtool_rxnfc *nfc)
+{
+	int rss_ipv4_udp = test_bit(FM10K_FLAG_RSS_FIELD_IPV4_UDP,
+				    interface->flags);
+	int rss_ipv6_udp = test_bit(FM10K_FLAG_RSS_FIELD_IPV6_UDP,
+				    interface->flags);
+
+	/* RSS does not support anything other than hashing
+	 * to queues on src and dst IPs and ports
+	 */
+	if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    !(nfc->data & RXH_L4_B_0_1) ||
+		    !(nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	case UDP_V4_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			clear_bit(FM10K_FLAG_RSS_FIELD_IPV4_UDP,
+				  interface->flags);
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			set_bit(FM10K_FLAG_RSS_FIELD_IPV4_UDP,
+				interface->flags);
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case UDP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			clear_bit(FM10K_FLAG_RSS_FIELD_IPV6_UDP,
+				  interface->flags);
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			set_bit(FM10K_FLAG_RSS_FIELD_IPV6_UDP,
+				interface->flags);
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case SCTP_V4_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    (nfc->data & RXH_L4_B_0_1) ||
+		    (nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* If something changed we need to update the MRQC register. Note that
+	 * test_bit() is guaranteed to return strictly 0 or 1, so testing for
+	 * equality is safe.
+	 */
+	if ((rss_ipv4_udp != test_bit(FM10K_FLAG_RSS_FIELD_IPV4_UDP,
+				      interface->flags)) ||
+	    (rss_ipv6_udp != test_bit(FM10K_FLAG_RSS_FIELD_IPV6_UDP,
+				      interface->flags))) {
+		struct fm10k_hw *hw = &interface->hw;
+		bool warn = false;
+		u32 mrqc;
+
+		/* Perform hash on these packet types */
+		mrqc = FM10K_MRQC_IPV4 |
+		       FM10K_MRQC_TCP_IPV4 |
+		       FM10K_MRQC_IPV6 |
+		       FM10K_MRQC_TCP_IPV6;
+
+		if (test_bit(FM10K_FLAG_RSS_FIELD_IPV4_UDP,
+			     interface->flags)) {
+			mrqc |= FM10K_MRQC_UDP_IPV4;
+			warn = true;
+		}
+		if (test_bit(FM10K_FLAG_RSS_FIELD_IPV6_UDP,
+			     interface->flags)) {
+			mrqc |= FM10K_MRQC_UDP_IPV6;
+			warn = true;
+		}
+
+		/* If we enable UDP RSS display a warning that this may cause
+		 * fragmented UDP packets to arrive out of order.
+		 */
+		if (warn)
+			netif_warn(interface, drv, interface->netdev,
+				   "enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n");
+
+		fm10k_write_reg(hw, FM10K_MRQC(0), mrqc);
+	}
+
+	return 0;
+}
+
+static int fm10k_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXFH:
+		ret = fm10k_set_rss_hash_opt(interface, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static int fm10k_mbx_test(struct fm10k_intfc *interface, u64 *data)
+{
+	struct fm10k_hw *hw = &interface->hw;
+	struct fm10k_mbx_info *mbx = &hw->mbx;
+	u32 attr_flag, test_msg[6];
+	unsigned long timeout;
+	int err = -EINVAL;
+
+	/* For now this is a VF only feature */
+	if (hw->mac.type != fm10k_mac_vf)
+		return 0;
+
+	/* loop through both nested and unnested attribute types */
+	for (attr_flag = BIT(FM10K_TEST_MSG_UNSET);
+	     attr_flag < BIT(2 * FM10K_TEST_MSG_NESTED);
+	     attr_flag += attr_flag) {
+		/* generate message to be tested */
+		fm10k_tlv_msg_test_create(test_msg, attr_flag);
+
+		fm10k_mbx_lock(interface);
+		mbx->test_result = FM10K_NOT_IMPLEMENTED;
+		err = mbx->ops.enqueue_tx(hw, mbx, test_msg);
+		fm10k_mbx_unlock(interface);
+
+		/* wait up to 1 second for response */
+		timeout = jiffies + HZ;
+		do {
+			if (err < 0)
+				goto err_out;
+
+			usleep_range(500, 1000);
+
+			fm10k_mbx_lock(interface);
+			mbx->ops.process(hw, mbx);
+			fm10k_mbx_unlock(interface);
+
+			err = mbx->test_result;
+			if (!err)
+				break;
+		} while (time_is_after_jiffies(timeout));
+
+		/* reporting errors */
+		if (err)
+			goto err_out;
+	}
+
+err_out:
+	*data = err < 0 ? (attr_flag) : (err > 0);
+	return err;
+}
+
+static void fm10k_self_test(struct net_device *dev,
+			    struct ethtool_test *eth_test, u64 *data)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	struct fm10k_hw *hw = &interface->hw;
+
+	memset(data, 0, sizeof(*data) * FM10K_TEST_LEN);
+
+	if (FM10K_REMOVED(hw->hw_addr)) {
+		netif_err(interface, drv, dev,
+			  "Interface removed - test blocked\n");
+		eth_test->flags |= ETH_TEST_FL_FAILED;
+		return;
+	}
+
+	if (fm10k_mbx_test(interface, &data[FM10K_TEST_MBX]))
+		eth_test->flags |= ETH_TEST_FL_FAILED;
+}
+
+static u32 fm10k_get_priv_flags(struct net_device *netdev)
+{
+	return 0;
+}
+
+static int fm10k_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+	if (priv_flags >= BIT(FM10K_PRV_FLAG_LEN))
+		return -EINVAL;
+
+	return 0;
+}
+
+static u32 fm10k_get_reta_size(struct net_device __always_unused *netdev)
+{
+	return FM10K_RETA_SIZE * FM10K_RETA_ENTRIES_PER_REG;
+}
+
+void fm10k_write_reta(struct fm10k_intfc *interface, const u32 *indir)
+{
+	u16 rss_i = interface->ring_feature[RING_F_RSS].indices;
+	struct fm10k_hw *hw = &interface->hw;
+	u32 table[4];
+	int i, j;
+
+	/* record entries to reta table */
+	for (i = 0; i < FM10K_RETA_SIZE; i++) {
+		u32 reta, n;
+
+		/* generate a new table if we weren't given one */
+		for (j = 0; j < 4; j++) {
+			if (indir)
+				n = indir[4 * i + j];
+			else
+				n = ethtool_rxfh_indir_default(4 * i + j,
+							       rss_i);
+
+			table[j] = n;
+		}
+
+		reta = table[0] |
+			(table[1] << 8) |
+			(table[2] << 16) |
+			(table[3] << 24);
+
+		if (interface->reta[i] == reta)
+			continue;
+
+		interface->reta[i] = reta;
+		fm10k_write_reg(hw, FM10K_RETA(0, i), reta);
+	}
+}
+
+static int fm10k_get_reta(struct net_device *netdev, u32 *indir)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	int i;
+
+	if (!indir)
+		return 0;
+
+	for (i = 0; i < FM10K_RETA_SIZE; i++, indir += 4) {
+		u32 reta = interface->reta[i];
+
+		indir[0] = (reta << 24) >> 24;
+		indir[1] = (reta << 16) >> 24;
+		indir[2] = (reta <<  8) >> 24;
+		indir[3] = (reta) >> 24;
+	}
+
+	return 0;
+}
+
+static int fm10k_set_reta(struct net_device *netdev, const u32 *indir)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	int i;
+	u16 rss_i;
+
+	if (!indir)
+		return 0;
+
+	/* Verify user input. */
+	rss_i = interface->ring_feature[RING_F_RSS].indices;
+	for (i = fm10k_get_reta_size(netdev); i--;) {
+		if (indir[i] < rss_i)
+			continue;
+		return -EINVAL;
+	}
+
+	fm10k_write_reta(interface, indir);
+
+	return 0;
+}
+
+static u32 fm10k_get_rssrk_size(struct net_device __always_unused *netdev)
+{
+	return FM10K_RSSRK_SIZE * FM10K_RSSRK_ENTRIES_PER_REG;
+}
+
+static int fm10k_get_rssh(struct net_device *netdev, u32 *indir, u8 *key,
+			  u8 *hfunc)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	int i, err;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	err = fm10k_get_reta(netdev, indir);
+	if (err || !key)
+		return err;
+
+	for (i = 0; i < FM10K_RSSRK_SIZE; i++, key += 4)
+		*(__le32 *)key = cpu_to_le32(interface->rssrk[i]);
+
+	return 0;
+}
+
+static int fm10k_set_rssh(struct net_device *netdev, const u32 *indir,
+			  const u8 *key, const u8 hfunc)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	struct fm10k_hw *hw = &interface->hw;
+	int i, err;
+
+	/* We do not allow change in unsupported parameters */
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+		return -EOPNOTSUPP;
+
+	err = fm10k_set_reta(netdev, indir);
+	if (err || !key)
+		return err;
+
+	for (i = 0; i < FM10K_RSSRK_SIZE; i++, key += 4) {
+		u32 rssrk = le32_to_cpu(*(__le32 *)key);
+
+		if (interface->rssrk[i] == rssrk)
+			continue;
+
+		interface->rssrk[i] = rssrk;
+		fm10k_write_reg(hw, FM10K_RSSRK(0, i), rssrk);
+	}
+
+	return 0;
+}
+
+static unsigned int fm10k_max_channels(struct net_device *dev)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	unsigned int max_combined = interface->hw.mac.max_queues;
+	u8 tcs = netdev_get_num_tc(dev);
+
+	/* For QoS report channels per traffic class */
+	if (tcs > 1)
+		max_combined = BIT((fls(max_combined / tcs) - 1));
+
+	return max_combined;
+}
+
+static void fm10k_get_channels(struct net_device *dev,
+			       struct ethtool_channels *ch)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+
+	/* report maximum channels */
+	ch->max_combined = fm10k_max_channels(dev);
+
+	/* report info for other vector */
+	ch->max_other = NON_Q_VECTORS;
+	ch->other_count = ch->max_other;
+
+	/* record RSS queues */
+	ch->combined_count = interface->ring_feature[RING_F_RSS].indices;
+}
+
+static int fm10k_set_channels(struct net_device *dev,
+			      struct ethtool_channels *ch)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	unsigned int count = ch->combined_count;
+
+	/* verify they are not requesting separate vectors */
+	if (!count || ch->rx_count || ch->tx_count)
+		return -EINVAL;
+
+	/* verify other_count has not changed */
+	if (ch->other_count != NON_Q_VECTORS)
+		return -EINVAL;
+
+	/* verify the number of channels does not exceed hardware limits */
+	if (count > fm10k_max_channels(dev))
+		return -EINVAL;
+
+	interface->ring_feature[RING_F_RSS].limit = count;
+
+	/* use setup TC to update any traffic class queue mapping */
+	return fm10k_setup_tc(dev, netdev_get_num_tc(dev));
+}
+
+static const struct ethtool_ops fm10k_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+				     ETHTOOL_COALESCE_USE_ADAPTIVE,
+	.get_strings		= fm10k_get_strings,
+	.get_sset_count		= fm10k_get_sset_count,
+	.get_ethtool_stats      = fm10k_get_ethtool_stats,
+	.get_drvinfo		= fm10k_get_drvinfo,
+	.get_link		= ethtool_op_get_link,
+	.get_pauseparam		= fm10k_get_pauseparam,
+	.set_pauseparam		= fm10k_set_pauseparam,
+	.get_msglevel		= fm10k_get_msglevel,
+	.set_msglevel		= fm10k_set_msglevel,
+	.get_ringparam		= fm10k_get_ringparam,
+	.set_ringparam		= fm10k_set_ringparam,
+	.get_coalesce		= fm10k_get_coalesce,
+	.set_coalesce		= fm10k_set_coalesce,
+	.get_rxnfc		= fm10k_get_rxnfc,
+	.set_rxnfc		= fm10k_set_rxnfc,
+	.get_regs               = fm10k_get_regs,
+	.get_regs_len           = fm10k_get_regs_len,
+	.self_test		= fm10k_self_test,
+	.get_priv_flags		= fm10k_get_priv_flags,
+	.set_priv_flags		= fm10k_set_priv_flags,
+	.get_rxfh_indir_size	= fm10k_get_reta_size,
+	.get_rxfh_key_size	= fm10k_get_rssrk_size,
+	.get_rxfh		= fm10k_get_rssh,
+	.set_rxfh		= fm10k_set_rssh,
+	.get_channels		= fm10k_get_channels,
+	.set_channels		= fm10k_set_channels,
+	.get_ts_info		= ethtool_op_get_ts_info,
+};
+
+void fm10k_set_ethtool_ops(struct net_device *dev)
+{
+	dev->ethtool_ops = &fm10k_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
new file mode 100644
index 0000000000..8c50a128df
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -0,0 +1,700 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
+
+#include "fm10k.h"
+#include "fm10k_vf.h"
+#include "fm10k_pf.h"
+
+static s32 fm10k_iov_msg_error(struct fm10k_hw *hw, u32 **results,
+			       struct fm10k_mbx_info *mbx)
+{
+	struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx;
+	struct fm10k_intfc *interface = hw->back;
+	struct pci_dev *pdev = interface->pdev;
+
+	dev_err(&pdev->dev, "Unknown message ID %u on VF %d\n",
+		**results & FM10K_TLV_ID_MASK, vf_info->vf_idx);
+
+	return fm10k_tlv_msg_error(hw, results, mbx);
+}
+
+/**
+ *  fm10k_iov_msg_queue_mac_vlan - Message handler for MAC/VLAN request from VF
+ *  @hw: Pointer to hardware structure
+ *  @results: Pointer array to message, results[0] is pointer to message
+ *  @mbx: Pointer to mailbox information structure
+ *
+ *  This function is a custom handler for MAC/VLAN requests from the VF. The
+ *  assumption is that it is acceptable to directly hand off the message from
+ *  the VF to the PF's switch manager. However, we use a MAC/VLAN message
+ *  queue to avoid overloading the mailbox when a large number of requests
+ *  come in.
+ **/
+static s32 fm10k_iov_msg_queue_mac_vlan(struct fm10k_hw *hw, u32 **results,
+					struct fm10k_mbx_info *mbx)
+{
+	struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx;
+	struct fm10k_intfc *interface = hw->back;
+	u8 mac[ETH_ALEN];
+	u32 *result;
+	int err = 0;
+	bool set;
+	u16 vlan;
+	u32 vid;
+
+	/* we shouldn't be updating rules on a disabled interface */
+	if (!FM10K_VF_FLAG_ENABLED(vf_info))
+		err = FM10K_ERR_PARAM;
+
+	if (!err && !!results[FM10K_MAC_VLAN_MSG_VLAN]) {
+		result = results[FM10K_MAC_VLAN_MSG_VLAN];
+
+		/* record VLAN id requested */
+		err = fm10k_tlv_attr_get_u32(result, &vid);
+		if (err)
+			return err;
+
+		set = !(vid & FM10K_VLAN_CLEAR);
+		vid &= ~FM10K_VLAN_CLEAR;
+
+		/* if the length field has been set, this is a multi-bit
+		 * update request. For multi-bit requests, simply disallow
+		 * them when the pf_vid has been set. In this case, the PF
+		 * should have already cleared the VLAN_TABLE, and if we
+		 * allowed them, it could allow a rogue VF to receive traffic
+		 * on a VLAN it was not assigned. In the single-bit case, we
+		 * need to modify requests for VLAN 0 to use the default PF or
+		 * SW vid when assigned.
+		 */
+
+		if (vid >> 16) {
+			/* prevent multi-bit requests when PF has
+			 * administratively set the VLAN for this VF
+			 */
+			if (vf_info->pf_vid)
+				return FM10K_ERR_PARAM;
+		} else {
+			err = fm10k_iov_select_vid(vf_info, (u16)vid);
+			if (err < 0)
+				return err;
+
+			vid = err;
+		}
+
+		/* update VSI info for VF in regards to VLAN table */
+		err = hw->mac.ops.update_vlan(hw, vid, vf_info->vsi, set);
+	}
+
+	if (!err && !!results[FM10K_MAC_VLAN_MSG_MAC]) {
+		result = results[FM10K_MAC_VLAN_MSG_MAC];
+
+		/* record unicast MAC address requested */
+		err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan);
+		if (err)
+			return err;
+
+		/* block attempts to set MAC for a locked device */
+		if (is_valid_ether_addr(vf_info->mac) &&
+		    !ether_addr_equal(mac, vf_info->mac))
+			return FM10K_ERR_PARAM;
+
+		set = !(vlan & FM10K_VLAN_CLEAR);
+		vlan &= ~FM10K_VLAN_CLEAR;
+
+		err = fm10k_iov_select_vid(vf_info, vlan);
+		if (err < 0)
+			return err;
+
+		vlan = (u16)err;
+
+		/* Add this request to the MAC/VLAN queue */
+		err = fm10k_queue_mac_request(interface, vf_info->glort,
+					      mac, vlan, set);
+	}
+
+	if (!err && !!results[FM10K_MAC_VLAN_MSG_MULTICAST]) {
+		result = results[FM10K_MAC_VLAN_MSG_MULTICAST];
+
+		/* record multicast MAC address requested */
+		err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan);
+		if (err)
+			return err;
+
+		/* verify that the VF is allowed to request multicast */
+		if (!(vf_info->vf_flags & FM10K_VF_FLAG_MULTI_ENABLED))
+			return FM10K_ERR_PARAM;
+
+		set = !(vlan & FM10K_VLAN_CLEAR);
+		vlan &= ~FM10K_VLAN_CLEAR;
+
+		err = fm10k_iov_select_vid(vf_info, vlan);
+		if (err < 0)
+			return err;
+
+		vlan = (u16)err;
+
+		/* Add this request to the MAC/VLAN queue */
+		err = fm10k_queue_mac_request(interface, vf_info->glort,
+					      mac, vlan, set);
+	}
+
+	return err;
+}
+
+static const struct fm10k_msg_data iov_mbx_data[] = {
+	FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test),
+	FM10K_VF_MSG_MSIX_HANDLER(fm10k_iov_msg_msix_pf),
+	FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_iov_msg_queue_mac_vlan),
+	FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_iov_msg_lport_state_pf),
+	FM10K_TLV_MSG_ERROR_HANDLER(fm10k_iov_msg_error),
+};
+
+s32 fm10k_iov_event(struct fm10k_intfc *interface)
+{
+	struct fm10k_hw *hw = &interface->hw;
+	struct fm10k_iov_data *iov_data;
+	s64 vflre;
+	int i;
+
+	/* if there is no iov_data then there is no mailbox to process */
+	if (!READ_ONCE(interface->iov_data))
+		return 0;
+
+	rcu_read_lock();
+
+	iov_data = interface->iov_data;
+
+	/* check again now that we are in the RCU block */
+	if (!iov_data)
+		goto read_unlock;
+
+	if (!(fm10k_read_reg(hw, FM10K_EICR) & FM10K_EICR_VFLR))
+		goto read_unlock;
+
+	/* read VFLRE to determine if any VFs have been reset */
+	vflre = fm10k_read_reg(hw, FM10K_PFVFLRE(1));
+	vflre <<= 32;
+	vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(0));
+
+	i = iov_data->num_vfs;
+
+	for (vflre <<= 64 - i; vflre && i--; vflre += vflre) {
+		struct fm10k_vf_info *vf_info = &iov_data->vf_info[i];
+
+		if (vflre >= 0)
+			continue;
+
+		hw->iov.ops.reset_resources(hw, vf_info);
+		vf_info->mbx.ops.connect(hw, &vf_info->mbx);
+	}
+
+read_unlock:
+	rcu_read_unlock();
+
+	return 0;
+}
+
+s32 fm10k_iov_mbx(struct fm10k_intfc *interface)
+{
+	struct fm10k_hw *hw = &interface->hw;
+	struct fm10k_iov_data *iov_data;
+	int i;
+
+	/* if there is no iov_data then there is no mailbox to process */
+	if (!READ_ONCE(interface->iov_data))
+		return 0;
+
+	rcu_read_lock();
+
+	iov_data = interface->iov_data;
+
+	/* check again now that we are in the RCU block */
+	if (!iov_data)
+		goto read_unlock;
+
+	/* lock the mailbox for transmit and receive */
+	fm10k_mbx_lock(interface);
+
+	/* Most VF messages sent to the PF cause the PF to respond by
+	 * requesting from the SM mailbox. This means that too many VF
+	 * messages processed at once could cause a mailbox timeout on the PF.
+	 * To prevent this, store a pointer to the next VF mbx to process. Use
+	 * that as the start of the loop so that we don't starve whichever VF
+	 * got ignored on the previous run.
+	 */
+process_mbx:
+	for (i = iov_data->next_vf_mbx ? : iov_data->num_vfs; i--;) {
+		struct fm10k_vf_info *vf_info = &iov_data->vf_info[i];
+		struct fm10k_mbx_info *mbx = &vf_info->mbx;
+		u16 glort = vf_info->glort;
+
+		/* process the SM mailbox first to drain outgoing messages */
+		hw->mbx.ops.process(hw, &hw->mbx);
+
+		/* verify port mapping is valid, if not reset port */
+		if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort)) {
+			hw->iov.ops.reset_lport(hw, vf_info);
+			fm10k_clear_macvlan_queue(interface, glort, false);
+		}
+
+		/* reset VFs that have mailbox timed out */
+		if (!mbx->timeout) {
+			hw->iov.ops.reset_resources(hw, vf_info);
+			mbx->ops.connect(hw, mbx);
+		}
+
+		/* guarantee we have free space in the SM mailbox */
+		if (hw->mbx.state == FM10K_STATE_OPEN &&
+		    !hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) {
+			/* keep track of how many times this occurs */
+			interface->hw_sm_mbx_full++;
+
+			/* make sure we try again momentarily */
+			fm10k_service_event_schedule(interface);
+
+			break;
+		}
+
+		/* cleanup mailbox and process received messages */
+		mbx->ops.process(hw, mbx);
+	}
+
+	/* if we stopped processing mailboxes early, update next_vf_mbx.
+	 * Otherwise, reset next_vf_mbx, and restart loop so that we process
+	 * the remaining mailboxes we skipped at the start.
+	 */
+	if (i >= 0) {
+		iov_data->next_vf_mbx = i + 1;
+	} else if (iov_data->next_vf_mbx) {
+		iov_data->next_vf_mbx = 0;
+		goto process_mbx;
+	}
+
+	/* free the lock */
+	fm10k_mbx_unlock(interface);
+
+read_unlock:
+	rcu_read_unlock();
+
+	return 0;
+}
+
+void fm10k_iov_suspend(struct pci_dev *pdev)
+{
+	struct fm10k_intfc *interface = pci_get_drvdata(pdev);
+	struct fm10k_iov_data *iov_data = interface->iov_data;
+	struct fm10k_hw *hw = &interface->hw;
+	int num_vfs, i;
+
+	/* pull out num_vfs from iov_data */
+	num_vfs = iov_data ? iov_data->num_vfs : 0;
+
+	/* shut down queue mapping for VFs */
+	fm10k_write_reg(hw, FM10K_DGLORTMAP(fm10k_dglort_vf_rss),
+			FM10K_DGLORTMAP_NONE);
+
+	/* Stop any active VFs and reset their resources */
+	for (i = 0; i < num_vfs; i++) {
+		struct fm10k_vf_info *vf_info = &iov_data->vf_info[i];
+
+		hw->iov.ops.reset_resources(hw, vf_info);
+		hw->iov.ops.reset_lport(hw, vf_info);
+		fm10k_clear_macvlan_queue(interface, vf_info->glort, false);
+	}
+}
+
+static void fm10k_mask_aer_comp_abort(struct pci_dev *pdev)
+{
+	u32 err_mask;
+	int pos;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
+	if (!pos)
+		return;
+
+	/* Mask the completion abort bit in the ERR_UNCOR_MASK register,
+	 * preventing the device from reporting these errors to the upstream
+	 * PCIe root device. This avoids bringing down platforms which upgrade
+	 * non-fatal completer aborts into machine check exceptions. Completer
+	 * aborts can occur whenever a VF reads a queue it doesn't own.
+	 */
+	pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_MASK, &err_mask);
+	err_mask |= PCI_ERR_UNC_COMP_ABORT;
+	pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_MASK, err_mask);
+}
+
+int fm10k_iov_resume(struct pci_dev *pdev)
+{
+	struct fm10k_intfc *interface = pci_get_drvdata(pdev);
+	struct fm10k_iov_data *iov_data = interface->iov_data;
+	struct fm10k_dglort_cfg dglort = { 0 };
+	struct fm10k_hw *hw = &interface->hw;
+	int num_vfs, i;
+
+	/* pull out num_vfs from iov_data */
+	num_vfs = iov_data ? iov_data->num_vfs : 0;
+
+	/* return error if iov_data is not already populated */
+	if (!iov_data)
+		return -ENOMEM;
+
+	/* Lower severity of completer abort error reporting as
+	 * the VFs can trigger this any time they read a queue
+	 * that they don't own.
+	 */
+	fm10k_mask_aer_comp_abort(pdev);
+
+	/* allocate hardware resources for the VFs */
+	hw->iov.ops.assign_resources(hw, num_vfs, num_vfs);
+
+	/* configure DGLORT mapping for RSS */
+	dglort.glort = hw->mac.dglort_map & FM10K_DGLORTMAP_NONE;
+	dglort.idx = fm10k_dglort_vf_rss;
+	dglort.inner_rss = 1;
+	dglort.rss_l = fls(fm10k_queues_per_pool(hw) - 1);
+	dglort.queue_b = fm10k_vf_queue_index(hw, 0);
+	dglort.vsi_l = fls(hw->iov.total_vfs - 1);
+	dglort.vsi_b = 1;
+
+	hw->mac.ops.configure_dglort_map(hw, &dglort);
+
+	/* assign resources to the device */
+	for (i = 0; i < num_vfs; i++) {
+		struct fm10k_vf_info *vf_info = &iov_data->vf_info[i];
+
+		/* allocate all but the last GLORT to the VFs */
+		if (i == (~hw->mac.dglort_map >> FM10K_DGLORTMAP_MASK_SHIFT))
+			break;
+
+		/* assign GLORT to VF, and restrict it to multicast */
+		hw->iov.ops.set_lport(hw, vf_info, i,
+				      FM10K_VF_FLAG_MULTI_CAPABLE);
+
+		/* mailbox is disconnected so we don't send a message */
+		hw->iov.ops.assign_default_mac_vlan(hw, vf_info);
+
+		/* now we are ready so we can connect */
+		vf_info->mbx.ops.connect(hw, &vf_info->mbx);
+	}
+
+	return 0;
+}
+
+s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid)
+{
+	struct fm10k_iov_data *iov_data = interface->iov_data;
+	struct fm10k_hw *hw = &interface->hw;
+	struct fm10k_vf_info *vf_info;
+	u16 vf_idx = (glort - hw->mac.dglort_map) & FM10K_DGLORTMAP_NONE;
+
+	/* no IOV support, not our message to process */
+	if (!iov_data)
+		return FM10K_ERR_PARAM;
+
+	/* glort outside our range, not our message to process */
+	if (vf_idx >= iov_data->num_vfs)
+		return FM10K_ERR_PARAM;
+
+	/* determine if an update has occurred and if so notify the VF */
+	vf_info = &iov_data->vf_info[vf_idx];
+	if (vf_info->sw_vid != pvid) {
+		vf_info->sw_vid = pvid;
+		hw->iov.ops.assign_default_mac_vlan(hw, vf_info);
+	}
+
+	return 0;
+}
+
+static void fm10k_iov_free_data(struct pci_dev *pdev)
+{
+	struct fm10k_intfc *interface = pci_get_drvdata(pdev);
+
+	if (!interface->iov_data)
+		return;
+
+	/* reclaim hardware resources */
+	fm10k_iov_suspend(pdev);
+
+	/* drop iov_data from interface */
+	kfree_rcu(interface->iov_data, rcu);
+	interface->iov_data = NULL;
+}
+
+static s32 fm10k_iov_alloc_data(struct pci_dev *pdev, int num_vfs)
+{
+	struct fm10k_intfc *interface = pci_get_drvdata(pdev);
+	struct fm10k_iov_data *iov_data = interface->iov_data;
+	struct fm10k_hw *hw = &interface->hw;
+	size_t size;
+	int i;
+
+	/* return error if iov_data is already populated */
+	if (iov_data)
+		return -EBUSY;
+
+	/* The PF should always be able to assign resources */
+	if (!hw->iov.ops.assign_resources)
+		return -ENODEV;
+
+	/* nothing to do if no VFs are requested */
+	if (!num_vfs)
+		return 0;
+
+	/* allocate memory for VF storage */
+	size = offsetof(struct fm10k_iov_data, vf_info[num_vfs]);
+	iov_data = kzalloc(size, GFP_KERNEL);
+	if (!iov_data)
+		return -ENOMEM;
+
+	/* record number of VFs */
+	iov_data->num_vfs = num_vfs;
+
+	/* loop through vf_info structures initializing each entry */
+	for (i = 0; i < num_vfs; i++) {
+		struct fm10k_vf_info *vf_info = &iov_data->vf_info[i];
+		int err;
+
+		/* Record VF VSI value */
+		vf_info->vsi = i + 1;
+		vf_info->vf_idx = i;
+
+		/* initialize mailbox memory */
+		err = fm10k_pfvf_mbx_init(hw, &vf_info->mbx, iov_mbx_data, i);
+		if (err) {
+			dev_err(&pdev->dev,
+				"Unable to initialize SR-IOV mailbox\n");
+			kfree(iov_data);
+			return err;
+		}
+	}
+
+	/* assign iov_data to interface */
+	interface->iov_data = iov_data;
+
+	/* allocate hardware resources for the VFs */
+	fm10k_iov_resume(pdev);
+
+	return 0;
+}
+
+void fm10k_iov_disable(struct pci_dev *pdev)
+{
+	if (pci_num_vf(pdev) && pci_vfs_assigned(pdev))
+		dev_err(&pdev->dev,
+			"Cannot disable SR-IOV while VFs are assigned\n");
+	else
+		pci_disable_sriov(pdev);
+
+	fm10k_iov_free_data(pdev);
+}
+
+int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs)
+{
+	int current_vfs = pci_num_vf(pdev);
+	int err = 0;
+
+	if (current_vfs && pci_vfs_assigned(pdev)) {
+		dev_err(&pdev->dev,
+			"Cannot modify SR-IOV while VFs are assigned\n");
+		num_vfs = current_vfs;
+	} else {
+		pci_disable_sriov(pdev);
+		fm10k_iov_free_data(pdev);
+	}
+
+	/* allocate resources for the VFs */
+	err = fm10k_iov_alloc_data(pdev, num_vfs);
+	if (err)
+		return err;
+
+	/* allocate VFs if not already allocated */
+	if (num_vfs && num_vfs != current_vfs) {
+		err = pci_enable_sriov(pdev, num_vfs);
+		if (err) {
+			dev_err(&pdev->dev,
+				"Enable PCI SR-IOV failed: %d\n", err);
+			return err;
+		}
+	}
+
+	return num_vfs;
+}
+
+/**
+ * fm10k_iov_update_stats - Update stats for all VFs
+ * @interface: device private structure
+ *
+ * Updates the VF statistics for all enabled VFs. Expects to be called by
+ * fm10k_update_stats and assumes that locking via the __FM10K_UPDATING_STATS
+ * bit is already handled.
+ */
+void fm10k_iov_update_stats(struct fm10k_intfc *interface)
+{
+	struct fm10k_iov_data *iov_data = interface->iov_data;
+	struct fm10k_hw *hw = &interface->hw;
+	int i;
+
+	if (!iov_data)
+		return;
+
+	for (i = 0; i < iov_data->num_vfs; i++)
+		hw->iov.ops.update_stats(hw, iov_data->vf_info[i].stats, i);
+}
+
+static inline void fm10k_reset_vf_info(struct fm10k_intfc *interface,
+				       struct fm10k_vf_info *vf_info)
+{
+	struct fm10k_hw *hw = &interface->hw;
+
+	/* assigning the MAC address will send a mailbox message */
+	fm10k_mbx_lock(interface);
+
+	/* disable LPORT for this VF which clears switch rules */
+	hw->iov.ops.reset_lport(hw, vf_info);
+
+	fm10k_clear_macvlan_queue(interface, vf_info->glort, false);
+
+	/* assign new MAC+VLAN for this VF */
+	hw->iov.ops.assign_default_mac_vlan(hw, vf_info);
+
+	/* re-enable the LPORT for this VF */
+	hw->iov.ops.set_lport(hw, vf_info, vf_info->vf_idx,
+			      FM10K_VF_FLAG_MULTI_CAPABLE);
+
+	fm10k_mbx_unlock(interface);
+}
+
+int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	struct fm10k_iov_data *iov_data = interface->iov_data;
+	struct fm10k_vf_info *vf_info;
+
+	/* verify SR-IOV is active and that vf idx is valid */
+	if (!iov_data || vf_idx >= iov_data->num_vfs)
+		return -EINVAL;
+
+	/* verify MAC addr is valid */
+	if (!is_zero_ether_addr(mac) && !is_valid_ether_addr(mac))
+		return -EINVAL;
+
+	/* record new MAC address */
+	vf_info = &iov_data->vf_info[vf_idx];
+	ether_addr_copy(vf_info->mac, mac);
+
+	fm10k_reset_vf_info(interface, vf_info);
+
+	return 0;
+}
+
+int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid,
+			  u8 qos, __be16 vlan_proto)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	struct fm10k_iov_data *iov_data = interface->iov_data;
+	struct fm10k_hw *hw = &interface->hw;
+	struct fm10k_vf_info *vf_info;
+
+	/* verify SR-IOV is active and that vf idx is valid */
+	if (!iov_data || vf_idx >= iov_data->num_vfs)
+		return -EINVAL;
+
+	/* QOS is unsupported and VLAN IDs accepted range 0-4094 */
+	if (qos || (vid > (VLAN_VID_MASK - 1)))
+		return -EINVAL;
+
+	/* VF VLAN Protocol part to default is unsupported */
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
+	vf_info = &iov_data->vf_info[vf_idx];
+
+	/* exit if there is nothing to do */
+	if (vf_info->pf_vid == vid)
+		return 0;
+
+	/* record default VLAN ID for VF */
+	vf_info->pf_vid = vid;
+
+	/* Clear the VLAN table for the VF */
+	hw->mac.ops.update_vlan(hw, FM10K_VLAN_ALL, vf_info->vsi, false);
+
+	fm10k_reset_vf_info(interface, vf_info);
+
+	return 0;
+}
+
+int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
+			int __always_unused min_rate, int max_rate)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	struct fm10k_iov_data *iov_data = interface->iov_data;
+	struct fm10k_hw *hw = &interface->hw;
+
+	/* verify SR-IOV is active and that vf idx is valid */
+	if (!iov_data || vf_idx >= iov_data->num_vfs)
+		return -EINVAL;
+
+	/* rate limit cannot be less than 10Mbs or greater than link speed */
+	if (max_rate &&
+	    (max_rate < FM10K_VF_TC_MIN || max_rate > FM10K_VF_TC_MAX))
+		return -EINVAL;
+
+	/* store values */
+	iov_data->vf_info[vf_idx].rate = max_rate;
+
+	/* update hardware configuration */
+	hw->iov.ops.configure_tc(hw, vf_idx, max_rate);
+
+	return 0;
+}
+
+int fm10k_ndo_get_vf_config(struct net_device *netdev,
+			    int vf_idx, struct ifla_vf_info *ivi)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	struct fm10k_iov_data *iov_data = interface->iov_data;
+	struct fm10k_vf_info *vf_info;
+
+	/* verify SR-IOV is active and that vf idx is valid */
+	if (!iov_data || vf_idx >= iov_data->num_vfs)
+		return -EINVAL;
+
+	vf_info = &iov_data->vf_info[vf_idx];
+
+	ivi->vf = vf_idx;
+	ivi->max_tx_rate = vf_info->rate;
+	ivi->min_tx_rate = 0;
+	ether_addr_copy(ivi->mac, vf_info->mac);
+	ivi->vlan = vf_info->pf_vid;
+	ivi->qos = 0;
+
+	return 0;
+}
+
+int fm10k_ndo_get_vf_stats(struct net_device *netdev,
+			   int vf_idx, struct ifla_vf_stats *stats)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	struct fm10k_iov_data *iov_data = interface->iov_data;
+	struct fm10k_hw *hw = &interface->hw;
+	struct fm10k_hw_stats_q *hw_stats;
+	u32 idx, qpp;
+
+	/* verify SR-IOV is active and that vf idx is valid */
+	if (!iov_data || vf_idx >= iov_data->num_vfs)
+		return -EINVAL;
+
+	qpp = fm10k_queues_per_pool(hw);
+	hw_stats = iov_data->vf_info[vf_idx].stats;
+
+	for (idx = 0; idx < qpp; idx++) {
+		stats->rx_packets += hw_stats[idx].rx_packets.count;
+		stats->tx_packets += hw_stats[idx].tx_packets.count;
+		stats->rx_bytes += hw_stats[idx].rx_bytes.count;
+		stats->tx_bytes += hw_stats[idx].tx_bytes.count;
+		stats->rx_dropped += hw_stats[idx].rx_drops.count;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
new file mode 100644
index 0000000000..fc373472e4
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -0,0 +1,2009 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <linux/if_macvlan.h>
+#include <linux/prefetch.h>
+
+#include "fm10k.h"
+
+#define DRV_SUMMARY	"Intel(R) Ethernet Switch Host Interface Driver"
+char fm10k_driver_name[] = "fm10k";
+static const char fm10k_driver_string[] = DRV_SUMMARY;
+static const char fm10k_copyright[] =
+	"Copyright(c) 2013 - 2019 Intel Corporation.";
+
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION(DRV_SUMMARY);
+MODULE_LICENSE("GPL v2");
+
+/* single workqueue for entire fm10k driver */
+struct workqueue_struct *fm10k_workqueue;
+
+/**
+ * fm10k_init_module - Driver Registration Routine
+ *
+ * fm10k_init_module is the first routine called when the driver is
+ * loaded.  All it does is register with the PCI subsystem.
+ **/
+static int __init fm10k_init_module(void)
+{
+	int ret;
+
+	pr_info("%s\n", fm10k_driver_string);
+	pr_info("%s\n", fm10k_copyright);
+
+	/* create driver workqueue */
+	fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
+					  fm10k_driver_name);
+	if (!fm10k_workqueue)
+		return -ENOMEM;
+
+	fm10k_dbg_init();
+
+	ret = fm10k_register_pci_driver();
+	if (ret) {
+		fm10k_dbg_exit();
+		destroy_workqueue(fm10k_workqueue);
+	}
+
+	return ret;
+}
+module_init(fm10k_init_module);
+
+/**
+ * fm10k_exit_module - Driver Exit Cleanup Routine
+ *
+ * fm10k_exit_module is called just before the driver is removed
+ * from memory.
+ **/
+static void __exit fm10k_exit_module(void)
+{
+	fm10k_unregister_pci_driver();
+
+	fm10k_dbg_exit();
+
+	/* destroy driver workqueue */
+	destroy_workqueue(fm10k_workqueue);
+}
+module_exit(fm10k_exit_module);
+
+static bool fm10k_alloc_mapped_page(struct fm10k_ring *rx_ring,
+				    struct fm10k_rx_buffer *bi)
+{
+	struct page *page = bi->page;
+	dma_addr_t dma;
+
+	/* Only page will be NULL if buffer was consumed */
+	if (likely(page))
+		return true;
+
+	/* alloc new page for storage */
+	page = dev_alloc_page();
+	if (unlikely(!page)) {
+		rx_ring->rx_stats.alloc_failed++;
+		return false;
+	}
+
+	/* map page for use */
+	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+
+	/* if mapping failed free memory back to system since
+	 * there isn't much point in holding memory we can't use
+	 */
+	if (dma_mapping_error(rx_ring->dev, dma)) {
+		__free_page(page);
+
+		rx_ring->rx_stats.alloc_failed++;
+		return false;
+	}
+
+	bi->dma = dma;
+	bi->page = page;
+	bi->page_offset = 0;
+
+	return true;
+}
+
+/**
+ * fm10k_alloc_rx_buffers - Replace used receive buffers
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ **/
+void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count)
+{
+	union fm10k_rx_desc *rx_desc;
+	struct fm10k_rx_buffer *bi;
+	u16 i = rx_ring->next_to_use;
+
+	/* nothing to do */
+	if (!cleaned_count)
+		return;
+
+	rx_desc = FM10K_RX_DESC(rx_ring, i);
+	bi = &rx_ring->rx_buffer[i];
+	i -= rx_ring->count;
+
+	do {
+		if (!fm10k_alloc_mapped_page(rx_ring, bi))
+			break;
+
+		/* Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+		rx_desc->q.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+		rx_desc++;
+		bi++;
+		i++;
+		if (unlikely(!i)) {
+			rx_desc = FM10K_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_buffer;
+			i -= rx_ring->count;
+		}
+
+		/* clear the status bits for the next_to_use descriptor */
+		rx_desc->d.staterr = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	i += rx_ring->count;
+
+	if (rx_ring->next_to_use != i) {
+		/* record the next descriptor to use */
+		rx_ring->next_to_use = i;
+
+		/* update next to alloc since we have filled the ring */
+		rx_ring->next_to_alloc = i;
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+
+		/* notify hardware of new descriptors */
+		writel(i, rx_ring->tail);
+	}
+}
+
+/**
+ * fm10k_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the interface
+ **/
+static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring,
+				struct fm10k_rx_buffer *old_buff)
+{
+	struct fm10k_rx_buffer *new_buff;
+	u16 nta = rx_ring->next_to_alloc;
+
+	new_buff = &rx_ring->rx_buffer[nta];
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* transfer page from old buffer to new buffer */
+	*new_buff = *old_buff;
+
+	/* sync the buffer for use by the device */
+	dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
+					 old_buff->page_offset,
+					 FM10K_RX_BUFSZ,
+					 DMA_FROM_DEVICE);
+}
+
+static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer,
+				    struct page *page,
+				    unsigned int __maybe_unused truesize)
+{
+	/* avoid re-using remote and pfmemalloc pages */
+	if (!dev_page_is_reusable(page))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely(page_count(page) != 1))
+		return false;
+
+	/* flip page offset to other buffer */
+	rx_buffer->page_offset ^= FM10K_RX_BUFSZ;
+#else
+	/* move offset up to the next cache line */
+	rx_buffer->page_offset += truesize;
+
+	if (rx_buffer->page_offset > (PAGE_SIZE - FM10K_RX_BUFSZ))
+		return false;
+#endif
+
+	/* Even if we own the page, we are not allowed to use atomic_set()
+	 * This would break get_page_unless_zero() users.
+	 */
+	page_ref_inc(page);
+
+	return true;
+}
+
+/**
+ * fm10k_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_buffer: buffer containing page to add
+ * @size: packet size from rx_desc
+ * @rx_desc: descriptor containing length of buffer written by hardware
+ * @skb: sk_buff to place the data into
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the interface.
+ **/
+static bool fm10k_add_rx_frag(struct fm10k_rx_buffer *rx_buffer,
+			      unsigned int size,
+			      union fm10k_rx_desc *rx_desc,
+			      struct sk_buff *skb)
+{
+	struct page *page = rx_buffer->page;
+	unsigned char *va = page_address(page) + rx_buffer->page_offset;
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = FM10K_RX_BUFSZ;
+#else
+	unsigned int truesize = ALIGN(size, 512);
+#endif
+	unsigned int pull_len;
+
+	if (unlikely(skb_is_nonlinear(skb)))
+		goto add_tail_frag;
+
+	if (likely(size <= FM10K_RX_HDR_LEN)) {
+		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+
+		/* page is reusable, we can reuse buffer as-is */
+		if (dev_page_is_reusable(page))
+			return true;
+
+		/* this page cannot be reused so discard it */
+		__free_page(page);
+		return false;
+	}
+
+	/* we need the header to contain the greater of either ETH_HLEN or
+	 * 60 bytes if the skb->len is less than 60 for skb_pad.
+	 */
+	pull_len = eth_get_headlen(skb->dev, va, FM10K_RX_HDR_LEN);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));
+
+	/* update all of the pointers */
+	va += pull_len;
+	size -= pull_len;
+
+add_tail_frag:
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+			(unsigned long)va & ~PAGE_MASK, size, truesize);
+
+	return fm10k_can_reuse_rx_page(rx_buffer, page, truesize);
+}
+
+static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring,
+					     union fm10k_rx_desc *rx_desc,
+					     struct sk_buff *skb)
+{
+	unsigned int size = le16_to_cpu(rx_desc->w.length);
+	struct fm10k_rx_buffer *rx_buffer;
+	struct page *page;
+
+	rx_buffer = &rx_ring->rx_buffer[rx_ring->next_to_clean];
+	page = rx_buffer->page;
+	prefetchw(page);
+
+	if (likely(!skb)) {
+		void *page_addr = page_address(page) +
+				  rx_buffer->page_offset;
+
+		/* prefetch first cache line of first page */
+		net_prefetch(page_addr);
+
+		/* allocate a skb to store the frags */
+		skb = napi_alloc_skb(&rx_ring->q_vector->napi,
+				     FM10K_RX_HDR_LEN);
+		if (unlikely(!skb)) {
+			rx_ring->rx_stats.alloc_failed++;
+			return NULL;
+		}
+
+		/* we will be copying header into skb->data in
+		 * pskb_may_pull so it is in our interest to prefetch
+		 * it now to avoid a possible cache miss
+		 */
+		prefetchw(skb->data);
+	}
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      rx_buffer->dma,
+				      rx_buffer->page_offset,
+				      size,
+				      DMA_FROM_DEVICE);
+
+	/* pull page into skb */
+	if (fm10k_add_rx_frag(rx_buffer, size, rx_desc, skb)) {
+		/* hand second half of page back to the ring */
+		fm10k_reuse_rx_page(rx_ring, rx_buffer);
+	} else {
+		/* we are not reusing the buffer so unmap it */
+		dma_unmap_page(rx_ring->dev, rx_buffer->dma,
+			       PAGE_SIZE, DMA_FROM_DEVICE);
+	}
+
+	/* clear contents of rx_buffer */
+	rx_buffer->page = NULL;
+
+	return skb;
+}
+
+static inline void fm10k_rx_checksum(struct fm10k_ring *ring,
+				     union fm10k_rx_desc *rx_desc,
+				     struct sk_buff *skb)
+{
+	skb_checksum_none_assert(skb);
+
+	/* Rx checksum disabled via ethtool */
+	if (!(ring->netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* TCP/UDP checksum error bit is set */
+	if (fm10k_test_staterr(rx_desc,
+			       FM10K_RXD_STATUS_L4E |
+			       FM10K_RXD_STATUS_L4E2 |
+			       FM10K_RXD_STATUS_IPE |
+			       FM10K_RXD_STATUS_IPE2)) {
+		ring->rx_stats.csum_err++;
+		return;
+	}
+
+	/* It must be a TCP or UDP packet with a valid checksum */
+	if (fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_L4CS2))
+		skb->encapsulation = true;
+	else if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_L4CS))
+		return;
+
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	ring->rx_stats.csum_good++;
+}
+
+#define FM10K_RSS_L4_TYPES_MASK \
+	(BIT(FM10K_RSSTYPE_IPV4_TCP) | \
+	 BIT(FM10K_RSSTYPE_IPV4_UDP) | \
+	 BIT(FM10K_RSSTYPE_IPV6_TCP) | \
+	 BIT(FM10K_RSSTYPE_IPV6_UDP))
+
+static inline void fm10k_rx_hash(struct fm10k_ring *ring,
+				 union fm10k_rx_desc *rx_desc,
+				 struct sk_buff *skb)
+{
+	u16 rss_type;
+
+	if (!(ring->netdev->features & NETIF_F_RXHASH))
+		return;
+
+	rss_type = le16_to_cpu(rx_desc->w.pkt_info) & FM10K_RXD_RSSTYPE_MASK;
+	if (!rss_type)
+		return;
+
+	skb_set_hash(skb, le32_to_cpu(rx_desc->d.rss),
+		     (BIT(rss_type) & FM10K_RSS_L4_TYPES_MASK) ?
+		     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
+}
+
+static void fm10k_type_trans(struct fm10k_ring *rx_ring,
+			     union fm10k_rx_desc __maybe_unused *rx_desc,
+			     struct sk_buff *skb)
+{
+	struct net_device *dev = rx_ring->netdev;
+	struct fm10k_l2_accel *l2_accel = rcu_dereference_bh(rx_ring->l2_accel);
+
+	/* check to see if DGLORT belongs to a MACVLAN */
+	if (l2_accel) {
+		u16 idx = le16_to_cpu(FM10K_CB(skb)->fi.w.dglort) - 1;
+
+		idx -= l2_accel->dglort;
+		if (idx < l2_accel->size && l2_accel->macvlan[idx])
+			dev = l2_accel->macvlan[idx];
+		else
+			l2_accel = NULL;
+	}
+
+	/* Record Rx queue, or update macvlan statistics */
+	if (!l2_accel)
+		skb_record_rx_queue(skb, rx_ring->queue_index);
+	else
+		macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true,
+				 false);
+
+	skb->protocol = eth_type_trans(skb, dev);
+}
+
+/**
+ * fm10k_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, timestamp, protocol, and
+ * other fields within the skb.
+ **/
+static unsigned int fm10k_process_skb_fields(struct fm10k_ring *rx_ring,
+					     union fm10k_rx_desc *rx_desc,
+					     struct sk_buff *skb)
+{
+	unsigned int len = skb->len;
+
+	fm10k_rx_hash(rx_ring, rx_desc, skb);
+
+	fm10k_rx_checksum(rx_ring, rx_desc, skb);
+
+	FM10K_CB(skb)->tstamp = rx_desc->q.timestamp;
+
+	FM10K_CB(skb)->fi.w.vlan = rx_desc->w.vlan;
+
+	FM10K_CB(skb)->fi.d.glort = rx_desc->d.glort;
+
+	if (rx_desc->w.vlan) {
+		u16 vid = le16_to_cpu(rx_desc->w.vlan);
+
+		if ((vid & VLAN_VID_MASK) != rx_ring->vid)
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+		else if (vid & VLAN_PRIO_MASK)
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+					       vid & VLAN_PRIO_MASK);
+	}
+
+	fm10k_type_trans(rx_ring, rx_desc, skb);
+
+	return len;
+}
+
+/**
+ * fm10k_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ *
+ * This function updates next to clean.  If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ **/
+static bool fm10k_is_non_eop(struct fm10k_ring *rx_ring,
+			     union fm10k_rx_desc *rx_desc)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
+	prefetch(FM10K_RX_DESC(rx_ring, ntc));
+
+	if (likely(fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_EOP)))
+		return false;
+
+	return true;
+}
+
+/**
+ * fm10k_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+static bool fm10k_cleanup_headers(struct fm10k_ring *rx_ring,
+				  union fm10k_rx_desc *rx_desc,
+				  struct sk_buff *skb)
+{
+	if (unlikely((fm10k_test_staterr(rx_desc,
+					 FM10K_RXD_STATUS_RXE)))) {
+#define FM10K_TEST_RXD_BIT(rxd, bit) \
+	((rxd)->w.csum_err & cpu_to_le16(bit))
+		if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_SWITCH_ERROR))
+			rx_ring->rx_stats.switch_errors++;
+		if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_NO_DESCRIPTOR))
+			rx_ring->rx_stats.drops++;
+		if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_PP_ERROR))
+			rx_ring->rx_stats.pp_errors++;
+		if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_SWITCH_READY))
+			rx_ring->rx_stats.link_errors++;
+		if (FM10K_TEST_RXD_BIT(rx_desc, FM10K_RXD_ERR_TOO_BIG))
+			rx_ring->rx_stats.length_errors++;
+		dev_kfree_skb_any(skb);
+		rx_ring->rx_stats.errors++;
+		return true;
+	}
+
+	/* if eth_skb_pad returns an error the skb was freed */
+	if (eth_skb_pad(skb))
+		return true;
+
+	return false;
+}
+
+/**
+ * fm10k_receive_skb - helper function to handle rx indications
+ * @q_vector: structure containing interrupt and ring information
+ * @skb: packet to send up
+ **/
+static void fm10k_receive_skb(struct fm10k_q_vector *q_vector,
+			      struct sk_buff *skb)
+{
+	napi_gro_receive(&q_vector->napi, skb);
+}
+
+static int fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector,
+			      struct fm10k_ring *rx_ring,
+			      int budget)
+{
+	struct sk_buff *skb = rx_ring->skb;
+	unsigned int total_bytes = 0, total_packets = 0;
+	u16 cleaned_count = fm10k_desc_unused(rx_ring);
+
+	while (likely(total_packets < budget)) {
+		union fm10k_rx_desc *rx_desc;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= FM10K_RX_BUFFER_WRITE) {
+			fm10k_alloc_rx_buffers(rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+		if (!rx_desc->d.staterr)
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * descriptor has been written back
+		 */
+		dma_rmb();
+
+		/* retrieve a buffer from the ring */
+		skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb);
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb)
+			break;
+
+		cleaned_count++;
+
+		/* fetch next buffer in frame if non-eop */
+		if (fm10k_is_non_eop(rx_ring, rx_desc))
+			continue;
+
+		/* verify the packet layout is correct */
+		if (fm10k_cleanup_headers(rx_ring, rx_desc, skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		/* populate checksum, timestamp, VLAN, and protocol */
+		total_bytes += fm10k_process_skb_fields(rx_ring, rx_desc, skb);
+
+		fm10k_receive_skb(q_vector, skb);
+
+		/* reset skb pointer */
+		skb = NULL;
+
+		/* update budget accounting */
+		total_packets++;
+	}
+
+	/* place incomplete frames back on ring for completion */
+	rx_ring->skb = skb;
+
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->stats.packets += total_packets;
+	rx_ring->stats.bytes += total_bytes;
+	u64_stats_update_end(&rx_ring->syncp);
+	q_vector->rx.total_packets += total_packets;
+	q_vector->rx.total_bytes += total_bytes;
+
+	return total_packets;
+}
+
+#define VXLAN_HLEN (sizeof(struct udphdr) + 8)
+static struct ethhdr *fm10k_port_is_vxlan(struct sk_buff *skb)
+{
+	struct fm10k_intfc *interface = netdev_priv(skb->dev);
+
+	if (interface->vxlan_port != udp_hdr(skb)->dest)
+		return NULL;
+
+	/* return offset of udp_hdr plus 8 bytes for VXLAN header */
+	return (struct ethhdr *)(skb_transport_header(skb) + VXLAN_HLEN);
+}
+
+#define FM10K_NVGRE_RESERVED0_FLAGS htons(0x9FFF)
+#define NVGRE_TNI htons(0x2000)
+struct fm10k_nvgre_hdr {
+	__be16 flags;
+	__be16 proto;
+	__be32 tni;
+};
+
+static struct ethhdr *fm10k_gre_is_nvgre(struct sk_buff *skb)
+{
+	struct fm10k_nvgre_hdr *nvgre_hdr;
+	int hlen = ip_hdrlen(skb);
+
+	/* currently only IPv4 is supported due to hlen above */
+	if (vlan_get_protocol(skb) != htons(ETH_P_IP))
+		return NULL;
+
+	/* our transport header should be NVGRE */
+	nvgre_hdr = (struct fm10k_nvgre_hdr *)(skb_network_header(skb) + hlen);
+
+	/* verify all reserved flags are 0 */
+	if (nvgre_hdr->flags & FM10K_NVGRE_RESERVED0_FLAGS)
+		return NULL;
+
+	/* report start of ethernet header */
+	if (nvgre_hdr->flags & NVGRE_TNI)
+		return (struct ethhdr *)(nvgre_hdr + 1);
+
+	return (struct ethhdr *)(&nvgre_hdr->tni);
+}
+
+__be16 fm10k_tx_encap_offload(struct sk_buff *skb)
+{
+	u8 l4_hdr = 0, inner_l4_hdr = 0, inner_l4_hlen;
+	struct ethhdr *eth_hdr;
+
+	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
+	    skb->inner_protocol != htons(ETH_P_TEB))
+		return 0;
+
+	switch (vlan_get_protocol(skb)) {
+	case htons(ETH_P_IP):
+		l4_hdr = ip_hdr(skb)->protocol;
+		break;
+	case htons(ETH_P_IPV6):
+		l4_hdr = ipv6_hdr(skb)->nexthdr;
+		break;
+	default:
+		return 0;
+	}
+
+	switch (l4_hdr) {
+	case IPPROTO_UDP:
+		eth_hdr = fm10k_port_is_vxlan(skb);
+		break;
+	case IPPROTO_GRE:
+		eth_hdr = fm10k_gre_is_nvgre(skb);
+		break;
+	default:
+		return 0;
+	}
+
+	if (!eth_hdr)
+		return 0;
+
+	switch (eth_hdr->h_proto) {
+	case htons(ETH_P_IP):
+		inner_l4_hdr = inner_ip_hdr(skb)->protocol;
+		break;
+	case htons(ETH_P_IPV6):
+		inner_l4_hdr = inner_ipv6_hdr(skb)->nexthdr;
+		break;
+	default:
+		return 0;
+	}
+
+	switch (inner_l4_hdr) {
+	case IPPROTO_TCP:
+		inner_l4_hlen = inner_tcp_hdrlen(skb);
+		break;
+	case IPPROTO_UDP:
+		inner_l4_hlen = 8;
+		break;
+	default:
+		return 0;
+	}
+
+	/* The hardware allows tunnel offloads only if the combined inner and
+	 * outer header is 184 bytes or less
+	 */
+	if (skb_inner_transport_header(skb) + inner_l4_hlen -
+	    skb_mac_header(skb) > FM10K_TUNNEL_HEADER_LENGTH)
+		return 0;
+
+	return eth_hdr->h_proto;
+}
+
+static int fm10k_tso(struct fm10k_ring *tx_ring,
+		     struct fm10k_tx_buffer *first)
+{
+	struct sk_buff *skb = first->skb;
+	struct fm10k_tx_desc *tx_desc;
+	unsigned char *th;
+	u8 hdrlen;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	/* compute header lengths */
+	if (skb->encapsulation) {
+		if (!fm10k_tx_encap_offload(skb))
+			goto err_vxlan;
+		th = skb_inner_transport_header(skb);
+	} else {
+		th = skb_transport_header(skb);
+	}
+
+	/* compute offset from SOF to transport header and add header len */
+	hdrlen = (th - skb->data) + (((struct tcphdr *)th)->doff << 2);
+
+	first->tx_flags |= FM10K_TX_FLAGS_CSUM;
+
+	/* update gso size and bytecount with header size */
+	first->gso_segs = skb_shinfo(skb)->gso_segs;
+	first->bytecount += (first->gso_segs - 1) * hdrlen;
+
+	/* populate Tx descriptor header size and mss */
+	tx_desc = FM10K_TX_DESC(tx_ring, tx_ring->next_to_use);
+	tx_desc->hdrlen = hdrlen;
+	tx_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
+
+	return 1;
+
+err_vxlan:
+	tx_ring->netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL;
+	if (net_ratelimit())
+		netdev_err(tx_ring->netdev,
+			   "TSO requested for unsupported tunnel, disabling offload\n");
+	return -1;
+}
+
+static void fm10k_tx_csum(struct fm10k_ring *tx_ring,
+			  struct fm10k_tx_buffer *first)
+{
+	struct sk_buff *skb = first->skb;
+	struct fm10k_tx_desc *tx_desc;
+	union {
+		struct iphdr *ipv4;
+		struct ipv6hdr *ipv6;
+		u8 *raw;
+	} network_hdr;
+	u8 *transport_hdr;
+	__be16 frag_off;
+	__be16 protocol;
+	u8 l4_hdr = 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		goto no_csum;
+
+	if (skb->encapsulation) {
+		protocol = fm10k_tx_encap_offload(skb);
+		if (!protocol) {
+			if (skb_checksum_help(skb)) {
+				dev_warn(tx_ring->dev,
+					 "failed to offload encap csum!\n");
+				tx_ring->tx_stats.csum_err++;
+			}
+			goto no_csum;
+		}
+		network_hdr.raw = skb_inner_network_header(skb);
+		transport_hdr = skb_inner_transport_header(skb);
+	} else {
+		protocol = vlan_get_protocol(skb);
+		network_hdr.raw = skb_network_header(skb);
+		transport_hdr = skb_transport_header(skb);
+	}
+
+	switch (protocol) {
+	case htons(ETH_P_IP):
+		l4_hdr = network_hdr.ipv4->protocol;
+		break;
+	case htons(ETH_P_IPV6):
+		l4_hdr = network_hdr.ipv6->nexthdr;
+		if (likely((transport_hdr - network_hdr.raw) ==
+			   sizeof(struct ipv6hdr)))
+			break;
+		ipv6_skip_exthdr(skb, network_hdr.raw - skb->data +
+				      sizeof(struct ipv6hdr),
+				 &l4_hdr, &frag_off);
+		if (unlikely(frag_off))
+			l4_hdr = NEXTHDR_FRAGMENT;
+		break;
+	default:
+		break;
+	}
+
+	switch (l4_hdr) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+		break;
+	case IPPROTO_GRE:
+		if (skb->encapsulation)
+			break;
+		fallthrough;
+	default:
+		if (unlikely(net_ratelimit())) {
+			dev_warn(tx_ring->dev,
+				 "partial checksum, version=%d l4 proto=%x\n",
+				 protocol, l4_hdr);
+		}
+		skb_checksum_help(skb);
+		tx_ring->tx_stats.csum_err++;
+		goto no_csum;
+	}
+
+	/* update TX checksum flag */
+	first->tx_flags |= FM10K_TX_FLAGS_CSUM;
+	tx_ring->tx_stats.csum_good++;
+
+no_csum:
+	/* populate Tx descriptor header size and mss */
+	tx_desc = FM10K_TX_DESC(tx_ring, tx_ring->next_to_use);
+	tx_desc->hdrlen = 0;
+	tx_desc->mss = 0;
+}
+
+#define FM10K_SET_FLAG(_input, _flag, _result) \
+	((_flag <= _result) ? \
+	 ((u32)(_input & _flag) * (_result / _flag)) : \
+	 ((u32)(_input & _flag) / (_flag / _result)))
+
+static u8 fm10k_tx_desc_flags(struct sk_buff *skb, u32 tx_flags)
+{
+	/* set type for advanced descriptor with frame checksum insertion */
+	u32 desc_flags = 0;
+
+	/* set checksum offload bits */
+	desc_flags |= FM10K_SET_FLAG(tx_flags, FM10K_TX_FLAGS_CSUM,
+				     FM10K_TXD_FLAG_CSUM);
+
+	return desc_flags;
+}
+
+static bool fm10k_tx_desc_push(struct fm10k_ring *tx_ring,
+			       struct fm10k_tx_desc *tx_desc, u16 i,
+			       dma_addr_t dma, unsigned int size, u8 desc_flags)
+{
+	/* set RS and INT for last frame in a cache line */
+	if ((++i & (FM10K_TXD_WB_FIFO_SIZE - 1)) == 0)
+		desc_flags |= FM10K_TXD_FLAG_RS | FM10K_TXD_FLAG_INT;
+
+	/* record values to descriptor */
+	tx_desc->buffer_addr = cpu_to_le64(dma);
+	tx_desc->flags = desc_flags;
+	tx_desc->buflen = cpu_to_le16(size);
+
+	/* return true if we just wrapped the ring */
+	return i == tx_ring->count;
+}
+
+static int __fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
+{
+	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+
+	/* Memory barrier before checking head and tail */
+	smp_mb();
+
+	/* Check again in a case another CPU has just made room available */
+	if (likely(fm10k_desc_unused(tx_ring) < size))
+		return -EBUSY;
+
+	/* A reprieve! - use start_queue because it doesn't call schedule */
+	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
+	++tx_ring->tx_stats.restart_queue;
+	return 0;
+}
+
+static inline int fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
+{
+	if (likely(fm10k_desc_unused(tx_ring) >= size))
+		return 0;
+	return __fm10k_maybe_stop_tx(tx_ring, size);
+}
+
+static void fm10k_tx_map(struct fm10k_ring *tx_ring,
+			 struct fm10k_tx_buffer *first)
+{
+	struct sk_buff *skb = first->skb;
+	struct fm10k_tx_buffer *tx_buffer;
+	struct fm10k_tx_desc *tx_desc;
+	skb_frag_t *frag;
+	unsigned char *data;
+	dma_addr_t dma;
+	unsigned int data_len, size;
+	u32 tx_flags = first->tx_flags;
+	u16 i = tx_ring->next_to_use;
+	u8 flags = fm10k_tx_desc_flags(skb, tx_flags);
+
+	tx_desc = FM10K_TX_DESC(tx_ring, i);
+
+	/* add HW VLAN tag */
+	if (skb_vlan_tag_present(skb))
+		tx_desc->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
+	else
+		tx_desc->vlan = 0;
+
+	size = skb_headlen(skb);
+	data = skb->data;
+
+	dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);
+
+	data_len = skb->data_len;
+	tx_buffer = first;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_error;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buffer, len, size);
+		dma_unmap_addr_set(tx_buffer, dma, dma);
+
+		while (unlikely(size > FM10K_MAX_DATA_PER_TXD)) {
+			if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++, dma,
+					       FM10K_MAX_DATA_PER_TXD, flags)) {
+				tx_desc = FM10K_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+
+			dma += FM10K_MAX_DATA_PER_TXD;
+			size -= FM10K_MAX_DATA_PER_TXD;
+		}
+
+		if (likely(!data_len))
+			break;
+
+		if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++,
+				       dma, size, flags)) {
+			tx_desc = FM10K_TX_DESC(tx_ring, 0);
+			i = 0;
+		}
+
+		size = skb_frag_size(frag);
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+				       DMA_TO_DEVICE);
+
+		tx_buffer = &tx_ring->tx_buffer[i];
+	}
+
+	/* write last descriptor with LAST bit set */
+	flags |= FM10K_TXD_FLAG_LAST;
+
+	if (fm10k_tx_desc_push(tx_ring, tx_desc, i++, dma, size, flags))
+		i = 0;
+
+	/* record bytecount for BQL */
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+
+	/* record SW timestamp if HW timestamp is not available */
+	skb_tx_timestamp(first->skb);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.  (Only applicable for weak-ordered
+	 * memory model archs, such as IA-64).
+	 *
+	 * We also need this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	tx_ring->next_to_use = i;
+
+	/* Make sure there is space in the ring for the next send. */
+	fm10k_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	/* notify HW of packet */
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
+		writel(i, tx_ring->tail);
+	}
+
+	return;
+dma_error:
+	dev_err(tx_ring->dev, "TX DMA map failed\n");
+
+	/* clear dma mappings for failed tx_buffer map */
+	for (;;) {
+		tx_buffer = &tx_ring->tx_buffer[i];
+		fm10k_unmap_and_free_tx_resource(tx_ring, tx_buffer);
+		if (tx_buffer == first)
+			break;
+		if (i == 0)
+			i = tx_ring->count;
+		i--;
+	}
+
+	tx_ring->next_to_use = i;
+}
+
+netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
+				  struct fm10k_ring *tx_ring)
+{
+	u16 count = TXD_USE_COUNT(skb_headlen(skb));
+	struct fm10k_tx_buffer *first;
+	unsigned short f;
+	u32 tx_flags = 0;
+	int tso;
+
+	/* need: 1 descriptor per page * PAGE_SIZE/FM10K_MAX_DATA_PER_TXD,
+	 *       + 1 desc for skb_headlen/FM10K_MAX_DATA_PER_TXD,
+	 *       + 2 desc gap to keep tail from touching head
+	 * otherwise try next time
+	 */
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
+
+		count += TXD_USE_COUNT(skb_frag_size(frag));
+	}
+
+	if (fm10k_maybe_stop_tx(tx_ring, count + 3)) {
+		tx_ring->tx_stats.tx_busy++;
+		return NETDEV_TX_BUSY;
+	}
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_buffer[tx_ring->next_to_use];
+	first->skb = skb;
+	first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
+	first->gso_segs = 1;
+
+	/* record initial flags and protocol */
+	first->tx_flags = tx_flags;
+
+	tso = fm10k_tso(tx_ring, first);
+	if (tso < 0)
+		goto out_drop;
+	else if (!tso)
+		fm10k_tx_csum(tx_ring, first);
+
+	fm10k_tx_map(tx_ring, first);
+
+	return NETDEV_TX_OK;
+
+out_drop:
+	dev_kfree_skb_any(first->skb);
+	first->skb = NULL;
+
+	return NETDEV_TX_OK;
+}
+
+static u64 fm10k_get_tx_completed(struct fm10k_ring *ring)
+{
+	return ring->stats.packets;
+}
+
+/**
+ * fm10k_get_tx_pending - how many Tx descriptors not processed
+ * @ring: the ring structure
+ * @in_sw: is tx_pending being checked in SW or in HW?
+ */
+u64 fm10k_get_tx_pending(struct fm10k_ring *ring, bool in_sw)
+{
+	struct fm10k_intfc *interface = ring->q_vector->interface;
+	struct fm10k_hw *hw = &interface->hw;
+	u32 head, tail;
+
+	if (likely(in_sw)) {
+		head = ring->next_to_clean;
+		tail = ring->next_to_use;
+	} else {
+		head = fm10k_read_reg(hw, FM10K_TDH(ring->reg_idx));
+		tail = fm10k_read_reg(hw, FM10K_TDT(ring->reg_idx));
+	}
+
+	return ((head <= tail) ? tail : tail + ring->count) - head;
+}
+
+bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring)
+{
+	u32 tx_done = fm10k_get_tx_completed(tx_ring);
+	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
+	u32 tx_pending = fm10k_get_tx_pending(tx_ring, true);
+
+	clear_check_for_tx_hang(tx_ring);
+
+	/* Check for a hung queue, but be thorough. This verifies
+	 * that a transmit has been completed since the previous
+	 * check AND there is at least one packet pending. By
+	 * requiring this to fail twice we avoid races with
+	 * clearing the ARMED bit and conditions where we
+	 * run the check_tx_hang logic with a transmit completion
+	 * pending but without time to complete it yet.
+	 */
+	if (!tx_pending || (tx_done_old != tx_done)) {
+		/* update completed stats and continue */
+		tx_ring->tx_stats.tx_done_old = tx_done;
+		/* reset the countdown */
+		clear_bit(__FM10K_HANG_CHECK_ARMED, tx_ring->state);
+
+		return false;
+	}
+
+	/* make sure it is true for two checks in a row */
+	return test_and_set_bit(__FM10K_HANG_CHECK_ARMED, tx_ring->state);
+}
+
+/**
+ * fm10k_tx_timeout_reset - initiate reset due to Tx timeout
+ * @interface: driver private struct
+ **/
+void fm10k_tx_timeout_reset(struct fm10k_intfc *interface)
+{
+	/* Do the reset outside of interrupt context */
+	if (!test_bit(__FM10K_DOWN, interface->state)) {
+		interface->tx_timeout_count++;
+		set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
+		fm10k_service_event_schedule(interface);
+	}
+}
+
+/**
+ * fm10k_clean_tx_irq - Reclaim resources after transmit completes
+ * @q_vector: structure containing interrupt and ring information
+ * @tx_ring: tx ring to clean
+ * @napi_budget: Used to determine if we are in netpoll
+ **/
+static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector,
+			       struct fm10k_ring *tx_ring, int napi_budget)
+{
+	struct fm10k_intfc *interface = q_vector->interface;
+	struct fm10k_tx_buffer *tx_buffer;
+	struct fm10k_tx_desc *tx_desc;
+	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int budget = q_vector->tx.work_limit;
+	unsigned int i = tx_ring->next_to_clean;
+
+	if (test_bit(__FM10K_DOWN, interface->state))
+		return true;
+
+	tx_buffer = &tx_ring->tx_buffer[i];
+	tx_desc = FM10K_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		struct fm10k_tx_desc *eop_desc = tx_buffer->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->flags & FM10K_TXD_FLAG_DONE))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buffer->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buffer->bytecount;
+		total_packets += tx_buffer->gso_segs;
+
+		/* free the skb */
+		napi_consume_skb(tx_buffer->skb, napi_budget);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+
+		/* clear tx_buffer data */
+		tx_buffer->skb = NULL;
+		dma_unmap_len_set(tx_buffer, len, 0);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(!i)) {
+				i -= tx_ring->count;
+				tx_buffer = tx_ring->tx_buffer;
+				tx_desc = FM10K_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len)) {
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buffer, dma),
+					       dma_unmap_len(tx_buffer, len),
+					       DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buffer, len, 0);
+			}
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buffer = tx_ring->tx_buffer;
+			tx_desc = FM10K_TX_DESC(tx_ring, 0);
+		}
+
+		/* issue prefetch for next Tx descriptor */
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->syncp);
+	q_vector->tx.total_bytes += total_bytes;
+	q_vector->tx.total_packets += total_packets;
+
+	if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring)) {
+		/* schedule immediate reset if we believe we hung */
+		struct fm10k_hw *hw = &interface->hw;
+
+		netif_err(interface, drv, tx_ring->netdev,
+			  "Detected Tx Unit Hang\n"
+			  "  Tx Queue             <%d>\n"
+			  "  TDH, TDT             <%x>, <%x>\n"
+			  "  next_to_use          <%x>\n"
+			  "  next_to_clean        <%x>\n",
+			  tx_ring->queue_index,
+			  fm10k_read_reg(hw, FM10K_TDH(tx_ring->reg_idx)),
+			  fm10k_read_reg(hw, FM10K_TDT(tx_ring->reg_idx)),
+			  tx_ring->next_to_use, i);
+
+		netif_stop_subqueue(tx_ring->netdev,
+				    tx_ring->queue_index);
+
+		netif_info(interface, probe, tx_ring->netdev,
+			   "tx hang %d detected on queue %d, resetting interface\n",
+			   interface->tx_timeout_count + 1,
+			   tx_ring->queue_index);
+
+		fm10k_tx_timeout_reset(interface);
+
+		/* the netdev is about to reset, no point in enabling stuff */
+		return true;
+	}
+
+	/* notify netdev of completed buffers */
+	netdev_tx_completed_queue(txring_txq(tx_ring),
+				  total_packets, total_bytes);
+
+#define TX_WAKE_THRESHOLD min_t(u16, FM10K_MIN_TXD - 1, DESC_NEEDED * 2)
+	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
+		     (fm10k_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (__netif_subqueue_stopped(tx_ring->netdev,
+					     tx_ring->queue_index) &&
+		    !test_bit(__FM10K_DOWN, interface->state)) {
+			netif_wake_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+			++tx_ring->tx_stats.restart_queue;
+		}
+	}
+
+	return !!budget;
+}
+
+/**
+ * fm10k_update_itr - update the dynamic ITR value based on packet size
+ *
+ *      Stores a new ITR value based on strictly on packet size.  The
+ *      divisors and thresholds used by this function were determined based
+ *      on theoretical maximum wire speed and testing data, in order to
+ *      minimize response time while increasing bulk throughput.
+ *
+ * @ring_container: Container for rings to have ITR updated
+ **/
+static void fm10k_update_itr(struct fm10k_ring_container *ring_container)
+{
+	unsigned int avg_wire_size, packets, itr_round;
+
+	/* Only update ITR if we are using adaptive setting */
+	if (!ITR_IS_ADAPTIVE(ring_container->itr))
+		goto clear_counts;
+
+	packets = ring_container->total_packets;
+	if (!packets)
+		goto clear_counts;
+
+	avg_wire_size = ring_container->total_bytes / packets;
+
+	/* The following is a crude approximation of:
+	 *  wmem_default / (size + overhead) = desired_pkts_per_int
+	 *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+	 *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+	 *
+	 * Assuming wmem_default is 212992 and overhead is 640 bytes per
+	 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+	 * formula down to
+	 *
+	 *  (34 * (size + 24)) / (size + 640) = ITR
+	 *
+	 * We first do some math on the packet size and then finally bitshift
+	 * by 8 after rounding up. We also have to account for PCIe link speed
+	 * difference as ITR scales based on this.
+	 */
+	if (avg_wire_size <= 360) {
+		/* Start at 250K ints/sec and gradually drop to 77K ints/sec */
+		avg_wire_size *= 8;
+		avg_wire_size += 376;
+	} else if (avg_wire_size <= 1152) {
+		/* 77K ints/sec to 45K ints/sec */
+		avg_wire_size *= 3;
+		avg_wire_size += 2176;
+	} else if (avg_wire_size <= 1920) {
+		/* 45K ints/sec to 38K ints/sec */
+		avg_wire_size += 4480;
+	} else {
+		/* plateau at a limit of 38K ints/sec */
+		avg_wire_size = 6656;
+	}
+
+	/* Perform final bitshift for division after rounding up to ensure
+	 * that the calculation will never get below a 1. The bit shift
+	 * accounts for changes in the ITR due to PCIe link speed.
+	 */
+	itr_round = READ_ONCE(ring_container->itr_scale) + 8;
+	avg_wire_size += BIT(itr_round) - 1;
+	avg_wire_size >>= itr_round;
+
+	/* write back value and retain adaptive flag */
+	ring_container->itr = avg_wire_size | FM10K_ITR_ADAPTIVE;
+
+clear_counts:
+	ring_container->total_bytes = 0;
+	ring_container->total_packets = 0;
+}
+
+static void fm10k_qv_enable(struct fm10k_q_vector *q_vector)
+{
+	/* Enable auto-mask and clear the current mask */
+	u32 itr = FM10K_ITR_ENABLE;
+
+	/* Update Tx ITR */
+	fm10k_update_itr(&q_vector->tx);
+
+	/* Update Rx ITR */
+	fm10k_update_itr(&q_vector->rx);
+
+	/* Store Tx itr in timer slot 0 */
+	itr |= (q_vector->tx.itr & FM10K_ITR_MAX);
+
+	/* Shift Rx itr to timer slot 1 */
+	itr |= (q_vector->rx.itr & FM10K_ITR_MAX) << FM10K_ITR_INTERVAL1_SHIFT;
+
+	/* Write the final value to the ITR register */
+	writel(itr, q_vector->itr);
+}
+
+static int fm10k_poll(struct napi_struct *napi, int budget)
+{
+	struct fm10k_q_vector *q_vector =
+			       container_of(napi, struct fm10k_q_vector, napi);
+	struct fm10k_ring *ring;
+	int per_ring_budget, work_done = 0;
+	bool clean_complete = true;
+
+	fm10k_for_each_ring(ring, q_vector->tx) {
+		if (!fm10k_clean_tx_irq(q_vector, ring, budget))
+			clean_complete = false;
+	}
+
+	/* Handle case where we are called by netpoll with a budget of 0 */
+	if (budget <= 0)
+		return budget;
+
+	/* attempt to distribute budget to each queue fairly, but don't
+	 * allow the budget to go below 1 because we'll exit polling
+	 */
+	if (q_vector->rx.count > 1)
+		per_ring_budget = max(budget / q_vector->rx.count, 1);
+	else
+		per_ring_budget = budget;
+
+	fm10k_for_each_ring(ring, q_vector->rx) {
+		int work = fm10k_clean_rx_irq(q_vector, ring, per_ring_budget);
+
+		work_done += work;
+		if (work >= per_ring_budget)
+			clean_complete = false;
+	}
+
+	/* If all work not completed, return budget and keep polling */
+	if (!clean_complete)
+		return budget;
+
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done)))
+		fm10k_qv_enable(q_vector);
+
+	return min(work_done, budget - 1);
+}
+
+/**
+ * fm10k_set_qos_queues: Allocate queues for a QOS-enabled device
+ * @interface: board private structure to initialize
+ *
+ * When QoS (Quality of Service) is enabled, allocate queues for
+ * each traffic class.  If multiqueue isn't available,then abort QoS
+ * initialization.
+ *
+ * This function handles all combinations of Qos and RSS.
+ *
+ **/
+static bool fm10k_set_qos_queues(struct fm10k_intfc *interface)
+{
+	struct net_device *dev = interface->netdev;
+	struct fm10k_ring_feature *f;
+	int rss_i, i;
+	int pcs;
+
+	/* Map queue offset and counts onto allocated tx queues */
+	pcs = netdev_get_num_tc(dev);
+
+	if (pcs <= 1)
+		return false;
+
+	/* set QoS mask and indices */
+	f = &interface->ring_feature[RING_F_QOS];
+	f->indices = pcs;
+	f->mask = BIT(fls(pcs - 1)) - 1;
+
+	/* determine the upper limit for our current DCB mode */
+	rss_i = interface->hw.mac.max_queues / pcs;
+	rss_i = BIT(fls(rss_i) - 1);
+
+	/* set RSS mask and indices */
+	f = &interface->ring_feature[RING_F_RSS];
+	rss_i = min_t(u16, rss_i, f->limit);
+	f->indices = rss_i;
+	f->mask = BIT(fls(rss_i - 1)) - 1;
+
+	/* configure pause class to queue mapping */
+	for (i = 0; i < pcs; i++)
+		netdev_set_tc_queue(dev, i, rss_i, rss_i * i);
+
+	interface->num_rx_queues = rss_i * pcs;
+	interface->num_tx_queues = rss_i * pcs;
+
+	return true;
+}
+
+/**
+ * fm10k_set_rss_queues: Allocate queues for RSS
+ * @interface: board private structure to initialize
+ *
+ * This is our "base" multiqueue mode.  RSS (Receive Side Scaling) will try
+ * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
+ *
+ **/
+static bool fm10k_set_rss_queues(struct fm10k_intfc *interface)
+{
+	struct fm10k_ring_feature *f;
+	u16 rss_i;
+
+	f = &interface->ring_feature[RING_F_RSS];
+	rss_i = min_t(u16, interface->hw.mac.max_queues, f->limit);
+
+	/* record indices and power of 2 mask for RSS */
+	f->indices = rss_i;
+	f->mask = BIT(fls(rss_i - 1)) - 1;
+
+	interface->num_rx_queues = rss_i;
+	interface->num_tx_queues = rss_i;
+
+	return true;
+}
+
+/**
+ * fm10k_set_num_queues: Allocate queues for device, feature dependent
+ * @interface: board private structure to initialize
+ *
+ * This is the top level queue allocation routine.  The order here is very
+ * important, starting with the "most" number of features turned on at once,
+ * and ending with the smallest set of features.  This way large combinations
+ * can be allocated if they're turned on, and smaller combinations are the
+ * fall through conditions.
+ *
+ **/
+static void fm10k_set_num_queues(struct fm10k_intfc *interface)
+{
+	/* Attempt to setup QoS and RSS first */
+	if (fm10k_set_qos_queues(interface))
+		return;
+
+	/* If we don't have QoS, just fallback to only RSS. */
+	fm10k_set_rss_queues(interface);
+}
+
+/**
+ * fm10k_reset_num_queues - Reset the number of queues to zero
+ * @interface: board private structure
+ *
+ * This function should be called whenever we need to reset the number of
+ * queues after an error condition.
+ */
+static void fm10k_reset_num_queues(struct fm10k_intfc *interface)
+{
+	interface->num_tx_queues = 0;
+	interface->num_rx_queues = 0;
+	interface->num_q_vectors = 0;
+}
+
+/**
+ * fm10k_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @interface: board private structure to initialize
+ * @v_count: q_vectors allocated on interface, used for ring interleaving
+ * @v_idx: index of vector in interface struct
+ * @txr_count: total number of Tx rings to allocate
+ * @txr_idx: index of first Tx ring to allocate
+ * @rxr_count: total number of Rx rings to allocate
+ * @rxr_idx: index of first Rx ring to allocate
+ *
+ * We allocate one q_vector.  If allocation fails we return -ENOMEM.
+ **/
+static int fm10k_alloc_q_vector(struct fm10k_intfc *interface,
+				unsigned int v_count, unsigned int v_idx,
+				unsigned int txr_count, unsigned int txr_idx,
+				unsigned int rxr_count, unsigned int rxr_idx)
+{
+	struct fm10k_q_vector *q_vector;
+	struct fm10k_ring *ring;
+	int ring_count;
+
+	ring_count = txr_count + rxr_count;
+
+	/* allocate q_vector and rings */
+	q_vector = kzalloc(struct_size(q_vector, ring, ring_count), GFP_KERNEL);
+	if (!q_vector)
+		return -ENOMEM;
+
+	/* initialize NAPI */
+	netif_napi_add(interface->netdev, &q_vector->napi, fm10k_poll);
+
+	/* tie q_vector and interface together */
+	interface->q_vector[v_idx] = q_vector;
+	q_vector->interface = interface;
+	q_vector->v_idx = v_idx;
+
+	/* initialize pointer to rings */
+	ring = q_vector->ring;
+
+	/* save Tx ring container info */
+	q_vector->tx.ring = ring;
+	q_vector->tx.work_limit = FM10K_DEFAULT_TX_WORK;
+	q_vector->tx.itr = interface->tx_itr;
+	q_vector->tx.itr_scale = interface->hw.mac.itr_scale;
+	q_vector->tx.count = txr_count;
+
+	while (txr_count) {
+		/* assign generic ring traits */
+		ring->dev = &interface->pdev->dev;
+		ring->netdev = interface->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* apply Tx specific ring traits */
+		ring->count = interface->tx_ring_count;
+		ring->queue_index = txr_idx;
+
+		/* assign ring to interface */
+		interface->tx_ring[txr_idx] = ring;
+
+		/* update count and index */
+		txr_count--;
+		txr_idx += v_count;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	/* save Rx ring container info */
+	q_vector->rx.ring = ring;
+	q_vector->rx.itr = interface->rx_itr;
+	q_vector->rx.itr_scale = interface->hw.mac.itr_scale;
+	q_vector->rx.count = rxr_count;
+
+	while (rxr_count) {
+		/* assign generic ring traits */
+		ring->dev = &interface->pdev->dev;
+		ring->netdev = interface->netdev;
+		rcu_assign_pointer(ring->l2_accel, interface->l2_accel);
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* apply Rx specific ring traits */
+		ring->count = interface->rx_ring_count;
+		ring->queue_index = rxr_idx;
+
+		/* assign ring to interface */
+		interface->rx_ring[rxr_idx] = ring;
+
+		/* update count and index */
+		rxr_count--;
+		rxr_idx += v_count;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	fm10k_dbg_q_vector_init(q_vector);
+
+	return 0;
+}
+
+/**
+ * fm10k_free_q_vector - Free memory allocated for specific interrupt vector
+ * @interface: board private structure to initialize
+ * @v_idx: Index of vector to be freed
+ *
+ * This function frees the memory allocated to the q_vector.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void fm10k_free_q_vector(struct fm10k_intfc *interface, int v_idx)
+{
+	struct fm10k_q_vector *q_vector = interface->q_vector[v_idx];
+	struct fm10k_ring *ring;
+
+	fm10k_dbg_q_vector_exit(q_vector);
+
+	fm10k_for_each_ring(ring, q_vector->tx)
+		interface->tx_ring[ring->queue_index] = NULL;
+
+	fm10k_for_each_ring(ring, q_vector->rx)
+		interface->rx_ring[ring->queue_index] = NULL;
+
+	interface->q_vector[v_idx] = NULL;
+	netif_napi_del(&q_vector->napi);
+	kfree_rcu(q_vector, rcu);
+}
+
+/**
+ * fm10k_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @interface: board private structure to initialize
+ *
+ * We allocate one q_vector per queue interrupt.  If allocation fails we
+ * return -ENOMEM.
+ **/
+static int fm10k_alloc_q_vectors(struct fm10k_intfc *interface)
+{
+	unsigned int q_vectors = interface->num_q_vectors;
+	unsigned int rxr_remaining = interface->num_rx_queues;
+	unsigned int txr_remaining = interface->num_tx_queues;
+	unsigned int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+	int err;
+
+	if (q_vectors >= (rxr_remaining + txr_remaining)) {
+		for (; rxr_remaining; v_idx++) {
+			err = fm10k_alloc_q_vector(interface, q_vectors, v_idx,
+						   0, 0, 1, rxr_idx);
+			if (err)
+				goto err_out;
+
+			/* update counts and index */
+			rxr_remaining--;
+			rxr_idx++;
+		}
+	}
+
+	for (; v_idx < q_vectors; v_idx++) {
+		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
+		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
+
+		err = fm10k_alloc_q_vector(interface, q_vectors, v_idx,
+					   tqpv, txr_idx,
+					   rqpv, rxr_idx);
+
+		if (err)
+			goto err_out;
+
+		/* update counts and index */
+		rxr_remaining -= rqpv;
+		txr_remaining -= tqpv;
+		rxr_idx++;
+		txr_idx++;
+	}
+
+	return 0;
+
+err_out:
+	fm10k_reset_num_queues(interface);
+
+	while (v_idx--)
+		fm10k_free_q_vector(interface, v_idx);
+
+	return -ENOMEM;
+}
+
+/**
+ * fm10k_free_q_vectors - Free memory allocated for interrupt vectors
+ * @interface: board private structure to initialize
+ *
+ * This function frees the memory allocated to the q_vectors.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void fm10k_free_q_vectors(struct fm10k_intfc *interface)
+{
+	int v_idx = interface->num_q_vectors;
+
+	fm10k_reset_num_queues(interface);
+
+	while (v_idx--)
+		fm10k_free_q_vector(interface, v_idx);
+}
+
+/**
+ * fm10k_reset_msix_capability - reset MSI-X capability
+ * @interface: board private structure to initialize
+ *
+ * Reset the MSI-X capability back to its starting state
+ **/
+static void fm10k_reset_msix_capability(struct fm10k_intfc *interface)
+{
+	pci_disable_msix(interface->pdev);
+	kfree(interface->msix_entries);
+	interface->msix_entries = NULL;
+}
+
+/**
+ * fm10k_init_msix_capability - configure MSI-X capability
+ * @interface: board private structure to initialize
+ *
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
+ **/
+static int fm10k_init_msix_capability(struct fm10k_intfc *interface)
+{
+	struct fm10k_hw *hw = &interface->hw;
+	int v_budget, vector;
+
+	/* It's easy to be greedy for MSI-X vectors, but it really
+	 * doesn't do us much good if we have a lot more vectors
+	 * than CPU's.  So let's be conservative and only ask for
+	 * (roughly) the same number of vectors as there are CPU's.
+	 * the default is to use pairs of vectors
+	 */
+	v_budget = max(interface->num_rx_queues, interface->num_tx_queues);
+	v_budget = min_t(u16, v_budget, num_online_cpus());
+
+	/* account for vectors not related to queues */
+	v_budget += NON_Q_VECTORS;
+
+	/* At the same time, hardware can only support a maximum of
+	 * hw.mac->max_msix_vectors vectors.  With features
+	 * such as RSS and VMDq, we can easily surpass the number of Rx and Tx
+	 * descriptor queues supported by our device.  Thus, we cap it off in
+	 * those rare cases where the cpu count also exceeds our vector limit.
+	 */
+	v_budget = min_t(int, v_budget, hw->mac.max_msix_vectors);
+
+	/* A failure in MSI-X entry allocation is fatal. */
+	interface->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry),
+					  GFP_KERNEL);
+	if (!interface->msix_entries)
+		return -ENOMEM;
+
+	/* populate entry values */
+	for (vector = 0; vector < v_budget; vector++)
+		interface->msix_entries[vector].entry = vector;
+
+	/* Attempt to enable MSI-X with requested value */
+	v_budget = pci_enable_msix_range(interface->pdev,
+					 interface->msix_entries,
+					 MIN_MSIX_COUNT(hw),
+					 v_budget);
+	if (v_budget < 0) {
+		kfree(interface->msix_entries);
+		interface->msix_entries = NULL;
+		return v_budget;
+	}
+
+	/* record the number of queues available for q_vectors */
+	interface->num_q_vectors = v_budget - NON_Q_VECTORS;
+
+	return 0;
+}
+
+/**
+ * fm10k_cache_ring_qos - Descriptor ring to register mapping for QoS
+ * @interface: Interface structure continaining rings and devices
+ *
+ * Cache the descriptor ring offsets for Qos
+ **/
+static bool fm10k_cache_ring_qos(struct fm10k_intfc *interface)
+{
+	struct net_device *dev = interface->netdev;
+	int pc, offset, rss_i, i;
+	u16 pc_stride = interface->ring_feature[RING_F_QOS].mask + 1;
+	u8 num_pcs = netdev_get_num_tc(dev);
+
+	if (num_pcs <= 1)
+		return false;
+
+	rss_i = interface->ring_feature[RING_F_RSS].indices;
+
+	for (pc = 0, offset = 0; pc < num_pcs; pc++, offset += rss_i) {
+		int q_idx = pc;
+
+		for (i = 0; i < rss_i; i++) {
+			interface->tx_ring[offset + i]->reg_idx = q_idx;
+			interface->tx_ring[offset + i]->qos_pc = pc;
+			interface->rx_ring[offset + i]->reg_idx = q_idx;
+			interface->rx_ring[offset + i]->qos_pc = pc;
+			q_idx += pc_stride;
+		}
+	}
+
+	return true;
+}
+
+/**
+ * fm10k_cache_ring_rss - Descriptor ring to register mapping for RSS
+ * @interface: Interface structure continaining rings and devices
+ *
+ * Cache the descriptor ring offsets for RSS
+ **/
+static void fm10k_cache_ring_rss(struct fm10k_intfc *interface)
+{
+	int i;
+
+	for (i = 0; i < interface->num_rx_queues; i++)
+		interface->rx_ring[i]->reg_idx = i;
+
+	for (i = 0; i < interface->num_tx_queues; i++)
+		interface->tx_ring[i]->reg_idx = i;
+}
+
+/**
+ * fm10k_assign_rings - Map rings to network devices
+ * @interface: Interface structure containing rings and devices
+ *
+ * This function is meant to go though and configure both the network
+ * devices so that they contain rings, and configure the rings so that
+ * they function with their network devices.
+ **/
+static void fm10k_assign_rings(struct fm10k_intfc *interface)
+{
+	if (fm10k_cache_ring_qos(interface))
+		return;
+
+	fm10k_cache_ring_rss(interface);
+}
+
+static void fm10k_init_reta(struct fm10k_intfc *interface)
+{
+	u16 i, rss_i = interface->ring_feature[RING_F_RSS].indices;
+	u32 reta;
+
+	/* If the Rx flow indirection table has been configured manually, we
+	 * need to maintain it when possible.
+	 */
+	if (netif_is_rxfh_configured(interface->netdev)) {
+		for (i = FM10K_RETA_SIZE; i--;) {
+			reta = interface->reta[i];
+			if ((((reta << 24) >> 24) < rss_i) &&
+			    (((reta << 16) >> 24) < rss_i) &&
+			    (((reta <<  8) >> 24) < rss_i) &&
+			    (((reta)       >> 24) < rss_i))
+				continue;
+
+			/* this should never happen */
+			dev_err(&interface->pdev->dev,
+				"RSS indirection table assigned flows out of queue bounds. Reconfiguring.\n");
+			goto repopulate_reta;
+		}
+
+		/* do nothing if all of the elements are in bounds */
+		return;
+	}
+
+repopulate_reta:
+	fm10k_write_reta(interface, NULL);
+}
+
+/**
+ * fm10k_init_queueing_scheme - Determine proper queueing scheme
+ * @interface: board private structure to initialize
+ *
+ * We determine which queueing scheme to use based on...
+ * - Hardware queue count (num_*_queues)
+ *   - defined by miscellaneous hardware support/features (RSS, etc.)
+ **/
+int fm10k_init_queueing_scheme(struct fm10k_intfc *interface)
+{
+	int err;
+
+	/* Number of supported queues */
+	fm10k_set_num_queues(interface);
+
+	/* Configure MSI-X capability */
+	err = fm10k_init_msix_capability(interface);
+	if (err) {
+		dev_err(&interface->pdev->dev,
+			"Unable to initialize MSI-X capability\n");
+		goto err_init_msix;
+	}
+
+	/* Allocate memory for queues */
+	err = fm10k_alloc_q_vectors(interface);
+	if (err) {
+		dev_err(&interface->pdev->dev,
+			"Unable to allocate queue vectors\n");
+		goto err_alloc_q_vectors;
+	}
+
+	/* Map rings to devices, and map devices to physical queues */
+	fm10k_assign_rings(interface);
+
+	/* Initialize RSS redirection table */
+	fm10k_init_reta(interface);
+
+	return 0;
+
+err_alloc_q_vectors:
+	fm10k_reset_msix_capability(interface);
+err_init_msix:
+	fm10k_reset_num_queues(interface);
+	return err;
+}
+
+/**
+ * fm10k_clear_queueing_scheme - Clear the current queueing scheme settings
+ * @interface: board private structure to clear queueing scheme on
+ *
+ * We go through and clear queueing specific resources and reset the structure
+ * to pre-load conditions
+ **/
+void fm10k_clear_queueing_scheme(struct fm10k_intfc *interface)
+{
+	fm10k_free_q_vectors(interface);
+	fm10k_reset_msix_capability(interface);
+}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
new file mode 100644
index 0000000000..87fa5874f1
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
@@ -0,0 +1,2182 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
+
+#include "fm10k_common.h"
+
+/**
+ *  fm10k_fifo_init - Initialize a message FIFO
+ *  @fifo: pointer to FIFO
+ *  @buffer: pointer to memory to be used to store FIFO
+ *  @size: maximum message size to store in FIFO, must be 2^n - 1
+ **/
+static void fm10k_fifo_init(struct fm10k_mbx_fifo *fifo, u32 *buffer, u16 size)
+{
+	fifo->buffer = buffer;
+	fifo->size = size;
+	fifo->head = 0;
+	fifo->tail = 0;
+}
+
+/**
+ *  fm10k_fifo_used - Retrieve used space in FIFO
+ *  @fifo: pointer to FIFO
+ *
+ *  This function returns the number of DWORDs used in the FIFO
+ **/
+static u16 fm10k_fifo_used(struct fm10k_mbx_fifo *fifo)
+{
+	return fifo->tail - fifo->head;
+}
+
+/**
+ *  fm10k_fifo_unused - Retrieve unused space in FIFO
+ *  @fifo: pointer to FIFO
+ *
+ *  This function returns the number of unused DWORDs in the FIFO
+ **/
+static u16 fm10k_fifo_unused(struct fm10k_mbx_fifo *fifo)
+{
+	return fifo->size + fifo->head - fifo->tail;
+}
+
+/**
+ *  fm10k_fifo_empty - Test to verify if FIFO is empty
+ *  @fifo: pointer to FIFO
+ *
+ *  This function returns true if the FIFO is empty, else false
+ **/
+static bool fm10k_fifo_empty(struct fm10k_mbx_fifo *fifo)
+{
+	return fifo->head == fifo->tail;
+}
+
+/**
+ *  fm10k_fifo_head_offset - returns indices of head with given offset
+ *  @fifo: pointer to FIFO
+ *  @offset: offset to add to head
+ *
+ *  This function returns the indices into the FIFO based on head + offset
+ **/
+static u16 fm10k_fifo_head_offset(struct fm10k_mbx_fifo *fifo, u16 offset)
+{
+	return (fifo->head + offset) & (fifo->size - 1);
+}
+
+/**
+ *  fm10k_fifo_tail_offset - returns indices of tail with given offset
+ *  @fifo: pointer to FIFO
+ *  @offset: offset to add to tail
+ *
+ *  This function returns the indices into the FIFO based on tail + offset
+ **/
+static u16 fm10k_fifo_tail_offset(struct fm10k_mbx_fifo *fifo, u16 offset)
+{
+	return (fifo->tail + offset) & (fifo->size - 1);
+}
+
+/**
+ *  fm10k_fifo_head_len - Retrieve length of first message in FIFO
+ *  @fifo: pointer to FIFO
+ *
+ *  This function returns the size of the first message in the FIFO
+ **/
+static u16 fm10k_fifo_head_len(struct fm10k_mbx_fifo *fifo)
+{
+	u32 *head = fifo->buffer + fm10k_fifo_head_offset(fifo, 0);
+
+	/* verify there is at least 1 DWORD in the fifo so *head is valid */
+	if (fm10k_fifo_empty(fifo))
+		return 0;
+
+	/* retieve the message length */
+	return FM10K_TLV_DWORD_LEN(*head);
+}
+
+/**
+ *  fm10k_fifo_head_drop - Drop the first message in FIFO
+ *  @fifo: pointer to FIFO
+ *
+ *  This function returns the size of the message dropped from the FIFO
+ **/
+static u16 fm10k_fifo_head_drop(struct fm10k_mbx_fifo *fifo)
+{
+	u16 len = fm10k_fifo_head_len(fifo);
+
+	/* update head so it is at the start of next frame */
+	fifo->head += len;
+
+	return len;
+}
+
+/**
+ *  fm10k_fifo_drop_all - Drop all messages in FIFO
+ *  @fifo: pointer to FIFO
+ *
+ *  This function resets the head pointer to drop all messages in the FIFO and
+ *  ensure the FIFO is empty.
+ **/
+static void fm10k_fifo_drop_all(struct fm10k_mbx_fifo *fifo)
+{
+	fifo->head = fifo->tail;
+}
+
+/**
+ *  fm10k_mbx_index_len - Convert a head/tail index into a length value
+ *  @mbx: pointer to mailbox
+ *  @head: head index
+ *  @tail: head index
+ *
+ *  This function takes the head and tail index and determines the length
+ *  of the data indicated by this pair.
+ **/
+static u16 fm10k_mbx_index_len(struct fm10k_mbx_info *mbx, u16 head, u16 tail)
+{
+	u16 len = tail - head;
+
+	/* we wrapped so subtract 2, one for index 0, one for all 1s index */
+	if (len > tail)
+		len -= 2;
+
+	return len & ((mbx->mbmem_len << 1) - 1);
+}
+
+/**
+ *  fm10k_mbx_tail_add - Determine new tail value with added offset
+ *  @mbx: pointer to mailbox
+ *  @offset: length to add to tail offset
+ *
+ *  This function takes the local tail index and recomputes it for
+ *  a given length added as an offset.
+ **/
+static u16 fm10k_mbx_tail_add(struct fm10k_mbx_info *mbx, u16 offset)
+{
+	u16 tail = (mbx->tail + offset + 1) & ((mbx->mbmem_len << 1) - 1);
+
+	/* add/sub 1 because we cannot have offset 0 or all 1s */
+	return (tail > mbx->tail) ? --tail : ++tail;
+}
+
+/**
+ *  fm10k_mbx_tail_sub - Determine new tail value with subtracted offset
+ *  @mbx: pointer to mailbox
+ *  @offset: length to add to tail offset
+ *
+ *  This function takes the local tail index and recomputes it for
+ *  a given length added as an offset.
+ **/
+static u16 fm10k_mbx_tail_sub(struct fm10k_mbx_info *mbx, u16 offset)
+{
+	u16 tail = (mbx->tail - offset - 1) & ((mbx->mbmem_len << 1) - 1);
+
+	/* sub/add 1 because we cannot have offset 0 or all 1s */
+	return (tail < mbx->tail) ? ++tail : --tail;
+}
+
+/**
+ *  fm10k_mbx_head_add - Determine new head value with added offset
+ *  @mbx: pointer to mailbox
+ *  @offset: length to add to head offset
+ *
+ *  This function takes the local head index and recomputes it for
+ *  a given length added as an offset.
+ **/
+static u16 fm10k_mbx_head_add(struct fm10k_mbx_info *mbx, u16 offset)
+{
+	u16 head = (mbx->head + offset + 1) & ((mbx->mbmem_len << 1) - 1);
+
+	/* add/sub 1 because we cannot have offset 0 or all 1s */
+	return (head > mbx->head) ? --head : ++head;
+}
+
+/**
+ *  fm10k_mbx_head_sub - Determine new head value with subtracted offset
+ *  @mbx: pointer to mailbox
+ *  @offset: length to add to head offset
+ *
+ *  This function takes the local head index and recomputes it for
+ *  a given length added as an offset.
+ **/
+static u16 fm10k_mbx_head_sub(struct fm10k_mbx_info *mbx, u16 offset)
+{
+	u16 head = (mbx->head - offset - 1) & ((mbx->mbmem_len << 1) - 1);
+
+	/* sub/add 1 because we cannot have offset 0 or all 1s */
+	return (head < mbx->head) ? ++head : --head;
+}
+
+/**
+ *  fm10k_mbx_pushed_tail_len - Retrieve the length of message being pushed
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will return the length of the message currently being
+ *  pushed onto the tail of the Rx queue.
+ **/
+static u16 fm10k_mbx_pushed_tail_len(struct fm10k_mbx_info *mbx)
+{
+	u32 *tail = mbx->rx.buffer + fm10k_fifo_tail_offset(&mbx->rx, 0);
+
+	/* pushed tail is only valid if pushed is set */
+	if (!mbx->pushed)
+		return 0;
+
+	return FM10K_TLV_DWORD_LEN(*tail);
+}
+
+/**
+ *  fm10k_fifo_write_copy - pulls data off of msg and places it in FIFO
+ *  @fifo: pointer to FIFO
+ *  @msg: message array to populate
+ *  @tail_offset: additional offset to add to tail pointer
+ *  @len: length of FIFO to copy into message header
+ *
+ *  This function will take a message and copy it into a section of the
+ *  FIFO.  In order to get something into a location other than just
+ *  the tail you can use tail_offset to adjust the pointer.
+ **/
+static void fm10k_fifo_write_copy(struct fm10k_mbx_fifo *fifo,
+				  const u32 *msg, u16 tail_offset, u16 len)
+{
+	u16 end = fm10k_fifo_tail_offset(fifo, tail_offset);
+	u32 *tail = fifo->buffer + end;
+
+	/* track when we should cross the end of the FIFO */
+	end = fifo->size - end;
+
+	/* copy end of message before start of message */
+	if (end < len)
+		memcpy(fifo->buffer, msg + end, (len - end) << 2);
+	else
+		end = len;
+
+	/* Copy remaining message into Tx FIFO */
+	memcpy(tail, msg, end << 2);
+}
+
+/**
+ *  fm10k_fifo_enqueue - Enqueues the message to the tail of the FIFO
+ *  @fifo: pointer to FIFO
+ *  @msg: message array to read
+ *
+ *  This function enqueues a message up to the size specified by the length
+ *  contained in the first DWORD of the message and will place at the tail
+ *  of the FIFO.  It will return 0 on success, or a negative value on error.
+ **/
+static s32 fm10k_fifo_enqueue(struct fm10k_mbx_fifo *fifo, const u32 *msg)
+{
+	u16 len = FM10K_TLV_DWORD_LEN(*msg);
+
+	/* verify parameters */
+	if (len > fifo->size)
+		return FM10K_MBX_ERR_SIZE;
+
+	/* verify there is room for the message */
+	if (len > fm10k_fifo_unused(fifo))
+		return FM10K_MBX_ERR_NO_SPACE;
+
+	/* Copy message into FIFO */
+	fm10k_fifo_write_copy(fifo, msg, 0, len);
+
+	/* memory barrier to guarantee FIFO is written before tail update */
+	wmb();
+
+	/* Update Tx FIFO tail */
+	fifo->tail += len;
+
+	return 0;
+}
+
+/**
+ *  fm10k_mbx_validate_msg_size - Validate incoming message based on size
+ *  @mbx: pointer to mailbox
+ *  @len: length of data pushed onto buffer
+ *
+ *  This function analyzes the frame and will return a non-zero value when
+ *  the start of a message larger than the mailbox is detected.
+ **/
+static u16 fm10k_mbx_validate_msg_size(struct fm10k_mbx_info *mbx, u16 len)
+{
+	struct fm10k_mbx_fifo *fifo = &mbx->rx;
+	u16 total_len = 0, msg_len;
+
+	/* length should include previous amounts pushed */
+	len += mbx->pushed;
+
+	/* offset in message is based off of current message size */
+	do {
+		u32 *msg;
+
+		msg = fifo->buffer + fm10k_fifo_tail_offset(fifo, total_len);
+		msg_len = FM10K_TLV_DWORD_LEN(*msg);
+		total_len += msg_len;
+	} while (total_len < len);
+
+	/* message extends out of pushed section, but fits in FIFO */
+	if ((len < total_len) && (msg_len <= mbx->max_size))
+		return 0;
+
+	/* return length of invalid section */
+	return (len < total_len) ? len : (len - total_len);
+}
+
+/**
+ *  fm10k_mbx_write_copy - pulls data off of Tx FIFO and places it in mbmem
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will take a section of the Tx FIFO and copy it into the
+ *  mailbox memory.  The offset in mbmem is based on the lower bits of the
+ *  tail and len determines the length to copy.
+ **/
+static void fm10k_mbx_write_copy(struct fm10k_hw *hw,
+				 struct fm10k_mbx_info *mbx)
+{
+	struct fm10k_mbx_fifo *fifo = &mbx->tx;
+	u32 mbmem = mbx->mbmem_reg;
+	u32 *head = fifo->buffer;
+	u16 end, len, tail, mask;
+
+	if (!mbx->tail_len)
+		return;
+
+	/* determine data length and mbmem tail index */
+	mask = mbx->mbmem_len - 1;
+	len = mbx->tail_len;
+	tail = fm10k_mbx_tail_sub(mbx, len);
+	if (tail > mask)
+		tail++;
+
+	/* determine offset in the ring */
+	end = fm10k_fifo_head_offset(fifo, mbx->pulled);
+	head += end;
+
+	/* memory barrier to guarantee data is ready to be read */
+	rmb();
+
+	/* Copy message from Tx FIFO */
+	for (end = fifo->size - end; len; head = fifo->buffer) {
+		do {
+			/* adjust tail to match offset for FIFO */
+			tail &= mask;
+			if (!tail)
+				tail++;
+
+			mbx->tx_mbmem_pulled++;
+
+			/* write message to hardware FIFO */
+			fm10k_write_reg(hw, mbmem + tail++, *(head++));
+		} while (--len && --end);
+	}
+}
+
+/**
+ *  fm10k_mbx_pull_head - Pulls data off of head of Tx FIFO
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *  @head: acknowledgement number last received
+ *
+ *  This function will push the tail index forward based on the remote
+ *  head index.  It will then pull up to mbmem_len DWORDs off of the
+ *  head of the FIFO and will place it in the MBMEM registers
+ *  associated with the mailbox.
+ **/
+static void fm10k_mbx_pull_head(struct fm10k_hw *hw,
+				struct fm10k_mbx_info *mbx, u16 head)
+{
+	u16 mbmem_len, len, ack = fm10k_mbx_index_len(mbx, head, mbx->tail);
+	struct fm10k_mbx_fifo *fifo = &mbx->tx;
+
+	/* update number of bytes pulled and update bytes in transit */
+	mbx->pulled += mbx->tail_len - ack;
+
+	/* determine length of data to pull, reserve space for mbmem header */
+	mbmem_len = mbx->mbmem_len - 1;
+	len = fm10k_fifo_used(fifo) - mbx->pulled;
+	if (len > mbmem_len)
+		len = mbmem_len;
+
+	/* update tail and record number of bytes in transit */
+	mbx->tail = fm10k_mbx_tail_add(mbx, len - ack);
+	mbx->tail_len = len;
+
+	/* drop pulled messages from the FIFO */
+	for (len = fm10k_fifo_head_len(fifo);
+	     len && (mbx->pulled >= len);
+	     len = fm10k_fifo_head_len(fifo)) {
+		mbx->pulled -= fm10k_fifo_head_drop(fifo);
+		mbx->tx_messages++;
+		mbx->tx_dwords += len;
+	}
+
+	/* Copy message out from the Tx FIFO */
+	fm10k_mbx_write_copy(hw, mbx);
+}
+
+/**
+ *  fm10k_mbx_read_copy - pulls data off of mbmem and places it in Rx FIFO
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will take a section of the mailbox memory and copy it
+ *  into the Rx FIFO.  The offset is based on the lower bits of the
+ *  head and len determines the length to copy.
+ **/
+static void fm10k_mbx_read_copy(struct fm10k_hw *hw,
+				struct fm10k_mbx_info *mbx)
+{
+	struct fm10k_mbx_fifo *fifo = &mbx->rx;
+	u32 mbmem = mbx->mbmem_reg ^ mbx->mbmem_len;
+	u32 *tail = fifo->buffer;
+	u16 end, len, head;
+
+	/* determine data length and mbmem head index */
+	len = mbx->head_len;
+	head = fm10k_mbx_head_sub(mbx, len);
+	if (head >= mbx->mbmem_len)
+		head++;
+
+	/* determine offset in the ring */
+	end = fm10k_fifo_tail_offset(fifo, mbx->pushed);
+	tail += end;
+
+	/* Copy message into Rx FIFO */
+	for (end = fifo->size - end; len; tail = fifo->buffer) {
+		do {
+			/* adjust head to match offset for FIFO */
+			head &= mbx->mbmem_len - 1;
+			if (!head)
+				head++;
+
+			mbx->rx_mbmem_pushed++;
+
+			/* read message from hardware FIFO */
+			*(tail++) = fm10k_read_reg(hw, mbmem + head++);
+		} while (--len && --end);
+	}
+
+	/* memory barrier to guarantee FIFO is written before tail update */
+	wmb();
+}
+
+/**
+ *  fm10k_mbx_push_tail - Pushes up to 15 DWORDs on to tail of FIFO
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *  @tail: tail index of message
+ *
+ *  This function will first validate the tail index and size for the
+ *  incoming message.  It then updates the acknowledgment number and
+ *  copies the data into the FIFO.  It will return the number of messages
+ *  dequeued on success and a negative value on error.
+ **/
+static s32 fm10k_mbx_push_tail(struct fm10k_hw *hw,
+			       struct fm10k_mbx_info *mbx,
+			       u16 tail)
+{
+	struct fm10k_mbx_fifo *fifo = &mbx->rx;
+	u16 len, seq = fm10k_mbx_index_len(mbx, mbx->head, tail);
+
+	/* determine length of data to push */
+	len = fm10k_fifo_unused(fifo) - mbx->pushed;
+	if (len > seq)
+		len = seq;
+
+	/* update head and record bytes received */
+	mbx->head = fm10k_mbx_head_add(mbx, len);
+	mbx->head_len = len;
+
+	/* nothing to do if there is no data */
+	if (!len)
+		return 0;
+
+	/* Copy msg into Rx FIFO */
+	fm10k_mbx_read_copy(hw, mbx);
+
+	/* determine if there are any invalid lengths in message */
+	if (fm10k_mbx_validate_msg_size(mbx, len))
+		return FM10K_MBX_ERR_SIZE;
+
+	/* Update pushed */
+	mbx->pushed += len;
+
+	/* flush any completed messages */
+	for (len = fm10k_mbx_pushed_tail_len(mbx);
+	     len && (mbx->pushed >= len);
+	     len = fm10k_mbx_pushed_tail_len(mbx)) {
+		fifo->tail += len;
+		mbx->pushed -= len;
+		mbx->rx_messages++;
+		mbx->rx_dwords += len;
+	}
+
+	return 0;
+}
+
+/* pre-generated data for generating the CRC based on the poly 0xAC9A. */
+static const u16 fm10k_crc_16b_table[256] = {
+	0x0000, 0x7956, 0xF2AC, 0x8BFA, 0xBC6D, 0xC53B, 0x4EC1, 0x3797,
+	0x21EF, 0x58B9, 0xD343, 0xAA15, 0x9D82, 0xE4D4, 0x6F2E, 0x1678,
+	0x43DE, 0x3A88, 0xB172, 0xC824, 0xFFB3, 0x86E5, 0x0D1F, 0x7449,
+	0x6231, 0x1B67, 0x909D, 0xE9CB, 0xDE5C, 0xA70A, 0x2CF0, 0x55A6,
+	0x87BC, 0xFEEA, 0x7510, 0x0C46, 0x3BD1, 0x4287, 0xC97D, 0xB02B,
+	0xA653, 0xDF05, 0x54FF, 0x2DA9, 0x1A3E, 0x6368, 0xE892, 0x91C4,
+	0xC462, 0xBD34, 0x36CE, 0x4F98, 0x780F, 0x0159, 0x8AA3, 0xF3F5,
+	0xE58D, 0x9CDB, 0x1721, 0x6E77, 0x59E0, 0x20B6, 0xAB4C, 0xD21A,
+	0x564D, 0x2F1B, 0xA4E1, 0xDDB7, 0xEA20, 0x9376, 0x188C, 0x61DA,
+	0x77A2, 0x0EF4, 0x850E, 0xFC58, 0xCBCF, 0xB299, 0x3963, 0x4035,
+	0x1593, 0x6CC5, 0xE73F, 0x9E69, 0xA9FE, 0xD0A8, 0x5B52, 0x2204,
+	0x347C, 0x4D2A, 0xC6D0, 0xBF86, 0x8811, 0xF147, 0x7ABD, 0x03EB,
+	0xD1F1, 0xA8A7, 0x235D, 0x5A0B, 0x6D9C, 0x14CA, 0x9F30, 0xE666,
+	0xF01E, 0x8948, 0x02B2, 0x7BE4, 0x4C73, 0x3525, 0xBEDF, 0xC789,
+	0x922F, 0xEB79, 0x6083, 0x19D5, 0x2E42, 0x5714, 0xDCEE, 0xA5B8,
+	0xB3C0, 0xCA96, 0x416C, 0x383A, 0x0FAD, 0x76FB, 0xFD01, 0x8457,
+	0xAC9A, 0xD5CC, 0x5E36, 0x2760, 0x10F7, 0x69A1, 0xE25B, 0x9B0D,
+	0x8D75, 0xF423, 0x7FD9, 0x068F, 0x3118, 0x484E, 0xC3B4, 0xBAE2,
+	0xEF44, 0x9612, 0x1DE8, 0x64BE, 0x5329, 0x2A7F, 0xA185, 0xD8D3,
+	0xCEAB, 0xB7FD, 0x3C07, 0x4551, 0x72C6, 0x0B90, 0x806A, 0xF93C,
+	0x2B26, 0x5270, 0xD98A, 0xA0DC, 0x974B, 0xEE1D, 0x65E7, 0x1CB1,
+	0x0AC9, 0x739F, 0xF865, 0x8133, 0xB6A4, 0xCFF2, 0x4408, 0x3D5E,
+	0x68F8, 0x11AE, 0x9A54, 0xE302, 0xD495, 0xADC3, 0x2639, 0x5F6F,
+	0x4917, 0x3041, 0xBBBB, 0xC2ED, 0xF57A, 0x8C2C, 0x07D6, 0x7E80,
+	0xFAD7, 0x8381, 0x087B, 0x712D, 0x46BA, 0x3FEC, 0xB416, 0xCD40,
+	0xDB38, 0xA26E, 0x2994, 0x50C2, 0x6755, 0x1E03, 0x95F9, 0xECAF,
+	0xB909, 0xC05F, 0x4BA5, 0x32F3, 0x0564, 0x7C32, 0xF7C8, 0x8E9E,
+	0x98E6, 0xE1B0, 0x6A4A, 0x131C, 0x248B, 0x5DDD, 0xD627, 0xAF71,
+	0x7D6B, 0x043D, 0x8FC7, 0xF691, 0xC106, 0xB850, 0x33AA, 0x4AFC,
+	0x5C84, 0x25D2, 0xAE28, 0xD77E, 0xE0E9, 0x99BF, 0x1245, 0x6B13,
+	0x3EB5, 0x47E3, 0xCC19, 0xB54F, 0x82D8, 0xFB8E, 0x7074, 0x0922,
+	0x1F5A, 0x660C, 0xEDF6, 0x94A0, 0xA337, 0xDA61, 0x519B, 0x28CD };
+
+/**
+ *  fm10k_crc_16b - Generate a 16 bit CRC for a region of 16 bit data
+ *  @data: pointer to data to process
+ *  @seed: seed value for CRC
+ *  @len: length measured in 16 bits words
+ *
+ *  This function will generate a CRC based on the polynomial 0xAC9A and
+ *  whatever value is stored in the seed variable.  Note that this
+ *  value inverts the local seed and the result in order to capture all
+ *  leading and trailing zeros.
+ */
+static u16 fm10k_crc_16b(const u32 *data, u16 seed, u16 len)
+{
+	u32 result = seed;
+
+	while (len--) {
+		result ^= *(data++);
+		result = (result >> 8) ^ fm10k_crc_16b_table[result & 0xFF];
+		result = (result >> 8) ^ fm10k_crc_16b_table[result & 0xFF];
+
+		if (!(len--))
+			break;
+
+		result = (result >> 8) ^ fm10k_crc_16b_table[result & 0xFF];
+		result = (result >> 8) ^ fm10k_crc_16b_table[result & 0xFF];
+	}
+
+	return (u16)result;
+}
+
+/**
+ *  fm10k_fifo_crc - generate a CRC based off of FIFO data
+ *  @fifo: pointer to FIFO
+ *  @offset: offset point for start of FIFO
+ *  @len: number of DWORDS words to process
+ *  @seed: seed value for CRC
+ *
+ *  This function generates a CRC for some region of the FIFO
+ **/
+static u16 fm10k_fifo_crc(struct fm10k_mbx_fifo *fifo, u16 offset,
+			  u16 len, u16 seed)
+{
+	u32 *data = fifo->buffer + offset;
+
+	/* track when we should cross the end of the FIFO */
+	offset = fifo->size - offset;
+
+	/* if we are in 2 blocks process the end of the FIFO first */
+	if (offset < len) {
+		seed = fm10k_crc_16b(data, seed, offset * 2);
+		data = fifo->buffer;
+		len -= offset;
+	}
+
+	/* process any remaining bits */
+	return fm10k_crc_16b(data, seed, len * 2);
+}
+
+/**
+ *  fm10k_mbx_update_local_crc - Update the local CRC for outgoing data
+ *  @mbx: pointer to mailbox
+ *  @head: head index provided by remote mailbox
+ *
+ *  This function will generate the CRC for all data from the end of the
+ *  last head update to the current one.  It uses the result of the
+ *  previous CRC as the seed for this update.  The result is stored in
+ *  mbx->local.
+ **/
+static void fm10k_mbx_update_local_crc(struct fm10k_mbx_info *mbx, u16 head)
+{
+	u16 len = mbx->tail_len - fm10k_mbx_index_len(mbx, head, mbx->tail);
+
+	/* determine the offset for the start of the region to be pulled */
+	head = fm10k_fifo_head_offset(&mbx->tx, mbx->pulled);
+
+	/* update local CRC to include all of the pulled data */
+	mbx->local = fm10k_fifo_crc(&mbx->tx, head, len, mbx->local);
+}
+
+/**
+ *  fm10k_mbx_verify_remote_crc - Verify the CRC is correct for current data
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will take all data that has been provided from the remote
+ *  end and generate a CRC for it.  This is stored in mbx->remote.  The
+ *  CRC for the header is then computed and if the result is non-zero this
+ *  is an error and we signal an error dropping all data and resetting the
+ *  connection.
+ */
+static s32 fm10k_mbx_verify_remote_crc(struct fm10k_mbx_info *mbx)
+{
+	struct fm10k_mbx_fifo *fifo = &mbx->rx;
+	u16 len = mbx->head_len;
+	u16 offset = fm10k_fifo_tail_offset(fifo, mbx->pushed) - len;
+	u16 crc;
+
+	/* update the remote CRC if new data has been received */
+	if (len)
+		mbx->remote = fm10k_fifo_crc(fifo, offset, len, mbx->remote);
+
+	/* process the full header as we have to validate the CRC */
+	crc = fm10k_crc_16b(&mbx->mbx_hdr, mbx->remote, 1);
+
+	/* notify other end if we have a problem */
+	return crc ? FM10K_MBX_ERR_CRC : 0;
+}
+
+/**
+ *  fm10k_mbx_rx_ready - Indicates that a message is ready in the Rx FIFO
+ *  @mbx: pointer to mailbox
+ *
+ *  This function returns true if there is a message in the Rx FIFO to dequeue.
+ **/
+static bool fm10k_mbx_rx_ready(struct fm10k_mbx_info *mbx)
+{
+	u16 msg_size = fm10k_fifo_head_len(&mbx->rx);
+
+	return msg_size && (fm10k_fifo_used(&mbx->rx) >= msg_size);
+}
+
+/**
+ *  fm10k_mbx_tx_ready - Indicates that the mailbox is in state ready for Tx
+ *  @mbx: pointer to mailbox
+ *  @len: verify free space is >= this value
+ *
+ *  This function returns true if the mailbox is in a state ready to transmit.
+ **/
+static bool fm10k_mbx_tx_ready(struct fm10k_mbx_info *mbx, u16 len)
+{
+	u16 fifo_unused = fm10k_fifo_unused(&mbx->tx);
+
+	return (mbx->state == FM10K_STATE_OPEN) && (fifo_unused >= len);
+}
+
+/**
+ *  fm10k_mbx_tx_complete - Indicates that the Tx FIFO has been emptied
+ *  @mbx: pointer to mailbox
+ *
+ *  This function returns true if the Tx FIFO is empty.
+ **/
+static bool fm10k_mbx_tx_complete(struct fm10k_mbx_info *mbx)
+{
+	return fm10k_fifo_empty(&mbx->tx);
+}
+
+/**
+ *  fm10k_mbx_dequeue_rx - Dequeues the message from the head in the Rx FIFO
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function dequeues messages and hands them off to the TLV parser.
+ *  It will return the number of messages processed when called.
+ **/
+static u16 fm10k_mbx_dequeue_rx(struct fm10k_hw *hw,
+				struct fm10k_mbx_info *mbx)
+{
+	struct fm10k_mbx_fifo *fifo = &mbx->rx;
+	s32 err;
+	u16 cnt;
+
+	/* parse Rx messages out of the Rx FIFO to empty it */
+	for (cnt = 0; !fm10k_fifo_empty(fifo); cnt++) {
+		err = fm10k_tlv_msg_parse(hw, fifo->buffer + fifo->head,
+					  mbx, mbx->msg_data);
+		if (err < 0)
+			mbx->rx_parse_err++;
+
+		fm10k_fifo_head_drop(fifo);
+	}
+
+	/* shift remaining bytes back to start of FIFO */
+	memmove(fifo->buffer, fifo->buffer + fifo->tail, mbx->pushed << 2);
+
+	/* shift head and tail based on the memory we moved */
+	fifo->tail -= fifo->head;
+	fifo->head = 0;
+
+	return cnt;
+}
+
+/**
+ *  fm10k_mbx_enqueue_tx - Enqueues the message to the tail of the Tx FIFO
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *  @msg: message array to read
+ *
+ *  This function enqueues a message up to the size specified by the length
+ *  contained in the first DWORD of the message and will place at the tail
+ *  of the FIFO.  It will return 0 on success, or a negative value on error.
+ **/
+static s32 fm10k_mbx_enqueue_tx(struct fm10k_hw *hw,
+				struct fm10k_mbx_info *mbx, const u32 *msg)
+{
+	u32 countdown = mbx->timeout;
+	s32 err;
+
+	switch (mbx->state) {
+	case FM10K_STATE_CLOSED:
+	case FM10K_STATE_DISCONNECT:
+		return FM10K_MBX_ERR_NO_MBX;
+	default:
+		break;
+	}
+
+	/* enqueue the message on the Tx FIFO */
+	err = fm10k_fifo_enqueue(&mbx->tx, msg);
+
+	/* if it failed give the FIFO a chance to drain */
+	while (err && countdown) {
+		countdown--;
+		udelay(mbx->udelay);
+		mbx->ops.process(hw, mbx);
+		err = fm10k_fifo_enqueue(&mbx->tx, msg);
+	}
+
+	/* if we failed treat the error */
+	if (err) {
+		mbx->timeout = 0;
+		mbx->tx_busy++;
+	}
+
+	/* begin processing message, ignore errors as this is just meant
+	 * to start the mailbox flow so we are not concerned if there
+	 * is a bad error, or the mailbox is already busy with a request
+	 */
+	if (!mbx->tail_len)
+		mbx->ops.process(hw, mbx);
+
+	return 0;
+}
+
+/**
+ *  fm10k_mbx_read - Copies the mbmem to local message buffer
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function copies the message from the mbmem to the message array
+ **/
+static s32 fm10k_mbx_read(struct fm10k_hw *hw, struct fm10k_mbx_info *mbx)
+{
+	/* only allow one reader in here at a time */
+	if (mbx->mbx_hdr)
+		return FM10K_MBX_ERR_BUSY;
+
+	/* read to capture initial interrupt bits */
+	if (fm10k_read_reg(hw, mbx->mbx_reg) & FM10K_MBX_REQ_INTERRUPT)
+		mbx->mbx_lock = FM10K_MBX_ACK;
+
+	/* write back interrupt bits to clear */
+	fm10k_write_reg(hw, mbx->mbx_reg,
+			FM10K_MBX_REQ_INTERRUPT | FM10K_MBX_ACK_INTERRUPT);
+
+	/* read remote header */
+	mbx->mbx_hdr = fm10k_read_reg(hw, mbx->mbmem_reg ^ mbx->mbmem_len);
+
+	return 0;
+}
+
+/**
+ *  fm10k_mbx_write - Copies the local message buffer to mbmem
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function copies the message from the message array to mbmem
+ **/
+static void fm10k_mbx_write(struct fm10k_hw *hw, struct fm10k_mbx_info *mbx)
+{
+	u32 mbmem = mbx->mbmem_reg;
+
+	/* write new msg header to notify recipient of change */
+	fm10k_write_reg(hw, mbmem, mbx->mbx_hdr);
+
+	/* write mailbox to send interrupt */
+	if (mbx->mbx_lock)
+		fm10k_write_reg(hw, mbx->mbx_reg, mbx->mbx_lock);
+
+	/* we no longer are using the header so free it */
+	mbx->mbx_hdr = 0;
+	mbx->mbx_lock = 0;
+}
+
+/**
+ *  fm10k_mbx_create_connect_hdr - Generate a connect mailbox header
+ *  @mbx: pointer to mailbox
+ *
+ *  This function returns a connection mailbox header
+ **/
+static void fm10k_mbx_create_connect_hdr(struct fm10k_mbx_info *mbx)
+{
+	mbx->mbx_lock |= FM10K_MBX_REQ;
+
+	mbx->mbx_hdr = FM10K_MSG_HDR_FIELD_SET(FM10K_MSG_CONNECT, TYPE) |
+		       FM10K_MSG_HDR_FIELD_SET(mbx->head, HEAD) |
+		       FM10K_MSG_HDR_FIELD_SET(mbx->rx.size - 1, CONNECT_SIZE);
+}
+
+/**
+ *  fm10k_mbx_create_data_hdr - Generate a data mailbox header
+ *  @mbx: pointer to mailbox
+ *
+ *  This function returns a data mailbox header
+ **/
+static void fm10k_mbx_create_data_hdr(struct fm10k_mbx_info *mbx)
+{
+	u32 hdr = FM10K_MSG_HDR_FIELD_SET(FM10K_MSG_DATA, TYPE) |
+		  FM10K_MSG_HDR_FIELD_SET(mbx->tail, TAIL) |
+		  FM10K_MSG_HDR_FIELD_SET(mbx->head, HEAD);
+	struct fm10k_mbx_fifo *fifo = &mbx->tx;
+	u16 crc;
+
+	if (mbx->tail_len)
+		mbx->mbx_lock |= FM10K_MBX_REQ;
+
+	/* generate CRC for data in flight and header */
+	crc = fm10k_fifo_crc(fifo, fm10k_fifo_head_offset(fifo, mbx->pulled),
+			     mbx->tail_len, mbx->local);
+	crc = fm10k_crc_16b(&hdr, crc, 1);
+
+	/* load header to memory to be written */
+	mbx->mbx_hdr = hdr | FM10K_MSG_HDR_FIELD_SET(crc, CRC);
+}
+
+/**
+ *  fm10k_mbx_create_disconnect_hdr - Generate a disconnect mailbox header
+ *  @mbx: pointer to mailbox
+ *
+ *  This function returns a disconnect mailbox header
+ **/
+static void fm10k_mbx_create_disconnect_hdr(struct fm10k_mbx_info *mbx)
+{
+	u32 hdr = FM10K_MSG_HDR_FIELD_SET(FM10K_MSG_DISCONNECT, TYPE) |
+		  FM10K_MSG_HDR_FIELD_SET(mbx->tail, TAIL) |
+		  FM10K_MSG_HDR_FIELD_SET(mbx->head, HEAD);
+	u16 crc = fm10k_crc_16b(&hdr, mbx->local, 1);
+
+	mbx->mbx_lock |= FM10K_MBX_ACK;
+
+	/* load header to memory to be written */
+	mbx->mbx_hdr = hdr | FM10K_MSG_HDR_FIELD_SET(crc, CRC);
+}
+
+/**
+ *  fm10k_mbx_create_fake_disconnect_hdr - Generate a false disconnect mbox hdr
+ *  @mbx: pointer to mailbox
+ *
+ *  This function creates a fake disconnect header for loading into remote
+ *  mailbox header. The primary purpose is to prevent errors on immediate
+ *  start up after mbx->connect.
+ **/
+static void fm10k_mbx_create_fake_disconnect_hdr(struct fm10k_mbx_info *mbx)
+{
+	u32 hdr = FM10K_MSG_HDR_FIELD_SET(FM10K_MSG_DISCONNECT, TYPE) |
+		  FM10K_MSG_HDR_FIELD_SET(mbx->head, TAIL) |
+		  FM10K_MSG_HDR_FIELD_SET(mbx->tail, HEAD);
+	u16 crc = fm10k_crc_16b(&hdr, mbx->local, 1);
+
+	mbx->mbx_lock |= FM10K_MBX_ACK;
+
+	/* load header to memory to be written */
+	mbx->mbx_hdr = hdr | FM10K_MSG_HDR_FIELD_SET(crc, CRC);
+}
+
+/**
+ *  fm10k_mbx_create_error_msg - Generate an error message
+ *  @mbx: pointer to mailbox
+ *  @err: local error encountered
+ *
+ *  This function will interpret the error provided by err, and based on
+ *  that it may shift the message by 1 DWORD and then place an error header
+ *  at the start of the message.
+ **/
+static void fm10k_mbx_create_error_msg(struct fm10k_mbx_info *mbx, s32 err)
+{
+	/* only generate an error message for these types */
+	switch (err) {
+	case FM10K_MBX_ERR_TAIL:
+	case FM10K_MBX_ERR_HEAD:
+	case FM10K_MBX_ERR_TYPE:
+	case FM10K_MBX_ERR_SIZE:
+	case FM10K_MBX_ERR_RSVD0:
+	case FM10K_MBX_ERR_CRC:
+		break;
+	default:
+		return;
+	}
+
+	mbx->mbx_lock |= FM10K_MBX_REQ;
+
+	mbx->mbx_hdr = FM10K_MSG_HDR_FIELD_SET(FM10K_MSG_ERROR, TYPE) |
+		       FM10K_MSG_HDR_FIELD_SET(err, ERR_NO) |
+		       FM10K_MSG_HDR_FIELD_SET(mbx->head, HEAD);
+}
+
+/**
+ *  fm10k_mbx_validate_msg_hdr - Validate common fields in the message header
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will parse up the fields in the mailbox header and return
+ *  an error if the header contains any of a number of invalid configurations
+ *  including unrecognized type, invalid route, or a malformed message.
+ **/
+static s32 fm10k_mbx_validate_msg_hdr(struct fm10k_mbx_info *mbx)
+{
+	u16 type, rsvd0, head, tail, size;
+	const u32 *hdr = &mbx->mbx_hdr;
+
+	type = FM10K_MSG_HDR_FIELD_GET(*hdr, TYPE);
+	rsvd0 = FM10K_MSG_HDR_FIELD_GET(*hdr, RSVD0);
+	tail = FM10K_MSG_HDR_FIELD_GET(*hdr, TAIL);
+	head = FM10K_MSG_HDR_FIELD_GET(*hdr, HEAD);
+	size = FM10K_MSG_HDR_FIELD_GET(*hdr, CONNECT_SIZE);
+
+	if (rsvd0)
+		return FM10K_MBX_ERR_RSVD0;
+
+	switch (type) {
+	case FM10K_MSG_DISCONNECT:
+		/* validate that all data has been received */
+		if (tail != mbx->head)
+			return FM10K_MBX_ERR_TAIL;
+
+		fallthrough;
+	case FM10K_MSG_DATA:
+		/* validate that head is moving correctly */
+		if (!head || (head == FM10K_MSG_HDR_MASK(HEAD)))
+			return FM10K_MBX_ERR_HEAD;
+		if (fm10k_mbx_index_len(mbx, head, mbx->tail) > mbx->tail_len)
+			return FM10K_MBX_ERR_HEAD;
+
+		/* validate that tail is moving correctly */
+		if (!tail || (tail == FM10K_MSG_HDR_MASK(TAIL)))
+			return FM10K_MBX_ERR_TAIL;
+		if (fm10k_mbx_index_len(mbx, mbx->head, tail) < mbx->mbmem_len)
+			break;
+
+		return FM10K_MBX_ERR_TAIL;
+	case FM10K_MSG_CONNECT:
+		/* validate size is in range and is power of 2 mask */
+		if ((size < FM10K_VFMBX_MSG_MTU) || (size & (size + 1)))
+			return FM10K_MBX_ERR_SIZE;
+
+		fallthrough;
+	case FM10K_MSG_ERROR:
+		if (!head || (head == FM10K_MSG_HDR_MASK(HEAD)))
+			return FM10K_MBX_ERR_HEAD;
+		/* neither create nor error include a tail offset */
+		if (tail)
+			return FM10K_MBX_ERR_TAIL;
+
+		break;
+	default:
+		return FM10K_MBX_ERR_TYPE;
+	}
+
+	return 0;
+}
+
+/**
+ *  fm10k_mbx_create_reply - Generate reply based on state and remote head
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *  @head: acknowledgement number
+ *
+ *  This function will generate an outgoing message based on the current
+ *  mailbox state and the remote FIFO head.  It will return the length
+ *  of the outgoing message excluding header on success, and a negative value
+ *  on error.
+ **/
+static s32 fm10k_mbx_create_reply(struct fm10k_hw *hw,
+				  struct fm10k_mbx_info *mbx, u16 head)
+{
+	switch (mbx->state) {
+	case FM10K_STATE_OPEN:
+	case FM10K_STATE_DISCONNECT:
+		/* update our checksum for the outgoing data */
+		fm10k_mbx_update_local_crc(mbx, head);
+
+		/* as long as other end recognizes us keep sending data */
+		fm10k_mbx_pull_head(hw, mbx, head);
+
+		/* generate new header based on data */
+		if (mbx->tail_len || (mbx->state == FM10K_STATE_OPEN))
+			fm10k_mbx_create_data_hdr(mbx);
+		else
+			fm10k_mbx_create_disconnect_hdr(mbx);
+		break;
+	case FM10K_STATE_CONNECT:
+		/* send disconnect even if we aren't connected */
+		fm10k_mbx_create_connect_hdr(mbx);
+		break;
+	case FM10K_STATE_CLOSED:
+		/* generate new header based on data */
+		fm10k_mbx_create_disconnect_hdr(mbx);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ *  fm10k_mbx_reset_work- Reset internal pointers for any pending work
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will reset all internal pointers so any work in progress
+ *  is dropped.  This call should occur every time we transition from the
+ *  open state to the connect state.
+ **/
+static void fm10k_mbx_reset_work(struct fm10k_mbx_info *mbx)
+{
+	u16 len, head, ack;
+
+	/* reset our outgoing max size back to Rx limits */
+	mbx->max_size = mbx->rx.size - 1;
+
+	/* update mbx->pulled to account for tail_len and ack */
+	head = FM10K_MSG_HDR_FIELD_GET(mbx->mbx_hdr, HEAD);
+	ack = fm10k_mbx_index_len(mbx, head, mbx->tail);
+	mbx->pulled += mbx->tail_len - ack;
+
+	/* now drop any messages which have started or finished transmitting */
+	while (fm10k_fifo_head_len(&mbx->tx) && mbx->pulled) {
+		len = fm10k_fifo_head_drop(&mbx->tx);
+		mbx->tx_dropped++;
+		if (mbx->pulled >= len)
+			mbx->pulled -= len;
+		else
+			mbx->pulled = 0;
+	}
+
+	/* just do a quick resysnc to start of message */
+	mbx->pushed = 0;
+	mbx->pulled = 0;
+	mbx->tail_len = 0;
+	mbx->head_len = 0;
+	mbx->rx.tail = 0;
+	mbx->rx.head = 0;
+}
+
+/**
+ *  fm10k_mbx_update_max_size - Update the max_size and drop any large messages
+ *  @mbx: pointer to mailbox
+ *  @size: new value for max_size
+ *
+ *  This function updates the max_size value and drops any outgoing messages
+ *  at the head of the Tx FIFO if they are larger than max_size. It does not
+ *  drop all messages, as this is too difficult to parse and remove them from
+ *  the FIFO. Instead, rely on the checking to ensure that messages larger
+ *  than max_size aren't pushed into the memory buffer.
+ **/
+static void fm10k_mbx_update_max_size(struct fm10k_mbx_info *mbx, u16 size)
+{
+	u16 len;
+
+	mbx->max_size = size;
+
+	/* flush any oversized messages from the queue */
+	for (len = fm10k_fifo_head_len(&mbx->tx);
+	     len > size;
+	     len = fm10k_fifo_head_len(&mbx->tx)) {
+		fm10k_fifo_head_drop(&mbx->tx);
+		mbx->tx_dropped++;
+	}
+}
+
+/**
+ *  fm10k_mbx_connect_reset - Reset following request for reset
+ *  @mbx: pointer to mailbox
+ *
+ *  This function resets the mailbox to either a disconnected state
+ *  or a connect state depending on the current mailbox state
+ **/
+static void fm10k_mbx_connect_reset(struct fm10k_mbx_info *mbx)
+{
+	/* just do a quick resysnc to start of frame */
+	fm10k_mbx_reset_work(mbx);
+
+	/* reset CRC seeds */
+	mbx->local = FM10K_MBX_CRC_SEED;
+	mbx->remote = FM10K_MBX_CRC_SEED;
+
+	/* we cannot exit connect until the size is good */
+	if (mbx->state == FM10K_STATE_OPEN)
+		mbx->state = FM10K_STATE_CONNECT;
+	else
+		mbx->state = FM10K_STATE_CLOSED;
+}
+
+/**
+ *  fm10k_mbx_process_connect - Process connect header
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will read an incoming connect header and reply with the
+ *  appropriate message.  It will return a value indicating the number of
+ *  data DWORDs on success, or will return a negative value on failure.
+ **/
+static s32 fm10k_mbx_process_connect(struct fm10k_hw *hw,
+				     struct fm10k_mbx_info *mbx)
+{
+	const enum fm10k_mbx_state state = mbx->state;
+	const u32 *hdr = &mbx->mbx_hdr;
+	u16 size, head;
+
+	/* we will need to pull all of the fields for verification */
+	size = FM10K_MSG_HDR_FIELD_GET(*hdr, CONNECT_SIZE);
+	head = FM10K_MSG_HDR_FIELD_GET(*hdr, HEAD);
+
+	switch (state) {
+	case FM10K_STATE_DISCONNECT:
+	case FM10K_STATE_OPEN:
+		/* reset any in-progress work */
+		fm10k_mbx_connect_reset(mbx);
+		break;
+	case FM10K_STATE_CONNECT:
+		/* we cannot exit connect until the size is good */
+		if (size > mbx->rx.size) {
+			mbx->max_size = mbx->rx.size - 1;
+		} else {
+			/* record the remote system requesting connection */
+			mbx->state = FM10K_STATE_OPEN;
+
+			fm10k_mbx_update_max_size(mbx, size);
+		}
+		break;
+	default:
+		break;
+	}
+
+	/* align our tail index to remote head index */
+	mbx->tail = head;
+
+	return fm10k_mbx_create_reply(hw, mbx, head);
+}
+
+/**
+ *  fm10k_mbx_process_data - Process data header
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will read an incoming data header and reply with the
+ *  appropriate message.  It will return a value indicating the number of
+ *  data DWORDs on success, or will return a negative value on failure.
+ **/
+static s32 fm10k_mbx_process_data(struct fm10k_hw *hw,
+				  struct fm10k_mbx_info *mbx)
+{
+	const u32 *hdr = &mbx->mbx_hdr;
+	u16 head, tail;
+	s32 err;
+
+	/* we will need to pull all of the fields for verification */
+	head = FM10K_MSG_HDR_FIELD_GET(*hdr, HEAD);
+	tail = FM10K_MSG_HDR_FIELD_GET(*hdr, TAIL);
+
+	/* if we are in connect just update our data and go */
+	if (mbx->state == FM10K_STATE_CONNECT) {
+		mbx->tail = head;
+		mbx->state = FM10K_STATE_OPEN;
+	}
+
+	/* abort on message size errors */
+	err = fm10k_mbx_push_tail(hw, mbx, tail);
+	if (err < 0)
+		return err;
+
+	/* verify the checksum on the incoming data */
+	err = fm10k_mbx_verify_remote_crc(mbx);
+	if (err)
+		return err;
+
+	/* process messages if we have received any */
+	fm10k_mbx_dequeue_rx(hw, mbx);
+
+	return fm10k_mbx_create_reply(hw, mbx, head);
+}
+
+/**
+ *  fm10k_mbx_process_disconnect - Process disconnect header
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will read an incoming disconnect header and reply with the
+ *  appropriate message.  It will return a value indicating the number of
+ *  data DWORDs on success, or will return a negative value on failure.
+ **/
+static s32 fm10k_mbx_process_disconnect(struct fm10k_hw *hw,
+					struct fm10k_mbx_info *mbx)
+{
+	const enum fm10k_mbx_state state = mbx->state;
+	const u32 *hdr = &mbx->mbx_hdr;
+	u16 head;
+	s32 err;
+
+	/* we will need to pull the header field for verification */
+	head = FM10K_MSG_HDR_FIELD_GET(*hdr, HEAD);
+
+	/* We should not be receiving disconnect if Rx is incomplete */
+	if (mbx->pushed)
+		return FM10K_MBX_ERR_TAIL;
+
+	/* we have already verified mbx->head == tail so we know this is 0 */
+	mbx->head_len = 0;
+
+	/* verify the checksum on the incoming header is correct */
+	err = fm10k_mbx_verify_remote_crc(mbx);
+	if (err)
+		return err;
+
+	switch (state) {
+	case FM10K_STATE_DISCONNECT:
+	case FM10K_STATE_OPEN:
+		/* state doesn't change if we still have work to do */
+		if (!fm10k_mbx_tx_complete(mbx))
+			break;
+
+		/* verify the head indicates we completed all transmits */
+		if (head != mbx->tail)
+			return FM10K_MBX_ERR_HEAD;
+
+		/* reset any in-progress work */
+		fm10k_mbx_connect_reset(mbx);
+		break;
+	default:
+		break;
+	}
+
+	return fm10k_mbx_create_reply(hw, mbx, head);
+}
+
+/**
+ *  fm10k_mbx_process_error - Process error header
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will read an incoming error header and reply with the
+ *  appropriate message.  It will return a value indicating the number of
+ *  data DWORDs on success, or will return a negative value on failure.
+ **/
+static s32 fm10k_mbx_process_error(struct fm10k_hw *hw,
+				   struct fm10k_mbx_info *mbx)
+{
+	const u32 *hdr = &mbx->mbx_hdr;
+	u16 head;
+
+	/* we will need to pull all of the fields for verification */
+	head = FM10K_MSG_HDR_FIELD_GET(*hdr, HEAD);
+
+	switch (mbx->state) {
+	case FM10K_STATE_OPEN:
+	case FM10K_STATE_DISCONNECT:
+		/* flush any uncompleted work */
+		fm10k_mbx_reset_work(mbx);
+
+		/* reset CRC seeds */
+		mbx->local = FM10K_MBX_CRC_SEED;
+		mbx->remote = FM10K_MBX_CRC_SEED;
+
+		/* reset tail index and size to prepare for reconnect */
+		mbx->tail = head;
+
+		/* if open then reset max_size and go back to connect */
+		if (mbx->state == FM10K_STATE_OPEN) {
+			mbx->state = FM10K_STATE_CONNECT;
+			break;
+		}
+
+		/* send a connect message to get data flowing again */
+		fm10k_mbx_create_connect_hdr(mbx);
+		return 0;
+	default:
+		break;
+	}
+
+	return fm10k_mbx_create_reply(hw, mbx, mbx->tail);
+}
+
+/**
+ *  fm10k_mbx_process - Process mailbox interrupt
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will process incoming mailbox events and generate mailbox
+ *  replies.  It will return a value indicating the number of DWORDs
+ *  transmitted excluding header on success or a negative value on error.
+ **/
+static s32 fm10k_mbx_process(struct fm10k_hw *hw,
+			     struct fm10k_mbx_info *mbx)
+{
+	s32 err;
+
+	/* we do not read mailbox if closed */
+	if (mbx->state == FM10K_STATE_CLOSED)
+		return 0;
+
+	/* copy data from mailbox */
+	err = fm10k_mbx_read(hw, mbx);
+	if (err)
+		return err;
+
+	/* validate type, source, and destination */
+	err = fm10k_mbx_validate_msg_hdr(mbx);
+	if (err < 0)
+		goto msg_err;
+
+	switch (FM10K_MSG_HDR_FIELD_GET(mbx->mbx_hdr, TYPE)) {
+	case FM10K_MSG_CONNECT:
+		err = fm10k_mbx_process_connect(hw, mbx);
+		break;
+	case FM10K_MSG_DATA:
+		err = fm10k_mbx_process_data(hw, mbx);
+		break;
+	case FM10K_MSG_DISCONNECT:
+		err = fm10k_mbx_process_disconnect(hw, mbx);
+		break;
+	case FM10K_MSG_ERROR:
+		err = fm10k_mbx_process_error(hw, mbx);
+		break;
+	default:
+		err = FM10K_MBX_ERR_TYPE;
+		break;
+	}
+
+msg_err:
+	/* notify partner of errors on our end */
+	if (err < 0)
+		fm10k_mbx_create_error_msg(mbx, err);
+
+	/* copy data from mailbox */
+	fm10k_mbx_write(hw, mbx);
+
+	return err;
+}
+
+/**
+ *  fm10k_mbx_disconnect - Shutdown mailbox connection
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will shut down the mailbox.  It places the mailbox first
+ *  in the disconnect state, it then allows up to a predefined timeout for
+ *  the mailbox to transition to close on its own.  If this does not occur
+ *  then the mailbox will be forced into the closed state.
+ *
+ *  Any mailbox transactions not completed before calling this function
+ *  are not guaranteed to complete and may be dropped.
+ **/
+static void fm10k_mbx_disconnect(struct fm10k_hw *hw,
+				 struct fm10k_mbx_info *mbx)
+{
+	int timeout = mbx->timeout ? FM10K_MBX_DISCONNECT_TIMEOUT : 0;
+
+	/* Place mbx in ready to disconnect state */
+	mbx->state = FM10K_STATE_DISCONNECT;
+
+	/* trigger interrupt to start shutdown process */
+	fm10k_write_reg(hw, mbx->mbx_reg, FM10K_MBX_REQ |
+					  FM10K_MBX_INTERRUPT_DISABLE);
+	do {
+		udelay(FM10K_MBX_POLL_DELAY);
+		mbx->ops.process(hw, mbx);
+		timeout -= FM10K_MBX_POLL_DELAY;
+	} while ((timeout > 0) && (mbx->state != FM10K_STATE_CLOSED));
+
+	/* in case we didn't close, just force the mailbox into shutdown and
+	 * drop all left over messages in the FIFO.
+	 */
+	fm10k_mbx_connect_reset(mbx);
+	fm10k_fifo_drop_all(&mbx->tx);
+
+	fm10k_write_reg(hw, mbx->mbmem_reg, 0);
+}
+
+/**
+ *  fm10k_mbx_connect - Start mailbox connection
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will initiate a mailbox connection.  It will populate the
+ *  mailbox with a broadcast connect message and then initialize the lock.
+ *  This is safe since the connect message is a single DWORD so the mailbox
+ *  transaction is guaranteed to be atomic.
+ *
+ *  This function will return an error if the mailbox has not been initiated
+ *  or is currently in use.
+ **/
+static s32 fm10k_mbx_connect(struct fm10k_hw *hw, struct fm10k_mbx_info *mbx)
+{
+	/* we cannot connect an uninitialized mailbox */
+	if (!mbx->rx.buffer)
+		return FM10K_MBX_ERR_NO_SPACE;
+
+	/* we cannot connect an already connected mailbox */
+	if (mbx->state != FM10K_STATE_CLOSED)
+		return FM10K_MBX_ERR_BUSY;
+
+	/* mailbox timeout can now become active */
+	mbx->timeout = FM10K_MBX_INIT_TIMEOUT;
+
+	/* Place mbx in ready to connect state */
+	mbx->state = FM10K_STATE_CONNECT;
+
+	fm10k_mbx_reset_work(mbx);
+
+	/* initialize header of remote mailbox */
+	fm10k_mbx_create_fake_disconnect_hdr(mbx);
+	fm10k_write_reg(hw, mbx->mbmem_reg ^ mbx->mbmem_len, mbx->mbx_hdr);
+
+	/* enable interrupt and notify other party of new message */
+	mbx->mbx_lock = FM10K_MBX_REQ_INTERRUPT | FM10K_MBX_ACK_INTERRUPT |
+			FM10K_MBX_INTERRUPT_ENABLE;
+
+	/* generate and load connect header into mailbox */
+	fm10k_mbx_create_connect_hdr(mbx);
+	fm10k_mbx_write(hw, mbx);
+
+	return 0;
+}
+
+/**
+ *  fm10k_mbx_validate_handlers - Validate layout of message parsing data
+ *  @msg_data: handlers for mailbox events
+ *
+ *  This function validates the layout of the message parsing data.  This
+ *  should be mostly static, but it is important to catch any errors that
+ *  are made when constructing the parsers.
+ **/
+static s32 fm10k_mbx_validate_handlers(const struct fm10k_msg_data *msg_data)
+{
+	const struct fm10k_tlv_attr *attr;
+	unsigned int id;
+
+	/* Allow NULL mailboxes that transmit but don't receive */
+	if (!msg_data)
+		return 0;
+
+	while (msg_data->id != FM10K_TLV_ERROR) {
+		/* all messages should have a function handler */
+		if (!msg_data->func)
+			return FM10K_ERR_PARAM;
+
+		/* parser is optional */
+		attr = msg_data->attr;
+		if (attr) {
+			while (attr->id != FM10K_TLV_ERROR) {
+				id = attr->id;
+				attr++;
+				/* ID should always be increasing */
+				if (id >= attr->id)
+					return FM10K_ERR_PARAM;
+				/* ID should fit in results array */
+				if (id >= FM10K_TLV_RESULTS_MAX)
+					return FM10K_ERR_PARAM;
+			}
+
+			/* verify terminator is in the list */
+			if (attr->id != FM10K_TLV_ERROR)
+				return FM10K_ERR_PARAM;
+		}
+
+		id = msg_data->id;
+		msg_data++;
+		/* ID should always be increasing */
+		if (id >= msg_data->id)
+			return FM10K_ERR_PARAM;
+	}
+
+	/* verify terminator is in the list */
+	if ((msg_data->id != FM10K_TLV_ERROR) || !msg_data->func)
+		return FM10K_ERR_PARAM;
+
+	return 0;
+}
+
+/**
+ *  fm10k_mbx_register_handlers - Register a set of handler ops for mailbox
+ *  @mbx: pointer to mailbox
+ *  @msg_data: handlers for mailbox events
+ *
+ *  This function associates a set of message handling ops with a mailbox.
+ **/
+static s32 fm10k_mbx_register_handlers(struct fm10k_mbx_info *mbx,
+				       const struct fm10k_msg_data *msg_data)
+{
+	/* validate layout of handlers before assigning them */
+	if (fm10k_mbx_validate_handlers(msg_data))
+		return FM10K_ERR_PARAM;
+
+	/* initialize the message handlers */
+	mbx->msg_data = msg_data;
+
+	return 0;
+}
+
+/**
+ *  fm10k_pfvf_mbx_init - Initialize mailbox memory for PF/VF mailbox
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *  @msg_data: handlers for mailbox events
+ *  @id: ID reference for PF as it supports up to 64 PF/VF mailboxes
+ *
+ *  This function initializes the mailbox for use.  It will split the
+ *  buffer provided and use that to populate both the Tx and Rx FIFO by
+ *  evenly splitting it.  In order to allow for easy masking of head/tail
+ *  the value reported in size must be a power of 2 and is reported in
+ *  DWORDs, not bytes.  Any invalid values will cause the mailbox to return
+ *  error.
+ **/
+s32 fm10k_pfvf_mbx_init(struct fm10k_hw *hw, struct fm10k_mbx_info *mbx,
+			const struct fm10k_msg_data *msg_data, u8 id)
+{
+	/* initialize registers */
+	switch (hw->mac.type) {
+	case fm10k_mac_vf:
+		mbx->mbx_reg = FM10K_VFMBX;
+		mbx->mbmem_reg = FM10K_VFMBMEM(FM10K_VFMBMEM_VF_XOR);
+		break;
+	case fm10k_mac_pf:
+		/* there are only 64 VF <-> PF mailboxes */
+		if (id < 64) {
+			mbx->mbx_reg = FM10K_MBX(id);
+			mbx->mbmem_reg = FM10K_MBMEM_VF(id, 0);
+			break;
+		}
+		fallthrough;
+	default:
+		return FM10K_MBX_ERR_NO_MBX;
+	}
+
+	/* start out in closed state */
+	mbx->state = FM10K_STATE_CLOSED;
+
+	/* validate layout of handlers before assigning them */
+	if (fm10k_mbx_validate_handlers(msg_data))
+		return FM10K_ERR_PARAM;
+
+	/* initialize the message handlers */
+	mbx->msg_data = msg_data;
+
+	/* start mailbox as timed out and let the reset_hw call
+	 * set the timeout value to begin communications
+	 */
+	mbx->timeout = 0;
+	mbx->udelay = FM10K_MBX_INIT_DELAY;
+
+	/* initialize tail and head */
+	mbx->tail = 1;
+	mbx->head = 1;
+
+	/* initialize CRC seeds */
+	mbx->local = FM10K_MBX_CRC_SEED;
+	mbx->remote = FM10K_MBX_CRC_SEED;
+
+	/* Split buffer for use by Tx/Rx FIFOs */
+	mbx->max_size = FM10K_MBX_MSG_MAX_SIZE;
+	mbx->mbmem_len = FM10K_VFMBMEM_VF_XOR;
+
+	/* initialize the FIFOs, sizes are in 4 byte increments */
+	fm10k_fifo_init(&mbx->tx, mbx->buffer, FM10K_MBX_TX_BUFFER_SIZE);
+	fm10k_fifo_init(&mbx->rx, &mbx->buffer[FM10K_MBX_TX_BUFFER_SIZE],
+			FM10K_MBX_RX_BUFFER_SIZE);
+
+	/* initialize function pointers */
+	mbx->ops.connect = fm10k_mbx_connect;
+	mbx->ops.disconnect = fm10k_mbx_disconnect;
+	mbx->ops.rx_ready = fm10k_mbx_rx_ready;
+	mbx->ops.tx_ready = fm10k_mbx_tx_ready;
+	mbx->ops.tx_complete = fm10k_mbx_tx_complete;
+	mbx->ops.enqueue_tx = fm10k_mbx_enqueue_tx;
+	mbx->ops.process = fm10k_mbx_process;
+	mbx->ops.register_handlers = fm10k_mbx_register_handlers;
+
+	return 0;
+}
+
+/**
+ *  fm10k_sm_mbx_create_data_hdr - Generate a mailbox header for local FIFO
+ *  @mbx: pointer to mailbox
+ *
+ *  This function returns a data mailbox header
+ **/
+static void fm10k_sm_mbx_create_data_hdr(struct fm10k_mbx_info *mbx)
+{
+	if (mbx->tail_len)
+		mbx->mbx_lock |= FM10K_MBX_REQ;
+
+	mbx->mbx_hdr = FM10K_MSG_HDR_FIELD_SET(mbx->tail, SM_TAIL) |
+		       FM10K_MSG_HDR_FIELD_SET(mbx->remote, SM_VER) |
+		       FM10K_MSG_HDR_FIELD_SET(mbx->head, SM_HEAD);
+}
+
+/**
+ *  fm10k_sm_mbx_create_connect_hdr - Generate a mailbox header for local FIFO
+ *  @mbx: pointer to mailbox
+ *  @err: error flags to report if any
+ *
+ *  This function returns a connection mailbox header
+ **/
+static void fm10k_sm_mbx_create_connect_hdr(struct fm10k_mbx_info *mbx, u8 err)
+{
+	if (mbx->local)
+		mbx->mbx_lock |= FM10K_MBX_REQ;
+
+	mbx->mbx_hdr = FM10K_MSG_HDR_FIELD_SET(mbx->tail, SM_TAIL) |
+		       FM10K_MSG_HDR_FIELD_SET(mbx->remote, SM_VER) |
+		       FM10K_MSG_HDR_FIELD_SET(mbx->head, SM_HEAD) |
+		       FM10K_MSG_HDR_FIELD_SET(err, SM_ERR);
+}
+
+/**
+ *  fm10k_sm_mbx_connect_reset - Reset following request for reset
+ *  @mbx: pointer to mailbox
+ *
+ *  This function resets the mailbox to a just connected state
+ **/
+static void fm10k_sm_mbx_connect_reset(struct fm10k_mbx_info *mbx)
+{
+	/* flush any uncompleted work */
+	fm10k_mbx_reset_work(mbx);
+
+	/* set local version to max and remote version to 0 */
+	mbx->local = FM10K_SM_MBX_VERSION;
+	mbx->remote = 0;
+
+	/* initialize tail and head */
+	mbx->tail = 1;
+	mbx->head = 1;
+
+	/* reset state back to connect */
+	mbx->state = FM10K_STATE_CONNECT;
+}
+
+/**
+ *  fm10k_sm_mbx_connect - Start switch manager mailbox connection
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will initiate a mailbox connection with the switch
+ *  manager.  To do this it will first disconnect the mailbox, and then
+ *  reconnect it in order to complete a reset of the mailbox.
+ *
+ *  This function will return an error if the mailbox has not been initiated
+ *  or is currently in use.
+ **/
+static s32 fm10k_sm_mbx_connect(struct fm10k_hw *hw, struct fm10k_mbx_info *mbx)
+{
+	/* we cannot connect an uninitialized mailbox */
+	if (!mbx->rx.buffer)
+		return FM10K_MBX_ERR_NO_SPACE;
+
+	/* we cannot connect an already connected mailbox */
+	if (mbx->state != FM10K_STATE_CLOSED)
+		return FM10K_MBX_ERR_BUSY;
+
+	/* mailbox timeout can now become active */
+	mbx->timeout = FM10K_MBX_INIT_TIMEOUT;
+
+	/* Place mbx in ready to connect state */
+	mbx->state = FM10K_STATE_CONNECT;
+	mbx->max_size = FM10K_MBX_MSG_MAX_SIZE;
+
+	/* reset interface back to connect */
+	fm10k_sm_mbx_connect_reset(mbx);
+
+	/* enable interrupt and notify other party of new message */
+	mbx->mbx_lock = FM10K_MBX_REQ_INTERRUPT | FM10K_MBX_ACK_INTERRUPT |
+			FM10K_MBX_INTERRUPT_ENABLE;
+
+	/* generate and load connect header into mailbox */
+	fm10k_sm_mbx_create_connect_hdr(mbx, 0);
+	fm10k_mbx_write(hw, mbx);
+
+	return 0;
+}
+
+/**
+ *  fm10k_sm_mbx_disconnect - Shutdown mailbox connection
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will shut down the mailbox.  It places the mailbox first
+ *  in the disconnect state, it then allows up to a predefined timeout for
+ *  the mailbox to transition to close on its own.  If this does not occur
+ *  then the mailbox will be forced into the closed state.
+ *
+ *  Any mailbox transactions not completed before calling this function
+ *  are not guaranteed to complete and may be dropped.
+ **/
+static void fm10k_sm_mbx_disconnect(struct fm10k_hw *hw,
+				    struct fm10k_mbx_info *mbx)
+{
+	int timeout = mbx->timeout ? FM10K_MBX_DISCONNECT_TIMEOUT : 0;
+
+	/* Place mbx in ready to disconnect state */
+	mbx->state = FM10K_STATE_DISCONNECT;
+
+	/* trigger interrupt to start shutdown process */
+	fm10k_write_reg(hw, mbx->mbx_reg, FM10K_MBX_REQ |
+					  FM10K_MBX_INTERRUPT_DISABLE);
+	do {
+		udelay(FM10K_MBX_POLL_DELAY);
+		mbx->ops.process(hw, mbx);
+		timeout -= FM10K_MBX_POLL_DELAY;
+	} while ((timeout > 0) && (mbx->state != FM10K_STATE_CLOSED));
+
+	/* in case we didn't close just force the mailbox into shutdown */
+	mbx->state = FM10K_STATE_CLOSED;
+	mbx->remote = 0;
+	fm10k_mbx_reset_work(mbx);
+	fm10k_fifo_drop_all(&mbx->tx);
+
+	fm10k_write_reg(hw, mbx->mbmem_reg, 0);
+}
+
+/**
+ *  fm10k_sm_mbx_validate_fifo_hdr - Validate fields in the remote FIFO header
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will parse up the fields in the mailbox header and return
+ *  an error if the header contains any of a number of invalid configurations
+ *  including unrecognized offsets or version numbers.
+ **/
+static s32 fm10k_sm_mbx_validate_fifo_hdr(struct fm10k_mbx_info *mbx)
+{
+	const u32 *hdr = &mbx->mbx_hdr;
+	u16 tail, head, ver;
+
+	tail = FM10K_MSG_HDR_FIELD_GET(*hdr, SM_TAIL);
+	ver = FM10K_MSG_HDR_FIELD_GET(*hdr, SM_VER);
+	head = FM10K_MSG_HDR_FIELD_GET(*hdr, SM_HEAD);
+
+	switch (ver) {
+	case 0:
+		break;
+	case FM10K_SM_MBX_VERSION:
+		if (!head || head > FM10K_SM_MBX_FIFO_LEN)
+			return FM10K_MBX_ERR_HEAD;
+		if (!tail || tail > FM10K_SM_MBX_FIFO_LEN)
+			return FM10K_MBX_ERR_TAIL;
+		if (mbx->tail < head)
+			head += mbx->mbmem_len - 1;
+		if (tail < mbx->head)
+			tail += mbx->mbmem_len - 1;
+		if (fm10k_mbx_index_len(mbx, head, mbx->tail) > mbx->tail_len)
+			return FM10K_MBX_ERR_HEAD;
+		if (fm10k_mbx_index_len(mbx, mbx->head, tail) < mbx->mbmem_len)
+			break;
+		return FM10K_MBX_ERR_TAIL;
+	default:
+		return FM10K_MBX_ERR_SRC;
+	}
+
+	return 0;
+}
+
+/**
+ *  fm10k_sm_mbx_process_error - Process header with error flag set
+ *  @mbx: pointer to mailbox
+ *
+ *  This function is meant to respond to a request where the error flag
+ *  is set.  As a result we will terminate a connection if one is present
+ *  and fall back into the reset state with a connection header of version
+ *  0 (RESET).
+ **/
+static void fm10k_sm_mbx_process_error(struct fm10k_mbx_info *mbx)
+{
+	const enum fm10k_mbx_state state = mbx->state;
+
+	switch (state) {
+	case FM10K_STATE_DISCONNECT:
+		/* if there is an error just disconnect */
+		mbx->remote = 0;
+		break;
+	case FM10K_STATE_OPEN:
+		/* flush any uncompleted work */
+		fm10k_sm_mbx_connect_reset(mbx);
+		break;
+	case FM10K_STATE_CONNECT:
+		/* try connecting at lower version */
+		if (mbx->remote) {
+			while (mbx->local > 1)
+				mbx->local--;
+			mbx->remote = 0;
+		}
+		break;
+	default:
+		break;
+	}
+
+	fm10k_sm_mbx_create_connect_hdr(mbx, 0);
+}
+
+/**
+ *  fm10k_sm_mbx_create_error_msg - Process an error in FIFO header
+ *  @mbx: pointer to mailbox
+ *  @err: local error encountered
+ *
+ *  This function will interpret the error provided by err, and based on
+ *  that it may set the error bit in the local message header
+ **/
+static void fm10k_sm_mbx_create_error_msg(struct fm10k_mbx_info *mbx, s32 err)
+{
+	/* only generate an error message for these types */
+	switch (err) {
+	case FM10K_MBX_ERR_TAIL:
+	case FM10K_MBX_ERR_HEAD:
+	case FM10K_MBX_ERR_SRC:
+	case FM10K_MBX_ERR_SIZE:
+	case FM10K_MBX_ERR_RSVD0:
+		break;
+	default:
+		return;
+	}
+
+	/* process it as though we received an error, and send error reply */
+	fm10k_sm_mbx_process_error(mbx);
+	fm10k_sm_mbx_create_connect_hdr(mbx, 1);
+}
+
+/**
+ *  fm10k_sm_mbx_receive - Take message from Rx mailbox FIFO and put it in Rx
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *  @tail: tail index of message
+ *
+ *  This function will dequeue one message from the Rx switch manager mailbox
+ *  FIFO and place it in the Rx mailbox FIFO for processing by software.
+ **/
+static s32 fm10k_sm_mbx_receive(struct fm10k_hw *hw,
+				struct fm10k_mbx_info *mbx,
+				u16 tail)
+{
+	/* reduce length by 1 to convert to a mask */
+	u16 mbmem_len = mbx->mbmem_len - 1;
+	s32 err;
+
+	/* push tail in front of head */
+	if (tail < mbx->head)
+		tail += mbmem_len;
+
+	/* copy data to the Rx FIFO */
+	err = fm10k_mbx_push_tail(hw, mbx, tail);
+	if (err < 0)
+		return err;
+
+	/* process messages if we have received any */
+	fm10k_mbx_dequeue_rx(hw, mbx);
+
+	/* guarantee head aligns with the end of the last message */
+	mbx->head = fm10k_mbx_head_sub(mbx, mbx->pushed);
+	mbx->pushed = 0;
+
+	/* clear any extra bits left over since index adds 1 extra bit */
+	if (mbx->head > mbmem_len)
+		mbx->head -= mbmem_len;
+
+	return err;
+}
+
+/**
+ *  fm10k_sm_mbx_transmit - Take message from Tx and put it in Tx mailbox FIFO
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *  @head: head index of message
+ *
+ *  This function will dequeue one message from the Tx mailbox FIFO and place
+ *  it in the Tx switch manager mailbox FIFO for processing by hardware.
+ **/
+static void fm10k_sm_mbx_transmit(struct fm10k_hw *hw,
+				  struct fm10k_mbx_info *mbx, u16 head)
+{
+	struct fm10k_mbx_fifo *fifo = &mbx->tx;
+	/* reduce length by 1 to convert to a mask */
+	u16 mbmem_len = mbx->mbmem_len - 1;
+	u16 tail_len, len = 0;
+
+	/* push head behind tail */
+	if (mbx->tail < head)
+		head += mbmem_len;
+
+	fm10k_mbx_pull_head(hw, mbx, head);
+
+	/* determine msg aligned offset for end of buffer */
+	do {
+		u32 *msg;
+
+		msg = fifo->buffer + fm10k_fifo_head_offset(fifo, len);
+		tail_len = len;
+		len += FM10K_TLV_DWORD_LEN(*msg);
+	} while ((len <= mbx->tail_len) && (len < mbmem_len));
+
+	/* guarantee we stop on a message boundary */
+	if (mbx->tail_len > tail_len) {
+		mbx->tail = fm10k_mbx_tail_sub(mbx, mbx->tail_len - tail_len);
+		mbx->tail_len = tail_len;
+	}
+
+	/* clear any extra bits left over since index adds 1 extra bit */
+	if (mbx->tail > mbmem_len)
+		mbx->tail -= mbmem_len;
+}
+
+/**
+ *  fm10k_sm_mbx_create_reply - Generate reply based on state and remote head
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *  @head: acknowledgement number
+ *
+ *  This function will generate an outgoing message based on the current
+ *  mailbox state and the remote FIFO head.  It will return the length
+ *  of the outgoing message excluding header on success, and a negative value
+ *  on error.
+ **/
+static void fm10k_sm_mbx_create_reply(struct fm10k_hw *hw,
+				      struct fm10k_mbx_info *mbx, u16 head)
+{
+	switch (mbx->state) {
+	case FM10K_STATE_OPEN:
+	case FM10K_STATE_DISCONNECT:
+		/* flush out Tx data */
+		fm10k_sm_mbx_transmit(hw, mbx, head);
+
+		/* generate new header based on data */
+		if (mbx->tail_len || (mbx->state == FM10K_STATE_OPEN)) {
+			fm10k_sm_mbx_create_data_hdr(mbx);
+		} else {
+			mbx->remote = 0;
+			fm10k_sm_mbx_create_connect_hdr(mbx, 0);
+		}
+		break;
+	case FM10K_STATE_CONNECT:
+	case FM10K_STATE_CLOSED:
+		fm10k_sm_mbx_create_connect_hdr(mbx, 0);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ *  fm10k_sm_mbx_process_reset - Process header with version == 0 (RESET)
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function is meant to respond to a request where the version data
+ *  is set to 0.  As such we will either terminate the connection or go
+ *  into the connect state in order to re-establish the connection.  This
+ *  function can also be used to respond to an error as the connection
+ *  resetting would also be a means of dealing with errors.
+ **/
+static s32 fm10k_sm_mbx_process_reset(struct fm10k_hw *hw,
+				      struct fm10k_mbx_info *mbx)
+{
+	s32 err = 0;
+	const enum fm10k_mbx_state state = mbx->state;
+
+	switch (state) {
+	case FM10K_STATE_DISCONNECT:
+		/* drop remote connections and disconnect */
+		mbx->state = FM10K_STATE_CLOSED;
+		mbx->remote = 0;
+		mbx->local = 0;
+		break;
+	case FM10K_STATE_OPEN:
+		/* flush any incomplete work */
+		fm10k_sm_mbx_connect_reset(mbx);
+		err = FM10K_ERR_RESET_REQUESTED;
+		break;
+	case FM10K_STATE_CONNECT:
+		/* Update remote value to match local value */
+		mbx->remote = mbx->local;
+		break;
+	default:
+		break;
+	}
+
+	fm10k_sm_mbx_create_reply(hw, mbx, mbx->tail);
+
+	return err;
+}
+
+/**
+ *  fm10k_sm_mbx_process_version_1 - Process header with version == 1
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function is meant to process messages received when the remote
+ *  mailbox is active.
+ **/
+static s32 fm10k_sm_mbx_process_version_1(struct fm10k_hw *hw,
+					  struct fm10k_mbx_info *mbx)
+{
+	const u32 *hdr = &mbx->mbx_hdr;
+	u16 head, tail;
+	s32 len;
+
+	/* pull all fields needed for verification */
+	tail = FM10K_MSG_HDR_FIELD_GET(*hdr, SM_TAIL);
+	head = FM10K_MSG_HDR_FIELD_GET(*hdr, SM_HEAD);
+
+	/* if we are in connect and wanting version 1 then start up and go */
+	if (mbx->state == FM10K_STATE_CONNECT) {
+		if (!mbx->remote)
+			goto send_reply;
+		if (mbx->remote != 1)
+			return FM10K_MBX_ERR_SRC;
+
+		mbx->state = FM10K_STATE_OPEN;
+	}
+
+	do {
+		/* abort on message size errors */
+		len = fm10k_sm_mbx_receive(hw, mbx, tail);
+		if (len < 0)
+			return len;
+
+		/* continue until we have flushed the Rx FIFO */
+	} while (len);
+
+send_reply:
+	fm10k_sm_mbx_create_reply(hw, mbx, head);
+
+	return 0;
+}
+
+/**
+ *  fm10k_sm_mbx_process - Process switch manager mailbox interrupt
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *
+ *  This function will process incoming mailbox events and generate mailbox
+ *  replies.  It will return a value indicating the number of DWORDs
+ *  transmitted excluding header on success or a negative value on error.
+ **/
+static s32 fm10k_sm_mbx_process(struct fm10k_hw *hw,
+				struct fm10k_mbx_info *mbx)
+{
+	s32 err;
+
+	/* we do not read mailbox if closed */
+	if (mbx->state == FM10K_STATE_CLOSED)
+		return 0;
+
+	/* retrieve data from switch manager */
+	err = fm10k_mbx_read(hw, mbx);
+	if (err)
+		return err;
+
+	err = fm10k_sm_mbx_validate_fifo_hdr(mbx);
+	if (err < 0)
+		goto fifo_err;
+
+	if (FM10K_MSG_HDR_FIELD_GET(mbx->mbx_hdr, SM_ERR)) {
+		fm10k_sm_mbx_process_error(mbx);
+		goto fifo_err;
+	}
+
+	switch (FM10K_MSG_HDR_FIELD_GET(mbx->mbx_hdr, SM_VER)) {
+	case 0:
+		err = fm10k_sm_mbx_process_reset(hw, mbx);
+		break;
+	case FM10K_SM_MBX_VERSION:
+		err = fm10k_sm_mbx_process_version_1(hw, mbx);
+		break;
+	}
+
+fifo_err:
+	if (err < 0)
+		fm10k_sm_mbx_create_error_msg(mbx, err);
+
+	/* report data to switch manager */
+	fm10k_mbx_write(hw, mbx);
+
+	return err;
+}
+
+/**
+ *  fm10k_sm_mbx_init - Initialize mailbox memory for PF/SM mailbox
+ *  @hw: pointer to hardware structure
+ *  @mbx: pointer to mailbox
+ *  @msg_data: handlers for mailbox events
+ *
+ *  This function initializes the PF/SM mailbox for use.  It will split the
+ *  buffer provided and use that to populate both the Tx and Rx FIFO by
+ *  evenly splitting it.  In order to allow for easy masking of head/tail
+ *  the value reported in size must be a power of 2 and is reported in
+ *  DWORDs, not bytes.  Any invalid values will cause the mailbox to return
+ *  error.
+ **/
+s32 fm10k_sm_mbx_init(struct fm10k_hw __always_unused *hw,
+		      struct fm10k_mbx_info *mbx,
+		      const struct fm10k_msg_data *msg_data)
+{
+	mbx->mbx_reg = FM10K_GMBX;
+	mbx->mbmem_reg = FM10K_MBMEM_PF(0);
+
+	/* start out in closed state */
+	mbx->state = FM10K_STATE_CLOSED;
+
+	/* validate layout of handlers before assigning them */
+	if (fm10k_mbx_validate_handlers(msg_data))
+		return FM10K_ERR_PARAM;
+
+	/* initialize the message handlers */
+	mbx->msg_data = msg_data;
+
+	/* start mailbox as timed out and let the reset_hw call
+	 * set the timeout value to begin communications
+	 */
+	mbx->timeout = 0;
+	mbx->udelay = FM10K_MBX_INIT_DELAY;
+
+	/* Split buffer for use by Tx/Rx FIFOs */
+	mbx->max_size = FM10K_MBX_MSG_MAX_SIZE;
+	mbx->mbmem_len = FM10K_MBMEM_PF_XOR;
+
+	/* initialize the FIFOs, sizes are in 4 byte increments */
+	fm10k_fifo_init(&mbx->tx, mbx->buffer, FM10K_MBX_TX_BUFFER_SIZE);
+	fm10k_fifo_init(&mbx->rx, &mbx->buffer[FM10K_MBX_TX_BUFFER_SIZE],
+			FM10K_MBX_RX_BUFFER_SIZE);
+
+	/* initialize function pointers */
+	mbx->ops.connect = fm10k_sm_mbx_connect;
+	mbx->ops.disconnect = fm10k_sm_mbx_disconnect;
+	mbx->ops.rx_ready = fm10k_mbx_rx_ready;
+	mbx->ops.tx_ready = fm10k_mbx_tx_ready;
+	mbx->ops.tx_complete = fm10k_mbx_tx_complete;
+	mbx->ops.enqueue_tx = fm10k_mbx_enqueue_tx;
+	mbx->ops.process = fm10k_sm_mbx_process;
+	mbx->ops.register_handlers = fm10k_mbx_register_handlers;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
new file mode 100644
index 0000000000..56d1abff04
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.h
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _FM10K_MBX_H_
+#define _FM10K_MBX_H_
+
+/* forward declaration */
+struct fm10k_mbx_info;
+
+#include "fm10k_type.h"
+#include "fm10k_tlv.h"
+
+/* PF Mailbox Registers */
+#define FM10K_MBMEM(_n)		((_n) + 0x18000)
+#define FM10K_MBMEM_VF(_n, _m)	(((_n) * 0x10) + (_m) + 0x18000)
+#define FM10K_MBMEM_SM(_n)	((_n) + 0x18400)
+#define FM10K_MBMEM_PF(_n)	((_n) + 0x18600)
+/* XOR provides means of switching from Tx to Rx FIFO */
+#define FM10K_MBMEM_PF_XOR	(FM10K_MBMEM_SM(0) ^ FM10K_MBMEM_PF(0))
+#define FM10K_MBX(_n)		((_n) + 0x18800)
+#define FM10K_MBX_REQ				0x00000002
+#define FM10K_MBX_ACK				0x00000004
+#define FM10K_MBX_REQ_INTERRUPT			0x00000008
+#define FM10K_MBX_ACK_INTERRUPT			0x00000010
+#define FM10K_MBX_INTERRUPT_ENABLE		0x00000020
+#define FM10K_MBX_INTERRUPT_DISABLE		0x00000040
+#define FM10K_MBX_GLOBAL_REQ_INTERRUPT		0x00000200
+#define FM10K_MBX_GLOBAL_ACK_INTERRUPT		0x00000400
+#define FM10K_MBICR(_n)		((_n) + 0x18840)
+#define FM10K_GMBX		0x18842
+
+/* VF Mailbox Registers */
+#define FM10K_VFMBX		0x00010
+#define FM10K_VFMBMEM(_n)	((_n) + 0x00020)
+#define FM10K_VFMBMEM_LEN	16
+#define FM10K_VFMBMEM_VF_XOR	(FM10K_VFMBMEM_LEN / 2)
+
+/* Delays/timeouts */
+#define FM10K_MBX_DISCONNECT_TIMEOUT		500
+#define FM10K_MBX_POLL_DELAY			19
+#define FM10K_MBX_INT_DELAY			20
+
+/* PF/VF Mailbox state machine
+ *
+ * +----------+	    connect()	+----------+
+ * |  CLOSED  | --------------> |  CONNECT |
+ * +----------+			+----------+
+ *   ^				  ^	 |
+ *   | rcv:	      rcv:	  |	 | rcv:
+ *   |  Connect	       Disconnect |	 |  Connect
+ *   |  Disconnect     Error	  |	 |  Data
+ *   |				  |	 |
+ *   |				  |	 V
+ * +----------+   disconnect()	+----------+
+ * |DISCONNECT| <-------------- |   OPEN   |
+ * +----------+			+----------+
+ *
+ * The diagram above describes the PF/VF mailbox state machine.  There
+ * are four main states to this machine.
+ * Closed: This state represents a mailbox that is in a standby state
+ *	   with interrupts disabled.  In this state the mailbox should not
+ *	   read the mailbox or write any data.  The only means of exiting
+ *	   this state is for the system to make the connect() call for the
+ *	   mailbox, it will then transition to the connect state.
+ * Connect: In this state the mailbox is seeking a connection.  It will
+ *	    post a connect message with no specified destination and will
+ *	    wait for a reply from the other side of the mailbox.  This state
+ *	    is exited when either a connect with the local mailbox as the
+ *	    destination is received or when a data message is received with
+ *	    a valid sequence number.
+ * Open: In this state the mailbox is able to transfer data between the local
+ *       entity and the remote.  It will fall back to connect in the event of
+ *       receiving either an error message, or a disconnect message.  It will
+ *       transition to disconnect on a call to disconnect();
+ * Disconnect: In this state the mailbox is attempting to gracefully terminate
+ *	       the connection.  It will do so at the first point where it knows
+ *	       that the remote endpoint is either done sending, or when the
+ *	       remote endpoint has fallen back into connect.
+ */
+enum fm10k_mbx_state {
+	FM10K_STATE_CLOSED,
+	FM10K_STATE_CONNECT,
+	FM10K_STATE_OPEN,
+	FM10K_STATE_DISCONNECT,
+};
+
+/* PF/VF Mailbox header format
+ *    3			  2		      1			  0
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |        Size/Err_no/CRC        | Rsvd0 | Head  | Tail  | Type  |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * The layout above describes the format for the header used in the PF/VF
+ * mailbox.  The header is broken out into the following fields:
+ * Type: There are 4 supported message types
+ *		0x8: Data header - used to transport message data
+ *		0xC: Connect header - used to establish connection
+ *		0xD: Disconnect header - used to tear down a connection
+ *		0xE: Error header - used to address message exceptions
+ * Tail: Tail index for local FIFO
+ *		Tail index actually consists of two parts.  The MSB of
+ *		the head is a loop tracker, it is 0 on an even numbered
+ *		loop through the FIFO, and 1 on the odd numbered loops.
+ *		To get the actual mailbox offset based on the tail it
+ *		is necessary to add bit 3 to bit 0 and clear bit 3.  This
+ *		gives us a valid range of 0x1 - 0xE.
+ * Head: Head index for remote FIFO
+ *		Head index follows the same format as the tail index.
+ * Rsvd0: Reserved 0 portion of the mailbox header
+ * CRC: Running CRC for all data since connect plus current message header
+ * Size: Maximum message size - Applies only to connect headers
+ *		The maximum message size is provided during connect to avoid
+ *		jamming the mailbox with messages that do not fit.
+ * Err_no: Error number - Applies only to error headers
+ *		The error number provides an indication of the type of error
+ *		experienced.
+ */
+
+/* macros for retrieving and setting header values */
+#define FM10K_MSG_HDR_MASK(name) \
+	((0x1u << FM10K_MSG_##name##_SIZE) - 1)
+#define FM10K_MSG_HDR_FIELD_SET(value, name) \
+	(((u32)(value) & FM10K_MSG_HDR_MASK(name)) << FM10K_MSG_##name##_SHIFT)
+#define FM10K_MSG_HDR_FIELD_GET(value, name) \
+	((u16)((value) >> FM10K_MSG_##name##_SHIFT) & FM10K_MSG_HDR_MASK(name))
+
+/* offsets shared between all headers */
+#define FM10K_MSG_TYPE_SHIFT			0
+#define FM10K_MSG_TYPE_SIZE			4
+#define FM10K_MSG_TAIL_SHIFT			4
+#define FM10K_MSG_TAIL_SIZE			4
+#define FM10K_MSG_HEAD_SHIFT			8
+#define FM10K_MSG_HEAD_SIZE			4
+#define FM10K_MSG_RSVD0_SHIFT			12
+#define FM10K_MSG_RSVD0_SIZE			4
+
+/* offsets for data/disconnect headers */
+#define FM10K_MSG_CRC_SHIFT			16
+#define FM10K_MSG_CRC_SIZE			16
+
+/* offsets for connect headers */
+#define FM10K_MSG_CONNECT_SIZE_SHIFT		16
+#define FM10K_MSG_CONNECT_SIZE_SIZE		16
+
+/* offsets for error headers */
+#define FM10K_MSG_ERR_NO_SHIFT			16
+#define FM10K_MSG_ERR_NO_SIZE			16
+
+enum fm10k_msg_type {
+	FM10K_MSG_DATA			= 0x8,
+	FM10K_MSG_CONNECT		= 0xC,
+	FM10K_MSG_DISCONNECT		= 0xD,
+	FM10K_MSG_ERROR			= 0xE,
+};
+
+/* HNI/SM Mailbox FIFO format
+ *    3                   2                   1                   0
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-------+-----------------------+-------+-----------------------+
+ * | Error |      Remote Head      |Version|      Local Tail       |
+ * +-------+-----------------------+-------+-----------------------+
+ * |                                                               |
+ * .                        Local FIFO Data                        .
+ * .                                                               .
+ * +-------+-----------------------+-------+-----------------------+
+ *
+ * The layout above describes the format for the FIFOs used by the host
+ * network interface and the switch manager to communicate messages back
+ * and forth.  Both the HNI and the switch maintain one such FIFO.  The
+ * layout in memory has the switch manager FIFO followed immediately by
+ * the HNI FIFO.  For this reason I am using just the pointer to the
+ * HNI FIFO in the mailbox ops as the offset between the two is fixed.
+ *
+ * The header for the FIFO is broken out into the following fields:
+ * Local Tail:  Offset into FIFO region for next DWORD to write.
+ * Version:  Version info for mailbox, only values of 0/1 are supported.
+ * Remote Head:  Offset into remote FIFO to indicate how much we have read.
+ * Error: Error indication, values TBD.
+ */
+
+/* version number for switch manager mailboxes */
+#define FM10K_SM_MBX_VERSION		1
+#define FM10K_SM_MBX_FIFO_LEN		(FM10K_MBMEM_PF_XOR - 1)
+
+/* offsets shared between all SM FIFO headers */
+#define FM10K_MSG_SM_TAIL_SHIFT			0
+#define FM10K_MSG_SM_TAIL_SIZE			12
+#define FM10K_MSG_SM_VER_SHIFT			12
+#define FM10K_MSG_SM_VER_SIZE			4
+#define FM10K_MSG_SM_HEAD_SHIFT			16
+#define FM10K_MSG_SM_HEAD_SIZE			12
+#define FM10K_MSG_SM_ERR_SHIFT			28
+#define FM10K_MSG_SM_ERR_SIZE			4
+
+/* All error messages returned by mailbox functions
+ * The value -511 is 0xFE01 in hex.  The idea is to order the errors
+ * from 0xFE01 - 0xFEFF so error codes are easily visible in the mailbox
+ * messages.  This also helps to avoid error number collisions as Linux
+ * doesn't appear to use error numbers 256 - 511.
+ */
+#define FM10K_MBX_ERR(_n) ((_n) - 512)
+#define FM10K_MBX_ERR_NO_MBX		FM10K_MBX_ERR(0x01)
+#define FM10K_MBX_ERR_NO_SPACE		FM10K_MBX_ERR(0x03)
+#define FM10K_MBX_ERR_TAIL		FM10K_MBX_ERR(0x05)
+#define FM10K_MBX_ERR_HEAD		FM10K_MBX_ERR(0x06)
+#define FM10K_MBX_ERR_SRC		FM10K_MBX_ERR(0x08)
+#define FM10K_MBX_ERR_TYPE		FM10K_MBX_ERR(0x09)
+#define FM10K_MBX_ERR_SIZE		FM10K_MBX_ERR(0x0B)
+#define FM10K_MBX_ERR_BUSY		FM10K_MBX_ERR(0x0C)
+#define FM10K_MBX_ERR_RSVD0		FM10K_MBX_ERR(0x0E)
+#define FM10K_MBX_ERR_CRC		FM10K_MBX_ERR(0x0F)
+
+#define FM10K_MBX_CRC_SEED		0xFFFF
+
+struct fm10k_mbx_ops {
+	s32 (*connect)(struct fm10k_hw *, struct fm10k_mbx_info *);
+	void (*disconnect)(struct fm10k_hw *, struct fm10k_mbx_info *);
+	bool (*rx_ready)(struct fm10k_mbx_info *);
+	bool (*tx_ready)(struct fm10k_mbx_info *, u16);
+	bool (*tx_complete)(struct fm10k_mbx_info *);
+	s32 (*enqueue_tx)(struct fm10k_hw *, struct fm10k_mbx_info *,
+			  const u32 *);
+	s32 (*process)(struct fm10k_hw *, struct fm10k_mbx_info *);
+	s32 (*register_handlers)(struct fm10k_mbx_info *,
+				 const struct fm10k_msg_data *);
+};
+
+struct fm10k_mbx_fifo {
+	u32 *buffer;
+	u16 head;
+	u16 tail;
+	u16 size;
+};
+
+/* size of buffer to be stored in mailbox for FIFOs */
+#define FM10K_MBX_TX_BUFFER_SIZE	512
+#define FM10K_MBX_RX_BUFFER_SIZE	128
+#define FM10K_MBX_BUFFER_SIZE \
+	(FM10K_MBX_TX_BUFFER_SIZE + FM10K_MBX_RX_BUFFER_SIZE)
+
+/* minimum and maximum message size in dwords */
+#define FM10K_MBX_MSG_MAX_SIZE \
+	((FM10K_MBX_TX_BUFFER_SIZE - 1) & (FM10K_MBX_RX_BUFFER_SIZE - 1))
+#define FM10K_VFMBX_MSG_MTU	((FM10K_VFMBMEM_LEN / 2) - 1)
+
+#define FM10K_MBX_INIT_TIMEOUT	2000 /* number of retries on mailbox */
+#define FM10K_MBX_INIT_DELAY	500  /* microseconds between retries */
+
+struct fm10k_mbx_info {
+	/* function pointers for mailbox operations */
+	struct fm10k_mbx_ops ops;
+	const struct fm10k_msg_data *msg_data;
+
+	/* message FIFOs */
+	struct fm10k_mbx_fifo rx;
+	struct fm10k_mbx_fifo tx;
+
+	/* delay for handling timeouts */
+	u32 timeout;
+	u32 udelay;
+
+	/* mailbox state info */
+	u32 mbx_reg, mbmem_reg, mbx_lock, mbx_hdr;
+	u16 max_size, mbmem_len;
+	u16 tail, tail_len, pulled;
+	u16 head, head_len, pushed;
+	u16 local, remote;
+	enum fm10k_mbx_state state;
+
+	/* result of last mailbox test */
+	s32 test_result;
+
+	/* statistics */
+	u64 tx_busy;
+	u64 tx_dropped;
+	u64 tx_messages;
+	u64 tx_dwords;
+	u64 tx_mbmem_pulled;
+	u64 rx_messages;
+	u64 rx_dwords;
+	u64 rx_mbmem_pushed;
+	u64 rx_parse_err;
+
+	/* Buffer to store messages */
+	u32 buffer[FM10K_MBX_BUFFER_SIZE];
+};
+
+s32 fm10k_pfvf_mbx_init(struct fm10k_hw *, struct fm10k_mbx_info *,
+			const struct fm10k_msg_data *, u8);
+s32 fm10k_sm_mbx_init(struct fm10k_hw *, struct fm10k_mbx_info *,
+		      const struct fm10k_msg_data *);
+
+#endif /* _FM10K_MBX_H_ */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
new file mode 100644
index 0000000000..34ab5ff982
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -0,0 +1,1610 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
+
+#include "fm10k.h"
+#include <linux/vmalloc.h>
+#include <net/udp_tunnel.h>
+#include <linux/if_macvlan.h>
+
+/**
+ * fm10k_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @tx_ring:    tx descriptor ring (for a specific queue) to setup
+ *
+ * Return 0 on success, negative on failure
+ **/
+int fm10k_setup_tx_resources(struct fm10k_ring *tx_ring)
+{
+	struct device *dev = tx_ring->dev;
+	int size;
+
+	size = sizeof(struct fm10k_tx_buffer) * tx_ring->count;
+
+	tx_ring->tx_buffer = vzalloc(size);
+	if (!tx_ring->tx_buffer)
+		goto err;
+
+	u64_stats_init(&tx_ring->syncp);
+
+	/* round up to nearest 4K */
+	tx_ring->size = tx_ring->count * sizeof(struct fm10k_tx_desc);
+	tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+					   &tx_ring->dma, GFP_KERNEL);
+	if (!tx_ring->desc)
+		goto err;
+
+	return 0;
+
+err:
+	vfree(tx_ring->tx_buffer);
+	tx_ring->tx_buffer = NULL;
+	return -ENOMEM;
+}
+
+/**
+ * fm10k_setup_all_tx_resources - allocate all queues Tx resources
+ * @interface: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int fm10k_setup_all_tx_resources(struct fm10k_intfc *interface)
+{
+	int i, err;
+
+	for (i = 0; i < interface->num_tx_queues; i++) {
+		err = fm10k_setup_tx_resources(interface->tx_ring[i]);
+		if (!err)
+			continue;
+
+		netif_err(interface, probe, interface->netdev,
+			  "Allocation for Tx Queue %u failed\n", i);
+		goto err_setup_tx;
+	}
+
+	return 0;
+err_setup_tx:
+	/* rewind the index freeing the rings as we go */
+	while (i--)
+		fm10k_free_tx_resources(interface->tx_ring[i]);
+	return err;
+}
+
+/**
+ * fm10k_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @rx_ring:    rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int fm10k_setup_rx_resources(struct fm10k_ring *rx_ring)
+{
+	struct device *dev = rx_ring->dev;
+	int size;
+
+	size = sizeof(struct fm10k_rx_buffer) * rx_ring->count;
+
+	rx_ring->rx_buffer = vzalloc(size);
+	if (!rx_ring->rx_buffer)
+		goto err;
+
+	u64_stats_init(&rx_ring->syncp);
+
+	/* Round up to nearest 4K */
+	rx_ring->size = rx_ring->count * sizeof(union fm10k_rx_desc);
+	rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+					   &rx_ring->dma, GFP_KERNEL);
+	if (!rx_ring->desc)
+		goto err;
+
+	return 0;
+err:
+	vfree(rx_ring->rx_buffer);
+	rx_ring->rx_buffer = NULL;
+	return -ENOMEM;
+}
+
+/**
+ * fm10k_setup_all_rx_resources - allocate all queues Rx resources
+ * @interface: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int fm10k_setup_all_rx_resources(struct fm10k_intfc *interface)
+{
+	int i, err;
+
+	for (i = 0; i < interface->num_rx_queues; i++) {
+		err = fm10k_setup_rx_resources(interface->rx_ring[i]);
+		if (!err)
+			continue;
+
+		netif_err(interface, probe, interface->netdev,
+			  "Allocation for Rx Queue %u failed\n", i);
+		goto err_setup_rx;
+	}
+
+	return 0;
+err_setup_rx:
+	/* rewind the index freeing the rings as we go */
+	while (i--)
+		fm10k_free_rx_resources(interface->rx_ring[i]);
+	return err;
+}
+
+void fm10k_unmap_and_free_tx_resource(struct fm10k_ring *ring,
+				      struct fm10k_tx_buffer *tx_buffer)
+{
+	if (tx_buffer->skb) {
+		dev_kfree_skb_any(tx_buffer->skb);
+		if (dma_unmap_len(tx_buffer, len))
+			dma_unmap_single(ring->dev,
+					 dma_unmap_addr(tx_buffer, dma),
+					 dma_unmap_len(tx_buffer, len),
+					 DMA_TO_DEVICE);
+	} else if (dma_unmap_len(tx_buffer, len)) {
+		dma_unmap_page(ring->dev,
+			       dma_unmap_addr(tx_buffer, dma),
+			       dma_unmap_len(tx_buffer, len),
+			       DMA_TO_DEVICE);
+	}
+	tx_buffer->next_to_watch = NULL;
+	tx_buffer->skb = NULL;
+	dma_unmap_len_set(tx_buffer, len, 0);
+	/* tx_buffer must be completely set up in the transmit path */
+}
+
+/**
+ * fm10k_clean_tx_ring - Free Tx Buffers
+ * @tx_ring: ring to be cleaned
+ **/
+static void fm10k_clean_tx_ring(struct fm10k_ring *tx_ring)
+{
+	unsigned long size;
+	u16 i;
+
+	/* ring already cleared, nothing to do */
+	if (!tx_ring->tx_buffer)
+		return;
+
+	/* Free all the Tx ring sk_buffs */
+	for (i = 0; i < tx_ring->count; i++) {
+		struct fm10k_tx_buffer *tx_buffer = &tx_ring->tx_buffer[i];
+
+		fm10k_unmap_and_free_tx_resource(tx_ring, tx_buffer);
+	}
+
+	/* reset BQL values */
+	netdev_tx_reset_queue(txring_txq(tx_ring));
+
+	size = sizeof(struct fm10k_tx_buffer) * tx_ring->count;
+	memset(tx_ring->tx_buffer, 0, size);
+
+	/* Zero out the descriptor ring */
+	memset(tx_ring->desc, 0, tx_ring->size);
+}
+
+/**
+ * fm10k_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ **/
+void fm10k_free_tx_resources(struct fm10k_ring *tx_ring)
+{
+	fm10k_clean_tx_ring(tx_ring);
+
+	vfree(tx_ring->tx_buffer);
+	tx_ring->tx_buffer = NULL;
+
+	/* if not set, then don't free */
+	if (!tx_ring->desc)
+		return;
+
+	dma_free_coherent(tx_ring->dev, tx_ring->size,
+			  tx_ring->desc, tx_ring->dma);
+	tx_ring->desc = NULL;
+}
+
+/**
+ * fm10k_clean_all_tx_rings - Free Tx Buffers for all queues
+ * @interface: board private structure
+ **/
+void fm10k_clean_all_tx_rings(struct fm10k_intfc *interface)
+{
+	int i;
+
+	for (i = 0; i < interface->num_tx_queues; i++)
+		fm10k_clean_tx_ring(interface->tx_ring[i]);
+}
+
+/**
+ * fm10k_free_all_tx_resources - Free Tx Resources for All Queues
+ * @interface: board private structure
+ *
+ * Free all transmit software resources
+ **/
+static void fm10k_free_all_tx_resources(struct fm10k_intfc *interface)
+{
+	int i = interface->num_tx_queues;
+
+	while (i--)
+		fm10k_free_tx_resources(interface->tx_ring[i]);
+}
+
+/**
+ * fm10k_clean_rx_ring - Free Rx Buffers per Queue
+ * @rx_ring: ring to free buffers from
+ **/
+static void fm10k_clean_rx_ring(struct fm10k_ring *rx_ring)
+{
+	unsigned long size;
+	u16 i;
+
+	if (!rx_ring->rx_buffer)
+		return;
+
+	dev_kfree_skb(rx_ring->skb);
+	rx_ring->skb = NULL;
+
+	/* Free all the Rx ring sk_buffs */
+	for (i = 0; i < rx_ring->count; i++) {
+		struct fm10k_rx_buffer *buffer = &rx_ring->rx_buffer[i];
+		/* clean-up will only set page pointer to NULL */
+		if (!buffer->page)
+			continue;
+
+		dma_unmap_page(rx_ring->dev, buffer->dma,
+			       PAGE_SIZE, DMA_FROM_DEVICE);
+		__free_page(buffer->page);
+
+		buffer->page = NULL;
+	}
+
+	size = sizeof(struct fm10k_rx_buffer) * rx_ring->count;
+	memset(rx_ring->rx_buffer, 0, size);
+
+	/* Zero out the descriptor ring */
+	memset(rx_ring->desc, 0, rx_ring->size);
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+}
+
+/**
+ * fm10k_free_rx_resources - Free Rx Resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+void fm10k_free_rx_resources(struct fm10k_ring *rx_ring)
+{
+	fm10k_clean_rx_ring(rx_ring);
+
+	vfree(rx_ring->rx_buffer);
+	rx_ring->rx_buffer = NULL;
+
+	/* if not set, then don't free */
+	if (!rx_ring->desc)
+		return;
+
+	dma_free_coherent(rx_ring->dev, rx_ring->size,
+			  rx_ring->desc, rx_ring->dma);
+
+	rx_ring->desc = NULL;
+}
+
+/**
+ * fm10k_clean_all_rx_rings - Free Rx Buffers for all queues
+ * @interface: board private structure
+ **/
+void fm10k_clean_all_rx_rings(struct fm10k_intfc *interface)
+{
+	int i;
+
+	for (i = 0; i < interface->num_rx_queues; i++)
+		fm10k_clean_rx_ring(interface->rx_ring[i]);
+}
+
+/**
+ * fm10k_free_all_rx_resources - Free Rx Resources for All Queues
+ * @interface: board private structure
+ *
+ * Free all receive software resources
+ **/
+static void fm10k_free_all_rx_resources(struct fm10k_intfc *interface)
+{
+	int i = interface->num_rx_queues;
+
+	while (i--)
+		fm10k_free_rx_resources(interface->rx_ring[i]);
+}
+
+/**
+ * fm10k_request_glort_range - Request GLORTs for use in configuring rules
+ * @interface: board private structure
+ *
+ * This function allocates a range of glorts for this interface to use.
+ **/
+static void fm10k_request_glort_range(struct fm10k_intfc *interface)
+{
+	struct fm10k_hw *hw = &interface->hw;
+	u16 mask = (~hw->mac.dglort_map) >> FM10K_DGLORTMAP_MASK_SHIFT;
+
+	/* establish GLORT base */
+	interface->glort = hw->mac.dglort_map & FM10K_DGLORTMAP_NONE;
+	interface->glort_count = 0;
+
+	/* nothing we can do until mask is allocated */
+	if (hw->mac.dglort_map == FM10K_DGLORTMAP_NONE)
+		return;
+
+	/* we support 3 possible GLORT configurations.
+	 * 1: VFs consume all but the last 1
+	 * 2: VFs and PF split glorts with possible gap between
+	 * 3: VFs allocated first 64, all others belong to PF
+	 */
+	if (mask <= hw->iov.total_vfs) {
+		interface->glort_count = 1;
+		interface->glort += mask;
+	} else if (mask < 64) {
+		interface->glort_count = (mask + 1) / 2;
+		interface->glort += interface->glort_count;
+	} else {
+		interface->glort_count = mask - 63;
+		interface->glort += 64;
+	}
+}
+
+/**
+ * fm10k_restore_udp_port_info
+ * @interface: board private structure
+ *
+ * This function restores the value in the tunnel_cfg register(s) after reset
+ **/
+static void fm10k_restore_udp_port_info(struct fm10k_intfc *interface)
+{
+	struct fm10k_hw *hw = &interface->hw;
+
+	/* only the PF supports configuring tunnels */
+	if (hw->mac.type != fm10k_mac_pf)
+		return;
+
+	/* restore tunnel configuration register */
+	fm10k_write_reg(hw, FM10K_TUNNEL_CFG,
+			ntohs(interface->vxlan_port) |
+			(ETH_P_TEB << FM10K_TUNNEL_CFG_NVGRE_SHIFT));
+
+	/* restore Geneve tunnel configuration register */
+	fm10k_write_reg(hw, FM10K_TUNNEL_CFG_GENEVE,
+			ntohs(interface->geneve_port));
+}
+
+/**
+ * fm10k_udp_tunnel_sync - Called when UDP tunnel ports change
+ * @dev: network interface device structure
+ * @table: Tunnel table (according to tables of @fm10k_udp_tunnels)
+ *
+ * This function is called when a new UDP tunnel port is added or deleted.
+ * Due to hardware restrictions, only one port per type can be offloaded at
+ * once. Core will send to the driver a port of its choice.
+ **/
+static int fm10k_udp_tunnel_sync(struct net_device *dev, unsigned int table)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	struct udp_tunnel_info ti;
+
+	udp_tunnel_nic_get_port(dev, table, 0, &ti);
+	if (!table)
+		interface->vxlan_port = ti.port;
+	else
+		interface->geneve_port = ti.port;
+
+	fm10k_restore_udp_port_info(interface);
+	return 0;
+}
+
+static const struct udp_tunnel_nic_info fm10k_udp_tunnels = {
+	.sync_table	= fm10k_udp_tunnel_sync,
+	.tables		= {
+		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN,  },
+		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, },
+	},
+};
+
+/**
+ * fm10k_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ **/
+int fm10k_open(struct net_device *netdev)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	int err;
+
+	/* allocate transmit descriptors */
+	err = fm10k_setup_all_tx_resources(interface);
+	if (err)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	err = fm10k_setup_all_rx_resources(interface);
+	if (err)
+		goto err_setup_rx;
+
+	/* allocate interrupt resources */
+	err = fm10k_qv_request_irq(interface);
+	if (err)
+		goto err_req_irq;
+
+	/* setup GLORT assignment for this port */
+	fm10k_request_glort_range(interface);
+
+	/* Notify the stack of the actual queue counts */
+	err = netif_set_real_num_tx_queues(netdev,
+					   interface->num_tx_queues);
+	if (err)
+		goto err_set_queues;
+
+	err = netif_set_real_num_rx_queues(netdev,
+					   interface->num_rx_queues);
+	if (err)
+		goto err_set_queues;
+
+	fm10k_up(interface);
+
+	return 0;
+
+err_set_queues:
+	fm10k_qv_free_irq(interface);
+err_req_irq:
+	fm10k_free_all_rx_resources(interface);
+err_setup_rx:
+	fm10k_free_all_tx_resources(interface);
+err_setup_tx:
+	return err;
+}
+
+/**
+ * fm10k_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the drivers control, but
+ * needs to be disabled.  A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ **/
+int fm10k_close(struct net_device *netdev)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+
+	fm10k_down(interface);
+
+	fm10k_qv_free_irq(interface);
+
+	fm10k_free_all_tx_resources(interface);
+	fm10k_free_all_rx_resources(interface);
+
+	return 0;
+}
+
+static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	int num_tx_queues = READ_ONCE(interface->num_tx_queues);
+	unsigned int r_idx = skb->queue_mapping;
+	int err;
+
+	if (!num_tx_queues)
+		return NETDEV_TX_BUSY;
+
+	if ((skb->protocol == htons(ETH_P_8021Q)) &&
+	    !skb_vlan_tag_present(skb)) {
+		/* FM10K only supports hardware tagging, any tags in frame
+		 * are considered 2nd level or "outer" tags
+		 */
+		struct vlan_hdr *vhdr;
+		__be16 proto;
+
+		/* make sure skb is not shared */
+		skb = skb_share_check(skb, GFP_ATOMIC);
+		if (!skb)
+			return NETDEV_TX_OK;
+
+		/* make sure there is enough room to move the ethernet header */
+		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+			return NETDEV_TX_OK;
+
+		/* verify the skb head is not shared */
+		err = skb_cow_head(skb, 0);
+		if (err) {
+			dev_kfree_skb(skb);
+			return NETDEV_TX_OK;
+		}
+
+		/* locate VLAN header */
+		vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
+
+		/* pull the 2 key pieces of data out of it */
+		__vlan_hwaccel_put_tag(skb,
+				       htons(ETH_P_8021Q),
+				       ntohs(vhdr->h_vlan_TCI));
+		proto = vhdr->h_vlan_encapsulated_proto;
+		skb->protocol = (ntohs(proto) >= 1536) ? proto :
+							 htons(ETH_P_802_2);
+
+		/* squash it by moving the ethernet addresses up 4 bytes */
+		memmove(skb->data + VLAN_HLEN, skb->data, 12);
+		__skb_pull(skb, VLAN_HLEN);
+		skb_reset_mac_header(skb);
+	}
+
+	/* The minimum packet size for a single buffer is 17B so pad the skb
+	 * in order to meet this minimum size requirement.
+	 */
+	if (unlikely(skb->len < 17)) {
+		int pad_len = 17 - skb->len;
+
+		if (skb_pad(skb, pad_len))
+			return NETDEV_TX_OK;
+		__skb_put(skb, pad_len);
+	}
+
+	if (r_idx >= num_tx_queues)
+		r_idx %= num_tx_queues;
+
+	err = fm10k_xmit_frame_ring(skb, interface->tx_ring[r_idx]);
+
+	return err;
+}
+
+/**
+ * fm10k_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ * @txqueue: the index of the Tx queue that timed out
+ **/
+static void fm10k_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	struct fm10k_ring *tx_ring;
+	bool real_tx_hang = false;
+
+	if (txqueue >= interface->num_tx_queues) {
+		WARN(1, "invalid Tx queue index %d", txqueue);
+		return;
+	}
+
+	tx_ring = interface->tx_ring[txqueue];
+	if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring))
+		real_tx_hang = true;
+
+#define TX_TIMEO_LIMIT 16000
+	if (real_tx_hang) {
+		fm10k_tx_timeout_reset(interface);
+	} else {
+		netif_info(interface, drv, netdev,
+			   "Fake Tx hang detected with timeout of %d seconds\n",
+			   netdev->watchdog_timeo / HZ);
+
+		/* fake Tx hang - increase the kernel timeout */
+		if (netdev->watchdog_timeo < TX_TIMEO_LIMIT)
+			netdev->watchdog_timeo *= 2;
+	}
+}
+
+/**
+ * fm10k_host_mbx_ready - Check PF interface's mailbox readiness
+ * @interface: board private structure
+ *
+ * This function checks if the PF interface's mailbox is ready before queueing
+ * mailbox messages for transmission. This will prevent filling the TX mailbox
+ * queue when the receiver is not ready. VF interfaces are exempt from this
+ * check since it will block all PF-VF mailbox messages from being sent from
+ * the VF to the PF at initialization.
+ **/
+static bool fm10k_host_mbx_ready(struct fm10k_intfc *interface)
+{
+	struct fm10k_hw *hw = &interface->hw;
+
+	return (hw->mac.type == fm10k_mac_vf || interface->host_ready);
+}
+
+/**
+ * fm10k_queue_vlan_request - Queue a VLAN update request
+ * @interface: the fm10k interface structure
+ * @vid: the VLAN vid
+ * @vsi: VSI index number
+ * @set: whether to set or clear
+ *
+ * This function queues up a VLAN update. For VFs, this must be sent to the
+ * managing PF over the mailbox. For PFs, we'll use the same handling so that
+ * it's similar to the VF. This avoids storming the PF<->VF mailbox with too
+ * many VLAN updates during reset.
+ */
+int fm10k_queue_vlan_request(struct fm10k_intfc *interface,
+			     u32 vid, u8 vsi, bool set)
+{
+	struct fm10k_macvlan_request *request;
+	unsigned long flags;
+
+	/* This must be atomic since we may be called while the netdev
+	 * addr_list_lock is held
+	 */
+	request = kzalloc(sizeof(*request), GFP_ATOMIC);
+	if (!request)
+		return -ENOMEM;
+
+	request->type = FM10K_VLAN_REQUEST;
+	request->vlan.vid = vid;
+	request->vlan.vsi = vsi;
+	request->set = set;
+
+	spin_lock_irqsave(&interface->macvlan_lock, flags);
+	list_add_tail(&request->list, &interface->macvlan_requests);
+	spin_unlock_irqrestore(&interface->macvlan_lock, flags);
+
+	fm10k_macvlan_schedule(interface);
+
+	return 0;
+}
+
+/**
+ * fm10k_queue_mac_request - Queue a MAC update request
+ * @interface: the fm10k interface structure
+ * @glort: the target glort for this update
+ * @addr: the address to update
+ * @vid: the vid to update
+ * @set: whether to add or remove
+ *
+ * This function queues up a MAC request for sending to the switch manager.
+ * A separate thread monitors the queue and sends updates to the switch
+ * manager. Return 0 on success, and negative error code on failure.
+ **/
+int fm10k_queue_mac_request(struct fm10k_intfc *interface, u16 glort,
+			    const unsigned char *addr, u16 vid, bool set)
+{
+	struct fm10k_macvlan_request *request;
+	unsigned long flags;
+
+	/* This must be atomic since we may be called while the netdev
+	 * addr_list_lock is held
+	 */
+	request = kzalloc(sizeof(*request), GFP_ATOMIC);
+	if (!request)
+		return -ENOMEM;
+
+	if (is_multicast_ether_addr(addr))
+		request->type = FM10K_MC_MAC_REQUEST;
+	else
+		request->type = FM10K_UC_MAC_REQUEST;
+
+	ether_addr_copy(request->mac.addr, addr);
+	request->mac.glort = glort;
+	request->mac.vid = vid;
+	request->set = set;
+
+	spin_lock_irqsave(&interface->macvlan_lock, flags);
+	list_add_tail(&request->list, &interface->macvlan_requests);
+	spin_unlock_irqrestore(&interface->macvlan_lock, flags);
+
+	fm10k_macvlan_schedule(interface);
+
+	return 0;
+}
+
+/**
+ * fm10k_clear_macvlan_queue - Cancel pending updates for a given glort
+ * @interface: the fm10k interface structure
+ * @glort: the target glort to clear
+ * @vlans: true to clear VLAN messages, false to ignore them
+ *
+ * Cancel any outstanding MAC/VLAN requests for a given glort. This is
+ * expected to be called when a logical port goes down.
+ **/
+void fm10k_clear_macvlan_queue(struct fm10k_intfc *interface,
+			       u16 glort, bool vlans)
+
+{
+	struct fm10k_macvlan_request *r, *tmp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&interface->macvlan_lock, flags);
+
+	/* Free any outstanding MAC/VLAN requests for this interface */
+	list_for_each_entry_safe(r, tmp, &interface->macvlan_requests, list) {
+		switch (r->type) {
+		case FM10K_MC_MAC_REQUEST:
+		case FM10K_UC_MAC_REQUEST:
+			/* Don't free requests for other interfaces */
+			if (r->mac.glort != glort)
+				break;
+			fallthrough;
+		case FM10K_VLAN_REQUEST:
+			if (vlans) {
+				list_del(&r->list);
+				kfree(r);
+			}
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&interface->macvlan_lock, flags);
+}
+
+static int fm10k_uc_vlan_unsync(struct net_device *netdev,
+				const unsigned char *uc_addr)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	u16 glort = interface->glort;
+	u16 vid = interface->vid;
+	bool set = !!(vid / VLAN_N_VID);
+	int err;
+
+	/* drop any leading bits on the VLAN ID */
+	vid &= VLAN_N_VID - 1;
+
+	err = fm10k_queue_mac_request(interface, glort, uc_addr, vid, set);
+	if (err)
+		return err;
+
+	/* return non-zero value as we are only doing a partial sync/unsync */
+	return 1;
+}
+
+static int fm10k_mc_vlan_unsync(struct net_device *netdev,
+				const unsigned char *mc_addr)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	u16 glort = interface->glort;
+	u16 vid = interface->vid;
+	bool set = !!(vid / VLAN_N_VID);
+	int err;
+
+	/* drop any leading bits on the VLAN ID */
+	vid &= VLAN_N_VID - 1;
+
+	err = fm10k_queue_mac_request(interface, glort, mc_addr, vid, set);
+	if (err)
+		return err;
+
+	/* return non-zero value as we are only doing a partial sync/unsync */
+	return 1;
+}
+
+static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	struct fm10k_l2_accel *l2_accel = interface->l2_accel;
+	struct fm10k_hw *hw = &interface->hw;
+	u16 glort;
+	s32 err;
+	int i;
+
+	/* updates do not apply to VLAN 0 */
+	if (!vid)
+		return 0;
+
+	if (vid >= VLAN_N_VID)
+		return -EINVAL;
+
+	/* Verify that we have permission to add VLANs. If this is a request
+	 * to remove a VLAN, we still want to allow the user to remove the
+	 * VLAN device. In that case, we need to clear the bit in the
+	 * active_vlans bitmask.
+	 */
+	if (set && hw->mac.vlan_override)
+		return -EACCES;
+
+	/* update active_vlans bitmask */
+	set_bit(vid, interface->active_vlans);
+	if (!set)
+		clear_bit(vid, interface->active_vlans);
+
+	/* disable the default VLAN ID on ring if we have an active VLAN */
+	for (i = 0; i < interface->num_rx_queues; i++) {
+		struct fm10k_ring *rx_ring = interface->rx_ring[i];
+		u16 rx_vid = rx_ring->vid & (VLAN_N_VID - 1);
+
+		if (test_bit(rx_vid, interface->active_vlans))
+			rx_ring->vid |= FM10K_VLAN_CLEAR;
+		else
+			rx_ring->vid &= ~FM10K_VLAN_CLEAR;
+	}
+
+	/* If our VLAN has been overridden, there is no reason to send VLAN
+	 * removal requests as they will be silently ignored.
+	 */
+	if (hw->mac.vlan_override)
+		return 0;
+
+	/* Do not remove default VLAN ID related entries from VLAN and MAC
+	 * tables
+	 */
+	if (!set && vid == hw->mac.default_vid)
+		return 0;
+
+	/* Do not throw an error if the interface is down. We will sync once
+	 * we come up
+	 */
+	if (test_bit(__FM10K_DOWN, interface->state))
+		return 0;
+
+	fm10k_mbx_lock(interface);
+
+	/* only need to update the VLAN if not in promiscuous mode */
+	if (!(netdev->flags & IFF_PROMISC)) {
+		err = fm10k_queue_vlan_request(interface, vid, 0, set);
+		if (err)
+			goto err_out;
+	}
+
+	/* Update our base MAC address */
+	err = fm10k_queue_mac_request(interface, interface->glort,
+				      hw->mac.addr, vid, set);
+	if (err)
+		goto err_out;
+
+	/* Update L2 accelerated macvlan addresses */
+	if (l2_accel) {
+		for (i = 0; i < l2_accel->size; i++) {
+			struct net_device *sdev = l2_accel->macvlan[i];
+
+			if (!sdev)
+				continue;
+
+			glort = l2_accel->dglort + 1 + i;
+
+			fm10k_queue_mac_request(interface, glort,
+						sdev->dev_addr,
+						vid, set);
+		}
+	}
+
+	/* set VLAN ID prior to syncing/unsyncing the VLAN */
+	interface->vid = vid + (set ? VLAN_N_VID : 0);
+
+	/* Update the unicast and multicast address list to add/drop VLAN */
+	__dev_uc_unsync(netdev, fm10k_uc_vlan_unsync);
+	__dev_mc_unsync(netdev, fm10k_mc_vlan_unsync);
+
+err_out:
+	fm10k_mbx_unlock(interface);
+
+	return err;
+}
+
+static int fm10k_vlan_rx_add_vid(struct net_device *netdev,
+				 __always_unused __be16 proto, u16 vid)
+{
+	/* update VLAN and address table based on changes */
+	return fm10k_update_vid(netdev, vid, true);
+}
+
+static int fm10k_vlan_rx_kill_vid(struct net_device *netdev,
+				  __always_unused __be16 proto, u16 vid)
+{
+	/* update VLAN and address table based on changes */
+	return fm10k_update_vid(netdev, vid, false);
+}
+
+static u16 fm10k_find_next_vlan(struct fm10k_intfc *interface, u16 vid)
+{
+	struct fm10k_hw *hw = &interface->hw;
+	u16 default_vid = hw->mac.default_vid;
+	u16 vid_limit = vid < default_vid ? default_vid : VLAN_N_VID;
+
+	vid = find_next_bit(interface->active_vlans, vid_limit, ++vid);
+
+	return vid;
+}
+
+static void fm10k_clear_unused_vlans(struct fm10k_intfc *interface)
+{
+	u32 vid, prev_vid;
+
+	/* loop through and find any gaps in the table */
+	for (vid = 0, prev_vid = 0;
+	     prev_vid < VLAN_N_VID;
+	     prev_vid = vid + 1, vid = fm10k_find_next_vlan(interface, vid)) {
+		if (prev_vid == vid)
+			continue;
+
+		/* send request to clear multiple bits at a time */
+		prev_vid += (vid - prev_vid - 1) << FM10K_VLAN_LENGTH_SHIFT;
+		fm10k_queue_vlan_request(interface, prev_vid, 0, false);
+	}
+}
+
+static int __fm10k_uc_sync(struct net_device *dev,
+			   const unsigned char *addr, bool sync)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	u16 vid, glort = interface->glort;
+	s32 err;
+
+	if (!is_valid_ether_addr(addr))
+		return -EADDRNOTAVAIL;
+
+	for (vid = fm10k_find_next_vlan(interface, 0);
+	     vid < VLAN_N_VID;
+	     vid = fm10k_find_next_vlan(interface, vid)) {
+		err = fm10k_queue_mac_request(interface, glort,
+					      addr, vid, sync);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int fm10k_uc_sync(struct net_device *dev,
+			 const unsigned char *addr)
+{
+	return __fm10k_uc_sync(dev, addr, true);
+}
+
+static int fm10k_uc_unsync(struct net_device *dev,
+			   const unsigned char *addr)
+{
+	return __fm10k_uc_sync(dev, addr, false);
+}
+
+static int fm10k_set_mac(struct net_device *dev, void *p)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	struct fm10k_hw *hw = &interface->hw;
+	struct sockaddr *addr = p;
+	s32 err = 0;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	if (dev->flags & IFF_UP) {
+		/* setting MAC address requires mailbox */
+		fm10k_mbx_lock(interface);
+
+		err = fm10k_uc_sync(dev, addr->sa_data);
+		if (!err)
+			fm10k_uc_unsync(dev, hw->mac.addr);
+
+		fm10k_mbx_unlock(interface);
+	}
+
+	if (!err) {
+		eth_hw_addr_set(dev, addr->sa_data);
+		ether_addr_copy(hw->mac.addr, addr->sa_data);
+		dev->addr_assign_type &= ~NET_ADDR_RANDOM;
+	}
+
+	/* if we had a mailbox error suggest trying again */
+	return err ? -EAGAIN : 0;
+}
+
+static int __fm10k_mc_sync(struct net_device *dev,
+			   const unsigned char *addr, bool sync)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	u16 vid, glort = interface->glort;
+	s32 err;
+
+	if (!is_multicast_ether_addr(addr))
+		return -EADDRNOTAVAIL;
+
+	for (vid = fm10k_find_next_vlan(interface, 0);
+	     vid < VLAN_N_VID;
+	     vid = fm10k_find_next_vlan(interface, vid)) {
+		err = fm10k_queue_mac_request(interface, glort,
+					      addr, vid, sync);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int fm10k_mc_sync(struct net_device *dev,
+			 const unsigned char *addr)
+{
+	return __fm10k_mc_sync(dev, addr, true);
+}
+
+static int fm10k_mc_unsync(struct net_device *dev,
+			   const unsigned char *addr)
+{
+	return __fm10k_mc_sync(dev, addr, false);
+}
+
+static void fm10k_set_rx_mode(struct net_device *dev)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	struct fm10k_hw *hw = &interface->hw;
+	int xcast_mode;
+
+	/* no need to update the harwdare if we are not running */
+	if (!(dev->flags & IFF_UP))
+		return;
+
+	/* determine new mode based on flags */
+	xcast_mode = (dev->flags & IFF_PROMISC) ? FM10K_XCAST_MODE_PROMISC :
+		     (dev->flags & IFF_ALLMULTI) ? FM10K_XCAST_MODE_ALLMULTI :
+		     (dev->flags & (IFF_BROADCAST | IFF_MULTICAST)) ?
+		     FM10K_XCAST_MODE_MULTI : FM10K_XCAST_MODE_NONE;
+
+	fm10k_mbx_lock(interface);
+
+	/* update xcast mode first, but only if it changed */
+	if (interface->xcast_mode != xcast_mode) {
+		/* update VLAN table when entering promiscuous mode */
+		if (xcast_mode == FM10K_XCAST_MODE_PROMISC)
+			fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL,
+						 0, true);
+
+		/* clear VLAN table when exiting promiscuous mode */
+		if (interface->xcast_mode == FM10K_XCAST_MODE_PROMISC)
+			fm10k_clear_unused_vlans(interface);
+
+		/* update xcast mode if host's mailbox is ready */
+		if (fm10k_host_mbx_ready(interface))
+			hw->mac.ops.update_xcast_mode(hw, interface->glort,
+						      xcast_mode);
+
+		/* record updated xcast mode state */
+		interface->xcast_mode = xcast_mode;
+	}
+
+	/* synchronize all of the addresses */
+	__dev_uc_sync(dev, fm10k_uc_sync, fm10k_uc_unsync);
+	__dev_mc_sync(dev, fm10k_mc_sync, fm10k_mc_unsync);
+
+	fm10k_mbx_unlock(interface);
+}
+
+void fm10k_restore_rx_state(struct fm10k_intfc *interface)
+{
+	struct fm10k_l2_accel *l2_accel = interface->l2_accel;
+	struct net_device *netdev = interface->netdev;
+	struct fm10k_hw *hw = &interface->hw;
+	int xcast_mode, i;
+	u16 vid, glort;
+
+	/* record glort for this interface */
+	glort = interface->glort;
+
+	/* convert interface flags to xcast mode */
+	if (netdev->flags & IFF_PROMISC)
+		xcast_mode = FM10K_XCAST_MODE_PROMISC;
+	else if (netdev->flags & IFF_ALLMULTI)
+		xcast_mode = FM10K_XCAST_MODE_ALLMULTI;
+	else if (netdev->flags & (IFF_BROADCAST | IFF_MULTICAST))
+		xcast_mode = FM10K_XCAST_MODE_MULTI;
+	else
+		xcast_mode = FM10K_XCAST_MODE_NONE;
+
+	fm10k_mbx_lock(interface);
+
+	/* Enable logical port if host's mailbox is ready */
+	if (fm10k_host_mbx_ready(interface))
+		hw->mac.ops.update_lport_state(hw, glort,
+					       interface->glort_count, true);
+
+	/* update VLAN table */
+	fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL, 0,
+				 xcast_mode == FM10K_XCAST_MODE_PROMISC);
+
+	/* update table with current entries */
+	for (vid = fm10k_find_next_vlan(interface, 0);
+	     vid < VLAN_N_VID;
+	     vid = fm10k_find_next_vlan(interface, vid)) {
+		fm10k_queue_vlan_request(interface, vid, 0, true);
+
+		fm10k_queue_mac_request(interface, glort,
+					hw->mac.addr, vid, true);
+
+		/* synchronize macvlan addresses */
+		if (l2_accel) {
+			for (i = 0; i < l2_accel->size; i++) {
+				struct net_device *sdev = l2_accel->macvlan[i];
+
+				if (!sdev)
+					continue;
+
+				glort = l2_accel->dglort + 1 + i;
+
+				fm10k_queue_mac_request(interface, glort,
+							sdev->dev_addr,
+							vid, true);
+			}
+		}
+	}
+
+	/* update xcast mode before synchronizing addresses if host's mailbox
+	 * is ready
+	 */
+	if (fm10k_host_mbx_ready(interface))
+		hw->mac.ops.update_xcast_mode(hw, glort, xcast_mode);
+
+	/* synchronize all of the addresses */
+	__dev_uc_sync(netdev, fm10k_uc_sync, fm10k_uc_unsync);
+	__dev_mc_sync(netdev, fm10k_mc_sync, fm10k_mc_unsync);
+
+	/* synchronize macvlan addresses */
+	if (l2_accel) {
+		for (i = 0; i < l2_accel->size; i++) {
+			struct net_device *sdev = l2_accel->macvlan[i];
+
+			if (!sdev)
+				continue;
+
+			glort = l2_accel->dglort + 1 + i;
+
+			hw->mac.ops.update_xcast_mode(hw, glort,
+						      FM10K_XCAST_MODE_NONE);
+			fm10k_queue_mac_request(interface, glort,
+						sdev->dev_addr,
+						hw->mac.default_vid, true);
+		}
+	}
+
+	fm10k_mbx_unlock(interface);
+
+	/* record updated xcast mode state */
+	interface->xcast_mode = xcast_mode;
+
+	/* Restore tunnel configuration */
+	fm10k_restore_udp_port_info(interface);
+}
+
+void fm10k_reset_rx_state(struct fm10k_intfc *interface)
+{
+	struct net_device *netdev = interface->netdev;
+	struct fm10k_hw *hw = &interface->hw;
+
+	/* Wait for MAC/VLAN work to finish */
+	while (test_bit(__FM10K_MACVLAN_SCHED, interface->state))
+		usleep_range(1000, 2000);
+
+	/* Cancel pending MAC/VLAN requests */
+	fm10k_clear_macvlan_queue(interface, interface->glort, true);
+
+	fm10k_mbx_lock(interface);
+
+	/* clear the logical port state on lower device if host's mailbox is
+	 * ready
+	 */
+	if (fm10k_host_mbx_ready(interface))
+		hw->mac.ops.update_lport_state(hw, interface->glort,
+					       interface->glort_count, false);
+
+	fm10k_mbx_unlock(interface);
+
+	/* reset flags to default state */
+	interface->xcast_mode = FM10K_XCAST_MODE_NONE;
+
+	/* clear the sync flag since the lport has been dropped */
+	__dev_uc_unsync(netdev, NULL);
+	__dev_mc_unsync(netdev, NULL);
+}
+
+/**
+ * fm10k_get_stats64 - Get System Network Statistics
+ * @netdev: network interface device structure
+ * @stats: storage space for 64bit statistics
+ *
+ * Obtain 64bit statistics in a way that is safe for both 32bit and 64bit
+ * architectures.
+ */
+static void fm10k_get_stats64(struct net_device *netdev,
+			      struct rtnl_link_stats64 *stats)
+{
+	struct fm10k_intfc *interface = netdev_priv(netdev);
+	struct fm10k_ring *ring;
+	unsigned int start, i;
+	u64 bytes, packets;
+
+	rcu_read_lock();
+
+	for (i = 0; i < interface->num_rx_queues; i++) {
+		ring = READ_ONCE(interface->rx_ring[i]);
+
+		if (!ring)
+			continue;
+
+		do {
+			start = u64_stats_fetch_begin(&ring->syncp);
+			packets = ring->stats.packets;
+			bytes   = ring->stats.bytes;
+		} while (u64_stats_fetch_retry(&ring->syncp, start));
+
+		stats->rx_packets += packets;
+		stats->rx_bytes   += bytes;
+	}
+
+	for (i = 0; i < interface->num_tx_queues; i++) {
+		ring = READ_ONCE(interface->tx_ring[i]);
+
+		if (!ring)
+			continue;
+
+		do {
+			start = u64_stats_fetch_begin(&ring->syncp);
+			packets = ring->stats.packets;
+			bytes   = ring->stats.bytes;
+		} while (u64_stats_fetch_retry(&ring->syncp, start));
+
+		stats->tx_packets += packets;
+		stats->tx_bytes   += bytes;
+	}
+
+	rcu_read_unlock();
+
+	/* following stats updated by fm10k_service_task() */
+	stats->rx_missed_errors	= netdev->stats.rx_missed_errors;
+}
+
+int fm10k_setup_tc(struct net_device *dev, u8 tc)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	int err;
+
+	/* Currently only the PF supports priority classes */
+	if (tc && (interface->hw.mac.type != fm10k_mac_pf))
+		return -EINVAL;
+
+	/* Hardware supports up to 8 traffic classes */
+	if (tc > 8)
+		return -EINVAL;
+
+	/* Hardware has to reinitialize queues to match packet
+	 * buffer alignment. Unfortunately, the hardware is not
+	 * flexible enough to do this dynamically.
+	 */
+	if (netif_running(dev))
+		fm10k_close(dev);
+
+	fm10k_mbx_free_irq(interface);
+
+	fm10k_clear_queueing_scheme(interface);
+
+	/* we expect the prio_tc map to be repopulated later */
+	netdev_reset_tc(dev);
+	netdev_set_num_tc(dev, tc);
+
+	err = fm10k_init_queueing_scheme(interface);
+	if (err)
+		goto err_queueing_scheme;
+
+	err = fm10k_mbx_request_irq(interface);
+	if (err)
+		goto err_mbx_irq;
+
+	err = netif_running(dev) ? fm10k_open(dev) : 0;
+	if (err)
+		goto err_open;
+
+	/* flag to indicate SWPRI has yet to be updated */
+	set_bit(FM10K_FLAG_SWPRI_CONFIG, interface->flags);
+
+	return 0;
+err_open:
+	fm10k_mbx_free_irq(interface);
+err_mbx_irq:
+	fm10k_clear_queueing_scheme(interface);
+err_queueing_scheme:
+	netif_device_detach(dev);
+
+	return err;
+}
+
+static int __fm10k_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			    void *type_data)
+{
+	struct tc_mqprio_qopt *mqprio = type_data;
+
+	if (type != TC_SETUP_QDISC_MQPRIO)
+		return -EOPNOTSUPP;
+
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+
+	return fm10k_setup_tc(dev, mqprio->num_tc);
+}
+
+static void fm10k_assign_l2_accel(struct fm10k_intfc *interface,
+				  struct fm10k_l2_accel *l2_accel)
+{
+	int i;
+
+	for (i = 0; i < interface->num_rx_queues; i++) {
+		struct fm10k_ring *ring = interface->rx_ring[i];
+
+		rcu_assign_pointer(ring->l2_accel, l2_accel);
+	}
+
+	interface->l2_accel = l2_accel;
+}
+
+static void *fm10k_dfwd_add_station(struct net_device *dev,
+				    struct net_device *sdev)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	struct fm10k_l2_accel *l2_accel = interface->l2_accel;
+	struct fm10k_l2_accel *old_l2_accel = NULL;
+	struct fm10k_dglort_cfg dglort = { 0 };
+	struct fm10k_hw *hw = &interface->hw;
+	int size, i;
+	u16 vid, glort;
+
+	/* The hardware supported by fm10k only filters on the destination MAC
+	 * address. In order to avoid issues we only support offloading modes
+	 * where the hardware can actually provide the functionality.
+	 */
+	if (!macvlan_supports_dest_filter(sdev))
+		return ERR_PTR(-EMEDIUMTYPE);
+
+	/* allocate l2 accel structure if it is not available */
+	if (!l2_accel) {
+		/* verify there is enough free GLORTs to support l2_accel */
+		if (interface->glort_count < 7)
+			return ERR_PTR(-EBUSY);
+
+		size = offsetof(struct fm10k_l2_accel, macvlan[7]);
+		l2_accel = kzalloc(size, GFP_KERNEL);
+		if (!l2_accel)
+			return ERR_PTR(-ENOMEM);
+
+		l2_accel->size = 7;
+		l2_accel->dglort = interface->glort;
+
+		/* update pointers */
+		fm10k_assign_l2_accel(interface, l2_accel);
+	/* do not expand if we are at our limit */
+	} else if ((l2_accel->count == FM10K_MAX_STATIONS) ||
+		   (l2_accel->count == (interface->glort_count - 1))) {
+		return ERR_PTR(-EBUSY);
+	/* expand if we have hit the size limit */
+	} else if (l2_accel->count == l2_accel->size) {
+		old_l2_accel = l2_accel;
+		size = offsetof(struct fm10k_l2_accel,
+				macvlan[(l2_accel->size * 2) + 1]);
+		l2_accel = kzalloc(size, GFP_KERNEL);
+		if (!l2_accel)
+			return ERR_PTR(-ENOMEM);
+
+		memcpy(l2_accel, old_l2_accel,
+		       offsetof(struct fm10k_l2_accel,
+				macvlan[old_l2_accel->size]));
+
+		l2_accel->size = (old_l2_accel->size * 2) + 1;
+
+		/* update pointers */
+		fm10k_assign_l2_accel(interface, l2_accel);
+		kfree_rcu(old_l2_accel, rcu);
+	}
+
+	/* add macvlan to accel table, and record GLORT for position */
+	for (i = 0; i < l2_accel->size; i++) {
+		if (!l2_accel->macvlan[i])
+			break;
+	}
+
+	/* record station */
+	l2_accel->macvlan[i] = sdev;
+	l2_accel->count++;
+
+	/* configure default DGLORT mapping for RSS/DCB */
+	dglort.idx = fm10k_dglort_pf_rss;
+	dglort.inner_rss = 1;
+	dglort.rss_l = fls(interface->ring_feature[RING_F_RSS].mask);
+	dglort.pc_l = fls(interface->ring_feature[RING_F_QOS].mask);
+	dglort.glort = interface->glort;
+	dglort.shared_l = fls(l2_accel->size);
+	hw->mac.ops.configure_dglort_map(hw, &dglort);
+
+	/* Add rules for this specific dglort to the switch */
+	fm10k_mbx_lock(interface);
+
+	glort = l2_accel->dglort + 1 + i;
+
+	if (fm10k_host_mbx_ready(interface))
+		hw->mac.ops.update_xcast_mode(hw, glort,
+					      FM10K_XCAST_MODE_NONE);
+
+	fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
+				hw->mac.default_vid, true);
+
+	for (vid = fm10k_find_next_vlan(interface, 0);
+	     vid < VLAN_N_VID;
+	     vid = fm10k_find_next_vlan(interface, vid))
+		fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
+					vid, true);
+
+	fm10k_mbx_unlock(interface);
+
+	return sdev;
+}
+
+static void fm10k_dfwd_del_station(struct net_device *dev, void *priv)
+{
+	struct fm10k_intfc *interface = netdev_priv(dev);
+	struct fm10k_l2_accel *l2_accel = READ_ONCE(interface->l2_accel);
+	struct fm10k_dglort_cfg dglort = { 0 };
+	struct fm10k_hw *hw = &interface->hw;
+	struct net_device *sdev = priv;
+	u16 vid, glort;
+	int i;
+
+	if (!l2_accel)
+		return;
+
+	/* search table for matching interface */
+	for (i = 0; i < l2_accel->size; i++) {
+		if (l2_accel->macvlan[i] == sdev)
+			break;
+	}
+
+	/* exit if macvlan not found */
+	if (i == l2_accel->size)
+		return;
+
+	/* Remove any rules specific to this dglort */
+	fm10k_mbx_lock(interface);
+
+	glort = l2_accel->dglort + 1 + i;
+
+	if (fm10k_host_mbx_ready(interface))
+		hw->mac.ops.update_xcast_mode(hw, glort,
+					      FM10K_XCAST_MODE_NONE);
+
+	fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
+				hw->mac.default_vid, false);
+
+	for (vid = fm10k_find_next_vlan(interface, 0);
+	     vid < VLAN_N_VID;
+	     vid = fm10k_find_next_vlan(interface, vid))
+		fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
+					vid, false);
+
+	fm10k_mbx_unlock(interface);
+
+	/* record removal */
+	l2_accel->macvlan[i] = NULL;
+	l2_accel->count--;
+
+	/* configure default DGLORT mapping for RSS/DCB */
+	dglort.idx = fm10k_dglort_pf_rss;
+	dglort.inner_rss = 1;
+	dglort.rss_l = fls(interface->ring_feature[RING_F_RSS].mask);
+	dglort.pc_l = fls(interface->ring_feature[RING_F_QOS].mask);
+	dglort.glort = interface->glort;
+	dglort.shared_l = fls(l2_accel->size);
+	hw->mac.ops.configure_dglort_map(hw, &dglort);
+
+	/* If table is empty remove it */
+	if (l2_accel->count == 0) {
+		fm10k_assign_l2_accel(interface, NULL);
+		kfree_rcu(l2_accel, rcu);
+	}
+}
+
+static netdev_features_t fm10k_features_check(struct sk_buff *skb,
+					      struct net_device *dev,
+					      netdev_features_t features)
+{
+	if (!skb->encapsulation || fm10k_tx_encap_offload(skb))
+		return features;
+
+	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+static const struct net_device_ops fm10k_netdev_ops = {
+	.ndo_open		= fm10k_open,
+	.ndo_stop		= fm10k_close,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_start_xmit		= fm10k_xmit_frame,
+	.ndo_set_mac_address	= fm10k_set_mac,
+	.ndo_tx_timeout		= fm10k_tx_timeout,
+	.ndo_vlan_rx_add_vid	= fm10k_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= fm10k_vlan_rx_kill_vid,
+	.ndo_set_rx_mode	= fm10k_set_rx_mode,
+	.ndo_get_stats64	= fm10k_get_stats64,
+	.ndo_setup_tc		= __fm10k_setup_tc,
+	.ndo_set_vf_mac		= fm10k_ndo_set_vf_mac,
+	.ndo_set_vf_vlan	= fm10k_ndo_set_vf_vlan,
+	.ndo_set_vf_rate	= fm10k_ndo_set_vf_bw,
+	.ndo_get_vf_config	= fm10k_ndo_get_vf_config,
+	.ndo_get_vf_stats	= fm10k_ndo_get_vf_stats,
+	.ndo_dfwd_add_station	= fm10k_dfwd_add_station,
+	.ndo_dfwd_del_station	= fm10k_dfwd_del_station,
+	.ndo_features_check	= fm10k_features_check,
+};
+
+#define DEFAULT_DEBUG_LEVEL_SHIFT 3
+
+struct net_device *fm10k_alloc_netdev(const struct fm10k_info *info)
+{
+	netdev_features_t hw_features;
+	struct fm10k_intfc *interface;
+	struct net_device *dev;
+
+	dev = alloc_etherdev_mq(sizeof(struct fm10k_intfc), MAX_QUEUES);
+	if (!dev)
+		return NULL;
+
+	/* set net device and ethtool ops */
+	dev->netdev_ops = &fm10k_netdev_ops;
+	fm10k_set_ethtool_ops(dev);
+
+	/* configure default debug level */
+	interface = netdev_priv(dev);
+	interface->msg_enable = BIT(DEFAULT_DEBUG_LEVEL_SHIFT) - 1;
+
+	/* configure default features */
+	dev->features |= NETIF_F_IP_CSUM |
+			 NETIF_F_IPV6_CSUM |
+			 NETIF_F_SG |
+			 NETIF_F_TSO |
+			 NETIF_F_TSO6 |
+			 NETIF_F_TSO_ECN |
+			 NETIF_F_RXHASH |
+			 NETIF_F_RXCSUM;
+
+	/* Only the PF can support VXLAN and NVGRE tunnel offloads */
+	if (info->mac == fm10k_mac_pf) {
+		dev->hw_enc_features = NETIF_F_IP_CSUM |
+				       NETIF_F_TSO |
+				       NETIF_F_TSO6 |
+				       NETIF_F_TSO_ECN |
+				       NETIF_F_GSO_UDP_TUNNEL |
+				       NETIF_F_IPV6_CSUM |
+				       NETIF_F_SG;
+
+		dev->features |= NETIF_F_GSO_UDP_TUNNEL;
+
+		dev->udp_tunnel_nic_info = &fm10k_udp_tunnels;
+	}
+
+	/* all features defined to this point should be changeable */
+	hw_features = dev->features;
+
+	/* allow user to enable L2 forwarding acceleration */
+	hw_features |= NETIF_F_HW_L2FW_DOFFLOAD;
+
+	/* configure VLAN features */
+	dev->vlan_features |= dev->features;
+
+	/* we want to leave these both on as we cannot disable VLAN tag
+	 * insertion or stripping on the hardware since it is contained
+	 * in the FTAG and not in the frame itself.
+	 */
+	dev->features |= NETIF_F_HW_VLAN_CTAG_TX |
+			 NETIF_F_HW_VLAN_CTAG_RX |
+			 NETIF_F_HW_VLAN_CTAG_FILTER;
+
+	dev->priv_flags |= IFF_UNICAST_FLT;
+
+	dev->hw_features |= hw_features;
+
+	/* MTU range: 68 - 15342 */
+	dev->min_mtu = ETH_MIN_MTU;
+	dev->max_mtu = FM10K_MAX_JUMBO_FRAME_SIZE;
+
+	return dev;
+}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
new file mode 100644
index 0000000000..d748b98274
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -0,0 +1,2537 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+
+#include "fm10k.h"
+
+static const struct fm10k_info *fm10k_info_tbl[] = {
+	[fm10k_device_pf] = &fm10k_pf_info,
+	[fm10k_device_vf] = &fm10k_vf_info,
+};
+
+/*
+ * fm10k_pci_tbl - PCI Device ID Table
+ *
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ *   Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id fm10k_pci_tbl[] = {
+	{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_PF), fm10k_device_pf },
+	{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_SDI_FM10420_QDA2), fm10k_device_pf },
+	{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_SDI_FM10420_DA2), fm10k_device_pf },
+	{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_VF), fm10k_device_vf },
+	/* required last entry */
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, fm10k_pci_tbl);
+
+u16 fm10k_read_pci_cfg_word(struct fm10k_hw *hw, u32 reg)
+{
+	struct fm10k_intfc *interface = hw->back;
+	u16 value = 0;
+
+	if (FM10K_REMOVED(hw->hw_addr))
+		return ~value;
+
+	pci_read_config_word(interface->pdev, reg, &value);
+	if (value == 0xFFFF)
+		fm10k_write_flush(hw);
+
+	return value;
+}
+
+u32 fm10k_read_reg(struct fm10k_hw *hw, int reg)
+{
+	u32 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
+	u32 value = 0;
+
+	if (FM10K_REMOVED(hw_addr))
+		return ~value;
+
+	value = readl(&hw_addr[reg]);
+	if (!(~value) && (!reg || !(~readl(hw_addr)))) {
+		struct fm10k_intfc *interface = hw->back;
+		struct net_device *netdev = interface->netdev;
+
+		hw->hw_addr = NULL;
+		netif_device_detach(netdev);
+		netdev_err(netdev, "PCIe link lost, device now detached\n");
+	}
+
+	return value;
+}
+
+static int fm10k_hw_ready(struct fm10k_intfc *interface)
+{
+	struct fm10k_hw *hw = &interface->hw;
+
+	fm10k_write_flush(hw);
+
+	return FM10K_REMOVED(hw->hw_addr) ? -ENODEV : 0;
+}
+
+/**
+ * fm10k_macvlan_schedule - Schedule MAC/VLAN queue task
+ * @interface: fm10k private interface structure
+ *
+ * Schedule the MAC/VLAN queue monitor task. If the MAC/VLAN task cannot be
+ * started immediately, request that it be restarted when possible.
+ */
+void fm10k_macvlan_schedule(struct fm10k_intfc *interface)
+{
+	/* Avoid processing the MAC/VLAN queue when the service task is
+	 * disabled, or when we're resetting the device.
+	 */
+	if (!test_bit(__FM10K_MACVLAN_DISABLE, interface->state) &&
+	    !test_and_set_bit(__FM10K_MACVLAN_SCHED, interface->state)) {
+		clear_bit(__FM10K_MACVLAN_REQUEST, interface->state);
+		/* We delay the actual start of execution in order to allow
+		 * multiple MAC/VLAN updates to accumulate before handling
+		 * them, and to allow some time to let the mailbox drain
+		 * between runs.
+		 */
+		queue_delayed_work(fm10k_workqueue,
+				   &interface->macvlan_task, 10);
+	} else {
+		set_bit(__FM10K_MACVLAN_REQUEST, interface->state);
+	}
+}
+
+/**
+ * fm10k_stop_macvlan_task - Stop the MAC/VLAN queue monitor
+ * @interface: fm10k private interface structure
+ *
+ * Wait until the MAC/VLAN queue task has stopped, and cancel any future
+ * requests.
+ */
+static void fm10k_stop_macvlan_task(struct fm10k_intfc *interface)
+{
+	/* Disable the MAC/VLAN work item */
+	set_bit(__FM10K_MACVLAN_DISABLE, interface->state);
+
+	/* Make sure we waited until any current invocations have stopped */
+	cancel_delayed_work_sync(&interface->macvlan_task);
+
+	/* We set the __FM10K_MACVLAN_SCHED bit when we schedule the task.
+	 * However, it may not be unset of the MAC/VLAN task never actually
+	 * got a chance to run. Since we've canceled the task here, and it
+	 * cannot be rescheuled right now, we need to ensure the scheduled bit
+	 * gets unset.
+	 */
+	clear_bit(__FM10K_MACVLAN_SCHED, interface->state);
+}
+
+/**
+ * fm10k_resume_macvlan_task - Restart the MAC/VLAN queue monitor
+ * @interface: fm10k private interface structure
+ *
+ * Clear the __FM10K_MACVLAN_DISABLE bit and, if a request occurred, schedule
+ * the MAC/VLAN work monitor.
+ */
+static void fm10k_resume_macvlan_task(struct fm10k_intfc *interface)
+{
+	/* Re-enable the MAC/VLAN work item */
+	clear_bit(__FM10K_MACVLAN_DISABLE, interface->state);
+
+	/* We might have received a MAC/VLAN request while disabled. If so,
+	 * kick off the queue now.
+	 */
+	if (test_bit(__FM10K_MACVLAN_REQUEST, interface->state))
+		fm10k_macvlan_schedule(interface);
+}
+
+void fm10k_service_event_schedule(struct fm10k_intfc *interface)
+{
+	if (!test_bit(__FM10K_SERVICE_DISABLE, interface->state) &&
+	    !test_and_set_bit(__FM10K_SERVICE_SCHED, interface->state)) {
+		clear_bit(__FM10K_SERVICE_REQUEST, interface->state);
+		queue_work(fm10k_workqueue, &interface->service_task);
+	} else {
+		set_bit(__FM10K_SERVICE_REQUEST, interface->state);
+	}
+}
+
+static void fm10k_service_event_complete(struct fm10k_intfc *interface)
+{
+	WARN_ON(!test_bit(__FM10K_SERVICE_SCHED, interface->state));
+
+	/* flush memory to make sure state is correct before next watchog */
+	smp_mb__before_atomic();
+	clear_bit(__FM10K_SERVICE_SCHED, interface->state);
+
+	/* If a service event was requested since we started, immediately
+	 * re-schedule now. This ensures we don't drop a request until the
+	 * next timer event.
+	 */
+	if (test_bit(__FM10K_SERVICE_REQUEST, interface->state))
+		fm10k_service_event_schedule(interface);
+}
+
+static void fm10k_stop_service_event(struct fm10k_intfc *interface)
+{
+	set_bit(__FM10K_SERVICE_DISABLE, interface->state);
+	cancel_work_sync(&interface->service_task);
+
+	/* It's possible that cancel_work_sync stopped the service task from
+	 * running before it could actually start. In this case the
+	 * __FM10K_SERVICE_SCHED bit will never be cleared. Since we know that
+	 * the service task cannot be running at this point, we need to clear
+	 * the scheduled bit, as otherwise the service task may never be
+	 * restarted.
+	 */
+	clear_bit(__FM10K_SERVICE_SCHED, interface->state);
+}
+
+static void fm10k_start_service_event(struct fm10k_intfc *interface)
+{
+	clear_bit(__FM10K_SERVICE_DISABLE, interface->state);
+	fm10k_service_event_schedule(interface);
+}
+
+/**
+ * fm10k_service_timer - Timer Call-back
+ * @t: pointer to timer data
+ **/
+static void fm10k_service_timer(struct timer_list *t)
+{
+	struct fm10k_intfc *interface = from_timer(interface, t,
+						   service_timer);
+
+	/* Reset the timer */
+	mod_timer(&interface->service_timer, (HZ * 2) + jiffies);
+
+	fm10k_service_event_schedule(interface);
+}
+
+/**
+ * fm10k_prepare_for_reset - Prepare the driver and device for a pending reset
+ * @interface: fm10k private data structure
+ *
+ * This function prepares for a device reset by shutting as much down as we
+ * can. It does nothing and returns false if __FM10K_RESETTING was already set
+ * prior to calling this function. It returns true if it actually did work.
+ */
+static bool fm10k_prepare_for_reset(struct fm10k_intfc *interface)
+{
+	struct net_device *netdev = interface->netdev;
+
+	/* put off any impending NetWatchDogTimeout */
+	netif_trans_update(netdev);
+
+	/* Nothing to do if a reset is already in progress */
+	if (test_and_set_bit(__FM10K_RESETTING, interface->state))
+		return false;
+
+	/* As the MAC/VLAN task will be accessing registers it must not be
+	 * running while we reset. Although the task will not be scheduled
+	 * once we start resetting it may already be running
+	 */
+	fm10k_stop_macvlan_task(interface);
+
+	rtnl_lock();
+
+	fm10k_iov_suspend(interface->pdev);
+
+	if (netif_running(netdev))
+		fm10k_close(netdev);
+
+	fm10k_mbx_free_irq(interface);
+
+	/* free interrupts */
+	fm10k_clear_queueing_scheme(interface);
+
+	/* delay any future reset requests */
+	interface->last_reset = jiffies + (10 * HZ);
+
+	rtnl_unlock();
+
+	return true;
+}
+
+static int fm10k_handle_reset(struct fm10k_intfc *interface)
+{
+	struct net_device *netdev = interface->netdev;
+	struct fm10k_hw *hw = &interface->hw;
+	int err;
+
+	WARN_ON(!test_bit(__FM10K_RESETTING, interface->state));
+
+	rtnl_lock();
+
+	pci_set_master(interface->pdev);
+
+	/* reset and initialize the hardware so it is in a known state */
+	err = hw->mac.ops.reset_hw(hw);
+	if (err) {
+		dev_err(&interface->pdev->dev, "reset_hw failed: %d\n", err);
+		goto reinit_err;
+	}
+
+	err = hw->mac.ops.init_hw(hw);
+	if (err) {
+		dev_err(&interface->pdev->dev, "init_hw failed: %d\n", err);
+		goto reinit_err;
+	}
+
+	err = fm10k_init_queueing_scheme(interface);
+	if (err) {
+		dev_err(&interface->pdev->dev,
+			"init_queueing_scheme failed: %d\n", err);
+		goto reinit_err;
+	}
+
+	/* re-associate interrupts */
+	err = fm10k_mbx_request_irq(interface);
+	if (err)
+		goto err_mbx_irq;
+
+	err = fm10k_hw_ready(interface);
+	if (err)
+		goto err_open;
+
+	/* update hardware address for VFs if perm_addr has changed */
+	if (hw->mac.type == fm10k_mac_vf) {
+		if (is_valid_ether_addr(hw->mac.perm_addr)) {
+			ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
+			ether_addr_copy(netdev->perm_addr, hw->mac.perm_addr);
+			eth_hw_addr_set(netdev, hw->mac.perm_addr);
+			netdev->addr_assign_type &= ~NET_ADDR_RANDOM;
+		}
+
+		if (hw->mac.vlan_override)
+			netdev->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
+		else
+			netdev->features |= NETIF_F_HW_VLAN_CTAG_RX;
+	}
+
+	err = netif_running(netdev) ? fm10k_open(netdev) : 0;
+	if (err)
+		goto err_open;
+
+	fm10k_iov_resume(interface->pdev);
+
+	rtnl_unlock();
+
+	fm10k_resume_macvlan_task(interface);
+
+	clear_bit(__FM10K_RESETTING, interface->state);
+
+	return err;
+err_open:
+	fm10k_mbx_free_irq(interface);
+err_mbx_irq:
+	fm10k_clear_queueing_scheme(interface);
+reinit_err:
+	netif_device_detach(netdev);
+
+	rtnl_unlock();
+
+	clear_bit(__FM10K_RESETTING, interface->state);
+
+	return err;
+}
+
+static void fm10k_detach_subtask(struct fm10k_intfc *interface)
+{
+	struct net_device *netdev = interface->netdev;
+	u32 __iomem *hw_addr;
+	u32 value;
+
+	/* do nothing if netdev is still present or hw_addr is set */
+	if (netif_device_present(netdev) || interface->hw.hw_addr)
+		return;
+
+	/* We've lost the PCIe register space, and can no longer access the
+	 * device. Shut everything except the detach subtask down and prepare
+	 * to reset the device in case we recover. If we actually prepare for
+	 * reset, indicate that we're detached.
+	 */
+	if (fm10k_prepare_for_reset(interface))
+		set_bit(__FM10K_RESET_DETACHED, interface->state);
+
+	/* check the real address space to see if we've recovered */
+	hw_addr = READ_ONCE(interface->uc_addr);
+	value = readl(hw_addr);
+	if (~value) {
+		int err;
+
+		/* Make sure the reset was initiated because we detached,
+		 * otherwise we might race with a different reset flow.
+		 */
+		if (!test_and_clear_bit(__FM10K_RESET_DETACHED,
+					interface->state))
+			return;
+
+		/* Restore the hardware address */
+		interface->hw.hw_addr = interface->uc_addr;
+
+		/* PCIe link has been restored, and the device is active
+		 * again. Restore everything and reset the device.
+		 */
+		err = fm10k_handle_reset(interface);
+		if (err) {
+			netdev_err(netdev, "Unable to reset device: %d\n", err);
+			interface->hw.hw_addr = NULL;
+			return;
+		}
+
+		/* Re-attach the netdev */
+		netif_device_attach(netdev);
+		netdev_warn(netdev, "PCIe link restored, device now attached\n");
+		return;
+	}
+}
+
+static void fm10k_reset_subtask(struct fm10k_intfc *interface)
+{
+	int err;
+
+	if (!test_and_clear_bit(FM10K_FLAG_RESET_REQUESTED,
+				interface->flags))
+		return;
+
+	/* If another thread has already prepared to reset the device, we
+	 * should not attempt to handle a reset here, since we'd race with
+	 * that thread. This may happen if we suspend the device or if the
+	 * PCIe link is lost. In this case, we'll just ignore the RESET
+	 * request, as it will (eventually) be taken care of when the thread
+	 * which actually started the reset is finished.
+	 */
+	if (!fm10k_prepare_for_reset(interface))
+		return;
+
+	netdev_err(interface->netdev, "Reset interface\n");
+
+	err = fm10k_handle_reset(interface);
+	if (err)
+		dev_err(&interface->pdev->dev,
+			"fm10k_handle_reset failed: %d\n", err);
+}
+
+/**
+ * fm10k_configure_swpri_map - Configure Receive SWPRI to PC mapping
+ * @interface: board private structure
+ *
+ * Configure the SWPRI to PC mapping for the port.
+ **/
+static void fm10k_configure_swpri_map(struct fm10k_intfc *interface)
+{
+	struct net_device *netdev = interface->netdev;
+	struct fm10k_hw *hw = &interface->hw;
+	int i;
+
+	/* clear flag indicating update is needed */
+	clear_bit(FM10K_FLAG_SWPRI_CONFIG, interface->flags);
+
+	/* these registers are only available on the PF */
+	if (hw->mac.type != fm10k_mac_pf)
+		return;
+
+	/* configure SWPRI to PC map */
+	for (i = 0; i < FM10K_SWPRI_MAX; i++)
+		fm10k_write_reg(hw, FM10K_SWPRI_MAP(i),
+				netdev_get_prio_tc_map(netdev, i));
+}
+
+/**
+ * fm10k_watchdog_update_host_state - Update the link status based on host.
+ * @interface: board private structure
+ **/
+static void fm10k_watchdog_update_host_state(struct fm10k_intfc *interface)
+{
+	struct fm10k_hw *hw = &interface->hw;
+	s32 err;
+
+	if (test_bit(__FM10K_LINK_DOWN, interface->state)) {
+		interface->host_ready = false;
+		if (time_is_after_jiffies(interface->link_down_event))
+			return;
+		clear_bit(__FM10K_LINK_DOWN, interface->state);
+	}
+
+	if (test_bit(FM10K_FLAG_SWPRI_CONFIG, interface->flags)) {
+		if (rtnl_trylock()) {
+			fm10k_configure_swpri_map(interface);
+			rtnl_unlock();
+		}
+	}
+
+	/* lock the mailbox for transmit and receive */
+	fm10k_mbx_lock(interface);
+
+	err = hw->mac.ops.get_host_state(hw, &interface->host_ready);
+	if (err && time_is_before_jiffies(interface->last_reset))
+		set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
+
+	/* free the lock */
+	fm10k_mbx_unlock(interface);
+}
+
+/**
+ * fm10k_mbx_subtask - Process upstream and downstream mailboxes
+ * @interface: board private structure
+ *
+ * This function will process both the upstream and downstream mailboxes.
+ **/
+static void fm10k_mbx_subtask(struct fm10k_intfc *interface)
+{
+	/* If we're resetting, bail out */
+	if (test_bit(__FM10K_RESETTING, interface->state))
+		return;
+
+	/* process upstream mailbox and update device state */
+	fm10k_watchdog_update_host_state(interface);
+
+	/* process downstream mailboxes */
+	fm10k_iov_mbx(interface);
+}
+
+/**
+ * fm10k_watchdog_host_is_ready - Update netdev status based on host ready
+ * @interface: board private structure
+ **/
+static void fm10k_watchdog_host_is_ready(struct fm10k_intfc *interface)
+{
+	struct net_device *netdev = interface->netdev;
+
+	/* only continue if link state is currently down */
+	if (netif_carrier_ok(netdev))
+		return;
+
+	netif_info(interface, drv, netdev, "NIC Link is up\n");
+
+	netif_carrier_on(netdev);
+	netif_tx_wake_all_queues(netdev);
+}
+
+/**
+ * fm10k_watchdog_host_not_ready - Update netdev status based on host not ready
+ * @interface: board private structure
+ **/
+static void fm10k_watchdog_host_not_ready(struct fm10k_intfc *interface)
+{
+	struct net_device *netdev = interface->netdev;
+
+	/* only continue if link state is currently up */
+	if (!netif_carrier_ok(netdev))
+		return;
+
+	netif_info(interface, drv, netdev, "NIC Link is down\n");
+
+	netif_carrier_off(netdev);
+	netif_tx_stop_all_queues(netdev);
+}
+
+/**
+ * fm10k_update_stats - Update the board statistics counters.
+ * @interface: board private structure
+ **/
+void fm10k_update_stats(struct fm10k_intfc *interface)
+{
+	struct net_device_stats *net_stats = &interface->netdev->stats;
+	struct fm10k_hw *hw = &interface->hw;
+	u64 hw_csum_tx_good = 0, hw_csum_rx_good = 0, rx_length_errors = 0;
+	u64 rx_switch_errors = 0, rx_drops = 0, rx_pp_errors = 0;
+	u64 rx_link_errors = 0;
+	u64 rx_errors = 0, rx_csum_errors = 0, tx_csum_errors = 0;
+	u64 restart_queue = 0, tx_busy = 0, alloc_failed = 0;
+	u64 rx_bytes_nic = 0, rx_pkts_nic = 0, rx_drops_nic = 0;
+	u64 tx_bytes_nic = 0, tx_pkts_nic = 0;
+	u64 bytes, pkts;
+	int i;
+
+	/* ensure only one thread updates stats at a time */
+	if (test_and_set_bit(__FM10K_UPDATING_STATS, interface->state))
+		return;
+
+	/* do not allow stats update via service task for next second */
+	interface->next_stats_update = jiffies + HZ;
+
+	/* gather some stats to the interface struct that are per queue */
+	for (bytes = 0, pkts = 0, i = 0; i < interface->num_tx_queues; i++) {
+		struct fm10k_ring *tx_ring = READ_ONCE(interface->tx_ring[i]);
+
+		if (!tx_ring)
+			continue;
+
+		restart_queue += tx_ring->tx_stats.restart_queue;
+		tx_busy += tx_ring->tx_stats.tx_busy;
+		tx_csum_errors += tx_ring->tx_stats.csum_err;
+		bytes += tx_ring->stats.bytes;
+		pkts += tx_ring->stats.packets;
+		hw_csum_tx_good += tx_ring->tx_stats.csum_good;
+	}
+
+	interface->restart_queue = restart_queue;
+	interface->tx_busy = tx_busy;
+	net_stats->tx_bytes = bytes;
+	net_stats->tx_packets = pkts;
+	interface->tx_csum_errors = tx_csum_errors;
+	interface->hw_csum_tx_good = hw_csum_tx_good;
+
+	/* gather some stats to the interface struct that are per queue */
+	for (bytes = 0, pkts = 0, i = 0; i < interface->num_rx_queues; i++) {
+		struct fm10k_ring *rx_ring = READ_ONCE(interface->rx_ring[i]);
+
+		if (!rx_ring)
+			continue;
+
+		bytes += rx_ring->stats.bytes;
+		pkts += rx_ring->stats.packets;
+		alloc_failed += rx_ring->rx_stats.alloc_failed;
+		rx_csum_errors += rx_ring->rx_stats.csum_err;
+		rx_errors += rx_ring->rx_stats.errors;
+		hw_csum_rx_good += rx_ring->rx_stats.csum_good;
+		rx_switch_errors += rx_ring->rx_stats.switch_errors;
+		rx_drops += rx_ring->rx_stats.drops;
+		rx_pp_errors += rx_ring->rx_stats.pp_errors;
+		rx_link_errors += rx_ring->rx_stats.link_errors;
+		rx_length_errors += rx_ring->rx_stats.length_errors;
+	}
+
+	net_stats->rx_bytes = bytes;
+	net_stats->rx_packets = pkts;
+	interface->alloc_failed = alloc_failed;
+	interface->rx_csum_errors = rx_csum_errors;
+	interface->hw_csum_rx_good = hw_csum_rx_good;
+	interface->rx_switch_errors = rx_switch_errors;
+	interface->rx_drops = rx_drops;
+	interface->rx_pp_errors = rx_pp_errors;
+	interface->rx_link_errors = rx_link_errors;
+	interface->rx_length_errors = rx_length_errors;
+
+	hw->mac.ops.update_hw_stats(hw, &interface->stats);
+
+	for (i = 0; i < hw->mac.max_queues; i++) {
+		struct fm10k_hw_stats_q *q = &interface->stats.q[i];
+
+		tx_bytes_nic += q->tx_bytes.count;
+		tx_pkts_nic += q->tx_packets.count;
+		rx_bytes_nic += q->rx_bytes.count;
+		rx_pkts_nic += q->rx_packets.count;
+		rx_drops_nic += q->rx_drops.count;
+	}
+
+	interface->tx_bytes_nic = tx_bytes_nic;
+	interface->tx_packets_nic = tx_pkts_nic;
+	interface->rx_bytes_nic = rx_bytes_nic;
+	interface->rx_packets_nic = rx_pkts_nic;
+	interface->rx_drops_nic = rx_drops_nic;
+
+	/* Fill out the OS statistics structure */
+	net_stats->rx_errors = rx_errors;
+	net_stats->rx_dropped = interface->stats.nodesc_drop.count;
+
+	/* Update VF statistics */
+	fm10k_iov_update_stats(interface);
+
+	clear_bit(__FM10K_UPDATING_STATS, interface->state);
+}
+
+/**
+ * fm10k_watchdog_flush_tx - flush queues on host not ready
+ * @interface: pointer to the device interface structure
+ **/
+static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface)
+{
+	int some_tx_pending = 0;
+	int i;
+
+	/* nothing to do if carrier is up */
+	if (netif_carrier_ok(interface->netdev))
+		return;
+
+	for (i = 0; i < interface->num_tx_queues; i++) {
+		struct fm10k_ring *tx_ring = interface->tx_ring[i];
+
+		if (tx_ring->next_to_use != tx_ring->next_to_clean) {
+			some_tx_pending = 1;
+			break;
+		}
+	}
+
+	/* We've lost link, so the controller stops DMA, but we've got
+	 * queued Tx work that's never going to get done, so reset
+	 * controller to flush Tx.
+	 */
+	if (some_tx_pending)
+		set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
+}
+
+/**
+ * fm10k_watchdog_subtask - check and bring link up
+ * @interface: pointer to the device interface structure
+ **/
+static void fm10k_watchdog_subtask(struct fm10k_intfc *interface)
+{
+	/* if interface is down do nothing */
+	if (test_bit(__FM10K_DOWN, interface->state) ||
+	    test_bit(__FM10K_RESETTING, interface->state))
+		return;
+
+	if (interface->host_ready)
+		fm10k_watchdog_host_is_ready(interface);
+	else
+		fm10k_watchdog_host_not_ready(interface);
+
+	/* update stats only once every second */
+	if (time_is_before_jiffies(interface->next_stats_update))
+		fm10k_update_stats(interface);
+
+	/* flush any uncompleted work */
+	fm10k_watchdog_flush_tx(interface);
+}
+
+/**
+ * fm10k_check_hang_subtask - check for hung queues and dropped interrupts
+ * @interface: pointer to the device interface structure
+ *
+ * This function serves two purposes.  First it strobes the interrupt lines
+ * in order to make certain interrupts are occurring.  Secondly it sets the
+ * bits needed to check for TX hangs.  As a result we should immediately
+ * determine if a hang has occurred.
+ */
+static void fm10k_check_hang_subtask(struct fm10k_intfc *interface)
+{
+	/* If we're down or resetting, just bail */
+	if (test_bit(__FM10K_DOWN, interface->state) ||
+	    test_bit(__FM10K_RESETTING, interface->state))
+		return;
+
+	/* rate limit tx hang checks to only once every 2 seconds */
+	if (time_is_after_eq_jiffies(interface->next_tx_hang_check))
+		return;
+	interface->next_tx_hang_check = jiffies + (2 * HZ);
+
+	if (netif_carrier_ok(interface->netdev)) {
+		int i;
+
+		/* Force detection of hung controller */
+		for (i = 0; i < interface->num_tx_queues; i++)
+			set_check_for_tx_hang(interface->tx_ring[i]);
+
+		/* Rearm all in-use q_vectors for immediate firing */
+		for (i = 0; i < interface->num_q_vectors; i++) {
+			struct fm10k_q_vector *qv = interface->q_vector[i];
+
+			if (!qv->tx.count && !qv->rx.count)
+				continue;
+			writel(FM10K_ITR_ENABLE | FM10K_ITR_PENDING2, qv->itr);
+		}
+	}
+}
+
+/**
+ * fm10k_service_task - manages and runs subtasks
+ * @work: pointer to work_struct containing our data
+ **/
+static void fm10k_service_task(struct work_struct *work)
+{
+	struct fm10k_intfc *interface;
+
+	interface = container_of(work, struct fm10k_intfc, service_task);
+
+	/* Check whether we're detached first */
+	fm10k_detach_subtask(interface);
+
+	/* tasks run even when interface is down */
+	fm10k_mbx_subtask(interface);
+	fm10k_reset_subtask(interface);
+
+	/* tasks only run when interface is up */
+	fm10k_watchdog_subtask(interface);
+	fm10k_check_hang_subtask(interface);
+
+	/* release lock on service events to allow scheduling next event */
+	fm10k_service_event_complete(interface);
+}
+
+/**
+ * fm10k_macvlan_task - send queued MAC/VLAN requests to switch manager
+ * @work: pointer to work_struct containing our data
+ *
+ * This work item handles sending MAC/VLAN updates to the switch manager. When
+ * the interface is up, it will attempt to queue mailbox messages to the
+ * switch manager requesting updates for MAC/VLAN pairs. If the Tx fifo of the
+ * mailbox is full, it will reschedule itself to try again in a short while.
+ * This ensures that the driver does not overload the switch mailbox with too
+ * many simultaneous requests, causing an unnecessary reset.
+ **/
+static void fm10k_macvlan_task(struct work_struct *work)
+{
+	struct fm10k_macvlan_request *item;
+	struct fm10k_intfc *interface;
+	struct delayed_work *dwork;
+	struct list_head *requests;
+	struct fm10k_hw *hw;
+	unsigned long flags;
+
+	dwork = to_delayed_work(work);
+	interface = container_of(dwork, struct fm10k_intfc, macvlan_task);
+	hw = &interface->hw;
+	requests = &interface->macvlan_requests;
+
+	do {
+		/* Pop the first item off the list */
+		spin_lock_irqsave(&interface->macvlan_lock, flags);
+		item = list_first_entry_or_null(requests,
+						struct fm10k_macvlan_request,
+						list);
+		if (item)
+			list_del_init(&item->list);
+
+		spin_unlock_irqrestore(&interface->macvlan_lock, flags);
+
+		/* We have no more items to process */
+		if (!item)
+			goto done;
+
+		fm10k_mbx_lock(interface);
+
+		/* Check that we have plenty of space to send the message. We
+		 * want to ensure that the mailbox stays low enough to avoid a
+		 * change in the host state, otherwise we may see spurious
+		 * link up / link down notifications.
+		 */
+		if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU + 5)) {
+			hw->mbx.ops.process(hw, &hw->mbx);
+			set_bit(__FM10K_MACVLAN_REQUEST, interface->state);
+			fm10k_mbx_unlock(interface);
+
+			/* Put the request back on the list */
+			spin_lock_irqsave(&interface->macvlan_lock, flags);
+			list_add(&item->list, requests);
+			spin_unlock_irqrestore(&interface->macvlan_lock, flags);
+			break;
+		}
+
+		switch (item->type) {
+		case FM10K_MC_MAC_REQUEST:
+			hw->mac.ops.update_mc_addr(hw,
+						   item->mac.glort,
+						   item->mac.addr,
+						   item->mac.vid,
+						   item->set);
+			break;
+		case FM10K_UC_MAC_REQUEST:
+			hw->mac.ops.update_uc_addr(hw,
+						   item->mac.glort,
+						   item->mac.addr,
+						   item->mac.vid,
+						   item->set,
+						   0);
+			break;
+		case FM10K_VLAN_REQUEST:
+			hw->mac.ops.update_vlan(hw,
+						item->vlan.vid,
+						item->vlan.vsi,
+						item->set);
+			break;
+		default:
+			break;
+		}
+
+		fm10k_mbx_unlock(interface);
+
+		/* Free the item now that we've sent the update */
+		kfree(item);
+	} while (true);
+
+done:
+	WARN_ON(!test_bit(__FM10K_MACVLAN_SCHED, interface->state));
+
+	/* flush memory to make sure state is correct */
+	smp_mb__before_atomic();
+	clear_bit(__FM10K_MACVLAN_SCHED, interface->state);
+
+	/* If a MAC/VLAN request was scheduled since we started, we should
+	 * re-schedule. However, there is no reason to re-schedule if there is
+	 * no work to do.
+	 */
+	if (test_bit(__FM10K_MACVLAN_REQUEST, interface->state))
+		fm10k_macvlan_schedule(interface);
+}
+
+/**
+ * fm10k_configure_tx_ring - Configure Tx ring after Reset
+ * @interface: board private structure
+ * @ring: structure containing ring specific data
+ *
+ * Configure the Tx descriptor ring after a reset.
+ **/
+static void fm10k_configure_tx_ring(struct fm10k_intfc *interface,
+				    struct fm10k_ring *ring)
+{
+	struct fm10k_hw *hw = &interface->hw;
+	u64 tdba = ring->dma;
+	u32 size = ring->count * sizeof(struct fm10k_tx_desc);
+	u32 txint = FM10K_INT_MAP_DISABLE;
+	u32 txdctl = BIT(FM10K_TXDCTL_MAX_TIME_SHIFT) | FM10K_TXDCTL_ENABLE;
+	u8 reg_idx = ring->reg_idx;
+
+	/* disable queue to avoid issues while updating state */
+	fm10k_write_reg(hw, FM10K_TXDCTL(reg_idx), 0);
+	fm10k_write_flush(hw);
+
+	/* possible poll here to verify ring resources have been cleaned */
+
+	/* set location and size for descriptor ring */
+	fm10k_write_reg(hw, FM10K_TDBAL(reg_idx), tdba & DMA_BIT_MASK(32));
+	fm10k_write_reg(hw, FM10K_TDBAH(reg_idx), tdba >> 32);
+	fm10k_write_reg(hw, FM10K_TDLEN(reg_idx), size);
+
+	/* reset head and tail pointers */
+	fm10k_write_reg(hw, FM10K_TDH(reg_idx), 0);
+	fm10k_write_reg(hw, FM10K_TDT(reg_idx), 0);
+
+	/* store tail pointer */
+	ring->tail = &interface->uc_addr[FM10K_TDT(reg_idx)];
+
+	/* reset ntu and ntc to place SW in sync with hardware */
+	ring->next_to_clean = 0;
+	ring->next_to_use = 0;
+
+	/* Map interrupt */
+	if (ring->q_vector) {
+		txint = ring->q_vector->v_idx + NON_Q_VECTORS;
+		txint |= FM10K_INT_MAP_TIMER0;
+	}
+
+	fm10k_write_reg(hw, FM10K_TXINT(reg_idx), txint);
+
+	/* enable use of FTAG bit in Tx descriptor, register is RO for VF */
+	fm10k_write_reg(hw, FM10K_PFVTCTL(reg_idx),
+			FM10K_PFVTCTL_FTAG_DESC_ENABLE);
+
+	/* Initialize XPS */
+	if (!test_and_set_bit(__FM10K_TX_XPS_INIT_DONE, ring->state) &&
+	    ring->q_vector)
+		netif_set_xps_queue(ring->netdev,
+				    &ring->q_vector->affinity_mask,
+				    ring->queue_index);
+
+	/* enable queue */
+	fm10k_write_reg(hw, FM10K_TXDCTL(reg_idx), txdctl);
+}
+
+/**
+ * fm10k_enable_tx_ring - Verify Tx ring is enabled after configuration
+ * @interface: board private structure
+ * @ring: structure containing ring specific data
+ *
+ * Verify the Tx descriptor ring is ready for transmit.
+ **/
+static void fm10k_enable_tx_ring(struct fm10k_intfc *interface,
+				 struct fm10k_ring *ring)
+{
+	struct fm10k_hw *hw = &interface->hw;
+	int wait_loop = 10;
+	u32 txdctl;
+	u8 reg_idx = ring->reg_idx;
+
+	/* if we are already enabled just exit */
+	if (fm10k_read_reg(hw, FM10K_TXDCTL(reg_idx)) & FM10K_TXDCTL_ENABLE)
+		return;
+
+	/* poll to verify queue is enabled */
+	do {
+		usleep_range(1000, 2000);
+		txdctl = fm10k_read_reg(hw, FM10K_TXDCTL(reg_idx));
+	} while (!(txdctl & FM10K_TXDCTL_ENABLE) && --wait_loop);
+	if (!wait_loop)
+		netif_err(interface, drv, interface->netdev,
+			  "Could not enable Tx Queue %d\n", reg_idx);
+}
+
+/**
+ * fm10k_configure_tx - Configure Transmit Unit after Reset
+ * @interface: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+static void fm10k_configure_tx(struct fm10k_intfc *interface)
+{
+	int i;
+
+	/* Setup the HW Tx Head and Tail descriptor pointers */
+	for (i = 0; i < interface->num_tx_queues; i++)
+		fm10k_configure_tx_ring(interface, interface->tx_ring[i]);
+
+	/* poll here to verify that Tx rings are now enabled */
+	for (i = 0; i < interface->num_tx_queues; i++)
+		fm10k_enable_tx_ring(interface, interface->tx_ring[i]);
+}
+
+/**
+ * fm10k_configure_rx_ring - Configure Rx ring after Reset
+ * @interface: board private structure
+ * @ring: structure containing ring specific data
+ *
+ * Configure the Rx descriptor ring after a reset.
+ **/
+static void fm10k_configure_rx_ring(struct fm10k_intfc *interface,
+				    struct fm10k_ring *ring)
+{
+	u64 rdba = ring->dma;
+	struct fm10k_hw *hw = &interface->hw;
+	u32 size = ring->count * sizeof(union fm10k_rx_desc);
+	u32 rxqctl, rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY;
+	u32 srrctl = FM10K_SRRCTL_BUFFER_CHAINING_EN;
+	u32 rxint = FM10K_INT_MAP_DISABLE;
+	u8 rx_pause = interface->rx_pause;
+	u8 reg_idx = ring->reg_idx;
+
+	/* disable queue to avoid issues while updating state */
+	rxqctl = fm10k_read_reg(hw, FM10K_RXQCTL(reg_idx));
+	rxqctl &= ~FM10K_RXQCTL_ENABLE;
+	fm10k_write_reg(hw, FM10K_RXQCTL(reg_idx), rxqctl);
+	fm10k_write_flush(hw);
+
+	/* possible poll here to verify ring resources have been cleaned */
+
+	/* set location and size for descriptor ring */
+	fm10k_write_reg(hw, FM10K_RDBAL(reg_idx), rdba & DMA_BIT_MASK(32));
+	fm10k_write_reg(hw, FM10K_RDBAH(reg_idx), rdba >> 32);
+	fm10k_write_reg(hw, FM10K_RDLEN(reg_idx), size);
+
+	/* reset head and tail pointers */
+	fm10k_write_reg(hw, FM10K_RDH(reg_idx), 0);
+	fm10k_write_reg(hw, FM10K_RDT(reg_idx), 0);
+
+	/* store tail pointer */
+	ring->tail = &interface->uc_addr[FM10K_RDT(reg_idx)];
+
+	/* reset ntu and ntc to place SW in sync with hardware */
+	ring->next_to_clean = 0;
+	ring->next_to_use = 0;
+	ring->next_to_alloc = 0;
+
+	/* Configure the Rx buffer size for one buff without split */
+	srrctl |= FM10K_RX_BUFSZ >> FM10K_SRRCTL_BSIZEPKT_SHIFT;
+
+	/* Configure the Rx ring to suppress loopback packets */
+	srrctl |= FM10K_SRRCTL_LOOPBACK_SUPPRESS;
+	fm10k_write_reg(hw, FM10K_SRRCTL(reg_idx), srrctl);
+
+	/* Enable drop on empty */
+#ifdef CONFIG_DCB
+	if (interface->pfc_en)
+		rx_pause = interface->pfc_en;
+#endif
+	if (!(rx_pause & BIT(ring->qos_pc)))
+		rxdctl |= FM10K_RXDCTL_DROP_ON_EMPTY;
+
+	fm10k_write_reg(hw, FM10K_RXDCTL(reg_idx), rxdctl);
+
+	/* assign default VLAN to queue */
+	ring->vid = hw->mac.default_vid;
+
+	/* if we have an active VLAN, disable default VLAN ID */
+	if (test_bit(hw->mac.default_vid, interface->active_vlans))
+		ring->vid |= FM10K_VLAN_CLEAR;
+
+	/* Map interrupt */
+	if (ring->q_vector) {
+		rxint = ring->q_vector->v_idx + NON_Q_VECTORS;
+		rxint |= FM10K_INT_MAP_TIMER1;
+	}
+
+	fm10k_write_reg(hw, FM10K_RXINT(reg_idx), rxint);
+
+	/* enable queue */
+	rxqctl = fm10k_read_reg(hw, FM10K_RXQCTL(reg_idx));
+	rxqctl |= FM10K_RXQCTL_ENABLE;
+	fm10k_write_reg(hw, FM10K_RXQCTL(reg_idx), rxqctl);
+
+	/* place buffers on ring for receive data */
+	fm10k_alloc_rx_buffers(ring, fm10k_desc_unused(ring));
+}
+
+/**
+ * fm10k_update_rx_drop_en - Configures the drop enable bits for Rx rings
+ * @interface: board private structure
+ *
+ * Configure the drop enable bits for the Rx rings.
+ **/
+void fm10k_update_rx_drop_en(struct fm10k_intfc *interface)
+{
+	struct fm10k_hw *hw = &interface->hw;
+	u8 rx_pause = interface->rx_pause;
+	int i;
+
+#ifdef CONFIG_DCB
+	if (interface->pfc_en)
+		rx_pause = interface->pfc_en;
+
+#endif
+	for (i = 0; i < interface->num_rx_queues; i++) {
+		struct fm10k_ring *ring = interface->rx_ring[i];
+		u32 rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY;
+		u8 reg_idx = ring->reg_idx;
+
+		if (!(rx_pause & BIT(ring->qos_pc)))
+			rxdctl |= FM10K_RXDCTL_DROP_ON_EMPTY;
+
+		fm10k_write_reg(hw, FM10K_RXDCTL(reg_idx), rxdctl);
+	}
+}
+
+/**
+ * fm10k_configure_dglort - Configure Receive DGLORT after reset
+ * @interface: board private structure
+ *
+ * Configure the DGLORT description and RSS tables.
+ **/
+static void fm10k_configure_dglort(struct fm10k_intfc *interface)
+{
+	struct fm10k_dglort_cfg dglort = { 0 };
+	struct fm10k_hw *hw = &interface->hw;
+	int i;
+	u32 mrqc;
+
+	/* Fill out hash function seeds */
+	for (i = 0; i < FM10K_RSSRK_SIZE; i++)
+		fm10k_write_reg(hw, FM10K_RSSRK(0, i), interface->rssrk[i]);
+
+	/* Write RETA table to hardware */
+	for (i = 0; i < FM10K_RETA_SIZE; i++)
+		fm10k_write_reg(hw, FM10K_RETA(0, i), interface->reta[i]);
+
+	/* Generate RSS hash based on packet types, TCP/UDP
+	 * port numbers and/or IPv4/v6 src and dst addresses
+	 */
+	mrqc = FM10K_MRQC_IPV4 |
+	       FM10K_MRQC_TCP_IPV4 |
+	       FM10K_MRQC_IPV6 |
+	       FM10K_MRQC_TCP_IPV6;
+
+	if (test_bit(FM10K_FLAG_RSS_FIELD_IPV4_UDP, interface->flags))
+		mrqc |= FM10K_MRQC_UDP_IPV4;
+	if (test_bit(FM10K_FLAG_RSS_FIELD_IPV6_UDP, interface->flags))
+		mrqc |= FM10K_MRQC_UDP_IPV6;
+
+	fm10k_write_reg(hw, FM10K_MRQC(0), mrqc);
+
+	/* configure default DGLORT mapping for RSS/DCB */
+	dglort.inner_rss = 1;
+	dglort.rss_l = fls(interface->ring_feature[RING_F_RSS].mask);
+	dglort.pc_l = fls(interface->ring_feature[RING_F_QOS].mask);
+	hw->mac.ops.configure_dglort_map(hw, &dglort);
+
+	/* assign GLORT per queue for queue mapped testing */
+	if (interface->glort_count > 64) {
+		memset(&dglort, 0, sizeof(dglort));
+		dglort.inner_rss = 1;
+		dglort.glort = interface->glort + 64;
+		dglort.idx = fm10k_dglort_pf_queue;
+		dglort.queue_l = fls(interface->num_rx_queues - 1);
+		hw->mac.ops.configure_dglort_map(hw, &dglort);
+	}
+
+	/* assign glort value for RSS/DCB specific to this interface */
+	memset(&dglort, 0, sizeof(dglort));
+	dglort.inner_rss = 1;
+	dglort.glort = interface->glort;
+	dglort.rss_l = fls(interface->ring_feature[RING_F_RSS].mask);
+	dglort.pc_l = fls(interface->ring_feature[RING_F_QOS].mask);
+	/* configure DGLORT mapping for RSS/DCB */
+	dglort.idx = fm10k_dglort_pf_rss;
+	if (interface->l2_accel)
+		dglort.shared_l = fls(interface->l2_accel->size);
+	hw->mac.ops.configure_dglort_map(hw, &dglort);
+}
+
+/**
+ * fm10k_configure_rx - Configure Receive Unit after Reset
+ * @interface: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+static void fm10k_configure_rx(struct fm10k_intfc *interface)
+{
+	int i;
+
+	/* Configure SWPRI to PC map */
+	fm10k_configure_swpri_map(interface);
+
+	/* Configure RSS and DGLORT map */
+	fm10k_configure_dglort(interface);
+
+	/* Setup the HW Rx Head and Tail descriptor pointers */
+	for (i = 0; i < interface->num_rx_queues; i++)
+		fm10k_configure_rx_ring(interface, interface->rx_ring[i]);
+
+	/* possible poll here to verify that Rx rings are now enabled */
+}
+
+static void fm10k_napi_enable_all(struct fm10k_intfc *interface)
+{
+	struct fm10k_q_vector *q_vector;
+	int q_idx;
+
+	for (q_idx = 0; q_idx < interface->num_q_vectors; q_idx++) {
+		q_vector = interface->q_vector[q_idx];
+		napi_enable(&q_vector->napi);
+	}
+}
+
+static irqreturn_t fm10k_msix_clean_rings(int __always_unused irq, void *data)
+{
+	struct fm10k_q_vector *q_vector = data;
+
+	if (q_vector->rx.count || q_vector->tx.count)
+		napi_schedule_irqoff(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t fm10k_msix_mbx_vf(int __always_unused irq, void *data)
+{
+	struct fm10k_intfc *interface = data;
+	struct fm10k_hw *hw = &interface->hw;
+	struct fm10k_mbx_info *mbx = &hw->mbx;
+
+	/* re-enable mailbox interrupt and indicate 20us delay */
+	fm10k_write_reg(hw, FM10K_VFITR(FM10K_MBX_VECTOR),
+			(FM10K_MBX_INT_DELAY >> hw->mac.itr_scale) |
+			FM10K_ITR_ENABLE);
+
+	/* service upstream mailbox */
+	if (fm10k_mbx_trylock(interface)) {
+		mbx->ops.process(hw, mbx);
+		fm10k_mbx_unlock(interface);
+	}
+
+	hw->mac.get_host_state = true;
+	fm10k_service_event_schedule(interface);
+
+	return IRQ_HANDLED;
+}
+
+#define FM10K_ERR_MSG(type) case (type): error = #type; break
+static void fm10k_handle_fault(struct fm10k_intfc *interface, int type,
+			       struct fm10k_fault *fault)
+{
+	struct pci_dev *pdev = interface->pdev;
+	struct fm10k_hw *hw = &interface->hw;
+	struct fm10k_iov_data *iov_data = interface->iov_data;
+	char *error;
+
+	switch (type) {
+	case FM10K_PCA_FAULT:
+		switch (fault->type) {
+		default:
+			error = "Unknown PCA error";
+			break;
+		FM10K_ERR_MSG(PCA_NO_FAULT);
+		FM10K_ERR_MSG(PCA_UNMAPPED_ADDR);
+		FM10K_ERR_MSG(PCA_BAD_QACCESS_PF);
+		FM10K_ERR_MSG(PCA_BAD_QACCESS_VF);
+		FM10K_ERR_MSG(PCA_MALICIOUS_REQ);
+		FM10K_ERR_MSG(PCA_POISONED_TLP);
+		FM10K_ERR_MSG(PCA_TLP_ABORT);
+		}
+		break;
+	case FM10K_THI_FAULT:
+		switch (fault->type) {
+		default:
+			error = "Unknown THI error";
+			break;
+		FM10K_ERR_MSG(THI_NO_FAULT);
+		FM10K_ERR_MSG(THI_MAL_DIS_Q_FAULT);
+		}
+		break;
+	case FM10K_FUM_FAULT:
+		switch (fault->type) {
+		default:
+			error = "Unknown FUM error";
+			break;
+		FM10K_ERR_MSG(FUM_NO_FAULT);
+		FM10K_ERR_MSG(FUM_UNMAPPED_ADDR);
+		FM10K_ERR_MSG(FUM_BAD_VF_QACCESS);
+		FM10K_ERR_MSG(FUM_ADD_DECODE_ERR);
+		FM10K_ERR_MSG(FUM_RO_ERROR);
+		FM10K_ERR_MSG(FUM_QPRC_CRC_ERROR);
+		FM10K_ERR_MSG(FUM_CSR_TIMEOUT);
+		FM10K_ERR_MSG(FUM_INVALID_TYPE);
+		FM10K_ERR_MSG(FUM_INVALID_LENGTH);
+		FM10K_ERR_MSG(FUM_INVALID_BE);
+		FM10K_ERR_MSG(FUM_INVALID_ALIGN);
+		}
+		break;
+	default:
+		error = "Undocumented fault";
+		break;
+	}
+
+	dev_warn(&pdev->dev,
+		 "%s Address: 0x%llx SpecInfo: 0x%x Func: %02x.%0x\n",
+		 error, fault->address, fault->specinfo,
+		 PCI_SLOT(fault->func), PCI_FUNC(fault->func));
+
+	/* For VF faults, clear out the respective LPORT, reset the queue
+	 * resources, and then reconnect to the mailbox. This allows the
+	 * VF in question to resume behavior. For transient faults that are
+	 * the result of non-malicious behavior this will log the fault and
+	 * allow the VF to resume functionality. Obviously for malicious VFs
+	 * they will be able to attempt malicious behavior again. In this
+	 * case, the system administrator will need to step in and manually
+	 * remove or disable the VF in question.
+	 */
+	if (fault->func && iov_data) {
+		int vf = fault->func - 1;
+		struct fm10k_vf_info *vf_info = &iov_data->vf_info[vf];
+
+		hw->iov.ops.reset_lport(hw, vf_info);
+		hw->iov.ops.reset_resources(hw, vf_info);
+
+		/* reset_lport disables the VF, so re-enable it */
+		hw->iov.ops.set_lport(hw, vf_info, vf,
+				      FM10K_VF_FLAG_MULTI_CAPABLE);
+
+		/* reset_resources will disconnect from the mbx  */
+		vf_info->mbx.ops.connect(hw, &vf_info->mbx);
+	}
+}
+
+static void fm10k_report_fault(struct fm10k_intfc *interface, u32 eicr)
+{
+	struct fm10k_hw *hw = &interface->hw;
+	struct fm10k_fault fault = { 0 };
+	int type, err;
+
+	for (eicr &= FM10K_EICR_FAULT_MASK, type = FM10K_PCA_FAULT;
+	     eicr;
+	     eicr >>= 1, type += FM10K_FAULT_SIZE) {
+		/* only check if there is an error reported */
+		if (!(eicr & 0x1))
+			continue;
+
+		/* retrieve fault info */
+		err = hw->mac.ops.get_fault(hw, type, &fault);
+		if (err) {
+			dev_err(&interface->pdev->dev,
+				"error reading fault\n");
+			continue;
+		}
+
+		fm10k_handle_fault(interface, type, &fault);
+	}
+}
+
+static void fm10k_reset_drop_on_empty(struct fm10k_intfc *interface, u32 eicr)
+{
+	struct fm10k_hw *hw = &interface->hw;
+	const u32 rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY;
+	u32 maxholdq;
+	int q;
+
+	if (!(eicr & FM10K_EICR_MAXHOLDTIME))
+		return;
+
+	maxholdq = fm10k_read_reg(hw, FM10K_MAXHOLDQ(7));
+	if (maxholdq)
+		fm10k_write_reg(hw, FM10K_MAXHOLDQ(7), maxholdq);
+	for (q = 255;;) {
+		if (maxholdq & BIT(31)) {
+			if (q < FM10K_MAX_QUEUES_PF) {
+				interface->rx_overrun_pf++;
+				fm10k_write_reg(hw, FM10K_RXDCTL(q), rxdctl);
+			} else {
+				interface->rx_overrun_vf++;
+			}
+		}
+
+		maxholdq *= 2;
+		if (!maxholdq)
+			q &= ~(32 - 1);
+
+		if (!q)
+			break;
+
+		if (q-- % 32)
+			continue;
+
+		maxholdq = fm10k_read_reg(hw, FM10K_MAXHOLDQ(q / 32));
+		if (maxholdq)
+			fm10k_write_reg(hw, FM10K_MAXHOLDQ(q / 32), maxholdq);
+	}
+}
+
+static irqreturn_t fm10k_msix_mbx_pf(int __always_unused irq, void *data)
+{
+	struct fm10k_intfc *interface = data;
+	struct fm10k_hw *hw = &interface->hw;
+	struct fm10k_mbx_info *mbx = &hw->mbx;
+	u32 eicr;
+
+	/* unmask any set bits related to this interrupt */
+	eicr = fm10k_read_reg(hw, FM10K_EICR);
+	fm10k_write_reg(hw, FM10K_EICR, eicr & (FM10K_EICR_MAILBOX |
+						FM10K_EICR_SWITCHREADY |
+						FM10K_EICR_SWITCHNOTREADY));
+
+	/* report any faults found to the message log */
+	fm10k_report_fault(interface, eicr);
+
+	/* reset any queues disabled due to receiver overrun */
+	fm10k_reset_drop_on_empty(interface, eicr);
+
+	/* service mailboxes */
+	if (fm10k_mbx_trylock(interface)) {
+		s32 err = mbx->ops.process(hw, mbx);
+
+		if (err == FM10K_ERR_RESET_REQUESTED)
+			set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
+
+		/* handle VFLRE events */
+		fm10k_iov_event(interface);
+		fm10k_mbx_unlock(interface);
+	}
+
+	/* if switch toggled state we should reset GLORTs */
+	if (eicr & FM10K_EICR_SWITCHNOTREADY) {
+		/* force link down for at least 4 seconds */
+		interface->link_down_event = jiffies + (4 * HZ);
+		set_bit(__FM10K_LINK_DOWN, interface->state);
+
+		/* reset dglort_map back to no config */
+		hw->mac.dglort_map = FM10K_DGLORTMAP_NONE;
+	}
+
+	/* we should validate host state after interrupt event */
+	hw->mac.get_host_state = true;
+
+	/* validate host state, and handle VF mailboxes in the service task */
+	fm10k_service_event_schedule(interface);
+
+	/* re-enable mailbox interrupt and indicate 20us delay */
+	fm10k_write_reg(hw, FM10K_ITR(FM10K_MBX_VECTOR),
+			(FM10K_MBX_INT_DELAY >> hw->mac.itr_scale) |
+			FM10K_ITR_ENABLE);
+
+	return IRQ_HANDLED;
+}
+
+void fm10k_mbx_free_irq(struct fm10k_intfc *interface)
+{
+	struct fm10k_hw *hw = &interface->hw;
+	struct msix_entry *entry;
+	int itr_reg;
+
+	/* no mailbox IRQ to free if MSI-X is not enabled */
+	if (!interface->msix_entries)
+		return;
+
+	entry = &interface->msix_entries[FM10K_MBX_VECTOR];
+
+	/* disconnect the mailbox */
+	hw->mbx.ops.disconnect(hw, &hw->mbx);
+
+	/* disable Mailbox cause */
+	if (hw->mac.type == fm10k_mac_pf) {
+		fm10k_write_reg(hw, FM10K_EIMR,
+				FM10K_EIMR_DISABLE(PCA_FAULT) |
+				FM10K_EIMR_DISABLE(FUM_FAULT) |
+				FM10K_EIMR_DISABLE(MAILBOX) |
+				FM10K_EIMR_DISABLE(SWITCHREADY) |
+				FM10K_EIMR_DISABLE(SWITCHNOTREADY) |
+				FM10K_EIMR_DISABLE(SRAMERROR) |
+				FM10K_EIMR_DISABLE(VFLR) |
+				FM10K_EIMR_DISABLE(MAXHOLDTIME));
+		itr_reg = FM10K_ITR(FM10K_MBX_VECTOR);
+	} else {
+		itr_reg = FM10K_VFITR(FM10K_MBX_VECTOR);
+	}
+
+	fm10k_write_reg(hw, itr_reg, FM10K_ITR_MASK_SET);
+
+	free_irq(entry->vector, interface);
+}
+
+static s32 fm10k_mbx_mac_addr(struct fm10k_hw *hw, u32 **results,
+			      struct fm10k_mbx_info *mbx)
+{
+	bool vlan_override = hw->mac.vlan_override;
+	u16 default_vid = hw->mac.default_vid;
+	struct fm10k_intfc *interface;
+	s32 err;
+
+	err = fm10k_msg_mac_vlan_vf(hw, results, mbx);
+	if (err)
+		return err;
+
+	interface = container_of(hw, struct fm10k_intfc, hw);
+
+	/* MAC was changed so we need reset */
+	if (is_valid_ether_addr(hw->mac.perm_addr) &&
+	    !ether_addr_equal(hw->mac.perm_addr, hw->mac.addr))
+		set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
+
+	/* VLAN override was changed, or default VLAN changed */
+	if ((vlan_override != hw->mac.vlan_override) ||
+	    (default_vid != hw->mac.default_vid))
+		set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
+
+	return 0;
+}
+
+/* generic error handler for mailbox issues */
+static s32 fm10k_mbx_error(struct fm10k_hw *hw, u32 **results,
+			   struct fm10k_mbx_info __always_unused *mbx)
+{
+	struct fm10k_intfc *interface;
+	struct pci_dev *pdev;
+
+	interface = container_of(hw, struct fm10k_intfc, hw);
+	pdev = interface->pdev;
+
+	dev_err(&pdev->dev, "Unknown message ID %u\n",
+		**results & FM10K_TLV_ID_MASK);
+
+	return 0;
+}
+
+static const struct fm10k_msg_data vf_mbx_data[] = {
+	FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test),
+	FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_mbx_mac_addr),
+	FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_msg_lport_state_vf),
+	FM10K_TLV_MSG_ERROR_HANDLER(fm10k_mbx_error),
+};
+
+static int fm10k_mbx_request_irq_vf(struct fm10k_intfc *interface)
+{
+	struct msix_entry *entry = &interface->msix_entries[FM10K_MBX_VECTOR];
+	struct net_device *dev = interface->netdev;
+	struct fm10k_hw *hw = &interface->hw;
+	int err;
+
+	/* Use timer0 for interrupt moderation on the mailbox */
+	u32 itr = entry->entry | FM10K_INT_MAP_TIMER0;
+
+	/* register mailbox handlers */
+	err = hw->mbx.ops.register_handlers(&hw->mbx, vf_mbx_data);
+	if (err)
+		return err;
+
+	/* request the IRQ */
+	err = request_irq(entry->vector, fm10k_msix_mbx_vf, 0,
+			  dev->name, interface);
+	if (err) {
+		netif_err(interface, probe, dev,
+			  "request_irq for msix_mbx failed: %d\n", err);
+		return err;
+	}
+
+	/* map all of the interrupt sources */
+	fm10k_write_reg(hw, FM10K_VFINT_MAP, itr);
+
+	/* enable interrupt */
+	fm10k_write_reg(hw, FM10K_VFITR(entry->entry), FM10K_ITR_ENABLE);
+
+	return 0;
+}
+
+static s32 fm10k_lport_map(struct fm10k_hw *hw, u32 **results,
+			   struct fm10k_mbx_info *mbx)
+{
+	struct fm10k_intfc *interface;
+	u32 dglort_map = hw->mac.dglort_map;
+	s32 err;
+
+	interface = container_of(hw, struct fm10k_intfc, hw);
+
+	err = fm10k_msg_err_pf(hw, results, mbx);
+	if (!err && hw->swapi.status) {
+		/* force link down for a reasonable delay */
+		interface->link_down_event = jiffies + (2 * HZ);
+		set_bit(__FM10K_LINK_DOWN, interface->state);
+
+		/* reset dglort_map back to no config */
+		hw->mac.dglort_map = FM10K_DGLORTMAP_NONE;
+
+		fm10k_service_event_schedule(interface);
+
+		/* prevent overloading kernel message buffer */
+		if (interface->lport_map_failed)
+			return 0;
+
+		interface->lport_map_failed = true;
+
+		if (hw->swapi.status == FM10K_MSG_ERR_PEP_NOT_SCHEDULED)
+			dev_warn(&interface->pdev->dev,
+				 "cannot obtain link because the host interface is configured for a PCIe host interface bandwidth of zero\n");
+		dev_warn(&interface->pdev->dev,
+			 "request logical port map failed: %d\n",
+			 hw->swapi.status);
+
+		return 0;
+	}
+
+	err = fm10k_msg_lport_map_pf(hw, results, mbx);
+	if (err)
+		return err;
+
+	interface->lport_map_failed = false;
+
+	/* we need to reset if port count was just updated */
+	if (dglort_map != hw->mac.dglort_map)
+		set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
+
+	return 0;
+}
+
+static s32 fm10k_update_pvid(struct fm10k_hw *hw, u32 **results,
+			     struct fm10k_mbx_info __always_unused *mbx)
+{
+	struct fm10k_intfc *interface;
+	u16 glort, pvid;
+	u32 pvid_update;
+	s32 err;
+
+	err = fm10k_tlv_attr_get_u32(results[FM10K_PF_ATTR_ID_UPDATE_PVID],
+				     &pvid_update);
+	if (err)
+		return err;
+
+	/* extract values from the pvid update */
+	glort = FM10K_MSG_HDR_FIELD_GET(pvid_update, UPDATE_PVID_GLORT);
+	pvid = FM10K_MSG_HDR_FIELD_GET(pvid_update, UPDATE_PVID_PVID);
+
+	/* if glort is not valid return error */
+	if (!fm10k_glort_valid_pf(hw, glort))
+		return FM10K_ERR_PARAM;
+
+	/* verify VLAN ID is valid */
+	if (pvid >= FM10K_VLAN_TABLE_VID_MAX)
+		return FM10K_ERR_PARAM;
+
+	interface = container_of(hw, struct fm10k_intfc, hw);
+
+	/* check to see if this belongs to one of the VFs */
+	err = fm10k_iov_update_pvid(interface, glort, pvid);
+	if (!err)
+		return 0;
+
+	/* we need to reset if default VLAN was just updated */
+	if (pvid != hw->mac.default_vid)
+		set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
+
+	hw->mac.default_vid = pvid;
+
+	return 0;
+}
+
+static const struct fm10k_msg_data pf_mbx_data[] = {
+	FM10K_PF_MSG_ERR_HANDLER(XCAST_MODES, fm10k_msg_err_pf),
+	FM10K_PF_MSG_ERR_HANDLER(UPDATE_MAC_FWD_RULE, fm10k_msg_err_pf),
+	FM10K_PF_MSG_LPORT_MAP_HANDLER(fm10k_lport_map),
+	FM10K_PF_MSG_ERR_HANDLER(LPORT_CREATE, fm10k_msg_err_pf),
+	FM10K_PF_MSG_ERR_HANDLER(LPORT_DELETE, fm10k_msg_err_pf),
+	FM10K_PF_MSG_UPDATE_PVID_HANDLER(fm10k_update_pvid),
+	FM10K_TLV_MSG_ERROR_HANDLER(fm10k_mbx_error),
+};
+
+static int fm10k_mbx_request_irq_pf(struct fm10k_intfc *interface)
+{
+	struct msix_entry *entry = &interface->msix_entries[FM10K_MBX_VECTOR];
+	struct net_device *dev = interface->netdev;
+	struct fm10k_hw *hw = &interface->hw;
+	int err;
+
+	/* Use timer0 for interrupt moderation on the mailbox */
+	u32 mbx_itr = entry->entry | FM10K_INT_MAP_TIMER0;
+	u32 other_itr = entry->entry | FM10K_INT_MAP_IMMEDIATE;
+
+	/* register mailbox handlers */
+	err = hw->mbx.ops.register_handlers(&hw->mbx, pf_mbx_data);
+	if (err)
+		return err;
+
+	/* request the IRQ */
+	err = request_irq(entry->vector, fm10k_msix_mbx_pf, 0,
+			  dev->name, interface);
+	if (err) {
+		netif_err(interface, probe, dev,
+			  "request_irq for msix_mbx failed: %d\n", err);
+		return err;
+	}
+
+	/* Enable interrupts w/ no moderation for "other" interrupts */
+	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_pcie_fault), other_itr);
+	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_switch_up_down), other_itr);
+	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_sram), other_itr);
+	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_max_hold_time), other_itr);
+	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_vflr), other_itr);
+
+	/* Enable interrupts w/ moderation for mailbox */
+	fm10k_write_reg(hw, FM10K_INT_MAP(fm10k_int_mailbox), mbx_itr);
+
+	/* Enable individual interrupt causes */
+	fm10k_write_reg(hw, FM10K_EIMR, FM10K_EIMR_ENABLE(PCA_FAULT) |
+					FM10K_EIMR_ENABLE(FUM_FAULT) |
+					FM10K_EIMR_ENABLE(MAILBOX) |
+					FM10K_EIMR_ENABLE(SWITCHREADY) |
+					FM10K_EIMR_ENABLE(SWITCHNOTREADY) |
+					FM10K_EIMR_ENABLE(SRAMERROR) |
+					FM10K_EIMR_ENABLE(VFLR) |
+					FM10K_EIMR_ENABLE(MAXHOLDTIME));
+
+	/* enable interrupt */
+	fm10k_write_reg(hw, FM10K_ITR(entry->entry), FM10K_ITR_ENABLE);
+
+	return 0;
+}
+
+int fm10k_mbx_request_irq(struct fm10k_intfc *interface)
+{
+	struct fm10k_hw *hw = &interface->hw;
+	int err;
+
+	/* enable Mailbox cause */
+	if (hw->mac.type == fm10k_mac_pf)
+		err = fm10k_mbx_request_irq_pf(interface);
+	else
+		err = fm10k_mbx_request_irq_vf(interface);
+	if (err)
+		return err;
+
+	/* connect mailbox */
+	err = hw->mbx.ops.connect(hw, &hw->mbx);
+
+	/* if the mailbox failed to connect, then free IRQ */
+	if (err)
+		fm10k_mbx_free_irq(interface);
+
+	return err;
+}
+
+/**
+ * fm10k_qv_free_irq - release interrupts associated with queue vectors
+ * @interface: board private structure
+ *
+ * Release all interrupts associated with this interface
+ **/
+void fm10k_qv_free_irq(struct fm10k_intfc *interface)
+{
+	int vector = interface->num_q_vectors;
+	struct msix_entry *entry;
+
+	entry = &interface->msix_entries[NON_Q_VECTORS + vector];
+
+	while (vector) {
+		struct fm10k_q_vector *q_vector;
+
+		vector--;
+		entry--;
+		q_vector = interface->q_vector[vector];
+
+		if (!q_vector->tx.count && !q_vector->rx.count)
+			continue;
+
+		/* clear the affinity_mask in the IRQ descriptor */
+		irq_set_affinity_hint(entry->vector, NULL);
+
+		/* disable interrupts */
+		writel(FM10K_ITR_MASK_SET, q_vector->itr);
+
+		free_irq(entry->vector, q_vector);
+	}
+}
+
+/**
+ * fm10k_qv_request_irq - initialize interrupts for queue vectors
+ * @interface: board private structure
+ *
+ * Attempts to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+int fm10k_qv_request_irq(struct fm10k_intfc *interface)
+{
+	struct net_device *dev = interface->netdev;
+	struct fm10k_hw *hw = &interface->hw;
+	struct msix_entry *entry;
+	unsigned int ri = 0, ti = 0;
+	int vector, err;
+
+	entry = &interface->msix_entries[NON_Q_VECTORS];
+
+	for (vector = 0; vector < interface->num_q_vectors; vector++) {
+		struct fm10k_q_vector *q_vector = interface->q_vector[vector];
+
+		/* name the vector */
+		if (q_vector->tx.count && q_vector->rx.count) {
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "%s-TxRx-%u", dev->name, ri++);
+			ti++;
+		} else if (q_vector->rx.count) {
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "%s-rx-%u", dev->name, ri++);
+		} else if (q_vector->tx.count) {
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "%s-tx-%u", dev->name, ti++);
+		} else {
+			/* skip this unused q_vector */
+			continue;
+		}
+
+		/* Assign ITR register to q_vector */
+		q_vector->itr = (hw->mac.type == fm10k_mac_pf) ?
+				&interface->uc_addr[FM10K_ITR(entry->entry)] :
+				&interface->uc_addr[FM10K_VFITR(entry->entry)];
+
+		/* request the IRQ */
+		err = request_irq(entry->vector, &fm10k_msix_clean_rings, 0,
+				  q_vector->name, q_vector);
+		if (err) {
+			netif_err(interface, probe, dev,
+				  "request_irq failed for MSIX interrupt Error: %d\n",
+				  err);
+			goto err_out;
+		}
+
+		/* assign the mask for this irq */
+		irq_set_affinity_hint(entry->vector, &q_vector->affinity_mask);
+
+		/* Enable q_vector */
+		writel(FM10K_ITR_ENABLE, q_vector->itr);
+
+		entry++;
+	}
+
+	return 0;
+
+err_out:
+	/* wind through the ring freeing all entries and vectors */
+	while (vector) {
+		struct fm10k_q_vector *q_vector;
+
+		entry--;
+		vector--;
+		q_vector = interface->q_vector[vector];
+
+		if (!q_vector->tx.count && !q_vector->rx.count)
+			continue;
+
+		/* clear the affinity_mask in the IRQ descriptor */
+		irq_set_affinity_hint(entry->vector, NULL);
+
+		/* disable interrupts */
+		writel(FM10K_ITR_MASK_SET, q_vector->itr);
+
+		free_irq(entry->vector, q_vector);
+	}
+
+	return err;
+}
+
+void fm10k_up(struct fm10k_intfc *interface)
+{
+	struct fm10k_hw *hw = &interface->hw;
+
+	/* Enable Tx/Rx DMA */
+	hw->mac.ops.start_hw(hw);
+
+	/* configure Tx descriptor rings */
+	fm10k_configure_tx(interface);
+
+	/* configure Rx descriptor rings */
+	fm10k_configure_rx(interface);
+
+	/* configure interrupts */
+	hw->mac.ops.update_int_moderator(hw);
+
+	/* enable statistics capture again */
+	clear_bit(__FM10K_UPDATING_STATS, interface->state);
+
+	/* clear down bit to indicate we are ready to go */
+	clear_bit(__FM10K_DOWN, interface->state);
+
+	/* enable polling cleanups */
+	fm10k_napi_enable_all(interface);
+
+	/* re-establish Rx filters */
+	fm10k_restore_rx_state(interface);
+
+	/* enable transmits */
+	netif_tx_start_all_queues(interface->netdev);
+
+	/* kick off the service timer now */
+	hw->mac.get_host_state = true;
+	mod_timer(&interface->service_timer, jiffies);
+}
+
+static void fm10k_napi_disable_all(struct fm10k_intfc *interface)
+{
+	struct fm10k_q_vector *q_vector;
+	int q_idx;
+
+	for (q_idx = 0; q_idx < interface->num_q_vectors; q_idx++) {
+		q_vector = interface->q_vector[q_idx];
+		napi_disable(&q_vector->napi);
+	}
+}
+
+void fm10k_down(struct fm10k_intfc *interface)
+{
+	struct net_device *netdev = interface->netdev;
+	struct fm10k_hw *hw = &interface->hw;
+	int err, i = 0, count = 0;
+
+	/* signal that we are down to the interrupt handler and service task */
+	if (test_and_set_bit(__FM10K_DOWN, interface->state))
+		return;
+
+	/* call carrier off first to avoid false dev_watchdog timeouts */
+	netif_carrier_off(netdev);
+
+	/* disable transmits */
+	netif_tx_stop_all_queues(netdev);
+	netif_tx_disable(netdev);
+
+	/* reset Rx filters */
+	fm10k_reset_rx_state(interface);
+
+	/* disable polling routines */
+	fm10k_napi_disable_all(interface);
+
+	/* capture stats one last time before stopping interface */
+	fm10k_update_stats(interface);
+
+	/* prevent updating statistics while we're down */
+	while (test_and_set_bit(__FM10K_UPDATING_STATS, interface->state))
+		usleep_range(1000, 2000);
+
+	/* skip waiting for TX DMA if we lost PCIe link */
+	if (FM10K_REMOVED(hw->hw_addr))
+		goto skip_tx_dma_drain;
+
+	/* In some rare circumstances it can take a while for Tx queues to
+	 * quiesce and be fully disabled. Attempt to .stop_hw() first, and
+	 * then if we get ERR_REQUESTS_PENDING, go ahead and wait in a loop
+	 * until the Tx queues have emptied, or until a number of retries. If
+	 * we fail to clear within the retry loop, we will issue a warning
+	 * indicating that Tx DMA is probably hung. Note this means we call
+	 * .stop_hw() twice but this shouldn't cause any problems.
+	 */
+	err = hw->mac.ops.stop_hw(hw);
+	if (err != FM10K_ERR_REQUESTS_PENDING)
+		goto skip_tx_dma_drain;
+
+#define TX_DMA_DRAIN_RETRIES 25
+	for (count = 0; count < TX_DMA_DRAIN_RETRIES; count++) {
+		usleep_range(10000, 20000);
+
+		/* start checking at the last ring to have pending Tx */
+		for (; i < interface->num_tx_queues; i++)
+			if (fm10k_get_tx_pending(interface->tx_ring[i], false))
+				break;
+
+		/* if all the queues are drained, we can break now */
+		if (i == interface->num_tx_queues)
+			break;
+	}
+
+	if (count >= TX_DMA_DRAIN_RETRIES)
+		dev_err(&interface->pdev->dev,
+			"Tx queues failed to drain after %d tries. Tx DMA is probably hung.\n",
+			count);
+skip_tx_dma_drain:
+	/* Disable DMA engine for Tx/Rx */
+	err = hw->mac.ops.stop_hw(hw);
+	if (err == FM10K_ERR_REQUESTS_PENDING)
+		dev_err(&interface->pdev->dev,
+			"due to pending requests hw was not shut down gracefully\n");
+	else if (err)
+		dev_err(&interface->pdev->dev, "stop_hw failed: %d\n", err);
+
+	/* free any buffers still on the rings */
+	fm10k_clean_all_tx_rings(interface);
+	fm10k_clean_all_rx_rings(interface);
+}
+
+/**
+ * fm10k_sw_init - Initialize general software structures
+ * @interface: host interface private structure to initialize
+ * @ent: PCI device ID entry
+ *
+ * fm10k_sw_init initializes the interface private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ **/
+static int fm10k_sw_init(struct fm10k_intfc *interface,
+			 const struct pci_device_id *ent)
+{
+	const struct fm10k_info *fi = fm10k_info_tbl[ent->driver_data];
+	struct fm10k_hw *hw = &interface->hw;
+	struct pci_dev *pdev = interface->pdev;
+	struct net_device *netdev = interface->netdev;
+	u32 rss_key[FM10K_RSSRK_SIZE];
+	unsigned int rss;
+	int err;
+
+	/* initialize back pointer */
+	hw->back = interface;
+	hw->hw_addr = interface->uc_addr;
+
+	/* PCI config space info */
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	hw->revision_id = pdev->revision;
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+
+	/* Setup hw api */
+	memcpy(&hw->mac.ops, fi->mac_ops, sizeof(hw->mac.ops));
+	hw->mac.type = fi->mac;
+
+	/* Setup IOV handlers */
+	if (fi->iov_ops)
+		memcpy(&hw->iov.ops, fi->iov_ops, sizeof(hw->iov.ops));
+
+	/* Set common capability flags and settings */
+	rss = min_t(int, FM10K_MAX_RSS_INDICES, num_online_cpus());
+	interface->ring_feature[RING_F_RSS].limit = rss;
+	fi->get_invariants(hw);
+
+	/* pick up the PCIe bus settings for reporting later */
+	if (hw->mac.ops.get_bus_info)
+		hw->mac.ops.get_bus_info(hw);
+
+	/* limit the usable DMA range */
+	if (hw->mac.ops.set_dma_mask)
+		hw->mac.ops.set_dma_mask(hw, dma_get_mask(&pdev->dev));
+
+	/* update netdev with DMA restrictions */
+	if (dma_get_mask(&pdev->dev) > DMA_BIT_MASK(32)) {
+		netdev->features |= NETIF_F_HIGHDMA;
+		netdev->vlan_features |= NETIF_F_HIGHDMA;
+	}
+
+	/* reset and initialize the hardware so it is in a known state */
+	err = hw->mac.ops.reset_hw(hw);
+	if (err) {
+		dev_err(&pdev->dev, "reset_hw failed: %d\n", err);
+		return err;
+	}
+
+	err = hw->mac.ops.init_hw(hw);
+	if (err) {
+		dev_err(&pdev->dev, "init_hw failed: %d\n", err);
+		return err;
+	}
+
+	/* initialize hardware statistics */
+	hw->mac.ops.update_hw_stats(hw, &interface->stats);
+
+	/* Set upper limit on IOV VFs that can be allocated */
+	pci_sriov_set_totalvfs(pdev, hw->iov.total_vfs);
+
+	/* Start with random Ethernet address */
+	eth_random_addr(hw->mac.addr);
+
+	/* Initialize MAC address from hardware */
+	err = hw->mac.ops.read_mac_addr(hw);
+	if (err) {
+		dev_warn(&pdev->dev,
+			 "Failed to obtain MAC address defaulting to random\n");
+		/* tag address assignment as random */
+		netdev->addr_assign_type |= NET_ADDR_RANDOM;
+	}
+
+	eth_hw_addr_set(netdev, hw->mac.addr);
+	ether_addr_copy(netdev->perm_addr, hw->mac.addr);
+
+	if (!is_valid_ether_addr(netdev->perm_addr)) {
+		dev_err(&pdev->dev, "Invalid MAC Address\n");
+		return -EIO;
+	}
+
+	/* initialize DCBNL interface */
+	fm10k_dcbnl_set_ops(netdev);
+
+	/* set default ring sizes */
+	interface->tx_ring_count = FM10K_DEFAULT_TXD;
+	interface->rx_ring_count = FM10K_DEFAULT_RXD;
+
+	/* set default interrupt moderation */
+	interface->tx_itr = FM10K_TX_ITR_DEFAULT;
+	interface->rx_itr = FM10K_ITR_ADAPTIVE | FM10K_RX_ITR_DEFAULT;
+
+	/* Initialize the MAC/VLAN queue */
+	INIT_LIST_HEAD(&interface->macvlan_requests);
+
+	netdev_rss_key_fill(rss_key, sizeof(rss_key));
+	memcpy(interface->rssrk, rss_key, sizeof(rss_key));
+
+	/* Initialize the mailbox lock */
+	spin_lock_init(&interface->mbx_lock);
+	spin_lock_init(&interface->macvlan_lock);
+
+	/* Start off interface as being down */
+	set_bit(__FM10K_DOWN, interface->state);
+	set_bit(__FM10K_UPDATING_STATS, interface->state);
+
+	return 0;
+}
+
+/**
+ * fm10k_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in fm10k_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * fm10k_probe initializes an interface identified by a pci_dev structure.
+ * The OS initialization, configuring of the interface private structure,
+ * and a hardware reset occur.
+ **/
+static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct net_device *netdev;
+	struct fm10k_intfc *interface;
+	int err;
+
+	if (pdev->error_state != pci_channel_io_normal) {
+		dev_err(&pdev->dev,
+			"PCI device still in an error state. Unable to load...\n");
+		return -EIO;
+	}
+
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(&pdev->dev,
+			"PCI enable device failed: %d\n", err);
+		return err;
+	}
+
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
+	if (err)
+		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+	if (err) {
+		dev_err(&pdev->dev,
+			"DMA configuration failed: %d\n", err);
+		goto err_dma;
+	}
+
+	err = pci_request_mem_regions(pdev, fm10k_driver_name);
+	if (err) {
+		dev_err(&pdev->dev,
+			"pci_request_selected_regions failed: %d\n", err);
+		goto err_pci_reg;
+	}
+
+	pci_set_master(pdev);
+	pci_save_state(pdev);
+
+	netdev = fm10k_alloc_netdev(fm10k_info_tbl[ent->driver_data]);
+	if (!netdev) {
+		err = -ENOMEM;
+		goto err_alloc_netdev;
+	}
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	interface = netdev_priv(netdev);
+	pci_set_drvdata(pdev, interface);
+
+	interface->netdev = netdev;
+	interface->pdev = pdev;
+
+	interface->uc_addr = ioremap(pci_resource_start(pdev, 0),
+				     FM10K_UC_ADDR_SIZE);
+	if (!interface->uc_addr) {
+		err = -EIO;
+		goto err_ioremap;
+	}
+
+	err = fm10k_sw_init(interface, ent);
+	if (err)
+		goto err_sw_init;
+
+	/* enable debugfs support */
+	fm10k_dbg_intfc_init(interface);
+
+	err = fm10k_init_queueing_scheme(interface);
+	if (err)
+		goto err_sw_init;
+
+	/* the mbx interrupt might attempt to schedule the service task, so we
+	 * must ensure it is disabled since we haven't yet requested the timer
+	 * or work item.
+	 */
+	set_bit(__FM10K_SERVICE_DISABLE, interface->state);
+
+	err = fm10k_mbx_request_irq(interface);
+	if (err)
+		goto err_mbx_interrupt;
+
+	/* final check of hardware state before registering the interface */
+	err = fm10k_hw_ready(interface);
+	if (err)
+		goto err_register;
+
+	err = register_netdev(netdev);
+	if (err)
+		goto err_register;
+
+	/* carrier off reporting is important to ethtool even BEFORE open */
+	netif_carrier_off(netdev);
+
+	/* stop all the transmit queues from transmitting until link is up */
+	netif_tx_stop_all_queues(netdev);
+
+	/* Initialize service timer and service task late in order to avoid
+	 * cleanup issues.
+	 */
+	timer_setup(&interface->service_timer, fm10k_service_timer, 0);
+	INIT_WORK(&interface->service_task, fm10k_service_task);
+
+	/* Setup the MAC/VLAN queue */
+	INIT_DELAYED_WORK(&interface->macvlan_task, fm10k_macvlan_task);
+
+	/* kick off service timer now, even when interface is down */
+	mod_timer(&interface->service_timer, (HZ * 2) + jiffies);
+
+	/* print warning for non-optimal configurations */
+	pcie_print_link_status(interface->pdev);
+
+	/* report MAC address for logging */
+	dev_info(&pdev->dev, "%pM\n", netdev->dev_addr);
+
+	/* enable SR-IOV after registering netdev to enforce PF/VF ordering */
+	fm10k_iov_configure(pdev, 0);
+
+	/* clear the service task disable bit and kick off service task */
+	clear_bit(__FM10K_SERVICE_DISABLE, interface->state);
+	fm10k_service_event_schedule(interface);
+
+	return 0;
+
+err_register:
+	fm10k_mbx_free_irq(interface);
+err_mbx_interrupt:
+	fm10k_clear_queueing_scheme(interface);
+err_sw_init:
+	if (interface->sw_addr)
+		iounmap(interface->sw_addr);
+	iounmap(interface->uc_addr);
+err_ioremap:
+	free_netdev(netdev);
+err_alloc_netdev:
+	pci_release_mem_regions(pdev);
+err_pci_reg:
+err_dma:
+	pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * fm10k_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * fm10k_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.  The could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void fm10k_remove(struct pci_dev *pdev)
+{
+	struct fm10k_intfc *interface = pci_get_drvdata(pdev);
+	struct net_device *netdev = interface->netdev;
+
+	del_timer_sync(&interface->service_timer);
+
+	fm10k_stop_service_event(interface);
+	fm10k_stop_macvlan_task(interface);
+
+	/* Remove all pending MAC/VLAN requests */
+	fm10k_clear_macvlan_queue(interface, interface->glort, true);
+
+	/* free netdev, this may bounce the interrupts due to setup_tc */
+	if (netdev->reg_state == NETREG_REGISTERED)
+		unregister_netdev(netdev);
+
+	/* release VFs */
+	fm10k_iov_disable(pdev);
+
+	/* disable mailbox interrupt */
+	fm10k_mbx_free_irq(interface);
+
+	/* free interrupts */
+	fm10k_clear_queueing_scheme(interface);
+
+	/* remove any debugfs interfaces */
+	fm10k_dbg_intfc_exit(interface);
+
+	if (interface->sw_addr)
+		iounmap(interface->sw_addr);
+	iounmap(interface->uc_addr);
+
+	free_netdev(netdev);
+
+	pci_release_mem_regions(pdev);
+
+	pci_disable_device(pdev);
+}
+
+static void fm10k_prepare_suspend(struct fm10k_intfc *interface)
+{
+	/* the watchdog task reads from registers, which might appear like
+	 * a surprise remove if the PCIe device is disabled while we're
+	 * stopped. We stop the watchdog task until after we resume software
+	 * activity.
+	 *
+	 * Note that the MAC/VLAN task will be stopped as part of preparing
+	 * for reset so we don't need to handle it here.
+	 */
+	fm10k_stop_service_event(interface);
+
+	if (fm10k_prepare_for_reset(interface))
+		set_bit(__FM10K_RESET_SUSPENDED, interface->state);
+}
+
+static int fm10k_handle_resume(struct fm10k_intfc *interface)
+{
+	struct fm10k_hw *hw = &interface->hw;
+	int err;
+
+	/* Even if we didn't properly prepare for reset in
+	 * fm10k_prepare_suspend, we'll attempt to resume anyways.
+	 */
+	if (!test_and_clear_bit(__FM10K_RESET_SUSPENDED, interface->state))
+		dev_warn(&interface->pdev->dev,
+			 "Device was shut down as part of suspend... Attempting to recover\n");
+
+	/* reset statistics starting values */
+	hw->mac.ops.rebind_hw_stats(hw, &interface->stats);
+
+	err = fm10k_handle_reset(interface);
+	if (err)
+		return err;
+
+	/* assume host is not ready, to prevent race with watchdog in case we
+	 * actually don't have connection to the switch
+	 */
+	interface->host_ready = false;
+	fm10k_watchdog_host_not_ready(interface);
+
+	/* force link to stay down for a second to prevent link flutter */
+	interface->link_down_event = jiffies + (HZ);
+	set_bit(__FM10K_LINK_DOWN, interface->state);
+
+	/* restart the service task */
+	fm10k_start_service_event(interface);
+
+	/* Restart the MAC/VLAN request queue in-case of outstanding events */
+	fm10k_macvlan_schedule(interface);
+
+	return 0;
+}
+
+/**
+ * fm10k_resume - Generic PM resume hook
+ * @dev: generic device structure
+ *
+ * Generic PM hook used when waking the device from a low power state after
+ * suspend or hibernation. This function does not need to handle lower PCIe
+ * device state as the stack takes care of that for us.
+ **/
+static int __maybe_unused fm10k_resume(struct device *dev)
+{
+	struct fm10k_intfc *interface = dev_get_drvdata(dev);
+	struct net_device *netdev = interface->netdev;
+	struct fm10k_hw *hw = &interface->hw;
+	int err;
+
+	/* refresh hw_addr in case it was dropped */
+	hw->hw_addr = interface->uc_addr;
+
+	err = fm10k_handle_resume(interface);
+	if (err)
+		return err;
+
+	netif_device_attach(netdev);
+
+	return 0;
+}
+
+/**
+ * fm10k_suspend - Generic PM suspend hook
+ * @dev: generic device structure
+ *
+ * Generic PM hook used when setting the device into a low power state for
+ * system suspend or hibernation. This function does not need to handle lower
+ * PCIe device state as the stack takes care of that for us.
+ **/
+static int __maybe_unused fm10k_suspend(struct device *dev)
+{
+	struct fm10k_intfc *interface = dev_get_drvdata(dev);
+	struct net_device *netdev = interface->netdev;
+
+	netif_device_detach(netdev);
+
+	fm10k_prepare_suspend(interface);
+
+	return 0;
+}
+
+/**
+ * fm10k_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t fm10k_io_error_detected(struct pci_dev *pdev,
+						pci_channel_state_t state)
+{
+	struct fm10k_intfc *interface = pci_get_drvdata(pdev);
+	struct net_device *netdev = interface->netdev;
+
+	netif_device_detach(netdev);
+
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	fm10k_prepare_suspend(interface);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * fm10k_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot.
+ */
+static pci_ers_result_t fm10k_io_slot_reset(struct pci_dev *pdev)
+{
+	pci_ers_result_t result;
+
+	if (pci_reenable_device(pdev)) {
+		dev_err(&pdev->dev,
+			"Cannot re-enable PCI device after reset.\n");
+		result = PCI_ERS_RESULT_DISCONNECT;
+	} else {
+		pci_set_master(pdev);
+		pci_restore_state(pdev);
+
+		/* After second error pci->state_saved is false, this
+		 * resets it so EEH doesn't break.
+		 */
+		pci_save_state(pdev);
+
+		pci_wake_from_d3(pdev, false);
+
+		result = PCI_ERS_RESULT_RECOVERED;
+	}
+
+	return result;
+}
+
+/**
+ * fm10k_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells us that
+ * its OK to resume normal operation.
+ */
+static void fm10k_io_resume(struct pci_dev *pdev)
+{
+	struct fm10k_intfc *interface = pci_get_drvdata(pdev);
+	struct net_device *netdev = interface->netdev;
+	int err;
+
+	err = fm10k_handle_resume(interface);
+
+	if (err)
+		dev_warn(&pdev->dev,
+			 "%s failed: %d\n", __func__, err);
+	else
+		netif_device_attach(netdev);
+}
+
+/**
+ * fm10k_io_reset_prepare - called when PCI function is about to be reset
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the PCI function is about to be reset,
+ * allowing the device driver to prepare for it.
+ */
+static void fm10k_io_reset_prepare(struct pci_dev *pdev)
+{
+	/* warn incase we have any active VF devices */
+	if (pci_num_vf(pdev))
+		dev_warn(&pdev->dev,
+			 "PCIe FLR may cause issues for any active VF devices\n");
+	fm10k_prepare_suspend(pci_get_drvdata(pdev));
+}
+
+/**
+ * fm10k_io_reset_done - called when PCI function has finished resetting
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called just after the PCI function is reset, such as via
+ * /sys/class/net/<enpX>/device/reset or similar.
+ */
+static void fm10k_io_reset_done(struct pci_dev *pdev)
+{
+	struct fm10k_intfc *interface = pci_get_drvdata(pdev);
+	int err = fm10k_handle_resume(interface);
+
+	if (err) {
+		dev_warn(&pdev->dev,
+			 "%s failed: %d\n", __func__, err);
+		netif_device_detach(interface->netdev);
+	}
+}
+
+static const struct pci_error_handlers fm10k_err_handler = {
+	.error_detected = fm10k_io_error_detected,
+	.slot_reset = fm10k_io_slot_reset,
+	.resume = fm10k_io_resume,
+	.reset_prepare = fm10k_io_reset_prepare,
+	.reset_done = fm10k_io_reset_done,
+};
+
+static SIMPLE_DEV_PM_OPS(fm10k_pm_ops, fm10k_suspend, fm10k_resume);
+
+static struct pci_driver fm10k_driver = {
+	.name			= fm10k_driver_name,
+	.id_table		= fm10k_pci_tbl,
+	.probe			= fm10k_probe,
+	.remove			= fm10k_remove,
+	.driver = {
+		.pm		= &fm10k_pm_ops,
+	},
+	.sriov_configure	= fm10k_iov_configure,
+	.err_handler		= &fm10k_err_handler
+};
+
+/**
+ * fm10k_register_pci_driver - register driver interface
+ *
+ * This function is called on module load in order to register the driver.
+ **/
+int fm10k_register_pci_driver(void)
+{
+	return pci_register_driver(&fm10k_driver);
+}
+
+/**
+ * fm10k_unregister_pci_driver - unregister driver interface
+ *
+ * This function is called on module unload in order to remove the driver.
+ **/
+void fm10k_unregister_pci_driver(void)
+{
+	pci_unregister_driver(&fm10k_driver);
+}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
new file mode 100644
index 0000000000..af1b0cde36
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -0,0 +1,1825 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
+
+#include "fm10k_pf.h"
+#include "fm10k_vf.h"
+
+/**
+ *  fm10k_reset_hw_pf - PF hardware reset
+ *  @hw: pointer to hardware structure
+ *
+ *  This function should return the hardware to a state similar to the
+ *  one it is in after being powered on.
+ **/
+static s32 fm10k_reset_hw_pf(struct fm10k_hw *hw)
+{
+	s32 err;
+	u32 reg;
+	u16 i;
+
+	/* Disable interrupts */
+	fm10k_write_reg(hw, FM10K_EIMR, FM10K_EIMR_DISABLE(ALL));
+
+	/* Lock ITR2 reg 0 into itself and disable interrupt moderation */
+	fm10k_write_reg(hw, FM10K_ITR2(0), 0);
+	fm10k_write_reg(hw, FM10K_INT_CTRL, 0);
+
+	/* We assume here Tx and Rx queue 0 are owned by the PF */
+
+	/* Shut off VF access to their queues forcing them to queue 0 */
+	for (i = 0; i < FM10K_TQMAP_TABLE_SIZE; i++) {
+		fm10k_write_reg(hw, FM10K_TQMAP(i), 0);
+		fm10k_write_reg(hw, FM10K_RQMAP(i), 0);
+	}
+
+	/* shut down all rings */
+	err = fm10k_disable_queues_generic(hw, FM10K_MAX_QUEUES);
+	if (err == FM10K_ERR_REQUESTS_PENDING) {
+		hw->mac.reset_while_pending++;
+		goto force_reset;
+	} else if (err) {
+		return err;
+	}
+
+	/* Verify that DMA is no longer active */
+	reg = fm10k_read_reg(hw, FM10K_DMA_CTRL);
+	if (reg & (FM10K_DMA_CTRL_TX_ACTIVE | FM10K_DMA_CTRL_RX_ACTIVE))
+		return FM10K_ERR_DMA_PENDING;
+
+force_reset:
+	/* Inititate data path reset */
+	reg = FM10K_DMA_CTRL_DATAPATH_RESET;
+	fm10k_write_reg(hw, FM10K_DMA_CTRL, reg);
+
+	/* Flush write and allow 100us for reset to complete */
+	fm10k_write_flush(hw);
+	udelay(FM10K_RESET_TIMEOUT);
+
+	/* Verify we made it out of reset */
+	reg = fm10k_read_reg(hw, FM10K_IP);
+	if (!(reg & FM10K_IP_NOTINRESET))
+		return FM10K_ERR_RESET_FAILED;
+
+	return 0;
+}
+
+/**
+ *  fm10k_is_ari_hierarchy_pf - Indicate ARI hierarchy support
+ *  @hw: pointer to hardware structure
+ *
+ *  Looks at the ARI hierarchy bit to determine whether ARI is supported or not.
+ **/
+static bool fm10k_is_ari_hierarchy_pf(struct fm10k_hw *hw)
+{
+	u16 sriov_ctrl = fm10k_read_pci_cfg_word(hw, FM10K_PCIE_SRIOV_CTRL);
+
+	return !!(sriov_ctrl & FM10K_PCIE_SRIOV_CTRL_VFARI);
+}
+
+/**
+ *  fm10k_init_hw_pf - PF hardware initialization
+ *  @hw: pointer to hardware structure
+ *
+ **/
+static s32 fm10k_init_hw_pf(struct fm10k_hw *hw)
+{
+	u32 dma_ctrl, txqctl;
+	u16 i;
+
+	/* Establish default VSI as valid */
+	fm10k_write_reg(hw, FM10K_DGLORTDEC(fm10k_dglort_default), 0);
+	fm10k_write_reg(hw, FM10K_DGLORTMAP(fm10k_dglort_default),
+			FM10K_DGLORTMAP_ANY);
+
+	/* Invalidate all other GLORT entries */
+	for (i = 1; i < FM10K_DGLORT_COUNT; i++)
+		fm10k_write_reg(hw, FM10K_DGLORTMAP(i), FM10K_DGLORTMAP_NONE);
+
+	/* reset ITR2(0) to point to itself */
+	fm10k_write_reg(hw, FM10K_ITR2(0), 0);
+
+	/* reset VF ITR2(0) to point to 0 avoid PF registers */
+	fm10k_write_reg(hw, FM10K_ITR2(FM10K_ITR_REG_COUNT_PF), 0);
+
+	/* loop through all PF ITR2 registers pointing them to the previous */
+	for (i = 1; i < FM10K_ITR_REG_COUNT_PF; i++)
+		fm10k_write_reg(hw, FM10K_ITR2(i), i - 1);
+
+	/* Enable interrupt moderator if not already enabled */
+	fm10k_write_reg(hw, FM10K_INT_CTRL, FM10K_INT_CTRL_ENABLEMODERATOR);
+
+	/* compute the default txqctl configuration */
+	txqctl = FM10K_TXQCTL_PF | FM10K_TXQCTL_UNLIMITED_BW |
+		 (hw->mac.default_vid << FM10K_TXQCTL_VID_SHIFT);
+
+	for (i = 0; i < FM10K_MAX_QUEUES; i++) {
+		/* configure rings for 256 Queue / 32 Descriptor cache mode */
+		fm10k_write_reg(hw, FM10K_TQDLOC(i),
+				(i * FM10K_TQDLOC_BASE_32_DESC) |
+				FM10K_TQDLOC_SIZE_32_DESC);
+		fm10k_write_reg(hw, FM10K_TXQCTL(i), txqctl);
+
+		/* configure rings to provide TPH processing hints */
+		fm10k_write_reg(hw, FM10K_TPH_TXCTRL(i),
+				FM10K_TPH_TXCTRL_DESC_TPHEN |
+				FM10K_TPH_TXCTRL_DESC_RROEN |
+				FM10K_TPH_TXCTRL_DESC_WROEN |
+				FM10K_TPH_TXCTRL_DATA_RROEN);
+		fm10k_write_reg(hw, FM10K_TPH_RXCTRL(i),
+				FM10K_TPH_RXCTRL_DESC_TPHEN |
+				FM10K_TPH_RXCTRL_DESC_RROEN |
+				FM10K_TPH_RXCTRL_DATA_WROEN |
+				FM10K_TPH_RXCTRL_HDR_WROEN);
+	}
+
+	/* set max hold interval to align with 1.024 usec in all modes and
+	 * store ITR scale
+	 */
+	switch (hw->bus.speed) {
+	case fm10k_bus_speed_2500:
+		dma_ctrl = FM10K_DMA_CTRL_MAX_HOLD_1US_GEN1;
+		hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN1;
+		break;
+	case fm10k_bus_speed_5000:
+		dma_ctrl = FM10K_DMA_CTRL_MAX_HOLD_1US_GEN2;
+		hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN2;
+		break;
+	case fm10k_bus_speed_8000:
+		dma_ctrl = FM10K_DMA_CTRL_MAX_HOLD_1US_GEN3;
+		hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN3;
+		break;
+	default:
+		dma_ctrl = 0;
+		/* just in case, assume Gen3 ITR scale */
+		hw->mac.itr_scale = FM10K_TDLEN_ITR_SCALE_GEN3;
+		break;
+	}
+
+	/* Configure TSO flags */
+	fm10k_write_reg(hw, FM10K_DTXTCPFLGL, FM10K_TSO_FLAGS_LOW);
+	fm10k_write_reg(hw, FM10K_DTXTCPFLGH, FM10K_TSO_FLAGS_HI);
+
+	/* Enable DMA engine
+	 * Set Rx Descriptor size to 32
+	 * Set Minimum MSS to 64
+	 * Set Maximum number of Rx queues to 256 / 32 Descriptor
+	 */
+	dma_ctrl |= FM10K_DMA_CTRL_TX_ENABLE | FM10K_DMA_CTRL_RX_ENABLE |
+		    FM10K_DMA_CTRL_RX_DESC_SIZE | FM10K_DMA_CTRL_MINMSS_64 |
+		    FM10K_DMA_CTRL_32_DESC;
+
+	fm10k_write_reg(hw, FM10K_DMA_CTRL, dma_ctrl);
+
+	/* record maximum queue count, we limit ourselves to 128 */
+	hw->mac.max_queues = FM10K_MAX_QUEUES_PF;
+
+	/* We support either 64 VFs or 7 VFs depending on if we have ARI */
+	hw->iov.total_vfs = fm10k_is_ari_hierarchy_pf(hw) ? 64 : 7;
+
+	return 0;
+}
+
+/**
+ *  fm10k_update_vlan_pf - Update status of VLAN ID in VLAN filter table
+ *  @hw: pointer to hardware structure
+ *  @vid: VLAN ID to add to table
+ *  @vsi: Index indicating VF ID or PF ID in table
+ *  @set: Indicates if this is a set or clear operation
+ *
+ *  This function adds or removes the corresponding VLAN ID from the VLAN
+ *  filter table for the corresponding function.  In addition to the
+ *  standard set/clear that supports one bit a multi-bit write is
+ *  supported to set 64 bits at a time.
+ **/
+static s32 fm10k_update_vlan_pf(struct fm10k_hw *hw, u32 vid, u8 vsi, bool set)
+{
+	u32 vlan_table, reg, mask, bit, len;
+
+	/* verify the VSI index is valid */
+	if (vsi > FM10K_VLAN_TABLE_VSI_MAX)
+		return FM10K_ERR_PARAM;
+
+	/* VLAN multi-bit write:
+	 * The multi-bit write has several parts to it.
+	 *               24              16               8               0
+	 *  7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+	 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	 * | RSVD0 |         Length        |C|RSVD0|        VLAN ID        |
+	 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+	 *
+	 * VLAN ID: Vlan Starting value
+	 * RSVD0: Reserved section, must be 0
+	 * C: Flag field, 0 is set, 1 is clear (Used in VF VLAN message)
+	 * Length: Number of times to repeat the bit being set
+	 */
+	len = vid >> 16;
+	vid = (vid << 17) >> 17;
+
+	/* verify the reserved 0 fields are 0 */
+	if (len >= FM10K_VLAN_TABLE_VID_MAX || vid >= FM10K_VLAN_TABLE_VID_MAX)
+		return FM10K_ERR_PARAM;
+
+	/* Loop through the table updating all required VLANs */
+	for (reg = FM10K_VLAN_TABLE(vsi, vid / 32), bit = vid % 32;
+	     len < FM10K_VLAN_TABLE_VID_MAX;
+	     len -= 32 - bit, reg++, bit = 0) {
+		/* record the initial state of the register */
+		vlan_table = fm10k_read_reg(hw, reg);
+
+		/* truncate mask if we are at the start or end of the run */
+		mask = (~(u32)0 >> ((len < 31) ? 31 - len : 0)) << bit;
+
+		/* make necessary modifications to the register */
+		mask &= set ? ~vlan_table : vlan_table;
+		if (mask)
+			fm10k_write_reg(hw, reg, vlan_table ^ mask);
+	}
+
+	return 0;
+}
+
+/**
+ *  fm10k_read_mac_addr_pf - Read device MAC address
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the device MAC address from the SM_AREA and stores the value.
+ **/
+static s32 fm10k_read_mac_addr_pf(struct fm10k_hw *hw)
+{
+	u8 perm_addr[ETH_ALEN];
+	u32 serial_num;
+
+	serial_num = fm10k_read_reg(hw, FM10K_SM_AREA(1));
+
+	/* last byte should be all 1's */
+	if ((~serial_num) << 24)
+		return  FM10K_ERR_INVALID_MAC_ADDR;
+
+	perm_addr[0] = (u8)(serial_num >> 24);
+	perm_addr[1] = (u8)(serial_num >> 16);
+	perm_addr[2] = (u8)(serial_num >> 8);
+
+	serial_num = fm10k_read_reg(hw, FM10K_SM_AREA(0));
+
+	/* first byte should be all 1's */
+	if ((~serial_num) >> 24)
+		return  FM10K_ERR_INVALID_MAC_ADDR;
+
+	perm_addr[3] = (u8)(serial_num >> 16);
+	perm_addr[4] = (u8)(serial_num >> 8);
+	perm_addr[5] = (u8)(serial_num);
+
+	ether_addr_copy(hw->mac.perm_addr, perm_addr);
+	ether_addr_copy(hw->mac.addr, perm_addr);
+
+	return 0;
+}
+
+/**
+ *  fm10k_glort_valid_pf - Validate that the provided glort is valid
+ *  @hw: pointer to the HW structure
+ *  @glort: base glort to be validated
+ *
+ *  This function will return an error if the provided glort is invalid
+ **/
+bool fm10k_glort_valid_pf(struct fm10k_hw *hw, u16 glort)
+{
+	glort &= hw->mac.dglort_map >> FM10K_DGLORTMAP_MASK_SHIFT;
+
+	return glort == (hw->mac.dglort_map & FM10K_DGLORTMAP_NONE);
+}
+
+/**
+ *  fm10k_update_xc_addr_pf - Update device addresses
+ *  @hw: pointer to the HW structure
+ *  @glort: base resource tag for this request
+ *  @mac: MAC address to add/remove from table
+ *  @vid: VLAN ID to add/remove from table
+ *  @add: Indicates if this is an add or remove operation
+ *  @flags: flags field to indicate add and secure
+ *
+ *  This function generates a message to the Switch API requesting
+ *  that the given logical port add/remove the given L2 MAC/VLAN address.
+ **/
+static s32 fm10k_update_xc_addr_pf(struct fm10k_hw *hw, u16 glort,
+				   const u8 *mac, u16 vid, bool add, u8 flags)
+{
+	struct fm10k_mbx_info *mbx = &hw->mbx;
+	struct fm10k_mac_update mac_update;
+	u32 msg[5];
+
+	/* clear set bit from VLAN ID */
+	vid &= ~FM10K_VLAN_CLEAR;
+
+	/* if glort or VLAN are not valid return error */
+	if (!fm10k_glort_valid_pf(hw, glort) || vid >= FM10K_VLAN_TABLE_VID_MAX)
+		return FM10K_ERR_PARAM;
+
+	/* record fields */
+	mac_update.mac_lower = cpu_to_le32(((u32)mac[2] << 24) |
+						 ((u32)mac[3] << 16) |
+						 ((u32)mac[4] << 8) |
+						 ((u32)mac[5]));
+	mac_update.mac_upper = cpu_to_le16(((u16)mac[0] << 8) |
+					   ((u16)mac[1]));
+	mac_update.vlan = cpu_to_le16(vid);
+	mac_update.glort = cpu_to_le16(glort);
+	mac_update.action = add ? 0 : 1;
+	mac_update.flags = flags;
+
+	/* populate mac_update fields */
+	fm10k_tlv_msg_init(msg, FM10K_PF_MSG_ID_UPDATE_MAC_FWD_RULE);
+	fm10k_tlv_attr_put_le_struct(msg, FM10K_PF_ATTR_ID_MAC_UPDATE,
+				     &mac_update, sizeof(mac_update));
+
+	/* load onto outgoing mailbox */
+	return mbx->ops.enqueue_tx(hw, mbx, msg);
+}
+
+/**
+ *  fm10k_update_uc_addr_pf - Update device unicast addresses
+ *  @hw: pointer to the HW structure
+ *  @glort: base resource tag for this request
+ *  @mac: MAC address to add/remove from table
+ *  @vid: VLAN ID to add/remove from table
+ *  @add: Indicates if this is an add or remove operation
+ *  @flags: flags field to indicate add and secure
+ *
+ *  This function is used to add or remove unicast addresses for
+ *  the PF.
+ **/
+static s32 fm10k_update_uc_addr_pf(struct fm10k_hw *hw, u16 glort,
+				   const u8 *mac, u16 vid, bool add, u8 flags)
+{
+	/* verify MAC address is valid */
+	if (!is_valid_ether_addr(mac))
+		return FM10K_ERR_PARAM;
+
+	return fm10k_update_xc_addr_pf(hw, glort, mac, vid, add, flags);
+}
+
+/**
+ *  fm10k_update_mc_addr_pf - Update device multicast addresses
+ *  @hw: pointer to the HW structure
+ *  @glort: base resource tag for this request
+ *  @mac: MAC address to add/remove from table
+ *  @vid: VLAN ID to add/remove from table
+ *  @add: Indicates if this is an add or remove operation
+ *
+ *  This function is used to add or remove multicast MAC addresses for
+ *  the PF.
+ **/
+static s32 fm10k_update_mc_addr_pf(struct fm10k_hw *hw, u16 glort,
+				   const u8 *mac, u16 vid, bool add)
+{
+	/* verify multicast address is valid */
+	if (!is_multicast_ether_addr(mac))
+		return FM10K_ERR_PARAM;
+
+	return fm10k_update_xc_addr_pf(hw, glort, mac, vid, add, 0);
+}
+
+/**
+ *  fm10k_update_xcast_mode_pf - Request update of multicast mode
+ *  @hw: pointer to hardware structure
+ *  @glort: base resource tag for this request
+ *  @mode: integer value indicating mode being requested
+ *
+ *  This function will attempt to request a higher mode for the port
+ *  so that it can enable either multicast, multicast promiscuous, or
+ *  promiscuous mode of operation.
+ **/
+static s32 fm10k_update_xcast_mode_pf(struct fm10k_hw *hw, u16 glort, u8 mode)
+{
+	struct fm10k_mbx_info *mbx = &hw->mbx;
+	u32 msg[3], xcast_mode;
+
+	if (mode > FM10K_XCAST_MODE_NONE)
+		return FM10K_ERR_PARAM;
+
+	/* if glort is not valid return error */
+	if (!fm10k_glort_valid_pf(hw, glort))
+		return FM10K_ERR_PARAM;
+
+	/* write xcast mode as a single u32 value,
+	 * lower 16 bits: glort
+	 * upper 16 bits: mode
+	 */
+	xcast_mode = ((u32)mode << 16) | glort;
+
+	/* generate message requesting to change xcast mode */
+	fm10k_tlv_msg_init(msg, FM10K_PF_MSG_ID_XCAST_MODES);
+	fm10k_tlv_attr_put_u32(msg, FM10K_PF_ATTR_ID_XCAST_MODE, xcast_mode);
+
+	/* load onto outgoing mailbox */
+	return mbx->ops.enqueue_tx(hw, mbx, msg);
+}
+
+/**
+ *  fm10k_update_int_moderator_pf - Update interrupt moderator linked list
+ *  @hw: pointer to hardware structure
+ *
+ *  This function walks through the MSI-X vector table to determine the
+ *  number of active interrupts and based on that information updates the
+ *  interrupt moderator linked list.
+ **/
+static void fm10k_update_int_moderator_pf(struct fm10k_hw *hw)
+{
+	u32 i;
+
+	/* Disable interrupt moderator */
+	fm10k_write_reg(hw, FM10K_INT_CTRL, 0);
+
+	/* loop through PF from last to first looking enabled vectors */
+	for (i = FM10K_ITR_REG_COUNT_PF - 1; i; i--) {
+		if (!fm10k_read_reg(hw, FM10K_MSIX_VECTOR_MASK(i)))
+			break;
+	}
+
+	/* always reset VFITR2[0] to point to last enabled PF vector */
+	fm10k_write_reg(hw, FM10K_ITR2(FM10K_ITR_REG_COUNT_PF), i);
+
+	/* reset ITR2[0] to point to last enabled PF vector */
+	if (!hw->iov.num_vfs)
+		fm10k_write_reg(hw, FM10K_ITR2(0), i);
+
+	/* Enable interrupt moderator */
+	fm10k_write_reg(hw, FM10K_INT_CTRL, FM10K_INT_CTRL_ENABLEMODERATOR);
+}
+
+/**
+ *  fm10k_update_lport_state_pf - Notify the switch of a change in port state
+ *  @hw: pointer to the HW structure
+ *  @glort: base resource tag for this request
+ *  @count: number of logical ports being updated
+ *  @enable: boolean value indicating enable or disable
+ *
+ *  This function is used to add/remove a logical port from the switch.
+ **/
+static s32 fm10k_update_lport_state_pf(struct fm10k_hw *hw, u16 glort,
+				       u16 count, bool enable)
+{
+	struct fm10k_mbx_info *mbx = &hw->mbx;
+	u32 msg[3], lport_msg;
+
+	/* do nothing if we are being asked to create or destroy 0 ports */
+	if (!count)
+		return 0;
+
+	/* if glort is not valid return error */
+	if (!fm10k_glort_valid_pf(hw, glort))
+		return FM10K_ERR_PARAM;
+
+	/* reset multicast mode if deleting lport */
+	if (!enable)
+		fm10k_update_xcast_mode_pf(hw, glort, FM10K_XCAST_MODE_NONE);
+
+	/* construct the lport message from the 2 pieces of data we have */
+	lport_msg = ((u32)count << 16) | glort;
+
+	/* generate lport create/delete message */
+	fm10k_tlv_msg_init(msg, enable ? FM10K_PF_MSG_ID_LPORT_CREATE :
+					 FM10K_PF_MSG_ID_LPORT_DELETE);
+	fm10k_tlv_attr_put_u32(msg, FM10K_PF_ATTR_ID_PORT, lport_msg);
+
+	/* load onto outgoing mailbox */
+	return mbx->ops.enqueue_tx(hw, mbx, msg);
+}
+
+/**
+ *  fm10k_configure_dglort_map_pf - Configures GLORT entry and queues
+ *  @hw: pointer to hardware structure
+ *  @dglort: pointer to dglort configuration structure
+ *
+ *  Reads the configuration structure contained in dglort_cfg and uses
+ *  that information to then populate a DGLORTMAP/DEC entry and the queues
+ *  to which it has been assigned.
+ **/
+static s32 fm10k_configure_dglort_map_pf(struct fm10k_hw *hw,
+					 struct fm10k_dglort_cfg *dglort)
+{
+	u16 glort, queue_count, vsi_count, pc_count;
+	u16 vsi, queue, pc, q_idx;
+	u32 txqctl, dglortdec, dglortmap;
+
+	/* verify the dglort pointer */
+	if (!dglort)
+		return FM10K_ERR_PARAM;
+
+	/* verify the dglort values */
+	if ((dglort->idx > 7) || (dglort->rss_l > 7) || (dglort->pc_l > 3) ||
+	    (dglort->vsi_l > 6) || (dglort->vsi_b > 64) ||
+	    (dglort->queue_l > 8) || (dglort->queue_b >= 256))
+		return FM10K_ERR_PARAM;
+
+	/* determine count of VSIs and queues */
+	queue_count = BIT(dglort->rss_l + dglort->pc_l);
+	vsi_count = BIT(dglort->vsi_l + dglort->queue_l);
+	glort = dglort->glort;
+	q_idx = dglort->queue_b;
+
+	/* configure SGLORT for queues */
+	for (vsi = 0; vsi < vsi_count; vsi++, glort++) {
+		for (queue = 0; queue < queue_count; queue++, q_idx++) {
+			if (q_idx >= FM10K_MAX_QUEUES)
+				break;
+
+			fm10k_write_reg(hw, FM10K_TX_SGLORT(q_idx), glort);
+			fm10k_write_reg(hw, FM10K_RX_SGLORT(q_idx), glort);
+		}
+	}
+
+	/* determine count of PCs and queues */
+	queue_count = BIT(dglort->queue_l + dglort->rss_l + dglort->vsi_l);
+	pc_count = BIT(dglort->pc_l);
+
+	/* configure PC for Tx queues */
+	for (pc = 0; pc < pc_count; pc++) {
+		q_idx = pc + dglort->queue_b;
+		for (queue = 0; queue < queue_count; queue++) {
+			if (q_idx >= FM10K_MAX_QUEUES)
+				break;
+
+			txqctl = fm10k_read_reg(hw, FM10K_TXQCTL(q_idx));
+			txqctl &= ~FM10K_TXQCTL_PC_MASK;
+			txqctl |= pc << FM10K_TXQCTL_PC_SHIFT;
+			fm10k_write_reg(hw, FM10K_TXQCTL(q_idx), txqctl);
+
+			q_idx += pc_count;
+		}
+	}
+
+	/* configure DGLORTDEC */
+	dglortdec = ((u32)(dglort->rss_l) << FM10K_DGLORTDEC_RSSLENGTH_SHIFT) |
+		    ((u32)(dglort->queue_b) << FM10K_DGLORTDEC_QBASE_SHIFT) |
+		    ((u32)(dglort->pc_l) << FM10K_DGLORTDEC_PCLENGTH_SHIFT) |
+		    ((u32)(dglort->vsi_b) << FM10K_DGLORTDEC_VSIBASE_SHIFT) |
+		    ((u32)(dglort->vsi_l) << FM10K_DGLORTDEC_VSILENGTH_SHIFT) |
+		    ((u32)(dglort->queue_l));
+	if (dglort->inner_rss)
+		dglortdec |=  FM10K_DGLORTDEC_INNERRSS_ENABLE;
+
+	/* configure DGLORTMAP */
+	dglortmap = (dglort->idx == fm10k_dglort_default) ?
+			FM10K_DGLORTMAP_ANY : FM10K_DGLORTMAP_ZERO;
+	dglortmap <<= dglort->vsi_l + dglort->queue_l + dglort->shared_l;
+	dglortmap |= dglort->glort;
+
+	/* write values to hardware */
+	fm10k_write_reg(hw, FM10K_DGLORTDEC(dglort->idx), dglortdec);
+	fm10k_write_reg(hw, FM10K_DGLORTMAP(dglort->idx), dglortmap);
+
+	return 0;
+}
+
+u16 fm10k_queues_per_pool(struct fm10k_hw *hw)
+{
+	u16 num_pools = hw->iov.num_pools;
+
+	return (num_pools > 32) ? 2 : (num_pools > 16) ? 4 : (num_pools > 8) ?
+	       8 : FM10K_MAX_QUEUES_POOL;
+}
+
+u16 fm10k_vf_queue_index(struct fm10k_hw *hw, u16 vf_idx)
+{
+	u16 num_vfs = hw->iov.num_vfs;
+	u16 vf_q_idx = FM10K_MAX_QUEUES;
+
+	vf_q_idx -= fm10k_queues_per_pool(hw) * (num_vfs - vf_idx);
+
+	return vf_q_idx;
+}
+
+static u16 fm10k_vectors_per_pool(struct fm10k_hw *hw)
+{
+	u16 num_pools = hw->iov.num_pools;
+
+	return (num_pools > 32) ? 8 : (num_pools > 16) ? 16 :
+	       FM10K_MAX_VECTORS_POOL;
+}
+
+static u16 fm10k_vf_vector_index(struct fm10k_hw *hw, u16 vf_idx)
+{
+	u16 vf_v_idx = FM10K_MAX_VECTORS_PF;
+
+	vf_v_idx += fm10k_vectors_per_pool(hw) * vf_idx;
+
+	return vf_v_idx;
+}
+
+/**
+ *  fm10k_iov_assign_resources_pf - Assign pool resources for virtualization
+ *  @hw: pointer to the HW structure
+ *  @num_vfs: number of VFs to be allocated
+ *  @num_pools: number of virtualization pools to be allocated
+ *
+ *  Allocates queues and traffic classes to virtualization entities to prepare
+ *  the PF for SR-IOV and VMDq
+ **/
+static s32 fm10k_iov_assign_resources_pf(struct fm10k_hw *hw, u16 num_vfs,
+					 u16 num_pools)
+{
+	u16 qmap_stride, qpp, vpp, vf_q_idx, vf_q_idx0, qmap_idx;
+	u32 vid = hw->mac.default_vid << FM10K_TXQCTL_VID_SHIFT;
+	int i, j;
+
+	/* hardware only supports up to 64 pools */
+	if (num_pools > 64)
+		return FM10K_ERR_PARAM;
+
+	/* the number of VFs cannot exceed the number of pools */
+	if ((num_vfs > num_pools) || (num_vfs > hw->iov.total_vfs))
+		return FM10K_ERR_PARAM;
+
+	/* record number of virtualization entities */
+	hw->iov.num_vfs = num_vfs;
+	hw->iov.num_pools = num_pools;
+
+	/* determine qmap offsets and counts */
+	qmap_stride = (num_vfs > 8) ? 32 : 256;
+	qpp = fm10k_queues_per_pool(hw);
+	vpp = fm10k_vectors_per_pool(hw);
+
+	/* calculate starting index for queues */
+	vf_q_idx = fm10k_vf_queue_index(hw, 0);
+	qmap_idx = 0;
+
+	/* establish TCs with -1 credits and no quanta to prevent transmit */
+	for (i = 0; i < num_vfs; i++) {
+		fm10k_write_reg(hw, FM10K_TC_MAXCREDIT(i), 0);
+		fm10k_write_reg(hw, FM10K_TC_RATE(i), 0);
+		fm10k_write_reg(hw, FM10K_TC_CREDIT(i),
+				FM10K_TC_CREDIT_CREDIT_MASK);
+	}
+
+	/* zero out all mbmem registers */
+	for (i = FM10K_VFMBMEM_LEN * num_vfs; i--;)
+		fm10k_write_reg(hw, FM10K_MBMEM(i), 0);
+
+	/* clear event notification of VF FLR */
+	fm10k_write_reg(hw, FM10K_PFVFLREC(0), ~0);
+	fm10k_write_reg(hw, FM10K_PFVFLREC(1), ~0);
+
+	/* loop through unallocated rings assigning them back to PF */
+	for (i = FM10K_MAX_QUEUES_PF; i < vf_q_idx; i++) {
+		fm10k_write_reg(hw, FM10K_TXDCTL(i), 0);
+		fm10k_write_reg(hw, FM10K_TXQCTL(i), FM10K_TXQCTL_PF |
+				FM10K_TXQCTL_UNLIMITED_BW | vid);
+		fm10k_write_reg(hw, FM10K_RXQCTL(i), FM10K_RXQCTL_PF);
+	}
+
+	/* PF should have already updated VFITR2[0] */
+
+	/* update all ITR registers to flow to VFITR2[0] */
+	for (i = FM10K_ITR_REG_COUNT_PF + 1; i < FM10K_ITR_REG_COUNT; i++) {
+		if (!(i & (vpp - 1)))
+			fm10k_write_reg(hw, FM10K_ITR2(i), i - vpp);
+		else
+			fm10k_write_reg(hw, FM10K_ITR2(i), i - 1);
+	}
+
+	/* update PF ITR2[0] to reference the last vector */
+	fm10k_write_reg(hw, FM10K_ITR2(0),
+			fm10k_vf_vector_index(hw, num_vfs - 1));
+
+	/* loop through rings populating rings and TCs */
+	for (i = 0; i < num_vfs; i++) {
+		/* record index for VF queue 0 for use in end of loop */
+		vf_q_idx0 = vf_q_idx;
+
+		for (j = 0; j < qpp; j++, qmap_idx++, vf_q_idx++) {
+			/* assign VF and locked TC to queues */
+			fm10k_write_reg(hw, FM10K_TXDCTL(vf_q_idx), 0);
+			fm10k_write_reg(hw, FM10K_TXQCTL(vf_q_idx),
+					(i << FM10K_TXQCTL_TC_SHIFT) | i |
+					FM10K_TXQCTL_VF | vid);
+			fm10k_write_reg(hw, FM10K_RXDCTL(vf_q_idx),
+					FM10K_RXDCTL_WRITE_BACK_MIN_DELAY |
+					FM10K_RXDCTL_DROP_ON_EMPTY);
+			fm10k_write_reg(hw, FM10K_RXQCTL(vf_q_idx),
+					(i << FM10K_RXQCTL_VF_SHIFT) |
+					FM10K_RXQCTL_VF);
+
+			/* map queue pair to VF */
+			fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), vf_q_idx);
+			fm10k_write_reg(hw, FM10K_RQMAP(qmap_idx), vf_q_idx);
+		}
+
+		/* repeat the first ring for all of the remaining VF rings */
+		for (; j < qmap_stride; j++, qmap_idx++) {
+			fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), vf_q_idx0);
+			fm10k_write_reg(hw, FM10K_RQMAP(qmap_idx), vf_q_idx0);
+		}
+	}
+
+	/* loop through remaining indexes assigning all to queue 0 */
+	while (qmap_idx < FM10K_TQMAP_TABLE_SIZE) {
+		fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), 0);
+		fm10k_write_reg(hw, FM10K_RQMAP(qmap_idx), 0);
+		qmap_idx++;
+	}
+
+	return 0;
+}
+
+/**
+ *  fm10k_iov_configure_tc_pf - Configure the shaping group for VF
+ *  @hw: pointer to the HW structure
+ *  @vf_idx: index of VF receiving GLORT
+ *  @rate: Rate indicated in Mb/s
+ *
+ *  Configured the TC for a given VF to allow only up to a given number
+ *  of Mb/s of outgoing Tx throughput.
+ **/
+static s32 fm10k_iov_configure_tc_pf(struct fm10k_hw *hw, u16 vf_idx, int rate)
+{
+	/* configure defaults */
+	u32 interval = FM10K_TC_RATE_INTERVAL_4US_GEN3;
+	u32 tc_rate = FM10K_TC_RATE_QUANTA_MASK;
+
+	/* verify vf is in range */
+	if (vf_idx >= hw->iov.num_vfs)
+		return FM10K_ERR_PARAM;
+
+	/* set interval to align with 4.096 usec in all modes */
+	switch (hw->bus.speed) {
+	case fm10k_bus_speed_2500:
+		interval = FM10K_TC_RATE_INTERVAL_4US_GEN1;
+		break;
+	case fm10k_bus_speed_5000:
+		interval = FM10K_TC_RATE_INTERVAL_4US_GEN2;
+		break;
+	default:
+		break;
+	}
+
+	if (rate) {
+		if (rate > FM10K_VF_TC_MAX || rate < FM10K_VF_TC_MIN)
+			return FM10K_ERR_PARAM;
+
+		/* The quanta is measured in Bytes per 4.096 or 8.192 usec
+		 * The rate is provided in Mbits per second
+		 * To tralslate from rate to quanta we need to multiply the
+		 * rate by 8.192 usec and divide by 8 bits/byte.  To avoid
+		 * dealing with floating point we can round the values up
+		 * to the nearest whole number ratio which gives us 128 / 125.
+		 */
+		tc_rate = (rate * 128) / 125;
+
+		/* try to keep the rate limiting accurate by increasing
+		 * the number of credits and interval for rates less than 4Gb/s
+		 */
+		if (rate < 4000)
+			interval <<= 1;
+		else
+			tc_rate >>= 1;
+	}
+
+	/* update rate limiter with new values */
+	fm10k_write_reg(hw, FM10K_TC_RATE(vf_idx), tc_rate | interval);
+	fm10k_write_reg(hw, FM10K_TC_MAXCREDIT(vf_idx), FM10K_TC_MAXCREDIT_64K);
+	fm10k_write_reg(hw, FM10K_TC_CREDIT(vf_idx), FM10K_TC_MAXCREDIT_64K);
+
+	return 0;
+}
+
+/**
+ *  fm10k_iov_assign_int_moderator_pf - Add VF interrupts to moderator list
+ *  @hw: pointer to the HW structure
+ *  @vf_idx: index of VF receiving GLORT
+ *
+ *  Update the interrupt moderator linked list to include any MSI-X
+ *  interrupts which the VF has enabled in the MSI-X vector table.
+ **/
+static s32 fm10k_iov_assign_int_moderator_pf(struct fm10k_hw *hw, u16 vf_idx)
+{
+	u16 vf_v_idx, vf_v_limit, i;
+
+	/* verify vf is in range */
+	if (vf_idx >= hw->iov.num_vfs)
+		return FM10K_ERR_PARAM;
+
+	/* determine vector offset and count */
+	vf_v_idx = fm10k_vf_vector_index(hw, vf_idx);
+	vf_v_limit = vf_v_idx + fm10k_vectors_per_pool(hw);
+
+	/* search for first vector that is not masked */
+	for (i = vf_v_limit - 1; i > vf_v_idx; i--) {
+		if (!fm10k_read_reg(hw, FM10K_MSIX_VECTOR_MASK(i)))
+			break;
+	}
+
+	/* reset linked list so it now includes our active vectors */
+	if (vf_idx == (hw->iov.num_vfs - 1))
+		fm10k_write_reg(hw, FM10K_ITR2(0), i);
+	else
+		fm10k_write_reg(hw, FM10K_ITR2(vf_v_limit), i);
+
+	return 0;
+}
+
+/**
+ *  fm10k_iov_assign_default_mac_vlan_pf - Assign a MAC and VLAN to VF
+ *  @hw: pointer to the HW structure
+ *  @vf_info: pointer to VF information structure
+ *
+ *  Assign a MAC address and default VLAN to a VF and notify it of the update
+ **/
+static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw,
+						struct fm10k_vf_info *vf_info)
+{
+	u16 qmap_stride, queues_per_pool, vf_q_idx, timeout, qmap_idx, i;
+	u32 msg[4], txdctl, txqctl, tdbal = 0, tdbah = 0;
+	s32 err = 0;
+	u16 vf_idx, vf_vid;
+
+	/* verify vf is in range */
+	if (!vf_info || vf_info->vf_idx >= hw->iov.num_vfs)
+		return FM10K_ERR_PARAM;
+
+	/* determine qmap offsets and counts */
+	qmap_stride = (hw->iov.num_vfs > 8) ? 32 : 256;
+	queues_per_pool = fm10k_queues_per_pool(hw);
+
+	/* calculate starting index for queues */
+	vf_idx = vf_info->vf_idx;
+	vf_q_idx = fm10k_vf_queue_index(hw, vf_idx);
+	qmap_idx = qmap_stride * vf_idx;
+
+	/* Determine correct default VLAN ID. The FM10K_VLAN_OVERRIDE bit is
+	 * used here to indicate to the VF that it will not have privilege to
+	 * write VLAN_TABLE. All policy is enforced on the PF but this allows
+	 * the VF to correctly report errors to userspace requests.
+	 */
+	if (vf_info->pf_vid)
+		vf_vid = vf_info->pf_vid | FM10K_VLAN_OVERRIDE;
+	else
+		vf_vid = vf_info->sw_vid;
+
+	/* generate MAC_ADDR request */
+	fm10k_tlv_msg_init(msg, FM10K_VF_MSG_ID_MAC_VLAN);
+	fm10k_tlv_attr_put_mac_vlan(msg, FM10K_MAC_VLAN_MSG_DEFAULT_MAC,
+				    vf_info->mac, vf_vid);
+
+	/* Configure Queue control register with new VLAN ID. The TXQCTL
+	 * register is RO from the VF, so the PF must do this even in the
+	 * case of notifying the VF of a new VID via the mailbox.
+	 */
+	txqctl = ((u32)vf_vid << FM10K_TXQCTL_VID_SHIFT) &
+		 FM10K_TXQCTL_VID_MASK;
+	txqctl |= (vf_idx << FM10K_TXQCTL_TC_SHIFT) |
+		  FM10K_TXQCTL_VF | vf_idx;
+
+	for (i = 0; i < queues_per_pool; i++)
+		fm10k_write_reg(hw, FM10K_TXQCTL(vf_q_idx + i), txqctl);
+
+	/* try loading a message onto outgoing mailbox first */
+	if (vf_info->mbx.ops.enqueue_tx) {
+		err = vf_info->mbx.ops.enqueue_tx(hw, &vf_info->mbx, msg);
+		if (err != FM10K_MBX_ERR_NO_MBX)
+			return err;
+		err = 0;
+	}
+
+	/* If we aren't connected to a mailbox, this is most likely because
+	 * the VF driver is not running. It should thus be safe to re-map
+	 * queues and use the registers to pass the MAC address so that the VF
+	 * driver gets correct information during its initialization.
+	 */
+
+	/* MAP Tx queue back to 0 temporarily, and disable it */
+	fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), 0);
+	fm10k_write_reg(hw, FM10K_TXDCTL(vf_q_idx), 0);
+
+	/* verify ring has disabled before modifying base address registers */
+	txdctl = fm10k_read_reg(hw, FM10K_TXDCTL(vf_q_idx));
+	for (timeout = 0; txdctl & FM10K_TXDCTL_ENABLE; timeout++) {
+		/* limit ourselves to a 1ms timeout */
+		if (timeout == 10) {
+			err = FM10K_ERR_DMA_PENDING;
+			goto err_out;
+		}
+
+		usleep_range(100, 200);
+		txdctl = fm10k_read_reg(hw, FM10K_TXDCTL(vf_q_idx));
+	}
+
+	/* Update base address registers to contain MAC address */
+	if (is_valid_ether_addr(vf_info->mac)) {
+		tdbal = (((u32)vf_info->mac[3]) << 24) |
+			(((u32)vf_info->mac[4]) << 16) |
+			(((u32)vf_info->mac[5]) << 8);
+
+		tdbah = (((u32)0xFF)	        << 24) |
+			(((u32)vf_info->mac[0]) << 16) |
+			(((u32)vf_info->mac[1]) << 8) |
+			((u32)vf_info->mac[2]);
+	}
+
+	/* Record the base address into queue 0 */
+	fm10k_write_reg(hw, FM10K_TDBAL(vf_q_idx), tdbal);
+	fm10k_write_reg(hw, FM10K_TDBAH(vf_q_idx), tdbah);
+
+	/* Provide the VF the ITR scale, using software-defined fields in TDLEN
+	 * to pass the information during VF initialization. See definition of
+	 * FM10K_TDLEN_ITR_SCALE_SHIFT for more details.
+	 */
+	fm10k_write_reg(hw, FM10K_TDLEN(vf_q_idx), hw->mac.itr_scale <<
+						   FM10K_TDLEN_ITR_SCALE_SHIFT);
+
+err_out:
+	/* restore the queue back to VF ownership */
+	fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), vf_q_idx);
+	return err;
+}
+
+/**
+ *  fm10k_iov_reset_resources_pf - Reassign queues and interrupts to a VF
+ *  @hw: pointer to the HW structure
+ *  @vf_info: pointer to VF information structure
+ *
+ *  Reassign the interrupts and queues to a VF following an FLR
+ **/
+static s32 fm10k_iov_reset_resources_pf(struct fm10k_hw *hw,
+					struct fm10k_vf_info *vf_info)
+{
+	u16 qmap_stride, queues_per_pool, vf_q_idx, qmap_idx;
+	u32 tdbal = 0, tdbah = 0, txqctl, rxqctl;
+	u16 vf_v_idx, vf_v_limit, vf_vid;
+	u8 vf_idx = vf_info->vf_idx;
+	int i;
+
+	/* verify vf is in range */
+	if (vf_idx >= hw->iov.num_vfs)
+		return FM10K_ERR_PARAM;
+
+	/* clear event notification of VF FLR */
+	fm10k_write_reg(hw, FM10K_PFVFLREC(vf_idx / 32), BIT(vf_idx % 32));
+
+	/* force timeout and then disconnect the mailbox */
+	vf_info->mbx.timeout = 0;
+	if (vf_info->mbx.ops.disconnect)
+		vf_info->mbx.ops.disconnect(hw, &vf_info->mbx);
+
+	/* determine vector offset and count */
+	vf_v_idx = fm10k_vf_vector_index(hw, vf_idx);
+	vf_v_limit = vf_v_idx + fm10k_vectors_per_pool(hw);
+
+	/* determine qmap offsets and counts */
+	qmap_stride = (hw->iov.num_vfs > 8) ? 32 : 256;
+	queues_per_pool = fm10k_queues_per_pool(hw);
+	qmap_idx = qmap_stride * vf_idx;
+
+	/* make all the queues inaccessible to the VF */
+	for (i = qmap_idx; i < (qmap_idx + qmap_stride); i++) {
+		fm10k_write_reg(hw, FM10K_TQMAP(i), 0);
+		fm10k_write_reg(hw, FM10K_RQMAP(i), 0);
+	}
+
+	/* calculate starting index for queues */
+	vf_q_idx = fm10k_vf_queue_index(hw, vf_idx);
+
+	/* determine correct default VLAN ID */
+	if (vf_info->pf_vid)
+		vf_vid = vf_info->pf_vid;
+	else
+		vf_vid = vf_info->sw_vid;
+
+	/* configure Queue control register */
+	txqctl = ((u32)vf_vid << FM10K_TXQCTL_VID_SHIFT) |
+		 (vf_idx << FM10K_TXQCTL_TC_SHIFT) |
+		 FM10K_TXQCTL_VF | vf_idx;
+	rxqctl = (vf_idx << FM10K_RXQCTL_VF_SHIFT) | FM10K_RXQCTL_VF;
+
+	/* stop further DMA and reset queue ownership back to VF */
+	for (i = vf_q_idx; i < (queues_per_pool + vf_q_idx); i++) {
+		fm10k_write_reg(hw, FM10K_TXDCTL(i), 0);
+		fm10k_write_reg(hw, FM10K_TXQCTL(i), txqctl);
+		fm10k_write_reg(hw, FM10K_RXDCTL(i),
+				FM10K_RXDCTL_WRITE_BACK_MIN_DELAY |
+				FM10K_RXDCTL_DROP_ON_EMPTY);
+		fm10k_write_reg(hw, FM10K_RXQCTL(i), rxqctl);
+	}
+
+	/* reset TC with -1 credits and no quanta to prevent transmit */
+	fm10k_write_reg(hw, FM10K_TC_MAXCREDIT(vf_idx), 0);
+	fm10k_write_reg(hw, FM10K_TC_RATE(vf_idx), 0);
+	fm10k_write_reg(hw, FM10K_TC_CREDIT(vf_idx),
+			FM10K_TC_CREDIT_CREDIT_MASK);
+
+	/* update our first entry in the table based on previous VF */
+	if (!vf_idx)
+		hw->mac.ops.update_int_moderator(hw);
+	else
+		hw->iov.ops.assign_int_moderator(hw, vf_idx - 1);
+
+	/* reset linked list so it now includes our active vectors */
+	if (vf_idx == (hw->iov.num_vfs - 1))
+		fm10k_write_reg(hw, FM10K_ITR2(0), vf_v_idx);
+	else
+		fm10k_write_reg(hw, FM10K_ITR2(vf_v_limit), vf_v_idx);
+
+	/* link remaining vectors so that next points to previous */
+	for (vf_v_idx++; vf_v_idx < vf_v_limit; vf_v_idx++)
+		fm10k_write_reg(hw, FM10K_ITR2(vf_v_idx), vf_v_idx - 1);
+
+	/* zero out MBMEM, VLAN_TABLE, RETA, RSSRK, and MRQC registers */
+	for (i = FM10K_VFMBMEM_LEN; i--;)
+		fm10k_write_reg(hw, FM10K_MBMEM_VF(vf_idx, i), 0);
+	for (i = FM10K_VLAN_TABLE_SIZE; i--;)
+		fm10k_write_reg(hw, FM10K_VLAN_TABLE(vf_info->vsi, i), 0);
+	for (i = FM10K_RETA_SIZE; i--;)
+		fm10k_write_reg(hw, FM10K_RETA(vf_info->vsi, i), 0);
+	for (i = FM10K_RSSRK_SIZE; i--;)
+		fm10k_write_reg(hw, FM10K_RSSRK(vf_info->vsi, i), 0);
+	fm10k_write_reg(hw, FM10K_MRQC(vf_info->vsi), 0);
+
+	/* Update base address registers to contain MAC address */
+	if (is_valid_ether_addr(vf_info->mac)) {
+		tdbal = (((u32)vf_info->mac[3]) << 24) |
+			(((u32)vf_info->mac[4]) << 16) |
+			(((u32)vf_info->mac[5]) << 8);
+		tdbah = (((u32)0xFF)	   << 24) |
+			(((u32)vf_info->mac[0]) << 16) |
+			(((u32)vf_info->mac[1]) << 8) |
+			((u32)vf_info->mac[2]);
+	}
+
+	/* map queue pairs back to VF from last to first */
+	for (i = queues_per_pool; i--;) {
+		fm10k_write_reg(hw, FM10K_TDBAL(vf_q_idx + i), tdbal);
+		fm10k_write_reg(hw, FM10K_TDBAH(vf_q_idx + i), tdbah);
+		/* See definition of FM10K_TDLEN_ITR_SCALE_SHIFT for an
+		 * explanation of how TDLEN is used.
+		 */
+		fm10k_write_reg(hw, FM10K_TDLEN(vf_q_idx + i),
+				hw->mac.itr_scale <<
+				FM10K_TDLEN_ITR_SCALE_SHIFT);
+		fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx + i), vf_q_idx + i);
+		fm10k_write_reg(hw, FM10K_RQMAP(qmap_idx + i), vf_q_idx + i);
+	}
+
+	/* repeat the first ring for all the remaining VF rings */
+	for (i = queues_per_pool; i < qmap_stride; i++) {
+		fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx + i), vf_q_idx);
+		fm10k_write_reg(hw, FM10K_RQMAP(qmap_idx + i), vf_q_idx);
+	}
+
+	return 0;
+}
+
+/**
+ *  fm10k_iov_set_lport_pf - Assign and enable a logical port for a given VF
+ *  @hw: pointer to hardware structure
+ *  @vf_info: pointer to VF information structure
+ *  @lport_idx: Logical port offset from the hardware glort
+ *  @flags: Set of capability flags to extend port beyond basic functionality
+ *
+ *  This function allows enabling a VF port by assigning it a GLORT and
+ *  setting the flags so that it can enable an Rx mode.
+ **/
+static s32 fm10k_iov_set_lport_pf(struct fm10k_hw *hw,
+				  struct fm10k_vf_info *vf_info,
+				  u16 lport_idx, u8 flags)
+{
+	u16 glort = (hw->mac.dglort_map + lport_idx) & FM10K_DGLORTMAP_NONE;
+
+	/* if glort is not valid return error */
+	if (!fm10k_glort_valid_pf(hw, glort))
+		return FM10K_ERR_PARAM;
+
+	vf_info->vf_flags = flags | FM10K_VF_FLAG_NONE_CAPABLE;
+	vf_info->glort = glort;
+
+	return 0;
+}
+
+/**
+ *  fm10k_iov_reset_lport_pf - Disable a logical port for a given VF
+ *  @hw: pointer to hardware structure
+ *  @vf_info: pointer to VF information structure
+ *
+ *  This function disables a VF port by stripping it of a GLORT and
+ *  setting the flags so that it cannot enable any Rx mode.
+ **/
+static void fm10k_iov_reset_lport_pf(struct fm10k_hw *hw,
+				     struct fm10k_vf_info *vf_info)
+{
+	u32 msg[1];
+
+	/* need to disable the port if it is already enabled */
+	if (FM10K_VF_FLAG_ENABLED(vf_info)) {
+		/* notify switch that this port has been disabled */
+		fm10k_update_lport_state_pf(hw, vf_info->glort, 1, false);
+
+		/* generate port state response to notify VF it is not ready */
+		fm10k_tlv_msg_init(msg, FM10K_VF_MSG_ID_LPORT_STATE);
+		vf_info->mbx.ops.enqueue_tx(hw, &vf_info->mbx, msg);
+	}
+
+	/* clear flags and glort if it exists */
+	vf_info->vf_flags = 0;
+	vf_info->glort = 0;
+}
+
+/**
+ *  fm10k_iov_update_stats_pf - Updates hardware related statistics for VFs
+ *  @hw: pointer to hardware structure
+ *  @q: stats for all queues of a VF
+ *  @vf_idx: index of VF
+ *
+ *  This function collects queue stats for VFs.
+ **/
+static void fm10k_iov_update_stats_pf(struct fm10k_hw *hw,
+				      struct fm10k_hw_stats_q *q,
+				      u16 vf_idx)
+{
+	u32 idx, qpp;
+
+	/* get stats for all of the queues */
+	qpp = fm10k_queues_per_pool(hw);
+	idx = fm10k_vf_queue_index(hw, vf_idx);
+	fm10k_update_hw_stats_q(hw, q, idx, qpp);
+}
+
+/**
+ *  fm10k_iov_msg_msix_pf - Message handler for MSI-X request from VF
+ *  @hw: Pointer to hardware structure
+ *  @results: Pointer array to message, results[0] is pointer to message
+ *  @mbx: Pointer to mailbox information structure
+ *
+ *  This function is a default handler for MSI-X requests from the VF. The
+ *  assumption is that in this case it is acceptable to just directly
+ *  hand off the message from the VF to the underlying shared code.
+ **/
+s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 __always_unused **results,
+			  struct fm10k_mbx_info *mbx)
+{
+	struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx;
+	u8 vf_idx = vf_info->vf_idx;
+
+	return hw->iov.ops.assign_int_moderator(hw, vf_idx);
+}
+
+/**
+ * fm10k_iov_select_vid - Select correct default VLAN ID
+ * @vf_info: pointer to VF information structure
+ * @vid: VLAN ID to correct
+ *
+ * Will report an error if the VLAN ID is out of range. For VID = 0, it will
+ * return either the pf_vid or sw_vid depending on which one is set.
+ */
+s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid)
+{
+	if (!vid)
+		return vf_info->pf_vid ? vf_info->pf_vid : vf_info->sw_vid;
+	else if (vf_info->pf_vid && vid != vf_info->pf_vid)
+		return FM10K_ERR_PARAM;
+	else
+		return vid;
+}
+
+/**
+ *  fm10k_iov_msg_mac_vlan_pf - Message handler for MAC/VLAN request from VF
+ *  @hw: Pointer to hardware structure
+ *  @results: Pointer array to message, results[0] is pointer to message
+ *  @mbx: Pointer to mailbox information structure
+ *
+ *  This function is a default handler for MAC/VLAN requests from the VF.
+ *  The assumption is that in this case it is acceptable to just directly
+ *  hand off the message from the VF to the underlying shared code.
+ **/
+s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 **results,
+			      struct fm10k_mbx_info *mbx)
+{
+	struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx;
+	u8 mac[ETH_ALEN];
+	u32 *result;
+	int err = 0;
+	bool set;
+	u16 vlan;
+	u32 vid;
+
+	/* we shouldn't be updating rules on a disabled interface */
+	if (!FM10K_VF_FLAG_ENABLED(vf_info))
+		err = FM10K_ERR_PARAM;
+
+	if (!err && !!results[FM10K_MAC_VLAN_MSG_VLAN]) {
+		result = results[FM10K_MAC_VLAN_MSG_VLAN];
+
+		/* record VLAN id requested */
+		err = fm10k_tlv_attr_get_u32(result, &vid);
+		if (err)
+			return err;
+
+		set = !(vid & FM10K_VLAN_CLEAR);
+		vid &= ~FM10K_VLAN_CLEAR;
+
+		/* if the length field has been set, this is a multi-bit
+		 * update request. For multi-bit requests, simply disallow
+		 * them when the pf_vid has been set. In this case, the PF
+		 * should have already cleared the VLAN_TABLE, and if we
+		 * allowed them, it could allow a rogue VF to receive traffic
+		 * on a VLAN it was not assigned. In the single-bit case, we
+		 * need to modify requests for VLAN 0 to use the default PF or
+		 * SW vid when assigned.
+		 */
+
+		if (vid >> 16) {
+			/* prevent multi-bit requests when PF has
+			 * administratively set the VLAN for this VF
+			 */
+			if (vf_info->pf_vid)
+				return FM10K_ERR_PARAM;
+		} else {
+			err = fm10k_iov_select_vid(vf_info, (u16)vid);
+			if (err < 0)
+				return err;
+
+			vid = err;
+		}
+
+		/* update VSI info for VF in regards to VLAN table */
+		err = hw->mac.ops.update_vlan(hw, vid, vf_info->vsi, set);
+	}
+
+	if (!err && !!results[FM10K_MAC_VLAN_MSG_MAC]) {
+		result = results[FM10K_MAC_VLAN_MSG_MAC];
+
+		/* record unicast MAC address requested */
+		err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan);
+		if (err)
+			return err;
+
+		/* block attempts to set MAC for a locked device */
+		if (is_valid_ether_addr(vf_info->mac) &&
+		    !ether_addr_equal(mac, vf_info->mac))
+			return FM10K_ERR_PARAM;
+
+		set = !(vlan & FM10K_VLAN_CLEAR);
+		vlan &= ~FM10K_VLAN_CLEAR;
+
+		err = fm10k_iov_select_vid(vf_info, vlan);
+		if (err < 0)
+			return err;
+
+		vlan = (u16)err;
+
+		/* notify switch of request for new unicast address */
+		err = hw->mac.ops.update_uc_addr(hw, vf_info->glort,
+						 mac, vlan, set, 0);
+	}
+
+	if (!err && !!results[FM10K_MAC_VLAN_MSG_MULTICAST]) {
+		result = results[FM10K_MAC_VLAN_MSG_MULTICAST];
+
+		/* record multicast MAC address requested */
+		err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan);
+		if (err)
+			return err;
+
+		/* verify that the VF is allowed to request multicast */
+		if (!(vf_info->vf_flags & FM10K_VF_FLAG_MULTI_ENABLED))
+			return FM10K_ERR_PARAM;
+
+		set = !(vlan & FM10K_VLAN_CLEAR);
+		vlan &= ~FM10K_VLAN_CLEAR;
+
+		err = fm10k_iov_select_vid(vf_info, vlan);
+		if (err < 0)
+			return err;
+
+		vlan = (u16)err;
+
+		/* notify switch of request for new multicast address */
+		err = hw->mac.ops.update_mc_addr(hw, vf_info->glort,
+						 mac, vlan, set);
+	}
+
+	return err;
+}
+
+/**
+ *  fm10k_iov_supported_xcast_mode_pf - Determine best match for xcast mode
+ *  @vf_info: VF info structure containing capability flags
+ *  @mode: Requested xcast mode
+ *
+ *  This function outputs the mode that most closely matches the requested
+ *  mode.  If not modes match it will request we disable the port
+ **/
+static u8 fm10k_iov_supported_xcast_mode_pf(struct fm10k_vf_info *vf_info,
+					    u8 mode)
+{
+	u8 vf_flags = vf_info->vf_flags;
+
+	/* match up mode to capabilities as best as possible */
+	switch (mode) {
+	case FM10K_XCAST_MODE_PROMISC:
+		if (vf_flags & FM10K_VF_FLAG_PROMISC_CAPABLE)
+			return FM10K_XCAST_MODE_PROMISC;
+		fallthrough;
+	case FM10K_XCAST_MODE_ALLMULTI:
+		if (vf_flags & FM10K_VF_FLAG_ALLMULTI_CAPABLE)
+			return FM10K_XCAST_MODE_ALLMULTI;
+		fallthrough;
+	case FM10K_XCAST_MODE_MULTI:
+		if (vf_flags & FM10K_VF_FLAG_MULTI_CAPABLE)
+			return FM10K_XCAST_MODE_MULTI;
+		fallthrough;
+	case FM10K_XCAST_MODE_NONE:
+		if (vf_flags & FM10K_VF_FLAG_NONE_CAPABLE)
+			return FM10K_XCAST_MODE_NONE;
+		fallthrough;
+	default:
+		break;
+	}
+
+	/* disable interface as it should not be able to request any */
+	return FM10K_XCAST_MODE_DISABLE;
+}
+
+/**
+ *  fm10k_iov_msg_lport_state_pf - Message handler for port state requests
+ *  @hw: Pointer to hardware structure
+ *  @results: Pointer array to message, results[0] is pointer to message
+ *  @mbx: Pointer to mailbox information structure
+ *
+ *  This function is a default handler for port state requests.  The port
+ *  state requests for now are basic and consist of enabling or disabling
+ *  the port.
+ **/
+s32 fm10k_iov_msg_lport_state_pf(struct fm10k_hw *hw, u32 **results,
+				 struct fm10k_mbx_info *mbx)
+{
+	struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx;
+	s32 err = 0;
+	u32 msg[2];
+	u8 mode = 0;
+
+	/* verify VF is allowed to enable even minimal mode */
+	if (!(vf_info->vf_flags & FM10K_VF_FLAG_NONE_CAPABLE))
+		return FM10K_ERR_PARAM;
+
+	if (!!results[FM10K_LPORT_STATE_MSG_XCAST_MODE]) {
+		u32 *result = results[FM10K_LPORT_STATE_MSG_XCAST_MODE];
+
+		/* XCAST mode update requested */
+		err = fm10k_tlv_attr_get_u8(result, &mode);
+		if (err)
+			return FM10K_ERR_PARAM;
+
+		/* prep for possible demotion depending on capabilities */
+		mode = fm10k_iov_supported_xcast_mode_pf(vf_info, mode);
+
+		/* if mode is not currently enabled, enable it */
+		if (!(FM10K_VF_FLAG_ENABLED(vf_info) & BIT(mode)))
+			fm10k_update_xcast_mode_pf(hw, vf_info->glort, mode);
+
+		/* swap mode back to a bit flag */
+		mode = FM10K_VF_FLAG_SET_MODE(mode);
+	} else if (!results[FM10K_LPORT_STATE_MSG_DISABLE]) {
+		/* need to disable the port if it is already enabled */
+		if (FM10K_VF_FLAG_ENABLED(vf_info))
+			err = fm10k_update_lport_state_pf(hw, vf_info->glort,
+							  1, false);
+
+		/* we need to clear VF_FLAG_ENABLED flags in order to ensure
+		 * that we actually re-enable the LPORT state below. Note that
+		 * this has no impact if the VF is already disabled, as the
+		 * flags are already cleared.
+		 */
+		if (!err)
+			vf_info->vf_flags = FM10K_VF_FLAG_CAPABLE(vf_info);
+
+		/* when enabling the port we should reset the rate limiters */
+		hw->iov.ops.configure_tc(hw, vf_info->vf_idx, vf_info->rate);
+
+		/* set mode for minimal functionality */
+		mode = FM10K_VF_FLAG_SET_MODE_NONE;
+
+		/* generate port state response to notify VF it is ready */
+		fm10k_tlv_msg_init(msg, FM10K_VF_MSG_ID_LPORT_STATE);
+		fm10k_tlv_attr_put_bool(msg, FM10K_LPORT_STATE_MSG_READY);
+		mbx->ops.enqueue_tx(hw, mbx, msg);
+	}
+
+	/* if enable state toggled note the update */
+	if (!err && (!FM10K_VF_FLAG_ENABLED(vf_info) != !mode))
+		err = fm10k_update_lport_state_pf(hw, vf_info->glort, 1,
+						  !!mode);
+
+	/* if state change succeeded, then update our stored state */
+	mode |= FM10K_VF_FLAG_CAPABLE(vf_info);
+	if (!err)
+		vf_info->vf_flags = mode;
+
+	return err;
+}
+
+/**
+ *  fm10k_update_hw_stats_pf - Updates hardware related statistics of PF
+ *  @hw: pointer to hardware structure
+ *  @stats: pointer to the stats structure to update
+ *
+ *  This function collects and aggregates global and per queue hardware
+ *  statistics.
+ **/
+static void fm10k_update_hw_stats_pf(struct fm10k_hw *hw,
+				     struct fm10k_hw_stats *stats)
+{
+	u32 timeout, ur, ca, um, xec, vlan_drop, loopback_drop, nodesc_drop;
+	u32 id, id_prev;
+
+	/* Use Tx queue 0 as a canary to detect a reset */
+	id = fm10k_read_reg(hw, FM10K_TXQCTL(0));
+
+	/* Read Global Statistics */
+	do {
+		timeout = fm10k_read_hw_stats_32b(hw, FM10K_STATS_TIMEOUT,
+						  &stats->timeout);
+		ur = fm10k_read_hw_stats_32b(hw, FM10K_STATS_UR, &stats->ur);
+		ca = fm10k_read_hw_stats_32b(hw, FM10K_STATS_CA, &stats->ca);
+		um = fm10k_read_hw_stats_32b(hw, FM10K_STATS_UM, &stats->um);
+		xec = fm10k_read_hw_stats_32b(hw, FM10K_STATS_XEC, &stats->xec);
+		vlan_drop = fm10k_read_hw_stats_32b(hw, FM10K_STATS_VLAN_DROP,
+						    &stats->vlan_drop);
+		loopback_drop =
+			fm10k_read_hw_stats_32b(hw,
+						FM10K_STATS_LOOPBACK_DROP,
+						&stats->loopback_drop);
+		nodesc_drop = fm10k_read_hw_stats_32b(hw,
+						      FM10K_STATS_NODESC_DROP,
+						      &stats->nodesc_drop);
+
+		/* if value has not changed then we have consistent data */
+		id_prev = id;
+		id = fm10k_read_reg(hw, FM10K_TXQCTL(0));
+	} while ((id ^ id_prev) & FM10K_TXQCTL_ID_MASK);
+
+	/* drop non-ID bits and set VALID ID bit */
+	id &= FM10K_TXQCTL_ID_MASK;
+	id |= FM10K_STAT_VALID;
+
+	/* Update Global Statistics */
+	if (stats->stats_idx == id) {
+		stats->timeout.count += timeout;
+		stats->ur.count += ur;
+		stats->ca.count += ca;
+		stats->um.count += um;
+		stats->xec.count += xec;
+		stats->vlan_drop.count += vlan_drop;
+		stats->loopback_drop.count += loopback_drop;
+		stats->nodesc_drop.count += nodesc_drop;
+	}
+
+	/* Update bases and record current PF id */
+	fm10k_update_hw_base_32b(&stats->timeout, timeout);
+	fm10k_update_hw_base_32b(&stats->ur, ur);
+	fm10k_update_hw_base_32b(&stats->ca, ca);
+	fm10k_update_hw_base_32b(&stats->um, um);
+	fm10k_update_hw_base_32b(&stats->xec, xec);
+	fm10k_update_hw_base_32b(&stats->vlan_drop, vlan_drop);
+	fm10k_update_hw_base_32b(&stats->loopback_drop, loopback_drop);
+	fm10k_update_hw_base_32b(&stats->nodesc_drop, nodesc_drop);
+	stats->stats_idx = id;
+
+	/* Update Queue Statistics */
+	fm10k_update_hw_stats_q(hw, stats->q, 0, hw->mac.max_queues);
+}
+
+/**
+ *  fm10k_rebind_hw_stats_pf - Resets base for hardware statistics of PF
+ *  @hw: pointer to hardware structure
+ *  @stats: pointer to the stats structure to update
+ *
+ *  This function resets the base for global and per queue hardware
+ *  statistics.
+ **/
+static void fm10k_rebind_hw_stats_pf(struct fm10k_hw *hw,
+				     struct fm10k_hw_stats *stats)
+{
+	/* Unbind Global Statistics */
+	fm10k_unbind_hw_stats_32b(&stats->timeout);
+	fm10k_unbind_hw_stats_32b(&stats->ur);
+	fm10k_unbind_hw_stats_32b(&stats->ca);
+	fm10k_unbind_hw_stats_32b(&stats->um);
+	fm10k_unbind_hw_stats_32b(&stats->xec);
+	fm10k_unbind_hw_stats_32b(&stats->vlan_drop);
+	fm10k_unbind_hw_stats_32b(&stats->loopback_drop);
+	fm10k_unbind_hw_stats_32b(&stats->nodesc_drop);
+
+	/* Unbind Queue Statistics */
+	fm10k_unbind_hw_stats_q(stats->q, 0, hw->mac.max_queues);
+
+	/* Reinitialize bases for all stats */
+	fm10k_update_hw_stats_pf(hw, stats);
+}
+
+/**
+ *  fm10k_set_dma_mask_pf - Configures PhyAddrSpace to limit DMA to system
+ *  @hw: pointer to hardware structure
+ *  @dma_mask: 64 bit DMA mask required for platform
+ *
+ *  This function sets the PHYADDR.PhyAddrSpace bits for the endpoint in order
+ *  to limit the access to memory beyond what is physically in the system.
+ **/
+static void fm10k_set_dma_mask_pf(struct fm10k_hw *hw, u64 dma_mask)
+{
+	/* we need to write the upper 32 bits of DMA mask to PhyAddrSpace */
+	u32 phyaddr = (u32)(dma_mask >> 32);
+
+	fm10k_write_reg(hw, FM10K_PHYADDR, phyaddr);
+}
+
+/**
+ *  fm10k_get_fault_pf - Record a fault in one of the interface units
+ *  @hw: pointer to hardware structure
+ *  @type: pointer to fault type register offset
+ *  @fault: pointer to memory location to record the fault
+ *
+ *  Record the fault register contents to the fault data structure and
+ *  clear the entry from the register.
+ *
+ *  Returns ERR_PARAM if invalid register is specified or no error is present.
+ **/
+static s32 fm10k_get_fault_pf(struct fm10k_hw *hw, int type,
+			      struct fm10k_fault *fault)
+{
+	u32 func;
+
+	/* verify the fault register is in range and is aligned */
+	switch (type) {
+	case FM10K_PCA_FAULT:
+	case FM10K_THI_FAULT:
+	case FM10K_FUM_FAULT:
+		break;
+	default:
+		return FM10K_ERR_PARAM;
+	}
+
+	/* only service faults that are valid */
+	func = fm10k_read_reg(hw, type + FM10K_FAULT_FUNC);
+	if (!(func & FM10K_FAULT_FUNC_VALID))
+		return FM10K_ERR_PARAM;
+
+	/* read remaining fields */
+	fault->address = fm10k_read_reg(hw, type + FM10K_FAULT_ADDR_HI);
+	fault->address <<= 32;
+	fault->address |= fm10k_read_reg(hw, type + FM10K_FAULT_ADDR_LO);
+	fault->specinfo = fm10k_read_reg(hw, type + FM10K_FAULT_SPECINFO);
+
+	/* clear valid bit to allow for next error */
+	fm10k_write_reg(hw, type + FM10K_FAULT_FUNC, FM10K_FAULT_FUNC_VALID);
+
+	/* Record which function triggered the error */
+	if (func & FM10K_FAULT_FUNC_PF)
+		fault->func = 0;
+	else
+		fault->func = 1 + ((func & FM10K_FAULT_FUNC_VF_MASK) >>
+				   FM10K_FAULT_FUNC_VF_SHIFT);
+
+	/* record fault type */
+	fault->type = func & FM10K_FAULT_FUNC_TYPE_MASK;
+
+	return 0;
+}
+
+/**
+ *  fm10k_request_lport_map_pf - Request LPORT map from the switch API
+ *  @hw: pointer to hardware structure
+ *
+ **/
+static s32 fm10k_request_lport_map_pf(struct fm10k_hw *hw)
+{
+	struct fm10k_mbx_info *mbx = &hw->mbx;
+	u32 msg[1];
+
+	/* issue request asking for LPORT map */
+	fm10k_tlv_msg_init(msg, FM10K_PF_MSG_ID_LPORT_MAP);
+
+	/* load onto outgoing mailbox */
+	return mbx->ops.enqueue_tx(hw, mbx, msg);
+}
+
+/**
+ *  fm10k_get_host_state_pf - Returns the state of the switch and mailbox
+ *  @hw: pointer to hardware structure
+ *  @switch_ready: pointer to boolean value that will record switch state
+ *
+ *  This function will check the DMA_CTRL2 register and mailbox in order
+ *  to determine if the switch is ready for the PF to begin requesting
+ *  addresses and mapping traffic to the local interface.
+ **/
+static s32 fm10k_get_host_state_pf(struct fm10k_hw *hw, bool *switch_ready)
+{
+	u32 dma_ctrl2;
+
+	/* verify the switch is ready for interaction */
+	dma_ctrl2 = fm10k_read_reg(hw, FM10K_DMA_CTRL2);
+	if (!(dma_ctrl2 & FM10K_DMA_CTRL2_SWITCH_READY))
+		return 0;
+
+	/* retrieve generic host state info */
+	return fm10k_get_host_state_generic(hw, switch_ready);
+}
+
+/* This structure defines the attibutes to be parsed below */
+const struct fm10k_tlv_attr fm10k_lport_map_msg_attr[] = {
+	FM10K_TLV_ATTR_LE_STRUCT(FM10K_PF_ATTR_ID_ERR,
+				 sizeof(struct fm10k_swapi_error)),
+	FM10K_TLV_ATTR_U32(FM10K_PF_ATTR_ID_LPORT_MAP),
+	FM10K_TLV_ATTR_LAST
+};
+
+/**
+ *  fm10k_msg_lport_map_pf - Message handler for lport_map message from SM
+ *  @hw: Pointer to hardware structure
+ *  @results: pointer array containing parsed data
+ *  @mbx: Pointer to mailbox information structure
+ *
+ *  This handler configures the lport mapping based on the reply from the
+ *  switch API.
+ **/
+s32 fm10k_msg_lport_map_pf(struct fm10k_hw *hw, u32 **results,
+			   struct fm10k_mbx_info __always_unused *mbx)
+{
+	u16 glort, mask;
+	u32 dglort_map;
+	s32 err;
+
+	err = fm10k_tlv_attr_get_u32(results[FM10K_PF_ATTR_ID_LPORT_MAP],
+				     &dglort_map);
+	if (err)
+		return err;
+
+	/* extract values out of the header */
+	glort = FM10K_MSG_HDR_FIELD_GET(dglort_map, LPORT_MAP_GLORT);
+	mask = FM10K_MSG_HDR_FIELD_GET(dglort_map, LPORT_MAP_MASK);
+
+	/* verify mask is set and none of the masked bits in glort are set */
+	if (!mask || (glort & ~mask))
+		return FM10K_ERR_PARAM;
+
+	/* verify the mask is contiguous, and that it is 1's followed by 0's */
+	if (((~(mask - 1) & mask) + mask) & FM10K_DGLORTMAP_NONE)
+		return FM10K_ERR_PARAM;
+
+	/* record the glort, mask, and port count */
+	hw->mac.dglort_map = dglort_map;
+
+	return 0;
+}
+
+const struct fm10k_tlv_attr fm10k_update_pvid_msg_attr[] = {
+	FM10K_TLV_ATTR_U32(FM10K_PF_ATTR_ID_UPDATE_PVID),
+	FM10K_TLV_ATTR_LAST
+};
+
+/**
+ *  fm10k_msg_update_pvid_pf - Message handler for port VLAN message from SM
+ *  @hw: Pointer to hardware structure
+ *  @results: pointer array containing parsed data
+ *  @mbx: Pointer to mailbox information structure
+ *
+ *  This handler configures the default VLAN for the PF
+ **/
+static s32 fm10k_msg_update_pvid_pf(struct fm10k_hw *hw, u32 **results,
+				    struct fm10k_mbx_info __always_unused *mbx)
+{
+	u16 glort, pvid;
+	u32 pvid_update;
+	s32 err;
+
+	err = fm10k_tlv_attr_get_u32(results[FM10K_PF_ATTR_ID_UPDATE_PVID],
+				     &pvid_update);
+	if (err)
+		return err;
+
+	/* extract values from the pvid update */
+	glort = FM10K_MSG_HDR_FIELD_GET(pvid_update, UPDATE_PVID_GLORT);
+	pvid = FM10K_MSG_HDR_FIELD_GET(pvid_update, UPDATE_PVID_PVID);
+
+	/* if glort is not valid return error */
+	if (!fm10k_glort_valid_pf(hw, glort))
+		return FM10K_ERR_PARAM;
+
+	/* verify VLAN ID is valid */
+	if (pvid >= FM10K_VLAN_TABLE_VID_MAX)
+		return FM10K_ERR_PARAM;
+
+	/* record the port VLAN ID value */
+	hw->mac.default_vid = pvid;
+
+	return 0;
+}
+
+/**
+ *  fm10k_record_global_table_data - Move global table data to swapi table info
+ *  @from: pointer to source table data structure
+ *  @to: pointer to destination table info structure
+ *
+ *  This function is will copy table_data to the table_info contained in
+ *  the hw struct.
+ **/
+static void fm10k_record_global_table_data(struct fm10k_global_table_data *from,
+					   struct fm10k_swapi_table_info *to)
+{
+	/* convert from le32 struct to CPU byte ordered values */
+	to->used = le32_to_cpu(from->used);
+	to->avail = le32_to_cpu(from->avail);
+}
+
+const struct fm10k_tlv_attr fm10k_err_msg_attr[] = {
+	FM10K_TLV_ATTR_LE_STRUCT(FM10K_PF_ATTR_ID_ERR,
+				 sizeof(struct fm10k_swapi_error)),
+	FM10K_TLV_ATTR_LAST
+};
+
+/**
+ *  fm10k_msg_err_pf - Message handler for error reply
+ *  @hw: Pointer to hardware structure
+ *  @results: pointer array containing parsed data
+ *  @mbx: Pointer to mailbox information structure
+ *
+ *  This handler will capture the data for any error replies to previous
+ *  messages that the PF has sent.
+ **/
+s32 fm10k_msg_err_pf(struct fm10k_hw *hw, u32 **results,
+		     struct fm10k_mbx_info __always_unused *mbx)
+{
+	struct fm10k_swapi_error err_msg;
+	s32 err;
+
+	/* extract structure from message */
+	err = fm10k_tlv_attr_get_le_struct(results[FM10K_PF_ATTR_ID_ERR],
+					   &err_msg, sizeof(err_msg));
+	if (err)
+		return err;
+
+	/* record table status */
+	fm10k_record_global_table_data(&err_msg.mac, &hw->swapi.mac);
+	fm10k_record_global_table_data(&err_msg.nexthop, &hw->swapi.nexthop);
+	fm10k_record_global_table_data(&err_msg.ffu, &hw->swapi.ffu);
+
+	/* record SW API status value */
+	hw->swapi.status = le32_to_cpu(err_msg.status);
+
+	return 0;
+}
+
+static const struct fm10k_msg_data fm10k_msg_data_pf[] = {
+	FM10K_PF_MSG_ERR_HANDLER(XCAST_MODES, fm10k_msg_err_pf),
+	FM10K_PF_MSG_ERR_HANDLER(UPDATE_MAC_FWD_RULE, fm10k_msg_err_pf),
+	FM10K_PF_MSG_LPORT_MAP_HANDLER(fm10k_msg_lport_map_pf),
+	FM10K_PF_MSG_ERR_HANDLER(LPORT_CREATE, fm10k_msg_err_pf),
+	FM10K_PF_MSG_ERR_HANDLER(LPORT_DELETE, fm10k_msg_err_pf),
+	FM10K_PF_MSG_UPDATE_PVID_HANDLER(fm10k_msg_update_pvid_pf),
+	FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
+};
+
+static const struct fm10k_mac_ops mac_ops_pf = {
+	.get_bus_info		= fm10k_get_bus_info_generic,
+	.reset_hw		= fm10k_reset_hw_pf,
+	.init_hw		= fm10k_init_hw_pf,
+	.start_hw		= fm10k_start_hw_generic,
+	.stop_hw		= fm10k_stop_hw_generic,
+	.update_vlan		= fm10k_update_vlan_pf,
+	.read_mac_addr		= fm10k_read_mac_addr_pf,
+	.update_uc_addr		= fm10k_update_uc_addr_pf,
+	.update_mc_addr		= fm10k_update_mc_addr_pf,
+	.update_xcast_mode	= fm10k_update_xcast_mode_pf,
+	.update_int_moderator	= fm10k_update_int_moderator_pf,
+	.update_lport_state	= fm10k_update_lport_state_pf,
+	.update_hw_stats	= fm10k_update_hw_stats_pf,
+	.rebind_hw_stats	= fm10k_rebind_hw_stats_pf,
+	.configure_dglort_map	= fm10k_configure_dglort_map_pf,
+	.set_dma_mask		= fm10k_set_dma_mask_pf,
+	.get_fault		= fm10k_get_fault_pf,
+	.get_host_state		= fm10k_get_host_state_pf,
+	.request_lport_map	= fm10k_request_lport_map_pf,
+};
+
+static const struct fm10k_iov_ops iov_ops_pf = {
+	.assign_resources		= fm10k_iov_assign_resources_pf,
+	.configure_tc			= fm10k_iov_configure_tc_pf,
+	.assign_int_moderator		= fm10k_iov_assign_int_moderator_pf,
+	.assign_default_mac_vlan	= fm10k_iov_assign_default_mac_vlan_pf,
+	.reset_resources		= fm10k_iov_reset_resources_pf,
+	.set_lport			= fm10k_iov_set_lport_pf,
+	.reset_lport			= fm10k_iov_reset_lport_pf,
+	.update_stats			= fm10k_iov_update_stats_pf,
+};
+
+static s32 fm10k_get_invariants_pf(struct fm10k_hw *hw)
+{
+	fm10k_get_invariants_generic(hw);
+
+	return fm10k_sm_mbx_init(hw, &hw->mbx, fm10k_msg_data_pf);
+}
+
+const struct fm10k_info fm10k_pf_info = {
+	.mac		= fm10k_mac_pf,
+	.get_invariants	= fm10k_get_invariants_pf,
+	.mac_ops	= &mac_ops_pf,
+	.iov_ops	= &iov_ops_pf,
+};
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
new file mode 100644
index 0000000000..8e814df709
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _FM10K_PF_H_
+#define _FM10K_PF_H_
+
+#include "fm10k_type.h"
+#include "fm10k_common.h"
+
+bool fm10k_glort_valid_pf(struct fm10k_hw *hw, u16 glort);
+u16 fm10k_queues_per_pool(struct fm10k_hw *hw);
+u16 fm10k_vf_queue_index(struct fm10k_hw *hw, u16 vf_idx);
+
+enum fm10k_pf_tlv_msg_id_v1 {
+	FM10K_PF_MSG_ID_TEST			= 0x000, /* msg ID reserved */
+	FM10K_PF_MSG_ID_XCAST_MODES		= 0x001,
+	FM10K_PF_MSG_ID_UPDATE_MAC_FWD_RULE	= 0x002,
+	FM10K_PF_MSG_ID_LPORT_MAP		= 0x100,
+	FM10K_PF_MSG_ID_LPORT_CREATE		= 0x200,
+	FM10K_PF_MSG_ID_LPORT_DELETE		= 0x201,
+	FM10K_PF_MSG_ID_CONFIG			= 0x300,
+	FM10K_PF_MSG_ID_UPDATE_PVID		= 0x400,
+	FM10K_PF_MSG_ID_CREATE_FLOW_TABLE	= 0x501,
+	FM10K_PF_MSG_ID_DELETE_FLOW_TABLE	= 0x502,
+	FM10K_PF_MSG_ID_UPDATE_FLOW		= 0x503,
+	FM10K_PF_MSG_ID_DELETE_FLOW		= 0x504,
+	FM10K_PF_MSG_ID_SET_FLOW_STATE		= 0x505,
+};
+
+enum fm10k_pf_tlv_attr_id_v1 {
+	FM10K_PF_ATTR_ID_ERR			= 0x00,
+	FM10K_PF_ATTR_ID_LPORT_MAP		= 0x01,
+	FM10K_PF_ATTR_ID_XCAST_MODE		= 0x02,
+	FM10K_PF_ATTR_ID_MAC_UPDATE		= 0x03,
+	FM10K_PF_ATTR_ID_VLAN_UPDATE		= 0x04,
+	FM10K_PF_ATTR_ID_CONFIG			= 0x05,
+	FM10K_PF_ATTR_ID_CREATE_FLOW_TABLE	= 0x06,
+	FM10K_PF_ATTR_ID_DELETE_FLOW_TABLE	= 0x07,
+	FM10K_PF_ATTR_ID_UPDATE_FLOW		= 0x08,
+	FM10K_PF_ATTR_ID_FLOW_STATE		= 0x09,
+	FM10K_PF_ATTR_ID_FLOW_HANDLE		= 0x0A,
+	FM10K_PF_ATTR_ID_DELETE_FLOW		= 0x0B,
+	FM10K_PF_ATTR_ID_PORT			= 0x0C,
+	FM10K_PF_ATTR_ID_UPDATE_PVID		= 0x0D,
+};
+
+#define FM10K_MSG_LPORT_MAP_GLORT_SHIFT	0
+#define FM10K_MSG_LPORT_MAP_GLORT_SIZE	16
+#define FM10K_MSG_LPORT_MAP_MASK_SHIFT	16
+#define FM10K_MSG_LPORT_MAP_MASK_SIZE	16
+
+#define FM10K_MSG_UPDATE_PVID_GLORT_SHIFT	0
+#define FM10K_MSG_UPDATE_PVID_GLORT_SIZE	16
+#define FM10K_MSG_UPDATE_PVID_PVID_SHIFT	16
+#define FM10K_MSG_UPDATE_PVID_PVID_SIZE		16
+
+#define FM10K_MSG_ERR_PEP_NOT_SCHEDULED	280
+
+/* The following data structures are overlayed directly onto TLV mailbox
+ * messages, and must not break 4 byte alignment. Ensure the structures line
+ * up correctly as per their TLV definition.
+ */
+
+struct fm10k_mac_update {
+	__le32	mac_lower;
+	__le16	mac_upper;
+	__le16	vlan;
+	__le16	glort;
+	u8	flags;
+	u8	action;
+} __aligned(4) __packed;
+
+struct fm10k_global_table_data {
+	__le32	used;
+	__le32	avail;
+} __aligned(4) __packed;
+
+struct fm10k_swapi_error {
+	__le32				status;
+	struct fm10k_global_table_data	mac;
+	struct fm10k_global_table_data	nexthop;
+	struct fm10k_global_table_data	ffu;
+} __aligned(4) __packed;
+
+s32 fm10k_msg_lport_map_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *);
+extern const struct fm10k_tlv_attr fm10k_lport_map_msg_attr[];
+#define FM10K_PF_MSG_LPORT_MAP_HANDLER(func) \
+	FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_LPORT_MAP, \
+			  fm10k_lport_map_msg_attr, func)
+extern const struct fm10k_tlv_attr fm10k_update_pvid_msg_attr[];
+#define FM10K_PF_MSG_UPDATE_PVID_HANDLER(func) \
+	FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_UPDATE_PVID, \
+			  fm10k_update_pvid_msg_attr, func)
+
+s32 fm10k_msg_err_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *);
+extern const struct fm10k_tlv_attr fm10k_err_msg_attr[];
+#define FM10K_PF_MSG_ERR_HANDLER(msg, func) \
+	FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_##msg, fm10k_err_msg_attr, func)
+
+s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid);
+s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *);
+s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *, u32 **,
+			      struct fm10k_mbx_info *);
+s32 fm10k_iov_msg_lport_state_pf(struct fm10k_hw *, u32 **,
+				 struct fm10k_mbx_info *);
+
+extern const struct fm10k_info fm10k_pf_info;
+#endif /* _FM10K_PF_H */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
new file mode 100644
index 0000000000..75cbdf2dbb
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c
@@ -0,0 +1,850 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
+
+#include "fm10k_tlv.h"
+
+/**
+ *  fm10k_tlv_msg_init - Initialize message block for TLV data storage
+ *  @msg: Pointer to message block
+ *  @msg_id: Message ID indicating message type
+ *
+ *  This function return success if provided with a valid message pointer
+ **/
+s32 fm10k_tlv_msg_init(u32 *msg, u16 msg_id)
+{
+	/* verify pointer is not NULL */
+	if (!msg)
+		return FM10K_ERR_PARAM;
+
+	*msg = (FM10K_TLV_FLAGS_MSG << FM10K_TLV_FLAGS_SHIFT) | msg_id;
+
+	return 0;
+}
+
+/**
+ *  fm10k_tlv_attr_put_null_string - Place null terminated string on message
+ *  @msg: Pointer to message block
+ *  @attr_id: Attribute ID
+ *  @string: Pointer to string to be stored in attribute
+ *
+ *  This function will reorder a string to be CPU endian and store it in
+ *  the attribute buffer.  It will return success if provided with a valid
+ *  pointers.
+ **/
+static s32 fm10k_tlv_attr_put_null_string(u32 *msg, u16 attr_id,
+					  const unsigned char *string)
+{
+	u32 attr_data = 0, len = 0;
+	u32 *attr;
+
+	/* verify pointers are not NULL */
+	if (!string || !msg)
+		return FM10K_ERR_PARAM;
+
+	attr = &msg[FM10K_TLV_DWORD_LEN(*msg)];
+
+	/* copy string into local variable and then write to msg */
+	do {
+		/* write data to message */
+		if (len && !(len % 4)) {
+			attr[len / 4] = attr_data;
+			attr_data = 0;
+		}
+
+		/* record character to offset location */
+		attr_data |= (u32)(*string) << (8 * (len % 4));
+		len++;
+
+		/* test for NULL and then increment */
+	} while (*(string++));
+
+	/* write last piece of data to message */
+	attr[(len + 3) / 4] = attr_data;
+
+	/* record attribute header, update message length */
+	len <<= FM10K_TLV_LEN_SHIFT;
+	attr[0] = len | attr_id;
+
+	/* add header length to length */
+	len += FM10K_TLV_HDR_LEN << FM10K_TLV_LEN_SHIFT;
+	*msg += FM10K_TLV_LEN_ALIGN(len);
+
+	return 0;
+}
+
+/**
+ *  fm10k_tlv_attr_get_null_string - Get null terminated string from attribute
+ *  @attr: Pointer to attribute
+ *  @string: Pointer to location of destination string
+ *
+ *  This function pulls the string back out of the attribute and will place
+ *  it in the array pointed by string.  It will return success if provided
+ *  with a valid pointers.
+ **/
+static s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string)
+{
+	u32 len;
+
+	/* verify pointers are not NULL */
+	if (!string || !attr)
+		return FM10K_ERR_PARAM;
+
+	len = *attr >> FM10K_TLV_LEN_SHIFT;
+	attr++;
+
+	while (len--)
+		string[len] = (u8)(attr[len / 4] >> (8 * (len % 4)));
+
+	return 0;
+}
+
+/**
+ *  fm10k_tlv_attr_put_mac_vlan - Store MAC/VLAN attribute in message
+ *  @msg: Pointer to message block
+ *  @attr_id: Attribute ID
+ *  @mac_addr: MAC address to be stored
+ *  @vlan: VLAN to be stored
+ *
+ *  This function will reorder a MAC address to be CPU endian and store it
+ *  in the attribute buffer.  It will return success if provided with a
+ *  valid pointers.
+ **/
+s32 fm10k_tlv_attr_put_mac_vlan(u32 *msg, u16 attr_id,
+				const u8 *mac_addr, u16 vlan)
+{
+	u32 len = ETH_ALEN << FM10K_TLV_LEN_SHIFT;
+	u32 *attr;
+
+	/* verify pointers are not NULL */
+	if (!msg || !mac_addr)
+		return FM10K_ERR_PARAM;
+
+	attr = &msg[FM10K_TLV_DWORD_LEN(*msg)];
+
+	/* record attribute header, update message length */
+	attr[0] = len | attr_id;
+
+	/* copy value into local variable and then write to msg */
+	attr[1] = le32_to_cpu(*(const __le32 *)&mac_addr[0]);
+	attr[2] = le16_to_cpu(*(const __le16 *)&mac_addr[4]);
+	attr[2] |= (u32)vlan << 16;
+
+	/* add header length to length */
+	len += FM10K_TLV_HDR_LEN << FM10K_TLV_LEN_SHIFT;
+	*msg += FM10K_TLV_LEN_ALIGN(len);
+
+	return 0;
+}
+
+/**
+ *  fm10k_tlv_attr_get_mac_vlan - Get MAC/VLAN stored in attribute
+ *  @attr: Pointer to attribute
+ *  @mac_addr: location of buffer to store MAC address
+ *  @vlan: location of buffer to store VLAN
+ *
+ *  This function pulls the MAC address back out of the attribute and will
+ *  place it in the array pointed by mac_addr.  It will return success
+ *  if provided with a valid pointers.
+ **/
+s32 fm10k_tlv_attr_get_mac_vlan(u32 *attr, u8 *mac_addr, u16 *vlan)
+{
+	/* verify pointers are not NULL */
+	if (!mac_addr || !attr)
+		return FM10K_ERR_PARAM;
+
+	*(__le32 *)&mac_addr[0] = cpu_to_le32(attr[1]);
+	*(__le16 *)&mac_addr[4] = cpu_to_le16((u16)(attr[2]));
+	*vlan = (u16)(attr[2] >> 16);
+
+	return 0;
+}
+
+/**
+ *  fm10k_tlv_attr_put_bool - Add header indicating value "true"
+ *  @msg: Pointer to message block
+ *  @attr_id: Attribute ID
+ *
+ *  This function will simply add an attribute header, the fact
+ *  that the header is here means the attribute value is true, else
+ *  it is false.  The function will return success if provided with a
+ *  valid pointers.
+ **/
+s32 fm10k_tlv_attr_put_bool(u32 *msg, u16 attr_id)
+{
+	/* verify pointers are not NULL */
+	if (!msg)
+		return FM10K_ERR_PARAM;
+
+	/* record attribute header */
+	msg[FM10K_TLV_DWORD_LEN(*msg)] = attr_id;
+
+	/* add header length to length */
+	*msg += FM10K_TLV_HDR_LEN << FM10K_TLV_LEN_SHIFT;
+
+	return 0;
+}
+
+/**
+ *  fm10k_tlv_attr_put_value - Store integer value attribute in message
+ *  @msg: Pointer to message block
+ *  @attr_id: Attribute ID
+ *  @value: Value to be written
+ *  @len: Size of value
+ *
+ *  This function will place an integer value of up to 8 bytes in size
+ *  in a message attribute.  The function will return success provided
+ *  that msg is a valid pointer, and len is 1, 2, 4, or 8.
+ **/
+s32 fm10k_tlv_attr_put_value(u32 *msg, u16 attr_id, s64 value, u32 len)
+{
+	u32 *attr;
+
+	/* verify non-null msg and len is 1, 2, 4, or 8 */
+	if (!msg || !len || len > 8 || (len & (len - 1)))
+		return FM10K_ERR_PARAM;
+
+	attr = &msg[FM10K_TLV_DWORD_LEN(*msg)];
+
+	if (len < 4) {
+		attr[1] = (u32)value & (BIT(8 * len) - 1);
+	} else {
+		attr[1] = (u32)value;
+		if (len > 4)
+			attr[2] = (u32)(value >> 32);
+	}
+
+	/* record attribute header, update message length */
+	len <<= FM10K_TLV_LEN_SHIFT;
+	attr[0] = len | attr_id;
+
+	/* add header length to length */
+	len += FM10K_TLV_HDR_LEN << FM10K_TLV_LEN_SHIFT;
+	*msg += FM10K_TLV_LEN_ALIGN(len);
+
+	return 0;
+}
+
+/**
+ *  fm10k_tlv_attr_get_value - Get integer value stored in attribute
+ *  @attr: Pointer to attribute
+ *  @value: Pointer to destination buffer
+ *  @len: Size of value
+ *
+ *  This function will place an integer value of up to 8 bytes in size
+ *  in the offset pointed to by value.  The function will return success
+ *  provided that pointers are valid and the len value matches the
+ *  attribute length.
+ **/
+s32 fm10k_tlv_attr_get_value(u32 *attr, void *value, u32 len)
+{
+	/* verify pointers are not NULL */
+	if (!attr || !value)
+		return FM10K_ERR_PARAM;
+
+	if ((*attr >> FM10K_TLV_LEN_SHIFT) != len)
+		return FM10K_ERR_PARAM;
+
+	if (len == 8)
+		*(u64 *)value = ((u64)attr[2] << 32) | attr[1];
+	else if (len == 4)
+		*(u32 *)value = attr[1];
+	else if (len == 2)
+		*(u16 *)value = (u16)attr[1];
+	else
+		*(u8 *)value = (u8)attr[1];
+
+	return 0;
+}
+
+/**
+ *  fm10k_tlv_attr_put_le_struct - Store little endian structure in message
+ *  @msg: Pointer to message block
+ *  @attr_id: Attribute ID
+ *  @le_struct: Pointer to structure to be written
+ *  @len: Size of le_struct
+ *
+ *  This function will place a little endian structure value in a message
+ *  attribute.  The function will return success provided that all pointers
+ *  are valid and length is a non-zero multiple of 4.
+ **/
+s32 fm10k_tlv_attr_put_le_struct(u32 *msg, u16 attr_id,
+				 const void *le_struct, u32 len)
+{
+	const __le32 *le32_ptr = (const __le32 *)le_struct;
+	u32 *attr;
+	u32 i;
+
+	/* verify non-null msg and len is in 32 bit words */
+	if (!msg || !len || (len % 4))
+		return FM10K_ERR_PARAM;
+
+	attr = &msg[FM10K_TLV_DWORD_LEN(*msg)];
+
+	/* copy le32 structure into host byte order at 32b boundaries */
+	for (i = 0; i < (len / 4); i++)
+		attr[i + 1] = le32_to_cpu(le32_ptr[i]);
+
+	/* record attribute header, update message length */
+	len <<= FM10K_TLV_LEN_SHIFT;
+	attr[0] = len | attr_id;
+
+	/* add header length to length */
+	len += FM10K_TLV_HDR_LEN << FM10K_TLV_LEN_SHIFT;
+	*msg += FM10K_TLV_LEN_ALIGN(len);
+
+	return 0;
+}
+
+/**
+ *  fm10k_tlv_attr_get_le_struct - Get little endian struct form attribute
+ *  @attr: Pointer to attribute
+ *  @le_struct: Pointer to structure to be written
+ *  @len: Size of structure
+ *
+ *  This function will place a little endian structure in the buffer
+ *  pointed to by le_struct.  The function will return success
+ *  provided that pointers are valid and the len value matches the
+ *  attribute length.
+ **/
+s32 fm10k_tlv_attr_get_le_struct(u32 *attr, void *le_struct, u32 len)
+{
+	__le32 *le32_ptr = (__le32 *)le_struct;
+	u32 i;
+
+	/* verify pointers are not NULL */
+	if (!le_struct || !attr)
+		return FM10K_ERR_PARAM;
+
+	if ((*attr >> FM10K_TLV_LEN_SHIFT) != len)
+		return FM10K_ERR_PARAM;
+
+	attr++;
+
+	for (i = 0; len; i++, len -= 4)
+		le32_ptr[i] = cpu_to_le32(attr[i]);
+
+	return 0;
+}
+
+/**
+ *  fm10k_tlv_attr_nest_start - Start a set of nested attributes
+ *  @msg: Pointer to message block
+ *  @attr_id: Attribute ID
+ *
+ *  This function will mark off a new nested region for encapsulating
+ *  a given set of attributes.  The idea is if you wish to place a secondary
+ *  structure within the message this mechanism allows for that.  The
+ *  function will return NULL on failure, and a pointer to the start
+ *  of the nested attributes on success.
+ **/
+static u32 *fm10k_tlv_attr_nest_start(u32 *msg, u16 attr_id)
+{
+	u32 *attr;
+
+	/* verify pointer is not NULL */
+	if (!msg)
+		return NULL;
+
+	attr = &msg[FM10K_TLV_DWORD_LEN(*msg)];
+
+	attr[0] = attr_id;
+
+	/* return pointer to nest header */
+	return attr;
+}
+
+/**
+ *  fm10k_tlv_attr_nest_stop - Stop a set of nested attributes
+ *  @msg: Pointer to message block
+ *
+ *  This function closes off an existing set of nested attributes.  The
+ *  message pointer should be pointing to the parent of the nest.  So in
+ *  the case of a nest within the nest this would be the outer nest pointer.
+ *  This function will return success provided all pointers are valid.
+ **/
+static s32 fm10k_tlv_attr_nest_stop(u32 *msg)
+{
+	u32 *attr;
+	u32 len;
+
+	/* verify pointer is not NULL */
+	if (!msg)
+		return FM10K_ERR_PARAM;
+
+	/* locate the nested header and retrieve its length */
+	attr = &msg[FM10K_TLV_DWORD_LEN(*msg)];
+	len = (attr[0] >> FM10K_TLV_LEN_SHIFT) << FM10K_TLV_LEN_SHIFT;
+
+	/* only include nest if data was added to it */
+	if (len) {
+		len += FM10K_TLV_HDR_LEN << FM10K_TLV_LEN_SHIFT;
+		*msg += len;
+	}
+
+	return 0;
+}
+
+/**
+ *  fm10k_tlv_attr_validate - Validate attribute metadata
+ *  @attr: Pointer to attribute
+ *  @tlv_attr: Type and length info for attribute
+ *
+ *  This function does some basic validation of the input TLV.  It
+ *  verifies the length, and in the case of null terminated strings
+ *  it verifies that the last byte is null.  The function will
+ *  return FM10K_ERR_PARAM if any attribute is malformed, otherwise
+ *  it returns 0.
+ **/
+static s32 fm10k_tlv_attr_validate(u32 *attr,
+				   const struct fm10k_tlv_attr *tlv_attr)
+{
+	u32 attr_id = *attr & FM10K_TLV_ID_MASK;
+	u16 len = *attr >> FM10K_TLV_LEN_SHIFT;
+
+	/* verify this is an attribute and not a message */
+	if (*attr & (FM10K_TLV_FLAGS_MSG << FM10K_TLV_FLAGS_SHIFT))
+		return FM10K_ERR_PARAM;
+
+	/* search through the list of attributes to find a matching ID */
+	while (tlv_attr->id < attr_id)
+		tlv_attr++;
+
+	/* if didn't find a match then we should exit */
+	if (tlv_attr->id != attr_id)
+		return FM10K_NOT_IMPLEMENTED;
+
+	/* move to start of attribute data */
+	attr++;
+
+	switch (tlv_attr->type) {
+	case FM10K_TLV_NULL_STRING:
+		if (!len ||
+		    (attr[(len - 1) / 4] & (0xFF << (8 * ((len - 1) % 4)))))
+			return FM10K_ERR_PARAM;
+		if (len > tlv_attr->len)
+			return FM10K_ERR_PARAM;
+		break;
+	case FM10K_TLV_MAC_ADDR:
+		if (len != ETH_ALEN)
+			return FM10K_ERR_PARAM;
+		break;
+	case FM10K_TLV_BOOL:
+		if (len)
+			return FM10K_ERR_PARAM;
+		break;
+	case FM10K_TLV_UNSIGNED:
+	case FM10K_TLV_SIGNED:
+		if (len != tlv_attr->len)
+			return FM10K_ERR_PARAM;
+		break;
+	case FM10K_TLV_LE_STRUCT:
+		/* struct must be 4 byte aligned */
+		if ((len % 4) || len != tlv_attr->len)
+			return FM10K_ERR_PARAM;
+		break;
+	case FM10K_TLV_NESTED:
+		/* nested attributes must be 4 byte aligned */
+		if (len % 4)
+			return FM10K_ERR_PARAM;
+		break;
+	default:
+		/* attribute id is mapped to bad value */
+		return FM10K_ERR_PARAM;
+	}
+
+	return 0;
+}
+
+/**
+ *  fm10k_tlv_attr_parse - Parses stream of attribute data
+ *  @attr: Pointer to attribute list
+ *  @results: Pointer array to store pointers to attributes
+ *  @tlv_attr: Type and length info for attributes
+ *
+ *  This function validates a stream of attributes and parses them
+ *  up into an array of pointers stored in results.  The function will
+ *  return FM10K_ERR_PARAM on any input or message error,
+ *  FM10K_NOT_IMPLEMENTED for any attribute that is outside of the array
+ *  and 0 on success. Any attributes not found in tlv_attr will be silently
+ *  ignored.
+ **/
+static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results,
+				const struct fm10k_tlv_attr *tlv_attr)
+{
+	u32 i, attr_id, offset = 0;
+	s32 err;
+	u16 len;
+
+	/* verify pointers are not NULL */
+	if (!attr || !results)
+		return FM10K_ERR_PARAM;
+
+	/* initialize results to NULL */
+	for (i = 0; i < FM10K_TLV_RESULTS_MAX; i++)
+		results[i] = NULL;
+
+	/* pull length from the message header */
+	len = *attr >> FM10K_TLV_LEN_SHIFT;
+
+	/* no attributes to parse if there is no length */
+	if (!len)
+		return 0;
+
+	/* no attributes to parse, just raw data, message becomes attribute */
+	if (!tlv_attr) {
+		results[0] = attr;
+		return 0;
+	}
+
+	/* move to start of attribute data */
+	attr++;
+
+	/* run through list parsing all attributes */
+	while (offset < len) {
+		attr_id = *attr & FM10K_TLV_ID_MASK;
+
+		if (attr_id >= FM10K_TLV_RESULTS_MAX)
+			return FM10K_NOT_IMPLEMENTED;
+
+		err = fm10k_tlv_attr_validate(attr, tlv_attr);
+		if (err == FM10K_NOT_IMPLEMENTED)
+			; /* silently ignore non-implemented attributes */
+		else if (err)
+			return err;
+		else
+			results[attr_id] = attr;
+
+		/* update offset */
+		offset += FM10K_TLV_DWORD_LEN(*attr) * 4;
+
+		/* move to next attribute */
+		attr = &attr[FM10K_TLV_DWORD_LEN(*attr)];
+	}
+
+	/* we should find ourselves at the end of the list */
+	if (offset != len)
+		return FM10K_ERR_PARAM;
+
+	return 0;
+}
+
+/**
+ *  fm10k_tlv_msg_parse - Parses message header and calls function handler
+ *  @hw: Pointer to hardware structure
+ *  @msg: Pointer to message
+ *  @mbx: Pointer to mailbox information structure
+ *  @data: Pointer to message handler data structure
+ *
+ *  This function should be the first function called upon receiving a
+ *  message.  The handler will identify the message type and call the correct
+ *  handler for the given message.  It will return the value from the function
+ *  call on a recognized message type, otherwise it will return
+ *  FM10K_NOT_IMPLEMENTED on an unrecognized type.
+ **/
+s32 fm10k_tlv_msg_parse(struct fm10k_hw *hw, u32 *msg,
+			struct fm10k_mbx_info *mbx,
+			const struct fm10k_msg_data *data)
+{
+	u32 *results[FM10K_TLV_RESULTS_MAX];
+	u32 msg_id;
+	s32 err;
+
+	/* verify pointer is not NULL */
+	if (!msg || !data)
+		return FM10K_ERR_PARAM;
+
+	/* verify this is a message and not an attribute */
+	if (!(*msg & (FM10K_TLV_FLAGS_MSG << FM10K_TLV_FLAGS_SHIFT)))
+		return FM10K_ERR_PARAM;
+
+	/* grab message ID */
+	msg_id = *msg & FM10K_TLV_ID_MASK;
+
+	while (data->id < msg_id)
+		data++;
+
+	/* if we didn't find it then pass it up as an error */
+	if (data->id != msg_id) {
+		while (data->id != FM10K_TLV_ERROR)
+			data++;
+	}
+
+	/* parse the attributes into the results list */
+	err = fm10k_tlv_attr_parse(msg, results, data->attr);
+	if (err < 0)
+		return err;
+
+	return data->func(hw, results, mbx);
+}
+
+/**
+ *  fm10k_tlv_msg_error - Default handler for unrecognized TLV message IDs
+ *  @hw: Pointer to hardware structure
+ *  @results: Pointer array to message, results[0] is pointer to message
+ *  @mbx: Unused mailbox pointer
+ *
+ *  This function is a default handler for unrecognized messages.  At a
+ *  minimum it just indicates that the message requested was
+ *  unimplemented.
+ **/
+s32 fm10k_tlv_msg_error(struct fm10k_hw __always_unused *hw,
+			u32 __always_unused **results,
+			struct fm10k_mbx_info __always_unused *mbx)
+{
+	return FM10K_NOT_IMPLEMENTED;
+}
+
+static const unsigned char test_str[] =	"fm10k";
+static const unsigned char test_mac[ETH_ALEN] = { 0x12, 0x34, 0x56,
+						  0x78, 0x9a, 0xbc };
+static const u16 test_vlan = 0x0FED;
+static const u64 test_u64 = 0xfedcba9876543210ull;
+static const u32 test_u32 = 0x87654321;
+static const u16 test_u16 = 0x8765;
+static const u8  test_u8  = 0x87;
+static const s64 test_s64 = -0x123456789abcdef0ll;
+static const s32 test_s32 = -0x1235678;
+static const s16 test_s16 = -0x1234;
+static const s8  test_s8  = -0x12;
+static const __le32 test_le[2] = { cpu_to_le32(0x12345678),
+				   cpu_to_le32(0x9abcdef0)};
+
+/* The message below is meant to be used as a test message to demonstrate
+ * how to use the TLV interface and to test the types.  Normally this code
+ * be compiled out by stripping the code wrapped in FM10K_TLV_TEST_MSG
+ */
+const struct fm10k_tlv_attr fm10k_tlv_msg_test_attr[] = {
+	FM10K_TLV_ATTR_NULL_STRING(FM10K_TEST_MSG_STRING, 80),
+	FM10K_TLV_ATTR_MAC_ADDR(FM10K_TEST_MSG_MAC_ADDR),
+	FM10K_TLV_ATTR_U8(FM10K_TEST_MSG_U8),
+	FM10K_TLV_ATTR_U16(FM10K_TEST_MSG_U16),
+	FM10K_TLV_ATTR_U32(FM10K_TEST_MSG_U32),
+	FM10K_TLV_ATTR_U64(FM10K_TEST_MSG_U64),
+	FM10K_TLV_ATTR_S8(FM10K_TEST_MSG_S8),
+	FM10K_TLV_ATTR_S16(FM10K_TEST_MSG_S16),
+	FM10K_TLV_ATTR_S32(FM10K_TEST_MSG_S32),
+	FM10K_TLV_ATTR_S64(FM10K_TEST_MSG_S64),
+	FM10K_TLV_ATTR_LE_STRUCT(FM10K_TEST_MSG_LE_STRUCT, 8),
+	FM10K_TLV_ATTR_NESTED(FM10K_TEST_MSG_NESTED),
+	FM10K_TLV_ATTR_S32(FM10K_TEST_MSG_RESULT),
+	FM10K_TLV_ATTR_LAST
+};
+
+/**
+ *  fm10k_tlv_msg_test_generate_data - Stuff message with data
+ *  @msg: Pointer to message
+ *  @attr_flags: List of flags indicating what attributes to add
+ *
+ *  This function is meant to load a message buffer with attribute data
+ **/
+static void fm10k_tlv_msg_test_generate_data(u32 *msg, u32 attr_flags)
+{
+	if (attr_flags & BIT(FM10K_TEST_MSG_STRING))
+		fm10k_tlv_attr_put_null_string(msg, FM10K_TEST_MSG_STRING,
+					       test_str);
+	if (attr_flags & BIT(FM10K_TEST_MSG_MAC_ADDR))
+		fm10k_tlv_attr_put_mac_vlan(msg, FM10K_TEST_MSG_MAC_ADDR,
+					    test_mac, test_vlan);
+	if (attr_flags & BIT(FM10K_TEST_MSG_U8))
+		fm10k_tlv_attr_put_u8(msg, FM10K_TEST_MSG_U8,  test_u8);
+	if (attr_flags & BIT(FM10K_TEST_MSG_U16))
+		fm10k_tlv_attr_put_u16(msg, FM10K_TEST_MSG_U16, test_u16);
+	if (attr_flags & BIT(FM10K_TEST_MSG_U32))
+		fm10k_tlv_attr_put_u32(msg, FM10K_TEST_MSG_U32, test_u32);
+	if (attr_flags & BIT(FM10K_TEST_MSG_U64))
+		fm10k_tlv_attr_put_u64(msg, FM10K_TEST_MSG_U64, test_u64);
+	if (attr_flags & BIT(FM10K_TEST_MSG_S8))
+		fm10k_tlv_attr_put_s8(msg, FM10K_TEST_MSG_S8,  test_s8);
+	if (attr_flags & BIT(FM10K_TEST_MSG_S16))
+		fm10k_tlv_attr_put_s16(msg, FM10K_TEST_MSG_S16, test_s16);
+	if (attr_flags & BIT(FM10K_TEST_MSG_S32))
+		fm10k_tlv_attr_put_s32(msg, FM10K_TEST_MSG_S32, test_s32);
+	if (attr_flags & BIT(FM10K_TEST_MSG_S64))
+		fm10k_tlv_attr_put_s64(msg, FM10K_TEST_MSG_S64, test_s64);
+	if (attr_flags & BIT(FM10K_TEST_MSG_LE_STRUCT))
+		fm10k_tlv_attr_put_le_struct(msg, FM10K_TEST_MSG_LE_STRUCT,
+					     test_le, 8);
+}
+
+/**
+ *  fm10k_tlv_msg_test_create - Create a test message testing all attributes
+ *  @msg: Pointer to message
+ *  @attr_flags: List of flags indicating what attributes to add
+ *
+ *  This function is meant to load a message buffer with all attribute types
+ *  including a nested attribute.
+ **/
+void fm10k_tlv_msg_test_create(u32 *msg, u32 attr_flags)
+{
+	u32 *nest = NULL;
+
+	fm10k_tlv_msg_init(msg, FM10K_TLV_MSG_ID_TEST);
+
+	fm10k_tlv_msg_test_generate_data(msg, attr_flags);
+
+	/* check for nested attributes */
+	attr_flags >>= FM10K_TEST_MSG_NESTED;
+
+	if (attr_flags) {
+		nest = fm10k_tlv_attr_nest_start(msg, FM10K_TEST_MSG_NESTED);
+
+		fm10k_tlv_msg_test_generate_data(nest, attr_flags);
+
+		fm10k_tlv_attr_nest_stop(msg);
+	}
+}
+
+/**
+ *  fm10k_tlv_msg_test - Validate all results on test message receive
+ *  @hw: Pointer to hardware structure
+ *  @results: Pointer array to attributes in the message
+ *  @mbx: Pointer to mailbox information structure
+ *
+ *  This function does a check to verify all attributes match what the test
+ *  message placed in the message buffer.  It is the default handler
+ *  for TLV test messages.
+ **/
+s32 fm10k_tlv_msg_test(struct fm10k_hw *hw, u32 **results,
+		       struct fm10k_mbx_info *mbx)
+{
+	u32 *nest_results[FM10K_TLV_RESULTS_MAX];
+	unsigned char result_str[80];
+	unsigned char result_mac[ETH_ALEN];
+	s32 err = 0;
+	__le32 result_le[2];
+	u16 result_vlan;
+	u64 result_u64;
+	u32 result_u32;
+	u16 result_u16;
+	u8  result_u8;
+	s64 result_s64;
+	s32 result_s32;
+	s16 result_s16;
+	s8  result_s8;
+	u32 reply[3];
+
+	/* retrieve results of a previous test */
+	if (!!results[FM10K_TEST_MSG_RESULT])
+		return fm10k_tlv_attr_get_s32(results[FM10K_TEST_MSG_RESULT],
+					      &mbx->test_result);
+
+parse_nested:
+	if (!!results[FM10K_TEST_MSG_STRING]) {
+		err = fm10k_tlv_attr_get_null_string(
+					results[FM10K_TEST_MSG_STRING],
+					result_str);
+		if (!err && memcmp(test_str, result_str, sizeof(test_str)))
+			err = FM10K_ERR_INVALID_VALUE;
+		if (err)
+			goto report_result;
+	}
+	if (!!results[FM10K_TEST_MSG_MAC_ADDR]) {
+		err = fm10k_tlv_attr_get_mac_vlan(
+					results[FM10K_TEST_MSG_MAC_ADDR],
+					result_mac, &result_vlan);
+		if (!err && !ether_addr_equal(test_mac, result_mac))
+			err = FM10K_ERR_INVALID_VALUE;
+		if (!err && test_vlan != result_vlan)
+			err = FM10K_ERR_INVALID_VALUE;
+		if (err)
+			goto report_result;
+	}
+	if (!!results[FM10K_TEST_MSG_U8]) {
+		err = fm10k_tlv_attr_get_u8(results[FM10K_TEST_MSG_U8],
+					    &result_u8);
+		if (!err && test_u8 != result_u8)
+			err = FM10K_ERR_INVALID_VALUE;
+		if (err)
+			goto report_result;
+	}
+	if (!!results[FM10K_TEST_MSG_U16]) {
+		err = fm10k_tlv_attr_get_u16(results[FM10K_TEST_MSG_U16],
+					     &result_u16);
+		if (!err && test_u16 != result_u16)
+			err = FM10K_ERR_INVALID_VALUE;
+		if (err)
+			goto report_result;
+	}
+	if (!!results[FM10K_TEST_MSG_U32]) {
+		err = fm10k_tlv_attr_get_u32(results[FM10K_TEST_MSG_U32],
+					     &result_u32);
+		if (!err && test_u32 != result_u32)
+			err = FM10K_ERR_INVALID_VALUE;
+		if (err)
+			goto report_result;
+	}
+	if (!!results[FM10K_TEST_MSG_U64]) {
+		err = fm10k_tlv_attr_get_u64(results[FM10K_TEST_MSG_U64],
+					     &result_u64);
+		if (!err && test_u64 != result_u64)
+			err = FM10K_ERR_INVALID_VALUE;
+		if (err)
+			goto report_result;
+	}
+	if (!!results[FM10K_TEST_MSG_S8]) {
+		err = fm10k_tlv_attr_get_s8(results[FM10K_TEST_MSG_S8],
+					    &result_s8);
+		if (!err && test_s8 != result_s8)
+			err = FM10K_ERR_INVALID_VALUE;
+		if (err)
+			goto report_result;
+	}
+	if (!!results[FM10K_TEST_MSG_S16]) {
+		err = fm10k_tlv_attr_get_s16(results[FM10K_TEST_MSG_S16],
+					     &result_s16);
+		if (!err && test_s16 != result_s16)
+			err = FM10K_ERR_INVALID_VALUE;
+		if (err)
+			goto report_result;
+	}
+	if (!!results[FM10K_TEST_MSG_S32]) {
+		err = fm10k_tlv_attr_get_s32(results[FM10K_TEST_MSG_S32],
+					     &result_s32);
+		if (!err && test_s32 != result_s32)
+			err = FM10K_ERR_INVALID_VALUE;
+		if (err)
+			goto report_result;
+	}
+	if (!!results[FM10K_TEST_MSG_S64]) {
+		err = fm10k_tlv_attr_get_s64(results[FM10K_TEST_MSG_S64],
+					     &result_s64);
+		if (!err && test_s64 != result_s64)
+			err = FM10K_ERR_INVALID_VALUE;
+		if (err)
+			goto report_result;
+	}
+	if (!!results[FM10K_TEST_MSG_LE_STRUCT]) {
+		err = fm10k_tlv_attr_get_le_struct(
+					results[FM10K_TEST_MSG_LE_STRUCT],
+					result_le,
+					sizeof(result_le));
+		if (!err && memcmp(test_le, result_le, sizeof(test_le)))
+			err = FM10K_ERR_INVALID_VALUE;
+		if (err)
+			goto report_result;
+	}
+
+	if (!!results[FM10K_TEST_MSG_NESTED]) {
+		/* clear any pointers */
+		memset(nest_results, 0, sizeof(nest_results));
+
+		/* parse the nested attributes into the nest results list */
+		err = fm10k_tlv_attr_parse(results[FM10K_TEST_MSG_NESTED],
+					   nest_results,
+					   fm10k_tlv_msg_test_attr);
+		if (err)
+			goto report_result;
+
+		/* loop back through to the start */
+		results = nest_results;
+		goto parse_nested;
+	}
+
+report_result:
+	/* generate reply with test result */
+	fm10k_tlv_msg_init(reply, FM10K_TLV_MSG_ID_TEST);
+	fm10k_tlv_attr_put_s32(reply, FM10K_TEST_MSG_RESULT, err);
+
+	/* load onto outgoing mailbox */
+	return mbx->ops.enqueue_tx(hw, mbx, reply);
+}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
new file mode 100644
index 0000000000..ceb9b791f7
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
+
+#ifndef _FM10K_TLV_H_
+#define _FM10K_TLV_H_
+
+/* forward declaration */
+struct fm10k_msg_data;
+
+#include "fm10k_type.h"
+
+/* Message / Argument header format
+ *    3			  2		      1			  0
+ *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |	     Length	   | Flags |	      Type / ID		   |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * The message header format described here is used for messages that are
+ * passed between the PF and the VF.  To allow for messages larger then
+ * mailbox size we will provide a message with the above header and it
+ * will be segmented and transported to the mailbox to the other side where
+ * it is reassembled.  It contains the following fields:
+ * Length: Length of the message in bytes excluding the message header
+ * Flags: TBD
+ * Type/ID: These will be the message/argument types we pass
+ */
+/* message data header */
+#define FM10K_TLV_ID_SHIFT		0
+#define FM10K_TLV_ID_SIZE		16
+#define FM10K_TLV_ID_MASK		((1u << FM10K_TLV_ID_SIZE) - 1)
+#define FM10K_TLV_FLAGS_SHIFT		16
+#define FM10K_TLV_FLAGS_MSG		0x1
+#define FM10K_TLV_FLAGS_SIZE		4
+#define FM10K_TLV_LEN_SHIFT		20
+#define FM10K_TLV_LEN_SIZE		12
+
+#define FM10K_TLV_HDR_LEN		4ul
+#define FM10K_TLV_LEN_ALIGN_MASK \
+	((FM10K_TLV_HDR_LEN - 1) << FM10K_TLV_LEN_SHIFT)
+#define FM10K_TLV_LEN_ALIGN(tlv) \
+	(((tlv) + FM10K_TLV_LEN_ALIGN_MASK) & ~FM10K_TLV_LEN_ALIGN_MASK)
+#define FM10K_TLV_DWORD_LEN(tlv) \
+	((u16)((FM10K_TLV_LEN_ALIGN(tlv)) >> (FM10K_TLV_LEN_SHIFT + 2)) + 1)
+
+#define FM10K_TLV_RESULTS_MAX		32
+
+enum fm10k_tlv_type {
+	FM10K_TLV_NULL_STRING,
+	FM10K_TLV_MAC_ADDR,
+	FM10K_TLV_BOOL,
+	FM10K_TLV_UNSIGNED,
+	FM10K_TLV_SIGNED,
+	FM10K_TLV_LE_STRUCT,
+	FM10K_TLV_NESTED,
+	FM10K_TLV_MAX_TYPE
+};
+
+#define FM10K_TLV_ERROR (~0u)
+
+struct fm10k_tlv_attr {
+	unsigned int		id;
+	enum fm10k_tlv_type	type;
+	u16			len;
+};
+
+#define FM10K_TLV_ATTR_NULL_STRING(id, len) { id, FM10K_TLV_NULL_STRING, len }
+#define FM10K_TLV_ATTR_MAC_ADDR(id)	    { id, FM10K_TLV_MAC_ADDR, 6 }
+#define FM10K_TLV_ATTR_BOOL(id)		    { id, FM10K_TLV_BOOL, 0 }
+#define FM10K_TLV_ATTR_U8(id)		    { id, FM10K_TLV_UNSIGNED, 1 }
+#define FM10K_TLV_ATTR_U16(id)		    { id, FM10K_TLV_UNSIGNED, 2 }
+#define FM10K_TLV_ATTR_U32(id)		    { id, FM10K_TLV_UNSIGNED, 4 }
+#define FM10K_TLV_ATTR_U64(id)		    { id, FM10K_TLV_UNSIGNED, 8 }
+#define FM10K_TLV_ATTR_S8(id)		    { id, FM10K_TLV_SIGNED, 1 }
+#define FM10K_TLV_ATTR_S16(id)		    { id, FM10K_TLV_SIGNED, 2 }
+#define FM10K_TLV_ATTR_S32(id)		    { id, FM10K_TLV_SIGNED, 4 }
+#define FM10K_TLV_ATTR_S64(id)		    { id, FM10K_TLV_SIGNED, 8 }
+#define FM10K_TLV_ATTR_LE_STRUCT(id, len)   { id, FM10K_TLV_LE_STRUCT, len }
+#define FM10K_TLV_ATTR_NESTED(id)	    { id, FM10K_TLV_NESTED, 0 }
+#define FM10K_TLV_ATTR_LAST		    { FM10K_TLV_ERROR, 0, 0 }
+
+struct fm10k_msg_data {
+	unsigned int		    id;
+	const struct fm10k_tlv_attr *attr;
+	s32			    (*func)(struct fm10k_hw *, u32 **,
+					    struct fm10k_mbx_info *);
+};
+
+#define FM10K_MSG_HANDLER(id, attr, func) { id, attr, func }
+
+s32 fm10k_tlv_msg_init(u32 *, u16);
+s32 fm10k_tlv_attr_put_mac_vlan(u32 *, u16, const u8 *, u16);
+s32 fm10k_tlv_attr_get_mac_vlan(u32 *, u8 *, u16 *);
+s32 fm10k_tlv_attr_put_bool(u32 *, u16);
+s32 fm10k_tlv_attr_put_value(u32 *, u16, s64, u32);
+#define fm10k_tlv_attr_put_u8(msg, attr_id, val) \
+		fm10k_tlv_attr_put_value(msg, attr_id, val, 1)
+#define fm10k_tlv_attr_put_u16(msg, attr_id, val) \
+		fm10k_tlv_attr_put_value(msg, attr_id, val, 2)
+#define fm10k_tlv_attr_put_u32(msg, attr_id, val) \
+		fm10k_tlv_attr_put_value(msg, attr_id, val, 4)
+#define fm10k_tlv_attr_put_u64(msg, attr_id, val) \
+		fm10k_tlv_attr_put_value(msg, attr_id, val, 8)
+#define fm10k_tlv_attr_put_s8(msg, attr_id, val) \
+		fm10k_tlv_attr_put_value(msg, attr_id, val, 1)
+#define fm10k_tlv_attr_put_s16(msg, attr_id, val) \
+		fm10k_tlv_attr_put_value(msg, attr_id, val, 2)
+#define fm10k_tlv_attr_put_s32(msg, attr_id, val) \
+		fm10k_tlv_attr_put_value(msg, attr_id, val, 4)
+#define fm10k_tlv_attr_put_s64(msg, attr_id, val) \
+		fm10k_tlv_attr_put_value(msg, attr_id, val, 8)
+s32 fm10k_tlv_attr_get_value(u32 *, void *, u32);
+#define fm10k_tlv_attr_get_u8(attr, ptr) \
+		fm10k_tlv_attr_get_value(attr, ptr, sizeof(u8))
+#define fm10k_tlv_attr_get_u16(attr, ptr) \
+		fm10k_tlv_attr_get_value(attr, ptr, sizeof(u16))
+#define fm10k_tlv_attr_get_u32(attr, ptr) \
+		fm10k_tlv_attr_get_value(attr, ptr, sizeof(u32))
+#define fm10k_tlv_attr_get_u64(attr, ptr) \
+		fm10k_tlv_attr_get_value(attr, ptr, sizeof(u64))
+#define fm10k_tlv_attr_get_s8(attr, ptr) \
+		fm10k_tlv_attr_get_value(attr, ptr, sizeof(s8))
+#define fm10k_tlv_attr_get_s16(attr, ptr) \
+		fm10k_tlv_attr_get_value(attr, ptr, sizeof(s16))
+#define fm10k_tlv_attr_get_s32(attr, ptr) \
+		fm10k_tlv_attr_get_value(attr, ptr, sizeof(s32))
+#define fm10k_tlv_attr_get_s64(attr, ptr) \
+		fm10k_tlv_attr_get_value(attr, ptr, sizeof(s64))
+s32 fm10k_tlv_attr_put_le_struct(u32 *, u16, const void *, u32);
+s32 fm10k_tlv_attr_get_le_struct(u32 *, void *, u32);
+s32 fm10k_tlv_msg_parse(struct fm10k_hw *, u32 *, struct fm10k_mbx_info *,
+			const struct fm10k_msg_data *);
+s32 fm10k_tlv_msg_error(struct fm10k_hw *hw, u32 **results,
+			struct fm10k_mbx_info *);
+
+#define FM10K_TLV_MSG_ID_TEST	0
+
+enum fm10k_tlv_test_attr_id {
+	FM10K_TEST_MSG_UNSET,
+	FM10K_TEST_MSG_STRING,
+	FM10K_TEST_MSG_MAC_ADDR,
+	FM10K_TEST_MSG_U8,
+	FM10K_TEST_MSG_U16,
+	FM10K_TEST_MSG_U32,
+	FM10K_TEST_MSG_U64,
+	FM10K_TEST_MSG_S8,
+	FM10K_TEST_MSG_S16,
+	FM10K_TEST_MSG_S32,
+	FM10K_TEST_MSG_S64,
+	FM10K_TEST_MSG_LE_STRUCT,
+	FM10K_TEST_MSG_NESTED,
+	FM10K_TEST_MSG_RESULT,
+	FM10K_TEST_MSG_MAX
+};
+
+extern const struct fm10k_tlv_attr fm10k_tlv_msg_test_attr[];
+void fm10k_tlv_msg_test_create(u32 *, u32);
+s32 fm10k_tlv_msg_test(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *);
+
+#define FM10K_TLV_MSG_TEST_HANDLER(func) \
+	FM10K_MSG_HANDLER(FM10K_TLV_MSG_ID_TEST, fm10k_tlv_msg_test_attr, func)
+#define FM10K_TLV_MSG_ERROR_HANDLER(func) \
+	FM10K_MSG_HANDLER(FM10K_TLV_ERROR, NULL, func)
+#endif /* _FM10K_MSG_H_ */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
new file mode 100644
index 0000000000..63968c5d7c
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
@@ -0,0 +1,768 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
+
+#ifndef _FM10K_TYPE_H_
+#define _FM10K_TYPE_H_
+
+/* forward declaration */
+struct fm10k_hw;
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/etherdevice.h>
+
+#include "fm10k_mbx.h"
+
+#define FM10K_DEV_ID_PF			0x15A4
+#define FM10K_DEV_ID_VF			0x15A5
+#define FM10K_DEV_ID_SDI_FM10420_QDA2	0x15D0
+#define FM10K_DEV_ID_SDI_FM10420_DA2	0x15D5
+
+#define FM10K_MAX_QUEUES		256
+#define FM10K_MAX_QUEUES_PF		128
+#define FM10K_MAX_QUEUES_POOL		16
+
+#define FM10K_48_BIT_MASK		0x0000FFFFFFFFFFFFull
+#define FM10K_STAT_VALID		0x80000000
+
+/* PCI Bus Info */
+#define FM10K_PCIE_LINK_CAP		0x7C
+#define FM10K_PCIE_LINK_STATUS		0x82
+#define FM10K_PCIE_LINK_WIDTH		0x3F0
+#define FM10K_PCIE_LINK_WIDTH_1		0x10
+#define FM10K_PCIE_LINK_WIDTH_2		0x20
+#define FM10K_PCIE_LINK_WIDTH_4		0x40
+#define FM10K_PCIE_LINK_WIDTH_8		0x80
+#define FM10K_PCIE_LINK_SPEED		0xF
+#define FM10K_PCIE_LINK_SPEED_2500	0x1
+#define FM10K_PCIE_LINK_SPEED_5000	0x2
+#define FM10K_PCIE_LINK_SPEED_8000	0x3
+
+/* PCIe payload size */
+#define FM10K_PCIE_DEV_CAP			0x74
+#define FM10K_PCIE_DEV_CAP_PAYLOAD		0x07
+#define FM10K_PCIE_DEV_CAP_PAYLOAD_128		0x00
+#define FM10K_PCIE_DEV_CAP_PAYLOAD_256		0x01
+#define FM10K_PCIE_DEV_CAP_PAYLOAD_512		0x02
+#define FM10K_PCIE_DEV_CTRL			0x78
+#define FM10K_PCIE_DEV_CTRL_PAYLOAD		0xE0
+#define FM10K_PCIE_DEV_CTRL_PAYLOAD_128		0x00
+#define FM10K_PCIE_DEV_CTRL_PAYLOAD_256		0x20
+#define FM10K_PCIE_DEV_CTRL_PAYLOAD_512		0x40
+
+/* PCIe MSI-X Capability info */
+#define FM10K_PCI_MSIX_MSG_CTRL			0xB2
+#define FM10K_PCI_MSIX_MSG_CTRL_TBL_SZ_MASK	0x7FF
+#define FM10K_MAX_MSIX_VECTORS			256
+#define FM10K_MAX_VECTORS_PF			256
+#define FM10K_MAX_VECTORS_POOL			32
+
+/* PCIe SR-IOV Info */
+#define FM10K_PCIE_SRIOV_CTRL			0x190
+#define FM10K_PCIE_SRIOV_CTRL_VFARI		0x10
+
+#define FM10K_ERR_PARAM				-2
+#define FM10K_ERR_NO_RESOURCES			-3
+#define FM10K_ERR_REQUESTS_PENDING		-4
+#define FM10K_ERR_RESET_REQUESTED		-5
+#define FM10K_ERR_DMA_PENDING			-6
+#define FM10K_ERR_RESET_FAILED			-7
+#define FM10K_ERR_INVALID_MAC_ADDR		-8
+#define FM10K_ERR_INVALID_VALUE			-9
+#define FM10K_NOT_IMPLEMENTED			0x7FFFFFFF
+
+/* Start of PF registers */
+#define FM10K_CTRL		0x0000
+#define FM10K_CTRL_BAR4_ALLOWED			0x00000004
+
+#define FM10K_CTRL_EXT		0x0001
+#define FM10K_GCR		0x0003
+#define FM10K_GCR_EXT		0x0005
+
+/* Interrupt control registers */
+#define FM10K_EICR		0x0006
+#define FM10K_EICR_FAULT_MASK			0x0000003F
+#define FM10K_EICR_MAILBOX			0x00000040
+#define FM10K_EICR_SWITCHREADY			0x00000080
+#define FM10K_EICR_SWITCHNOTREADY		0x00000100
+#define FM10K_EICR_SWITCHINTERRUPT		0x00000200
+#define FM10K_EICR_VFLR				0x00000800
+#define FM10K_EICR_MAXHOLDTIME			0x00001000
+#define FM10K_EIMR		0x0007
+#define FM10K_EIMR_PCA_FAULT			0x00000001
+#define FM10K_EIMR_THI_FAULT			0x00000010
+#define FM10K_EIMR_FUM_FAULT			0x00000400
+#define FM10K_EIMR_MAILBOX			0x00001000
+#define FM10K_EIMR_SWITCHREADY			0x00004000
+#define FM10K_EIMR_SWITCHNOTREADY		0x00010000
+#define FM10K_EIMR_SWITCHINTERRUPT		0x00040000
+#define FM10K_EIMR_SRAMERROR			0x00100000
+#define FM10K_EIMR_VFLR				0x00400000
+#define FM10K_EIMR_MAXHOLDTIME			0x01000000
+#define FM10K_EIMR_ALL				0x55555555
+#define FM10K_EIMR_DISABLE(NAME)		((FM10K_EIMR_ ## NAME) << 0)
+#define FM10K_EIMR_ENABLE(NAME)			((FM10K_EIMR_ ## NAME) << 1)
+#define FM10K_FAULT_ADDR_LO		0x0
+#define FM10K_FAULT_ADDR_HI		0x1
+#define FM10K_FAULT_SPECINFO		0x2
+#define FM10K_FAULT_FUNC		0x3
+#define FM10K_FAULT_SIZE		0x4
+#define FM10K_FAULT_FUNC_VALID			0x00008000
+#define FM10K_FAULT_FUNC_PF			0x00004000
+#define FM10K_FAULT_FUNC_VF_MASK		0x00003F00
+#define FM10K_FAULT_FUNC_VF_SHIFT		8
+#define FM10K_FAULT_FUNC_TYPE_MASK		0x000000FF
+
+#define FM10K_PCA_FAULT		0x0008
+#define FM10K_THI_FAULT		0x0010
+#define FM10K_FUM_FAULT		0x001C
+
+/* Rx queue timeout indicator */
+#define FM10K_MAXHOLDQ(_n)	((_n) + 0x0020)
+
+/* Switch Manager info */
+#define FM10K_SM_AREA(_n)	((_n) + 0x0028)
+
+/* GLORT mapping registers */
+#define FM10K_DGLORTMAP(_n)	((_n) + 0x0030)
+#define FM10K_DGLORT_COUNT			8
+#define FM10K_DGLORTMAP_MASK_SHIFT		16
+#define FM10K_DGLORTMAP_ANY			0x00000000
+#define FM10K_DGLORTMAP_NONE			0x0000FFFF
+#define FM10K_DGLORTMAP_ZERO			0xFFFF0000
+#define FM10K_DGLORTDEC(_n)	((_n) + 0x0038)
+#define FM10K_DGLORTDEC_VSILENGTH_SHIFT		4
+#define FM10K_DGLORTDEC_VSIBASE_SHIFT		7
+#define FM10K_DGLORTDEC_PCLENGTH_SHIFT		14
+#define FM10K_DGLORTDEC_QBASE_SHIFT		16
+#define FM10K_DGLORTDEC_RSSLENGTH_SHIFT		24
+#define FM10K_DGLORTDEC_INNERRSS_ENABLE		0x08000000
+#define FM10K_TUNNEL_CFG	0x0040
+#define FM10K_TUNNEL_CFG_NVGRE_SHIFT		16
+#define FM10K_TUNNEL_CFG_GENEVE	0x0041
+#define FM10K_SWPRI_MAP(_n)	((_n) + 0x0050)
+#define FM10K_SWPRI_MAX		16
+#define FM10K_RSSRK(_n, _m)	(((_n) * 0x10) + (_m) + 0x0800)
+#define FM10K_RSSRK_SIZE	10
+#define FM10K_RSSRK_ENTRIES_PER_REG		4
+#define FM10K_RETA(_n, _m)	(((_n) * 0x20) + (_m) + 0x1000)
+#define FM10K_RETA_SIZE		32
+#define FM10K_RETA_ENTRIES_PER_REG		4
+#define FM10K_MAX_RSS_INDICES	128
+
+/* Rate limiting registers */
+#define FM10K_TC_CREDIT(_n)	((_n) + 0x2000)
+#define FM10K_TC_CREDIT_CREDIT_MASK		0x001FFFFF
+#define FM10K_TC_MAXCREDIT(_n)	((_n) + 0x2040)
+#define FM10K_TC_MAXCREDIT_64K			0x00010000
+#define FM10K_TC_RATE(_n)	((_n) + 0x2080)
+#define FM10K_TC_RATE_QUANTA_MASK		0x0000FFFF
+#define FM10K_TC_RATE_INTERVAL_4US_GEN1		0x00020000
+#define FM10K_TC_RATE_INTERVAL_4US_GEN2		0x00040000
+#define FM10K_TC_RATE_INTERVAL_4US_GEN3		0x00080000
+
+/* DMA control registers */
+#define FM10K_DMA_CTRL		0x20C3
+#define FM10K_DMA_CTRL_TX_ENABLE		0x00000001
+#define FM10K_DMA_CTRL_TX_ACTIVE		0x00000008
+#define FM10K_DMA_CTRL_RX_ENABLE		0x00000010
+#define FM10K_DMA_CTRL_RX_ACTIVE		0x00000080
+#define FM10K_DMA_CTRL_RX_DESC_SIZE		0x00000100
+#define FM10K_DMA_CTRL_MINMSS_64		0x00008000
+#define FM10K_DMA_CTRL_MAX_HOLD_1US_GEN3	0x04800000
+#define FM10K_DMA_CTRL_MAX_HOLD_1US_GEN2	0x04000000
+#define FM10K_DMA_CTRL_MAX_HOLD_1US_GEN1	0x03800000
+#define FM10K_DMA_CTRL_DATAPATH_RESET		0x20000000
+#define FM10K_DMA_CTRL_32_DESC			0x00000000
+
+#define FM10K_DMA_CTRL2		0x20C4
+#define FM10K_DMA_CTRL2_SWITCH_READY		0x00002000
+
+/* TSO flags configuration
+ * First packet contains all flags except for fin and psh
+ * Middle packet contains only urg and ack
+ * Last packet contains urg, ack, fin, and psh
+ */
+#define FM10K_TSO_FLAGS_LOW		0x00300FF6
+#define FM10K_TSO_FLAGS_HI		0x00000039
+#define FM10K_DTXTCPFLGL	0x20C5
+#define FM10K_DTXTCPFLGH	0x20C6
+
+#define FM10K_TPH_CTRL		0x20C7
+#define FM10K_MRQC(_n)		((_n) + 0x2100)
+#define FM10K_MRQC_TCP_IPV4			0x00000001
+#define FM10K_MRQC_IPV4				0x00000002
+#define FM10K_MRQC_IPV6				0x00000010
+#define FM10K_MRQC_TCP_IPV6			0x00000020
+#define FM10K_MRQC_UDP_IPV4			0x00000040
+#define FM10K_MRQC_UDP_IPV6			0x00000080
+
+#define FM10K_TQMAP(_n)		((_n) + 0x2800)
+#define FM10K_TQMAP_TABLE_SIZE			2048
+#define FM10K_RQMAP(_n)		((_n) + 0x3000)
+
+/* Hardware Statistics */
+#define FM10K_STATS_TIMEOUT		0x3800
+#define FM10K_STATS_UR			0x3801
+#define FM10K_STATS_CA			0x3802
+#define FM10K_STATS_UM			0x3803
+#define FM10K_STATS_XEC			0x3804
+#define FM10K_STATS_VLAN_DROP		0x3805
+#define FM10K_STATS_LOOPBACK_DROP	0x3806
+#define FM10K_STATS_NODESC_DROP		0x3807
+
+/* PCIe state registers */
+#define FM10K_PHYADDR		0x381C
+
+/* Rx ring registers */
+#define FM10K_RDBAL(_n)		((0x40 * (_n)) + 0x4000)
+#define FM10K_RDBAH(_n)		((0x40 * (_n)) + 0x4001)
+#define FM10K_RDLEN(_n)		((0x40 * (_n)) + 0x4002)
+#define FM10K_TPH_RXCTRL(_n)	((0x40 * (_n)) + 0x4003)
+#define FM10K_TPH_RXCTRL_DESC_TPHEN		0x00000020
+#define FM10K_TPH_RXCTRL_DESC_RROEN		0x00000200
+#define FM10K_TPH_RXCTRL_DATA_WROEN		0x00002000
+#define FM10K_TPH_RXCTRL_HDR_WROEN		0x00008000
+#define FM10K_RDH(_n)		((0x40 * (_n)) + 0x4004)
+#define FM10K_RDT(_n)		((0x40 * (_n)) + 0x4005)
+#define FM10K_RXQCTL(_n)	((0x40 * (_n)) + 0x4006)
+#define FM10K_RXQCTL_ENABLE			0x00000001
+#define FM10K_RXQCTL_PF				0x000000FC
+#define FM10K_RXQCTL_VF_SHIFT			2
+#define FM10K_RXQCTL_VF				0x00000100
+#define FM10K_RXQCTL_ID_MASK	(FM10K_RXQCTL_PF | FM10K_RXQCTL_VF)
+#define FM10K_RXDCTL(_n)	((0x40 * (_n)) + 0x4007)
+#define FM10K_RXDCTL_WRITE_BACK_MIN_DELAY	0x00000001
+#define FM10K_RXDCTL_DROP_ON_EMPTY		0x00000200
+#define FM10K_RXINT(_n)		((0x40 * (_n)) + 0x4008)
+#define FM10K_SRRCTL(_n)	((0x40 * (_n)) + 0x4009)
+#define FM10K_SRRCTL_BSIZEPKT_SHIFT		8 /* shift _right_ */
+#define FM10K_SRRCTL_LOOPBACK_SUPPRESS		0x40000000
+#define FM10K_SRRCTL_BUFFER_CHAINING_EN		0x80000000
+
+/* Rx Statistics */
+#define FM10K_QPRC(_n)		((0x40 * (_n)) + 0x400A)
+#define FM10K_QPRDC(_n)		((0x40 * (_n)) + 0x400B)
+#define FM10K_QBRC_L(_n)	((0x40 * (_n)) + 0x400C)
+#define FM10K_QBRC_H(_n)	((0x40 * (_n)) + 0x400D)
+
+/* Rx GLORT register */
+#define FM10K_RX_SGLORT(_n)		((0x40 * (_n)) + 0x400E)
+
+/* Tx ring registers */
+#define FM10K_TDBAL(_n)		((0x40 * (_n)) + 0x8000)
+#define FM10K_TDBAH(_n)		((0x40 * (_n)) + 0x8001)
+#define FM10K_TDLEN(_n)		((0x40 * (_n)) + 0x8002)
+/* When fist initialized, VFs need to know the Interrupt Throttle Rate (ITR)
+ * scale which is based on the PCIe speed but the speed information in the PCI
+ * configuration space may not be accurate. The PF already knows the ITR scale
+ * but there is no defined method to pass that information from the PF to the
+ * VF. This is accomplished during VF initialization by temporarily co-opting
+ * the yet-to-be-used TDLEN register to have the PF store the ITR shift for
+ * the VF to retrieve before the VF needs to use the TDLEN register for its
+ * intended purpose, i.e. before the Tx resources are allocated.
+ */
+#define FM10K_TDLEN_ITR_SCALE_SHIFT		9
+#define FM10K_TDLEN_ITR_SCALE_MASK		0x00000E00
+#define FM10K_TDLEN_ITR_SCALE_GEN1		2
+#define FM10K_TDLEN_ITR_SCALE_GEN2		1
+#define FM10K_TDLEN_ITR_SCALE_GEN3		0
+#define FM10K_TPH_TXCTRL(_n)	((0x40 * (_n)) + 0x8003)
+#define FM10K_TPH_TXCTRL_DESC_TPHEN		0x00000020
+#define FM10K_TPH_TXCTRL_DESC_RROEN		0x00000200
+#define FM10K_TPH_TXCTRL_DESC_WROEN		0x00000800
+#define FM10K_TPH_TXCTRL_DATA_RROEN		0x00002000
+#define FM10K_TDH(_n)		((0x40 * (_n)) + 0x8004)
+#define FM10K_TDT(_n)		((0x40 * (_n)) + 0x8005)
+#define FM10K_TXDCTL(_n)	((0x40 * (_n)) + 0x8006)
+#define FM10K_TXDCTL_ENABLE			0x00004000
+#define FM10K_TXDCTL_MAX_TIME_SHIFT		16
+#define FM10K_TXQCTL(_n)	((0x40 * (_n)) + 0x8007)
+#define FM10K_TXQCTL_PF				0x0000003F
+#define FM10K_TXQCTL_VF				0x00000040
+#define FM10K_TXQCTL_ID_MASK	(FM10K_TXQCTL_PF | FM10K_TXQCTL_VF)
+#define FM10K_TXQCTL_PC_SHIFT			7
+#define FM10K_TXQCTL_PC_MASK			0x00000380
+#define FM10K_TXQCTL_TC_SHIFT			10
+#define FM10K_TXQCTL_VID_SHIFT			16
+#define FM10K_TXQCTL_VID_MASK			0x0FFF0000
+#define FM10K_TXQCTL_UNLIMITED_BW		0x10000000
+#define FM10K_TXINT(_n)		((0x40 * (_n)) + 0x8008)
+
+/* Tx Statistics */
+#define FM10K_QPTC(_n)		((0x40 * (_n)) + 0x8009)
+#define FM10K_QBTC_L(_n)	((0x40 * (_n)) + 0x800A)
+#define FM10K_QBTC_H(_n)	((0x40 * (_n)) + 0x800B)
+
+/* Tx Push registers */
+#define FM10K_TQDLOC(_n)	((0x40 * (_n)) + 0x800C)
+#define FM10K_TQDLOC_BASE_32_DESC		0x08
+#define FM10K_TQDLOC_SIZE_32_DESC		0x00050000
+
+/* Tx GLORT registers */
+#define FM10K_TX_SGLORT(_n)	((0x40 * (_n)) + 0x800D)
+#define FM10K_PFVTCTL(_n)	((0x40 * (_n)) + 0x800E)
+#define FM10K_PFVTCTL_FTAG_DESC_ENABLE		0x00000001
+
+/* Interrupt moderation and control registers */
+#define FM10K_INT_MAP(_n)	((_n) + 0x10080)
+#define FM10K_INT_MAP_TIMER0			0x00000000
+#define FM10K_INT_MAP_TIMER1			0x00000100
+#define FM10K_INT_MAP_IMMEDIATE			0x00000200
+#define FM10K_INT_MAP_DISABLE			0x00000300
+#define FM10K_MSIX_VECTOR_MASK(_n)	((0x4 * (_n)) + 0x11003)
+#define FM10K_INT_CTRL		0x12000
+#define FM10K_INT_CTRL_ENABLEMODERATOR		0x00000400
+#define FM10K_ITR(_n)		((_n) + 0x12400)
+#define FM10K_ITR_INTERVAL1_SHIFT		12
+#define FM10K_ITR_PENDING2			0x10000000
+#define FM10K_ITR_AUTOMASK			0x20000000
+#define FM10K_ITR_MASK_SET			0x40000000
+#define FM10K_ITR_MASK_CLEAR			0x80000000
+#define FM10K_ITR2(_n)		((0x2 * (_n)) + 0x12800)
+#define FM10K_ITR_REG_COUNT			768
+#define FM10K_ITR_REG_COUNT_PF			256
+
+/* Switch manager interrupt registers */
+#define FM10K_IP		0x13000
+#define FM10K_IP_NOTINRESET			0x00000100
+
+/* VLAN registers */
+#define FM10K_VLAN_TABLE(_n, _m)	((0x80 * (_n)) + (_m) + 0x14000)
+#define FM10K_VLAN_TABLE_SIZE			128
+
+/* VLAN specific message offsets */
+#define FM10K_VLAN_TABLE_VID_MAX		4096
+#define FM10K_VLAN_TABLE_VSI_MAX		64
+#define FM10K_VLAN_LENGTH_SHIFT			16
+#define FM10K_VLAN_CLEAR			BIT(15)
+#define FM10K_VLAN_OVERRIDE			FM10K_VLAN_CLEAR
+#define FM10K_VLAN_ALL \
+	((FM10K_VLAN_TABLE_VID_MAX - 1) << FM10K_VLAN_LENGTH_SHIFT)
+
+/* VF FLR event notification registers */
+#define FM10K_PFVFLRE(_n)	((0x1 * (_n)) + 0x18844)
+#define FM10K_PFVFLREC(_n)	((0x1 * (_n)) + 0x18846)
+
+/* Defines for size of uncacheable memories */
+#define FM10K_UC_ADDR_START	0x000000	/* start of standard regs */
+#define FM10K_UC_ADDR_END	0x100000	/* end of standard regs */
+#define FM10K_UC_ADDR_SIZE	(FM10K_UC_ADDR_END - FM10K_UC_ADDR_START)
+
+/* Define timeouts for resets and disables */
+#define FM10K_QUEUE_DISABLE_TIMEOUT		100
+#define FM10K_RESET_TIMEOUT			150
+
+/* Maximum supported combined inner and outer header length for encapsulation */
+#define FM10K_TUNNEL_HEADER_LENGTH	184
+
+/* VF registers */
+#define FM10K_VFCTRL		0x00000
+#define FM10K_VFCTRL_RST			0x00000008
+#define FM10K_VFINT_MAP		0x00030
+#define FM10K_VFSYSTIME		0x00040
+#define FM10K_VFITR(_n)		((_n) + 0x00060)
+
+enum fm10k_int_source {
+	fm10k_int_mailbox		= 0,
+	fm10k_int_pcie_fault		= 1,
+	fm10k_int_switch_up_down	= 2,
+	fm10k_int_switch_event		= 3,
+	fm10k_int_sram			= 4,
+	fm10k_int_vflr			= 5,
+	fm10k_int_max_hold_time		= 6,
+	fm10k_int_sources_max_pf
+};
+
+/* PCIe bus speeds */
+enum fm10k_bus_speed {
+	fm10k_bus_speed_unknown	= 0,
+	fm10k_bus_speed_2500	= 2500,
+	fm10k_bus_speed_5000	= 5000,
+	fm10k_bus_speed_8000	= 8000,
+	fm10k_bus_speed_reserved
+};
+
+/* PCIe bus widths */
+enum fm10k_bus_width {
+	fm10k_bus_width_unknown	= 0,
+	fm10k_bus_width_pcie_x1	= 1,
+	fm10k_bus_width_pcie_x2	= 2,
+	fm10k_bus_width_pcie_x4	= 4,
+	fm10k_bus_width_pcie_x8	= 8,
+	fm10k_bus_width_reserved
+};
+
+/* PCIe payload sizes */
+enum fm10k_bus_payload {
+	fm10k_bus_payload_unknown = 0,
+	fm10k_bus_payload_128	  = 1,
+	fm10k_bus_payload_256	  = 2,
+	fm10k_bus_payload_512	  = 3,
+	fm10k_bus_payload_reserved
+};
+
+/* Bus parameters */
+struct fm10k_bus_info {
+	enum fm10k_bus_speed speed;
+	enum fm10k_bus_width width;
+	enum fm10k_bus_payload payload;
+};
+
+/* Statistics related declarations */
+struct fm10k_hw_stat {
+	u64 count;
+	u32 base_l;
+	u32 base_h;
+};
+
+struct fm10k_hw_stats_q {
+	struct fm10k_hw_stat tx_bytes;
+	struct fm10k_hw_stat tx_packets;
+#define tx_stats_idx	tx_packets.base_h
+	struct fm10k_hw_stat rx_bytes;
+	struct fm10k_hw_stat rx_packets;
+#define rx_stats_idx	rx_packets.base_h
+	struct fm10k_hw_stat rx_drops;
+};
+
+struct fm10k_hw_stats {
+	struct fm10k_hw_stat	timeout;
+#define stats_idx	timeout.base_h
+	struct fm10k_hw_stat	ur;
+	struct fm10k_hw_stat	ca;
+	struct fm10k_hw_stat	um;
+	struct fm10k_hw_stat	xec;
+	struct fm10k_hw_stat	vlan_drop;
+	struct fm10k_hw_stat	loopback_drop;
+	struct fm10k_hw_stat	nodesc_drop;
+	struct fm10k_hw_stats_q q[FM10K_MAX_QUEUES_PF];
+};
+
+/* Establish DGLORT feature priority */
+enum fm10k_dglortdec_idx {
+	fm10k_dglort_default	= 0,
+	fm10k_dglort_vf_rsvd0	= 1,
+	fm10k_dglort_vf_rss	= 2,
+	fm10k_dglort_pf_rsvd0	= 3,
+	fm10k_dglort_pf_queue	= 4,
+	fm10k_dglort_pf_vsi	= 5,
+	fm10k_dglort_pf_rsvd1	= 6,
+	fm10k_dglort_pf_rss	= 7
+};
+
+struct fm10k_dglort_cfg {
+	u16 glort;	/* GLORT base */
+	u16 queue_b;	/* Base value for queue */
+	u8  vsi_b;	/* Base value for VSI */
+	u8  idx;	/* index of DGLORTDEC entry */
+	u8  rss_l;	/* RSS indices */
+	u8  pc_l;	/* Priority Class indices */
+	u8  vsi_l;	/* Number of bits from GLORT used to determine VSI */
+	u8  queue_l;	/* Number of bits from GLORT used to determine queue */
+	u8  shared_l;	/* Ignored bits from GLORT resulting in shared VSI */
+	u8  inner_rss;	/* Boolean value if inner header is used for RSS */
+};
+
+enum fm10k_pca_fault {
+	PCA_NO_FAULT,
+	PCA_UNMAPPED_ADDR,
+	PCA_BAD_QACCESS_PF,
+	PCA_BAD_QACCESS_VF,
+	PCA_MALICIOUS_REQ,
+	PCA_POISONED_TLP,
+	PCA_TLP_ABORT,
+	__PCA_MAX
+};
+
+enum fm10k_thi_fault {
+	THI_NO_FAULT,
+	THI_MAL_DIS_Q_FAULT,
+	__THI_MAX
+};
+
+enum fm10k_fum_fault {
+	FUM_NO_FAULT,
+	FUM_UNMAPPED_ADDR,
+	FUM_POISONED_TLP,
+	FUM_BAD_VF_QACCESS,
+	FUM_ADD_DECODE_ERR,
+	FUM_RO_ERROR,
+	FUM_QPRC_CRC_ERROR,
+	FUM_CSR_TIMEOUT,
+	FUM_INVALID_TYPE,
+	FUM_INVALID_LENGTH,
+	FUM_INVALID_BE,
+	FUM_INVALID_ALIGN,
+	__FUM_MAX
+};
+
+struct fm10k_fault {
+	u64 address;	/* Address at the time fault was detected */
+	u32 specinfo;	/* Extra info on this fault (fault dependent) */
+	u8 type;	/* Fault value dependent on subunit */
+	u8 func;	/* Function number of the fault */
+};
+
+struct fm10k_mac_ops {
+	/* basic bring-up and tear-down */
+	s32 (*reset_hw)(struct fm10k_hw *);
+	s32 (*init_hw)(struct fm10k_hw *);
+	s32 (*start_hw)(struct fm10k_hw *);
+	s32 (*stop_hw)(struct fm10k_hw *);
+	s32 (*get_bus_info)(struct fm10k_hw *);
+	s32 (*get_host_state)(struct fm10k_hw *, bool *);
+	s32 (*request_lport_map)(struct fm10k_hw *);
+	s32 (*update_vlan)(struct fm10k_hw *, u32, u8, bool);
+	s32 (*read_mac_addr)(struct fm10k_hw *);
+	s32 (*update_uc_addr)(struct fm10k_hw *, u16, const u8 *,
+			      u16, bool, u8);
+	s32 (*update_mc_addr)(struct fm10k_hw *, u16, const u8 *, u16, bool);
+	s32 (*update_xcast_mode)(struct fm10k_hw *, u16, u8);
+	void (*update_int_moderator)(struct fm10k_hw *);
+	s32  (*update_lport_state)(struct fm10k_hw *, u16, u16, bool);
+	void (*update_hw_stats)(struct fm10k_hw *, struct fm10k_hw_stats *);
+	void (*rebind_hw_stats)(struct fm10k_hw *, struct fm10k_hw_stats *);
+	s32 (*configure_dglort_map)(struct fm10k_hw *,
+				    struct fm10k_dglort_cfg *);
+	void (*set_dma_mask)(struct fm10k_hw *, u64);
+	s32 (*get_fault)(struct fm10k_hw *, int, struct fm10k_fault *);
+};
+
+enum fm10k_mac_type {
+	fm10k_mac_unknown = 0,
+	fm10k_mac_pf,
+	fm10k_mac_vf,
+	fm10k_num_macs
+};
+
+struct fm10k_mac_info {
+	struct fm10k_mac_ops ops;
+	enum fm10k_mac_type type;
+	u8 addr[ETH_ALEN];
+	u8 perm_addr[ETH_ALEN];
+	u16 default_vid;
+	u16 max_msix_vectors;
+	u16 max_queues;
+	bool vlan_override;
+	bool get_host_state;
+	bool tx_ready;
+	u32 dglort_map;
+	u8 itr_scale;
+	u64 reset_while_pending;
+};
+
+struct fm10k_swapi_table_info {
+	u32 used;
+	u32 avail;
+};
+
+struct fm10k_swapi_info {
+	u32 status;
+	struct fm10k_swapi_table_info mac;
+	struct fm10k_swapi_table_info nexthop;
+	struct fm10k_swapi_table_info ffu;
+};
+
+enum fm10k_xcast_modes {
+	FM10K_XCAST_MODE_ALLMULTI	= 0,
+	FM10K_XCAST_MODE_MULTI		= 1,
+	FM10K_XCAST_MODE_PROMISC	= 2,
+	FM10K_XCAST_MODE_NONE		= 3,
+	FM10K_XCAST_MODE_DISABLE	= 4
+};
+
+#define FM10K_VF_TC_MAX		100000	/* 100,000 Mb/s aka 100Gb/s */
+#define FM10K_VF_TC_MIN		1	/* 1 Mb/s is the slowest rate */
+
+struct fm10k_vf_info {
+	/* mbx must be first field in struct unless all default IOV message
+	 * handlers are redone as the assumption is that vf_info starts
+	 * at the same offset as the mailbox
+	 */
+	struct fm10k_mbx_info	mbx;		/* PF side of VF mailbox */
+	struct fm10k_hw_stats_q	stats[FM10K_MAX_QUEUES_POOL];
+	int			rate;		/* Tx BW cap as defined by OS */
+	u16			glort;		/* resource tag for this VF */
+	u16			sw_vid;		/* Switch API assigned VLAN */
+	u16			pf_vid;		/* PF assigned Default VLAN */
+	u8			mac[ETH_ALEN];	/* PF Default MAC address */
+	u8			vsi;		/* VSI identifier */
+	u8			vf_idx;		/* which VF this is */
+	u8			vf_flags;	/* flags indicating what modes
+						 * are supported for the port
+						 */
+};
+
+#define FM10K_VF_FLAG_ALLMULTI_CAPABLE	(u8)(BIT(FM10K_XCAST_MODE_ALLMULTI))
+#define FM10K_VF_FLAG_MULTI_CAPABLE	(u8)(BIT(FM10K_XCAST_MODE_MULTI))
+#define FM10K_VF_FLAG_PROMISC_CAPABLE	(u8)(BIT(FM10K_XCAST_MODE_PROMISC))
+#define FM10K_VF_FLAG_NONE_CAPABLE	(u8)(BIT(FM10K_XCAST_MODE_NONE))
+#define FM10K_VF_FLAG_CAPABLE(vf_info)	((vf_info)->vf_flags & (u8)0xF)
+#define FM10K_VF_FLAG_ENABLED(vf_info)	((vf_info)->vf_flags >> 4)
+#define FM10K_VF_FLAG_SET_MODE(mode)	((u8)0x10 << (mode))
+#define FM10K_VF_FLAG_SET_MODE_NONE \
+	FM10K_VF_FLAG_SET_MODE(FM10K_XCAST_MODE_NONE)
+#define FM10K_VF_FLAG_MULTI_ENABLED \
+	(FM10K_VF_FLAG_SET_MODE(FM10K_XCAST_MODE_ALLMULTI) | \
+	 FM10K_VF_FLAG_SET_MODE(FM10K_XCAST_MODE_MULTI) | \
+	 FM10K_VF_FLAG_SET_MODE(FM10K_XCAST_MODE_PROMISC))
+
+struct fm10k_iov_ops {
+	/* IOV related bring-up and tear-down */
+	s32 (*assign_resources)(struct fm10k_hw *, u16, u16);
+	s32 (*configure_tc)(struct fm10k_hw *, u16, int);
+	s32 (*assign_int_moderator)(struct fm10k_hw *, u16);
+	s32 (*assign_default_mac_vlan)(struct fm10k_hw *,
+				       struct fm10k_vf_info *);
+	s32 (*reset_resources)(struct fm10k_hw *,
+			       struct fm10k_vf_info *);
+	s32 (*set_lport)(struct fm10k_hw *, struct fm10k_vf_info *, u16, u8);
+	void (*reset_lport)(struct fm10k_hw *, struct fm10k_vf_info *);
+	void (*update_stats)(struct fm10k_hw *, struct fm10k_hw_stats_q *, u16);
+};
+
+struct fm10k_iov_info {
+	struct fm10k_iov_ops ops;
+	u16 total_vfs;
+	u16 num_vfs;
+	u16 num_pools;
+};
+
+enum fm10k_devices {
+	fm10k_device_pf,
+	fm10k_device_vf,
+};
+
+struct fm10k_info {
+	enum fm10k_mac_type		mac;
+	s32				(*get_invariants)(struct fm10k_hw *);
+	const struct fm10k_mac_ops	*mac_ops;
+	const struct fm10k_iov_ops	*iov_ops;
+};
+
+struct fm10k_hw {
+	u32 __iomem *hw_addr;
+	void *back;
+	struct fm10k_mac_info mac;
+	struct fm10k_bus_info bus;
+	struct fm10k_bus_info bus_caps;
+	struct fm10k_iov_info iov;
+	struct fm10k_mbx_info mbx;
+	struct fm10k_swapi_info swapi;
+	u16 device_id;
+	u16 vendor_id;
+	u16 subsystem_device_id;
+	u16 subsystem_vendor_id;
+	u8 revision_id;
+};
+
+/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define FM10K_REQ_TX_DESCRIPTOR_MULTIPLE	8
+#define FM10K_REQ_RX_DESCRIPTOR_MULTIPLE	8
+
+/* Transmit Descriptor */
+struct fm10k_tx_desc {
+	__le64 buffer_addr;	/* Address of the descriptor's data buffer */
+	__le16 buflen;		/* Length of data to be DMAed */
+	__le16 vlan;		/* VLAN_ID and VPRI to be inserted in FTAG */
+	__le16 mss;		/* MSS for segmentation offload */
+	u8 hdrlen;		/* Header size for segmentation offload */
+	u8 flags;		/* Status and offload request flags */
+};
+
+/* Transmit Descriptor Cache Structure */
+struct fm10k_tx_desc_cache {
+	struct fm10k_tx_desc tx_desc[256];
+};
+
+#define FM10K_TXD_FLAG_INT	0x01
+#define FM10K_TXD_FLAG_TIME	0x02
+#define FM10K_TXD_FLAG_CSUM	0x04
+#define FM10K_TXD_FLAG_FTAG	0x10
+#define FM10K_TXD_FLAG_RS	0x20
+#define FM10K_TXD_FLAG_LAST	0x40
+#define FM10K_TXD_FLAG_DONE	0x80
+
+/* These macros are meant to enable optimal placement of the RS and INT
+ * bits.  It will point us to the last descriptor in the cache for either the
+ * start of the packet, or the end of the packet.  If the index is actually
+ * at the start of the FIFO it will point to the offset for the last index
+ * in the FIFO to prevent an unnecessary write.
+ */
+#define FM10K_TXD_WB_FIFO_SIZE	4
+
+/* Receive Descriptor - 32B */
+union fm10k_rx_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+		__le64 reserved; /* Empty space, RSS hash */
+		__le64 timestamp;
+	} q; /* Read, Writeback, 64b quad-words */
+	struct {
+		__le32 data; /* RSS and header data */
+		__le32 rss;  /* RSS Hash */
+		__le32 staterr;
+		__le32 vlan_len;
+		__le32 glort; /* sglort/dglort */
+	} d; /* Writeback, 32b double-words */
+	struct {
+		__le16 pkt_info; /* RSS, Pkt type */
+		__le16 hdr_info; /* Splithdr, hdrlen, xC */
+		__le16 rss_lower;
+		__le16 rss_upper;
+		__le16 status; /* status/error */
+		__le16 csum_err; /* checksum or extended error value */
+		__le16 length; /* Packet length */
+		__le16 vlan; /* VLAN tag */
+		__le16 dglort;
+		__le16 sglort;
+	} w; /* Writeback, 16b words */
+};
+
+#define FM10K_RXD_RSSTYPE_MASK		0x000F
+enum fm10k_rdesc_rss_type {
+	FM10K_RSSTYPE_NONE	= 0x0,
+	FM10K_RSSTYPE_IPV4_TCP	= 0x1,
+	FM10K_RSSTYPE_IPV4	= 0x2,
+	FM10K_RSSTYPE_IPV6_TCP	= 0x3,
+	/* Reserved 0x4 */
+	FM10K_RSSTYPE_IPV6	= 0x5,
+	/* Reserved 0x6 */
+	FM10K_RSSTYPE_IPV4_UDP	= 0x7,
+	FM10K_RSSTYPE_IPV6_UDP	= 0x8
+	/* Reserved 0x9 - 0xF */
+};
+
+#define FM10K_RXD_HDR_INFO_XC_MASK	0x0006
+enum fm10k_rxdesc_xc {
+	FM10K_XC_UNICAST	= 0x0,
+	FM10K_XC_MULTICAST	= 0x4,
+	FM10K_XC_BROADCAST	= 0x6
+};
+
+#define FM10K_RXD_STATUS_DD		0x0001 /* Descriptor done */
+#define FM10K_RXD_STATUS_EOP		0x0002 /* End of packet */
+#define FM10K_RXD_STATUS_L4CS		0x0010 /* Indicates an L4 csum */
+#define FM10K_RXD_STATUS_L4CS2		0x0040 /* Inner header L4 csum */
+#define FM10K_RXD_STATUS_L4E2		0x0800 /* Inner header L4 csum err */
+#define FM10K_RXD_STATUS_IPE2		0x1000 /* Inner header IPv4 csum err */
+#define FM10K_RXD_STATUS_RXE		0x2000 /* Generic Rx error */
+#define FM10K_RXD_STATUS_L4E		0x4000 /* L4 csum error */
+#define FM10K_RXD_STATUS_IPE		0x8000 /* IPv4 csum error */
+
+#define FM10K_RXD_ERR_SWITCH_ERROR	0x0001 /* Switch found bad packet */
+#define FM10K_RXD_ERR_NO_DESCRIPTOR	0x0002 /* No descriptor available */
+#define FM10K_RXD_ERR_PP_ERROR		0x0004 /* RAM error during processing */
+#define FM10K_RXD_ERR_SWITCH_READY	0x0008 /* Link transition mid-packet */
+#define FM10K_RXD_ERR_TOO_BIG		0x0010 /* Pkt too big for single buf */
+
+struct fm10k_ftag {
+	__be16 swpri_type_user;
+	__be16 vlan;
+	__be16 sglort;
+	__be16 dglort;
+};
+
+#endif /* _FM10K_TYPE_H */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
new file mode 100644
index 0000000000..dc8ccd378e
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c
@@ -0,0 +1,532 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2019 Intel Corporation. */
+
+#include "fm10k_vf.h"
+
+/**
+ *  fm10k_stop_hw_vf - Stop Tx/Rx units
+ *  @hw: pointer to hardware structure
+ *
+ **/
+static s32 fm10k_stop_hw_vf(struct fm10k_hw *hw)
+{
+	u8 *perm_addr = hw->mac.perm_addr;
+	u32 bal = 0, bah = 0, tdlen;
+	s32 err;
+	u16 i;
+
+	/* we need to disable the queues before taking further steps */
+	err = fm10k_stop_hw_generic(hw);
+	if (err && err != FM10K_ERR_REQUESTS_PENDING)
+		return err;
+
+	/* If permanent address is set then we need to restore it */
+	if (is_valid_ether_addr(perm_addr)) {
+		bal = (((u32)perm_addr[3]) << 24) |
+		      (((u32)perm_addr[4]) << 16) |
+		      (((u32)perm_addr[5]) << 8);
+		bah = (((u32)0xFF)	   << 24) |
+		      (((u32)perm_addr[0]) << 16) |
+		      (((u32)perm_addr[1]) << 8) |
+		       ((u32)perm_addr[2]);
+	}
+
+	/* restore default itr_scale for next VF initialization */
+	tdlen = hw->mac.itr_scale << FM10K_TDLEN_ITR_SCALE_SHIFT;
+
+	/* The queues have already been disabled so we just need to
+	 * update their base address registers
+	 */
+	for (i = 0; i < hw->mac.max_queues; i++) {
+		fm10k_write_reg(hw, FM10K_TDBAL(i), bal);
+		fm10k_write_reg(hw, FM10K_TDBAH(i), bah);
+		fm10k_write_reg(hw, FM10K_RDBAL(i), bal);
+		fm10k_write_reg(hw, FM10K_RDBAH(i), bah);
+		/* Restore ITR scale in software-defined mechanism in TDLEN
+		 * for next VF initialization. See definition of
+		 * FM10K_TDLEN_ITR_SCALE_SHIFT for more details on the use of
+		 * TDLEN here.
+		 */
+		fm10k_write_reg(hw, FM10K_TDLEN(i), tdlen);
+	}
+
+	return err;
+}
+
+/**
+ *  fm10k_reset_hw_vf - VF hardware reset
+ *  @hw: pointer to hardware structure
+ *
+ *  This function should return the hardware to a state similar to the
+ *  one it is in after just being initialized.
+ **/
+static s32 fm10k_reset_hw_vf(struct fm10k_hw *hw)
+{
+	s32 err;
+
+	/* shut down queues we own and reset DMA configuration */
+	err = fm10k_stop_hw_vf(hw);
+	if (err == FM10K_ERR_REQUESTS_PENDING)
+		hw->mac.reset_while_pending++;
+	else if (err)
+		return err;
+
+	/* Inititate VF reset */
+	fm10k_write_reg(hw, FM10K_VFCTRL, FM10K_VFCTRL_RST);
+
+	/* Flush write and allow 100us for reset to complete */
+	fm10k_write_flush(hw);
+	udelay(FM10K_RESET_TIMEOUT);
+
+	/* Clear reset bit and verify it was cleared */
+	fm10k_write_reg(hw, FM10K_VFCTRL, 0);
+	if (fm10k_read_reg(hw, FM10K_VFCTRL) & FM10K_VFCTRL_RST)
+		return FM10K_ERR_RESET_FAILED;
+
+	return 0;
+}
+
+/**
+ *  fm10k_init_hw_vf - VF hardware initialization
+ *  @hw: pointer to hardware structure
+ *
+ **/
+static s32 fm10k_init_hw_vf(struct fm10k_hw *hw)
+{
+	u32 tqdloc, tqdloc0 = ~fm10k_read_reg(hw, FM10K_TQDLOC(0));
+	s32 err;
+	u16 i;
+
+	/* verify we have at least 1 queue */
+	if (!~fm10k_read_reg(hw, FM10K_TXQCTL(0)) ||
+	    !~fm10k_read_reg(hw, FM10K_RXQCTL(0))) {
+		err = FM10K_ERR_NO_RESOURCES;
+		goto reset_max_queues;
+	}
+
+	/* determine how many queues we have */
+	for (i = 1; tqdloc0 && (i < FM10K_MAX_QUEUES_POOL); i++) {
+		/* verify the Descriptor cache offsets are increasing */
+		tqdloc = ~fm10k_read_reg(hw, FM10K_TQDLOC(i));
+		if (!tqdloc || (tqdloc == tqdloc0))
+			break;
+
+		/* check to verify the PF doesn't own any of our queues */
+		if (!~fm10k_read_reg(hw, FM10K_TXQCTL(i)) ||
+		    !~fm10k_read_reg(hw, FM10K_RXQCTL(i)))
+			break;
+	}
+
+	/* shut down queues we own and reset DMA configuration */
+	err = fm10k_disable_queues_generic(hw, i);
+	if (err)
+		goto reset_max_queues;
+
+	/* record maximum queue count */
+	hw->mac.max_queues = i;
+
+	/* fetch default VLAN and ITR scale */
+	hw->mac.default_vid = (fm10k_read_reg(hw, FM10K_TXQCTL(0)) &
+			       FM10K_TXQCTL_VID_MASK) >> FM10K_TXQCTL_VID_SHIFT;
+	/* Read the ITR scale from TDLEN. See the definition of
+	 * FM10K_TDLEN_ITR_SCALE_SHIFT for more information about how TDLEN is
+	 * used here.
+	 */
+	hw->mac.itr_scale = (fm10k_read_reg(hw, FM10K_TDLEN(0)) &
+			     FM10K_TDLEN_ITR_SCALE_MASK) >>
+			    FM10K_TDLEN_ITR_SCALE_SHIFT;
+
+	return 0;
+
+reset_max_queues:
+	hw->mac.max_queues = 0;
+
+	return err;
+}
+
+/* This structure defines the attibutes to be parsed below */
+const struct fm10k_tlv_attr fm10k_mac_vlan_msg_attr[] = {
+	FM10K_TLV_ATTR_U32(FM10K_MAC_VLAN_MSG_VLAN),
+	FM10K_TLV_ATTR_BOOL(FM10K_MAC_VLAN_MSG_SET),
+	FM10K_TLV_ATTR_MAC_ADDR(FM10K_MAC_VLAN_MSG_MAC),
+	FM10K_TLV_ATTR_MAC_ADDR(FM10K_MAC_VLAN_MSG_DEFAULT_MAC),
+	FM10K_TLV_ATTR_MAC_ADDR(FM10K_MAC_VLAN_MSG_MULTICAST),
+	FM10K_TLV_ATTR_LAST
+};
+
+/**
+ *  fm10k_update_vlan_vf - Update status of VLAN ID in VLAN filter table
+ *  @hw: pointer to hardware structure
+ *  @vid: VLAN ID to add to table
+ *  @vsi: Reserved, should always be 0
+ *  @set: Indicates if this is a set or clear operation
+ *
+ *  This function adds or removes the corresponding VLAN ID from the VLAN
+ *  filter table for this VF.
+ **/
+static s32 fm10k_update_vlan_vf(struct fm10k_hw *hw, u32 vid, u8 vsi, bool set)
+{
+	struct fm10k_mbx_info *mbx = &hw->mbx;
+	u32 msg[4];
+
+	/* verify the index is not set */
+	if (vsi)
+		return FM10K_ERR_PARAM;
+
+	/* clever trick to verify reserved bits in both vid and length */
+	if ((vid << 16 | vid) >> 28)
+		return FM10K_ERR_PARAM;
+
+	/* encode set bit into the VLAN ID */
+	if (!set)
+		vid |= FM10K_VLAN_CLEAR;
+
+	/* generate VLAN request */
+	fm10k_tlv_msg_init(msg, FM10K_VF_MSG_ID_MAC_VLAN);
+	fm10k_tlv_attr_put_u32(msg, FM10K_MAC_VLAN_MSG_VLAN, vid);
+
+	/* load onto outgoing mailbox */
+	return mbx->ops.enqueue_tx(hw, mbx, msg);
+}
+
+/**
+ *  fm10k_msg_mac_vlan_vf - Read device MAC address from mailbox message
+ *  @hw: pointer to the HW structure
+ *  @results: Attributes for message
+ *  @mbx: unused mailbox data
+ *
+ *  This function should determine the MAC address for the VF
+ **/
+s32 fm10k_msg_mac_vlan_vf(struct fm10k_hw *hw, u32 **results,
+			  struct fm10k_mbx_info __always_unused *mbx)
+{
+	u8 perm_addr[ETH_ALEN];
+	u16 vid;
+	s32 err;
+
+	/* record MAC address requested */
+	err = fm10k_tlv_attr_get_mac_vlan(
+					results[FM10K_MAC_VLAN_MSG_DEFAULT_MAC],
+					perm_addr, &vid);
+	if (err)
+		return err;
+
+	ether_addr_copy(hw->mac.perm_addr, perm_addr);
+	hw->mac.default_vid = vid & (FM10K_VLAN_TABLE_VID_MAX - 1);
+	hw->mac.vlan_override = !!(vid & FM10K_VLAN_OVERRIDE);
+
+	return 0;
+}
+
+/**
+ *  fm10k_read_mac_addr_vf - Read device MAC address
+ *  @hw: pointer to the HW structure
+ *
+ *  This function should determine the MAC address for the VF
+ **/
+static s32 fm10k_read_mac_addr_vf(struct fm10k_hw *hw)
+{
+	u8 perm_addr[ETH_ALEN];
+	u32 base_addr;
+
+	base_addr = fm10k_read_reg(hw, FM10K_TDBAL(0));
+
+	/* last byte should be 0 */
+	if (base_addr << 24)
+		return  FM10K_ERR_INVALID_MAC_ADDR;
+
+	perm_addr[3] = (u8)(base_addr >> 24);
+	perm_addr[4] = (u8)(base_addr >> 16);
+	perm_addr[5] = (u8)(base_addr >> 8);
+
+	base_addr = fm10k_read_reg(hw, FM10K_TDBAH(0));
+
+	/* first byte should be all 1's */
+	if ((~base_addr) >> 24)
+		return  FM10K_ERR_INVALID_MAC_ADDR;
+
+	perm_addr[0] = (u8)(base_addr >> 16);
+	perm_addr[1] = (u8)(base_addr >> 8);
+	perm_addr[2] = (u8)(base_addr);
+
+	ether_addr_copy(hw->mac.perm_addr, perm_addr);
+	ether_addr_copy(hw->mac.addr, perm_addr);
+
+	return 0;
+}
+
+/**
+ *  fm10k_update_uc_addr_vf - Update device unicast addresses
+ *  @hw: pointer to the HW structure
+ *  @glort: unused
+ *  @mac: MAC address to add/remove from table
+ *  @vid: VLAN ID to add/remove from table
+ *  @add: Indicates if this is an add or remove operation
+ *  @flags: flags field to indicate add and secure - unused
+ *
+ *  This function is used to add or remove unicast MAC addresses for
+ *  the VF.
+ **/
+static s32 fm10k_update_uc_addr_vf(struct fm10k_hw *hw,
+				   u16 __always_unused glort,
+				   const u8 *mac, u16 vid, bool add,
+				   u8 __always_unused flags)
+{
+	struct fm10k_mbx_info *mbx = &hw->mbx;
+	u32 msg[7];
+
+	/* verify VLAN ID is valid */
+	if (vid >= FM10K_VLAN_TABLE_VID_MAX)
+		return FM10K_ERR_PARAM;
+
+	/* verify MAC address is valid */
+	if (!is_valid_ether_addr(mac))
+		return FM10K_ERR_PARAM;
+
+	/* verify we are not locked down on the MAC address */
+	if (is_valid_ether_addr(hw->mac.perm_addr) &&
+	    !ether_addr_equal(hw->mac.perm_addr, mac))
+		return FM10K_ERR_PARAM;
+
+	/* add bit to notify us if this is a set or clear operation */
+	if (!add)
+		vid |= FM10K_VLAN_CLEAR;
+
+	/* generate VLAN request */
+	fm10k_tlv_msg_init(msg, FM10K_VF_MSG_ID_MAC_VLAN);
+	fm10k_tlv_attr_put_mac_vlan(msg, FM10K_MAC_VLAN_MSG_MAC, mac, vid);
+
+	/* load onto outgoing mailbox */
+	return mbx->ops.enqueue_tx(hw, mbx, msg);
+}
+
+/**
+ *  fm10k_update_mc_addr_vf - Update device multicast addresses
+ *  @hw: pointer to the HW structure
+ *  @glort: unused
+ *  @mac: MAC address to add/remove from table
+ *  @vid: VLAN ID to add/remove from table
+ *  @add: Indicates if this is an add or remove operation
+ *
+ *  This function is used to add or remove multicast MAC addresses for
+ *  the VF.
+ **/
+static s32 fm10k_update_mc_addr_vf(struct fm10k_hw *hw,
+				   u16 __always_unused glort,
+				   const u8 *mac, u16 vid, bool add)
+{
+	struct fm10k_mbx_info *mbx = &hw->mbx;
+	u32 msg[7];
+
+	/* verify VLAN ID is valid */
+	if (vid >= FM10K_VLAN_TABLE_VID_MAX)
+		return FM10K_ERR_PARAM;
+
+	/* verify multicast address is valid */
+	if (!is_multicast_ether_addr(mac))
+		return FM10K_ERR_PARAM;
+
+	/* add bit to notify us if this is a set or clear operation */
+	if (!add)
+		vid |= FM10K_VLAN_CLEAR;
+
+	/* generate VLAN request */
+	fm10k_tlv_msg_init(msg, FM10K_VF_MSG_ID_MAC_VLAN);
+	fm10k_tlv_attr_put_mac_vlan(msg, FM10K_MAC_VLAN_MSG_MULTICAST,
+				    mac, vid);
+
+	/* load onto outgoing mailbox */
+	return mbx->ops.enqueue_tx(hw, mbx, msg);
+}
+
+/**
+ *  fm10k_update_int_moderator_vf - Request update of interrupt moderator list
+ *  @hw: pointer to hardware structure
+ *
+ *  This function will issue a request to the PF to rescan our MSI-X table
+ *  and to update the interrupt moderator linked list.
+ **/
+static void fm10k_update_int_moderator_vf(struct fm10k_hw *hw)
+{
+	struct fm10k_mbx_info *mbx = &hw->mbx;
+	u32 msg[1];
+
+	/* generate MSI-X request */
+	fm10k_tlv_msg_init(msg, FM10K_VF_MSG_ID_MSIX);
+
+	/* load onto outgoing mailbox */
+	mbx->ops.enqueue_tx(hw, mbx, msg);
+}
+
+/* This structure defines the attibutes to be parsed below */
+const struct fm10k_tlv_attr fm10k_lport_state_msg_attr[] = {
+	FM10K_TLV_ATTR_BOOL(FM10K_LPORT_STATE_MSG_DISABLE),
+	FM10K_TLV_ATTR_U8(FM10K_LPORT_STATE_MSG_XCAST_MODE),
+	FM10K_TLV_ATTR_BOOL(FM10K_LPORT_STATE_MSG_READY),
+	FM10K_TLV_ATTR_LAST
+};
+
+/**
+ *  fm10k_msg_lport_state_vf - Message handler for lport_state message from PF
+ *  @hw: Pointer to hardware structure
+ *  @results: pointer array containing parsed data
+ *  @mbx: Pointer to mailbox information structure
+ *
+ *  This handler is meant to capture the indication from the PF that we
+ *  are ready to bring up the interface.
+ **/
+s32 fm10k_msg_lport_state_vf(struct fm10k_hw *hw, u32 **results,
+			     struct fm10k_mbx_info __always_unused *mbx)
+{
+	hw->mac.dglort_map = !results[FM10K_LPORT_STATE_MSG_READY] ?
+			     FM10K_DGLORTMAP_NONE : FM10K_DGLORTMAP_ZERO;
+
+	return 0;
+}
+
+/**
+ *  fm10k_update_lport_state_vf - Update device state in lower device
+ *  @hw: pointer to the HW structure
+ *  @glort: unused
+ *  @count: number of logical ports to enable - unused (always 1)
+ *  @enable: boolean value indicating if this is an enable or disable request
+ *
+ *  Notify the lower device of a state change.  If the lower device is
+ *  enabled we can add filters, if it is disabled all filters for this
+ *  logical port are flushed.
+ **/
+static s32 fm10k_update_lport_state_vf(struct fm10k_hw *hw,
+				       u16 __always_unused glort,
+				       u16 __always_unused count, bool enable)
+{
+	struct fm10k_mbx_info *mbx = &hw->mbx;
+	u32 msg[2];
+
+	/* reset glort mask 0 as we have to wait to be enabled */
+	hw->mac.dglort_map = FM10K_DGLORTMAP_NONE;
+
+	/* generate port state request */
+	fm10k_tlv_msg_init(msg, FM10K_VF_MSG_ID_LPORT_STATE);
+	if (!enable)
+		fm10k_tlv_attr_put_bool(msg, FM10K_LPORT_STATE_MSG_DISABLE);
+
+	/* load onto outgoing mailbox */
+	return mbx->ops.enqueue_tx(hw, mbx, msg);
+}
+
+/**
+ *  fm10k_update_xcast_mode_vf - Request update of multicast mode
+ *  @hw: pointer to hardware structure
+ *  @glort: unused
+ *  @mode: integer value indicating mode being requested
+ *
+ *  This function will attempt to request a higher mode for the port
+ *  so that it can enable either multicast, multicast promiscuous, or
+ *  promiscuous mode of operation.
+ **/
+static s32 fm10k_update_xcast_mode_vf(struct fm10k_hw *hw,
+				      u16 __always_unused glort, u8 mode)
+{
+	struct fm10k_mbx_info *mbx = &hw->mbx;
+	u32 msg[3];
+
+	if (mode > FM10K_XCAST_MODE_NONE)
+		return FM10K_ERR_PARAM;
+
+	/* generate message requesting to change xcast mode */
+	fm10k_tlv_msg_init(msg, FM10K_VF_MSG_ID_LPORT_STATE);
+	fm10k_tlv_attr_put_u8(msg, FM10K_LPORT_STATE_MSG_XCAST_MODE, mode);
+
+	/* load onto outgoing mailbox */
+	return mbx->ops.enqueue_tx(hw, mbx, msg);
+}
+
+/**
+ *  fm10k_update_hw_stats_vf - Updates hardware related statistics of VF
+ *  @hw: pointer to hardware structure
+ *  @stats: pointer to statistics structure
+ *
+ *  This function collects and aggregates per queue hardware statistics.
+ **/
+static void fm10k_update_hw_stats_vf(struct fm10k_hw *hw,
+				     struct fm10k_hw_stats *stats)
+{
+	fm10k_update_hw_stats_q(hw, stats->q, 0, hw->mac.max_queues);
+}
+
+/**
+ *  fm10k_rebind_hw_stats_vf - Resets base for hardware statistics of VF
+ *  @hw: pointer to hardware structure
+ *  @stats: pointer to the stats structure to update
+ *
+ *  This function resets the base for queue hardware statistics.
+ **/
+static void fm10k_rebind_hw_stats_vf(struct fm10k_hw *hw,
+				     struct fm10k_hw_stats *stats)
+{
+	/* Unbind Queue Statistics */
+	fm10k_unbind_hw_stats_q(stats->q, 0, hw->mac.max_queues);
+
+	/* Reinitialize bases for all stats */
+	fm10k_update_hw_stats_vf(hw, stats);
+}
+
+/**
+ *  fm10k_configure_dglort_map_vf - Configures GLORT entry and queues
+ *  @hw: pointer to hardware structure
+ *  @dglort: pointer to dglort configuration structure
+ *
+ *  Reads the configuration structure contained in dglort_cfg and uses
+ *  that information to then populate a DGLORTMAP/DEC entry and the queues
+ *  to which it has been assigned.
+ **/
+static s32 fm10k_configure_dglort_map_vf(struct fm10k_hw __always_unused *hw,
+					 struct fm10k_dglort_cfg *dglort)
+{
+	/* verify the dglort pointer */
+	if (!dglort)
+		return FM10K_ERR_PARAM;
+
+	/* stub for now until we determine correct message for this */
+
+	return 0;
+}
+
+static const struct fm10k_msg_data fm10k_msg_data_vf[] = {
+	FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test),
+	FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_msg_mac_vlan_vf),
+	FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_msg_lport_state_vf),
+	FM10K_TLV_MSG_ERROR_HANDLER(fm10k_tlv_msg_error),
+};
+
+static const struct fm10k_mac_ops mac_ops_vf = {
+	.get_bus_info		= fm10k_get_bus_info_generic,
+	.reset_hw		= fm10k_reset_hw_vf,
+	.init_hw		= fm10k_init_hw_vf,
+	.start_hw		= fm10k_start_hw_generic,
+	.stop_hw		= fm10k_stop_hw_vf,
+	.update_vlan		= fm10k_update_vlan_vf,
+	.read_mac_addr		= fm10k_read_mac_addr_vf,
+	.update_uc_addr		= fm10k_update_uc_addr_vf,
+	.update_mc_addr		= fm10k_update_mc_addr_vf,
+	.update_xcast_mode	= fm10k_update_xcast_mode_vf,
+	.update_int_moderator	= fm10k_update_int_moderator_vf,
+	.update_lport_state	= fm10k_update_lport_state_vf,
+	.update_hw_stats	= fm10k_update_hw_stats_vf,
+	.rebind_hw_stats	= fm10k_rebind_hw_stats_vf,
+	.configure_dglort_map	= fm10k_configure_dglort_map_vf,
+	.get_host_state		= fm10k_get_host_state_generic,
+};
+
+static s32 fm10k_get_invariants_vf(struct fm10k_hw *hw)
+{
+	fm10k_get_invariants_generic(hw);
+
+	return fm10k_pfvf_mbx_init(hw, &hw->mbx, fm10k_msg_data_vf, 0);
+}
+
+const struct fm10k_info fm10k_vf_info = {
+	.mac		= fm10k_mac_vf,
+	.get_invariants	= fm10k_get_invariants_vf,
+	.mac_ops	= &mac_ops_vf,
+};
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.h b/drivers/net/ethernet/intel/fm10k/fm10k_vf.h
new file mode 100644
index 0000000000..787d0d570a
--- /dev/null
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _FM10K_VF_H_
+#define _FM10K_VF_H_
+
+#include "fm10k_type.h"
+#include "fm10k_common.h"
+
+enum fm10k_vf_tlv_msg_id {
+	FM10K_VF_MSG_ID_TEST = 0,	/* msg ID reserved for testing */
+	FM10K_VF_MSG_ID_MSIX,
+	FM10K_VF_MSG_ID_MAC_VLAN,
+	FM10K_VF_MSG_ID_LPORT_STATE,
+	FM10K_VF_MSG_ID_MAX,
+};
+
+enum fm10k_tlv_mac_vlan_attr_id {
+	FM10K_MAC_VLAN_MSG_VLAN,
+	FM10K_MAC_VLAN_MSG_SET,
+	FM10K_MAC_VLAN_MSG_MAC,
+	FM10K_MAC_VLAN_MSG_DEFAULT_MAC,
+	FM10K_MAC_VLAN_MSG_MULTICAST,
+	FM10K_MAC_VLAN_MSG_ID_MAX
+};
+
+enum fm10k_tlv_lport_state_attr_id {
+	FM10K_LPORT_STATE_MSG_DISABLE,
+	FM10K_LPORT_STATE_MSG_XCAST_MODE,
+	FM10K_LPORT_STATE_MSG_READY,
+	FM10K_LPORT_STATE_MSG_MAX
+};
+
+#define FM10K_VF_MSG_MSIX_HANDLER(func) \
+	 FM10K_MSG_HANDLER(FM10K_VF_MSG_ID_MSIX, NULL, func)
+
+s32 fm10k_msg_mac_vlan_vf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *);
+extern const struct fm10k_tlv_attr fm10k_mac_vlan_msg_attr[];
+#define FM10K_VF_MSG_MAC_VLAN_HANDLER(func) \
+	FM10K_MSG_HANDLER(FM10K_VF_MSG_ID_MAC_VLAN, \
+			  fm10k_mac_vlan_msg_attr, func)
+
+s32 fm10k_msg_lport_state_vf(struct fm10k_hw *, u32 **,
+			     struct fm10k_mbx_info *);
+extern const struct fm10k_tlv_attr fm10k_lport_state_msg_attr[];
+#define FM10K_VF_MSG_LPORT_STATE_HANDLER(func) \
+	FM10K_MSG_HANDLER(FM10K_VF_MSG_ID_LPORT_STATE, \
+			  fm10k_lport_state_msg_attr, func)
+
+extern const struct fm10k_info fm10k_vf_info;
+#endif /* _FM10K_VF_H */
diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile
new file mode 100644
index 0000000000..2f21b3e89f
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2013 - 2018 Intel Corporation.
+
+#
+# Makefile for the Intel(R) Ethernet Connection XL710 (i40e.ko) driver
+#
+
+ccflags-y += -I$(src)
+subdir-ccflags-y += -I$(src)
+
+obj-$(CONFIG_I40E) += i40e.o
+
+i40e-objs := i40e_main.o \
+	i40e_ethtool.o	\
+	i40e_adminq.o	\
+	i40e_common.o	\
+	i40e_hmc.o	\
+	i40e_lan_hmc.o	\
+	i40e_nvm.o	\
+	i40e_debugfs.o	\
+	i40e_diag.o	\
+	i40e_txrx.o	\
+	i40e_ptp.o	\
+	i40e_ddp.o \
+	i40e_client.o   \
+	i40e_virtchnl_pf.o \
+	i40e_xsk.o
+
+i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
new file mode 100644
index 0000000000..55bb0b5310
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -0,0 +1,1324 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2021 Intel Corporation. */
+
+#ifndef _I40E_H_
+#define _I40E_H_
+
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/ioport.h>
+#include <linux/iommu.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/hashtable.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/sctp.h>
+#include <linux/pkt_sched.h>
+#include <linux/ipv6.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/if_macvlan.h>
+#include <linux/if_bridge.h>
+#include <linux/clocksource.h>
+#include <linux/net_tstamp.h>
+#include <linux/ptp_clock_kernel.h>
+#include <net/pkt_cls.h>
+#include <net/pkt_sched.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/udp_tunnel.h>
+#include <net/xdp_sock.h>
+#include <linux/bitfield.h>
+#include "i40e_type.h"
+#include "i40e_prototype.h"
+#include <linux/net/intel/i40e_client.h>
+#include <linux/avf/virtchnl.h>
+#include "i40e_virtchnl_pf.h"
+#include "i40e_txrx.h"
+#include "i40e_dcb.h"
+
+/* Useful i40e defaults */
+#define I40E_MAX_VEB			16
+
+#define I40E_MAX_NUM_DESCRIPTORS	4096
+#define I40E_MAX_CSR_SPACE		(4 * 1024 * 1024 - 64 * 1024)
+#define I40E_DEFAULT_NUM_DESCRIPTORS	512
+#define I40E_REQ_DESCRIPTOR_MULTIPLE	32
+#define I40E_MIN_NUM_DESCRIPTORS	64
+#define I40E_MIN_MSIX			2
+#define I40E_DEFAULT_NUM_VMDQ_VSI	8 /* max 256 VSIs */
+#define I40E_MIN_VSI_ALLOC		83 /* LAN, ATR, FCOE, 64 VF */
+/* max 16 qps */
+#define i40e_default_queues_per_vmdq(pf) \
+		(((pf)->hw_features & I40E_HW_RSS_AQ_CAPABLE) ? 4 : 1)
+#define I40E_DEFAULT_QUEUES_PER_VF	4
+#define I40E_MAX_VF_QUEUES		16
+#define i40e_pf_get_max_q_per_tc(pf) \
+		(((pf)->hw_features & I40E_HW_128_QP_RSS_CAPABLE) ? 128 : 64)
+#define I40E_FDIR_RING_COUNT		32
+#define I40E_MAX_AQ_BUF_SIZE		4096
+#define I40E_AQ_LEN			256
+#define I40E_MIN_ARQ_LEN		1
+#define I40E_MIN_ASQ_LEN		2
+#define I40E_AQ_WORK_LIMIT		66 /* max number of VFs + a little */
+#define I40E_MAX_USER_PRIORITY		8
+#define I40E_DEFAULT_TRAFFIC_CLASS	BIT(0)
+#define I40E_QUEUE_WAIT_RETRY_LIMIT	10
+#define I40E_INT_NAME_STR_LEN		(IFNAMSIZ + 16)
+
+#define I40E_NVM_VERSION_LO_SHIFT	0
+#define I40E_NVM_VERSION_LO_MASK	(0xff << I40E_NVM_VERSION_LO_SHIFT)
+#define I40E_NVM_VERSION_HI_SHIFT	12
+#define I40E_NVM_VERSION_HI_MASK	(0xf << I40E_NVM_VERSION_HI_SHIFT)
+#define I40E_OEM_VER_BUILD_MASK		0xffff
+#define I40E_OEM_VER_PATCH_MASK		0xff
+#define I40E_OEM_VER_BUILD_SHIFT	8
+#define I40E_OEM_VER_SHIFT		24
+#define I40E_PHY_DEBUG_ALL \
+	(I40E_AQ_PHY_DEBUG_DISABLE_LINK_FW | \
+	I40E_AQ_PHY_DEBUG_DISABLE_ALL_LINK_FW)
+
+#define I40E_OEM_EETRACK_ID		0xffffffff
+#define I40E_OEM_GEN_SHIFT		24
+#define I40E_OEM_SNAP_MASK		0x00ff0000
+#define I40E_OEM_SNAP_SHIFT		16
+#define I40E_OEM_RELEASE_MASK		0x0000ffff
+
+#define I40E_RX_DESC(R, i)	\
+	(&(((union i40e_rx_desc *)((R)->desc))[i]))
+#define I40E_TX_DESC(R, i)	\
+	(&(((struct i40e_tx_desc *)((R)->desc))[i]))
+#define I40E_TX_CTXTDESC(R, i)	\
+	(&(((struct i40e_tx_context_desc *)((R)->desc))[i]))
+#define I40E_TX_FDIRDESC(R, i)	\
+	(&(((struct i40e_filter_program_desc *)((R)->desc))[i]))
+
+/* BW rate limiting */
+#define I40E_BW_CREDIT_DIVISOR		50 /* 50Mbps per BW credit */
+#define I40E_BW_MBPS_DIVISOR		125000 /* rate / (1000000 / 8) Mbps */
+#define I40E_MAX_BW_INACTIVE_ACCUM	4 /* accumulate 4 credits max */
+
+/* driver state flags */
+enum i40e_state_t {
+	__I40E_TESTING,
+	__I40E_CONFIG_BUSY,
+	__I40E_CONFIG_DONE,
+	__I40E_DOWN,
+	__I40E_SERVICE_SCHED,
+	__I40E_ADMINQ_EVENT_PENDING,
+	__I40E_MDD_EVENT_PENDING,
+	__I40E_VFLR_EVENT_PENDING,
+	__I40E_RESET_RECOVERY_PENDING,
+	__I40E_TIMEOUT_RECOVERY_PENDING,
+	__I40E_MISC_IRQ_REQUESTED,
+	__I40E_RESET_INTR_RECEIVED,
+	__I40E_REINIT_REQUESTED,
+	__I40E_PF_RESET_REQUESTED,
+	__I40E_PF_RESET_AND_REBUILD_REQUESTED,
+	__I40E_CORE_RESET_REQUESTED,
+	__I40E_GLOBAL_RESET_REQUESTED,
+	__I40E_EMP_RESET_INTR_RECEIVED,
+	__I40E_SUSPENDED,
+	__I40E_PTP_TX_IN_PROGRESS,
+	__I40E_BAD_EEPROM,
+	__I40E_DOWN_REQUESTED,
+	__I40E_FD_FLUSH_REQUESTED,
+	__I40E_FD_ATR_AUTO_DISABLED,
+	__I40E_FD_SB_AUTO_DISABLED,
+	__I40E_RESET_FAILED,
+	__I40E_PORT_SUSPENDED,
+	__I40E_VF_DISABLE,
+	__I40E_MACVLAN_SYNC_PENDING,
+	__I40E_TEMP_LINK_POLLING,
+	__I40E_CLIENT_SERVICE_REQUESTED,
+	__I40E_CLIENT_L2_CHANGE,
+	__I40E_CLIENT_RESET,
+	__I40E_VIRTCHNL_OP_PENDING,
+	__I40E_RECOVERY_MODE,
+	__I40E_VF_RESETS_DISABLED,	/* disable resets during i40e_remove */
+	__I40E_IN_REMOVE,
+	__I40E_VFS_RELEASING,
+	/* This must be last as it determines the size of the BITMAP */
+	__I40E_STATE_SIZE__,
+};
+
+#define I40E_PF_RESET_FLAG	BIT_ULL(__I40E_PF_RESET_REQUESTED)
+#define I40E_PF_RESET_AND_REBUILD_FLAG	\
+	BIT_ULL(__I40E_PF_RESET_AND_REBUILD_REQUESTED)
+
+/* VSI state flags */
+enum i40e_vsi_state_t {
+	__I40E_VSI_DOWN,
+	__I40E_VSI_NEEDS_RESTART,
+	__I40E_VSI_SYNCING_FILTERS,
+	__I40E_VSI_OVERFLOW_PROMISC,
+	__I40E_VSI_REINIT_REQUESTED,
+	__I40E_VSI_DOWN_REQUESTED,
+	__I40E_VSI_RELEASING,
+	/* This must be last as it determines the size of the BITMAP */
+	__I40E_VSI_STATE_SIZE__,
+};
+
+enum i40e_interrupt_policy {
+	I40E_INTERRUPT_BEST_CASE,
+	I40E_INTERRUPT_MEDIUM,
+	I40E_INTERRUPT_LOWEST
+};
+
+struct i40e_lump_tracking {
+	u16 num_entries;
+	u16 list[];
+#define I40E_PILE_VALID_BIT  0x8000
+#define I40E_IWARP_IRQ_PILE_ID  (I40E_PILE_VALID_BIT - 2)
+};
+
+#define I40E_DEFAULT_ATR_SAMPLE_RATE	20
+#define I40E_FDIR_MAX_RAW_PACKET_SIZE	512
+#define I40E_FDIR_BUFFER_FULL_MARGIN	10
+#define I40E_FDIR_BUFFER_HEAD_ROOM	32
+#define I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR (I40E_FDIR_BUFFER_HEAD_ROOM * 4)
+
+#define I40E_HKEY_ARRAY_SIZE	((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4)
+#define I40E_HLUT_ARRAY_SIZE	((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4)
+#define I40E_VF_HLUT_ARRAY_SIZE	((I40E_VFQF_HLUT1_MAX_INDEX + 1) * 4)
+
+enum i40e_fd_stat_idx {
+	I40E_FD_STAT_ATR,
+	I40E_FD_STAT_SB,
+	I40E_FD_STAT_ATR_TUNNEL,
+	I40E_FD_STAT_PF_COUNT
+};
+#define I40E_FD_STAT_PF_IDX(pf_id) ((pf_id) * I40E_FD_STAT_PF_COUNT)
+#define I40E_FD_ATR_STAT_IDX(pf_id) \
+			(I40E_FD_STAT_PF_IDX(pf_id) + I40E_FD_STAT_ATR)
+#define I40E_FD_SB_STAT_IDX(pf_id)  \
+			(I40E_FD_STAT_PF_IDX(pf_id) + I40E_FD_STAT_SB)
+#define I40E_FD_ATR_TUNNEL_STAT_IDX(pf_id) \
+			(I40E_FD_STAT_PF_IDX(pf_id) + I40E_FD_STAT_ATR_TUNNEL)
+
+/* The following structure contains the data parsed from the user-defined
+ * field of the ethtool_rx_flow_spec structure.
+ */
+struct i40e_rx_flow_userdef {
+	bool flex_filter;
+	u16 flex_word;
+	u16 flex_offset;
+};
+
+struct i40e_fdir_filter {
+	struct hlist_node fdir_node;
+	/* filter ipnut set */
+	u8 flow_type;
+	u8 ipl4_proto;
+	/* TX packet view of src and dst */
+	__be32 dst_ip;
+	__be32 src_ip;
+	__be32 dst_ip6[4];
+	__be32 src_ip6[4];
+	__be16 src_port;
+	__be16 dst_port;
+	__be32 sctp_v_tag;
+
+	__be16 vlan_etype;
+	__be16 vlan_tag;
+	/* Flexible data to match within the packet payload */
+	__be16 flex_word;
+	u16 flex_offset;
+	bool flex_filter;
+
+	/* filter control */
+	u16 q_index;
+	u8  flex_off;
+	u8  pctype;
+	u16 dest_vsi;
+	u8  dest_ctl;
+	u8  fd_status;
+	u16 cnt_index;
+	u32 fd_id;
+};
+
+#define I40E_CLOUD_FIELD_OMAC		BIT(0)
+#define I40E_CLOUD_FIELD_IMAC		BIT(1)
+#define I40E_CLOUD_FIELD_IVLAN		BIT(2)
+#define I40E_CLOUD_FIELD_TEN_ID		BIT(3)
+#define I40E_CLOUD_FIELD_IIP		BIT(4)
+
+#define I40E_CLOUD_FILTER_FLAGS_OMAC	I40E_CLOUD_FIELD_OMAC
+#define I40E_CLOUD_FILTER_FLAGS_IMAC	I40E_CLOUD_FIELD_IMAC
+#define I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN	(I40E_CLOUD_FIELD_IMAC | \
+						 I40E_CLOUD_FIELD_IVLAN)
+#define I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID	(I40E_CLOUD_FIELD_IMAC | \
+						 I40E_CLOUD_FIELD_TEN_ID)
+#define I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC (I40E_CLOUD_FIELD_OMAC | \
+						  I40E_CLOUD_FIELD_IMAC | \
+						  I40E_CLOUD_FIELD_TEN_ID)
+#define I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID (I40E_CLOUD_FIELD_IMAC | \
+						   I40E_CLOUD_FIELD_IVLAN | \
+						   I40E_CLOUD_FIELD_TEN_ID)
+#define I40E_CLOUD_FILTER_FLAGS_IIP	I40E_CLOUD_FIELD_IIP
+
+struct i40e_cloud_filter {
+	struct hlist_node cloud_node;
+	unsigned long cookie;
+	/* cloud filter input set follows */
+	u8 dst_mac[ETH_ALEN];
+	u8 src_mac[ETH_ALEN];
+	__be16 vlan_id;
+	u16 seid;       /* filter control */
+	__be16 dst_port;
+	__be16 src_port;
+	u32 tenant_id;
+	union {
+		struct {
+			struct in_addr dst_ip;
+			struct in_addr src_ip;
+		} v4;
+		struct {
+			struct in6_addr dst_ip6;
+			struct in6_addr src_ip6;
+		} v6;
+	} ip;
+#define dst_ipv6	ip.v6.dst_ip6.s6_addr32
+#define src_ipv6	ip.v6.src_ip6.s6_addr32
+#define dst_ipv4	ip.v4.dst_ip.s_addr
+#define src_ipv4	ip.v4.src_ip.s_addr
+	u16 n_proto;    /* Ethernet Protocol */
+	u8 ip_proto;    /* IPPROTO value */
+	u8 flags;
+#define I40E_CLOUD_TNL_TYPE_NONE        0xff
+	u8 tunnel_type;
+};
+
+#define I40E_DCB_PRIO_TYPE_STRICT	0
+#define I40E_DCB_PRIO_TYPE_ETS		1
+#define I40E_DCB_STRICT_PRIO_CREDITS	127
+/* DCB per TC information data structure */
+struct i40e_tc_info {
+	u16	qoffset;	/* Queue offset from base queue */
+	u16	qcount;		/* Total Queues */
+	u8	netdev_tc;	/* Netdev TC index if netdev associated */
+};
+
+/* TC configuration data structure */
+struct i40e_tc_configuration {
+	u8	numtc;		/* Total number of enabled TCs */
+	u8	enabled_tc;	/* TC map */
+	struct i40e_tc_info tc_info[I40E_MAX_TRAFFIC_CLASS];
+};
+
+#define I40E_UDP_PORT_INDEX_UNUSED	255
+struct i40e_udp_port_config {
+	/* AdminQ command interface expects port number in Host byte order */
+	u16 port;
+	u8 type;
+	u8 filter_index;
+};
+
+#define I40_DDP_FLASH_REGION 100
+#define I40E_PROFILE_INFO_SIZE 48
+#define I40E_MAX_PROFILE_NUM 16
+#define I40E_PROFILE_LIST_SIZE \
+	(I40E_PROFILE_INFO_SIZE * I40E_MAX_PROFILE_NUM + 4)
+#define I40E_DDP_PROFILE_PATH "intel/i40e/ddp/"
+#define I40E_DDP_PROFILE_NAME_MAX 64
+
+int i40e_ddp_load(struct net_device *netdev, const u8 *data, size_t size,
+		  bool is_add);
+int i40e_ddp_flash(struct net_device *netdev, struct ethtool_flash *flash);
+
+struct i40e_ddp_profile_list {
+	u32 p_count;
+	struct i40e_profile_info p_info[];
+};
+
+struct i40e_ddp_old_profile_list {
+	struct list_head list;
+	size_t old_ddp_size;
+	u8 old_ddp_buf[];
+};
+
+/* macros related to FLX_PIT */
+#define I40E_FLEX_SET_FSIZE(fsize) (((fsize) << \
+				    I40E_PRTQF_FLX_PIT_FSIZE_SHIFT) & \
+				    I40E_PRTQF_FLX_PIT_FSIZE_MASK)
+#define I40E_FLEX_SET_DST_WORD(dst) (((dst) << \
+				     I40E_PRTQF_FLX_PIT_DEST_OFF_SHIFT) & \
+				     I40E_PRTQF_FLX_PIT_DEST_OFF_MASK)
+#define I40E_FLEX_SET_SRC_WORD(src) (((src) << \
+				     I40E_PRTQF_FLX_PIT_SOURCE_OFF_SHIFT) & \
+				     I40E_PRTQF_FLX_PIT_SOURCE_OFF_MASK)
+#define I40E_FLEX_PREP_VAL(dst, fsize, src) (I40E_FLEX_SET_DST_WORD(dst) | \
+					     I40E_FLEX_SET_FSIZE(fsize) | \
+					     I40E_FLEX_SET_SRC_WORD(src))
+
+
+#define I40E_MAX_FLEX_SRC_OFFSET 0x1F
+
+/* macros related to GLQF_ORT */
+#define I40E_ORT_SET_IDX(idx)		(((idx) << \
+					  I40E_GLQF_ORT_PIT_INDX_SHIFT) & \
+					 I40E_GLQF_ORT_PIT_INDX_MASK)
+
+#define I40E_ORT_SET_COUNT(count)	(((count) << \
+					  I40E_GLQF_ORT_FIELD_CNT_SHIFT) & \
+					 I40E_GLQF_ORT_FIELD_CNT_MASK)
+
+#define I40E_ORT_SET_PAYLOAD(payload)	(((payload) << \
+					  I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT) & \
+					 I40E_GLQF_ORT_FLX_PAYLOAD_MASK)
+
+#define I40E_ORT_PREP_VAL(idx, count, payload) (I40E_ORT_SET_IDX(idx) | \
+						I40E_ORT_SET_COUNT(count) | \
+						I40E_ORT_SET_PAYLOAD(payload))
+
+#define I40E_L3_GLQF_ORT_IDX		34
+#define I40E_L4_GLQF_ORT_IDX		35
+
+/* Flex PIT register index */
+#define I40E_FLEX_PIT_IDX_START_L3	3
+#define I40E_FLEX_PIT_IDX_START_L4	6
+
+#define I40E_FLEX_PIT_TABLE_SIZE	3
+
+#define I40E_FLEX_DEST_UNUSED		63
+
+#define I40E_FLEX_INDEX_ENTRIES		8
+
+/* Flex MASK to disable all flexible entries */
+#define I40E_FLEX_INPUT_MASK	(I40E_FLEX_50_MASK | I40E_FLEX_51_MASK | \
+				 I40E_FLEX_52_MASK | I40E_FLEX_53_MASK | \
+				 I40E_FLEX_54_MASK | I40E_FLEX_55_MASK | \
+				 I40E_FLEX_56_MASK | I40E_FLEX_57_MASK)
+
+#define I40E_QINT_TQCTL_VAL(qp, vector, nextq_type) \
+	(I40E_QINT_TQCTL_CAUSE_ENA_MASK | \
+	(I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | \
+	((vector) << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | \
+	((qp) << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | \
+	(I40E_QUEUE_TYPE_##nextq_type << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT))
+
+#define I40E_QINT_RQCTL_VAL(qp, vector, nextq_type) \
+	(I40E_QINT_RQCTL_CAUSE_ENA_MASK | \
+	(I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | \
+	((vector) << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | \
+	((qp) << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | \
+	(I40E_QUEUE_TYPE_##nextq_type << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT))
+
+struct i40e_flex_pit {
+	struct list_head list;
+	u16 src_offset;
+	u8 pit_index;
+};
+
+struct i40e_fwd_adapter {
+	struct net_device *netdev;
+	int bit_no;
+};
+
+struct i40e_channel {
+	struct list_head list;
+	bool initialized;
+	u8 type;
+	u16 vsi_number; /* Assigned VSI number from AQ 'Add VSI' response */
+	u16 stat_counter_idx;
+	u16 base_queue;
+	u16 num_queue_pairs; /* Requested by user */
+	u16 seid;
+
+	u8 enabled_tc;
+	struct i40e_aqc_vsi_properties_data info;
+
+	u64 max_tx_rate;
+	struct i40e_fwd_adapter *fwd;
+
+	/* track this channel belongs to which VSI */
+	struct i40e_vsi *parent_vsi;
+};
+
+struct i40e_ptp_pins_settings;
+
+static inline bool i40e_is_channel_macvlan(struct i40e_channel *ch)
+{
+	return !!ch->fwd;
+}
+
+static inline const u8 *i40e_channel_mac(struct i40e_channel *ch)
+{
+	if (i40e_is_channel_macvlan(ch))
+		return ch->fwd->netdev->dev_addr;
+	else
+		return NULL;
+}
+
+/* struct that defines the Ethernet device */
+struct i40e_pf {
+	struct pci_dev *pdev;
+	struct i40e_hw hw;
+	DECLARE_BITMAP(state, __I40E_STATE_SIZE__);
+	struct msix_entry *msix_entries;
+	bool fc_autoneg_status;
+
+	u16 eeprom_version;
+	u16 num_vmdq_vsis;         /* num vmdq vsis this PF has set up */
+	u16 num_vmdq_qps;          /* num queue pairs per vmdq pool */
+	u16 num_vmdq_msix;         /* num queue vectors per vmdq pool */
+	u16 num_req_vfs;           /* num VFs requested for this PF */
+	u16 num_vf_qps;            /* num queue pairs per VF */
+	u16 num_lan_qps;           /* num lan queues this PF has set up */
+	u16 num_lan_msix;          /* num queue vectors for the base PF vsi */
+	u16 num_fdsb_msix;         /* num queue vectors for sideband Fdir */
+	u16 num_iwarp_msix;        /* num of iwarp vectors for this PF */
+	int iwarp_base_vector;
+	int queues_left;           /* queues left unclaimed */
+	u16 alloc_rss_size;        /* allocated RSS queues */
+	u16 rss_size_max;          /* HW defined max RSS queues */
+	u16 fdir_pf_filter_count;  /* num of guaranteed filters for this PF */
+	u16 num_alloc_vsi;         /* num VSIs this driver supports */
+	u8 atr_sample_rate;
+	bool wol_en;
+
+	struct hlist_head fdir_filter_list;
+	u16 fdir_pf_active_filters;
+	unsigned long fd_flush_timestamp;
+	u32 fd_flush_cnt;
+	u32 fd_add_err;
+	u32 fd_atr_cnt;
+
+	/* Book-keeping of side-band filter count per flow-type.
+	 * This is used to detect and handle input set changes for
+	 * respective flow-type.
+	 */
+	u16 fd_tcp4_filter_cnt;
+	u16 fd_udp4_filter_cnt;
+	u16 fd_sctp4_filter_cnt;
+	u16 fd_ip4_filter_cnt;
+
+	u16 fd_tcp6_filter_cnt;
+	u16 fd_udp6_filter_cnt;
+	u16 fd_sctp6_filter_cnt;
+	u16 fd_ip6_filter_cnt;
+
+	/* Flexible filter table values that need to be programmed into
+	 * hardware, which expects L3 and L4 to be programmed separately. We
+	 * need to ensure that the values are in ascended order and don't have
+	 * duplicates, so we track each L3 and L4 values in separate lists.
+	 */
+	struct list_head l3_flex_pit_list;
+	struct list_head l4_flex_pit_list;
+
+	struct udp_tunnel_nic_shared udp_tunnel_shared;
+	struct udp_tunnel_nic_info udp_tunnel_nic;
+
+	struct hlist_head cloud_filter_list;
+	u16 num_cloud_filters;
+
+	enum i40e_interrupt_policy int_policy;
+	u16 rx_itr_default;
+	u16 tx_itr_default;
+	u32 msg_enable;
+	char int_name[I40E_INT_NAME_STR_LEN];
+	u16 adminq_work_limit; /* num of admin receive queue desc to process */
+	unsigned long service_timer_period;
+	unsigned long service_timer_previous;
+	struct timer_list service_timer;
+	struct work_struct service_task;
+
+	u32 hw_features;
+#define I40E_HW_RSS_AQ_CAPABLE			BIT(0)
+#define I40E_HW_128_QP_RSS_CAPABLE		BIT(1)
+#define I40E_HW_ATR_EVICT_CAPABLE		BIT(2)
+#define I40E_HW_WB_ON_ITR_CAPABLE		BIT(3)
+#define I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE	BIT(4)
+#define I40E_HW_NO_PCI_LINK_CHECK		BIT(5)
+#define I40E_HW_100M_SGMII_CAPABLE		BIT(6)
+#define I40E_HW_NO_DCB_SUPPORT			BIT(7)
+#define I40E_HW_USE_SET_LLDP_MIB		BIT(8)
+#define I40E_HW_GENEVE_OFFLOAD_CAPABLE		BIT(9)
+#define I40E_HW_PTP_L4_CAPABLE			BIT(10)
+#define I40E_HW_WOL_MC_MAGIC_PKT_WAKE		BIT(11)
+#define I40E_HW_HAVE_CRT_RETIMER		BIT(13)
+#define I40E_HW_OUTER_UDP_CSUM_CAPABLE		BIT(14)
+#define I40E_HW_PHY_CONTROLS_LEDS		BIT(15)
+#define I40E_HW_STOP_FW_LLDP			BIT(16)
+#define I40E_HW_PORT_ID_VALID			BIT(17)
+#define I40E_HW_RESTART_AUTONEG			BIT(18)
+
+	u32 flags;
+#define I40E_FLAG_RX_CSUM_ENABLED		BIT(0)
+#define I40E_FLAG_MSI_ENABLED			BIT(1)
+#define I40E_FLAG_MSIX_ENABLED			BIT(2)
+#define I40E_FLAG_RSS_ENABLED			BIT(3)
+#define I40E_FLAG_VMDQ_ENABLED			BIT(4)
+#define I40E_FLAG_SRIOV_ENABLED			BIT(5)
+#define I40E_FLAG_DCB_CAPABLE			BIT(6)
+#define I40E_FLAG_DCB_ENABLED			BIT(7)
+#define I40E_FLAG_FD_SB_ENABLED			BIT(8)
+#define I40E_FLAG_FD_ATR_ENABLED		BIT(9)
+#define I40E_FLAG_MFP_ENABLED			BIT(10)
+#define I40E_FLAG_HW_ATR_EVICT_ENABLED		BIT(11)
+#define I40E_FLAG_VEB_MODE_ENABLED		BIT(12)
+#define I40E_FLAG_VEB_STATS_ENABLED		BIT(13)
+#define I40E_FLAG_LINK_POLLING_ENABLED		BIT(14)
+#define I40E_FLAG_TRUE_PROMISC_SUPPORT		BIT(15)
+#define I40E_FLAG_LEGACY_RX			BIT(16)
+#define I40E_FLAG_PTP				BIT(17)
+#define I40E_FLAG_IWARP_ENABLED			BIT(18)
+#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED	BIT(19)
+#define I40E_FLAG_SOURCE_PRUNING_DISABLED       BIT(20)
+#define I40E_FLAG_TC_MQPRIO			BIT(21)
+#define I40E_FLAG_FD_SB_INACTIVE		BIT(22)
+#define I40E_FLAG_FD_SB_TO_CLOUD_FILTER		BIT(23)
+#define I40E_FLAG_DISABLE_FW_LLDP		BIT(24)
+#define I40E_FLAG_RS_FEC			BIT(25)
+#define I40E_FLAG_BASE_R_FEC			BIT(26)
+/* TOTAL_PORT_SHUTDOWN
+ * Allows to physically disable the link on the NIC's port.
+ * If enabled, (after link down request from the OS)
+ * no link, traffic or led activity is possible on that port.
+ *
+ * If I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED is set, the
+ * I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED must be explicitly forced to true
+ * and cannot be disabled by system admin at that time.
+ * The functionalities are exclusive in terms of configuration, but they also
+ * have similar behavior (allowing to disable physical link of the port),
+ * with following differences:
+ * - LINK_DOWN_ON_CLOSE_ENABLED is configurable at host OS run-time and is
+ *   supported by whole family of 7xx Intel Ethernet Controllers
+ * - TOTAL_PORT_SHUTDOWN may be enabled only before OS loads (in BIOS)
+ *   only if motherboard's BIOS and NIC's FW has support of it
+ * - when LINK_DOWN_ON_CLOSE_ENABLED is used, the link is being brought down
+ *   by sending phy_type=0 to NIC's FW
+ * - when TOTAL_PORT_SHUTDOWN is used, phy_type is not altered, instead
+ *   the link is being brought down by clearing bit (I40E_AQ_PHY_ENABLE_LINK)
+ *   in abilities field of i40e_aq_set_phy_config structure
+ */
+#define I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED	BIT(27)
+#define I40E_FLAG_VF_VLAN_PRUNING		BIT(28)
+
+	struct i40e_client_instance *cinst;
+	bool stat_offsets_loaded;
+	struct i40e_hw_port_stats stats;
+	struct i40e_hw_port_stats stats_offsets;
+	u32 tx_timeout_count;
+	u32 tx_timeout_recovery_level;
+	unsigned long tx_timeout_last_recovery;
+	u32 tx_sluggish_count;
+	u32 hw_csum_rx_error;
+	u32 led_status;
+	u16 corer_count; /* Core reset count */
+	u16 globr_count; /* Global reset count */
+	u16 empr_count; /* EMP reset count */
+	u16 pfr_count; /* PF reset count */
+	u16 sw_int_count; /* SW interrupt count */
+
+	struct mutex switch_mutex;
+	u16 lan_vsi;       /* our default LAN VSI */
+	u16 lan_veb;       /* initial relay, if exists */
+#define I40E_NO_VEB	0xffff
+#define I40E_NO_VSI	0xffff
+	u16 next_vsi;      /* Next unallocated VSI - 0-based! */
+	struct i40e_vsi **vsi;
+	struct i40e_veb *veb[I40E_MAX_VEB];
+
+	struct i40e_lump_tracking *qp_pile;
+	struct i40e_lump_tracking *irq_pile;
+
+	/* switch config info */
+	u16 pf_seid;
+	u16 main_vsi_seid;
+	u16 mac_seid;
+	struct kobject *switch_kobj;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *i40e_dbg_pf;
+#endif /* CONFIG_DEBUG_FS */
+	bool cur_promisc;
+
+	u16 instance; /* A unique number per i40e_pf instance in the system */
+
+	/* sr-iov config info */
+	struct i40e_vf *vf;
+	int num_alloc_vfs;	/* actual number of VFs allocated */
+	u32 vf_aq_requests;
+	u32 arq_overflows;	/* Not fatal, possibly indicative of problems */
+
+	/* DCBx/DCBNL capability for PF that indicates
+	 * whether DCBx is managed by firmware or host
+	 * based agent (LLDPAD). Also, indicates what
+	 * flavor of DCBx protocol (IEEE/CEE) is supported
+	 * by the device. For now we're supporting IEEE
+	 * mode only.
+	 */
+	u16 dcbx_cap;
+
+	struct i40e_filter_control_settings filter_settings;
+	struct i40e_rx_pb_config pb_cfg; /* Current Rx packet buffer config */
+	struct i40e_dcbx_config tmp_cfg;
+
+/* GPIO defines used by PTP */
+#define I40E_SDP3_2			18
+#define I40E_SDP3_3			19
+#define I40E_GPIO_4			20
+#define I40E_LED2_0			26
+#define I40E_LED2_1			27
+#define I40E_LED3_0			28
+#define I40E_LED3_1			29
+#define I40E_GLGEN_GPIO_SET_SDP_DATA_HI \
+	(1 << I40E_GLGEN_GPIO_SET_SDP_DATA_SHIFT)
+#define I40E_GLGEN_GPIO_SET_DRV_SDP_DATA \
+	(1 << I40E_GLGEN_GPIO_SET_DRIVE_SDP_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_0 \
+	(0 << I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_1 \
+	(1 << I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_RESERVED	BIT(2)
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_Z \
+	(1 << I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_DIR_OUT \
+	(1 << I40E_GLGEN_GPIO_CTL_PIN_DIR_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_TRI_DRV_HI \
+	(1 << I40E_GLGEN_GPIO_CTL_TRI_CTL_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_OUT_HI_RST \
+	(1 << I40E_GLGEN_GPIO_CTL_OUT_CTL_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_TIMESYNC_0 \
+	(3 << I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_TIMESYNC_1 \
+	(4 << I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN \
+	(0x3F << I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT \
+	(1 << I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PORT_0_IN_TIMESYNC_0 \
+	(I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \
+	 I40E_GLGEN_GPIO_CTL_TIMESYNC_0 | \
+	 I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_0)
+#define I40E_GLGEN_GPIO_CTL_PORT_1_IN_TIMESYNC_0 \
+	(I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \
+	 I40E_GLGEN_GPIO_CTL_TIMESYNC_0 | \
+	 I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_1)
+#define I40E_GLGEN_GPIO_CTL_PORT_0_OUT_TIMESYNC_1 \
+	(I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \
+	 I40E_GLGEN_GPIO_CTL_TIMESYNC_1 | I40E_GLGEN_GPIO_CTL_OUT_HI_RST | \
+	 I40E_GLGEN_GPIO_CTL_TRI_DRV_HI | I40E_GLGEN_GPIO_CTL_DIR_OUT | \
+	 I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_0)
+#define I40E_GLGEN_GPIO_CTL_PORT_1_OUT_TIMESYNC_1 \
+	(I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN | \
+	 I40E_GLGEN_GPIO_CTL_TIMESYNC_1 | I40E_GLGEN_GPIO_CTL_OUT_HI_RST | \
+	 I40E_GLGEN_GPIO_CTL_TRI_DRV_HI | I40E_GLGEN_GPIO_CTL_DIR_OUT | \
+	 I40E_GLGEN_GPIO_CTL_RESERVED | I40E_GLGEN_GPIO_CTL_PRT_NUM_1)
+#define I40E_GLGEN_GPIO_CTL_LED_INIT \
+	(I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_Z | \
+	 I40E_GLGEN_GPIO_CTL_DIR_OUT | \
+	 I40E_GLGEN_GPIO_CTL_TRI_DRV_HI | \
+	 I40E_GLGEN_GPIO_CTL_OUT_HI_RST | \
+	 I40E_GLGEN_GPIO_CTL_OUT_DEFAULT | \
+	 I40E_GLGEN_GPIO_CTL_NOT_FOR_PHY_CONN)
+#define I40E_PRTTSYN_AUX_1_INSTNT \
+	(1 << I40E_PRTTSYN_AUX_1_INSTNT_SHIFT)
+#define I40E_PRTTSYN_AUX_0_OUT_ENABLE \
+	(1 << I40E_PRTTSYN_AUX_0_OUT_ENA_SHIFT)
+#define I40E_PRTTSYN_AUX_0_OUT_CLK_MOD	(3 << I40E_PRTTSYN_AUX_0_OUTMOD_SHIFT)
+#define I40E_PRTTSYN_AUX_0_OUT_ENABLE_CLK_MOD \
+	(I40E_PRTTSYN_AUX_0_OUT_ENABLE | I40E_PRTTSYN_AUX_0_OUT_CLK_MOD)
+#define I40E_PTP_HALF_SECOND		500000000LL /* nano seconds */
+#define I40E_PTP_2_SEC_DELAY		2
+
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info ptp_caps;
+	struct sk_buff *ptp_tx_skb;
+	unsigned long ptp_tx_start;
+	struct hwtstamp_config tstamp_config;
+	struct timespec64 ptp_prev_hw_time;
+	struct work_struct ptp_pps_work;
+	struct work_struct ptp_extts0_work;
+	struct work_struct ptp_extts1_work;
+	ktime_t ptp_reset_start;
+	struct mutex tmreg_lock; /* Used to protect the SYSTIME registers. */
+	u32 ptp_adj_mult;
+	u32 tx_hwtstamp_timeouts;
+	u32 tx_hwtstamp_skipped;
+	u32 rx_hwtstamp_cleared;
+	u32 latch_event_flags;
+	u64 ptp_pps_start;
+	u32 pps_delay;
+	spinlock_t ptp_rx_lock; /* Used to protect Rx timestamp registers. */
+	struct ptp_pin_desc ptp_pin[3];
+	unsigned long latch_events[4];
+	bool ptp_tx;
+	bool ptp_rx;
+	struct i40e_ptp_pins_settings *ptp_pins;
+	u16 rss_table_size; /* HW RSS table size */
+	u32 max_bw;
+	u32 min_bw;
+
+	u32 ioremap_len;
+	u32 fd_inv;
+	u16 phy_led_val;
+
+	u16 override_q_count;
+	u16 last_sw_conf_flags;
+	u16 last_sw_conf_valid_flags;
+	/* List to keep previous DDP profiles to be rolled back in the future */
+	struct list_head ddp_old_prof;
+};
+
+/**
+ * i40e_mac_to_hkey - Convert a 6-byte MAC Address to a u64 hash key
+ * @macaddr: the MAC Address as the base key
+ *
+ * Simply copies the address and returns it as a u64 for hashing
+ **/
+static inline u64 i40e_addr_to_hkey(const u8 *macaddr)
+{
+	u64 key = 0;
+
+	ether_addr_copy((u8 *)&key, macaddr);
+	return key;
+}
+
+enum i40e_filter_state {
+	I40E_FILTER_INVALID = 0,	/* Invalid state */
+	I40E_FILTER_NEW,		/* New, not sent to FW yet */
+	I40E_FILTER_ACTIVE,		/* Added to switch by FW */
+	I40E_FILTER_FAILED,		/* Rejected by FW */
+	I40E_FILTER_REMOVE,		/* To be removed */
+/* There is no 'removed' state; the filter struct is freed */
+};
+struct i40e_mac_filter {
+	struct hlist_node hlist;
+	u8 macaddr[ETH_ALEN];
+#define I40E_VLAN_ANY -1
+	s16 vlan;
+	enum i40e_filter_state state;
+};
+
+/* Wrapper structure to keep track of filters while we are preparing to send
+ * firmware commands. We cannot send firmware commands while holding a
+ * spinlock, since it might sleep. To avoid this, we wrap the added filters in
+ * a separate structure, which will track the state change and update the real
+ * filter while under lock. We can't simply hold the filters in a separate
+ * list, as this opens a window for a race condition when adding new MAC
+ * addresses to all VLANs, or when adding new VLANs to all MAC addresses.
+ */
+struct i40e_new_mac_filter {
+	struct hlist_node hlist;
+	struct i40e_mac_filter *f;
+
+	/* Track future changes to state separately */
+	enum i40e_filter_state state;
+};
+
+struct i40e_veb {
+	struct i40e_pf *pf;
+	u16 idx;
+	u16 veb_idx;		/* index of VEB parent */
+	u16 seid;
+	u16 uplink_seid;
+	u16 stats_idx;		/* index of VEB parent */
+	u8  enabled_tc;
+	u16 bridge_mode;	/* Bridge Mode (VEB/VEPA) */
+	u16 flags;
+	u16 bw_limit;
+	u8  bw_max_quanta;
+	bool is_abs_credits;
+	u8  bw_tc_share_credits[I40E_MAX_TRAFFIC_CLASS];
+	u16 bw_tc_limit_credits[I40E_MAX_TRAFFIC_CLASS];
+	u8  bw_tc_max_quanta[I40E_MAX_TRAFFIC_CLASS];
+	struct kobject *kobj;
+	bool stat_offsets_loaded;
+	struct i40e_eth_stats stats;
+	struct i40e_eth_stats stats_offsets;
+	struct i40e_veb_tc_stats tc_stats;
+	struct i40e_veb_tc_stats tc_stats_offsets;
+};
+
+/* struct that defines a VSI, associated with a dev */
+struct i40e_vsi {
+	struct net_device *netdev;
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+	bool netdev_registered;
+	bool stat_offsets_loaded;
+
+	u32 current_netdev_flags;
+	DECLARE_BITMAP(state, __I40E_VSI_STATE_SIZE__);
+#define I40E_VSI_FLAG_FILTER_CHANGED	BIT(0)
+#define I40E_VSI_FLAG_VEB_OWNER		BIT(1)
+	unsigned long flags;
+
+	/* Per VSI lock to protect elements/hash (MAC filter) */
+	spinlock_t mac_filter_hash_lock;
+	/* Fixed size hash table with 2^8 buckets for MAC filters */
+	DECLARE_HASHTABLE(mac_filter_hash, 8);
+	bool has_vlan_filter;
+
+	/* VSI stats */
+	struct rtnl_link_stats64 net_stats;
+	struct rtnl_link_stats64 net_stats_offsets;
+	struct i40e_eth_stats eth_stats;
+	struct i40e_eth_stats eth_stats_offsets;
+	u64 tx_restart;
+	u64 tx_busy;
+	u64 tx_linearize;
+	u64 tx_force_wb;
+	u64 tx_stopped;
+	u64 rx_buf_failed;
+	u64 rx_page_failed;
+	u64 rx_page_reuse;
+	u64 rx_page_alloc;
+	u64 rx_page_waive;
+	u64 rx_page_busy;
+
+	/* These are containers of ring pointers, allocated at run-time */
+	struct i40e_ring **rx_rings;
+	struct i40e_ring **tx_rings;
+	struct i40e_ring **xdp_rings; /* XDP Tx rings */
+
+	u32  active_filters;
+	u32  promisc_threshold;
+
+	u16 work_limit;
+	u16 int_rate_limit;	/* value in usecs */
+
+	u16 rss_table_size;	/* HW RSS table size */
+	u16 rss_size;		/* Allocated RSS queues */
+	u8  *rss_hkey_user;	/* User configured hash keys */
+	u8  *rss_lut_user;	/* User configured lookup table entries */
+
+
+	u16 max_frame;
+	u16 rx_buf_len;
+
+	struct bpf_prog *xdp_prog;
+
+	/* List of q_vectors allocated to this VSI */
+	struct i40e_q_vector **q_vectors;
+	int num_q_vectors;
+	int base_vector;
+	bool irqs_ready;
+
+	u16 seid;		/* HW index of this VSI (absolute index) */
+	u16 id;			/* VSI number */
+	u16 uplink_seid;
+
+	u16 base_queue;		/* vsi's first queue in hw array */
+	u16 alloc_queue_pairs;	/* Allocated Tx/Rx queues */
+	u16 req_queue_pairs;	/* User requested queue pairs */
+	u16 num_queue_pairs;	/* Used tx and rx pairs */
+	u16 num_tx_desc;
+	u16 num_rx_desc;
+	enum i40e_vsi_type type;  /* VSI type, e.g., LAN, FCoE, etc */
+	s16 vf_id;		/* Virtual function ID for SRIOV VSIs */
+
+	struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */
+	struct i40e_tc_configuration tc_config;
+	struct i40e_aqc_vsi_properties_data info;
+
+	/* VSI BW limit (absolute across all TCs) */
+	u16 bw_limit;		/* VSI BW Limit (0 = disabled) */
+	u8  bw_max_quanta;	/* Max Quanta when BW limit is enabled */
+
+	/* Relative TC credits across VSIs */
+	u8  bw_ets_share_credits[I40E_MAX_TRAFFIC_CLASS];
+	/* TC BW limit credits within VSI */
+	u16  bw_ets_limit_credits[I40E_MAX_TRAFFIC_CLASS];
+	/* TC BW limit max quanta within VSI */
+	u8  bw_ets_max_quanta[I40E_MAX_TRAFFIC_CLASS];
+
+	struct i40e_pf *back;	/* Backreference to associated PF */
+	u16 idx;		/* index in pf->vsi[] */
+	u16 veb_idx;		/* index of VEB parent */
+	struct kobject *kobj;	/* sysfs object */
+	bool current_isup;	/* Sync 'link up' logging */
+	enum i40e_aq_link_speed current_speed;	/* Sync link speed logging */
+
+	/* channel specific fields */
+	u16 cnt_q_avail;	/* num of queues available for channel usage */
+	u16 orig_rss_size;
+	u16 current_rss_size;
+	bool reconfig_rss;
+
+	u16 next_base_queue;	/* next queue to be used for channel setup */
+
+	struct list_head ch_list;
+	u16 tc_seid_map[I40E_MAX_TRAFFIC_CLASS];
+
+	/* macvlan fields */
+#define I40E_MAX_MACVLANS		128 /* Max HW vectors - 1 on FVL */
+#define I40E_MIN_MACVLAN_VECTORS	2   /* Min vectors to enable macvlans */
+	DECLARE_BITMAP(fwd_bitmask, I40E_MAX_MACVLANS);
+	struct list_head macvlan_list;
+	int macvlan_cnt;
+
+	void *priv;	/* client driver data reference. */
+
+	/* VSI specific handlers */
+	irqreturn_t (*irq_handler)(int irq, void *data);
+
+	unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled qps */
+} ____cacheline_internodealigned_in_smp;
+
+struct i40e_netdev_priv {
+	struct i40e_vsi *vsi;
+};
+
+extern struct ida i40e_client_ida;
+
+/* struct that defines an interrupt vector */
+struct i40e_q_vector {
+	struct i40e_vsi *vsi;
+
+	u16 v_idx;		/* index in the vsi->q_vector array. */
+	u16 reg_idx;		/* register index of the interrupt */
+
+	struct napi_struct napi;
+
+	struct i40e_ring_container rx;
+	struct i40e_ring_container tx;
+
+	u8 itr_countdown;	/* when 0 should adjust adaptive ITR */
+	u8 num_ringpairs;	/* total number of ring pairs in vector */
+
+	cpumask_t affinity_mask;
+	struct irq_affinity_notify affinity_notify;
+
+	struct rcu_head rcu;	/* to avoid race with update stats on free */
+	char name[I40E_INT_NAME_STR_LEN];
+	bool arm_wb_state;
+	int irq_num;		/* IRQ assigned to this q_vector */
+} ____cacheline_internodealigned_in_smp;
+
+/* lan device */
+struct i40e_device {
+	struct list_head list;
+	struct i40e_pf *pf;
+};
+
+/**
+ * i40e_nvm_version_str - format the NVM version strings
+ * @hw: ptr to the hardware info
+ **/
+static inline char *i40e_nvm_version_str(struct i40e_hw *hw)
+{
+	static char buf[32];
+	u32 full_ver;
+
+	full_ver = hw->nvm.oem_ver;
+
+	if (hw->nvm.eetrack == I40E_OEM_EETRACK_ID) {
+		u8 gen, snap;
+		u16 release;
+
+		gen = (u8)(full_ver >> I40E_OEM_GEN_SHIFT);
+		snap = (u8)((full_ver & I40E_OEM_SNAP_MASK) >>
+			I40E_OEM_SNAP_SHIFT);
+		release = (u16)(full_ver & I40E_OEM_RELEASE_MASK);
+
+		snprintf(buf, sizeof(buf), "%x.%x.%x", gen, snap, release);
+	} else {
+		u8 ver, patch;
+		u16 build;
+
+		ver = (u8)(full_ver >> I40E_OEM_VER_SHIFT);
+		build = (u16)((full_ver >> I40E_OEM_VER_BUILD_SHIFT) &
+			 I40E_OEM_VER_BUILD_MASK);
+		patch = (u8)(full_ver & I40E_OEM_VER_PATCH_MASK);
+
+		snprintf(buf, sizeof(buf),
+			 "%x.%02x 0x%x %d.%d.%d",
+			 (hw->nvm.version & I40E_NVM_VERSION_HI_MASK) >>
+				I40E_NVM_VERSION_HI_SHIFT,
+			 (hw->nvm.version & I40E_NVM_VERSION_LO_MASK) >>
+				I40E_NVM_VERSION_LO_SHIFT,
+			 hw->nvm.eetrack, ver, build, patch);
+	}
+
+	return buf;
+}
+
+/**
+ * i40e_netdev_to_pf: Retrieve the PF struct for given netdev
+ * @netdev: the corresponding netdev
+ *
+ * Return the PF struct for the given netdev
+ **/
+static inline struct i40e_pf *i40e_netdev_to_pf(struct net_device *netdev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+
+	return vsi->back;
+}
+
+static inline void i40e_vsi_setup_irqhandler(struct i40e_vsi *vsi,
+				irqreturn_t (*irq_handler)(int, void *))
+{
+	vsi->irq_handler = irq_handler;
+}
+
+/**
+ * i40e_get_fd_cnt_all - get the total FD filter space available
+ * @pf: pointer to the PF struct
+ **/
+static inline int i40e_get_fd_cnt_all(struct i40e_pf *pf)
+{
+	return pf->hw.fdir_shared_filter_count + pf->fdir_pf_filter_count;
+}
+
+/**
+ * i40e_read_fd_input_set - reads value of flow director input set register
+ * @pf: pointer to the PF struct
+ * @addr: register addr
+ *
+ * This function reads value of flow director input set register
+ * specified by 'addr' (which is specific to flow-type)
+ **/
+static inline u64 i40e_read_fd_input_set(struct i40e_pf *pf, u16 addr)
+{
+	u64 val;
+
+	val = i40e_read_rx_ctl(&pf->hw, I40E_PRTQF_FD_INSET(addr, 1));
+	val <<= 32;
+	val += i40e_read_rx_ctl(&pf->hw, I40E_PRTQF_FD_INSET(addr, 0));
+
+	return val;
+}
+
+/**
+ * i40e_write_fd_input_set - writes value into flow director input set register
+ * @pf: pointer to the PF struct
+ * @addr: register addr
+ * @val: value to be written
+ *
+ * This function writes specified value to the register specified by 'addr'.
+ * This register is input set register based on flow-type.
+ **/
+static inline void i40e_write_fd_input_set(struct i40e_pf *pf,
+					   u16 addr, u64 val)
+{
+	i40e_write_rx_ctl(&pf->hw, I40E_PRTQF_FD_INSET(addr, 1),
+			  (u32)(val >> 32));
+	i40e_write_rx_ctl(&pf->hw, I40E_PRTQF_FD_INSET(addr, 0),
+			  (u32)(val & 0xFFFFFFFFULL));
+}
+
+/**
+ * i40e_get_pf_count - get PCI PF count.
+ * @hw: pointer to a hw.
+ *
+ * Reports the function number of the highest PCI physical
+ * function plus 1 as it is loaded from the NVM.
+ *
+ * Return: PCI PF count.
+ **/
+static inline u32 i40e_get_pf_count(struct i40e_hw *hw)
+{
+	return FIELD_GET(I40E_GLGEN_PCIFCNCNT_PCIPFCNT_MASK,
+			 rd32(hw, I40E_GLGEN_PCIFCNCNT));
+}
+
+/* needed by i40e_ethtool.c */
+int i40e_up(struct i40e_vsi *vsi);
+void i40e_down(struct i40e_vsi *vsi);
+extern const char i40e_driver_name[];
+void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags);
+void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired);
+int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
+int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
+void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
+		       u16 rss_table_size, u16 rss_size);
+struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id);
+/**
+ * i40e_find_vsi_by_type - Find and return Flow Director VSI
+ * @pf: PF to search for VSI
+ * @type: Value indicating type of VSI we are looking for
+ **/
+static inline struct i40e_vsi *
+i40e_find_vsi_by_type(struct i40e_pf *pf, u16 type)
+{
+	int i;
+
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
+		struct i40e_vsi *vsi = pf->vsi[i];
+
+		if (vsi && vsi->type == type)
+			return vsi;
+	}
+
+	return NULL;
+}
+void i40e_update_stats(struct i40e_vsi *vsi);
+void i40e_update_veb_stats(struct i40e_veb *veb);
+void i40e_update_eth_stats(struct i40e_vsi *vsi);
+struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi);
+int i40e_fetch_switch_configuration(struct i40e_pf *pf,
+				    bool printconfig);
+
+int i40e_add_del_fdir(struct i40e_vsi *vsi,
+		      struct i40e_fdir_filter *input, bool add);
+void i40e_fdir_check_and_reenable(struct i40e_pf *pf);
+u32 i40e_get_current_fd_count(struct i40e_pf *pf);
+u32 i40e_get_cur_guaranteed_fd_count(struct i40e_pf *pf);
+u32 i40e_get_current_atr_cnt(struct i40e_pf *pf);
+u32 i40e_get_global_fd_count(struct i40e_pf *pf);
+bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features);
+void i40e_set_ethtool_ops(struct net_device *netdev);
+struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
+					const u8 *macaddr, s16 vlan);
+void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f);
+void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan);
+int i40e_sync_vsi_filters(struct i40e_vsi *vsi);
+struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
+				u16 uplink, u32 param1);
+int i40e_vsi_release(struct i40e_vsi *vsi);
+void i40e_service_event_schedule(struct i40e_pf *pf);
+void i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id,
+				  u8 *msg, u16 len);
+
+int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q, bool is_xdp,
+			   bool enable);
+int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable);
+int i40e_vsi_start_rings(struct i40e_vsi *vsi);
+void i40e_vsi_stop_rings(struct i40e_vsi *vsi);
+void i40e_vsi_stop_rings_no_wait(struct  i40e_vsi *vsi);
+int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi);
+int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count);
+struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, u16 uplink_seid,
+				u16 downlink_seid, u8 enabled_tc);
+void i40e_veb_release(struct i40e_veb *veb);
+
+int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc);
+int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid);
+void i40e_vsi_remove_pvid(struct i40e_vsi *vsi);
+void i40e_vsi_reset_stats(struct i40e_vsi *vsi);
+void i40e_pf_reset_stats(struct i40e_pf *pf);
+#ifdef CONFIG_DEBUG_FS
+void i40e_dbg_pf_init(struct i40e_pf *pf);
+void i40e_dbg_pf_exit(struct i40e_pf *pf);
+void i40e_dbg_init(void);
+void i40e_dbg_exit(void);
+#else
+static inline void i40e_dbg_pf_init(struct i40e_pf *pf) {}
+static inline void i40e_dbg_pf_exit(struct i40e_pf *pf) {}
+static inline void i40e_dbg_init(void) {}
+static inline void i40e_dbg_exit(void) {}
+#endif /* CONFIG_DEBUG_FS*/
+/* needed by client drivers */
+int i40e_lan_add_device(struct i40e_pf *pf);
+int i40e_lan_del_device(struct i40e_pf *pf);
+void i40e_client_subtask(struct i40e_pf *pf);
+void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi);
+void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset);
+void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs);
+void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id);
+void i40e_client_update_msix_info(struct i40e_pf *pf);
+int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id);
+/**
+ * i40e_irq_dynamic_enable - Enable default interrupt generation settings
+ * @vsi: pointer to a vsi
+ * @vector: enable a particular Hw Interrupt vector, without base_vector
+ **/
+static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	u32 val;
+
+	val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+	      I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
+	      (I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
+	wr32(hw, I40E_PFINT_DYN_CTLN(vector + vsi->base_vector - 1), val);
+	/* skip the flush */
+}
+
+void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf);
+void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf);
+int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
+int i40e_open(struct net_device *netdev);
+int i40e_close(struct net_device *netdev);
+int i40e_vsi_open(struct i40e_vsi *vsi);
+void i40e_vlan_stripping_disable(struct i40e_vsi *vsi);
+int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid);
+int i40e_vsi_add_vlan(struct i40e_vsi *vsi, u16 vid);
+void i40e_rm_vlan_all_mac(struct i40e_vsi *vsi, s16 vid);
+void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid);
+struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi,
+					    const u8 *macaddr);
+int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr);
+bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi);
+int i40e_count_filters(struct i40e_vsi *vsi);
+struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr);
+void i40e_vlan_stripping_enable(struct i40e_vsi *vsi);
+static inline bool i40e_is_sw_dcb(struct i40e_pf *pf)
+{
+	return !!(pf->flags & I40E_FLAG_DISABLE_FW_LLDP);
+}
+
+#ifdef CONFIG_I40E_DCB
+void i40e_dcbnl_flush_apps(struct i40e_pf *pf,
+			   struct i40e_dcbx_config *old_cfg,
+			   struct i40e_dcbx_config *new_cfg);
+void i40e_dcbnl_set_all(struct i40e_vsi *vsi);
+void i40e_dcbnl_setup(struct i40e_vsi *vsi);
+bool i40e_dcb_need_reconfig(struct i40e_pf *pf,
+			    struct i40e_dcbx_config *old_cfg,
+			    struct i40e_dcbx_config *new_cfg);
+int i40e_hw_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg);
+int i40e_dcb_sw_default_config(struct i40e_pf *pf);
+#endif /* CONFIG_I40E_DCB */
+void i40e_ptp_rx_hang(struct i40e_pf *pf);
+void i40e_ptp_tx_hang(struct i40e_pf *pf);
+void i40e_ptp_tx_hwtstamp(struct i40e_pf *pf);
+void i40e_ptp_rx_hwtstamp(struct i40e_pf *pf, struct sk_buff *skb, u8 index);
+void i40e_ptp_set_increment(struct i40e_pf *pf);
+int i40e_ptp_set_ts_config(struct i40e_pf *pf, struct ifreq *ifr);
+int i40e_ptp_get_ts_config(struct i40e_pf *pf, struct ifreq *ifr);
+void i40e_ptp_save_hw_time(struct i40e_pf *pf);
+void i40e_ptp_restore_hw_time(struct i40e_pf *pf);
+void i40e_ptp_init(struct i40e_pf *pf);
+void i40e_ptp_stop(struct i40e_pf *pf);
+int i40e_ptp_alloc_pins(struct i40e_pf *pf);
+int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset);
+int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi);
+int i40e_get_partition_bw_setting(struct i40e_pf *pf);
+int i40e_set_partition_bw_setting(struct i40e_pf *pf);
+int i40e_commit_partition_bw_setting(struct i40e_pf *pf);
+void i40e_print_link_message(struct i40e_vsi *vsi, bool isup);
+
+void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags);
+
+static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
+{
+	return !!READ_ONCE(vsi->xdp_prog);
+}
+
+int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
+int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);
+int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+			      struct i40e_cloud_filter *filter,
+			      bool add);
+int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+				      struct i40e_cloud_filter *filter,
+				      bool add);
+
+/**
+ * i40e_is_tc_mqprio_enabled - check if TC MQPRIO is enabled on PF
+ * @pf: pointer to a pf.
+ *
+ * Check and return value of flag I40E_FLAG_TC_MQPRIO.
+ *
+ * Return: I40E_FLAG_TC_MQPRIO set state.
+ **/
+static inline u32 i40e_is_tc_mqprio_enabled(struct i40e_pf *pf)
+{
+	return pf->flags & I40E_FLAG_TC_MQPRIO;
+}
+
+#endif /* _I40E_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
new file mode 100644
index 0000000000..100eb77b8d
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -0,0 +1,1192 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "i40e_type.h"
+#include "i40e_register.h"
+#include "i40e_adminq.h"
+#include "i40e_prototype.h"
+
+static void i40e_resume_aq(struct i40e_hw *hw);
+
+/**
+ *  i40e_adminq_init_regs - Initialize AdminQ registers
+ *  @hw: pointer to the hardware structure
+ *
+ *  This assumes the alloc_asq and alloc_arq functions have already been called
+ **/
+static void i40e_adminq_init_regs(struct i40e_hw *hw)
+{
+	/* set head and tail registers in our local struct */
+	if (i40e_is_vf(hw)) {
+		hw->aq.asq.tail = I40E_VF_ATQT1;
+		hw->aq.asq.head = I40E_VF_ATQH1;
+		hw->aq.asq.len  = I40E_VF_ATQLEN1;
+		hw->aq.asq.bal  = I40E_VF_ATQBAL1;
+		hw->aq.asq.bah  = I40E_VF_ATQBAH1;
+		hw->aq.arq.tail = I40E_VF_ARQT1;
+		hw->aq.arq.head = I40E_VF_ARQH1;
+		hw->aq.arq.len  = I40E_VF_ARQLEN1;
+		hw->aq.arq.bal  = I40E_VF_ARQBAL1;
+		hw->aq.arq.bah  = I40E_VF_ARQBAH1;
+	} else {
+		hw->aq.asq.tail = I40E_PF_ATQT;
+		hw->aq.asq.head = I40E_PF_ATQH;
+		hw->aq.asq.len  = I40E_PF_ATQLEN;
+		hw->aq.asq.bal  = I40E_PF_ATQBAL;
+		hw->aq.asq.bah  = I40E_PF_ATQBAH;
+		hw->aq.arq.tail = I40E_PF_ARQT;
+		hw->aq.arq.head = I40E_PF_ARQH;
+		hw->aq.arq.len  = I40E_PF_ARQLEN;
+		hw->aq.arq.bal  = I40E_PF_ARQBAL;
+		hw->aq.arq.bah  = I40E_PF_ARQBAH;
+	}
+}
+
+/**
+ *  i40e_alloc_adminq_asq_ring - Allocate Admin Queue send rings
+ *  @hw: pointer to the hardware structure
+ **/
+static int i40e_alloc_adminq_asq_ring(struct i40e_hw *hw)
+{
+	int ret_code;
+
+	ret_code = i40e_allocate_dma_mem(hw, &hw->aq.asq.desc_buf,
+					 i40e_mem_atq_ring,
+					 (hw->aq.num_asq_entries *
+					 sizeof(struct i40e_aq_desc)),
+					 I40E_ADMINQ_DESC_ALIGNMENT);
+	if (ret_code)
+		return ret_code;
+
+	ret_code = i40e_allocate_virt_mem(hw, &hw->aq.asq.cmd_buf,
+					  (hw->aq.num_asq_entries *
+					  sizeof(struct i40e_asq_cmd_details)));
+	if (ret_code) {
+		i40e_free_dma_mem(hw, &hw->aq.asq.desc_buf);
+		return ret_code;
+	}
+
+	return ret_code;
+}
+
+/**
+ *  i40e_alloc_adminq_arq_ring - Allocate Admin Queue receive rings
+ *  @hw: pointer to the hardware structure
+ **/
+static int i40e_alloc_adminq_arq_ring(struct i40e_hw *hw)
+{
+	int ret_code;
+
+	ret_code = i40e_allocate_dma_mem(hw, &hw->aq.arq.desc_buf,
+					 i40e_mem_arq_ring,
+					 (hw->aq.num_arq_entries *
+					 sizeof(struct i40e_aq_desc)),
+					 I40E_ADMINQ_DESC_ALIGNMENT);
+
+	return ret_code;
+}
+
+/**
+ *  i40e_free_adminq_asq - Free Admin Queue send rings
+ *  @hw: pointer to the hardware structure
+ *
+ *  This assumes the posted send buffers have already been cleaned
+ *  and de-allocated
+ **/
+static void i40e_free_adminq_asq(struct i40e_hw *hw)
+{
+	i40e_free_dma_mem(hw, &hw->aq.asq.desc_buf);
+}
+
+/**
+ *  i40e_free_adminq_arq - Free Admin Queue receive rings
+ *  @hw: pointer to the hardware structure
+ *
+ *  This assumes the posted receive buffers have already been cleaned
+ *  and de-allocated
+ **/
+static void i40e_free_adminq_arq(struct i40e_hw *hw)
+{
+	i40e_free_dma_mem(hw, &hw->aq.arq.desc_buf);
+}
+
+/**
+ *  i40e_alloc_arq_bufs - Allocate pre-posted buffers for the receive queue
+ *  @hw: pointer to the hardware structure
+ **/
+static int i40e_alloc_arq_bufs(struct i40e_hw *hw)
+{
+	struct i40e_aq_desc *desc;
+	struct i40e_dma_mem *bi;
+	int ret_code;
+	int i;
+
+	/* We'll be allocating the buffer info memory first, then we can
+	 * allocate the mapped buffers for the event processing
+	 */
+
+	/* buffer_info structures do not need alignment */
+	ret_code = i40e_allocate_virt_mem(hw, &hw->aq.arq.dma_head,
+		(hw->aq.num_arq_entries * sizeof(struct i40e_dma_mem)));
+	if (ret_code)
+		goto alloc_arq_bufs;
+	hw->aq.arq.r.arq_bi = (struct i40e_dma_mem *)hw->aq.arq.dma_head.va;
+
+	/* allocate the mapped buffers */
+	for (i = 0; i < hw->aq.num_arq_entries; i++) {
+		bi = &hw->aq.arq.r.arq_bi[i];
+		ret_code = i40e_allocate_dma_mem(hw, bi,
+						 i40e_mem_arq_buf,
+						 hw->aq.arq_buf_size,
+						 I40E_ADMINQ_DESC_ALIGNMENT);
+		if (ret_code)
+			goto unwind_alloc_arq_bufs;
+
+		/* now configure the descriptors for use */
+		desc = I40E_ADMINQ_DESC(hw->aq.arq, i);
+
+		desc->flags = cpu_to_le16(I40E_AQ_FLAG_BUF);
+		if (hw->aq.arq_buf_size > I40E_AQ_LARGE_BUF)
+			desc->flags |= cpu_to_le16(I40E_AQ_FLAG_LB);
+		desc->opcode = 0;
+		/* This is in accordance with Admin queue design, there is no
+		 * register for buffer size configuration
+		 */
+		desc->datalen = cpu_to_le16((u16)bi->size);
+		desc->retval = 0;
+		desc->cookie_high = 0;
+		desc->cookie_low = 0;
+		desc->params.external.addr_high =
+			cpu_to_le32(upper_32_bits(bi->pa));
+		desc->params.external.addr_low =
+			cpu_to_le32(lower_32_bits(bi->pa));
+		desc->params.external.param0 = 0;
+		desc->params.external.param1 = 0;
+	}
+
+alloc_arq_bufs:
+	return ret_code;
+
+unwind_alloc_arq_bufs:
+	/* don't try to free the one that failed... */
+	i--;
+	for (; i >= 0; i--)
+		i40e_free_dma_mem(hw, &hw->aq.arq.r.arq_bi[i]);
+	i40e_free_virt_mem(hw, &hw->aq.arq.dma_head);
+
+	return ret_code;
+}
+
+/**
+ *  i40e_alloc_asq_bufs - Allocate empty buffer structs for the send queue
+ *  @hw: pointer to the hardware structure
+ **/
+static int i40e_alloc_asq_bufs(struct i40e_hw *hw)
+{
+	struct i40e_dma_mem *bi;
+	int ret_code;
+	int i;
+
+	/* No mapped memory needed yet, just the buffer info structures */
+	ret_code = i40e_allocate_virt_mem(hw, &hw->aq.asq.dma_head,
+		(hw->aq.num_asq_entries * sizeof(struct i40e_dma_mem)));
+	if (ret_code)
+		goto alloc_asq_bufs;
+	hw->aq.asq.r.asq_bi = (struct i40e_dma_mem *)hw->aq.asq.dma_head.va;
+
+	/* allocate the mapped buffers */
+	for (i = 0; i < hw->aq.num_asq_entries; i++) {
+		bi = &hw->aq.asq.r.asq_bi[i];
+		ret_code = i40e_allocate_dma_mem(hw, bi,
+						 i40e_mem_asq_buf,
+						 hw->aq.asq_buf_size,
+						 I40E_ADMINQ_DESC_ALIGNMENT);
+		if (ret_code)
+			goto unwind_alloc_asq_bufs;
+	}
+alloc_asq_bufs:
+	return ret_code;
+
+unwind_alloc_asq_bufs:
+	/* don't try to free the one that failed... */
+	i--;
+	for (; i >= 0; i--)
+		i40e_free_dma_mem(hw, &hw->aq.asq.r.asq_bi[i]);
+	i40e_free_virt_mem(hw, &hw->aq.asq.dma_head);
+
+	return ret_code;
+}
+
+/**
+ *  i40e_free_arq_bufs - Free receive queue buffer info elements
+ *  @hw: pointer to the hardware structure
+ **/
+static void i40e_free_arq_bufs(struct i40e_hw *hw)
+{
+	int i;
+
+	/* free descriptors */
+	for (i = 0; i < hw->aq.num_arq_entries; i++)
+		i40e_free_dma_mem(hw, &hw->aq.arq.r.arq_bi[i]);
+
+	/* free the descriptor memory */
+	i40e_free_dma_mem(hw, &hw->aq.arq.desc_buf);
+
+	/* free the dma header */
+	i40e_free_virt_mem(hw, &hw->aq.arq.dma_head);
+}
+
+/**
+ *  i40e_free_asq_bufs - Free send queue buffer info elements
+ *  @hw: pointer to the hardware structure
+ **/
+static void i40e_free_asq_bufs(struct i40e_hw *hw)
+{
+	int i;
+
+	/* only unmap if the address is non-NULL */
+	for (i = 0; i < hw->aq.num_asq_entries; i++)
+		if (hw->aq.asq.r.asq_bi[i].pa)
+			i40e_free_dma_mem(hw, &hw->aq.asq.r.asq_bi[i]);
+
+	/* free the buffer info list */
+	i40e_free_virt_mem(hw, &hw->aq.asq.cmd_buf);
+
+	/* free the descriptor memory */
+	i40e_free_dma_mem(hw, &hw->aq.asq.desc_buf);
+
+	/* free the dma header */
+	i40e_free_virt_mem(hw, &hw->aq.asq.dma_head);
+}
+
+/**
+ *  i40e_config_asq_regs - configure ASQ registers
+ *  @hw: pointer to the hardware structure
+ *
+ *  Configure base address and length registers for the transmit queue
+ **/
+static int i40e_config_asq_regs(struct i40e_hw *hw)
+{
+	int ret_code = 0;
+	u32 reg = 0;
+
+	/* Clear Head and Tail */
+	wr32(hw, hw->aq.asq.head, 0);
+	wr32(hw, hw->aq.asq.tail, 0);
+
+	/* set starting point */
+	wr32(hw, hw->aq.asq.len, (hw->aq.num_asq_entries |
+				  I40E_PF_ATQLEN_ATQENABLE_MASK));
+	wr32(hw, hw->aq.asq.bal, lower_32_bits(hw->aq.asq.desc_buf.pa));
+	wr32(hw, hw->aq.asq.bah, upper_32_bits(hw->aq.asq.desc_buf.pa));
+
+	/* Check one register to verify that config was applied */
+	reg = rd32(hw, hw->aq.asq.bal);
+	if (reg != lower_32_bits(hw->aq.asq.desc_buf.pa))
+		ret_code = -EIO;
+
+	return ret_code;
+}
+
+/**
+ *  i40e_config_arq_regs - ARQ register configuration
+ *  @hw: pointer to the hardware structure
+ *
+ * Configure base address and length registers for the receive (event queue)
+ **/
+static int i40e_config_arq_regs(struct i40e_hw *hw)
+{
+	int ret_code = 0;
+	u32 reg = 0;
+
+	/* Clear Head and Tail */
+	wr32(hw, hw->aq.arq.head, 0);
+	wr32(hw, hw->aq.arq.tail, 0);
+
+	/* set starting point */
+	wr32(hw, hw->aq.arq.len, (hw->aq.num_arq_entries |
+				  I40E_PF_ARQLEN_ARQENABLE_MASK));
+	wr32(hw, hw->aq.arq.bal, lower_32_bits(hw->aq.arq.desc_buf.pa));
+	wr32(hw, hw->aq.arq.bah, upper_32_bits(hw->aq.arq.desc_buf.pa));
+
+	/* Update tail in the HW to post pre-allocated buffers */
+	wr32(hw, hw->aq.arq.tail, hw->aq.num_arq_entries - 1);
+
+	/* Check one register to verify that config was applied */
+	reg = rd32(hw, hw->aq.arq.bal);
+	if (reg != lower_32_bits(hw->aq.arq.desc_buf.pa))
+		ret_code = -EIO;
+
+	return ret_code;
+}
+
+/**
+ *  i40e_init_asq - main initialization routine for ASQ
+ *  @hw: pointer to the hardware structure
+ *
+ *  This is the main initialization routine for the Admin Send Queue
+ *  Prior to calling this function, drivers *MUST* set the following fields
+ *  in the hw->aq structure:
+ *     - hw->aq.num_asq_entries
+ *     - hw->aq.arq_buf_size
+ *
+ *  Do *NOT* hold the lock when calling this as the memory allocation routines
+ *  called are not going to be atomic context safe
+ **/
+static int i40e_init_asq(struct i40e_hw *hw)
+{
+	int ret_code = 0;
+
+	if (hw->aq.asq.count > 0) {
+		/* queue already initialized */
+		ret_code = -EBUSY;
+		goto init_adminq_exit;
+	}
+
+	/* verify input for valid configuration */
+	if ((hw->aq.num_asq_entries == 0) ||
+	    (hw->aq.asq_buf_size == 0)) {
+		ret_code = -EIO;
+		goto init_adminq_exit;
+	}
+
+	hw->aq.asq.next_to_use = 0;
+	hw->aq.asq.next_to_clean = 0;
+
+	/* allocate the ring memory */
+	ret_code = i40e_alloc_adminq_asq_ring(hw);
+	if (ret_code)
+		goto init_adminq_exit;
+
+	/* allocate buffers in the rings */
+	ret_code = i40e_alloc_asq_bufs(hw);
+	if (ret_code)
+		goto init_adminq_free_rings;
+
+	/* initialize base registers */
+	ret_code = i40e_config_asq_regs(hw);
+	if (ret_code)
+		goto init_adminq_free_rings;
+
+	/* success! */
+	hw->aq.asq.count = hw->aq.num_asq_entries;
+	goto init_adminq_exit;
+
+init_adminq_free_rings:
+	i40e_free_adminq_asq(hw);
+
+init_adminq_exit:
+	return ret_code;
+}
+
+/**
+ *  i40e_init_arq - initialize ARQ
+ *  @hw: pointer to the hardware structure
+ *
+ *  The main initialization routine for the Admin Receive (Event) Queue.
+ *  Prior to calling this function, drivers *MUST* set the following fields
+ *  in the hw->aq structure:
+ *     - hw->aq.num_asq_entries
+ *     - hw->aq.arq_buf_size
+ *
+ *  Do *NOT* hold the lock when calling this as the memory allocation routines
+ *  called are not going to be atomic context safe
+ **/
+static int i40e_init_arq(struct i40e_hw *hw)
+{
+	int ret_code = 0;
+
+	if (hw->aq.arq.count > 0) {
+		/* queue already initialized */
+		ret_code = -EBUSY;
+		goto init_adminq_exit;
+	}
+
+	/* verify input for valid configuration */
+	if ((hw->aq.num_arq_entries == 0) ||
+	    (hw->aq.arq_buf_size == 0)) {
+		ret_code = -EIO;
+		goto init_adminq_exit;
+	}
+
+	hw->aq.arq.next_to_use = 0;
+	hw->aq.arq.next_to_clean = 0;
+
+	/* allocate the ring memory */
+	ret_code = i40e_alloc_adminq_arq_ring(hw);
+	if (ret_code)
+		goto init_adminq_exit;
+
+	/* allocate buffers in the rings */
+	ret_code = i40e_alloc_arq_bufs(hw);
+	if (ret_code)
+		goto init_adminq_free_rings;
+
+	/* initialize base registers */
+	ret_code = i40e_config_arq_regs(hw);
+	if (ret_code)
+		goto init_adminq_free_rings;
+
+	/* success! */
+	hw->aq.arq.count = hw->aq.num_arq_entries;
+	goto init_adminq_exit;
+
+init_adminq_free_rings:
+	i40e_free_adminq_arq(hw);
+
+init_adminq_exit:
+	return ret_code;
+}
+
+/**
+ *  i40e_shutdown_asq - shutdown the ASQ
+ *  @hw: pointer to the hardware structure
+ *
+ *  The main shutdown routine for the Admin Send Queue
+ **/
+static int i40e_shutdown_asq(struct i40e_hw *hw)
+{
+	int ret_code = 0;
+
+	mutex_lock(&hw->aq.asq_mutex);
+
+	if (hw->aq.asq.count == 0) {
+		ret_code = -EBUSY;
+		goto shutdown_asq_out;
+	}
+
+	/* Stop firmware AdminQ processing */
+	wr32(hw, hw->aq.asq.head, 0);
+	wr32(hw, hw->aq.asq.tail, 0);
+	wr32(hw, hw->aq.asq.len, 0);
+	wr32(hw, hw->aq.asq.bal, 0);
+	wr32(hw, hw->aq.asq.bah, 0);
+
+	hw->aq.asq.count = 0; /* to indicate uninitialized queue */
+
+	/* free ring buffers */
+	i40e_free_asq_bufs(hw);
+
+shutdown_asq_out:
+	mutex_unlock(&hw->aq.asq_mutex);
+	return ret_code;
+}
+
+/**
+ *  i40e_shutdown_arq - shutdown ARQ
+ *  @hw: pointer to the hardware structure
+ *
+ *  The main shutdown routine for the Admin Receive Queue
+ **/
+static int i40e_shutdown_arq(struct i40e_hw *hw)
+{
+	int ret_code = 0;
+
+	mutex_lock(&hw->aq.arq_mutex);
+
+	if (hw->aq.arq.count == 0) {
+		ret_code = -EBUSY;
+		goto shutdown_arq_out;
+	}
+
+	/* Stop firmware AdminQ processing */
+	wr32(hw, hw->aq.arq.head, 0);
+	wr32(hw, hw->aq.arq.tail, 0);
+	wr32(hw, hw->aq.arq.len, 0);
+	wr32(hw, hw->aq.arq.bal, 0);
+	wr32(hw, hw->aq.arq.bah, 0);
+
+	hw->aq.arq.count = 0; /* to indicate uninitialized queue */
+
+	/* free ring buffers */
+	i40e_free_arq_bufs(hw);
+
+shutdown_arq_out:
+	mutex_unlock(&hw->aq.arq_mutex);
+	return ret_code;
+}
+
+/**
+ *  i40e_set_hw_flags - set HW flags
+ *  @hw: pointer to the hardware structure
+ **/
+static void i40e_set_hw_flags(struct i40e_hw *hw)
+{
+	struct i40e_adminq_info *aq = &hw->aq;
+
+	hw->flags = 0;
+
+	switch (hw->mac.type) {
+	case I40E_MAC_XL710:
+		if (aq->api_maj_ver > 1 ||
+		    (aq->api_maj_ver == 1 &&
+		     aq->api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710)) {
+			hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE;
+			hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
+			/* The ability to RX (not drop) 802.1ad frames */
+			hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE;
+		}
+		break;
+	case I40E_MAC_X722:
+		hw->flags |= I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE |
+			     I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK;
+
+		if (aq->api_maj_ver > 1 ||
+		    (aq->api_maj_ver == 1 &&
+		     aq->api_min_ver >= I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722))
+			hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
+
+		if (aq->api_maj_ver > 1 ||
+		    (aq->api_maj_ver == 1 &&
+		     aq->api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_X722))
+			hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE;
+
+		if (aq->api_maj_ver > 1 ||
+		    (aq->api_maj_ver == 1 &&
+		     aq->api_min_ver >= I40E_MINOR_VER_FW_REQUEST_FEC_X722))
+			hw->flags |= I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE;
+
+		fallthrough;
+	default:
+		break;
+	}
+
+	/* Newer versions of firmware require lock when reading the NVM */
+	if (aq->api_maj_ver > 1 ||
+	    (aq->api_maj_ver == 1 &&
+	     aq->api_min_ver >= 5))
+		hw->flags |= I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK;
+
+	if (aq->api_maj_ver > 1 ||
+	    (aq->api_maj_ver == 1 &&
+	     aq->api_min_ver >= 8)) {
+		hw->flags |= I40E_HW_FLAG_FW_LLDP_PERSISTENT;
+		hw->flags |= I40E_HW_FLAG_DROP_MODE;
+	}
+
+	if (aq->api_maj_ver > 1 ||
+	    (aq->api_maj_ver == 1 &&
+	     aq->api_min_ver >= 9))
+		hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED;
+}
+
+/**
+ *  i40e_init_adminq - main initialization routine for Admin Queue
+ *  @hw: pointer to the hardware structure
+ *
+ *  Prior to calling this function, drivers *MUST* set the following fields
+ *  in the hw->aq structure:
+ *     - hw->aq.num_asq_entries
+ *     - hw->aq.num_arq_entries
+ *     - hw->aq.arq_buf_size
+ *     - hw->aq.asq_buf_size
+ **/
+int i40e_init_adminq(struct i40e_hw *hw)
+{
+	u16 cfg_ptr, oem_hi, oem_lo;
+	u16 eetrack_lo, eetrack_hi;
+	int retry = 0;
+	int ret_code;
+
+	/* verify input for valid configuration */
+	if ((hw->aq.num_arq_entries == 0) ||
+	    (hw->aq.num_asq_entries == 0) ||
+	    (hw->aq.arq_buf_size == 0) ||
+	    (hw->aq.asq_buf_size == 0)) {
+		ret_code = -EIO;
+		goto init_adminq_exit;
+	}
+
+	/* Set up register offsets */
+	i40e_adminq_init_regs(hw);
+
+	/* setup ASQ command write back timeout */
+	hw->aq.asq_cmd_timeout = I40E_ASQ_CMD_TIMEOUT;
+
+	/* allocate the ASQ */
+	ret_code = i40e_init_asq(hw);
+	if (ret_code)
+		goto init_adminq_destroy_locks;
+
+	/* allocate the ARQ */
+	ret_code = i40e_init_arq(hw);
+	if (ret_code)
+		goto init_adminq_free_asq;
+
+	/* There are some cases where the firmware may not be quite ready
+	 * for AdminQ operations, so we retry the AdminQ setup a few times
+	 * if we see timeouts in this first AQ call.
+	 */
+	do {
+		ret_code = i40e_aq_get_firmware_version(hw,
+							&hw->aq.fw_maj_ver,
+							&hw->aq.fw_min_ver,
+							&hw->aq.fw_build,
+							&hw->aq.api_maj_ver,
+							&hw->aq.api_min_ver,
+							NULL);
+		if (ret_code != -EIO)
+			break;
+		retry++;
+		msleep(100);
+		i40e_resume_aq(hw);
+	} while (retry < 10);
+	if (ret_code != 0)
+		goto init_adminq_free_arq;
+
+	/* Some features were introduced in different FW API version
+	 * for different MAC type.
+	 */
+	i40e_set_hw_flags(hw);
+
+	/* get the NVM version info */
+	i40e_read_nvm_word(hw, I40E_SR_NVM_DEV_STARTER_VERSION,
+			   &hw->nvm.version);
+	i40e_read_nvm_word(hw, I40E_SR_NVM_EETRACK_LO, &eetrack_lo);
+	i40e_read_nvm_word(hw, I40E_SR_NVM_EETRACK_HI, &eetrack_hi);
+	hw->nvm.eetrack = (eetrack_hi << 16) | eetrack_lo;
+	i40e_read_nvm_word(hw, I40E_SR_BOOT_CONFIG_PTR, &cfg_ptr);
+	i40e_read_nvm_word(hw, (cfg_ptr + I40E_NVM_OEM_VER_OFF),
+			   &oem_hi);
+	i40e_read_nvm_word(hw, (cfg_ptr + (I40E_NVM_OEM_VER_OFF + 1)),
+			   &oem_lo);
+	hw->nvm.oem_ver = ((u32)oem_hi << 16) | oem_lo;
+
+	if (hw->mac.type == I40E_MAC_XL710 &&
+	    hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
+	    hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) {
+		hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE;
+		hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
+	}
+	if (hw->mac.type == I40E_MAC_X722 &&
+	    hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
+	    hw->aq.api_min_ver >= I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722) {
+		hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
+	}
+
+	/* The ability to RX (not drop) 802.1ad frames was added in API 1.7 */
+	if (hw->aq.api_maj_ver > 1 ||
+	    (hw->aq.api_maj_ver == 1 &&
+	     hw->aq.api_min_ver >= 7))
+		hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE;
+
+	if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) {
+		ret_code = -EIO;
+		goto init_adminq_free_arq;
+	}
+
+	/* pre-emptive resource lock release */
+	i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL);
+	hw->nvm_release_on_done = false;
+	hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+
+	ret_code = 0;
+
+	/* success! */
+	goto init_adminq_exit;
+
+init_adminq_free_arq:
+	i40e_shutdown_arq(hw);
+init_adminq_free_asq:
+	i40e_shutdown_asq(hw);
+init_adminq_destroy_locks:
+
+init_adminq_exit:
+	return ret_code;
+}
+
+/**
+ *  i40e_shutdown_adminq - shutdown routine for the Admin Queue
+ *  @hw: pointer to the hardware structure
+ **/
+void i40e_shutdown_adminq(struct i40e_hw *hw)
+{
+	if (i40e_check_asq_alive(hw))
+		i40e_aq_queue_shutdown(hw, true);
+
+	i40e_shutdown_asq(hw);
+	i40e_shutdown_arq(hw);
+
+	if (hw->nvm_buff.va)
+		i40e_free_virt_mem(hw, &hw->nvm_buff);
+}
+
+/**
+ *  i40e_clean_asq - cleans Admin send queue
+ *  @hw: pointer to the hardware structure
+ *
+ *  returns the number of free desc
+ **/
+static u16 i40e_clean_asq(struct i40e_hw *hw)
+{
+	struct i40e_adminq_ring *asq = &(hw->aq.asq);
+	struct i40e_asq_cmd_details *details;
+	u16 ntc = asq->next_to_clean;
+	struct i40e_aq_desc desc_cb;
+	struct i40e_aq_desc *desc;
+
+	desc = I40E_ADMINQ_DESC(*asq, ntc);
+	details = I40E_ADMINQ_DETAILS(*asq, ntc);
+	while (rd32(hw, hw->aq.asq.head) != ntc) {
+		i40e_debug(hw, I40E_DEBUG_AQ_COMMAND,
+			   "ntc %d head %d.\n", ntc, rd32(hw, hw->aq.asq.head));
+
+		if (details->callback) {
+			I40E_ADMINQ_CALLBACK cb_func =
+					(I40E_ADMINQ_CALLBACK)details->callback;
+			desc_cb = *desc;
+			cb_func(hw, &desc_cb);
+		}
+		memset(desc, 0, sizeof(*desc));
+		memset(details, 0, sizeof(*details));
+		ntc++;
+		if (ntc == asq->count)
+			ntc = 0;
+		desc = I40E_ADMINQ_DESC(*asq, ntc);
+		details = I40E_ADMINQ_DETAILS(*asq, ntc);
+	}
+
+	asq->next_to_clean = ntc;
+
+	return I40E_DESC_UNUSED(asq);
+}
+
+/**
+ *  i40e_asq_done - check if FW has processed the Admin Send Queue
+ *  @hw: pointer to the hw struct
+ *
+ *  Returns true if the firmware has processed all descriptors on the
+ *  admin send queue. Returns false if there are still requests pending.
+ **/
+static bool i40e_asq_done(struct i40e_hw *hw)
+{
+	/* AQ designers suggest use of head for better
+	 * timing reliability than DD bit
+	 */
+	return rd32(hw, hw->aq.asq.head) == hw->aq.asq.next_to_use;
+
+}
+
+/**
+ *  i40e_asq_send_command_atomic_exec - send command to Admin Queue
+ *  @hw: pointer to the hw struct
+ *  @desc: prefilled descriptor describing the command (non DMA mem)
+ *  @buff: buffer to use for indirect commands
+ *  @buff_size: size of buffer for indirect commands
+ *  @cmd_details: pointer to command details structure
+ *  @is_atomic_context: is the function called in an atomic context?
+ *
+ *  This is the main send command driver routine for the Admin Queue send
+ *  queue.  It runs the queue, cleans the queue, etc
+ **/
+static int
+i40e_asq_send_command_atomic_exec(struct i40e_hw *hw,
+				  struct i40e_aq_desc *desc,
+				  void *buff, /* can be NULL */
+				  u16  buff_size,
+				  struct i40e_asq_cmd_details *cmd_details,
+				  bool is_atomic_context)
+{
+	struct i40e_dma_mem *dma_buff = NULL;
+	struct i40e_asq_cmd_details *details;
+	struct i40e_aq_desc *desc_on_ring;
+	bool cmd_completed = false;
+	u16  retval = 0;
+	int status = 0;
+	u32  val = 0;
+
+	if (hw->aq.asq.count == 0) {
+		i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+			   "AQTX: Admin queue not initialized.\n");
+		status = -EIO;
+		goto asq_send_command_error;
+	}
+
+	hw->aq.asq_last_status = I40E_AQ_RC_OK;
+
+	val = rd32(hw, hw->aq.asq.head);
+	if (val >= hw->aq.num_asq_entries) {
+		i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+			   "AQTX: head overrun at %d\n", val);
+		status = -ENOSPC;
+		goto asq_send_command_error;
+	}
+
+	details = I40E_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use);
+	if (cmd_details) {
+		*details = *cmd_details;
+
+		/* If the cmd_details are defined copy the cookie.  The
+		 * cpu_to_le32 is not needed here because the data is ignored
+		 * by the FW, only used by the driver
+		 */
+		if (details->cookie) {
+			desc->cookie_high =
+				cpu_to_le32(upper_32_bits(details->cookie));
+			desc->cookie_low =
+				cpu_to_le32(lower_32_bits(details->cookie));
+		}
+	} else {
+		memset(details, 0, sizeof(struct i40e_asq_cmd_details));
+	}
+
+	/* clear requested flags and then set additional flags if defined */
+	desc->flags &= ~cpu_to_le16(details->flags_dis);
+	desc->flags |= cpu_to_le16(details->flags_ena);
+
+	if (buff_size > hw->aq.asq_buf_size) {
+		i40e_debug(hw,
+			   I40E_DEBUG_AQ_MESSAGE,
+			   "AQTX: Invalid buffer size: %d.\n",
+			   buff_size);
+		status = -EINVAL;
+		goto asq_send_command_error;
+	}
+
+	if (details->postpone && !details->async) {
+		i40e_debug(hw,
+			   I40E_DEBUG_AQ_MESSAGE,
+			   "AQTX: Async flag not set along with postpone flag");
+		status = -EINVAL;
+		goto asq_send_command_error;
+	}
+
+	/* call clean and check queue available function to reclaim the
+	 * descriptors that were processed by FW, the function returns the
+	 * number of desc available
+	 */
+	/* the clean function called here could be called in a separate thread
+	 * in case of asynchronous completions
+	 */
+	if (i40e_clean_asq(hw) == 0) {
+		i40e_debug(hw,
+			   I40E_DEBUG_AQ_MESSAGE,
+			   "AQTX: Error queue is full.\n");
+		status = -ENOSPC;
+		goto asq_send_command_error;
+	}
+
+	/* initialize the temp desc pointer with the right desc */
+	desc_on_ring = I40E_ADMINQ_DESC(hw->aq.asq, hw->aq.asq.next_to_use);
+
+	/* if the desc is available copy the temp desc to the right place */
+	*desc_on_ring = *desc;
+
+	/* if buff is not NULL assume indirect command */
+	if (buff != NULL) {
+		dma_buff = &(hw->aq.asq.r.asq_bi[hw->aq.asq.next_to_use]);
+		/* copy the user buff into the respective DMA buff */
+		memcpy(dma_buff->va, buff, buff_size);
+		desc_on_ring->datalen = cpu_to_le16(buff_size);
+
+		/* Update the address values in the desc with the pa value
+		 * for respective buffer
+		 */
+		desc_on_ring->params.external.addr_high =
+				cpu_to_le32(upper_32_bits(dma_buff->pa));
+		desc_on_ring->params.external.addr_low =
+				cpu_to_le32(lower_32_bits(dma_buff->pa));
+	}
+
+	/* bump the tail */
+	i40e_debug(hw, I40E_DEBUG_AQ_COMMAND, "AQTX: desc and buffer:\n");
+	i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc_on_ring,
+		      buff, buff_size);
+	(hw->aq.asq.next_to_use)++;
+	if (hw->aq.asq.next_to_use == hw->aq.asq.count)
+		hw->aq.asq.next_to_use = 0;
+	if (!details->postpone)
+		wr32(hw, hw->aq.asq.tail, hw->aq.asq.next_to_use);
+
+	/* if cmd_details are not defined or async flag is not set,
+	 * we need to wait for desc write back
+	 */
+	if (!details->async && !details->postpone) {
+		u32 total_delay = 0;
+
+		do {
+			/* AQ designers suggest use of head for better
+			 * timing reliability than DD bit
+			 */
+			if (i40e_asq_done(hw))
+				break;
+
+			if (is_atomic_context)
+				udelay(50);
+			else
+				usleep_range(40, 60);
+
+			total_delay += 50;
+		} while (total_delay < hw->aq.asq_cmd_timeout);
+	}
+
+	/* if ready, copy the desc back to temp */
+	if (i40e_asq_done(hw)) {
+		*desc = *desc_on_ring;
+		if (buff != NULL)
+			memcpy(buff, dma_buff->va, buff_size);
+		retval = le16_to_cpu(desc->retval);
+		if (retval != 0) {
+			i40e_debug(hw,
+				   I40E_DEBUG_AQ_MESSAGE,
+				   "AQTX: Command completed with error 0x%X.\n",
+				   retval);
+
+			/* strip off FW internal code */
+			retval &= 0xff;
+		}
+		cmd_completed = true;
+		if ((enum i40e_admin_queue_err)retval == I40E_AQ_RC_OK)
+			status = 0;
+		else if ((enum i40e_admin_queue_err)retval == I40E_AQ_RC_EBUSY)
+			status = -EBUSY;
+		else
+			status = -EIO;
+		hw->aq.asq_last_status = (enum i40e_admin_queue_err)retval;
+	}
+
+	i40e_debug(hw, I40E_DEBUG_AQ_COMMAND,
+		   "AQTX: desc and buffer writeback:\n");
+	i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc, buff, buff_size);
+
+	/* save writeback aq if requested */
+	if (details->wb_desc)
+		*details->wb_desc = *desc_on_ring;
+
+	/* update the error if time out occurred */
+	if ((!cmd_completed) &&
+	    (!details->async && !details->postpone)) {
+		if (rd32(hw, hw->aq.asq.len) & I40E_GL_ATQLEN_ATQCRIT_MASK) {
+			i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+				   "AQTX: AQ Critical error.\n");
+			status = -EIO;
+		} else {
+			i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+				   "AQTX: Writeback timeout.\n");
+			status = -EIO;
+		}
+	}
+
+asq_send_command_error:
+	return status;
+}
+
+/**
+ *  i40e_asq_send_command_atomic - send command to Admin Queue
+ *  @hw: pointer to the hw struct
+ *  @desc: prefilled descriptor describing the command (non DMA mem)
+ *  @buff: buffer to use for indirect commands
+ *  @buff_size: size of buffer for indirect commands
+ *  @cmd_details: pointer to command details structure
+ *  @is_atomic_context: is the function called in an atomic context?
+ *
+ *  Acquires the lock and calls the main send command execution
+ *  routine.
+ **/
+int
+i40e_asq_send_command_atomic(struct i40e_hw *hw,
+			     struct i40e_aq_desc *desc,
+			     void *buff, /* can be NULL */
+			     u16  buff_size,
+			     struct i40e_asq_cmd_details *cmd_details,
+			     bool is_atomic_context)
+{
+	int status;
+
+	mutex_lock(&hw->aq.asq_mutex);
+	status = i40e_asq_send_command_atomic_exec(hw, desc, buff, buff_size,
+						   cmd_details,
+						   is_atomic_context);
+
+	mutex_unlock(&hw->aq.asq_mutex);
+	return status;
+}
+
+int
+i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc,
+		      void *buff, /* can be NULL */ u16  buff_size,
+		      struct i40e_asq_cmd_details *cmd_details)
+{
+	return i40e_asq_send_command_atomic(hw, desc, buff, buff_size,
+					    cmd_details, false);
+}
+
+/**
+ *  i40e_asq_send_command_atomic_v2 - send command to Admin Queue
+ *  @hw: pointer to the hw struct
+ *  @desc: prefilled descriptor describing the command (non DMA mem)
+ *  @buff: buffer to use for indirect commands
+ *  @buff_size: size of buffer for indirect commands
+ *  @cmd_details: pointer to command details structure
+ *  @is_atomic_context: is the function called in an atomic context?
+ *  @aq_status: pointer to Admin Queue status return value
+ *
+ *  Acquires the lock and calls the main send command execution
+ *  routine. Returns the last Admin Queue status in aq_status
+ *  to avoid race conditions in access to hw->aq.asq_last_status.
+ **/
+int
+i40e_asq_send_command_atomic_v2(struct i40e_hw *hw,
+				struct i40e_aq_desc *desc,
+				void *buff, /* can be NULL */
+				u16  buff_size,
+				struct i40e_asq_cmd_details *cmd_details,
+				bool is_atomic_context,
+				enum i40e_admin_queue_err *aq_status)
+{
+	int status;
+
+	mutex_lock(&hw->aq.asq_mutex);
+	status = i40e_asq_send_command_atomic_exec(hw, desc, buff,
+						   buff_size,
+						   cmd_details,
+						   is_atomic_context);
+	if (aq_status)
+		*aq_status = hw->aq.asq_last_status;
+	mutex_unlock(&hw->aq.asq_mutex);
+	return status;
+}
+
+int
+i40e_asq_send_command_v2(struct i40e_hw *hw, struct i40e_aq_desc *desc,
+			 void *buff, /* can be NULL */ u16  buff_size,
+			 struct i40e_asq_cmd_details *cmd_details,
+			 enum i40e_admin_queue_err *aq_status)
+{
+	return i40e_asq_send_command_atomic_v2(hw, desc, buff, buff_size,
+					       cmd_details, true, aq_status);
+}
+
+/**
+ *  i40e_fill_default_direct_cmd_desc - AQ descriptor helper function
+ *  @desc:     pointer to the temp descriptor (non DMA mem)
+ *  @opcode:   the opcode can be used to decide which flags to turn off or on
+ *
+ *  Fill the desc with default values
+ **/
+void i40e_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc,
+				       u16 opcode)
+{
+	/* zero out the desc */
+	memset((void *)desc, 0, sizeof(struct i40e_aq_desc));
+	desc->opcode = cpu_to_le16(opcode);
+	desc->flags = cpu_to_le16(I40E_AQ_FLAG_SI);
+}
+
+/**
+ *  i40e_clean_arq_element
+ *  @hw: pointer to the hw struct
+ *  @e: event info from the receive descriptor, includes any buffers
+ *  @pending: number of events that could be left to process
+ *
+ *  This function cleans one Admin Receive Queue element and returns
+ *  the contents through e.  It can also return how many events are
+ *  left to process through 'pending'
+ **/
+int i40e_clean_arq_element(struct i40e_hw *hw,
+			   struct i40e_arq_event_info *e,
+			   u16 *pending)
+{
+	u16 ntc = hw->aq.arq.next_to_clean;
+	struct i40e_aq_desc *desc;
+	struct i40e_dma_mem *bi;
+	int ret_code = 0;
+	u16 desc_idx;
+	u16 datalen;
+	u16 flags;
+	u16 ntu;
+
+	/* pre-clean the event info */
+	memset(&e->desc, 0, sizeof(e->desc));
+
+	/* take the lock before we start messing with the ring */
+	mutex_lock(&hw->aq.arq_mutex);
+
+	if (hw->aq.arq.count == 0) {
+		i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+			   "AQRX: Admin queue not initialized.\n");
+		ret_code = -EIO;
+		goto clean_arq_element_err;
+	}
+
+	/* set next_to_use to head */
+	ntu = rd32(hw, hw->aq.arq.head) & I40E_PF_ARQH_ARQH_MASK;
+	if (ntu == ntc) {
+		/* nothing to do - shouldn't need to update ring's values */
+		ret_code = -EALREADY;
+		goto clean_arq_element_out;
+	}
+
+	/* now clean the next descriptor */
+	desc = I40E_ADMINQ_DESC(hw->aq.arq, ntc);
+	desc_idx = ntc;
+
+	hw->aq.arq_last_status =
+		(enum i40e_admin_queue_err)le16_to_cpu(desc->retval);
+	flags = le16_to_cpu(desc->flags);
+	if (flags & I40E_AQ_FLAG_ERR) {
+		ret_code = -EIO;
+		i40e_debug(hw,
+			   I40E_DEBUG_AQ_MESSAGE,
+			   "AQRX: Event received with error 0x%X.\n",
+			   hw->aq.arq_last_status);
+	}
+
+	e->desc = *desc;
+	datalen = le16_to_cpu(desc->datalen);
+	e->msg_len = min(datalen, e->buf_len);
+	if (e->msg_buf != NULL && (e->msg_len != 0))
+		memcpy(e->msg_buf, hw->aq.arq.r.arq_bi[desc_idx].va,
+		       e->msg_len);
+
+	i40e_debug(hw, I40E_DEBUG_AQ_COMMAND, "AQRX: desc and buffer:\n");
+	i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc, e->msg_buf,
+		      hw->aq.arq_buf_size);
+
+	/* Restore the original datalen and buffer address in the desc,
+	 * FW updates datalen to indicate the event message
+	 * size
+	 */
+	bi = &hw->aq.arq.r.arq_bi[ntc];
+	memset((void *)desc, 0, sizeof(struct i40e_aq_desc));
+
+	desc->flags = cpu_to_le16(I40E_AQ_FLAG_BUF);
+	if (hw->aq.arq_buf_size > I40E_AQ_LARGE_BUF)
+		desc->flags |= cpu_to_le16(I40E_AQ_FLAG_LB);
+	desc->datalen = cpu_to_le16((u16)bi->size);
+	desc->params.external.addr_high = cpu_to_le32(upper_32_bits(bi->pa));
+	desc->params.external.addr_low = cpu_to_le32(lower_32_bits(bi->pa));
+
+	/* set tail = the last cleaned desc index. */
+	wr32(hw, hw->aq.arq.tail, ntc);
+	/* ntc is updated to tail + 1 */
+	ntc++;
+	if (ntc == hw->aq.num_arq_entries)
+		ntc = 0;
+	hw->aq.arq.next_to_clean = ntc;
+	hw->aq.arq.next_to_use = ntu;
+
+	i40e_nvmupd_check_wait_event(hw, le16_to_cpu(e->desc.opcode), &e->desc);
+clean_arq_element_out:
+	/* Set pending if needed, unlock and return */
+	if (pending)
+		*pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc);
+clean_arq_element_err:
+	mutex_unlock(&hw->aq.arq_mutex);
+
+	return ret_code;
+}
+
+static void i40e_resume_aq(struct i40e_hw *hw)
+{
+	/* Registers are reset after PF reset */
+	hw->aq.asq.next_to_use = 0;
+	hw->aq.asq.next_to_clean = 0;
+
+	i40e_config_asq_regs(hw);
+
+	hw->aq.arq.next_to_use = 0;
+	hw->aq.arq.next_to_clean = 0;
+
+	i40e_config_arq_regs(hw);
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
new file mode 100644
index 0000000000..267f2e0a21
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _I40E_ADMINQ_H_
+#define _I40E_ADMINQ_H_
+
+#include "i40e_osdep.h"
+#include "i40e_adminq_cmd.h"
+
+#define I40E_ADMINQ_DESC(R, i)   \
+	(&(((struct i40e_aq_desc *)((R).desc_buf.va))[i]))
+
+#define I40E_ADMINQ_DESC_ALIGNMENT 4096
+
+struct i40e_adminq_ring {
+	struct i40e_virt_mem dma_head;	/* space for dma structures */
+	struct i40e_dma_mem desc_buf;	/* descriptor ring memory */
+	struct i40e_virt_mem cmd_buf;	/* command buffer memory */
+
+	union {
+		struct i40e_dma_mem *asq_bi;
+		struct i40e_dma_mem *arq_bi;
+	} r;
+
+	u16 count;		/* Number of descriptors */
+	u16 rx_buf_len;		/* Admin Receive Queue buffer length */
+
+	/* used for interrupt processing */
+	u16 next_to_use;
+	u16 next_to_clean;
+
+	/* used for queue tracking */
+	u32 head;
+	u32 tail;
+	u32 len;
+	u32 bah;
+	u32 bal;
+};
+
+/* ASQ transaction details */
+struct i40e_asq_cmd_details {
+	void *callback; /* cast from type I40E_ADMINQ_CALLBACK */
+	u64 cookie;
+	u16 flags_ena;
+	u16 flags_dis;
+	bool async;
+	bool postpone;
+	struct i40e_aq_desc *wb_desc;
+};
+
+#define I40E_ADMINQ_DETAILS(R, i)   \
+	(&(((struct i40e_asq_cmd_details *)((R).cmd_buf.va))[i]))
+
+/* ARQ event information */
+struct i40e_arq_event_info {
+	struct i40e_aq_desc desc;
+	u16 msg_len;
+	u16 buf_len;
+	u8 *msg_buf;
+};
+
+/* Admin Queue information */
+struct i40e_adminq_info {
+	struct i40e_adminq_ring arq;    /* receive queue */
+	struct i40e_adminq_ring asq;    /* send queue */
+	u32 asq_cmd_timeout;            /* send queue cmd write back timeout*/
+	u16 num_arq_entries;            /* receive queue depth */
+	u16 num_asq_entries;            /* send queue depth */
+	u16 arq_buf_size;               /* receive queue buffer size */
+	u16 asq_buf_size;               /* send queue buffer size */
+	u16 fw_maj_ver;                 /* firmware major version */
+	u16 fw_min_ver;                 /* firmware minor version */
+	u32 fw_build;                   /* firmware build number */
+	u16 api_maj_ver;                /* api major version */
+	u16 api_min_ver;                /* api minor version */
+
+	struct mutex asq_mutex; /* Send queue lock */
+	struct mutex arq_mutex; /* Receive queue lock */
+
+	/* last status values on send and receive queues */
+	enum i40e_admin_queue_err asq_last_status;
+	enum i40e_admin_queue_err arq_last_status;
+};
+
+/**
+ * i40e_aq_rc_to_posix - convert errors to user-land codes
+ * @aq_ret: AdminQ handler error code can override aq_rc
+ * @aq_rc: AdminQ firmware error code to convert
+ **/
+static inline int i40e_aq_rc_to_posix(int aq_ret, int aq_rc)
+{
+	int aq_to_posix[] = {
+		0,           /* I40E_AQ_RC_OK */
+		-EPERM,      /* I40E_AQ_RC_EPERM */
+		-ENOENT,     /* I40E_AQ_RC_ENOENT */
+		-ESRCH,      /* I40E_AQ_RC_ESRCH */
+		-EINTR,      /* I40E_AQ_RC_EINTR */
+		-EIO,        /* I40E_AQ_RC_EIO */
+		-ENXIO,      /* I40E_AQ_RC_ENXIO */
+		-E2BIG,      /* I40E_AQ_RC_E2BIG */
+		-EAGAIN,     /* I40E_AQ_RC_EAGAIN */
+		-ENOMEM,     /* I40E_AQ_RC_ENOMEM */
+		-EACCES,     /* I40E_AQ_RC_EACCES */
+		-EFAULT,     /* I40E_AQ_RC_EFAULT */
+		-EBUSY,      /* I40E_AQ_RC_EBUSY */
+		-EEXIST,     /* I40E_AQ_RC_EEXIST */
+		-EINVAL,     /* I40E_AQ_RC_EINVAL */
+		-ENOTTY,     /* I40E_AQ_RC_ENOTTY */
+		-ENOSPC,     /* I40E_AQ_RC_ENOSPC */
+		-ENOSYS,     /* I40E_AQ_RC_ENOSYS */
+		-ERANGE,     /* I40E_AQ_RC_ERANGE */
+		-EPIPE,      /* I40E_AQ_RC_EFLUSHED */
+		-ESPIPE,     /* I40E_AQ_RC_BAD_ADDR */
+		-EROFS,      /* I40E_AQ_RC_EMODE */
+		-EFBIG,      /* I40E_AQ_RC_EFBIG */
+	};
+
+	/* aq_rc is invalid if AQ timed out */
+	if (aq_ret == -EIO)
+		return -EAGAIN;
+
+	if (!((u32)aq_rc < (sizeof(aq_to_posix) / sizeof((aq_to_posix)[0]))))
+		return -ERANGE;
+
+	return aq_to_posix[aq_rc];
+}
+
+/* general information */
+#define I40E_AQ_LARGE_BUF	512
+#define I40E_ASQ_CMD_TIMEOUT	250000  /* usecs */
+
+void i40e_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc,
+				       u16 opcode);
+
+#endif /* _I40E_ADMINQ_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
new file mode 100644
index 0000000000..3357d65a90
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -0,0 +1,2432 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2021 Intel Corporation. */
+
+#ifndef _I40E_ADMINQ_CMD_H_
+#define _I40E_ADMINQ_CMD_H_
+
+/* This header file defines the i40e Admin Queue commands and is shared between
+ * i40e Firmware and Software.
+ *
+ * This file needs to comply with the Linux Kernel coding style.
+ */
+
+#define I40E_FW_API_VERSION_MAJOR	0x0001
+#define I40E_FW_API_VERSION_MINOR_X722	0x000C
+#define I40E_FW_API_VERSION_MINOR_X710	0x000F
+
+#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \
+					I40E_FW_API_VERSION_MINOR_X710 : \
+					I40E_FW_API_VERSION_MINOR_X722)
+
+/* API version 1.7 implements additional link and PHY-specific APIs  */
+#define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007
+/* API version 1.9 for X722 implements additional link and PHY-specific APIs */
+#define I40E_MINOR_VER_GET_LINK_INFO_X722 0x0009
+/* API version 1.6 for X722 devices adds ability to stop FW LLDP agent */
+#define I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722 0x0006
+/* API version 1.10 for X722 devices adds ability to request FEC encoding */
+#define I40E_MINOR_VER_FW_REQUEST_FEC_X722 0x000A
+
+struct i40e_aq_desc {
+	__le16 flags;
+	__le16 opcode;
+	__le16 datalen;
+	__le16 retval;
+	__le32 cookie_high;
+	__le32 cookie_low;
+	union {
+		struct {
+			__le32 param0;
+			__le32 param1;
+			__le32 param2;
+			__le32 param3;
+		} internal;
+		struct {
+			__le32 param0;
+			__le32 param1;
+			__le32 addr_high;
+			__le32 addr_low;
+		} external;
+		u8 raw[16];
+	} params;
+};
+
+/* Flags sub-structure
+ * |0  |1  |2  |3  |4  |5  |6  |7  |8  |9  |10 |11 |12 |13 |14 |15 |
+ * |DD |CMP|ERR|VFE| * *  RESERVED * * |LB |RD |VFC|BUF|SI |EI |FE |
+ */
+
+/* command flags and offsets*/
+#define I40E_AQ_FLAG_ERR_SHIFT	2
+#define I40E_AQ_FLAG_LB_SHIFT	9
+#define I40E_AQ_FLAG_RD_SHIFT	10
+#define I40E_AQ_FLAG_BUF_SHIFT	12
+#define I40E_AQ_FLAG_SI_SHIFT	13
+
+#define I40E_AQ_FLAG_ERR	BIT(I40E_AQ_FLAG_ERR_SHIFT) /* 0x4    */
+#define I40E_AQ_FLAG_LB		BIT(I40E_AQ_FLAG_LB_SHIFT)  /* 0x200  */
+#define I40E_AQ_FLAG_RD		BIT(I40E_AQ_FLAG_RD_SHIFT)  /* 0x400  */
+#define I40E_AQ_FLAG_BUF	BIT(I40E_AQ_FLAG_BUF_SHIFT) /* 0x1000 */
+#define I40E_AQ_FLAG_SI		BIT(I40E_AQ_FLAG_SI_SHIFT)  /* 0x2000 */
+
+/* error codes */
+enum i40e_admin_queue_err {
+	I40E_AQ_RC_OK		= 0,  /* success */
+	I40E_AQ_RC_EPERM	= 1,  /* Operation not permitted */
+	I40E_AQ_RC_ENOENT	= 2,  /* No such element */
+	I40E_AQ_RC_ESRCH	= 3,  /* Bad opcode */
+	I40E_AQ_RC_EINTR	= 4,  /* operation interrupted */
+	I40E_AQ_RC_EIO		= 5,  /* I/O error */
+	I40E_AQ_RC_ENXIO	= 6,  /* No such resource */
+	I40E_AQ_RC_E2BIG	= 7,  /* Arg too long */
+	I40E_AQ_RC_EAGAIN	= 8,  /* Try again */
+	I40E_AQ_RC_ENOMEM	= 9,  /* Out of memory */
+	I40E_AQ_RC_EACCES	= 10, /* Permission denied */
+	I40E_AQ_RC_EFAULT	= 11, /* Bad address */
+	I40E_AQ_RC_EBUSY	= 12, /* Device or resource busy */
+	I40E_AQ_RC_EEXIST	= 13, /* object already exists */
+	I40E_AQ_RC_EINVAL	= 14, /* Invalid argument */
+	I40E_AQ_RC_ENOTTY	= 15, /* Not a typewriter */
+	I40E_AQ_RC_ENOSPC	= 16, /* No space left or alloc failure */
+	I40E_AQ_RC_ENOSYS	= 17, /* Function not implemented */
+	I40E_AQ_RC_ERANGE	= 18, /* Parameter out of range */
+	I40E_AQ_RC_EFLUSHED	= 19, /* Cmd flushed due to prev cmd error */
+	I40E_AQ_RC_BAD_ADDR	= 20, /* Descriptor contains a bad pointer */
+	I40E_AQ_RC_EMODE	= 21, /* Op not allowed in current dev mode */
+	I40E_AQ_RC_EFBIG	= 22, /* File too large */
+};
+
+/* Admin Queue command opcodes */
+enum i40e_admin_queue_opc {
+	/* aq commands */
+	i40e_aqc_opc_get_version	= 0x0001,
+	i40e_aqc_opc_driver_version	= 0x0002,
+	i40e_aqc_opc_queue_shutdown	= 0x0003,
+	i40e_aqc_opc_set_pf_context	= 0x0004,
+
+	/* resource ownership */
+	i40e_aqc_opc_request_resource	= 0x0008,
+	i40e_aqc_opc_release_resource	= 0x0009,
+
+	i40e_aqc_opc_list_func_capabilities	= 0x000A,
+	i40e_aqc_opc_list_dev_capabilities	= 0x000B,
+
+	/* Proxy commands */
+	i40e_aqc_opc_set_proxy_config		= 0x0104,
+	i40e_aqc_opc_set_ns_proxy_table_entry	= 0x0105,
+
+	/* LAA */
+	i40e_aqc_opc_mac_address_read	= 0x0107,
+	i40e_aqc_opc_mac_address_write	= 0x0108,
+
+	/* PXE */
+	i40e_aqc_opc_clear_pxe_mode	= 0x0110,
+
+	/* WoL commands */
+	i40e_aqc_opc_set_wol_filter	= 0x0120,
+	i40e_aqc_opc_get_wake_reason	= 0x0121,
+
+	/* internal switch commands */
+	i40e_aqc_opc_get_switch_config		= 0x0200,
+	i40e_aqc_opc_add_statistics		= 0x0201,
+	i40e_aqc_opc_remove_statistics		= 0x0202,
+	i40e_aqc_opc_set_port_parameters	= 0x0203,
+	i40e_aqc_opc_get_switch_resource_alloc	= 0x0204,
+	i40e_aqc_opc_set_switch_config		= 0x0205,
+	i40e_aqc_opc_rx_ctl_reg_read		= 0x0206,
+	i40e_aqc_opc_rx_ctl_reg_write		= 0x0207,
+
+	i40e_aqc_opc_add_vsi			= 0x0210,
+	i40e_aqc_opc_update_vsi_parameters	= 0x0211,
+	i40e_aqc_opc_get_vsi_parameters		= 0x0212,
+
+	i40e_aqc_opc_add_pv			= 0x0220,
+	i40e_aqc_opc_update_pv_parameters	= 0x0221,
+	i40e_aqc_opc_get_pv_parameters		= 0x0222,
+
+	i40e_aqc_opc_add_veb			= 0x0230,
+	i40e_aqc_opc_update_veb_parameters	= 0x0231,
+	i40e_aqc_opc_get_veb_parameters		= 0x0232,
+
+	i40e_aqc_opc_delete_element		= 0x0243,
+
+	i40e_aqc_opc_add_macvlan		= 0x0250,
+	i40e_aqc_opc_remove_macvlan		= 0x0251,
+	i40e_aqc_opc_add_vlan			= 0x0252,
+	i40e_aqc_opc_remove_vlan		= 0x0253,
+	i40e_aqc_opc_set_vsi_promiscuous_modes	= 0x0254,
+	i40e_aqc_opc_add_tag			= 0x0255,
+	i40e_aqc_opc_remove_tag			= 0x0256,
+	i40e_aqc_opc_add_multicast_etag		= 0x0257,
+	i40e_aqc_opc_remove_multicast_etag	= 0x0258,
+	i40e_aqc_opc_update_tag			= 0x0259,
+	i40e_aqc_opc_add_control_packet_filter	= 0x025A,
+	i40e_aqc_opc_remove_control_packet_filter	= 0x025B,
+	i40e_aqc_opc_add_cloud_filters		= 0x025C,
+	i40e_aqc_opc_remove_cloud_filters	= 0x025D,
+	i40e_aqc_opc_clear_wol_switch_filters	= 0x025E,
+
+	i40e_aqc_opc_add_mirror_rule	= 0x0260,
+	i40e_aqc_opc_delete_mirror_rule	= 0x0261,
+
+	/* Dynamic Device Personalization */
+	i40e_aqc_opc_write_personalization_profile	= 0x0270,
+	i40e_aqc_opc_get_personalization_profile_list	= 0x0271,
+
+	/* DCB commands */
+	i40e_aqc_opc_dcb_ignore_pfc	= 0x0301,
+	i40e_aqc_opc_dcb_updated	= 0x0302,
+	i40e_aqc_opc_set_dcb_parameters = 0x0303,
+
+	/* TX scheduler */
+	i40e_aqc_opc_configure_vsi_bw_limit		= 0x0400,
+	i40e_aqc_opc_configure_vsi_ets_sla_bw_limit	= 0x0406,
+	i40e_aqc_opc_configure_vsi_tc_bw		= 0x0407,
+	i40e_aqc_opc_query_vsi_bw_config		= 0x0408,
+	i40e_aqc_opc_query_vsi_ets_sla_config		= 0x040A,
+	i40e_aqc_opc_configure_switching_comp_bw_limit	= 0x0410,
+
+	i40e_aqc_opc_enable_switching_comp_ets			= 0x0413,
+	i40e_aqc_opc_modify_switching_comp_ets			= 0x0414,
+	i40e_aqc_opc_disable_switching_comp_ets			= 0x0415,
+	i40e_aqc_opc_configure_switching_comp_ets_bw_limit	= 0x0416,
+	i40e_aqc_opc_configure_switching_comp_bw_config		= 0x0417,
+	i40e_aqc_opc_query_switching_comp_ets_config		= 0x0418,
+	i40e_aqc_opc_query_port_ets_config			= 0x0419,
+	i40e_aqc_opc_query_switching_comp_bw_config		= 0x041A,
+	i40e_aqc_opc_suspend_port_tx				= 0x041B,
+	i40e_aqc_opc_resume_port_tx				= 0x041C,
+	i40e_aqc_opc_configure_partition_bw			= 0x041D,
+	/* hmc */
+	i40e_aqc_opc_query_hmc_resource_profile	= 0x0500,
+	i40e_aqc_opc_set_hmc_resource_profile	= 0x0501,
+
+	/* phy commands*/
+	i40e_aqc_opc_get_phy_abilities		= 0x0600,
+	i40e_aqc_opc_set_phy_config		= 0x0601,
+	i40e_aqc_opc_set_mac_config		= 0x0603,
+	i40e_aqc_opc_set_link_restart_an	= 0x0605,
+	i40e_aqc_opc_get_link_status		= 0x0607,
+	i40e_aqc_opc_set_phy_int_mask		= 0x0613,
+	i40e_aqc_opc_get_local_advt_reg		= 0x0614,
+	i40e_aqc_opc_set_local_advt_reg		= 0x0615,
+	i40e_aqc_opc_get_partner_advt		= 0x0616,
+	i40e_aqc_opc_set_lb_modes		= 0x0618,
+	i40e_aqc_opc_get_phy_wol_caps		= 0x0621,
+	i40e_aqc_opc_set_phy_debug		= 0x0622,
+	i40e_aqc_opc_upload_ext_phy_fm		= 0x0625,
+	i40e_aqc_opc_run_phy_activity		= 0x0626,
+	i40e_aqc_opc_set_phy_register		= 0x0628,
+	i40e_aqc_opc_get_phy_register		= 0x0629,
+
+	/* NVM commands */
+	i40e_aqc_opc_nvm_read			= 0x0701,
+	i40e_aqc_opc_nvm_erase			= 0x0702,
+	i40e_aqc_opc_nvm_update			= 0x0703,
+	i40e_aqc_opc_nvm_config_read		= 0x0704,
+	i40e_aqc_opc_nvm_config_write		= 0x0705,
+	i40e_aqc_opc_oem_post_update		= 0x0720,
+	i40e_aqc_opc_thermal_sensor		= 0x0721,
+
+	/* virtualization commands */
+	i40e_aqc_opc_send_msg_to_pf		= 0x0801,
+	i40e_aqc_opc_send_msg_to_vf		= 0x0802,
+	i40e_aqc_opc_send_msg_to_peer		= 0x0803,
+
+	/* alternate structure */
+	i40e_aqc_opc_alternate_write		= 0x0900,
+	i40e_aqc_opc_alternate_write_indirect	= 0x0901,
+	i40e_aqc_opc_alternate_read		= 0x0902,
+	i40e_aqc_opc_alternate_read_indirect	= 0x0903,
+	i40e_aqc_opc_alternate_write_done	= 0x0904,
+	i40e_aqc_opc_alternate_set_mode		= 0x0905,
+	i40e_aqc_opc_alternate_clear_port	= 0x0906,
+
+	/* LLDP commands */
+	i40e_aqc_opc_lldp_get_mib	= 0x0A00,
+	i40e_aqc_opc_lldp_update_mib	= 0x0A01,
+	i40e_aqc_opc_lldp_add_tlv	= 0x0A02,
+	i40e_aqc_opc_lldp_update_tlv	= 0x0A03,
+	i40e_aqc_opc_lldp_delete_tlv	= 0x0A04,
+	i40e_aqc_opc_lldp_stop		= 0x0A05,
+	i40e_aqc_opc_lldp_start		= 0x0A06,
+	i40e_aqc_opc_get_cee_dcb_cfg	= 0x0A07,
+	i40e_aqc_opc_lldp_set_local_mib	= 0x0A08,
+	i40e_aqc_opc_lldp_stop_start_spec_agent	= 0x0A09,
+	i40e_aqc_opc_lldp_restore		= 0x0A0A,
+
+	/* Tunnel commands */
+	i40e_aqc_opc_add_udp_tunnel	= 0x0B00,
+	i40e_aqc_opc_del_udp_tunnel	= 0x0B01,
+	i40e_aqc_opc_set_rss_key	= 0x0B02,
+	i40e_aqc_opc_set_rss_lut	= 0x0B03,
+	i40e_aqc_opc_get_rss_key	= 0x0B04,
+	i40e_aqc_opc_get_rss_lut	= 0x0B05,
+
+	/* Async Events */
+	i40e_aqc_opc_event_lan_overflow		= 0x1001,
+
+	/* OEM commands */
+	i40e_aqc_opc_oem_parameter_change	= 0xFE00,
+	i40e_aqc_opc_oem_device_status_change	= 0xFE01,
+	i40e_aqc_opc_oem_ocsd_initialize	= 0xFE02,
+	i40e_aqc_opc_oem_ocbb_initialize	= 0xFE03,
+
+	/* debug commands */
+	i40e_aqc_opc_debug_read_reg		= 0xFF03,
+	i40e_aqc_opc_debug_write_reg		= 0xFF04,
+	i40e_aqc_opc_debug_modify_reg		= 0xFF07,
+	i40e_aqc_opc_debug_dump_internals	= 0xFF08,
+};
+
+/* command structures and indirect data structures */
+
+/* Structure naming conventions:
+ * - no suffix for direct command descriptor structures
+ * - _data for indirect sent data
+ * - _resp for indirect return data (data which is both will use _data)
+ * - _completion for direct return data
+ * - _element_ for repeated elements (may also be _data or _resp)
+ *
+ * Command structures are expected to overlay the params.raw member of the basic
+ * descriptor, and as such cannot exceed 16 bytes in length.
+ */
+
+/* This macro is used to generate a compilation error if a structure
+ * is not exactly the correct length. It gives a divide by zero error if the
+ * structure is not of the correct size, otherwise it creates an enum that is
+ * never used.
+ */
+#define I40E_CHECK_STRUCT_LEN(n, X) enum i40e_static_assert_enum_##X \
+	{ i40e_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
+
+/* This macro is used extensively to ensure that command structures are 16
+ * bytes in length as they have to map to the raw array of that size.
+ */
+#define I40E_CHECK_CMD_LENGTH(X)	I40E_CHECK_STRUCT_LEN(16, X)
+
+/* internal (0x00XX) commands */
+
+/* Get version (direct 0x0001) */
+struct i40e_aqc_get_version {
+	__le32 rom_ver;
+	__le32 fw_build;
+	__le16 fw_major;
+	__le16 fw_minor;
+	__le16 api_major;
+	__le16 api_minor;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_version);
+
+/* Send driver version (indirect 0x0002) */
+struct i40e_aqc_driver_version {
+	u8	driver_major_ver;
+	u8	driver_minor_ver;
+	u8	driver_build_ver;
+	u8	driver_subbuild_ver;
+	u8	reserved[4];
+	__le32	address_high;
+	__le32	address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_driver_version);
+
+/* Queue Shutdown (direct 0x0003) */
+struct i40e_aqc_queue_shutdown {
+	__le32	driver_unloading;
+#define I40E_AQ_DRIVER_UNLOADING	0x1
+	u8	reserved[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_queue_shutdown);
+
+/* Set PF context (0x0004, direct) */
+struct i40e_aqc_set_pf_context {
+	u8	pf_id;
+	u8	reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_pf_context);
+
+/* Request resource ownership (direct 0x0008)
+ * Release resource ownership (direct 0x0009)
+ */
+struct i40e_aqc_request_resource {
+	__le16	resource_id;
+	__le16	access_type;
+	__le32	timeout;
+	__le32	resource_number;
+	u8	reserved[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_request_resource);
+
+/* Get function capabilities (indirect 0x000A)
+ * Get device capabilities (indirect 0x000B)
+ */
+struct i40e_aqc_list_capabilites {
+	u8 command_flags;
+	u8 pf_index;
+	u8 reserved[2];
+	__le32 count;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_list_capabilites);
+
+struct i40e_aqc_list_capabilities_element_resp {
+	__le16	id;
+	u8	major_rev;
+	u8	minor_rev;
+	__le32	number;
+	__le32	logical_id;
+	__le32	phys_id;
+	u8	reserved[16];
+};
+
+/* list of caps */
+
+#define I40E_AQ_CAP_ID_SWITCH_MODE	0x0001
+#define I40E_AQ_CAP_ID_MNG_MODE		0x0002
+#define I40E_AQ_CAP_ID_NPAR_ACTIVE	0x0003
+#define I40E_AQ_CAP_ID_OS2BMC_CAP	0x0004
+#define I40E_AQ_CAP_ID_FUNCTIONS_VALID	0x0005
+#define I40E_AQ_CAP_ID_SRIOV		0x0012
+#define I40E_AQ_CAP_ID_VF		0x0013
+#define I40E_AQ_CAP_ID_VMDQ		0x0014
+#define I40E_AQ_CAP_ID_8021QBG		0x0015
+#define I40E_AQ_CAP_ID_8021QBR		0x0016
+#define I40E_AQ_CAP_ID_VSI		0x0017
+#define I40E_AQ_CAP_ID_DCB		0x0018
+#define I40E_AQ_CAP_ID_FCOE		0x0021
+#define I40E_AQ_CAP_ID_ISCSI		0x0022
+#define I40E_AQ_CAP_ID_RSS		0x0040
+#define I40E_AQ_CAP_ID_RXQ		0x0041
+#define I40E_AQ_CAP_ID_TXQ		0x0042
+#define I40E_AQ_CAP_ID_MSIX		0x0043
+#define I40E_AQ_CAP_ID_VF_MSIX		0x0044
+#define I40E_AQ_CAP_ID_FLOW_DIRECTOR	0x0045
+#define I40E_AQ_CAP_ID_1588		0x0046
+#define I40E_AQ_CAP_ID_IWARP		0x0051
+#define I40E_AQ_CAP_ID_LED		0x0061
+#define I40E_AQ_CAP_ID_SDP		0x0062
+#define I40E_AQ_CAP_ID_MDIO		0x0063
+#define I40E_AQ_CAP_ID_WSR_PROT		0x0064
+#define I40E_AQ_CAP_ID_NVM_MGMT		0x0080
+#define I40E_AQ_CAP_ID_FLEX10		0x00F1
+#define I40E_AQ_CAP_ID_CEM		0x00F2
+
+/* Set CPPM Configuration (direct 0x0103) */
+struct i40e_aqc_cppm_configuration {
+	__le16	command_flags;
+	__le16	ttlx;
+	__le32	dmacr;
+	__le16	dmcth;
+	u8	hptc;
+	u8	reserved;
+	__le32	pfltrc;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_cppm_configuration);
+
+/* Set ARP Proxy command / response (indirect 0x0104) */
+struct i40e_aqc_arp_proxy_data {
+	__le16	command_flags;
+	__le16	table_id;
+	__le32	enabled_offloads;
+	__le32	ip_addr;
+	u8	mac_addr[6];
+	u8	reserved[2];
+};
+
+I40E_CHECK_STRUCT_LEN(0x14, i40e_aqc_arp_proxy_data);
+
+/* Set NS Proxy Table Entry Command (indirect 0x0105) */
+struct i40e_aqc_ns_proxy_data {
+	__le16	table_idx_mac_addr_0;
+	__le16	table_idx_mac_addr_1;
+	__le16	table_idx_ipv6_0;
+	__le16	table_idx_ipv6_1;
+	__le16	control;
+	u8	mac_addr_0[6];
+	u8	mac_addr_1[6];
+	u8	local_mac_addr[6];
+	u8	ipv6_addr_0[16]; /* Warning! spec specifies BE byte order */
+	u8	ipv6_addr_1[16];
+};
+
+I40E_CHECK_STRUCT_LEN(0x3c, i40e_aqc_ns_proxy_data);
+
+/* Manage LAA Command (0x0106) - obsolete */
+struct i40e_aqc_mng_laa {
+	__le16	command_flags;
+	u8	reserved[2];
+	__le32	sal;
+	__le16	sah;
+	u8	reserved2[6];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_mng_laa);
+
+/* Manage MAC Address Read Command (indirect 0x0107) */
+struct i40e_aqc_mac_address_read {
+	__le16	command_flags;
+#define I40E_AQC_LAN_ADDR_VALID		0x10
+#define I40E_AQC_PORT_ADDR_VALID	0x40
+	u8	reserved[6];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_mac_address_read);
+
+struct i40e_aqc_mac_address_read_data {
+	u8 pf_lan_mac[6];
+	u8 pf_san_mac[6];
+	u8 port_mac[6];
+	u8 pf_wol_mac[6];
+};
+
+I40E_CHECK_STRUCT_LEN(24, i40e_aqc_mac_address_read_data);
+
+/* Manage MAC Address Write Command (0x0108) */
+struct i40e_aqc_mac_address_write {
+	__le16	command_flags;
+#define I40E_AQC_MC_MAG_EN		0x0100
+#define I40E_AQC_WOL_PRESERVE_ON_PFR	0x0200
+#define I40E_AQC_WRITE_TYPE_LAA_ONLY	0x0000
+#define I40E_AQC_WRITE_TYPE_LAA_WOL	0x4000
+#define I40E_AQC_WRITE_TYPE_UPDATE_MC_MAG	0xC000
+
+	__le16	mac_sah;
+	__le32	mac_sal;
+	u8	reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_mac_address_write);
+
+/* PXE commands (0x011x) */
+
+/* Clear PXE Command and response  (direct 0x0110) */
+struct i40e_aqc_clear_pxe {
+	u8	rx_cnt;
+	u8	reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_clear_pxe);
+
+/* Set WoL Filter (0x0120) */
+
+struct i40e_aqc_set_wol_filter {
+	__le16 filter_index;
+
+	__le16 cmd_flags;
+	__le16 valid_flags;
+	u8 reserved[2];
+	__le32	address_high;
+	__le32	address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_wol_filter);
+
+struct i40e_aqc_set_wol_filter_data {
+	u8 filter[128];
+	u8 mask[16];
+};
+
+I40E_CHECK_STRUCT_LEN(0x90, i40e_aqc_set_wol_filter_data);
+
+/* Get Wake Reason (0x0121) */
+
+struct i40e_aqc_get_wake_reason_completion {
+	u8 reserved_1[2];
+	__le16 wake_reason;
+	u8 reserved_2[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_wake_reason_completion);
+
+/* Switch configuration commands (0x02xx) */
+
+/* Used by many indirect commands that only pass an seid and a buffer in the
+ * command
+ */
+struct i40e_aqc_switch_seid {
+	__le16	seid;
+	u8	reserved[6];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_switch_seid);
+
+/* Get Switch Configuration command (indirect 0x0200)
+ * uses i40e_aqc_switch_seid for the descriptor
+ */
+struct i40e_aqc_get_switch_config_header_resp {
+	__le16	num_reported;
+	__le16	num_total;
+	u8	reserved[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_switch_config_header_resp);
+
+struct i40e_aqc_switch_config_element_resp {
+	u8	element_type;
+	u8	revision;
+	__le16	seid;
+	__le16	uplink_seid;
+	__le16	downlink_seid;
+	u8	reserved[3];
+	u8	connection_type;
+	__le16	scheduler_id;
+	__le16	element_info;
+};
+
+I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_config_element_resp);
+
+/* Get Switch Configuration (indirect 0x0200)
+ *    an array of elements are returned in the response buffer
+ *    the first in the array is the header, remainder are elements
+ */
+struct i40e_aqc_get_switch_config_resp {
+	struct i40e_aqc_get_switch_config_header_resp	header;
+	struct i40e_aqc_switch_config_element_resp	element[1];
+};
+
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_get_switch_config_resp);
+
+/* Add Statistics (direct 0x0201)
+ * Remove Statistics (direct 0x0202)
+ */
+struct i40e_aqc_add_remove_statistics {
+	__le16	seid;
+	__le16	vlan;
+	__le16	stat_index;
+	u8	reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_statistics);
+
+/* Set Port Parameters command (direct 0x0203) */
+struct i40e_aqc_set_port_parameters {
+	__le16	command_flags;
+	__le16	bad_frame_vsi;
+	__le16	default_seid;        /* reserved for command */
+	u8	reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_port_parameters);
+
+/* Get Switch Resource Allocation (indirect 0x0204) */
+struct i40e_aqc_get_switch_resource_alloc {
+	u8	num_entries;         /* reserved for command */
+	u8	reserved[7];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_switch_resource_alloc);
+
+/* expect an array of these structs in the response buffer */
+struct i40e_aqc_switch_resource_alloc_element_resp {
+	u8	resource_type;
+	u8	reserved1;
+	__le16	guaranteed;
+	__le16	total;
+	__le16	used;
+	__le16	total_unalloced;
+	u8	reserved2[6];
+};
+
+I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp);
+
+/* Set Switch Configuration (direct 0x0205) */
+struct i40e_aqc_set_switch_config {
+	__le16	flags;
+/* flags used for both fields below */
+#define I40E_AQ_SET_SWITCH_CFG_PROMISC		0x0001
+	__le16	valid_flags;
+	/* The ethertype in switch_tag is dropped on ingress and used
+	 * internally by the switch. Set this to zero for the default
+	 * of 0x88a8 (802.1ad). Should be zero for firmware API
+	 * versions lower than 1.7.
+	 */
+	__le16	switch_tag;
+	/* The ethertypes in first_tag and second_tag are used to
+	 * match the outer and inner VLAN tags (respectively) when HW
+	 * double VLAN tagging is enabled via the set port parameters
+	 * AQ command. Otherwise these are both ignored. Set them to
+	 * zero for their defaults of 0x8100 (802.1Q). Should be zero
+	 * for firmware API versions lower than 1.7.
+	 */
+	__le16	first_tag;
+	__le16	second_tag;
+	/* Next byte is split into following:
+	 * Bit 7    : 0 : No action, 1: Switch to mode defined by bits 6:0
+	 * Bit 6    : 0 : Destination Port, 1: source port
+	 * Bit 5..4 : L4 type
+	 * 0: rsvd
+	 * 1: TCP
+	 * 2: UDP
+	 * 3: Both TCP and UDP
+	 * Bits 3:0 Mode
+	 * 0: default mode
+	 * 1: L4 port only mode
+	 * 2: non-tunneled mode
+	 * 3: tunneled mode
+	 */
+#define I40E_AQ_SET_SWITCH_BIT7_VALID		0x80
+
+
+#define I40E_AQ_SET_SWITCH_L4_TYPE_TCP		0x10
+
+#define I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL	0x02
+	u8	mode;
+	u8	rsvd5[5];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config);
+
+/* Read Receive control registers  (direct 0x0206)
+ * Write Receive control registers (direct 0x0207)
+ *     used for accessing Rx control registers that can be
+ *     slow and need special handling when under high Rx load
+ */
+struct i40e_aqc_rx_ctl_reg_read_write {
+	__le32 reserved1;
+	__le32 address;
+	__le32 reserved2;
+	__le32 value;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_rx_ctl_reg_read_write);
+
+/* Add VSI (indirect 0x0210)
+ *    this indirect command uses struct i40e_aqc_vsi_properties_data
+ *    as the indirect buffer (128 bytes)
+ *
+ * Update VSI (indirect 0x211)
+ *     uses the same data structure as Add VSI
+ *
+ * Get VSI (indirect 0x0212)
+ *     uses the same completion and data structure as Add VSI
+ */
+struct i40e_aqc_add_get_update_vsi {
+	__le16	uplink_seid;
+	u8	connection_type;
+#define I40E_AQ_VSI_CONN_TYPE_NORMAL	0x1
+	u8	reserved1;
+	u8	vf_id;
+	u8	reserved2;
+	__le16	vsi_flags;
+#define I40E_AQ_VSI_TYPE_VF		0x0
+#define I40E_AQ_VSI_TYPE_VMDQ2		0x1
+#define I40E_AQ_VSI_TYPE_PF		0x2
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_get_update_vsi);
+
+struct i40e_aqc_add_get_update_vsi_completion {
+	__le16 seid;
+	__le16 vsi_number;
+	__le16 vsi_used;
+	__le16 vsi_free;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_get_update_vsi_completion);
+
+struct i40e_aqc_vsi_properties_data {
+	/* first 96 byte are written by SW */
+	__le16	valid_sections;
+#define I40E_AQ_VSI_PROP_SWITCH_VALID		0x0001
+#define I40E_AQ_VSI_PROP_SECURITY_VALID		0x0002
+#define I40E_AQ_VSI_PROP_VLAN_VALID		0x0004
+#define I40E_AQ_VSI_PROP_QUEUE_MAP_VALID	0x0040
+#define I40E_AQ_VSI_PROP_QUEUE_OPT_VALID	0x0080
+#define I40E_AQ_VSI_PROP_SCHED_VALID		0x0200
+	/* switch section */
+	__le16	switch_id; /* 12bit id combined with flags below */
+#define I40E_AQ_VSI_SW_ID_SHIFT		0x0000
+#define I40E_AQ_VSI_SW_ID_MASK		(0xFFF << I40E_AQ_VSI_SW_ID_SHIFT)
+#define I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB	0x2000
+#define I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB	0x4000
+	u8	sw_reserved[2];
+	/* security section */
+	u8	sec_flags;
+#define I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK	0x02
+#define I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK	0x04
+	u8	sec_reserved;
+	/* VLAN section */
+	__le16	pvid; /* VLANS include priority bits */
+	__le16	fcoe_pvid;
+	u8	port_vlan_flags;
+#define I40E_AQ_VSI_PVLAN_MODE_SHIFT	0x00
+#define I40E_AQ_VSI_PVLAN_MODE_MASK	(0x03 << \
+					 I40E_AQ_VSI_PVLAN_MODE_SHIFT)
+#define I40E_AQ_VSI_PVLAN_MODE_TAGGED	0x01
+#define I40E_AQ_VSI_PVLAN_MODE_ALL	0x03
+#define I40E_AQ_VSI_PVLAN_INSERT_PVID	0x04
+#define I40E_AQ_VSI_PVLAN_EMOD_SHIFT	0x03
+#define I40E_AQ_VSI_PVLAN_EMOD_MASK	(0x3 << \
+					 I40E_AQ_VSI_PVLAN_EMOD_SHIFT)
+#define I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH	0x0
+#define I40E_AQ_VSI_PVLAN_EMOD_STR	0x10
+#define I40E_AQ_VSI_PVLAN_EMOD_NOTHING	0x18
+	u8	pvlan_reserved[3];
+	/* ingress egress up sections */
+	__le32	ingress_table; /* bitmap, 3 bits per up */
+	__le32	egress_table;   /* same defines as for ingress table */
+	/* cascaded PV section */
+	__le16	cas_pv_tag;
+	u8	cas_pv_flags;
+	u8	cas_pv_reserved;
+	/* queue mapping section */
+	__le16	mapping_flags;
+#define I40E_AQ_VSI_QUE_MAP_CONTIG	0x0
+#define I40E_AQ_VSI_QUE_MAP_NONCONTIG	0x1
+	__le16	queue_mapping[16];
+	__le16	tc_mapping[8];
+#define I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT	0
+#define I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT	9
+	/* queueing option section */
+	u8	queueing_opt_flags;
+#define I40E_AQ_VSI_QUE_OPT_TCP_ENA	0x10
+#define I40E_AQ_VSI_QUE_OPT_RSS_LUT_VSI	0x40
+	u8	queueing_opt_reserved[3];
+	/* scheduler section */
+	u8	up_enable_bits;
+	u8	sched_reserved;
+	/* outer up section */
+	__le32	outer_up_table; /* same structure and defines as ingress tbl */
+	u8	cmd_reserved[8];
+	/* last 32 bytes are written by FW */
+	__le16	qs_handle[8];
+#define I40E_AQ_VSI_QS_HANDLE_INVALID	0xFFFF
+	__le16	stat_counter_idx;
+	__le16	sched_id;
+	u8	resp_reserved[12];
+};
+
+I40E_CHECK_STRUCT_LEN(128, i40e_aqc_vsi_properties_data);
+
+/* Add Port Virtualizer (direct 0x0220)
+ * also used for update PV (direct 0x0221) but only flags are used
+ * (IS_CTRL_PORT only works on add PV)
+ */
+struct i40e_aqc_add_update_pv {
+	__le16	command_flags;
+	__le16	uplink_seid;
+	__le16	connected_seid;
+	u8	reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_update_pv);
+
+struct i40e_aqc_add_update_pv_completion {
+	/* reserved for update; for add also encodes error if rc == ENOSPC */
+	__le16	pv_seid;
+	u8	reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_update_pv_completion);
+
+/* Get PV Params (direct 0x0222)
+ * uses i40e_aqc_switch_seid for the descriptor
+ */
+
+struct i40e_aqc_get_pv_params_completion {
+	__le16	seid;
+	__le16	default_stag;
+	__le16	pv_flags; /* same flags as add_pv */
+	u8	reserved[8];
+	__le16	default_port_seid;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_pv_params_completion);
+
+/* Add VEB (direct 0x0230) */
+struct i40e_aqc_add_veb {
+	__le16	uplink_seid;
+	__le16	downlink_seid;
+	__le16	veb_flags;
+#define I40E_AQC_ADD_VEB_FLOATING		0x1
+#define I40E_AQC_ADD_VEB_PORT_TYPE_DEFAULT	0x2
+#define I40E_AQC_ADD_VEB_PORT_TYPE_DATA		0x4
+#define I40E_AQC_ADD_VEB_ENABLE_DISABLE_STATS	0x10
+	u8	enable_tcs;
+	u8	reserved[9];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_veb);
+
+struct i40e_aqc_add_veb_completion {
+	u8	reserved[6];
+	__le16	switch_seid;
+	/* also encodes error if rc == ENOSPC; codes are the same as add_pv */
+	__le16	veb_seid;
+	__le16	statistic_index;
+	__le16	vebs_used;
+	__le16	vebs_free;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_veb_completion);
+
+/* Get VEB Parameters (direct 0x0232)
+ * uses i40e_aqc_switch_seid for the descriptor
+ */
+struct i40e_aqc_get_veb_parameters_completion {
+	__le16	seid;
+	__le16	switch_id;
+	__le16	veb_flags; /* only the first/last flags from 0x0230 is valid */
+	__le16	statistic_index;
+	__le16	vebs_used;
+	__le16	vebs_free;
+	u8	reserved[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_veb_parameters_completion);
+
+/* Delete Element (direct 0x0243)
+ * uses the generic i40e_aqc_switch_seid
+ */
+
+/* Add MAC-VLAN (indirect 0x0250) */
+
+/* used for the command for most vlan commands */
+struct i40e_aqc_macvlan {
+	__le16	num_addresses;
+	__le16	seid[3];
+#define I40E_AQC_MACVLAN_CMD_SEID_VALID		0x8000
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_macvlan);
+
+/* indirect data for command and response */
+struct i40e_aqc_add_macvlan_element_data {
+	u8	mac_addr[6];
+	__le16	vlan_tag;
+	__le16	flags;
+#define I40E_AQC_MACVLAN_ADD_PERFECT_MATCH	0x0001
+#define I40E_AQC_MACVLAN_ADD_IGNORE_VLAN	0x0004
+#define I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC	0x0010
+	__le16	queue_number;
+	/* response section */
+	u8	match_method;
+#define I40E_AQC_MM_ERR_NO_RES		0xFF
+	u8	reserved1[3];
+};
+
+struct i40e_aqc_add_remove_macvlan_completion {
+	__le16 perfect_mac_used;
+	__le16 perfect_mac_free;
+	__le16 unicast_hash_free;
+	__le16 multicast_hash_free;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_macvlan_completion);
+
+/* Remove MAC-VLAN (indirect 0x0251)
+ * uses i40e_aqc_macvlan for the descriptor
+ * data points to an array of num_addresses of elements
+ */
+
+struct i40e_aqc_remove_macvlan_element_data {
+	u8	mac_addr[6];
+	__le16	vlan_tag;
+	u8	flags;
+#define I40E_AQC_MACVLAN_DEL_PERFECT_MATCH	0x01
+#define I40E_AQC_MACVLAN_DEL_IGNORE_VLAN	0x08
+	u8	reserved[3];
+	/* reply section */
+	u8	error_code;
+	u8	reply_reserved[3];
+};
+
+/* Add VLAN (indirect 0x0252)
+ * Remove VLAN (indirect 0x0253)
+ * use the generic i40e_aqc_macvlan for the command
+ */
+struct i40e_aqc_add_remove_vlan_element_data {
+	__le16	vlan_tag;
+	u8	vlan_flags;
+	u8	reserved;
+	u8	result;
+	u8	reserved1[3];
+};
+
+struct i40e_aqc_add_remove_vlan_completion {
+	u8	reserved[4];
+	__le16	vlans_used;
+	__le16	vlans_free;
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+/* Set VSI Promiscuous Modes (direct 0x0254) */
+struct i40e_aqc_set_vsi_promiscuous_modes {
+	__le16	promiscuous_flags;
+	__le16	valid_flags;
+/* flags used for both fields above */
+#define I40E_AQC_SET_VSI_PROMISC_UNICAST	0x01
+#define I40E_AQC_SET_VSI_PROMISC_MULTICAST	0x02
+#define I40E_AQC_SET_VSI_PROMISC_BROADCAST	0x04
+#define I40E_AQC_SET_VSI_DEFAULT		0x08
+#define I40E_AQC_SET_VSI_PROMISC_VLAN		0x10
+#define I40E_AQC_SET_VSI_PROMISC_RX_ONLY	0x8000
+	__le16	seid;
+	__le16	vlan_tag;
+#define I40E_AQC_SET_VSI_VLAN_VALID		0x8000
+	u8	reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_vsi_promiscuous_modes);
+
+/* Add S/E-tag command (direct 0x0255)
+ * Uses generic i40e_aqc_add_remove_tag_completion for completion
+ */
+struct i40e_aqc_add_tag {
+	__le16	flags;
+	__le16	seid;
+	__le16	tag;
+	__le16	queue_number;
+	u8	reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_tag);
+
+struct i40e_aqc_add_remove_tag_completion {
+	u8	reserved[12];
+	__le16	tags_used;
+	__le16	tags_free;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_tag_completion);
+
+/* Remove S/E-tag command (direct 0x0256)
+ * Uses generic i40e_aqc_add_remove_tag_completion for completion
+ */
+struct i40e_aqc_remove_tag {
+	__le16	seid;
+	__le16	tag;
+	u8	reserved[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_tag);
+
+/* Add multicast E-Tag (direct 0x0257)
+ * del multicast E-Tag (direct 0x0258) only uses pv_seid and etag fields
+ * and no external data
+ */
+struct i40e_aqc_add_remove_mcast_etag {
+	__le16	pv_seid;
+	__le16	etag;
+	u8	num_unicast_etags;
+	u8	reserved[3];
+	__le32	addr_high;          /* address of array of 2-byte s-tags */
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_mcast_etag);
+
+struct i40e_aqc_add_remove_mcast_etag_completion {
+	u8	reserved[4];
+	__le16	mcast_etags_used;
+	__le16	mcast_etags_free;
+	__le32	addr_high;
+	__le32	addr_low;
+
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_mcast_etag_completion);
+
+/* Update S/E-Tag (direct 0x0259) */
+struct i40e_aqc_update_tag {
+	__le16	seid;
+	__le16	old_tag;
+	__le16	new_tag;
+	u8	reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_update_tag);
+
+struct i40e_aqc_update_tag_completion {
+	u8	reserved[12];
+	__le16	tags_used;
+	__le16	tags_free;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_update_tag_completion);
+
+/* Add Control Packet filter (direct 0x025A)
+ * Remove Control Packet filter (direct 0x025B)
+ * uses the i40e_aqc_add_oveb_cloud,
+ * and the generic direct completion structure
+ */
+struct i40e_aqc_add_remove_control_packet_filter {
+	u8	mac[6];
+	__le16	etype;
+	__le16	flags;
+#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC	0x0001
+#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP		0x0002
+#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TX		0x0008
+#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_RX		0x0000
+	__le16	seid;
+	__le16	queue;
+	u8	reserved[2];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_control_packet_filter);
+
+struct i40e_aqc_add_remove_control_packet_filter_completion {
+	__le16	mac_etype_used;
+	__le16	etype_used;
+	__le16	mac_etype_free;
+	__le16	etype_free;
+	u8	reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_control_packet_filter_completion);
+
+/* Add Cloud filters (indirect 0x025C)
+ * Remove Cloud filters (indirect 0x025D)
+ * uses the i40e_aqc_add_remove_cloud_filters,
+ * and the generic indirect completion structure
+ */
+struct i40e_aqc_add_remove_cloud_filters {
+	u8	num_filters;
+	u8	reserved;
+	__le16	seid;
+	u8	big_buffer_flag;
+#define I40E_AQC_ADD_CLOUD_CMD_BB	1
+	u8	reserved2[3];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_cloud_filters);
+
+struct i40e_aqc_cloud_filters_element_data {
+	u8	outer_mac[6];
+	u8	inner_mac[6];
+	__le16	inner_vlan;
+	union {
+		struct {
+			u8 reserved[12];
+			u8 data[4];
+		} v4;
+		struct {
+			u8 data[16];
+		} v6;
+		struct {
+			__le16 data[8];
+		} raw_v6;
+	} ipaddr;
+	__le16	flags;
+/* 0x0000 reserved */
+/* 0x0001 reserved */
+/* 0x0002 reserved */
+#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN		0x0003
+#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID	0x0004
+/* 0x0005 reserved */
+#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID		0x0006
+/* 0x0007 reserved */
+/* 0x0008 reserved */
+#define I40E_AQC_ADD_CLOUD_FILTER_OMAC			0x0009
+#define I40E_AQC_ADD_CLOUD_FILTER_IMAC			0x000A
+#define I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC	0x000B
+#define I40E_AQC_ADD_CLOUD_FILTER_IIP			0x000C
+/* 0x000D reserved */
+/* 0x000E reserved */
+/* 0x000F reserved */
+/* 0x0010 to 0x0017 is for custom filters */
+#define I40E_AQC_ADD_CLOUD_FILTER_IP_PORT		0x0010 /* Dest IP + L4 Port */
+#define I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT		0x0011 /* Dest MAC + L4 Port */
+#define I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT		0x0012 /* Dest MAC + VLAN + L4 Port */
+
+#define I40E_AQC_ADD_CLOUD_FLAGS_IPV4			0
+#define I40E_AQC_ADD_CLOUD_FLAGS_IPV6			0x0100
+
+#define I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT		9
+#define I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK		0x1E00
+#define I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE		2
+
+
+	__le32	tenant_id;
+	u8	reserved[4];
+	__le16	queue_number;
+	u8	reserved2[14];
+	/* response section */
+	u8	allocation_result;
+	u8	response_reserved[7];
+};
+
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_cloud_filters_element_data);
+
+/* i40e_aqc_cloud_filters_element_bb is used when
+ * I40E_AQC_CLOUD_CMD_BB flag is set.
+ */
+struct i40e_aqc_cloud_filters_element_bb {
+	struct i40e_aqc_cloud_filters_element_data element;
+	u16     general_fields[32];
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0	15
+};
+
+I40E_CHECK_STRUCT_LEN(0x80, i40e_aqc_cloud_filters_element_bb);
+
+struct i40e_aqc_remove_cloud_filters_completion {
+	__le16 perfect_ovlan_used;
+	__le16 perfect_ovlan_free;
+	__le16 vlan_used;
+	__le16 vlan_free;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_cloud_filters_completion);
+
+/* Replace filter Command 0x025F
+ * uses the i40e_aqc_replace_cloud_filters,
+ * and the generic indirect completion structure
+ */
+struct i40e_filter_data {
+	u8 filter_type;
+	u8 input[3];
+};
+
+I40E_CHECK_STRUCT_LEN(4, i40e_filter_data);
+
+struct i40e_aqc_replace_cloud_filters_cmd {
+	u8      valid_flags;
+	u8      old_filter_type;
+	u8      new_filter_type;
+	u8      tr_bit;
+	u8      reserved[4];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_replace_cloud_filters_cmd);
+
+struct i40e_aqc_replace_cloud_filters_cmd_buf {
+	u8      data[32];
+	struct i40e_filter_data filters[8];
+};
+
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_replace_cloud_filters_cmd_buf);
+
+/* Add Mirror Rule (indirect or direct 0x0260)
+ * Delete Mirror Rule (indirect or direct 0x0261)
+ * note: some rule types (4,5) do not use an external buffer.
+ *       take care to set the flags correctly.
+ */
+struct i40e_aqc_add_delete_mirror_rule {
+	__le16 seid;
+	__le16 rule_type;
+#define I40E_AQC_MIRROR_RULE_TYPE_SHIFT		0
+#define I40E_AQC_MIRROR_RULE_TYPE_MASK		(0x7 << \
+						I40E_AQC_MIRROR_RULE_TYPE_SHIFT)
+#define I40E_AQC_MIRROR_RULE_TYPE_VLAN		3
+#define I40E_AQC_MIRROR_RULE_TYPE_ALL_INGRESS	4
+#define I40E_AQC_MIRROR_RULE_TYPE_ALL_EGRESS	5
+	__le16 num_entries;
+	__le16 destination;  /* VSI for add, rule id for delete */
+	__le32 addr_high;    /* address of array of 2-byte VSI or VLAN ids */
+	__le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_delete_mirror_rule);
+
+struct i40e_aqc_add_delete_mirror_rule_completion {
+	u8	reserved[2];
+	__le16	rule_id;  /* only used on add */
+	__le16	mirror_rules_used;
+	__le16	mirror_rules_free;
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_delete_mirror_rule_completion);
+
+/* Dynamic Device Personalization */
+struct i40e_aqc_write_personalization_profile {
+	u8      flags;
+	u8      reserved[3];
+	__le32  profile_track_id;
+	__le32  addr_high;
+	__le32  addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_write_personalization_profile);
+
+struct i40e_aqc_write_ddp_resp {
+	__le32 error_offset;
+	__le32 error_info;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct i40e_aqc_get_applied_profiles {
+	u8      flags;
+	u8      rsv[3];
+	__le32  reserved;
+	__le32  addr_high;
+	__le32  addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_applied_profiles);
+
+/* DCB 0x03xx*/
+
+/* PFC Ignore (direct 0x0301)
+ *    the command and response use the same descriptor structure
+ */
+struct i40e_aqc_pfc_ignore {
+	u8	tc_bitmap;
+	u8	command_flags; /* unused on response */
+	u8	reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_pfc_ignore);
+
+/* DCB Update (direct 0x0302) uses the i40e_aq_desc structure
+ * with no parameters
+ */
+
+/* TX scheduler 0x04xx */
+
+/* Almost all the indirect commands use
+ * this generic struct to pass the SEID in param0
+ */
+struct i40e_aqc_tx_sched_ind {
+	__le16	vsi_seid;
+	u8	reserved[6];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_tx_sched_ind);
+
+/* Several commands respond with a set of queue set handles */
+struct i40e_aqc_qs_handles_resp {
+	__le16 qs_handles[8];
+};
+
+/* Configure VSI BW limits (direct 0x0400) */
+struct i40e_aqc_configure_vsi_bw_limit {
+	__le16	vsi_seid;
+	u8	reserved[2];
+	__le16	credit;
+	u8	reserved1[2];
+	u8	max_credit; /* 0-3, limit = 2^max */
+	u8	reserved2[7];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_configure_vsi_bw_limit);
+
+/* Configure VSI Bandwidth Limit per Traffic Type (indirect 0x0406)
+ *    responds with i40e_aqc_qs_handles_resp
+ */
+struct i40e_aqc_configure_vsi_ets_sla_bw_data {
+	u8	tc_valid_bits;
+	u8	reserved[15];
+	__le16	tc_bw_credits[8]; /* FW writesback QS handles here */
+
+	/* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
+	__le16	tc_bw_max[2];
+	u8	reserved1[28];
+};
+
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_configure_vsi_ets_sla_bw_data);
+
+/* Configure VSI Bandwidth Allocation per Traffic Type (indirect 0x0407)
+ *    responds with i40e_aqc_qs_handles_resp
+ */
+struct i40e_aqc_configure_vsi_tc_bw_data {
+	u8	tc_valid_bits;
+	u8	reserved[3];
+	u8	tc_bw_credits[8];
+	u8	reserved1[4];
+	__le16	qs_handles[8];
+};
+
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_configure_vsi_tc_bw_data);
+
+/* Query vsi bw configuration (indirect 0x0408) */
+struct i40e_aqc_query_vsi_bw_config_resp {
+	u8	tc_valid_bits;
+	u8	tc_suspended_bits;
+	u8	reserved[14];
+	__le16	qs_handles[8];
+	u8	reserved1[4];
+	__le16	port_bw_limit;
+	u8	reserved2[2];
+	u8	max_bw; /* 0-3, limit = 2^max */
+	u8	reserved3[23];
+};
+
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_query_vsi_bw_config_resp);
+
+/* Query VSI Bandwidth Allocation per Traffic Type (indirect 0x040A) */
+struct i40e_aqc_query_vsi_ets_sla_config_resp {
+	u8	tc_valid_bits;
+	u8	reserved[3];
+	u8	share_credits[8];
+	__le16	credits[8];
+
+	/* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
+	__le16	tc_bw_max[2];
+};
+
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_query_vsi_ets_sla_config_resp);
+
+/* Configure Switching Component Bandwidth Limit (direct 0x0410) */
+struct i40e_aqc_configure_switching_comp_bw_limit {
+	__le16	seid;
+	u8	reserved[2];
+	__le16	credit;
+	u8	reserved1[2];
+	u8	max_bw; /* 0-3, limit = 2^max */
+	u8	reserved2[7];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_configure_switching_comp_bw_limit);
+
+/* Enable  Physical Port ETS (indirect 0x0413)
+ * Modify  Physical Port ETS (indirect 0x0414)
+ * Disable Physical Port ETS (indirect 0x0415)
+ */
+struct i40e_aqc_configure_switching_comp_ets_data {
+	u8	reserved[4];
+	u8	tc_valid_bits;
+	u8	seepage;
+	u8	tc_strict_priority_flags;
+	u8	reserved1[17];
+	u8	tc_bw_share_credits[8];
+	u8	reserved2[96];
+};
+
+I40E_CHECK_STRUCT_LEN(0x80, i40e_aqc_configure_switching_comp_ets_data);
+
+/* Configure Switching Component Bandwidth Limits per Tc (indirect 0x0416) */
+struct i40e_aqc_configure_switching_comp_ets_bw_limit_data {
+	u8	tc_valid_bits;
+	u8	reserved[15];
+	__le16	tc_bw_credit[8];
+
+	/* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
+	__le16	tc_bw_max[2];
+	u8	reserved1[28];
+};
+
+I40E_CHECK_STRUCT_LEN(0x40,
+		      i40e_aqc_configure_switching_comp_ets_bw_limit_data);
+
+/* Configure Switching Component Bandwidth Allocation per Tc
+ * (indirect 0x0417)
+ */
+struct i40e_aqc_configure_switching_comp_bw_config_data {
+	u8	tc_valid_bits;
+	u8	reserved[2];
+	u8	absolute_credits; /* bool */
+	u8	tc_bw_share_credits[8];
+	u8	reserved1[20];
+};
+
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_configure_switching_comp_bw_config_data);
+
+/* Query Switching Component Configuration (indirect 0x0418) */
+struct i40e_aqc_query_switching_comp_ets_config_resp {
+	u8	tc_valid_bits;
+	u8	reserved[35];
+	__le16	port_bw_limit;
+	u8	reserved1[2];
+	u8	tc_bw_max; /* 0-3, limit = 2^max */
+	u8	reserved2[23];
+};
+
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_query_switching_comp_ets_config_resp);
+
+/* Query PhysicalPort ETS Configuration (indirect 0x0419) */
+struct i40e_aqc_query_port_ets_config_resp {
+	u8	reserved[4];
+	u8	tc_valid_bits;
+	u8	reserved1;
+	u8	tc_strict_priority_bits;
+	u8	reserved2;
+	u8	tc_bw_share_credits[8];
+	__le16	tc_bw_limits[8];
+
+	/* 4 bits per tc 0-7, 4th bit reserved, limit = 2^max */
+	__le16	tc_bw_max[2];
+	u8	reserved3[32];
+};
+
+I40E_CHECK_STRUCT_LEN(0x44, i40e_aqc_query_port_ets_config_resp);
+
+/* Query Switching Component Bandwidth Allocation per Traffic Type
+ * (indirect 0x041A)
+ */
+struct i40e_aqc_query_switching_comp_bw_config_resp {
+	u8	tc_valid_bits;
+	u8	reserved[2];
+	u8	absolute_credits_enable; /* bool */
+	u8	tc_bw_share_credits[8];
+	__le16	tc_bw_limits[8];
+
+	/* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
+	__le16	tc_bw_max[2];
+};
+
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_query_switching_comp_bw_config_resp);
+
+/* Suspend/resume port TX traffic
+ * (direct 0x041B and 0x041C) uses the generic SEID struct
+ */
+
+/* Configure partition BW
+ * (indirect 0x041D)
+ */
+struct i40e_aqc_configure_partition_bw_data {
+	__le16	pf_valid_bits;
+	u8	min_bw[16];      /* guaranteed bandwidth */
+	u8	max_bw[16];      /* bandwidth limit */
+};
+
+I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data);
+
+/* Get and set the active HMC resource profile and status.
+ * (direct 0x0500) and (direct 0x0501)
+ */
+struct i40e_aq_get_set_hmc_resource_profile {
+	u8	pm_profile;
+	u8	pe_vf_enabled;
+	u8	reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile);
+
+enum i40e_aq_hmc_profile {
+	/* I40E_HMC_PROFILE_NO_CHANGE	= 0, reserved */
+	I40E_HMC_PROFILE_DEFAULT	= 1,
+	I40E_HMC_PROFILE_FAVOR_VF	= 2,
+	I40E_HMC_PROFILE_EQUAL		= 3,
+};
+
+/* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */
+
+/* set in param0 for get phy abilities to report qualified modules */
+#define I40E_AQ_PHY_REPORT_QUALIFIED_MODULES	0x0001
+#define I40E_AQ_PHY_REPORT_INITIAL_VALUES	0x0002
+
+enum i40e_aq_phy_type {
+	I40E_PHY_TYPE_SGMII			= 0x0,
+	I40E_PHY_TYPE_1000BASE_KX		= 0x1,
+	I40E_PHY_TYPE_10GBASE_KX4		= 0x2,
+	I40E_PHY_TYPE_10GBASE_KR		= 0x3,
+	I40E_PHY_TYPE_40GBASE_KR4		= 0x4,
+	I40E_PHY_TYPE_XAUI			= 0x5,
+	I40E_PHY_TYPE_XFI			= 0x6,
+	I40E_PHY_TYPE_SFI			= 0x7,
+	I40E_PHY_TYPE_XLAUI			= 0x8,
+	I40E_PHY_TYPE_XLPPI			= 0x9,
+	I40E_PHY_TYPE_40GBASE_CR4_CU		= 0xA,
+	I40E_PHY_TYPE_10GBASE_CR1_CU		= 0xB,
+	I40E_PHY_TYPE_10GBASE_AOC		= 0xC,
+	I40E_PHY_TYPE_40GBASE_AOC		= 0xD,
+	I40E_PHY_TYPE_UNRECOGNIZED		= 0xE,
+	I40E_PHY_TYPE_UNSUPPORTED		= 0xF,
+	I40E_PHY_TYPE_100BASE_TX		= 0x11,
+	I40E_PHY_TYPE_1000BASE_T		= 0x12,
+	I40E_PHY_TYPE_10GBASE_T			= 0x13,
+	I40E_PHY_TYPE_10GBASE_SR		= 0x14,
+	I40E_PHY_TYPE_10GBASE_LR		= 0x15,
+	I40E_PHY_TYPE_10GBASE_SFPP_CU		= 0x16,
+	I40E_PHY_TYPE_10GBASE_CR1		= 0x17,
+	I40E_PHY_TYPE_40GBASE_CR4		= 0x18,
+	I40E_PHY_TYPE_40GBASE_SR4		= 0x19,
+	I40E_PHY_TYPE_40GBASE_LR4		= 0x1A,
+	I40E_PHY_TYPE_1000BASE_SX		= 0x1B,
+	I40E_PHY_TYPE_1000BASE_LX		= 0x1C,
+	I40E_PHY_TYPE_1000BASE_T_OPTICAL	= 0x1D,
+	I40E_PHY_TYPE_20GBASE_KR2		= 0x1E,
+	I40E_PHY_TYPE_25GBASE_KR		= 0x1F,
+	I40E_PHY_TYPE_25GBASE_CR		= 0x20,
+	I40E_PHY_TYPE_25GBASE_SR		= 0x21,
+	I40E_PHY_TYPE_25GBASE_LR		= 0x22,
+	I40E_PHY_TYPE_25GBASE_AOC		= 0x23,
+	I40E_PHY_TYPE_25GBASE_ACC		= 0x24,
+	I40E_PHY_TYPE_2_5GBASE_T		= 0x26,
+	I40E_PHY_TYPE_5GBASE_T			= 0x27,
+	I40E_PHY_TYPE_2_5GBASE_T_LINK_STATUS	= 0x30,
+	I40E_PHY_TYPE_5GBASE_T_LINK_STATUS	= 0x31,
+	I40E_PHY_TYPE_MAX,
+	I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP	= 0xFD,
+	I40E_PHY_TYPE_EMPTY			= 0xFE,
+	I40E_PHY_TYPE_DEFAULT			= 0xFF,
+};
+
+#define I40E_PHY_TYPES_BITMASK (BIT_ULL(I40E_PHY_TYPE_SGMII) | \
+				BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) | \
+				BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) | \
+				BIT_ULL(I40E_PHY_TYPE_XAUI) | \
+				BIT_ULL(I40E_PHY_TYPE_XFI) | \
+				BIT_ULL(I40E_PHY_TYPE_SFI) | \
+				BIT_ULL(I40E_PHY_TYPE_XLAUI) | \
+				BIT_ULL(I40E_PHY_TYPE_XLPPI) | \
+				BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) | \
+				BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) | \
+				BIT_ULL(I40E_PHY_TYPE_UNRECOGNIZED) | \
+				BIT_ULL(I40E_PHY_TYPE_UNSUPPORTED) | \
+				BIT_ULL(I40E_PHY_TYPE_100BASE_TX) | \
+				BIT_ULL(I40E_PHY_TYPE_1000BASE_T) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_T) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) | \
+				BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) | \
+				BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) | \
+				BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) | \
+				BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) | \
+				BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) | \
+				BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) | \
+				BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) | \
+				BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) | \
+				BIT_ULL(I40E_PHY_TYPE_25GBASE_KR) | \
+				BIT_ULL(I40E_PHY_TYPE_25GBASE_CR) | \
+				BIT_ULL(I40E_PHY_TYPE_25GBASE_SR) | \
+				BIT_ULL(I40E_PHY_TYPE_25GBASE_LR) | \
+				BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC) | \
+				BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC) | \
+				BIT_ULL(I40E_PHY_TYPE_2_5GBASE_T) | \
+				BIT_ULL(I40E_PHY_TYPE_5GBASE_T))
+
+#define I40E_LINK_SPEED_2_5GB_SHIFT	0x0
+#define I40E_LINK_SPEED_100MB_SHIFT	0x1
+#define I40E_LINK_SPEED_1000MB_SHIFT	0x2
+#define I40E_LINK_SPEED_10GB_SHIFT	0x3
+#define I40E_LINK_SPEED_40GB_SHIFT	0x4
+#define I40E_LINK_SPEED_20GB_SHIFT	0x5
+#define I40E_LINK_SPEED_25GB_SHIFT	0x6
+#define I40E_LINK_SPEED_5GB_SHIFT	0x7
+
+enum i40e_aq_link_speed {
+	I40E_LINK_SPEED_UNKNOWN	= 0,
+	I40E_LINK_SPEED_100MB	= BIT(I40E_LINK_SPEED_100MB_SHIFT),
+	I40E_LINK_SPEED_1GB	= BIT(I40E_LINK_SPEED_1000MB_SHIFT),
+	I40E_LINK_SPEED_2_5GB	= (1 << I40E_LINK_SPEED_2_5GB_SHIFT),
+	I40E_LINK_SPEED_5GB	= (1 << I40E_LINK_SPEED_5GB_SHIFT),
+	I40E_LINK_SPEED_10GB	= BIT(I40E_LINK_SPEED_10GB_SHIFT),
+	I40E_LINK_SPEED_40GB	= BIT(I40E_LINK_SPEED_40GB_SHIFT),
+	I40E_LINK_SPEED_20GB	= BIT(I40E_LINK_SPEED_20GB_SHIFT),
+	I40E_LINK_SPEED_25GB	= BIT(I40E_LINK_SPEED_25GB_SHIFT),
+};
+
+struct i40e_aqc_module_desc {
+	u8 oui[3];
+	u8 reserved1;
+	u8 part_number[16];
+	u8 revision[4];
+	u8 reserved2[8];
+};
+
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_module_desc);
+
+struct i40e_aq_get_phy_abilities_resp {
+	__le32	phy_type;       /* bitmap using the above enum for offsets */
+	u8	link_speed;     /* bitmap using the above enum bit patterns */
+	u8	abilities;
+#define I40E_AQ_PHY_FLAG_PAUSE_TX	0x01
+#define I40E_AQ_PHY_FLAG_PAUSE_RX	0x02
+	__le16	eee_capability;
+	__le32	eeer_val;
+	u8	d3_lpan;
+	u8	phy_type_ext;
+#define I40E_AQ_PHY_TYPE_EXT_25G_KR	0X01
+#define I40E_AQ_PHY_TYPE_EXT_25G_CR	0X02
+#define I40E_AQ_PHY_TYPE_EXT_25G_SR	0x04
+#define I40E_AQ_PHY_TYPE_EXT_25G_LR	0x08
+	u8	fec_cfg_curr_mod_ext_info;
+#define I40E_AQ_REQUEST_FEC_KR		0x04
+#define I40E_AQ_REQUEST_FEC_RS		0x08
+#define I40E_AQ_ENABLE_FEC_AUTO		0x10
+
+	u8	ext_comp_code;
+	u8	phy_id[4];
+	u8	module_type[3];
+	u8	qualified_module_count;
+#define I40E_AQ_PHY_MAX_QMS		16
+	struct i40e_aqc_module_desc	qualified_module[I40E_AQ_PHY_MAX_QMS];
+};
+
+I40E_CHECK_STRUCT_LEN(0x218, i40e_aq_get_phy_abilities_resp);
+
+/* Set PHY Config (direct 0x0601) */
+struct i40e_aq_set_phy_config { /* same bits as above in all */
+	__le32	phy_type;
+	u8	link_speed;
+	u8	abilities;
+/* bits 0-2 use the values from get_phy_abilities_resp */
+#define I40E_AQ_PHY_ENABLE_LINK		0x08
+#define I40E_AQ_PHY_ENABLE_AN		0x10
+#define I40E_AQ_PHY_ENABLE_ATOMIC_LINK	0x20
+	__le16	eee_capability;
+	__le32	eeer;
+	u8	low_power_ctrl;
+	u8	phy_type_ext;
+#define I40E_AQ_PHY_TYPE_EXT_25G_KR	0X01
+#define I40E_AQ_PHY_TYPE_EXT_25G_CR	0X02
+#define I40E_AQ_PHY_TYPE_EXT_25G_SR	0x04
+#define I40E_AQ_PHY_TYPE_EXT_25G_LR	0x08
+	u8	fec_config;
+#define I40E_AQ_SET_FEC_ABILITY_KR	BIT(0)
+#define I40E_AQ_SET_FEC_ABILITY_RS	BIT(1)
+#define I40E_AQ_SET_FEC_REQUEST_KR	BIT(2)
+#define I40E_AQ_SET_FEC_REQUEST_RS	BIT(3)
+#define I40E_AQ_SET_FEC_AUTO		BIT(4)
+#define I40E_AQ_PHY_FEC_CONFIG_SHIFT	0x0
+#define I40E_AQ_PHY_FEC_CONFIG_MASK	(0x1F << I40E_AQ_PHY_FEC_CONFIG_SHIFT)
+	u8	reserved;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aq_set_phy_config);
+
+/* Set MAC Config command data structure (direct 0x0603) */
+struct i40e_aq_set_mac_config {
+	__le16	max_frame_size;
+	u8	params;
+	u8	tx_timer_priority; /* bitmap */
+	__le16	tx_timer_value;
+	__le16	fc_refresh_threshold;
+	u8	reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aq_set_mac_config);
+
+/* Restart Auto-Negotiation (direct 0x605) */
+struct i40e_aqc_set_link_restart_an {
+	u8	command;
+#define I40E_AQ_PHY_RESTART_AN	0x02
+#define I40E_AQ_PHY_LINK_ENABLE	0x04
+	u8	reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_link_restart_an);
+
+/* Get Link Status cmd & response data structure (direct 0x0607) */
+struct i40e_aqc_get_link_status {
+	__le16	command_flags; /* only field set on command */
+#define I40E_AQ_LSE_DISABLE		0x2
+#define I40E_AQ_LSE_ENABLE		0x3
+/* only response uses this flag */
+#define I40E_AQ_LSE_IS_ENABLED		0x1
+	u8	phy_type;    /* i40e_aq_phy_type   */
+	u8	link_speed;  /* i40e_aq_link_speed */
+	u8	link_info;
+#define I40E_AQ_LINK_UP			0x01    /* obsolete */
+#define I40E_AQ_MEDIA_AVAILABLE		0x40
+	u8	an_info;
+#define I40E_AQ_AN_COMPLETED		0x01
+#define I40E_AQ_LINK_PAUSE_TX		0x20
+#define I40E_AQ_LINK_PAUSE_RX		0x40
+#define I40E_AQ_QUALIFIED_MODULE	0x80
+	u8	ext_info;
+	u8	loopback; /* use defines from i40e_aqc_set_lb_mode */
+/* Since firmware API 1.7 loopback field keeps power class info as well */
+#define I40E_AQ_LOOPBACK_MASK		0x07
+	__le16	max_frame_size;
+	u8	config;
+#define I40E_AQ_CONFIG_FEC_KR_ENA	0x01
+#define I40E_AQ_CONFIG_FEC_RS_ENA	0x02
+#define I40E_AQ_CONFIG_CRC_ENA		0x04
+#define I40E_AQ_CONFIG_PACING_MASK	0x78
+	union {
+		struct {
+			u8	power_desc;
+			u8	reserved[4];
+		};
+		struct {
+			u8	link_type[4];
+			u8	link_type_ext;
+		};
+	};
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status);
+
+/* Set event mask command (direct 0x613) */
+struct i40e_aqc_set_phy_int_mask {
+	u8	reserved[8];
+	__le16	event_mask;
+#define I40E_AQ_EVENT_LINK_UPDOWN	0x0002
+#define I40E_AQ_EVENT_MEDIA_NA		0x0004
+#define I40E_AQ_EVENT_MODULE_QUAL_FAIL	0x0100
+	u8	reserved1[6];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_phy_int_mask);
+
+/* Get Local AN advt register (direct 0x0614)
+ * Set Local AN advt register (direct 0x0615)
+ * Get Link Partner AN advt register (direct 0x0616)
+ */
+struct i40e_aqc_an_advt_reg {
+	__le32	local_an_reg0;
+	__le16	local_an_reg1;
+	u8	reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_an_advt_reg);
+
+/* Set Loopback mode (0x0618) */
+struct i40e_aqc_set_lb_mode {
+	__le16	lb_mode;
+#define I40E_LEGACY_LOOPBACK_NVM_VER	0x6000
+#define I40E_AQ_LB_MAC_LOCAL		0x01
+#define I40E_AQ_LB_PHY_LOCAL		0x05
+#define I40E_AQ_LB_PHY_REMOTE		0x06
+#define I40E_AQ_LB_MAC_LOCAL_LEGACY	0x04
+	u8	reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_lb_mode);
+
+/* Set PHY Debug command (0x0622) */
+struct i40e_aqc_set_phy_debug {
+	u8	command_flags;
+/* Disable link manageability on a single port */
+#define I40E_AQ_PHY_DEBUG_DISABLE_LINK_FW	0x10
+/* Disable link manageability on all ports */
+#define I40E_AQ_PHY_DEBUG_DISABLE_ALL_LINK_FW	0x20
+	u8	reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_phy_debug);
+
+enum i40e_aq_phy_reg_type {
+	I40E_AQC_PHY_REG_INTERNAL	= 0x1,
+	I40E_AQC_PHY_REG_EXERNAL_BASET	= 0x2,
+	I40E_AQC_PHY_REG_EXERNAL_MODULE	= 0x3
+};
+
+/* Run PHY Activity (0x0626) */
+struct i40e_aqc_run_phy_activity {
+	__le16  activity_id;
+	u8      flags;
+	u8      reserved1;
+	__le32  control;
+	__le32  data;
+	u8      reserved2[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity);
+
+/* Set PHY Register command (0x0628) */
+/* Get PHY Register command (0x0629) */
+struct i40e_aqc_phy_register_access {
+	u8	phy_interface;
+#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL	1
+#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE	2
+	u8	dev_address;
+	u8	cmd_flags;
+#define I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE	0x01
+#define I40E_AQ_PHY_REG_ACCESS_SET_MDIO_IF_NUMBER	0x02
+#define I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_SHIFT	2
+#define I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_MASK	(0x3 << \
+		I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_SHIFT)
+	u8	reserved1;
+	__le32	reg_address;
+	__le32	reg_value;
+	u8	reserved2[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access);
+
+/* NVM Read command (indirect 0x0701)
+ * NVM Erase commands (direct 0x0702)
+ * NVM Update commands (indirect 0x0703)
+ */
+struct i40e_aqc_nvm_update {
+	u8	command_flags;
+#define I40E_AQ_NVM_LAST_CMD			0x01
+#define I40E_AQ_NVM_REARRANGE_TO_FLAT		0x20
+#define I40E_AQ_NVM_REARRANGE_TO_STRUCT		0x40
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT	1
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_SELECTED	0x03
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_ALL	0x01
+	u8	module_pointer;
+	__le16	length;
+	__le32	offset;
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_update);
+
+/* NVM Config Read (indirect 0x0704) */
+struct i40e_aqc_nvm_config_read {
+	__le16	cmd_flags;
+	__le16	element_count;
+	__le16	element_id;	/* Feature/field ID */
+	__le16	element_id_msw;	/* MSWord of field ID */
+	__le32	address_high;
+	__le32	address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_config_read);
+
+/* NVM Config Write (indirect 0x0705) */
+struct i40e_aqc_nvm_config_write {
+	__le16	cmd_flags;
+	__le16	element_count;
+	u8	reserved[4];
+	__le32	address_high;
+	__le32	address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_config_write);
+
+/* Used for 0x0704 as well as for 0x0705 commands */
+struct i40e_aqc_nvm_config_data_feature {
+	__le16 feature_id;
+	__le16 feature_options;
+	__le16 feature_selection;
+};
+
+I40E_CHECK_STRUCT_LEN(0x6, i40e_aqc_nvm_config_data_feature);
+
+struct i40e_aqc_nvm_config_data_immediate_field {
+	__le32 field_id;
+	__le32 field_value;
+	__le16 field_options;
+	__le16 reserved;
+};
+
+I40E_CHECK_STRUCT_LEN(0xc, i40e_aqc_nvm_config_data_immediate_field);
+
+/* OEM Post Update (indirect 0x0720)
+ * no command data struct used
+ */
+struct i40e_aqc_nvm_oem_post_update {
+	u8 sel_data;
+	u8 reserved[7];
+};
+
+I40E_CHECK_STRUCT_LEN(0x8, i40e_aqc_nvm_oem_post_update);
+
+struct i40e_aqc_nvm_oem_post_update_buffer {
+	u8 str_len;
+	u8 dev_addr;
+	__le16 eeprom_addr;
+	u8 data[36];
+};
+
+I40E_CHECK_STRUCT_LEN(0x28, i40e_aqc_nvm_oem_post_update_buffer);
+
+/* Thermal Sensor (indirect 0x0721)
+ *     read or set thermal sensor configs and values
+ *     takes a sensor and command specific data buffer, not detailed here
+ */
+struct i40e_aqc_thermal_sensor {
+	u8 sensor_action;
+	u8 reserved[7];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_thermal_sensor);
+
+/* Send to PF command (indirect 0x0801) id is only used by PF
+ * Send to VF command (indirect 0x0802) id is only used by PF
+ * Send to Peer PF command (indirect 0x0803)
+ */
+struct i40e_aqc_pf_vf_message {
+	__le32	id;
+	u8	reserved[4];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_pf_vf_message);
+
+/* Alternate structure */
+
+/* Direct write (direct 0x0900)
+ * Direct read (direct 0x0902)
+ */
+struct i40e_aqc_alternate_write {
+	__le32 address0;
+	__le32 data0;
+	__le32 address1;
+	__le32 data1;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_write);
+
+/* Indirect write (indirect 0x0901)
+ * Indirect read (indirect 0x0903)
+ */
+
+struct i40e_aqc_alternate_ind_write {
+	__le32 address;
+	__le32 length;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_ind_write);
+
+/* Done alternate write (direct 0x0904)
+ * uses i40e_aq_desc
+ */
+struct i40e_aqc_alternate_write_done {
+	__le16	cmd_flags;
+	u8	reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_write_done);
+
+/* Set OEM mode (direct 0x0905) */
+struct i40e_aqc_alternate_set_mode {
+	__le32	mode;
+	u8	reserved[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_set_mode);
+
+/* Clear port Alternate RAM (direct 0x0906) uses i40e_aq_desc */
+
+/* async events 0x10xx */
+
+/* Lan Queue Overflow Event (direct, 0x1001) */
+struct i40e_aqc_lan_overflow {
+	__le32	prtdcb_rupto;
+	__le32	otx_ctl;
+	u8	reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lan_overflow);
+
+/* Get LLDP MIB (indirect 0x0A00) */
+struct i40e_aqc_lldp_get_mib {
+	u8	type;
+	u8	reserved1;
+#define I40E_AQ_LLDP_MIB_TYPE_MASK		0x3
+#define I40E_AQ_LLDP_MIB_LOCAL			0x0
+#define I40E_AQ_LLDP_MIB_REMOTE			0x1
+#define I40E_AQ_LLDP_BRIDGE_TYPE_MASK		0xC
+#define I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT		0x2
+#define I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE	0x0
+/* TX pause flags use I40E_AQ_LINK_TX_* above */
+	__le16	local_len;
+	__le16	remote_len;
+	u8	reserved2[2];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_get_mib);
+
+/* Configure LLDP MIB Change Event (direct 0x0A01)
+ * also used for the event (with type in the command field)
+ */
+struct i40e_aqc_lldp_update_mib {
+	u8	command;
+#define I40E_AQ_LLDP_MIB_UPDATE_DISABLE	0x1
+	u8	reserved[7];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_update_mib);
+
+/* Add LLDP TLV (indirect 0x0A02)
+ * Delete LLDP TLV (indirect 0x0A04)
+ */
+struct i40e_aqc_lldp_add_tlv {
+	u8	type; /* only nearest bridge and non-TPMR from 0x0A00 */
+	u8	reserved1[1];
+	__le16	len;
+	u8	reserved2[4];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_add_tlv);
+
+/* Update LLDP TLV (indirect 0x0A03) */
+struct i40e_aqc_lldp_update_tlv {
+	u8	type; /* only nearest bridge and non-TPMR from 0x0A00 */
+	u8	reserved;
+	__le16	old_len;
+	__le16	new_offset;
+	__le16	new_len;
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_update_tlv);
+
+/* Stop LLDP (direct 0x0A05) */
+struct i40e_aqc_lldp_stop {
+	u8	command;
+#define I40E_AQ_LLDP_AGENT_SHUTDOWN		0x1
+#define I40E_AQ_LLDP_AGENT_STOP_PERSIST		0x2
+	u8	reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_stop);
+
+/* Start LLDP (direct 0x0A06) */
+struct i40e_aqc_lldp_start {
+	u8	command;
+#define I40E_AQ_LLDP_AGENT_START		0x1
+#define I40E_AQ_LLDP_AGENT_START_PERSIST	0x2
+	u8	reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_start);
+
+/* Set DCB (direct 0x0303) */
+struct i40e_aqc_set_dcb_parameters {
+	u8 command;
+#define I40E_AQ_DCB_SET_AGENT	0x1
+#define I40E_DCB_VALID		0x1
+	u8 valid_flags;
+	u8 reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_dcb_parameters);
+
+/* Get CEE DCBX Oper Config (0x0A07)
+ * uses the generic descriptor struct
+ * returns below as indirect response
+ */
+
+#define I40E_AQC_CEE_APP_FCOE_SHIFT	0x0
+#define I40E_AQC_CEE_APP_FCOE_MASK	(0x7 << I40E_AQC_CEE_APP_FCOE_SHIFT)
+#define I40E_AQC_CEE_APP_ISCSI_SHIFT	0x3
+#define I40E_AQC_CEE_APP_ISCSI_MASK	(0x7 << I40E_AQC_CEE_APP_ISCSI_SHIFT)
+#define I40E_AQC_CEE_APP_FIP_SHIFT	0x8
+#define I40E_AQC_CEE_APP_FIP_MASK	(0x7 << I40E_AQC_CEE_APP_FIP_SHIFT)
+
+#define I40E_AQC_CEE_PG_STATUS_SHIFT	0x0
+#define I40E_AQC_CEE_PG_STATUS_MASK	(0x7 << I40E_AQC_CEE_PG_STATUS_SHIFT)
+#define I40E_AQC_CEE_PFC_STATUS_SHIFT	0x3
+#define I40E_AQC_CEE_PFC_STATUS_MASK	(0x7 << I40E_AQC_CEE_PFC_STATUS_SHIFT)
+#define I40E_AQC_CEE_APP_STATUS_SHIFT	0x8
+#define I40E_AQC_CEE_APP_STATUS_MASK	(0x7 << I40E_AQC_CEE_APP_STATUS_SHIFT)
+#define I40E_AQC_CEE_FCOE_STATUS_SHIFT	0x8
+#define I40E_AQC_CEE_FCOE_STATUS_MASK	(0x7 << I40E_AQC_CEE_FCOE_STATUS_SHIFT)
+#define I40E_AQC_CEE_ISCSI_STATUS_SHIFT	0xB
+#define I40E_AQC_CEE_ISCSI_STATUS_MASK	(0x7 << I40E_AQC_CEE_ISCSI_STATUS_SHIFT)
+#define I40E_AQC_CEE_FIP_STATUS_SHIFT	0x10
+#define I40E_AQC_CEE_FIP_STATUS_MASK	(0x7 << I40E_AQC_CEE_FIP_STATUS_SHIFT)
+
+/* struct i40e_aqc_get_cee_dcb_cfg_v1_resp was originally defined with
+ * word boundary layout issues, which the Linux compilers silently deal
+ * with by adding padding, making the actual struct larger than designed.
+ * However, the FW compiler for the NIC is less lenient and complains
+ * about the struct.  Hence, the struct defined here has an extra byte in
+ * fields reserved3 and reserved4 to directly acknowledge that padding,
+ * and the new length is used in the length check macro.
+ */
+struct i40e_aqc_get_cee_dcb_cfg_v1_resp {
+	u8	reserved1;
+	u8	oper_num_tc;
+	u8	oper_prio_tc[4];
+	u8	reserved2;
+	u8	oper_tc_bw[8];
+	u8	oper_pfc_en;
+	u8	reserved3[2];
+	__le16	oper_app_prio;
+	u8	reserved4[2];
+	__le16	tlv_status;
+};
+
+I40E_CHECK_STRUCT_LEN(0x18, i40e_aqc_get_cee_dcb_cfg_v1_resp);
+
+struct i40e_aqc_get_cee_dcb_cfg_resp {
+	u8	oper_num_tc;
+	u8	oper_prio_tc[4];
+	u8	oper_tc_bw[8];
+	u8	oper_pfc_en;
+	__le16	oper_app_prio;
+#define I40E_AQC_CEE_APP_FCOE_SHIFT	0x0
+#define I40E_AQC_CEE_APP_FCOE_MASK	(0x7 << I40E_AQC_CEE_APP_FCOE_SHIFT)
+#define I40E_AQC_CEE_APP_ISCSI_SHIFT	0x3
+#define I40E_AQC_CEE_APP_ISCSI_MASK	(0x7 << I40E_AQC_CEE_APP_ISCSI_SHIFT)
+#define I40E_AQC_CEE_APP_FIP_SHIFT	0x8
+#define I40E_AQC_CEE_APP_FIP_MASK	(0x7 << I40E_AQC_CEE_APP_FIP_SHIFT)
+#define I40E_AQC_CEE_APP_FIP_MASK	(0x7 << I40E_AQC_CEE_APP_FIP_SHIFT)
+	__le32	tlv_status;
+#define I40E_AQC_CEE_PG_STATUS_SHIFT	0x0
+#define I40E_AQC_CEE_PG_STATUS_MASK	(0x7 << I40E_AQC_CEE_PG_STATUS_SHIFT)
+#define I40E_AQC_CEE_PFC_STATUS_SHIFT	0x3
+#define I40E_AQC_CEE_PFC_STATUS_MASK	(0x7 << I40E_AQC_CEE_PFC_STATUS_SHIFT)
+#define I40E_AQC_CEE_APP_STATUS_SHIFT	0x8
+#define I40E_AQC_CEE_APP_STATUS_MASK	(0x7 << I40E_AQC_CEE_APP_STATUS_SHIFT)
+	u8	reserved[12];
+};
+
+I40E_CHECK_STRUCT_LEN(0x20, i40e_aqc_get_cee_dcb_cfg_resp);
+
+/*	Set Local LLDP MIB (indirect 0x0A08)
+ *	Used to replace the local MIB of a given LLDP agent. e.g. DCBx
+ */
+struct i40e_aqc_lldp_set_local_mib {
+#define SET_LOCAL_MIB_AC_TYPE_DCBX_SHIFT	0
+#define SET_LOCAL_MIB_AC_TYPE_DCBX_MASK	(1 << \
+					SET_LOCAL_MIB_AC_TYPE_DCBX_SHIFT)
+#define SET_LOCAL_MIB_AC_TYPE_LOCAL_MIB	0x0
+#define SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS_SHIFT	(1)
+#define SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS_MASK	(1 << \
+				SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS_SHIFT)
+#define SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS		0x1
+	u8	type;
+	u8	reserved0;
+	__le16	length;
+	u8	reserved1[4];
+	__le32	address_high;
+	__le32	address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_set_local_mib);
+
+/*	Stop/Start LLDP Agent (direct 0x0A09)
+ *	Used for stopping/starting specific LLDP agent. e.g. DCBx
+ */
+struct i40e_aqc_lldp_stop_start_specific_agent {
+	u8	command;
+	u8	reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_stop_start_specific_agent);
+
+/* Restore LLDP Agent factory settings (direct 0x0A0A) */
+struct i40e_aqc_lldp_restore {
+	u8	command;
+#define I40E_AQ_LLDP_AGENT_RESTORE		0x1
+	u8	reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_restore);
+
+/* Add Udp Tunnel command and completion (direct 0x0B00) */
+struct i40e_aqc_add_udp_tunnel {
+	__le16	udp_port;
+	u8	reserved0[3];
+	u8	protocol_type;
+#define I40E_AQC_TUNNEL_TYPE_VXLAN	0x00
+#define I40E_AQC_TUNNEL_TYPE_NGE	0x01
+	u8	reserved1[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_udp_tunnel);
+
+struct i40e_aqc_add_udp_tunnel_completion {
+	__le16	udp_port;
+	u8	filter_entry_index;
+	u8	multiple_pfs;
+	u8	total_filters;
+	u8	reserved[11];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_udp_tunnel_completion);
+
+/* remove UDP Tunnel command (0x0B01) */
+struct i40e_aqc_remove_udp_tunnel {
+	u8	reserved[2];
+	u8	index; /* 0 to 15 */
+	u8	reserved2[13];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_udp_tunnel);
+
+struct i40e_aqc_del_udp_tunnel_completion {
+	__le16	udp_port;
+	u8	index; /* 0 to 15 */
+	u8	multiple_pfs;
+	u8	total_filters_used;
+	u8	reserved1[11];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_del_udp_tunnel_completion);
+
+struct i40e_aqc_get_set_rss_key {
+#define I40E_AQC_SET_RSS_KEY_VSI_VALID		BIT(15)
+#define I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT	0
+#define I40E_AQC_SET_RSS_KEY_VSI_ID_MASK	(0x3FF << \
+					I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT)
+	__le16	vsi_id;
+	u8	reserved[6];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_set_rss_key);
+
+struct i40e_aqc_get_set_rss_key_data {
+	u8 standard_rss_key[0x28];
+	u8 extended_hash_key[0xc];
+};
+
+I40E_CHECK_STRUCT_LEN(0x34, i40e_aqc_get_set_rss_key_data);
+
+struct  i40e_aqc_get_set_rss_lut {
+#define I40E_AQC_SET_RSS_LUT_VSI_VALID		BIT(15)
+#define I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT	0
+#define I40E_AQC_SET_RSS_LUT_VSI_ID_MASK	(0x3FF << \
+					I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT)
+	__le16	vsi_id;
+#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT	0
+#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK	BIT(I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT)
+
+#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_VSI	0
+#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_PF	1
+	__le16	flags;
+	u8	reserved[4];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_set_rss_lut);
+
+/* tunnel key structure 0x0B10 */
+
+struct i40e_aqc_tunnel_key_structure {
+	u8	key1_off;
+	u8	key2_off;
+	u8	key1_len;  /* 0 to 15 */
+	u8	key2_len;  /* 0 to 15 */
+	u8	flags;
+	u8	network_key_index;
+	u8	reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_tunnel_key_structure);
+
+/* OEM mode commands (direct 0xFE0x) */
+struct i40e_aqc_oem_param_change {
+	__le32	param_type;
+	__le32	param_value1;
+	__le16	param_value2;
+	u8	reserved[6];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_oem_param_change);
+
+struct i40e_aqc_oem_state_change {
+	__le32	state;
+	u8	reserved[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_oem_state_change);
+
+/* Initialize OCSD (0xFE02, direct) */
+struct i40e_aqc_opc_oem_ocsd_initialize {
+	u8 type_status;
+	u8 reserved1[3];
+	__le32 ocsd_memory_block_addr_high;
+	__le32 ocsd_memory_block_addr_low;
+	__le32 requested_update_interval;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_opc_oem_ocsd_initialize);
+
+/* Initialize OCBB  (0xFE03, direct) */
+struct i40e_aqc_opc_oem_ocbb_initialize {
+	u8 type_status;
+	u8 reserved1[3];
+	__le32 ocbb_memory_block_addr_high;
+	__le32 ocbb_memory_block_addr_low;
+	u8 reserved2[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_opc_oem_ocbb_initialize);
+
+/* debug commands */
+
+/* get device id (0xFF00) uses the generic structure */
+
+/* set test more (0xFF01, internal) */
+
+struct i40e_acq_set_test_mode {
+	u8	mode;
+	u8	reserved[3];
+	u8	command;
+	u8	reserved2[3];
+	__le32	address_high;
+	__le32	address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_acq_set_test_mode);
+
+/* Debug Read Register command (0xFF03)
+ * Debug Write Register command (0xFF04)
+ */
+struct i40e_aqc_debug_reg_read_write {
+	__le32 reserved;
+	__le32 address;
+	__le32 value_high;
+	__le32 value_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_reg_read_write);
+
+/* Scatter/gather Reg Read  (indirect 0xFF05)
+ * Scatter/gather Reg Write (indirect 0xFF06)
+ */
+
+/* i40e_aq_desc is used for the command */
+struct i40e_aqc_debug_reg_sg_element_data {
+	__le32 address;
+	__le32 value;
+};
+
+/* Debug Modify register (direct 0xFF07) */
+struct i40e_aqc_debug_modify_reg {
+	__le32 address;
+	__le32 value;
+	__le32 clear_mask;
+	__le32 set_mask;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_reg);
+
+/* dump internal data (0xFF08, indirect) */
+struct i40e_aqc_debug_dump_internals {
+	u8	cluster_id;
+	u8	table_id;
+	__le16	data_size;
+	__le32	idx;
+	__le32	address_high;
+	__le32	address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_dump_internals);
+
+struct i40e_aqc_debug_modify_internals {
+	u8	cluster_id;
+	u8	cluster_specific_params[7];
+	__le32	address_high;
+	__le32	address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_internals);
+
+#endif /* _I40E_ADMINQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_alloc.h b/drivers/net/ethernet/intel/i40e/i40e_alloc.h
new file mode 100644
index 0000000000..a6c9a9e343
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_alloc.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _I40E_ALLOC_H_
+#define _I40E_ALLOC_H_
+
+struct i40e_hw;
+
+/* Memory allocation types */
+enum i40e_memory_type {
+	i40e_mem_arq_buf = 0,		/* ARQ indirect command buffer */
+	i40e_mem_asq_buf = 1,
+	i40e_mem_atq_buf = 2,		/* ATQ indirect command buffer */
+	i40e_mem_arq_ring = 3,		/* ARQ descriptor ring */
+	i40e_mem_atq_ring = 4,		/* ATQ descriptor ring */
+	i40e_mem_pd = 5,		/* Page Descriptor */
+	i40e_mem_bp = 6,		/* Backing Page - 4KB */
+	i40e_mem_bp_jumbo = 7,		/* Backing Page - > 4KB */
+	i40e_mem_reserved
+};
+
+/* prototype for functions used for dynamic memory allocation */
+int i40e_allocate_dma_mem(struct i40e_hw *hw,
+			  struct i40e_dma_mem *mem,
+			  enum i40e_memory_type type,
+			  u64 size, u32 alignment);
+int i40e_free_dma_mem(struct i40e_hw *hw,
+		      struct i40e_dma_mem *mem);
+int i40e_allocate_virt_mem(struct i40e_hw *hw,
+			   struct i40e_virt_mem *mem,
+			   u32 size);
+int i40e_free_virt_mem(struct i40e_hw *hw,
+		       struct i40e_virt_mem *mem);
+
+#endif /* _I40E_ALLOC_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
new file mode 100644
index 0000000000..639c5a1ca8
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -0,0 +1,756 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include <linux/list.h>
+#include <linux/errno.h>
+#include <linux/net/intel/i40e_client.h>
+
+#include "i40e.h"
+#include "i40e_prototype.h"
+
+static LIST_HEAD(i40e_devices);
+static DEFINE_MUTEX(i40e_device_mutex);
+DEFINE_IDA(i40e_client_ida);
+
+static int i40e_client_virtchnl_send(struct i40e_info *ldev,
+				     struct i40e_client *client,
+				     u32 vf_id, u8 *msg, u16 len);
+
+static int i40e_client_setup_qvlist(struct i40e_info *ldev,
+				    struct i40e_client *client,
+				    struct i40e_qvlist_info *qvlist_info);
+
+static void i40e_client_request_reset(struct i40e_info *ldev,
+				      struct i40e_client *client,
+				      u32 reset_level);
+
+static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
+				       struct i40e_client *client,
+				       bool is_vf, u32 vf_id,
+				       u32 flag, u32 valid_flag);
+
+static struct i40e_ops i40e_lan_ops = {
+	.virtchnl_send = i40e_client_virtchnl_send,
+	.setup_qvlist = i40e_client_setup_qvlist,
+	.request_reset = i40e_client_request_reset,
+	.update_vsi_ctxt = i40e_client_update_vsi_ctxt,
+};
+
+/**
+ * i40e_client_get_params - Get the params that can change at runtime
+ * @vsi: the VSI with the message
+ * @params: client param struct
+ *
+ **/
+static
+int i40e_client_get_params(struct i40e_vsi *vsi, struct i40e_params *params)
+{
+	struct i40e_dcbx_config *dcb_cfg = &vsi->back->hw.local_dcbx_config;
+	int i = 0;
+
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+		u8 tc = dcb_cfg->etscfg.prioritytable[i];
+		u16 qs_handle;
+
+		/* If TC is not enabled for VSI use TC0 for UP */
+		if (!(vsi->tc_config.enabled_tc & BIT(tc)))
+			tc = 0;
+
+		qs_handle = le16_to_cpu(vsi->info.qs_handle[tc]);
+		params->qos.prio_qos[i].tc = tc;
+		params->qos.prio_qos[i].qs_handle = qs_handle;
+		if (qs_handle == I40E_AQ_VSI_QS_HANDLE_INVALID) {
+			dev_err(&vsi->back->pdev->dev, "Invalid queue set handle for TC = %d, vsi id = %d\n",
+				tc, vsi->id);
+			return -EINVAL;
+		}
+	}
+
+	params->mtu = vsi->netdev->mtu;
+	return 0;
+}
+
+/**
+ * i40e_notify_client_of_vf_msg - call the client vf message callback
+ * @vsi: the VSI with the message
+ * @vf_id: the absolute VF id that sent the message
+ * @msg: message buffer
+ * @len: length of the message
+ *
+ * If there is a client to this VSI, call the client
+ **/
+void
+i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id, u8 *msg, u16 len)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_client_instance *cdev = pf->cinst;
+
+	if (!cdev || !cdev->client)
+		return;
+	if (!cdev->client->ops || !cdev->client->ops->virtchnl_receive) {
+		dev_dbg(&pf->pdev->dev,
+			"Cannot locate client instance virtual channel receive routine\n");
+		return;
+	}
+	if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
+		dev_dbg(&pf->pdev->dev, "Client is not open, abort virtchnl_receive\n");
+		return;
+	}
+	cdev->client->ops->virtchnl_receive(&cdev->lan_info, cdev->client,
+					    vf_id, msg, len);
+}
+
+/**
+ * i40e_notify_client_of_l2_param_changes - call the client notify callback
+ * @vsi: the VSI with l2 param changes
+ *
+ * If there is a client to this VSI, call the client
+ **/
+void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_client_instance *cdev = pf->cinst;
+	struct i40e_params params;
+
+	if (!cdev || !cdev->client)
+		return;
+	if (!cdev->client->ops || !cdev->client->ops->l2_param_change) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"Cannot locate client instance l2_param_change routine\n");
+		return;
+	}
+	if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
+		dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort l2 param change\n");
+		return;
+	}
+	memset(&params, 0, sizeof(params));
+	i40e_client_get_params(vsi, &params);
+	memcpy(&cdev->lan_info.params, &params, sizeof(struct i40e_params));
+	cdev->client->ops->l2_param_change(&cdev->lan_info, cdev->client,
+					   &params);
+}
+
+/**
+ * i40e_client_release_qvlist - release MSI-X vector mapping for client
+ * @ldev: pointer to L2 context.
+ *
+ **/
+static void i40e_client_release_qvlist(struct i40e_info *ldev)
+{
+	struct i40e_qvlist_info *qvlist_info = ldev->qvlist_info;
+	u32 i;
+
+	if (!ldev->qvlist_info)
+		return;
+
+	for (i = 0; i < qvlist_info->num_vectors; i++) {
+		struct i40e_pf *pf = ldev->pf;
+		struct i40e_qv_info *qv_info;
+		u32 reg_idx;
+
+		qv_info = &qvlist_info->qv_info[i];
+		if (!qv_info)
+			continue;
+		reg_idx = I40E_PFINT_LNKLSTN(qv_info->v_idx - 1);
+		wr32(&pf->hw, reg_idx, I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK);
+	}
+	kfree(ldev->qvlist_info);
+	ldev->qvlist_info = NULL;
+}
+
+/**
+ * i40e_notify_client_of_netdev_close - call the client close callback
+ * @vsi: the VSI with netdev closed
+ * @reset: true when close called due to a reset pending
+ *
+ * If there is a client to this netdev, call the client with close
+ **/
+void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_client_instance *cdev = pf->cinst;
+
+	if (!cdev || !cdev->client)
+		return;
+	if (!cdev->client->ops || !cdev->client->ops->close) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"Cannot locate client instance close routine\n");
+		return;
+	}
+	if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
+		dev_dbg(&pf->pdev->dev, "Client is not open, abort close\n");
+		return;
+	}
+	cdev->client->ops->close(&cdev->lan_info, cdev->client, reset);
+	clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
+	i40e_client_release_qvlist(&cdev->lan_info);
+}
+
+/**
+ * i40e_notify_client_of_vf_reset - call the client vf reset callback
+ * @pf: PF device pointer
+ * @vf_id: asolute id of VF being reset
+ *
+ * If there is a client attached to this PF, notify when a VF is reset
+ **/
+void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id)
+{
+	struct i40e_client_instance *cdev = pf->cinst;
+
+	if (!cdev || !cdev->client)
+		return;
+	if (!cdev->client->ops || !cdev->client->ops->vf_reset) {
+		dev_dbg(&pf->pdev->dev,
+			"Cannot locate client instance VF reset routine\n");
+		return;
+	}
+	if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,  &cdev->state)) {
+		dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-reset\n");
+		return;
+	}
+	cdev->client->ops->vf_reset(&cdev->lan_info, cdev->client, vf_id);
+}
+
+/**
+ * i40e_notify_client_of_vf_enable - call the client vf notification callback
+ * @pf: PF device pointer
+ * @num_vfs: the number of VFs currently enabled, 0 for disable
+ *
+ * If there is a client attached to this PF, call its VF notification routine
+ **/
+void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs)
+{
+	struct i40e_client_instance *cdev = pf->cinst;
+
+	if (!cdev || !cdev->client)
+		return;
+	if (!cdev->client->ops || !cdev->client->ops->vf_enable) {
+		dev_dbg(&pf->pdev->dev,
+			"Cannot locate client instance VF enable routine\n");
+		return;
+	}
+	if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+		      &cdev->state)) {
+		dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-enable\n");
+		return;
+	}
+	cdev->client->ops->vf_enable(&cdev->lan_info, cdev->client, num_vfs);
+}
+
+/**
+ * i40e_vf_client_capable - ask the client if it likes the specified VF
+ * @pf: PF device pointer
+ * @vf_id: the VF in question
+ *
+ * If there is a client of the specified type attached to this PF, call
+ * its vf_capable routine
+ **/
+int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id)
+{
+	struct i40e_client_instance *cdev = pf->cinst;
+	int capable = false;
+
+	if (!cdev || !cdev->client)
+		goto out;
+	if (!cdev->client->ops || !cdev->client->ops->vf_capable) {
+		dev_dbg(&pf->pdev->dev,
+			"Cannot locate client instance VF capability routine\n");
+		goto out;
+	}
+	if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state))
+		goto out;
+
+	capable = cdev->client->ops->vf_capable(&cdev->lan_info,
+						cdev->client,
+						vf_id);
+out:
+	return capable;
+}
+
+void i40e_client_update_msix_info(struct i40e_pf *pf)
+{
+	struct i40e_client_instance *cdev = pf->cinst;
+
+	if (!cdev || !cdev->client)
+		return;
+
+	cdev->lan_info.msix_count = pf->num_iwarp_msix;
+	cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
+}
+
+static void i40e_auxiliary_dev_release(struct device *dev)
+{
+	struct i40e_auxiliary_device *i40e_aux_dev =
+			container_of(dev, struct i40e_auxiliary_device, aux_dev.dev);
+
+	ida_free(&i40e_client_ida, i40e_aux_dev->aux_dev.id);
+	kfree(i40e_aux_dev);
+}
+
+static int i40e_register_auxiliary_dev(struct i40e_info *ldev, const char *name)
+{
+	struct i40e_auxiliary_device *i40e_aux_dev;
+	struct pci_dev *pdev = ldev->pcidev;
+	struct auxiliary_device *aux_dev;
+	int ret;
+
+	i40e_aux_dev = kzalloc(sizeof(*i40e_aux_dev), GFP_KERNEL);
+	if (!i40e_aux_dev)
+		return -ENOMEM;
+
+	i40e_aux_dev->ldev = ldev;
+
+	aux_dev = &i40e_aux_dev->aux_dev;
+	aux_dev->name = name;
+	aux_dev->dev.parent = &pdev->dev;
+	aux_dev->dev.release = i40e_auxiliary_dev_release;
+	ldev->aux_dev = aux_dev;
+
+	ret = ida_alloc(&i40e_client_ida, GFP_KERNEL);
+	if (ret < 0) {
+		kfree(i40e_aux_dev);
+		return ret;
+	}
+	aux_dev->id = ret;
+
+	ret = auxiliary_device_init(aux_dev);
+	if (ret < 0) {
+		ida_free(&i40e_client_ida, aux_dev->id);
+		kfree(i40e_aux_dev);
+		return ret;
+	}
+
+	ret = auxiliary_device_add(aux_dev);
+	if (ret) {
+		auxiliary_device_uninit(aux_dev);
+		return ret;
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_client_add_instance - add a client instance struct to the instance list
+ * @pf: pointer to the board struct
+ *
+ **/
+static void i40e_client_add_instance(struct i40e_pf *pf)
+{
+	struct i40e_client_instance *cdev = NULL;
+	struct netdev_hw_addr *mac = NULL;
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+
+	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
+	if (!cdev)
+		return;
+
+	cdev->lan_info.pf = (void *)pf;
+	cdev->lan_info.netdev = vsi->netdev;
+	cdev->lan_info.pcidev = pf->pdev;
+	cdev->lan_info.fid = pf->hw.pf_id;
+	cdev->lan_info.ftype = I40E_CLIENT_FTYPE_PF;
+	cdev->lan_info.hw_addr = pf->hw.hw_addr;
+	cdev->lan_info.ops = &i40e_lan_ops;
+	cdev->lan_info.version.major = I40E_CLIENT_VERSION_MAJOR;
+	cdev->lan_info.version.minor = I40E_CLIENT_VERSION_MINOR;
+	cdev->lan_info.version.build = I40E_CLIENT_VERSION_BUILD;
+	cdev->lan_info.fw_maj_ver = pf->hw.aq.fw_maj_ver;
+	cdev->lan_info.fw_min_ver = pf->hw.aq.fw_min_ver;
+	cdev->lan_info.fw_build = pf->hw.aq.fw_build;
+	set_bit(__I40E_CLIENT_INSTANCE_NONE, &cdev->state);
+
+	if (i40e_client_get_params(vsi, &cdev->lan_info.params))
+		goto free_cdev;
+
+	mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list,
+			       struct netdev_hw_addr, list);
+	if (mac)
+		ether_addr_copy(cdev->lan_info.lanmac, mac->addr);
+	else
+		dev_err(&pf->pdev->dev, "MAC address list is empty!\n");
+
+	pf->cinst = cdev;
+
+	cdev->lan_info.msix_count = pf->num_iwarp_msix;
+	cdev->lan_info.msix_entries = &pf->msix_entries[pf->iwarp_base_vector];
+
+	if (i40e_register_auxiliary_dev(&cdev->lan_info, "iwarp"))
+		goto free_cdev;
+
+	return;
+
+free_cdev:
+	kfree(cdev);
+	pf->cinst = NULL;
+}
+
+/**
+ * i40e_client_del_instance - removes a client instance from the list
+ * @pf: pointer to the board struct
+ *
+ **/
+static
+void i40e_client_del_instance(struct i40e_pf *pf)
+{
+	kfree(pf->cinst);
+	pf->cinst = NULL;
+}
+
+/**
+ * i40e_client_subtask - client maintenance work
+ * @pf: board private structure
+ **/
+void i40e_client_subtask(struct i40e_pf *pf)
+{
+	struct i40e_client *client;
+	struct i40e_client_instance *cdev;
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	int ret = 0;
+
+	if (!test_and_clear_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state))
+		return;
+	cdev = pf->cinst;
+
+	/* If we're down or resetting, just bail */
+	if (test_bit(__I40E_DOWN, pf->state) ||
+	    test_bit(__I40E_CONFIG_BUSY, pf->state))
+		return;
+
+	if (!cdev || !cdev->client)
+		return;
+
+	client = cdev->client;
+
+	/* Here we handle client opens. If the client is down, and
+	 * the netdev is registered, then open the client.
+	 */
+	if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
+		if (vsi->netdev_registered &&
+		    client->ops && client->ops->open) {
+			set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
+			ret = client->ops->open(&cdev->lan_info, client);
+			if (ret) {
+				/* Remove failed client instance */
+				clear_bit(__I40E_CLIENT_INSTANCE_OPENED,
+					  &cdev->state);
+				return;
+			}
+		}
+	}
+
+	/* enable/disable PE TCP_ENA flag based on netdev down/up
+	 */
+	if (test_bit(__I40E_VSI_DOWN, vsi->state))
+		i40e_client_update_vsi_ctxt(&cdev->lan_info, client,
+					    0, 0, 0,
+					    I40E_CLIENT_VSI_FLAG_TCP_ENABLE);
+	else
+		i40e_client_update_vsi_ctxt(&cdev->lan_info, client,
+					    0, 0,
+					    I40E_CLIENT_VSI_FLAG_TCP_ENABLE,
+					    I40E_CLIENT_VSI_FLAG_TCP_ENABLE);
+}
+
+/**
+ * i40e_lan_add_device - add a lan device struct to the list of lan devices
+ * @pf: pointer to the board struct
+ *
+ * Returns 0 on success or none 0 on error
+ **/
+int i40e_lan_add_device(struct i40e_pf *pf)
+{
+	struct i40e_device *ldev;
+	int ret = 0;
+
+	mutex_lock(&i40e_device_mutex);
+	list_for_each_entry(ldev, &i40e_devices, list) {
+		if (ldev->pf == pf) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+	if (!ldev) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	ldev->pf = pf;
+	INIT_LIST_HEAD(&ldev->list);
+	list_add(&ldev->list, &i40e_devices);
+	dev_info(&pf->pdev->dev, "Added LAN device PF%d bus=0x%02x dev=0x%02x func=0x%02x\n",
+		 pf->hw.pf_id, pf->hw.bus.bus_id,
+		 pf->hw.bus.device, pf->hw.bus.func);
+
+	i40e_client_add_instance(pf);
+
+	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
+	i40e_service_event_schedule(pf);
+
+out:
+	mutex_unlock(&i40e_device_mutex);
+	return ret;
+}
+
+/**
+ * i40e_lan_del_device - removes a lan device from the device list
+ * @pf: pointer to the board struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int i40e_lan_del_device(struct i40e_pf *pf)
+{
+	struct auxiliary_device *aux_dev = pf->cinst->lan_info.aux_dev;
+	struct i40e_device *ldev, *tmp;
+	int ret = -ENODEV;
+
+	auxiliary_device_delete(aux_dev);
+	auxiliary_device_uninit(aux_dev);
+
+	/* First, remove any client instance. */
+	i40e_client_del_instance(pf);
+
+	mutex_lock(&i40e_device_mutex);
+	list_for_each_entry_safe(ldev, tmp, &i40e_devices, list) {
+		if (ldev->pf == pf) {
+			dev_info(&pf->pdev->dev, "Deleted LAN device PF%d bus=0x%02x dev=0x%02x func=0x%02x\n",
+				 pf->hw.pf_id, pf->hw.bus.bus_id,
+				 pf->hw.bus.device, pf->hw.bus.func);
+			list_del(&ldev->list);
+			kfree(ldev);
+			ret = 0;
+			break;
+		}
+	}
+	mutex_unlock(&i40e_device_mutex);
+	return ret;
+}
+
+/**
+ * i40e_client_virtchnl_send - TBD
+ * @ldev: pointer to L2 context
+ * @client: Client pointer
+ * @vf_id: absolute VF identifier
+ * @msg: message buffer
+ * @len: length of message buffer
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_virtchnl_send(struct i40e_info *ldev,
+				     struct i40e_client *client,
+				     u32 vf_id, u8 *msg, u16 len)
+{
+	struct i40e_pf *pf = ldev->pf;
+	struct i40e_hw *hw = &pf->hw;
+	int err;
+
+	err = i40e_aq_send_msg_to_vf(hw, vf_id, VIRTCHNL_OP_RDMA,
+				     0, msg, len, NULL);
+	if (err)
+		dev_err(&pf->pdev->dev, "Unable to send iWarp message to VF, error %d, aq status %d\n",
+			err, hw->aq.asq_last_status);
+
+	return err;
+}
+
+/**
+ * i40e_client_setup_qvlist
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @qvlist_info: queue and vector list
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_setup_qvlist(struct i40e_info *ldev,
+				    struct i40e_client *client,
+				    struct i40e_qvlist_info *qvlist_info)
+{
+	struct i40e_pf *pf = ldev->pf;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_qv_info *qv_info;
+	u32 v_idx, i, reg_idx, reg;
+
+	ldev->qvlist_info = kzalloc(struct_size(ldev->qvlist_info, qv_info,
+				    qvlist_info->num_vectors), GFP_KERNEL);
+	if (!ldev->qvlist_info)
+		return -ENOMEM;
+	ldev->qvlist_info->num_vectors = qvlist_info->num_vectors;
+
+	for (i = 0; i < qvlist_info->num_vectors; i++) {
+		qv_info = &qvlist_info->qv_info[i];
+		if (!qv_info)
+			continue;
+		v_idx = qv_info->v_idx;
+
+		/* Validate vector id belongs to this client */
+		if ((v_idx >= (pf->iwarp_base_vector + pf->num_iwarp_msix)) ||
+		    (v_idx < pf->iwarp_base_vector))
+			goto err;
+
+		ldev->qvlist_info->qv_info[i] = *qv_info;
+		reg_idx = I40E_PFINT_LNKLSTN(v_idx - 1);
+
+		if (qv_info->ceq_idx == I40E_QUEUE_INVALID_IDX) {
+			/* Special case - No CEQ mapped on this vector */
+			wr32(hw, reg_idx, I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK);
+		} else {
+			reg = (qv_info->ceq_idx &
+			       I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK) |
+			       (I40E_QUEUE_TYPE_PE_CEQ <<
+			       I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+			wr32(hw, reg_idx, reg);
+
+			reg = (I40E_PFINT_CEQCTL_CAUSE_ENA_MASK |
+			       (v_idx << I40E_PFINT_CEQCTL_MSIX_INDX_SHIFT) |
+			       (qv_info->itr_idx <<
+				I40E_PFINT_CEQCTL_ITR_INDX_SHIFT) |
+			       (I40E_QUEUE_END_OF_LIST <<
+				I40E_PFINT_CEQCTL_NEXTQ_INDX_SHIFT));
+			wr32(hw, I40E_PFINT_CEQCTL(qv_info->ceq_idx), reg);
+		}
+		if (qv_info->aeq_idx != I40E_QUEUE_INVALID_IDX) {
+			reg = (I40E_PFINT_AEQCTL_CAUSE_ENA_MASK |
+			       (v_idx << I40E_PFINT_AEQCTL_MSIX_INDX_SHIFT) |
+			       (qv_info->itr_idx <<
+				I40E_PFINT_AEQCTL_ITR_INDX_SHIFT));
+
+			wr32(hw, I40E_PFINT_AEQCTL, reg);
+		}
+	}
+	/* Mitigate sync problems with iwarp VF driver */
+	i40e_flush(hw);
+	return 0;
+err:
+	kfree(ldev->qvlist_info);
+	ldev->qvlist_info = NULL;
+	return -EINVAL;
+}
+
+/**
+ * i40e_client_request_reset
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @reset_level: reset level
+ **/
+static void i40e_client_request_reset(struct i40e_info *ldev,
+				      struct i40e_client *client,
+				      u32 reset_level)
+{
+	struct i40e_pf *pf = ldev->pf;
+
+	switch (reset_level) {
+	case I40E_CLIENT_RESET_LEVEL_PF:
+		set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
+		break;
+	case I40E_CLIENT_RESET_LEVEL_CORE:
+		set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
+		break;
+	default:
+		dev_warn(&pf->pdev->dev,
+			 "Client for PF id %d requested an unsupported reset: %d.\n",
+			 pf->hw.pf_id, reset_level);
+		break;
+	}
+
+	i40e_service_event_schedule(pf);
+}
+
+/**
+ * i40e_client_update_vsi_ctxt
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @is_vf: if this for the VF
+ * @vf_id: if is_vf true this carries the vf_id
+ * @flag: Any device level setting that needs to be done for PE
+ * @valid_flag: Bits in this match up and enable changing of flag bits
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
+				       struct i40e_client *client,
+				       bool is_vf, u32 vf_id,
+				       u32 flag, u32 valid_flag)
+{
+	struct i40e_pf *pf = ldev->pf;
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	struct i40e_vsi_context ctxt;
+	bool update = true;
+	int err;
+
+	/* TODO: for now do not allow setting VF's VSI setting */
+	if (is_vf)
+		return -EINVAL;
+
+	ctxt.seid = pf->main_vsi_seid;
+	ctxt.pf_num = pf->hw.pf_id;
+	err = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
+	ctxt.flags = I40E_AQ_VSI_TYPE_PF;
+	if (err) {
+		dev_info(&pf->pdev->dev,
+			 "couldn't get PF vsi config, err %pe aq_err %s\n",
+			 ERR_PTR(err),
+			 i40e_aq_str(&pf->hw,
+				     pf->hw.aq.asq_last_status));
+		return -ENOENT;
+	}
+
+	if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE) &&
+	    (flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE)) {
+		ctxt.info.valid_sections =
+			cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+		ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+	} else if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE) &&
+		  !(flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE)) {
+		ctxt.info.valid_sections =
+			cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+		ctxt.info.queueing_opt_flags &= ~I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+	} else {
+		update = false;
+		dev_warn(&pf->pdev->dev,
+			 "Client for PF id %d request an unsupported Config: %x.\n",
+			 pf->hw.pf_id, flag);
+	}
+
+	if (update) {
+		err = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+		if (err) {
+			dev_info(&pf->pdev->dev,
+				 "update VSI ctxt for PE failed, err %pe aq_err %s\n",
+				 ERR_PTR(err),
+				 i40e_aq_str(&pf->hw,
+					     pf->hw.aq.asq_last_status));
+		}
+	}
+	return err;
+}
+
+void i40e_client_device_register(struct i40e_info *ldev, struct i40e_client *client)
+{
+	struct i40e_pf *pf = ldev->pf;
+
+	pf->cinst->client = client;
+	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
+	i40e_service_event_schedule(pf);
+}
+EXPORT_SYMBOL_GPL(i40e_client_device_register);
+
+void i40e_client_device_unregister(struct i40e_info *ldev)
+{
+	struct i40e_pf *pf = ldev->pf;
+	struct i40e_client_instance *cdev = pf->cinst;
+
+	if (!cdev)
+		return;
+
+	while (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state))
+		usleep_range(500, 1000);
+
+	if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
+		cdev->client->ops->close(&cdev->lan_info, cdev->client, false);
+		clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
+		i40e_client_release_qvlist(&cdev->lan_info);
+	}
+
+	pf->cinst->client = NULL;
+	clear_bit(__I40E_SERVICE_SCHED, pf->state);
+}
+EXPORT_SYMBOL_GPL(i40e_client_device_unregister);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
new file mode 100644
index 0000000000..1b493854f5
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -0,0 +1,6029 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2021 Intel Corporation. */
+
+#include "i40e.h"
+#include "i40e_type.h"
+#include "i40e_adminq.h"
+#include "i40e_prototype.h"
+#include <linux/avf/virtchnl.h>
+
+/**
+ * i40e_set_mac_type - Sets MAC type
+ * @hw: pointer to the HW structure
+ *
+ * This function sets the mac type of the adapter based on the
+ * vendor ID and device ID stored in the hw structure.
+ **/
+int i40e_set_mac_type(struct i40e_hw *hw)
+{
+	int status = 0;
+
+	if (hw->vendor_id == PCI_VENDOR_ID_INTEL) {
+		switch (hw->device_id) {
+		case I40E_DEV_ID_SFP_XL710:
+		case I40E_DEV_ID_QEMU:
+		case I40E_DEV_ID_KX_B:
+		case I40E_DEV_ID_KX_C:
+		case I40E_DEV_ID_QSFP_A:
+		case I40E_DEV_ID_QSFP_B:
+		case I40E_DEV_ID_QSFP_C:
+		case I40E_DEV_ID_1G_BASE_T_BC:
+		case I40E_DEV_ID_5G_BASE_T_BC:
+		case I40E_DEV_ID_10G_BASE_T:
+		case I40E_DEV_ID_10G_BASE_T4:
+		case I40E_DEV_ID_10G_BASE_T_BC:
+		case I40E_DEV_ID_10G_B:
+		case I40E_DEV_ID_10G_SFP:
+		case I40E_DEV_ID_20G_KR2:
+		case I40E_DEV_ID_20G_KR2_A:
+		case I40E_DEV_ID_25G_B:
+		case I40E_DEV_ID_25G_SFP28:
+		case I40E_DEV_ID_X710_N3000:
+		case I40E_DEV_ID_XXV710_N3000:
+			hw->mac.type = I40E_MAC_XL710;
+			break;
+		case I40E_DEV_ID_KX_X722:
+		case I40E_DEV_ID_QSFP_X722:
+		case I40E_DEV_ID_SFP_X722:
+		case I40E_DEV_ID_1G_BASE_T_X722:
+		case I40E_DEV_ID_10G_BASE_T_X722:
+		case I40E_DEV_ID_SFP_I_X722:
+		case I40E_DEV_ID_SFP_X722_A:
+			hw->mac.type = I40E_MAC_X722;
+			break;
+		default:
+			hw->mac.type = I40E_MAC_GENERIC;
+			break;
+		}
+	} else {
+		status = -ENODEV;
+	}
+
+	hw_dbg(hw, "i40e_set_mac_type found mac: %d, returns: %d\n",
+		  hw->mac.type, status);
+	return status;
+}
+
+/**
+ * i40e_aq_str - convert AQ err code to a string
+ * @hw: pointer to the HW structure
+ * @aq_err: the AQ error code to convert
+ **/
+const char *i40e_aq_str(struct i40e_hw *hw, enum i40e_admin_queue_err aq_err)
+{
+	switch (aq_err) {
+	case I40E_AQ_RC_OK:
+		return "OK";
+	case I40E_AQ_RC_EPERM:
+		return "I40E_AQ_RC_EPERM";
+	case I40E_AQ_RC_ENOENT:
+		return "I40E_AQ_RC_ENOENT";
+	case I40E_AQ_RC_ESRCH:
+		return "I40E_AQ_RC_ESRCH";
+	case I40E_AQ_RC_EINTR:
+		return "I40E_AQ_RC_EINTR";
+	case I40E_AQ_RC_EIO:
+		return "I40E_AQ_RC_EIO";
+	case I40E_AQ_RC_ENXIO:
+		return "I40E_AQ_RC_ENXIO";
+	case I40E_AQ_RC_E2BIG:
+		return "I40E_AQ_RC_E2BIG";
+	case I40E_AQ_RC_EAGAIN:
+		return "I40E_AQ_RC_EAGAIN";
+	case I40E_AQ_RC_ENOMEM:
+		return "I40E_AQ_RC_ENOMEM";
+	case I40E_AQ_RC_EACCES:
+		return "I40E_AQ_RC_EACCES";
+	case I40E_AQ_RC_EFAULT:
+		return "I40E_AQ_RC_EFAULT";
+	case I40E_AQ_RC_EBUSY:
+		return "I40E_AQ_RC_EBUSY";
+	case I40E_AQ_RC_EEXIST:
+		return "I40E_AQ_RC_EEXIST";
+	case I40E_AQ_RC_EINVAL:
+		return "I40E_AQ_RC_EINVAL";
+	case I40E_AQ_RC_ENOTTY:
+		return "I40E_AQ_RC_ENOTTY";
+	case I40E_AQ_RC_ENOSPC:
+		return "I40E_AQ_RC_ENOSPC";
+	case I40E_AQ_RC_ENOSYS:
+		return "I40E_AQ_RC_ENOSYS";
+	case I40E_AQ_RC_ERANGE:
+		return "I40E_AQ_RC_ERANGE";
+	case I40E_AQ_RC_EFLUSHED:
+		return "I40E_AQ_RC_EFLUSHED";
+	case I40E_AQ_RC_BAD_ADDR:
+		return "I40E_AQ_RC_BAD_ADDR";
+	case I40E_AQ_RC_EMODE:
+		return "I40E_AQ_RC_EMODE";
+	case I40E_AQ_RC_EFBIG:
+		return "I40E_AQ_RC_EFBIG";
+	}
+
+	snprintf(hw->err_str, sizeof(hw->err_str), "%d", aq_err);
+	return hw->err_str;
+}
+
+/**
+ * i40e_debug_aq
+ * @hw: debug mask related to admin queue
+ * @mask: debug mask
+ * @desc: pointer to admin queue descriptor
+ * @buffer: pointer to command buffer
+ * @buf_len: max length of buffer
+ *
+ * Dumps debug log about adminq command with descriptor contents.
+ **/
+void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc,
+		   void *buffer, u16 buf_len)
+{
+	struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc;
+	u32 effective_mask = hw->debug_mask & mask;
+	char prefix[27];
+	u16 len;
+	u8 *buf = (u8 *)buffer;
+
+	if (!effective_mask || !desc)
+		return;
+
+	len = le16_to_cpu(aq_desc->datalen);
+
+	i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR,
+		   "AQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
+		   le16_to_cpu(aq_desc->opcode),
+		   le16_to_cpu(aq_desc->flags),
+		   le16_to_cpu(aq_desc->datalen),
+		   le16_to_cpu(aq_desc->retval));
+	i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR,
+		   "\tcookie (h,l) 0x%08X 0x%08X\n",
+		   le32_to_cpu(aq_desc->cookie_high),
+		   le32_to_cpu(aq_desc->cookie_low));
+	i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR,
+		   "\tparam (0,1)  0x%08X 0x%08X\n",
+		   le32_to_cpu(aq_desc->params.internal.param0),
+		   le32_to_cpu(aq_desc->params.internal.param1));
+	i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR,
+		   "\taddr (h,l)   0x%08X 0x%08X\n",
+		   le32_to_cpu(aq_desc->params.external.addr_high),
+		   le32_to_cpu(aq_desc->params.external.addr_low));
+
+	if (buffer && buf_len != 0 && len != 0 &&
+	    (effective_mask & I40E_DEBUG_AQ_DESC_BUFFER)) {
+		i40e_debug(hw, mask, "AQ CMD Buffer:\n");
+		if (buf_len < len)
+			len = buf_len;
+
+		snprintf(prefix, sizeof(prefix),
+			 "i40e %02x:%02x.%x: \t0x",
+			 hw->bus.bus_id,
+			 hw->bus.device,
+			 hw->bus.func);
+
+		print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET,
+			       16, 1, buf, len, false);
+	}
+}
+
+/**
+ * i40e_check_asq_alive
+ * @hw: pointer to the hw struct
+ *
+ * Returns true if Queue is enabled else false.
+ **/
+bool i40e_check_asq_alive(struct i40e_hw *hw)
+{
+	if (hw->aq.asq.len)
+		return !!(rd32(hw, hw->aq.asq.len) &
+			  I40E_PF_ATQLEN_ATQENABLE_MASK);
+	else
+		return false;
+}
+
+/**
+ * i40e_aq_queue_shutdown
+ * @hw: pointer to the hw struct
+ * @unloading: is the driver unloading itself
+ *
+ * Tell the Firmware that we're shutting down the AdminQ and whether
+ * or not the driver is unloading as well.
+ **/
+int i40e_aq_queue_shutdown(struct i40e_hw *hw,
+			   bool unloading)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_queue_shutdown *cmd =
+		(struct i40e_aqc_queue_shutdown *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_queue_shutdown);
+
+	if (unloading)
+		cmd->driver_unloading = cpu_to_le32(I40E_AQ_DRIVER_UNLOADING);
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL);
+
+	return status;
+}
+
+/**
+ * i40e_aq_get_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: vsi fw index
+ * @pf_lut: for PF table set true, for VSI table set false
+ * @lut: pointer to the lut buffer provided by the caller
+ * @lut_size: size of the lut buffer
+ * @set: set true to set the table, false to get the table
+ *
+ * Internal function to get or set RSS look up table
+ **/
+static int i40e_aq_get_set_rss_lut(struct i40e_hw *hw,
+				   u16 vsi_id, bool pf_lut,
+				   u8 *lut, u16 lut_size,
+				   bool set)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_get_set_rss_lut *cmd_resp =
+		   (struct i40e_aqc_get_set_rss_lut *)&desc.params.raw;
+	int status;
+
+	if (set)
+		i40e_fill_default_direct_cmd_desc(&desc,
+						  i40e_aqc_opc_set_rss_lut);
+	else
+		i40e_fill_default_direct_cmd_desc(&desc,
+						  i40e_aqc_opc_get_rss_lut);
+
+	/* Indirect command */
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_RD);
+
+	cmd_resp->vsi_id =
+			cpu_to_le16((u16)((vsi_id <<
+					  I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT) &
+					  I40E_AQC_SET_RSS_LUT_VSI_ID_MASK));
+	cmd_resp->vsi_id |= cpu_to_le16((u16)I40E_AQC_SET_RSS_LUT_VSI_VALID);
+
+	if (pf_lut)
+		cmd_resp->flags |= cpu_to_le16((u16)
+					((I40E_AQC_SET_RSS_LUT_TABLE_TYPE_PF <<
+					I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) &
+					I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK));
+	else
+		cmd_resp->flags |= cpu_to_le16((u16)
+					((I40E_AQC_SET_RSS_LUT_TABLE_TYPE_VSI <<
+					I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) &
+					I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK));
+
+	status = i40e_asq_send_command(hw, &desc, lut, lut_size, NULL);
+
+	return status;
+}
+
+/**
+ * i40e_aq_get_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: vsi fw index
+ * @pf_lut: for PF table set true, for VSI table set false
+ * @lut: pointer to the lut buffer provided by the caller
+ * @lut_size: size of the lut buffer
+ *
+ * get the RSS lookup table, PF or VSI type
+ **/
+int i40e_aq_get_rss_lut(struct i40e_hw *hw, u16 vsi_id,
+			bool pf_lut, u8 *lut, u16 lut_size)
+{
+	return i40e_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size,
+				       false);
+}
+
+/**
+ * i40e_aq_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: vsi fw index
+ * @pf_lut: for PF table set true, for VSI table set false
+ * @lut: pointer to the lut buffer provided by the caller
+ * @lut_size: size of the lut buffer
+ *
+ * set the RSS lookup table, PF or VSI type
+ **/
+int i40e_aq_set_rss_lut(struct i40e_hw *hw, u16 vsi_id,
+			bool pf_lut, u8 *lut, u16 lut_size)
+{
+	return i40e_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size, true);
+}
+
+/**
+ * i40e_aq_get_set_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: vsi fw index
+ * @key: pointer to key info struct
+ * @set: set true to set the key, false to get the key
+ *
+ * get the RSS key per VSI
+ **/
+static int i40e_aq_get_set_rss_key(struct i40e_hw *hw,
+				   u16 vsi_id,
+				   struct i40e_aqc_get_set_rss_key_data *key,
+				   bool set)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_get_set_rss_key *cmd_resp =
+			(struct i40e_aqc_get_set_rss_key *)&desc.params.raw;
+	u16 key_size = sizeof(struct i40e_aqc_get_set_rss_key_data);
+	int status;
+
+	if (set)
+		i40e_fill_default_direct_cmd_desc(&desc,
+						  i40e_aqc_opc_set_rss_key);
+	else
+		i40e_fill_default_direct_cmd_desc(&desc,
+						  i40e_aqc_opc_get_rss_key);
+
+	/* Indirect command */
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_RD);
+
+	cmd_resp->vsi_id =
+			cpu_to_le16((u16)((vsi_id <<
+					  I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT) &
+					  I40E_AQC_SET_RSS_KEY_VSI_ID_MASK));
+	cmd_resp->vsi_id |= cpu_to_le16((u16)I40E_AQC_SET_RSS_KEY_VSI_VALID);
+
+	status = i40e_asq_send_command(hw, &desc, key, key_size, NULL);
+
+	return status;
+}
+
+/**
+ * i40e_aq_get_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: vsi fw index
+ * @key: pointer to key info struct
+ *
+ **/
+int i40e_aq_get_rss_key(struct i40e_hw *hw,
+			u16 vsi_id,
+			struct i40e_aqc_get_set_rss_key_data *key)
+{
+	return i40e_aq_get_set_rss_key(hw, vsi_id, key, false);
+}
+
+/**
+ * i40e_aq_set_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: vsi fw index
+ * @key: pointer to key info struct
+ *
+ * set the RSS key per VSI
+ **/
+int i40e_aq_set_rss_key(struct i40e_hw *hw,
+			u16 vsi_id,
+			struct i40e_aqc_get_set_rss_key_data *key)
+{
+	return i40e_aq_get_set_rss_key(hw, vsi_id, key, true);
+}
+
+/* The i40e_ptype_lookup table is used to convert from the 8-bit ptype in the
+ * hardware to a bit-field that can be used by SW to more easily determine the
+ * packet type.
+ *
+ * Macros are used to shorten the table lines and make this table human
+ * readable.
+ *
+ * We store the PTYPE in the top byte of the bit field - this is just so that
+ * we can check that the table doesn't have a row missing, as the index into
+ * the table should be the PTYPE.
+ *
+ * Typical work flow:
+ *
+ * IF NOT i40e_ptype_lookup[ptype].known
+ * THEN
+ *      Packet is unknown
+ * ELSE IF i40e_ptype_lookup[ptype].outer_ip == I40E_RX_PTYPE_OUTER_IP
+ *      Use the rest of the fields to look at the tunnels, inner protocols, etc
+ * ELSE
+ *      Use the enum i40e_rx_l2_ptype to decode the packet type
+ * ENDIF
+ */
+
+/* macro to make the table lines short, use explicit indexing with [PTYPE] */
+#define I40E_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
+	[PTYPE] = { \
+		1, \
+		I40E_RX_PTYPE_OUTER_##OUTER_IP, \
+		I40E_RX_PTYPE_OUTER_##OUTER_IP_VER, \
+		I40E_RX_PTYPE_##OUTER_FRAG, \
+		I40E_RX_PTYPE_TUNNEL_##T, \
+		I40E_RX_PTYPE_TUNNEL_END_##TE, \
+		I40E_RX_PTYPE_##TEF, \
+		I40E_RX_PTYPE_INNER_PROT_##I, \
+		I40E_RX_PTYPE_PAYLOAD_LAYER_##PL }
+
+#define I40E_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+/* shorter macros makes the table fit but are terse */
+#define I40E_RX_PTYPE_NOF		I40E_RX_PTYPE_NOT_FRAG
+#define I40E_RX_PTYPE_FRG		I40E_RX_PTYPE_FRAG
+#define I40E_RX_PTYPE_INNER_PROT_TS	I40E_RX_PTYPE_INNER_PROT_TIMESYNC
+
+/* Lookup table mapping in the 8-bit HW PTYPE to the bit field for decoding */
+struct i40e_rx_ptype_decoded i40e_ptype_lookup[BIT(8)] = {
+	/* L2 Packet types */
+	I40E_PTT_UNUSED_ENTRY(0),
+	I40E_PTT(1,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	I40E_PTT(2,  L2, NONE, NOF, NONE, NONE, NOF, TS,   PAY2),
+	I40E_PTT(3,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	I40E_PTT_UNUSED_ENTRY(4),
+	I40E_PTT_UNUSED_ENTRY(5),
+	I40E_PTT(6,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	I40E_PTT(7,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	I40E_PTT_UNUSED_ENTRY(8),
+	I40E_PTT_UNUSED_ENTRY(9),
+	I40E_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	I40E_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+	I40E_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+
+	/* Non Tunneled IPv4 */
+	I40E_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(25),
+	I40E_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP,  PAY4),
+	I40E_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4),
+	I40E_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+	/* IPv4 --> IPv4 */
+	I40E_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(32),
+	I40E_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> IPv6 */
+	I40E_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(39),
+	I40E_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT */
+	I40E_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 --> GRE/NAT --> IPv4 */
+	I40E_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(47),
+	I40E_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> IPv6 */
+	I40E_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(54),
+	I40E_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC */
+	I40E_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 --> GRE/NAT --> MAC --> IPv4 */
+	I40E_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(62),
+	I40E_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT -> MAC --> IPv6 */
+	I40E_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(69),
+	I40E_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC/VLAN */
+	I40E_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */
+	I40E_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(77),
+	I40E_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */
+	I40E_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(84),
+	I40E_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+	/* Non Tunneled IPv6 */
+	I40E_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(91),
+	I40E_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP,  PAY4),
+	I40E_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),
+	I40E_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+	/* IPv6 --> IPv4 */
+	I40E_PTT(95,  IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(96,  IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(97,  IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(98),
+	I40E_PTT(99,  IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> IPv6 */
+	I40E_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(105),
+	I40E_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT */
+	I40E_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> IPv4 */
+	I40E_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(113),
+	I40E_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> IPv6 */
+	I40E_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(120),
+	I40E_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC */
+	I40E_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> MAC -> IPv4 */
+	I40E_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(128),
+	I40E_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC -> IPv6 */
+	I40E_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(135),
+	I40E_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN */
+	I40E_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */
+	I40E_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(143),
+	I40E_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */
+	I40E_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(150),
+	I40E_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+	/* unused entries */
+	[154 ... 255] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+};
+
+/**
+ * i40e_init_shared_code - Initialize the shared code
+ * @hw: pointer to hardware structure
+ *
+ * This assigns the MAC type and PHY code and inits the NVM.
+ * Does not touch the hardware. This function must be called prior to any
+ * other function in the shared code. The i40e_hw structure should be
+ * memset to 0 prior to calling this function.  The following fields in
+ * hw structure should be filled in prior to calling this function:
+ * hw_addr, back, device_id, vendor_id, subsystem_device_id,
+ * subsystem_vendor_id, and revision_id
+ **/
+int i40e_init_shared_code(struct i40e_hw *hw)
+{
+	u32 port, ari, func_rid;
+	int status = 0;
+
+	i40e_set_mac_type(hw);
+
+	switch (hw->mac.type) {
+	case I40E_MAC_XL710:
+	case I40E_MAC_X722:
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	hw->phy.get_link_info = true;
+
+	/* Determine port number and PF number*/
+	port = (rd32(hw, I40E_PFGEN_PORTNUM) & I40E_PFGEN_PORTNUM_PORT_NUM_MASK)
+					   >> I40E_PFGEN_PORTNUM_PORT_NUM_SHIFT;
+	hw->port = (u8)port;
+	ari = (rd32(hw, I40E_GLPCI_CAPSUP) & I40E_GLPCI_CAPSUP_ARI_EN_MASK) >>
+						 I40E_GLPCI_CAPSUP_ARI_EN_SHIFT;
+	func_rid = rd32(hw, I40E_PF_FUNC_RID);
+	if (ari)
+		hw->pf_id = (u8)(func_rid & 0xff);
+	else
+		hw->pf_id = (u8)(func_rid & 0x7);
+
+	status = i40e_init_nvm(hw);
+	return status;
+}
+
+/**
+ * i40e_aq_mac_address_read - Retrieve the MAC addresses
+ * @hw: pointer to the hw struct
+ * @flags: a return indicator of what addresses were added to the addr store
+ * @addrs: the requestor's mac addr store
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+static int
+i40e_aq_mac_address_read(struct i40e_hw *hw,
+			 u16 *flags,
+			 struct i40e_aqc_mac_address_read_data *addrs,
+			 struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_mac_address_read *cmd_data =
+		(struct i40e_aqc_mac_address_read *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_mac_address_read);
+	desc.flags |= cpu_to_le16(I40E_AQ_FLAG_BUF);
+
+	status = i40e_asq_send_command(hw, &desc, addrs,
+				       sizeof(*addrs), cmd_details);
+	*flags = le16_to_cpu(cmd_data->command_flags);
+
+	return status;
+}
+
+/**
+ * i40e_aq_mac_address_write - Change the MAC addresses
+ * @hw: pointer to the hw struct
+ * @flags: indicates which MAC to be written
+ * @mac_addr: address to write
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int i40e_aq_mac_address_write(struct i40e_hw *hw,
+			      u16 flags, u8 *mac_addr,
+			      struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_mac_address_write *cmd_data =
+		(struct i40e_aqc_mac_address_write *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_mac_address_write);
+	cmd_data->command_flags = cpu_to_le16(flags);
+	cmd_data->mac_sah = cpu_to_le16((u16)mac_addr[0] << 8 | mac_addr[1]);
+	cmd_data->mac_sal = cpu_to_le32(((u32)mac_addr[2] << 24) |
+					((u32)mac_addr[3] << 16) |
+					((u32)mac_addr[4] << 8) |
+					mac_addr[5]);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_get_mac_addr - get MAC address
+ * @hw: pointer to the HW structure
+ * @mac_addr: pointer to MAC address
+ *
+ * Reads the adapter's MAC address from register
+ **/
+int i40e_get_mac_addr(struct i40e_hw *hw, u8 *mac_addr)
+{
+	struct i40e_aqc_mac_address_read_data addrs;
+	u16 flags = 0;
+	int status;
+
+	status = i40e_aq_mac_address_read(hw, &flags, &addrs, NULL);
+
+	if (flags & I40E_AQC_LAN_ADDR_VALID)
+		ether_addr_copy(mac_addr, addrs.pf_lan_mac);
+
+	return status;
+}
+
+/**
+ * i40e_get_port_mac_addr - get Port MAC address
+ * @hw: pointer to the HW structure
+ * @mac_addr: pointer to Port MAC address
+ *
+ * Reads the adapter's Port MAC address
+ **/
+int i40e_get_port_mac_addr(struct i40e_hw *hw, u8 *mac_addr)
+{
+	struct i40e_aqc_mac_address_read_data addrs;
+	u16 flags = 0;
+	int status;
+
+	status = i40e_aq_mac_address_read(hw, &flags, &addrs, NULL);
+	if (status)
+		return status;
+
+	if (flags & I40E_AQC_PORT_ADDR_VALID)
+		ether_addr_copy(mac_addr, addrs.port_mac);
+	else
+		status = -EINVAL;
+
+	return status;
+}
+
+/**
+ * i40e_pre_tx_queue_cfg - pre tx queue configure
+ * @hw: pointer to the HW structure
+ * @queue: target PF queue index
+ * @enable: state change request
+ *
+ * Handles hw requirement to indicate intention to enable
+ * or disable target queue.
+ **/
+void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable)
+{
+	u32 abs_queue_idx = hw->func_caps.base_queue + queue;
+	u32 reg_block = 0;
+	u32 reg_val;
+
+	if (abs_queue_idx >= 128) {
+		reg_block = abs_queue_idx / 128;
+		abs_queue_idx %= 128;
+	}
+
+	reg_val = rd32(hw, I40E_GLLAN_TXPRE_QDIS(reg_block));
+	reg_val &= ~I40E_GLLAN_TXPRE_QDIS_QINDX_MASK;
+	reg_val |= (abs_queue_idx << I40E_GLLAN_TXPRE_QDIS_QINDX_SHIFT);
+
+	if (enable)
+		reg_val |= I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_MASK;
+	else
+		reg_val |= I40E_GLLAN_TXPRE_QDIS_SET_QDIS_MASK;
+
+	wr32(hw, I40E_GLLAN_TXPRE_QDIS(reg_block), reg_val);
+}
+
+/**
+ *  i40e_read_pba_string - Reads part number string from EEPROM
+ *  @hw: pointer to hardware structure
+ *  @pba_num: stores the part number string from the EEPROM
+ *  @pba_num_size: part number string buffer length
+ *
+ *  Reads the part number string from the EEPROM.
+ **/
+int i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num,
+			 u32 pba_num_size)
+{
+	u16 pba_word = 0;
+	u16 pba_size = 0;
+	u16 pba_ptr = 0;
+	int status = 0;
+	u16 i = 0;
+
+	status = i40e_read_nvm_word(hw, I40E_SR_PBA_FLAGS, &pba_word);
+	if (status || (pba_word != 0xFAFA)) {
+		hw_dbg(hw, "Failed to read PBA flags or flag is invalid.\n");
+		return status;
+	}
+
+	status = i40e_read_nvm_word(hw, I40E_SR_PBA_BLOCK_PTR, &pba_ptr);
+	if (status) {
+		hw_dbg(hw, "Failed to read PBA Block pointer.\n");
+		return status;
+	}
+
+	status = i40e_read_nvm_word(hw, pba_ptr, &pba_size);
+	if (status) {
+		hw_dbg(hw, "Failed to read PBA Block size.\n");
+		return status;
+	}
+
+	/* Subtract one to get PBA word count (PBA Size word is included in
+	 * total size)
+	 */
+	pba_size--;
+	if (pba_num_size < (((u32)pba_size * 2) + 1)) {
+		hw_dbg(hw, "Buffer too small for PBA data.\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < pba_size; i++) {
+		status = i40e_read_nvm_word(hw, (pba_ptr + 1) + i, &pba_word);
+		if (status) {
+			hw_dbg(hw, "Failed to read PBA Block word %d.\n", i);
+			return status;
+		}
+
+		pba_num[(i * 2)] = (pba_word >> 8) & 0xFF;
+		pba_num[(i * 2) + 1] = pba_word & 0xFF;
+	}
+	pba_num[(pba_size * 2)] = '\0';
+
+	return status;
+}
+
+/**
+ * i40e_get_media_type - Gets media type
+ * @hw: pointer to the hardware structure
+ **/
+static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
+{
+	enum i40e_media_type media;
+
+	switch (hw->phy.link_info.phy_type) {
+	case I40E_PHY_TYPE_10GBASE_SR:
+	case I40E_PHY_TYPE_10GBASE_LR:
+	case I40E_PHY_TYPE_1000BASE_SX:
+	case I40E_PHY_TYPE_1000BASE_LX:
+	case I40E_PHY_TYPE_40GBASE_SR4:
+	case I40E_PHY_TYPE_40GBASE_LR4:
+	case I40E_PHY_TYPE_25GBASE_LR:
+	case I40E_PHY_TYPE_25GBASE_SR:
+		media = I40E_MEDIA_TYPE_FIBER;
+		break;
+	case I40E_PHY_TYPE_100BASE_TX:
+	case I40E_PHY_TYPE_1000BASE_T:
+	case I40E_PHY_TYPE_2_5GBASE_T_LINK_STATUS:
+	case I40E_PHY_TYPE_5GBASE_T_LINK_STATUS:
+	case I40E_PHY_TYPE_10GBASE_T:
+		media = I40E_MEDIA_TYPE_BASET;
+		break;
+	case I40E_PHY_TYPE_10GBASE_CR1_CU:
+	case I40E_PHY_TYPE_40GBASE_CR4_CU:
+	case I40E_PHY_TYPE_10GBASE_CR1:
+	case I40E_PHY_TYPE_40GBASE_CR4:
+	case I40E_PHY_TYPE_10GBASE_SFPP_CU:
+	case I40E_PHY_TYPE_40GBASE_AOC:
+	case I40E_PHY_TYPE_10GBASE_AOC:
+	case I40E_PHY_TYPE_25GBASE_CR:
+	case I40E_PHY_TYPE_25GBASE_AOC:
+	case I40E_PHY_TYPE_25GBASE_ACC:
+		media = I40E_MEDIA_TYPE_DA;
+		break;
+	case I40E_PHY_TYPE_1000BASE_KX:
+	case I40E_PHY_TYPE_10GBASE_KX4:
+	case I40E_PHY_TYPE_10GBASE_KR:
+	case I40E_PHY_TYPE_40GBASE_KR4:
+	case I40E_PHY_TYPE_20GBASE_KR2:
+	case I40E_PHY_TYPE_25GBASE_KR:
+		media = I40E_MEDIA_TYPE_BACKPLANE;
+		break;
+	case I40E_PHY_TYPE_SGMII:
+	case I40E_PHY_TYPE_XAUI:
+	case I40E_PHY_TYPE_XFI:
+	case I40E_PHY_TYPE_XLAUI:
+	case I40E_PHY_TYPE_XLPPI:
+	default:
+		media = I40E_MEDIA_TYPE_UNKNOWN;
+		break;
+	}
+
+	return media;
+}
+
+/**
+ * i40e_poll_globr - Poll for Global Reset completion
+ * @hw: pointer to the hardware structure
+ * @retry_limit: how many times to retry before failure
+ **/
+static int i40e_poll_globr(struct i40e_hw *hw,
+			   u32 retry_limit)
+{
+	u32 cnt, reg = 0;
+
+	for (cnt = 0; cnt < retry_limit; cnt++) {
+		reg = rd32(hw, I40E_GLGEN_RSTAT);
+		if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK))
+			return 0;
+		msleep(100);
+	}
+
+	hw_dbg(hw, "Global reset failed.\n");
+	hw_dbg(hw, "I40E_GLGEN_RSTAT = 0x%x\n", reg);
+
+	return -EIO;
+}
+
+#define I40E_PF_RESET_WAIT_COUNT_A0	200
+#define I40E_PF_RESET_WAIT_COUNT	200
+/**
+ * i40e_pf_reset - Reset the PF
+ * @hw: pointer to the hardware structure
+ *
+ * Assuming someone else has triggered a global reset,
+ * assure the global reset is complete and then reset the PF
+ **/
+int i40e_pf_reset(struct i40e_hw *hw)
+{
+	u32 cnt = 0;
+	u32 cnt1 = 0;
+	u32 reg = 0;
+	u32 grst_del;
+
+	/* Poll for Global Reset steady state in case of recent GRST.
+	 * The grst delay value is in 100ms units, and we'll wait a
+	 * couple counts longer to be sure we don't just miss the end.
+	 */
+	grst_del = (rd32(hw, I40E_GLGEN_RSTCTL) &
+		    I40E_GLGEN_RSTCTL_GRSTDEL_MASK) >>
+		    I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT;
+
+	/* It can take upto 15 secs for GRST steady state.
+	 * Bump it to 16 secs max to be safe.
+	 */
+	grst_del = grst_del * 20;
+
+	for (cnt = 0; cnt < grst_del; cnt++) {
+		reg = rd32(hw, I40E_GLGEN_RSTAT);
+		if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK))
+			break;
+		msleep(100);
+	}
+	if (reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK) {
+		hw_dbg(hw, "Global reset polling failed to complete.\n");
+		return -EIO;
+	}
+
+	/* Now Wait for the FW to be ready */
+	for (cnt1 = 0; cnt1 < I40E_PF_RESET_WAIT_COUNT; cnt1++) {
+		reg = rd32(hw, I40E_GLNVM_ULD);
+		reg &= (I40E_GLNVM_ULD_CONF_CORE_DONE_MASK |
+			I40E_GLNVM_ULD_CONF_GLOBAL_DONE_MASK);
+		if (reg == (I40E_GLNVM_ULD_CONF_CORE_DONE_MASK |
+			    I40E_GLNVM_ULD_CONF_GLOBAL_DONE_MASK)) {
+			hw_dbg(hw, "Core and Global modules ready %d\n", cnt1);
+			break;
+		}
+		usleep_range(10000, 20000);
+	}
+	if (!(reg & (I40E_GLNVM_ULD_CONF_CORE_DONE_MASK |
+		     I40E_GLNVM_ULD_CONF_GLOBAL_DONE_MASK))) {
+		hw_dbg(hw, "wait for FW Reset complete timedout\n");
+		hw_dbg(hw, "I40E_GLNVM_ULD = 0x%x\n", reg);
+		return -EIO;
+	}
+
+	/* If there was a Global Reset in progress when we got here,
+	 * we don't need to do the PF Reset
+	 */
+	if (!cnt) {
+		u32 reg2 = 0;
+		if (hw->revision_id == 0)
+			cnt = I40E_PF_RESET_WAIT_COUNT_A0;
+		else
+			cnt = I40E_PF_RESET_WAIT_COUNT;
+		reg = rd32(hw, I40E_PFGEN_CTRL);
+		wr32(hw, I40E_PFGEN_CTRL,
+		     (reg | I40E_PFGEN_CTRL_PFSWR_MASK));
+		for (; cnt; cnt--) {
+			reg = rd32(hw, I40E_PFGEN_CTRL);
+			if (!(reg & I40E_PFGEN_CTRL_PFSWR_MASK))
+				break;
+			reg2 = rd32(hw, I40E_GLGEN_RSTAT);
+			if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK)
+				break;
+			usleep_range(1000, 2000);
+		}
+		if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK) {
+			if (i40e_poll_globr(hw, grst_del))
+				return -EIO;
+		} else if (reg & I40E_PFGEN_CTRL_PFSWR_MASK) {
+			hw_dbg(hw, "PF reset polling failed to complete.\n");
+			return -EIO;
+		}
+	}
+
+	i40e_clear_pxe_mode(hw);
+
+	return 0;
+}
+
+/**
+ * i40e_clear_hw - clear out any left over hw state
+ * @hw: pointer to the hw struct
+ *
+ * Clear queues and interrupts, typically called at init time,
+ * but after the capabilities have been found so we know how many
+ * queues and msix vectors have been allocated.
+ **/
+void i40e_clear_hw(struct i40e_hw *hw)
+{
+	u32 num_queues, base_queue;
+	u32 num_pf_int;
+	u32 num_vf_int;
+	u32 num_vfs;
+	u32 i, j;
+	u32 val;
+	u32 eol = 0x7ff;
+
+	/* get number of interrupts, queues, and VFs */
+	val = rd32(hw, I40E_GLPCI_CNF2);
+	num_pf_int = (val & I40E_GLPCI_CNF2_MSI_X_PF_N_MASK) >>
+		     I40E_GLPCI_CNF2_MSI_X_PF_N_SHIFT;
+	num_vf_int = (val & I40E_GLPCI_CNF2_MSI_X_VF_N_MASK) >>
+		     I40E_GLPCI_CNF2_MSI_X_VF_N_SHIFT;
+
+	val = rd32(hw, I40E_PFLAN_QALLOC);
+	base_queue = (val & I40E_PFLAN_QALLOC_FIRSTQ_MASK) >>
+		     I40E_PFLAN_QALLOC_FIRSTQ_SHIFT;
+	j = (val & I40E_PFLAN_QALLOC_LASTQ_MASK) >>
+	    I40E_PFLAN_QALLOC_LASTQ_SHIFT;
+	if (val & I40E_PFLAN_QALLOC_VALID_MASK && j >= base_queue)
+		num_queues = (j - base_queue) + 1;
+	else
+		num_queues = 0;
+
+	val = rd32(hw, I40E_PF_VT_PFALLOC);
+	i = (val & I40E_PF_VT_PFALLOC_FIRSTVF_MASK) >>
+	    I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT;
+	j = (val & I40E_PF_VT_PFALLOC_LASTVF_MASK) >>
+	    I40E_PF_VT_PFALLOC_LASTVF_SHIFT;
+	if (val & I40E_PF_VT_PFALLOC_VALID_MASK && j >= i)
+		num_vfs = (j - i) + 1;
+	else
+		num_vfs = 0;
+
+	/* stop all the interrupts */
+	wr32(hw, I40E_PFINT_ICR0_ENA, 0);
+	val = 0x3 << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT;
+	for (i = 0; i < num_pf_int - 2; i++)
+		wr32(hw, I40E_PFINT_DYN_CTLN(i), val);
+
+	/* Set the FIRSTQ_INDX field to 0x7FF in PFINT_LNKLSTx */
+	val = eol << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT;
+	wr32(hw, I40E_PFINT_LNKLST0, val);
+	for (i = 0; i < num_pf_int - 2; i++)
+		wr32(hw, I40E_PFINT_LNKLSTN(i), val);
+	val = eol << I40E_VPINT_LNKLST0_FIRSTQ_INDX_SHIFT;
+	for (i = 0; i < num_vfs; i++)
+		wr32(hw, I40E_VPINT_LNKLST0(i), val);
+	for (i = 0; i < num_vf_int - 2; i++)
+		wr32(hw, I40E_VPINT_LNKLSTN(i), val);
+
+	/* warn the HW of the coming Tx disables */
+	for (i = 0; i < num_queues; i++) {
+		u32 abs_queue_idx = base_queue + i;
+		u32 reg_block = 0;
+
+		if (abs_queue_idx >= 128) {
+			reg_block = abs_queue_idx / 128;
+			abs_queue_idx %= 128;
+		}
+
+		val = rd32(hw, I40E_GLLAN_TXPRE_QDIS(reg_block));
+		val &= ~I40E_GLLAN_TXPRE_QDIS_QINDX_MASK;
+		val |= (abs_queue_idx << I40E_GLLAN_TXPRE_QDIS_QINDX_SHIFT);
+		val |= I40E_GLLAN_TXPRE_QDIS_SET_QDIS_MASK;
+
+		wr32(hw, I40E_GLLAN_TXPRE_QDIS(reg_block), val);
+	}
+	udelay(400);
+
+	/* stop all the queues */
+	for (i = 0; i < num_queues; i++) {
+		wr32(hw, I40E_QINT_TQCTL(i), 0);
+		wr32(hw, I40E_QTX_ENA(i), 0);
+		wr32(hw, I40E_QINT_RQCTL(i), 0);
+		wr32(hw, I40E_QRX_ENA(i), 0);
+	}
+
+	/* short wait for all queue disables to settle */
+	udelay(50);
+}
+
+/**
+ * i40e_clear_pxe_mode - clear pxe operations mode
+ * @hw: pointer to the hw struct
+ *
+ * Make sure all PXE mode settings are cleared, including things
+ * like descriptor fetch/write-back mode.
+ **/
+void i40e_clear_pxe_mode(struct i40e_hw *hw)
+{
+	u32 reg;
+
+	if (i40e_check_asq_alive(hw))
+		i40e_aq_clear_pxe_mode(hw, NULL);
+
+	/* Clear single descriptor fetch/write-back mode */
+	reg = rd32(hw, I40E_GLLAN_RCTL_0);
+
+	if (hw->revision_id == 0) {
+		/* As a work around clear PXE_MODE instead of setting it */
+		wr32(hw, I40E_GLLAN_RCTL_0, (reg & (~I40E_GLLAN_RCTL_0_PXE_MODE_MASK)));
+	} else {
+		wr32(hw, I40E_GLLAN_RCTL_0, (reg | I40E_GLLAN_RCTL_0_PXE_MODE_MASK));
+	}
+}
+
+/**
+ * i40e_led_is_mine - helper to find matching led
+ * @hw: pointer to the hw struct
+ * @idx: index into GPIO registers
+ *
+ * returns: 0 if no match, otherwise the value of the GPIO_CTL register
+ */
+static u32 i40e_led_is_mine(struct i40e_hw *hw, int idx)
+{
+	u32 gpio_val = 0;
+	u32 port;
+
+	if (!I40E_IS_X710TL_DEVICE(hw->device_id) &&
+	    !hw->func_caps.led[idx])
+		return 0;
+	gpio_val = rd32(hw, I40E_GLGEN_GPIO_CTL(idx));
+	port = (gpio_val & I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK) >>
+		I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT;
+
+	/* if PRT_NUM_NA is 1 then this LED is not port specific, OR
+	 * if it is not our port then ignore
+	 */
+	if ((gpio_val & I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_MASK) ||
+	    (port != hw->port))
+		return 0;
+
+	return gpio_val;
+}
+
+#define I40E_FW_LED BIT(4)
+#define I40E_LED_MODE_VALID (I40E_GLGEN_GPIO_CTL_LED_MODE_MASK >> \
+			     I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT)
+
+#define I40E_LED0 22
+
+#define I40E_PIN_FUNC_SDP 0x0
+#define I40E_PIN_FUNC_LED 0x1
+
+/**
+ * i40e_led_get - return current on/off mode
+ * @hw: pointer to the hw struct
+ *
+ * The value returned is the 'mode' field as defined in the
+ * GPIO register definitions: 0x0 = off, 0xf = on, and other
+ * values are variations of possible behaviors relating to
+ * blink, link, and wire.
+ **/
+u32 i40e_led_get(struct i40e_hw *hw)
+{
+	u32 mode = 0;
+	int i;
+
+	/* as per the documentation GPIO 22-29 are the LED
+	 * GPIO pins named LED0..LED7
+	 */
+	for (i = I40E_LED0; i <= I40E_GLGEN_GPIO_CTL_MAX_INDEX; i++) {
+		u32 gpio_val = i40e_led_is_mine(hw, i);
+
+		if (!gpio_val)
+			continue;
+
+		mode = (gpio_val & I40E_GLGEN_GPIO_CTL_LED_MODE_MASK) >>
+			I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT;
+		break;
+	}
+
+	return mode;
+}
+
+/**
+ * i40e_led_set - set new on/off mode
+ * @hw: pointer to the hw struct
+ * @mode: 0=off, 0xf=on (else see manual for mode details)
+ * @blink: true if the LED should blink when on, false if steady
+ *
+ * if this function is used to turn on the blink it should
+ * be used to disable the blink when restoring the original state.
+ **/
+void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink)
+{
+	int i;
+
+	if (mode & ~I40E_LED_MODE_VALID) {
+		hw_dbg(hw, "invalid mode passed in %X\n", mode);
+		return;
+	}
+
+	/* as per the documentation GPIO 22-29 are the LED
+	 * GPIO pins named LED0..LED7
+	 */
+	for (i = I40E_LED0; i <= I40E_GLGEN_GPIO_CTL_MAX_INDEX; i++) {
+		u32 gpio_val = i40e_led_is_mine(hw, i);
+
+		if (!gpio_val)
+			continue;
+
+		if (I40E_IS_X710TL_DEVICE(hw->device_id)) {
+			u32 pin_func = 0;
+
+			if (mode & I40E_FW_LED)
+				pin_func = I40E_PIN_FUNC_SDP;
+			else
+				pin_func = I40E_PIN_FUNC_LED;
+
+			gpio_val &= ~I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK;
+			gpio_val |= ((pin_func <<
+				     I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT) &
+				     I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK);
+		}
+		gpio_val &= ~I40E_GLGEN_GPIO_CTL_LED_MODE_MASK;
+		/* this & is a bit of paranoia, but serves as a range check */
+		gpio_val |= ((mode << I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT) &
+			     I40E_GLGEN_GPIO_CTL_LED_MODE_MASK);
+
+		if (blink)
+			gpio_val |= BIT(I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT);
+		else
+			gpio_val &= ~BIT(I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT);
+
+		wr32(hw, I40E_GLGEN_GPIO_CTL(i), gpio_val);
+		break;
+	}
+}
+
+/* Admin command wrappers */
+
+/**
+ * i40e_aq_get_phy_capabilities
+ * @hw: pointer to the hw struct
+ * @abilities: structure for PHY capabilities to be filled
+ * @qualified_modules: report Qualified Modules
+ * @report_init: report init capabilities (active are default)
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Returns the various PHY abilities supported on the Port.
+ **/
+int
+i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
+			     bool qualified_modules, bool report_init,
+			     struct i40e_aq_get_phy_abilities_resp *abilities,
+			     struct i40e_asq_cmd_details *cmd_details)
+{
+	u16 abilities_size = sizeof(struct i40e_aq_get_phy_abilities_resp);
+	u16 max_delay = I40E_MAX_PHY_TIMEOUT, total_delay = 0;
+	struct i40e_aq_desc desc;
+	int status;
+
+	if (!abilities)
+		return -EINVAL;
+
+	do {
+		i40e_fill_default_direct_cmd_desc(&desc,
+					       i40e_aqc_opc_get_phy_abilities);
+
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+		if (abilities_size > I40E_AQ_LARGE_BUF)
+			desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+		if (qualified_modules)
+			desc.params.external.param0 |=
+			cpu_to_le32(I40E_AQ_PHY_REPORT_QUALIFIED_MODULES);
+
+		if (report_init)
+			desc.params.external.param0 |=
+			cpu_to_le32(I40E_AQ_PHY_REPORT_INITIAL_VALUES);
+
+		status = i40e_asq_send_command(hw, &desc, abilities,
+					       abilities_size, cmd_details);
+
+		switch (hw->aq.asq_last_status) {
+		case I40E_AQ_RC_EIO:
+			status = -EIO;
+			break;
+		case I40E_AQ_RC_EAGAIN:
+			usleep_range(1000, 2000);
+			total_delay++;
+			status = -EIO;
+			break;
+		/* also covers I40E_AQ_RC_OK */
+		default:
+			break;
+		}
+
+	} while ((hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN) &&
+		(total_delay < max_delay));
+
+	if (status)
+		return status;
+
+	if (report_init) {
+		if (hw->mac.type ==  I40E_MAC_XL710 &&
+		    hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
+		    hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) {
+			status = i40e_aq_get_link_info(hw, true, NULL, NULL);
+		} else {
+			hw->phy.phy_types = le32_to_cpu(abilities->phy_type);
+			hw->phy.phy_types |=
+					((u64)abilities->phy_type_ext << 32);
+		}
+	}
+
+	return status;
+}
+
+/**
+ * i40e_aq_set_phy_config
+ * @hw: pointer to the hw struct
+ * @config: structure with PHY configuration to be set
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Set the various PHY configuration parameters
+ * supported on the Port.One or more of the Set PHY config parameters may be
+ * ignored in an MFP mode as the PF may not have the privilege to set some
+ * of the PHY Config parameters. This status will be indicated by the
+ * command response.
+ **/
+int i40e_aq_set_phy_config(struct i40e_hw *hw,
+			   struct i40e_aq_set_phy_config *config,
+			   struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aq_set_phy_config *cmd =
+			(struct i40e_aq_set_phy_config *)&desc.params.raw;
+	int status;
+
+	if (!config)
+		return -EINVAL;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_set_phy_config);
+
+	*cmd = *config;
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+static noinline_for_stack int
+i40e_set_fc_status(struct i40e_hw *hw,
+		   struct i40e_aq_get_phy_abilities_resp *abilities,
+		   bool atomic_restart)
+{
+	struct i40e_aq_set_phy_config config;
+	enum i40e_fc_mode fc_mode = hw->fc.requested_mode;
+	u8 pause_mask = 0x0;
+
+	switch (fc_mode) {
+	case I40E_FC_FULL:
+		pause_mask |= I40E_AQ_PHY_FLAG_PAUSE_TX;
+		pause_mask |= I40E_AQ_PHY_FLAG_PAUSE_RX;
+		break;
+	case I40E_FC_RX_PAUSE:
+		pause_mask |= I40E_AQ_PHY_FLAG_PAUSE_RX;
+		break;
+	case I40E_FC_TX_PAUSE:
+		pause_mask |= I40E_AQ_PHY_FLAG_PAUSE_TX;
+		break;
+	default:
+		break;
+	}
+
+	memset(&config, 0, sizeof(struct i40e_aq_set_phy_config));
+	/* clear the old pause settings */
+	config.abilities = abilities->abilities & ~(I40E_AQ_PHY_FLAG_PAUSE_TX) &
+			   ~(I40E_AQ_PHY_FLAG_PAUSE_RX);
+	/* set the new abilities */
+	config.abilities |= pause_mask;
+	/* If the abilities have changed, then set the new config */
+	if (config.abilities == abilities->abilities)
+		return 0;
+
+	/* Auto restart link so settings take effect */
+	if (atomic_restart)
+		config.abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK;
+	/* Copy over all the old settings */
+	config.phy_type = abilities->phy_type;
+	config.phy_type_ext = abilities->phy_type_ext;
+	config.link_speed = abilities->link_speed;
+	config.eee_capability = abilities->eee_capability;
+	config.eeer = abilities->eeer_val;
+	config.low_power_ctrl = abilities->d3_lpan;
+	config.fec_config = abilities->fec_cfg_curr_mod_ext_info &
+			    I40E_AQ_PHY_FEC_CONFIG_MASK;
+
+	return i40e_aq_set_phy_config(hw, &config, NULL);
+}
+
+/**
+ * i40e_set_fc
+ * @hw: pointer to the hw struct
+ * @aq_failures: buffer to return AdminQ failure information
+ * @atomic_restart: whether to enable atomic link restart
+ *
+ * Set the requested flow control mode using set_phy_config.
+ **/
+int i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures,
+		bool atomic_restart)
+{
+	struct i40e_aq_get_phy_abilities_resp abilities;
+	int status;
+
+	*aq_failures = 0x0;
+
+	/* Get the current phy config */
+	status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+					      NULL);
+	if (status) {
+		*aq_failures |= I40E_SET_FC_AQ_FAIL_GET;
+		return status;
+	}
+
+	status = i40e_set_fc_status(hw, &abilities, atomic_restart);
+	if (status)
+		*aq_failures |= I40E_SET_FC_AQ_FAIL_SET;
+
+	/* Update the link info */
+	status = i40e_update_link_info(hw);
+	if (status) {
+		/* Wait a little bit (on 40G cards it sometimes takes a really
+		 * long time for link to come back from the atomic reset)
+		 * and try once more
+		 */
+		msleep(1000);
+		status = i40e_update_link_info(hw);
+	}
+	if (status)
+		*aq_failures |= I40E_SET_FC_AQ_FAIL_UPDATE;
+
+	return status;
+}
+
+/**
+ * i40e_aq_clear_pxe_mode
+ * @hw: pointer to the hw struct
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Tell the firmware that the driver is taking over from PXE
+ **/
+int i40e_aq_clear_pxe_mode(struct i40e_hw *hw,
+			   struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_clear_pxe *cmd =
+		(struct i40e_aqc_clear_pxe *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_clear_pxe_mode);
+
+	cmd->rx_cnt = 0x2;
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	wr32(hw, I40E_GLLAN_RCTL_0, 0x1);
+
+	return status;
+}
+
+/**
+ * i40e_aq_set_link_restart_an
+ * @hw: pointer to the hw struct
+ * @enable_link: if true: enable link, if false: disable link
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Sets up the link and restarts the Auto-Negotiation over the link.
+ **/
+int i40e_aq_set_link_restart_an(struct i40e_hw *hw,
+				bool enable_link,
+				struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_link_restart_an *cmd =
+		(struct i40e_aqc_set_link_restart_an *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_set_link_restart_an);
+
+	cmd->command = I40E_AQ_PHY_RESTART_AN;
+	if (enable_link)
+		cmd->command |= I40E_AQ_PHY_LINK_ENABLE;
+	else
+		cmd->command &= ~I40E_AQ_PHY_LINK_ENABLE;
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_get_link_info
+ * @hw: pointer to the hw struct
+ * @enable_lse: enable/disable LinkStatusEvent reporting
+ * @link: pointer to link status structure - optional
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Returns the link status of the adapter.
+ **/
+int i40e_aq_get_link_info(struct i40e_hw *hw,
+			  bool enable_lse, struct i40e_link_status *link,
+			  struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_get_link_status *resp =
+		(struct i40e_aqc_get_link_status *)&desc.params.raw;
+	struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+	bool tx_pause, rx_pause;
+	u16 command_flags;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_link_status);
+
+	if (enable_lse)
+		command_flags = I40E_AQ_LSE_ENABLE;
+	else
+		command_flags = I40E_AQ_LSE_DISABLE;
+	resp->command_flags = cpu_to_le16(command_flags);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	if (status)
+		goto aq_get_link_info_exit;
+
+	/* save off old link status information */
+	hw->phy.link_info_old = *hw_link_info;
+
+	/* update link status */
+	hw_link_info->phy_type = (enum i40e_aq_phy_type)resp->phy_type;
+	hw->phy.media_type = i40e_get_media_type(hw);
+	hw_link_info->link_speed = (enum i40e_aq_link_speed)resp->link_speed;
+	hw_link_info->link_info = resp->link_info;
+	hw_link_info->an_info = resp->an_info;
+	hw_link_info->fec_info = resp->config & (I40E_AQ_CONFIG_FEC_KR_ENA |
+						 I40E_AQ_CONFIG_FEC_RS_ENA);
+	hw_link_info->ext_info = resp->ext_info;
+	hw_link_info->loopback = resp->loopback & I40E_AQ_LOOPBACK_MASK;
+	hw_link_info->max_frame_size = le16_to_cpu(resp->max_frame_size);
+	hw_link_info->pacing = resp->config & I40E_AQ_CONFIG_PACING_MASK;
+
+	/* update fc info */
+	tx_pause = !!(resp->an_info & I40E_AQ_LINK_PAUSE_TX);
+	rx_pause = !!(resp->an_info & I40E_AQ_LINK_PAUSE_RX);
+	if (tx_pause & rx_pause)
+		hw->fc.current_mode = I40E_FC_FULL;
+	else if (tx_pause)
+		hw->fc.current_mode = I40E_FC_TX_PAUSE;
+	else if (rx_pause)
+		hw->fc.current_mode = I40E_FC_RX_PAUSE;
+	else
+		hw->fc.current_mode = I40E_FC_NONE;
+
+	if (resp->config & I40E_AQ_CONFIG_CRC_ENA)
+		hw_link_info->crc_enable = true;
+	else
+		hw_link_info->crc_enable = false;
+
+	if (resp->command_flags & cpu_to_le16(I40E_AQ_LSE_IS_ENABLED))
+		hw_link_info->lse_enable = true;
+	else
+		hw_link_info->lse_enable = false;
+
+	if ((hw->mac.type == I40E_MAC_XL710) &&
+	    (hw->aq.fw_maj_ver < 4 || (hw->aq.fw_maj_ver == 4 &&
+	     hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE)
+		hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU;
+
+	if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE &&
+	    hw->mac.type != I40E_MAC_X722) {
+		__le32 tmp;
+
+		memcpy(&tmp, resp->link_type, sizeof(tmp));
+		hw->phy.phy_types = le32_to_cpu(tmp);
+		hw->phy.phy_types |= ((u64)resp->link_type_ext << 32);
+	}
+
+	/* save link status information */
+	if (link)
+		*link = *hw_link_info;
+
+	/* flag cleared so helper functions don't call AQ again */
+	hw->phy.get_link_info = false;
+
+aq_get_link_info_exit:
+	return status;
+}
+
+/**
+ * i40e_aq_set_phy_int_mask
+ * @hw: pointer to the hw struct
+ * @mask: interrupt mask to be set
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Set link interrupt mask.
+ **/
+int i40e_aq_set_phy_int_mask(struct i40e_hw *hw,
+			     u16 mask,
+			     struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_phy_int_mask *cmd =
+		(struct i40e_aqc_set_phy_int_mask *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_set_phy_int_mask);
+
+	cmd->event_mask = cpu_to_le16(mask);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_set_mac_loopback
+ * @hw: pointer to the HW struct
+ * @ena_lpbk: Enable or Disable loopback
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Enable/disable loopback on a given port
+ */
+int i40e_aq_set_mac_loopback(struct i40e_hw *hw, bool ena_lpbk,
+			     struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_lb_mode *cmd =
+		(struct i40e_aqc_set_lb_mode *)&desc.params.raw;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_lb_modes);
+	if (ena_lpbk) {
+		if (hw->nvm.version <= I40E_LEGACY_LOOPBACK_NVM_VER)
+			cmd->lb_mode = cpu_to_le16(I40E_AQ_LB_MAC_LOCAL_LEGACY);
+		else
+			cmd->lb_mode = cpu_to_le16(I40E_AQ_LB_MAC_LOCAL);
+	}
+
+	return i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+}
+
+/**
+ * i40e_aq_set_phy_debug
+ * @hw: pointer to the hw struct
+ * @cmd_flags: debug command flags
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Reset the external PHY.
+ **/
+int i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags,
+			  struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_phy_debug *cmd =
+		(struct i40e_aqc_set_phy_debug *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_set_phy_debug);
+
+	cmd->command_flags = cmd_flags;
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_is_aq_api_ver_ge
+ * @aq: pointer to AdminQ info containing HW API version to compare
+ * @maj: API major value
+ * @min: API minor value
+ *
+ * Assert whether current HW API version is greater/equal than provided.
+ **/
+static bool i40e_is_aq_api_ver_ge(struct i40e_adminq_info *aq, u16 maj,
+				  u16 min)
+{
+	return (aq->api_maj_ver > maj ||
+		(aq->api_maj_ver == maj && aq->api_min_ver >= min));
+}
+
+/**
+ * i40e_aq_add_vsi
+ * @hw: pointer to the hw struct
+ * @vsi_ctx: pointer to a vsi context struct
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Add a VSI context to the hardware.
+**/
+int i40e_aq_add_vsi(struct i40e_hw *hw,
+		    struct i40e_vsi_context *vsi_ctx,
+		    struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_add_get_update_vsi *cmd =
+		(struct i40e_aqc_add_get_update_vsi *)&desc.params.raw;
+	struct i40e_aqc_add_get_update_vsi_completion *resp =
+		(struct i40e_aqc_add_get_update_vsi_completion *)
+		&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_add_vsi);
+
+	cmd->uplink_seid = cpu_to_le16(vsi_ctx->uplink_seid);
+	cmd->connection_type = vsi_ctx->connection_type;
+	cmd->vf_id = vsi_ctx->vf_num;
+	cmd->vsi_flags = cpu_to_le16(vsi_ctx->flags);
+
+	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+
+	status = i40e_asq_send_command_atomic(hw, &desc, &vsi_ctx->info,
+					      sizeof(vsi_ctx->info),
+					      cmd_details, true);
+
+	if (status)
+		goto aq_add_vsi_exit;
+
+	vsi_ctx->seid = le16_to_cpu(resp->seid);
+	vsi_ctx->vsi_number = le16_to_cpu(resp->vsi_number);
+	vsi_ctx->vsis_allocated = le16_to_cpu(resp->vsi_used);
+	vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+
+aq_add_vsi_exit:
+	return status;
+}
+
+/**
+ * i40e_aq_set_default_vsi
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int i40e_aq_set_default_vsi(struct i40e_hw *hw,
+			    u16 seid,
+			    struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+		(struct i40e_aqc_set_vsi_promiscuous_modes *)
+		&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+	cmd->promiscuous_flags = cpu_to_le16(I40E_AQC_SET_VSI_DEFAULT);
+	cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_DEFAULT);
+	cmd->seid = cpu_to_le16(seid);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_clear_default_vsi
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int i40e_aq_clear_default_vsi(struct i40e_hw *hw,
+			      u16 seid,
+			      struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+		(struct i40e_aqc_set_vsi_promiscuous_modes *)
+		&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+	cmd->promiscuous_flags = cpu_to_le16(0);
+	cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_DEFAULT);
+	cmd->seid = cpu_to_le16(seid);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_set_vsi_unicast_promiscuous
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @set: set unicast promiscuous enable/disable
+ * @cmd_details: pointer to command details structure or NULL
+ * @rx_only_promisc: flag to decide if egress traffic gets mirrored in promisc
+ **/
+int i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw,
+					u16 seid, bool set,
+					struct i40e_asq_cmd_details *cmd_details,
+					bool rx_only_promisc)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+		(struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+	u16 flags = 0;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+	if (set) {
+		flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST;
+		if (rx_only_promisc && i40e_is_aq_api_ver_ge(&hw->aq, 1, 5))
+			flags |= I40E_AQC_SET_VSI_PROMISC_RX_ONLY;
+	}
+
+	cmd->promiscuous_flags = cpu_to_le16(flags);
+
+	cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_UNICAST);
+	if (i40e_is_aq_api_ver_ge(&hw->aq, 1, 5))
+		cmd->valid_flags |=
+			cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_RX_ONLY);
+
+	cmd->seid = cpu_to_le16(seid);
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_set_vsi_multicast_promiscuous
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @set: set multicast promiscuous enable/disable
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw,
+					  u16 seid, bool set,
+					  struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+		(struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+	u16 flags = 0;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+	if (set)
+		flags |= I40E_AQC_SET_VSI_PROMISC_MULTICAST;
+
+	cmd->promiscuous_flags = cpu_to_le16(flags);
+
+	cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_MULTICAST);
+
+	cmd->seid = cpu_to_le16(seid);
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_set_vsi_mc_promisc_on_vlan
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @enable: set MAC L2 layer unicast promiscuous enable/disable for a given VLAN
+ * @vid: The VLAN tag filter - capture any multicast packet with this VLAN tag
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw,
+				       u16 seid, bool enable,
+				       u16 vid,
+				       struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+		(struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+	u16 flags = 0;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+	if (enable)
+		flags |= I40E_AQC_SET_VSI_PROMISC_MULTICAST;
+
+	cmd->promiscuous_flags = cpu_to_le16(flags);
+	cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_MULTICAST);
+	cmd->seid = cpu_to_le16(seid);
+	cmd->vlan_tag = cpu_to_le16(vid | I40E_AQC_SET_VSI_VLAN_VALID);
+
+	status = i40e_asq_send_command_atomic(hw, &desc, NULL, 0,
+					      cmd_details, true);
+
+	return status;
+}
+
+/**
+ * i40e_aq_set_vsi_uc_promisc_on_vlan
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @enable: set MAC L2 layer unicast promiscuous enable/disable for a given VLAN
+ * @vid: The VLAN tag filter - capture any unicast packet with this VLAN tag
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
+				       u16 seid, bool enable,
+				       u16 vid,
+				       struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+		(struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+	u16 flags = 0;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+	if (enable) {
+		flags |= I40E_AQC_SET_VSI_PROMISC_UNICAST;
+		if (i40e_is_aq_api_ver_ge(&hw->aq, 1, 5))
+			flags |= I40E_AQC_SET_VSI_PROMISC_RX_ONLY;
+	}
+
+	cmd->promiscuous_flags = cpu_to_le16(flags);
+	cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_UNICAST);
+	if (i40e_is_aq_api_ver_ge(&hw->aq, 1, 5))
+		cmd->valid_flags |=
+			cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_RX_ONLY);
+	cmd->seid = cpu_to_le16(seid);
+	cmd->vlan_tag = cpu_to_le16(vid | I40E_AQC_SET_VSI_VLAN_VALID);
+
+	status = i40e_asq_send_command_atomic(hw, &desc, NULL, 0,
+					      cmd_details, true);
+
+	return status;
+}
+
+/**
+ * i40e_aq_set_vsi_bc_promisc_on_vlan
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @enable: set broadcast promiscuous enable/disable for a given VLAN
+ * @vid: The VLAN tag filter - capture any broadcast packet with this VLAN tag
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw,
+				       u16 seid, bool enable, u16 vid,
+				       struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+		(struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+	u16 flags = 0;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+	if (enable)
+		flags |= I40E_AQC_SET_VSI_PROMISC_BROADCAST;
+
+	cmd->promiscuous_flags = cpu_to_le16(flags);
+	cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_BROADCAST);
+	cmd->seid = cpu_to_le16(seid);
+	cmd->vlan_tag = cpu_to_le16(vid | I40E_AQC_SET_VSI_VLAN_VALID);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_set_vsi_broadcast
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @set_filter: true to set filter, false to clear filter
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Set or clear the broadcast promiscuous flag (filter) for a given VSI.
+ **/
+int i40e_aq_set_vsi_broadcast(struct i40e_hw *hw,
+			      u16 seid, bool set_filter,
+			      struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+		(struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+	if (set_filter)
+		cmd->promiscuous_flags
+			    |= cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_BROADCAST);
+	else
+		cmd->promiscuous_flags
+			    &= cpu_to_le16(~I40E_AQC_SET_VSI_PROMISC_BROADCAST);
+
+	cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_BROADCAST);
+	cmd->seid = cpu_to_le16(seid);
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_set_vsi_vlan_promisc - control the VLAN promiscuous setting
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @enable: set MAC L2 layer unicast promiscuous enable/disable for a given VLAN
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw,
+				 u16 seid, bool enable,
+				 struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+		(struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+	u16 flags = 0;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					i40e_aqc_opc_set_vsi_promiscuous_modes);
+	if (enable)
+		flags |= I40E_AQC_SET_VSI_PROMISC_VLAN;
+
+	cmd->promiscuous_flags = cpu_to_le16(flags);
+	cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_VLAN);
+	cmd->seid = cpu_to_le16(seid);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_get_vsi_params - get VSI configuration info
+ * @hw: pointer to the hw struct
+ * @vsi_ctx: pointer to a vsi context struct
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int i40e_aq_get_vsi_params(struct i40e_hw *hw,
+			   struct i40e_vsi_context *vsi_ctx,
+			   struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_add_get_update_vsi *cmd =
+		(struct i40e_aqc_add_get_update_vsi *)&desc.params.raw;
+	struct i40e_aqc_add_get_update_vsi_completion *resp =
+		(struct i40e_aqc_add_get_update_vsi_completion *)
+		&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_get_vsi_parameters);
+
+	cmd->uplink_seid = cpu_to_le16(vsi_ctx->seid);
+
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+
+	status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info,
+				    sizeof(vsi_ctx->info), NULL);
+
+	if (status)
+		goto aq_get_vsi_params_exit;
+
+	vsi_ctx->seid = le16_to_cpu(resp->seid);
+	vsi_ctx->vsi_number = le16_to_cpu(resp->vsi_number);
+	vsi_ctx->vsis_allocated = le16_to_cpu(resp->vsi_used);
+	vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+
+aq_get_vsi_params_exit:
+	return status;
+}
+
+/**
+ * i40e_aq_update_vsi_params
+ * @hw: pointer to the hw struct
+ * @vsi_ctx: pointer to a vsi context struct
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Update a VSI context.
+ **/
+int i40e_aq_update_vsi_params(struct i40e_hw *hw,
+			      struct i40e_vsi_context *vsi_ctx,
+			      struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_add_get_update_vsi *cmd =
+		(struct i40e_aqc_add_get_update_vsi *)&desc.params.raw;
+	struct i40e_aqc_add_get_update_vsi_completion *resp =
+		(struct i40e_aqc_add_get_update_vsi_completion *)
+		&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_update_vsi_parameters);
+	cmd->uplink_seid = cpu_to_le16(vsi_ctx->seid);
+
+	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+
+	status = i40e_asq_send_command_atomic(hw, &desc, &vsi_ctx->info,
+					      sizeof(vsi_ctx->info),
+					      cmd_details, true);
+
+	vsi_ctx->vsis_allocated = le16_to_cpu(resp->vsi_used);
+	vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+
+	return status;
+}
+
+/**
+ * i40e_aq_get_switch_config
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the result buffer
+ * @buf_size: length of input buffer
+ * @start_seid: seid to start for the report, 0 == beginning
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Fill the buf with switch configuration returned from AdminQ command
+ **/
+int i40e_aq_get_switch_config(struct i40e_hw *hw,
+			      struct i40e_aqc_get_switch_config_resp *buf,
+			      u16 buf_size, u16 *start_seid,
+			      struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_switch_seid *scfg =
+		(struct i40e_aqc_switch_seid *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_get_switch_config);
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+	if (buf_size > I40E_AQ_LARGE_BUF)
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+	scfg->seid = cpu_to_le16(*start_seid);
+
+	status = i40e_asq_send_command(hw, &desc, buf, buf_size, cmd_details);
+	*start_seid = le16_to_cpu(scfg->seid);
+
+	return status;
+}
+
+/**
+ * i40e_aq_set_switch_config
+ * @hw: pointer to the hardware structure
+ * @flags: bit flag values to set
+ * @mode: cloud filter mode
+ * @valid_flags: which bit flags to set
+ * @mode: cloud filter mode
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Set switch configuration bits
+ **/
+int i40e_aq_set_switch_config(struct i40e_hw *hw,
+			      u16 flags,
+			      u16 valid_flags, u8 mode,
+			      struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_switch_config *scfg =
+		(struct i40e_aqc_set_switch_config *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_set_switch_config);
+	scfg->flags = cpu_to_le16(flags);
+	scfg->valid_flags = cpu_to_le16(valid_flags);
+	scfg->mode = mode;
+	if (hw->flags & I40E_HW_FLAG_802_1AD_CAPABLE) {
+		scfg->switch_tag = cpu_to_le16(hw->switch_tag);
+		scfg->first_tag = cpu_to_le16(hw->first_tag);
+		scfg->second_tag = cpu_to_le16(hw->second_tag);
+	}
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_get_firmware_version
+ * @hw: pointer to the hw struct
+ * @fw_major_version: firmware major version
+ * @fw_minor_version: firmware minor version
+ * @fw_build: firmware build number
+ * @api_major_version: major queue version
+ * @api_minor_version: minor queue version
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Get the firmware version from the admin queue commands
+ **/
+int i40e_aq_get_firmware_version(struct i40e_hw *hw,
+				 u16 *fw_major_version, u16 *fw_minor_version,
+				 u32 *fw_build,
+				 u16 *api_major_version, u16 *api_minor_version,
+				 struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_get_version *resp =
+		(struct i40e_aqc_get_version *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_version);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	if (!status) {
+		if (fw_major_version)
+			*fw_major_version = le16_to_cpu(resp->fw_major);
+		if (fw_minor_version)
+			*fw_minor_version = le16_to_cpu(resp->fw_minor);
+		if (fw_build)
+			*fw_build = le32_to_cpu(resp->fw_build);
+		if (api_major_version)
+			*api_major_version = le16_to_cpu(resp->api_major);
+		if (api_minor_version)
+			*api_minor_version = le16_to_cpu(resp->api_minor);
+	}
+
+	return status;
+}
+
+/**
+ * i40e_aq_send_driver_version
+ * @hw: pointer to the hw struct
+ * @dv: driver's major, minor version
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Send the driver version to the firmware
+ **/
+int i40e_aq_send_driver_version(struct i40e_hw *hw,
+				struct i40e_driver_version *dv,
+				struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_driver_version *cmd =
+		(struct i40e_aqc_driver_version *)&desc.params.raw;
+	int status;
+	u16 len;
+
+	if (dv == NULL)
+		return -EINVAL;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_driver_version);
+
+	desc.flags |= cpu_to_le16(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD);
+	cmd->driver_major_ver = dv->major_version;
+	cmd->driver_minor_ver = dv->minor_version;
+	cmd->driver_build_ver = dv->build_version;
+	cmd->driver_subbuild_ver = dv->subbuild_version;
+
+	len = 0;
+	while (len < sizeof(dv->driver_string) &&
+	       (dv->driver_string[len] < 0x80) &&
+	       dv->driver_string[len])
+		len++;
+	status = i40e_asq_send_command(hw, &desc, dv->driver_string,
+				       len, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_get_link_status - get status of the HW network link
+ * @hw: pointer to the hw struct
+ * @link_up: pointer to bool (true/false = linkup/linkdown)
+ *
+ * Variable link_up true if link is up, false if link is down.
+ * The variable link_up is invalid if returned value of status != 0
+ *
+ * Side effect: LinkStatusEvent reporting becomes enabled
+ **/
+int i40e_get_link_status(struct i40e_hw *hw, bool *link_up)
+{
+	int status = 0;
+
+	if (hw->phy.get_link_info) {
+		status = i40e_update_link_info(hw);
+
+		if (status)
+			i40e_debug(hw, I40E_DEBUG_LINK, "get link failed: status %d\n",
+				   status);
+	}
+
+	*link_up = hw->phy.link_info.link_info & I40E_AQ_LINK_UP;
+
+	return status;
+}
+
+/**
+ * i40e_update_link_info - update status of the HW network link
+ * @hw: pointer to the hw struct
+ **/
+noinline_for_stack int i40e_update_link_info(struct i40e_hw *hw)
+{
+	struct i40e_aq_get_phy_abilities_resp abilities;
+	int status = 0;
+
+	status = i40e_aq_get_link_info(hw, true, NULL, NULL);
+	if (status)
+		return status;
+
+	/* extra checking needed to ensure link info to user is timely */
+	if ((hw->phy.link_info.link_info & I40E_AQ_MEDIA_AVAILABLE) &&
+	    ((hw->phy.link_info.link_info & I40E_AQ_LINK_UP) ||
+	     !(hw->phy.link_info_old.link_info & I40E_AQ_LINK_UP))) {
+		status = i40e_aq_get_phy_capabilities(hw, false, false,
+						      &abilities, NULL);
+		if (status)
+			return status;
+
+		if (abilities.fec_cfg_curr_mod_ext_info &
+		    I40E_AQ_ENABLE_FEC_AUTO)
+			hw->phy.link_info.req_fec_info =
+				(I40E_AQ_REQUEST_FEC_KR |
+				 I40E_AQ_REQUEST_FEC_RS);
+		else
+			hw->phy.link_info.req_fec_info =
+				abilities.fec_cfg_curr_mod_ext_info &
+				(I40E_AQ_REQUEST_FEC_KR |
+				 I40E_AQ_REQUEST_FEC_RS);
+
+		memcpy(hw->phy.link_info.module_type, &abilities.module_type,
+		       sizeof(hw->phy.link_info.module_type));
+	}
+
+	return status;
+}
+
+/**
+ * i40e_aq_add_veb - Insert a VEB between the VSI and the MAC
+ * @hw: pointer to the hw struct
+ * @uplink_seid: the MAC or other gizmo SEID
+ * @downlink_seid: the VSI SEID
+ * @enabled_tc: bitmap of TCs to be enabled
+ * @default_port: true for default port VSI, false for control port
+ * @veb_seid: pointer to where to put the resulting VEB SEID
+ * @enable_stats: true to turn on VEB stats
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This asks the FW to add a VEB between the uplink and downlink
+ * elements.  If the uplink SEID is 0, this will be a floating VEB.
+ **/
+int i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid,
+		    u16 downlink_seid, u8 enabled_tc,
+		    bool default_port, u16 *veb_seid,
+		    bool enable_stats,
+		    struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_add_veb *cmd =
+		(struct i40e_aqc_add_veb *)&desc.params.raw;
+	struct i40e_aqc_add_veb_completion *resp =
+		(struct i40e_aqc_add_veb_completion *)&desc.params.raw;
+	u16 veb_flags = 0;
+	int status;
+
+	/* SEIDs need to either both be set or both be 0 for floating VEB */
+	if (!!uplink_seid != !!downlink_seid)
+		return -EINVAL;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_veb);
+
+	cmd->uplink_seid = cpu_to_le16(uplink_seid);
+	cmd->downlink_seid = cpu_to_le16(downlink_seid);
+	cmd->enable_tcs = enabled_tc;
+	if (!uplink_seid)
+		veb_flags |= I40E_AQC_ADD_VEB_FLOATING;
+	if (default_port)
+		veb_flags |= I40E_AQC_ADD_VEB_PORT_TYPE_DEFAULT;
+	else
+		veb_flags |= I40E_AQC_ADD_VEB_PORT_TYPE_DATA;
+
+	/* reverse logic here: set the bitflag to disable the stats */
+	if (!enable_stats)
+		veb_flags |= I40E_AQC_ADD_VEB_ENABLE_DISABLE_STATS;
+
+	cmd->veb_flags = cpu_to_le16(veb_flags);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	if (!status && veb_seid)
+		*veb_seid = le16_to_cpu(resp->veb_seid);
+
+	return status;
+}
+
+/**
+ * i40e_aq_get_veb_parameters - Retrieve VEB parameters
+ * @hw: pointer to the hw struct
+ * @veb_seid: the SEID of the VEB to query
+ * @switch_id: the uplink switch id
+ * @floating: set to true if the VEB is floating
+ * @statistic_index: index of the stats counter block for this VEB
+ * @vebs_used: number of VEB's used by function
+ * @vebs_free: total VEB's not reserved by any function
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This retrieves the parameters for a particular VEB, specified by
+ * uplink_seid, and returns them to the caller.
+ **/
+int i40e_aq_get_veb_parameters(struct i40e_hw *hw,
+			       u16 veb_seid, u16 *switch_id,
+			       bool *floating, u16 *statistic_index,
+			       u16 *vebs_used, u16 *vebs_free,
+			       struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_get_veb_parameters_completion *cmd_resp =
+		(struct i40e_aqc_get_veb_parameters_completion *)
+		&desc.params.raw;
+	int status;
+
+	if (veb_seid == 0)
+		return -EINVAL;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_get_veb_parameters);
+	cmd_resp->seid = cpu_to_le16(veb_seid);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+	if (status)
+		goto get_veb_exit;
+
+	if (switch_id)
+		*switch_id = le16_to_cpu(cmd_resp->switch_id);
+	if (statistic_index)
+		*statistic_index = le16_to_cpu(cmd_resp->statistic_index);
+	if (vebs_used)
+		*vebs_used = le16_to_cpu(cmd_resp->vebs_used);
+	if (vebs_free)
+		*vebs_free = le16_to_cpu(cmd_resp->vebs_free);
+	if (floating) {
+		u16 flags = le16_to_cpu(cmd_resp->veb_flags);
+
+		if (flags & I40E_AQC_ADD_VEB_FLOATING)
+			*floating = true;
+		else
+			*floating = false;
+	}
+
+get_veb_exit:
+	return status;
+}
+
+/**
+ * i40e_prepare_add_macvlan
+ * @mv_list: list of macvlans to be added
+ * @desc: pointer to AQ descriptor structure
+ * @count: length of the list
+ * @seid: VSI for the mac address
+ *
+ * Internal helper function that prepares the add macvlan request
+ * and returns the buffer size.
+ **/
+static u16
+i40e_prepare_add_macvlan(struct i40e_aqc_add_macvlan_element_data *mv_list,
+			 struct i40e_aq_desc *desc, u16 count, u16 seid)
+{
+	struct i40e_aqc_macvlan *cmd =
+		(struct i40e_aqc_macvlan *)&desc->params.raw;
+	u16 buf_size;
+	int i;
+
+	buf_size = count * sizeof(*mv_list);
+
+	/* prep the rest of the request */
+	i40e_fill_default_direct_cmd_desc(desc, i40e_aqc_opc_add_macvlan);
+	cmd->num_addresses = cpu_to_le16(count);
+	cmd->seid[0] = cpu_to_le16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid);
+	cmd->seid[1] = 0;
+	cmd->seid[2] = 0;
+
+	for (i = 0; i < count; i++)
+		if (is_multicast_ether_addr(mv_list[i].mac_addr))
+			mv_list[i].flags |=
+			       cpu_to_le16(I40E_AQC_MACVLAN_ADD_USE_SHARED_MAC);
+
+	desc->flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+	if (buf_size > I40E_AQ_LARGE_BUF)
+		desc->flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+	return buf_size;
+}
+
+/**
+ * i40e_aq_add_macvlan
+ * @hw: pointer to the hw struct
+ * @seid: VSI for the mac address
+ * @mv_list: list of macvlans to be added
+ * @count: length of the list
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Add MAC/VLAN addresses to the HW filtering
+ **/
+int
+i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid,
+		    struct i40e_aqc_add_macvlan_element_data *mv_list,
+		    u16 count, struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	u16 buf_size;
+
+	if (count == 0 || !mv_list || !hw)
+		return -EINVAL;
+
+	buf_size = i40e_prepare_add_macvlan(mv_list, &desc, count, seid);
+
+	return i40e_asq_send_command_atomic(hw, &desc, mv_list, buf_size,
+					    cmd_details, true);
+}
+
+/**
+ * i40e_aq_add_macvlan_v2
+ * @hw: pointer to the hw struct
+ * @seid: VSI for the mac address
+ * @mv_list: list of macvlans to be added
+ * @count: length of the list
+ * @cmd_details: pointer to command details structure or NULL
+ * @aq_status: pointer to Admin Queue status return value
+ *
+ * Add MAC/VLAN addresses to the HW filtering.
+ * The _v2 version returns the last Admin Queue status in aq_status
+ * to avoid race conditions in access to hw->aq.asq_last_status.
+ * It also calls _v2 versions of asq_send_command functions to
+ * get the aq_status on the stack.
+ **/
+int
+i40e_aq_add_macvlan_v2(struct i40e_hw *hw, u16 seid,
+		       struct i40e_aqc_add_macvlan_element_data *mv_list,
+		       u16 count, struct i40e_asq_cmd_details *cmd_details,
+		       enum i40e_admin_queue_err *aq_status)
+{
+	struct i40e_aq_desc desc;
+	u16 buf_size;
+
+	if (count == 0 || !mv_list || !hw)
+		return -EINVAL;
+
+	buf_size = i40e_prepare_add_macvlan(mv_list, &desc, count, seid);
+
+	return i40e_asq_send_command_atomic_v2(hw, &desc, mv_list, buf_size,
+					       cmd_details, true, aq_status);
+}
+
+/**
+ * i40e_aq_remove_macvlan
+ * @hw: pointer to the hw struct
+ * @seid: VSI for the mac address
+ * @mv_list: list of macvlans to be removed
+ * @count: length of the list
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Remove MAC/VLAN addresses from the HW filtering
+ **/
+int
+i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid,
+		       struct i40e_aqc_remove_macvlan_element_data *mv_list,
+		       u16 count, struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_macvlan *cmd =
+		(struct i40e_aqc_macvlan *)&desc.params.raw;
+	u16 buf_size;
+	int status;
+
+	if (count == 0 || !mv_list || !hw)
+		return -EINVAL;
+
+	buf_size = count * sizeof(*mv_list);
+
+	/* prep the rest of the request */
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_remove_macvlan);
+	cmd->num_addresses = cpu_to_le16(count);
+	cmd->seid[0] = cpu_to_le16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid);
+	cmd->seid[1] = 0;
+	cmd->seid[2] = 0;
+
+	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+	if (buf_size > I40E_AQ_LARGE_BUF)
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+	status = i40e_asq_send_command_atomic(hw, &desc, mv_list, buf_size,
+					      cmd_details, true);
+
+	return status;
+}
+
+/**
+ * i40e_aq_remove_macvlan_v2
+ * @hw: pointer to the hw struct
+ * @seid: VSI for the mac address
+ * @mv_list: list of macvlans to be removed
+ * @count: length of the list
+ * @cmd_details: pointer to command details structure or NULL
+ * @aq_status: pointer to Admin Queue status return value
+ *
+ * Remove MAC/VLAN addresses from the HW filtering.
+ * The _v2 version returns the last Admin Queue status in aq_status
+ * to avoid race conditions in access to hw->aq.asq_last_status.
+ * It also calls _v2 versions of asq_send_command functions to
+ * get the aq_status on the stack.
+ **/
+int
+i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid,
+			  struct i40e_aqc_remove_macvlan_element_data *mv_list,
+			  u16 count, struct i40e_asq_cmd_details *cmd_details,
+			  enum i40e_admin_queue_err *aq_status)
+{
+	struct i40e_aqc_macvlan *cmd;
+	struct i40e_aq_desc desc;
+	u16 buf_size;
+
+	if (count == 0 || !mv_list || !hw)
+		return -EINVAL;
+
+	buf_size = count * sizeof(*mv_list);
+
+	/* prep the rest of the request */
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_remove_macvlan);
+	cmd = (struct i40e_aqc_macvlan *)&desc.params.raw;
+	cmd->num_addresses = cpu_to_le16(count);
+	cmd->seid[0] = cpu_to_le16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid);
+	cmd->seid[1] = 0;
+	cmd->seid[2] = 0;
+
+	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+	if (buf_size > I40E_AQ_LARGE_BUF)
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+	return i40e_asq_send_command_atomic_v2(hw, &desc, mv_list, buf_size,
+						 cmd_details, true, aq_status);
+}
+
+/**
+ * i40e_mirrorrule_op - Internal helper function to add/delete mirror rule
+ * @hw: pointer to the hw struct
+ * @opcode: AQ opcode for add or delete mirror rule
+ * @sw_seid: Switch SEID (to which rule refers)
+ * @rule_type: Rule Type (ingress/egress/VLAN)
+ * @id: Destination VSI SEID or Rule ID
+ * @count: length of the list
+ * @mr_list: list of mirrored VSI SEIDs or VLAN IDs
+ * @cmd_details: pointer to command details structure or NULL
+ * @rule_id: Rule ID returned from FW
+ * @rules_used: Number of rules used in internal switch
+ * @rules_free: Number of rules free in internal switch
+ *
+ * Add/Delete a mirror rule to a specific switch. Mirror rules are supported for
+ * VEBs/VEPA elements only
+ **/
+static int i40e_mirrorrule_op(struct i40e_hw *hw,
+			      u16 opcode, u16 sw_seid, u16 rule_type, u16 id,
+			      u16 count, __le16 *mr_list,
+			      struct i40e_asq_cmd_details *cmd_details,
+			      u16 *rule_id, u16 *rules_used, u16 *rules_free)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_add_delete_mirror_rule *cmd =
+		(struct i40e_aqc_add_delete_mirror_rule *)&desc.params.raw;
+	struct i40e_aqc_add_delete_mirror_rule_completion *resp =
+	(struct i40e_aqc_add_delete_mirror_rule_completion *)&desc.params.raw;
+	u16 buf_size;
+	int status;
+
+	buf_size = count * sizeof(*mr_list);
+
+	/* prep the rest of the request */
+	i40e_fill_default_direct_cmd_desc(&desc, opcode);
+	cmd->seid = cpu_to_le16(sw_seid);
+	cmd->rule_type = cpu_to_le16(rule_type &
+				     I40E_AQC_MIRROR_RULE_TYPE_MASK);
+	cmd->num_entries = cpu_to_le16(count);
+	/* Dest VSI for add, rule_id for delete */
+	cmd->destination = cpu_to_le16(id);
+	if (mr_list) {
+		desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF |
+						I40E_AQ_FLAG_RD));
+		if (buf_size > I40E_AQ_LARGE_BUF)
+			desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+	}
+
+	status = i40e_asq_send_command(hw, &desc, mr_list, buf_size,
+				       cmd_details);
+	if (!status ||
+	    hw->aq.asq_last_status == I40E_AQ_RC_ENOSPC) {
+		if (rule_id)
+			*rule_id = le16_to_cpu(resp->rule_id);
+		if (rules_used)
+			*rules_used = le16_to_cpu(resp->mirror_rules_used);
+		if (rules_free)
+			*rules_free = le16_to_cpu(resp->mirror_rules_free);
+	}
+	return status;
+}
+
+/**
+ * i40e_aq_add_mirrorrule - add a mirror rule
+ * @hw: pointer to the hw struct
+ * @sw_seid: Switch SEID (to which rule refers)
+ * @rule_type: Rule Type (ingress/egress/VLAN)
+ * @dest_vsi: SEID of VSI to which packets will be mirrored
+ * @count: length of the list
+ * @mr_list: list of mirrored VSI SEIDs or VLAN IDs
+ * @cmd_details: pointer to command details structure or NULL
+ * @rule_id: Rule ID returned from FW
+ * @rules_used: Number of rules used in internal switch
+ * @rules_free: Number of rules free in internal switch
+ *
+ * Add mirror rule. Mirror rules are supported for VEBs or VEPA elements only
+ **/
+int i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
+			   u16 rule_type, u16 dest_vsi, u16 count,
+			   __le16 *mr_list,
+			   struct i40e_asq_cmd_details *cmd_details,
+			   u16 *rule_id, u16 *rules_used, u16 *rules_free)
+{
+	if (!(rule_type == I40E_AQC_MIRROR_RULE_TYPE_ALL_INGRESS ||
+	    rule_type == I40E_AQC_MIRROR_RULE_TYPE_ALL_EGRESS)) {
+		if (count == 0 || !mr_list)
+			return -EINVAL;
+	}
+
+	return i40e_mirrorrule_op(hw, i40e_aqc_opc_add_mirror_rule, sw_seid,
+				  rule_type, dest_vsi, count, mr_list,
+				  cmd_details, rule_id, rules_used, rules_free);
+}
+
+/**
+ * i40e_aq_delete_mirrorrule - delete a mirror rule
+ * @hw: pointer to the hw struct
+ * @sw_seid: Switch SEID (to which rule refers)
+ * @rule_type: Rule Type (ingress/egress/VLAN)
+ * @count: length of the list
+ * @rule_id: Rule ID that is returned in the receive desc as part of
+ *		add_mirrorrule.
+ * @mr_list: list of mirrored VLAN IDs to be removed
+ * @cmd_details: pointer to command details structure or NULL
+ * @rules_used: Number of rules used in internal switch
+ * @rules_free: Number of rules free in internal switch
+ *
+ * Delete a mirror rule. Mirror rules are supported for VEBs/VEPA elements only
+ **/
+int i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
+			      u16 rule_type, u16 rule_id, u16 count,
+			      __le16 *mr_list,
+			      struct i40e_asq_cmd_details *cmd_details,
+			      u16 *rules_used, u16 *rules_free)
+{
+	/* Rule ID has to be valid except rule_type: INGRESS VLAN mirroring */
+	if (rule_type == I40E_AQC_MIRROR_RULE_TYPE_VLAN) {
+		/* count and mr_list shall be valid for rule_type INGRESS VLAN
+		 * mirroring. For other rule_type, count and rule_type should
+		 * not matter.
+		 */
+		if (count == 0 || !mr_list)
+			return -EINVAL;
+	}
+
+	return i40e_mirrorrule_op(hw, i40e_aqc_opc_delete_mirror_rule, sw_seid,
+				  rule_type, rule_id, count, mr_list,
+				  cmd_details, NULL, rules_used, rules_free);
+}
+
+/**
+ * i40e_aq_send_msg_to_vf
+ * @hw: pointer to the hardware structure
+ * @vfid: VF id to send msg
+ * @v_opcode: opcodes for VF-PF communication
+ * @v_retval: return error code
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @cmd_details: pointer to command details
+ *
+ * send msg to vf
+ **/
+int i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid,
+			   u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen,
+			   struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_pf_vf_message *cmd =
+		(struct i40e_aqc_pf_vf_message *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_send_msg_to_vf);
+	cmd->id = cpu_to_le32(vfid);
+	desc.cookie_high = cpu_to_le32(v_opcode);
+	desc.cookie_low = cpu_to_le32(v_retval);
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_SI);
+	if (msglen) {
+		desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF |
+						I40E_AQ_FLAG_RD));
+		if (msglen > I40E_AQ_LARGE_BUF)
+			desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+		desc.datalen = cpu_to_le16(msglen);
+	}
+	status = i40e_asq_send_command(hw, &desc, msg, msglen, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_debug_read_register
+ * @hw: pointer to the hw struct
+ * @reg_addr: register address
+ * @reg_val: register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Read the register using the admin queue commands
+ **/
+int i40e_aq_debug_read_register(struct i40e_hw *hw,
+				u32 reg_addr, u64 *reg_val,
+				struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_debug_reg_read_write *cmd_resp =
+		(struct i40e_aqc_debug_reg_read_write *)&desc.params.raw;
+	int status;
+
+	if (reg_val == NULL)
+		return -EINVAL;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_debug_read_reg);
+
+	cmd_resp->address = cpu_to_le32(reg_addr);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	if (!status) {
+		*reg_val = ((u64)le32_to_cpu(cmd_resp->value_high) << 32) |
+			   (u64)le32_to_cpu(cmd_resp->value_low);
+	}
+
+	return status;
+}
+
+/**
+ * i40e_aq_debug_write_register
+ * @hw: pointer to the hw struct
+ * @reg_addr: register address
+ * @reg_val: register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Write to a register using the admin queue commands
+ **/
+int i40e_aq_debug_write_register(struct i40e_hw *hw,
+				 u32 reg_addr, u64 reg_val,
+				 struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_debug_reg_read_write *cmd =
+		(struct i40e_aqc_debug_reg_read_write *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_debug_write_reg);
+
+	cmd->address = cpu_to_le32(reg_addr);
+	cmd->value_high = cpu_to_le32((u32)(reg_val >> 32));
+	cmd->value_low = cpu_to_le32((u32)(reg_val & 0xFFFFFFFF));
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_request_resource
+ * @hw: pointer to the hw struct
+ * @resource: resource id
+ * @access: access type
+ * @sdp_number: resource number
+ * @timeout: the maximum time in ms that the driver may hold the resource
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * requests common resource using the admin queue commands
+ **/
+int i40e_aq_request_resource(struct i40e_hw *hw,
+			     enum i40e_aq_resources_ids resource,
+			     enum i40e_aq_resource_access_type access,
+			     u8 sdp_number, u64 *timeout,
+			     struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_request_resource *cmd_resp =
+		(struct i40e_aqc_request_resource *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_request_resource);
+
+	cmd_resp->resource_id = cpu_to_le16(resource);
+	cmd_resp->access_type = cpu_to_le16(access);
+	cmd_resp->resource_number = cpu_to_le32(sdp_number);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+	/* The completion specifies the maximum time in ms that the driver
+	 * may hold the resource in the Timeout field.
+	 * If the resource is held by someone else, the command completes with
+	 * busy return value and the timeout field indicates the maximum time
+	 * the current owner of the resource has to free it.
+	 */
+	if (!status || hw->aq.asq_last_status == I40E_AQ_RC_EBUSY)
+		*timeout = le32_to_cpu(cmd_resp->timeout);
+
+	return status;
+}
+
+/**
+ * i40e_aq_release_resource
+ * @hw: pointer to the hw struct
+ * @resource: resource id
+ * @sdp_number: resource number
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * release common resource using the admin queue commands
+ **/
+int i40e_aq_release_resource(struct i40e_hw *hw,
+			     enum i40e_aq_resources_ids resource,
+			     u8 sdp_number,
+			     struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_request_resource *cmd =
+		(struct i40e_aqc_request_resource *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_release_resource);
+
+	cmd->resource_id = cpu_to_le16(resource);
+	cmd->resource_number = cpu_to_le32(sdp_number);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_read_nvm
+ * @hw: pointer to the hw struct
+ * @module_pointer: module pointer location in words from the NVM beginning
+ * @offset: byte offset from the module beginning
+ * @length: length of the section to be read (in bytes from the offset)
+ * @data: command buffer (size [bytes] = length)
+ * @last_command: tells if this is the last command in a series
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Read the NVM using the admin queue commands
+ **/
+int i40e_aq_read_nvm(struct i40e_hw *hw, u8 module_pointer,
+		     u32 offset, u16 length, void *data,
+		     bool last_command,
+		     struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_nvm_update *cmd =
+		(struct i40e_aqc_nvm_update *)&desc.params.raw;
+	int status;
+
+	/* In offset the highest byte must be zeroed. */
+	if (offset & 0xFF000000) {
+		status = -EINVAL;
+		goto i40e_aq_read_nvm_exit;
+	}
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_nvm_read);
+
+	/* If this is the last command in a series, set the proper flag. */
+	if (last_command)
+		cmd->command_flags |= I40E_AQ_NVM_LAST_CMD;
+	cmd->module_pointer = module_pointer;
+	cmd->offset = cpu_to_le32(offset);
+	cmd->length = cpu_to_le16(length);
+
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+	if (length > I40E_AQ_LARGE_BUF)
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+	status = i40e_asq_send_command(hw, &desc, data, length, cmd_details);
+
+i40e_aq_read_nvm_exit:
+	return status;
+}
+
+/**
+ * i40e_aq_erase_nvm
+ * @hw: pointer to the hw struct
+ * @module_pointer: module pointer location in words from the NVM beginning
+ * @offset: offset in the module (expressed in 4 KB from module's beginning)
+ * @length: length of the section to be erased (expressed in 4 KB)
+ * @last_command: tells if this is the last command in a series
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Erase the NVM sector using the admin queue commands
+ **/
+int i40e_aq_erase_nvm(struct i40e_hw *hw, u8 module_pointer,
+		      u32 offset, u16 length, bool last_command,
+		      struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_nvm_update *cmd =
+		(struct i40e_aqc_nvm_update *)&desc.params.raw;
+	int status;
+
+	/* In offset the highest byte must be zeroed. */
+	if (offset & 0xFF000000) {
+		status = -EINVAL;
+		goto i40e_aq_erase_nvm_exit;
+	}
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_nvm_erase);
+
+	/* If this is the last command in a series, set the proper flag. */
+	if (last_command)
+		cmd->command_flags |= I40E_AQ_NVM_LAST_CMD;
+	cmd->module_pointer = module_pointer;
+	cmd->offset = cpu_to_le32(offset);
+	cmd->length = cpu_to_le16(length);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+i40e_aq_erase_nvm_exit:
+	return status;
+}
+
+/**
+ * i40e_parse_discover_capabilities
+ * @hw: pointer to the hw struct
+ * @buff: pointer to a buffer containing device/function capability records
+ * @cap_count: number of capability records in the list
+ * @list_type_opc: type of capabilities list to parse
+ *
+ * Parse the device/function capabilities list.
+ **/
+static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
+				     u32 cap_count,
+				     enum i40e_admin_queue_opc list_type_opc)
+{
+	struct i40e_aqc_list_capabilities_element_resp *cap;
+	u32 valid_functions, num_functions;
+	u32 number, logical_id, phys_id;
+	struct i40e_hw_capabilities *p;
+	u16 id, ocp_cfg_word0;
+	u8 major_rev;
+	int status;
+	u32 i = 0;
+
+	cap = (struct i40e_aqc_list_capabilities_element_resp *) buff;
+
+	if (list_type_opc == i40e_aqc_opc_list_dev_capabilities)
+		p = &hw->dev_caps;
+	else if (list_type_opc == i40e_aqc_opc_list_func_capabilities)
+		p = &hw->func_caps;
+	else
+		return;
+
+	for (i = 0; i < cap_count; i++, cap++) {
+		id = le16_to_cpu(cap->id);
+		number = le32_to_cpu(cap->number);
+		logical_id = le32_to_cpu(cap->logical_id);
+		phys_id = le32_to_cpu(cap->phys_id);
+		major_rev = cap->major_rev;
+
+		switch (id) {
+		case I40E_AQ_CAP_ID_SWITCH_MODE:
+			p->switch_mode = number;
+			break;
+		case I40E_AQ_CAP_ID_MNG_MODE:
+			p->management_mode = number;
+			if (major_rev > 1) {
+				p->mng_protocols_over_mctp = logical_id;
+				i40e_debug(hw, I40E_DEBUG_INIT,
+					   "HW Capability: Protocols over MCTP = %d\n",
+					   p->mng_protocols_over_mctp);
+			} else {
+				p->mng_protocols_over_mctp = 0;
+			}
+			break;
+		case I40E_AQ_CAP_ID_NPAR_ACTIVE:
+			p->npar_enable = number;
+			break;
+		case I40E_AQ_CAP_ID_OS2BMC_CAP:
+			p->os2bmc = number;
+			break;
+		case I40E_AQ_CAP_ID_FUNCTIONS_VALID:
+			p->valid_functions = number;
+			break;
+		case I40E_AQ_CAP_ID_SRIOV:
+			if (number == 1)
+				p->sr_iov_1_1 = true;
+			break;
+		case I40E_AQ_CAP_ID_VF:
+			p->num_vfs = number;
+			p->vf_base_id = logical_id;
+			break;
+		case I40E_AQ_CAP_ID_VMDQ:
+			if (number == 1)
+				p->vmdq = true;
+			break;
+		case I40E_AQ_CAP_ID_8021QBG:
+			if (number == 1)
+				p->evb_802_1_qbg = true;
+			break;
+		case I40E_AQ_CAP_ID_8021QBR:
+			if (number == 1)
+				p->evb_802_1_qbh = true;
+			break;
+		case I40E_AQ_CAP_ID_VSI:
+			p->num_vsis = number;
+			break;
+		case I40E_AQ_CAP_ID_DCB:
+			if (number == 1) {
+				p->dcb = true;
+				p->enabled_tcmap = logical_id;
+				p->maxtc = phys_id;
+			}
+			break;
+		case I40E_AQ_CAP_ID_FCOE:
+			if (number == 1)
+				p->fcoe = true;
+			break;
+		case I40E_AQ_CAP_ID_ISCSI:
+			if (number == 1)
+				p->iscsi = true;
+			break;
+		case I40E_AQ_CAP_ID_RSS:
+			p->rss = true;
+			p->rss_table_size = number;
+			p->rss_table_entry_width = logical_id;
+			break;
+		case I40E_AQ_CAP_ID_RXQ:
+			p->num_rx_qp = number;
+			p->base_queue = phys_id;
+			break;
+		case I40E_AQ_CAP_ID_TXQ:
+			p->num_tx_qp = number;
+			p->base_queue = phys_id;
+			break;
+		case I40E_AQ_CAP_ID_MSIX:
+			p->num_msix_vectors = number;
+			i40e_debug(hw, I40E_DEBUG_INIT,
+				   "HW Capability: MSIX vector count = %d\n",
+				   p->num_msix_vectors);
+			break;
+		case I40E_AQ_CAP_ID_VF_MSIX:
+			p->num_msix_vectors_vf = number;
+			break;
+		case I40E_AQ_CAP_ID_FLEX10:
+			if (major_rev == 1) {
+				if (number == 1) {
+					p->flex10_enable = true;
+					p->flex10_capable = true;
+				}
+			} else {
+				/* Capability revision >= 2 */
+				if (number & 1)
+					p->flex10_enable = true;
+				if (number & 2)
+					p->flex10_capable = true;
+			}
+			p->flex10_mode = logical_id;
+			p->flex10_status = phys_id;
+			break;
+		case I40E_AQ_CAP_ID_CEM:
+			if (number == 1)
+				p->mgmt_cem = true;
+			break;
+		case I40E_AQ_CAP_ID_IWARP:
+			if (number == 1)
+				p->iwarp = true;
+			break;
+		case I40E_AQ_CAP_ID_LED:
+			if (phys_id < I40E_HW_CAP_MAX_GPIO)
+				p->led[phys_id] = true;
+			break;
+		case I40E_AQ_CAP_ID_SDP:
+			if (phys_id < I40E_HW_CAP_MAX_GPIO)
+				p->sdp[phys_id] = true;
+			break;
+		case I40E_AQ_CAP_ID_MDIO:
+			if (number == 1) {
+				p->mdio_port_num = phys_id;
+				p->mdio_port_mode = logical_id;
+			}
+			break;
+		case I40E_AQ_CAP_ID_1588:
+			if (number == 1)
+				p->ieee_1588 = true;
+			break;
+		case I40E_AQ_CAP_ID_FLOW_DIRECTOR:
+			p->fd = true;
+			p->fd_filters_guaranteed = number;
+			p->fd_filters_best_effort = logical_id;
+			break;
+		case I40E_AQ_CAP_ID_WSR_PROT:
+			p->wr_csr_prot = (u64)number;
+			p->wr_csr_prot |= (u64)logical_id << 32;
+			break;
+		case I40E_AQ_CAP_ID_NVM_MGMT:
+			if (number & I40E_NVM_MGMT_SEC_REV_DISABLED)
+				p->sec_rev_disabled = true;
+			if (number & I40E_NVM_MGMT_UPDATE_DISABLED)
+				p->update_disabled = true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (p->fcoe)
+		i40e_debug(hw, I40E_DEBUG_ALL, "device is FCoE capable\n");
+
+	/* Software override ensuring FCoE is disabled if npar or mfp
+	 * mode because it is not supported in these modes.
+	 */
+	if (p->npar_enable || p->flex10_enable)
+		p->fcoe = false;
+
+	/* count the enabled ports (aka the "not disabled" ports) */
+	hw->num_ports = 0;
+	for (i = 0; i < 4; i++) {
+		u32 port_cfg_reg = I40E_PRTGEN_CNF + (4 * i);
+		u64 port_cfg = 0;
+
+		/* use AQ read to get the physical register offset instead
+		 * of the port relative offset
+		 */
+		i40e_aq_debug_read_register(hw, port_cfg_reg, &port_cfg, NULL);
+		if (!(port_cfg & I40E_PRTGEN_CNF_PORT_DIS_MASK))
+			hw->num_ports++;
+	}
+
+	/* OCP cards case: if a mezz is removed the Ethernet port is at
+	 * disabled state in PRTGEN_CNF register. Additional NVM read is
+	 * needed in order to check if we are dealing with OCP card.
+	 * Those cards have 4 PFs at minimum, so using PRTGEN_CNF for counting
+	 * physical ports results in wrong partition id calculation and thus
+	 * not supporting WoL.
+	 */
+	if (hw->mac.type == I40E_MAC_X722) {
+		if (!i40e_acquire_nvm(hw, I40E_RESOURCE_READ)) {
+			status = i40e_aq_read_nvm(hw, I40E_SR_EMP_MODULE_PTR,
+						  2 * I40E_SR_OCP_CFG_WORD0,
+						  sizeof(ocp_cfg_word0),
+						  &ocp_cfg_word0, true, NULL);
+			if (!status &&
+			    (ocp_cfg_word0 & I40E_SR_OCP_ENABLED))
+				hw->num_ports = 4;
+			i40e_release_nvm(hw);
+		}
+	}
+
+	valid_functions = p->valid_functions;
+	num_functions = 0;
+	while (valid_functions) {
+		if (valid_functions & 1)
+			num_functions++;
+		valid_functions >>= 1;
+	}
+
+	/* partition id is 1-based, and functions are evenly spread
+	 * across the ports as partitions
+	 */
+	if (hw->num_ports != 0) {
+		hw->partition_id = (hw->pf_id / hw->num_ports) + 1;
+		hw->num_partitions = num_functions / hw->num_ports;
+	}
+
+	/* additional HW specific goodies that might
+	 * someday be HW version specific
+	 */
+	p->rx_buf_chain_len = I40E_MAX_CHAINED_RX_BUFFERS;
+}
+
+/**
+ * i40e_aq_discover_capabilities
+ * @hw: pointer to the hw struct
+ * @buff: a virtual buffer to hold the capabilities
+ * @buff_size: Size of the virtual buffer
+ * @data_size: Size of the returned data, or buff size needed if AQ err==ENOMEM
+ * @list_type_opc: capabilities type to discover - pass in the command opcode
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Get the device capabilities descriptions from the firmware
+ **/
+int i40e_aq_discover_capabilities(struct i40e_hw *hw,
+				  void *buff, u16 buff_size, u16 *data_size,
+				  enum i40e_admin_queue_opc list_type_opc,
+				  struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aqc_list_capabilites *cmd;
+	struct i40e_aq_desc desc;
+	int status = 0;
+
+	cmd = (struct i40e_aqc_list_capabilites *)&desc.params.raw;
+
+	if (list_type_opc != i40e_aqc_opc_list_func_capabilities &&
+		list_type_opc != i40e_aqc_opc_list_dev_capabilities) {
+		status = -EINVAL;
+		goto exit;
+	}
+
+	i40e_fill_default_direct_cmd_desc(&desc, list_type_opc);
+
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+	if (buff_size > I40E_AQ_LARGE_BUF)
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+	status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+	*data_size = le16_to_cpu(desc.datalen);
+
+	if (status)
+		goto exit;
+
+	i40e_parse_discover_capabilities(hw, buff, le32_to_cpu(cmd->count),
+					 list_type_opc);
+
+exit:
+	return status;
+}
+
+/**
+ * i40e_aq_update_nvm
+ * @hw: pointer to the hw struct
+ * @module_pointer: module pointer location in words from the NVM beginning
+ * @offset: byte offset from the module beginning
+ * @length: length of the section to be written (in bytes from the offset)
+ * @data: command buffer (size [bytes] = length)
+ * @last_command: tells if this is the last command in a series
+ * @preservation_flags: Preservation mode flags
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Update the NVM using the admin queue commands
+ **/
+int i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
+		       u32 offset, u16 length, void *data,
+		       bool last_command, u8 preservation_flags,
+		       struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_nvm_update *cmd =
+		(struct i40e_aqc_nvm_update *)&desc.params.raw;
+	int status;
+
+	/* In offset the highest byte must be zeroed. */
+	if (offset & 0xFF000000) {
+		status = -EINVAL;
+		goto i40e_aq_update_nvm_exit;
+	}
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_nvm_update);
+
+	/* If this is the last command in a series, set the proper flag. */
+	if (last_command)
+		cmd->command_flags |= I40E_AQ_NVM_LAST_CMD;
+	if (hw->mac.type == I40E_MAC_X722) {
+		if (preservation_flags == I40E_NVM_PRESERVATION_FLAGS_SELECTED)
+			cmd->command_flags |=
+				(I40E_AQ_NVM_PRESERVATION_FLAGS_SELECTED <<
+				 I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT);
+		else if (preservation_flags == I40E_NVM_PRESERVATION_FLAGS_ALL)
+			cmd->command_flags |=
+				(I40E_AQ_NVM_PRESERVATION_FLAGS_ALL <<
+				 I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT);
+	}
+	cmd->module_pointer = module_pointer;
+	cmd->offset = cpu_to_le32(offset);
+	cmd->length = cpu_to_le16(length);
+
+	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+	if (length > I40E_AQ_LARGE_BUF)
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+	status = i40e_asq_send_command(hw, &desc, data, length, cmd_details);
+
+i40e_aq_update_nvm_exit:
+	return status;
+}
+
+/**
+ * i40e_aq_rearrange_nvm
+ * @hw: pointer to the hw struct
+ * @rearrange_nvm: defines direction of rearrangement
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Rearrange NVM structure, available only for transition FW
+ **/
+int i40e_aq_rearrange_nvm(struct i40e_hw *hw,
+			  u8 rearrange_nvm,
+			  struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aqc_nvm_update *cmd;
+	struct i40e_aq_desc desc;
+	int status;
+
+	cmd = (struct i40e_aqc_nvm_update *)&desc.params.raw;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_nvm_update);
+
+	rearrange_nvm &= (I40E_AQ_NVM_REARRANGE_TO_FLAT |
+			 I40E_AQ_NVM_REARRANGE_TO_STRUCT);
+
+	if (!rearrange_nvm) {
+		status = -EINVAL;
+		goto i40e_aq_rearrange_nvm_exit;
+	}
+
+	cmd->command_flags |= rearrange_nvm;
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+i40e_aq_rearrange_nvm_exit:
+	return status;
+}
+
+/**
+ * i40e_aq_get_lldp_mib
+ * @hw: pointer to the hw struct
+ * @bridge_type: type of bridge requested
+ * @mib_type: Local, Remote or both Local and Remote MIBs
+ * @buff: pointer to a user supplied buffer to store the MIB block
+ * @buff_size: size of the buffer (in bytes)
+ * @local_len : length of the returned Local LLDP MIB
+ * @remote_len: length of the returned Remote LLDP MIB
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Requests the complete LLDP MIB (entire packet).
+ **/
+int i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type,
+			 u8 mib_type, void *buff, u16 buff_size,
+			 u16 *local_len, u16 *remote_len,
+			 struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_lldp_get_mib *cmd =
+		(struct i40e_aqc_lldp_get_mib *)&desc.params.raw;
+	struct i40e_aqc_lldp_get_mib *resp =
+		(struct i40e_aqc_lldp_get_mib *)&desc.params.raw;
+	int status;
+
+	if (buff_size == 0 || !buff)
+		return -EINVAL;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_get_mib);
+	/* Indirect Command */
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+
+	cmd->type = mib_type & I40E_AQ_LLDP_MIB_TYPE_MASK;
+	cmd->type |= ((bridge_type << I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT) &
+		       I40E_AQ_LLDP_BRIDGE_TYPE_MASK);
+
+	desc.datalen = cpu_to_le16(buff_size);
+
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+	if (buff_size > I40E_AQ_LARGE_BUF)
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+	status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+	if (!status) {
+		if (local_len != NULL)
+			*local_len = le16_to_cpu(resp->local_len);
+		if (remote_len != NULL)
+			*remote_len = le16_to_cpu(resp->remote_len);
+	}
+
+	return status;
+}
+
+/**
+ * i40e_aq_set_lldp_mib - Set the LLDP MIB
+ * @hw: pointer to the hw struct
+ * @mib_type: Local, Remote or both Local and Remote MIBs
+ * @buff: pointer to a user supplied buffer to store the MIB block
+ * @buff_size: size of the buffer (in bytes)
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Set the LLDP MIB.
+ **/
+int
+i40e_aq_set_lldp_mib(struct i40e_hw *hw,
+		     u8 mib_type, void *buff, u16 buff_size,
+		     struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aqc_lldp_set_local_mib *cmd;
+	struct i40e_aq_desc desc;
+	int status;
+
+	cmd = (struct i40e_aqc_lldp_set_local_mib *)&desc.params.raw;
+	if (buff_size == 0 || !buff)
+		return -EINVAL;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_lldp_set_local_mib);
+	/* Indirect Command */
+	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+	if (buff_size > I40E_AQ_LARGE_BUF)
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+	desc.datalen = cpu_to_le16(buff_size);
+
+	cmd->type = mib_type;
+	cmd->length = cpu_to_le16(buff_size);
+	cmd->address_high = cpu_to_le32(upper_32_bits((uintptr_t)buff));
+	cmd->address_low = cpu_to_le32(lower_32_bits((uintptr_t)buff));
+
+	status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+	return status;
+}
+
+/**
+ * i40e_aq_cfg_lldp_mib_change_event
+ * @hw: pointer to the hw struct
+ * @enable_update: Enable or Disable event posting
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Enable or Disable posting of an event on ARQ when LLDP MIB
+ * associated with the interface changes
+ **/
+int i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw,
+				      bool enable_update,
+				      struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_lldp_update_mib *cmd =
+		(struct i40e_aqc_lldp_update_mib *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_update_mib);
+
+	if (!enable_update)
+		cmd->command |= I40E_AQ_LLDP_MIB_UPDATE_DISABLE;
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_restore_lldp
+ * @hw: pointer to the hw struct
+ * @setting: pointer to factory setting variable or NULL
+ * @restore: True if factory settings should be restored
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Restore LLDP Agent factory settings if @restore set to True. In other case
+ * only returns factory setting in AQ response.
+ **/
+int
+i40e_aq_restore_lldp(struct i40e_hw *hw, u8 *setting, bool restore,
+		     struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_lldp_restore *cmd =
+		(struct i40e_aqc_lldp_restore *)&desc.params.raw;
+	int status;
+
+	if (!(hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT)) {
+		i40e_debug(hw, I40E_DEBUG_ALL,
+			   "Restore LLDP not supported by current FW version.\n");
+		return -ENODEV;
+	}
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_restore);
+
+	if (restore)
+		cmd->command |= I40E_AQ_LLDP_AGENT_RESTORE;
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	if (setting)
+		*setting = cmd->command & 1;
+
+	return status;
+}
+
+/**
+ * i40e_aq_stop_lldp
+ * @hw: pointer to the hw struct
+ * @shutdown_agent: True if LLDP Agent needs to be Shutdown
+ * @persist: True if stop of LLDP should be persistent across power cycles
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Stop or Shutdown the embedded LLDP Agent
+ **/
+int i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
+		      bool persist,
+		      struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_lldp_stop *cmd =
+		(struct i40e_aqc_lldp_stop *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_stop);
+
+	if (shutdown_agent)
+		cmd->command |= I40E_AQ_LLDP_AGENT_SHUTDOWN;
+
+	if (persist) {
+		if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT)
+			cmd->command |= I40E_AQ_LLDP_AGENT_STOP_PERSIST;
+		else
+			i40e_debug(hw, I40E_DEBUG_ALL,
+				   "Persistent Stop LLDP not supported by current FW version.\n");
+	}
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_start_lldp
+ * @hw: pointer to the hw struct
+ * @persist: True if start of LLDP should be persistent across power cycles
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Start the embedded LLDP Agent on all ports.
+ **/
+int i40e_aq_start_lldp(struct i40e_hw *hw, bool persist,
+		       struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_lldp_start *cmd =
+		(struct i40e_aqc_lldp_start *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_start);
+
+	cmd->command = I40E_AQ_LLDP_AGENT_START;
+
+	if (persist) {
+		if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT)
+			cmd->command |= I40E_AQ_LLDP_AGENT_START_PERSIST;
+		else
+			i40e_debug(hw, I40E_DEBUG_ALL,
+				   "Persistent Start LLDP not supported by current FW version.\n");
+	}
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_set_dcb_parameters
+ * @hw: pointer to the hw struct
+ * @cmd_details: pointer to command details structure or NULL
+ * @dcb_enable: True if DCB configuration needs to be applied
+ *
+ **/
+int
+i40e_aq_set_dcb_parameters(struct i40e_hw *hw, bool dcb_enable,
+			   struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_dcb_parameters *cmd =
+		(struct i40e_aqc_set_dcb_parameters *)&desc.params.raw;
+	int status;
+
+	if (!(hw->flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE))
+		return -ENODEV;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_set_dcb_parameters);
+
+	if (dcb_enable) {
+		cmd->valid_flags = I40E_DCB_VALID;
+		cmd->command = I40E_AQ_DCB_SET_AGENT;
+	}
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_get_cee_dcb_config
+ * @hw: pointer to the hw struct
+ * @buff: response buffer that stores CEE operational configuration
+ * @buff_size: size of the buffer passed
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Get CEE DCBX mode operational configuration from firmware
+ **/
+int i40e_aq_get_cee_dcb_config(struct i40e_hw *hw,
+			       void *buff, u16 buff_size,
+			       struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	int status;
+
+	if (buff_size == 0 || !buff)
+		return -EINVAL;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_cee_dcb_cfg);
+
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+	status = i40e_asq_send_command(hw, &desc, (void *)buff, buff_size,
+				       cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_add_udp_tunnel
+ * @hw: pointer to the hw struct
+ * @udp_port: the UDP port to add in Host byte order
+ * @protocol_index: protocol index type
+ * @filter_index: pointer to filter index
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Note: Firmware expects the udp_port value to be in Little Endian format,
+ * and this function will call cpu_to_le16 to convert from Host byte order to
+ * Little Endian order.
+ **/
+int i40e_aq_add_udp_tunnel(struct i40e_hw *hw,
+			   u16 udp_port, u8 protocol_index,
+			   u8 *filter_index,
+			   struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_add_udp_tunnel *cmd =
+		(struct i40e_aqc_add_udp_tunnel *)&desc.params.raw;
+	struct i40e_aqc_del_udp_tunnel_completion *resp =
+		(struct i40e_aqc_del_udp_tunnel_completion *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_udp_tunnel);
+
+	cmd->udp_port = cpu_to_le16(udp_port);
+	cmd->protocol_type = protocol_index;
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	if (!status && filter_index)
+		*filter_index = resp->index;
+
+	return status;
+}
+
+/**
+ * i40e_aq_del_udp_tunnel
+ * @hw: pointer to the hw struct
+ * @index: filter index
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int i40e_aq_del_udp_tunnel(struct i40e_hw *hw, u8 index,
+			   struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_remove_udp_tunnel *cmd =
+		(struct i40e_aqc_remove_udp_tunnel *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_del_udp_tunnel);
+
+	cmd->index = index;
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_delete_element - Delete switch element
+ * @hw: pointer to the hw struct
+ * @seid: the SEID to delete from the switch
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This deletes a switch element from the switch.
+ **/
+int i40e_aq_delete_element(struct i40e_hw *hw, u16 seid,
+			   struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_switch_seid *cmd =
+		(struct i40e_aqc_switch_seid *)&desc.params.raw;
+	int status;
+
+	if (seid == 0)
+		return -EINVAL;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_delete_element);
+
+	cmd->seid = cpu_to_le16(seid);
+
+	status = i40e_asq_send_command_atomic(hw, &desc, NULL, 0,
+					      cmd_details, true);
+
+	return status;
+}
+
+/**
+ * i40e_aq_dcb_updated - DCB Updated Command
+ * @hw: pointer to the hw struct
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * EMP will return when the shared RPB settings have been
+ * recomputed and modified. The retval field in the descriptor
+ * will be set to 0 when RPB is modified.
+ **/
+int i40e_aq_dcb_updated(struct i40e_hw *hw,
+			struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_dcb_updated);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_tx_sched_cmd - generic Tx scheduler AQ command handler
+ * @hw: pointer to the hw struct
+ * @seid: seid for the physical port/switching component/vsi
+ * @buff: Indirect buffer to hold data parameters and response
+ * @buff_size: Indirect buffer size
+ * @opcode: Tx scheduler AQ command opcode
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Generic command handler for Tx scheduler AQ commands
+ **/
+static int i40e_aq_tx_sched_cmd(struct i40e_hw *hw, u16 seid,
+				void *buff, u16 buff_size,
+				enum i40e_admin_queue_opc opcode,
+				struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_tx_sched_ind *cmd =
+		(struct i40e_aqc_tx_sched_ind *)&desc.params.raw;
+	int status;
+	bool cmd_param_flag = false;
+
+	switch (opcode) {
+	case i40e_aqc_opc_configure_vsi_ets_sla_bw_limit:
+	case i40e_aqc_opc_configure_vsi_tc_bw:
+	case i40e_aqc_opc_enable_switching_comp_ets:
+	case i40e_aqc_opc_modify_switching_comp_ets:
+	case i40e_aqc_opc_disable_switching_comp_ets:
+	case i40e_aqc_opc_configure_switching_comp_ets_bw_limit:
+	case i40e_aqc_opc_configure_switching_comp_bw_config:
+		cmd_param_flag = true;
+		break;
+	case i40e_aqc_opc_query_vsi_bw_config:
+	case i40e_aqc_opc_query_vsi_ets_sla_config:
+	case i40e_aqc_opc_query_switching_comp_ets_config:
+	case i40e_aqc_opc_query_port_ets_config:
+	case i40e_aqc_opc_query_switching_comp_bw_config:
+		cmd_param_flag = false;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	i40e_fill_default_direct_cmd_desc(&desc, opcode);
+
+	/* Indirect command */
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+	if (cmd_param_flag)
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_RD);
+	if (buff_size > I40E_AQ_LARGE_BUF)
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+	desc.datalen = cpu_to_le16(buff_size);
+
+	cmd->vsi_seid = cpu_to_le16(seid);
+
+	status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_config_vsi_bw_limit - Configure VSI BW Limit
+ * @hw: pointer to the hw struct
+ * @seid: VSI seid
+ * @credit: BW limit credits (0 = disabled)
+ * @max_credit: Max BW limit credits
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int i40e_aq_config_vsi_bw_limit(struct i40e_hw *hw,
+				u16 seid, u16 credit, u8 max_credit,
+				struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_configure_vsi_bw_limit *cmd =
+		(struct i40e_aqc_configure_vsi_bw_limit *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_configure_vsi_bw_limit);
+
+	cmd->vsi_seid = cpu_to_le16(seid);
+	cmd->credit = cpu_to_le16(credit);
+	cmd->max_credit = max_credit;
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_config_vsi_tc_bw - Config VSI BW Allocation per TC
+ * @hw: pointer to the hw struct
+ * @seid: VSI seid
+ * @bw_data: Buffer holding enabled TCs, relative TC BW limit/credits
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int i40e_aq_config_vsi_tc_bw(struct i40e_hw *hw,
+			     u16 seid,
+			     struct i40e_aqc_configure_vsi_tc_bw_data *bw_data,
+			     struct i40e_asq_cmd_details *cmd_details)
+{
+	return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+				    i40e_aqc_opc_configure_vsi_tc_bw,
+				    cmd_details);
+}
+
+/**
+ * i40e_aq_config_switch_comp_ets - Enable/Disable/Modify ETS on the port
+ * @hw: pointer to the hw struct
+ * @seid: seid of the switching component connected to Physical Port
+ * @ets_data: Buffer holding ETS parameters
+ * @opcode: Tx scheduler AQ command opcode
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int
+i40e_aq_config_switch_comp_ets(struct i40e_hw *hw,
+			       u16 seid,
+			       struct i40e_aqc_configure_switching_comp_ets_data *ets_data,
+			       enum i40e_admin_queue_opc opcode,
+			       struct i40e_asq_cmd_details *cmd_details)
+{
+	return i40e_aq_tx_sched_cmd(hw, seid, (void *)ets_data,
+				    sizeof(*ets_data), opcode, cmd_details);
+}
+
+/**
+ * i40e_aq_config_switch_comp_bw_config - Config Switch comp BW Alloc per TC
+ * @hw: pointer to the hw struct
+ * @seid: seid of the switching component
+ * @bw_data: Buffer holding enabled TCs, relative/absolute TC BW limit/credits
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int
+i40e_aq_config_switch_comp_bw_config(struct i40e_hw *hw,
+	u16 seid,
+	struct i40e_aqc_configure_switching_comp_bw_config_data *bw_data,
+	struct i40e_asq_cmd_details *cmd_details)
+{
+	return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+			    i40e_aqc_opc_configure_switching_comp_bw_config,
+			    cmd_details);
+}
+
+/**
+ * i40e_aq_query_vsi_bw_config - Query VSI BW configuration
+ * @hw: pointer to the hw struct
+ * @seid: seid of the VSI
+ * @bw_data: Buffer to hold VSI BW configuration
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int
+i40e_aq_query_vsi_bw_config(struct i40e_hw *hw,
+			    u16 seid,
+			    struct i40e_aqc_query_vsi_bw_config_resp *bw_data,
+			    struct i40e_asq_cmd_details *cmd_details)
+{
+	return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+				    i40e_aqc_opc_query_vsi_bw_config,
+				    cmd_details);
+}
+
+/**
+ * i40e_aq_query_vsi_ets_sla_config - Query VSI BW configuration per TC
+ * @hw: pointer to the hw struct
+ * @seid: seid of the VSI
+ * @bw_data: Buffer to hold VSI BW configuration per TC
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int
+i40e_aq_query_vsi_ets_sla_config(struct i40e_hw *hw,
+				 u16 seid,
+				 struct i40e_aqc_query_vsi_ets_sla_config_resp *bw_data,
+				 struct i40e_asq_cmd_details *cmd_details)
+{
+	return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+				    i40e_aqc_opc_query_vsi_ets_sla_config,
+				    cmd_details);
+}
+
+/**
+ * i40e_aq_query_switch_comp_ets_config - Query Switch comp BW config per TC
+ * @hw: pointer to the hw struct
+ * @seid: seid of the switching component
+ * @bw_data: Buffer to hold switching component's per TC BW config
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int
+i40e_aq_query_switch_comp_ets_config(struct i40e_hw *hw,
+				     u16 seid,
+				     struct i40e_aqc_query_switching_comp_ets_config_resp *bw_data,
+				     struct i40e_asq_cmd_details *cmd_details)
+{
+	return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+				   i40e_aqc_opc_query_switching_comp_ets_config,
+				   cmd_details);
+}
+
+/**
+ * i40e_aq_query_port_ets_config - Query Physical Port ETS configuration
+ * @hw: pointer to the hw struct
+ * @seid: seid of the VSI or switching component connected to Physical Port
+ * @bw_data: Buffer to hold current ETS configuration for the Physical Port
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int
+i40e_aq_query_port_ets_config(struct i40e_hw *hw,
+			      u16 seid,
+			      struct i40e_aqc_query_port_ets_config_resp *bw_data,
+			      struct i40e_asq_cmd_details *cmd_details)
+{
+	return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+				    i40e_aqc_opc_query_port_ets_config,
+				    cmd_details);
+}
+
+/**
+ * i40e_aq_query_switch_comp_bw_config - Query Switch comp BW configuration
+ * @hw: pointer to the hw struct
+ * @seid: seid of the switching component
+ * @bw_data: Buffer to hold switching component's BW configuration
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int
+i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
+				    u16 seid,
+				    struct i40e_aqc_query_switching_comp_bw_config_resp *bw_data,
+				    struct i40e_asq_cmd_details *cmd_details)
+{
+	return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+				    i40e_aqc_opc_query_switching_comp_bw_config,
+				    cmd_details);
+}
+
+/**
+ * i40e_validate_filter_settings
+ * @hw: pointer to the hardware structure
+ * @settings: Filter control settings
+ *
+ * Check and validate the filter control settings passed.
+ * The function checks for the valid filter/context sizes being
+ * passed for FCoE and PE.
+ *
+ * Returns 0 if the values passed are valid and within
+ * range else returns an error.
+ **/
+static int
+i40e_validate_filter_settings(struct i40e_hw *hw,
+			      struct i40e_filter_control_settings *settings)
+{
+	u32 fcoe_cntx_size, fcoe_filt_size;
+	u32 fcoe_fmax;
+	u32 val;
+
+	/* Validate FCoE settings passed */
+	switch (settings->fcoe_filt_num) {
+	case I40E_HASH_FILTER_SIZE_1K:
+	case I40E_HASH_FILTER_SIZE_2K:
+	case I40E_HASH_FILTER_SIZE_4K:
+	case I40E_HASH_FILTER_SIZE_8K:
+	case I40E_HASH_FILTER_SIZE_16K:
+	case I40E_HASH_FILTER_SIZE_32K:
+		fcoe_filt_size = I40E_HASH_FILTER_BASE_SIZE;
+		fcoe_filt_size <<= (u32)settings->fcoe_filt_num;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (settings->fcoe_cntx_num) {
+	case I40E_DMA_CNTX_SIZE_512:
+	case I40E_DMA_CNTX_SIZE_1K:
+	case I40E_DMA_CNTX_SIZE_2K:
+	case I40E_DMA_CNTX_SIZE_4K:
+		fcoe_cntx_size = I40E_DMA_CNTX_BASE_SIZE;
+		fcoe_cntx_size <<= (u32)settings->fcoe_cntx_num;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Validate PE settings passed */
+	switch (settings->pe_filt_num) {
+	case I40E_HASH_FILTER_SIZE_1K:
+	case I40E_HASH_FILTER_SIZE_2K:
+	case I40E_HASH_FILTER_SIZE_4K:
+	case I40E_HASH_FILTER_SIZE_8K:
+	case I40E_HASH_FILTER_SIZE_16K:
+	case I40E_HASH_FILTER_SIZE_32K:
+	case I40E_HASH_FILTER_SIZE_64K:
+	case I40E_HASH_FILTER_SIZE_128K:
+	case I40E_HASH_FILTER_SIZE_256K:
+	case I40E_HASH_FILTER_SIZE_512K:
+	case I40E_HASH_FILTER_SIZE_1M:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (settings->pe_cntx_num) {
+	case I40E_DMA_CNTX_SIZE_512:
+	case I40E_DMA_CNTX_SIZE_1K:
+	case I40E_DMA_CNTX_SIZE_2K:
+	case I40E_DMA_CNTX_SIZE_4K:
+	case I40E_DMA_CNTX_SIZE_8K:
+	case I40E_DMA_CNTX_SIZE_16K:
+	case I40E_DMA_CNTX_SIZE_32K:
+	case I40E_DMA_CNTX_SIZE_64K:
+	case I40E_DMA_CNTX_SIZE_128K:
+	case I40E_DMA_CNTX_SIZE_256K:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* FCHSIZE + FCDSIZE should not be greater than PMFCOEFMAX */
+	val = rd32(hw, I40E_GLHMC_FCOEFMAX);
+	fcoe_fmax = (val & I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_MASK)
+		     >> I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_SHIFT;
+	if (fcoe_filt_size + fcoe_cntx_size >  fcoe_fmax)
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * i40e_set_filter_control
+ * @hw: pointer to the hardware structure
+ * @settings: Filter control settings
+ *
+ * Set the Queue Filters for PE/FCoE and enable filters required
+ * for a single PF. It is expected that these settings are programmed
+ * at the driver initialization time.
+ **/
+int i40e_set_filter_control(struct i40e_hw *hw,
+			    struct i40e_filter_control_settings *settings)
+{
+	u32 hash_lut_size = 0;
+	int ret = 0;
+	u32 val;
+
+	if (!settings)
+		return -EINVAL;
+
+	/* Validate the input settings */
+	ret = i40e_validate_filter_settings(hw, settings);
+	if (ret)
+		return ret;
+
+	/* Read the PF Queue Filter control register */
+	val = i40e_read_rx_ctl(hw, I40E_PFQF_CTL_0);
+
+	/* Program required PE hash buckets for the PF */
+	val &= ~I40E_PFQF_CTL_0_PEHSIZE_MASK;
+	val |= ((u32)settings->pe_filt_num << I40E_PFQF_CTL_0_PEHSIZE_SHIFT) &
+		I40E_PFQF_CTL_0_PEHSIZE_MASK;
+	/* Program required PE contexts for the PF */
+	val &= ~I40E_PFQF_CTL_0_PEDSIZE_MASK;
+	val |= ((u32)settings->pe_cntx_num << I40E_PFQF_CTL_0_PEDSIZE_SHIFT) &
+		I40E_PFQF_CTL_0_PEDSIZE_MASK;
+
+	/* Program required FCoE hash buckets for the PF */
+	val &= ~I40E_PFQF_CTL_0_PFFCHSIZE_MASK;
+	val |= ((u32)settings->fcoe_filt_num <<
+			I40E_PFQF_CTL_0_PFFCHSIZE_SHIFT) &
+		I40E_PFQF_CTL_0_PFFCHSIZE_MASK;
+	/* Program required FCoE DDP contexts for the PF */
+	val &= ~I40E_PFQF_CTL_0_PFFCDSIZE_MASK;
+	val |= ((u32)settings->fcoe_cntx_num <<
+			I40E_PFQF_CTL_0_PFFCDSIZE_SHIFT) &
+		I40E_PFQF_CTL_0_PFFCDSIZE_MASK;
+
+	/* Program Hash LUT size for the PF */
+	val &= ~I40E_PFQF_CTL_0_HASHLUTSIZE_MASK;
+	if (settings->hash_lut_size == I40E_HASH_LUT_SIZE_512)
+		hash_lut_size = 1;
+	val |= (hash_lut_size << I40E_PFQF_CTL_0_HASHLUTSIZE_SHIFT) &
+		I40E_PFQF_CTL_0_HASHLUTSIZE_MASK;
+
+	/* Enable FDIR, Ethertype and MACVLAN filters for PF and VFs */
+	if (settings->enable_fdir)
+		val |= I40E_PFQF_CTL_0_FD_ENA_MASK;
+	if (settings->enable_ethtype)
+		val |= I40E_PFQF_CTL_0_ETYPE_ENA_MASK;
+	if (settings->enable_macvlan)
+		val |= I40E_PFQF_CTL_0_MACVLAN_ENA_MASK;
+
+	i40e_write_rx_ctl(hw, I40E_PFQF_CTL_0, val);
+
+	return 0;
+}
+
+/**
+ * i40e_aq_add_rem_control_packet_filter - Add or Remove Control Packet Filter
+ * @hw: pointer to the hw struct
+ * @mac_addr: MAC address to use in the filter
+ * @ethtype: Ethertype to use in the filter
+ * @flags: Flags that needs to be applied to the filter
+ * @vsi_seid: seid of the control VSI
+ * @queue: VSI queue number to send the packet to
+ * @is_add: Add control packet filter if True else remove
+ * @stats: Structure to hold information on control filter counts
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This command will Add or Remove control packet filter for a control VSI.
+ * In return it will update the total number of perfect filter count in
+ * the stats member.
+ **/
+int i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw,
+					  u8 *mac_addr, u16 ethtype, u16 flags,
+					  u16 vsi_seid, u16 queue, bool is_add,
+					  struct i40e_control_filter_stats *stats,
+					  struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_add_remove_control_packet_filter *cmd =
+		(struct i40e_aqc_add_remove_control_packet_filter *)
+		&desc.params.raw;
+	struct i40e_aqc_add_remove_control_packet_filter_completion *resp =
+		(struct i40e_aqc_add_remove_control_packet_filter_completion *)
+		&desc.params.raw;
+	int status;
+
+	if (vsi_seid == 0)
+		return -EINVAL;
+
+	if (is_add) {
+		i40e_fill_default_direct_cmd_desc(&desc,
+				i40e_aqc_opc_add_control_packet_filter);
+		cmd->queue = cpu_to_le16(queue);
+	} else {
+		i40e_fill_default_direct_cmd_desc(&desc,
+				i40e_aqc_opc_remove_control_packet_filter);
+	}
+
+	if (mac_addr)
+		ether_addr_copy(cmd->mac, mac_addr);
+
+	cmd->etype = cpu_to_le16(ethtype);
+	cmd->flags = cpu_to_le16(flags);
+	cmd->seid = cpu_to_le16(vsi_seid);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	if (!status && stats) {
+		stats->mac_etype_used = le16_to_cpu(resp->mac_etype_used);
+		stats->etype_used = le16_to_cpu(resp->etype_used);
+		stats->mac_etype_free = le16_to_cpu(resp->mac_etype_free);
+		stats->etype_free = le16_to_cpu(resp->etype_free);
+	}
+
+	return status;
+}
+
+/**
+ * i40e_add_filter_to_drop_tx_flow_control_frames- filter to drop flow control
+ * @hw: pointer to the hw struct
+ * @seid: VSI seid to add ethertype filter from
+ **/
+void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw,
+						    u16 seid)
+{
+#define I40E_FLOW_CONTROL_ETHTYPE 0x8808
+	u16 flag = I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC |
+		   I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP |
+		   I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TX;
+	u16 ethtype = I40E_FLOW_CONTROL_ETHTYPE;
+	int status;
+
+	status = i40e_aq_add_rem_control_packet_filter(hw, NULL, ethtype, flag,
+						       seid, 0, true, NULL,
+						       NULL);
+	if (status)
+		hw_dbg(hw, "Ethtype Filter Add failed: Error pruning Tx flow control frames\n");
+}
+
+/**
+ * i40e_aq_alternate_read
+ * @hw: pointer to the hardware structure
+ * @reg_addr0: address of first dword to be read
+ * @reg_val0: pointer for data read from 'reg_addr0'
+ * @reg_addr1: address of second dword to be read
+ * @reg_val1: pointer for data read from 'reg_addr1'
+ *
+ * Read one or two dwords from alternate structure. Fields are indicated
+ * by 'reg_addr0' and 'reg_addr1' register numbers. If 'reg_val1' pointer
+ * is not passed then only register at 'reg_addr0' is read.
+ *
+ **/
+static int i40e_aq_alternate_read(struct i40e_hw *hw,
+				  u32 reg_addr0, u32 *reg_val0,
+				  u32 reg_addr1, u32 *reg_val1)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_alternate_write *cmd_resp =
+		(struct i40e_aqc_alternate_write *)&desc.params.raw;
+	int status;
+
+	if (!reg_val0)
+		return -EINVAL;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_alternate_read);
+	cmd_resp->address0 = cpu_to_le32(reg_addr0);
+	cmd_resp->address1 = cpu_to_le32(reg_addr1);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL);
+
+	if (!status) {
+		*reg_val0 = le32_to_cpu(cmd_resp->data0);
+
+		if (reg_val1)
+			*reg_val1 = le32_to_cpu(cmd_resp->data1);
+	}
+
+	return status;
+}
+
+/**
+ * i40e_aq_suspend_port_tx
+ * @hw: pointer to the hardware structure
+ * @seid: port seid
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Suspend port's Tx traffic
+ **/
+int i40e_aq_suspend_port_tx(struct i40e_hw *hw, u16 seid,
+			    struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aqc_tx_sched_ind *cmd;
+	struct i40e_aq_desc desc;
+	int status;
+
+	cmd = (struct i40e_aqc_tx_sched_ind *)&desc.params.raw;
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_suspend_port_tx);
+	cmd->vsi_seid = cpu_to_le16(seid);
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_resume_port_tx
+ * @hw: pointer to the hardware structure
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Resume port's Tx traffic
+ **/
+int i40e_aq_resume_port_tx(struct i40e_hw *hw,
+			   struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_resume_port_tx);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_set_pci_config_data - store PCI bus info
+ * @hw: pointer to hardware structure
+ * @link_status: the link status word from PCI config space
+ *
+ * Stores the PCI bus info (speed, width, type) within the i40e_hw structure
+ **/
+void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status)
+{
+	hw->bus.type = i40e_bus_type_pci_express;
+
+	switch (link_status & PCI_EXP_LNKSTA_NLW) {
+	case PCI_EXP_LNKSTA_NLW_X1:
+		hw->bus.width = i40e_bus_width_pcie_x1;
+		break;
+	case PCI_EXP_LNKSTA_NLW_X2:
+		hw->bus.width = i40e_bus_width_pcie_x2;
+		break;
+	case PCI_EXP_LNKSTA_NLW_X4:
+		hw->bus.width = i40e_bus_width_pcie_x4;
+		break;
+	case PCI_EXP_LNKSTA_NLW_X8:
+		hw->bus.width = i40e_bus_width_pcie_x8;
+		break;
+	default:
+		hw->bus.width = i40e_bus_width_unknown;
+		break;
+	}
+
+	switch (link_status & PCI_EXP_LNKSTA_CLS) {
+	case PCI_EXP_LNKSTA_CLS_2_5GB:
+		hw->bus.speed = i40e_bus_speed_2500;
+		break;
+	case PCI_EXP_LNKSTA_CLS_5_0GB:
+		hw->bus.speed = i40e_bus_speed_5000;
+		break;
+	case PCI_EXP_LNKSTA_CLS_8_0GB:
+		hw->bus.speed = i40e_bus_speed_8000;
+		break;
+	default:
+		hw->bus.speed = i40e_bus_speed_unknown;
+		break;
+	}
+}
+
+/**
+ * i40e_aq_debug_dump
+ * @hw: pointer to the hardware structure
+ * @cluster_id: specific cluster to dump
+ * @table_id: table id within cluster
+ * @start_index: index of line in the block to read
+ * @buff_size: dump buffer size
+ * @buff: dump buffer
+ * @ret_buff_size: actual buffer size returned
+ * @ret_next_table: next block to read
+ * @ret_next_index: next index to read
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Dump internal FW/HW data for debug purposes.
+ *
+ **/
+int i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id,
+		       u8 table_id, u32 start_index, u16 buff_size,
+		       void *buff, u16 *ret_buff_size,
+		       u8 *ret_next_table, u32 *ret_next_index,
+		       struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_debug_dump_internals *cmd =
+		(struct i40e_aqc_debug_dump_internals *)&desc.params.raw;
+	struct i40e_aqc_debug_dump_internals *resp =
+		(struct i40e_aqc_debug_dump_internals *)&desc.params.raw;
+	int status;
+
+	if (buff_size == 0 || !buff)
+		return -EINVAL;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_debug_dump_internals);
+	/* Indirect Command */
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+	if (buff_size > I40E_AQ_LARGE_BUF)
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+	cmd->cluster_id = cluster_id;
+	cmd->table_id = table_id;
+	cmd->idx = cpu_to_le32(start_index);
+
+	desc.datalen = cpu_to_le16(buff_size);
+
+	status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+	if (!status) {
+		if (ret_buff_size)
+			*ret_buff_size = le16_to_cpu(desc.datalen);
+		if (ret_next_table)
+			*ret_next_table = resp->table_id;
+		if (ret_next_index)
+			*ret_next_index = le32_to_cpu(resp->idx);
+	}
+
+	return status;
+}
+
+/**
+ * i40e_read_bw_from_alt_ram
+ * @hw: pointer to the hardware structure
+ * @max_bw: pointer for max_bw read
+ * @min_bw: pointer for min_bw read
+ * @min_valid: pointer for bool that is true if min_bw is a valid value
+ * @max_valid: pointer for bool that is true if max_bw is a valid value
+ *
+ * Read bw from the alternate ram for the given pf
+ **/
+int i40e_read_bw_from_alt_ram(struct i40e_hw *hw,
+			      u32 *max_bw, u32 *min_bw,
+			      bool *min_valid, bool *max_valid)
+{
+	u32 max_bw_addr, min_bw_addr;
+	int status;
+
+	/* Calculate the address of the min/max bw registers */
+	max_bw_addr = I40E_ALT_STRUCT_FIRST_PF_OFFSET +
+		      I40E_ALT_STRUCT_MAX_BW_OFFSET +
+		      (I40E_ALT_STRUCT_DWORDS_PER_PF * hw->pf_id);
+	min_bw_addr = I40E_ALT_STRUCT_FIRST_PF_OFFSET +
+		      I40E_ALT_STRUCT_MIN_BW_OFFSET +
+		      (I40E_ALT_STRUCT_DWORDS_PER_PF * hw->pf_id);
+
+	/* Read the bandwidths from alt ram */
+	status = i40e_aq_alternate_read(hw, max_bw_addr, max_bw,
+					min_bw_addr, min_bw);
+
+	if (*min_bw & I40E_ALT_BW_VALID_MASK)
+		*min_valid = true;
+	else
+		*min_valid = false;
+
+	if (*max_bw & I40E_ALT_BW_VALID_MASK)
+		*max_valid = true;
+	else
+		*max_valid = false;
+
+	return status;
+}
+
+/**
+ * i40e_aq_configure_partition_bw
+ * @hw: pointer to the hardware structure
+ * @bw_data: Buffer holding valid pfs and bw limits
+ * @cmd_details: pointer to command details
+ *
+ * Configure partitions guaranteed/max bw
+ **/
+int
+i40e_aq_configure_partition_bw(struct i40e_hw *hw,
+			       struct i40e_aqc_configure_partition_bw_data *bw_data,
+			       struct i40e_asq_cmd_details *cmd_details)
+{
+	u16 bwd_size = sizeof(*bw_data);
+	struct i40e_aq_desc desc;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_configure_partition_bw);
+
+	/* Indirect command */
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_RD);
+
+	if (bwd_size > I40E_AQ_LARGE_BUF)
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+	desc.datalen = cpu_to_le16(bwd_size);
+
+	status = i40e_asq_send_command(hw, &desc, bw_data, bwd_size,
+				       cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_read_phy_register_clause22
+ * @hw: pointer to the HW structure
+ * @reg: register address in the page
+ * @phy_addr: PHY address on MDIO interface
+ * @value: PHY register value
+ *
+ * Reads specified PHY register value
+ **/
+int i40e_read_phy_register_clause22(struct i40e_hw *hw,
+				    u16 reg, u8 phy_addr, u16 *value)
+{
+	u8 port_num = (u8)hw->func_caps.mdio_port_num;
+	int status = -EIO;
+	u32 command = 0;
+	u16 retry = 1000;
+
+	command = (reg << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
+		  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
+		  (I40E_MDIO_CLAUSE22_OPCODE_READ_MASK) |
+		  (I40E_MDIO_CLAUSE22_STCODE_MASK) |
+		  (I40E_GLGEN_MSCA_MDICMD_MASK);
+	wr32(hw, I40E_GLGEN_MSCA(port_num), command);
+	do {
+		command = rd32(hw, I40E_GLGEN_MSCA(port_num));
+		if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) {
+			status = 0;
+			break;
+		}
+		udelay(10);
+		retry--;
+	} while (retry);
+
+	if (status) {
+		i40e_debug(hw, I40E_DEBUG_PHY,
+			   "PHY: Can't write command to external PHY.\n");
+	} else {
+		command = rd32(hw, I40E_GLGEN_MSRWD(port_num));
+		*value = (command & I40E_GLGEN_MSRWD_MDIRDDATA_MASK) >>
+			 I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT;
+	}
+
+	return status;
+}
+
+/**
+ * i40e_write_phy_register_clause22
+ * @hw: pointer to the HW structure
+ * @reg: register address in the page
+ * @phy_addr: PHY address on MDIO interface
+ * @value: PHY register value
+ *
+ * Writes specified PHY register value
+ **/
+int i40e_write_phy_register_clause22(struct i40e_hw *hw,
+				     u16 reg, u8 phy_addr, u16 value)
+{
+	u8 port_num = (u8)hw->func_caps.mdio_port_num;
+	int status = -EIO;
+	u32 command  = 0;
+	u16 retry = 1000;
+
+	command = value << I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT;
+	wr32(hw, I40E_GLGEN_MSRWD(port_num), command);
+
+	command = (reg << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
+		  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
+		  (I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK) |
+		  (I40E_MDIO_CLAUSE22_STCODE_MASK) |
+		  (I40E_GLGEN_MSCA_MDICMD_MASK);
+
+	wr32(hw, I40E_GLGEN_MSCA(port_num), command);
+	do {
+		command = rd32(hw, I40E_GLGEN_MSCA(port_num));
+		if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) {
+			status = 0;
+			break;
+		}
+		udelay(10);
+		retry--;
+	} while (retry);
+
+	return status;
+}
+
+/**
+ * i40e_read_phy_register_clause45
+ * @hw: pointer to the HW structure
+ * @page: registers page number
+ * @reg: register address in the page
+ * @phy_addr: PHY address on MDIO interface
+ * @value: PHY register value
+ *
+ * Reads specified PHY register value
+ **/
+int i40e_read_phy_register_clause45(struct i40e_hw *hw,
+				    u8 page, u16 reg, u8 phy_addr, u16 *value)
+{
+	u8 port_num = hw->func_caps.mdio_port_num;
+	int status = -EIO;
+	u32 command = 0;
+	u16 retry = 1000;
+
+	command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) |
+		  (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
+		  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
+		  (I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK) |
+		  (I40E_MDIO_CLAUSE45_STCODE_MASK) |
+		  (I40E_GLGEN_MSCA_MDICMD_MASK) |
+		  (I40E_GLGEN_MSCA_MDIINPROGEN_MASK);
+	wr32(hw, I40E_GLGEN_MSCA(port_num), command);
+	do {
+		command = rd32(hw, I40E_GLGEN_MSCA(port_num));
+		if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) {
+			status = 0;
+			break;
+		}
+		usleep_range(10, 20);
+		retry--;
+	} while (retry);
+
+	if (status) {
+		i40e_debug(hw, I40E_DEBUG_PHY,
+			   "PHY: Can't write command to external PHY.\n");
+		goto phy_read_end;
+	}
+
+	command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
+		  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
+		  (I40E_MDIO_CLAUSE45_OPCODE_READ_MASK) |
+		  (I40E_MDIO_CLAUSE45_STCODE_MASK) |
+		  (I40E_GLGEN_MSCA_MDICMD_MASK) |
+		  (I40E_GLGEN_MSCA_MDIINPROGEN_MASK);
+	status = -EIO;
+	retry = 1000;
+	wr32(hw, I40E_GLGEN_MSCA(port_num), command);
+	do {
+		command = rd32(hw, I40E_GLGEN_MSCA(port_num));
+		if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) {
+			status = 0;
+			break;
+		}
+		usleep_range(10, 20);
+		retry--;
+	} while (retry);
+
+	if (!status) {
+		command = rd32(hw, I40E_GLGEN_MSRWD(port_num));
+		*value = (command & I40E_GLGEN_MSRWD_MDIRDDATA_MASK) >>
+			 I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT;
+	} else {
+		i40e_debug(hw, I40E_DEBUG_PHY,
+			   "PHY: Can't read register value from external PHY.\n");
+	}
+
+phy_read_end:
+	return status;
+}
+
+/**
+ * i40e_write_phy_register_clause45
+ * @hw: pointer to the HW structure
+ * @page: registers page number
+ * @reg: register address in the page
+ * @phy_addr: PHY address on MDIO interface
+ * @value: PHY register value
+ *
+ * Writes value to specified PHY register
+ **/
+int i40e_write_phy_register_clause45(struct i40e_hw *hw,
+				     u8 page, u16 reg, u8 phy_addr, u16 value)
+{
+	u8 port_num = hw->func_caps.mdio_port_num;
+	int status = -EIO;
+	u16 retry = 1000;
+	u32 command = 0;
+
+	command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) |
+		  (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
+		  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
+		  (I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK) |
+		  (I40E_MDIO_CLAUSE45_STCODE_MASK) |
+		  (I40E_GLGEN_MSCA_MDICMD_MASK) |
+		  (I40E_GLGEN_MSCA_MDIINPROGEN_MASK);
+	wr32(hw, I40E_GLGEN_MSCA(port_num), command);
+	do {
+		command = rd32(hw, I40E_GLGEN_MSCA(port_num));
+		if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) {
+			status = 0;
+			break;
+		}
+		usleep_range(10, 20);
+		retry--;
+	} while (retry);
+	if (status) {
+		i40e_debug(hw, I40E_DEBUG_PHY,
+			   "PHY: Can't write command to external PHY.\n");
+		goto phy_write_end;
+	}
+
+	command = value << I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT;
+	wr32(hw, I40E_GLGEN_MSRWD(port_num), command);
+
+	command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
+		  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
+		  (I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK) |
+		  (I40E_MDIO_CLAUSE45_STCODE_MASK) |
+		  (I40E_GLGEN_MSCA_MDICMD_MASK) |
+		  (I40E_GLGEN_MSCA_MDIINPROGEN_MASK);
+	status = -EIO;
+	retry = 1000;
+	wr32(hw, I40E_GLGEN_MSCA(port_num), command);
+	do {
+		command = rd32(hw, I40E_GLGEN_MSCA(port_num));
+		if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) {
+			status = 0;
+			break;
+		}
+		usleep_range(10, 20);
+		retry--;
+	} while (retry);
+
+phy_write_end:
+	return status;
+}
+
+/**
+ * i40e_write_phy_register
+ * @hw: pointer to the HW structure
+ * @page: registers page number
+ * @reg: register address in the page
+ * @phy_addr: PHY address on MDIO interface
+ * @value: PHY register value
+ *
+ * Writes value to specified PHY register
+ **/
+int i40e_write_phy_register(struct i40e_hw *hw,
+			    u8 page, u16 reg, u8 phy_addr, u16 value)
+{
+	int status;
+
+	switch (hw->device_id) {
+	case I40E_DEV_ID_1G_BASE_T_X722:
+		status = i40e_write_phy_register_clause22(hw, reg, phy_addr,
+							  value);
+		break;
+	case I40E_DEV_ID_1G_BASE_T_BC:
+	case I40E_DEV_ID_5G_BASE_T_BC:
+	case I40E_DEV_ID_10G_BASE_T:
+	case I40E_DEV_ID_10G_BASE_T4:
+	case I40E_DEV_ID_10G_BASE_T_BC:
+	case I40E_DEV_ID_10G_BASE_T_X722:
+	case I40E_DEV_ID_25G_B:
+	case I40E_DEV_ID_25G_SFP28:
+		status = i40e_write_phy_register_clause45(hw, page, reg,
+							  phy_addr, value);
+		break;
+	default:
+		status = -EIO;
+		break;
+	}
+
+	return status;
+}
+
+/**
+ * i40e_read_phy_register
+ * @hw: pointer to the HW structure
+ * @page: registers page number
+ * @reg: register address in the page
+ * @phy_addr: PHY address on MDIO interface
+ * @value: PHY register value
+ *
+ * Reads specified PHY register value
+ **/
+int i40e_read_phy_register(struct i40e_hw *hw,
+			   u8 page, u16 reg, u8 phy_addr, u16 *value)
+{
+	int status;
+
+	switch (hw->device_id) {
+	case I40E_DEV_ID_1G_BASE_T_X722:
+		status = i40e_read_phy_register_clause22(hw, reg, phy_addr,
+							 value);
+		break;
+	case I40E_DEV_ID_1G_BASE_T_BC:
+	case I40E_DEV_ID_5G_BASE_T_BC:
+	case I40E_DEV_ID_10G_BASE_T:
+	case I40E_DEV_ID_10G_BASE_T4:
+	case I40E_DEV_ID_10G_BASE_T_BC:
+	case I40E_DEV_ID_10G_BASE_T_X722:
+	case I40E_DEV_ID_25G_B:
+	case I40E_DEV_ID_25G_SFP28:
+		status = i40e_read_phy_register_clause45(hw, page, reg,
+							 phy_addr, value);
+		break;
+	default:
+		status = -EIO;
+		break;
+	}
+
+	return status;
+}
+
+/**
+ * i40e_get_phy_address
+ * @hw: pointer to the HW structure
+ * @dev_num: PHY port num that address we want
+ *
+ * Gets PHY address for current port
+ **/
+u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num)
+{
+	u8 port_num = hw->func_caps.mdio_port_num;
+	u32 reg_val = rd32(hw, I40E_GLGEN_MDIO_I2C_SEL(port_num));
+
+	return (u8)(reg_val >> ((dev_num + 1) * 5)) & 0x1f;
+}
+
+/**
+ * i40e_blink_phy_link_led
+ * @hw: pointer to the HW structure
+ * @time: time how long led will blinks in secs
+ * @interval: gap between LED on and off in msecs
+ *
+ * Blinks PHY link LED
+ **/
+int i40e_blink_phy_link_led(struct i40e_hw *hw,
+			    u32 time, u32 interval)
+{
+	u16 led_addr = I40E_PHY_LED_PROV_REG_1;
+	u16 gpio_led_port;
+	u8 phy_addr = 0;
+	int status = 0;
+	u16 led_ctl;
+	u8 port_num;
+	u16 led_reg;
+	u32 i;
+
+	i = rd32(hw, I40E_PFGEN_PORTNUM);
+	port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
+	phy_addr = i40e_get_phy_address(hw, port_num);
+
+	for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++,
+	     led_addr++) {
+		status = i40e_read_phy_register_clause45(hw,
+							 I40E_PHY_COM_REG_PAGE,
+							 led_addr, phy_addr,
+							 &led_reg);
+		if (status)
+			goto phy_blinking_end;
+		led_ctl = led_reg;
+		if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) {
+			led_reg = 0;
+			status = i40e_write_phy_register_clause45(hw,
+							 I40E_PHY_COM_REG_PAGE,
+							 led_addr, phy_addr,
+							 led_reg);
+			if (status)
+				goto phy_blinking_end;
+			break;
+		}
+	}
+
+	if (time > 0 && interval > 0) {
+		for (i = 0; i < time * 1000; i += interval) {
+			status = i40e_read_phy_register_clause45(hw,
+						I40E_PHY_COM_REG_PAGE,
+						led_addr, phy_addr, &led_reg);
+			if (status)
+				goto restore_config;
+			if (led_reg & I40E_PHY_LED_MANUAL_ON)
+				led_reg = 0;
+			else
+				led_reg = I40E_PHY_LED_MANUAL_ON;
+			status = i40e_write_phy_register_clause45(hw,
+						I40E_PHY_COM_REG_PAGE,
+						led_addr, phy_addr, led_reg);
+			if (status)
+				goto restore_config;
+			msleep(interval);
+		}
+	}
+
+restore_config:
+	status = i40e_write_phy_register_clause45(hw,
+						  I40E_PHY_COM_REG_PAGE,
+						  led_addr, phy_addr, led_ctl);
+
+phy_blinking_end:
+	return status;
+}
+
+/**
+ * i40e_led_get_reg - read LED register
+ * @hw: pointer to the HW structure
+ * @led_addr: LED register address
+ * @reg_val: read register value
+ **/
+static int i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr,
+			    u32 *reg_val)
+{
+	u8 phy_addr = 0;
+	u8 port_num;
+	int status;
+	u32 i;
+
+	*reg_val = 0;
+	if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
+		status =
+		       i40e_aq_get_phy_register(hw,
+						I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
+						I40E_PHY_COM_REG_PAGE, true,
+						I40E_PHY_LED_PROV_REG_1,
+						reg_val, NULL);
+	} else {
+		i = rd32(hw, I40E_PFGEN_PORTNUM);
+		port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
+		phy_addr = i40e_get_phy_address(hw, port_num);
+		status = i40e_read_phy_register_clause45(hw,
+							 I40E_PHY_COM_REG_PAGE,
+							 led_addr, phy_addr,
+							 (u16 *)reg_val);
+	}
+	return status;
+}
+
+/**
+ * i40e_led_set_reg - write LED register
+ * @hw: pointer to the HW structure
+ * @led_addr: LED register address
+ * @reg_val: register value to write
+ **/
+static int i40e_led_set_reg(struct i40e_hw *hw, u16 led_addr,
+			    u32 reg_val)
+{
+	u8 phy_addr = 0;
+	u8 port_num;
+	int status;
+	u32 i;
+
+	if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
+		status =
+		       i40e_aq_set_phy_register(hw,
+						I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
+						I40E_PHY_COM_REG_PAGE, true,
+						I40E_PHY_LED_PROV_REG_1,
+						reg_val, NULL);
+	} else {
+		i = rd32(hw, I40E_PFGEN_PORTNUM);
+		port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
+		phy_addr = i40e_get_phy_address(hw, port_num);
+		status = i40e_write_phy_register_clause45(hw,
+							  I40E_PHY_COM_REG_PAGE,
+							  led_addr, phy_addr,
+							  (u16)reg_val);
+	}
+
+	return status;
+}
+
+/**
+ * i40e_led_get_phy - return current on/off mode
+ * @hw: pointer to the hw struct
+ * @led_addr: address of led register to use
+ * @val: original value of register to use
+ *
+ **/
+int i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
+		     u16 *val)
+{
+	u16 gpio_led_port;
+	u8 phy_addr = 0;
+	u32 reg_val_aq;
+	int status = 0;
+	u16 temp_addr;
+	u16 reg_val;
+	u8 port_num;
+	u32 i;
+
+	if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
+		status =
+		      i40e_aq_get_phy_register(hw,
+					       I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
+					       I40E_PHY_COM_REG_PAGE, true,
+					       I40E_PHY_LED_PROV_REG_1,
+					       &reg_val_aq, NULL);
+		if (status == 0)
+			*val = (u16)reg_val_aq;
+		return status;
+	}
+	temp_addr = I40E_PHY_LED_PROV_REG_1;
+	i = rd32(hw, I40E_PFGEN_PORTNUM);
+	port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
+	phy_addr = i40e_get_phy_address(hw, port_num);
+
+	for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++,
+	     temp_addr++) {
+		status = i40e_read_phy_register_clause45(hw,
+							 I40E_PHY_COM_REG_PAGE,
+							 temp_addr, phy_addr,
+							 &reg_val);
+		if (status)
+			return status;
+		*val = reg_val;
+		if (reg_val & I40E_PHY_LED_LINK_MODE_MASK) {
+			*led_addr = temp_addr;
+			break;
+		}
+	}
+	return status;
+}
+
+/**
+ * i40e_led_set_phy
+ * @hw: pointer to the HW structure
+ * @on: true or false
+ * @led_addr: address of led register to use
+ * @mode: original val plus bit for set or ignore
+ *
+ * Set led's on or off when controlled by the PHY
+ *
+ **/
+int i40e_led_set_phy(struct i40e_hw *hw, bool on,
+		     u16 led_addr, u32 mode)
+{
+	u32 led_ctl = 0;
+	u32 led_reg = 0;
+	int status = 0;
+
+	status = i40e_led_get_reg(hw, led_addr, &led_reg);
+	if (status)
+		return status;
+	led_ctl = led_reg;
+	if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) {
+		led_reg = 0;
+		status = i40e_led_set_reg(hw, led_addr, led_reg);
+		if (status)
+			return status;
+	}
+	status = i40e_led_get_reg(hw, led_addr, &led_reg);
+	if (status)
+		goto restore_config;
+	if (on)
+		led_reg = I40E_PHY_LED_MANUAL_ON;
+	else
+		led_reg = 0;
+
+	status = i40e_led_set_reg(hw, led_addr, led_reg);
+	if (status)
+		goto restore_config;
+	if (mode & I40E_PHY_LED_MODE_ORIG) {
+		led_ctl = (mode & I40E_PHY_LED_MODE_MASK);
+		status = i40e_led_set_reg(hw, led_addr, led_ctl);
+	}
+	return status;
+
+restore_config:
+	status = i40e_led_set_reg(hw, led_addr, led_ctl);
+	return status;
+}
+
+/**
+ * i40e_aq_rx_ctl_read_register - use FW to read from an Rx control register
+ * @hw: pointer to the hw struct
+ * @reg_addr: register address
+ * @reg_val: ptr to register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Use the firmware to read the Rx control register,
+ * especially useful if the Rx unit is under heavy pressure
+ **/
+int i40e_aq_rx_ctl_read_register(struct i40e_hw *hw,
+				 u32 reg_addr, u32 *reg_val,
+				 struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_rx_ctl_reg_read_write *cmd_resp =
+		(struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw;
+	int status;
+
+	if (!reg_val)
+		return -EINVAL;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_rx_ctl_reg_read);
+
+	cmd_resp->address = cpu_to_le32(reg_addr);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	if (status == 0)
+		*reg_val = le32_to_cpu(cmd_resp->value);
+
+	return status;
+}
+
+/**
+ * i40e_read_rx_ctl - read from an Rx control register
+ * @hw: pointer to the hw struct
+ * @reg_addr: register address
+ **/
+u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr)
+{
+	bool use_register;
+	int status = 0;
+	int retry = 5;
+	u32 val = 0;
+
+	use_register = (((hw->aq.api_maj_ver == 1) &&
+			(hw->aq.api_min_ver < 5)) ||
+			(hw->mac.type == I40E_MAC_X722));
+	if (!use_register) {
+do_retry:
+		status = i40e_aq_rx_ctl_read_register(hw, reg_addr, &val, NULL);
+		if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) {
+			usleep_range(1000, 2000);
+			retry--;
+			goto do_retry;
+		}
+	}
+
+	/* if the AQ access failed, try the old-fashioned way */
+	if (status || use_register)
+		val = rd32(hw, reg_addr);
+
+	return val;
+}
+
+/**
+ * i40e_aq_rx_ctl_write_register
+ * @hw: pointer to the hw struct
+ * @reg_addr: register address
+ * @reg_val: register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Use the firmware to write to an Rx control register,
+ * especially useful if the Rx unit is under heavy pressure
+ **/
+int i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
+				  u32 reg_addr, u32 reg_val,
+				  struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_rx_ctl_reg_read_write *cmd =
+		(struct i40e_aqc_rx_ctl_reg_read_write *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_rx_ctl_reg_write);
+
+	cmd->address = cpu_to_le32(reg_addr);
+	cmd->value = cpu_to_le32(reg_val);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_write_rx_ctl - write to an Rx control register
+ * @hw: pointer to the hw struct
+ * @reg_addr: register address
+ * @reg_val: register value
+ **/
+void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val)
+{
+	bool use_register;
+	int status = 0;
+	int retry = 5;
+
+	use_register = (((hw->aq.api_maj_ver == 1) &&
+			(hw->aq.api_min_ver < 5)) ||
+			(hw->mac.type == I40E_MAC_X722));
+	if (!use_register) {
+do_retry:
+		status = i40e_aq_rx_ctl_write_register(hw, reg_addr,
+						       reg_val, NULL);
+		if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN && retry) {
+			usleep_range(1000, 2000);
+			retry--;
+			goto do_retry;
+		}
+	}
+
+	/* if the AQ access failed, try the old-fashioned way */
+	if (status || use_register)
+		wr32(hw, reg_addr, reg_val);
+}
+
+/**
+ * i40e_mdio_if_number_selection - MDIO I/F number selection
+ * @hw: pointer to the hw struct
+ * @set_mdio: use MDIO I/F number specified by mdio_num
+ * @mdio_num: MDIO I/F number
+ * @cmd: pointer to PHY Register command structure
+ **/
+static void i40e_mdio_if_number_selection(struct i40e_hw *hw, bool set_mdio,
+					  u8 mdio_num,
+					  struct i40e_aqc_phy_register_access *cmd)
+{
+	if (set_mdio && cmd->phy_interface == I40E_AQ_PHY_REG_ACCESS_EXTERNAL) {
+		if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED)
+			cmd->cmd_flags |=
+				I40E_AQ_PHY_REG_ACCESS_SET_MDIO_IF_NUMBER |
+				((mdio_num <<
+				I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_SHIFT) &
+				I40E_AQ_PHY_REG_ACCESS_MDIO_IF_NUMBER_MASK);
+		else
+			i40e_debug(hw, I40E_DEBUG_PHY,
+				   "MDIO I/F number selection not supported by current FW version.\n");
+	}
+}
+
+/**
+ * i40e_aq_set_phy_register_ext
+ * @hw: pointer to the hw struct
+ * @phy_select: select which phy should be accessed
+ * @dev_addr: PHY device address
+ * @page_change: flag to indicate if phy page should be updated
+ * @set_mdio: use MDIO I/F number specified by mdio_num
+ * @mdio_num: MDIO I/F number
+ * @reg_addr: PHY register address
+ * @reg_val: new register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Write the external PHY register.
+ * NOTE: In common cases MDIO I/F number should not be changed, thats why you
+ * may use simple wrapper i40e_aq_set_phy_register.
+ **/
+int i40e_aq_set_phy_register_ext(struct i40e_hw *hw,
+				 u8 phy_select, u8 dev_addr, bool page_change,
+				 bool set_mdio, u8 mdio_num,
+				 u32 reg_addr, u32 reg_val,
+				 struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_phy_register_access *cmd =
+		(struct i40e_aqc_phy_register_access *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_set_phy_register);
+
+	cmd->phy_interface = phy_select;
+	cmd->dev_address = dev_addr;
+	cmd->reg_address = cpu_to_le32(reg_addr);
+	cmd->reg_value = cpu_to_le32(reg_val);
+
+	i40e_mdio_if_number_selection(hw, set_mdio, mdio_num, cmd);
+
+	if (!page_change)
+		cmd->cmd_flags = I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE;
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_aq_get_phy_register_ext
+ * @hw: pointer to the hw struct
+ * @phy_select: select which phy should be accessed
+ * @dev_addr: PHY device address
+ * @page_change: flag to indicate if phy page should be updated
+ * @set_mdio: use MDIO I/F number specified by mdio_num
+ * @mdio_num: MDIO I/F number
+ * @reg_addr: PHY register address
+ * @reg_val: read register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Read the external PHY register.
+ * NOTE: In common cases MDIO I/F number should not be changed, thats why you
+ * may use simple wrapper i40e_aq_get_phy_register.
+ **/
+int i40e_aq_get_phy_register_ext(struct i40e_hw *hw,
+				 u8 phy_select, u8 dev_addr, bool page_change,
+				 bool set_mdio, u8 mdio_num,
+				 u32 reg_addr, u32 *reg_val,
+				 struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_phy_register_access *cmd =
+		(struct i40e_aqc_phy_register_access *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_get_phy_register);
+
+	cmd->phy_interface = phy_select;
+	cmd->dev_address = dev_addr;
+	cmd->reg_address = cpu_to_le32(reg_addr);
+
+	i40e_mdio_if_number_selection(hw, set_mdio, mdio_num, cmd);
+
+	if (!page_change)
+		cmd->cmd_flags = I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE;
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+	if (!status)
+		*reg_val = le32_to_cpu(cmd->reg_value);
+
+	return status;
+}
+
+/**
+ * i40e_aq_write_ddp - Write dynamic device personalization (ddp)
+ * @hw: pointer to the hw struct
+ * @buff: command buffer (size in bytes = buff_size)
+ * @buff_size: buffer size in bytes
+ * @track_id: package tracking id
+ * @error_offset: returns error offset
+ * @error_info: returns error information
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int i40e_aq_write_ddp(struct i40e_hw *hw, void *buff,
+		      u16 buff_size, u32 track_id,
+		      u32 *error_offset, u32 *error_info,
+		      struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_write_personalization_profile *cmd =
+		(struct i40e_aqc_write_personalization_profile *)
+		&desc.params.raw;
+	struct i40e_aqc_write_ddp_resp *resp;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_write_personalization_profile);
+
+	desc.flags |= cpu_to_le16(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD);
+	if (buff_size > I40E_AQ_LARGE_BUF)
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+	desc.datalen = cpu_to_le16(buff_size);
+
+	cmd->profile_track_id = cpu_to_le32(track_id);
+
+	status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+	if (!status) {
+		resp = (struct i40e_aqc_write_ddp_resp *)&desc.params.raw;
+		if (error_offset)
+			*error_offset = le32_to_cpu(resp->error_offset);
+		if (error_info)
+			*error_info = le32_to_cpu(resp->error_info);
+	}
+
+	return status;
+}
+
+/**
+ * i40e_aq_get_ddp_list - Read dynamic device personalization (ddp)
+ * @hw: pointer to the hw struct
+ * @buff: command buffer (size in bytes = buff_size)
+ * @buff_size: buffer size in bytes
+ * @flags: AdminQ command flags
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+int i40e_aq_get_ddp_list(struct i40e_hw *hw, void *buff,
+			 u16 buff_size, u8 flags,
+			 struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_get_applied_profiles *cmd =
+		(struct i40e_aqc_get_applied_profiles *)&desc.params.raw;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_get_personalization_profile_list);
+
+	desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+	if (buff_size > I40E_AQ_LARGE_BUF)
+		desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+	desc.datalen = cpu_to_le16(buff_size);
+
+	cmd->flags = flags;
+
+	status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+
+	return status;
+}
+
+/**
+ * i40e_find_segment_in_package
+ * @segment_type: the segment type to search for (i.e., SEGMENT_TYPE_I40E)
+ * @pkg_hdr: pointer to the package header to be searched
+ *
+ * This function searches a package file for a particular segment type. On
+ * success it returns a pointer to the segment header, otherwise it will
+ * return NULL.
+ **/
+struct i40e_generic_seg_header *
+i40e_find_segment_in_package(u32 segment_type,
+			     struct i40e_package_header *pkg_hdr)
+{
+	struct i40e_generic_seg_header *segment;
+	u32 i;
+
+	/* Search all package segments for the requested segment type */
+	for (i = 0; i < pkg_hdr->segment_count; i++) {
+		segment =
+			(struct i40e_generic_seg_header *)((u8 *)pkg_hdr +
+			 pkg_hdr->segment_offset[i]);
+
+		if (segment->type == segment_type)
+			return segment;
+	}
+
+	return NULL;
+}
+
+/* Get section table in profile */
+#define I40E_SECTION_TABLE(profile, sec_tbl)				\
+	do {								\
+		struct i40e_profile_segment *p = (profile);		\
+		u32 count;						\
+		u32 *nvm;						\
+		count = p->device_table_count;				\
+		nvm = (u32 *)&p->device_table[count];			\
+		sec_tbl = (struct i40e_section_table *)&nvm[nvm[0] + 1]; \
+	} while (0)
+
+/* Get section header in profile */
+#define I40E_SECTION_HEADER(profile, offset)				\
+	(struct i40e_profile_section_header *)((u8 *)(profile) + (offset))
+
+/**
+ * i40e_find_section_in_profile
+ * @section_type: the section type to search for (i.e., SECTION_TYPE_NOTE)
+ * @profile: pointer to the i40e segment header to be searched
+ *
+ * This function searches i40e segment for a particular section type. On
+ * success it returns a pointer to the section header, otherwise it will
+ * return NULL.
+ **/
+struct i40e_profile_section_header *
+i40e_find_section_in_profile(u32 section_type,
+			     struct i40e_profile_segment *profile)
+{
+	struct i40e_profile_section_header *sec;
+	struct i40e_section_table *sec_tbl;
+	u32 sec_off;
+	u32 i;
+
+	if (profile->header.type != SEGMENT_TYPE_I40E)
+		return NULL;
+
+	I40E_SECTION_TABLE(profile, sec_tbl);
+
+	for (i = 0; i < sec_tbl->section_count; i++) {
+		sec_off = sec_tbl->section_offset[i];
+		sec = I40E_SECTION_HEADER(profile, sec_off);
+		if (sec->section.type == section_type)
+			return sec;
+	}
+
+	return NULL;
+}
+
+/**
+ * i40e_ddp_exec_aq_section - Execute generic AQ for DDP
+ * @hw: pointer to the hw struct
+ * @aq: command buffer containing all data to execute AQ
+ **/
+static int i40e_ddp_exec_aq_section(struct i40e_hw *hw,
+				    struct i40e_profile_aq_section *aq)
+{
+	struct i40e_aq_desc desc;
+	u8 *msg = NULL;
+	u16 msglen;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc, aq->opcode);
+	desc.flags |= cpu_to_le16(aq->flags);
+	memcpy(desc.params.raw, aq->param, sizeof(desc.params.raw));
+
+	msglen = aq->datalen;
+	if (msglen) {
+		desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF |
+						I40E_AQ_FLAG_RD));
+		if (msglen > I40E_AQ_LARGE_BUF)
+			desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+		desc.datalen = cpu_to_le16(msglen);
+		msg = &aq->data[0];
+	}
+
+	status = i40e_asq_send_command(hw, &desc, msg, msglen, NULL);
+
+	if (status) {
+		i40e_debug(hw, I40E_DEBUG_PACKAGE,
+			   "unable to exec DDP AQ opcode %u, error %d\n",
+			   aq->opcode, status);
+		return status;
+	}
+
+	/* copy returned desc to aq_buf */
+	memcpy(aq->param, desc.params.raw, sizeof(desc.params.raw));
+
+	return 0;
+}
+
+/**
+ * i40e_validate_profile
+ * @hw: pointer to the hardware structure
+ * @profile: pointer to the profile segment of the package to be validated
+ * @track_id: package tracking id
+ * @rollback: flag if the profile is for rollback.
+ *
+ * Validates supported devices and profile's sections.
+ */
+static int
+i40e_validate_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+		      u32 track_id, bool rollback)
+{
+	struct i40e_profile_section_header *sec = NULL;
+	struct i40e_section_table *sec_tbl;
+	u32 vendor_dev_id;
+	int status = 0;
+	u32 dev_cnt;
+	u32 sec_off;
+	u32 i;
+
+	if (track_id == I40E_DDP_TRACKID_INVALID) {
+		i40e_debug(hw, I40E_DEBUG_PACKAGE, "Invalid track_id\n");
+		return -EOPNOTSUPP;
+	}
+
+	dev_cnt = profile->device_table_count;
+	for (i = 0; i < dev_cnt; i++) {
+		vendor_dev_id = profile->device_table[i].vendor_dev_id;
+		if ((vendor_dev_id >> 16) == PCI_VENDOR_ID_INTEL &&
+		    hw->device_id == (vendor_dev_id & 0xFFFF))
+			break;
+	}
+	if (dev_cnt && i == dev_cnt) {
+		i40e_debug(hw, I40E_DEBUG_PACKAGE,
+			   "Device doesn't support DDP\n");
+		return -ENODEV;
+	}
+
+	I40E_SECTION_TABLE(profile, sec_tbl);
+
+	/* Validate sections types */
+	for (i = 0; i < sec_tbl->section_count; i++) {
+		sec_off = sec_tbl->section_offset[i];
+		sec = I40E_SECTION_HEADER(profile, sec_off);
+		if (rollback) {
+			if (sec->section.type == SECTION_TYPE_MMIO ||
+			    sec->section.type == SECTION_TYPE_AQ ||
+			    sec->section.type == SECTION_TYPE_RB_AQ) {
+				i40e_debug(hw, I40E_DEBUG_PACKAGE,
+					   "Not a roll-back package\n");
+				return -EOPNOTSUPP;
+			}
+		} else {
+			if (sec->section.type == SECTION_TYPE_RB_AQ ||
+			    sec->section.type == SECTION_TYPE_RB_MMIO) {
+				i40e_debug(hw, I40E_DEBUG_PACKAGE,
+					   "Not an original package\n");
+				return -EOPNOTSUPP;
+			}
+		}
+	}
+
+	return status;
+}
+
+/**
+ * i40e_write_profile
+ * @hw: pointer to the hardware structure
+ * @profile: pointer to the profile segment of the package to be downloaded
+ * @track_id: package tracking id
+ *
+ * Handles the download of a complete package.
+ */
+int
+i40e_write_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+		   u32 track_id)
+{
+	struct i40e_profile_section_header *sec = NULL;
+	struct i40e_profile_aq_section *ddp_aq;
+	struct i40e_section_table *sec_tbl;
+	u32 offset = 0, info = 0;
+	u32 section_size = 0;
+	int status = 0;
+	u32 sec_off;
+	u32 i;
+
+	status = i40e_validate_profile(hw, profile, track_id, false);
+	if (status)
+		return status;
+
+	I40E_SECTION_TABLE(profile, sec_tbl);
+
+	for (i = 0; i < sec_tbl->section_count; i++) {
+		sec_off = sec_tbl->section_offset[i];
+		sec = I40E_SECTION_HEADER(profile, sec_off);
+		/* Process generic admin command */
+		if (sec->section.type == SECTION_TYPE_AQ) {
+			ddp_aq = (struct i40e_profile_aq_section *)&sec[1];
+			status = i40e_ddp_exec_aq_section(hw, ddp_aq);
+			if (status) {
+				i40e_debug(hw, I40E_DEBUG_PACKAGE,
+					   "Failed to execute aq: section %d, opcode %u\n",
+					   i, ddp_aq->opcode);
+				break;
+			}
+			sec->section.type = SECTION_TYPE_RB_AQ;
+		}
+
+		/* Skip any non-mmio sections */
+		if (sec->section.type != SECTION_TYPE_MMIO)
+			continue;
+
+		section_size = sec->section.size +
+			sizeof(struct i40e_profile_section_header);
+
+		/* Write MMIO section */
+		status = i40e_aq_write_ddp(hw, (void *)sec, (u16)section_size,
+					   track_id, &offset, &info, NULL);
+		if (status) {
+			i40e_debug(hw, I40E_DEBUG_PACKAGE,
+				   "Failed to write profile: section %d, offset %d, info %d\n",
+				   i, offset, info);
+			break;
+		}
+	}
+	return status;
+}
+
+/**
+ * i40e_rollback_profile
+ * @hw: pointer to the hardware structure
+ * @profile: pointer to the profile segment of the package to be removed
+ * @track_id: package tracking id
+ *
+ * Rolls back previously loaded package.
+ */
+int
+i40e_rollback_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+		      u32 track_id)
+{
+	struct i40e_profile_section_header *sec = NULL;
+	struct i40e_section_table *sec_tbl;
+	u32 offset = 0, info = 0;
+	u32 section_size = 0;
+	int status = 0;
+	u32 sec_off;
+	int i;
+
+	status = i40e_validate_profile(hw, profile, track_id, true);
+	if (status)
+		return status;
+
+	I40E_SECTION_TABLE(profile, sec_tbl);
+
+	/* For rollback write sections in reverse */
+	for (i = sec_tbl->section_count - 1; i >= 0; i--) {
+		sec_off = sec_tbl->section_offset[i];
+		sec = I40E_SECTION_HEADER(profile, sec_off);
+
+		/* Skip any non-rollback sections */
+		if (sec->section.type != SECTION_TYPE_RB_MMIO)
+			continue;
+
+		section_size = sec->section.size +
+			sizeof(struct i40e_profile_section_header);
+
+		/* Write roll-back MMIO section */
+		status = i40e_aq_write_ddp(hw, (void *)sec, (u16)section_size,
+					   track_id, &offset, &info, NULL);
+		if (status) {
+			i40e_debug(hw, I40E_DEBUG_PACKAGE,
+				   "Failed to write profile: section %d, offset %d, info %d\n",
+				   i, offset, info);
+			break;
+		}
+	}
+	return status;
+}
+
+/**
+ * i40e_add_pinfo_to_list
+ * @hw: pointer to the hardware structure
+ * @profile: pointer to the profile segment of the package
+ * @profile_info_sec: buffer for information section
+ * @track_id: package tracking id
+ *
+ * Register a profile to the list of loaded profiles.
+ */
+int
+i40e_add_pinfo_to_list(struct i40e_hw *hw,
+		       struct i40e_profile_segment *profile,
+		       u8 *profile_info_sec, u32 track_id)
+{
+	struct i40e_profile_section_header *sec = NULL;
+	struct i40e_profile_info *pinfo;
+	u32 offset = 0, info = 0;
+	int status = 0;
+
+	sec = (struct i40e_profile_section_header *)profile_info_sec;
+	sec->tbl_size = 1;
+	sec->data_end = sizeof(struct i40e_profile_section_header) +
+			sizeof(struct i40e_profile_info);
+	sec->section.type = SECTION_TYPE_INFO;
+	sec->section.offset = sizeof(struct i40e_profile_section_header);
+	sec->section.size = sizeof(struct i40e_profile_info);
+	pinfo = (struct i40e_profile_info *)(profile_info_sec +
+					     sec->section.offset);
+	pinfo->track_id = track_id;
+	pinfo->version = profile->version;
+	pinfo->op = I40E_DDP_ADD_TRACKID;
+	memcpy(pinfo->name, profile->name, I40E_DDP_NAME_SIZE);
+
+	status = i40e_aq_write_ddp(hw, (void *)sec, sec->data_end,
+				   track_id, &offset, &info, NULL);
+
+	return status;
+}
+
+/**
+ * i40e_aq_add_cloud_filters
+ * @hw: pointer to the hardware structure
+ * @seid: VSI seid to add cloud filters from
+ * @filters: Buffer which contains the filters to be added
+ * @filter_count: number of filters contained in the buffer
+ *
+ * Set the cloud filters for a given VSI.  The contents of the
+ * i40e_aqc_cloud_filters_element_data are filled in by the caller
+ * of the function.
+ *
+ **/
+int
+i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 seid,
+			  struct i40e_aqc_cloud_filters_element_data *filters,
+			  u8 filter_count)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_add_remove_cloud_filters *cmd =
+	(struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw;
+	u16 buff_len;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_add_cloud_filters);
+
+	buff_len = filter_count * sizeof(*filters);
+	desc.datalen = cpu_to_le16(buff_len);
+	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+	cmd->num_filters = filter_count;
+	cmd->seid = cpu_to_le16(seid);
+
+	status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL);
+
+	return status;
+}
+
+/**
+ * i40e_aq_add_cloud_filters_bb
+ * @hw: pointer to the hardware structure
+ * @seid: VSI seid to add cloud filters from
+ * @filters: Buffer which contains the filters in big buffer to be added
+ * @filter_count: number of filters contained in the buffer
+ *
+ * Set the big buffer cloud filters for a given VSI.  The contents of the
+ * i40e_aqc_cloud_filters_element_bb are filled in by the caller of the
+ * function.
+ *
+ **/
+int
+i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+			     struct i40e_aqc_cloud_filters_element_bb *filters,
+			     u8 filter_count)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_add_remove_cloud_filters *cmd =
+	(struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw;
+	u16 buff_len;
+	int status;
+	int i;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_add_cloud_filters);
+
+	buff_len = filter_count * sizeof(*filters);
+	desc.datalen = cpu_to_le16(buff_len);
+	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+	cmd->num_filters = filter_count;
+	cmd->seid = cpu_to_le16(seid);
+	cmd->big_buffer_flag = I40E_AQC_ADD_CLOUD_CMD_BB;
+
+	for (i = 0; i < filter_count; i++) {
+		u16 tnl_type;
+		u32 ti;
+
+		tnl_type = (le16_to_cpu(filters[i].element.flags) &
+			   I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK) >>
+			   I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT;
+
+		/* Due to hardware eccentricities, the VNI for Geneve is shifted
+		 * one more byte further than normally used for Tenant ID in
+		 * other tunnel types.
+		 */
+		if (tnl_type == I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE) {
+			ti = le32_to_cpu(filters[i].element.tenant_id);
+			filters[i].element.tenant_id = cpu_to_le32(ti << 8);
+		}
+	}
+
+	status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL);
+
+	return status;
+}
+
+/**
+ * i40e_aq_rem_cloud_filters
+ * @hw: pointer to the hardware structure
+ * @seid: VSI seid to remove cloud filters from
+ * @filters: Buffer which contains the filters to be removed
+ * @filter_count: number of filters contained in the buffer
+ *
+ * Remove the cloud filters for a given VSI.  The contents of the
+ * i40e_aqc_cloud_filters_element_data are filled in by the caller
+ * of the function.
+ *
+ **/
+int
+i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 seid,
+			  struct i40e_aqc_cloud_filters_element_data *filters,
+			  u8 filter_count)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_add_remove_cloud_filters *cmd =
+	(struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw;
+	u16 buff_len;
+	int status;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_remove_cloud_filters);
+
+	buff_len = filter_count * sizeof(*filters);
+	desc.datalen = cpu_to_le16(buff_len);
+	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+	cmd->num_filters = filter_count;
+	cmd->seid = cpu_to_le16(seid);
+
+	status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL);
+
+	return status;
+}
+
+/**
+ * i40e_aq_rem_cloud_filters_bb
+ * @hw: pointer to the hardware structure
+ * @seid: VSI seid to remove cloud filters from
+ * @filters: Buffer which contains the filters in big buffer to be removed
+ * @filter_count: number of filters contained in the buffer
+ *
+ * Remove the big buffer cloud filters for a given VSI.  The contents of the
+ * i40e_aqc_cloud_filters_element_bb are filled in by the caller of the
+ * function.
+ *
+ **/
+int
+i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+			     struct i40e_aqc_cloud_filters_element_bb *filters,
+			     u8 filter_count)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_add_remove_cloud_filters *cmd =
+	(struct i40e_aqc_add_remove_cloud_filters *)&desc.params.raw;
+	u16 buff_len;
+	int status;
+	int i;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					  i40e_aqc_opc_remove_cloud_filters);
+
+	buff_len = filter_count * sizeof(*filters);
+	desc.datalen = cpu_to_le16(buff_len);
+	desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+	cmd->num_filters = filter_count;
+	cmd->seid = cpu_to_le16(seid);
+	cmd->big_buffer_flag = I40E_AQC_ADD_CLOUD_CMD_BB;
+
+	for (i = 0; i < filter_count; i++) {
+		u16 tnl_type;
+		u32 ti;
+
+		tnl_type = (le16_to_cpu(filters[i].element.flags) &
+			   I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK) >>
+			   I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT;
+
+		/* Due to hardware eccentricities, the VNI for Geneve is shifted
+		 * one more byte further than normally used for Tenant ID in
+		 * other tunnel types.
+		 */
+		if (tnl_type == I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE) {
+			ti = le32_to_cpu(filters[i].element.tenant_id);
+			filters[i].element.tenant_id = cpu_to_le32(ti << 8);
+		}
+	}
+
+	status = i40e_asq_send_command(hw, &desc, filters, buff_len, NULL);
+
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
new file mode 100644
index 0000000000..f81e744c0f
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -0,0 +1,1987 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2021 Intel Corporation. */
+
+#include "i40e_adminq.h"
+#include "i40e_prototype.h"
+#include "i40e_dcb.h"
+
+/**
+ * i40e_get_dcbx_status
+ * @hw: pointer to the hw struct
+ * @status: Embedded DCBX Engine Status
+ *
+ * Get the DCBX status from the Firmware
+ **/
+int i40e_get_dcbx_status(struct i40e_hw *hw, u16 *status)
+{
+	u32 reg;
+
+	if (!status)
+		return -EINVAL;
+
+	reg = rd32(hw, I40E_PRTDCB_GENS);
+	*status = (u16)((reg & I40E_PRTDCB_GENS_DCBX_STATUS_MASK) >>
+			I40E_PRTDCB_GENS_DCBX_STATUS_SHIFT);
+
+	return 0;
+}
+
+/**
+ * i40e_parse_ieee_etscfg_tlv
+ * @tlv: IEEE 802.1Qaz ETS CFG TLV
+ * @dcbcfg: Local store to update ETS CFG data
+ *
+ * Parses IEEE 802.1Qaz ETS CFG TLV
+ **/
+static void i40e_parse_ieee_etscfg_tlv(struct i40e_lldp_org_tlv *tlv,
+				       struct i40e_dcbx_config *dcbcfg)
+{
+	struct i40e_dcb_ets_config *etscfg;
+	u8 *buf = tlv->tlvinfo;
+	u16 offset = 0;
+	u8 priority;
+	int i;
+
+	/* First Octet post subtype
+	 * --------------------------
+	 * |will-|CBS  | Re-  | Max |
+	 * |ing  |     |served| TCs |
+	 * --------------------------
+	 * |1bit | 1bit|3 bits|3bits|
+	 */
+	etscfg = &dcbcfg->etscfg;
+	etscfg->willing = (u8)((buf[offset] & I40E_IEEE_ETS_WILLING_MASK) >>
+			       I40E_IEEE_ETS_WILLING_SHIFT);
+	etscfg->cbs = (u8)((buf[offset] & I40E_IEEE_ETS_CBS_MASK) >>
+			   I40E_IEEE_ETS_CBS_SHIFT);
+	etscfg->maxtcs = (u8)((buf[offset] & I40E_IEEE_ETS_MAXTC_MASK) >>
+			      I40E_IEEE_ETS_MAXTC_SHIFT);
+
+	/* Move offset to Priority Assignment Table */
+	offset++;
+
+	/* Priority Assignment Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 */
+	for (i = 0; i < 4; i++) {
+		priority = (u8)((buf[offset] & I40E_IEEE_ETS_PRIO_1_MASK) >>
+				I40E_IEEE_ETS_PRIO_1_SHIFT);
+		etscfg->prioritytable[i * 2] =  priority;
+		priority = (u8)((buf[offset] & I40E_IEEE_ETS_PRIO_0_MASK) >>
+				I40E_IEEE_ETS_PRIO_0_SHIFT);
+		etscfg->prioritytable[i * 2 + 1] = priority;
+		offset++;
+	}
+
+	/* TC Bandwidth Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		etscfg->tcbwtable[i] = buf[offset++];
+
+	/* TSA Assignment Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		etscfg->tsatable[i] = buf[offset++];
+}
+
+/**
+ * i40e_parse_ieee_etsrec_tlv
+ * @tlv: IEEE 802.1Qaz ETS REC TLV
+ * @dcbcfg: Local store to update ETS REC data
+ *
+ * Parses IEEE 802.1Qaz ETS REC TLV
+ **/
+static void i40e_parse_ieee_etsrec_tlv(struct i40e_lldp_org_tlv *tlv,
+				       struct i40e_dcbx_config *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+	u16 offset = 0;
+	u8 priority;
+	int i;
+
+	/* Move offset to priority table */
+	offset++;
+
+	/* Priority Assignment Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 */
+	for (i = 0; i < 4; i++) {
+		priority = (u8)((buf[offset] & I40E_IEEE_ETS_PRIO_1_MASK) >>
+				I40E_IEEE_ETS_PRIO_1_SHIFT);
+		dcbcfg->etsrec.prioritytable[i*2] =  priority;
+		priority = (u8)((buf[offset] & I40E_IEEE_ETS_PRIO_0_MASK) >>
+				I40E_IEEE_ETS_PRIO_0_SHIFT);
+		dcbcfg->etsrec.prioritytable[i*2 + 1] = priority;
+		offset++;
+	}
+
+	/* TC Bandwidth Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		dcbcfg->etsrec.tcbwtable[i] = buf[offset++];
+
+	/* TSA Assignment Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		dcbcfg->etsrec.tsatable[i] = buf[offset++];
+}
+
+/**
+ * i40e_parse_ieee_pfccfg_tlv
+ * @tlv: IEEE 802.1Qaz PFC CFG TLV
+ * @dcbcfg: Local store to update PFC CFG data
+ *
+ * Parses IEEE 802.1Qaz PFC CFG TLV
+ **/
+static void i40e_parse_ieee_pfccfg_tlv(struct i40e_lldp_org_tlv *tlv,
+				       struct i40e_dcbx_config *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+
+	/* ----------------------------------------
+	 * |will-|MBC  | Re-  | PFC |  PFC Enable  |
+	 * |ing  |     |served| cap |              |
+	 * -----------------------------------------
+	 * |1bit | 1bit|2 bits|4bits| 1 octet      |
+	 */
+	dcbcfg->pfc.willing = (u8)((buf[0] & I40E_IEEE_PFC_WILLING_MASK) >>
+				   I40E_IEEE_PFC_WILLING_SHIFT);
+	dcbcfg->pfc.mbc = (u8)((buf[0] & I40E_IEEE_PFC_MBC_MASK) >>
+			       I40E_IEEE_PFC_MBC_SHIFT);
+	dcbcfg->pfc.pfccap = (u8)((buf[0] & I40E_IEEE_PFC_CAP_MASK) >>
+				  I40E_IEEE_PFC_CAP_SHIFT);
+	dcbcfg->pfc.pfcenable = buf[1];
+}
+
+/**
+ * i40e_parse_ieee_app_tlv
+ * @tlv: IEEE 802.1Qaz APP TLV
+ * @dcbcfg: Local store to update APP PRIO data
+ *
+ * Parses IEEE 802.1Qaz APP PRIO TLV
+ **/
+static void i40e_parse_ieee_app_tlv(struct i40e_lldp_org_tlv *tlv,
+				    struct i40e_dcbx_config *dcbcfg)
+{
+	u16 typelength;
+	u16 offset = 0;
+	u16 length;
+	int i = 0;
+	u8 *buf;
+
+	typelength = ntohs(tlv->typelength);
+	length = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >>
+		       I40E_LLDP_TLV_LEN_SHIFT);
+	buf = tlv->tlvinfo;
+
+	/* The App priority table starts 5 octets after TLV header */
+	length -= (sizeof(tlv->ouisubtype) + 1);
+
+	/* Move offset to App Priority Table */
+	offset++;
+
+	/* Application Priority Table (3 octets)
+	 * Octets:|         1          |    2    |    3    |
+	 *        -----------------------------------------
+	 *        |Priority|Rsrvd| Sel |    Protocol ID    |
+	 *        -----------------------------------------
+	 *   Bits:|23    21|20 19|18 16|15                0|
+	 *        -----------------------------------------
+	 */
+	while (offset < length) {
+		dcbcfg->app[i].priority = (u8)((buf[offset] &
+						I40E_IEEE_APP_PRIO_MASK) >>
+					       I40E_IEEE_APP_PRIO_SHIFT);
+		dcbcfg->app[i].selector = (u8)((buf[offset] &
+						I40E_IEEE_APP_SEL_MASK) >>
+					       I40E_IEEE_APP_SEL_SHIFT);
+		dcbcfg->app[i].protocolid = (buf[offset + 1] << 0x8) |
+					     buf[offset + 2];
+		/* Move to next app */
+		offset += 3;
+		i++;
+		if (i >= I40E_DCBX_MAX_APPS)
+			break;
+	}
+
+	dcbcfg->numapps = i;
+}
+
+/**
+ * i40e_parse_ieee_tlv
+ * @tlv: IEEE 802.1Qaz TLV
+ * @dcbcfg: Local store to update ETS REC data
+ *
+ * Get the TLV subtype and send it to parsing function
+ * based on the subtype value
+ **/
+static void i40e_parse_ieee_tlv(struct i40e_lldp_org_tlv *tlv,
+				struct i40e_dcbx_config *dcbcfg)
+{
+	u32 ouisubtype;
+	u8 subtype;
+
+	ouisubtype = ntohl(tlv->ouisubtype);
+	subtype = (u8)((ouisubtype & I40E_LLDP_TLV_SUBTYPE_MASK) >>
+		       I40E_LLDP_TLV_SUBTYPE_SHIFT);
+	switch (subtype) {
+	case I40E_IEEE_SUBTYPE_ETS_CFG:
+		i40e_parse_ieee_etscfg_tlv(tlv, dcbcfg);
+		break;
+	case I40E_IEEE_SUBTYPE_ETS_REC:
+		i40e_parse_ieee_etsrec_tlv(tlv, dcbcfg);
+		break;
+	case I40E_IEEE_SUBTYPE_PFC_CFG:
+		i40e_parse_ieee_pfccfg_tlv(tlv, dcbcfg);
+		break;
+	case I40E_IEEE_SUBTYPE_APP_PRI:
+		i40e_parse_ieee_app_tlv(tlv, dcbcfg);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * i40e_parse_cee_pgcfg_tlv
+ * @tlv: CEE DCBX PG CFG TLV
+ * @dcbcfg: Local store to update ETS CFG data
+ *
+ * Parses CEE DCBX PG CFG TLV
+ **/
+static void i40e_parse_cee_pgcfg_tlv(struct i40e_cee_feat_tlv *tlv,
+				     struct i40e_dcbx_config *dcbcfg)
+{
+	struct i40e_dcb_ets_config *etscfg;
+	u8 *buf = tlv->tlvinfo;
+	u16 offset = 0;
+	u8 priority;
+	int i;
+
+	etscfg = &dcbcfg->etscfg;
+
+	if (tlv->en_will_err & I40E_CEE_FEAT_TLV_WILLING_MASK)
+		etscfg->willing = 1;
+
+	etscfg->cbs = 0;
+	/* Priority Group Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 */
+	for (i = 0; i < 4; i++) {
+		priority = (u8)((buf[offset] & I40E_CEE_PGID_PRIO_1_MASK) >>
+				 I40E_CEE_PGID_PRIO_1_SHIFT);
+		etscfg->prioritytable[i * 2] =  priority;
+		priority = (u8)((buf[offset] & I40E_CEE_PGID_PRIO_0_MASK) >>
+				 I40E_CEE_PGID_PRIO_0_SHIFT);
+		etscfg->prioritytable[i * 2 + 1] = priority;
+		offset++;
+	}
+
+	/* PG Percentage Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |pg0|pg1|pg2|pg3|pg4|pg5|pg6|pg7|
+	 *        ---------------------------------
+	 */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		etscfg->tcbwtable[i] = buf[offset++];
+
+	/* Number of TCs supported (1 octet) */
+	etscfg->maxtcs = buf[offset];
+}
+
+/**
+ * i40e_parse_cee_pfccfg_tlv
+ * @tlv: CEE DCBX PFC CFG TLV
+ * @dcbcfg: Local store to update PFC CFG data
+ *
+ * Parses CEE DCBX PFC CFG TLV
+ **/
+static void i40e_parse_cee_pfccfg_tlv(struct i40e_cee_feat_tlv *tlv,
+				      struct i40e_dcbx_config *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+
+	if (tlv->en_will_err & I40E_CEE_FEAT_TLV_WILLING_MASK)
+		dcbcfg->pfc.willing = 1;
+
+	/* ------------------------
+	 * | PFC Enable | PFC TCs |
+	 * ------------------------
+	 * | 1 octet    | 1 octet |
+	 */
+	dcbcfg->pfc.pfcenable = buf[0];
+	dcbcfg->pfc.pfccap = buf[1];
+}
+
+/**
+ * i40e_parse_cee_app_tlv
+ * @tlv: CEE DCBX APP TLV
+ * @dcbcfg: Local store to update APP PRIO data
+ *
+ * Parses CEE DCBX APP PRIO TLV
+ **/
+static void i40e_parse_cee_app_tlv(struct i40e_cee_feat_tlv *tlv,
+				   struct i40e_dcbx_config *dcbcfg)
+{
+	u16 length, typelength, offset = 0;
+	struct i40e_cee_app_prio *app;
+	u8 i;
+
+	typelength = ntohs(tlv->hdr.typelen);
+	length = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >>
+		       I40E_LLDP_TLV_LEN_SHIFT);
+
+	dcbcfg->numapps = length / sizeof(*app);
+
+	if (!dcbcfg->numapps)
+		return;
+	if (dcbcfg->numapps > I40E_DCBX_MAX_APPS)
+		dcbcfg->numapps = I40E_DCBX_MAX_APPS;
+
+	for (i = 0; i < dcbcfg->numapps; i++) {
+		u8 up, selector;
+
+		app = (struct i40e_cee_app_prio *)(tlv->tlvinfo + offset);
+		for (up = 0; up < I40E_MAX_USER_PRIORITY; up++) {
+			if (app->prio_map & BIT(up))
+				break;
+		}
+		dcbcfg->app[i].priority = up;
+
+		/* Get Selector from lower 2 bits, and convert to IEEE */
+		selector = (app->upper_oui_sel & I40E_CEE_APP_SELECTOR_MASK);
+		switch (selector) {
+		case I40E_CEE_APP_SEL_ETHTYPE:
+			dcbcfg->app[i].selector = I40E_APP_SEL_ETHTYPE;
+			break;
+		case I40E_CEE_APP_SEL_TCPIP:
+			dcbcfg->app[i].selector = I40E_APP_SEL_TCPIP;
+			break;
+		default:
+			/* Keep selector as it is for unknown types */
+			dcbcfg->app[i].selector = selector;
+		}
+
+		dcbcfg->app[i].protocolid = ntohs(app->protocol);
+		/* Move to next app */
+		offset += sizeof(*app);
+	}
+}
+
+/**
+ * i40e_parse_cee_tlv
+ * @tlv: CEE DCBX TLV
+ * @dcbcfg: Local store to update DCBX config data
+ *
+ * Get the TLV subtype and send it to parsing function
+ * based on the subtype value
+ **/
+static void i40e_parse_cee_tlv(struct i40e_lldp_org_tlv *tlv,
+			       struct i40e_dcbx_config *dcbcfg)
+{
+	u16 len, tlvlen, sublen, typelength;
+	struct i40e_cee_feat_tlv *sub_tlv;
+	u8 subtype, feat_tlv_count = 0;
+	u32 ouisubtype;
+
+	ouisubtype = ntohl(tlv->ouisubtype);
+	subtype = (u8)((ouisubtype & I40E_LLDP_TLV_SUBTYPE_MASK) >>
+		       I40E_LLDP_TLV_SUBTYPE_SHIFT);
+	/* Return if not CEE DCBX */
+	if (subtype != I40E_CEE_DCBX_TYPE)
+		return;
+
+	typelength = ntohs(tlv->typelength);
+	tlvlen = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >>
+			I40E_LLDP_TLV_LEN_SHIFT);
+	len = sizeof(tlv->typelength) + sizeof(ouisubtype) +
+	      sizeof(struct i40e_cee_ctrl_tlv);
+	/* Return if no CEE DCBX Feature TLVs */
+	if (tlvlen <= len)
+		return;
+
+	sub_tlv = (struct i40e_cee_feat_tlv *)((char *)tlv + len);
+	while (feat_tlv_count < I40E_CEE_MAX_FEAT_TYPE) {
+		typelength = ntohs(sub_tlv->hdr.typelen);
+		sublen = (u16)((typelength &
+				I40E_LLDP_TLV_LEN_MASK) >>
+				I40E_LLDP_TLV_LEN_SHIFT);
+		subtype = (u8)((typelength & I40E_LLDP_TLV_TYPE_MASK) >>
+				I40E_LLDP_TLV_TYPE_SHIFT);
+		switch (subtype) {
+		case I40E_CEE_SUBTYPE_PG_CFG:
+			i40e_parse_cee_pgcfg_tlv(sub_tlv, dcbcfg);
+			break;
+		case I40E_CEE_SUBTYPE_PFC_CFG:
+			i40e_parse_cee_pfccfg_tlv(sub_tlv, dcbcfg);
+			break;
+		case I40E_CEE_SUBTYPE_APP_PRI:
+			i40e_parse_cee_app_tlv(sub_tlv, dcbcfg);
+			break;
+		default:
+			return; /* Invalid Sub-type return */
+		}
+		feat_tlv_count++;
+		/* Move to next sub TLV */
+		sub_tlv = (struct i40e_cee_feat_tlv *)((char *)sub_tlv +
+						sizeof(sub_tlv->hdr.typelen) +
+						sublen);
+	}
+}
+
+/**
+ * i40e_parse_org_tlv
+ * @tlv: Organization specific TLV
+ * @dcbcfg: Local store to update ETS REC data
+ *
+ * Currently only IEEE 802.1Qaz TLV is supported, all others
+ * will be returned
+ **/
+static void i40e_parse_org_tlv(struct i40e_lldp_org_tlv *tlv,
+			       struct i40e_dcbx_config *dcbcfg)
+{
+	u32 ouisubtype;
+	u32 oui;
+
+	ouisubtype = ntohl(tlv->ouisubtype);
+	oui = (u32)((ouisubtype & I40E_LLDP_TLV_OUI_MASK) >>
+		    I40E_LLDP_TLV_OUI_SHIFT);
+	switch (oui) {
+	case I40E_IEEE_8021QAZ_OUI:
+		i40e_parse_ieee_tlv(tlv, dcbcfg);
+		break;
+	case I40E_CEE_DCBX_OUI:
+		i40e_parse_cee_tlv(tlv, dcbcfg);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * i40e_lldp_to_dcb_config
+ * @lldpmib: LLDPDU to be parsed
+ * @dcbcfg: store for LLDPDU data
+ *
+ * Parse DCB configuration from the LLDPDU
+ **/
+int i40e_lldp_to_dcb_config(u8 *lldpmib,
+			    struct i40e_dcbx_config *dcbcfg)
+{
+	struct i40e_lldp_org_tlv *tlv;
+	u16 typelength;
+	u16 offset = 0;
+	int ret = 0;
+	u16 length;
+	u16 type;
+
+	if (!lldpmib || !dcbcfg)
+		return -EINVAL;
+
+	/* set to the start of LLDPDU */
+	lldpmib += ETH_HLEN;
+	tlv = (struct i40e_lldp_org_tlv *)lldpmib;
+	while (1) {
+		typelength = ntohs(tlv->typelength);
+		type = (u16)((typelength & I40E_LLDP_TLV_TYPE_MASK) >>
+			     I40E_LLDP_TLV_TYPE_SHIFT);
+		length = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >>
+			       I40E_LLDP_TLV_LEN_SHIFT);
+		offset += sizeof(typelength) + length;
+
+		/* END TLV or beyond LLDPDU size */
+		if ((type == I40E_TLV_TYPE_END) || (offset > I40E_LLDPDU_SIZE))
+			break;
+
+		switch (type) {
+		case I40E_TLV_TYPE_ORG:
+			i40e_parse_org_tlv(tlv, dcbcfg);
+			break;
+		default:
+			break;
+		}
+
+		/* Move to next TLV */
+		tlv = (struct i40e_lldp_org_tlv *)((char *)tlv +
+						    sizeof(tlv->typelength) +
+						    length);
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_aq_get_dcb_config
+ * @hw: pointer to the hw struct
+ * @mib_type: mib type for the query
+ * @bridgetype: bridge type for the query (remote)
+ * @dcbcfg: store for LLDPDU data
+ *
+ * Query DCB configuration from the Firmware
+ **/
+int i40e_aq_get_dcb_config(struct i40e_hw *hw, u8 mib_type,
+			   u8 bridgetype,
+			   struct i40e_dcbx_config *dcbcfg)
+{
+	struct i40e_virt_mem mem;
+	int ret = 0;
+	u8 *lldpmib;
+
+	/* Allocate the LLDPDU */
+	ret = i40e_allocate_virt_mem(hw, &mem, I40E_LLDPDU_SIZE);
+	if (ret)
+		return ret;
+
+	lldpmib = (u8 *)mem.va;
+	ret = i40e_aq_get_lldp_mib(hw, bridgetype, mib_type,
+				   (void *)lldpmib, I40E_LLDPDU_SIZE,
+				   NULL, NULL, NULL);
+	if (ret)
+		goto free_mem;
+
+	/* Parse LLDP MIB to get dcb configuration */
+	ret = i40e_lldp_to_dcb_config(lldpmib, dcbcfg);
+
+free_mem:
+	i40e_free_virt_mem(hw, &mem);
+	return ret;
+}
+
+/**
+ * i40e_cee_to_dcb_v1_config
+ * @cee_cfg: pointer to CEE v1 response configuration struct
+ * @dcbcfg: DCB configuration struct
+ *
+ * Convert CEE v1 configuration from firmware to DCB configuration
+ **/
+static void i40e_cee_to_dcb_v1_config(
+			struct i40e_aqc_get_cee_dcb_cfg_v1_resp *cee_cfg,
+			struct i40e_dcbx_config *dcbcfg)
+{
+	u16 status, tlv_status = le16_to_cpu(cee_cfg->tlv_status);
+	u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio);
+	u8 i, tc, err;
+
+	/* CEE PG data to ETS config */
+	dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc;
+
+	/* Note that the FW creates the oper_prio_tc nibbles reversed
+	 * from those in the CEE Priority Group sub-TLV.
+	 */
+	for (i = 0; i < 4; i++) {
+		tc = (u8)((cee_cfg->oper_prio_tc[i] &
+			 I40E_CEE_PGID_PRIO_0_MASK) >>
+			 I40E_CEE_PGID_PRIO_0_SHIFT);
+		dcbcfg->etscfg.prioritytable[i * 2] =  tc;
+		tc = (u8)((cee_cfg->oper_prio_tc[i] &
+			 I40E_CEE_PGID_PRIO_1_MASK) >>
+			 I40E_CEE_PGID_PRIO_1_SHIFT);
+		dcbcfg->etscfg.prioritytable[i*2 + 1] = tc;
+	}
+
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		dcbcfg->etscfg.tcbwtable[i] = cee_cfg->oper_tc_bw[i];
+
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (dcbcfg->etscfg.prioritytable[i] == I40E_CEE_PGID_STRICT) {
+			/* Map it to next empty TC */
+			dcbcfg->etscfg.prioritytable[i] =
+						cee_cfg->oper_num_tc - 1;
+			dcbcfg->etscfg.tsatable[i] = I40E_IEEE_TSA_STRICT;
+		} else {
+			dcbcfg->etscfg.tsatable[i] = I40E_IEEE_TSA_ETS;
+		}
+	}
+
+	/* CEE PFC data to ETS config */
+	dcbcfg->pfc.pfcenable = cee_cfg->oper_pfc_en;
+	dcbcfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;
+
+	status = (tlv_status & I40E_AQC_CEE_APP_STATUS_MASK) >>
+		  I40E_AQC_CEE_APP_STATUS_SHIFT;
+	err = (status & I40E_TLV_STATUS_ERR) ? 1 : 0;
+	/* Add APPs if Error is False */
+	if (!err) {
+		/* CEE operating configuration supports FCoE/iSCSI/FIP only */
+		dcbcfg->numapps = I40E_CEE_OPER_MAX_APPS;
+
+		/* FCoE APP */
+		dcbcfg->app[0].priority =
+			(app_prio & I40E_AQC_CEE_APP_FCOE_MASK) >>
+			 I40E_AQC_CEE_APP_FCOE_SHIFT;
+		dcbcfg->app[0].selector = I40E_APP_SEL_ETHTYPE;
+		dcbcfg->app[0].protocolid = I40E_APP_PROTOID_FCOE;
+
+		/* iSCSI APP */
+		dcbcfg->app[1].priority =
+			(app_prio & I40E_AQC_CEE_APP_ISCSI_MASK) >>
+			 I40E_AQC_CEE_APP_ISCSI_SHIFT;
+		dcbcfg->app[1].selector = I40E_APP_SEL_TCPIP;
+		dcbcfg->app[1].protocolid = I40E_APP_PROTOID_ISCSI;
+
+		/* FIP APP */
+		dcbcfg->app[2].priority =
+			(app_prio & I40E_AQC_CEE_APP_FIP_MASK) >>
+			 I40E_AQC_CEE_APP_FIP_SHIFT;
+		dcbcfg->app[2].selector = I40E_APP_SEL_ETHTYPE;
+		dcbcfg->app[2].protocolid = I40E_APP_PROTOID_FIP;
+	}
+}
+
+/**
+ * i40e_cee_to_dcb_config
+ * @cee_cfg: pointer to CEE configuration struct
+ * @dcbcfg: DCB configuration struct
+ *
+ * Convert CEE configuration from firmware to DCB configuration
+ **/
+static void i40e_cee_to_dcb_config(
+				struct i40e_aqc_get_cee_dcb_cfg_resp *cee_cfg,
+				struct i40e_dcbx_config *dcbcfg)
+{
+	u32 status, tlv_status = le32_to_cpu(cee_cfg->tlv_status);
+	u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio);
+	u8 i, tc, err, sync, oper;
+
+	/* CEE PG data to ETS config */
+	dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc;
+
+	/* Note that the FW creates the oper_prio_tc nibbles reversed
+	 * from those in the CEE Priority Group sub-TLV.
+	 */
+	for (i = 0; i < 4; i++) {
+		tc = (u8)((cee_cfg->oper_prio_tc[i] &
+			 I40E_CEE_PGID_PRIO_0_MASK) >>
+			 I40E_CEE_PGID_PRIO_0_SHIFT);
+		dcbcfg->etscfg.prioritytable[i * 2] =  tc;
+		tc = (u8)((cee_cfg->oper_prio_tc[i] &
+			 I40E_CEE_PGID_PRIO_1_MASK) >>
+			 I40E_CEE_PGID_PRIO_1_SHIFT);
+		dcbcfg->etscfg.prioritytable[i * 2 + 1] = tc;
+	}
+
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		dcbcfg->etscfg.tcbwtable[i] = cee_cfg->oper_tc_bw[i];
+
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (dcbcfg->etscfg.prioritytable[i] == I40E_CEE_PGID_STRICT) {
+			/* Map it to next empty TC */
+			dcbcfg->etscfg.prioritytable[i] =
+						cee_cfg->oper_num_tc - 1;
+			dcbcfg->etscfg.tsatable[i] = I40E_IEEE_TSA_STRICT;
+		} else {
+			dcbcfg->etscfg.tsatable[i] = I40E_IEEE_TSA_ETS;
+		}
+	}
+
+	/* CEE PFC data to ETS config */
+	dcbcfg->pfc.pfcenable = cee_cfg->oper_pfc_en;
+	dcbcfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;
+
+	i = 0;
+	status = (tlv_status & I40E_AQC_CEE_FCOE_STATUS_MASK) >>
+		  I40E_AQC_CEE_FCOE_STATUS_SHIFT;
+	err = (status & I40E_TLV_STATUS_ERR) ? 1 : 0;
+	sync = (status & I40E_TLV_STATUS_SYNC) ? 1 : 0;
+	oper = (status & I40E_TLV_STATUS_OPER) ? 1 : 0;
+	/* Add FCoE APP if Error is False and Oper/Sync is True */
+	if (!err && sync && oper) {
+		/* FCoE APP */
+		dcbcfg->app[i].priority =
+			(app_prio & I40E_AQC_CEE_APP_FCOE_MASK) >>
+			 I40E_AQC_CEE_APP_FCOE_SHIFT;
+		dcbcfg->app[i].selector = I40E_APP_SEL_ETHTYPE;
+		dcbcfg->app[i].protocolid = I40E_APP_PROTOID_FCOE;
+		i++;
+	}
+
+	status = (tlv_status & I40E_AQC_CEE_ISCSI_STATUS_MASK) >>
+		  I40E_AQC_CEE_ISCSI_STATUS_SHIFT;
+	err = (status & I40E_TLV_STATUS_ERR) ? 1 : 0;
+	sync = (status & I40E_TLV_STATUS_SYNC) ? 1 : 0;
+	oper = (status & I40E_TLV_STATUS_OPER) ? 1 : 0;
+	/* Add iSCSI APP if Error is False and Oper/Sync is True */
+	if (!err && sync && oper) {
+		/* iSCSI APP */
+		dcbcfg->app[i].priority =
+			(app_prio & I40E_AQC_CEE_APP_ISCSI_MASK) >>
+			 I40E_AQC_CEE_APP_ISCSI_SHIFT;
+		dcbcfg->app[i].selector = I40E_APP_SEL_TCPIP;
+		dcbcfg->app[i].protocolid = I40E_APP_PROTOID_ISCSI;
+		i++;
+	}
+
+	status = (tlv_status & I40E_AQC_CEE_FIP_STATUS_MASK) >>
+		  I40E_AQC_CEE_FIP_STATUS_SHIFT;
+	err = (status & I40E_TLV_STATUS_ERR) ? 1 : 0;
+	sync = (status & I40E_TLV_STATUS_SYNC) ? 1 : 0;
+	oper = (status & I40E_TLV_STATUS_OPER) ? 1 : 0;
+	/* Add FIP APP if Error is False and Oper/Sync is True */
+	if (!err && sync && oper) {
+		/* FIP APP */
+		dcbcfg->app[i].priority =
+			(app_prio & I40E_AQC_CEE_APP_FIP_MASK) >>
+			 I40E_AQC_CEE_APP_FIP_SHIFT;
+		dcbcfg->app[i].selector = I40E_APP_SEL_ETHTYPE;
+		dcbcfg->app[i].protocolid = I40E_APP_PROTOID_FIP;
+		i++;
+	}
+	dcbcfg->numapps = i;
+}
+
+/**
+ * i40e_get_ieee_dcb_config
+ * @hw: pointer to the hw struct
+ *
+ * Get IEEE mode DCB configuration from the Firmware
+ **/
+static int i40e_get_ieee_dcb_config(struct i40e_hw *hw)
+{
+	int ret = 0;
+
+	/* IEEE mode */
+	hw->local_dcbx_config.dcbx_mode = I40E_DCBX_MODE_IEEE;
+	/* Get Local DCB Config */
+	ret = i40e_aq_get_dcb_config(hw, I40E_AQ_LLDP_MIB_LOCAL, 0,
+				     &hw->local_dcbx_config);
+	if (ret)
+		goto out;
+
+	/* Get Remote DCB Config */
+	ret = i40e_aq_get_dcb_config(hw, I40E_AQ_LLDP_MIB_REMOTE,
+				     I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE,
+				     &hw->remote_dcbx_config);
+	/* Don't treat ENOENT as an error for Remote MIBs */
+	if (hw->aq.asq_last_status == I40E_AQ_RC_ENOENT)
+		ret = 0;
+
+out:
+	return ret;
+}
+
+/**
+ * i40e_get_dcb_config
+ * @hw: pointer to the hw struct
+ *
+ * Get DCB configuration from the Firmware
+ **/
+int i40e_get_dcb_config(struct i40e_hw *hw)
+{
+	struct i40e_aqc_get_cee_dcb_cfg_v1_resp cee_v1_cfg;
+	struct i40e_aqc_get_cee_dcb_cfg_resp cee_cfg;
+	int ret = 0;
+
+	/* If Firmware version < v4.33 on X710/XL710, IEEE only */
+	if ((hw->mac.type == I40E_MAC_XL710) &&
+	    (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) ||
+	      (hw->aq.fw_maj_ver < 4)))
+		return i40e_get_ieee_dcb_config(hw);
+
+	/* If Firmware version == v4.33 on X710/XL710, use old CEE struct */
+	if ((hw->mac.type == I40E_MAC_XL710) &&
+	    ((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver == 33))) {
+		ret = i40e_aq_get_cee_dcb_config(hw, &cee_v1_cfg,
+						 sizeof(cee_v1_cfg), NULL);
+		if (!ret) {
+			/* CEE mode */
+			hw->local_dcbx_config.dcbx_mode = I40E_DCBX_MODE_CEE;
+			hw->local_dcbx_config.tlv_status =
+					le16_to_cpu(cee_v1_cfg.tlv_status);
+			i40e_cee_to_dcb_v1_config(&cee_v1_cfg,
+						  &hw->local_dcbx_config);
+		}
+	} else {
+		ret = i40e_aq_get_cee_dcb_config(hw, &cee_cfg,
+						 sizeof(cee_cfg), NULL);
+		if (!ret) {
+			/* CEE mode */
+			hw->local_dcbx_config.dcbx_mode = I40E_DCBX_MODE_CEE;
+			hw->local_dcbx_config.tlv_status =
+					le32_to_cpu(cee_cfg.tlv_status);
+			i40e_cee_to_dcb_config(&cee_cfg,
+					       &hw->local_dcbx_config);
+		}
+	}
+
+	/* CEE mode not enabled try querying IEEE data */
+	if (hw->aq.asq_last_status == I40E_AQ_RC_ENOENT)
+		return i40e_get_ieee_dcb_config(hw);
+
+	if (ret)
+		goto out;
+
+	/* Get CEE DCB Desired Config */
+	ret = i40e_aq_get_dcb_config(hw, I40E_AQ_LLDP_MIB_LOCAL, 0,
+				     &hw->desired_dcbx_config);
+	if (ret)
+		goto out;
+
+	/* Get Remote DCB Config */
+	ret = i40e_aq_get_dcb_config(hw, I40E_AQ_LLDP_MIB_REMOTE,
+				     I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE,
+				     &hw->remote_dcbx_config);
+	/* Don't treat ENOENT as an error for Remote MIBs */
+	if (hw->aq.asq_last_status == I40E_AQ_RC_ENOENT)
+		ret = 0;
+
+out:
+	return ret;
+}
+
+/**
+ * i40e_init_dcb
+ * @hw: pointer to the hw struct
+ * @enable_mib_change: enable mib change event
+ *
+ * Update DCB configuration from the Firmware
+ **/
+int i40e_init_dcb(struct i40e_hw *hw, bool enable_mib_change)
+{
+	struct i40e_lldp_variables lldp_cfg;
+	u8 adminstatus = 0;
+	int ret = 0;
+
+	if (!hw->func_caps.dcb)
+		return -EOPNOTSUPP;
+
+	/* Read LLDP NVM area */
+	if (hw->flags & I40E_HW_FLAG_FW_LLDP_PERSISTENT) {
+		u8 offset = 0;
+
+		if (hw->mac.type == I40E_MAC_XL710)
+			offset = I40E_LLDP_CURRENT_STATUS_XL710_OFFSET;
+		else if (hw->mac.type == I40E_MAC_X722)
+			offset = I40E_LLDP_CURRENT_STATUS_X722_OFFSET;
+		else
+			return -EOPNOTSUPP;
+
+		ret = i40e_read_nvm_module_data(hw,
+						I40E_SR_EMP_SR_SETTINGS_PTR,
+						offset,
+						I40E_LLDP_CURRENT_STATUS_OFFSET,
+						I40E_LLDP_CURRENT_STATUS_SIZE,
+						&lldp_cfg.adminstatus);
+	} else {
+		ret = i40e_read_lldp_cfg(hw, &lldp_cfg);
+	}
+	if (ret)
+		return -EBUSY;
+
+	/* Get the LLDP AdminStatus for the current port */
+	adminstatus = lldp_cfg.adminstatus >> (hw->port * 4);
+	adminstatus &= 0xF;
+
+	/* LLDP agent disabled */
+	if (!adminstatus) {
+		hw->dcbx_status = I40E_DCBX_STATUS_DISABLED;
+		return -EBUSY;
+	}
+
+	/* Get DCBX status */
+	ret = i40e_get_dcbx_status(hw, &hw->dcbx_status);
+	if (ret)
+		return ret;
+
+	/* Check the DCBX Status */
+	if (hw->dcbx_status == I40E_DCBX_STATUS_DONE ||
+	    hw->dcbx_status == I40E_DCBX_STATUS_IN_PROGRESS) {
+		/* Get current DCBX configuration */
+		ret = i40e_get_dcb_config(hw);
+		if (ret)
+			return ret;
+	} else if (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED) {
+		return -EBUSY;
+	}
+
+	/* Configure the LLDP MIB change event */
+	if (enable_mib_change)
+		ret = i40e_aq_cfg_lldp_mib_change_event(hw, true, NULL);
+
+	return ret;
+}
+
+/**
+ * i40e_get_fw_lldp_status
+ * @hw: pointer to the hw struct
+ * @lldp_status: pointer to the status enum
+ *
+ * Get status of FW Link Layer Discovery Protocol (LLDP) Agent.
+ * Status of agent is reported via @lldp_status parameter.
+ **/
+int
+i40e_get_fw_lldp_status(struct i40e_hw *hw,
+			enum i40e_get_fw_lldp_status_resp *lldp_status)
+{
+	struct i40e_virt_mem mem;
+	u8 *lldpmib;
+	int ret;
+
+	if (!lldp_status)
+		return -EINVAL;
+
+	/* Allocate buffer for the LLDPDU */
+	ret = i40e_allocate_virt_mem(hw, &mem, I40E_LLDPDU_SIZE);
+	if (ret)
+		return ret;
+
+	lldpmib = (u8 *)mem.va;
+	ret = i40e_aq_get_lldp_mib(hw, 0, 0, (void *)lldpmib,
+				   I40E_LLDPDU_SIZE, NULL, NULL, NULL);
+
+	if (!ret) {
+		*lldp_status = I40E_GET_FW_LLDP_STATUS_ENABLED;
+	} else if (hw->aq.asq_last_status == I40E_AQ_RC_ENOENT) {
+		/* MIB is not available yet but the agent is running */
+		*lldp_status = I40E_GET_FW_LLDP_STATUS_ENABLED;
+		ret = 0;
+	} else if (hw->aq.asq_last_status == I40E_AQ_RC_EPERM) {
+		*lldp_status = I40E_GET_FW_LLDP_STATUS_DISABLED;
+		ret = 0;
+	}
+
+	i40e_free_virt_mem(hw, &mem);
+	return ret;
+}
+
+/**
+ * i40e_add_ieee_ets_tlv - Prepare ETS TLV in IEEE format
+ * @tlv: Fill the ETS config data in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Prepare IEEE 802.1Qaz ETS CFG TLV
+ **/
+static void i40e_add_ieee_ets_tlv(struct i40e_lldp_org_tlv *tlv,
+				  struct i40e_dcbx_config *dcbcfg)
+{
+	u8 priority0, priority1, maxtcwilling = 0;
+	struct i40e_dcb_ets_config *etscfg;
+	u16 offset = 0, typelength, i;
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+
+	typelength = (u16)((I40E_TLV_TYPE_ORG << I40E_LLDP_TLV_TYPE_SHIFT) |
+			I40E_IEEE_ETS_TLV_LENGTH);
+	tlv->typelength = htons(typelength);
+
+	ouisubtype = (u32)((I40E_IEEE_8021QAZ_OUI << I40E_LLDP_TLV_OUI_SHIFT) |
+			I40E_IEEE_SUBTYPE_ETS_CFG);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* First Octet post subtype
+	 * --------------------------
+	 * |will-|CBS  | Re-  | Max |
+	 * |ing  |     |served| TCs |
+	 * --------------------------
+	 * |1bit | 1bit|3 bits|3bits|
+	 */
+	etscfg = &dcbcfg->etscfg;
+	if (etscfg->willing)
+		maxtcwilling = BIT(I40E_IEEE_ETS_WILLING_SHIFT);
+	maxtcwilling |= etscfg->maxtcs & I40E_IEEE_ETS_MAXTC_MASK;
+	buf[offset] = maxtcwilling;
+
+	/* Move offset to Priority Assignment Table */
+	offset++;
+
+	/* Priority Assignment Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 */
+	for (i = 0; i < 4; i++) {
+		priority0 = etscfg->prioritytable[i * 2] & 0xF;
+		priority1 = etscfg->prioritytable[i * 2 + 1] & 0xF;
+		buf[offset] = (priority0 << I40E_IEEE_ETS_PRIO_1_SHIFT) |
+				priority1;
+		offset++;
+	}
+
+	/* TC Bandwidth Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		buf[offset++] = etscfg->tcbwtable[i];
+
+	/* TSA Assignment Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		buf[offset++] = etscfg->tsatable[i];
+}
+
+/**
+ * i40e_add_ieee_etsrec_tlv - Prepare ETS Recommended TLV in IEEE format
+ * @tlv: Fill ETS Recommended TLV in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Prepare IEEE 802.1Qaz ETS REC TLV
+ **/
+static void i40e_add_ieee_etsrec_tlv(struct i40e_lldp_org_tlv *tlv,
+				     struct i40e_dcbx_config *dcbcfg)
+{
+	struct i40e_dcb_ets_config *etsrec;
+	u16 offset = 0, typelength, i;
+	u8 priority0, priority1;
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+
+	typelength = (u16)((I40E_TLV_TYPE_ORG << I40E_LLDP_TLV_TYPE_SHIFT) |
+			I40E_IEEE_ETS_TLV_LENGTH);
+	tlv->typelength = htons(typelength);
+
+	ouisubtype = (u32)((I40E_IEEE_8021QAZ_OUI << I40E_LLDP_TLV_OUI_SHIFT) |
+			I40E_IEEE_SUBTYPE_ETS_REC);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	etsrec = &dcbcfg->etsrec;
+	/* First Octet is reserved */
+	/* Move offset to Priority Assignment Table */
+	offset++;
+
+	/* Priority Assignment Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 */
+	for (i = 0; i < 4; i++) {
+		priority0 = etsrec->prioritytable[i * 2] & 0xF;
+		priority1 = etsrec->prioritytable[i * 2 + 1] & 0xF;
+		buf[offset] = (priority0 << I40E_IEEE_ETS_PRIO_1_SHIFT) |
+				priority1;
+		offset++;
+	}
+
+	/* TC Bandwidth Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		buf[offset++] = etsrec->tcbwtable[i];
+
+	/* TSA Assignment Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		buf[offset++] = etsrec->tsatable[i];
+}
+
+/**
+ * i40e_add_ieee_pfc_tlv - Prepare PFC TLV in IEEE format
+ * @tlv: Fill PFC TLV in IEEE format
+ * @dcbcfg: Local store to get PFC CFG data
+ *
+ * Prepare IEEE 802.1Qaz PFC CFG TLV
+ **/
+static void i40e_add_ieee_pfc_tlv(struct i40e_lldp_org_tlv *tlv,
+				  struct i40e_dcbx_config *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+	u16 typelength;
+
+	typelength = (u16)((I40E_TLV_TYPE_ORG << I40E_LLDP_TLV_TYPE_SHIFT) |
+			I40E_IEEE_PFC_TLV_LENGTH);
+	tlv->typelength = htons(typelength);
+
+	ouisubtype = (u32)((I40E_IEEE_8021QAZ_OUI << I40E_LLDP_TLV_OUI_SHIFT) |
+			I40E_IEEE_SUBTYPE_PFC_CFG);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* ----------------------------------------
+	 * |will-|MBC  | Re-  | PFC |  PFC Enable  |
+	 * |ing  |     |served| cap |              |
+	 * -----------------------------------------
+	 * |1bit | 1bit|2 bits|4bits| 1 octet      |
+	 */
+	if (dcbcfg->pfc.willing)
+		buf[0] = BIT(I40E_IEEE_PFC_WILLING_SHIFT);
+
+	if (dcbcfg->pfc.mbc)
+		buf[0] |= BIT(I40E_IEEE_PFC_MBC_SHIFT);
+
+	buf[0] |= dcbcfg->pfc.pfccap & 0xF;
+	buf[1] = dcbcfg->pfc.pfcenable;
+}
+
+/**
+ * i40e_add_ieee_app_pri_tlv -  Prepare APP TLV in IEEE format
+ * @tlv: Fill APP TLV in IEEE format
+ * @dcbcfg: Local store to get APP CFG data
+ *
+ * Prepare IEEE 802.1Qaz APP CFG TLV
+ **/
+static void i40e_add_ieee_app_pri_tlv(struct i40e_lldp_org_tlv *tlv,
+				      struct i40e_dcbx_config *dcbcfg)
+{
+	u16 typelength, length, offset = 0;
+	u8 priority, selector, i = 0;
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+
+	/* No APP TLVs then just return */
+	if (dcbcfg->numapps == 0)
+		return;
+	ouisubtype = (u32)((I40E_IEEE_8021QAZ_OUI << I40E_LLDP_TLV_OUI_SHIFT) |
+			I40E_IEEE_SUBTYPE_APP_PRI);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* Move offset to App Priority Table */
+	offset++;
+	/* Application Priority Table (3 octets)
+	 * Octets:|         1          |    2    |    3    |
+	 *        -----------------------------------------
+	 *        |Priority|Rsrvd| Sel |    Protocol ID    |
+	 *        -----------------------------------------
+	 *   Bits:|23    21|20 19|18 16|15                0|
+	 *        -----------------------------------------
+	 */
+	while (i < dcbcfg->numapps) {
+		priority = dcbcfg->app[i].priority & 0x7;
+		selector = dcbcfg->app[i].selector & 0x7;
+		buf[offset] = (priority << I40E_IEEE_APP_PRIO_SHIFT) | selector;
+		buf[offset + 1] = (dcbcfg->app[i].protocolid >> 0x8) & 0xFF;
+		buf[offset + 2] =  dcbcfg->app[i].protocolid & 0xFF;
+		/* Move to next app */
+		offset += 3;
+		i++;
+		if (i >= I40E_DCBX_MAX_APPS)
+			break;
+	}
+	/* length includes size of ouisubtype + 1 reserved + 3*numapps */
+	length = sizeof(tlv->ouisubtype) + 1 + (i * 3);
+	typelength = (u16)((I40E_TLV_TYPE_ORG << I40E_LLDP_TLV_TYPE_SHIFT) |
+		(length & 0x1FF));
+	tlv->typelength = htons(typelength);
+}
+
+/**
+ * i40e_add_dcb_tlv - Add all IEEE TLVs
+ * @tlv: pointer to org tlv
+ * @dcbcfg: pointer to modified dcbx config structure *
+ * @tlvid: tlv id to be added
+ * add tlv information
+ **/
+static void i40e_add_dcb_tlv(struct i40e_lldp_org_tlv *tlv,
+			     struct i40e_dcbx_config *dcbcfg,
+			     u16 tlvid)
+{
+	switch (tlvid) {
+	case I40E_IEEE_TLV_ID_ETS_CFG:
+		i40e_add_ieee_ets_tlv(tlv, dcbcfg);
+		break;
+	case I40E_IEEE_TLV_ID_ETS_REC:
+		i40e_add_ieee_etsrec_tlv(tlv, dcbcfg);
+		break;
+	case I40E_IEEE_TLV_ID_PFC_CFG:
+		i40e_add_ieee_pfc_tlv(tlv, dcbcfg);
+		break;
+	case I40E_IEEE_TLV_ID_APP_PRI:
+		i40e_add_ieee_app_pri_tlv(tlv, dcbcfg);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * i40e_set_dcb_config - Set the local LLDP MIB to FW
+ * @hw: pointer to the hw struct
+ *
+ * Set DCB configuration to the Firmware
+ **/
+int i40e_set_dcb_config(struct i40e_hw *hw)
+{
+	struct i40e_dcbx_config *dcbcfg;
+	struct i40e_virt_mem mem;
+	u8 mib_type, *lldpmib;
+	u16 miblen;
+	int ret;
+
+	/* update the hw local config */
+	dcbcfg = &hw->local_dcbx_config;
+	/* Allocate the LLDPDU */
+	ret = i40e_allocate_virt_mem(hw, &mem, I40E_LLDPDU_SIZE);
+	if (ret)
+		return ret;
+
+	mib_type = SET_LOCAL_MIB_AC_TYPE_LOCAL_MIB;
+	if (dcbcfg->app_mode == I40E_DCBX_APPS_NON_WILLING) {
+		mib_type |= SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS <<
+			    SET_LOCAL_MIB_AC_TYPE_NON_WILLING_APPS_SHIFT;
+	}
+	lldpmib = (u8 *)mem.va;
+	i40e_dcb_config_to_lldp(lldpmib, &miblen, dcbcfg);
+	ret = i40e_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, miblen, NULL);
+
+	i40e_free_virt_mem(hw, &mem);
+	return ret;
+}
+
+/**
+ * i40e_dcb_config_to_lldp - Convert Dcbconfig to MIB format
+ * @lldpmib: pointer to mib to be output
+ * @miblen: pointer to u16 for length of lldpmib
+ * @dcbcfg: store for LLDPDU data
+ *
+ * send DCB configuration to FW
+ **/
+int i40e_dcb_config_to_lldp(u8 *lldpmib, u16 *miblen,
+			    struct i40e_dcbx_config *dcbcfg)
+{
+	u16 length, offset = 0, tlvid, typelength;
+	struct i40e_lldp_org_tlv *tlv;
+
+	tlv = (struct i40e_lldp_org_tlv *)lldpmib;
+	tlvid = I40E_TLV_ID_START;
+	do {
+		i40e_add_dcb_tlv(tlv, dcbcfg, tlvid++);
+		typelength = ntohs(tlv->typelength);
+		length = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >>
+				I40E_LLDP_TLV_LEN_SHIFT);
+		if (length)
+			offset += length + I40E_IEEE_TLV_HEADER_LENGTH;
+		/* END TLV or beyond LLDPDU size */
+		if (tlvid >= I40E_TLV_ID_END_OF_LLDPPDU ||
+		    offset >= I40E_LLDPDU_SIZE)
+			break;
+		/* Move to next TLV */
+		if (length)
+			tlv = (struct i40e_lldp_org_tlv *)((char *)tlv +
+			      sizeof(tlv->typelength) + length);
+	} while (tlvid < I40E_TLV_ID_END_OF_LLDPPDU);
+	*miblen = offset;
+	return 0;
+}
+
+/**
+ * i40e_dcb_hw_rx_fifo_config
+ * @hw: pointer to the hw struct
+ * @ets_mode: Strict Priority or Round Robin mode
+ * @non_ets_mode: Strict Priority or Round Robin
+ * @max_exponent: Exponent to calculate max refill credits
+ * @lltc_map: Low latency TC bitmap
+ *
+ * Configure HW Rx FIFO as part of DCB configuration.
+ **/
+void i40e_dcb_hw_rx_fifo_config(struct i40e_hw *hw,
+				enum i40e_dcb_arbiter_mode ets_mode,
+				enum i40e_dcb_arbiter_mode non_ets_mode,
+				u32 max_exponent,
+				u8 lltc_map)
+{
+	u32 reg = rd32(hw, I40E_PRTDCB_RETSC);
+
+	reg &= ~I40E_PRTDCB_RETSC_ETS_MODE_MASK;
+	reg |= ((u32)ets_mode << I40E_PRTDCB_RETSC_ETS_MODE_SHIFT) &
+		I40E_PRTDCB_RETSC_ETS_MODE_MASK;
+
+	reg &= ~I40E_PRTDCB_RETSC_NON_ETS_MODE_MASK;
+	reg |= ((u32)non_ets_mode << I40E_PRTDCB_RETSC_NON_ETS_MODE_SHIFT) &
+		I40E_PRTDCB_RETSC_NON_ETS_MODE_MASK;
+
+	reg &= ~I40E_PRTDCB_RETSC_ETS_MAX_EXP_MASK;
+	reg |= (max_exponent << I40E_PRTDCB_RETSC_ETS_MAX_EXP_SHIFT) &
+		I40E_PRTDCB_RETSC_ETS_MAX_EXP_MASK;
+
+	reg &= ~I40E_PRTDCB_RETSC_LLTC_MASK;
+	reg |= (lltc_map << I40E_PRTDCB_RETSC_LLTC_SHIFT) &
+		I40E_PRTDCB_RETSC_LLTC_MASK;
+	wr32(hw, I40E_PRTDCB_RETSC, reg);
+}
+
+/**
+ * i40e_dcb_hw_rx_cmd_monitor_config
+ * @hw: pointer to the hw struct
+ * @num_tc: Total number of traffic class
+ * @num_ports: Total number of ports on device
+ *
+ * Configure HW Rx command monitor as part of DCB configuration.
+ **/
+void i40e_dcb_hw_rx_cmd_monitor_config(struct i40e_hw *hw,
+				       u8 num_tc, u8 num_ports)
+{
+	u32 threshold;
+	u32 fifo_size;
+	u32 reg;
+
+	/* Set the threshold and fifo_size based on number of ports */
+	switch (num_ports) {
+	case 1:
+		threshold = I40E_DCB_1_PORT_THRESHOLD;
+		fifo_size = I40E_DCB_1_PORT_FIFO_SIZE;
+		break;
+	case 2:
+		if (num_tc > 4) {
+			threshold = I40E_DCB_2_PORT_THRESHOLD_HIGH_NUM_TC;
+			fifo_size = I40E_DCB_2_PORT_FIFO_SIZE_HIGH_NUM_TC;
+		} else {
+			threshold = I40E_DCB_2_PORT_THRESHOLD_LOW_NUM_TC;
+			fifo_size = I40E_DCB_2_PORT_FIFO_SIZE_LOW_NUM_TC;
+		}
+		break;
+	case 4:
+		if (num_tc > 4) {
+			threshold = I40E_DCB_4_PORT_THRESHOLD_HIGH_NUM_TC;
+			fifo_size = I40E_DCB_4_PORT_FIFO_SIZE_HIGH_NUM_TC;
+		} else {
+			threshold = I40E_DCB_4_PORT_THRESHOLD_LOW_NUM_TC;
+			fifo_size = I40E_DCB_4_PORT_FIFO_SIZE_LOW_NUM_TC;
+		}
+		break;
+	default:
+		i40e_debug(hw, I40E_DEBUG_DCB, "Invalid num_ports %u.\n",
+			   (u32)num_ports);
+		return;
+	}
+
+	/* The hardware manual describes setting up of I40E_PRT_SWR_PM_THR
+	 * based on the number of ports and traffic classes for a given port as
+	 * part of DCB configuration.
+	 */
+	reg = rd32(hw, I40E_PRT_SWR_PM_THR);
+	reg &= ~I40E_PRT_SWR_PM_THR_THRESHOLD_MASK;
+	reg |= (threshold << I40E_PRT_SWR_PM_THR_THRESHOLD_SHIFT) &
+		I40E_PRT_SWR_PM_THR_THRESHOLD_MASK;
+	wr32(hw, I40E_PRT_SWR_PM_THR, reg);
+
+	reg = rd32(hw, I40E_PRTDCB_RPPMC);
+	reg &= ~I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_MASK;
+	reg |= (fifo_size << I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_SHIFT) &
+		I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_MASK;
+	wr32(hw, I40E_PRTDCB_RPPMC, reg);
+}
+
+/**
+ * i40e_dcb_hw_pfc_config
+ * @hw: pointer to the hw struct
+ * @pfc_en: Bitmap of PFC enabled priorities
+ * @prio_tc: priority to tc assignment indexed by priority
+ *
+ * Configure HW Priority Flow Controller as part of DCB configuration.
+ **/
+void i40e_dcb_hw_pfc_config(struct i40e_hw *hw,
+			    u8 pfc_en, u8 *prio_tc)
+{
+	u16 refresh_time = (u16)I40E_DEFAULT_PAUSE_TIME / 2;
+	u32 link_speed = hw->phy.link_info.link_speed;
+	u8 first_pfc_prio = 0;
+	u8 num_pfc_tc = 0;
+	u8 tc2pfc = 0;
+	u32 reg;
+	u8 i;
+
+	/* Get Number of PFC TCs and TC2PFC map */
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+		if (pfc_en & BIT(i)) {
+			if (!first_pfc_prio)
+				first_pfc_prio = i;
+			/* Set bit for the PFC TC */
+			tc2pfc |= BIT(prio_tc[i]);
+			num_pfc_tc++;
+		}
+	}
+
+	switch (link_speed) {
+	case I40E_LINK_SPEED_10GB:
+		reg = rd32(hw, I40E_PRTDCB_MFLCN);
+		reg |= BIT(I40E_PRTDCB_MFLCN_DPF_SHIFT) &
+			I40E_PRTDCB_MFLCN_DPF_MASK;
+		reg &= ~I40E_PRTDCB_MFLCN_RFCE_MASK;
+		reg &= ~I40E_PRTDCB_MFLCN_RPFCE_MASK;
+		if (pfc_en) {
+			reg |= BIT(I40E_PRTDCB_MFLCN_RPFCM_SHIFT) &
+				I40E_PRTDCB_MFLCN_RPFCM_MASK;
+			reg |= ((u32)pfc_en << I40E_PRTDCB_MFLCN_RPFCE_SHIFT) &
+				I40E_PRTDCB_MFLCN_RPFCE_MASK;
+		}
+		wr32(hw, I40E_PRTDCB_MFLCN, reg);
+
+		reg = rd32(hw, I40E_PRTDCB_FCCFG);
+		reg &= ~I40E_PRTDCB_FCCFG_TFCE_MASK;
+		if (pfc_en)
+			reg |= (I40E_DCB_PFC_ENABLED <<
+				I40E_PRTDCB_FCCFG_TFCE_SHIFT) &
+				I40E_PRTDCB_FCCFG_TFCE_MASK;
+		wr32(hw, I40E_PRTDCB_FCCFG, reg);
+
+		/* FCTTV and FCRTV to be set by default */
+		break;
+	case I40E_LINK_SPEED_40GB:
+		reg = rd32(hw, I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP);
+		reg &= ~I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_MASK;
+		wr32(hw, I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP, reg);
+
+		reg = rd32(hw, I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP);
+		reg &= ~I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_MASK;
+		reg |= BIT(I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_SHIFT) &
+			I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_MASK;
+		wr32(hw, I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP, reg);
+
+		reg = rd32(hw, I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE);
+		reg &= ~I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_MASK;
+		reg |= ((u32)pfc_en <<
+			   I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_SHIFT) &
+			I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_MASK;
+		wr32(hw, I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE, reg);
+
+		reg = rd32(hw, I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE);
+		reg &= ~I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_MASK;
+		reg |= ((u32)pfc_en <<
+			   I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_SHIFT) &
+			I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_MASK;
+		wr32(hw, I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE, reg);
+
+		for (i = 0; i < I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MAX_INDEX; i++) {
+			reg = rd32(hw, I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(i));
+			reg &= ~I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MASK;
+			if (pfc_en) {
+				reg |= ((u32)refresh_time <<
+					I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_SHIFT) &
+					I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MASK;
+			}
+			wr32(hw, I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(i), reg);
+		}
+		/* PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA default value is 0xFFFF
+		 * for all user priorities
+		 */
+		break;
+	}
+
+	reg = rd32(hw, I40E_PRTDCB_TC2PFC);
+	reg &= ~I40E_PRTDCB_TC2PFC_TC2PFC_MASK;
+	reg |= ((u32)tc2pfc << I40E_PRTDCB_TC2PFC_TC2PFC_SHIFT) &
+		I40E_PRTDCB_TC2PFC_TC2PFC_MASK;
+	wr32(hw, I40E_PRTDCB_TC2PFC, reg);
+
+	reg = rd32(hw, I40E_PRTDCB_RUP);
+	reg &= ~I40E_PRTDCB_RUP_NOVLANUP_MASK;
+	reg |= ((u32)first_pfc_prio << I40E_PRTDCB_RUP_NOVLANUP_SHIFT) &
+		 I40E_PRTDCB_RUP_NOVLANUP_MASK;
+	wr32(hw, I40E_PRTDCB_RUP, reg);
+
+	reg = rd32(hw, I40E_PRTDCB_TDPMC);
+	reg &= ~I40E_PRTDCB_TDPMC_TCPM_MODE_MASK;
+	if (num_pfc_tc > I40E_DCB_PFC_FORCED_NUM_TC) {
+		reg |= BIT(I40E_PRTDCB_TDPMC_TCPM_MODE_SHIFT) &
+			I40E_PRTDCB_TDPMC_TCPM_MODE_MASK;
+	}
+	wr32(hw, I40E_PRTDCB_TDPMC, reg);
+
+	reg = rd32(hw, I40E_PRTDCB_TCPMC);
+	reg &= ~I40E_PRTDCB_TCPMC_TCPM_MODE_MASK;
+	if (num_pfc_tc > I40E_DCB_PFC_FORCED_NUM_TC) {
+		reg |= BIT(I40E_PRTDCB_TCPMC_TCPM_MODE_SHIFT) &
+			I40E_PRTDCB_TCPMC_TCPM_MODE_MASK;
+	}
+	wr32(hw, I40E_PRTDCB_TCPMC, reg);
+}
+
+/**
+ * i40e_dcb_hw_set_num_tc
+ * @hw: pointer to the hw struct
+ * @num_tc: number of traffic classes
+ *
+ * Configure number of traffic classes in HW
+ **/
+void i40e_dcb_hw_set_num_tc(struct i40e_hw *hw, u8 num_tc)
+{
+	u32 reg = rd32(hw, I40E_PRTDCB_GENC);
+
+	reg &= ~I40E_PRTDCB_GENC_NUMTC_MASK;
+	reg |= ((u32)num_tc << I40E_PRTDCB_GENC_NUMTC_SHIFT) &
+		I40E_PRTDCB_GENC_NUMTC_MASK;
+	wr32(hw, I40E_PRTDCB_GENC, reg);
+}
+
+/**
+ * i40e_dcb_hw_get_num_tc
+ * @hw: pointer to the hw struct
+ *
+ * Returns number of traffic classes configured in HW
+ **/
+u8 i40e_dcb_hw_get_num_tc(struct i40e_hw *hw)
+{
+	u32 reg = rd32(hw, I40E_PRTDCB_GENC);
+
+	return (u8)((reg & I40E_PRTDCB_GENC_NUMTC_MASK) >>
+		I40E_PRTDCB_GENC_NUMTC_SHIFT);
+}
+
+/**
+ * i40e_dcb_hw_rx_ets_bw_config
+ * @hw: pointer to the hw struct
+ * @bw_share: Bandwidth share indexed per traffic class
+ * @mode: Strict Priority or Round Robin mode between UP sharing same
+ * traffic class
+ * @prio_type: TC is ETS enabled or strict priority
+ *
+ * Configure HW Rx ETS bandwidth as part of DCB configuration.
+ **/
+void i40e_dcb_hw_rx_ets_bw_config(struct i40e_hw *hw, u8 *bw_share,
+				  u8 *mode, u8 *prio_type)
+{
+	u32 reg;
+	u8 i;
+
+	for (i = 0; i <= I40E_PRTDCB_RETSTCC_MAX_INDEX; i++) {
+		reg = rd32(hw, I40E_PRTDCB_RETSTCC(i));
+		reg &= ~(I40E_PRTDCB_RETSTCC_BWSHARE_MASK     |
+			 I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK |
+			 I40E_PRTDCB_RETSTCC_ETSTC_SHIFT);
+		reg |= ((u32)bw_share[i] << I40E_PRTDCB_RETSTCC_BWSHARE_SHIFT) &
+			 I40E_PRTDCB_RETSTCC_BWSHARE_MASK;
+		reg |= ((u32)mode[i] << I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT) &
+			 I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK;
+		reg |= ((u32)prio_type[i] << I40E_PRTDCB_RETSTCC_ETSTC_SHIFT) &
+			 I40E_PRTDCB_RETSTCC_ETSTC_MASK;
+		wr32(hw, I40E_PRTDCB_RETSTCC(i), reg);
+	}
+}
+
+/**
+ * i40e_dcb_hw_rx_up2tc_config
+ * @hw: pointer to the hw struct
+ * @prio_tc: priority to tc assignment indexed by priority
+ *
+ * Configure HW Rx UP2TC map as part of DCB configuration.
+ **/
+void i40e_dcb_hw_rx_up2tc_config(struct i40e_hw *hw, u8 *prio_tc)
+{
+	u32 reg = rd32(hw, I40E_PRTDCB_RUP2TC);
+#define I40E_UP2TC_REG(val, i) \
+		(((val) << I40E_PRTDCB_RUP2TC_UP##i##TC_SHIFT) & \
+		  I40E_PRTDCB_RUP2TC_UP##i##TC_MASK)
+
+	reg |= I40E_UP2TC_REG(prio_tc[0], 0);
+	reg |= I40E_UP2TC_REG(prio_tc[1], 1);
+	reg |= I40E_UP2TC_REG(prio_tc[2], 2);
+	reg |= I40E_UP2TC_REG(prio_tc[3], 3);
+	reg |= I40E_UP2TC_REG(prio_tc[4], 4);
+	reg |= I40E_UP2TC_REG(prio_tc[5], 5);
+	reg |= I40E_UP2TC_REG(prio_tc[6], 6);
+	reg |= I40E_UP2TC_REG(prio_tc[7], 7);
+
+	wr32(hw, I40E_PRTDCB_RUP2TC, reg);
+}
+
+/**
+ * i40e_dcb_hw_calculate_pool_sizes - configure dcb pool sizes
+ * @hw: pointer to the hw struct
+ * @num_ports: Number of available ports on the device
+ * @eee_enabled: EEE enabled for the given port
+ * @pfc_en: Bit map of PFC enabled traffic classes
+ * @mfs_tc: Array of max frame size for each traffic class
+ * @pb_cfg: pointer to packet buffer configuration
+ *
+ * Calculate the shared and dedicated per TC pool sizes,
+ * watermarks and threshold values.
+ **/
+void i40e_dcb_hw_calculate_pool_sizes(struct i40e_hw *hw,
+				      u8 num_ports, bool eee_enabled,
+				      u8 pfc_en, u32 *mfs_tc,
+				      struct i40e_rx_pb_config *pb_cfg)
+{
+	u32 pool_size[I40E_MAX_TRAFFIC_CLASS];
+	u32 high_wm[I40E_MAX_TRAFFIC_CLASS];
+	u32 low_wm[I40E_MAX_TRAFFIC_CLASS];
+	u32 total_pool_size = 0;
+	int shared_pool_size; /* Need signed variable */
+	u32 port_pb_size;
+	u32 mfs_max = 0;
+	u32 pcirtt;
+	u8 i;
+
+	/* Get the MFS(max) for the port */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (mfs_tc[i] > mfs_max)
+			mfs_max = mfs_tc[i];
+	}
+
+	pcirtt = I40E_BT2B(I40E_PCIRTT_LINK_SPEED_10G);
+
+	/* Calculate effective Rx PB size per port */
+	port_pb_size = I40E_DEVICE_RPB_SIZE / num_ports;
+	if (eee_enabled)
+		port_pb_size -= I40E_BT2B(I40E_EEE_TX_LPI_EXIT_TIME);
+	port_pb_size -= mfs_max;
+
+	/* Step 1 Calculating tc pool/shared pool sizes and watermarks */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (pfc_en & BIT(i)) {
+			low_wm[i] = (I40E_DCB_WATERMARK_START_FACTOR *
+				     mfs_tc[i]) + pcirtt;
+			high_wm[i] = low_wm[i];
+			high_wm[i] += ((mfs_max > I40E_MAX_FRAME_SIZE)
+					? mfs_max : I40E_MAX_FRAME_SIZE);
+			pool_size[i] = high_wm[i];
+			pool_size[i] += I40E_BT2B(I40E_STD_DV_TC(mfs_max,
+								mfs_tc[i]));
+		} else {
+			low_wm[i] = 0;
+			pool_size[i] = (I40E_DCB_WATERMARK_START_FACTOR *
+					mfs_tc[i]) + pcirtt;
+			high_wm[i] = pool_size[i];
+		}
+		total_pool_size += pool_size[i];
+	}
+
+	shared_pool_size = port_pb_size - total_pool_size;
+	if (shared_pool_size > 0) {
+		pb_cfg->shared_pool_size = shared_pool_size;
+		pb_cfg->shared_pool_high_wm = shared_pool_size;
+		pb_cfg->shared_pool_low_wm = 0;
+		for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+			pb_cfg->shared_pool_low_thresh[i] = 0;
+			pb_cfg->shared_pool_high_thresh[i] = shared_pool_size;
+			pb_cfg->tc_pool_size[i] = pool_size[i];
+			pb_cfg->tc_pool_high_wm[i] = high_wm[i];
+			pb_cfg->tc_pool_low_wm[i] = low_wm[i];
+		}
+
+	} else {
+		i40e_debug(hw, I40E_DEBUG_DCB,
+			   "The shared pool size for the port is negative %d.\n",
+			   shared_pool_size);
+	}
+}
+
+/**
+ * i40e_dcb_hw_rx_pb_config
+ * @hw: pointer to the hw struct
+ * @old_pb_cfg: Existing Rx Packet buffer configuration
+ * @new_pb_cfg: New Rx Packet buffer configuration
+ *
+ * Program the Rx Packet Buffer registers.
+ **/
+void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw,
+			      struct i40e_rx_pb_config *old_pb_cfg,
+			      struct i40e_rx_pb_config *new_pb_cfg)
+{
+	u32 old_val;
+	u32 new_val;
+	u32 reg;
+	u8 i;
+
+	/* The Rx Packet buffer register programming needs to be done in a
+	 * certain order and the following code is based on that
+	 * requirement.
+	 */
+
+	/* Program the shared pool low water mark per port if decreasing */
+	old_val = old_pb_cfg->shared_pool_low_wm;
+	new_val = new_pb_cfg->shared_pool_low_wm;
+	if (new_val < old_val) {
+		reg = rd32(hw, I40E_PRTRPB_SLW);
+		reg &= ~I40E_PRTRPB_SLW_SLW_MASK;
+		reg |= (new_val << I40E_PRTRPB_SLW_SLW_SHIFT) &
+			I40E_PRTRPB_SLW_SLW_MASK;
+		wr32(hw, I40E_PRTRPB_SLW, reg);
+	}
+
+	/* Program the shared pool low threshold and tc pool
+	 * low water mark per TC that are decreasing.
+	 */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		old_val = old_pb_cfg->shared_pool_low_thresh[i];
+		new_val = new_pb_cfg->shared_pool_low_thresh[i];
+		if (new_val < old_val) {
+			reg = rd32(hw, I40E_PRTRPB_SLT(i));
+			reg &= ~I40E_PRTRPB_SLT_SLT_TCN_MASK;
+			reg |= (new_val << I40E_PRTRPB_SLT_SLT_TCN_SHIFT) &
+				I40E_PRTRPB_SLT_SLT_TCN_MASK;
+			wr32(hw, I40E_PRTRPB_SLT(i), reg);
+		}
+
+		old_val = old_pb_cfg->tc_pool_low_wm[i];
+		new_val = new_pb_cfg->tc_pool_low_wm[i];
+		if (new_val < old_val) {
+			reg = rd32(hw, I40E_PRTRPB_DLW(i));
+			reg &= ~I40E_PRTRPB_DLW_DLW_TCN_MASK;
+			reg |= (new_val << I40E_PRTRPB_DLW_DLW_TCN_SHIFT) &
+				I40E_PRTRPB_DLW_DLW_TCN_MASK;
+			wr32(hw, I40E_PRTRPB_DLW(i), reg);
+		}
+	}
+
+	/* Program the shared pool high water mark per port if decreasing */
+	old_val = old_pb_cfg->shared_pool_high_wm;
+	new_val = new_pb_cfg->shared_pool_high_wm;
+	if (new_val < old_val) {
+		reg = rd32(hw, I40E_PRTRPB_SHW);
+		reg &= ~I40E_PRTRPB_SHW_SHW_MASK;
+		reg |= (new_val << I40E_PRTRPB_SHW_SHW_SHIFT) &
+			I40E_PRTRPB_SHW_SHW_MASK;
+		wr32(hw, I40E_PRTRPB_SHW, reg);
+	}
+
+	/* Program the shared pool high threshold and tc pool
+	 * high water mark per TC that are decreasing.
+	 */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		old_val = old_pb_cfg->shared_pool_high_thresh[i];
+		new_val = new_pb_cfg->shared_pool_high_thresh[i];
+		if (new_val < old_val) {
+			reg = rd32(hw, I40E_PRTRPB_SHT(i));
+			reg &= ~I40E_PRTRPB_SHT_SHT_TCN_MASK;
+			reg |= (new_val << I40E_PRTRPB_SHT_SHT_TCN_SHIFT) &
+				I40E_PRTRPB_SHT_SHT_TCN_MASK;
+			wr32(hw, I40E_PRTRPB_SHT(i), reg);
+		}
+
+		old_val = old_pb_cfg->tc_pool_high_wm[i];
+		new_val = new_pb_cfg->tc_pool_high_wm[i];
+		if (new_val < old_val) {
+			reg = rd32(hw, I40E_PRTRPB_DHW(i));
+			reg &= ~I40E_PRTRPB_DHW_DHW_TCN_MASK;
+			reg |= (new_val << I40E_PRTRPB_DHW_DHW_TCN_SHIFT) &
+				I40E_PRTRPB_DHW_DHW_TCN_MASK;
+			wr32(hw, I40E_PRTRPB_DHW(i), reg);
+		}
+	}
+
+	/* Write Dedicated Pool Sizes per TC */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		new_val = new_pb_cfg->tc_pool_size[i];
+		reg = rd32(hw, I40E_PRTRPB_DPS(i));
+		reg &= ~I40E_PRTRPB_DPS_DPS_TCN_MASK;
+		reg |= (new_val << I40E_PRTRPB_DPS_DPS_TCN_SHIFT) &
+			I40E_PRTRPB_DPS_DPS_TCN_MASK;
+		wr32(hw, I40E_PRTRPB_DPS(i), reg);
+	}
+
+	/* Write Shared Pool Size per port */
+	new_val = new_pb_cfg->shared_pool_size;
+	reg = rd32(hw, I40E_PRTRPB_SPS);
+	reg &= ~I40E_PRTRPB_SPS_SPS_MASK;
+	reg |= (new_val << I40E_PRTRPB_SPS_SPS_SHIFT) &
+		I40E_PRTRPB_SPS_SPS_MASK;
+	wr32(hw, I40E_PRTRPB_SPS, reg);
+
+	/* Program the shared pool low water mark per port if increasing */
+	old_val = old_pb_cfg->shared_pool_low_wm;
+	new_val = new_pb_cfg->shared_pool_low_wm;
+	if (new_val > old_val) {
+		reg = rd32(hw, I40E_PRTRPB_SLW);
+		reg &= ~I40E_PRTRPB_SLW_SLW_MASK;
+		reg |= (new_val << I40E_PRTRPB_SLW_SLW_SHIFT) &
+			I40E_PRTRPB_SLW_SLW_MASK;
+		wr32(hw, I40E_PRTRPB_SLW, reg);
+	}
+
+	/* Program the shared pool low threshold and tc pool
+	 * low water mark per TC that are increasing.
+	 */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		old_val = old_pb_cfg->shared_pool_low_thresh[i];
+		new_val = new_pb_cfg->shared_pool_low_thresh[i];
+		if (new_val > old_val) {
+			reg = rd32(hw, I40E_PRTRPB_SLT(i));
+			reg &= ~I40E_PRTRPB_SLT_SLT_TCN_MASK;
+			reg |= (new_val << I40E_PRTRPB_SLT_SLT_TCN_SHIFT) &
+				I40E_PRTRPB_SLT_SLT_TCN_MASK;
+			wr32(hw, I40E_PRTRPB_SLT(i), reg);
+		}
+
+		old_val = old_pb_cfg->tc_pool_low_wm[i];
+		new_val = new_pb_cfg->tc_pool_low_wm[i];
+		if (new_val > old_val) {
+			reg = rd32(hw, I40E_PRTRPB_DLW(i));
+			reg &= ~I40E_PRTRPB_DLW_DLW_TCN_MASK;
+			reg |= (new_val << I40E_PRTRPB_DLW_DLW_TCN_SHIFT) &
+				I40E_PRTRPB_DLW_DLW_TCN_MASK;
+			wr32(hw, I40E_PRTRPB_DLW(i), reg);
+		}
+	}
+
+	/* Program the shared pool high water mark per port if increasing */
+	old_val = old_pb_cfg->shared_pool_high_wm;
+	new_val = new_pb_cfg->shared_pool_high_wm;
+	if (new_val > old_val) {
+		reg = rd32(hw, I40E_PRTRPB_SHW);
+		reg &= ~I40E_PRTRPB_SHW_SHW_MASK;
+		reg |= (new_val << I40E_PRTRPB_SHW_SHW_SHIFT) &
+			I40E_PRTRPB_SHW_SHW_MASK;
+		wr32(hw, I40E_PRTRPB_SHW, reg);
+	}
+
+	/* Program the shared pool high threshold and tc pool
+	 * high water mark per TC that are increasing.
+	 */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		old_val = old_pb_cfg->shared_pool_high_thresh[i];
+		new_val = new_pb_cfg->shared_pool_high_thresh[i];
+		if (new_val > old_val) {
+			reg = rd32(hw, I40E_PRTRPB_SHT(i));
+			reg &= ~I40E_PRTRPB_SHT_SHT_TCN_MASK;
+			reg |= (new_val << I40E_PRTRPB_SHT_SHT_TCN_SHIFT) &
+				I40E_PRTRPB_SHT_SHT_TCN_MASK;
+			wr32(hw, I40E_PRTRPB_SHT(i), reg);
+		}
+
+		old_val = old_pb_cfg->tc_pool_high_wm[i];
+		new_val = new_pb_cfg->tc_pool_high_wm[i];
+		if (new_val > old_val) {
+			reg = rd32(hw, I40E_PRTRPB_DHW(i));
+			reg &= ~I40E_PRTRPB_DHW_DHW_TCN_MASK;
+			reg |= (new_val << I40E_PRTRPB_DHW_DHW_TCN_SHIFT) &
+				I40E_PRTRPB_DHW_DHW_TCN_MASK;
+			wr32(hw, I40E_PRTRPB_DHW(i), reg);
+		}
+	}
+}
+
+/**
+ * _i40e_read_lldp_cfg - generic read of LLDP Configuration data from NVM
+ * @hw: pointer to the HW structure
+ * @lldp_cfg: pointer to hold lldp configuration variables
+ * @module: address of the module pointer
+ * @word_offset: offset of LLDP configuration
+ *
+ * Reads the LLDP configuration data from NVM using passed addresses
+ **/
+static int _i40e_read_lldp_cfg(struct i40e_hw *hw,
+			       struct i40e_lldp_variables *lldp_cfg,
+			       u8 module, u32 word_offset)
+{
+	u32 address, offset = (2 * word_offset);
+	__le16 raw_mem;
+	int ret;
+	u16 mem;
+
+	ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+	if (ret)
+		return ret;
+
+	ret = i40e_aq_read_nvm(hw, 0x0, module * 2, sizeof(raw_mem), &raw_mem,
+			       true, NULL);
+	i40e_release_nvm(hw);
+	if (ret)
+		return ret;
+
+	mem = le16_to_cpu(raw_mem);
+	/* Check if this pointer needs to be read in word size or 4K sector
+	 * units.
+	 */
+	if (mem & I40E_PTR_TYPE)
+		address = (0x7FFF & mem) * 4096;
+	else
+		address = (0x7FFF & mem) * 2;
+
+	ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+	if (ret)
+		goto err_lldp_cfg;
+
+	ret = i40e_aq_read_nvm(hw, module, offset, sizeof(raw_mem), &raw_mem,
+			       true, NULL);
+	i40e_release_nvm(hw);
+	if (ret)
+		return ret;
+
+	mem = le16_to_cpu(raw_mem);
+	offset = mem + word_offset;
+	offset *= 2;
+
+	ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+	if (ret)
+		goto err_lldp_cfg;
+
+	ret = i40e_aq_read_nvm(hw, 0, address + offset,
+			       sizeof(struct i40e_lldp_variables), lldp_cfg,
+			       true, NULL);
+	i40e_release_nvm(hw);
+
+err_lldp_cfg:
+	return ret;
+}
+
+/**
+ * i40e_read_lldp_cfg - read LLDP Configuration data from NVM
+ * @hw: pointer to the HW structure
+ * @lldp_cfg: pointer to hold lldp configuration variables
+ *
+ * Reads the LLDP configuration data from NVM
+ **/
+int i40e_read_lldp_cfg(struct i40e_hw *hw,
+		       struct i40e_lldp_variables *lldp_cfg)
+{
+	int ret = 0;
+	u32 mem;
+
+	if (!lldp_cfg)
+		return -EINVAL;
+
+	ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+	if (ret)
+		return ret;
+
+	ret = i40e_aq_read_nvm(hw, I40E_SR_NVM_CONTROL_WORD, 0, sizeof(mem),
+			       &mem, true, NULL);
+	i40e_release_nvm(hw);
+	if (ret)
+		return ret;
+
+	/* Read a bit that holds information whether we are running flat or
+	 * structured NVM image. Flat image has LLDP configuration in shadow
+	 * ram, so there is a need to pass different addresses for both cases.
+	 */
+	if (mem & I40E_SR_NVM_MAP_STRUCTURE_TYPE) {
+		/* Flat NVM case */
+		ret = _i40e_read_lldp_cfg(hw, lldp_cfg, I40E_SR_EMP_MODULE_PTR,
+					  I40E_SR_LLDP_CFG_PTR);
+	} else {
+		/* Good old structured NVM image */
+		ret = _i40e_read_lldp_cfg(hw, lldp_cfg, I40E_EMP_MODULE_PTR,
+					  I40E_NVM_LLDP_CFG_PTR);
+	}
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
new file mode 100644
index 0000000000..6b60dc9b77
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
@@ -0,0 +1,283 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2021 Intel Corporation. */
+
+#ifndef _I40E_DCB_H_
+#define _I40E_DCB_H_
+
+#include "i40e_type.h"
+
+#define I40E_DCBX_STATUS_NOT_STARTED	0
+#define I40E_DCBX_STATUS_IN_PROGRESS	1
+#define I40E_DCBX_STATUS_DONE		2
+#define I40E_DCBX_STATUS_MULTIPLE_PEERS	3
+#define I40E_DCBX_STATUS_DISABLED	7
+
+#define I40E_TLV_TYPE_END		0
+#define I40E_TLV_TYPE_ORG		127
+
+#define I40E_IEEE_8021QAZ_OUI		0x0080C2
+#define I40E_IEEE_SUBTYPE_ETS_CFG	9
+#define I40E_IEEE_SUBTYPE_ETS_REC	10
+#define I40E_IEEE_SUBTYPE_PFC_CFG	11
+#define I40E_IEEE_SUBTYPE_APP_PRI	12
+
+#define I40E_CEE_DCBX_OUI		0x001b21
+#define I40E_CEE_DCBX_TYPE		2
+
+#define I40E_CEE_SUBTYPE_CTRL		1
+#define I40E_CEE_SUBTYPE_PG_CFG		2
+#define I40E_CEE_SUBTYPE_PFC_CFG	3
+#define I40E_CEE_SUBTYPE_APP_PRI	4
+
+#define I40E_CEE_MAX_FEAT_TYPE		3
+#define I40E_LLDP_CURRENT_STATUS_XL710_OFFSET	0x2B
+#define I40E_LLDP_CURRENT_STATUS_X722_OFFSET	0x31
+#define I40E_LLDP_CURRENT_STATUS_OFFSET		1
+#define I40E_LLDP_CURRENT_STATUS_SIZE		1
+
+/* Defines for LLDP TLV header */
+#define I40E_LLDP_TLV_LEN_SHIFT		0
+#define I40E_LLDP_TLV_LEN_MASK		(0x01FF << I40E_LLDP_TLV_LEN_SHIFT)
+#define I40E_LLDP_TLV_TYPE_SHIFT	9
+#define I40E_LLDP_TLV_TYPE_MASK		(0x7F << I40E_LLDP_TLV_TYPE_SHIFT)
+#define I40E_LLDP_TLV_SUBTYPE_SHIFT	0
+#define I40E_LLDP_TLV_SUBTYPE_MASK	(0xFF << I40E_LLDP_TLV_SUBTYPE_SHIFT)
+#define I40E_LLDP_TLV_OUI_SHIFT		8
+#define I40E_LLDP_TLV_OUI_MASK		(0xFFFFFF << I40E_LLDP_TLV_OUI_SHIFT)
+
+/* Defines for IEEE ETS TLV */
+#define I40E_IEEE_ETS_MAXTC_SHIFT	0
+#define I40E_IEEE_ETS_MAXTC_MASK	(0x7 << I40E_IEEE_ETS_MAXTC_SHIFT)
+#define I40E_IEEE_ETS_CBS_SHIFT		6
+#define I40E_IEEE_ETS_CBS_MASK		BIT(I40E_IEEE_ETS_CBS_SHIFT)
+#define I40E_IEEE_ETS_WILLING_SHIFT	7
+#define I40E_IEEE_ETS_WILLING_MASK	BIT(I40E_IEEE_ETS_WILLING_SHIFT)
+#define I40E_IEEE_ETS_PRIO_0_SHIFT	0
+#define I40E_IEEE_ETS_PRIO_0_MASK	(0x7 << I40E_IEEE_ETS_PRIO_0_SHIFT)
+#define I40E_IEEE_ETS_PRIO_1_SHIFT	4
+#define I40E_IEEE_ETS_PRIO_1_MASK	(0x7 << I40E_IEEE_ETS_PRIO_1_SHIFT)
+#define I40E_CEE_PGID_PRIO_0_SHIFT	0
+#define I40E_CEE_PGID_PRIO_0_MASK	(0xF << I40E_CEE_PGID_PRIO_0_SHIFT)
+#define I40E_CEE_PGID_PRIO_1_SHIFT	4
+#define I40E_CEE_PGID_PRIO_1_MASK	(0xF << I40E_CEE_PGID_PRIO_1_SHIFT)
+#define I40E_CEE_PGID_STRICT		15
+
+/* Defines for IEEE TSA types */
+#define I40E_IEEE_TSA_STRICT		0
+#define I40E_IEEE_TSA_ETS		2
+
+/* Defines for IEEE PFC TLV */
+#define I40E_DCB_PFC_ENABLED		2
+#define I40E_DCB_PFC_FORCED_NUM_TC	2
+#define I40E_IEEE_PFC_CAP_SHIFT		0
+#define I40E_IEEE_PFC_CAP_MASK		(0xF << I40E_IEEE_PFC_CAP_SHIFT)
+#define I40E_IEEE_PFC_MBC_SHIFT		6
+#define I40E_IEEE_PFC_MBC_MASK		BIT(I40E_IEEE_PFC_MBC_SHIFT)
+#define I40E_IEEE_PFC_WILLING_SHIFT	7
+#define I40E_IEEE_PFC_WILLING_MASK	BIT(I40E_IEEE_PFC_WILLING_SHIFT)
+
+/* Defines for IEEE APP TLV */
+#define I40E_IEEE_APP_SEL_SHIFT		0
+#define I40E_IEEE_APP_SEL_MASK		(0x7 << I40E_IEEE_APP_SEL_SHIFT)
+#define I40E_IEEE_APP_PRIO_SHIFT	5
+#define I40E_IEEE_APP_PRIO_MASK		(0x7 << I40E_IEEE_APP_PRIO_SHIFT)
+
+/* TLV definitions for preparing MIB */
+#define I40E_TLV_ID_CHASSIS_ID		0
+#define I40E_TLV_ID_PORT_ID		1
+#define I40E_TLV_ID_TIME_TO_LIVE	2
+#define I40E_IEEE_TLV_ID_ETS_CFG	3
+#define I40E_IEEE_TLV_ID_ETS_REC	4
+#define I40E_IEEE_TLV_ID_PFC_CFG	5
+#define I40E_IEEE_TLV_ID_APP_PRI	6
+#define I40E_TLV_ID_END_OF_LLDPPDU	7
+#define I40E_TLV_ID_START		I40E_IEEE_TLV_ID_ETS_CFG
+
+#define I40E_IEEE_TLV_HEADER_LENGTH	2
+#define I40E_IEEE_ETS_TLV_LENGTH	25
+#define I40E_IEEE_PFC_TLV_LENGTH	6
+#define I40E_IEEE_APP_TLV_LENGTH	11
+
+/* Defines for default SW DCB config */
+#define I40E_IEEE_DEFAULT_ETS_TCBW	100
+#define I40E_IEEE_DEFAULT_ETS_WILLING	1
+#define I40E_IEEE_DEFAULT_PFC_WILLING	1
+#define I40E_IEEE_DEFAULT_NUM_APPS	1
+#define I40E_IEEE_DEFAULT_APP_PRIO	3
+
+#pragma pack(1)
+/* IEEE 802.1AB LLDP Organization specific TLV */
+struct i40e_lldp_org_tlv {
+	__be16 typelength;
+	__be32 ouisubtype;
+	u8 tlvinfo[1];
+};
+
+struct i40e_cee_tlv_hdr {
+	__be16 typelen;
+	u8 operver;
+	u8 maxver;
+};
+
+struct i40e_cee_ctrl_tlv {
+	struct i40e_cee_tlv_hdr hdr;
+	__be32 seqno;
+	__be32 ackno;
+};
+
+struct i40e_cee_feat_tlv {
+	struct i40e_cee_tlv_hdr hdr;
+	u8 en_will_err; /* Bits: |En|Will|Err|Reserved(5)| */
+#define I40E_CEE_FEAT_TLV_ENABLE_MASK	0x80
+#define I40E_CEE_FEAT_TLV_WILLING_MASK	0x40
+#define I40E_CEE_FEAT_TLV_ERR_MASK	0x20
+	u8 subtype;
+	u8 tlvinfo[1];
+};
+
+struct i40e_cee_app_prio {
+	__be16 protocol;
+	u8 upper_oui_sel; /* Bits: |Upper OUI(6)|Selector(2)| */
+#define I40E_CEE_APP_SELECTOR_MASK	0x03
+	__be16 lower_oui;
+	u8 prio_map;
+};
+#pragma pack()
+
+enum i40e_get_fw_lldp_status_resp {
+	I40E_GET_FW_LLDP_STATUS_DISABLED = 0,
+	I40E_GET_FW_LLDP_STATUS_ENABLED = 1
+};
+
+/* Data structures to pass for SW DCBX */
+struct i40e_rx_pb_config {
+	u32	shared_pool_size;
+	u32	shared_pool_high_wm;
+	u32	shared_pool_low_wm;
+	u32	shared_pool_high_thresh[I40E_MAX_TRAFFIC_CLASS];
+	u32	shared_pool_low_thresh[I40E_MAX_TRAFFIC_CLASS];
+	u32	tc_pool_size[I40E_MAX_TRAFFIC_CLASS];
+	u32	tc_pool_high_wm[I40E_MAX_TRAFFIC_CLASS];
+	u32	tc_pool_low_wm[I40E_MAX_TRAFFIC_CLASS];
+};
+
+enum i40e_dcb_arbiter_mode {
+	I40E_DCB_ARB_MODE_STRICT_PRIORITY = 0,
+	I40E_DCB_ARB_MODE_ROUND_ROBIN = 1
+};
+
+#define I40E_DCB_DEFAULT_MAX_EXPONENT		0xB
+#define I40E_DEFAULT_PAUSE_TIME			0xffff
+#define I40E_MAX_FRAME_SIZE			4608 /* 4.5 KB */
+
+#define I40E_DEVICE_RPB_SIZE			968000 /* 968 KB */
+
+/* BitTimes (BT) conversion */
+#define I40E_BT2KB(BT) (((BT) + (8 * 1024 - 1)) / (8 * 1024))
+#define I40E_B2BT(BT) ((BT) * 8)
+#define I40E_BT2B(BT) (((BT) + (8 - 1)) / 8)
+
+/* Max Frame(TC) = MFS(max) + MFS(TC) */
+#define I40E_MAX_FRAME_TC(mfs_max, mfs_tc)	I40E_B2BT((mfs_max) + (mfs_tc))
+
+/* EEE Tx LPI Exit time in Bit Times */
+#define I40E_EEE_TX_LPI_EXIT_TIME		142500
+
+/* PCI Round Trip Time in Bit Times */
+#define I40E_PCIRTT_LINK_SPEED_10G		20000
+#define I40E_PCIRTT_BYTE_LINK_SPEED_20G		40000
+#define I40E_PCIRTT_BYTE_LINK_SPEED_40G		80000
+
+/* PFC Frame Delay Bit Times */
+#define I40E_PFC_FRAME_DELAY			672
+
+/* Worst case Cable (10GBase-T) Delay Bit Times */
+#define I40E_CABLE_DELAY			5556
+
+/* Higher Layer Delay @10G Bit Times */
+#define I40E_HIGHER_LAYER_DELAY_10G		6144
+
+/* Interface Delays in Bit Times */
+/* TODO: Add for other link speeds 20G/40G/etc. */
+#define I40E_INTERFACE_DELAY_10G_MAC_CONTROL	8192
+#define I40E_INTERFACE_DELAY_10G_MAC		8192
+#define I40E_INTERFACE_DELAY_10G_RS		8192
+
+#define I40E_INTERFACE_DELAY_XGXS		2048
+#define I40E_INTERFACE_DELAY_XAUI		2048
+
+#define I40E_INTERFACE_DELAY_10G_BASEX_PCS	2048
+#define I40E_INTERFACE_DELAY_10G_BASER_PCS	3584
+#define I40E_INTERFACE_DELAY_LX4_PMD		512
+#define I40E_INTERFACE_DELAY_CX4_PMD		512
+#define I40E_INTERFACE_DELAY_SERIAL_PMA		512
+#define I40E_INTERFACE_DELAY_PMD		512
+
+#define I40E_INTERFACE_DELAY_10G_BASET		25600
+
+/* Hardware RX DCB config related defines */
+#define I40E_DCB_1_PORT_THRESHOLD		0xF
+#define I40E_DCB_1_PORT_FIFO_SIZE		0x10
+#define I40E_DCB_2_PORT_THRESHOLD_LOW_NUM_TC	0xF
+#define I40E_DCB_2_PORT_FIFO_SIZE_LOW_NUM_TC	0x10
+#define I40E_DCB_2_PORT_THRESHOLD_HIGH_NUM_TC	0xC
+#define I40E_DCB_2_PORT_FIFO_SIZE_HIGH_NUM_TC	0x8
+#define I40E_DCB_4_PORT_THRESHOLD_LOW_NUM_TC	0x9
+#define I40E_DCB_4_PORT_FIFO_SIZE_LOW_NUM_TC	0x8
+#define I40E_DCB_4_PORT_THRESHOLD_HIGH_NUM_TC	0x6
+#define I40E_DCB_4_PORT_FIFO_SIZE_HIGH_NUM_TC	0x4
+#define I40E_DCB_WATERMARK_START_FACTOR		0x2
+
+/* delay values for with 10G BaseT in Bit Times */
+#define I40E_INTERFACE_DELAY_10G_COPPER	\
+	(I40E_INTERFACE_DELAY_10G_MAC + (2 * I40E_INTERFACE_DELAY_XAUI) \
+	 + I40E_INTERFACE_DELAY_10G_BASET)
+#define I40E_DV_TC(mfs_max, mfs_tc) \
+		((2 * I40E_MAX_FRAME_TC(mfs_max, mfs_tc)) \
+		 + I40E_PFC_FRAME_DELAY \
+		 + (2 * I40E_CABLE_DELAY) \
+		 + (2 * I40E_INTERFACE_DELAY_10G_COPPER) \
+		 + I40E_HIGHER_LAYER_DELAY_10G)
+static inline u32 I40E_STD_DV_TC(u32 mfs_max, u32 mfs_tc)
+{
+	return I40E_DV_TC(mfs_max, mfs_tc) + I40E_B2BT(mfs_max);
+}
+
+/* APIs for SW DCBX */
+void i40e_dcb_hw_rx_fifo_config(struct i40e_hw *hw,
+				enum i40e_dcb_arbiter_mode ets_mode,
+				enum i40e_dcb_arbiter_mode non_ets_mode,
+				u32 max_exponent, u8 lltc_map);
+void i40e_dcb_hw_rx_cmd_monitor_config(struct i40e_hw *hw,
+				       u8 num_tc, u8 num_ports);
+void i40e_dcb_hw_pfc_config(struct i40e_hw *hw,
+			    u8 pfc_en, u8 *prio_tc);
+void i40e_dcb_hw_set_num_tc(struct i40e_hw *hw, u8 num_tc);
+u8 i40e_dcb_hw_get_num_tc(struct i40e_hw *hw);
+void i40e_dcb_hw_rx_ets_bw_config(struct i40e_hw *hw, u8 *bw_share,
+				  u8 *mode, u8 *prio_type);
+void i40e_dcb_hw_rx_up2tc_config(struct i40e_hw *hw, u8 *prio_tc);
+void i40e_dcb_hw_calculate_pool_sizes(struct i40e_hw *hw,
+				      u8 num_ports, bool eee_enabled,
+				      u8 pfc_en, u32 *mfs_tc,
+				      struct i40e_rx_pb_config *pb_cfg);
+void i40e_dcb_hw_rx_pb_config(struct i40e_hw *hw,
+			      struct i40e_rx_pb_config *old_pb_cfg,
+			      struct i40e_rx_pb_config *new_pb_cfg);
+int i40e_get_dcbx_status(struct i40e_hw *hw,
+			 u16 *status);
+int i40e_lldp_to_dcb_config(u8 *lldpmib,
+			    struct i40e_dcbx_config *dcbcfg);
+int i40e_aq_get_dcb_config(struct i40e_hw *hw, u8 mib_type,
+			   u8 bridgetype,
+			   struct i40e_dcbx_config *dcbcfg);
+int i40e_get_dcb_config(struct i40e_hw *hw);
+int i40e_init_dcb(struct i40e_hw *hw,
+		  bool enable_mib_change);
+int
+i40e_get_fw_lldp_status(struct i40e_hw *hw,
+			enum i40e_get_fw_lldp_status_resp *lldp_status);
+int i40e_set_dcb_config(struct i40e_hw *hw);
+int i40e_dcb_config_to_lldp(u8 *lldpmib, u16 *miblen,
+			    struct i40e_dcbx_config *dcbcfg);
+#endif /* _I40E_DCB_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
new file mode 100644
index 0000000000..195421d863
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c
@@ -0,0 +1,1036 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2021 Intel Corporation. */
+
+#ifdef CONFIG_I40E_DCB
+#include "i40e.h"
+#include <net/dcbnl.h>
+
+#define I40E_DCBNL_STATUS_SUCCESS	0
+#define I40E_DCBNL_STATUS_ERROR		1
+static bool i40e_dcbnl_find_app(struct i40e_dcbx_config *cfg,
+				struct i40e_dcb_app_priority_table *app);
+/**
+ * i40e_get_pfc_delay - retrieve PFC Link Delay
+ * @hw: pointer to hardware struct
+ * @delay: holds the PFC Link delay value
+ *
+ * Returns PFC Link Delay from the PRTDCB_GENC.PFCLDA
+ **/
+static void i40e_get_pfc_delay(struct i40e_hw *hw, u16 *delay)
+{
+	u32 val;
+
+	val = rd32(hw, I40E_PRTDCB_GENC);
+	*delay = (u16)((val & I40E_PRTDCB_GENC_PFCLDA_MASK) >>
+		       I40E_PRTDCB_GENC_PFCLDA_SHIFT);
+}
+
+/**
+ * i40e_dcbnl_ieee_getets - retrieve local IEEE ETS configuration
+ * @dev: the corresponding netdev
+ * @ets: structure to hold the ETS information
+ *
+ * Returns local IEEE ETS configuration
+ **/
+static int i40e_dcbnl_ieee_getets(struct net_device *dev,
+				  struct ieee_ets *ets)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(dev);
+	struct i40e_dcbx_config *dcbxcfg;
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	dcbxcfg = &pf->hw.local_dcbx_config;
+	ets->willing = dcbxcfg->etscfg.willing;
+	ets->ets_cap = I40E_MAX_TRAFFIC_CLASS;
+	ets->cbs = dcbxcfg->etscfg.cbs;
+	memcpy(ets->tc_tx_bw, dcbxcfg->etscfg.tcbwtable,
+		sizeof(ets->tc_tx_bw));
+	memcpy(ets->tc_rx_bw, dcbxcfg->etscfg.tcbwtable,
+		sizeof(ets->tc_rx_bw));
+	memcpy(ets->tc_tsa, dcbxcfg->etscfg.tsatable,
+		sizeof(ets->tc_tsa));
+	memcpy(ets->prio_tc, dcbxcfg->etscfg.prioritytable,
+		sizeof(ets->prio_tc));
+	memcpy(ets->tc_reco_bw, dcbxcfg->etsrec.tcbwtable,
+		sizeof(ets->tc_reco_bw));
+	memcpy(ets->tc_reco_tsa, dcbxcfg->etsrec.tsatable,
+		sizeof(ets->tc_reco_tsa));
+	memcpy(ets->reco_prio_tc, dcbxcfg->etscfg.prioritytable,
+		sizeof(ets->reco_prio_tc));
+
+	return 0;
+}
+
+/**
+ * i40e_dcbnl_ieee_getpfc - retrieve local IEEE PFC configuration
+ * @dev: the corresponding netdev
+ * @pfc: structure to hold the PFC information
+ *
+ * Returns local IEEE PFC configuration
+ **/
+static int i40e_dcbnl_ieee_getpfc(struct net_device *dev,
+				  struct ieee_pfc *pfc)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(dev);
+	struct i40e_dcbx_config *dcbxcfg;
+	struct i40e_hw *hw = &pf->hw;
+	int i;
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	dcbxcfg = &hw->local_dcbx_config;
+	pfc->pfc_cap = dcbxcfg->pfc.pfccap;
+	pfc->pfc_en = dcbxcfg->pfc.pfcenable;
+	pfc->mbc = dcbxcfg->pfc.mbc;
+	i40e_get_pfc_delay(hw, &pfc->delay);
+
+	/* Get Requests/Indications */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		pfc->requests[i] = pf->stats.priority_xoff_tx[i];
+		pfc->indications[i] = pf->stats.priority_xoff_rx[i];
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_dcbnl_ieee_setets - set IEEE ETS configuration
+ * @netdev: the corresponding netdev
+ * @ets: structure to hold the ETS information
+ *
+ * Set IEEE ETS configuration
+ **/
+static int i40e_dcbnl_ieee_setets(struct net_device *netdev,
+				  struct ieee_ets *ets)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+	struct i40e_dcbx_config *old_cfg;
+	int i, ret;
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
+	    (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED))
+		return -EINVAL;
+
+	old_cfg = &pf->hw.local_dcbx_config;
+	/* Copy current config into temp */
+	pf->tmp_cfg = *old_cfg;
+
+	/* Update the ETS configuration for temp */
+	pf->tmp_cfg.etscfg.willing = ets->willing;
+	pf->tmp_cfg.etscfg.maxtcs = I40E_MAX_TRAFFIC_CLASS;
+	pf->tmp_cfg.etscfg.cbs = ets->cbs;
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		pf->tmp_cfg.etscfg.tcbwtable[i] = ets->tc_tx_bw[i];
+		pf->tmp_cfg.etscfg.tsatable[i] = ets->tc_tsa[i];
+		pf->tmp_cfg.etscfg.prioritytable[i] = ets->prio_tc[i];
+		pf->tmp_cfg.etsrec.tcbwtable[i] = ets->tc_reco_bw[i];
+		pf->tmp_cfg.etsrec.tsatable[i] = ets->tc_reco_tsa[i];
+		pf->tmp_cfg.etsrec.prioritytable[i] = ets->reco_prio_tc[i];
+	}
+
+	/* Commit changes to HW */
+	ret = i40e_hw_dcb_config(pf, &pf->tmp_cfg);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Failed setting DCB ETS configuration err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_dcbnl_ieee_setpfc - set local IEEE PFC configuration
+ * @netdev: the corresponding netdev
+ * @pfc: structure to hold the PFC information
+ *
+ * Sets local IEEE PFC configuration
+ **/
+static int i40e_dcbnl_ieee_setpfc(struct net_device *netdev,
+				  struct ieee_pfc *pfc)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+	struct i40e_dcbx_config *old_cfg;
+	int ret;
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
+	    (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED))
+		return -EINVAL;
+
+	old_cfg = &pf->hw.local_dcbx_config;
+	/* Copy current config into temp */
+	pf->tmp_cfg = *old_cfg;
+	if (pfc->pfc_cap)
+		pf->tmp_cfg.pfc.pfccap = pfc->pfc_cap;
+	else
+		pf->tmp_cfg.pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;
+	pf->tmp_cfg.pfc.pfcenable = pfc->pfc_en;
+
+	ret = i40e_hw_dcb_config(pf, &pf->tmp_cfg);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Failed setting DCB PFC configuration err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_dcbnl_ieee_setapp - set local IEEE App configuration
+ * @netdev: the corresponding netdev
+ * @app: structure to hold the Application information
+ *
+ * Sets local IEEE App configuration
+ **/
+static int i40e_dcbnl_ieee_setapp(struct net_device *netdev,
+				  struct dcb_app *app)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+	struct i40e_dcb_app_priority_table new_app;
+	struct i40e_dcbx_config *old_cfg;
+	int ret;
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
+	    (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED))
+		return -EINVAL;
+
+	old_cfg = &pf->hw.local_dcbx_config;
+	if (old_cfg->numapps == I40E_DCBX_MAX_APPS)
+		return -EINVAL;
+
+	ret = dcb_ieee_setapp(netdev, app);
+	if (ret)
+		return ret;
+
+	new_app.selector = app->selector;
+	new_app.protocolid = app->protocol;
+	new_app.priority = app->priority;
+	/* Already internally available */
+	if (i40e_dcbnl_find_app(old_cfg, &new_app))
+		return 0;
+
+	/* Copy current config into temp */
+	pf->tmp_cfg = *old_cfg;
+	/* Add the app */
+	pf->tmp_cfg.app[pf->tmp_cfg.numapps++] = new_app;
+
+	ret = i40e_hw_dcb_config(pf, &pf->tmp_cfg);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Failed setting DCB configuration err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_dcbnl_ieee_delapp - delete local IEEE App configuration
+ * @netdev: the corresponding netdev
+ * @app: structure to hold the Application information
+ *
+ * Deletes local IEEE App configuration other than the first application
+ * required by firmware
+ **/
+static int i40e_dcbnl_ieee_delapp(struct net_device *netdev,
+				  struct dcb_app *app)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+	struct i40e_dcbx_config *old_cfg;
+	int i, j, ret;
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
+	    (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED))
+		return -EINVAL;
+
+	ret = dcb_ieee_delapp(netdev, app);
+	if (ret)
+		return ret;
+
+	old_cfg = &pf->hw.local_dcbx_config;
+	/* Need one app for FW so keep it */
+	if (old_cfg->numapps == 1)
+		return 0;
+
+	/* Copy current config into temp */
+	pf->tmp_cfg = *old_cfg;
+
+	/* Find and reset the app */
+	for (i = 1; i < pf->tmp_cfg.numapps; i++) {
+		if (app->selector == pf->tmp_cfg.app[i].selector &&
+		    app->protocol == pf->tmp_cfg.app[i].protocolid &&
+		    app->priority == pf->tmp_cfg.app[i].priority) {
+			/* Reset the app data */
+			pf->tmp_cfg.app[i].selector = 0;
+			pf->tmp_cfg.app[i].protocolid = 0;
+			pf->tmp_cfg.app[i].priority = 0;
+			break;
+		}
+	}
+
+	/* If the specific DCB app not found */
+	if (i == pf->tmp_cfg.numapps)
+		return -EINVAL;
+
+	pf->tmp_cfg.numapps--;
+	/* Overwrite the tmp_cfg app */
+	for (j = i; j < pf->tmp_cfg.numapps; j++)
+		pf->tmp_cfg.app[j] = old_cfg->app[j + 1];
+
+	ret = i40e_hw_dcb_config(pf, &pf->tmp_cfg);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Failed setting DCB configuration err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_dcbnl_getstate - Get DCB enabled state
+ * @netdev: the corresponding netdev
+ *
+ * Get the current DCB enabled state
+ **/
+static u8 i40e_dcbnl_getstate(struct net_device *netdev)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+
+	dev_dbg(&pf->pdev->dev, "DCB state=%d\n",
+		!!(pf->flags & I40E_FLAG_DCB_ENABLED));
+	return !!(pf->flags & I40E_FLAG_DCB_ENABLED);
+}
+
+/**
+ * i40e_dcbnl_setstate - Set DCB state
+ * @netdev: the corresponding netdev
+ * @state: enable or disable
+ *
+ * Set the DCB state
+ **/
+static u8 i40e_dcbnl_setstate(struct net_device *netdev, u8 state)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+	int ret = I40E_DCBNL_STATUS_SUCCESS;
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) ||
+	    (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED))
+		return ret;
+
+	dev_dbg(&pf->pdev->dev, "new state=%d current state=%d\n",
+		state, (pf->flags & I40E_FLAG_DCB_ENABLED) ? 1 : 0);
+	/* Nothing to do */
+	if (!state == !(pf->flags & I40E_FLAG_DCB_ENABLED))
+		return ret;
+
+	if (i40e_is_sw_dcb(pf)) {
+		if (state) {
+			pf->flags |= I40E_FLAG_DCB_ENABLED;
+			memcpy(&pf->hw.desired_dcbx_config,
+			       &pf->hw.local_dcbx_config,
+			       sizeof(struct i40e_dcbx_config));
+		} else {
+			pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+		}
+	} else {
+		/* Cannot directly manipulate FW LLDP Agent */
+		ret = I40E_DCBNL_STATUS_ERROR;
+	}
+	return ret;
+}
+
+/**
+ * i40e_dcbnl_set_pg_tc_cfg_tx - Set CEE PG Tx config
+ * @netdev: the corresponding netdev
+ * @tc: the corresponding traffic class
+ * @prio_type: the traffic priority type
+ * @bwg_id: the BW group id the traffic class belongs to
+ * @bw_pct: the BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding tc
+ *
+ * Set Tx PG settings for CEE mode
+ **/
+static void i40e_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
+					u8 prio_type, u8 bwg_id, u8 bw_pct,
+					u8 up_map)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+	int i;
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) ||
+	    (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED))
+		return;
+
+	/* LLTC not supported yet */
+	if (tc >= I40E_MAX_TRAFFIC_CLASS)
+		return;
+
+	/* prio_type, bwg_id and bw_pct per UP are not supported */
+
+	/* Use only up_map to map tc */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (up_map & BIT(i))
+			pf->tmp_cfg.etscfg.prioritytable[i] = tc;
+	}
+	pf->tmp_cfg.etscfg.tsatable[tc] = I40E_IEEE_TSA_ETS;
+	dev_dbg(&pf->pdev->dev,
+		"Set PG config tc=%d bwg_id=%d prio_type=%d bw_pct=%d up_map=%d\n",
+		tc, bwg_id, prio_type, bw_pct, up_map);
+}
+
+/**
+ * i40e_dcbnl_set_pg_bwg_cfg_tx - Set CEE PG Tx BW config
+ * @netdev: the corresponding netdev
+ * @pgid: the corresponding traffic class
+ * @bw_pct: the BW percentage for the specified traffic class
+ *
+ * Set Tx BW settings for CEE mode
+ **/
+static void i40e_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int pgid,
+					 u8 bw_pct)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) ||
+	    (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED))
+		return;
+
+	/* LLTC not supported yet */
+	if (pgid >= I40E_MAX_TRAFFIC_CLASS)
+		return;
+
+	pf->tmp_cfg.etscfg.tcbwtable[pgid] = bw_pct;
+	dev_dbg(&pf->pdev->dev, "Set PG BW config tc=%d bw_pct=%d\n",
+		pgid, bw_pct);
+}
+
+/**
+ * i40e_dcbnl_set_pg_tc_cfg_rx - Set CEE PG Rx config
+ * @netdev: the corresponding netdev
+ * @prio: the corresponding traffic class
+ * @prio_type: the traffic priority type
+ * @pgid: the BW group id the traffic class belongs to
+ * @bw_pct: the BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding tc
+ *
+ * Set Rx BW settings for CEE mode. The hardware does not support this
+ * so we won't allow setting of this parameter.
+ **/
+static void i40e_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev,
+					int __always_unused prio,
+					u8 __always_unused prio_type,
+					u8 __always_unused pgid,
+					u8 __always_unused bw_pct,
+					u8 __always_unused up_map)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+
+	dev_dbg(&pf->pdev->dev, "Rx TC PG Config Not Supported.\n");
+}
+
+/**
+ * i40e_dcbnl_set_pg_bwg_cfg_rx - Set CEE PG Rx config
+ * @netdev: the corresponding netdev
+ * @pgid: the corresponding traffic class
+ * @bw_pct: the BW percentage for the specified traffic class
+ *
+ * Set Rx BW settings for CEE mode. The hardware does not support this
+ * so we won't allow setting of this parameter.
+ **/
+static void i40e_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int pgid,
+					 u8 bw_pct)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+
+	dev_dbg(&pf->pdev->dev, "Rx BWG PG Config Not Supported.\n");
+}
+
+/**
+ * i40e_dcbnl_get_pg_tc_cfg_tx - Get CEE PG Tx config
+ * @netdev: the corresponding netdev
+ * @prio: the corresponding user priority
+ * @prio_type: traffic priority type
+ * @pgid: the BW group ID the traffic class belongs to
+ * @bw_pct: BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ *
+ * Get Tx PG settings for CEE mode
+ **/
+static void i40e_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int prio,
+					u8 __always_unused *prio_type,
+					u8 *pgid,
+					u8 __always_unused *bw_pct,
+					u8 __always_unused *up_map)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) ||
+	    (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED))
+		return;
+
+	if (prio >= I40E_MAX_USER_PRIORITY)
+		return;
+
+	*pgid = pf->hw.local_dcbx_config.etscfg.prioritytable[prio];
+	dev_dbg(&pf->pdev->dev, "Get PG config prio=%d tc=%d\n",
+		prio, *pgid);
+}
+
+/**
+ * i40e_dcbnl_get_pg_bwg_cfg_tx - Get CEE PG BW config
+ * @netdev: the corresponding netdev
+ * @pgid: the corresponding traffic class
+ * @bw_pct: the BW percentage for the corresponding TC
+ *
+ * Get Tx BW settings for given TC in CEE mode
+ **/
+static void i40e_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int pgid,
+					 u8 *bw_pct)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) ||
+	    (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED))
+		return;
+
+	if (pgid >= I40E_MAX_TRAFFIC_CLASS)
+		return;
+
+	*bw_pct = pf->hw.local_dcbx_config.etscfg.tcbwtable[pgid];
+	dev_dbg(&pf->pdev->dev, "Get PG BW config tc=%d bw_pct=%d\n",
+		pgid, *bw_pct);
+}
+
+/**
+ * i40e_dcbnl_get_pg_tc_cfg_rx - Get CEE PG Rx config
+ * @netdev: the corresponding netdev
+ * @prio: the corresponding user priority
+ * @prio_type: the traffic priority type
+ * @pgid: the PG ID
+ * @bw_pct: the BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ *
+ * Get Rx PG settings for CEE mode. The UP2TC map is applied in same
+ * manner for Tx and Rx (symmetrical) so return the TC information for
+ * given priority accordingly.
+ **/
+static void i40e_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int prio,
+					u8 *prio_type, u8 *pgid, u8 *bw_pct,
+					u8 *up_map)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) ||
+	    (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED))
+		return;
+
+	if (prio >= I40E_MAX_USER_PRIORITY)
+		return;
+
+	*pgid = pf->hw.local_dcbx_config.etscfg.prioritytable[prio];
+}
+
+/**
+ * i40e_dcbnl_get_pg_bwg_cfg_rx - Get CEE PG BW Rx config
+ * @netdev: the corresponding netdev
+ * @pgid: the corresponding traffic class
+ * @bw_pct: the BW percentage for the corresponding TC
+ *
+ * Get Rx BW settings for given TC in CEE mode
+ * The adapter doesn't support Rx ETS and runs in strict priority
+ * mode in Rx path and hence just return 0.
+ **/
+static void i40e_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int pgid,
+					 u8 *bw_pct)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) ||
+	    (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED))
+		return;
+	*bw_pct = 0;
+}
+
+/**
+ * i40e_dcbnl_set_pfc_cfg - Set CEE PFC configuration
+ * @netdev: the corresponding netdev
+ * @prio: the corresponding user priority
+ * @setting: the PFC setting for given priority
+ *
+ * Set the PFC enabled/disabled setting for given user priority
+ **/
+static void i40e_dcbnl_set_pfc_cfg(struct net_device *netdev, int prio,
+				   u8 setting)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) ||
+	    (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED))
+		return;
+
+	if (prio >= I40E_MAX_USER_PRIORITY)
+		return;
+
+	pf->tmp_cfg.pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;
+	if (setting)
+		pf->tmp_cfg.pfc.pfcenable |= BIT(prio);
+	else
+		pf->tmp_cfg.pfc.pfcenable &= ~BIT(prio);
+	dev_dbg(&pf->pdev->dev,
+		"Set PFC Config up=%d setting=%d pfcenable=0x%x\n",
+		prio, setting, pf->tmp_cfg.pfc.pfcenable);
+}
+
+/**
+ * i40e_dcbnl_get_pfc_cfg - Get CEE PFC configuration
+ * @netdev: the corresponding netdev
+ * @prio: the corresponding user priority
+ * @setting: the PFC setting for given priority
+ *
+ * Get the PFC enabled/disabled setting for given user priority
+ **/
+static void i40e_dcbnl_get_pfc_cfg(struct net_device *netdev, int prio,
+				   u8 *setting)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) ||
+	    (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED))
+		return;
+
+	if (prio >= I40E_MAX_USER_PRIORITY)
+		return;
+
+	*setting = (pf->hw.local_dcbx_config.pfc.pfcenable >> prio) & 0x1;
+	dev_dbg(&pf->pdev->dev,
+		"Get PFC Config up=%d setting=%d pfcenable=0x%x\n",
+		prio, *setting, pf->hw.local_dcbx_config.pfc.pfcenable);
+}
+
+/**
+ * i40e_dcbnl_cee_set_all - Commit CEE DCB settings to hardware
+ * @netdev: the corresponding netdev
+ *
+ * Commit the current DCB configuration to hardware
+ **/
+static u8 i40e_dcbnl_cee_set_all(struct net_device *netdev)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+	int err;
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) ||
+	    (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED))
+		return I40E_DCBNL_STATUS_ERROR;
+
+	dev_dbg(&pf->pdev->dev, "Commit DCB Configuration to the hardware\n");
+	err = i40e_hw_dcb_config(pf, &pf->tmp_cfg);
+
+	return err ? I40E_DCBNL_STATUS_ERROR : I40E_DCBNL_STATUS_SUCCESS;
+}
+
+/**
+ * i40e_dcbnl_get_cap - Get DCBX capabilities of adapter
+ * @netdev: the corresponding netdev
+ * @capid: the capability type
+ * @cap: the capability value
+ *
+ * Return the capability value for a given capability type
+ **/
+static u8 i40e_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+
+	if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
+		return I40E_DCBNL_STATUS_ERROR;
+
+	switch (capid) {
+	case DCB_CAP_ATTR_PG:
+	case DCB_CAP_ATTR_PFC:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_PG_TCS:
+	case DCB_CAP_ATTR_PFC_TCS:
+		*cap = 0x80;
+		break;
+	case DCB_CAP_ATTR_DCBX:
+		*cap = pf->dcbx_cap;
+		break;
+	case DCB_CAP_ATTR_UP2TC:
+	case DCB_CAP_ATTR_GSP:
+	case DCB_CAP_ATTR_BCN:
+	default:
+		*cap = false;
+		break;
+	}
+
+	dev_dbg(&pf->pdev->dev, "Get Capability cap=%d capval=0x%x\n",
+		capid, *cap);
+	return I40E_DCBNL_STATUS_SUCCESS;
+}
+
+/**
+ * i40e_dcbnl_getnumtcs - Get max number of traffic classes supported
+ * @netdev: the corresponding netdev
+ * @tcid: the TC id
+ * @num: total number of TCs supported by the device
+ *
+ * Return the total number of TCs supported by the adapter
+ **/
+static int i40e_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+
+	if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
+		return -EINVAL;
+
+	*num = I40E_MAX_TRAFFIC_CLASS;
+	return 0;
+}
+
+/**
+ * i40e_dcbnl_setnumtcs - Set CEE number of traffic classes
+ * @netdev: the corresponding netdev
+ * @tcid: the TC id
+ * @num: total number of TCs
+ *
+ * Set the total number of TCs (Unsupported)
+ **/
+static int i40e_dcbnl_setnumtcs(struct net_device *netdev, int tcid, u8 num)
+{
+	return -EINVAL;
+}
+
+/**
+ * i40e_dcbnl_getpfcstate - Get CEE PFC mode
+ * @netdev: the corresponding netdev
+ *
+ * Get the current PFC enabled state
+ **/
+static u8 i40e_dcbnl_getpfcstate(struct net_device *netdev)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+
+	/* Return enabled if any PFC enabled UP */
+	if (pf->hw.local_dcbx_config.pfc.pfcenable)
+		return 1;
+	else
+		return 0;
+}
+
+/**
+ * i40e_dcbnl_setpfcstate - Set CEE PFC mode
+ * @netdev: the corresponding netdev
+ * @state: required state
+ *
+ * The PFC state to be set; this is enabled/disabled based on the PFC
+ * priority settings and not via this call for i40e driver
+ **/
+static void i40e_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+
+	dev_dbg(&pf->pdev->dev, "PFC State is modified via PFC config.\n");
+}
+
+/**
+ * i40e_dcbnl_getapp - Get CEE APP
+ * @netdev: the corresponding netdev
+ * @idtype: the App selector
+ * @id: the App ethtype or port number
+ *
+ * Return the CEE mode app for the given idtype and id
+ **/
+static int i40e_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+	struct dcb_app app = {
+				.selector = idtype,
+				.protocol = id,
+			     };
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE) ||
+	    (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED))
+		return -EINVAL;
+
+	return dcb_getapp(netdev, &app);
+}
+
+/**
+ * i40e_dcbnl_setdcbx - set required DCBx capability
+ * @netdev: the corresponding netdev
+ * @mode: new DCB mode managed or CEE+IEEE
+ *
+ * Set DCBx capability features
+ **/
+static u8 i40e_dcbnl_setdcbx(struct net_device *netdev, u8 mode)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(netdev);
+
+	/* Do not allow to set mode if managed by Firmware */
+	if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)
+		return I40E_DCBNL_STATUS_ERROR;
+
+	/* No support for LLD_MANAGED modes or CEE+IEEE */
+	if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    ((mode & DCB_CAP_DCBX_VER_IEEE) && (mode & DCB_CAP_DCBX_VER_CEE)) ||
+	    !(mode & DCB_CAP_DCBX_HOST))
+		return I40E_DCBNL_STATUS_ERROR;
+
+	/* Already set to the given mode no change */
+	if (mode == pf->dcbx_cap)
+		return I40E_DCBNL_STATUS_SUCCESS;
+
+	pf->dcbx_cap = mode;
+	if (mode & DCB_CAP_DCBX_VER_CEE)
+		pf->hw.local_dcbx_config.dcbx_mode = I40E_DCBX_MODE_CEE;
+	else
+		pf->hw.local_dcbx_config.dcbx_mode = I40E_DCBX_MODE_IEEE;
+
+	dev_dbg(&pf->pdev->dev, "mode=%d\n", mode);
+	return I40E_DCBNL_STATUS_SUCCESS;
+}
+
+/**
+ * i40e_dcbnl_getdcbx - retrieve current DCBx capability
+ * @dev: the corresponding netdev
+ *
+ * Returns DCBx capability features
+ **/
+static u8 i40e_dcbnl_getdcbx(struct net_device *dev)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(dev);
+
+	return pf->dcbx_cap;
+}
+
+/**
+ * i40e_dcbnl_get_perm_hw_addr - MAC address used by DCBx
+ * @dev: the corresponding netdev
+ * @perm_addr: buffer to store the MAC address
+ *
+ * Returns the SAN MAC address used for LLDP exchange
+ **/
+static void i40e_dcbnl_get_perm_hw_addr(struct net_device *dev,
+					u8 *perm_addr)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(dev);
+	int i, j;
+
+	memset(perm_addr, 0xff, MAX_ADDR_LEN);
+
+	for (i = 0; i < dev->addr_len; i++)
+		perm_addr[i] = pf->hw.mac.perm_addr[i];
+
+	for (j = 0; j < dev->addr_len; j++, i++)
+		perm_addr[i] = pf->hw.mac.san_addr[j];
+}
+
+static const struct dcbnl_rtnl_ops dcbnl_ops = {
+	.ieee_getets	= i40e_dcbnl_ieee_getets,
+	.ieee_getpfc	= i40e_dcbnl_ieee_getpfc,
+	.getdcbx	= i40e_dcbnl_getdcbx,
+	.getpermhwaddr	= i40e_dcbnl_get_perm_hw_addr,
+	.ieee_setets	= i40e_dcbnl_ieee_setets,
+	.ieee_setpfc	= i40e_dcbnl_ieee_setpfc,
+	.ieee_setapp	= i40e_dcbnl_ieee_setapp,
+	.ieee_delapp	= i40e_dcbnl_ieee_delapp,
+	.getstate	= i40e_dcbnl_getstate,
+	.setstate	= i40e_dcbnl_setstate,
+	.setpgtccfgtx	= i40e_dcbnl_set_pg_tc_cfg_tx,
+	.setpgbwgcfgtx	= i40e_dcbnl_set_pg_bwg_cfg_tx,
+	.setpgtccfgrx	= i40e_dcbnl_set_pg_tc_cfg_rx,
+	.setpgbwgcfgrx	= i40e_dcbnl_set_pg_bwg_cfg_rx,
+	.getpgtccfgtx	= i40e_dcbnl_get_pg_tc_cfg_tx,
+	.getpgbwgcfgtx	= i40e_dcbnl_get_pg_bwg_cfg_tx,
+	.getpgtccfgrx	= i40e_dcbnl_get_pg_tc_cfg_rx,
+	.getpgbwgcfgrx	= i40e_dcbnl_get_pg_bwg_cfg_rx,
+	.setpfccfg	= i40e_dcbnl_set_pfc_cfg,
+	.getpfccfg	= i40e_dcbnl_get_pfc_cfg,
+	.setall		= i40e_dcbnl_cee_set_all,
+	.getcap		= i40e_dcbnl_get_cap,
+	.getnumtcs	= i40e_dcbnl_getnumtcs,
+	.setnumtcs	= i40e_dcbnl_setnumtcs,
+	.getpfcstate	= i40e_dcbnl_getpfcstate,
+	.setpfcstate	= i40e_dcbnl_setpfcstate,
+	.getapp		= i40e_dcbnl_getapp,
+	.setdcbx	= i40e_dcbnl_setdcbx,
+};
+
+/**
+ * i40e_dcbnl_set_all - set all the apps and ieee data from DCBx config
+ * @vsi: the corresponding vsi
+ *
+ * Set up all the IEEE APPs in the DCBNL App Table and generate event for
+ * other settings
+ **/
+void i40e_dcbnl_set_all(struct i40e_vsi *vsi)
+{
+	struct net_device *dev = vsi->netdev;
+	struct i40e_pf *pf = i40e_netdev_to_pf(dev);
+	struct i40e_dcbx_config *dcbxcfg;
+	struct i40e_hw *hw = &pf->hw;
+	struct dcb_app sapp;
+	u8 prio, tc_map;
+	int i;
+
+	/* SW DCB taken care by DCBNL set calls */
+	if (pf->dcbx_cap & DCB_CAP_DCBX_HOST)
+		return;
+
+	/* DCB not enabled */
+	if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
+		return;
+
+	/* MFP mode but not an iSCSI PF so return */
+	if ((pf->flags & I40E_FLAG_MFP_ENABLED) && !(hw->func_caps.iscsi))
+		return;
+
+	dcbxcfg = &hw->local_dcbx_config;
+
+	/* Set up all the App TLVs if DCBx is negotiated */
+	for (i = 0; i < dcbxcfg->numapps; i++) {
+		prio = dcbxcfg->app[i].priority;
+		tc_map = BIT(dcbxcfg->etscfg.prioritytable[prio]);
+
+		/* Add APP only if the TC is enabled for this VSI */
+		if (tc_map & vsi->tc_config.enabled_tc) {
+			sapp.selector = dcbxcfg->app[i].selector;
+			sapp.protocol = dcbxcfg->app[i].protocolid;
+			sapp.priority = prio;
+			dcb_ieee_setapp(dev, &sapp);
+		}
+	}
+
+	/* Notify user-space of the changes */
+	dcbnl_ieee_notify(dev, RTM_SETDCB, DCB_CMD_IEEE_SET, 0, 0);
+}
+
+/**
+ * i40e_dcbnl_vsi_del_app - Delete APP for given VSI
+ * @vsi: the corresponding vsi
+ * @app: APP to delete
+ *
+ * Delete given APP from the DCBNL APP table for given
+ * VSI
+ **/
+static int i40e_dcbnl_vsi_del_app(struct i40e_vsi *vsi,
+				  struct i40e_dcb_app_priority_table *app)
+{
+	struct net_device *dev = vsi->netdev;
+	struct dcb_app sapp;
+
+	if (!dev)
+		return -EINVAL;
+
+	sapp.selector = app->selector;
+	sapp.protocol = app->protocolid;
+	sapp.priority = app->priority;
+	return dcb_ieee_delapp(dev, &sapp);
+}
+
+/**
+ * i40e_dcbnl_del_app - Delete APP on all VSIs
+ * @pf: the corresponding PF
+ * @app: APP to delete
+ *
+ * Delete given APP from all the VSIs for given PF
+ **/
+static void i40e_dcbnl_del_app(struct i40e_pf *pf,
+			       struct i40e_dcb_app_priority_table *app)
+{
+	int v, err;
+
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
+		if (pf->vsi[v] && pf->vsi[v]->netdev) {
+			err = i40e_dcbnl_vsi_del_app(pf->vsi[v], app);
+			dev_dbg(&pf->pdev->dev, "Deleting app for VSI seid=%d err=%d sel=%d proto=0x%x prio=%d\n",
+				pf->vsi[v]->seid, err, app->selector,
+				app->protocolid, app->priority);
+		}
+	}
+}
+
+/**
+ * i40e_dcbnl_find_app - Search APP in given DCB config
+ * @cfg: DCBX configuration data
+ * @app: APP to search for
+ *
+ * Find given APP in the DCB configuration
+ **/
+static bool i40e_dcbnl_find_app(struct i40e_dcbx_config *cfg,
+				struct i40e_dcb_app_priority_table *app)
+{
+	int i;
+
+	for (i = 0; i < cfg->numapps; i++) {
+		if (app->selector == cfg->app[i].selector &&
+		    app->protocolid == cfg->app[i].protocolid &&
+		    app->priority == cfg->app[i].priority)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * i40e_dcbnl_flush_apps - Delete all removed APPs
+ * @pf: the corresponding PF
+ * @old_cfg: old DCBX configuration data
+ * @new_cfg: new DCBX configuration data
+ *
+ * Find and delete all APPs that are not present in the passed
+ * DCB configuration
+ **/
+void i40e_dcbnl_flush_apps(struct i40e_pf *pf,
+			   struct i40e_dcbx_config *old_cfg,
+			   struct i40e_dcbx_config *new_cfg)
+{
+	struct i40e_dcb_app_priority_table app;
+	int i;
+
+	/* MFP mode but not an iSCSI PF so return */
+	if ((pf->flags & I40E_FLAG_MFP_ENABLED) && !(pf->hw.func_caps.iscsi))
+		return;
+
+	for (i = 0; i < old_cfg->numapps; i++) {
+		app = old_cfg->app[i];
+		/* The APP is not available anymore delete it */
+		if (!i40e_dcbnl_find_app(new_cfg, &app))
+			i40e_dcbnl_del_app(pf, &app);
+	}
+}
+
+/**
+ * i40e_dcbnl_setup - DCBNL setup
+ * @vsi: the corresponding vsi
+ *
+ * Set up DCBNL ops and initial APP TLVs
+ **/
+void i40e_dcbnl_setup(struct i40e_vsi *vsi)
+{
+	struct net_device *dev = vsi->netdev;
+	struct i40e_pf *pf = i40e_netdev_to_pf(dev);
+
+	/* Not DCB capable */
+	if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
+		return;
+
+	dev->dcbnl_ops = &dcbnl_ops;
+
+	/* Set initial IEEE DCB settings */
+	i40e_dcbnl_set_all(vsi);
+}
+#endif /* CONFIG_I40E_DCB */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ddp.c b/drivers/net/ethernet/intel/i40e/i40e_ddp.c
new file mode 100644
index 0000000000..0e72abd178
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_ddp.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "i40e.h"
+
+#include <linux/firmware.h>
+
+/**
+ * i40e_ddp_profiles_eq - checks if DDP profiles are the equivalent
+ * @a: new profile info
+ * @b: old profile info
+ *
+ * checks if DDP profiles are the equivalent.
+ * Returns true if profiles are the same.
+ **/
+static bool i40e_ddp_profiles_eq(struct i40e_profile_info *a,
+				 struct i40e_profile_info *b)
+{
+	return a->track_id == b->track_id &&
+		!memcmp(&a->version, &b->version, sizeof(a->version)) &&
+		!memcmp(&a->name, &b->name, I40E_DDP_NAME_SIZE);
+}
+
+/**
+ * i40e_ddp_does_profile_exist - checks if DDP profile loaded already
+ * @hw: HW data structure
+ * @pinfo: DDP profile information structure
+ *
+ * checks if DDP profile loaded already.
+ * Returns >0 if the profile exists.
+ * Returns  0 if the profile is absent.
+ * Returns <0 if error.
+ **/
+static int i40e_ddp_does_profile_exist(struct i40e_hw *hw,
+				       struct i40e_profile_info *pinfo)
+{
+	struct i40e_ddp_profile_list *profile_list;
+	u8 buff[I40E_PROFILE_LIST_SIZE];
+	int status;
+	int i;
+
+	status = i40e_aq_get_ddp_list(hw, buff, I40E_PROFILE_LIST_SIZE, 0,
+				      NULL);
+	if (status)
+		return -1;
+
+	profile_list = (struct i40e_ddp_profile_list *)buff;
+	for (i = 0; i < profile_list->p_count; i++) {
+		if (i40e_ddp_profiles_eq(pinfo, &profile_list->p_info[i]))
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * i40e_ddp_profiles_overlap - checks if DDP profiles overlap.
+ * @new: new profile info
+ * @old: old profile info
+ *
+ * checks if DDP profiles overlap.
+ * Returns true if profiles are overlap.
+ **/
+static bool i40e_ddp_profiles_overlap(struct i40e_profile_info *new,
+				      struct i40e_profile_info *old)
+{
+	unsigned int group_id_old = (u8)((old->track_id & 0x00FF0000) >> 16);
+	unsigned int group_id_new = (u8)((new->track_id & 0x00FF0000) >> 16);
+
+	/* 0x00 group must be only the first */
+	if (group_id_new == 0)
+		return true;
+	/* 0xFF group is compatible with anything else */
+	if (group_id_new == 0xFF || group_id_old == 0xFF)
+		return false;
+	/* otherwise only profiles from the same group are compatible*/
+	return group_id_old != group_id_new;
+}
+
+/**
+ * i40e_ddp_does_profile_overlap - checks if DDP overlaps with existing one.
+ * @hw: HW data structure
+ * @pinfo: DDP profile information structure
+ *
+ * checks if DDP profile overlaps with existing one.
+ * Returns >0 if the profile overlaps.
+ * Returns  0 if the profile is ok.
+ * Returns <0 if error.
+ **/
+static int i40e_ddp_does_profile_overlap(struct i40e_hw *hw,
+					 struct i40e_profile_info *pinfo)
+{
+	struct i40e_ddp_profile_list *profile_list;
+	u8 buff[I40E_PROFILE_LIST_SIZE];
+	int status;
+	int i;
+
+	status = i40e_aq_get_ddp_list(hw, buff, I40E_PROFILE_LIST_SIZE, 0,
+				      NULL);
+	if (status)
+		return -EIO;
+
+	profile_list = (struct i40e_ddp_profile_list *)buff;
+	for (i = 0; i < profile_list->p_count; i++) {
+		if (i40e_ddp_profiles_overlap(pinfo,
+					      &profile_list->p_info[i]))
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * i40e_add_pinfo
+ * @hw: pointer to the hardware structure
+ * @profile: pointer to the profile segment of the package
+ * @profile_info_sec: buffer for information section
+ * @track_id: package tracking id
+ *
+ * Register a profile to the list of loaded profiles.
+ */
+static int
+i40e_add_pinfo(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+	       u8 *profile_info_sec, u32 track_id)
+{
+	struct i40e_profile_section_header *sec;
+	struct i40e_profile_info *pinfo;
+	u32 offset = 0, info = 0;
+	int status;
+
+	sec = (struct i40e_profile_section_header *)profile_info_sec;
+	sec->tbl_size = 1;
+	sec->data_end = sizeof(struct i40e_profile_section_header) +
+			sizeof(struct i40e_profile_info);
+	sec->section.type = SECTION_TYPE_INFO;
+	sec->section.offset = sizeof(struct i40e_profile_section_header);
+	sec->section.size = sizeof(struct i40e_profile_info);
+	pinfo = (struct i40e_profile_info *)(profile_info_sec +
+					     sec->section.offset);
+	pinfo->track_id = track_id;
+	pinfo->version = profile->version;
+	pinfo->op = I40E_DDP_ADD_TRACKID;
+
+	/* Clear reserved field */
+	memset(pinfo->reserved, 0, sizeof(pinfo->reserved));
+	memcpy(pinfo->name, profile->name, I40E_DDP_NAME_SIZE);
+
+	status = i40e_aq_write_ddp(hw, (void *)sec, sec->data_end,
+				   track_id, &offset, &info, NULL);
+	return status;
+}
+
+/**
+ * i40e_del_pinfo - delete DDP profile info from NIC
+ * @hw: HW data structure
+ * @profile: DDP profile segment to be deleted
+ * @profile_info_sec: DDP profile section header
+ * @track_id: track ID of the profile for deletion
+ *
+ * Removes DDP profile from the NIC.
+ **/
+static int
+i40e_del_pinfo(struct i40e_hw *hw, struct i40e_profile_segment *profile,
+	       u8 *profile_info_sec, u32 track_id)
+{
+	struct i40e_profile_section_header *sec;
+	struct i40e_profile_info *pinfo;
+	u32 offset = 0, info = 0;
+	int status;
+
+	sec = (struct i40e_profile_section_header *)profile_info_sec;
+	sec->tbl_size = 1;
+	sec->data_end = sizeof(struct i40e_profile_section_header) +
+			sizeof(struct i40e_profile_info);
+	sec->section.type = SECTION_TYPE_INFO;
+	sec->section.offset = sizeof(struct i40e_profile_section_header);
+	sec->section.size = sizeof(struct i40e_profile_info);
+	pinfo = (struct i40e_profile_info *)(profile_info_sec +
+					     sec->section.offset);
+	pinfo->track_id = track_id;
+	pinfo->version = profile->version;
+	pinfo->op = I40E_DDP_REMOVE_TRACKID;
+
+	/* Clear reserved field */
+	memset(pinfo->reserved, 0, sizeof(pinfo->reserved));
+	memcpy(pinfo->name, profile->name, I40E_DDP_NAME_SIZE);
+
+	status = i40e_aq_write_ddp(hw, (void *)sec, sec->data_end,
+				   track_id, &offset, &info, NULL);
+	return status;
+}
+
+/**
+ * i40e_ddp_is_pkg_hdr_valid - performs basic pkg header integrity checks
+ * @netdev: net device structure (for logging purposes)
+ * @pkg_hdr: pointer to package header
+ * @size_huge: size of the whole DDP profile package in size_t
+ *
+ * Checks correctness of pkg header: Version, size too big/small, and
+ * all segment offsets alignment and boundaries. This function lets
+ * reject non DDP profile file to be loaded by administrator mistake.
+ **/
+static bool i40e_ddp_is_pkg_hdr_valid(struct net_device *netdev,
+				      struct i40e_package_header *pkg_hdr,
+				      size_t size_huge)
+{
+	u32 size = 0xFFFFFFFFU & size_huge;
+	u32 pkg_hdr_size;
+	u32 segment;
+
+	if (!pkg_hdr)
+		return false;
+
+	if (pkg_hdr->version.major > 0) {
+		struct i40e_ddp_version ver = pkg_hdr->version;
+
+		netdev_err(netdev, "Unsupported DDP profile version %u.%u.%u.%u",
+			   ver.major, ver.minor, ver.update, ver.draft);
+		return false;
+	}
+	if (size_huge > size) {
+		netdev_err(netdev, "Invalid DDP profile - size is bigger than 4G");
+		return false;
+	}
+	if (size < (sizeof(struct i40e_package_header) + sizeof(u32) +
+		sizeof(struct i40e_metadata_segment) + sizeof(u32) * 2)) {
+		netdev_err(netdev, "Invalid DDP profile - size is too small.");
+		return false;
+	}
+
+	pkg_hdr_size = sizeof(u32) * (pkg_hdr->segment_count + 2U);
+	if (size < pkg_hdr_size) {
+		netdev_err(netdev, "Invalid DDP profile - too many segments");
+		return false;
+	}
+	for (segment = 0; segment < pkg_hdr->segment_count; ++segment) {
+		u32 offset = pkg_hdr->segment_offset[segment];
+
+		if (0xFU & offset) {
+			netdev_err(netdev,
+				   "Invalid DDP profile %u segment alignment",
+				   segment);
+			return false;
+		}
+		if (pkg_hdr_size > offset || offset >= size) {
+			netdev_err(netdev,
+				   "Invalid DDP profile %u segment offset",
+				   segment);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/**
+ * i40e_ddp_load - performs DDP loading
+ * @netdev: net device structure
+ * @data: buffer containing recipe file
+ * @size: size of the buffer
+ * @is_add: true when loading profile, false when rolling back the previous one
+ *
+ * Checks correctness and loads DDP profile to the NIC. The function is
+ * also used for rolling back previously loaded profile.
+ **/
+int i40e_ddp_load(struct net_device *netdev, const u8 *data, size_t size,
+		  bool is_add)
+{
+	u8 profile_info_sec[sizeof(struct i40e_profile_section_header) +
+			    sizeof(struct i40e_profile_info)];
+	struct i40e_metadata_segment *metadata_hdr;
+	struct i40e_profile_segment *profile_hdr;
+	struct i40e_profile_info pinfo;
+	struct i40e_package_header *pkg_hdr;
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	u32 track_id;
+	int istatus;
+	int status;
+
+	pkg_hdr = (struct i40e_package_header *)data;
+	if (!i40e_ddp_is_pkg_hdr_valid(netdev, pkg_hdr, size))
+		return -EINVAL;
+
+	if (size < (sizeof(struct i40e_package_header) + sizeof(u32) +
+		    sizeof(struct i40e_metadata_segment) + sizeof(u32) * 2)) {
+		netdev_err(netdev, "Invalid DDP recipe size.");
+		return -EINVAL;
+	}
+
+	/* Find beginning of segment data in buffer */
+	metadata_hdr = (struct i40e_metadata_segment *)
+		i40e_find_segment_in_package(SEGMENT_TYPE_METADATA, pkg_hdr);
+	if (!metadata_hdr) {
+		netdev_err(netdev, "Failed to find metadata segment in DDP recipe.");
+		return -EINVAL;
+	}
+
+	track_id = metadata_hdr->track_id;
+	profile_hdr = (struct i40e_profile_segment *)
+		i40e_find_segment_in_package(SEGMENT_TYPE_I40E, pkg_hdr);
+	if (!profile_hdr) {
+		netdev_err(netdev, "Failed to find profile segment in DDP recipe.");
+		return -EINVAL;
+	}
+
+	pinfo.track_id = track_id;
+	pinfo.version = profile_hdr->version;
+	if (is_add)
+		pinfo.op = I40E_DDP_ADD_TRACKID;
+	else
+		pinfo.op = I40E_DDP_REMOVE_TRACKID;
+
+	memcpy(pinfo.name, profile_hdr->name, I40E_DDP_NAME_SIZE);
+
+	/* Check if profile data already exists*/
+	istatus = i40e_ddp_does_profile_exist(&pf->hw, &pinfo);
+	if (istatus < 0) {
+		netdev_err(netdev, "Failed to fetch loaded profiles.");
+		return istatus;
+	}
+	if (is_add) {
+		if (istatus > 0) {
+			netdev_err(netdev, "DDP profile already loaded.");
+			return -EINVAL;
+		}
+		istatus = i40e_ddp_does_profile_overlap(&pf->hw, &pinfo);
+		if (istatus < 0) {
+			netdev_err(netdev, "Failed to fetch loaded profiles.");
+			return istatus;
+		}
+		if (istatus > 0) {
+			netdev_err(netdev, "DDP profile overlaps with existing one.");
+			return -EINVAL;
+		}
+	} else {
+		if (istatus == 0) {
+			netdev_err(netdev,
+				   "DDP profile for deletion does not exist.");
+			return -EINVAL;
+		}
+	}
+
+	/* Load profile data */
+	if (is_add) {
+		status = i40e_write_profile(&pf->hw, profile_hdr, track_id);
+		if (status) {
+			if (status == -ENODEV) {
+				netdev_err(netdev,
+					   "Profile is not supported by the device.");
+				return -EPERM;
+			}
+			netdev_err(netdev, "Failed to write DDP profile.");
+			return -EIO;
+		}
+	} else {
+		status = i40e_rollback_profile(&pf->hw, profile_hdr, track_id);
+		if (status) {
+			netdev_err(netdev, "Failed to remove DDP profile.");
+			return -EIO;
+		}
+	}
+
+	/* Add/remove profile to/from profile list in FW */
+	if (is_add) {
+		status = i40e_add_pinfo(&pf->hw, profile_hdr, profile_info_sec,
+					track_id);
+		if (status) {
+			netdev_err(netdev, "Failed to add DDP profile info.");
+			return -EIO;
+		}
+	} else {
+		status = i40e_del_pinfo(&pf->hw, profile_hdr, profile_info_sec,
+					track_id);
+		if (status) {
+			netdev_err(netdev, "Failed to restore DDP profile info.");
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_ddp_restore - restore previously loaded profile and remove from list
+ * @pf: PF data struct
+ *
+ * Restores previously loaded profile stored on the list in driver memory.
+ * After rolling back removes entry from the list.
+ **/
+static int i40e_ddp_restore(struct i40e_pf *pf)
+{
+	struct i40e_ddp_old_profile_list *entry;
+	struct net_device *netdev = pf->vsi[pf->lan_vsi]->netdev;
+	int status = 0;
+
+	if (!list_empty(&pf->ddp_old_prof)) {
+		entry = list_first_entry(&pf->ddp_old_prof,
+					 struct i40e_ddp_old_profile_list,
+					 list);
+		status = i40e_ddp_load(netdev, entry->old_ddp_buf,
+				       entry->old_ddp_size, false);
+		list_del(&entry->list);
+		kfree(entry);
+	}
+	return status;
+}
+
+/**
+ * i40e_ddp_flash - callback function for ethtool flash feature
+ * @netdev: net device structure
+ * @flash: kernel flash structure
+ *
+ * Ethtool callback function used for loading and unloading DDP profiles.
+ **/
+int i40e_ddp_flash(struct net_device *netdev, struct ethtool_flash *flash)
+{
+	const struct firmware *ddp_config;
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	int status = 0;
+
+	/* Check for valid region first */
+	if (flash->region != I40_DDP_FLASH_REGION) {
+		netdev_err(netdev, "Requested firmware region is not recognized by this driver.");
+		return -EINVAL;
+	}
+	if (pf->hw.bus.func != 0) {
+		netdev_err(netdev, "Any DDP operation is allowed only on Phy0 NIC interface");
+		return -EINVAL;
+	}
+
+	/* If the user supplied "-" instead of file name rollback previously
+	 * stored profile.
+	 */
+	if (strncmp(flash->data, "-", 2) != 0) {
+		struct i40e_ddp_old_profile_list *list_entry;
+		char profile_name[sizeof(I40E_DDP_PROFILE_PATH)
+				  + I40E_DDP_PROFILE_NAME_MAX];
+
+		profile_name[sizeof(profile_name) - 1] = 0;
+		strncpy(profile_name, I40E_DDP_PROFILE_PATH,
+			sizeof(profile_name) - 1);
+		strncat(profile_name, flash->data, I40E_DDP_PROFILE_NAME_MAX);
+		/* Load DDP recipe. */
+		status = request_firmware(&ddp_config, profile_name,
+					  &netdev->dev);
+		if (status) {
+			netdev_err(netdev, "DDP recipe file request failed.");
+			return status;
+		}
+
+		status = i40e_ddp_load(netdev, ddp_config->data,
+				       ddp_config->size, true);
+
+		if (!status) {
+			list_entry =
+			  kzalloc(sizeof(struct i40e_ddp_old_profile_list) +
+				  ddp_config->size, GFP_KERNEL);
+			if (!list_entry) {
+				netdev_info(netdev, "Failed to allocate memory for previous DDP profile data.");
+				netdev_info(netdev, "New profile loaded but roll-back will be impossible.");
+			} else {
+				memcpy(list_entry->old_ddp_buf,
+				       ddp_config->data, ddp_config->size);
+				list_entry->old_ddp_size = ddp_config->size;
+				list_add(&list_entry->list, &pf->ddp_old_prof);
+			}
+		}
+
+		release_firmware(ddp_config);
+	} else {
+		if (!list_empty(&pf->ddp_old_prof)) {
+			status = i40e_ddp_restore(pf);
+		} else {
+			netdev_warn(netdev, "There is no DDP profile to restore.");
+			status = -ENOENT;
+		}
+	}
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
new file mode 100644
index 0000000000..1a497cb077
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -0,0 +1,1855 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+
+#include "i40e.h"
+
+static struct dentry *i40e_dbg_root;
+
+enum ring_type {
+	RING_TYPE_RX,
+	RING_TYPE_TX,
+	RING_TYPE_XDP
+};
+
+/**
+ * i40e_dbg_find_vsi - searches for the vsi with the given seid
+ * @pf: the PF structure to search for the vsi
+ * @seid: seid of the vsi it is searching for
+ **/
+static struct i40e_vsi *i40e_dbg_find_vsi(struct i40e_pf *pf, int seid)
+{
+	int i;
+
+	if (seid < 0)
+		dev_info(&pf->pdev->dev, "%d: bad seid\n", seid);
+	else
+		for (i = 0; i < pf->num_alloc_vsi; i++)
+			if (pf->vsi[i] && (pf->vsi[i]->seid == seid))
+				return pf->vsi[i];
+
+	return NULL;
+}
+
+/**
+ * i40e_dbg_find_veb - searches for the veb with the given seid
+ * @pf: the PF structure to search for the veb
+ * @seid: seid of the veb it is searching for
+ **/
+static struct i40e_veb *i40e_dbg_find_veb(struct i40e_pf *pf, int seid)
+{
+	int i;
+
+	for (i = 0; i < I40E_MAX_VEB; i++)
+		if (pf->veb[i] && pf->veb[i]->seid == seid)
+			return pf->veb[i];
+	return NULL;
+}
+
+/**************************************************************
+ * command
+ * The command entry in debugfs is for giving the driver commands
+ * to be executed - these may be for changing the internal switch
+ * setup, adding or removing filters, or other things.  Many of
+ * these will be useful for some forms of unit testing.
+ **************************************************************/
+static char i40e_dbg_command_buf[256] = "";
+
+/**
+ * i40e_dbg_command_read - read for command datum
+ * @filp: the opened file
+ * @buffer: where to write the data for the user to read
+ * @count: the size of the user's buffer
+ * @ppos: file position offset
+ **/
+static ssize_t i40e_dbg_command_read(struct file *filp, char __user *buffer,
+				     size_t count, loff_t *ppos)
+{
+	struct i40e_pf *pf = filp->private_data;
+	int bytes_not_copied;
+	int buf_size = 256;
+	char *buf;
+	int len;
+
+	/* don't allow partial reads */
+	if (*ppos != 0)
+		return 0;
+	if (count < buf_size)
+		return -ENOSPC;
+
+	buf = kzalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOSPC;
+
+	len = snprintf(buf, buf_size, "%s: %s\n",
+		       pf->vsi[pf->lan_vsi]->netdev->name,
+		       i40e_dbg_command_buf);
+
+	bytes_not_copied = copy_to_user(buffer, buf, len);
+	kfree(buf);
+
+	if (bytes_not_copied)
+		return -EFAULT;
+
+	*ppos = len;
+	return len;
+}
+
+static char *i40e_filter_state_string[] = {
+	"INVALID",
+	"NEW",
+	"ACTIVE",
+	"FAILED",
+	"REMOVE",
+};
+
+/**
+ * i40e_dbg_dump_vsi_seid - handles dump vsi seid write into command datum
+ * @pf: the i40e_pf created in command write
+ * @seid: the seid the user put in
+ **/
+static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
+{
+	struct rtnl_link_stats64 *nstat;
+	struct i40e_mac_filter *f;
+	struct i40e_vsi *vsi;
+	int i, bkt;
+
+	vsi = i40e_dbg_find_vsi(pf, seid);
+	if (!vsi) {
+		dev_info(&pf->pdev->dev,
+			 "dump %d: seid not found\n", seid);
+		return;
+	}
+	dev_info(&pf->pdev->dev, "vsi seid %d\n", seid);
+	if (vsi->netdev) {
+		struct net_device *nd = vsi->netdev;
+
+		dev_info(&pf->pdev->dev, "    netdev: name = %s, state = %lu, flags = 0x%08x\n",
+			 nd->name, nd->state, nd->flags);
+		dev_info(&pf->pdev->dev, "        features      = 0x%08lx\n",
+			 (unsigned long int)nd->features);
+		dev_info(&pf->pdev->dev, "        hw_features   = 0x%08lx\n",
+			 (unsigned long int)nd->hw_features);
+		dev_info(&pf->pdev->dev, "        vlan_features = 0x%08lx\n",
+			 (unsigned long int)nd->vlan_features);
+	}
+	dev_info(&pf->pdev->dev,
+		 "    flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n",
+		 vsi->flags, vsi->netdev_registered, vsi->current_netdev_flags);
+	for (i = 0; i < BITS_TO_LONGS(__I40E_VSI_STATE_SIZE__); i++)
+		dev_info(&pf->pdev->dev,
+			 "    state[%d] = %08lx\n",
+			 i, vsi->state[i]);
+	if (vsi == pf->vsi[pf->lan_vsi])
+		dev_info(&pf->pdev->dev, "    MAC address: %pM SAN MAC: %pM Port MAC: %pM\n",
+			 pf->hw.mac.addr,
+			 pf->hw.mac.san_addr,
+			 pf->hw.mac.port_addr);
+	hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
+		dev_info(&pf->pdev->dev,
+			 "    mac_filter_hash: %pM vid=%d, state %s\n",
+			 f->macaddr, f->vlan,
+			 i40e_filter_state_string[f->state]);
+	}
+	dev_info(&pf->pdev->dev, "    active_filters %u, promisc_threshold %u, overflow promisc %s\n",
+		 vsi->active_filters, vsi->promisc_threshold,
+		 (test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state) ?
+		  "ON" : "OFF"));
+	nstat = i40e_get_vsi_stats_struct(vsi);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats: rx_packets = %lu, rx_bytes = %lu, rx_errors = %lu, rx_dropped = %lu\n",
+		 (unsigned long int)nstat->rx_packets,
+		 (unsigned long int)nstat->rx_bytes,
+		 (unsigned long int)nstat->rx_errors,
+		 (unsigned long int)nstat->rx_dropped);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats: tx_packets = %lu, tx_bytes = %lu, tx_errors = %lu, tx_dropped = %lu\n",
+		 (unsigned long int)nstat->tx_packets,
+		 (unsigned long int)nstat->tx_bytes,
+		 (unsigned long int)nstat->tx_errors,
+		 (unsigned long int)nstat->tx_dropped);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats: multicast = %lu, collisions = %lu\n",
+		 (unsigned long int)nstat->multicast,
+		 (unsigned long int)nstat->collisions);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats: rx_length_errors = %lu, rx_over_errors = %lu, rx_crc_errors = %lu\n",
+		 (unsigned long int)nstat->rx_length_errors,
+		 (unsigned long int)nstat->rx_over_errors,
+		 (unsigned long int)nstat->rx_crc_errors);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats: rx_frame_errors = %lu, rx_fifo_errors = %lu, rx_missed_errors = %lu\n",
+		 (unsigned long int)nstat->rx_frame_errors,
+		 (unsigned long int)nstat->rx_fifo_errors,
+		 (unsigned long int)nstat->rx_missed_errors);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats: tx_aborted_errors = %lu, tx_carrier_errors = %lu, tx_fifo_errors = %lu\n",
+		 (unsigned long int)nstat->tx_aborted_errors,
+		 (unsigned long int)nstat->tx_carrier_errors,
+		 (unsigned long int)nstat->tx_fifo_errors);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats: tx_heartbeat_errors = %lu, tx_window_errors = %lu\n",
+		 (unsigned long int)nstat->tx_heartbeat_errors,
+		 (unsigned long int)nstat->tx_window_errors);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats: rx_compressed = %lu, tx_compressed = %lu\n",
+		 (unsigned long int)nstat->rx_compressed,
+		 (unsigned long int)nstat->tx_compressed);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats_offsets: rx_packets = %lu, rx_bytes = %lu, rx_errors = %lu, rx_dropped = %lu\n",
+		 (unsigned long int)vsi->net_stats_offsets.rx_packets,
+		 (unsigned long int)vsi->net_stats_offsets.rx_bytes,
+		 (unsigned long int)vsi->net_stats_offsets.rx_errors,
+		 (unsigned long int)vsi->net_stats_offsets.rx_dropped);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats_offsets: tx_packets = %lu, tx_bytes = %lu, tx_errors = %lu, tx_dropped = %lu\n",
+		 (unsigned long int)vsi->net_stats_offsets.tx_packets,
+		 (unsigned long int)vsi->net_stats_offsets.tx_bytes,
+		 (unsigned long int)vsi->net_stats_offsets.tx_errors,
+		 (unsigned long int)vsi->net_stats_offsets.tx_dropped);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats_offsets: multicast = %lu, collisions = %lu\n",
+		 (unsigned long int)vsi->net_stats_offsets.multicast,
+		 (unsigned long int)vsi->net_stats_offsets.collisions);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats_offsets: rx_length_errors = %lu, rx_over_errors = %lu, rx_crc_errors = %lu\n",
+		 (unsigned long int)vsi->net_stats_offsets.rx_length_errors,
+		 (unsigned long int)vsi->net_stats_offsets.rx_over_errors,
+		 (unsigned long int)vsi->net_stats_offsets.rx_crc_errors);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats_offsets: rx_frame_errors = %lu, rx_fifo_errors = %lu, rx_missed_errors = %lu\n",
+		 (unsigned long int)vsi->net_stats_offsets.rx_frame_errors,
+		 (unsigned long int)vsi->net_stats_offsets.rx_fifo_errors,
+		 (unsigned long int)vsi->net_stats_offsets.rx_missed_errors);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats_offsets: tx_aborted_errors = %lu, tx_carrier_errors = %lu, tx_fifo_errors = %lu\n",
+		 (unsigned long int)vsi->net_stats_offsets.tx_aborted_errors,
+		 (unsigned long int)vsi->net_stats_offsets.tx_carrier_errors,
+		 (unsigned long int)vsi->net_stats_offsets.tx_fifo_errors);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats_offsets: tx_heartbeat_errors = %lu, tx_window_errors = %lu\n",
+		 (unsigned long int)vsi->net_stats_offsets.tx_heartbeat_errors,
+		 (unsigned long int)vsi->net_stats_offsets.tx_window_errors);
+	dev_info(&pf->pdev->dev,
+		 "    net_stats_offsets: rx_compressed = %lu, tx_compressed = %lu\n",
+		 (unsigned long int)vsi->net_stats_offsets.rx_compressed,
+		 (unsigned long int)vsi->net_stats_offsets.tx_compressed);
+	dev_info(&pf->pdev->dev,
+		 "    tx_restart = %llu, tx_busy = %llu, rx_buf_failed = %llu, rx_page_failed = %llu\n",
+		 vsi->tx_restart, vsi->tx_busy,
+		 vsi->rx_buf_failed, vsi->rx_page_failed);
+	rcu_read_lock();
+	for (i = 0; i < vsi->num_queue_pairs; i++) {
+		struct i40e_ring *rx_ring = READ_ONCE(vsi->rx_rings[i]);
+
+		if (!rx_ring)
+			continue;
+
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
+			 i, *rx_ring->state,
+			 rx_ring->queue_index,
+			 rx_ring->reg_idx);
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: rx_buf_len = %d\n",
+			 i, rx_ring->rx_buf_len);
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: next_to_use = %d, next_to_clean = %d, ring_active = %i\n",
+			 i,
+			 rx_ring->next_to_use,
+			 rx_ring->next_to_clean,
+			 rx_ring->ring_active);
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: rx_stats: packets = %lld, bytes = %lld, non_eop_descs = %lld\n",
+			 i, rx_ring->stats.packets,
+			 rx_ring->stats.bytes,
+			 rx_ring->rx_stats.non_eop_descs);
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: rx_stats: alloc_page_failed = %lld, alloc_buff_failed = %lld\n",
+			 i,
+			 rx_ring->rx_stats.alloc_page_failed,
+			 rx_ring->rx_stats.alloc_buff_failed);
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: rx_stats: realloc_count = 0, page_reuse_count = %lld\n",
+			 i,
+			 rx_ring->rx_stats.page_reuse_count);
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: size = %i\n",
+			 i, rx_ring->size);
+		dev_info(&pf->pdev->dev,
+			 "    rx_rings[%i]: itr_setting = %d (%s)\n",
+			 i, rx_ring->itr_setting,
+			 ITR_IS_DYNAMIC(rx_ring->itr_setting) ? "dynamic" : "fixed");
+	}
+	for (i = 0; i < vsi->num_queue_pairs; i++) {
+		struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]);
+
+		if (!tx_ring)
+			continue;
+
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
+			 i, *tx_ring->state,
+			 tx_ring->queue_index,
+			 tx_ring->reg_idx);
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: next_to_use = %d, next_to_clean = %d, ring_active = %i\n",
+			 i,
+			 tx_ring->next_to_use,
+			 tx_ring->next_to_clean,
+			 tx_ring->ring_active);
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: tx_stats: packets = %lld, bytes = %lld, restart_queue = %lld\n",
+			 i, tx_ring->stats.packets,
+			 tx_ring->stats.bytes,
+			 tx_ring->tx_stats.restart_queue);
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: tx_stats: tx_busy = %lld, tx_done_old = %lld, tx_stopped = %lld\n",
+			 i,
+			 tx_ring->tx_stats.tx_busy,
+			 tx_ring->tx_stats.tx_done_old,
+			 tx_ring->tx_stats.tx_stopped);
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: size = %i\n",
+			 i, tx_ring->size);
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: DCB tc = %d\n",
+			 i, tx_ring->dcb_tc);
+		dev_info(&pf->pdev->dev,
+			 "    tx_rings[%i]: itr_setting = %d (%s)\n",
+			 i, tx_ring->itr_setting,
+			 ITR_IS_DYNAMIC(tx_ring->itr_setting) ? "dynamic" : "fixed");
+	}
+	if (i40e_enabled_xdp_vsi(vsi)) {
+		for (i = 0; i < vsi->num_queue_pairs; i++) {
+			struct i40e_ring *xdp_ring = READ_ONCE(vsi->xdp_rings[i]);
+
+			if (!xdp_ring)
+				continue;
+
+			dev_info(&pf->pdev->dev,
+				 "    xdp_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
+				 i, *xdp_ring->state,
+				 xdp_ring->queue_index,
+				 xdp_ring->reg_idx);
+			dev_info(&pf->pdev->dev,
+				 "    xdp_rings[%i]: next_to_use = %d, next_to_clean = %d, ring_active = %i\n",
+				 i,
+				 xdp_ring->next_to_use,
+				 xdp_ring->next_to_clean,
+				 xdp_ring->ring_active);
+			dev_info(&pf->pdev->dev,
+				 "    xdp_rings[%i]: tx_stats: packets = %lld, bytes = %lld, restart_queue = %lld\n",
+				 i, xdp_ring->stats.packets,
+				 xdp_ring->stats.bytes,
+				 xdp_ring->tx_stats.restart_queue);
+			dev_info(&pf->pdev->dev,
+				 "    xdp_rings[%i]: tx_stats: tx_busy = %lld, tx_done_old = %lld\n",
+				 i,
+				 xdp_ring->tx_stats.tx_busy,
+				 xdp_ring->tx_stats.tx_done_old);
+			dev_info(&pf->pdev->dev,
+				 "    xdp_rings[%i]: size = %i\n",
+				 i, xdp_ring->size);
+			dev_info(&pf->pdev->dev,
+				 "    xdp_rings[%i]: DCB tc = %d\n",
+				 i, xdp_ring->dcb_tc);
+			dev_info(&pf->pdev->dev,
+				 "    xdp_rings[%i]: itr_setting = %d (%s)\n",
+				 i, xdp_ring->itr_setting,
+				 ITR_IS_DYNAMIC(xdp_ring->itr_setting) ?
+				 "dynamic" : "fixed");
+		}
+	}
+	rcu_read_unlock();
+	dev_info(&pf->pdev->dev,
+		 "    work_limit = %d\n",
+		 vsi->work_limit);
+	dev_info(&pf->pdev->dev,
+		 "    max_frame = %d, rx_buf_len = %d dtype = %d\n",
+		 vsi->max_frame, vsi->rx_buf_len, 0);
+	dev_info(&pf->pdev->dev,
+		 "    num_q_vectors = %i, base_vector = %i\n",
+		 vsi->num_q_vectors, vsi->base_vector);
+	dev_info(&pf->pdev->dev,
+		 "    seid = %d, id = %d, uplink_seid = %d\n",
+		 vsi->seid, vsi->id, vsi->uplink_seid);
+	dev_info(&pf->pdev->dev,
+		 "    base_queue = %d, num_queue_pairs = %d, num_tx_desc = %d, num_rx_desc = %d\n",
+		 vsi->base_queue, vsi->num_queue_pairs, vsi->num_tx_desc,
+		 vsi->num_rx_desc);
+	dev_info(&pf->pdev->dev, "    type = %i\n", vsi->type);
+	if (vsi->type == I40E_VSI_SRIOV)
+		dev_info(&pf->pdev->dev, "    VF ID = %i\n", vsi->vf_id);
+	dev_info(&pf->pdev->dev,
+		 "    info: valid_sections = 0x%04x, switch_id = 0x%04x\n",
+		 vsi->info.valid_sections, vsi->info.switch_id);
+	dev_info(&pf->pdev->dev,
+		 "    info: sw_reserved[] = 0x%02x 0x%02x\n",
+		 vsi->info.sw_reserved[0], vsi->info.sw_reserved[1]);
+	dev_info(&pf->pdev->dev,
+		 "    info: sec_flags = 0x%02x, sec_reserved = 0x%02x\n",
+		 vsi->info.sec_flags, vsi->info.sec_reserved);
+	dev_info(&pf->pdev->dev,
+		 "    info: pvid = 0x%04x, fcoe_pvid = 0x%04x, port_vlan_flags = 0x%02x\n",
+		 vsi->info.pvid, vsi->info.fcoe_pvid,
+		 vsi->info.port_vlan_flags);
+	dev_info(&pf->pdev->dev,
+		 "    info: pvlan_reserved[] = 0x%02x 0x%02x 0x%02x\n",
+		 vsi->info.pvlan_reserved[0], vsi->info.pvlan_reserved[1],
+		 vsi->info.pvlan_reserved[2]);
+	dev_info(&pf->pdev->dev,
+		 "    info: ingress_table = 0x%08x, egress_table = 0x%08x\n",
+		 vsi->info.ingress_table, vsi->info.egress_table);
+	dev_info(&pf->pdev->dev,
+		 "    info: cas_pv_stag = 0x%04x, cas_pv_flags= 0x%02x, cas_pv_reserved = 0x%02x\n",
+		 vsi->info.cas_pv_tag, vsi->info.cas_pv_flags,
+		 vsi->info.cas_pv_reserved);
+	dev_info(&pf->pdev->dev,
+		 "    info: queue_mapping[0..7 ] = 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n",
+		 vsi->info.queue_mapping[0], vsi->info.queue_mapping[1],
+		 vsi->info.queue_mapping[2], vsi->info.queue_mapping[3],
+		 vsi->info.queue_mapping[4], vsi->info.queue_mapping[5],
+		 vsi->info.queue_mapping[6], vsi->info.queue_mapping[7]);
+	dev_info(&pf->pdev->dev,
+		 "    info: queue_mapping[8..15] = 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n",
+		 vsi->info.queue_mapping[8], vsi->info.queue_mapping[9],
+		 vsi->info.queue_mapping[10], vsi->info.queue_mapping[11],
+		 vsi->info.queue_mapping[12], vsi->info.queue_mapping[13],
+		 vsi->info.queue_mapping[14], vsi->info.queue_mapping[15]);
+	dev_info(&pf->pdev->dev,
+		 "    info: tc_mapping[] = 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n",
+		 vsi->info.tc_mapping[0], vsi->info.tc_mapping[1],
+		 vsi->info.tc_mapping[2], vsi->info.tc_mapping[3],
+		 vsi->info.tc_mapping[4], vsi->info.tc_mapping[5],
+		 vsi->info.tc_mapping[6], vsi->info.tc_mapping[7]);
+	dev_info(&pf->pdev->dev,
+		 "    info: queueing_opt_flags = 0x%02x  queueing_opt_reserved[0..2] = 0x%02x 0x%02x 0x%02x\n",
+		 vsi->info.queueing_opt_flags,
+		 vsi->info.queueing_opt_reserved[0],
+		 vsi->info.queueing_opt_reserved[1],
+		 vsi->info.queueing_opt_reserved[2]);
+	dev_info(&pf->pdev->dev,
+		 "    info: up_enable_bits = 0x%02x\n",
+		 vsi->info.up_enable_bits);
+	dev_info(&pf->pdev->dev,
+		 "    info: sched_reserved = 0x%02x, outer_up_table = 0x%04x\n",
+		 vsi->info.sched_reserved, vsi->info.outer_up_table);
+	dev_info(&pf->pdev->dev,
+		 "    info: cmd_reserved[] = 0x%02x 0x%02x 0x%02x 0x0%02x 0x%02x 0x%02x 0x%02x 0x0%02x\n",
+		 vsi->info.cmd_reserved[0], vsi->info.cmd_reserved[1],
+		 vsi->info.cmd_reserved[2], vsi->info.cmd_reserved[3],
+		 vsi->info.cmd_reserved[4], vsi->info.cmd_reserved[5],
+		 vsi->info.cmd_reserved[6], vsi->info.cmd_reserved[7]);
+	dev_info(&pf->pdev->dev,
+		 "    info: qs_handle[] = 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n",
+		 vsi->info.qs_handle[0], vsi->info.qs_handle[1],
+		 vsi->info.qs_handle[2], vsi->info.qs_handle[3],
+		 vsi->info.qs_handle[4], vsi->info.qs_handle[5],
+		 vsi->info.qs_handle[6], vsi->info.qs_handle[7]);
+	dev_info(&pf->pdev->dev,
+		 "    info: stat_counter_idx = 0x%04x, sched_id = 0x%04x\n",
+		 vsi->info.stat_counter_idx, vsi->info.sched_id);
+	dev_info(&pf->pdev->dev,
+		 "    info: resp_reserved[] = 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x\n",
+		 vsi->info.resp_reserved[0], vsi->info.resp_reserved[1],
+		 vsi->info.resp_reserved[2], vsi->info.resp_reserved[3],
+		 vsi->info.resp_reserved[4], vsi->info.resp_reserved[5],
+		 vsi->info.resp_reserved[6], vsi->info.resp_reserved[7],
+		 vsi->info.resp_reserved[8], vsi->info.resp_reserved[9],
+		 vsi->info.resp_reserved[10], vsi->info.resp_reserved[11]);
+	dev_info(&pf->pdev->dev, "    idx = %d\n", vsi->idx);
+	dev_info(&pf->pdev->dev,
+		 "    tc_config: numtc = %d, enabled_tc = 0x%x\n",
+		 vsi->tc_config.numtc, vsi->tc_config.enabled_tc);
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		dev_info(&pf->pdev->dev,
+			 "    tc_config: tc = %d, qoffset = %d, qcount = %d, netdev_tc = %d\n",
+			 i, vsi->tc_config.tc_info[i].qoffset,
+			 vsi->tc_config.tc_info[i].qcount,
+			 vsi->tc_config.tc_info[i].netdev_tc);
+	}
+	dev_info(&pf->pdev->dev,
+		 "    bw: bw_limit = %d, bw_max_quanta = %d\n",
+		 vsi->bw_limit, vsi->bw_max_quanta);
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		dev_info(&pf->pdev->dev,
+			 "    bw[%d]: ets_share_credits = %d, ets_limit_credits = %d, max_quanta = %d\n",
+			 i, vsi->bw_ets_share_credits[i],
+			 vsi->bw_ets_limit_credits[i],
+			 vsi->bw_ets_max_quanta[i]);
+	}
+}
+
+/**
+ * i40e_dbg_dump_aq_desc - handles dump aq_desc write into command datum
+ * @pf: the i40e_pf created in command write
+ **/
+static void i40e_dbg_dump_aq_desc(struct i40e_pf *pf)
+{
+	struct i40e_adminq_ring *ring;
+	struct i40e_hw *hw = &pf->hw;
+	char hdr[32];
+	int i;
+
+	snprintf(hdr, sizeof(hdr), "%s %s:         ",
+		 dev_driver_string(&pf->pdev->dev),
+		 dev_name(&pf->pdev->dev));
+
+	/* first the send (command) ring, then the receive (event) ring */
+	dev_info(&pf->pdev->dev, "AdminQ Tx Ring\n");
+	ring = &(hw->aq.asq);
+	for (i = 0; i < ring->count; i++) {
+		struct i40e_aq_desc *d = I40E_ADMINQ_DESC(*ring, i);
+
+		dev_info(&pf->pdev->dev,
+			 "   at[%02d] flags=0x%04x op=0x%04x dlen=0x%04x ret=0x%04x cookie_h=0x%08x cookie_l=0x%08x\n",
+			 i, d->flags, d->opcode, d->datalen, d->retval,
+			 d->cookie_high, d->cookie_low);
+		print_hex_dump(KERN_INFO, hdr, DUMP_PREFIX_NONE,
+			       16, 1, d->params.raw, 16, 0);
+	}
+
+	dev_info(&pf->pdev->dev, "AdminQ Rx Ring\n");
+	ring = &(hw->aq.arq);
+	for (i = 0; i < ring->count; i++) {
+		struct i40e_aq_desc *d = I40E_ADMINQ_DESC(*ring, i);
+
+		dev_info(&pf->pdev->dev,
+			 "   ar[%02d] flags=0x%04x op=0x%04x dlen=0x%04x ret=0x%04x cookie_h=0x%08x cookie_l=0x%08x\n",
+			 i, d->flags, d->opcode, d->datalen, d->retval,
+			 d->cookie_high, d->cookie_low);
+		print_hex_dump(KERN_INFO, hdr, DUMP_PREFIX_NONE,
+			       16, 1, d->params.raw, 16, 0);
+	}
+}
+
+/**
+ * i40e_dbg_dump_desc - handles dump desc write into command datum
+ * @cnt: number of arguments that the user supplied
+ * @vsi_seid: vsi id entered by user
+ * @ring_id: ring id entered by user
+ * @desc_n: descriptor number entered by user
+ * @pf: the i40e_pf created in command write
+ * @type: enum describing whether ring is RX, TX or XDP
+ **/
+static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n,
+			       struct i40e_pf *pf, enum ring_type type)
+{
+	bool is_rx_ring = type == RING_TYPE_RX;
+	struct i40e_tx_desc *txd;
+	union i40e_rx_desc *rxd;
+	struct i40e_ring *ring;
+	struct i40e_vsi *vsi;
+	int i;
+
+	vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+	if (!vsi) {
+		dev_info(&pf->pdev->dev, "vsi %d not found\n", vsi_seid);
+		return;
+	}
+	if (vsi->type != I40E_VSI_MAIN &&
+	    vsi->type != I40E_VSI_FDIR &&
+	    vsi->type != I40E_VSI_VMDQ2) {
+		dev_info(&pf->pdev->dev,
+			 "vsi %d type %d descriptor rings not available\n",
+			 vsi_seid, vsi->type);
+		return;
+	}
+	if (type == RING_TYPE_XDP && !i40e_enabled_xdp_vsi(vsi)) {
+		dev_info(&pf->pdev->dev, "XDP not enabled on VSI %d\n", vsi_seid);
+		return;
+	}
+	if (ring_id >= vsi->num_queue_pairs || ring_id < 0) {
+		dev_info(&pf->pdev->dev, "ring %d not found\n", ring_id);
+		return;
+	}
+	if (!vsi->tx_rings || !vsi->tx_rings[0]->desc) {
+		dev_info(&pf->pdev->dev,
+			 "descriptor rings have not been allocated for vsi %d\n",
+			 vsi_seid);
+		return;
+	}
+
+	switch (type) {
+	case RING_TYPE_RX:
+		ring = kmemdup(vsi->rx_rings[ring_id], sizeof(*ring), GFP_KERNEL);
+		break;
+	case RING_TYPE_TX:
+		ring = kmemdup(vsi->tx_rings[ring_id], sizeof(*ring), GFP_KERNEL);
+		break;
+	case RING_TYPE_XDP:
+		ring = kmemdup(vsi->xdp_rings[ring_id], sizeof(*ring), GFP_KERNEL);
+		break;
+	default:
+		ring = NULL;
+		break;
+	}
+	if (!ring)
+		return;
+
+	if (cnt == 2) {
+		switch (type) {
+		case RING_TYPE_RX:
+			dev_info(&pf->pdev->dev, "VSI = %02i Rx ring = %02i\n", vsi_seid, ring_id);
+			break;
+		case RING_TYPE_TX:
+			dev_info(&pf->pdev->dev, "VSI = %02i Tx ring = %02i\n", vsi_seid, ring_id);
+			break;
+		case RING_TYPE_XDP:
+			dev_info(&pf->pdev->dev, "VSI = %02i XDP ring = %02i\n", vsi_seid, ring_id);
+			break;
+		}
+		for (i = 0; i < ring->count; i++) {
+			if (!is_rx_ring) {
+				txd = I40E_TX_DESC(ring, i);
+				dev_info(&pf->pdev->dev,
+					 "   d[%03x] = 0x%016llx 0x%016llx\n",
+					 i, txd->buffer_addr,
+					 txd->cmd_type_offset_bsz);
+			} else {
+				rxd = I40E_RX_DESC(ring, i);
+				dev_info(&pf->pdev->dev,
+					 "   d[%03x] = 0x%016llx 0x%016llx\n",
+					 i, rxd->read.pkt_addr,
+					 rxd->read.hdr_addr);
+			}
+		}
+	} else if (cnt == 3) {
+		if (desc_n >= ring->count || desc_n < 0) {
+			dev_info(&pf->pdev->dev,
+				 "descriptor %d not found\n", desc_n);
+			goto out;
+		}
+		if (!is_rx_ring) {
+			txd = I40E_TX_DESC(ring, desc_n);
+			dev_info(&pf->pdev->dev,
+				 "vsi = %02i tx ring = %02i d[%03x] = 0x%016llx 0x%016llx\n",
+				 vsi_seid, ring_id, desc_n,
+				 txd->buffer_addr, txd->cmd_type_offset_bsz);
+		} else {
+			rxd = I40E_RX_DESC(ring, desc_n);
+			dev_info(&pf->pdev->dev,
+				 "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx\n",
+				 vsi_seid, ring_id, desc_n,
+				 rxd->read.pkt_addr, rxd->read.hdr_addr);
+		}
+	} else {
+		dev_info(&pf->pdev->dev, "dump desc rx/tx/xdp <vsi_seid> <ring_id> [<desc_n>]\n");
+	}
+
+out:
+	kfree(ring);
+}
+
+/**
+ * i40e_dbg_dump_vsi_no_seid - handles dump vsi write into command datum
+ * @pf: the i40e_pf created in command write
+ **/
+static void i40e_dbg_dump_vsi_no_seid(struct i40e_pf *pf)
+{
+	int i;
+
+	for (i = 0; i < pf->num_alloc_vsi; i++)
+		if (pf->vsi[i])
+			dev_info(&pf->pdev->dev, "dump vsi[%d]: %d\n",
+				 i, pf->vsi[i]->seid);
+}
+
+/**
+ * i40e_dbg_dump_eth_stats - handles dump stats write into command datum
+ * @pf: the i40e_pf created in command write
+ * @estats: the eth stats structure to be dumped
+ **/
+static void i40e_dbg_dump_eth_stats(struct i40e_pf *pf,
+				    struct i40e_eth_stats *estats)
+{
+	dev_info(&pf->pdev->dev, "  ethstats:\n");
+	dev_info(&pf->pdev->dev,
+		 "    rx_bytes = \t%lld \trx_unicast = \t\t%lld \trx_multicast = \t%lld\n",
+		estats->rx_bytes, estats->rx_unicast, estats->rx_multicast);
+	dev_info(&pf->pdev->dev,
+		 "    rx_broadcast = \t%lld \trx_discards = \t\t%lld\n",
+		 estats->rx_broadcast, estats->rx_discards);
+	dev_info(&pf->pdev->dev,
+		 "    rx_unknown_protocol = \t%lld \ttx_bytes = \t%lld\n",
+		 estats->rx_unknown_protocol, estats->tx_bytes);
+	dev_info(&pf->pdev->dev,
+		 "    tx_unicast = \t%lld \ttx_multicast = \t\t%lld \ttx_broadcast = \t%lld\n",
+		 estats->tx_unicast, estats->tx_multicast, estats->tx_broadcast);
+	dev_info(&pf->pdev->dev,
+		 "    tx_discards = \t%lld \ttx_errors = \t\t%lld\n",
+		 estats->tx_discards, estats->tx_errors);
+}
+
+/**
+ * i40e_dbg_dump_veb_seid - handles dump stats of a single given veb
+ * @pf: the i40e_pf created in command write
+ * @seid: the seid the user put in
+ **/
+static void i40e_dbg_dump_veb_seid(struct i40e_pf *pf, int seid)
+{
+	struct i40e_veb *veb;
+
+	veb = i40e_dbg_find_veb(pf, seid);
+	if (!veb) {
+		dev_info(&pf->pdev->dev, "can't find veb %d\n", seid);
+		return;
+	}
+	dev_info(&pf->pdev->dev,
+		 "veb idx=%d,%d stats_ic=%d  seid=%d uplink=%d mode=%s\n",
+		 veb->idx, veb->veb_idx, veb->stats_idx, veb->seid,
+		 veb->uplink_seid,
+		 veb->bridge_mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
+	i40e_dbg_dump_eth_stats(pf, &veb->stats);
+}
+
+/**
+ * i40e_dbg_dump_veb_all - dumps all known veb's stats
+ * @pf: the i40e_pf created in command write
+ **/
+static void i40e_dbg_dump_veb_all(struct i40e_pf *pf)
+{
+	struct i40e_veb *veb;
+	int i;
+
+	for (i = 0; i < I40E_MAX_VEB; i++) {
+		veb = pf->veb[i];
+		if (veb)
+			i40e_dbg_dump_veb_seid(pf, veb->seid);
+	}
+}
+
+/**
+ * i40e_dbg_dump_vf - dump VF info
+ * @pf: the i40e_pf created in command write
+ * @vf_id: the vf_id from the user
+ **/
+static void i40e_dbg_dump_vf(struct i40e_pf *pf, int vf_id)
+{
+	struct i40e_vf *vf;
+	struct i40e_vsi *vsi;
+
+	if (!pf->num_alloc_vfs) {
+		dev_info(&pf->pdev->dev, "no VFs allocated\n");
+	} else if ((vf_id >= 0) && (vf_id < pf->num_alloc_vfs)) {
+		vf = &pf->vf[vf_id];
+		vsi = pf->vsi[vf->lan_vsi_idx];
+		dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d, seid=%d, qps=%d\n",
+			 vf_id, vf->lan_vsi_id, vsi->seid, vf->num_queue_pairs);
+		dev_info(&pf->pdev->dev, "       num MDD=%lld\n",
+			 vf->num_mdd_events);
+	} else {
+		dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id);
+	}
+}
+
+/**
+ * i40e_dbg_dump_vf_all - dump VF info for all VFs
+ * @pf: the i40e_pf created in command write
+ **/
+static void i40e_dbg_dump_vf_all(struct i40e_pf *pf)
+{
+	int i;
+
+	if (!pf->num_alloc_vfs)
+		dev_info(&pf->pdev->dev, "no VFs enabled!\n");
+	else
+		for (i = 0; i < pf->num_alloc_vfs; i++)
+			i40e_dbg_dump_vf(pf, i);
+}
+
+/**
+ * i40e_dbg_command_write - write into command datum
+ * @filp: the opened file
+ * @buffer: where to find the user's data
+ * @count: the length of the user's data
+ * @ppos: file position offset
+ **/
+static ssize_t i40e_dbg_command_write(struct file *filp,
+				      const char __user *buffer,
+				      size_t count, loff_t *ppos)
+{
+	struct i40e_pf *pf = filp->private_data;
+	char *cmd_buf, *cmd_buf_tmp;
+	int bytes_not_copied;
+	struct i40e_vsi *vsi;
+	int vsi_seid;
+	int veb_seid;
+	int vf_id;
+	int cnt;
+
+	/* don't allow partial writes */
+	if (*ppos != 0)
+		return 0;
+
+	cmd_buf = kzalloc(count + 1, GFP_KERNEL);
+	if (!cmd_buf)
+		return count;
+	bytes_not_copied = copy_from_user(cmd_buf, buffer, count);
+	if (bytes_not_copied) {
+		kfree(cmd_buf);
+		return -EFAULT;
+	}
+	cmd_buf[count] = '\0';
+
+	cmd_buf_tmp = strchr(cmd_buf, '\n');
+	if (cmd_buf_tmp) {
+		*cmd_buf_tmp = '\0';
+		count = cmd_buf_tmp - cmd_buf + 1;
+	}
+
+	if (strncmp(cmd_buf, "add vsi", 7) == 0) {
+		vsi_seid = -1;
+		cnt = sscanf(&cmd_buf[7], "%i", &vsi_seid);
+		if (cnt == 0) {
+			/* default to PF VSI */
+			vsi_seid = pf->vsi[pf->lan_vsi]->seid;
+		} else if (vsi_seid < 0) {
+			dev_info(&pf->pdev->dev, "add VSI %d: bad vsi seid\n",
+				 vsi_seid);
+			goto command_write_done;
+		}
+
+		/* By default we are in VEPA mode, if this is the first VF/VMDq
+		 * VSI to be added switch to VEB mode.
+		 */
+		if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+			i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
+		}
+
+		vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi_seid, 0);
+		if (vsi)
+			dev_info(&pf->pdev->dev, "added VSI %d to relay %d\n",
+				 vsi->seid, vsi->uplink_seid);
+		else
+			dev_info(&pf->pdev->dev, "'%s' failed\n", cmd_buf);
+
+	} else if (strncmp(cmd_buf, "del vsi", 7) == 0) {
+		cnt = sscanf(&cmd_buf[7], "%i", &vsi_seid);
+		if (cnt != 1) {
+			dev_info(&pf->pdev->dev,
+				 "del vsi: bad command string, cnt=%d\n",
+				 cnt);
+			goto command_write_done;
+		}
+		vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+		if (!vsi) {
+			dev_info(&pf->pdev->dev, "del VSI %d: seid not found\n",
+				 vsi_seid);
+			goto command_write_done;
+		}
+
+		dev_info(&pf->pdev->dev, "deleting VSI %d\n", vsi_seid);
+		i40e_vsi_release(vsi);
+
+	} else if (strncmp(cmd_buf, "add relay", 9) == 0) {
+		struct i40e_veb *veb;
+		int uplink_seid, i;
+
+		cnt = sscanf(&cmd_buf[9], "%i %i", &uplink_seid, &vsi_seid);
+		if (cnt != 2) {
+			dev_info(&pf->pdev->dev,
+				 "add relay: bad command string, cnt=%d\n",
+				 cnt);
+			goto command_write_done;
+		} else if (uplink_seid < 0) {
+			dev_info(&pf->pdev->dev,
+				 "add relay %d: bad uplink seid\n",
+				 uplink_seid);
+			goto command_write_done;
+		}
+
+		vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+		if (!vsi) {
+			dev_info(&pf->pdev->dev,
+				 "add relay: VSI %d not found\n", vsi_seid);
+			goto command_write_done;
+		}
+
+		for (i = 0; i < I40E_MAX_VEB; i++)
+			if (pf->veb[i] && pf->veb[i]->seid == uplink_seid)
+				break;
+		if (i >= I40E_MAX_VEB && uplink_seid != 0 &&
+		    uplink_seid != pf->mac_seid) {
+			dev_info(&pf->pdev->dev,
+				 "add relay: relay uplink %d not found\n",
+				 uplink_seid);
+			goto command_write_done;
+		}
+
+		veb = i40e_veb_setup(pf, 0, uplink_seid, vsi_seid,
+				     vsi->tc_config.enabled_tc);
+		if (veb)
+			dev_info(&pf->pdev->dev, "added relay %d\n", veb->seid);
+		else
+			dev_info(&pf->pdev->dev, "add relay failed\n");
+
+	} else if (strncmp(cmd_buf, "del relay", 9) == 0) {
+		int i;
+		cnt = sscanf(&cmd_buf[9], "%i", &veb_seid);
+		if (cnt != 1) {
+			dev_info(&pf->pdev->dev,
+				 "del relay: bad command string, cnt=%d\n",
+				 cnt);
+			goto command_write_done;
+		} else if (veb_seid < 0) {
+			dev_info(&pf->pdev->dev,
+				 "del relay %d: bad relay seid\n", veb_seid);
+			goto command_write_done;
+		}
+
+		/* find the veb */
+		for (i = 0; i < I40E_MAX_VEB; i++)
+			if (pf->veb[i] && pf->veb[i]->seid == veb_seid)
+				break;
+		if (i >= I40E_MAX_VEB) {
+			dev_info(&pf->pdev->dev,
+				 "del relay: relay %d not found\n", veb_seid);
+			goto command_write_done;
+		}
+
+		dev_info(&pf->pdev->dev, "deleting relay %d\n", veb_seid);
+		i40e_veb_release(pf->veb[i]);
+	} else if (strncmp(cmd_buf, "add pvid", 8) == 0) {
+		unsigned int v;
+		int ret;
+		u16 vid;
+
+		cnt = sscanf(&cmd_buf[8], "%i %u", &vsi_seid, &v);
+		if (cnt != 2) {
+			dev_info(&pf->pdev->dev,
+				 "add pvid: bad command string, cnt=%d\n", cnt);
+			goto command_write_done;
+		}
+
+		vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+		if (!vsi) {
+			dev_info(&pf->pdev->dev, "add pvid: VSI %d not found\n",
+				 vsi_seid);
+			goto command_write_done;
+		}
+
+		vid = v;
+		ret = i40e_vsi_add_pvid(vsi, vid);
+		if (!ret)
+			dev_info(&pf->pdev->dev,
+				 "add pvid: %d added to VSI %d\n",
+				 vid, vsi_seid);
+		else
+			dev_info(&pf->pdev->dev,
+				 "add pvid: %d to VSI %d failed, ret=%d\n",
+				 vid, vsi_seid, ret);
+
+	} else if (strncmp(cmd_buf, "del pvid", 8) == 0) {
+
+		cnt = sscanf(&cmd_buf[8], "%i", &vsi_seid);
+		if (cnt != 1) {
+			dev_info(&pf->pdev->dev,
+				 "del pvid: bad command string, cnt=%d\n",
+				 cnt);
+			goto command_write_done;
+		}
+
+		vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+		if (!vsi) {
+			dev_info(&pf->pdev->dev,
+				 "del pvid: VSI %d not found\n", vsi_seid);
+			goto command_write_done;
+		}
+
+		i40e_vsi_remove_pvid(vsi);
+		dev_info(&pf->pdev->dev,
+			 "del pvid: removed from VSI %d\n", vsi_seid);
+
+	} else if (strncmp(cmd_buf, "dump", 4) == 0) {
+		if (strncmp(&cmd_buf[5], "switch", 6) == 0) {
+			i40e_fetch_switch_configuration(pf, true);
+		} else if (strncmp(&cmd_buf[5], "vsi", 3) == 0) {
+			cnt = sscanf(&cmd_buf[8], "%i", &vsi_seid);
+			if (cnt > 0)
+				i40e_dbg_dump_vsi_seid(pf, vsi_seid);
+			else
+				i40e_dbg_dump_vsi_no_seid(pf);
+		} else if (strncmp(&cmd_buf[5], "veb", 3) == 0) {
+			cnt = sscanf(&cmd_buf[8], "%i", &vsi_seid);
+			if (cnt > 0)
+				i40e_dbg_dump_veb_seid(pf, vsi_seid);
+			else
+				i40e_dbg_dump_veb_all(pf);
+		} else if (strncmp(&cmd_buf[5], "vf", 2) == 0) {
+			cnt = sscanf(&cmd_buf[7], "%i", &vf_id);
+			if (cnt > 0)
+				i40e_dbg_dump_vf(pf, vf_id);
+			else
+				i40e_dbg_dump_vf_all(pf);
+		} else if (strncmp(&cmd_buf[5], "desc", 4) == 0) {
+			int ring_id, desc_n;
+			if (strncmp(&cmd_buf[10], "rx", 2) == 0) {
+				cnt = sscanf(&cmd_buf[12], "%i %i %i",
+					     &vsi_seid, &ring_id, &desc_n);
+				i40e_dbg_dump_desc(cnt, vsi_seid, ring_id,
+						   desc_n, pf, RING_TYPE_RX);
+			} else if (strncmp(&cmd_buf[10], "tx", 2)
+					== 0) {
+				cnt = sscanf(&cmd_buf[12], "%i %i %i",
+					     &vsi_seid, &ring_id, &desc_n);
+				i40e_dbg_dump_desc(cnt, vsi_seid, ring_id,
+						   desc_n, pf, RING_TYPE_TX);
+			} else if (strncmp(&cmd_buf[10], "xdp", 3)
+					== 0) {
+				cnt = sscanf(&cmd_buf[13], "%i %i %i",
+					     &vsi_seid, &ring_id, &desc_n);
+				i40e_dbg_dump_desc(cnt, vsi_seid, ring_id,
+						   desc_n, pf, RING_TYPE_XDP);
+			} else if (strncmp(&cmd_buf[10], "aq", 2) == 0) {
+				i40e_dbg_dump_aq_desc(pf);
+			} else {
+				dev_info(&pf->pdev->dev,
+					 "dump desc tx <vsi_seid> <ring_id> [<desc_n>]\n");
+				dev_info(&pf->pdev->dev,
+					 "dump desc rx <vsi_seid> <ring_id> [<desc_n>]\n");
+				dev_info(&pf->pdev->dev,
+					 "dump desc xdp <vsi_seid> <ring_id> [<desc_n>]\n");
+				dev_info(&pf->pdev->dev, "dump desc aq\n");
+			}
+		} else if (strncmp(&cmd_buf[5], "reset stats", 11) == 0) {
+			dev_info(&pf->pdev->dev,
+				 "core reset count: %d\n", pf->corer_count);
+			dev_info(&pf->pdev->dev,
+				 "global reset count: %d\n", pf->globr_count);
+			dev_info(&pf->pdev->dev,
+				 "emp reset count: %d\n", pf->empr_count);
+			dev_info(&pf->pdev->dev,
+				 "pf reset count: %d\n", pf->pfr_count);
+			dev_info(&pf->pdev->dev,
+				 "pf tx sluggish count: %d\n",
+				 pf->tx_sluggish_count);
+		} else if (strncmp(&cmd_buf[5], "port", 4) == 0) {
+			struct i40e_aqc_query_port_ets_config_resp *bw_data;
+			struct i40e_dcbx_config *cfg =
+						&pf->hw.local_dcbx_config;
+			struct i40e_dcbx_config *r_cfg =
+						&pf->hw.remote_dcbx_config;
+			int i, ret;
+			u16 switch_id;
+
+			bw_data = kzalloc(sizeof(
+				    struct i40e_aqc_query_port_ets_config_resp),
+					  GFP_KERNEL);
+			if (!bw_data) {
+				ret = -ENOMEM;
+				goto command_write_done;
+			}
+
+			vsi = pf->vsi[pf->lan_vsi];
+			switch_id =
+				le16_to_cpu(vsi->info.switch_id) &
+					    I40E_AQ_VSI_SW_ID_MASK;
+
+			ret = i40e_aq_query_port_ets_config(&pf->hw,
+							    switch_id,
+							    bw_data, NULL);
+			if (ret) {
+				dev_info(&pf->pdev->dev,
+					 "Query Port ETS Config AQ command failed =0x%x\n",
+					 pf->hw.aq.asq_last_status);
+				kfree(bw_data);
+				bw_data = NULL;
+				goto command_write_done;
+			}
+			dev_info(&pf->pdev->dev,
+				 "port bw: tc_valid=0x%x tc_strict_prio=0x%x, tc_bw_max=0x%04x,0x%04x\n",
+				 bw_data->tc_valid_bits,
+				 bw_data->tc_strict_priority_bits,
+				 le16_to_cpu(bw_data->tc_bw_max[0]),
+				 le16_to_cpu(bw_data->tc_bw_max[1]));
+			for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+				dev_info(&pf->pdev->dev, "port bw: tc_bw_share=%d tc_bw_limit=%d\n",
+					 bw_data->tc_bw_share_credits[i],
+					 le16_to_cpu(bw_data->tc_bw_limits[i]));
+			}
+
+			kfree(bw_data);
+			bw_data = NULL;
+
+			dev_info(&pf->pdev->dev,
+				 "port dcbx_mode=%d\n", cfg->dcbx_mode);
+			dev_info(&pf->pdev->dev,
+				 "port ets_cfg: willing=%d cbs=%d, maxtcs=%d\n",
+				 cfg->etscfg.willing, cfg->etscfg.cbs,
+				 cfg->etscfg.maxtcs);
+			for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+				dev_info(&pf->pdev->dev, "port ets_cfg: %d prio_tc=%d tcbw=%d tctsa=%d\n",
+					 i, cfg->etscfg.prioritytable[i],
+					 cfg->etscfg.tcbwtable[i],
+					 cfg->etscfg.tsatable[i]);
+			}
+			for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+				dev_info(&pf->pdev->dev, "port ets_rec: %d prio_tc=%d tcbw=%d tctsa=%d\n",
+					 i, cfg->etsrec.prioritytable[i],
+					 cfg->etsrec.tcbwtable[i],
+					 cfg->etsrec.tsatable[i]);
+			}
+			dev_info(&pf->pdev->dev,
+				 "port pfc_cfg: willing=%d mbc=%d, pfccap=%d pfcenable=0x%x\n",
+				 cfg->pfc.willing, cfg->pfc.mbc,
+				 cfg->pfc.pfccap, cfg->pfc.pfcenable);
+			dev_info(&pf->pdev->dev,
+				 "port app_table: num_apps=%d\n", cfg->numapps);
+			for (i = 0; i < cfg->numapps; i++) {
+				dev_info(&pf->pdev->dev, "port app_table: %d prio=%d selector=%d protocol=0x%x\n",
+					 i, cfg->app[i].priority,
+					 cfg->app[i].selector,
+					 cfg->app[i].protocolid);
+			}
+			/* Peer TLV DCBX data */
+			dev_info(&pf->pdev->dev,
+				 "remote port ets_cfg: willing=%d cbs=%d, maxtcs=%d\n",
+				 r_cfg->etscfg.willing,
+				 r_cfg->etscfg.cbs, r_cfg->etscfg.maxtcs);
+			for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+				dev_info(&pf->pdev->dev, "remote port ets_cfg: %d prio_tc=%d tcbw=%d tctsa=%d\n",
+					 i, r_cfg->etscfg.prioritytable[i],
+					 r_cfg->etscfg.tcbwtable[i],
+					 r_cfg->etscfg.tsatable[i]);
+			}
+			for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+				dev_info(&pf->pdev->dev, "remote port ets_rec: %d prio_tc=%d tcbw=%d tctsa=%d\n",
+					 i, r_cfg->etsrec.prioritytable[i],
+					 r_cfg->etsrec.tcbwtable[i],
+					 r_cfg->etsrec.tsatable[i]);
+			}
+			dev_info(&pf->pdev->dev,
+				 "remote port pfc_cfg: willing=%d mbc=%d, pfccap=%d pfcenable=0x%x\n",
+				 r_cfg->pfc.willing,
+				 r_cfg->pfc.mbc,
+				 r_cfg->pfc.pfccap,
+				 r_cfg->pfc.pfcenable);
+			dev_info(&pf->pdev->dev,
+				 "remote port app_table: num_apps=%d\n",
+				 r_cfg->numapps);
+			for (i = 0; i < r_cfg->numapps; i++) {
+				dev_info(&pf->pdev->dev, "remote port app_table: %d prio=%d selector=%d protocol=0x%x\n",
+					 i, r_cfg->app[i].priority,
+					 r_cfg->app[i].selector,
+					 r_cfg->app[i].protocolid);
+			}
+		} else if (strncmp(&cmd_buf[5], "debug fwdata", 12) == 0) {
+			int cluster_id, table_id;
+			int index, ret;
+			u16 buff_len = 4096;
+			u32 next_index;
+			u8 next_table;
+			u8 *buff;
+			u16 rlen;
+
+			cnt = sscanf(&cmd_buf[18], "%i %i %i",
+				     &cluster_id, &table_id, &index);
+			if (cnt != 3) {
+				dev_info(&pf->pdev->dev,
+					 "dump debug fwdata <cluster_id> <table_id> <index>\n");
+				goto command_write_done;
+			}
+
+			dev_info(&pf->pdev->dev,
+				 "AQ debug dump fwdata params %x %x %x %x\n",
+				 cluster_id, table_id, index, buff_len);
+			buff = kzalloc(buff_len, GFP_KERNEL);
+			if (!buff)
+				goto command_write_done;
+
+			ret = i40e_aq_debug_dump(&pf->hw, cluster_id, table_id,
+						 index, buff_len, buff, &rlen,
+						 &next_table, &next_index,
+						 NULL);
+			if (ret) {
+				dev_info(&pf->pdev->dev,
+					 "debug dump fwdata AQ Failed %d 0x%x\n",
+					 ret, pf->hw.aq.asq_last_status);
+				kfree(buff);
+				buff = NULL;
+				goto command_write_done;
+			}
+			dev_info(&pf->pdev->dev,
+				 "AQ debug dump fwdata rlen=0x%x next_table=0x%x next_index=0x%x\n",
+				 rlen, next_table, next_index);
+			print_hex_dump(KERN_INFO, "AQ buffer WB: ",
+				       DUMP_PREFIX_OFFSET, 16, 1,
+				       buff, rlen, true);
+			kfree(buff);
+			buff = NULL;
+		} else {
+			dev_info(&pf->pdev->dev,
+				 "dump desc tx <vsi_seid> <ring_id> [<desc_n>], dump desc rx <vsi_seid> <ring_id> [<desc_n>], dump desc xdp <vsi_seid> <ring_id> [<desc_n>],\n");
+			dev_info(&pf->pdev->dev, "dump switch\n");
+			dev_info(&pf->pdev->dev, "dump vsi [seid]\n");
+			dev_info(&pf->pdev->dev, "dump reset stats\n");
+			dev_info(&pf->pdev->dev, "dump port\n");
+			dev_info(&pf->pdev->dev, "dump vf [vf_id]\n");
+			dev_info(&pf->pdev->dev,
+				 "dump debug fwdata <cluster_id> <table_id> <index>\n");
+		}
+	} else if (strncmp(cmd_buf, "pfr", 3) == 0) {
+		dev_info(&pf->pdev->dev, "debugfs: forcing PFR\n");
+		i40e_do_reset_safe(pf, BIT(__I40E_PF_RESET_REQUESTED));
+
+	} else if (strncmp(cmd_buf, "corer", 5) == 0) {
+		dev_info(&pf->pdev->dev, "debugfs: forcing CoreR\n");
+		i40e_do_reset_safe(pf, BIT(__I40E_CORE_RESET_REQUESTED));
+
+	} else if (strncmp(cmd_buf, "globr", 5) == 0) {
+		dev_info(&pf->pdev->dev, "debugfs: forcing GlobR\n");
+		i40e_do_reset_safe(pf, BIT(__I40E_GLOBAL_RESET_REQUESTED));
+
+	} else if (strncmp(cmd_buf, "read", 4) == 0) {
+		u32 address;
+		u32 value;
+
+		cnt = sscanf(&cmd_buf[4], "%i", &address);
+		if (cnt != 1) {
+			dev_info(&pf->pdev->dev, "read <reg>\n");
+			goto command_write_done;
+		}
+
+		/* check the range on address */
+		if (address > (pf->ioremap_len - sizeof(u32))) {
+			dev_info(&pf->pdev->dev, "read reg address 0x%08x too large, max=0x%08lx\n",
+				 address, (unsigned long int)(pf->ioremap_len - sizeof(u32)));
+			goto command_write_done;
+		}
+
+		value = rd32(&pf->hw, address);
+		dev_info(&pf->pdev->dev, "read: 0x%08x = 0x%08x\n",
+			 address, value);
+
+	} else if (strncmp(cmd_buf, "write", 5) == 0) {
+		u32 address, value;
+
+		cnt = sscanf(&cmd_buf[5], "%i %i", &address, &value);
+		if (cnt != 2) {
+			dev_info(&pf->pdev->dev, "write <reg> <value>\n");
+			goto command_write_done;
+		}
+
+		/* check the range on address */
+		if (address > (pf->ioremap_len - sizeof(u32))) {
+			dev_info(&pf->pdev->dev, "write reg address 0x%08x too large, max=0x%08lx\n",
+				 address, (unsigned long int)(pf->ioremap_len - sizeof(u32)));
+			goto command_write_done;
+		}
+		wr32(&pf->hw, address, value);
+		value = rd32(&pf->hw, address);
+		dev_info(&pf->pdev->dev, "write: 0x%08x = 0x%08x\n",
+			 address, value);
+	} else if (strncmp(cmd_buf, "clear_stats", 11) == 0) {
+		if (strncmp(&cmd_buf[12], "vsi", 3) == 0) {
+			cnt = sscanf(&cmd_buf[15], "%i", &vsi_seid);
+			if (cnt == 0) {
+				int i;
+
+				for (i = 0; i < pf->num_alloc_vsi; i++)
+					i40e_vsi_reset_stats(pf->vsi[i]);
+				dev_info(&pf->pdev->dev, "vsi clear stats called for all vsi's\n");
+			} else if (cnt == 1) {
+				vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+				if (!vsi) {
+					dev_info(&pf->pdev->dev,
+						 "clear_stats vsi: bad vsi %d\n",
+						 vsi_seid);
+					goto command_write_done;
+				}
+				i40e_vsi_reset_stats(vsi);
+				dev_info(&pf->pdev->dev,
+					 "vsi clear stats called for vsi %d\n",
+					 vsi_seid);
+			} else {
+				dev_info(&pf->pdev->dev, "clear_stats vsi [seid]\n");
+			}
+		} else if (strncmp(&cmd_buf[12], "port", 4) == 0) {
+			if (pf->hw.partition_id == 1) {
+				i40e_pf_reset_stats(pf);
+				dev_info(&pf->pdev->dev, "port stats cleared\n");
+			} else {
+				dev_info(&pf->pdev->dev, "clear port stats not allowed on this port partition\n");
+			}
+		} else {
+			dev_info(&pf->pdev->dev, "clear_stats vsi [seid] or clear_stats port\n");
+		}
+	} else if (strncmp(cmd_buf, "send aq_cmd", 11) == 0) {
+		struct i40e_aq_desc *desc;
+		int ret;
+
+		desc = kzalloc(sizeof(struct i40e_aq_desc), GFP_KERNEL);
+		if (!desc)
+			goto command_write_done;
+		cnt = sscanf(&cmd_buf[11],
+			     "%hi %hi %hi %hi %i %i %i %i %i %i",
+			     &desc->flags,
+			     &desc->opcode, &desc->datalen, &desc->retval,
+			     &desc->cookie_high, &desc->cookie_low,
+			     &desc->params.internal.param0,
+			     &desc->params.internal.param1,
+			     &desc->params.internal.param2,
+			     &desc->params.internal.param3);
+		if (cnt != 10) {
+			dev_info(&pf->pdev->dev,
+				 "send aq_cmd: bad command string, cnt=%d\n",
+				 cnt);
+			kfree(desc);
+			desc = NULL;
+			goto command_write_done;
+		}
+		ret = i40e_asq_send_command(&pf->hw, desc, NULL, 0, NULL);
+		if (!ret) {
+			dev_info(&pf->pdev->dev, "AQ command sent Status : Success\n");
+		} else if (ret == -EIO) {
+			dev_info(&pf->pdev->dev,
+				 "AQ command send failed Opcode %x AQ Error: %d\n",
+				 desc->opcode, pf->hw.aq.asq_last_status);
+		} else {
+			dev_info(&pf->pdev->dev,
+				 "AQ command send failed Opcode %x Status: %d\n",
+				 desc->opcode, ret);
+		}
+		dev_info(&pf->pdev->dev,
+			 "AQ desc WB 0x%04x 0x%04x 0x%04x 0x%04x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
+			 desc->flags, desc->opcode, desc->datalen, desc->retval,
+			 desc->cookie_high, desc->cookie_low,
+			 desc->params.internal.param0,
+			 desc->params.internal.param1,
+			 desc->params.internal.param2,
+			 desc->params.internal.param3);
+		kfree(desc);
+		desc = NULL;
+	} else if (strncmp(cmd_buf, "send indirect aq_cmd", 20) == 0) {
+		struct i40e_aq_desc *desc;
+		u16 buffer_len;
+		u8 *buff;
+		int ret;
+
+		desc = kzalloc(sizeof(struct i40e_aq_desc), GFP_KERNEL);
+		if (!desc)
+			goto command_write_done;
+		cnt = sscanf(&cmd_buf[20],
+			     "%hi %hi %hi %hi %i %i %i %i %i %i %hi",
+			     &desc->flags,
+			     &desc->opcode, &desc->datalen, &desc->retval,
+			     &desc->cookie_high, &desc->cookie_low,
+			     &desc->params.internal.param0,
+			     &desc->params.internal.param1,
+			     &desc->params.internal.param2,
+			     &desc->params.internal.param3,
+			     &buffer_len);
+		if (cnt != 11) {
+			dev_info(&pf->pdev->dev,
+				 "send indirect aq_cmd: bad command string, cnt=%d\n",
+				 cnt);
+			kfree(desc);
+			desc = NULL;
+			goto command_write_done;
+		}
+		/* Just stub a buffer big enough in case user messed up */
+		if (buffer_len == 0)
+			buffer_len = 1280;
+
+		buff = kzalloc(buffer_len, GFP_KERNEL);
+		if (!buff) {
+			kfree(desc);
+			desc = NULL;
+			goto command_write_done;
+		}
+		desc->flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+		ret = i40e_asq_send_command(&pf->hw, desc, buff,
+					    buffer_len, NULL);
+		if (!ret) {
+			dev_info(&pf->pdev->dev, "AQ command sent Status : Success\n");
+		} else if (ret == -EIO) {
+			dev_info(&pf->pdev->dev,
+				 "AQ command send failed Opcode %x AQ Error: %d\n",
+				 desc->opcode, pf->hw.aq.asq_last_status);
+		} else {
+			dev_info(&pf->pdev->dev,
+				 "AQ command send failed Opcode %x Status: %d\n",
+				 desc->opcode, ret);
+		}
+		dev_info(&pf->pdev->dev,
+			 "AQ desc WB 0x%04x 0x%04x 0x%04x 0x%04x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
+			 desc->flags, desc->opcode, desc->datalen, desc->retval,
+			 desc->cookie_high, desc->cookie_low,
+			 desc->params.internal.param0,
+			 desc->params.internal.param1,
+			 desc->params.internal.param2,
+			 desc->params.internal.param3);
+		print_hex_dump(KERN_INFO, "AQ buffer WB: ",
+			       DUMP_PREFIX_OFFSET, 16, 1,
+			       buff, buffer_len, true);
+		kfree(buff);
+		buff = NULL;
+		kfree(desc);
+		desc = NULL;
+	} else if (strncmp(cmd_buf, "fd current cnt", 14) == 0) {
+		dev_info(&pf->pdev->dev, "FD current total filter count for this interface: %d\n",
+			 i40e_get_current_fd_count(pf));
+	} else if (strncmp(cmd_buf, "lldp", 4) == 0) {
+		if (strncmp(&cmd_buf[5], "stop", 4) == 0) {
+			int ret;
+
+			ret = i40e_aq_stop_lldp(&pf->hw, false, false, NULL);
+			if (ret) {
+				dev_info(&pf->pdev->dev,
+					 "Stop LLDP AQ command failed =0x%x\n",
+					 pf->hw.aq.asq_last_status);
+				goto command_write_done;
+			}
+			ret = i40e_aq_add_rem_control_packet_filter(&pf->hw,
+						pf->hw.mac.addr,
+						ETH_P_LLDP, 0,
+						pf->vsi[pf->lan_vsi]->seid,
+						0, true, NULL, NULL);
+			if (ret) {
+				dev_info(&pf->pdev->dev,
+					"%s: Add Control Packet Filter AQ command failed =0x%x\n",
+					__func__, pf->hw.aq.asq_last_status);
+				goto command_write_done;
+			}
+#ifdef CONFIG_I40E_DCB
+			pf->dcbx_cap = DCB_CAP_DCBX_HOST |
+				       DCB_CAP_DCBX_VER_IEEE;
+#endif /* CONFIG_I40E_DCB */
+		} else if (strncmp(&cmd_buf[5], "start", 5) == 0) {
+			int ret;
+
+			ret = i40e_aq_add_rem_control_packet_filter(&pf->hw,
+						pf->hw.mac.addr,
+						ETH_P_LLDP, 0,
+						pf->vsi[pf->lan_vsi]->seid,
+						0, false, NULL, NULL);
+			if (ret) {
+				dev_info(&pf->pdev->dev,
+					"%s: Remove Control Packet Filter AQ command failed =0x%x\n",
+					__func__, pf->hw.aq.asq_last_status);
+				/* Continue and start FW LLDP anyways */
+			}
+
+			ret = i40e_aq_start_lldp(&pf->hw, false, NULL);
+			if (ret) {
+				dev_info(&pf->pdev->dev,
+					 "Start LLDP AQ command failed =0x%x\n",
+					 pf->hw.aq.asq_last_status);
+				goto command_write_done;
+			}
+#ifdef CONFIG_I40E_DCB
+			pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED |
+				       DCB_CAP_DCBX_VER_IEEE;
+#endif /* CONFIG_I40E_DCB */
+		} else if (strncmp(&cmd_buf[5],
+			   "get local", 9) == 0) {
+			u16 llen, rlen;
+			int ret;
+			u8 *buff;
+
+			buff = kzalloc(I40E_LLDPDU_SIZE, GFP_KERNEL);
+			if (!buff)
+				goto command_write_done;
+
+			ret = i40e_aq_get_lldp_mib(&pf->hw, 0,
+						   I40E_AQ_LLDP_MIB_LOCAL,
+						   buff, I40E_LLDPDU_SIZE,
+						   &llen, &rlen, NULL);
+			if (ret) {
+				dev_info(&pf->pdev->dev,
+					 "Get LLDP MIB (local) AQ command failed =0x%x\n",
+					 pf->hw.aq.asq_last_status);
+				kfree(buff);
+				buff = NULL;
+				goto command_write_done;
+			}
+			dev_info(&pf->pdev->dev, "LLDP MIB (local)\n");
+			print_hex_dump(KERN_INFO, "LLDP MIB (local): ",
+				       DUMP_PREFIX_OFFSET, 16, 1,
+				       buff, I40E_LLDPDU_SIZE, true);
+			kfree(buff);
+			buff = NULL;
+		} else if (strncmp(&cmd_buf[5], "get remote", 10) == 0) {
+			u16 llen, rlen;
+			int ret;
+			u8 *buff;
+
+			buff = kzalloc(I40E_LLDPDU_SIZE, GFP_KERNEL);
+			if (!buff)
+				goto command_write_done;
+
+			ret = i40e_aq_get_lldp_mib(&pf->hw,
+					I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE,
+					I40E_AQ_LLDP_MIB_REMOTE,
+					buff, I40E_LLDPDU_SIZE,
+					&llen, &rlen, NULL);
+			if (ret) {
+				dev_info(&pf->pdev->dev,
+					 "Get LLDP MIB (remote) AQ command failed =0x%x\n",
+					 pf->hw.aq.asq_last_status);
+				kfree(buff);
+				buff = NULL;
+				goto command_write_done;
+			}
+			dev_info(&pf->pdev->dev, "LLDP MIB (remote)\n");
+			print_hex_dump(KERN_INFO, "LLDP MIB (remote): ",
+				       DUMP_PREFIX_OFFSET, 16, 1,
+				       buff, I40E_LLDPDU_SIZE, true);
+			kfree(buff);
+			buff = NULL;
+		} else if (strncmp(&cmd_buf[5], "event on", 8) == 0) {
+			int ret;
+
+			ret = i40e_aq_cfg_lldp_mib_change_event(&pf->hw,
+								true, NULL);
+			if (ret) {
+				dev_info(&pf->pdev->dev,
+					 "Config LLDP MIB Change Event (on) AQ command failed =0x%x\n",
+					 pf->hw.aq.asq_last_status);
+				goto command_write_done;
+			}
+		} else if (strncmp(&cmd_buf[5], "event off", 9) == 0) {
+			int ret;
+
+			ret = i40e_aq_cfg_lldp_mib_change_event(&pf->hw,
+								false, NULL);
+			if (ret) {
+				dev_info(&pf->pdev->dev,
+					 "Config LLDP MIB Change Event (off) AQ command failed =0x%x\n",
+					 pf->hw.aq.asq_last_status);
+				goto command_write_done;
+			}
+		}
+	} else if (strncmp(cmd_buf, "nvm read", 8) == 0) {
+		u16 buffer_len, bytes;
+		u16 module;
+		u32 offset;
+		u16 *buff;
+		int ret;
+
+		cnt = sscanf(&cmd_buf[8], "%hx %x %hx",
+			     &module, &offset, &buffer_len);
+		if (cnt == 0) {
+			module = 0;
+			offset = 0;
+			buffer_len = 0;
+		} else if (cnt == 1) {
+			offset = 0;
+			buffer_len = 0;
+		} else if (cnt == 2) {
+			buffer_len = 0;
+		} else if (cnt > 3) {
+			dev_info(&pf->pdev->dev,
+				 "nvm read: bad command string, cnt=%d\n", cnt);
+			goto command_write_done;
+		}
+
+		/* set the max length */
+		buffer_len = min_t(u16, buffer_len, I40E_MAX_AQ_BUF_SIZE/2);
+
+		bytes = 2 * buffer_len;
+
+		/* read at least 1k bytes, no more than 4kB */
+		bytes = clamp(bytes, (u16)1024, (u16)I40E_MAX_AQ_BUF_SIZE);
+		buff = kzalloc(bytes, GFP_KERNEL);
+		if (!buff)
+			goto command_write_done;
+
+		ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_READ);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Failed Acquiring NVM resource for read err=%d status=0x%x\n",
+				 ret, pf->hw.aq.asq_last_status);
+			kfree(buff);
+			goto command_write_done;
+		}
+
+		ret = i40e_aq_read_nvm(&pf->hw, module, (2 * offset),
+				       bytes, (u8 *)buff, true, NULL);
+		i40e_release_nvm(&pf->hw);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Read NVM AQ failed err=%d status=0x%x\n",
+				 ret, pf->hw.aq.asq_last_status);
+		} else {
+			dev_info(&pf->pdev->dev,
+				 "Read NVM module=0x%x offset=0x%x words=%d\n",
+				 module, offset, buffer_len);
+			if (bytes)
+				print_hex_dump(KERN_INFO, "NVM Dump: ",
+					DUMP_PREFIX_OFFSET, 16, 2,
+					buff, bytes, true);
+		}
+		kfree(buff);
+		buff = NULL;
+	} else {
+		dev_info(&pf->pdev->dev, "unknown command '%s'\n", cmd_buf);
+		dev_info(&pf->pdev->dev, "available commands\n");
+		dev_info(&pf->pdev->dev, "  add vsi [relay_seid]\n");
+		dev_info(&pf->pdev->dev, "  del vsi [vsi_seid]\n");
+		dev_info(&pf->pdev->dev, "  add relay <uplink_seid> <vsi_seid>\n");
+		dev_info(&pf->pdev->dev, "  del relay <relay_seid>\n");
+		dev_info(&pf->pdev->dev, "  add pvid <vsi_seid> <vid>\n");
+		dev_info(&pf->pdev->dev, "  del pvid <vsi_seid>\n");
+		dev_info(&pf->pdev->dev, "  dump switch\n");
+		dev_info(&pf->pdev->dev, "  dump vsi [seid]\n");
+		dev_info(&pf->pdev->dev, "  dump desc tx <vsi_seid> <ring_id> [<desc_n>]\n");
+		dev_info(&pf->pdev->dev, "  dump desc rx <vsi_seid> <ring_id> [<desc_n>]\n");
+		dev_info(&pf->pdev->dev, "  dump desc xdp <vsi_seid> <ring_id> [<desc_n>]\n");
+		dev_info(&pf->pdev->dev, "  dump desc aq\n");
+		dev_info(&pf->pdev->dev, "  dump reset stats\n");
+		dev_info(&pf->pdev->dev, "  dump debug fwdata <cluster_id> <table_id> <index>\n");
+		dev_info(&pf->pdev->dev, "  read <reg>\n");
+		dev_info(&pf->pdev->dev, "  write <reg> <value>\n");
+		dev_info(&pf->pdev->dev, "  clear_stats vsi [seid]\n");
+		dev_info(&pf->pdev->dev, "  clear_stats port\n");
+		dev_info(&pf->pdev->dev, "  pfr\n");
+		dev_info(&pf->pdev->dev, "  corer\n");
+		dev_info(&pf->pdev->dev, "  globr\n");
+		dev_info(&pf->pdev->dev, "  send aq_cmd <flags> <opcode> <datalen> <retval> <cookie_h> <cookie_l> <param0> <param1> <param2> <param3>\n");
+		dev_info(&pf->pdev->dev, "  send indirect aq_cmd <flags> <opcode> <datalen> <retval> <cookie_h> <cookie_l> <param0> <param1> <param2> <param3> <buffer_len>\n");
+		dev_info(&pf->pdev->dev, "  fd current cnt");
+		dev_info(&pf->pdev->dev, "  lldp start\n");
+		dev_info(&pf->pdev->dev, "  lldp stop\n");
+		dev_info(&pf->pdev->dev, "  lldp get local\n");
+		dev_info(&pf->pdev->dev, "  lldp get remote\n");
+		dev_info(&pf->pdev->dev, "  lldp event on\n");
+		dev_info(&pf->pdev->dev, "  lldp event off\n");
+		dev_info(&pf->pdev->dev, "  nvm read [module] [word_offset] [word_count]\n");
+	}
+
+command_write_done:
+	kfree(cmd_buf);
+	cmd_buf = NULL;
+	return count;
+}
+
+static const struct file_operations i40e_dbg_command_fops = {
+	.owner = THIS_MODULE,
+	.open =  simple_open,
+	.read =  i40e_dbg_command_read,
+	.write = i40e_dbg_command_write,
+};
+
+/**************************************************************
+ * netdev_ops
+ * The netdev_ops entry in debugfs is for giving the driver commands
+ * to be executed from the netdev operations.
+ **************************************************************/
+static char i40e_dbg_netdev_ops_buf[256] = "";
+
+/**
+ * i40e_dbg_netdev_ops_read - read for netdev_ops datum
+ * @filp: the opened file
+ * @buffer: where to write the data for the user to read
+ * @count: the size of the user's buffer
+ * @ppos: file position offset
+ **/
+static ssize_t i40e_dbg_netdev_ops_read(struct file *filp, char __user *buffer,
+					size_t count, loff_t *ppos)
+{
+	struct i40e_pf *pf = filp->private_data;
+	int bytes_not_copied;
+	int buf_size = 256;
+	char *buf;
+	int len;
+
+	/* don't allow partal reads */
+	if (*ppos != 0)
+		return 0;
+	if (count < buf_size)
+		return -ENOSPC;
+
+	buf = kzalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOSPC;
+
+	len = snprintf(buf, buf_size, "%s: %s\n",
+		       pf->vsi[pf->lan_vsi]->netdev->name,
+		       i40e_dbg_netdev_ops_buf);
+
+	bytes_not_copied = copy_to_user(buffer, buf, len);
+	kfree(buf);
+
+	if (bytes_not_copied)
+		return -EFAULT;
+
+	*ppos = len;
+	return len;
+}
+
+/**
+ * i40e_dbg_netdev_ops_write - write into netdev_ops datum
+ * @filp: the opened file
+ * @buffer: where to find the user's data
+ * @count: the length of the user's data
+ * @ppos: file position offset
+ **/
+static ssize_t i40e_dbg_netdev_ops_write(struct file *filp,
+					 const char __user *buffer,
+					 size_t count, loff_t *ppos)
+{
+	struct i40e_pf *pf = filp->private_data;
+	int bytes_not_copied;
+	struct i40e_vsi *vsi;
+	char *buf_tmp;
+	int vsi_seid;
+	int i, cnt;
+
+	/* don't allow partial writes */
+	if (*ppos != 0)
+		return 0;
+	if (count >= sizeof(i40e_dbg_netdev_ops_buf))
+		return -ENOSPC;
+
+	memset(i40e_dbg_netdev_ops_buf, 0, sizeof(i40e_dbg_netdev_ops_buf));
+	bytes_not_copied = copy_from_user(i40e_dbg_netdev_ops_buf,
+					  buffer, count);
+	if (bytes_not_copied)
+		return -EFAULT;
+	i40e_dbg_netdev_ops_buf[count] = '\0';
+
+	buf_tmp = strchr(i40e_dbg_netdev_ops_buf, '\n');
+	if (buf_tmp) {
+		*buf_tmp = '\0';
+		count = buf_tmp - i40e_dbg_netdev_ops_buf + 1;
+	}
+
+	if (strncmp(i40e_dbg_netdev_ops_buf, "change_mtu", 10) == 0) {
+		int mtu;
+
+		cnt = sscanf(&i40e_dbg_netdev_ops_buf[11], "%i %i",
+			     &vsi_seid, &mtu);
+		if (cnt != 2) {
+			dev_info(&pf->pdev->dev, "change_mtu <vsi_seid> <mtu>\n");
+			goto netdev_ops_write_done;
+		}
+		vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+		if (!vsi) {
+			dev_info(&pf->pdev->dev,
+				 "change_mtu: VSI %d not found\n", vsi_seid);
+		} else if (!vsi->netdev) {
+			dev_info(&pf->pdev->dev, "change_mtu: no netdev for VSI %d\n",
+				 vsi_seid);
+		} else if (rtnl_trylock()) {
+			vsi->netdev->netdev_ops->ndo_change_mtu(vsi->netdev,
+								mtu);
+			rtnl_unlock();
+			dev_info(&pf->pdev->dev, "change_mtu called\n");
+		} else {
+			dev_info(&pf->pdev->dev, "Could not acquire RTNL - please try again\n");
+		}
+
+	} else if (strncmp(i40e_dbg_netdev_ops_buf, "set_rx_mode", 11) == 0) {
+		cnt = sscanf(&i40e_dbg_netdev_ops_buf[11], "%i", &vsi_seid);
+		if (cnt != 1) {
+			dev_info(&pf->pdev->dev, "set_rx_mode <vsi_seid>\n");
+			goto netdev_ops_write_done;
+		}
+		vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+		if (!vsi) {
+			dev_info(&pf->pdev->dev,
+				 "set_rx_mode: VSI %d not found\n", vsi_seid);
+		} else if (!vsi->netdev) {
+			dev_info(&pf->pdev->dev, "set_rx_mode: no netdev for VSI %d\n",
+				 vsi_seid);
+		} else if (rtnl_trylock()) {
+			vsi->netdev->netdev_ops->ndo_set_rx_mode(vsi->netdev);
+			rtnl_unlock();
+			dev_info(&pf->pdev->dev, "set_rx_mode called\n");
+		} else {
+			dev_info(&pf->pdev->dev, "Could not acquire RTNL - please try again\n");
+		}
+
+	} else if (strncmp(i40e_dbg_netdev_ops_buf, "napi", 4) == 0) {
+		cnt = sscanf(&i40e_dbg_netdev_ops_buf[4], "%i", &vsi_seid);
+		if (cnt != 1) {
+			dev_info(&pf->pdev->dev, "napi <vsi_seid>\n");
+			goto netdev_ops_write_done;
+		}
+		vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+		if (!vsi) {
+			dev_info(&pf->pdev->dev, "napi: VSI %d not found\n",
+				 vsi_seid);
+		} else if (!vsi->netdev) {
+			dev_info(&pf->pdev->dev, "napi: no netdev for VSI %d\n",
+				 vsi_seid);
+		} else {
+			for (i = 0; i < vsi->num_q_vectors; i++)
+				napi_schedule(&vsi->q_vectors[i]->napi);
+			dev_info(&pf->pdev->dev, "napi called\n");
+		}
+	} else {
+		dev_info(&pf->pdev->dev, "unknown command '%s'\n",
+			 i40e_dbg_netdev_ops_buf);
+		dev_info(&pf->pdev->dev, "available commands\n");
+		dev_info(&pf->pdev->dev, "  change_mtu <vsi_seid> <mtu>\n");
+		dev_info(&pf->pdev->dev, "  set_rx_mode <vsi_seid>\n");
+		dev_info(&pf->pdev->dev, "  napi <vsi_seid>\n");
+	}
+netdev_ops_write_done:
+	return count;
+}
+
+static const struct file_operations i40e_dbg_netdev_ops_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = i40e_dbg_netdev_ops_read,
+	.write = i40e_dbg_netdev_ops_write,
+};
+
+/**
+ * i40e_dbg_pf_init - setup the debugfs directory for the PF
+ * @pf: the PF that is starting up
+ **/
+void i40e_dbg_pf_init(struct i40e_pf *pf)
+{
+	const char *name = pci_name(pf->pdev);
+
+	pf->i40e_dbg_pf = debugfs_create_dir(name, i40e_dbg_root);
+
+	debugfs_create_file("command", 0600, pf->i40e_dbg_pf, pf,
+			    &i40e_dbg_command_fops);
+
+	debugfs_create_file("netdev_ops", 0600, pf->i40e_dbg_pf, pf,
+			    &i40e_dbg_netdev_ops_fops);
+}
+
+/**
+ * i40e_dbg_pf_exit - clear out the PF's debugfs entries
+ * @pf: the PF that is stopping
+ **/
+void i40e_dbg_pf_exit(struct i40e_pf *pf)
+{
+	debugfs_remove_recursive(pf->i40e_dbg_pf);
+	pf->i40e_dbg_pf = NULL;
+}
+
+/**
+ * i40e_dbg_init - start up debugfs for the driver
+ **/
+void i40e_dbg_init(void)
+{
+	i40e_dbg_root = debugfs_create_dir(i40e_driver_name, NULL);
+	if (IS_ERR(i40e_dbg_root))
+		pr_info("init of debugfs failed\n");
+}
+
+/**
+ * i40e_dbg_exit - clean out the driver's debugfs entries
+ **/
+void i40e_dbg_exit(void)
+{
+	debugfs_remove_recursive(i40e_dbg_root);
+	i40e_dbg_root = NULL;
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h
new file mode 100644
index 0000000000..d9c51a238d
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _I40E_DEVIDS_H_
+#define _I40E_DEVIDS_H_
+
+/* Device IDs */
+#define I40E_DEV_ID_X710_N3000		0x0CF8
+#define I40E_DEV_ID_XXV710_N3000	0x0D58
+#define I40E_DEV_ID_SFP_XL710		0x1572
+#define I40E_DEV_ID_QEMU		0x1574
+#define I40E_DEV_ID_KX_B		0x1580
+#define I40E_DEV_ID_KX_C		0x1581
+#define I40E_DEV_ID_QSFP_A		0x1583
+#define I40E_DEV_ID_QSFP_B		0x1584
+#define I40E_DEV_ID_QSFP_C		0x1585
+#define I40E_DEV_ID_10G_BASE_T		0x1586
+#define I40E_DEV_ID_20G_KR2		0x1587
+#define I40E_DEV_ID_20G_KR2_A		0x1588
+#define I40E_DEV_ID_10G_BASE_T4		0x1589
+#define I40E_DEV_ID_25G_B		0x158A
+#define I40E_DEV_ID_25G_SFP28		0x158B
+#define I40E_DEV_ID_10G_BASE_T_BC	0x15FF
+#define I40E_DEV_ID_10G_B		0x104F
+#define I40E_DEV_ID_10G_SFP		0x104E
+#define I40E_DEV_ID_5G_BASE_T_BC	0x101F
+#define I40E_DEV_ID_1G_BASE_T_BC	0x0DD2
+#define I40E_IS_X710TL_DEVICE(d) \
+	(((d) == I40E_DEV_ID_1G_BASE_T_BC) || \
+	 ((d) == I40E_DEV_ID_5G_BASE_T_BC) || \
+	 ((d) == I40E_DEV_ID_10G_BASE_T_BC))
+#define I40E_DEV_ID_KX_X722		0x37CE
+#define I40E_DEV_ID_QSFP_X722		0x37CF
+#define I40E_DEV_ID_SFP_X722		0x37D0
+#define I40E_DEV_ID_1G_BASE_T_X722	0x37D1
+#define I40E_DEV_ID_10G_BASE_T_X722	0x37D2
+#define I40E_DEV_ID_SFP_I_X722		0x37D3
+#define I40E_DEV_ID_SFP_X722_A		0x0DDA
+
+
+#endif /* _I40E_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c
new file mode 100644
index 0000000000..b1ad7c4259
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_diag.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "i40e_diag.h"
+#include "i40e_prototype.h"
+
+/**
+ * i40e_diag_reg_pattern_test
+ * @hw: pointer to the hw struct
+ * @reg: reg to be tested
+ * @mask: bits to be touched
+ **/
+static int i40e_diag_reg_pattern_test(struct i40e_hw *hw,
+				      u32 reg, u32 mask)
+{
+	static const u32 patterns[] = {
+		0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF
+	};
+	u32 pat, val, orig_val;
+	int i;
+
+	orig_val = rd32(hw, reg);
+	for (i = 0; i < ARRAY_SIZE(patterns); i++) {
+		pat = patterns[i];
+		wr32(hw, reg, (pat & mask));
+		val = rd32(hw, reg);
+		if ((val & mask) != (pat & mask)) {
+			i40e_debug(hw, I40E_DEBUG_DIAG,
+				   "%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n",
+				   __func__, reg, pat, val);
+			return -EIO;
+		}
+	}
+
+	wr32(hw, reg, orig_val);
+	val = rd32(hw, reg);
+	if (val != orig_val) {
+		i40e_debug(hw, I40E_DEBUG_DIAG,
+			   "%s: reg restore test failed - reg 0x%08x orig_val 0x%08x val 0x%08x\n",
+			   __func__, reg, orig_val, val);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+const struct i40e_diag_reg_test_info i40e_reg_list[] = {
+	/* offset               mask         elements   stride */
+	{I40E_QTX_CTL(0),       0x0000FFBF, 1,
+		I40E_QTX_CTL(1) - I40E_QTX_CTL(0)},
+	{I40E_PFINT_ITR0(0),    0x00000FFF, 3,
+		I40E_PFINT_ITR0(1) - I40E_PFINT_ITR0(0)},
+	{I40E_PFINT_ITRN(0, 0), 0x00000FFF, 1,
+		I40E_PFINT_ITRN(0, 1) - I40E_PFINT_ITRN(0, 0)},
+	{I40E_PFINT_ITRN(1, 0), 0x00000FFF, 1,
+		I40E_PFINT_ITRN(1, 1) - I40E_PFINT_ITRN(1, 0)},
+	{I40E_PFINT_ITRN(2, 0), 0x00000FFF, 1,
+		I40E_PFINT_ITRN(2, 1) - I40E_PFINT_ITRN(2, 0)},
+	{I40E_PFINT_STAT_CTL0,  0x0000000C, 1, 0},
+	{I40E_PFINT_LNKLST0,    0x00001FFF, 1, 0},
+	{I40E_PFINT_LNKLSTN(0), 0x000007FF, 1,
+		I40E_PFINT_LNKLSTN(1) - I40E_PFINT_LNKLSTN(0)},
+	{I40E_QINT_TQCTL(0),    0x000000FF, 1,
+		I40E_QINT_TQCTL(1) - I40E_QINT_TQCTL(0)},
+	{I40E_QINT_RQCTL(0),    0x000000FF, 1,
+		I40E_QINT_RQCTL(1) - I40E_QINT_RQCTL(0)},
+	{I40E_PFINT_ICR0_ENA,   0xF7F20000, 1, 0},
+	{ 0 }
+};
+
+/**
+ * i40e_diag_reg_test
+ * @hw: pointer to the hw struct
+ *
+ * Perform registers diagnostic test
+ **/
+int i40e_diag_reg_test(struct i40e_hw *hw)
+{
+	int ret_code = 0;
+	u32 reg, mask;
+	u32 elements;
+	u32 i, j;
+
+	for (i = 0; i40e_reg_list[i].offset != 0 &&
+					     !ret_code; i++) {
+
+		elements = i40e_reg_list[i].elements;
+		/* set actual reg range for dynamically allocated resources */
+		if (i40e_reg_list[i].offset == I40E_QTX_CTL(0) &&
+		    hw->func_caps.num_tx_qp != 0)
+			elements = hw->func_caps.num_tx_qp;
+		if ((i40e_reg_list[i].offset == I40E_PFINT_ITRN(0, 0) ||
+		     i40e_reg_list[i].offset == I40E_PFINT_ITRN(1, 0) ||
+		     i40e_reg_list[i].offset == I40E_PFINT_ITRN(2, 0) ||
+		     i40e_reg_list[i].offset == I40E_QINT_TQCTL(0) ||
+		     i40e_reg_list[i].offset == I40E_QINT_RQCTL(0)) &&
+		    hw->func_caps.num_msix_vectors != 0)
+			elements = hw->func_caps.num_msix_vectors - 1;
+
+		/* test register access */
+		mask = i40e_reg_list[i].mask;
+		for (j = 0; j < elements && !ret_code; j++) {
+			reg = i40e_reg_list[i].offset +
+			      (j * i40e_reg_list[i].stride);
+			ret_code = i40e_diag_reg_pattern_test(hw, reg, mask);
+		}
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40e_diag_eeprom_test
+ * @hw: pointer to the hw struct
+ *
+ * Perform EEPROM diagnostic test
+ **/
+int i40e_diag_eeprom_test(struct i40e_hw *hw)
+{
+	int ret_code;
+	u16 reg_val;
+
+	/* read NVM control word and if NVM valid, validate EEPROM checksum*/
+	ret_code = i40e_read_nvm_word(hw, I40E_SR_NVM_CONTROL_WORD, &reg_val);
+	if (!ret_code &&
+	    ((reg_val & I40E_SR_CONTROL_WORD_1_MASK) ==
+	     BIT(I40E_SR_CONTROL_WORD_1_SHIFT)))
+		return i40e_validate_nvm_checksum(hw, NULL);
+	else
+		return -EIO;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.h b/drivers/net/ethernet/intel/i40e/i40e_diag.h
new file mode 100644
index 0000000000..c3ce5f3521
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_diag.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _I40E_DIAG_H_
+#define _I40E_DIAG_H_
+
+#include "i40e_type.h"
+
+enum i40e_lb_mode {
+	I40E_LB_MODE_NONE       = 0x0,
+	I40E_LB_MODE_PHY_LOCAL  = I40E_AQ_LB_PHY_LOCAL,
+	I40E_LB_MODE_PHY_REMOTE = I40E_AQ_LB_PHY_REMOTE,
+	I40E_LB_MODE_MAC_LOCAL  = I40E_AQ_LB_MAC_LOCAL,
+};
+
+struct i40e_diag_reg_test_info {
+	u32 offset;	/* the base register */
+	u32 mask;	/* bits that can be tested */
+	u32 elements;	/* number of elements if array */
+	u32 stride;	/* bytes between each element */
+};
+
+extern const struct i40e_diag_reg_test_info i40e_reg_list[];
+
+int i40e_diag_reg_test(struct i40e_hw *hw);
+int i40e_diag_eeprom_test(struct i40e_hw *hw);
+
+#endif /* _I40E_DIAG_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
new file mode 100644
index 0000000000..bd1321bf7e
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -0,0 +1,5827 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+/* ethtool support for i40e */
+
+#include "i40e.h"
+#include "i40e_diag.h"
+#include "i40e_txrx_common.h"
+
+/* ethtool statistics helpers */
+
+/**
+ * struct i40e_stats - definition for an ethtool statistic
+ * @stat_string: statistic name to display in ethtool -S output
+ * @sizeof_stat: the sizeof() the stat, must be no greater than sizeof(u64)
+ * @stat_offset: offsetof() the stat from a base pointer
+ *
+ * This structure defines a statistic to be added to the ethtool stats buffer.
+ * It defines a statistic as offset from a common base pointer. Stats should
+ * be defined in constant arrays using the I40E_STAT macro, with every element
+ * of the array using the same _type for calculating the sizeof_stat and
+ * stat_offset.
+ *
+ * The @sizeof_stat is expected to be sizeof(u8), sizeof(u16), sizeof(u32) or
+ * sizeof(u64). Other sizes are not expected and will produce a WARN_ONCE from
+ * the i40e_add_ethtool_stat() helper function.
+ *
+ * The @stat_string is interpreted as a format string, allowing formatted
+ * values to be inserted while looping over multiple structures for a given
+ * statistics array. Thus, every statistic string in an array should have the
+ * same type and number of format specifiers, to be formatted by variadic
+ * arguments to the i40e_add_stat_string() helper function.
+ **/
+struct i40e_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+/* Helper macro to define an i40e_stat structure with proper size and type.
+ * Use this when defining constant statistics arrays. Note that @_type expects
+ * only a type name and is used multiple times.
+ */
+#define I40E_STAT(_type, _name, _stat) { \
+	.stat_string = _name, \
+	.sizeof_stat = sizeof_field(_type, _stat), \
+	.stat_offset = offsetof(_type, _stat) \
+}
+
+/* Helper macro for defining some statistics directly copied from the netdev
+ * stats structure.
+ */
+#define I40E_NETDEV_STAT(_net_stat) \
+	I40E_STAT(struct rtnl_link_stats64, #_net_stat, _net_stat)
+
+/* Helper macro for defining some statistics related to queues */
+#define I40E_QUEUE_STAT(_name, _stat) \
+	I40E_STAT(struct i40e_ring, _name, _stat)
+
+/* Stats associated with a Tx or Rx ring */
+static const struct i40e_stats i40e_gstrings_queue_stats[] = {
+	I40E_QUEUE_STAT("%s-%u.packets", stats.packets),
+	I40E_QUEUE_STAT("%s-%u.bytes", stats.bytes),
+};
+
+/**
+ * i40e_add_one_ethtool_stat - copy the stat into the supplied buffer
+ * @data: location to store the stat value
+ * @pointer: basis for where to copy from
+ * @stat: the stat definition
+ *
+ * Copies the stat data defined by the pointer and stat structure pair into
+ * the memory supplied as data. Used to implement i40e_add_ethtool_stats and
+ * i40e_add_queue_stats. If the pointer is null, data will be zero'd.
+ */
+static void
+i40e_add_one_ethtool_stat(u64 *data, void *pointer,
+			  const struct i40e_stats *stat)
+{
+	char *p;
+
+	if (!pointer) {
+		/* ensure that the ethtool data buffer is zero'd for any stats
+		 * which don't have a valid pointer.
+		 */
+		*data = 0;
+		return;
+	}
+
+	p = (char *)pointer + stat->stat_offset;
+	switch (stat->sizeof_stat) {
+	case sizeof(u64):
+		*data = *((u64 *)p);
+		break;
+	case sizeof(u32):
+		*data = *((u32 *)p);
+		break;
+	case sizeof(u16):
+		*data = *((u16 *)p);
+		break;
+	case sizeof(u8):
+		*data = *((u8 *)p);
+		break;
+	default:
+		WARN_ONCE(1, "unexpected stat size for %s",
+			  stat->stat_string);
+		*data = 0;
+	}
+}
+
+/**
+ * __i40e_add_ethtool_stats - copy stats into the ethtool supplied buffer
+ * @data: ethtool stats buffer
+ * @pointer: location to copy stats from
+ * @stats: array of stats to copy
+ * @size: the size of the stats definition
+ *
+ * Copy the stats defined by the stats array using the pointer as a base into
+ * the data buffer supplied by ethtool. Updates the data pointer to point to
+ * the next empty location for successive calls to __i40e_add_ethtool_stats.
+ * If pointer is null, set the data values to zero and update the pointer to
+ * skip these stats.
+ **/
+static void
+__i40e_add_ethtool_stats(u64 **data, void *pointer,
+			 const struct i40e_stats stats[],
+			 const unsigned int size)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++)
+		i40e_add_one_ethtool_stat((*data)++, pointer, &stats[i]);
+}
+
+/**
+ * i40e_add_ethtool_stats - copy stats into ethtool supplied buffer
+ * @data: ethtool stats buffer
+ * @pointer: location where stats are stored
+ * @stats: static const array of stat definitions
+ *
+ * Macro to ease the use of __i40e_add_ethtool_stats by taking a static
+ * constant stats array and passing the ARRAY_SIZE(). This avoids typos by
+ * ensuring that we pass the size associated with the given stats array.
+ *
+ * The parameter @stats is evaluated twice, so parameters with side effects
+ * should be avoided.
+ **/
+#define i40e_add_ethtool_stats(data, pointer, stats) \
+	__i40e_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats))
+
+/**
+ * i40e_add_queue_stats - copy queue statistics into supplied buffer
+ * @data: ethtool stats buffer
+ * @ring: the ring to copy
+ *
+ * Queue statistics must be copied while protected by
+ * u64_stats_fetch_begin, so we can't directly use i40e_add_ethtool_stats.
+ * Assumes that queue stats are defined in i40e_gstrings_queue_stats. If the
+ * ring pointer is null, zero out the queue stat values and update the data
+ * pointer. Otherwise safely copy the stats from the ring into the supplied
+ * buffer and update the data pointer when finished.
+ *
+ * This function expects to be called while under rcu_read_lock().
+ **/
+static void
+i40e_add_queue_stats(u64 **data, struct i40e_ring *ring)
+{
+	const unsigned int size = ARRAY_SIZE(i40e_gstrings_queue_stats);
+	const struct i40e_stats *stats = i40e_gstrings_queue_stats;
+	unsigned int start;
+	unsigned int i;
+
+	/* To avoid invalid statistics values, ensure that we keep retrying
+	 * the copy until we get a consistent value according to
+	 * u64_stats_fetch_retry. But first, make sure our ring is
+	 * non-null before attempting to access its syncp.
+	 */
+	do {
+		start = !ring ? 0 : u64_stats_fetch_begin(&ring->syncp);
+		for (i = 0; i < size; i++) {
+			i40e_add_one_ethtool_stat(&(*data)[i], ring,
+						  &stats[i]);
+		}
+	} while (ring && u64_stats_fetch_retry(&ring->syncp, start));
+
+	/* Once we successfully copy the stats in, update the data pointer */
+	*data += size;
+}
+
+/**
+ * __i40e_add_stat_strings - copy stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ * @size: size of the stats array
+ *
+ * Format and copy the strings described by stats into the buffer pointed at
+ * by p.
+ **/
+static void __i40e_add_stat_strings(u8 **p, const struct i40e_stats stats[],
+				    const unsigned int size, ...)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++) {
+		va_list args;
+
+		va_start(args, size);
+		vsnprintf(*p, ETH_GSTRING_LEN, stats[i].stat_string, args);
+		*p += ETH_GSTRING_LEN;
+		va_end(args);
+	}
+}
+
+/**
+ * i40e_add_stat_strings - copy stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ *
+ * Format and copy the strings described by the const static stats value into
+ * the buffer pointed at by p.
+ *
+ * The parameter @stats is evaluated twice, so parameters with side effects
+ * should be avoided. Additionally, stats must be an array such that
+ * ARRAY_SIZE can be called on it.
+ **/
+#define i40e_add_stat_strings(p, stats, ...) \
+	__i40e_add_stat_strings(p, stats, ARRAY_SIZE(stats), ## __VA_ARGS__)
+
+#define I40E_PF_STAT(_name, _stat) \
+	I40E_STAT(struct i40e_pf, _name, _stat)
+#define I40E_VSI_STAT(_name, _stat) \
+	I40E_STAT(struct i40e_vsi, _name, _stat)
+#define I40E_VEB_STAT(_name, _stat) \
+	I40E_STAT(struct i40e_veb, _name, _stat)
+#define I40E_VEB_TC_STAT(_name, _stat) \
+	I40E_STAT(struct i40e_cp_veb_tc_stats, _name, _stat)
+#define I40E_PFC_STAT(_name, _stat) \
+	I40E_STAT(struct i40e_pfc_stats, _name, _stat)
+
+static const struct i40e_stats i40e_gstrings_net_stats[] = {
+	I40E_NETDEV_STAT(rx_packets),
+	I40E_NETDEV_STAT(tx_packets),
+	I40E_NETDEV_STAT(rx_bytes),
+	I40E_NETDEV_STAT(tx_bytes),
+	I40E_NETDEV_STAT(rx_errors),
+	I40E_NETDEV_STAT(tx_errors),
+	I40E_NETDEV_STAT(rx_dropped),
+	I40E_NETDEV_STAT(tx_dropped),
+	I40E_NETDEV_STAT(collisions),
+	I40E_NETDEV_STAT(rx_length_errors),
+	I40E_NETDEV_STAT(rx_crc_errors),
+};
+
+static const struct i40e_stats i40e_gstrings_veb_stats[] = {
+	I40E_VEB_STAT("veb.rx_bytes", stats.rx_bytes),
+	I40E_VEB_STAT("veb.tx_bytes", stats.tx_bytes),
+	I40E_VEB_STAT("veb.rx_unicast", stats.rx_unicast),
+	I40E_VEB_STAT("veb.tx_unicast", stats.tx_unicast),
+	I40E_VEB_STAT("veb.rx_multicast", stats.rx_multicast),
+	I40E_VEB_STAT("veb.tx_multicast", stats.tx_multicast),
+	I40E_VEB_STAT("veb.rx_broadcast", stats.rx_broadcast),
+	I40E_VEB_STAT("veb.tx_broadcast", stats.tx_broadcast),
+	I40E_VEB_STAT("veb.rx_discards", stats.rx_discards),
+	I40E_VEB_STAT("veb.tx_discards", stats.tx_discards),
+	I40E_VEB_STAT("veb.tx_errors", stats.tx_errors),
+	I40E_VEB_STAT("veb.rx_unknown_protocol", stats.rx_unknown_protocol),
+};
+
+struct i40e_cp_veb_tc_stats {
+	u64 tc_rx_packets;
+	u64 tc_rx_bytes;
+	u64 tc_tx_packets;
+	u64 tc_tx_bytes;
+};
+
+static const struct i40e_stats i40e_gstrings_veb_tc_stats[] = {
+	I40E_VEB_TC_STAT("veb.tc_%u_tx_packets", tc_tx_packets),
+	I40E_VEB_TC_STAT("veb.tc_%u_tx_bytes", tc_tx_bytes),
+	I40E_VEB_TC_STAT("veb.tc_%u_rx_packets", tc_rx_packets),
+	I40E_VEB_TC_STAT("veb.tc_%u_rx_bytes", tc_rx_bytes),
+};
+
+static const struct i40e_stats i40e_gstrings_misc_stats[] = {
+	I40E_VSI_STAT("rx_unicast", eth_stats.rx_unicast),
+	I40E_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
+	I40E_VSI_STAT("rx_multicast", eth_stats.rx_multicast),
+	I40E_VSI_STAT("tx_multicast", eth_stats.tx_multicast),
+	I40E_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast),
+	I40E_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
+	I40E_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
+	I40E_VSI_STAT("tx_linearize", tx_linearize),
+	I40E_VSI_STAT("tx_force_wb", tx_force_wb),
+	I40E_VSI_STAT("tx_busy", tx_busy),
+	I40E_VSI_STAT("tx_stopped", tx_stopped),
+	I40E_VSI_STAT("rx_alloc_fail", rx_buf_failed),
+	I40E_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
+	I40E_VSI_STAT("rx_cache_reuse", rx_page_reuse),
+	I40E_VSI_STAT("rx_cache_alloc", rx_page_alloc),
+	I40E_VSI_STAT("rx_cache_waive", rx_page_waive),
+	I40E_VSI_STAT("rx_cache_busy", rx_page_busy),
+	I40E_VSI_STAT("tx_restart", tx_restart),
+};
+
+/* These PF_STATs might look like duplicates of some NETDEV_STATs,
+ * but they are separate.  This device supports Virtualization, and
+ * as such might have several netdevs supporting VMDq and FCoE going
+ * through a single port.  The NETDEV_STATs are for individual netdevs
+ * seen at the top of the stack, and the PF_STATs are for the physical
+ * function at the bottom of the stack hosting those netdevs.
+ *
+ * The PF_STATs are appended to the netdev stats only when ethtool -S
+ * is queried on the base PF netdev, not on the VMDq or FCoE netdev.
+ */
+static const struct i40e_stats i40e_gstrings_stats[] = {
+	I40E_PF_STAT("port.rx_bytes", stats.eth.rx_bytes),
+	I40E_PF_STAT("port.tx_bytes", stats.eth.tx_bytes),
+	I40E_PF_STAT("port.rx_unicast", stats.eth.rx_unicast),
+	I40E_PF_STAT("port.tx_unicast", stats.eth.tx_unicast),
+	I40E_PF_STAT("port.rx_multicast", stats.eth.rx_multicast),
+	I40E_PF_STAT("port.tx_multicast", stats.eth.tx_multicast),
+	I40E_PF_STAT("port.rx_broadcast", stats.eth.rx_broadcast),
+	I40E_PF_STAT("port.tx_broadcast", stats.eth.tx_broadcast),
+	I40E_PF_STAT("port.tx_errors", stats.eth.tx_errors),
+	I40E_PF_STAT("port.rx_dropped", stats.eth.rx_discards),
+	I40E_PF_STAT("port.tx_dropped_link_down", stats.tx_dropped_link_down),
+	I40E_PF_STAT("port.rx_crc_errors", stats.crc_errors),
+	I40E_PF_STAT("port.illegal_bytes", stats.illegal_bytes),
+	I40E_PF_STAT("port.mac_local_faults", stats.mac_local_faults),
+	I40E_PF_STAT("port.mac_remote_faults", stats.mac_remote_faults),
+	I40E_PF_STAT("port.tx_timeout", tx_timeout_count),
+	I40E_PF_STAT("port.rx_csum_bad", hw_csum_rx_error),
+	I40E_PF_STAT("port.rx_length_errors", stats.rx_length_errors),
+	I40E_PF_STAT("port.link_xon_rx", stats.link_xon_rx),
+	I40E_PF_STAT("port.link_xoff_rx", stats.link_xoff_rx),
+	I40E_PF_STAT("port.link_xon_tx", stats.link_xon_tx),
+	I40E_PF_STAT("port.link_xoff_tx", stats.link_xoff_tx),
+	I40E_PF_STAT("port.rx_size_64", stats.rx_size_64),
+	I40E_PF_STAT("port.rx_size_127", stats.rx_size_127),
+	I40E_PF_STAT("port.rx_size_255", stats.rx_size_255),
+	I40E_PF_STAT("port.rx_size_511", stats.rx_size_511),
+	I40E_PF_STAT("port.rx_size_1023", stats.rx_size_1023),
+	I40E_PF_STAT("port.rx_size_1522", stats.rx_size_1522),
+	I40E_PF_STAT("port.rx_size_big", stats.rx_size_big),
+	I40E_PF_STAT("port.tx_size_64", stats.tx_size_64),
+	I40E_PF_STAT("port.tx_size_127", stats.tx_size_127),
+	I40E_PF_STAT("port.tx_size_255", stats.tx_size_255),
+	I40E_PF_STAT("port.tx_size_511", stats.tx_size_511),
+	I40E_PF_STAT("port.tx_size_1023", stats.tx_size_1023),
+	I40E_PF_STAT("port.tx_size_1522", stats.tx_size_1522),
+	I40E_PF_STAT("port.tx_size_big", stats.tx_size_big),
+	I40E_PF_STAT("port.rx_undersize", stats.rx_undersize),
+	I40E_PF_STAT("port.rx_fragments", stats.rx_fragments),
+	I40E_PF_STAT("port.rx_oversize", stats.rx_oversize),
+	I40E_PF_STAT("port.rx_jabber", stats.rx_jabber),
+	I40E_PF_STAT("port.VF_admin_queue_requests", vf_aq_requests),
+	I40E_PF_STAT("port.arq_overflows", arq_overflows),
+	I40E_PF_STAT("port.tx_hwtstamp_timeouts", tx_hwtstamp_timeouts),
+	I40E_PF_STAT("port.rx_hwtstamp_cleared", rx_hwtstamp_cleared),
+	I40E_PF_STAT("port.tx_hwtstamp_skipped", tx_hwtstamp_skipped),
+	I40E_PF_STAT("port.fdir_flush_cnt", fd_flush_cnt),
+	I40E_PF_STAT("port.fdir_atr_match", stats.fd_atr_match),
+	I40E_PF_STAT("port.fdir_atr_tunnel_match", stats.fd_atr_tunnel_match),
+	I40E_PF_STAT("port.fdir_atr_status", stats.fd_atr_status),
+	I40E_PF_STAT("port.fdir_sb_match", stats.fd_sb_match),
+	I40E_PF_STAT("port.fdir_sb_status", stats.fd_sb_status),
+
+	/* LPI stats */
+	I40E_PF_STAT("port.tx_lpi_status", stats.tx_lpi_status),
+	I40E_PF_STAT("port.rx_lpi_status", stats.rx_lpi_status),
+	I40E_PF_STAT("port.tx_lpi_count", stats.tx_lpi_count),
+	I40E_PF_STAT("port.rx_lpi_count", stats.rx_lpi_count),
+};
+
+struct i40e_pfc_stats {
+	u64 priority_xon_rx;
+	u64 priority_xoff_rx;
+	u64 priority_xon_tx;
+	u64 priority_xoff_tx;
+	u64 priority_xon_2_xoff;
+};
+
+static const struct i40e_stats i40e_gstrings_pfc_stats[] = {
+	I40E_PFC_STAT("port.tx_priority_%u_xon_tx", priority_xon_tx),
+	I40E_PFC_STAT("port.tx_priority_%u_xoff_tx", priority_xoff_tx),
+	I40E_PFC_STAT("port.rx_priority_%u_xon_rx", priority_xon_rx),
+	I40E_PFC_STAT("port.rx_priority_%u_xoff_rx", priority_xoff_rx),
+	I40E_PFC_STAT("port.rx_priority_%u_xon_2_xoff", priority_xon_2_xoff),
+};
+
+#define I40E_NETDEV_STATS_LEN	ARRAY_SIZE(i40e_gstrings_net_stats)
+
+#define I40E_MISC_STATS_LEN	ARRAY_SIZE(i40e_gstrings_misc_stats)
+
+#define I40E_VSI_STATS_LEN	(I40E_NETDEV_STATS_LEN + I40E_MISC_STATS_LEN)
+
+#define I40E_PFC_STATS_LEN	(ARRAY_SIZE(i40e_gstrings_pfc_stats) * \
+				 I40E_MAX_USER_PRIORITY)
+
+#define I40E_VEB_STATS_LEN	(ARRAY_SIZE(i40e_gstrings_veb_stats) + \
+				 (ARRAY_SIZE(i40e_gstrings_veb_tc_stats) * \
+				  I40E_MAX_TRAFFIC_CLASS))
+
+#define I40E_GLOBAL_STATS_LEN	ARRAY_SIZE(i40e_gstrings_stats)
+
+#define I40E_PF_STATS_LEN	(I40E_GLOBAL_STATS_LEN + \
+				 I40E_PFC_STATS_LEN + \
+				 I40E_VEB_STATS_LEN + \
+				 I40E_VSI_STATS_LEN)
+
+/* Length of stats for a single queue */
+#define I40E_QUEUE_STATS_LEN	ARRAY_SIZE(i40e_gstrings_queue_stats)
+
+enum i40e_ethtool_test_id {
+	I40E_ETH_TEST_REG = 0,
+	I40E_ETH_TEST_EEPROM,
+	I40E_ETH_TEST_INTR,
+	I40E_ETH_TEST_LINK,
+};
+
+static const char i40e_gstrings_test[][ETH_GSTRING_LEN] = {
+	"Register test  (offline)",
+	"Eeprom test    (offline)",
+	"Interrupt test (offline)",
+	"Link test   (on/offline)"
+};
+
+#define I40E_TEST_LEN (sizeof(i40e_gstrings_test) / ETH_GSTRING_LEN)
+
+struct i40e_priv_flags {
+	char flag_string[ETH_GSTRING_LEN];
+	u64 flag;
+	bool read_only;
+};
+
+#define I40E_PRIV_FLAG(_name, _flag, _read_only) { \
+	.flag_string = _name, \
+	.flag = _flag, \
+	.read_only = _read_only, \
+}
+
+static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
+	/* NOTE: MFP setting cannot be changed */
+	I40E_PRIV_FLAG("MFP", I40E_FLAG_MFP_ENABLED, 1),
+	I40E_PRIV_FLAG("total-port-shutdown",
+		       I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED, 1),
+	I40E_PRIV_FLAG("LinkPolling", I40E_FLAG_LINK_POLLING_ENABLED, 0),
+	I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0),
+	I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
+	I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
+	I40E_PRIV_FLAG("link-down-on-close",
+		       I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED, 0),
+	I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
+	I40E_PRIV_FLAG("disable-source-pruning",
+		       I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
+	I40E_PRIV_FLAG("disable-fw-lldp", I40E_FLAG_DISABLE_FW_LLDP, 0),
+	I40E_PRIV_FLAG("rs-fec", I40E_FLAG_RS_FEC, 0),
+	I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0),
+	I40E_PRIV_FLAG("vf-vlan-pruning",
+		       I40E_FLAG_VF_VLAN_PRUNING, 0),
+};
+
+#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
+
+/* Private flags with a global effect, restricted to PF 0 */
+static const struct i40e_priv_flags i40e_gl_gstrings_priv_flags[] = {
+	I40E_PRIV_FLAG("vf-true-promisc-support",
+		       I40E_FLAG_TRUE_PROMISC_SUPPORT, 0),
+};
+
+#define I40E_GL_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gl_gstrings_priv_flags)
+
+/**
+ * i40e_partition_setting_complaint - generic complaint for MFP restriction
+ * @pf: the PF struct
+ **/
+static void i40e_partition_setting_complaint(struct i40e_pf *pf)
+{
+	dev_info(&pf->pdev->dev,
+		 "The link settings are allowed to be changed only from the first partition of a given port. Please switch to the first partition in order to change the setting.\n");
+}
+
+/**
+ * i40e_phy_type_to_ethtool - convert the phy_types to ethtool link modes
+ * @pf: PF struct with phy_types
+ * @ks: ethtool link ksettings struct to fill out
+ *
+ **/
+static void i40e_phy_type_to_ethtool(struct i40e_pf *pf,
+				     struct ethtool_link_ksettings *ks)
+{
+	struct i40e_link_status *hw_link_info = &pf->hw.phy.link_info;
+	u64 phy_types = pf->hw.phy.phy_types;
+
+	ethtool_link_ksettings_zero_link_mode(ks, supported);
+	ethtool_link_ksettings_zero_link_mode(ks, advertising);
+
+	if (phy_types & I40E_CAP_PHY_TYPE_SGMII) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     1000baseT_Full);
+		if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
+			ethtool_link_ksettings_add_link_mode(ks, supported,
+							     100baseT_Full);
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     100baseT_Full);
+		}
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_XAUI ||
+	    phy_types & I40E_CAP_PHY_TYPE_XFI ||
+	    phy_types & I40E_CAP_PHY_TYPE_SFI ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseT_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_T) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseT_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_2_5GBASE_T) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     2500baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_2_5GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     2500baseT_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_5GBASE_T) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     5000baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_5GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     5000baseT_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_XLAUI ||
+	    phy_types & I40E_CAP_PHY_TYPE_XLPPI ||
+	    phy_types & I40E_CAP_PHY_TYPE_40GBASE_AOC)
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseCR4_Full);
+	if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU ||
+	    phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseCR4_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_40GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     40000baseCR4_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     100baseT_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     1000baseT_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_SR4) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseSR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseSR4_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_LR4) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseLR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseLR4_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseKR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseKR4_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     20000baseKR2_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_20GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     20000baseKR2_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseKX4_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseKX4_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR &&
+	    !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseKR_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseKR_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX &&
+	    !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseKX_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     1000baseKX_Full);
+	}
+	/* need to add 25G PHY types */
+	if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseKR_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     25000baseKR_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseCR_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     25000baseCR_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseSR_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     25000baseSR_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_AOC ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_ACC) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseCR_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     25000baseCR_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_AOC ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_ACC) {
+		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE);
+		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
+		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB) {
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     FEC_NONE);
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     FEC_RS);
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     FEC_BASER);
+		}
+	}
+	/* need to add new 10G PHY types */
+	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseCR_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseCR_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseSR_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseSR_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseLR_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseLR_Full);
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
+	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
+	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseX_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     1000baseX_Full);
+	}
+	/* Autoneg PHY types */
+	if (phy_types & I40E_CAP_PHY_TYPE_SGMII ||
+	    phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4 ||
+	    phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU ||
+	    phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4 ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR ||
+	    phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2 ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4 ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
+	    phy_types & I40E_CAP_PHY_TYPE_10GBASE_T ||
+	    phy_types & I40E_CAP_PHY_TYPE_5GBASE_T ||
+	    phy_types & I40E_CAP_PHY_TYPE_2_5GBASE_T ||
+	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL ||
+	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_T ||
+	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
+	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
+	    phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX ||
+	    phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) {
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Autoneg);
+	}
+}
+
+/**
+ * i40e_get_settings_link_up_fec - Get the FEC mode encoding from mask
+ * @req_fec_info: mask request FEC info
+ * @ks: ethtool ksettings to fill in
+ **/
+static void i40e_get_settings_link_up_fec(u8 req_fec_info,
+					  struct ethtool_link_ksettings *ks)
+{
+	ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE);
+	ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
+	ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
+
+	if ((I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) &&
+	    (I40E_AQ_SET_FEC_REQUEST_KR & req_fec_info)) {
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     FEC_NONE);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     FEC_BASER);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
+	} else if (I40E_AQ_SET_FEC_REQUEST_RS & req_fec_info) {
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
+	} else if (I40E_AQ_SET_FEC_REQUEST_KR & req_fec_info) {
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     FEC_BASER);
+	} else {
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     FEC_NONE);
+	}
+}
+
+/**
+ * i40e_get_settings_link_up - Get the Link settings for when link is up
+ * @hw: hw structure
+ * @ks: ethtool ksettings to fill in
+ * @netdev: network interface device structure
+ * @pf: pointer to physical function struct
+ **/
+static void i40e_get_settings_link_up(struct i40e_hw *hw,
+				      struct ethtool_link_ksettings *ks,
+				      struct net_device *netdev,
+				      struct i40e_pf *pf)
+{
+	struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+	struct ethtool_link_ksettings cap_ksettings;
+	u32 link_speed = hw_link_info->link_speed;
+
+	/* Initialize supported and advertised settings based on phy settings */
+	switch (hw_link_info->phy_type) {
+	case I40E_PHY_TYPE_40GBASE_CR4:
+	case I40E_PHY_TYPE_40GBASE_CR4_CU:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseCR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseCR4_Full);
+		break;
+	case I40E_PHY_TYPE_XLAUI:
+	case I40E_PHY_TYPE_XLPPI:
+	case I40E_PHY_TYPE_40GBASE_AOC:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseCR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseCR4_Full);
+		break;
+	case I40E_PHY_TYPE_40GBASE_SR4:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseSR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseSR4_Full);
+		break;
+	case I40E_PHY_TYPE_40GBASE_LR4:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseLR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseLR4_Full);
+		break;
+	case I40E_PHY_TYPE_25GBASE_SR:
+	case I40E_PHY_TYPE_25GBASE_LR:
+	case I40E_PHY_TYPE_10GBASE_SR:
+	case I40E_PHY_TYPE_10GBASE_LR:
+	case I40E_PHY_TYPE_1000BASE_SX:
+	case I40E_PHY_TYPE_1000BASE_LX:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseSR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     25000baseSR_Full);
+		i40e_get_settings_link_up_fec(hw_link_info->req_fec_info, ks);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseSR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     10000baseSR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseLR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     10000baseLR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseX_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     1000baseX_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		if (hw_link_info->module_type[2] &
+		    I40E_MODULE_TYPE_1000BASE_SX ||
+		    hw_link_info->module_type[2] &
+		    I40E_MODULE_TYPE_1000BASE_LX) {
+			ethtool_link_ksettings_add_link_mode(ks, supported,
+							     1000baseT_Full);
+			if (hw_link_info->requested_speeds &
+			    I40E_LINK_SPEED_1GB)
+				ethtool_link_ksettings_add_link_mode(
+				     ks, advertising, 1000baseT_Full);
+		}
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseT_Full);
+		break;
+	case I40E_PHY_TYPE_10GBASE_T:
+	case I40E_PHY_TYPE_5GBASE_T_LINK_STATUS:
+	case I40E_PHY_TYPE_2_5GBASE_T_LINK_STATUS:
+	case I40E_PHY_TYPE_1000BASE_T:
+	case I40E_PHY_TYPE_100BASE_TX:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     5000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     2500baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     100baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_5GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     5000baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_2_5GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     2500baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     1000baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     100baseT_Full);
+		break;
+	case I40E_PHY_TYPE_1000BASE_T_OPTICAL:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     1000baseT_Full);
+		break;
+	case I40E_PHY_TYPE_10GBASE_CR1_CU:
+	case I40E_PHY_TYPE_10GBASE_CR1:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     10000baseT_Full);
+		break;
+	case I40E_PHY_TYPE_XAUI:
+	case I40E_PHY_TYPE_XFI:
+	case I40E_PHY_TYPE_SFI:
+	case I40E_PHY_TYPE_10GBASE_SFPP_CU:
+	case I40E_PHY_TYPE_10GBASE_AOC:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     10000baseT_Full);
+		i40e_get_settings_link_up_fec(hw_link_info->req_fec_info, ks);
+		break;
+	case I40E_PHY_TYPE_SGMII:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseT_Full);
+		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
+			ethtool_link_ksettings_add_link_mode(ks, advertising,
+							     1000baseT_Full);
+		if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
+			ethtool_link_ksettings_add_link_mode(ks, supported,
+							     100baseT_Full);
+			if (hw_link_info->requested_speeds &
+			    I40E_LINK_SPEED_100MB)
+				ethtool_link_ksettings_add_link_mode(
+				      ks, advertising, 100baseT_Full);
+		}
+		break;
+	case I40E_PHY_TYPE_40GBASE_KR4:
+	case I40E_PHY_TYPE_25GBASE_KR:
+	case I40E_PHY_TYPE_20GBASE_KR2:
+	case I40E_PHY_TYPE_10GBASE_KR:
+	case I40E_PHY_TYPE_10GBASE_KX4:
+	case I40E_PHY_TYPE_1000BASE_KX:
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     40000baseKR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseKR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     20000baseKR2_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseKR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseKX4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     1000baseKX_Full);
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     40000baseKR4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     25000baseKR_Full);
+		i40e_get_settings_link_up_fec(hw_link_info->req_fec_info, ks);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     20000baseKR2_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     10000baseKR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     10000baseKX4_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     1000baseKX_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		break;
+	case I40E_PHY_TYPE_25GBASE_CR:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseCR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     25000baseCR_Full);
+		i40e_get_settings_link_up_fec(hw_link_info->req_fec_info, ks);
+
+		break;
+	case I40E_PHY_TYPE_25GBASE_AOC:
+	case I40E_PHY_TYPE_25GBASE_ACC:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     25000baseCR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     25000baseCR_Full);
+		i40e_get_settings_link_up_fec(hw_link_info->req_fec_info, ks);
+
+		ethtool_link_ksettings_add_link_mode(ks, supported,
+						     10000baseCR_Full);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     10000baseCR_Full);
+		break;
+	default:
+		/* if we got here and link is up something bad is afoot */
+		netdev_info(netdev,
+			    "WARNING: Link is up but PHY type 0x%x is not recognized, or incorrect cable is in use\n",
+			    hw_link_info->phy_type);
+	}
+
+	/* Now that we've worked out everything that could be supported by the
+	 * current PHY type, get what is supported by the NVM and intersect
+	 * them to get what is truly supported
+	 */
+	memset(&cap_ksettings, 0, sizeof(struct ethtool_link_ksettings));
+	i40e_phy_type_to_ethtool(pf, &cap_ksettings);
+	ethtool_intersect_link_masks(ks, &cap_ksettings);
+
+	/* Set speed and duplex */
+	switch (link_speed) {
+	case I40E_LINK_SPEED_40GB:
+		ks->base.speed = SPEED_40000;
+		break;
+	case I40E_LINK_SPEED_25GB:
+		ks->base.speed = SPEED_25000;
+		break;
+	case I40E_LINK_SPEED_20GB:
+		ks->base.speed = SPEED_20000;
+		break;
+	case I40E_LINK_SPEED_10GB:
+		ks->base.speed = SPEED_10000;
+		break;
+	case I40E_LINK_SPEED_5GB:
+		ks->base.speed = SPEED_5000;
+		break;
+	case I40E_LINK_SPEED_2_5GB:
+		ks->base.speed = SPEED_2500;
+		break;
+	case I40E_LINK_SPEED_1GB:
+		ks->base.speed = SPEED_1000;
+		break;
+	case I40E_LINK_SPEED_100MB:
+		ks->base.speed = SPEED_100;
+		break;
+	default:
+		ks->base.speed = SPEED_UNKNOWN;
+		break;
+	}
+	ks->base.duplex = DUPLEX_FULL;
+}
+
+/**
+ * i40e_get_settings_link_down - Get the Link settings for when link is down
+ * @hw: hw structure
+ * @ks: ethtool ksettings to fill in
+ * @pf: pointer to physical function struct
+ *
+ * Reports link settings that can be determined when link is down
+ **/
+static void i40e_get_settings_link_down(struct i40e_hw *hw,
+					struct ethtool_link_ksettings *ks,
+					struct i40e_pf *pf)
+{
+	/* link is down and the driver needs to fall back on
+	 * supported phy types to figure out what info to display
+	 */
+	i40e_phy_type_to_ethtool(pf, ks);
+
+	/* With no link speed and duplex are unknown */
+	ks->base.speed = SPEED_UNKNOWN;
+	ks->base.duplex = DUPLEX_UNKNOWN;
+}
+
+/**
+ * i40e_get_link_ksettings - Get Link Speed and Duplex settings
+ * @netdev: network interface device structure
+ * @ks: ethtool ksettings
+ *
+ * Reports speed/duplex settings based on media_type
+ **/
+static int i40e_get_link_ksettings(struct net_device *netdev,
+				   struct ethtool_link_ksettings *ks)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+	bool link_up = hw_link_info->link_info & I40E_AQ_LINK_UP;
+
+	ethtool_link_ksettings_zero_link_mode(ks, supported);
+	ethtool_link_ksettings_zero_link_mode(ks, advertising);
+
+	if (link_up)
+		i40e_get_settings_link_up(hw, ks, netdev, pf);
+	else
+		i40e_get_settings_link_down(hw, ks, pf);
+
+	/* Now set the settings that don't rely on link being up/down */
+	/* Set autoneg settings */
+	ks->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
+			    AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+	/* Set media type settings */
+	switch (hw->phy.media_type) {
+	case I40E_MEDIA_TYPE_BACKPLANE:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, supported, Backplane);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Backplane);
+		ks->base.port = PORT_NONE;
+		break;
+	case I40E_MEDIA_TYPE_BASET:
+		ethtool_link_ksettings_add_link_mode(ks, supported, TP);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, TP);
+		ks->base.port = PORT_TP;
+		break;
+	case I40E_MEDIA_TYPE_DA:
+	case I40E_MEDIA_TYPE_CX4:
+		ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE);
+		ks->base.port = PORT_DA;
+		break;
+	case I40E_MEDIA_TYPE_FIBER:
+		ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE);
+		ks->base.port = PORT_FIBRE;
+		break;
+	case I40E_MEDIA_TYPE_UNKNOWN:
+	default:
+		ks->base.port = PORT_OTHER;
+		break;
+	}
+
+	/* Set flow control settings */
+	ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
+	ethtool_link_ksettings_add_link_mode(ks, supported, Asym_Pause);
+
+	switch (hw->fc.requested_mode) {
+	case I40E_FC_FULL:
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+		break;
+	case I40E_FC_TX_PAUSE:
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Asym_Pause);
+		break;
+	case I40E_FC_RX_PAUSE:
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Asym_Pause);
+		break;
+	default:
+		ethtool_link_ksettings_del_link_mode(ks, advertising, Pause);
+		ethtool_link_ksettings_del_link_mode(ks, advertising,
+						     Asym_Pause);
+		break;
+	}
+
+	return 0;
+}
+
+#define I40E_LBIT_SIZE 8
+/**
+ * i40e_speed_to_link_speed - Translate decimal speed to i40e_aq_link_speed
+ * @speed: speed in decimal
+ * @ks: ethtool ksettings
+ *
+ * Return i40e_aq_link_speed based on speed
+ **/
+static enum i40e_aq_link_speed
+i40e_speed_to_link_speed(__u32 speed, const struct ethtool_link_ksettings *ks)
+{
+	enum i40e_aq_link_speed link_speed = I40E_LINK_SPEED_UNKNOWN;
+	bool speed_changed = false;
+	int i, j;
+
+	static const struct {
+		__u32 speed;
+		enum i40e_aq_link_speed link_speed;
+		__u8 bit[I40E_LBIT_SIZE];
+	} i40e_speed_lut[] = {
+#define I40E_LBIT(mode) ETHTOOL_LINK_MODE_ ## mode ##_Full_BIT
+		{SPEED_100, I40E_LINK_SPEED_100MB, {I40E_LBIT(100baseT)} },
+		{SPEED_1000, I40E_LINK_SPEED_1GB,
+		 {I40E_LBIT(1000baseT), I40E_LBIT(1000baseX),
+		  I40E_LBIT(1000baseKX)} },
+		{SPEED_10000, I40E_LINK_SPEED_10GB,
+		 {I40E_LBIT(10000baseT), I40E_LBIT(10000baseKR),
+		  I40E_LBIT(10000baseLR), I40E_LBIT(10000baseCR),
+		  I40E_LBIT(10000baseSR), I40E_LBIT(10000baseKX4)} },
+
+		{SPEED_25000, I40E_LINK_SPEED_25GB,
+		 {I40E_LBIT(25000baseCR), I40E_LBIT(25000baseKR),
+		  I40E_LBIT(25000baseSR)} },
+		{SPEED_40000, I40E_LINK_SPEED_40GB,
+		 {I40E_LBIT(40000baseKR4), I40E_LBIT(40000baseCR4),
+		  I40E_LBIT(40000baseSR4), I40E_LBIT(40000baseLR4)} },
+		{SPEED_20000, I40E_LINK_SPEED_20GB,
+		 {I40E_LBIT(20000baseKR2)} },
+		{SPEED_2500, I40E_LINK_SPEED_2_5GB, {I40E_LBIT(2500baseT)} },
+		{SPEED_5000, I40E_LINK_SPEED_5GB, {I40E_LBIT(2500baseT)} }
+#undef I40E_LBIT
+};
+
+	for (i = 0; i < ARRAY_SIZE(i40e_speed_lut); i++) {
+		if (i40e_speed_lut[i].speed == speed) {
+			for (j = 0; j < I40E_LBIT_SIZE; j++) {
+				if (test_bit(i40e_speed_lut[i].bit[j],
+					     ks->link_modes.supported)) {
+					speed_changed = true;
+					break;
+				}
+				if (!i40e_speed_lut[i].bit[j])
+					break;
+			}
+			if (speed_changed) {
+				link_speed = i40e_speed_lut[i].link_speed;
+				break;
+			}
+		}
+	}
+	return link_speed;
+}
+
+#undef I40E_LBIT_SIZE
+
+/**
+ * i40e_set_link_ksettings - Set Speed and Duplex
+ * @netdev: network interface device structure
+ * @ks: ethtool ksettings
+ *
+ * Set speed/duplex per media_types advertised/forced
+ **/
+static int i40e_set_link_ksettings(struct net_device *netdev,
+				   const struct ethtool_link_ksettings *ks)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_aq_get_phy_abilities_resp abilities;
+	struct ethtool_link_ksettings safe_ks;
+	struct ethtool_link_ksettings copy_ks;
+	struct i40e_aq_set_phy_config config;
+	struct i40e_pf *pf = np->vsi->back;
+	enum i40e_aq_link_speed link_speed;
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_hw *hw = &pf->hw;
+	bool autoneg_changed = false;
+	int timeout = 50;
+	int status = 0;
+	int err = 0;
+	__u32 speed;
+	u8 autoneg;
+
+	/* Changing port settings is not supported if this isn't the
+	 * port's controlling PF
+	 */
+	if (hw->partition_id != 1) {
+		i40e_partition_setting_complaint(pf);
+		return -EOPNOTSUPP;
+	}
+	if (vsi != pf->vsi[pf->lan_vsi])
+		return -EOPNOTSUPP;
+	if (hw->phy.media_type != I40E_MEDIA_TYPE_BASET &&
+	    hw->phy.media_type != I40E_MEDIA_TYPE_FIBER &&
+	    hw->phy.media_type != I40E_MEDIA_TYPE_BACKPLANE &&
+	    hw->phy.media_type != I40E_MEDIA_TYPE_DA &&
+	    hw->phy.link_info.link_info & I40E_AQ_LINK_UP)
+		return -EOPNOTSUPP;
+	if (hw->device_id == I40E_DEV_ID_KX_B ||
+	    hw->device_id == I40E_DEV_ID_KX_C ||
+	    hw->device_id == I40E_DEV_ID_20G_KR2 ||
+	    hw->device_id == I40E_DEV_ID_20G_KR2_A ||
+	    hw->device_id == I40E_DEV_ID_25G_B ||
+	    hw->device_id == I40E_DEV_ID_KX_X722) {
+		netdev_info(netdev, "Changing settings is not supported on backplane.\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* copy the ksettings to copy_ks to avoid modifying the origin */
+	memcpy(&copy_ks, ks, sizeof(struct ethtool_link_ksettings));
+
+	/* save autoneg out of ksettings */
+	autoneg = copy_ks.base.autoneg;
+	speed = copy_ks.base.speed;
+
+	/* get our own copy of the bits to check against */
+	memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings));
+	safe_ks.base.cmd = copy_ks.base.cmd;
+	safe_ks.base.link_mode_masks_nwords =
+		copy_ks.base.link_mode_masks_nwords;
+	i40e_get_link_ksettings(netdev, &safe_ks);
+
+	/* Get link modes supported by hardware and check against modes
+	 * requested by the user.  Return an error if unsupported mode was set.
+	 */
+	if (!bitmap_subset(copy_ks.link_modes.advertising,
+			   safe_ks.link_modes.supported,
+			   __ETHTOOL_LINK_MODE_MASK_NBITS))
+		return -EINVAL;
+
+	/* set autoneg back to what it currently is */
+	copy_ks.base.autoneg = safe_ks.base.autoneg;
+	copy_ks.base.speed  = safe_ks.base.speed;
+
+	/* If copy_ks.base and safe_ks.base are not the same now, then they are
+	 * trying to set something that we do not support.
+	 */
+	if (memcmp(&copy_ks.base, &safe_ks.base,
+		   sizeof(struct ethtool_link_settings))) {
+		netdev_err(netdev, "Only speed and autoneg are supported.\n");
+		return -EOPNOTSUPP;
+	}
+
+	while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(1000, 2000);
+	}
+
+	/* Get the current phy config */
+	status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+					      NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	/* Copy abilities to config in case autoneg is not
+	 * set below
+	 */
+	memset(&config, 0, sizeof(struct i40e_aq_set_phy_config));
+	config.abilities = abilities.abilities;
+
+	/* Check autoneg */
+	if (autoneg == AUTONEG_ENABLE) {
+		/* If autoneg was not already enabled */
+		if (!(hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED)) {
+			/* If autoneg is not supported, return error */
+			if (!ethtool_link_ksettings_test_link_mode(&safe_ks,
+								   supported,
+								   Autoneg)) {
+				netdev_info(netdev, "Autoneg not supported on this phy\n");
+				err = -EINVAL;
+				goto done;
+			}
+			/* Autoneg is allowed to change */
+			config.abilities = abilities.abilities |
+					   I40E_AQ_PHY_ENABLE_AN;
+			autoneg_changed = true;
+		}
+	} else {
+		/* If autoneg is currently enabled */
+		if (hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED) {
+			/* If autoneg is supported 10GBASE_T is the only PHY
+			 * that can disable it, so otherwise return error
+			 */
+			if (ethtool_link_ksettings_test_link_mode(&safe_ks,
+								  supported,
+								  Autoneg) &&
+			    hw->phy.media_type != I40E_MEDIA_TYPE_BASET) {
+				netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
+				err = -EINVAL;
+				goto done;
+			}
+			/* Autoneg is allowed to change */
+			config.abilities = abilities.abilities &
+					   ~I40E_AQ_PHY_ENABLE_AN;
+			autoneg_changed = true;
+		}
+	}
+
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100baseT_Full))
+		config.link_speed |= I40E_LINK_SPEED_100MB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseT_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseX_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseKX_Full))
+		config.link_speed |= I40E_LINK_SPEED_1GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseT_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseKX4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseKR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseCR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseSR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseLR_Full))
+		config.link_speed |= I40E_LINK_SPEED_10GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  2500baseT_Full))
+		config.link_speed |= I40E_LINK_SPEED_2_5GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  5000baseT_Full))
+		config.link_speed |= I40E_LINK_SPEED_5GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  20000baseKR2_Full))
+		config.link_speed |= I40E_LINK_SPEED_20GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  25000baseCR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  25000baseKR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  25000baseSR_Full))
+		config.link_speed |= I40E_LINK_SPEED_25GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseKR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseCR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseSR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseLR4_Full))
+		config.link_speed |= I40E_LINK_SPEED_40GB;
+
+	/* Autonegotiation must be disabled to change speed */
+	if ((speed != SPEED_UNKNOWN && safe_ks.base.speed != speed) &&
+	    (autoneg == AUTONEG_DISABLE ||
+	    (safe_ks.base.autoneg == AUTONEG_DISABLE && !autoneg_changed))) {
+		link_speed = i40e_speed_to_link_speed(speed, ks);
+		if (link_speed == I40E_LINK_SPEED_UNKNOWN) {
+			netdev_info(netdev, "Given speed is not supported\n");
+			err = -EOPNOTSUPP;
+			goto done;
+		} else {
+			config.link_speed = link_speed;
+		}
+	} else {
+		if (safe_ks.base.speed != speed) {
+			netdev_info(netdev,
+				    "Unable to set speed, disable autoneg\n");
+			err = -EOPNOTSUPP;
+			goto done;
+		}
+	}
+
+	/* If speed didn't get set, set it to what it currently is.
+	 * This is needed because if advertise is 0 (as it is when autoneg
+	 * is disabled) then speed won't get set.
+	 */
+	if (!config.link_speed)
+		config.link_speed = abilities.link_speed;
+	if (autoneg_changed || abilities.link_speed != config.link_speed) {
+		/* copy over the rest of the abilities */
+		config.phy_type = abilities.phy_type;
+		config.phy_type_ext = abilities.phy_type_ext;
+		config.eee_capability = abilities.eee_capability;
+		config.eeer = abilities.eeer_val;
+		config.low_power_ctrl = abilities.d3_lpan;
+		config.fec_config = abilities.fec_cfg_curr_mod_ext_info &
+				    I40E_AQ_PHY_FEC_CONFIG_MASK;
+
+		/* save the requested speeds */
+		hw->phy.link_info.requested_speeds = config.link_speed;
+		/* set link and auto negotiation so changes take effect */
+		config.abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK;
+		/* If link is up put link down */
+		if (hw->phy.link_info.link_info & I40E_AQ_LINK_UP) {
+			/* Tell the OS link is going down, the link will go
+			 * back up when fw says it is ready asynchronously
+			 */
+			i40e_print_link_message(vsi, false);
+			netif_carrier_off(netdev);
+			netif_tx_stop_all_queues(netdev);
+		}
+
+		/* make the aq call */
+		status = i40e_aq_set_phy_config(hw, &config, NULL);
+		if (status) {
+			netdev_info(netdev,
+				    "Set phy config failed, err %pe aq_err %s\n",
+				    ERR_PTR(status),
+				    i40e_aq_str(hw, hw->aq.asq_last_status));
+			err = -EAGAIN;
+			goto done;
+		}
+
+		status = i40e_update_link_info(hw);
+		if (status)
+			netdev_dbg(netdev,
+				   "Updating link info failed with err %pe aq_err %s\n",
+				   ERR_PTR(status),
+				   i40e_aq_str(hw, hw->aq.asq_last_status));
+
+	} else {
+		netdev_info(netdev, "Nothing changed, exiting without setting anything.\n");
+	}
+
+done:
+	clear_bit(__I40E_CONFIG_BUSY, pf->state);
+
+	return err;
+}
+
+static int i40e_set_fec_cfg(struct net_device *netdev, u8 fec_cfg)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_aq_get_phy_abilities_resp abilities;
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	int status = 0;
+	u32 flags = 0;
+	int err = 0;
+
+	flags = READ_ONCE(pf->flags);
+	i40e_set_fec_in_flags(fec_cfg, &flags);
+
+	/* Get the current phy config */
+	memset(&abilities, 0, sizeof(abilities));
+	status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+					      NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	if (abilities.fec_cfg_curr_mod_ext_info != fec_cfg) {
+		struct i40e_aq_set_phy_config config;
+
+		memset(&config, 0, sizeof(config));
+		config.phy_type = abilities.phy_type;
+		config.abilities = abilities.abilities |
+				   I40E_AQ_PHY_ENABLE_ATOMIC_LINK;
+		config.phy_type_ext = abilities.phy_type_ext;
+		config.link_speed = abilities.link_speed;
+		config.eee_capability = abilities.eee_capability;
+		config.eeer = abilities.eeer_val;
+		config.low_power_ctrl = abilities.d3_lpan;
+		config.fec_config = fec_cfg & I40E_AQ_PHY_FEC_CONFIG_MASK;
+		status = i40e_aq_set_phy_config(hw, &config, NULL);
+		if (status) {
+			netdev_info(netdev,
+				    "Set phy config failed, err %pe aq_err %s\n",
+				    ERR_PTR(status),
+				    i40e_aq_str(hw, hw->aq.asq_last_status));
+			err = -EAGAIN;
+			goto done;
+		}
+		pf->flags = flags;
+		status = i40e_update_link_info(hw);
+		if (status)
+			/* debug level message only due to relation to the link
+			 * itself rather than to the FEC settings
+			 * (e.g. no physical connection etc.)
+			 */
+			netdev_dbg(netdev,
+				   "Updating link info failed with err %pe aq_err %s\n",
+				   ERR_PTR(status),
+				   i40e_aq_str(hw, hw->aq.asq_last_status));
+	}
+
+done:
+	return err;
+}
+
+static int i40e_get_fec_param(struct net_device *netdev,
+			      struct ethtool_fecparam *fecparam)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_aq_get_phy_abilities_resp abilities;
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	int status = 0;
+	int err = 0;
+	u8 fec_cfg;
+
+	/* Get the current phy config */
+	memset(&abilities, 0, sizeof(abilities));
+	status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+					      NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	fecparam->fec = 0;
+	fec_cfg = abilities.fec_cfg_curr_mod_ext_info;
+	if (fec_cfg & I40E_AQ_SET_FEC_AUTO)
+		fecparam->fec |= ETHTOOL_FEC_AUTO;
+	else if (fec_cfg & (I40E_AQ_SET_FEC_REQUEST_RS |
+		 I40E_AQ_SET_FEC_ABILITY_RS))
+		fecparam->fec |= ETHTOOL_FEC_RS;
+	else if (fec_cfg & (I40E_AQ_SET_FEC_REQUEST_KR |
+		 I40E_AQ_SET_FEC_ABILITY_KR))
+		fecparam->fec |= ETHTOOL_FEC_BASER;
+	if (fec_cfg == 0)
+		fecparam->fec |= ETHTOOL_FEC_OFF;
+
+	if (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA)
+		fecparam->active_fec = ETHTOOL_FEC_BASER;
+	else if (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_RS_ENA)
+		fecparam->active_fec = ETHTOOL_FEC_RS;
+	else
+		fecparam->active_fec = ETHTOOL_FEC_OFF;
+done:
+	return err;
+}
+
+static int i40e_set_fec_param(struct net_device *netdev,
+			      struct ethtool_fecparam *fecparam)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	u8 fec_cfg = 0;
+
+	if (hw->device_id != I40E_DEV_ID_25G_SFP28 &&
+	    hw->device_id != I40E_DEV_ID_25G_B &&
+	    hw->device_id != I40E_DEV_ID_KX_X722)
+		return -EPERM;
+
+	if (hw->mac.type == I40E_MAC_X722 &&
+	    !(hw->flags & I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE)) {
+		netdev_err(netdev, "Setting FEC encoding not supported by firmware. Please update the NVM image.\n");
+		return -EOPNOTSUPP;
+	}
+
+	switch (fecparam->fec) {
+	case ETHTOOL_FEC_AUTO:
+		fec_cfg = I40E_AQ_SET_FEC_AUTO;
+		break;
+	case ETHTOOL_FEC_RS:
+		fec_cfg = (I40E_AQ_SET_FEC_REQUEST_RS |
+			     I40E_AQ_SET_FEC_ABILITY_RS);
+		break;
+	case ETHTOOL_FEC_BASER:
+		fec_cfg = (I40E_AQ_SET_FEC_REQUEST_KR |
+			     I40E_AQ_SET_FEC_ABILITY_KR);
+		break;
+	case ETHTOOL_FEC_OFF:
+	case ETHTOOL_FEC_NONE:
+		fec_cfg = 0;
+		break;
+	default:
+		dev_warn(&pf->pdev->dev, "Unsupported FEC mode: %d",
+			 fecparam->fec);
+		return -EINVAL;
+	}
+
+	return i40e_set_fec_cfg(netdev, fec_cfg);
+}
+
+static int i40e_nway_reset(struct net_device *netdev)
+{
+	/* restart autonegotiation */
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	bool link_up = hw->phy.link_info.link_info & I40E_AQ_LINK_UP;
+	int ret = 0;
+
+	ret = i40e_aq_set_link_restart_an(hw, link_up, NULL);
+	if (ret) {
+		netdev_info(netdev, "link restart failed, err %pe aq_err %s\n",
+			    ERR_PTR(ret),
+			    i40e_aq_str(hw, hw->aq.asq_last_status));
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_get_pauseparam -  Get Flow Control status
+ * @netdev: netdevice structure
+ * @pause: buffer to return pause parameters
+ *
+ * Return tx/rx-pause status
+ **/
+static void i40e_get_pauseparam(struct net_device *netdev,
+				struct ethtool_pauseparam *pause)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+	struct i40e_dcbx_config *dcbx_cfg = &hw->local_dcbx_config;
+
+	pause->autoneg =
+		((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
+		  AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+	/* PFC enabled so report LFC as off */
+	if (dcbx_cfg->pfc.pfcenable) {
+		pause->rx_pause = 0;
+		pause->tx_pause = 0;
+		return;
+	}
+
+	if (hw->fc.current_mode == I40E_FC_RX_PAUSE) {
+		pause->rx_pause = 1;
+	} else if (hw->fc.current_mode == I40E_FC_TX_PAUSE) {
+		pause->tx_pause = 1;
+	} else if (hw->fc.current_mode == I40E_FC_FULL) {
+		pause->rx_pause = 1;
+		pause->tx_pause = 1;
+	}
+}
+
+/**
+ * i40e_set_pauseparam - Set Flow Control parameter
+ * @netdev: network interface device structure
+ * @pause: return tx/rx flow control status
+ **/
+static int i40e_set_pauseparam(struct net_device *netdev,
+			       struct ethtool_pauseparam *pause)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+	struct i40e_dcbx_config *dcbx_cfg = &hw->local_dcbx_config;
+	bool link_up = hw_link_info->link_info & I40E_AQ_LINK_UP;
+	u8 aq_failures;
+	int err = 0;
+	int status;
+	u32 is_an;
+
+	/* Changing the port's flow control is not supported if this isn't the
+	 * port's controlling PF
+	 */
+	if (hw->partition_id != 1) {
+		i40e_partition_setting_complaint(pf);
+		return -EOPNOTSUPP;
+	}
+
+	if (vsi != pf->vsi[pf->lan_vsi])
+		return -EOPNOTSUPP;
+
+	is_an = hw_link_info->an_info & I40E_AQ_AN_COMPLETED;
+	if (pause->autoneg != is_an) {
+		netdev_info(netdev, "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* If we have link and don't have autoneg */
+	if (!test_bit(__I40E_DOWN, pf->state) && !is_an) {
+		/* Send message that it might not necessarily work*/
+		netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n");
+	}
+
+	if (dcbx_cfg->pfc.pfcenable) {
+		netdev_info(netdev,
+			    "Priority flow control enabled. Cannot set link flow control.\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (pause->rx_pause && pause->tx_pause)
+		hw->fc.requested_mode = I40E_FC_FULL;
+	else if (pause->rx_pause && !pause->tx_pause)
+		hw->fc.requested_mode = I40E_FC_RX_PAUSE;
+	else if (!pause->rx_pause && pause->tx_pause)
+		hw->fc.requested_mode = I40E_FC_TX_PAUSE;
+	else if (!pause->rx_pause && !pause->tx_pause)
+		hw->fc.requested_mode = I40E_FC_NONE;
+	else
+		return -EINVAL;
+
+	/* Tell the OS link is going down, the link will go back up when fw
+	 * says it is ready asynchronously
+	 */
+	i40e_print_link_message(vsi, false);
+	netif_carrier_off(netdev);
+	netif_tx_stop_all_queues(netdev);
+
+	/* Set the fc mode and only restart an if link is up*/
+	status = i40e_set_fc(hw, &aq_failures, link_up);
+
+	if (aq_failures & I40E_SET_FC_AQ_FAIL_GET) {
+		netdev_info(netdev, "Set fc failed on the get_phy_capabilities call with err %pe aq_err %s\n",
+			    ERR_PTR(status),
+			    i40e_aq_str(hw, hw->aq.asq_last_status));
+		err = -EAGAIN;
+	}
+	if (aq_failures & I40E_SET_FC_AQ_FAIL_SET) {
+		netdev_info(netdev, "Set fc failed on the set_phy_config call with err %pe aq_err %s\n",
+			    ERR_PTR(status),
+			    i40e_aq_str(hw, hw->aq.asq_last_status));
+		err = -EAGAIN;
+	}
+	if (aq_failures & I40E_SET_FC_AQ_FAIL_UPDATE) {
+		netdev_info(netdev, "Set fc failed on the get_link_info call with err %pe aq_err %s\n",
+			    ERR_PTR(status),
+			    i40e_aq_str(hw, hw->aq.asq_last_status));
+		err = -EAGAIN;
+	}
+
+	if (!test_bit(__I40E_DOWN, pf->state) && is_an) {
+		/* Give it a little more time to try to come back */
+		msleep(75);
+		if (!test_bit(__I40E_DOWN, pf->state))
+			return i40e_nway_reset(netdev);
+	}
+
+	return err;
+}
+
+static u32 i40e_get_msglevel(struct net_device *netdev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	u32 debug_mask = pf->hw.debug_mask;
+
+	if (debug_mask)
+		netdev_info(netdev, "i40e debug_mask: 0x%08X\n", debug_mask);
+
+	return pf->msg_enable;
+}
+
+static void i40e_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+
+	if (I40E_DEBUG_USER & data)
+		pf->hw.debug_mask = data;
+	else
+		pf->msg_enable = data;
+}
+
+static int i40e_get_regs_len(struct net_device *netdev)
+{
+	int reg_count = 0;
+	int i;
+
+	for (i = 0; i40e_reg_list[i].offset != 0; i++)
+		reg_count += i40e_reg_list[i].elements;
+
+	return reg_count * sizeof(u32);
+}
+
+static void i40e_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
+			  void *p)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	u32 *reg_buf = p;
+	unsigned int i, j, ri;
+	u32 reg;
+
+	/* Tell ethtool which driver-version-specific regs output we have.
+	 *
+	 * At some point, if we have ethtool doing special formatting of
+	 * this data, it will rely on this version number to know how to
+	 * interpret things.  Hence, this needs to be updated if/when the
+	 * diags register table is changed.
+	 */
+	regs->version = 1;
+
+	/* loop through the diags reg table for what to print */
+	ri = 0;
+	for (i = 0; i40e_reg_list[i].offset != 0; i++) {
+		for (j = 0; j < i40e_reg_list[i].elements; j++) {
+			reg = i40e_reg_list[i].offset
+				+ (j * i40e_reg_list[i].stride);
+			reg_buf[ri++] = rd32(hw, reg);
+		}
+	}
+
+}
+
+static int i40e_get_eeprom(struct net_device *netdev,
+			   struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_hw *hw = &np->vsi->back->hw;
+	struct i40e_pf *pf = np->vsi->back;
+	int ret_val = 0, len, offset;
+	u8 *eeprom_buff;
+	u16 i, sectors;
+	bool last;
+	u32 magic;
+
+#define I40E_NVM_SECTOR_SIZE  4096
+	if (eeprom->len == 0)
+		return -EINVAL;
+
+	/* check for NVMUpdate access method */
+	magic = hw->vendor_id | (hw->device_id << 16);
+	if (eeprom->magic && eeprom->magic != magic) {
+		struct i40e_nvm_access *cmd = (struct i40e_nvm_access *)eeprom;
+		int errno = 0;
+
+		/* make sure it is the right magic for NVMUpdate */
+		if ((eeprom->magic >> 16) != hw->device_id)
+			errno = -EINVAL;
+		else if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+			 test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
+			errno = -EBUSY;
+		else
+			ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno);
+
+		if ((errno || ret_val) && (hw->debug_mask & I40E_DEBUG_NVM))
+			dev_info(&pf->pdev->dev,
+				 "NVMUpdate read failed err=%d status=0x%x errno=%d module=%d offset=0x%x size=%d\n",
+				 ret_val, hw->aq.asq_last_status, errno,
+				 (u8)(cmd->config & I40E_NVM_MOD_PNT_MASK),
+				 cmd->offset, cmd->data_size);
+
+		return errno;
+	}
+
+	/* normal ethtool get_eeprom support */
+	eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+	eeprom_buff = kzalloc(eeprom->len, GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	ret_val = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+	if (ret_val) {
+		dev_info(&pf->pdev->dev,
+			 "Failed Acquiring NVM resource for read err=%d status=0x%x\n",
+			 ret_val, hw->aq.asq_last_status);
+		goto free_buff;
+	}
+
+	sectors = eeprom->len / I40E_NVM_SECTOR_SIZE;
+	sectors += (eeprom->len % I40E_NVM_SECTOR_SIZE) ? 1 : 0;
+	len = I40E_NVM_SECTOR_SIZE;
+	last = false;
+	for (i = 0; i < sectors; i++) {
+		if (i == (sectors - 1)) {
+			len = eeprom->len - (I40E_NVM_SECTOR_SIZE * i);
+			last = true;
+		}
+		offset = eeprom->offset + (I40E_NVM_SECTOR_SIZE * i),
+		ret_val = i40e_aq_read_nvm(hw, 0x0, offset, len,
+				(u8 *)eeprom_buff + (I40E_NVM_SECTOR_SIZE * i),
+				last, NULL);
+		if (ret_val && hw->aq.asq_last_status == I40E_AQ_RC_EPERM) {
+			dev_info(&pf->pdev->dev,
+				 "read NVM failed, invalid offset 0x%x\n",
+				 offset);
+			break;
+		} else if (ret_val &&
+			   hw->aq.asq_last_status == I40E_AQ_RC_EACCES) {
+			dev_info(&pf->pdev->dev,
+				 "read NVM failed, access, offset 0x%x\n",
+				 offset);
+			break;
+		} else if (ret_val) {
+			dev_info(&pf->pdev->dev,
+				 "read NVM failed offset %d err=%d status=0x%x\n",
+				 offset, ret_val, hw->aq.asq_last_status);
+			break;
+		}
+	}
+
+	i40e_release_nvm(hw);
+	memcpy(bytes, (u8 *)eeprom_buff, eeprom->len);
+free_buff:
+	kfree(eeprom_buff);
+	return ret_val;
+}
+
+static int i40e_get_eeprom_len(struct net_device *netdev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_hw *hw = &np->vsi->back->hw;
+	u32 val;
+
+#define X722_EEPROM_SCOPE_LIMIT 0x5B9FFF
+	if (hw->mac.type == I40E_MAC_X722) {
+		val = X722_EEPROM_SCOPE_LIMIT + 1;
+		return val;
+	}
+	val = (rd32(hw, I40E_GLPCI_LBARCTRL)
+		& I40E_GLPCI_LBARCTRL_FL_SIZE_MASK)
+		>> I40E_GLPCI_LBARCTRL_FL_SIZE_SHIFT;
+	/* register returns value in power of 2, 64Kbyte chunks. */
+	val = (64 * 1024) * BIT(val);
+	return val;
+}
+
+static int i40e_set_eeprom(struct net_device *netdev,
+			   struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_hw *hw = &np->vsi->back->hw;
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_nvm_access *cmd = (struct i40e_nvm_access *)eeprom;
+	int ret_val = 0;
+	int errno = 0;
+	u32 magic;
+
+	/* normal ethtool set_eeprom is not supported */
+	magic = hw->vendor_id | (hw->device_id << 16);
+	if (eeprom->magic == magic)
+		errno = -EOPNOTSUPP;
+	/* check for NVMUpdate access method */
+	else if (!eeprom->magic || (eeprom->magic >> 16) != hw->device_id)
+		errno = -EINVAL;
+	else if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+		 test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
+		errno = -EBUSY;
+	else
+		ret_val = i40e_nvmupd_command(hw, cmd, bytes, &errno);
+
+	if ((errno || ret_val) && (hw->debug_mask & I40E_DEBUG_NVM))
+		dev_info(&pf->pdev->dev,
+			 "NVMUpdate write failed err=%d status=0x%x errno=%d module=%d offset=0x%x size=%d\n",
+			 ret_val, hw->aq.asq_last_status, errno,
+			 (u8)(cmd->config & I40E_NVM_MOD_PNT_MASK),
+			 cmd->offset, cmd->data_size);
+
+	return errno;
+}
+
+static void i40e_get_drvinfo(struct net_device *netdev,
+			     struct ethtool_drvinfo *drvinfo)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+
+	strscpy(drvinfo->driver, i40e_driver_name, sizeof(drvinfo->driver));
+	strscpy(drvinfo->fw_version, i40e_nvm_version_str(&pf->hw),
+		sizeof(drvinfo->fw_version));
+	strscpy(drvinfo->bus_info, pci_name(pf->pdev),
+		sizeof(drvinfo->bus_info));
+	drvinfo->n_priv_flags = I40E_PRIV_FLAGS_STR_LEN;
+	if (pf->hw.pf_id == 0)
+		drvinfo->n_priv_flags += I40E_GL_PRIV_FLAGS_STR_LEN;
+}
+
+static void i40e_get_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ring,
+			       struct kernel_ethtool_ringparam *kernel_ring,
+			       struct netlink_ext_ack *extack)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+
+	ring->rx_max_pending = I40E_MAX_NUM_DESCRIPTORS;
+	ring->tx_max_pending = I40E_MAX_NUM_DESCRIPTORS;
+	ring->rx_mini_max_pending = 0;
+	ring->rx_jumbo_max_pending = 0;
+	ring->rx_pending = vsi->rx_rings[0]->count;
+	ring->tx_pending = vsi->tx_rings[0]->count;
+	ring->rx_mini_pending = 0;
+	ring->rx_jumbo_pending = 0;
+}
+
+static bool i40e_active_tx_ring_index(struct i40e_vsi *vsi, u16 index)
+{
+	if (i40e_enabled_xdp_vsi(vsi)) {
+		return index < vsi->num_queue_pairs ||
+			(index >= vsi->alloc_queue_pairs &&
+			 index < vsi->alloc_queue_pairs + vsi->num_queue_pairs);
+	}
+
+	return index < vsi->num_queue_pairs;
+}
+
+static int i40e_set_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ring,
+			      struct kernel_ethtool_ringparam *kernel_ring,
+			      struct netlink_ext_ack *extack)
+{
+	struct i40e_ring *tx_rings = NULL, *rx_rings = NULL;
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_hw *hw = &np->vsi->back->hw;
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	u32 new_rx_count, new_tx_count;
+	u16 tx_alloc_queue_pairs;
+	int timeout = 50;
+	int i, err = 0;
+
+	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+		return -EINVAL;
+
+	if (ring->tx_pending > I40E_MAX_NUM_DESCRIPTORS ||
+	    ring->tx_pending < I40E_MIN_NUM_DESCRIPTORS ||
+	    ring->rx_pending > I40E_MAX_NUM_DESCRIPTORS ||
+	    ring->rx_pending < I40E_MIN_NUM_DESCRIPTORS) {
+		netdev_info(netdev,
+			    "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d]\n",
+			    ring->tx_pending, ring->rx_pending,
+			    I40E_MIN_NUM_DESCRIPTORS, I40E_MAX_NUM_DESCRIPTORS);
+		return -EINVAL;
+	}
+
+	new_tx_count = ALIGN(ring->tx_pending, I40E_REQ_DESCRIPTOR_MULTIPLE);
+	new_rx_count = ALIGN(ring->rx_pending, I40E_REQ_DESCRIPTOR_MULTIPLE);
+
+	/* if nothing to do return success */
+	if ((new_tx_count == vsi->tx_rings[0]->count) &&
+	    (new_rx_count == vsi->rx_rings[0]->count))
+		return 0;
+
+	/* If there is a AF_XDP page pool attached to any of Rx rings,
+	 * disallow changing the number of descriptors -- regardless
+	 * if the netdev is running or not.
+	 */
+	if (i40e_xsk_any_rx_ring_enabled(vsi))
+		return -EBUSY;
+
+	while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(1000, 2000);
+	}
+
+	if (!netif_running(vsi->netdev)) {
+		/* simple case - set for the next time the netdev is started */
+		for (i = 0; i < vsi->num_queue_pairs; i++) {
+			vsi->tx_rings[i]->count = new_tx_count;
+			vsi->rx_rings[i]->count = new_rx_count;
+			if (i40e_enabled_xdp_vsi(vsi))
+				vsi->xdp_rings[i]->count = new_tx_count;
+		}
+		vsi->num_tx_desc = new_tx_count;
+		vsi->num_rx_desc = new_rx_count;
+		goto done;
+	}
+
+	/* We can't just free everything and then setup again,
+	 * because the ISRs in MSI-X mode get passed pointers
+	 * to the Tx and Rx ring structs.
+	 */
+
+	/* alloc updated Tx and XDP Tx resources */
+	tx_alloc_queue_pairs = vsi->alloc_queue_pairs *
+			       (i40e_enabled_xdp_vsi(vsi) ? 2 : 1);
+	if (new_tx_count != vsi->tx_rings[0]->count) {
+		netdev_info(netdev,
+			    "Changing Tx descriptor count from %d to %d.\n",
+			    vsi->tx_rings[0]->count, new_tx_count);
+		tx_rings = kcalloc(tx_alloc_queue_pairs,
+				   sizeof(struct i40e_ring), GFP_KERNEL);
+		if (!tx_rings) {
+			err = -ENOMEM;
+			goto done;
+		}
+
+		for (i = 0; i < tx_alloc_queue_pairs; i++) {
+			if (!i40e_active_tx_ring_index(vsi, i))
+				continue;
+
+			tx_rings[i] = *vsi->tx_rings[i];
+			tx_rings[i].count = new_tx_count;
+			/* the desc and bi pointers will be reallocated in the
+			 * setup call
+			 */
+			tx_rings[i].desc = NULL;
+			tx_rings[i].rx_bi = NULL;
+			err = i40e_setup_tx_descriptors(&tx_rings[i]);
+			if (err) {
+				while (i) {
+					i--;
+					if (!i40e_active_tx_ring_index(vsi, i))
+						continue;
+					i40e_free_tx_resources(&tx_rings[i]);
+				}
+				kfree(tx_rings);
+				tx_rings = NULL;
+
+				goto done;
+			}
+		}
+	}
+
+	/* alloc updated Rx resources */
+	if (new_rx_count != vsi->rx_rings[0]->count) {
+		netdev_info(netdev,
+			    "Changing Rx descriptor count from %d to %d\n",
+			    vsi->rx_rings[0]->count, new_rx_count);
+		rx_rings = kcalloc(vsi->alloc_queue_pairs,
+				   sizeof(struct i40e_ring), GFP_KERNEL);
+		if (!rx_rings) {
+			err = -ENOMEM;
+			goto free_tx;
+		}
+
+		for (i = 0; i < vsi->num_queue_pairs; i++) {
+			u16 unused;
+
+			/* clone ring and setup updated count */
+			rx_rings[i] = *vsi->rx_rings[i];
+			rx_rings[i].count = new_rx_count;
+			/* the desc and bi pointers will be reallocated in the
+			 * setup call
+			 */
+			rx_rings[i].desc = NULL;
+			rx_rings[i].rx_bi = NULL;
+			/* Clear cloned XDP RX-queue info before setup call */
+			memset(&rx_rings[i].xdp_rxq, 0, sizeof(rx_rings[i].xdp_rxq));
+			/* this is to allow wr32 to have something to write to
+			 * during early allocation of Rx buffers
+			 */
+			rx_rings[i].tail = hw->hw_addr + I40E_PRTGEN_STATUS;
+			err = i40e_setup_rx_descriptors(&rx_rings[i]);
+			if (err)
+				goto rx_unwind;
+
+			/* now allocate the Rx buffers to make sure the OS
+			 * has enough memory, any failure here means abort
+			 */
+			unused = I40E_DESC_UNUSED(&rx_rings[i]);
+			err = i40e_alloc_rx_buffers(&rx_rings[i], unused);
+rx_unwind:
+			if (err) {
+				do {
+					i40e_free_rx_resources(&rx_rings[i]);
+				} while (i--);
+				kfree(rx_rings);
+				rx_rings = NULL;
+
+				goto free_tx;
+			}
+		}
+	}
+
+	/* Bring interface down, copy in the new ring info,
+	 * then restore the interface
+	 */
+	i40e_down(vsi);
+
+	if (tx_rings) {
+		for (i = 0; i < tx_alloc_queue_pairs; i++) {
+			if (i40e_active_tx_ring_index(vsi, i)) {
+				i40e_free_tx_resources(vsi->tx_rings[i]);
+				*vsi->tx_rings[i] = tx_rings[i];
+			}
+		}
+		kfree(tx_rings);
+		tx_rings = NULL;
+	}
+
+	if (rx_rings) {
+		for (i = 0; i < vsi->num_queue_pairs; i++) {
+			i40e_free_rx_resources(vsi->rx_rings[i]);
+			/* get the real tail offset */
+			rx_rings[i].tail = vsi->rx_rings[i]->tail;
+			/* this is to fake out the allocation routine
+			 * into thinking it has to realloc everything
+			 * but the recycling logic will let us re-use
+			 * the buffers allocated above
+			 */
+			rx_rings[i].next_to_use = 0;
+			rx_rings[i].next_to_clean = 0;
+			rx_rings[i].next_to_alloc = 0;
+			/* do a struct copy */
+			*vsi->rx_rings[i] = rx_rings[i];
+		}
+		kfree(rx_rings);
+		rx_rings = NULL;
+	}
+
+	vsi->num_tx_desc = new_tx_count;
+	vsi->num_rx_desc = new_rx_count;
+	i40e_up(vsi);
+
+free_tx:
+	/* error cleanup if the Rx allocations failed after getting Tx */
+	if (tx_rings) {
+		for (i = 0; i < tx_alloc_queue_pairs; i++) {
+			if (i40e_active_tx_ring_index(vsi, i))
+				i40e_free_tx_resources(vsi->tx_rings[i]);
+		}
+		kfree(tx_rings);
+		tx_rings = NULL;
+	}
+
+done:
+	clear_bit(__I40E_CONFIG_BUSY, pf->state);
+
+	return err;
+}
+
+/**
+ * i40e_get_stats_count - return the stats count for a device
+ * @netdev: the netdev to return the count for
+ *
+ * Returns the total number of statistics for this netdev. Note that even
+ * though this is a function, it is required that the count for a specific
+ * netdev must never change. Basing the count on static values such as the
+ * maximum number of queues or the device type is ok. However, the API for
+ * obtaining stats is *not* safe against changes based on non-static
+ * values such as the *current* number of queues, or runtime flags.
+ *
+ * If a statistic is not always enabled, return it as part of the count
+ * anyways, always return its string, and report its value as zero.
+ **/
+static int i40e_get_stats_count(struct net_device *netdev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	int stats_len;
+
+	if (vsi == pf->vsi[pf->lan_vsi] && pf->hw.partition_id == 1)
+		stats_len = I40E_PF_STATS_LEN;
+	else
+		stats_len = I40E_VSI_STATS_LEN;
+
+	/* The number of stats reported for a given net_device must remain
+	 * constant throughout the life of that device.
+	 *
+	 * This is because the API for obtaining the size, strings, and stats
+	 * is spread out over three separate ethtool ioctls. There is no safe
+	 * way to lock the number of stats across these calls, so we must
+	 * assume that they will never change.
+	 *
+	 * Due to this, we report the maximum number of queues, even if not
+	 * every queue is currently configured. Since we always allocate
+	 * queues in pairs, we'll just use netdev->num_tx_queues * 2. This
+	 * works because the num_tx_queues is set at device creation and never
+	 * changes.
+	 */
+	stats_len += I40E_QUEUE_STATS_LEN * 2 * netdev->num_tx_queues;
+
+	return stats_len;
+}
+
+static int i40e_get_sset_count(struct net_device *netdev, int sset)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+
+	switch (sset) {
+	case ETH_SS_TEST:
+		return I40E_TEST_LEN;
+	case ETH_SS_STATS:
+		return i40e_get_stats_count(netdev);
+	case ETH_SS_PRIV_FLAGS:
+		return I40E_PRIV_FLAGS_STR_LEN +
+			(pf->hw.pf_id == 0 ? I40E_GL_PRIV_FLAGS_STR_LEN : 0);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * i40e_get_veb_tc_stats - copy VEB TC statistics to formatted structure
+ * @tc: the TC statistics in VEB structure (veb->tc_stats)
+ * @i: the index of traffic class in (veb->tc_stats) structure to copy
+ *
+ * Copy VEB TC statistics from structure of arrays (veb->tc_stats) to
+ * one dimensional structure i40e_cp_veb_tc_stats.
+ * Produce formatted i40e_cp_veb_tc_stats structure of the VEB TC
+ * statistics for the given TC.
+ **/
+static struct i40e_cp_veb_tc_stats
+i40e_get_veb_tc_stats(struct i40e_veb_tc_stats *tc, unsigned int i)
+{
+	struct i40e_cp_veb_tc_stats veb_tc = {
+		.tc_rx_packets = tc->tc_rx_packets[i],
+		.tc_rx_bytes = tc->tc_rx_bytes[i],
+		.tc_tx_packets = tc->tc_tx_packets[i],
+		.tc_tx_bytes = tc->tc_tx_bytes[i],
+	};
+
+	return veb_tc;
+}
+
+/**
+ * i40e_get_pfc_stats - copy HW PFC statistics to formatted structure
+ * @pf: the PF device structure
+ * @i: the priority value to copy
+ *
+ * The PFC stats are found as arrays in pf->stats, which is not easy to pass
+ * into i40e_add_ethtool_stats. Produce a formatted i40e_pfc_stats structure
+ * of the PFC stats for the given priority.
+ **/
+static inline struct i40e_pfc_stats
+i40e_get_pfc_stats(struct i40e_pf *pf, unsigned int i)
+{
+#define I40E_GET_PFC_STAT(stat, priority) \
+	.stat = pf->stats.stat[priority]
+
+	struct i40e_pfc_stats pfc = {
+		I40E_GET_PFC_STAT(priority_xon_rx, i),
+		I40E_GET_PFC_STAT(priority_xoff_rx, i),
+		I40E_GET_PFC_STAT(priority_xon_tx, i),
+		I40E_GET_PFC_STAT(priority_xoff_tx, i),
+		I40E_GET_PFC_STAT(priority_xon_2_xoff, i),
+	};
+	return pfc;
+}
+
+/**
+ * i40e_get_ethtool_stats - copy stat values into supplied buffer
+ * @netdev: the netdev to collect stats for
+ * @stats: ethtool stats command structure
+ * @data: ethtool supplied buffer
+ *
+ * Copy the stats values for this netdev into the buffer. Expects data to be
+ * pre-allocated to the size returned by i40e_get_stats_count.. Note that all
+ * statistics must be copied in a static order, and the count must not change
+ * for a given netdev. See i40e_get_stats_count for more details.
+ *
+ * If a statistic is not currently valid (such as a disabled queue), this
+ * function reports its value as zero.
+ **/
+static void i40e_get_ethtool_stats(struct net_device *netdev,
+				   struct ethtool_stats *stats, u64 *data)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_veb *veb = NULL;
+	unsigned int i;
+	bool veb_stats;
+	u64 *p = data;
+
+	i40e_update_stats(vsi);
+
+	i40e_add_ethtool_stats(&data, i40e_get_vsi_stats_struct(vsi),
+			       i40e_gstrings_net_stats);
+
+	i40e_add_ethtool_stats(&data, vsi, i40e_gstrings_misc_stats);
+
+	rcu_read_lock();
+	for (i = 0; i < netdev->num_tx_queues; i++) {
+		i40e_add_queue_stats(&data, READ_ONCE(vsi->tx_rings[i]));
+		i40e_add_queue_stats(&data, READ_ONCE(vsi->rx_rings[i]));
+	}
+	rcu_read_unlock();
+
+	if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
+		goto check_data_pointer;
+
+	veb_stats = ((pf->lan_veb != I40E_NO_VEB) &&
+		     (pf->lan_veb < I40E_MAX_VEB) &&
+		     (pf->flags & I40E_FLAG_VEB_STATS_ENABLED));
+
+	if (veb_stats) {
+		veb = pf->veb[pf->lan_veb];
+		i40e_update_veb_stats(veb);
+	}
+
+	/* If veb stats aren't enabled, pass NULL instead of the veb so that
+	 * we initialize stats to zero and update the data pointer
+	 * intelligently
+	 */
+	i40e_add_ethtool_stats(&data, veb_stats ? veb : NULL,
+			       i40e_gstrings_veb_stats);
+
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		if (veb_stats) {
+			struct i40e_cp_veb_tc_stats veb_tc =
+				i40e_get_veb_tc_stats(&veb->tc_stats, i);
+
+			i40e_add_ethtool_stats(&data, &veb_tc,
+					       i40e_gstrings_veb_tc_stats);
+		} else {
+			i40e_add_ethtool_stats(&data, NULL,
+					       i40e_gstrings_veb_tc_stats);
+		}
+
+	i40e_add_ethtool_stats(&data, pf, i40e_gstrings_stats);
+
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+		struct i40e_pfc_stats pfc = i40e_get_pfc_stats(pf, i);
+
+		i40e_add_ethtool_stats(&data, &pfc, i40e_gstrings_pfc_stats);
+	}
+
+check_data_pointer:
+	WARN_ONCE(data - p != i40e_get_stats_count(netdev),
+		  "ethtool stats count mismatch!");
+}
+
+/**
+ * i40e_get_stat_strings - copy stat strings into supplied buffer
+ * @netdev: the netdev to collect strings for
+ * @data: supplied buffer to copy strings into
+ *
+ * Copy the strings related to stats for this netdev. Expects data to be
+ * pre-allocated with the size reported by i40e_get_stats_count. Note that the
+ * strings must be copied in a static order and the total count must not
+ * change for a given netdev. See i40e_get_stats_count for more details.
+ **/
+static void i40e_get_stat_strings(struct net_device *netdev, u8 *data)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	unsigned int i;
+	u8 *p = data;
+
+	i40e_add_stat_strings(&data, i40e_gstrings_net_stats);
+
+	i40e_add_stat_strings(&data, i40e_gstrings_misc_stats);
+
+	for (i = 0; i < netdev->num_tx_queues; i++) {
+		i40e_add_stat_strings(&data, i40e_gstrings_queue_stats,
+				      "tx", i);
+		i40e_add_stat_strings(&data, i40e_gstrings_queue_stats,
+				      "rx", i);
+	}
+
+	if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
+		goto check_data_pointer;
+
+	i40e_add_stat_strings(&data, i40e_gstrings_veb_stats);
+
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		i40e_add_stat_strings(&data, i40e_gstrings_veb_tc_stats, i);
+
+	i40e_add_stat_strings(&data, i40e_gstrings_stats);
+
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
+		i40e_add_stat_strings(&data, i40e_gstrings_pfc_stats, i);
+
+check_data_pointer:
+	WARN_ONCE(data - p != i40e_get_stats_count(netdev) * ETH_GSTRING_LEN,
+		  "stat strings count mismatch!");
+}
+
+static void i40e_get_priv_flag_strings(struct net_device *netdev, u8 *data)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	unsigned int i;
+	u8 *p = data;
+
+	for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++)
+		ethtool_sprintf(&p, i40e_gstrings_priv_flags[i].flag_string);
+	if (pf->hw.pf_id != 0)
+		return;
+	for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++)
+		ethtool_sprintf(&p, i40e_gl_gstrings_priv_flags[i].flag_string);
+}
+
+static void i40e_get_strings(struct net_device *netdev, u32 stringset,
+			     u8 *data)
+{
+	switch (stringset) {
+	case ETH_SS_TEST:
+		memcpy(data, i40e_gstrings_test,
+		       I40E_TEST_LEN * ETH_GSTRING_LEN);
+		break;
+	case ETH_SS_STATS:
+		i40e_get_stat_strings(netdev, data);
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		i40e_get_priv_flag_strings(netdev, data);
+		break;
+	default:
+		break;
+	}
+}
+
+static int i40e_get_ts_info(struct net_device *dev,
+			    struct ethtool_ts_info *info)
+{
+	struct i40e_pf *pf = i40e_netdev_to_pf(dev);
+
+	/* only report HW timestamping if PTP is enabled */
+	if (!(pf->flags & I40E_FLAG_PTP))
+		return ethtool_op_get_ts_info(dev, info);
+
+	info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+				SOF_TIMESTAMPING_RX_SOFTWARE |
+				SOF_TIMESTAMPING_SOFTWARE |
+				SOF_TIMESTAMPING_TX_HARDWARE |
+				SOF_TIMESTAMPING_RX_HARDWARE |
+				SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	if (pf->ptp_clock)
+		info->phc_index = ptp_clock_index(pf->ptp_clock);
+	else
+		info->phc_index = -1;
+
+	info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON);
+
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
+			   BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+			   BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+			   BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ);
+
+	if (pf->hw_features & I40E_HW_PTP_L4_CAPABLE)
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
+				    BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
+				    BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) |
+				    BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
+				    BIT(HWTSTAMP_FILTER_PTP_V2_SYNC) |
+				    BIT(HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+				    BIT(HWTSTAMP_FILTER_PTP_V2_DELAY_REQ) |
+				    BIT(HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ);
+
+	return 0;
+}
+
+static u64 i40e_link_test(struct net_device *netdev, u64 *data)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	bool link_up = false;
+	int status;
+
+	netif_info(pf, hw, netdev, "link test\n");
+	status = i40e_get_link_status(&pf->hw, &link_up);
+	if (status) {
+		netif_err(pf, drv, netdev, "link query timed out, please retry test\n");
+		*data = 1;
+		return *data;
+	}
+
+	if (link_up)
+		*data = 0;
+	else
+		*data = 1;
+
+	return *data;
+}
+
+static u64 i40e_reg_test(struct net_device *netdev, u64 *data)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+
+	netif_info(pf, hw, netdev, "register test\n");
+	*data = i40e_diag_reg_test(&pf->hw);
+
+	return *data;
+}
+
+static u64 i40e_eeprom_test(struct net_device *netdev, u64 *data)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+
+	netif_info(pf, hw, netdev, "eeprom test\n");
+	*data = i40e_diag_eeprom_test(&pf->hw);
+
+	/* forcebly clear the NVM Update state machine */
+	pf->hw.nvmupd_state = I40E_NVMUPD_STATE_INIT;
+
+	return *data;
+}
+
+static u64 i40e_intr_test(struct net_device *netdev, u64 *data)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	u16 swc_old = pf->sw_int_count;
+
+	netif_info(pf, hw, netdev, "interrupt test\n");
+	wr32(&pf->hw, I40E_PFINT_DYN_CTL0,
+	     (I40E_PFINT_DYN_CTL0_INTENA_MASK |
+	      I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
+	      I40E_PFINT_DYN_CTL0_ITR_INDX_MASK |
+	      I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK |
+	      I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK));
+	usleep_range(1000, 2000);
+	*data = (swc_old == pf->sw_int_count);
+
+	return *data;
+}
+
+static inline bool i40e_active_vfs(struct i40e_pf *pf)
+{
+	struct i40e_vf *vfs = pf->vf;
+	int i;
+
+	for (i = 0; i < pf->num_alloc_vfs; i++)
+		if (test_bit(I40E_VF_STATE_ACTIVE, &vfs[i].vf_states))
+			return true;
+	return false;
+}
+
+static inline bool i40e_active_vmdqs(struct i40e_pf *pf)
+{
+	return !!i40e_find_vsi_by_type(pf, I40E_VSI_VMDQ2);
+}
+
+static void i40e_diag_test(struct net_device *netdev,
+			   struct ethtool_test *eth_test, u64 *data)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	bool if_running = netif_running(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		/* Offline tests */
+		netif_info(pf, drv, netdev, "offline testing starting\n");
+
+		set_bit(__I40E_TESTING, pf->state);
+
+		if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+		    test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) {
+			dev_warn(&pf->pdev->dev,
+				 "Cannot start offline testing when PF is in reset state.\n");
+			goto skip_ol_tests;
+		}
+
+		if (i40e_active_vfs(pf) || i40e_active_vmdqs(pf)) {
+			dev_warn(&pf->pdev->dev,
+				 "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
+			goto skip_ol_tests;
+		}
+
+		/* If the device is online then take it offline */
+		if (if_running)
+			/* indicate we're in test mode */
+			i40e_close(netdev);
+		else
+			/* This reset does not affect link - if it is
+			 * changed to a type of reset that does affect
+			 * link then the following link test would have
+			 * to be moved to before the reset
+			 */
+			i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
+
+		if (i40e_link_test(netdev, &data[I40E_ETH_TEST_LINK]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		if (i40e_eeprom_test(netdev, &data[I40E_ETH_TEST_EEPROM]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		if (i40e_intr_test(netdev, &data[I40E_ETH_TEST_INTR]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* run reg test last, a reset is required after it */
+		if (i40e_reg_test(netdev, &data[I40E_ETH_TEST_REG]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		clear_bit(__I40E_TESTING, pf->state);
+		i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
+
+		if (if_running)
+			i40e_open(netdev);
+	} else {
+		/* Online tests */
+		netif_info(pf, drv, netdev, "online testing starting\n");
+
+		if (i40e_link_test(netdev, &data[I40E_ETH_TEST_LINK]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* Offline only tests, not run in online; pass by default */
+		data[I40E_ETH_TEST_REG] = 0;
+		data[I40E_ETH_TEST_EEPROM] = 0;
+		data[I40E_ETH_TEST_INTR] = 0;
+	}
+
+	netif_info(pf, drv, netdev, "testing finished\n");
+	return;
+
+skip_ol_tests:
+	data[I40E_ETH_TEST_REG]		= 1;
+	data[I40E_ETH_TEST_EEPROM]	= 1;
+	data[I40E_ETH_TEST_INTR]	= 1;
+	data[I40E_ETH_TEST_LINK]	= 1;
+	eth_test->flags |= ETH_TEST_FL_FAILED;
+	clear_bit(__I40E_TESTING, pf->state);
+	netif_info(pf, drv, netdev, "testing failed\n");
+}
+
+static void i40e_get_wol(struct net_device *netdev,
+			 struct ethtool_wolinfo *wol)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	u16 wol_nvm_bits;
+
+	/* NVM bit on means WoL disabled for the port */
+	i40e_read_nvm_word(hw, I40E_SR_NVM_WAKE_ON_LAN, &wol_nvm_bits);
+	if ((BIT(hw->port) & wol_nvm_bits) || (hw->partition_id != 1)) {
+		wol->supported = 0;
+		wol->wolopts = 0;
+	} else {
+		wol->supported = WAKE_MAGIC;
+		wol->wolopts = (pf->wol_en ? WAKE_MAGIC : 0);
+	}
+}
+
+/**
+ * i40e_set_wol - set the WakeOnLAN configuration
+ * @netdev: the netdev in question
+ * @wol: the ethtool WoL setting data
+ **/
+static int i40e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_hw *hw = &pf->hw;
+	u16 wol_nvm_bits;
+
+	/* WoL not supported if this isn't the controlling PF on the port */
+	if (hw->partition_id != 1) {
+		i40e_partition_setting_complaint(pf);
+		return -EOPNOTSUPP;
+	}
+
+	if (vsi != pf->vsi[pf->lan_vsi])
+		return -EOPNOTSUPP;
+
+	/* NVM bit on means WoL disabled for the port */
+	i40e_read_nvm_word(hw, I40E_SR_NVM_WAKE_ON_LAN, &wol_nvm_bits);
+	if (BIT(hw->port) & wol_nvm_bits)
+		return -EOPNOTSUPP;
+
+	/* only magic packet is supported */
+	if (wol->wolopts & ~WAKE_MAGIC)
+		return -EOPNOTSUPP;
+
+	/* is this a new value? */
+	if (pf->wol_en != !!wol->wolopts) {
+		pf->wol_en = !!wol->wolopts;
+		device_set_wakeup_enable(&pf->pdev->dev, pf->wol_en);
+	}
+
+	return 0;
+}
+
+static int i40e_set_phys_id(struct net_device *netdev,
+			    enum ethtool_phys_id_state state)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	int blink_freq = 2;
+	u16 temp_status;
+	int ret = 0;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) {
+			pf->led_status = i40e_led_get(hw);
+		} else {
+			if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE))
+				i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL,
+						      NULL);
+			ret = i40e_led_get_phy(hw, &temp_status,
+					       &pf->phy_led_val);
+			pf->led_status = temp_status;
+		}
+		return blink_freq;
+	case ETHTOOL_ID_ON:
+		if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS))
+			i40e_led_set(hw, 0xf, false);
+		else
+			ret = i40e_led_set_phy(hw, true, pf->led_status, 0);
+		break;
+	case ETHTOOL_ID_OFF:
+		if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS))
+			i40e_led_set(hw, 0x0, false);
+		else
+			ret = i40e_led_set_phy(hw, false, pf->led_status, 0);
+		break;
+	case ETHTOOL_ID_INACTIVE:
+		if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) {
+			i40e_led_set(hw, pf->led_status, false);
+		} else {
+			ret = i40e_led_set_phy(hw, false, pf->led_status,
+					       (pf->phy_led_val |
+					       I40E_PHY_LED_MODE_ORIG));
+			if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE))
+				i40e_aq_set_phy_debug(hw, 0, NULL);
+		}
+		break;
+	default:
+		break;
+	}
+	if (ret)
+		return -ENOENT;
+	else
+		return 0;
+}
+
+/* NOTE: i40e hardware uses a conversion factor of 2 for Interrupt
+ * Throttle Rate (ITR) ie. ITR(1) = 2us ITR(10) = 20 us, and also
+ * 125us (8000 interrupts per second) == ITR(62)
+ */
+
+/**
+ * __i40e_get_coalesce - get per-queue coalesce settings
+ * @netdev: the netdev to check
+ * @ec: ethtool coalesce data structure
+ * @queue: which queue to pick
+ *
+ * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs
+ * are per queue. If queue is <0 then we default to queue 0 as the
+ * representative value.
+ **/
+static int __i40e_get_coalesce(struct net_device *netdev,
+			       struct ethtool_coalesce *ec,
+			       int queue)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_ring *rx_ring, *tx_ring;
+	struct i40e_vsi *vsi = np->vsi;
+
+	ec->tx_max_coalesced_frames_irq = vsi->work_limit;
+	ec->rx_max_coalesced_frames_irq = vsi->work_limit;
+
+	/* rx and tx usecs has per queue value. If user doesn't specify the
+	 * queue, return queue 0's value to represent.
+	 */
+	if (queue < 0)
+		queue = 0;
+	else if (queue >= vsi->num_queue_pairs)
+		return -EINVAL;
+
+	rx_ring = vsi->rx_rings[queue];
+	tx_ring = vsi->tx_rings[queue];
+
+	if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
+		ec->use_adaptive_rx_coalesce = 1;
+
+	if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
+		ec->use_adaptive_tx_coalesce = 1;
+
+	ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
+	ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC;
+
+	/* we use the _usecs_high to store/set the interrupt rate limit
+	 * that the hardware supports, that almost but not quite
+	 * fits the original intent of the ethtool variable,
+	 * the rx_coalesce_usecs_high limits total interrupts
+	 * per second from both tx/rx sources.
+	 */
+	ec->rx_coalesce_usecs_high = vsi->int_rate_limit;
+	ec->tx_coalesce_usecs_high = vsi->int_rate_limit;
+
+	return 0;
+}
+
+/**
+ * i40e_get_coalesce - get a netdev's coalesce settings
+ * @netdev: the netdev to check
+ * @ec: ethtool coalesce data structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
+ *
+ * Gets the coalesce settings for a particular netdev. Note that if user has
+ * modified per-queue settings, this only guarantees to represent queue 0. See
+ * __i40e_get_coalesce for more details.
+ **/
+static int i40e_get_coalesce(struct net_device *netdev,
+			     struct ethtool_coalesce *ec,
+			     struct kernel_ethtool_coalesce *kernel_coal,
+			     struct netlink_ext_ack *extack)
+{
+	return __i40e_get_coalesce(netdev, ec, -1);
+}
+
+/**
+ * i40e_get_per_queue_coalesce - gets coalesce settings for particular queue
+ * @netdev: netdev structure
+ * @ec: ethtool's coalesce settings
+ * @queue: the particular queue to read
+ *
+ * Will read a specific queue's coalesce settings
+ **/
+static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue,
+				       struct ethtool_coalesce *ec)
+{
+	return __i40e_get_coalesce(netdev, ec, queue);
+}
+
+/**
+ * i40e_set_itr_per_queue - set ITR values for specific queue
+ * @vsi: the VSI to set values for
+ * @ec: coalesce settings from ethtool
+ * @queue: the queue to modify
+ *
+ * Change the ITR settings for a specific queue.
+ **/
+static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
+				   struct ethtool_coalesce *ec,
+				   int queue)
+{
+	struct i40e_ring *rx_ring = vsi->rx_rings[queue];
+	struct i40e_ring *tx_ring = vsi->tx_rings[queue];
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_q_vector *q_vector;
+	u16 intrl;
+
+	intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit);
+
+	rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+	tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
+
+	if (ec->use_adaptive_rx_coalesce)
+		rx_ring->itr_setting |= I40E_ITR_DYNAMIC;
+	else
+		rx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
+
+	if (ec->use_adaptive_tx_coalesce)
+		tx_ring->itr_setting |= I40E_ITR_DYNAMIC;
+	else
+		tx_ring->itr_setting &= ~I40E_ITR_DYNAMIC;
+
+	q_vector = rx_ring->q_vector;
+	q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
+
+	q_vector = tx_ring->q_vector;
+	q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
+
+	/* The interrupt handler itself will take care of programming
+	 * the Tx and Rx ITR values based on the values we have entered
+	 * into the q_vector, no need to write the values now.
+	 */
+
+	wr32(hw, I40E_PFINT_RATEN(q_vector->reg_idx), intrl);
+	i40e_flush(hw);
+}
+
+/**
+ * __i40e_set_coalesce - set coalesce settings for particular queue
+ * @netdev: the netdev to change
+ * @ec: ethtool coalesce settings
+ * @queue: the queue to change
+ *
+ * Sets the coalesce settings for a particular queue.
+ **/
+static int __i40e_set_coalesce(struct net_device *netdev,
+			       struct ethtool_coalesce *ec,
+			       int queue)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	u16 intrl_reg, cur_rx_itr, cur_tx_itr;
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	int i;
+
+	if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
+		vsi->work_limit = ec->tx_max_coalesced_frames_irq;
+
+	if (queue < 0) {
+		cur_rx_itr = vsi->rx_rings[0]->itr_setting;
+		cur_tx_itr = vsi->tx_rings[0]->itr_setting;
+	} else if (queue < vsi->num_queue_pairs) {
+		cur_rx_itr = vsi->rx_rings[queue]->itr_setting;
+		cur_tx_itr = vsi->tx_rings[queue]->itr_setting;
+	} else {
+		netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
+			   vsi->num_queue_pairs - 1);
+		return -EINVAL;
+	}
+
+	cur_tx_itr &= ~I40E_ITR_DYNAMIC;
+	cur_rx_itr &= ~I40E_ITR_DYNAMIC;
+
+	/* tx_coalesce_usecs_high is ignored, use rx-usecs-high instead */
+	if (ec->tx_coalesce_usecs_high != vsi->int_rate_limit) {
+		netif_info(pf, drv, netdev, "tx-usecs-high is not used, please program rx-usecs-high\n");
+		return -EINVAL;
+	}
+
+	if (ec->rx_coalesce_usecs_high > INTRL_REG_TO_USEC(I40E_MAX_INTRL)) {
+		netif_info(pf, drv, netdev, "Invalid value, rx-usecs-high range is 0-%lu\n",
+			   INTRL_REG_TO_USEC(I40E_MAX_INTRL));
+		return -EINVAL;
+	}
+
+	if (ec->rx_coalesce_usecs != cur_rx_itr &&
+	    ec->use_adaptive_rx_coalesce) {
+		netif_info(pf, drv, netdev, "RX interrupt moderation cannot be changed if adaptive-rx is enabled.\n");
+		return -EINVAL;
+	}
+
+	if (ec->rx_coalesce_usecs > I40E_MAX_ITR) {
+		netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
+		return -EINVAL;
+	}
+
+	if (ec->tx_coalesce_usecs != cur_tx_itr &&
+	    ec->use_adaptive_tx_coalesce) {
+		netif_info(pf, drv, netdev, "TX interrupt moderation cannot be changed if adaptive-tx is enabled.\n");
+		return -EINVAL;
+	}
+
+	if (ec->tx_coalesce_usecs > I40E_MAX_ITR) {
+		netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
+		return -EINVAL;
+	}
+
+	if (ec->use_adaptive_rx_coalesce && !cur_rx_itr)
+		ec->rx_coalesce_usecs = I40E_MIN_ITR;
+
+	if (ec->use_adaptive_tx_coalesce && !cur_tx_itr)
+		ec->tx_coalesce_usecs = I40E_MIN_ITR;
+
+	intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high);
+	vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg);
+	if (vsi->int_rate_limit != ec->rx_coalesce_usecs_high) {
+		netif_info(pf, drv, netdev, "Interrupt rate limit rounded down to %d\n",
+			   vsi->int_rate_limit);
+	}
+
+	/* rx and tx usecs has per queue value. If user doesn't specify the
+	 * queue, apply to all queues.
+	 */
+	if (queue < 0) {
+		for (i = 0; i < vsi->num_queue_pairs; i++)
+			i40e_set_itr_per_queue(vsi, ec, i);
+	} else {
+		i40e_set_itr_per_queue(vsi, ec, queue);
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_set_coalesce - set coalesce settings for every queue on the netdev
+ * @netdev: the netdev to change
+ * @ec: ethtool coalesce settings
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
+ *
+ * This will set each queue to the same coalesce settings.
+ **/
+static int i40e_set_coalesce(struct net_device *netdev,
+			     struct ethtool_coalesce *ec,
+			     struct kernel_ethtool_coalesce *kernel_coal,
+			     struct netlink_ext_ack *extack)
+{
+	return __i40e_set_coalesce(netdev, ec, -1);
+}
+
+/**
+ * i40e_set_per_queue_coalesce - set specific queue's coalesce settings
+ * @netdev: the netdev to change
+ * @ec: ethtool's coalesce settings
+ * @queue: the queue to change
+ *
+ * Sets the specified queue's coalesce settings.
+ **/
+static int i40e_set_per_queue_coalesce(struct net_device *netdev, u32 queue,
+				       struct ethtool_coalesce *ec)
+{
+	return __i40e_set_coalesce(netdev, ec, queue);
+}
+
+/**
+ * i40e_get_rss_hash_opts - Get RSS hash Input Set for each flow type
+ * @pf: pointer to the physical function struct
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns Success if the flow is supported, else Invalid Input.
+ **/
+static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u8 flow_pctype = 0;
+	u64 i_set = 0;
+
+	cmd->data = 0;
+
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+		break;
+	case UDP_V4_FLOW:
+		flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+		break;
+	case TCP_V6_FLOW:
+		flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_TCP;
+		break;
+	case UDP_V6_FLOW:
+		flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_UDP;
+		break;
+	case SCTP_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case IPV4_FLOW:
+	case SCTP_V6_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case IPV6_FLOW:
+		/* Default is src/dest for IP, no matter the L4 hashing */
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Read flow based hash input set register */
+	if (flow_pctype) {
+		i_set = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0,
+					      flow_pctype)) |
+			((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1,
+					       flow_pctype)) << 32);
+	}
+
+	/* Process bits of hash input set */
+	if (i_set) {
+		if (i_set & I40E_L4_SRC_MASK)
+			cmd->data |= RXH_L4_B_0_1;
+		if (i_set & I40E_L4_DST_MASK)
+			cmd->data |= RXH_L4_B_2_3;
+
+		if (cmd->flow_type == TCP_V4_FLOW ||
+		    cmd->flow_type == UDP_V4_FLOW) {
+			if (hw->mac.type == I40E_MAC_X722) {
+				if (i_set & I40E_X722_L3_SRC_MASK)
+					cmd->data |= RXH_IP_SRC;
+				if (i_set & I40E_X722_L3_DST_MASK)
+					cmd->data |= RXH_IP_DST;
+			} else {
+				if (i_set & I40E_L3_SRC_MASK)
+					cmd->data |= RXH_IP_SRC;
+				if (i_set & I40E_L3_DST_MASK)
+					cmd->data |= RXH_IP_DST;
+			}
+		} else if (cmd->flow_type == TCP_V6_FLOW ||
+			  cmd->flow_type == UDP_V6_FLOW) {
+			if (i_set & I40E_L3_V6_SRC_MASK)
+				cmd->data |= RXH_IP_SRC;
+			if (i_set & I40E_L3_V6_DST_MASK)
+				cmd->data |= RXH_IP_DST;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_check_mask - Check whether a mask field is set
+ * @mask: the full mask value
+ * @field: mask of the field to check
+ *
+ * If the given mask is fully set, return positive value. If the mask for the
+ * field is fully unset, return zero. Otherwise return a negative error code.
+ **/
+static int i40e_check_mask(u64 mask, u64 field)
+{
+	u64 value = mask & field;
+
+	if (value == field)
+		return 1;
+	else if (!value)
+		return 0;
+	else
+		return -1;
+}
+
+/**
+ * i40e_parse_rx_flow_user_data - Deconstruct user-defined data
+ * @fsp: pointer to rx flow specification
+ * @data: pointer to userdef data structure for storage
+ *
+ * Read the user-defined data and deconstruct the value into a structure. No
+ * other code should read the user-defined data, so as to ensure that every
+ * place consistently reads the value correctly.
+ *
+ * The user-defined field is a 64bit Big Endian format value, which we
+ * deconstruct by reading bits or bit fields from it. Single bit flags shall
+ * be defined starting from the highest bits, while small bit field values
+ * shall be defined starting from the lowest bits.
+ *
+ * Returns 0 if the data is valid, and non-zero if the userdef data is invalid
+ * and the filter should be rejected. The data structure will always be
+ * modified even if FLOW_EXT is not set.
+ *
+ **/
+static int i40e_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
+					struct i40e_rx_flow_userdef *data)
+{
+	u64 value, mask;
+	int valid;
+
+	/* Zero memory first so it's always consistent. */
+	memset(data, 0, sizeof(*data));
+
+	if (!(fsp->flow_type & FLOW_EXT))
+		return 0;
+
+	value = be64_to_cpu(*((__be64 *)fsp->h_ext.data));
+	mask = be64_to_cpu(*((__be64 *)fsp->m_ext.data));
+
+#define I40E_USERDEF_FLEX_WORD		GENMASK_ULL(15, 0)
+#define I40E_USERDEF_FLEX_OFFSET	GENMASK_ULL(31, 16)
+#define I40E_USERDEF_FLEX_FILTER	GENMASK_ULL(31, 0)
+
+	valid = i40e_check_mask(mask, I40E_USERDEF_FLEX_FILTER);
+	if (valid < 0) {
+		return -EINVAL;
+	} else if (valid) {
+		data->flex_word = value & I40E_USERDEF_FLEX_WORD;
+		data->flex_offset =
+			(value & I40E_USERDEF_FLEX_OFFSET) >> 16;
+		data->flex_filter = true;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_fill_rx_flow_user_data - Fill in user-defined data field
+ * @fsp: pointer to rx_flow specification
+ * @data: pointer to return userdef data
+ *
+ * Reads the userdef data structure and properly fills in the user defined
+ * fields of the rx_flow_spec.
+ **/
+static void i40e_fill_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
+					struct i40e_rx_flow_userdef *data)
+{
+	u64 value = 0, mask = 0;
+
+	if (data->flex_filter) {
+		value |= data->flex_word;
+		value |= (u64)data->flex_offset << 16;
+		mask |= I40E_USERDEF_FLEX_FILTER;
+	}
+
+	if (value || mask)
+		fsp->flow_type |= FLOW_EXT;
+
+	*((__be64 *)fsp->h_ext.data) = cpu_to_be64(value);
+	*((__be64 *)fsp->m_ext.data) = cpu_to_be64(mask);
+}
+
+/**
+ * i40e_get_ethtool_fdir_all - Populates the rule count of a command
+ * @pf: Pointer to the physical function struct
+ * @cmd: The command to get or set Rx flow classification rules
+ * @rule_locs: Array of used rule locations
+ *
+ * This function populates both the total and actual rule count of
+ * the ethtool flow classification command
+ *
+ * Returns 0 on success or -EMSGSIZE if entry not found
+ **/
+static int i40e_get_ethtool_fdir_all(struct i40e_pf *pf,
+				     struct ethtool_rxnfc *cmd,
+				     u32 *rule_locs)
+{
+	struct i40e_fdir_filter *rule;
+	struct hlist_node *node2;
+	int cnt = 0;
+
+	/* report total rule count */
+	cmd->data = i40e_get_fd_cnt_all(pf);
+
+	hlist_for_each_entry_safe(rule, node2,
+				  &pf->fdir_filter_list, fdir_node) {
+		if (cnt == cmd->rule_cnt)
+			return -EMSGSIZE;
+
+		rule_locs[cnt] = rule->fd_id;
+		cnt++;
+	}
+
+	cmd->rule_cnt = cnt;
+
+	return 0;
+}
+
+/**
+ * i40e_get_ethtool_fdir_entry - Look up a filter based on Rx flow
+ * @pf: Pointer to the physical function struct
+ * @cmd: The command to get or set Rx flow classification rules
+ *
+ * This function looks up a filter based on the Rx flow classification
+ * command and fills the flow spec info for it if found
+ *
+ * Returns 0 on success or -EINVAL if filter not found
+ **/
+static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf,
+				       struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+			(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct i40e_rx_flow_userdef userdef = {0};
+	struct i40e_fdir_filter *rule = NULL;
+	struct hlist_node *node2;
+	u64 input_set;
+	u16 index;
+
+	hlist_for_each_entry_safe(rule, node2,
+				  &pf->fdir_filter_list, fdir_node) {
+		if (fsp->location <= rule->fd_id)
+			break;
+	}
+
+	if (!rule || fsp->location != rule->fd_id)
+		return -EINVAL;
+
+	fsp->flow_type = rule->flow_type;
+	if (fsp->flow_type == IP_USER_FLOW) {
+		fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+		fsp->h_u.usr_ip4_spec.proto = 0;
+		fsp->m_u.usr_ip4_spec.proto = 0;
+	}
+
+	if (fsp->flow_type == IPV6_USER_FLOW ||
+	    fsp->flow_type == UDP_V6_FLOW ||
+	    fsp->flow_type == TCP_V6_FLOW ||
+	    fsp->flow_type == SCTP_V6_FLOW) {
+		/* Reverse the src and dest notion, since the HW views them
+		 * from Tx perspective where as the user expects it from
+		 * Rx filter view.
+		 */
+		fsp->h_u.tcp_ip6_spec.psrc = rule->dst_port;
+		fsp->h_u.tcp_ip6_spec.pdst = rule->src_port;
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, rule->src_ip6,
+		       sizeof(__be32) * 4);
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6src, rule->dst_ip6,
+		       sizeof(__be32) * 4);
+	} else {
+		/* Reverse the src and dest notion, since the HW views them
+		 * from Tx perspective where as the user expects it from
+		 * Rx filter view.
+		 */
+		fsp->h_u.tcp_ip4_spec.psrc = rule->dst_port;
+		fsp->h_u.tcp_ip4_spec.pdst = rule->src_port;
+		fsp->h_u.tcp_ip4_spec.ip4src = rule->dst_ip;
+		fsp->h_u.tcp_ip4_spec.ip4dst = rule->src_ip;
+	}
+
+	switch (rule->flow_type) {
+	case SCTP_V4_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV4_SCTP;
+		break;
+	case TCP_V4_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+		break;
+	case UDP_V4_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+		break;
+	case SCTP_V6_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV6_SCTP;
+		break;
+	case TCP_V6_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV6_TCP;
+		break;
+	case UDP_V6_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV6_UDP;
+		break;
+	case IP_USER_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
+		break;
+	case IPV6_USER_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV6_OTHER;
+		break;
+	default:
+		/* If we have stored a filter with a flow type not listed here
+		 * it is almost certainly a driver bug. WARN(), and then
+		 * assign the input_set as if all fields are enabled to avoid
+		 * reading unassigned memory.
+		 */
+		WARN(1, "Missing input set index for flow_type %d\n",
+		     rule->flow_type);
+		input_set = 0xFFFFFFFFFFFFFFFFULL;
+		goto no_input_set;
+	}
+
+	input_set = i40e_read_fd_input_set(pf, index);
+
+no_input_set:
+	if (input_set & I40E_L3_V6_SRC_MASK) {
+		fsp->m_u.tcp_ip6_spec.ip6src[0] = htonl(0xFFFFFFFF);
+		fsp->m_u.tcp_ip6_spec.ip6src[1] = htonl(0xFFFFFFFF);
+		fsp->m_u.tcp_ip6_spec.ip6src[2] = htonl(0xFFFFFFFF);
+		fsp->m_u.tcp_ip6_spec.ip6src[3] = htonl(0xFFFFFFFF);
+	}
+
+	if (input_set & I40E_L3_V6_DST_MASK) {
+		fsp->m_u.tcp_ip6_spec.ip6dst[0] = htonl(0xFFFFFFFF);
+		fsp->m_u.tcp_ip6_spec.ip6dst[1] = htonl(0xFFFFFFFF);
+		fsp->m_u.tcp_ip6_spec.ip6dst[2] = htonl(0xFFFFFFFF);
+		fsp->m_u.tcp_ip6_spec.ip6dst[3] = htonl(0xFFFFFFFF);
+	}
+
+	if (input_set & I40E_L3_SRC_MASK)
+		fsp->m_u.tcp_ip4_spec.ip4src = htonl(0xFFFFFFFF);
+
+	if (input_set & I40E_L3_DST_MASK)
+		fsp->m_u.tcp_ip4_spec.ip4dst = htonl(0xFFFFFFFF);
+
+	if (input_set & I40E_L4_SRC_MASK)
+		fsp->m_u.tcp_ip4_spec.psrc = htons(0xFFFF);
+
+	if (input_set & I40E_L4_DST_MASK)
+		fsp->m_u.tcp_ip4_spec.pdst = htons(0xFFFF);
+
+	if (rule->dest_ctl == I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET)
+		fsp->ring_cookie = RX_CLS_FLOW_DISC;
+	else
+		fsp->ring_cookie = rule->q_index;
+
+	if (rule->vlan_tag) {
+		fsp->h_ext.vlan_etype = rule->vlan_etype;
+		fsp->m_ext.vlan_etype = htons(0xFFFF);
+		fsp->h_ext.vlan_tci = rule->vlan_tag;
+		fsp->m_ext.vlan_tci = htons(0xFFFF);
+		fsp->flow_type |= FLOW_EXT;
+	}
+
+	if (rule->dest_vsi != pf->vsi[pf->lan_vsi]->id) {
+		struct i40e_vsi *vsi;
+
+		vsi = i40e_find_vsi_from_id(pf, rule->dest_vsi);
+		if (vsi && vsi->type == I40E_VSI_SRIOV) {
+			/* VFs are zero-indexed by the driver, but ethtool
+			 * expects them to be one-indexed, so add one here
+			 */
+			u64 ring_vf = vsi->vf_id + 1;
+
+			ring_vf <<= ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
+			fsp->ring_cookie |= ring_vf;
+		}
+	}
+
+	if (rule->flex_filter) {
+		userdef.flex_filter = true;
+		userdef.flex_word = be16_to_cpu(rule->flex_word);
+		userdef.flex_offset = rule->flex_offset;
+	}
+
+	i40e_fill_rx_flow_user_data(fsp, &userdef);
+
+	return 0;
+}
+
+/**
+ * i40e_get_rxnfc - command to get RX flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ * @rule_locs: pointer to store rule data
+ *
+ * Returns Success if the command is supported.
+ **/
+static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+			  u32 *rule_locs)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = vsi->rss_size;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXFH:
+		ret = i40e_get_rss_hash_opts(pf, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = pf->fdir_pf_active_filters;
+		/* report total rule count */
+		cmd->data = i40e_get_fd_cnt_all(pf);
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = i40e_get_ethtool_fdir_entry(pf, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		ret = i40e_get_ethtool_fdir_all(pf, cmd, rule_locs);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_get_rss_hash_bits - Read RSS Hash bits from register
+ * @hw: hw structure
+ * @nfc: pointer to user request
+ * @i_setc: bits currently set
+ *
+ * Returns value of bits to be set per user request
+ **/
+static u64 i40e_get_rss_hash_bits(struct i40e_hw *hw,
+				  struct ethtool_rxnfc *nfc,
+				  u64 i_setc)
+{
+	u64 i_set = i_setc;
+	u64 src_l3 = 0, dst_l3 = 0;
+
+	if (nfc->data & RXH_L4_B_0_1)
+		i_set |= I40E_L4_SRC_MASK;
+	else
+		i_set &= ~I40E_L4_SRC_MASK;
+	if (nfc->data & RXH_L4_B_2_3)
+		i_set |= I40E_L4_DST_MASK;
+	else
+		i_set &= ~I40E_L4_DST_MASK;
+
+	if (nfc->flow_type == TCP_V6_FLOW || nfc->flow_type == UDP_V6_FLOW) {
+		src_l3 = I40E_L3_V6_SRC_MASK;
+		dst_l3 = I40E_L3_V6_DST_MASK;
+	} else if (nfc->flow_type == TCP_V4_FLOW ||
+		  nfc->flow_type == UDP_V4_FLOW) {
+		if (hw->mac.type == I40E_MAC_X722) {
+			src_l3 = I40E_X722_L3_SRC_MASK;
+			dst_l3 = I40E_X722_L3_DST_MASK;
+		} else {
+			src_l3 = I40E_L3_SRC_MASK;
+			dst_l3 = I40E_L3_DST_MASK;
+		}
+	} else {
+		/* Any other flow type are not supported here */
+		return i_set;
+	}
+
+	if (nfc->data & RXH_IP_SRC)
+		i_set |= src_l3;
+	else
+		i_set &= ~src_l3;
+	if (nfc->data & RXH_IP_DST)
+		i_set |= dst_l3;
+	else
+		i_set &= ~dst_l3;
+
+	return i_set;
+}
+
+#define FLOW_PCTYPES_SIZE 64
+/**
+ * i40e_set_rss_hash_opt - Enable/Disable flow types for RSS hash
+ * @pf: pointer to the physical function struct
+ * @nfc: ethtool rxnfc command
+ *
+ * Returns Success if the flow input set is supported.
+ **/
+static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u64 hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) |
+		   ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32);
+	DECLARE_BITMAP(flow_pctypes, FLOW_PCTYPES_SIZE);
+	u64 i_set, i_setc;
+
+	bitmap_zero(flow_pctypes, FLOW_PCTYPES_SIZE);
+
+	if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+		dev_err(&pf->pdev->dev,
+			"Change of RSS hash input set is not supported when MFP mode is enabled\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* RSS does not support anything other than hashing
+	 * to queues on src and dst IPs and ports
+	 */
+	if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+		set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_TCP, flow_pctypes);
+		if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+			set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK,
+				flow_pctypes);
+		break;
+	case TCP_V6_FLOW:
+		set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_TCP, flow_pctypes);
+		if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+			set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK,
+				flow_pctypes);
+		break;
+	case UDP_V4_FLOW:
+		set_bit(I40E_FILTER_PCTYPE_NONF_IPV4_UDP, flow_pctypes);
+		if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) {
+			set_bit(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP,
+				flow_pctypes);
+			set_bit(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP,
+				flow_pctypes);
+		}
+		hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4);
+		break;
+	case UDP_V6_FLOW:
+		set_bit(I40E_FILTER_PCTYPE_NONF_IPV6_UDP, flow_pctypes);
+		if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) {
+			set_bit(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP,
+				flow_pctypes);
+			set_bit(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP,
+				flow_pctypes);
+		}
+		hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6);
+		break;
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case SCTP_V4_FLOW:
+		if ((nfc->data & RXH_L4_B_0_1) ||
+		    (nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_OTHER);
+		break;
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		if ((nfc->data & RXH_L4_B_0_1) ||
+		    (nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_OTHER);
+		break;
+	case IPV4_FLOW:
+		hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) |
+			BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4);
+		break;
+	case IPV6_FLOW:
+		hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) |
+			BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (bitmap_weight(flow_pctypes, FLOW_PCTYPES_SIZE)) {
+		u8 flow_id;
+
+		for_each_set_bit(flow_id, flow_pctypes, FLOW_PCTYPES_SIZE) {
+			i_setc = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id)) |
+				 ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id)) << 32);
+			i_set = i40e_get_rss_hash_bits(&pf->hw, nfc, i_setc);
+
+			i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_id),
+					  (u32)i_set);
+			i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_id),
+					  (u32)(i_set >> 32));
+			hena |= BIT_ULL(flow_id);
+		}
+	}
+
+	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena);
+	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32));
+	i40e_flush(hw);
+
+	return 0;
+}
+
+/**
+ * i40e_update_ethtool_fdir_entry - Updates the fdir filter entry
+ * @vsi: Pointer to the targeted VSI
+ * @input: The filter to update or NULL to indicate deletion
+ * @sw_idx: Software index to the filter
+ * @cmd: The command to get or set Rx flow classification rules
+ *
+ * This function updates (or deletes) a Flow Director entry from
+ * the hlist of the corresponding PF
+ *
+ * Returns 0 on success
+ **/
+static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
+					  struct i40e_fdir_filter *input,
+					  u16 sw_idx,
+					  struct ethtool_rxnfc *cmd)
+{
+	struct i40e_fdir_filter *rule, *parent;
+	struct i40e_pf *pf = vsi->back;
+	struct hlist_node *node2;
+	int err = -EINVAL;
+
+	parent = NULL;
+	rule = NULL;
+
+	hlist_for_each_entry_safe(rule, node2,
+				  &pf->fdir_filter_list, fdir_node) {
+		/* hash found, or no matching entry */
+		if (rule->fd_id >= sw_idx)
+			break;
+		parent = rule;
+	}
+
+	/* if there is an old rule occupying our place remove it */
+	if (rule && (rule->fd_id == sw_idx)) {
+		/* Remove this rule, since we're either deleting it, or
+		 * replacing it.
+		 */
+		err = i40e_add_del_fdir(vsi, rule, false);
+		hlist_del(&rule->fdir_node);
+		kfree(rule);
+		pf->fdir_pf_active_filters--;
+	}
+
+	/* If we weren't given an input, this is a delete, so just return the
+	 * error code indicating if there was an entry at the requested slot
+	 */
+	if (!input)
+		return err;
+
+	/* Otherwise, install the new rule as requested */
+	INIT_HLIST_NODE(&input->fdir_node);
+
+	/* add filter to the list */
+	if (parent)
+		hlist_add_behind(&input->fdir_node, &parent->fdir_node);
+	else
+		hlist_add_head(&input->fdir_node,
+			       &pf->fdir_filter_list);
+
+	/* update counts */
+	pf->fdir_pf_active_filters++;
+
+	return 0;
+}
+
+/**
+ * i40e_prune_flex_pit_list - Cleanup unused entries in FLX_PIT table
+ * @pf: pointer to PF structure
+ *
+ * This function searches the list of filters and determines which FLX_PIT
+ * entries are still required. It will prune any entries which are no longer
+ * in use after the deletion.
+ **/
+static void i40e_prune_flex_pit_list(struct i40e_pf *pf)
+{
+	struct i40e_flex_pit *entry, *tmp;
+	struct i40e_fdir_filter *rule;
+
+	/* First, we'll check the l3 table */
+	list_for_each_entry_safe(entry, tmp, &pf->l3_flex_pit_list, list) {
+		bool found = false;
+
+		hlist_for_each_entry(rule, &pf->fdir_filter_list, fdir_node) {
+			if (rule->flow_type != IP_USER_FLOW)
+				continue;
+			if (rule->flex_filter &&
+			    rule->flex_offset == entry->src_offset) {
+				found = true;
+				break;
+			}
+		}
+
+		/* If we didn't find the filter, then we can prune this entry
+		 * from the list.
+		 */
+		if (!found) {
+			list_del(&entry->list);
+			kfree(entry);
+		}
+	}
+
+	/* Followed by the L4 table */
+	list_for_each_entry_safe(entry, tmp, &pf->l4_flex_pit_list, list) {
+		bool found = false;
+
+		hlist_for_each_entry(rule, &pf->fdir_filter_list, fdir_node) {
+			/* Skip this filter if it's L3, since we already
+			 * checked those in the above loop
+			 */
+			if (rule->flow_type == IP_USER_FLOW)
+				continue;
+			if (rule->flex_filter &&
+			    rule->flex_offset == entry->src_offset) {
+				found = true;
+				break;
+			}
+		}
+
+		/* If we didn't find the filter, then we can prune this entry
+		 * from the list.
+		 */
+		if (!found) {
+			list_del(&entry->list);
+			kfree(entry);
+		}
+	}
+}
+
+/**
+ * i40e_del_fdir_entry - Deletes a Flow Director filter entry
+ * @vsi: Pointer to the targeted VSI
+ * @cmd: The command to get or set Rx flow classification rules
+ *
+ * The function removes a Flow Director filter entry from the
+ * hlist of the corresponding PF
+ *
+ * Returns 0 on success
+ */
+static int i40e_del_fdir_entry(struct i40e_vsi *vsi,
+			       struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct i40e_pf *pf = vsi->back;
+	int ret = 0;
+
+	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+	    test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
+		return -EBUSY;
+
+	if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
+		return -EBUSY;
+
+	ret = i40e_update_ethtool_fdir_entry(vsi, NULL, fsp->location, cmd);
+
+	i40e_prune_flex_pit_list(pf);
+
+	i40e_fdir_check_and_reenable(pf);
+	return ret;
+}
+
+/**
+ * i40e_unused_pit_index - Find an unused PIT index for given list
+ * @pf: the PF data structure
+ *
+ * Find the first unused flexible PIT index entry. We search both the L3 and
+ * L4 flexible PIT lists so that the returned index is unique and unused by
+ * either currently programmed L3 or L4 filters. We use a bit field as storage
+ * to track which indexes are already used.
+ **/
+static u8 i40e_unused_pit_index(struct i40e_pf *pf)
+{
+	unsigned long available_index = 0xFF;
+	struct i40e_flex_pit *entry;
+
+	/* We need to make sure that the new index isn't in use by either L3
+	 * or L4 filters so that IP_USER_FLOW filters can program both L3 and
+	 * L4 to use the same index.
+	 */
+
+	list_for_each_entry(entry, &pf->l4_flex_pit_list, list)
+		clear_bit(entry->pit_index, &available_index);
+
+	list_for_each_entry(entry, &pf->l3_flex_pit_list, list)
+		clear_bit(entry->pit_index, &available_index);
+
+	return find_first_bit(&available_index, 8);
+}
+
+/**
+ * i40e_find_flex_offset - Find an existing flex src_offset
+ * @flex_pit_list: L3 or L4 flex PIT list
+ * @src_offset: new src_offset to find
+ *
+ * Searches the flex_pit_list for an existing offset. If no offset is
+ * currently programmed, then this will return an ERR_PTR if there is no space
+ * to add a new offset, otherwise it returns NULL.
+ **/
+static
+struct i40e_flex_pit *i40e_find_flex_offset(struct list_head *flex_pit_list,
+					    u16 src_offset)
+{
+	struct i40e_flex_pit *entry;
+	int size = 0;
+
+	/* Search for the src_offset first. If we find a matching entry
+	 * already programmed, we can simply re-use it.
+	 */
+	list_for_each_entry(entry, flex_pit_list, list) {
+		size++;
+		if (entry->src_offset == src_offset)
+			return entry;
+	}
+
+	/* If we haven't found an entry yet, then the provided src offset has
+	 * not yet been programmed. We will program the src offset later on,
+	 * but we need to indicate whether there is enough space to do so
+	 * here. We'll make use of ERR_PTR for this purpose.
+	 */
+	if (size >= I40E_FLEX_PIT_TABLE_SIZE)
+		return ERR_PTR(-ENOSPC);
+
+	return NULL;
+}
+
+/**
+ * i40e_add_flex_offset - Add src_offset to flex PIT table list
+ * @flex_pit_list: L3 or L4 flex PIT list
+ * @src_offset: new src_offset to add
+ * @pit_index: the PIT index to program
+ *
+ * This function programs the new src_offset to the list. It is expected that
+ * i40e_find_flex_offset has already been tried and returned NULL, indicating
+ * that this offset is not programmed, and that the list has enough space to
+ * store another offset.
+ *
+ * Returns 0 on success, and negative value on error.
+ **/
+static int i40e_add_flex_offset(struct list_head *flex_pit_list,
+				u16 src_offset,
+				u8 pit_index)
+{
+	struct i40e_flex_pit *new_pit, *entry;
+
+	new_pit = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!new_pit)
+		return -ENOMEM;
+
+	new_pit->src_offset = src_offset;
+	new_pit->pit_index = pit_index;
+
+	/* We need to insert this item such that the list is sorted by
+	 * src_offset in ascending order.
+	 */
+	list_for_each_entry(entry, flex_pit_list, list) {
+		if (new_pit->src_offset < entry->src_offset) {
+			list_add_tail(&new_pit->list, &entry->list);
+			return 0;
+		}
+
+		/* If we found an entry with our offset already programmed we
+		 * can simply return here, after freeing the memory. However,
+		 * if the pit_index does not match we need to report an error.
+		 */
+		if (new_pit->src_offset == entry->src_offset) {
+			int err = 0;
+
+			/* If the PIT index is not the same we can't re-use
+			 * the entry, so we must report an error.
+			 */
+			if (new_pit->pit_index != entry->pit_index)
+				err = -EINVAL;
+
+			kfree(new_pit);
+			return err;
+		}
+	}
+
+	/* If we reached here, then we haven't yet added the item. This means
+	 * that we should add the item at the end of the list.
+	 */
+	list_add_tail(&new_pit->list, flex_pit_list);
+	return 0;
+}
+
+/**
+ * __i40e_reprogram_flex_pit - Re-program specific FLX_PIT table
+ * @pf: Pointer to the PF structure
+ * @flex_pit_list: list of flexible src offsets in use
+ * @flex_pit_start: index to first entry for this section of the table
+ *
+ * In order to handle flexible data, the hardware uses a table of values
+ * called the FLX_PIT table. This table is used to indicate which sections of
+ * the input correspond to what PIT index values. Unfortunately, hardware is
+ * very restrictive about programming this table. Entries must be ordered by
+ * src_offset in ascending order, without duplicates. Additionally, unused
+ * entries must be set to the unused index value, and must have valid size and
+ * length according to the src_offset ordering.
+ *
+ * This function will reprogram the FLX_PIT register from a book-keeping
+ * structure that we guarantee is already ordered correctly, and has no more
+ * than 3 entries.
+ *
+ * To make things easier, we only support flexible values of one word length,
+ * rather than allowing variable length flexible values.
+ **/
+static void __i40e_reprogram_flex_pit(struct i40e_pf *pf,
+				      struct list_head *flex_pit_list,
+				      int flex_pit_start)
+{
+	struct i40e_flex_pit *entry = NULL;
+	u16 last_offset = 0;
+	int i = 0, j = 0;
+
+	/* First, loop over the list of flex PIT entries, and reprogram the
+	 * registers.
+	 */
+	list_for_each_entry(entry, flex_pit_list, list) {
+		/* We have to be careful when programming values for the
+		 * largest SRC_OFFSET value. It is possible that adding
+		 * additional empty values at the end would overflow the space
+		 * for the SRC_OFFSET in the FLX_PIT register. To avoid this,
+		 * we check here and add the empty values prior to adding the
+		 * largest value.
+		 *
+		 * To determine this, we will use a loop from i+1 to 3, which
+		 * will determine whether the unused entries would have valid
+		 * SRC_OFFSET. Note that there cannot be extra entries past
+		 * this value, because the only valid values would have been
+		 * larger than I40E_MAX_FLEX_SRC_OFFSET, and thus would not
+		 * have been added to the list in the first place.
+		 */
+		for (j = i + 1; j < 3; j++) {
+			u16 offset = entry->src_offset + j;
+			int index = flex_pit_start + i;
+			u32 value = I40E_FLEX_PREP_VAL(I40E_FLEX_DEST_UNUSED,
+						       1,
+						       offset - 3);
+
+			if (offset > I40E_MAX_FLEX_SRC_OFFSET) {
+				i40e_write_rx_ctl(&pf->hw,
+						  I40E_PRTQF_FLX_PIT(index),
+						  value);
+				i++;
+			}
+		}
+
+		/* Now, we can program the actual value into the table */
+		i40e_write_rx_ctl(&pf->hw,
+				  I40E_PRTQF_FLX_PIT(flex_pit_start + i),
+				  I40E_FLEX_PREP_VAL(entry->pit_index + 50,
+						     1,
+						     entry->src_offset));
+		i++;
+	}
+
+	/* In order to program the last entries in the table, we need to
+	 * determine the valid offset. If the list is empty, we'll just start
+	 * with 0. Otherwise, we'll start with the last item offset and add 1.
+	 * This ensures that all entries have valid sizes. If we don't do this
+	 * correctly, the hardware will disable flexible field parsing.
+	 */
+	if (!list_empty(flex_pit_list))
+		last_offset = list_prev_entry(entry, list)->src_offset + 1;
+
+	for (; i < 3; i++, last_offset++) {
+		i40e_write_rx_ctl(&pf->hw,
+				  I40E_PRTQF_FLX_PIT(flex_pit_start + i),
+				  I40E_FLEX_PREP_VAL(I40E_FLEX_DEST_UNUSED,
+						     1,
+						     last_offset));
+	}
+}
+
+/**
+ * i40e_reprogram_flex_pit - Reprogram all FLX_PIT tables after input set change
+ * @pf: pointer to the PF structure
+ *
+ * This function reprograms both the L3 and L4 FLX_PIT tables. See the
+ * internal helper function for implementation details.
+ **/
+static void i40e_reprogram_flex_pit(struct i40e_pf *pf)
+{
+	__i40e_reprogram_flex_pit(pf, &pf->l3_flex_pit_list,
+				  I40E_FLEX_PIT_IDX_START_L3);
+
+	__i40e_reprogram_flex_pit(pf, &pf->l4_flex_pit_list,
+				  I40E_FLEX_PIT_IDX_START_L4);
+
+	/* We also need to program the L3 and L4 GLQF ORT register */
+	i40e_write_rx_ctl(&pf->hw,
+			  I40E_GLQF_ORT(I40E_L3_GLQF_ORT_IDX),
+			  I40E_ORT_PREP_VAL(I40E_FLEX_PIT_IDX_START_L3,
+					    3, 1));
+
+	i40e_write_rx_ctl(&pf->hw,
+			  I40E_GLQF_ORT(I40E_L4_GLQF_ORT_IDX),
+			  I40E_ORT_PREP_VAL(I40E_FLEX_PIT_IDX_START_L4,
+					    3, 1));
+}
+
+/**
+ * i40e_flow_str - Converts a flow_type into a human readable string
+ * @fsp: the flow specification
+ *
+ * Currently only flow types we support are included here, and the string
+ * value attempts to match what ethtool would use to configure this flow type.
+ **/
+static const char *i40e_flow_str(struct ethtool_rx_flow_spec *fsp)
+{
+	switch (fsp->flow_type & ~FLOW_EXT) {
+	case TCP_V4_FLOW:
+		return "tcp4";
+	case UDP_V4_FLOW:
+		return "udp4";
+	case SCTP_V4_FLOW:
+		return "sctp4";
+	case IP_USER_FLOW:
+		return "ip4";
+	case TCP_V6_FLOW:
+		return "tcp6";
+	case UDP_V6_FLOW:
+		return "udp6";
+	case SCTP_V6_FLOW:
+		return "sctp6";
+	case IPV6_USER_FLOW:
+		return "ip6";
+	default:
+		return "unknown";
+	}
+}
+
+/**
+ * i40e_pit_index_to_mask - Return the FLEX mask for a given PIT index
+ * @pit_index: PIT index to convert
+ *
+ * Returns the mask for a given PIT index. Will return 0 if the pit_index is
+ * of range.
+ **/
+static u64 i40e_pit_index_to_mask(int pit_index)
+{
+	switch (pit_index) {
+	case 0:
+		return I40E_FLEX_50_MASK;
+	case 1:
+		return I40E_FLEX_51_MASK;
+	case 2:
+		return I40E_FLEX_52_MASK;
+	case 3:
+		return I40E_FLEX_53_MASK;
+	case 4:
+		return I40E_FLEX_54_MASK;
+	case 5:
+		return I40E_FLEX_55_MASK;
+	case 6:
+		return I40E_FLEX_56_MASK;
+	case 7:
+		return I40E_FLEX_57_MASK;
+	default:
+		return 0;
+	}
+}
+
+/**
+ * i40e_print_input_set - Show changes between two input sets
+ * @vsi: the vsi being configured
+ * @old: the old input set
+ * @new: the new input set
+ *
+ * Print the difference between old and new input sets by showing which series
+ * of words are toggled on or off. Only displays the bits we actually support
+ * changing.
+ **/
+static void i40e_print_input_set(struct i40e_vsi *vsi, u64 old, u64 new)
+{
+	struct i40e_pf *pf = vsi->back;
+	bool old_value, new_value;
+	int i;
+
+	old_value = !!(old & I40E_L3_SRC_MASK);
+	new_value = !!(new & I40E_L3_SRC_MASK);
+	if (old_value != new_value)
+		netif_info(pf, drv, vsi->netdev, "L3 source address: %s -> %s\n",
+			   old_value ? "ON" : "OFF",
+			   new_value ? "ON" : "OFF");
+
+	old_value = !!(old & I40E_L3_DST_MASK);
+	new_value = !!(new & I40E_L3_DST_MASK);
+	if (old_value != new_value)
+		netif_info(pf, drv, vsi->netdev, "L3 destination address: %s -> %s\n",
+			   old_value ? "ON" : "OFF",
+			   new_value ? "ON" : "OFF");
+
+	old_value = !!(old & I40E_L4_SRC_MASK);
+	new_value = !!(new & I40E_L4_SRC_MASK);
+	if (old_value != new_value)
+		netif_info(pf, drv, vsi->netdev, "L4 source port: %s -> %s\n",
+			   old_value ? "ON" : "OFF",
+			   new_value ? "ON" : "OFF");
+
+	old_value = !!(old & I40E_L4_DST_MASK);
+	new_value = !!(new & I40E_L4_DST_MASK);
+	if (old_value != new_value)
+		netif_info(pf, drv, vsi->netdev, "L4 destination port: %s -> %s\n",
+			   old_value ? "ON" : "OFF",
+			   new_value ? "ON" : "OFF");
+
+	old_value = !!(old & I40E_VERIFY_TAG_MASK);
+	new_value = !!(new & I40E_VERIFY_TAG_MASK);
+	if (old_value != new_value)
+		netif_info(pf, drv, vsi->netdev, "SCTP verification tag: %s -> %s\n",
+			   old_value ? "ON" : "OFF",
+			   new_value ? "ON" : "OFF");
+
+	/* Show change of flexible filter entries */
+	for (i = 0; i < I40E_FLEX_INDEX_ENTRIES; i++) {
+		u64 flex_mask = i40e_pit_index_to_mask(i);
+
+		old_value = !!(old & flex_mask);
+		new_value = !!(new & flex_mask);
+		if (old_value != new_value)
+			netif_info(pf, drv, vsi->netdev, "FLEX index %d: %s -> %s\n",
+				   i,
+				   old_value ? "ON" : "OFF",
+				   new_value ? "ON" : "OFF");
+	}
+
+	netif_info(pf, drv, vsi->netdev, "  Current input set: %0llx\n",
+		   old);
+	netif_info(pf, drv, vsi->netdev, "Requested input set: %0llx\n",
+		   new);
+}
+
+/**
+ * i40e_check_fdir_input_set - Check that a given rx_flow_spec mask is valid
+ * @vsi: pointer to the targeted VSI
+ * @fsp: pointer to Rx flow specification
+ * @userdef: userdefined data from flow specification
+ *
+ * Ensures that a given ethtool_rx_flow_spec has a valid mask. Some support
+ * for partial matches exists with a few limitations. First, hardware only
+ * supports masking by word boundary (2 bytes) and not per individual bit.
+ * Second, hardware is limited to using one mask for a flow type and cannot
+ * use a separate mask for each filter.
+ *
+ * To support these limitations, if we already have a configured filter for
+ * the specified type, this function enforces that new filters of the type
+ * match the configured input set. Otherwise, if we do not have a filter of
+ * the specified type, we allow the input set to be updated to match the
+ * desired filter.
+ *
+ * To help ensure that administrators understand why filters weren't displayed
+ * as supported, we print a diagnostic message displaying how the input set
+ * would change and warning to delete the preexisting filters if required.
+ *
+ * Returns 0 on successful input set match, and a negative return code on
+ * failure.
+ **/
+static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
+				     struct ethtool_rx_flow_spec *fsp,
+				     struct i40e_rx_flow_userdef *userdef)
+{
+	static const __be32 ipv6_full_mask[4] = {cpu_to_be32(0xffffffff),
+		cpu_to_be32(0xffffffff), cpu_to_be32(0xffffffff),
+		cpu_to_be32(0xffffffff)};
+	struct ethtool_tcpip6_spec *tcp_ip6_spec;
+	struct ethtool_usrip6_spec *usr_ip6_spec;
+	struct ethtool_tcpip4_spec *tcp_ip4_spec;
+	struct ethtool_usrip4_spec *usr_ip4_spec;
+	struct i40e_pf *pf = vsi->back;
+	u64 current_mask, new_mask;
+	bool new_flex_offset = false;
+	bool flex_l3 = false;
+	u16 *fdir_filter_count;
+	u16 index, src_offset = 0;
+	u8 pit_index = 0;
+	int err;
+
+	switch (fsp->flow_type & ~FLOW_EXT) {
+	case SCTP_V4_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV4_SCTP;
+		fdir_filter_count = &pf->fd_sctp4_filter_cnt;
+		break;
+	case TCP_V4_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+		fdir_filter_count = &pf->fd_tcp4_filter_cnt;
+		break;
+	case UDP_V4_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+		fdir_filter_count = &pf->fd_udp4_filter_cnt;
+		break;
+	case SCTP_V6_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV6_SCTP;
+		fdir_filter_count = &pf->fd_sctp6_filter_cnt;
+		break;
+	case TCP_V6_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV6_TCP;
+		fdir_filter_count = &pf->fd_tcp6_filter_cnt;
+		break;
+	case UDP_V6_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV6_UDP;
+		fdir_filter_count = &pf->fd_udp6_filter_cnt;
+		break;
+	case IP_USER_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
+		fdir_filter_count = &pf->fd_ip4_filter_cnt;
+		flex_l3 = true;
+		break;
+	case IPV6_USER_FLOW:
+		index = I40E_FILTER_PCTYPE_NONF_IPV6_OTHER;
+		fdir_filter_count = &pf->fd_ip6_filter_cnt;
+		flex_l3 = true;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	/* Read the current input set from register memory. */
+	current_mask = i40e_read_fd_input_set(pf, index);
+	new_mask = current_mask;
+
+	/* Determine, if any, the required changes to the input set in order
+	 * to support the provided mask.
+	 *
+	 * Hardware only supports masking at word (2 byte) granularity and does
+	 * not support full bitwise masking. This implementation simplifies
+	 * even further and only supports fully enabled or fully disabled
+	 * masks for each field, even though we could split the ip4src and
+	 * ip4dst fields.
+	 */
+	switch (fsp->flow_type & ~FLOW_EXT) {
+	case SCTP_V4_FLOW:
+		new_mask &= ~I40E_VERIFY_TAG_MASK;
+		fallthrough;
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+		tcp_ip4_spec = &fsp->m_u.tcp_ip4_spec;
+
+		/* IPv4 source address */
+		if (tcp_ip4_spec->ip4src == htonl(0xFFFFFFFF))
+			new_mask |= I40E_L3_SRC_MASK;
+		else if (!tcp_ip4_spec->ip4src)
+			new_mask &= ~I40E_L3_SRC_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* IPv4 destination address */
+		if (tcp_ip4_spec->ip4dst == htonl(0xFFFFFFFF))
+			new_mask |= I40E_L3_DST_MASK;
+		else if (!tcp_ip4_spec->ip4dst)
+			new_mask &= ~I40E_L3_DST_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* L4 source port */
+		if (tcp_ip4_spec->psrc == htons(0xFFFF))
+			new_mask |= I40E_L4_SRC_MASK;
+		else if (!tcp_ip4_spec->psrc)
+			new_mask &= ~I40E_L4_SRC_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* L4 destination port */
+		if (tcp_ip4_spec->pdst == htons(0xFFFF))
+			new_mask |= I40E_L4_DST_MASK;
+		else if (!tcp_ip4_spec->pdst)
+			new_mask &= ~I40E_L4_DST_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* Filtering on Type of Service is not supported. */
+		if (tcp_ip4_spec->tos)
+			return -EOPNOTSUPP;
+
+		break;
+	case SCTP_V6_FLOW:
+		new_mask &= ~I40E_VERIFY_TAG_MASK;
+		fallthrough;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+		tcp_ip6_spec = &fsp->m_u.tcp_ip6_spec;
+
+		/* Check if user provided IPv6 source address. */
+		if (ipv6_addr_equal((struct in6_addr *)&tcp_ip6_spec->ip6src,
+				    (struct in6_addr *)&ipv6_full_mask))
+			new_mask |= I40E_L3_V6_SRC_MASK;
+		else if (ipv6_addr_any((struct in6_addr *)
+				       &tcp_ip6_spec->ip6src))
+			new_mask &= ~I40E_L3_V6_SRC_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* Check if user provided destination address. */
+		if (ipv6_addr_equal((struct in6_addr *)&tcp_ip6_spec->ip6dst,
+				    (struct in6_addr *)&ipv6_full_mask))
+			new_mask |= I40E_L3_V6_DST_MASK;
+		else if (ipv6_addr_any((struct in6_addr *)
+				       &tcp_ip6_spec->ip6dst))
+			new_mask &= ~I40E_L3_V6_DST_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* L4 source port */
+		if (tcp_ip6_spec->psrc == htons(0xFFFF))
+			new_mask |= I40E_L4_SRC_MASK;
+		else if (!tcp_ip6_spec->psrc)
+			new_mask &= ~I40E_L4_SRC_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* L4 destination port */
+		if (tcp_ip6_spec->pdst == htons(0xFFFF))
+			new_mask |= I40E_L4_DST_MASK;
+		else if (!tcp_ip6_spec->pdst)
+			new_mask &= ~I40E_L4_DST_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* Filtering on Traffic Classes is not supported. */
+		if (tcp_ip6_spec->tclass)
+			return -EOPNOTSUPP;
+		break;
+	case IP_USER_FLOW:
+		usr_ip4_spec = &fsp->m_u.usr_ip4_spec;
+
+		/* IPv4 source address */
+		if (usr_ip4_spec->ip4src == htonl(0xFFFFFFFF))
+			new_mask |= I40E_L3_SRC_MASK;
+		else if (!usr_ip4_spec->ip4src)
+			new_mask &= ~I40E_L3_SRC_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* IPv4 destination address */
+		if (usr_ip4_spec->ip4dst == htonl(0xFFFFFFFF))
+			new_mask |= I40E_L3_DST_MASK;
+		else if (!usr_ip4_spec->ip4dst)
+			new_mask &= ~I40E_L3_DST_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* First 4 bytes of L4 header */
+		if (usr_ip4_spec->l4_4_bytes)
+			return -EOPNOTSUPP;
+
+		/* Filtering on Type of Service is not supported. */
+		if (usr_ip4_spec->tos)
+			return -EOPNOTSUPP;
+
+		/* Filtering on IP version is not supported */
+		if (usr_ip4_spec->ip_ver)
+			return -EINVAL;
+
+		/* Filtering on L4 protocol is not supported */
+		if (usr_ip4_spec->proto)
+			return -EINVAL;
+
+		break;
+	case IPV6_USER_FLOW:
+		usr_ip6_spec = &fsp->m_u.usr_ip6_spec;
+
+		/* Check if user provided IPv6 source address. */
+		if (ipv6_addr_equal((struct in6_addr *)&usr_ip6_spec->ip6src,
+				    (struct in6_addr *)&ipv6_full_mask))
+			new_mask |= I40E_L3_V6_SRC_MASK;
+		else if (ipv6_addr_any((struct in6_addr *)
+				       &usr_ip6_spec->ip6src))
+			new_mask &= ~I40E_L3_V6_SRC_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		/* Check if user provided destination address. */
+		if (ipv6_addr_equal((struct in6_addr *)&usr_ip6_spec->ip6dst,
+				    (struct in6_addr *)&ipv6_full_mask))
+			new_mask |= I40E_L3_V6_DST_MASK;
+		else if (ipv6_addr_any((struct in6_addr *)
+				       &usr_ip6_spec->ip6dst))
+			new_mask &= ~I40E_L3_V6_DST_MASK;
+		else
+			return -EOPNOTSUPP;
+
+		if (usr_ip6_spec->l4_4_bytes)
+			return -EOPNOTSUPP;
+
+		/* Filtering on Traffic class is not supported. */
+		if (usr_ip6_spec->tclass)
+			return -EOPNOTSUPP;
+
+		/* Filtering on L4 protocol is not supported */
+		if (usr_ip6_spec->l4_proto)
+			return -EINVAL;
+
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (fsp->flow_type & FLOW_EXT) {
+		/* Allow only 802.1Q and no etype defined, as
+		 * later it's modified to 0x8100
+		 */
+		if (fsp->h_ext.vlan_etype != htons(ETH_P_8021Q) &&
+		    fsp->h_ext.vlan_etype != 0)
+			return -EOPNOTSUPP;
+		if (fsp->m_ext.vlan_tci == htons(0xFFFF))
+			new_mask |= I40E_VLAN_SRC_MASK;
+		else
+			new_mask &= ~I40E_VLAN_SRC_MASK;
+	}
+
+	/* First, clear all flexible filter entries */
+	new_mask &= ~I40E_FLEX_INPUT_MASK;
+
+	/* If we have a flexible filter, try to add this offset to the correct
+	 * flexible filter PIT list. Once finished, we can update the mask.
+	 * If the src_offset changed, we will get a new mask value which will
+	 * trigger an input set change.
+	 */
+	if (userdef->flex_filter) {
+		struct i40e_flex_pit *l3_flex_pit = NULL, *flex_pit = NULL;
+
+		/* Flexible offset must be even, since the flexible payload
+		 * must be aligned on 2-byte boundary.
+		 */
+		if (userdef->flex_offset & 0x1) {
+			dev_warn(&pf->pdev->dev,
+				 "Flexible data offset must be 2-byte aligned\n");
+			return -EINVAL;
+		}
+
+		src_offset = userdef->flex_offset >> 1;
+
+		/* FLX_PIT source offset value is only so large */
+		if (src_offset > I40E_MAX_FLEX_SRC_OFFSET) {
+			dev_warn(&pf->pdev->dev,
+				 "Flexible data must reside within first 64 bytes of the packet payload\n");
+			return -EINVAL;
+		}
+
+		/* See if this offset has already been programmed. If we get
+		 * an ERR_PTR, then the filter is not safe to add. Otherwise,
+		 * if we get a NULL pointer, this means we will need to add
+		 * the offset.
+		 */
+		flex_pit = i40e_find_flex_offset(&pf->l4_flex_pit_list,
+						 src_offset);
+		if (IS_ERR(flex_pit))
+			return PTR_ERR(flex_pit);
+
+		/* IP_USER_FLOW filters match both L4 (ICMP) and L3 (unknown)
+		 * packet types, and thus we need to program both L3 and L4
+		 * flexible values. These must have identical flexible index,
+		 * as otherwise we can't correctly program the input set. So
+		 * we'll find both an L3 and L4 index and make sure they are
+		 * the same.
+		 */
+		if (flex_l3) {
+			l3_flex_pit =
+				i40e_find_flex_offset(&pf->l3_flex_pit_list,
+						      src_offset);
+			if (IS_ERR(l3_flex_pit))
+				return PTR_ERR(l3_flex_pit);
+
+			if (flex_pit) {
+				/* If we already had a matching L4 entry, we
+				 * need to make sure that the L3 entry we
+				 * obtained uses the same index.
+				 */
+				if (l3_flex_pit) {
+					if (l3_flex_pit->pit_index !=
+					    flex_pit->pit_index) {
+						return -EINVAL;
+					}
+				} else {
+					new_flex_offset = true;
+				}
+			} else {
+				flex_pit = l3_flex_pit;
+			}
+		}
+
+		/* If we didn't find an existing flex offset, we need to
+		 * program a new one. However, we don't immediately program it
+		 * here because we will wait to program until after we check
+		 * that it is safe to change the input set.
+		 */
+		if (!flex_pit) {
+			new_flex_offset = true;
+			pit_index = i40e_unused_pit_index(pf);
+		} else {
+			pit_index = flex_pit->pit_index;
+		}
+
+		/* Update the mask with the new offset */
+		new_mask |= i40e_pit_index_to_mask(pit_index);
+	}
+
+	/* If the mask and flexible filter offsets for this filter match the
+	 * currently programmed values we don't need any input set change, so
+	 * this filter is safe to install.
+	 */
+	if (new_mask == current_mask && !new_flex_offset)
+		return 0;
+
+	netif_info(pf, drv, vsi->netdev, "Input set change requested for %s flows:\n",
+		   i40e_flow_str(fsp));
+	i40e_print_input_set(vsi, current_mask, new_mask);
+	if (new_flex_offset) {
+		netif_info(pf, drv, vsi->netdev, "FLEX index %d: Offset -> %d",
+			   pit_index, src_offset);
+	}
+
+	/* Hardware input sets are global across multiple ports, so even the
+	 * main port cannot change them when in MFP mode as this would impact
+	 * any filters on the other ports.
+	 */
+	if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+		netif_err(pf, drv, vsi->netdev, "Cannot change Flow Director input sets while MFP is enabled\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* This filter requires us to update the input set. However, hardware
+	 * only supports one input set per flow type, and does not support
+	 * separate masks for each filter. This means that we can only support
+	 * a single mask for all filters of a specific type.
+	 *
+	 * If we have preexisting filters, they obviously depend on the
+	 * current programmed input set. Display a diagnostic message in this
+	 * case explaining why the filter could not be accepted.
+	 */
+	if (*fdir_filter_count) {
+		netif_err(pf, drv, vsi->netdev, "Cannot change input set for %s flows until %d preexisting filters are removed\n",
+			  i40e_flow_str(fsp),
+			  *fdir_filter_count);
+		return -EOPNOTSUPP;
+	}
+
+	i40e_write_fd_input_set(pf, index, new_mask);
+
+	/* IP_USER_FLOW filters match both IPv4/Other and IPv4/Fragmented
+	 * frames. If we're programming the input set for IPv4/Other, we also
+	 * need to program the IPv4/Fragmented input set. Since we don't have
+	 * separate support, we'll always assume and enforce that the two flow
+	 * types must have matching input sets.
+	 */
+	if (index == I40E_FILTER_PCTYPE_NONF_IPV4_OTHER)
+		i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_FRAG_IPV4,
+					new_mask);
+
+	/* Add the new offset and update table, if necessary */
+	if (new_flex_offset) {
+		err = i40e_add_flex_offset(&pf->l4_flex_pit_list, src_offset,
+					   pit_index);
+		if (err)
+			return err;
+
+		if (flex_l3) {
+			err = i40e_add_flex_offset(&pf->l3_flex_pit_list,
+						   src_offset,
+						   pit_index);
+			if (err)
+				return err;
+		}
+
+		i40e_reprogram_flex_pit(pf);
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_match_fdir_filter - Return true of two filters match
+ * @a: pointer to filter struct
+ * @b: pointer to filter struct
+ *
+ * Returns true if the two filters match exactly the same criteria. I.e. they
+ * match the same flow type and have the same parameters. We don't need to
+ * check any input-set since all filters of the same flow type must use the
+ * same input set.
+ **/
+static bool i40e_match_fdir_filter(struct i40e_fdir_filter *a,
+				   struct i40e_fdir_filter *b)
+{
+	/* The filters do not much if any of these criteria differ. */
+	if (a->dst_ip != b->dst_ip ||
+	    a->src_ip != b->src_ip ||
+	    a->dst_port != b->dst_port ||
+	    a->src_port != b->src_port ||
+	    a->flow_type != b->flow_type ||
+	    a->ipl4_proto != b->ipl4_proto ||
+	    a->vlan_tag != b->vlan_tag ||
+	    a->vlan_etype != b->vlan_etype)
+		return false;
+
+	return true;
+}
+
+/**
+ * i40e_disallow_matching_filters - Check that new filters differ
+ * @vsi: pointer to the targeted VSI
+ * @input: new filter to check
+ *
+ * Due to hardware limitations, it is not possible for two filters that match
+ * similar criteria to be programmed at the same time. This is true for a few
+ * reasons:
+ *
+ * (a) all filters matching a particular flow type must use the same input
+ * set, that is they must match the same criteria.
+ * (b) different flow types will never match the same packet, as the flow type
+ * is decided by hardware before checking which rules apply.
+ * (c) hardware has no way to distinguish which order filters apply in.
+ *
+ * Due to this, we can't really support using the location data to order
+ * filters in the hardware parsing. It is technically possible for the user to
+ * request two filters matching the same criteria but which select different
+ * queues. In this case, rather than keep both filters in the list, we reject
+ * the 2nd filter when the user requests adding it.
+ *
+ * This avoids needing to track location for programming the filter to
+ * hardware, and ensures that we avoid some strange scenarios involving
+ * deleting filters which match the same criteria.
+ **/
+static int i40e_disallow_matching_filters(struct i40e_vsi *vsi,
+					  struct i40e_fdir_filter *input)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_fdir_filter *rule;
+	struct hlist_node *node2;
+
+	/* Loop through every filter, and check that it doesn't match */
+	hlist_for_each_entry_safe(rule, node2,
+				  &pf->fdir_filter_list, fdir_node) {
+		/* Don't check the filters match if they share the same fd_id,
+		 * since the new filter is actually just updating the target
+		 * of the old filter.
+		 */
+		if (rule->fd_id == input->fd_id)
+			continue;
+
+		/* If any filters match, then print a warning message to the
+		 * kernel message buffer and bail out.
+		 */
+		if (i40e_match_fdir_filter(rule, input)) {
+			dev_warn(&pf->pdev->dev,
+				 "Existing user defined filter %d already matches this flow.\n",
+				 rule->fd_id);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_add_fdir_ethtool - Add/Remove Flow Director filters
+ * @vsi: pointer to the targeted VSI
+ * @cmd: command to get or set RX flow classification rules
+ *
+ * Add Flow Director filters for a specific flow spec based on their
+ * protocol.  Returns 0 if the filters were successfully added.
+ **/
+static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
+				 struct ethtool_rxnfc *cmd)
+{
+	struct i40e_rx_flow_userdef userdef;
+	struct ethtool_rx_flow_spec *fsp;
+	struct i40e_fdir_filter *input;
+	u16 dest_vsi = 0, q_index = 0;
+	struct i40e_pf *pf;
+	int ret = -EINVAL;
+	u8 dest_ctl;
+
+	if (!vsi)
+		return -EINVAL;
+	pf = vsi->back;
+
+	if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
+		return -EOPNOTSUPP;
+
+	if (test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
+		return -ENOSPC;
+
+	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+	    test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
+		return -EBUSY;
+
+	if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
+		return -EBUSY;
+
+	fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+	/* Parse the user-defined field */
+	if (i40e_parse_rx_flow_user_data(fsp, &userdef))
+		return -EINVAL;
+
+	/* Extended MAC field is not supported */
+	if (fsp->flow_type & FLOW_MAC_EXT)
+		return -EINVAL;
+
+	ret = i40e_check_fdir_input_set(vsi, fsp, &userdef);
+	if (ret)
+		return ret;
+
+	if (fsp->location >= (pf->hw.func_caps.fd_filters_best_effort +
+			      pf->hw.func_caps.fd_filters_guaranteed)) {
+		return -EINVAL;
+	}
+
+	/* ring_cookie is either the drop index, or is a mask of the queue
+	 * index and VF id we wish to target.
+	 */
+	if (fsp->ring_cookie == RX_CLS_FLOW_DISC) {
+		dest_ctl = I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET;
+	} else {
+		u32 ring = ethtool_get_flow_spec_ring(fsp->ring_cookie);
+		u8 vf = ethtool_get_flow_spec_ring_vf(fsp->ring_cookie);
+
+		if (!vf) {
+			if (ring >= vsi->num_queue_pairs)
+				return -EINVAL;
+			dest_vsi = vsi->id;
+		} else {
+			/* VFs are zero-indexed, so we subtract one here */
+			vf--;
+
+			if (vf >= pf->num_alloc_vfs)
+				return -EINVAL;
+			if (ring >= pf->vf[vf].num_queue_pairs)
+				return -EINVAL;
+			dest_vsi = pf->vf[vf].lan_vsi_id;
+		}
+		dest_ctl = I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX;
+		q_index = ring;
+	}
+
+	input = kzalloc(sizeof(*input), GFP_KERNEL);
+
+	if (!input)
+		return -ENOMEM;
+
+	input->fd_id = fsp->location;
+	input->q_index = q_index;
+	input->dest_vsi = dest_vsi;
+	input->dest_ctl = dest_ctl;
+	input->fd_status = I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID;
+	input->cnt_index  = I40E_FD_SB_STAT_IDX(pf->hw.pf_id);
+	input->dst_ip = fsp->h_u.tcp_ip4_spec.ip4src;
+	input->src_ip = fsp->h_u.tcp_ip4_spec.ip4dst;
+	input->flow_type = fsp->flow_type & ~FLOW_EXT;
+
+	input->vlan_etype = fsp->h_ext.vlan_etype;
+	if (!fsp->m_ext.vlan_etype && fsp->h_ext.vlan_tci)
+		input->vlan_etype = cpu_to_be16(ETH_P_8021Q);
+	if (fsp->m_ext.vlan_tci && input->vlan_etype)
+		input->vlan_tag = fsp->h_ext.vlan_tci;
+	if (input->flow_type == IPV6_USER_FLOW ||
+	    input->flow_type == UDP_V6_FLOW ||
+	    input->flow_type == TCP_V6_FLOW ||
+	    input->flow_type == SCTP_V6_FLOW) {
+		/* Reverse the src and dest notion, since the HW expects them
+		 * to be from Tx perspective where as the input from user is
+		 * from Rx filter view.
+		 */
+		input->ipl4_proto = fsp->h_u.usr_ip6_spec.l4_proto;
+		input->dst_port = fsp->h_u.tcp_ip6_spec.psrc;
+		input->src_port = fsp->h_u.tcp_ip6_spec.pdst;
+		memcpy(input->dst_ip6, fsp->h_u.ah_ip6_spec.ip6src,
+		       sizeof(__be32) * 4);
+		memcpy(input->src_ip6, fsp->h_u.ah_ip6_spec.ip6dst,
+		       sizeof(__be32) * 4);
+	} else {
+		/* Reverse the src and dest notion, since the HW expects them
+		 * to be from Tx perspective where as the input from user is
+		 * from Rx filter view.
+		 */
+		input->ipl4_proto = fsp->h_u.usr_ip4_spec.proto;
+		input->dst_port = fsp->h_u.tcp_ip4_spec.psrc;
+		input->src_port = fsp->h_u.tcp_ip4_spec.pdst;
+		input->dst_ip = fsp->h_u.tcp_ip4_spec.ip4src;
+		input->src_ip = fsp->h_u.tcp_ip4_spec.ip4dst;
+	}
+
+	if (userdef.flex_filter) {
+		input->flex_filter = true;
+		input->flex_word = cpu_to_be16(userdef.flex_word);
+		input->flex_offset = userdef.flex_offset;
+	}
+
+	/* Avoid programming two filters with identical match criteria. */
+	ret = i40e_disallow_matching_filters(vsi, input);
+	if (ret)
+		goto free_filter_memory;
+
+	/* Add the input filter to the fdir_input_list, possibly replacing
+	 * a previous filter. Do not free the input structure after adding it
+	 * to the list as this would cause a use-after-free bug.
+	 */
+	i40e_update_ethtool_fdir_entry(vsi, input, fsp->location, NULL);
+	ret = i40e_add_del_fdir(vsi, input, true);
+	if (ret)
+		goto remove_sw_rule;
+	return 0;
+
+remove_sw_rule:
+	hlist_del(&input->fdir_node);
+	pf->fdir_pf_active_filters--;
+free_filter_memory:
+	kfree(input);
+	return ret;
+}
+
+/**
+ * i40e_set_rxnfc - command to set RX flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns Success if the command is supported.
+ **/
+static int i40e_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXFH:
+		ret = i40e_set_rss_hash_opt(pf, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLINS:
+		ret = i40e_add_fdir_ethtool(vsi, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		ret = i40e_del_fdir_entry(vsi, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_max_channels - get Max number of combined channels supported
+ * @vsi: vsi pointer
+ **/
+static unsigned int i40e_max_channels(struct i40e_vsi *vsi)
+{
+	/* TODO: This code assumes DCB and FD is disabled for now. */
+	return vsi->alloc_queue_pairs;
+}
+
+/**
+ * i40e_get_channels - Get the current channels enabled and max supported etc.
+ * @dev: network interface device structure
+ * @ch: ethtool channels structure
+ *
+ * We don't support separate tx and rx queues as channels. The other count
+ * represents how many queues are being used for control. max_combined counts
+ * how many queue pairs we can support. They may not be mapped 1 to 1 with
+ * q_vectors since we support a lot more queue pairs than q_vectors.
+ **/
+static void i40e_get_channels(struct net_device *dev,
+			      struct ethtool_channels *ch)
+{
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+
+	/* report maximum channels */
+	ch->max_combined = i40e_max_channels(vsi);
+
+	/* report info for other vector */
+	ch->other_count = (pf->flags & I40E_FLAG_FD_SB_ENABLED) ? 1 : 0;
+	ch->max_other = ch->other_count;
+
+	/* Note: This code assumes DCB is disabled for now. */
+	ch->combined_count = vsi->num_queue_pairs;
+}
+
+/**
+ * i40e_set_channels - Set the new channels count.
+ * @dev: network interface device structure
+ * @ch: ethtool channels structure
+ *
+ * The new channels count may not be the same as requested by the user
+ * since it gets rounded down to a power of 2 value.
+ **/
+static int i40e_set_channels(struct net_device *dev,
+			     struct ethtool_channels *ch)
+{
+	const u8 drop = I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET;
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+	unsigned int count = ch->combined_count;
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_fdir_filter *rule;
+	struct hlist_node *node2;
+	int new_count;
+	int err = 0;
+
+	/* We do not support setting channels for any other VSI at present */
+	if (vsi->type != I40E_VSI_MAIN)
+		return -EINVAL;
+
+	/* We do not support setting channels via ethtool when TCs are
+	 * configured through mqprio
+	 */
+	if (i40e_is_tc_mqprio_enabled(pf))
+		return -EINVAL;
+
+	/* verify they are not requesting separate vectors */
+	if (!count || ch->rx_count || ch->tx_count)
+		return -EINVAL;
+
+	/* verify other_count has not changed */
+	if (ch->other_count != ((pf->flags & I40E_FLAG_FD_SB_ENABLED) ? 1 : 0))
+		return -EINVAL;
+
+	/* verify the number of channels does not exceed hardware limits */
+	if (count > i40e_max_channels(vsi))
+		return -EINVAL;
+
+	/* verify that the number of channels does not invalidate any current
+	 * flow director rules
+	 */
+	hlist_for_each_entry_safe(rule, node2,
+				  &pf->fdir_filter_list, fdir_node) {
+		if (rule->dest_ctl != drop && count <= rule->q_index) {
+			dev_warn(&pf->pdev->dev,
+				 "Existing user defined filter %d assigns flow to queue %d\n",
+				 rule->fd_id, rule->q_index);
+			err = -EINVAL;
+		}
+	}
+
+	if (err) {
+		dev_err(&pf->pdev->dev,
+			"Existing filter rules must be deleted to reduce combined channel count to %d\n",
+			count);
+		return err;
+	}
+
+	/* update feature limits from largest to smallest supported values */
+	/* TODO: Flow director limit, DCB etc */
+
+	/* use rss_reconfig to rebuild with new queue count and update traffic
+	 * class queue mapping
+	 */
+	new_count = i40e_reconfig_rss_queues(pf, count);
+	if (new_count > 0)
+		return 0;
+	else
+		return -EINVAL;
+}
+
+/**
+ * i40e_get_rxfh_key_size - get the RSS hash key size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size.
+ **/
+static u32 i40e_get_rxfh_key_size(struct net_device *netdev)
+{
+	return I40E_HKEY_ARRAY_SIZE;
+}
+
+/**
+ * i40e_get_rxfh_indir_size - get the rx flow hash indirection table size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size.
+ **/
+static u32 i40e_get_rxfh_indir_size(struct net_device *netdev)
+{
+	return I40E_HLUT_ARRAY_SIZE;
+}
+
+/**
+ * i40e_get_rxfh - get the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function
+ *
+ * Reads the indirection table directly from the hardware. Returns 0 on
+ * success.
+ **/
+static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+			 u8 *hfunc)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	u8 *lut, *seed = NULL;
+	int ret;
+	u16 i;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	if (!indir)
+		return 0;
+
+	seed = key;
+	lut = kzalloc(I40E_HLUT_ARRAY_SIZE, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+	ret = i40e_get_rss(vsi, seed, lut, I40E_HLUT_ARRAY_SIZE);
+	if (ret)
+		goto out;
+	for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++)
+		indir[i] = (u32)(lut[i]);
+
+out:
+	kfree(lut);
+
+	return ret;
+}
+
+/**
+ * i40e_set_rxfh - set the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function to use
+ *
+ * Returns -EINVAL if the table specifies an invalid queue id, otherwise
+ * returns 0 after programming the table.
+ **/
+static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir,
+			 const u8 *key, const u8 hfunc)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	u8 *seed = NULL;
+	u16 i;
+
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+		return -EOPNOTSUPP;
+
+	if (key) {
+		if (!vsi->rss_hkey_user) {
+			vsi->rss_hkey_user = kzalloc(I40E_HKEY_ARRAY_SIZE,
+						     GFP_KERNEL);
+			if (!vsi->rss_hkey_user)
+				return -ENOMEM;
+		}
+		memcpy(vsi->rss_hkey_user, key, I40E_HKEY_ARRAY_SIZE);
+		seed = vsi->rss_hkey_user;
+	}
+	if (!vsi->rss_lut_user) {
+		vsi->rss_lut_user = kzalloc(I40E_HLUT_ARRAY_SIZE, GFP_KERNEL);
+		if (!vsi->rss_lut_user)
+			return -ENOMEM;
+	}
+
+	/* Each 32 bits pointed by 'indir' is stored with a lut entry */
+	if (indir)
+		for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++)
+			vsi->rss_lut_user[i] = (u8)(indir[i]);
+	else
+		i40e_fill_rss_lut(pf, vsi->rss_lut_user, I40E_HLUT_ARRAY_SIZE,
+				  vsi->rss_size);
+
+	return i40e_config_rss(vsi, seed, vsi->rss_lut_user,
+			       I40E_HLUT_ARRAY_SIZE);
+}
+
+/**
+ * i40e_get_priv_flags - report device private flags
+ * @dev: network interface device structure
+ *
+ * The get string set count and the string set should be matched for each
+ * flag returned.  Add new strings for each flag to the i40e_gstrings_priv_flags
+ * array.
+ *
+ * Returns a u32 bitmap of flags.
+ **/
+static u32 i40e_get_priv_flags(struct net_device *dev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	u32 i, j, ret_flags = 0;
+
+	for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) {
+		const struct i40e_priv_flags *priv_flags;
+
+		priv_flags = &i40e_gstrings_priv_flags[i];
+
+		if (priv_flags->flag & pf->flags)
+			ret_flags |= BIT(i);
+	}
+
+	if (pf->hw.pf_id != 0)
+		return ret_flags;
+
+	for (j = 0; j < I40E_GL_PRIV_FLAGS_STR_LEN; j++) {
+		const struct i40e_priv_flags *priv_flags;
+
+		priv_flags = &i40e_gl_gstrings_priv_flags[j];
+
+		if (priv_flags->flag & pf->flags)
+			ret_flags |= BIT(i + j);
+	}
+
+	return ret_flags;
+}
+
+/**
+ * i40e_set_priv_flags - set private flags
+ * @dev: network interface device structure
+ * @flags: bit flags to be set
+ **/
+static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
+{
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+	u64 orig_flags, new_flags, changed_flags;
+	enum i40e_admin_queue_err adq_err;
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	u32 reset_needed = 0;
+	int status;
+	u32 i, j;
+
+	orig_flags = READ_ONCE(pf->flags);
+	new_flags = orig_flags;
+
+	for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) {
+		const struct i40e_priv_flags *priv_flags;
+
+		priv_flags = &i40e_gstrings_priv_flags[i];
+
+		if (flags & BIT(i))
+			new_flags |= priv_flags->flag;
+		else
+			new_flags &= ~(priv_flags->flag);
+
+		/* If this is a read-only flag, it can't be changed */
+		if (priv_flags->read_only &&
+		    ((orig_flags ^ new_flags) & ~BIT(i)))
+			return -EOPNOTSUPP;
+	}
+
+	if (pf->hw.pf_id != 0)
+		goto flags_complete;
+
+	for (j = 0; j < I40E_GL_PRIV_FLAGS_STR_LEN; j++) {
+		const struct i40e_priv_flags *priv_flags;
+
+		priv_flags = &i40e_gl_gstrings_priv_flags[j];
+
+		if (flags & BIT(i + j))
+			new_flags |= priv_flags->flag;
+		else
+			new_flags &= ~(priv_flags->flag);
+
+		/* If this is a read-only flag, it can't be changed */
+		if (priv_flags->read_only &&
+		    ((orig_flags ^ new_flags) & ~BIT(i)))
+			return -EOPNOTSUPP;
+	}
+
+flags_complete:
+	changed_flags = orig_flags ^ new_flags;
+
+	if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP)
+		reset_needed = I40E_PF_RESET_AND_REBUILD_FLAG;
+	if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED |
+	    I40E_FLAG_LEGACY_RX | I40E_FLAG_SOURCE_PRUNING_DISABLED))
+		reset_needed = BIT(__I40E_PF_RESET_REQUESTED);
+
+	/* Before we finalize any flag changes, we need to perform some
+	 * checks to ensure that the changes are supported and safe.
+	 */
+
+	/* ATR eviction is not supported on all devices */
+	if ((new_flags & I40E_FLAG_HW_ATR_EVICT_ENABLED) &&
+	    !(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE))
+		return -EOPNOTSUPP;
+
+	/* If the driver detected FW LLDP was disabled on init, this flag could
+	 * be set, however we do not support _changing_ the flag:
+	 * - on XL710 if NPAR is enabled or FW API version < 1.7
+	 * - on X722 with FW API version < 1.6
+	 * There are situations where older FW versions/NPAR enabled PFs could
+	 * disable LLDP, however we _must_ not allow the user to enable/disable
+	 * LLDP with this flag on unsupported FW versions.
+	 */
+	if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
+		if (!(pf->hw.flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE)) {
+			dev_warn(&pf->pdev->dev,
+				 "Device does not support changing FW LLDP\n");
+			return -EOPNOTSUPP;
+		}
+	}
+
+	if (changed_flags & I40E_FLAG_RS_FEC &&
+	    pf->hw.device_id != I40E_DEV_ID_25G_SFP28 &&
+	    pf->hw.device_id != I40E_DEV_ID_25G_B) {
+		dev_warn(&pf->pdev->dev,
+			 "Device does not support changing FEC configuration\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (changed_flags & I40E_FLAG_BASE_R_FEC &&
+	    pf->hw.device_id != I40E_DEV_ID_25G_SFP28 &&
+	    pf->hw.device_id != I40E_DEV_ID_25G_B &&
+	    pf->hw.device_id != I40E_DEV_ID_KX_X722) {
+		dev_warn(&pf->pdev->dev,
+			 "Device does not support changing FEC configuration\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* Process any additional changes needed as a result of flag changes.
+	 * The changed_flags value reflects the list of bits that were
+	 * changed in the code above.
+	 */
+
+	/* Flush current ATR settings if ATR was disabled */
+	if ((changed_flags & I40E_FLAG_FD_ATR_ENABLED) &&
+	    !(new_flags & I40E_FLAG_FD_ATR_ENABLED)) {
+		set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
+		set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
+	}
+
+	if (changed_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT) {
+		u16 sw_flags = 0, valid_flags = 0;
+		int ret;
+
+		if (!(new_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT))
+			sw_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
+		valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
+		ret = i40e_aq_set_switch_config(&pf->hw, sw_flags, valid_flags,
+						0, NULL);
+		if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) {
+			dev_info(&pf->pdev->dev,
+				 "couldn't set switch config bits, err %pe aq_err %s\n",
+				 ERR_PTR(ret),
+				 i40e_aq_str(&pf->hw,
+					     pf->hw.aq.asq_last_status));
+			/* not a fatal problem, just keep going */
+		}
+	}
+
+	if ((changed_flags & I40E_FLAG_RS_FEC) ||
+	    (changed_flags & I40E_FLAG_BASE_R_FEC)) {
+		u8 fec_cfg = 0;
+
+		if (new_flags & I40E_FLAG_RS_FEC &&
+		    new_flags & I40E_FLAG_BASE_R_FEC) {
+			fec_cfg = I40E_AQ_SET_FEC_AUTO;
+		} else if (new_flags & I40E_FLAG_RS_FEC) {
+			fec_cfg = (I40E_AQ_SET_FEC_REQUEST_RS |
+				   I40E_AQ_SET_FEC_ABILITY_RS);
+		} else if (new_flags & I40E_FLAG_BASE_R_FEC) {
+			fec_cfg = (I40E_AQ_SET_FEC_REQUEST_KR |
+				   I40E_AQ_SET_FEC_ABILITY_KR);
+		}
+		if (i40e_set_fec_cfg(dev, fec_cfg))
+			dev_warn(&pf->pdev->dev, "Cannot change FEC config\n");
+	}
+
+	if ((changed_flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) &&
+	    (orig_flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED)) {
+		dev_err(&pf->pdev->dev,
+			"Setting link-down-on-close not supported on this port (because total-port-shutdown is enabled)\n");
+		return -EOPNOTSUPP;
+	}
+
+	if ((changed_flags & I40E_FLAG_VF_VLAN_PRUNING) &&
+	    pf->num_alloc_vfs) {
+		dev_warn(&pf->pdev->dev,
+			 "Changing vf-vlan-pruning flag while VF(s) are active is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if ((changed_flags & I40E_FLAG_LEGACY_RX) &&
+	    I40E_2K_TOO_SMALL_WITH_PADDING) {
+		dev_warn(&pf->pdev->dev,
+			 "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n");
+		return -EOPNOTSUPP;
+	}
+
+	if ((changed_flags & new_flags &
+	     I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) &&
+	    (new_flags & I40E_FLAG_MFP_ENABLED))
+		dev_warn(&pf->pdev->dev,
+			 "Turning on link-down-on-close flag may affect other partitions\n");
+
+	if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
+		if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) {
+#ifdef CONFIG_I40E_DCB
+			i40e_dcb_sw_default_config(pf);
+#endif /* CONFIG_I40E_DCB */
+			i40e_aq_cfg_lldp_mib_change_event(&pf->hw, false, NULL);
+			i40e_aq_stop_lldp(&pf->hw, true, false, NULL);
+		} else {
+			status = i40e_aq_start_lldp(&pf->hw, false, NULL);
+			if (status) {
+				adq_err = pf->hw.aq.asq_last_status;
+				switch (adq_err) {
+				case I40E_AQ_RC_EEXIST:
+					dev_warn(&pf->pdev->dev,
+						 "FW LLDP agent is already running\n");
+					reset_needed = 0;
+					break;
+				case I40E_AQ_RC_EPERM:
+					dev_warn(&pf->pdev->dev,
+						 "Device configuration forbids SW from starting the LLDP agent.\n");
+					return -EINVAL;
+				case I40E_AQ_RC_EAGAIN:
+					dev_warn(&pf->pdev->dev,
+						 "Stop FW LLDP agent command is still being processed, please try again in a second.\n");
+					return -EBUSY;
+				default:
+					dev_warn(&pf->pdev->dev,
+						 "Starting FW LLDP agent failed: error: %pe, %s\n",
+						 ERR_PTR(status),
+						 i40e_aq_str(&pf->hw,
+							     adq_err));
+					return -EINVAL;
+				}
+			}
+		}
+	}
+
+	/* Now that we've checked to ensure that the new flags are valid, load
+	 * them into place. Since we only modify flags either (a) during
+	 * initialization or (b) while holding the RTNL lock, we don't need
+	 * anything fancy here.
+	 */
+	pf->flags = new_flags;
+
+	/* Issue reset to cause things to take effect, as additional bits
+	 * are added we will need to create a mask of bits requiring reset
+	 */
+	if (reset_needed)
+		i40e_do_reset(pf, reset_needed, true);
+
+	return 0;
+}
+
+/**
+ * i40e_get_module_info - get (Q)SFP+ module type info
+ * @netdev: network interface device structure
+ * @modinfo: module EEPROM size and layout information structure
+ **/
+static int i40e_get_module_info(struct net_device *netdev,
+				struct ethtool_modinfo *modinfo)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	u32 sff8472_comp = 0;
+	u32 sff8472_swap = 0;
+	u32 sff8636_rev = 0;
+	u32 type = 0;
+	int status;
+
+	/* Check if firmware supports reading module EEPROM. */
+	if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) {
+		netdev_err(vsi->netdev, "Module EEPROM memory read not supported. Please update the NVM image.\n");
+		return -EINVAL;
+	}
+
+	status = i40e_update_link_info(hw);
+	if (status)
+		return -EIO;
+
+	if (hw->phy.link_info.phy_type == I40E_PHY_TYPE_EMPTY) {
+		netdev_err(vsi->netdev, "Cannot read module EEPROM memory. No module connected.\n");
+		return -EINVAL;
+	}
+
+	type = hw->phy.link_info.module_type[0];
+
+	switch (type) {
+	case I40E_MODULE_TYPE_SFP:
+		status = i40e_aq_get_phy_register(hw,
+				I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+				I40E_I2C_EEPROM_DEV_ADDR, true,
+				I40E_MODULE_SFF_8472_COMP,
+				&sff8472_comp, NULL);
+		if (status)
+			return -EIO;
+
+		status = i40e_aq_get_phy_register(hw,
+				I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+				I40E_I2C_EEPROM_DEV_ADDR, true,
+				I40E_MODULE_SFF_8472_SWAP,
+				&sff8472_swap, NULL);
+		if (status)
+			return -EIO;
+
+		/* Check if the module requires address swap to access
+		 * the other EEPROM memory page.
+		 */
+		if (sff8472_swap & I40E_MODULE_SFF_ADDR_MODE) {
+			netdev_warn(vsi->netdev, "Module address swap to access page 0xA2 is not supported.\n");
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		} else if (sff8472_comp == 0x00) {
+			/* Module is not SFF-8472 compliant */
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		} else if (!(sff8472_swap & I40E_MODULE_SFF_DDM_IMPLEMENTED)) {
+			/* Module is SFF-8472 compliant but doesn't implement
+			 * Digital Diagnostic Monitoring (DDM).
+			 */
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8472;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+		}
+		break;
+	case I40E_MODULE_TYPE_QSFP_PLUS:
+		/* Read from memory page 0. */
+		status = i40e_aq_get_phy_register(hw,
+				I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+				0, true,
+				I40E_MODULE_REVISION_ADDR,
+				&sff8636_rev, NULL);
+		if (status)
+			return -EIO;
+		/* Determine revision compliance byte */
+		if (sff8636_rev > 0x02) {
+			/* Module is SFF-8636 compliant */
+			modinfo->type = ETH_MODULE_SFF_8636;
+			modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8436;
+			modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+		}
+		break;
+	case I40E_MODULE_TYPE_QSFP28:
+		modinfo->type = ETH_MODULE_SFF_8636;
+		modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+		break;
+	default:
+		netdev_err(vsi->netdev, "Module type unrecognized\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * i40e_get_module_eeprom - fills buffer with (Q)SFP+ module memory contents
+ * @netdev: network interface device structure
+ * @ee: EEPROM dump request structure
+ * @data: buffer to be filled with EEPROM contents
+ **/
+static int i40e_get_module_eeprom(struct net_device *netdev,
+				  struct ethtool_eeprom *ee,
+				  u8 *data)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	bool is_sfp = false;
+	u32 value = 0;
+	int status;
+	int i;
+
+	if (!ee || !ee->len || !data)
+		return -EINVAL;
+
+	if (hw->phy.link_info.module_type[0] == I40E_MODULE_TYPE_SFP)
+		is_sfp = true;
+
+	for (i = 0; i < ee->len; i++) {
+		u32 offset = i + ee->offset;
+		u32 addr = is_sfp ? I40E_I2C_EEPROM_DEV_ADDR : 0;
+
+		/* Check if we need to access the other memory page */
+		if (is_sfp) {
+			if (offset >= ETH_MODULE_SFF_8079_LEN) {
+				offset -= ETH_MODULE_SFF_8079_LEN;
+				addr = I40E_I2C_EEPROM_DEV_ADDR2;
+			}
+		} else {
+			while (offset >= ETH_MODULE_SFF_8436_LEN) {
+				/* Compute memory page number and offset. */
+				offset -= ETH_MODULE_SFF_8436_LEN / 2;
+				addr++;
+			}
+		}
+
+		status = i40e_aq_get_phy_register(hw,
+				I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+				addr, true, offset, &value, NULL);
+		if (status)
+			return -EIO;
+		data[i] = value;
+	}
+	return 0;
+}
+
+static int i40e_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_aq_get_phy_abilities_resp phy_cfg;
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	int status = 0;
+
+	/* Get initial PHY capabilities */
+	status = i40e_aq_get_phy_capabilities(hw, false, true, &phy_cfg, NULL);
+	if (status)
+		return -EAGAIN;
+
+	/* Check whether NIC configuration is compatible with Energy Efficient
+	 * Ethernet (EEE) mode.
+	 */
+	if (phy_cfg.eee_capability == 0)
+		return -EOPNOTSUPP;
+
+	edata->supported = SUPPORTED_Autoneg;
+	edata->lp_advertised = edata->supported;
+
+	/* Get current configuration */
+	status = i40e_aq_get_phy_capabilities(hw, false, false, &phy_cfg, NULL);
+	if (status)
+		return -EAGAIN;
+
+	edata->advertised = phy_cfg.eee_capability ? SUPPORTED_Autoneg : 0U;
+	edata->eee_enabled = !!edata->advertised;
+	edata->tx_lpi_enabled = pf->stats.tx_lpi_status;
+
+	edata->eee_active = pf->stats.tx_lpi_status && pf->stats.rx_lpi_status;
+
+	return 0;
+}
+
+static int i40e_is_eee_param_supported(struct net_device *netdev,
+				       struct ethtool_eee *edata)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_ethtool_not_used {
+		u32 value;
+		const char *name;
+	} param[] = {
+		{edata->advertised & ~SUPPORTED_Autoneg, "advertise"},
+		{edata->tx_lpi_timer, "tx-timer"},
+		{edata->tx_lpi_enabled != pf->stats.tx_lpi_status, "tx-lpi"}
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(param); i++) {
+		if (param[i].value) {
+			netdev_info(netdev,
+				    "EEE setting %s not supported\n",
+				    param[i].name);
+			return -EOPNOTSUPP;
+		}
+	}
+
+	return 0;
+}
+
+static int i40e_set_eee(struct net_device *netdev, struct ethtool_eee *edata)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_aq_get_phy_abilities_resp abilities;
+	struct i40e_aq_set_phy_config config;
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	__le16 eee_capability;
+	int status = 0;
+
+	/* Deny parameters we don't support */
+	if (i40e_is_eee_param_supported(netdev, edata))
+		return -EOPNOTSUPP;
+
+	/* Get initial PHY capabilities */
+	status = i40e_aq_get_phy_capabilities(hw, false, true, &abilities,
+					      NULL);
+	if (status)
+		return -EAGAIN;
+
+	/* Check whether NIC configuration is compatible with Energy Efficient
+	 * Ethernet (EEE) mode.
+	 */
+	if (abilities.eee_capability == 0)
+		return -EOPNOTSUPP;
+
+	/* Cache initial EEE capability */
+	eee_capability = abilities.eee_capability;
+
+	/* Get current PHY configuration */
+	status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+					      NULL);
+	if (status)
+		return -EAGAIN;
+
+	/* Cache current PHY configuration */
+	config.phy_type = abilities.phy_type;
+	config.phy_type_ext = abilities.phy_type_ext;
+	config.link_speed = abilities.link_speed;
+	config.abilities = abilities.abilities |
+			   I40E_AQ_PHY_ENABLE_ATOMIC_LINK;
+	config.eeer = abilities.eeer_val;
+	config.low_power_ctrl = abilities.d3_lpan;
+	config.fec_config = abilities.fec_cfg_curr_mod_ext_info &
+			    I40E_AQ_PHY_FEC_CONFIG_MASK;
+
+	/* Set desired EEE state */
+	if (edata->eee_enabled) {
+		config.eee_capability = eee_capability;
+		config.eeer |= cpu_to_le32(I40E_PRTPM_EEER_TX_LPI_EN_MASK);
+	} else {
+		config.eee_capability = 0;
+		config.eeer &= cpu_to_le32(~I40E_PRTPM_EEER_TX_LPI_EN_MASK);
+	}
+
+	/* Apply modified PHY configuration */
+	status = i40e_aq_set_phy_config(hw, &config, NULL);
+	if (status)
+		return -EAGAIN;
+
+	return 0;
+}
+
+static const struct ethtool_ops i40e_ethtool_recovery_mode_ops = {
+	.get_drvinfo		= i40e_get_drvinfo,
+	.set_eeprom		= i40e_set_eeprom,
+	.get_eeprom_len		= i40e_get_eeprom_len,
+	.get_eeprom		= i40e_get_eeprom,
+};
+
+static const struct ethtool_ops i40e_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+				     ETHTOOL_COALESCE_MAX_FRAMES_IRQ |
+				     ETHTOOL_COALESCE_USE_ADAPTIVE |
+				     ETHTOOL_COALESCE_RX_USECS_HIGH |
+				     ETHTOOL_COALESCE_TX_USECS_HIGH,
+	.get_drvinfo		= i40e_get_drvinfo,
+	.get_regs_len		= i40e_get_regs_len,
+	.get_regs		= i40e_get_regs,
+	.nway_reset		= i40e_nway_reset,
+	.get_link		= ethtool_op_get_link,
+	.get_wol		= i40e_get_wol,
+	.set_wol		= i40e_set_wol,
+	.set_eeprom		= i40e_set_eeprom,
+	.get_eeprom_len		= i40e_get_eeprom_len,
+	.get_eeprom		= i40e_get_eeprom,
+	.get_ringparam		= i40e_get_ringparam,
+	.set_ringparam		= i40e_set_ringparam,
+	.get_pauseparam		= i40e_get_pauseparam,
+	.set_pauseparam		= i40e_set_pauseparam,
+	.get_msglevel		= i40e_get_msglevel,
+	.set_msglevel		= i40e_set_msglevel,
+	.get_rxnfc		= i40e_get_rxnfc,
+	.set_rxnfc		= i40e_set_rxnfc,
+	.self_test		= i40e_diag_test,
+	.get_strings		= i40e_get_strings,
+	.get_eee		= i40e_get_eee,
+	.set_eee		= i40e_set_eee,
+	.set_phys_id		= i40e_set_phys_id,
+	.get_sset_count		= i40e_get_sset_count,
+	.get_ethtool_stats	= i40e_get_ethtool_stats,
+	.get_coalesce		= i40e_get_coalesce,
+	.set_coalesce		= i40e_set_coalesce,
+	.get_rxfh_key_size	= i40e_get_rxfh_key_size,
+	.get_rxfh_indir_size	= i40e_get_rxfh_indir_size,
+	.get_rxfh		= i40e_get_rxfh,
+	.set_rxfh		= i40e_set_rxfh,
+	.get_channels		= i40e_get_channels,
+	.set_channels		= i40e_set_channels,
+	.get_module_info	= i40e_get_module_info,
+	.get_module_eeprom	= i40e_get_module_eeprom,
+	.get_ts_info		= i40e_get_ts_info,
+	.get_priv_flags		= i40e_get_priv_flags,
+	.set_priv_flags		= i40e_set_priv_flags,
+	.get_per_queue_coalesce	= i40e_get_per_queue_coalesce,
+	.set_per_queue_coalesce	= i40e_set_per_queue_coalesce,
+	.get_link_ksettings	= i40e_get_link_ksettings,
+	.set_link_ksettings	= i40e_set_link_ksettings,
+	.get_fecparam = i40e_get_fec_param,
+	.set_fecparam = i40e_set_fec_param,
+	.flash_device = i40e_ddp_flash,
+};
+
+void i40e_set_ethtool_ops(struct net_device *netdev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf		*pf = np->vsi->back;
+
+	if (!test_bit(__I40E_RECOVERY_MODE, pf->state))
+		netdev->ethtool_ops = &i40e_ethtool_ops;
+	else
+		netdev->ethtool_ops = &i40e_ethtool_recovery_mode_ops;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
new file mode 100644
index 0000000000..96ee63aca7
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "i40e.h"
+#include "i40e_osdep.h"
+#include "i40e_register.h"
+#include "i40e_alloc.h"
+#include "i40e_hmc.h"
+#include "i40e_type.h"
+
+/**
+ * i40e_add_sd_table_entry - Adds a segment descriptor to the table
+ * @hw: pointer to our hw struct
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @sd_index: segment descriptor index to manipulate
+ * @type: what type of segment descriptor we're manipulating
+ * @direct_mode_sz: size to alloc in direct mode
+ **/
+int i40e_add_sd_table_entry(struct i40e_hw *hw,
+			    struct i40e_hmc_info *hmc_info,
+			    u32 sd_index,
+			    enum i40e_sd_entry_type type,
+			    u64 direct_mode_sz)
+{
+	enum i40e_memory_type mem_type __attribute__((unused));
+	struct i40e_hmc_sd_entry *sd_entry;
+	bool dma_mem_alloc_done = false;
+	struct i40e_dma_mem mem;
+	int ret_code = 0;
+	u64 alloc_len;
+
+	if (NULL == hmc_info->sd_table.sd_entry) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_add_sd_table_entry: bad sd_entry\n");
+		goto exit;
+	}
+
+	if (sd_index >= hmc_info->sd_table.sd_cnt) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_add_sd_table_entry: bad sd_index\n");
+		goto exit;
+	}
+
+	sd_entry = &hmc_info->sd_table.sd_entry[sd_index];
+	if (!sd_entry->valid) {
+		if (I40E_SD_TYPE_PAGED == type) {
+			mem_type = i40e_mem_pd;
+			alloc_len = I40E_HMC_PAGED_BP_SIZE;
+		} else {
+			mem_type = i40e_mem_bp_jumbo;
+			alloc_len = direct_mode_sz;
+		}
+
+		/* allocate a 4K pd page or 2M backing page */
+		ret_code = i40e_allocate_dma_mem(hw, &mem, mem_type, alloc_len,
+						 I40E_HMC_PD_BP_BUF_ALIGNMENT);
+		if (ret_code)
+			goto exit;
+		dma_mem_alloc_done = true;
+		if (I40E_SD_TYPE_PAGED == type) {
+			ret_code = i40e_allocate_virt_mem(hw,
+					&sd_entry->u.pd_table.pd_entry_virt_mem,
+					sizeof(struct i40e_hmc_pd_entry) * 512);
+			if (ret_code)
+				goto exit;
+			sd_entry->u.pd_table.pd_entry =
+				(struct i40e_hmc_pd_entry *)
+				sd_entry->u.pd_table.pd_entry_virt_mem.va;
+			sd_entry->u.pd_table.pd_page_addr = mem;
+		} else {
+			sd_entry->u.bp.addr = mem;
+			sd_entry->u.bp.sd_pd_index = sd_index;
+		}
+		/* initialize the sd entry */
+		hmc_info->sd_table.sd_entry[sd_index].entry_type = type;
+
+		/* increment the ref count */
+		I40E_INC_SD_REFCNT(&hmc_info->sd_table);
+	}
+	/* Increment backing page reference count */
+	if (I40E_SD_TYPE_DIRECT == sd_entry->entry_type)
+		I40E_INC_BP_REFCNT(&sd_entry->u.bp);
+exit:
+	if (ret_code)
+		if (dma_mem_alloc_done)
+			i40e_free_dma_mem(hw, &mem);
+
+	return ret_code;
+}
+
+/**
+ * i40e_add_pd_table_entry - Adds page descriptor to the specified table
+ * @hw: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @pd_index: which page descriptor index to manipulate
+ * @rsrc_pg: if not NULL, use preallocated page instead of allocating new one.
+ *
+ * This function:
+ *	1. Initializes the pd entry
+ *	2. Adds pd_entry in the pd_table
+ *	3. Mark the entry valid in i40e_hmc_pd_entry structure
+ *	4. Initializes the pd_entry's ref count to 1
+ * assumptions:
+ *	1. The memory for pd should be pinned down, physically contiguous and
+ *	   aligned on 4K boundary and zeroed memory.
+ *	2. It should be 4K in size.
+ **/
+int i40e_add_pd_table_entry(struct i40e_hw *hw,
+			    struct i40e_hmc_info *hmc_info,
+			    u32 pd_index,
+			    struct i40e_dma_mem *rsrc_pg)
+{
+	struct i40e_hmc_pd_table *pd_table;
+	struct i40e_hmc_pd_entry *pd_entry;
+	struct i40e_dma_mem mem;
+	struct i40e_dma_mem *page = &mem;
+	u32 sd_idx, rel_pd_idx;
+	int ret_code = 0;
+	u64 page_desc;
+	u64 *pd_addr;
+
+	if (pd_index / I40E_HMC_PD_CNT_IN_SD >= hmc_info->sd_table.sd_cnt) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_add_pd_table_entry: bad pd_index\n");
+		goto exit;
+	}
+
+	/* find corresponding sd */
+	sd_idx = (pd_index / I40E_HMC_PD_CNT_IN_SD);
+	if (I40E_SD_TYPE_PAGED !=
+	    hmc_info->sd_table.sd_entry[sd_idx].entry_type)
+		goto exit;
+
+	rel_pd_idx = (pd_index % I40E_HMC_PD_CNT_IN_SD);
+	pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+	pd_entry = &pd_table->pd_entry[rel_pd_idx];
+	if (!pd_entry->valid) {
+		if (rsrc_pg) {
+			pd_entry->rsrc_pg = true;
+			page = rsrc_pg;
+		} else {
+			/* allocate a 4K backing page */
+			ret_code = i40e_allocate_dma_mem(hw, page, i40e_mem_bp,
+						I40E_HMC_PAGED_BP_SIZE,
+						I40E_HMC_PD_BP_BUF_ALIGNMENT);
+			if (ret_code)
+				goto exit;
+			pd_entry->rsrc_pg = false;
+		}
+
+		pd_entry->bp.addr = *page;
+		pd_entry->bp.sd_pd_index = pd_index;
+		pd_entry->bp.entry_type = I40E_SD_TYPE_PAGED;
+		/* Set page address and valid bit */
+		page_desc = page->pa | 0x1;
+
+		pd_addr = (u64 *)pd_table->pd_page_addr.va;
+		pd_addr += rel_pd_idx;
+
+		/* Add the backing page physical address in the pd entry */
+		memcpy(pd_addr, &page_desc, sizeof(u64));
+
+		pd_entry->sd_index = sd_idx;
+		pd_entry->valid = true;
+		I40E_INC_PD_REFCNT(pd_table);
+	}
+	I40E_INC_BP_REFCNT(&pd_entry->bp);
+exit:
+	return ret_code;
+}
+
+/**
+ * i40e_remove_pd_bp - remove a backing page from a page descriptor
+ * @hw: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ *
+ * This function:
+ *	1. Marks the entry in pd tabe (for paged address mode) or in sd table
+ *	   (for direct address mode) invalid.
+ *	2. Write to register PMPDINV to invalidate the backing page in FV cache
+ *	3. Decrement the ref count for the pd _entry
+ * assumptions:
+ *	1. Caller can deallocate the memory used by backing storage after this
+ *	   function returns.
+ **/
+int i40e_remove_pd_bp(struct i40e_hw *hw,
+		      struct i40e_hmc_info *hmc_info,
+		      u32 idx)
+{
+	struct i40e_hmc_pd_entry *pd_entry;
+	struct i40e_hmc_pd_table *pd_table;
+	struct i40e_hmc_sd_entry *sd_entry;
+	u32 sd_idx, rel_pd_idx;
+	int ret_code = 0;
+	u64 *pd_addr;
+
+	/* calculate index */
+	sd_idx = idx / I40E_HMC_PD_CNT_IN_SD;
+	rel_pd_idx = idx % I40E_HMC_PD_CNT_IN_SD;
+	if (sd_idx >= hmc_info->sd_table.sd_cnt) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_remove_pd_bp: bad idx\n");
+		goto exit;
+	}
+	sd_entry = &hmc_info->sd_table.sd_entry[sd_idx];
+	if (I40E_SD_TYPE_PAGED != sd_entry->entry_type) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_remove_pd_bp: wrong sd_entry type\n");
+		goto exit;
+	}
+	/* get the entry and decrease its ref counter */
+	pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+	pd_entry = &pd_table->pd_entry[rel_pd_idx];
+	I40E_DEC_BP_REFCNT(&pd_entry->bp);
+	if (pd_entry->bp.ref_cnt)
+		goto exit;
+
+	/* mark the entry invalid */
+	pd_entry->valid = false;
+	I40E_DEC_PD_REFCNT(pd_table);
+	pd_addr = (u64 *)pd_table->pd_page_addr.va;
+	pd_addr += rel_pd_idx;
+	memset(pd_addr, 0, sizeof(u64));
+	I40E_INVALIDATE_PF_HMC_PD(hw, sd_idx, idx);
+
+	/* free memory here */
+	if (!pd_entry->rsrc_pg)
+		ret_code = i40e_free_dma_mem(hw, &pd_entry->bp.addr);
+	if (ret_code)
+		goto exit;
+	if (!pd_table->ref_cnt)
+		i40e_free_virt_mem(hw, &pd_table->pd_entry_virt_mem);
+exit:
+	return ret_code;
+}
+
+/**
+ * i40e_prep_remove_sd_bp - Prepares to remove a backing page from a sd entry
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ **/
+int i40e_prep_remove_sd_bp(struct i40e_hmc_info *hmc_info,
+			   u32 idx)
+{
+	struct i40e_hmc_sd_entry *sd_entry;
+	int ret_code = 0;
+
+	/* get the entry and decrease its ref counter */
+	sd_entry = &hmc_info->sd_table.sd_entry[idx];
+	I40E_DEC_BP_REFCNT(&sd_entry->u.bp);
+	if (sd_entry->u.bp.ref_cnt) {
+		ret_code = -EBUSY;
+		goto exit;
+	}
+	I40E_DEC_SD_REFCNT(&hmc_info->sd_table);
+
+	/* mark the entry invalid */
+	sd_entry->valid = false;
+exit:
+	return ret_code;
+}
+
+/**
+ * i40e_remove_sd_bp_new - Removes a backing page from a segment descriptor
+ * @hw: pointer to our hw struct
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ * @is_pf: used to distinguish between VF and PF
+ **/
+int i40e_remove_sd_bp_new(struct i40e_hw *hw,
+			  struct i40e_hmc_info *hmc_info,
+			  u32 idx, bool is_pf)
+{
+	struct i40e_hmc_sd_entry *sd_entry;
+
+	if (!is_pf)
+		return -EOPNOTSUPP;
+
+	/* get the entry and decrease its ref counter */
+	sd_entry = &hmc_info->sd_table.sd_entry[idx];
+	I40E_CLEAR_PF_SD_ENTRY(hw, idx, I40E_SD_TYPE_DIRECT);
+
+	return i40e_free_dma_mem(hw, &sd_entry->u.bp.addr);
+}
+
+/**
+ * i40e_prep_remove_pd_page - Prepares to remove a PD page from sd entry.
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: segment descriptor index to find the relevant page descriptor
+ **/
+int i40e_prep_remove_pd_page(struct i40e_hmc_info *hmc_info,
+			     u32 idx)
+{
+	struct i40e_hmc_sd_entry *sd_entry;
+	int ret_code = 0;
+
+	sd_entry = &hmc_info->sd_table.sd_entry[idx];
+
+	if (sd_entry->u.pd_table.ref_cnt) {
+		ret_code = -EBUSY;
+		goto exit;
+	}
+
+	/* mark the entry invalid */
+	sd_entry->valid = false;
+
+	I40E_DEC_SD_REFCNT(&hmc_info->sd_table);
+exit:
+	return ret_code;
+}
+
+/**
+ * i40e_remove_pd_page_new - Removes a PD page from sd entry.
+ * @hw: pointer to our hw struct
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: segment descriptor index to find the relevant page descriptor
+ * @is_pf: used to distinguish between VF and PF
+ **/
+int i40e_remove_pd_page_new(struct i40e_hw *hw,
+			    struct i40e_hmc_info *hmc_info,
+			    u32 idx, bool is_pf)
+{
+	struct i40e_hmc_sd_entry *sd_entry;
+
+	if (!is_pf)
+		return -EOPNOTSUPP;
+
+	sd_entry = &hmc_info->sd_table.sd_entry[idx];
+	I40E_CLEAR_PF_SD_ENTRY(hw, idx, I40E_SD_TYPE_PAGED);
+
+	return  i40e_free_dma_mem(hw, &sd_entry->u.pd_table.pd_page_addr);
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
new file mode 100644
index 0000000000..9960da07a5
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _I40E_HMC_H_
+#define _I40E_HMC_H_
+
+#define I40E_HMC_MAX_BP_COUNT 512
+
+/* forward-declare the HW struct for the compiler */
+struct i40e_hw;
+
+#define I40E_HMC_INFO_SIGNATURE		0x484D5347 /* HMSG */
+#define I40E_HMC_PD_CNT_IN_SD		512
+#define I40E_HMC_DIRECT_BP_SIZE		0x200000 /* 2M */
+#define I40E_HMC_PAGED_BP_SIZE		4096
+#define I40E_HMC_PD_BP_BUF_ALIGNMENT	4096
+
+struct i40e_hmc_obj_info {
+	u64 base;	/* base addr in FPM */
+	u32 max_cnt;	/* max count available for this hmc func */
+	u32 cnt;	/* count of objects driver actually wants to create */
+	u64 size;	/* size in bytes of one object */
+};
+
+enum i40e_sd_entry_type {
+	I40E_SD_TYPE_INVALID = 0,
+	I40E_SD_TYPE_PAGED   = 1,
+	I40E_SD_TYPE_DIRECT  = 2
+};
+
+struct i40e_hmc_bp {
+	enum i40e_sd_entry_type entry_type;
+	struct i40e_dma_mem addr; /* populate to be used by hw */
+	u32 sd_pd_index;
+	u32 ref_cnt;
+};
+
+struct i40e_hmc_pd_entry {
+	struct i40e_hmc_bp bp;
+	u32 sd_index;
+	bool rsrc_pg;
+	bool valid;
+};
+
+struct i40e_hmc_pd_table {
+	struct i40e_dma_mem pd_page_addr; /* populate to be used by hw */
+	struct i40e_hmc_pd_entry  *pd_entry; /* [512] for sw book keeping */
+	struct i40e_virt_mem pd_entry_virt_mem; /* virt mem for pd_entry */
+
+	u32 ref_cnt;
+	u32 sd_index;
+};
+
+struct i40e_hmc_sd_entry {
+	enum i40e_sd_entry_type entry_type;
+	bool valid;
+
+	union {
+		struct i40e_hmc_pd_table pd_table;
+		struct i40e_hmc_bp bp;
+	} u;
+};
+
+struct i40e_hmc_sd_table {
+	struct i40e_virt_mem addr; /* used to track sd_entry allocations */
+	u32 sd_cnt;
+	u32 ref_cnt;
+	struct i40e_hmc_sd_entry *sd_entry; /* (sd_cnt*512) entries max */
+};
+
+struct i40e_hmc_info {
+	u32 signature;
+	/* equals to pci func num for PF and dynamically allocated for VFs */
+	u8 hmc_fn_id;
+	u16 first_sd_index; /* index of the first available SD */
+
+	/* hmc objects */
+	struct i40e_hmc_obj_info *hmc_obj;
+	struct i40e_virt_mem hmc_obj_virt_mem;
+	struct i40e_hmc_sd_table sd_table;
+};
+
+#define I40E_INC_SD_REFCNT(sd_table)	((sd_table)->ref_cnt++)
+#define I40E_INC_PD_REFCNT(pd_table)	((pd_table)->ref_cnt++)
+#define I40E_INC_BP_REFCNT(bp)		((bp)->ref_cnt++)
+
+#define I40E_DEC_SD_REFCNT(sd_table)	((sd_table)->ref_cnt--)
+#define I40E_DEC_PD_REFCNT(pd_table)	((pd_table)->ref_cnt--)
+#define I40E_DEC_BP_REFCNT(bp)		((bp)->ref_cnt--)
+
+/**
+ * I40E_SET_PF_SD_ENTRY - marks the sd entry as valid in the hardware
+ * @hw: pointer to our hw struct
+ * @pa: pointer to physical address
+ * @sd_index: segment descriptor index
+ * @type: if sd entry is direct or paged
+ **/
+#define I40E_SET_PF_SD_ENTRY(hw, pa, sd_index, type)			\
+{									\
+	u32 val1, val2, val3;						\
+	val1 = (u32)(upper_32_bits(pa));				\
+	val2 = (u32)(pa) | (I40E_HMC_MAX_BP_COUNT <<			\
+		 I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |		\
+		((((type) == I40E_SD_TYPE_PAGED) ? 0 : 1) <<		\
+		I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT) |			\
+		BIT(I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT);		\
+	val3 = (sd_index) | BIT_ULL(I40E_PFHMC_SDCMD_PMSDWR_SHIFT);	\
+	wr32((hw), I40E_PFHMC_SDDATAHIGH, val1);			\
+	wr32((hw), I40E_PFHMC_SDDATALOW, val2);				\
+	wr32((hw), I40E_PFHMC_SDCMD, val3);				\
+}
+
+/**
+ * I40E_CLEAR_PF_SD_ENTRY - marks the sd entry as invalid in the hardware
+ * @hw: pointer to our hw struct
+ * @sd_index: segment descriptor index
+ * @type: if sd entry is direct or paged
+ **/
+#define I40E_CLEAR_PF_SD_ENTRY(hw, sd_index, type)			\
+{									\
+	u32 val2, val3;							\
+	val2 = (I40E_HMC_MAX_BP_COUNT <<				\
+		I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |		\
+		((((type) == I40E_SD_TYPE_PAGED) ? 0 : 1) <<		\
+		I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT);			\
+	val3 = (sd_index) | BIT_ULL(I40E_PFHMC_SDCMD_PMSDWR_SHIFT);	\
+	wr32((hw), I40E_PFHMC_SDDATAHIGH, 0);				\
+	wr32((hw), I40E_PFHMC_SDDATALOW, val2);				\
+	wr32((hw), I40E_PFHMC_SDCMD, val3);				\
+}
+
+/**
+ * I40E_INVALIDATE_PF_HMC_PD - Invalidates the pd cache in the hardware
+ * @hw: pointer to our hw struct
+ * @sd_idx: segment descriptor index
+ * @pd_idx: page descriptor index
+ **/
+#define I40E_INVALIDATE_PF_HMC_PD(hw, sd_idx, pd_idx)			\
+	wr32((hw), I40E_PFHMC_PDINV,					\
+	    (((sd_idx) << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) |		\
+	     ((pd_idx) << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
+
+/**
+ * I40E_FIND_SD_INDEX_LIMIT - finds segment descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @type: type of HMC resources we're searching
+ * @index: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @sd_idx: pointer to return index of the segment descriptor in question
+ * @sd_limit: pointer to return the maximum number of segment descriptors
+ *
+ * This function calculates the segment descriptor index and index limit
+ * for the resource defined by i40e_hmc_rsrc_type.
+ **/
+#define I40E_FIND_SD_INDEX_LIMIT(hmc_info, type, index, cnt, sd_idx, sd_limit)\
+{									\
+	u64 fpm_addr, fpm_limit;					\
+	fpm_addr = (hmc_info)->hmc_obj[(type)].base +			\
+		   (hmc_info)->hmc_obj[(type)].size * (index);		\
+	fpm_limit = fpm_addr + (hmc_info)->hmc_obj[(type)].size * (cnt);\
+	*(sd_idx) = (u32)(fpm_addr / I40E_HMC_DIRECT_BP_SIZE);		\
+	*(sd_limit) = (u32)((fpm_limit - 1) / I40E_HMC_DIRECT_BP_SIZE);	\
+	/* add one more to the limit to correct our range */		\
+	*(sd_limit) += 1;						\
+}
+
+/**
+ * I40E_FIND_PD_INDEX_LIMIT - finds page descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @type: HMC resource type we're examining
+ * @idx: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @pd_index: pointer to return page descriptor index
+ * @pd_limit: pointer to return page descriptor index limit
+ *
+ * Calculates the page descriptor index and index limit for the resource
+ * defined by i40e_hmc_rsrc_type.
+ **/
+#define I40E_FIND_PD_INDEX_LIMIT(hmc_info, type, idx, cnt, pd_index, pd_limit)\
+{									\
+	u64 fpm_adr, fpm_limit;						\
+	fpm_adr = (hmc_info)->hmc_obj[(type)].base +			\
+		  (hmc_info)->hmc_obj[(type)].size * (idx);		\
+	fpm_limit = fpm_adr + (hmc_info)->hmc_obj[(type)].size * (cnt);	\
+	*(pd_index) = (u32)(fpm_adr / I40E_HMC_PAGED_BP_SIZE);		\
+	*(pd_limit) = (u32)((fpm_limit - 1) / I40E_HMC_PAGED_BP_SIZE);	\
+	/* add one more to the limit to correct our range */		\
+	*(pd_limit) += 1;						\
+}
+
+int i40e_add_sd_table_entry(struct i40e_hw *hw,
+			    struct i40e_hmc_info *hmc_info,
+			    u32 sd_index,
+			    enum i40e_sd_entry_type type,
+			    u64 direct_mode_sz);
+int i40e_add_pd_table_entry(struct i40e_hw *hw,
+			    struct i40e_hmc_info *hmc_info,
+			    u32 pd_index,
+			    struct i40e_dma_mem *rsrc_pg);
+int i40e_remove_pd_bp(struct i40e_hw *hw,
+		      struct i40e_hmc_info *hmc_info,
+		      u32 idx);
+int i40e_prep_remove_sd_bp(struct i40e_hmc_info *hmc_info,
+			   u32 idx);
+int i40e_remove_sd_bp_new(struct i40e_hw *hw,
+			  struct i40e_hmc_info *hmc_info,
+			  u32 idx, bool is_pf);
+int i40e_prep_remove_pd_page(struct i40e_hmc_info *hmc_info,
+			     u32 idx);
+int i40e_remove_pd_page_new(struct i40e_hw *hw,
+			    struct i40e_hmc_info *hmc_info,
+			    u32 idx, bool is_pf);
+
+#endif /* _I40E_HMC_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
new file mode 100644
index 0000000000..474365bf06
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
@@ -0,0 +1,1120 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "i40e.h"
+#include "i40e_osdep.h"
+#include "i40e_register.h"
+#include "i40e_type.h"
+#include "i40e_hmc.h"
+#include "i40e_lan_hmc.h"
+#include "i40e_prototype.h"
+
+/* lan specific interface functions */
+
+/**
+ * i40e_align_l2obj_base - aligns base object pointer to 512 bytes
+ * @offset: base address offset needing alignment
+ *
+ * Aligns the layer 2 function private memory so it's 512-byte aligned.
+ **/
+static u64 i40e_align_l2obj_base(u64 offset)
+{
+	u64 aligned_offset = offset;
+
+	if ((offset % I40E_HMC_L2OBJ_BASE_ALIGNMENT) > 0)
+		aligned_offset += (I40E_HMC_L2OBJ_BASE_ALIGNMENT -
+				   (offset % I40E_HMC_L2OBJ_BASE_ALIGNMENT));
+
+	return aligned_offset;
+}
+
+/**
+ * i40e_calculate_l2fpm_size - calculates layer 2 FPM memory size
+ * @txq_num: number of Tx queues needing backing context
+ * @rxq_num: number of Rx queues needing backing context
+ * @fcoe_cntx_num: amount of FCoE statefull contexts needing backing context
+ * @fcoe_filt_num: number of FCoE filters needing backing context
+ *
+ * Calculates the maximum amount of memory for the function required, based
+ * on the number of resources it must provide context for.
+ **/
+static u64 i40e_calculate_l2fpm_size(u32 txq_num, u32 rxq_num,
+			      u32 fcoe_cntx_num, u32 fcoe_filt_num)
+{
+	u64 fpm_size = 0;
+
+	fpm_size = txq_num * I40E_HMC_OBJ_SIZE_TXQ;
+	fpm_size = i40e_align_l2obj_base(fpm_size);
+
+	fpm_size += (rxq_num * I40E_HMC_OBJ_SIZE_RXQ);
+	fpm_size = i40e_align_l2obj_base(fpm_size);
+
+	fpm_size += (fcoe_cntx_num * I40E_HMC_OBJ_SIZE_FCOE_CNTX);
+	fpm_size = i40e_align_l2obj_base(fpm_size);
+
+	fpm_size += (fcoe_filt_num * I40E_HMC_OBJ_SIZE_FCOE_FILT);
+	fpm_size = i40e_align_l2obj_base(fpm_size);
+
+	return fpm_size;
+}
+
+/**
+ * i40e_init_lan_hmc - initialize i40e_hmc_info struct
+ * @hw: pointer to the HW structure
+ * @txq_num: number of Tx queues needing backing context
+ * @rxq_num: number of Rx queues needing backing context
+ * @fcoe_cntx_num: amount of FCoE statefull contexts needing backing context
+ * @fcoe_filt_num: number of FCoE filters needing backing context
+ *
+ * This function will be called once per physical function initialization.
+ * It will fill out the i40e_hmc_obj_info structure for LAN objects based on
+ * the driver's provided input, as well as information from the HMC itself
+ * loaded from NVRAM.
+ *
+ * Assumptions:
+ *   - HMC Resource Profile has been selected before calling this function.
+ **/
+int i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
+		      u32 rxq_num, u32 fcoe_cntx_num,
+		      u32 fcoe_filt_num)
+{
+	struct i40e_hmc_obj_info *obj, *full_obj;
+	int ret_code = 0;
+	u64 l2fpm_size;
+	u32 size_exp;
+
+	hw->hmc.signature = I40E_HMC_INFO_SIGNATURE;
+	hw->hmc.hmc_fn_id = hw->pf_id;
+
+	/* allocate memory for hmc_obj */
+	ret_code = i40e_allocate_virt_mem(hw, &hw->hmc.hmc_obj_virt_mem,
+			sizeof(struct i40e_hmc_obj_info) * I40E_HMC_LAN_MAX);
+	if (ret_code)
+		goto init_lan_hmc_out;
+	hw->hmc.hmc_obj = (struct i40e_hmc_obj_info *)
+			  hw->hmc.hmc_obj_virt_mem.va;
+
+	/* The full object will be used to create the LAN HMC SD */
+	full_obj = &hw->hmc.hmc_obj[I40E_HMC_LAN_FULL];
+	full_obj->max_cnt = 0;
+	full_obj->cnt = 0;
+	full_obj->base = 0;
+	full_obj->size = 0;
+
+	/* Tx queue context information */
+	obj = &hw->hmc.hmc_obj[I40E_HMC_LAN_TX];
+	obj->max_cnt = rd32(hw, I40E_GLHMC_LANQMAX);
+	obj->cnt = txq_num;
+	obj->base = 0;
+	size_exp = rd32(hw, I40E_GLHMC_LANTXOBJSZ);
+	obj->size = BIT_ULL(size_exp);
+
+	/* validate values requested by driver don't exceed HMC capacity */
+	if (txq_num > obj->max_cnt) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_init_lan_hmc: Tx context: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
+			  txq_num, obj->max_cnt, ret_code);
+		goto init_lan_hmc_out;
+	}
+
+	/* aggregate values into the full LAN object for later */
+	full_obj->max_cnt += obj->max_cnt;
+	full_obj->cnt += obj->cnt;
+
+	/* Rx queue context information */
+	obj = &hw->hmc.hmc_obj[I40E_HMC_LAN_RX];
+	obj->max_cnt = rd32(hw, I40E_GLHMC_LANQMAX);
+	obj->cnt = rxq_num;
+	obj->base = hw->hmc.hmc_obj[I40E_HMC_LAN_TX].base +
+		    (hw->hmc.hmc_obj[I40E_HMC_LAN_TX].cnt *
+		     hw->hmc.hmc_obj[I40E_HMC_LAN_TX].size);
+	obj->base = i40e_align_l2obj_base(obj->base);
+	size_exp = rd32(hw, I40E_GLHMC_LANRXOBJSZ);
+	obj->size = BIT_ULL(size_exp);
+
+	/* validate values requested by driver don't exceed HMC capacity */
+	if (rxq_num > obj->max_cnt) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_init_lan_hmc: Rx context: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
+			  rxq_num, obj->max_cnt, ret_code);
+		goto init_lan_hmc_out;
+	}
+
+	/* aggregate values into the full LAN object for later */
+	full_obj->max_cnt += obj->max_cnt;
+	full_obj->cnt += obj->cnt;
+
+	/* FCoE context information */
+	obj = &hw->hmc.hmc_obj[I40E_HMC_FCOE_CTX];
+	obj->max_cnt = rd32(hw, I40E_GLHMC_FCOEMAX);
+	obj->cnt = fcoe_cntx_num;
+	obj->base = hw->hmc.hmc_obj[I40E_HMC_LAN_RX].base +
+		    (hw->hmc.hmc_obj[I40E_HMC_LAN_RX].cnt *
+		     hw->hmc.hmc_obj[I40E_HMC_LAN_RX].size);
+	obj->base = i40e_align_l2obj_base(obj->base);
+	size_exp = rd32(hw, I40E_GLHMC_FCOEDDPOBJSZ);
+	obj->size = BIT_ULL(size_exp);
+
+	/* validate values requested by driver don't exceed HMC capacity */
+	if (fcoe_cntx_num > obj->max_cnt) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_init_lan_hmc: FCoE context: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
+			  fcoe_cntx_num, obj->max_cnt, ret_code);
+		goto init_lan_hmc_out;
+	}
+
+	/* aggregate values into the full LAN object for later */
+	full_obj->max_cnt += obj->max_cnt;
+	full_obj->cnt += obj->cnt;
+
+	/* FCoE filter information */
+	obj = &hw->hmc.hmc_obj[I40E_HMC_FCOE_FILT];
+	obj->max_cnt = rd32(hw, I40E_GLHMC_FCOEFMAX);
+	obj->cnt = fcoe_filt_num;
+	obj->base = hw->hmc.hmc_obj[I40E_HMC_FCOE_CTX].base +
+		    (hw->hmc.hmc_obj[I40E_HMC_FCOE_CTX].cnt *
+		     hw->hmc.hmc_obj[I40E_HMC_FCOE_CTX].size);
+	obj->base = i40e_align_l2obj_base(obj->base);
+	size_exp = rd32(hw, I40E_GLHMC_FCOEFOBJSZ);
+	obj->size = BIT_ULL(size_exp);
+
+	/* validate values requested by driver don't exceed HMC capacity */
+	if (fcoe_filt_num > obj->max_cnt) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_init_lan_hmc: FCoE filter: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
+			  fcoe_filt_num, obj->max_cnt, ret_code);
+		goto init_lan_hmc_out;
+	}
+
+	/* aggregate values into the full LAN object for later */
+	full_obj->max_cnt += obj->max_cnt;
+	full_obj->cnt += obj->cnt;
+
+	hw->hmc.first_sd_index = 0;
+	hw->hmc.sd_table.ref_cnt = 0;
+	l2fpm_size = i40e_calculate_l2fpm_size(txq_num, rxq_num, fcoe_cntx_num,
+					       fcoe_filt_num);
+	if (NULL == hw->hmc.sd_table.sd_entry) {
+		hw->hmc.sd_table.sd_cnt = (u32)
+				   (l2fpm_size + I40E_HMC_DIRECT_BP_SIZE - 1) /
+				   I40E_HMC_DIRECT_BP_SIZE;
+
+		/* allocate the sd_entry members in the sd_table */
+		ret_code = i40e_allocate_virt_mem(hw, &hw->hmc.sd_table.addr,
+					  (sizeof(struct i40e_hmc_sd_entry) *
+					  hw->hmc.sd_table.sd_cnt));
+		if (ret_code)
+			goto init_lan_hmc_out;
+		hw->hmc.sd_table.sd_entry =
+			(struct i40e_hmc_sd_entry *)hw->hmc.sd_table.addr.va;
+	}
+	/* store in the LAN full object for later */
+	full_obj->size = l2fpm_size;
+
+init_lan_hmc_out:
+	return ret_code;
+}
+
+/**
+ * i40e_remove_pd_page - Remove a page from the page descriptor table
+ * @hw: pointer to the HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: segment descriptor index to find the relevant page descriptor
+ *
+ * This function:
+ *	1. Marks the entry in pd table (for paged address mode) invalid
+ *	2. write to register PMPDINV to invalidate the backing page in FV cache
+ *	3. Decrement the ref count for  pd_entry
+ * assumptions:
+ *	1. caller can deallocate the memory used by pd after this function
+ *	   returns.
+ **/
+static int i40e_remove_pd_page(struct i40e_hw *hw,
+			       struct i40e_hmc_info *hmc_info,
+			       u32 idx)
+{
+	int ret_code = 0;
+
+	if (!i40e_prep_remove_pd_page(hmc_info, idx))
+		ret_code = i40e_remove_pd_page_new(hw, hmc_info, idx, true);
+
+	return ret_code;
+}
+
+/**
+ * i40e_remove_sd_bp - remove a backing page from a segment descriptor
+ * @hw: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ *
+ * This function:
+ *	1. Marks the entry in sd table (for direct address mode) invalid
+ *	2. write to register PMSDCMD, PMSDDATALOW(PMSDDATALOW.PMSDVALID set
+ *	   to 0) and PMSDDATAHIGH to invalidate the sd page
+ *	3. Decrement the ref count for the sd_entry
+ * assumptions:
+ *	1. caller can deallocate the memory used by backing storage after this
+ *	   function returns.
+ **/
+static int i40e_remove_sd_bp(struct i40e_hw *hw,
+			     struct i40e_hmc_info *hmc_info,
+			     u32 idx)
+{
+	int ret_code = 0;
+
+	if (!i40e_prep_remove_sd_bp(hmc_info, idx))
+		ret_code = i40e_remove_sd_bp_new(hw, hmc_info, idx, true);
+
+	return ret_code;
+}
+
+/**
+ * i40e_create_lan_hmc_object - allocate backing store for hmc objects
+ * @hw: pointer to the HW structure
+ * @info: pointer to i40e_hmc_create_obj_info struct
+ *
+ * This will allocate memory for PDs and backing pages and populate
+ * the sd and pd entries.
+ **/
+static int i40e_create_lan_hmc_object(struct i40e_hw *hw,
+				      struct i40e_hmc_lan_create_obj_info *info)
+{
+	struct i40e_hmc_sd_entry *sd_entry;
+	u32 pd_idx1 = 0, pd_lmt1 = 0;
+	u32 pd_idx = 0, pd_lmt = 0;
+	bool pd_error = false;
+	u32 sd_idx, sd_lmt;
+	int ret_code = 0;
+	u64 sd_size;
+	u32 i, j;
+
+	if (NULL == info) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_create_lan_hmc_object: bad info ptr\n");
+		goto exit;
+	}
+	if (NULL == info->hmc_info) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_create_lan_hmc_object: bad hmc_info ptr\n");
+		goto exit;
+	}
+	if (I40E_HMC_INFO_SIGNATURE != info->hmc_info->signature) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_create_lan_hmc_object: bad signature\n");
+		goto exit;
+	}
+
+	if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_create_lan_hmc_object: returns error %d\n",
+			  ret_code);
+		goto exit;
+	}
+	if ((info->start_idx + info->count) >
+	    info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_create_lan_hmc_object: returns error %d\n",
+			  ret_code);
+		goto exit;
+	}
+
+	/* find sd index and limit */
+	I40E_FIND_SD_INDEX_LIMIT(info->hmc_info, info->rsrc_type,
+				 info->start_idx, info->count,
+				 &sd_idx, &sd_lmt);
+	if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+	    sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+		ret_code = -EINVAL;
+		goto exit;
+	}
+	/* find pd index */
+	I40E_FIND_PD_INDEX_LIMIT(info->hmc_info, info->rsrc_type,
+				 info->start_idx, info->count, &pd_idx,
+				 &pd_lmt);
+
+	/* This is to cover for cases where you may not want to have an SD with
+	 * the full 2M memory but something smaller. By not filling out any
+	 * size, the function will default the SD size to be 2M.
+	 */
+	if (info->direct_mode_sz == 0)
+		sd_size = I40E_HMC_DIRECT_BP_SIZE;
+	else
+		sd_size = info->direct_mode_sz;
+
+	/* check if all the sds are valid. If not, allocate a page and
+	 * initialize it.
+	 */
+	for (j = sd_idx; j < sd_lmt; j++) {
+		/* update the sd table entry */
+		ret_code = i40e_add_sd_table_entry(hw, info->hmc_info, j,
+						   info->entry_type,
+						   sd_size);
+		if (ret_code)
+			goto exit_sd_error;
+		sd_entry = &info->hmc_info->sd_table.sd_entry[j];
+		if (I40E_SD_TYPE_PAGED == sd_entry->entry_type) {
+			/* check if all the pds in this sd are valid. If not,
+			 * allocate a page and initialize it.
+			 */
+
+			/* find pd_idx and pd_lmt in this sd */
+			pd_idx1 = max(pd_idx, (j * I40E_HMC_MAX_BP_COUNT));
+			pd_lmt1 = min(pd_lmt,
+				      ((j + 1) * I40E_HMC_MAX_BP_COUNT));
+			for (i = pd_idx1; i < pd_lmt1; i++) {
+				/* update the pd table entry */
+				ret_code = i40e_add_pd_table_entry(hw,
+								info->hmc_info,
+								i, NULL);
+				if (ret_code) {
+					pd_error = true;
+					break;
+				}
+			}
+			if (pd_error) {
+				/* remove the backing pages from pd_idx1 to i */
+				while (i && (i > pd_idx1)) {
+					i40e_remove_pd_bp(hw, info->hmc_info,
+							  (i - 1));
+					i--;
+				}
+			}
+		}
+		if (!sd_entry->valid) {
+			sd_entry->valid = true;
+			switch (sd_entry->entry_type) {
+			case I40E_SD_TYPE_PAGED:
+				I40E_SET_PF_SD_ENTRY(hw,
+					sd_entry->u.pd_table.pd_page_addr.pa,
+					j, sd_entry->entry_type);
+				break;
+			case I40E_SD_TYPE_DIRECT:
+				I40E_SET_PF_SD_ENTRY(hw, sd_entry->u.bp.addr.pa,
+						     j, sd_entry->entry_type);
+				break;
+			default:
+				ret_code = -EINVAL;
+				goto exit;
+			}
+		}
+	}
+	goto exit;
+
+exit_sd_error:
+	/* cleanup for sd entries from j to sd_idx */
+	while (j && (j > sd_idx)) {
+		sd_entry = &info->hmc_info->sd_table.sd_entry[j - 1];
+		switch (sd_entry->entry_type) {
+		case I40E_SD_TYPE_PAGED:
+			pd_idx1 = max(pd_idx,
+				      ((j - 1) * I40E_HMC_MAX_BP_COUNT));
+			pd_lmt1 = min(pd_lmt, (j * I40E_HMC_MAX_BP_COUNT));
+			for (i = pd_idx1; i < pd_lmt1; i++)
+				i40e_remove_pd_bp(hw, info->hmc_info, i);
+			i40e_remove_pd_page(hw, info->hmc_info, (j - 1));
+			break;
+		case I40E_SD_TYPE_DIRECT:
+			i40e_remove_sd_bp(hw, info->hmc_info, (j - 1));
+			break;
+		default:
+			ret_code = -EINVAL;
+			break;
+		}
+		j--;
+	}
+exit:
+	return ret_code;
+}
+
+/**
+ * i40e_configure_lan_hmc - prepare the HMC backing store
+ * @hw: pointer to the hw structure
+ * @model: the model for the layout of the SD/PD tables
+ *
+ * - This function will be called once per physical function initialization.
+ * - This function will be called after i40e_init_lan_hmc() and before
+ *   any LAN/FCoE HMC objects can be created.
+ **/
+int i40e_configure_lan_hmc(struct i40e_hw *hw,
+			   enum i40e_hmc_model model)
+{
+	struct i40e_hmc_lan_create_obj_info info;
+	u8 hmc_fn_id = hw->hmc.hmc_fn_id;
+	struct i40e_hmc_obj_info *obj;
+	int ret_code = 0;
+
+	/* Initialize part of the create object info struct */
+	info.hmc_info = &hw->hmc;
+	info.rsrc_type = I40E_HMC_LAN_FULL;
+	info.start_idx = 0;
+	info.direct_mode_sz = hw->hmc.hmc_obj[I40E_HMC_LAN_FULL].size;
+
+	/* Build the SD entry for the LAN objects */
+	switch (model) {
+	case I40E_HMC_MODEL_DIRECT_PREFERRED:
+	case I40E_HMC_MODEL_DIRECT_ONLY:
+		info.entry_type = I40E_SD_TYPE_DIRECT;
+		/* Make one big object, a single SD */
+		info.count = 1;
+		ret_code = i40e_create_lan_hmc_object(hw, &info);
+		if (ret_code && (model == I40E_HMC_MODEL_DIRECT_PREFERRED))
+			goto try_type_paged;
+		else if (ret_code)
+			goto configure_lan_hmc_out;
+		/* else clause falls through the break */
+		break;
+	case I40E_HMC_MODEL_PAGED_ONLY:
+try_type_paged:
+		info.entry_type = I40E_SD_TYPE_PAGED;
+		/* Make one big object in the PD table */
+		info.count = 1;
+		ret_code = i40e_create_lan_hmc_object(hw, &info);
+		if (ret_code)
+			goto configure_lan_hmc_out;
+		break;
+	default:
+		/* unsupported type */
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_configure_lan_hmc: Unknown SD type: %d\n",
+			  ret_code);
+		goto configure_lan_hmc_out;
+	}
+
+	/* Configure and program the FPM registers so objects can be created */
+
+	/* Tx contexts */
+	obj = &hw->hmc.hmc_obj[I40E_HMC_LAN_TX];
+	wr32(hw, I40E_GLHMC_LANTXBASE(hmc_fn_id),
+	     (u32)((obj->base & I40E_GLHMC_LANTXBASE_FPMLANTXBASE_MASK) / 512));
+	wr32(hw, I40E_GLHMC_LANTXCNT(hmc_fn_id), obj->cnt);
+
+	/* Rx contexts */
+	obj = &hw->hmc.hmc_obj[I40E_HMC_LAN_RX];
+	wr32(hw, I40E_GLHMC_LANRXBASE(hmc_fn_id),
+	     (u32)((obj->base & I40E_GLHMC_LANRXBASE_FPMLANRXBASE_MASK) / 512));
+	wr32(hw, I40E_GLHMC_LANRXCNT(hmc_fn_id), obj->cnt);
+
+	/* FCoE contexts */
+	obj = &hw->hmc.hmc_obj[I40E_HMC_FCOE_CTX];
+	wr32(hw, I40E_GLHMC_FCOEDDPBASE(hmc_fn_id),
+	 (u32)((obj->base & I40E_GLHMC_FCOEDDPBASE_FPMFCOEDDPBASE_MASK) / 512));
+	wr32(hw, I40E_GLHMC_FCOEDDPCNT(hmc_fn_id), obj->cnt);
+
+	/* FCoE filters */
+	obj = &hw->hmc.hmc_obj[I40E_HMC_FCOE_FILT];
+	wr32(hw, I40E_GLHMC_FCOEFBASE(hmc_fn_id),
+	     (u32)((obj->base & I40E_GLHMC_FCOEFBASE_FPMFCOEFBASE_MASK) / 512));
+	wr32(hw, I40E_GLHMC_FCOEFCNT(hmc_fn_id), obj->cnt);
+
+configure_lan_hmc_out:
+	return ret_code;
+}
+
+/**
+ * i40e_delete_lan_hmc_object - remove hmc objects
+ * @hw: pointer to the HW structure
+ * @info: pointer to i40e_hmc_delete_obj_info struct
+ *
+ * This will de-populate the SDs and PDs.  It frees
+ * the memory for PDS and backing storage.  After this function is returned,
+ * caller should deallocate memory allocated previously for
+ * book-keeping information about PDs and backing storage.
+ **/
+static int i40e_delete_lan_hmc_object(struct i40e_hw *hw,
+				      struct i40e_hmc_lan_delete_obj_info *info)
+{
+	struct i40e_hmc_pd_table *pd_table;
+	u32 pd_idx, pd_lmt, rel_pd_idx;
+	u32 sd_idx, sd_lmt;
+	int ret_code = 0;
+	u32 i, j;
+
+	if (NULL == info) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_delete_hmc_object: bad info ptr\n");
+		goto exit;
+	}
+	if (NULL == info->hmc_info) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_delete_hmc_object: bad info->hmc_info ptr\n");
+		goto exit;
+	}
+	if (I40E_HMC_INFO_SIGNATURE != info->hmc_info->signature) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_delete_hmc_object: bad hmc_info->signature\n");
+		goto exit;
+	}
+
+	if (NULL == info->hmc_info->sd_table.sd_entry) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_delete_hmc_object: bad sd_entry\n");
+		goto exit;
+	}
+
+	if (NULL == info->hmc_info->hmc_obj) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_delete_hmc_object: bad hmc_info->hmc_obj\n");
+		goto exit;
+	}
+	if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_delete_hmc_object: returns error %d\n",
+			  ret_code);
+		goto exit;
+	}
+
+	if ((info->start_idx + info->count) >
+	    info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_delete_hmc_object: returns error %d\n",
+			  ret_code);
+		goto exit;
+	}
+
+	I40E_FIND_PD_INDEX_LIMIT(info->hmc_info, info->rsrc_type,
+				 info->start_idx, info->count, &pd_idx,
+				 &pd_lmt);
+
+	for (j = pd_idx; j < pd_lmt; j++) {
+		sd_idx = j / I40E_HMC_PD_CNT_IN_SD;
+
+		if (I40E_SD_TYPE_PAGED !=
+		    info->hmc_info->sd_table.sd_entry[sd_idx].entry_type)
+			continue;
+
+		rel_pd_idx = j % I40E_HMC_PD_CNT_IN_SD;
+
+		pd_table =
+			&info->hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+		if (pd_table->pd_entry[rel_pd_idx].valid) {
+			ret_code = i40e_remove_pd_bp(hw, info->hmc_info, j);
+			if (ret_code)
+				goto exit;
+		}
+	}
+
+	/* find sd index and limit */
+	I40E_FIND_SD_INDEX_LIMIT(info->hmc_info, info->rsrc_type,
+				 info->start_idx, info->count,
+				 &sd_idx, &sd_lmt);
+	if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+	    sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+		ret_code = -EINVAL;
+		goto exit;
+	}
+
+	for (i = sd_idx; i < sd_lmt; i++) {
+		if (!info->hmc_info->sd_table.sd_entry[i].valid)
+			continue;
+		switch (info->hmc_info->sd_table.sd_entry[i].entry_type) {
+		case I40E_SD_TYPE_DIRECT:
+			ret_code = i40e_remove_sd_bp(hw, info->hmc_info, i);
+			if (ret_code)
+				goto exit;
+			break;
+		case I40E_SD_TYPE_PAGED:
+			ret_code = i40e_remove_pd_page(hw, info->hmc_info, i);
+			if (ret_code)
+				goto exit;
+			break;
+		default:
+			break;
+		}
+	}
+exit:
+	return ret_code;
+}
+
+/**
+ * i40e_shutdown_lan_hmc - Remove HMC backing store, free allocated memory
+ * @hw: pointer to the hw structure
+ *
+ * This must be called by drivers as they are shutting down and being
+ * removed from the OS.
+ **/
+int i40e_shutdown_lan_hmc(struct i40e_hw *hw)
+{
+	struct i40e_hmc_lan_delete_obj_info info;
+	int ret_code;
+
+	info.hmc_info = &hw->hmc;
+	info.rsrc_type = I40E_HMC_LAN_FULL;
+	info.start_idx = 0;
+	info.count = 1;
+
+	/* delete the object */
+	ret_code = i40e_delete_lan_hmc_object(hw, &info);
+
+	/* free the SD table entry for LAN */
+	i40e_free_virt_mem(hw, &hw->hmc.sd_table.addr);
+	hw->hmc.sd_table.sd_cnt = 0;
+	hw->hmc.sd_table.sd_entry = NULL;
+
+	/* free memory used for hmc_obj */
+	i40e_free_virt_mem(hw, &hw->hmc.hmc_obj_virt_mem);
+	hw->hmc.hmc_obj = NULL;
+
+	return ret_code;
+}
+
+#define I40E_HMC_STORE(_struct, _ele)		\
+	offsetof(struct _struct, _ele),		\
+	sizeof_field(struct _struct, _ele)
+
+struct i40e_context_ele {
+	u16 offset;
+	u16 size_of;
+	u16 width;
+	u16 lsb;
+};
+
+/* LAN Tx Queue Context */
+static struct i40e_context_ele i40e_hmc_txq_ce_info[] = {
+					     /* Field      Width    LSB */
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, head),           13,      0 },
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, new_context),     1,     30 },
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, base),           57,     32 },
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, fc_ena),          1,     89 },
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, timesync_ena),    1,     90 },
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, fd_ena),          1,     91 },
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, alt_vlan_ena),    1,     92 },
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, cpuid),           8,     96 },
+/* line 1 */
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, thead_wb),       13,  0 + 128 },
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, head_wb_ena),     1, 32 + 128 },
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, qlen),           13, 33 + 128 },
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, tphrdesc_ena),    1, 46 + 128 },
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, tphrpacket_ena),  1, 47 + 128 },
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, tphwdesc_ena),    1, 48 + 128 },
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, head_wb_addr),   64, 64 + 128 },
+/* line 7 */
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, crc),            32,  0 + (7 * 128) },
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, rdylist),        10, 84 + (7 * 128) },
+	{I40E_HMC_STORE(i40e_hmc_obj_txq, rdylist_act),     1, 94 + (7 * 128) },
+	{ 0 }
+};
+
+/* LAN Rx Queue Context */
+static struct i40e_context_ele i40e_hmc_rxq_ce_info[] = {
+					 /* Field      Width    LSB */
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, head),        13,	0   },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, cpuid),        8,	13  },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, base),        57,	32  },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, qlen),        13,	89  },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, dbuff),        7,	102 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, hbuff),        5,	109 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, dtype),        2,	114 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, dsize),        1,	116 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, crcstrip),     1,	117 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, fc_ena),       1,	118 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, l2tsel),       1,	119 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, hsplit_0),     4,	120 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, hsplit_1),     2,	124 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, showiv),       1,	127 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, rxmax),       14,	174 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, tphrdesc_ena), 1,	193 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, tphwdesc_ena), 1,	194 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, tphdata_ena),  1,	195 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, tphhead_ena),  1,	196 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, lrxqthresh),   3,	198 },
+	{ I40E_HMC_STORE(i40e_hmc_obj_rxq, prefena),      1,	201 },
+	{ 0 }
+};
+
+/**
+ * i40e_write_byte - replace HMC context byte
+ * @hmc_bits: pointer to the HMC memory
+ * @ce_info: a description of the struct to be read from
+ * @src: the struct to be read from
+ **/
+static void i40e_write_byte(u8 *hmc_bits,
+			    struct i40e_context_ele *ce_info,
+			    u8 *src)
+{
+	u8 src_byte, dest_byte, mask;
+	u8 *from, *dest;
+	u16 shift_width;
+
+	/* copy from the next struct field */
+	from = src + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+	mask = (u8)(BIT(ce_info->width) - 1);
+
+	src_byte = *from;
+	src_byte &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_byte <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = hmc_bits + (ce_info->lsb / 8);
+
+	memcpy(&dest_byte, dest, sizeof(dest_byte));
+
+	dest_byte &= ~mask;	/* get the bits not changing */
+	dest_byte |= src_byte;	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_byte, sizeof(dest_byte));
+}
+
+/**
+ * i40e_write_word - replace HMC context word
+ * @hmc_bits: pointer to the HMC memory
+ * @ce_info: a description of the struct to be read from
+ * @src: the struct to be read from
+ **/
+static void i40e_write_word(u8 *hmc_bits,
+			    struct i40e_context_ele *ce_info,
+			    u8 *src)
+{
+	u16 src_word, mask;
+	u8 *from, *dest;
+	u16 shift_width;
+	__le16 dest_word;
+
+	/* copy from the next struct field */
+	from = src + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+	mask = BIT(ce_info->width) - 1;
+
+	/* don't swizzle the bits until after the mask because the mask bits
+	 * will be in a different bit position on big endian machines
+	 */
+	src_word = *(u16 *)from;
+	src_word &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_word <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = hmc_bits + (ce_info->lsb / 8);
+
+	memcpy(&dest_word, dest, sizeof(dest_word));
+
+	dest_word &= ~(cpu_to_le16(mask));	/* get the bits not changing */
+	dest_word |= cpu_to_le16(src_word);	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_word, sizeof(dest_word));
+}
+
+/**
+ * i40e_write_dword - replace HMC context dword
+ * @hmc_bits: pointer to the HMC memory
+ * @ce_info: a description of the struct to be read from
+ * @src: the struct to be read from
+ **/
+static void i40e_write_dword(u8 *hmc_bits,
+			     struct i40e_context_ele *ce_info,
+			     u8 *src)
+{
+	u32 src_dword, mask;
+	u8 *from, *dest;
+	u16 shift_width;
+	__le32 dest_dword;
+
+	/* copy from the next struct field */
+	from = src + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+
+	/* if the field width is exactly 32 on an x86 machine, then the shift
+	 * operation will not work because the SHL instructions count is masked
+	 * to 5 bits so the shift will do nothing
+	 */
+	if (ce_info->width < 32)
+		mask = BIT(ce_info->width) - 1;
+	else
+		mask = ~(u32)0;
+
+	/* don't swizzle the bits until after the mask because the mask bits
+	 * will be in a different bit position on big endian machines
+	 */
+	src_dword = *(u32 *)from;
+	src_dword &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_dword <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = hmc_bits + (ce_info->lsb / 8);
+
+	memcpy(&dest_dword, dest, sizeof(dest_dword));
+
+	dest_dword &= ~(cpu_to_le32(mask));	/* get the bits not changing */
+	dest_dword |= cpu_to_le32(src_dword);	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_dword, sizeof(dest_dword));
+}
+
+/**
+ * i40e_write_qword - replace HMC context qword
+ * @hmc_bits: pointer to the HMC memory
+ * @ce_info: a description of the struct to be read from
+ * @src: the struct to be read from
+ **/
+static void i40e_write_qword(u8 *hmc_bits,
+			     struct i40e_context_ele *ce_info,
+			     u8 *src)
+{
+	u64 src_qword, mask;
+	u8 *from, *dest;
+	u16 shift_width;
+	__le64 dest_qword;
+
+	/* copy from the next struct field */
+	from = src + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+
+	/* if the field width is exactly 64 on an x86 machine, then the shift
+	 * operation will not work because the SHL instructions count is masked
+	 * to 6 bits so the shift will do nothing
+	 */
+	if (ce_info->width < 64)
+		mask = BIT_ULL(ce_info->width) - 1;
+	else
+		mask = ~(u64)0;
+
+	/* don't swizzle the bits until after the mask because the mask bits
+	 * will be in a different bit position on big endian machines
+	 */
+	src_qword = *(u64 *)from;
+	src_qword &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_qword <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = hmc_bits + (ce_info->lsb / 8);
+
+	memcpy(&dest_qword, dest, sizeof(dest_qword));
+
+	dest_qword &= ~(cpu_to_le64(mask));	/* get the bits not changing */
+	dest_qword |= cpu_to_le64(src_qword);	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_qword, sizeof(dest_qword));
+}
+
+/**
+ * i40e_clear_hmc_context - zero out the HMC context bits
+ * @hw:       the hardware struct
+ * @context_bytes: pointer to the context bit array (DMA memory)
+ * @hmc_type: the type of HMC resource
+ **/
+static int i40e_clear_hmc_context(struct i40e_hw *hw,
+				  u8 *context_bytes,
+				  enum i40e_hmc_lan_rsrc_type hmc_type)
+{
+	/* clean the bit array */
+	memset(context_bytes, 0, (u32)hw->hmc.hmc_obj[hmc_type].size);
+
+	return 0;
+}
+
+/**
+ * i40e_set_hmc_context - replace HMC context bits
+ * @context_bytes: pointer to the context bit array
+ * @ce_info:  a description of the struct to be filled
+ * @dest:     the struct to be filled
+ **/
+static int i40e_set_hmc_context(u8 *context_bytes,
+				struct i40e_context_ele *ce_info,
+				u8 *dest)
+{
+	int f;
+
+	for (f = 0; ce_info[f].width != 0; f++) {
+
+		/* we have to deal with each element of the HMC using the
+		 * correct size so that we are correct regardless of the
+		 * endianness of the machine
+		 */
+		switch (ce_info[f].size_of) {
+		case 1:
+			i40e_write_byte(context_bytes, &ce_info[f], dest);
+			break;
+		case 2:
+			i40e_write_word(context_bytes, &ce_info[f], dest);
+			break;
+		case 4:
+			i40e_write_dword(context_bytes, &ce_info[f], dest);
+			break;
+		case 8:
+			i40e_write_qword(context_bytes, &ce_info[f], dest);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_hmc_get_object_va - retrieves an object's virtual address
+ * @hw: the hardware struct, from which we obtain the i40e_hmc_info pointer
+ * @object_base: pointer to u64 to get the va
+ * @rsrc_type: the hmc resource type
+ * @obj_idx: hmc object index
+ *
+ * This function retrieves the object's virtual address from the object
+ * base pointer.  This function is used for LAN Queue contexts.
+ **/
+static
+int i40e_hmc_get_object_va(struct i40e_hw *hw, u8 **object_base,
+			   enum i40e_hmc_lan_rsrc_type rsrc_type,
+			   u32 obj_idx)
+{
+	struct i40e_hmc_info *hmc_info = &hw->hmc;
+	u32 obj_offset_in_sd, obj_offset_in_pd;
+	struct i40e_hmc_sd_entry *sd_entry;
+	struct i40e_hmc_pd_entry *pd_entry;
+	u32 pd_idx, pd_lmt, rel_pd_idx;
+	u64 obj_offset_in_fpm;
+	u32 sd_idx, sd_lmt;
+	int ret_code = 0;
+
+	if (NULL == hmc_info) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_hmc_get_object_va: bad hmc_info ptr\n");
+		goto exit;
+	}
+	if (NULL == hmc_info->hmc_obj) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_hmc_get_object_va: bad hmc_info->hmc_obj ptr\n");
+		goto exit;
+	}
+	if (NULL == object_base) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_hmc_get_object_va: bad object_base ptr\n");
+		goto exit;
+	}
+	if (I40E_HMC_INFO_SIGNATURE != hmc_info->signature) {
+		ret_code = -EINVAL;
+		hw_dbg(hw, "i40e_hmc_get_object_va: bad hmc_info->signature\n");
+		goto exit;
+	}
+	if (obj_idx >= hmc_info->hmc_obj[rsrc_type].cnt) {
+		hw_dbg(hw, "i40e_hmc_get_object_va: returns error %d\n",
+			  ret_code);
+		ret_code = -EINVAL;
+		goto exit;
+	}
+	/* find sd index and limit */
+	I40E_FIND_SD_INDEX_LIMIT(hmc_info, rsrc_type, obj_idx, 1,
+				 &sd_idx, &sd_lmt);
+
+	sd_entry = &hmc_info->sd_table.sd_entry[sd_idx];
+	obj_offset_in_fpm = hmc_info->hmc_obj[rsrc_type].base +
+			    hmc_info->hmc_obj[rsrc_type].size * obj_idx;
+
+	if (I40E_SD_TYPE_PAGED == sd_entry->entry_type) {
+		I40E_FIND_PD_INDEX_LIMIT(hmc_info, rsrc_type, obj_idx, 1,
+					 &pd_idx, &pd_lmt);
+		rel_pd_idx = pd_idx % I40E_HMC_PD_CNT_IN_SD;
+		pd_entry = &sd_entry->u.pd_table.pd_entry[rel_pd_idx];
+		obj_offset_in_pd = (u32)(obj_offset_in_fpm %
+					 I40E_HMC_PAGED_BP_SIZE);
+		*object_base = (u8 *)pd_entry->bp.addr.va + obj_offset_in_pd;
+	} else {
+		obj_offset_in_sd = (u32)(obj_offset_in_fpm %
+					 I40E_HMC_DIRECT_BP_SIZE);
+		*object_base = (u8 *)sd_entry->u.bp.addr.va + obj_offset_in_sd;
+	}
+exit:
+	return ret_code;
+}
+
+/**
+ * i40e_clear_lan_tx_queue_context - clear the HMC context for the queue
+ * @hw:    the hardware struct
+ * @queue: the queue we care about
+ **/
+int i40e_clear_lan_tx_queue_context(struct i40e_hw *hw,
+				    u16 queue)
+{
+	u8 *context_bytes;
+	int err;
+
+	err = i40e_hmc_get_object_va(hw, &context_bytes,
+				     I40E_HMC_LAN_TX, queue);
+	if (err < 0)
+		return err;
+
+	return i40e_clear_hmc_context(hw, context_bytes, I40E_HMC_LAN_TX);
+}
+
+/**
+ * i40e_set_lan_tx_queue_context - set the HMC context for the queue
+ * @hw:    the hardware struct
+ * @queue: the queue we care about
+ * @s:     the struct to be filled
+ **/
+int i40e_set_lan_tx_queue_context(struct i40e_hw *hw,
+				  u16 queue,
+				  struct i40e_hmc_obj_txq *s)
+{
+	u8 *context_bytes;
+	int err;
+
+	err = i40e_hmc_get_object_va(hw, &context_bytes,
+				     I40E_HMC_LAN_TX, queue);
+	if (err < 0)
+		return err;
+
+	return i40e_set_hmc_context(context_bytes,
+				    i40e_hmc_txq_ce_info, (u8 *)s);
+}
+
+/**
+ * i40e_clear_lan_rx_queue_context - clear the HMC context for the queue
+ * @hw:    the hardware struct
+ * @queue: the queue we care about
+ **/
+int i40e_clear_lan_rx_queue_context(struct i40e_hw *hw,
+				    u16 queue)
+{
+	u8 *context_bytes;
+	int err;
+
+	err = i40e_hmc_get_object_va(hw, &context_bytes,
+				     I40E_HMC_LAN_RX, queue);
+	if (err < 0)
+		return err;
+
+	return i40e_clear_hmc_context(hw, context_bytes, I40E_HMC_LAN_RX);
+}
+
+/**
+ * i40e_set_lan_rx_queue_context - set the HMC context for the queue
+ * @hw:    the hardware struct
+ * @queue: the queue we care about
+ * @s:     the struct to be filled
+ **/
+int i40e_set_lan_rx_queue_context(struct i40e_hw *hw,
+				  u16 queue,
+				  struct i40e_hmc_obj_rxq *s)
+{
+	u8 *context_bytes;
+	int err;
+
+	err = i40e_hmc_get_object_va(hw, &context_bytes,
+				     I40E_HMC_LAN_RX, queue);
+	if (err < 0)
+		return err;
+
+	return i40e_set_hmc_context(context_bytes,
+				    i40e_hmc_rxq_ce_info, (u8 *)s);
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
new file mode 100644
index 0000000000..9f960404c2
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _I40E_LAN_HMC_H_
+#define _I40E_LAN_HMC_H_
+
+/* forward-declare the HW struct for the compiler */
+struct i40e_hw;
+
+/* HMC element context information */
+
+/* Rx queue context data
+ *
+ * The sizes of the variables may be larger than needed due to crossing byte
+ * boundaries. If we do not have the width of the variable set to the correct
+ * size then we could end up shifting bits off the top of the variable when the
+ * variable is at the top of a byte and crosses over into the next byte.
+ */
+struct i40e_hmc_obj_rxq {
+	u16 head;
+	u16 cpuid; /* bigger than needed, see above for reason */
+	u64 base;
+	u16 qlen;
+#define I40E_RXQ_CTX_DBUFF_SHIFT 7
+	u16 dbuff; /* bigger than needed, see above for reason */
+#define I40E_RXQ_CTX_HBUFF_SHIFT 6
+	u16 hbuff; /* bigger than needed, see above for reason */
+	u8  dtype;
+	u8  dsize;
+	u8  crcstrip;
+	u8  fc_ena;
+	u8  l2tsel;
+	u8  hsplit_0;
+	u8  hsplit_1;
+	u8  showiv;
+	u32 rxmax; /* bigger than needed, see above for reason */
+	u8  tphrdesc_ena;
+	u8  tphwdesc_ena;
+	u8  tphdata_ena;
+	u8  tphhead_ena;
+	u16 lrxqthresh; /* bigger than needed, see above for reason */
+	u8  prefena;	/* NOTE: normally must be set to 1 at init */
+};
+
+/* Tx queue context data
+*
+* The sizes of the variables may be larger than needed due to crossing byte
+* boundaries. If we do not have the width of the variable set to the correct
+* size then we could end up shifting bits off the top of the variable when the
+* variable is at the top of a byte and crosses over into the next byte.
+*/
+struct i40e_hmc_obj_txq {
+	u16 head;
+	u8  new_context;
+	u64 base;
+	u8  fc_ena;
+	u8  timesync_ena;
+	u8  fd_ena;
+	u8  alt_vlan_ena;
+	u16 thead_wb;
+	u8  cpuid;
+	u8  head_wb_ena;
+	u16 qlen;
+	u8  tphrdesc_ena;
+	u8  tphrpacket_ena;
+	u8  tphwdesc_ena;
+	u64 head_wb_addr;
+	u32 crc;
+	u16 rdylist;
+	u8  rdylist_act;
+};
+
+/* for hsplit_0 field of Rx HMC context */
+enum i40e_hmc_obj_rx_hsplit_0 {
+	I40E_HMC_OBJ_RX_HSPLIT_0_NO_SPLIT      = 0,
+	I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_L2      = 1,
+	I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_IP      = 2,
+	I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_TCP_UDP = 4,
+	I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_SCTP    = 8,
+};
+
+/* fcoe_cntx and fcoe_filt are for debugging purpose only */
+struct i40e_hmc_obj_fcoe_cntx {
+	u32 rsv[32];
+};
+
+struct i40e_hmc_obj_fcoe_filt {
+	u32 rsv[8];
+};
+
+/* Context sizes for LAN objects */
+enum i40e_hmc_lan_object_size {
+	I40E_HMC_LAN_OBJ_SZ_8   = 0x3,
+	I40E_HMC_LAN_OBJ_SZ_16  = 0x4,
+	I40E_HMC_LAN_OBJ_SZ_32  = 0x5,
+	I40E_HMC_LAN_OBJ_SZ_64  = 0x6,
+	I40E_HMC_LAN_OBJ_SZ_128 = 0x7,
+	I40E_HMC_LAN_OBJ_SZ_256 = 0x8,
+	I40E_HMC_LAN_OBJ_SZ_512 = 0x9,
+};
+
+#define I40E_HMC_L2OBJ_BASE_ALIGNMENT 512
+#define I40E_HMC_OBJ_SIZE_TXQ         128
+#define I40E_HMC_OBJ_SIZE_RXQ         32
+#define I40E_HMC_OBJ_SIZE_FCOE_CNTX   64
+#define I40E_HMC_OBJ_SIZE_FCOE_FILT   64
+
+enum i40e_hmc_lan_rsrc_type {
+	I40E_HMC_LAN_FULL  = 0,
+	I40E_HMC_LAN_TX    = 1,
+	I40E_HMC_LAN_RX    = 2,
+	I40E_HMC_FCOE_CTX  = 3,
+	I40E_HMC_FCOE_FILT = 4,
+	I40E_HMC_LAN_MAX   = 5
+};
+
+enum i40e_hmc_model {
+	I40E_HMC_MODEL_DIRECT_PREFERRED = 0,
+	I40E_HMC_MODEL_DIRECT_ONLY      = 1,
+	I40E_HMC_MODEL_PAGED_ONLY       = 2,
+	I40E_HMC_MODEL_UNKNOWN,
+};
+
+struct i40e_hmc_lan_create_obj_info {
+	struct i40e_hmc_info *hmc_info;
+	u32 rsrc_type;
+	u32 start_idx;
+	u32 count;
+	enum i40e_sd_entry_type entry_type;
+	u64 direct_mode_sz;
+};
+
+struct i40e_hmc_lan_delete_obj_info {
+	struct i40e_hmc_info *hmc_info;
+	u32 rsrc_type;
+	u32 start_idx;
+	u32 count;
+};
+
+int i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
+		      u32 rxq_num, u32 fcoe_cntx_num,
+		      u32 fcoe_filt_num);
+int i40e_configure_lan_hmc(struct i40e_hw *hw,
+			   enum i40e_hmc_model model);
+int i40e_shutdown_lan_hmc(struct i40e_hw *hw);
+
+int i40e_clear_lan_tx_queue_context(struct i40e_hw *hw,
+				    u16 queue);
+int i40e_set_lan_tx_queue_context(struct i40e_hw *hw,
+				  u16 queue,
+				  struct i40e_hmc_obj_txq *s);
+int i40e_clear_lan_rx_queue_context(struct i40e_hw *hw,
+				    u16 queue);
+int i40e_set_lan_rx_queue_context(struct i40e_hw *hw,
+				  u16 queue,
+				  struct i40e_hmc_obj_rxq *s);
+
+#endif /* _I40E_LAN_HMC_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
new file mode 100644
index 0000000000..aad39ebff4
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -0,0 +1,16790 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2021 Intel Corporation. */
+
+#include <linux/etherdevice.h>
+#include <linux/of_net.h>
+#include <linux/pci.h>
+#include <linux/bpf.h>
+#include <generated/utsrelease.h>
+#include <linux/crash_dump.h>
+
+/* Local includes */
+#include "i40e.h"
+#include "i40e_diag.h"
+#include "i40e_xsk.h"
+#include <net/udp_tunnel.h>
+#include <net/xdp_sock_drv.h>
+/* All i40e tracepoints are defined by the include below, which
+ * must be included exactly once across the whole kernel with
+ * CREATE_TRACE_POINTS defined
+ */
+#define CREATE_TRACE_POINTS
+#include "i40e_trace.h"
+
+const char i40e_driver_name[] = "i40e";
+static const char i40e_driver_string[] =
+			"Intel(R) Ethernet Connection XL710 Network Driver";
+
+static const char i40e_copyright[] = "Copyright (c) 2013 - 2019 Intel Corporation.";
+
+/* a bit of forward declarations */
+static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi);
+static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired);
+static int i40e_add_vsi(struct i40e_vsi *vsi);
+static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi);
+static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired);
+static int i40e_setup_misc_vector(struct i40e_pf *pf);
+static void i40e_determine_queue_usage(struct i40e_pf *pf);
+static int i40e_setup_pf_filter_control(struct i40e_pf *pf);
+static void i40e_prep_for_reset(struct i40e_pf *pf);
+static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
+				   bool lock_acquired);
+static int i40e_reset(struct i40e_pf *pf);
+static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired);
+static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf);
+static int i40e_restore_interrupt_scheme(struct i40e_pf *pf);
+static bool i40e_check_recovery_mode(struct i40e_pf *pf);
+static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw);
+static void i40e_fdir_sb_setup(struct i40e_pf *pf);
+static int i40e_veb_get_bw_info(struct i40e_veb *veb);
+static int i40e_get_capabilities(struct i40e_pf *pf,
+				 enum i40e_admin_queue_opc list_type);
+static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf);
+
+/* i40e_pci_tbl - PCI Device ID Table
+ *
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ *   Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id i40e_pci_tbl[] = {
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_XL710), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QEMU), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_B), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_C), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_A), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_B), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_C), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_BC), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T4), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_BC), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_SFP), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_B), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_KX_X722), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_QSFP_X722), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_I_X722), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722_A), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2_A), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_X710_N3000), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_XXV710_N3000), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_B), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_SFP28), 0},
+	/* required last entry */
+	{0, }
+};
+MODULE_DEVICE_TABLE(pci, i40e_pci_tbl);
+
+#define I40E_MAX_VF_COUNT 128
+static int debug = -1;
+module_param(debug, uint, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all), Debug mask (0x8XXXXXXX)");
+
+MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
+MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver");
+MODULE_LICENSE("GPL v2");
+
+static struct workqueue_struct *i40e_wq;
+
+static void netdev_hw_addr_refcnt(struct i40e_mac_filter *f,
+				  struct net_device *netdev, int delta)
+{
+	struct netdev_hw_addr_list *ha_list;
+	struct netdev_hw_addr *ha;
+
+	if (!f || !netdev)
+		return;
+
+	if (is_unicast_ether_addr(f->macaddr) || is_link_local_ether_addr(f->macaddr))
+		ha_list = &netdev->uc;
+	else
+		ha_list = &netdev->mc;
+
+	netdev_hw_addr_list_for_each(ha, ha_list) {
+		if (ether_addr_equal(ha->addr, f->macaddr)) {
+			ha->refcount += delta;
+			if (ha->refcount <= 0)
+				ha->refcount = 1;
+			break;
+		}
+	}
+}
+
+/**
+ * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to fill out
+ * @size: size of memory requested
+ * @alignment: what to align the allocation to
+ **/
+int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem,
+			    u64 size, u32 alignment)
+{
+	struct i40e_pf *pf = (struct i40e_pf *)hw->back;
+
+	mem->size = ALIGN(size, alignment);
+	mem->va = dma_alloc_coherent(&pf->pdev->dev, mem->size, &mem->pa,
+				     GFP_KERNEL);
+	if (!mem->va)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * i40e_free_dma_mem_d - OS specific memory free for shared code
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to free
+ **/
+int i40e_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem)
+{
+	struct i40e_pf *pf = (struct i40e_pf *)hw->back;
+
+	dma_free_coherent(&pf->pdev->dev, mem->size, mem->va, mem->pa);
+	mem->va = NULL;
+	mem->pa = 0;
+	mem->size = 0;
+
+	return 0;
+}
+
+/**
+ * i40e_allocate_virt_mem_d - OS specific memory alloc for shared code
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to fill out
+ * @size: size of memory requested
+ **/
+int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem,
+			     u32 size)
+{
+	mem->size = size;
+	mem->va = kzalloc(size, GFP_KERNEL);
+
+	if (!mem->va)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * i40e_free_virt_mem_d - OS specific memory free for shared code
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to free
+ **/
+int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem)
+{
+	/* it's ok to kfree a NULL pointer */
+	kfree(mem->va);
+	mem->va = NULL;
+	mem->size = 0;
+
+	return 0;
+}
+
+/**
+ * i40e_get_lump - find a lump of free generic resource
+ * @pf: board private structure
+ * @pile: the pile of resource to search
+ * @needed: the number of items needed
+ * @id: an owner id to stick on the items assigned
+ *
+ * Returns the base item index of the lump, or negative for error
+ **/
+static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
+			 u16 needed, u16 id)
+{
+	int ret = -ENOMEM;
+	int i, j;
+
+	if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) {
+		dev_info(&pf->pdev->dev,
+			 "param err: pile=%s needed=%d id=0x%04x\n",
+			 pile ? "<valid>" : "<null>", needed, id);
+		return -EINVAL;
+	}
+
+	/* Allocate last queue in the pile for FDIR VSI queue
+	 * so it doesn't fragment the qp_pile
+	 */
+	if (pile == pf->qp_pile && pf->vsi[id]->type == I40E_VSI_FDIR) {
+		if (pile->list[pile->num_entries - 1] & I40E_PILE_VALID_BIT) {
+			dev_err(&pf->pdev->dev,
+				"Cannot allocate queue %d for I40E_VSI_FDIR\n",
+				pile->num_entries - 1);
+			return -ENOMEM;
+		}
+		pile->list[pile->num_entries - 1] = id | I40E_PILE_VALID_BIT;
+		return pile->num_entries - 1;
+	}
+
+	i = 0;
+	while (i < pile->num_entries) {
+		/* skip already allocated entries */
+		if (pile->list[i] & I40E_PILE_VALID_BIT) {
+			i++;
+			continue;
+		}
+
+		/* do we have enough in this lump? */
+		for (j = 0; (j < needed) && ((i+j) < pile->num_entries); j++) {
+			if (pile->list[i+j] & I40E_PILE_VALID_BIT)
+				break;
+		}
+
+		if (j == needed) {
+			/* there was enough, so assign it to the requestor */
+			for (j = 0; j < needed; j++)
+				pile->list[i+j] = id | I40E_PILE_VALID_BIT;
+			ret = i;
+			break;
+		}
+
+		/* not enough, so skip over it and continue looking */
+		i += j;
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_put_lump - return a lump of generic resource
+ * @pile: the pile of resource to search
+ * @index: the base item index
+ * @id: the owner id of the items assigned
+ *
+ * Returns the count of items in the lump
+ **/
+static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id)
+{
+	int valid_id = (id | I40E_PILE_VALID_BIT);
+	int count = 0;
+	u16 i;
+
+	if (!pile || index >= pile->num_entries)
+		return -EINVAL;
+
+	for (i = index;
+	     i < pile->num_entries && pile->list[i] == valid_id;
+	     i++) {
+		pile->list[i] = 0;
+		count++;
+	}
+
+
+	return count;
+}
+
+/**
+ * i40e_find_vsi_from_id - searches for the vsi with the given id
+ * @pf: the pf structure to search for the vsi
+ * @id: id of the vsi it is searching for
+ **/
+struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id)
+{
+	int i;
+
+	for (i = 0; i < pf->num_alloc_vsi; i++)
+		if (pf->vsi[i] && (pf->vsi[i]->id == id))
+			return pf->vsi[i];
+
+	return NULL;
+}
+
+/**
+ * i40e_service_event_schedule - Schedule the service task to wake up
+ * @pf: board private structure
+ *
+ * If not already scheduled, this puts the task into the work queue
+ **/
+void i40e_service_event_schedule(struct i40e_pf *pf)
+{
+	if ((!test_bit(__I40E_DOWN, pf->state) &&
+	     !test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) ||
+	      test_bit(__I40E_RECOVERY_MODE, pf->state))
+		queue_work(i40e_wq, &pf->service_task);
+}
+
+/**
+ * i40e_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ * @txqueue: queue number timing out
+ *
+ * If any port has noticed a Tx timeout, it is likely that the whole
+ * device is munged, not just the one netdev port, so go for the full
+ * reset.
+ **/
+static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_ring *tx_ring = NULL;
+	unsigned int i;
+	u32 head, val;
+
+	pf->tx_timeout_count++;
+
+	/* with txqueue index, find the tx_ring struct */
+	for (i = 0; i < vsi->num_queue_pairs; i++) {
+		if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) {
+			if (txqueue ==
+			    vsi->tx_rings[i]->queue_index) {
+				tx_ring = vsi->tx_rings[i];
+				break;
+			}
+		}
+	}
+
+	if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ*20)))
+		pf->tx_timeout_recovery_level = 1;  /* reset after some time */
+	else if (time_before(jiffies,
+		      (pf->tx_timeout_last_recovery + netdev->watchdog_timeo)))
+		return;   /* don't do any new action before the next timeout */
+
+	/* don't kick off another recovery if one is already pending */
+	if (test_and_set_bit(__I40E_TIMEOUT_RECOVERY_PENDING, pf->state))
+		return;
+
+	if (tx_ring) {
+		head = i40e_get_head(tx_ring);
+		/* Read interrupt register */
+		if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+			val = rd32(&pf->hw,
+			     I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx +
+						tx_ring->vsi->base_vector - 1));
+		else
+			val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0);
+
+		netdev_info(netdev, "tx_timeout: VSI_seid: %d, Q %d, NTC: 0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x, INT: 0x%x\n",
+			    vsi->seid, txqueue, tx_ring->next_to_clean,
+			    head, tx_ring->next_to_use,
+			    readl(tx_ring->tail), val);
+	}
+
+	pf->tx_timeout_last_recovery = jiffies;
+	netdev_info(netdev, "tx_timeout recovery level %d, txqueue %d\n",
+		    pf->tx_timeout_recovery_level, txqueue);
+
+	switch (pf->tx_timeout_recovery_level) {
+	case 1:
+		set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
+		break;
+	case 2:
+		set_bit(__I40E_CORE_RESET_REQUESTED, pf->state);
+		break;
+	case 3:
+		set_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state);
+		break;
+	default:
+		netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in non-recoverable state.\n");
+		set_bit(__I40E_DOWN_REQUESTED, pf->state);
+		set_bit(__I40E_VSI_DOWN_REQUESTED, vsi->state);
+		break;
+	}
+
+	i40e_service_event_schedule(pf);
+	pf->tx_timeout_recovery_level++;
+}
+
+/**
+ * i40e_get_vsi_stats_struct - Get System Network Statistics
+ * @vsi: the VSI we care about
+ *
+ * Returns the address of the device statistics structure.
+ * The statistics are actually updated from the service task.
+ **/
+struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi)
+{
+	return &vsi->net_stats;
+}
+
+/**
+ * i40e_get_netdev_stats_struct_tx - populate stats from a Tx ring
+ * @ring: Tx ring to get statistics from
+ * @stats: statistics entry to be updated
+ **/
+static void i40e_get_netdev_stats_struct_tx(struct i40e_ring *ring,
+					    struct rtnl_link_stats64 *stats)
+{
+	u64 bytes, packets;
+	unsigned int start;
+
+	do {
+		start = u64_stats_fetch_begin(&ring->syncp);
+		packets = ring->stats.packets;
+		bytes   = ring->stats.bytes;
+	} while (u64_stats_fetch_retry(&ring->syncp, start));
+
+	stats->tx_packets += packets;
+	stats->tx_bytes   += bytes;
+}
+
+/**
+ * i40e_get_netdev_stats_struct - Get statistics for netdev interface
+ * @netdev: network interface device structure
+ * @stats: data structure to store statistics
+ *
+ * Returns the address of the device statistics structure.
+ * The statistics are actually updated from the service task.
+ **/
+static void i40e_get_netdev_stats_struct(struct net_device *netdev,
+				  struct rtnl_link_stats64 *stats)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct rtnl_link_stats64 *vsi_stats = i40e_get_vsi_stats_struct(vsi);
+	struct i40e_ring *ring;
+	int i;
+
+	if (test_bit(__I40E_VSI_DOWN, vsi->state))
+		return;
+
+	if (!vsi->tx_rings)
+		return;
+
+	rcu_read_lock();
+	for (i = 0; i < vsi->num_queue_pairs; i++) {
+		u64 bytes, packets;
+		unsigned int start;
+
+		ring = READ_ONCE(vsi->tx_rings[i]);
+		if (!ring)
+			continue;
+		i40e_get_netdev_stats_struct_tx(ring, stats);
+
+		if (i40e_enabled_xdp_vsi(vsi)) {
+			ring = READ_ONCE(vsi->xdp_rings[i]);
+			if (!ring)
+				continue;
+			i40e_get_netdev_stats_struct_tx(ring, stats);
+		}
+
+		ring = READ_ONCE(vsi->rx_rings[i]);
+		if (!ring)
+			continue;
+		do {
+			start   = u64_stats_fetch_begin(&ring->syncp);
+			packets = ring->stats.packets;
+			bytes   = ring->stats.bytes;
+		} while (u64_stats_fetch_retry(&ring->syncp, start));
+
+		stats->rx_packets += packets;
+		stats->rx_bytes   += bytes;
+
+	}
+	rcu_read_unlock();
+
+	/* following stats updated by i40e_watchdog_subtask() */
+	stats->multicast	= vsi_stats->multicast;
+	stats->tx_errors	= vsi_stats->tx_errors;
+	stats->tx_dropped	= vsi_stats->tx_dropped;
+	stats->rx_errors	= vsi_stats->rx_errors;
+	stats->rx_dropped	= vsi_stats->rx_dropped;
+	stats->rx_crc_errors	= vsi_stats->rx_crc_errors;
+	stats->rx_length_errors	= vsi_stats->rx_length_errors;
+}
+
+/**
+ * i40e_vsi_reset_stats - Resets all stats of the given vsi
+ * @vsi: the VSI to have its stats reset
+ **/
+void i40e_vsi_reset_stats(struct i40e_vsi *vsi)
+{
+	struct rtnl_link_stats64 *ns;
+	int i;
+
+	if (!vsi)
+		return;
+
+	ns = i40e_get_vsi_stats_struct(vsi);
+	memset(ns, 0, sizeof(*ns));
+	memset(&vsi->net_stats_offsets, 0, sizeof(vsi->net_stats_offsets));
+	memset(&vsi->eth_stats, 0, sizeof(vsi->eth_stats));
+	memset(&vsi->eth_stats_offsets, 0, sizeof(vsi->eth_stats_offsets));
+	if (vsi->rx_rings && vsi->rx_rings[0]) {
+		for (i = 0; i < vsi->num_queue_pairs; i++) {
+			memset(&vsi->rx_rings[i]->stats, 0,
+			       sizeof(vsi->rx_rings[i]->stats));
+			memset(&vsi->rx_rings[i]->rx_stats, 0,
+			       sizeof(vsi->rx_rings[i]->rx_stats));
+			memset(&vsi->tx_rings[i]->stats, 0,
+			       sizeof(vsi->tx_rings[i]->stats));
+			memset(&vsi->tx_rings[i]->tx_stats, 0,
+			       sizeof(vsi->tx_rings[i]->tx_stats));
+		}
+	}
+	vsi->stat_offsets_loaded = false;
+}
+
+/**
+ * i40e_pf_reset_stats - Reset all of the stats for the given PF
+ * @pf: the PF to be reset
+ **/
+void i40e_pf_reset_stats(struct i40e_pf *pf)
+{
+	int i;
+
+	memset(&pf->stats, 0, sizeof(pf->stats));
+	memset(&pf->stats_offsets, 0, sizeof(pf->stats_offsets));
+	pf->stat_offsets_loaded = false;
+
+	for (i = 0; i < I40E_MAX_VEB; i++) {
+		if (pf->veb[i]) {
+			memset(&pf->veb[i]->stats, 0,
+			       sizeof(pf->veb[i]->stats));
+			memset(&pf->veb[i]->stats_offsets, 0,
+			       sizeof(pf->veb[i]->stats_offsets));
+			memset(&pf->veb[i]->tc_stats, 0,
+			       sizeof(pf->veb[i]->tc_stats));
+			memset(&pf->veb[i]->tc_stats_offsets, 0,
+			       sizeof(pf->veb[i]->tc_stats_offsets));
+			pf->veb[i]->stat_offsets_loaded = false;
+		}
+	}
+	pf->hw_csum_rx_error = 0;
+}
+
+/**
+ * i40e_compute_pci_to_hw_id - compute index form PCI function.
+ * @vsi: ptr to the VSI to read from.
+ * @hw: ptr to the hardware info.
+ **/
+static u32 i40e_compute_pci_to_hw_id(struct i40e_vsi *vsi, struct i40e_hw *hw)
+{
+	int pf_count = i40e_get_pf_count(hw);
+
+	if (vsi->type == I40E_VSI_SRIOV)
+		return (hw->port * BIT(7)) / pf_count + vsi->vf_id;
+
+	return hw->port + BIT(7);
+}
+
+/**
+ * i40e_stat_update64 - read and update a 64 bit stat from the chip.
+ * @hw: ptr to the hardware info.
+ * @hireg: the high 32 bit reg to read.
+ * @loreg: the low 32 bit reg to read.
+ * @offset_loaded: has the initial offset been loaded yet.
+ * @offset: ptr to current offset value.
+ * @stat: ptr to the stat.
+ *
+ * Since the device stats are not reset at PFReset, they will not
+ * be zeroed when the driver starts.  We'll save the first values read
+ * and use them as offsets to be subtracted from the raw values in order
+ * to report stats that count from zero.
+ **/
+static void i40e_stat_update64(struct i40e_hw *hw, u32 hireg, u32 loreg,
+			       bool offset_loaded, u64 *offset, u64 *stat)
+{
+	u64 new_data;
+
+	new_data = rd64(hw, loreg);
+
+	if (!offset_loaded || new_data < *offset)
+		*offset = new_data;
+	*stat = new_data - *offset;
+}
+
+/**
+ * i40e_stat_update48 - read and update a 48 bit stat from the chip
+ * @hw: ptr to the hardware info
+ * @hireg: the high 32 bit reg to read
+ * @loreg: the low 32 bit reg to read
+ * @offset_loaded: has the initial offset been loaded yet
+ * @offset: ptr to current offset value
+ * @stat: ptr to the stat
+ *
+ * Since the device stats are not reset at PFReset, they likely will not
+ * be zeroed when the driver starts.  We'll save the first values read
+ * and use them as offsets to be subtracted from the raw values in order
+ * to report stats that count from zero.  In the process, we also manage
+ * the potential roll-over.
+ **/
+static void i40e_stat_update48(struct i40e_hw *hw, u32 hireg, u32 loreg,
+			       bool offset_loaded, u64 *offset, u64 *stat)
+{
+	u64 new_data;
+
+	if (hw->device_id == I40E_DEV_ID_QEMU) {
+		new_data = rd32(hw, loreg);
+		new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32;
+	} else {
+		new_data = rd64(hw, loreg);
+	}
+	if (!offset_loaded)
+		*offset = new_data;
+	if (likely(new_data >= *offset))
+		*stat = new_data - *offset;
+	else
+		*stat = (new_data + BIT_ULL(48)) - *offset;
+	*stat &= 0xFFFFFFFFFFFFULL;
+}
+
+/**
+ * i40e_stat_update32 - read and update a 32 bit stat from the chip
+ * @hw: ptr to the hardware info
+ * @reg: the hw reg to read
+ * @offset_loaded: has the initial offset been loaded yet
+ * @offset: ptr to current offset value
+ * @stat: ptr to the stat
+ **/
+static void i40e_stat_update32(struct i40e_hw *hw, u32 reg,
+			       bool offset_loaded, u64 *offset, u64 *stat)
+{
+	u32 new_data;
+
+	new_data = rd32(hw, reg);
+	if (!offset_loaded)
+		*offset = new_data;
+	if (likely(new_data >= *offset))
+		*stat = (u32)(new_data - *offset);
+	else
+		*stat = (u32)((new_data + BIT_ULL(32)) - *offset);
+}
+
+/**
+ * i40e_stat_update_and_clear32 - read and clear hw reg, update a 32 bit stat
+ * @hw: ptr to the hardware info
+ * @reg: the hw reg to read and clear
+ * @stat: ptr to the stat
+ **/
+static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat)
+{
+	u32 new_data = rd32(hw, reg);
+
+	wr32(hw, reg, 1); /* must write a nonzero value to clear register */
+	*stat += new_data;
+}
+
+/**
+ * i40e_stats_update_rx_discards - update rx_discards.
+ * @vsi: ptr to the VSI to be updated.
+ * @hw: ptr to the hardware info.
+ * @stat_idx: VSI's stat_counter_idx.
+ * @offset_loaded: ptr to the VSI's stat_offsets_loaded.
+ * @stat_offset: ptr to stat_offset to store first read of specific register.
+ * @stat: ptr to VSI's stat to be updated.
+ **/
+static void
+i40e_stats_update_rx_discards(struct i40e_vsi *vsi, struct i40e_hw *hw,
+			      int stat_idx, bool offset_loaded,
+			      struct i40e_eth_stats *stat_offset,
+			      struct i40e_eth_stats *stat)
+{
+	u64 rx_rdpc, rx_rxerr;
+
+	i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx), offset_loaded,
+			   &stat_offset->rx_discards, &rx_rdpc);
+	i40e_stat_update64(hw,
+			   I40E_GL_RXERR1H(i40e_compute_pci_to_hw_id(vsi, hw)),
+			   I40E_GL_RXERR1L(i40e_compute_pci_to_hw_id(vsi, hw)),
+			   offset_loaded, &stat_offset->rx_discards_other,
+			   &rx_rxerr);
+
+	stat->rx_discards = rx_rdpc + rx_rxerr;
+}
+
+/**
+ * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters.
+ * @vsi: the VSI to be updated
+ **/
+void i40e_update_eth_stats(struct i40e_vsi *vsi)
+{
+	int stat_idx = le16_to_cpu(vsi->info.stat_counter_idx);
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_eth_stats *oes;
+	struct i40e_eth_stats *es;     /* device's eth stats */
+
+	es = &vsi->eth_stats;
+	oes = &vsi->eth_stats_offsets;
+
+	/* Gather up the stats that the hw collects */
+	i40e_stat_update32(hw, I40E_GLV_TEPC(stat_idx),
+			   vsi->stat_offsets_loaded,
+			   &oes->tx_errors, &es->tx_errors);
+	i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx),
+			   vsi->stat_offsets_loaded,
+			   &oes->rx_discards, &es->rx_discards);
+	i40e_stat_update32(hw, I40E_GLV_RUPP(stat_idx),
+			   vsi->stat_offsets_loaded,
+			   &oes->rx_unknown_protocol, &es->rx_unknown_protocol);
+
+	i40e_stat_update48(hw, I40E_GLV_GORCH(stat_idx),
+			   I40E_GLV_GORCL(stat_idx),
+			   vsi->stat_offsets_loaded,
+			   &oes->rx_bytes, &es->rx_bytes);
+	i40e_stat_update48(hw, I40E_GLV_UPRCH(stat_idx),
+			   I40E_GLV_UPRCL(stat_idx),
+			   vsi->stat_offsets_loaded,
+			   &oes->rx_unicast, &es->rx_unicast);
+	i40e_stat_update48(hw, I40E_GLV_MPRCH(stat_idx),
+			   I40E_GLV_MPRCL(stat_idx),
+			   vsi->stat_offsets_loaded,
+			   &oes->rx_multicast, &es->rx_multicast);
+	i40e_stat_update48(hw, I40E_GLV_BPRCH(stat_idx),
+			   I40E_GLV_BPRCL(stat_idx),
+			   vsi->stat_offsets_loaded,
+			   &oes->rx_broadcast, &es->rx_broadcast);
+
+	i40e_stat_update48(hw, I40E_GLV_GOTCH(stat_idx),
+			   I40E_GLV_GOTCL(stat_idx),
+			   vsi->stat_offsets_loaded,
+			   &oes->tx_bytes, &es->tx_bytes);
+	i40e_stat_update48(hw, I40E_GLV_UPTCH(stat_idx),
+			   I40E_GLV_UPTCL(stat_idx),
+			   vsi->stat_offsets_loaded,
+			   &oes->tx_unicast, &es->tx_unicast);
+	i40e_stat_update48(hw, I40E_GLV_MPTCH(stat_idx),
+			   I40E_GLV_MPTCL(stat_idx),
+			   vsi->stat_offsets_loaded,
+			   &oes->tx_multicast, &es->tx_multicast);
+	i40e_stat_update48(hw, I40E_GLV_BPTCH(stat_idx),
+			   I40E_GLV_BPTCL(stat_idx),
+			   vsi->stat_offsets_loaded,
+			   &oes->tx_broadcast, &es->tx_broadcast);
+
+	i40e_stats_update_rx_discards(vsi, hw, stat_idx,
+				      vsi->stat_offsets_loaded, oes, es);
+
+	vsi->stat_offsets_loaded = true;
+}
+
+/**
+ * i40e_update_veb_stats - Update Switch component statistics
+ * @veb: the VEB being updated
+ **/
+void i40e_update_veb_stats(struct i40e_veb *veb)
+{
+	struct i40e_pf *pf = veb->pf;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_eth_stats *oes;
+	struct i40e_eth_stats *es;     /* device's eth stats */
+	struct i40e_veb_tc_stats *veb_oes;
+	struct i40e_veb_tc_stats *veb_es;
+	int i, idx = 0;
+
+	idx = veb->stats_idx;
+	es = &veb->stats;
+	oes = &veb->stats_offsets;
+	veb_es = &veb->tc_stats;
+	veb_oes = &veb->tc_stats_offsets;
+
+	/* Gather up the stats that the hw collects */
+	i40e_stat_update32(hw, I40E_GLSW_TDPC(idx),
+			   veb->stat_offsets_loaded,
+			   &oes->tx_discards, &es->tx_discards);
+	if (hw->revision_id > 0)
+		i40e_stat_update32(hw, I40E_GLSW_RUPP(idx),
+				   veb->stat_offsets_loaded,
+				   &oes->rx_unknown_protocol,
+				   &es->rx_unknown_protocol);
+	i40e_stat_update48(hw, I40E_GLSW_GORCH(idx), I40E_GLSW_GORCL(idx),
+			   veb->stat_offsets_loaded,
+			   &oes->rx_bytes, &es->rx_bytes);
+	i40e_stat_update48(hw, I40E_GLSW_UPRCH(idx), I40E_GLSW_UPRCL(idx),
+			   veb->stat_offsets_loaded,
+			   &oes->rx_unicast, &es->rx_unicast);
+	i40e_stat_update48(hw, I40E_GLSW_MPRCH(idx), I40E_GLSW_MPRCL(idx),
+			   veb->stat_offsets_loaded,
+			   &oes->rx_multicast, &es->rx_multicast);
+	i40e_stat_update48(hw, I40E_GLSW_BPRCH(idx), I40E_GLSW_BPRCL(idx),
+			   veb->stat_offsets_loaded,
+			   &oes->rx_broadcast, &es->rx_broadcast);
+
+	i40e_stat_update48(hw, I40E_GLSW_GOTCH(idx), I40E_GLSW_GOTCL(idx),
+			   veb->stat_offsets_loaded,
+			   &oes->tx_bytes, &es->tx_bytes);
+	i40e_stat_update48(hw, I40E_GLSW_UPTCH(idx), I40E_GLSW_UPTCL(idx),
+			   veb->stat_offsets_loaded,
+			   &oes->tx_unicast, &es->tx_unicast);
+	i40e_stat_update48(hw, I40E_GLSW_MPTCH(idx), I40E_GLSW_MPTCL(idx),
+			   veb->stat_offsets_loaded,
+			   &oes->tx_multicast, &es->tx_multicast);
+	i40e_stat_update48(hw, I40E_GLSW_BPTCH(idx), I40E_GLSW_BPTCL(idx),
+			   veb->stat_offsets_loaded,
+			   &oes->tx_broadcast, &es->tx_broadcast);
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		i40e_stat_update48(hw, I40E_GLVEBTC_RPCH(i, idx),
+				   I40E_GLVEBTC_RPCL(i, idx),
+				   veb->stat_offsets_loaded,
+				   &veb_oes->tc_rx_packets[i],
+				   &veb_es->tc_rx_packets[i]);
+		i40e_stat_update48(hw, I40E_GLVEBTC_RBCH(i, idx),
+				   I40E_GLVEBTC_RBCL(i, idx),
+				   veb->stat_offsets_loaded,
+				   &veb_oes->tc_rx_bytes[i],
+				   &veb_es->tc_rx_bytes[i]);
+		i40e_stat_update48(hw, I40E_GLVEBTC_TPCH(i, idx),
+				   I40E_GLVEBTC_TPCL(i, idx),
+				   veb->stat_offsets_loaded,
+				   &veb_oes->tc_tx_packets[i],
+				   &veb_es->tc_tx_packets[i]);
+		i40e_stat_update48(hw, I40E_GLVEBTC_TBCH(i, idx),
+				   I40E_GLVEBTC_TBCL(i, idx),
+				   veb->stat_offsets_loaded,
+				   &veb_oes->tc_tx_bytes[i],
+				   &veb_es->tc_tx_bytes[i]);
+	}
+	veb->stat_offsets_loaded = true;
+}
+
+/**
+ * i40e_update_vsi_stats - Update the vsi statistics counters.
+ * @vsi: the VSI to be updated
+ *
+ * There are a few instances where we store the same stat in a
+ * couple of different structs.  This is partly because we have
+ * the netdev stats that need to be filled out, which is slightly
+ * different from the "eth_stats" defined by the chip and used in
+ * VF communications.  We sort it out here.
+ **/
+static void i40e_update_vsi_stats(struct i40e_vsi *vsi)
+{
+	u64 rx_page, rx_buf, rx_reuse, rx_alloc, rx_waive, rx_busy;
+	struct i40e_pf *pf = vsi->back;
+	struct rtnl_link_stats64 *ons;
+	struct rtnl_link_stats64 *ns;   /* netdev stats */
+	struct i40e_eth_stats *oes;
+	struct i40e_eth_stats *es;     /* device's eth stats */
+	u64 tx_restart, tx_busy;
+	struct i40e_ring *p;
+	u64 bytes, packets;
+	unsigned int start;
+	u64 tx_linearize;
+	u64 tx_force_wb;
+	u64 tx_stopped;
+	u64 rx_p, rx_b;
+	u64 tx_p, tx_b;
+	u16 q;
+
+	if (test_bit(__I40E_VSI_DOWN, vsi->state) ||
+	    test_bit(__I40E_CONFIG_BUSY, pf->state))
+		return;
+
+	ns = i40e_get_vsi_stats_struct(vsi);
+	ons = &vsi->net_stats_offsets;
+	es = &vsi->eth_stats;
+	oes = &vsi->eth_stats_offsets;
+
+	/* Gather up the netdev and vsi stats that the driver collects
+	 * on the fly during packet processing
+	 */
+	rx_b = rx_p = 0;
+	tx_b = tx_p = 0;
+	tx_restart = tx_busy = tx_linearize = tx_force_wb = 0;
+	tx_stopped = 0;
+	rx_page = 0;
+	rx_buf = 0;
+	rx_reuse = 0;
+	rx_alloc = 0;
+	rx_waive = 0;
+	rx_busy = 0;
+	rcu_read_lock();
+	for (q = 0; q < vsi->num_queue_pairs; q++) {
+		/* locate Tx ring */
+		p = READ_ONCE(vsi->tx_rings[q]);
+		if (!p)
+			continue;
+
+		do {
+			start = u64_stats_fetch_begin(&p->syncp);
+			packets = p->stats.packets;
+			bytes = p->stats.bytes;
+		} while (u64_stats_fetch_retry(&p->syncp, start));
+		tx_b += bytes;
+		tx_p += packets;
+		tx_restart += p->tx_stats.restart_queue;
+		tx_busy += p->tx_stats.tx_busy;
+		tx_linearize += p->tx_stats.tx_linearize;
+		tx_force_wb += p->tx_stats.tx_force_wb;
+		tx_stopped += p->tx_stats.tx_stopped;
+
+		/* locate Rx ring */
+		p = READ_ONCE(vsi->rx_rings[q]);
+		if (!p)
+			continue;
+
+		do {
+			start = u64_stats_fetch_begin(&p->syncp);
+			packets = p->stats.packets;
+			bytes = p->stats.bytes;
+		} while (u64_stats_fetch_retry(&p->syncp, start));
+		rx_b += bytes;
+		rx_p += packets;
+		rx_buf += p->rx_stats.alloc_buff_failed;
+		rx_page += p->rx_stats.alloc_page_failed;
+		rx_reuse += p->rx_stats.page_reuse_count;
+		rx_alloc += p->rx_stats.page_alloc_count;
+		rx_waive += p->rx_stats.page_waive_count;
+		rx_busy += p->rx_stats.page_busy_count;
+
+		if (i40e_enabled_xdp_vsi(vsi)) {
+			/* locate XDP ring */
+			p = READ_ONCE(vsi->xdp_rings[q]);
+			if (!p)
+				continue;
+
+			do {
+				start = u64_stats_fetch_begin(&p->syncp);
+				packets = p->stats.packets;
+				bytes = p->stats.bytes;
+			} while (u64_stats_fetch_retry(&p->syncp, start));
+			tx_b += bytes;
+			tx_p += packets;
+			tx_restart += p->tx_stats.restart_queue;
+			tx_busy += p->tx_stats.tx_busy;
+			tx_linearize += p->tx_stats.tx_linearize;
+			tx_force_wb += p->tx_stats.tx_force_wb;
+		}
+	}
+	rcu_read_unlock();
+	vsi->tx_restart = tx_restart;
+	vsi->tx_busy = tx_busy;
+	vsi->tx_linearize = tx_linearize;
+	vsi->tx_force_wb = tx_force_wb;
+	vsi->tx_stopped = tx_stopped;
+	vsi->rx_page_failed = rx_page;
+	vsi->rx_buf_failed = rx_buf;
+	vsi->rx_page_reuse = rx_reuse;
+	vsi->rx_page_alloc = rx_alloc;
+	vsi->rx_page_waive = rx_waive;
+	vsi->rx_page_busy = rx_busy;
+
+	ns->rx_packets = rx_p;
+	ns->rx_bytes = rx_b;
+	ns->tx_packets = tx_p;
+	ns->tx_bytes = tx_b;
+
+	/* update netdev stats from eth stats */
+	i40e_update_eth_stats(vsi);
+	ons->tx_errors = oes->tx_errors;
+	ns->tx_errors = es->tx_errors;
+	ons->multicast = oes->rx_multicast;
+	ns->multicast = es->rx_multicast;
+	ons->rx_dropped = oes->rx_discards;
+	ns->rx_dropped = es->rx_discards;
+	ons->tx_dropped = oes->tx_discards;
+	ns->tx_dropped = es->tx_discards;
+
+	/* pull in a couple PF stats if this is the main vsi */
+	if (vsi == pf->vsi[pf->lan_vsi]) {
+		ns->rx_crc_errors = pf->stats.crc_errors;
+		ns->rx_errors = pf->stats.crc_errors + pf->stats.illegal_bytes;
+		ns->rx_length_errors = pf->stats.rx_length_errors;
+	}
+}
+
+/**
+ * i40e_update_pf_stats - Update the PF statistics counters.
+ * @pf: the PF to be updated
+ **/
+static void i40e_update_pf_stats(struct i40e_pf *pf)
+{
+	struct i40e_hw_port_stats *osd = &pf->stats_offsets;
+	struct i40e_hw_port_stats *nsd = &pf->stats;
+	struct i40e_hw *hw = &pf->hw;
+	u32 val;
+	int i;
+
+	i40e_stat_update48(hw, I40E_GLPRT_GORCH(hw->port),
+			   I40E_GLPRT_GORCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.rx_bytes, &nsd->eth.rx_bytes);
+	i40e_stat_update48(hw, I40E_GLPRT_GOTCH(hw->port),
+			   I40E_GLPRT_GOTCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.tx_bytes, &nsd->eth.tx_bytes);
+	i40e_stat_update32(hw, I40E_GLPRT_RDPC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.rx_discards,
+			   &nsd->eth.rx_discards);
+	i40e_stat_update48(hw, I40E_GLPRT_UPRCH(hw->port),
+			   I40E_GLPRT_UPRCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.rx_unicast,
+			   &nsd->eth.rx_unicast);
+	i40e_stat_update48(hw, I40E_GLPRT_MPRCH(hw->port),
+			   I40E_GLPRT_MPRCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.rx_multicast,
+			   &nsd->eth.rx_multicast);
+	i40e_stat_update48(hw, I40E_GLPRT_BPRCH(hw->port),
+			   I40E_GLPRT_BPRCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.rx_broadcast,
+			   &nsd->eth.rx_broadcast);
+	i40e_stat_update48(hw, I40E_GLPRT_UPTCH(hw->port),
+			   I40E_GLPRT_UPTCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.tx_unicast,
+			   &nsd->eth.tx_unicast);
+	i40e_stat_update48(hw, I40E_GLPRT_MPTCH(hw->port),
+			   I40E_GLPRT_MPTCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.tx_multicast,
+			   &nsd->eth.tx_multicast);
+	i40e_stat_update48(hw, I40E_GLPRT_BPTCH(hw->port),
+			   I40E_GLPRT_BPTCL(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->eth.tx_broadcast,
+			   &nsd->eth.tx_broadcast);
+
+	i40e_stat_update32(hw, I40E_GLPRT_TDOLD(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_dropped_link_down,
+			   &nsd->tx_dropped_link_down);
+
+	i40e_stat_update32(hw, I40E_GLPRT_CRCERRS(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->crc_errors, &nsd->crc_errors);
+
+	i40e_stat_update32(hw, I40E_GLPRT_ILLERRC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->illegal_bytes, &nsd->illegal_bytes);
+
+	i40e_stat_update32(hw, I40E_GLPRT_MLFC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->mac_local_faults,
+			   &nsd->mac_local_faults);
+	i40e_stat_update32(hw, I40E_GLPRT_MRFC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->mac_remote_faults,
+			   &nsd->mac_remote_faults);
+
+	i40e_stat_update32(hw, I40E_GLPRT_RLEC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_length_errors,
+			   &nsd->rx_length_errors);
+
+	i40e_stat_update32(hw, I40E_GLPRT_LXONRXC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->link_xon_rx, &nsd->link_xon_rx);
+	i40e_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->link_xon_tx, &nsd->link_xon_tx);
+	i40e_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->link_xoff_rx, &nsd->link_xoff_rx);
+	i40e_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->link_xoff_tx, &nsd->link_xoff_tx);
+
+	for (i = 0; i < 8; i++) {
+		i40e_stat_update32(hw, I40E_GLPRT_PXOFFRXC(hw->port, i),
+				   pf->stat_offsets_loaded,
+				   &osd->priority_xoff_rx[i],
+				   &nsd->priority_xoff_rx[i]);
+		i40e_stat_update32(hw, I40E_GLPRT_PXONRXC(hw->port, i),
+				   pf->stat_offsets_loaded,
+				   &osd->priority_xon_rx[i],
+				   &nsd->priority_xon_rx[i]);
+		i40e_stat_update32(hw, I40E_GLPRT_PXONTXC(hw->port, i),
+				   pf->stat_offsets_loaded,
+				   &osd->priority_xon_tx[i],
+				   &nsd->priority_xon_tx[i]);
+		i40e_stat_update32(hw, I40E_GLPRT_PXOFFTXC(hw->port, i),
+				   pf->stat_offsets_loaded,
+				   &osd->priority_xoff_tx[i],
+				   &nsd->priority_xoff_tx[i]);
+		i40e_stat_update32(hw,
+				   I40E_GLPRT_RXON2OFFCNT(hw->port, i),
+				   pf->stat_offsets_loaded,
+				   &osd->priority_xon_2_xoff[i],
+				   &nsd->priority_xon_2_xoff[i]);
+	}
+
+	i40e_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port),
+			   I40E_GLPRT_PRC64L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_size_64, &nsd->rx_size_64);
+	i40e_stat_update48(hw, I40E_GLPRT_PRC127H(hw->port),
+			   I40E_GLPRT_PRC127L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_size_127, &nsd->rx_size_127);
+	i40e_stat_update48(hw, I40E_GLPRT_PRC255H(hw->port),
+			   I40E_GLPRT_PRC255L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_size_255, &nsd->rx_size_255);
+	i40e_stat_update48(hw, I40E_GLPRT_PRC511H(hw->port),
+			   I40E_GLPRT_PRC511L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_size_511, &nsd->rx_size_511);
+	i40e_stat_update48(hw, I40E_GLPRT_PRC1023H(hw->port),
+			   I40E_GLPRT_PRC1023L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_size_1023, &nsd->rx_size_1023);
+	i40e_stat_update48(hw, I40E_GLPRT_PRC1522H(hw->port),
+			   I40E_GLPRT_PRC1522L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_size_1522, &nsd->rx_size_1522);
+	i40e_stat_update48(hw, I40E_GLPRT_PRC9522H(hw->port),
+			   I40E_GLPRT_PRC9522L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_size_big, &nsd->rx_size_big);
+
+	i40e_stat_update48(hw, I40E_GLPRT_PTC64H(hw->port),
+			   I40E_GLPRT_PTC64L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_size_64, &nsd->tx_size_64);
+	i40e_stat_update48(hw, I40E_GLPRT_PTC127H(hw->port),
+			   I40E_GLPRT_PTC127L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_size_127, &nsd->tx_size_127);
+	i40e_stat_update48(hw, I40E_GLPRT_PTC255H(hw->port),
+			   I40E_GLPRT_PTC255L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_size_255, &nsd->tx_size_255);
+	i40e_stat_update48(hw, I40E_GLPRT_PTC511H(hw->port),
+			   I40E_GLPRT_PTC511L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_size_511, &nsd->tx_size_511);
+	i40e_stat_update48(hw, I40E_GLPRT_PTC1023H(hw->port),
+			   I40E_GLPRT_PTC1023L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_size_1023, &nsd->tx_size_1023);
+	i40e_stat_update48(hw, I40E_GLPRT_PTC1522H(hw->port),
+			   I40E_GLPRT_PTC1522L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_size_1522, &nsd->tx_size_1522);
+	i40e_stat_update48(hw, I40E_GLPRT_PTC9522H(hw->port),
+			   I40E_GLPRT_PTC9522L(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->tx_size_big, &nsd->tx_size_big);
+
+	i40e_stat_update32(hw, I40E_GLPRT_RUC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_undersize, &nsd->rx_undersize);
+	i40e_stat_update32(hw, I40E_GLPRT_RFC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_fragments, &nsd->rx_fragments);
+	i40e_stat_update32(hw, I40E_GLPRT_ROC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_oversize, &nsd->rx_oversize);
+	i40e_stat_update32(hw, I40E_GLPRT_RJC(hw->port),
+			   pf->stat_offsets_loaded,
+			   &osd->rx_jabber, &nsd->rx_jabber);
+
+	/* FDIR stats */
+	i40e_stat_update_and_clear32(hw,
+			I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(hw->pf_id)),
+			&nsd->fd_atr_match);
+	i40e_stat_update_and_clear32(hw,
+			I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(hw->pf_id)),
+			&nsd->fd_sb_match);
+	i40e_stat_update_and_clear32(hw,
+			I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(hw->pf_id)),
+			&nsd->fd_atr_tunnel_match);
+
+	val = rd32(hw, I40E_PRTPM_EEE_STAT);
+	nsd->tx_lpi_status =
+		       (val & I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_MASK) >>
+			I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT;
+	nsd->rx_lpi_status =
+		       (val & I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK) >>
+			I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT;
+	i40e_stat_update32(hw, I40E_PRTPM_TLPIC,
+			   pf->stat_offsets_loaded,
+			   &osd->tx_lpi_count, &nsd->tx_lpi_count);
+	i40e_stat_update32(hw, I40E_PRTPM_RLPIC,
+			   pf->stat_offsets_loaded,
+			   &osd->rx_lpi_count, &nsd->rx_lpi_count);
+
+	if (pf->flags & I40E_FLAG_FD_SB_ENABLED &&
+	    !test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
+		nsd->fd_sb_status = true;
+	else
+		nsd->fd_sb_status = false;
+
+	if (pf->flags & I40E_FLAG_FD_ATR_ENABLED &&
+	    !test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
+		nsd->fd_atr_status = true;
+	else
+		nsd->fd_atr_status = false;
+
+	pf->stat_offsets_loaded = true;
+}
+
+/**
+ * i40e_update_stats - Update the various statistics counters.
+ * @vsi: the VSI to be updated
+ *
+ * Update the various stats for this VSI and its related entities.
+ **/
+void i40e_update_stats(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+
+	if (vsi == pf->vsi[pf->lan_vsi])
+		i40e_update_pf_stats(pf);
+
+	i40e_update_vsi_stats(vsi);
+}
+
+/**
+ * i40e_count_filters - counts VSI mac filters
+ * @vsi: the VSI to be searched
+ *
+ * Returns count of mac filters
+ **/
+int i40e_count_filters(struct i40e_vsi *vsi)
+{
+	struct i40e_mac_filter *f;
+	struct hlist_node *h;
+	int bkt;
+	int cnt = 0;
+
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
+		++cnt;
+
+	return cnt;
+}
+
+/**
+ * i40e_find_filter - Search VSI filter list for specific mac/vlan filter
+ * @vsi: the VSI to be searched
+ * @macaddr: the MAC address
+ * @vlan: the vlan
+ *
+ * Returns ptr to the filter object or NULL
+ **/
+static struct i40e_mac_filter *i40e_find_filter(struct i40e_vsi *vsi,
+						const u8 *macaddr, s16 vlan)
+{
+	struct i40e_mac_filter *f;
+	u64 key;
+
+	if (!vsi || !macaddr)
+		return NULL;
+
+	key = i40e_addr_to_hkey(macaddr);
+	hash_for_each_possible(vsi->mac_filter_hash, f, hlist, key) {
+		if ((ether_addr_equal(macaddr, f->macaddr)) &&
+		    (vlan == f->vlan))
+			return f;
+	}
+	return NULL;
+}
+
+/**
+ * i40e_find_mac - Find a mac addr in the macvlan filters list
+ * @vsi: the VSI to be searched
+ * @macaddr: the MAC address we are searching for
+ *
+ * Returns the first filter with the provided MAC address or NULL if
+ * MAC address was not found
+ **/
+struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr)
+{
+	struct i40e_mac_filter *f;
+	u64 key;
+
+	if (!vsi || !macaddr)
+		return NULL;
+
+	key = i40e_addr_to_hkey(macaddr);
+	hash_for_each_possible(vsi->mac_filter_hash, f, hlist, key) {
+		if ((ether_addr_equal(macaddr, f->macaddr)))
+			return f;
+	}
+	return NULL;
+}
+
+/**
+ * i40e_is_vsi_in_vlan - Check if VSI is in vlan mode
+ * @vsi: the VSI to be searched
+ *
+ * Returns true if VSI is in vlan mode or false otherwise
+ **/
+bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi)
+{
+	/* If we have a PVID, always operate in VLAN mode */
+	if (vsi->info.pvid)
+		return true;
+
+	/* We need to operate in VLAN mode whenever we have any filters with
+	 * a VLAN other than I40E_VLAN_ALL. We could check the table each
+	 * time, incurring search cost repeatedly. However, we can notice two
+	 * things:
+	 *
+	 * 1) the only place where we can gain a VLAN filter is in
+	 *    i40e_add_filter.
+	 *
+	 * 2) the only place where filters are actually removed is in
+	 *    i40e_sync_filters_subtask.
+	 *
+	 * Thus, we can simply use a boolean value, has_vlan_filters which we
+	 * will set to true when we add a VLAN filter in i40e_add_filter. Then
+	 * we have to perform the full search after deleting filters in
+	 * i40e_sync_filters_subtask, but we already have to search
+	 * filters here and can perform the check at the same time. This
+	 * results in avoiding embedding a loop for VLAN mode inside another
+	 * loop over all the filters, and should maintain correctness as noted
+	 * above.
+	 */
+	return vsi->has_vlan_filter;
+}
+
+/**
+ * i40e_correct_mac_vlan_filters - Correct non-VLAN filters if necessary
+ * @vsi: the VSI to configure
+ * @tmp_add_list: list of filters ready to be added
+ * @tmp_del_list: list of filters ready to be deleted
+ * @vlan_filters: the number of active VLAN filters
+ *
+ * Update VLAN=0 and VLAN=-1 (I40E_VLAN_ANY) filters properly so that they
+ * behave as expected. If we have any active VLAN filters remaining or about
+ * to be added then we need to update non-VLAN filters to be marked as VLAN=0
+ * so that they only match against untagged traffic. If we no longer have any
+ * active VLAN filters, we need to make all non-VLAN filters marked as VLAN=-1
+ * so that they match against both tagged and untagged traffic. In this way,
+ * we ensure that we correctly receive the desired traffic. This ensures that
+ * when we have an active VLAN we will receive only untagged traffic and
+ * traffic matching active VLANs. If we have no active VLANs then we will
+ * operate in non-VLAN mode and receive all traffic, tagged or untagged.
+ *
+ * Finally, in a similar fashion, this function also corrects filters when
+ * there is an active PVID assigned to this VSI.
+ *
+ * In case of memory allocation failure return -ENOMEM. Otherwise, return 0.
+ *
+ * This function is only expected to be called from within
+ * i40e_sync_vsi_filters.
+ *
+ * NOTE: This function expects to be called while under the
+ * mac_filter_hash_lock
+ */
+static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi,
+					 struct hlist_head *tmp_add_list,
+					 struct hlist_head *tmp_del_list,
+					 int vlan_filters)
+{
+	s16 pvid = le16_to_cpu(vsi->info.pvid);
+	struct i40e_mac_filter *f, *add_head;
+	struct i40e_new_mac_filter *new;
+	struct hlist_node *h;
+	int bkt, new_vlan;
+
+	/* To determine if a particular filter needs to be replaced we
+	 * have the three following conditions:
+	 *
+	 * a) if we have a PVID assigned, then all filters which are
+	 *    not marked as VLAN=PVID must be replaced with filters that
+	 *    are.
+	 * b) otherwise, if we have any active VLANS, all filters
+	 *    which are marked as VLAN=-1 must be replaced with
+	 *    filters marked as VLAN=0
+	 * c) finally, if we do not have any active VLANS, all filters
+	 *    which are marked as VLAN=0 must be replaced with filters
+	 *    marked as VLAN=-1
+	 */
+
+	/* Update the filters about to be added in place */
+	hlist_for_each_entry(new, tmp_add_list, hlist) {
+		if (pvid && new->f->vlan != pvid)
+			new->f->vlan = pvid;
+		else if (vlan_filters && new->f->vlan == I40E_VLAN_ANY)
+			new->f->vlan = 0;
+		else if (!vlan_filters && new->f->vlan == 0)
+			new->f->vlan = I40E_VLAN_ANY;
+	}
+
+	/* Update the remaining active filters */
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+		/* Combine the checks for whether a filter needs to be changed
+		 * and then determine the new VLAN inside the if block, in
+		 * order to avoid duplicating code for adding the new filter
+		 * then deleting the old filter.
+		 */
+		if ((pvid && f->vlan != pvid) ||
+		    (vlan_filters && f->vlan == I40E_VLAN_ANY) ||
+		    (!vlan_filters && f->vlan == 0)) {
+			/* Determine the new vlan we will be adding */
+			if (pvid)
+				new_vlan = pvid;
+			else if (vlan_filters)
+				new_vlan = 0;
+			else
+				new_vlan = I40E_VLAN_ANY;
+
+			/* Create the new filter */
+			add_head = i40e_add_filter(vsi, f->macaddr, new_vlan);
+			if (!add_head)
+				return -ENOMEM;
+
+			/* Create a temporary i40e_new_mac_filter */
+			new = kzalloc(sizeof(*new), GFP_ATOMIC);
+			if (!new)
+				return -ENOMEM;
+
+			new->f = add_head;
+			new->state = add_head->state;
+
+			/* Add the new filter to the tmp list */
+			hlist_add_head(&new->hlist, tmp_add_list);
+
+			/* Put the original filter into the delete list */
+			f->state = I40E_FILTER_REMOVE;
+			hash_del(&f->hlist);
+			hlist_add_head(&f->hlist, tmp_del_list);
+		}
+	}
+
+	vsi->has_vlan_filter = !!vlan_filters;
+
+	return 0;
+}
+
+/**
+ * i40e_get_vf_new_vlan - Get new vlan id on a vf
+ * @vsi: the vsi to configure
+ * @new_mac: new mac filter to be added
+ * @f: existing mac filter, replaced with new_mac->f if new_mac is not NULL
+ * @vlan_filters: the number of active VLAN filters
+ * @trusted: flag if the VF is trusted
+ *
+ * Get new VLAN id based on current VLAN filters, trust, PVID
+ * and vf-vlan-prune-disable flag.
+ *
+ * Returns the value of the new vlan filter or
+ * the old value if no new filter is needed.
+ */
+static s16 i40e_get_vf_new_vlan(struct i40e_vsi *vsi,
+				struct i40e_new_mac_filter *new_mac,
+				struct i40e_mac_filter *f,
+				int vlan_filters,
+				bool trusted)
+{
+	s16 pvid = le16_to_cpu(vsi->info.pvid);
+	struct i40e_pf *pf = vsi->back;
+	bool is_any;
+
+	if (new_mac)
+		f = new_mac->f;
+
+	if (pvid && f->vlan != pvid)
+		return pvid;
+
+	is_any = (trusted ||
+		  !(pf->flags & I40E_FLAG_VF_VLAN_PRUNING));
+
+	if ((vlan_filters && f->vlan == I40E_VLAN_ANY) ||
+	    (!is_any && !vlan_filters && f->vlan == I40E_VLAN_ANY) ||
+	    (is_any && !vlan_filters && f->vlan == 0)) {
+		if (is_any)
+			return I40E_VLAN_ANY;
+		else
+			return 0;
+	}
+
+	return f->vlan;
+}
+
+/**
+ * i40e_correct_vf_mac_vlan_filters - Correct non-VLAN VF filters if necessary
+ * @vsi: the vsi to configure
+ * @tmp_add_list: list of filters ready to be added
+ * @tmp_del_list: list of filters ready to be deleted
+ * @vlan_filters: the number of active VLAN filters
+ * @trusted: flag if the VF is trusted
+ *
+ * Correct VF VLAN filters based on current VLAN filters, trust, PVID
+ * and vf-vlan-prune-disable flag.
+ *
+ * In case of memory allocation failure return -ENOMEM. Otherwise, return 0.
+ *
+ * This function is only expected to be called from within
+ * i40e_sync_vsi_filters.
+ *
+ * NOTE: This function expects to be called while under the
+ * mac_filter_hash_lock
+ */
+static int i40e_correct_vf_mac_vlan_filters(struct i40e_vsi *vsi,
+					    struct hlist_head *tmp_add_list,
+					    struct hlist_head *tmp_del_list,
+					    int vlan_filters,
+					    bool trusted)
+{
+	struct i40e_mac_filter *f, *add_head;
+	struct i40e_new_mac_filter *new_mac;
+	struct hlist_node *h;
+	int bkt, new_vlan;
+
+	hlist_for_each_entry(new_mac, tmp_add_list, hlist) {
+		new_mac->f->vlan = i40e_get_vf_new_vlan(vsi, new_mac, NULL,
+							vlan_filters, trusted);
+	}
+
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+		new_vlan = i40e_get_vf_new_vlan(vsi, NULL, f, vlan_filters,
+						trusted);
+		if (new_vlan != f->vlan) {
+			add_head = i40e_add_filter(vsi, f->macaddr, new_vlan);
+			if (!add_head)
+				return -ENOMEM;
+			/* Create a temporary i40e_new_mac_filter */
+			new_mac = kzalloc(sizeof(*new_mac), GFP_ATOMIC);
+			if (!new_mac)
+				return -ENOMEM;
+			new_mac->f = add_head;
+			new_mac->state = add_head->state;
+
+			/* Add the new filter to the tmp list */
+			hlist_add_head(&new_mac->hlist, tmp_add_list);
+
+			/* Put the original filter into the delete list */
+			f->state = I40E_FILTER_REMOVE;
+			hash_del(&f->hlist);
+			hlist_add_head(&f->hlist, tmp_del_list);
+		}
+	}
+
+	vsi->has_vlan_filter = !!vlan_filters;
+	return 0;
+}
+
+/**
+ * i40e_rm_default_mac_filter - Remove the default MAC filter set by NVM
+ * @vsi: the PF Main VSI - inappropriate for any other VSI
+ * @macaddr: the MAC address
+ *
+ * Remove whatever filter the firmware set up so the driver can manage
+ * its own filtering intelligently.
+ **/
+static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr)
+{
+	struct i40e_aqc_remove_macvlan_element_data element;
+	struct i40e_pf *pf = vsi->back;
+
+	/* Only appropriate for the PF main VSI */
+	if (vsi->type != I40E_VSI_MAIN)
+		return;
+
+	memset(&element, 0, sizeof(element));
+	ether_addr_copy(element.mac_addr, macaddr);
+	element.vlan_tag = 0;
+	/* Ignore error returns, some firmware does it this way... */
+	element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
+	i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL);
+
+	memset(&element, 0, sizeof(element));
+	ether_addr_copy(element.mac_addr, macaddr);
+	element.vlan_tag = 0;
+	/* ...and some firmware does it this way. */
+	element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH |
+			I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
+	i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL);
+}
+
+/**
+ * i40e_add_filter - Add a mac/vlan filter to the VSI
+ * @vsi: the VSI to be searched
+ * @macaddr: the MAC address
+ * @vlan: the vlan
+ *
+ * Returns ptr to the filter object or NULL when no memory available.
+ *
+ * NOTE: This function is expected to be called with mac_filter_hash_lock
+ * being held.
+ **/
+struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
+					const u8 *macaddr, s16 vlan)
+{
+	struct i40e_mac_filter *f;
+	u64 key;
+
+	if (!vsi || !macaddr)
+		return NULL;
+
+	f = i40e_find_filter(vsi, macaddr, vlan);
+	if (!f) {
+		f = kzalloc(sizeof(*f), GFP_ATOMIC);
+		if (!f)
+			return NULL;
+
+		/* Update the boolean indicating if we need to function in
+		 * VLAN mode.
+		 */
+		if (vlan >= 0)
+			vsi->has_vlan_filter = true;
+
+		ether_addr_copy(f->macaddr, macaddr);
+		f->vlan = vlan;
+		f->state = I40E_FILTER_NEW;
+		INIT_HLIST_NODE(&f->hlist);
+
+		key = i40e_addr_to_hkey(macaddr);
+		hash_add(vsi->mac_filter_hash, &f->hlist, key);
+
+		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+		set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
+	}
+
+	/* If we're asked to add a filter that has been marked for removal, it
+	 * is safe to simply restore it to active state. __i40e_del_filter
+	 * will have simply deleted any filters which were previously marked
+	 * NEW or FAILED, so if it is currently marked REMOVE it must have
+	 * previously been ACTIVE. Since we haven't yet run the sync filters
+	 * task, just restore this filter to the ACTIVE state so that the
+	 * sync task leaves it in place
+	 */
+	if (f->state == I40E_FILTER_REMOVE)
+		f->state = I40E_FILTER_ACTIVE;
+
+	return f;
+}
+
+/**
+ * __i40e_del_filter - Remove a specific filter from the VSI
+ * @vsi: VSI to remove from
+ * @f: the filter to remove from the list
+ *
+ * This function should be called instead of i40e_del_filter only if you know
+ * the exact filter you will remove already, such as via i40e_find_filter or
+ * i40e_find_mac.
+ *
+ * NOTE: This function is expected to be called with mac_filter_hash_lock
+ * being held.
+ * ANOTHER NOTE: This function MUST be called from within the context of
+ * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe()
+ * instead of list_for_each_entry().
+ **/
+void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f)
+{
+	if (!f)
+		return;
+
+	/* If the filter was never added to firmware then we can just delete it
+	 * directly and we don't want to set the status to remove or else an
+	 * admin queue command will unnecessarily fire.
+	 */
+	if ((f->state == I40E_FILTER_FAILED) ||
+	    (f->state == I40E_FILTER_NEW)) {
+		hash_del(&f->hlist);
+		kfree(f);
+	} else {
+		f->state = I40E_FILTER_REMOVE;
+	}
+
+	vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+	set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
+}
+
+/**
+ * i40e_del_filter - Remove a MAC/VLAN filter from the VSI
+ * @vsi: the VSI to be searched
+ * @macaddr: the MAC address
+ * @vlan: the VLAN
+ *
+ * NOTE: This function is expected to be called with mac_filter_hash_lock
+ * being held.
+ * ANOTHER NOTE: This function MUST be called from within the context of
+ * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe()
+ * instead of list_for_each_entry().
+ **/
+void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan)
+{
+	struct i40e_mac_filter *f;
+
+	if (!vsi || !macaddr)
+		return;
+
+	f = i40e_find_filter(vsi, macaddr, vlan);
+	__i40e_del_filter(vsi, f);
+}
+
+/**
+ * i40e_add_mac_filter - Add a MAC filter for all active VLANs
+ * @vsi: the VSI to be searched
+ * @macaddr: the mac address to be filtered
+ *
+ * If we're not in VLAN mode, just add the filter to I40E_VLAN_ANY. Otherwise,
+ * go through all the macvlan filters and add a macvlan filter for each
+ * unique vlan that already exists. If a PVID has been assigned, instead only
+ * add the macaddr to that VLAN.
+ *
+ * Returns last filter added on success, else NULL
+ **/
+struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi,
+					    const u8 *macaddr)
+{
+	struct i40e_mac_filter *f, *add = NULL;
+	struct hlist_node *h;
+	int bkt;
+
+	if (vsi->info.pvid)
+		return i40e_add_filter(vsi, macaddr,
+				       le16_to_cpu(vsi->info.pvid));
+
+	if (!i40e_is_vsi_in_vlan(vsi))
+		return i40e_add_filter(vsi, macaddr, I40E_VLAN_ANY);
+
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+		if (f->state == I40E_FILTER_REMOVE)
+			continue;
+		add = i40e_add_filter(vsi, macaddr, f->vlan);
+		if (!add)
+			return NULL;
+	}
+
+	return add;
+}
+
+/**
+ * i40e_del_mac_filter - Remove a MAC filter from all VLANs
+ * @vsi: the VSI to be searched
+ * @macaddr: the mac address to be removed
+ *
+ * Removes a given MAC address from a VSI regardless of what VLAN it has been
+ * associated with.
+ *
+ * Returns 0 for success, or error
+ **/
+int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr)
+{
+	struct i40e_mac_filter *f;
+	struct hlist_node *h;
+	bool found = false;
+	int bkt;
+
+	lockdep_assert_held(&vsi->mac_filter_hash_lock);
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+		if (ether_addr_equal(macaddr, f->macaddr)) {
+			__i40e_del_filter(vsi, f);
+			found = true;
+		}
+	}
+
+	if (found)
+		return 0;
+	else
+		return -ENOENT;
+}
+
+/**
+ * i40e_set_mac - NDO callback to set mac address
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_set_mac(struct net_device *netdev, void *p)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	if (test_bit(__I40E_DOWN, pf->state) ||
+	    test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+		return -EADDRNOTAVAIL;
+
+	if (ether_addr_equal(hw->mac.addr, addr->sa_data))
+		netdev_info(netdev, "returning to hw mac address %pM\n",
+			    hw->mac.addr);
+	else
+		netdev_info(netdev, "set new mac address %pM\n", addr->sa_data);
+
+	/* Copy the address first, so that we avoid a possible race with
+	 * .set_rx_mode().
+	 * - Remove old address from MAC filter
+	 * - Copy new address
+	 * - Add new address to MAC filter
+	 */
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	i40e_del_mac_filter(vsi, netdev->dev_addr);
+	eth_hw_addr_set(netdev, addr->sa_data);
+	i40e_add_mac_filter(vsi, netdev->dev_addr);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	if (vsi->type == I40E_VSI_MAIN) {
+		int ret;
+
+		ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_WOL,
+						addr->sa_data, NULL);
+		if (ret)
+			netdev_info(netdev, "Ignoring error from firmware on LAA update, status %pe, AQ ret %s\n",
+				    ERR_PTR(ret),
+				    i40e_aq_str(hw, hw->aq.asq_last_status));
+	}
+
+	/* schedule our worker thread which will take care of
+	 * applying the new filter changes
+	 */
+	i40e_service_event_schedule(pf);
+	return 0;
+}
+
+/**
+ * i40e_config_rss_aq - Prepare for RSS using AQ commands
+ * @vsi: vsi structure
+ * @seed: RSS hash seed
+ * @lut: pointer to lookup table of lut_size
+ * @lut_size: size of the lookup table
+ **/
+static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
+			      u8 *lut, u16 lut_size)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	int ret = 0;
+
+	if (seed) {
+		struct i40e_aqc_get_set_rss_key_data *seed_dw =
+			(struct i40e_aqc_get_set_rss_key_data *)seed;
+		ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Cannot set RSS key, err %pe aq_err %s\n",
+				 ERR_PTR(ret),
+				 i40e_aq_str(hw, hw->aq.asq_last_status));
+			return ret;
+		}
+	}
+	if (lut) {
+		bool pf_lut = vsi->type == I40E_VSI_MAIN;
+
+		ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Cannot set RSS lut, err %pe aq_err %s\n",
+				 ERR_PTR(ret),
+				 i40e_aq_str(hw, hw->aq.asq_last_status));
+			return ret;
+		}
+	}
+	return ret;
+}
+
+/**
+ * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
+ * @vsi: VSI structure
+ **/
+static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	u8 seed[I40E_HKEY_ARRAY_SIZE];
+	u8 *lut;
+	int ret;
+
+	if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
+		return 0;
+	if (!vsi->rss_size)
+		vsi->rss_size = min_t(int, pf->alloc_rss_size,
+				      vsi->num_queue_pairs);
+	if (!vsi->rss_size)
+		return -EINVAL;
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	/* Use the user configured hash keys and lookup table if there is one,
+	 * otherwise use default
+	 */
+	if (vsi->rss_lut_user)
+		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+	else
+		i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
+	if (vsi->rss_hkey_user)
+		memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+	else
+		netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+	ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
+	kfree(lut);
+	return ret;
+}
+
+/**
+ * i40e_vsi_setup_queue_map_mqprio - Prepares mqprio based tc_config
+ * @vsi: the VSI being configured,
+ * @ctxt: VSI context structure
+ * @enabled_tc: number of traffic classes to enable
+ *
+ * Prepares VSI tc_config to have queue configurations based on MQPRIO options.
+ **/
+static int i40e_vsi_setup_queue_map_mqprio(struct i40e_vsi *vsi,
+					   struct i40e_vsi_context *ctxt,
+					   u8 enabled_tc)
+{
+	u16 qcount = 0, max_qcount, qmap, sections = 0;
+	int i, override_q, pow, num_qps, ret;
+	u8 netdev_tc = 0, offset = 0;
+
+	if (vsi->type != I40E_VSI_MAIN)
+		return -EINVAL;
+	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+	sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+	vsi->tc_config.numtc = vsi->mqprio_qopt.qopt.num_tc;
+	vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
+	num_qps = vsi->mqprio_qopt.qopt.count[0];
+
+	/* find the next higher power-of-2 of num queue pairs */
+	pow = ilog2(num_qps);
+	if (!is_power_of_2(num_qps))
+		pow++;
+	qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+		(pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+
+	/* Setup queue offset/count for all TCs for given VSI */
+	max_qcount = vsi->mqprio_qopt.qopt.count[0];
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		/* See if the given TC is enabled for the given VSI */
+		if (vsi->tc_config.enabled_tc & BIT(i)) {
+			offset = vsi->mqprio_qopt.qopt.offset[i];
+			qcount = vsi->mqprio_qopt.qopt.count[i];
+			if (qcount > max_qcount)
+				max_qcount = qcount;
+			vsi->tc_config.tc_info[i].qoffset = offset;
+			vsi->tc_config.tc_info[i].qcount = qcount;
+			vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++;
+		} else {
+			/* TC is not enabled so set the offset to
+			 * default queue and allocate one queue
+			 * for the given TC.
+			 */
+			vsi->tc_config.tc_info[i].qoffset = 0;
+			vsi->tc_config.tc_info[i].qcount = 1;
+			vsi->tc_config.tc_info[i].netdev_tc = 0;
+		}
+	}
+
+	/* Set actual Tx/Rx queue pairs */
+	vsi->num_queue_pairs = offset + qcount;
+
+	/* Setup queue TC[0].qmap for given VSI context */
+	ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+	ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+	ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
+	ctxt->info.valid_sections |= cpu_to_le16(sections);
+
+	/* Reconfigure RSS for main VSI with max queue count */
+	vsi->rss_size = max_qcount;
+	ret = i40e_vsi_config_rss(vsi);
+	if (ret) {
+		dev_info(&vsi->back->pdev->dev,
+			 "Failed to reconfig rss for num_queues (%u)\n",
+			 max_qcount);
+		return ret;
+	}
+	vsi->reconfig_rss = true;
+	dev_dbg(&vsi->back->pdev->dev,
+		"Reconfigured rss with num_queues (%u)\n", max_qcount);
+
+	/* Find queue count available for channel VSIs and starting offset
+	 * for channel VSIs
+	 */
+	override_q = vsi->mqprio_qopt.qopt.count[0];
+	if (override_q && override_q < vsi->num_queue_pairs) {
+		vsi->cnt_q_avail = vsi->num_queue_pairs - override_q;
+		vsi->next_base_queue = override_q;
+	}
+	return 0;
+}
+
+/**
+ * i40e_vsi_setup_queue_map - Setup a VSI queue map based on enabled_tc
+ * @vsi: the VSI being setup
+ * @ctxt: VSI context structure
+ * @enabled_tc: Enabled TCs bitmap
+ * @is_add: True if called before Add VSI
+ *
+ * Setup VSI queue mapping for enabled traffic classes.
+ **/
+static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
+				     struct i40e_vsi_context *ctxt,
+				     u8 enabled_tc,
+				     bool is_add)
+{
+	struct i40e_pf *pf = vsi->back;
+	u16 num_tc_qps = 0;
+	u16 sections = 0;
+	u8 netdev_tc = 0;
+	u16 numtc = 1;
+	u16 qcount;
+	u8 offset;
+	u16 qmap;
+	int i;
+
+	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+	offset = 0;
+	/* zero out queue mapping, it will get updated on the end of the function */
+	memset(ctxt->info.queue_mapping, 0, sizeof(ctxt->info.queue_mapping));
+
+	if (vsi->type == I40E_VSI_MAIN) {
+		/* This code helps add more queue to the VSI if we have
+		 * more cores than RSS can support, the higher cores will
+		 * be served by ATR or other filters. Furthermore, the
+		 * non-zero req_queue_pairs says that user requested a new
+		 * queue count via ethtool's set_channels, so use this
+		 * value for queues distribution across traffic classes
+		 * We need at least one queue pair for the interface
+		 * to be usable as we see in else statement.
+		 */
+		if (vsi->req_queue_pairs > 0)
+			vsi->num_queue_pairs = vsi->req_queue_pairs;
+		else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+			vsi->num_queue_pairs = pf->num_lan_msix;
+		else
+			vsi->num_queue_pairs = 1;
+	}
+
+	/* Number of queues per enabled TC */
+	if (vsi->type == I40E_VSI_MAIN ||
+	    (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs != 0))
+		num_tc_qps = vsi->num_queue_pairs;
+	else
+		num_tc_qps = vsi->alloc_queue_pairs;
+
+	if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) {
+		/* Find numtc from enabled TC bitmap */
+		for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+			if (enabled_tc & BIT(i)) /* TC is enabled */
+				numtc++;
+		}
+		if (!numtc) {
+			dev_warn(&pf->pdev->dev, "DCB is enabled but no TC enabled, forcing TC0\n");
+			numtc = 1;
+		}
+		num_tc_qps = num_tc_qps / numtc;
+		num_tc_qps = min_t(int, num_tc_qps,
+				   i40e_pf_get_max_q_per_tc(pf));
+	}
+
+	vsi->tc_config.numtc = numtc;
+	vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
+
+	/* Do not allow use more TC queue pairs than MSI-X vectors exist */
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+		num_tc_qps = min_t(int, num_tc_qps, pf->num_lan_msix);
+
+	/* Setup queue offset/count for all TCs for given VSI */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		/* See if the given TC is enabled for the given VSI */
+		if (vsi->tc_config.enabled_tc & BIT(i)) {
+			/* TC is enabled */
+			int pow, num_qps;
+
+			switch (vsi->type) {
+			case I40E_VSI_MAIN:
+				if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED |
+				    I40E_FLAG_FD_ATR_ENABLED)) ||
+				    vsi->tc_config.enabled_tc != 1) {
+					qcount = min_t(int, pf->alloc_rss_size,
+						       num_tc_qps);
+					break;
+				}
+				fallthrough;
+			case I40E_VSI_FDIR:
+			case I40E_VSI_SRIOV:
+			case I40E_VSI_VMDQ2:
+			default:
+				qcount = num_tc_qps;
+				WARN_ON(i != 0);
+				break;
+			}
+			vsi->tc_config.tc_info[i].qoffset = offset;
+			vsi->tc_config.tc_info[i].qcount = qcount;
+
+			/* find the next higher power-of-2 of num queue pairs */
+			num_qps = qcount;
+			pow = 0;
+			while (num_qps && (BIT_ULL(pow) < qcount)) {
+				pow++;
+				num_qps >>= 1;
+			}
+
+			vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++;
+			qmap =
+			    (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+			    (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+
+			offset += qcount;
+		} else {
+			/* TC is not enabled so set the offset to
+			 * default queue and allocate one queue
+			 * for the given TC.
+			 */
+			vsi->tc_config.tc_info[i].qoffset = 0;
+			vsi->tc_config.tc_info[i].qcount = 1;
+			vsi->tc_config.tc_info[i].netdev_tc = 0;
+
+			qmap = 0;
+		}
+		ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
+	}
+	/* Do not change previously set num_queue_pairs for PFs and VFs*/
+	if ((vsi->type == I40E_VSI_MAIN && numtc != 1) ||
+	    (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs == 0) ||
+	    (vsi->type != I40E_VSI_MAIN && vsi->type != I40E_VSI_SRIOV))
+		vsi->num_queue_pairs = offset;
+
+	/* Scheduler section valid can only be set for ADD VSI */
+	if (is_add) {
+		sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+
+		ctxt->info.up_enable_bits = enabled_tc;
+	}
+	if (vsi->type == I40E_VSI_SRIOV) {
+		ctxt->info.mapping_flags |=
+				     cpu_to_le16(I40E_AQ_VSI_QUE_MAP_NONCONTIG);
+		for (i = 0; i < vsi->num_queue_pairs; i++)
+			ctxt->info.queue_mapping[i] =
+					       cpu_to_le16(vsi->base_queue + i);
+	} else {
+		ctxt->info.mapping_flags |=
+					cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+		ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
+	}
+	ctxt->info.valid_sections |= cpu_to_le16(sections);
+}
+
+/**
+ * i40e_addr_sync - Callback for dev_(mc|uc)_sync to add address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int i40e_addr_sync(struct net_device *netdev, const u8 *addr)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+
+	if (i40e_add_mac_filter(vsi, addr))
+		return 0;
+	else
+		return -ENOMEM;
+}
+
+/**
+ * i40e_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+
+	/* Under some circumstances, we might receive a request to delete
+	 * our own device address from our uc list. Because we store the
+	 * device address in the VSI's MAC/VLAN filter list, we need to ignore
+	 * such requests and not delete our device address from this list.
+	 */
+	if (ether_addr_equal(addr, netdev->dev_addr))
+		return 0;
+
+	i40e_del_mac_filter(vsi, addr);
+
+	return 0;
+}
+
+/**
+ * i40e_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ **/
+static void i40e_set_rx_mode(struct net_device *netdev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+
+	__dev_uc_sync(netdev, i40e_addr_sync, i40e_addr_unsync);
+	__dev_mc_sync(netdev, i40e_addr_sync, i40e_addr_unsync);
+
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	/* check for other flag changes */
+	if (vsi->current_netdev_flags != vsi->netdev->flags) {
+		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+		set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
+	}
+}
+
+/**
+ * i40e_undo_del_filter_entries - Undo the changes made to MAC filter entries
+ * @vsi: Pointer to VSI struct
+ * @from: Pointer to list which contains MAC filter entries - changes to
+ *        those entries needs to be undone.
+ *
+ * MAC filter entries from this list were slated for deletion.
+ **/
+static void i40e_undo_del_filter_entries(struct i40e_vsi *vsi,
+					 struct hlist_head *from)
+{
+	struct i40e_mac_filter *f;
+	struct hlist_node *h;
+
+	hlist_for_each_entry_safe(f, h, from, hlist) {
+		u64 key = i40e_addr_to_hkey(f->macaddr);
+
+		/* Move the element back into MAC filter list*/
+		hlist_del(&f->hlist);
+		hash_add(vsi->mac_filter_hash, &f->hlist, key);
+	}
+}
+
+/**
+ * i40e_undo_add_filter_entries - Undo the changes made to MAC filter entries
+ * @vsi: Pointer to vsi struct
+ * @from: Pointer to list which contains MAC filter entries - changes to
+ *        those entries needs to be undone.
+ *
+ * MAC filter entries from this list were slated for addition.
+ **/
+static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi,
+					 struct hlist_head *from)
+{
+	struct i40e_new_mac_filter *new;
+	struct hlist_node *h;
+
+	hlist_for_each_entry_safe(new, h, from, hlist) {
+		/* We can simply free the wrapper structure */
+		hlist_del(&new->hlist);
+		netdev_hw_addr_refcnt(new->f, vsi->netdev, -1);
+		kfree(new);
+	}
+}
+
+/**
+ * i40e_next_filter - Get the next non-broadcast filter from a list
+ * @next: pointer to filter in list
+ *
+ * Returns the next non-broadcast filter in the list. Required so that we
+ * ignore broadcast filters within the list, since these are not handled via
+ * the normal firmware update path.
+ */
+static
+struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next)
+{
+	hlist_for_each_entry_continue(next, hlist) {
+		if (!is_broadcast_ether_addr(next->f->macaddr))
+			return next;
+	}
+
+	return NULL;
+}
+
+/**
+ * i40e_update_filter_state - Update filter state based on return data
+ * from firmware
+ * @count: Number of filters added
+ * @add_list: return data from fw
+ * @add_head: pointer to first filter in current batch
+ *
+ * MAC filter entries from list were slated to be added to device. Returns
+ * number of successful filters. Note that 0 does NOT mean success!
+ **/
+static int
+i40e_update_filter_state(int count,
+			 struct i40e_aqc_add_macvlan_element_data *add_list,
+			 struct i40e_new_mac_filter *add_head)
+{
+	int retval = 0;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		/* Always check status of each filter. We don't need to check
+		 * the firmware return status because we pre-set the filter
+		 * status to I40E_AQC_MM_ERR_NO_RES when sending the filter
+		 * request to the adminq. Thus, if it no longer matches then
+		 * we know the filter is active.
+		 */
+		if (add_list[i].match_method == I40E_AQC_MM_ERR_NO_RES) {
+			add_head->state = I40E_FILTER_FAILED;
+		} else {
+			add_head->state = I40E_FILTER_ACTIVE;
+			retval++;
+		}
+
+		add_head = i40e_next_filter(add_head);
+		if (!add_head)
+			break;
+	}
+
+	return retval;
+}
+
+/**
+ * i40e_aqc_del_filters - Request firmware to delete a set of filters
+ * @vsi: ptr to the VSI
+ * @vsi_name: name to display in messages
+ * @list: the list of filters to send to firmware
+ * @num_del: the number of filters to delete
+ * @retval: Set to -EIO on failure to delete
+ *
+ * Send a request to firmware via AdminQ to delete a set of filters. Uses
+ * *retval instead of a return value so that success does not force ret_val to
+ * be set to 0. This ensures that a sequence of calls to this function
+ * preserve the previous value of *retval on successful delete.
+ */
+static
+void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name,
+			  struct i40e_aqc_remove_macvlan_element_data *list,
+			  int num_del, int *retval)
+{
+	struct i40e_hw *hw = &vsi->back->hw;
+	enum i40e_admin_queue_err aq_status;
+	int aq_ret;
+
+	aq_ret = i40e_aq_remove_macvlan_v2(hw, vsi->seid, list, num_del, NULL,
+					   &aq_status);
+
+	/* Explicitly ignore and do not report when firmware returns ENOENT */
+	if (aq_ret && !(aq_status == I40E_AQ_RC_ENOENT)) {
+		*retval = -EIO;
+		dev_info(&vsi->back->pdev->dev,
+			 "ignoring delete macvlan error on %s, err %pe, aq_err %s\n",
+			 vsi_name, ERR_PTR(aq_ret),
+			 i40e_aq_str(hw, aq_status));
+	}
+}
+
+/**
+ * i40e_aqc_add_filters - Request firmware to add a set of filters
+ * @vsi: ptr to the VSI
+ * @vsi_name: name to display in messages
+ * @list: the list of filters to send to firmware
+ * @add_head: Position in the add hlist
+ * @num_add: the number of filters to add
+ *
+ * Send a request to firmware via AdminQ to add a chunk of filters. Will set
+ * __I40E_VSI_OVERFLOW_PROMISC bit in vsi->state if the firmware has run out of
+ * space for more filters.
+ */
+static
+void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
+			  struct i40e_aqc_add_macvlan_element_data *list,
+			  struct i40e_new_mac_filter *add_head,
+			  int num_add)
+{
+	struct i40e_hw *hw = &vsi->back->hw;
+	enum i40e_admin_queue_err aq_status;
+	int fcnt;
+
+	i40e_aq_add_macvlan_v2(hw, vsi->seid, list, num_add, NULL, &aq_status);
+	fcnt = i40e_update_filter_state(num_add, list, add_head);
+
+	if (fcnt != num_add) {
+		if (vsi->type == I40E_VSI_MAIN) {
+			set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+			dev_warn(&vsi->back->pdev->dev,
+				 "Error %s adding RX filters on %s, promiscuous mode forced on\n",
+				 i40e_aq_str(hw, aq_status), vsi_name);
+		} else if (vsi->type == I40E_VSI_SRIOV ||
+			   vsi->type == I40E_VSI_VMDQ1 ||
+			   vsi->type == I40E_VSI_VMDQ2) {
+			dev_warn(&vsi->back->pdev->dev,
+				 "Error %s adding RX filters on %s, please set promiscuous on manually for %s\n",
+				 i40e_aq_str(hw, aq_status), vsi_name,
+					     vsi_name);
+		} else {
+			dev_warn(&vsi->back->pdev->dev,
+				 "Error %s adding RX filters on %s, incorrect VSI type: %i.\n",
+				 i40e_aq_str(hw, aq_status), vsi_name,
+					     vsi->type);
+		}
+	}
+}
+
+/**
+ * i40e_aqc_broadcast_filter - Set promiscuous broadcast flags
+ * @vsi: pointer to the VSI
+ * @vsi_name: the VSI name
+ * @f: filter data
+ *
+ * This function sets or clears the promiscuous broadcast flags for VLAN
+ * filters in order to properly receive broadcast frames. Assumes that only
+ * broadcast filters are passed.
+ *
+ * Returns status indicating success or failure;
+ **/
+static int
+i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name,
+			  struct i40e_mac_filter *f)
+{
+	bool enable = f->state == I40E_FILTER_NEW;
+	struct i40e_hw *hw = &vsi->back->hw;
+	int aq_ret;
+
+	if (f->vlan == I40E_VLAN_ANY) {
+		aq_ret = i40e_aq_set_vsi_broadcast(hw,
+						   vsi->seid,
+						   enable,
+						   NULL);
+	} else {
+		aq_ret = i40e_aq_set_vsi_bc_promisc_on_vlan(hw,
+							    vsi->seid,
+							    enable,
+							    f->vlan,
+							    NULL);
+	}
+
+	if (aq_ret) {
+		set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+		dev_warn(&vsi->back->pdev->dev,
+			 "Error %s, forcing overflow promiscuous on %s\n",
+			 i40e_aq_str(hw, hw->aq.asq_last_status),
+			 vsi_name);
+	}
+
+	return aq_ret;
+}
+
+/**
+ * i40e_set_promiscuous - set promiscuous mode
+ * @pf: board private structure
+ * @promisc: promisc on or off
+ *
+ * There are different ways of setting promiscuous mode on a PF depending on
+ * what state/environment we're in.  This identifies and sets it appropriately.
+ * Returns 0 on success.
+ **/
+static int i40e_set_promiscuous(struct i40e_pf *pf, bool promisc)
+{
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	struct i40e_hw *hw = &pf->hw;
+	int aq_ret;
+
+	if (vsi->type == I40E_VSI_MAIN &&
+	    pf->lan_veb != I40E_NO_VEB &&
+	    !(pf->flags & I40E_FLAG_MFP_ENABLED)) {
+		/* set defport ON for Main VSI instead of true promisc
+		 * this way we will get all unicast/multicast and VLAN
+		 * promisc behavior but will not get VF or VMDq traffic
+		 * replicated on the Main VSI.
+		 */
+		if (promisc)
+			aq_ret = i40e_aq_set_default_vsi(hw,
+							 vsi->seid,
+							 NULL);
+		else
+			aq_ret = i40e_aq_clear_default_vsi(hw,
+							   vsi->seid,
+							   NULL);
+		if (aq_ret) {
+			dev_info(&pf->pdev->dev,
+				 "Set default VSI failed, err %pe, aq_err %s\n",
+				 ERR_PTR(aq_ret),
+				 i40e_aq_str(hw, hw->aq.asq_last_status));
+		}
+	} else {
+		aq_ret = i40e_aq_set_vsi_unicast_promiscuous(
+						  hw,
+						  vsi->seid,
+						  promisc, NULL,
+						  true);
+		if (aq_ret) {
+			dev_info(&pf->pdev->dev,
+				 "set unicast promisc failed, err %pe, aq_err %s\n",
+				 ERR_PTR(aq_ret),
+				 i40e_aq_str(hw, hw->aq.asq_last_status));
+		}
+		aq_ret = i40e_aq_set_vsi_multicast_promiscuous(
+						  hw,
+						  vsi->seid,
+						  promisc, NULL);
+		if (aq_ret) {
+			dev_info(&pf->pdev->dev,
+				 "set multicast promisc failed, err %pe, aq_err %s\n",
+				 ERR_PTR(aq_ret),
+				 i40e_aq_str(hw, hw->aq.asq_last_status));
+		}
+	}
+
+	if (!aq_ret)
+		pf->cur_promisc = promisc;
+
+	return aq_ret;
+}
+
+/**
+ * i40e_sync_vsi_filters - Update the VSI filter list to the HW
+ * @vsi: ptr to the VSI
+ *
+ * Push any outstanding VSI filter changes through the AdminQ.
+ *
+ * Returns 0 or error value
+ **/
+int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
+{
+	struct hlist_head tmp_add_list, tmp_del_list;
+	struct i40e_mac_filter *f;
+	struct i40e_new_mac_filter *new, *add_head = NULL;
+	struct i40e_hw *hw = &vsi->back->hw;
+	bool old_overflow, new_overflow;
+	unsigned int failed_filters = 0;
+	unsigned int vlan_filters = 0;
+	char vsi_name[16] = "PF";
+	int filter_list_len = 0;
+	u32 changed_flags = 0;
+	struct hlist_node *h;
+	struct i40e_pf *pf;
+	int num_add = 0;
+	int num_del = 0;
+	int aq_ret = 0;
+	int retval = 0;
+	u16 cmd_flags;
+	int list_size;
+	int bkt;
+
+	/* empty array typed pointers, kcalloc later */
+	struct i40e_aqc_add_macvlan_element_data *add_list;
+	struct i40e_aqc_remove_macvlan_element_data *del_list;
+
+	while (test_and_set_bit(__I40E_VSI_SYNCING_FILTERS, vsi->state))
+		usleep_range(1000, 2000);
+	pf = vsi->back;
+
+	old_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+
+	if (vsi->netdev) {
+		changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
+		vsi->current_netdev_flags = vsi->netdev->flags;
+	}
+
+	INIT_HLIST_HEAD(&tmp_add_list);
+	INIT_HLIST_HEAD(&tmp_del_list);
+
+	if (vsi->type == I40E_VSI_SRIOV)
+		snprintf(vsi_name, sizeof(vsi_name) - 1, "VF %d", vsi->vf_id);
+	else if (vsi->type != I40E_VSI_MAIN)
+		snprintf(vsi_name, sizeof(vsi_name) - 1, "vsi %d", vsi->seid);
+
+	if (vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED) {
+		vsi->flags &= ~I40E_VSI_FLAG_FILTER_CHANGED;
+
+		spin_lock_bh(&vsi->mac_filter_hash_lock);
+		/* Create a list of filters to delete. */
+		hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+			if (f->state == I40E_FILTER_REMOVE) {
+				/* Move the element into temporary del_list */
+				hash_del(&f->hlist);
+				hlist_add_head(&f->hlist, &tmp_del_list);
+
+				/* Avoid counting removed filters */
+				continue;
+			}
+			if (f->state == I40E_FILTER_NEW) {
+				/* Create a temporary i40e_new_mac_filter */
+				new = kzalloc(sizeof(*new), GFP_ATOMIC);
+				if (!new)
+					goto err_no_memory_locked;
+
+				/* Store pointer to the real filter */
+				new->f = f;
+				new->state = f->state;
+
+				/* Add it to the hash list */
+				hlist_add_head(&new->hlist, &tmp_add_list);
+			}
+
+			/* Count the number of active (current and new) VLAN
+			 * filters we have now. Does not count filters which
+			 * are marked for deletion.
+			 */
+			if (f->vlan > 0)
+				vlan_filters++;
+		}
+
+		if (vsi->type != I40E_VSI_SRIOV)
+			retval = i40e_correct_mac_vlan_filters
+				(vsi, &tmp_add_list, &tmp_del_list,
+				 vlan_filters);
+		else if (pf->vf)
+			retval = i40e_correct_vf_mac_vlan_filters
+				(vsi, &tmp_add_list, &tmp_del_list,
+				 vlan_filters, pf->vf[vsi->vf_id].trusted);
+
+		hlist_for_each_entry(new, &tmp_add_list, hlist)
+			netdev_hw_addr_refcnt(new->f, vsi->netdev, 1);
+
+		if (retval)
+			goto err_no_memory_locked;
+
+		spin_unlock_bh(&vsi->mac_filter_hash_lock);
+	}
+
+	/* Now process 'del_list' outside the lock */
+	if (!hlist_empty(&tmp_del_list)) {
+		filter_list_len = hw->aq.asq_buf_size /
+			    sizeof(struct i40e_aqc_remove_macvlan_element_data);
+		list_size = filter_list_len *
+			    sizeof(struct i40e_aqc_remove_macvlan_element_data);
+		del_list = kzalloc(list_size, GFP_ATOMIC);
+		if (!del_list)
+			goto err_no_memory;
+
+		hlist_for_each_entry_safe(f, h, &tmp_del_list, hlist) {
+			cmd_flags = 0;
+
+			/* handle broadcast filters by updating the broadcast
+			 * promiscuous flag and release filter list.
+			 */
+			if (is_broadcast_ether_addr(f->macaddr)) {
+				i40e_aqc_broadcast_filter(vsi, vsi_name, f);
+
+				hlist_del(&f->hlist);
+				kfree(f);
+				continue;
+			}
+
+			/* add to delete list */
+			ether_addr_copy(del_list[num_del].mac_addr, f->macaddr);
+			if (f->vlan == I40E_VLAN_ANY) {
+				del_list[num_del].vlan_tag = 0;
+				cmd_flags |= I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
+			} else {
+				del_list[num_del].vlan_tag =
+					cpu_to_le16((u16)(f->vlan));
+			}
+
+			cmd_flags |= I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
+			del_list[num_del].flags = cmd_flags;
+			num_del++;
+
+			/* flush a full buffer */
+			if (num_del == filter_list_len) {
+				i40e_aqc_del_filters(vsi, vsi_name, del_list,
+						     num_del, &retval);
+				memset(del_list, 0, list_size);
+				num_del = 0;
+			}
+			/* Release memory for MAC filter entries which were
+			 * synced up with HW.
+			 */
+			hlist_del(&f->hlist);
+			kfree(f);
+		}
+
+		if (num_del) {
+			i40e_aqc_del_filters(vsi, vsi_name, del_list,
+					     num_del, &retval);
+		}
+
+		kfree(del_list);
+		del_list = NULL;
+	}
+
+	if (!hlist_empty(&tmp_add_list)) {
+		/* Do all the adds now. */
+		filter_list_len = hw->aq.asq_buf_size /
+			       sizeof(struct i40e_aqc_add_macvlan_element_data);
+		list_size = filter_list_len *
+			       sizeof(struct i40e_aqc_add_macvlan_element_data);
+		add_list = kzalloc(list_size, GFP_ATOMIC);
+		if (!add_list)
+			goto err_no_memory;
+
+		num_add = 0;
+		hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) {
+			/* handle broadcast filters by updating the broadcast
+			 * promiscuous flag instead of adding a MAC filter.
+			 */
+			if (is_broadcast_ether_addr(new->f->macaddr)) {
+				if (i40e_aqc_broadcast_filter(vsi, vsi_name,
+							      new->f))
+					new->state = I40E_FILTER_FAILED;
+				else
+					new->state = I40E_FILTER_ACTIVE;
+				continue;
+			}
+
+			/* add to add array */
+			if (num_add == 0)
+				add_head = new;
+			cmd_flags = 0;
+			ether_addr_copy(add_list[num_add].mac_addr,
+					new->f->macaddr);
+			if (new->f->vlan == I40E_VLAN_ANY) {
+				add_list[num_add].vlan_tag = 0;
+				cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN;
+			} else {
+				add_list[num_add].vlan_tag =
+					cpu_to_le16((u16)(new->f->vlan));
+			}
+			add_list[num_add].queue_number = 0;
+			/* set invalid match method for later detection */
+			add_list[num_add].match_method = I40E_AQC_MM_ERR_NO_RES;
+			cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH;
+			add_list[num_add].flags = cpu_to_le16(cmd_flags);
+			num_add++;
+
+			/* flush a full buffer */
+			if (num_add == filter_list_len) {
+				i40e_aqc_add_filters(vsi, vsi_name, add_list,
+						     add_head, num_add);
+				memset(add_list, 0, list_size);
+				num_add = 0;
+			}
+		}
+		if (num_add) {
+			i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head,
+					     num_add);
+		}
+		/* Now move all of the filters from the temp add list back to
+		 * the VSI's list.
+		 */
+		spin_lock_bh(&vsi->mac_filter_hash_lock);
+		hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) {
+			/* Only update the state if we're still NEW */
+			if (new->f->state == I40E_FILTER_NEW)
+				new->f->state = new->state;
+			hlist_del(&new->hlist);
+			netdev_hw_addr_refcnt(new->f, vsi->netdev, -1);
+			kfree(new);
+		}
+		spin_unlock_bh(&vsi->mac_filter_hash_lock);
+		kfree(add_list);
+		add_list = NULL;
+	}
+
+	/* Determine the number of active and failed filters. */
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	vsi->active_filters = 0;
+	hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
+		if (f->state == I40E_FILTER_ACTIVE)
+			vsi->active_filters++;
+		else if (f->state == I40E_FILTER_FAILED)
+			failed_filters++;
+	}
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	/* Check if we are able to exit overflow promiscuous mode. We can
+	 * safely exit if we didn't just enter, we no longer have any failed
+	 * filters, and we have reduced filters below the threshold value.
+	 */
+	if (old_overflow && !failed_filters &&
+	    vsi->active_filters < vsi->promisc_threshold) {
+		dev_info(&pf->pdev->dev,
+			 "filter logjam cleared on %s, leaving overflow promiscuous mode\n",
+			 vsi_name);
+		clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+		vsi->promisc_threshold = 0;
+	}
+
+	/* if the VF is not trusted do not do promisc */
+	if (vsi->type == I40E_VSI_SRIOV && pf->vf &&
+	    !pf->vf[vsi->vf_id].trusted) {
+		clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+		goto out;
+	}
+
+	new_overflow = test_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+
+	/* If we are entering overflow promiscuous, we need to calculate a new
+	 * threshold for when we are safe to exit
+	 */
+	if (!old_overflow && new_overflow)
+		vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
+
+	/* check for changes in promiscuous modes */
+	if (changed_flags & IFF_ALLMULTI) {
+		bool cur_multipromisc;
+
+		cur_multipromisc = !!(vsi->current_netdev_flags & IFF_ALLMULTI);
+		aq_ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw,
+							       vsi->seid,
+							       cur_multipromisc,
+							       NULL);
+		if (aq_ret) {
+			retval = i40e_aq_rc_to_posix(aq_ret,
+						     hw->aq.asq_last_status);
+			dev_info(&pf->pdev->dev,
+				 "set multi promisc failed on %s, err %pe aq_err %s\n",
+				 vsi_name,
+				 ERR_PTR(aq_ret),
+				 i40e_aq_str(hw, hw->aq.asq_last_status));
+		} else {
+			dev_info(&pf->pdev->dev, "%s allmulti mode.\n",
+				 cur_multipromisc ? "entering" : "leaving");
+		}
+	}
+
+	if ((changed_flags & IFF_PROMISC) || old_overflow != new_overflow) {
+		bool cur_promisc;
+
+		cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) ||
+			       new_overflow);
+		aq_ret = i40e_set_promiscuous(pf, cur_promisc);
+		if (aq_ret) {
+			retval = i40e_aq_rc_to_posix(aq_ret,
+						     hw->aq.asq_last_status);
+			dev_info(&pf->pdev->dev,
+				 "Setting promiscuous %s failed on %s, err %pe aq_err %s\n",
+				 cur_promisc ? "on" : "off",
+				 vsi_name,
+				 ERR_PTR(aq_ret),
+				 i40e_aq_str(hw, hw->aq.asq_last_status));
+		}
+	}
+out:
+	/* if something went wrong then set the changed flag so we try again */
+	if (retval)
+		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+
+	clear_bit(__I40E_VSI_SYNCING_FILTERS, vsi->state);
+	return retval;
+
+err_no_memory:
+	/* Restore elements on the temporary add and delete lists */
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+err_no_memory_locked:
+	i40e_undo_del_filter_entries(vsi, &tmp_del_list);
+	i40e_undo_add_filter_entries(vsi, &tmp_add_list);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+	clear_bit(__I40E_VSI_SYNCING_FILTERS, vsi->state);
+	return -ENOMEM;
+}
+
+/**
+ * i40e_sync_filters_subtask - Sync the VSI filter list with HW
+ * @pf: board private structure
+ **/
+static void i40e_sync_filters_subtask(struct i40e_pf *pf)
+{
+	int v;
+
+	if (!pf)
+		return;
+	if (!test_and_clear_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state))
+		return;
+	if (test_bit(__I40E_VF_DISABLE, pf->state)) {
+		set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
+		return;
+	}
+
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
+		if (pf->vsi[v] &&
+		    (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED) &&
+		    !test_bit(__I40E_VSI_RELEASING, pf->vsi[v]->state)) {
+			int ret = i40e_sync_vsi_filters(pf->vsi[v]);
+
+			if (ret) {
+				/* come back and try again later */
+				set_bit(__I40E_MACVLAN_SYNC_PENDING,
+					pf->state);
+				break;
+			}
+		}
+	}
+}
+
+/**
+ * i40e_calculate_vsi_rx_buf_len - Calculates buffer length
+ *
+ * @vsi: VSI to calculate rx_buf_len from
+ */
+static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi)
+{
+	if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
+		return SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048);
+
+	return PAGE_SIZE < 8192 ? I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048;
+}
+
+/**
+ * i40e_max_vsi_frame_size - returns the maximum allowed frame size for VSI
+ * @vsi: the vsi
+ * @xdp_prog: XDP program
+ **/
+static int i40e_max_vsi_frame_size(struct i40e_vsi *vsi,
+				   struct bpf_prog *xdp_prog)
+{
+	u16 rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi);
+	u16 chain_len;
+
+	if (xdp_prog && !xdp_prog->aux->xdp_has_frags)
+		chain_len = 1;
+	else
+		chain_len = I40E_MAX_CHAINED_RX_BUFFERS;
+
+	return min_t(u16, rx_buf_len * chain_len, I40E_MAX_RXBUFFER);
+}
+
+/**
+ * i40e_change_mtu - NDO callback to change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	int frame_size;
+
+	frame_size = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog);
+	if (new_mtu > frame_size - I40E_PACKET_HDR_PAD) {
+		netdev_err(netdev, "Error changing mtu to %d, Max is %d\n",
+			   new_mtu, frame_size - I40E_PACKET_HDR_PAD);
+		return -EINVAL;
+	}
+
+	netdev_dbg(netdev, "changing MTU from %d to %d\n",
+		   netdev->mtu, new_mtu);
+	netdev->mtu = new_mtu;
+	if (netif_running(netdev))
+		i40e_vsi_reinit_locked(vsi);
+	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
+	set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
+	return 0;
+}
+
+/**
+ * i40e_ioctl - Access the hwtstamp interface
+ * @netdev: network interface device structure
+ * @ifr: interface request data
+ * @cmd: ioctl command
+ **/
+int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+
+	switch (cmd) {
+	case SIOCGHWTSTAMP:
+		return i40e_ptp_get_ts_config(pf, ifr);
+	case SIOCSHWTSTAMP:
+		return i40e_ptp_set_ts_config(pf, ifr);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * i40e_vlan_stripping_enable - Turn on vlan stripping for the VSI
+ * @vsi: the vsi being adjusted
+ **/
+void i40e_vlan_stripping_enable(struct i40e_vsi *vsi)
+{
+	struct i40e_vsi_context ctxt;
+	int ret;
+
+	/* Don't modify stripping options if a port VLAN is active */
+	if (vsi->info.pvid)
+		return;
+
+	if ((vsi->info.valid_sections &
+	     cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) &&
+	    ((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_MODE_MASK) == 0))
+		return;  /* already enabled */
+
+	vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
+	vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL |
+				    I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH;
+
+	ctxt.seid = vsi->seid;
+	ctxt.info = vsi->info;
+	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+	if (ret) {
+		dev_info(&vsi->back->pdev->dev,
+			 "update vlan stripping failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&vsi->back->hw,
+				     vsi->back->hw.aq.asq_last_status));
+	}
+}
+
+/**
+ * i40e_vlan_stripping_disable - Turn off vlan stripping for the VSI
+ * @vsi: the vsi being adjusted
+ **/
+void i40e_vlan_stripping_disable(struct i40e_vsi *vsi)
+{
+	struct i40e_vsi_context ctxt;
+	int ret;
+
+	/* Don't modify stripping options if a port VLAN is active */
+	if (vsi->info.pvid)
+		return;
+
+	if ((vsi->info.valid_sections &
+	     cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) &&
+	    ((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_EMOD_MASK) ==
+	     I40E_AQ_VSI_PVLAN_EMOD_MASK))
+		return;  /* already disabled */
+
+	vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
+	vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL |
+				    I40E_AQ_VSI_PVLAN_EMOD_NOTHING;
+
+	ctxt.seid = vsi->seid;
+	ctxt.info = vsi->info;
+	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+	if (ret) {
+		dev_info(&vsi->back->pdev->dev,
+			 "update vlan stripping failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&vsi->back->hw,
+				     vsi->back->hw.aq.asq_last_status));
+	}
+}
+
+/**
+ * i40e_add_vlan_all_mac - Add a MAC/VLAN filter for each existing MAC address
+ * @vsi: the vsi being configured
+ * @vid: vlan id to be added (0 = untagged only , -1 = any)
+ *
+ * This is a helper function for adding a new MAC/VLAN filter with the
+ * specified VLAN for each existing MAC address already in the hash table.
+ * This function does *not* perform any accounting to update filters based on
+ * VLAN mode.
+ *
+ * NOTE: this function expects to be called while under the
+ * mac_filter_hash_lock
+ **/
+int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid)
+{
+	struct i40e_mac_filter *f, *add_f;
+	struct hlist_node *h;
+	int bkt;
+
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+		/* If we're asked to add a filter that has been marked for
+		 * removal, it is safe to simply restore it to active state.
+		 * __i40e_del_filter will have simply deleted any filters which
+		 * were previously marked NEW or FAILED, so if it is currently
+		 * marked REMOVE it must have previously been ACTIVE. Since we
+		 * haven't yet run the sync filters task, just restore this
+		 * filter to the ACTIVE state so that the sync task leaves it
+		 * in place.
+		 */
+		if (f->state == I40E_FILTER_REMOVE && f->vlan == vid) {
+			f->state = I40E_FILTER_ACTIVE;
+			continue;
+		} else if (f->state == I40E_FILTER_REMOVE) {
+			continue;
+		}
+		add_f = i40e_add_filter(vsi, f->macaddr, vid);
+		if (!add_f) {
+			dev_info(&vsi->back->pdev->dev,
+				 "Could not add vlan filter %d for %pM\n",
+				 vid, f->macaddr);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_vsi_add_vlan - Add VSI membership for given VLAN
+ * @vsi: the VSI being configured
+ * @vid: VLAN id to be added
+ **/
+int i40e_vsi_add_vlan(struct i40e_vsi *vsi, u16 vid)
+{
+	int err;
+
+	if (vsi->info.pvid)
+		return -EINVAL;
+
+	/* The network stack will attempt to add VID=0, with the intention to
+	 * receive priority tagged packets with a VLAN of 0. Our HW receives
+	 * these packets by default when configured to receive untagged
+	 * packets, so we don't need to add a filter for this case.
+	 * Additionally, HW interprets adding a VID=0 filter as meaning to
+	 * receive *only* tagged traffic and stops receiving untagged traffic.
+	 * Thus, we do not want to actually add a filter for VID=0
+	 */
+	if (!vid)
+		return 0;
+
+	/* Locked once because all functions invoked below iterates list*/
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	err = i40e_add_vlan_all_mac(vsi, vid);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+	if (err)
+		return err;
+
+	/* schedule our worker thread which will take care of
+	 * applying the new filter changes
+	 */
+	i40e_service_event_schedule(vsi->back);
+	return 0;
+}
+
+/**
+ * i40e_rm_vlan_all_mac - Remove MAC/VLAN pair for all MAC with the given VLAN
+ * @vsi: the vsi being configured
+ * @vid: vlan id to be removed (0 = untagged only , -1 = any)
+ *
+ * This function should be used to remove all VLAN filters which match the
+ * given VID. It does not schedule the service event and does not take the
+ * mac_filter_hash_lock so it may be combined with other operations under
+ * a single invocation of the mac_filter_hash_lock.
+ *
+ * NOTE: this function expects to be called while under the
+ * mac_filter_hash_lock
+ */
+void i40e_rm_vlan_all_mac(struct i40e_vsi *vsi, s16 vid)
+{
+	struct i40e_mac_filter *f;
+	struct hlist_node *h;
+	int bkt;
+
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+		if (f->vlan == vid)
+			__i40e_del_filter(vsi, f);
+	}
+}
+
+/**
+ * i40e_vsi_kill_vlan - Remove VSI membership for given VLAN
+ * @vsi: the VSI being configured
+ * @vid: VLAN id to be removed
+ **/
+void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid)
+{
+	if (!vid || vsi->info.pvid)
+		return;
+
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	i40e_rm_vlan_all_mac(vsi, vid);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	/* schedule our worker thread which will take care of
+	 * applying the new filter changes
+	 */
+	i40e_service_event_schedule(vsi->back);
+}
+
+/**
+ * i40e_vlan_rx_add_vid - Add a vlan id filter to HW offload
+ * @netdev: network interface to be adjusted
+ * @proto: unused protocol value
+ * @vid: vlan id to be added
+ *
+ * net_device_ops implementation for adding vlan ids
+ **/
+static int i40e_vlan_rx_add_vid(struct net_device *netdev,
+				__always_unused __be16 proto, u16 vid)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	int ret = 0;
+
+	if (vid >= VLAN_N_VID)
+		return -EINVAL;
+
+	ret = i40e_vsi_add_vlan(vsi, vid);
+	if (!ret)
+		set_bit(vid, vsi->active_vlans);
+
+	return ret;
+}
+
+/**
+ * i40e_vlan_rx_add_vid_up - Add a vlan id filter to HW offload in UP path
+ * @netdev: network interface to be adjusted
+ * @proto: unused protocol value
+ * @vid: vlan id to be added
+ **/
+static void i40e_vlan_rx_add_vid_up(struct net_device *netdev,
+				    __always_unused __be16 proto, u16 vid)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+
+	if (vid >= VLAN_N_VID)
+		return;
+	set_bit(vid, vsi->active_vlans);
+}
+
+/**
+ * i40e_vlan_rx_kill_vid - Remove a vlan id filter from HW offload
+ * @netdev: network interface to be adjusted
+ * @proto: unused protocol value
+ * @vid: vlan id to be removed
+ *
+ * net_device_ops implementation for removing vlan ids
+ **/
+static int i40e_vlan_rx_kill_vid(struct net_device *netdev,
+				 __always_unused __be16 proto, u16 vid)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+
+	/* return code is ignored as there is nothing a user
+	 * can do about failure to remove and a log message was
+	 * already printed from the other function
+	 */
+	i40e_vsi_kill_vlan(vsi, vid);
+
+	clear_bit(vid, vsi->active_vlans);
+
+	return 0;
+}
+
+/**
+ * i40e_restore_vlan - Reinstate vlans when vsi/netdev comes back up
+ * @vsi: the vsi being brought back up
+ **/
+static void i40e_restore_vlan(struct i40e_vsi *vsi)
+{
+	u16 vid;
+
+	if (!vsi->netdev)
+		return;
+
+	if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+		i40e_vlan_stripping_enable(vsi);
+	else
+		i40e_vlan_stripping_disable(vsi);
+
+	for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID)
+		i40e_vlan_rx_add_vid_up(vsi->netdev, htons(ETH_P_8021Q),
+					vid);
+}
+
+/**
+ * i40e_vsi_add_pvid - Add pvid for the VSI
+ * @vsi: the vsi being adjusted
+ * @vid: the vlan id to set as a PVID
+ **/
+int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid)
+{
+	struct i40e_vsi_context ctxt;
+	int ret;
+
+	vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
+	vsi->info.pvid = cpu_to_le16(vid);
+	vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_TAGGED |
+				    I40E_AQ_VSI_PVLAN_INSERT_PVID |
+				    I40E_AQ_VSI_PVLAN_EMOD_STR;
+
+	ctxt.seid = vsi->seid;
+	ctxt.info = vsi->info;
+	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+	if (ret) {
+		dev_info(&vsi->back->pdev->dev,
+			 "add pvid failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&vsi->back->hw,
+				     vsi->back->hw.aq.asq_last_status));
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_vsi_remove_pvid - Remove the pvid from the VSI
+ * @vsi: the vsi being adjusted
+ *
+ * Just use the vlan_rx_register() service to put it back to normal
+ **/
+void i40e_vsi_remove_pvid(struct i40e_vsi *vsi)
+{
+	vsi->info.pvid = 0;
+
+	i40e_vlan_stripping_disable(vsi);
+}
+
+/**
+ * i40e_vsi_setup_tx_resources - Allocate VSI Tx queue resources
+ * @vsi: ptr to the VSI
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int i40e_vsi_setup_tx_resources(struct i40e_vsi *vsi)
+{
+	int i, err = 0;
+
+	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
+		err = i40e_setup_tx_descriptors(vsi->tx_rings[i]);
+
+	if (!i40e_enabled_xdp_vsi(vsi))
+		return err;
+
+	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
+		err = i40e_setup_tx_descriptors(vsi->xdp_rings[i]);
+
+	return err;
+}
+
+/**
+ * i40e_vsi_free_tx_resources - Free Tx resources for VSI queues
+ * @vsi: ptr to the VSI
+ *
+ * Free VSI's transmit software resources
+ **/
+static void i40e_vsi_free_tx_resources(struct i40e_vsi *vsi)
+{
+	int i;
+
+	if (vsi->tx_rings) {
+		for (i = 0; i < vsi->num_queue_pairs; i++)
+			if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
+				i40e_free_tx_resources(vsi->tx_rings[i]);
+	}
+
+	if (vsi->xdp_rings) {
+		for (i = 0; i < vsi->num_queue_pairs; i++)
+			if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
+				i40e_free_tx_resources(vsi->xdp_rings[i]);
+	}
+}
+
+/**
+ * i40e_vsi_setup_rx_resources - Allocate VSI queues Rx resources
+ * @vsi: ptr to the VSI
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int i40e_vsi_setup_rx_resources(struct i40e_vsi *vsi)
+{
+	int i, err = 0;
+
+	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
+		err = i40e_setup_rx_descriptors(vsi->rx_rings[i]);
+	return err;
+}
+
+/**
+ * i40e_vsi_free_rx_resources - Free Rx Resources for VSI queues
+ * @vsi: ptr to the VSI
+ *
+ * Free all receive software resources
+ **/
+static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi)
+{
+	int i;
+
+	if (!vsi->rx_rings)
+		return;
+
+	for (i = 0; i < vsi->num_queue_pairs; i++)
+		if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc)
+			i40e_free_rx_resources(vsi->rx_rings[i]);
+}
+
+/**
+ * i40e_config_xps_tx_ring - Configure XPS for a Tx ring
+ * @ring: The Tx ring to configure
+ *
+ * This enables/disables XPS for a given Tx descriptor ring
+ * based on the TCs enabled for the VSI that ring belongs to.
+ **/
+static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
+{
+	int cpu;
+
+	if (!ring->q_vector || !ring->netdev || ring->ch)
+		return;
+
+	/* We only initialize XPS once, so as not to overwrite user settings */
+	if (test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state))
+		return;
+
+	cpu = cpumask_local_spread(ring->q_vector->v_idx, -1);
+	netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu),
+			    ring->queue_index);
+}
+
+/**
+ * i40e_xsk_pool - Retrieve the AF_XDP buffer pool if XDP and ZC is enabled
+ * @ring: The Tx or Rx ring
+ *
+ * Returns the AF_XDP buffer pool or NULL.
+ **/
+static struct xsk_buff_pool *i40e_xsk_pool(struct i40e_ring *ring)
+{
+	bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi);
+	int qid = ring->queue_index;
+
+	if (ring_is_xdp(ring))
+		qid -= ring->vsi->alloc_queue_pairs;
+
+	if (!xdp_on || !test_bit(qid, ring->vsi->af_xdp_zc_qps))
+		return NULL;
+
+	return xsk_get_pool_from_qid(ring->vsi->netdev, qid);
+}
+
+/**
+ * i40e_configure_tx_ring - Configure a transmit ring context and rest
+ * @ring: The Tx ring to configure
+ *
+ * Configure the Tx descriptor ring in the HMC context.
+ **/
+static int i40e_configure_tx_ring(struct i40e_ring *ring)
+{
+	struct i40e_vsi *vsi = ring->vsi;
+	u16 pf_q = vsi->base_queue + ring->queue_index;
+	struct i40e_hw *hw = &vsi->back->hw;
+	struct i40e_hmc_obj_txq tx_ctx;
+	u32 qtx_ctl = 0;
+	int err = 0;
+
+	if (ring_is_xdp(ring))
+		ring->xsk_pool = i40e_xsk_pool(ring);
+
+	/* some ATR related tx ring init */
+	if (vsi->back->flags & I40E_FLAG_FD_ATR_ENABLED) {
+		ring->atr_sample_rate = vsi->back->atr_sample_rate;
+		ring->atr_count = 0;
+	} else {
+		ring->atr_sample_rate = 0;
+	}
+
+	/* configure XPS */
+	i40e_config_xps_tx_ring(ring);
+
+	/* clear the context structure first */
+	memset(&tx_ctx, 0, sizeof(tx_ctx));
+
+	tx_ctx.new_context = 1;
+	tx_ctx.base = (ring->dma / 128);
+	tx_ctx.qlen = ring->count;
+	tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED |
+					       I40E_FLAG_FD_ATR_ENABLED));
+	tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP);
+	/* FDIR VSI tx ring can still use RS bit and writebacks */
+	if (vsi->type != I40E_VSI_FDIR)
+		tx_ctx.head_wb_ena = 1;
+	tx_ctx.head_wb_addr = ring->dma +
+			      (ring->count * sizeof(struct i40e_tx_desc));
+
+	/* As part of VSI creation/update, FW allocates certain
+	 * Tx arbitration queue sets for each TC enabled for
+	 * the VSI. The FW returns the handles to these queue
+	 * sets as part of the response buffer to Add VSI,
+	 * Update VSI, etc. AQ commands. It is expected that
+	 * these queue set handles be associated with the Tx
+	 * queues by the driver as part of the TX queue context
+	 * initialization. This has to be done regardless of
+	 * DCB as by default everything is mapped to TC0.
+	 */
+
+	if (ring->ch)
+		tx_ctx.rdylist =
+			le16_to_cpu(ring->ch->info.qs_handle[ring->dcb_tc]);
+
+	else
+		tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
+
+	tx_ctx.rdylist_act = 0;
+
+	/* clear the context in the HMC */
+	err = i40e_clear_lan_tx_queue_context(hw, pf_q);
+	if (err) {
+		dev_info(&vsi->back->pdev->dev,
+			 "Failed to clear LAN Tx queue context on Tx ring %d (pf_q %d), error: %d\n",
+			 ring->queue_index, pf_q, err);
+		return -ENOMEM;
+	}
+
+	/* set the context in the HMC */
+	err = i40e_set_lan_tx_queue_context(hw, pf_q, &tx_ctx);
+	if (err) {
+		dev_info(&vsi->back->pdev->dev,
+			 "Failed to set LAN Tx queue context on Tx ring %d (pf_q %d, error: %d\n",
+			 ring->queue_index, pf_q, err);
+		return -ENOMEM;
+	}
+
+	/* Now associate this queue with this PCI function */
+	if (ring->ch) {
+		if (ring->ch->type == I40E_VSI_VMDQ2)
+			qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
+		else
+			return -EINVAL;
+
+		qtx_ctl |= (ring->ch->vsi_number <<
+			    I40E_QTX_CTL_VFVM_INDX_SHIFT) &
+			    I40E_QTX_CTL_VFVM_INDX_MASK;
+	} else {
+		if (vsi->type == I40E_VSI_VMDQ2) {
+			qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
+			qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) &
+				    I40E_QTX_CTL_VFVM_INDX_MASK;
+		} else {
+			qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
+		}
+	}
+
+	qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
+		    I40E_QTX_CTL_PF_INDX_MASK);
+	wr32(hw, I40E_QTX_CTL(pf_q), qtx_ctl);
+	i40e_flush(hw);
+
+	/* cache tail off for easier writes later */
+	ring->tail = hw->hw_addr + I40E_QTX_TAIL(pf_q);
+
+	return 0;
+}
+
+/**
+ * i40e_rx_offset - Return expected offset into page to access data
+ * @rx_ring: Ring we are requesting offset of
+ *
+ * Returns the offset value for ring into the data buffer.
+ */
+static unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
+{
+	return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
+}
+
+/**
+ * i40e_configure_rx_ring - Configure a receive ring context
+ * @ring: The Rx ring to configure
+ *
+ * Configure the Rx descriptor ring in the HMC context.
+ **/
+static int i40e_configure_rx_ring(struct i40e_ring *ring)
+{
+	struct i40e_vsi *vsi = ring->vsi;
+	u32 chain_len = vsi->back->hw.func_caps.rx_buf_chain_len;
+	u16 pf_q = vsi->base_queue + ring->queue_index;
+	struct i40e_hw *hw = &vsi->back->hw;
+	struct i40e_hmc_obj_rxq rx_ctx;
+	int err = 0;
+	bool ok;
+
+	bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);
+
+	/* clear the context structure first */
+	memset(&rx_ctx, 0, sizeof(rx_ctx));
+
+	ring->rx_buf_len = vsi->rx_buf_len;
+
+	/* XDP RX-queue info only needed for RX rings exposed to XDP */
+	if (ring->vsi->type != I40E_VSI_MAIN)
+		goto skip;
+
+	if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
+		err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+					 ring->queue_index,
+					 ring->q_vector->napi.napi_id,
+					 ring->rx_buf_len);
+		if (err)
+			return err;
+	}
+
+	ring->xsk_pool = i40e_xsk_pool(ring);
+	if (ring->xsk_pool) {
+		xdp_rxq_info_unreg(&ring->xdp_rxq);
+		ring->rx_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool);
+		err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+					 ring->queue_index,
+					 ring->q_vector->napi.napi_id,
+					 ring->rx_buf_len);
+		if (err)
+			return err;
+		err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+						 MEM_TYPE_XSK_BUFF_POOL,
+						 NULL);
+		if (err)
+			return err;
+		dev_info(&vsi->back->pdev->dev,
+			 "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
+			 ring->queue_index);
+
+	} else {
+		err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+						 MEM_TYPE_PAGE_SHARED,
+						 NULL);
+		if (err)
+			return err;
+	}
+
+skip:
+	xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq);
+
+	rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
+				    BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
+
+	rx_ctx.base = (ring->dma / 128);
+	rx_ctx.qlen = ring->count;
+
+	/* use 16 byte descriptors */
+	rx_ctx.dsize = 0;
+
+	/* descriptor type is always zero
+	 * rx_ctx.dtype = 0;
+	 */
+	rx_ctx.hsplit_0 = 0;
+
+	rx_ctx.rxmax = min_t(u16, vsi->max_frame, chain_len * ring->rx_buf_len);
+	if (hw->revision_id == 0)
+		rx_ctx.lrxqthresh = 0;
+	else
+		rx_ctx.lrxqthresh = 1;
+	rx_ctx.crcstrip = 1;
+	rx_ctx.l2tsel = 1;
+	/* this controls whether VLAN is stripped from inner headers */
+	rx_ctx.showiv = 0;
+	/* set the prefena field to 1 because the manual says to */
+	rx_ctx.prefena = 1;
+
+	/* clear the context in the HMC */
+	err = i40e_clear_lan_rx_queue_context(hw, pf_q);
+	if (err) {
+		dev_info(&vsi->back->pdev->dev,
+			 "Failed to clear LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n",
+			 ring->queue_index, pf_q, err);
+		return -ENOMEM;
+	}
+
+	/* set the context in the HMC */
+	err = i40e_set_lan_rx_queue_context(hw, pf_q, &rx_ctx);
+	if (err) {
+		dev_info(&vsi->back->pdev->dev,
+			 "Failed to set LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n",
+			 ring->queue_index, pf_q, err);
+		return -ENOMEM;
+	}
+
+	/* configure Rx buffer alignment */
+	if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) {
+		if (I40E_2K_TOO_SMALL_WITH_PADDING) {
+			dev_info(&vsi->back->pdev->dev,
+				 "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n");
+			return -EOPNOTSUPP;
+		}
+		clear_ring_build_skb_enabled(ring);
+	} else {
+		set_ring_build_skb_enabled(ring);
+	}
+
+	ring->rx_offset = i40e_rx_offset(ring);
+
+	/* cache tail for quicker writes, and clear the reg before use */
+	ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
+	writel(0, ring->tail);
+
+	if (ring->xsk_pool) {
+		xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
+		ok = i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring));
+	} else {
+		ok = !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
+	}
+	if (!ok) {
+		/* Log this in case the user has forgotten to give the kernel
+		 * any buffers, even later in the application.
+		 */
+		dev_info(&vsi->back->pdev->dev,
+			 "Failed to allocate some buffers on %sRx ring %d (pf_q %d)\n",
+			 ring->xsk_pool ? "AF_XDP ZC enabled " : "",
+			 ring->queue_index, pf_q);
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_vsi_configure_tx - Configure the VSI for Tx
+ * @vsi: VSI structure describing this set of rings and resources
+ *
+ * Configure the Tx VSI for operation.
+ **/
+static int i40e_vsi_configure_tx(struct i40e_vsi *vsi)
+{
+	int err = 0;
+	u16 i;
+
+	for (i = 0; (i < vsi->num_queue_pairs) && !err; i++)
+		err = i40e_configure_tx_ring(vsi->tx_rings[i]);
+
+	if (err || !i40e_enabled_xdp_vsi(vsi))
+		return err;
+
+	for (i = 0; (i < vsi->num_queue_pairs) && !err; i++)
+		err = i40e_configure_tx_ring(vsi->xdp_rings[i]);
+
+	return err;
+}
+
+/**
+ * i40e_vsi_configure_rx - Configure the VSI for Rx
+ * @vsi: the VSI being configured
+ *
+ * Configure the Rx VSI for operation.
+ **/
+static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
+{
+	int err = 0;
+	u16 i;
+
+	vsi->max_frame = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog);
+	vsi->rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi);
+
+#if (PAGE_SIZE < 8192)
+	if (vsi->netdev && !I40E_2K_TOO_SMALL_WITH_PADDING &&
+	    vsi->netdev->mtu <= ETH_DATA_LEN) {
+		vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
+		vsi->max_frame = vsi->rx_buf_len;
+	}
+#endif
+
+	/* set up individual rings */
+	for (i = 0; i < vsi->num_queue_pairs && !err; i++)
+		err = i40e_configure_rx_ring(vsi->rx_rings[i]);
+
+	return err;
+}
+
+/**
+ * i40e_vsi_config_dcb_rings - Update rings to reflect DCB TC
+ * @vsi: ptr to the VSI
+ **/
+static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi)
+{
+	struct i40e_ring *tx_ring, *rx_ring;
+	u16 qoffset, qcount;
+	int i, n;
+
+	if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED)) {
+		/* Reset the TC information */
+		for (i = 0; i < vsi->num_queue_pairs; i++) {
+			rx_ring = vsi->rx_rings[i];
+			tx_ring = vsi->tx_rings[i];
+			rx_ring->dcb_tc = 0;
+			tx_ring->dcb_tc = 0;
+		}
+		return;
+	}
+
+	for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) {
+		if (!(vsi->tc_config.enabled_tc & BIT_ULL(n)))
+			continue;
+
+		qoffset = vsi->tc_config.tc_info[n].qoffset;
+		qcount = vsi->tc_config.tc_info[n].qcount;
+		for (i = qoffset; i < (qoffset + qcount); i++) {
+			rx_ring = vsi->rx_rings[i];
+			tx_ring = vsi->tx_rings[i];
+			rx_ring->dcb_tc = n;
+			tx_ring->dcb_tc = n;
+		}
+	}
+}
+
+/**
+ * i40e_set_vsi_rx_mode - Call set_rx_mode on a VSI
+ * @vsi: ptr to the VSI
+ **/
+static void i40e_set_vsi_rx_mode(struct i40e_vsi *vsi)
+{
+	if (vsi->netdev)
+		i40e_set_rx_mode(vsi->netdev);
+}
+
+/**
+ * i40e_reset_fdir_filter_cnt - Reset flow director filter counters
+ * @pf: Pointer to the targeted PF
+ *
+ * Set all flow director counters to 0.
+ */
+static void i40e_reset_fdir_filter_cnt(struct i40e_pf *pf)
+{
+	pf->fd_tcp4_filter_cnt = 0;
+	pf->fd_udp4_filter_cnt = 0;
+	pf->fd_sctp4_filter_cnt = 0;
+	pf->fd_ip4_filter_cnt = 0;
+	pf->fd_tcp6_filter_cnt = 0;
+	pf->fd_udp6_filter_cnt = 0;
+	pf->fd_sctp6_filter_cnt = 0;
+	pf->fd_ip6_filter_cnt = 0;
+}
+
+/**
+ * i40e_fdir_filter_restore - Restore the Sideband Flow Director filters
+ * @vsi: Pointer to the targeted VSI
+ *
+ * This function replays the hlist on the hw where all the SB Flow Director
+ * filters were saved.
+ **/
+static void i40e_fdir_filter_restore(struct i40e_vsi *vsi)
+{
+	struct i40e_fdir_filter *filter;
+	struct i40e_pf *pf = vsi->back;
+	struct hlist_node *node;
+
+	if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
+		return;
+
+	/* Reset FDir counters as we're replaying all existing filters */
+	i40e_reset_fdir_filter_cnt(pf);
+
+	hlist_for_each_entry_safe(filter, node,
+				  &pf->fdir_filter_list, fdir_node) {
+		i40e_add_del_fdir(vsi, filter, true);
+	}
+}
+
+/**
+ * i40e_vsi_configure - Set up the VSI for action
+ * @vsi: the VSI being configured
+ **/
+static int i40e_vsi_configure(struct i40e_vsi *vsi)
+{
+	int err;
+
+	i40e_set_vsi_rx_mode(vsi);
+	i40e_restore_vlan(vsi);
+	i40e_vsi_config_dcb_rings(vsi);
+	err = i40e_vsi_configure_tx(vsi);
+	if (!err)
+		err = i40e_vsi_configure_rx(vsi);
+
+	return err;
+}
+
+/**
+ * i40e_vsi_configure_msix - MSIX mode Interrupt Config in the HW
+ * @vsi: the VSI being configured
+ **/
+static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
+{
+	bool has_xdp = i40e_enabled_xdp_vsi(vsi);
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	u16 vector;
+	int i, q;
+	u32 qp;
+
+	/* The interrupt indexing is offset by 1 in the PFINT_ITRn
+	 * and PFINT_LNKLSTn registers, e.g.:
+	 *   PFINT_ITRn[0..n-1] gets msix-1..msix-n  (qpair interrupts)
+	 */
+	qp = vsi->base_queue;
+	vector = vsi->base_vector;
+	for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
+		struct i40e_q_vector *q_vector = vsi->q_vectors[i];
+
+		q_vector->rx.next_update = jiffies + 1;
+		q_vector->rx.target_itr =
+			ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
+		wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
+		     q_vector->rx.target_itr >> 1);
+		q_vector->rx.current_itr = q_vector->rx.target_itr;
+
+		q_vector->tx.next_update = jiffies + 1;
+		q_vector->tx.target_itr =
+			ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
+		wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
+		     q_vector->tx.target_itr >> 1);
+		q_vector->tx.current_itr = q_vector->tx.target_itr;
+
+		wr32(hw, I40E_PFINT_RATEN(vector - 1),
+		     i40e_intrl_usec_to_reg(vsi->int_rate_limit));
+
+		/* begin of linked list for RX queue assigned to this vector */
+		wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), qp);
+		for (q = 0; q < q_vector->num_ringpairs; q++) {
+			u32 nextqp = has_xdp ? qp + vsi->alloc_queue_pairs : qp;
+			u32 val;
+
+			val = I40E_QINT_RQCTL_CAUSE_ENA_MASK |
+			      (I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
+			      (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
+			      (nextqp << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
+			      (I40E_QUEUE_TYPE_TX <<
+			       I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT);
+
+			wr32(hw, I40E_QINT_RQCTL(qp), val);
+
+			if (has_xdp) {
+				/* TX queue with next queue set to TX */
+				val = I40E_QINT_TQCTL_CAUSE_ENA_MASK |
+				      (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
+				      (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
+				      (qp << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
+				      (I40E_QUEUE_TYPE_TX <<
+				       I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);
+
+				wr32(hw, I40E_QINT_TQCTL(nextqp), val);
+			}
+			/* TX queue with next RX or end of linked list */
+			val = I40E_QINT_TQCTL_CAUSE_ENA_MASK |
+			      (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
+			      (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
+			      ((qp + 1) << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) |
+			      (I40E_QUEUE_TYPE_RX <<
+			       I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);
+
+			/* Terminate the linked list */
+			if (q == (q_vector->num_ringpairs - 1))
+				val |= (I40E_QUEUE_END_OF_LIST <<
+					I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT);
+
+			wr32(hw, I40E_QINT_TQCTL(qp), val);
+			qp++;
+		}
+	}
+
+	i40e_flush(hw);
+}
+
+/**
+ * i40e_enable_misc_int_causes - enable the non-queue interrupts
+ * @pf: pointer to private device data structure
+ **/
+static void i40e_enable_misc_int_causes(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u32 val;
+
+	/* clear things first */
+	wr32(hw, I40E_PFINT_ICR0_ENA, 0);  /* disable all */
+	rd32(hw, I40E_PFINT_ICR0);         /* read to clear */
+
+	val = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK       |
+	      I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK    |
+	      I40E_PFINT_ICR0_ENA_GRST_MASK          |
+	      I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK |
+	      I40E_PFINT_ICR0_ENA_GPIO_MASK          |
+	      I40E_PFINT_ICR0_ENA_HMC_ERR_MASK       |
+	      I40E_PFINT_ICR0_ENA_VFLR_MASK          |
+	      I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
+
+	if (pf->flags & I40E_FLAG_IWARP_ENABLED)
+		val |= I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK;
+
+	if (pf->flags & I40E_FLAG_PTP)
+		val |= I40E_PFINT_ICR0_ENA_TIMESYNC_MASK;
+
+	wr32(hw, I40E_PFINT_ICR0_ENA, val);
+
+	/* SW_ITR_IDX = 0, but don't change INTENA */
+	wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK |
+					I40E_PFINT_DYN_CTL0_INTENA_MSK_MASK);
+
+	/* OTHER_ITR_IDX = 0 */
+	wr32(hw, I40E_PFINT_STAT_CTL0, 0);
+}
+
+/**
+ * i40e_configure_msi_and_legacy - Legacy mode interrupt config in the HW
+ * @vsi: the VSI being configured
+ **/
+static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
+{
+	u32 nextqp = i40e_enabled_xdp_vsi(vsi) ? vsi->alloc_queue_pairs : 0;
+	struct i40e_q_vector *q_vector = vsi->q_vectors[0];
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+
+	/* set the ITR configuration */
+	q_vector->rx.next_update = jiffies + 1;
+	q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
+	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr >> 1);
+	q_vector->rx.current_itr = q_vector->rx.target_itr;
+	q_vector->tx.next_update = jiffies + 1;
+	q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
+	wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr >> 1);
+	q_vector->tx.current_itr = q_vector->tx.target_itr;
+
+	i40e_enable_misc_int_causes(pf);
+
+	/* FIRSTQ_INDX = 0, FIRSTQ_TYPE = 0 (rx) */
+	wr32(hw, I40E_PFINT_LNKLST0, 0);
+
+	/* Associate the queue pair to the vector and enable the queue
+	 * interrupt RX queue in linked list with next queue set to TX
+	 */
+	wr32(hw, I40E_QINT_RQCTL(0), I40E_QINT_RQCTL_VAL(nextqp, 0, TX));
+
+	if (i40e_enabled_xdp_vsi(vsi)) {
+		/* TX queue in linked list with next queue set to TX */
+		wr32(hw, I40E_QINT_TQCTL(nextqp),
+		     I40E_QINT_TQCTL_VAL(nextqp, 0, TX));
+	}
+
+	/* last TX queue so the next RX queue doesn't matter */
+	wr32(hw, I40E_QINT_TQCTL(0),
+	     I40E_QINT_TQCTL_VAL(I40E_QUEUE_END_OF_LIST, 0, RX));
+	i40e_flush(hw);
+}
+
+/**
+ * i40e_irq_dynamic_disable_icr0 - Disable default interrupt generation for icr0
+ * @pf: board private structure
+ **/
+void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+
+	wr32(hw, I40E_PFINT_DYN_CTL0,
+	     I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
+	i40e_flush(hw);
+}
+
+/**
+ * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0
+ * @pf: board private structure
+ **/
+void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u32 val;
+
+	val = I40E_PFINT_DYN_CTL0_INTENA_MASK   |
+	      I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
+	      (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
+
+	wr32(hw, I40E_PFINT_DYN_CTL0, val);
+	i40e_flush(hw);
+}
+
+/**
+ * i40e_msix_clean_rings - MSIX mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ **/
+static irqreturn_t i40e_msix_clean_rings(int irq, void *data)
+{
+	struct i40e_q_vector *q_vector = data;
+
+	if (!q_vector->tx.ring && !q_vector->rx.ring)
+		return IRQ_HANDLED;
+
+	napi_schedule_irqoff(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * i40e_irq_affinity_notify - Callback for affinity changes
+ * @notify: context as to what irq was changed
+ * @mask: the new affinity mask
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * so that we may register to receive changes to the irq affinity masks.
+ **/
+static void i40e_irq_affinity_notify(struct irq_affinity_notify *notify,
+				     const cpumask_t *mask)
+{
+	struct i40e_q_vector *q_vector =
+		container_of(notify, struct i40e_q_vector, affinity_notify);
+
+	cpumask_copy(&q_vector->affinity_mask, mask);
+}
+
+/**
+ * i40e_irq_affinity_release - Callback for affinity notifier release
+ * @ref: internal core kernel usage
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * to inform the current notification subscriber that they will no longer
+ * receive notifications.
+ **/
+static void i40e_irq_affinity_release(struct kref *ref) {}
+
+/**
+ * i40e_vsi_request_irq_msix - Initialize MSI-X interrupts
+ * @vsi: the VSI being configured
+ * @basename: name for the vector
+ *
+ * Allocates MSI-X vectors and requests interrupts from the kernel.
+ **/
+static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
+{
+	int q_vectors = vsi->num_q_vectors;
+	struct i40e_pf *pf = vsi->back;
+	int base = vsi->base_vector;
+	int rx_int_idx = 0;
+	int tx_int_idx = 0;
+	int vector, err;
+	int irq_num;
+	int cpu;
+
+	for (vector = 0; vector < q_vectors; vector++) {
+		struct i40e_q_vector *q_vector = vsi->q_vectors[vector];
+
+		irq_num = pf->msix_entries[base + vector].vector;
+
+		if (q_vector->tx.ring && q_vector->rx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+				 "%s-%s-%d", basename, "TxRx", rx_int_idx++);
+			tx_int_idx++;
+		} else if (q_vector->rx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+				 "%s-%s-%d", basename, "rx", rx_int_idx++);
+		} else if (q_vector->tx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+				 "%s-%s-%d", basename, "tx", tx_int_idx++);
+		} else {
+			/* skip this unused q_vector */
+			continue;
+		}
+		err = request_irq(irq_num,
+				  vsi->irq_handler,
+				  0,
+				  q_vector->name,
+				  q_vector);
+		if (err) {
+			dev_info(&pf->pdev->dev,
+				 "MSIX request_irq failed, error: %d\n", err);
+			goto free_queue_irqs;
+		}
+
+		/* register for affinity change notifications */
+		q_vector->irq_num = irq_num;
+		q_vector->affinity_notify.notify = i40e_irq_affinity_notify;
+		q_vector->affinity_notify.release = i40e_irq_affinity_release;
+		irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
+		/* Spread affinity hints out across online CPUs.
+		 *
+		 * get_cpu_mask returns a static constant mask with
+		 * a permanent lifetime so it's ok to pass to
+		 * irq_update_affinity_hint without making a copy.
+		 */
+		cpu = cpumask_local_spread(q_vector->v_idx, -1);
+		irq_update_affinity_hint(irq_num, get_cpu_mask(cpu));
+	}
+
+	vsi->irqs_ready = true;
+	return 0;
+
+free_queue_irqs:
+	while (vector) {
+		vector--;
+		irq_num = pf->msix_entries[base + vector].vector;
+		irq_set_affinity_notifier(irq_num, NULL);
+		irq_update_affinity_hint(irq_num, NULL);
+		free_irq(irq_num, &vsi->q_vectors[vector]);
+	}
+	return err;
+}
+
+/**
+ * i40e_vsi_disable_irq - Mask off queue interrupt generation on the VSI
+ * @vsi: the VSI being un-configured
+ **/
+static void i40e_vsi_disable_irq(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	int base = vsi->base_vector;
+	int i;
+
+	/* disable interrupt causation from each queue */
+	for (i = 0; i < vsi->num_queue_pairs; i++) {
+		u32 val;
+
+		val = rd32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i]->reg_idx));
+		val &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK;
+		wr32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i]->reg_idx), val);
+
+		val = rd32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i]->reg_idx));
+		val &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
+		wr32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i]->reg_idx), val);
+
+		if (!i40e_enabled_xdp_vsi(vsi))
+			continue;
+		wr32(hw, I40E_QINT_TQCTL(vsi->xdp_rings[i]->reg_idx), 0);
+	}
+
+	/* disable each interrupt */
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+		for (i = vsi->base_vector;
+		     i < (vsi->num_q_vectors + vsi->base_vector); i++)
+			wr32(hw, I40E_PFINT_DYN_CTLN(i - 1), 0);
+
+		i40e_flush(hw);
+		for (i = 0; i < vsi->num_q_vectors; i++)
+			synchronize_irq(pf->msix_entries[i + base].vector);
+	} else {
+		/* Legacy and MSI mode - this stops all interrupt handling */
+		wr32(hw, I40E_PFINT_ICR0_ENA, 0);
+		wr32(hw, I40E_PFINT_DYN_CTL0, 0);
+		i40e_flush(hw);
+		synchronize_irq(pf->pdev->irq);
+	}
+}
+
+/**
+ * i40e_vsi_enable_irq - Enable IRQ for the given VSI
+ * @vsi: the VSI being configured
+ **/
+static int i40e_vsi_enable_irq(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	int i;
+
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+		for (i = 0; i < vsi->num_q_vectors; i++)
+			i40e_irq_dynamic_enable(vsi, i);
+	} else {
+		i40e_irq_dynamic_enable_icr0(pf);
+	}
+
+	i40e_flush(&pf->hw);
+	return 0;
+}
+
+/**
+ * i40e_free_misc_vector - Free the vector that handles non-queue events
+ * @pf: board private structure
+ **/
+static void i40e_free_misc_vector(struct i40e_pf *pf)
+{
+	/* Disable ICR 0 */
+	wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0);
+	i40e_flush(&pf->hw);
+
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) {
+		free_irq(pf->msix_entries[0].vector, pf);
+		clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
+	}
+}
+
+/**
+ * i40e_intr - MSI/Legacy and non-queue interrupt handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ *
+ * This is the handler used for all MSI/Legacy interrupts, and deals
+ * with both queue and non-queue interrupts.  This is also used in
+ * MSIX mode to handle the non-queue interrupts.
+ **/
+static irqreturn_t i40e_intr(int irq, void *data)
+{
+	struct i40e_pf *pf = (struct i40e_pf *)data;
+	struct i40e_hw *hw = &pf->hw;
+	irqreturn_t ret = IRQ_NONE;
+	u32 icr0, icr0_remaining;
+	u32 val, ena_mask;
+
+	icr0 = rd32(hw, I40E_PFINT_ICR0);
+	ena_mask = rd32(hw, I40E_PFINT_ICR0_ENA);
+
+	/* if sharing a legacy IRQ, we might get called w/o an intr pending */
+	if ((icr0 & I40E_PFINT_ICR0_INTEVENT_MASK) == 0)
+		goto enable_intr;
+
+	/* if interrupt but no bits showing, must be SWINT */
+	if (((icr0 & ~I40E_PFINT_ICR0_INTEVENT_MASK) == 0) ||
+	    (icr0 & I40E_PFINT_ICR0_SWINT_MASK))
+		pf->sw_int_count++;
+
+	if ((pf->flags & I40E_FLAG_IWARP_ENABLED) &&
+	    (icr0 & I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK)) {
+		ena_mask &= ~I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK;
+		dev_dbg(&pf->pdev->dev, "cleared PE_CRITERR\n");
+		set_bit(__I40E_CORE_RESET_REQUESTED, pf->state);
+	}
+
+	/* only q0 is used in MSI/Legacy mode, and none are used in MSIX */
+	if (icr0 & I40E_PFINT_ICR0_QUEUE_0_MASK) {
+		struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+		struct i40e_q_vector *q_vector = vsi->q_vectors[0];
+
+		/* We do not have a way to disarm Queue causes while leaving
+		 * interrupt enabled for all other causes, ideally
+		 * interrupt should be disabled while we are in NAPI but
+		 * this is not a performance path and napi_schedule()
+		 * can deal with rescheduling.
+		 */
+		if (!test_bit(__I40E_DOWN, pf->state))
+			napi_schedule_irqoff(&q_vector->napi);
+	}
+
+	if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) {
+		ena_mask &= ~I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
+		set_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state);
+		i40e_debug(&pf->hw, I40E_DEBUG_NVM, "AdminQ event\n");
+	}
+
+	if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK) {
+		ena_mask &= ~I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
+		set_bit(__I40E_MDD_EVENT_PENDING, pf->state);
+	}
+
+	if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) {
+		/* disable any further VFLR event notifications */
+		if (test_bit(__I40E_VF_RESETS_DISABLED, pf->state)) {
+			u32 reg = rd32(hw, I40E_PFINT_ICR0_ENA);
+
+			reg &= ~I40E_PFINT_ICR0_VFLR_MASK;
+			wr32(hw, I40E_PFINT_ICR0_ENA, reg);
+		} else {
+			ena_mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK;
+			set_bit(__I40E_VFLR_EVENT_PENDING, pf->state);
+		}
+	}
+
+	if (icr0 & I40E_PFINT_ICR0_GRST_MASK) {
+		if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+			set_bit(__I40E_RESET_INTR_RECEIVED, pf->state);
+		ena_mask &= ~I40E_PFINT_ICR0_ENA_GRST_MASK;
+		val = rd32(hw, I40E_GLGEN_RSTAT);
+		val = (val & I40E_GLGEN_RSTAT_RESET_TYPE_MASK)
+		       >> I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT;
+		if (val == I40E_RESET_CORER) {
+			pf->corer_count++;
+		} else if (val == I40E_RESET_GLOBR) {
+			pf->globr_count++;
+		} else if (val == I40E_RESET_EMPR) {
+			pf->empr_count++;
+			set_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state);
+		}
+	}
+
+	if (icr0 & I40E_PFINT_ICR0_HMC_ERR_MASK) {
+		icr0 &= ~I40E_PFINT_ICR0_HMC_ERR_MASK;
+		dev_info(&pf->pdev->dev, "HMC error interrupt\n");
+		dev_info(&pf->pdev->dev, "HMC error info 0x%x, HMC error data 0x%x\n",
+			 rd32(hw, I40E_PFHMC_ERRORINFO),
+			 rd32(hw, I40E_PFHMC_ERRORDATA));
+	}
+
+	if (icr0 & I40E_PFINT_ICR0_TIMESYNC_MASK) {
+		u32 prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_0);
+
+		if (prttsyn_stat & I40E_PRTTSYN_STAT_0_EVENT0_MASK)
+			schedule_work(&pf->ptp_extts0_work);
+
+		if (prttsyn_stat & I40E_PRTTSYN_STAT_0_TXTIME_MASK)
+			i40e_ptp_tx_hwtstamp(pf);
+
+		icr0 &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK;
+	}
+
+	/* If a critical error is pending we have no choice but to reset the
+	 * device.
+	 * Report and mask out any remaining unexpected interrupts.
+	 */
+	icr0_remaining = icr0 & ena_mask;
+	if (icr0_remaining) {
+		dev_info(&pf->pdev->dev, "unhandled interrupt icr0=0x%08x\n",
+			 icr0_remaining);
+		if ((icr0_remaining & I40E_PFINT_ICR0_PE_CRITERR_MASK) ||
+		    (icr0_remaining & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK) ||
+		    (icr0_remaining & I40E_PFINT_ICR0_ECC_ERR_MASK)) {
+			dev_info(&pf->pdev->dev, "device will be reset\n");
+			set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
+			i40e_service_event_schedule(pf);
+		}
+		ena_mask &= ~icr0_remaining;
+	}
+	ret = IRQ_HANDLED;
+
+enable_intr:
+	/* re-enable interrupt causes */
+	wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask);
+	if (!test_bit(__I40E_DOWN, pf->state) ||
+	    test_bit(__I40E_RECOVERY_MODE, pf->state)) {
+		i40e_service_event_schedule(pf);
+		i40e_irq_dynamic_enable_icr0(pf);
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_clean_fdir_tx_irq - Reclaim resources after transmit completes
+ * @tx_ring:  tx ring to clean
+ * @budget:   how many cleans we're allowed
+ *
+ * Returns true if there's any budget left (e.g. the clean is finished)
+ **/
+static bool i40e_clean_fdir_tx_irq(struct i40e_ring *tx_ring, int budget)
+{
+	struct i40e_vsi *vsi = tx_ring->vsi;
+	u16 i = tx_ring->next_to_clean;
+	struct i40e_tx_buffer *tx_buf;
+	struct i40e_tx_desc *tx_desc;
+
+	tx_buf = &tx_ring->tx_bi[i];
+	tx_desc = I40E_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		/* if the descriptor isn't done, no work yet to do */
+		if (!(eop_desc->cmd_type_offset_bsz &
+		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buf->next_to_watch = NULL;
+
+		tx_desc->buffer_addr = 0;
+		tx_desc->cmd_type_offset_bsz = 0;
+		/* move past filter desc */
+		tx_buf++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buf = tx_ring->tx_bi;
+			tx_desc = I40E_TX_DESC(tx_ring, 0);
+		}
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buf, dma),
+				 dma_unmap_len(tx_buf, len),
+				 DMA_TO_DEVICE);
+		if (tx_buf->tx_flags & I40E_TX_FLAGS_FD_SB)
+			kfree(tx_buf->raw_buf);
+
+		tx_buf->raw_buf = NULL;
+		tx_buf->tx_flags = 0;
+		tx_buf->next_to_watch = NULL;
+		dma_unmap_len_set(tx_buf, len, 0);
+		tx_desc->buffer_addr = 0;
+		tx_desc->cmd_type_offset_bsz = 0;
+
+		/* move us past the eop_desc for start of next FD desc */
+		tx_buf++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buf = tx_ring->tx_bi;
+			tx_desc = I40E_TX_DESC(tx_ring, 0);
+		}
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+
+	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED)
+		i40e_irq_dynamic_enable(vsi, tx_ring->q_vector->v_idx);
+
+	return budget > 0;
+}
+
+/**
+ * i40e_fdir_clean_ring - Interrupt Handler for FDIR SB ring
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ **/
+static irqreturn_t i40e_fdir_clean_ring(int irq, void *data)
+{
+	struct i40e_q_vector *q_vector = data;
+	struct i40e_vsi *vsi;
+
+	if (!q_vector->tx.ring)
+		return IRQ_HANDLED;
+
+	vsi = q_vector->tx.ring->vsi;
+	i40e_clean_fdir_tx_irq(q_vector->tx.ring, vsi->work_limit);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * i40e_map_vector_to_qp - Assigns the queue pair to the vector
+ * @vsi: the VSI being configured
+ * @v_idx: vector index
+ * @qp_idx: queue pair index
+ **/
+static void i40e_map_vector_to_qp(struct i40e_vsi *vsi, int v_idx, int qp_idx)
+{
+	struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx];
+	struct i40e_ring *tx_ring = vsi->tx_rings[qp_idx];
+	struct i40e_ring *rx_ring = vsi->rx_rings[qp_idx];
+
+	tx_ring->q_vector = q_vector;
+	tx_ring->next = q_vector->tx.ring;
+	q_vector->tx.ring = tx_ring;
+	q_vector->tx.count++;
+
+	/* Place XDP Tx ring in the same q_vector ring list as regular Tx */
+	if (i40e_enabled_xdp_vsi(vsi)) {
+		struct i40e_ring *xdp_ring = vsi->xdp_rings[qp_idx];
+
+		xdp_ring->q_vector = q_vector;
+		xdp_ring->next = q_vector->tx.ring;
+		q_vector->tx.ring = xdp_ring;
+		q_vector->tx.count++;
+	}
+
+	rx_ring->q_vector = q_vector;
+	rx_ring->next = q_vector->rx.ring;
+	q_vector->rx.ring = rx_ring;
+	q_vector->rx.count++;
+}
+
+/**
+ * i40e_vsi_map_rings_to_vectors - Maps descriptor rings to vectors
+ * @vsi: the VSI being configured
+ *
+ * This function maps descriptor rings to the queue-specific vectors
+ * we were allotted through the MSI-X enabling code.  Ideally, we'd have
+ * one vector per queue pair, but on a constrained vector budget, we
+ * group the queue pairs as "efficiently" as possible.
+ **/
+static void i40e_vsi_map_rings_to_vectors(struct i40e_vsi *vsi)
+{
+	int qp_remaining = vsi->num_queue_pairs;
+	int q_vectors = vsi->num_q_vectors;
+	int num_ringpairs;
+	int v_start = 0;
+	int qp_idx = 0;
+
+	/* If we don't have enough vectors for a 1-to-1 mapping, we'll have to
+	 * group them so there are multiple queues per vector.
+	 * It is also important to go through all the vectors available to be
+	 * sure that if we don't use all the vectors, that the remaining vectors
+	 * are cleared. This is especially important when decreasing the
+	 * number of queues in use.
+	 */
+	for (; v_start < q_vectors; v_start++) {
+		struct i40e_q_vector *q_vector = vsi->q_vectors[v_start];
+
+		num_ringpairs = DIV_ROUND_UP(qp_remaining, q_vectors - v_start);
+
+		q_vector->num_ringpairs = num_ringpairs;
+		q_vector->reg_idx = q_vector->v_idx + vsi->base_vector - 1;
+
+		q_vector->rx.count = 0;
+		q_vector->tx.count = 0;
+		q_vector->rx.ring = NULL;
+		q_vector->tx.ring = NULL;
+
+		while (num_ringpairs--) {
+			i40e_map_vector_to_qp(vsi, v_start, qp_idx);
+			qp_idx++;
+			qp_remaining--;
+		}
+	}
+}
+
+/**
+ * i40e_vsi_request_irq - Request IRQ from the OS
+ * @vsi: the VSI being configured
+ * @basename: name for the vector
+ **/
+static int i40e_vsi_request_irq(struct i40e_vsi *vsi, char *basename)
+{
+	struct i40e_pf *pf = vsi->back;
+	int err;
+
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+		err = i40e_vsi_request_irq_msix(vsi, basename);
+	else if (pf->flags & I40E_FLAG_MSI_ENABLED)
+		err = request_irq(pf->pdev->irq, i40e_intr, 0,
+				  pf->int_name, pf);
+	else
+		err = request_irq(pf->pdev->irq, i40e_intr, IRQF_SHARED,
+				  pf->int_name, pf);
+
+	if (err)
+		dev_info(&pf->pdev->dev, "request_irq failed, Error %d\n", err);
+
+	return err;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/**
+ * i40e_netpoll - A Polling 'interrupt' handler
+ * @netdev: network interface device structure
+ *
+ * This is used by netconsole to send skbs without having to re-enable
+ * interrupts.  It's not called while the normal interrupt routine is executing.
+ **/
+static void i40e_netpoll(struct net_device *netdev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	int i;
+
+	/* if interface is down do nothing */
+	if (test_bit(__I40E_VSI_DOWN, vsi->state))
+		return;
+
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+		for (i = 0; i < vsi->num_q_vectors; i++)
+			i40e_msix_clean_rings(0, vsi->q_vectors[i]);
+	} else {
+		i40e_intr(pf->pdev->irq, netdev);
+	}
+}
+#endif
+
+#define I40E_QTX_ENA_WAIT_COUNT 50
+
+/**
+ * i40e_pf_txq_wait - Wait for a PF's Tx queue to be enabled or disabled
+ * @pf: the PF being configured
+ * @pf_q: the PF queue
+ * @enable: enable or disable state of the queue
+ *
+ * This routine will wait for the given Tx queue of the PF to reach the
+ * enabled or disabled state.
+ * Returns -ETIMEDOUT in case of failing to reach the requested state after
+ * multiple retries; else will return 0 in case of success.
+ **/
+static int i40e_pf_txq_wait(struct i40e_pf *pf, int pf_q, bool enable)
+{
+	int i;
+	u32 tx_reg;
+
+	for (i = 0; i < I40E_QUEUE_WAIT_RETRY_LIMIT; i++) {
+		tx_reg = rd32(&pf->hw, I40E_QTX_ENA(pf_q));
+		if (enable == !!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK))
+			break;
+
+		usleep_range(10, 20);
+	}
+	if (i >= I40E_QUEUE_WAIT_RETRY_LIMIT)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/**
+ * i40e_control_tx_q - Start or stop a particular Tx queue
+ * @pf: the PF structure
+ * @pf_q: the PF queue to configure
+ * @enable: start or stop the queue
+ *
+ * This function enables or disables a single queue. Note that any delay
+ * required after the operation is expected to be handled by the caller of
+ * this function.
+ **/
+static void i40e_control_tx_q(struct i40e_pf *pf, int pf_q, bool enable)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u32 tx_reg;
+	int i;
+
+	/* warn the TX unit of coming changes */
+	i40e_pre_tx_queue_cfg(&pf->hw, pf_q, enable);
+	if (!enable)
+		usleep_range(10, 20);
+
+	for (i = 0; i < I40E_QTX_ENA_WAIT_COUNT; i++) {
+		tx_reg = rd32(hw, I40E_QTX_ENA(pf_q));
+		if (((tx_reg >> I40E_QTX_ENA_QENA_REQ_SHIFT) & 1) ==
+		    ((tx_reg >> I40E_QTX_ENA_QENA_STAT_SHIFT) & 1))
+			break;
+		usleep_range(1000, 2000);
+	}
+
+	/* Skip if the queue is already in the requested state */
+	if (enable == !!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK))
+		return;
+
+	/* turn on/off the queue */
+	if (enable) {
+		wr32(hw, I40E_QTX_HEAD(pf_q), 0);
+		tx_reg |= I40E_QTX_ENA_QENA_REQ_MASK;
+	} else {
+		tx_reg &= ~I40E_QTX_ENA_QENA_REQ_MASK;
+	}
+
+	wr32(hw, I40E_QTX_ENA(pf_q), tx_reg);
+}
+
+/**
+ * i40e_control_wait_tx_q - Start/stop Tx queue and wait for completion
+ * @seid: VSI SEID
+ * @pf: the PF structure
+ * @pf_q: the PF queue to configure
+ * @is_xdp: true if the queue is used for XDP
+ * @enable: start or stop the queue
+ **/
+int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q,
+			   bool is_xdp, bool enable)
+{
+	int ret;
+
+	i40e_control_tx_q(pf, pf_q, enable);
+
+	/* wait for the change to finish */
+	ret = i40e_pf_txq_wait(pf, pf_q, enable);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "VSI seid %d %sTx ring %d %sable timeout\n",
+			 seid, (is_xdp ? "XDP " : ""), pf_q,
+			 (enable ? "en" : "dis"));
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_vsi_enable_tx - Start a VSI's rings
+ * @vsi: the VSI being configured
+ **/
+static int i40e_vsi_enable_tx(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	int i, pf_q, ret = 0;
+
+	pf_q = vsi->base_queue;
+	for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
+		ret = i40e_control_wait_tx_q(vsi->seid, pf,
+					     pf_q,
+					     false /*is xdp*/, true);
+		if (ret)
+			break;
+
+		if (!i40e_enabled_xdp_vsi(vsi))
+			continue;
+
+		ret = i40e_control_wait_tx_q(vsi->seid, pf,
+					     pf_q + vsi->alloc_queue_pairs,
+					     true /*is xdp*/, true);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+/**
+ * i40e_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
+ * @pf: the PF being configured
+ * @pf_q: the PF queue
+ * @enable: enable or disable state of the queue
+ *
+ * This routine will wait for the given Rx queue of the PF to reach the
+ * enabled or disabled state.
+ * Returns -ETIMEDOUT in case of failing to reach the requested state after
+ * multiple retries; else will return 0 in case of success.
+ **/
+static int i40e_pf_rxq_wait(struct i40e_pf *pf, int pf_q, bool enable)
+{
+	int i;
+	u32 rx_reg;
+
+	for (i = 0; i < I40E_QUEUE_WAIT_RETRY_LIMIT; i++) {
+		rx_reg = rd32(&pf->hw, I40E_QRX_ENA(pf_q));
+		if (enable == !!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
+			break;
+
+		usleep_range(10, 20);
+	}
+	if (i >= I40E_QUEUE_WAIT_RETRY_LIMIT)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/**
+ * i40e_control_rx_q - Start or stop a particular Rx queue
+ * @pf: the PF structure
+ * @pf_q: the PF queue to configure
+ * @enable: start or stop the queue
+ *
+ * This function enables or disables a single queue. Note that
+ * any delay required after the operation is expected to be
+ * handled by the caller of this function.
+ **/
+static void i40e_control_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u32 rx_reg;
+	int i;
+
+	for (i = 0; i < I40E_QTX_ENA_WAIT_COUNT; i++) {
+		rx_reg = rd32(hw, I40E_QRX_ENA(pf_q));
+		if (((rx_reg >> I40E_QRX_ENA_QENA_REQ_SHIFT) & 1) ==
+		    ((rx_reg >> I40E_QRX_ENA_QENA_STAT_SHIFT) & 1))
+			break;
+		usleep_range(1000, 2000);
+	}
+
+	/* Skip if the queue is already in the requested state */
+	if (enable == !!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
+		return;
+
+	/* turn on/off the queue */
+	if (enable)
+		rx_reg |= I40E_QRX_ENA_QENA_REQ_MASK;
+	else
+		rx_reg &= ~I40E_QRX_ENA_QENA_REQ_MASK;
+
+	wr32(hw, I40E_QRX_ENA(pf_q), rx_reg);
+}
+
+/**
+ * i40e_control_wait_rx_q
+ * @pf: the PF structure
+ * @pf_q: queue being configured
+ * @enable: start or stop the rings
+ *
+ * This function enables or disables a single queue along with waiting
+ * for the change to finish. The caller of this function should handle
+ * the delays needed in the case of disabling queues.
+ **/
+int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
+{
+	int ret = 0;
+
+	i40e_control_rx_q(pf, pf_q, enable);
+
+	/* wait for the change to finish */
+	ret = i40e_pf_rxq_wait(pf, pf_q, enable);
+	if (ret)
+		return ret;
+
+	return ret;
+}
+
+/**
+ * i40e_vsi_enable_rx - Start a VSI's rings
+ * @vsi: the VSI being configured
+ **/
+static int i40e_vsi_enable_rx(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	int i, pf_q, ret = 0;
+
+	pf_q = vsi->base_queue;
+	for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
+		ret = i40e_control_wait_rx_q(pf, pf_q, true);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "VSI seid %d Rx ring %d enable timeout\n",
+				 vsi->seid, pf_q);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_vsi_start_rings - Start a VSI's rings
+ * @vsi: the VSI being configured
+ **/
+int i40e_vsi_start_rings(struct i40e_vsi *vsi)
+{
+	int ret = 0;
+
+	/* do rx first for enable and last for disable */
+	ret = i40e_vsi_enable_rx(vsi);
+	if (ret)
+		return ret;
+	ret = i40e_vsi_enable_tx(vsi);
+
+	return ret;
+}
+
+#define I40E_DISABLE_TX_GAP_MSEC	50
+
+/**
+ * i40e_vsi_stop_rings - Stop a VSI's rings
+ * @vsi: the VSI being configured
+ **/
+void i40e_vsi_stop_rings(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	int pf_q, err, q_end;
+
+	/* When port TX is suspended, don't wait */
+	if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state))
+		return i40e_vsi_stop_rings_no_wait(vsi);
+
+	q_end = vsi->base_queue + vsi->num_queue_pairs;
+	for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
+		i40e_pre_tx_queue_cfg(&pf->hw, (u32)pf_q, false);
+
+	for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) {
+		err = i40e_control_wait_rx_q(pf, pf_q, false);
+		if (err)
+			dev_info(&pf->pdev->dev,
+				 "VSI seid %d Rx ring %d disable timeout\n",
+				 vsi->seid, pf_q);
+	}
+
+	msleep(I40E_DISABLE_TX_GAP_MSEC);
+	pf_q = vsi->base_queue;
+	for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++)
+		wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0);
+
+	i40e_vsi_wait_queues_disabled(vsi);
+}
+
+/**
+ * i40e_vsi_stop_rings_no_wait - Stop a VSI's rings and do not delay
+ * @vsi: the VSI being shutdown
+ *
+ * This function stops all the rings for a VSI but does not delay to verify
+ * that rings have been disabled. It is expected that the caller is shutting
+ * down multiple VSIs at once and will delay together for all the VSIs after
+ * initiating the shutdown. This is particularly useful for shutting down lots
+ * of VFs together. Otherwise, a large delay can be incurred while configuring
+ * each VSI in serial.
+ **/
+void i40e_vsi_stop_rings_no_wait(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	int i, pf_q;
+
+	pf_q = vsi->base_queue;
+	for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
+		i40e_control_tx_q(pf, pf_q, false);
+		i40e_control_rx_q(pf, pf_q, false);
+	}
+}
+
+/**
+ * i40e_vsi_free_irq - Free the irq association with the OS
+ * @vsi: the VSI being configured
+ **/
+static void i40e_vsi_free_irq(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	int base = vsi->base_vector;
+	u32 val, qp;
+	int i;
+
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+		if (!vsi->q_vectors)
+			return;
+
+		if (!vsi->irqs_ready)
+			return;
+
+		vsi->irqs_ready = false;
+		for (i = 0; i < vsi->num_q_vectors; i++) {
+			int irq_num;
+			u16 vector;
+
+			vector = i + base;
+			irq_num = pf->msix_entries[vector].vector;
+
+			/* free only the irqs that were actually requested */
+			if (!vsi->q_vectors[i] ||
+			    !vsi->q_vectors[i]->num_ringpairs)
+				continue;
+
+			/* clear the affinity notifier in the IRQ descriptor */
+			irq_set_affinity_notifier(irq_num, NULL);
+			/* remove our suggested affinity mask for this IRQ */
+			irq_update_affinity_hint(irq_num, NULL);
+			free_irq(irq_num, vsi->q_vectors[i]);
+
+			/* Tear down the interrupt queue link list
+			 *
+			 * We know that they come in pairs and always
+			 * the Rx first, then the Tx.  To clear the
+			 * link list, stick the EOL value into the
+			 * next_q field of the registers.
+			 */
+			val = rd32(hw, I40E_PFINT_LNKLSTN(vector - 1));
+			qp = (val & I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK)
+				>> I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT;
+			val |= I40E_QUEUE_END_OF_LIST
+				<< I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT;
+			wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), val);
+
+			while (qp != I40E_QUEUE_END_OF_LIST) {
+				u32 next;
+
+				val = rd32(hw, I40E_QINT_RQCTL(qp));
+
+				val &= ~(I40E_QINT_RQCTL_MSIX_INDX_MASK  |
+					 I40E_QINT_RQCTL_MSIX0_INDX_MASK |
+					 I40E_QINT_RQCTL_CAUSE_ENA_MASK  |
+					 I40E_QINT_RQCTL_INTEVENT_MASK);
+
+				val |= (I40E_QINT_RQCTL_ITR_INDX_MASK |
+					 I40E_QINT_RQCTL_NEXTQ_INDX_MASK);
+
+				wr32(hw, I40E_QINT_RQCTL(qp), val);
+
+				val = rd32(hw, I40E_QINT_TQCTL(qp));
+
+				next = (val & I40E_QINT_TQCTL_NEXTQ_INDX_MASK)
+					>> I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT;
+
+				val &= ~(I40E_QINT_TQCTL_MSIX_INDX_MASK  |
+					 I40E_QINT_TQCTL_MSIX0_INDX_MASK |
+					 I40E_QINT_TQCTL_CAUSE_ENA_MASK  |
+					 I40E_QINT_TQCTL_INTEVENT_MASK);
+
+				val |= (I40E_QINT_TQCTL_ITR_INDX_MASK |
+					 I40E_QINT_TQCTL_NEXTQ_INDX_MASK);
+
+				wr32(hw, I40E_QINT_TQCTL(qp), val);
+				qp = next;
+			}
+		}
+	} else {
+		free_irq(pf->pdev->irq, pf);
+
+		val = rd32(hw, I40E_PFINT_LNKLST0);
+		qp = (val & I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK)
+			>> I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT;
+		val |= I40E_QUEUE_END_OF_LIST
+			<< I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT;
+		wr32(hw, I40E_PFINT_LNKLST0, val);
+
+		val = rd32(hw, I40E_QINT_RQCTL(qp));
+		val &= ~(I40E_QINT_RQCTL_MSIX_INDX_MASK  |
+			 I40E_QINT_RQCTL_MSIX0_INDX_MASK |
+			 I40E_QINT_RQCTL_CAUSE_ENA_MASK  |
+			 I40E_QINT_RQCTL_INTEVENT_MASK);
+
+		val |= (I40E_QINT_RQCTL_ITR_INDX_MASK |
+			I40E_QINT_RQCTL_NEXTQ_INDX_MASK);
+
+		wr32(hw, I40E_QINT_RQCTL(qp), val);
+
+		val = rd32(hw, I40E_QINT_TQCTL(qp));
+
+		val &= ~(I40E_QINT_TQCTL_MSIX_INDX_MASK  |
+			 I40E_QINT_TQCTL_MSIX0_INDX_MASK |
+			 I40E_QINT_TQCTL_CAUSE_ENA_MASK  |
+			 I40E_QINT_TQCTL_INTEVENT_MASK);
+
+		val |= (I40E_QINT_TQCTL_ITR_INDX_MASK |
+			I40E_QINT_TQCTL_NEXTQ_INDX_MASK);
+
+		wr32(hw, I40E_QINT_TQCTL(qp), val);
+	}
+}
+
+/**
+ * i40e_free_q_vector - Free memory allocated for specific interrupt vector
+ * @vsi: the VSI being configured
+ * @v_idx: Index of vector to be freed
+ *
+ * This function frees the memory allocated to the q_vector.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void i40e_free_q_vector(struct i40e_vsi *vsi, int v_idx)
+{
+	struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx];
+	struct i40e_ring *ring;
+
+	if (!q_vector)
+		return;
+
+	/* disassociate q_vector from rings */
+	i40e_for_each_ring(ring, q_vector->tx)
+		ring->q_vector = NULL;
+
+	i40e_for_each_ring(ring, q_vector->rx)
+		ring->q_vector = NULL;
+
+	/* only VSI w/ an associated netdev is set up w/ NAPI */
+	if (vsi->netdev)
+		netif_napi_del(&q_vector->napi);
+
+	vsi->q_vectors[v_idx] = NULL;
+
+	kfree_rcu(q_vector, rcu);
+}
+
+/**
+ * i40e_vsi_free_q_vectors - Free memory allocated for interrupt vectors
+ * @vsi: the VSI being un-configured
+ *
+ * This frees the memory allocated to the q_vectors and
+ * deletes references to the NAPI struct.
+ **/
+static void i40e_vsi_free_q_vectors(struct i40e_vsi *vsi)
+{
+	int v_idx;
+
+	for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++)
+		i40e_free_q_vector(vsi, v_idx);
+}
+
+/**
+ * i40e_reset_interrupt_capability - Disable interrupt setup in OS
+ * @pf: board private structure
+ **/
+static void i40e_reset_interrupt_capability(struct i40e_pf *pf)
+{
+	/* If we're in Legacy mode, the interrupt was cleaned in vsi_close */
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+		pci_disable_msix(pf->pdev);
+		kfree(pf->msix_entries);
+		pf->msix_entries = NULL;
+		kfree(pf->irq_pile);
+		pf->irq_pile = NULL;
+	} else if (pf->flags & I40E_FLAG_MSI_ENABLED) {
+		pci_disable_msi(pf->pdev);
+	}
+	pf->flags &= ~(I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED);
+}
+
+/**
+ * i40e_clear_interrupt_scheme - Clear the current interrupt scheme settings
+ * @pf: board private structure
+ *
+ * We go through and clear interrupt specific resources and reset the structure
+ * to pre-load conditions
+ **/
+static void i40e_clear_interrupt_scheme(struct i40e_pf *pf)
+{
+	int i;
+
+	if (test_bit(__I40E_MISC_IRQ_REQUESTED, pf->state))
+		i40e_free_misc_vector(pf);
+
+	i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector,
+		      I40E_IWARP_IRQ_PILE_ID);
+
+	i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1);
+	for (i = 0; i < pf->num_alloc_vsi; i++)
+		if (pf->vsi[i])
+			i40e_vsi_free_q_vectors(pf->vsi[i]);
+	i40e_reset_interrupt_capability(pf);
+}
+
+/**
+ * i40e_napi_enable_all - Enable NAPI for all q_vectors in the VSI
+ * @vsi: the VSI being configured
+ **/
+static void i40e_napi_enable_all(struct i40e_vsi *vsi)
+{
+	int q_idx;
+
+	if (!vsi->netdev)
+		return;
+
+	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
+		struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+		if (q_vector->rx.ring || q_vector->tx.ring)
+			napi_enable(&q_vector->napi);
+	}
+}
+
+/**
+ * i40e_napi_disable_all - Disable NAPI for all q_vectors in the VSI
+ * @vsi: the VSI being configured
+ **/
+static void i40e_napi_disable_all(struct i40e_vsi *vsi)
+{
+	int q_idx;
+
+	if (!vsi->netdev)
+		return;
+
+	for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) {
+		struct i40e_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+		if (q_vector->rx.ring || q_vector->tx.ring)
+			napi_disable(&q_vector->napi);
+	}
+}
+
+/**
+ * i40e_vsi_close - Shut down a VSI
+ * @vsi: the vsi to be quelled
+ **/
+static void i40e_vsi_close(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	if (!test_and_set_bit(__I40E_VSI_DOWN, vsi->state))
+		i40e_down(vsi);
+	i40e_vsi_free_irq(vsi);
+	i40e_vsi_free_tx_resources(vsi);
+	i40e_vsi_free_rx_resources(vsi);
+	vsi->current_netdev_flags = 0;
+	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
+	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+		set_bit(__I40E_CLIENT_RESET, pf->state);
+}
+
+/**
+ * i40e_quiesce_vsi - Pause a given VSI
+ * @vsi: the VSI being paused
+ **/
+static void i40e_quiesce_vsi(struct i40e_vsi *vsi)
+{
+	if (test_bit(__I40E_VSI_DOWN, vsi->state))
+		return;
+
+	set_bit(__I40E_VSI_NEEDS_RESTART, vsi->state);
+	if (vsi->netdev && netif_running(vsi->netdev))
+		vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
+	else
+		i40e_vsi_close(vsi);
+}
+
+/**
+ * i40e_unquiesce_vsi - Resume a given VSI
+ * @vsi: the VSI being resumed
+ **/
+static void i40e_unquiesce_vsi(struct i40e_vsi *vsi)
+{
+	if (!test_and_clear_bit(__I40E_VSI_NEEDS_RESTART, vsi->state))
+		return;
+
+	if (vsi->netdev && netif_running(vsi->netdev))
+		vsi->netdev->netdev_ops->ndo_open(vsi->netdev);
+	else
+		i40e_vsi_open(vsi);   /* this clears the DOWN bit */
+}
+
+/**
+ * i40e_pf_quiesce_all_vsi - Pause all VSIs on a PF
+ * @pf: the PF
+ **/
+static void i40e_pf_quiesce_all_vsi(struct i40e_pf *pf)
+{
+	int v;
+
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
+		if (pf->vsi[v])
+			i40e_quiesce_vsi(pf->vsi[v]);
+	}
+}
+
+/**
+ * i40e_pf_unquiesce_all_vsi - Resume all VSIs on a PF
+ * @pf: the PF
+ **/
+static void i40e_pf_unquiesce_all_vsi(struct i40e_pf *pf)
+{
+	int v;
+
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
+		if (pf->vsi[v])
+			i40e_unquiesce_vsi(pf->vsi[v]);
+	}
+}
+
+/**
+ * i40e_vsi_wait_queues_disabled - Wait for VSI's queues to be disabled
+ * @vsi: the VSI being configured
+ *
+ * Wait until all queues on a given VSI have been disabled.
+ **/
+int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	int i, pf_q, ret;
+
+	pf_q = vsi->base_queue;
+	for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
+		/* Check and wait for the Tx queue */
+		ret = i40e_pf_txq_wait(pf, pf_q, false);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "VSI seid %d Tx ring %d disable timeout\n",
+				 vsi->seid, pf_q);
+			return ret;
+		}
+
+		if (!i40e_enabled_xdp_vsi(vsi))
+			goto wait_rx;
+
+		/* Check and wait for the XDP Tx queue */
+		ret = i40e_pf_txq_wait(pf, pf_q + vsi->alloc_queue_pairs,
+				       false);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "VSI seid %d XDP Tx ring %d disable timeout\n",
+				 vsi->seid, pf_q);
+			return ret;
+		}
+wait_rx:
+		/* Check and wait for the Rx queue */
+		ret = i40e_pf_rxq_wait(pf, pf_q, false);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "VSI seid %d Rx ring %d disable timeout\n",
+				 vsi->seid, pf_q);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_I40E_DCB
+/**
+ * i40e_pf_wait_queues_disabled - Wait for all queues of PF VSIs to be disabled
+ * @pf: the PF
+ *
+ * This function waits for the queues to be in disabled state for all the
+ * VSIs that are managed by this PF.
+ **/
+static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf)
+{
+	int v, ret = 0;
+
+	for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+		if (pf->vsi[v]) {
+			ret = i40e_vsi_wait_queues_disabled(pf->vsi[v]);
+			if (ret)
+				break;
+		}
+	}
+
+	return ret;
+}
+
+#endif
+
+/**
+ * i40e_get_iscsi_tc_map - Return TC map for iSCSI APP
+ * @pf: pointer to PF
+ *
+ * Get TC map for ISCSI PF type that will include iSCSI TC
+ * and LAN TC.
+ **/
+static u8 i40e_get_iscsi_tc_map(struct i40e_pf *pf)
+{
+	struct i40e_dcb_app_priority_table app;
+	struct i40e_hw *hw = &pf->hw;
+	u8 enabled_tc = 1; /* TC0 is always enabled */
+	u8 tc, i;
+	/* Get the iSCSI APP TLV */
+	struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
+
+	for (i = 0; i < dcbcfg->numapps; i++) {
+		app = dcbcfg->app[i];
+		if (app.selector == I40E_APP_SEL_TCPIP &&
+		    app.protocolid == I40E_APP_PROTOID_ISCSI) {
+			tc = dcbcfg->etscfg.prioritytable[app.priority];
+			enabled_tc |= BIT(tc);
+			break;
+		}
+	}
+
+	return enabled_tc;
+}
+
+/**
+ * i40e_dcb_get_num_tc -  Get the number of TCs from DCBx config
+ * @dcbcfg: the corresponding DCBx configuration structure
+ *
+ * Return the number of TCs from given DCBx configuration
+ **/
+static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg)
+{
+	int i, tc_unused = 0;
+	u8 num_tc = 0;
+	u8 ret = 0;
+
+	/* Scan the ETS Config Priority Table to find
+	 * traffic class enabled for a given priority
+	 * and create a bitmask of enabled TCs
+	 */
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
+		num_tc |= BIT(dcbcfg->etscfg.prioritytable[i]);
+
+	/* Now scan the bitmask to check for
+	 * contiguous TCs starting with TC0
+	 */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (num_tc & BIT(i)) {
+			if (!tc_unused) {
+				ret++;
+			} else {
+				pr_err("Non-contiguous TC - Disabling DCB\n");
+				return 1;
+			}
+		} else {
+			tc_unused = 1;
+		}
+	}
+
+	/* There is always at least TC0 */
+	if (!ret)
+		ret = 1;
+
+	return ret;
+}
+
+/**
+ * i40e_dcb_get_enabled_tc - Get enabled traffic classes
+ * @dcbcfg: the corresponding DCBx configuration structure
+ *
+ * Query the current DCB configuration and return the number of
+ * traffic classes enabled from the given DCBX config
+ **/
+static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg)
+{
+	u8 num_tc = i40e_dcb_get_num_tc(dcbcfg);
+	u8 enabled_tc = 1;
+	u8 i;
+
+	for (i = 0; i < num_tc; i++)
+		enabled_tc |= BIT(i);
+
+	return enabled_tc;
+}
+
+/**
+ * i40e_mqprio_get_enabled_tc - Get enabled traffic classes
+ * @pf: PF being queried
+ *
+ * Query the current MQPRIO configuration and return the number of
+ * traffic classes enabled.
+ **/
+static u8 i40e_mqprio_get_enabled_tc(struct i40e_pf *pf)
+{
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	u8 num_tc = vsi->mqprio_qopt.qopt.num_tc;
+	u8 enabled_tc = 1, i;
+
+	for (i = 1; i < num_tc; i++)
+		enabled_tc |= BIT(i);
+	return enabled_tc;
+}
+
+/**
+ * i40e_pf_get_num_tc - Get enabled traffic classes for PF
+ * @pf: PF being queried
+ *
+ * Return number of traffic classes enabled for the given PF
+ **/
+static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u8 i, enabled_tc = 1;
+	u8 num_tc = 0;
+	struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
+
+	if (i40e_is_tc_mqprio_enabled(pf))
+		return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc;
+
+	/* If neither MQPRIO nor DCB is enabled, then always use single TC */
+	if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
+		return 1;
+
+	/* SFP mode will be enabled for all TCs on port */
+	if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
+		return i40e_dcb_get_num_tc(dcbcfg);
+
+	/* MFP mode return count of enabled TCs for this PF */
+	if (pf->hw.func_caps.iscsi)
+		enabled_tc =  i40e_get_iscsi_tc_map(pf);
+	else
+		return 1; /* Only TC0 */
+
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (enabled_tc & BIT(i))
+			num_tc++;
+	}
+	return num_tc;
+}
+
+/**
+ * i40e_pf_get_tc_map - Get bitmap for enabled traffic classes
+ * @pf: PF being queried
+ *
+ * Return a bitmap for enabled traffic classes for this PF.
+ **/
+static u8 i40e_pf_get_tc_map(struct i40e_pf *pf)
+{
+	if (i40e_is_tc_mqprio_enabled(pf))
+		return i40e_mqprio_get_enabled_tc(pf);
+
+	/* If neither MQPRIO nor DCB is enabled for this PF then just return
+	 * default TC
+	 */
+	if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
+		return I40E_DEFAULT_TRAFFIC_CLASS;
+
+	/* SFP mode we want PF to be enabled for all TCs */
+	if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
+		return i40e_dcb_get_enabled_tc(&pf->hw.local_dcbx_config);
+
+	/* MFP enabled and iSCSI PF type */
+	if (pf->hw.func_caps.iscsi)
+		return i40e_get_iscsi_tc_map(pf);
+	else
+		return I40E_DEFAULT_TRAFFIC_CLASS;
+}
+
+/**
+ * i40e_vsi_get_bw_info - Query VSI BW Information
+ * @vsi: the VSI being queried
+ *
+ * Returns 0 on success, negative value on failure
+ **/
+static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi)
+{
+	struct i40e_aqc_query_vsi_ets_sla_config_resp bw_ets_config = {0};
+	struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0};
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	u32 tc_bw_max;
+	int ret;
+	int i;
+
+	/* Get the VSI level BW configuration */
+	ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid, &bw_config, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "couldn't get PF vsi bw config, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		return -EINVAL;
+	}
+
+	/* Get the VSI level BW configuration per TC */
+	ret = i40e_aq_query_vsi_ets_sla_config(hw, vsi->seid, &bw_ets_config,
+					       NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "couldn't get PF vsi ets bw config, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		return -EINVAL;
+	}
+
+	if (bw_config.tc_valid_bits != bw_ets_config.tc_valid_bits) {
+		dev_info(&pf->pdev->dev,
+			 "Enabled TCs mismatch from querying VSI BW info 0x%08x 0x%08x\n",
+			 bw_config.tc_valid_bits,
+			 bw_ets_config.tc_valid_bits);
+		/* Still continuing */
+	}
+
+	vsi->bw_limit = le16_to_cpu(bw_config.port_bw_limit);
+	vsi->bw_max_quanta = bw_config.max_bw;
+	tc_bw_max = le16_to_cpu(bw_ets_config.tc_bw_max[0]) |
+		    (le16_to_cpu(bw_ets_config.tc_bw_max[1]) << 16);
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		vsi->bw_ets_share_credits[i] = bw_ets_config.share_credits[i];
+		vsi->bw_ets_limit_credits[i] =
+					le16_to_cpu(bw_ets_config.credits[i]);
+		/* 3 bits out of 4 for each TC */
+		vsi->bw_ets_max_quanta[i] = (u8)((tc_bw_max >> (i*4)) & 0x7);
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_vsi_configure_bw_alloc - Configure VSI BW allocation per TC
+ * @vsi: the VSI being configured
+ * @enabled_tc: TC bitmap
+ * @bw_share: BW shared credits per TC
+ *
+ * Returns 0 on success, negative value on failure
+ **/
+static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc,
+				       u8 *bw_share)
+{
+	struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
+	struct i40e_pf *pf = vsi->back;
+	int ret;
+	int i;
+
+	/* There is no need to reset BW when mqprio mode is on.  */
+	if (i40e_is_tc_mqprio_enabled(pf))
+		return 0;
+	if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) {
+		ret = i40e_set_bw_limit(vsi, vsi->seid, 0);
+		if (ret)
+			dev_info(&pf->pdev->dev,
+				 "Failed to reset tx rate for vsi->seid %u\n",
+				 vsi->seid);
+		return ret;
+	}
+	memset(&bw_data, 0, sizeof(bw_data));
+	bw_data.tc_valid_bits = enabled_tc;
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		bw_data.tc_bw_credits[i] = bw_share[i];
+
+	ret = i40e_aq_config_vsi_tc_bw(&pf->hw, vsi->seid, &bw_data, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "AQ command Config VSI BW allocation per TC failed = %d\n",
+			 pf->hw.aq.asq_last_status);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		vsi->info.qs_handle[i] = bw_data.qs_handles[i];
+
+	return 0;
+}
+
+/**
+ * i40e_vsi_config_netdev_tc - Setup the netdev TC configuration
+ * @vsi: the VSI being configured
+ * @enabled_tc: TC map to be enabled
+ *
+ **/
+static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	u8 netdev_tc = 0;
+	int i;
+	struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
+
+	if (!netdev)
+		return;
+
+	if (!enabled_tc) {
+		netdev_reset_tc(netdev);
+		return;
+	}
+
+	/* Set up actual enabled TCs on the VSI */
+	if (netdev_set_num_tc(netdev, vsi->tc_config.numtc))
+		return;
+
+	/* set per TC queues for the VSI */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		/* Only set TC queues for enabled tcs
+		 *
+		 * e.g. For a VSI that has TC0 and TC3 enabled the
+		 * enabled_tc bitmap would be 0x00001001; the driver
+		 * will set the numtc for netdev as 2 that will be
+		 * referenced by the netdev layer as TC 0 and 1.
+		 */
+		if (vsi->tc_config.enabled_tc & BIT(i))
+			netdev_set_tc_queue(netdev,
+					vsi->tc_config.tc_info[i].netdev_tc,
+					vsi->tc_config.tc_info[i].qcount,
+					vsi->tc_config.tc_info[i].qoffset);
+	}
+
+	if (i40e_is_tc_mqprio_enabled(pf))
+		return;
+
+	/* Assign UP2TC map for the VSI */
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+		/* Get the actual TC# for the UP */
+		u8 ets_tc = dcbcfg->etscfg.prioritytable[i];
+		/* Get the mapped netdev TC# for the UP */
+		netdev_tc =  vsi->tc_config.tc_info[ets_tc].netdev_tc;
+		netdev_set_prio_tc_map(netdev, i, netdev_tc);
+	}
+}
+
+/**
+ * i40e_vsi_update_queue_map - Update our copy of VSi info with new queue map
+ * @vsi: the VSI being configured
+ * @ctxt: the ctxt buffer returned from AQ VSI update param command
+ **/
+static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi,
+				      struct i40e_vsi_context *ctxt)
+{
+	/* copy just the sections touched not the entire info
+	 * since not all sections are valid as returned by
+	 * update vsi params
+	 */
+	vsi->info.mapping_flags = ctxt->info.mapping_flags;
+	memcpy(&vsi->info.queue_mapping,
+	       &ctxt->info.queue_mapping, sizeof(vsi->info.queue_mapping));
+	memcpy(&vsi->info.tc_mapping, ctxt->info.tc_mapping,
+	       sizeof(vsi->info.tc_mapping));
+}
+
+/**
+ * i40e_update_adq_vsi_queues - update queue mapping for ADq VSI
+ * @vsi: the VSI being reconfigured
+ * @vsi_offset: offset from main VF VSI
+ */
+int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset)
+{
+	struct i40e_vsi_context ctxt = {};
+	struct i40e_pf *pf;
+	struct i40e_hw *hw;
+	int ret;
+
+	if (!vsi)
+		return -EINVAL;
+	pf = vsi->back;
+	hw = &pf->hw;
+
+	ctxt.seid = vsi->seid;
+	ctxt.pf_num = hw->pf_id;
+	ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id + vsi_offset;
+	ctxt.uplink_seid = vsi->uplink_seid;
+	ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
+	ctxt.flags = I40E_AQ_VSI_TYPE_VF;
+	ctxt.info = vsi->info;
+
+	i40e_vsi_setup_queue_map(vsi, &ctxt, vsi->tc_config.enabled_tc,
+				 false);
+	if (vsi->reconfig_rss) {
+		vsi->rss_size = min_t(int, pf->alloc_rss_size,
+				      vsi->num_queue_pairs);
+		ret = i40e_vsi_config_rss(vsi);
+		if (ret) {
+			dev_info(&pf->pdev->dev, "Failed to reconfig rss for num_queues\n");
+			return ret;
+		}
+		vsi->reconfig_rss = false;
+	}
+
+	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev, "Update vsi config failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(hw, hw->aq.asq_last_status));
+		return ret;
+	}
+	/* update the local VSI info with updated queue map */
+	i40e_vsi_update_queue_map(vsi, &ctxt);
+	vsi->info.valid_sections = 0;
+
+	return ret;
+}
+
+/**
+ * i40e_vsi_config_tc - Configure VSI Tx Scheduler for given TC map
+ * @vsi: VSI to be configured
+ * @enabled_tc: TC bitmap
+ *
+ * This configures a particular VSI for TCs that are mapped to the
+ * given TC bitmap. It uses default bandwidth share for TCs across
+ * VSIs to configure TC for a particular VSI.
+ *
+ * NOTE:
+ * It is expected that the VSI queues have been quisced before calling
+ * this function.
+ **/
+static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
+{
+	u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0};
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_vsi_context ctxt;
+	int ret = 0;
+	int i;
+
+	/* Check if enabled_tc is same as existing or new TCs */
+	if (vsi->tc_config.enabled_tc == enabled_tc &&
+	    vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL)
+		return ret;
+
+	/* Enable ETS TCs with equal BW Share for now across all VSIs */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (enabled_tc & BIT(i))
+			bw_share[i] = 1;
+	}
+
+	ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share);
+	if (ret) {
+		struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0};
+
+		dev_info(&pf->pdev->dev,
+			 "Failed configuring TC map %d for VSI %d\n",
+			 enabled_tc, vsi->seid);
+		ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid,
+						  &bw_config, NULL);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Failed querying vsi bw info, err %pe aq_err %s\n",
+				 ERR_PTR(ret),
+				 i40e_aq_str(hw, hw->aq.asq_last_status));
+			goto out;
+		}
+		if ((bw_config.tc_valid_bits & enabled_tc) != enabled_tc) {
+			u8 valid_tc = bw_config.tc_valid_bits & enabled_tc;
+
+			if (!valid_tc)
+				valid_tc = bw_config.tc_valid_bits;
+			/* Always enable TC0, no matter what */
+			valid_tc |= 1;
+			dev_info(&pf->pdev->dev,
+				 "Requested tc 0x%x, but FW reports 0x%x as valid. Attempting to use 0x%x.\n",
+				 enabled_tc, bw_config.tc_valid_bits, valid_tc);
+			enabled_tc = valid_tc;
+		}
+
+		ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share);
+		if (ret) {
+			dev_err(&pf->pdev->dev,
+				"Unable to  configure TC map %d for VSI %d\n",
+				enabled_tc, vsi->seid);
+			goto out;
+		}
+	}
+
+	/* Update Queue Pairs Mapping for currently enabled UPs */
+	ctxt.seid = vsi->seid;
+	ctxt.pf_num = vsi->back->hw.pf_id;
+	ctxt.vf_num = 0;
+	ctxt.uplink_seid = vsi->uplink_seid;
+	ctxt.info = vsi->info;
+	if (i40e_is_tc_mqprio_enabled(pf)) {
+		ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc);
+		if (ret)
+			goto out;
+	} else {
+		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
+	}
+
+	/* On destroying the qdisc, reset vsi->rss_size, as number of enabled
+	 * queues changed.
+	 */
+	if (!vsi->mqprio_qopt.qopt.hw && vsi->reconfig_rss) {
+		vsi->rss_size = min_t(int, vsi->back->alloc_rss_size,
+				      vsi->num_queue_pairs);
+		ret = i40e_vsi_config_rss(vsi);
+		if (ret) {
+			dev_info(&vsi->back->pdev->dev,
+				 "Failed to reconfig rss for num_queues\n");
+			return ret;
+		}
+		vsi->reconfig_rss = false;
+	}
+	if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
+		ctxt.info.valid_sections |=
+				cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+		ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
+	}
+
+	/* Update the VSI after updating the VSI queue-mapping
+	 * information
+	 */
+	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Update vsi tc config failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(hw, hw->aq.asq_last_status));
+		goto out;
+	}
+	/* update the local VSI info with updated queue map */
+	i40e_vsi_update_queue_map(vsi, &ctxt);
+	vsi->info.valid_sections = 0;
+
+	/* Update current VSI BW information */
+	ret = i40e_vsi_get_bw_info(vsi);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Failed updating vsi bw info, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(hw, hw->aq.asq_last_status));
+		goto out;
+	}
+
+	/* Update the netdev TC setup */
+	i40e_vsi_config_netdev_tc(vsi, enabled_tc);
+out:
+	return ret;
+}
+
+/**
+ * i40e_get_link_speed - Returns link speed for the interface
+ * @vsi: VSI to be configured
+ *
+ **/
+static int i40e_get_link_speed(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+
+	switch (pf->hw.phy.link_info.link_speed) {
+	case I40E_LINK_SPEED_40GB:
+		return 40000;
+	case I40E_LINK_SPEED_25GB:
+		return 25000;
+	case I40E_LINK_SPEED_20GB:
+		return 20000;
+	case I40E_LINK_SPEED_10GB:
+		return 10000;
+	case I40E_LINK_SPEED_1GB:
+		return 1000;
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
+ * i40e_bw_bytes_to_mbits - Convert max_tx_rate from bytes to mbits
+ * @vsi: Pointer to vsi structure
+ * @max_tx_rate: max TX rate in bytes to be converted into Mbits
+ *
+ * Helper function to convert units before send to set BW limit
+ **/
+static u64 i40e_bw_bytes_to_mbits(struct i40e_vsi *vsi, u64 max_tx_rate)
+{
+	if (max_tx_rate < I40E_BW_MBPS_DIVISOR) {
+		dev_warn(&vsi->back->pdev->dev,
+			 "Setting max tx rate to minimum usable value of 50Mbps.\n");
+		max_tx_rate = I40E_BW_CREDIT_DIVISOR;
+	} else {
+		do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
+	}
+
+	return max_tx_rate;
+}
+
+/**
+ * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
+ * @vsi: VSI to be configured
+ * @seid: seid of the channel/VSI
+ * @max_tx_rate: max TX rate to be configured as BW limit
+ *
+ * Helper function to set BW limit for a given VSI
+ **/
+int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate)
+{
+	struct i40e_pf *pf = vsi->back;
+	u64 credits = 0;
+	int speed = 0;
+	int ret = 0;
+
+	speed = i40e_get_link_speed(vsi);
+	if (max_tx_rate > speed) {
+		dev_err(&pf->pdev->dev,
+			"Invalid max tx rate %llu specified for VSI seid %d.",
+			max_tx_rate, seid);
+		return -EINVAL;
+	}
+	if (max_tx_rate && max_tx_rate < I40E_BW_CREDIT_DIVISOR) {
+		dev_warn(&pf->pdev->dev,
+			 "Setting max tx rate to minimum usable value of 50Mbps.\n");
+		max_tx_rate = I40E_BW_CREDIT_DIVISOR;
+	}
+
+	/* Tx rate credits are in values of 50Mbps, 0 is disabled */
+	credits = max_tx_rate;
+	do_div(credits, I40E_BW_CREDIT_DIVISOR);
+	ret = i40e_aq_config_vsi_bw_limit(&pf->hw, seid, credits,
+					  I40E_MAX_BW_INACTIVE_ACCUM, NULL);
+	if (ret)
+		dev_err(&pf->pdev->dev,
+			"Failed set tx rate (%llu Mbps) for vsi->seid %u, err %pe aq_err %s\n",
+			max_tx_rate, seid, ERR_PTR(ret),
+			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+	return ret;
+}
+
+/**
+ * i40e_remove_queue_channels - Remove queue channels for the TCs
+ * @vsi: VSI to be configured
+ *
+ * Remove queue channels for the TCs
+ **/
+static void i40e_remove_queue_channels(struct i40e_vsi *vsi)
+{
+	enum i40e_admin_queue_err last_aq_status;
+	struct i40e_cloud_filter *cfilter;
+	struct i40e_channel *ch, *ch_tmp;
+	struct i40e_pf *pf = vsi->back;
+	struct hlist_node *node;
+	int ret, i;
+
+	/* Reset rss size that was stored when reconfiguring rss for
+	 * channel VSIs with non-power-of-2 queue count.
+	 */
+	vsi->current_rss_size = 0;
+
+	/* perform cleanup for channels if they exist */
+	if (list_empty(&vsi->ch_list))
+		return;
+
+	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+		struct i40e_vsi *p_vsi;
+
+		list_del(&ch->list);
+		p_vsi = ch->parent_vsi;
+		if (!p_vsi || !ch->initialized) {
+			kfree(ch);
+			continue;
+		}
+		/* Reset queue contexts */
+		for (i = 0; i < ch->num_queue_pairs; i++) {
+			struct i40e_ring *tx_ring, *rx_ring;
+			u16 pf_q;
+
+			pf_q = ch->base_queue + i;
+			tx_ring = vsi->tx_rings[pf_q];
+			tx_ring->ch = NULL;
+
+			rx_ring = vsi->rx_rings[pf_q];
+			rx_ring->ch = NULL;
+		}
+
+		/* Reset BW configured for this VSI via mqprio */
+		ret = i40e_set_bw_limit(vsi, ch->seid, 0);
+		if (ret)
+			dev_info(&vsi->back->pdev->dev,
+				 "Failed to reset tx rate for ch->seid %u\n",
+				 ch->seid);
+
+		/* delete cloud filters associated with this channel */
+		hlist_for_each_entry_safe(cfilter, node,
+					  &pf->cloud_filter_list, cloud_node) {
+			if (cfilter->seid != ch->seid)
+				continue;
+
+			hash_del(&cfilter->cloud_node);
+			if (cfilter->dst_port)
+				ret = i40e_add_del_cloud_filter_big_buf(vsi,
+									cfilter,
+									false);
+			else
+				ret = i40e_add_del_cloud_filter(vsi, cfilter,
+								false);
+			last_aq_status = pf->hw.aq.asq_last_status;
+			if (ret)
+				dev_info(&pf->pdev->dev,
+					 "Failed to delete cloud filter, err %pe aq_err %s\n",
+					 ERR_PTR(ret),
+					 i40e_aq_str(&pf->hw, last_aq_status));
+			kfree(cfilter);
+		}
+
+		/* delete VSI from FW */
+		ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
+					     NULL);
+		if (ret)
+			dev_err(&vsi->back->pdev->dev,
+				"unable to remove channel (%d) for parent VSI(%d)\n",
+				ch->seid, p_vsi->seid);
+		kfree(ch);
+	}
+	INIT_LIST_HEAD(&vsi->ch_list);
+}
+
+/**
+ * i40e_get_max_queues_for_channel
+ * @vsi: ptr to VSI to which channels are associated with
+ *
+ * Helper function which returns max value among the queue counts set on the
+ * channels/TCs created.
+ **/
+static int i40e_get_max_queues_for_channel(struct i40e_vsi *vsi)
+{
+	struct i40e_channel *ch, *ch_tmp;
+	int max = 0;
+
+	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+		if (!ch->initialized)
+			continue;
+		if (ch->num_queue_pairs > max)
+			max = ch->num_queue_pairs;
+	}
+
+	return max;
+}
+
+/**
+ * i40e_validate_num_queues - validate num_queues w.r.t channel
+ * @pf: ptr to PF device
+ * @num_queues: number of queues
+ * @vsi: the parent VSI
+ * @reconfig_rss: indicates should the RSS be reconfigured or not
+ *
+ * This function validates number of queues in the context of new channel
+ * which is being established and determines if RSS should be reconfigured
+ * or not for parent VSI.
+ **/
+static int i40e_validate_num_queues(struct i40e_pf *pf, int num_queues,
+				    struct i40e_vsi *vsi, bool *reconfig_rss)
+{
+	int max_ch_queues;
+
+	if (!reconfig_rss)
+		return -EINVAL;
+
+	*reconfig_rss = false;
+	if (vsi->current_rss_size) {
+		if (num_queues > vsi->current_rss_size) {
+			dev_dbg(&pf->pdev->dev,
+				"Error: num_queues (%d) > vsi's current_size(%d)\n",
+				num_queues, vsi->current_rss_size);
+			return -EINVAL;
+		} else if ((num_queues < vsi->current_rss_size) &&
+			   (!is_power_of_2(num_queues))) {
+			dev_dbg(&pf->pdev->dev,
+				"Error: num_queues (%d) < vsi's current_size(%d), but not power of 2\n",
+				num_queues, vsi->current_rss_size);
+			return -EINVAL;
+		}
+	}
+
+	if (!is_power_of_2(num_queues)) {
+		/* Find the max num_queues configured for channel if channel
+		 * exist.
+		 * if channel exist, then enforce 'num_queues' to be more than
+		 * max ever queues configured for channel.
+		 */
+		max_ch_queues = i40e_get_max_queues_for_channel(vsi);
+		if (num_queues < max_ch_queues) {
+			dev_dbg(&pf->pdev->dev,
+				"Error: num_queues (%d) < max queues configured for channel(%d)\n",
+				num_queues, max_ch_queues);
+			return -EINVAL;
+		}
+		*reconfig_rss = true;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_vsi_reconfig_rss - reconfig RSS based on specified rss_size
+ * @vsi: the VSI being setup
+ * @rss_size: size of RSS, accordingly LUT gets reprogrammed
+ *
+ * This function reconfigures RSS by reprogramming LUTs using 'rss_size'
+ **/
+static int i40e_vsi_reconfig_rss(struct i40e_vsi *vsi, u16 rss_size)
+{
+	struct i40e_pf *pf = vsi->back;
+	u8 seed[I40E_HKEY_ARRAY_SIZE];
+	struct i40e_hw *hw = &pf->hw;
+	int local_rss_size;
+	u8 *lut;
+	int ret;
+
+	if (!vsi->rss_size)
+		return -EINVAL;
+
+	if (rss_size > vsi->rss_size)
+		return -EINVAL;
+
+	local_rss_size = min_t(int, vsi->rss_size, rss_size);
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	/* Ignoring user configured lut if there is one */
+	i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, local_rss_size);
+
+	/* Use user configured hash key if there is one, otherwise
+	 * use default.
+	 */
+	if (vsi->rss_hkey_user)
+		memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+	else
+		netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+
+	ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Cannot set RSS lut, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(hw, hw->aq.asq_last_status));
+		kfree(lut);
+		return ret;
+	}
+	kfree(lut);
+
+	/* Do the update w.r.t. storing rss_size */
+	if (!vsi->orig_rss_size)
+		vsi->orig_rss_size = vsi->rss_size;
+	vsi->current_rss_size = local_rss_size;
+
+	return ret;
+}
+
+/**
+ * i40e_channel_setup_queue_map - Setup a channel queue map
+ * @pf: ptr to PF device
+ * @ctxt: VSI context structure
+ * @ch: ptr to channel structure
+ *
+ * Setup queue map for a specific channel
+ **/
+static void i40e_channel_setup_queue_map(struct i40e_pf *pf,
+					 struct i40e_vsi_context *ctxt,
+					 struct i40e_channel *ch)
+{
+	u16 qcount, qmap, sections = 0;
+	u8 offset = 0;
+	int pow;
+
+	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+	sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+
+	qcount = min_t(int, ch->num_queue_pairs, pf->num_lan_msix);
+	ch->num_queue_pairs = qcount;
+
+	/* find the next higher power-of-2 of num queue pairs */
+	pow = ilog2(qcount);
+	if (!is_power_of_2(qcount))
+		pow++;
+
+	qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+		(pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+
+	/* Setup queue TC[0].qmap for given VSI context */
+	ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+
+	ctxt->info.up_enable_bits = 0x1; /* TC0 enabled */
+	ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+	ctxt->info.queue_mapping[0] = cpu_to_le16(ch->base_queue);
+	ctxt->info.valid_sections |= cpu_to_le16(sections);
+}
+
+/**
+ * i40e_add_channel - add a channel by adding VSI
+ * @pf: ptr to PF device
+ * @uplink_seid: underlying HW switching element (VEB) ID
+ * @ch: ptr to channel structure
+ *
+ * Add a channel (VSI) using add_vsi and queue_map
+ **/
+static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid,
+			    struct i40e_channel *ch)
+{
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_vsi_context ctxt;
+	u8 enabled_tc = 0x1; /* TC0 enabled */
+	int ret;
+
+	if (ch->type != I40E_VSI_VMDQ2) {
+		dev_info(&pf->pdev->dev,
+			 "add new vsi failed, ch->type %d\n", ch->type);
+		return -EINVAL;
+	}
+
+	memset(&ctxt, 0, sizeof(ctxt));
+	ctxt.pf_num = hw->pf_id;
+	ctxt.vf_num = 0;
+	ctxt.uplink_seid = uplink_seid;
+	ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
+	if (ch->type == I40E_VSI_VMDQ2)
+		ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2;
+
+	if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) {
+		ctxt.info.valid_sections |=
+		     cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+		ctxt.info.switch_id =
+		   cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+	}
+
+	/* Set queue map for a given VSI context */
+	i40e_channel_setup_queue_map(pf, &ctxt, ch);
+
+	/* Now time to create VSI */
+	ret = i40e_aq_add_vsi(hw, &ctxt, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "add new vsi failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw,
+				     pf->hw.aq.asq_last_status));
+		return -ENOENT;
+	}
+
+	/* Success, update channel, set enabled_tc only if the channel
+	 * is not a macvlan
+	 */
+	ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc;
+	ch->seid = ctxt.seid;
+	ch->vsi_number = ctxt.vsi_number;
+	ch->stat_counter_idx = le16_to_cpu(ctxt.info.stat_counter_idx);
+
+	/* copy just the sections touched not the entire info
+	 * since not all sections are valid as returned by
+	 * update vsi params
+	 */
+	ch->info.mapping_flags = ctxt.info.mapping_flags;
+	memcpy(&ch->info.queue_mapping,
+	       &ctxt.info.queue_mapping, sizeof(ctxt.info.queue_mapping));
+	memcpy(&ch->info.tc_mapping, ctxt.info.tc_mapping,
+	       sizeof(ctxt.info.tc_mapping));
+
+	return 0;
+}
+
+static int i40e_channel_config_bw(struct i40e_vsi *vsi, struct i40e_channel *ch,
+				  u8 *bw_share)
+{
+	struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
+	int ret;
+	int i;
+
+	memset(&bw_data, 0, sizeof(bw_data));
+	bw_data.tc_valid_bits = ch->enabled_tc;
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		bw_data.tc_bw_credits[i] = bw_share[i];
+
+	ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, ch->seid,
+				       &bw_data, NULL);
+	if (ret) {
+		dev_info(&vsi->back->pdev->dev,
+			 "Config VSI BW allocation per TC failed, aq_err: %d for new_vsi->seid %u\n",
+			 vsi->back->hw.aq.asq_last_status, ch->seid);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+		ch->info.qs_handle[i] = bw_data.qs_handles[i];
+
+	return 0;
+}
+
+/**
+ * i40e_channel_config_tx_ring - config TX ring associated with new channel
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ *
+ * Configure TX rings associated with channel (VSI) since queues are being
+ * from parent VSI.
+ **/
+static int i40e_channel_config_tx_ring(struct i40e_pf *pf,
+				       struct i40e_vsi *vsi,
+				       struct i40e_channel *ch)
+{
+	u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0};
+	int ret;
+	int i;
+
+	/* Enable ETS TCs with equal BW Share for now across all VSIs */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (ch->enabled_tc & BIT(i))
+			bw_share[i] = 1;
+	}
+
+	/* configure BW for new VSI */
+	ret = i40e_channel_config_bw(vsi, ch, bw_share);
+	if (ret) {
+		dev_info(&vsi->back->pdev->dev,
+			 "Failed configuring TC map %d for channel (seid %u)\n",
+			 ch->enabled_tc, ch->seid);
+		return ret;
+	}
+
+	for (i = 0; i < ch->num_queue_pairs; i++) {
+		struct i40e_ring *tx_ring, *rx_ring;
+		u16 pf_q;
+
+		pf_q = ch->base_queue + i;
+
+		/* Get to TX ring ptr of main VSI, for re-setup TX queue
+		 * context
+		 */
+		tx_ring = vsi->tx_rings[pf_q];
+		tx_ring->ch = ch;
+
+		/* Get the RX ring ptr */
+		rx_ring = vsi->rx_rings[pf_q];
+		rx_ring->ch = ch;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_setup_hw_channel - setup new channel
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ * @uplink_seid: underlying HW switching element (VEB) ID
+ * @type: type of channel to be created (VMDq2/VF)
+ *
+ * Setup new channel (VSI) based on specified type (VMDq2/VF)
+ * and configures TX rings accordingly
+ **/
+static inline int i40e_setup_hw_channel(struct i40e_pf *pf,
+					struct i40e_vsi *vsi,
+					struct i40e_channel *ch,
+					u16 uplink_seid, u8 type)
+{
+	int ret;
+
+	ch->initialized = false;
+	ch->base_queue = vsi->next_base_queue;
+	ch->type = type;
+
+	/* Proceed with creation of channel (VMDq2) VSI */
+	ret = i40e_add_channel(pf, uplink_seid, ch);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "failed to add_channel using uplink_seid %u\n",
+			 uplink_seid);
+		return ret;
+	}
+
+	/* Mark the successful creation of channel */
+	ch->initialized = true;
+
+	/* Reconfigure TX queues using QTX_CTL register */
+	ret = i40e_channel_config_tx_ring(pf, vsi, ch);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "failed to configure TX rings for channel %u\n",
+			 ch->seid);
+		return ret;
+	}
+
+	/* update 'next_base_queue' */
+	vsi->next_base_queue = vsi->next_base_queue + ch->num_queue_pairs;
+	dev_dbg(&pf->pdev->dev,
+		"Added channel: vsi_seid %u, vsi_number %u, stat_counter_idx %u, num_queue_pairs %u, pf->next_base_queue %d\n",
+		ch->seid, ch->vsi_number, ch->stat_counter_idx,
+		ch->num_queue_pairs,
+		vsi->next_base_queue);
+	return ret;
+}
+
+/**
+ * i40e_setup_channel - setup new channel using uplink element
+ * @pf: ptr to PF device
+ * @vsi: pointer to the VSI to set up the channel within
+ * @ch: ptr to channel structure
+ *
+ * Setup new channel (VSI) based on specified type (VMDq2/VF)
+ * and uplink switching element (uplink_seid)
+ **/
+static bool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi,
+			       struct i40e_channel *ch)
+{
+	u8 vsi_type;
+	u16 seid;
+	int ret;
+
+	if (vsi->type == I40E_VSI_MAIN) {
+		vsi_type = I40E_VSI_VMDQ2;
+	} else {
+		dev_err(&pf->pdev->dev, "unsupported parent vsi type(%d)\n",
+			vsi->type);
+		return false;
+	}
+
+	/* underlying switching element */
+	seid = pf->vsi[pf->lan_vsi]->uplink_seid;
+
+	/* create channel (VSI), configure TX rings */
+	ret = i40e_setup_hw_channel(pf, vsi, ch, seid, vsi_type);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "failed to setup hw_channel\n");
+		return false;
+	}
+
+	return ch->initialized ? true : false;
+}
+
+/**
+ * i40e_validate_and_set_switch_mode - sets up switch mode correctly
+ * @vsi: ptr to VSI which has PF backing
+ *
+ * Sets up switch mode correctly if it needs to be changed and perform
+ * what are allowed modes.
+ **/
+static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi)
+{
+	u8 mode;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	int ret;
+
+	ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_dev_capabilities);
+	if (ret)
+		return -EINVAL;
+
+	if (hw->dev_caps.switch_mode) {
+		/* if switch mode is set, support mode2 (non-tunneled for
+		 * cloud filter) for now
+		 */
+		u32 switch_mode = hw->dev_caps.switch_mode &
+				  I40E_SWITCH_MODE_MASK;
+		if (switch_mode >= I40E_CLOUD_FILTER_MODE1) {
+			if (switch_mode == I40E_CLOUD_FILTER_MODE2)
+				return 0;
+			dev_err(&pf->pdev->dev,
+				"Invalid switch_mode (%d), only non-tunneled mode for cloud filter is supported\n",
+				hw->dev_caps.switch_mode);
+			return -EINVAL;
+		}
+	}
+
+	/* Set Bit 7 to be valid */
+	mode = I40E_AQ_SET_SWITCH_BIT7_VALID;
+
+	/* Set L4type for TCP support */
+	mode |= I40E_AQ_SET_SWITCH_L4_TYPE_TCP;
+
+	/* Set cloud filter mode */
+	mode |= I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL;
+
+	/* Prep mode field for set_switch_config */
+	ret = i40e_aq_set_switch_config(hw, pf->last_sw_conf_flags,
+					pf->last_sw_conf_valid_flags,
+					mode, NULL);
+	if (ret && hw->aq.asq_last_status != I40E_AQ_RC_ESRCH)
+		dev_err(&pf->pdev->dev,
+			"couldn't set switch config bits, err %pe aq_err %s\n",
+			ERR_PTR(ret),
+			i40e_aq_str(hw,
+				    hw->aq.asq_last_status));
+
+	return ret;
+}
+
+/**
+ * i40e_create_queue_channel - function to create channel
+ * @vsi: VSI to be configured
+ * @ch: ptr to channel (it contains channel specific params)
+ *
+ * This function creates channel (VSI) using num_queues specified by user,
+ * reconfigs RSS if needed.
+ **/
+int i40e_create_queue_channel(struct i40e_vsi *vsi,
+			      struct i40e_channel *ch)
+{
+	struct i40e_pf *pf = vsi->back;
+	bool reconfig_rss;
+	int err;
+
+	if (!ch)
+		return -EINVAL;
+
+	if (!ch->num_queue_pairs) {
+		dev_err(&pf->pdev->dev, "Invalid num_queues requested: %d\n",
+			ch->num_queue_pairs);
+		return -EINVAL;
+	}
+
+	/* validate user requested num_queues for channel */
+	err = i40e_validate_num_queues(pf, ch->num_queue_pairs, vsi,
+				       &reconfig_rss);
+	if (err) {
+		dev_info(&pf->pdev->dev, "Failed to validate num_queues (%d)\n",
+			 ch->num_queue_pairs);
+		return -EINVAL;
+	}
+
+	/* By default we are in VEPA mode, if this is the first VF/VMDq
+	 * VSI to be added switch to VEB mode.
+	 */
+
+	if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+		pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+
+		if (vsi->type == I40E_VSI_MAIN) {
+			if (i40e_is_tc_mqprio_enabled(pf))
+				i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
+			else
+				i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
+		}
+		/* now onwards for main VSI, number of queues will be value
+		 * of TC0's queue count
+		 */
+	}
+
+	/* By this time, vsi->cnt_q_avail shall be set to non-zero and
+	 * it should be more than num_queues
+	 */
+	if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_queue_pairs) {
+		dev_dbg(&pf->pdev->dev,
+			"Error: cnt_q_avail (%u) less than num_queues %d\n",
+			vsi->cnt_q_avail, ch->num_queue_pairs);
+		return -EINVAL;
+	}
+
+	/* reconfig_rss only if vsi type is MAIN_VSI */
+	if (reconfig_rss && (vsi->type == I40E_VSI_MAIN)) {
+		err = i40e_vsi_reconfig_rss(vsi, ch->num_queue_pairs);
+		if (err) {
+			dev_info(&pf->pdev->dev,
+				 "Error: unable to reconfig rss for num_queues (%u)\n",
+				 ch->num_queue_pairs);
+			return -EINVAL;
+		}
+	}
+
+	if (!i40e_setup_channel(pf, vsi, ch)) {
+		dev_info(&pf->pdev->dev, "Failed to setup channel\n");
+		return -EINVAL;
+	}
+
+	dev_info(&pf->pdev->dev,
+		 "Setup channel (id:%u) utilizing num_queues %d\n",
+		 ch->seid, ch->num_queue_pairs);
+
+	/* configure VSI for BW limit */
+	if (ch->max_tx_rate) {
+		u64 credits = ch->max_tx_rate;
+
+		if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate))
+			return -EINVAL;
+
+		do_div(credits, I40E_BW_CREDIT_DIVISOR);
+		dev_dbg(&pf->pdev->dev,
+			"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+			ch->max_tx_rate,
+			credits,
+			ch->seid);
+	}
+
+	/* in case of VF, this will be main SRIOV VSI */
+	ch->parent_vsi = vsi;
+
+	/* and update main_vsi's count for queue_available to use */
+	vsi->cnt_q_avail -= ch->num_queue_pairs;
+
+	return 0;
+}
+
+/**
+ * i40e_configure_queue_channels - Add queue channel for the given TCs
+ * @vsi: VSI to be configured
+ *
+ * Configures queue channel mapping to the given TCs
+ **/
+static int i40e_configure_queue_channels(struct i40e_vsi *vsi)
+{
+	struct i40e_channel *ch;
+	u64 max_rate = 0;
+	int ret = 0, i;
+
+	/* Create app vsi with the TCs. Main VSI with TC0 is already set up */
+	vsi->tc_seid_map[0] = vsi->seid;
+	for (i = 1; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (vsi->tc_config.enabled_tc & BIT(i)) {
+			ch = kzalloc(sizeof(*ch), GFP_KERNEL);
+			if (!ch) {
+				ret = -ENOMEM;
+				goto err_free;
+			}
+
+			INIT_LIST_HEAD(&ch->list);
+			ch->num_queue_pairs =
+				vsi->tc_config.tc_info[i].qcount;
+			ch->base_queue =
+				vsi->tc_config.tc_info[i].qoffset;
+
+			/* Bandwidth limit through tc interface is in bytes/s,
+			 * change to Mbit/s
+			 */
+			max_rate = vsi->mqprio_qopt.max_rate[i];
+			do_div(max_rate, I40E_BW_MBPS_DIVISOR);
+			ch->max_tx_rate = max_rate;
+
+			list_add_tail(&ch->list, &vsi->ch_list);
+
+			ret = i40e_create_queue_channel(vsi, ch);
+			if (ret) {
+				dev_err(&vsi->back->pdev->dev,
+					"Failed creating queue channel with TC%d: queues %d\n",
+					i, ch->num_queue_pairs);
+				goto err_free;
+			}
+			vsi->tc_seid_map[i] = ch->seid;
+		}
+	}
+
+	/* reset to reconfigure TX queue contexts */
+	i40e_do_reset(vsi->back, I40E_PF_RESET_FLAG, true);
+	return ret;
+
+err_free:
+	i40e_remove_queue_channels(vsi);
+	return ret;
+}
+
+/**
+ * i40e_veb_config_tc - Configure TCs for given VEB
+ * @veb: given VEB
+ * @enabled_tc: TC bitmap
+ *
+ * Configures given TC bitmap for VEB (switching) element
+ **/
+int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc)
+{
+	struct i40e_aqc_configure_switching_comp_bw_config_data bw_data = {0};
+	struct i40e_pf *pf = veb->pf;
+	int ret = 0;
+	int i;
+
+	/* No TCs or already enabled TCs just return */
+	if (!enabled_tc || veb->enabled_tc == enabled_tc)
+		return ret;
+
+	bw_data.tc_valid_bits = enabled_tc;
+	/* bw_data.absolute_credits is not set (relative) */
+
+	/* Enable ETS TCs with equal BW Share for now */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (enabled_tc & BIT(i))
+			bw_data.tc_bw_share_credits[i] = 1;
+	}
+
+	ret = i40e_aq_config_switch_comp_bw_config(&pf->hw, veb->seid,
+						   &bw_data, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "VEB bw config failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		goto out;
+	}
+
+	/* Update the BW information */
+	ret = i40e_veb_get_bw_info(veb);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Failed getting veb bw config, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+	}
+
+out:
+	return ret;
+}
+
+#ifdef CONFIG_I40E_DCB
+/**
+ * i40e_dcb_reconfigure - Reconfigure all VEBs and VSIs
+ * @pf: PF struct
+ *
+ * Reconfigure VEB/VSIs on a given PF; it is assumed that
+ * the caller would've quiesce all the VSIs before calling
+ * this function
+ **/
+static void i40e_dcb_reconfigure(struct i40e_pf *pf)
+{
+	u8 tc_map = 0;
+	int ret;
+	u8 v;
+
+	/* Enable the TCs available on PF to all VEBs */
+	tc_map = i40e_pf_get_tc_map(pf);
+	if (tc_map == I40E_DEFAULT_TRAFFIC_CLASS)
+		return;
+
+	for (v = 0; v < I40E_MAX_VEB; v++) {
+		if (!pf->veb[v])
+			continue;
+		ret = i40e_veb_config_tc(pf->veb[v], tc_map);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Failed configuring TC for VEB seid=%d\n",
+				 pf->veb[v]->seid);
+			/* Will try to configure as many components */
+		}
+	}
+
+	/* Update each VSI */
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
+		if (!pf->vsi[v])
+			continue;
+
+		/* - Enable all TCs for the LAN VSI
+		 * - For all others keep them at TC0 for now
+		 */
+		if (v == pf->lan_vsi)
+			tc_map = i40e_pf_get_tc_map(pf);
+		else
+			tc_map = I40E_DEFAULT_TRAFFIC_CLASS;
+
+		ret = i40e_vsi_config_tc(pf->vsi[v], tc_map);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Failed configuring TC for VSI seid=%d\n",
+				 pf->vsi[v]->seid);
+			/* Will try to configure as many components */
+		} else {
+			/* Re-configure VSI vectors based on updated TC map */
+			i40e_vsi_map_rings_to_vectors(pf->vsi[v]);
+			if (pf->vsi[v]->netdev)
+				i40e_dcbnl_set_all(pf->vsi[v]);
+		}
+	}
+}
+
+/**
+ * i40e_resume_port_tx - Resume port Tx
+ * @pf: PF struct
+ *
+ * Resume a port's Tx and issue a PF reset in case of failure to
+ * resume.
+ **/
+static int i40e_resume_port_tx(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	int ret;
+
+	ret = i40e_aq_resume_port_tx(hw, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Resume Port Tx failed, err %pe aq_err %s\n",
+			  ERR_PTR(ret),
+			  i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		/* Schedule PF reset to recover */
+		set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
+		i40e_service_event_schedule(pf);
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_suspend_port_tx - Suspend port Tx
+ * @pf: PF struct
+ *
+ * Suspend a port's Tx and issue a PF reset in case of failure.
+ **/
+static int i40e_suspend_port_tx(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	int ret;
+
+	ret = i40e_aq_suspend_port_tx(hw, pf->mac_seid, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Suspend Port Tx failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		/* Schedule PF reset to recover */
+		set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
+		i40e_service_event_schedule(pf);
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_hw_set_dcb_config - Program new DCBX settings into HW
+ * @pf: PF being configured
+ * @new_cfg: New DCBX configuration
+ *
+ * Program DCB settings into HW and reconfigure VEB/VSIs on
+ * given PF. Uses "Set LLDP MIB" AQC to program the hardware.
+ **/
+static int i40e_hw_set_dcb_config(struct i40e_pf *pf,
+				  struct i40e_dcbx_config *new_cfg)
+{
+	struct i40e_dcbx_config *old_cfg = &pf->hw.local_dcbx_config;
+	int ret;
+
+	/* Check if need reconfiguration */
+	if (!memcmp(&new_cfg, &old_cfg, sizeof(new_cfg))) {
+		dev_dbg(&pf->pdev->dev, "No Change in DCB Config required.\n");
+		return 0;
+	}
+
+	/* Config change disable all VSIs */
+	i40e_pf_quiesce_all_vsi(pf);
+
+	/* Copy the new config to the current config */
+	*old_cfg = *new_cfg;
+	old_cfg->etsrec = old_cfg->etscfg;
+	ret = i40e_set_dcb_config(&pf->hw);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Set DCB Config failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		goto out;
+	}
+
+	/* Changes in configuration update VEB/VSI */
+	i40e_dcb_reconfigure(pf);
+out:
+	/* In case of reset do not try to resume anything */
+	if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) {
+		/* Re-start the VSIs if disabled */
+		ret = i40e_resume_port_tx(pf);
+		/* In case of error no point in resuming VSIs */
+		if (ret)
+			goto err;
+		i40e_pf_unquiesce_all_vsi(pf);
+	}
+err:
+	return ret;
+}
+
+/**
+ * i40e_hw_dcb_config - Program new DCBX settings into HW
+ * @pf: PF being configured
+ * @new_cfg: New DCBX configuration
+ *
+ * Program DCB settings into HW and reconfigure VEB/VSIs on
+ * given PF
+ **/
+int i40e_hw_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg)
+{
+	struct i40e_aqc_configure_switching_comp_ets_data ets_data;
+	u8 prio_type[I40E_MAX_TRAFFIC_CLASS] = {0};
+	u32 mfs_tc[I40E_MAX_TRAFFIC_CLASS];
+	struct i40e_dcbx_config *old_cfg;
+	u8 mode[I40E_MAX_TRAFFIC_CLASS];
+	struct i40e_rx_pb_config pb_cfg;
+	struct i40e_hw *hw = &pf->hw;
+	u8 num_ports = hw->num_ports;
+	bool need_reconfig;
+	int ret = -EINVAL;
+	u8 lltc_map = 0;
+	u8 tc_map = 0;
+	u8 new_numtc;
+	u8 i;
+
+	dev_dbg(&pf->pdev->dev, "Configuring DCB registers directly\n");
+	/* Un-pack information to Program ETS HW via shared API
+	 * numtc, tcmap
+	 * LLTC map
+	 * ETS/NON-ETS arbiter mode
+	 * max exponent (credit refills)
+	 * Total number of ports
+	 * PFC priority bit-map
+	 * Priority Table
+	 * BW % per TC
+	 * Arbiter mode between UPs sharing same TC
+	 * TSA table (ETS or non-ETS)
+	 * EEE enabled or not
+	 * MFS TC table
+	 */
+
+	new_numtc = i40e_dcb_get_num_tc(new_cfg);
+
+	memset(&ets_data, 0, sizeof(ets_data));
+	for (i = 0; i < new_numtc; i++) {
+		tc_map |= BIT(i);
+		switch (new_cfg->etscfg.tsatable[i]) {
+		case I40E_IEEE_TSA_ETS:
+			prio_type[i] = I40E_DCB_PRIO_TYPE_ETS;
+			ets_data.tc_bw_share_credits[i] =
+					new_cfg->etscfg.tcbwtable[i];
+			break;
+		case I40E_IEEE_TSA_STRICT:
+			prio_type[i] = I40E_DCB_PRIO_TYPE_STRICT;
+			lltc_map |= BIT(i);
+			ets_data.tc_bw_share_credits[i] =
+					I40E_DCB_STRICT_PRIO_CREDITS;
+			break;
+		default:
+			/* Invalid TSA type */
+			need_reconfig = false;
+			goto out;
+		}
+	}
+
+	old_cfg = &hw->local_dcbx_config;
+	/* Check if need reconfiguration */
+	need_reconfig = i40e_dcb_need_reconfig(pf, old_cfg, new_cfg);
+
+	/* If needed, enable/disable frame tagging, disable all VSIs
+	 * and suspend port tx
+	 */
+	if (need_reconfig) {
+		/* Enable DCB tagging only when more than one TC */
+		if (new_numtc > 1)
+			pf->flags |= I40E_FLAG_DCB_ENABLED;
+		else
+			pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+
+		set_bit(__I40E_PORT_SUSPENDED, pf->state);
+		/* Reconfiguration needed quiesce all VSIs */
+		i40e_pf_quiesce_all_vsi(pf);
+		ret = i40e_suspend_port_tx(pf);
+		if (ret)
+			goto err;
+	}
+
+	/* Configure Port ETS Tx Scheduler */
+	ets_data.tc_valid_bits = tc_map;
+	ets_data.tc_strict_priority_flags = lltc_map;
+	ret = i40e_aq_config_switch_comp_ets
+		(hw, pf->mac_seid, &ets_data,
+		 i40e_aqc_opc_modify_switching_comp_ets, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Modify Port ETS failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		goto out;
+	}
+
+	/* Configure Rx ETS HW */
+	memset(&mode, I40E_DCB_ARB_MODE_ROUND_ROBIN, sizeof(mode));
+	i40e_dcb_hw_set_num_tc(hw, new_numtc);
+	i40e_dcb_hw_rx_fifo_config(hw, I40E_DCB_ARB_MODE_ROUND_ROBIN,
+				   I40E_DCB_ARB_MODE_STRICT_PRIORITY,
+				   I40E_DCB_DEFAULT_MAX_EXPONENT,
+				   lltc_map);
+	i40e_dcb_hw_rx_cmd_monitor_config(hw, new_numtc, num_ports);
+	i40e_dcb_hw_rx_ets_bw_config(hw, new_cfg->etscfg.tcbwtable, mode,
+				     prio_type);
+	i40e_dcb_hw_pfc_config(hw, new_cfg->pfc.pfcenable,
+			       new_cfg->etscfg.prioritytable);
+	i40e_dcb_hw_rx_up2tc_config(hw, new_cfg->etscfg.prioritytable);
+
+	/* Configure Rx Packet Buffers in HW */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		mfs_tc[i] = pf->vsi[pf->lan_vsi]->netdev->mtu;
+		mfs_tc[i] += I40E_PACKET_HDR_PAD;
+	}
+
+	i40e_dcb_hw_calculate_pool_sizes(hw, num_ports,
+					 false, new_cfg->pfc.pfcenable,
+					 mfs_tc, &pb_cfg);
+	i40e_dcb_hw_rx_pb_config(hw, &pf->pb_cfg, &pb_cfg);
+
+	/* Update the local Rx Packet buffer config */
+	pf->pb_cfg = pb_cfg;
+
+	/* Inform the FW about changes to DCB configuration */
+	ret = i40e_aq_dcb_updated(&pf->hw, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "DCB Updated failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		goto out;
+	}
+
+	/* Update the port DCBx configuration */
+	*old_cfg = *new_cfg;
+
+	/* Changes in configuration update VEB/VSI */
+	i40e_dcb_reconfigure(pf);
+out:
+	/* Re-start the VSIs if disabled */
+	if (need_reconfig) {
+		ret = i40e_resume_port_tx(pf);
+
+		clear_bit(__I40E_PORT_SUSPENDED, pf->state);
+		/* In case of error no point in resuming VSIs */
+		if (ret)
+			goto err;
+
+		/* Wait for the PF's queues to be disabled */
+		ret = i40e_pf_wait_queues_disabled(pf);
+		if (ret) {
+			/* Schedule PF reset to recover */
+			set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
+			i40e_service_event_schedule(pf);
+			goto err;
+		} else {
+			i40e_pf_unquiesce_all_vsi(pf);
+			set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
+			set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
+		}
+		/* registers are set, lets apply */
+		if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB)
+			ret = i40e_hw_set_dcb_config(pf, new_cfg);
+	}
+
+err:
+	return ret;
+}
+
+/**
+ * i40e_dcb_sw_default_config - Set default DCB configuration when DCB in SW
+ * @pf: PF being queried
+ *
+ * Set default DCB configuration in case DCB is to be done in SW.
+ **/
+int i40e_dcb_sw_default_config(struct i40e_pf *pf)
+{
+	struct i40e_dcbx_config *dcb_cfg = &pf->hw.local_dcbx_config;
+	struct i40e_aqc_configure_switching_comp_ets_data ets_data;
+	struct i40e_hw *hw = &pf->hw;
+	int err;
+
+	if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB) {
+		/* Update the local cached instance with TC0 ETS */
+		memset(&pf->tmp_cfg, 0, sizeof(struct i40e_dcbx_config));
+		pf->tmp_cfg.etscfg.willing = I40E_IEEE_DEFAULT_ETS_WILLING;
+		pf->tmp_cfg.etscfg.maxtcs = 0;
+		pf->tmp_cfg.etscfg.tcbwtable[0] = I40E_IEEE_DEFAULT_ETS_TCBW;
+		pf->tmp_cfg.etscfg.tsatable[0] = I40E_IEEE_TSA_ETS;
+		pf->tmp_cfg.pfc.willing = I40E_IEEE_DEFAULT_PFC_WILLING;
+		pf->tmp_cfg.pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;
+		/* FW needs one App to configure HW */
+		pf->tmp_cfg.numapps = I40E_IEEE_DEFAULT_NUM_APPS;
+		pf->tmp_cfg.app[0].selector = I40E_APP_SEL_ETHTYPE;
+		pf->tmp_cfg.app[0].priority = I40E_IEEE_DEFAULT_APP_PRIO;
+		pf->tmp_cfg.app[0].protocolid = I40E_APP_PROTOID_FCOE;
+
+		return i40e_hw_set_dcb_config(pf, &pf->tmp_cfg);
+	}
+
+	memset(&ets_data, 0, sizeof(ets_data));
+	ets_data.tc_valid_bits = I40E_DEFAULT_TRAFFIC_CLASS; /* TC0 only */
+	ets_data.tc_strict_priority_flags = 0; /* ETS */
+	ets_data.tc_bw_share_credits[0] = I40E_IEEE_DEFAULT_ETS_TCBW; /* 100% to TC0 */
+
+	/* Enable ETS on the Physical port */
+	err = i40e_aq_config_switch_comp_ets
+		(hw, pf->mac_seid, &ets_data,
+		 i40e_aqc_opc_enable_switching_comp_ets, NULL);
+	if (err) {
+		dev_info(&pf->pdev->dev,
+			 "Enable Port ETS failed, err %pe aq_err %s\n",
+			 ERR_PTR(err),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		err = -ENOENT;
+		goto out;
+	}
+
+	/* Update the local cached instance with TC0 ETS */
+	dcb_cfg->etscfg.willing = I40E_IEEE_DEFAULT_ETS_WILLING;
+	dcb_cfg->etscfg.cbs = 0;
+	dcb_cfg->etscfg.maxtcs = I40E_MAX_TRAFFIC_CLASS;
+	dcb_cfg->etscfg.tcbwtable[0] = I40E_IEEE_DEFAULT_ETS_TCBW;
+
+out:
+	return err;
+}
+
+/**
+ * i40e_init_pf_dcb - Initialize DCB configuration
+ * @pf: PF being configured
+ *
+ * Query the current DCB configuration and cache it
+ * in the hardware structure
+ **/
+static int i40e_init_pf_dcb(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	int err;
+
+	/* Do not enable DCB for SW1 and SW2 images even if the FW is capable
+	 * Also do not enable DCBx if FW LLDP agent is disabled
+	 */
+	if (pf->hw_features & I40E_HW_NO_DCB_SUPPORT) {
+		dev_info(&pf->pdev->dev, "DCB is not supported.\n");
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+	if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) {
+		dev_info(&pf->pdev->dev, "FW LLDP is disabled, attempting SW DCB\n");
+		err = i40e_dcb_sw_default_config(pf);
+		if (err) {
+			dev_info(&pf->pdev->dev, "Could not initialize SW DCB\n");
+			goto out;
+		}
+		dev_info(&pf->pdev->dev, "SW DCB initialization succeeded.\n");
+		pf->dcbx_cap = DCB_CAP_DCBX_HOST |
+			       DCB_CAP_DCBX_VER_IEEE;
+		/* at init capable but disabled */
+		pf->flags |= I40E_FLAG_DCB_CAPABLE;
+		pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+		goto out;
+	}
+	err = i40e_init_dcb(hw, true);
+	if (!err) {
+		/* Device/Function is not DCBX capable */
+		if ((!hw->func_caps.dcb) ||
+		    (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED)) {
+			dev_info(&pf->pdev->dev,
+				 "DCBX offload is not supported or is disabled for this PF.\n");
+		} else {
+			/* When status is not DISABLED then DCBX in FW */
+			pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED |
+				       DCB_CAP_DCBX_VER_IEEE;
+
+			pf->flags |= I40E_FLAG_DCB_CAPABLE;
+			/* Enable DCB tagging only when more than one TC
+			 * or explicitly disable if only one TC
+			 */
+			if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
+				pf->flags |= I40E_FLAG_DCB_ENABLED;
+			else
+				pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+			dev_dbg(&pf->pdev->dev,
+				"DCBX offload is supported for this PF.\n");
+		}
+	} else if (pf->hw.aq.asq_last_status == I40E_AQ_RC_EPERM) {
+		dev_info(&pf->pdev->dev, "FW LLDP disabled for this PF.\n");
+		pf->flags |= I40E_FLAG_DISABLE_FW_LLDP;
+	} else {
+		dev_info(&pf->pdev->dev,
+			 "Query for DCB configuration failed, err %pe aq_err %s\n",
+			 ERR_PTR(err),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+	}
+
+out:
+	return err;
+}
+#endif /* CONFIG_I40E_DCB */
+
+/**
+ * i40e_print_link_message - print link up or down
+ * @vsi: the VSI for which link needs a message
+ * @isup: true of link is up, false otherwise
+ */
+void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
+{
+	enum i40e_aq_link_speed new_speed;
+	struct i40e_pf *pf = vsi->back;
+	char *speed = "Unknown";
+	char *fc = "Unknown";
+	char *fec = "";
+	char *req_fec = "";
+	char *an = "";
+
+	if (isup)
+		new_speed = pf->hw.phy.link_info.link_speed;
+	else
+		new_speed = I40E_LINK_SPEED_UNKNOWN;
+
+	if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed))
+		return;
+	vsi->current_isup = isup;
+	vsi->current_speed = new_speed;
+	if (!isup) {
+		netdev_info(vsi->netdev, "NIC Link is Down\n");
+		return;
+	}
+
+	/* Warn user if link speed on NPAR enabled partition is not at
+	 * least 10GB
+	 */
+	if (pf->hw.func_caps.npar_enable &&
+	    (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
+	     pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
+		netdev_warn(vsi->netdev,
+			    "The partition detected link speed that is less than 10Gbps\n");
+
+	switch (pf->hw.phy.link_info.link_speed) {
+	case I40E_LINK_SPEED_40GB:
+		speed = "40 G";
+		break;
+	case I40E_LINK_SPEED_20GB:
+		speed = "20 G";
+		break;
+	case I40E_LINK_SPEED_25GB:
+		speed = "25 G";
+		break;
+	case I40E_LINK_SPEED_10GB:
+		speed = "10 G";
+		break;
+	case I40E_LINK_SPEED_5GB:
+		speed = "5 G";
+		break;
+	case I40E_LINK_SPEED_2_5GB:
+		speed = "2.5 G";
+		break;
+	case I40E_LINK_SPEED_1GB:
+		speed = "1000 M";
+		break;
+	case I40E_LINK_SPEED_100MB:
+		speed = "100 M";
+		break;
+	default:
+		break;
+	}
+
+	switch (pf->hw.fc.current_mode) {
+	case I40E_FC_FULL:
+		fc = "RX/TX";
+		break;
+	case I40E_FC_TX_PAUSE:
+		fc = "TX";
+		break;
+	case I40E_FC_RX_PAUSE:
+		fc = "RX";
+		break;
+	default:
+		fc = "None";
+		break;
+	}
+
+	if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
+		req_fec = "None";
+		fec = "None";
+		an = "False";
+
+		if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
+			an = "True";
+
+		if (pf->hw.phy.link_info.fec_info &
+		    I40E_AQ_CONFIG_FEC_KR_ENA)
+			fec = "CL74 FC-FEC/BASE-R";
+		else if (pf->hw.phy.link_info.fec_info &
+			 I40E_AQ_CONFIG_FEC_RS_ENA)
+			fec = "CL108 RS-FEC";
+
+		/* 'CL108 RS-FEC' should be displayed when RS is requested, or
+		 * both RS and FC are requested
+		 */
+		if (vsi->back->hw.phy.link_info.req_fec_info &
+		    (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) {
+			if (vsi->back->hw.phy.link_info.req_fec_info &
+			    I40E_AQ_REQUEST_FEC_RS)
+				req_fec = "CL108 RS-FEC";
+			else
+				req_fec = "CL74 FC-FEC/BASE-R";
+		}
+		netdev_info(vsi->netdev,
+			    "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
+			    speed, req_fec, fec, an, fc);
+	} else if (pf->hw.device_id == I40E_DEV_ID_KX_X722) {
+		req_fec = "None";
+		fec = "None";
+		an = "False";
+
+		if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
+			an = "True";
+
+		if (pf->hw.phy.link_info.fec_info &
+		    I40E_AQ_CONFIG_FEC_KR_ENA)
+			fec = "CL74 FC-FEC/BASE-R";
+
+		if (pf->hw.phy.link_info.req_fec_info &
+		    I40E_AQ_REQUEST_FEC_KR)
+			req_fec = "CL74 FC-FEC/BASE-R";
+
+		netdev_info(vsi->netdev,
+			    "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
+			    speed, req_fec, fec, an, fc);
+	} else {
+		netdev_info(vsi->netdev,
+			    "NIC Link is Up, %sbps Full Duplex, Flow Control: %s\n",
+			    speed, fc);
+	}
+
+}
+
+/**
+ * i40e_up_complete - Finish the last steps of bringing up a connection
+ * @vsi: the VSI being configured
+ **/
+static int i40e_up_complete(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	int err;
+
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+		i40e_vsi_configure_msix(vsi);
+	else
+		i40e_configure_msi_and_legacy(vsi);
+
+	/* start rings */
+	err = i40e_vsi_start_rings(vsi);
+	if (err)
+		return err;
+
+	clear_bit(__I40E_VSI_DOWN, vsi->state);
+	i40e_napi_enable_all(vsi);
+	i40e_vsi_enable_irq(vsi);
+
+	if ((pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP) &&
+	    (vsi->netdev)) {
+		i40e_print_link_message(vsi, true);
+		netif_tx_start_all_queues(vsi->netdev);
+		netif_carrier_on(vsi->netdev);
+	}
+
+	/* replay FDIR SB filters */
+	if (vsi->type == I40E_VSI_FDIR) {
+		/* reset fd counters */
+		pf->fd_add_err = 0;
+		pf->fd_atr_cnt = 0;
+		i40e_fdir_filter_restore(vsi);
+	}
+
+	/* On the next run of the service_task, notify any clients of the new
+	 * opened netdev
+	 */
+	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
+	i40e_service_event_schedule(pf);
+
+	return 0;
+}
+
+/**
+ * i40e_vsi_reinit_locked - Reset the VSI
+ * @vsi: the VSI being configured
+ *
+ * Rebuild the ring structs after some configuration
+ * has changed, e.g. MTU size.
+ **/
+static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+
+	while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state))
+		usleep_range(1000, 2000);
+	i40e_down(vsi);
+
+	i40e_up(vsi);
+	clear_bit(__I40E_CONFIG_BUSY, pf->state);
+}
+
+/**
+ * i40e_force_link_state - Force the link status
+ * @pf: board private structure
+ * @is_up: whether the link state should be forced up or down
+ **/
+static int i40e_force_link_state(struct i40e_pf *pf, bool is_up)
+{
+	struct i40e_aq_get_phy_abilities_resp abilities;
+	struct i40e_aq_set_phy_config config = {0};
+	bool non_zero_phy_type = is_up;
+	struct i40e_hw *hw = &pf->hw;
+	u64 mask;
+	u8 speed;
+	int err;
+
+	/* Card might've been put in an unstable state by other drivers
+	 * and applications, which causes incorrect speed values being
+	 * set on startup. In order to clear speed registers, we call
+	 * get_phy_capabilities twice, once to get initial state of
+	 * available speeds, and once to get current PHY config.
+	 */
+	err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities,
+					   NULL);
+	if (err) {
+		dev_err(&pf->pdev->dev,
+			"failed to get phy cap., ret =  %pe last_status =  %s\n",
+			ERR_PTR(err),
+			i40e_aq_str(hw, hw->aq.asq_last_status));
+		return err;
+	}
+	speed = abilities.link_speed;
+
+	/* Get the current phy config */
+	err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities,
+					   NULL);
+	if (err) {
+		dev_err(&pf->pdev->dev,
+			"failed to get phy cap., ret =  %pe last_status =  %s\n",
+			ERR_PTR(err),
+			i40e_aq_str(hw, hw->aq.asq_last_status));
+		return err;
+	}
+
+	/* If link needs to go up, but was not forced to go down,
+	 * and its speed values are OK, no need for a flap
+	 * if non_zero_phy_type was set, still need to force up
+	 */
+	if (pf->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED)
+		non_zero_phy_type = true;
+	else if (is_up && abilities.phy_type != 0 && abilities.link_speed != 0)
+		return 0;
+
+	/* To force link we need to set bits for all supported PHY types,
+	 * but there are now more than 32, so we need to split the bitmap
+	 * across two fields.
+	 */
+	mask = I40E_PHY_TYPES_BITMASK;
+	config.phy_type =
+		non_zero_phy_type ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0;
+	config.phy_type_ext =
+		non_zero_phy_type ? (u8)((mask >> 32) & 0xff) : 0;
+	/* Copy the old settings, except of phy_type */
+	config.abilities = abilities.abilities;
+	if (pf->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED) {
+		if (is_up)
+			config.abilities |= I40E_AQ_PHY_ENABLE_LINK;
+		else
+			config.abilities &= ~(I40E_AQ_PHY_ENABLE_LINK);
+	}
+	if (abilities.link_speed != 0)
+		config.link_speed = abilities.link_speed;
+	else
+		config.link_speed = speed;
+	config.eee_capability = abilities.eee_capability;
+	config.eeer = abilities.eeer_val;
+	config.low_power_ctrl = abilities.d3_lpan;
+	config.fec_config = abilities.fec_cfg_curr_mod_ext_info &
+			    I40E_AQ_PHY_FEC_CONFIG_MASK;
+	err = i40e_aq_set_phy_config(hw, &config, NULL);
+
+	if (err) {
+		dev_err(&pf->pdev->dev,
+			"set phy config ret =  %pe last_status =  %s\n",
+			ERR_PTR(err),
+			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		return err;
+	}
+
+	/* Update the link info */
+	err = i40e_update_link_info(hw);
+	if (err) {
+		/* Wait a little bit (on 40G cards it sometimes takes a really
+		 * long time for link to come back from the atomic reset)
+		 * and try once more
+		 */
+		msleep(1000);
+		i40e_update_link_info(hw);
+	}
+
+	i40e_aq_set_link_restart_an(hw, is_up, NULL);
+
+	return 0;
+}
+
+/**
+ * i40e_up - Bring the connection back up after being down
+ * @vsi: the VSI being configured
+ **/
+int i40e_up(struct i40e_vsi *vsi)
+{
+	int err;
+
+	if (vsi->type == I40E_VSI_MAIN &&
+	    (vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED ||
+	     vsi->back->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED))
+		i40e_force_link_state(vsi->back, true);
+
+	err = i40e_vsi_configure(vsi);
+	if (!err)
+		err = i40e_up_complete(vsi);
+
+	return err;
+}
+
+/**
+ * i40e_down - Shutdown the connection processing
+ * @vsi: the VSI being stopped
+ **/
+void i40e_down(struct i40e_vsi *vsi)
+{
+	int i;
+
+	/* It is assumed that the caller of this function
+	 * sets the vsi->state __I40E_VSI_DOWN bit.
+	 */
+	if (vsi->netdev) {
+		netif_carrier_off(vsi->netdev);
+		netif_tx_disable(vsi->netdev);
+	}
+	i40e_vsi_disable_irq(vsi);
+	i40e_vsi_stop_rings(vsi);
+	if (vsi->type == I40E_VSI_MAIN &&
+	   (vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED ||
+	    vsi->back->flags & I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED))
+		i40e_force_link_state(vsi->back, false);
+	i40e_napi_disable_all(vsi);
+
+	for (i = 0; i < vsi->num_queue_pairs; i++) {
+		i40e_clean_tx_ring(vsi->tx_rings[i]);
+		if (i40e_enabled_xdp_vsi(vsi)) {
+			/* Make sure that in-progress ndo_xdp_xmit and
+			 * ndo_xsk_wakeup calls are completed.
+			 */
+			synchronize_rcu();
+			i40e_clean_tx_ring(vsi->xdp_rings[i]);
+		}
+		i40e_clean_rx_ring(vsi->rx_rings[i]);
+	}
+
+}
+
+/**
+ * i40e_validate_mqprio_qopt- validate queue mapping info
+ * @vsi: the VSI being configured
+ * @mqprio_qopt: queue parametrs
+ **/
+static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi,
+				     struct tc_mqprio_qopt_offload *mqprio_qopt)
+{
+	u64 sum_max_rate = 0;
+	u64 max_rate = 0;
+	int i;
+
+	if (mqprio_qopt->qopt.offset[0] != 0 ||
+	    mqprio_qopt->qopt.num_tc < 1 ||
+	    mqprio_qopt->qopt.num_tc > I40E_MAX_TRAFFIC_CLASS)
+		return -EINVAL;
+	for (i = 0; ; i++) {
+		if (!mqprio_qopt->qopt.count[i])
+			return -EINVAL;
+		if (mqprio_qopt->min_rate[i]) {
+			dev_err(&vsi->back->pdev->dev,
+				"Invalid min tx rate (greater than 0) specified\n");
+			return -EINVAL;
+		}
+		max_rate = mqprio_qopt->max_rate[i];
+		do_div(max_rate, I40E_BW_MBPS_DIVISOR);
+		sum_max_rate += max_rate;
+
+		if (i >= mqprio_qopt->qopt.num_tc - 1)
+			break;
+		if (mqprio_qopt->qopt.offset[i + 1] !=
+		    (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
+			return -EINVAL;
+	}
+	if (vsi->num_queue_pairs <
+	    (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) {
+		dev_err(&vsi->back->pdev->dev,
+			"Failed to create traffic channel, insufficient number of queues.\n");
+		return -EINVAL;
+	}
+	if (sum_max_rate > i40e_get_link_speed(vsi)) {
+		dev_err(&vsi->back->pdev->dev,
+			"Invalid max tx rate specified\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * i40e_vsi_set_default_tc_config - set default values for tc configuration
+ * @vsi: the VSI being configured
+ **/
+static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi)
+{
+	u16 qcount;
+	int i;
+
+	/* Only TC0 is enabled */
+	vsi->tc_config.numtc = 1;
+	vsi->tc_config.enabled_tc = 1;
+	qcount = min_t(int, vsi->alloc_queue_pairs,
+		       i40e_pf_get_max_q_per_tc(vsi->back));
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		/* For the TC that is not enabled set the offset to default
+		 * queue and allocate one queue for the given TC.
+		 */
+		vsi->tc_config.tc_info[i].qoffset = 0;
+		if (i == 0)
+			vsi->tc_config.tc_info[i].qcount = qcount;
+		else
+			vsi->tc_config.tc_info[i].qcount = 1;
+		vsi->tc_config.tc_info[i].netdev_tc = 0;
+	}
+}
+
+/**
+ * i40e_del_macvlan_filter
+ * @hw: pointer to the HW structure
+ * @seid: seid of the channel VSI
+ * @macaddr: the mac address to apply as a filter
+ * @aq_err: store the admin Q error
+ *
+ * This function deletes a mac filter on the channel VSI which serves as the
+ * macvlan. Returns 0 on success.
+ **/
+static int i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid,
+				   const u8 *macaddr, int *aq_err)
+{
+	struct i40e_aqc_remove_macvlan_element_data element;
+	int status;
+
+	memset(&element, 0, sizeof(element));
+	ether_addr_copy(element.mac_addr, macaddr);
+	element.vlan_tag = 0;
+	element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
+	status = i40e_aq_remove_macvlan(hw, seid, &element, 1, NULL);
+	*aq_err = hw->aq.asq_last_status;
+
+	return status;
+}
+
+/**
+ * i40e_add_macvlan_filter
+ * @hw: pointer to the HW structure
+ * @seid: seid of the channel VSI
+ * @macaddr: the mac address to apply as a filter
+ * @aq_err: store the admin Q error
+ *
+ * This function adds a mac filter on the channel VSI which serves as the
+ * macvlan. Returns 0 on success.
+ **/
+static int i40e_add_macvlan_filter(struct i40e_hw *hw, u16 seid,
+				   const u8 *macaddr, int *aq_err)
+{
+	struct i40e_aqc_add_macvlan_element_data element;
+	u16 cmd_flags = 0;
+	int status;
+
+	ether_addr_copy(element.mac_addr, macaddr);
+	element.vlan_tag = 0;
+	element.queue_number = 0;
+	element.match_method = I40E_AQC_MM_ERR_NO_RES;
+	cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH;
+	element.flags = cpu_to_le16(cmd_flags);
+	status = i40e_aq_add_macvlan(hw, seid, &element, 1, NULL);
+	*aq_err = hw->aq.asq_last_status;
+
+	return status;
+}
+
+/**
+ * i40e_reset_ch_rings - Reset the queue contexts in a channel
+ * @vsi: the VSI we want to access
+ * @ch: the channel we want to access
+ */
+static void i40e_reset_ch_rings(struct i40e_vsi *vsi, struct i40e_channel *ch)
+{
+	struct i40e_ring *tx_ring, *rx_ring;
+	u16 pf_q;
+	int i;
+
+	for (i = 0; i < ch->num_queue_pairs; i++) {
+		pf_q = ch->base_queue + i;
+		tx_ring = vsi->tx_rings[pf_q];
+		tx_ring->ch = NULL;
+		rx_ring = vsi->rx_rings[pf_q];
+		rx_ring->ch = NULL;
+	}
+}
+
+/**
+ * i40e_free_macvlan_channels
+ * @vsi: the VSI we want to access
+ *
+ * This function frees the Qs of the channel VSI from
+ * the stack and also deletes the channel VSIs which
+ * serve as macvlans.
+ */
+static void i40e_free_macvlan_channels(struct i40e_vsi *vsi)
+{
+	struct i40e_channel *ch, *ch_tmp;
+	int ret;
+
+	if (list_empty(&vsi->macvlan_list))
+		return;
+
+	list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+		struct i40e_vsi *parent_vsi;
+
+		if (i40e_is_channel_macvlan(ch)) {
+			i40e_reset_ch_rings(vsi, ch);
+			clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
+			netdev_unbind_sb_channel(vsi->netdev, ch->fwd->netdev);
+			netdev_set_sb_channel(ch->fwd->netdev, 0);
+			kfree(ch->fwd);
+			ch->fwd = NULL;
+		}
+
+		list_del(&ch->list);
+		parent_vsi = ch->parent_vsi;
+		if (!parent_vsi || !ch->initialized) {
+			kfree(ch);
+			continue;
+		}
+
+		/* remove the VSI */
+		ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
+					     NULL);
+		if (ret)
+			dev_err(&vsi->back->pdev->dev,
+				"unable to remove channel (%d) for parent VSI(%d)\n",
+				ch->seid, parent_vsi->seid);
+		kfree(ch);
+	}
+	vsi->macvlan_cnt = 0;
+}
+
+/**
+ * i40e_fwd_ring_up - bring the macvlan device up
+ * @vsi: the VSI we want to access
+ * @vdev: macvlan netdevice
+ * @fwd: the private fwd structure
+ */
+static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev,
+			    struct i40e_fwd_adapter *fwd)
+{
+	struct i40e_channel *ch = NULL, *ch_tmp, *iter;
+	int ret = 0, num_tc = 1,  i, aq_err;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+
+	/* Go through the list and find an available channel */
+	list_for_each_entry_safe(iter, ch_tmp, &vsi->macvlan_list, list) {
+		if (!i40e_is_channel_macvlan(iter)) {
+			iter->fwd = fwd;
+			/* record configuration for macvlan interface in vdev */
+			for (i = 0; i < num_tc; i++)
+				netdev_bind_sb_channel_queue(vsi->netdev, vdev,
+							     i,
+							     iter->num_queue_pairs,
+							     iter->base_queue);
+			for (i = 0; i < iter->num_queue_pairs; i++) {
+				struct i40e_ring *tx_ring, *rx_ring;
+				u16 pf_q;
+
+				pf_q = iter->base_queue + i;
+
+				/* Get to TX ring ptr */
+				tx_ring = vsi->tx_rings[pf_q];
+				tx_ring->ch = iter;
+
+				/* Get the RX ring ptr */
+				rx_ring = vsi->rx_rings[pf_q];
+				rx_ring->ch = iter;
+			}
+			ch = iter;
+			break;
+		}
+	}
+
+	if (!ch)
+		return -EINVAL;
+
+	/* Guarantee all rings are updated before we update the
+	 * MAC address filter.
+	 */
+	wmb();
+
+	/* Add a mac filter */
+	ret = i40e_add_macvlan_filter(hw, ch->seid, vdev->dev_addr, &aq_err);
+	if (ret) {
+		/* if we cannot add the MAC rule then disable the offload */
+		macvlan_release_l2fw_offload(vdev);
+		for (i = 0; i < ch->num_queue_pairs; i++) {
+			struct i40e_ring *rx_ring;
+			u16 pf_q;
+
+			pf_q = ch->base_queue + i;
+			rx_ring = vsi->rx_rings[pf_q];
+			rx_ring->netdev = NULL;
+		}
+		dev_info(&pf->pdev->dev,
+			 "Error adding mac filter on macvlan err %pe, aq_err %s\n",
+			  ERR_PTR(ret),
+			  i40e_aq_str(hw, aq_err));
+		netdev_err(vdev, "L2fwd offload disabled to L2 filter error\n");
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_setup_macvlans - create the channels which will be macvlans
+ * @vsi: the VSI we want to access
+ * @macvlan_cnt: no. of macvlans to be setup
+ * @qcnt: no. of Qs per macvlan
+ * @vdev: macvlan netdevice
+ */
+static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt,
+			       struct net_device *vdev)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_vsi_context ctxt;
+	u16 sections, qmap, num_qps;
+	struct i40e_channel *ch;
+	int i, pow, ret = 0;
+	u8 offset = 0;
+
+	if (vsi->type != I40E_VSI_MAIN || !macvlan_cnt)
+		return -EINVAL;
+
+	num_qps = vsi->num_queue_pairs - (macvlan_cnt * qcnt);
+
+	/* find the next higher power-of-2 of num queue pairs */
+	pow = fls(roundup_pow_of_two(num_qps) - 1);
+
+	qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+		(pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+
+	/* Setup context bits for the main VSI */
+	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+	sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+	memset(&ctxt, 0, sizeof(ctxt));
+	ctxt.seid = vsi->seid;
+	ctxt.pf_num = vsi->back->hw.pf_id;
+	ctxt.vf_num = 0;
+	ctxt.uplink_seid = vsi->uplink_seid;
+	ctxt.info = vsi->info;
+	ctxt.info.tc_mapping[0] = cpu_to_le16(qmap);
+	ctxt.info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+	ctxt.info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
+	ctxt.info.valid_sections |= cpu_to_le16(sections);
+
+	/* Reconfigure RSS for main VSI with new max queue count */
+	vsi->rss_size = max_t(u16, num_qps, qcnt);
+	ret = i40e_vsi_config_rss(vsi);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Failed to reconfig RSS for num_queues (%u)\n",
+			 vsi->rss_size);
+		return ret;
+	}
+	vsi->reconfig_rss = true;
+	dev_dbg(&vsi->back->pdev->dev,
+		"Reconfigured RSS with num_queues (%u)\n", vsi->rss_size);
+	vsi->next_base_queue = num_qps;
+	vsi->cnt_q_avail = vsi->num_queue_pairs - num_qps;
+
+	/* Update the VSI after updating the VSI queue-mapping
+	 * information
+	 */
+	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Update vsi tc config failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(hw, hw->aq.asq_last_status));
+		return ret;
+	}
+	/* update the local VSI info with updated queue map */
+	i40e_vsi_update_queue_map(vsi, &ctxt);
+	vsi->info.valid_sections = 0;
+
+	/* Create channels for macvlans */
+	INIT_LIST_HEAD(&vsi->macvlan_list);
+	for (i = 0; i < macvlan_cnt; i++) {
+		ch = kzalloc(sizeof(*ch), GFP_KERNEL);
+		if (!ch) {
+			ret = -ENOMEM;
+			goto err_free;
+		}
+		INIT_LIST_HEAD(&ch->list);
+		ch->num_queue_pairs = qcnt;
+		if (!i40e_setup_channel(pf, vsi, ch)) {
+			ret = -EINVAL;
+			kfree(ch);
+			goto err_free;
+		}
+		ch->parent_vsi = vsi;
+		vsi->cnt_q_avail -= ch->num_queue_pairs;
+		vsi->macvlan_cnt++;
+		list_add_tail(&ch->list, &vsi->macvlan_list);
+	}
+
+	return ret;
+
+err_free:
+	dev_info(&pf->pdev->dev, "Failed to setup macvlans\n");
+	i40e_free_macvlan_channels(vsi);
+
+	return ret;
+}
+
+/**
+ * i40e_fwd_add - configure macvlans
+ * @netdev: net device to configure
+ * @vdev: macvlan netdevice
+ **/
+static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	u16 q_per_macvlan = 0, macvlan_cnt = 0, vectors;
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_fwd_adapter *fwd;
+	int avail_macvlan, ret;
+
+	if ((pf->flags & I40E_FLAG_DCB_ENABLED)) {
+		netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n");
+		return ERR_PTR(-EINVAL);
+	}
+	if (i40e_is_tc_mqprio_enabled(pf)) {
+		netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n");
+		return ERR_PTR(-EINVAL);
+	}
+	if (pf->num_lan_msix < I40E_MIN_MACVLAN_VECTORS) {
+		netdev_info(netdev, "Not enough vectors available to support macvlans\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* The macvlan device has to be a single Q device so that the
+	 * tc_to_txq field can be reused to pick the tx queue.
+	 */
+	if (netif_is_multiqueue(vdev))
+		return ERR_PTR(-ERANGE);
+
+	if (!vsi->macvlan_cnt) {
+		/* reserve bit 0 for the pf device */
+		set_bit(0, vsi->fwd_bitmask);
+
+		/* Try to reserve as many queues as possible for macvlans. First
+		 * reserve 3/4th of max vectors, then half, then quarter and
+		 * calculate Qs per macvlan as you go
+		 */
+		vectors = pf->num_lan_msix;
+		if (vectors <= I40E_MAX_MACVLANS && vectors > 64) {
+			/* allocate 4 Qs per macvlan and 32 Qs to the PF*/
+			q_per_macvlan = 4;
+			macvlan_cnt = (vectors - 32) / 4;
+		} else if (vectors <= 64 && vectors > 32) {
+			/* allocate 2 Qs per macvlan and 16 Qs to the PF*/
+			q_per_macvlan = 2;
+			macvlan_cnt = (vectors - 16) / 2;
+		} else if (vectors <= 32 && vectors > 16) {
+			/* allocate 1 Q per macvlan and 16 Qs to the PF*/
+			q_per_macvlan = 1;
+			macvlan_cnt = vectors - 16;
+		} else if (vectors <= 16 && vectors > 8) {
+			/* allocate 1 Q per macvlan and 8 Qs to the PF */
+			q_per_macvlan = 1;
+			macvlan_cnt = vectors - 8;
+		} else {
+			/* allocate 1 Q per macvlan and 1 Q to the PF */
+			q_per_macvlan = 1;
+			macvlan_cnt = vectors - 1;
+		}
+
+		if (macvlan_cnt == 0)
+			return ERR_PTR(-EBUSY);
+
+		/* Quiesce VSI queues */
+		i40e_quiesce_vsi(vsi);
+
+		/* sets up the macvlans but does not "enable" them */
+		ret = i40e_setup_macvlans(vsi, macvlan_cnt, q_per_macvlan,
+					  vdev);
+		if (ret)
+			return ERR_PTR(ret);
+
+		/* Unquiesce VSI */
+		i40e_unquiesce_vsi(vsi);
+	}
+	avail_macvlan = find_first_zero_bit(vsi->fwd_bitmask,
+					    vsi->macvlan_cnt);
+	if (avail_macvlan >= I40E_MAX_MACVLANS)
+		return ERR_PTR(-EBUSY);
+
+	/* create the fwd struct */
+	fwd = kzalloc(sizeof(*fwd), GFP_KERNEL);
+	if (!fwd)
+		return ERR_PTR(-ENOMEM);
+
+	set_bit(avail_macvlan, vsi->fwd_bitmask);
+	fwd->bit_no = avail_macvlan;
+	netdev_set_sb_channel(vdev, avail_macvlan);
+	fwd->netdev = vdev;
+
+	if (!netif_running(netdev))
+		return fwd;
+
+	/* Set fwd ring up */
+	ret = i40e_fwd_ring_up(vsi, vdev, fwd);
+	if (ret) {
+		/* unbind the queues and drop the subordinate channel config */
+		netdev_unbind_sb_channel(netdev, vdev);
+		netdev_set_sb_channel(vdev, 0);
+
+		kfree(fwd);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return fwd;
+}
+
+/**
+ * i40e_del_all_macvlans - Delete all the mac filters on the channels
+ * @vsi: the VSI we want to access
+ */
+static void i40e_del_all_macvlans(struct i40e_vsi *vsi)
+{
+	struct i40e_channel *ch, *ch_tmp;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	int aq_err, ret = 0;
+
+	if (list_empty(&vsi->macvlan_list))
+		return;
+
+	list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+		if (i40e_is_channel_macvlan(ch)) {
+			ret = i40e_del_macvlan_filter(hw, ch->seid,
+						      i40e_channel_mac(ch),
+						      &aq_err);
+			if (!ret) {
+				/* Reset queue contexts */
+				i40e_reset_ch_rings(vsi, ch);
+				clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
+				netdev_unbind_sb_channel(vsi->netdev,
+							 ch->fwd->netdev);
+				netdev_set_sb_channel(ch->fwd->netdev, 0);
+				kfree(ch->fwd);
+				ch->fwd = NULL;
+			}
+		}
+	}
+}
+
+/**
+ * i40e_fwd_del - delete macvlan interfaces
+ * @netdev: net device to configure
+ * @vdev: macvlan netdevice
+ */
+static void i40e_fwd_del(struct net_device *netdev, void *vdev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_fwd_adapter *fwd = vdev;
+	struct i40e_channel *ch, *ch_tmp;
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	int aq_err, ret = 0;
+
+	/* Find the channel associated with the macvlan and del mac filter */
+	list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) {
+		if (i40e_is_channel_macvlan(ch) &&
+		    ether_addr_equal(i40e_channel_mac(ch),
+				     fwd->netdev->dev_addr)) {
+			ret = i40e_del_macvlan_filter(hw, ch->seid,
+						      i40e_channel_mac(ch),
+						      &aq_err);
+			if (!ret) {
+				/* Reset queue contexts */
+				i40e_reset_ch_rings(vsi, ch);
+				clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask);
+				netdev_unbind_sb_channel(netdev, fwd->netdev);
+				netdev_set_sb_channel(fwd->netdev, 0);
+				kfree(ch->fwd);
+				ch->fwd = NULL;
+			} else {
+				dev_info(&pf->pdev->dev,
+					 "Error deleting mac filter on macvlan err %pe, aq_err %s\n",
+					  ERR_PTR(ret),
+					  i40e_aq_str(hw, aq_err));
+			}
+			break;
+		}
+	}
+}
+
+/**
+ * i40e_setup_tc - configure multiple traffic classes
+ * @netdev: net device to configure
+ * @type_data: tc offload data
+ **/
+static int i40e_setup_tc(struct net_device *netdev, void *type_data)
+{
+	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	u8 enabled_tc = 0, num_tc, hw;
+	bool need_reset = false;
+	int old_queue_pairs;
+	int ret = -EINVAL;
+	u16 mode;
+	int i;
+
+	old_queue_pairs = vsi->num_queue_pairs;
+	num_tc = mqprio_qopt->qopt.num_tc;
+	hw = mqprio_qopt->qopt.hw;
+	mode = mqprio_qopt->mode;
+	if (!hw) {
+		pf->flags &= ~I40E_FLAG_TC_MQPRIO;
+		memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
+		goto config_tc;
+	}
+
+	/* Check if MFP enabled */
+	if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+		netdev_info(netdev,
+			    "Configuring TC not supported in MFP mode\n");
+		return ret;
+	}
+	switch (mode) {
+	case TC_MQPRIO_MODE_DCB:
+		pf->flags &= ~I40E_FLAG_TC_MQPRIO;
+
+		/* Check if DCB enabled to continue */
+		if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
+			netdev_info(netdev,
+				    "DCB is not enabled for adapter\n");
+			return ret;
+		}
+
+		/* Check whether tc count is within enabled limit */
+		if (num_tc > i40e_pf_get_num_tc(pf)) {
+			netdev_info(netdev,
+				    "TC count greater than enabled on link for adapter\n");
+			return ret;
+		}
+		break;
+	case TC_MQPRIO_MODE_CHANNEL:
+		if (pf->flags & I40E_FLAG_DCB_ENABLED) {
+			netdev_info(netdev,
+				    "Full offload of TC Mqprio options is not supported when DCB is enabled\n");
+			return ret;
+		}
+		if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
+			return ret;
+		ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt);
+		if (ret)
+			return ret;
+		memcpy(&vsi->mqprio_qopt, mqprio_qopt,
+		       sizeof(*mqprio_qopt));
+		pf->flags |= I40E_FLAG_TC_MQPRIO;
+		pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+config_tc:
+	/* Generate TC map for number of tc requested */
+	for (i = 0; i < num_tc; i++)
+		enabled_tc |= BIT(i);
+
+	/* Requesting same TC configuration as already enabled */
+	if (enabled_tc == vsi->tc_config.enabled_tc &&
+	    mode != TC_MQPRIO_MODE_CHANNEL)
+		return 0;
+
+	/* Quiesce VSI queues */
+	i40e_quiesce_vsi(vsi);
+
+	if (!hw && !i40e_is_tc_mqprio_enabled(pf))
+		i40e_remove_queue_channels(vsi);
+
+	/* Configure VSI for enabled TCs */
+	ret = i40e_vsi_config_tc(vsi, enabled_tc);
+	if (ret) {
+		netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n",
+			    vsi->seid);
+		need_reset = true;
+		goto exit;
+	} else if (enabled_tc &&
+		   (!is_power_of_2(vsi->tc_config.tc_info[0].qcount))) {
+		netdev_info(netdev,
+			    "Failed to create channel. Override queues (%u) not power of 2\n",
+			    vsi->tc_config.tc_info[0].qcount);
+		ret = -EINVAL;
+		need_reset = true;
+		goto exit;
+	}
+
+	dev_info(&vsi->back->pdev->dev,
+		 "Setup channel (id:%u) utilizing num_queues %d\n",
+		 vsi->seid, vsi->tc_config.tc_info[0].qcount);
+
+	if (i40e_is_tc_mqprio_enabled(pf)) {
+		if (vsi->mqprio_qopt.max_rate[0]) {
+			u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi,
+						  vsi->mqprio_qopt.max_rate[0]);
+
+			ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+			if (!ret) {
+				u64 credits = max_tx_rate;
+
+				do_div(credits, I40E_BW_CREDIT_DIVISOR);
+				dev_dbg(&vsi->back->pdev->dev,
+					"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+					max_tx_rate,
+					credits,
+					vsi->seid);
+			} else {
+				need_reset = true;
+				goto exit;
+			}
+		}
+		ret = i40e_configure_queue_channels(vsi);
+		if (ret) {
+			vsi->num_queue_pairs = old_queue_pairs;
+			netdev_info(netdev,
+				    "Failed configuring queue channels\n");
+			need_reset = true;
+			goto exit;
+		}
+	}
+
+exit:
+	/* Reset the configuration data to defaults, only TC0 is enabled */
+	if (need_reset) {
+		i40e_vsi_set_default_tc_config(vsi);
+		need_reset = false;
+	}
+
+	/* Unquiesce VSI */
+	i40e_unquiesce_vsi(vsi);
+	return ret;
+}
+
+/**
+ * i40e_set_cld_element - sets cloud filter element data
+ * @filter: cloud filter rule
+ * @cld: ptr to cloud filter element data
+ *
+ * This is helper function to copy data into cloud filter element
+ **/
+static inline void
+i40e_set_cld_element(struct i40e_cloud_filter *filter,
+		     struct i40e_aqc_cloud_filters_element_data *cld)
+{
+	u32 ipa;
+	int i;
+
+	memset(cld, 0, sizeof(*cld));
+	ether_addr_copy(cld->outer_mac, filter->dst_mac);
+	ether_addr_copy(cld->inner_mac, filter->src_mac);
+
+	if (filter->n_proto != ETH_P_IP && filter->n_proto != ETH_P_IPV6)
+		return;
+
+	if (filter->n_proto == ETH_P_IPV6) {
+#define IPV6_MAX_INDEX	(ARRAY_SIZE(filter->dst_ipv6) - 1)
+		for (i = 0; i < ARRAY_SIZE(filter->dst_ipv6); i++) {
+			ipa = be32_to_cpu(filter->dst_ipv6[IPV6_MAX_INDEX - i]);
+
+			*(__le32 *)&cld->ipaddr.raw_v6.data[i * 2] = cpu_to_le32(ipa);
+		}
+	} else {
+		ipa = be32_to_cpu(filter->dst_ipv4);
+
+		memcpy(&cld->ipaddr.v4.data, &ipa, sizeof(ipa));
+	}
+
+	cld->inner_vlan = cpu_to_le16(ntohs(filter->vlan_id));
+
+	/* tenant_id is not supported by FW now, once the support is enabled
+	 * fill the cld->tenant_id with cpu_to_le32(filter->tenant_id)
+	 */
+	if (filter->tenant_id)
+		return;
+}
+
+/**
+ * i40e_add_del_cloud_filter - Add/del cloud filter
+ * @vsi: pointer to VSI
+ * @filter: cloud filter rule
+ * @add: if true, add, if false, delete
+ *
+ * Add or delete a cloud filter for a specific flow spec.
+ * Returns 0 if the filter were successfully added.
+ **/
+int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
+			      struct i40e_cloud_filter *filter, bool add)
+{
+	struct i40e_aqc_cloud_filters_element_data cld_filter;
+	struct i40e_pf *pf = vsi->back;
+	int ret;
+	static const u16 flag_table[128] = {
+		[I40E_CLOUD_FILTER_FLAGS_OMAC]  =
+			I40E_AQC_ADD_CLOUD_FILTER_OMAC,
+		[I40E_CLOUD_FILTER_FLAGS_IMAC]  =
+			I40E_AQC_ADD_CLOUD_FILTER_IMAC,
+		[I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN]  =
+			I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN,
+		[I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID] =
+			I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID,
+		[I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC] =
+			I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC,
+		[I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID] =
+			I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID,
+		[I40E_CLOUD_FILTER_FLAGS_IIP] =
+			I40E_AQC_ADD_CLOUD_FILTER_IIP,
+	};
+
+	if (filter->flags >= ARRAY_SIZE(flag_table))
+		return -EIO;
+
+	memset(&cld_filter, 0, sizeof(cld_filter));
+
+	/* copy element needed to add cloud filter from filter */
+	i40e_set_cld_element(filter, &cld_filter);
+
+	if (filter->tunnel_type != I40E_CLOUD_TNL_TYPE_NONE)
+		cld_filter.flags = cpu_to_le16(filter->tunnel_type <<
+					     I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT);
+
+	if (filter->n_proto == ETH_P_IPV6)
+		cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
+						I40E_AQC_ADD_CLOUD_FLAGS_IPV6);
+	else
+		cld_filter.flags |= cpu_to_le16(flag_table[filter->flags] |
+						I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
+
+	if (add)
+		ret = i40e_aq_add_cloud_filters(&pf->hw, filter->seid,
+						&cld_filter, 1);
+	else
+		ret = i40e_aq_rem_cloud_filters(&pf->hw, filter->seid,
+						&cld_filter, 1);
+	if (ret)
+		dev_dbg(&pf->pdev->dev,
+			"Failed to %s cloud filter using l4 port %u, err %d aq_err %d\n",
+			add ? "add" : "delete", filter->dst_port, ret,
+			pf->hw.aq.asq_last_status);
+	else
+		dev_info(&pf->pdev->dev,
+			 "%s cloud filter for VSI: %d\n",
+			 add ? "Added" : "Deleted", filter->seid);
+	return ret;
+}
+
+/**
+ * i40e_add_del_cloud_filter_big_buf - Add/del cloud filter using big_buf
+ * @vsi: pointer to VSI
+ * @filter: cloud filter rule
+ * @add: if true, add, if false, delete
+ *
+ * Add or delete a cloud filter for a specific flow spec using big buffer.
+ * Returns 0 if the filter were successfully added.
+ **/
+int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
+				      struct i40e_cloud_filter *filter,
+				      bool add)
+{
+	struct i40e_aqc_cloud_filters_element_bb cld_filter;
+	struct i40e_pf *pf = vsi->back;
+	int ret;
+
+	/* Both (src/dst) valid mac_addr are not supported */
+	if ((is_valid_ether_addr(filter->dst_mac) &&
+	     is_valid_ether_addr(filter->src_mac)) ||
+	    (is_multicast_ether_addr(filter->dst_mac) &&
+	     is_multicast_ether_addr(filter->src_mac)))
+		return -EOPNOTSUPP;
+
+	/* Big buffer cloud filter needs 'L4 port' to be non-zero. Also, UDP
+	 * ports are not supported via big buffer now.
+	 */
+	if (!filter->dst_port || filter->ip_proto == IPPROTO_UDP)
+		return -EOPNOTSUPP;
+
+	/* adding filter using src_port/src_ip is not supported at this stage */
+	if (filter->src_port ||
+	    (filter->src_ipv4 && filter->n_proto != ETH_P_IPV6) ||
+	    !ipv6_addr_any(&filter->ip.v6.src_ip6))
+		return -EOPNOTSUPP;
+
+	memset(&cld_filter, 0, sizeof(cld_filter));
+
+	/* copy element needed to add cloud filter from filter */
+	i40e_set_cld_element(filter, &cld_filter.element);
+
+	if (is_valid_ether_addr(filter->dst_mac) ||
+	    is_valid_ether_addr(filter->src_mac) ||
+	    is_multicast_ether_addr(filter->dst_mac) ||
+	    is_multicast_ether_addr(filter->src_mac)) {
+		/* MAC + IP : unsupported mode */
+		if (filter->dst_ipv4)
+			return -EOPNOTSUPP;
+
+		/* since we validated that L4 port must be valid before
+		 * we get here, start with respective "flags" value
+		 * and update if vlan is present or not
+		 */
+		cld_filter.element.flags =
+			cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT);
+
+		if (filter->vlan_id) {
+			cld_filter.element.flags =
+			cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT);
+		}
+
+	} else if ((filter->dst_ipv4 && filter->n_proto != ETH_P_IPV6) ||
+		   !ipv6_addr_any(&filter->ip.v6.dst_ip6)) {
+		cld_filter.element.flags =
+				cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT);
+		if (filter->n_proto == ETH_P_IPV6)
+			cld_filter.element.flags |=
+				cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV6);
+		else
+			cld_filter.element.flags |=
+				cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
+	} else {
+		dev_err(&pf->pdev->dev,
+			"either mac or ip has to be valid for cloud filter\n");
+		return -EINVAL;
+	}
+
+	/* Now copy L4 port in Byte 6..7 in general fields */
+	cld_filter.general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0] =
+						be16_to_cpu(filter->dst_port);
+
+	if (add) {
+		/* Validate current device switch mode, change if necessary */
+		ret = i40e_validate_and_set_switch_mode(vsi);
+		if (ret) {
+			dev_err(&pf->pdev->dev,
+				"failed to set switch mode, ret %d\n",
+				ret);
+			return ret;
+		}
+
+		ret = i40e_aq_add_cloud_filters_bb(&pf->hw, filter->seid,
+						   &cld_filter, 1);
+	} else {
+		ret = i40e_aq_rem_cloud_filters_bb(&pf->hw, filter->seid,
+						   &cld_filter, 1);
+	}
+
+	if (ret)
+		dev_dbg(&pf->pdev->dev,
+			"Failed to %s cloud filter(big buffer) err %d aq_err %d\n",
+			add ? "add" : "delete", ret, pf->hw.aq.asq_last_status);
+	else
+		dev_info(&pf->pdev->dev,
+			 "%s cloud filter for VSI: %d, L4 port: %d\n",
+			 add ? "add" : "delete", filter->seid,
+			 ntohs(filter->dst_port));
+	return ret;
+}
+
+/**
+ * i40e_parse_cls_flower - Parse tc flower filters provided by kernel
+ * @vsi: Pointer to VSI
+ * @f: Pointer to struct flow_cls_offload
+ * @filter: Pointer to cloud filter structure
+ *
+ **/
+static int i40e_parse_cls_flower(struct i40e_vsi *vsi,
+				 struct flow_cls_offload *f,
+				 struct i40e_cloud_filter *filter)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct flow_dissector *dissector = rule->match.dissector;
+	u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0;
+	struct i40e_pf *pf = vsi->back;
+	u8 field_flags = 0;
+
+	if (dissector->used_keys &
+	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
+		dev_err(&pf->pdev->dev, "Unsupported key used: 0x%llx\n",
+			dissector->used_keys);
+		return -EOPNOTSUPP;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid match;
+
+		flow_rule_match_enc_keyid(rule, &match);
+		if (match.mask->keyid != 0)
+			field_flags |= I40E_CLOUD_FIELD_TEN_ID;
+
+		filter->tenant_id = be32_to_cpu(match.key->keyid);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+		n_proto_key = ntohs(match.key->n_proto);
+		n_proto_mask = ntohs(match.mask->n_proto);
+
+		if (n_proto_key == ETH_P_ALL) {
+			n_proto_key = 0;
+			n_proto_mask = 0;
+		}
+		filter->n_proto = n_proto_key & n_proto_mask;
+		filter->ip_proto = match.key->ip_proto;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
+
+		flow_rule_match_eth_addrs(rule, &match);
+
+		/* use is_broadcast and is_zero to check for all 0xf or 0 */
+		if (!is_zero_ether_addr(match.mask->dst)) {
+			if (is_broadcast_ether_addr(match.mask->dst)) {
+				field_flags |= I40E_CLOUD_FIELD_OMAC;
+			} else {
+				dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n",
+					match.mask->dst);
+				return -EIO;
+			}
+		}
+
+		if (!is_zero_ether_addr(match.mask->src)) {
+			if (is_broadcast_ether_addr(match.mask->src)) {
+				field_flags |= I40E_CLOUD_FIELD_IMAC;
+			} else {
+				dev_err(&pf->pdev->dev, "Bad ether src mask %pM\n",
+					match.mask->src);
+				return -EIO;
+			}
+		}
+		ether_addr_copy(filter->dst_mac, match.key->dst);
+		ether_addr_copy(filter->src_mac, match.key->src);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
+
+		flow_rule_match_vlan(rule, &match);
+		if (match.mask->vlan_id) {
+			if (match.mask->vlan_id == VLAN_VID_MASK) {
+				field_flags |= I40E_CLOUD_FIELD_IVLAN;
+
+			} else {
+				dev_err(&pf->pdev->dev, "Bad vlan mask 0x%04x\n",
+					match.mask->vlan_id);
+				return -EIO;
+			}
+		}
+
+		filter->vlan_id = cpu_to_be16(match.key->vlan_id);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
+
+		flow_rule_match_control(rule, &match);
+		addr_type = match.key->addr_type;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		struct flow_match_ipv4_addrs match;
+
+		flow_rule_match_ipv4_addrs(rule, &match);
+		if (match.mask->dst) {
+			if (match.mask->dst == cpu_to_be32(0xffffffff)) {
+				field_flags |= I40E_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4b\n",
+					&match.mask->dst);
+				return -EIO;
+			}
+		}
+
+		if (match.mask->src) {
+			if (match.mask->src == cpu_to_be32(0xffffffff)) {
+				field_flags |= I40E_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&pf->pdev->dev, "Bad ip src mask %pI4b\n",
+					&match.mask->src);
+				return -EIO;
+			}
+		}
+
+		if (field_flags & I40E_CLOUD_FIELD_TEN_ID) {
+			dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n");
+			return -EIO;
+		}
+		filter->dst_ipv4 = match.key->dst;
+		filter->src_ipv4 = match.key->src;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		struct flow_match_ipv6_addrs match;
+
+		flow_rule_match_ipv6_addrs(rule, &match);
+
+		/* src and dest IPV6 address should not be LOOPBACK
+		 * (0:0:0:0:0:0:0:1), which can be represented as ::1
+		 */
+		if (ipv6_addr_loopback(&match.key->dst) ||
+		    ipv6_addr_loopback(&match.key->src)) {
+			dev_err(&pf->pdev->dev,
+				"Bad ipv6, addr is LOOPBACK\n");
+			return -EIO;
+		}
+		if (!ipv6_addr_any(&match.mask->dst) ||
+		    !ipv6_addr_any(&match.mask->src))
+			field_flags |= I40E_CLOUD_FIELD_IIP;
+
+		memcpy(&filter->src_ipv6, &match.key->src.s6_addr32,
+		       sizeof(filter->src_ipv6));
+		memcpy(&filter->dst_ipv6, &match.key->dst.s6_addr32,
+		       sizeof(filter->dst_ipv6));
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(rule, &match);
+		if (match.mask->src) {
+			if (match.mask->src == cpu_to_be16(0xffff)) {
+				field_flags |= I40E_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&pf->pdev->dev, "Bad src port mask 0x%04x\n",
+					be16_to_cpu(match.mask->src));
+				return -EIO;
+			}
+		}
+
+		if (match.mask->dst) {
+			if (match.mask->dst == cpu_to_be16(0xffff)) {
+				field_flags |= I40E_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&pf->pdev->dev, "Bad dst port mask 0x%04x\n",
+					be16_to_cpu(match.mask->dst));
+				return -EIO;
+			}
+		}
+
+		filter->dst_port = match.key->dst;
+		filter->src_port = match.key->src;
+
+		switch (filter->ip_proto) {
+		case IPPROTO_TCP:
+		case IPPROTO_UDP:
+			break;
+		default:
+			dev_err(&pf->pdev->dev,
+				"Only UDP and TCP transport are supported\n");
+			return -EINVAL;
+		}
+	}
+	filter->flags = field_flags;
+	return 0;
+}
+
+/**
+ * i40e_handle_tclass: Forward to a traffic class on the device
+ * @vsi: Pointer to VSI
+ * @tc: traffic class index on the device
+ * @filter: Pointer to cloud filter structure
+ *
+ **/
+static int i40e_handle_tclass(struct i40e_vsi *vsi, u32 tc,
+			      struct i40e_cloud_filter *filter)
+{
+	struct i40e_channel *ch, *ch_tmp;
+
+	/* direct to a traffic class on the same device */
+	if (tc == 0) {
+		filter->seid = vsi->seid;
+		return 0;
+	} else if (vsi->tc_config.enabled_tc & BIT(tc)) {
+		if (!filter->dst_port) {
+			dev_err(&vsi->back->pdev->dev,
+				"Specify destination port to direct to traffic class that is not default\n");
+			return -EINVAL;
+		}
+		if (list_empty(&vsi->ch_list))
+			return -EINVAL;
+		list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list,
+					 list) {
+			if (ch->seid == vsi->tc_seid_map[tc])
+				filter->seid = ch->seid;
+		}
+		return 0;
+	}
+	dev_err(&vsi->back->pdev->dev, "TC is not enabled\n");
+	return -EINVAL;
+}
+
+/**
+ * i40e_configure_clsflower - Configure tc flower filters
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to struct flow_cls_offload
+ *
+ **/
+static int i40e_configure_clsflower(struct i40e_vsi *vsi,
+				    struct flow_cls_offload *cls_flower)
+{
+	int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid);
+	struct i40e_cloud_filter *filter = NULL;
+	struct i40e_pf *pf = vsi->back;
+	int err = 0;
+
+	if (tc < 0) {
+		dev_err(&vsi->back->pdev->dev, "Invalid traffic class\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (!tc) {
+		dev_err(&pf->pdev->dev, "Unable to add filter because of invalid destination");
+		return -EINVAL;
+	}
+
+	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+	    test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
+		return -EBUSY;
+
+	if (pf->fdir_pf_active_filters ||
+	    (!hlist_empty(&pf->fdir_filter_list))) {
+		dev_err(&vsi->back->pdev->dev,
+			"Flow Director Sideband filters exists, turn ntuple off to configure cloud filters\n");
+		return -EINVAL;
+	}
+
+	if (vsi->back->flags & I40E_FLAG_FD_SB_ENABLED) {
+		dev_err(&vsi->back->pdev->dev,
+			"Disable Flow Director Sideband, configuring Cloud filters via tc-flower\n");
+		vsi->back->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+		vsi->back->flags |= I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
+	}
+
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
+		return -ENOMEM;
+
+	filter->cookie = cls_flower->cookie;
+
+	err = i40e_parse_cls_flower(vsi, cls_flower, filter);
+	if (err < 0)
+		goto err;
+
+	err = i40e_handle_tclass(vsi, tc, filter);
+	if (err < 0)
+		goto err;
+
+	/* Add cloud filter */
+	if (filter->dst_port)
+		err = i40e_add_del_cloud_filter_big_buf(vsi, filter, true);
+	else
+		err = i40e_add_del_cloud_filter(vsi, filter, true);
+
+	if (err) {
+		dev_err(&pf->pdev->dev, "Failed to add cloud filter, err %d\n",
+			err);
+		goto err;
+	}
+
+	/* add filter to the ordered list */
+	INIT_HLIST_NODE(&filter->cloud_node);
+
+	hlist_add_head(&filter->cloud_node, &pf->cloud_filter_list);
+
+	pf->num_cloud_filters++;
+
+	return err;
+err:
+	kfree(filter);
+	return err;
+}
+
+/**
+ * i40e_find_cloud_filter - Find the could filter in the list
+ * @vsi: Pointer to VSI
+ * @cookie: filter specific cookie
+ *
+ **/
+static struct i40e_cloud_filter *i40e_find_cloud_filter(struct i40e_vsi *vsi,
+							unsigned long *cookie)
+{
+	struct i40e_cloud_filter *filter = NULL;
+	struct hlist_node *node2;
+
+	hlist_for_each_entry_safe(filter, node2,
+				  &vsi->back->cloud_filter_list, cloud_node)
+		if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
+			return filter;
+	return NULL;
+}
+
+/**
+ * i40e_delete_clsflower - Remove tc flower filters
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to struct flow_cls_offload
+ *
+ **/
+static int i40e_delete_clsflower(struct i40e_vsi *vsi,
+				 struct flow_cls_offload *cls_flower)
+{
+	struct i40e_cloud_filter *filter = NULL;
+	struct i40e_pf *pf = vsi->back;
+	int err = 0;
+
+	filter = i40e_find_cloud_filter(vsi, &cls_flower->cookie);
+
+	if (!filter)
+		return -EINVAL;
+
+	hash_del(&filter->cloud_node);
+
+	if (filter->dst_port)
+		err = i40e_add_del_cloud_filter_big_buf(vsi, filter, false);
+	else
+		err = i40e_add_del_cloud_filter(vsi, filter, false);
+
+	kfree(filter);
+	if (err) {
+		dev_err(&pf->pdev->dev,
+			"Failed to delete cloud filter, err %pe\n",
+			ERR_PTR(err));
+		return i40e_aq_rc_to_posix(err, pf->hw.aq.asq_last_status);
+	}
+
+	pf->num_cloud_filters--;
+	if (!pf->num_cloud_filters)
+		if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
+		    !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
+			pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+			pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
+			pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+		}
+	return 0;
+}
+
+/**
+ * i40e_setup_tc_cls_flower - flower classifier offloads
+ * @np: net device to configure
+ * @cls_flower: offload data
+ **/
+static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np,
+				    struct flow_cls_offload *cls_flower)
+{
+	struct i40e_vsi *vsi = np->vsi;
+
+	switch (cls_flower->command) {
+	case FLOW_CLS_REPLACE:
+		return i40e_configure_clsflower(vsi, cls_flower);
+	case FLOW_CLS_DESTROY:
+		return i40e_delete_clsflower(vsi, cls_flower);
+	case FLOW_CLS_STATS:
+		return -EOPNOTSUPP;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int i40e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+				  void *cb_priv)
+{
+	struct i40e_netdev_priv *np = cb_priv;
+
+	if (!tc_cls_can_offload_and_chain0(np->vsi->netdev, type_data))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return i40e_setup_tc_cls_flower(np, type_data);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static LIST_HEAD(i40e_block_cb_list);
+
+static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+			   void *type_data)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+
+	switch (type) {
+	case TC_SETUP_QDISC_MQPRIO:
+		return i40e_setup_tc(netdev, type_data);
+	case TC_SETUP_BLOCK:
+		return flow_block_cb_setup_simple(type_data,
+						  &i40e_block_cb_list,
+						  i40e_setup_tc_block_cb,
+						  np, np, true);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * i40e_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the netdev watchdog subtask is
+ * enabled, and the stack is notified that the interface is ready.
+ *
+ * Returns 0 on success, negative value on failure
+ **/
+int i40e_open(struct net_device *netdev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	int err;
+
+	/* disallow open during test or if eeprom is broken */
+	if (test_bit(__I40E_TESTING, pf->state) ||
+	    test_bit(__I40E_BAD_EEPROM, pf->state))
+		return -EBUSY;
+
+	netif_carrier_off(netdev);
+
+	if (i40e_force_link_state(pf, true))
+		return -EAGAIN;
+
+	err = i40e_vsi_open(vsi);
+	if (err)
+		return err;
+
+	/* configure global TSO hardware offload settings */
+	wr32(&pf->hw, I40E_GLLAN_TSOMSK_F, be32_to_cpu(TCP_FLAG_PSH |
+						       TCP_FLAG_FIN) >> 16);
+	wr32(&pf->hw, I40E_GLLAN_TSOMSK_M, be32_to_cpu(TCP_FLAG_PSH |
+						       TCP_FLAG_FIN |
+						       TCP_FLAG_CWR) >> 16);
+	wr32(&pf->hw, I40E_GLLAN_TSOMSK_L, be32_to_cpu(TCP_FLAG_CWR) >> 16);
+	udp_tunnel_get_rx_info(netdev);
+
+	return 0;
+}
+
+/**
+ * i40e_netif_set_realnum_tx_rx_queues - Update number of tx/rx queues
+ * @vsi: vsi structure
+ *
+ * This updates netdev's number of tx/rx queues
+ *
+ * Returns status of setting tx/rx queues
+ **/
+static int i40e_netif_set_realnum_tx_rx_queues(struct i40e_vsi *vsi)
+{
+	int ret;
+
+	ret = netif_set_real_num_rx_queues(vsi->netdev,
+					   vsi->num_queue_pairs);
+	if (ret)
+		return ret;
+
+	return netif_set_real_num_tx_queues(vsi->netdev,
+					    vsi->num_queue_pairs);
+}
+
+/**
+ * i40e_vsi_open -
+ * @vsi: the VSI to open
+ *
+ * Finish initialization of the VSI.
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * Note: expects to be called while under rtnl_lock()
+ **/
+int i40e_vsi_open(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	char int_name[I40E_INT_NAME_STR_LEN];
+	int err;
+
+	/* allocate descriptors */
+	err = i40e_vsi_setup_tx_resources(vsi);
+	if (err)
+		goto err_setup_tx;
+	err = i40e_vsi_setup_rx_resources(vsi);
+	if (err)
+		goto err_setup_rx;
+
+	err = i40e_vsi_configure(vsi);
+	if (err)
+		goto err_setup_rx;
+
+	if (vsi->netdev) {
+		snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
+			 dev_driver_string(&pf->pdev->dev), vsi->netdev->name);
+		err = i40e_vsi_request_irq(vsi, int_name);
+		if (err)
+			goto err_setup_rx;
+
+		/* Notify the stack of the actual queue counts. */
+		err = i40e_netif_set_realnum_tx_rx_queues(vsi);
+		if (err)
+			goto err_set_queues;
+
+	} else if (vsi->type == I40E_VSI_FDIR) {
+		snprintf(int_name, sizeof(int_name) - 1, "%s-%s:fdir",
+			 dev_driver_string(&pf->pdev->dev),
+			 dev_name(&pf->pdev->dev));
+		err = i40e_vsi_request_irq(vsi, int_name);
+		if (err)
+			goto err_setup_rx;
+
+	} else {
+		err = -EINVAL;
+		goto err_setup_rx;
+	}
+
+	err = i40e_up_complete(vsi);
+	if (err)
+		goto err_up_complete;
+
+	return 0;
+
+err_up_complete:
+	i40e_down(vsi);
+err_set_queues:
+	i40e_vsi_free_irq(vsi);
+err_setup_rx:
+	i40e_vsi_free_rx_resources(vsi);
+err_setup_tx:
+	i40e_vsi_free_tx_resources(vsi);
+	if (vsi == pf->vsi[pf->lan_vsi])
+		i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
+
+	return err;
+}
+
+/**
+ * i40e_fdir_filter_exit - Cleans up the Flow Director accounting
+ * @pf: Pointer to PF
+ *
+ * This function destroys the hlist where all the Flow Director
+ * filters were saved.
+ **/
+static void i40e_fdir_filter_exit(struct i40e_pf *pf)
+{
+	struct i40e_fdir_filter *filter;
+	struct i40e_flex_pit *pit_entry, *tmp;
+	struct hlist_node *node2;
+
+	hlist_for_each_entry_safe(filter, node2,
+				  &pf->fdir_filter_list, fdir_node) {
+		hlist_del(&filter->fdir_node);
+		kfree(filter);
+	}
+
+	list_for_each_entry_safe(pit_entry, tmp, &pf->l3_flex_pit_list, list) {
+		list_del(&pit_entry->list);
+		kfree(pit_entry);
+	}
+	INIT_LIST_HEAD(&pf->l3_flex_pit_list);
+
+	list_for_each_entry_safe(pit_entry, tmp, &pf->l4_flex_pit_list, list) {
+		list_del(&pit_entry->list);
+		kfree(pit_entry);
+	}
+	INIT_LIST_HEAD(&pf->l4_flex_pit_list);
+
+	pf->fdir_pf_active_filters = 0;
+	i40e_reset_fdir_filter_cnt(pf);
+
+	/* Reprogram the default input set for TCP/IPv4 */
+	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP,
+				I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+	/* Reprogram the default input set for TCP/IPv6 */
+	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_TCP,
+				I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK |
+				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+	/* Reprogram the default input set for UDP/IPv4 */
+	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_UDP,
+				I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+	/* Reprogram the default input set for UDP/IPv6 */
+	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_UDP,
+				I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK |
+				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+	/* Reprogram the default input set for SCTP/IPv4 */
+	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_SCTP,
+				I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+	/* Reprogram the default input set for SCTP/IPv6 */
+	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_SCTP,
+				I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK |
+				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+	/* Reprogram the default input set for Other/IPv4 */
+	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_OTHER,
+				I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
+
+	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_FRAG_IPV4,
+				I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
+
+	/* Reprogram the default input set for Other/IPv6 */
+	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV6_OTHER,
+				I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
+
+	i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_FRAG_IPV6,
+				I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
+}
+
+/**
+ * i40e_cloud_filter_exit - Cleans up the cloud filters
+ * @pf: Pointer to PF
+ *
+ * This function destroys the hlist where all the cloud filters
+ * were saved.
+ **/
+static void i40e_cloud_filter_exit(struct i40e_pf *pf)
+{
+	struct i40e_cloud_filter *cfilter;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_safe(cfilter, node,
+				  &pf->cloud_filter_list, cloud_node) {
+		hlist_del(&cfilter->cloud_node);
+		kfree(cfilter);
+	}
+	pf->num_cloud_filters = 0;
+
+	if ((pf->flags & I40E_FLAG_FD_SB_TO_CLOUD_FILTER) &&
+	    !(pf->flags & I40E_FLAG_FD_SB_INACTIVE)) {
+		pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+		pf->flags &= ~I40E_FLAG_FD_SB_TO_CLOUD_FILTER;
+		pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+	}
+}
+
+/**
+ * i40e_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the driver's control, but
+ * this netdev interface is disabled.
+ *
+ * Returns 0, this is not allowed to fail
+ **/
+int i40e_close(struct net_device *netdev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+
+	i40e_vsi_close(vsi);
+
+	return 0;
+}
+
+/**
+ * i40e_do_reset - Start a PF or Core Reset sequence
+ * @pf: board private structure
+ * @reset_flags: which reset is requested
+ * @lock_acquired: indicates whether or not the lock has been acquired
+ * before this function was called.
+ *
+ * The essential difference in resets is that the PF Reset
+ * doesn't clear the packet buffers, doesn't reset the PE
+ * firmware, and doesn't bother the other PFs on the chip.
+ **/
+void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired)
+{
+	u32 val;
+
+	/* do the biggest reset indicated */
+	if (reset_flags & BIT_ULL(__I40E_GLOBAL_RESET_REQUESTED)) {
+
+		/* Request a Global Reset
+		 *
+		 * This will start the chip's countdown to the actual full
+		 * chip reset event, and a warning interrupt to be sent
+		 * to all PFs, including the requestor.  Our handler
+		 * for the warning interrupt will deal with the shutdown
+		 * and recovery of the switch setup.
+		 */
+		dev_dbg(&pf->pdev->dev, "GlobalR requested\n");
+		val = rd32(&pf->hw, I40E_GLGEN_RTRIG);
+		val |= I40E_GLGEN_RTRIG_GLOBR_MASK;
+		wr32(&pf->hw, I40E_GLGEN_RTRIG, val);
+
+	} else if (reset_flags & BIT_ULL(__I40E_CORE_RESET_REQUESTED)) {
+
+		/* Request a Core Reset
+		 *
+		 * Same as Global Reset, except does *not* include the MAC/PHY
+		 */
+		dev_dbg(&pf->pdev->dev, "CoreR requested\n");
+		val = rd32(&pf->hw, I40E_GLGEN_RTRIG);
+		val |= I40E_GLGEN_RTRIG_CORER_MASK;
+		wr32(&pf->hw, I40E_GLGEN_RTRIG, val);
+		i40e_flush(&pf->hw);
+
+	} else if (reset_flags & I40E_PF_RESET_FLAG) {
+
+		/* Request a PF Reset
+		 *
+		 * Resets only the PF-specific registers
+		 *
+		 * This goes directly to the tear-down and rebuild of
+		 * the switch, since we need to do all the recovery as
+		 * for the Core Reset.
+		 */
+		dev_dbg(&pf->pdev->dev, "PFR requested\n");
+		i40e_handle_reset_warning(pf, lock_acquired);
+
+	} else if (reset_flags & I40E_PF_RESET_AND_REBUILD_FLAG) {
+		/* Request a PF Reset
+		 *
+		 * Resets PF and reinitializes PFs VSI.
+		 */
+		i40e_prep_for_reset(pf);
+		i40e_reset_and_rebuild(pf, true, lock_acquired);
+		dev_info(&pf->pdev->dev,
+			 pf->flags & I40E_FLAG_DISABLE_FW_LLDP ?
+			 "FW LLDP is disabled\n" :
+			 "FW LLDP is enabled\n");
+
+	} else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) {
+		int v;
+
+		/* Find the VSI(s) that requested a re-init */
+		dev_info(&pf->pdev->dev,
+			 "VSI reinit requested\n");
+		for (v = 0; v < pf->num_alloc_vsi; v++) {
+			struct i40e_vsi *vsi = pf->vsi[v];
+
+			if (vsi != NULL &&
+			    test_and_clear_bit(__I40E_VSI_REINIT_REQUESTED,
+					       vsi->state))
+				i40e_vsi_reinit_locked(pf->vsi[v]);
+		}
+	} else if (reset_flags & BIT_ULL(__I40E_DOWN_REQUESTED)) {
+		int v;
+
+		/* Find the VSI(s) that needs to be brought down */
+		dev_info(&pf->pdev->dev, "VSI down requested\n");
+		for (v = 0; v < pf->num_alloc_vsi; v++) {
+			struct i40e_vsi *vsi = pf->vsi[v];
+
+			if (vsi != NULL &&
+			    test_and_clear_bit(__I40E_VSI_DOWN_REQUESTED,
+					       vsi->state)) {
+				set_bit(__I40E_VSI_DOWN, vsi->state);
+				i40e_down(vsi);
+			}
+		}
+	} else {
+		dev_info(&pf->pdev->dev,
+			 "bad reset request 0x%08x\n", reset_flags);
+	}
+}
+
+#ifdef CONFIG_I40E_DCB
+/**
+ * i40e_dcb_need_reconfig - Check if DCB needs reconfig
+ * @pf: board private structure
+ * @old_cfg: current DCB config
+ * @new_cfg: new DCB config
+ **/
+bool i40e_dcb_need_reconfig(struct i40e_pf *pf,
+			    struct i40e_dcbx_config *old_cfg,
+			    struct i40e_dcbx_config *new_cfg)
+{
+	bool need_reconfig = false;
+
+	/* Check if ETS configuration has changed */
+	if (memcmp(&new_cfg->etscfg,
+		   &old_cfg->etscfg,
+		   sizeof(new_cfg->etscfg))) {
+		/* If Priority Table has changed reconfig is needed */
+		if (memcmp(&new_cfg->etscfg.prioritytable,
+			   &old_cfg->etscfg.prioritytable,
+			   sizeof(new_cfg->etscfg.prioritytable))) {
+			need_reconfig = true;
+			dev_dbg(&pf->pdev->dev, "ETS UP2TC changed.\n");
+		}
+
+		if (memcmp(&new_cfg->etscfg.tcbwtable,
+			   &old_cfg->etscfg.tcbwtable,
+			   sizeof(new_cfg->etscfg.tcbwtable)))
+			dev_dbg(&pf->pdev->dev, "ETS TC BW Table changed.\n");
+
+		if (memcmp(&new_cfg->etscfg.tsatable,
+			   &old_cfg->etscfg.tsatable,
+			   sizeof(new_cfg->etscfg.tsatable)))
+			dev_dbg(&pf->pdev->dev, "ETS TSA Table changed.\n");
+	}
+
+	/* Check if PFC configuration has changed */
+	if (memcmp(&new_cfg->pfc,
+		   &old_cfg->pfc,
+		   sizeof(new_cfg->pfc))) {
+		need_reconfig = true;
+		dev_dbg(&pf->pdev->dev, "PFC config change detected.\n");
+	}
+
+	/* Check if APP Table has changed */
+	if (memcmp(&new_cfg->app,
+		   &old_cfg->app,
+		   sizeof(new_cfg->app))) {
+		need_reconfig = true;
+		dev_dbg(&pf->pdev->dev, "APP Table change detected.\n");
+	}
+
+	dev_dbg(&pf->pdev->dev, "dcb need_reconfig=%d\n", need_reconfig);
+	return need_reconfig;
+}
+
+/**
+ * i40e_handle_lldp_event - Handle LLDP Change MIB event
+ * @pf: board private structure
+ * @e: event info posted on ARQ
+ **/
+static int i40e_handle_lldp_event(struct i40e_pf *pf,
+				  struct i40e_arq_event_info *e)
+{
+	struct i40e_aqc_lldp_get_mib *mib =
+		(struct i40e_aqc_lldp_get_mib *)&e->desc.params.raw;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_dcbx_config tmp_dcbx_cfg;
+	bool need_reconfig = false;
+	int ret = 0;
+	u8 type;
+
+	/* X710-T*L 2.5G and 5G speeds don't support DCB */
+	if (I40E_IS_X710TL_DEVICE(hw->device_id) &&
+	    (hw->phy.link_info.link_speed &
+	     ~(I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB)) &&
+	     !(pf->flags & I40E_FLAG_DCB_CAPABLE))
+		/* let firmware decide if the DCB should be disabled */
+		pf->flags |= I40E_FLAG_DCB_CAPABLE;
+
+	/* Not DCB capable or capability disabled */
+	if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
+		return ret;
+
+	/* Ignore if event is not for Nearest Bridge */
+	type = ((mib->type >> I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT)
+		& I40E_AQ_LLDP_BRIDGE_TYPE_MASK);
+	dev_dbg(&pf->pdev->dev, "LLDP event mib bridge type 0x%x\n", type);
+	if (type != I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE)
+		return ret;
+
+	/* Check MIB Type and return if event for Remote MIB update */
+	type = mib->type & I40E_AQ_LLDP_MIB_TYPE_MASK;
+	dev_dbg(&pf->pdev->dev,
+		"LLDP event mib type %s\n", type ? "remote" : "local");
+	if (type == I40E_AQ_LLDP_MIB_REMOTE) {
+		/* Update the remote cached instance and return */
+		ret = i40e_aq_get_dcb_config(hw, I40E_AQ_LLDP_MIB_REMOTE,
+				I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE,
+				&hw->remote_dcbx_config);
+		goto exit;
+	}
+
+	/* Store the old configuration */
+	tmp_dcbx_cfg = hw->local_dcbx_config;
+
+	/* Reset the old DCBx configuration data */
+	memset(&hw->local_dcbx_config, 0, sizeof(hw->local_dcbx_config));
+	/* Get updated DCBX data from firmware */
+	ret = i40e_get_dcb_config(&pf->hw);
+	if (ret) {
+		/* X710-T*L 2.5G and 5G speeds don't support DCB */
+		if (I40E_IS_X710TL_DEVICE(hw->device_id) &&
+		    (hw->phy.link_info.link_speed &
+		     (I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB))) {
+			dev_warn(&pf->pdev->dev,
+				 "DCB is not supported for X710-T*L 2.5/5G speeds\n");
+			pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
+		} else {
+			dev_info(&pf->pdev->dev,
+				 "Failed querying DCB configuration data from firmware, err %pe aq_err %s\n",
+				 ERR_PTR(ret),
+				 i40e_aq_str(&pf->hw,
+					     pf->hw.aq.asq_last_status));
+		}
+		goto exit;
+	}
+
+	/* No change detected in DCBX configs */
+	if (!memcmp(&tmp_dcbx_cfg, &hw->local_dcbx_config,
+		    sizeof(tmp_dcbx_cfg))) {
+		dev_dbg(&pf->pdev->dev, "No change detected in DCBX configuration.\n");
+		goto exit;
+	}
+
+	need_reconfig = i40e_dcb_need_reconfig(pf, &tmp_dcbx_cfg,
+					       &hw->local_dcbx_config);
+
+	i40e_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &hw->local_dcbx_config);
+
+	if (!need_reconfig)
+		goto exit;
+
+	/* Enable DCB tagging only when more than one TC */
+	if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
+		pf->flags |= I40E_FLAG_DCB_ENABLED;
+	else
+		pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+
+	set_bit(__I40E_PORT_SUSPENDED, pf->state);
+	/* Reconfiguration needed quiesce all VSIs */
+	i40e_pf_quiesce_all_vsi(pf);
+
+	/* Changes in configuration update VEB/VSI */
+	i40e_dcb_reconfigure(pf);
+
+	ret = i40e_resume_port_tx(pf);
+
+	clear_bit(__I40E_PORT_SUSPENDED, pf->state);
+	/* In case of error no point in resuming VSIs */
+	if (ret)
+		goto exit;
+
+	/* Wait for the PF's queues to be disabled */
+	ret = i40e_pf_wait_queues_disabled(pf);
+	if (ret) {
+		/* Schedule PF reset to recover */
+		set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
+		i40e_service_event_schedule(pf);
+	} else {
+		i40e_pf_unquiesce_all_vsi(pf);
+		set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
+		set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
+	}
+
+exit:
+	return ret;
+}
+#endif /* CONFIG_I40E_DCB */
+
+/**
+ * i40e_do_reset_safe - Protected reset path for userland calls.
+ * @pf: board private structure
+ * @reset_flags: which reset is requested
+ *
+ **/
+void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags)
+{
+	rtnl_lock();
+	i40e_do_reset(pf, reset_flags, true);
+	rtnl_unlock();
+}
+
+/**
+ * i40e_handle_lan_overflow_event - Handler for LAN queue overflow event
+ * @pf: board private structure
+ * @e: event info posted on ARQ
+ *
+ * Handler for LAN Queue Overflow Event generated by the firmware for PF
+ * and VF queues
+ **/
+static void i40e_handle_lan_overflow_event(struct i40e_pf *pf,
+					   struct i40e_arq_event_info *e)
+{
+	struct i40e_aqc_lan_overflow *data =
+		(struct i40e_aqc_lan_overflow *)&e->desc.params.raw;
+	u32 queue = le32_to_cpu(data->prtdcb_rupto);
+	u32 qtx_ctl = le32_to_cpu(data->otx_ctl);
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_vf *vf;
+	u16 vf_id;
+
+	dev_dbg(&pf->pdev->dev, "overflow Rx Queue Number = %d QTX_CTL=0x%08x\n",
+		queue, qtx_ctl);
+
+	/* Queue belongs to VF, find the VF and issue VF reset */
+	if (((qtx_ctl & I40E_QTX_CTL_PFVF_Q_MASK)
+	    >> I40E_QTX_CTL_PFVF_Q_SHIFT) == I40E_QTX_CTL_VF_QUEUE) {
+		vf_id = (u16)((qtx_ctl & I40E_QTX_CTL_VFVM_INDX_MASK)
+			 >> I40E_QTX_CTL_VFVM_INDX_SHIFT);
+		vf_id -= hw->func_caps.vf_base_id;
+		vf = &pf->vf[vf_id];
+		i40e_vc_notify_vf_reset(vf);
+		/* Allow VF to process pending reset notification */
+		msleep(20);
+		i40e_reset_vf(vf, false);
+	}
+}
+
+/**
+ * i40e_get_cur_guaranteed_fd_count - Get the consumed guaranteed FD filters
+ * @pf: board private structure
+ **/
+u32 i40e_get_cur_guaranteed_fd_count(struct i40e_pf *pf)
+{
+	u32 val, fcnt_prog;
+
+	val = rd32(&pf->hw, I40E_PFQF_FDSTAT);
+	fcnt_prog = (val & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK);
+	return fcnt_prog;
+}
+
+/**
+ * i40e_get_current_fd_count - Get total FD filters programmed for this PF
+ * @pf: board private structure
+ **/
+u32 i40e_get_current_fd_count(struct i40e_pf *pf)
+{
+	u32 val, fcnt_prog;
+
+	val = rd32(&pf->hw, I40E_PFQF_FDSTAT);
+	fcnt_prog = (val & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK) +
+		    ((val & I40E_PFQF_FDSTAT_BEST_CNT_MASK) >>
+		      I40E_PFQF_FDSTAT_BEST_CNT_SHIFT);
+	return fcnt_prog;
+}
+
+/**
+ * i40e_get_global_fd_count - Get total FD filters programmed on device
+ * @pf: board private structure
+ **/
+u32 i40e_get_global_fd_count(struct i40e_pf *pf)
+{
+	u32 val, fcnt_prog;
+
+	val = rd32(&pf->hw, I40E_GLQF_FDCNT_0);
+	fcnt_prog = (val & I40E_GLQF_FDCNT_0_GUARANT_CNT_MASK) +
+		    ((val & I40E_GLQF_FDCNT_0_BESTCNT_MASK) >>
+		     I40E_GLQF_FDCNT_0_BESTCNT_SHIFT);
+	return fcnt_prog;
+}
+
+/**
+ * i40e_reenable_fdir_sb - Restore FDir SB capability
+ * @pf: board private structure
+ **/
+static void i40e_reenable_fdir_sb(struct i40e_pf *pf)
+{
+	if (test_and_clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
+		if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
+		    (I40E_DEBUG_FD & pf->hw.debug_mask))
+			dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
+}
+
+/**
+ * i40e_reenable_fdir_atr - Restore FDir ATR capability
+ * @pf: board private structure
+ **/
+static void i40e_reenable_fdir_atr(struct i40e_pf *pf)
+{
+	if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state)) {
+		/* ATR uses the same filtering logic as SB rules. It only
+		 * functions properly if the input set mask is at the default
+		 * settings. It is safe to restore the default input set
+		 * because there are no active TCPv4 filter rules.
+		 */
+		i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_TCP,
+					I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
+					I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
+
+		if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
+		    (I40E_DEBUG_FD & pf->hw.debug_mask))
+			dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
+	}
+}
+
+/**
+ * i40e_delete_invalid_filter - Delete an invalid FDIR filter
+ * @pf: board private structure
+ * @filter: FDir filter to remove
+ */
+static void i40e_delete_invalid_filter(struct i40e_pf *pf,
+				       struct i40e_fdir_filter *filter)
+{
+	/* Update counters */
+	pf->fdir_pf_active_filters--;
+	pf->fd_inv = 0;
+
+	switch (filter->flow_type) {
+	case TCP_V4_FLOW:
+		pf->fd_tcp4_filter_cnt--;
+		break;
+	case UDP_V4_FLOW:
+		pf->fd_udp4_filter_cnt--;
+		break;
+	case SCTP_V4_FLOW:
+		pf->fd_sctp4_filter_cnt--;
+		break;
+	case TCP_V6_FLOW:
+		pf->fd_tcp6_filter_cnt--;
+		break;
+	case UDP_V6_FLOW:
+		pf->fd_udp6_filter_cnt--;
+		break;
+	case SCTP_V6_FLOW:
+		pf->fd_udp6_filter_cnt--;
+		break;
+	case IP_USER_FLOW:
+		switch (filter->ipl4_proto) {
+		case IPPROTO_TCP:
+			pf->fd_tcp4_filter_cnt--;
+			break;
+		case IPPROTO_UDP:
+			pf->fd_udp4_filter_cnt--;
+			break;
+		case IPPROTO_SCTP:
+			pf->fd_sctp4_filter_cnt--;
+			break;
+		case IPPROTO_IP:
+			pf->fd_ip4_filter_cnt--;
+			break;
+		}
+		break;
+	case IPV6_USER_FLOW:
+		switch (filter->ipl4_proto) {
+		case IPPROTO_TCP:
+			pf->fd_tcp6_filter_cnt--;
+			break;
+		case IPPROTO_UDP:
+			pf->fd_udp6_filter_cnt--;
+			break;
+		case IPPROTO_SCTP:
+			pf->fd_sctp6_filter_cnt--;
+			break;
+		case IPPROTO_IP:
+			pf->fd_ip6_filter_cnt--;
+			break;
+		}
+		break;
+	}
+
+	/* Remove the filter from the list and free memory */
+	hlist_del(&filter->fdir_node);
+	kfree(filter);
+}
+
+/**
+ * i40e_fdir_check_and_reenable - Function to reenabe FD ATR or SB if disabled
+ * @pf: board private structure
+ **/
+void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
+{
+	struct i40e_fdir_filter *filter;
+	u32 fcnt_prog, fcnt_avail;
+	struct hlist_node *node;
+
+	if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
+		return;
+
+	/* Check if we have enough room to re-enable FDir SB capability. */
+	fcnt_prog = i40e_get_global_fd_count(pf);
+	fcnt_avail = pf->fdir_pf_filter_count;
+	if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM)) ||
+	    (pf->fd_add_err == 0) ||
+	    (i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt))
+		i40e_reenable_fdir_sb(pf);
+
+	/* We should wait for even more space before re-enabling ATR.
+	 * Additionally, we cannot enable ATR as long as we still have TCP SB
+	 * rules active.
+	 */
+	if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) &&
+	    pf->fd_tcp4_filter_cnt == 0 && pf->fd_tcp6_filter_cnt == 0)
+		i40e_reenable_fdir_atr(pf);
+
+	/* if hw had a problem adding a filter, delete it */
+	if (pf->fd_inv > 0) {
+		hlist_for_each_entry_safe(filter, node,
+					  &pf->fdir_filter_list, fdir_node)
+			if (filter->fd_id == pf->fd_inv)
+				i40e_delete_invalid_filter(pf, filter);
+	}
+}
+
+#define I40E_MIN_FD_FLUSH_INTERVAL 10
+#define I40E_MIN_FD_FLUSH_SB_ATR_UNSTABLE 30
+/**
+ * i40e_fdir_flush_and_replay - Function to flush all FD filters and replay SB
+ * @pf: board private structure
+ **/
+static void i40e_fdir_flush_and_replay(struct i40e_pf *pf)
+{
+	unsigned long min_flush_time;
+	int flush_wait_retry = 50;
+	bool disable_atr = false;
+	int fd_room;
+	int reg;
+
+	if (!time_after(jiffies, pf->fd_flush_timestamp +
+				 (I40E_MIN_FD_FLUSH_INTERVAL * HZ)))
+		return;
+
+	/* If the flush is happening too quick and we have mostly SB rules we
+	 * should not re-enable ATR for some time.
+	 */
+	min_flush_time = pf->fd_flush_timestamp +
+			 (I40E_MIN_FD_FLUSH_SB_ATR_UNSTABLE * HZ);
+	fd_room = pf->fdir_pf_filter_count - pf->fdir_pf_active_filters;
+
+	if (!(time_after(jiffies, min_flush_time)) &&
+	    (fd_room < I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) {
+		if (I40E_DEBUG_FD & pf->hw.debug_mask)
+			dev_info(&pf->pdev->dev, "ATR disabled, not enough FD filter space.\n");
+		disable_atr = true;
+	}
+
+	pf->fd_flush_timestamp = jiffies;
+	set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
+	/* flush all filters */
+	wr32(&pf->hw, I40E_PFQF_CTL_1,
+	     I40E_PFQF_CTL_1_CLEARFDTABLE_MASK);
+	i40e_flush(&pf->hw);
+	pf->fd_flush_cnt++;
+	pf->fd_add_err = 0;
+	do {
+		/* Check FD flush status every 5-6msec */
+		usleep_range(5000, 6000);
+		reg = rd32(&pf->hw, I40E_PFQF_CTL_1);
+		if (!(reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK))
+			break;
+	} while (flush_wait_retry--);
+	if (reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK) {
+		dev_warn(&pf->pdev->dev, "FD table did not flush, needs more time\n");
+	} else {
+		/* replay sideband filters */
+		i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]);
+		if (!disable_atr && !pf->fd_tcp4_filter_cnt)
+			clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
+		clear_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
+		if (I40E_DEBUG_FD & pf->hw.debug_mask)
+			dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n");
+	}
+}
+
+/**
+ * i40e_get_current_atr_cnt - Get the count of total FD ATR filters programmed
+ * @pf: board private structure
+ **/
+u32 i40e_get_current_atr_cnt(struct i40e_pf *pf)
+{
+	return i40e_get_current_fd_count(pf) - pf->fdir_pf_active_filters;
+}
+
+/**
+ * i40e_fdir_reinit_subtask - Worker thread to reinit FDIR filter table
+ * @pf: board private structure
+ **/
+static void i40e_fdir_reinit_subtask(struct i40e_pf *pf)
+{
+
+	/* if interface is down do nothing */
+	if (test_bit(__I40E_DOWN, pf->state))
+		return;
+
+	if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
+		i40e_fdir_flush_and_replay(pf);
+
+	i40e_fdir_check_and_reenable(pf);
+
+}
+
+/**
+ * i40e_vsi_link_event - notify VSI of a link event
+ * @vsi: vsi to be notified
+ * @link_up: link up or down
+ **/
+static void i40e_vsi_link_event(struct i40e_vsi *vsi, bool link_up)
+{
+	if (!vsi || test_bit(__I40E_VSI_DOWN, vsi->state))
+		return;
+
+	switch (vsi->type) {
+	case I40E_VSI_MAIN:
+		if (!vsi->netdev || !vsi->netdev_registered)
+			break;
+
+		if (link_up) {
+			netif_carrier_on(vsi->netdev);
+			netif_tx_wake_all_queues(vsi->netdev);
+		} else {
+			netif_carrier_off(vsi->netdev);
+			netif_tx_stop_all_queues(vsi->netdev);
+		}
+		break;
+
+	case I40E_VSI_SRIOV:
+	case I40E_VSI_VMDQ2:
+	case I40E_VSI_CTRL:
+	case I40E_VSI_IWARP:
+	case I40E_VSI_MIRROR:
+	default:
+		/* there is no notification for other VSIs */
+		break;
+	}
+}
+
+/**
+ * i40e_veb_link_event - notify elements on the veb of a link event
+ * @veb: veb to be notified
+ * @link_up: link up or down
+ **/
+static void i40e_veb_link_event(struct i40e_veb *veb, bool link_up)
+{
+	struct i40e_pf *pf;
+	int i;
+
+	if (!veb || !veb->pf)
+		return;
+	pf = veb->pf;
+
+	/* depth first... */
+	for (i = 0; i < I40E_MAX_VEB; i++)
+		if (pf->veb[i] && (pf->veb[i]->uplink_seid == veb->seid))
+			i40e_veb_link_event(pf->veb[i], link_up);
+
+	/* ... now the local VSIs */
+	for (i = 0; i < pf->num_alloc_vsi; i++)
+		if (pf->vsi[i] && (pf->vsi[i]->uplink_seid == veb->seid))
+			i40e_vsi_link_event(pf->vsi[i], link_up);
+}
+
+/**
+ * i40e_link_event - Update netif_carrier status
+ * @pf: board private structure
+ **/
+static void i40e_link_event(struct i40e_pf *pf)
+{
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	u8 new_link_speed, old_link_speed;
+	bool new_link, old_link;
+	int status;
+#ifdef CONFIG_I40E_DCB
+	int err;
+#endif /* CONFIG_I40E_DCB */
+
+	/* set this to force the get_link_status call to refresh state */
+	pf->hw.phy.get_link_info = true;
+	old_link = (pf->hw.phy.link_info_old.link_info & I40E_AQ_LINK_UP);
+	status = i40e_get_link_status(&pf->hw, &new_link);
+
+	/* On success, disable temp link polling */
+	if (status == 0) {
+		clear_bit(__I40E_TEMP_LINK_POLLING, pf->state);
+	} else {
+		/* Enable link polling temporarily until i40e_get_link_status
+		 * returns 0
+		 */
+		set_bit(__I40E_TEMP_LINK_POLLING, pf->state);
+		dev_dbg(&pf->pdev->dev, "couldn't get link state, status: %d\n",
+			status);
+		return;
+	}
+
+	old_link_speed = pf->hw.phy.link_info_old.link_speed;
+	new_link_speed = pf->hw.phy.link_info.link_speed;
+
+	if (new_link == old_link &&
+	    new_link_speed == old_link_speed &&
+	    (test_bit(__I40E_VSI_DOWN, vsi->state) ||
+	     new_link == netif_carrier_ok(vsi->netdev)))
+		return;
+
+	i40e_print_link_message(vsi, new_link);
+
+	/* Notify the base of the switch tree connected to
+	 * the link.  Floating VEBs are not notified.
+	 */
+	if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb])
+		i40e_veb_link_event(pf->veb[pf->lan_veb], new_link);
+	else
+		i40e_vsi_link_event(vsi, new_link);
+
+	if (pf->vf)
+		i40e_vc_notify_link_state(pf);
+
+	if (pf->flags & I40E_FLAG_PTP)
+		i40e_ptp_set_increment(pf);
+#ifdef CONFIG_I40E_DCB
+	if (new_link == old_link)
+		return;
+	/* Not SW DCB so firmware will take care of default settings */
+	if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)
+		return;
+
+	/* We cover here only link down, as after link up in case of SW DCB
+	 * SW LLDP agent will take care of setting it up
+	 */
+	if (!new_link) {
+		dev_dbg(&pf->pdev->dev, "Reconfig DCB to single TC as result of Link Down\n");
+		memset(&pf->tmp_cfg, 0, sizeof(pf->tmp_cfg));
+		err = i40e_dcb_sw_default_config(pf);
+		if (err) {
+			pf->flags &= ~(I40E_FLAG_DCB_CAPABLE |
+				       I40E_FLAG_DCB_ENABLED);
+		} else {
+			pf->dcbx_cap = DCB_CAP_DCBX_HOST |
+				       DCB_CAP_DCBX_VER_IEEE;
+			pf->flags |= I40E_FLAG_DCB_CAPABLE;
+			pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+		}
+	}
+#endif /* CONFIG_I40E_DCB */
+}
+
+/**
+ * i40e_watchdog_subtask - periodic checks not using event driven response
+ * @pf: board private structure
+ **/
+static void i40e_watchdog_subtask(struct i40e_pf *pf)
+{
+	int i;
+
+	/* if interface is down do nothing */
+	if (test_bit(__I40E_DOWN, pf->state) ||
+	    test_bit(__I40E_CONFIG_BUSY, pf->state))
+		return;
+
+	/* make sure we don't do these things too often */
+	if (time_before(jiffies, (pf->service_timer_previous +
+				  pf->service_timer_period)))
+		return;
+	pf->service_timer_previous = jiffies;
+
+	if ((pf->flags & I40E_FLAG_LINK_POLLING_ENABLED) ||
+	    test_bit(__I40E_TEMP_LINK_POLLING, pf->state))
+		i40e_link_event(pf);
+
+	/* Update the stats for active netdevs so the network stack
+	 * can look at updated numbers whenever it cares to
+	 */
+	for (i = 0; i < pf->num_alloc_vsi; i++)
+		if (pf->vsi[i] && pf->vsi[i]->netdev)
+			i40e_update_stats(pf->vsi[i]);
+
+	if (pf->flags & I40E_FLAG_VEB_STATS_ENABLED) {
+		/* Update the stats for the active switching components */
+		for (i = 0; i < I40E_MAX_VEB; i++)
+			if (pf->veb[i])
+				i40e_update_veb_stats(pf->veb[i]);
+	}
+
+	i40e_ptp_rx_hang(pf);
+	i40e_ptp_tx_hang(pf);
+}
+
+/**
+ * i40e_reset_subtask - Set up for resetting the device and driver
+ * @pf: board private structure
+ **/
+static void i40e_reset_subtask(struct i40e_pf *pf)
+{
+	u32 reset_flags = 0;
+
+	if (test_bit(__I40E_REINIT_REQUESTED, pf->state)) {
+		reset_flags |= BIT(__I40E_REINIT_REQUESTED);
+		clear_bit(__I40E_REINIT_REQUESTED, pf->state);
+	}
+	if (test_bit(__I40E_PF_RESET_REQUESTED, pf->state)) {
+		reset_flags |= BIT(__I40E_PF_RESET_REQUESTED);
+		clear_bit(__I40E_PF_RESET_REQUESTED, pf->state);
+	}
+	if (test_bit(__I40E_CORE_RESET_REQUESTED, pf->state)) {
+		reset_flags |= BIT(__I40E_CORE_RESET_REQUESTED);
+		clear_bit(__I40E_CORE_RESET_REQUESTED, pf->state);
+	}
+	if (test_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state)) {
+		reset_flags |= BIT(__I40E_GLOBAL_RESET_REQUESTED);
+		clear_bit(__I40E_GLOBAL_RESET_REQUESTED, pf->state);
+	}
+	if (test_bit(__I40E_DOWN_REQUESTED, pf->state)) {
+		reset_flags |= BIT(__I40E_DOWN_REQUESTED);
+		clear_bit(__I40E_DOWN_REQUESTED, pf->state);
+	}
+
+	/* If there's a recovery already waiting, it takes
+	 * precedence before starting a new reset sequence.
+	 */
+	if (test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) {
+		i40e_prep_for_reset(pf);
+		i40e_reset(pf);
+		i40e_rebuild(pf, false, false);
+	}
+
+	/* If we're already down or resetting, just bail */
+	if (reset_flags &&
+	    !test_bit(__I40E_DOWN, pf->state) &&
+	    !test_bit(__I40E_CONFIG_BUSY, pf->state)) {
+		i40e_do_reset(pf, reset_flags, false);
+	}
+}
+
+/**
+ * i40e_handle_link_event - Handle link event
+ * @pf: board private structure
+ * @e: event info posted on ARQ
+ **/
+static void i40e_handle_link_event(struct i40e_pf *pf,
+				   struct i40e_arq_event_info *e)
+{
+	struct i40e_aqc_get_link_status *status =
+		(struct i40e_aqc_get_link_status *)&e->desc.params.raw;
+
+	/* Do a new status request to re-enable LSE reporting
+	 * and load new status information into the hw struct
+	 * This completely ignores any state information
+	 * in the ARQ event info, instead choosing to always
+	 * issue the AQ update link status command.
+	 */
+	i40e_link_event(pf);
+
+	/* Check if module meets thermal requirements */
+	if (status->phy_type == I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP) {
+		dev_err(&pf->pdev->dev,
+			"Rx/Tx is disabled on this device because the module does not meet thermal requirements.\n");
+		dev_err(&pf->pdev->dev,
+			"Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+	} else {
+		/* check for unqualified module, if link is down, suppress
+		 * the message if link was forced to be down.
+		 */
+		if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
+		    (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
+		    (!(status->link_info & I40E_AQ_LINK_UP)) &&
+		    (!(pf->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED))) {
+			dev_err(&pf->pdev->dev,
+				"Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n");
+			dev_err(&pf->pdev->dev,
+				"Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+		}
+	}
+}
+
+/**
+ * i40e_clean_adminq_subtask - Clean the AdminQ rings
+ * @pf: board private structure
+ **/
+static void i40e_clean_adminq_subtask(struct i40e_pf *pf)
+{
+	struct i40e_arq_event_info event;
+	struct i40e_hw *hw = &pf->hw;
+	u16 pending, i = 0;
+	u16 opcode;
+	u32 oldval;
+	int ret;
+	u32 val;
+
+	/* Do not run clean AQ when PF reset fails */
+	if (test_bit(__I40E_RESET_FAILED, pf->state))
+		return;
+
+	/* check for error indications */
+	val = rd32(&pf->hw, pf->hw.aq.arq.len);
+	oldval = val;
+	if (val & I40E_PF_ARQLEN_ARQVFE_MASK) {
+		if (hw->debug_mask & I40E_DEBUG_AQ)
+			dev_info(&pf->pdev->dev, "ARQ VF Error detected\n");
+		val &= ~I40E_PF_ARQLEN_ARQVFE_MASK;
+	}
+	if (val & I40E_PF_ARQLEN_ARQOVFL_MASK) {
+		if (hw->debug_mask & I40E_DEBUG_AQ)
+			dev_info(&pf->pdev->dev, "ARQ Overflow Error detected\n");
+		val &= ~I40E_PF_ARQLEN_ARQOVFL_MASK;
+		pf->arq_overflows++;
+	}
+	if (val & I40E_PF_ARQLEN_ARQCRIT_MASK) {
+		if (hw->debug_mask & I40E_DEBUG_AQ)
+			dev_info(&pf->pdev->dev, "ARQ Critical Error detected\n");
+		val &= ~I40E_PF_ARQLEN_ARQCRIT_MASK;
+	}
+	if (oldval != val)
+		wr32(&pf->hw, pf->hw.aq.arq.len, val);
+
+	val = rd32(&pf->hw, pf->hw.aq.asq.len);
+	oldval = val;
+	if (val & I40E_PF_ATQLEN_ATQVFE_MASK) {
+		if (pf->hw.debug_mask & I40E_DEBUG_AQ)
+			dev_info(&pf->pdev->dev, "ASQ VF Error detected\n");
+		val &= ~I40E_PF_ATQLEN_ATQVFE_MASK;
+	}
+	if (val & I40E_PF_ATQLEN_ATQOVFL_MASK) {
+		if (pf->hw.debug_mask & I40E_DEBUG_AQ)
+			dev_info(&pf->pdev->dev, "ASQ Overflow Error detected\n");
+		val &= ~I40E_PF_ATQLEN_ATQOVFL_MASK;
+	}
+	if (val & I40E_PF_ATQLEN_ATQCRIT_MASK) {
+		if (pf->hw.debug_mask & I40E_DEBUG_AQ)
+			dev_info(&pf->pdev->dev, "ASQ Critical Error detected\n");
+		val &= ~I40E_PF_ATQLEN_ATQCRIT_MASK;
+	}
+	if (oldval != val)
+		wr32(&pf->hw, pf->hw.aq.asq.len, val);
+
+	event.buf_len = I40E_MAX_AQ_BUF_SIZE;
+	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
+	if (!event.msg_buf)
+		return;
+
+	do {
+		ret = i40e_clean_arq_element(hw, &event, &pending);
+		if (ret == -EALREADY)
+			break;
+		else if (ret) {
+			dev_info(&pf->pdev->dev, "ARQ event error %d\n", ret);
+			break;
+		}
+
+		opcode = le16_to_cpu(event.desc.opcode);
+		switch (opcode) {
+
+		case i40e_aqc_opc_get_link_status:
+			rtnl_lock();
+			i40e_handle_link_event(pf, &event);
+			rtnl_unlock();
+			break;
+		case i40e_aqc_opc_send_msg_to_pf:
+			ret = i40e_vc_process_vf_msg(pf,
+					le16_to_cpu(event.desc.retval),
+					le32_to_cpu(event.desc.cookie_high),
+					le32_to_cpu(event.desc.cookie_low),
+					event.msg_buf,
+					event.msg_len);
+			break;
+		case i40e_aqc_opc_lldp_update_mib:
+			dev_dbg(&pf->pdev->dev, "ARQ: Update LLDP MIB event received\n");
+#ifdef CONFIG_I40E_DCB
+			rtnl_lock();
+			i40e_handle_lldp_event(pf, &event);
+			rtnl_unlock();
+#endif /* CONFIG_I40E_DCB */
+			break;
+		case i40e_aqc_opc_event_lan_overflow:
+			dev_dbg(&pf->pdev->dev, "ARQ LAN queue overflow event received\n");
+			i40e_handle_lan_overflow_event(pf, &event);
+			break;
+		case i40e_aqc_opc_send_msg_to_peer:
+			dev_info(&pf->pdev->dev, "ARQ: Msg from other pf\n");
+			break;
+		case i40e_aqc_opc_nvm_erase:
+		case i40e_aqc_opc_nvm_update:
+		case i40e_aqc_opc_oem_post_update:
+			i40e_debug(&pf->hw, I40E_DEBUG_NVM,
+				   "ARQ NVM operation 0x%04x completed\n",
+				   opcode);
+			break;
+		default:
+			dev_info(&pf->pdev->dev,
+				 "ARQ: Unknown event 0x%04x ignored\n",
+				 opcode);
+			break;
+		}
+	} while (i++ < pf->adminq_work_limit);
+
+	if (i < pf->adminq_work_limit)
+		clear_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state);
+
+	/* re-enable Admin queue interrupt cause */
+	val = rd32(hw, I40E_PFINT_ICR0_ENA);
+	val |=  I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
+	wr32(hw, I40E_PFINT_ICR0_ENA, val);
+	i40e_flush(hw);
+
+	kfree(event.msg_buf);
+}
+
+/**
+ * i40e_verify_eeprom - make sure eeprom is good to use
+ * @pf: board private structure
+ **/
+static void i40e_verify_eeprom(struct i40e_pf *pf)
+{
+	int err;
+
+	err = i40e_diag_eeprom_test(&pf->hw);
+	if (err) {
+		/* retry in case of garbage read */
+		err = i40e_diag_eeprom_test(&pf->hw);
+		if (err) {
+			dev_info(&pf->pdev->dev, "eeprom check failed (%d), Tx/Rx traffic disabled\n",
+				 err);
+			set_bit(__I40E_BAD_EEPROM, pf->state);
+		}
+	}
+
+	if (!err && test_bit(__I40E_BAD_EEPROM, pf->state)) {
+		dev_info(&pf->pdev->dev, "eeprom check passed, Tx/Rx traffic enabled\n");
+		clear_bit(__I40E_BAD_EEPROM, pf->state);
+	}
+}
+
+/**
+ * i40e_enable_pf_switch_lb
+ * @pf: pointer to the PF structure
+ *
+ * enable switch loop back or die - no point in a return value
+ **/
+static void i40e_enable_pf_switch_lb(struct i40e_pf *pf)
+{
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	struct i40e_vsi_context ctxt;
+	int ret;
+
+	ctxt.seid = pf->main_vsi_seid;
+	ctxt.pf_num = pf->hw.pf_id;
+	ctxt.vf_num = 0;
+	ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "couldn't get PF vsi config, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		return;
+	}
+	ctxt.flags = I40E_AQ_VSI_TYPE_PF;
+	ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+	ctxt.info.switch_id |= cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+
+	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "update vsi switch failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+	}
+}
+
+/**
+ * i40e_disable_pf_switch_lb
+ * @pf: pointer to the PF structure
+ *
+ * disable switch loop back or die - no point in a return value
+ **/
+static void i40e_disable_pf_switch_lb(struct i40e_pf *pf)
+{
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	struct i40e_vsi_context ctxt;
+	int ret;
+
+	ctxt.seid = pf->main_vsi_seid;
+	ctxt.pf_num = pf->hw.pf_id;
+	ctxt.vf_num = 0;
+	ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "couldn't get PF vsi config, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		return;
+	}
+	ctxt.flags = I40E_AQ_VSI_TYPE_PF;
+	ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+	ctxt.info.switch_id &= ~cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+
+	ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "update vsi switch failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+	}
+}
+
+/**
+ * i40e_config_bridge_mode - Configure the HW bridge mode
+ * @veb: pointer to the bridge instance
+ *
+ * Configure the loop back mode for the LAN VSI that is downlink to the
+ * specified HW bridge instance. It is expected this function is called
+ * when a new HW bridge is instantiated.
+ **/
+static void i40e_config_bridge_mode(struct i40e_veb *veb)
+{
+	struct i40e_pf *pf = veb->pf;
+
+	if (pf->hw.debug_mask & I40E_DEBUG_LAN)
+		dev_info(&pf->pdev->dev, "enabling bridge mode: %s\n",
+			 veb->bridge_mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
+	if (veb->bridge_mode & BRIDGE_MODE_VEPA)
+		i40e_disable_pf_switch_lb(pf);
+	else
+		i40e_enable_pf_switch_lb(pf);
+}
+
+/**
+ * i40e_reconstitute_veb - rebuild the VEB and anything connected to it
+ * @veb: pointer to the VEB instance
+ *
+ * This is a recursive function that first builds the attached VSIs then
+ * recurses in to build the next layer of VEB.  We track the connections
+ * through our own index numbers because the seid's from the HW could
+ * change across the reset.
+ **/
+static int i40e_reconstitute_veb(struct i40e_veb *veb)
+{
+	struct i40e_vsi *ctl_vsi = NULL;
+	struct i40e_pf *pf = veb->pf;
+	int v, veb_idx;
+	int ret;
+
+	/* build VSI that owns this VEB, temporarily attached to base VEB */
+	for (v = 0; v < pf->num_alloc_vsi && !ctl_vsi; v++) {
+		if (pf->vsi[v] &&
+		    pf->vsi[v]->veb_idx == veb->idx &&
+		    pf->vsi[v]->flags & I40E_VSI_FLAG_VEB_OWNER) {
+			ctl_vsi = pf->vsi[v];
+			break;
+		}
+	}
+	if (!ctl_vsi) {
+		dev_info(&pf->pdev->dev,
+			 "missing owner VSI for veb_idx %d\n", veb->idx);
+		ret = -ENOENT;
+		goto end_reconstitute;
+	}
+	if (ctl_vsi != pf->vsi[pf->lan_vsi])
+		ctl_vsi->uplink_seid = pf->vsi[pf->lan_vsi]->uplink_seid;
+	ret = i40e_add_vsi(ctl_vsi);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "rebuild of veb_idx %d owner VSI failed: %d\n",
+			 veb->idx, ret);
+		goto end_reconstitute;
+	}
+	i40e_vsi_reset_stats(ctl_vsi);
+
+	/* create the VEB in the switch and move the VSI onto the VEB */
+	ret = i40e_add_veb(veb, ctl_vsi);
+	if (ret)
+		goto end_reconstitute;
+
+	if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
+		veb->bridge_mode = BRIDGE_MODE_VEB;
+	else
+		veb->bridge_mode = BRIDGE_MODE_VEPA;
+	i40e_config_bridge_mode(veb);
+
+	/* create the remaining VSIs attached to this VEB */
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
+		if (!pf->vsi[v] || pf->vsi[v] == ctl_vsi)
+			continue;
+
+		if (pf->vsi[v]->veb_idx == veb->idx) {
+			struct i40e_vsi *vsi = pf->vsi[v];
+
+			vsi->uplink_seid = veb->seid;
+			ret = i40e_add_vsi(vsi);
+			if (ret) {
+				dev_info(&pf->pdev->dev,
+					 "rebuild of vsi_idx %d failed: %d\n",
+					 v, ret);
+				goto end_reconstitute;
+			}
+			i40e_vsi_reset_stats(vsi);
+		}
+	}
+
+	/* create any VEBs attached to this VEB - RECURSION */
+	for (veb_idx = 0; veb_idx < I40E_MAX_VEB; veb_idx++) {
+		if (pf->veb[veb_idx] && pf->veb[veb_idx]->veb_idx == veb->idx) {
+			pf->veb[veb_idx]->uplink_seid = veb->seid;
+			ret = i40e_reconstitute_veb(pf->veb[veb_idx]);
+			if (ret)
+				break;
+		}
+	}
+
+end_reconstitute:
+	return ret;
+}
+
+/**
+ * i40e_get_capabilities - get info about the HW
+ * @pf: the PF struct
+ * @list_type: AQ capability to be queried
+ **/
+static int i40e_get_capabilities(struct i40e_pf *pf,
+				 enum i40e_admin_queue_opc list_type)
+{
+	struct i40e_aqc_list_capabilities_element_resp *cap_buf;
+	u16 data_size;
+	int buf_len;
+	int err;
+
+	buf_len = 40 * sizeof(struct i40e_aqc_list_capabilities_element_resp);
+	do {
+		cap_buf = kzalloc(buf_len, GFP_KERNEL);
+		if (!cap_buf)
+			return -ENOMEM;
+
+		/* this loads the data into the hw struct for us */
+		err = i40e_aq_discover_capabilities(&pf->hw, cap_buf, buf_len,
+						    &data_size, list_type,
+						    NULL);
+		/* data loaded, buffer no longer needed */
+		kfree(cap_buf);
+
+		if (pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) {
+			/* retry with a larger buffer */
+			buf_len = data_size;
+		} else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK || err) {
+			dev_info(&pf->pdev->dev,
+				 "capability discovery failed, err %pe aq_err %s\n",
+				 ERR_PTR(err),
+				 i40e_aq_str(&pf->hw,
+					     pf->hw.aq.asq_last_status));
+			return -ENODEV;
+		}
+	} while (err);
+
+	if (pf->hw.debug_mask & I40E_DEBUG_USER) {
+		if (list_type == i40e_aqc_opc_list_func_capabilities) {
+			dev_info(&pf->pdev->dev,
+				 "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
+				 pf->hw.pf_id, pf->hw.func_caps.num_vfs,
+				 pf->hw.func_caps.num_msix_vectors,
+				 pf->hw.func_caps.num_msix_vectors_vf,
+				 pf->hw.func_caps.fd_filters_guaranteed,
+				 pf->hw.func_caps.fd_filters_best_effort,
+				 pf->hw.func_caps.num_tx_qp,
+				 pf->hw.func_caps.num_vsis);
+		} else if (list_type == i40e_aqc_opc_list_dev_capabilities) {
+			dev_info(&pf->pdev->dev,
+				 "switch_mode=0x%04x, function_valid=0x%08x\n",
+				 pf->hw.dev_caps.switch_mode,
+				 pf->hw.dev_caps.valid_functions);
+			dev_info(&pf->pdev->dev,
+				 "SR-IOV=%d, num_vfs for all function=%u\n",
+				 pf->hw.dev_caps.sr_iov_1_1,
+				 pf->hw.dev_caps.num_vfs);
+			dev_info(&pf->pdev->dev,
+				 "num_vsis=%u, num_rx:%u, num_tx=%u\n",
+				 pf->hw.dev_caps.num_vsis,
+				 pf->hw.dev_caps.num_rx_qp,
+				 pf->hw.dev_caps.num_tx_qp);
+		}
+	}
+	if (list_type == i40e_aqc_opc_list_func_capabilities) {
+#define DEF_NUM_VSI (1 + (pf->hw.func_caps.fcoe ? 1 : 0) \
+		       + pf->hw.func_caps.num_vfs)
+		if (pf->hw.revision_id == 0 &&
+		    pf->hw.func_caps.num_vsis < DEF_NUM_VSI) {
+			dev_info(&pf->pdev->dev,
+				 "got num_vsis %d, setting num_vsis to %d\n",
+				 pf->hw.func_caps.num_vsis, DEF_NUM_VSI);
+			pf->hw.func_caps.num_vsis = DEF_NUM_VSI;
+		}
+	}
+	return 0;
+}
+
+static int i40e_vsi_clear(struct i40e_vsi *vsi);
+
+/**
+ * i40e_fdir_sb_setup - initialize the Flow Director resources for Sideband
+ * @pf: board private structure
+ **/
+static void i40e_fdir_sb_setup(struct i40e_pf *pf)
+{
+	struct i40e_vsi *vsi;
+
+	/* quick workaround for an NVM issue that leaves a critical register
+	 * uninitialized
+	 */
+	if (!rd32(&pf->hw, I40E_GLQF_HKEY(0))) {
+		static const u32 hkey[] = {
+			0xe640d33f, 0xcdfe98ab, 0x73fa7161, 0x0d7a7d36,
+			0xeacb7d61, 0xaa4f05b6, 0x9c5c89ed, 0xfc425ddb,
+			0xa4654832, 0xfc7461d4, 0x8f827619, 0xf5c63c21,
+			0x95b3a76d};
+		int i;
+
+		for (i = 0; i <= I40E_GLQF_HKEY_MAX_INDEX; i++)
+			wr32(&pf->hw, I40E_GLQF_HKEY(i), hkey[i]);
+	}
+
+	if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
+		return;
+
+	/* find existing VSI and see if it needs configuring */
+	vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+
+	/* create a new VSI if none exists */
+	if (!vsi) {
+		vsi = i40e_vsi_setup(pf, I40E_VSI_FDIR,
+				     pf->vsi[pf->lan_vsi]->seid, 0);
+		if (!vsi) {
+			dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n");
+			pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+			pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+			return;
+		}
+	}
+
+	i40e_vsi_setup_irqhandler(vsi, i40e_fdir_clean_ring);
+}
+
+/**
+ * i40e_fdir_teardown - release the Flow Director resources
+ * @pf: board private structure
+ **/
+static void i40e_fdir_teardown(struct i40e_pf *pf)
+{
+	struct i40e_vsi *vsi;
+
+	i40e_fdir_filter_exit(pf);
+	vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+	if (vsi)
+		i40e_vsi_release(vsi);
+}
+
+/**
+ * i40e_rebuild_cloud_filters - Rebuilds cloud filters for VSIs
+ * @vsi: PF main vsi
+ * @seid: seid of main or channel VSIs
+ *
+ * Rebuilds cloud filters associated with main VSI and channel VSIs if they
+ * existed before reset
+ **/
+static int i40e_rebuild_cloud_filters(struct i40e_vsi *vsi, u16 seid)
+{
+	struct i40e_cloud_filter *cfilter;
+	struct i40e_pf *pf = vsi->back;
+	struct hlist_node *node;
+	int ret;
+
+	/* Add cloud filters back if they exist */
+	hlist_for_each_entry_safe(cfilter, node, &pf->cloud_filter_list,
+				  cloud_node) {
+		if (cfilter->seid != seid)
+			continue;
+
+		if (cfilter->dst_port)
+			ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter,
+								true);
+		else
+			ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
+
+		if (ret) {
+			dev_dbg(&pf->pdev->dev,
+				"Failed to rebuild cloud filter, err %pe aq_err %s\n",
+				ERR_PTR(ret),
+				i40e_aq_str(&pf->hw,
+					    pf->hw.aq.asq_last_status));
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/**
+ * i40e_rebuild_channels - Rebuilds channel VSIs if they existed before reset
+ * @vsi: PF main vsi
+ *
+ * Rebuilds channel VSIs if they existed before reset
+ **/
+static int i40e_rebuild_channels(struct i40e_vsi *vsi)
+{
+	struct i40e_channel *ch, *ch_tmp;
+	int ret;
+
+	if (list_empty(&vsi->ch_list))
+		return 0;
+
+	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+		if (!ch->initialized)
+			break;
+		/* Proceed with creation of channel (VMDq2) VSI */
+		ret = i40e_add_channel(vsi->back, vsi->uplink_seid, ch);
+		if (ret) {
+			dev_info(&vsi->back->pdev->dev,
+				 "failed to rebuild channels using uplink_seid %u\n",
+				 vsi->uplink_seid);
+			return ret;
+		}
+		/* Reconfigure TX queues using QTX_CTL register */
+		ret = i40e_channel_config_tx_ring(vsi->back, vsi, ch);
+		if (ret) {
+			dev_info(&vsi->back->pdev->dev,
+				 "failed to configure TX rings for channel %u\n",
+				 ch->seid);
+			return ret;
+		}
+		/* update 'next_base_queue' */
+		vsi->next_base_queue = vsi->next_base_queue +
+							ch->num_queue_pairs;
+		if (ch->max_tx_rate) {
+			u64 credits = ch->max_tx_rate;
+
+			if (i40e_set_bw_limit(vsi, ch->seid,
+					      ch->max_tx_rate))
+				return -EINVAL;
+
+			do_div(credits, I40E_BW_CREDIT_DIVISOR);
+			dev_dbg(&vsi->back->pdev->dev,
+				"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+				ch->max_tx_rate,
+				credits,
+				ch->seid);
+		}
+		ret = i40e_rebuild_cloud_filters(vsi, ch->seid);
+		if (ret) {
+			dev_dbg(&vsi->back->pdev->dev,
+				"Failed to rebuild cloud filters for channel VSI %u\n",
+				ch->seid);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/**
+ * i40e_clean_xps_state - clean xps state for every tx_ring
+ * @vsi: ptr to the VSI
+ **/
+static void i40e_clean_xps_state(struct i40e_vsi *vsi)
+{
+	int i;
+
+	if (vsi->tx_rings)
+		for (i = 0; i < vsi->num_queue_pairs; i++)
+			if (vsi->tx_rings[i])
+				clear_bit(__I40E_TX_XPS_INIT_DONE,
+					  vsi->tx_rings[i]->state);
+}
+
+/**
+ * i40e_prep_for_reset - prep for the core to reset
+ * @pf: board private structure
+ *
+ * Close up the VFs and other things in prep for PF Reset.
+  **/
+static void i40e_prep_for_reset(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	int ret = 0;
+	u32 v;
+
+	clear_bit(__I40E_RESET_INTR_RECEIVED, pf->state);
+	if (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+		return;
+	if (i40e_check_asq_alive(&pf->hw))
+		i40e_vc_notify_reset(pf);
+
+	dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n");
+
+	/* quiesce the VSIs and their queues that are not already DOWN */
+	i40e_pf_quiesce_all_vsi(pf);
+
+	for (v = 0; v < pf->num_alloc_vsi; v++) {
+		if (pf->vsi[v]) {
+			i40e_clean_xps_state(pf->vsi[v]);
+			pf->vsi[v]->seid = 0;
+		}
+	}
+
+	i40e_shutdown_adminq(&pf->hw);
+
+	/* call shutdown HMC */
+	if (hw->hmc.hmc_obj) {
+		ret = i40e_shutdown_lan_hmc(hw);
+		if (ret)
+			dev_warn(&pf->pdev->dev,
+				 "shutdown_lan_hmc failed: %d\n", ret);
+	}
+
+	/* Save the current PTP time so that we can restore the time after the
+	 * reset completes.
+	 */
+	i40e_ptp_save_hw_time(pf);
+}
+
+/**
+ * i40e_send_version - update firmware with driver version
+ * @pf: PF struct
+ */
+static void i40e_send_version(struct i40e_pf *pf)
+{
+	struct i40e_driver_version dv;
+
+	dv.major_version = 0xff;
+	dv.minor_version = 0xff;
+	dv.build_version = 0xff;
+	dv.subbuild_version = 0;
+	strscpy(dv.driver_string, UTS_RELEASE, sizeof(dv.driver_string));
+	i40e_aq_send_driver_version(&pf->hw, &dv, NULL);
+}
+
+/**
+ * i40e_get_oem_version - get OEM specific version information
+ * @hw: pointer to the hardware structure
+ **/
+static void i40e_get_oem_version(struct i40e_hw *hw)
+{
+	u16 block_offset = 0xffff;
+	u16 block_length = 0;
+	u16 capabilities = 0;
+	u16 gen_snap = 0;
+	u16 release = 0;
+
+#define I40E_SR_NVM_OEM_VERSION_PTR		0x1B
+#define I40E_NVM_OEM_LENGTH_OFFSET		0x00
+#define I40E_NVM_OEM_CAPABILITIES_OFFSET	0x01
+#define I40E_NVM_OEM_GEN_OFFSET			0x02
+#define I40E_NVM_OEM_RELEASE_OFFSET		0x03
+#define I40E_NVM_OEM_CAPABILITIES_MASK		0x000F
+#define I40E_NVM_OEM_LENGTH			3
+
+	/* Check if pointer to OEM version block is valid. */
+	i40e_read_nvm_word(hw, I40E_SR_NVM_OEM_VERSION_PTR, &block_offset);
+	if (block_offset == 0xffff)
+		return;
+
+	/* Check if OEM version block has correct length. */
+	i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_LENGTH_OFFSET,
+			   &block_length);
+	if (block_length < I40E_NVM_OEM_LENGTH)
+		return;
+
+	/* Check if OEM version format is as expected. */
+	i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_CAPABILITIES_OFFSET,
+			   &capabilities);
+	if ((capabilities & I40E_NVM_OEM_CAPABILITIES_MASK) != 0)
+		return;
+
+	i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_GEN_OFFSET,
+			   &gen_snap);
+	i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_RELEASE_OFFSET,
+			   &release);
+	hw->nvm.oem_ver = (gen_snap << I40E_OEM_SNAP_SHIFT) | release;
+	hw->nvm.eetrack = I40E_OEM_EETRACK_ID;
+}
+
+/**
+ * i40e_reset - wait for core reset to finish reset, reset pf if corer not seen
+ * @pf: board private structure
+ **/
+static int i40e_reset(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	int ret;
+
+	ret = i40e_pf_reset(hw);
+	if (ret) {
+		dev_info(&pf->pdev->dev, "PF reset failed, %d\n", ret);
+		set_bit(__I40E_RESET_FAILED, pf->state);
+		clear_bit(__I40E_RESET_RECOVERY_PENDING, pf->state);
+	} else {
+		pf->pfr_count++;
+	}
+	return ret;
+}
+
+/**
+ * i40e_rebuild - rebuild using a saved config
+ * @pf: board private structure
+ * @reinit: if the Main VSI needs to re-initialized.
+ * @lock_acquired: indicates whether or not the lock has been acquired
+ * before this function was called.
+ **/
+static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
+{
+	const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf);
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	struct i40e_hw *hw = &pf->hw;
+	int ret;
+	u32 val;
+	int v;
+
+	if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
+	    is_recovery_mode_reported)
+		i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev);
+
+	if (test_bit(__I40E_DOWN, pf->state) &&
+	    !test_bit(__I40E_RECOVERY_MODE, pf->state))
+		goto clear_recovery;
+	dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n");
+
+	/* rebuild the basics for the AdminQ, HMC, and initial HW switch */
+	ret = i40e_init_adminq(&pf->hw);
+	if (ret) {
+		dev_info(&pf->pdev->dev, "Rebuild AdminQ failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		goto clear_recovery;
+	}
+	i40e_get_oem_version(&pf->hw);
+
+	if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) {
+		/* The following delay is necessary for firmware update. */
+		mdelay(1000);
+	}
+
+	/* re-verify the eeprom if we just had an EMP reset */
+	if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state))
+		i40e_verify_eeprom(pf);
+
+	/* if we are going out of or into recovery mode we have to act
+	 * accordingly with regard to resources initialization
+	 * and deinitialization
+	 */
+	if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
+		if (i40e_get_capabilities(pf,
+					  i40e_aqc_opc_list_func_capabilities))
+			goto end_unlock;
+
+		if (is_recovery_mode_reported) {
+			/* we're staying in recovery mode so we'll reinitialize
+			 * misc vector here
+			 */
+			if (i40e_setup_misc_vector_for_recovery_mode(pf))
+				goto end_unlock;
+		} else {
+			if (!lock_acquired)
+				rtnl_lock();
+			/* we're going out of recovery mode so we'll free
+			 * the IRQ allocated specifically for recovery mode
+			 * and restore the interrupt scheme
+			 */
+			free_irq(pf->pdev->irq, pf);
+			i40e_clear_interrupt_scheme(pf);
+			if (i40e_restore_interrupt_scheme(pf))
+				goto end_unlock;
+		}
+
+		/* tell the firmware that we're starting */
+		i40e_send_version(pf);
+
+		/* bail out in case recovery mode was detected, as there is
+		 * no need for further configuration.
+		 */
+		goto end_unlock;
+	}
+
+	i40e_clear_pxe_mode(hw);
+	ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
+	if (ret)
+		goto end_core_reset;
+
+	ret = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
+				hw->func_caps.num_rx_qp, 0, 0);
+	if (ret) {
+		dev_info(&pf->pdev->dev, "init_lan_hmc failed: %d\n", ret);
+		goto end_core_reset;
+	}
+	ret = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY);
+	if (ret) {
+		dev_info(&pf->pdev->dev, "configure_lan_hmc failed: %d\n", ret);
+		goto end_core_reset;
+	}
+
+#ifdef CONFIG_I40E_DCB
+	/* Enable FW to write a default DCB config on link-up
+	 * unless I40E_FLAG_TC_MQPRIO was enabled or DCB
+	 * is not supported with new link speed
+	 */
+	if (i40e_is_tc_mqprio_enabled(pf)) {
+		i40e_aq_set_dcb_parameters(hw, false, NULL);
+	} else {
+		if (I40E_IS_X710TL_DEVICE(hw->device_id) &&
+		    (hw->phy.link_info.link_speed &
+		     (I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB))) {
+			i40e_aq_set_dcb_parameters(hw, false, NULL);
+			dev_warn(&pf->pdev->dev,
+				 "DCB is not supported for X710-T*L 2.5/5G speeds\n");
+			pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
+		} else {
+			i40e_aq_set_dcb_parameters(hw, true, NULL);
+			ret = i40e_init_pf_dcb(pf);
+			if (ret) {
+				dev_info(&pf->pdev->dev, "DCB init failed %d, disabled\n",
+					 ret);
+				pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
+				/* Continue without DCB enabled */
+			}
+		}
+	}
+
+#endif /* CONFIG_I40E_DCB */
+	if (!lock_acquired)
+		rtnl_lock();
+	ret = i40e_setup_pf_switch(pf, reinit, true);
+	if (ret)
+		goto end_unlock;
+
+	/* The driver only wants link up/down and module qualification
+	 * reports from firmware.  Note the negative logic.
+	 */
+	ret = i40e_aq_set_phy_int_mask(&pf->hw,
+				       ~(I40E_AQ_EVENT_LINK_UPDOWN |
+					 I40E_AQ_EVENT_MEDIA_NA |
+					 I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL);
+	if (ret)
+		dev_info(&pf->pdev->dev, "set phy mask fail, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+
+	/* Rebuild the VSIs and VEBs that existed before reset.
+	 * They are still in our local switch element arrays, so only
+	 * need to rebuild the switch model in the HW.
+	 *
+	 * If there were VEBs but the reconstitution failed, we'll try
+	 * to recover minimal use by getting the basic PF VSI working.
+	 */
+	if (vsi->uplink_seid != pf->mac_seid) {
+		dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n");
+		/* find the one VEB connected to the MAC, and find orphans */
+		for (v = 0; v < I40E_MAX_VEB; v++) {
+			if (!pf->veb[v])
+				continue;
+
+			if (pf->veb[v]->uplink_seid == pf->mac_seid ||
+			    pf->veb[v]->uplink_seid == 0) {
+				ret = i40e_reconstitute_veb(pf->veb[v]);
+
+				if (!ret)
+					continue;
+
+				/* If Main VEB failed, we're in deep doodoo,
+				 * so give up rebuilding the switch and set up
+				 * for minimal rebuild of PF VSI.
+				 * If orphan failed, we'll report the error
+				 * but try to keep going.
+				 */
+				if (pf->veb[v]->uplink_seid == pf->mac_seid) {
+					dev_info(&pf->pdev->dev,
+						 "rebuild of switch failed: %d, will try to set up simple PF connection\n",
+						 ret);
+					vsi->uplink_seid = pf->mac_seid;
+					break;
+				} else if (pf->veb[v]->uplink_seid == 0) {
+					dev_info(&pf->pdev->dev,
+						 "rebuild of orphan VEB failed: %d\n",
+						 ret);
+				}
+			}
+		}
+	}
+
+	if (vsi->uplink_seid == pf->mac_seid) {
+		dev_dbg(&pf->pdev->dev, "attempting to rebuild PF VSI\n");
+		/* no VEB, so rebuild only the Main VSI */
+		ret = i40e_add_vsi(vsi);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "rebuild of Main VSI failed: %d\n", ret);
+			goto end_unlock;
+		}
+	}
+
+	if (vsi->mqprio_qopt.max_rate[0]) {
+		u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi,
+						  vsi->mqprio_qopt.max_rate[0]);
+		u64 credits = 0;
+
+		ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+		if (ret)
+			goto end_unlock;
+
+		credits = max_tx_rate;
+		do_div(credits, I40E_BW_CREDIT_DIVISOR);
+		dev_dbg(&vsi->back->pdev->dev,
+			"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+			max_tx_rate,
+			credits,
+			vsi->seid);
+	}
+
+	ret = i40e_rebuild_cloud_filters(vsi, vsi->seid);
+	if (ret)
+		goto end_unlock;
+
+	/* PF Main VSI is rebuild by now, go ahead and rebuild channel VSIs
+	 * for this main VSI if they exist
+	 */
+	ret = i40e_rebuild_channels(vsi);
+	if (ret)
+		goto end_unlock;
+
+	/* Reconfigure hardware for allowing smaller MSS in the case
+	 * of TSO, so that we avoid the MDD being fired and causing
+	 * a reset in the case of small MSS+TSO.
+	 */
+#define I40E_REG_MSS          0x000E64DC
+#define I40E_REG_MSS_MIN_MASK 0x3FF0000
+#define I40E_64BYTE_MSS       0x400000
+	val = rd32(hw, I40E_REG_MSS);
+	if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) {
+		val &= ~I40E_REG_MSS_MIN_MASK;
+		val |= I40E_64BYTE_MSS;
+		wr32(hw, I40E_REG_MSS, val);
+	}
+
+	if (pf->hw_features & I40E_HW_RESTART_AUTONEG) {
+		msleep(75);
+		ret = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
+		if (ret)
+			dev_info(&pf->pdev->dev, "link restart failed, err %pe aq_err %s\n",
+				 ERR_PTR(ret),
+				 i40e_aq_str(&pf->hw,
+					     pf->hw.aq.asq_last_status));
+	}
+	/* reinit the misc interrupt */
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+		ret = i40e_setup_misc_vector(pf);
+		if (ret)
+			goto end_unlock;
+	}
+
+	/* Add a filter to drop all Flow control frames from any VSI from being
+	 * transmitted. By doing so we stop a malicious VF from sending out
+	 * PAUSE or PFC frames and potentially controlling traffic for other
+	 * PF/VF VSIs.
+	 * The FW can still send Flow control frames if enabled.
+	 */
+	i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
+						       pf->main_vsi_seid);
+
+	/* restart the VSIs that were rebuilt and running before the reset */
+	i40e_pf_unquiesce_all_vsi(pf);
+
+	/* Release the RTNL lock before we start resetting VFs */
+	if (!lock_acquired)
+		rtnl_unlock();
+
+	/* Restore promiscuous settings */
+	ret = i40e_set_promiscuous(pf, pf->cur_promisc);
+	if (ret)
+		dev_warn(&pf->pdev->dev,
+			 "Failed to restore promiscuous setting: %s, err %pe aq_err %s\n",
+			 pf->cur_promisc ? "on" : "off",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+
+	i40e_reset_all_vfs(pf, true);
+
+	/* tell the firmware that we're starting */
+	i40e_send_version(pf);
+
+	/* We've already released the lock, so don't do it again */
+	goto end_core_reset;
+
+end_unlock:
+	if (!lock_acquired)
+		rtnl_unlock();
+end_core_reset:
+	clear_bit(__I40E_RESET_FAILED, pf->state);
+clear_recovery:
+	clear_bit(__I40E_RESET_RECOVERY_PENDING, pf->state);
+	clear_bit(__I40E_TIMEOUT_RECOVERY_PENDING, pf->state);
+}
+
+/**
+ * i40e_reset_and_rebuild - reset and rebuild using a saved config
+ * @pf: board private structure
+ * @reinit: if the Main VSI needs to re-initialized.
+ * @lock_acquired: indicates whether or not the lock has been acquired
+ * before this function was called.
+ **/
+static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
+				   bool lock_acquired)
+{
+	int ret;
+
+	if (test_bit(__I40E_IN_REMOVE, pf->state))
+		return;
+	/* Now we wait for GRST to settle out.
+	 * We don't have to delete the VEBs or VSIs from the hw switch
+	 * because the reset will make them disappear.
+	 */
+	ret = i40e_reset(pf);
+	if (!ret)
+		i40e_rebuild(pf, reinit, lock_acquired);
+}
+
+/**
+ * i40e_handle_reset_warning - prep for the PF to reset, reset and rebuild
+ * @pf: board private structure
+ *
+ * Close up the VFs and other things in prep for a Core Reset,
+ * then get ready to rebuild the world.
+ * @lock_acquired: indicates whether or not the lock has been acquired
+ * before this function was called.
+ **/
+static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
+{
+	i40e_prep_for_reset(pf);
+	i40e_reset_and_rebuild(pf, false, lock_acquired);
+}
+
+/**
+ * i40e_handle_mdd_event
+ * @pf: pointer to the PF structure
+ *
+ * Called from the MDD irq handler to identify possibly malicious vfs
+ **/
+static void i40e_handle_mdd_event(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	bool mdd_detected = false;
+	struct i40e_vf *vf;
+	u32 reg;
+	int i;
+
+	if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state))
+		return;
+
+	/* find what triggered the MDD event */
+	reg = rd32(hw, I40E_GL_MDET_TX);
+	if (reg & I40E_GL_MDET_TX_VALID_MASK) {
+		u8 pf_num = (reg & I40E_GL_MDET_TX_PF_NUM_MASK) >>
+				I40E_GL_MDET_TX_PF_NUM_SHIFT;
+		u16 vf_num = (reg & I40E_GL_MDET_TX_VF_NUM_MASK) >>
+				I40E_GL_MDET_TX_VF_NUM_SHIFT;
+		u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >>
+				I40E_GL_MDET_TX_EVENT_SHIFT;
+		u16 queue = ((reg & I40E_GL_MDET_TX_QUEUE_MASK) >>
+				I40E_GL_MDET_TX_QUEUE_SHIFT) -
+				pf->hw.func_caps.base_queue;
+		if (netif_msg_tx_err(pf))
+			dev_info(&pf->pdev->dev, "Malicious Driver Detection event 0x%02x on TX queue %d PF number 0x%02x VF number 0x%02x\n",
+				 event, queue, pf_num, vf_num);
+		wr32(hw, I40E_GL_MDET_TX, 0xffffffff);
+		mdd_detected = true;
+	}
+	reg = rd32(hw, I40E_GL_MDET_RX);
+	if (reg & I40E_GL_MDET_RX_VALID_MASK) {
+		u8 func = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >>
+				I40E_GL_MDET_RX_FUNCTION_SHIFT;
+		u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >>
+				I40E_GL_MDET_RX_EVENT_SHIFT;
+		u16 queue = ((reg & I40E_GL_MDET_RX_QUEUE_MASK) >>
+				I40E_GL_MDET_RX_QUEUE_SHIFT) -
+				pf->hw.func_caps.base_queue;
+		if (netif_msg_rx_err(pf))
+			dev_info(&pf->pdev->dev, "Malicious Driver Detection event 0x%02x on RX queue %d of function 0x%02x\n",
+				 event, queue, func);
+		wr32(hw, I40E_GL_MDET_RX, 0xffffffff);
+		mdd_detected = true;
+	}
+
+	if (mdd_detected) {
+		reg = rd32(hw, I40E_PF_MDET_TX);
+		if (reg & I40E_PF_MDET_TX_VALID_MASK) {
+			wr32(hw, I40E_PF_MDET_TX, 0xFFFF);
+			dev_dbg(&pf->pdev->dev, "TX driver issue detected on PF\n");
+		}
+		reg = rd32(hw, I40E_PF_MDET_RX);
+		if (reg & I40E_PF_MDET_RX_VALID_MASK) {
+			wr32(hw, I40E_PF_MDET_RX, 0xFFFF);
+			dev_dbg(&pf->pdev->dev, "RX driver issue detected on PF\n");
+		}
+	}
+
+	/* see if one of the VFs needs its hand slapped */
+	for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
+		vf = &(pf->vf[i]);
+		reg = rd32(hw, I40E_VP_MDET_TX(i));
+		if (reg & I40E_VP_MDET_TX_VALID_MASK) {
+			wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
+			vf->num_mdd_events++;
+			dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
+				 i);
+			dev_info(&pf->pdev->dev,
+				 "Use PF Control I/F to re-enable the VF\n");
+			set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
+		}
+
+		reg = rd32(hw, I40E_VP_MDET_RX(i));
+		if (reg & I40E_VP_MDET_RX_VALID_MASK) {
+			wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
+			vf->num_mdd_events++;
+			dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
+				 i);
+			dev_info(&pf->pdev->dev,
+				 "Use PF Control I/F to re-enable the VF\n");
+			set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
+		}
+	}
+
+	/* re-enable mdd interrupt cause */
+	clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
+	reg = rd32(hw, I40E_PFINT_ICR0_ENA);
+	reg |=  I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
+	wr32(hw, I40E_PFINT_ICR0_ENA, reg);
+	i40e_flush(hw);
+}
+
+/**
+ * i40e_service_task - Run the driver's async subtasks
+ * @work: pointer to work_struct containing our data
+ **/
+static void i40e_service_task(struct work_struct *work)
+{
+	struct i40e_pf *pf = container_of(work,
+					  struct i40e_pf,
+					  service_task);
+	unsigned long start_time = jiffies;
+
+	/* don't bother with service tasks if a reset is in progress */
+	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+	    test_bit(__I40E_SUSPENDED, pf->state))
+		return;
+
+	if (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state))
+		return;
+
+	if (!test_bit(__I40E_RECOVERY_MODE, pf->state)) {
+		i40e_detect_recover_hung(pf->vsi[pf->lan_vsi]);
+		i40e_sync_filters_subtask(pf);
+		i40e_reset_subtask(pf);
+		i40e_handle_mdd_event(pf);
+		i40e_vc_process_vflr_event(pf);
+		i40e_watchdog_subtask(pf);
+		i40e_fdir_reinit_subtask(pf);
+		if (test_and_clear_bit(__I40E_CLIENT_RESET, pf->state)) {
+			/* Client subtask will reopen next time through. */
+			i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi],
+							   true);
+		} else {
+			i40e_client_subtask(pf);
+			if (test_and_clear_bit(__I40E_CLIENT_L2_CHANGE,
+					       pf->state))
+				i40e_notify_client_of_l2_param_changes(
+								pf->vsi[pf->lan_vsi]);
+		}
+		i40e_sync_filters_subtask(pf);
+	} else {
+		i40e_reset_subtask(pf);
+	}
+
+	i40e_clean_adminq_subtask(pf);
+
+	/* flush memory to make sure state is correct before next watchdog */
+	smp_mb__before_atomic();
+	clear_bit(__I40E_SERVICE_SCHED, pf->state);
+
+	/* If the tasks have taken longer than one timer cycle or there
+	 * is more work to be done, reschedule the service task now
+	 * rather than wait for the timer to tick again.
+	 */
+	if (time_after(jiffies, (start_time + pf->service_timer_period)) ||
+	    test_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state)		 ||
+	    test_bit(__I40E_MDD_EVENT_PENDING, pf->state)		 ||
+	    test_bit(__I40E_VFLR_EVENT_PENDING, pf->state))
+		i40e_service_event_schedule(pf);
+}
+
+/**
+ * i40e_service_timer - timer callback
+ * @t: timer list pointer
+ **/
+static void i40e_service_timer(struct timer_list *t)
+{
+	struct i40e_pf *pf = from_timer(pf, t, service_timer);
+
+	mod_timer(&pf->service_timer,
+		  round_jiffies(jiffies + pf->service_timer_period));
+	i40e_service_event_schedule(pf);
+}
+
+/**
+ * i40e_set_num_rings_in_vsi - Determine number of rings in the VSI
+ * @vsi: the VSI being configured
+ **/
+static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+
+	switch (vsi->type) {
+	case I40E_VSI_MAIN:
+		vsi->alloc_queue_pairs = pf->num_lan_qps;
+		if (!vsi->num_tx_desc)
+			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+						 I40E_REQ_DESCRIPTOR_MULTIPLE);
+		if (!vsi->num_rx_desc)
+			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+						 I40E_REQ_DESCRIPTOR_MULTIPLE);
+		if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+			vsi->num_q_vectors = pf->num_lan_msix;
+		else
+			vsi->num_q_vectors = 1;
+
+		break;
+
+	case I40E_VSI_FDIR:
+		vsi->alloc_queue_pairs = 1;
+		vsi->num_tx_desc = ALIGN(I40E_FDIR_RING_COUNT,
+					 I40E_REQ_DESCRIPTOR_MULTIPLE);
+		vsi->num_rx_desc = ALIGN(I40E_FDIR_RING_COUNT,
+					 I40E_REQ_DESCRIPTOR_MULTIPLE);
+		vsi->num_q_vectors = pf->num_fdsb_msix;
+		break;
+
+	case I40E_VSI_VMDQ2:
+		vsi->alloc_queue_pairs = pf->num_vmdq_qps;
+		if (!vsi->num_tx_desc)
+			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+						 I40E_REQ_DESCRIPTOR_MULTIPLE);
+		if (!vsi->num_rx_desc)
+			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+						 I40E_REQ_DESCRIPTOR_MULTIPLE);
+		vsi->num_q_vectors = pf->num_vmdq_msix;
+		break;
+
+	case I40E_VSI_SRIOV:
+		vsi->alloc_queue_pairs = pf->num_vf_qps;
+		if (!vsi->num_tx_desc)
+			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+						 I40E_REQ_DESCRIPTOR_MULTIPLE);
+		if (!vsi->num_rx_desc)
+			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+						 I40E_REQ_DESCRIPTOR_MULTIPLE);
+		break;
+
+	default:
+		WARN_ON(1);
+		return -ENODATA;
+	}
+
+	if (is_kdump_kernel()) {
+		vsi->num_tx_desc = I40E_MIN_NUM_DESCRIPTORS;
+		vsi->num_rx_desc = I40E_MIN_NUM_DESCRIPTORS;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi
+ * @vsi: VSI pointer
+ * @alloc_qvectors: a bool to specify if q_vectors need to be allocated.
+ *
+ * On error: returns error code (negative)
+ * On success: returns 0
+ **/
+static int i40e_vsi_alloc_arrays(struct i40e_vsi *vsi, bool alloc_qvectors)
+{
+	struct i40e_ring **next_rings;
+	int size;
+	int ret = 0;
+
+	/* allocate memory for both Tx, XDP Tx and Rx ring pointers */
+	size = sizeof(struct i40e_ring *) * vsi->alloc_queue_pairs *
+	       (i40e_enabled_xdp_vsi(vsi) ? 3 : 2);
+	vsi->tx_rings = kzalloc(size, GFP_KERNEL);
+	if (!vsi->tx_rings)
+		return -ENOMEM;
+	next_rings = vsi->tx_rings + vsi->alloc_queue_pairs;
+	if (i40e_enabled_xdp_vsi(vsi)) {
+		vsi->xdp_rings = next_rings;
+		next_rings += vsi->alloc_queue_pairs;
+	}
+	vsi->rx_rings = next_rings;
+
+	if (alloc_qvectors) {
+		/* allocate memory for q_vector pointers */
+		size = sizeof(struct i40e_q_vector *) * vsi->num_q_vectors;
+		vsi->q_vectors = kzalloc(size, GFP_KERNEL);
+		if (!vsi->q_vectors) {
+			ret = -ENOMEM;
+			goto err_vectors;
+		}
+	}
+	return ret;
+
+err_vectors:
+	kfree(vsi->tx_rings);
+	return ret;
+}
+
+/**
+ * i40e_vsi_mem_alloc - Allocates the next available struct vsi in the PF
+ * @pf: board private structure
+ * @type: type of VSI
+ *
+ * On error: returns error code (negative)
+ * On success: returns vsi index in PF (positive)
+ **/
+static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
+{
+	int ret = -ENODEV;
+	struct i40e_vsi *vsi;
+	int vsi_idx;
+	int i;
+
+	/* Need to protect the allocation of the VSIs at the PF level */
+	mutex_lock(&pf->switch_mutex);
+
+	/* VSI list may be fragmented if VSI creation/destruction has
+	 * been happening.  We can afford to do a quick scan to look
+	 * for any free VSIs in the list.
+	 *
+	 * find next empty vsi slot, looping back around if necessary
+	 */
+	i = pf->next_vsi;
+	while (i < pf->num_alloc_vsi && pf->vsi[i])
+		i++;
+	if (i >= pf->num_alloc_vsi) {
+		i = 0;
+		while (i < pf->next_vsi && pf->vsi[i])
+			i++;
+	}
+
+	if (i < pf->num_alloc_vsi && !pf->vsi[i]) {
+		vsi_idx = i;             /* Found one! */
+	} else {
+		ret = -ENODEV;
+		goto unlock_pf;  /* out of VSI slots! */
+	}
+	pf->next_vsi = ++i;
+
+	vsi = kzalloc(sizeof(*vsi), GFP_KERNEL);
+	if (!vsi) {
+		ret = -ENOMEM;
+		goto unlock_pf;
+	}
+	vsi->type = type;
+	vsi->back = pf;
+	set_bit(__I40E_VSI_DOWN, vsi->state);
+	vsi->flags = 0;
+	vsi->idx = vsi_idx;
+	vsi->int_rate_limit = 0;
+	vsi->rss_table_size = (vsi->type == I40E_VSI_MAIN) ?
+				pf->rss_table_size : 64;
+	vsi->netdev_registered = false;
+	vsi->work_limit = I40E_DEFAULT_IRQ_WORK;
+	hash_init(vsi->mac_filter_hash);
+	vsi->irqs_ready = false;
+
+	if (type == I40E_VSI_MAIN) {
+		vsi->af_xdp_zc_qps = bitmap_zalloc(pf->num_lan_qps, GFP_KERNEL);
+		if (!vsi->af_xdp_zc_qps)
+			goto err_rings;
+	}
+
+	ret = i40e_set_num_rings_in_vsi(vsi);
+	if (ret)
+		goto err_rings;
+
+	ret = i40e_vsi_alloc_arrays(vsi, true);
+	if (ret)
+		goto err_rings;
+
+	/* Setup default MSIX irq handler for VSI */
+	i40e_vsi_setup_irqhandler(vsi, i40e_msix_clean_rings);
+
+	/* Initialize VSI lock */
+	spin_lock_init(&vsi->mac_filter_hash_lock);
+	pf->vsi[vsi_idx] = vsi;
+	ret = vsi_idx;
+	goto unlock_pf;
+
+err_rings:
+	bitmap_free(vsi->af_xdp_zc_qps);
+	pf->next_vsi = i - 1;
+	kfree(vsi);
+unlock_pf:
+	mutex_unlock(&pf->switch_mutex);
+	return ret;
+}
+
+/**
+ * i40e_vsi_free_arrays - Free queue and vector pointer arrays for the VSI
+ * @vsi: VSI pointer
+ * @free_qvectors: a bool to specify if q_vectors need to be freed.
+ *
+ * On error: returns error code (negative)
+ * On success: returns 0
+ **/
+static void i40e_vsi_free_arrays(struct i40e_vsi *vsi, bool free_qvectors)
+{
+	/* free the ring and vector containers */
+	if (free_qvectors) {
+		kfree(vsi->q_vectors);
+		vsi->q_vectors = NULL;
+	}
+	kfree(vsi->tx_rings);
+	vsi->tx_rings = NULL;
+	vsi->rx_rings = NULL;
+	vsi->xdp_rings = NULL;
+}
+
+/**
+ * i40e_clear_rss_config_user - clear the user configured RSS hash keys
+ * and lookup table
+ * @vsi: Pointer to VSI structure
+ */
+static void i40e_clear_rss_config_user(struct i40e_vsi *vsi)
+{
+	if (!vsi)
+		return;
+
+	kfree(vsi->rss_hkey_user);
+	vsi->rss_hkey_user = NULL;
+
+	kfree(vsi->rss_lut_user);
+	vsi->rss_lut_user = NULL;
+}
+
+/**
+ * i40e_vsi_clear - Deallocate the VSI provided
+ * @vsi: the VSI being un-configured
+ **/
+static int i40e_vsi_clear(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf;
+
+	if (!vsi)
+		return 0;
+
+	if (!vsi->back)
+		goto free_vsi;
+	pf = vsi->back;
+
+	mutex_lock(&pf->switch_mutex);
+	if (!pf->vsi[vsi->idx]) {
+		dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](type %d)\n",
+			vsi->idx, vsi->idx, vsi->type);
+		goto unlock_vsi;
+	}
+
+	if (pf->vsi[vsi->idx] != vsi) {
+		dev_err(&pf->pdev->dev,
+			"pf->vsi[%d](type %d) != vsi[%d](type %d): no free!\n",
+			pf->vsi[vsi->idx]->idx,
+			pf->vsi[vsi->idx]->type,
+			vsi->idx, vsi->type);
+		goto unlock_vsi;
+	}
+
+	/* updates the PF for this cleared vsi */
+	i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx);
+	i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx);
+
+	bitmap_free(vsi->af_xdp_zc_qps);
+	i40e_vsi_free_arrays(vsi, true);
+	i40e_clear_rss_config_user(vsi);
+
+	pf->vsi[vsi->idx] = NULL;
+	if (vsi->idx < pf->next_vsi)
+		pf->next_vsi = vsi->idx;
+
+unlock_vsi:
+	mutex_unlock(&pf->switch_mutex);
+free_vsi:
+	kfree(vsi);
+
+	return 0;
+}
+
+/**
+ * i40e_vsi_clear_rings - Deallocates the Rx and Tx rings for the provided VSI
+ * @vsi: the VSI being cleaned
+ **/
+static void i40e_vsi_clear_rings(struct i40e_vsi *vsi)
+{
+	int i;
+
+	if (vsi->tx_rings && vsi->tx_rings[0]) {
+		for (i = 0; i < vsi->alloc_queue_pairs; i++) {
+			kfree_rcu(vsi->tx_rings[i], rcu);
+			WRITE_ONCE(vsi->tx_rings[i], NULL);
+			WRITE_ONCE(vsi->rx_rings[i], NULL);
+			if (vsi->xdp_rings)
+				WRITE_ONCE(vsi->xdp_rings[i], NULL);
+		}
+	}
+}
+
+/**
+ * i40e_alloc_rings - Allocates the Rx and Tx rings for the provided VSI
+ * @vsi: the VSI being configured
+ **/
+static int i40e_alloc_rings(struct i40e_vsi *vsi)
+{
+	int i, qpv = i40e_enabled_xdp_vsi(vsi) ? 3 : 2;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_ring *ring;
+
+	/* Set basic values in the rings to be used later during open() */
+	for (i = 0; i < vsi->alloc_queue_pairs; i++) {
+		/* allocate space for both Tx and Rx in one shot */
+		ring = kcalloc(qpv, sizeof(struct i40e_ring), GFP_KERNEL);
+		if (!ring)
+			goto err_out;
+
+		ring->queue_index = i;
+		ring->reg_idx = vsi->base_queue + i;
+		ring->ring_active = false;
+		ring->vsi = vsi;
+		ring->netdev = vsi->netdev;
+		ring->dev = &pf->pdev->dev;
+		ring->count = vsi->num_tx_desc;
+		ring->size = 0;
+		ring->dcb_tc = 0;
+		if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
+			ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
+		ring->itr_setting = pf->tx_itr_default;
+		WRITE_ONCE(vsi->tx_rings[i], ring++);
+
+		if (!i40e_enabled_xdp_vsi(vsi))
+			goto setup_rx;
+
+		ring->queue_index = vsi->alloc_queue_pairs + i;
+		ring->reg_idx = vsi->base_queue + ring->queue_index;
+		ring->ring_active = false;
+		ring->vsi = vsi;
+		ring->netdev = NULL;
+		ring->dev = &pf->pdev->dev;
+		ring->count = vsi->num_tx_desc;
+		ring->size = 0;
+		ring->dcb_tc = 0;
+		if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE)
+			ring->flags = I40E_TXR_FLAGS_WB_ON_ITR;
+		set_ring_xdp(ring);
+		ring->itr_setting = pf->tx_itr_default;
+		WRITE_ONCE(vsi->xdp_rings[i], ring++);
+
+setup_rx:
+		ring->queue_index = i;
+		ring->reg_idx = vsi->base_queue + i;
+		ring->ring_active = false;
+		ring->vsi = vsi;
+		ring->netdev = vsi->netdev;
+		ring->dev = &pf->pdev->dev;
+		ring->count = vsi->num_rx_desc;
+		ring->size = 0;
+		ring->dcb_tc = 0;
+		ring->itr_setting = pf->rx_itr_default;
+		WRITE_ONCE(vsi->rx_rings[i], ring);
+	}
+
+	return 0;
+
+err_out:
+	i40e_vsi_clear_rings(vsi);
+	return -ENOMEM;
+}
+
+/**
+ * i40e_reserve_msix_vectors - Reserve MSI-X vectors in the kernel
+ * @pf: board private structure
+ * @vectors: the number of MSI-X vectors to request
+ *
+ * Returns the number of vectors reserved, or error
+ **/
+static int i40e_reserve_msix_vectors(struct i40e_pf *pf, int vectors)
+{
+	vectors = pci_enable_msix_range(pf->pdev, pf->msix_entries,
+					I40E_MIN_MSIX, vectors);
+	if (vectors < 0) {
+		dev_info(&pf->pdev->dev,
+			 "MSI-X vector reservation failed: %d\n", vectors);
+		vectors = 0;
+	}
+
+	return vectors;
+}
+
+/**
+ * i40e_init_msix - Setup the MSIX capability
+ * @pf: board private structure
+ *
+ * Work with the OS to set up the MSIX vectors needed.
+ *
+ * Returns the number of vectors reserved or negative on failure
+ **/
+static int i40e_init_msix(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	int cpus, extra_vectors;
+	int vectors_left;
+	int v_budget, i;
+	int v_actual;
+	int iwarp_requested = 0;
+
+	if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
+		return -ENODEV;
+
+	/* The number of vectors we'll request will be comprised of:
+	 *   - Add 1 for "other" cause for Admin Queue events, etc.
+	 *   - The number of LAN queue pairs
+	 *	- Queues being used for RSS.
+	 *		We don't need as many as max_rss_size vectors.
+	 *		use rss_size instead in the calculation since that
+	 *		is governed by number of cpus in the system.
+	 *	- assumes symmetric Tx/Rx pairing
+	 *   - The number of VMDq pairs
+	 *   - The CPU count within the NUMA node if iWARP is enabled
+	 * Once we count this up, try the request.
+	 *
+	 * If we can't get what we want, we'll simplify to nearly nothing
+	 * and try again.  If that still fails, we punt.
+	 */
+	vectors_left = hw->func_caps.num_msix_vectors;
+	v_budget = 0;
+
+	/* reserve one vector for miscellaneous handler */
+	if (vectors_left) {
+		v_budget++;
+		vectors_left--;
+	}
+
+	/* reserve some vectors for the main PF traffic queues. Initially we
+	 * only reserve at most 50% of the available vectors, in the case that
+	 * the number of online CPUs is large. This ensures that we can enable
+	 * extra features as well. Once we've enabled the other features, we
+	 * will use any remaining vectors to reach as close as we can to the
+	 * number of online CPUs.
+	 */
+	cpus = num_online_cpus();
+	pf->num_lan_msix = min_t(int, cpus, vectors_left / 2);
+	vectors_left -= pf->num_lan_msix;
+
+	/* reserve one vector for sideband flow director */
+	if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
+		if (vectors_left) {
+			pf->num_fdsb_msix = 1;
+			v_budget++;
+			vectors_left--;
+		} else {
+			pf->num_fdsb_msix = 0;
+		}
+	}
+
+	/* can we reserve enough for iWARP? */
+	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+		iwarp_requested = pf->num_iwarp_msix;
+
+		if (!vectors_left)
+			pf->num_iwarp_msix = 0;
+		else if (vectors_left < pf->num_iwarp_msix)
+			pf->num_iwarp_msix = 1;
+		v_budget += pf->num_iwarp_msix;
+		vectors_left -= pf->num_iwarp_msix;
+	}
+
+	/* any vectors left over go for VMDq support */
+	if (pf->flags & I40E_FLAG_VMDQ_ENABLED) {
+		if (!vectors_left) {
+			pf->num_vmdq_msix = 0;
+			pf->num_vmdq_qps = 0;
+		} else {
+			int vmdq_vecs_wanted =
+				pf->num_vmdq_vsis * pf->num_vmdq_qps;
+			int vmdq_vecs =
+				min_t(int, vectors_left, vmdq_vecs_wanted);
+
+			/* if we're short on vectors for what's desired, we limit
+			 * the queues per vmdq.  If this is still more than are
+			 * available, the user will need to change the number of
+			 * queues/vectors used by the PF later with the ethtool
+			 * channels command
+			 */
+			if (vectors_left < vmdq_vecs_wanted) {
+				pf->num_vmdq_qps = 1;
+				vmdq_vecs_wanted = pf->num_vmdq_vsis;
+				vmdq_vecs = min_t(int,
+						  vectors_left,
+						  vmdq_vecs_wanted);
+			}
+			pf->num_vmdq_msix = pf->num_vmdq_qps;
+
+			v_budget += vmdq_vecs;
+			vectors_left -= vmdq_vecs;
+		}
+	}
+
+	/* On systems with a large number of SMP cores, we previously limited
+	 * the number of vectors for num_lan_msix to be at most 50% of the
+	 * available vectors, to allow for other features. Now, we add back
+	 * the remaining vectors. However, we ensure that the total
+	 * num_lan_msix will not exceed num_online_cpus(). To do this, we
+	 * calculate the number of vectors we can add without going over the
+	 * cap of CPUs. For systems with a small number of CPUs this will be
+	 * zero.
+	 */
+	extra_vectors = min_t(int, cpus - pf->num_lan_msix, vectors_left);
+	pf->num_lan_msix += extra_vectors;
+	vectors_left -= extra_vectors;
+
+	WARN(vectors_left < 0,
+	     "Calculation of remaining vectors underflowed. This is an accounting bug when determining total MSI-X vectors.\n");
+
+	v_budget += pf->num_lan_msix;
+	pf->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry),
+				   GFP_KERNEL);
+	if (!pf->msix_entries)
+		return -ENOMEM;
+
+	for (i = 0; i < v_budget; i++)
+		pf->msix_entries[i].entry = i;
+	v_actual = i40e_reserve_msix_vectors(pf, v_budget);
+
+	if (v_actual < I40E_MIN_MSIX) {
+		pf->flags &= ~I40E_FLAG_MSIX_ENABLED;
+		kfree(pf->msix_entries);
+		pf->msix_entries = NULL;
+		pci_disable_msix(pf->pdev);
+		return -ENODEV;
+
+	} else if (v_actual == I40E_MIN_MSIX) {
+		/* Adjust for minimal MSIX use */
+		pf->num_vmdq_vsis = 0;
+		pf->num_vmdq_qps = 0;
+		pf->num_lan_qps = 1;
+		pf->num_lan_msix = 1;
+
+	} else if (v_actual != v_budget) {
+		/* If we have limited resources, we will start with no vectors
+		 * for the special features and then allocate vectors to some
+		 * of these features based on the policy and at the end disable
+		 * the features that did not get any vectors.
+		 */
+		int vec;
+
+		dev_info(&pf->pdev->dev,
+			 "MSI-X vector limit reached with %d, wanted %d, attempting to redistribute vectors\n",
+			 v_actual, v_budget);
+		/* reserve the misc vector */
+		vec = v_actual - 1;
+
+		/* Scale vector usage down */
+		pf->num_vmdq_msix = 1;    /* force VMDqs to only one vector */
+		pf->num_vmdq_vsis = 1;
+		pf->num_vmdq_qps = 1;
+
+		/* partition out the remaining vectors */
+		switch (vec) {
+		case 2:
+			pf->num_lan_msix = 1;
+			break;
+		case 3:
+			if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+				pf->num_lan_msix = 1;
+				pf->num_iwarp_msix = 1;
+			} else {
+				pf->num_lan_msix = 2;
+			}
+			break;
+		default:
+			if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+				pf->num_iwarp_msix = min_t(int, (vec / 3),
+						 iwarp_requested);
+				pf->num_vmdq_vsis = min_t(int, (vec / 3),
+						  I40E_DEFAULT_NUM_VMDQ_VSI);
+			} else {
+				pf->num_vmdq_vsis = min_t(int, (vec / 2),
+						  I40E_DEFAULT_NUM_VMDQ_VSI);
+			}
+			if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
+				pf->num_fdsb_msix = 1;
+				vec--;
+			}
+			pf->num_lan_msix = min_t(int,
+			       (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)),
+							      pf->num_lan_msix);
+			pf->num_lan_qps = pf->num_lan_msix;
+			break;
+		}
+	}
+
+	if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
+	    (pf->num_fdsb_msix == 0)) {
+		dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n");
+		pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+	}
+	if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
+	    (pf->num_vmdq_msix == 0)) {
+		dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n");
+		pf->flags &= ~I40E_FLAG_VMDQ_ENABLED;
+	}
+
+	if ((pf->flags & I40E_FLAG_IWARP_ENABLED) &&
+	    (pf->num_iwarp_msix == 0)) {
+		dev_info(&pf->pdev->dev, "IWARP disabled, not enough MSI-X vectors\n");
+		pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
+	}
+	i40e_debug(&pf->hw, I40E_DEBUG_INIT,
+		   "MSI-X vector distribution: PF %d, VMDq %d, FDSB %d, iWARP %d\n",
+		   pf->num_lan_msix,
+		   pf->num_vmdq_msix * pf->num_vmdq_vsis,
+		   pf->num_fdsb_msix,
+		   pf->num_iwarp_msix);
+
+	return v_actual;
+}
+
+/**
+ * i40e_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @vsi: the VSI being configured
+ * @v_idx: index of the vector in the vsi struct
+ *
+ * We allocate one q_vector.  If allocation fails we return -ENOMEM.
+ **/
+static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
+{
+	struct i40e_q_vector *q_vector;
+
+	/* allocate q_vector */
+	q_vector = kzalloc(sizeof(struct i40e_q_vector), GFP_KERNEL);
+	if (!q_vector)
+		return -ENOMEM;
+
+	q_vector->vsi = vsi;
+	q_vector->v_idx = v_idx;
+	cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask);
+
+	if (vsi->netdev)
+		netif_napi_add(vsi->netdev, &q_vector->napi, i40e_napi_poll);
+
+	/* tie q_vector and vsi together */
+	vsi->q_vectors[v_idx] = q_vector;
+
+	return 0;
+}
+
+/**
+ * i40e_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * We allocate one q_vector per queue interrupt.  If allocation fails we
+ * return -ENOMEM.
+ **/
+static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	int err, v_idx, num_q_vectors;
+
+	/* if not MSIX, give the one vector only to the LAN VSI */
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+		num_q_vectors = vsi->num_q_vectors;
+	else if (vsi == pf->vsi[pf->lan_vsi])
+		num_q_vectors = 1;
+	else
+		return -EINVAL;
+
+	for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
+		err = i40e_vsi_alloc_q_vector(vsi, v_idx);
+		if (err)
+			goto err_out;
+	}
+
+	return 0;
+
+err_out:
+	while (v_idx--)
+		i40e_free_q_vector(vsi, v_idx);
+
+	return err;
+}
+
+/**
+ * i40e_init_interrupt_scheme - Determine proper interrupt scheme
+ * @pf: board private structure to initialize
+ **/
+static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
+{
+	int vectors = 0;
+	ssize_t size;
+
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+		vectors = i40e_init_msix(pf);
+		if (vectors < 0) {
+			pf->flags &= ~(I40E_FLAG_MSIX_ENABLED	|
+				       I40E_FLAG_IWARP_ENABLED	|
+				       I40E_FLAG_RSS_ENABLED	|
+				       I40E_FLAG_DCB_CAPABLE	|
+				       I40E_FLAG_DCB_ENABLED	|
+				       I40E_FLAG_SRIOV_ENABLED	|
+				       I40E_FLAG_FD_SB_ENABLED	|
+				       I40E_FLAG_FD_ATR_ENABLED	|
+				       I40E_FLAG_VMDQ_ENABLED);
+			pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+
+			/* rework the queue expectations without MSIX */
+			i40e_determine_queue_usage(pf);
+		}
+	}
+
+	if (!(pf->flags & I40E_FLAG_MSIX_ENABLED) &&
+	    (pf->flags & I40E_FLAG_MSI_ENABLED)) {
+		dev_info(&pf->pdev->dev, "MSI-X not available, trying MSI\n");
+		vectors = pci_enable_msi(pf->pdev);
+		if (vectors < 0) {
+			dev_info(&pf->pdev->dev, "MSI init failed - %d\n",
+				 vectors);
+			pf->flags &= ~I40E_FLAG_MSI_ENABLED;
+		}
+		vectors = 1;  /* one MSI or Legacy vector */
+	}
+
+	if (!(pf->flags & (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED)))
+		dev_info(&pf->pdev->dev, "MSI-X and MSI not available, falling back to Legacy IRQ\n");
+
+	/* set up vector assignment tracking */
+	size = sizeof(struct i40e_lump_tracking) + (sizeof(u16) * vectors);
+	pf->irq_pile = kzalloc(size, GFP_KERNEL);
+	if (!pf->irq_pile)
+		return -ENOMEM;
+
+	pf->irq_pile->num_entries = vectors;
+
+	/* track first vector for misc interrupts, ignore return */
+	(void)i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT - 1);
+
+	return 0;
+}
+
+/**
+ * i40e_restore_interrupt_scheme - Restore the interrupt scheme
+ * @pf: private board data structure
+ *
+ * Restore the interrupt scheme that was cleared when we suspended the
+ * device. This should be called during resume to re-allocate the q_vectors
+ * and reacquire IRQs.
+ */
+static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
+{
+	int err, i;
+
+	/* We cleared the MSI and MSI-X flags when disabling the old interrupt
+	 * scheme. We need to re-enabled them here in order to attempt to
+	 * re-acquire the MSI or MSI-X vectors
+	 */
+	pf->flags |= (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED);
+
+	err = i40e_init_interrupt_scheme(pf);
+	if (err)
+		return err;
+
+	/* Now that we've re-acquired IRQs, we need to remap the vectors and
+	 * rings together again.
+	 */
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
+		if (pf->vsi[i]) {
+			err = i40e_vsi_alloc_q_vectors(pf->vsi[i]);
+			if (err)
+				goto err_unwind;
+			i40e_vsi_map_rings_to_vectors(pf->vsi[i]);
+		}
+	}
+
+	err = i40e_setup_misc_vector(pf);
+	if (err)
+		goto err_unwind;
+
+	if (pf->flags & I40E_FLAG_IWARP_ENABLED)
+		i40e_client_update_msix_info(pf);
+
+	return 0;
+
+err_unwind:
+	while (i--) {
+		if (pf->vsi[i])
+			i40e_vsi_free_q_vectors(pf->vsi[i]);
+	}
+
+	return err;
+}
+
+/**
+ * i40e_setup_misc_vector_for_recovery_mode - Setup the misc vector to handle
+ * non queue events in recovery mode
+ * @pf: board private structure
+ *
+ * This sets up the handler for MSIX 0 or MSI/legacy, which is used to manage
+ * the non-queue interrupts, e.g. AdminQ and errors in recovery mode.
+ * This is handled differently than in recovery mode since no Tx/Rx resources
+ * are being allocated.
+ **/
+static int i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf)
+{
+	int err;
+
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+		err = i40e_setup_misc_vector(pf);
+
+		if (err) {
+			dev_info(&pf->pdev->dev,
+				 "MSI-X misc vector request failed, error %d\n",
+				 err);
+			return err;
+		}
+	} else {
+		u32 flags = pf->flags & I40E_FLAG_MSI_ENABLED ? 0 : IRQF_SHARED;
+
+		err = request_irq(pf->pdev->irq, i40e_intr, flags,
+				  pf->int_name, pf);
+
+		if (err) {
+			dev_info(&pf->pdev->dev,
+				 "MSI/legacy misc vector request failed, error %d\n",
+				 err);
+			return err;
+		}
+		i40e_enable_misc_int_causes(pf);
+		i40e_irq_dynamic_enable_icr0(pf);
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_setup_misc_vector - Setup the misc vector to handle non queue events
+ * @pf: board private structure
+ *
+ * This sets up the handler for MSIX 0, which is used to manage the
+ * non-queue interrupts, e.g. AdminQ and errors.  This is not used
+ * when in MSI or Legacy interrupt mode.
+ **/
+static int i40e_setup_misc_vector(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	int err = 0;
+
+	/* Only request the IRQ once, the first time through. */
+	if (!test_and_set_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) {
+		err = request_irq(pf->msix_entries[0].vector,
+				  i40e_intr, 0, pf->int_name, pf);
+		if (err) {
+			clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
+			dev_info(&pf->pdev->dev,
+				 "request_irq for %s failed: %d\n",
+				 pf->int_name, err);
+			return -EFAULT;
+		}
+	}
+
+	i40e_enable_misc_int_causes(pf);
+
+	/* associate no queues to the misc vector */
+	wr32(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_END_OF_LIST);
+	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), I40E_ITR_8K >> 1);
+
+	i40e_flush(hw);
+
+	i40e_irq_dynamic_enable_icr0(pf);
+
+	return err;
+}
+
+/**
+ * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands
+ * @vsi: Pointer to vsi structure
+ * @seed: Buffter to store the hash keys
+ * @lut: Buffer to store the lookup table entries
+ * @lut_size: Size of buffer to store the lookup table entries
+ *
+ * Return 0 on success, negative on failure
+ */
+static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
+			   u8 *lut, u16 lut_size)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	int ret = 0;
+
+	if (seed) {
+		ret = i40e_aq_get_rss_key(hw, vsi->id,
+			(struct i40e_aqc_get_set_rss_key_data *)seed);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Cannot get RSS key, err %pe aq_err %s\n",
+				 ERR_PTR(ret),
+				 i40e_aq_str(&pf->hw,
+					     pf->hw.aq.asq_last_status));
+			return ret;
+		}
+	}
+
+	if (lut) {
+		bool pf_lut = vsi->type == I40E_VSI_MAIN;
+
+		ret = i40e_aq_get_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Cannot get RSS lut, err %pe aq_err %s\n",
+				 ERR_PTR(ret),
+				 i40e_aq_str(&pf->hw,
+					     pf->hw.aq.asq_last_status));
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_config_rss_reg - Configure RSS keys and lut by writing registers
+ * @vsi: Pointer to vsi structure
+ * @seed: RSS hash seed
+ * @lut: Lookup table
+ * @lut_size: Lookup table size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed,
+			       const u8 *lut, u16 lut_size)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	u16 vf_id = vsi->vf_id;
+	u8 i;
+
+	/* Fill out hash function seed */
+	if (seed) {
+		u32 *seed_dw = (u32 *)seed;
+
+		if (vsi->type == I40E_VSI_MAIN) {
+			for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
+				wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]);
+		} else if (vsi->type == I40E_VSI_SRIOV) {
+			for (i = 0; i <= I40E_VFQF_HKEY1_MAX_INDEX; i++)
+				wr32(hw, I40E_VFQF_HKEY1(i, vf_id), seed_dw[i]);
+		} else {
+			dev_err(&pf->pdev->dev, "Cannot set RSS seed - invalid VSI type\n");
+		}
+	}
+
+	if (lut) {
+		u32 *lut_dw = (u32 *)lut;
+
+		if (vsi->type == I40E_VSI_MAIN) {
+			if (lut_size != I40E_HLUT_ARRAY_SIZE)
+				return -EINVAL;
+			for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
+				wr32(hw, I40E_PFQF_HLUT(i), lut_dw[i]);
+		} else if (vsi->type == I40E_VSI_SRIOV) {
+			if (lut_size != I40E_VF_HLUT_ARRAY_SIZE)
+				return -EINVAL;
+			for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
+				wr32(hw, I40E_VFQF_HLUT1(i, vf_id), lut_dw[i]);
+		} else {
+			dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n");
+		}
+	}
+	i40e_flush(hw);
+
+	return 0;
+}
+
+/**
+ * i40e_get_rss_reg - Get the RSS keys and lut by reading registers
+ * @vsi: Pointer to VSI structure
+ * @seed: Buffer to store the keys
+ * @lut: Buffer to store the lookup table entries
+ * @lut_size: Size of buffer to store the lookup table entries
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int i40e_get_rss_reg(struct i40e_vsi *vsi, u8 *seed,
+			    u8 *lut, u16 lut_size)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	u16 i;
+
+	if (seed) {
+		u32 *seed_dw = (u32 *)seed;
+
+		for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
+			seed_dw[i] = i40e_read_rx_ctl(hw, I40E_PFQF_HKEY(i));
+	}
+	if (lut) {
+		u32 *lut_dw = (u32 *)lut;
+
+		if (lut_size != I40E_HLUT_ARRAY_SIZE)
+			return -EINVAL;
+		for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
+			lut_dw[i] = rd32(hw, I40E_PFQF_HLUT(i));
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_config_rss - Configure RSS keys and lut
+ * @vsi: Pointer to VSI structure
+ * @seed: RSS hash seed
+ * @lut: Lookup table
+ * @lut_size: Lookup table size
+ *
+ * Returns 0 on success, negative on failure
+ */
+int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
+{
+	struct i40e_pf *pf = vsi->back;
+
+	if (pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)
+		return i40e_config_rss_aq(vsi, seed, lut, lut_size);
+	else
+		return i40e_config_rss_reg(vsi, seed, lut, lut_size);
+}
+
+/**
+ * i40e_get_rss - Get RSS keys and lut
+ * @vsi: Pointer to VSI structure
+ * @seed: Buffer to store the keys
+ * @lut: Buffer to store the lookup table entries
+ * @lut_size: Size of buffer to store the lookup table entries
+ *
+ * Returns 0 on success, negative on failure
+ */
+int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
+{
+	struct i40e_pf *pf = vsi->back;
+
+	if (pf->hw_features & I40E_HW_RSS_AQ_CAPABLE)
+		return i40e_get_rss_aq(vsi, seed, lut, lut_size);
+	else
+		return i40e_get_rss_reg(vsi, seed, lut, lut_size);
+}
+
+/**
+ * i40e_fill_rss_lut - Fill the RSS lookup table with default values
+ * @pf: Pointer to board private structure
+ * @lut: Lookup table
+ * @rss_table_size: Lookup table size
+ * @rss_size: Range of queue number for hashing
+ */
+void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
+		       u16 rss_table_size, u16 rss_size)
+{
+	u16 i;
+
+	for (i = 0; i < rss_table_size; i++)
+		lut[i] = i % rss_size;
+}
+
+/**
+ * i40e_pf_config_rss - Prepare for RSS if used
+ * @pf: board private structure
+ **/
+static int i40e_pf_config_rss(struct i40e_pf *pf)
+{
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	u8 seed[I40E_HKEY_ARRAY_SIZE];
+	u8 *lut;
+	struct i40e_hw *hw = &pf->hw;
+	u32 reg_val;
+	u64 hena;
+	int ret;
+
+	/* By default we enable TCP/UDP with IPv4/IPv6 ptypes */
+	hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) |
+		((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32);
+	hena |= i40e_pf_get_default_rss_hena(pf);
+
+	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena);
+	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32));
+
+	/* Determine the RSS table size based on the hardware capabilities */
+	reg_val = i40e_read_rx_ctl(hw, I40E_PFQF_CTL_0);
+	reg_val = (pf->rss_table_size == 512) ?
+			(reg_val | I40E_PFQF_CTL_0_HASHLUTSIZE_512) :
+			(reg_val & ~I40E_PFQF_CTL_0_HASHLUTSIZE_512);
+	i40e_write_rx_ctl(hw, I40E_PFQF_CTL_0, reg_val);
+
+	/* Determine the RSS size of the VSI */
+	if (!vsi->rss_size) {
+		u16 qcount;
+		/* If the firmware does something weird during VSI init, we
+		 * could end up with zero TCs. Check for that to avoid
+		 * divide-by-zero. It probably won't pass traffic, but it also
+		 * won't panic.
+		 */
+		qcount = vsi->num_queue_pairs /
+			 (vsi->tc_config.numtc ? vsi->tc_config.numtc : 1);
+		vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount);
+	}
+	if (!vsi->rss_size)
+		return -EINVAL;
+
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	/* Use user configured lut if there is one, otherwise use default */
+	if (vsi->rss_lut_user)
+		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+	else
+		i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
+
+	/* Use user configured hash key if there is one, otherwise
+	 * use default.
+	 */
+	if (vsi->rss_hkey_user)
+		memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+	else
+		netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+	ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size);
+	kfree(lut);
+
+	return ret;
+}
+
+/**
+ * i40e_reconfig_rss_queues - change number of queues for rss and rebuild
+ * @pf: board private structure
+ * @queue_count: the requested queue count for rss.
+ *
+ * returns 0 if rss is not enabled, if enabled returns the final rss queue
+ * count which may be different from the requested queue count.
+ * Note: expects to be called while under rtnl_lock()
+ **/
+int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
+{
+	struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+	int new_rss_size;
+
+	if (!(pf->flags & I40E_FLAG_RSS_ENABLED))
+		return 0;
+
+	queue_count = min_t(int, queue_count, num_online_cpus());
+	new_rss_size = min_t(int, queue_count, pf->rss_size_max);
+
+	if (queue_count != vsi->num_queue_pairs) {
+		u16 qcount;
+
+		vsi->req_queue_pairs = queue_count;
+		i40e_prep_for_reset(pf);
+		if (test_bit(__I40E_IN_REMOVE, pf->state))
+			return pf->alloc_rss_size;
+
+		pf->alloc_rss_size = new_rss_size;
+
+		i40e_reset_and_rebuild(pf, true, true);
+
+		/* Discard the user configured hash keys and lut, if less
+		 * queues are enabled.
+		 */
+		if (queue_count < vsi->rss_size) {
+			i40e_clear_rss_config_user(vsi);
+			dev_dbg(&pf->pdev->dev,
+				"discard user configured hash keys and lut\n");
+		}
+
+		/* Reset vsi->rss_size, as number of enabled queues changed */
+		qcount = vsi->num_queue_pairs / vsi->tc_config.numtc;
+		vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount);
+
+		i40e_pf_config_rss(pf);
+	}
+	dev_info(&pf->pdev->dev, "User requested queue count/HW max RSS count:  %d/%d\n",
+		 vsi->req_queue_pairs, pf->rss_size_max);
+	return pf->alloc_rss_size;
+}
+
+/**
+ * i40e_get_partition_bw_setting - Retrieve BW settings for this PF partition
+ * @pf: board private structure
+ **/
+int i40e_get_partition_bw_setting(struct i40e_pf *pf)
+{
+	bool min_valid, max_valid;
+	u32 max_bw, min_bw;
+	int status;
+
+	status = i40e_read_bw_from_alt_ram(&pf->hw, &max_bw, &min_bw,
+					   &min_valid, &max_valid);
+
+	if (!status) {
+		if (min_valid)
+			pf->min_bw = min_bw;
+		if (max_valid)
+			pf->max_bw = max_bw;
+	}
+
+	return status;
+}
+
+/**
+ * i40e_set_partition_bw_setting - Set BW settings for this PF partition
+ * @pf: board private structure
+ **/
+int i40e_set_partition_bw_setting(struct i40e_pf *pf)
+{
+	struct i40e_aqc_configure_partition_bw_data bw_data;
+	int status;
+
+	memset(&bw_data, 0, sizeof(bw_data));
+
+	/* Set the valid bit for this PF */
+	bw_data.pf_valid_bits = cpu_to_le16(BIT(pf->hw.pf_id));
+	bw_data.max_bw[pf->hw.pf_id] = pf->max_bw & I40E_ALT_BW_VALUE_MASK;
+	bw_data.min_bw[pf->hw.pf_id] = pf->min_bw & I40E_ALT_BW_VALUE_MASK;
+
+	/* Set the new bandwidths */
+	status = i40e_aq_configure_partition_bw(&pf->hw, &bw_data, NULL);
+
+	return status;
+}
+
+/**
+ * i40e_commit_partition_bw_setting - Commit BW settings for this PF partition
+ * @pf: board private structure
+ **/
+int i40e_commit_partition_bw_setting(struct i40e_pf *pf)
+{
+	/* Commit temporary BW setting to permanent NVM image */
+	enum i40e_admin_queue_err last_aq_status;
+	u16 nvm_word;
+	int ret;
+
+	if (pf->hw.partition_id != 1) {
+		dev_info(&pf->pdev->dev,
+			 "Commit BW only works on partition 1! This is partition %d",
+			 pf->hw.partition_id);
+		ret = -EOPNOTSUPP;
+		goto bw_commit_out;
+	}
+
+	/* Acquire NVM for read access */
+	ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_READ);
+	last_aq_status = pf->hw.aq.asq_last_status;
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Cannot acquire NVM for read access, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, last_aq_status));
+		goto bw_commit_out;
+	}
+
+	/* Read word 0x10 of NVM - SW compatibility word 1 */
+	ret = i40e_aq_read_nvm(&pf->hw,
+			       I40E_SR_NVM_CONTROL_WORD,
+			       0x10, sizeof(nvm_word), &nvm_word,
+			       false, NULL);
+	/* Save off last admin queue command status before releasing
+	 * the NVM
+	 */
+	last_aq_status = pf->hw.aq.asq_last_status;
+	i40e_release_nvm(&pf->hw);
+	if (ret) {
+		dev_info(&pf->pdev->dev, "NVM read error, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, last_aq_status));
+		goto bw_commit_out;
+	}
+
+	/* Wait a bit for NVM release to complete */
+	msleep(50);
+
+	/* Acquire NVM for write access */
+	ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_WRITE);
+	last_aq_status = pf->hw.aq.asq_last_status;
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Cannot acquire NVM for write access, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, last_aq_status));
+		goto bw_commit_out;
+	}
+	/* Write it back out unchanged to initiate update NVM,
+	 * which will force a write of the shadow (alt) RAM to
+	 * the NVM - thus storing the bandwidth values permanently.
+	 */
+	ret = i40e_aq_update_nvm(&pf->hw,
+				 I40E_SR_NVM_CONTROL_WORD,
+				 0x10, sizeof(nvm_word),
+				 &nvm_word, true, 0, NULL);
+	/* Save off last admin queue command status before releasing
+	 * the NVM
+	 */
+	last_aq_status = pf->hw.aq.asq_last_status;
+	i40e_release_nvm(&pf->hw);
+	if (ret)
+		dev_info(&pf->pdev->dev,
+			 "BW settings NOT SAVED, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, last_aq_status));
+bw_commit_out:
+
+	return ret;
+}
+
+/**
+ * i40e_is_total_port_shutdown_enabled - read NVM and return value
+ * if total port shutdown feature is enabled for this PF
+ * @pf: board private structure
+ **/
+static bool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf)
+{
+#define I40E_TOTAL_PORT_SHUTDOWN_ENABLED	BIT(4)
+#define I40E_FEATURES_ENABLE_PTR		0x2A
+#define I40E_CURRENT_SETTING_PTR		0x2B
+#define I40E_LINK_BEHAVIOR_WORD_OFFSET		0x2D
+#define I40E_LINK_BEHAVIOR_WORD_LENGTH		0x1
+#define I40E_LINK_BEHAVIOR_OS_FORCED_ENABLED	BIT(0)
+#define I40E_LINK_BEHAVIOR_PORT_BIT_LENGTH	4
+	u16 sr_emp_sr_settings_ptr = 0;
+	u16 features_enable = 0;
+	u16 link_behavior = 0;
+	int read_status = 0;
+	bool ret = false;
+
+	read_status = i40e_read_nvm_word(&pf->hw,
+					 I40E_SR_EMP_SR_SETTINGS_PTR,
+					 &sr_emp_sr_settings_ptr);
+	if (read_status)
+		goto err_nvm;
+	read_status = i40e_read_nvm_word(&pf->hw,
+					 sr_emp_sr_settings_ptr +
+					 I40E_FEATURES_ENABLE_PTR,
+					 &features_enable);
+	if (read_status)
+		goto err_nvm;
+	if (I40E_TOTAL_PORT_SHUTDOWN_ENABLED & features_enable) {
+		read_status = i40e_read_nvm_module_data(&pf->hw,
+							I40E_SR_EMP_SR_SETTINGS_PTR,
+							I40E_CURRENT_SETTING_PTR,
+							I40E_LINK_BEHAVIOR_WORD_OFFSET,
+							I40E_LINK_BEHAVIOR_WORD_LENGTH,
+							&link_behavior);
+		if (read_status)
+			goto err_nvm;
+		link_behavior >>= (pf->hw.port * I40E_LINK_BEHAVIOR_PORT_BIT_LENGTH);
+		ret = I40E_LINK_BEHAVIOR_OS_FORCED_ENABLED & link_behavior;
+	}
+	return ret;
+
+err_nvm:
+	dev_warn(&pf->pdev->dev,
+		 "total-port-shutdown feature is off due to read nvm error: %pe\n",
+		 ERR_PTR(read_status));
+	return ret;
+}
+
+/**
+ * i40e_sw_init - Initialize general software structures (struct i40e_pf)
+ * @pf: board private structure to initialize
+ *
+ * i40e_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ **/
+static int i40e_sw_init(struct i40e_pf *pf)
+{
+	int err = 0;
+	int size;
+	u16 pow;
+
+	/* Set default capability flags */
+	pf->flags = I40E_FLAG_RX_CSUM_ENABLED |
+		    I40E_FLAG_MSI_ENABLED     |
+		    I40E_FLAG_MSIX_ENABLED;
+
+	/* Set default ITR */
+	pf->rx_itr_default = I40E_ITR_RX_DEF;
+	pf->tx_itr_default = I40E_ITR_TX_DEF;
+
+	/* Depending on PF configurations, it is possible that the RSS
+	 * maximum might end up larger than the available queues
+	 */
+	pf->rss_size_max = BIT(pf->hw.func_caps.rss_table_entry_width);
+	pf->alloc_rss_size = 1;
+	pf->rss_table_size = pf->hw.func_caps.rss_table_size;
+	pf->rss_size_max = min_t(int, pf->rss_size_max,
+				 pf->hw.func_caps.num_tx_qp);
+
+	/* find the next higher power-of-2 of num cpus */
+	pow = roundup_pow_of_two(num_online_cpus());
+	pf->rss_size_max = min_t(int, pf->rss_size_max, pow);
+
+	if (pf->hw.func_caps.rss) {
+		pf->flags |= I40E_FLAG_RSS_ENABLED;
+		pf->alloc_rss_size = min_t(int, pf->rss_size_max,
+					   num_online_cpus());
+	}
+
+	/* MFP mode enabled */
+	if (pf->hw.func_caps.npar_enable || pf->hw.func_caps.flex10_enable) {
+		pf->flags |= I40E_FLAG_MFP_ENABLED;
+		dev_info(&pf->pdev->dev, "MFP mode Enabled\n");
+		if (i40e_get_partition_bw_setting(pf)) {
+			dev_warn(&pf->pdev->dev,
+				 "Could not get partition bw settings\n");
+		} else {
+			dev_info(&pf->pdev->dev,
+				 "Partition BW Min = %8.8x, Max = %8.8x\n",
+				 pf->min_bw, pf->max_bw);
+
+			/* nudge the Tx scheduler */
+			i40e_set_partition_bw_setting(pf);
+		}
+	}
+
+	if ((pf->hw.func_caps.fd_filters_guaranteed > 0) ||
+	    (pf->hw.func_caps.fd_filters_best_effort > 0)) {
+		pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
+		pf->atr_sample_rate = I40E_DEFAULT_ATR_SAMPLE_RATE;
+		if (pf->flags & I40E_FLAG_MFP_ENABLED &&
+		    pf->hw.num_partitions > 1)
+			dev_info(&pf->pdev->dev,
+				 "Flow Director Sideband mode Disabled in MFP mode\n");
+		else
+			pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+		pf->fdir_pf_filter_count =
+				 pf->hw.func_caps.fd_filters_guaranteed;
+		pf->hw.fdir_shared_filter_count =
+				 pf->hw.func_caps.fd_filters_best_effort;
+	}
+
+	if (pf->hw.mac.type == I40E_MAC_X722) {
+		pf->hw_features |= (I40E_HW_RSS_AQ_CAPABLE |
+				    I40E_HW_128_QP_RSS_CAPABLE |
+				    I40E_HW_ATR_EVICT_CAPABLE |
+				    I40E_HW_WB_ON_ITR_CAPABLE |
+				    I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE |
+				    I40E_HW_NO_PCI_LINK_CHECK |
+				    I40E_HW_USE_SET_LLDP_MIB |
+				    I40E_HW_GENEVE_OFFLOAD_CAPABLE |
+				    I40E_HW_PTP_L4_CAPABLE |
+				    I40E_HW_WOL_MC_MAGIC_PKT_WAKE |
+				    I40E_HW_OUTER_UDP_CSUM_CAPABLE);
+
+#define I40E_FDEVICT_PCTYPE_DEFAULT 0xc03
+		if (rd32(&pf->hw, I40E_GLQF_FDEVICTENA(1)) !=
+		    I40E_FDEVICT_PCTYPE_DEFAULT) {
+			dev_warn(&pf->pdev->dev,
+				 "FD EVICT PCTYPES are not right, disable FD HW EVICT\n");
+			pf->hw_features &= ~I40E_HW_ATR_EVICT_CAPABLE;
+		}
+	} else if ((pf->hw.aq.api_maj_ver > 1) ||
+		   ((pf->hw.aq.api_maj_ver == 1) &&
+		    (pf->hw.aq.api_min_ver > 4))) {
+		/* Supported in FW API version higher than 1.4 */
+		pf->hw_features |= I40E_HW_GENEVE_OFFLOAD_CAPABLE;
+	}
+
+	/* Enable HW ATR eviction if possible */
+	if (pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE)
+		pf->flags |= I40E_FLAG_HW_ATR_EVICT_ENABLED;
+
+	if ((pf->hw.mac.type == I40E_MAC_XL710) &&
+	    (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 33)) ||
+	    (pf->hw.aq.fw_maj_ver < 4))) {
+		pf->hw_features |= I40E_HW_RESTART_AUTONEG;
+		/* No DCB support  for FW < v4.33 */
+		pf->hw_features |= I40E_HW_NO_DCB_SUPPORT;
+	}
+
+	/* Disable FW LLDP if FW < v4.3 */
+	if ((pf->hw.mac.type == I40E_MAC_XL710) &&
+	    (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver < 3)) ||
+	    (pf->hw.aq.fw_maj_ver < 4)))
+		pf->hw_features |= I40E_HW_STOP_FW_LLDP;
+
+	/* Use the FW Set LLDP MIB API if FW > v4.40 */
+	if ((pf->hw.mac.type == I40E_MAC_XL710) &&
+	    (((pf->hw.aq.fw_maj_ver == 4) && (pf->hw.aq.fw_min_ver >= 40)) ||
+	    (pf->hw.aq.fw_maj_ver >= 5)))
+		pf->hw_features |= I40E_HW_USE_SET_LLDP_MIB;
+
+	/* Enable PTP L4 if FW > v6.0 */
+	if (pf->hw.mac.type == I40E_MAC_XL710 &&
+	    pf->hw.aq.fw_maj_ver >= 6)
+		pf->hw_features |= I40E_HW_PTP_L4_CAPABLE;
+
+	if (pf->hw.func_caps.vmdq && num_online_cpus() != 1) {
+		pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
+		pf->flags |= I40E_FLAG_VMDQ_ENABLED;
+		pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf);
+	}
+
+	if (pf->hw.func_caps.iwarp && num_online_cpus() != 1) {
+		pf->flags |= I40E_FLAG_IWARP_ENABLED;
+		/* IWARP needs one extra vector for CQP just like MISC.*/
+		pf->num_iwarp_msix = (int)num_online_cpus() + 1;
+	}
+	/* Stopping FW LLDP engine is supported on XL710 and X722
+	 * starting from FW versions determined in i40e_init_adminq.
+	 * Stopping the FW LLDP engine is not supported on XL710
+	 * if NPAR is functioning so unset this hw flag in this case.
+	 */
+	if (pf->hw.mac.type == I40E_MAC_XL710 &&
+	    pf->hw.func_caps.npar_enable &&
+	    (pf->hw.flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE))
+		pf->hw.flags &= ~I40E_HW_FLAG_FW_LLDP_STOPPABLE;
+
+#ifdef CONFIG_PCI_IOV
+	if (pf->hw.func_caps.num_vfs && pf->hw.partition_id == 1) {
+		pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
+		pf->flags |= I40E_FLAG_SRIOV_ENABLED;
+		pf->num_req_vfs = min_t(int,
+					pf->hw.func_caps.num_vfs,
+					I40E_MAX_VF_COUNT);
+	}
+#endif /* CONFIG_PCI_IOV */
+	pf->eeprom_version = 0xDEAD;
+	pf->lan_veb = I40E_NO_VEB;
+	pf->lan_vsi = I40E_NO_VSI;
+
+	/* By default FW has this off for performance reasons */
+	pf->flags &= ~I40E_FLAG_VEB_STATS_ENABLED;
+
+	/* set up queue assignment tracking */
+	size = sizeof(struct i40e_lump_tracking)
+		+ (sizeof(u16) * pf->hw.func_caps.num_tx_qp);
+	pf->qp_pile = kzalloc(size, GFP_KERNEL);
+	if (!pf->qp_pile) {
+		err = -ENOMEM;
+		goto sw_init_done;
+	}
+	pf->qp_pile->num_entries = pf->hw.func_caps.num_tx_qp;
+
+	pf->tx_timeout_recovery_level = 1;
+
+	if (pf->hw.mac.type != I40E_MAC_X722 &&
+	    i40e_is_total_port_shutdown_enabled(pf)) {
+		/* Link down on close must be on when total port shutdown
+		 * is enabled for a given port
+		 */
+		pf->flags |= (I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENABLED |
+			      I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED);
+		dev_info(&pf->pdev->dev,
+			 "total-port-shutdown was enabled, link-down-on-close is forced on\n");
+	}
+	mutex_init(&pf->switch_mutex);
+
+sw_init_done:
+	return err;
+}
+
+/**
+ * i40e_set_ntuple - set the ntuple feature flag and take action
+ * @pf: board private structure to initialize
+ * @features: the feature set that the stack is suggesting
+ *
+ * returns a bool to indicate if reset needs to happen
+ **/
+bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features)
+{
+	bool need_reset = false;
+
+	/* Check if Flow Director n-tuple support was enabled or disabled.  If
+	 * the state changed, we need to reset.
+	 */
+	if (features & NETIF_F_NTUPLE) {
+		/* Enable filters and mark for reset */
+		if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
+			need_reset = true;
+		/* enable FD_SB only if there is MSI-X vector and no cloud
+		 * filters exist
+		 */
+		if (pf->num_fdsb_msix > 0 && !pf->num_cloud_filters) {
+			pf->flags |= I40E_FLAG_FD_SB_ENABLED;
+			pf->flags &= ~I40E_FLAG_FD_SB_INACTIVE;
+		}
+	} else {
+		/* turn off filters, mark for reset and clear SW filter list */
+		if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
+			need_reset = true;
+			i40e_fdir_filter_exit(pf);
+		}
+		pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+		clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state);
+		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+
+		/* reset fd counters */
+		pf->fd_add_err = 0;
+		pf->fd_atr_cnt = 0;
+		/* if ATR was auto disabled it can be re-enabled. */
+		if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
+			if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
+			    (I40E_DEBUG_FD & pf->hw.debug_mask))
+				dev_info(&pf->pdev->dev, "ATR re-enabled.\n");
+	}
+	return need_reset;
+}
+
+/**
+ * i40e_clear_rss_lut - clear the rx hash lookup table
+ * @vsi: the VSI being configured
+ **/
+static void i40e_clear_rss_lut(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	u16 vf_id = vsi->vf_id;
+	u8 i;
+
+	if (vsi->type == I40E_VSI_MAIN) {
+		for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
+			wr32(hw, I40E_PFQF_HLUT(i), 0);
+	} else if (vsi->type == I40E_VSI_SRIOV) {
+		for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
+			i40e_write_rx_ctl(hw, I40E_VFQF_HLUT1(i, vf_id), 0);
+	} else {
+		dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n");
+	}
+}
+
+/**
+ * i40e_set_loopback - turn on/off loopback mode on underlying PF
+ * @vsi: ptr to VSI
+ * @ena: flag to indicate the on/off setting
+ */
+static int i40e_set_loopback(struct i40e_vsi *vsi, bool ena)
+{
+	bool if_running = netif_running(vsi->netdev) &&
+			  !test_and_set_bit(__I40E_VSI_DOWN, vsi->state);
+	int ret;
+
+	if (if_running)
+		i40e_down(vsi);
+
+	ret = i40e_aq_set_mac_loopback(&vsi->back->hw, ena, NULL);
+	if (ret)
+		netdev_err(vsi->netdev, "Failed to toggle loopback state\n");
+	if (if_running)
+		i40e_up(vsi);
+
+	return ret;
+}
+
+/**
+ * i40e_set_features - set the netdev feature flags
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ * Note: expects to be called while under rtnl_lock()
+ **/
+static int i40e_set_features(struct net_device *netdev,
+			     netdev_features_t features)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	bool need_reset;
+
+	if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH))
+		i40e_pf_config_rss(pf);
+	else if (!(features & NETIF_F_RXHASH) &&
+		 netdev->features & NETIF_F_RXHASH)
+		i40e_clear_rss_lut(vsi);
+
+	if (features & NETIF_F_HW_VLAN_CTAG_RX)
+		i40e_vlan_stripping_enable(vsi);
+	else
+		i40e_vlan_stripping_disable(vsi);
+
+	if (!(features & NETIF_F_HW_TC) &&
+	    (netdev->features & NETIF_F_HW_TC) && pf->num_cloud_filters) {
+		dev_err(&pf->pdev->dev,
+			"Offloaded tc filters active, can't turn hw_tc_offload off");
+		return -EINVAL;
+	}
+
+	if (!(features & NETIF_F_HW_L2FW_DOFFLOAD) && vsi->macvlan_cnt)
+		i40e_del_all_macvlans(vsi);
+
+	need_reset = i40e_set_ntuple(pf, features);
+
+	if (need_reset)
+		i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
+
+	if ((features ^ netdev->features) & NETIF_F_LOOPBACK)
+		return i40e_set_loopback(vsi, !!(features & NETIF_F_LOOPBACK));
+
+	return 0;
+}
+
+static int i40e_udp_tunnel_set_port(struct net_device *netdev,
+				    unsigned int table, unsigned int idx,
+				    struct udp_tunnel_info *ti)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_hw *hw = &np->vsi->back->hw;
+	u8 type, filter_index;
+	int ret;
+
+	type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? I40E_AQC_TUNNEL_TYPE_VXLAN :
+						   I40E_AQC_TUNNEL_TYPE_NGE;
+
+	ret = i40e_aq_add_udp_tunnel(hw, ntohs(ti->port), type, &filter_index,
+				     NULL);
+	if (ret) {
+		netdev_info(netdev, "add UDP port failed, err %pe aq_err %s\n",
+			    ERR_PTR(ret),
+			    i40e_aq_str(hw, hw->aq.asq_last_status));
+		return -EIO;
+	}
+
+	udp_tunnel_nic_set_port_priv(netdev, table, idx, filter_index);
+	return 0;
+}
+
+static int i40e_udp_tunnel_unset_port(struct net_device *netdev,
+				      unsigned int table, unsigned int idx,
+				      struct udp_tunnel_info *ti)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_hw *hw = &np->vsi->back->hw;
+	int ret;
+
+	ret = i40e_aq_del_udp_tunnel(hw, ti->hw_priv, NULL);
+	if (ret) {
+		netdev_info(netdev, "delete UDP port failed, err %pe aq_err %s\n",
+			    ERR_PTR(ret),
+			    i40e_aq_str(hw, hw->aq.asq_last_status));
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int i40e_get_phys_port_id(struct net_device *netdev,
+				 struct netdev_phys_item_id *ppid)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+
+	if (!(pf->hw_features & I40E_HW_PORT_ID_VALID))
+		return -EOPNOTSUPP;
+
+	ppid->id_len = min_t(int, sizeof(hw->mac.port_addr), sizeof(ppid->id));
+	memcpy(ppid->id, hw->mac.port_addr, ppid->id_len);
+
+	return 0;
+}
+
+/**
+ * i40e_ndo_fdb_add - add an entry to the hardware database
+ * @ndm: the input from the stack
+ * @tb: pointer to array of nladdr (unused)
+ * @dev: the net device pointer
+ * @addr: the MAC address entry being added
+ * @vid: VLAN ID
+ * @flags: instructions from stack about fdb operation
+ * @extack: netlink extended ack, unused currently
+ */
+static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+			    struct net_device *dev,
+			    const unsigned char *addr, u16 vid,
+			    u16 flags,
+			    struct netlink_ext_ack *extack)
+{
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+	struct i40e_pf *pf = np->vsi->back;
+	int err = 0;
+
+	if (!(pf->flags & I40E_FLAG_SRIOV_ENABLED))
+		return -EOPNOTSUPP;
+
+	if (vid) {
+		pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
+		return -EINVAL;
+	}
+
+	/* Hardware does not support aging addresses so if a
+	 * ndm_state is given only allow permanent addresses
+	 */
+	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
+		netdev_info(dev, "FDB only supports static addresses\n");
+		return -EINVAL;
+	}
+
+	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
+		err = dev_uc_add_excl(dev, addr);
+	else if (is_multicast_ether_addr(addr))
+		err = dev_mc_add_excl(dev, addr);
+	else
+		err = -EINVAL;
+
+	/* Only return duplicate errors if NLM_F_EXCL is set */
+	if (err == -EEXIST && !(flags & NLM_F_EXCL))
+		err = 0;
+
+	return err;
+}
+
+/**
+ * i40e_ndo_bridge_setlink - Set the hardware bridge mode
+ * @dev: the netdev being configured
+ * @nlh: RTNL message
+ * @flags: bridge flags
+ * @extack: netlink extended ack
+ *
+ * Inserts a new hardware bridge if not already created and
+ * enables the bridging mode requested (VEB or VEPA). If the
+ * hardware bridge has already been inserted and the request
+ * is to change the mode then that requires a PF reset to
+ * allow rebuild of the components with required hardware
+ * bridge mode enabled.
+ *
+ * Note: expects to be called while under rtnl_lock()
+ **/
+static int i40e_ndo_bridge_setlink(struct net_device *dev,
+				   struct nlmsghdr *nlh,
+				   u16 flags,
+				   struct netlink_ext_ack *extack)
+{
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_veb *veb = NULL;
+	struct nlattr *attr, *br_spec;
+	int i, rem;
+
+	/* Only for PF VSI for now */
+	if (vsi->seid != pf->vsi[pf->lan_vsi]->seid)
+		return -EOPNOTSUPP;
+
+	/* Find the HW bridge for PF VSI */
+	for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
+		if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid)
+			veb = pf->veb[i];
+	}
+
+	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+	if (!br_spec)
+		return -EINVAL;
+
+	nla_for_each_nested(attr, br_spec, rem) {
+		__u16 mode;
+
+		if (nla_type(attr) != IFLA_BRIDGE_MODE)
+			continue;
+
+		mode = nla_get_u16(attr);
+		if ((mode != BRIDGE_MODE_VEPA) &&
+		    (mode != BRIDGE_MODE_VEB))
+			return -EINVAL;
+
+		/* Insert a new HW bridge */
+		if (!veb) {
+			veb = i40e_veb_setup(pf, 0, vsi->uplink_seid, vsi->seid,
+					     vsi->tc_config.enabled_tc);
+			if (veb) {
+				veb->bridge_mode = mode;
+				i40e_config_bridge_mode(veb);
+			} else {
+				/* No Bridge HW offload available */
+				return -ENOENT;
+			}
+			break;
+		} else if (mode != veb->bridge_mode) {
+			/* Existing HW bridge but different mode needs reset */
+			veb->bridge_mode = mode;
+			/* TODO: If no VFs or VMDq VSIs, disallow VEB mode */
+			if (mode == BRIDGE_MODE_VEB)
+				pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+			else
+				pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+			i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_ndo_bridge_getlink - Get the hardware bridge mode
+ * @skb: skb buff
+ * @pid: process id
+ * @seq: RTNL message seq #
+ * @dev: the netdev being configured
+ * @filter_mask: unused
+ * @nlflags: netlink flags passed in
+ *
+ * Return the mode in which the hardware bridge is operating in
+ * i.e VEB or VEPA.
+ **/
+static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+				   struct net_device *dev,
+				   u32 __always_unused filter_mask,
+				   int nlflags)
+{
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_veb *veb = NULL;
+	int i;
+
+	/* Only for PF VSI for now */
+	if (vsi->seid != pf->vsi[pf->lan_vsi]->seid)
+		return -EOPNOTSUPP;
+
+	/* Find the HW bridge for the PF VSI */
+	for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
+		if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid)
+			veb = pf->veb[i];
+	}
+
+	if (!veb)
+		return 0;
+
+	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode,
+				       0, 0, nlflags, filter_mask, NULL);
+}
+
+/**
+ * i40e_features_check - Validate encapsulated packet conforms to limits
+ * @skb: skb buff
+ * @dev: This physical port's netdev
+ * @features: Offload features that the stack believes apply
+ **/
+static netdev_features_t i40e_features_check(struct sk_buff *skb,
+					     struct net_device *dev,
+					     netdev_features_t features)
+{
+	size_t len;
+
+	/* No point in doing any of this if neither checksum nor GSO are
+	 * being requested for this frame.  We can rule out both by just
+	 * checking for CHECKSUM_PARTIAL
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return features;
+
+	/* We cannot support GSO if the MSS is going to be less than
+	 * 64 bytes.  If it is then we need to drop support for GSO.
+	 */
+	if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
+		features &= ~NETIF_F_GSO_MASK;
+
+	/* MACLEN can support at most 63 words */
+	len = skb_network_header(skb) - skb->data;
+	if (len & ~(63 * 2))
+		goto out_err;
+
+	/* IPLEN and EIPLEN can support at most 127 dwords */
+	len = skb_transport_header(skb) - skb_network_header(skb);
+	if (len & ~(127 * 4))
+		goto out_err;
+
+	if (skb->encapsulation) {
+		/* L4TUNLEN can support 127 words */
+		len = skb_inner_network_header(skb) - skb_transport_header(skb);
+		if (len & ~(127 * 2))
+			goto out_err;
+
+		/* IPLEN can support at most 127 dwords */
+		len = skb_inner_transport_header(skb) -
+		      skb_inner_network_header(skb);
+		if (len & ~(127 * 4))
+			goto out_err;
+	}
+
+	/* No need to validate L4LEN as TCP is the only protocol with a
+	 * flexible value and we support all possible values supported
+	 * by TCP, which is at most 15 dwords
+	 */
+
+	return features;
+out_err:
+	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+/**
+ * i40e_xdp_setup - add/remove an XDP program
+ * @vsi: VSI to changed
+ * @prog: XDP program
+ * @extack: netlink extended ack
+ **/
+static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
+			  struct netlink_ext_ack *extack)
+{
+	int frame_size = i40e_max_vsi_frame_size(vsi, prog);
+	struct i40e_pf *pf = vsi->back;
+	struct bpf_prog *old_prog;
+	bool need_reset;
+	int i;
+
+	/* Don't allow frames that span over multiple buffers */
+	if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) {
+		NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags");
+		return -EINVAL;
+	}
+
+	/* When turning XDP on->off/off->on we reset and rebuild the rings. */
+	need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog);
+
+	if (need_reset)
+		i40e_prep_for_reset(pf);
+
+	/* VSI shall be deleted in a moment, just return EINVAL */
+	if (test_bit(__I40E_IN_REMOVE, pf->state))
+		return -EINVAL;
+
+	old_prog = xchg(&vsi->xdp_prog, prog);
+
+	if (need_reset) {
+		if (!prog) {
+			xdp_features_clear_redirect_target(vsi->netdev);
+			/* Wait until ndo_xsk_wakeup completes. */
+			synchronize_rcu();
+		}
+		i40e_reset_and_rebuild(pf, true, true);
+	}
+
+	if (!i40e_enabled_xdp_vsi(vsi) && prog) {
+		if (i40e_realloc_rx_bi_zc(vsi, true))
+			return -ENOMEM;
+	} else if (i40e_enabled_xdp_vsi(vsi) && !prog) {
+		if (i40e_realloc_rx_bi_zc(vsi, false))
+			return -ENOMEM;
+	}
+
+	for (i = 0; i < vsi->num_queue_pairs; i++)
+		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	/* Kick start the NAPI context if there is an AF_XDP socket open
+	 * on that queue id. This so that receiving will start.
+	 */
+	if (need_reset && prog) {
+		for (i = 0; i < vsi->num_queue_pairs; i++)
+			if (vsi->xdp_rings[i]->xsk_pool)
+				(void)i40e_xsk_wakeup(vsi->netdev, i,
+						      XDP_WAKEUP_RX);
+		xdp_features_set_redirect_target(vsi->netdev, true);
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_enter_busy_conf - Enters busy config state
+ * @vsi: vsi
+ *
+ * Returns 0 on success, <0 for failure.
+ **/
+static int i40e_enter_busy_conf(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	int timeout = 50;
+
+	while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(1000, 2000);
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_exit_busy_conf - Exits busy config state
+ * @vsi: vsi
+ **/
+static void i40e_exit_busy_conf(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+
+	clear_bit(__I40E_CONFIG_BUSY, pf->state);
+}
+
+/**
+ * i40e_queue_pair_reset_stats - Resets all statistics for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ **/
+static void i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair)
+{
+	memset(&vsi->rx_rings[queue_pair]->rx_stats, 0,
+	       sizeof(vsi->rx_rings[queue_pair]->rx_stats));
+	memset(&vsi->tx_rings[queue_pair]->stats, 0,
+	       sizeof(vsi->tx_rings[queue_pair]->stats));
+	if (i40e_enabled_xdp_vsi(vsi)) {
+		memset(&vsi->xdp_rings[queue_pair]->stats, 0,
+		       sizeof(vsi->xdp_rings[queue_pair]->stats));
+	}
+}
+
+/**
+ * i40e_queue_pair_clean_rings - Cleans all the rings of a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ **/
+static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair)
+{
+	i40e_clean_tx_ring(vsi->tx_rings[queue_pair]);
+	if (i40e_enabled_xdp_vsi(vsi)) {
+		/* Make sure that in-progress ndo_xdp_xmit calls are
+		 * completed.
+		 */
+		synchronize_rcu();
+		i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]);
+	}
+	i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
+}
+
+/**
+ * i40e_queue_pair_toggle_napi - Enables/disables NAPI for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ * @enable: true for enable, false for disable
+ **/
+static void i40e_queue_pair_toggle_napi(struct i40e_vsi *vsi, int queue_pair,
+					bool enable)
+{
+	struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
+	struct i40e_q_vector *q_vector = rxr->q_vector;
+
+	if (!vsi->netdev)
+		return;
+
+	/* All rings in a qp belong to the same qvector. */
+	if (q_vector->rx.ring || q_vector->tx.ring) {
+		if (enable)
+			napi_enable(&q_vector->napi);
+		else
+			napi_disable(&q_vector->napi);
+	}
+}
+
+/**
+ * i40e_queue_pair_toggle_rings - Enables/disables all rings for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ * @enable: true for enable, false for disable
+ *
+ * Returns 0 on success, <0 on failure.
+ **/
+static int i40e_queue_pair_toggle_rings(struct i40e_vsi *vsi, int queue_pair,
+					bool enable)
+{
+	struct i40e_pf *pf = vsi->back;
+	int pf_q, ret = 0;
+
+	pf_q = vsi->base_queue + queue_pair;
+	ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q,
+				     false /*is xdp*/, enable);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "VSI seid %d Tx ring %d %sable timeout\n",
+			 vsi->seid, pf_q, (enable ? "en" : "dis"));
+		return ret;
+	}
+
+	i40e_control_rx_q(pf, pf_q, enable);
+	ret = i40e_pf_rxq_wait(pf, pf_q, enable);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "VSI seid %d Rx ring %d %sable timeout\n",
+			 vsi->seid, pf_q, (enable ? "en" : "dis"));
+		return ret;
+	}
+
+	/* Due to HW errata, on Rx disable only, the register can
+	 * indicate done before it really is. Needs 50ms to be sure
+	 */
+	if (!enable)
+		mdelay(50);
+
+	if (!i40e_enabled_xdp_vsi(vsi))
+		return ret;
+
+	ret = i40e_control_wait_tx_q(vsi->seid, pf,
+				     pf_q + vsi->alloc_queue_pairs,
+				     true /*is xdp*/, enable);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "VSI seid %d XDP Tx ring %d %sable timeout\n",
+			 vsi->seid, pf_q, (enable ? "en" : "dis"));
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_queue_pair_enable_irq - Enables interrupts for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue_pair
+ **/
+static void i40e_queue_pair_enable_irq(struct i40e_vsi *vsi, int queue_pair)
+{
+	struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+
+	/* All rings in a qp belong to the same qvector. */
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+		i40e_irq_dynamic_enable(vsi, rxr->q_vector->v_idx);
+	else
+		i40e_irq_dynamic_enable_icr0(pf);
+
+	i40e_flush(hw);
+}
+
+/**
+ * i40e_queue_pair_disable_irq - Disables interrupts for a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue_pair
+ **/
+static void i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair)
+{
+	struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+
+	/* For simplicity, instead of removing the qp interrupt causes
+	 * from the interrupt linked list, we simply disable the interrupt, and
+	 * leave the list intact.
+	 *
+	 * All rings in a qp belong to the same qvector.
+	 */
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+		u32 intpf = vsi->base_vector + rxr->q_vector->v_idx;
+
+		wr32(hw, I40E_PFINT_DYN_CTLN(intpf - 1), 0);
+		i40e_flush(hw);
+		synchronize_irq(pf->msix_entries[intpf].vector);
+	} else {
+		/* Legacy and MSI mode - this stops all interrupt handling */
+		wr32(hw, I40E_PFINT_ICR0_ENA, 0);
+		wr32(hw, I40E_PFINT_DYN_CTL0, 0);
+		i40e_flush(hw);
+		synchronize_irq(pf->pdev->irq);
+	}
+}
+
+/**
+ * i40e_queue_pair_disable - Disables a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ *
+ * Returns 0 on success, <0 on failure.
+ **/
+int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)
+{
+	int err;
+
+	err = i40e_enter_busy_conf(vsi);
+	if (err)
+		return err;
+
+	i40e_queue_pair_disable_irq(vsi, queue_pair);
+	err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */);
+	i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
+	i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
+	i40e_queue_pair_clean_rings(vsi, queue_pair);
+	i40e_queue_pair_reset_stats(vsi, queue_pair);
+
+	return err;
+}
+
+/**
+ * i40e_queue_pair_enable - Enables a queue pair
+ * @vsi: vsi
+ * @queue_pair: queue pair
+ *
+ * Returns 0 on success, <0 on failure.
+ **/
+int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair)
+{
+	int err;
+
+	err = i40e_configure_tx_ring(vsi->tx_rings[queue_pair]);
+	if (err)
+		return err;
+
+	if (i40e_enabled_xdp_vsi(vsi)) {
+		err = i40e_configure_tx_ring(vsi->xdp_rings[queue_pair]);
+		if (err)
+			return err;
+	}
+
+	err = i40e_configure_rx_ring(vsi->rx_rings[queue_pair]);
+	if (err)
+		return err;
+
+	err = i40e_queue_pair_toggle_rings(vsi, queue_pair, true /* on */);
+	i40e_queue_pair_toggle_napi(vsi, queue_pair, true /* on */);
+	i40e_queue_pair_enable_irq(vsi, queue_pair);
+
+	i40e_exit_busy_conf(vsi);
+
+	return err;
+}
+
+/**
+ * i40e_xdp - implements ndo_bpf for i40e
+ * @dev: netdevice
+ * @xdp: XDP command
+ **/
+static int i40e_xdp(struct net_device *dev,
+		    struct netdev_bpf *xdp)
+{
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+	struct i40e_vsi *vsi = np->vsi;
+
+	if (vsi->type != I40E_VSI_MAIN)
+		return -EINVAL;
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return i40e_xdp_setup(vsi, xdp->prog, xdp->extack);
+	case XDP_SETUP_XSK_POOL:
+		return i40e_xsk_pool_setup(vsi, xdp->xsk.pool,
+					   xdp->xsk.queue_id);
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct net_device_ops i40e_netdev_ops = {
+	.ndo_open		= i40e_open,
+	.ndo_stop		= i40e_close,
+	.ndo_start_xmit		= i40e_lan_xmit_frame,
+	.ndo_get_stats64	= i40e_get_netdev_stats_struct,
+	.ndo_set_rx_mode	= i40e_set_rx_mode,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_set_mac_address	= i40e_set_mac,
+	.ndo_change_mtu		= i40e_change_mtu,
+	.ndo_eth_ioctl		= i40e_ioctl,
+	.ndo_tx_timeout		= i40e_tx_timeout,
+	.ndo_vlan_rx_add_vid	= i40e_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= i40e_vlan_rx_kill_vid,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= i40e_netpoll,
+#endif
+	.ndo_setup_tc		= __i40e_setup_tc,
+	.ndo_select_queue	= i40e_lan_select_queue,
+	.ndo_set_features	= i40e_set_features,
+	.ndo_set_vf_mac		= i40e_ndo_set_vf_mac,
+	.ndo_set_vf_vlan	= i40e_ndo_set_vf_port_vlan,
+	.ndo_get_vf_stats	= i40e_get_vf_stats,
+	.ndo_set_vf_rate	= i40e_ndo_set_vf_bw,
+	.ndo_get_vf_config	= i40e_ndo_get_vf_config,
+	.ndo_set_vf_link_state	= i40e_ndo_set_vf_link_state,
+	.ndo_set_vf_spoofchk	= i40e_ndo_set_vf_spoofchk,
+	.ndo_set_vf_trust	= i40e_ndo_set_vf_trust,
+	.ndo_get_phys_port_id	= i40e_get_phys_port_id,
+	.ndo_fdb_add		= i40e_ndo_fdb_add,
+	.ndo_features_check	= i40e_features_check,
+	.ndo_bridge_getlink	= i40e_ndo_bridge_getlink,
+	.ndo_bridge_setlink	= i40e_ndo_bridge_setlink,
+	.ndo_bpf		= i40e_xdp,
+	.ndo_xdp_xmit		= i40e_xdp_xmit,
+	.ndo_xsk_wakeup	        = i40e_xsk_wakeup,
+	.ndo_dfwd_add_station	= i40e_fwd_add,
+	.ndo_dfwd_del_station	= i40e_fwd_del,
+};
+
+/**
+ * i40e_config_netdev - Setup the netdev flags
+ * @vsi: the VSI being configured
+ *
+ * Returns 0 on success, negative value on failure
+ **/
+static int i40e_config_netdev(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_netdev_priv *np;
+	struct net_device *netdev;
+	u8 broadcast[ETH_ALEN];
+	u8 mac_addr[ETH_ALEN];
+	int etherdev_size;
+	netdev_features_t hw_enc_features;
+	netdev_features_t hw_features;
+
+	etherdev_size = sizeof(struct i40e_netdev_priv);
+	netdev = alloc_etherdev_mq(etherdev_size, vsi->alloc_queue_pairs);
+	if (!netdev)
+		return -ENOMEM;
+
+	vsi->netdev = netdev;
+	np = netdev_priv(netdev);
+	np->vsi = vsi;
+
+	hw_enc_features = NETIF_F_SG			|
+			  NETIF_F_HW_CSUM		|
+			  NETIF_F_HIGHDMA		|
+			  NETIF_F_SOFT_FEATURES		|
+			  NETIF_F_TSO			|
+			  NETIF_F_TSO_ECN		|
+			  NETIF_F_TSO6			|
+			  NETIF_F_GSO_GRE		|
+			  NETIF_F_GSO_GRE_CSUM		|
+			  NETIF_F_GSO_PARTIAL		|
+			  NETIF_F_GSO_IPXIP4		|
+			  NETIF_F_GSO_IPXIP6		|
+			  NETIF_F_GSO_UDP_TUNNEL	|
+			  NETIF_F_GSO_UDP_TUNNEL_CSUM	|
+			  NETIF_F_GSO_UDP_L4		|
+			  NETIF_F_SCTP_CRC		|
+			  NETIF_F_RXHASH		|
+			  NETIF_F_RXCSUM		|
+			  0;
+
+	if (!(pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE))
+		netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+	netdev->udp_tunnel_nic_info = &pf->udp_tunnel_nic;
+
+	netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
+
+	netdev->hw_enc_features |= hw_enc_features;
+
+	/* record features VLANs can make use of */
+	netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
+
+#define I40E_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE |		\
+				   NETIF_F_GSO_GRE_CSUM |	\
+				   NETIF_F_GSO_IPXIP4 |		\
+				   NETIF_F_GSO_IPXIP6 |		\
+				   NETIF_F_GSO_UDP_TUNNEL |	\
+				   NETIF_F_GSO_UDP_TUNNEL_CSUM)
+
+	netdev->gso_partial_features = I40E_GSO_PARTIAL_FEATURES;
+	netdev->features |= NETIF_F_GSO_PARTIAL |
+			    I40E_GSO_PARTIAL_FEATURES;
+
+	netdev->mpls_features |= NETIF_F_SG;
+	netdev->mpls_features |= NETIF_F_HW_CSUM;
+	netdev->mpls_features |= NETIF_F_TSO;
+	netdev->mpls_features |= NETIF_F_TSO6;
+	netdev->mpls_features |= I40E_GSO_PARTIAL_FEATURES;
+
+	/* enable macvlan offloads */
+	netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD;
+
+	hw_features = hw_enc_features		|
+		      NETIF_F_HW_VLAN_CTAG_TX	|
+		      NETIF_F_HW_VLAN_CTAG_RX;
+
+	if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
+		hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC;
+
+	netdev->hw_features |= hw_features | NETIF_F_LOOPBACK;
+
+	netdev->features |= hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
+	netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
+
+	netdev->features &= ~NETIF_F_HW_TC;
+
+	if (vsi->type == I40E_VSI_MAIN) {
+		SET_NETDEV_DEV(netdev, &pf->pdev->dev);
+		ether_addr_copy(mac_addr, hw->mac.perm_addr);
+		/* The following steps are necessary for two reasons. First,
+		 * some older NVM configurations load a default MAC-VLAN
+		 * filter that will accept any tagged packet, and we want to
+		 * replace this with a normal filter. Additionally, it is
+		 * possible our MAC address was provided by the platform using
+		 * Open Firmware or similar.
+		 *
+		 * Thus, we need to remove the default filter and install one
+		 * specific to the MAC address.
+		 */
+		i40e_rm_default_mac_filter(vsi, mac_addr);
+		spin_lock_bh(&vsi->mac_filter_hash_lock);
+		i40e_add_mac_filter(vsi, mac_addr);
+		spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+		netdev->xdp_features = NETDEV_XDP_ACT_BASIC |
+				       NETDEV_XDP_ACT_REDIRECT |
+				       NETDEV_XDP_ACT_XSK_ZEROCOPY |
+				       NETDEV_XDP_ACT_RX_SG;
+		netdev->xdp_zc_max_segs = I40E_MAX_BUFFER_TXD;
+	} else {
+		/* Relate the VSI_VMDQ name to the VSI_MAIN name. Note that we
+		 * are still limited by IFNAMSIZ, but we're adding 'v%d\0' to
+		 * the end, which is 4 bytes long, so force truncation of the
+		 * original name by IFNAMSIZ - 4
+		 */
+		snprintf(netdev->name, IFNAMSIZ, "%.*sv%%d",
+			 IFNAMSIZ - 4,
+			 pf->vsi[pf->lan_vsi]->netdev->name);
+		eth_random_addr(mac_addr);
+
+		spin_lock_bh(&vsi->mac_filter_hash_lock);
+		i40e_add_mac_filter(vsi, mac_addr);
+		spin_unlock_bh(&vsi->mac_filter_hash_lock);
+	}
+
+	/* Add the broadcast filter so that we initially will receive
+	 * broadcast packets. Note that when a new VLAN is first added the
+	 * driver will convert all filters marked I40E_VLAN_ANY into VLAN
+	 * specific filters as part of transitioning into "vlan" operation.
+	 * When more VLANs are added, the driver will copy each existing MAC
+	 * filter and add it for the new VLAN.
+	 *
+	 * Broadcast filters are handled specially by
+	 * i40e_sync_filters_subtask, as the driver must to set the broadcast
+	 * promiscuous bit instead of adding this directly as a MAC/VLAN
+	 * filter. The subtask will update the correct broadcast promiscuous
+	 * bits as VLANs become active or inactive.
+	 */
+	eth_broadcast_addr(broadcast);
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	i40e_add_mac_filter(vsi, broadcast);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	eth_hw_addr_set(netdev, mac_addr);
+	ether_addr_copy(netdev->perm_addr, mac_addr);
+
+	/* i40iw_net_event() reads 16 bytes from neigh->primary_key */
+	netdev->neigh_priv_len = sizeof(u32) * 4;
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+	netdev->priv_flags |= IFF_SUPP_NOFCS;
+	/* Setup netdev TC information */
+	i40e_vsi_config_netdev_tc(vsi, vsi->tc_config.enabled_tc);
+
+	netdev->netdev_ops = &i40e_netdev_ops;
+	netdev->watchdog_timeo = 5 * HZ;
+	i40e_set_ethtool_ops(netdev);
+
+	/* MTU range: 68 - 9706 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = I40E_MAX_RXBUFFER - I40E_PACKET_HDR_PAD;
+
+	return 0;
+}
+
+/**
+ * i40e_vsi_delete - Delete a VSI from the switch
+ * @vsi: the VSI being removed
+ *
+ * Returns 0 on success, negative value on failure
+ **/
+static void i40e_vsi_delete(struct i40e_vsi *vsi)
+{
+	/* remove default VSI is not allowed */
+	if (vsi == vsi->back->vsi[vsi->back->lan_vsi])
+		return;
+
+	i40e_aq_delete_element(&vsi->back->hw, vsi->seid, NULL);
+}
+
+/**
+ * i40e_is_vsi_uplink_mode_veb - Check if the VSI's uplink bridge mode is VEB
+ * @vsi: the VSI being queried
+ *
+ * Returns 1 if HW bridge mode is VEB and return 0 in case of VEPA mode
+ **/
+int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi)
+{
+	struct i40e_veb *veb;
+	struct i40e_pf *pf = vsi->back;
+
+	/* Uplink is not a bridge so default to VEB */
+	if (vsi->veb_idx >= I40E_MAX_VEB)
+		return 1;
+
+	veb = pf->veb[vsi->veb_idx];
+	if (!veb) {
+		dev_info(&pf->pdev->dev,
+			 "There is no veb associated with the bridge\n");
+		return -ENOENT;
+	}
+
+	/* Uplink is a bridge in VEPA mode */
+	if (veb->bridge_mode & BRIDGE_MODE_VEPA) {
+		return 0;
+	} else {
+		/* Uplink is a bridge in VEB mode */
+		return 1;
+	}
+
+	/* VEPA is now default bridge, so return 0 */
+	return 0;
+}
+
+/**
+ * i40e_add_vsi - Add a VSI to the switch
+ * @vsi: the VSI being configured
+ *
+ * This initializes a VSI context depending on the VSI type to be added and
+ * passes it down to the add_vsi aq command.
+ **/
+static int i40e_add_vsi(struct i40e_vsi *vsi)
+{
+	int ret = -ENODEV;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_vsi_context ctxt;
+	struct i40e_mac_filter *f;
+	struct hlist_node *h;
+	int bkt;
+
+	u8 enabled_tc = 0x1; /* TC0 enabled */
+	int f_count = 0;
+
+	memset(&ctxt, 0, sizeof(ctxt));
+	switch (vsi->type) {
+	case I40E_VSI_MAIN:
+		/* The PF's main VSI is already setup as part of the
+		 * device initialization, so we'll not bother with
+		 * the add_vsi call, but we will retrieve the current
+		 * VSI context.
+		 */
+		ctxt.seid = pf->main_vsi_seid;
+		ctxt.pf_num = pf->hw.pf_id;
+		ctxt.vf_num = 0;
+		ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
+		ctxt.flags = I40E_AQ_VSI_TYPE_PF;
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "couldn't get PF vsi config, err %pe aq_err %s\n",
+				 ERR_PTR(ret),
+				 i40e_aq_str(&pf->hw,
+					     pf->hw.aq.asq_last_status));
+			return -ENOENT;
+		}
+		vsi->info = ctxt.info;
+		vsi->info.valid_sections = 0;
+
+		vsi->seid = ctxt.seid;
+		vsi->id = ctxt.vsi_number;
+
+		enabled_tc = i40e_pf_get_tc_map(pf);
+
+		/* Source pruning is enabled by default, so the flag is
+		 * negative logic - if it's set, we need to fiddle with
+		 * the VSI to disable source pruning.
+		 */
+		if (pf->flags & I40E_FLAG_SOURCE_PRUNING_DISABLED) {
+			memset(&ctxt, 0, sizeof(ctxt));
+			ctxt.seid = pf->main_vsi_seid;
+			ctxt.pf_num = pf->hw.pf_id;
+			ctxt.vf_num = 0;
+			ctxt.info.valid_sections |=
+				     cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+			ctxt.info.switch_id =
+				   cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB);
+			ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+			if (ret) {
+				dev_info(&pf->pdev->dev,
+					 "update vsi failed, err %d aq_err %s\n",
+					 ret,
+					 i40e_aq_str(&pf->hw,
+						     pf->hw.aq.asq_last_status));
+				ret = -ENOENT;
+				goto err;
+			}
+		}
+
+		/* MFP mode setup queue map and update VSI */
+		if ((pf->flags & I40E_FLAG_MFP_ENABLED) &&
+		    !(pf->hw.func_caps.iscsi)) { /* NIC type PF */
+			memset(&ctxt, 0, sizeof(ctxt));
+			ctxt.seid = pf->main_vsi_seid;
+			ctxt.pf_num = pf->hw.pf_id;
+			ctxt.vf_num = 0;
+			i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
+			ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+			if (ret) {
+				dev_info(&pf->pdev->dev,
+					 "update vsi failed, err %pe aq_err %s\n",
+					 ERR_PTR(ret),
+					 i40e_aq_str(&pf->hw,
+						    pf->hw.aq.asq_last_status));
+				ret = -ENOENT;
+				goto err;
+			}
+			/* update the local VSI info queue map */
+			i40e_vsi_update_queue_map(vsi, &ctxt);
+			vsi->info.valid_sections = 0;
+		} else {
+			/* Default/Main VSI is only enabled for TC0
+			 * reconfigure it to enable all TCs that are
+			 * available on the port in SFP mode.
+			 * For MFP case the iSCSI PF would use this
+			 * flow to enable LAN+iSCSI TC.
+			 */
+			ret = i40e_vsi_config_tc(vsi, enabled_tc);
+			if (ret) {
+				/* Single TC condition is not fatal,
+				 * message and continue
+				 */
+				dev_info(&pf->pdev->dev,
+					 "failed to configure TCs for main VSI tc_map 0x%08x, err %pe aq_err %s\n",
+					 enabled_tc,
+					 ERR_PTR(ret),
+					 i40e_aq_str(&pf->hw,
+						    pf->hw.aq.asq_last_status));
+			}
+		}
+		break;
+
+	case I40E_VSI_FDIR:
+		ctxt.pf_num = hw->pf_id;
+		ctxt.vf_num = 0;
+		ctxt.uplink_seid = vsi->uplink_seid;
+		ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
+		ctxt.flags = I40E_AQ_VSI_TYPE_PF;
+		if ((pf->flags & I40E_FLAG_VEB_MODE_ENABLED) &&
+		    (i40e_is_vsi_uplink_mode_veb(vsi))) {
+			ctxt.info.valid_sections |=
+			     cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+			ctxt.info.switch_id =
+			   cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+		}
+		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
+		break;
+
+	case I40E_VSI_VMDQ2:
+		ctxt.pf_num = hw->pf_id;
+		ctxt.vf_num = 0;
+		ctxt.uplink_seid = vsi->uplink_seid;
+		ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
+		ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2;
+
+		/* This VSI is connected to VEB so the switch_id
+		 * should be set to zero by default.
+		 */
+		if (i40e_is_vsi_uplink_mode_veb(vsi)) {
+			ctxt.info.valid_sections |=
+				cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+			ctxt.info.switch_id =
+				cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+		}
+
+		/* Setup the VSI tx/rx queue map for TC0 only for now */
+		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
+		break;
+
+	case I40E_VSI_SRIOV:
+		ctxt.pf_num = hw->pf_id;
+		ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id;
+		ctxt.uplink_seid = vsi->uplink_seid;
+		ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
+		ctxt.flags = I40E_AQ_VSI_TYPE_VF;
+
+		/* This VSI is connected to VEB so the switch_id
+		 * should be set to zero by default.
+		 */
+		if (i40e_is_vsi_uplink_mode_veb(vsi)) {
+			ctxt.info.valid_sections |=
+				cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+			ctxt.info.switch_id =
+				cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+		}
+
+		if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
+			ctxt.info.valid_sections |=
+				cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
+			ctxt.info.queueing_opt_flags |=
+				(I40E_AQ_VSI_QUE_OPT_TCP_ENA |
+				 I40E_AQ_VSI_QUE_OPT_RSS_LUT_VSI);
+		}
+
+		ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
+		ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_ALL;
+		if (pf->vf[vsi->vf_id].spoofchk) {
+			ctxt.info.valid_sections |=
+				cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID);
+			ctxt.info.sec_flags |=
+				(I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK |
+				 I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK);
+		}
+		/* Setup the VSI tx/rx queue map for TC0 only for now */
+		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
+		break;
+
+	case I40E_VSI_IWARP:
+		/* send down message to iWARP */
+		break;
+
+	default:
+		return -ENODEV;
+	}
+
+	if (vsi->type != I40E_VSI_MAIN) {
+		ret = i40e_aq_add_vsi(hw, &ctxt, NULL);
+		if (ret) {
+			dev_info(&vsi->back->pdev->dev,
+				 "add vsi failed, err %pe aq_err %s\n",
+				 ERR_PTR(ret),
+				 i40e_aq_str(&pf->hw,
+					     pf->hw.aq.asq_last_status));
+			ret = -ENOENT;
+			goto err;
+		}
+		vsi->info = ctxt.info;
+		vsi->info.valid_sections = 0;
+		vsi->seid = ctxt.seid;
+		vsi->id = ctxt.vsi_number;
+	}
+
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	vsi->active_filters = 0;
+	/* If macvlan filters already exist, force them to get loaded */
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+		f->state = I40E_FILTER_NEW;
+		f_count++;
+	}
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+	clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
+
+	if (f_count) {
+		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+		set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
+	}
+
+	/* Update VSI BW information */
+	ret = i40e_vsi_get_bw_info(vsi);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "couldn't get vsi bw info, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		/* VSI is already added so not tearing that up */
+		ret = 0;
+	}
+
+err:
+	return ret;
+}
+
+/**
+ * i40e_vsi_release - Delete a VSI and free its resources
+ * @vsi: the VSI being removed
+ *
+ * Returns 0 on success or < 0 on error
+ **/
+int i40e_vsi_release(struct i40e_vsi *vsi)
+{
+	struct i40e_mac_filter *f;
+	struct hlist_node *h;
+	struct i40e_veb *veb = NULL;
+	struct i40e_pf *pf;
+	u16 uplink_seid;
+	int i, n, bkt;
+
+	pf = vsi->back;
+
+	/* release of a VEB-owner or last VSI is not allowed */
+	if (vsi->flags & I40E_VSI_FLAG_VEB_OWNER) {
+		dev_info(&pf->pdev->dev, "VSI %d has existing VEB %d\n",
+			 vsi->seid, vsi->uplink_seid);
+		return -ENODEV;
+	}
+	if (vsi == pf->vsi[pf->lan_vsi] &&
+	    !test_bit(__I40E_DOWN, pf->state)) {
+		dev_info(&pf->pdev->dev, "Can't remove PF VSI\n");
+		return -ENODEV;
+	}
+	set_bit(__I40E_VSI_RELEASING, vsi->state);
+	uplink_seid = vsi->uplink_seid;
+	if (vsi->type != I40E_VSI_SRIOV) {
+		if (vsi->netdev_registered) {
+			vsi->netdev_registered = false;
+			if (vsi->netdev) {
+				/* results in a call to i40e_close() */
+				unregister_netdev(vsi->netdev);
+			}
+		} else {
+			i40e_vsi_close(vsi);
+		}
+		i40e_vsi_disable_irq(vsi);
+	}
+
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+
+	/* clear the sync flag on all filters */
+	if (vsi->netdev) {
+		__dev_uc_unsync(vsi->netdev, NULL);
+		__dev_mc_unsync(vsi->netdev, NULL);
+	}
+
+	/* make sure any remaining filters are marked for deletion */
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
+		__i40e_del_filter(vsi, f);
+
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	i40e_sync_vsi_filters(vsi);
+
+	i40e_vsi_delete(vsi);
+	i40e_vsi_free_q_vectors(vsi);
+	if (vsi->netdev) {
+		free_netdev(vsi->netdev);
+		vsi->netdev = NULL;
+	}
+	i40e_vsi_clear_rings(vsi);
+	i40e_vsi_clear(vsi);
+
+	/* If this was the last thing on the VEB, except for the
+	 * controlling VSI, remove the VEB, which puts the controlling
+	 * VSI onto the next level down in the switch.
+	 *
+	 * Well, okay, there's one more exception here: don't remove
+	 * the orphan VEBs yet.  We'll wait for an explicit remove request
+	 * from up the network stack.
+	 */
+	for (n = 0, i = 0; i < pf->num_alloc_vsi; i++) {
+		if (pf->vsi[i] &&
+		    pf->vsi[i]->uplink_seid == uplink_seid &&
+		    (pf->vsi[i]->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) {
+			n++;      /* count the VSIs */
+		}
+	}
+	for (i = 0; i < I40E_MAX_VEB; i++) {
+		if (!pf->veb[i])
+			continue;
+		if (pf->veb[i]->uplink_seid == uplink_seid)
+			n++;     /* count the VEBs */
+		if (pf->veb[i]->seid == uplink_seid)
+			veb = pf->veb[i];
+	}
+	if (n == 0 && veb && veb->uplink_seid != 0)
+		i40e_veb_release(veb);
+
+	return 0;
+}
+
+/**
+ * i40e_vsi_setup_vectors - Set up the q_vectors for the given VSI
+ * @vsi: ptr to the VSI
+ *
+ * This should only be called after i40e_vsi_mem_alloc() which allocates the
+ * corresponding SW VSI structure and initializes num_queue_pairs for the
+ * newly allocated VSI.
+ *
+ * Returns 0 on success or negative on failure
+ **/
+static int i40e_vsi_setup_vectors(struct i40e_vsi *vsi)
+{
+	int ret = -ENOENT;
+	struct i40e_pf *pf = vsi->back;
+
+	if (vsi->q_vectors[0]) {
+		dev_info(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
+			 vsi->seid);
+		return -EEXIST;
+	}
+
+	if (vsi->base_vector) {
+		dev_info(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n",
+			 vsi->seid, vsi->base_vector);
+		return -EEXIST;
+	}
+
+	ret = i40e_vsi_alloc_q_vectors(vsi);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "failed to allocate %d q_vector for VSI %d, ret=%d\n",
+			 vsi->num_q_vectors, vsi->seid, ret);
+		vsi->num_q_vectors = 0;
+		goto vector_setup_out;
+	}
+
+	/* In Legacy mode, we do not have to get any other vector since we
+	 * piggyback on the misc/ICR0 for queue interrupts.
+	*/
+	if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
+		return ret;
+	if (vsi->num_q_vectors)
+		vsi->base_vector = i40e_get_lump(pf, pf->irq_pile,
+						 vsi->num_q_vectors, vsi->idx);
+	if (vsi->base_vector < 0) {
+		dev_info(&pf->pdev->dev,
+			 "failed to get tracking for %d vectors for VSI %d, err=%d\n",
+			 vsi->num_q_vectors, vsi->seid, vsi->base_vector);
+		i40e_vsi_free_q_vectors(vsi);
+		ret = -ENOENT;
+		goto vector_setup_out;
+	}
+
+vector_setup_out:
+	return ret;
+}
+
+/**
+ * i40e_vsi_reinit_setup - return and reallocate resources for a VSI
+ * @vsi: pointer to the vsi.
+ *
+ * This re-allocates a vsi's queue resources.
+ *
+ * Returns pointer to the successfully allocated and configured VSI sw struct
+ * on success, otherwise returns NULL on failure.
+ **/
+static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
+{
+	u16 alloc_queue_pairs;
+	struct i40e_pf *pf;
+	u8 enabled_tc;
+	int ret;
+
+	if (!vsi)
+		return NULL;
+
+	pf = vsi->back;
+
+	i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx);
+	i40e_vsi_clear_rings(vsi);
+
+	i40e_vsi_free_arrays(vsi, false);
+	i40e_set_num_rings_in_vsi(vsi);
+	ret = i40e_vsi_alloc_arrays(vsi, false);
+	if (ret)
+		goto err_vsi;
+
+	alloc_queue_pairs = vsi->alloc_queue_pairs *
+			    (i40e_enabled_xdp_vsi(vsi) ? 2 : 1);
+
+	ret = i40e_get_lump(pf, pf->qp_pile, alloc_queue_pairs, vsi->idx);
+	if (ret < 0) {
+		dev_info(&pf->pdev->dev,
+			 "failed to get tracking for %d queues for VSI %d err %d\n",
+			 alloc_queue_pairs, vsi->seid, ret);
+		goto err_vsi;
+	}
+	vsi->base_queue = ret;
+
+	/* Update the FW view of the VSI. Force a reset of TC and queue
+	 * layout configurations.
+	 */
+	enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc;
+	pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0;
+	pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid;
+	i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc);
+	if (vsi->type == I40E_VSI_MAIN)
+		i40e_rm_default_mac_filter(vsi, pf->hw.mac.perm_addr);
+
+	/* assign it some queues */
+	ret = i40e_alloc_rings(vsi);
+	if (ret)
+		goto err_rings;
+
+	/* map all of the rings to the q_vectors */
+	i40e_vsi_map_rings_to_vectors(vsi);
+	return vsi;
+
+err_rings:
+	i40e_vsi_free_q_vectors(vsi);
+	if (vsi->netdev_registered) {
+		vsi->netdev_registered = false;
+		unregister_netdev(vsi->netdev);
+		free_netdev(vsi->netdev);
+		vsi->netdev = NULL;
+	}
+	i40e_aq_delete_element(&pf->hw, vsi->seid, NULL);
+err_vsi:
+	i40e_vsi_clear(vsi);
+	return NULL;
+}
+
+/**
+ * i40e_vsi_setup - Set up a VSI by a given type
+ * @pf: board private structure
+ * @type: VSI type
+ * @uplink_seid: the switch element to link to
+ * @param1: usage depends upon VSI type. For VF types, indicates VF id
+ *
+ * This allocates the sw VSI structure and its queue resources, then add a VSI
+ * to the identified VEB.
+ *
+ * Returns pointer to the successfully allocated and configure VSI sw struct on
+ * success, otherwise returns NULL on failure.
+ **/
+struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
+				u16 uplink_seid, u32 param1)
+{
+	struct i40e_vsi *vsi = NULL;
+	struct i40e_veb *veb = NULL;
+	u16 alloc_queue_pairs;
+	int ret, i;
+	int v_idx;
+
+	/* The requested uplink_seid must be either
+	 *     - the PF's port seid
+	 *              no VEB is needed because this is the PF
+	 *              or this is a Flow Director special case VSI
+	 *     - seid of an existing VEB
+	 *     - seid of a VSI that owns an existing VEB
+	 *     - seid of a VSI that doesn't own a VEB
+	 *              a new VEB is created and the VSI becomes the owner
+	 *     - seid of the PF VSI, which is what creates the first VEB
+	 *              this is a special case of the previous
+	 *
+	 * Find which uplink_seid we were given and create a new VEB if needed
+	 */
+	for (i = 0; i < I40E_MAX_VEB; i++) {
+		if (pf->veb[i] && pf->veb[i]->seid == uplink_seid) {
+			veb = pf->veb[i];
+			break;
+		}
+	}
+
+	if (!veb && uplink_seid != pf->mac_seid) {
+
+		for (i = 0; i < pf->num_alloc_vsi; i++) {
+			if (pf->vsi[i] && pf->vsi[i]->seid == uplink_seid) {
+				vsi = pf->vsi[i];
+				break;
+			}
+		}
+		if (!vsi) {
+			dev_info(&pf->pdev->dev, "no such uplink_seid %d\n",
+				 uplink_seid);
+			return NULL;
+		}
+
+		if (vsi->uplink_seid == pf->mac_seid)
+			veb = i40e_veb_setup(pf, 0, pf->mac_seid, vsi->seid,
+					     vsi->tc_config.enabled_tc);
+		else if ((vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0)
+			veb = i40e_veb_setup(pf, 0, vsi->uplink_seid, vsi->seid,
+					     vsi->tc_config.enabled_tc);
+		if (veb) {
+			if (vsi->seid != pf->vsi[pf->lan_vsi]->seid) {
+				dev_info(&vsi->back->pdev->dev,
+					 "New VSI creation error, uplink seid of LAN VSI expected.\n");
+				return NULL;
+			}
+			/* We come up by default in VEPA mode if SRIOV is not
+			 * already enabled, in which case we can't force VEPA
+			 * mode.
+			 */
+			if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+				veb->bridge_mode = BRIDGE_MODE_VEPA;
+				pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+			}
+			i40e_config_bridge_mode(veb);
+		}
+		for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
+			if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid)
+				veb = pf->veb[i];
+		}
+		if (!veb) {
+			dev_info(&pf->pdev->dev, "couldn't add VEB\n");
+			return NULL;
+		}
+
+		vsi->flags |= I40E_VSI_FLAG_VEB_OWNER;
+		uplink_seid = veb->seid;
+	}
+
+	/* get vsi sw struct */
+	v_idx = i40e_vsi_mem_alloc(pf, type);
+	if (v_idx < 0)
+		goto err_alloc;
+	vsi = pf->vsi[v_idx];
+	if (!vsi)
+		goto err_alloc;
+	vsi->type = type;
+	vsi->veb_idx = (veb ? veb->idx : I40E_NO_VEB);
+
+	if (type == I40E_VSI_MAIN)
+		pf->lan_vsi = v_idx;
+	else if (type == I40E_VSI_SRIOV)
+		vsi->vf_id = param1;
+	/* assign it some queues */
+	alloc_queue_pairs = vsi->alloc_queue_pairs *
+			    (i40e_enabled_xdp_vsi(vsi) ? 2 : 1);
+
+	ret = i40e_get_lump(pf, pf->qp_pile, alloc_queue_pairs, vsi->idx);
+	if (ret < 0) {
+		dev_info(&pf->pdev->dev,
+			 "failed to get tracking for %d queues for VSI %d err=%d\n",
+			 alloc_queue_pairs, vsi->seid, ret);
+		goto err_vsi;
+	}
+	vsi->base_queue = ret;
+
+	/* get a VSI from the hardware */
+	vsi->uplink_seid = uplink_seid;
+	ret = i40e_add_vsi(vsi);
+	if (ret)
+		goto err_vsi;
+
+	switch (vsi->type) {
+	/* setup the netdev if needed */
+	case I40E_VSI_MAIN:
+	case I40E_VSI_VMDQ2:
+		ret = i40e_config_netdev(vsi);
+		if (ret)
+			goto err_netdev;
+		ret = i40e_netif_set_realnum_tx_rx_queues(vsi);
+		if (ret)
+			goto err_netdev;
+		ret = register_netdev(vsi->netdev);
+		if (ret)
+			goto err_netdev;
+		vsi->netdev_registered = true;
+		netif_carrier_off(vsi->netdev);
+#ifdef CONFIG_I40E_DCB
+		/* Setup DCB netlink interface */
+		i40e_dcbnl_setup(vsi);
+#endif /* CONFIG_I40E_DCB */
+		fallthrough;
+	case I40E_VSI_FDIR:
+		/* set up vectors and rings if needed */
+		ret = i40e_vsi_setup_vectors(vsi);
+		if (ret)
+			goto err_msix;
+
+		ret = i40e_alloc_rings(vsi);
+		if (ret)
+			goto err_rings;
+
+		/* map all of the rings to the q_vectors */
+		i40e_vsi_map_rings_to_vectors(vsi);
+
+		i40e_vsi_reset_stats(vsi);
+		break;
+	default:
+		/* no netdev or rings for the other VSI types */
+		break;
+	}
+
+	if ((pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) &&
+	    (vsi->type == I40E_VSI_VMDQ2)) {
+		ret = i40e_vsi_config_rss(vsi);
+	}
+	return vsi;
+
+err_rings:
+	i40e_vsi_free_q_vectors(vsi);
+err_msix:
+	if (vsi->netdev_registered) {
+		vsi->netdev_registered = false;
+		unregister_netdev(vsi->netdev);
+		free_netdev(vsi->netdev);
+		vsi->netdev = NULL;
+	}
+err_netdev:
+	i40e_aq_delete_element(&pf->hw, vsi->seid, NULL);
+err_vsi:
+	i40e_vsi_clear(vsi);
+err_alloc:
+	return NULL;
+}
+
+/**
+ * i40e_veb_get_bw_info - Query VEB BW information
+ * @veb: the veb to query
+ *
+ * Query the Tx scheduler BW configuration data for given VEB
+ **/
+static int i40e_veb_get_bw_info(struct i40e_veb *veb)
+{
+	struct i40e_aqc_query_switching_comp_ets_config_resp ets_data;
+	struct i40e_aqc_query_switching_comp_bw_config_resp bw_data;
+	struct i40e_pf *pf = veb->pf;
+	struct i40e_hw *hw = &pf->hw;
+	u32 tc_bw_max;
+	int ret = 0;
+	int i;
+
+	ret = i40e_aq_query_switch_comp_bw_config(hw, veb->seid,
+						  &bw_data, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "query veb bw config failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, hw->aq.asq_last_status));
+		goto out;
+	}
+
+	ret = i40e_aq_query_switch_comp_ets_config(hw, veb->seid,
+						   &ets_data, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "query veb bw ets config failed, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, hw->aq.asq_last_status));
+		goto out;
+	}
+
+	veb->bw_limit = le16_to_cpu(ets_data.port_bw_limit);
+	veb->bw_max_quanta = ets_data.tc_bw_max;
+	veb->is_abs_credits = bw_data.absolute_credits_enable;
+	veb->enabled_tc = ets_data.tc_valid_bits;
+	tc_bw_max = le16_to_cpu(bw_data.tc_bw_max[0]) |
+		    (le16_to_cpu(bw_data.tc_bw_max[1]) << 16);
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		veb->bw_tc_share_credits[i] = bw_data.tc_bw_share_credits[i];
+		veb->bw_tc_limit_credits[i] =
+					le16_to_cpu(bw_data.tc_bw_limits[i]);
+		veb->bw_tc_max_quanta[i] = ((tc_bw_max >> (i*4)) & 0x7);
+	}
+
+out:
+	return ret;
+}
+
+/**
+ * i40e_veb_mem_alloc - Allocates the next available struct veb in the PF
+ * @pf: board private structure
+ *
+ * On error: returns error code (negative)
+ * On success: returns vsi index in PF (positive)
+ **/
+static int i40e_veb_mem_alloc(struct i40e_pf *pf)
+{
+	int ret = -ENOENT;
+	struct i40e_veb *veb;
+	int i;
+
+	/* Need to protect the allocation of switch elements at the PF level */
+	mutex_lock(&pf->switch_mutex);
+
+	/* VEB list may be fragmented if VEB creation/destruction has
+	 * been happening.  We can afford to do a quick scan to look
+	 * for any free slots in the list.
+	 *
+	 * find next empty veb slot, looping back around if necessary
+	 */
+	i = 0;
+	while ((i < I40E_MAX_VEB) && (pf->veb[i] != NULL))
+		i++;
+	if (i >= I40E_MAX_VEB) {
+		ret = -ENOMEM;
+		goto err_alloc_veb;  /* out of VEB slots! */
+	}
+
+	veb = kzalloc(sizeof(*veb), GFP_KERNEL);
+	if (!veb) {
+		ret = -ENOMEM;
+		goto err_alloc_veb;
+	}
+	veb->pf = pf;
+	veb->idx = i;
+	veb->enabled_tc = 1;
+
+	pf->veb[i] = veb;
+	ret = i;
+err_alloc_veb:
+	mutex_unlock(&pf->switch_mutex);
+	return ret;
+}
+
+/**
+ * i40e_switch_branch_release - Delete a branch of the switch tree
+ * @branch: where to start deleting
+ *
+ * This uses recursion to find the tips of the branch to be
+ * removed, deleting until we get back to and can delete this VEB.
+ **/
+static void i40e_switch_branch_release(struct i40e_veb *branch)
+{
+	struct i40e_pf *pf = branch->pf;
+	u16 branch_seid = branch->seid;
+	u16 veb_idx = branch->idx;
+	int i;
+
+	/* release any VEBs on this VEB - RECURSION */
+	for (i = 0; i < I40E_MAX_VEB; i++) {
+		if (!pf->veb[i])
+			continue;
+		if (pf->veb[i]->uplink_seid == branch->seid)
+			i40e_switch_branch_release(pf->veb[i]);
+	}
+
+	/* Release the VSIs on this VEB, but not the owner VSI.
+	 *
+	 * NOTE: Removing the last VSI on a VEB has the SIDE EFFECT of removing
+	 *       the VEB itself, so don't use (*branch) after this loop.
+	 */
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
+		if (!pf->vsi[i])
+			continue;
+		if (pf->vsi[i]->uplink_seid == branch_seid &&
+		   (pf->vsi[i]->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) {
+			i40e_vsi_release(pf->vsi[i]);
+		}
+	}
+
+	/* There's one corner case where the VEB might not have been
+	 * removed, so double check it here and remove it if needed.
+	 * This case happens if the veb was created from the debugfs
+	 * commands and no VSIs were added to it.
+	 */
+	if (pf->veb[veb_idx])
+		i40e_veb_release(pf->veb[veb_idx]);
+}
+
+/**
+ * i40e_veb_clear - remove veb struct
+ * @veb: the veb to remove
+ **/
+static void i40e_veb_clear(struct i40e_veb *veb)
+{
+	if (!veb)
+		return;
+
+	if (veb->pf) {
+		struct i40e_pf *pf = veb->pf;
+
+		mutex_lock(&pf->switch_mutex);
+		if (pf->veb[veb->idx] == veb)
+			pf->veb[veb->idx] = NULL;
+		mutex_unlock(&pf->switch_mutex);
+	}
+
+	kfree(veb);
+}
+
+/**
+ * i40e_veb_release - Delete a VEB and free its resources
+ * @veb: the VEB being removed
+ **/
+void i40e_veb_release(struct i40e_veb *veb)
+{
+	struct i40e_vsi *vsi = NULL;
+	struct i40e_pf *pf;
+	int i, n = 0;
+
+	pf = veb->pf;
+
+	/* find the remaining VSI and check for extras */
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
+		if (pf->vsi[i] && pf->vsi[i]->uplink_seid == veb->seid) {
+			n++;
+			vsi = pf->vsi[i];
+		}
+	}
+	if (n != 1) {
+		dev_info(&pf->pdev->dev,
+			 "can't remove VEB %d with %d VSIs left\n",
+			 veb->seid, n);
+		return;
+	}
+
+	/* move the remaining VSI to uplink veb */
+	vsi->flags &= ~I40E_VSI_FLAG_VEB_OWNER;
+	if (veb->uplink_seid) {
+		vsi->uplink_seid = veb->uplink_seid;
+		if (veb->uplink_seid == pf->mac_seid)
+			vsi->veb_idx = I40E_NO_VEB;
+		else
+			vsi->veb_idx = veb->veb_idx;
+	} else {
+		/* floating VEB */
+		vsi->uplink_seid = pf->vsi[pf->lan_vsi]->uplink_seid;
+		vsi->veb_idx = pf->vsi[pf->lan_vsi]->veb_idx;
+	}
+
+	i40e_aq_delete_element(&pf->hw, veb->seid, NULL);
+	i40e_veb_clear(veb);
+}
+
+/**
+ * i40e_add_veb - create the VEB in the switch
+ * @veb: the VEB to be instantiated
+ * @vsi: the controlling VSI
+ **/
+static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = veb->pf;
+	bool enable_stats = !!(pf->flags & I40E_FLAG_VEB_STATS_ENABLED);
+	int ret;
+
+	ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi->seid,
+			      veb->enabled_tc, false,
+			      &veb->seid, enable_stats, NULL);
+
+	/* get a VEB from the hardware */
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "couldn't add VEB, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		return -EPERM;
+	}
+
+	/* get statistics counter */
+	ret = i40e_aq_get_veb_parameters(&pf->hw, veb->seid, NULL, NULL,
+					 &veb->stats_idx, NULL, NULL, NULL);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "couldn't get VEB statistics idx, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		return -EPERM;
+	}
+	ret = i40e_veb_get_bw_info(veb);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "couldn't get VEB bw info, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		i40e_aq_delete_element(&pf->hw, veb->seid, NULL);
+		return -ENOENT;
+	}
+
+	vsi->uplink_seid = veb->seid;
+	vsi->veb_idx = veb->idx;
+	vsi->flags |= I40E_VSI_FLAG_VEB_OWNER;
+
+	return 0;
+}
+
+/**
+ * i40e_veb_setup - Set up a VEB
+ * @pf: board private structure
+ * @flags: VEB setup flags
+ * @uplink_seid: the switch element to link to
+ * @vsi_seid: the initial VSI seid
+ * @enabled_tc: Enabled TC bit-map
+ *
+ * This allocates the sw VEB structure and links it into the switch
+ * It is possible and legal for this to be a duplicate of an already
+ * existing VEB.  It is also possible for both uplink and vsi seids
+ * to be zero, in order to create a floating VEB.
+ *
+ * Returns pointer to the successfully allocated VEB sw struct on
+ * success, otherwise returns NULL on failure.
+ **/
+struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags,
+				u16 uplink_seid, u16 vsi_seid,
+				u8 enabled_tc)
+{
+	struct i40e_veb *veb, *uplink_veb = NULL;
+	int vsi_idx, veb_idx;
+	int ret;
+
+	/* if one seid is 0, the other must be 0 to create a floating relay */
+	if ((uplink_seid == 0 || vsi_seid == 0) &&
+	    (uplink_seid + vsi_seid != 0)) {
+		dev_info(&pf->pdev->dev,
+			 "one, not both seid's are 0: uplink=%d vsi=%d\n",
+			 uplink_seid, vsi_seid);
+		return NULL;
+	}
+
+	/* make sure there is such a vsi and uplink */
+	for (vsi_idx = 0; vsi_idx < pf->num_alloc_vsi; vsi_idx++)
+		if (pf->vsi[vsi_idx] && pf->vsi[vsi_idx]->seid == vsi_seid)
+			break;
+	if (vsi_idx == pf->num_alloc_vsi && vsi_seid != 0) {
+		dev_info(&pf->pdev->dev, "vsi seid %d not found\n",
+			 vsi_seid);
+		return NULL;
+	}
+
+	if (uplink_seid && uplink_seid != pf->mac_seid) {
+		for (veb_idx = 0; veb_idx < I40E_MAX_VEB; veb_idx++) {
+			if (pf->veb[veb_idx] &&
+			    pf->veb[veb_idx]->seid == uplink_seid) {
+				uplink_veb = pf->veb[veb_idx];
+				break;
+			}
+		}
+		if (!uplink_veb) {
+			dev_info(&pf->pdev->dev,
+				 "uplink seid %d not found\n", uplink_seid);
+			return NULL;
+		}
+	}
+
+	/* get veb sw struct */
+	veb_idx = i40e_veb_mem_alloc(pf);
+	if (veb_idx < 0)
+		goto err_alloc;
+	veb = pf->veb[veb_idx];
+	veb->flags = flags;
+	veb->uplink_seid = uplink_seid;
+	veb->veb_idx = (uplink_veb ? uplink_veb->idx : I40E_NO_VEB);
+	veb->enabled_tc = (enabled_tc ? enabled_tc : 0x1);
+
+	/* create the VEB in the switch */
+	ret = i40e_add_veb(veb, pf->vsi[vsi_idx]);
+	if (ret)
+		goto err_veb;
+	if (vsi_idx == pf->lan_vsi)
+		pf->lan_veb = veb->idx;
+
+	return veb;
+
+err_veb:
+	i40e_veb_clear(veb);
+err_alloc:
+	return NULL;
+}
+
+/**
+ * i40e_setup_pf_switch_element - set PF vars based on switch type
+ * @pf: board private structure
+ * @ele: element we are building info from
+ * @num_reported: total number of elements
+ * @printconfig: should we print the contents
+ *
+ * helper function to assist in extracting a few useful SEID values.
+ **/
+static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
+				struct i40e_aqc_switch_config_element_resp *ele,
+				u16 num_reported, bool printconfig)
+{
+	u16 downlink_seid = le16_to_cpu(ele->downlink_seid);
+	u16 uplink_seid = le16_to_cpu(ele->uplink_seid);
+	u8 element_type = ele->element_type;
+	u16 seid = le16_to_cpu(ele->seid);
+
+	if (printconfig)
+		dev_info(&pf->pdev->dev,
+			 "type=%d seid=%d uplink=%d downlink=%d\n",
+			 element_type, seid, uplink_seid, downlink_seid);
+
+	switch (element_type) {
+	case I40E_SWITCH_ELEMENT_TYPE_MAC:
+		pf->mac_seid = seid;
+		break;
+	case I40E_SWITCH_ELEMENT_TYPE_VEB:
+		/* Main VEB? */
+		if (uplink_seid != pf->mac_seid)
+			break;
+		if (pf->lan_veb >= I40E_MAX_VEB) {
+			int v;
+
+			/* find existing or else empty VEB */
+			for (v = 0; v < I40E_MAX_VEB; v++) {
+				if (pf->veb[v] && (pf->veb[v]->seid == seid)) {
+					pf->lan_veb = v;
+					break;
+				}
+			}
+			if (pf->lan_veb >= I40E_MAX_VEB) {
+				v = i40e_veb_mem_alloc(pf);
+				if (v < 0)
+					break;
+				pf->lan_veb = v;
+			}
+		}
+		if (pf->lan_veb >= I40E_MAX_VEB)
+			break;
+
+		pf->veb[pf->lan_veb]->seid = seid;
+		pf->veb[pf->lan_veb]->uplink_seid = pf->mac_seid;
+		pf->veb[pf->lan_veb]->pf = pf;
+		pf->veb[pf->lan_veb]->veb_idx = I40E_NO_VEB;
+		break;
+	case I40E_SWITCH_ELEMENT_TYPE_VSI:
+		if (num_reported != 1)
+			break;
+		/* This is immediately after a reset so we can assume this is
+		 * the PF's VSI
+		 */
+		pf->mac_seid = uplink_seid;
+		pf->pf_seid = downlink_seid;
+		pf->main_vsi_seid = seid;
+		if (printconfig)
+			dev_info(&pf->pdev->dev,
+				 "pf_seid=%d main_vsi_seid=%d\n",
+				 pf->pf_seid, pf->main_vsi_seid);
+		break;
+	case I40E_SWITCH_ELEMENT_TYPE_PF:
+	case I40E_SWITCH_ELEMENT_TYPE_VF:
+	case I40E_SWITCH_ELEMENT_TYPE_EMP:
+	case I40E_SWITCH_ELEMENT_TYPE_BMC:
+	case I40E_SWITCH_ELEMENT_TYPE_PE:
+	case I40E_SWITCH_ELEMENT_TYPE_PA:
+		/* ignore these for now */
+		break;
+	default:
+		dev_info(&pf->pdev->dev, "unknown element type=%d seid=%d\n",
+			 element_type, seid);
+		break;
+	}
+}
+
+/**
+ * i40e_fetch_switch_configuration - Get switch config from firmware
+ * @pf: board private structure
+ * @printconfig: should we print the contents
+ *
+ * Get the current switch configuration from the device and
+ * extract a few useful SEID values.
+ **/
+int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig)
+{
+	struct i40e_aqc_get_switch_config_resp *sw_config;
+	u16 next_seid = 0;
+	int ret = 0;
+	u8 *aq_buf;
+	int i;
+
+	aq_buf = kzalloc(I40E_AQ_LARGE_BUF, GFP_KERNEL);
+	if (!aq_buf)
+		return -ENOMEM;
+
+	sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf;
+	do {
+		u16 num_reported, num_total;
+
+		ret = i40e_aq_get_switch_config(&pf->hw, sw_config,
+						I40E_AQ_LARGE_BUF,
+						&next_seid, NULL);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "get switch config failed err %d aq_err %s\n",
+				 ret,
+				 i40e_aq_str(&pf->hw,
+					     pf->hw.aq.asq_last_status));
+			kfree(aq_buf);
+			return -ENOENT;
+		}
+
+		num_reported = le16_to_cpu(sw_config->header.num_reported);
+		num_total = le16_to_cpu(sw_config->header.num_total);
+
+		if (printconfig)
+			dev_info(&pf->pdev->dev,
+				 "header: %d reported %d total\n",
+				 num_reported, num_total);
+
+		for (i = 0; i < num_reported; i++) {
+			struct i40e_aqc_switch_config_element_resp *ele =
+				&sw_config->element[i];
+
+			i40e_setup_pf_switch_element(pf, ele, num_reported,
+						     printconfig);
+		}
+	} while (next_seid != 0);
+
+	kfree(aq_buf);
+	return ret;
+}
+
+/**
+ * i40e_setup_pf_switch - Setup the HW switch on startup or after reset
+ * @pf: board private structure
+ * @reinit: if the Main VSI needs to re-initialized.
+ * @lock_acquired: indicates whether or not the lock has been acquired
+ *
+ * Returns 0 on success, negative value on failure
+ **/
+static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired)
+{
+	u16 flags = 0;
+	int ret;
+
+	/* find out what's out there already */
+	ret = i40e_fetch_switch_configuration(pf, false);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "couldn't fetch switch config, err %pe aq_err %s\n",
+			 ERR_PTR(ret),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		return ret;
+	}
+	i40e_pf_reset_stats(pf);
+
+	/* set the switch config bit for the whole device to
+	 * support limited promisc or true promisc
+	 * when user requests promisc. The default is limited
+	 * promisc.
+	*/
+
+	if ((pf->hw.pf_id == 0) &&
+	    !(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) {
+		flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
+		pf->last_sw_conf_flags = flags;
+	}
+
+	if (pf->hw.pf_id == 0) {
+		u16 valid_flags;
+
+		valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
+		ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags, 0,
+						NULL);
+		if (ret && pf->hw.aq.asq_last_status != I40E_AQ_RC_ESRCH) {
+			dev_info(&pf->pdev->dev,
+				 "couldn't set switch config bits, err %pe aq_err %s\n",
+				 ERR_PTR(ret),
+				 i40e_aq_str(&pf->hw,
+					     pf->hw.aq.asq_last_status));
+			/* not a fatal problem, just keep going */
+		}
+		pf->last_sw_conf_valid_flags = valid_flags;
+	}
+
+	/* first time setup */
+	if (pf->lan_vsi == I40E_NO_VSI || reinit) {
+		struct i40e_vsi *vsi = NULL;
+		u16 uplink_seid;
+
+		/* Set up the PF VSI associated with the PF's main VSI
+		 * that is already in the HW switch
+		 */
+		if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb])
+			uplink_seid = pf->veb[pf->lan_veb]->seid;
+		else
+			uplink_seid = pf->mac_seid;
+		if (pf->lan_vsi == I40E_NO_VSI)
+			vsi = i40e_vsi_setup(pf, I40E_VSI_MAIN, uplink_seid, 0);
+		else if (reinit)
+			vsi = i40e_vsi_reinit_setup(pf->vsi[pf->lan_vsi]);
+		if (!vsi) {
+			dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n");
+			i40e_cloud_filter_exit(pf);
+			i40e_fdir_teardown(pf);
+			return -EAGAIN;
+		}
+	} else {
+		/* force a reset of TC and queue layout configurations */
+		u8 enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc;
+
+		pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0;
+		pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid;
+		i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc);
+	}
+	i40e_vlan_stripping_disable(pf->vsi[pf->lan_vsi]);
+
+	i40e_fdir_sb_setup(pf);
+
+	/* Setup static PF queue filter control settings */
+	ret = i40e_setup_pf_filter_control(pf);
+	if (ret) {
+		dev_info(&pf->pdev->dev, "setup_pf_filter_control failed: %d\n",
+			 ret);
+		/* Failure here should not stop continuing other steps */
+	}
+
+	/* enable RSS in the HW, even for only one queue, as the stack can use
+	 * the hash
+	 */
+	if ((pf->flags & I40E_FLAG_RSS_ENABLED))
+		i40e_pf_config_rss(pf);
+
+	/* fill in link information and enable LSE reporting */
+	i40e_link_event(pf);
+
+	/* Initialize user-specific link properties */
+	pf->fc_autoneg_status = ((pf->hw.phy.link_info.an_info &
+				  I40E_AQ_AN_COMPLETED) ? true : false);
+
+	i40e_ptp_init(pf);
+
+	if (!lock_acquired)
+		rtnl_lock();
+
+	/* repopulate tunnel port filters */
+	udp_tunnel_nic_reset_ntf(pf->vsi[pf->lan_vsi]->netdev);
+
+	if (!lock_acquired)
+		rtnl_unlock();
+
+	return ret;
+}
+
+/**
+ * i40e_determine_queue_usage - Work out queue distribution
+ * @pf: board private structure
+ **/
+static void i40e_determine_queue_usage(struct i40e_pf *pf)
+{
+	int queues_left;
+	int q_max;
+
+	pf->num_lan_qps = 0;
+
+	/* Find the max queues to be put into basic use.  We'll always be
+	 * using TC0, whether or not DCB is running, and TC0 will get the
+	 * big RSS set.
+	 */
+	queues_left = pf->hw.func_caps.num_tx_qp;
+
+	if ((queues_left == 1) ||
+	    !(pf->flags & I40E_FLAG_MSIX_ENABLED)) {
+		/* one qp for PF, no queues for anything else */
+		queues_left = 0;
+		pf->alloc_rss_size = pf->num_lan_qps = 1;
+
+		/* make sure all the fancies are disabled */
+		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
+			       I40E_FLAG_IWARP_ENABLED	|
+			       I40E_FLAG_FD_SB_ENABLED	|
+			       I40E_FLAG_FD_ATR_ENABLED	|
+			       I40E_FLAG_DCB_CAPABLE	|
+			       I40E_FLAG_DCB_ENABLED	|
+			       I40E_FLAG_SRIOV_ENABLED	|
+			       I40E_FLAG_VMDQ_ENABLED);
+		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+	} else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED |
+				  I40E_FLAG_FD_SB_ENABLED |
+				  I40E_FLAG_FD_ATR_ENABLED |
+				  I40E_FLAG_DCB_CAPABLE))) {
+		/* one qp for PF */
+		pf->alloc_rss_size = pf->num_lan_qps = 1;
+		queues_left -= pf->num_lan_qps;
+
+		pf->flags &= ~(I40E_FLAG_RSS_ENABLED	|
+			       I40E_FLAG_IWARP_ENABLED	|
+			       I40E_FLAG_FD_SB_ENABLED	|
+			       I40E_FLAG_FD_ATR_ENABLED	|
+			       I40E_FLAG_DCB_ENABLED	|
+			       I40E_FLAG_VMDQ_ENABLED);
+		pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+	} else {
+		/* Not enough queues for all TCs */
+		if ((pf->flags & I40E_FLAG_DCB_CAPABLE) &&
+		    (queues_left < I40E_MAX_TRAFFIC_CLASS)) {
+			pf->flags &= ~(I40E_FLAG_DCB_CAPABLE |
+					I40E_FLAG_DCB_ENABLED);
+			dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n");
+		}
+
+		/* limit lan qps to the smaller of qps, cpus or msix */
+		q_max = max_t(int, pf->rss_size_max, num_online_cpus());
+		q_max = min_t(int, q_max, pf->hw.func_caps.num_tx_qp);
+		q_max = min_t(int, q_max, pf->hw.func_caps.num_msix_vectors);
+		pf->num_lan_qps = q_max;
+
+		queues_left -= pf->num_lan_qps;
+	}
+
+	if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
+		if (queues_left > 1) {
+			queues_left -= 1; /* save 1 queue for FD */
+		} else {
+			pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+			pf->flags |= I40E_FLAG_FD_SB_INACTIVE;
+			dev_info(&pf->pdev->dev, "not enough queues for Flow Director. Flow Director feature is disabled\n");
+		}
+	}
+
+	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
+	    pf->num_vf_qps && pf->num_req_vfs && queues_left) {
+		pf->num_req_vfs = min_t(int, pf->num_req_vfs,
+					(queues_left / pf->num_vf_qps));
+		queues_left -= (pf->num_req_vfs * pf->num_vf_qps);
+	}
+
+	if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
+	    pf->num_vmdq_vsis && pf->num_vmdq_qps && queues_left) {
+		pf->num_vmdq_vsis = min_t(int, pf->num_vmdq_vsis,
+					  (queues_left / pf->num_vmdq_qps));
+		queues_left -= (pf->num_vmdq_vsis * pf->num_vmdq_qps);
+	}
+
+	pf->queues_left = queues_left;
+	dev_dbg(&pf->pdev->dev,
+		"qs_avail=%d FD SB=%d lan_qs=%d lan_tc0=%d vf=%d*%d vmdq=%d*%d, remaining=%d\n",
+		pf->hw.func_caps.num_tx_qp,
+		!!(pf->flags & I40E_FLAG_FD_SB_ENABLED),
+		pf->num_lan_qps, pf->alloc_rss_size, pf->num_req_vfs,
+		pf->num_vf_qps, pf->num_vmdq_vsis, pf->num_vmdq_qps,
+		queues_left);
+}
+
+/**
+ * i40e_setup_pf_filter_control - Setup PF static filter control
+ * @pf: PF to be setup
+ *
+ * i40e_setup_pf_filter_control sets up a PF's initial filter control
+ * settings. If PE/FCoE are enabled then it will also set the per PF
+ * based filter sizes required for them. It also enables Flow director,
+ * ethertype and macvlan type filter settings for the pf.
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_setup_pf_filter_control(struct i40e_pf *pf)
+{
+	struct i40e_filter_control_settings *settings = &pf->filter_settings;
+
+	settings->hash_lut_size = I40E_HASH_LUT_SIZE_128;
+
+	/* Flow Director is enabled */
+	if (pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED))
+		settings->enable_fdir = true;
+
+	/* Ethtype and MACVLAN filters enabled for PF */
+	settings->enable_ethtype = true;
+	settings->enable_macvlan = true;
+
+	if (i40e_set_filter_control(&pf->hw, settings))
+		return -ENOENT;
+
+	return 0;
+}
+
+#define INFO_STRING_LEN 255
+#define REMAIN(__x) (INFO_STRING_LEN - (__x))
+static void i40e_print_features(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	char *buf;
+	int i;
+
+	buf = kmalloc(INFO_STRING_LEN, GFP_KERNEL);
+	if (!buf)
+		return;
+
+	i = snprintf(buf, INFO_STRING_LEN, "Features: PF-id[%d]", hw->pf_id);
+#ifdef CONFIG_PCI_IOV
+	i += scnprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs);
+#endif
+	i += scnprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d",
+		      pf->hw.func_caps.num_vsis,
+		      pf->vsi[pf->lan_vsi]->num_queue_pairs);
+	if (pf->flags & I40E_FLAG_RSS_ENABLED)
+		i += scnprintf(&buf[i], REMAIN(i), " RSS");
+	if (pf->flags & I40E_FLAG_FD_ATR_ENABLED)
+		i += scnprintf(&buf[i], REMAIN(i), " FD_ATR");
+	if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
+		i += scnprintf(&buf[i], REMAIN(i), " FD_SB");
+		i += scnprintf(&buf[i], REMAIN(i), " NTUPLE");
+	}
+	if (pf->flags & I40E_FLAG_DCB_CAPABLE)
+		i += scnprintf(&buf[i], REMAIN(i), " DCB");
+	i += scnprintf(&buf[i], REMAIN(i), " VxLAN");
+	i += scnprintf(&buf[i], REMAIN(i), " Geneve");
+	if (pf->flags & I40E_FLAG_PTP)
+		i += scnprintf(&buf[i], REMAIN(i), " PTP");
+	if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
+		i += scnprintf(&buf[i], REMAIN(i), " VEB");
+	else
+		i += scnprintf(&buf[i], REMAIN(i), " VEPA");
+
+	dev_info(&pf->pdev->dev, "%s\n", buf);
+	kfree(buf);
+	WARN_ON(i > INFO_STRING_LEN);
+}
+
+/**
+ * i40e_get_platform_mac_addr - get platform-specific MAC address
+ * @pdev: PCI device information struct
+ * @pf: board private structure
+ *
+ * Look up the MAC address for the device. First we'll try
+ * eth_platform_get_mac_address, which will check Open Firmware, or arch
+ * specific fallback. Otherwise, we'll default to the stored value in
+ * firmware.
+ **/
+static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf)
+{
+	if (eth_platform_get_mac_address(&pdev->dev, pf->hw.mac.addr))
+		i40e_get_mac_addr(&pf->hw, pf->hw.mac.addr);
+}
+
+/**
+ * i40e_set_fec_in_flags - helper function for setting FEC options in flags
+ * @fec_cfg: FEC option to set in flags
+ * @flags: ptr to flags in which we set FEC option
+ **/
+void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags)
+{
+	if (fec_cfg & I40E_AQ_SET_FEC_AUTO)
+		*flags |= I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC;
+	if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_RS) ||
+	    (fec_cfg & I40E_AQ_SET_FEC_ABILITY_RS)) {
+		*flags |= I40E_FLAG_RS_FEC;
+		*flags &= ~I40E_FLAG_BASE_R_FEC;
+	}
+	if ((fec_cfg & I40E_AQ_SET_FEC_REQUEST_KR) ||
+	    (fec_cfg & I40E_AQ_SET_FEC_ABILITY_KR)) {
+		*flags |= I40E_FLAG_BASE_R_FEC;
+		*flags &= ~I40E_FLAG_RS_FEC;
+	}
+	if (fec_cfg == 0)
+		*flags &= ~(I40E_FLAG_RS_FEC | I40E_FLAG_BASE_R_FEC);
+}
+
+/**
+ * i40e_check_recovery_mode - check if we are running transition firmware
+ * @pf: board private structure
+ *
+ * Check registers indicating the firmware runs in recovery mode. Sets the
+ * appropriate driver state.
+ *
+ * Returns true if the recovery mode was detected, false otherwise
+ **/
+static bool i40e_check_recovery_mode(struct i40e_pf *pf)
+{
+	u32 val = rd32(&pf->hw, I40E_GL_FWSTS);
+
+	if (val & I40E_GL_FWSTS_FWS1B_MASK) {
+		dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. Limiting functionality.\n");
+		dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
+		set_bit(__I40E_RECOVERY_MODE, pf->state);
+
+		return true;
+	}
+	if (test_bit(__I40E_RECOVERY_MODE, pf->state))
+		dev_info(&pf->pdev->dev, "Please do Power-On Reset to initialize adapter in normal mode with full functionality.\n");
+
+	return false;
+}
+
+/**
+ * i40e_pf_loop_reset - perform reset in a loop.
+ * @pf: board private structure
+ *
+ * This function is useful when a NIC is about to enter recovery mode.
+ * When a NIC's internal data structures are corrupted the NIC's
+ * firmware is going to enter recovery mode.
+ * Right after a POR it takes about 7 minutes for firmware to enter
+ * recovery mode. Until that time a NIC is in some kind of intermediate
+ * state. After that time period the NIC almost surely enters
+ * recovery mode. The only way for a driver to detect intermediate
+ * state is to issue a series of pf-resets and check a return value.
+ * If a PF reset returns success then the firmware could be in recovery
+ * mode so the caller of this code needs to check for recovery mode
+ * if this function returns success. There is a little chance that
+ * firmware will hang in intermediate state forever.
+ * Since waiting 7 minutes is quite a lot of time this function waits
+ * 10 seconds and then gives up by returning an error.
+ *
+ * Return 0 on success, negative on failure.
+ **/
+static int i40e_pf_loop_reset(struct i40e_pf *pf)
+{
+	/* wait max 10 seconds for PF reset to succeed */
+	const unsigned long time_end = jiffies + 10 * HZ;
+	struct i40e_hw *hw = &pf->hw;
+	int ret;
+
+	ret = i40e_pf_reset(hw);
+	while (ret != 0 && time_before(jiffies, time_end)) {
+		usleep_range(10000, 20000);
+		ret = i40e_pf_reset(hw);
+	}
+
+	if (ret == 0)
+		pf->pfr_count++;
+	else
+		dev_info(&pf->pdev->dev, "PF reset failed: %d\n", ret);
+
+	return ret;
+}
+
+/**
+ * i40e_check_fw_empr - check if FW issued unexpected EMP Reset
+ * @pf: board private structure
+ *
+ * Check FW registers to determine if FW issued unexpected EMP Reset.
+ * Every time when unexpected EMP Reset occurs the FW increments
+ * a counter of unexpected EMP Resets. When the counter reaches 10
+ * the FW should enter the Recovery mode
+ *
+ * Returns true if FW issued unexpected EMP Reset
+ **/
+static bool i40e_check_fw_empr(struct i40e_pf *pf)
+{
+	const u32 fw_sts = rd32(&pf->hw, I40E_GL_FWSTS) &
+			   I40E_GL_FWSTS_FWS1B_MASK;
+	return (fw_sts > I40E_GL_FWSTS_FWS1B_EMPR_0) &&
+	       (fw_sts <= I40E_GL_FWSTS_FWS1B_EMPR_10);
+}
+
+/**
+ * i40e_handle_resets - handle EMP resets and PF resets
+ * @pf: board private structure
+ *
+ * Handle both EMP resets and PF resets and conclude whether there are
+ * any issues regarding these resets. If there are any issues then
+ * generate log entry.
+ *
+ * Return 0 if NIC is healthy or negative value when there are issues
+ * with resets
+ **/
+static int i40e_handle_resets(struct i40e_pf *pf)
+{
+	const int pfr = i40e_pf_loop_reset(pf);
+	const bool is_empr = i40e_check_fw_empr(pf);
+
+	if (is_empr || pfr != 0)
+		dev_crit(&pf->pdev->dev, "Entering recovery mode due to repeated FW resets. This may take several minutes. Refer to the Intel(R) Ethernet Adapters and Devices User Guide.\n");
+
+	return is_empr ? -EIO : pfr;
+}
+
+/**
+ * i40e_init_recovery_mode - initialize subsystems needed in recovery mode
+ * @pf: board private structure
+ * @hw: ptr to the hardware info
+ *
+ * This function does a minimal setup of all subsystems needed for running
+ * recovery mode.
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw)
+{
+	struct i40e_vsi *vsi;
+	int err;
+	int v_idx;
+
+	pci_set_drvdata(pf->pdev, pf);
+	pci_save_state(pf->pdev);
+
+	/* set up periodic task facility */
+	timer_setup(&pf->service_timer, i40e_service_timer, 0);
+	pf->service_timer_period = HZ;
+
+	INIT_WORK(&pf->service_task, i40e_service_task);
+	clear_bit(__I40E_SERVICE_SCHED, pf->state);
+
+	err = i40e_init_interrupt_scheme(pf);
+	if (err)
+		goto err_switch_setup;
+
+	/* The number of VSIs reported by the FW is the minimum guaranteed
+	 * to us; HW supports far more and we share the remaining pool with
+	 * the other PFs. We allocate space for more than the guarantee with
+	 * the understanding that we might not get them all later.
+	 */
+	if (pf->hw.func_caps.num_vsis < I40E_MIN_VSI_ALLOC)
+		pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC;
+	else
+		pf->num_alloc_vsi = pf->hw.func_caps.num_vsis;
+
+	/* Set up the vsi struct and our local tracking of the MAIN PF vsi. */
+	pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *),
+			  GFP_KERNEL);
+	if (!pf->vsi) {
+		err = -ENOMEM;
+		goto err_switch_setup;
+	}
+
+	/* We allocate one VSI which is needed as absolute minimum
+	 * in order to register the netdev
+	 */
+	v_idx = i40e_vsi_mem_alloc(pf, I40E_VSI_MAIN);
+	if (v_idx < 0) {
+		err = v_idx;
+		goto err_switch_setup;
+	}
+	pf->lan_vsi = v_idx;
+	vsi = pf->vsi[v_idx];
+	if (!vsi) {
+		err = -EFAULT;
+		goto err_switch_setup;
+	}
+	vsi->alloc_queue_pairs = 1;
+	err = i40e_config_netdev(vsi);
+	if (err)
+		goto err_switch_setup;
+	err = register_netdev(vsi->netdev);
+	if (err)
+		goto err_switch_setup;
+	vsi->netdev_registered = true;
+	i40e_dbg_pf_init(pf);
+
+	err = i40e_setup_misc_vector_for_recovery_mode(pf);
+	if (err)
+		goto err_switch_setup;
+
+	/* tell the firmware that we're starting */
+	i40e_send_version(pf);
+
+	/* since everything's happy, start the service_task timer */
+	mod_timer(&pf->service_timer,
+		  round_jiffies(jiffies + pf->service_timer_period));
+
+	return 0;
+
+err_switch_setup:
+	i40e_reset_interrupt_capability(pf);
+	timer_shutdown_sync(&pf->service_timer);
+	i40e_shutdown_adminq(hw);
+	iounmap(hw->hw_addr);
+	pci_release_mem_regions(pf->pdev);
+	pci_disable_device(pf->pdev);
+	kfree(pf);
+
+	return err;
+}
+
+/**
+ * i40e_set_subsystem_device_id - set subsystem device id
+ * @hw: pointer to the hardware info
+ *
+ * Set PCI subsystem device id either from a pci_dev structure or
+ * a specific FW register.
+ **/
+static inline void i40e_set_subsystem_device_id(struct i40e_hw *hw)
+{
+	struct pci_dev *pdev = ((struct i40e_pf *)hw->back)->pdev;
+
+	hw->subsystem_device_id = pdev->subsystem_device ?
+		pdev->subsystem_device :
+		(ushort)(rd32(hw, I40E_PFPCI_SUBSYSID) & USHRT_MAX);
+}
+
+/**
+ * i40e_probe - Device initialization routine
+ * @pdev: PCI device information struct
+ * @ent: entry in i40e_pci_tbl
+ *
+ * i40e_probe initializes a PF identified by a pci_dev structure.
+ * The OS initialization, configuring of the PF private structure,
+ * and a hardware reset occur.
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct i40e_aq_get_phy_abilities_resp abilities;
+#ifdef CONFIG_I40E_DCB
+	enum i40e_get_fw_lldp_status_resp lldp_status;
+#endif /* CONFIG_I40E_DCB */
+	struct i40e_pf *pf;
+	struct i40e_hw *hw;
+	static u16 pfs_found;
+	u16 wol_nvm_bits;
+	u16 link_status;
+#ifdef CONFIG_I40E_DCB
+	int status;
+#endif /* CONFIG_I40E_DCB */
+	int err;
+	u32 val;
+	u32 i;
+
+	err = pci_enable_device_mem(pdev);
+	if (err)
+		return err;
+
+	/* set up for high or low dma */
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_err(&pdev->dev,
+			"DMA configuration failed: 0x%x\n", err);
+		goto err_dma;
+	}
+
+	/* set up pci connections */
+	err = pci_request_mem_regions(pdev, i40e_driver_name);
+	if (err) {
+		dev_info(&pdev->dev,
+			 "pci_request_selected_regions failed %d\n", err);
+		goto err_pci_reg;
+	}
+
+	pci_set_master(pdev);
+
+	/* Now that we have a PCI connection, we need to do the
+	 * low level device setup.  This is primarily setting up
+	 * the Admin Queue structures and then querying for the
+	 * device's current profile information.
+	 */
+	pf = kzalloc(sizeof(*pf), GFP_KERNEL);
+	if (!pf) {
+		err = -ENOMEM;
+		goto err_pf_alloc;
+	}
+	pf->next_vsi = 0;
+	pf->pdev = pdev;
+	set_bit(__I40E_DOWN, pf->state);
+
+	hw = &pf->hw;
+	hw->back = pf;
+
+	pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0),
+				I40E_MAX_CSR_SPACE);
+	/* We believe that the highest register to read is
+	 * I40E_GLGEN_STAT_CLEAR, so we check if the BAR size
+	 * is not less than that before mapping to prevent a
+	 * kernel panic.
+	 */
+	if (pf->ioremap_len < I40E_GLGEN_STAT_CLEAR) {
+		dev_err(&pdev->dev, "Cannot map registers, bar size 0x%X too small, aborting\n",
+			pf->ioremap_len);
+		err = -ENOMEM;
+		goto err_ioremap;
+	}
+	hw->hw_addr = ioremap(pci_resource_start(pdev, 0), pf->ioremap_len);
+	if (!hw->hw_addr) {
+		err = -EIO;
+		dev_info(&pdev->dev, "ioremap(0x%04x, 0x%04x) failed: 0x%x\n",
+			 (unsigned int)pci_resource_start(pdev, 0),
+			 pf->ioremap_len, err);
+		goto err_ioremap;
+	}
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	i40e_set_subsystem_device_id(hw);
+	hw->bus.device = PCI_SLOT(pdev->devfn);
+	hw->bus.func = PCI_FUNC(pdev->devfn);
+	hw->bus.bus_id = pdev->bus->number;
+	pf->instance = pfs_found;
+
+	/* Select something other than the 802.1ad ethertype for the
+	 * switch to use internally and drop on ingress.
+	 */
+	hw->switch_tag = 0xffff;
+	hw->first_tag = ETH_P_8021AD;
+	hw->second_tag = ETH_P_8021Q;
+
+	INIT_LIST_HEAD(&pf->l3_flex_pit_list);
+	INIT_LIST_HEAD(&pf->l4_flex_pit_list);
+	INIT_LIST_HEAD(&pf->ddp_old_prof);
+
+	/* set up the locks for the AQ, do this only once in probe
+	 * and destroy them only once in remove
+	 */
+	mutex_init(&hw->aq.asq_mutex);
+	mutex_init(&hw->aq.arq_mutex);
+
+	pf->msg_enable = netif_msg_init(debug,
+					NETIF_MSG_DRV |
+					NETIF_MSG_PROBE |
+					NETIF_MSG_LINK);
+	if (debug < -1)
+		pf->hw.debug_mask = debug;
+
+	/* do a special CORER for clearing PXE mode once at init */
+	if (hw->revision_id == 0 &&
+	    (rd32(hw, I40E_GLLAN_RCTL_0) & I40E_GLLAN_RCTL_0_PXE_MODE_MASK)) {
+		wr32(hw, I40E_GLGEN_RTRIG, I40E_GLGEN_RTRIG_CORER_MASK);
+		i40e_flush(hw);
+		msleep(200);
+		pf->corer_count++;
+
+		i40e_clear_pxe_mode(hw);
+	}
+
+	/* Reset here to make sure all is clean and to define PF 'n' */
+	i40e_clear_hw(hw);
+
+	err = i40e_set_mac_type(hw);
+	if (err) {
+		dev_warn(&pdev->dev, "unidentified MAC or BLANK NVM: %d\n",
+			 err);
+		goto err_pf_reset;
+	}
+
+	err = i40e_handle_resets(pf);
+	if (err)
+		goto err_pf_reset;
+
+	i40e_check_recovery_mode(pf);
+
+	if (is_kdump_kernel()) {
+		hw->aq.num_arq_entries = I40E_MIN_ARQ_LEN;
+		hw->aq.num_asq_entries = I40E_MIN_ASQ_LEN;
+	} else {
+		hw->aq.num_arq_entries = I40E_AQ_LEN;
+		hw->aq.num_asq_entries = I40E_AQ_LEN;
+	}
+	hw->aq.arq_buf_size = I40E_MAX_AQ_BUF_SIZE;
+	hw->aq.asq_buf_size = I40E_MAX_AQ_BUF_SIZE;
+	pf->adminq_work_limit = I40E_AQ_WORK_LIMIT;
+
+	snprintf(pf->int_name, sizeof(pf->int_name) - 1,
+		 "%s-%s:misc",
+		 dev_driver_string(&pf->pdev->dev), dev_name(&pdev->dev));
+
+	err = i40e_init_shared_code(hw);
+	if (err) {
+		dev_warn(&pdev->dev, "unidentified MAC or BLANK NVM: %d\n",
+			 err);
+		goto err_pf_reset;
+	}
+
+	/* set up a default setting for link flow control */
+	pf->hw.fc.requested_mode = I40E_FC_NONE;
+
+	err = i40e_init_adminq(hw);
+	if (err) {
+		if (err == -EIO)
+			dev_info(&pdev->dev,
+				 "The driver for the device stopped because the NVM image v%u.%u is newer than expected v%u.%u. You must install the most recent version of the network driver.\n",
+				 hw->aq.api_maj_ver,
+				 hw->aq.api_min_ver,
+				 I40E_FW_API_VERSION_MAJOR,
+				 I40E_FW_MINOR_VERSION(hw));
+		else
+			dev_info(&pdev->dev,
+				 "The driver for the device stopped because the device firmware failed to init. Try updating your NVM image.\n");
+
+		goto err_pf_reset;
+	}
+	i40e_get_oem_version(hw);
+
+	/* provide nvm, fw, api versions, vendor:device id, subsys vendor:device id */
+	dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s [%04x:%04x] [%04x:%04x]\n",
+		 hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build,
+		 hw->aq.api_maj_ver, hw->aq.api_min_ver,
+		 i40e_nvm_version_str(hw), hw->vendor_id, hw->device_id,
+		 hw->subsystem_vendor_id, hw->subsystem_device_id);
+
+	if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
+	    hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw))
+		dev_dbg(&pdev->dev,
+			"The driver for the device detected a newer version of the NVM image v%u.%u than v%u.%u.\n",
+			 hw->aq.api_maj_ver,
+			 hw->aq.api_min_ver,
+			 I40E_FW_API_VERSION_MAJOR,
+			 I40E_FW_MINOR_VERSION(hw));
+	else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4)
+		dev_info(&pdev->dev,
+			 "The driver for the device detected an older version of the NVM image v%u.%u than expected v%u.%u. Please update the NVM image.\n",
+			 hw->aq.api_maj_ver,
+			 hw->aq.api_min_ver,
+			 I40E_FW_API_VERSION_MAJOR,
+			 I40E_FW_MINOR_VERSION(hw));
+
+	i40e_verify_eeprom(pf);
+
+	/* Rev 0 hardware was never productized */
+	if (hw->revision_id < 1)
+		dev_warn(&pdev->dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues with your hardware. If you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n");
+
+	i40e_clear_pxe_mode(hw);
+
+	err = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities);
+	if (err)
+		goto err_adminq_setup;
+
+	err = i40e_sw_init(pf);
+	if (err) {
+		dev_info(&pdev->dev, "sw_init failed: %d\n", err);
+		goto err_sw_init;
+	}
+
+	if (test_bit(__I40E_RECOVERY_MODE, pf->state))
+		return i40e_init_recovery_mode(pf, hw);
+
+	err = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
+				hw->func_caps.num_rx_qp, 0, 0);
+	if (err) {
+		dev_info(&pdev->dev, "init_lan_hmc failed: %d\n", err);
+		goto err_init_lan_hmc;
+	}
+
+	err = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY);
+	if (err) {
+		dev_info(&pdev->dev, "configure_lan_hmc failed: %d\n", err);
+		err = -ENOENT;
+		goto err_configure_lan_hmc;
+	}
+
+	/* Disable LLDP for NICs that have firmware versions lower than v4.3.
+	 * Ignore error return codes because if it was already disabled via
+	 * hardware settings this will fail
+	 */
+	if (pf->hw_features & I40E_HW_STOP_FW_LLDP) {
+		dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n");
+		i40e_aq_stop_lldp(hw, true, false, NULL);
+	}
+
+	/* allow a platform config to override the HW addr */
+	i40e_get_platform_mac_addr(pdev, pf);
+
+	if (!is_valid_ether_addr(hw->mac.addr)) {
+		dev_info(&pdev->dev, "invalid MAC address %pM\n", hw->mac.addr);
+		err = -EIO;
+		goto err_mac_addr;
+	}
+	dev_info(&pdev->dev, "MAC address: %pM\n", hw->mac.addr);
+	ether_addr_copy(hw->mac.perm_addr, hw->mac.addr);
+	i40e_get_port_mac_addr(hw, hw->mac.port_addr);
+	if (is_valid_ether_addr(hw->mac.port_addr))
+		pf->hw_features |= I40E_HW_PORT_ID_VALID;
+
+	i40e_ptp_alloc_pins(pf);
+	pci_set_drvdata(pdev, pf);
+	pci_save_state(pdev);
+
+#ifdef CONFIG_I40E_DCB
+	status = i40e_get_fw_lldp_status(&pf->hw, &lldp_status);
+	(!status &&
+	 lldp_status == I40E_GET_FW_LLDP_STATUS_ENABLED) ?
+		(pf->flags &= ~I40E_FLAG_DISABLE_FW_LLDP) :
+		(pf->flags |= I40E_FLAG_DISABLE_FW_LLDP);
+	dev_info(&pdev->dev,
+		 (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) ?
+			"FW LLDP is disabled\n" :
+			"FW LLDP is enabled\n");
+
+	/* Enable FW to write default DCB config on link-up */
+	i40e_aq_set_dcb_parameters(hw, true, NULL);
+
+	err = i40e_init_pf_dcb(pf);
+	if (err) {
+		dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err);
+		pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | I40E_FLAG_DCB_ENABLED);
+		/* Continue without DCB enabled */
+	}
+#endif /* CONFIG_I40E_DCB */
+
+	/* set up periodic task facility */
+	timer_setup(&pf->service_timer, i40e_service_timer, 0);
+	pf->service_timer_period = HZ;
+
+	INIT_WORK(&pf->service_task, i40e_service_task);
+	clear_bit(__I40E_SERVICE_SCHED, pf->state);
+
+	/* NVM bit on means WoL disabled for the port */
+	i40e_read_nvm_word(hw, I40E_SR_NVM_WAKE_ON_LAN, &wol_nvm_bits);
+	if (BIT (hw->port) & wol_nvm_bits || hw->partition_id != 1)
+		pf->wol_en = false;
+	else
+		pf->wol_en = true;
+	device_set_wakeup_enable(&pf->pdev->dev, pf->wol_en);
+
+	/* set up the main switch operations */
+	i40e_determine_queue_usage(pf);
+	err = i40e_init_interrupt_scheme(pf);
+	if (err)
+		goto err_switch_setup;
+
+	/* Reduce Tx and Rx pairs for kdump
+	 * When MSI-X is enabled, it's not allowed to use more TC queue
+	 * pairs than MSI-X vectors (pf->num_lan_msix) exist. Thus
+	 * vsi->num_queue_pairs will be equal to pf->num_lan_msix, i.e., 1.
+	 */
+	if (is_kdump_kernel())
+		pf->num_lan_msix = 1;
+
+	pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port;
+	pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port;
+	pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
+	pf->udp_tunnel_nic.shared = &pf->udp_tunnel_shared;
+	pf->udp_tunnel_nic.tables[0].n_entries = I40E_MAX_PF_UDP_OFFLOAD_PORTS;
+	pf->udp_tunnel_nic.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN |
+						    UDP_TUNNEL_TYPE_GENEVE;
+
+	/* The number of VSIs reported by the FW is the minimum guaranteed
+	 * to us; HW supports far more and we share the remaining pool with
+	 * the other PFs. We allocate space for more than the guarantee with
+	 * the understanding that we might not get them all later.
+	 */
+	if (pf->hw.func_caps.num_vsis < I40E_MIN_VSI_ALLOC)
+		pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC;
+	else
+		pf->num_alloc_vsi = pf->hw.func_caps.num_vsis;
+	if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
+		dev_warn(&pf->pdev->dev,
+			 "limiting the VSI count due to UDP tunnel limitation %d > %d\n",
+			 pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
+		pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
+	}
+
+	/* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */
+	pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *),
+			  GFP_KERNEL);
+	if (!pf->vsi) {
+		err = -ENOMEM;
+		goto err_switch_setup;
+	}
+
+#ifdef CONFIG_PCI_IOV
+	/* prep for VF support */
+	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
+	    (pf->flags & I40E_FLAG_MSIX_ENABLED) &&
+	    !test_bit(__I40E_BAD_EEPROM, pf->state)) {
+		if (pci_num_vf(pdev))
+			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+	}
+#endif
+	err = i40e_setup_pf_switch(pf, false, false);
+	if (err) {
+		dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
+		goto err_vsis;
+	}
+	INIT_LIST_HEAD(&pf->vsi[pf->lan_vsi]->ch_list);
+
+	/* if FDIR VSI was set up, start it now */
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
+		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
+			i40e_vsi_open(pf->vsi[i]);
+			break;
+		}
+	}
+
+	/* The driver only wants link up/down and module qualification
+	 * reports from firmware.  Note the negative logic.
+	 */
+	err = i40e_aq_set_phy_int_mask(&pf->hw,
+				       ~(I40E_AQ_EVENT_LINK_UPDOWN |
+					 I40E_AQ_EVENT_MEDIA_NA |
+					 I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL);
+	if (err)
+		dev_info(&pf->pdev->dev, "set phy mask fail, err %pe aq_err %s\n",
+			 ERR_PTR(err),
+			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+
+	/* Reconfigure hardware for allowing smaller MSS in the case
+	 * of TSO, so that we avoid the MDD being fired and causing
+	 * a reset in the case of small MSS+TSO.
+	 */
+	val = rd32(hw, I40E_REG_MSS);
+	if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) {
+		val &= ~I40E_REG_MSS_MIN_MASK;
+		val |= I40E_64BYTE_MSS;
+		wr32(hw, I40E_REG_MSS, val);
+	}
+
+	if (pf->hw_features & I40E_HW_RESTART_AUTONEG) {
+		msleep(75);
+		err = i40e_aq_set_link_restart_an(&pf->hw, true, NULL);
+		if (err)
+			dev_info(&pf->pdev->dev, "link restart failed, err %pe aq_err %s\n",
+				 ERR_PTR(err),
+				 i40e_aq_str(&pf->hw,
+					     pf->hw.aq.asq_last_status));
+	}
+	/* The main driver is (mostly) up and happy. We need to set this state
+	 * before setting up the misc vector or we get a race and the vector
+	 * ends up disabled forever.
+	 */
+	clear_bit(__I40E_DOWN, pf->state);
+
+	/* In case of MSIX we are going to setup the misc vector right here
+	 * to handle admin queue events etc. In case of legacy and MSI
+	 * the misc functionality and queue processing is combined in
+	 * the same vector and that gets setup at open.
+	 */
+	if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+		err = i40e_setup_misc_vector(pf);
+		if (err) {
+			dev_info(&pdev->dev,
+				 "setup of misc vector failed: %d\n", err);
+			i40e_cloud_filter_exit(pf);
+			i40e_fdir_teardown(pf);
+			goto err_vsis;
+		}
+	}
+
+#ifdef CONFIG_PCI_IOV
+	/* prep for VF support */
+	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
+	    (pf->flags & I40E_FLAG_MSIX_ENABLED) &&
+	    !test_bit(__I40E_BAD_EEPROM, pf->state)) {
+		/* disable link interrupts for VFs */
+		val = rd32(hw, I40E_PFGEN_PORTMDIO_NUM);
+		val &= ~I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_MASK;
+		wr32(hw, I40E_PFGEN_PORTMDIO_NUM, val);
+		i40e_flush(hw);
+
+		if (pci_num_vf(pdev)) {
+			dev_info(&pdev->dev,
+				 "Active VFs found, allocating resources.\n");
+			err = i40e_alloc_vfs(pf, pci_num_vf(pdev));
+			if (err)
+				dev_info(&pdev->dev,
+					 "Error %d allocating resources for existing VFs\n",
+					 err);
+		}
+	}
+#endif /* CONFIG_PCI_IOV */
+
+	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+		pf->iwarp_base_vector = i40e_get_lump(pf, pf->irq_pile,
+						      pf->num_iwarp_msix,
+						      I40E_IWARP_IRQ_PILE_ID);
+		if (pf->iwarp_base_vector < 0) {
+			dev_info(&pdev->dev,
+				 "failed to get tracking for %d vectors for IWARP err=%d\n",
+				 pf->num_iwarp_msix, pf->iwarp_base_vector);
+			pf->flags &= ~I40E_FLAG_IWARP_ENABLED;
+		}
+	}
+
+	i40e_dbg_pf_init(pf);
+
+	/* tell the firmware that we're starting */
+	i40e_send_version(pf);
+
+	/* since everything's happy, start the service_task timer */
+	mod_timer(&pf->service_timer,
+		  round_jiffies(jiffies + pf->service_timer_period));
+
+	/* add this PF to client device list and launch a client service task */
+	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+		err = i40e_lan_add_device(pf);
+		if (err)
+			dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n",
+				 err);
+	}
+
+#define PCI_SPEED_SIZE 8
+#define PCI_WIDTH_SIZE 8
+	/* Devices on the IOSF bus do not have this information
+	 * and will report PCI Gen 1 x 1 by default so don't bother
+	 * checking them.
+	 */
+	if (!(pf->hw_features & I40E_HW_NO_PCI_LINK_CHECK)) {
+		char speed[PCI_SPEED_SIZE] = "Unknown";
+		char width[PCI_WIDTH_SIZE] = "Unknown";
+
+		/* Get the negotiated link width and speed from PCI config
+		 * space
+		 */
+		pcie_capability_read_word(pf->pdev, PCI_EXP_LNKSTA,
+					  &link_status);
+
+		i40e_set_pci_config_data(hw, link_status);
+
+		switch (hw->bus.speed) {
+		case i40e_bus_speed_8000:
+			strscpy(speed, "8.0", PCI_SPEED_SIZE); break;
+		case i40e_bus_speed_5000:
+			strscpy(speed, "5.0", PCI_SPEED_SIZE); break;
+		case i40e_bus_speed_2500:
+			strscpy(speed, "2.5", PCI_SPEED_SIZE); break;
+		default:
+			break;
+		}
+		switch (hw->bus.width) {
+		case i40e_bus_width_pcie_x8:
+			strscpy(width, "8", PCI_WIDTH_SIZE); break;
+		case i40e_bus_width_pcie_x4:
+			strscpy(width, "4", PCI_WIDTH_SIZE); break;
+		case i40e_bus_width_pcie_x2:
+			strscpy(width, "2", PCI_WIDTH_SIZE); break;
+		case i40e_bus_width_pcie_x1:
+			strscpy(width, "1", PCI_WIDTH_SIZE); break;
+		default:
+			break;
+		}
+
+		dev_info(&pdev->dev, "PCI-Express: Speed %sGT/s Width x%s\n",
+			 speed, width);
+
+		if (hw->bus.width < i40e_bus_width_pcie_x8 ||
+		    hw->bus.speed < i40e_bus_speed_8000) {
+			dev_warn(&pdev->dev, "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
+			dev_warn(&pdev->dev, "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
+		}
+	}
+
+	/* get the requested speeds from the fw */
+	err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, NULL);
+	if (err)
+		dev_dbg(&pf->pdev->dev, "get requested speeds ret =  %pe last_status =  %s\n",
+			ERR_PTR(err),
+			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+	pf->hw.phy.link_info.requested_speeds = abilities.link_speed;
+
+	/* set the FEC config due to the board capabilities */
+	i40e_set_fec_in_flags(abilities.fec_cfg_curr_mod_ext_info, &pf->flags);
+
+	/* get the supported phy types from the fw */
+	err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL);
+	if (err)
+		dev_dbg(&pf->pdev->dev, "get supported phy types ret =  %pe last_status =  %s\n",
+			ERR_PTR(err),
+			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+
+	/* make sure the MFS hasn't been set lower than the default */
+#define MAX_FRAME_SIZE_DEFAULT 0x2600
+	val = (rd32(&pf->hw, I40E_PRTGL_SAH) &
+	       I40E_PRTGL_SAH_MFS_MASK) >> I40E_PRTGL_SAH_MFS_SHIFT;
+	if (val < MAX_FRAME_SIZE_DEFAULT)
+		dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n",
+			 pf->hw.port, val);
+
+	/* Add a filter to drop all Flow control frames from any VSI from being
+	 * transmitted. By doing so we stop a malicious VF from sending out
+	 * PAUSE or PFC frames and potentially controlling traffic for other
+	 * PF/VF VSIs.
+	 * The FW can still send Flow control frames if enabled.
+	 */
+	i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
+						       pf->main_vsi_seid);
+
+	if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) ||
+		(pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4))
+		pf->hw_features |= I40E_HW_PHY_CONTROLS_LEDS;
+	if (pf->hw.device_id == I40E_DEV_ID_SFP_I_X722)
+		pf->hw_features |= I40E_HW_HAVE_CRT_RETIMER;
+	/* print a string summarizing features */
+	i40e_print_features(pf);
+
+	return 0;
+
+	/* Unwind what we've done if something failed in the setup */
+err_vsis:
+	set_bit(__I40E_DOWN, pf->state);
+	i40e_clear_interrupt_scheme(pf);
+	kfree(pf->vsi);
+err_switch_setup:
+	i40e_reset_interrupt_capability(pf);
+	timer_shutdown_sync(&pf->service_timer);
+err_mac_addr:
+err_configure_lan_hmc:
+	(void)i40e_shutdown_lan_hmc(hw);
+err_init_lan_hmc:
+	kfree(pf->qp_pile);
+err_sw_init:
+err_adminq_setup:
+err_pf_reset:
+	iounmap(hw->hw_addr);
+err_ioremap:
+	kfree(pf);
+err_pf_alloc:
+	pci_release_mem_regions(pdev);
+err_pci_reg:
+err_dma:
+	pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * i40e_remove - Device removal routine
+ * @pdev: PCI device information struct
+ *
+ * i40e_remove is called by the PCI subsystem to alert the driver
+ * that is should release a PCI device.  This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void i40e_remove(struct pci_dev *pdev)
+{
+	struct i40e_pf *pf = pci_get_drvdata(pdev);
+	struct i40e_hw *hw = &pf->hw;
+	int ret_code;
+	int i;
+
+	i40e_dbg_pf_exit(pf);
+
+	i40e_ptp_stop(pf);
+
+	/* Disable RSS in hw */
+	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0);
+	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0);
+
+	/* Grab __I40E_RESET_RECOVERY_PENDING and set __I40E_IN_REMOVE
+	 * flags, once they are set, i40e_rebuild should not be called as
+	 * i40e_prep_for_reset always returns early.
+	 */
+	while (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+		usleep_range(1000, 2000);
+	set_bit(__I40E_IN_REMOVE, pf->state);
+
+	if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
+		set_bit(__I40E_VF_RESETS_DISABLED, pf->state);
+		i40e_free_vfs(pf);
+		pf->flags &= ~I40E_FLAG_SRIOV_ENABLED;
+	}
+	/* no more scheduling of any task */
+	set_bit(__I40E_SUSPENDED, pf->state);
+	set_bit(__I40E_DOWN, pf->state);
+	if (pf->service_timer.function)
+		timer_shutdown_sync(&pf->service_timer);
+	if (pf->service_task.func)
+		cancel_work_sync(&pf->service_task);
+
+	if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
+		struct i40e_vsi *vsi = pf->vsi[0];
+
+		/* We know that we have allocated only one vsi for this PF,
+		 * it was just for registering netdevice, so the interface
+		 * could be visible in the 'ifconfig' output
+		 */
+		unregister_netdev(vsi->netdev);
+		free_netdev(vsi->netdev);
+
+		goto unmap;
+	}
+
+	/* Client close must be called explicitly here because the timer
+	 * has been stopped.
+	 */
+	i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
+
+	i40e_fdir_teardown(pf);
+
+	/* If there is a switch structure or any orphans, remove them.
+	 * This will leave only the PF's VSI remaining.
+	 */
+	for (i = 0; i < I40E_MAX_VEB; i++) {
+		if (!pf->veb[i])
+			continue;
+
+		if (pf->veb[i]->uplink_seid == pf->mac_seid ||
+		    pf->veb[i]->uplink_seid == 0)
+			i40e_switch_branch_release(pf->veb[i]);
+	}
+
+	/* Now we can shutdown the PF's VSIs, just before we kill
+	 * adminq and hmc.
+	 */
+	for (i = pf->num_alloc_vsi; i--;)
+		if (pf->vsi[i]) {
+			i40e_vsi_close(pf->vsi[i]);
+			i40e_vsi_release(pf->vsi[i]);
+			pf->vsi[i] = NULL;
+		}
+
+	i40e_cloud_filter_exit(pf);
+
+	/* remove attached clients */
+	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+		ret_code = i40e_lan_del_device(pf);
+		if (ret_code)
+			dev_warn(&pdev->dev, "Failed to delete client device: %d\n",
+				 ret_code);
+	}
+
+	/* shutdown and destroy the HMC */
+	if (hw->hmc.hmc_obj) {
+		ret_code = i40e_shutdown_lan_hmc(hw);
+		if (ret_code)
+			dev_warn(&pdev->dev,
+				 "Failed to destroy the HMC resources: %d\n",
+				 ret_code);
+	}
+
+unmap:
+	/* Free MSI/legacy interrupt 0 when in recovery mode. */
+	if (test_bit(__I40E_RECOVERY_MODE, pf->state) &&
+	    !(pf->flags & I40E_FLAG_MSIX_ENABLED))
+		free_irq(pf->pdev->irq, pf);
+
+	/* shutdown the adminq */
+	i40e_shutdown_adminq(hw);
+
+	/* destroy the locks only once, here */
+	mutex_destroy(&hw->aq.arq_mutex);
+	mutex_destroy(&hw->aq.asq_mutex);
+
+	/* Clear all dynamic memory lists of rings, q_vectors, and VSIs */
+	rtnl_lock();
+	i40e_clear_interrupt_scheme(pf);
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
+		if (pf->vsi[i]) {
+			if (!test_bit(__I40E_RECOVERY_MODE, pf->state))
+				i40e_vsi_clear_rings(pf->vsi[i]);
+			i40e_vsi_clear(pf->vsi[i]);
+			pf->vsi[i] = NULL;
+		}
+	}
+	rtnl_unlock();
+
+	for (i = 0; i < I40E_MAX_VEB; i++) {
+		kfree(pf->veb[i]);
+		pf->veb[i] = NULL;
+	}
+
+	kfree(pf->qp_pile);
+	kfree(pf->vsi);
+
+	iounmap(hw->hw_addr);
+	kfree(pf);
+	pci_release_mem_regions(pdev);
+
+	pci_disable_device(pdev);
+}
+
+/**
+ * i40e_pci_error_detected - warning that something funky happened in PCI land
+ * @pdev: PCI device information struct
+ * @error: the type of PCI error
+ *
+ * Called to warn that something happened and the error handling steps
+ * are in progress.  Allows the driver to quiesce things, be ready for
+ * remediation.
+ **/
+static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev,
+						pci_channel_state_t error)
+{
+	struct i40e_pf *pf = pci_get_drvdata(pdev);
+
+	dev_info(&pdev->dev, "%s: error %d\n", __func__, error);
+
+	if (!pf) {
+		dev_info(&pdev->dev,
+			 "Cannot recover - error happened during device probe\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	/* shutdown all operations */
+	if (!test_bit(__I40E_SUSPENDED, pf->state))
+		i40e_prep_for_reset(pf);
+
+	/* Request a slot reset */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * i40e_pci_error_slot_reset - a PCI slot reset just happened
+ * @pdev: PCI device information struct
+ *
+ * Called to find if the driver can work with the device now that
+ * the pci slot has been reset.  If a basic connection seems good
+ * (registers are readable and have sane content) then return a
+ * happy little PCI_ERS_RESULT_xxx.
+ **/
+static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev)
+{
+	struct i40e_pf *pf = pci_get_drvdata(pdev);
+	pci_ers_result_t result;
+	u32 reg;
+
+	dev_dbg(&pdev->dev, "%s\n", __func__);
+	if (pci_enable_device_mem(pdev)) {
+		dev_info(&pdev->dev,
+			 "Cannot re-enable PCI device after reset.\n");
+		result = PCI_ERS_RESULT_DISCONNECT;
+	} else {
+		pci_set_master(pdev);
+		pci_restore_state(pdev);
+		pci_save_state(pdev);
+		pci_wake_from_d3(pdev, false);
+
+		reg = rd32(&pf->hw, I40E_GLGEN_RTRIG);
+		if (reg == 0)
+			result = PCI_ERS_RESULT_RECOVERED;
+		else
+			result = PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	return result;
+}
+
+/**
+ * i40e_pci_error_reset_prepare - prepare device driver for pci reset
+ * @pdev: PCI device information struct
+ */
+static void i40e_pci_error_reset_prepare(struct pci_dev *pdev)
+{
+	struct i40e_pf *pf = pci_get_drvdata(pdev);
+
+	i40e_prep_for_reset(pf);
+}
+
+/**
+ * i40e_pci_error_reset_done - pci reset done, device driver reset can begin
+ * @pdev: PCI device information struct
+ */
+static void i40e_pci_error_reset_done(struct pci_dev *pdev)
+{
+	struct i40e_pf *pf = pci_get_drvdata(pdev);
+
+	if (test_bit(__I40E_IN_REMOVE, pf->state))
+		return;
+
+	i40e_reset_and_rebuild(pf, false, false);
+#ifdef CONFIG_PCI_IOV
+	i40e_restore_all_vfs_msi_state(pdev);
+#endif /* CONFIG_PCI_IOV */
+}
+
+/**
+ * i40e_pci_error_resume - restart operations after PCI error recovery
+ * @pdev: PCI device information struct
+ *
+ * Called to allow the driver to bring things back up after PCI error
+ * and/or reset recovery has finished.
+ **/
+static void i40e_pci_error_resume(struct pci_dev *pdev)
+{
+	struct i40e_pf *pf = pci_get_drvdata(pdev);
+
+	dev_dbg(&pdev->dev, "%s\n", __func__);
+	if (test_bit(__I40E_SUSPENDED, pf->state))
+		return;
+
+	i40e_handle_reset_warning(pf, false);
+}
+
+/**
+ * i40e_enable_mc_magic_wake - enable multicast magic packet wake up
+ * using the mac_address_write admin q function
+ * @pf: pointer to i40e_pf struct
+ **/
+static void i40e_enable_mc_magic_wake(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u8 mac_addr[6];
+	u16 flags = 0;
+	int ret;
+
+	/* Get current MAC address in case it's an LAA */
+	if (pf->vsi[pf->lan_vsi] && pf->vsi[pf->lan_vsi]->netdev) {
+		ether_addr_copy(mac_addr,
+				pf->vsi[pf->lan_vsi]->netdev->dev_addr);
+	} else {
+		dev_err(&pf->pdev->dev,
+			"Failed to retrieve MAC address; using default\n");
+		ether_addr_copy(mac_addr, hw->mac.addr);
+	}
+
+	/* The FW expects the mac address write cmd to first be called with
+	 * one of these flags before calling it again with the multicast
+	 * enable flags.
+	 */
+	flags = I40E_AQC_WRITE_TYPE_LAA_WOL;
+
+	if (hw->func_caps.flex10_enable && hw->partition_id != 1)
+		flags = I40E_AQC_WRITE_TYPE_LAA_ONLY;
+
+	ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
+	if (ret) {
+		dev_err(&pf->pdev->dev,
+			"Failed to update MAC address registers; cannot enable Multicast Magic packet wake up");
+		return;
+	}
+
+	flags = I40E_AQC_MC_MAG_EN
+			| I40E_AQC_WOL_PRESERVE_ON_PFR
+			| I40E_AQC_WRITE_TYPE_UPDATE_MC_MAG;
+	ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
+	if (ret)
+		dev_err(&pf->pdev->dev,
+			"Failed to enable Multicast Magic Packet wake up\n");
+}
+
+/**
+ * i40e_shutdown - PCI callback for shutting down
+ * @pdev: PCI device information struct
+ **/
+static void i40e_shutdown(struct pci_dev *pdev)
+{
+	struct i40e_pf *pf = pci_get_drvdata(pdev);
+	struct i40e_hw *hw = &pf->hw;
+
+	set_bit(__I40E_SUSPENDED, pf->state);
+	set_bit(__I40E_DOWN, pf->state);
+
+	del_timer_sync(&pf->service_timer);
+	cancel_work_sync(&pf->service_task);
+	i40e_cloud_filter_exit(pf);
+	i40e_fdir_teardown(pf);
+
+	/* Client close must be called explicitly here because the timer
+	 * has been stopped.
+	 */
+	i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
+
+	if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
+		i40e_enable_mc_magic_wake(pf);
+
+	i40e_prep_for_reset(pf);
+
+	wr32(hw, I40E_PFPM_APM,
+	     (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
+	wr32(hw, I40E_PFPM_WUFC,
+	     (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
+
+	/* Free MSI/legacy interrupt 0 when in recovery mode. */
+	if (test_bit(__I40E_RECOVERY_MODE, pf->state) &&
+	    !(pf->flags & I40E_FLAG_MSIX_ENABLED))
+		free_irq(pf->pdev->irq, pf);
+
+	/* Since we're going to destroy queues during the
+	 * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this
+	 * whole section
+	 */
+	rtnl_lock();
+	i40e_clear_interrupt_scheme(pf);
+	rtnl_unlock();
+
+	if (system_state == SYSTEM_POWER_OFF) {
+		pci_wake_from_d3(pdev, pf->wol_en);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+}
+
+/**
+ * i40e_suspend - PM callback for moving to D3
+ * @dev: generic device information structure
+ **/
+static int __maybe_unused i40e_suspend(struct device *dev)
+{
+	struct i40e_pf *pf = dev_get_drvdata(dev);
+	struct i40e_hw *hw = &pf->hw;
+
+	/* If we're already suspended, then there is nothing to do */
+	if (test_and_set_bit(__I40E_SUSPENDED, pf->state))
+		return 0;
+
+	set_bit(__I40E_DOWN, pf->state);
+
+	/* Ensure service task will not be running */
+	del_timer_sync(&pf->service_timer);
+	cancel_work_sync(&pf->service_task);
+
+	/* Client close must be called explicitly here because the timer
+	 * has been stopped.
+	 */
+	i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false);
+
+	if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
+		i40e_enable_mc_magic_wake(pf);
+
+	/* Since we're going to destroy queues during the
+	 * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this
+	 * whole section
+	 */
+	rtnl_lock();
+
+	i40e_prep_for_reset(pf);
+
+	wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
+	wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
+
+	/* Clear the interrupt scheme and release our IRQs so that the system
+	 * can safely hibernate even when there are a large number of CPUs.
+	 * Otherwise hibernation might fail when mapping all the vectors back
+	 * to CPU0.
+	 */
+	i40e_clear_interrupt_scheme(pf);
+
+	rtnl_unlock();
+
+	return 0;
+}
+
+/**
+ * i40e_resume - PM callback for waking up from D3
+ * @dev: generic device information structure
+ **/
+static int __maybe_unused i40e_resume(struct device *dev)
+{
+	struct i40e_pf *pf = dev_get_drvdata(dev);
+	int err;
+
+	/* If we're not suspended, then there is nothing to do */
+	if (!test_bit(__I40E_SUSPENDED, pf->state))
+		return 0;
+
+	/* We need to hold the RTNL lock prior to restoring interrupt schemes,
+	 * since we're going to be restoring queues
+	 */
+	rtnl_lock();
+
+	/* We cleared the interrupt scheme when we suspended, so we need to
+	 * restore it now to resume device functionality.
+	 */
+	err = i40e_restore_interrupt_scheme(pf);
+	if (err) {
+		dev_err(dev, "Cannot restore interrupt scheme: %d\n",
+			err);
+	}
+
+	clear_bit(__I40E_DOWN, pf->state);
+	i40e_reset_and_rebuild(pf, false, true);
+
+	rtnl_unlock();
+
+	/* Clear suspended state last after everything is recovered */
+	clear_bit(__I40E_SUSPENDED, pf->state);
+
+	/* Restart the service task */
+	mod_timer(&pf->service_timer,
+		  round_jiffies(jiffies + pf->service_timer_period));
+
+	return 0;
+}
+
+static const struct pci_error_handlers i40e_err_handler = {
+	.error_detected = i40e_pci_error_detected,
+	.slot_reset = i40e_pci_error_slot_reset,
+	.reset_prepare = i40e_pci_error_reset_prepare,
+	.reset_done = i40e_pci_error_reset_done,
+	.resume = i40e_pci_error_resume,
+};
+
+static SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume);
+
+static struct pci_driver i40e_driver = {
+	.name     = i40e_driver_name,
+	.id_table = i40e_pci_tbl,
+	.probe    = i40e_probe,
+	.remove   = i40e_remove,
+	.driver   = {
+		.pm = &i40e_pm_ops,
+	},
+	.shutdown = i40e_shutdown,
+	.err_handler = &i40e_err_handler,
+	.sriov_configure = i40e_pci_sriov_configure,
+};
+
+/**
+ * i40e_init_module - Driver registration routine
+ *
+ * i40e_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ **/
+static int __init i40e_init_module(void)
+{
+	int err;
+
+	pr_info("%s: %s\n", i40e_driver_name, i40e_driver_string);
+	pr_info("%s: %s\n", i40e_driver_name, i40e_copyright);
+
+	/* There is no need to throttle the number of active tasks because
+	 * each device limits its own task using a state bit for scheduling
+	 * the service task, and the device tasks do not interfere with each
+	 * other, so we don't set a max task limit. We must set WQ_MEM_RECLAIM
+	 * since we need to be able to guarantee forward progress even under
+	 * memory pressure.
+	 */
+	i40e_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, i40e_driver_name);
+	if (!i40e_wq) {
+		pr_err("%s: Failed to create workqueue\n", i40e_driver_name);
+		return -ENOMEM;
+	}
+
+	i40e_dbg_init();
+	err = pci_register_driver(&i40e_driver);
+	if (err) {
+		destroy_workqueue(i40e_wq);
+		i40e_dbg_exit();
+		return err;
+	}
+
+	return 0;
+}
+module_init(i40e_init_module);
+
+/**
+ * i40e_exit_module - Driver exit cleanup routine
+ *
+ * i40e_exit_module is called just before the driver is removed
+ * from memory.
+ **/
+static void __exit i40e_exit_module(void)
+{
+	pci_unregister_driver(&i40e_driver);
+	destroy_workqueue(i40e_wq);
+	ida_destroy(&i40e_client_ida);
+	i40e_dbg_exit();
+}
+module_exit(i40e_exit_module);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
new file mode 100644
index 0000000000..07a46adeab
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -0,0 +1,1671 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "i40e_prototype.h"
+
+/**
+ * i40e_init_nvm - Initialize NVM function pointers
+ * @hw: pointer to the HW structure
+ *
+ * Setup the function pointers and the NVM info structure. Should be called
+ * once per NVM initialization, e.g. inside the i40e_init_shared_code().
+ * Please notice that the NVM term is used here (& in all methods covered
+ * in this file) as an equivalent of the FLASH part mapped into the SR.
+ * We are accessing FLASH always thru the Shadow RAM.
+ **/
+int i40e_init_nvm(struct i40e_hw *hw)
+{
+	struct i40e_nvm_info *nvm = &hw->nvm;
+	int ret_code = 0;
+	u32 fla, gens;
+	u8 sr_size;
+
+	/* The SR size is stored regardless of the nvm programming mode
+	 * as the blank mode may be used in the factory line.
+	 */
+	gens = rd32(hw, I40E_GLNVM_GENS);
+	sr_size = ((gens & I40E_GLNVM_GENS_SR_SIZE_MASK) >>
+			   I40E_GLNVM_GENS_SR_SIZE_SHIFT);
+	/* Switching to words (sr_size contains power of 2KB) */
+	nvm->sr_size = BIT(sr_size) * I40E_SR_WORDS_IN_1KB;
+
+	/* Check if we are in the normal or blank NVM programming mode */
+	fla = rd32(hw, I40E_GLNVM_FLA);
+	if (fla & I40E_GLNVM_FLA_LOCKED_MASK) { /* Normal programming mode */
+		/* Max NVM timeout */
+		nvm->timeout = I40E_MAX_NVM_TIMEOUT;
+		nvm->blank_nvm_mode = false;
+	} else { /* Blank programming mode */
+		nvm->blank_nvm_mode = true;
+		ret_code = -EIO;
+		i40e_debug(hw, I40E_DEBUG_NVM, "NVM init error: unsupported blank mode.\n");
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40e_acquire_nvm - Generic request for acquiring the NVM ownership
+ * @hw: pointer to the HW structure
+ * @access: NVM access type (read or write)
+ *
+ * This function will request NVM ownership for reading
+ * via the proper Admin Command.
+ **/
+int i40e_acquire_nvm(struct i40e_hw *hw,
+		     enum i40e_aq_resource_access_type access)
+{
+	u64 gtime, timeout;
+	u64 time_left = 0;
+	int ret_code = 0;
+
+	if (hw->nvm.blank_nvm_mode)
+		goto i40e_i40e_acquire_nvm_exit;
+
+	ret_code = i40e_aq_request_resource(hw, I40E_NVM_RESOURCE_ID, access,
+					    0, &time_left, NULL);
+	/* Reading the Global Device Timer */
+	gtime = rd32(hw, I40E_GLVFGEN_TIMER);
+
+	/* Store the timeout */
+	hw->nvm.hw_semaphore_timeout = I40E_MS_TO_GTIME(time_left) + gtime;
+
+	if (ret_code)
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "NVM acquire type %d failed time_left=%llu ret=%d aq_err=%d\n",
+			   access, time_left, ret_code, hw->aq.asq_last_status);
+
+	if (ret_code && time_left) {
+		/* Poll until the current NVM owner timeouts */
+		timeout = I40E_MS_TO_GTIME(I40E_MAX_NVM_TIMEOUT) + gtime;
+		while ((gtime < timeout) && time_left) {
+			usleep_range(10000, 20000);
+			gtime = rd32(hw, I40E_GLVFGEN_TIMER);
+			ret_code = i40e_aq_request_resource(hw,
+							I40E_NVM_RESOURCE_ID,
+							access, 0, &time_left,
+							NULL);
+			if (!ret_code) {
+				hw->nvm.hw_semaphore_timeout =
+					    I40E_MS_TO_GTIME(time_left) + gtime;
+				break;
+			}
+		}
+		if (ret_code) {
+			hw->nvm.hw_semaphore_timeout = 0;
+			i40e_debug(hw, I40E_DEBUG_NVM,
+				   "NVM acquire timed out, wait %llu ms before trying again. status=%d aq_err=%d\n",
+				   time_left, ret_code, hw->aq.asq_last_status);
+		}
+	}
+
+i40e_i40e_acquire_nvm_exit:
+	return ret_code;
+}
+
+/**
+ * i40e_release_nvm - Generic request for releasing the NVM ownership
+ * @hw: pointer to the HW structure
+ *
+ * This function will release NVM resource via the proper Admin Command.
+ **/
+void i40e_release_nvm(struct i40e_hw *hw)
+{
+	u32 total_delay = 0;
+	int ret_code = 0;
+
+	if (hw->nvm.blank_nvm_mode)
+		return;
+
+	ret_code = i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL);
+
+	/* there are some rare cases when trying to release the resource
+	 * results in an admin Q timeout, so handle them correctly
+	 */
+	while ((ret_code == -EIO) &&
+	       (total_delay < hw->aq.asq_cmd_timeout)) {
+		usleep_range(1000, 2000);
+		ret_code = i40e_aq_release_resource(hw,
+						    I40E_NVM_RESOURCE_ID,
+						    0, NULL);
+		total_delay++;
+	}
+}
+
+/**
+ * i40e_poll_sr_srctl_done_bit - Polls the GLNVM_SRCTL done bit
+ * @hw: pointer to the HW structure
+ *
+ * Polls the SRCTL Shadow RAM register done bit.
+ **/
+static int i40e_poll_sr_srctl_done_bit(struct i40e_hw *hw)
+{
+	int ret_code = -EIO;
+	u32 srctl, wait_cnt;
+
+	/* Poll the I40E_GLNVM_SRCTL until the done bit is set */
+	for (wait_cnt = 0; wait_cnt < I40E_SRRD_SRCTL_ATTEMPTS; wait_cnt++) {
+		srctl = rd32(hw, I40E_GLNVM_SRCTL);
+		if (srctl & I40E_GLNVM_SRCTL_DONE_MASK) {
+			ret_code = 0;
+			break;
+		}
+		udelay(5);
+	}
+	if (ret_code == -EIO)
+		i40e_debug(hw, I40E_DEBUG_NVM, "Done bit in GLNVM_SRCTL not set");
+	return ret_code;
+}
+
+/**
+ * i40e_read_nvm_word_srctl - Reads Shadow RAM via SRCTL register
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM using the GLNVM_SRCTL register.
+ **/
+static int i40e_read_nvm_word_srctl(struct i40e_hw *hw, u16 offset,
+				    u16 *data)
+{
+	int ret_code = -EIO;
+	u32 sr_reg;
+
+	if (offset >= hw->nvm.sr_size) {
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "NVM read error: offset %d beyond Shadow RAM limit %d\n",
+			   offset, hw->nvm.sr_size);
+		ret_code = -EINVAL;
+		goto read_nvm_exit;
+	}
+
+	/* Poll the done bit first */
+	ret_code = i40e_poll_sr_srctl_done_bit(hw);
+	if (!ret_code) {
+		/* Write the address and start reading */
+		sr_reg = ((u32)offset << I40E_GLNVM_SRCTL_ADDR_SHIFT) |
+			 BIT(I40E_GLNVM_SRCTL_START_SHIFT);
+		wr32(hw, I40E_GLNVM_SRCTL, sr_reg);
+
+		/* Poll I40E_GLNVM_SRCTL until the done bit is set */
+		ret_code = i40e_poll_sr_srctl_done_bit(hw);
+		if (!ret_code) {
+			sr_reg = rd32(hw, I40E_GLNVM_SRDATA);
+			*data = (u16)((sr_reg &
+				       I40E_GLNVM_SRDATA_RDDATA_MASK)
+				    >> I40E_GLNVM_SRDATA_RDDATA_SHIFT);
+		}
+	}
+	if (ret_code)
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "NVM read error: Couldn't access Shadow RAM address: 0x%x\n",
+			   offset);
+
+read_nvm_exit:
+	return ret_code;
+}
+
+/**
+ * i40e_read_nvm_aq - Read Shadow RAM.
+ * @hw: pointer to the HW structure.
+ * @module_pointer: module pointer location in words from the NVM beginning
+ * @offset: offset in words from module start
+ * @words: number of words to read
+ * @data: buffer with words to read to the Shadow RAM
+ * @last_command: tells the AdminQ that this is the last command
+ *
+ * Reads a 16 bit words buffer to the Shadow RAM using the admin command.
+ **/
+static int i40e_read_nvm_aq(struct i40e_hw *hw,
+			    u8 module_pointer, u32 offset,
+			    u16 words, void *data,
+			    bool last_command)
+{
+	struct i40e_asq_cmd_details cmd_details;
+	int ret_code = -EIO;
+
+	memset(&cmd_details, 0, sizeof(cmd_details));
+	cmd_details.wb_desc = &hw->nvm_wb_desc;
+
+	/* Here we are checking the SR limit only for the flat memory model.
+	 * We cannot do it for the module-based model, as we did not acquire
+	 * the NVM resource yet (we cannot get the module pointer value).
+	 * Firmware will check the module-based model.
+	 */
+	if ((offset + words) > hw->nvm.sr_size)
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "NVM read error: offset %d beyond Shadow RAM limit %d\n",
+			   (offset + words), hw->nvm.sr_size);
+	else if (words > I40E_SR_SECTOR_SIZE_IN_WORDS)
+		/* We can read only up to 4KB (one sector), in one AQ write */
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "NVM read fail error: tried to read %d words, limit is %d.\n",
+			   words, I40E_SR_SECTOR_SIZE_IN_WORDS);
+	else if (((offset + (words - 1)) / I40E_SR_SECTOR_SIZE_IN_WORDS)
+		 != (offset / I40E_SR_SECTOR_SIZE_IN_WORDS))
+		/* A single read cannot spread over two sectors */
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "NVM read error: cannot spread over two sectors in a single read offset=%d words=%d\n",
+			   offset, words);
+	else
+		ret_code = i40e_aq_read_nvm(hw, module_pointer,
+					    2 * offset,  /*bytes*/
+					    2 * words,   /*bytes*/
+					    data, last_command, &cmd_details);
+
+	return ret_code;
+}
+
+/**
+ * i40e_read_nvm_word_aq - Reads Shadow RAM via AQ
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM using the AdminQ
+ **/
+static int i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset,
+				 u16 *data)
+{
+	int ret_code = -EIO;
+
+	ret_code = i40e_read_nvm_aq(hw, 0x0, offset, 1, data, true);
+	*data = le16_to_cpu(*(__le16 *)data);
+
+	return ret_code;
+}
+
+/**
+ * __i40e_read_nvm_word - Reads nvm word, assumes caller does the locking
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM.
+ *
+ * Do not use this function except in cases where the nvm lock is already
+ * taken via i40e_acquire_nvm().
+ **/
+static int __i40e_read_nvm_word(struct i40e_hw *hw,
+				u16 offset, u16 *data)
+{
+	if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
+		return i40e_read_nvm_word_aq(hw, offset, data);
+
+	return i40e_read_nvm_word_srctl(hw, offset, data);
+}
+
+/**
+ * i40e_read_nvm_word - Reads nvm word and acquire lock if necessary
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM.
+ **/
+int i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
+		       u16 *data)
+{
+	int ret_code = 0;
+
+	if (hw->flags & I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK)
+		ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+	if (ret_code)
+		return ret_code;
+
+	ret_code = __i40e_read_nvm_word(hw, offset, data);
+
+	if (hw->flags & I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK)
+		i40e_release_nvm(hw);
+
+	return ret_code;
+}
+
+/**
+ * i40e_read_nvm_module_data - Reads NVM Buffer to specified memory location
+ * @hw: Pointer to the HW structure
+ * @module_ptr: Pointer to module in words with respect to NVM beginning
+ * @module_offset: Offset in words from module start
+ * @data_offset: Offset in words from reading data area start
+ * @words_data_size: Words to read from NVM
+ * @data_ptr: Pointer to memory location where resulting buffer will be stored
+ **/
+int i40e_read_nvm_module_data(struct i40e_hw *hw,
+			      u8 module_ptr,
+			      u16 module_offset,
+			      u16 data_offset,
+			      u16 words_data_size,
+			      u16 *data_ptr)
+{
+	u16 specific_ptr = 0;
+	u16 ptr_value = 0;
+	u32 offset = 0;
+	int status;
+
+	if (module_ptr != 0) {
+		status = i40e_read_nvm_word(hw, module_ptr, &ptr_value);
+		if (status) {
+			i40e_debug(hw, I40E_DEBUG_ALL,
+				   "Reading nvm word failed.Error code: %d.\n",
+				   status);
+			return -EIO;
+		}
+	}
+#define I40E_NVM_INVALID_PTR_VAL 0x7FFF
+#define I40E_NVM_INVALID_VAL 0xFFFF
+
+	/* Pointer not initialized */
+	if (ptr_value == I40E_NVM_INVALID_PTR_VAL ||
+	    ptr_value == I40E_NVM_INVALID_VAL) {
+		i40e_debug(hw, I40E_DEBUG_ALL, "Pointer not initialized.\n");
+		return -EINVAL;
+	}
+
+	/* Check whether the module is in SR mapped area or outside */
+	if (ptr_value & I40E_PTR_TYPE) {
+		/* Pointer points outside of the Shared RAM mapped area */
+		i40e_debug(hw, I40E_DEBUG_ALL,
+			   "Reading nvm data failed. Pointer points outside of the Shared RAM mapped area.\n");
+
+		return -EINVAL;
+	} else {
+		/* Read from the Shadow RAM */
+
+		status = i40e_read_nvm_word(hw, ptr_value + module_offset,
+					    &specific_ptr);
+		if (status) {
+			i40e_debug(hw, I40E_DEBUG_ALL,
+				   "Reading nvm word failed.Error code: %d.\n",
+				   status);
+			return -EIO;
+		}
+
+		offset = ptr_value + module_offset + specific_ptr +
+			data_offset;
+
+		status = i40e_read_nvm_buffer(hw, offset, &words_data_size,
+					      data_ptr);
+		if (status) {
+			i40e_debug(hw, I40E_DEBUG_ALL,
+				   "Reading nvm buffer failed.Error code: %d.\n",
+				   status);
+		}
+	}
+
+	return status;
+}
+
+/**
+ * i40e_read_nvm_buffer_srctl - Reads Shadow RAM buffer via SRCTL register
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
+ * @words: (in) number of words to read; (out) number of words actually read
+ * @data: words read from the Shadow RAM
+ *
+ * Reads 16 bit words (data buffer) from the SR using the i40e_read_nvm_srrd()
+ * method. The buffer read is preceded by the NVM ownership take
+ * and followed by the release.
+ **/
+static int i40e_read_nvm_buffer_srctl(struct i40e_hw *hw, u16 offset,
+				      u16 *words, u16 *data)
+{
+	int ret_code = 0;
+	u16 index, word;
+
+	/* Loop thru the selected region */
+	for (word = 0; word < *words; word++) {
+		index = offset + word;
+		ret_code = i40e_read_nvm_word_srctl(hw, index, &data[word]);
+		if (ret_code)
+			break;
+	}
+
+	/* Update the number of words read from the Shadow RAM */
+	*words = word;
+
+	return ret_code;
+}
+
+/**
+ * i40e_read_nvm_buffer_aq - Reads Shadow RAM buffer via AQ
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
+ * @words: (in) number of words to read; (out) number of words actually read
+ * @data: words read from the Shadow RAM
+ *
+ * Reads 16 bit words (data buffer) from the SR using the i40e_read_nvm_aq()
+ * method. The buffer read is preceded by the NVM ownership take
+ * and followed by the release.
+ **/
+static int i40e_read_nvm_buffer_aq(struct i40e_hw *hw, u16 offset,
+				   u16 *words, u16 *data)
+{
+	bool last_cmd = false;
+	u16 words_read = 0;
+	u16 read_size;
+	int ret_code;
+	u16 i = 0;
+
+	do {
+		/* Calculate number of bytes we should read in this step.
+		 * FVL AQ do not allow to read more than one page at a time or
+		 * to cross page boundaries.
+		 */
+		if (offset % I40E_SR_SECTOR_SIZE_IN_WORDS)
+			read_size = min(*words,
+					(u16)(I40E_SR_SECTOR_SIZE_IN_WORDS -
+				      (offset % I40E_SR_SECTOR_SIZE_IN_WORDS)));
+		else
+			read_size = min((*words - words_read),
+					I40E_SR_SECTOR_SIZE_IN_WORDS);
+
+		/* Check if this is last command, if so set proper flag */
+		if ((words_read + read_size) >= *words)
+			last_cmd = true;
+
+		ret_code = i40e_read_nvm_aq(hw, 0x0, offset, read_size,
+					    data + words_read, last_cmd);
+		if (ret_code)
+			goto read_nvm_buffer_aq_exit;
+
+		/* Increment counter for words already read and move offset to
+		 * new read location
+		 */
+		words_read += read_size;
+		offset += read_size;
+	} while (words_read < *words);
+
+	for (i = 0; i < *words; i++)
+		data[i] = le16_to_cpu(((__le16 *)data)[i]);
+
+read_nvm_buffer_aq_exit:
+	*words = words_read;
+	return ret_code;
+}
+
+/**
+ * __i40e_read_nvm_buffer - Reads nvm buffer, caller must acquire lock
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
+ * @words: (in) number of words to read; (out) number of words actually read
+ * @data: words read from the Shadow RAM
+ *
+ * Reads 16 bit words (data buffer) from the SR using the i40e_read_nvm_srrd()
+ * method.
+ **/
+static int __i40e_read_nvm_buffer(struct i40e_hw *hw,
+				  u16 offset, u16 *words,
+				  u16 *data)
+{
+	if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
+		return i40e_read_nvm_buffer_aq(hw, offset, words, data);
+
+	return i40e_read_nvm_buffer_srctl(hw, offset, words, data);
+}
+
+/**
+ * i40e_read_nvm_buffer - Reads Shadow RAM buffer and acquire lock if necessary
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
+ * @words: (in) number of words to read; (out) number of words actually read
+ * @data: words read from the Shadow RAM
+ *
+ * Reads 16 bit words (data buffer) from the SR using the i40e_read_nvm_srrd()
+ * method. The buffer read is preceded by the NVM ownership take
+ * and followed by the release.
+ **/
+int i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
+			 u16 *words, u16 *data)
+{
+	int ret_code = 0;
+
+	if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) {
+		ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+		if (!ret_code) {
+			ret_code = i40e_read_nvm_buffer_aq(hw, offset, words,
+							   data);
+			i40e_release_nvm(hw);
+		}
+	} else {
+		ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data);
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40e_write_nvm_aq - Writes Shadow RAM.
+ * @hw: pointer to the HW structure.
+ * @module_pointer: module pointer location in words from the NVM beginning
+ * @offset: offset in words from module start
+ * @words: number of words to write
+ * @data: buffer with words to write to the Shadow RAM
+ * @last_command: tells the AdminQ that this is the last command
+ *
+ * Writes a 16 bit words buffer to the Shadow RAM using the admin command.
+ **/
+static int i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
+			     u32 offset, u16 words, void *data,
+			     bool last_command)
+{
+	struct i40e_asq_cmd_details cmd_details;
+	int ret_code = -EIO;
+
+	memset(&cmd_details, 0, sizeof(cmd_details));
+	cmd_details.wb_desc = &hw->nvm_wb_desc;
+
+	/* Here we are checking the SR limit only for the flat memory model.
+	 * We cannot do it for the module-based model, as we did not acquire
+	 * the NVM resource yet (we cannot get the module pointer value).
+	 * Firmware will check the module-based model.
+	 */
+	if ((offset + words) > hw->nvm.sr_size)
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "NVM write error: offset %d beyond Shadow RAM limit %d\n",
+			   (offset + words), hw->nvm.sr_size);
+	else if (words > I40E_SR_SECTOR_SIZE_IN_WORDS)
+		/* We can write only up to 4KB (one sector), in one AQ write */
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "NVM write fail error: tried to write %d words, limit is %d.\n",
+			   words, I40E_SR_SECTOR_SIZE_IN_WORDS);
+	else if (((offset + (words - 1)) / I40E_SR_SECTOR_SIZE_IN_WORDS)
+		 != (offset / I40E_SR_SECTOR_SIZE_IN_WORDS))
+		/* A single write cannot spread over two sectors */
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "NVM write error: cannot spread over two sectors in a single write offset=%d words=%d\n",
+			   offset, words);
+	else
+		ret_code = i40e_aq_update_nvm(hw, module_pointer,
+					      2 * offset,  /*bytes*/
+					      2 * words,   /*bytes*/
+					      data, last_command, 0,
+					      &cmd_details);
+
+	return ret_code;
+}
+
+/**
+ * i40e_calc_nvm_checksum - Calculates and returns the checksum
+ * @hw: pointer to hardware structure
+ * @checksum: pointer to the checksum
+ *
+ * This function calculates SW Checksum that covers the whole 64kB shadow RAM
+ * except the VPD and PCIe ALT Auto-load modules. The structure and size of VPD
+ * is customer specific and unknown. Therefore, this function skips all maximum
+ * possible size of VPD (1kB).
+ **/
+static int i40e_calc_nvm_checksum(struct i40e_hw *hw,
+				  u16 *checksum)
+{
+	struct i40e_virt_mem vmem;
+	u16 pcie_alt_module = 0;
+	u16 checksum_local = 0;
+	u16 vpd_module = 0;
+	int ret_code;
+	u16 *data;
+	u16 i = 0;
+
+	ret_code = i40e_allocate_virt_mem(hw, &vmem,
+				    I40E_SR_SECTOR_SIZE_IN_WORDS * sizeof(u16));
+	if (ret_code)
+		goto i40e_calc_nvm_checksum_exit;
+	data = (u16 *)vmem.va;
+
+	/* read pointer to VPD area */
+	ret_code = __i40e_read_nvm_word(hw, I40E_SR_VPD_PTR, &vpd_module);
+	if (ret_code) {
+		ret_code = -EIO;
+		goto i40e_calc_nvm_checksum_exit;
+	}
+
+	/* read pointer to PCIe Alt Auto-load module */
+	ret_code = __i40e_read_nvm_word(hw, I40E_SR_PCIE_ALT_AUTO_LOAD_PTR,
+					&pcie_alt_module);
+	if (ret_code) {
+		ret_code = -EIO;
+		goto i40e_calc_nvm_checksum_exit;
+	}
+
+	/* Calculate SW checksum that covers the whole 64kB shadow RAM
+	 * except the VPD and PCIe ALT Auto-load modules
+	 */
+	for (i = 0; i < hw->nvm.sr_size; i++) {
+		/* Read SR page */
+		if ((i % I40E_SR_SECTOR_SIZE_IN_WORDS) == 0) {
+			u16 words = I40E_SR_SECTOR_SIZE_IN_WORDS;
+
+			ret_code = __i40e_read_nvm_buffer(hw, i, &words, data);
+			if (ret_code) {
+				ret_code = -EIO;
+				goto i40e_calc_nvm_checksum_exit;
+			}
+		}
+
+		/* Skip Checksum word */
+		if (i == I40E_SR_SW_CHECKSUM_WORD)
+			continue;
+		/* Skip VPD module (convert byte size to word count) */
+		if ((i >= (u32)vpd_module) &&
+		    (i < ((u32)vpd_module +
+		     (I40E_SR_VPD_MODULE_MAX_SIZE / 2)))) {
+			continue;
+		}
+		/* Skip PCIe ALT module (convert byte size to word count) */
+		if ((i >= (u32)pcie_alt_module) &&
+		    (i < ((u32)pcie_alt_module +
+		     (I40E_SR_PCIE_ALT_MODULE_MAX_SIZE / 2)))) {
+			continue;
+		}
+
+		checksum_local += data[i % I40E_SR_SECTOR_SIZE_IN_WORDS];
+	}
+
+	*checksum = (u16)I40E_SR_SW_CHECKSUM_BASE - checksum_local;
+
+i40e_calc_nvm_checksum_exit:
+	i40e_free_virt_mem(hw, &vmem);
+	return ret_code;
+}
+
+/**
+ * i40e_update_nvm_checksum - Updates the NVM checksum
+ * @hw: pointer to hardware structure
+ *
+ * NVM ownership must be acquired before calling this function and released
+ * on ARQ completion event reception by caller.
+ * This function will commit SR to NVM.
+ **/
+int i40e_update_nvm_checksum(struct i40e_hw *hw)
+{
+	__le16 le_sum;
+	int ret_code;
+	u16 checksum;
+
+	ret_code = i40e_calc_nvm_checksum(hw, &checksum);
+	if (!ret_code) {
+		le_sum = cpu_to_le16(checksum);
+		ret_code = i40e_write_nvm_aq(hw, 0x00, I40E_SR_SW_CHECKSUM_WORD,
+					     1, &le_sum, true);
+	}
+
+	return ret_code;
+}
+
+/**
+ * i40e_validate_nvm_checksum - Validate EEPROM checksum
+ * @hw: pointer to hardware structure
+ * @checksum: calculated checksum
+ *
+ * Performs checksum calculation and validates the NVM SW checksum. If the
+ * caller does not need checksum, the value can be NULL.
+ **/
+int i40e_validate_nvm_checksum(struct i40e_hw *hw,
+			       u16 *checksum)
+{
+	u16 checksum_local = 0;
+	u16 checksum_sr = 0;
+	int ret_code = 0;
+
+	/* We must acquire the NVM lock in order to correctly synchronize the
+	 * NVM accesses across multiple PFs. Without doing so it is possible
+	 * for one of the PFs to read invalid data potentially indicating that
+	 * the checksum is invalid.
+	 */
+	ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+	if (ret_code)
+		return ret_code;
+	ret_code = i40e_calc_nvm_checksum(hw, &checksum_local);
+	__i40e_read_nvm_word(hw, I40E_SR_SW_CHECKSUM_WORD, &checksum_sr);
+	i40e_release_nvm(hw);
+	if (ret_code)
+		return ret_code;
+
+	/* Verify read checksum from EEPROM is the same as
+	 * calculated checksum
+	 */
+	if (checksum_local != checksum_sr)
+		ret_code = -EIO;
+
+	/* If the user cares, return the calculated checksum */
+	if (checksum)
+		*checksum = checksum_local;
+
+	return ret_code;
+}
+
+static int i40e_nvmupd_state_init(struct i40e_hw *hw,
+				  struct i40e_nvm_access *cmd,
+				  u8 *bytes, int *perrno);
+static int i40e_nvmupd_state_reading(struct i40e_hw *hw,
+				     struct i40e_nvm_access *cmd,
+				     u8 *bytes, int *perrno);
+static int i40e_nvmupd_state_writing(struct i40e_hw *hw,
+				     struct i40e_nvm_access *cmd,
+				     u8 *bytes, int *errno);
+static enum i40e_nvmupd_cmd i40e_nvmupd_validate_command(struct i40e_hw *hw,
+						struct i40e_nvm_access *cmd,
+						int *perrno);
+static int i40e_nvmupd_nvm_erase(struct i40e_hw *hw,
+				 struct i40e_nvm_access *cmd,
+				 int *perrno);
+static int i40e_nvmupd_nvm_write(struct i40e_hw *hw,
+				 struct i40e_nvm_access *cmd,
+				 u8 *bytes, int *perrno);
+static int i40e_nvmupd_nvm_read(struct i40e_hw *hw,
+				struct i40e_nvm_access *cmd,
+				u8 *bytes, int *perrno);
+static int i40e_nvmupd_exec_aq(struct i40e_hw *hw,
+			       struct i40e_nvm_access *cmd,
+			       u8 *bytes, int *perrno);
+static int i40e_nvmupd_get_aq_result(struct i40e_hw *hw,
+				     struct i40e_nvm_access *cmd,
+				     u8 *bytes, int *perrno);
+static int i40e_nvmupd_get_aq_event(struct i40e_hw *hw,
+				    struct i40e_nvm_access *cmd,
+				    u8 *bytes, int *perrno);
+static inline u8 i40e_nvmupd_get_module(u32 val)
+{
+	return (u8)(val & I40E_NVM_MOD_PNT_MASK);
+}
+static inline u8 i40e_nvmupd_get_transaction(u32 val)
+{
+	return (u8)((val & I40E_NVM_TRANS_MASK) >> I40E_NVM_TRANS_SHIFT);
+}
+
+static inline u8 i40e_nvmupd_get_preservation_flags(u32 val)
+{
+	return (u8)((val & I40E_NVM_PRESERVATION_FLAGS_MASK) >>
+		    I40E_NVM_PRESERVATION_FLAGS_SHIFT);
+}
+
+static const char * const i40e_nvm_update_state_str[] = {
+	"I40E_NVMUPD_INVALID",
+	"I40E_NVMUPD_READ_CON",
+	"I40E_NVMUPD_READ_SNT",
+	"I40E_NVMUPD_READ_LCB",
+	"I40E_NVMUPD_READ_SA",
+	"I40E_NVMUPD_WRITE_ERA",
+	"I40E_NVMUPD_WRITE_CON",
+	"I40E_NVMUPD_WRITE_SNT",
+	"I40E_NVMUPD_WRITE_LCB",
+	"I40E_NVMUPD_WRITE_SA",
+	"I40E_NVMUPD_CSUM_CON",
+	"I40E_NVMUPD_CSUM_SA",
+	"I40E_NVMUPD_CSUM_LCB",
+	"I40E_NVMUPD_STATUS",
+	"I40E_NVMUPD_EXEC_AQ",
+	"I40E_NVMUPD_GET_AQ_RESULT",
+	"I40E_NVMUPD_GET_AQ_EVENT",
+};
+
+/**
+ * i40e_nvmupd_command - Process an NVM update command
+ * @hw: pointer to hardware structure
+ * @cmd: pointer to nvm update command
+ * @bytes: pointer to the data buffer
+ * @perrno: pointer to return error code
+ *
+ * Dispatches command depending on what update state is current
+ **/
+int i40e_nvmupd_command(struct i40e_hw *hw,
+			struct i40e_nvm_access *cmd,
+			u8 *bytes, int *perrno)
+{
+	enum i40e_nvmupd_cmd upd_cmd;
+	int status;
+
+	/* assume success */
+	*perrno = 0;
+
+	/* early check for status command and debug msgs */
+	upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno);
+
+	i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d opc 0x%04x cmd 0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n",
+		   i40e_nvm_update_state_str[upd_cmd],
+		   hw->nvmupd_state,
+		   hw->nvm_release_on_done, hw->nvm_wait_opcode,
+		   cmd->command, cmd->config, cmd->offset, cmd->data_size);
+
+	if (upd_cmd == I40E_NVMUPD_INVALID) {
+		*perrno = -EFAULT;
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "i40e_nvmupd_validate_command returns %d errno %d\n",
+			   upd_cmd, *perrno);
+	}
+
+	/* a status request returns immediately rather than
+	 * going into the state machine
+	 */
+	if (upd_cmd == I40E_NVMUPD_STATUS) {
+		if (!cmd->data_size) {
+			*perrno = -EFAULT;
+			return -EINVAL;
+		}
+
+		bytes[0] = hw->nvmupd_state;
+
+		if (cmd->data_size >= 4) {
+			bytes[1] = 0;
+			*((u16 *)&bytes[2]) = hw->nvm_wait_opcode;
+		}
+
+		/* Clear error status on read */
+		if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR)
+			hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+
+		return 0;
+	}
+
+	/* Clear status even it is not read and log */
+	if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) {
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "Clearing I40E_NVMUPD_STATE_ERROR state without reading\n");
+		hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+	}
+
+	/* Acquire lock to prevent race condition where adminq_task
+	 * can execute after i40e_nvmupd_nvm_read/write but before state
+	 * variables (nvm_wait_opcode, nvm_release_on_done) are updated.
+	 *
+	 * During NVMUpdate, it is observed that lock could be held for
+	 * ~5ms for most commands. However lock is held for ~60ms for
+	 * NVMUPD_CSUM_LCB command.
+	 */
+	mutex_lock(&hw->aq.arq_mutex);
+	switch (hw->nvmupd_state) {
+	case I40E_NVMUPD_STATE_INIT:
+		status = i40e_nvmupd_state_init(hw, cmd, bytes, perrno);
+		break;
+
+	case I40E_NVMUPD_STATE_READING:
+		status = i40e_nvmupd_state_reading(hw, cmd, bytes, perrno);
+		break;
+
+	case I40E_NVMUPD_STATE_WRITING:
+		status = i40e_nvmupd_state_writing(hw, cmd, bytes, perrno);
+		break;
+
+	case I40E_NVMUPD_STATE_INIT_WAIT:
+	case I40E_NVMUPD_STATE_WRITE_WAIT:
+		/* if we need to stop waiting for an event, clear
+		 * the wait info and return before doing anything else
+		 */
+		if (cmd->offset == 0xffff) {
+			i40e_nvmupd_clear_wait_state(hw);
+			status = 0;
+			break;
+		}
+
+		status = -EBUSY;
+		*perrno = -EBUSY;
+		break;
+
+	default:
+		/* invalid state, should never happen */
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "NVMUPD: no such state %d\n", hw->nvmupd_state);
+		status = -EOPNOTSUPP;
+		*perrno = -ESRCH;
+		break;
+	}
+
+	mutex_unlock(&hw->aq.arq_mutex);
+	return status;
+}
+
+/**
+ * i40e_nvmupd_state_init - Handle NVM update state Init
+ * @hw: pointer to hardware structure
+ * @cmd: pointer to nvm update command buffer
+ * @bytes: pointer to the data buffer
+ * @perrno: pointer to return error code
+ *
+ * Process legitimate commands of the Init state and conditionally set next
+ * state. Reject all other commands.
+ **/
+static int i40e_nvmupd_state_init(struct i40e_hw *hw,
+				  struct i40e_nvm_access *cmd,
+				  u8 *bytes, int *perrno)
+{
+	enum i40e_nvmupd_cmd upd_cmd;
+	int status = 0;
+
+	upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno);
+
+	switch (upd_cmd) {
+	case I40E_NVMUPD_READ_SA:
+		status = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+		if (status) {
+			*perrno = i40e_aq_rc_to_posix(status,
+						     hw->aq.asq_last_status);
+		} else {
+			status = i40e_nvmupd_nvm_read(hw, cmd, bytes, perrno);
+			i40e_release_nvm(hw);
+		}
+		break;
+
+	case I40E_NVMUPD_READ_SNT:
+		status = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+		if (status) {
+			*perrno = i40e_aq_rc_to_posix(status,
+						     hw->aq.asq_last_status);
+		} else {
+			status = i40e_nvmupd_nvm_read(hw, cmd, bytes, perrno);
+			if (status)
+				i40e_release_nvm(hw);
+			else
+				hw->nvmupd_state = I40E_NVMUPD_STATE_READING;
+		}
+		break;
+
+	case I40E_NVMUPD_WRITE_ERA:
+		status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE);
+		if (status) {
+			*perrno = i40e_aq_rc_to_posix(status,
+						     hw->aq.asq_last_status);
+		} else {
+			status = i40e_nvmupd_nvm_erase(hw, cmd, perrno);
+			if (status) {
+				i40e_release_nvm(hw);
+			} else {
+				hw->nvm_release_on_done = true;
+				hw->nvm_wait_opcode = i40e_aqc_opc_nvm_erase;
+				hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT;
+			}
+		}
+		break;
+
+	case I40E_NVMUPD_WRITE_SA:
+		status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE);
+		if (status) {
+			*perrno = i40e_aq_rc_to_posix(status,
+						     hw->aq.asq_last_status);
+		} else {
+			status = i40e_nvmupd_nvm_write(hw, cmd, bytes, perrno);
+			if (status) {
+				i40e_release_nvm(hw);
+			} else {
+				hw->nvm_release_on_done = true;
+				hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update;
+				hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT;
+			}
+		}
+		break;
+
+	case I40E_NVMUPD_WRITE_SNT:
+		status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE);
+		if (status) {
+			*perrno = i40e_aq_rc_to_posix(status,
+						     hw->aq.asq_last_status);
+		} else {
+			status = i40e_nvmupd_nvm_write(hw, cmd, bytes, perrno);
+			if (status) {
+				i40e_release_nvm(hw);
+			} else {
+				hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update;
+				hw->nvmupd_state = I40E_NVMUPD_STATE_WRITE_WAIT;
+			}
+		}
+		break;
+
+	case I40E_NVMUPD_CSUM_SA:
+		status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE);
+		if (status) {
+			*perrno = i40e_aq_rc_to_posix(status,
+						     hw->aq.asq_last_status);
+		} else {
+			status = i40e_update_nvm_checksum(hw);
+			if (status) {
+				*perrno = hw->aq.asq_last_status ?
+				   i40e_aq_rc_to_posix(status,
+						       hw->aq.asq_last_status) :
+				   -EIO;
+				i40e_release_nvm(hw);
+			} else {
+				hw->nvm_release_on_done = true;
+				hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update;
+				hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT;
+			}
+		}
+		break;
+
+	case I40E_NVMUPD_EXEC_AQ:
+		status = i40e_nvmupd_exec_aq(hw, cmd, bytes, perrno);
+		break;
+
+	case I40E_NVMUPD_GET_AQ_RESULT:
+		status = i40e_nvmupd_get_aq_result(hw, cmd, bytes, perrno);
+		break;
+
+	case I40E_NVMUPD_GET_AQ_EVENT:
+		status = i40e_nvmupd_get_aq_event(hw, cmd, bytes, perrno);
+		break;
+
+	default:
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "NVMUPD: bad cmd %s in init state\n",
+			   i40e_nvm_update_state_str[upd_cmd]);
+		status = -EIO;
+		*perrno = -ESRCH;
+		break;
+	}
+	return status;
+}
+
+/**
+ * i40e_nvmupd_state_reading - Handle NVM update state Reading
+ * @hw: pointer to hardware structure
+ * @cmd: pointer to nvm update command buffer
+ * @bytes: pointer to the data buffer
+ * @perrno: pointer to return error code
+ *
+ * NVM ownership is already held.  Process legitimate commands and set any
+ * change in state; reject all other commands.
+ **/
+static int i40e_nvmupd_state_reading(struct i40e_hw *hw,
+				     struct i40e_nvm_access *cmd,
+				     u8 *bytes, int *perrno)
+{
+	enum i40e_nvmupd_cmd upd_cmd;
+	int status = 0;
+
+	upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno);
+
+	switch (upd_cmd) {
+	case I40E_NVMUPD_READ_SA:
+	case I40E_NVMUPD_READ_CON:
+		status = i40e_nvmupd_nvm_read(hw, cmd, bytes, perrno);
+		break;
+
+	case I40E_NVMUPD_READ_LCB:
+		status = i40e_nvmupd_nvm_read(hw, cmd, bytes, perrno);
+		i40e_release_nvm(hw);
+		hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+		break;
+
+	default:
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "NVMUPD: bad cmd %s in reading state.\n",
+			   i40e_nvm_update_state_str[upd_cmd]);
+		status = -EOPNOTSUPP;
+		*perrno = -ESRCH;
+		break;
+	}
+	return status;
+}
+
+/**
+ * i40e_nvmupd_state_writing - Handle NVM update state Writing
+ * @hw: pointer to hardware structure
+ * @cmd: pointer to nvm update command buffer
+ * @bytes: pointer to the data buffer
+ * @perrno: pointer to return error code
+ *
+ * NVM ownership is already held.  Process legitimate commands and set any
+ * change in state; reject all other commands
+ **/
+static int i40e_nvmupd_state_writing(struct i40e_hw *hw,
+				     struct i40e_nvm_access *cmd,
+				     u8 *bytes, int *perrno)
+{
+	enum i40e_nvmupd_cmd upd_cmd;
+	bool retry_attempt = false;
+	int status = 0;
+
+	upd_cmd = i40e_nvmupd_validate_command(hw, cmd, perrno);
+
+retry:
+	switch (upd_cmd) {
+	case I40E_NVMUPD_WRITE_CON:
+		status = i40e_nvmupd_nvm_write(hw, cmd, bytes, perrno);
+		if (!status) {
+			hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update;
+			hw->nvmupd_state = I40E_NVMUPD_STATE_WRITE_WAIT;
+		}
+		break;
+
+	case I40E_NVMUPD_WRITE_LCB:
+		status = i40e_nvmupd_nvm_write(hw, cmd, bytes, perrno);
+		if (status) {
+			*perrno = hw->aq.asq_last_status ?
+				   i40e_aq_rc_to_posix(status,
+						       hw->aq.asq_last_status) :
+				   -EIO;
+			hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+		} else {
+			hw->nvm_release_on_done = true;
+			hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update;
+			hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT;
+		}
+		break;
+
+	case I40E_NVMUPD_CSUM_CON:
+		/* Assumes the caller has acquired the nvm */
+		status = i40e_update_nvm_checksum(hw);
+		if (status) {
+			*perrno = hw->aq.asq_last_status ?
+				   i40e_aq_rc_to_posix(status,
+						       hw->aq.asq_last_status) :
+				   -EIO;
+			hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+		} else {
+			hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update;
+			hw->nvmupd_state = I40E_NVMUPD_STATE_WRITE_WAIT;
+		}
+		break;
+
+	case I40E_NVMUPD_CSUM_LCB:
+		/* Assumes the caller has acquired the nvm */
+		status = i40e_update_nvm_checksum(hw);
+		if (status) {
+			*perrno = hw->aq.asq_last_status ?
+				   i40e_aq_rc_to_posix(status,
+						       hw->aq.asq_last_status) :
+				   -EIO;
+			hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+		} else {
+			hw->nvm_release_on_done = true;
+			hw->nvm_wait_opcode = i40e_aqc_opc_nvm_update;
+			hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT;
+		}
+		break;
+
+	default:
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "NVMUPD: bad cmd %s in writing state.\n",
+			   i40e_nvm_update_state_str[upd_cmd]);
+		status = -EOPNOTSUPP;
+		*perrno = -ESRCH;
+		break;
+	}
+
+	/* In some circumstances, a multi-write transaction takes longer
+	 * than the default 3 minute timeout on the write semaphore.  If
+	 * the write failed with an EBUSY status, this is likely the problem,
+	 * so here we try to reacquire the semaphore then retry the write.
+	 * We only do one retry, then give up.
+	 */
+	if (status && (hw->aq.asq_last_status == I40E_AQ_RC_EBUSY) &&
+	    !retry_attempt) {
+		u32 old_asq_status = hw->aq.asq_last_status;
+		int old_status = status;
+		u32 gtime;
+
+		gtime = rd32(hw, I40E_GLVFGEN_TIMER);
+		if (gtime >= hw->nvm.hw_semaphore_timeout) {
+			i40e_debug(hw, I40E_DEBUG_ALL,
+				   "NVMUPD: write semaphore expired (%d >= %lld), retrying\n",
+				   gtime, hw->nvm.hw_semaphore_timeout);
+			i40e_release_nvm(hw);
+			status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE);
+			if (status) {
+				i40e_debug(hw, I40E_DEBUG_ALL,
+					   "NVMUPD: write semaphore reacquire failed aq_err = %d\n",
+					   hw->aq.asq_last_status);
+				status = old_status;
+				hw->aq.asq_last_status = old_asq_status;
+			} else {
+				retry_attempt = true;
+				goto retry;
+			}
+		}
+	}
+
+	return status;
+}
+
+/**
+ * i40e_nvmupd_clear_wait_state - clear wait state on hw
+ * @hw: pointer to the hardware structure
+ **/
+void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw)
+{
+	i40e_debug(hw, I40E_DEBUG_NVM,
+		   "NVMUPD: clearing wait on opcode 0x%04x\n",
+		   hw->nvm_wait_opcode);
+
+	if (hw->nvm_release_on_done) {
+		i40e_release_nvm(hw);
+		hw->nvm_release_on_done = false;
+	}
+	hw->nvm_wait_opcode = 0;
+
+	if (hw->aq.arq_last_status) {
+		hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR;
+		return;
+	}
+
+	switch (hw->nvmupd_state) {
+	case I40E_NVMUPD_STATE_INIT_WAIT:
+		hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+		break;
+
+	case I40E_NVMUPD_STATE_WRITE_WAIT:
+		hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING;
+		break;
+
+	default:
+		break;
+	}
+}
+
+/**
+ * i40e_nvmupd_check_wait_event - handle NVM update operation events
+ * @hw: pointer to the hardware structure
+ * @opcode: the event that just happened
+ * @desc: AdminQ descriptor
+ **/
+void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode,
+				  struct i40e_aq_desc *desc)
+{
+	u32 aq_desc_len = sizeof(struct i40e_aq_desc);
+
+	if (opcode == hw->nvm_wait_opcode) {
+		memcpy(&hw->nvm_aq_event_desc, desc, aq_desc_len);
+		i40e_nvmupd_clear_wait_state(hw);
+	}
+}
+
+/**
+ * i40e_nvmupd_validate_command - Validate given command
+ * @hw: pointer to hardware structure
+ * @cmd: pointer to nvm update command buffer
+ * @perrno: pointer to return error code
+ *
+ * Return one of the valid command types or I40E_NVMUPD_INVALID
+ **/
+static enum i40e_nvmupd_cmd i40e_nvmupd_validate_command(struct i40e_hw *hw,
+						 struct i40e_nvm_access *cmd,
+						 int *perrno)
+{
+	enum i40e_nvmupd_cmd upd_cmd;
+	u8 module, transaction;
+
+	/* anything that doesn't match a recognized case is an error */
+	upd_cmd = I40E_NVMUPD_INVALID;
+
+	transaction = i40e_nvmupd_get_transaction(cmd->config);
+	module = i40e_nvmupd_get_module(cmd->config);
+
+	/* limits on data size */
+	if ((cmd->data_size < 1) ||
+	    (cmd->data_size > I40E_NVMUPD_MAX_DATA)) {
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "i40e_nvmupd_validate_command data_size %d\n",
+			   cmd->data_size);
+		*perrno = -EFAULT;
+		return I40E_NVMUPD_INVALID;
+	}
+
+	switch (cmd->command) {
+	case I40E_NVM_READ:
+		switch (transaction) {
+		case I40E_NVM_CON:
+			upd_cmd = I40E_NVMUPD_READ_CON;
+			break;
+		case I40E_NVM_SNT:
+			upd_cmd = I40E_NVMUPD_READ_SNT;
+			break;
+		case I40E_NVM_LCB:
+			upd_cmd = I40E_NVMUPD_READ_LCB;
+			break;
+		case I40E_NVM_SA:
+			upd_cmd = I40E_NVMUPD_READ_SA;
+			break;
+		case I40E_NVM_EXEC:
+			if (module == 0xf)
+				upd_cmd = I40E_NVMUPD_STATUS;
+			else if (module == 0)
+				upd_cmd = I40E_NVMUPD_GET_AQ_RESULT;
+			break;
+		case I40E_NVM_AQE:
+			upd_cmd = I40E_NVMUPD_GET_AQ_EVENT;
+			break;
+		}
+		break;
+
+	case I40E_NVM_WRITE:
+		switch (transaction) {
+		case I40E_NVM_CON:
+			upd_cmd = I40E_NVMUPD_WRITE_CON;
+			break;
+		case I40E_NVM_SNT:
+			upd_cmd = I40E_NVMUPD_WRITE_SNT;
+			break;
+		case I40E_NVM_LCB:
+			upd_cmd = I40E_NVMUPD_WRITE_LCB;
+			break;
+		case I40E_NVM_SA:
+			upd_cmd = I40E_NVMUPD_WRITE_SA;
+			break;
+		case I40E_NVM_ERA:
+			upd_cmd = I40E_NVMUPD_WRITE_ERA;
+			break;
+		case I40E_NVM_CSUM:
+			upd_cmd = I40E_NVMUPD_CSUM_CON;
+			break;
+		case (I40E_NVM_CSUM|I40E_NVM_SA):
+			upd_cmd = I40E_NVMUPD_CSUM_SA;
+			break;
+		case (I40E_NVM_CSUM|I40E_NVM_LCB):
+			upd_cmd = I40E_NVMUPD_CSUM_LCB;
+			break;
+		case I40E_NVM_EXEC:
+			if (module == 0)
+				upd_cmd = I40E_NVMUPD_EXEC_AQ;
+			break;
+		}
+		break;
+	}
+
+	return upd_cmd;
+}
+
+/**
+ * i40e_nvmupd_exec_aq - Run an AQ command
+ * @hw: pointer to hardware structure
+ * @cmd: pointer to nvm update command buffer
+ * @bytes: pointer to the data buffer
+ * @perrno: pointer to return error code
+ *
+ * cmd structure contains identifiers and data buffer
+ **/
+static int i40e_nvmupd_exec_aq(struct i40e_hw *hw,
+			       struct i40e_nvm_access *cmd,
+			       u8 *bytes, int *perrno)
+{
+	struct i40e_asq_cmd_details cmd_details;
+	struct i40e_aq_desc *aq_desc;
+	u32 buff_size = 0;
+	u8 *buff = NULL;
+	u32 aq_desc_len;
+	u32 aq_data_len;
+	int status;
+
+	i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__);
+	if (cmd->offset == 0xffff)
+		return 0;
+
+	memset(&cmd_details, 0, sizeof(cmd_details));
+	cmd_details.wb_desc = &hw->nvm_wb_desc;
+
+	aq_desc_len = sizeof(struct i40e_aq_desc);
+	memset(&hw->nvm_wb_desc, 0, aq_desc_len);
+
+	/* get the aq descriptor */
+	if (cmd->data_size < aq_desc_len) {
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "NVMUPD: not enough aq desc bytes for exec, size %d < %d\n",
+			   cmd->data_size, aq_desc_len);
+		*perrno = -EINVAL;
+		return -EINVAL;
+	}
+	aq_desc = (struct i40e_aq_desc *)bytes;
+
+	/* if data buffer needed, make sure it's ready */
+	aq_data_len = cmd->data_size - aq_desc_len;
+	buff_size = max_t(u32, aq_data_len, le16_to_cpu(aq_desc->datalen));
+	if (buff_size) {
+		if (!hw->nvm_buff.va) {
+			status = i40e_allocate_virt_mem(hw, &hw->nvm_buff,
+							hw->aq.asq_buf_size);
+			if (status)
+				i40e_debug(hw, I40E_DEBUG_NVM,
+					   "NVMUPD: i40e_allocate_virt_mem for exec buff failed, %d\n",
+					   status);
+		}
+
+		if (hw->nvm_buff.va) {
+			buff = hw->nvm_buff.va;
+			memcpy(buff, &bytes[aq_desc_len], aq_data_len);
+		}
+	}
+
+	if (cmd->offset)
+		memset(&hw->nvm_aq_event_desc, 0, aq_desc_len);
+
+	/* and away we go! */
+	status = i40e_asq_send_command(hw, aq_desc, buff,
+				       buff_size, &cmd_details);
+	if (status) {
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "%s err %pe aq_err %s\n",
+			   __func__, ERR_PTR(status),
+			   i40e_aq_str(hw, hw->aq.asq_last_status));
+		*perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status);
+		return status;
+	}
+
+	/* should we wait for a followup event? */
+	if (cmd->offset) {
+		hw->nvm_wait_opcode = cmd->offset;
+		hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT;
+	}
+
+	return status;
+}
+
+/**
+ * i40e_nvmupd_get_aq_result - Get the results from the previous exec_aq
+ * @hw: pointer to hardware structure
+ * @cmd: pointer to nvm update command buffer
+ * @bytes: pointer to the data buffer
+ * @perrno: pointer to return error code
+ *
+ * cmd structure contains identifiers and data buffer
+ **/
+static int i40e_nvmupd_get_aq_result(struct i40e_hw *hw,
+				     struct i40e_nvm_access *cmd,
+				     u8 *bytes, int *perrno)
+{
+	u32 aq_total_len;
+	u32 aq_desc_len;
+	int remainder;
+	u8 *buff;
+
+	i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__);
+
+	aq_desc_len = sizeof(struct i40e_aq_desc);
+	aq_total_len = aq_desc_len + le16_to_cpu(hw->nvm_wb_desc.datalen);
+
+	/* check offset range */
+	if (cmd->offset > aq_total_len) {
+		i40e_debug(hw, I40E_DEBUG_NVM, "%s: offset too big %d > %d\n",
+			   __func__, cmd->offset, aq_total_len);
+		*perrno = -EINVAL;
+		return -EINVAL;
+	}
+
+	/* check copylength range */
+	if (cmd->data_size > (aq_total_len - cmd->offset)) {
+		int new_len = aq_total_len - cmd->offset;
+
+		i40e_debug(hw, I40E_DEBUG_NVM, "%s: copy length %d too big, trimming to %d\n",
+			   __func__, cmd->data_size, new_len);
+		cmd->data_size = new_len;
+	}
+
+	remainder = cmd->data_size;
+	if (cmd->offset < aq_desc_len) {
+		u32 len = aq_desc_len - cmd->offset;
+
+		len = min(len, cmd->data_size);
+		i40e_debug(hw, I40E_DEBUG_NVM, "%s: aq_desc bytes %d to %d\n",
+			   __func__, cmd->offset, cmd->offset + len);
+
+		buff = ((u8 *)&hw->nvm_wb_desc) + cmd->offset;
+		memcpy(bytes, buff, len);
+
+		bytes += len;
+		remainder -= len;
+		buff = hw->nvm_buff.va;
+	} else {
+		buff = hw->nvm_buff.va + (cmd->offset - aq_desc_len);
+	}
+
+	if (remainder > 0) {
+		int start_byte = buff - (u8 *)hw->nvm_buff.va;
+
+		i40e_debug(hw, I40E_DEBUG_NVM, "%s: databuf bytes %d to %d\n",
+			   __func__, start_byte, start_byte + remainder);
+		memcpy(bytes, buff, remainder);
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_nvmupd_get_aq_event - Get the Admin Queue event from previous exec_aq
+ * @hw: pointer to hardware structure
+ * @cmd: pointer to nvm update command buffer
+ * @bytes: pointer to the data buffer
+ * @perrno: pointer to return error code
+ *
+ * cmd structure contains identifiers and data buffer
+ **/
+static int i40e_nvmupd_get_aq_event(struct i40e_hw *hw,
+				    struct i40e_nvm_access *cmd,
+				    u8 *bytes, int *perrno)
+{
+	u32 aq_total_len;
+	u32 aq_desc_len;
+
+	i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__);
+
+	aq_desc_len = sizeof(struct i40e_aq_desc);
+	aq_total_len = aq_desc_len + le16_to_cpu(hw->nvm_aq_event_desc.datalen);
+
+	/* check copylength range */
+	if (cmd->data_size > aq_total_len) {
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "%s: copy length %d too big, trimming to %d\n",
+			   __func__, cmd->data_size, aq_total_len);
+		cmd->data_size = aq_total_len;
+	}
+
+	memcpy(bytes, &hw->nvm_aq_event_desc, cmd->data_size);
+
+	return 0;
+}
+
+/**
+ * i40e_nvmupd_nvm_read - Read NVM
+ * @hw: pointer to hardware structure
+ * @cmd: pointer to nvm update command buffer
+ * @bytes: pointer to the data buffer
+ * @perrno: pointer to return error code
+ *
+ * cmd structure contains identifiers and data buffer
+ **/
+static int i40e_nvmupd_nvm_read(struct i40e_hw *hw,
+				struct i40e_nvm_access *cmd,
+				u8 *bytes, int *perrno)
+{
+	struct i40e_asq_cmd_details cmd_details;
+	u8 module, transaction;
+	int status;
+	bool last;
+
+	transaction = i40e_nvmupd_get_transaction(cmd->config);
+	module = i40e_nvmupd_get_module(cmd->config);
+	last = (transaction == I40E_NVM_LCB) || (transaction == I40E_NVM_SA);
+
+	memset(&cmd_details, 0, sizeof(cmd_details));
+	cmd_details.wb_desc = &hw->nvm_wb_desc;
+
+	status = i40e_aq_read_nvm(hw, module, cmd->offset, (u16)cmd->data_size,
+				  bytes, last, &cmd_details);
+	if (status) {
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "i40e_nvmupd_nvm_read mod 0x%x  off 0x%x  len 0x%x\n",
+			   module, cmd->offset, cmd->data_size);
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "i40e_nvmupd_nvm_read status %d aq %d\n",
+			   status, hw->aq.asq_last_status);
+		*perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status);
+	}
+
+	return status;
+}
+
+/**
+ * i40e_nvmupd_nvm_erase - Erase an NVM module
+ * @hw: pointer to hardware structure
+ * @cmd: pointer to nvm update command buffer
+ * @perrno: pointer to return error code
+ *
+ * module, offset, data_size and data are in cmd structure
+ **/
+static int i40e_nvmupd_nvm_erase(struct i40e_hw *hw,
+				 struct i40e_nvm_access *cmd,
+				 int *perrno)
+{
+	struct i40e_asq_cmd_details cmd_details;
+	u8 module, transaction;
+	int status = 0;
+	bool last;
+
+	transaction = i40e_nvmupd_get_transaction(cmd->config);
+	module = i40e_nvmupd_get_module(cmd->config);
+	last = (transaction & I40E_NVM_LCB);
+
+	memset(&cmd_details, 0, sizeof(cmd_details));
+	cmd_details.wb_desc = &hw->nvm_wb_desc;
+
+	status = i40e_aq_erase_nvm(hw, module, cmd->offset, (u16)cmd->data_size,
+				   last, &cmd_details);
+	if (status) {
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "i40e_nvmupd_nvm_erase mod 0x%x  off 0x%x len 0x%x\n",
+			   module, cmd->offset, cmd->data_size);
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "i40e_nvmupd_nvm_erase status %d aq %d\n",
+			   status, hw->aq.asq_last_status);
+		*perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status);
+	}
+
+	return status;
+}
+
+/**
+ * i40e_nvmupd_nvm_write - Write NVM
+ * @hw: pointer to hardware structure
+ * @cmd: pointer to nvm update command buffer
+ * @bytes: pointer to the data buffer
+ * @perrno: pointer to return error code
+ *
+ * module, offset, data_size and data are in cmd structure
+ **/
+static int i40e_nvmupd_nvm_write(struct i40e_hw *hw,
+				 struct i40e_nvm_access *cmd,
+				 u8 *bytes, int *perrno)
+{
+	struct i40e_asq_cmd_details cmd_details;
+	u8 module, transaction;
+	u8 preservation_flags;
+	int status = 0;
+	bool last;
+
+	transaction = i40e_nvmupd_get_transaction(cmd->config);
+	module = i40e_nvmupd_get_module(cmd->config);
+	last = (transaction & I40E_NVM_LCB);
+	preservation_flags = i40e_nvmupd_get_preservation_flags(cmd->config);
+
+	memset(&cmd_details, 0, sizeof(cmd_details));
+	cmd_details.wb_desc = &hw->nvm_wb_desc;
+
+	status = i40e_aq_update_nvm(hw, module, cmd->offset,
+				    (u16)cmd->data_size, bytes, last,
+				    preservation_flags, &cmd_details);
+	if (status) {
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "i40e_nvmupd_nvm_write mod 0x%x off 0x%x len 0x%x\n",
+			   module, cmd->offset, cmd->data_size);
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "i40e_nvmupd_nvm_write status %d aq %d\n",
+			   status, hw->aq.asq_last_status);
+		*perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status);
+	}
+
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
new file mode 100644
index 0000000000..2bd4de03da
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _I40E_OSDEP_H_
+#define _I40E_OSDEP_H_
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/tcp.h>
+#include <linux/pci.h>
+#include <linux/highuid.h>
+
+/* get readq/writeq support for 32 bit kernels, use the low-first version */
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+/* File to be the magic between shared code and
+ * actual OS primitives
+ */
+
+#define hw_dbg(hw, S, A...)							\
+do {										\
+	dev_dbg(&((struct i40e_pf *)hw->back)->pdev->dev, S, ##A);		\
+} while (0)
+
+#define wr32(a, reg, value)	writel((value), ((a)->hw_addr + (reg)))
+#define rd32(a, reg)		readl((a)->hw_addr + (reg))
+
+#define rd64(a, reg)		readq((a)->hw_addr + (reg))
+#define i40e_flush(a)		readl((a)->hw_addr + I40E_GLGEN_STAT)
+
+/* memory allocation tracking */
+struct i40e_dma_mem {
+	void *va;
+	dma_addr_t pa;
+	u32 size;
+};
+
+#define i40e_allocate_dma_mem(h, m, unused, s, a) \
+			i40e_allocate_dma_mem_d(h, m, s, a)
+#define i40e_free_dma_mem(h, m) i40e_free_dma_mem_d(h, m)
+
+struct i40e_virt_mem {
+	void *va;
+	u32 size;
+};
+
+#define i40e_allocate_virt_mem(h, m, s) i40e_allocate_virt_mem_d(h, m, s)
+#define i40e_free_virt_mem(h, m) i40e_free_virt_mem_d(h, m)
+
+#define i40e_debug(h, m, s, ...)				\
+do {								\
+	if (((m) & (h)->debug_mask))				\
+		pr_info("i40e %02x:%02x.%x " s,			\
+			(h)->bus.bus_id, (h)->bus.device,	\
+			(h)->bus.func, ##__VA_ARGS__);		\
+} while (0)
+
+#endif /* _I40E_OSDEP_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
new file mode 100644
index 0000000000..3eeee224f1
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2021 Intel Corporation. */
+
+#ifndef _I40E_PROTOTYPE_H_
+#define _I40E_PROTOTYPE_H_
+
+#include "i40e_type.h"
+#include "i40e_alloc.h"
+#include <linux/avf/virtchnl.h>
+
+/* Prototypes for shared code functions that are not in
+ * the standard function pointer structures.  These are
+ * mostly because they are needed even before the init
+ * has happened and will assist in the early SW and FW
+ * setup.
+ */
+
+/* adminq functions */
+int i40e_init_adminq(struct i40e_hw *hw);
+void i40e_shutdown_adminq(struct i40e_hw *hw);
+int i40e_clean_arq_element(struct i40e_hw *hw,
+			   struct i40e_arq_event_info *e,
+			   u16 *events_pending);
+int
+i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc,
+		      void *buff, /* can be NULL */ u16  buff_size,
+		      struct i40e_asq_cmd_details *cmd_details);
+int
+i40e_asq_send_command_v2(struct i40e_hw *hw,
+			 struct i40e_aq_desc *desc,
+			 void *buff, /* can be NULL */
+			 u16  buff_size,
+			 struct i40e_asq_cmd_details *cmd_details,
+			 enum i40e_admin_queue_err *aq_status);
+int
+i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc,
+			     void *buff, /* can be NULL */ u16  buff_size,
+			     struct i40e_asq_cmd_details *cmd_details,
+			     bool is_atomic_context);
+int
+i40e_asq_send_command_atomic_v2(struct i40e_hw *hw,
+				struct i40e_aq_desc *desc,
+				void *buff, /* can be NULL */
+				u16  buff_size,
+				struct i40e_asq_cmd_details *cmd_details,
+				bool is_atomic_context,
+				enum i40e_admin_queue_err *aq_status);
+
+/* debug function for adminq */
+void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask,
+		   void *desc, void *buffer, u16 buf_len);
+
+bool i40e_check_asq_alive(struct i40e_hw *hw);
+int i40e_aq_queue_shutdown(struct i40e_hw *hw, bool unloading);
+const char *i40e_aq_str(struct i40e_hw *hw, enum i40e_admin_queue_err aq_err);
+
+int i40e_aq_get_rss_lut(struct i40e_hw *hw, u16 seid,
+			bool pf_lut, u8 *lut, u16 lut_size);
+int i40e_aq_set_rss_lut(struct i40e_hw *hw, u16 seid,
+			bool pf_lut, u8 *lut, u16 lut_size);
+int i40e_aq_get_rss_key(struct i40e_hw *hw,
+			u16 seid,
+			struct i40e_aqc_get_set_rss_key_data *key);
+int i40e_aq_set_rss_key(struct i40e_hw *hw,
+			u16 seid,
+			struct i40e_aqc_get_set_rss_key_data *key);
+
+u32 i40e_led_get(struct i40e_hw *hw);
+void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink);
+int i40e_led_set_phy(struct i40e_hw *hw, bool on,
+		     u16 led_addr, u32 mode);
+int i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
+		     u16 *val);
+int i40e_blink_phy_link_led(struct i40e_hw *hw,
+			    u32 time, u32 interval);
+
+/* admin send queue commands */
+
+int i40e_aq_get_firmware_version(struct i40e_hw *hw,
+				 u16 *fw_major_version, u16 *fw_minor_version,
+				 u32 *fw_build,
+				 u16 *api_major_version, u16 *api_minor_version,
+				 struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_debug_write_register(struct i40e_hw *hw,
+				 u32 reg_addr, u64 reg_val,
+				 struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_debug_read_register(struct i40e_hw *hw,
+				u32  reg_addr, u64 *reg_val,
+				struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags,
+			  struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_set_default_vsi(struct i40e_hw *hw, u16 vsi_id,
+			    struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_clear_default_vsi(struct i40e_hw *hw, u16 vsi_id,
+			      struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
+				 bool qualified_modules, bool report_init,
+				 struct i40e_aq_get_phy_abilities_resp *abilities,
+				 struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_set_phy_config(struct i40e_hw *hw,
+			   struct i40e_aq_set_phy_config *config,
+			   struct i40e_asq_cmd_details *cmd_details);
+int i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures,
+		bool atomic_reset);
+int i40e_aq_set_mac_loopback(struct i40e_hw *hw,
+			     bool ena_lpbk,
+			     struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_set_phy_int_mask(struct i40e_hw *hw, u16 mask,
+			     struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_clear_pxe_mode(struct i40e_hw *hw,
+			   struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_set_link_restart_an(struct i40e_hw *hw,
+				bool enable_link,
+				struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_get_link_info(struct i40e_hw *hw,
+			  bool enable_lse, struct i40e_link_status *link,
+			  struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_send_driver_version(struct i40e_hw *hw,
+				struct i40e_driver_version *dv,
+				struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_add_vsi(struct i40e_hw *hw,
+		    struct i40e_vsi_context *vsi_ctx,
+		    struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_set_vsi_broadcast(struct i40e_hw *hw,
+			      u16 vsi_id, bool set_filter,
+			      struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw, u16 vsi_id, bool set,
+					struct i40e_asq_cmd_details *cmd_details,
+					bool rx_only_promisc);
+int i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw, u16 vsi_id, bool set,
+					  struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw,
+				       u16 seid, bool enable,
+				       u16 vid,
+				       struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
+				       u16 seid, bool enable,
+				       u16 vid,
+				       struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw,
+				       u16 seid, bool enable, u16 vid,
+				       struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw,
+				 u16 seid, bool enable,
+				 struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_get_vsi_params(struct i40e_hw *hw,
+			   struct i40e_vsi_context *vsi_ctx,
+			   struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_update_vsi_params(struct i40e_hw *hw,
+			      struct i40e_vsi_context *vsi_ctx,
+			      struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid,
+		    u16 downlink_seid, u8 enabled_tc,
+		    bool default_port, u16 *pveb_seid,
+		    bool enable_stats,
+		    struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_get_veb_parameters(struct i40e_hw *hw,
+			       u16 veb_seid, u16 *switch_id, bool *floating,
+			       u16 *statistic_index, u16 *vebs_used,
+			       u16 *vebs_free,
+			       struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_add_macvlan(struct i40e_hw *hw, u16 vsi_id,
+			struct i40e_aqc_add_macvlan_element_data *mv_list,
+			u16 count, struct i40e_asq_cmd_details *cmd_details);
+int
+i40e_aq_add_macvlan_v2(struct i40e_hw *hw, u16 seid,
+		       struct i40e_aqc_add_macvlan_element_data *mv_list,
+		       u16 count, struct i40e_asq_cmd_details *cmd_details,
+		       enum i40e_admin_queue_err *aq_status);
+int i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 vsi_id,
+			   struct i40e_aqc_remove_macvlan_element_data *mv_list,
+			   u16 count, struct i40e_asq_cmd_details *cmd_details);
+int
+i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid,
+			  struct i40e_aqc_remove_macvlan_element_data *mv_list,
+			  u16 count, struct i40e_asq_cmd_details *cmd_details,
+			  enum i40e_admin_queue_err *aq_status);
+int i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
+			   u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list,
+			   struct i40e_asq_cmd_details *cmd_details,
+			   u16 *rule_id, u16 *rules_used, u16 *rules_free);
+int i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
+			      u16 rule_type, u16 rule_id, u16 count, __le16 *mr_list,
+			      struct i40e_asq_cmd_details *cmd_details,
+			      u16 *rules_used, u16 *rules_free);
+
+int i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid,
+			   u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen,
+			   struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_get_switch_config(struct i40e_hw *hw,
+			      struct i40e_aqc_get_switch_config_resp *buf,
+			      u16 buf_size, u16 *start_seid,
+			      struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_set_switch_config(struct i40e_hw *hw,
+			      u16 flags,
+			      u16 valid_flags, u8 mode,
+			      struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_request_resource(struct i40e_hw *hw,
+			     enum i40e_aq_resources_ids resource,
+			     enum i40e_aq_resource_access_type access,
+			     u8 sdp_number, u64 *timeout,
+			     struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_release_resource(struct i40e_hw *hw,
+			     enum i40e_aq_resources_ids resource,
+			     u8 sdp_number,
+			     struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_read_nvm(struct i40e_hw *hw, u8 module_pointer,
+		     u32 offset, u16 length, void *data,
+		     bool last_command,
+		     struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_erase_nvm(struct i40e_hw *hw, u8 module_pointer,
+		      u32 offset, u16 length, bool last_command,
+		      struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_discover_capabilities(struct i40e_hw *hw,
+				  void *buff, u16 buff_size, u16 *data_size,
+				  enum i40e_admin_queue_opc list_type_opc,
+				  struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
+		       u32 offset, u16 length, void *data,
+		       bool last_command, u8 preservation_flags,
+		       struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_rearrange_nvm(struct i40e_hw *hw,
+			  u8 rearrange_nvm,
+			  struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type,
+			 u8 mib_type, void *buff, u16 buff_size,
+			 u16 *local_len, u16 *remote_len,
+			 struct i40e_asq_cmd_details *cmd_details);
+int
+i40e_aq_set_lldp_mib(struct i40e_hw *hw,
+		     u8 mib_type, void *buff, u16 buff_size,
+		     struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw,
+				      bool enable_update,
+				      struct i40e_asq_cmd_details *cmd_details);
+int
+i40e_aq_restore_lldp(struct i40e_hw *hw, u8 *setting, bool restore,
+		     struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
+		      bool persist,
+		      struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_set_dcb_parameters(struct i40e_hw *hw,
+			       bool dcb_enable,
+			       struct i40e_asq_cmd_details
+			       *cmd_details);
+int i40e_aq_start_lldp(struct i40e_hw *hw, bool persist,
+		       struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_get_cee_dcb_config(struct i40e_hw *hw,
+			       void *buff, u16 buff_size,
+			       struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_add_udp_tunnel(struct i40e_hw *hw,
+			   u16 udp_port, u8 protocol_index,
+			   u8 *filter_index,
+			   struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_del_udp_tunnel(struct i40e_hw *hw, u8 index,
+			   struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_delete_element(struct i40e_hw *hw, u16 seid,
+			   struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_mac_address_write(struct i40e_hw *hw,
+			      u16 flags, u8 *mac_addr,
+			      struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_config_vsi_bw_limit(struct i40e_hw *hw,
+				u16 seid, u16 credit, u8 max_credit,
+				struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_dcb_updated(struct i40e_hw *hw,
+			struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_config_vsi_tc_bw(struct i40e_hw *hw, u16 seid,
+			     struct i40e_aqc_configure_vsi_tc_bw_data *bw_data,
+			     struct i40e_asq_cmd_details *cmd_details);
+int
+i40e_aq_config_switch_comp_ets(struct i40e_hw *hw,
+			       u16 seid,
+			       struct i40e_aqc_configure_switching_comp_ets_data *ets_data,
+			       enum i40e_admin_queue_opc opcode,
+			       struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_config_switch_comp_bw_config(struct i40e_hw *hw,
+	u16 seid,
+	struct i40e_aqc_configure_switching_comp_bw_config_data *bw_data,
+	struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_query_vsi_bw_config(struct i40e_hw *hw,
+				u16 seid,
+				struct i40e_aqc_query_vsi_bw_config_resp *bw_data,
+				struct i40e_asq_cmd_details *cmd_details);
+int
+i40e_aq_query_vsi_ets_sla_config(struct i40e_hw *hw,
+				 u16 seid,
+				 struct i40e_aqc_query_vsi_ets_sla_config_resp *bw_data,
+				 struct i40e_asq_cmd_details *cmd_details);
+int
+i40e_aq_query_switch_comp_ets_config(struct i40e_hw *hw,
+				     u16 seid,
+				     struct i40e_aqc_query_switching_comp_ets_config_resp *bw_data,
+				     struct i40e_asq_cmd_details *cmd_details);
+int
+i40e_aq_query_port_ets_config(struct i40e_hw *hw,
+			      u16 seid,
+			      struct i40e_aqc_query_port_ets_config_resp *bw_data,
+			      struct i40e_asq_cmd_details *cmd_details);
+int
+i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
+				    u16 seid,
+				    struct i40e_aqc_query_switching_comp_bw_config_resp *bw_data,
+				    struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_resume_port_tx(struct i40e_hw *hw,
+			   struct i40e_asq_cmd_details *cmd_details);
+int
+i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+			     struct i40e_aqc_cloud_filters_element_bb *filters,
+			     u8 filter_count);
+int
+i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 vsi,
+			  struct i40e_aqc_cloud_filters_element_data *filters,
+			  u8 filter_count);
+int
+i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 vsi,
+			  struct i40e_aqc_cloud_filters_element_data *filters,
+			  u8 filter_count);
+int
+i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+			     struct i40e_aqc_cloud_filters_element_bb *filters,
+			     u8 filter_count);
+int i40e_read_lldp_cfg(struct i40e_hw *hw,
+		       struct i40e_lldp_variables *lldp_cfg);
+int
+i40e_aq_suspend_port_tx(struct i40e_hw *hw, u16 seid,
+			struct i40e_asq_cmd_details *cmd_details);
+/* i40e_common */
+int i40e_init_shared_code(struct i40e_hw *hw);
+int i40e_pf_reset(struct i40e_hw *hw);
+void i40e_clear_hw(struct i40e_hw *hw);
+void i40e_clear_pxe_mode(struct i40e_hw *hw);
+int i40e_get_link_status(struct i40e_hw *hw, bool *link_up);
+int i40e_update_link_info(struct i40e_hw *hw);
+int i40e_get_mac_addr(struct i40e_hw *hw, u8 *mac_addr);
+int i40e_read_bw_from_alt_ram(struct i40e_hw *hw,
+			      u32 *max_bw, u32 *min_bw, bool *min_valid,
+			      bool *max_valid);
+int
+i40e_aq_configure_partition_bw(struct i40e_hw *hw,
+			       struct i40e_aqc_configure_partition_bw_data *bw_data,
+			       struct i40e_asq_cmd_details *cmd_details);
+int i40e_get_port_mac_addr(struct i40e_hw *hw, u8 *mac_addr);
+int i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num,
+			 u32 pba_num_size);
+void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable);
+/* prototype for functions used for NVM access */
+int i40e_init_nvm(struct i40e_hw *hw);
+int i40e_acquire_nvm(struct i40e_hw *hw,
+		     enum i40e_aq_resource_access_type access);
+void i40e_release_nvm(struct i40e_hw *hw);
+int i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
+		       u16 *data);
+int i40e_read_nvm_module_data(struct i40e_hw *hw,
+			      u8 module_ptr,
+			      u16 module_offset,
+			      u16 data_offset,
+			      u16 words_data_size,
+			      u16 *data_ptr);
+int i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
+			 u16 *words, u16 *data);
+int i40e_update_nvm_checksum(struct i40e_hw *hw);
+int i40e_validate_nvm_checksum(struct i40e_hw *hw,
+			       u16 *checksum);
+int i40e_nvmupd_command(struct i40e_hw *hw,
+			struct i40e_nvm_access *cmd,
+			u8 *bytes, int *errno);
+void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode,
+				  struct i40e_aq_desc *desc);
+void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw);
+void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status);
+
+int i40e_set_mac_type(struct i40e_hw *hw);
+
+extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[];
+
+static inline struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype)
+{
+	return i40e_ptype_lookup[ptype];
+}
+
+/**
+ * i40e_virtchnl_link_speed - Convert AdminQ link_speed to virtchnl definition
+ * @link_speed: the speed to convert
+ *
+ * Returns the link_speed in terms of the virtchnl interface, for use in
+ * converting link_speed as reported by the AdminQ into the format used for
+ * talking to virtchnl devices. If we can't represent the link speed properly,
+ * report LINK_SPEED_UNKNOWN.
+ **/
+static inline enum virtchnl_link_speed
+i40e_virtchnl_link_speed(enum i40e_aq_link_speed link_speed)
+{
+	switch (link_speed) {
+	case I40E_LINK_SPEED_100MB:
+		return VIRTCHNL_LINK_SPEED_100MB;
+	case I40E_LINK_SPEED_1GB:
+		return VIRTCHNL_LINK_SPEED_1GB;
+	case I40E_LINK_SPEED_2_5GB:
+		return VIRTCHNL_LINK_SPEED_2_5GB;
+	case I40E_LINK_SPEED_5GB:
+		return VIRTCHNL_LINK_SPEED_5GB;
+	case I40E_LINK_SPEED_10GB:
+		return VIRTCHNL_LINK_SPEED_10GB;
+	case I40E_LINK_SPEED_40GB:
+		return VIRTCHNL_LINK_SPEED_40GB;
+	case I40E_LINK_SPEED_20GB:
+		return VIRTCHNL_LINK_SPEED_20GB;
+	case I40E_LINK_SPEED_25GB:
+		return VIRTCHNL_LINK_SPEED_25GB;
+	case I40E_LINK_SPEED_UNKNOWN:
+	default:
+		return VIRTCHNL_LINK_SPEED_UNKNOWN;
+	}
+}
+
+/* prototype for functions used for SW locks */
+
+/* i40e_common for VF drivers*/
+int i40e_set_filter_control(struct i40e_hw *hw,
+			    struct i40e_filter_control_settings *settings);
+int i40e_aq_add_rem_control_packet_filter(struct i40e_hw *hw,
+					  u8 *mac_addr, u16 ethtype, u16 flags,
+					  u16 vsi_seid, u16 queue, bool is_add,
+					  struct i40e_control_filter_stats *stats,
+					  struct i40e_asq_cmd_details *cmd_details);
+int i40e_aq_debug_dump(struct i40e_hw *hw, u8 cluster_id,
+		       u8 table_id, u32 start_index, u16 buff_size,
+		       void *buff, u16 *ret_buff_size,
+		       u8 *ret_next_table, u32 *ret_next_index,
+		       struct i40e_asq_cmd_details *cmd_details);
+void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw,
+						    u16 vsi_seid);
+int i40e_aq_rx_ctl_read_register(struct i40e_hw *hw,
+				 u32 reg_addr, u32 *reg_val,
+				 struct i40e_asq_cmd_details *cmd_details);
+u32 i40e_read_rx_ctl(struct i40e_hw *hw, u32 reg_addr);
+int i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
+				  u32 reg_addr, u32 reg_val,
+				  struct i40e_asq_cmd_details *cmd_details);
+void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val);
+int
+i40e_aq_set_phy_register_ext(struct i40e_hw *hw,
+			     u8 phy_select, u8 dev_addr, bool page_change,
+			     bool set_mdio, u8 mdio_num,
+			     u32 reg_addr, u32 reg_val,
+			     struct i40e_asq_cmd_details *cmd_details);
+int
+i40e_aq_get_phy_register_ext(struct i40e_hw *hw,
+			     u8 phy_select, u8 dev_addr, bool page_change,
+			     bool set_mdio, u8 mdio_num,
+			     u32 reg_addr, u32 *reg_val,
+			     struct i40e_asq_cmd_details *cmd_details);
+
+/* Convenience wrappers for most common use case */
+#define i40e_aq_set_phy_register(hw, ps, da, pc, ra, rv, cd)		\
+	i40e_aq_set_phy_register_ext(hw, ps, da, pc, false, 0, ra, rv, cd)
+#define i40e_aq_get_phy_register(hw, ps, da, pc, ra, rv, cd)		\
+	i40e_aq_get_phy_register_ext(hw, ps, da, pc, false, 0, ra, rv, cd)
+
+int i40e_read_phy_register_clause22(struct i40e_hw *hw,
+				    u16 reg, u8 phy_addr, u16 *value);
+int i40e_write_phy_register_clause22(struct i40e_hw *hw,
+				     u16 reg, u8 phy_addr, u16 value);
+int i40e_read_phy_register_clause45(struct i40e_hw *hw,
+				    u8 page, u16 reg, u8 phy_addr, u16 *value);
+int i40e_write_phy_register_clause45(struct i40e_hw *hw,
+				     u8 page, u16 reg, u8 phy_addr, u16 value);
+int i40e_read_phy_register(struct i40e_hw *hw, u8 page, u16 reg,
+			   u8 phy_addr, u16 *value);
+int i40e_write_phy_register(struct i40e_hw *hw, u8 page, u16 reg,
+			    u8 phy_addr, u16 value);
+u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num);
+int i40e_blink_phy_link_led(struct i40e_hw *hw,
+			    u32 time, u32 interval);
+int i40e_aq_write_ddp(struct i40e_hw *hw, void *buff,
+		      u16 buff_size, u32 track_id,
+		      u32 *error_offset, u32 *error_info,
+		      struct i40e_asq_cmd_details *
+		      cmd_details);
+int i40e_aq_get_ddp_list(struct i40e_hw *hw, void *buff,
+			 u16 buff_size, u8 flags,
+			 struct i40e_asq_cmd_details *
+			 cmd_details);
+struct i40e_generic_seg_header *
+i40e_find_segment_in_package(u32 segment_type,
+			     struct i40e_package_header *pkg_header);
+struct i40e_profile_section_header *
+i40e_find_section_in_profile(u32 section_type,
+			     struct i40e_profile_segment *profile);
+int
+i40e_write_profile(struct i40e_hw *hw, struct i40e_profile_segment *i40e_seg,
+		   u32 track_id);
+int
+i40e_rollback_profile(struct i40e_hw *hw, struct i40e_profile_segment *i40e_seg,
+		      u32 track_id);
+int
+i40e_add_pinfo_to_list(struct i40e_hw *hw,
+		       struct i40e_profile_segment *profile,
+		       u8 *profile_info_sec, u32 track_id);
+#endif /* _I40E_PROTOTYPE_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
new file mode 100644
index 0000000000..8a26811140
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -0,0 +1,1576 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "i40e.h"
+#include <linux/ptp_classify.h>
+#include <linux/posix-clock.h>
+
+/* The XL710 timesync is very much like Intel's 82599 design when it comes to
+ * the fundamental clock design. However, the clock operations are much simpler
+ * in the XL710 because the device supports a full 64 bits of nanoseconds.
+ * Because the field is so wide, we can forgo the cycle counter and just
+ * operate with the nanosecond field directly without fear of overflow.
+ *
+ * Much like the 82599, the update period is dependent upon the link speed:
+ * At 40Gb, 25Gb, or no link, the period is 1.6ns.
+ * At 10Gb or 5Gb link, the period is multiplied by 2. (3.2ns)
+ * At 1Gb link, the period is multiplied by 20. (32ns)
+ * 1588 functionality is not supported at 100Mbps.
+ */
+#define I40E_PTP_40GB_INCVAL		0x0199999999ULL
+#define I40E_PTP_10GB_INCVAL_MULT	2
+#define I40E_PTP_5GB_INCVAL_MULT	2
+#define I40E_PTP_1GB_INCVAL_MULT	20
+#define I40E_ISGN			0x80000000
+
+#define I40E_PRTTSYN_CTL1_TSYNTYPE_V1  BIT(I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT)
+#define I40E_PRTTSYN_CTL1_TSYNTYPE_V2  (2 << \
+					I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT)
+#define I40E_SUBDEV_ID_25G_PTP_PIN	0xB
+
+enum i40e_ptp_pin {
+	SDP3_2 = 0,
+	SDP3_3,
+	GPIO_4
+};
+
+enum i40e_can_set_pins_t {
+	CANT_DO_PINS = -1,
+	CAN_SET_PINS,
+	CAN_DO_PINS
+};
+
+static struct ptp_pin_desc sdp_desc[] = {
+	/* name     idx      func      chan */
+	{"SDP3_2", SDP3_2, PTP_PF_NONE, 0},
+	{"SDP3_3", SDP3_3, PTP_PF_NONE, 1},
+	{"GPIO_4", GPIO_4, PTP_PF_NONE, 1},
+};
+
+enum i40e_ptp_gpio_pin_state {
+	end = -2,
+	invalid,
+	off,
+	in_A,
+	in_B,
+	out_A,
+	out_B,
+};
+
+static const char * const i40e_ptp_gpio_pin_state2str[] = {
+	"off", "in_A", "in_B", "out_A", "out_B"
+};
+
+enum i40e_ptp_led_pin_state {
+	led_end = -2,
+	low = 0,
+	high,
+};
+
+struct i40e_ptp_pins_settings {
+	enum i40e_ptp_gpio_pin_state sdp3_2;
+	enum i40e_ptp_gpio_pin_state sdp3_3;
+	enum i40e_ptp_gpio_pin_state gpio_4;
+	enum i40e_ptp_led_pin_state led2_0;
+	enum i40e_ptp_led_pin_state led2_1;
+	enum i40e_ptp_led_pin_state led3_0;
+	enum i40e_ptp_led_pin_state led3_1;
+};
+
+static const struct i40e_ptp_pins_settings
+	i40e_ptp_pin_led_allowed_states[] = {
+	{off,	off,	off,		high,	high,	high,	high},
+	{off,	in_A,	off,		high,	high,	high,	low},
+	{off,	out_A,	off,		high,	low,	high,	high},
+	{off,	in_B,	off,		high,	high,	high,	low},
+	{off,	out_B,	off,		high,	low,	high,	high},
+	{in_A,	off,	off,		high,	high,	high,	low},
+	{in_A,	in_B,	off,		high,	high,	high,	low},
+	{in_A,	out_B,	off,		high,	low,	high,	high},
+	{out_A,	off,	off,		high,	low,	high,	high},
+	{out_A,	in_B,	off,		high,	low,	high,	high},
+	{in_B,	off,	off,		high,	high,	high,	low},
+	{in_B,	in_A,	off,		high,	high,	high,	low},
+	{in_B,	out_A,	off,		high,	low,	high,	high},
+	{out_B,	off,	off,		high,	low,	high,	high},
+	{out_B,	in_A,	off,		high,	low,	high,	high},
+	{off,	off,	in_A,		high,	high,	low,	high},
+	{off,	out_A,	in_A,		high,	low,	low,	high},
+	{off,	in_B,	in_A,		high,	high,	low,	low},
+	{off,	out_B,	in_A,		high,	low,	low,	high},
+	{out_A,	off,	in_A,		high,	low,	low,	high},
+	{out_A,	in_B,	in_A,		high,	low,	low,	high},
+	{in_B,	off,	in_A,		high,	high,	low,	low},
+	{in_B,	out_A,	in_A,		high,	low,	low,	high},
+	{out_B,	off,	in_A,		high,	low,	low,	high},
+	{off,	off,	out_A,		low,	high,	high,	high},
+	{off,	in_A,	out_A,		low,	high,	high,	low},
+	{off,	in_B,	out_A,		low,	high,	high,	low},
+	{off,	out_B,	out_A,		low,	low,	high,	high},
+	{in_A,	off,	out_A,		low,	high,	high,	low},
+	{in_A,	in_B,	out_A,		low,	high,	high,	low},
+	{in_A,	out_B,	out_A,		low,	low,	high,	high},
+	{in_B,	off,	out_A,		low,	high,	high,	low},
+	{in_B,	in_A,	out_A,		low,	high,	high,	low},
+	{out_B,	off,	out_A,		low,	low,	high,	high},
+	{out_B,	in_A,	out_A,		low,	low,	high,	high},
+	{off,	off,	in_B,		high,	high,	low,	high},
+	{off,	in_A,	in_B,		high,	high,	low,	low},
+	{off,	out_A,	in_B,		high,	low,	low,	high},
+	{off,	out_B,	in_B,		high,	low,	low,	high},
+	{in_A,	off,	in_B,		high,	high,	low,	low},
+	{in_A,	out_B,	in_B,		high,	low,	low,	high},
+	{out_A,	off,	in_B,		high,	low,	low,	high},
+	{out_B,	off,	in_B,		high,	low,	low,	high},
+	{out_B,	in_A,	in_B,		high,	low,	low,	high},
+	{off,	off,	out_B,		low,	high,	high,	high},
+	{off,	in_A,	out_B,		low,	high,	high,	low},
+	{off,	out_A,	out_B,		low,	low,	high,	high},
+	{off,	in_B,	out_B,		low,	high,	high,	low},
+	{in_A,	off,	out_B,		low,	high,	high,	low},
+	{in_A,	in_B,	out_B,		low,	high,	high,	low},
+	{out_A,	off,	out_B,		low,	low,	high,	high},
+	{out_A,	in_B,	out_B,		low,	low,	high,	high},
+	{in_B,	off,	out_B,		low,	high,	high,	low},
+	{in_B,	in_A,	out_B,		low,	high,	high,	low},
+	{in_B,	out_A,	out_B,		low,	low,	high,	high},
+	{end,	end,	end,	led_end, led_end, led_end, led_end}
+};
+
+static int i40e_ptp_set_pins(struct i40e_pf *pf,
+			     struct i40e_ptp_pins_settings *pins);
+
+/**
+ * i40e_ptp_extts0_work - workqueue task function
+ * @work: workqueue task structure
+ *
+ * Service for PTP external clock event
+ **/
+static void i40e_ptp_extts0_work(struct work_struct *work)
+{
+	struct i40e_pf *pf = container_of(work, struct i40e_pf,
+					  ptp_extts0_work);
+	struct i40e_hw *hw = &pf->hw;
+	struct ptp_clock_event event;
+	u32 hi, lo;
+
+	/* Event time is captured by one of the two matched registers
+	 *      PRTTSYN_EVNT_L: 32 LSB of sampled time event
+	 *      PRTTSYN_EVNT_H: 32 MSB of sampled time event
+	 * Event is defined in PRTTSYN_EVNT_0 register
+	 */
+	lo = rd32(hw, I40E_PRTTSYN_EVNT_L(0));
+	hi = rd32(hw, I40E_PRTTSYN_EVNT_H(0));
+
+	event.timestamp = (((u64)hi) << 32) | lo;
+
+	event.type = PTP_CLOCK_EXTTS;
+	event.index = hw->pf_id;
+
+	/* fire event */
+	ptp_clock_event(pf->ptp_clock, &event);
+}
+
+/**
+ * i40e_is_ptp_pin_dev - check if device supports PTP pins
+ * @hw: pointer to the hardware structure
+ *
+ * Return true if device supports PTP pins, false otherwise.
+ **/
+static bool i40e_is_ptp_pin_dev(struct i40e_hw *hw)
+{
+	return hw->device_id == I40E_DEV_ID_25G_SFP28 &&
+	       hw->subsystem_device_id == I40E_SUBDEV_ID_25G_PTP_PIN;
+}
+
+/**
+ * i40e_can_set_pins - check possibility of manipulating the pins
+ * @pf: board private structure
+ *
+ * Check if all conditions are satisfied to manipulate PTP pins.
+ * Return CAN_SET_PINS if pins can be set on a specific PF or
+ * return CAN_DO_PINS if pins can be manipulated within a NIC or
+ * return CANT_DO_PINS otherwise.
+ **/
+static enum i40e_can_set_pins_t i40e_can_set_pins(struct i40e_pf *pf)
+{
+	if (!i40e_is_ptp_pin_dev(&pf->hw)) {
+		dev_warn(&pf->pdev->dev,
+			 "PTP external clock not supported.\n");
+		return CANT_DO_PINS;
+	}
+
+	if (!pf->ptp_pins) {
+		dev_warn(&pf->pdev->dev,
+			 "PTP PIN manipulation not allowed.\n");
+		return CANT_DO_PINS;
+	}
+
+	if (pf->hw.pf_id) {
+		dev_warn(&pf->pdev->dev,
+			 "PTP PINs should be accessed via PF0.\n");
+		return CAN_DO_PINS;
+	}
+
+	return CAN_SET_PINS;
+}
+
+/**
+ * i40_ptp_reset_timing_events - Reset PTP timing events
+ * @pf: Board private structure
+ *
+ * This function resets timing events for pf.
+ **/
+static void i40_ptp_reset_timing_events(struct i40e_pf *pf)
+{
+	u32 i;
+
+	spin_lock_bh(&pf->ptp_rx_lock);
+	for (i = 0; i <= I40E_PRTTSYN_RXTIME_L_MAX_INDEX; i++) {
+		/* reading and automatically clearing timing events registers */
+		rd32(&pf->hw, I40E_PRTTSYN_RXTIME_L(i));
+		rd32(&pf->hw, I40E_PRTTSYN_RXTIME_H(i));
+		pf->latch_events[i] = 0;
+	}
+	/* reading and automatically clearing timing events registers */
+	rd32(&pf->hw, I40E_PRTTSYN_TXTIME_L);
+	rd32(&pf->hw, I40E_PRTTSYN_TXTIME_H);
+
+	pf->tx_hwtstamp_timeouts = 0;
+	pf->tx_hwtstamp_skipped = 0;
+	pf->rx_hwtstamp_cleared = 0;
+	pf->latch_event_flags = 0;
+	spin_unlock_bh(&pf->ptp_rx_lock);
+}
+
+/**
+ * i40e_ptp_verify - check pins
+ * @ptp: ptp clock
+ * @pin: pin index
+ * @func: assigned function
+ * @chan: channel
+ *
+ * Check pins consistency.
+ * Return 0 on success or error on failure.
+ **/
+static int i40e_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+			   enum ptp_pin_function func, unsigned int chan)
+{
+	switch (func) {
+	case PTP_PF_NONE:
+	case PTP_PF_EXTTS:
+	case PTP_PF_PEROUT:
+		break;
+	case PTP_PF_PHYSYNC:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+/**
+ * i40e_ptp_read - Read the PHC time from the device
+ * @pf: Board private structure
+ * @ts: timespec structure to hold the current time value
+ * @sts: structure to hold the system time before and after reading the PHC
+ *
+ * This function reads the PRTTSYN_TIME registers and stores them in a
+ * timespec. However, since the registers are 64 bits of nanoseconds, we must
+ * convert the result to a timespec before we can return.
+ **/
+static void i40e_ptp_read(struct i40e_pf *pf, struct timespec64 *ts,
+			  struct ptp_system_timestamp *sts)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u32 hi, lo;
+	u64 ns;
+
+	/* The timer latches on the lowest register read. */
+	ptp_read_system_prets(sts);
+	lo = rd32(hw, I40E_PRTTSYN_TIME_L);
+	ptp_read_system_postts(sts);
+	hi = rd32(hw, I40E_PRTTSYN_TIME_H);
+
+	ns = (((u64)hi) << 32) | lo;
+
+	*ts = ns_to_timespec64(ns);
+}
+
+/**
+ * i40e_ptp_write - Write the PHC time to the device
+ * @pf: Board private structure
+ * @ts: timespec structure that holds the new time value
+ *
+ * This function writes the PRTTSYN_TIME registers with the user value. Since
+ * we receive a timespec from the stack, we must convert that timespec into
+ * nanoseconds before programming the registers.
+ **/
+static void i40e_ptp_write(struct i40e_pf *pf, const struct timespec64 *ts)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u64 ns = timespec64_to_ns(ts);
+
+	/* The timer will not update until the high register is written, so
+	 * write the low register first.
+	 */
+	wr32(hw, I40E_PRTTSYN_TIME_L, ns & 0xFFFFFFFF);
+	wr32(hw, I40E_PRTTSYN_TIME_H, ns >> 32);
+}
+
+/**
+ * i40e_ptp_convert_to_hwtstamp - Convert device clock to system time
+ * @hwtstamps: Timestamp structure to update
+ * @timestamp: Timestamp from the hardware
+ *
+ * We need to convert the NIC clock value into a hwtstamp which can be used by
+ * the upper level timestamping functions. Since the timestamp is simply a 64-
+ * bit nanosecond value, we can call ns_to_ktime directly to handle this.
+ **/
+static void i40e_ptp_convert_to_hwtstamp(struct skb_shared_hwtstamps *hwtstamps,
+					 u64 timestamp)
+{
+	memset(hwtstamps, 0, sizeof(*hwtstamps));
+
+	hwtstamps->hwtstamp = ns_to_ktime(timestamp);
+}
+
+/**
+ * i40e_ptp_adjfine - Adjust the PHC frequency
+ * @ptp: The PTP clock structure
+ * @scaled_ppm: Scaled parts per million adjustment from base
+ *
+ * Adjust the frequency of the PHC by the indicated delta from the base
+ * frequency.
+ *
+ * Scaled parts per million is ppm with a 16 bit binary fractional field.
+ **/
+static int i40e_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
+	struct i40e_hw *hw = &pf->hw;
+	u64 adj, base_adj;
+
+	smp_mb(); /* Force any pending update before accessing. */
+	base_adj = I40E_PTP_40GB_INCVAL * READ_ONCE(pf->ptp_adj_mult);
+
+	adj = adjust_by_scaled_ppm(base_adj, scaled_ppm);
+
+	wr32(hw, I40E_PRTTSYN_INC_L, adj & 0xFFFFFFFF);
+	wr32(hw, I40E_PRTTSYN_INC_H, adj >> 32);
+
+	return 0;
+}
+
+/**
+ * i40e_ptp_set_1pps_signal_hw - configure 1PPS PTP signal for pins
+ * @pf: the PF private data structure
+ *
+ * Configure 1PPS signal used for PTP pins
+ **/
+static void i40e_ptp_set_1pps_signal_hw(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	struct timespec64 now;
+	u64 ns;
+
+	wr32(hw, I40E_PRTTSYN_AUX_0(1), 0);
+	wr32(hw, I40E_PRTTSYN_AUX_1(1), I40E_PRTTSYN_AUX_1_INSTNT);
+	wr32(hw, I40E_PRTTSYN_AUX_0(1), I40E_PRTTSYN_AUX_0_OUT_ENABLE);
+
+	i40e_ptp_read(pf, &now, NULL);
+	now.tv_sec += I40E_PTP_2_SEC_DELAY;
+	now.tv_nsec = 0;
+	ns = timespec64_to_ns(&now);
+
+	/* I40E_PRTTSYN_TGT_L(1) */
+	wr32(hw, I40E_PRTTSYN_TGT_L(1), ns & 0xFFFFFFFF);
+	/* I40E_PRTTSYN_TGT_H(1) */
+	wr32(hw, I40E_PRTTSYN_TGT_H(1), ns >> 32);
+	wr32(hw, I40E_PRTTSYN_CLKO(1), I40E_PTP_HALF_SECOND);
+	wr32(hw, I40E_PRTTSYN_AUX_1(1), I40E_PRTTSYN_AUX_1_INSTNT);
+	wr32(hw, I40E_PRTTSYN_AUX_0(1),
+	     I40E_PRTTSYN_AUX_0_OUT_ENABLE_CLK_MOD);
+}
+
+/**
+ * i40e_ptp_adjtime - Adjust the PHC time
+ * @ptp: The PTP clock structure
+ * @delta: Offset in nanoseconds to adjust the PHC time by
+ *
+ * Adjust the current clock time by a delta specified in nanoseconds.
+ **/
+static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
+	struct i40e_hw *hw = &pf->hw;
+
+	mutex_lock(&pf->tmreg_lock);
+
+	if (delta > -999999900LL && delta < 999999900LL) {
+		int neg_adj = 0;
+		u32 timadj;
+		u64 tohw;
+
+		if (delta < 0) {
+			neg_adj = 1;
+			tohw = -delta;
+		} else {
+			tohw = delta;
+		}
+
+		timadj = tohw & 0x3FFFFFFF;
+		if (neg_adj)
+			timadj |= I40E_ISGN;
+		wr32(hw, I40E_PRTTSYN_ADJ, timadj);
+	} else {
+		struct timespec64 then, now;
+
+		then = ns_to_timespec64(delta);
+		i40e_ptp_read(pf, &now, NULL);
+		now = timespec64_add(now, then);
+		i40e_ptp_write(pf, (const struct timespec64 *)&now);
+		i40e_ptp_set_1pps_signal_hw(pf);
+	}
+
+	mutex_unlock(&pf->tmreg_lock);
+
+	return 0;
+}
+
+/**
+ * i40e_ptp_gettimex - Get the time of the PHC
+ * @ptp: The PTP clock structure
+ * @ts: timespec structure to hold the current time value
+ * @sts: structure to hold the system time before and after reading the PHC
+ *
+ * Read the device clock and return the correct value on ns, after converting it
+ * into a timespec struct.
+ **/
+static int i40e_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
+			     struct ptp_system_timestamp *sts)
+{
+	struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
+
+	mutex_lock(&pf->tmreg_lock);
+	i40e_ptp_read(pf, ts, sts);
+	mutex_unlock(&pf->tmreg_lock);
+
+	return 0;
+}
+
+/**
+ * i40e_ptp_settime - Set the time of the PHC
+ * @ptp: The PTP clock structure
+ * @ts: timespec64 structure that holds the new time value
+ *
+ * Set the device clock to the user input value. The conversion from timespec
+ * to ns happens in the write function.
+ **/
+static int i40e_ptp_settime(struct ptp_clock_info *ptp,
+			    const struct timespec64 *ts)
+{
+	struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
+
+	mutex_lock(&pf->tmreg_lock);
+	i40e_ptp_write(pf, ts);
+	mutex_unlock(&pf->tmreg_lock);
+
+	return 0;
+}
+
+/**
+ * i40e_pps_configure - configure PPS events
+ * @ptp: ptp clock
+ * @rq: clock request
+ * @on: status
+ *
+ * Configure PPS events for external clock source.
+ * Return 0 on success or error on failure.
+ **/
+static int i40e_pps_configure(struct ptp_clock_info *ptp,
+			      struct ptp_clock_request *rq,
+			      int on)
+{
+	struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
+
+	if (!!on)
+		i40e_ptp_set_1pps_signal_hw(pf);
+
+	return 0;
+}
+
+/**
+ * i40e_pin_state - determine PIN state
+ * @index: PIN index
+ * @func: function assigned to PIN
+ *
+ * Determine PIN state based on PIN index and function assigned.
+ * Return PIN state.
+ **/
+static enum i40e_ptp_gpio_pin_state i40e_pin_state(int index, int func)
+{
+	enum i40e_ptp_gpio_pin_state state = off;
+
+	if (index == 0 && func == PTP_PF_EXTTS)
+		state = in_A;
+	if (index == 1 && func == PTP_PF_EXTTS)
+		state = in_B;
+	if (index == 0 && func == PTP_PF_PEROUT)
+		state = out_A;
+	if (index == 1 && func == PTP_PF_PEROUT)
+		state = out_B;
+
+	return state;
+}
+
+/**
+ * i40e_ptp_enable_pin - enable PINs.
+ * @pf: private board structure
+ * @chan: channel
+ * @func: PIN function
+ * @on: state
+ *
+ * Enable PTP pins for external clock source.
+ * Return 0 on success or error code on failure.
+ **/
+static int i40e_ptp_enable_pin(struct i40e_pf *pf, unsigned int chan,
+			       enum ptp_pin_function func, int on)
+{
+	enum i40e_ptp_gpio_pin_state *pin = NULL;
+	struct i40e_ptp_pins_settings pins;
+	int pin_index;
+
+	/* Use PF0 to set pins. Return success for user space tools */
+	if (pf->hw.pf_id)
+		return 0;
+
+	/* Preserve previous state of pins that we don't touch */
+	pins.sdp3_2 = pf->ptp_pins->sdp3_2;
+	pins.sdp3_3 = pf->ptp_pins->sdp3_3;
+	pins.gpio_4 = pf->ptp_pins->gpio_4;
+
+	/* To turn on the pin - find the corresponding one based on
+	 * the given index. To to turn the function off - find
+	 * which pin had it assigned. Don't use ptp_find_pin here
+	 * because it tries to lock the pincfg_mux which is locked by
+	 * ptp_pin_store() that calls here.
+	 */
+	if (on) {
+		pin_index = ptp_find_pin(pf->ptp_clock, func, chan);
+		if (pin_index < 0)
+			return -EBUSY;
+
+		switch (pin_index) {
+		case SDP3_2:
+			pin = &pins.sdp3_2;
+			break;
+		case SDP3_3:
+			pin = &pins.sdp3_3;
+			break;
+		case GPIO_4:
+			pin = &pins.gpio_4;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		*pin = i40e_pin_state(chan, func);
+	} else {
+		pins.sdp3_2 = off;
+		pins.sdp3_3 = off;
+		pins.gpio_4 = off;
+	}
+
+	return i40e_ptp_set_pins(pf, &pins) ? -EINVAL : 0;
+}
+
+/**
+ * i40e_ptp_feature_enable - Enable external clock pins
+ * @ptp: The PTP clock structure
+ * @rq: The PTP clock request structure
+ * @on: To turn feature on/off
+ *
+ * Setting on/off PTP PPS feature for pin.
+ **/
+static int i40e_ptp_feature_enable(struct ptp_clock_info *ptp,
+				   struct ptp_clock_request *rq,
+				   int on)
+{
+	struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
+
+	enum ptp_pin_function func;
+	unsigned int chan;
+
+	/* TODO: Implement flags handling for EXTTS and PEROUT */
+	switch (rq->type) {
+	case PTP_CLK_REQ_EXTTS:
+		func = PTP_PF_EXTTS;
+		chan = rq->extts.index;
+		break;
+	case PTP_CLK_REQ_PEROUT:
+		func = PTP_PF_PEROUT;
+		chan = rq->perout.index;
+		break;
+	case PTP_CLK_REQ_PPS:
+		return i40e_pps_configure(ptp, rq, on);
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return i40e_ptp_enable_pin(pf, chan, func, on);
+}
+
+/**
+ * i40e_ptp_get_rx_events - Read I40E_PRTTSYN_STAT_1 and latch events
+ * @pf: the PF data structure
+ *
+ * This function reads I40E_PRTTSYN_STAT_1 and updates the corresponding timers
+ * for noticed latch events. This allows the driver to keep track of the first
+ * time a latch event was noticed which will be used to help clear out Rx
+ * timestamps for packets that got dropped or lost.
+ *
+ * This function will return the current value of I40E_PRTTSYN_STAT_1 and is
+ * expected to be called only while under the ptp_rx_lock.
+ **/
+static u32 i40e_ptp_get_rx_events(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u32 prttsyn_stat, new_latch_events;
+	int  i;
+
+	prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_1);
+	new_latch_events = prttsyn_stat & ~pf->latch_event_flags;
+
+	/* Update the jiffies time for any newly latched timestamp. This
+	 * ensures that we store the time that we first discovered a timestamp
+	 * was latched by the hardware. The service task will later determine
+	 * if we should free the latch and drop that timestamp should too much
+	 * time pass. This flow ensures that we only update jiffies for new
+	 * events latched since the last time we checked, and not all events
+	 * currently latched, so that the service task accounting remains
+	 * accurate.
+	 */
+	for (i = 0; i < 4; i++) {
+		if (new_latch_events & BIT(i))
+			pf->latch_events[i] = jiffies;
+	}
+
+	/* Finally, we store the current status of the Rx timestamp latches */
+	pf->latch_event_flags = prttsyn_stat;
+
+	return prttsyn_stat;
+}
+
+/**
+ * i40e_ptp_rx_hang - Detect error case when Rx timestamp registers are hung
+ * @pf: The PF private data structure
+ *
+ * This watchdog task is scheduled to detect error case where hardware has
+ * dropped an Rx packet that was timestamped when the ring is full. The
+ * particular error is rare but leaves the device in a state unable to timestamp
+ * any future packets.
+ **/
+void i40e_ptp_rx_hang(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	unsigned int i, cleared = 0;
+
+	/* Since we cannot turn off the Rx timestamp logic if the device is
+	 * configured for Tx timestamping, we check if Rx timestamping is
+	 * configured. We don't want to spuriously warn about Rx timestamp
+	 * hangs if we don't care about the timestamps.
+	 */
+	if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_rx)
+		return;
+
+	spin_lock_bh(&pf->ptp_rx_lock);
+
+	/* Update current latch times for Rx events */
+	i40e_ptp_get_rx_events(pf);
+
+	/* Check all the currently latched Rx events and see whether they have
+	 * been latched for over a second. It is assumed that any timestamp
+	 * should have been cleared within this time, or else it was captured
+	 * for a dropped frame that the driver never received. Thus, we will
+	 * clear any timestamp that has been latched for over 1 second.
+	 */
+	for (i = 0; i < 4; i++) {
+		if ((pf->latch_event_flags & BIT(i)) &&
+		    time_is_before_jiffies(pf->latch_events[i] + HZ)) {
+			rd32(hw, I40E_PRTTSYN_RXTIME_H(i));
+			pf->latch_event_flags &= ~BIT(i);
+			cleared++;
+		}
+	}
+
+	spin_unlock_bh(&pf->ptp_rx_lock);
+
+	/* Log a warning if more than 2 timestamps got dropped in the same
+	 * check. We don't want to warn about all drops because it can occur
+	 * in normal scenarios such as PTP frames on multicast addresses we
+	 * aren't listening to. However, administrator should know if this is
+	 * the reason packets aren't receiving timestamps.
+	 */
+	if (cleared > 2)
+		dev_dbg(&pf->pdev->dev,
+			"Dropped %d missed RXTIME timestamp events\n",
+			cleared);
+
+	/* Finally, update the rx_hwtstamp_cleared counter */
+	pf->rx_hwtstamp_cleared += cleared;
+}
+
+/**
+ * i40e_ptp_tx_hang - Detect error case when Tx timestamp register is hung
+ * @pf: The PF private data structure
+ *
+ * This watchdog task is run periodically to make sure that we clear the Tx
+ * timestamp logic if we don't obtain a timestamp in a reasonable amount of
+ * time. It is unexpected in the normal case but if it occurs it results in
+ * permanently preventing timestamps of future packets.
+ **/
+void i40e_ptp_tx_hang(struct i40e_pf *pf)
+{
+	struct sk_buff *skb;
+
+	if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_tx)
+		return;
+
+	/* Nothing to do if we're not already waiting for a timestamp */
+	if (!test_bit(__I40E_PTP_TX_IN_PROGRESS, pf->state))
+		return;
+
+	/* We already have a handler routine which is run when we are notified
+	 * of a Tx timestamp in the hardware. If we don't get an interrupt
+	 * within a second it is reasonable to assume that we never will.
+	 */
+	if (time_is_before_jiffies(pf->ptp_tx_start + HZ)) {
+		skb = pf->ptp_tx_skb;
+		pf->ptp_tx_skb = NULL;
+		clear_bit_unlock(__I40E_PTP_TX_IN_PROGRESS, pf->state);
+
+		/* Free the skb after we clear the bitlock */
+		dev_kfree_skb_any(skb);
+		pf->tx_hwtstamp_timeouts++;
+	}
+}
+
+/**
+ * i40e_ptp_tx_hwtstamp - Utility function which returns the Tx timestamp
+ * @pf: Board private structure
+ *
+ * Read the value of the Tx timestamp from the registers, convert it into a
+ * value consumable by the stack, and store that result into the shhwtstamps
+ * struct before returning it up the stack.
+ **/
+void i40e_ptp_tx_hwtstamp(struct i40e_pf *pf)
+{
+	struct skb_shared_hwtstamps shhwtstamps;
+	struct sk_buff *skb = pf->ptp_tx_skb;
+	struct i40e_hw *hw = &pf->hw;
+	u32 hi, lo;
+	u64 ns;
+
+	if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_tx)
+		return;
+
+	/* don't attempt to timestamp if we don't have an skb */
+	if (!pf->ptp_tx_skb)
+		return;
+
+	lo = rd32(hw, I40E_PRTTSYN_TXTIME_L);
+	hi = rd32(hw, I40E_PRTTSYN_TXTIME_H);
+
+	ns = (((u64)hi) << 32) | lo;
+	i40e_ptp_convert_to_hwtstamp(&shhwtstamps, ns);
+
+	/* Clear the bit lock as soon as possible after reading the register,
+	 * and prior to notifying the stack via skb_tstamp_tx(). Otherwise
+	 * applications might wake up and attempt to request another transmit
+	 * timestamp prior to the bit lock being cleared.
+	 */
+	pf->ptp_tx_skb = NULL;
+	clear_bit_unlock(__I40E_PTP_TX_IN_PROGRESS, pf->state);
+
+	/* Notify the stack and free the skb after we've unlocked */
+	skb_tstamp_tx(skb, &shhwtstamps);
+	dev_kfree_skb_any(skb);
+}
+
+/**
+ * i40e_ptp_rx_hwtstamp - Utility function which checks for an Rx timestamp
+ * @pf: Board private structure
+ * @skb: Particular skb to send timestamp with
+ * @index: Index into the receive timestamp registers for the timestamp
+ *
+ * The XL710 receives a notification in the receive descriptor with an offset
+ * into the set of RXTIME registers where the timestamp is for that skb. This
+ * function goes and fetches the receive timestamp from that offset, if a valid
+ * one exists. The RXTIME registers are in ns, so we must convert the result
+ * first.
+ **/
+void i40e_ptp_rx_hwtstamp(struct i40e_pf *pf, struct sk_buff *skb, u8 index)
+{
+	u32 prttsyn_stat, hi, lo;
+	struct i40e_hw *hw;
+	u64 ns;
+
+	/* Since we cannot turn off the Rx timestamp logic if the device is
+	 * doing Tx timestamping, check if Rx timestamping is configured.
+	 */
+	if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_rx)
+		return;
+
+	hw = &pf->hw;
+
+	spin_lock_bh(&pf->ptp_rx_lock);
+
+	/* Get current Rx events and update latch times */
+	prttsyn_stat = i40e_ptp_get_rx_events(pf);
+
+	/* TODO: Should we warn about missing Rx timestamp event? */
+	if (!(prttsyn_stat & BIT(index))) {
+		spin_unlock_bh(&pf->ptp_rx_lock);
+		return;
+	}
+
+	/* Clear the latched event since we're about to read its register */
+	pf->latch_event_flags &= ~BIT(index);
+
+	lo = rd32(hw, I40E_PRTTSYN_RXTIME_L(index));
+	hi = rd32(hw, I40E_PRTTSYN_RXTIME_H(index));
+
+	spin_unlock_bh(&pf->ptp_rx_lock);
+
+	ns = (((u64)hi) << 32) | lo;
+
+	i40e_ptp_convert_to_hwtstamp(skb_hwtstamps(skb), ns);
+}
+
+/**
+ * i40e_ptp_set_increment - Utility function to update clock increment rate
+ * @pf: Board private structure
+ *
+ * During a link change, the DMA frequency that drives the 1588 logic will
+ * change. In order to keep the PRTTSYN_TIME registers in units of nanoseconds,
+ * we must update the increment value per clock tick.
+ **/
+void i40e_ptp_set_increment(struct i40e_pf *pf)
+{
+	struct i40e_link_status *hw_link_info;
+	struct i40e_hw *hw = &pf->hw;
+	u64 incval;
+	u32 mult;
+
+	hw_link_info = &hw->phy.link_info;
+
+	i40e_aq_get_link_info(&pf->hw, true, NULL, NULL);
+
+	switch (hw_link_info->link_speed) {
+	case I40E_LINK_SPEED_10GB:
+		mult = I40E_PTP_10GB_INCVAL_MULT;
+		break;
+	case I40E_LINK_SPEED_5GB:
+		mult = I40E_PTP_5GB_INCVAL_MULT;
+		break;
+	case I40E_LINK_SPEED_1GB:
+		mult = I40E_PTP_1GB_INCVAL_MULT;
+		break;
+	case I40E_LINK_SPEED_100MB:
+	{
+		static int warn_once;
+
+		if (!warn_once) {
+			dev_warn(&pf->pdev->dev,
+				 "1588 functionality is not supported at 100 Mbps. Stopping the PHC.\n");
+			warn_once++;
+		}
+		mult = 0;
+		break;
+	}
+	case I40E_LINK_SPEED_40GB:
+	default:
+		mult = 1;
+		break;
+	}
+
+	/* The increment value is calculated by taking the base 40GbE incvalue
+	 * and multiplying it by a factor based on the link speed.
+	 */
+	incval = I40E_PTP_40GB_INCVAL * mult;
+
+	/* Write the new increment value into the increment register. The
+	 * hardware will not update the clock until both registers have been
+	 * written.
+	 */
+	wr32(hw, I40E_PRTTSYN_INC_L, incval & 0xFFFFFFFF);
+	wr32(hw, I40E_PRTTSYN_INC_H, incval >> 32);
+
+	/* Update the base adjustement value. */
+	WRITE_ONCE(pf->ptp_adj_mult, mult);
+	smp_mb(); /* Force the above update. */
+}
+
+/**
+ * i40e_ptp_get_ts_config - ioctl interface to read the HW timestamping
+ * @pf: Board private structure
+ * @ifr: ioctl data
+ *
+ * Obtain the current hardware timestamping settigs as requested. To do this,
+ * keep a shadow copy of the timestamp settings rather than attempting to
+ * deconstruct it from the registers.
+ **/
+int i40e_ptp_get_ts_config(struct i40e_pf *pf, struct ifreq *ifr)
+{
+	struct hwtstamp_config *config = &pf->tstamp_config;
+
+	if (!(pf->flags & I40E_FLAG_PTP))
+		return -EOPNOTSUPP;
+
+	return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
+		-EFAULT : 0;
+}
+
+/**
+ * i40e_ptp_free_pins - free memory used by PTP pins
+ * @pf: Board private structure
+ *
+ * Release memory allocated for PTP pins.
+ **/
+static void i40e_ptp_free_pins(struct i40e_pf *pf)
+{
+	if (i40e_is_ptp_pin_dev(&pf->hw)) {
+		kfree(pf->ptp_pins);
+		kfree(pf->ptp_caps.pin_config);
+		pf->ptp_pins = NULL;
+	}
+}
+
+/**
+ * i40e_ptp_set_pin_hw - Set HW GPIO pin
+ * @hw: pointer to the hardware structure
+ * @pin: pin index
+ * @state: pin state
+ *
+ * Set status of GPIO pin for external clock handling.
+ **/
+static void i40e_ptp_set_pin_hw(struct i40e_hw *hw,
+				unsigned int pin,
+				enum i40e_ptp_gpio_pin_state state)
+{
+	switch (state) {
+	case off:
+		wr32(hw, I40E_GLGEN_GPIO_CTL(pin), 0);
+		break;
+	case in_A:
+		wr32(hw, I40E_GLGEN_GPIO_CTL(pin),
+		     I40E_GLGEN_GPIO_CTL_PORT_0_IN_TIMESYNC_0);
+		break;
+	case in_B:
+		wr32(hw, I40E_GLGEN_GPIO_CTL(pin),
+		     I40E_GLGEN_GPIO_CTL_PORT_1_IN_TIMESYNC_0);
+		break;
+	case out_A:
+		wr32(hw, I40E_GLGEN_GPIO_CTL(pin),
+		     I40E_GLGEN_GPIO_CTL_PORT_0_OUT_TIMESYNC_1);
+		break;
+	case out_B:
+		wr32(hw, I40E_GLGEN_GPIO_CTL(pin),
+		     I40E_GLGEN_GPIO_CTL_PORT_1_OUT_TIMESYNC_1);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * i40e_ptp_set_led_hw - Set HW GPIO led
+ * @hw: pointer to the hardware structure
+ * @led: led index
+ * @state: led state
+ *
+ * Set status of GPIO led for external clock handling.
+ **/
+static void i40e_ptp_set_led_hw(struct i40e_hw *hw,
+				unsigned int led,
+				enum i40e_ptp_led_pin_state state)
+{
+	switch (state) {
+	case low:
+		wr32(hw, I40E_GLGEN_GPIO_SET,
+		     I40E_GLGEN_GPIO_SET_DRV_SDP_DATA | led);
+		break;
+	case high:
+		wr32(hw, I40E_GLGEN_GPIO_SET,
+		     I40E_GLGEN_GPIO_SET_DRV_SDP_DATA |
+		     I40E_GLGEN_GPIO_SET_SDP_DATA_HI | led);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * i40e_ptp_init_leds_hw - init LEDs
+ * @hw: pointer to a hardware structure
+ *
+ * Set initial state of LEDs
+ **/
+static void i40e_ptp_init_leds_hw(struct i40e_hw *hw)
+{
+	wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED2_0),
+	     I40E_GLGEN_GPIO_CTL_LED_INIT);
+	wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED2_1),
+	     I40E_GLGEN_GPIO_CTL_LED_INIT);
+	wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED3_0),
+	     I40E_GLGEN_GPIO_CTL_LED_INIT);
+	wr32(hw, I40E_GLGEN_GPIO_CTL(I40E_LED3_1),
+	     I40E_GLGEN_GPIO_CTL_LED_INIT);
+}
+
+/**
+ * i40e_ptp_set_pins_hw - Set HW GPIO pins
+ * @pf: Board private structure
+ *
+ * This function sets GPIO pins for PTP
+ **/
+static void i40e_ptp_set_pins_hw(struct i40e_pf *pf)
+{
+	const struct i40e_ptp_pins_settings *pins = pf->ptp_pins;
+	struct i40e_hw *hw = &pf->hw;
+
+	/* pin must be disabled before it may be used */
+	i40e_ptp_set_pin_hw(hw, I40E_SDP3_2, off);
+	i40e_ptp_set_pin_hw(hw, I40E_SDP3_3, off);
+	i40e_ptp_set_pin_hw(hw, I40E_GPIO_4, off);
+
+	i40e_ptp_set_pin_hw(hw, I40E_SDP3_2, pins->sdp3_2);
+	i40e_ptp_set_pin_hw(hw, I40E_SDP3_3, pins->sdp3_3);
+	i40e_ptp_set_pin_hw(hw, I40E_GPIO_4, pins->gpio_4);
+
+	i40e_ptp_set_led_hw(hw, I40E_LED2_0, pins->led2_0);
+	i40e_ptp_set_led_hw(hw, I40E_LED2_1, pins->led2_1);
+	i40e_ptp_set_led_hw(hw, I40E_LED3_0, pins->led3_0);
+	i40e_ptp_set_led_hw(hw, I40E_LED3_1, pins->led3_1);
+
+	dev_info(&pf->pdev->dev,
+		 "PTP configuration set to: SDP3_2: %s,  SDP3_3: %s,  GPIO_4: %s.\n",
+		 i40e_ptp_gpio_pin_state2str[pins->sdp3_2],
+		 i40e_ptp_gpio_pin_state2str[pins->sdp3_3],
+		 i40e_ptp_gpio_pin_state2str[pins->gpio_4]);
+}
+
+/**
+ * i40e_ptp_set_pins - set PTP pins in HW
+ * @pf: Board private structure
+ * @pins: PTP pins to be applied
+ *
+ * Validate and set PTP pins in HW for specific PF.
+ * Return 0 on success or negative value on error.
+ **/
+static int i40e_ptp_set_pins(struct i40e_pf *pf,
+			     struct i40e_ptp_pins_settings *pins)
+{
+	enum i40e_can_set_pins_t pin_caps = i40e_can_set_pins(pf);
+	int i = 0;
+
+	if (pin_caps == CANT_DO_PINS)
+		return -EOPNOTSUPP;
+	else if (pin_caps == CAN_DO_PINS)
+		return 0;
+
+	if (pins->sdp3_2 == invalid)
+		pins->sdp3_2 = pf->ptp_pins->sdp3_2;
+	if (pins->sdp3_3 == invalid)
+		pins->sdp3_3 = pf->ptp_pins->sdp3_3;
+	if (pins->gpio_4 == invalid)
+		pins->gpio_4 = pf->ptp_pins->gpio_4;
+	while (i40e_ptp_pin_led_allowed_states[i].sdp3_2 != end) {
+		if (pins->sdp3_2 == i40e_ptp_pin_led_allowed_states[i].sdp3_2 &&
+		    pins->sdp3_3 == i40e_ptp_pin_led_allowed_states[i].sdp3_3 &&
+		    pins->gpio_4 == i40e_ptp_pin_led_allowed_states[i].gpio_4) {
+			pins->led2_0 =
+				i40e_ptp_pin_led_allowed_states[i].led2_0;
+			pins->led2_1 =
+				i40e_ptp_pin_led_allowed_states[i].led2_1;
+			pins->led3_0 =
+				i40e_ptp_pin_led_allowed_states[i].led3_0;
+			pins->led3_1 =
+				i40e_ptp_pin_led_allowed_states[i].led3_1;
+			break;
+		}
+		i++;
+	}
+	if (i40e_ptp_pin_led_allowed_states[i].sdp3_2 == end) {
+		dev_warn(&pf->pdev->dev,
+			 "Unsupported PTP pin configuration: SDP3_2: %s,  SDP3_3: %s,  GPIO_4: %s.\n",
+			 i40e_ptp_gpio_pin_state2str[pins->sdp3_2],
+			 i40e_ptp_gpio_pin_state2str[pins->sdp3_3],
+			 i40e_ptp_gpio_pin_state2str[pins->gpio_4]);
+
+		return -EPERM;
+	}
+	memcpy(pf->ptp_pins, pins, sizeof(*pins));
+	i40e_ptp_set_pins_hw(pf);
+	i40_ptp_reset_timing_events(pf);
+
+	return 0;
+}
+
+/**
+ * i40e_ptp_alloc_pins - allocate PTP pins structure
+ * @pf: Board private structure
+ *
+ * allocate PTP pins structure
+ **/
+int i40e_ptp_alloc_pins(struct i40e_pf *pf)
+{
+	if (!i40e_is_ptp_pin_dev(&pf->hw))
+		return 0;
+
+	pf->ptp_pins =
+		kzalloc(sizeof(struct i40e_ptp_pins_settings), GFP_KERNEL);
+
+	if (!pf->ptp_pins) {
+		dev_warn(&pf->pdev->dev, "Cannot allocate memory for PTP pins structure.\n");
+		return -ENOMEM;
+	}
+
+	pf->ptp_pins->sdp3_2 = off;
+	pf->ptp_pins->sdp3_3 = off;
+	pf->ptp_pins->gpio_4 = off;
+	pf->ptp_pins->led2_0 = high;
+	pf->ptp_pins->led2_1 = high;
+	pf->ptp_pins->led3_0 = high;
+	pf->ptp_pins->led3_1 = high;
+
+	/* Use PF0 to set pins in HW. Return success for user space tools */
+	if (pf->hw.pf_id)
+		return 0;
+
+	i40e_ptp_init_leds_hw(&pf->hw);
+	i40e_ptp_set_pins_hw(pf);
+
+	return 0;
+}
+
+/**
+ * i40e_ptp_set_timestamp_mode - setup hardware for requested timestamp mode
+ * @pf: Board private structure
+ * @config: hwtstamp settings requested or saved
+ *
+ * Control hardware registers to enter the specific mode requested by the
+ * user. Also used during reset path to ensure that timestamp settings are
+ * maintained.
+ *
+ * Note: modifies config in place, and may update the requested mode to be
+ * more broad if the specific filter is not directly supported.
+ **/
+static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf,
+				       struct hwtstamp_config *config)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u32 tsyntype, regval;
+
+	/* Selects external trigger to cause event */
+	regval = rd32(hw, I40E_PRTTSYN_AUX_0(0));
+	/* Bit 17:16 is EVNTLVL, 01B rising edge */
+	regval &= 0;
+	regval |= (1 << I40E_PRTTSYN_AUX_0_EVNTLVL_SHIFT);
+	/* regval: 0001 0000 0000 0000 0000 */
+	wr32(hw, I40E_PRTTSYN_AUX_0(0), regval);
+
+	/* Enabel interrupts */
+	regval = rd32(hw, I40E_PRTTSYN_CTL0);
+	regval |= 1 << I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT;
+	wr32(hw, I40E_PRTTSYN_CTL0, regval);
+
+	INIT_WORK(&pf->ptp_extts0_work, i40e_ptp_extts0_work);
+
+	switch (config->tx_type) {
+	case HWTSTAMP_TX_OFF:
+		pf->ptp_tx = false;
+		break;
+	case HWTSTAMP_TX_ON:
+		pf->ptp_tx = true;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config->rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		pf->ptp_rx = false;
+		/* We set the type to V1, but do not enable UDP packet
+		 * recognition. In this way, we should be as close to
+		 * disabling PTP Rx timestamps as possible since V1 packets
+		 * are always UDP, since L2 packets are a V2 feature.
+		 */
+		tsyntype = I40E_PRTTSYN_CTL1_TSYNTYPE_V1;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+		if (!(pf->hw_features & I40E_HW_PTP_L4_CAPABLE))
+			return -ERANGE;
+		pf->ptp_rx = true;
+		tsyntype = I40E_PRTTSYN_CTL1_V1MESSTYPE0_MASK |
+			   I40E_PRTTSYN_CTL1_TSYNTYPE_V1 |
+			   I40E_PRTTSYN_CTL1_UDP_ENA_MASK;
+		config->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		if (!(pf->hw_features & I40E_HW_PTP_L4_CAPABLE))
+			return -ERANGE;
+		fallthrough;
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+		pf->ptp_rx = true;
+		tsyntype = I40E_PRTTSYN_CTL1_V2MESSTYPE0_MASK |
+			   I40E_PRTTSYN_CTL1_TSYNTYPE_V2;
+		if (pf->hw_features & I40E_HW_PTP_L4_CAPABLE) {
+			tsyntype |= I40E_PRTTSYN_CTL1_UDP_ENA_MASK;
+			config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		} else {
+			config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
+		}
+		break;
+	case HWTSTAMP_FILTER_NTP_ALL:
+	case HWTSTAMP_FILTER_ALL:
+	default:
+		return -ERANGE;
+	}
+
+	/* Clear out all 1588-related registers to clear and unlatch them. */
+	spin_lock_bh(&pf->ptp_rx_lock);
+	rd32(hw, I40E_PRTTSYN_STAT_0);
+	rd32(hw, I40E_PRTTSYN_TXTIME_H);
+	rd32(hw, I40E_PRTTSYN_RXTIME_H(0));
+	rd32(hw, I40E_PRTTSYN_RXTIME_H(1));
+	rd32(hw, I40E_PRTTSYN_RXTIME_H(2));
+	rd32(hw, I40E_PRTTSYN_RXTIME_H(3));
+	pf->latch_event_flags = 0;
+	spin_unlock_bh(&pf->ptp_rx_lock);
+
+	/* Enable/disable the Tx timestamp interrupt based on user input. */
+	regval = rd32(hw, I40E_PRTTSYN_CTL0);
+	if (pf->ptp_tx)
+		regval |= I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_MASK;
+	else
+		regval &= ~I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_MASK;
+	wr32(hw, I40E_PRTTSYN_CTL0, regval);
+
+	regval = rd32(hw, I40E_PFINT_ICR0_ENA);
+	if (pf->ptp_tx)
+		regval |= I40E_PFINT_ICR0_ENA_TIMESYNC_MASK;
+	else
+		regval &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK;
+	wr32(hw, I40E_PFINT_ICR0_ENA, regval);
+
+	/* Although there is no simple on/off switch for Rx, we "disable" Rx
+	 * timestamps by setting to V1 only mode and clear the UDP
+	 * recognition. This ought to disable all PTP Rx timestamps as V1
+	 * packets are always over UDP. Note that software is configured to
+	 * ignore Rx timestamps via the pf->ptp_rx flag.
+	 */
+	regval = rd32(hw, I40E_PRTTSYN_CTL1);
+	/* clear everything but the enable bit */
+	regval &= I40E_PRTTSYN_CTL1_TSYNENA_MASK;
+	/* now enable bits for desired Rx timestamps */
+	regval |= tsyntype;
+	wr32(hw, I40E_PRTTSYN_CTL1, regval);
+
+	return 0;
+}
+
+/**
+ * i40e_ptp_set_ts_config - ioctl interface to control the HW timestamping
+ * @pf: Board private structure
+ * @ifr: ioctl data
+ *
+ * Respond to the user filter requests and make the appropriate hardware
+ * changes here. The XL710 cannot support splitting of the Tx/Rx timestamping
+ * logic, so keep track in software of whether to indicate these timestamps
+ * or not.
+ *
+ * It is permissible to "upgrade" the user request to a broader filter, as long
+ * as the user receives the timestamps they care about and the user is notified
+ * the filter has been broadened.
+ **/
+int i40e_ptp_set_ts_config(struct i40e_pf *pf, struct ifreq *ifr)
+{
+	struct hwtstamp_config config;
+	int err;
+
+	if (!(pf->flags & I40E_FLAG_PTP))
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	err = i40e_ptp_set_timestamp_mode(pf, &config);
+	if (err)
+		return err;
+
+	/* save these settings for future reference */
+	pf->tstamp_config = config;
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+		-EFAULT : 0;
+}
+
+/**
+ * i40e_init_pin_config - initialize pins.
+ * @pf: private board structure
+ *
+ * Initialize pins for external clock source.
+ * Return 0 on success or error code on failure.
+ **/
+static int i40e_init_pin_config(struct i40e_pf *pf)
+{
+	int i;
+
+	pf->ptp_caps.n_pins = 3;
+	pf->ptp_caps.n_ext_ts = 2;
+	pf->ptp_caps.pps = 1;
+	pf->ptp_caps.n_per_out = 2;
+
+	pf->ptp_caps.pin_config = kcalloc(pf->ptp_caps.n_pins,
+					  sizeof(*pf->ptp_caps.pin_config),
+					  GFP_KERNEL);
+	if (!pf->ptp_caps.pin_config)
+		return -ENOMEM;
+
+	for (i = 0; i < pf->ptp_caps.n_pins; i++) {
+		snprintf(pf->ptp_caps.pin_config[i].name,
+			 sizeof(pf->ptp_caps.pin_config[i].name),
+			 "%s", sdp_desc[i].name);
+		pf->ptp_caps.pin_config[i].index = sdp_desc[i].index;
+		pf->ptp_caps.pin_config[i].func = PTP_PF_NONE;
+		pf->ptp_caps.pin_config[i].chan = sdp_desc[i].chan;
+	}
+
+	pf->ptp_caps.verify = i40e_ptp_verify;
+	pf->ptp_caps.enable = i40e_ptp_feature_enable;
+
+	pf->ptp_caps.pps = 1;
+
+	return 0;
+}
+
+/**
+ * i40e_ptp_create_clock - Create PTP clock device for userspace
+ * @pf: Board private structure
+ *
+ * This function creates a new PTP clock device. It only creates one if we
+ * don't already have one, so it is safe to call. Will return error if it
+ * can't create one, but success if we already have a device. Should be used
+ * by i40e_ptp_init to create clock initially, and prevent global resets from
+ * creating new clock devices.
+ **/
+static long i40e_ptp_create_clock(struct i40e_pf *pf)
+{
+	/* no need to create a clock device if we already have one */
+	if (!IS_ERR_OR_NULL(pf->ptp_clock))
+		return 0;
+
+	strscpy(pf->ptp_caps.name, i40e_driver_name,
+		sizeof(pf->ptp_caps.name) - 1);
+	pf->ptp_caps.owner = THIS_MODULE;
+	pf->ptp_caps.max_adj = 999999999;
+	pf->ptp_caps.adjfine = i40e_ptp_adjfine;
+	pf->ptp_caps.adjtime = i40e_ptp_adjtime;
+	pf->ptp_caps.gettimex64 = i40e_ptp_gettimex;
+	pf->ptp_caps.settime64 = i40e_ptp_settime;
+	if (i40e_is_ptp_pin_dev(&pf->hw)) {
+		int err = i40e_init_pin_config(pf);
+
+		if (err)
+			return err;
+	}
+
+	/* Attempt to register the clock before enabling the hardware. */
+	pf->ptp_clock = ptp_clock_register(&pf->ptp_caps, &pf->pdev->dev);
+	if (IS_ERR(pf->ptp_clock))
+		return PTR_ERR(pf->ptp_clock);
+
+	/* clear the hwtstamp settings here during clock create, instead of
+	 * during regular init, so that we can maintain settings across a
+	 * reset or suspend.
+	 */
+	pf->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+	pf->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
+
+	/* Set the previous "reset" time to the current Kernel clock time */
+	ktime_get_real_ts64(&pf->ptp_prev_hw_time);
+	pf->ptp_reset_start = ktime_get();
+
+	return 0;
+}
+
+/**
+ * i40e_ptp_save_hw_time - Save the current PTP time as ptp_prev_hw_time
+ * @pf: Board private structure
+ *
+ * Read the current PTP time and save it into pf->ptp_prev_hw_time. This should
+ * be called at the end of preparing to reset, just before hardware reset
+ * occurs, in order to preserve the PTP time as close as possible across
+ * resets.
+ */
+void i40e_ptp_save_hw_time(struct i40e_pf *pf)
+{
+	/* don't try to access the PTP clock if it's not enabled */
+	if (!(pf->flags & I40E_FLAG_PTP))
+		return;
+
+	i40e_ptp_gettimex(&pf->ptp_caps, &pf->ptp_prev_hw_time, NULL);
+	/* Get a monotonic starting time for this reset */
+	pf->ptp_reset_start = ktime_get();
+}
+
+/**
+ * i40e_ptp_restore_hw_time - Restore the ptp_prev_hw_time + delta to PTP regs
+ * @pf: Board private structure
+ *
+ * Restore the PTP hardware clock registers. We previously cached the PTP
+ * hardware time as pf->ptp_prev_hw_time. To be as accurate as possible,
+ * update this value based on the time delta since the time was saved, using
+ * CLOCK_MONOTONIC (via ktime_get()) to calculate the time difference.
+ *
+ * This ensures that the hardware clock is restored to nearly what it should
+ * have been if a reset had not occurred.
+ */
+void i40e_ptp_restore_hw_time(struct i40e_pf *pf)
+{
+	ktime_t delta = ktime_sub(ktime_get(), pf->ptp_reset_start);
+
+	/* Update the previous HW time with the ktime delta */
+	timespec64_add_ns(&pf->ptp_prev_hw_time, ktime_to_ns(delta));
+
+	/* Restore the hardware clock registers */
+	i40e_ptp_settime(&pf->ptp_caps, &pf->ptp_prev_hw_time);
+}
+
+/**
+ * i40e_ptp_init - Initialize the 1588 support after device probe or reset
+ * @pf: Board private structure
+ *
+ * This function sets device up for 1588 support. The first time it is run, it
+ * will create a PHC clock device. It does not create a clock device if one
+ * already exists. It also reconfigures the device after a reset.
+ *
+ * The first time a clock is created, i40e_ptp_create_clock will set
+ * pf->ptp_prev_hw_time to the current system time. During resets, it is
+ * expected that this timespec will be set to the last known PTP clock time,
+ * in order to preserve the clock time as close as possible across a reset.
+ **/
+void i40e_ptp_init(struct i40e_pf *pf)
+{
+	struct net_device *netdev = pf->vsi[pf->lan_vsi]->netdev;
+	struct i40e_hw *hw = &pf->hw;
+	u32 pf_id;
+	long err;
+
+	/* Only one PF is assigned to control 1588 logic per port. Do not
+	 * enable any support for PFs not assigned via PRTTSYN_CTL0.PF_ID
+	 */
+	pf_id = (rd32(hw, I40E_PRTTSYN_CTL0) & I40E_PRTTSYN_CTL0_PF_ID_MASK) >>
+		I40E_PRTTSYN_CTL0_PF_ID_SHIFT;
+	if (hw->pf_id != pf_id) {
+		pf->flags &= ~I40E_FLAG_PTP;
+		dev_info(&pf->pdev->dev, "%s: PTP not supported on %s\n",
+			 __func__,
+			 netdev->name);
+		return;
+	}
+
+	mutex_init(&pf->tmreg_lock);
+	spin_lock_init(&pf->ptp_rx_lock);
+
+	/* ensure we have a clock device */
+	err = i40e_ptp_create_clock(pf);
+	if (err) {
+		pf->ptp_clock = NULL;
+		dev_err(&pf->pdev->dev, "%s: ptp_clock_register failed\n",
+			__func__);
+	} else if (pf->ptp_clock) {
+		u32 regval;
+
+		if (pf->hw.debug_mask & I40E_DEBUG_LAN)
+			dev_info(&pf->pdev->dev, "PHC enabled\n");
+		pf->flags |= I40E_FLAG_PTP;
+
+		/* Ensure the clocks are running. */
+		regval = rd32(hw, I40E_PRTTSYN_CTL0);
+		regval |= I40E_PRTTSYN_CTL0_TSYNENA_MASK;
+		wr32(hw, I40E_PRTTSYN_CTL0, regval);
+		regval = rd32(hw, I40E_PRTTSYN_CTL1);
+		regval |= I40E_PRTTSYN_CTL1_TSYNENA_MASK;
+		wr32(hw, I40E_PRTTSYN_CTL1, regval);
+
+		/* Set the increment value per clock tick. */
+		i40e_ptp_set_increment(pf);
+
+		/* reset timestamping mode */
+		i40e_ptp_set_timestamp_mode(pf, &pf->tstamp_config);
+
+		/* Restore the clock time based on last known value */
+		i40e_ptp_restore_hw_time(pf);
+	}
+
+	i40e_ptp_set_1pps_signal_hw(pf);
+}
+
+/**
+ * i40e_ptp_stop - Disable the driver/hardware support and unregister the PHC
+ * @pf: Board private structure
+ *
+ * This function handles the cleanup work required from the initialization by
+ * clearing out the important information and unregistering the PHC.
+ **/
+void i40e_ptp_stop(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u32 regval;
+
+	pf->flags &= ~I40E_FLAG_PTP;
+	pf->ptp_tx = false;
+	pf->ptp_rx = false;
+
+	if (pf->ptp_tx_skb) {
+		struct sk_buff *skb = pf->ptp_tx_skb;
+
+		pf->ptp_tx_skb = NULL;
+		clear_bit_unlock(__I40E_PTP_TX_IN_PROGRESS, pf->state);
+		dev_kfree_skb_any(skb);
+	}
+
+	if (pf->ptp_clock) {
+		ptp_clock_unregister(pf->ptp_clock);
+		pf->ptp_clock = NULL;
+		dev_info(&pf->pdev->dev, "%s: removed PHC on %s\n", __func__,
+			 pf->vsi[pf->lan_vsi]->netdev->name);
+	}
+
+	if (i40e_is_ptp_pin_dev(&pf->hw)) {
+		i40e_ptp_set_pin_hw(hw, I40E_SDP3_2, off);
+		i40e_ptp_set_pin_hw(hw, I40E_SDP3_3, off);
+		i40e_ptp_set_pin_hw(hw, I40E_GPIO_4, off);
+	}
+
+	regval = rd32(hw, I40E_PRTTSYN_AUX_0(0));
+	regval &= ~I40E_PRTTSYN_AUX_0_PTPFLAG_MASK;
+	wr32(hw, I40E_PRTTSYN_AUX_0(0), regval);
+
+	/* Disable interrupts */
+	regval = rd32(hw, I40E_PRTTSYN_CTL0);
+	regval &= ~I40E_PRTTSYN_CTL0_EVENT_INT_ENA_MASK;
+	wr32(hw, I40E_PRTTSYN_CTL0, regval);
+
+	i40e_ptp_free_pins(pf);
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
new file mode 100644
index 0000000000..7339003aa1
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -0,0 +1,899 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2021 Intel Corporation. */
+
+#ifndef _I40E_REGISTER_H_
+#define _I40E_REGISTER_H_
+
+#define I40E_GL_ATQLEN_ATQCRIT_SHIFT 30
+#define I40E_GL_ATQLEN_ATQCRIT_MASK I40E_MASK(0x1, I40E_GL_ATQLEN_ATQCRIT_SHIFT)
+#define I40E_PF_ARQBAH 0x00080180 /* Reset: EMPR */
+#define I40E_PF_ARQBAL 0x00080080 /* Reset: EMPR */
+#define I40E_PF_ARQH 0x00080380 /* Reset: EMPR */
+#define I40E_PF_ARQH_ARQH_SHIFT 0
+#define I40E_PF_ARQH_ARQH_MASK I40E_MASK(0x3FF, I40E_PF_ARQH_ARQH_SHIFT)
+#define I40E_PF_ARQLEN 0x00080280 /* Reset: EMPR */
+#define I40E_PF_ARQLEN_ARQVFE_SHIFT 28
+#define I40E_PF_ARQLEN_ARQVFE_MASK I40E_MASK(0x1, I40E_PF_ARQLEN_ARQVFE_SHIFT)
+#define I40E_PF_ARQLEN_ARQOVFL_SHIFT 29
+#define I40E_PF_ARQLEN_ARQOVFL_MASK I40E_MASK(0x1, I40E_PF_ARQLEN_ARQOVFL_SHIFT)
+#define I40E_PF_ARQLEN_ARQCRIT_SHIFT 30
+#define I40E_PF_ARQLEN_ARQCRIT_MASK I40E_MASK(0x1, I40E_PF_ARQLEN_ARQCRIT_SHIFT)
+#define I40E_PF_ARQLEN_ARQENABLE_SHIFT 31
+#define I40E_PF_ARQLEN_ARQENABLE_MASK I40E_MASK(0x1u, I40E_PF_ARQLEN_ARQENABLE_SHIFT)
+#define I40E_PF_ARQT 0x00080480 /* Reset: EMPR */
+#define I40E_PF_ATQBAH 0x00080100 /* Reset: EMPR */
+#define I40E_PF_ATQBAL 0x00080000 /* Reset: EMPR */
+#define I40E_PF_ATQH 0x00080300 /* Reset: EMPR */
+#define I40E_PF_ATQLEN 0x00080200 /* Reset: EMPR */
+#define I40E_PF_ATQLEN_ATQVFE_SHIFT 28
+#define I40E_PF_ATQLEN_ATQVFE_MASK I40E_MASK(0x1, I40E_PF_ATQLEN_ATQVFE_SHIFT)
+#define I40E_PF_ATQLEN_ATQOVFL_SHIFT 29
+#define I40E_PF_ATQLEN_ATQOVFL_MASK I40E_MASK(0x1, I40E_PF_ATQLEN_ATQOVFL_SHIFT)
+#define I40E_PF_ATQLEN_ATQCRIT_SHIFT 30
+#define I40E_PF_ATQLEN_ATQCRIT_MASK I40E_MASK(0x1, I40E_PF_ATQLEN_ATQCRIT_SHIFT)
+#define I40E_PF_ATQLEN_ATQENABLE_SHIFT 31
+#define I40E_PF_ATQLEN_ATQENABLE_MASK I40E_MASK(0x1u, I40E_PF_ATQLEN_ATQENABLE_SHIFT)
+#define I40E_PF_ATQT 0x00080400 /* Reset: EMPR */
+#define I40E_PRT_SWR_PM_THR 0x0026CD00 /* Reset: CORER */
+#define I40E_PRT_SWR_PM_THR_THRESHOLD_SHIFT 0
+#define I40E_PRT_SWR_PM_THR_THRESHOLD_MASK I40E_MASK(0xFF, I40E_PRT_SWR_PM_THR_THRESHOLD_SHIFT)
+#define I40E_PRTDCB_FCCFG 0x001E4640 /* Reset: GLOBR */
+#define I40E_PRTDCB_FCCFG_TFCE_SHIFT 3
+#define I40E_PRTDCB_FCCFG_TFCE_MASK I40E_MASK(0x3, I40E_PRTDCB_FCCFG_TFCE_SHIFT)
+#define I40E_PRTDCB_GENC 0x00083000 /* Reset: CORER */
+#define I40E_PRTDCB_GENC_NUMTC_SHIFT 2
+#define I40E_PRTDCB_GENC_NUMTC_MASK I40E_MASK(0xF, I40E_PRTDCB_GENC_NUMTC_SHIFT)
+#define I40E_PRTDCB_GENC_PFCLDA_SHIFT 16
+#define I40E_PRTDCB_GENC_PFCLDA_MASK I40E_MASK(0xFFFF, I40E_PRTDCB_GENC_PFCLDA_SHIFT)
+#define I40E_PRTDCB_GENS 0x00083020 /* Reset: CORER */
+#define I40E_PRTDCB_GENS_DCBX_STATUS_SHIFT 0
+#define I40E_PRTDCB_GENS_DCBX_STATUS_MASK I40E_MASK(0x7, I40E_PRTDCB_GENS_DCBX_STATUS_SHIFT)
+#define I40E_PRTDCB_MFLCN 0x001E2400 /* Reset: GLOBR */
+#define I40E_PRTDCB_MFLCN_PMCF_SHIFT 0
+#define I40E_PRTDCB_MFLCN_PMCF_MASK I40E_MASK(0x1, I40E_PRTDCB_MFLCN_PMCF_SHIFT)
+#define I40E_PRTDCB_MFLCN_DPF_SHIFT 1
+#define I40E_PRTDCB_MFLCN_DPF_MASK I40E_MASK(0x1, I40E_PRTDCB_MFLCN_DPF_SHIFT)
+#define I40E_PRTDCB_MFLCN_RPFCM_SHIFT 2
+#define I40E_PRTDCB_MFLCN_RPFCM_MASK I40E_MASK(0x1, I40E_PRTDCB_MFLCN_RPFCM_SHIFT)
+#define I40E_PRTDCB_MFLCN_RFCE_SHIFT 3
+#define I40E_PRTDCB_MFLCN_RFCE_MASK I40E_MASK(0x1, I40E_PRTDCB_MFLCN_RFCE_SHIFT)
+#define I40E_PRTDCB_MFLCN_RPFCE_SHIFT 4
+#define I40E_PRTDCB_MFLCN_RPFCE_MASK I40E_MASK(0xFF, I40E_PRTDCB_MFLCN_RPFCE_SHIFT)
+#define I40E_PRTDCB_RETSC 0x001223E0 /* Reset: CORER */
+#define I40E_PRTDCB_RETSC_ETS_MODE_SHIFT 0
+#define I40E_PRTDCB_RETSC_ETS_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_RETSC_ETS_MODE_SHIFT)
+#define I40E_PRTDCB_RETSC_NON_ETS_MODE_SHIFT 1
+#define I40E_PRTDCB_RETSC_NON_ETS_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_RETSC_NON_ETS_MODE_SHIFT)
+#define I40E_PRTDCB_RETSC_ETS_MAX_EXP_SHIFT 2
+#define I40E_PRTDCB_RETSC_ETS_MAX_EXP_MASK I40E_MASK(0xF, I40E_PRTDCB_RETSC_ETS_MAX_EXP_SHIFT)
+#define I40E_PRTDCB_RETSC_LLTC_SHIFT 8
+#define I40E_PRTDCB_RETSC_LLTC_MASK I40E_MASK(0xFF, I40E_PRTDCB_RETSC_LLTC_SHIFT)
+#define I40E_PRTDCB_RETSTCC(_i) (0x00122180 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTDCB_RETSTCC_MAX_INDEX 7
+#define I40E_PRTDCB_RETSTCC_BWSHARE_SHIFT 0
+#define I40E_PRTDCB_RETSTCC_BWSHARE_MASK I40E_MASK(0x7F, I40E_PRTDCB_RETSTCC_BWSHARE_SHIFT)
+#define I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT 30
+#define I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT)
+#define I40E_PRTDCB_RETSTCC_ETSTC_SHIFT 31
+#define I40E_PRTDCB_RETSTCC_ETSTC_MASK I40E_MASK(0x1u, I40E_PRTDCB_RETSTCC_ETSTC_SHIFT)
+#define I40E_PRTDCB_RPPMC 0x001223A0 /* Reset: CORER */
+#define I40E_PRTDCB_RPPMC_LANRPPM_SHIFT 0
+#define I40E_PRTDCB_RPPMC_LANRPPM_MASK I40E_MASK(0xFF, I40E_PRTDCB_RPPMC_LANRPPM_SHIFT)
+#define I40E_PRTDCB_RPPMC_RDMARPPM_SHIFT 8
+#define I40E_PRTDCB_RPPMC_RDMARPPM_MASK I40E_MASK(0xFF, I40E_PRTDCB_RPPMC_RDMARPPM_SHIFT)
+#define I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_SHIFT 16
+#define I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_MASK I40E_MASK(0xFF, I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_SHIFT)
+#define I40E_PRTDCB_RUP 0x001C0B00 /* Reset: CORER */
+#define I40E_PRTDCB_RUP_NOVLANUP_SHIFT 0
+#define I40E_PRTDCB_RUP_NOVLANUP_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP_NOVLANUP_SHIFT)
+#define I40E_PRTDCB_RUP2TC 0x001C09A0 /* Reset: CORER */
+#define I40E_PRTDCB_RUP2TC_UP0TC_SHIFT 0
+#define I40E_PRTDCB_RUP2TC_UP0TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP0TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP1TC_SHIFT 3
+#define I40E_PRTDCB_RUP2TC_UP1TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP1TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP2TC_SHIFT 6
+#define I40E_PRTDCB_RUP2TC_UP2TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP2TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP3TC_SHIFT 9
+#define I40E_PRTDCB_RUP2TC_UP3TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP3TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP4TC_SHIFT 12
+#define I40E_PRTDCB_RUP2TC_UP4TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP4TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP5TC_SHIFT 15
+#define I40E_PRTDCB_RUP2TC_UP5TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP5TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP6TC_SHIFT 18
+#define I40E_PRTDCB_RUP2TC_UP6TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP6TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP7TC_SHIFT 21
+#define I40E_PRTDCB_RUP2TC_UP7TC_MASK I40E_MASK(0x7, I40E_PRTDCB_RUP2TC_UP7TC_SHIFT)
+#define I40E_PRTDCB_RUPTQ(_i) (0x00122400 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTDCB_RUPTQ_MAX_INDEX 7
+#define I40E_PRTDCB_RUPTQ_RXQNUM_SHIFT 0
+#define I40E_PRTDCB_RUPTQ_RXQNUM_MASK I40E_MASK(0x3FFF, I40E_PRTDCB_RUPTQ_RXQNUM_SHIFT)
+#define I40E_PRTDCB_TC2PFC 0x001C0980 /* Reset: CORER */
+#define I40E_PRTDCB_TC2PFC_TC2PFC_SHIFT 0
+#define I40E_PRTDCB_TC2PFC_TC2PFC_MASK I40E_MASK(0xFF, I40E_PRTDCB_TC2PFC_TC2PFC_SHIFT)
+#define I40E_PRTDCB_TCMSTC(_i) (0x000A0040 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTDCB_TCMSTC_MAX_INDEX 7
+#define I40E_PRTDCB_TCMSTC_MSTC_SHIFT 0
+#define I40E_PRTDCB_TCMSTC_MSTC_MASK I40E_MASK(0xFFFFF, I40E_PRTDCB_TCMSTC_MSTC_SHIFT)
+#define I40E_PRTDCB_TCPMC 0x000A21A0 /* Reset: CORER */
+#define I40E_PRTDCB_TCPMC_CPM_SHIFT 0
+#define I40E_PRTDCB_TCPMC_CPM_MASK I40E_MASK(0x1FFF, I40E_PRTDCB_TCPMC_CPM_SHIFT)
+#define I40E_PRTDCB_TCPMC_LLTC_SHIFT 13
+#define I40E_PRTDCB_TCPMC_LLTC_MASK I40E_MASK(0xFF, I40E_PRTDCB_TCPMC_LLTC_SHIFT)
+#define I40E_PRTDCB_TCPMC_TCPM_MODE_SHIFT 30
+#define I40E_PRTDCB_TCPMC_TCPM_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_TCPMC_TCPM_MODE_SHIFT)
+#define I40E_PRTDCB_TCWSTC(_i) (0x000A2040 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTDCB_TCWSTC_MAX_INDEX 7
+#define I40E_PRTDCB_TCWSTC_MSTC_SHIFT 0
+#define I40E_PRTDCB_TCWSTC_MSTC_MASK I40E_MASK(0xFFFFF, I40E_PRTDCB_TCWSTC_MSTC_SHIFT)
+#define I40E_PRTDCB_TDPMC 0x000A0180 /* Reset: CORER */
+#define I40E_PRTDCB_TDPMC_DPM_SHIFT 0
+#define I40E_PRTDCB_TDPMC_DPM_MASK I40E_MASK(0xFF, I40E_PRTDCB_TDPMC_DPM_SHIFT)
+#define I40E_PRTDCB_TDPMC_TCPM_MODE_SHIFT 30
+#define I40E_PRTDCB_TDPMC_TCPM_MODE_MASK I40E_MASK(0x1, I40E_PRTDCB_TDPMC_TCPM_MODE_SHIFT)
+#define I40E_PRTDCB_TETSC_TCB 0x000AE060 /* Reset: CORER */
+#define I40E_PRTDCB_TETSC_TCB_EN_LL_STRICT_PRIORITY_SHIFT 0
+#define I40E_PRTDCB_TETSC_TCB_EN_LL_STRICT_PRIORITY_MASK I40E_MASK(0x1, \
+	I40E_PRTDCB_TETSC_TCB_EN_LL_STRICT_PRIORITY_SHIFT)
+#define I40E_PRTDCB_TETSC_TCB_LLTC_SHIFT 8
+#define I40E_PRTDCB_TETSC_TCB_LLTC_MASK I40E_MASK(0xFF, I40E_PRTDCB_TETSC_TCB_LLTC_SHIFT)
+#define I40E_PRTDCB_TETSC_TPB 0x00098060 /* Reset: CORER */
+#define I40E_PRTDCB_TETSC_TPB_EN_LL_STRICT_PRIORITY_SHIFT 0
+#define I40E_PRTDCB_TETSC_TPB_EN_LL_STRICT_PRIORITY_MASK I40E_MASK(0x1, \
+	I40E_PRTDCB_TETSC_TPB_EN_LL_STRICT_PRIORITY_SHIFT)
+#define I40E_PRTDCB_TETSC_TPB_LLTC_SHIFT 8
+#define I40E_PRTDCB_TETSC_TPB_LLTC_MASK I40E_MASK(0xFF, I40E_PRTDCB_TETSC_TPB_LLTC_SHIFT)
+#define I40E_PRTDCB_TFCS 0x001E4560 /* Reset: GLOBR */
+#define I40E_PRTDCB_TFCS_TXOFF_SHIFT 0
+#define I40E_PRTDCB_TFCS_TXOFF_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF0_SHIFT 8
+#define I40E_PRTDCB_TFCS_TXOFF0_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF0_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF1_SHIFT 9
+#define I40E_PRTDCB_TFCS_TXOFF1_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF1_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF2_SHIFT 10
+#define I40E_PRTDCB_TFCS_TXOFF2_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF2_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF3_SHIFT 11
+#define I40E_PRTDCB_TFCS_TXOFF3_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF3_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF4_SHIFT 12
+#define I40E_PRTDCB_TFCS_TXOFF4_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF4_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF5_SHIFT 13
+#define I40E_PRTDCB_TFCS_TXOFF5_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF5_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF6_SHIFT 14
+#define I40E_PRTDCB_TFCS_TXOFF6_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF6_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF7_SHIFT 15
+#define I40E_PRTDCB_TFCS_TXOFF7_MASK I40E_MASK(0x1, I40E_PRTDCB_TFCS_TXOFF7_SHIFT)
+#define I40E_PRTDCB_TPFCTS(_i) (0x001E4660 + ((_i) * 32)) /* _i=0...7 */ /* Reset: GLOBR */
+#define I40E_PRTDCB_TPFCTS_MAX_INDEX 7
+#define I40E_PRTDCB_TPFCTS_PFCTIMER_SHIFT 0
+#define I40E_PRTDCB_TPFCTS_PFCTIMER_MASK I40E_MASK(0x3FFF, I40E_PRTDCB_TPFCTS_PFCTIMER_SHIFT)
+#define I40E_GL_FWSTS 0x00083048 /* Reset: POR */
+#define I40E_GL_FWSTS_FWS1B_SHIFT 16
+#define I40E_GL_FWSTS_FWS1B_MASK I40E_MASK(0xFF, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_GL_FWSTS_FWS1B_EMPR_0 I40E_MASK(0x20, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_GL_FWSTS_FWS1B_EMPR_10 I40E_MASK(0x2A, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_CORER_MASK I40E_MASK(0x30, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_GLOBR_MASK I40E_MASK(0x31, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_TRANSITION_MASK I40E_MASK(0x32, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_XL710_GL_FWSTS_FWS1B_REC_MOD_NVM_MASK I40E_MASK(0x33, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_X722_GL_FWSTS_FWS1B_REC_MOD_CORER_MASK I40E_MASK(0xB, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_X722_GL_FWSTS_FWS1B_REC_MOD_GLOBR_MASK I40E_MASK(0xC, I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_GLGEN_GPIO_CTL(_i) (0x00088100 + ((_i) * 4)) /* _i=0...29 */ /* Reset: POR */
+#define I40E_GLGEN_GPIO_CTL_MAX_INDEX 29
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT 0
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK I40E_MASK(0x3, I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT 3
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_MASK I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PIN_DIR_SHIFT 4
+#define I40E_GLGEN_GPIO_CTL_TRI_CTL_SHIFT 5
+#define I40E_GLGEN_GPIO_CTL_OUT_CTL_SHIFT 6
+#define I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT 7
+#define I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK I40E_MASK(0x7, I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT 11
+#define I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT 12
+#define I40E_GLGEN_GPIO_CTL_LED_MODE_MASK I40E_MASK(0x1F, I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT  19
+#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_MASK   I40E_MASK(0x1, I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT 20
+#define I40E_GLGEN_GPIO_SET 0x00088184 /* Reset: POR */
+#define I40E_GLGEN_GPIO_SET_SDP_DATA_SHIFT 5
+#define I40E_GLGEN_GPIO_SET_DRIVE_SDP_SHIFT 6
+#define I40E_GLGEN_MDIO_I2C_SEL(_i) (0x000881C0 + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
+#define I40E_GLGEN_MSCA(_i) (0x0008818C + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
+#define I40E_GLGEN_MSCA_MDIADD_SHIFT 0
+#define I40E_GLGEN_MSCA_DEVADD_SHIFT 16
+#define I40E_GLGEN_MSCA_PHYADD_SHIFT 21
+#define I40E_GLGEN_MSCA_OPCODE_SHIFT 26
+#define I40E_GLGEN_MSCA_STCODE_SHIFT 28
+#define I40E_GLGEN_MSCA_MDICMD_SHIFT 30
+#define I40E_GLGEN_MSCA_MDICMD_MASK I40E_MASK(0x1, I40E_GLGEN_MSCA_MDICMD_SHIFT)
+#define I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT 31
+#define I40E_GLGEN_MSCA_MDIINPROGEN_MASK I40E_MASK(0x1u, I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT)
+#define I40E_GLGEN_MSRWD(_i) (0x0008819C + ((_i) * 4)) /* _i=0...3 */ /* Reset: POR */
+#define I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT 0
+#define I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT 16
+#define I40E_GLGEN_MSRWD_MDIRDDATA_MASK I40E_MASK(0xFFFF, I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT)
+#define I40E_GLGEN_PCIFCNCNT                0x001C0AB4 /* Reset: PCIR */
+#define I40E_GLGEN_PCIFCNCNT_PCIPFCNT_SHIFT 0
+#define I40E_GLGEN_PCIFCNCNT_PCIPFCNT_MASK  I40E_MASK(0x1F, I40E_GLGEN_PCIFCNCNT_PCIPFCNT_SHIFT)
+#define I40E_GLGEN_PCIFCNCNT_PCIVFCNT_SHIFT 16
+#define I40E_GLGEN_PCIFCNCNT_PCIVFCNT_MASK  I40E_MASK(0xFF, I40E_GLGEN_PCIFCNCNT_PCIVFCNT_SHIFT)
+#define I40E_GLGEN_RSTAT 0x000B8188 /* Reset: POR */
+#define I40E_GLGEN_RSTAT_DEVSTATE_SHIFT 0
+#define I40E_GLGEN_RSTAT_DEVSTATE_MASK I40E_MASK(0x3, I40E_GLGEN_RSTAT_DEVSTATE_SHIFT)
+#define I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT 2
+#define I40E_GLGEN_RSTAT_RESET_TYPE_MASK I40E_MASK(0x3, I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT)
+#define I40E_GLGEN_RSTCTL 0x000B8180 /* Reset: POR */
+#define I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT 0
+#define I40E_GLGEN_RSTCTL_GRSTDEL_MASK I40E_MASK(0x3F, I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT)
+#define I40E_GLGEN_RTRIG 0x000B8190 /* Reset: CORER */
+#define I40E_GLGEN_RTRIG_CORER_SHIFT 0
+#define I40E_GLGEN_RTRIG_CORER_MASK I40E_MASK(0x1, I40E_GLGEN_RTRIG_CORER_SHIFT)
+#define I40E_GLGEN_RTRIG_GLOBR_SHIFT 1
+#define I40E_GLGEN_RTRIG_GLOBR_MASK I40E_MASK(0x1, I40E_GLGEN_RTRIG_GLOBR_SHIFT)
+#define I40E_GLGEN_STAT 0x000B612C /* Reset: POR */
+#define I40E_GLGEN_VFLRSTAT(_i) (0x00092600 + ((_i) * 4)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLVFGEN_TIMER 0x000881BC /* Reset: CORER */
+#define I40E_PFGEN_CTRL 0x00092400 /* Reset: PFR */
+#define I40E_PFGEN_CTRL_PFSWR_SHIFT 0
+#define I40E_PFGEN_CTRL_PFSWR_MASK I40E_MASK(0x1, I40E_PFGEN_CTRL_PFSWR_SHIFT)
+#define I40E_PFGEN_PORTNUM 0x001C0480 /* Reset: CORER */
+#define I40E_PFGEN_PORTNUM_PORT_NUM_SHIFT 0
+#define I40E_PFGEN_PORTNUM_PORT_NUM_MASK I40E_MASK(0x3, I40E_PFGEN_PORTNUM_PORT_NUM_SHIFT)
+#define I40E_PRTGEN_CNF 0x000B8120 /* Reset: POR */
+#define I40E_PRTGEN_CNF_PORT_DIS_SHIFT 0
+#define I40E_PRTGEN_CNF_PORT_DIS_MASK I40E_MASK(0x1, I40E_PRTGEN_CNF_PORT_DIS_SHIFT)
+#define I40E_PRTGEN_STATUS 0x000B8100 /* Reset: POR */
+#define I40E_VFGEN_RSTAT1(_VF) (0x00074400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VPGEN_VFRSTAT(_VF) (0x00091C00 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_VPGEN_VFRSTAT_VFRD_SHIFT 0
+#define I40E_VPGEN_VFRSTAT_VFRD_MASK I40E_MASK(0x1, I40E_VPGEN_VFRSTAT_VFRD_SHIFT)
+#define I40E_VPGEN_VFRTRIG(_VF) (0x00091800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_VPGEN_VFRTRIG_VFSWR_SHIFT 0
+#define I40E_VPGEN_VFRTRIG_VFSWR_MASK I40E_MASK(0x1, I40E_VPGEN_VFRTRIG_VFSWR_SHIFT)
+#define I40E_GLHMC_FCOEDDPBASE(_i) (0x000C6600 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_FCOEDDPBASE_FPMFCOEDDPBASE_SHIFT 0
+#define I40E_GLHMC_FCOEDDPBASE_FPMFCOEDDPBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_FCOEDDPBASE_FPMFCOEDDPBASE_SHIFT)
+#define I40E_GLHMC_FCOEDDPCNT(_i) (0x000C6700 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_FCOEDDPOBJSZ 0x000C2010 /* Reset: CORER */
+#define I40E_GLHMC_FCOEFBASE(_i) (0x000C6800 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_FCOEFBASE_FPMFCOEFBASE_SHIFT 0
+#define I40E_GLHMC_FCOEFBASE_FPMFCOEFBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_FCOEFBASE_FPMFCOEFBASE_SHIFT)
+#define I40E_GLHMC_FCOEFCNT(_i) (0x000C6900 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_FCOEFMAX 0x000C20D0 /* Reset: CORER */
+#define I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_SHIFT 0
+#define I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_MASK I40E_MASK(0xFFFF, I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_SHIFT)
+#define I40E_GLHMC_FCOEFOBJSZ 0x000C2018 /* Reset: CORER */
+#define I40E_GLHMC_FCOEMAX 0x000C2014 /* Reset: CORER */
+#define I40E_GLHMC_LANQMAX 0x000C2008 /* Reset: CORER */
+#define I40E_GLHMC_LANRXBASE(_i) (0x000C6400 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_LANRXBASE_FPMLANRXBASE_SHIFT 0
+#define I40E_GLHMC_LANRXBASE_FPMLANRXBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_LANRXBASE_FPMLANRXBASE_SHIFT)
+#define I40E_GLHMC_LANRXCNT(_i) (0x000C6500 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_LANRXOBJSZ 0x000C200c /* Reset: CORER */
+#define I40E_GLHMC_LANTXBASE(_i) (0x000C6200 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_LANTXBASE_FPMLANTXBASE_SHIFT 0
+#define I40E_GLHMC_LANTXBASE_FPMLANTXBASE_MASK I40E_MASK(0xFFFFFF, I40E_GLHMC_LANTXBASE_FPMLANTXBASE_SHIFT)
+#define I40E_GLHMC_LANTXCNT(_i) (0x000C6300 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLHMC_LANTXOBJSZ 0x000C2004 /* Reset: CORER */
+#define I40E_PFHMC_ERRORDATA 0x000C0500 /* Reset: PFR */
+#define I40E_PFHMC_ERRORINFO 0x000C0400 /* Reset: PFR */
+#define I40E_PFHMC_PDINV 0x000C0300 /* Reset: PFR */
+#define I40E_PFHMC_PDINV_PMSDIDX_SHIFT 0
+#define I40E_PFHMC_PDINV_PMPDIDX_SHIFT 16
+#define I40E_PFHMC_SDCMD 0x000C0000 /* Reset: PFR */
+#define I40E_PFHMC_SDCMD_PMSDWR_SHIFT 31
+#define I40E_PFHMC_SDDATAHIGH 0x000C0200 /* Reset: PFR */
+#define I40E_PFHMC_SDDATALOW 0x000C0100 /* Reset: PFR */
+#define I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT 0
+#define I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT 1
+#define I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT 2
+#define I40E_PFGEN_PORTMDIO_NUM 0x0003F100 /* Reset: CORER */
+#define I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_SHIFT 4
+#define I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_MASK I40E_MASK(0x1, I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_SHIFT)
+#define I40E_PFINT_AEQCTL 0x00038700 /* Reset: CORER */
+#define I40E_PFINT_AEQCTL_MSIX_INDX_SHIFT 0
+#define I40E_PFINT_AEQCTL_ITR_INDX_SHIFT 11
+#define I40E_PFINT_AEQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_PFINT_AEQCTL_CAUSE_ENA_MASK I40E_MASK(0x1, I40E_PFINT_AEQCTL_CAUSE_ENA_SHIFT)
+#define I40E_PFINT_CEQCTL(_INTPF) (0x00036800 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: CORER */
+#define I40E_PFINT_CEQCTL_MSIX_INDX_SHIFT 0
+#define I40E_PFINT_CEQCTL_ITR_INDX_SHIFT 11
+#define I40E_PFINT_CEQCTL_NEXTQ_INDX_SHIFT 16
+#define I40E_PFINT_CEQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_PFINT_CEQCTL_CAUSE_ENA_MASK I40E_MASK(0x1, I40E_PFINT_CEQCTL_CAUSE_ENA_SHIFT)
+#define I40E_GLINT_CTL 0x0003F800 /* Reset: CORER */
+#define I40E_GLINT_CTL_DIS_AUTOMASK_VF0_SHIFT 1
+#define I40E_GLINT_CTL_DIS_AUTOMASK_VF0_MASK I40E_MASK(0x1, I40E_GLINT_CTL_DIS_AUTOMASK_VF0_SHIFT)
+#define I40E_PFINT_DYN_CTL0 0x00038480 /* Reset: PFR */
+#define I40E_PFINT_DYN_CTL0_INTENA_SHIFT 0
+#define I40E_PFINT_DYN_CTL0_INTENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTL0_INTENA_SHIFT)
+#define I40E_PFINT_DYN_CTL0_CLEARPBA_SHIFT 1
+#define I40E_PFINT_DYN_CTL0_CLEARPBA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTL0_CLEARPBA_SHIFT)
+#define I40E_PFINT_DYN_CTL0_SWINT_TRIG_SHIFT 2
+#define I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTL0_SWINT_TRIG_SHIFT)
+#define I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT 3
+#define I40E_PFINT_DYN_CTL0_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT)
+#define I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_SHIFT 24
+#define I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_PFINT_DYN_CTL0_SW_ITR_INDX_SHIFT 25
+#define I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTL0_SW_ITR_INDX_SHIFT)
+#define I40E_PFINT_DYN_CTL0_INTENA_MSK_SHIFT 31
+#define I40E_PFINT_DYN_CTL0_INTENA_MSK_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTL0_INTENA_MSK_SHIFT)
+#define I40E_PFINT_DYN_CTLN(_INTPF) (0x00034800 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: PFR */
+#define I40E_PFINT_DYN_CTLN_INTENA_SHIFT 0
+#define I40E_PFINT_DYN_CTLN_INTENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_INTENA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT 1
+#define I40E_PFINT_DYN_CTLN_CLEARPBA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_SWINT_TRIG_SHIFT 2
+#define I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SWINT_TRIG_SHIFT)
+#define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3
+#define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)
+#define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */
+#define I40E_PFINT_ICR0_INTEVENT_SHIFT 0
+#define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_0_SHIFT 1
+#define I40E_PFINT_ICR0_QUEUE_0_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_QUEUE_0_SHIFT)
+#define I40E_PFINT_ICR0_ECC_ERR_SHIFT 16
+#define I40E_PFINT_ICR0_ECC_ERR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ECC_ERR_SHIFT)
+#define I40E_PFINT_ICR0_MAL_DETECT_SHIFT 19
+#define I40E_PFINT_ICR0_MAL_DETECT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_MAL_DETECT_SHIFT)
+#define I40E_PFINT_ICR0_GRST_SHIFT 20
+#define I40E_PFINT_ICR0_GRST_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_GRST_SHIFT)
+#define I40E_PFINT_ICR0_PCI_EXCEPTION_SHIFT 21
+#define I40E_PFINT_ICR0_PCI_EXCEPTION_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_PCI_EXCEPTION_SHIFT)
+#define I40E_PFINT_ICR0_TIMESYNC_SHIFT 23
+#define I40E_PFINT_ICR0_TIMESYNC_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_TIMESYNC_SHIFT)
+#define I40E_PFINT_ICR0_HMC_ERR_SHIFT 26
+#define I40E_PFINT_ICR0_HMC_ERR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_HMC_ERR_SHIFT)
+#define I40E_PFINT_ICR0_PE_CRITERR_SHIFT 28
+#define I40E_PFINT_ICR0_PE_CRITERR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_PE_CRITERR_SHIFT)
+#define I40E_PFINT_ICR0_VFLR_SHIFT 29
+#define I40E_PFINT_ICR0_VFLR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_VFLR_SHIFT)
+#define I40E_PFINT_ICR0_ADMINQ_SHIFT 30
+#define I40E_PFINT_ICR0_ADMINQ_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ADMINQ_SHIFT)
+#define I40E_PFINT_ICR0_SWINT_SHIFT 31
+#define I40E_PFINT_ICR0_SWINT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_SWINT_SHIFT)
+#define I40E_PFINT_ICR0_ENA 0x00038800 /* Reset: CORER */
+#define I40E_PFINT_ICR0_ENA_ECC_ERR_SHIFT 16
+#define I40E_PFINT_ICR0_ENA_ECC_ERR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_ECC_ERR_SHIFT)
+#define I40E_PFINT_ICR0_ENA_MAL_DETECT_SHIFT 19
+#define I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_MAL_DETECT_SHIFT)
+#define I40E_PFINT_ICR0_ENA_GRST_SHIFT 20
+#define I40E_PFINT_ICR0_ENA_GRST_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_GRST_SHIFT)
+#define I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_SHIFT 21
+#define I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_SHIFT)
+#define I40E_PFINT_ICR0_ENA_GPIO_SHIFT 22
+#define I40E_PFINT_ICR0_ENA_GPIO_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_GPIO_SHIFT)
+#define I40E_PFINT_ICR0_ENA_TIMESYNC_SHIFT 23
+#define I40E_PFINT_ICR0_ENA_TIMESYNC_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_TIMESYNC_SHIFT)
+#define I40E_PFINT_ICR0_ENA_HMC_ERR_SHIFT 26
+#define I40E_PFINT_ICR0_ENA_HMC_ERR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_HMC_ERR_SHIFT)
+#define I40E_PFINT_ICR0_ENA_PE_CRITERR_SHIFT 28
+#define I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_PE_CRITERR_SHIFT)
+#define I40E_PFINT_ICR0_ENA_VFLR_SHIFT 29
+#define I40E_PFINT_ICR0_ENA_VFLR_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_VFLR_SHIFT)
+#define I40E_PFINT_ICR0_ENA_ADMINQ_SHIFT 30
+#define I40E_PFINT_ICR0_ENA_ADMINQ_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_ENA_ADMINQ_SHIFT)
+#define I40E_PFINT_ITR0(_i) (0x00038000 + ((_i) * 128)) /* _i=0...2 */ /* Reset: PFR */
+#define I40E_PFINT_ITRN(_i, _INTPF) (0x00030000 + ((_i) * 2048 + (_INTPF) * 4)) /* _i=0...2, _INTPF=0...511 */ /* Reset: PFR */
+#define I40E_PFINT_LNKLST0 0x00038500 /* Reset: PFR */
+#define I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT 0
+#define I40E_PFINT_LNKLSTN(_INTPF) (0x00035000 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: PFR */
+#define I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT 0
+#define I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK I40E_MASK(0x7FF, I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT)
+#define I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT 11
+#define I40E_PFINT_RATEN(_INTPF) (0x00035800 + ((_INTPF) * 4)) /* _i=0...511 */ /* Reset: PFR */
+#define I40E_PFINT_STAT_CTL0 0x00038400 /* Reset: CORER */
+#define I40E_QINT_RQCTL(_Q) (0x0003A000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: CORER */
+#define I40E_QINT_RQCTL_MSIX_INDX_SHIFT 0
+#define I40E_QINT_RQCTL_MSIX_INDX_MASK I40E_MASK(0xFF, I40E_QINT_RQCTL_MSIX_INDX_SHIFT)
+#define I40E_QINT_RQCTL_ITR_INDX_SHIFT 11
+#define I40E_QINT_RQCTL_ITR_INDX_MASK I40E_MASK(0x3, I40E_QINT_RQCTL_ITR_INDX_SHIFT)
+#define I40E_QINT_RQCTL_MSIX0_INDX_SHIFT 13
+#define I40E_QINT_RQCTL_MSIX0_INDX_MASK I40E_MASK(0x7, I40E_QINT_RQCTL_MSIX0_INDX_SHIFT)
+#define I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT 16
+#define I40E_QINT_RQCTL_NEXTQ_INDX_MASK I40E_MASK(0x7FF, I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT)
+#define I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT 27
+#define I40E_QINT_RQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_QINT_RQCTL_CAUSE_ENA_MASK I40E_MASK(0x1, I40E_QINT_RQCTL_CAUSE_ENA_SHIFT)
+#define I40E_QINT_RQCTL_INTEVENT_SHIFT 31
+#define I40E_QINT_RQCTL_INTEVENT_MASK I40E_MASK(0x1, I40E_QINT_RQCTL_INTEVENT_SHIFT)
+#define I40E_QINT_TQCTL(_Q) (0x0003C000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: CORER */
+#define I40E_QINT_TQCTL_MSIX_INDX_SHIFT 0
+#define I40E_QINT_TQCTL_MSIX_INDX_MASK I40E_MASK(0xFF, I40E_QINT_TQCTL_MSIX_INDX_SHIFT)
+#define I40E_QINT_TQCTL_ITR_INDX_SHIFT 11
+#define I40E_QINT_TQCTL_ITR_INDX_MASK I40E_MASK(0x3, I40E_QINT_TQCTL_ITR_INDX_SHIFT)
+#define I40E_QINT_TQCTL_MSIX0_INDX_SHIFT 13
+#define I40E_QINT_TQCTL_MSIX0_INDX_MASK I40E_MASK(0x7, I40E_QINT_TQCTL_MSIX0_INDX_SHIFT)
+#define I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT 16
+#define I40E_QINT_TQCTL_NEXTQ_INDX_MASK I40E_MASK(0x7FF, I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT)
+#define I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT 27
+#define I40E_QINT_TQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_QINT_TQCTL_CAUSE_ENA_MASK I40E_MASK(0x1, I40E_QINT_TQCTL_CAUSE_ENA_SHIFT)
+#define I40E_QINT_TQCTL_INTEVENT_SHIFT 31
+#define I40E_QINT_TQCTL_INTEVENT_MASK I40E_MASK(0x1, I40E_QINT_TQCTL_INTEVENT_SHIFT)
+#define I40E_VFINT_DYN_CTL0(_VF) (0x0002A400 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VFINT_DYN_CTLN(_INTVF) (0x00024800 + ((_INTVF) * 4)) /* _i=0...511 */ /* Reset: VFR */
+#define I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT 1
+#define I40E_VFINT_DYN_CTLN_CLEARPBA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT)
+#define I40E_VFINT_ICR0_ADMINQ_SHIFT 30
+#define I40E_VFINT_ICR0_ADMINQ_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ADMINQ_SHIFT)
+#define I40E_VFINT_ICR0_ENA(_VF) (0x0002C000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_VPINT_AEQCTL(_VF) (0x0002B800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT 0
+#define I40E_VPINT_AEQCTL_ITR_INDX_SHIFT 11
+#define I40E_VPINT_AEQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_VPINT_AEQCTL_CAUSE_ENA_MASK I40E_MASK(0x1, I40E_VPINT_AEQCTL_CAUSE_ENA_SHIFT)
+#define I40E_VPINT_CEQCTL(_INTVF) (0x00026800 + ((_INTVF) * 4)) /* _i=0...511 */ /* Reset: CORER */
+#define I40E_VPINT_CEQCTL_MSIX_INDX_SHIFT 0
+#define I40E_VPINT_CEQCTL_ITR_INDX_SHIFT 11
+#define I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT 16
+#define I40E_VPINT_CEQCTL_NEXTQ_INDX_MASK I40E_MASK(0x7FF, I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT)
+#define I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT 27
+#define I40E_VPINT_CEQCTL_NEXTQ_TYPE_MASK I40E_MASK(0x3, I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT)
+#define I40E_VPINT_CEQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_VPINT_CEQCTL_CAUSE_ENA_MASK I40E_MASK(0x1, I40E_VPINT_CEQCTL_CAUSE_ENA_SHIFT)
+#define I40E_VPINT_LNKLST0(_VF) (0x0002A800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VPINT_LNKLST0_FIRSTQ_INDX_SHIFT 0
+#define I40E_VPINT_LNKLST0_FIRSTQ_INDX_MASK I40E_MASK(0x7FF, I40E_VPINT_LNKLST0_FIRSTQ_INDX_SHIFT)
+#define I40E_VPINT_LNKLSTN(_INTVF) (0x00025000 + ((_INTVF) * 4)) /* _i=0...511 */ /* Reset: VFR */
+#define I40E_VPINT_LNKLSTN_FIRSTQ_INDX_SHIFT 0
+#define I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK I40E_MASK(0x7FF, I40E_VPINT_LNKLSTN_FIRSTQ_INDX_SHIFT)
+#define I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT 11
+#define I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK I40E_MASK(0x3, I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT)
+#define I40E_GLLAN_RCTL_0 0x0012A500 /* Reset: CORER */
+#define I40E_GLLAN_RCTL_0_PXE_MODE_SHIFT 0
+#define I40E_GLLAN_RCTL_0_PXE_MODE_MASK I40E_MASK(0x1, I40E_GLLAN_RCTL_0_PXE_MODE_SHIFT)
+#define I40E_GLLAN_TSOMSK_F 0x000442D8 /* Reset: CORER */
+#define I40E_GLLAN_TSOMSK_L 0x000442E0 /* Reset: CORER */
+#define I40E_GLLAN_TSOMSK_M 0x000442DC /* Reset: CORER */
+#define I40E_GLLAN_TXPRE_QDIS(_i) (0x000e6500 + ((_i) * 4)) /* _i=0...11 */ /* Reset: CORER */
+#define I40E_GLLAN_TXPRE_QDIS_QINDX_SHIFT 0
+#define I40E_GLLAN_TXPRE_QDIS_QINDX_MASK I40E_MASK(0x7FF, I40E_GLLAN_TXPRE_QDIS_QINDX_SHIFT)
+#define I40E_GLLAN_TXPRE_QDIS_SET_QDIS_SHIFT 30
+#define I40E_GLLAN_TXPRE_QDIS_SET_QDIS_MASK I40E_MASK(0x1, I40E_GLLAN_TXPRE_QDIS_SET_QDIS_SHIFT)
+#define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT 31
+#define I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_MASK I40E_MASK(0x1u, I40E_GLLAN_TXPRE_QDIS_CLEAR_QDIS_SHIFT)
+#define I40E_PFLAN_QALLOC 0x001C0400 /* Reset: CORER */
+#define I40E_PFLAN_QALLOC_FIRSTQ_SHIFT 0
+#define I40E_PFLAN_QALLOC_FIRSTQ_MASK I40E_MASK(0x7FF, I40E_PFLAN_QALLOC_FIRSTQ_SHIFT)
+#define I40E_PFLAN_QALLOC_LASTQ_SHIFT 16
+#define I40E_PFLAN_QALLOC_LASTQ_MASK I40E_MASK(0x7FF, I40E_PFLAN_QALLOC_LASTQ_SHIFT)
+#define I40E_PFLAN_QALLOC_VALID_SHIFT 31
+#define I40E_PFLAN_QALLOC_VALID_MASK I40E_MASK(0x1u, I40E_PFLAN_QALLOC_VALID_SHIFT)
+#define I40E_QRX_ENA(_Q) (0x00120000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: PFR */
+#define I40E_QRX_ENA_QENA_REQ_SHIFT 0
+#define I40E_QRX_ENA_QENA_REQ_MASK I40E_MASK(0x1, I40E_QRX_ENA_QENA_REQ_SHIFT)
+#define I40E_QRX_ENA_QENA_STAT_SHIFT 2
+#define I40E_QRX_ENA_QENA_STAT_MASK I40E_MASK(0x1, I40E_QRX_ENA_QENA_STAT_SHIFT)
+#define I40E_QRX_TAIL(_Q) (0x00128000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: CORER */
+#define I40E_QTX_CTL(_Q) (0x00104000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: CORER */
+#define I40E_QTX_CTL_PFVF_Q_SHIFT 0
+#define I40E_QTX_CTL_PFVF_Q_MASK I40E_MASK(0x3, I40E_QTX_CTL_PFVF_Q_SHIFT)
+#define I40E_QTX_CTL_PF_INDX_SHIFT 2
+#define I40E_QTX_CTL_PF_INDX_MASK I40E_MASK(0xF, I40E_QTX_CTL_PF_INDX_SHIFT)
+#define I40E_QTX_CTL_VFVM_INDX_SHIFT 7
+#define I40E_QTX_CTL_VFVM_INDX_MASK I40E_MASK(0x1FF, I40E_QTX_CTL_VFVM_INDX_SHIFT)
+#define I40E_QTX_ENA(_Q) (0x00100000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: PFR */
+#define I40E_QTX_ENA_QENA_REQ_SHIFT 0
+#define I40E_QTX_ENA_QENA_REQ_MASK I40E_MASK(0x1, I40E_QTX_ENA_QENA_REQ_SHIFT)
+#define I40E_QTX_ENA_QENA_STAT_SHIFT 2
+#define I40E_QTX_ENA_QENA_STAT_MASK I40E_MASK(0x1, I40E_QTX_ENA_QENA_STAT_SHIFT)
+#define I40E_QTX_HEAD(_Q) (0x000E4000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: CORER */
+#define I40E_QTX_TAIL(_Q) (0x00108000 + ((_Q) * 4)) /* _i=0...1535 */ /* Reset: PFR */
+#define I40E_VPLAN_MAPENA(_VF) (0x00074000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: VFR */
+#define I40E_VPLAN_MAPENA_TXRX_ENA_SHIFT 0
+#define I40E_VPLAN_MAPENA_TXRX_ENA_MASK I40E_MASK(0x1, I40E_VPLAN_MAPENA_TXRX_ENA_SHIFT)
+#define I40E_VPLAN_QTABLE(_i, _VF) (0x00070000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...15, _VF=0...127 */ /* Reset: VFR */
+#define I40E_VPLAN_QTABLE_QINDEX_SHIFT 0
+#define I40E_VPLAN_QTABLE_QINDEX_MASK I40E_MASK(0x7FF, I40E_VPLAN_QTABLE_QINDEX_SHIFT)
+#define I40E_VSILAN_QBASE(_VSI) (0x0020C800 + ((_VSI) * 4)) /* _i=0...383 */ /* Reset: PFR */
+#define I40E_VSILAN_QBASE_VSIQTABLE_ENA_SHIFT 11
+#define I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK I40E_MASK(0x1, I40E_VSILAN_QBASE_VSIQTABLE_ENA_SHIFT)
+#define I40E_VSILAN_QTABLE(_i, _VSI) (0x00200000 + ((_i) * 2048 + (_VSI) * 4)) /* _i=0...7, _VSI=0...383 */ /* Reset: PFR */
+#define I40E_PRTGL_SAH 0x001E2140 /* Reset: GLOBR */
+#define I40E_PRTGL_SAH_FC_SAH_SHIFT 0
+#define I40E_PRTGL_SAH_FC_SAH_MASK I40E_MASK(0xFFFF, I40E_PRTGL_SAH_FC_SAH_SHIFT)
+#define I40E_PRTGL_SAH_MFS_SHIFT 16
+#define I40E_PRTGL_SAH_MFS_MASK I40E_MASK(0xFFFF, I40E_PRTGL_SAH_MFS_SHIFT)
+#define I40E_PRTGL_SAL 0x001E2120 /* Reset: GLOBR */
+#define I40E_PRTGL_SAL_FC_SAL_SHIFT 0
+#define I40E_PRTGL_SAL_FC_SAL_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTGL_SAL_FC_SAL_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP 0x001E3260 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_MASK I40E_MASK(0x1, \
+	I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP 0x001E32E0 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_MASK I40E_MASK(0x1, \
+	I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE 0x001E30C0 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_MASK I40E_MASK(0x1FF, \
+	I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE 0x001E30D0 /* Reset: GLOBR */
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_MASK I40E_MASK(0x1FF, \
+	I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(_i) (0x001E3400 + ((_i) * 16)) /* _i=0...8 */
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MAX_INDEX 8
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MASK I40E_MASK(0xFFFF, \
+	I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_SHIFT)
+#define I40E_GLNVM_FLA 0x000B6108 /* Reset: POR */
+#define I40E_GLNVM_FLA_LOCKED_SHIFT 6
+#define I40E_GLNVM_FLA_LOCKED_MASK I40E_MASK(0x1, I40E_GLNVM_FLA_LOCKED_SHIFT)
+#define I40E_GLNVM_GENS 0x000B6100 /* Reset: POR */
+#define I40E_GLNVM_GENS_SR_SIZE_SHIFT 5
+#define I40E_GLNVM_GENS_SR_SIZE_MASK I40E_MASK(0x7, I40E_GLNVM_GENS_SR_SIZE_SHIFT)
+#define I40E_GLNVM_SRCTL 0x000B6110 /* Reset: POR */
+#define I40E_GLNVM_SRCTL_ADDR_SHIFT 14
+#define I40E_GLNVM_SRCTL_START_SHIFT 30
+#define I40E_GLNVM_SRCTL_DONE_SHIFT 31
+#define I40E_GLNVM_SRCTL_DONE_MASK I40E_MASK(0x1u, I40E_GLNVM_SRCTL_DONE_SHIFT)
+#define I40E_GLNVM_SRDATA 0x000B6114 /* Reset: POR */
+#define I40E_GLNVM_SRDATA_RDDATA_SHIFT 16
+#define I40E_GLNVM_SRDATA_RDDATA_MASK I40E_MASK(0xFFFF, I40E_GLNVM_SRDATA_RDDATA_SHIFT)
+#define I40E_GLNVM_ULD 0x000B6008 /* Reset: POR */
+#define I40E_GLNVM_ULD_CONF_CORE_DONE_SHIFT 3
+#define I40E_GLNVM_ULD_CONF_CORE_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_CONF_CORE_DONE_SHIFT)
+#define I40E_GLNVM_ULD_CONF_GLOBAL_DONE_SHIFT 4
+#define I40E_GLNVM_ULD_CONF_GLOBAL_DONE_MASK I40E_MASK(0x1, I40E_GLNVM_ULD_CONF_GLOBAL_DONE_SHIFT)
+#define I40E_GLPCI_CAPSUP 0x000BE4A8 /* Reset: PCIR */
+#define I40E_GLPCI_CAPSUP_ARI_EN_SHIFT 4
+#define I40E_GLPCI_CAPSUP_ARI_EN_MASK I40E_MASK(0x1, I40E_GLPCI_CAPSUP_ARI_EN_SHIFT)
+#define I40E_GLPCI_CNF2 0x000BE494 /* Reset: PCIR */
+#define I40E_GLPCI_CNF2_MSI_X_PF_N_SHIFT 2
+#define I40E_GLPCI_CNF2_MSI_X_PF_N_MASK I40E_MASK(0x7FF, I40E_GLPCI_CNF2_MSI_X_PF_N_SHIFT)
+#define I40E_GLPCI_CNF2_MSI_X_VF_N_SHIFT 13
+#define I40E_GLPCI_CNF2_MSI_X_VF_N_MASK I40E_MASK(0x7FF, I40E_GLPCI_CNF2_MSI_X_VF_N_SHIFT)
+#define I40E_GLPCI_LBARCTRL 0x000BE484 /* Reset: POR */
+#define I40E_GLPCI_LBARCTRL_FL_SIZE_SHIFT 6
+#define I40E_GLPCI_LBARCTRL_FL_SIZE_MASK I40E_MASK(0x7, I40E_GLPCI_LBARCTRL_FL_SIZE_SHIFT)
+#define I40E_PF_FUNC_RID 0x0009C000 /* Reset: PCIR */
+#define I40E_PF_PCI_CIAA 0x0009C080 /* Reset: FLR */
+#define I40E_PF_PCI_CIAA_VF_NUM_SHIFT 12
+#define I40E_PF_PCI_CIAD 0x0009C100 /* Reset: FLR */
+#define I40E_PRTPM_EEE_STAT 0x001E4320 /* Reset: GLOBR */
+#define I40E_PFPCI_SUBSYSID 0x000BE100 /* Reset: PCIR */
+#define I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT 30
+#define I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK I40E_MASK(0x1, I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT)
+#define I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT 31
+#define I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_MASK I40E_MASK(0x1, I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT)
+#define I40E_PRTPM_EEER_TX_LPI_EN_SHIFT 16
+#define I40E_PRTPM_EEER_TX_LPI_EN_MASK  I40E_MASK(0x1, I40E_PRTPM_EEER_TX_LPI_EN_SHIFT)
+#define I40E_PRTPM_RLPIC 0x001E43A0 /* Reset: GLOBR */
+#define I40E_PRTPM_TLPIC 0x001E43C0 /* Reset: GLOBR */
+#define I40E_PRTRPB_DHW(_i) (0x000AC100 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTRPB_DHW_DHW_TCN_SHIFT 0
+#define I40E_PRTRPB_DHW_DHW_TCN_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_DHW_DHW_TCN_SHIFT)
+#define I40E_PRTRPB_DLW(_i) (0x000AC220 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTRPB_DLW_DLW_TCN_SHIFT 0
+#define I40E_PRTRPB_DLW_DLW_TCN_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_DLW_DLW_TCN_SHIFT)
+#define I40E_PRTRPB_DPS(_i) (0x000AC320 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTRPB_DPS_DPS_TCN_SHIFT 0
+#define I40E_PRTRPB_DPS_DPS_TCN_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_DPS_DPS_TCN_SHIFT)
+#define I40E_PRTRPB_SHT(_i) (0x000AC480 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTRPB_SHT_SHT_TCN_SHIFT 0
+#define I40E_PRTRPB_SHT_SHT_TCN_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_SHT_SHT_TCN_SHIFT)
+#define I40E_PRTRPB_SHW 0x000AC580 /* Reset: CORER */
+#define I40E_PRTRPB_SHW_SHW_SHIFT 0
+#define I40E_PRTRPB_SHW_SHW_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_SHW_SHW_SHIFT)
+#define I40E_PRTRPB_SLT(_i) (0x000AC5A0 + ((_i) * 32)) /* _i=0...7 */ /* Reset: CORER */
+#define I40E_PRTRPB_SLT_SLT_TCN_SHIFT 0
+#define I40E_PRTRPB_SLT_SLT_TCN_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_SLT_SLT_TCN_SHIFT)
+#define I40E_PRTRPB_SLW 0x000AC6A0 /* Reset: CORER */
+#define I40E_PRTRPB_SLW_SLW_SHIFT 0
+#define I40E_PRTRPB_SLW_SLW_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_SLW_SLW_SHIFT)
+#define I40E_PRTRPB_SPS 0x000AC7C0 /* Reset: CORER */
+#define I40E_PRTRPB_SPS_SPS_SHIFT 0
+#define I40E_PRTRPB_SPS_SPS_MASK I40E_MASK(0xFFFFF, I40E_PRTRPB_SPS_SPS_SHIFT)
+#define I40E_GLQF_FDCNT_0 0x00269BAC /* Reset: CORER */
+#define I40E_GLQF_FDCNT_0_GUARANT_CNT_SHIFT 0
+#define I40E_GLQF_FDCNT_0_GUARANT_CNT_MASK I40E_MASK(0x1FFF, I40E_GLQF_FDCNT_0_GUARANT_CNT_SHIFT)
+#define I40E_GLQF_FDCNT_0_BESTCNT_SHIFT 13
+#define I40E_GLQF_FDCNT_0_BESTCNT_MASK I40E_MASK(0x1FFF, I40E_GLQF_FDCNT_0_BESTCNT_SHIFT)
+#define I40E_GLQF_HKEY(_i) (0x00270140 + ((_i) * 4)) /* _i=0...12 */ /* Reset: CORER */
+#define I40E_GLQF_HKEY_MAX_INDEX 12
+#define I40E_GLQF_PCNT(_i) (0x00266800 + ((_i) * 4)) /* _i=0...511 */ /* Reset: CORER */
+#define I40E_PFQF_CTL_0 0x001C0AC0 /* Reset: CORER */
+#define I40E_PFQF_CTL_0_PEHSIZE_SHIFT 0
+#define I40E_PFQF_CTL_0_PEHSIZE_MASK I40E_MASK(0x1F, I40E_PFQF_CTL_0_PEHSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_PEDSIZE_SHIFT 5
+#define I40E_PFQF_CTL_0_PEDSIZE_MASK I40E_MASK(0x1F, I40E_PFQF_CTL_0_PEDSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_PFFCHSIZE_SHIFT 10
+#define I40E_PFQF_CTL_0_PFFCHSIZE_MASK I40E_MASK(0xF, I40E_PFQF_CTL_0_PFFCHSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_PFFCDSIZE_SHIFT 14
+#define I40E_PFQF_CTL_0_PFFCDSIZE_MASK I40E_MASK(0x3, I40E_PFQF_CTL_0_PFFCDSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_HASHLUTSIZE_SHIFT 16
+#define I40E_PFQF_CTL_0_HASHLUTSIZE_MASK I40E_MASK(0x1, I40E_PFQF_CTL_0_HASHLUTSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_FD_ENA_SHIFT 17
+#define I40E_PFQF_CTL_0_FD_ENA_MASK I40E_MASK(0x1, I40E_PFQF_CTL_0_FD_ENA_SHIFT)
+#define I40E_PFQF_CTL_0_ETYPE_ENA_SHIFT 18
+#define I40E_PFQF_CTL_0_ETYPE_ENA_MASK I40E_MASK(0x1, I40E_PFQF_CTL_0_ETYPE_ENA_SHIFT)
+#define I40E_PFQF_CTL_0_MACVLAN_ENA_SHIFT 19
+#define I40E_PFQF_CTL_0_MACVLAN_ENA_MASK I40E_MASK(0x1, I40E_PFQF_CTL_0_MACVLAN_ENA_SHIFT)
+#define I40E_PFQF_CTL_1 0x00245D80 /* Reset: CORER */
+#define I40E_PFQF_CTL_1_CLEARFDTABLE_SHIFT 0
+#define I40E_PFQF_CTL_1_CLEARFDTABLE_MASK I40E_MASK(0x1, I40E_PFQF_CTL_1_CLEARFDTABLE_SHIFT)
+#define I40E_PFQF_FDSTAT 0x00246380 /* Reset: CORER */
+#define I40E_PFQF_FDSTAT_GUARANT_CNT_SHIFT 0
+#define I40E_PFQF_FDSTAT_GUARANT_CNT_MASK I40E_MASK(0x1FFF, I40E_PFQF_FDSTAT_GUARANT_CNT_SHIFT)
+#define I40E_PFQF_FDSTAT_BEST_CNT_SHIFT 16
+#define I40E_PFQF_FDSTAT_BEST_CNT_MASK I40E_MASK(0x1FFF, I40E_PFQF_FDSTAT_BEST_CNT_SHIFT)
+#define I40E_PFQF_HENA(_i) (0x00245900 + ((_i) * 128)) /* _i=0...1 */ /* Reset: CORER */
+#define I40E_PFQF_HKEY(_i) (0x00244800 + ((_i) * 128)) /* _i=0...12 */ /* Reset: CORER */
+#define I40E_PFQF_HKEY_MAX_INDEX 12
+#define I40E_PFQF_HLUT(_i) (0x00240000 + ((_i) * 128)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_PFQF_HLUT_MAX_INDEX 127
+#define I40E_PRTQF_FD_INSET(_i, _j) (0x00250000 + ((_i) * 64 + (_j) * 32)) /* _i=0...63, _j=0...1 */ /* Reset: CORER */
+#define I40E_PRTQF_FD_INSET_MAX_INDEX 63
+#define I40E_PRTQF_FD_INSET_INSET_SHIFT 0
+#define I40E_PRTQF_FD_INSET_INSET_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTQF_FD_INSET_INSET_SHIFT)
+#define I40E_PRTQF_FD_INSET(_i, _j) (0x00250000 + ((_i) * 64 + (_j) * 32)) /* _i=0...63, _j=0...1 */ /* Reset: CORER */
+#define I40E_PRTQF_FD_INSET_MAX_INDEX 63
+#define I40E_PRTQF_FD_INSET_INSET_SHIFT 0
+#define I40E_PRTQF_FD_INSET_INSET_MASK I40E_MASK(0xFFFFFFFF, I40E_PRTQF_FD_INSET_INSET_SHIFT)
+#define I40E_PRTQF_FLX_PIT(_i) (0x00255200 + ((_i) * 32)) /* _i=0...8 */ /* Reset: CORER */
+#define I40E_PRTQF_FLX_PIT_SOURCE_OFF_SHIFT 0
+#define I40E_PRTQF_FLX_PIT_SOURCE_OFF_MASK I40E_MASK(0x1F, I40E_PRTQF_FLX_PIT_SOURCE_OFF_SHIFT)
+#define I40E_PRTQF_FLX_PIT_FSIZE_SHIFT 5
+#define I40E_PRTQF_FLX_PIT_FSIZE_MASK I40E_MASK(0x1F, I40E_PRTQF_FLX_PIT_FSIZE_SHIFT)
+#define I40E_PRTQF_FLX_PIT_DEST_OFF_SHIFT 10
+#define I40E_PRTQF_FLX_PIT_DEST_OFF_MASK I40E_MASK(0x3F, I40E_PRTQF_FLX_PIT_DEST_OFF_SHIFT)
+#define I40E_VFQF_HENA1(_i, _VF) (0x00230800 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...1, _VF=0...127 */ /* Reset: CORER */
+#define I40E_VFQF_HKEY1(_i, _VF) (0x00228000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...12, _VF=0...127 */ /* Reset: CORER */
+#define I40E_VFQF_HKEY1_MAX_INDEX 12
+#define I40E_VFQF_HLUT1(_i, _VF) (0x00220000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...15, _VF=0...127 */ /* Reset: CORER */
+#define I40E_VFQF_HLUT1_MAX_INDEX 15
+#define I40E_GL_RXERR1H(_i)             (0x00318004 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_RXERR1H_MAX_INDEX       143
+#define I40E_GL_RXERR1H_RXERR1H_SHIFT   0
+#define I40E_GL_RXERR1H_RXERR1H_MASK    I40E_MASK(0xFFFFFFFF, I40E_GL_RXERR1H_RXERR1H_SHIFT)
+#define I40E_GL_RXERR1L(_i)             (0x00318000 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_RXERR1L_MAX_INDEX       143
+#define I40E_GL_RXERR1L_RXERR1L_SHIFT   0
+#define I40E_GL_RXERR1L_RXERR1L_MASK    I40E_MASK(0xFFFFFFFF, I40E_GL_RXERR1L_RXERR1L_SHIFT)
+#define I40E_GLPRT_BPRCH(_i) (0x003005E4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_BPRCL(_i) (0x003005E0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_BPTCH(_i) (0x00300A04 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_BPTCL(_i) (0x00300A00 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_CRCERRS(_i) (0x00300080 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_GORCH(_i) (0x00300004 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_GORCL(_i) (0x00300000 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_GOTCH(_i) (0x00300684 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_GOTCL(_i) (0x00300680 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_ILLERRC(_i) (0x003000E0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_LXOFFRXC(_i) (0x00300160 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_LXOFFTXC(_i) (0x003009A0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_LXONRXC(_i) (0x00300140 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_LXONTXC(_i) (0x00300980 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_MLFC(_i) (0x00300020 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_MPRCH(_i) (0x003005C4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_MPRCL(_i) (0x003005C0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_MPTCH(_i) (0x003009E4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_MPTCL(_i) (0x003009E0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_MRFC(_i) (0x00300040 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC1023H(_i) (0x00300504 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC1023L(_i) (0x00300500 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC127H(_i) (0x003004A4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC127L(_i) (0x003004A0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC1522H(_i) (0x00300524 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC1522L(_i) (0x00300520 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC255H(_i) (0x003004C4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC255L(_i) (0x003004C0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC511H(_i) (0x003004E4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC511L(_i) (0x003004E0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC64H(_i) (0x00300484 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC64L(_i) (0x00300480 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC9522H(_i) (0x00300544 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PRC9522L(_i) (0x00300540 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC1023H(_i) (0x00300724 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC1023L(_i) (0x00300720 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC127H(_i) (0x003006C4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC127L(_i) (0x003006C0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC1522H(_i) (0x00300744 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC1522L(_i) (0x00300740 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC255H(_i) (0x003006E4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC255L(_i) (0x003006E0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC511H(_i) (0x00300704 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC511L(_i) (0x00300700 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC64H(_i) (0x003006A4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC64L(_i) (0x003006A0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC9522H(_i) (0x00300764 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PTC9522L(_i) (0x00300760 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_PXOFFRXC(_i, _j) (0x00300280 + ((_i) * 8 + (_j) * 32)) /* _i=0...3, _j=0...7 */ /* Reset: CORER */
+#define I40E_GLPRT_PXOFFTXC(_i, _j) (0x00300880 + ((_i) * 8 + (_j) * 32)) /* _i=0...3, _j=0...7 */ /* Reset: CORER */
+#define I40E_GLPRT_PXONRXC(_i, _j) (0x00300180 + ((_i) * 8 + (_j) * 32)) /* _i=0...3, _j=0...7 */ /* Reset: CORER */
+#define I40E_GLPRT_PXONTXC(_i, _j) (0x00300780 + ((_i) * 8 + (_j) * 32)) /* _i=0...3, _j=0...7 */ /* Reset: CORER */
+#define I40E_GLPRT_RDPC(_i) (0x00300600 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_RFC(_i) (0x00300560 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_RJC(_i) (0x00300580 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_RLEC(_i) (0x003000A0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_ROC(_i) (0x00300120 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_RUC(_i) (0x00300100 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_RXON2OFFCNT(_i, _j) (0x00300380 + ((_i) * 8 + (_j) * 32)) /* _i=0...3, _j=0...7 */ /* Reset: CORER */
+#define I40E_GLPRT_TDOLD(_i) (0x00300A20 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_UPRCH(_i) (0x003005A4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_UPRCL(_i) (0x003005A0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_UPTCH(_i) (0x003009C4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLPRT_UPTCL(_i) (0x003009C0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_GLSW_BPRCH(_i) (0x00370104 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_BPRCL(_i) (0x00370100 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_BPTCH(_i) (0x00340104 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_BPTCL(_i) (0x00340100 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_GORCH(_i) (0x0035C004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_GORCL(_i) (0x0035c000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_GOTCH(_i) (0x0032C004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_GOTCL(_i) (0x0032c000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_MPRCH(_i) (0x00370084 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_MPRCL(_i) (0x00370080 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_MPTCH(_i) (0x00340084 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_MPTCL(_i) (0x00340080 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_RUPP(_i) (0x00370180 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_TDPC(_i) (0x00348000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_UPRCH(_i) (0x00370004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_UPRCL(_i) (0x00370000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_UPTCH(_i) (0x00340004 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLSW_UPTCL(_i) (0x00340000 + ((_i) * 8)) /* _i=0...15 */ /* Reset: CORER */
+#define I40E_GLV_BPRCH(_i) (0x0036D804 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_BPRCL(_i) (0x0036d800 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_BPTCH(_i) (0x0033D804 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_BPTCL(_i) (0x0033d800 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_GORCH(_i) (0x00358004 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_GORCL(_i) (0x00358000 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_GOTCH(_i) (0x00328004 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_GOTCL(_i) (0x00328000 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_MPRCH(_i) (0x0036CC04 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_MPRCL(_i) (0x0036cc00 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_MPTCH(_i) (0x0033CC04 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_MPTCL(_i) (0x0033cc00 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_RDPC(_i) (0x00310000 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_RUPP(_i) (0x0036E400 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_TEPC(_i) (0x00344000 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_UPRCH(_i) (0x0036C004 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_UPRCL(_i) (0x0036c000 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_UPTCH(_i) (0x0033C004 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_UPTCL(_i) (0x0033c000 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLVEBTC_RBCH(_i, _j) (0x00364004 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_GLVEBTC_RBCL(_i, _j) (0x00364000 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_GLVEBTC_RPCH(_i, _j) (0x00368004 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_GLVEBTC_RPCL(_i, _j) (0x00368000 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_GLVEBTC_TBCH(_i, _j) (0x00334004 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_GLVEBTC_TBCL(_i, _j) (0x00334000 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_GLVEBTC_TPCH(_i, _j) (0x00338004 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_GLVEBTC_TPCL(_i, _j) (0x00338000 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */ /* Reset: CORER */
+#define I40E_PRTTSYN_CTL0 0x001E4200 /* Reset: GLOBR */
+#define I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_SHIFT 1
+#define I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_SHIFT)
+#define I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT 2
+#define I40E_PRTTSYN_CTL0_EVENT_INT_ENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT)
+#define I40E_PRTTSYN_CTL0_PF_ID_SHIFT 8
+#define I40E_PRTTSYN_CTL0_PF_ID_MASK I40E_MASK(0xF, I40E_PRTTSYN_CTL0_PF_ID_SHIFT)
+#define I40E_PRTTSYN_CTL0_TSYNENA_SHIFT 31
+#define I40E_PRTTSYN_CTL0_TSYNENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL0_TSYNENA_SHIFT)
+#define I40E_PRTTSYN_CTL1 0x00085020 /* Reset: CORER */
+#define I40E_PRTTSYN_CTL1_V1MESSTYPE0_SHIFT 0
+#define I40E_PRTTSYN_CTL1_V1MESSTYPE0_MASK I40E_MASK(0xFF, I40E_PRTTSYN_CTL1_V1MESSTYPE0_SHIFT)
+#define I40E_PRTTSYN_CTL1_V2MESSTYPE0_SHIFT 16
+#define I40E_PRTTSYN_CTL1_V2MESSTYPE0_MASK I40E_MASK(0xF, I40E_PRTTSYN_CTL1_V2MESSTYPE0_SHIFT)
+#define I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT 24
+#define I40E_PRTTSYN_CTL1_UDP_ENA_SHIFT 26
+#define I40E_PRTTSYN_CTL1_UDP_ENA_MASK I40E_MASK(0x3, I40E_PRTTSYN_CTL1_UDP_ENA_SHIFT)
+#define I40E_PRTTSYN_CTL1_TSYNENA_SHIFT 31
+#define I40E_PRTTSYN_CTL1_TSYNENA_MASK I40E_MASK(0x1, I40E_PRTTSYN_CTL1_TSYNENA_SHIFT)
+#define I40E_PRTTSYN_INC_H 0x001E4060 /* Reset: GLOBR */
+#define I40E_PRTTSYN_INC_L 0x001E4040 /* Reset: GLOBR */
+#define I40E_PRTTSYN_RXTIME_H(_i) (0x00085040 + ((_i) * 32)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_PRTTSYN_RXTIME_L(_i) (0x000850C0 + ((_i) * 32)) /* _i=0...3 */ /* Reset: CORER */
+#define I40E_PRTTSYN_RXTIME_L_MAX_INDEX 3
+#define I40E_PRTTSYN_STAT_0 0x001E4220 /* Reset: GLOBR */
+#define I40E_PRTTSYN_STAT_0_EVENT0_SHIFT 0
+#define I40E_PRTTSYN_STAT_0_EVENT0_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_0_EVENT0_SHIFT)
+#define I40E_PRTTSYN_STAT_0_TXTIME_SHIFT 4
+#define I40E_PRTTSYN_STAT_0_TXTIME_MASK I40E_MASK(0x1, I40E_PRTTSYN_STAT_0_TXTIME_SHIFT)
+#define I40E_PRTTSYN_STAT_1 0x00085140 /* Reset: CORER */
+#define I40E_PRTTSYN_TIME_H 0x001E4120 /* Reset: GLOBR */
+#define I40E_PRTTSYN_TIME_L 0x001E4100 /* Reset: GLOBR */
+#define I40E_PRTTSYN_TXTIME_H 0x001E41E0 /* Reset: GLOBR */
+#define I40E_PRTTSYN_TXTIME_L 0x001E41C0 /* Reset: GLOBR */
+#define I40E_PRTTSYN_EVNT_H(_i) (0x001E40C0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_EVNT_L(_i) (0x001E4080 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_AUX_0(_i) (0x001E42A0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_AUX_0_OUT_ENA_SHIFT 0
+#define I40E_PRTTSYN_AUX_0_OUTMOD_SHIFT 1
+#define I40E_PRTTSYN_AUX_0_EVNTLVL_SHIFT 16
+#define I40E_PRTTSYN_AUX_0_PTPFLAG_SHIFT 17
+#define I40E_PRTTSYN_AUX_0_PTPFLAG_MASK I40E_MASK(0x1, I40E_PRTTSYN_AUX_0_PTPFLAG_SHIFT)
+#define I40E_PRTTSYN_AUX_1(_i) (0x001E42E0 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_AUX_1_INSTNT_SHIFT 0
+#define I40E_PRTTSYN_TGT_H(_i) (0x001E4180 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_TGT_L(_i) (0x001E4140 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_CLKO(_i) (0x001E4240 + ((_i) * 32)) /* _i=0...1 */ /* Reset: GLOBR */
+#define I40E_PRTTSYN_ADJ 0x001E4280 /* Reset: GLOBR */
+#define I40E_GL_MDET_RX 0x0012A510 /* Reset: CORER */
+#define I40E_GL_MDET_RX_FUNCTION_SHIFT 0
+#define I40E_GL_MDET_RX_FUNCTION_MASK I40E_MASK(0xFF, I40E_GL_MDET_RX_FUNCTION_SHIFT)
+#define I40E_GL_MDET_RX_EVENT_SHIFT 8
+#define I40E_GL_MDET_RX_EVENT_MASK I40E_MASK(0x1FF, I40E_GL_MDET_RX_EVENT_SHIFT)
+#define I40E_GL_MDET_RX_QUEUE_SHIFT 17
+#define I40E_GL_MDET_RX_QUEUE_MASK I40E_MASK(0x3FFF, I40E_GL_MDET_RX_QUEUE_SHIFT)
+#define I40E_GL_MDET_RX_VALID_SHIFT 31
+#define I40E_GL_MDET_RX_VALID_MASK I40E_MASK(0x1, I40E_GL_MDET_RX_VALID_SHIFT)
+#define I40E_GL_MDET_TX 0x000E6480 /* Reset: CORER */
+#define I40E_GL_MDET_TX_QUEUE_SHIFT 0
+#define I40E_GL_MDET_TX_QUEUE_MASK I40E_MASK(0xFFF, I40E_GL_MDET_TX_QUEUE_SHIFT)
+#define I40E_GL_MDET_TX_VF_NUM_SHIFT 12
+#define I40E_GL_MDET_TX_VF_NUM_MASK I40E_MASK(0x1FF, I40E_GL_MDET_TX_VF_NUM_SHIFT)
+#define I40E_GL_MDET_TX_PF_NUM_SHIFT 21
+#define I40E_GL_MDET_TX_PF_NUM_MASK I40E_MASK(0xF, I40E_GL_MDET_TX_PF_NUM_SHIFT)
+#define I40E_GL_MDET_TX_EVENT_SHIFT 25
+#define I40E_GL_MDET_TX_EVENT_MASK I40E_MASK(0x1F, I40E_GL_MDET_TX_EVENT_SHIFT)
+#define I40E_GL_MDET_TX_VALID_SHIFT 31
+#define I40E_GL_MDET_TX_VALID_MASK I40E_MASK(0x1, I40E_GL_MDET_TX_VALID_SHIFT)
+#define I40E_PF_MDET_RX 0x0012A400 /* Reset: CORER */
+#define I40E_PF_MDET_RX_VALID_SHIFT 0
+#define I40E_PF_MDET_RX_VALID_MASK I40E_MASK(0x1, I40E_PF_MDET_RX_VALID_SHIFT)
+#define I40E_PF_MDET_TX 0x000E6400 /* Reset: CORER */
+#define I40E_PF_MDET_TX_VALID_SHIFT 0
+#define I40E_PF_MDET_TX_VALID_MASK I40E_MASK(0x1, I40E_PF_MDET_TX_VALID_SHIFT)
+#define I40E_PF_VT_PFALLOC 0x001C0500 /* Reset: CORER */
+#define I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT 0
+#define I40E_PF_VT_PFALLOC_FIRSTVF_MASK I40E_MASK(0xFF, I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT)
+#define I40E_PF_VT_PFALLOC_LASTVF_SHIFT 8
+#define I40E_PF_VT_PFALLOC_LASTVF_MASK I40E_MASK(0xFF, I40E_PF_VT_PFALLOC_LASTVF_SHIFT)
+#define I40E_PF_VT_PFALLOC_VALID_SHIFT 31
+#define I40E_PF_VT_PFALLOC_VALID_MASK I40E_MASK(0x1u, I40E_PF_VT_PFALLOC_VALID_SHIFT)
+#define I40E_VP_MDET_RX(_VF) (0x0012A000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_VP_MDET_RX_VALID_SHIFT 0
+#define I40E_VP_MDET_RX_VALID_MASK I40E_MASK(0x1, I40E_VP_MDET_RX_VALID_SHIFT)
+#define I40E_VP_MDET_TX(_VF) (0x000E6000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
+#define I40E_VP_MDET_TX_VALID_SHIFT 0
+#define I40E_VP_MDET_TX_VALID_MASK I40E_MASK(0x1, I40E_VP_MDET_TX_VALID_SHIFT)
+#define I40E_PFPM_APM 0x000B8080 /* Reset: POR */
+#define I40E_PFPM_APM_APME_SHIFT 0
+#define I40E_PFPM_APM_APME_MASK I40E_MASK(0x1, I40E_PFPM_APM_APME_SHIFT)
+#define I40E_PFPM_WUFC 0x0006B400 /* Reset: POR */
+#define I40E_PFPM_WUFC_MAG_SHIFT 1
+#define I40E_PFPM_WUFC_MAG_MASK I40E_MASK(0x1, I40E_PFPM_WUFC_MAG_SHIFT)
+#define I40E_VF_ARQBAH1 0x00006000 /* Reset: EMPR */
+#define I40E_VF_ARQBAL1 0x00006C00 /* Reset: EMPR */
+#define I40E_VF_ARQH1 0x00007400 /* Reset: EMPR */
+#define I40E_VF_ARQLEN1 0x00008000 /* Reset: EMPR */
+#define I40E_VF_ARQT1 0x00007000 /* Reset: EMPR */
+#define I40E_VF_ATQBAH1 0x00007800 /* Reset: EMPR */
+#define I40E_VF_ATQBAL1 0x00007C00 /* Reset: EMPR */
+#define I40E_VF_ATQH1 0x00006400 /* Reset: EMPR */
+#define I40E_VF_ATQLEN1 0x00006800 /* Reset: EMPR */
+#define I40E_VF_ATQT1 0x00008400 /* Reset: EMPR */
+#define I40E_VFQF_HLUT_MAX_INDEX 15
+
+
+
+
+#define I40E_PFINT_DYN_CTL0_WB_ON_ITR_SHIFT 30
+#define I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTL0_WB_ON_ITR_SHIFT)
+#define I40E_PFINT_DYN_CTLN_WB_ON_ITR_SHIFT 30
+#define I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_WB_ON_ITR_SHIFT)
+#define I40E_GLNVM_FLA 0x000B6108 /* Reset: POR */
+#define I40E_GLNVM_FLA_LOCKED_SHIFT 6
+#define I40E_GLNVM_FLA_LOCKED_MASK I40E_MASK(0x1, I40E_GLNVM_FLA_LOCKED_SHIFT)
+
+#define I40E_GLNVM_ULD 0x000B6008 /* Reset: POR */
+
+
+
+#define I40E_GLQF_HASH_INSET(_i, _j) (0x00267600 + ((_i) * 4 + (_j) * 8)) /* _i=0...1, _j=0...63 */ /* Reset: CORER */
+#define I40E_GLQF_ORT(_i) (0x00268900 + ((_i) * 4)) /* _i=0...63 */ /* Reset: CORER */
+#define I40E_GLQF_ORT_PIT_INDX_SHIFT 0
+#define I40E_GLQF_ORT_PIT_INDX_MASK I40E_MASK(0x1F, I40E_GLQF_ORT_PIT_INDX_SHIFT)
+#define I40E_GLQF_ORT_FIELD_CNT_SHIFT 5
+#define I40E_GLQF_ORT_FIELD_CNT_MASK I40E_MASK(0x3, I40E_GLQF_ORT_FIELD_CNT_SHIFT)
+#define I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT 7
+#define I40E_GLQF_ORT_FLX_PAYLOAD_MASK I40E_MASK(0x1, I40E_GLQF_ORT_FLX_PAYLOAD_SHIFT)
+#define I40E_GLQF_FDEVICTENA(_i) (0x00270384 + ((_i) * 4)) /* _i=0...1 */ /* Reset: CORER */
+/* Redefined for X722 family */
+#define I40E_GLGEN_STAT_CLEAR 0x00390004 /* Reset: CORER */
+#endif /* _I40E_REGISTER_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_trace.h b/drivers/net/ethernet/intel/i40e/i40e_trace.h
new file mode 100644
index 0000000000..33b4e30f5e
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_trace.h
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+/* Modeled on trace-events-sample.h */
+
+/* The trace subsystem name for i40e will be "i40e".
+ *
+ * This file is named i40e_trace.h.
+ *
+ * Since this include file's name is different from the trace
+ * subsystem name, we'll have to define TRACE_INCLUDE_FILE at the end
+ * of this file.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM i40e
+
+/* See trace-events-sample.h for a detailed description of why this
+ * guard clause is different from most normal include files.
+ */
+#if !defined(_I40E_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _I40E_TRACE_H_
+
+#include <linux/tracepoint.h>
+
+/*
+ * i40e_trace() macro enables shared code to refer to trace points
+ * like:
+ *
+ * trace_i40e{,vf}_example(args...)
+ *
+ * ... as:
+ *
+ * i40e_trace(example, args...)
+ *
+ * ... to resolve to the PF or VF version of the tracepoint without
+ * ifdefs, and to allow tracepoints to be disabled entirely at build
+ * time.
+ *
+ * Trace point should always be referred to in the driver via this
+ * macro.
+ *
+ * Similarly, i40e_trace_enabled(trace_name) wraps references to
+ * trace_i40e{,vf}_<trace_name>_enabled() functions.
+ */
+#define _I40E_TRACE_NAME(trace_name) (trace_ ## i40e ## _ ## trace_name)
+#define I40E_TRACE_NAME(trace_name) _I40E_TRACE_NAME(trace_name)
+
+#define i40e_trace(trace_name, args...) I40E_TRACE_NAME(trace_name)(args)
+
+#define i40e_trace_enabled(trace_name) I40E_TRACE_NAME(trace_name##_enabled)()
+
+/* Events common to PF and VF. Corresponding versions will be defined
+ * for both, named trace_i40e_* and trace_i40evf_*. The i40e_trace()
+ * macro above will select the right trace point name for the driver
+ * being built from shared code.
+ */
+
+#define NO_DEV "(i40e no_device)"
+
+TRACE_EVENT(i40e_napi_poll,
+
+	TP_PROTO(struct napi_struct *napi, struct i40e_q_vector *q, int budget,
+		 int budget_per_ring, unsigned int rx_cleaned, unsigned int tx_cleaned,
+		 bool rx_clean_complete, bool tx_clean_complete),
+
+	TP_ARGS(napi, q, budget, budget_per_ring, rx_cleaned, tx_cleaned,
+		rx_clean_complete, tx_clean_complete),
+
+	TP_STRUCT__entry(
+		__field(int, budget)
+		__field(int, budget_per_ring)
+		__field(unsigned int, rx_cleaned)
+		__field(unsigned int, tx_cleaned)
+		__field(int, rx_clean_complete)
+		__field(int, tx_clean_complete)
+		__field(int, irq_num)
+		__field(int, curr_cpu)
+		__string(qname, q->name)
+		__string(dev_name, napi->dev ? napi->dev->name : NO_DEV)
+		__bitmask(irq_affinity,	nr_cpumask_bits)
+	),
+
+	TP_fast_assign(
+		__entry->budget = budget;
+		__entry->budget_per_ring = budget_per_ring;
+		__entry->rx_cleaned = rx_cleaned;
+		__entry->tx_cleaned = tx_cleaned;
+		__entry->rx_clean_complete = rx_clean_complete;
+		__entry->tx_clean_complete = tx_clean_complete;
+		__entry->irq_num = q->irq_num;
+		__entry->curr_cpu = get_cpu();
+		__assign_str(qname, q->name);
+		__assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV);
+		__assign_bitmask(irq_affinity, cpumask_bits(&q->affinity_mask),
+				 nr_cpumask_bits);
+	),
+
+	TP_printk("i40e_napi_poll on dev %s q %s irq %d irq_mask %s curr_cpu %d "
+		  "budget %d bpr %d rx_cleaned %u tx_cleaned %u "
+		  "rx_clean_complete %d tx_clean_complete %d",
+		__get_str(dev_name), __get_str(qname), __entry->irq_num,
+		__get_bitmask(irq_affinity), __entry->curr_cpu, __entry->budget,
+		__entry->budget_per_ring, __entry->rx_cleaned, __entry->tx_cleaned,
+		__entry->rx_clean_complete, __entry->tx_clean_complete)
+);
+
+/* Events related to a vsi & ring */
+DECLARE_EVENT_CLASS(
+	i40e_tx_template,
+
+	TP_PROTO(struct i40e_ring *ring,
+		 struct i40e_tx_desc *desc,
+		 struct i40e_tx_buffer *buf),
+
+	TP_ARGS(ring, desc, buf),
+
+	/* The convention here is to make the first fields in the
+	 * TP_STRUCT match the TP_PROTO exactly. This enables the use
+	 * of the args struct generated by the tplist tool (from the
+	 * bcc-tools package) to be used for those fields. To access
+	 * fields other than the tracepoint args will require the
+	 * tplist output to be adjusted.
+	 */
+	TP_STRUCT__entry(
+		__field(void*, ring)
+		__field(void*, desc)
+		__field(void*, buf)
+		__string(devname, ring->netdev->name)
+	),
+
+	TP_fast_assign(
+		__entry->ring = ring;
+		__entry->desc = desc;
+		__entry->buf = buf;
+		__assign_str(devname, ring->netdev->name);
+	),
+
+	TP_printk(
+		"netdev: %s ring: %p desc: %p buf %p",
+		__get_str(devname), __entry->ring,
+		__entry->desc, __entry->buf)
+);
+
+DEFINE_EVENT(
+	i40e_tx_template, i40e_clean_tx_irq,
+	TP_PROTO(struct i40e_ring *ring,
+		 struct i40e_tx_desc *desc,
+		 struct i40e_tx_buffer *buf),
+
+	TP_ARGS(ring, desc, buf));
+
+DEFINE_EVENT(
+	i40e_tx_template, i40e_clean_tx_irq_unmap,
+	TP_PROTO(struct i40e_ring *ring,
+		 struct i40e_tx_desc *desc,
+		 struct i40e_tx_buffer *buf),
+
+	TP_ARGS(ring, desc, buf));
+
+DECLARE_EVENT_CLASS(
+	i40e_rx_template,
+
+	TP_PROTO(struct i40e_ring *ring,
+		 union i40e_16byte_rx_desc *desc,
+		 struct xdp_buff *xdp),
+
+	TP_ARGS(ring, desc, xdp),
+
+	TP_STRUCT__entry(
+		__field(void*, ring)
+		__field(void*, desc)
+		__field(void*, xdp)
+		__string(devname, ring->netdev->name)
+	),
+
+	TP_fast_assign(
+		__entry->ring = ring;
+		__entry->desc = desc;
+		__entry->xdp = xdp;
+		__assign_str(devname, ring->netdev->name);
+	),
+
+	TP_printk(
+		"netdev: %s ring: %p desc: %p xdp %p",
+		__get_str(devname), __entry->ring,
+		__entry->desc, __entry->xdp)
+);
+
+DEFINE_EVENT(
+	i40e_rx_template, i40e_clean_rx_irq,
+	TP_PROTO(struct i40e_ring *ring,
+		 union i40e_16byte_rx_desc *desc,
+		 struct xdp_buff *xdp),
+
+	TP_ARGS(ring, desc, xdp));
+
+DEFINE_EVENT(
+	i40e_rx_template, i40e_clean_rx_irq_rx,
+	TP_PROTO(struct i40e_ring *ring,
+		 union i40e_16byte_rx_desc *desc,
+		 struct xdp_buff *xdp),
+
+	TP_ARGS(ring, desc, xdp));
+
+DECLARE_EVENT_CLASS(
+	i40e_xmit_template,
+
+	TP_PROTO(struct sk_buff *skb,
+		 struct i40e_ring *ring),
+
+	TP_ARGS(skb, ring),
+
+	TP_STRUCT__entry(
+		__field(void*, skb)
+		__field(void*, ring)
+		__string(devname, ring->netdev->name)
+	),
+
+	TP_fast_assign(
+		__entry->skb = skb;
+		__entry->ring = ring;
+		__assign_str(devname, ring->netdev->name);
+	),
+
+	TP_printk(
+		"netdev: %s skb: %p ring: %p",
+		__get_str(devname), __entry->skb,
+		__entry->ring)
+);
+
+DEFINE_EVENT(
+	i40e_xmit_template, i40e_xmit_frame_ring,
+	TP_PROTO(struct sk_buff *skb,
+		 struct i40e_ring *ring),
+
+	TP_ARGS(skb, ring));
+
+DEFINE_EVENT(
+	i40e_xmit_template, i40e_xmit_frame_ring_drop,
+	TP_PROTO(struct sk_buff *skb,
+		 struct i40e_ring *ring),
+
+	TP_ARGS(skb, ring));
+
+/* Events unique to the PF. */
+
+#endif /* _I40E_TRACE_H_ */
+/* This must be outside ifdef _I40E_TRACE_H */
+
+/* This trace include file is not located in the .../include/trace
+ * with the kernel tracepoint definitions, because we're a loadable
+ * module.
+ */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE i40e_trace
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
new file mode 100644
index 0000000000..1df2f93388
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -0,0 +1,4058 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include <linux/prefetch.h>
+#include <linux/bpf_trace.h>
+#include <net/mpls.h>
+#include <net/xdp.h>
+#include "i40e.h"
+#include "i40e_trace.h"
+#include "i40e_prototype.h"
+#include "i40e_txrx_common.h"
+#include "i40e_xsk.h"
+
+#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
+/**
+ * i40e_fdir - Generate a Flow Director descriptor based on fdata
+ * @tx_ring: Tx ring to send buffer on
+ * @fdata: Flow director filter data
+ * @add: Indicate if we are adding a rule or deleting one
+ *
+ **/
+static void i40e_fdir(struct i40e_ring *tx_ring,
+		      struct i40e_fdir_filter *fdata, bool add)
+{
+	struct i40e_filter_program_desc *fdir_desc;
+	struct i40e_pf *pf = tx_ring->vsi->back;
+	u32 flex_ptype, dtype_cmd;
+	u16 i;
+
+	/* grab the next descriptor */
+	i = tx_ring->next_to_use;
+	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	flex_ptype = I40E_TXD_FLTR_QW0_QINDEX_MASK &
+		     (fdata->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT);
+
+	flex_ptype |= I40E_TXD_FLTR_QW0_FLEXOFF_MASK &
+		      (fdata->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT);
+
+	flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK &
+		      (fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
+
+	/* Use LAN VSI Id if not programmed by user */
+	flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK &
+		      ((u32)(fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id) <<
+		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT);
+
+	dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
+
+	dtype_cmd |= add ?
+		     I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
+		     I40E_TXD_FLTR_QW1_PCMD_SHIFT :
+		     I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
+		     I40E_TXD_FLTR_QW1_PCMD_SHIFT;
+
+	dtype_cmd |= I40E_TXD_FLTR_QW1_DEST_MASK &
+		     (fdata->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT);
+
+	dtype_cmd |= I40E_TXD_FLTR_QW1_FD_STATUS_MASK &
+		     (fdata->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT);
+
+	if (fdata->cnt_index) {
+		dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
+		dtype_cmd |= I40E_TXD_FLTR_QW1_CNTINDEX_MASK &
+			     ((u32)fdata->cnt_index <<
+			      I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT);
+	}
+
+	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
+	fdir_desc->rsvd = cpu_to_le32(0);
+	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
+	fdir_desc->fd_id = cpu_to_le32(fdata->fd_id);
+}
+
+#define I40E_FD_CLEAN_DELAY 10
+/**
+ * i40e_program_fdir_filter - Program a Flow Director filter
+ * @fdir_data: Packet data that will be filter parameters
+ * @raw_packet: the pre-allocated packet buffer for FDir
+ * @pf: The PF pointer
+ * @add: True for add/update, False for remove
+ **/
+static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data,
+				    u8 *raw_packet, struct i40e_pf *pf,
+				    bool add)
+{
+	struct i40e_tx_buffer *tx_buf, *first;
+	struct i40e_tx_desc *tx_desc;
+	struct i40e_ring *tx_ring;
+	struct i40e_vsi *vsi;
+	struct device *dev;
+	dma_addr_t dma;
+	u32 td_cmd = 0;
+	u16 i;
+
+	/* find existing FDIR VSI */
+	vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+	if (!vsi)
+		return -ENOENT;
+
+	tx_ring = vsi->tx_rings[0];
+	dev = tx_ring->dev;
+
+	/* we need two descriptors to add/del a filter and we can wait */
+	for (i = I40E_FD_CLEAN_DELAY; I40E_DESC_UNUSED(tx_ring) < 2; i--) {
+		if (!i)
+			return -EAGAIN;
+		msleep_interruptible(1);
+	}
+
+	dma = dma_map_single(dev, raw_packet,
+			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, dma))
+		goto dma_fail;
+
+	/* grab the next descriptor */
+	i = tx_ring->next_to_use;
+	first = &tx_ring->tx_bi[i];
+	i40e_fdir(tx_ring, fdir_data, add);
+
+	/* Now program a dummy descriptor */
+	i = tx_ring->next_to_use;
+	tx_desc = I40E_TX_DESC(tx_ring, i);
+	tx_buf = &tx_ring->tx_bi[i];
+
+	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
+
+	memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
+
+	/* record length, and DMA address */
+	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
+	dma_unmap_addr_set(tx_buf, dma, dma);
+
+	tx_desc->buffer_addr = cpu_to_le64(dma);
+	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
+
+	tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
+	tx_buf->raw_buf = (void *)raw_packet;
+
+	tx_desc->cmd_type_offset_bsz =
+		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
+
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.
+	 */
+	wmb();
+
+	/* Mark the data descriptor to be watched */
+	first->next_to_watch = tx_desc;
+
+	writel(tx_ring->next_to_use, tx_ring->tail);
+	return 0;
+
+dma_fail:
+	return -1;
+}
+
+/**
+ * i40e_create_dummy_packet - Constructs dummy packet for HW
+ * @dummy_packet: preallocated space for dummy packet
+ * @ipv4: is layer 3 packet of version 4 or 6
+ * @l4proto: next level protocol used in data portion of l3
+ * @data: filter data
+ *
+ * Returns address of layer 4 protocol dummy packet.
+ **/
+static char *i40e_create_dummy_packet(u8 *dummy_packet, bool ipv4, u8 l4proto,
+				      struct i40e_fdir_filter *data)
+{
+	bool is_vlan = !!data->vlan_tag;
+	struct vlan_hdr vlan = {};
+	struct ipv6hdr ipv6 = {};
+	struct ethhdr eth = {};
+	struct iphdr ip = {};
+	u8 *tmp;
+
+	if (ipv4) {
+		eth.h_proto = cpu_to_be16(ETH_P_IP);
+		ip.protocol = l4proto;
+		ip.version = 0x4;
+		ip.ihl = 0x5;
+
+		ip.daddr = data->dst_ip;
+		ip.saddr = data->src_ip;
+	} else {
+		eth.h_proto = cpu_to_be16(ETH_P_IPV6);
+		ipv6.nexthdr = l4proto;
+		ipv6.version = 0x6;
+
+		memcpy(&ipv6.saddr.in6_u.u6_addr32, data->src_ip6,
+		       sizeof(__be32) * 4);
+		memcpy(&ipv6.daddr.in6_u.u6_addr32, data->dst_ip6,
+		       sizeof(__be32) * 4);
+	}
+
+	if (is_vlan) {
+		vlan.h_vlan_TCI = data->vlan_tag;
+		vlan.h_vlan_encapsulated_proto = eth.h_proto;
+		eth.h_proto = data->vlan_etype;
+	}
+
+	tmp = dummy_packet;
+	memcpy(tmp, &eth, sizeof(eth));
+	tmp += sizeof(eth);
+
+	if (is_vlan) {
+		memcpy(tmp, &vlan, sizeof(vlan));
+		tmp += sizeof(vlan);
+	}
+
+	if (ipv4) {
+		memcpy(tmp, &ip, sizeof(ip));
+		tmp += sizeof(ip);
+	} else {
+		memcpy(tmp, &ipv6, sizeof(ipv6));
+		tmp += sizeof(ipv6);
+	}
+
+	return tmp;
+}
+
+/**
+ * i40e_create_dummy_udp_packet - helper function to create UDP packet
+ * @raw_packet: preallocated space for dummy packet
+ * @ipv4: is layer 3 packet of version 4 or 6
+ * @l4proto: next level protocol used in data portion of l3
+ * @data: filter data
+ *
+ * Helper function to populate udp fields.
+ **/
+static void i40e_create_dummy_udp_packet(u8 *raw_packet, bool ipv4, u8 l4proto,
+					 struct i40e_fdir_filter *data)
+{
+	struct udphdr *udp;
+	u8 *tmp;
+
+	tmp = i40e_create_dummy_packet(raw_packet, ipv4, IPPROTO_UDP, data);
+	udp = (struct udphdr *)(tmp);
+	udp->dest = data->dst_port;
+	udp->source = data->src_port;
+}
+
+/**
+ * i40e_create_dummy_tcp_packet - helper function to create TCP packet
+ * @raw_packet: preallocated space for dummy packet
+ * @ipv4: is layer 3 packet of version 4 or 6
+ * @l4proto: next level protocol used in data portion of l3
+ * @data: filter data
+ *
+ * Helper function to populate tcp fields.
+ **/
+static void i40e_create_dummy_tcp_packet(u8 *raw_packet, bool ipv4, u8 l4proto,
+					 struct i40e_fdir_filter *data)
+{
+	struct tcphdr *tcp;
+	u8 *tmp;
+	/* Dummy tcp packet */
+	static const char tcp_packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0x50, 0x11, 0x0, 0x72, 0, 0, 0, 0};
+
+	tmp = i40e_create_dummy_packet(raw_packet, ipv4, IPPROTO_TCP, data);
+
+	tcp = (struct tcphdr *)tmp;
+	memcpy(tcp, tcp_packet, sizeof(tcp_packet));
+	tcp->dest = data->dst_port;
+	tcp->source = data->src_port;
+}
+
+/**
+ * i40e_create_dummy_sctp_packet - helper function to create SCTP packet
+ * @raw_packet: preallocated space for dummy packet
+ * @ipv4: is layer 3 packet of version 4 or 6
+ * @l4proto: next level protocol used in data portion of l3
+ * @data: filter data
+ *
+ * Helper function to populate sctp fields.
+ **/
+static void i40e_create_dummy_sctp_packet(u8 *raw_packet, bool ipv4,
+					  u8 l4proto,
+					  struct i40e_fdir_filter *data)
+{
+	struct sctphdr *sctp;
+	u8 *tmp;
+
+	tmp = i40e_create_dummy_packet(raw_packet, ipv4, IPPROTO_SCTP, data);
+
+	sctp = (struct sctphdr *)tmp;
+	sctp->dest = data->dst_port;
+	sctp->source = data->src_port;
+}
+
+/**
+ * i40e_prepare_fdir_filter - Prepare and program fdir filter
+ * @pf: physical function to attach filter to
+ * @fd_data: filter data
+ * @add: add or delete filter
+ * @packet_addr: address of dummy packet, used in filtering
+ * @payload_offset: offset from dummy packet address to user defined data
+ * @pctype: Packet type for which filter is used
+ *
+ * Helper function to offset data of dummy packet, program it and
+ * handle errors.
+ **/
+static int i40e_prepare_fdir_filter(struct i40e_pf *pf,
+				    struct i40e_fdir_filter *fd_data,
+				    bool add, char *packet_addr,
+				    int payload_offset, u8 pctype)
+{
+	int ret;
+
+	if (fd_data->flex_filter) {
+		u8 *payload;
+		__be16 pattern = fd_data->flex_word;
+		u16 off = fd_data->flex_offset;
+
+		payload = packet_addr + payload_offset;
+
+		/* If user provided vlan, offset payload by vlan header length */
+		if (!!fd_data->vlan_tag)
+			payload += VLAN_HLEN;
+
+		*((__force __be16 *)(payload + off)) = pattern;
+	}
+
+	fd_data->pctype = pctype;
+	ret = i40e_program_fdir_filter(fd_data, packet_addr, pf, add);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
+			 fd_data->pctype, fd_data->fd_id, ret);
+		/* Free the packet buffer since it wasn't added to the ring */
+		return -EOPNOTSUPP;
+	} else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
+		if (add)
+			dev_info(&pf->pdev->dev,
+				 "Filter OK for PCTYPE %d loc = %d\n",
+				 fd_data->pctype, fd_data->fd_id);
+		else
+			dev_info(&pf->pdev->dev,
+				 "Filter deleted for PCTYPE %d loc = %d\n",
+				 fd_data->pctype, fd_data->fd_id);
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_change_filter_num - Prepare and program fdir filter
+ * @ipv4: is layer 3 packet of version 4 or 6
+ * @add: add or delete filter
+ * @ipv4_filter_num: field to update
+ * @ipv6_filter_num: field to update
+ *
+ * Update filter number field for pf.
+ **/
+static void i40e_change_filter_num(bool ipv4, bool add, u16 *ipv4_filter_num,
+				   u16 *ipv6_filter_num)
+{
+	if (add) {
+		if (ipv4)
+			(*ipv4_filter_num)++;
+		else
+			(*ipv6_filter_num)++;
+	} else {
+		if (ipv4)
+			(*ipv4_filter_num)--;
+		else
+			(*ipv6_filter_num)--;
+	}
+}
+
+#define I40E_UDPIP_DUMMY_PACKET_LEN	42
+#define I40E_UDPIP6_DUMMY_PACKET_LEN	62
+/**
+ * i40e_add_del_fdir_udp - Add/Remove UDP filters
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @add: true adds a filter, false removes it
+ * @ipv4: true is v4, false is v6
+ *
+ * Returns 0 if the filters were successfully added or removed
+ **/
+static int i40e_add_del_fdir_udp(struct i40e_vsi *vsi,
+				 struct i40e_fdir_filter *fd_data,
+				 bool add,
+				 bool ipv4)
+{
+	struct i40e_pf *pf = vsi->back;
+	u8 *raw_packet;
+	int ret;
+
+	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
+	if (!raw_packet)
+		return -ENOMEM;
+
+	i40e_create_dummy_udp_packet(raw_packet, ipv4, IPPROTO_UDP, fd_data);
+
+	if (ipv4)
+		ret = i40e_prepare_fdir_filter
+			(pf, fd_data, add, raw_packet,
+			 I40E_UDPIP_DUMMY_PACKET_LEN,
+			 I40E_FILTER_PCTYPE_NONF_IPV4_UDP);
+	else
+		ret = i40e_prepare_fdir_filter
+			(pf, fd_data, add, raw_packet,
+			 I40E_UDPIP6_DUMMY_PACKET_LEN,
+			 I40E_FILTER_PCTYPE_NONF_IPV6_UDP);
+
+	if (ret) {
+		kfree(raw_packet);
+		return ret;
+	}
+
+	i40e_change_filter_num(ipv4, add, &pf->fd_udp4_filter_cnt,
+			       &pf->fd_udp6_filter_cnt);
+
+	return 0;
+}
+
+#define I40E_TCPIP_DUMMY_PACKET_LEN	54
+#define I40E_TCPIP6_DUMMY_PACKET_LEN	74
+/**
+ * i40e_add_del_fdir_tcp - Add/Remove TCPv4 filters
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @add: true adds a filter, false removes it
+ * @ipv4: true is v4, false is v6
+ *
+ * Returns 0 if the filters were successfully added or removed
+ **/
+static int i40e_add_del_fdir_tcp(struct i40e_vsi *vsi,
+				 struct i40e_fdir_filter *fd_data,
+				 bool add,
+				 bool ipv4)
+{
+	struct i40e_pf *pf = vsi->back;
+	u8 *raw_packet;
+	int ret;
+
+	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
+	if (!raw_packet)
+		return -ENOMEM;
+
+	i40e_create_dummy_tcp_packet(raw_packet, ipv4, IPPROTO_TCP, fd_data);
+	if (ipv4)
+		ret = i40e_prepare_fdir_filter
+			(pf, fd_data, add, raw_packet,
+			 I40E_TCPIP_DUMMY_PACKET_LEN,
+			 I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
+	else
+		ret = i40e_prepare_fdir_filter
+			(pf, fd_data, add, raw_packet,
+			 I40E_TCPIP6_DUMMY_PACKET_LEN,
+			 I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
+
+	if (ret) {
+		kfree(raw_packet);
+		return ret;
+	}
+
+	i40e_change_filter_num(ipv4, add, &pf->fd_tcp4_filter_cnt,
+			       &pf->fd_tcp6_filter_cnt);
+
+	if (add) {
+		if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
+		    I40E_DEBUG_FD & pf->hw.debug_mask)
+			dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
+		set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
+	}
+	return 0;
+}
+
+#define I40E_SCTPIP_DUMMY_PACKET_LEN	46
+#define I40E_SCTPIP6_DUMMY_PACKET_LEN	66
+/**
+ * i40e_add_del_fdir_sctp - Add/Remove SCTPv4 Flow Director filters for
+ * a specific flow spec
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @add: true adds a filter, false removes it
+ * @ipv4: true is v4, false is v6
+ *
+ * Returns 0 if the filters were successfully added or removed
+ **/
+static int i40e_add_del_fdir_sctp(struct i40e_vsi *vsi,
+				  struct i40e_fdir_filter *fd_data,
+				  bool add,
+				  bool ipv4)
+{
+	struct i40e_pf *pf = vsi->back;
+	u8 *raw_packet;
+	int ret;
+
+	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
+	if (!raw_packet)
+		return -ENOMEM;
+
+	i40e_create_dummy_sctp_packet(raw_packet, ipv4, IPPROTO_SCTP, fd_data);
+
+	if (ipv4)
+		ret = i40e_prepare_fdir_filter
+			(pf, fd_data, add, raw_packet,
+			 I40E_SCTPIP_DUMMY_PACKET_LEN,
+			 I40E_FILTER_PCTYPE_NONF_IPV4_SCTP);
+	else
+		ret = i40e_prepare_fdir_filter
+			(pf, fd_data, add, raw_packet,
+			 I40E_SCTPIP6_DUMMY_PACKET_LEN,
+			 I40E_FILTER_PCTYPE_NONF_IPV6_SCTP);
+
+	if (ret) {
+		kfree(raw_packet);
+		return ret;
+	}
+
+	i40e_change_filter_num(ipv4, add, &pf->fd_sctp4_filter_cnt,
+			       &pf->fd_sctp6_filter_cnt);
+
+	return 0;
+}
+
+#define I40E_IP_DUMMY_PACKET_LEN	34
+#define I40E_IP6_DUMMY_PACKET_LEN	54
+/**
+ * i40e_add_del_fdir_ip - Add/Remove IPv4 Flow Director filters for
+ * a specific flow spec
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @add: true adds a filter, false removes it
+ * @ipv4: true is v4, false is v6
+ *
+ * Returns 0 if the filters were successfully added or removed
+ **/
+static int i40e_add_del_fdir_ip(struct i40e_vsi *vsi,
+				struct i40e_fdir_filter *fd_data,
+				bool add,
+				bool ipv4)
+{
+	struct i40e_pf *pf = vsi->back;
+	int payload_offset;
+	u8 *raw_packet;
+	int iter_start;
+	int iter_end;
+	int ret;
+	int i;
+
+	if (ipv4) {
+		iter_start = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
+		iter_end = I40E_FILTER_PCTYPE_FRAG_IPV4;
+	} else {
+		iter_start = I40E_FILTER_PCTYPE_NONF_IPV6_OTHER;
+		iter_end = I40E_FILTER_PCTYPE_FRAG_IPV6;
+	}
+
+	for (i = iter_start; i <= iter_end; i++) {
+		raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
+		if (!raw_packet)
+			return -ENOMEM;
+
+		/* IPv6 no header option differs from IPv4 */
+		(void)i40e_create_dummy_packet
+			(raw_packet, ipv4, (ipv4) ? IPPROTO_IP : IPPROTO_NONE,
+			 fd_data);
+
+		payload_offset = (ipv4) ? I40E_IP_DUMMY_PACKET_LEN :
+			I40E_IP6_DUMMY_PACKET_LEN;
+		ret = i40e_prepare_fdir_filter(pf, fd_data, add, raw_packet,
+					       payload_offset, i);
+		if (ret)
+			goto err;
+	}
+
+	i40e_change_filter_num(ipv4, add, &pf->fd_ip4_filter_cnt,
+			       &pf->fd_ip6_filter_cnt);
+
+	return 0;
+err:
+	kfree(raw_packet);
+	return ret;
+}
+
+/**
+ * i40e_add_del_fdir - Build raw packets to add/del fdir filter
+ * @vsi: pointer to the targeted VSI
+ * @input: filter to add or delete
+ * @add: true adds a filter, false removes it
+ *
+ **/
+int i40e_add_del_fdir(struct i40e_vsi *vsi,
+		      struct i40e_fdir_filter *input, bool add)
+{
+	enum ip_ver { ipv6 = 0, ipv4 = 1 };
+	struct i40e_pf *pf = vsi->back;
+	int ret;
+
+	switch (input->flow_type & ~FLOW_EXT) {
+	case TCP_V4_FLOW:
+		ret = i40e_add_del_fdir_tcp(vsi, input, add, ipv4);
+		break;
+	case UDP_V4_FLOW:
+		ret = i40e_add_del_fdir_udp(vsi, input, add, ipv4);
+		break;
+	case SCTP_V4_FLOW:
+		ret = i40e_add_del_fdir_sctp(vsi, input, add, ipv4);
+		break;
+	case TCP_V6_FLOW:
+		ret = i40e_add_del_fdir_tcp(vsi, input, add, ipv6);
+		break;
+	case UDP_V6_FLOW:
+		ret = i40e_add_del_fdir_udp(vsi, input, add, ipv6);
+		break;
+	case SCTP_V6_FLOW:
+		ret = i40e_add_del_fdir_sctp(vsi, input, add, ipv6);
+		break;
+	case IP_USER_FLOW:
+		switch (input->ipl4_proto) {
+		case IPPROTO_TCP:
+			ret = i40e_add_del_fdir_tcp(vsi, input, add, ipv4);
+			break;
+		case IPPROTO_UDP:
+			ret = i40e_add_del_fdir_udp(vsi, input, add, ipv4);
+			break;
+		case IPPROTO_SCTP:
+			ret = i40e_add_del_fdir_sctp(vsi, input, add, ipv4);
+			break;
+		case IPPROTO_IP:
+			ret = i40e_add_del_fdir_ip(vsi, input, add, ipv4);
+			break;
+		default:
+			/* We cannot support masking based on protocol */
+			dev_info(&pf->pdev->dev, "Unsupported IPv4 protocol 0x%02x\n",
+				 input->ipl4_proto);
+			return -EINVAL;
+		}
+		break;
+	case IPV6_USER_FLOW:
+		switch (input->ipl4_proto) {
+		case IPPROTO_TCP:
+			ret = i40e_add_del_fdir_tcp(vsi, input, add, ipv6);
+			break;
+		case IPPROTO_UDP:
+			ret = i40e_add_del_fdir_udp(vsi, input, add, ipv6);
+			break;
+		case IPPROTO_SCTP:
+			ret = i40e_add_del_fdir_sctp(vsi, input, add, ipv6);
+			break;
+		case IPPROTO_IP:
+			ret = i40e_add_del_fdir_ip(vsi, input, add, ipv6);
+			break;
+		default:
+			/* We cannot support masking based on protocol */
+			dev_info(&pf->pdev->dev, "Unsupported IPv6 protocol 0x%02x\n",
+				 input->ipl4_proto);
+			return -EINVAL;
+		}
+		break;
+	default:
+		dev_info(&pf->pdev->dev, "Unsupported flow type 0x%02x\n",
+			 input->flow_type);
+		return -EINVAL;
+	}
+
+	/* The buffer allocated here will be normally be freed by
+	 * i40e_clean_fdir_tx_irq() as it reclaims resources after transmit
+	 * completion. In the event of an error adding the buffer to the FDIR
+	 * ring, it will immediately be freed. It may also be freed by
+	 * i40e_clean_tx_ring() when closing the VSI.
+	 */
+	return ret;
+}
+
+/**
+ * i40e_fd_handle_status - check the Programming Status for FD
+ * @rx_ring: the Rx ring for this descriptor
+ * @qword0_raw: qword0
+ * @qword1: qword1 after le_to_cpu
+ * @prog_id: the id originally used for programming
+ *
+ * This is used to verify if the FD programming or invalidation
+ * requested by SW to the HW is successful or not and take actions accordingly.
+ **/
+static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+				  u64 qword1, u8 prog_id)
+{
+	struct i40e_pf *pf = rx_ring->vsi->back;
+	struct pci_dev *pdev = pf->pdev;
+	struct i40e_16b_rx_wb_qw0 *qw0;
+	u32 fcnt_prog, fcnt_avail;
+	u32 error;
+
+	qw0 = (struct i40e_16b_rx_wb_qw0 *)&qword0_raw;
+	error = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
+		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
+
+	if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
+		pf->fd_inv = le32_to_cpu(qw0->hi_dword.fd_id);
+		if (qw0->hi_dword.fd_id != 0 ||
+		    (I40E_DEBUG_FD & pf->hw.debug_mask))
+			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
+				 pf->fd_inv);
+
+		/* Check if the programming error is for ATR.
+		 * If so, auto disable ATR and set a state for
+		 * flush in progress. Next time we come here if flush is in
+		 * progress do nothing, once flush is complete the state will
+		 * be cleared.
+		 */
+		if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
+			return;
+
+		pf->fd_add_err++;
+		/* store the current atr filter count */
+		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
+
+		if (qw0->hi_dword.fd_id == 0 &&
+		    test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) {
+			/* These set_bit() calls aren't atomic with the
+			 * test_bit() here, but worse case we potentially
+			 * disable ATR and queue a flush right after SB
+			 * support is re-enabled. That shouldn't cause an
+			 * issue in practice
+			 */
+			set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
+			set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
+		}
+
+		/* filter programming failed most likely due to table full */
+		fcnt_prog = i40e_get_global_fd_count(pf);
+		fcnt_avail = pf->fdir_pf_filter_count;
+		/* If ATR is running fcnt_prog can quickly change,
+		 * if we are very close to full, it makes sense to disable
+		 * FD ATR/SB and then re-enable it when there is room.
+		 */
+		if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
+			if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
+			    !test_and_set_bit(__I40E_FD_SB_AUTO_DISABLED,
+					      pf->state))
+				if (I40E_DEBUG_FD & pf->hw.debug_mask)
+					dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
+		}
+	} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
+		if (I40E_DEBUG_FD & pf->hw.debug_mask)
+			dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
+				 qw0->hi_dword.fd_id);
+	}
+}
+
+/**
+ * i40e_unmap_and_free_tx_resource - Release a Tx buffer
+ * @ring:      the ring that owns the buffer
+ * @tx_buffer: the buffer to free
+ **/
+static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
+					    struct i40e_tx_buffer *tx_buffer)
+{
+	if (tx_buffer->skb) {
+		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
+			kfree(tx_buffer->raw_buf);
+		else if (ring_is_xdp(ring))
+			xdp_return_frame(tx_buffer->xdpf);
+		else
+			dev_kfree_skb_any(tx_buffer->skb);
+		if (dma_unmap_len(tx_buffer, len))
+			dma_unmap_single(ring->dev,
+					 dma_unmap_addr(tx_buffer, dma),
+					 dma_unmap_len(tx_buffer, len),
+					 DMA_TO_DEVICE);
+	} else if (dma_unmap_len(tx_buffer, len)) {
+		dma_unmap_page(ring->dev,
+			       dma_unmap_addr(tx_buffer, dma),
+			       dma_unmap_len(tx_buffer, len),
+			       DMA_TO_DEVICE);
+	}
+
+	tx_buffer->next_to_watch = NULL;
+	tx_buffer->skb = NULL;
+	dma_unmap_len_set(tx_buffer, len, 0);
+	/* tx_buffer must be completely set up in the transmit path */
+}
+
+/**
+ * i40e_clean_tx_ring - Free any empty Tx buffers
+ * @tx_ring: ring to be cleaned
+ **/
+void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
+{
+	unsigned long bi_size;
+	u16 i;
+
+	if (ring_is_xdp(tx_ring) && tx_ring->xsk_pool) {
+		i40e_xsk_clean_tx_ring(tx_ring);
+	} else {
+		/* ring already cleared, nothing to do */
+		if (!tx_ring->tx_bi)
+			return;
+
+		/* Free all the Tx ring sk_buffs */
+		for (i = 0; i < tx_ring->count; i++)
+			i40e_unmap_and_free_tx_resource(tx_ring,
+							&tx_ring->tx_bi[i]);
+	}
+
+	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
+	memset(tx_ring->tx_bi, 0, bi_size);
+
+	/* Zero out the descriptor ring */
+	memset(tx_ring->desc, 0, tx_ring->size);
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+	if (!tx_ring->netdev)
+		return;
+
+	/* cleanup Tx queue statistics */
+	netdev_tx_reset_queue(txring_txq(tx_ring));
+}
+
+/**
+ * i40e_free_tx_resources - Free Tx resources per queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ **/
+void i40e_free_tx_resources(struct i40e_ring *tx_ring)
+{
+	i40e_clean_tx_ring(tx_ring);
+	kfree(tx_ring->tx_bi);
+	tx_ring->tx_bi = NULL;
+
+	if (tx_ring->desc) {
+		dma_free_coherent(tx_ring->dev, tx_ring->size,
+				  tx_ring->desc, tx_ring->dma);
+		tx_ring->desc = NULL;
+	}
+}
+
+/**
+ * i40e_get_tx_pending - how many tx descriptors not processed
+ * @ring: the ring of descriptors
+ * @in_sw: use SW variables
+ *
+ * Since there is no access to the ring head register
+ * in XL710, we need to use our local copies
+ **/
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
+{
+	u32 head, tail;
+
+	if (!in_sw) {
+		head = i40e_get_head(ring);
+		tail = readl(ring->tail);
+	} else {
+		head = ring->next_to_clean;
+		tail = ring->next_to_use;
+	}
+
+	if (head != tail)
+		return (head < tail) ?
+			tail - head : (tail + ring->count - head);
+
+	return 0;
+}
+
+/**
+ * i40e_detect_recover_hung - Function to detect and recover hung_queues
+ * @vsi:  pointer to vsi struct with tx queues
+ *
+ * VSI has netdev and netdev has TX queues. This function is to check each of
+ * those TX queues if they are hung, trigger recovery by issuing SW interrupt.
+ **/
+void i40e_detect_recover_hung(struct i40e_vsi *vsi)
+{
+	struct i40e_ring *tx_ring = NULL;
+	struct net_device *netdev;
+	unsigned int i;
+	int packets;
+
+	if (!vsi)
+		return;
+
+	if (test_bit(__I40E_VSI_DOWN, vsi->state))
+		return;
+
+	netdev = vsi->netdev;
+	if (!netdev)
+		return;
+
+	if (!netif_carrier_ok(netdev))
+		return;
+
+	for (i = 0; i < vsi->num_queue_pairs; i++) {
+		tx_ring = vsi->tx_rings[i];
+		if (tx_ring && tx_ring->desc) {
+			/* If packet counter has not changed the queue is
+			 * likely stalled, so force an interrupt for this
+			 * queue.
+			 *
+			 * prev_pkt_ctr would be negative if there was no
+			 * pending work.
+			 */
+			packets = tx_ring->stats.packets & INT_MAX;
+			if (tx_ring->tx_stats.prev_pkt_ctr == packets) {
+				i40e_force_wb(vsi, tx_ring->q_vector);
+				continue;
+			}
+
+			/* Memory barrier between read of packet count and call
+			 * to i40e_get_tx_pending()
+			 */
+			smp_rmb();
+			tx_ring->tx_stats.prev_pkt_ctr =
+			    i40e_get_tx_pending(tx_ring, true) ? packets : -1;
+		}
+	}
+}
+
+/**
+ * i40e_clean_tx_irq - Reclaim resources after transmit completes
+ * @vsi: the VSI we care about
+ * @tx_ring: Tx ring to clean
+ * @napi_budget: Used to determine if we are in netpoll
+ * @tx_cleaned: Out parameter set to the number of TXes cleaned
+ *
+ * Returns true if there's any budget left (e.g. the clean is finished)
+ **/
+static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
+			      struct i40e_ring *tx_ring, int napi_budget,
+			      unsigned int *tx_cleaned)
+{
+	int i = tx_ring->next_to_clean;
+	struct i40e_tx_buffer *tx_buf;
+	struct i40e_tx_desc *tx_head;
+	struct i40e_tx_desc *tx_desc;
+	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int budget = vsi->work_limit;
+
+	tx_buf = &tx_ring->tx_bi[i];
+	tx_desc = I40E_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
+
+	do {
+		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		i40e_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf);
+		/* we have caught up to head, no work left to do */
+		if (tx_head == tx_desc)
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buf->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buf->bytecount;
+		total_packets += tx_buf->gso_segs;
+
+		/* free the skb/XDP data */
+		if (ring_is_xdp(tx_ring))
+			xdp_return_frame(tx_buf->xdpf);
+		else
+			napi_consume_skb(tx_buf->skb, napi_budget);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buf, dma),
+				 dma_unmap_len(tx_buf, len),
+				 DMA_TO_DEVICE);
+
+		/* clear tx_buffer data */
+		tx_buf->skb = NULL;
+		dma_unmap_len_set(tx_buf, len, 0);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			i40e_trace(clean_tx_irq_unmap,
+				   tx_ring, tx_desc, tx_buf);
+
+			tx_buf++;
+			tx_desc++;
+			i++;
+			if (unlikely(!i)) {
+				i -= tx_ring->count;
+				tx_buf = tx_ring->tx_bi;
+				tx_desc = I40E_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buf, len)) {
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buf, dma),
+					       dma_unmap_len(tx_buf, len),
+					       DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buf, len, 0);
+			}
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buf++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buf = tx_ring->tx_bi;
+			tx_desc = I40E_TX_DESC(tx_ring, 0);
+		}
+
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+	i40e_update_tx_stats(tx_ring, total_packets, total_bytes);
+	i40e_arm_wb(tx_ring, vsi, budget);
+
+	if (ring_is_xdp(tx_ring))
+		return !!budget;
+
+	/* notify netdev of completed buffers */
+	netdev_tx_completed_queue(txring_txq(tx_ring),
+				  total_packets, total_bytes);
+
+#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
+	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
+		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (__netif_subqueue_stopped(tx_ring->netdev,
+					     tx_ring->queue_index) &&
+		   !test_bit(__I40E_VSI_DOWN, vsi->state)) {
+			netif_wake_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+			++tx_ring->tx_stats.restart_queue;
+		}
+	}
+
+	*tx_cleaned = total_packets;
+	return !!budget;
+}
+
+/**
+ * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
+ * @vsi: the VSI we care about
+ * @q_vector: the vector on which to enable writeback
+ *
+ **/
+static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
+				  struct i40e_q_vector *q_vector)
+{
+	u16 flags = q_vector->tx.ring[0].flags;
+	u32 val;
+
+	if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
+		return;
+
+	if (q_vector->arm_wb_state)
+		return;
+
+	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
+		val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK |
+		      I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */
+
+		wr32(&vsi->back->hw,
+		     I40E_PFINT_DYN_CTLN(q_vector->reg_idx),
+		     val);
+	} else {
+		val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK |
+		      I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */
+
+		wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
+	}
+	q_vector->arm_wb_state = true;
+}
+
+/**
+ * i40e_force_wb - Issue SW Interrupt so HW does a wb
+ * @vsi: the VSI we care about
+ * @q_vector: the vector  on which to force writeback
+ *
+ **/
+void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
+{
+	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
+		u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+			  I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
+			  I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
+			  I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
+			  /* allow 00 to be written to the index */
+
+		wr32(&vsi->back->hw,
+		     I40E_PFINT_DYN_CTLN(q_vector->reg_idx), val);
+	} else {
+		u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
+			  I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
+			  I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
+			  I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
+			/* allow 00 to be written to the index */
+
+		wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
+	}
+}
+
+static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
+					struct i40e_ring_container *rc)
+{
+	return &q_vector->rx == rc;
+}
+
+static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
+{
+	unsigned int divisor;
+
+	switch (q_vector->vsi->back->hw.phy.link_info.link_speed) {
+	case I40E_LINK_SPEED_40GB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
+		break;
+	case I40E_LINK_SPEED_25GB:
+	case I40E_LINK_SPEED_20GB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
+		break;
+	default:
+	case I40E_LINK_SPEED_10GB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
+		break;
+	case I40E_LINK_SPEED_1GB:
+	case I40E_LINK_SPEED_100MB:
+		divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
+		break;
+	}
+
+	return divisor;
+}
+
+/**
+ * i40e_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
+ * @rc: structure containing ring performance data
+ *
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt.  The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern.  Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
+ * while increasing bulk throughput.
+ **/
+static void i40e_update_itr(struct i40e_q_vector *q_vector,
+			    struct i40e_ring_container *rc)
+{
+	unsigned int avg_wire_size, packets, bytes, itr;
+	unsigned long next_update = jiffies;
+
+	/* If we don't have any rings just leave ourselves set for maximum
+	 * possible latency so we take ourselves out of the equation.
+	 */
+	if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
+		return;
+
+	/* For Rx we want to push the delay up and default to low latency.
+	 * for Tx we want to pull the delay down and default to high latency.
+	 */
+	itr = i40e_container_is_rx(q_vector, rc) ?
+	      I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
+	      I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
+
+	/* If we didn't update within up to 1 - 2 jiffies we can assume
+	 * that either packets are coming in so slow there hasn't been
+	 * any work, or that there is so much work that NAPI is dealing
+	 * with interrupt moderation and we don't need to do anything.
+	 */
+	if (time_after(next_update, rc->next_update))
+		goto clear_counts;
+
+	/* If itr_countdown is set it means we programmed an ITR within
+	 * the last 4 interrupt cycles. This has a side effect of us
+	 * potentially firing an early interrupt. In order to work around
+	 * this we need to throw out any data received for a few
+	 * interrupts following the update.
+	 */
+	if (q_vector->itr_countdown) {
+		itr = rc->target_itr;
+		goto clear_counts;
+	}
+
+	packets = rc->total_packets;
+	bytes = rc->total_bytes;
+
+	if (i40e_container_is_rx(q_vector, rc)) {
+		/* If Rx there are 1 to 4 packets and bytes are less than
+		 * 9000 assume insufficient data to use bulk rate limiting
+		 * approach unless Tx is already in bulk rate limiting. We
+		 * are likely latency driven.
+		 */
+		if (packets && packets < 4 && bytes < 9000 &&
+		    (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
+			itr = I40E_ITR_ADAPTIVE_LATENCY;
+			goto adjust_by_size;
+		}
+	} else if (packets < 4) {
+		/* If we have Tx and Rx ITR maxed and Tx ITR is running in
+		 * bulk mode and we are receiving 4 or fewer packets just
+		 * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
+		 * that the Rx can relax.
+		 */
+		if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
+		    (q_vector->rx.target_itr & I40E_ITR_MASK) ==
+		     I40E_ITR_ADAPTIVE_MAX_USECS)
+			goto clear_counts;
+	} else if (packets > 32) {
+		/* If we have processed over 32 packets in a single interrupt
+		 * for Tx assume we need to switch over to "bulk" mode.
+		 */
+		rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
+	}
+
+	/* We have no packets to actually measure against. This means
+	 * either one of the other queues on this vector is active or
+	 * we are a Tx queue doing TSO with too high of an interrupt rate.
+	 *
+	 * Between 4 and 56 we can assume that our current interrupt delay
+	 * is only slightly too low. As such we should increase it by a small
+	 * fixed amount.
+	 */
+	if (packets < 56) {
+		itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
+		if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+			itr &= I40E_ITR_ADAPTIVE_LATENCY;
+			itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+		}
+		goto clear_counts;
+	}
+
+	if (packets <= 256) {
+		itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+		itr &= I40E_ITR_MASK;
+
+		/* Between 56 and 112 is our "goldilocks" zone where we are
+		 * working out "just right". Just report that our current
+		 * ITR is good for us.
+		 */
+		if (packets <= 112)
+			goto clear_counts;
+
+		/* If packet count is 128 or greater we are likely looking
+		 * at a slight overrun of the delay we want. Try halving
+		 * our delay to see if that will cut the number of packets
+		 * in half per interrupt.
+		 */
+		itr /= 2;
+		itr &= I40E_ITR_MASK;
+		if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
+			itr = I40E_ITR_ADAPTIVE_MIN_USECS;
+
+		goto clear_counts;
+	}
+
+	/* The paths below assume we are dealing with a bulk ITR since
+	 * number of packets is greater than 256. We are just going to have
+	 * to compute a value and try to bring the count under control,
+	 * though for smaller packet sizes there isn't much we can do as
+	 * NAPI polling will likely be kicking in sooner rather than later.
+	 */
+	itr = I40E_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+	/* If packet counts are 256 or greater we can assume we have a gross
+	 * overestimation of what the rate should be. Instead of trying to fine
+	 * tune it just use the formula below to try and dial in an exact value
+	 * give the current packet size of the frame.
+	 */
+	avg_wire_size = bytes / packets;
+
+	/* The following is a crude approximation of:
+	 *  wmem_default / (size + overhead) = desired_pkts_per_int
+	 *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+	 *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+	 *
+	 * Assuming wmem_default is 212992 and overhead is 640 bytes per
+	 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+	 * formula down to
+	 *
+	 *  (170 * (size + 24)) / (size + 640) = ITR
+	 *
+	 * We first do some math on the packet size and then finally bitshift
+	 * by 8 after rounding up. We also have to account for PCIe link speed
+	 * difference as ITR scales based on this.
+	 */
+	if (avg_wire_size <= 60) {
+		/* Start at 250k ints/sec */
+		avg_wire_size = 4096;
+	} else if (avg_wire_size <= 380) {
+		/* 250K ints/sec to 60K ints/sec */
+		avg_wire_size *= 40;
+		avg_wire_size += 1696;
+	} else if (avg_wire_size <= 1084) {
+		/* 60K ints/sec to 36K ints/sec */
+		avg_wire_size *= 15;
+		avg_wire_size += 11452;
+	} else if (avg_wire_size <= 1980) {
+		/* 36K ints/sec to 30K ints/sec */
+		avg_wire_size *= 5;
+		avg_wire_size += 22420;
+	} else {
+		/* plateau at a limit of 30K ints/sec */
+		avg_wire_size = 32256;
+	}
+
+	/* If we are in low latency mode halve our delay which doubles the
+	 * rate to somewhere between 100K to 16K ints/sec
+	 */
+	if (itr & I40E_ITR_ADAPTIVE_LATENCY)
+		avg_wire_size /= 2;
+
+	/* Resultant value is 256 times larger than it needs to be. This
+	 * gives us room to adjust the value as needed to either increase
+	 * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+	 *
+	 * Use addition as we have already recorded the new latency flag
+	 * for the ITR value.
+	 */
+	itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
+	       I40E_ITR_ADAPTIVE_MIN_INC;
+
+	if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
+		itr &= I40E_ITR_ADAPTIVE_LATENCY;
+		itr += I40E_ITR_ADAPTIVE_MAX_USECS;
+	}
+
+clear_counts:
+	/* write back value */
+	rc->target_itr = itr;
+
+	/* next update should occur within next jiffy */
+	rc->next_update = next_update + 1;
+
+	rc->total_bytes = 0;
+	rc->total_packets = 0;
+}
+
+static struct i40e_rx_buffer *i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
+{
+	return &rx_ring->rx_bi[idx];
+}
+
+/**
+ * i40e_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ **/
+static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
+			       struct i40e_rx_buffer *old_buff)
+{
+	struct i40e_rx_buffer *new_buff;
+	u16 nta = rx_ring->next_to_alloc;
+
+	new_buff = i40e_rx_bi(rx_ring, nta);
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* transfer page from old buffer to new buffer */
+	new_buff->dma		= old_buff->dma;
+	new_buff->page		= old_buff->page;
+	new_buff->page_offset	= old_buff->page_offset;
+	new_buff->pagecnt_bias	= old_buff->pagecnt_bias;
+
+	/* clear contents of buffer_info */
+	old_buff->page = NULL;
+}
+
+/**
+ * i40e_clean_programming_status - clean the programming status descriptor
+ * @rx_ring: the rx ring that has this descriptor
+ * @qword0_raw: qword0
+ * @qword1: qword1 representing status_error_len in CPU ordering
+ *
+ * Flow director should handle FD_FILTER_STATUS to check its filter programming
+ * status being successful or not and take actions accordingly. FCoE should
+ * handle its context/filter programming/invalidation status and take actions.
+ *
+ * Returns an i40e_rx_buffer to reuse if the cleanup occurred, otherwise NULL.
+ **/
+void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+				   u64 qword1)
+{
+	u8 id;
+
+	id = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
+		  I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
+
+	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
+		i40e_fd_handle_status(rx_ring, qword0_raw, qword1, id);
+}
+
+/**
+ * i40e_setup_tx_descriptors - Allocate the Tx descriptors
+ * @tx_ring: the tx ring to set up
+ *
+ * Return 0 on success, negative on error
+ **/
+int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
+{
+	struct device *dev = tx_ring->dev;
+	int bi_size;
+
+	if (!dev)
+		return -ENOMEM;
+
+	/* warn if we are about to overwrite the pointer */
+	WARN_ON(tx_ring->tx_bi);
+	bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
+	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
+	if (!tx_ring->tx_bi)
+		goto err;
+
+	u64_stats_init(&tx_ring->syncp);
+
+	/* round up to nearest 4K */
+	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
+	/* add u32 for head writeback, align after this takes care of
+	 * guaranteeing this is at least one cache line in size
+	 */
+	tx_ring->size += sizeof(u32);
+	tx_ring->size = ALIGN(tx_ring->size, 4096);
+	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+					   &tx_ring->dma, GFP_KERNEL);
+	if (!tx_ring->desc) {
+		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
+			 tx_ring->size);
+		goto err;
+	}
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+	tx_ring->tx_stats.prev_pkt_ctr = -1;
+	return 0;
+
+err:
+	kfree(tx_ring->tx_bi);
+	tx_ring->tx_bi = NULL;
+	return -ENOMEM;
+}
+
+static void i40e_clear_rx_bi(struct i40e_ring *rx_ring)
+{
+	memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count);
+}
+
+/**
+ * i40e_clean_rx_ring - Free Rx buffers
+ * @rx_ring: ring to be cleaned
+ **/
+void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
+{
+	u16 i;
+
+	/* ring already cleared, nothing to do */
+	if (!rx_ring->rx_bi)
+		return;
+
+	if (rx_ring->xsk_pool) {
+		i40e_xsk_clean_rx_ring(rx_ring);
+		goto skip_free;
+	}
+
+	/* Free all the Rx ring sk_buffs */
+	for (i = 0; i < rx_ring->count; i++) {
+		struct i40e_rx_buffer *rx_bi = i40e_rx_bi(rx_ring, i);
+
+		if (!rx_bi->page)
+			continue;
+
+		/* Invalidate cache lines that may have been written to by
+		 * device so that we avoid corrupting memory.
+		 */
+		dma_sync_single_range_for_cpu(rx_ring->dev,
+					      rx_bi->dma,
+					      rx_bi->page_offset,
+					      rx_ring->rx_buf_len,
+					      DMA_FROM_DEVICE);
+
+		/* free resources associated with mapping */
+		dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma,
+				     i40e_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE,
+				     I40E_RX_DMA_ATTR);
+
+		__page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias);
+
+		rx_bi->page = NULL;
+		rx_bi->page_offset = 0;
+	}
+
+skip_free:
+	if (rx_ring->xsk_pool)
+		i40e_clear_rx_bi_zc(rx_ring);
+	else
+		i40e_clear_rx_bi(rx_ring);
+
+	/* Zero out the descriptor ring */
+	memset(rx_ring->desc, 0, rx_ring->size);
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_process = 0;
+	rx_ring->next_to_use = 0;
+}
+
+/**
+ * i40e_free_rx_resources - Free Rx resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+void i40e_free_rx_resources(struct i40e_ring *rx_ring)
+{
+	i40e_clean_rx_ring(rx_ring);
+	if (rx_ring->vsi->type == I40E_VSI_MAIN)
+		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+	rx_ring->xdp_prog = NULL;
+	kfree(rx_ring->rx_bi);
+	rx_ring->rx_bi = NULL;
+
+	if (rx_ring->desc) {
+		dma_free_coherent(rx_ring->dev, rx_ring->size,
+				  rx_ring->desc, rx_ring->dma);
+		rx_ring->desc = NULL;
+	}
+}
+
+/**
+ * i40e_setup_rx_descriptors - Allocate Rx descriptors
+ * @rx_ring: Rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
+{
+	struct device *dev = rx_ring->dev;
+
+	u64_stats_init(&rx_ring->syncp);
+
+	/* Round up to nearest 4K */
+	rx_ring->size = rx_ring->count * sizeof(union i40e_rx_desc);
+	rx_ring->size = ALIGN(rx_ring->size, 4096);
+	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+					   &rx_ring->dma, GFP_KERNEL);
+
+	if (!rx_ring->desc) {
+		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
+			 rx_ring->size);
+		return -ENOMEM;
+	}
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_process = 0;
+	rx_ring->next_to_use = 0;
+
+	rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
+
+	rx_ring->rx_bi =
+		kcalloc(rx_ring->count, sizeof(*rx_ring->rx_bi), GFP_KERNEL);
+	if (!rx_ring->rx_bi)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * i40e_release_rx_desc - Store the new tail and head values
+ * @rx_ring: ring to bump
+ * @val: new head index
+ **/
+void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
+{
+	rx_ring->next_to_use = val;
+
+	/* update next to alloc since we have filled the ring */
+	rx_ring->next_to_alloc = val;
+
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.  (Only
+	 * applicable for weak-ordered memory model archs,
+	 * such as IA-64).
+	 */
+	wmb();
+	writel(val, rx_ring->tail);
+}
+
+#if (PAGE_SIZE >= 8192)
+static unsigned int i40e_rx_frame_truesize(struct i40e_ring *rx_ring,
+					   unsigned int size)
+{
+	unsigned int truesize;
+
+	truesize = rx_ring->rx_offset ?
+		SKB_DATA_ALIGN(size + rx_ring->rx_offset) +
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
+		SKB_DATA_ALIGN(size);
+	return truesize;
+}
+#endif
+
+/**
+ * i40e_alloc_mapped_page - recycle or make a new page
+ * @rx_ring: ring to use
+ * @bi: rx_buffer struct to modify
+ *
+ * Returns true if the page was successfully allocated or
+ * reused.
+ **/
+static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
+				   struct i40e_rx_buffer *bi)
+{
+	struct page *page = bi->page;
+	dma_addr_t dma;
+
+	/* since we are recycling buffers we should seldom need to alloc */
+	if (likely(page)) {
+		rx_ring->rx_stats.page_reuse_count++;
+		return true;
+	}
+
+	/* alloc new page for storage */
+	page = dev_alloc_pages(i40e_rx_pg_order(rx_ring));
+	if (unlikely(!page)) {
+		rx_ring->rx_stats.alloc_page_failed++;
+		return false;
+	}
+
+	rx_ring->rx_stats.page_alloc_count++;
+
+	/* map page for use */
+	dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+				 i40e_rx_pg_size(rx_ring),
+				 DMA_FROM_DEVICE,
+				 I40E_RX_DMA_ATTR);
+
+	/* if mapping failed free memory back to system since
+	 * there isn't much point in holding memory we can't use
+	 */
+	if (dma_mapping_error(rx_ring->dev, dma)) {
+		__free_pages(page, i40e_rx_pg_order(rx_ring));
+		rx_ring->rx_stats.alloc_page_failed++;
+		return false;
+	}
+
+	bi->dma = dma;
+	bi->page = page;
+	bi->page_offset = rx_ring->rx_offset;
+	page_ref_add(page, USHRT_MAX - 1);
+	bi->pagecnt_bias = USHRT_MAX;
+
+	return true;
+}
+
+/**
+ * i40e_alloc_rx_buffers - Replace used receive buffers
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ *
+ * Returns false if all allocations were successful, true if any fail
+ **/
+bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
+{
+	u16 ntu = rx_ring->next_to_use;
+	union i40e_rx_desc *rx_desc;
+	struct i40e_rx_buffer *bi;
+
+	/* do nothing if no valid netdev defined */
+	if (!rx_ring->netdev || !cleaned_count)
+		return false;
+
+	rx_desc = I40E_RX_DESC(rx_ring, ntu);
+	bi = i40e_rx_bi(rx_ring, ntu);
+
+	do {
+		if (!i40e_alloc_mapped_page(rx_ring, bi))
+			goto no_buffers;
+
+		/* sync the buffer for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+						 bi->page_offset,
+						 rx_ring->rx_buf_len,
+						 DMA_FROM_DEVICE);
+
+		/* Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+		rx_desc++;
+		bi++;
+		ntu++;
+		if (unlikely(ntu == rx_ring->count)) {
+			rx_desc = I40E_RX_DESC(rx_ring, 0);
+			bi = i40e_rx_bi(rx_ring, 0);
+			ntu = 0;
+		}
+
+		/* clear the status bits for the next_to_use descriptor */
+		rx_desc->wb.qword1.status_error_len = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	if (rx_ring->next_to_use != ntu)
+		i40e_release_rx_desc(rx_ring, ntu);
+
+	return false;
+
+no_buffers:
+	if (rx_ring->next_to_use != ntu)
+		i40e_release_rx_desc(rx_ring, ntu);
+
+	/* make sure to come back via polling to try again after
+	 * allocation failure
+	 */
+	return true;
+}
+
+/**
+ * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
+ * @vsi: the VSI we care about
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ **/
+static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
+				    struct sk_buff *skb,
+				    union i40e_rx_desc *rx_desc)
+{
+	struct i40e_rx_ptype_decoded decoded;
+	u32 rx_error, rx_status;
+	bool ipv4, ipv6;
+	u8 ptype;
+	u64 qword;
+
+	qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+	ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT;
+	rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
+		   I40E_RXD_QW1_ERROR_SHIFT;
+	rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
+		    I40E_RXD_QW1_STATUS_SHIFT;
+	decoded = decode_rx_desc_ptype(ptype);
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	skb_checksum_none_assert(skb);
+
+	/* Rx csum enabled and ip headers found? */
+	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* did the hardware decode the packet and checksum? */
+	if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
+		return;
+
+	/* both known and outer_ip must be set for the below code to work */
+	if (!(decoded.known && decoded.outer_ip))
+		return;
+
+	ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4);
+	ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6);
+
+	if (ipv4 &&
+	    (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
+			 BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
+		goto checksum_fail;
+
+	/* likely incorrect csum if alternate IP extension headers found */
+	if (ipv6 &&
+	    rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
+		/* don't increment checksum err here, non-fatal err */
+		return;
+
+	/* there was some L4 error, count error and punt packet to the stack */
+	if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
+		goto checksum_fail;
+
+	/* handle packets that were not able to be checksummed due
+	 * to arrival speed, in this case the stack can compute
+	 * the csum.
+	 */
+	if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
+		return;
+
+	/* If there is an outer header present that might contain a checksum
+	 * we need to bump the checksum level by 1 to reflect the fact that
+	 * we are indicating we validated the inner checksum.
+	 */
+	if (decoded.tunnel_type >= I40E_RX_PTYPE_TUNNEL_IP_GRENAT)
+		skb->csum_level = 1;
+
+	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
+	switch (decoded.inner_prot) {
+	case I40E_RX_PTYPE_INNER_PROT_TCP:
+	case I40E_RX_PTYPE_INNER_PROT_UDP:
+	case I40E_RX_PTYPE_INNER_PROT_SCTP:
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		fallthrough;
+	default:
+		break;
+	}
+
+	return;
+
+checksum_fail:
+	vsi->back->hw_csum_rx_error++;
+}
+
+/**
+ * i40e_ptype_to_htype - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns a hash type to be used by skb_set_hash
+ **/
+static inline int i40e_ptype_to_htype(u8 ptype)
+{
+	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
+
+	if (!decoded.known)
+		return PKT_HASH_TYPE_NONE;
+
+	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
+	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
+		return PKT_HASH_TYPE_L4;
+	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
+		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
+		return PKT_HASH_TYPE_L3;
+	else
+		return PKT_HASH_TYPE_L2;
+}
+
+/**
+ * i40e_rx_hash - set the hash value in the skb
+ * @ring: descriptor ring
+ * @rx_desc: specific descriptor
+ * @skb: skb currently being received and modified
+ * @rx_ptype: Rx packet type
+ **/
+static inline void i40e_rx_hash(struct i40e_ring *ring,
+				union i40e_rx_desc *rx_desc,
+				struct sk_buff *skb,
+				u8 rx_ptype)
+{
+	u32 hash;
+	const __le64 rss_mask =
+		cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
+			    I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
+
+	if (!(ring->netdev->features & NETIF_F_RXHASH))
+		return;
+
+	if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
+		hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
+		skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype));
+	}
+}
+
+/**
+ * i40e_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ **/
+void i40e_process_skb_fields(struct i40e_ring *rx_ring,
+			     union i40e_rx_desc *rx_desc, struct sk_buff *skb)
+{
+	u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+	u32 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
+			I40E_RXD_QW1_STATUS_SHIFT;
+	u32 tsynvalid = rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK;
+	u32 tsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
+		   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT;
+	u8 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
+		      I40E_RXD_QW1_PTYPE_SHIFT;
+
+	if (unlikely(tsynvalid))
+		i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, tsyn);
+
+	i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
+
+	i40e_rx_checksum(rx_ring->vsi, skb, rx_desc);
+
+	skb_record_rx_queue(skb, rx_ring->queue_index);
+
+	if (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
+		__le16 vlan_tag = rx_desc->wb.qword0.lo_dword.l2tag1;
+
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+				       le16_to_cpu(vlan_tag));
+	}
+
+	/* modifies the skb - consumes the enet header */
+	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+}
+
+/**
+ * i40e_cleanup_headers - Correct empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being fixed
+ * @rx_desc: pointer to the EOP Rx descriptor
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
+				 union i40e_rx_desc *rx_desc)
+
+{
+	/* ERR_MASK will only have valid bits if EOP set, and
+	 * what we are doing here is actually checking
+	 * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
+	 * the error field
+	 */
+	if (unlikely(i40e_test_staterr(rx_desc,
+				       BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
+		dev_kfree_skb_any(skb);
+		return true;
+	}
+
+	/* if eth_skb_pad returns an error the skb was freed */
+	if (eth_skb_pad(skb))
+		return true;
+
+	return false;
+}
+
+/**
+ * i40e_can_reuse_rx_page - Determine if page can be reused for another Rx
+ * @rx_buffer: buffer containing the page
+ * @rx_stats: rx stats structure for the rx ring
+ *
+ * If page is reusable, we have a green light for calling i40e_reuse_rx_page,
+ * which will assign the current buffer to the buffer that next_to_alloc is
+ * pointing to; otherwise, the DMA mapping needs to be destroyed and
+ * page freed.
+ *
+ * rx_stats will be updated to indicate whether the page was waived
+ * or busy if it could not be reused.
+ */
+static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
+				   struct i40e_rx_queue_stats *rx_stats)
+{
+	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+	struct page *page = rx_buffer->page;
+
+	/* Is any reuse possible? */
+	if (!dev_page_is_reusable(page)) {
+		rx_stats->page_waive_count++;
+		return false;
+	}
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely((rx_buffer->page_count - pagecnt_bias) > 1)) {
+		rx_stats->page_busy_count++;
+		return false;
+	}
+#else
+#define I40E_LAST_OFFSET \
+	(SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048)
+	if (rx_buffer->page_offset > I40E_LAST_OFFSET) {
+		rx_stats->page_busy_count++;
+		return false;
+	}
+#endif
+
+	/* If we have drained the page fragment pool we need to update
+	 * the pagecnt_bias and page count so that we fully restock the
+	 * number of references the driver holds.
+	 */
+	if (unlikely(pagecnt_bias == 1)) {
+		page_ref_add(page, USHRT_MAX - 1);
+		rx_buffer->pagecnt_bias = USHRT_MAX;
+	}
+
+	return true;
+}
+
+/**
+ * i40e_rx_buffer_flip - adjusted rx_buffer to point to an unused region
+ * @rx_buffer: Rx buffer to adjust
+ * @truesize: Size of adjustment
+ **/
+static void i40e_rx_buffer_flip(struct i40e_rx_buffer *rx_buffer,
+				unsigned int truesize)
+{
+#if (PAGE_SIZE < 8192)
+	rx_buffer->page_offset ^= truesize;
+#else
+	rx_buffer->page_offset += truesize;
+#endif
+}
+
+/**
+ * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @size: size of buffer to add to skb
+ *
+ * This function will pull an Rx buffer from the ring and synchronize it
+ * for use by the CPU.
+ */
+static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
+						 const unsigned int size)
+{
+	struct i40e_rx_buffer *rx_buffer;
+
+	rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_process);
+	rx_buffer->page_count =
+#if (PAGE_SIZE < 8192)
+		page_count(rx_buffer->page);
+#else
+		0;
+#endif
+	prefetch_page_address(rx_buffer->page);
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      rx_buffer->dma,
+				      rx_buffer->page_offset,
+				      size,
+				      DMA_FROM_DEVICE);
+
+	/* We have pulled a buffer for use, so decrement pagecnt_bias */
+	rx_buffer->pagecnt_bias--;
+
+	return rx_buffer;
+}
+
+/**
+ * i40e_put_rx_buffer - Clean up used buffer and either recycle or free
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: rx buffer to pull data from
+ *
+ * This function will clean up the contents of the rx_buffer.  It will
+ * either recycle the buffer or unmap it and free the associated resources.
+ */
+static void i40e_put_rx_buffer(struct i40e_ring *rx_ring,
+			       struct i40e_rx_buffer *rx_buffer)
+{
+	if (i40e_can_reuse_rx_page(rx_buffer, &rx_ring->rx_stats)) {
+		/* hand second half of page back to the ring */
+		i40e_reuse_rx_page(rx_ring, rx_buffer);
+	} else {
+		/* we are not reusing the buffer so unmap it */
+		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+				     i40e_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
+		__page_frag_cache_drain(rx_buffer->page,
+					rx_buffer->pagecnt_bias);
+		/* clear contents of buffer_info */
+		rx_buffer->page = NULL;
+	}
+}
+
+/**
+ * i40e_process_rx_buffs- Processing of buffers post XDP prog or on error
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @xdp_res: Result of the XDP program
+ * @xdp: xdp_buff pointing to the data
+ **/
+static void i40e_process_rx_buffs(struct i40e_ring *rx_ring, int xdp_res,
+				  struct xdp_buff *xdp)
+{
+	u32 nr_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
+	u32 next = rx_ring->next_to_clean, i = 0;
+	struct i40e_rx_buffer *rx_buffer;
+
+	xdp->flags = 0;
+
+	while (1) {
+		rx_buffer = i40e_rx_bi(rx_ring, next);
+		if (++next == rx_ring->count)
+			next = 0;
+
+		if (!rx_buffer->page)
+			continue;
+
+		if (xdp_res != I40E_XDP_CONSUMED)
+			i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
+		else if (i++ <= nr_frags)
+			rx_buffer->pagecnt_bias++;
+
+		/* EOP buffer will be put in i40e_clean_rx_irq() */
+		if (next == rx_ring->next_to_process)
+			return;
+
+		i40e_put_rx_buffer(rx_ring, rx_buffer);
+	}
+}
+
+/**
+ * i40e_construct_skb - Allocate skb and populate it
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @xdp: xdp_buff pointing to the data
+ *
+ * This function allocates an skb.  It then populates it with the page
+ * data from the current receive descriptor, taking care to set up the
+ * skb correctly.
+ */
+static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
+					  struct xdp_buff *xdp)
+{
+	unsigned int size = xdp->data_end - xdp->data;
+	struct i40e_rx_buffer *rx_buffer;
+	struct skb_shared_info *sinfo;
+	unsigned int headlen;
+	struct sk_buff *skb;
+	u32 nr_frags = 0;
+
+	/* prefetch first cache line of first page */
+	net_prefetch(xdp->data);
+
+	/* Note, we get here by enabling legacy-rx via:
+	 *
+	 *    ethtool --set-priv-flags <dev> legacy-rx on
+	 *
+	 * In this mode, we currently get 0 extra XDP headroom as
+	 * opposed to having legacy-rx off, where we process XDP
+	 * packets going to stack via i40e_build_skb(). The latter
+	 * provides us currently with 192 bytes of headroom.
+	 *
+	 * For i40e_construct_skb() mode it means that the
+	 * xdp->data_meta will always point to xdp->data, since
+	 * the helper cannot expand the head. Should this ever
+	 * change in future for legacy-rx mode on, then lets also
+	 * add xdp->data_meta handling here.
+	 */
+
+	/* allocate a skb to store the frags */
+	skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+			       I40E_RX_HDR_SIZE,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* Determine available headroom for copy */
+	headlen = size;
+	if (headlen > I40E_RX_HDR_SIZE)
+		headlen = eth_get_headlen(skb->dev, xdp->data,
+					  I40E_RX_HDR_SIZE);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	memcpy(__skb_put(skb, headlen), xdp->data,
+	       ALIGN(headlen, sizeof(long)));
+
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+	}
+	rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
+	/* update all of the pointers */
+	size -= headlen;
+	if (size) {
+		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
+			dev_kfree_skb(skb);
+			return NULL;
+		}
+		skb_add_rx_frag(skb, 0, rx_buffer->page,
+				rx_buffer->page_offset + headlen,
+				size, xdp->frame_sz);
+		/* buffer is used by skb, update page_offset */
+		i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
+	} else {
+		/* buffer is unused, reset bias back to rx_buffer */
+		rx_buffer->pagecnt_bias++;
+	}
+
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		struct skb_shared_info *skinfo = skb_shinfo(skb);
+
+		memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
+		       sizeof(skb_frag_t) * nr_frags);
+
+		xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags,
+					   sinfo->xdp_frags_size,
+					   nr_frags * xdp->frame_sz,
+					   xdp_buff_is_frag_pfmemalloc(xdp));
+
+		/* First buffer has already been processed, so bump ntc */
+		if (++rx_ring->next_to_clean == rx_ring->count)
+			rx_ring->next_to_clean = 0;
+
+		i40e_process_rx_buffs(rx_ring, I40E_XDP_PASS, xdp);
+	}
+
+	return skb;
+}
+
+/**
+ * i40e_build_skb - Build skb around an existing buffer
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @xdp: xdp_buff pointing to the data
+ *
+ * This function builds an skb around an existing Rx buffer, taking care
+ * to set up the skb correctly and avoid any memcpy overhead.
+ */
+static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
+				      struct xdp_buff *xdp)
+{
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	struct skb_shared_info *sinfo;
+	struct sk_buff *skb;
+	u32 nr_frags;
+
+	/* Prefetch first cache line of first page. If xdp->data_meta
+	 * is unused, this points exactly as xdp->data, otherwise we
+	 * likely have a consumer accessing first few bytes of meta
+	 * data, and then actual data.
+	 */
+	net_prefetch(xdp->data_meta);
+
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+	}
+
+	/* build an skb around the page buffer */
+	skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* update pointers within the skb to store the data */
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	__skb_put(skb, xdp->data_end - xdp->data);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		xdp_update_skb_shared_info(skb, nr_frags,
+					   sinfo->xdp_frags_size,
+					   nr_frags * xdp->frame_sz,
+					   xdp_buff_is_frag_pfmemalloc(xdp));
+
+		i40e_process_rx_buffs(rx_ring, I40E_XDP_PASS, xdp);
+	} else {
+		struct i40e_rx_buffer *rx_buffer;
+
+		rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
+		/* buffer is used by skb, update page_offset */
+		i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
+	}
+
+	return skb;
+}
+
+/**
+ * i40e_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ *
+ * If the buffer is an EOP buffer, this function exits returning false,
+ * otherwise return true indicating that this is in fact a non-EOP buffer.
+ */
+bool i40e_is_non_eop(struct i40e_ring *rx_ring,
+		     union i40e_rx_desc *rx_desc)
+{
+	/* if we are the last buffer then there is nothing else to do */
+#define I40E_RXD_EOF BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)
+	if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF)))
+		return false;
+
+	rx_ring->rx_stats.non_eop_descs++;
+
+	return true;
+}
+
+static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
+			      struct i40e_ring *xdp_ring);
+
+int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring)
+{
+	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+
+	if (unlikely(!xdpf))
+		return I40E_XDP_CONSUMED;
+
+	return i40e_xmit_xdp_ring(xdpf, xdp_ring);
+}
+
+/**
+ * i40e_run_xdp - run an XDP program
+ * @rx_ring: Rx ring being processed
+ * @xdp: XDP buffer containing the frame
+ * @xdp_prog: XDP program to run
+ **/
+static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
+{
+	int err, result = I40E_XDP_PASS;
+	struct i40e_ring *xdp_ring;
+	u32 act;
+
+	if (!xdp_prog)
+		goto xdp_out;
+
+	prefetchw(xdp->data_hard_start); /* xdp_frame write */
+
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
+		result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
+		if (result == I40E_XDP_CONSUMED)
+			goto out_failure;
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		if (err)
+			goto out_failure;
+		result = I40E_XDP_REDIR;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
+		fallthrough;
+	case XDP_ABORTED:
+out_failure:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		fallthrough; /* handle aborts by dropping packet */
+	case XDP_DROP:
+		result = I40E_XDP_CONSUMED;
+		break;
+	}
+xdp_out:
+	return result;
+}
+
+/**
+ * i40e_xdp_ring_update_tail - Updates the XDP Tx ring tail register
+ * @xdp_ring: XDP Tx ring
+ *
+ * This function updates the XDP Tx ring tail register.
+ **/
+void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring)
+{
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.
+	 */
+	wmb();
+	writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+}
+
+/**
+ * i40e_update_rx_stats - Update Rx ring statistics
+ * @rx_ring: rx descriptor ring
+ * @total_rx_bytes: number of bytes received
+ * @total_rx_packets: number of packets received
+ *
+ * This function updates the Rx ring statistics.
+ **/
+void i40e_update_rx_stats(struct i40e_ring *rx_ring,
+			  unsigned int total_rx_bytes,
+			  unsigned int total_rx_packets)
+{
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->stats.packets += total_rx_packets;
+	rx_ring->stats.bytes += total_rx_bytes;
+	u64_stats_update_end(&rx_ring->syncp);
+	rx_ring->q_vector->rx.total_packets += total_rx_packets;
+	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+}
+
+/**
+ * i40e_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
+ * @rx_ring: Rx ring
+ * @xdp_res: Result of the receive batch
+ *
+ * This function bumps XDP Tx tail and/or flush redirect map, and
+ * should be called when a batch of packets has been processed in the
+ * napi loop.
+ **/
+void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res)
+{
+	if (xdp_res & I40E_XDP_REDIR)
+		xdp_do_flush_map();
+
+	if (xdp_res & I40E_XDP_TX) {
+		struct i40e_ring *xdp_ring =
+			rx_ring->vsi->xdp_rings[rx_ring->queue_index];
+
+		i40e_xdp_ring_update_tail(xdp_ring);
+	}
+}
+
+/**
+ * i40e_inc_ntp: Advance the next_to_process index
+ * @rx_ring: Rx ring
+ **/
+static void i40e_inc_ntp(struct i40e_ring *rx_ring)
+{
+	u32 ntp = rx_ring->next_to_process + 1;
+
+	ntp = (ntp < rx_ring->count) ? ntp : 0;
+	rx_ring->next_to_process = ntp;
+	prefetch(I40E_RX_DESC(rx_ring, ntp));
+}
+
+/**
+ * i40e_add_xdp_frag: Add a frag to xdp_buff
+ * @xdp: xdp_buff pointing to the data
+ * @nr_frags: return number of buffers for the packet
+ * @rx_buffer: rx_buffer holding data of the current frag
+ * @size: size of data of current frag
+ */
+static int i40e_add_xdp_frag(struct xdp_buff *xdp, u32 *nr_frags,
+			     struct i40e_rx_buffer *rx_buffer, u32 size)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+
+	if (!xdp_buff_has_frags(xdp)) {
+		sinfo->nr_frags = 0;
+		sinfo->xdp_frags_size = 0;
+		xdp_buff_set_frags_flag(xdp);
+	} else if (unlikely(sinfo->nr_frags >= MAX_SKB_FRAGS)) {
+		/* Overflowing packet: All frags need to be dropped */
+		return -ENOMEM;
+	}
+
+	__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buffer->page,
+				   rx_buffer->page_offset, size);
+
+	sinfo->xdp_frags_size += size;
+
+	if (page_is_pfmemalloc(rx_buffer->page))
+		xdp_buff_set_frag_pfmemalloc(xdp);
+	*nr_frags = sinfo->nr_frags;
+
+	return 0;
+}
+
+/**
+ * i40e_consume_xdp_buff - Consume all the buffers of the packet and update ntc
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @xdp: xdp_buff pointing to the data
+ * @rx_buffer: rx_buffer of eop desc
+ */
+static void i40e_consume_xdp_buff(struct i40e_ring *rx_ring,
+				  struct xdp_buff *xdp,
+				  struct i40e_rx_buffer *rx_buffer)
+{
+	i40e_process_rx_buffs(rx_ring, I40E_XDP_CONSUMED, xdp);
+	i40e_put_rx_buffer(rx_ring, rx_buffer);
+	rx_ring->next_to_clean = rx_ring->next_to_process;
+	xdp->data = NULL;
+}
+
+/**
+ * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @budget: Total limit on number of packets to process
+ * @rx_cleaned: Out parameter of the number of packets processed
+ *
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing.  The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the system.
+ *
+ * Returns amount of work completed
+ **/
+static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget,
+			     unsigned int *rx_cleaned)
+{
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
+	u16 clean_threshold = rx_ring->count / 2;
+	unsigned int offset = rx_ring->rx_offset;
+	struct xdp_buff *xdp = &rx_ring->xdp;
+	unsigned int xdp_xmit = 0;
+	struct bpf_prog *xdp_prog;
+	bool failure = false;
+	int xdp_res = 0;
+
+	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
+	while (likely(total_rx_packets < (unsigned int)budget)) {
+		u16 ntp = rx_ring->next_to_process;
+		struct i40e_rx_buffer *rx_buffer;
+		union i40e_rx_desc *rx_desc;
+		struct sk_buff *skb;
+		unsigned int size;
+		u32 nfrags = 0;
+		bool neop;
+		u64 qword;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= clean_threshold) {
+			failure = failure ||
+				  i40e_alloc_rx_buffers(rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = I40E_RX_DESC(rx_ring, ntp);
+
+		/* status_error_len will always be zero for unused descriptors
+		 * because it's cleared in cleanup, and overlaps with hdr_addr
+		 * which is always zero because packet split isn't used, if the
+		 * hardware wrote DD then the length will be non-zero
+		 */
+		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we have
+		 * verified the descriptor has been written back.
+		 */
+		dma_rmb();
+
+		if (i40e_rx_is_programming_status(qword)) {
+			i40e_clean_programming_status(rx_ring,
+						      rx_desc->raw.qword[0],
+						      qword);
+			rx_buffer = i40e_rx_bi(rx_ring, ntp);
+			i40e_inc_ntp(rx_ring);
+			i40e_reuse_rx_page(rx_ring, rx_buffer);
+			/* Update ntc and bump cleaned count if not in the
+			 * middle of mb packet.
+			 */
+			if (rx_ring->next_to_clean == ntp) {
+				rx_ring->next_to_clean =
+					rx_ring->next_to_process;
+				cleaned_count++;
+			}
+			continue;
+		}
+
+		size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
+		       I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
+		if (!size)
+			break;
+
+		i40e_trace(clean_rx_irq, rx_ring, rx_desc, xdp);
+		/* retrieve a buffer from the ring */
+		rx_buffer = i40e_get_rx_buffer(rx_ring, size);
+
+		neop = i40e_is_non_eop(rx_ring, rx_desc);
+		i40e_inc_ntp(rx_ring);
+
+		if (!xdp->data) {
+			unsigned char *hard_start;
+
+			hard_start = page_address(rx_buffer->page) +
+				     rx_buffer->page_offset - offset;
+			xdp_prepare_buff(xdp, hard_start, offset, size, true);
+#if (PAGE_SIZE > 4096)
+			/* At larger PAGE_SIZE, frame_sz depend on len size */
+			xdp->frame_sz = i40e_rx_frame_truesize(rx_ring, size);
+#endif
+		} else if (i40e_add_xdp_frag(xdp, &nfrags, rx_buffer, size) &&
+			   !neop) {
+			/* Overflowing packet: Drop all frags on EOP */
+			i40e_consume_xdp_buff(rx_ring, xdp, rx_buffer);
+			break;
+		}
+
+		if (neop)
+			continue;
+
+		xdp_res = i40e_run_xdp(rx_ring, xdp, xdp_prog);
+
+		if (xdp_res) {
+			xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR);
+
+			if (unlikely(xdp_buff_has_frags(xdp))) {
+				i40e_process_rx_buffs(rx_ring, xdp_res, xdp);
+				size = xdp_get_buff_len(xdp);
+			} else if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
+				i40e_rx_buffer_flip(rx_buffer, xdp->frame_sz);
+			} else {
+				rx_buffer->pagecnt_bias++;
+			}
+			total_rx_bytes += size;
+		} else {
+			if (ring_uses_build_skb(rx_ring))
+				skb = i40e_build_skb(rx_ring, xdp);
+			else
+				skb = i40e_construct_skb(rx_ring, xdp);
+
+			/* drop if we failed to retrieve a buffer */
+			if (!skb) {
+				rx_ring->rx_stats.alloc_buff_failed++;
+				i40e_consume_xdp_buff(rx_ring, xdp, rx_buffer);
+				break;
+			}
+
+			if (i40e_cleanup_headers(rx_ring, skb, rx_desc))
+				goto process_next;
+
+			/* probably a little skewed due to removing CRC */
+			total_rx_bytes += skb->len;
+
+			/* populate checksum, VLAN, and protocol */
+			i40e_process_skb_fields(rx_ring, rx_desc, skb);
+
+			i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, xdp);
+			napi_gro_receive(&rx_ring->q_vector->napi, skb);
+		}
+
+		/* update budget accounting */
+		total_rx_packets++;
+process_next:
+		cleaned_count += nfrags + 1;
+		i40e_put_rx_buffer(rx_ring, rx_buffer);
+		rx_ring->next_to_clean = rx_ring->next_to_process;
+
+		xdp->data = NULL;
+	}
+
+	i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
+
+	i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);
+
+	*rx_cleaned = total_rx_packets;
+
+	/* guarantee a trip back through this routine if there was a failure */
+	return failure ? budget : (int)total_rx_packets;
+}
+
+static inline u32 i40e_buildreg_itr(const int type, u16 itr)
+{
+	u32 val;
+
+	/* We don't bother with setting the CLEARPBA bit as the data sheet
+	 * points out doing so is "meaningless since it was already
+	 * auto-cleared". The auto-clearing happens when the interrupt is
+	 * asserted.
+	 *
+	 * Hardware errata 28 for also indicates that writing to a
+	 * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
+	 * an event in the PBA anyway so we need to rely on the automask
+	 * to hold pending events for us until the interrupt is re-enabled
+	 *
+	 * The itr value is reported in microseconds, and the register
+	 * value is recorded in 2 microsecond units. For this reason we
+	 * only need to shift by the interval shift - 1 instead of the
+	 * full value.
+	 */
+	itr &= I40E_ITR_MASK;
+
+	val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+	      (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
+	      (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1));
+
+	return val;
+}
+
+/* a small macro to shorten up some long lines */
+#define INTREG I40E_PFINT_DYN_CTLN
+
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
+
+/**
+ * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
+ * @vsi: the VSI we care about
+ * @q_vector: q_vector for which itr is being updated and interrupt enabled
+ *
+ **/
+static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
+					  struct i40e_q_vector *q_vector)
+{
+	struct i40e_hw *hw = &vsi->back->hw;
+	u32 intval;
+
+	/* If we don't have MSIX, then we only need to re-enable icr0 */
+	if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) {
+		i40e_irq_dynamic_enable_icr0(vsi->back);
+		return;
+	}
+
+	/* These will do nothing if dynamic updates are not enabled */
+	i40e_update_itr(q_vector, &q_vector->tx);
+	i40e_update_itr(q_vector, &q_vector->rx);
+
+	/* This block of logic allows us to get away with only updating
+	 * one ITR value with each interrupt. The idea is to perform a
+	 * pseudo-lazy update with the following criteria.
+	 *
+	 * 1. Rx is given higher priority than Tx if both are in same state
+	 * 2. If we must reduce an ITR that is given highest priority.
+	 * 3. We then give priority to increasing ITR based on amount.
+	 */
+	if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+		/* Rx ITR needs to be reduced, this is highest priority */
+		intval = i40e_buildreg_itr(I40E_RX_ITR,
+					   q_vector->rx.target_itr);
+		q_vector->rx.current_itr = q_vector->rx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+		   ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+		    (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+		/* Tx ITR needs to be reduced, this is second priority
+		 * Tx ITR needs to be increased more than Rx, fourth priority
+		 */
+		intval = i40e_buildreg_itr(I40E_TX_ITR,
+					   q_vector->tx.target_itr);
+		q_vector->tx.current_itr = q_vector->tx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+		/* Rx ITR needs to be increased, third priority */
+		intval = i40e_buildreg_itr(I40E_RX_ITR,
+					   q_vector->rx.target_itr);
+		q_vector->rx.current_itr = q_vector->rx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else {
+		/* No ITR update, lowest priority */
+		intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
+		if (q_vector->itr_countdown)
+			q_vector->itr_countdown--;
+	}
+
+	if (!test_bit(__I40E_VSI_DOWN, vsi->state))
+		wr32(hw, INTREG(q_vector->reg_idx), intval);
+}
+
+/**
+ * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
+ * @napi: napi struct with our devices info in it
+ * @budget: amount of work driver is allowed to do this pass, in packets
+ *
+ * This function will clean all queues associated with a q_vector.
+ *
+ * Returns the amount of work done
+ **/
+int i40e_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct i40e_q_vector *q_vector =
+			       container_of(napi, struct i40e_q_vector, napi);
+	struct i40e_vsi *vsi = q_vector->vsi;
+	struct i40e_ring *ring;
+	bool tx_clean_complete = true;
+	bool rx_clean_complete = true;
+	unsigned int tx_cleaned = 0;
+	unsigned int rx_cleaned = 0;
+	bool clean_complete = true;
+	bool arm_wb = false;
+	int budget_per_ring;
+	int work_done = 0;
+
+	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
+		napi_complete(napi);
+		return 0;
+	}
+
+	/* Since the actual Tx work is minimal, we can give the Tx a larger
+	 * budget and be more aggressive about cleaning up the Tx descriptors.
+	 */
+	i40e_for_each_ring(ring, q_vector->tx) {
+		bool wd = ring->xsk_pool ?
+			  i40e_clean_xdp_tx_irq(vsi, ring) :
+			  i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned);
+
+		if (!wd) {
+			clean_complete = tx_clean_complete = false;
+			continue;
+		}
+		arm_wb |= ring->arm_wb;
+		ring->arm_wb = false;
+	}
+
+	/* Handle case where we are called by netpoll with a budget of 0 */
+	if (budget <= 0)
+		goto tx_only;
+
+	/* normally we have 1 Rx ring per q_vector */
+	if (unlikely(q_vector->num_ringpairs > 1))
+		/* We attempt to distribute budget to each Rx queue fairly, but
+		 * don't allow the budget to go below 1 because that would exit
+		 * polling early.
+		 */
+		budget_per_ring = max_t(int, budget / q_vector->num_ringpairs, 1);
+	else
+		/* Max of 1 Rx ring in this q_vector so give it the budget */
+		budget_per_ring = budget;
+
+	i40e_for_each_ring(ring, q_vector->rx) {
+		int cleaned = ring->xsk_pool ?
+			      i40e_clean_rx_irq_zc(ring, budget_per_ring) :
+			      i40e_clean_rx_irq(ring, budget_per_ring, &rx_cleaned);
+
+		work_done += cleaned;
+		/* if we clean as many as budgeted, we must not be done */
+		if (cleaned >= budget_per_ring)
+			clean_complete = rx_clean_complete = false;
+	}
+
+	if (!i40e_enabled_xdp_vsi(vsi))
+		trace_i40e_napi_poll(napi, q_vector, budget, budget_per_ring, rx_cleaned,
+				     tx_cleaned, rx_clean_complete, tx_clean_complete);
+
+	/* If work not completed, return budget and polling will return */
+	if (!clean_complete) {
+		int cpu_id = smp_processor_id();
+
+		/* It is possible that the interrupt affinity has changed but,
+		 * if the cpu is pegged at 100%, polling will never exit while
+		 * traffic continues and the interrupt will be stuck on this
+		 * cpu.  We check to make sure affinity is correct before we
+		 * continue to poll, otherwise we must stop polling so the
+		 * interrupt can move to the correct cpu.
+		 */
+		if (!cpumask_test_cpu(cpu_id, &q_vector->affinity_mask)) {
+			/* Tell napi that we are done polling */
+			napi_complete_done(napi, work_done);
+
+			/* Force an interrupt */
+			i40e_force_wb(vsi, q_vector);
+
+			/* Return budget-1 so that polling stops */
+			return budget - 1;
+		}
+tx_only:
+		if (arm_wb) {
+			q_vector->tx.ring[0].tx_stats.tx_force_wb++;
+			i40e_enable_wb_on_itr(vsi, q_vector);
+		}
+		return budget;
+	}
+
+	if (q_vector->tx.ring[0].flags & I40E_TXR_FLAGS_WB_ON_ITR)
+		q_vector->arm_wb_state = false;
+
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done)))
+		i40e_update_enable_itr(vsi, q_vector);
+
+	return min(work_done, budget - 1);
+}
+
+/**
+ * i40e_atr - Add a Flow Director ATR filter
+ * @tx_ring:  ring to add programming descriptor to
+ * @skb:      send buffer
+ * @tx_flags: send tx flags
+ **/
+static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
+		     u32 tx_flags)
+{
+	struct i40e_filter_program_desc *fdir_desc;
+	struct i40e_pf *pf = tx_ring->vsi->back;
+	union {
+		unsigned char *network;
+		struct iphdr *ipv4;
+		struct ipv6hdr *ipv6;
+	} hdr;
+	struct tcphdr *th;
+	unsigned int hlen;
+	u32 flex_ptype, dtype_cmd;
+	int l4_proto;
+	u16 i;
+
+	/* make sure ATR is enabled */
+	if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
+		return;
+
+	if (test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
+		return;
+
+	/* if sampling is disabled do nothing */
+	if (!tx_ring->atr_sample_rate)
+		return;
+
+	/* Currently only IPv4/IPv6 with TCP is supported */
+	if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
+		return;
+
+	/* snag network header to get L4 type and address */
+	hdr.network = (tx_flags & I40E_TX_FLAGS_UDP_TUNNEL) ?
+		      skb_inner_network_header(skb) : skb_network_header(skb);
+
+	/* Note: tx_flags gets modified to reflect inner protocols in
+	 * tx_enable_csum function if encap is enabled.
+	 */
+	if (tx_flags & I40E_TX_FLAGS_IPV4) {
+		/* access ihl as u8 to avoid unaligned access on ia64 */
+		hlen = (hdr.network[0] & 0x0F) << 2;
+		l4_proto = hdr.ipv4->protocol;
+	} else {
+		/* find the start of the innermost ipv6 header */
+		unsigned int inner_hlen = hdr.network - skb->data;
+		unsigned int h_offset = inner_hlen;
+
+		/* this function updates h_offset to the end of the header */
+		l4_proto =
+		  ipv6_find_hdr(skb, &h_offset, IPPROTO_TCP, NULL, NULL);
+		/* hlen will contain our best estimate of the tcp header */
+		hlen = h_offset - inner_hlen;
+	}
+
+	if (l4_proto != IPPROTO_TCP)
+		return;
+
+	th = (struct tcphdr *)(hdr.network + hlen);
+
+	/* Due to lack of space, no more new filters can be programmed */
+	if (th->syn && test_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
+		return;
+	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED) {
+		/* HW ATR eviction will take care of removing filters on FIN
+		 * and RST packets.
+		 */
+		if (th->fin || th->rst)
+			return;
+	}
+
+	tx_ring->atr_count++;
+
+	/* sample on all syn/fin/rst packets or once every atr sample rate */
+	if (!th->fin &&
+	    !th->syn &&
+	    !th->rst &&
+	    (tx_ring->atr_count < tx_ring->atr_sample_rate))
+		return;
+
+	tx_ring->atr_count = 0;
+
+	/* grab the next descriptor */
+	i = tx_ring->next_to_use;
+	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
+		      I40E_TXD_FLTR_QW0_QINDEX_MASK;
+	flex_ptype |= (tx_flags & I40E_TX_FLAGS_IPV4) ?
+		      (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
+		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
+		      (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
+		       I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
+
+	flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
+
+	dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
+
+	dtype_cmd |= (th->fin || th->rst) ?
+		     (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
+		      I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
+		     (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
+		      I40E_TXD_FLTR_QW1_PCMD_SHIFT);
+
+	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
+		     I40E_TXD_FLTR_QW1_DEST_SHIFT;
+
+	dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
+		     I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
+
+	dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
+	if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL))
+		dtype_cmd |=
+			((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
+			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
+			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
+	else
+		dtype_cmd |=
+			((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
+			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
+			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
+
+	if (pf->flags & I40E_FLAG_HW_ATR_EVICT_ENABLED)
+		dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
+
+	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
+	fdir_desc->rsvd = cpu_to_le32(0);
+	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
+	fdir_desc->fd_id = cpu_to_le32(0);
+}
+
+/**
+ * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
+ * @skb:     send buffer
+ * @tx_ring: ring to send buffer on
+ * @flags:   the tx flags to be set
+ *
+ * Checks the skb and set up correspondingly several generic transmit flags
+ * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
+ *
+ * Returns error code indicate the frame should be dropped upon error and the
+ * otherwise  returns 0 to indicate the flags has been set properly.
+ **/
+static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
+					     struct i40e_ring *tx_ring,
+					     u32 *flags)
+{
+	__be16 protocol = skb->protocol;
+	u32  tx_flags = 0;
+
+	if (protocol == htons(ETH_P_8021Q) &&
+	    !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
+		/* When HW VLAN acceleration is turned off by the user the
+		 * stack sets the protocol to 8021q so that the driver
+		 * can take any steps required to support the SW only
+		 * VLAN handling.  In our case the driver doesn't need
+		 * to take any further steps so just set the protocol
+		 * to the encapsulated ethertype.
+		 */
+		skb->protocol = vlan_get_protocol(skb);
+		goto out;
+	}
+
+	/* if we have a HW VLAN tag being added, default to the HW one */
+	if (skb_vlan_tag_present(skb)) {
+		tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
+		tx_flags |= I40E_TX_FLAGS_HW_VLAN;
+	/* else if it is a SW VLAN, check the next protocol and store the tag */
+	} else if (protocol == htons(ETH_P_8021Q)) {
+		struct vlan_hdr *vhdr, _vhdr;
+
+		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
+		if (!vhdr)
+			return -EINVAL;
+
+		protocol = vhdr->h_vlan_encapsulated_proto;
+		tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
+		tx_flags |= I40E_TX_FLAGS_SW_VLAN;
+	}
+
+	if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
+		goto out;
+
+	/* Insert 802.1p priority into VLAN header */
+	if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
+	    (skb->priority != TC_PRIO_CONTROL)) {
+		tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
+		tx_flags |= (skb->priority & 0x7) <<
+				I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
+		if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
+			struct vlan_ethhdr *vhdr;
+			int rc;
+
+			rc = skb_cow_head(skb, 0);
+			if (rc < 0)
+				return rc;
+			vhdr = skb_vlan_eth_hdr(skb);
+			vhdr->h_vlan_TCI = htons(tx_flags >>
+						 I40E_TX_FLAGS_VLAN_SHIFT);
+		} else {
+			tx_flags |= I40E_TX_FLAGS_HW_VLAN;
+		}
+	}
+
+out:
+	*flags = tx_flags;
+	return 0;
+}
+
+/**
+ * i40e_tso - set up the tso context descriptor
+ * @first:    pointer to first Tx buffer for xmit
+ * @hdr_len:  ptr to the size of the packet header
+ * @cd_type_cmd_tso_mss: Quad Word 1
+ *
+ * Returns 0 if no TSO can happen, 1 if tso is going, or error
+ **/
+static int i40e_tso(struct i40e_tx_buffer *first, u8 *hdr_len,
+		    u64 *cd_type_cmd_tso_mss)
+{
+	struct sk_buff *skb = first->skb;
+	u64 cd_cmd, cd_tso_len, cd_mss;
+	__be16 protocol;
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		struct udphdr *udp;
+		unsigned char *hdr;
+	} l4;
+	u32 paylen, l4_offset;
+	u16 gso_size;
+	int err;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	err = skb_cow_head(skb, 0);
+	if (err < 0)
+		return err;
+
+	protocol = vlan_get_protocol(skb);
+
+	if (eth_p_mpls(protocol))
+		ip.hdr = skb_inner_network_header(skb);
+	else
+		ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_checksum_start(skb);
+
+	/* initialize outer IP header fields */
+	if (ip.v4->version == 4) {
+		ip.v4->tot_len = 0;
+		ip.v4->check = 0;
+
+		first->tx_flags |= I40E_TX_FLAGS_TSO;
+	} else {
+		ip.v6->payload_len = 0;
+		first->tx_flags |= I40E_TX_FLAGS_TSO;
+	}
+
+	if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
+					 SKB_GSO_GRE_CSUM |
+					 SKB_GSO_IPXIP4 |
+					 SKB_GSO_IPXIP6 |
+					 SKB_GSO_UDP_TUNNEL |
+					 SKB_GSO_UDP_TUNNEL_CSUM)) {
+		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
+		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) {
+			l4.udp->len = 0;
+
+			/* determine offset of outer transport header */
+			l4_offset = l4.hdr - skb->data;
+
+			/* remove payload length from outer checksum */
+			paylen = skb->len - l4_offset;
+			csum_replace_by_diff(&l4.udp->check,
+					     (__force __wsum)htonl(paylen));
+		}
+
+		/* reset pointers to inner headers */
+		ip.hdr = skb_inner_network_header(skb);
+		l4.hdr = skb_inner_transport_header(skb);
+
+		/* initialize inner IP header fields */
+		if (ip.v4->version == 4) {
+			ip.v4->tot_len = 0;
+			ip.v4->check = 0;
+		} else {
+			ip.v6->payload_len = 0;
+		}
+	}
+
+	/* determine offset of inner transport header */
+	l4_offset = l4.hdr - skb->data;
+
+	/* remove payload length from inner checksum */
+	paylen = skb->len - l4_offset;
+
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+		csum_replace_by_diff(&l4.udp->check, (__force __wsum)htonl(paylen));
+		/* compute length of segmentation header */
+		*hdr_len = sizeof(*l4.udp) + l4_offset;
+	} else {
+		csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
+		/* compute length of segmentation header */
+		*hdr_len = (l4.tcp->doff * 4) + l4_offset;
+	}
+
+	/* pull values out of skb_shinfo */
+	gso_size = skb_shinfo(skb)->gso_size;
+
+	/* update GSO size and bytecount with header size */
+	first->gso_segs = skb_shinfo(skb)->gso_segs;
+	first->bytecount += (first->gso_segs - 1) * *hdr_len;
+
+	/* find the field values */
+	cd_cmd = I40E_TX_CTX_DESC_TSO;
+	cd_tso_len = skb->len - *hdr_len;
+	cd_mss = gso_size;
+	*cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
+				(cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
+				(cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
+	return 1;
+}
+
+/**
+ * i40e_tsyn - set up the tsyn context descriptor
+ * @tx_ring:  ptr to the ring to send
+ * @skb:      ptr to the skb we're sending
+ * @tx_flags: the collected send information
+ * @cd_type_cmd_tso_mss: Quad Word 1
+ *
+ * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
+ **/
+static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
+		     u32 tx_flags, u64 *cd_type_cmd_tso_mss)
+{
+	struct i40e_pf *pf;
+
+	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
+		return 0;
+
+	/* Tx timestamps cannot be sampled when doing TSO */
+	if (tx_flags & I40E_TX_FLAGS_TSO)
+		return 0;
+
+	/* only timestamp the outbound packet if the user has requested it and
+	 * we are not already transmitting a packet to be timestamped
+	 */
+	pf = i40e_netdev_to_pf(tx_ring->netdev);
+	if (!(pf->flags & I40E_FLAG_PTP))
+		return 0;
+
+	if (pf->ptp_tx &&
+	    !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, pf->state)) {
+		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+		pf->ptp_tx_start = jiffies;
+		pf->ptp_tx_skb = skb_get(skb);
+	} else {
+		pf->tx_hwtstamp_skipped++;
+		return 0;
+	}
+
+	*cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
+				I40E_TXD_CTX_QW1_CMD_SHIFT;
+
+	return 1;
+}
+
+/**
+ * i40e_tx_enable_csum - Enable Tx checksum offloads
+ * @skb: send buffer
+ * @tx_flags: pointer to Tx flags currently set
+ * @td_cmd: Tx descriptor command bits to set
+ * @td_offset: Tx descriptor header offsets to set
+ * @tx_ring: Tx descriptor ring
+ * @cd_tunneling: ptr to context desc bits
+ **/
+static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
+			       u32 *td_cmd, u32 *td_offset,
+			       struct i40e_ring *tx_ring,
+			       u32 *cd_tunneling)
+{
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		struct udphdr *udp;
+		unsigned char *hdr;
+	} l4;
+	unsigned char *exthdr;
+	u32 offset, cmd = 0;
+	__be16 frag_off;
+	__be16 protocol;
+	u8 l4_proto = 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	protocol = vlan_get_protocol(skb);
+
+	if (eth_p_mpls(protocol)) {
+		ip.hdr = skb_inner_network_header(skb);
+		l4.hdr = skb_checksum_start(skb);
+	} else {
+		ip.hdr = skb_network_header(skb);
+		l4.hdr = skb_transport_header(skb);
+	}
+
+	/* set the tx_flags to indicate the IP protocol type. this is
+	 * required so that checksum header computation below is accurate.
+	 */
+	if (ip.v4->version == 4)
+		*tx_flags |= I40E_TX_FLAGS_IPV4;
+	else
+		*tx_flags |= I40E_TX_FLAGS_IPV6;
+
+	/* compute outer L2 header size */
+	offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
+
+	if (skb->encapsulation) {
+		u32 tunnel = 0;
+		/* define outer network header type */
+		if (*tx_flags & I40E_TX_FLAGS_IPV4) {
+			tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
+				  I40E_TX_CTX_EXT_IP_IPV4 :
+				  I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
+
+			l4_proto = ip.v4->protocol;
+		} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
+			int ret;
+
+			tunnel |= I40E_TX_CTX_EXT_IP_IPV6;
+
+			exthdr = ip.hdr + sizeof(*ip.v6);
+			l4_proto = ip.v6->nexthdr;
+			ret = ipv6_skip_exthdr(skb, exthdr - skb->data,
+					       &l4_proto, &frag_off);
+			if (ret < 0)
+				return -1;
+		}
+
+		/* define outer transport */
+		switch (l4_proto) {
+		case IPPROTO_UDP:
+			tunnel |= I40E_TXD_CTX_UDP_TUNNELING;
+			*tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
+			break;
+		case IPPROTO_GRE:
+			tunnel |= I40E_TXD_CTX_GRE_TUNNELING;
+			*tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
+			break;
+		case IPPROTO_IPIP:
+		case IPPROTO_IPV6:
+			*tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
+			l4.hdr = skb_inner_network_header(skb);
+			break;
+		default:
+			if (*tx_flags & I40E_TX_FLAGS_TSO)
+				return -1;
+
+			skb_checksum_help(skb);
+			return 0;
+		}
+
+		/* compute outer L3 header size */
+		tunnel |= ((l4.hdr - ip.hdr) / 4) <<
+			  I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
+
+		/* switch IP header pointer from outer to inner header */
+		ip.hdr = skb_inner_network_header(skb);
+
+		/* compute tunnel header size */
+		tunnel |= ((ip.hdr - l4.hdr) / 2) <<
+			  I40E_TXD_CTX_QW0_NATLEN_SHIFT;
+
+		/* indicate if we need to offload outer UDP header */
+		if ((*tx_flags & I40E_TX_FLAGS_TSO) &&
+		    !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
+		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
+			tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
+
+		/* record tunnel offload values */
+		*cd_tunneling |= tunnel;
+
+		/* switch L4 header pointer from outer to inner */
+		l4.hdr = skb_inner_transport_header(skb);
+		l4_proto = 0;
+
+		/* reset type as we transition from outer to inner headers */
+		*tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6);
+		if (ip.v4->version == 4)
+			*tx_flags |= I40E_TX_FLAGS_IPV4;
+		if (ip.v6->version == 6)
+			*tx_flags |= I40E_TX_FLAGS_IPV6;
+	}
+
+	/* Enable IP checksum offloads */
+	if (*tx_flags & I40E_TX_FLAGS_IPV4) {
+		l4_proto = ip.v4->protocol;
+		/* the stack computes the IP header already, the only time we
+		 * need the hardware to recompute it is in the case of TSO.
+		 */
+		cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
+		       I40E_TX_DESC_CMD_IIPT_IPV4_CSUM :
+		       I40E_TX_DESC_CMD_IIPT_IPV4;
+	} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
+		cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
+
+		exthdr = ip.hdr + sizeof(*ip.v6);
+		l4_proto = ip.v6->nexthdr;
+		if (l4.hdr != exthdr)
+			ipv6_skip_exthdr(skb, exthdr - skb->data,
+					 &l4_proto, &frag_off);
+	}
+
+	/* compute inner L3 header size */
+	offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+
+	/* Enable L4 checksum offloads */
+	switch (l4_proto) {
+	case IPPROTO_TCP:
+		/* enable checksum offloads */
+		cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
+		offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+		break;
+	case IPPROTO_SCTP:
+		/* enable SCTP checksum offload */
+		cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
+		offset |= (sizeof(struct sctphdr) >> 2) <<
+			  I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+		break;
+	case IPPROTO_UDP:
+		/* enable UDP checksum offload */
+		cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
+		offset |= (sizeof(struct udphdr) >> 2) <<
+			  I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+		break;
+	default:
+		if (*tx_flags & I40E_TX_FLAGS_TSO)
+			return -1;
+		skb_checksum_help(skb);
+		return 0;
+	}
+
+	*td_cmd |= cmd;
+	*td_offset |= offset;
+
+	return 1;
+}
+
+/**
+ * i40e_create_tx_ctx - Build the Tx context descriptor
+ * @tx_ring:  ring to create the descriptor on
+ * @cd_type_cmd_tso_mss: Quad Word 1
+ * @cd_tunneling: Quad Word 0 - bits 0-31
+ * @cd_l2tag2: Quad Word 0 - bits 32-63
+ **/
+static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
+			       const u64 cd_type_cmd_tso_mss,
+			       const u32 cd_tunneling, const u32 cd_l2tag2)
+{
+	struct i40e_tx_context_desc *context_desc;
+	int i = tx_ring->next_to_use;
+
+	if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
+	    !cd_tunneling && !cd_l2tag2)
+		return;
+
+	/* grab the next descriptor */
+	context_desc = I40E_TX_CTXTDESC(tx_ring, i);
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	/* cpu_to_le32 and assign to struct fields */
+	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
+	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
+	context_desc->rsvd = cpu_to_le16(0);
+	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
+}
+
+/**
+ * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size:    the size buffer we want to assure is available
+ *
+ * Returns -EBUSY if a stop is needed, else 0
+ **/
+int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
+{
+	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+	/* Memory barrier before checking head and tail */
+	smp_mb();
+
+	++tx_ring->tx_stats.tx_stopped;
+
+	/* Check again in a case another CPU has just made room available. */
+	if (likely(I40E_DESC_UNUSED(tx_ring) < size))
+		return -EBUSY;
+
+	/* A reprieve! - use start_queue because it doesn't call schedule */
+	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
+	++tx_ring->tx_stats.restart_queue;
+	return 0;
+}
+
+/**
+ * __i40e_chk_linearize - Check if there are more than 8 buffers per packet
+ * @skb:      send buffer
+ *
+ * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire
+ * and so we need to figure out the cases where we need to linearize the skb.
+ *
+ * For TSO we need to count the TSO header and segment payload separately.
+ * As such we need to check cases where we have 7 fragments or more as we
+ * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
+ * the segment payload in the first descriptor, and another 7 for the
+ * fragments.
+ **/
+bool __i40e_chk_linearize(struct sk_buff *skb)
+{
+	const skb_frag_t *frag, *stale;
+	int nr_frags, sum;
+
+	/* no need to check if number of frags is less than 7 */
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	if (nr_frags < (I40E_MAX_BUFFER_TXD - 1))
+		return false;
+
+	/* We need to walk through the list and validate that each group
+	 * of 6 fragments totals at least gso_size.
+	 */
+	nr_frags -= I40E_MAX_BUFFER_TXD - 2;
+	frag = &skb_shinfo(skb)->frags[0];
+
+	/* Initialize size to the negative value of gso_size minus 1.  We
+	 * use this as the worst case scenerio in which the frag ahead
+	 * of us only provides one byte which is why we are limited to 6
+	 * descriptors for a single transmit as the header and previous
+	 * fragment are already consuming 2 descriptors.
+	 */
+	sum = 1 - skb_shinfo(skb)->gso_size;
+
+	/* Add size of frags 0 through 4 to create our initial sum */
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+
+	/* Walk through fragments adding latest fragment, testing it, and
+	 * then removing stale fragments from the sum.
+	 */
+	for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
+		int stale_size = skb_frag_size(stale);
+
+		sum += skb_frag_size(frag++);
+
+		/* The stale fragment may present us with a smaller
+		 * descriptor than the actual fragment size. To account
+		 * for that we need to remove all the data on the front and
+		 * figure out what the remainder would be in the last
+		 * descriptor associated with the fragment.
+		 */
+		if (stale_size > I40E_MAX_DATA_PER_TXD) {
+			int align_pad = -(skb_frag_off(stale)) &
+					(I40E_MAX_READ_REQ_SIZE - 1);
+
+			sum -= align_pad;
+			stale_size -= align_pad;
+
+			do {
+				sum -= I40E_MAX_DATA_PER_TXD_ALIGNED;
+				stale_size -= I40E_MAX_DATA_PER_TXD_ALIGNED;
+			} while (stale_size > I40E_MAX_DATA_PER_TXD);
+		}
+
+		/* if sum is negative we failed to make sufficient progress */
+		if (sum < 0)
+			return true;
+
+		if (!nr_frags--)
+			break;
+
+		sum -= stale_size;
+	}
+
+	return false;
+}
+
+/**
+ * i40e_tx_map - Build the Tx descriptor
+ * @tx_ring:  ring to send buffer on
+ * @skb:      send buffer
+ * @first:    first buffer info buffer to use
+ * @tx_flags: collected send information
+ * @hdr_len:  size of the packet header
+ * @td_cmd:   the command field in the descriptor
+ * @td_offset: offset for checksum or crc
+ *
+ * Returns 0 on success, -1 on failure to DMA
+ **/
+static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
+			      struct i40e_tx_buffer *first, u32 tx_flags,
+			      const u8 hdr_len, u32 td_cmd, u32 td_offset)
+{
+	unsigned int data_len = skb->data_len;
+	unsigned int size = skb_headlen(skb);
+	skb_frag_t *frag;
+	struct i40e_tx_buffer *tx_bi;
+	struct i40e_tx_desc *tx_desc;
+	u16 i = tx_ring->next_to_use;
+	u32 td_tag = 0;
+	dma_addr_t dma;
+	u16 desc_count = 1;
+
+	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
+		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
+		td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
+			 I40E_TX_FLAGS_VLAN_SHIFT;
+	}
+
+	first->tx_flags = tx_flags;
+
+	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+	tx_desc = I40E_TX_DESC(tx_ring, i);
+	tx_bi = first;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		unsigned int max_data = I40E_MAX_DATA_PER_TXD_ALIGNED;
+
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_error;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_bi, len, size);
+		dma_unmap_addr_set(tx_bi, dma, dma);
+
+		/* align size to end of page */
+		max_data += -dma & (I40E_MAX_READ_REQ_SIZE - 1);
+		tx_desc->buffer_addr = cpu_to_le64(dma);
+
+		while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
+			tx_desc->cmd_type_offset_bsz =
+				build_ctob(td_cmd, td_offset,
+					   max_data, td_tag);
+
+			tx_desc++;
+			i++;
+			desc_count++;
+
+			if (i == tx_ring->count) {
+				tx_desc = I40E_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+
+			dma += max_data;
+			size -= max_data;
+
+			max_data = I40E_MAX_DATA_PER_TXD_ALIGNED;
+			tx_desc->buffer_addr = cpu_to_le64(dma);
+		}
+
+		if (likely(!data_len))
+			break;
+
+		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
+							  size, td_tag);
+
+		tx_desc++;
+		i++;
+		desc_count++;
+
+		if (i == tx_ring->count) {
+			tx_desc = I40E_TX_DESC(tx_ring, 0);
+			i = 0;
+		}
+
+		size = skb_frag_size(frag);
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+				       DMA_TO_DEVICE);
+
+		tx_bi = &tx_ring->tx_bi[i];
+	}
+
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
+
+	tx_ring->next_to_use = i;
+
+	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	/* write last descriptor with EOP bit */
+	td_cmd |= I40E_TX_DESC_CMD_EOP;
+
+	/* We OR these values together to check both against 4 (WB_STRIDE)
+	 * below. This is safe since we don't re-use desc_count afterwards.
+	 */
+	desc_count |= ++tx_ring->packet_stride;
+
+	if (desc_count >= WB_STRIDE) {
+		/* write last descriptor with RS bit set */
+		td_cmd |= I40E_TX_DESC_CMD_RS;
+		tx_ring->packet_stride = 0;
+	}
+
+	tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag);
+
+	skb_tx_timestamp(skb);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.
+	 *
+	 * We also use this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	/* notify HW of packet */
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
+		writel(i, tx_ring->tail);
+	}
+
+	return 0;
+
+dma_error:
+	dev_info(tx_ring->dev, "TX DMA map failed\n");
+
+	/* clear dma mappings for failed tx_bi map */
+	for (;;) {
+		tx_bi = &tx_ring->tx_bi[i];
+		i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
+		if (tx_bi == first)
+			break;
+		if (i == 0)
+			i = tx_ring->count;
+		i--;
+	}
+
+	tx_ring->next_to_use = i;
+
+	return -1;
+}
+
+static u16 i40e_swdcb_skb_tx_hash(struct net_device *dev,
+				  const struct sk_buff *skb,
+				  u16 num_tx_queues)
+{
+	u32 jhash_initval_salt = 0xd631614b;
+	u32 hash;
+
+	if (skb->sk && skb->sk->sk_hash)
+		hash = skb->sk->sk_hash;
+	else
+		hash = (__force u16)skb->protocol ^ skb->hash;
+
+	hash = jhash_1word(hash, jhash_initval_salt);
+
+	return (u16)(((u64)hash * num_tx_queues) >> 32);
+}
+
+u16 i40e_lan_select_queue(struct net_device *netdev,
+			  struct sk_buff *skb,
+			  struct net_device __always_unused *sb_dev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_hw *hw;
+	u16 qoffset;
+	u16 qcount;
+	u8 tclass;
+	u16 hash;
+	u8 prio;
+
+	/* is DCB enabled at all? */
+	if (vsi->tc_config.numtc == 1 ||
+	    i40e_is_tc_mqprio_enabled(vsi->back))
+		return netdev_pick_tx(netdev, skb, sb_dev);
+
+	prio = skb->priority;
+	hw = &vsi->back->hw;
+	tclass = hw->local_dcbx_config.etscfg.prioritytable[prio];
+	/* sanity check */
+	if (unlikely(!(vsi->tc_config.enabled_tc & BIT(tclass))))
+		tclass = 0;
+
+	/* select a queue assigned for the given TC */
+	qcount = vsi->tc_config.tc_info[tclass].qcount;
+	hash = i40e_swdcb_skb_tx_hash(netdev, skb, qcount);
+
+	qoffset = vsi->tc_config.tc_info[tclass].qoffset;
+	return qoffset + hash;
+}
+
+/**
+ * i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring
+ * @xdpf: data to transmit
+ * @xdp_ring: XDP Tx ring
+ **/
+static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
+			      struct i40e_ring *xdp_ring)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
+	u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? sinfo->nr_frags : 0;
+	u16 i = 0, index = xdp_ring->next_to_use;
+	struct i40e_tx_buffer *tx_head = &xdp_ring->tx_bi[index];
+	struct i40e_tx_buffer *tx_bi = tx_head;
+	struct i40e_tx_desc *tx_desc = I40E_TX_DESC(xdp_ring, index);
+	void *data = xdpf->data;
+	u32 size = xdpf->len;
+
+	if (unlikely(I40E_DESC_UNUSED(xdp_ring) < 1 + nr_frags)) {
+		xdp_ring->tx_stats.tx_busy++;
+		return I40E_XDP_CONSUMED;
+	}
+
+	tx_head->bytecount = xdp_get_frame_len(xdpf);
+	tx_head->gso_segs = 1;
+	tx_head->xdpf = xdpf;
+
+	for (;;) {
+		dma_addr_t dma;
+
+		dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
+		if (dma_mapping_error(xdp_ring->dev, dma))
+			goto unmap;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_bi, len, size);
+		dma_unmap_addr_set(tx_bi, dma, dma);
+
+		tx_desc->buffer_addr = cpu_to_le64(dma);
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(I40E_TX_DESC_CMD_ICRC, 0, size, 0);
+
+		if (++index == xdp_ring->count)
+			index = 0;
+
+		if (i == nr_frags)
+			break;
+
+		tx_bi = &xdp_ring->tx_bi[index];
+		tx_desc = I40E_TX_DESC(xdp_ring, index);
+
+		data = skb_frag_address(&sinfo->frags[i]);
+		size = skb_frag_size(&sinfo->frags[i]);
+		i++;
+	}
+
+	tx_desc->cmd_type_offset_bsz |=
+		cpu_to_le64(I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
+
+	/* Make certain all of the status bits have been updated
+	 * before next_to_watch is written.
+	 */
+	smp_wmb();
+
+	xdp_ring->xdp_tx_active++;
+
+	tx_head->next_to_watch = tx_desc;
+	xdp_ring->next_to_use = index;
+
+	return I40E_XDP_TX;
+
+unmap:
+	for (;;) {
+		tx_bi = &xdp_ring->tx_bi[index];
+		if (dma_unmap_len(tx_bi, len))
+			dma_unmap_page(xdp_ring->dev,
+				       dma_unmap_addr(tx_bi, dma),
+				       dma_unmap_len(tx_bi, len),
+				       DMA_TO_DEVICE);
+		dma_unmap_len_set(tx_bi, len, 0);
+		if (tx_bi == tx_head)
+			break;
+
+		if (!index)
+			index += xdp_ring->count;
+		index--;
+	}
+
+	return I40E_XDP_CONSUMED;
+}
+
+/**
+ * i40e_xmit_frame_ring - Sends buffer on Tx ring
+ * @skb:     send buffer
+ * @tx_ring: ring to send buffer on
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ **/
+static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
+					struct i40e_ring *tx_ring)
+{
+	u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
+	u32 cd_tunneling = 0, cd_l2tag2 = 0;
+	struct i40e_tx_buffer *first;
+	u32 td_offset = 0;
+	u32 tx_flags = 0;
+	u32 td_cmd = 0;
+	u8 hdr_len = 0;
+	int tso, count;
+	int tsyn;
+
+	/* prefetch the data, we'll need it later */
+	prefetch(skb->data);
+
+	i40e_trace(xmit_frame_ring, skb, tx_ring);
+
+	count = i40e_xmit_descriptor_count(skb);
+	if (i40e_chk_linearize(skb, count)) {
+		if (__skb_linearize(skb)) {
+			dev_kfree_skb_any(skb);
+			return NETDEV_TX_OK;
+		}
+		count = i40e_txd_use_count(skb->len);
+		tx_ring->tx_stats.tx_linearize++;
+	}
+
+	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
+	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
+	 *       + 4 desc gap to avoid the cache line where head is,
+	 *       + 1 desc for context descriptor,
+	 * otherwise try next time
+	 */
+	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
+		tx_ring->tx_stats.tx_busy++;
+		return NETDEV_TX_BUSY;
+	}
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_bi[tx_ring->next_to_use];
+	first->skb = skb;
+	first->bytecount = skb->len;
+	first->gso_segs = 1;
+
+	/* prepare the xmit flags */
+	if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
+		goto out_drop;
+
+	tso = i40e_tso(first, &hdr_len, &cd_type_cmd_tso_mss);
+
+	if (tso < 0)
+		goto out_drop;
+	else if (tso)
+		tx_flags |= I40E_TX_FLAGS_TSO;
+
+	/* Always offload the checksum, since it's in the data descriptor */
+	tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
+				  tx_ring, &cd_tunneling);
+	if (tso < 0)
+		goto out_drop;
+
+	tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
+
+	if (tsyn)
+		tx_flags |= I40E_TX_FLAGS_TSYN;
+
+	/* always enable CRC insertion offload */
+	td_cmd |= I40E_TX_DESC_CMD_ICRC;
+
+	i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
+			   cd_tunneling, cd_l2tag2);
+
+	/* Add Flow Director ATR if it's enabled.
+	 *
+	 * NOTE: this must always be directly before the data descriptor.
+	 */
+	i40e_atr(tx_ring, skb, tx_flags);
+
+	if (i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
+			td_cmd, td_offset))
+		goto cleanup_tx_tstamp;
+
+	return NETDEV_TX_OK;
+
+out_drop:
+	i40e_trace(xmit_frame_ring_drop, first->skb, tx_ring);
+	dev_kfree_skb_any(first->skb);
+	first->skb = NULL;
+cleanup_tx_tstamp:
+	if (unlikely(tx_flags & I40E_TX_FLAGS_TSYN)) {
+		struct i40e_pf *pf = i40e_netdev_to_pf(tx_ring->netdev);
+
+		dev_kfree_skb_any(pf->ptp_tx_skb);
+		pf->ptp_tx_skb = NULL;
+		clear_bit_unlock(__I40E_PTP_TX_IN_PROGRESS, pf->state);
+	}
+
+	return NETDEV_TX_OK;
+}
+
+/**
+ * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
+ * @skb:    send buffer
+ * @netdev: network interface device structure
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ **/
+netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
+
+	/* hardware can't handle really short frames, hardware padding works
+	 * beyond this point
+	 */
+	if (skb_put_padto(skb, I40E_MIN_TX_LEN))
+		return NETDEV_TX_OK;
+
+	return i40e_xmit_frame_ring(skb, tx_ring);
+}
+
+/**
+ * i40e_xdp_xmit - Implements ndo_xdp_xmit
+ * @dev: netdev
+ * @n: number of frames
+ * @frames: array of XDP buffer pointers
+ * @flags: XDP extra info
+ *
+ * Returns number of frames successfully sent. Failed frames
+ * will be free'ed by XDP core.
+ *
+ * For error cases, a negative errno code is returned and no-frames
+ * are transmitted (caller must handle freeing frames).
+ **/
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+		  u32 flags)
+{
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+	unsigned int queue_index = smp_processor_id();
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_ring *xdp_ring;
+	int nxmit = 0;
+	int i;
+
+	if (test_bit(__I40E_VSI_DOWN, vsi->state))
+		return -ENETDOWN;
+
+	if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs ||
+	    test_bit(__I40E_CONFIG_BUSY, pf->state))
+		return -ENXIO;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	xdp_ring = vsi->xdp_rings[queue_index];
+
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		int err;
+
+		err = i40e_xmit_xdp_ring(xdpf, xdp_ring);
+		if (err != I40E_XDP_TX)
+			break;
+		nxmit++;
+	}
+
+	if (unlikely(flags & XDP_XMIT_FLUSH))
+		i40e_xdp_ring_update_tail(xdp_ring);
+
+	return nxmit;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
new file mode 100644
index 0000000000..900b0d9ede
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -0,0 +1,563 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _I40E_TXRX_H_
+#define _I40E_TXRX_H_
+
+#include <net/xdp.h>
+
+/* Interrupt Throttling and Rate Limiting Goodies */
+#define I40E_DEFAULT_IRQ_WORK      256
+
+/* The datasheet for the X710 and XL710 indicate that the maximum value for
+ * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec
+ * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
+ * the register value which is divided by 2 lets use the actual values and
+ * avoid an excessive amount of translation.
+ */
+#define I40E_ITR_DYNAMIC	0x8000	/* use top bit as a flag */
+#define I40E_ITR_MASK		0x1FFE	/* mask for ITR register value */
+#define I40E_MIN_ITR		     2	/* reg uses 2 usec resolution */
+#define I40E_ITR_20K		    50
+#define I40E_ITR_8K		   122
+#define I40E_MAX_ITR		  8160	/* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC)
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC))
+
+#define I40E_ITR_RX_DEF		(I40E_ITR_20K | I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF		(I40E_ITR_20K | I40E_ITR_DYNAMIC)
+
+/* 0x40 is the enable bit for interrupt rate limiting, and must be set if
+ * the value of the rate limit is non-zero
+ */
+#define INTRL_ENA                  BIT(6)
+#define I40E_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
+#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
+
+/**
+ * i40e_intrl_usec_to_reg - convert interrupt rate limit to register
+ * @intrl: interrupt rate limit to convert
+ *
+ * This function converts a decimal interrupt rate limit to the appropriate
+ * register format expected by the firmware when setting interrupt rate limit.
+ */
+static inline u16 i40e_intrl_usec_to_reg(int intrl)
+{
+	if (intrl >> 2)
+		return ((intrl >> 2) | INTRL_ENA);
+	else
+		return 0;
+}
+
+#define I40E_QUEUE_END_OF_LIST 0x7FF
+
+/* this enum matches hardware bits and is meant to be used by DYN_CTLN
+ * registers and QINT registers or more generally anywhere in the manual
+ * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any
+ * register but instead is a special value meaning "don't update" ITR0/1/2.
+ */
+enum i40e_dyn_idx_t {
+	I40E_IDX_ITR0 = 0,
+	I40E_IDX_ITR1 = 1,
+	I40E_IDX_ITR2 = 2,
+	I40E_ITR_NONE = 3	/* ITR_NONE must not be used as an index */
+};
+
+/* these are indexes into ITRN registers */
+#define I40E_RX_ITR    I40E_IDX_ITR0
+#define I40E_TX_ITR    I40E_IDX_ITR1
+
+/* Supported RSS offloads */
+#define I40E_DEFAULT_RSS_HENA ( \
+	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | \
+	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_SCTP) | \
+	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP) | \
+	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) | \
+	BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4) | \
+	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | \
+	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP) | \
+	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_SCTP) | \
+	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) | \
+	BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6) | \
+	BIT_ULL(I40E_FILTER_PCTYPE_L2_PAYLOAD))
+
+#define I40E_DEFAULT_RSS_HENA_EXPANDED (I40E_DEFAULT_RSS_HENA | \
+	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK) | \
+	BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | \
+	BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) | \
+	BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK) | \
+	BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \
+	BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP))
+
+#define i40e_pf_get_default_rss_hena(pf) \
+	(((pf)->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) ? \
+	  I40E_DEFAULT_RSS_HENA_EXPANDED : I40E_DEFAULT_RSS_HENA)
+
+/* Supported Rx Buffer Sizes (a multiple of 128) */
+#define I40E_RXBUFFER_256   256
+#define I40E_RXBUFFER_1536  1536  /* 128B aligned standard Ethernet frame */
+#define I40E_RXBUFFER_2048  2048
+#define I40E_RXBUFFER_3072  3072  /* Used for large frames w/ padding */
+#define I40E_MAX_RXBUFFER   9728  /* largest size for single descriptor */
+
+/* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we
+ * reserve 2 more, and skb_shared_info adds an additional 384 bytes more,
+ * this adds up to 512 bytes of extra data meaning the smallest allocation
+ * we could have is 1K.
+ * i.e. RXBUFFER_256 --> 960 byte skb (size-1024 slab)
+ * i.e. RXBUFFER_512 --> 1216 byte skb (size-2048 slab)
+ */
+#define I40E_RX_HDR_SIZE I40E_RXBUFFER_256
+#define I40E_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
+#define i40e_rx_desc i40e_16byte_rx_desc
+
+#define I40E_RX_DMA_ATTR \
+	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+/* Attempt to maximize the headroom available for incoming frames.  We
+ * use a 2K buffer for receives and need 1536/1534 to store the data for
+ * the frame.  This leaves us with 512 bytes of room.  From that we need
+ * to deduct the space needed for the shared info and the padding needed
+ * to IP align the frame.
+ *
+ * Note: For cache line sizes 256 or larger this value is going to end
+ *	 up negative.  In these cases we should fall back to the legacy
+ *	 receive path.
+ */
+#if (PAGE_SIZE < 8192)
+#define I40E_2K_TOO_SMALL_WITH_PADDING \
+((NET_SKB_PAD + I40E_RXBUFFER_1536) > SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048))
+
+static inline int i40e_compute_pad(int rx_buf_len)
+{
+	int page_size, pad_size;
+
+	page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
+	pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
+
+	return pad_size;
+}
+
+static inline int i40e_skb_pad(void)
+{
+	int rx_buf_len;
+
+	/* If a 2K buffer cannot handle a standard Ethernet frame then
+	 * optimize padding for a 3K buffer instead of a 1.5K buffer.
+	 *
+	 * For a 3K buffer we need to add enough padding to allow for
+	 * tailroom due to NET_IP_ALIGN possibly shifting us out of
+	 * cache-line alignment.
+	 */
+	if (I40E_2K_TOO_SMALL_WITH_PADDING)
+		rx_buf_len = I40E_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
+	else
+		rx_buf_len = I40E_RXBUFFER_1536;
+
+	/* if needed make room for NET_IP_ALIGN */
+	rx_buf_len -= NET_IP_ALIGN;
+
+	return i40e_compute_pad(rx_buf_len);
+}
+
+#define I40E_SKB_PAD i40e_skb_pad()
+#else
+#define I40E_2K_TOO_SMALL_WITH_PADDING false
+#define I40E_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#endif
+
+/**
+ * i40e_test_staterr - tests bits in Rx descriptor status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc,
+				     const u64 stat_err_bits)
+{
+	return !!(rx_desc->wb.qword1.status_error_len &
+		  cpu_to_le64(stat_err_bits));
+}
+
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define I40E_RX_BUFFER_WRITE	32	/* Must be power of 2 */
+
+#define I40E_RX_NEXT_DESC(r, i, n)		\
+	do {					\
+		(i)++;				\
+		if ((i) == (r)->count)		\
+			i = 0;			\
+		(n) = I40E_RX_DESC((r), (i));	\
+	} while (0)
+
+
+#define I40E_MAX_BUFFER_TXD	8
+#define I40E_MIN_TX_LEN		17
+
+/* The size limit for a transmit buffer in a descriptor is (16K - 1).
+ * In order to align with the read requests we will align the value to
+ * the nearest 4K which represents our maximum read request size.
+ */
+#define I40E_MAX_READ_REQ_SIZE		4096
+#define I40E_MAX_DATA_PER_TXD		(16 * 1024 - 1)
+#define I40E_MAX_DATA_PER_TXD_ALIGNED \
+	(I40E_MAX_DATA_PER_TXD & ~(I40E_MAX_READ_REQ_SIZE - 1))
+
+/**
+ * i40e_txd_use_count  - estimate the number of descriptors needed for Tx
+ * @size: transmit request size in bytes
+ *
+ * Due to hardware alignment restrictions (4K alignment), we need to
+ * assume that we can have no more than 12K of data per descriptor, even
+ * though each descriptor can take up to 16K - 1 bytes of aligned memory.
+ * Thus, we need to divide by 12K. But division is slow! Instead,
+ * we decompose the operation into shifts and one relatively cheap
+ * multiply operation.
+ *
+ * To divide by 12K, we first divide by 4K, then divide by 3:
+ *     To divide by 4K, shift right by 12 bits
+ *     To divide by 3, multiply by 85, then divide by 256
+ *     (Divide by 256 is done by shifting right by 8 bits)
+ * Finally, we add one to round up. Because 256 isn't an exact multiple of
+ * 3, we'll underestimate near each multiple of 12K. This is actually more
+ * accurate as we have 4K - 1 of wiggle room that we can fit into the last
+ * segment.  For our purposes this is accurate out to 1M which is orders of
+ * magnitude greater than our largest possible GSO size.
+ *
+ * This would then be implemented as:
+ *     return (((size >> 12) * 85) >> 8) + 1;
+ *
+ * Since multiplication and division are commutative, we can reorder
+ * operations into:
+ *     return ((size * 85) >> 20) + 1;
+ */
+static inline unsigned int i40e_txd_use_count(unsigned int size)
+{
+	return ((size * 85) >> 20) + 1;
+}
+
+/* Tx Descriptors needed, worst case */
+#define DESC_NEEDED (MAX_SKB_FRAGS + 6)
+
+#define I40E_TX_FLAGS_HW_VLAN		BIT(1)
+#define I40E_TX_FLAGS_SW_VLAN		BIT(2)
+#define I40E_TX_FLAGS_TSO		BIT(3)
+#define I40E_TX_FLAGS_IPV4		BIT(4)
+#define I40E_TX_FLAGS_IPV6		BIT(5)
+#define I40E_TX_FLAGS_TSYN		BIT(8)
+#define I40E_TX_FLAGS_FD_SB		BIT(9)
+#define I40E_TX_FLAGS_UDP_TUNNEL	BIT(10)
+#define I40E_TX_FLAGS_VLAN_MASK		0xffff0000
+#define I40E_TX_FLAGS_VLAN_PRIO_MASK	0xe0000000
+#define I40E_TX_FLAGS_VLAN_PRIO_SHIFT	29
+#define I40E_TX_FLAGS_VLAN_SHIFT	16
+
+struct i40e_tx_buffer {
+	struct i40e_tx_desc *next_to_watch;
+	union {
+		struct xdp_frame *xdpf;
+		struct sk_buff *skb;
+		void *raw_buf;
+	};
+	unsigned int bytecount;
+	unsigned short gso_segs;
+
+	DEFINE_DMA_UNMAP_ADDR(dma);
+	DEFINE_DMA_UNMAP_LEN(len);
+	u32 tx_flags;
+};
+
+struct i40e_rx_buffer {
+	dma_addr_t dma;
+	struct page *page;
+	__u32 page_offset;
+	__u16 pagecnt_bias;
+	__u32 page_count;
+};
+
+struct i40e_queue_stats {
+	u64 packets;
+	u64 bytes;
+};
+
+struct i40e_tx_queue_stats {
+	u64 restart_queue;
+	u64 tx_busy;
+	u64 tx_done_old;
+	u64 tx_linearize;
+	u64 tx_force_wb;
+	u64 tx_stopped;
+	int prev_pkt_ctr;
+};
+
+struct i40e_rx_queue_stats {
+	u64 non_eop_descs;
+	u64 alloc_page_failed;
+	u64 alloc_buff_failed;
+	u64 page_reuse_count;
+	u64 page_alloc_count;
+	u64 page_waive_count;
+	u64 page_busy_count;
+};
+
+enum i40e_ring_state_t {
+	__I40E_TX_FDIR_INIT_DONE,
+	__I40E_TX_XPS_INIT_DONE,
+	__I40E_RING_STATE_NBITS /* must be last */
+};
+
+/* some useful defines for virtchannel interface, which
+ * is the only remaining user of header split
+ */
+#define I40E_RX_DTYPE_HEADER_SPLIT  1
+#define I40E_RX_SPLIT_L2      0x1
+#define I40E_RX_SPLIT_IP      0x2
+#define I40E_RX_SPLIT_TCP_UDP 0x4
+#define I40E_RX_SPLIT_SCTP    0x8
+
+/* struct that defines a descriptor ring, associated with a VSI */
+struct i40e_ring {
+	struct i40e_ring *next;		/* pointer to next ring in q_vector */
+	void *desc;			/* Descriptor ring memory */
+	struct device *dev;		/* Used for DMA mapping */
+	struct net_device *netdev;	/* netdev ring maps to */
+	struct bpf_prog *xdp_prog;
+	union {
+		struct i40e_tx_buffer *tx_bi;
+		struct i40e_rx_buffer *rx_bi;
+		struct xdp_buff **rx_bi_zc;
+	};
+	DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS);
+	u16 queue_index;		/* Queue number of ring */
+	u8 dcb_tc;			/* Traffic class of ring */
+	u8 __iomem *tail;
+
+	/* Storing xdp_buff on ring helps in saving the state of partially built
+	 * packet when i40e_clean_rx_ring_irq() must return before it sees EOP
+	 * and to resume packet building for this ring in the next call to
+	 * i40e_clean_rx_ring_irq().
+	 */
+	struct xdp_buff xdp;
+
+	/* Next descriptor to be processed; next_to_clean is updated only on
+	 * processing EOP descriptor
+	 */
+	u16 next_to_process;
+	/* high bit set means dynamic, use accessor routines to read/write.
+	 * hardware only supports 2us resolution for the ITR registers.
+	 * these values always store the USER setting, and must be converted
+	 * before programming to a register.
+	 */
+	u16 itr_setting;
+
+	u16 count;			/* Number of descriptors */
+	u16 reg_idx;			/* HW register index of the ring */
+	u16 rx_buf_len;
+
+	/* used in interrupt processing */
+	u16 next_to_use;
+	u16 next_to_clean;
+	u16 xdp_tx_active;
+
+	u8 atr_sample_rate;
+	u8 atr_count;
+
+	bool ring_active;		/* is ring online or not */
+	bool arm_wb;		/* do something to arm write back */
+	u8 packet_stride;
+
+	u16 flags;
+#define I40E_TXR_FLAGS_WB_ON_ITR		BIT(0)
+#define I40E_RXR_FLAGS_BUILD_SKB_ENABLED	BIT(1)
+#define I40E_TXR_FLAGS_XDP			BIT(2)
+
+	/* stats structs */
+	struct i40e_queue_stats	stats;
+	struct u64_stats_sync syncp;
+	union {
+		struct i40e_tx_queue_stats tx_stats;
+		struct i40e_rx_queue_stats rx_stats;
+	};
+
+	unsigned int size;		/* length of descriptor ring in bytes */
+	dma_addr_t dma;			/* physical address of ring */
+
+	struct i40e_vsi *vsi;		/* Backreference to associated VSI */
+	struct i40e_q_vector *q_vector;	/* Backreference to associated vector */
+
+	struct rcu_head rcu;		/* to avoid race on free */
+	u16 next_to_alloc;
+
+	struct i40e_channel *ch;
+	u16 rx_offset;
+	struct xdp_rxq_info xdp_rxq;
+	struct xsk_buff_pool *xsk_pool;
+} ____cacheline_internodealigned_in_smp;
+
+static inline bool ring_uses_build_skb(struct i40e_ring *ring)
+{
+	return !!(ring->flags & I40E_RXR_FLAGS_BUILD_SKB_ENABLED);
+}
+
+static inline void set_ring_build_skb_enabled(struct i40e_ring *ring)
+{
+	ring->flags |= I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
+}
+
+static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
+{
+	ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
+}
+
+static inline bool ring_is_xdp(struct i40e_ring *ring)
+{
+	return !!(ring->flags & I40E_TXR_FLAGS_XDP);
+}
+
+static inline void set_ring_xdp(struct i40e_ring *ring)
+{
+	ring->flags |= I40E_TXR_FLAGS_XDP;
+}
+
+#define I40E_ITR_ADAPTIVE_MIN_INC	0x0002
+#define I40E_ITR_ADAPTIVE_MIN_USECS	0x0002
+#define I40E_ITR_ADAPTIVE_MAX_USECS	0x007e
+#define I40E_ITR_ADAPTIVE_LATENCY	0x8000
+#define I40E_ITR_ADAPTIVE_BULK		0x0000
+
+struct i40e_ring_container {
+	struct i40e_ring *ring;		/* pointer to linked list of ring(s) */
+	unsigned long next_update;	/* jiffies value of next update */
+	unsigned int total_bytes;	/* total bytes processed this int */
+	unsigned int total_packets;	/* total packets processed this int */
+	u16 count;
+	u16 target_itr;			/* target ITR setting for ring(s) */
+	u16 current_itr;		/* current ITR setting for ring(s) */
+};
+
+/* iterator for handling rings in ring container */
+#define i40e_for_each_ring(pos, head) \
+	for (pos = (head).ring; pos != NULL; pos = pos->next)
+
+static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring->rx_buf_len > (PAGE_SIZE / 2))
+		return 1;
+#endif
+	return 0;
+}
+
+#define i40e_rx_pg_size(_ring) (PAGE_SIZE << i40e_rx_pg_order(_ring))
+
+bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
+netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+u16 i40e_lan_select_queue(struct net_device *netdev, struct sk_buff *skb,
+			  struct net_device *sb_dev);
+void i40e_clean_tx_ring(struct i40e_ring *tx_ring);
+void i40e_clean_rx_ring(struct i40e_ring *rx_ring);
+int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring);
+int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring);
+void i40e_free_tx_resources(struct i40e_ring *tx_ring);
+void i40e_free_rx_resources(struct i40e_ring *rx_ring);
+int i40e_napi_poll(struct napi_struct *napi, int budget);
+void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
+void i40e_detect_recover_hung(struct i40e_vsi *vsi);
+int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
+bool __i40e_chk_linearize(struct sk_buff *skb);
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+		  u32 flags);
+bool i40e_is_non_eop(struct i40e_ring *rx_ring,
+		     union i40e_rx_desc *rx_desc);
+
+/**
+ * i40e_get_head - Retrieve head from head writeback
+ * @tx_ring:  tx ring to fetch head of
+ *
+ * Returns value of Tx ring head based on value stored
+ * in head write-back location
+ **/
+static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+{
+	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+
+	return le32_to_cpu(*(volatile __le32 *)head);
+}
+
+/**
+ * i40e_xmit_descriptor_count - calculate number of Tx descriptors needed
+ * @skb:     send buffer
+ *
+ * Returns number of data descriptors needed for this skb. Returns 0 to indicate
+ * there is not enough descriptors available in this ring since we need at least
+ * one descriptor.
+ **/
+static inline int i40e_xmit_descriptor_count(struct sk_buff *skb)
+{
+	const skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+	int count = 0, size = skb_headlen(skb);
+
+	for (;;) {
+		count += i40e_txd_use_count(size);
+
+		if (!nr_frags--)
+			break;
+
+		size = skb_frag_size(frag++);
+	}
+
+	return count;
+}
+
+/**
+ * i40e_maybe_stop_tx - 1st level check for Tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size:    the size buffer we want to assure is available
+ *
+ * Returns 0 if stop is not needed
+ **/
+static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
+{
+	if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
+		return 0;
+	return __i40e_maybe_stop_tx(tx_ring, size);
+}
+
+/**
+ * i40e_chk_linearize - Check if there are more than 8 fragments per packet
+ * @skb:      send buffer
+ * @count:    number of buffers used
+ *
+ * Note: Our HW can't scatter-gather more than 8 fragments to build
+ * a packet on the wire and so we need to figure out the cases where we
+ * need to linearize the skb.
+ **/
+static inline bool i40e_chk_linearize(struct sk_buff *skb, int count)
+{
+	/* Both TSO and single send will work if count is less than 8 */
+	if (likely(count < I40E_MAX_BUFFER_TXD))
+		return false;
+
+	if (skb_is_gso(skb))
+		return __i40e_chk_linearize(skb);
+
+	/* we can support up to 8 data buffers for a single send */
+	return count != I40E_MAX_BUFFER_TXD;
+}
+
+/**
+ * txring_txq - Find the netdev Tx ring based on the i40e Tx ring
+ * @ring: Tx ring to find the netdev equivalent of
+ **/
+static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring)
+{
+	return netdev_get_tx_queue(ring->netdev, ring->queue_index);
+}
+#endif /* _I40E_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
new file mode 100644
index 0000000000..8c5118c8ba
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2018 Intel Corporation. */
+
+#ifndef I40E_TXRX_COMMON_
+#define I40E_TXRX_COMMON_
+
+int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring);
+void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+				   u64 qword1);
+void i40e_process_skb_fields(struct i40e_ring *rx_ring,
+			     union i40e_rx_desc *rx_desc, struct sk_buff *skb);
+void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring);
+void i40e_update_rx_stats(struct i40e_ring *rx_ring,
+			  unsigned int total_rx_bytes,
+			  unsigned int total_rx_packets);
+void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res);
+void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val);
+
+#define I40E_XDP_PASS		0
+#define I40E_XDP_CONSUMED	BIT(0)
+#define I40E_XDP_TX		BIT(1)
+#define I40E_XDP_REDIR		BIT(2)
+#define I40E_XDP_EXIT		BIT(3)
+
+/*
+ * build_ctob - Builds the Tx descriptor (cmd, offset and type) qword
+ */
+static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
+				u32 td_tag)
+{
+	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
+			   ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
+			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
+			   ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
+			   ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
+}
+
+/**
+ * i40e_update_tx_stats - Update the egress statistics for the Tx ring
+ * @tx_ring: Tx ring to update
+ * @total_packets: total packets sent
+ * @total_bytes: total bytes sent
+ **/
+static inline void i40e_update_tx_stats(struct i40e_ring *tx_ring,
+					unsigned int total_packets,
+					unsigned int total_bytes)
+{
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->syncp);
+	tx_ring->q_vector->tx.total_bytes += total_bytes;
+	tx_ring->q_vector->tx.total_packets += total_packets;
+}
+
+#define WB_STRIDE 4
+
+/**
+ * i40e_arm_wb - (Possibly) arms Tx write-back
+ * @tx_ring: Tx ring to update
+ * @vsi: the VSI
+ * @budget: the NAPI budget left
+ **/
+static inline void i40e_arm_wb(struct i40e_ring *tx_ring,
+			       struct i40e_vsi *vsi,
+			       int budget)
+{
+	if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
+		/* check to see if there are < 4 descriptors
+		 * waiting to be written back, then kick the hardware to force
+		 * them to be written back in case we stay in NAPI.
+		 * In this mode on X722 we do not enable Interrupt.
+		 */
+		unsigned int j = i40e_get_tx_pending(tx_ring, false);
+
+		if (budget &&
+		    ((j / WB_STRIDE) == 0) && j > 0 &&
+		    !test_bit(__I40E_VSI_DOWN, vsi->state) &&
+		    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
+			tx_ring->arm_wb = true;
+	}
+}
+
+/**
+ * i40e_rx_is_programming_status - check for programming status descriptor
+ * @qword1: qword1 representing status_error_len in CPU ordering
+ *
+ * The value of in the descriptor length field indicate if this
+ * is a programming status descriptor for flow director or FCoE
+ * by the value of I40E_RX_PROG_STATUS_DESC_LENGTH, otherwise
+ * it is a packet descriptor.
+ **/
+static inline bool i40e_rx_is_programming_status(u64 qword1)
+{
+	/* The Rx filter programming status and SPH bit occupy the same
+	 * spot in the descriptor. Since we don't support packet split we
+	 * can just reuse the bit as an indication that this is a
+	 * programming status descriptor.
+	 */
+	return qword1 & I40E_RXD_QW1_LENGTH_SPH_MASK;
+}
+
+void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring);
+void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring);
+bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi);
+
+#endif /* I40E_TXRX_COMMON_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
new file mode 100644
index 0000000000..232131bedc
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -0,0 +1,1538 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2021 Intel Corporation. */
+
+#ifndef _I40E_TYPE_H_
+#define _I40E_TYPE_H_
+
+#include "i40e_osdep.h"
+#include "i40e_register.h"
+#include "i40e_adminq.h"
+#include "i40e_hmc.h"
+#include "i40e_lan_hmc.h"
+#include "i40e_devids.h"
+
+/* I40E_MASK is a macro used on 32 bit registers */
+#define I40E_MASK(mask, shift) ((u32)(mask) << (shift))
+
+#define I40E_MAX_VSI_QP			16
+#define I40E_MAX_VF_VSI			4
+#define I40E_MAX_CHAINED_RX_BUFFERS	5
+#define I40E_MAX_PF_UDP_OFFLOAD_PORTS	16
+
+/* Max default timeout in ms, */
+#define I40E_MAX_NVM_TIMEOUT		18000
+
+/* Max timeout in ms for the phy to respond */
+#define I40E_MAX_PHY_TIMEOUT		500
+
+/* Switch from ms to the 1usec global time (this is the GTIME resolution) */
+#define I40E_MS_TO_GTIME(time)		((time) * 1000)
+
+/* forward declaration */
+struct i40e_hw;
+typedef void (*I40E_ADMINQ_CALLBACK)(struct i40e_hw *, struct i40e_aq_desc *);
+
+/* Data type manipulation macros. */
+
+#define I40E_DESC_UNUSED(R)	\
+	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+	(R)->next_to_clean - (R)->next_to_use - 1)
+
+/* bitfields for Tx queue mapping in QTX_CTL */
+#define I40E_QTX_CTL_VF_QUEUE	0x0
+#define I40E_QTX_CTL_VM_QUEUE	0x1
+#define I40E_QTX_CTL_PF_QUEUE	0x2
+
+/* debug masks - set these bits in hw->debug_mask to control output */
+enum i40e_debug_mask {
+	I40E_DEBUG_INIT			= 0x00000001,
+	I40E_DEBUG_RELEASE		= 0x00000002,
+
+	I40E_DEBUG_LINK			= 0x00000010,
+	I40E_DEBUG_PHY			= 0x00000020,
+	I40E_DEBUG_HMC			= 0x00000040,
+	I40E_DEBUG_NVM			= 0x00000080,
+	I40E_DEBUG_LAN			= 0x00000100,
+	I40E_DEBUG_FLOW			= 0x00000200,
+	I40E_DEBUG_DCB			= 0x00000400,
+	I40E_DEBUG_DIAG			= 0x00000800,
+	I40E_DEBUG_FD			= 0x00001000,
+	I40E_DEBUG_PACKAGE		= 0x00002000,
+	I40E_DEBUG_IWARP		= 0x00F00000,
+	I40E_DEBUG_AQ_MESSAGE		= 0x01000000,
+	I40E_DEBUG_AQ_DESCRIPTOR	= 0x02000000,
+	I40E_DEBUG_AQ_DESC_BUFFER	= 0x04000000,
+	I40E_DEBUG_AQ_COMMAND		= 0x06000000,
+	I40E_DEBUG_AQ			= 0x0F000000,
+
+	I40E_DEBUG_USER			= 0xF0000000,
+
+	I40E_DEBUG_ALL			= 0xFFFFFFFF
+};
+
+#define I40E_MDIO_CLAUSE22_STCODE_MASK	I40E_MASK(1, \
+						  I40E_GLGEN_MSCA_STCODE_SHIFT)
+#define I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK	I40E_MASK(1, \
+						  I40E_GLGEN_MSCA_OPCODE_SHIFT)
+#define I40E_MDIO_CLAUSE22_OPCODE_READ_MASK	I40E_MASK(2, \
+						  I40E_GLGEN_MSCA_OPCODE_SHIFT)
+
+#define I40E_MDIO_CLAUSE45_STCODE_MASK	I40E_MASK(0, \
+						  I40E_GLGEN_MSCA_STCODE_SHIFT)
+#define I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK	I40E_MASK(0, \
+						  I40E_GLGEN_MSCA_OPCODE_SHIFT)
+#define I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK	I40E_MASK(1, \
+						  I40E_GLGEN_MSCA_OPCODE_SHIFT)
+#define I40E_MDIO_CLAUSE45_OPCODE_READ_MASK	I40E_MASK(3, \
+						I40E_GLGEN_MSCA_OPCODE_SHIFT)
+
+#define I40E_PHY_COM_REG_PAGE                   0x1E
+#define I40E_PHY_LED_LINK_MODE_MASK             0xF0
+#define I40E_PHY_LED_MANUAL_ON                  0x100
+#define I40E_PHY_LED_PROV_REG_1                 0xC430
+#define I40E_PHY_LED_MODE_MASK                  0xFFFF
+#define I40E_PHY_LED_MODE_ORIG                  0x80000000
+
+/* These are structs for managing the hardware information and the operations.
+ * The structures of function pointers are filled out at init time when we
+ * know for sure exactly which hardware we're working with.  This gives us the
+ * flexibility of using the same main driver code but adapting to slightly
+ * different hardware needs as new parts are developed.  For this architecture,
+ * the Firmware and AdminQ are intended to insulate the driver from most of the
+ * future changes, but these structures will also do part of the job.
+ */
+enum i40e_mac_type {
+	I40E_MAC_UNKNOWN = 0,
+	I40E_MAC_XL710,
+	I40E_MAC_VF,
+	I40E_MAC_X722,
+	I40E_MAC_X722_VF,
+	I40E_MAC_GENERIC,
+};
+
+enum i40e_media_type {
+	I40E_MEDIA_TYPE_UNKNOWN = 0,
+	I40E_MEDIA_TYPE_FIBER,
+	I40E_MEDIA_TYPE_BASET,
+	I40E_MEDIA_TYPE_BACKPLANE,
+	I40E_MEDIA_TYPE_CX4,
+	I40E_MEDIA_TYPE_DA,
+	I40E_MEDIA_TYPE_VIRTUAL
+};
+
+enum i40e_fc_mode {
+	I40E_FC_NONE = 0,
+	I40E_FC_RX_PAUSE,
+	I40E_FC_TX_PAUSE,
+	I40E_FC_FULL,
+	I40E_FC_PFC,
+	I40E_FC_DEFAULT
+};
+
+enum i40e_set_fc_aq_failures {
+	I40E_SET_FC_AQ_FAIL_NONE = 0,
+	I40E_SET_FC_AQ_FAIL_GET = 1,
+	I40E_SET_FC_AQ_FAIL_SET = 2,
+	I40E_SET_FC_AQ_FAIL_UPDATE = 4,
+	I40E_SET_FC_AQ_FAIL_SET_UPDATE = 6
+};
+
+enum i40e_vsi_type {
+	I40E_VSI_MAIN	= 0,
+	I40E_VSI_VMDQ1	= 1,
+	I40E_VSI_VMDQ2	= 2,
+	I40E_VSI_CTRL	= 3,
+	I40E_VSI_FCOE	= 4,
+	I40E_VSI_MIRROR	= 5,
+	I40E_VSI_SRIOV	= 6,
+	I40E_VSI_FDIR	= 7,
+	I40E_VSI_IWARP	= 8,
+	I40E_VSI_TYPE_UNKNOWN
+};
+
+enum i40e_queue_type {
+	I40E_QUEUE_TYPE_RX = 0,
+	I40E_QUEUE_TYPE_TX,
+	I40E_QUEUE_TYPE_PE_CEQ,
+	I40E_QUEUE_TYPE_UNKNOWN
+};
+
+struct i40e_link_status {
+	enum i40e_aq_phy_type phy_type;
+	enum i40e_aq_link_speed link_speed;
+	u8 link_info;
+	u8 an_info;
+	u8 req_fec_info;
+	u8 fec_info;
+	u8 ext_info;
+	u8 loopback;
+	/* is Link Status Event notification to SW enabled */
+	bool lse_enable;
+	u16 max_frame_size;
+	bool crc_enable;
+	u8 pacing;
+	u8 requested_speeds;
+	u8 module_type[3];
+	/* 1st byte: module identifier */
+#define I40E_MODULE_TYPE_SFP		0x03
+	/* 3rd byte: ethernet compliance codes for 1G */
+#define I40E_MODULE_TYPE_1000BASE_SX	0x01
+#define I40E_MODULE_TYPE_1000BASE_LX	0x02
+};
+
+struct i40e_phy_info {
+	struct i40e_link_status link_info;
+	struct i40e_link_status link_info_old;
+	bool get_link_info;
+	enum i40e_media_type media_type;
+	/* all the phy types the NVM is capable of */
+	u64 phy_types;
+};
+
+#define I40E_CAP_PHY_TYPE_SGMII BIT_ULL(I40E_PHY_TYPE_SGMII)
+#define I40E_CAP_PHY_TYPE_1000BASE_KX BIT_ULL(I40E_PHY_TYPE_1000BASE_KX)
+#define I40E_CAP_PHY_TYPE_10GBASE_KX4 BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4)
+#define I40E_CAP_PHY_TYPE_10GBASE_KR BIT_ULL(I40E_PHY_TYPE_10GBASE_KR)
+#define I40E_CAP_PHY_TYPE_40GBASE_KR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4)
+#define I40E_CAP_PHY_TYPE_XAUI BIT_ULL(I40E_PHY_TYPE_XAUI)
+#define I40E_CAP_PHY_TYPE_XFI BIT_ULL(I40E_PHY_TYPE_XFI)
+#define I40E_CAP_PHY_TYPE_SFI BIT_ULL(I40E_PHY_TYPE_SFI)
+#define I40E_CAP_PHY_TYPE_XLAUI BIT_ULL(I40E_PHY_TYPE_XLAUI)
+#define I40E_CAP_PHY_TYPE_XLPPI BIT_ULL(I40E_PHY_TYPE_XLPPI)
+#define I40E_CAP_PHY_TYPE_40GBASE_CR4_CU BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU)
+#define I40E_CAP_PHY_TYPE_10GBASE_CR1_CU BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU)
+#define I40E_CAP_PHY_TYPE_10GBASE_AOC BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC)
+#define I40E_CAP_PHY_TYPE_40GBASE_AOC BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC)
+#define I40E_CAP_PHY_TYPE_100BASE_TX BIT_ULL(I40E_PHY_TYPE_100BASE_TX)
+#define I40E_CAP_PHY_TYPE_1000BASE_T BIT_ULL(I40E_PHY_TYPE_1000BASE_T)
+#define I40E_CAP_PHY_TYPE_10GBASE_T BIT_ULL(I40E_PHY_TYPE_10GBASE_T)
+#define I40E_CAP_PHY_TYPE_10GBASE_SR BIT_ULL(I40E_PHY_TYPE_10GBASE_SR)
+#define I40E_CAP_PHY_TYPE_10GBASE_LR BIT_ULL(I40E_PHY_TYPE_10GBASE_LR)
+#define I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU)
+#define I40E_CAP_PHY_TYPE_10GBASE_CR1 BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1)
+#define I40E_CAP_PHY_TYPE_40GBASE_CR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4)
+#define I40E_CAP_PHY_TYPE_40GBASE_SR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4)
+#define I40E_CAP_PHY_TYPE_40GBASE_LR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4)
+#define I40E_CAP_PHY_TYPE_1000BASE_SX BIT_ULL(I40E_PHY_TYPE_1000BASE_SX)
+#define I40E_CAP_PHY_TYPE_1000BASE_LX BIT_ULL(I40E_PHY_TYPE_1000BASE_LX)
+#define I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL \
+				BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL)
+#define I40E_CAP_PHY_TYPE_20GBASE_KR2 BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2)
+/* Defining the macro I40E_TYPE_OFFSET to implement a bit shift for some
+ * PHY types. There is an unused bit (31) in the I40E_CAP_PHY_TYPE_* bit
+ * fields but no corresponding gap in the i40e_aq_phy_type enumeration. So,
+ * a shift is needed to adjust for this with values larger than 31. The
+ * only affected values are I40E_PHY_TYPE_25GBASE_*.
+ */
+#define I40E_PHY_TYPE_OFFSET 1
+#define I40E_CAP_PHY_TYPE_25GBASE_KR BIT_ULL(I40E_PHY_TYPE_25GBASE_KR + \
+					     I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_CR BIT_ULL(I40E_PHY_TYPE_25GBASE_CR + \
+					     I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_SR BIT_ULL(I40E_PHY_TYPE_25GBASE_SR + \
+					     I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_LR BIT_ULL(I40E_PHY_TYPE_25GBASE_LR + \
+					     I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_AOC BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC + \
+					     I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_ACC BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC + \
+					     I40E_PHY_TYPE_OFFSET)
+/* Offset for 2.5G/5G PHY Types value to bit number conversion */
+#define I40E_CAP_PHY_TYPE_2_5GBASE_T BIT_ULL(I40E_PHY_TYPE_2_5GBASE_T)
+#define I40E_CAP_PHY_TYPE_5GBASE_T BIT_ULL(I40E_PHY_TYPE_5GBASE_T)
+#define I40E_HW_CAP_MAX_GPIO			30
+/* Capabilities of a PF or a VF or the whole device */
+struct i40e_hw_capabilities {
+	u32  switch_mode;
+
+	/* Cloud filter modes:
+	 * Mode1: Filter on L4 port only
+	 * Mode2: Filter for non-tunneled traffic
+	 * Mode3: Filter for tunnel traffic
+	 */
+#define I40E_CLOUD_FILTER_MODE1	0x6
+#define I40E_CLOUD_FILTER_MODE2	0x7
+#define I40E_SWITCH_MODE_MASK	0xF
+
+	u32  management_mode;
+	u32  mng_protocols_over_mctp;
+	u32  npar_enable;
+	u32  os2bmc;
+	u32  valid_functions;
+	bool sr_iov_1_1;
+	bool vmdq;
+	bool evb_802_1_qbg; /* Edge Virtual Bridging */
+	bool evb_802_1_qbh; /* Bridge Port Extension */
+	bool dcb;
+	bool fcoe;
+	bool iscsi; /* Indicates iSCSI enabled */
+	bool flex10_enable;
+	bool flex10_capable;
+	u32  flex10_mode;
+
+	u32 flex10_status;
+
+	bool sec_rev_disabled;
+	bool update_disabled;
+#define I40E_NVM_MGMT_SEC_REV_DISABLED	0x1
+#define I40E_NVM_MGMT_UPDATE_DISABLED	0x2
+
+	bool mgmt_cem;
+	bool ieee_1588;
+	bool iwarp;
+	bool fd;
+	u32 fd_filters_guaranteed;
+	u32 fd_filters_best_effort;
+	bool rss;
+	u32 rss_table_size;
+	u32 rss_table_entry_width;
+	bool led[I40E_HW_CAP_MAX_GPIO];
+	bool sdp[I40E_HW_CAP_MAX_GPIO];
+	u32 nvm_image_type;
+	u32 num_flow_director_filters;
+	u32 num_vfs;
+	u32 vf_base_id;
+	u32 num_vsis;
+	u32 num_rx_qp;
+	u32 num_tx_qp;
+	u32 base_queue;
+	u32 num_msix_vectors;
+	u32 num_msix_vectors_vf;
+	u32 led_pin_num;
+	u32 sdp_pin_num;
+	u32 mdio_port_num;
+	u32 mdio_port_mode;
+	u8 rx_buf_chain_len;
+	u32 enabled_tcmap;
+	u32 maxtc;
+	u64 wr_csr_prot;
+};
+
+struct i40e_mac_info {
+	enum i40e_mac_type type;
+	u8 addr[ETH_ALEN];
+	u8 perm_addr[ETH_ALEN];
+	u8 san_addr[ETH_ALEN];
+	u8 port_addr[ETH_ALEN];
+	u16 max_fcoeq;
+};
+
+enum i40e_aq_resources_ids {
+	I40E_NVM_RESOURCE_ID = 1
+};
+
+enum i40e_aq_resource_access_type {
+	I40E_RESOURCE_READ = 1,
+	I40E_RESOURCE_WRITE
+};
+
+struct i40e_nvm_info {
+	u64 hw_semaphore_timeout; /* usec global time (GTIME resolution) */
+	u32 timeout;              /* [ms] */
+	u16 sr_size;              /* Shadow RAM size in words */
+	bool blank_nvm_mode;      /* is NVM empty (no FW present)*/
+	u16 version;              /* NVM package version */
+	u32 eetrack;              /* NVM data version */
+	u32 oem_ver;              /* OEM version info */
+};
+
+/* definitions used in NVM update support */
+
+enum i40e_nvmupd_cmd {
+	I40E_NVMUPD_INVALID,
+	I40E_NVMUPD_READ_CON,
+	I40E_NVMUPD_READ_SNT,
+	I40E_NVMUPD_READ_LCB,
+	I40E_NVMUPD_READ_SA,
+	I40E_NVMUPD_WRITE_ERA,
+	I40E_NVMUPD_WRITE_CON,
+	I40E_NVMUPD_WRITE_SNT,
+	I40E_NVMUPD_WRITE_LCB,
+	I40E_NVMUPD_WRITE_SA,
+	I40E_NVMUPD_CSUM_CON,
+	I40E_NVMUPD_CSUM_SA,
+	I40E_NVMUPD_CSUM_LCB,
+	I40E_NVMUPD_STATUS,
+	I40E_NVMUPD_EXEC_AQ,
+	I40E_NVMUPD_GET_AQ_RESULT,
+	I40E_NVMUPD_GET_AQ_EVENT,
+};
+
+enum i40e_nvmupd_state {
+	I40E_NVMUPD_STATE_INIT,
+	I40E_NVMUPD_STATE_READING,
+	I40E_NVMUPD_STATE_WRITING,
+	I40E_NVMUPD_STATE_INIT_WAIT,
+	I40E_NVMUPD_STATE_WRITE_WAIT,
+	I40E_NVMUPD_STATE_ERROR
+};
+
+/* nvm_access definition and its masks/shifts need to be accessible to
+ * application, core driver, and shared code.  Where is the right file?
+ */
+#define I40E_NVM_READ	0xB
+#define I40E_NVM_WRITE	0xC
+
+#define I40E_NVM_MOD_PNT_MASK 0xFF
+
+#define I40E_NVM_TRANS_SHIFT			8
+#define I40E_NVM_TRANS_MASK			(0xf << I40E_NVM_TRANS_SHIFT)
+#define I40E_NVM_PRESERVATION_FLAGS_SHIFT	12
+#define I40E_NVM_PRESERVATION_FLAGS_MASK \
+				(0x3 << I40E_NVM_PRESERVATION_FLAGS_SHIFT)
+#define I40E_NVM_PRESERVATION_FLAGS_SELECTED	0x01
+#define I40E_NVM_PRESERVATION_FLAGS_ALL		0x02
+#define I40E_NVM_CON				0x0
+#define I40E_NVM_SNT				0x1
+#define I40E_NVM_LCB				0x2
+#define I40E_NVM_SA				(I40E_NVM_SNT | I40E_NVM_LCB)
+#define I40E_NVM_ERA				0x4
+#define I40E_NVM_CSUM				0x8
+#define I40E_NVM_AQE				0xe
+#define I40E_NVM_EXEC				0xf
+
+
+#define I40E_NVMUPD_MAX_DATA	4096
+
+struct i40e_nvm_access {
+	u32 command;
+	u32 config;
+	u32 offset;	/* in bytes */
+	u32 data_size;	/* in bytes */
+	u8 data[1];
+};
+
+/* (Q)SFP module access definitions */
+#define I40E_I2C_EEPROM_DEV_ADDR	0xA0
+#define I40E_I2C_EEPROM_DEV_ADDR2	0xA2
+#define I40E_MODULE_REVISION_ADDR	0x01
+#define I40E_MODULE_SFF_8472_COMP	0x5E
+#define I40E_MODULE_SFF_8472_SWAP	0x5C
+#define I40E_MODULE_SFF_ADDR_MODE	0x04
+#define I40E_MODULE_SFF_DDM_IMPLEMENTED 0x40
+#define I40E_MODULE_TYPE_QSFP_PLUS	0x0D
+#define I40E_MODULE_TYPE_QSFP28		0x11
+#define I40E_MODULE_QSFP_MAX_LEN	640
+
+/* PCI bus types */
+enum i40e_bus_type {
+	i40e_bus_type_unknown = 0,
+	i40e_bus_type_pci,
+	i40e_bus_type_pcix,
+	i40e_bus_type_pci_express,
+	i40e_bus_type_reserved
+};
+
+/* PCI bus speeds */
+enum i40e_bus_speed {
+	i40e_bus_speed_unknown	= 0,
+	i40e_bus_speed_33	= 33,
+	i40e_bus_speed_66	= 66,
+	i40e_bus_speed_100	= 100,
+	i40e_bus_speed_120	= 120,
+	i40e_bus_speed_133	= 133,
+	i40e_bus_speed_2500	= 2500,
+	i40e_bus_speed_5000	= 5000,
+	i40e_bus_speed_8000	= 8000,
+	i40e_bus_speed_reserved
+};
+
+/* PCI bus widths */
+enum i40e_bus_width {
+	i40e_bus_width_unknown	= 0,
+	i40e_bus_width_pcie_x1	= 1,
+	i40e_bus_width_pcie_x2	= 2,
+	i40e_bus_width_pcie_x4	= 4,
+	i40e_bus_width_pcie_x8	= 8,
+	i40e_bus_width_32	= 32,
+	i40e_bus_width_64	= 64,
+	i40e_bus_width_reserved
+};
+
+/* Bus parameters */
+struct i40e_bus_info {
+	enum i40e_bus_speed speed;
+	enum i40e_bus_width width;
+	enum i40e_bus_type type;
+
+	u16 func;
+	u16 device;
+	u16 lan_id;
+	u16 bus_id;
+};
+
+/* Flow control (FC) parameters */
+struct i40e_fc_info {
+	enum i40e_fc_mode current_mode; /* FC mode in effect */
+	enum i40e_fc_mode requested_mode; /* FC mode requested by caller */
+};
+
+#define I40E_MAX_TRAFFIC_CLASS		8
+#define I40E_MAX_USER_PRIORITY		8
+#define I40E_DCBX_MAX_APPS		32
+#define I40E_LLDPDU_SIZE		1500
+#define I40E_TLV_STATUS_OPER		0x1
+#define I40E_TLV_STATUS_SYNC		0x2
+#define I40E_TLV_STATUS_ERR		0x4
+#define I40E_CEE_OPER_MAX_APPS		3
+#define I40E_APP_PROTOID_FCOE		0x8906
+#define I40E_APP_PROTOID_ISCSI		0x0cbc
+#define I40E_APP_PROTOID_FIP		0x8914
+#define I40E_APP_SEL_ETHTYPE		0x1
+#define I40E_APP_SEL_TCPIP		0x2
+#define I40E_CEE_APP_SEL_ETHTYPE	0x0
+#define I40E_CEE_APP_SEL_TCPIP		0x1
+
+/* CEE or IEEE 802.1Qaz ETS Configuration data */
+struct i40e_dcb_ets_config {
+	u8 willing;
+	u8 cbs;
+	u8 maxtcs;
+	u8 prioritytable[I40E_MAX_TRAFFIC_CLASS];
+	u8 tcbwtable[I40E_MAX_TRAFFIC_CLASS];
+	u8 tsatable[I40E_MAX_TRAFFIC_CLASS];
+};
+
+/* CEE or IEEE 802.1Qaz PFC Configuration data */
+struct i40e_dcb_pfc_config {
+	u8 willing;
+	u8 mbc;
+	u8 pfccap;
+	u8 pfcenable;
+};
+
+/* CEE or IEEE 802.1Qaz Application Priority data */
+struct i40e_dcb_app_priority_table {
+	u8  priority;
+	u8  selector;
+	u16 protocolid;
+};
+
+struct i40e_dcbx_config {
+	u8  dcbx_mode;
+#define I40E_DCBX_MODE_CEE	0x1
+#define I40E_DCBX_MODE_IEEE	0x2
+	u8  app_mode;
+#define I40E_DCBX_APPS_NON_WILLING 0x1
+	u32 numapps;
+	u32 tlv_status; /* CEE mode TLV status */
+	struct i40e_dcb_ets_config etscfg;
+	struct i40e_dcb_ets_config etsrec;
+	struct i40e_dcb_pfc_config pfc;
+	struct i40e_dcb_app_priority_table app[I40E_DCBX_MAX_APPS];
+};
+
+/* Port hardware description */
+struct i40e_hw {
+	u8 __iomem *hw_addr;
+	void *back;
+
+	/* subsystem structs */
+	struct i40e_phy_info phy;
+	struct i40e_mac_info mac;
+	struct i40e_bus_info bus;
+	struct i40e_nvm_info nvm;
+	struct i40e_fc_info fc;
+
+	/* pci info */
+	u16 device_id;
+	u16 vendor_id;
+	u16 subsystem_device_id;
+	u16 subsystem_vendor_id;
+	u8 revision_id;
+	u8 port;
+	bool adapter_stopped;
+
+	/* capabilities for entire device and PCI func */
+	struct i40e_hw_capabilities dev_caps;
+	struct i40e_hw_capabilities func_caps;
+
+	/* Flow Director shared filter space */
+	u16 fdir_shared_filter_count;
+
+	/* device profile info */
+	u8  pf_id;
+	u16 main_vsi_seid;
+
+	/* for multi-function MACs */
+	u16 partition_id;
+	u16 num_partitions;
+	u16 num_ports;
+
+	/* Closest numa node to the device */
+	u16 numa_node;
+
+	/* Admin Queue info */
+	struct i40e_adminq_info aq;
+
+	/* state of nvm update process */
+	enum i40e_nvmupd_state nvmupd_state;
+	struct i40e_aq_desc nvm_wb_desc;
+	struct i40e_aq_desc nvm_aq_event_desc;
+	struct i40e_virt_mem nvm_buff;
+	bool nvm_release_on_done;
+	u16 nvm_wait_opcode;
+
+	/* HMC info */
+	struct i40e_hmc_info hmc; /* HMC info struct */
+
+	/* LLDP/DCBX Status */
+	u16 dcbx_status;
+
+	/* DCBX info */
+	struct i40e_dcbx_config local_dcbx_config; /* Oper/Local Cfg */
+	struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */
+	struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */
+
+#define I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE BIT_ULL(0)
+#define I40E_HW_FLAG_802_1AD_CAPABLE        BIT_ULL(1)
+#define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE  BIT_ULL(2)
+#define I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK BIT_ULL(3)
+#define I40E_HW_FLAG_FW_LLDP_STOPPABLE      BIT_ULL(4)
+#define I40E_HW_FLAG_FW_LLDP_PERSISTENT     BIT_ULL(5)
+#define I40E_HW_FLAG_AQ_PHY_ACCESS_EXTENDED BIT_ULL(6)
+#define I40E_HW_FLAG_DROP_MODE              BIT_ULL(7)
+#define I40E_HW_FLAG_X722_FEC_REQUEST_CAPABLE BIT_ULL(8)
+	u64 flags;
+
+	/* Used in set switch config AQ command */
+	u16 switch_tag;
+	u16 first_tag;
+	u16 second_tag;
+
+	/* debug mask */
+	u32 debug_mask;
+	char err_str[16];
+};
+
+static inline bool i40e_is_vf(struct i40e_hw *hw)
+{
+	return (hw->mac.type == I40E_MAC_VF ||
+		hw->mac.type == I40E_MAC_X722_VF);
+}
+
+struct i40e_driver_version {
+	u8 major_version;
+	u8 minor_version;
+	u8 build_version;
+	u8 subbuild_version;
+	u8 driver_string[32];
+};
+
+/* RX Descriptors */
+union i40e_16byte_rx_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+	} read;
+	struct {
+		struct i40e_16b_rx_wb_qw0 {
+			struct {
+				union {
+					__le16 mirroring_status;
+					__le16 fcoe_ctx_id;
+				} mirr_fcoe;
+				__le16 l2tag1;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				__le32 fd_id; /* Flow director filter id */
+				__le32 fcoe_param; /* FCoE DDP Context id */
+			} hi_dword;
+		} qword0;
+		struct {
+			/* ext status/error/pktype/length */
+			__le64 status_error_len;
+		} qword1;
+	} wb;  /* writeback */
+	struct {
+		u64 qword[2];
+	} raw;
+};
+
+union i40e_32byte_rx_desc {
+	struct {
+		__le64  pkt_addr; /* Packet buffer address */
+		__le64  hdr_addr; /* Header buffer address */
+			/* bit 0 of hdr_buffer_addr is DD bit */
+		__le64  rsvd1;
+		__le64  rsvd2;
+	} read;
+	struct {
+		struct i40e_32b_rx_wb_qw0 {
+			struct {
+				union {
+					__le16 mirroring_status;
+					__le16 fcoe_ctx_id;
+				} mirr_fcoe;
+				__le16 l2tag1;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				__le32 fcoe_param; /* FCoE DDP Context id */
+				/* Flow director filter id in case of
+				 * Programming status desc WB
+				 */
+				__le32 fd_id;
+			} hi_dword;
+		} qword0;
+		struct {
+			/* status/error/pktype/length */
+			__le64 status_error_len;
+		} qword1;
+		struct {
+			__le16 ext_status; /* extended status */
+			__le16 rsvd;
+			__le16 l2tag2_1;
+			__le16 l2tag2_2;
+		} qword2;
+		struct {
+			union {
+				__le32 flex_bytes_lo;
+				__le32 pe_status;
+			} lo_dword;
+			union {
+				__le32 flex_bytes_hi;
+				__le32 fd_id;
+			} hi_dword;
+		} qword3;
+	} wb;  /* writeback */
+	struct {
+		u64 qword[4];
+	} raw;
+};
+
+enum i40e_rx_desc_status_bits {
+	/* Note: These are predefined bit offsets */
+	I40E_RX_DESC_STATUS_DD_SHIFT		= 0,
+	I40E_RX_DESC_STATUS_EOF_SHIFT		= 1,
+	I40E_RX_DESC_STATUS_L2TAG1P_SHIFT	= 2,
+	I40E_RX_DESC_STATUS_L3L4P_SHIFT		= 3,
+	I40E_RX_DESC_STATUS_CRCP_SHIFT		= 4,
+	I40E_RX_DESC_STATUS_TSYNINDX_SHIFT	= 5, /* 2 BITS */
+	I40E_RX_DESC_STATUS_TSYNVALID_SHIFT	= 7,
+	/* Note: Bit 8 is reserved in X710 and XL710 */
+	I40E_RX_DESC_STATUS_EXT_UDP_0_SHIFT	= 8,
+	I40E_RX_DESC_STATUS_UMBCAST_SHIFT	= 9, /* 2 BITS */
+	I40E_RX_DESC_STATUS_FLM_SHIFT		= 11,
+	I40E_RX_DESC_STATUS_FLTSTAT_SHIFT	= 12, /* 2 BITS */
+	I40E_RX_DESC_STATUS_LPBK_SHIFT		= 14,
+	I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT	= 15,
+	I40E_RX_DESC_STATUS_RESERVED_SHIFT	= 16, /* 2 BITS */
+	/* Note: For non-tunnel packets INT_UDP_0 is the right status for
+	 * UDP header
+	 */
+	I40E_RX_DESC_STATUS_INT_UDP_0_SHIFT	= 18,
+	I40E_RX_DESC_STATUS_LAST /* this entry must be last!!! */
+};
+
+#define I40E_RXD_QW1_STATUS_SHIFT	0
+#define I40E_RXD_QW1_STATUS_MASK	((BIT(I40E_RX_DESC_STATUS_LAST) - 1) \
+					 << I40E_RXD_QW1_STATUS_SHIFT)
+
+#define I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT   I40E_RX_DESC_STATUS_TSYNINDX_SHIFT
+#define I40E_RXD_QW1_STATUS_TSYNINDX_MASK	(0x3UL << \
+					     I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT)
+
+#define I40E_RXD_QW1_STATUS_TSYNVALID_SHIFT  I40E_RX_DESC_STATUS_TSYNVALID_SHIFT
+#define I40E_RXD_QW1_STATUS_TSYNVALID_MASK \
+				    BIT_ULL(I40E_RXD_QW1_STATUS_TSYNVALID_SHIFT)
+
+enum i40e_rx_desc_fltstat_values {
+	I40E_RX_DESC_FLTSTAT_NO_DATA	= 0,
+	I40E_RX_DESC_FLTSTAT_RSV_FD_ID	= 1, /* 16byte desc? FD_ID : RSV */
+	I40E_RX_DESC_FLTSTAT_RSV	= 2,
+	I40E_RX_DESC_FLTSTAT_RSS_HASH	= 3,
+};
+
+#define I40E_RXD_QW1_ERROR_SHIFT	19
+#define I40E_RXD_QW1_ERROR_MASK		(0xFFUL << I40E_RXD_QW1_ERROR_SHIFT)
+
+enum i40e_rx_desc_error_bits {
+	/* Note: These are predefined bit offsets */
+	I40E_RX_DESC_ERROR_RXE_SHIFT		= 0,
+	I40E_RX_DESC_ERROR_RECIPE_SHIFT		= 1,
+	I40E_RX_DESC_ERROR_HBO_SHIFT		= 2,
+	I40E_RX_DESC_ERROR_L3L4E_SHIFT		= 3, /* 3 BITS */
+	I40E_RX_DESC_ERROR_IPE_SHIFT		= 3,
+	I40E_RX_DESC_ERROR_L4E_SHIFT		= 4,
+	I40E_RX_DESC_ERROR_EIPE_SHIFT		= 5,
+	I40E_RX_DESC_ERROR_OVERSIZE_SHIFT	= 6,
+	I40E_RX_DESC_ERROR_PPRS_SHIFT		= 7
+};
+
+enum i40e_rx_desc_error_l3l4e_fcoe_masks {
+	I40E_RX_DESC_ERROR_L3L4E_NONE		= 0,
+	I40E_RX_DESC_ERROR_L3L4E_PROT		= 1,
+	I40E_RX_DESC_ERROR_L3L4E_FC		= 2,
+	I40E_RX_DESC_ERROR_L3L4E_DMAC_ERR	= 3,
+	I40E_RX_DESC_ERROR_L3L4E_DMAC_WARN	= 4
+};
+
+#define I40E_RXD_QW1_PTYPE_SHIFT	30
+#define I40E_RXD_QW1_PTYPE_MASK		(0xFFULL << I40E_RXD_QW1_PTYPE_SHIFT)
+
+/* Packet type non-ip values */
+enum i40e_rx_l2_ptype {
+	I40E_RX_PTYPE_L2_RESERVED			= 0,
+	I40E_RX_PTYPE_L2_MAC_PAY2			= 1,
+	I40E_RX_PTYPE_L2_TIMESYNC_PAY2			= 2,
+	I40E_RX_PTYPE_L2_FIP_PAY2			= 3,
+	I40E_RX_PTYPE_L2_OUI_PAY2			= 4,
+	I40E_RX_PTYPE_L2_MACCNTRL_PAY2			= 5,
+	I40E_RX_PTYPE_L2_LLDP_PAY2			= 6,
+	I40E_RX_PTYPE_L2_ECP_PAY2			= 7,
+	I40E_RX_PTYPE_L2_EVB_PAY2			= 8,
+	I40E_RX_PTYPE_L2_QCN_PAY2			= 9,
+	I40E_RX_PTYPE_L2_EAPOL_PAY2			= 10,
+	I40E_RX_PTYPE_L2_ARP				= 11,
+	I40E_RX_PTYPE_L2_FCOE_PAY3			= 12,
+	I40E_RX_PTYPE_L2_FCOE_FCDATA_PAY3		= 13,
+	I40E_RX_PTYPE_L2_FCOE_FCRDY_PAY3		= 14,
+	I40E_RX_PTYPE_L2_FCOE_FCRSP_PAY3		= 15,
+	I40E_RX_PTYPE_L2_FCOE_FCOTHER_PA		= 16,
+	I40E_RX_PTYPE_L2_FCOE_VFT_PAY3			= 17,
+	I40E_RX_PTYPE_L2_FCOE_VFT_FCDATA		= 18,
+	I40E_RX_PTYPE_L2_FCOE_VFT_FCRDY			= 19,
+	I40E_RX_PTYPE_L2_FCOE_VFT_FCRSP			= 20,
+	I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER		= 21,
+	I40E_RX_PTYPE_GRENAT4_MAC_PAY3			= 58,
+	I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4	= 87,
+	I40E_RX_PTYPE_GRENAT6_MAC_PAY3			= 124,
+	I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4	= 153
+};
+
+struct i40e_rx_ptype_decoded {
+	u32 known:1;
+	u32 outer_ip:1;
+	u32 outer_ip_ver:1;
+	u32 outer_frag:1;
+	u32 tunnel_type:3;
+	u32 tunnel_end_prot:2;
+	u32 tunnel_end_frag:1;
+	u32 inner_prot:4;
+	u32 payload_layer:3;
+};
+
+enum i40e_rx_ptype_outer_ip {
+	I40E_RX_PTYPE_OUTER_L2	= 0,
+	I40E_RX_PTYPE_OUTER_IP	= 1
+};
+
+enum i40e_rx_ptype_outer_ip_ver {
+	I40E_RX_PTYPE_OUTER_NONE	= 0,
+	I40E_RX_PTYPE_OUTER_IPV4	= 0,
+	I40E_RX_PTYPE_OUTER_IPV6	= 1
+};
+
+enum i40e_rx_ptype_outer_fragmented {
+	I40E_RX_PTYPE_NOT_FRAG	= 0,
+	I40E_RX_PTYPE_FRAG	= 1
+};
+
+enum i40e_rx_ptype_tunnel_type {
+	I40E_RX_PTYPE_TUNNEL_NONE		= 0,
+	I40E_RX_PTYPE_TUNNEL_IP_IP		= 1,
+	I40E_RX_PTYPE_TUNNEL_IP_GRENAT		= 2,
+	I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC	= 3,
+	I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN	= 4,
+};
+
+enum i40e_rx_ptype_tunnel_end_prot {
+	I40E_RX_PTYPE_TUNNEL_END_NONE	= 0,
+	I40E_RX_PTYPE_TUNNEL_END_IPV4	= 1,
+	I40E_RX_PTYPE_TUNNEL_END_IPV6	= 2,
+};
+
+enum i40e_rx_ptype_inner_prot {
+	I40E_RX_PTYPE_INNER_PROT_NONE		= 0,
+	I40E_RX_PTYPE_INNER_PROT_UDP		= 1,
+	I40E_RX_PTYPE_INNER_PROT_TCP		= 2,
+	I40E_RX_PTYPE_INNER_PROT_SCTP		= 3,
+	I40E_RX_PTYPE_INNER_PROT_ICMP		= 4,
+	I40E_RX_PTYPE_INNER_PROT_TIMESYNC	= 5
+};
+
+enum i40e_rx_ptype_payload_layer {
+	I40E_RX_PTYPE_PAYLOAD_LAYER_NONE	= 0,
+	I40E_RX_PTYPE_PAYLOAD_LAYER_PAY2	= 1,
+	I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3	= 2,
+	I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4	= 3,
+};
+
+#define I40E_RXD_QW1_LENGTH_PBUF_SHIFT	38
+#define I40E_RXD_QW1_LENGTH_PBUF_MASK	(0x3FFFULL << \
+					 I40E_RXD_QW1_LENGTH_PBUF_SHIFT)
+
+
+#define I40E_RXD_QW1_LENGTH_SPH_SHIFT	63
+#define I40E_RXD_QW1_LENGTH_SPH_MASK	BIT_ULL(I40E_RXD_QW1_LENGTH_SPH_SHIFT)
+
+enum i40e_rx_desc_ext_status_bits {
+	/* Note: These are predefined bit offsets */
+	I40E_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT	= 0,
+	I40E_RX_DESC_EXT_STATUS_L2TAG3P_SHIFT	= 1,
+	I40E_RX_DESC_EXT_STATUS_FLEXBL_SHIFT	= 2, /* 2 BITS */
+	I40E_RX_DESC_EXT_STATUS_FLEXBH_SHIFT	= 4, /* 2 BITS */
+	I40E_RX_DESC_EXT_STATUS_FDLONGB_SHIFT	= 9,
+	I40E_RX_DESC_EXT_STATUS_FCOELONGB_SHIFT	= 10,
+	I40E_RX_DESC_EXT_STATUS_PELONGB_SHIFT	= 11,
+};
+
+enum i40e_rx_desc_pe_status_bits {
+	/* Note: These are predefined bit offsets */
+	I40E_RX_DESC_PE_STATUS_QPID_SHIFT	= 0, /* 18 BITS */
+	I40E_RX_DESC_PE_STATUS_L4PORT_SHIFT	= 0, /* 16 BITS */
+	I40E_RX_DESC_PE_STATUS_IPINDEX_SHIFT	= 16, /* 8 BITS */
+	I40E_RX_DESC_PE_STATUS_QPIDHIT_SHIFT	= 24,
+	I40E_RX_DESC_PE_STATUS_APBVTHIT_SHIFT	= 25,
+	I40E_RX_DESC_PE_STATUS_PORTV_SHIFT	= 26,
+	I40E_RX_DESC_PE_STATUS_URG_SHIFT	= 27,
+	I40E_RX_DESC_PE_STATUS_IPFRAG_SHIFT	= 28,
+	I40E_RX_DESC_PE_STATUS_IPOPT_SHIFT	= 29
+};
+
+#define I40E_RX_PROG_STATUS_DESC_LENGTH			0x2000000
+
+#define I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT	2
+#define I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK	(0x7UL << \
+				I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT)
+
+#define I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT	19
+#define I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK		(0x3FUL << \
+				I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT)
+
+enum i40e_rx_prog_status_desc_status_bits {
+	/* Note: These are predefined bit offsets */
+	I40E_RX_PROG_STATUS_DESC_DD_SHIFT	= 0,
+	I40E_RX_PROG_STATUS_DESC_PROG_ID_SHIFT	= 2 /* 3 BITS */
+};
+
+enum i40e_rx_prog_status_desc_prog_id_masks {
+	I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS	= 1,
+	I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS	= 2,
+	I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS	= 4,
+};
+
+enum i40e_rx_prog_status_desc_error_bits {
+	/* Note: These are predefined bit offsets */
+	I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT	= 0,
+	I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT	= 1,
+	I40E_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT	= 2,
+	I40E_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT	= 3
+};
+
+/* TX Descriptor */
+struct i40e_tx_desc {
+	__le64 buffer_addr; /* Address of descriptor's data buf */
+	__le64 cmd_type_offset_bsz;
+};
+
+
+enum i40e_tx_desc_dtype_value {
+	I40E_TX_DESC_DTYPE_DATA		= 0x0,
+	I40E_TX_DESC_DTYPE_NOP		= 0x1, /* same as Context desc */
+	I40E_TX_DESC_DTYPE_CONTEXT	= 0x1,
+	I40E_TX_DESC_DTYPE_FCOE_CTX	= 0x2,
+	I40E_TX_DESC_DTYPE_FILTER_PROG	= 0x8,
+	I40E_TX_DESC_DTYPE_DDP_CTX	= 0x9,
+	I40E_TX_DESC_DTYPE_FLEX_DATA	= 0xB,
+	I40E_TX_DESC_DTYPE_FLEX_CTX_1	= 0xC,
+	I40E_TX_DESC_DTYPE_FLEX_CTX_2	= 0xD,
+	I40E_TX_DESC_DTYPE_DESC_DONE	= 0xF
+};
+
+#define I40E_TXD_QW1_CMD_SHIFT	4
+
+enum i40e_tx_desc_cmd_bits {
+	I40E_TX_DESC_CMD_EOP			= 0x0001,
+	I40E_TX_DESC_CMD_RS			= 0x0002,
+	I40E_TX_DESC_CMD_ICRC			= 0x0004,
+	I40E_TX_DESC_CMD_IL2TAG1		= 0x0008,
+	I40E_TX_DESC_CMD_DUMMY			= 0x0010,
+	I40E_TX_DESC_CMD_IIPT_NONIP		= 0x0000, /* 2 BITS */
+	I40E_TX_DESC_CMD_IIPT_IPV6		= 0x0020, /* 2 BITS */
+	I40E_TX_DESC_CMD_IIPT_IPV4		= 0x0040, /* 2 BITS */
+	I40E_TX_DESC_CMD_IIPT_IPV4_CSUM		= 0x0060, /* 2 BITS */
+	I40E_TX_DESC_CMD_FCOET			= 0x0080,
+	I40E_TX_DESC_CMD_L4T_EOFT_UNK		= 0x0000, /* 2 BITS */
+	I40E_TX_DESC_CMD_L4T_EOFT_TCP		= 0x0100, /* 2 BITS */
+	I40E_TX_DESC_CMD_L4T_EOFT_SCTP		= 0x0200, /* 2 BITS */
+	I40E_TX_DESC_CMD_L4T_EOFT_UDP		= 0x0300, /* 2 BITS */
+	I40E_TX_DESC_CMD_L4T_EOFT_EOF_N		= 0x0000, /* 2 BITS */
+	I40E_TX_DESC_CMD_L4T_EOFT_EOF_T		= 0x0100, /* 2 BITS */
+	I40E_TX_DESC_CMD_L4T_EOFT_EOF_NI	= 0x0200, /* 2 BITS */
+	I40E_TX_DESC_CMD_L4T_EOFT_EOF_A		= 0x0300, /* 2 BITS */
+};
+
+#define I40E_TXD_QW1_OFFSET_SHIFT	16
+
+enum i40e_tx_desc_length_fields {
+	/* Note: These are predefined bit offsets */
+	I40E_TX_DESC_LENGTH_MACLEN_SHIFT	= 0, /* 7 BITS */
+	I40E_TX_DESC_LENGTH_IPLEN_SHIFT		= 7, /* 7 BITS */
+	I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT	= 14 /* 4 BITS */
+};
+
+#define I40E_TXD_QW1_TX_BUF_SZ_SHIFT	34
+
+#define I40E_TXD_QW1_L2TAG1_SHIFT	48
+
+/* Context descriptors */
+struct i40e_tx_context_desc {
+	__le32 tunneling_params;
+	__le16 l2tag2;
+	__le16 rsvd;
+	__le64 type_cmd_tso_mss;
+};
+
+
+#define I40E_TXD_CTX_QW1_CMD_SHIFT	4
+
+enum i40e_tx_ctx_desc_cmd_bits {
+	I40E_TX_CTX_DESC_TSO		= 0x01,
+	I40E_TX_CTX_DESC_TSYN		= 0x02,
+	I40E_TX_CTX_DESC_IL2TAG2	= 0x04,
+	I40E_TX_CTX_DESC_IL2TAG2_IL2H	= 0x08,
+	I40E_TX_CTX_DESC_SWTCH_NOTAG	= 0x00,
+	I40E_TX_CTX_DESC_SWTCH_UPLINK	= 0x10,
+	I40E_TX_CTX_DESC_SWTCH_LOCAL	= 0x20,
+	I40E_TX_CTX_DESC_SWTCH_VSI	= 0x30,
+	I40E_TX_CTX_DESC_SWPE		= 0x40
+};
+
+#define I40E_TXD_CTX_QW1_TSO_LEN_SHIFT	30
+
+#define I40E_TXD_CTX_QW1_MSS_SHIFT	50
+
+
+
+enum i40e_tx_ctx_desc_eipt_offload {
+	I40E_TX_CTX_EXT_IP_NONE		= 0x0,
+	I40E_TX_CTX_EXT_IP_IPV6		= 0x1,
+	I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM	= 0x2,
+	I40E_TX_CTX_EXT_IP_IPV4		= 0x3
+};
+
+#define I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT	2
+
+#define I40E_TXD_CTX_QW0_NATT_SHIFT	9
+
+#define I40E_TXD_CTX_UDP_TUNNELING	BIT_ULL(I40E_TXD_CTX_QW0_NATT_SHIFT)
+#define I40E_TXD_CTX_GRE_TUNNELING	(0x2ULL << I40E_TXD_CTX_QW0_NATT_SHIFT)
+
+
+
+#define I40E_TXD_CTX_QW0_NATLEN_SHIFT	12
+
+
+#define I40E_TXD_CTX_QW0_L4T_CS_SHIFT	23
+#define I40E_TXD_CTX_QW0_L4T_CS_MASK	BIT_ULL(I40E_TXD_CTX_QW0_L4T_CS_SHIFT)
+struct i40e_filter_program_desc {
+	__le32 qindex_flex_ptype_vsi;
+	__le32 rsvd;
+	__le32 dtype_cmd_cntindex;
+	__le32 fd_id;
+};
+#define I40E_TXD_FLTR_QW0_QINDEX_SHIFT	0
+#define I40E_TXD_FLTR_QW0_QINDEX_MASK	(0x7FFUL << \
+					 I40E_TXD_FLTR_QW0_QINDEX_SHIFT)
+#define I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT	11
+#define I40E_TXD_FLTR_QW0_FLEXOFF_MASK	(0x7UL << \
+					 I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT)
+#define I40E_TXD_FLTR_QW0_PCTYPE_SHIFT	17
+#define I40E_TXD_FLTR_QW0_PCTYPE_MASK	(0x3FUL << \
+					 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT)
+
+/* Packet Classifier Types for filters */
+enum i40e_filter_pctype {
+	/* Note: Values 0-28 are reserved for future use.
+	 * Value 29, 30, 32 are not supported on XL710 and X710.
+	 */
+	I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP	= 29,
+	I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP	= 30,
+	I40E_FILTER_PCTYPE_NONF_IPV4_UDP		= 31,
+	I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK	= 32,
+	I40E_FILTER_PCTYPE_NONF_IPV4_TCP		= 33,
+	I40E_FILTER_PCTYPE_NONF_IPV4_SCTP		= 34,
+	I40E_FILTER_PCTYPE_NONF_IPV4_OTHER		= 35,
+	I40E_FILTER_PCTYPE_FRAG_IPV4			= 36,
+	/* Note: Values 37-38 are reserved for future use.
+	 * Value 39, 40, 42 are not supported on XL710 and X710.
+	 */
+	I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP	= 39,
+	I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP	= 40,
+	I40E_FILTER_PCTYPE_NONF_IPV6_UDP		= 41,
+	I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK	= 42,
+	I40E_FILTER_PCTYPE_NONF_IPV6_TCP		= 43,
+	I40E_FILTER_PCTYPE_NONF_IPV6_SCTP		= 44,
+	I40E_FILTER_PCTYPE_NONF_IPV6_OTHER		= 45,
+	I40E_FILTER_PCTYPE_FRAG_IPV6			= 46,
+	/* Note: Value 47 is reserved for future use */
+	I40E_FILTER_PCTYPE_FCOE_OX			= 48,
+	I40E_FILTER_PCTYPE_FCOE_RX			= 49,
+	I40E_FILTER_PCTYPE_FCOE_OTHER			= 50,
+	/* Note: Values 51-62 are reserved for future use */
+	I40E_FILTER_PCTYPE_L2_PAYLOAD			= 63,
+};
+
+enum i40e_filter_program_desc_dest {
+	I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET		= 0x0,
+	I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX	= 0x1,
+	I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_OTHER	= 0x2,
+};
+
+enum i40e_filter_program_desc_fd_status {
+	I40E_FILTER_PROGRAM_DESC_FD_STATUS_NONE			= 0x0,
+	I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID		= 0x1,
+	I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID_4FLEX_BYTES	= 0x2,
+	I40E_FILTER_PROGRAM_DESC_FD_STATUS_8FLEX_BYTES		= 0x3,
+};
+
+#define I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT	23
+#define I40E_TXD_FLTR_QW0_DEST_VSI_MASK	(0x1FFUL << \
+					 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_CMD_SHIFT	4
+
+#define I40E_TXD_FLTR_QW1_PCMD_SHIFT	(0x0ULL + I40E_TXD_FLTR_QW1_CMD_SHIFT)
+
+enum i40e_filter_program_desc_pcmd {
+	I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE	= 0x1,
+	I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE		= 0x2,
+};
+
+#define I40E_TXD_FLTR_QW1_DEST_SHIFT	(0x3ULL + I40E_TXD_FLTR_QW1_CMD_SHIFT)
+#define I40E_TXD_FLTR_QW1_DEST_MASK	(0x3ULL << I40E_TXD_FLTR_QW1_DEST_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_CNT_ENA_SHIFT	(0x7ULL + I40E_TXD_FLTR_QW1_CMD_SHIFT)
+#define I40E_TXD_FLTR_QW1_CNT_ENA_MASK	BIT_ULL(I40E_TXD_FLTR_QW1_CNT_ENA_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT	(0x9ULL + \
+						 I40E_TXD_FLTR_QW1_CMD_SHIFT)
+#define I40E_TXD_FLTR_QW1_FD_STATUS_MASK (0x3ULL << \
+					  I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_ATR_SHIFT	(0xEULL + \
+					 I40E_TXD_FLTR_QW1_CMD_SHIFT)
+#define I40E_TXD_FLTR_QW1_ATR_MASK	BIT_ULL(I40E_TXD_FLTR_QW1_ATR_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_ATR_SHIFT	(0xEULL + \
+					 I40E_TXD_FLTR_QW1_CMD_SHIFT)
+#define I40E_TXD_FLTR_QW1_ATR_MASK	BIT_ULL(I40E_TXD_FLTR_QW1_ATR_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT 20
+#define I40E_TXD_FLTR_QW1_CNTINDEX_MASK	(0x1FFUL << \
+					 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT)
+
+enum i40e_filter_type {
+	I40E_FLOW_DIRECTOR_FLTR = 0,
+	I40E_PE_QUAD_HASH_FLTR = 1,
+	I40E_ETHERTYPE_FLTR,
+	I40E_FCOE_CTX_FLTR,
+	I40E_MAC_VLAN_FLTR,
+	I40E_HASH_FLTR
+};
+
+struct i40e_vsi_context {
+	u16 seid;
+	u16 uplink_seid;
+	u16 vsi_number;
+	u16 vsis_allocated;
+	u16 vsis_unallocated;
+	u16 flags;
+	u8 pf_num;
+	u8 vf_num;
+	u8 connection_type;
+	struct i40e_aqc_vsi_properties_data info;
+};
+
+struct i40e_veb_context {
+	u16 seid;
+	u16 uplink_seid;
+	u16 veb_number;
+	u16 vebs_allocated;
+	u16 vebs_unallocated;
+	u16 flags;
+	struct i40e_aqc_get_veb_parameters_completion info;
+};
+
+/* Statistics collected by each port, VSI, VEB, and S-channel */
+struct i40e_eth_stats {
+	u64 rx_bytes;			/* gorc */
+	u64 rx_unicast;			/* uprc */
+	u64 rx_multicast;		/* mprc */
+	u64 rx_broadcast;		/* bprc */
+	u64 rx_discards;		/* rdpc */
+	u64 rx_unknown_protocol;	/* rupp */
+	u64 tx_bytes;			/* gotc */
+	u64 tx_unicast;			/* uptc */
+	u64 tx_multicast;		/* mptc */
+	u64 tx_broadcast;		/* bptc */
+	u64 tx_discards;		/* tdpc */
+	u64 tx_errors;			/* tepc */
+	u64 rx_discards_other;          /* rxerr1 */
+};
+
+/* Statistics collected per VEB per TC */
+struct i40e_veb_tc_stats {
+	u64 tc_rx_packets[I40E_MAX_TRAFFIC_CLASS];
+	u64 tc_rx_bytes[I40E_MAX_TRAFFIC_CLASS];
+	u64 tc_tx_packets[I40E_MAX_TRAFFIC_CLASS];
+	u64 tc_tx_bytes[I40E_MAX_TRAFFIC_CLASS];
+};
+
+/* Statistics collected by the MAC */
+struct i40e_hw_port_stats {
+	/* eth stats collected by the port */
+	struct i40e_eth_stats eth;
+
+	/* additional port specific stats */
+	u64 tx_dropped_link_down;	/* tdold */
+	u64 crc_errors;			/* crcerrs */
+	u64 illegal_bytes;		/* illerrc */
+	u64 error_bytes;		/* errbc */
+	u64 mac_local_faults;		/* mlfc */
+	u64 mac_remote_faults;		/* mrfc */
+	u64 rx_length_errors;		/* rlec */
+	u64 link_xon_rx;		/* lxonrxc */
+	u64 link_xoff_rx;		/* lxoffrxc */
+	u64 priority_xon_rx[8];		/* pxonrxc[8] */
+	u64 priority_xoff_rx[8];	/* pxoffrxc[8] */
+	u64 link_xon_tx;		/* lxontxc */
+	u64 link_xoff_tx;		/* lxofftxc */
+	u64 priority_xon_tx[8];		/* pxontxc[8] */
+	u64 priority_xoff_tx[8];	/* pxofftxc[8] */
+	u64 priority_xon_2_xoff[8];	/* pxon2offc[8] */
+	u64 rx_size_64;			/* prc64 */
+	u64 rx_size_127;		/* prc127 */
+	u64 rx_size_255;		/* prc255 */
+	u64 rx_size_511;		/* prc511 */
+	u64 rx_size_1023;		/* prc1023 */
+	u64 rx_size_1522;		/* prc1522 */
+	u64 rx_size_big;		/* prc9522 */
+	u64 rx_undersize;		/* ruc */
+	u64 rx_fragments;		/* rfc */
+	u64 rx_oversize;		/* roc */
+	u64 rx_jabber;			/* rjc */
+	u64 tx_size_64;			/* ptc64 */
+	u64 tx_size_127;		/* ptc127 */
+	u64 tx_size_255;		/* ptc255 */
+	u64 tx_size_511;		/* ptc511 */
+	u64 tx_size_1023;		/* ptc1023 */
+	u64 tx_size_1522;		/* ptc1522 */
+	u64 tx_size_big;		/* ptc9522 */
+	u64 mac_short_packet_dropped;	/* mspdc */
+	u64 checksum_error;		/* xec */
+	/* flow director stats */
+	u64 fd_atr_match;
+	u64 fd_sb_match;
+	u64 fd_atr_tunnel_match;
+	u32 fd_atr_status;
+	u32 fd_sb_status;
+	/* EEE LPI */
+	u32 tx_lpi_status;
+	u32 rx_lpi_status;
+	u64 tx_lpi_count;		/* etlpic */
+	u64 rx_lpi_count;		/* erlpic */
+};
+
+/* Checksum and Shadow RAM pointers */
+#define I40E_SR_NVM_CONTROL_WORD		0x00
+#define I40E_EMP_MODULE_PTR			0x0F
+#define I40E_SR_EMP_MODULE_PTR			0x48
+#define I40E_SR_PBA_FLAGS			0x15
+#define I40E_SR_PBA_BLOCK_PTR			0x16
+#define I40E_SR_BOOT_CONFIG_PTR			0x17
+#define I40E_NVM_OEM_VER_OFF			0x83
+#define I40E_SR_NVM_DEV_STARTER_VERSION		0x18
+#define I40E_SR_NVM_WAKE_ON_LAN			0x19
+#define I40E_SR_NVM_EETRACK_LO			0x2D
+#define I40E_SR_NVM_EETRACK_HI			0x2E
+#define I40E_SR_VPD_PTR				0x2F
+#define I40E_SR_PCIE_ALT_AUTO_LOAD_PTR		0x3E
+#define I40E_SR_SW_CHECKSUM_WORD		0x3F
+#define I40E_SR_EMP_SR_SETTINGS_PTR		0x48
+
+/* Auxiliary field, mask and shift definition for Shadow RAM and NVM Flash */
+#define I40E_SR_VPD_MODULE_MAX_SIZE		1024
+#define I40E_SR_PCIE_ALT_MODULE_MAX_SIZE	1024
+#define I40E_SR_CONTROL_WORD_1_SHIFT		0x06
+#define I40E_SR_CONTROL_WORD_1_MASK	(0x03 << I40E_SR_CONTROL_WORD_1_SHIFT)
+#define I40E_SR_NVM_MAP_STRUCTURE_TYPE		BIT(12)
+#define I40E_PTR_TYPE				BIT(15)
+#define I40E_SR_OCP_CFG_WORD0			0x2B
+#define I40E_SR_OCP_ENABLED			BIT(15)
+
+/* Shadow RAM related */
+#define I40E_SR_SECTOR_SIZE_IN_WORDS	0x800
+#define I40E_SR_WORDS_IN_1KB		512
+/* Checksum should be calculated such that after adding all the words,
+ * including the checksum word itself, the sum should be 0xBABA.
+ */
+#define I40E_SR_SW_CHECKSUM_BASE	0xBABA
+
+#define I40E_SRRD_SRCTL_ATTEMPTS	100000
+
+enum i40e_switch_element_types {
+	I40E_SWITCH_ELEMENT_TYPE_MAC	= 1,
+	I40E_SWITCH_ELEMENT_TYPE_PF	= 2,
+	I40E_SWITCH_ELEMENT_TYPE_VF	= 3,
+	I40E_SWITCH_ELEMENT_TYPE_EMP	= 4,
+	I40E_SWITCH_ELEMENT_TYPE_BMC	= 6,
+	I40E_SWITCH_ELEMENT_TYPE_PE	= 16,
+	I40E_SWITCH_ELEMENT_TYPE_VEB	= 17,
+	I40E_SWITCH_ELEMENT_TYPE_PA	= 18,
+	I40E_SWITCH_ELEMENT_TYPE_VSI	= 19,
+};
+
+/* Supported EtherType filters */
+enum i40e_ether_type_index {
+	I40E_ETHER_TYPE_1588		= 0,
+	I40E_ETHER_TYPE_FIP		= 1,
+	I40E_ETHER_TYPE_OUI_EXTENDED	= 2,
+	I40E_ETHER_TYPE_MAC_CONTROL	= 3,
+	I40E_ETHER_TYPE_LLDP		= 4,
+	I40E_ETHER_TYPE_EVB_PROTOCOL1	= 5,
+	I40E_ETHER_TYPE_EVB_PROTOCOL2	= 6,
+	I40E_ETHER_TYPE_QCN_CNM		= 7,
+	I40E_ETHER_TYPE_8021X		= 8,
+	I40E_ETHER_TYPE_ARP		= 9,
+	I40E_ETHER_TYPE_RSV1		= 10,
+	I40E_ETHER_TYPE_RSV2		= 11,
+};
+
+/* Filter context base size is 1K */
+#define I40E_HASH_FILTER_BASE_SIZE	1024
+/* Supported Hash filter values */
+enum i40e_hash_filter_size {
+	I40E_HASH_FILTER_SIZE_1K	= 0,
+	I40E_HASH_FILTER_SIZE_2K	= 1,
+	I40E_HASH_FILTER_SIZE_4K	= 2,
+	I40E_HASH_FILTER_SIZE_8K	= 3,
+	I40E_HASH_FILTER_SIZE_16K	= 4,
+	I40E_HASH_FILTER_SIZE_32K	= 5,
+	I40E_HASH_FILTER_SIZE_64K	= 6,
+	I40E_HASH_FILTER_SIZE_128K	= 7,
+	I40E_HASH_FILTER_SIZE_256K	= 8,
+	I40E_HASH_FILTER_SIZE_512K	= 9,
+	I40E_HASH_FILTER_SIZE_1M	= 10,
+};
+
+/* DMA context base size is 0.5K */
+#define I40E_DMA_CNTX_BASE_SIZE		512
+/* Supported DMA context values */
+enum i40e_dma_cntx_size {
+	I40E_DMA_CNTX_SIZE_512		= 0,
+	I40E_DMA_CNTX_SIZE_1K		= 1,
+	I40E_DMA_CNTX_SIZE_2K		= 2,
+	I40E_DMA_CNTX_SIZE_4K		= 3,
+	I40E_DMA_CNTX_SIZE_8K		= 4,
+	I40E_DMA_CNTX_SIZE_16K		= 5,
+	I40E_DMA_CNTX_SIZE_32K		= 6,
+	I40E_DMA_CNTX_SIZE_64K		= 7,
+	I40E_DMA_CNTX_SIZE_128K		= 8,
+	I40E_DMA_CNTX_SIZE_256K		= 9,
+};
+
+/* Supported Hash look up table (LUT) sizes */
+enum i40e_hash_lut_size {
+	I40E_HASH_LUT_SIZE_128		= 0,
+	I40E_HASH_LUT_SIZE_512		= 1,
+};
+
+/* Structure to hold a per PF filter control settings */
+struct i40e_filter_control_settings {
+	/* number of PE Quad Hash filter buckets */
+	enum i40e_hash_filter_size pe_filt_num;
+	/* number of PE Quad Hash contexts */
+	enum i40e_dma_cntx_size pe_cntx_num;
+	/* number of FCoE filter buckets */
+	enum i40e_hash_filter_size fcoe_filt_num;
+	/* number of FCoE DDP contexts */
+	enum i40e_dma_cntx_size fcoe_cntx_num;
+	/* size of the Hash LUT */
+	enum i40e_hash_lut_size	hash_lut_size;
+	/* enable FDIR filters for PF and its VFs */
+	bool enable_fdir;
+	/* enable Ethertype filters for PF and its VFs */
+	bool enable_ethtype;
+	/* enable MAC/VLAN filters for PF and its VFs */
+	bool enable_macvlan;
+};
+
+/* Structure to hold device level control filter counts */
+struct i40e_control_filter_stats {
+	u16 mac_etype_used;   /* Used perfect match MAC/EtherType filters */
+	u16 etype_used;       /* Used perfect EtherType filters */
+	u16 mac_etype_free;   /* Un-used perfect match MAC/EtherType filters */
+	u16 etype_free;       /* Un-used perfect EtherType filters */
+};
+
+enum i40e_reset_type {
+	I40E_RESET_POR		= 0,
+	I40E_RESET_CORER	= 1,
+	I40E_RESET_GLOBR	= 2,
+	I40E_RESET_EMPR		= 3,
+};
+
+/* IEEE 802.1AB LLDP Agent Variables from NVM */
+#define I40E_NVM_LLDP_CFG_PTR	0x06
+#define I40E_SR_LLDP_CFG_PTR	0x31
+struct i40e_lldp_variables {
+	u16 length;
+	u16 adminstatus;
+	u16 msgfasttx;
+	u16 msgtxinterval;
+	u16 txparams;
+	u16 timers;
+	u16 crc8;
+};
+
+/* Offsets into Alternate Ram */
+#define I40E_ALT_STRUCT_FIRST_PF_OFFSET		0   /* in dwords */
+#define I40E_ALT_STRUCT_DWORDS_PER_PF		64   /* in dwords */
+#define I40E_ALT_STRUCT_MIN_BW_OFFSET		0xE  /* in dwords */
+#define I40E_ALT_STRUCT_MAX_BW_OFFSET		0xF  /* in dwords */
+
+/* Alternate Ram Bandwidth Masks */
+#define I40E_ALT_BW_VALUE_MASK		0xFF
+#define I40E_ALT_BW_VALID_MASK		0x80000000
+
+/* RSS Hash Table Size */
+#define I40E_PFQF_CTL_0_HASHLUTSIZE_512	0x00010000
+
+/* INPUT SET MASK for RSS, flow director, and flexible payload */
+#define I40E_X722_L3_SRC_SHIFT		49
+#define I40E_X722_L3_SRC_MASK		(0x3ULL << I40E_X722_L3_SRC_SHIFT)
+#define I40E_X722_L3_DST_SHIFT		41
+#define I40E_X722_L3_DST_MASK		(0x3ULL << I40E_X722_L3_DST_SHIFT)
+#define I40E_L3_SRC_SHIFT		47
+#define I40E_L3_SRC_MASK		(0x3ULL << I40E_L3_SRC_SHIFT)
+#define I40E_L3_V6_SRC_SHIFT		43
+#define I40E_L3_V6_SRC_MASK		(0xFFULL << I40E_L3_V6_SRC_SHIFT)
+#define I40E_L3_DST_SHIFT		35
+#define I40E_L3_DST_MASK		(0x3ULL << I40E_L3_DST_SHIFT)
+#define I40E_L3_V6_DST_SHIFT		35
+#define I40E_L3_V6_DST_MASK		(0xFFULL << I40E_L3_V6_DST_SHIFT)
+#define I40E_L4_SRC_SHIFT		34
+#define I40E_L4_SRC_MASK		(0x1ULL << I40E_L4_SRC_SHIFT)
+#define I40E_L4_DST_SHIFT		33
+#define I40E_L4_DST_MASK		(0x1ULL << I40E_L4_DST_SHIFT)
+#define I40E_VERIFY_TAG_SHIFT		31
+#define I40E_VERIFY_TAG_MASK		(0x3ULL << I40E_VERIFY_TAG_SHIFT)
+#define I40E_VLAN_SRC_SHIFT		55
+#define I40E_VLAN_SRC_MASK		(0x1ULL << I40E_VLAN_SRC_SHIFT)
+
+#define I40E_FLEX_50_SHIFT		13
+#define I40E_FLEX_50_MASK		(0x1ULL << I40E_FLEX_50_SHIFT)
+#define I40E_FLEX_51_SHIFT		12
+#define I40E_FLEX_51_MASK		(0x1ULL << I40E_FLEX_51_SHIFT)
+#define I40E_FLEX_52_SHIFT		11
+#define I40E_FLEX_52_MASK		(0x1ULL << I40E_FLEX_52_SHIFT)
+#define I40E_FLEX_53_SHIFT		10
+#define I40E_FLEX_53_MASK		(0x1ULL << I40E_FLEX_53_SHIFT)
+#define I40E_FLEX_54_SHIFT		9
+#define I40E_FLEX_54_MASK		(0x1ULL << I40E_FLEX_54_SHIFT)
+#define I40E_FLEX_55_SHIFT		8
+#define I40E_FLEX_55_MASK		(0x1ULL << I40E_FLEX_55_SHIFT)
+#define I40E_FLEX_56_SHIFT		7
+#define I40E_FLEX_56_MASK		(0x1ULL << I40E_FLEX_56_SHIFT)
+#define I40E_FLEX_57_SHIFT		6
+#define I40E_FLEX_57_MASK		(0x1ULL << I40E_FLEX_57_SHIFT)
+
+/* Version format for Dynamic Device Personalization(DDP) */
+struct i40e_ddp_version {
+	u8 major;
+	u8 minor;
+	u8 update;
+	u8 draft;
+};
+
+#define I40E_DDP_NAME_SIZE	32
+
+/* Package header */
+struct i40e_package_header {
+	struct i40e_ddp_version version;
+	u32 segment_count;
+	u32 segment_offset[];
+};
+
+/* Generic segment header */
+struct i40e_generic_seg_header {
+#define SEGMENT_TYPE_METADATA	0x00000001
+#define SEGMENT_TYPE_I40E	0x00000011
+	u32 type;
+	struct i40e_ddp_version version;
+	u32 size;
+	char name[I40E_DDP_NAME_SIZE];
+};
+
+struct i40e_metadata_segment {
+	struct i40e_generic_seg_header header;
+	struct i40e_ddp_version version;
+#define I40E_DDP_TRACKID_INVALID	0xFFFFFFFF
+	u32 track_id;
+	char name[I40E_DDP_NAME_SIZE];
+};
+
+struct i40e_device_id_entry {
+	u32 vendor_dev_id;
+	u32 sub_vendor_dev_id;
+};
+
+struct i40e_profile_segment {
+	struct i40e_generic_seg_header header;
+	struct i40e_ddp_version version;
+	char name[I40E_DDP_NAME_SIZE];
+	u32 device_table_count;
+	struct i40e_device_id_entry device_table[];
+};
+
+struct i40e_section_table {
+	u32 section_count;
+	u32 section_offset[];
+};
+
+struct i40e_profile_section_header {
+	u16 tbl_size;
+	u16 data_end;
+	struct {
+#define SECTION_TYPE_INFO	0x00000010
+#define SECTION_TYPE_MMIO	0x00000800
+#define SECTION_TYPE_RB_MMIO	0x00001800
+#define SECTION_TYPE_AQ		0x00000801
+#define SECTION_TYPE_RB_AQ	0x00001801
+#define SECTION_TYPE_NOTE	0x80000000
+		u32 type;
+		u32 offset;
+		u32 size;
+	} section;
+};
+
+struct i40e_profile_tlv_section_record {
+	u8 rtype;
+	u8 type;
+	u16 len;
+	u8 data[12];
+};
+
+/* Generic AQ section in proflie */
+struct i40e_profile_aq_section {
+	u16 opcode;
+	u16 flags;
+	u8  param[16];
+	u16 datalen;
+	u8  data[];
+};
+
+struct i40e_profile_info {
+	u32 track_id;
+	struct i40e_ddp_version version;
+	u8 op;
+#define I40E_DDP_ADD_TRACKID		0x01
+#define I40E_DDP_REMOVE_TRACKID	0x02
+	u8 reserved[7];
+	u8 name[I40E_DDP_NAME_SIZE];
+};
+#endif /* _I40E_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
new file mode 100644
index 0000000000..a97ca2224d
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -0,0 +1,4951 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "i40e.h"
+
+/*********************notification routines***********************/
+
+/**
+ * i40e_vc_vf_broadcast
+ * @pf: pointer to the PF structure
+ * @v_opcode: operation code
+ * @v_retval: return value
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * send a message to all VFs on a given PF
+ **/
+static void i40e_vc_vf_broadcast(struct i40e_pf *pf,
+				 enum virtchnl_ops v_opcode,
+				 int v_retval, u8 *msg,
+				 u16 msglen)
+{
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_vf *vf = pf->vf;
+	int i;
+
+	for (i = 0; i < pf->num_alloc_vfs; i++, vf++) {
+		int abs_vf_id = vf->vf_id + (int)hw->func_caps.vf_base_id;
+		/* Not all vfs are enabled so skip the ones that are not */
+		if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states) &&
+		    !test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states))
+			continue;
+
+		/* Ignore return value on purpose - a given VF may fail, but
+		 * we need to keep going and send to all of them
+		 */
+		i40e_aq_send_msg_to_vf(hw, abs_vf_id, v_opcode, v_retval,
+				       msg, msglen, NULL);
+	}
+}
+
+/**
+ * i40e_vc_link_speed2mbps
+ * converts i40e_aq_link_speed to integer value of Mbps
+ * @link_speed: the speed to convert
+ *
+ * return the speed as direct value of Mbps.
+ **/
+static u32
+i40e_vc_link_speed2mbps(enum i40e_aq_link_speed link_speed)
+{
+	switch (link_speed) {
+	case I40E_LINK_SPEED_100MB:
+		return SPEED_100;
+	case I40E_LINK_SPEED_1GB:
+		return SPEED_1000;
+	case I40E_LINK_SPEED_2_5GB:
+		return SPEED_2500;
+	case I40E_LINK_SPEED_5GB:
+		return SPEED_5000;
+	case I40E_LINK_SPEED_10GB:
+		return SPEED_10000;
+	case I40E_LINK_SPEED_20GB:
+		return SPEED_20000;
+	case I40E_LINK_SPEED_25GB:
+		return SPEED_25000;
+	case I40E_LINK_SPEED_40GB:
+		return SPEED_40000;
+	case I40E_LINK_SPEED_UNKNOWN:
+		return SPEED_UNKNOWN;
+	}
+	return SPEED_UNKNOWN;
+}
+
+/**
+ * i40e_set_vf_link_state
+ * @vf: pointer to the VF structure
+ * @pfe: pointer to PF event structure
+ * @ls: pointer to link status structure
+ *
+ * set a link state on a single vf
+ **/
+static void i40e_set_vf_link_state(struct i40e_vf *vf,
+				   struct virtchnl_pf_event *pfe, struct i40e_link_status *ls)
+{
+	u8 link_status = ls->link_info & I40E_AQ_LINK_UP;
+
+	if (vf->link_forced)
+		link_status = vf->link_up;
+
+	if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED) {
+		pfe->event_data.link_event_adv.link_speed = link_status ?
+			i40e_vc_link_speed2mbps(ls->link_speed) : 0;
+		pfe->event_data.link_event_adv.link_status = link_status;
+	} else {
+		pfe->event_data.link_event.link_speed = link_status ?
+			i40e_virtchnl_link_speed(ls->link_speed) : 0;
+		pfe->event_data.link_event.link_status = link_status;
+	}
+}
+
+/**
+ * i40e_vc_notify_vf_link_state
+ * @vf: pointer to the VF structure
+ *
+ * send a link status message to a single VF
+ **/
+static void i40e_vc_notify_vf_link_state(struct i40e_vf *vf)
+{
+	struct virtchnl_pf_event pfe;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_link_status *ls = &pf->hw.phy.link_info;
+	int abs_vf_id = vf->vf_id + (int)hw->func_caps.vf_base_id;
+
+	pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
+	pfe.severity = PF_EVENT_SEVERITY_INFO;
+
+	i40e_set_vf_link_state(vf, &pfe, ls);
+
+	i40e_aq_send_msg_to_vf(hw, abs_vf_id, VIRTCHNL_OP_EVENT,
+			       0, (u8 *)&pfe, sizeof(pfe), NULL);
+}
+
+/**
+ * i40e_vc_notify_link_state
+ * @pf: pointer to the PF structure
+ *
+ * send a link status message to all VFs on a given PF
+ **/
+void i40e_vc_notify_link_state(struct i40e_pf *pf)
+{
+	int i;
+
+	for (i = 0; i < pf->num_alloc_vfs; i++)
+		i40e_vc_notify_vf_link_state(&pf->vf[i]);
+}
+
+/**
+ * i40e_vc_notify_reset
+ * @pf: pointer to the PF structure
+ *
+ * indicate a pending reset to all VFs on a given PF
+ **/
+void i40e_vc_notify_reset(struct i40e_pf *pf)
+{
+	struct virtchnl_pf_event pfe;
+
+	pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
+	pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
+	i40e_vc_vf_broadcast(pf, VIRTCHNL_OP_EVENT, 0,
+			     (u8 *)&pfe, sizeof(struct virtchnl_pf_event));
+}
+
+#ifdef CONFIG_PCI_IOV
+void i40e_restore_all_vfs_msi_state(struct pci_dev *pdev)
+{
+	u16 vf_id;
+	u16 pos;
+
+	/* Continue only if this is a PF */
+	if (!pdev->is_physfn)
+		return;
+
+	if (!pci_num_vf(pdev))
+		return;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+	if (pos) {
+		struct pci_dev *vf_dev = NULL;
+
+		pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id);
+		while ((vf_dev = pci_get_device(pdev->vendor, vf_id, vf_dev))) {
+			if (vf_dev->is_virtfn && vf_dev->physfn == pdev)
+				pci_restore_msi_state(vf_dev);
+		}
+	}
+}
+#endif /* CONFIG_PCI_IOV */
+
+/**
+ * i40e_vc_notify_vf_reset
+ * @vf: pointer to the VF structure
+ *
+ * indicate a pending reset to the given VF
+ **/
+void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
+{
+	struct virtchnl_pf_event pfe;
+	int abs_vf_id;
+
+	/* validate the request */
+	if (!vf || vf->vf_id >= vf->pf->num_alloc_vfs)
+		return;
+
+	/* verify if the VF is in either init or active before proceeding */
+	if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states) &&
+	    !test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states))
+		return;
+
+	abs_vf_id = vf->vf_id + (int)vf->pf->hw.func_caps.vf_base_id;
+
+	pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
+	pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
+	i40e_aq_send_msg_to_vf(&vf->pf->hw, abs_vf_id, VIRTCHNL_OP_EVENT,
+			       0, (u8 *)&pfe,
+			       sizeof(struct virtchnl_pf_event), NULL);
+}
+/***********************misc routines*****************************/
+
+/**
+ * i40e_vc_reset_vf
+ * @vf: pointer to the VF info
+ * @notify_vf: notify vf about reset or not
+ * Reset VF handler.
+ **/
+static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	int i;
+
+	if (notify_vf)
+		i40e_vc_notify_vf_reset(vf);
+
+	/* We want to ensure that an actual reset occurs initiated after this
+	 * function was called. However, we do not want to wait forever, so
+	 * we'll give a reasonable time and print a message if we failed to
+	 * ensure a reset.
+	 */
+	for (i = 0; i < 20; i++) {
+		/* If PF is in VFs releasing state reset VF is impossible,
+		 * so leave it.
+		 */
+		if (test_bit(__I40E_VFS_RELEASING, pf->state))
+			return;
+		if (i40e_reset_vf(vf, false))
+			return;
+		usleep_range(10000, 20000);
+	}
+
+	if (notify_vf)
+		dev_warn(&vf->pf->pdev->dev,
+			 "Failed to initiate reset for VF %d after 200 milliseconds\n",
+			 vf->vf_id);
+	else
+		dev_dbg(&vf->pf->pdev->dev,
+			"Failed to initiate reset for VF %d after 200 milliseconds\n",
+			vf->vf_id);
+}
+
+/**
+ * i40e_vc_isvalid_vsi_id
+ * @vf: pointer to the VF info
+ * @vsi_id: VF relative VSI id
+ *
+ * check for the valid VSI id
+ **/
+static inline bool i40e_vc_isvalid_vsi_id(struct i40e_vf *vf, u16 vsi_id)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = i40e_find_vsi_from_id(pf, vsi_id);
+
+	return (vsi && (vsi->vf_id == vf->vf_id));
+}
+
+/**
+ * i40e_vc_isvalid_queue_id
+ * @vf: pointer to the VF info
+ * @vsi_id: vsi id
+ * @qid: vsi relative queue id
+ *
+ * check for the valid queue id
+ **/
+static inline bool i40e_vc_isvalid_queue_id(struct i40e_vf *vf, u16 vsi_id,
+					    u16 qid)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = i40e_find_vsi_from_id(pf, vsi_id);
+
+	return (vsi && (qid < vsi->alloc_queue_pairs));
+}
+
+/**
+ * i40e_vc_isvalid_vector_id
+ * @vf: pointer to the VF info
+ * @vector_id: VF relative vector id
+ *
+ * check for the valid vector id
+ **/
+static inline bool i40e_vc_isvalid_vector_id(struct i40e_vf *vf, u32 vector_id)
+{
+	struct i40e_pf *pf = vf->pf;
+
+	return vector_id < pf->hw.func_caps.num_msix_vectors_vf;
+}
+
+/***********************vf resource mgmt routines*****************/
+
+/**
+ * i40e_vc_get_pf_queue_id
+ * @vf: pointer to the VF info
+ * @vsi_id: id of VSI as provided by the FW
+ * @vsi_queue_id: vsi relative queue id
+ *
+ * return PF relative queue id
+ **/
+static u16 i40e_vc_get_pf_queue_id(struct i40e_vf *vf, u16 vsi_id,
+				   u8 vsi_queue_id)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = i40e_find_vsi_from_id(pf, vsi_id);
+	u16 pf_queue_id = I40E_QUEUE_END_OF_LIST;
+
+	if (!vsi)
+		return pf_queue_id;
+
+	if (le16_to_cpu(vsi->info.mapping_flags) &
+	    I40E_AQ_VSI_QUE_MAP_NONCONTIG)
+		pf_queue_id =
+			le16_to_cpu(vsi->info.queue_mapping[vsi_queue_id]);
+	else
+		pf_queue_id = le16_to_cpu(vsi->info.queue_mapping[0]) +
+			      vsi_queue_id;
+
+	return pf_queue_id;
+}
+
+/**
+ * i40e_get_real_pf_qid
+ * @vf: pointer to the VF info
+ * @vsi_id: vsi id
+ * @queue_id: queue number
+ *
+ * wrapper function to get pf_queue_id handling ADq code as well
+ **/
+static u16 i40e_get_real_pf_qid(struct i40e_vf *vf, u16 vsi_id, u16 queue_id)
+{
+	int i;
+
+	if (vf->adq_enabled) {
+		/* Although VF considers all the queues(can be 1 to 16) as its
+		 * own but they may actually belong to different VSIs(up to 4).
+		 * We need to find which queues belongs to which VSI.
+		 */
+		for (i = 0; i < vf->num_tc; i++) {
+			if (queue_id < vf->ch[i].num_qps) {
+				vsi_id = vf->ch[i].vsi_id;
+				break;
+			}
+			/* find right queue id which is relative to a
+			 * given VSI.
+			 */
+			queue_id -= vf->ch[i].num_qps;
+			}
+		}
+
+	return i40e_vc_get_pf_queue_id(vf, vsi_id, queue_id);
+}
+
+/**
+ * i40e_config_irq_link_list
+ * @vf: pointer to the VF info
+ * @vsi_id: id of VSI as given by the FW
+ * @vecmap: irq map info
+ *
+ * configure irq link list from the map
+ **/
+static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
+				      struct virtchnl_vector_map *vecmap)
+{
+	unsigned long linklistmap = 0, tempmap;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	u16 vsi_queue_id, pf_queue_id;
+	enum i40e_queue_type qtype;
+	u16 next_q, vector_id, size;
+	u32 reg, reg_idx;
+	u16 itr_idx = 0;
+
+	vector_id = vecmap->vector_id;
+	/* setup the head */
+	if (0 == vector_id)
+		reg_idx = I40E_VPINT_LNKLST0(vf->vf_id);
+	else
+		reg_idx = I40E_VPINT_LNKLSTN(
+		     ((pf->hw.func_caps.num_msix_vectors_vf - 1) * vf->vf_id) +
+		     (vector_id - 1));
+
+	if (vecmap->rxq_map == 0 && vecmap->txq_map == 0) {
+		/* Special case - No queues mapped on this vector */
+		wr32(hw, reg_idx, I40E_VPINT_LNKLST0_FIRSTQ_INDX_MASK);
+		goto irq_list_done;
+	}
+	tempmap = vecmap->rxq_map;
+	for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) {
+		linklistmap |= (BIT(I40E_VIRTCHNL_SUPPORTED_QTYPES *
+				    vsi_queue_id));
+	}
+
+	tempmap = vecmap->txq_map;
+	for_each_set_bit(vsi_queue_id, &tempmap, I40E_MAX_VSI_QP) {
+		linklistmap |= (BIT(I40E_VIRTCHNL_SUPPORTED_QTYPES *
+				     vsi_queue_id + 1));
+	}
+
+	size = I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES;
+	next_q = find_first_bit(&linklistmap, size);
+	if (unlikely(next_q == size))
+		goto irq_list_done;
+
+	vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
+	qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
+	pf_queue_id = i40e_get_real_pf_qid(vf, vsi_id, vsi_queue_id);
+	reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id);
+
+	wr32(hw, reg_idx, reg);
+
+	while (next_q < size) {
+		switch (qtype) {
+		case I40E_QUEUE_TYPE_RX:
+			reg_idx = I40E_QINT_RQCTL(pf_queue_id);
+			itr_idx = vecmap->rxitr_idx;
+			break;
+		case I40E_QUEUE_TYPE_TX:
+			reg_idx = I40E_QINT_TQCTL(pf_queue_id);
+			itr_idx = vecmap->txitr_idx;
+			break;
+		default:
+			break;
+		}
+
+		next_q = find_next_bit(&linklistmap, size, next_q + 1);
+		if (next_q < size) {
+			vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
+			qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
+			pf_queue_id = i40e_get_real_pf_qid(vf,
+							   vsi_id,
+							   vsi_queue_id);
+		} else {
+			pf_queue_id = I40E_QUEUE_END_OF_LIST;
+			qtype = 0;
+		}
+
+		/* format for the RQCTL & TQCTL regs is same */
+		reg = (vector_id) |
+		    (qtype << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
+		    (pf_queue_id << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
+		    BIT(I40E_QINT_RQCTL_CAUSE_ENA_SHIFT) |
+		    (itr_idx << I40E_QINT_RQCTL_ITR_INDX_SHIFT);
+		wr32(hw, reg_idx, reg);
+	}
+
+	/* if the vf is running in polling mode and using interrupt zero,
+	 * need to disable auto-mask on enabling zero interrupt for VFs.
+	 */
+	if ((vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_POLLING) &&
+	    (vector_id == 0)) {
+		reg = rd32(hw, I40E_GLINT_CTL);
+		if (!(reg & I40E_GLINT_CTL_DIS_AUTOMASK_VF0_MASK)) {
+			reg |= I40E_GLINT_CTL_DIS_AUTOMASK_VF0_MASK;
+			wr32(hw, I40E_GLINT_CTL, reg);
+		}
+	}
+
+irq_list_done:
+	i40e_flush(hw);
+}
+
+/**
+ * i40e_release_rdma_qvlist
+ * @vf: pointer to the VF.
+ *
+ **/
+static void i40e_release_rdma_qvlist(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct virtchnl_rdma_qvlist_info *qvlist_info = vf->qvlist_info;
+	u32 msix_vf;
+	u32 i;
+
+	if (!vf->qvlist_info)
+		return;
+
+	msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
+	for (i = 0; i < qvlist_info->num_vectors; i++) {
+		struct virtchnl_rdma_qv_info *qv_info;
+		u32 next_q_index, next_q_type;
+		struct i40e_hw *hw = &pf->hw;
+		u32 v_idx, reg_idx, reg;
+
+		qv_info = &qvlist_info->qv_info[i];
+		if (!qv_info)
+			continue;
+		v_idx = qv_info->v_idx;
+		if (qv_info->ceq_idx != I40E_QUEUE_INVALID_IDX) {
+			/* Figure out the queue after CEQ and make that the
+			 * first queue.
+			 */
+			reg_idx = (msix_vf - 1) * vf->vf_id + qv_info->ceq_idx;
+			reg = rd32(hw, I40E_VPINT_CEQCTL(reg_idx));
+			next_q_index = (reg & I40E_VPINT_CEQCTL_NEXTQ_INDX_MASK)
+					>> I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT;
+			next_q_type = (reg & I40E_VPINT_CEQCTL_NEXTQ_TYPE_MASK)
+					>> I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT;
+
+			reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1);
+			reg = (next_q_index &
+			       I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) |
+			       (next_q_type <<
+			       I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+
+			wr32(hw, I40E_VPINT_LNKLSTN(reg_idx), reg);
+		}
+	}
+	kfree(vf->qvlist_info);
+	vf->qvlist_info = NULL;
+}
+
+/**
+ * i40e_config_rdma_qvlist
+ * @vf: pointer to the VF info
+ * @qvlist_info: queue and vector list
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int
+i40e_config_rdma_qvlist(struct i40e_vf *vf,
+			struct virtchnl_rdma_qvlist_info *qvlist_info)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	struct virtchnl_rdma_qv_info *qv_info;
+	u32 v_idx, i, reg_idx, reg;
+	u32 next_q_idx, next_q_type;
+	size_t size;
+	u32 msix_vf;
+	int ret = 0;
+
+	msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
+
+	if (qvlist_info->num_vectors > msix_vf) {
+		dev_warn(&pf->pdev->dev,
+			 "Incorrect number of iwarp vectors %u. Maximum %u allowed.\n",
+			 qvlist_info->num_vectors,
+			 msix_vf);
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	kfree(vf->qvlist_info);
+	size = virtchnl_struct_size(vf->qvlist_info, qv_info,
+				    qvlist_info->num_vectors);
+	vf->qvlist_info = kzalloc(size, GFP_KERNEL);
+	if (!vf->qvlist_info) {
+		ret = -ENOMEM;
+		goto err_out;
+	}
+	vf->qvlist_info->num_vectors = qvlist_info->num_vectors;
+
+	msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
+	for (i = 0; i < qvlist_info->num_vectors; i++) {
+		qv_info = &qvlist_info->qv_info[i];
+		if (!qv_info)
+			continue;
+
+		/* Validate vector id belongs to this vf */
+		if (!i40e_vc_isvalid_vector_id(vf, qv_info->v_idx)) {
+			ret = -EINVAL;
+			goto err_free;
+		}
+
+		v_idx = qv_info->v_idx;
+
+		vf->qvlist_info->qv_info[i] = *qv_info;
+
+		reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1);
+		/* We might be sharing the interrupt, so get the first queue
+		 * index and type, push it down the list by adding the new
+		 * queue on top. Also link it with the new queue in CEQCTL.
+		 */
+		reg = rd32(hw, I40E_VPINT_LNKLSTN(reg_idx));
+		next_q_idx = ((reg & I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) >>
+				I40E_VPINT_LNKLSTN_FIRSTQ_INDX_SHIFT);
+		next_q_type = ((reg & I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK) >>
+				I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+
+		if (qv_info->ceq_idx != I40E_QUEUE_INVALID_IDX) {
+			reg_idx = (msix_vf - 1) * vf->vf_id + qv_info->ceq_idx;
+			reg = (I40E_VPINT_CEQCTL_CAUSE_ENA_MASK |
+			(v_idx << I40E_VPINT_CEQCTL_MSIX_INDX_SHIFT) |
+			(qv_info->itr_idx << I40E_VPINT_CEQCTL_ITR_INDX_SHIFT) |
+			(next_q_type << I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT) |
+			(next_q_idx << I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT));
+			wr32(hw, I40E_VPINT_CEQCTL(reg_idx), reg);
+
+			reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1);
+			reg = (qv_info->ceq_idx &
+			       I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK) |
+			       (I40E_QUEUE_TYPE_PE_CEQ <<
+			       I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT);
+			wr32(hw, I40E_VPINT_LNKLSTN(reg_idx), reg);
+		}
+
+		if (qv_info->aeq_idx != I40E_QUEUE_INVALID_IDX) {
+			reg = (I40E_VPINT_AEQCTL_CAUSE_ENA_MASK |
+			(v_idx << I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT) |
+			(qv_info->itr_idx << I40E_VPINT_AEQCTL_ITR_INDX_SHIFT));
+
+			wr32(hw, I40E_VPINT_AEQCTL(vf->vf_id), reg);
+		}
+	}
+
+	return 0;
+err_free:
+	kfree(vf->qvlist_info);
+	vf->qvlist_info = NULL;
+err_out:
+	return ret;
+}
+
+/**
+ * i40e_config_vsi_tx_queue
+ * @vf: pointer to the VF info
+ * @vsi_id: id of VSI as provided by the FW
+ * @vsi_queue_id: vsi relative queue index
+ * @info: config. info
+ *
+ * configure tx queue
+ **/
+static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id,
+				    u16 vsi_queue_id,
+				    struct virtchnl_txq_info *info)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_hmc_obj_txq tx_ctx;
+	struct i40e_vsi *vsi;
+	u16 pf_queue_id;
+	u32 qtx_ctl;
+	int ret = 0;
+
+	if (!i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) {
+		ret = -ENOENT;
+		goto error_context;
+	}
+	pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
+	vsi = i40e_find_vsi_from_id(pf, vsi_id);
+	if (!vsi) {
+		ret = -ENOENT;
+		goto error_context;
+	}
+
+	/* clear the context structure first */
+	memset(&tx_ctx, 0, sizeof(struct i40e_hmc_obj_txq));
+
+	/* only set the required fields */
+	tx_ctx.base = info->dma_ring_addr / 128;
+	tx_ctx.qlen = info->ring_len;
+	tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[0]);
+	tx_ctx.rdylist_act = 0;
+	tx_ctx.head_wb_ena = info->headwb_enabled;
+	tx_ctx.head_wb_addr = info->dma_headwb_addr;
+
+	/* clear the context in the HMC */
+	ret = i40e_clear_lan_tx_queue_context(hw, pf_queue_id);
+	if (ret) {
+		dev_err(&pf->pdev->dev,
+			"Failed to clear VF LAN Tx queue context %d, error: %d\n",
+			pf_queue_id, ret);
+		ret = -ENOENT;
+		goto error_context;
+	}
+
+	/* set the context in the HMC */
+	ret = i40e_set_lan_tx_queue_context(hw, pf_queue_id, &tx_ctx);
+	if (ret) {
+		dev_err(&pf->pdev->dev,
+			"Failed to set VF LAN Tx queue context %d error: %d\n",
+			pf_queue_id, ret);
+		ret = -ENOENT;
+		goto error_context;
+	}
+
+	/* associate this queue with the PCI VF function */
+	qtx_ctl = I40E_QTX_CTL_VF_QUEUE;
+	qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT)
+		    & I40E_QTX_CTL_PF_INDX_MASK);
+	qtx_ctl |= (((vf->vf_id + hw->func_caps.vf_base_id)
+		     << I40E_QTX_CTL_VFVM_INDX_SHIFT)
+		    & I40E_QTX_CTL_VFVM_INDX_MASK);
+	wr32(hw, I40E_QTX_CTL(pf_queue_id), qtx_ctl);
+	i40e_flush(hw);
+
+error_context:
+	return ret;
+}
+
+/**
+ * i40e_config_vsi_rx_queue
+ * @vf: pointer to the VF info
+ * @vsi_id: id of VSI  as provided by the FW
+ * @vsi_queue_id: vsi relative queue index
+ * @info: config. info
+ *
+ * configure rx queue
+ **/
+static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id,
+				    u16 vsi_queue_id,
+				    struct virtchnl_rxq_info *info)
+{
+	u16 pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_hmc_obj_rxq rx_ctx;
+	int ret = 0;
+
+	/* clear the context structure first */
+	memset(&rx_ctx, 0, sizeof(struct i40e_hmc_obj_rxq));
+
+	/* only set the required fields */
+	rx_ctx.base = info->dma_ring_addr / 128;
+	rx_ctx.qlen = info->ring_len;
+
+	if (info->splithdr_enabled) {
+		rx_ctx.hsplit_0 = I40E_RX_SPLIT_L2      |
+				  I40E_RX_SPLIT_IP      |
+				  I40E_RX_SPLIT_TCP_UDP |
+				  I40E_RX_SPLIT_SCTP;
+		/* header length validation */
+		if (info->hdr_size > ((2 * 1024) - 64)) {
+			ret = -EINVAL;
+			goto error_param;
+		}
+		rx_ctx.hbuff = info->hdr_size >> I40E_RXQ_CTX_HBUFF_SHIFT;
+
+		/* set split mode 10b */
+		rx_ctx.dtype = I40E_RX_DTYPE_HEADER_SPLIT;
+	}
+
+	/* databuffer length validation */
+	if (info->databuffer_size > ((16 * 1024) - 128)) {
+		ret = -EINVAL;
+		goto error_param;
+	}
+	rx_ctx.dbuff = info->databuffer_size >> I40E_RXQ_CTX_DBUFF_SHIFT;
+
+	/* max pkt. length validation */
+	if (info->max_pkt_size >= (16 * 1024) || info->max_pkt_size < 64) {
+		ret = -EINVAL;
+		goto error_param;
+	}
+	rx_ctx.rxmax = info->max_pkt_size;
+
+	/* if port VLAN is configured increase the max packet size */
+	if (vsi->info.pvid)
+		rx_ctx.rxmax += VLAN_HLEN;
+
+	/* enable 32bytes desc always */
+	rx_ctx.dsize = 1;
+
+	/* default values */
+	rx_ctx.lrxqthresh = 1;
+	rx_ctx.crcstrip = 1;
+	rx_ctx.prefena = 1;
+	rx_ctx.l2tsel = 1;
+
+	/* clear the context in the HMC */
+	ret = i40e_clear_lan_rx_queue_context(hw, pf_queue_id);
+	if (ret) {
+		dev_err(&pf->pdev->dev,
+			"Failed to clear VF LAN Rx queue context %d, error: %d\n",
+			pf_queue_id, ret);
+		ret = -ENOENT;
+		goto error_param;
+	}
+
+	/* set the context in the HMC */
+	ret = i40e_set_lan_rx_queue_context(hw, pf_queue_id, &rx_ctx);
+	if (ret) {
+		dev_err(&pf->pdev->dev,
+			"Failed to set VF LAN Rx queue context %d error: %d\n",
+			pf_queue_id, ret);
+		ret = -ENOENT;
+		goto error_param;
+	}
+
+error_param:
+	return ret;
+}
+
+/**
+ * i40e_alloc_vsi_res
+ * @vf: pointer to the VF info
+ * @idx: VSI index, applies only for ADq mode, zero otherwise
+ *
+ * alloc VF vsi context & resources
+ **/
+static int i40e_alloc_vsi_res(struct i40e_vf *vf, u8 idx)
+{
+	struct i40e_mac_filter *f = NULL;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi;
+	u64 max_tx_rate = 0;
+	int ret = 0;
+
+	vsi = i40e_vsi_setup(pf, I40E_VSI_SRIOV, pf->vsi[pf->lan_vsi]->seid,
+			     vf->vf_id);
+
+	if (!vsi) {
+		dev_err(&pf->pdev->dev,
+			"add vsi failed for VF %d, aq_err %d\n",
+			vf->vf_id, pf->hw.aq.asq_last_status);
+		ret = -ENOENT;
+		goto error_alloc_vsi_res;
+	}
+
+	if (!idx) {
+		u64 hena = i40e_pf_get_default_rss_hena(pf);
+		u8 broadcast[ETH_ALEN];
+
+		vf->lan_vsi_idx = vsi->idx;
+		vf->lan_vsi_id = vsi->id;
+		/* If the port VLAN has been configured and then the
+		 * VF driver was removed then the VSI port VLAN
+		 * configuration was destroyed.  Check if there is
+		 * a port VLAN and restore the VSI configuration if
+		 * needed.
+		 */
+		if (vf->port_vlan_id)
+			i40e_vsi_add_pvid(vsi, vf->port_vlan_id);
+
+		spin_lock_bh(&vsi->mac_filter_hash_lock);
+		if (is_valid_ether_addr(vf->default_lan_addr.addr)) {
+			f = i40e_add_mac_filter(vsi,
+						vf->default_lan_addr.addr);
+			if (!f)
+				dev_info(&pf->pdev->dev,
+					 "Could not add MAC filter %pM for VF %d\n",
+					vf->default_lan_addr.addr, vf->vf_id);
+		}
+		eth_broadcast_addr(broadcast);
+		f = i40e_add_mac_filter(vsi, broadcast);
+		if (!f)
+			dev_info(&pf->pdev->dev,
+				 "Could not allocate VF broadcast filter\n");
+		spin_unlock_bh(&vsi->mac_filter_hash_lock);
+		wr32(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id), (u32)hena);
+		wr32(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id), (u32)(hena >> 32));
+		/* program mac filter only for VF VSI */
+		ret = i40e_sync_vsi_filters(vsi);
+		if (ret)
+			dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
+	}
+
+	/* storing VSI index and id for ADq and don't apply the mac filter */
+	if (vf->adq_enabled) {
+		vf->ch[idx].vsi_idx = vsi->idx;
+		vf->ch[idx].vsi_id = vsi->id;
+	}
+
+	/* Set VF bandwidth if specified */
+	if (vf->tx_rate) {
+		max_tx_rate = vf->tx_rate;
+	} else if (vf->ch[idx].max_tx_rate) {
+		max_tx_rate = vf->ch[idx].max_tx_rate;
+	}
+
+	if (max_tx_rate) {
+		max_tx_rate = div_u64(max_tx_rate, I40E_BW_CREDIT_DIVISOR);
+		ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid,
+						  max_tx_rate, 0, NULL);
+		if (ret)
+			dev_err(&pf->pdev->dev, "Unable to set tx rate, VF %d, error code %d.\n",
+				vf->vf_id, ret);
+	}
+
+error_alloc_vsi_res:
+	return ret;
+}
+
+/**
+ * i40e_map_pf_queues_to_vsi
+ * @vf: pointer to the VF info
+ *
+ * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This
+ * function takes care of first part VSILAN_QTABLE, mapping pf queues to VSI.
+ **/
+static void i40e_map_pf_queues_to_vsi(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	u32 reg, num_tc = 1; /* VF has at least one traffic class */
+	u16 vsi_id, qps;
+	int i, j;
+
+	if (vf->adq_enabled)
+		num_tc = vf->num_tc;
+
+	for (i = 0; i < num_tc; i++) {
+		if (vf->adq_enabled) {
+			qps = vf->ch[i].num_qps;
+			vsi_id =  vf->ch[i].vsi_id;
+		} else {
+			qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+			vsi_id = vf->lan_vsi_id;
+		}
+
+		for (j = 0; j < 7; j++) {
+			if (j * 2 >= qps) {
+				/* end of list */
+				reg = 0x07FF07FF;
+			} else {
+				u16 qid = i40e_vc_get_pf_queue_id(vf,
+								  vsi_id,
+								  j * 2);
+				reg = qid;
+				qid = i40e_vc_get_pf_queue_id(vf, vsi_id,
+							      (j * 2) + 1);
+				reg |= qid << 16;
+			}
+			i40e_write_rx_ctl(hw,
+					  I40E_VSILAN_QTABLE(j, vsi_id),
+					  reg);
+		}
+	}
+}
+
+/**
+ * i40e_map_pf_to_vf_queues
+ * @vf: pointer to the VF info
+ *
+ * PF maps LQPs to a VF by programming VSILAN_QTABLE & VPLAN_QTABLE. This
+ * function takes care of the second part VPLAN_QTABLE & completes VF mappings.
+ **/
+static void i40e_map_pf_to_vf_queues(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	u32 reg, total_qps = 0;
+	u32 qps, num_tc = 1; /* VF has at least one traffic class */
+	u16 vsi_id, qid;
+	int i, j;
+
+	if (vf->adq_enabled)
+		num_tc = vf->num_tc;
+
+	for (i = 0; i < num_tc; i++) {
+		if (vf->adq_enabled) {
+			qps = vf->ch[i].num_qps;
+			vsi_id =  vf->ch[i].vsi_id;
+		} else {
+			qps = pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+			vsi_id = vf->lan_vsi_id;
+		}
+
+		for (j = 0; j < qps; j++) {
+			qid = i40e_vc_get_pf_queue_id(vf, vsi_id, j);
+
+			reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK);
+			wr32(hw, I40E_VPLAN_QTABLE(total_qps, vf->vf_id),
+			     reg);
+			total_qps++;
+		}
+	}
+}
+
+/**
+ * i40e_enable_vf_mappings
+ * @vf: pointer to the VF info
+ *
+ * enable VF mappings
+ **/
+static void i40e_enable_vf_mappings(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	u32 reg;
+
+	/* Tell the hardware we're using noncontiguous mapping. HW requires
+	 * that VF queues be mapped using this method, even when they are
+	 * contiguous in real life
+	 */
+	i40e_write_rx_ctl(hw, I40E_VSILAN_QBASE(vf->lan_vsi_id),
+			  I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK);
+
+	/* enable VF vplan_qtable mappings */
+	reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK;
+	wr32(hw, I40E_VPLAN_MAPENA(vf->vf_id), reg);
+
+	i40e_map_pf_to_vf_queues(vf);
+	i40e_map_pf_queues_to_vsi(vf);
+
+	i40e_flush(hw);
+}
+
+/**
+ * i40e_disable_vf_mappings
+ * @vf: pointer to the VF info
+ *
+ * disable VF mappings
+ **/
+static void i40e_disable_vf_mappings(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	int i;
+
+	/* disable qp mappings */
+	wr32(hw, I40E_VPLAN_MAPENA(vf->vf_id), 0);
+	for (i = 0; i < I40E_MAX_VSI_QP; i++)
+		wr32(hw, I40E_VPLAN_QTABLE(i, vf->vf_id),
+		     I40E_QUEUE_END_OF_LIST);
+	i40e_flush(hw);
+}
+
+/**
+ * i40e_free_vf_res
+ * @vf: pointer to the VF info
+ *
+ * free VF resources
+ **/
+static void i40e_free_vf_res(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	u32 reg_idx, reg;
+	int i, j, msix_vf;
+
+	/* Start by disabling VF's configuration API to prevent the OS from
+	 * accessing the VF's VSI after it's freed / invalidated.
+	 */
+	clear_bit(I40E_VF_STATE_INIT, &vf->vf_states);
+
+	/* It's possible the VF had requeuested more queues than the default so
+	 * do the accounting here when we're about to free them.
+	 */
+	if (vf->num_queue_pairs > I40E_DEFAULT_QUEUES_PER_VF) {
+		pf->queues_left += vf->num_queue_pairs -
+				   I40E_DEFAULT_QUEUES_PER_VF;
+	}
+
+	/* free vsi & disconnect it from the parent uplink */
+	if (vf->lan_vsi_idx) {
+		i40e_vsi_release(pf->vsi[vf->lan_vsi_idx]);
+		vf->lan_vsi_idx = 0;
+		vf->lan_vsi_id = 0;
+	}
+
+	/* do the accounting and remove additional ADq VSI's */
+	if (vf->adq_enabled && vf->ch[0].vsi_idx) {
+		for (j = 0; j < vf->num_tc; j++) {
+			/* At this point VSI0 is already released so don't
+			 * release it again and only clear their values in
+			 * structure variables
+			 */
+			if (j)
+				i40e_vsi_release(pf->vsi[vf->ch[j].vsi_idx]);
+			vf->ch[j].vsi_idx = 0;
+			vf->ch[j].vsi_id = 0;
+		}
+	}
+	msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
+
+	/* disable interrupts so the VF starts in a known state */
+	for (i = 0; i < msix_vf; i++) {
+		/* format is same for both registers */
+		if (0 == i)
+			reg_idx = I40E_VFINT_DYN_CTL0(vf->vf_id);
+		else
+			reg_idx = I40E_VFINT_DYN_CTLN(((msix_vf - 1) *
+						      (vf->vf_id))
+						     + (i - 1));
+		wr32(hw, reg_idx, I40E_VFINT_DYN_CTLN_CLEARPBA_MASK);
+		i40e_flush(hw);
+	}
+
+	/* clear the irq settings */
+	for (i = 0; i < msix_vf; i++) {
+		/* format is same for both registers */
+		if (0 == i)
+			reg_idx = I40E_VPINT_LNKLST0(vf->vf_id);
+		else
+			reg_idx = I40E_VPINT_LNKLSTN(((msix_vf - 1) *
+						      (vf->vf_id))
+						     + (i - 1));
+		reg = (I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK |
+		       I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK);
+		wr32(hw, reg_idx, reg);
+		i40e_flush(hw);
+	}
+	/* reset some of the state variables keeping track of the resources */
+	vf->num_queue_pairs = 0;
+	clear_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states);
+	clear_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states);
+}
+
+/**
+ * i40e_alloc_vf_res
+ * @vf: pointer to the VF info
+ *
+ * allocate VF resources
+ **/
+static int i40e_alloc_vf_res(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	int total_queue_pairs = 0;
+	int ret, idx;
+
+	if (vf->num_req_queues &&
+	    vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF)
+		pf->num_vf_qps = vf->num_req_queues;
+	else
+		pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
+
+	/* allocate hw vsi context & associated resources */
+	ret = i40e_alloc_vsi_res(vf, 0);
+	if (ret)
+		goto error_alloc;
+	total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+
+	/* allocate additional VSIs based on tc information for ADq */
+	if (vf->adq_enabled) {
+		if (pf->queues_left >=
+		    (I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF)) {
+			/* TC 0 always belongs to VF VSI */
+			for (idx = 1; idx < vf->num_tc; idx++) {
+				ret = i40e_alloc_vsi_res(vf, idx);
+				if (ret)
+					goto error_alloc;
+			}
+			/* send correct number of queues */
+			total_queue_pairs = I40E_MAX_VF_QUEUES;
+		} else {
+			dev_info(&pf->pdev->dev, "VF %d: Not enough queues to allocate, disabling ADq\n",
+				 vf->vf_id);
+			vf->adq_enabled = false;
+		}
+	}
+
+	/* We account for each VF to get a default number of queue pairs.  If
+	 * the VF has now requested more, we need to account for that to make
+	 * certain we never request more queues than we actually have left in
+	 * HW.
+	 */
+	if (total_queue_pairs > I40E_DEFAULT_QUEUES_PER_VF)
+		pf->queues_left -=
+			total_queue_pairs - I40E_DEFAULT_QUEUES_PER_VF;
+
+	if (vf->trusted)
+		set_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+	else
+		clear_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+
+	/* store the total qps number for the runtime
+	 * VF req validation
+	 */
+	vf->num_queue_pairs = total_queue_pairs;
+
+	/* VF is now completely initialized */
+	set_bit(I40E_VF_STATE_INIT, &vf->vf_states);
+
+error_alloc:
+	if (ret)
+		i40e_free_vf_res(vf);
+
+	return ret;
+}
+
+#define VF_DEVICE_STATUS 0xAA
+#define VF_TRANS_PENDING_MASK 0x20
+/**
+ * i40e_quiesce_vf_pci
+ * @vf: pointer to the VF structure
+ *
+ * Wait for VF PCI transactions to be cleared after reset. Returns -EIO
+ * if the transactions never clear.
+ **/
+static int i40e_quiesce_vf_pci(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	int vf_abs_id, i;
+	u32 reg;
+
+	vf_abs_id = vf->vf_id + hw->func_caps.vf_base_id;
+
+	wr32(hw, I40E_PF_PCI_CIAA,
+	     VF_DEVICE_STATUS | (vf_abs_id << I40E_PF_PCI_CIAA_VF_NUM_SHIFT));
+	for (i = 0; i < 100; i++) {
+		reg = rd32(hw, I40E_PF_PCI_CIAD);
+		if ((reg & VF_TRANS_PENDING_MASK) == 0)
+			return 0;
+		udelay(1);
+	}
+	return -EIO;
+}
+
+/**
+ * __i40e_getnum_vf_vsi_vlan_filters
+ * @vsi: pointer to the vsi
+ *
+ * called to get the number of VLANs offloaded on this VF
+ **/
+static int __i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi)
+{
+	struct i40e_mac_filter *f;
+	u16 num_vlans = 0, bkt;
+
+	hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
+		if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID)
+			num_vlans++;
+	}
+
+	return num_vlans;
+}
+
+/**
+ * i40e_getnum_vf_vsi_vlan_filters
+ * @vsi: pointer to the vsi
+ *
+ * wrapper for __i40e_getnum_vf_vsi_vlan_filters() with spinlock held
+ **/
+static int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi)
+{
+	int num_vlans;
+
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	num_vlans = __i40e_getnum_vf_vsi_vlan_filters(vsi);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	return num_vlans;
+}
+
+/**
+ * i40e_get_vlan_list_sync
+ * @vsi: pointer to the VSI
+ * @num_vlans: number of VLANs in mac_filter_hash, returned to caller
+ * @vlan_list: list of VLANs present in mac_filter_hash, returned to caller.
+ *             This array is allocated here, but has to be freed in caller.
+ *
+ * Called to get number of VLANs and VLAN list present in mac_filter_hash.
+ **/
+static void i40e_get_vlan_list_sync(struct i40e_vsi *vsi, u16 *num_vlans,
+				    s16 **vlan_list)
+{
+	struct i40e_mac_filter *f;
+	int i = 0;
+	int bkt;
+
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	*num_vlans = __i40e_getnum_vf_vsi_vlan_filters(vsi);
+	*vlan_list = kcalloc(*num_vlans, sizeof(**vlan_list), GFP_ATOMIC);
+	if (!(*vlan_list))
+		goto err;
+
+	hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
+		if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID)
+			continue;
+		(*vlan_list)[i++] = f->vlan;
+	}
+err:
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+}
+
+/**
+ * i40e_set_vsi_promisc
+ * @vf: pointer to the VF struct
+ * @seid: VSI number
+ * @multi_enable: set MAC L2 layer multicast promiscuous enable/disable
+ *                for a given VLAN
+ * @unicast_enable: set MAC L2 layer unicast promiscuous enable/disable
+ *                  for a given VLAN
+ * @vl: List of VLANs - apply filter for given VLANs
+ * @num_vlans: Number of elements in @vl
+ **/
+static int
+i40e_set_vsi_promisc(struct i40e_vf *vf, u16 seid, bool multi_enable,
+		     bool unicast_enable, s16 *vl, u16 num_vlans)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	int aq_ret, aq_tmp = 0;
+	int i;
+
+	/* No VLAN to set promisc on, set on VSI */
+	if (!num_vlans || !vl) {
+		aq_ret = i40e_aq_set_vsi_multicast_promiscuous(hw, seid,
+							       multi_enable,
+							       NULL);
+		if (aq_ret) {
+			int aq_err = pf->hw.aq.asq_last_status;
+
+			dev_err(&pf->pdev->dev,
+				"VF %d failed to set multicast promiscuous mode err %pe aq_err %s\n",
+				vf->vf_id,
+				ERR_PTR(aq_ret),
+				i40e_aq_str(&pf->hw, aq_err));
+
+			return aq_ret;
+		}
+
+		aq_ret = i40e_aq_set_vsi_unicast_promiscuous(hw, seid,
+							     unicast_enable,
+							     NULL, true);
+
+		if (aq_ret) {
+			int aq_err = pf->hw.aq.asq_last_status;
+
+			dev_err(&pf->pdev->dev,
+				"VF %d failed to set unicast promiscuous mode err %pe aq_err %s\n",
+				vf->vf_id,
+				ERR_PTR(aq_ret),
+				i40e_aq_str(&pf->hw, aq_err));
+		}
+
+		return aq_ret;
+	}
+
+	for (i = 0; i < num_vlans; i++) {
+		aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw, seid,
+							    multi_enable,
+							    vl[i], NULL);
+		if (aq_ret) {
+			int aq_err = pf->hw.aq.asq_last_status;
+
+			dev_err(&pf->pdev->dev,
+				"VF %d failed to set multicast promiscuous mode err %pe aq_err %s\n",
+				vf->vf_id,
+				ERR_PTR(aq_ret),
+				i40e_aq_str(&pf->hw, aq_err));
+
+			if (!aq_tmp)
+				aq_tmp = aq_ret;
+		}
+
+		aq_ret = i40e_aq_set_vsi_uc_promisc_on_vlan(hw, seid,
+							    unicast_enable,
+							    vl[i], NULL);
+		if (aq_ret) {
+			int aq_err = pf->hw.aq.asq_last_status;
+
+			dev_err(&pf->pdev->dev,
+				"VF %d failed to set unicast promiscuous mode err %pe aq_err %s\n",
+				vf->vf_id,
+				ERR_PTR(aq_ret),
+				i40e_aq_str(&pf->hw, aq_err));
+
+			if (!aq_tmp)
+				aq_tmp = aq_ret;
+		}
+	}
+
+	if (aq_tmp)
+		aq_ret = aq_tmp;
+
+	return aq_ret;
+}
+
+/**
+ * i40e_config_vf_promiscuous_mode
+ * @vf: pointer to the VF info
+ * @vsi_id: VSI id
+ * @allmulti: set MAC L2 layer multicast promiscuous enable/disable
+ * @alluni: set MAC L2 layer unicast promiscuous enable/disable
+ *
+ * Called from the VF to configure the promiscuous mode of
+ * VF vsis and from the VF reset path to reset promiscuous mode.
+ **/
+static int i40e_config_vf_promiscuous_mode(struct i40e_vf *vf,
+					   u16 vsi_id,
+					   bool allmulti,
+					   bool alluni)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi;
+	int aq_ret = 0;
+	u16 num_vlans;
+	s16 *vl;
+
+	vsi = i40e_find_vsi_from_id(pf, vsi_id);
+	if (!i40e_vc_isvalid_vsi_id(vf, vsi_id) || !vsi)
+		return -EINVAL;
+
+	if (vf->port_vlan_id) {
+		aq_ret = i40e_set_vsi_promisc(vf, vsi->seid, allmulti,
+					      alluni, &vf->port_vlan_id, 1);
+		return aq_ret;
+	} else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) {
+		i40e_get_vlan_list_sync(vsi, &num_vlans, &vl);
+
+		if (!vl)
+			return -ENOMEM;
+
+		aq_ret = i40e_set_vsi_promisc(vf, vsi->seid, allmulti, alluni,
+					      vl, num_vlans);
+		kfree(vl);
+		return aq_ret;
+	}
+
+	/* no VLANs to set on, set on VSI */
+	aq_ret = i40e_set_vsi_promisc(vf, vsi->seid, allmulti, alluni,
+				      NULL, 0);
+	return aq_ret;
+}
+
+/**
+ * i40e_sync_vfr_reset
+ * @hw: pointer to hw struct
+ * @vf_id: VF identifier
+ *
+ * Before trigger hardware reset, we need to know if no other process has
+ * reserved the hardware for any reset operations. This check is done by
+ * examining the status of the RSTAT1 register used to signal the reset.
+ **/
+static int i40e_sync_vfr_reset(struct i40e_hw *hw, int vf_id)
+{
+	u32 reg;
+	int i;
+
+	for (i = 0; i < I40E_VFR_WAIT_COUNT; i++) {
+		reg = rd32(hw, I40E_VFINT_ICR0_ENA(vf_id)) &
+			   I40E_VFINT_ICR0_ADMINQ_MASK;
+		if (reg)
+			return 0;
+
+		usleep_range(100, 200);
+	}
+
+	return -EAGAIN;
+}
+
+/**
+ * i40e_trigger_vf_reset
+ * @vf: pointer to the VF structure
+ * @flr: VFLR was issued or not
+ *
+ * Trigger hardware to start a reset for a particular VF. Expects the caller
+ * to wait the proper amount of time to allow hardware to reset the VF before
+ * it cleans up and restores VF functionality.
+ **/
+static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	u32 reg, reg_idx, bit_idx;
+	bool vf_active;
+	u32 radq;
+
+	/* warn the VF */
+	vf_active = test_and_clear_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);
+
+	/* Disable VF's configuration API during reset. The flag is re-enabled
+	 * in i40e_alloc_vf_res(), when it's safe again to access VF's VSI.
+	 * It's normally disabled in i40e_free_vf_res(), but it's safer
+	 * to do it earlier to give some time to finish to any VF config
+	 * functions that may still be running at this point.
+	 */
+	clear_bit(I40E_VF_STATE_INIT, &vf->vf_states);
+
+	/* In the case of a VFLR, the HW has already reset the VF and we
+	 * just need to clean up, so don't hit the VFRTRIG register.
+	 */
+	if (!flr) {
+		/* Sync VFR reset before trigger next one */
+		radq = rd32(hw, I40E_VFINT_ICR0_ENA(vf->vf_id)) &
+			    I40E_VFINT_ICR0_ADMINQ_MASK;
+		if (vf_active && !radq)
+			/* waiting for finish reset by virtual driver */
+			if (i40e_sync_vfr_reset(hw, vf->vf_id))
+				dev_info(&pf->pdev->dev,
+					 "Reset VF %d never finished\n",
+				vf->vf_id);
+
+		/* Reset VF using VPGEN_VFRTRIG reg. It is also setting
+		 * in progress state in rstat1 register.
+		 */
+		reg = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id));
+		reg |= I40E_VPGEN_VFRTRIG_VFSWR_MASK;
+		wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg);
+		i40e_flush(hw);
+	}
+	/* clear the VFLR bit in GLGEN_VFLRSTAT */
+	reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32;
+	bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32;
+	wr32(hw, I40E_GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
+	i40e_flush(hw);
+
+	if (i40e_quiesce_vf_pci(vf))
+		dev_err(&pf->pdev->dev, "VF %d PCI transactions stuck\n",
+			vf->vf_id);
+}
+
+/**
+ * i40e_cleanup_reset_vf
+ * @vf: pointer to the VF structure
+ *
+ * Cleanup a VF after the hardware reset is finished. Expects the caller to
+ * have verified whether the reset is finished properly, and ensure the
+ * minimum amount of wait time has passed.
+ **/
+static void i40e_cleanup_reset_vf(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	u32 reg;
+
+	/* disable promisc modes in case they were enabled */
+	i40e_config_vf_promiscuous_mode(vf, vf->lan_vsi_id, false, false);
+
+	/* free VF resources to begin resetting the VSI state */
+	i40e_free_vf_res(vf);
+
+	/* Enable hardware by clearing the reset bit in the VPGEN_VFRTRIG reg.
+	 * By doing this we allow HW to access VF memory at any point. If we
+	 * did it any sooner, HW could access memory while it was being freed
+	 * in i40e_free_vf_res(), causing an IOMMU fault.
+	 *
+	 * On the other hand, this needs to be done ASAP, because the VF driver
+	 * is waiting for this to happen and may report a timeout. It's
+	 * harmless, but it gets logged into Guest OS kernel log, so best avoid
+	 * it.
+	 */
+	reg = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id));
+	reg &= ~I40E_VPGEN_VFRTRIG_VFSWR_MASK;
+	wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg);
+
+	/* reallocate VF resources to finish resetting the VSI state */
+	if (!i40e_alloc_vf_res(vf)) {
+		int abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
+		i40e_enable_vf_mappings(vf);
+		set_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);
+		clear_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
+		/* Do not notify the client during VF init */
+		if (!test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE,
+					&vf->vf_states))
+			i40e_notify_client_of_vf_reset(pf, abs_vf_id);
+		vf->num_vlan = 0;
+	}
+
+	/* Tell the VF driver the reset is done. This needs to be done only
+	 * after VF has been fully initialized, because the VF driver may
+	 * request resources immediately after setting this flag.
+	 */
+	wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), VIRTCHNL_VFR_VFACTIVE);
+}
+
+/**
+ * i40e_reset_vf
+ * @vf: pointer to the VF structure
+ * @flr: VFLR was issued or not
+ *
+ * Returns true if the VF is in reset, resets successfully, or resets
+ * are disabled and false otherwise.
+ **/
+bool i40e_reset_vf(struct i40e_vf *vf, bool flr)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	bool rsd = false;
+	u32 reg;
+	int i;
+
+	if (test_bit(__I40E_VF_RESETS_DISABLED, pf->state))
+		return true;
+
+	/* Bail out if VFs are disabled. */
+	if (test_bit(__I40E_VF_DISABLE, pf->state))
+		return true;
+
+	/* If VF is being reset already we don't need to continue. */
+	if (test_and_set_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
+		return true;
+
+	i40e_trigger_vf_reset(vf, flr);
+
+	/* poll VPGEN_VFRSTAT reg to make sure
+	 * that reset is complete
+	 */
+	for (i = 0; i < 10; i++) {
+		/* VF reset requires driver to first reset the VF and then
+		 * poll the status register to make sure that the reset
+		 * completed successfully. Due to internal HW FIFO flushes,
+		 * we must wait 10ms before the register will be valid.
+		 */
+		usleep_range(10000, 20000);
+		reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id));
+		if (reg & I40E_VPGEN_VFRSTAT_VFRD_MASK) {
+			rsd = true;
+			break;
+		}
+	}
+
+	if (flr)
+		usleep_range(10000, 20000);
+
+	if (!rsd)
+		dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n",
+			vf->vf_id);
+	usleep_range(10000, 20000);
+
+	/* On initial reset, we don't have any queues to disable */
+	if (vf->lan_vsi_idx != 0)
+		i40e_vsi_stop_rings(pf->vsi[vf->lan_vsi_idx]);
+
+	i40e_cleanup_reset_vf(vf);
+
+	i40e_flush(hw);
+	usleep_range(20000, 40000);
+	clear_bit(I40E_VF_STATE_RESETTING, &vf->vf_states);
+
+	return true;
+}
+
+/**
+ * i40e_reset_all_vfs
+ * @pf: pointer to the PF structure
+ * @flr: VFLR was issued or not
+ *
+ * Reset all allocated VFs in one go. First, tell the hardware to reset each
+ * VF, then do all the waiting in one chunk, and finally finish restoring each
+ * VF after the wait. This is useful during PF routines which need to reset
+ * all VFs, as otherwise it must perform these resets in a serialized fashion.
+ *
+ * Returns true if any VFs were reset, and false otherwise.
+ **/
+bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
+{
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_vf *vf;
+	int i, v;
+	u32 reg;
+
+	/* If we don't have any VFs, then there is nothing to reset */
+	if (!pf->num_alloc_vfs)
+		return false;
+
+	/* If VFs have been disabled, there is no need to reset */
+	if (test_and_set_bit(__I40E_VF_DISABLE, pf->state))
+		return false;
+
+	/* Begin reset on all VFs at once */
+	for (v = 0; v < pf->num_alloc_vfs; v++) {
+		vf = &pf->vf[v];
+		/* If VF is being reset no need to trigger reset again */
+		if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
+			i40e_trigger_vf_reset(&pf->vf[v], flr);
+	}
+
+	/* HW requires some time to make sure it can flush the FIFO for a VF
+	 * when it resets it. Poll the VPGEN_VFRSTAT register for each VF in
+	 * sequence to make sure that it has completed. We'll keep track of
+	 * the VFs using a simple iterator that increments once that VF has
+	 * finished resetting.
+	 */
+	for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) {
+		usleep_range(10000, 20000);
+
+		/* Check each VF in sequence, beginning with the VF to fail
+		 * the previous check.
+		 */
+		while (v < pf->num_alloc_vfs) {
+			vf = &pf->vf[v];
+			if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) {
+				reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id));
+				if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK))
+					break;
+			}
+
+			/* If the current VF has finished resetting, move on
+			 * to the next VF in sequence.
+			 */
+			v++;
+		}
+	}
+
+	if (flr)
+		usleep_range(10000, 20000);
+
+	/* Display a warning if at least one VF didn't manage to reset in
+	 * time, but continue on with the operation.
+	 */
+	if (v < pf->num_alloc_vfs)
+		dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n",
+			pf->vf[v].vf_id);
+	usleep_range(10000, 20000);
+
+	/* Begin disabling all the rings associated with VFs, but do not wait
+	 * between each VF.
+	 */
+	for (v = 0; v < pf->num_alloc_vfs; v++) {
+		/* On initial reset, we don't have any queues to disable */
+		if (pf->vf[v].lan_vsi_idx == 0)
+			continue;
+
+		/* If VF is reset in another thread just continue */
+		if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
+			continue;
+
+		i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]);
+	}
+
+	/* Now that we've notified HW to disable all of the VF rings, wait
+	 * until they finish.
+	 */
+	for (v = 0; v < pf->num_alloc_vfs; v++) {
+		/* On initial reset, we don't have any queues to disable */
+		if (pf->vf[v].lan_vsi_idx == 0)
+			continue;
+
+		/* If VF is reset in another thread just continue */
+		if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
+			continue;
+
+		i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]);
+	}
+
+	/* Hw may need up to 50ms to finish disabling the RX queues. We
+	 * minimize the wait by delaying only once for all VFs.
+	 */
+	mdelay(50);
+
+	/* Finish the reset on each VF */
+	for (v = 0; v < pf->num_alloc_vfs; v++) {
+		/* If VF is reset in another thread just continue */
+		if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
+			continue;
+
+		i40e_cleanup_reset_vf(&pf->vf[v]);
+	}
+
+	i40e_flush(hw);
+	usleep_range(20000, 40000);
+	clear_bit(__I40E_VF_DISABLE, pf->state);
+
+	return true;
+}
+
+/**
+ * i40e_free_vfs
+ * @pf: pointer to the PF structure
+ *
+ * free VF resources
+ **/
+void i40e_free_vfs(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u32 reg_idx, bit_idx;
+	int i, tmp, vf_id;
+
+	if (!pf->vf)
+		return;
+
+	set_bit(__I40E_VFS_RELEASING, pf->state);
+	while (test_and_set_bit(__I40E_VF_DISABLE, pf->state))
+		usleep_range(1000, 2000);
+
+	i40e_notify_client_of_vf_enable(pf, 0);
+
+	/* Disable IOV before freeing resources. This lets any VF drivers
+	 * running in the host get themselves cleaned up before we yank
+	 * the carpet out from underneath their feet.
+	 */
+	if (!pci_vfs_assigned(pf->pdev))
+		pci_disable_sriov(pf->pdev);
+	else
+		dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n");
+
+	/* Amortize wait time by stopping all VFs at the same time */
+	for (i = 0; i < pf->num_alloc_vfs; i++) {
+		if (test_bit(I40E_VF_STATE_INIT, &pf->vf[i].vf_states))
+			continue;
+
+		i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[i].lan_vsi_idx]);
+	}
+
+	for (i = 0; i < pf->num_alloc_vfs; i++) {
+		if (test_bit(I40E_VF_STATE_INIT, &pf->vf[i].vf_states))
+			continue;
+
+		i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[i].lan_vsi_idx]);
+	}
+
+	/* free up VF resources */
+	tmp = pf->num_alloc_vfs;
+	pf->num_alloc_vfs = 0;
+	for (i = 0; i < tmp; i++) {
+		if (test_bit(I40E_VF_STATE_INIT, &pf->vf[i].vf_states))
+			i40e_free_vf_res(&pf->vf[i]);
+		/* disable qp mappings */
+		i40e_disable_vf_mappings(&pf->vf[i]);
+	}
+
+	kfree(pf->vf);
+	pf->vf = NULL;
+
+	/* This check is for when the driver is unloaded while VFs are
+	 * assigned. Setting the number of VFs to 0 through sysfs is caught
+	 * before this function ever gets called.
+	 */
+	if (!pci_vfs_assigned(pf->pdev)) {
+		/* Acknowledge VFLR for all VFS. Without this, VFs will fail to
+		 * work correctly when SR-IOV gets re-enabled.
+		 */
+		for (vf_id = 0; vf_id < tmp; vf_id++) {
+			reg_idx = (hw->func_caps.vf_base_id + vf_id) / 32;
+			bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32;
+			wr32(hw, I40E_GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
+		}
+	}
+	clear_bit(__I40E_VF_DISABLE, pf->state);
+	clear_bit(__I40E_VFS_RELEASING, pf->state);
+}
+
+#ifdef CONFIG_PCI_IOV
+/**
+ * i40e_alloc_vfs
+ * @pf: pointer to the PF structure
+ * @num_alloc_vfs: number of VFs to allocate
+ *
+ * allocate VF resources
+ **/
+int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs)
+{
+	struct i40e_vf *vfs;
+	int i, ret = 0;
+
+	/* Disable interrupt 0 so we don't try to handle the VFLR. */
+	i40e_irq_dynamic_disable_icr0(pf);
+
+	/* Check to see if we're just allocating resources for extant VFs */
+	if (pci_num_vf(pf->pdev) != num_alloc_vfs) {
+		ret = pci_enable_sriov(pf->pdev, num_alloc_vfs);
+		if (ret) {
+			pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+			pf->num_alloc_vfs = 0;
+			goto err_iov;
+		}
+	}
+	/* allocate memory */
+	vfs = kcalloc(num_alloc_vfs, sizeof(struct i40e_vf), GFP_KERNEL);
+	if (!vfs) {
+		ret = -ENOMEM;
+		goto err_alloc;
+	}
+	pf->vf = vfs;
+
+	/* apply default profile */
+	for (i = 0; i < num_alloc_vfs; i++) {
+		vfs[i].pf = pf;
+		vfs[i].parent_type = I40E_SWITCH_ELEMENT_TYPE_VEB;
+		vfs[i].vf_id = i;
+
+		/* assign default capabilities */
+		set_bit(I40E_VIRTCHNL_VF_CAP_L2, &vfs[i].vf_caps);
+		vfs[i].spoofchk = true;
+
+		set_bit(I40E_VF_STATE_PRE_ENABLE, &vfs[i].vf_states);
+
+	}
+	pf->num_alloc_vfs = num_alloc_vfs;
+
+	/* VF resources get allocated during reset */
+	i40e_reset_all_vfs(pf, false);
+
+	i40e_notify_client_of_vf_enable(pf, num_alloc_vfs);
+
+err_alloc:
+	if (ret)
+		i40e_free_vfs(pf);
+err_iov:
+	/* Re-enable interrupt 0. */
+	i40e_irq_dynamic_enable_icr0(pf);
+	return ret;
+}
+
+#endif
+/**
+ * i40e_pci_sriov_enable
+ * @pdev: pointer to a pci_dev structure
+ * @num_vfs: number of VFs to allocate
+ *
+ * Enable or change the number of VFs
+ **/
+static int i40e_pci_sriov_enable(struct pci_dev *pdev, int num_vfs)
+{
+#ifdef CONFIG_PCI_IOV
+	struct i40e_pf *pf = pci_get_drvdata(pdev);
+	int pre_existing_vfs = pci_num_vf(pdev);
+	int err = 0;
+
+	if (test_bit(__I40E_TESTING, pf->state)) {
+		dev_warn(&pdev->dev,
+			 "Cannot enable SR-IOV virtual functions while the device is undergoing diagnostic testing\n");
+		err = -EPERM;
+		goto err_out;
+	}
+
+	if (pre_existing_vfs && pre_existing_vfs != num_vfs)
+		i40e_free_vfs(pf);
+	else if (pre_existing_vfs && pre_existing_vfs == num_vfs)
+		goto out;
+
+	if (num_vfs > pf->num_req_vfs) {
+		dev_warn(&pdev->dev, "Unable to enable %d VFs. Limited to %d VFs due to device resource constraints.\n",
+			 num_vfs, pf->num_req_vfs);
+		err = -EPERM;
+		goto err_out;
+	}
+
+	dev_info(&pdev->dev, "Allocating %d VFs.\n", num_vfs);
+	err = i40e_alloc_vfs(pf, num_vfs);
+	if (err) {
+		dev_warn(&pdev->dev, "Failed to enable SR-IOV: %d\n", err);
+		goto err_out;
+	}
+
+out:
+	return num_vfs;
+
+err_out:
+	return err;
+#endif
+	return 0;
+}
+
+/**
+ * i40e_pci_sriov_configure
+ * @pdev: pointer to a pci_dev structure
+ * @num_vfs: number of VFs to allocate
+ *
+ * Enable or change the number of VFs. Called when the user updates the number
+ * of VFs in sysfs.
+ **/
+int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+	struct i40e_pf *pf = pci_get_drvdata(pdev);
+	int ret = 0;
+
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	if (num_vfs) {
+		if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+			i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG);
+		}
+		ret = i40e_pci_sriov_enable(pdev, num_vfs);
+		goto sriov_configure_out;
+	}
+
+	if (!pci_vfs_assigned(pf->pdev)) {
+		i40e_free_vfs(pf);
+		pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+		i40e_do_reset_safe(pf, I40E_PF_RESET_AND_REBUILD_FLAG);
+	} else {
+		dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n");
+		ret = -EINVAL;
+		goto sriov_configure_out;
+	}
+sriov_configure_out:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
+	return ret;
+}
+
+/***********************virtual channel routines******************/
+
+/**
+ * i40e_vc_send_msg_to_vf
+ * @vf: pointer to the VF info
+ * @v_opcode: virtual channel opcode
+ * @v_retval: virtual channel return value
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * send msg to VF
+ **/
+static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode,
+				  u32 v_retval, u8 *msg, u16 msglen)
+{
+	struct i40e_pf *pf;
+	struct i40e_hw *hw;
+	int abs_vf_id;
+	int aq_ret;
+
+	/* validate the request */
+	if (!vf || vf->vf_id >= vf->pf->num_alloc_vfs)
+		return -EINVAL;
+
+	pf = vf->pf;
+	hw = &pf->hw;
+	abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
+
+	aq_ret = i40e_aq_send_msg_to_vf(hw, abs_vf_id,	v_opcode, v_retval,
+					msg, msglen, NULL);
+	if (aq_ret) {
+		dev_info(&pf->pdev->dev,
+			 "Unable to send the message to VF %d aq_err %d\n",
+			 vf->vf_id, pf->hw.aq.asq_last_status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_vc_send_resp_to_vf
+ * @vf: pointer to the VF info
+ * @opcode: operation code
+ * @retval: return value
+ *
+ * send resp msg to VF
+ **/
+static int i40e_vc_send_resp_to_vf(struct i40e_vf *vf,
+				   enum virtchnl_ops opcode,
+				   int retval)
+{
+	return i40e_vc_send_msg_to_vf(vf, opcode, retval, NULL, 0);
+}
+
+/**
+ * i40e_sync_vf_state
+ * @vf: pointer to the VF info
+ * @state: VF state
+ *
+ * Called from a VF message to synchronize the service with a potential
+ * VF reset state
+ **/
+static bool i40e_sync_vf_state(struct i40e_vf *vf, enum i40e_vf_states state)
+{
+	int i;
+
+	/* When handling some messages, it needs VF state to be set.
+	 * It is possible that this flag is cleared during VF reset,
+	 * so there is a need to wait until the end of the reset to
+	 * handle the request message correctly.
+	 */
+	for (i = 0; i < I40E_VF_STATE_WAIT_COUNT; i++) {
+		if (test_bit(state, &vf->vf_states))
+			return true;
+		usleep_range(10000, 20000);
+	}
+
+	return test_bit(state, &vf->vf_states);
+}
+
+/**
+ * i40e_vc_get_version_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to request the API version used by the PF
+ **/
+static int i40e_vc_get_version_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_version_info info = {
+		VIRTCHNL_VERSION_MAJOR, VIRTCHNL_VERSION_MINOR
+	};
+
+	vf->vf_ver = *(struct virtchnl_version_info *)msg;
+	/* VFs running the 1.0 API expect to get 1.0 back or they will cry. */
+	if (VF_IS_V10(&vf->vf_ver))
+		info.minor = VIRTCHNL_VERSION_MINOR_NO_VF_CAPS;
+	return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_VERSION,
+				      0, (u8 *)&info,
+				      sizeof(struct virtchnl_version_info));
+}
+
+/**
+ * i40e_del_qch - delete all the additional VSIs created as a part of ADq
+ * @vf: pointer to VF structure
+ **/
+static void i40e_del_qch(struct i40e_vf *vf)
+{
+	struct i40e_pf *pf = vf->pf;
+	int i;
+
+	/* first element in the array belongs to primary VF VSI and we shouldn't
+	 * delete it. We should however delete the rest of the VSIs created
+	 */
+	for (i = 1; i < vf->num_tc; i++) {
+		if (vf->ch[i].vsi_idx) {
+			i40e_vsi_release(pf->vsi[vf->ch[i].vsi_idx]);
+			vf->ch[i].vsi_idx = 0;
+			vf->ch[i].vsi_id = 0;
+		}
+	}
+}
+
+/**
+ * i40e_vc_get_max_frame_size
+ * @vf: pointer to the VF
+ *
+ * Max frame size is determined based on the current port's max frame size and
+ * whether a port VLAN is configured on this VF. The VF is not aware whether
+ * it's in a port VLAN so the PF needs to account for this in max frame size
+ * checks and sending the max frame size to the VF.
+ **/
+static u16 i40e_vc_get_max_frame_size(struct i40e_vf *vf)
+{
+	u16 max_frame_size = vf->pf->hw.phy.link_info.max_frame_size;
+
+	if (vf->port_vlan_id)
+		max_frame_size -= VLAN_HLEN;
+
+	return max_frame_size;
+}
+
+/**
+ * i40e_vc_get_vf_resources_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to request its resources
+ **/
+static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_vf_resource *vfres = NULL;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi;
+	int num_vsis = 1;
+	int aq_ret = 0;
+	size_t len = 0;
+	int ret;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_INIT)) {
+		aq_ret = -EINVAL;
+		goto err;
+	}
+
+	len = virtchnl_struct_size(vfres, vsi_res, num_vsis);
+	vfres = kzalloc(len, GFP_KERNEL);
+	if (!vfres) {
+		aq_ret = -ENOMEM;
+		len = 0;
+		goto err;
+	}
+	if (VF_IS_V11(&vf->vf_ver))
+		vf->driver_caps = *(u32 *)msg;
+	else
+		vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 |
+				  VIRTCHNL_VF_OFFLOAD_RSS_REG |
+				  VIRTCHNL_VF_OFFLOAD_VLAN;
+
+	vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2;
+	vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi->info.pvid)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_VLAN;
+
+	if (i40e_vf_client_capable(pf, vf->vf_id) &&
+	    (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RDMA)) {
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RDMA;
+		set_bit(I40E_VF_STATE_RDMAENA, &vf->vf_states);
+	} else {
+		clear_bit(I40E_VF_STATE_RDMAENA, &vf->vf_states);
+	}
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF;
+	} else {
+		if ((pf->hw_features & I40E_HW_RSS_AQ_CAPABLE) &&
+		    (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ))
+			vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ;
+		else
+			vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG;
+	}
+
+	if (pf->hw_features & I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE) {
+		if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
+			vfres->vf_cap_flags |=
+				VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2;
+	}
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP;
+
+	if ((pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE) &&
+	    (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM))
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_POLLING) {
+		if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+			dev_err(&pf->pdev->dev,
+				"VF %d requested polling mode: this feature is supported only when the device is running in single function per port (SFP) mode\n",
+				 vf->vf_id);
+			aq_ret = -EINVAL;
+			goto err;
+		}
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_POLLING;
+	}
+
+	if (pf->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) {
+		if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
+			vfres->vf_cap_flags |=
+					VIRTCHNL_VF_OFFLOAD_WB_ON_ITR;
+	}
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ADQ;
+
+	vfres->num_vsis = num_vsis;
+	vfres->num_queue_pairs = vf->num_queue_pairs;
+	vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf;
+	vfres->rss_key_size = I40E_HKEY_ARRAY_SIZE;
+	vfres->rss_lut_size = I40E_VF_HLUT_ARRAY_SIZE;
+	vfres->max_mtu = i40e_vc_get_max_frame_size(vf);
+
+	if (vf->lan_vsi_idx) {
+		vfres->vsi_res[0].vsi_id = vf->lan_vsi_id;
+		vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV;
+		vfres->vsi_res[0].num_queue_pairs = vsi->alloc_queue_pairs;
+		/* VFs only use TC 0 */
+		vfres->vsi_res[0].qset_handle
+					  = le16_to_cpu(vsi->info.qs_handle[0]);
+		if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_USO) && !vf->pf_set_mac) {
+			i40e_del_mac_filter(vsi, vf->default_lan_addr.addr);
+			eth_zero_addr(vf->default_lan_addr.addr);
+		}
+		ether_addr_copy(vfres->vsi_res[0].default_mac_addr,
+				vf->default_lan_addr.addr);
+	}
+	set_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);
+
+err:
+	/* send the response back to the VF */
+	ret = i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_VF_RESOURCES,
+				     aq_ret, (u8 *)vfres, len);
+
+	kfree(vfres);
+	return ret;
+}
+
+/**
+ * i40e_vc_config_promiscuous_mode_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure the promiscuous mode of
+ * VF vsis
+ **/
+static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_promisc_info *info =
+	    (struct virtchnl_promisc_info *)msg;
+	struct i40e_pf *pf = vf->pf;
+	bool allmulti = false;
+	bool alluni = false;
+	int aq_ret = 0;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+		aq_ret = -EINVAL;
+		goto err_out;
+	}
+	if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
+		dev_err(&pf->pdev->dev,
+			"Unprivileged VF %d is attempting to configure promiscuous mode\n",
+			vf->vf_id);
+
+		/* Lie to the VF on purpose, because this is an error we can
+		 * ignore. Unprivileged VF is not a virtual channel error.
+		 */
+		aq_ret = 0;
+		goto err_out;
+	}
+
+	if (info->flags > I40E_MAX_VF_PROMISC_FLAGS) {
+		aq_ret = -EINVAL;
+		goto err_out;
+	}
+
+	if (!i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) {
+		aq_ret = -EINVAL;
+		goto err_out;
+	}
+
+	/* Multicast promiscuous handling*/
+	if (info->flags & FLAG_VF_MULTICAST_PROMISC)
+		allmulti = true;
+
+	if (info->flags & FLAG_VF_UNICAST_PROMISC)
+		alluni = true;
+	aq_ret = i40e_config_vf_promiscuous_mode(vf, info->vsi_id, allmulti,
+						 alluni);
+	if (aq_ret)
+		goto err_out;
+
+	if (allmulti) {
+		if (!test_and_set_bit(I40E_VF_STATE_MC_PROMISC,
+				      &vf->vf_states))
+			dev_info(&pf->pdev->dev,
+				 "VF %d successfully set multicast promiscuous mode\n",
+				 vf->vf_id);
+	} else if (test_and_clear_bit(I40E_VF_STATE_MC_PROMISC,
+				      &vf->vf_states))
+		dev_info(&pf->pdev->dev,
+			 "VF %d successfully unset multicast promiscuous mode\n",
+			 vf->vf_id);
+
+	if (alluni) {
+		if (!test_and_set_bit(I40E_VF_STATE_UC_PROMISC,
+				      &vf->vf_states))
+			dev_info(&pf->pdev->dev,
+				 "VF %d successfully set unicast promiscuous mode\n",
+				 vf->vf_id);
+	} else if (test_and_clear_bit(I40E_VF_STATE_UC_PROMISC,
+				      &vf->vf_states))
+		dev_info(&pf->pdev->dev,
+			 "VF %d successfully unset unicast promiscuous mode\n",
+			 vf->vf_id);
+
+err_out:
+	/* send the response to the VF */
+	return i40e_vc_send_resp_to_vf(vf,
+				       VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+				       aq_ret);
+}
+
+/**
+ * i40e_vc_config_queues_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure the rx/tx
+ * queues
+ **/
+static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_vsi_queue_config_info *qci =
+	    (struct virtchnl_vsi_queue_config_info *)msg;
+	struct virtchnl_queue_pair_info *qpi;
+	u16 vsi_id, vsi_queue_id = 0;
+	struct i40e_pf *pf = vf->pf;
+	int i, j = 0, idx = 0;
+	struct i40e_vsi *vsi;
+	u16 num_qps_all = 0;
+	int aq_ret = 0;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	if (!i40e_vc_isvalid_vsi_id(vf, qci->vsi_id)) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	if (qci->num_queue_pairs > I40E_MAX_VF_QUEUES) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	if (vf->adq_enabled) {
+		for (i = 0; i < vf->num_tc; i++)
+			num_qps_all += vf->ch[i].num_qps;
+		if (num_qps_all != qci->num_queue_pairs) {
+			aq_ret = -EINVAL;
+			goto error_param;
+		}
+	}
+
+	vsi_id = qci->vsi_id;
+
+	for (i = 0; i < qci->num_queue_pairs; i++) {
+		qpi = &qci->qpair[i];
+
+		if (!vf->adq_enabled) {
+			if (!i40e_vc_isvalid_queue_id(vf, vsi_id,
+						      qpi->txq.queue_id)) {
+				aq_ret = -EINVAL;
+				goto error_param;
+			}
+
+			vsi_queue_id = qpi->txq.queue_id;
+
+			if (qpi->txq.vsi_id != qci->vsi_id ||
+			    qpi->rxq.vsi_id != qci->vsi_id ||
+			    qpi->rxq.queue_id != vsi_queue_id) {
+				aq_ret = -EINVAL;
+				goto error_param;
+			}
+		}
+
+		if (vf->adq_enabled) {
+			if (idx >= ARRAY_SIZE(vf->ch)) {
+				aq_ret = -ENODEV;
+				goto error_param;
+			}
+			vsi_id = vf->ch[idx].vsi_id;
+		}
+
+		if (i40e_config_vsi_rx_queue(vf, vsi_id, vsi_queue_id,
+					     &qpi->rxq) ||
+		    i40e_config_vsi_tx_queue(vf, vsi_id, vsi_queue_id,
+					     &qpi->txq)) {
+			aq_ret = -EINVAL;
+			goto error_param;
+		}
+
+		/* For ADq there can be up to 4 VSIs with max 4 queues each.
+		 * VF does not know about these additional VSIs and all
+		 * it cares is about its own queues. PF configures these queues
+		 * to its appropriate VSIs based on TC mapping
+		 */
+		if (vf->adq_enabled) {
+			if (idx >= ARRAY_SIZE(vf->ch)) {
+				aq_ret = -ENODEV;
+				goto error_param;
+			}
+			if (j == (vf->ch[idx].num_qps - 1)) {
+				idx++;
+				j = 0; /* resetting the queue count */
+				vsi_queue_id = 0;
+			} else {
+				j++;
+				vsi_queue_id++;
+			}
+		}
+	}
+	/* set vsi num_queue_pairs in use to num configured by VF */
+	if (!vf->adq_enabled) {
+		pf->vsi[vf->lan_vsi_idx]->num_queue_pairs =
+			qci->num_queue_pairs;
+	} else {
+		for (i = 0; i < vf->num_tc; i++) {
+			vsi = pf->vsi[vf->ch[i].vsi_idx];
+			vsi->num_queue_pairs = vf->ch[i].num_qps;
+
+			if (i40e_update_adq_vsi_queues(vsi, i)) {
+				aq_ret = -EIO;
+				goto error_param;
+			}
+		}
+	}
+
+error_param:
+	/* send the response to the VF */
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+				       aq_ret);
+}
+
+/**
+ * i40e_validate_queue_map - check queue map is valid
+ * @vf: the VF structure pointer
+ * @vsi_id: vsi id
+ * @queuemap: Tx or Rx queue map
+ *
+ * check if Tx or Rx queue map is valid
+ **/
+static int i40e_validate_queue_map(struct i40e_vf *vf, u16 vsi_id,
+				   unsigned long queuemap)
+{
+	u16 vsi_queue_id, queue_id;
+
+	for_each_set_bit(vsi_queue_id, &queuemap, I40E_MAX_VSI_QP) {
+		if (vf->adq_enabled) {
+			vsi_id = vf->ch[vsi_queue_id / I40E_MAX_VF_VSI].vsi_id;
+			queue_id = (vsi_queue_id % I40E_DEFAULT_QUEUES_PER_VF);
+		} else {
+			queue_id = vsi_queue_id;
+		}
+
+		if (!i40e_vc_isvalid_queue_id(vf, vsi_id, queue_id))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_vc_config_irq_map_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure the irq to
+ * queue map
+ **/
+static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_irq_map_info *irqmap_info =
+	    (struct virtchnl_irq_map_info *)msg;
+	struct virtchnl_vector_map *map;
+	int aq_ret = 0;
+	u16 vsi_id;
+	int i;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	if (irqmap_info->num_vectors >
+	    vf->pf->hw.func_caps.num_msix_vectors_vf) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	for (i = 0; i < irqmap_info->num_vectors; i++) {
+		map = &irqmap_info->vecmap[i];
+		/* validate msg params */
+		if (!i40e_vc_isvalid_vector_id(vf, map->vector_id) ||
+		    !i40e_vc_isvalid_vsi_id(vf, map->vsi_id)) {
+			aq_ret = -EINVAL;
+			goto error_param;
+		}
+		vsi_id = map->vsi_id;
+
+		if (i40e_validate_queue_map(vf, vsi_id, map->rxq_map)) {
+			aq_ret = -EINVAL;
+			goto error_param;
+		}
+
+		if (i40e_validate_queue_map(vf, vsi_id, map->txq_map)) {
+			aq_ret = -EINVAL;
+			goto error_param;
+		}
+
+		i40e_config_irq_link_list(vf, vsi_id, map);
+	}
+error_param:
+	/* send the response to the VF */
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP,
+				       aq_ret);
+}
+
+/**
+ * i40e_ctrl_vf_tx_rings
+ * @vsi: the SRIOV VSI being configured
+ * @q_map: bit map of the queues to be enabled
+ * @enable: start or stop the queue
+ **/
+static int i40e_ctrl_vf_tx_rings(struct i40e_vsi *vsi, unsigned long q_map,
+				 bool enable)
+{
+	struct i40e_pf *pf = vsi->back;
+	int ret = 0;
+	u16 q_id;
+
+	for_each_set_bit(q_id, &q_map, I40E_MAX_VF_QUEUES) {
+		ret = i40e_control_wait_tx_q(vsi->seid, pf,
+					     vsi->base_queue + q_id,
+					     false /*is xdp*/, enable);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+/**
+ * i40e_ctrl_vf_rx_rings
+ * @vsi: the SRIOV VSI being configured
+ * @q_map: bit map of the queues to be enabled
+ * @enable: start or stop the queue
+ **/
+static int i40e_ctrl_vf_rx_rings(struct i40e_vsi *vsi, unsigned long q_map,
+				 bool enable)
+{
+	struct i40e_pf *pf = vsi->back;
+	int ret = 0;
+	u16 q_id;
+
+	for_each_set_bit(q_id, &q_map, I40E_MAX_VF_QUEUES) {
+		ret = i40e_control_wait_rx_q(pf, vsi->base_queue + q_id,
+					     enable);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+/**
+ * i40e_vc_validate_vqs_bitmaps - validate Rx/Tx queue bitmaps from VIRTHCHNL
+ * @vqs: virtchnl_queue_select structure containing bitmaps to validate
+ *
+ * Returns true if validation was successful, else false.
+ */
+static bool i40e_vc_validate_vqs_bitmaps(struct virtchnl_queue_select *vqs)
+{
+	if ((!vqs->rx_queues && !vqs->tx_queues) ||
+	    vqs->rx_queues >= BIT(I40E_MAX_VF_QUEUES) ||
+	    vqs->tx_queues >= BIT(I40E_MAX_VF_QUEUES))
+		return false;
+
+	return true;
+}
+
+/**
+ * i40e_vc_enable_queues_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to enable all or specific queue(s)
+ **/
+static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_queue_select *vqs =
+	    (struct virtchnl_queue_select *)msg;
+	struct i40e_pf *pf = vf->pf;
+	int aq_ret = 0;
+	int i;
+
+	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	if (!i40e_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	if (!i40e_vc_validate_vqs_bitmaps(vqs)) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	/* Use the queue bit map sent by the VF */
+	if (i40e_ctrl_vf_rx_rings(pf->vsi[vf->lan_vsi_idx], vqs->rx_queues,
+				  true)) {
+		aq_ret = -EIO;
+		goto error_param;
+	}
+	if (i40e_ctrl_vf_tx_rings(pf->vsi[vf->lan_vsi_idx], vqs->tx_queues,
+				  true)) {
+		aq_ret = -EIO;
+		goto error_param;
+	}
+
+	/* need to start the rings for additional ADq VSI's as well */
+	if (vf->adq_enabled) {
+		/* zero belongs to LAN VSI */
+		for (i = 1; i < vf->num_tc; i++) {
+			if (i40e_vsi_start_rings(pf->vsi[vf->ch[i].vsi_idx]))
+				aq_ret = -EIO;
+		}
+	}
+
+error_param:
+	/* send the response to the VF */
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES,
+				       aq_ret);
+}
+
+/**
+ * i40e_vc_disable_queues_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to disable all or specific
+ * queue(s)
+ **/
+static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_queue_select *vqs =
+	    (struct virtchnl_queue_select *)msg;
+	struct i40e_pf *pf = vf->pf;
+	int aq_ret = 0;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	if (!i40e_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	if (!i40e_vc_validate_vqs_bitmaps(vqs)) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	/* Use the queue bit map sent by the VF */
+	if (i40e_ctrl_vf_tx_rings(pf->vsi[vf->lan_vsi_idx], vqs->tx_queues,
+				  false)) {
+		aq_ret = -EIO;
+		goto error_param;
+	}
+	if (i40e_ctrl_vf_rx_rings(pf->vsi[vf->lan_vsi_idx], vqs->rx_queues,
+				  false)) {
+		aq_ret = -EIO;
+		goto error_param;
+	}
+error_param:
+	/* send the response to the VF */
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES,
+				       aq_ret);
+}
+
+/**
+ * i40e_check_enough_queue - find big enough queue number
+ * @vf: pointer to the VF info
+ * @needed: the number of items needed
+ *
+ * Returns the base item index of the queue, or negative for error
+ **/
+static int i40e_check_enough_queue(struct i40e_vf *vf, u16 needed)
+{
+	unsigned int  i, cur_queues, more, pool_size;
+	struct i40e_lump_tracking *pile;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi;
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	cur_queues = vsi->alloc_queue_pairs;
+
+	/* if current allocated queues are enough for need */
+	if (cur_queues >= needed)
+		return vsi->base_queue;
+
+	pile = pf->qp_pile;
+	if (cur_queues > 0) {
+		/* if the allocated queues are not zero
+		 * just check if there are enough queues for more
+		 * behind the allocated queues.
+		 */
+		more = needed - cur_queues;
+		for (i = vsi->base_queue + cur_queues;
+			i < pile->num_entries; i++) {
+			if (pile->list[i] & I40E_PILE_VALID_BIT)
+				break;
+
+			if (more-- == 1)
+				/* there is enough */
+				return vsi->base_queue;
+		}
+	}
+
+	pool_size = 0;
+	for (i = 0; i < pile->num_entries; i++) {
+		if (pile->list[i] & I40E_PILE_VALID_BIT) {
+			pool_size = 0;
+			continue;
+		}
+		if (needed <= ++pool_size)
+			/* there is enough */
+			return i;
+	}
+
+	return -ENOMEM;
+}
+
+/**
+ * i40e_vc_request_queues_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * VFs get a default number of queues but can use this message to request a
+ * different number.  If the request is successful, PF will reset the VF and
+ * return 0.  If unsuccessful, PF will send message informing VF of number of
+ * available queues and return result of sending VF a message.
+ **/
+static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_vf_res_request *vfres =
+		(struct virtchnl_vf_res_request *)msg;
+	u16 req_pairs = vfres->num_queue_pairs;
+	u8 cur_pairs = vf->num_queue_pairs;
+	struct i40e_pf *pf = vf->pf;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE))
+		return -EINVAL;
+
+	if (req_pairs > I40E_MAX_VF_QUEUES) {
+		dev_err(&pf->pdev->dev,
+			"VF %d tried to request more than %d queues.\n",
+			vf->vf_id,
+			I40E_MAX_VF_QUEUES);
+		vfres->num_queue_pairs = I40E_MAX_VF_QUEUES;
+	} else if (req_pairs - cur_pairs > pf->queues_left) {
+		dev_warn(&pf->pdev->dev,
+			 "VF %d requested %d more queues, but only %d left.\n",
+			 vf->vf_id,
+			 req_pairs - cur_pairs,
+			 pf->queues_left);
+		vfres->num_queue_pairs = pf->queues_left + cur_pairs;
+	} else if (i40e_check_enough_queue(vf, req_pairs) < 0) {
+		dev_warn(&pf->pdev->dev,
+			 "VF %d requested %d more queues, but there is not enough for it.\n",
+			 vf->vf_id,
+			 req_pairs - cur_pairs);
+		vfres->num_queue_pairs = cur_pairs;
+	} else {
+		/* successful request */
+		vf->num_req_queues = req_pairs;
+		i40e_vc_reset_vf(vf, true);
+		return 0;
+	}
+
+	return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, 0,
+				      (u8 *)vfres, sizeof(*vfres));
+}
+
+/**
+ * i40e_vc_get_stats_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to get vsi stats
+ **/
+static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_queue_select *vqs =
+	    (struct virtchnl_queue_select *)msg;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_eth_stats stats;
+	int aq_ret = 0;
+	struct i40e_vsi *vsi;
+
+	memset(&stats, 0, sizeof(struct i40e_eth_stats));
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	if (!i40e_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+	i40e_update_eth_stats(vsi);
+	stats = vsi->eth_stats;
+
+error_param:
+	/* send the response back to the VF */
+	return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_STATS, aq_ret,
+				      (u8 *)&stats, sizeof(stats));
+}
+
+#define I40E_MAX_MACVLAN_PER_HW 3072
+#define I40E_MAX_MACVLAN_PER_PF(num_ports) (I40E_MAX_MACVLAN_PER_HW /	\
+	(num_ports))
+/* If the VF is not trusted restrict the number of MAC/VLAN it can program
+ * MAC filters: 16 for multicast, 1 for MAC, 1 for broadcast
+ */
+#define I40E_VC_MAX_MAC_ADDR_PER_VF (16 + 1 + 1)
+#define I40E_VC_MAX_VLAN_PER_VF 16
+
+#define I40E_VC_MAX_MACVLAN_PER_TRUSTED_VF(vf_num, num_ports)		\
+({	typeof(vf_num) vf_num_ = (vf_num);				\
+	typeof(num_ports) num_ports_ = (num_ports);			\
+	((I40E_MAX_MACVLAN_PER_PF(num_ports_) - vf_num_ *		\
+	I40E_VC_MAX_MAC_ADDR_PER_VF) / vf_num_) +			\
+	I40E_VC_MAX_MAC_ADDR_PER_VF; })
+/**
+ * i40e_check_vf_permission
+ * @vf: pointer to the VF info
+ * @al: MAC address list from virtchnl
+ *
+ * Check that the given list of MAC addresses is allowed. Will return -EPERM
+ * if any address in the list is not valid. Checks the following conditions:
+ *
+ * 1) broadcast and zero addresses are never valid
+ * 2) unicast addresses are not allowed if the VMM has administratively set
+ *    the VF MAC address, unless the VF is marked as privileged.
+ * 3) There is enough space to add all the addresses.
+ *
+ * Note that to guarantee consistency, it is expected this function be called
+ * while holding the mac_filter_hash_lock, as otherwise the current number of
+ * addresses might not be accurate.
+ **/
+static inline int i40e_check_vf_permission(struct i40e_vf *vf,
+					   struct virtchnl_ether_addr_list *al)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
+	struct i40e_hw *hw = &pf->hw;
+	int mac2add_cnt = 0;
+	int i;
+
+	for (i = 0; i < al->num_elements; i++) {
+		struct i40e_mac_filter *f;
+		u8 *addr = al->list[i].addr;
+
+		if (is_broadcast_ether_addr(addr) ||
+		    is_zero_ether_addr(addr)) {
+			dev_err(&pf->pdev->dev, "invalid VF MAC addr %pM\n",
+				addr);
+			return -EINVAL;
+		}
+
+		/* If the host VMM administrator has set the VF MAC address
+		 * administratively via the ndo_set_vf_mac command then deny
+		 * permission to the VF to add or delete unicast MAC addresses.
+		 * Unless the VF is privileged and then it can do whatever.
+		 * The VF may request to set the MAC address filter already
+		 * assigned to it so do not return an error in that case.
+		 */
+		if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) &&
+		    !is_multicast_ether_addr(addr) && vf->pf_set_mac &&
+		    !ether_addr_equal(addr, vf->default_lan_addr.addr)) {
+			dev_err(&pf->pdev->dev,
+				"VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
+			return -EPERM;
+		}
+
+		/*count filters that really will be added*/
+		f = i40e_find_mac(vsi, addr);
+		if (!f)
+			++mac2add_cnt;
+	}
+
+	/* If this VF is not privileged, then we can't add more than a limited
+	 * number of addresses. Check to make sure that the additions do not
+	 * push us over the limit.
+	 */
+	if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
+		if ((i40e_count_filters(vsi) + mac2add_cnt) >
+		    I40E_VC_MAX_MAC_ADDR_PER_VF) {
+			dev_err(&pf->pdev->dev,
+				"Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n");
+			return -EPERM;
+		}
+	/* If this VF is trusted, it can use more resources than untrusted.
+	 * However to ensure that every trusted VF has appropriate number of
+	 * resources, divide whole pool of resources per port and then across
+	 * all VFs.
+	 */
+	} else {
+		if ((i40e_count_filters(vsi) + mac2add_cnt) >
+		    I40E_VC_MAX_MACVLAN_PER_TRUSTED_VF(pf->num_alloc_vfs,
+						       hw->num_ports)) {
+			dev_err(&pf->pdev->dev,
+				"Cannot add more MAC addresses, trusted VF exhausted it's resources\n");
+			return -EPERM;
+		}
+	}
+	return 0;
+}
+
+/**
+ * i40e_vc_ether_addr_type - get type of virtchnl_ether_addr
+ * @vc_ether_addr: used to extract the type
+ **/
+static u8
+i40e_vc_ether_addr_type(struct virtchnl_ether_addr *vc_ether_addr)
+{
+	return vc_ether_addr->type & VIRTCHNL_ETHER_ADDR_TYPE_MASK;
+}
+
+/**
+ * i40e_is_vc_addr_legacy
+ * @vc_ether_addr: VIRTCHNL structure that contains MAC and type
+ *
+ * check if the MAC address is from an older VF
+ **/
+static bool
+i40e_is_vc_addr_legacy(struct virtchnl_ether_addr *vc_ether_addr)
+{
+	return i40e_vc_ether_addr_type(vc_ether_addr) ==
+		VIRTCHNL_ETHER_ADDR_LEGACY;
+}
+
+/**
+ * i40e_is_vc_addr_primary
+ * @vc_ether_addr: VIRTCHNL structure that contains MAC and type
+ *
+ * check if the MAC address is the VF's primary MAC
+ * This function should only be called when the MAC address in
+ * virtchnl_ether_addr is a valid unicast MAC
+ **/
+static bool
+i40e_is_vc_addr_primary(struct virtchnl_ether_addr *vc_ether_addr)
+{
+	return i40e_vc_ether_addr_type(vc_ether_addr) ==
+		VIRTCHNL_ETHER_ADDR_PRIMARY;
+}
+
+/**
+ * i40e_update_vf_mac_addr
+ * @vf: VF to update
+ * @vc_ether_addr: structure from VIRTCHNL with MAC to add
+ *
+ * update the VF's cached hardware MAC if allowed
+ **/
+static void
+i40e_update_vf_mac_addr(struct i40e_vf *vf,
+			struct virtchnl_ether_addr *vc_ether_addr)
+{
+	u8 *mac_addr = vc_ether_addr->addr;
+
+	if (!is_valid_ether_addr(mac_addr))
+		return;
+
+	/* If request to add MAC filter is a primary request update its default
+	 * MAC address with the requested one. If it is a legacy request then
+	 * check if current default is empty if so update the default MAC
+	 */
+	if (i40e_is_vc_addr_primary(vc_ether_addr)) {
+		ether_addr_copy(vf->default_lan_addr.addr, mac_addr);
+	} else if (i40e_is_vc_addr_legacy(vc_ether_addr)) {
+		if (is_zero_ether_addr(vf->default_lan_addr.addr))
+			ether_addr_copy(vf->default_lan_addr.addr, mac_addr);
+	}
+}
+
+/**
+ * i40e_vc_add_mac_addr_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * add guest mac address filter
+ **/
+static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_ether_addr_list *al =
+	    (struct virtchnl_ether_addr_list *)msg;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	int ret = 0;
+	int i;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) ||
+	    !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) {
+		ret = -EINVAL;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	/* Lock once, because all function inside for loop accesses VSI's
+	 * MAC filter list which needs to be protected using same lock.
+	 */
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+
+	ret = i40e_check_vf_permission(vf, al);
+	if (ret) {
+		spin_unlock_bh(&vsi->mac_filter_hash_lock);
+		goto error_param;
+	}
+
+	/* add new addresses to the list */
+	for (i = 0; i < al->num_elements; i++) {
+		struct i40e_mac_filter *f;
+
+		f = i40e_find_mac(vsi, al->list[i].addr);
+		if (!f) {
+			f = i40e_add_mac_filter(vsi, al->list[i].addr);
+
+			if (!f) {
+				dev_err(&pf->pdev->dev,
+					"Unable to add MAC filter %pM for VF %d\n",
+					al->list[i].addr, vf->vf_id);
+				ret = -EINVAL;
+				spin_unlock_bh(&vsi->mac_filter_hash_lock);
+				goto error_param;
+			}
+		}
+		i40e_update_vf_mac_addr(vf, &al->list[i]);
+	}
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	/* program the updated filter list */
+	ret = i40e_sync_vsi_filters(vsi);
+	if (ret)
+		dev_err(&pf->pdev->dev, "Unable to program VF %d MAC filters, error %d\n",
+			vf->vf_id, ret);
+
+error_param:
+	/* send the response to the VF */
+	return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_ETH_ADDR,
+				      ret, NULL, 0);
+}
+
+/**
+ * i40e_vc_del_mac_addr_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * remove guest mac address filter
+ **/
+static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_ether_addr_list *al =
+	    (struct virtchnl_ether_addr_list *)msg;
+	bool was_unimac_deleted = false;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	int ret = 0;
+	int i;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) ||
+	    !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) {
+		ret = -EINVAL;
+		goto error_param;
+	}
+
+	for (i = 0; i < al->num_elements; i++) {
+		if (is_broadcast_ether_addr(al->list[i].addr) ||
+		    is_zero_ether_addr(al->list[i].addr)) {
+			dev_err(&pf->pdev->dev, "Invalid MAC addr %pM for VF %d\n",
+				al->list[i].addr, vf->vf_id);
+			ret = -EINVAL;
+			goto error_param;
+		}
+		if (ether_addr_equal(al->list[i].addr, vf->default_lan_addr.addr))
+			was_unimac_deleted = true;
+	}
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	/* delete addresses from the list */
+	for (i = 0; i < al->num_elements; i++)
+		if (i40e_del_mac_filter(vsi, al->list[i].addr)) {
+			ret = -EINVAL;
+			spin_unlock_bh(&vsi->mac_filter_hash_lock);
+			goto error_param;
+		}
+
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	if (was_unimac_deleted)
+		eth_zero_addr(vf->default_lan_addr.addr);
+
+	/* program the updated filter list */
+	ret = i40e_sync_vsi_filters(vsi);
+	if (ret)
+		dev_err(&pf->pdev->dev, "Unable to program VF %d MAC filters, error %d\n",
+			vf->vf_id, ret);
+
+	if (vf->trusted && was_unimac_deleted) {
+		struct i40e_mac_filter *f;
+		struct hlist_node *h;
+		u8 *macaddr = NULL;
+		int bkt;
+
+		/* set last unicast mac address as default */
+		spin_lock_bh(&vsi->mac_filter_hash_lock);
+		hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+			if (is_valid_ether_addr(f->macaddr))
+				macaddr = f->macaddr;
+		}
+		if (macaddr)
+			ether_addr_copy(vf->default_lan_addr.addr, macaddr);
+		spin_unlock_bh(&vsi->mac_filter_hash_lock);
+	}
+error_param:
+	/* send the response to the VF */
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_ETH_ADDR, ret);
+}
+
+/**
+ * i40e_vc_add_vlan_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * program guest vlan id
+ **/
+static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_vlan_filter_list *vfl =
+	    (struct virtchnl_vlan_filter_list *)msg;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	int aq_ret = 0;
+	int i;
+
+	if ((vf->num_vlan >= I40E_VC_MAX_VLAN_PER_VF) &&
+	    !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
+		dev_err(&pf->pdev->dev,
+			"VF is not trusted, switch the VF to trusted to add more VLAN addresses\n");
+		goto error_param;
+	}
+	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
+	    !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	for (i = 0; i < vfl->num_elements; i++) {
+		if (vfl->vlan_id[i] > I40E_MAX_VLANID) {
+			aq_ret = -EINVAL;
+			dev_err(&pf->pdev->dev,
+				"invalid VF VLAN id %d\n", vfl->vlan_id[i]);
+			goto error_param;
+		}
+	}
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (vsi->info.pvid) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	i40e_vlan_stripping_enable(vsi);
+	for (i = 0; i < vfl->num_elements; i++) {
+		/* add new VLAN filter */
+		int ret = i40e_vsi_add_vlan(vsi, vfl->vlan_id[i]);
+		if (!ret)
+			vf->num_vlan++;
+
+		if (test_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states))
+			i40e_aq_set_vsi_uc_promisc_on_vlan(&pf->hw, vsi->seid,
+							   true,
+							   vfl->vlan_id[i],
+							   NULL);
+		if (test_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states))
+			i40e_aq_set_vsi_mc_promisc_on_vlan(&pf->hw, vsi->seid,
+							   true,
+							   vfl->vlan_id[i],
+							   NULL);
+
+		if (ret)
+			dev_err(&pf->pdev->dev,
+				"Unable to add VLAN filter %d for VF %d, error %d\n",
+				vfl->vlan_id[i], vf->vf_id, ret);
+	}
+
+error_param:
+	/* send the response to the VF */
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_VLAN, aq_ret);
+}
+
+/**
+ * i40e_vc_remove_vlan_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * remove programmed guest vlan id
+ **/
+static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_vlan_filter_list *vfl =
+	    (struct virtchnl_vlan_filter_list *)msg;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	int aq_ret = 0;
+	int i;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) ||
+	    !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	for (i = 0; i < vfl->num_elements; i++) {
+		if (vfl->vlan_id[i] > I40E_MAX_VLANID) {
+			aq_ret = -EINVAL;
+			goto error_param;
+		}
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (vsi->info.pvid) {
+		if (vfl->num_elements > 1 || vfl->vlan_id[0])
+			aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	for (i = 0; i < vfl->num_elements; i++) {
+		i40e_vsi_kill_vlan(vsi, vfl->vlan_id[i]);
+		vf->num_vlan--;
+
+		if (test_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states))
+			i40e_aq_set_vsi_uc_promisc_on_vlan(&pf->hw, vsi->seid,
+							   false,
+							   vfl->vlan_id[i],
+							   NULL);
+		if (test_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states))
+			i40e_aq_set_vsi_mc_promisc_on_vlan(&pf->hw, vsi->seid,
+							   false,
+							   vfl->vlan_id[i],
+							   NULL);
+	}
+
+error_param:
+	/* send the response to the VF */
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_VLAN, aq_ret);
+}
+
+/**
+ * i40e_vc_rdma_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * called from the VF for the iwarp msgs
+ **/
+static int i40e_vc_rdma_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+{
+	struct i40e_pf *pf = vf->pf;
+	int abs_vf_id = vf->vf_id + pf->hw.func_caps.vf_base_id;
+	int aq_ret = 0;
+
+	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
+	    !test_bit(I40E_VF_STATE_RDMAENA, &vf->vf_states)) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	i40e_notify_client_of_vf_msg(pf->vsi[pf->lan_vsi], abs_vf_id,
+				     msg, msglen);
+
+error_param:
+	/* send the response to the VF */
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_RDMA,
+				       aq_ret);
+}
+
+/**
+ * i40e_vc_rdma_qvmap_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @config: config qvmap or release it
+ *
+ * called from the VF for the iwarp msgs
+ **/
+static int i40e_vc_rdma_qvmap_msg(struct i40e_vf *vf, u8 *msg, bool config)
+{
+	struct virtchnl_rdma_qvlist_info *qvlist_info =
+				(struct virtchnl_rdma_qvlist_info *)msg;
+	int aq_ret = 0;
+
+	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) ||
+	    !test_bit(I40E_VF_STATE_RDMAENA, &vf->vf_states)) {
+		aq_ret = -EINVAL;
+		goto error_param;
+	}
+
+	if (config) {
+		if (i40e_config_rdma_qvlist(vf, qvlist_info))
+			aq_ret = -EINVAL;
+	} else {
+		i40e_release_rdma_qvlist(vf);
+	}
+
+error_param:
+	/* send the response to the VF */
+	return i40e_vc_send_resp_to_vf(vf,
+			       config ? VIRTCHNL_OP_CONFIG_RDMA_IRQ_MAP :
+			       VIRTCHNL_OP_RELEASE_RDMA_IRQ_MAP,
+			       aq_ret);
+}
+
+/**
+ * i40e_vc_config_rss_key
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Configure the VF's RSS key
+ **/
+static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_rss_key *vrk =
+		(struct virtchnl_rss_key *)msg;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	int aq_ret = 0;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) ||
+	    !i40e_vc_isvalid_vsi_id(vf, vrk->vsi_id) ||
+	    vrk->key_len != I40E_HKEY_ARRAY_SIZE) {
+		aq_ret = -EINVAL;
+		goto err;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	aq_ret = i40e_config_rss(vsi, vrk->key, NULL, 0);
+err:
+	/* send the response to the VF */
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY,
+				       aq_ret);
+}
+
+/**
+ * i40e_vc_config_rss_lut
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Configure the VF's RSS LUT
+ **/
+static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_rss_lut *vrl =
+		(struct virtchnl_rss_lut *)msg;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	int aq_ret = 0;
+	u16 i;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) ||
+	    !i40e_vc_isvalid_vsi_id(vf, vrl->vsi_id) ||
+	    vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE) {
+		aq_ret = -EINVAL;
+		goto err;
+	}
+
+	for (i = 0; i < vrl->lut_entries; i++)
+		if (vrl->lut[i] >= vf->num_queue_pairs) {
+			aq_ret = -EINVAL;
+			goto err;
+		}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	aq_ret = i40e_config_rss(vsi, NULL, vrl->lut, I40E_VF_HLUT_ARRAY_SIZE);
+	/* send the response to the VF */
+err:
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT,
+				       aq_ret);
+}
+
+/**
+ * i40e_vc_get_rss_hena
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Return the RSS HENA bits allowed by the hardware
+ **/
+static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_rss_hena *vrh = NULL;
+	struct i40e_pf *pf = vf->pf;
+	int aq_ret = 0;
+	int len = 0;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+		aq_ret = -EINVAL;
+		goto err;
+	}
+	len = sizeof(struct virtchnl_rss_hena);
+
+	vrh = kzalloc(len, GFP_KERNEL);
+	if (!vrh) {
+		aq_ret = -ENOMEM;
+		len = 0;
+		goto err;
+	}
+	vrh->hena = i40e_pf_get_default_rss_hena(pf);
+err:
+	/* send the response back to the VF */
+	aq_ret = i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_RSS_HENA_CAPS,
+					aq_ret, (u8 *)vrh, len);
+	kfree(vrh);
+	return aq_ret;
+}
+
+/**
+ * i40e_vc_set_rss_hena
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Set the RSS HENA bits for the VF
+ **/
+static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_rss_hena *vrh =
+		(struct virtchnl_rss_hena *)msg;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_hw *hw = &pf->hw;
+	int aq_ret = 0;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+		aq_ret = -EINVAL;
+		goto err;
+	}
+	i40e_write_rx_ctl(hw, I40E_VFQF_HENA1(0, vf->vf_id), (u32)vrh->hena);
+	i40e_write_rx_ctl(hw, I40E_VFQF_HENA1(1, vf->vf_id),
+			  (u32)(vrh->hena >> 32));
+
+	/* send the response to the VF */
+err:
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_SET_RSS_HENA, aq_ret);
+}
+
+/**
+ * i40e_vc_enable_vlan_stripping
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Enable vlan header stripping for the VF
+ **/
+static int i40e_vc_enable_vlan_stripping(struct i40e_vf *vf, u8 *msg)
+{
+	struct i40e_vsi *vsi;
+	int aq_ret = 0;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+		aq_ret = -EINVAL;
+		goto err;
+	}
+
+	vsi = vf->pf->vsi[vf->lan_vsi_idx];
+	i40e_vlan_stripping_enable(vsi);
+
+	/* send the response to the VF */
+err:
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING,
+				       aq_ret);
+}
+
+/**
+ * i40e_vc_disable_vlan_stripping
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Disable vlan header stripping for the VF
+ **/
+static int i40e_vc_disable_vlan_stripping(struct i40e_vf *vf, u8 *msg)
+{
+	struct i40e_vsi *vsi;
+	int aq_ret = 0;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+		aq_ret = -EINVAL;
+		goto err;
+	}
+
+	vsi = vf->pf->vsi[vf->lan_vsi_idx];
+	i40e_vlan_stripping_disable(vsi);
+
+	/* send the response to the VF */
+err:
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
+				       aq_ret);
+}
+
+/**
+ * i40e_validate_cloud_filter
+ * @vf: pointer to VF structure
+ * @tc_filter: pointer to filter requested
+ *
+ * This function validates cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_validate_cloud_filter(struct i40e_vf *vf,
+				      struct virtchnl_filter *tc_filter)
+{
+	struct virtchnl_l4_spec mask = tc_filter->mask.tcp_spec;
+	struct virtchnl_l4_spec data = tc_filter->data.tcp_spec;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	struct i40e_mac_filter *f;
+	struct hlist_node *h;
+	bool found = false;
+	int bkt;
+
+	if (tc_filter->action != VIRTCHNL_ACTION_TC_REDIRECT) {
+		dev_info(&pf->pdev->dev,
+			 "VF %d: ADQ doesn't support this action (%d)\n",
+			 vf->vf_id, tc_filter->action);
+		goto err;
+	}
+
+	/* action_meta is TC number here to which the filter is applied */
+	if (!tc_filter->action_meta ||
+	    tc_filter->action_meta > vf->num_tc) {
+		dev_info(&pf->pdev->dev, "VF %d: Invalid TC number %u\n",
+			 vf->vf_id, tc_filter->action_meta);
+		goto err;
+	}
+
+	/* Check filter if it's programmed for advanced mode or basic mode.
+	 * There are two ADq modes (for VF only),
+	 * 1. Basic mode: intended to allow as many filter options as possible
+	 *		  to be added to a VF in Non-trusted mode. Main goal is
+	 *		  to add filters to its own MAC and VLAN id.
+	 * 2. Advanced mode: is for allowing filters to be applied other than
+	 *		  its own MAC or VLAN. This mode requires the VF to be
+	 *		  Trusted.
+	 */
+	if (mask.dst_mac[0] && !mask.dst_ip[0]) {
+		vsi = pf->vsi[vf->lan_vsi_idx];
+		f = i40e_find_mac(vsi, data.dst_mac);
+
+		if (!f) {
+			dev_info(&pf->pdev->dev,
+				 "Destination MAC %pM doesn't belong to VF %d\n",
+				 data.dst_mac, vf->vf_id);
+			goto err;
+		}
+
+		if (mask.vlan_id) {
+			hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f,
+					   hlist) {
+				if (f->vlan == ntohs(data.vlan_id)) {
+					found = true;
+					break;
+				}
+			}
+			if (!found) {
+				dev_info(&pf->pdev->dev,
+					 "VF %d doesn't have any VLAN id %u\n",
+					 vf->vf_id, ntohs(data.vlan_id));
+				goto err;
+			}
+		}
+	} else {
+		/* Check if VF is trusted */
+		if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
+			dev_err(&pf->pdev->dev,
+				"VF %d not trusted, make VF trusted to add advanced mode ADq cloud filters\n",
+				vf->vf_id);
+			return -EIO;
+		}
+	}
+
+	if (mask.dst_mac[0] & data.dst_mac[0]) {
+		if (is_broadcast_ether_addr(data.dst_mac) ||
+		    is_zero_ether_addr(data.dst_mac)) {
+			dev_info(&pf->pdev->dev, "VF %d: Invalid Dest MAC addr %pM\n",
+				 vf->vf_id, data.dst_mac);
+			goto err;
+		}
+	}
+
+	if (mask.src_mac[0] & data.src_mac[0]) {
+		if (is_broadcast_ether_addr(data.src_mac) ||
+		    is_zero_ether_addr(data.src_mac)) {
+			dev_info(&pf->pdev->dev, "VF %d: Invalid Source MAC addr %pM\n",
+				 vf->vf_id, data.src_mac);
+			goto err;
+		}
+	}
+
+	if (mask.dst_port & data.dst_port) {
+		if (!data.dst_port) {
+			dev_info(&pf->pdev->dev, "VF %d: Invalid Dest port\n",
+				 vf->vf_id);
+			goto err;
+		}
+	}
+
+	if (mask.src_port & data.src_port) {
+		if (!data.src_port) {
+			dev_info(&pf->pdev->dev, "VF %d: Invalid Source port\n",
+				 vf->vf_id);
+			goto err;
+		}
+	}
+
+	if (tc_filter->flow_type != VIRTCHNL_TCP_V6_FLOW &&
+	    tc_filter->flow_type != VIRTCHNL_TCP_V4_FLOW) {
+		dev_info(&pf->pdev->dev, "VF %d: Invalid Flow type\n",
+			 vf->vf_id);
+		goto err;
+	}
+
+	if (mask.vlan_id & data.vlan_id) {
+		if (ntohs(data.vlan_id) > I40E_MAX_VLANID) {
+			dev_info(&pf->pdev->dev, "VF %d: invalid VLAN ID\n",
+				 vf->vf_id);
+			goto err;
+		}
+	}
+
+	return 0;
+err:
+	return -EIO;
+}
+
+/**
+ * i40e_find_vsi_from_seid - searches for the vsi with the given seid
+ * @vf: pointer to the VF info
+ * @seid: seid of the vsi it is searching for
+ **/
+static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid)
+{
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	int i;
+
+	for (i = 0; i < vf->num_tc ; i++) {
+		vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id);
+		if (vsi && vsi->seid == seid)
+			return vsi;
+	}
+	return NULL;
+}
+
+/**
+ * i40e_del_all_cloud_filters
+ * @vf: pointer to the VF info
+ *
+ * This function deletes all cloud filters
+ **/
+static void i40e_del_all_cloud_filters(struct i40e_vf *vf)
+{
+	struct i40e_cloud_filter *cfilter = NULL;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	struct hlist_node *node;
+	int ret;
+
+	hlist_for_each_entry_safe(cfilter, node,
+				  &vf->cloud_filter_list, cloud_node) {
+		vsi = i40e_find_vsi_from_seid(vf, cfilter->seid);
+
+		if (!vsi) {
+			dev_err(&pf->pdev->dev, "VF %d: no VSI found for matching %u seid, can't delete cloud filter\n",
+				vf->vf_id, cfilter->seid);
+			continue;
+		}
+
+		if (cfilter->dst_port)
+			ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter,
+								false);
+		else
+			ret = i40e_add_del_cloud_filter(vsi, cfilter, false);
+		if (ret)
+			dev_err(&pf->pdev->dev,
+				"VF %d: Failed to delete cloud filter, err %pe aq_err %s\n",
+				vf->vf_id, ERR_PTR(ret),
+				i40e_aq_str(&pf->hw,
+					    pf->hw.aq.asq_last_status));
+
+		hlist_del(&cfilter->cloud_node);
+		kfree(cfilter);
+		vf->num_cloud_filters--;
+	}
+}
+
+/**
+ * i40e_vc_del_cloud_filter
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * This function deletes a cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg;
+	struct virtchnl_l4_spec mask = vcf->mask.tcp_spec;
+	struct virtchnl_l4_spec tcf = vcf->data.tcp_spec;
+	struct i40e_cloud_filter cfilter, *cf = NULL;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	struct hlist_node *node;
+	int aq_ret = 0;
+	int i, ret;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+		aq_ret = -EINVAL;
+		goto err;
+	}
+
+	if (!vf->adq_enabled) {
+		dev_info(&pf->pdev->dev,
+			 "VF %d: ADq not enabled, can't apply cloud filter\n",
+			 vf->vf_id);
+		aq_ret = -EINVAL;
+		goto err;
+	}
+
+	if (i40e_validate_cloud_filter(vf, vcf)) {
+		dev_info(&pf->pdev->dev,
+			 "VF %d: Invalid input, can't apply cloud filter\n",
+			 vf->vf_id);
+		aq_ret = -EINVAL;
+		goto err;
+	}
+
+	memset(&cfilter, 0, sizeof(cfilter));
+	/* parse destination mac address */
+	for (i = 0; i < ETH_ALEN; i++)
+		cfilter.dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i];
+
+	/* parse source mac address */
+	for (i = 0; i < ETH_ALEN; i++)
+		cfilter.src_mac[i] = mask.src_mac[i] & tcf.src_mac[i];
+
+	cfilter.vlan_id = mask.vlan_id & tcf.vlan_id;
+	cfilter.dst_port = mask.dst_port & tcf.dst_port;
+	cfilter.src_port = mask.src_port & tcf.src_port;
+
+	switch (vcf->flow_type) {
+	case VIRTCHNL_TCP_V4_FLOW:
+		cfilter.n_proto = ETH_P_IP;
+		if (mask.dst_ip[0] & tcf.dst_ip[0])
+			memcpy(&cfilter.ip.v4.dst_ip, tcf.dst_ip,
+			       ARRAY_SIZE(tcf.dst_ip));
+		else if (mask.src_ip[0] & tcf.dst_ip[0])
+			memcpy(&cfilter.ip.v4.src_ip, tcf.src_ip,
+			       ARRAY_SIZE(tcf.dst_ip));
+		break;
+	case VIRTCHNL_TCP_V6_FLOW:
+		cfilter.n_proto = ETH_P_IPV6;
+		if (mask.dst_ip[3] & tcf.dst_ip[3])
+			memcpy(&cfilter.ip.v6.dst_ip6, tcf.dst_ip,
+			       sizeof(cfilter.ip.v6.dst_ip6));
+		if (mask.src_ip[3] & tcf.src_ip[3])
+			memcpy(&cfilter.ip.v6.src_ip6, tcf.src_ip,
+			       sizeof(cfilter.ip.v6.src_ip6));
+		break;
+	default:
+		/* TC filter can be configured based on different combinations
+		 * and in this case IP is not a part of filter config
+		 */
+		dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n",
+			 vf->vf_id);
+	}
+
+	/* get the vsi to which the tc belongs to */
+	vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx];
+	cfilter.seid = vsi->seid;
+	cfilter.flags = vcf->field_flags;
+
+	/* Deleting TC filter */
+	if (tcf.dst_port)
+		ret = i40e_add_del_cloud_filter_big_buf(vsi, &cfilter, false);
+	else
+		ret = i40e_add_del_cloud_filter(vsi, &cfilter, false);
+	if (ret) {
+		dev_err(&pf->pdev->dev,
+			"VF %d: Failed to delete cloud filter, err %pe aq_err %s\n",
+			vf->vf_id, ERR_PTR(ret),
+			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		goto err;
+	}
+
+	hlist_for_each_entry_safe(cf, node,
+				  &vf->cloud_filter_list, cloud_node) {
+		if (cf->seid != cfilter.seid)
+			continue;
+		if (mask.dst_port)
+			if (cfilter.dst_port != cf->dst_port)
+				continue;
+		if (mask.dst_mac[0])
+			if (!ether_addr_equal(cf->src_mac, cfilter.src_mac))
+				continue;
+		/* for ipv4 data to be valid, only first byte of mask is set */
+		if (cfilter.n_proto == ETH_P_IP && mask.dst_ip[0])
+			if (memcmp(&cfilter.ip.v4.dst_ip, &cf->ip.v4.dst_ip,
+				   ARRAY_SIZE(tcf.dst_ip)))
+				continue;
+		/* for ipv6, mask is set for all sixteen bytes (4 words) */
+		if (cfilter.n_proto == ETH_P_IPV6 && mask.dst_ip[3])
+			if (memcmp(&cfilter.ip.v6.dst_ip6, &cf->ip.v6.dst_ip6,
+				   sizeof(cfilter.ip.v6.src_ip6)))
+				continue;
+		if (mask.vlan_id)
+			if (cfilter.vlan_id != cf->vlan_id)
+				continue;
+
+		hlist_del(&cf->cloud_node);
+		kfree(cf);
+		vf->num_cloud_filters--;
+	}
+
+err:
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DEL_CLOUD_FILTER,
+				       aq_ret);
+}
+
+/**
+ * i40e_vc_add_cloud_filter
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * This function adds a cloud filter programmed as TC filter for ADq
+ **/
+static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_filter *vcf = (struct virtchnl_filter *)msg;
+	struct virtchnl_l4_spec mask = vcf->mask.tcp_spec;
+	struct virtchnl_l4_spec tcf = vcf->data.tcp_spec;
+	struct i40e_cloud_filter *cfilter = NULL;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_vsi *vsi = NULL;
+	int aq_ret = 0;
+	int i;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+		aq_ret = -EINVAL;
+		goto err_out;
+	}
+
+	if (!vf->adq_enabled) {
+		dev_info(&pf->pdev->dev,
+			 "VF %d: ADq is not enabled, can't apply cloud filter\n",
+			 vf->vf_id);
+		aq_ret = -EINVAL;
+		goto err_out;
+	}
+
+	if (i40e_validate_cloud_filter(vf, vcf)) {
+		dev_info(&pf->pdev->dev,
+			 "VF %d: Invalid input/s, can't apply cloud filter\n",
+			 vf->vf_id);
+		aq_ret = -EINVAL;
+		goto err_out;
+	}
+
+	cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL);
+	if (!cfilter) {
+		aq_ret = -ENOMEM;
+		goto err_out;
+	}
+
+	/* parse destination mac address */
+	for (i = 0; i < ETH_ALEN; i++)
+		cfilter->dst_mac[i] = mask.dst_mac[i] & tcf.dst_mac[i];
+
+	/* parse source mac address */
+	for (i = 0; i < ETH_ALEN; i++)
+		cfilter->src_mac[i] = mask.src_mac[i] & tcf.src_mac[i];
+
+	cfilter->vlan_id = mask.vlan_id & tcf.vlan_id;
+	cfilter->dst_port = mask.dst_port & tcf.dst_port;
+	cfilter->src_port = mask.src_port & tcf.src_port;
+
+	switch (vcf->flow_type) {
+	case VIRTCHNL_TCP_V4_FLOW:
+		cfilter->n_proto = ETH_P_IP;
+		if (mask.dst_ip[0] & tcf.dst_ip[0])
+			memcpy(&cfilter->ip.v4.dst_ip, tcf.dst_ip,
+			       ARRAY_SIZE(tcf.dst_ip));
+		else if (mask.src_ip[0] & tcf.dst_ip[0])
+			memcpy(&cfilter->ip.v4.src_ip, tcf.src_ip,
+			       ARRAY_SIZE(tcf.dst_ip));
+		break;
+	case VIRTCHNL_TCP_V6_FLOW:
+		cfilter->n_proto = ETH_P_IPV6;
+		if (mask.dst_ip[3] & tcf.dst_ip[3])
+			memcpy(&cfilter->ip.v6.dst_ip6, tcf.dst_ip,
+			       sizeof(cfilter->ip.v6.dst_ip6));
+		if (mask.src_ip[3] & tcf.src_ip[3])
+			memcpy(&cfilter->ip.v6.src_ip6, tcf.src_ip,
+			       sizeof(cfilter->ip.v6.src_ip6));
+		break;
+	default:
+		/* TC filter can be configured based on different combinations
+		 * and in this case IP is not a part of filter config
+		 */
+		dev_info(&pf->pdev->dev, "VF %d: Flow type not configured\n",
+			 vf->vf_id);
+	}
+
+	/* get the VSI to which the TC belongs to */
+	vsi = pf->vsi[vf->ch[vcf->action_meta].vsi_idx];
+	cfilter->seid = vsi->seid;
+	cfilter->flags = vcf->field_flags;
+
+	/* Adding cloud filter programmed as TC filter */
+	if (tcf.dst_port)
+		aq_ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true);
+	else
+		aq_ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
+	if (aq_ret) {
+		dev_err(&pf->pdev->dev,
+			"VF %d: Failed to add cloud filter, err %pe aq_err %s\n",
+			vf->vf_id, ERR_PTR(aq_ret),
+			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+		goto err_free;
+	}
+
+	INIT_HLIST_NODE(&cfilter->cloud_node);
+	hlist_add_head(&cfilter->cloud_node, &vf->cloud_filter_list);
+	/* release the pointer passing it to the collection */
+	cfilter = NULL;
+	vf->num_cloud_filters++;
+err_free:
+	kfree(cfilter);
+err_out:
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_CLOUD_FILTER,
+				       aq_ret);
+}
+
+/**
+ * i40e_vc_add_qch_msg: Add queue channel and enable ADq
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ **/
+static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct virtchnl_tc_info *tci =
+		(struct virtchnl_tc_info *)msg;
+	struct i40e_pf *pf = vf->pf;
+	struct i40e_link_status *ls = &pf->hw.phy.link_info;
+	int i, adq_request_qps = 0;
+	int aq_ret = 0;
+	u64 speed = 0;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+		aq_ret = -EINVAL;
+		goto err;
+	}
+
+	/* ADq cannot be applied if spoof check is ON */
+	if (vf->spoofchk) {
+		dev_err(&pf->pdev->dev,
+			"Spoof check is ON, turn it OFF to enable ADq\n");
+		aq_ret = -EINVAL;
+		goto err;
+	}
+
+	if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADQ)) {
+		dev_err(&pf->pdev->dev,
+			"VF %d attempting to enable ADq, but hasn't properly negotiated that capability\n",
+			vf->vf_id);
+		aq_ret = -EINVAL;
+		goto err;
+	}
+
+	/* max number of traffic classes for VF currently capped at 4 */
+	if (!tci->num_tc || tci->num_tc > I40E_MAX_VF_VSI) {
+		dev_err(&pf->pdev->dev,
+			"VF %d trying to set %u TCs, valid range 1-%u TCs per VF\n",
+			vf->vf_id, tci->num_tc, I40E_MAX_VF_VSI);
+		aq_ret = -EINVAL;
+		goto err;
+	}
+
+	/* validate queues for each TC */
+	for (i = 0; i < tci->num_tc; i++)
+		if (!tci->list[i].count ||
+		    tci->list[i].count > I40E_DEFAULT_QUEUES_PER_VF) {
+			dev_err(&pf->pdev->dev,
+				"VF %d: TC %d trying to set %u queues, valid range 1-%u queues per TC\n",
+				vf->vf_id, i, tci->list[i].count,
+				I40E_DEFAULT_QUEUES_PER_VF);
+			aq_ret = -EINVAL;
+			goto err;
+		}
+
+	/* need Max VF queues but already have default number of queues */
+	adq_request_qps = I40E_MAX_VF_QUEUES - I40E_DEFAULT_QUEUES_PER_VF;
+
+	if (pf->queues_left < adq_request_qps) {
+		dev_err(&pf->pdev->dev,
+			"No queues left to allocate to VF %d\n",
+			vf->vf_id);
+		aq_ret = -EINVAL;
+		goto err;
+	} else {
+		/* we need to allocate max VF queues to enable ADq so as to
+		 * make sure ADq enabled VF always gets back queues when it
+		 * goes through a reset.
+		 */
+		vf->num_queue_pairs = I40E_MAX_VF_QUEUES;
+	}
+
+	/* get link speed in MB to validate rate limit */
+	speed = i40e_vc_link_speed2mbps(ls->link_speed);
+	if (speed == SPEED_UNKNOWN) {
+		dev_err(&pf->pdev->dev,
+			"Cannot detect link speed\n");
+		aq_ret = -EINVAL;
+		goto err;
+	}
+
+	/* parse data from the queue channel info */
+	vf->num_tc = tci->num_tc;
+	for (i = 0; i < vf->num_tc; i++) {
+		if (tci->list[i].max_tx_rate) {
+			if (tci->list[i].max_tx_rate > speed) {
+				dev_err(&pf->pdev->dev,
+					"Invalid max tx rate %llu specified for VF %d.",
+					tci->list[i].max_tx_rate,
+					vf->vf_id);
+				aq_ret = -EINVAL;
+				goto err;
+			} else {
+				vf->ch[i].max_tx_rate =
+					tci->list[i].max_tx_rate;
+			}
+		}
+		vf->ch[i].num_qps = tci->list[i].count;
+	}
+
+	/* set this flag only after making sure all inputs are sane */
+	vf->adq_enabled = true;
+
+	/* reset the VF in order to allocate resources */
+	i40e_vc_reset_vf(vf, true);
+
+	return 0;
+
+	/* send the response to the VF */
+err:
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ENABLE_CHANNELS,
+				       aq_ret);
+}
+
+/**
+ * i40e_vc_del_qch_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ **/
+static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg)
+{
+	struct i40e_pf *pf = vf->pf;
+	int aq_ret = 0;
+
+	if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) {
+		aq_ret = -EINVAL;
+		goto err;
+	}
+
+	if (vf->adq_enabled) {
+		i40e_del_all_cloud_filters(vf);
+		i40e_del_qch(vf);
+		vf->adq_enabled = false;
+		vf->num_tc = 0;
+		dev_info(&pf->pdev->dev,
+			 "Deleting Queue Channels and cloud filters for ADq on VF %d\n",
+			 vf->vf_id);
+	} else {
+		dev_info(&pf->pdev->dev, "VF %d trying to delete queue channels but ADq isn't enabled\n",
+			 vf->vf_id);
+		aq_ret = -EINVAL;
+	}
+
+	/* reset the VF in order to allocate resources */
+	i40e_vc_reset_vf(vf, true);
+
+	return 0;
+
+err:
+	return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_DISABLE_CHANNELS,
+				       aq_ret);
+}
+
+/**
+ * i40e_vc_process_vf_msg
+ * @pf: pointer to the PF structure
+ * @vf_id: source VF id
+ * @v_opcode: operation code
+ * @v_retval: unused return value code
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * called from the common aeq/arq handler to
+ * process request from VF
+ **/
+int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
+			   u32 __always_unused v_retval, u8 *msg, u16 msglen)
+{
+	struct i40e_hw *hw = &pf->hw;
+	int local_vf_id = vf_id - (s16)hw->func_caps.vf_base_id;
+	struct i40e_vf *vf;
+	int ret;
+
+	pf->vf_aq_requests++;
+	if (local_vf_id < 0 || local_vf_id >= pf->num_alloc_vfs)
+		return -EINVAL;
+	vf = &(pf->vf[local_vf_id]);
+
+	/* Check if VF is disabled. */
+	if (test_bit(I40E_VF_STATE_DISABLED, &vf->vf_states))
+		return -EINVAL;
+
+	/* perform basic checks on the msg */
+	ret = virtchnl_vc_validate_vf_msg(&vf->vf_ver, v_opcode, msg, msglen);
+
+	if (ret) {
+		i40e_vc_send_resp_to_vf(vf, v_opcode, -EINVAL);
+		dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d\n",
+			local_vf_id, v_opcode, msglen);
+		return ret;
+	}
+
+	switch (v_opcode) {
+	case VIRTCHNL_OP_VERSION:
+		ret = i40e_vc_get_version_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_GET_VF_RESOURCES:
+		ret = i40e_vc_get_vf_resources_msg(vf, msg);
+		i40e_vc_notify_vf_link_state(vf);
+		break;
+	case VIRTCHNL_OP_RESET_VF:
+		i40e_vc_reset_vf(vf, false);
+		ret = 0;
+		break;
+	case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
+		ret = i40e_vc_config_promiscuous_mode_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
+		ret = i40e_vc_config_queues_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_IRQ_MAP:
+		ret = i40e_vc_config_irq_map_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ENABLE_QUEUES:
+		ret = i40e_vc_enable_queues_msg(vf, msg);
+		i40e_vc_notify_vf_link_state(vf);
+		break;
+	case VIRTCHNL_OP_DISABLE_QUEUES:
+		ret = i40e_vc_disable_queues_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ADD_ETH_ADDR:
+		ret = i40e_vc_add_mac_addr_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_DEL_ETH_ADDR:
+		ret = i40e_vc_del_mac_addr_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ADD_VLAN:
+		ret = i40e_vc_add_vlan_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_DEL_VLAN:
+		ret = i40e_vc_remove_vlan_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_GET_STATS:
+		ret = i40e_vc_get_stats_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_RDMA:
+		ret = i40e_vc_rdma_msg(vf, msg, msglen);
+		break;
+	case VIRTCHNL_OP_CONFIG_RDMA_IRQ_MAP:
+		ret = i40e_vc_rdma_qvmap_msg(vf, msg, true);
+		break;
+	case VIRTCHNL_OP_RELEASE_RDMA_IRQ_MAP:
+		ret = i40e_vc_rdma_qvmap_msg(vf, msg, false);
+		break;
+	case VIRTCHNL_OP_CONFIG_RSS_KEY:
+		ret = i40e_vc_config_rss_key(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_RSS_LUT:
+		ret = i40e_vc_config_rss_lut(vf, msg);
+		break;
+	case VIRTCHNL_OP_GET_RSS_HENA_CAPS:
+		ret = i40e_vc_get_rss_hena(vf, msg);
+		break;
+	case VIRTCHNL_OP_SET_RSS_HENA:
+		ret = i40e_vc_set_rss_hena(vf, msg);
+		break;
+	case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
+		ret = i40e_vc_enable_vlan_stripping(vf, msg);
+		break;
+	case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
+		ret = i40e_vc_disable_vlan_stripping(vf, msg);
+		break;
+	case VIRTCHNL_OP_REQUEST_QUEUES:
+		ret = i40e_vc_request_queues_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ENABLE_CHANNELS:
+		ret = i40e_vc_add_qch_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_DISABLE_CHANNELS:
+		ret = i40e_vc_del_qch_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ADD_CLOUD_FILTER:
+		ret = i40e_vc_add_cloud_filter(vf, msg);
+		break;
+	case VIRTCHNL_OP_DEL_CLOUD_FILTER:
+		ret = i40e_vc_del_cloud_filter(vf, msg);
+		break;
+	case VIRTCHNL_OP_UNKNOWN:
+	default:
+		dev_err(&pf->pdev->dev, "Unsupported opcode %d from VF %d\n",
+			v_opcode, local_vf_id);
+		ret = i40e_vc_send_resp_to_vf(vf, v_opcode,
+					      -EOPNOTSUPP);
+		break;
+	}
+
+	return ret;
+}
+
+/**
+ * i40e_vc_process_vflr_event
+ * @pf: pointer to the PF structure
+ *
+ * called from the vlfr irq handler to
+ * free up VF resources and state variables
+ **/
+int i40e_vc_process_vflr_event(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u32 reg, reg_idx, bit_idx;
+	struct i40e_vf *vf;
+	int vf_id;
+
+	if (!test_bit(__I40E_VFLR_EVENT_PENDING, pf->state))
+		return 0;
+
+	/* Re-enable the VFLR interrupt cause here, before looking for which
+	 * VF got reset. Otherwise, if another VF gets a reset while the
+	 * first one is being processed, that interrupt will be lost, and
+	 * that VF will be stuck in reset forever.
+	 */
+	reg = rd32(hw, I40E_PFINT_ICR0_ENA);
+	reg |= I40E_PFINT_ICR0_ENA_VFLR_MASK;
+	wr32(hw, I40E_PFINT_ICR0_ENA, reg);
+	i40e_flush(hw);
+
+	clear_bit(__I40E_VFLR_EVENT_PENDING, pf->state);
+	for (vf_id = 0; vf_id < pf->num_alloc_vfs; vf_id++) {
+		reg_idx = (hw->func_caps.vf_base_id + vf_id) / 32;
+		bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32;
+		/* read GLGEN_VFLRSTAT register to find out the flr VFs */
+		vf = &pf->vf[vf_id];
+		reg = rd32(hw, I40E_GLGEN_VFLRSTAT(reg_idx));
+		if (reg & BIT(bit_idx))
+			/* i40e_reset_vf will clear the bit in GLGEN_VFLRSTAT */
+			i40e_reset_vf(vf, true);
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_validate_vf
+ * @pf: the physical function
+ * @vf_id: VF identifier
+ *
+ * Check that the VF is enabled and the VSI exists.
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_validate_vf(struct i40e_pf *pf, int vf_id)
+{
+	struct i40e_vsi *vsi;
+	struct i40e_vf *vf;
+	int ret = 0;
+
+	if (vf_id >= pf->num_alloc_vfs) {
+		dev_err(&pf->pdev->dev,
+			"Invalid VF Identifier %d\n", vf_id);
+		ret = -EINVAL;
+		goto err_out;
+	}
+	vf = &pf->vf[vf_id];
+	vsi = i40e_find_vsi_from_id(pf, vf->lan_vsi_id);
+	if (!vsi)
+		ret = -EINVAL;
+err_out:
+	return ret;
+}
+
+/**
+ * i40e_check_vf_init_timeout
+ * @vf: the virtual function
+ *
+ * Check that the VF's initialization was successfully done and if not
+ * wait up to 300ms for its finish.
+ *
+ * Returns true when VF is initialized, false on timeout
+ **/
+static bool i40e_check_vf_init_timeout(struct i40e_vf *vf)
+{
+	int i;
+
+	/* When the VF is resetting wait until it is done.
+	 * It can take up to 200 milliseconds, but wait for
+	 * up to 300 milliseconds to be safe.
+	 */
+	for (i = 0; i < 15; i++) {
+		if (test_bit(I40E_VF_STATE_INIT, &vf->vf_states))
+			return true;
+		msleep(20);
+	}
+
+	if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
+		dev_err(&vf->pf->pdev->dev,
+			"VF %d still in reset. Try again.\n", vf->vf_id);
+		return false;
+	}
+
+	return true;
+}
+
+/**
+ * i40e_ndo_set_vf_mac
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @mac: mac address
+ *
+ * program VF mac address
+ **/
+int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_mac_filter *f;
+	struct i40e_vf *vf;
+	int ret = 0;
+	struct hlist_node *h;
+	int bkt;
+
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	/* validate the request */
+	ret = i40e_validate_vf(pf, vf_id);
+	if (ret)
+		goto error_param;
+
+	vf = &pf->vf[vf_id];
+	if (!i40e_check_vf_init_timeout(vf)) {
+		ret = -EAGAIN;
+		goto error_param;
+	}
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	if (is_multicast_ether_addr(mac)) {
+		dev_err(&pf->pdev->dev,
+			"Invalid Ethernet address %pM for VF %d\n", mac, vf_id);
+		ret = -EINVAL;
+		goto error_param;
+	}
+
+	/* Lock once because below invoked function add/del_filter requires
+	 * mac_filter_hash_lock to be held
+	 */
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+
+	/* delete the temporary mac address */
+	if (!is_zero_ether_addr(vf->default_lan_addr.addr))
+		i40e_del_mac_filter(vsi, vf->default_lan_addr.addr);
+
+	/* Delete all the filters for this VSI - we're going to kill it
+	 * anyway.
+	 */
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
+		__i40e_del_filter(vsi, f);
+
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	/* program mac filter */
+	if (i40e_sync_vsi_filters(vsi)) {
+		dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
+		ret = -EIO;
+		goto error_param;
+	}
+	ether_addr_copy(vf->default_lan_addr.addr, mac);
+
+	if (is_zero_ether_addr(mac)) {
+		vf->pf_set_mac = false;
+		dev_info(&pf->pdev->dev, "Removing MAC on VF %d\n", vf_id);
+	} else {
+		vf->pf_set_mac = true;
+		dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n",
+			 mac, vf_id);
+	}
+
+	/* Force the VF interface down so it has to bring up with new MAC
+	 * address
+	 */
+	i40e_vc_reset_vf(vf, true);
+	dev_info(&pf->pdev->dev, "Bring down and up the VF interface to make this change effective.\n");
+
+error_param:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
+	return ret;
+}
+
+/**
+ * i40e_ndo_set_vf_port_vlan
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @vlan_id: mac address
+ * @qos: priority setting
+ * @vlan_proto: vlan protocol
+ *
+ * program VF vlan id and/or qos
+ **/
+int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
+			      u16 vlan_id, u8 qos, __be16 vlan_proto)
+{
+	u16 vlanprio = vlan_id | (qos << I40E_VLAN_PRIORITY_SHIFT);
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	bool allmulti = false, alluni = false;
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_vsi *vsi;
+	struct i40e_vf *vf;
+	int ret = 0;
+
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	/* validate the request */
+	ret = i40e_validate_vf(pf, vf_id);
+	if (ret)
+		goto error_pvid;
+
+	if ((vlan_id > I40E_MAX_VLANID) || (qos > 7)) {
+		dev_err(&pf->pdev->dev, "Invalid VF Parameters\n");
+		ret = -EINVAL;
+		goto error_pvid;
+	}
+
+	if (vlan_proto != htons(ETH_P_8021Q)) {
+		dev_err(&pf->pdev->dev, "VF VLAN protocol is not supported\n");
+		ret = -EPROTONOSUPPORT;
+		goto error_pvid;
+	}
+
+	vf = &pf->vf[vf_id];
+	if (!i40e_check_vf_init_timeout(vf)) {
+		ret = -EAGAIN;
+		goto error_pvid;
+	}
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	if (le16_to_cpu(vsi->info.pvid) == vlanprio)
+		/* duplicate request, so just return success */
+		goto error_pvid;
+
+	i40e_vlan_stripping_enable(vsi);
+
+	/* Locked once because multiple functions below iterate list */
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+
+	/* Check for condition where there was already a port VLAN ID
+	 * filter set and now it is being deleted by setting it to zero.
+	 * Additionally check for the condition where there was a port
+	 * VLAN but now there is a new and different port VLAN being set.
+	 * Before deleting all the old VLAN filters we must add new ones
+	 * with -1 (I40E_VLAN_ANY) or otherwise we're left with all our
+	 * MAC addresses deleted.
+	 */
+	if ((!(vlan_id || qos) ||
+	     vlanprio != le16_to_cpu(vsi->info.pvid)) &&
+	    vsi->info.pvid) {
+		ret = i40e_add_vlan_all_mac(vsi, I40E_VLAN_ANY);
+		if (ret) {
+			dev_info(&vsi->back->pdev->dev,
+				 "add VF VLAN failed, ret=%d aq_err=%d\n", ret,
+				 vsi->back->hw.aq.asq_last_status);
+			spin_unlock_bh(&vsi->mac_filter_hash_lock);
+			goto error_pvid;
+		}
+	}
+
+	if (vsi->info.pvid) {
+		/* remove all filters on the old VLAN */
+		i40e_rm_vlan_all_mac(vsi, (le16_to_cpu(vsi->info.pvid) &
+					   VLAN_VID_MASK));
+	}
+
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	/* disable promisc modes in case they were enabled */
+	ret = i40e_config_vf_promiscuous_mode(vf, vf->lan_vsi_id,
+					      allmulti, alluni);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Unable to config VF promiscuous mode\n");
+		goto error_pvid;
+	}
+
+	if (vlan_id || qos)
+		ret = i40e_vsi_add_pvid(vsi, vlanprio);
+	else
+		i40e_vsi_remove_pvid(vsi);
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+
+	if (vlan_id) {
+		dev_info(&pf->pdev->dev, "Setting VLAN %d, QOS 0x%x on VF %d\n",
+			 vlan_id, qos, vf_id);
+
+		/* add new VLAN filter for each MAC */
+		ret = i40e_add_vlan_all_mac(vsi, vlan_id);
+		if (ret) {
+			dev_info(&vsi->back->pdev->dev,
+				 "add VF VLAN failed, ret=%d aq_err=%d\n", ret,
+				 vsi->back->hw.aq.asq_last_status);
+			spin_unlock_bh(&vsi->mac_filter_hash_lock);
+			goto error_pvid;
+		}
+
+		/* remove the previously added non-VLAN MAC filters */
+		i40e_rm_vlan_all_mac(vsi, I40E_VLAN_ANY);
+	}
+
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	if (test_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states))
+		alluni = true;
+
+	if (test_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states))
+		allmulti = true;
+
+	/* Schedule the worker thread to take care of applying changes */
+	i40e_service_event_schedule(vsi->back);
+
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Unable to update VF vsi context\n");
+		goto error_pvid;
+	}
+
+	/* The Port VLAN needs to be saved across resets the same as the
+	 * default LAN MAC address.
+	 */
+	vf->port_vlan_id = le16_to_cpu(vsi->info.pvid);
+
+	i40e_vc_reset_vf(vf, true);
+	/* During reset the VF got a new VSI, so refresh a pointer. */
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	ret = i40e_config_vf_promiscuous_mode(vf, vsi->id, allmulti, alluni);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Unable to config vf promiscuous mode\n");
+		goto error_pvid;
+	}
+
+	ret = 0;
+
+error_pvid:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
+	return ret;
+}
+
+/**
+ * i40e_ndo_set_vf_bw
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @min_tx_rate: Minimum Tx rate
+ * @max_tx_rate: Maximum Tx rate
+ *
+ * configure VF Tx rate
+ **/
+int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
+		       int max_tx_rate)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_vsi *vsi;
+	struct i40e_vf *vf;
+	int ret = 0;
+
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	/* validate the request */
+	ret = i40e_validate_vf(pf, vf_id);
+	if (ret)
+		goto error;
+
+	if (min_tx_rate) {
+		dev_err(&pf->pdev->dev, "Invalid min tx rate (%d) (greater than 0) specified for VF %d.\n",
+			min_tx_rate, vf_id);
+		ret = -EINVAL;
+		goto error;
+	}
+
+	vf = &pf->vf[vf_id];
+	if (!i40e_check_vf_init_timeout(vf)) {
+		ret = -EAGAIN;
+		goto error;
+	}
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+	if (ret)
+		goto error;
+
+	vf->tx_rate = max_tx_rate;
+error:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
+	return ret;
+}
+
+/**
+ * i40e_ndo_get_vf_config
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @ivi: VF configuration structure
+ *
+ * return VF configuration
+ **/
+int i40e_ndo_get_vf_config(struct net_device *netdev,
+			   int vf_id, struct ifla_vf_info *ivi)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_vf *vf;
+	int ret = 0;
+
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	/* validate the request */
+	ret = i40e_validate_vf(pf, vf_id);
+	if (ret)
+		goto error_param;
+
+	vf = &pf->vf[vf_id];
+	/* first vsi is always the LAN vsi */
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		ret = -ENOENT;
+		goto error_param;
+	}
+
+	ivi->vf = vf_id;
+
+	ether_addr_copy(ivi->mac, vf->default_lan_addr.addr);
+
+	ivi->max_tx_rate = vf->tx_rate;
+	ivi->min_tx_rate = 0;
+	ivi->vlan = le16_to_cpu(vsi->info.pvid) & I40E_VLAN_MASK;
+	ivi->qos = (le16_to_cpu(vsi->info.pvid) & I40E_PRIORITY_MASK) >>
+		   I40E_VLAN_PRIORITY_SHIFT;
+	if (vf->link_forced == false)
+		ivi->linkstate = IFLA_VF_LINK_STATE_AUTO;
+	else if (vf->link_up == true)
+		ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE;
+	else
+		ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE;
+	ivi->spoofchk = vf->spoofchk;
+	ivi->trusted = vf->trusted;
+	ret = 0;
+
+error_param:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
+	return ret;
+}
+
+/**
+ * i40e_ndo_set_vf_link_state
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @link: required link state
+ *
+ * Set the link state of a specified VF, regardless of physical link state
+ **/
+int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_link_status *ls = &pf->hw.phy.link_info;
+	struct virtchnl_pf_event pfe;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_vf *vf;
+	int abs_vf_id;
+	int ret = 0;
+
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	/* validate the request */
+	if (vf_id >= pf->num_alloc_vfs) {
+		dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id);
+		ret = -EINVAL;
+		goto error_out;
+	}
+
+	vf = &pf->vf[vf_id];
+	abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
+
+	pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
+	pfe.severity = PF_EVENT_SEVERITY_INFO;
+
+	switch (link) {
+	case IFLA_VF_LINK_STATE_AUTO:
+		vf->link_forced = false;
+		i40e_set_vf_link_state(vf, &pfe, ls);
+		break;
+	case IFLA_VF_LINK_STATE_ENABLE:
+		vf->link_forced = true;
+		vf->link_up = true;
+		i40e_set_vf_link_state(vf, &pfe, ls);
+		break;
+	case IFLA_VF_LINK_STATE_DISABLE:
+		vf->link_forced = true;
+		vf->link_up = false;
+		i40e_set_vf_link_state(vf, &pfe, ls);
+		break;
+	default:
+		ret = -EINVAL;
+		goto error_out;
+	}
+	/* Notify the VF of its new link state */
+	i40e_aq_send_msg_to_vf(hw, abs_vf_id, VIRTCHNL_OP_EVENT,
+			       0, (u8 *)&pfe, sizeof(pfe), NULL);
+
+error_out:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
+	return ret;
+}
+
+/**
+ * i40e_ndo_set_vf_spoofchk
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @enable: flag to enable or disable feature
+ *
+ * Enable or disable VF spoof checking
+ **/
+int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_vsi_context ctxt;
+	struct i40e_hw *hw = &pf->hw;
+	struct i40e_vf *vf;
+	int ret = 0;
+
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	/* validate the request */
+	if (vf_id >= pf->num_alloc_vfs) {
+		dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	vf = &(pf->vf[vf_id]);
+	if (!i40e_check_vf_init_timeout(vf)) {
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	if (enable == vf->spoofchk)
+		goto out;
+
+	vf->spoofchk = enable;
+	memset(&ctxt, 0, sizeof(ctxt));
+	ctxt.seid = pf->vsi[vf->lan_vsi_idx]->seid;
+	ctxt.pf_num = pf->hw.pf_id;
+	ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID);
+	if (enable)
+		ctxt.info.sec_flags |= (I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK |
+					I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK);
+	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+	if (ret) {
+		dev_err(&pf->pdev->dev, "Error %d updating VSI parameters\n",
+			ret);
+		ret = -EIO;
+	}
+out:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
+	return ret;
+}
+
+/**
+ * i40e_ndo_set_vf_trust
+ * @netdev: network interface device structure of the pf
+ * @vf_id: VF identifier
+ * @setting: trust setting
+ *
+ * Enable or disable VF trust setting
+ **/
+int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_vf *vf;
+	int ret = 0;
+
+	if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) {
+		dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n");
+		return -EAGAIN;
+	}
+
+	/* validate the request */
+	if (vf_id >= pf->num_alloc_vfs) {
+		dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+		dev_err(&pf->pdev->dev, "Trusted VF not supported in MFP mode.\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	vf = &pf->vf[vf_id];
+
+	if (setting == vf->trusted)
+		goto out;
+
+	vf->trusted = setting;
+
+	/* request PF to sync mac/vlan filters for the VF */
+	set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
+	pf->vsi[vf->lan_vsi_idx]->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+
+	i40e_vc_reset_vf(vf, true);
+	dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
+		 vf_id, setting ? "" : "un");
+
+	if (vf->adq_enabled) {
+		if (!vf->trusted) {
+			dev_info(&pf->pdev->dev,
+				 "VF %u no longer Trusted, deleting all cloud filters\n",
+				 vf_id);
+			i40e_del_all_cloud_filters(vf);
+		}
+	}
+
+out:
+	clear_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state);
+	return ret;
+}
+
+/**
+ * i40e_get_vf_stats - populate some stats for the VF
+ * @netdev: the netdev of the PF
+ * @vf_id: the host OS identifier (0-127)
+ * @vf_stats: pointer to the OS memory to be initialized
+ */
+int i40e_get_vf_stats(struct net_device *netdev, int vf_id,
+		      struct ifla_vf_stats *vf_stats)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_pf *pf = np->vsi->back;
+	struct i40e_eth_stats *stats;
+	struct i40e_vsi *vsi;
+	struct i40e_vf *vf;
+
+	/* validate the request */
+	if (i40e_validate_vf(pf, vf_id))
+		return -EINVAL;
+
+	vf = &pf->vf[vf_id];
+	if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
+		dev_err(&pf->pdev->dev, "VF %d in reset. Try again.\n", vf_id);
+		return -EBUSY;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi)
+		return -EINVAL;
+
+	i40e_update_eth_stats(vsi);
+	stats = &vsi->eth_stats;
+
+	memset(vf_stats, 0, sizeof(*vf_stats));
+
+	vf_stats->rx_packets = stats->rx_unicast + stats->rx_broadcast +
+		stats->rx_multicast;
+	vf_stats->tx_packets = stats->tx_unicast + stats->tx_broadcast +
+		stats->tx_multicast;
+	vf_stats->rx_bytes   = stats->rx_bytes;
+	vf_stats->tx_bytes   = stats->tx_bytes;
+	vf_stats->broadcast  = stats->rx_broadcast;
+	vf_stats->multicast  = stats->rx_multicast;
+	vf_stats->rx_dropped = stats->rx_discards;
+	vf_stats->tx_dropped = stats->tx_discards;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
new file mode 100644
index 0000000000..bda9ba668c
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _I40E_VIRTCHNL_PF_H_
+#define _I40E_VIRTCHNL_PF_H_
+
+#include "i40e.h"
+
+#define I40E_MAX_VLANID 4095
+
+#define I40E_VIRTCHNL_SUPPORTED_QTYPES 2
+
+#define I40E_VLAN_PRIORITY_SHIFT	13
+#define I40E_VLAN_MASK			0xFFF
+#define I40E_PRIORITY_MASK		0xE000
+
+#define I40E_MAX_VF_PROMISC_FLAGS	3
+
+#define I40E_VF_STATE_WAIT_COUNT	20
+#define I40E_VFR_WAIT_COUNT		100
+
+/* Various queue ctrls */
+enum i40e_queue_ctrl {
+	I40E_QUEUE_CTRL_UNKNOWN = 0,
+	I40E_QUEUE_CTRL_ENABLE,
+	I40E_QUEUE_CTRL_ENABLECHECK,
+	I40E_QUEUE_CTRL_DISABLE,
+	I40E_QUEUE_CTRL_DISABLECHECK,
+	I40E_QUEUE_CTRL_FASTDISABLE,
+	I40E_QUEUE_CTRL_FASTDISABLECHECK,
+};
+
+/* VF states */
+enum i40e_vf_states {
+	I40E_VF_STATE_INIT = 0,
+	I40E_VF_STATE_ACTIVE,
+	I40E_VF_STATE_RDMAENA,
+	I40E_VF_STATE_DISABLED,
+	I40E_VF_STATE_MC_PROMISC,
+	I40E_VF_STATE_UC_PROMISC,
+	I40E_VF_STATE_PRE_ENABLE,
+	I40E_VF_STATE_RESETTING
+};
+
+/* VF capabilities */
+enum i40e_vf_capabilities {
+	I40E_VIRTCHNL_VF_CAP_PRIVILEGE = 0,
+	I40E_VIRTCHNL_VF_CAP_L2,
+	I40E_VIRTCHNL_VF_CAP_RDMA,
+};
+
+/* In ADq, max 4 VSI's can be allocated per VF including primary VF VSI.
+ * These variables are used to store indices, id's and number of queues
+ * for each VSI including that of primary VF VSI. Each Traffic class is
+ * termed as channel and each channel can in-turn have 4 queues which
+ * means max 16 queues overall per VF.
+ */
+struct i40evf_channel {
+	u16 vsi_idx; /* index in PF struct for all channel VSIs */
+	u16 vsi_id; /* VSI ID used by firmware */
+	u16 num_qps; /* number of queue pairs requested by user */
+	u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
+};
+
+/* VF information structure */
+struct i40e_vf {
+	struct i40e_pf *pf;
+
+	/* VF id in the PF space */
+	s16 vf_id;
+	/* all VF vsis connect to the same parent */
+	enum i40e_switch_element_types parent_type;
+	struct virtchnl_version_info vf_ver;
+	u32 driver_caps; /* reported by VF driver */
+
+	/* VF Port Extender (PE) stag if used */
+	u16 stag;
+
+	struct virtchnl_ether_addr default_lan_addr;
+	u16 port_vlan_id;
+	bool pf_set_mac;	/* The VMM admin set the VF MAC address */
+	bool trusted;
+
+	/* VSI indices - actual VSI pointers are maintained in the PF structure
+	 * When assigned, these will be non-zero, because VSI 0 is always
+	 * the main LAN VSI for the PF.
+	 */
+	u16 lan_vsi_idx;	/* index into PF struct */
+	u16 lan_vsi_id;		/* ID as used by firmware */
+
+	u8 num_queue_pairs;	/* num of qps assigned to VF vsis */
+	u8 num_req_queues;	/* num of requested qps */
+	u64 num_mdd_events;	/* num of mdd events detected */
+
+	unsigned long vf_caps;	/* vf's adv. capabilities */
+	unsigned long vf_states;	/* vf's runtime states */
+	unsigned int tx_rate;	/* Tx bandwidth limit in Mbps */
+	bool link_forced;
+	bool link_up;		/* only valid if VF link is forced */
+	bool spoofchk;
+	u16 num_vlan;
+
+	/* ADq related variables */
+	bool adq_enabled; /* flag to enable adq */
+	u8 num_tc;
+	struct i40evf_channel ch[I40E_MAX_VF_VSI];
+	struct hlist_head cloud_filter_list;
+	u16 num_cloud_filters;
+
+	/* RDMA Client */
+	struct virtchnl_rdma_qvlist_info *qvlist_info;
+};
+
+void i40e_free_vfs(struct i40e_pf *pf);
+int i40e_pci_sriov_configure(struct pci_dev *dev, int num_vfs);
+int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs);
+int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
+			   u32 v_retval, u8 *msg, u16 msglen);
+int i40e_vc_process_vflr_event(struct i40e_pf *pf);
+bool i40e_reset_vf(struct i40e_vf *vf, bool flr);
+bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr);
+void i40e_vc_notify_vf_reset(struct i40e_vf *vf);
+
+/* VF configuration related iplink handlers */
+int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac);
+int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
+			      u16 vlan_id, u8 qos, __be16 vlan_proto);
+int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
+		       int max_tx_rate);
+int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting);
+int i40e_ndo_get_vf_config(struct net_device *netdev,
+			   int vf_id, struct ifla_vf_info *ivi);
+int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link);
+int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable);
+
+void i40e_vc_notify_link_state(struct i40e_pf *pf);
+void i40e_vc_notify_reset(struct i40e_pf *pf);
+#ifdef CONFIG_PCI_IOV
+void i40e_restore_all_vfs_msi_state(struct pci_dev *pdev);
+#endif /* CONFIG_PCI_IOV */
+int i40e_get_vf_stats(struct net_device *netdev, int vf_id,
+		      struct ifla_vf_stats *vf_stats);
+
+#endif /* _I40E_VIRTCHNL_PF_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
new file mode 100644
index 0000000000..1f8ae6f5d9
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -0,0 +1,818 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <linux/stringify.h>
+#include <net/xdp_sock_drv.h>
+#include <net/xdp.h>
+
+#include "i40e.h"
+#include "i40e_txrx_common.h"
+#include "i40e_xsk.h"
+
+void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring)
+{
+	memset(rx_ring->rx_bi_zc, 0,
+	       sizeof(*rx_ring->rx_bi_zc) * rx_ring->count);
+}
+
+static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
+{
+	return &rx_ring->rx_bi_zc[idx];
+}
+
+/**
+ * i40e_realloc_rx_xdp_bi - reallocate SW ring for either XSK or normal buffer
+ * @rx_ring: Current rx ring
+ * @pool_present: is pool for XSK present
+ *
+ * Try allocating memory and return ENOMEM, if failed to allocate.
+ * If allocation was successful, substitute buffer with allocated one.
+ * Returns 0 on success, negative on failure
+ */
+static int i40e_realloc_rx_xdp_bi(struct i40e_ring *rx_ring, bool pool_present)
+{
+	size_t elem_size = pool_present ? sizeof(*rx_ring->rx_bi_zc) :
+					  sizeof(*rx_ring->rx_bi);
+	void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);
+
+	if (!sw_ring)
+		return -ENOMEM;
+
+	if (pool_present) {
+		kfree(rx_ring->rx_bi);
+		rx_ring->rx_bi = NULL;
+		rx_ring->rx_bi_zc = sw_ring;
+	} else {
+		kfree(rx_ring->rx_bi_zc);
+		rx_ring->rx_bi_zc = NULL;
+		rx_ring->rx_bi = sw_ring;
+	}
+	return 0;
+}
+
+/**
+ * i40e_realloc_rx_bi_zc - reallocate rx SW rings
+ * @vsi: Current VSI
+ * @zc: is zero copy set
+ *
+ * Reallocate buffer for rx_rings that might be used by XSK.
+ * XDP requires more memory, than rx_buf provides.
+ * Returns 0 on success, negative on failure
+ */
+int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc)
+{
+	struct i40e_ring *rx_ring;
+	unsigned long q;
+
+	for_each_set_bit(q, vsi->af_xdp_zc_qps, vsi->alloc_queue_pairs) {
+		rx_ring = vsi->rx_rings[q];
+		if (i40e_realloc_rx_xdp_bi(rx_ring, zc))
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+/**
+ * i40e_xsk_pool_enable - Enable/associate an AF_XDP buffer pool to a
+ * certain ring/qid
+ * @vsi: Current VSI
+ * @pool: buffer pool
+ * @qid: Rx ring to associate buffer pool with
+ *
+ * Returns 0 on success, <0 on failure
+ **/
+static int i40e_xsk_pool_enable(struct i40e_vsi *vsi,
+				struct xsk_buff_pool *pool,
+				u16 qid)
+{
+	struct net_device *netdev = vsi->netdev;
+	bool if_running;
+	int err;
+
+	if (vsi->type != I40E_VSI_MAIN)
+		return -EINVAL;
+
+	if (qid >= vsi->num_queue_pairs)
+		return -EINVAL;
+
+	if (qid >= netdev->real_num_rx_queues ||
+	    qid >= netdev->real_num_tx_queues)
+		return -EINVAL;
+
+	err = xsk_pool_dma_map(pool, &vsi->back->pdev->dev, I40E_RX_DMA_ATTR);
+	if (err)
+		return err;
+
+	set_bit(qid, vsi->af_xdp_zc_qps);
+
+	if_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi);
+
+	if (if_running) {
+		err = i40e_queue_pair_disable(vsi, qid);
+		if (err)
+			return err;
+
+		err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], true);
+		if (err)
+			return err;
+
+		err = i40e_queue_pair_enable(vsi, qid);
+		if (err)
+			return err;
+
+		/* Kick start the NAPI context so that receiving will start */
+		err = i40e_xsk_wakeup(vsi->netdev, qid, XDP_WAKEUP_RX);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_xsk_pool_disable - Disassociate an AF_XDP buffer pool from a
+ * certain ring/qid
+ * @vsi: Current VSI
+ * @qid: Rx ring to associate buffer pool with
+ *
+ * Returns 0 on success, <0 on failure
+ **/
+static int i40e_xsk_pool_disable(struct i40e_vsi *vsi, u16 qid)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct xsk_buff_pool *pool;
+	bool if_running;
+	int err;
+
+	pool = xsk_get_pool_from_qid(netdev, qid);
+	if (!pool)
+		return -EINVAL;
+
+	if_running = netif_running(vsi->netdev) && i40e_enabled_xdp_vsi(vsi);
+
+	if (if_running) {
+		err = i40e_queue_pair_disable(vsi, qid);
+		if (err)
+			return err;
+	}
+
+	clear_bit(qid, vsi->af_xdp_zc_qps);
+	xsk_pool_dma_unmap(pool, I40E_RX_DMA_ATTR);
+
+	if (if_running) {
+		err = i40e_realloc_rx_xdp_bi(vsi->rx_rings[qid], false);
+		if (err)
+			return err;
+		err = i40e_queue_pair_enable(vsi, qid);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * i40e_xsk_pool_setup - Enable/disassociate an AF_XDP buffer pool to/from
+ * a ring/qid
+ * @vsi: Current VSI
+ * @pool: Buffer pool to enable/associate to a ring, or NULL to disable
+ * @qid: Rx ring to (dis)associate buffer pool (from)to
+ *
+ * This function enables or disables a buffer pool to a certain ring.
+ *
+ * Returns 0 on success, <0 on failure
+ **/
+int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
+			u16 qid)
+{
+	return pool ? i40e_xsk_pool_enable(vsi, pool, qid) :
+		i40e_xsk_pool_disable(vsi, qid);
+}
+
+/**
+ * i40e_run_xdp_zc - Executes an XDP program on an xdp_buff
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ * @xdp_prog: XDP program to run
+ *
+ * Returns any of I40E_XDP_{PASS, CONSUMED, TX, REDIR}
+ **/
+static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp,
+			   struct bpf_prog *xdp_prog)
+{
+	int err, result = I40E_XDP_PASS;
+	struct i40e_ring *xdp_ring;
+	u32 act;
+
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+
+	if (likely(act == XDP_REDIRECT)) {
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		if (!err)
+			return I40E_XDP_REDIR;
+		if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
+			result = I40E_XDP_EXIT;
+		else
+			result = I40E_XDP_CONSUMED;
+		goto out_failure;
+	}
+
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
+		result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
+		if (result == I40E_XDP_CONSUMED)
+			goto out_failure;
+		break;
+	case XDP_DROP:
+		result = I40E_XDP_CONSUMED;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
+		fallthrough;
+	case XDP_ABORTED:
+		result = I40E_XDP_CONSUMED;
+out_failure:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+	}
+	return result;
+}
+
+bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
+{
+	u16 ntu = rx_ring->next_to_use;
+	union i40e_rx_desc *rx_desc;
+	struct xdp_buff **xdp;
+	u32 nb_buffs, i;
+	dma_addr_t dma;
+
+	rx_desc = I40E_RX_DESC(rx_ring, ntu);
+	xdp = i40e_rx_bi(rx_ring, ntu);
+
+	nb_buffs = min_t(u16, count, rx_ring->count - ntu);
+	nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs);
+	if (!nb_buffs)
+		return false;
+
+	i = nb_buffs;
+	while (i--) {
+		dma = xsk_buff_xdp_get_dma(*xdp);
+		rx_desc->read.pkt_addr = cpu_to_le64(dma);
+		rx_desc->read.hdr_addr = 0;
+
+		rx_desc++;
+		xdp++;
+	}
+
+	ntu += nb_buffs;
+	if (ntu == rx_ring->count) {
+		rx_desc = I40E_RX_DESC(rx_ring, 0);
+		ntu = 0;
+	}
+
+	/* clear the status bits for the next_to_use descriptor */
+	rx_desc->wb.qword1.status_error_len = 0;
+	i40e_release_rx_desc(rx_ring, ntu);
+
+	return count == nb_buffs;
+}
+
+/**
+ * i40e_construct_skb_zc - Create skbuff from zero-copy Rx buffer
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff
+ *
+ * This functions allocates a new skb from a zero-copy Rx buffer.
+ *
+ * Returns the skb, or NULL on failure.
+ **/
+static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
+					     struct xdp_buff *xdp)
+{
+	unsigned int totalsize = xdp->data_end - xdp->data_meta;
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	struct skb_shared_info *sinfo = NULL;
+	struct sk_buff *skb;
+	u32 nr_frags = 0;
+
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+	}
+	net_prefetch(xdp->data_meta);
+
+	/* allocate a skb to store the frags */
+	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		goto out;
+
+	memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+	       ALIGN(totalsize, sizeof(long)));
+
+	if (metasize) {
+		skb_metadata_set(skb, metasize);
+		__skb_pull(skb, metasize);
+	}
+
+	if (likely(!xdp_buff_has_frags(xdp)))
+		goto out;
+
+	for (int i = 0; i < nr_frags; i++) {
+		struct skb_shared_info *skinfo = skb_shinfo(skb);
+		skb_frag_t *frag = &sinfo->frags[i];
+		struct page *page;
+		void *addr;
+
+		page = dev_alloc_page();
+		if (!page) {
+			dev_kfree_skb(skb);
+			return NULL;
+		}
+		addr = page_to_virt(page);
+
+		memcpy(addr, skb_frag_page(frag), skb_frag_size(frag));
+
+		__skb_fill_page_desc_noacc(skinfo, skinfo->nr_frags++,
+					   addr, 0, skb_frag_size(frag));
+	}
+
+out:
+	xsk_buff_free(xdp);
+	return skb;
+}
+
+static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring,
+				      struct xdp_buff *xdp_buff,
+				      union i40e_rx_desc *rx_desc,
+				      unsigned int *rx_packets,
+				      unsigned int *rx_bytes,
+				      unsigned int xdp_res,
+				      bool *failure)
+{
+	struct sk_buff *skb;
+
+	*rx_packets = 1;
+	*rx_bytes = xdp_get_buff_len(xdp_buff);
+
+	if (likely(xdp_res == I40E_XDP_REDIR) || xdp_res == I40E_XDP_TX)
+		return;
+
+	if (xdp_res == I40E_XDP_EXIT) {
+		*failure = true;
+		return;
+	}
+
+	if (xdp_res == I40E_XDP_CONSUMED) {
+		xsk_buff_free(xdp_buff);
+		return;
+	}
+	if (xdp_res == I40E_XDP_PASS) {
+		/* NB! We are not checking for errors using
+		 * i40e_test_staterr with
+		 * BIT(I40E_RXD_QW1_ERROR_SHIFT). This is due to that
+		 * SBP is *not* set in PRT_SBPVSI (default not set).
+		 */
+		skb = i40e_construct_skb_zc(rx_ring, xdp_buff);
+		if (!skb) {
+			rx_ring->rx_stats.alloc_buff_failed++;
+			*rx_packets = 0;
+			*rx_bytes = 0;
+			return;
+		}
+
+		if (eth_skb_pad(skb)) {
+			*rx_packets = 0;
+			*rx_bytes = 0;
+			return;
+		}
+
+		i40e_process_skb_fields(rx_ring, rx_desc, skb);
+		napi_gro_receive(&rx_ring->q_vector->napi, skb);
+		return;
+	}
+
+	/* Should never get here, as all valid cases have been handled already.
+	 */
+	WARN_ON_ONCE(1);
+}
+
+static int
+i40e_add_xsk_frag(struct i40e_ring *rx_ring, struct xdp_buff *first,
+		  struct xdp_buff *xdp, const unsigned int size)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(first);
+
+	if (!xdp_buff_has_frags(first)) {
+		sinfo->nr_frags = 0;
+		sinfo->xdp_frags_size = 0;
+		xdp_buff_set_frags_flag(first);
+	}
+
+	if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
+		xsk_buff_free(first);
+		return -ENOMEM;
+	}
+
+	__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++,
+				   virt_to_page(xdp->data_hard_start),
+				   XDP_PACKET_HEADROOM, size);
+	sinfo->xdp_frags_size += size;
+	xsk_buff_add_frag(xdp);
+
+	return 0;
+}
+
+/**
+ * i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring
+ * @rx_ring: Rx ring
+ * @budget: NAPI budget
+ *
+ * Returns amount of work completed
+ **/
+int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+	u16 next_to_process = rx_ring->next_to_process;
+	u16 next_to_clean = rx_ring->next_to_clean;
+	unsigned int xdp_res, xdp_xmit = 0;
+	struct xdp_buff *first = NULL;
+	u32 count = rx_ring->count;
+	struct bpf_prog *xdp_prog;
+	u32 entries_to_alloc;
+	bool failure = false;
+
+	if (next_to_process != next_to_clean)
+		first = *i40e_rx_bi(rx_ring, next_to_clean);
+
+	/* NB! xdp_prog will always be !NULL, due to the fact that
+	 * this path is enabled by setting an XDP program.
+	 */
+	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
+	while (likely(total_rx_packets < (unsigned int)budget)) {
+		union i40e_rx_desc *rx_desc;
+		unsigned int rx_packets;
+		unsigned int rx_bytes;
+		struct xdp_buff *bi;
+		unsigned int size;
+		u64 qword;
+
+		rx_desc = I40E_RX_DESC(rx_ring, next_to_process);
+		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we have
+		 * verified the descriptor has been written back.
+		 */
+		dma_rmb();
+
+		if (i40e_rx_is_programming_status(qword)) {
+			i40e_clean_programming_status(rx_ring,
+						      rx_desc->raw.qword[0],
+						      qword);
+			bi = *i40e_rx_bi(rx_ring, next_to_process);
+			xsk_buff_free(bi);
+			if (++next_to_process == count)
+				next_to_process = 0;
+			continue;
+		}
+
+		size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
+		       I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
+		if (!size)
+			break;
+
+		bi = *i40e_rx_bi(rx_ring, next_to_process);
+		xsk_buff_set_size(bi, size);
+		xsk_buff_dma_sync_for_cpu(bi, rx_ring->xsk_pool);
+
+		if (!first)
+			first = bi;
+		else if (i40e_add_xsk_frag(rx_ring, first, bi, size))
+			break;
+
+		if (++next_to_process == count)
+			next_to_process = 0;
+
+		if (i40e_is_non_eop(rx_ring, rx_desc))
+			continue;
+
+		xdp_res = i40e_run_xdp_zc(rx_ring, first, xdp_prog);
+		i40e_handle_xdp_result_zc(rx_ring, first, rx_desc, &rx_packets,
+					  &rx_bytes, xdp_res, &failure);
+		next_to_clean = next_to_process;
+		if (failure)
+			break;
+		total_rx_packets += rx_packets;
+		total_rx_bytes += rx_bytes;
+		xdp_xmit |= xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR);
+		first = NULL;
+	}
+
+	rx_ring->next_to_clean = next_to_clean;
+	rx_ring->next_to_process = next_to_process;
+
+	entries_to_alloc = I40E_DESC_UNUSED(rx_ring);
+	if (entries_to_alloc >= I40E_RX_BUFFER_WRITE)
+		failure |= !i40e_alloc_rx_buffers_zc(rx_ring, entries_to_alloc);
+
+	i40e_finalize_xdp_rx(rx_ring, xdp_xmit);
+	i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);
+
+	if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
+		if (failure || next_to_clean == rx_ring->next_to_use)
+			xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
+		else
+			xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);
+
+		return (int)total_rx_packets;
+	}
+	return failure ? budget : (int)total_rx_packets;
+}
+
+static void i40e_xmit_pkt(struct i40e_ring *xdp_ring, struct xdp_desc *desc,
+			  unsigned int *total_bytes)
+{
+	u32 cmd = I40E_TX_DESC_CMD_ICRC | xsk_is_eop_desc(desc);
+	struct i40e_tx_desc *tx_desc;
+	dma_addr_t dma;
+
+	dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr);
+	xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len);
+
+	tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
+	tx_desc->buffer_addr = cpu_to_le64(dma);
+	tx_desc->cmd_type_offset_bsz = build_ctob(cmd, 0, desc->len, 0);
+
+	*total_bytes += desc->len;
+}
+
+static void i40e_xmit_pkt_batch(struct i40e_ring *xdp_ring, struct xdp_desc *desc,
+				unsigned int *total_bytes)
+{
+	u16 ntu = xdp_ring->next_to_use;
+	struct i40e_tx_desc *tx_desc;
+	dma_addr_t dma;
+	u32 i;
+
+	loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
+		u32 cmd = I40E_TX_DESC_CMD_ICRC | xsk_is_eop_desc(&desc[i]);
+
+		dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc[i].addr);
+		xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc[i].len);
+
+		tx_desc = I40E_TX_DESC(xdp_ring, ntu++);
+		tx_desc->buffer_addr = cpu_to_le64(dma);
+		tx_desc->cmd_type_offset_bsz = build_ctob(cmd, 0, desc[i].len, 0);
+
+		*total_bytes += desc[i].len;
+	}
+
+	xdp_ring->next_to_use = ntu;
+}
+
+static void i40e_fill_tx_hw_ring(struct i40e_ring *xdp_ring, struct xdp_desc *descs, u32 nb_pkts,
+				 unsigned int *total_bytes)
+{
+	u32 batched, leftover, i;
+
+	batched = nb_pkts & ~(PKTS_PER_BATCH - 1);
+	leftover = nb_pkts & (PKTS_PER_BATCH - 1);
+	for (i = 0; i < batched; i += PKTS_PER_BATCH)
+		i40e_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
+	for (i = batched; i < batched + leftover; i++)
+		i40e_xmit_pkt(xdp_ring, &descs[i], total_bytes);
+}
+
+static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
+{
+	u16 ntu = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1;
+	struct i40e_tx_desc *tx_desc;
+
+	tx_desc = I40E_TX_DESC(xdp_ring, ntu);
+	tx_desc->cmd_type_offset_bsz |= cpu_to_le64(I40E_TX_DESC_CMD_RS << I40E_TXD_QW1_CMD_SHIFT);
+}
+
+/**
+ * i40e_xmit_zc - Performs zero-copy Tx AF_XDP
+ * @xdp_ring: XDP Tx ring
+ * @budget: NAPI budget
+ *
+ * Returns true if the work is finished.
+ **/
+static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
+{
+	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
+	u32 nb_pkts, nb_processed = 0;
+	unsigned int total_bytes = 0;
+
+	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
+	if (!nb_pkts)
+		return true;
+
+	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
+		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
+		i40e_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
+		xdp_ring->next_to_use = 0;
+	}
+
+	i40e_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
+			     &total_bytes);
+
+	/* Request an interrupt for the last frame and bump tail ptr. */
+	i40e_set_rs_bit(xdp_ring);
+	i40e_xdp_ring_update_tail(xdp_ring);
+
+	i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);
+
+	return nb_pkts < budget;
+}
+
+/**
+ * i40e_clean_xdp_tx_buffer - Frees and unmaps an XDP Tx entry
+ * @tx_ring: XDP Tx ring
+ * @tx_bi: Tx buffer info to clean
+ **/
+static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
+				     struct i40e_tx_buffer *tx_bi)
+{
+	xdp_return_frame(tx_bi->xdpf);
+	tx_ring->xdp_tx_active--;
+	dma_unmap_single(tx_ring->dev,
+			 dma_unmap_addr(tx_bi, dma),
+			 dma_unmap_len(tx_bi, len), DMA_TO_DEVICE);
+	dma_unmap_len_set(tx_bi, len, 0);
+}
+
+/**
+ * i40e_clean_xdp_tx_irq - Completes AF_XDP entries, and cleans XDP entries
+ * @vsi: Current VSI
+ * @tx_ring: XDP Tx ring
+ *
+ * Returns true if cleanup/transmission is done.
+ **/
+bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
+{
+	struct xsk_buff_pool *bp = tx_ring->xsk_pool;
+	u32 i, completed_frames, xsk_frames = 0;
+	u32 head_idx = i40e_get_head(tx_ring);
+	struct i40e_tx_buffer *tx_bi;
+	unsigned int ntc;
+
+	if (head_idx < tx_ring->next_to_clean)
+		head_idx += tx_ring->count;
+	completed_frames = head_idx - tx_ring->next_to_clean;
+
+	if (completed_frames == 0)
+		goto out_xmit;
+
+	if (likely(!tx_ring->xdp_tx_active)) {
+		xsk_frames = completed_frames;
+		goto skip;
+	}
+
+	ntc = tx_ring->next_to_clean;
+
+	for (i = 0; i < completed_frames; i++) {
+		tx_bi = &tx_ring->tx_bi[ntc];
+
+		if (tx_bi->xdpf) {
+			i40e_clean_xdp_tx_buffer(tx_ring, tx_bi);
+			tx_bi->xdpf = NULL;
+		} else {
+			xsk_frames++;
+		}
+
+		if (++ntc >= tx_ring->count)
+			ntc = 0;
+	}
+
+skip:
+	tx_ring->next_to_clean += completed_frames;
+	if (unlikely(tx_ring->next_to_clean >= tx_ring->count))
+		tx_ring->next_to_clean -= tx_ring->count;
+
+	if (xsk_frames)
+		xsk_tx_completed(bp, xsk_frames);
+
+	i40e_arm_wb(tx_ring, vsi, completed_frames);
+
+out_xmit:
+	if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
+		xsk_set_tx_need_wakeup(tx_ring->xsk_pool);
+
+	return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring));
+}
+
+/**
+ * i40e_xsk_wakeup - Implements the ndo_xsk_wakeup
+ * @dev: the netdevice
+ * @queue_id: queue id to wake up
+ * @flags: ignored in our case since we have Rx and Tx in the same NAPI.
+ *
+ * Returns <0 for errors, 0 otherwise.
+ **/
+int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
+{
+	struct i40e_netdev_priv *np = netdev_priv(dev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_ring *ring;
+
+	if (test_bit(__I40E_CONFIG_BUSY, pf->state))
+		return -EAGAIN;
+
+	if (test_bit(__I40E_VSI_DOWN, vsi->state))
+		return -ENETDOWN;
+
+	if (!i40e_enabled_xdp_vsi(vsi))
+		return -EINVAL;
+
+	if (queue_id >= vsi->num_queue_pairs)
+		return -EINVAL;
+
+	if (!vsi->xdp_rings[queue_id]->xsk_pool)
+		return -EINVAL;
+
+	ring = vsi->xdp_rings[queue_id];
+
+	/* The idea here is that if NAPI is running, mark a miss, so
+	 * it will run again. If not, trigger an interrupt and
+	 * schedule the NAPI from interrupt context. If NAPI would be
+	 * scheduled here, the interrupt affinity would not be
+	 * honored.
+	 */
+	if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi))
+		i40e_force_wb(vsi, ring->q_vector);
+
+	return 0;
+}
+
+void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring)
+{
+	u16 ntc = rx_ring->next_to_clean;
+	u16 ntu = rx_ring->next_to_use;
+
+	while (ntc != ntu) {
+		struct xdp_buff *rx_bi = *i40e_rx_bi(rx_ring, ntc);
+
+		xsk_buff_free(rx_bi);
+		ntc++;
+		if (ntc >= rx_ring->count)
+			ntc = 0;
+	}
+}
+
+/**
+ * i40e_xsk_clean_tx_ring - Clean the XDP Tx ring on shutdown
+ * @tx_ring: XDP Tx ring
+ **/
+void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring)
+{
+	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
+	struct xsk_buff_pool *bp = tx_ring->xsk_pool;
+	struct i40e_tx_buffer *tx_bi;
+	u32 xsk_frames = 0;
+
+	while (ntc != ntu) {
+		tx_bi = &tx_ring->tx_bi[ntc];
+
+		if (tx_bi->xdpf)
+			i40e_clean_xdp_tx_buffer(tx_ring, tx_bi);
+		else
+			xsk_frames++;
+
+		tx_bi->xdpf = NULL;
+
+		ntc++;
+		if (ntc >= tx_ring->count)
+			ntc = 0;
+	}
+
+	if (xsk_frames)
+		xsk_tx_completed(bp, xsk_frames);
+}
+
+/**
+ * i40e_xsk_any_rx_ring_enabled - Checks if Rx rings have an AF_XDP
+ * buffer pool attached
+ * @vsi: vsi
+ *
+ * Returns true if any of the Rx rings has an AF_XDP buffer pool attached
+ **/
+bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi)
+{
+	struct net_device *netdev = vsi->netdev;
+	int i;
+
+	for (i = 0; i < vsi->num_queue_pairs; i++) {
+		if (xsk_get_pool_from_qid(netdev, i))
+			return true;
+	}
+
+	return false;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
new file mode 100644
index 0000000000..821df248f8
--- /dev/null
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2018 Intel Corporation. */
+
+#ifndef _I40E_XSK_H_
+#define _I40E_XSK_H_
+
+/* This value should match the pragma in the loop_unrolled_for
+ * macro. Why 4? It is strictly empirical. It seems to be a good
+ * compromise between the advantage of having simultaneous outstanding
+ * reads to the DMA array that can hide each others latency and the
+ * disadvantage of having a larger code path.
+ */
+#define PKTS_PER_BATCH 4
+
+#ifdef __clang__
+#define loop_unrolled_for _Pragma("clang loop unroll_count(4)") for
+#elif __GNUC__ >= 8
+#define loop_unrolled_for _Pragma("GCC unroll 4") for
+#else
+#define loop_unrolled_for for
+#endif
+
+struct i40e_vsi;
+struct xsk_buff_pool;
+
+int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);
+int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair);
+int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
+			u16 qid);
+bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
+int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
+
+bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
+int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
+int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
+void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
+
+#endif /* _I40E_XSK_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/Makefile b/drivers/net/ethernet/intel/iavf/Makefile
new file mode 100644
index 0000000000..9c3e45c54d
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2013 - 2018 Intel Corporation.
+#
+# Makefile for the Intel(R) Ethernet Adaptive Virtual Function (iavf)
+# driver
+#
+#
+
+ccflags-y += -I$(src)
+subdir-ccflags-y += -I$(src)
+
+obj-$(CONFIG_IAVF) += iavf.o
+
+iavf-objs := iavf_main.o iavf_ethtool.o iavf_virtchnl.o iavf_fdir.o \
+	     iavf_adv_rss.o \
+	     iavf_txrx.o iavf_common.o iavf_adminq.o iavf_client.o
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
new file mode 100644
index 0000000000..431d9d62c8
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -0,0 +1,592 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_H_
+#define _IAVF_H_
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/interrupt.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/sctp.h>
+#include <linux/ipv6.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/gfp.h>
+#include <linux/skbuff.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/socket.h>
+#include <linux/jiffies.h>
+#include <net/ip6_checksum.h>
+#include <net/pkt_cls.h>
+#include <net/pkt_sched.h>
+#include <net/udp.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+
+#include "iavf_type.h"
+#include <linux/avf/virtchnl.h>
+#include "iavf_txrx.h"
+#include "iavf_fdir.h"
+#include "iavf_adv_rss.h"
+#include <linux/bitmap.h>
+
+#define DEFAULT_DEBUG_LEVEL_SHIFT 3
+#define PFX "iavf: "
+
+int iavf_status_to_errno(enum iavf_status status);
+int virtchnl_status_to_errno(enum virtchnl_status_code v_status);
+
+/* VSI state flags shared with common code */
+enum iavf_vsi_state_t {
+	__IAVF_VSI_DOWN,
+	/* This must be last as it determines the size of the BITMAP */
+	__IAVF_VSI_STATE_SIZE__,
+};
+
+/* dummy struct to make common code less painful */
+struct iavf_vsi {
+	struct iavf_adapter *back;
+	struct net_device *netdev;
+	u16 seid;
+	u16 id;
+	DECLARE_BITMAP(state, __IAVF_VSI_STATE_SIZE__);
+	int base_vector;
+	u16 qs_handle;
+	void *priv;     /* client driver data reference. */
+};
+
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define IAVF_RX_BUFFER_WRITE	16	/* Must be power of 2 */
+#define IAVF_DEFAULT_TXD	512
+#define IAVF_DEFAULT_RXD	512
+#define IAVF_MAX_TXD		4096
+#define IAVF_MIN_TXD		64
+#define IAVF_MAX_RXD		4096
+#define IAVF_MIN_RXD		64
+#define IAVF_REQ_DESCRIPTOR_MULTIPLE	32
+#define IAVF_MAX_AQ_BUF_SIZE	4096
+#define IAVF_AQ_LEN		32
+#define IAVF_AQ_MAX_ERR	20 /* times to try before resetting AQ */
+
+#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+
+#define IAVF_RX_DESC(R, i) (&(((union iavf_32byte_rx_desc *)((R)->desc))[i]))
+#define IAVF_TX_DESC(R, i) (&(((struct iavf_tx_desc *)((R)->desc))[i]))
+#define IAVF_TX_CTXTDESC(R, i) \
+	(&(((struct iavf_tx_context_desc *)((R)->desc))[i]))
+#define IAVF_MAX_REQ_QUEUES 16
+
+#define IAVF_HKEY_ARRAY_SIZE ((IAVF_VFQF_HKEY_MAX_INDEX + 1) * 4)
+#define IAVF_HLUT_ARRAY_SIZE ((IAVF_VFQF_HLUT_MAX_INDEX + 1) * 4)
+#define IAVF_MBPS_DIVISOR	125000 /* divisor to convert to Mbps */
+#define IAVF_MBPS_QUANTA	50
+
+#define IAVF_VIRTCHNL_VF_RESOURCE_SIZE					\
+	virtchnl_struct_size((struct virtchnl_vf_resource *)NULL,	\
+			     vsi_res, IAVF_MAX_VF_VSI)
+
+/* MAX_MSIX_Q_VECTORS of these are allocated,
+ * but we only use one per queue-specific vector.
+ */
+struct iavf_q_vector {
+	struct iavf_adapter *adapter;
+	struct iavf_vsi *vsi;
+	struct napi_struct napi;
+	struct iavf_ring_container rx;
+	struct iavf_ring_container tx;
+	u32 ring_mask;
+	u8 itr_countdown;	/* when 0 should adjust adaptive ITR */
+	u8 num_ringpairs;	/* total number of ring pairs in vector */
+	u16 v_idx;		/* index in the vsi->q_vector array. */
+	u16 reg_idx;		/* register index of the interrupt */
+	char name[IFNAMSIZ + 15];
+	bool arm_wb_state;
+	cpumask_t affinity_mask;
+	struct irq_affinity_notify affinity_notify;
+};
+
+/* Helper macros to switch between ints/sec and what the register uses.
+ * And yes, it's the same math going both ways.  The lowest value
+ * supported by all of the iavf hardware is 8.
+ */
+#define EITR_INTS_PER_SEC_TO_REG(_eitr) \
+	((_eitr) ? (1000000000 / ((_eitr) * 256)) : 8)
+#define EITR_REG_TO_INTS_PER_SEC EITR_INTS_PER_SEC_TO_REG
+
+#define IAVF_DESC_UNUSED(R) \
+	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+	(R)->next_to_clean - (R)->next_to_use - 1)
+
+#define OTHER_VECTOR 1
+#define NONQ_VECS (OTHER_VECTOR)
+
+#define MIN_MSIX_Q_VECTORS 1
+#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NONQ_VECS)
+
+#define IAVF_QUEUE_END_OF_LIST 0x7FF
+#define IAVF_FREE_VECTOR 0x7FFF
+struct iavf_mac_filter {
+	struct list_head list;
+	u8 macaddr[ETH_ALEN];
+	struct {
+		u8 is_new_mac:1;    /* filter is new, wait for PF decision */
+		u8 remove:1;        /* filter needs to be removed */
+		u8 add:1;           /* filter needs to be added */
+		u8 is_primary:1;    /* filter is a default VF MAC */
+		u8 add_handled:1;   /* received response for filter add */
+		u8 padding:3;
+	};
+};
+
+#define IAVF_VLAN(vid, tpid) ((struct iavf_vlan){ vid, tpid })
+struct iavf_vlan {
+	u16 vid;
+	u16 tpid;
+};
+
+enum iavf_vlan_state_t {
+	IAVF_VLAN_INVALID,
+	IAVF_VLAN_ADD,		/* filter needs to be added */
+	IAVF_VLAN_IS_NEW,	/* filter is new, wait for PF answer */
+	IAVF_VLAN_ACTIVE,	/* filter is accepted by PF */
+	IAVF_VLAN_DISABLE,	/* filter needs to be deleted by PF, then marked INACTIVE */
+	IAVF_VLAN_INACTIVE,	/* filter is inactive, we are in IFF_DOWN */
+	IAVF_VLAN_REMOVE,	/* filter needs to be removed from list */
+};
+
+struct iavf_vlan_filter {
+	struct list_head list;
+	struct iavf_vlan vlan;
+	enum iavf_vlan_state_t state;
+};
+
+#define IAVF_MAX_TRAFFIC_CLASS	4
+/* State of traffic class creation */
+enum iavf_tc_state_t {
+	__IAVF_TC_INVALID, /* no traffic class, default state */
+	__IAVF_TC_RUNNING, /* traffic classes have been created */
+};
+
+/* channel info */
+struct iavf_channel_config {
+	struct virtchnl_channel_info ch_info[IAVF_MAX_TRAFFIC_CLASS];
+	enum iavf_tc_state_t state;
+	u8 total_qps;
+};
+
+/* State of cloud filter */
+enum iavf_cloud_filter_state_t {
+	__IAVF_CF_INVALID,	 /* cloud filter not added */
+	__IAVF_CF_ADD_PENDING, /* cloud filter pending add by the PF */
+	__IAVF_CF_DEL_PENDING, /* cloud filter pending del by the PF */
+	__IAVF_CF_ACTIVE,	 /* cloud filter is active */
+};
+
+/* Driver state. The order of these is important! */
+enum iavf_state_t {
+	__IAVF_STARTUP,		/* driver loaded, probe complete */
+	__IAVF_REMOVE,		/* driver is being unloaded */
+	__IAVF_INIT_VERSION_CHECK,	/* aq msg sent, awaiting reply */
+	__IAVF_INIT_GET_RESOURCES,	/* aq msg sent, awaiting reply */
+	__IAVF_INIT_EXTENDED_CAPS,	/* process extended caps which require aq msg exchange */
+	__IAVF_INIT_CONFIG_ADAPTER,
+	__IAVF_INIT_SW,		/* got resources, setting up structs */
+	__IAVF_INIT_FAILED,	/* init failed, restarting procedure */
+	__IAVF_RESETTING,		/* in reset */
+	__IAVF_COMM_FAILED,		/* communication with PF failed */
+	/* Below here, watchdog is running */
+	__IAVF_DOWN,			/* ready, can be opened */
+	__IAVF_DOWN_PENDING,		/* descending, waiting for watchdog */
+	__IAVF_TESTING,		/* in ethtool self-test */
+	__IAVF_RUNNING,		/* opened, working */
+};
+
+enum iavf_critical_section_t {
+	__IAVF_IN_REMOVE_TASK,	/* device being removed */
+};
+
+#define IAVF_CLOUD_FIELD_OMAC		0x01
+#define IAVF_CLOUD_FIELD_IMAC		0x02
+#define IAVF_CLOUD_FIELD_IVLAN	0x04
+#define IAVF_CLOUD_FIELD_TEN_ID	0x08
+#define IAVF_CLOUD_FIELD_IIP		0x10
+
+#define IAVF_CF_FLAGS_OMAC	IAVF_CLOUD_FIELD_OMAC
+#define IAVF_CF_FLAGS_IMAC	IAVF_CLOUD_FIELD_IMAC
+#define IAVF_CF_FLAGS_IMAC_IVLAN	(IAVF_CLOUD_FIELD_IMAC |\
+					 IAVF_CLOUD_FIELD_IVLAN)
+#define IAVF_CF_FLAGS_IMAC_TEN_ID	(IAVF_CLOUD_FIELD_IMAC |\
+					 IAVF_CLOUD_FIELD_TEN_ID)
+#define IAVF_CF_FLAGS_OMAC_TEN_ID_IMAC	(IAVF_CLOUD_FIELD_OMAC |\
+						 IAVF_CLOUD_FIELD_IMAC |\
+						 IAVF_CLOUD_FIELD_TEN_ID)
+#define IAVF_CF_FLAGS_IMAC_IVLAN_TEN_ID	(IAVF_CLOUD_FIELD_IMAC |\
+						 IAVF_CLOUD_FIELD_IVLAN |\
+						 IAVF_CLOUD_FIELD_TEN_ID)
+#define IAVF_CF_FLAGS_IIP	IAVF_CLOUD_FIELD_IIP
+
+/* bookkeeping of cloud filters */
+struct iavf_cloud_filter {
+	enum iavf_cloud_filter_state_t state;
+	struct list_head list;
+	struct virtchnl_filter f;
+	unsigned long cookie;
+	bool del;		/* filter needs to be deleted */
+	bool add;		/* filter needs to be added */
+};
+
+#define IAVF_RESET_WAIT_MS 10
+#define IAVF_RESET_WAIT_DETECTED_COUNT 500
+#define IAVF_RESET_WAIT_COMPLETE_COUNT 2000
+
+/* board specific private data structure */
+struct iavf_adapter {
+	struct workqueue_struct *wq;
+	struct work_struct reset_task;
+	struct work_struct adminq_task;
+	struct work_struct finish_config;
+	struct delayed_work client_task;
+	wait_queue_head_t down_waitqueue;
+	wait_queue_head_t reset_waitqueue;
+	wait_queue_head_t vc_waitqueue;
+	struct iavf_q_vector *q_vectors;
+	struct list_head vlan_filter_list;
+	int num_vlan_filters;
+	struct list_head mac_filter_list;
+	struct mutex crit_lock;
+	struct mutex client_lock;
+	/* Lock to protect accesses to MAC and VLAN lists */
+	spinlock_t mac_vlan_list_lock;
+	char misc_vector_name[IFNAMSIZ + 9];
+	int num_active_queues;
+	int num_req_queues;
+
+	/* TX */
+	struct iavf_ring *tx_rings;
+	u32 tx_timeout_count;
+	u32 tx_desc_count;
+
+	/* RX */
+	struct iavf_ring *rx_rings;
+	u64 hw_csum_rx_error;
+	u32 rx_desc_count;
+	int num_msix_vectors;
+	int num_rdma_msix;
+	int rdma_base_vector;
+	u32 client_pending;
+	struct iavf_client_instance *cinst;
+	struct msix_entry *msix_entries;
+
+	u32 flags;
+#define IAVF_FLAG_RX_CSUM_ENABLED		BIT(0)
+#define IAVF_FLAG_PF_COMMS_FAILED		BIT(3)
+#define IAVF_FLAG_RESET_PENDING		BIT(4)
+#define IAVF_FLAG_RESET_NEEDED		BIT(5)
+#define IAVF_FLAG_WB_ON_ITR_CAPABLE		BIT(6)
+#define IAVF_FLAG_SERVICE_CLIENT_REQUESTED	BIT(9)
+#define IAVF_FLAG_CLIENT_NEEDS_OPEN		BIT(10)
+#define IAVF_FLAG_CLIENT_NEEDS_CLOSE		BIT(11)
+#define IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS	BIT(12)
+#define IAVF_FLAG_LEGACY_RX			BIT(15)
+#define IAVF_FLAG_REINIT_ITR_NEEDED		BIT(16)
+#define IAVF_FLAG_QUEUES_DISABLED		BIT(17)
+#define IAVF_FLAG_SETUP_NETDEV_FEATURES		BIT(18)
+#define IAVF_FLAG_REINIT_MSIX_NEEDED		BIT(20)
+#define IAVF_FLAG_FDIR_ENABLED			BIT(21)
+/* duplicates for common code */
+#define IAVF_FLAG_DCB_ENABLED			0
+	/* flags for admin queue service task */
+	u64 aq_required;
+#define IAVF_FLAG_AQ_ENABLE_QUEUES		BIT_ULL(0)
+#define IAVF_FLAG_AQ_DISABLE_QUEUES		BIT_ULL(1)
+#define IAVF_FLAG_AQ_ADD_MAC_FILTER		BIT_ULL(2)
+#define IAVF_FLAG_AQ_ADD_VLAN_FILTER		BIT_ULL(3)
+#define IAVF_FLAG_AQ_DEL_MAC_FILTER		BIT_ULL(4)
+#define IAVF_FLAG_AQ_DEL_VLAN_FILTER		BIT_ULL(5)
+#define IAVF_FLAG_AQ_CONFIGURE_QUEUES		BIT_ULL(6)
+#define IAVF_FLAG_AQ_MAP_VECTORS		BIT_ULL(7)
+#define IAVF_FLAG_AQ_HANDLE_RESET		BIT_ULL(8)
+#define IAVF_FLAG_AQ_CONFIGURE_RSS		BIT_ULL(9) /* direct AQ config */
+#define IAVF_FLAG_AQ_GET_CONFIG			BIT_ULL(10)
+/* Newer style, RSS done by the PF so we can ignore hardware vagaries. */
+#define IAVF_FLAG_AQ_GET_HENA			BIT_ULL(11)
+#define IAVF_FLAG_AQ_SET_HENA			BIT_ULL(12)
+#define IAVF_FLAG_AQ_SET_RSS_KEY		BIT_ULL(13)
+#define IAVF_FLAG_AQ_SET_RSS_LUT		BIT_ULL(14)
+#define IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE	BIT_ULL(15)
+#define IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING	BIT_ULL(19)
+#define IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING	BIT_ULL(20)
+#define IAVF_FLAG_AQ_ENABLE_CHANNELS		BIT_ULL(21)
+#define IAVF_FLAG_AQ_DISABLE_CHANNELS		BIT_ULL(22)
+#define IAVF_FLAG_AQ_ADD_CLOUD_FILTER		BIT_ULL(23)
+#define IAVF_FLAG_AQ_DEL_CLOUD_FILTER		BIT_ULL(24)
+#define IAVF_FLAG_AQ_ADD_FDIR_FILTER		BIT_ULL(25)
+#define IAVF_FLAG_AQ_DEL_FDIR_FILTER		BIT_ULL(26)
+#define IAVF_FLAG_AQ_ADD_ADV_RSS_CFG		BIT_ULL(27)
+#define IAVF_FLAG_AQ_DEL_ADV_RSS_CFG		BIT_ULL(28)
+#define IAVF_FLAG_AQ_REQUEST_STATS		BIT_ULL(29)
+#define IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS	BIT_ULL(30)
+#define IAVF_FLAG_AQ_ENABLE_CTAG_VLAN_STRIPPING		BIT_ULL(31)
+#define IAVF_FLAG_AQ_DISABLE_CTAG_VLAN_STRIPPING	BIT_ULL(32)
+#define IAVF_FLAG_AQ_ENABLE_STAG_VLAN_STRIPPING		BIT_ULL(33)
+#define IAVF_FLAG_AQ_DISABLE_STAG_VLAN_STRIPPING	BIT_ULL(34)
+#define IAVF_FLAG_AQ_ENABLE_CTAG_VLAN_INSERTION		BIT_ULL(35)
+#define IAVF_FLAG_AQ_DISABLE_CTAG_VLAN_INSERTION	BIT_ULL(36)
+#define IAVF_FLAG_AQ_ENABLE_STAG_VLAN_INSERTION		BIT_ULL(37)
+#define IAVF_FLAG_AQ_DISABLE_STAG_VLAN_INSERTION	BIT_ULL(38)
+
+	/* flags for processing extended capability messages during
+	 * __IAVF_INIT_EXTENDED_CAPS. Each capability exchange requires
+	 * both a SEND and a RECV step, which must be processed in sequence.
+	 *
+	 * During the __IAVF_INIT_EXTENDED_CAPS state, the driver will
+	 * process one flag at a time during each state loop.
+	 */
+	u64 extended_caps;
+#define IAVF_EXTENDED_CAP_SEND_VLAN_V2			BIT_ULL(0)
+#define IAVF_EXTENDED_CAP_RECV_VLAN_V2			BIT_ULL(1)
+
+#define IAVF_EXTENDED_CAPS				\
+	(IAVF_EXTENDED_CAP_SEND_VLAN_V2 |		\
+	 IAVF_EXTENDED_CAP_RECV_VLAN_V2)
+
+	/* Lock to prevent possible clobbering of
+	 * current_netdev_promisc_flags
+	 */
+	spinlock_t current_netdev_promisc_flags_lock;
+	netdev_features_t current_netdev_promisc_flags;
+
+	/* OS defined structs */
+	struct net_device *netdev;
+	struct pci_dev *pdev;
+
+	struct iavf_hw hw; /* defined in iavf_type.h */
+
+	enum iavf_state_t state;
+	enum iavf_state_t last_state;
+	unsigned long crit_section;
+
+	struct delayed_work watchdog_task;
+	bool netdev_registered;
+	bool link_up;
+	enum virtchnl_link_speed link_speed;
+	/* This is only populated if the VIRTCHNL_VF_CAP_ADV_LINK_SPEED is set
+	 * in vf_res->vf_cap_flags. Use ADV_LINK_SUPPORT macro to determine if
+	 * this field is valid. This field should be used going forward and the
+	 * enum virtchnl_link_speed above should be considered the legacy way of
+	 * storing/communicating link speeds.
+	 */
+	u32 link_speed_mbps;
+
+	enum virtchnl_ops current_op;
+#define CLIENT_ALLOWED(_a) ((_a)->vf_res ? \
+			    (_a)->vf_res->vf_cap_flags & \
+				VIRTCHNL_VF_OFFLOAD_RDMA : \
+			    0)
+#define CLIENT_ENABLED(_a) ((_a)->cinst)
+/* RSS by the PF should be preferred over RSS via other methods. */
+#define RSS_PF(_a) ((_a)->vf_res->vf_cap_flags & \
+		    VIRTCHNL_VF_OFFLOAD_RSS_PF)
+#define RSS_AQ(_a) ((_a)->vf_res->vf_cap_flags & \
+		    VIRTCHNL_VF_OFFLOAD_RSS_AQ)
+#define RSS_REG(_a) (!((_a)->vf_res->vf_cap_flags & \
+		       (VIRTCHNL_VF_OFFLOAD_RSS_AQ | \
+			VIRTCHNL_VF_OFFLOAD_RSS_PF)))
+#define VLAN_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \
+			  VIRTCHNL_VF_OFFLOAD_VLAN)
+#define VLAN_V2_ALLOWED(_a) ((_a)->vf_res->vf_cap_flags & \
+			     VIRTCHNL_VF_OFFLOAD_VLAN_V2)
+#define VLAN_V2_FILTERING_ALLOWED(_a) \
+	(VLAN_V2_ALLOWED((_a)) && \
+	 ((_a)->vlan_v2_caps.filtering.filtering_support.outer || \
+	  (_a)->vlan_v2_caps.filtering.filtering_support.inner))
+#define VLAN_FILTERING_ALLOWED(_a) \
+	(VLAN_ALLOWED((_a)) || VLAN_V2_FILTERING_ALLOWED((_a)))
+#define ADV_LINK_SUPPORT(_a) ((_a)->vf_res->vf_cap_flags & \
+			      VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
+#define FDIR_FLTR_SUPPORT(_a) ((_a)->vf_res->vf_cap_flags & \
+			       VIRTCHNL_VF_OFFLOAD_FDIR_PF)
+#define ADV_RSS_SUPPORT(_a) ((_a)->vf_res->vf_cap_flags & \
+			     VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF)
+	struct virtchnl_vf_resource *vf_res; /* incl. all VSIs */
+	struct virtchnl_vsi_resource *vsi_res; /* our LAN VSI */
+	struct virtchnl_version_info pf_version;
+#define PF_IS_V11(_a) (((_a)->pf_version.major == 1) && \
+		       ((_a)->pf_version.minor == 1))
+	struct virtchnl_vlan_caps vlan_v2_caps;
+	u16 msg_enable;
+	struct iavf_eth_stats current_stats;
+	struct iavf_vsi vsi;
+	u32 aq_wait_count;
+	/* RSS stuff */
+	u64 hena;
+	u16 rss_key_size;
+	u16 rss_lut_size;
+	u8 *rss_key;
+	u8 *rss_lut;
+	/* ADQ related members */
+	struct iavf_channel_config ch_config;
+	u8 num_tc;
+	struct list_head cloud_filter_list;
+	/* lock to protect access to the cloud filter list */
+	spinlock_t cloud_filter_list_lock;
+	u16 num_cloud_filters;
+	/* snapshot of "num_active_queues" before setup_tc for qdisc add
+	 * is invoked. This information is useful during qdisc del flow,
+	 * to restore correct number of queues
+	 */
+	int orig_num_active_queues;
+
+#define IAVF_MAX_FDIR_FILTERS 128	/* max allowed Flow Director filters */
+	u16 fdir_active_fltr;
+	struct list_head fdir_list_head;
+	spinlock_t fdir_fltr_lock;	/* protect the Flow Director filter list */
+
+	struct list_head adv_rss_list_head;
+	spinlock_t adv_rss_lock;	/* protect the RSS management list */
+};
+
+
+/* Ethtool Private Flags */
+
+/* lan device, used by client interface */
+struct iavf_device {
+	struct list_head list;
+	struct iavf_adapter *vf;
+};
+
+/* needed by iavf_ethtool.c */
+extern char iavf_driver_name[];
+
+static inline const char *iavf_state_str(enum iavf_state_t state)
+{
+	switch (state) {
+	case __IAVF_STARTUP:
+		return "__IAVF_STARTUP";
+	case __IAVF_REMOVE:
+		return "__IAVF_REMOVE";
+	case __IAVF_INIT_VERSION_CHECK:
+		return "__IAVF_INIT_VERSION_CHECK";
+	case __IAVF_INIT_GET_RESOURCES:
+		return "__IAVF_INIT_GET_RESOURCES";
+	case __IAVF_INIT_EXTENDED_CAPS:
+		return "__IAVF_INIT_EXTENDED_CAPS";
+	case __IAVF_INIT_CONFIG_ADAPTER:
+		return "__IAVF_INIT_CONFIG_ADAPTER";
+	case __IAVF_INIT_SW:
+		return "__IAVF_INIT_SW";
+	case __IAVF_INIT_FAILED:
+		return "__IAVF_INIT_FAILED";
+	case __IAVF_RESETTING:
+		return "__IAVF_RESETTING";
+	case __IAVF_COMM_FAILED:
+		return "__IAVF_COMM_FAILED";
+	case __IAVF_DOWN:
+		return "__IAVF_DOWN";
+	case __IAVF_DOWN_PENDING:
+		return "__IAVF_DOWN_PENDING";
+	case __IAVF_TESTING:
+		return "__IAVF_TESTING";
+	case __IAVF_RUNNING:
+		return "__IAVF_RUNNING";
+	default:
+		return "__IAVF_UNKNOWN_STATE";
+	}
+}
+
+static inline void iavf_change_state(struct iavf_adapter *adapter,
+				     enum iavf_state_t state)
+{
+	if (adapter->state != state) {
+		adapter->last_state = adapter->state;
+		adapter->state = state;
+	}
+	dev_dbg(&adapter->pdev->dev,
+		"state transition from:%s to:%s\n",
+		iavf_state_str(adapter->last_state),
+		iavf_state_str(adapter->state));
+}
+
+int iavf_up(struct iavf_adapter *adapter);
+void iavf_down(struct iavf_adapter *adapter);
+int iavf_process_config(struct iavf_adapter *adapter);
+int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter);
+void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags);
+void iavf_schedule_aq_request(struct iavf_adapter *adapter, u64 flags);
+void iavf_schedule_finish_config(struct iavf_adapter *adapter);
+void iavf_reset(struct iavf_adapter *adapter);
+void iavf_set_ethtool_ops(struct net_device *netdev);
+void iavf_update_stats(struct iavf_adapter *adapter);
+void iavf_free_all_tx_resources(struct iavf_adapter *adapter);
+void iavf_free_all_rx_resources(struct iavf_adapter *adapter);
+
+void iavf_napi_add_all(struct iavf_adapter *adapter);
+void iavf_napi_del_all(struct iavf_adapter *adapter);
+
+int iavf_send_api_ver(struct iavf_adapter *adapter);
+int iavf_verify_api_ver(struct iavf_adapter *adapter);
+int iavf_send_vf_config_msg(struct iavf_adapter *adapter);
+int iavf_get_vf_config(struct iavf_adapter *adapter);
+int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter);
+int iavf_send_vf_offload_vlan_v2_msg(struct iavf_adapter *adapter);
+void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter);
+u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter);
+void iavf_irq_enable(struct iavf_adapter *adapter, bool flush);
+void iavf_configure_queues(struct iavf_adapter *adapter);
+void iavf_deconfigure_queues(struct iavf_adapter *adapter);
+void iavf_enable_queues(struct iavf_adapter *adapter);
+void iavf_disable_queues(struct iavf_adapter *adapter);
+void iavf_map_queues(struct iavf_adapter *adapter);
+int iavf_request_queues(struct iavf_adapter *adapter, int num);
+void iavf_add_ether_addrs(struct iavf_adapter *adapter);
+void iavf_del_ether_addrs(struct iavf_adapter *adapter);
+void iavf_add_vlans(struct iavf_adapter *adapter);
+void iavf_del_vlans(struct iavf_adapter *adapter);
+void iavf_set_promiscuous(struct iavf_adapter *adapter);
+bool iavf_promiscuous_mode_changed(struct iavf_adapter *adapter);
+void iavf_request_stats(struct iavf_adapter *adapter);
+int iavf_request_reset(struct iavf_adapter *adapter);
+void iavf_get_hena(struct iavf_adapter *adapter);
+void iavf_set_hena(struct iavf_adapter *adapter);
+void iavf_set_rss_key(struct iavf_adapter *adapter);
+void iavf_set_rss_lut(struct iavf_adapter *adapter);
+void iavf_enable_vlan_stripping(struct iavf_adapter *adapter);
+void iavf_disable_vlan_stripping(struct iavf_adapter *adapter);
+void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+			      enum virtchnl_ops v_opcode,
+			      enum iavf_status v_retval, u8 *msg, u16 msglen);
+int iavf_config_rss(struct iavf_adapter *adapter);
+int iavf_lan_add_device(struct iavf_adapter *adapter);
+int iavf_lan_del_device(struct iavf_adapter *adapter);
+void iavf_client_subtask(struct iavf_adapter *adapter);
+void iavf_notify_client_message(struct iavf_vsi *vsi, u8 *msg, u16 len);
+void iavf_notify_client_l2_params(struct iavf_vsi *vsi);
+void iavf_notify_client_open(struct iavf_vsi *vsi);
+void iavf_notify_client_close(struct iavf_vsi *vsi, bool reset);
+void iavf_enable_channels(struct iavf_adapter *adapter);
+void iavf_disable_channels(struct iavf_adapter *adapter);
+void iavf_add_cloud_filter(struct iavf_adapter *adapter);
+void iavf_del_cloud_filter(struct iavf_adapter *adapter);
+void iavf_enable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid);
+void iavf_disable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid);
+void iavf_enable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid);
+void iavf_disable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid);
+void iavf_add_fdir_filter(struct iavf_adapter *adapter);
+void iavf_del_fdir_filter(struct iavf_adapter *adapter);
+void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter);
+void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter);
+struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
+					const u8 *macaddr);
+int iavf_wait_for_reset(struct iavf_adapter *adapter);
+#endif /* _IAVF_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_adminq.c b/drivers/net/ethernet/intel/iavf/iavf_adminq.c
new file mode 100644
index 0000000000..9ffbd24d83
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_adminq.c
@@ -0,0 +1,946 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "iavf_status.h"
+#include "iavf_type.h"
+#include "iavf_register.h"
+#include "iavf_adminq.h"
+#include "iavf_prototype.h"
+
+/**
+ *  iavf_adminq_init_regs - Initialize AdminQ registers
+ *  @hw: pointer to the hardware structure
+ *
+ *  This assumes the alloc_asq and alloc_arq functions have already been called
+ **/
+static void iavf_adminq_init_regs(struct iavf_hw *hw)
+{
+	/* set head and tail registers in our local struct */
+	hw->aq.asq.tail = IAVF_VF_ATQT1;
+	hw->aq.asq.head = IAVF_VF_ATQH1;
+	hw->aq.asq.len  = IAVF_VF_ATQLEN1;
+	hw->aq.asq.bal  = IAVF_VF_ATQBAL1;
+	hw->aq.asq.bah  = IAVF_VF_ATQBAH1;
+	hw->aq.arq.tail = IAVF_VF_ARQT1;
+	hw->aq.arq.head = IAVF_VF_ARQH1;
+	hw->aq.arq.len  = IAVF_VF_ARQLEN1;
+	hw->aq.arq.bal  = IAVF_VF_ARQBAL1;
+	hw->aq.arq.bah  = IAVF_VF_ARQBAH1;
+}
+
+/**
+ *  iavf_alloc_adminq_asq_ring - Allocate Admin Queue send rings
+ *  @hw: pointer to the hardware structure
+ **/
+static enum iavf_status iavf_alloc_adminq_asq_ring(struct iavf_hw *hw)
+{
+	enum iavf_status ret_code;
+
+	ret_code = iavf_allocate_dma_mem(hw, &hw->aq.asq.desc_buf,
+					 iavf_mem_atq_ring,
+					 (hw->aq.num_asq_entries *
+					 sizeof(struct iavf_aq_desc)),
+					 IAVF_ADMINQ_DESC_ALIGNMENT);
+	if (ret_code)
+		return ret_code;
+
+	ret_code = iavf_allocate_virt_mem(hw, &hw->aq.asq.cmd_buf,
+					  (hw->aq.num_asq_entries *
+					  sizeof(struct iavf_asq_cmd_details)));
+	if (ret_code) {
+		iavf_free_dma_mem(hw, &hw->aq.asq.desc_buf);
+		return ret_code;
+	}
+
+	return ret_code;
+}
+
+/**
+ *  iavf_alloc_adminq_arq_ring - Allocate Admin Queue receive rings
+ *  @hw: pointer to the hardware structure
+ **/
+static enum iavf_status iavf_alloc_adminq_arq_ring(struct iavf_hw *hw)
+{
+	enum iavf_status ret_code;
+
+	ret_code = iavf_allocate_dma_mem(hw, &hw->aq.arq.desc_buf,
+					 iavf_mem_arq_ring,
+					 (hw->aq.num_arq_entries *
+					 sizeof(struct iavf_aq_desc)),
+					 IAVF_ADMINQ_DESC_ALIGNMENT);
+
+	return ret_code;
+}
+
+/**
+ *  iavf_free_adminq_asq - Free Admin Queue send rings
+ *  @hw: pointer to the hardware structure
+ *
+ *  This assumes the posted send buffers have already been cleaned
+ *  and de-allocated
+ **/
+static void iavf_free_adminq_asq(struct iavf_hw *hw)
+{
+	iavf_free_dma_mem(hw, &hw->aq.asq.desc_buf);
+}
+
+/**
+ *  iavf_free_adminq_arq - Free Admin Queue receive rings
+ *  @hw: pointer to the hardware structure
+ *
+ *  This assumes the posted receive buffers have already been cleaned
+ *  and de-allocated
+ **/
+static void iavf_free_adminq_arq(struct iavf_hw *hw)
+{
+	iavf_free_dma_mem(hw, &hw->aq.arq.desc_buf);
+}
+
+/**
+ *  iavf_alloc_arq_bufs - Allocate pre-posted buffers for the receive queue
+ *  @hw: pointer to the hardware structure
+ **/
+static enum iavf_status iavf_alloc_arq_bufs(struct iavf_hw *hw)
+{
+	struct iavf_aq_desc *desc;
+	struct iavf_dma_mem *bi;
+	enum iavf_status ret_code;
+	int i;
+
+	/* We'll be allocating the buffer info memory first, then we can
+	 * allocate the mapped buffers for the event processing
+	 */
+
+	/* buffer_info structures do not need alignment */
+	ret_code = iavf_allocate_virt_mem(hw, &hw->aq.arq.dma_head,
+					  (hw->aq.num_arq_entries *
+					   sizeof(struct iavf_dma_mem)));
+	if (ret_code)
+		goto alloc_arq_bufs;
+	hw->aq.arq.r.arq_bi = (struct iavf_dma_mem *)hw->aq.arq.dma_head.va;
+
+	/* allocate the mapped buffers */
+	for (i = 0; i < hw->aq.num_arq_entries; i++) {
+		bi = &hw->aq.arq.r.arq_bi[i];
+		ret_code = iavf_allocate_dma_mem(hw, bi,
+						 iavf_mem_arq_buf,
+						 hw->aq.arq_buf_size,
+						 IAVF_ADMINQ_DESC_ALIGNMENT);
+		if (ret_code)
+			goto unwind_alloc_arq_bufs;
+
+		/* now configure the descriptors for use */
+		desc = IAVF_ADMINQ_DESC(hw->aq.arq, i);
+
+		desc->flags = cpu_to_le16(IAVF_AQ_FLAG_BUF);
+		if (hw->aq.arq_buf_size > IAVF_AQ_LARGE_BUF)
+			desc->flags |= cpu_to_le16(IAVF_AQ_FLAG_LB);
+		desc->opcode = 0;
+		/* This is in accordance with Admin queue design, there is no
+		 * register for buffer size configuration
+		 */
+		desc->datalen = cpu_to_le16((u16)bi->size);
+		desc->retval = 0;
+		desc->cookie_high = 0;
+		desc->cookie_low = 0;
+		desc->params.external.addr_high =
+			cpu_to_le32(upper_32_bits(bi->pa));
+		desc->params.external.addr_low =
+			cpu_to_le32(lower_32_bits(bi->pa));
+		desc->params.external.param0 = 0;
+		desc->params.external.param1 = 0;
+	}
+
+alloc_arq_bufs:
+	return ret_code;
+
+unwind_alloc_arq_bufs:
+	/* don't try to free the one that failed... */
+	i--;
+	for (; i >= 0; i--)
+		iavf_free_dma_mem(hw, &hw->aq.arq.r.arq_bi[i]);
+	iavf_free_virt_mem(hw, &hw->aq.arq.dma_head);
+
+	return ret_code;
+}
+
+/**
+ *  iavf_alloc_asq_bufs - Allocate empty buffer structs for the send queue
+ *  @hw: pointer to the hardware structure
+ **/
+static enum iavf_status iavf_alloc_asq_bufs(struct iavf_hw *hw)
+{
+	struct iavf_dma_mem *bi;
+	enum iavf_status ret_code;
+	int i;
+
+	/* No mapped memory needed yet, just the buffer info structures */
+	ret_code = iavf_allocate_virt_mem(hw, &hw->aq.asq.dma_head,
+					  (hw->aq.num_asq_entries *
+					   sizeof(struct iavf_dma_mem)));
+	if (ret_code)
+		goto alloc_asq_bufs;
+	hw->aq.asq.r.asq_bi = (struct iavf_dma_mem *)hw->aq.asq.dma_head.va;
+
+	/* allocate the mapped buffers */
+	for (i = 0; i < hw->aq.num_asq_entries; i++) {
+		bi = &hw->aq.asq.r.asq_bi[i];
+		ret_code = iavf_allocate_dma_mem(hw, bi,
+						 iavf_mem_asq_buf,
+						 hw->aq.asq_buf_size,
+						 IAVF_ADMINQ_DESC_ALIGNMENT);
+		if (ret_code)
+			goto unwind_alloc_asq_bufs;
+	}
+alloc_asq_bufs:
+	return ret_code;
+
+unwind_alloc_asq_bufs:
+	/* don't try to free the one that failed... */
+	i--;
+	for (; i >= 0; i--)
+		iavf_free_dma_mem(hw, &hw->aq.asq.r.asq_bi[i]);
+	iavf_free_virt_mem(hw, &hw->aq.asq.dma_head);
+
+	return ret_code;
+}
+
+/**
+ *  iavf_free_arq_bufs - Free receive queue buffer info elements
+ *  @hw: pointer to the hardware structure
+ **/
+static void iavf_free_arq_bufs(struct iavf_hw *hw)
+{
+	int i;
+
+	/* free descriptors */
+	for (i = 0; i < hw->aq.num_arq_entries; i++)
+		iavf_free_dma_mem(hw, &hw->aq.arq.r.arq_bi[i]);
+
+	/* free the descriptor memory */
+	iavf_free_dma_mem(hw, &hw->aq.arq.desc_buf);
+
+	/* free the dma header */
+	iavf_free_virt_mem(hw, &hw->aq.arq.dma_head);
+}
+
+/**
+ *  iavf_free_asq_bufs - Free send queue buffer info elements
+ *  @hw: pointer to the hardware structure
+ **/
+static void iavf_free_asq_bufs(struct iavf_hw *hw)
+{
+	int i;
+
+	/* only unmap if the address is non-NULL */
+	for (i = 0; i < hw->aq.num_asq_entries; i++)
+		if (hw->aq.asq.r.asq_bi[i].pa)
+			iavf_free_dma_mem(hw, &hw->aq.asq.r.asq_bi[i]);
+
+	/* free the buffer info list */
+	iavf_free_virt_mem(hw, &hw->aq.asq.cmd_buf);
+
+	/* free the descriptor memory */
+	iavf_free_dma_mem(hw, &hw->aq.asq.desc_buf);
+
+	/* free the dma header */
+	iavf_free_virt_mem(hw, &hw->aq.asq.dma_head);
+}
+
+/**
+ *  iavf_config_asq_regs - configure ASQ registers
+ *  @hw: pointer to the hardware structure
+ *
+ *  Configure base address and length registers for the transmit queue
+ **/
+static enum iavf_status iavf_config_asq_regs(struct iavf_hw *hw)
+{
+	enum iavf_status ret_code = 0;
+	u32 reg = 0;
+
+	/* Clear Head and Tail */
+	wr32(hw, hw->aq.asq.head, 0);
+	wr32(hw, hw->aq.asq.tail, 0);
+
+	/* set starting point */
+	wr32(hw, hw->aq.asq.len, (hw->aq.num_asq_entries |
+				  IAVF_VF_ATQLEN1_ATQENABLE_MASK));
+	wr32(hw, hw->aq.asq.bal, lower_32_bits(hw->aq.asq.desc_buf.pa));
+	wr32(hw, hw->aq.asq.bah, upper_32_bits(hw->aq.asq.desc_buf.pa));
+
+	/* Check one register to verify that config was applied */
+	reg = rd32(hw, hw->aq.asq.bal);
+	if (reg != lower_32_bits(hw->aq.asq.desc_buf.pa))
+		ret_code = IAVF_ERR_ADMIN_QUEUE_ERROR;
+
+	return ret_code;
+}
+
+/**
+ *  iavf_config_arq_regs - ARQ register configuration
+ *  @hw: pointer to the hardware structure
+ *
+ * Configure base address and length registers for the receive (event queue)
+ **/
+static enum iavf_status iavf_config_arq_regs(struct iavf_hw *hw)
+{
+	enum iavf_status ret_code = 0;
+	u32 reg = 0;
+
+	/* Clear Head and Tail */
+	wr32(hw, hw->aq.arq.head, 0);
+	wr32(hw, hw->aq.arq.tail, 0);
+
+	/* set starting point */
+	wr32(hw, hw->aq.arq.len, (hw->aq.num_arq_entries |
+				  IAVF_VF_ARQLEN1_ARQENABLE_MASK));
+	wr32(hw, hw->aq.arq.bal, lower_32_bits(hw->aq.arq.desc_buf.pa));
+	wr32(hw, hw->aq.arq.bah, upper_32_bits(hw->aq.arq.desc_buf.pa));
+
+	/* Update tail in the HW to post pre-allocated buffers */
+	wr32(hw, hw->aq.arq.tail, hw->aq.num_arq_entries - 1);
+
+	/* Check one register to verify that config was applied */
+	reg = rd32(hw, hw->aq.arq.bal);
+	if (reg != lower_32_bits(hw->aq.arq.desc_buf.pa))
+		ret_code = IAVF_ERR_ADMIN_QUEUE_ERROR;
+
+	return ret_code;
+}
+
+/**
+ *  iavf_init_asq - main initialization routine for ASQ
+ *  @hw: pointer to the hardware structure
+ *
+ *  This is the main initialization routine for the Admin Send Queue
+ *  Prior to calling this function, drivers *MUST* set the following fields
+ *  in the hw->aq structure:
+ *     - hw->aq.num_asq_entries
+ *     - hw->aq.arq_buf_size
+ *
+ *  Do *NOT* hold the lock when calling this as the memory allocation routines
+ *  called are not going to be atomic context safe
+ **/
+static enum iavf_status iavf_init_asq(struct iavf_hw *hw)
+{
+	enum iavf_status ret_code = 0;
+	int i;
+
+	if (hw->aq.asq.count > 0) {
+		/* queue already initialized */
+		ret_code = IAVF_ERR_NOT_READY;
+		goto init_adminq_exit;
+	}
+
+	/* verify input for valid configuration */
+	if ((hw->aq.num_asq_entries == 0) ||
+	    (hw->aq.asq_buf_size == 0)) {
+		ret_code = IAVF_ERR_CONFIG;
+		goto init_adminq_exit;
+	}
+
+	hw->aq.asq.next_to_use = 0;
+	hw->aq.asq.next_to_clean = 0;
+
+	/* allocate the ring memory */
+	ret_code = iavf_alloc_adminq_asq_ring(hw);
+	if (ret_code)
+		goto init_adminq_exit;
+
+	/* allocate buffers in the rings */
+	ret_code = iavf_alloc_asq_bufs(hw);
+	if (ret_code)
+		goto init_adminq_free_rings;
+
+	/* initialize base registers */
+	ret_code = iavf_config_asq_regs(hw);
+	if (ret_code)
+		goto init_free_asq_bufs;
+
+	/* success! */
+	hw->aq.asq.count = hw->aq.num_asq_entries;
+	goto init_adminq_exit;
+
+init_free_asq_bufs:
+	for (i = 0; i < hw->aq.num_asq_entries; i++)
+		iavf_free_dma_mem(hw, &hw->aq.asq.r.asq_bi[i]);
+	iavf_free_virt_mem(hw, &hw->aq.asq.dma_head);
+
+init_adminq_free_rings:
+	iavf_free_adminq_asq(hw);
+
+init_adminq_exit:
+	return ret_code;
+}
+
+/**
+ *  iavf_init_arq - initialize ARQ
+ *  @hw: pointer to the hardware structure
+ *
+ *  The main initialization routine for the Admin Receive (Event) Queue.
+ *  Prior to calling this function, drivers *MUST* set the following fields
+ *  in the hw->aq structure:
+ *     - hw->aq.num_asq_entries
+ *     - hw->aq.arq_buf_size
+ *
+ *  Do *NOT* hold the lock when calling this as the memory allocation routines
+ *  called are not going to be atomic context safe
+ **/
+static enum iavf_status iavf_init_arq(struct iavf_hw *hw)
+{
+	enum iavf_status ret_code = 0;
+	int i;
+
+	if (hw->aq.arq.count > 0) {
+		/* queue already initialized */
+		ret_code = IAVF_ERR_NOT_READY;
+		goto init_adminq_exit;
+	}
+
+	/* verify input for valid configuration */
+	if ((hw->aq.num_arq_entries == 0) ||
+	    (hw->aq.arq_buf_size == 0)) {
+		ret_code = IAVF_ERR_CONFIG;
+		goto init_adminq_exit;
+	}
+
+	hw->aq.arq.next_to_use = 0;
+	hw->aq.arq.next_to_clean = 0;
+
+	/* allocate the ring memory */
+	ret_code = iavf_alloc_adminq_arq_ring(hw);
+	if (ret_code)
+		goto init_adminq_exit;
+
+	/* allocate buffers in the rings */
+	ret_code = iavf_alloc_arq_bufs(hw);
+	if (ret_code)
+		goto init_adminq_free_rings;
+
+	/* initialize base registers */
+	ret_code = iavf_config_arq_regs(hw);
+	if (ret_code)
+		goto init_free_arq_bufs;
+
+	/* success! */
+	hw->aq.arq.count = hw->aq.num_arq_entries;
+	goto init_adminq_exit;
+
+init_free_arq_bufs:
+	for (i = 0; i < hw->aq.num_arq_entries; i++)
+		iavf_free_dma_mem(hw, &hw->aq.arq.r.arq_bi[i]);
+	iavf_free_virt_mem(hw, &hw->aq.arq.dma_head);
+init_adminq_free_rings:
+	iavf_free_adminq_arq(hw);
+
+init_adminq_exit:
+	return ret_code;
+}
+
+/**
+ *  iavf_shutdown_asq - shutdown the ASQ
+ *  @hw: pointer to the hardware structure
+ *
+ *  The main shutdown routine for the Admin Send Queue
+ **/
+static enum iavf_status iavf_shutdown_asq(struct iavf_hw *hw)
+{
+	enum iavf_status ret_code = 0;
+
+	mutex_lock(&hw->aq.asq_mutex);
+
+	if (hw->aq.asq.count == 0) {
+		ret_code = IAVF_ERR_NOT_READY;
+		goto shutdown_asq_out;
+	}
+
+	/* Stop firmware AdminQ processing */
+	wr32(hw, hw->aq.asq.head, 0);
+	wr32(hw, hw->aq.asq.tail, 0);
+	wr32(hw, hw->aq.asq.len, 0);
+	wr32(hw, hw->aq.asq.bal, 0);
+	wr32(hw, hw->aq.asq.bah, 0);
+
+	hw->aq.asq.count = 0; /* to indicate uninitialized queue */
+
+	/* free ring buffers */
+	iavf_free_asq_bufs(hw);
+
+shutdown_asq_out:
+	mutex_unlock(&hw->aq.asq_mutex);
+	return ret_code;
+}
+
+/**
+ *  iavf_shutdown_arq - shutdown ARQ
+ *  @hw: pointer to the hardware structure
+ *
+ *  The main shutdown routine for the Admin Receive Queue
+ **/
+static enum iavf_status iavf_shutdown_arq(struct iavf_hw *hw)
+{
+	enum iavf_status ret_code = 0;
+
+	mutex_lock(&hw->aq.arq_mutex);
+
+	if (hw->aq.arq.count == 0) {
+		ret_code = IAVF_ERR_NOT_READY;
+		goto shutdown_arq_out;
+	}
+
+	/* Stop firmware AdminQ processing */
+	wr32(hw, hw->aq.arq.head, 0);
+	wr32(hw, hw->aq.arq.tail, 0);
+	wr32(hw, hw->aq.arq.len, 0);
+	wr32(hw, hw->aq.arq.bal, 0);
+	wr32(hw, hw->aq.arq.bah, 0);
+
+	hw->aq.arq.count = 0; /* to indicate uninitialized queue */
+
+	/* free ring buffers */
+	iavf_free_arq_bufs(hw);
+
+shutdown_arq_out:
+	mutex_unlock(&hw->aq.arq_mutex);
+	return ret_code;
+}
+
+/**
+ *  iavf_init_adminq - main initialization routine for Admin Queue
+ *  @hw: pointer to the hardware structure
+ *
+ *  Prior to calling this function, drivers *MUST* set the following fields
+ *  in the hw->aq structure:
+ *     - hw->aq.num_asq_entries
+ *     - hw->aq.num_arq_entries
+ *     - hw->aq.arq_buf_size
+ *     - hw->aq.asq_buf_size
+ **/
+enum iavf_status iavf_init_adminq(struct iavf_hw *hw)
+{
+	enum iavf_status ret_code;
+
+	/* verify input for valid configuration */
+	if ((hw->aq.num_arq_entries == 0) ||
+	    (hw->aq.num_asq_entries == 0) ||
+	    (hw->aq.arq_buf_size == 0) ||
+	    (hw->aq.asq_buf_size == 0)) {
+		ret_code = IAVF_ERR_CONFIG;
+		goto init_adminq_exit;
+	}
+
+	/* Set up register offsets */
+	iavf_adminq_init_regs(hw);
+
+	/* setup ASQ command write back timeout */
+	hw->aq.asq_cmd_timeout = IAVF_ASQ_CMD_TIMEOUT;
+
+	/* allocate the ASQ */
+	ret_code = iavf_init_asq(hw);
+	if (ret_code)
+		goto init_adminq_destroy_locks;
+
+	/* allocate the ARQ */
+	ret_code = iavf_init_arq(hw);
+	if (ret_code)
+		goto init_adminq_free_asq;
+
+	/* success! */
+	goto init_adminq_exit;
+
+init_adminq_free_asq:
+	iavf_shutdown_asq(hw);
+init_adminq_destroy_locks:
+
+init_adminq_exit:
+	return ret_code;
+}
+
+/**
+ *  iavf_shutdown_adminq - shutdown routine for the Admin Queue
+ *  @hw: pointer to the hardware structure
+ **/
+enum iavf_status iavf_shutdown_adminq(struct iavf_hw *hw)
+{
+	if (iavf_check_asq_alive(hw))
+		iavf_aq_queue_shutdown(hw, true);
+
+	iavf_shutdown_asq(hw);
+	iavf_shutdown_arq(hw);
+
+	return 0;
+}
+
+/**
+ *  iavf_clean_asq - cleans Admin send queue
+ *  @hw: pointer to the hardware structure
+ *
+ *  returns the number of free desc
+ **/
+static u16 iavf_clean_asq(struct iavf_hw *hw)
+{
+	struct iavf_adminq_ring *asq = &hw->aq.asq;
+	struct iavf_asq_cmd_details *details;
+	u16 ntc = asq->next_to_clean;
+	struct iavf_aq_desc desc_cb;
+	struct iavf_aq_desc *desc;
+
+	desc = IAVF_ADMINQ_DESC(*asq, ntc);
+	details = IAVF_ADMINQ_DETAILS(*asq, ntc);
+	while (rd32(hw, hw->aq.asq.head) != ntc) {
+		iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE,
+			   "ntc %d head %d.\n", ntc, rd32(hw, hw->aq.asq.head));
+
+		if (details->callback) {
+			IAVF_ADMINQ_CALLBACK cb_func =
+					(IAVF_ADMINQ_CALLBACK)details->callback;
+			desc_cb = *desc;
+			cb_func(hw, &desc_cb);
+		}
+		memset((void *)desc, 0, sizeof(struct iavf_aq_desc));
+		memset((void *)details, 0,
+		       sizeof(struct iavf_asq_cmd_details));
+		ntc++;
+		if (ntc == asq->count)
+			ntc = 0;
+		desc = IAVF_ADMINQ_DESC(*asq, ntc);
+		details = IAVF_ADMINQ_DETAILS(*asq, ntc);
+	}
+
+	asq->next_to_clean = ntc;
+
+	return IAVF_DESC_UNUSED(asq);
+}
+
+/**
+ *  iavf_asq_done - check if FW has processed the Admin Send Queue
+ *  @hw: pointer to the hw struct
+ *
+ *  Returns true if the firmware has processed all descriptors on the
+ *  admin send queue. Returns false if there are still requests pending.
+ **/
+bool iavf_asq_done(struct iavf_hw *hw)
+{
+	/* AQ designers suggest use of head for better
+	 * timing reliability than DD bit
+	 */
+	return rd32(hw, hw->aq.asq.head) == hw->aq.asq.next_to_use;
+}
+
+/**
+ *  iavf_asq_send_command - send command to Admin Queue
+ *  @hw: pointer to the hw struct
+ *  @desc: prefilled descriptor describing the command (non DMA mem)
+ *  @buff: buffer to use for indirect commands
+ *  @buff_size: size of buffer for indirect commands
+ *  @cmd_details: pointer to command details structure
+ *
+ *  This is the main send command driver routine for the Admin Queue send
+ *  queue.  It runs the queue, cleans the queue, etc
+ **/
+enum iavf_status iavf_asq_send_command(struct iavf_hw *hw,
+				       struct iavf_aq_desc *desc,
+				       void *buff, /* can be NULL */
+				       u16  buff_size,
+				       struct iavf_asq_cmd_details *cmd_details)
+{
+	struct iavf_dma_mem *dma_buff = NULL;
+	struct iavf_asq_cmd_details *details;
+	struct iavf_aq_desc *desc_on_ring;
+	bool cmd_completed = false;
+	enum iavf_status status = 0;
+	u16  retval = 0;
+	u32  val = 0;
+
+	mutex_lock(&hw->aq.asq_mutex);
+
+	if (hw->aq.asq.count == 0) {
+		iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE,
+			   "AQTX: Admin queue not initialized.\n");
+		status = IAVF_ERR_QUEUE_EMPTY;
+		goto asq_send_command_error;
+	}
+
+	hw->aq.asq_last_status = IAVF_AQ_RC_OK;
+
+	val = rd32(hw, hw->aq.asq.head);
+	if (val >= hw->aq.num_asq_entries) {
+		iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE,
+			   "AQTX: head overrun at %d\n", val);
+		status = IAVF_ERR_QUEUE_EMPTY;
+		goto asq_send_command_error;
+	}
+
+	details = IAVF_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use);
+	if (cmd_details) {
+		*details = *cmd_details;
+
+		/* If the cmd_details are defined copy the cookie.  The
+		 * cpu_to_le32 is not needed here because the data is ignored
+		 * by the FW, only used by the driver
+		 */
+		if (details->cookie) {
+			desc->cookie_high =
+				cpu_to_le32(upper_32_bits(details->cookie));
+			desc->cookie_low =
+				cpu_to_le32(lower_32_bits(details->cookie));
+		}
+	} else {
+		memset(details, 0, sizeof(struct iavf_asq_cmd_details));
+	}
+
+	/* clear requested flags and then set additional flags if defined */
+	desc->flags &= ~cpu_to_le16(details->flags_dis);
+	desc->flags |= cpu_to_le16(details->flags_ena);
+
+	if (buff_size > hw->aq.asq_buf_size) {
+		iavf_debug(hw,
+			   IAVF_DEBUG_AQ_MESSAGE,
+			   "AQTX: Invalid buffer size: %d.\n",
+			   buff_size);
+		status = IAVF_ERR_INVALID_SIZE;
+		goto asq_send_command_error;
+	}
+
+	if (details->postpone && !details->async) {
+		iavf_debug(hw,
+			   IAVF_DEBUG_AQ_MESSAGE,
+			   "AQTX: Async flag not set along with postpone flag");
+		status = IAVF_ERR_PARAM;
+		goto asq_send_command_error;
+	}
+
+	/* call clean and check queue available function to reclaim the
+	 * descriptors that were processed by FW, the function returns the
+	 * number of desc available
+	 */
+	/* the clean function called here could be called in a separate thread
+	 * in case of asynchronous completions
+	 */
+	if (iavf_clean_asq(hw) == 0) {
+		iavf_debug(hw,
+			   IAVF_DEBUG_AQ_MESSAGE,
+			   "AQTX: Error queue is full.\n");
+		status = IAVF_ERR_ADMIN_QUEUE_FULL;
+		goto asq_send_command_error;
+	}
+
+	/* initialize the temp desc pointer with the right desc */
+	desc_on_ring = IAVF_ADMINQ_DESC(hw->aq.asq, hw->aq.asq.next_to_use);
+
+	/* if the desc is available copy the temp desc to the right place */
+	*desc_on_ring = *desc;
+
+	/* if buff is not NULL assume indirect command */
+	if (buff) {
+		dma_buff = &hw->aq.asq.r.asq_bi[hw->aq.asq.next_to_use];
+		/* copy the user buff into the respective DMA buff */
+		memcpy(dma_buff->va, buff, buff_size);
+		desc_on_ring->datalen = cpu_to_le16(buff_size);
+
+		/* Update the address values in the desc with the pa value
+		 * for respective buffer
+		 */
+		desc_on_ring->params.external.addr_high =
+				cpu_to_le32(upper_32_bits(dma_buff->pa));
+		desc_on_ring->params.external.addr_low =
+				cpu_to_le32(lower_32_bits(dma_buff->pa));
+	}
+
+	/* bump the tail */
+	iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQTX: desc and buffer:\n");
+	iavf_debug_aq(hw, IAVF_DEBUG_AQ_COMMAND, (void *)desc_on_ring,
+		      buff, buff_size);
+	(hw->aq.asq.next_to_use)++;
+	if (hw->aq.asq.next_to_use == hw->aq.asq.count)
+		hw->aq.asq.next_to_use = 0;
+	if (!details->postpone)
+		wr32(hw, hw->aq.asq.tail, hw->aq.asq.next_to_use);
+
+	/* if cmd_details are not defined or async flag is not set,
+	 * we need to wait for desc write back
+	 */
+	if (!details->async && !details->postpone) {
+		u32 total_delay = 0;
+
+		do {
+			/* AQ designers suggest use of head for better
+			 * timing reliability than DD bit
+			 */
+			if (iavf_asq_done(hw))
+				break;
+			udelay(50);
+			total_delay += 50;
+		} while (total_delay < hw->aq.asq_cmd_timeout);
+	}
+
+	/* if ready, copy the desc back to temp */
+	if (iavf_asq_done(hw)) {
+		*desc = *desc_on_ring;
+		if (buff)
+			memcpy(buff, dma_buff->va, buff_size);
+		retval = le16_to_cpu(desc->retval);
+		if (retval != 0) {
+			iavf_debug(hw,
+				   IAVF_DEBUG_AQ_MESSAGE,
+				   "AQTX: Command completed with error 0x%X.\n",
+				   retval);
+
+			/* strip off FW internal code */
+			retval &= 0xff;
+		}
+		cmd_completed = true;
+		if ((enum iavf_admin_queue_err)retval == IAVF_AQ_RC_OK)
+			status = 0;
+		else if ((enum iavf_admin_queue_err)retval == IAVF_AQ_RC_EBUSY)
+			status = IAVF_ERR_NOT_READY;
+		else
+			status = IAVF_ERR_ADMIN_QUEUE_ERROR;
+		hw->aq.asq_last_status = (enum iavf_admin_queue_err)retval;
+	}
+
+	iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE,
+		   "AQTX: desc and buffer writeback:\n");
+	iavf_debug_aq(hw, IAVF_DEBUG_AQ_COMMAND, (void *)desc, buff, buff_size);
+
+	/* save writeback aq if requested */
+	if (details->wb_desc)
+		*details->wb_desc = *desc_on_ring;
+
+	/* update the error if time out occurred */
+	if ((!cmd_completed) &&
+	    (!details->async && !details->postpone)) {
+		if (rd32(hw, hw->aq.asq.len) & IAVF_VF_ATQLEN1_ATQCRIT_MASK) {
+			iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE,
+				   "AQTX: AQ Critical error.\n");
+			status = IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR;
+		} else {
+			iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE,
+				   "AQTX: Writeback timeout.\n");
+			status = IAVF_ERR_ADMIN_QUEUE_TIMEOUT;
+		}
+	}
+
+asq_send_command_error:
+	mutex_unlock(&hw->aq.asq_mutex);
+	return status;
+}
+
+/**
+ *  iavf_fill_default_direct_cmd_desc - AQ descriptor helper function
+ *  @desc:     pointer to the temp descriptor (non DMA mem)
+ *  @opcode:   the opcode can be used to decide which flags to turn off or on
+ *
+ *  Fill the desc with default values
+ **/
+void iavf_fill_default_direct_cmd_desc(struct iavf_aq_desc *desc, u16 opcode)
+{
+	/* zero out the desc */
+	memset((void *)desc, 0, sizeof(struct iavf_aq_desc));
+	desc->opcode = cpu_to_le16(opcode);
+	desc->flags = cpu_to_le16(IAVF_AQ_FLAG_SI);
+}
+
+/**
+ *  iavf_clean_arq_element
+ *  @hw: pointer to the hw struct
+ *  @e: event info from the receive descriptor, includes any buffers
+ *  @pending: number of events that could be left to process
+ *
+ *  This function cleans one Admin Receive Queue element and returns
+ *  the contents through e.  It can also return how many events are
+ *  left to process through 'pending'
+ **/
+enum iavf_status iavf_clean_arq_element(struct iavf_hw *hw,
+					struct iavf_arq_event_info *e,
+					u16 *pending)
+{
+	u16 ntc = hw->aq.arq.next_to_clean;
+	struct iavf_aq_desc *desc;
+	enum iavf_status ret_code = 0;
+	struct iavf_dma_mem *bi;
+	u16 desc_idx;
+	u16 datalen;
+	u16 flags;
+	u16 ntu;
+
+	/* pre-clean the event info */
+	memset(&e->desc, 0, sizeof(e->desc));
+
+	/* take the lock before we start messing with the ring */
+	mutex_lock(&hw->aq.arq_mutex);
+
+	if (hw->aq.arq.count == 0) {
+		iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE,
+			   "AQRX: Admin queue not initialized.\n");
+		ret_code = IAVF_ERR_QUEUE_EMPTY;
+		goto clean_arq_element_err;
+	}
+
+	/* set next_to_use to head */
+	ntu = rd32(hw, hw->aq.arq.head) & IAVF_VF_ARQH1_ARQH_MASK;
+	if (ntu == ntc) {
+		/* nothing to do - shouldn't need to update ring's values */
+		ret_code = IAVF_ERR_ADMIN_QUEUE_NO_WORK;
+		goto clean_arq_element_out;
+	}
+
+	/* now clean the next descriptor */
+	desc = IAVF_ADMINQ_DESC(hw->aq.arq, ntc);
+	desc_idx = ntc;
+
+	hw->aq.arq_last_status =
+		(enum iavf_admin_queue_err)le16_to_cpu(desc->retval);
+	flags = le16_to_cpu(desc->flags);
+	if (flags & IAVF_AQ_FLAG_ERR) {
+		ret_code = IAVF_ERR_ADMIN_QUEUE_ERROR;
+		iavf_debug(hw,
+			   IAVF_DEBUG_AQ_MESSAGE,
+			   "AQRX: Event received with error 0x%X.\n",
+			   hw->aq.arq_last_status);
+	}
+
+	e->desc = *desc;
+	datalen = le16_to_cpu(desc->datalen);
+	e->msg_len = min(datalen, e->buf_len);
+	if (e->msg_buf && (e->msg_len != 0))
+		memcpy(e->msg_buf, hw->aq.arq.r.arq_bi[desc_idx].va,
+		       e->msg_len);
+
+	iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQRX: desc and buffer:\n");
+	iavf_debug_aq(hw, IAVF_DEBUG_AQ_COMMAND, (void *)desc, e->msg_buf,
+		      hw->aq.arq_buf_size);
+
+	/* Restore the original datalen and buffer address in the desc,
+	 * FW updates datalen to indicate the event message
+	 * size
+	 */
+	bi = &hw->aq.arq.r.arq_bi[ntc];
+	memset((void *)desc, 0, sizeof(struct iavf_aq_desc));
+
+	desc->flags = cpu_to_le16(IAVF_AQ_FLAG_BUF);
+	if (hw->aq.arq_buf_size > IAVF_AQ_LARGE_BUF)
+		desc->flags |= cpu_to_le16(IAVF_AQ_FLAG_LB);
+	desc->datalen = cpu_to_le16((u16)bi->size);
+	desc->params.external.addr_high = cpu_to_le32(upper_32_bits(bi->pa));
+	desc->params.external.addr_low = cpu_to_le32(lower_32_bits(bi->pa));
+
+	/* set tail = the last cleaned desc index. */
+	wr32(hw, hw->aq.arq.tail, ntc);
+	/* ntc is updated to tail + 1 */
+	ntc++;
+	if (ntc == hw->aq.num_arq_entries)
+		ntc = 0;
+	hw->aq.arq.next_to_clean = ntc;
+	hw->aq.arq.next_to_use = ntu;
+
+clean_arq_element_out:
+	/* Set pending if needed, unlock and return */
+	if (pending)
+		*pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc);
+
+clean_arq_element_err:
+	mutex_unlock(&hw->aq.arq_mutex);
+
+	return ret_code;
+}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_adminq.h b/drivers/net/ethernet/intel/iavf/iavf_adminq.h
new file mode 100644
index 0000000000..1f60518eb0
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_adminq.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_ADMINQ_H_
+#define _IAVF_ADMINQ_H_
+
+#include "iavf_osdep.h"
+#include "iavf_status.h"
+#include "iavf_adminq_cmd.h"
+
+#define IAVF_ADMINQ_DESC(R, i)   \
+	(&(((struct iavf_aq_desc *)((R).desc_buf.va))[i]))
+
+#define IAVF_ADMINQ_DESC_ALIGNMENT 4096
+
+struct iavf_adminq_ring {
+	struct iavf_virt_mem dma_head;	/* space for dma structures */
+	struct iavf_dma_mem desc_buf;	/* descriptor ring memory */
+	struct iavf_virt_mem cmd_buf;	/* command buffer memory */
+
+	union {
+		struct iavf_dma_mem *asq_bi;
+		struct iavf_dma_mem *arq_bi;
+	} r;
+
+	u16 count;		/* Number of descriptors */
+	u16 rx_buf_len;		/* Admin Receive Queue buffer length */
+
+	/* used for interrupt processing */
+	u16 next_to_use;
+	u16 next_to_clean;
+
+	/* used for queue tracking */
+	u32 head;
+	u32 tail;
+	u32 len;
+	u32 bah;
+	u32 bal;
+};
+
+/* ASQ transaction details */
+struct iavf_asq_cmd_details {
+	void *callback; /* cast from type IAVF_ADMINQ_CALLBACK */
+	u64 cookie;
+	u16 flags_ena;
+	u16 flags_dis;
+	bool async;
+	bool postpone;
+	struct iavf_aq_desc *wb_desc;
+};
+
+#define IAVF_ADMINQ_DETAILS(R, i)   \
+	(&(((struct iavf_asq_cmd_details *)((R).cmd_buf.va))[i]))
+
+/* ARQ event information */
+struct iavf_arq_event_info {
+	struct iavf_aq_desc desc;
+	u16 msg_len;
+	u16 buf_len;
+	u8 *msg_buf;
+};
+
+/* Admin Queue information */
+struct iavf_adminq_info {
+	struct iavf_adminq_ring arq;    /* receive queue */
+	struct iavf_adminq_ring asq;    /* send queue */
+	u32 asq_cmd_timeout;            /* send queue cmd write back timeout*/
+	u16 num_arq_entries;            /* receive queue depth */
+	u16 num_asq_entries;            /* send queue depth */
+	u16 arq_buf_size;               /* receive queue buffer size */
+	u16 asq_buf_size;               /* send queue buffer size */
+	u16 fw_maj_ver;                 /* firmware major version */
+	u16 fw_min_ver;                 /* firmware minor version */
+	u32 fw_build;                   /* firmware build number */
+	u16 api_maj_ver;                /* api major version */
+	u16 api_min_ver;                /* api minor version */
+
+	struct mutex asq_mutex; /* Send queue lock */
+	struct mutex arq_mutex; /* Receive queue lock */
+
+	/* last status values on send and receive queues */
+	enum iavf_admin_queue_err asq_last_status;
+	enum iavf_admin_queue_err arq_last_status;
+};
+
+/**
+ * iavf_aq_rc_to_posix - convert errors to user-land codes
+ * @aq_ret: AdminQ handler error code can override aq_rc
+ * @aq_rc: AdminQ firmware error code to convert
+ **/
+static inline int iavf_aq_rc_to_posix(int aq_ret, int aq_rc)
+{
+	int aq_to_posix[] = {
+		0,           /* IAVF_AQ_RC_OK */
+		-EPERM,      /* IAVF_AQ_RC_EPERM */
+		-ENOENT,     /* IAVF_AQ_RC_ENOENT */
+		-ESRCH,      /* IAVF_AQ_RC_ESRCH */
+		-EINTR,      /* IAVF_AQ_RC_EINTR */
+		-EIO,        /* IAVF_AQ_RC_EIO */
+		-ENXIO,      /* IAVF_AQ_RC_ENXIO */
+		-E2BIG,      /* IAVF_AQ_RC_E2BIG */
+		-EAGAIN,     /* IAVF_AQ_RC_EAGAIN */
+		-ENOMEM,     /* IAVF_AQ_RC_ENOMEM */
+		-EACCES,     /* IAVF_AQ_RC_EACCES */
+		-EFAULT,     /* IAVF_AQ_RC_EFAULT */
+		-EBUSY,      /* IAVF_AQ_RC_EBUSY */
+		-EEXIST,     /* IAVF_AQ_RC_EEXIST */
+		-EINVAL,     /* IAVF_AQ_RC_EINVAL */
+		-ENOTTY,     /* IAVF_AQ_RC_ENOTTY */
+		-ENOSPC,     /* IAVF_AQ_RC_ENOSPC */
+		-ENOSYS,     /* IAVF_AQ_RC_ENOSYS */
+		-ERANGE,     /* IAVF_AQ_RC_ERANGE */
+		-EPIPE,      /* IAVF_AQ_RC_EFLUSHED */
+		-ESPIPE,     /* IAVF_AQ_RC_BAD_ADDR */
+		-EROFS,      /* IAVF_AQ_RC_EMODE */
+		-EFBIG,      /* IAVF_AQ_RC_EFBIG */
+	};
+
+	/* aq_rc is invalid if AQ timed out */
+	if (aq_ret == IAVF_ERR_ADMIN_QUEUE_TIMEOUT)
+		return -EAGAIN;
+
+	if (!((u32)aq_rc < (sizeof(aq_to_posix) / sizeof((aq_to_posix)[0]))))
+		return -ERANGE;
+
+	return aq_to_posix[aq_rc];
+}
+
+/* general information */
+#define IAVF_AQ_LARGE_BUF	512
+#define IAVF_ASQ_CMD_TIMEOUT	250000  /* usecs */
+
+void iavf_fill_default_direct_cmd_desc(struct iavf_aq_desc *desc, u16 opcode);
+
+#endif /* _IAVF_ADMINQ_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_adminq_cmd.h b/drivers/net/ethernet/intel/iavf/iavf_adminq_cmd.h
new file mode 100644
index 0000000000..bc51230855
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_adminq_cmd.h
@@ -0,0 +1,528 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_ADMINQ_CMD_H_
+#define _IAVF_ADMINQ_CMD_H_
+
+/* This header file defines the iavf Admin Queue commands and is shared between
+ * iavf Firmware and Software.
+ *
+ * This file needs to comply with the Linux Kernel coding style.
+ */
+
+#define IAVF_FW_API_VERSION_MAJOR	0x0001
+#define IAVF_FW_API_VERSION_MINOR_X722	0x0005
+#define IAVF_FW_API_VERSION_MINOR_X710	0x0008
+
+#define IAVF_FW_MINOR_VERSION(_h) ((_h)->mac.type == IAVF_MAC_XL710 ? \
+					IAVF_FW_API_VERSION_MINOR_X710 : \
+					IAVF_FW_API_VERSION_MINOR_X722)
+
+/* API version 1.7 implements additional link and PHY-specific APIs  */
+#define IAVF_MINOR_VER_GET_LINK_INFO_XL710 0x0007
+
+struct iavf_aq_desc {
+	__le16 flags;
+	__le16 opcode;
+	__le16 datalen;
+	__le16 retval;
+	__le32 cookie_high;
+	__le32 cookie_low;
+	union {
+		struct {
+			__le32 param0;
+			__le32 param1;
+			__le32 param2;
+			__le32 param3;
+		} internal;
+		struct {
+			__le32 param0;
+			__le32 param1;
+			__le32 addr_high;
+			__le32 addr_low;
+		} external;
+		u8 raw[16];
+	} params;
+};
+
+/* Flags sub-structure
+ * |0  |1  |2  |3  |4  |5  |6  |7  |8  |9  |10 |11 |12 |13 |14 |15 |
+ * |DD |CMP|ERR|VFE| * *  RESERVED * * |LB |RD |VFC|BUF|SI |EI |FE |
+ */
+
+/* command flags and offsets*/
+#define IAVF_AQ_FLAG_DD_SHIFT	0
+#define IAVF_AQ_FLAG_CMP_SHIFT	1
+#define IAVF_AQ_FLAG_ERR_SHIFT	2
+#define IAVF_AQ_FLAG_VFE_SHIFT	3
+#define IAVF_AQ_FLAG_LB_SHIFT	9
+#define IAVF_AQ_FLAG_RD_SHIFT	10
+#define IAVF_AQ_FLAG_VFC_SHIFT	11
+#define IAVF_AQ_FLAG_BUF_SHIFT	12
+#define IAVF_AQ_FLAG_SI_SHIFT	13
+#define IAVF_AQ_FLAG_EI_SHIFT	14
+#define IAVF_AQ_FLAG_FE_SHIFT	15
+
+#define IAVF_AQ_FLAG_DD		BIT(IAVF_AQ_FLAG_DD_SHIFT)  /* 0x1    */
+#define IAVF_AQ_FLAG_CMP	BIT(IAVF_AQ_FLAG_CMP_SHIFT) /* 0x2    */
+#define IAVF_AQ_FLAG_ERR	BIT(IAVF_AQ_FLAG_ERR_SHIFT) /* 0x4    */
+#define IAVF_AQ_FLAG_VFE	BIT(IAVF_AQ_FLAG_VFE_SHIFT) /* 0x8    */
+#define IAVF_AQ_FLAG_LB		BIT(IAVF_AQ_FLAG_LB_SHIFT)  /* 0x200  */
+#define IAVF_AQ_FLAG_RD		BIT(IAVF_AQ_FLAG_RD_SHIFT)  /* 0x400  */
+#define IAVF_AQ_FLAG_VFC	BIT(IAVF_AQ_FLAG_VFC_SHIFT) /* 0x800  */
+#define IAVF_AQ_FLAG_BUF	BIT(IAVF_AQ_FLAG_BUF_SHIFT) /* 0x1000 */
+#define IAVF_AQ_FLAG_SI		BIT(IAVF_AQ_FLAG_SI_SHIFT)  /* 0x2000 */
+#define IAVF_AQ_FLAG_EI		BIT(IAVF_AQ_FLAG_EI_SHIFT)  /* 0x4000 */
+#define IAVF_AQ_FLAG_FE		BIT(IAVF_AQ_FLAG_FE_SHIFT)  /* 0x8000 */
+
+/* error codes */
+enum iavf_admin_queue_err {
+	IAVF_AQ_RC_OK		= 0,  /* success */
+	IAVF_AQ_RC_EPERM	= 1,  /* Operation not permitted */
+	IAVF_AQ_RC_ENOENT	= 2,  /* No such element */
+	IAVF_AQ_RC_ESRCH	= 3,  /* Bad opcode */
+	IAVF_AQ_RC_EINTR	= 4,  /* operation interrupted */
+	IAVF_AQ_RC_EIO		= 5,  /* I/O error */
+	IAVF_AQ_RC_ENXIO	= 6,  /* No such resource */
+	IAVF_AQ_RC_E2BIG	= 7,  /* Arg too long */
+	IAVF_AQ_RC_EAGAIN	= 8,  /* Try again */
+	IAVF_AQ_RC_ENOMEM	= 9,  /* Out of memory */
+	IAVF_AQ_RC_EACCES	= 10, /* Permission denied */
+	IAVF_AQ_RC_EFAULT	= 11, /* Bad address */
+	IAVF_AQ_RC_EBUSY	= 12, /* Device or resource busy */
+	IAVF_AQ_RC_EEXIST	= 13, /* object already exists */
+	IAVF_AQ_RC_EINVAL	= 14, /* Invalid argument */
+	IAVF_AQ_RC_ENOTTY	= 15, /* Not a typewriter */
+	IAVF_AQ_RC_ENOSPC	= 16, /* No space left or alloc failure */
+	IAVF_AQ_RC_ENOSYS	= 17, /* Function not implemented */
+	IAVF_AQ_RC_ERANGE	= 18, /* Parameter out of range */
+	IAVF_AQ_RC_EFLUSHED	= 19, /* Cmd flushed due to prev cmd error */
+	IAVF_AQ_RC_BAD_ADDR	= 20, /* Descriptor contains a bad pointer */
+	IAVF_AQ_RC_EMODE	= 21, /* Op not allowed in current dev mode */
+	IAVF_AQ_RC_EFBIG	= 22, /* File too large */
+};
+
+/* Admin Queue command opcodes */
+enum iavf_admin_queue_opc {
+	/* aq commands */
+	iavf_aqc_opc_get_version	= 0x0001,
+	iavf_aqc_opc_driver_version	= 0x0002,
+	iavf_aqc_opc_queue_shutdown	= 0x0003,
+	iavf_aqc_opc_set_pf_context	= 0x0004,
+
+	/* resource ownership */
+	iavf_aqc_opc_request_resource	= 0x0008,
+	iavf_aqc_opc_release_resource	= 0x0009,
+
+	iavf_aqc_opc_list_func_capabilities	= 0x000A,
+	iavf_aqc_opc_list_dev_capabilities	= 0x000B,
+
+	/* Proxy commands */
+	iavf_aqc_opc_set_proxy_config		= 0x0104,
+	iavf_aqc_opc_set_ns_proxy_table_entry	= 0x0105,
+
+	/* LAA */
+	iavf_aqc_opc_mac_address_read	= 0x0107,
+	iavf_aqc_opc_mac_address_write	= 0x0108,
+
+	/* PXE */
+	iavf_aqc_opc_clear_pxe_mode	= 0x0110,
+
+	/* WoL commands */
+	iavf_aqc_opc_set_wol_filter	= 0x0120,
+	iavf_aqc_opc_get_wake_reason	= 0x0121,
+
+	/* internal switch commands */
+	iavf_aqc_opc_get_switch_config		= 0x0200,
+	iavf_aqc_opc_add_statistics		= 0x0201,
+	iavf_aqc_opc_remove_statistics		= 0x0202,
+	iavf_aqc_opc_set_port_parameters	= 0x0203,
+	iavf_aqc_opc_get_switch_resource_alloc	= 0x0204,
+	iavf_aqc_opc_set_switch_config		= 0x0205,
+	iavf_aqc_opc_rx_ctl_reg_read		= 0x0206,
+	iavf_aqc_opc_rx_ctl_reg_write		= 0x0207,
+
+	iavf_aqc_opc_add_vsi			= 0x0210,
+	iavf_aqc_opc_update_vsi_parameters	= 0x0211,
+	iavf_aqc_opc_get_vsi_parameters		= 0x0212,
+
+	iavf_aqc_opc_add_pv			= 0x0220,
+	iavf_aqc_opc_update_pv_parameters	= 0x0221,
+	iavf_aqc_opc_get_pv_parameters		= 0x0222,
+
+	iavf_aqc_opc_add_veb			= 0x0230,
+	iavf_aqc_opc_update_veb_parameters	= 0x0231,
+	iavf_aqc_opc_get_veb_parameters		= 0x0232,
+
+	iavf_aqc_opc_delete_element		= 0x0243,
+
+	iavf_aqc_opc_add_macvlan		= 0x0250,
+	iavf_aqc_opc_remove_macvlan		= 0x0251,
+	iavf_aqc_opc_add_vlan			= 0x0252,
+	iavf_aqc_opc_remove_vlan		= 0x0253,
+	iavf_aqc_opc_set_vsi_promiscuous_modes	= 0x0254,
+	iavf_aqc_opc_add_tag			= 0x0255,
+	iavf_aqc_opc_remove_tag			= 0x0256,
+	iavf_aqc_opc_add_multicast_etag		= 0x0257,
+	iavf_aqc_opc_remove_multicast_etag	= 0x0258,
+	iavf_aqc_opc_update_tag			= 0x0259,
+	iavf_aqc_opc_add_control_packet_filter	= 0x025A,
+	iavf_aqc_opc_remove_control_packet_filter	= 0x025B,
+	iavf_aqc_opc_add_cloud_filters		= 0x025C,
+	iavf_aqc_opc_remove_cloud_filters	= 0x025D,
+	iavf_aqc_opc_clear_wol_switch_filters	= 0x025E,
+
+	iavf_aqc_opc_add_mirror_rule	= 0x0260,
+	iavf_aqc_opc_delete_mirror_rule	= 0x0261,
+
+	/* Dynamic Device Personalization */
+	iavf_aqc_opc_write_personalization_profile	= 0x0270,
+	iavf_aqc_opc_get_personalization_profile_list	= 0x0271,
+
+	/* DCB commands */
+	iavf_aqc_opc_dcb_ignore_pfc	= 0x0301,
+	iavf_aqc_opc_dcb_updated	= 0x0302,
+	iavf_aqc_opc_set_dcb_parameters = 0x0303,
+
+	/* TX scheduler */
+	iavf_aqc_opc_configure_vsi_bw_limit		= 0x0400,
+	iavf_aqc_opc_configure_vsi_ets_sla_bw_limit	= 0x0406,
+	iavf_aqc_opc_configure_vsi_tc_bw		= 0x0407,
+	iavf_aqc_opc_query_vsi_bw_config		= 0x0408,
+	iavf_aqc_opc_query_vsi_ets_sla_config		= 0x040A,
+	iavf_aqc_opc_configure_switching_comp_bw_limit	= 0x0410,
+
+	iavf_aqc_opc_enable_switching_comp_ets			= 0x0413,
+	iavf_aqc_opc_modify_switching_comp_ets			= 0x0414,
+	iavf_aqc_opc_disable_switching_comp_ets			= 0x0415,
+	iavf_aqc_opc_configure_switching_comp_ets_bw_limit	= 0x0416,
+	iavf_aqc_opc_configure_switching_comp_bw_config		= 0x0417,
+	iavf_aqc_opc_query_switching_comp_ets_config		= 0x0418,
+	iavf_aqc_opc_query_port_ets_config			= 0x0419,
+	iavf_aqc_opc_query_switching_comp_bw_config		= 0x041A,
+	iavf_aqc_opc_suspend_port_tx				= 0x041B,
+	iavf_aqc_opc_resume_port_tx				= 0x041C,
+	iavf_aqc_opc_configure_partition_bw			= 0x041D,
+	/* hmc */
+	iavf_aqc_opc_query_hmc_resource_profile	= 0x0500,
+	iavf_aqc_opc_set_hmc_resource_profile	= 0x0501,
+
+	/* phy commands*/
+	iavf_aqc_opc_get_phy_abilities		= 0x0600,
+	iavf_aqc_opc_set_phy_config		= 0x0601,
+	iavf_aqc_opc_set_mac_config		= 0x0603,
+	iavf_aqc_opc_set_link_restart_an	= 0x0605,
+	iavf_aqc_opc_get_link_status		= 0x0607,
+	iavf_aqc_opc_set_phy_int_mask		= 0x0613,
+	iavf_aqc_opc_get_local_advt_reg		= 0x0614,
+	iavf_aqc_opc_set_local_advt_reg		= 0x0615,
+	iavf_aqc_opc_get_partner_advt		= 0x0616,
+	iavf_aqc_opc_set_lb_modes		= 0x0618,
+	iavf_aqc_opc_get_phy_wol_caps		= 0x0621,
+	iavf_aqc_opc_set_phy_debug		= 0x0622,
+	iavf_aqc_opc_upload_ext_phy_fm		= 0x0625,
+	iavf_aqc_opc_run_phy_activity		= 0x0626,
+	iavf_aqc_opc_set_phy_register		= 0x0628,
+	iavf_aqc_opc_get_phy_register		= 0x0629,
+
+	/* NVM commands */
+	iavf_aqc_opc_nvm_read			= 0x0701,
+	iavf_aqc_opc_nvm_erase			= 0x0702,
+	iavf_aqc_opc_nvm_update			= 0x0703,
+	iavf_aqc_opc_nvm_config_read		= 0x0704,
+	iavf_aqc_opc_nvm_config_write		= 0x0705,
+	iavf_aqc_opc_oem_post_update		= 0x0720,
+	iavf_aqc_opc_thermal_sensor		= 0x0721,
+
+	/* virtualization commands */
+	iavf_aqc_opc_send_msg_to_pf		= 0x0801,
+	iavf_aqc_opc_send_msg_to_vf		= 0x0802,
+	iavf_aqc_opc_send_msg_to_peer		= 0x0803,
+
+	/* alternate structure */
+	iavf_aqc_opc_alternate_write		= 0x0900,
+	iavf_aqc_opc_alternate_write_indirect	= 0x0901,
+	iavf_aqc_opc_alternate_read		= 0x0902,
+	iavf_aqc_opc_alternate_read_indirect	= 0x0903,
+	iavf_aqc_opc_alternate_write_done	= 0x0904,
+	iavf_aqc_opc_alternate_set_mode		= 0x0905,
+	iavf_aqc_opc_alternate_clear_port	= 0x0906,
+
+	/* LLDP commands */
+	iavf_aqc_opc_lldp_get_mib	= 0x0A00,
+	iavf_aqc_opc_lldp_update_mib	= 0x0A01,
+	iavf_aqc_opc_lldp_add_tlv	= 0x0A02,
+	iavf_aqc_opc_lldp_update_tlv	= 0x0A03,
+	iavf_aqc_opc_lldp_delete_tlv	= 0x0A04,
+	iavf_aqc_opc_lldp_stop		= 0x0A05,
+	iavf_aqc_opc_lldp_start		= 0x0A06,
+
+	/* Tunnel commands */
+	iavf_aqc_opc_add_udp_tunnel	= 0x0B00,
+	iavf_aqc_opc_del_udp_tunnel	= 0x0B01,
+	iavf_aqc_opc_set_rss_key	= 0x0B02,
+	iavf_aqc_opc_set_rss_lut	= 0x0B03,
+	iavf_aqc_opc_get_rss_key	= 0x0B04,
+	iavf_aqc_opc_get_rss_lut	= 0x0B05,
+
+	/* Async Events */
+	iavf_aqc_opc_event_lan_overflow		= 0x1001,
+
+	/* OEM commands */
+	iavf_aqc_opc_oem_parameter_change	= 0xFE00,
+	iavf_aqc_opc_oem_device_status_change	= 0xFE01,
+	iavf_aqc_opc_oem_ocsd_initialize	= 0xFE02,
+	iavf_aqc_opc_oem_ocbb_initialize	= 0xFE03,
+
+	/* debug commands */
+	iavf_aqc_opc_debug_read_reg		= 0xFF03,
+	iavf_aqc_opc_debug_write_reg		= 0xFF04,
+	iavf_aqc_opc_debug_modify_reg		= 0xFF07,
+	iavf_aqc_opc_debug_dump_internals	= 0xFF08,
+};
+
+/* command structures and indirect data structures */
+
+/* Structure naming conventions:
+ * - no suffix for direct command descriptor structures
+ * - _data for indirect sent data
+ * - _resp for indirect return data (data which is both will use _data)
+ * - _completion for direct return data
+ * - _element_ for repeated elements (may also be _data or _resp)
+ *
+ * Command structures are expected to overlay the params.raw member of the basic
+ * descriptor, and as such cannot exceed 16 bytes in length.
+ */
+
+/* This macro is used to generate a compilation error if a structure
+ * is not exactly the correct length. It gives a divide by zero error if the
+ * structure is not of the correct size, otherwise it creates an enum that is
+ * never used.
+ */
+#define IAVF_CHECK_STRUCT_LEN(n, X) enum iavf_static_assert_enum_##X \
+	{ iavf_static_assert_##X = (n) / ((sizeof(struct X) == (n)) ? 1 : 0) }
+
+/* This macro is used extensively to ensure that command structures are 16
+ * bytes in length as they have to map to the raw array of that size.
+ */
+#define IAVF_CHECK_CMD_LENGTH(X)	IAVF_CHECK_STRUCT_LEN(16, X)
+
+/* Queue Shutdown (direct 0x0003) */
+struct iavf_aqc_queue_shutdown {
+	__le32	driver_unloading;
+#define IAVF_AQ_DRIVER_UNLOADING	0x1
+	u8	reserved[12];
+};
+
+IAVF_CHECK_CMD_LENGTH(iavf_aqc_queue_shutdown);
+
+struct iavf_aqc_vsi_properties_data {
+	/* first 96 byte are written by SW */
+	__le16	valid_sections;
+#define IAVF_AQ_VSI_PROP_SWITCH_VALID		0x0001
+#define IAVF_AQ_VSI_PROP_SECURITY_VALID		0x0002
+#define IAVF_AQ_VSI_PROP_VLAN_VALID		0x0004
+#define IAVF_AQ_VSI_PROP_CAS_PV_VALID		0x0008
+#define IAVF_AQ_VSI_PROP_INGRESS_UP_VALID	0x0010
+#define IAVF_AQ_VSI_PROP_EGRESS_UP_VALID	0x0020
+#define IAVF_AQ_VSI_PROP_QUEUE_MAP_VALID	0x0040
+#define IAVF_AQ_VSI_PROP_QUEUE_OPT_VALID	0x0080
+#define IAVF_AQ_VSI_PROP_OUTER_UP_VALID		0x0100
+#define IAVF_AQ_VSI_PROP_SCHED_VALID		0x0200
+	/* switch section */
+	__le16	switch_id; /* 12bit id combined with flags below */
+#define IAVF_AQ_VSI_SW_ID_SHIFT		0x0000
+#define IAVF_AQ_VSI_SW_ID_MASK		(0xFFF << IAVF_AQ_VSI_SW_ID_SHIFT)
+#define IAVF_AQ_VSI_SW_ID_FLAG_NOT_STAG	0x1000
+#define IAVF_AQ_VSI_SW_ID_FLAG_ALLOW_LB	0x2000
+#define IAVF_AQ_VSI_SW_ID_FLAG_LOCAL_LB	0x4000
+	u8	sw_reserved[2];
+	/* security section */
+	u8	sec_flags;
+#define IAVF_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD	0x01
+#define IAVF_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK	0x02
+#define IAVF_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK	0x04
+	u8	sec_reserved;
+	/* VLAN section */
+	__le16	pvid; /* VLANS include priority bits */
+	__le16	fcoe_pvid;
+	u8	port_vlan_flags;
+#define IAVF_AQ_VSI_PVLAN_MODE_SHIFT	0x00
+#define IAVF_AQ_VSI_PVLAN_MODE_MASK	(0x03 << \
+					 IAVF_AQ_VSI_PVLAN_MODE_SHIFT)
+#define IAVF_AQ_VSI_PVLAN_MODE_TAGGED	0x01
+#define IAVF_AQ_VSI_PVLAN_MODE_UNTAGGED	0x02
+#define IAVF_AQ_VSI_PVLAN_MODE_ALL	0x03
+#define IAVF_AQ_VSI_PVLAN_INSERT_PVID	0x04
+#define IAVF_AQ_VSI_PVLAN_EMOD_SHIFT	0x03
+#define IAVF_AQ_VSI_PVLAN_EMOD_MASK	(0x3 << \
+					 IAVF_AQ_VSI_PVLAN_EMOD_SHIFT)
+#define IAVF_AQ_VSI_PVLAN_EMOD_STR_BOTH	0x0
+#define IAVF_AQ_VSI_PVLAN_EMOD_STR_UP	0x08
+#define IAVF_AQ_VSI_PVLAN_EMOD_STR	0x10
+#define IAVF_AQ_VSI_PVLAN_EMOD_NOTHING	0x18
+	u8	pvlan_reserved[3];
+	/* ingress egress up sections */
+	__le32	ingress_table; /* bitmap, 3 bits per up */
+#define IAVF_AQ_VSI_UP_TABLE_UP0_SHIFT	0
+#define IAVF_AQ_VSI_UP_TABLE_UP0_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP0_SHIFT)
+#define IAVF_AQ_VSI_UP_TABLE_UP1_SHIFT	3
+#define IAVF_AQ_VSI_UP_TABLE_UP1_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP1_SHIFT)
+#define IAVF_AQ_VSI_UP_TABLE_UP2_SHIFT	6
+#define IAVF_AQ_VSI_UP_TABLE_UP2_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP2_SHIFT)
+#define IAVF_AQ_VSI_UP_TABLE_UP3_SHIFT	9
+#define IAVF_AQ_VSI_UP_TABLE_UP3_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP3_SHIFT)
+#define IAVF_AQ_VSI_UP_TABLE_UP4_SHIFT	12
+#define IAVF_AQ_VSI_UP_TABLE_UP4_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP4_SHIFT)
+#define IAVF_AQ_VSI_UP_TABLE_UP5_SHIFT	15
+#define IAVF_AQ_VSI_UP_TABLE_UP5_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP5_SHIFT)
+#define IAVF_AQ_VSI_UP_TABLE_UP6_SHIFT	18
+#define IAVF_AQ_VSI_UP_TABLE_UP6_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP6_SHIFT)
+#define IAVF_AQ_VSI_UP_TABLE_UP7_SHIFT	21
+#define IAVF_AQ_VSI_UP_TABLE_UP7_MASK	(0x7 << \
+					 IAVF_AQ_VSI_UP_TABLE_UP7_SHIFT)
+	__le32	egress_table;   /* same defines as for ingress table */
+	/* cascaded PV section */
+	__le16	cas_pv_tag;
+	u8	cas_pv_flags;
+#define IAVF_AQ_VSI_CAS_PV_TAGX_SHIFT		0x00
+#define IAVF_AQ_VSI_CAS_PV_TAGX_MASK		(0x03 << \
+						 IAVF_AQ_VSI_CAS_PV_TAGX_SHIFT)
+#define IAVF_AQ_VSI_CAS_PV_TAGX_LEAVE		0x00
+#define IAVF_AQ_VSI_CAS_PV_TAGX_REMOVE		0x01
+#define IAVF_AQ_VSI_CAS_PV_TAGX_COPY		0x02
+#define IAVF_AQ_VSI_CAS_PV_INSERT_TAG		0x10
+#define IAVF_AQ_VSI_CAS_PV_ETAG_PRUNE		0x20
+#define IAVF_AQ_VSI_CAS_PV_ACCEPT_HOST_TAG	0x40
+	u8	cas_pv_reserved;
+	/* queue mapping section */
+	__le16	mapping_flags;
+#define IAVF_AQ_VSI_QUE_MAP_CONTIG	0x0
+#define IAVF_AQ_VSI_QUE_MAP_NONCONTIG	0x1
+	__le16	queue_mapping[16];
+#define IAVF_AQ_VSI_QUEUE_SHIFT		0x0
+#define IAVF_AQ_VSI_QUEUE_MASK		(0x7FF << IAVF_AQ_VSI_QUEUE_SHIFT)
+	__le16	tc_mapping[8];
+#define IAVF_AQ_VSI_TC_QUE_OFFSET_SHIFT	0
+#define IAVF_AQ_VSI_TC_QUE_OFFSET_MASK	(0x1FF << \
+					 IAVF_AQ_VSI_TC_QUE_OFFSET_SHIFT)
+#define IAVF_AQ_VSI_TC_QUE_NUMBER_SHIFT	9
+#define IAVF_AQ_VSI_TC_QUE_NUMBER_MASK	(0x7 << \
+					 IAVF_AQ_VSI_TC_QUE_NUMBER_SHIFT)
+	/* queueing option section */
+	u8	queueing_opt_flags;
+#define IAVF_AQ_VSI_QUE_OPT_MULTICAST_UDP_ENA	0x04
+#define IAVF_AQ_VSI_QUE_OPT_UNICAST_UDP_ENA	0x08
+#define IAVF_AQ_VSI_QUE_OPT_TCP_ENA	0x10
+#define IAVF_AQ_VSI_QUE_OPT_FCOE_ENA	0x20
+#define IAVF_AQ_VSI_QUE_OPT_RSS_LUT_PF	0x00
+#define IAVF_AQ_VSI_QUE_OPT_RSS_LUT_VSI	0x40
+	u8	queueing_opt_reserved[3];
+	/* scheduler section */
+	u8	up_enable_bits;
+	u8	sched_reserved;
+	/* outer up section */
+	__le32	outer_up_table; /* same structure and defines as ingress tbl */
+	u8	cmd_reserved[8];
+	/* last 32 bytes are written by FW */
+	__le16	qs_handle[8];
+#define IAVF_AQ_VSI_QS_HANDLE_INVALID	0xFFFF
+	__le16	stat_counter_idx;
+	__le16	sched_id;
+	u8	resp_reserved[12];
+};
+
+IAVF_CHECK_STRUCT_LEN(128, iavf_aqc_vsi_properties_data);
+
+/* Get VEB Parameters (direct 0x0232)
+ * uses iavf_aqc_switch_seid for the descriptor
+ */
+struct iavf_aqc_get_veb_parameters_completion {
+	__le16	seid;
+	__le16	switch_id;
+	__le16	veb_flags; /* only the first/last flags from 0x0230 is valid */
+	__le16	statistic_index;
+	__le16	vebs_used;
+	__le16	vebs_free;
+	u8	reserved[4];
+};
+
+IAVF_CHECK_CMD_LENGTH(iavf_aqc_get_veb_parameters_completion);
+
+#define IAVF_LINK_SPEED_100MB_SHIFT	0x1
+#define IAVF_LINK_SPEED_1000MB_SHIFT	0x2
+#define IAVF_LINK_SPEED_10GB_SHIFT	0x3
+#define IAVF_LINK_SPEED_40GB_SHIFT	0x4
+#define IAVF_LINK_SPEED_20GB_SHIFT	0x5
+#define IAVF_LINK_SPEED_25GB_SHIFT	0x6
+
+enum iavf_aq_link_speed {
+	IAVF_LINK_SPEED_UNKNOWN	= 0,
+	IAVF_LINK_SPEED_100MB	= BIT(IAVF_LINK_SPEED_100MB_SHIFT),
+	IAVF_LINK_SPEED_1GB	= BIT(IAVF_LINK_SPEED_1000MB_SHIFT),
+	IAVF_LINK_SPEED_10GB	= BIT(IAVF_LINK_SPEED_10GB_SHIFT),
+	IAVF_LINK_SPEED_40GB	= BIT(IAVF_LINK_SPEED_40GB_SHIFT),
+	IAVF_LINK_SPEED_20GB	= BIT(IAVF_LINK_SPEED_20GB_SHIFT),
+	IAVF_LINK_SPEED_25GB	= BIT(IAVF_LINK_SPEED_25GB_SHIFT),
+};
+
+/* Send to PF command (indirect 0x0801) id is only used by PF
+ * Send to VF command (indirect 0x0802) id is only used by PF
+ * Send to Peer PF command (indirect 0x0803)
+ */
+struct iavf_aqc_pf_vf_message {
+	__le32	id;
+	u8	reserved[4];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+IAVF_CHECK_CMD_LENGTH(iavf_aqc_pf_vf_message);
+
+struct iavf_aqc_get_set_rss_key {
+#define IAVF_AQC_SET_RSS_KEY_VSI_VALID		BIT(15)
+#define IAVF_AQC_SET_RSS_KEY_VSI_ID_SHIFT	0
+#define IAVF_AQC_SET_RSS_KEY_VSI_ID_MASK	(0x3FF << \
+					IAVF_AQC_SET_RSS_KEY_VSI_ID_SHIFT)
+	__le16	vsi_id;
+	u8	reserved[6];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+IAVF_CHECK_CMD_LENGTH(iavf_aqc_get_set_rss_key);
+
+struct iavf_aqc_get_set_rss_key_data {
+	u8 standard_rss_key[0x28];
+	u8 extended_hash_key[0xc];
+};
+
+IAVF_CHECK_STRUCT_LEN(0x34, iavf_aqc_get_set_rss_key_data);
+
+struct  iavf_aqc_get_set_rss_lut {
+#define IAVF_AQC_SET_RSS_LUT_VSI_VALID		BIT(15)
+#define IAVF_AQC_SET_RSS_LUT_VSI_ID_SHIFT	0
+#define IAVF_AQC_SET_RSS_LUT_VSI_ID_MASK	(0x3FF << \
+					IAVF_AQC_SET_RSS_LUT_VSI_ID_SHIFT)
+	__le16	vsi_id;
+#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT	0
+#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK \
+				BIT(IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT)
+
+#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_VSI	0
+#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_PF	1
+	__le16	flags;
+	u8	reserved[4];
+	__le32	addr_high;
+	__le32	addr_low;
+};
+
+IAVF_CHECK_CMD_LENGTH(iavf_aqc_get_set_rss_lut);
+#endif /* _IAVF_ADMINQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c
new file mode 100644
index 0000000000..6edbf134b7
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021, Intel Corporation. */
+
+/* advanced RSS configuration ethtool support for iavf */
+
+#include "iavf.h"
+
+/**
+ * iavf_fill_adv_rss_ip4_hdr - fill the IPv4 RSS protocol header
+ * @hdr: the virtchnl message protocol header data structure
+ * @hash_flds: the RSS configuration protocol hash fields
+ */
+static void
+iavf_fill_adv_rss_ip4_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds)
+{
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, IPV4);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_IPV4_SA)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, SRC);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_IPV4_DA)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DST);
+}
+
+/**
+ * iavf_fill_adv_rss_ip6_hdr - fill the IPv6 RSS protocol header
+ * @hdr: the virtchnl message protocol header data structure
+ * @hash_flds: the RSS configuration protocol hash fields
+ */
+static void
+iavf_fill_adv_rss_ip6_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds)
+{
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, IPV6);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_IPV6_SA)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, SRC);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_IPV6_DA)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, DST);
+}
+
+/**
+ * iavf_fill_adv_rss_tcp_hdr - fill the TCP RSS protocol header
+ * @hdr: the virtchnl message protocol header data structure
+ * @hash_flds: the RSS configuration protocol hash fields
+ */
+static void
+iavf_fill_adv_rss_tcp_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds)
+{
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, TCP);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, TCP, SRC_PORT);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, TCP, DST_PORT);
+}
+
+/**
+ * iavf_fill_adv_rss_udp_hdr - fill the UDP RSS protocol header
+ * @hdr: the virtchnl message protocol header data structure
+ * @hash_flds: the RSS configuration protocol hash fields
+ */
+static void
+iavf_fill_adv_rss_udp_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds)
+{
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, UDP);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, SRC_PORT);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, DST_PORT);
+}
+
+/**
+ * iavf_fill_adv_rss_sctp_hdr - fill the SCTP RSS protocol header
+ * @hdr: the virtchnl message protocol header data structure
+ * @hash_flds: the RSS configuration protocol hash fields
+ */
+static void
+iavf_fill_adv_rss_sctp_hdr(struct virtchnl_proto_hdr *hdr, u64 hash_flds)
+{
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, SCTP);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_SCTP_SRC_PORT)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, SCTP, SRC_PORT);
+
+	if (hash_flds & IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT)
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, SCTP, DST_PORT);
+}
+
+/**
+ * iavf_fill_adv_rss_cfg_msg - fill the RSS configuration into virtchnl message
+ * @rss_cfg: the virtchnl message to be filled with RSS configuration setting
+ * @packet_hdrs: the RSS configuration protocol header types
+ * @hash_flds: the RSS configuration protocol hash fields
+ *
+ * Returns 0 if the RSS configuration virtchnl message is filled successfully
+ */
+int
+iavf_fill_adv_rss_cfg_msg(struct virtchnl_rss_cfg *rss_cfg,
+			  u32 packet_hdrs, u64 hash_flds)
+{
+	struct virtchnl_proto_hdrs *proto_hdrs = &rss_cfg->proto_hdrs;
+	struct virtchnl_proto_hdr *hdr;
+
+	rss_cfg->rss_algorithm = VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC;
+
+	proto_hdrs->tunnel_level = 0;	/* always outer layer */
+
+	hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	switch (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_L3) {
+	case IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4:
+		iavf_fill_adv_rss_ip4_hdr(hdr, hash_flds);
+		break;
+	case IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6:
+		iavf_fill_adv_rss_ip6_hdr(hdr, hash_flds);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	switch (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_L4) {
+	case IAVF_ADV_RSS_FLOW_SEG_HDR_TCP:
+		iavf_fill_adv_rss_tcp_hdr(hdr, hash_flds);
+		break;
+	case IAVF_ADV_RSS_FLOW_SEG_HDR_UDP:
+		iavf_fill_adv_rss_udp_hdr(hdr, hash_flds);
+		break;
+	case IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP:
+		iavf_fill_adv_rss_sctp_hdr(hdr, hash_flds);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_find_adv_rss_cfg_by_hdrs - find RSS configuration with header type
+ * @adapter: pointer to the VF adapter structure
+ * @packet_hdrs: protocol header type to find.
+ *
+ * Returns pointer to advance RSS configuration if found or null
+ */
+struct iavf_adv_rss *
+iavf_find_adv_rss_cfg_by_hdrs(struct iavf_adapter *adapter, u32 packet_hdrs)
+{
+	struct iavf_adv_rss *rss;
+
+	list_for_each_entry(rss, &adapter->adv_rss_list_head, list)
+		if (rss->packet_hdrs == packet_hdrs)
+			return rss;
+
+	return NULL;
+}
+
+/**
+ * iavf_print_adv_rss_cfg
+ * @adapter: pointer to the VF adapter structure
+ * @rss: pointer to the advance RSS configuration to print
+ * @action: the string description about how to handle the RSS
+ * @result: the string description about the virtchnl result
+ *
+ * Print the advance RSS configuration
+ **/
+void
+iavf_print_adv_rss_cfg(struct iavf_adapter *adapter, struct iavf_adv_rss *rss,
+		       const char *action, const char *result)
+{
+	u32 packet_hdrs = rss->packet_hdrs;
+	u64 hash_flds = rss->hash_flds;
+	static char hash_opt[300];
+	const char *proto;
+
+	if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_TCP)
+		proto = "TCP";
+	else if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_UDP)
+		proto = "UDP";
+	else if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP)
+		proto = "SCTP";
+	else
+		return;
+
+	memset(hash_opt, 0, sizeof(hash_opt));
+
+	strcat(hash_opt, proto);
+	if (packet_hdrs & IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4)
+		strcat(hash_opt, "v4 ");
+	else
+		strcat(hash_opt, "v6 ");
+
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_IPV4_SA |
+			 IAVF_ADV_RSS_HASH_FLD_IPV6_SA))
+		strcat(hash_opt, "IP SA,");
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_IPV4_DA |
+			 IAVF_ADV_RSS_HASH_FLD_IPV6_DA))
+		strcat(hash_opt, "IP DA,");
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_SCTP_SRC_PORT))
+		strcat(hash_opt, "src port,");
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT))
+		strcat(hash_opt, "dst port,");
+
+	if (!action)
+		action = "";
+
+	if (!result)
+		result = "";
+
+	dev_info(&adapter->pdev->dev, "%s %s %s\n", action, hash_opt, result);
+}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h
new file mode 100644
index 0000000000..4d3be11af7
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_adv_rss.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021, Intel Corporation. */
+
+#ifndef _IAVF_ADV_RSS_H_
+#define _IAVF_ADV_RSS_H_
+
+struct iavf_adapter;
+
+/* State of advanced RSS configuration */
+enum iavf_adv_rss_state_t {
+	IAVF_ADV_RSS_ADD_REQUEST,	/* User requests to add RSS */
+	IAVF_ADV_RSS_ADD_PENDING,	/* RSS pending add by the PF */
+	IAVF_ADV_RSS_DEL_REQUEST,	/* Driver requests to delete RSS */
+	IAVF_ADV_RSS_DEL_PENDING,	/* RSS pending delete by the PF */
+	IAVF_ADV_RSS_ACTIVE,		/* RSS configuration is active */
+};
+
+enum iavf_adv_rss_flow_seg_hdr {
+	IAVF_ADV_RSS_FLOW_SEG_HDR_NONE	= 0x00000000,
+	IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4	= 0x00000001,
+	IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6	= 0x00000002,
+	IAVF_ADV_RSS_FLOW_SEG_HDR_TCP	= 0x00000004,
+	IAVF_ADV_RSS_FLOW_SEG_HDR_UDP	= 0x00000008,
+	IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP	= 0x00000010,
+};
+
+#define IAVF_ADV_RSS_FLOW_SEG_HDR_L3		\
+	(IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4	|	\
+	 IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6)
+
+#define IAVF_ADV_RSS_FLOW_SEG_HDR_L4		\
+	(IAVF_ADV_RSS_FLOW_SEG_HDR_TCP |	\
+	 IAVF_ADV_RSS_FLOW_SEG_HDR_UDP |	\
+	 IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP)
+
+enum iavf_adv_rss_flow_field {
+	/* L3 */
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV4_SA,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV4_DA,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV6_SA,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV6_DA,
+	/* L4 */
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_SRC_PORT,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_DST_PORT,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_SRC_PORT,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_DST_PORT,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_SRC_PORT,
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_DST_PORT,
+
+	/* The total number of enums must not exceed 64 */
+	IAVF_ADV_RSS_FLOW_FIELD_IDX_MAX
+};
+
+#define IAVF_ADV_RSS_HASH_INVALID	0
+#define IAVF_ADV_RSS_HASH_FLD_IPV4_SA	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV4_SA)
+#define IAVF_ADV_RSS_HASH_FLD_IPV6_SA	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV6_SA)
+#define IAVF_ADV_RSS_HASH_FLD_IPV4_DA	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV4_DA)
+#define IAVF_ADV_RSS_HASH_FLD_IPV6_DA	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_IPV6_DA)
+#define IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_SRC_PORT)
+#define IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_TCP_DST_PORT)
+#define IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_SRC_PORT)
+#define IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_UDP_DST_PORT)
+#define IAVF_ADV_RSS_HASH_FLD_SCTP_SRC_PORT	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_SRC_PORT)
+#define IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT	\
+	BIT_ULL(IAVF_ADV_RSS_FLOW_FIELD_IDX_SCTP_DST_PORT)
+
+/* bookkeeping of advanced RSS configuration */
+struct iavf_adv_rss {
+	enum iavf_adv_rss_state_t state;
+	struct list_head list;
+
+	u32 packet_hdrs;
+	u64 hash_flds;
+
+	struct virtchnl_rss_cfg cfg_msg;
+};
+
+int
+iavf_fill_adv_rss_cfg_msg(struct virtchnl_rss_cfg *rss_cfg,
+			  u32 packet_hdrs, u64 hash_flds);
+struct iavf_adv_rss *
+iavf_find_adv_rss_cfg_by_hdrs(struct iavf_adapter *adapter, u32 packet_hdrs);
+void
+iavf_print_adv_rss_cfg(struct iavf_adapter *adapter, struct iavf_adv_rss *rss,
+		       const char *action, const char *result);
+#endif /* _IAVF_ADV_RSS_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_alloc.h b/drivers/net/ethernet/intel/iavf/iavf_alloc.h
new file mode 100644
index 0000000000..162ea70685
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_alloc.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_ALLOC_H_
+#define _IAVF_ALLOC_H_
+
+struct iavf_hw;
+
+/* Memory allocation types */
+enum iavf_memory_type {
+	iavf_mem_arq_buf = 0,		/* ARQ indirect command buffer */
+	iavf_mem_asq_buf = 1,
+	iavf_mem_atq_buf = 2,		/* ATQ indirect command buffer */
+	iavf_mem_arq_ring = 3,		/* ARQ descriptor ring */
+	iavf_mem_atq_ring = 4,		/* ATQ descriptor ring */
+	iavf_mem_pd = 5,		/* Page Descriptor */
+	iavf_mem_bp = 6,		/* Backing Page - 4KB */
+	iavf_mem_bp_jumbo = 7,		/* Backing Page - > 4KB */
+	iavf_mem_reserved
+};
+
+/* prototype for functions used for dynamic memory allocation */
+enum iavf_status iavf_allocate_dma_mem(struct iavf_hw *hw,
+				       struct iavf_dma_mem *mem,
+				       enum iavf_memory_type type,
+				       u64 size, u32 alignment);
+enum iavf_status iavf_free_dma_mem(struct iavf_hw *hw,
+				   struct iavf_dma_mem *mem);
+enum iavf_status iavf_allocate_virt_mem(struct iavf_hw *hw,
+					struct iavf_virt_mem *mem, u32 size);
+void iavf_free_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem);
+
+#endif /* _IAVF_ALLOC_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_client.c b/drivers/net/ethernet/intel/iavf/iavf_client.c
new file mode 100644
index 0000000000..e6051b6355
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_client.c
@@ -0,0 +1,578 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include <linux/list.h>
+#include <linux/errno.h>
+
+#include "iavf.h"
+#include "iavf_prototype.h"
+#include "iavf_client.h"
+
+static
+const char iavf_client_interface_version_str[] = IAVF_CLIENT_VERSION_STR;
+static struct iavf_client *vf_registered_client;
+static LIST_HEAD(iavf_devices);
+static DEFINE_MUTEX(iavf_device_mutex);
+
+static u32 iavf_client_virtchnl_send(struct iavf_info *ldev,
+				     struct iavf_client *client,
+				     u8 *msg, u16 len);
+
+static int iavf_client_setup_qvlist(struct iavf_info *ldev,
+				    struct iavf_client *client,
+				    struct iavf_qvlist_info *qvlist_info);
+
+static struct iavf_ops iavf_lan_ops = {
+	.virtchnl_send = iavf_client_virtchnl_send,
+	.setup_qvlist = iavf_client_setup_qvlist,
+};
+
+/**
+ * iavf_client_get_params - retrieve relevant client parameters
+ * @vsi: VSI with parameters
+ * @params: client param struct
+ **/
+static
+void iavf_client_get_params(struct iavf_vsi *vsi, struct iavf_params *params)
+{
+	int i;
+
+	memset(params, 0, sizeof(struct iavf_params));
+	params->mtu = vsi->netdev->mtu;
+	params->link_up = vsi->back->link_up;
+
+	for (i = 0; i < IAVF_MAX_USER_PRIORITY; i++) {
+		params->qos.prio_qos[i].tc = 0;
+		params->qos.prio_qos[i].qs_handle = vsi->qs_handle;
+	}
+}
+
+/**
+ * iavf_notify_client_message - call the client message receive callback
+ * @vsi: the VSI associated with this client
+ * @msg: message buffer
+ * @len: length of message
+ *
+ * If there is a client to this VSI, call the client
+ **/
+void iavf_notify_client_message(struct iavf_vsi *vsi, u8 *msg, u16 len)
+{
+	struct iavf_client_instance *cinst;
+
+	if (!vsi)
+		return;
+
+	cinst = vsi->back->cinst;
+	if (!cinst || !cinst->client || !cinst->client->ops ||
+	    !cinst->client->ops->virtchnl_receive) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"Cannot locate client instance virtchnl_receive function\n");
+		return;
+	}
+	cinst->client->ops->virtchnl_receive(&cinst->lan_info,  cinst->client,
+					     msg, len);
+}
+
+/**
+ * iavf_notify_client_l2_params - call the client notify callback
+ * @vsi: the VSI with l2 param changes
+ *
+ * If there is a client to this VSI, call the client
+ **/
+void iavf_notify_client_l2_params(struct iavf_vsi *vsi)
+{
+	struct iavf_client_instance *cinst;
+	struct iavf_params params;
+
+	if (!vsi)
+		return;
+
+	cinst = vsi->back->cinst;
+
+	if (!cinst || !cinst->client || !cinst->client->ops ||
+	    !cinst->client->ops->l2_param_change) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"Cannot locate client instance l2_param_change function\n");
+		return;
+	}
+	iavf_client_get_params(vsi, &params);
+	cinst->lan_info.params = params;
+	cinst->client->ops->l2_param_change(&cinst->lan_info, cinst->client,
+					    &params);
+}
+
+/**
+ * iavf_notify_client_open - call the client open callback
+ * @vsi: the VSI with netdev opened
+ *
+ * If there is a client to this netdev, call the client with open
+ **/
+void iavf_notify_client_open(struct iavf_vsi *vsi)
+{
+	struct iavf_adapter *adapter = vsi->back;
+	struct iavf_client_instance *cinst = adapter->cinst;
+	int ret;
+
+	if (!cinst || !cinst->client || !cinst->client->ops ||
+	    !cinst->client->ops->open) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"Cannot locate client instance open function\n");
+		return;
+	}
+	if (!(test_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state))) {
+		ret = cinst->client->ops->open(&cinst->lan_info, cinst->client);
+		if (!ret)
+			set_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state);
+	}
+}
+
+/**
+ * iavf_client_release_qvlist - send a message to the PF to release rdma qv map
+ * @ldev: pointer to L2 context.
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int iavf_client_release_qvlist(struct iavf_info *ldev)
+{
+	struct iavf_adapter *adapter = ldev->vf;
+	enum iavf_status err;
+
+	if (adapter->aq_required)
+		return -EAGAIN;
+
+	err = iavf_aq_send_msg_to_pf(&adapter->hw,
+				     VIRTCHNL_OP_RELEASE_RDMA_IRQ_MAP,
+				     IAVF_SUCCESS, NULL, 0, NULL);
+
+	if (err)
+		dev_err(&adapter->pdev->dev,
+			"Unable to send RDMA vector release message to PF, error %d, aq status %d\n",
+			err, adapter->hw.aq.asq_last_status);
+
+	return err;
+}
+
+/**
+ * iavf_notify_client_close - call the client close callback
+ * @vsi: the VSI with netdev closed
+ * @reset: true when close called due to reset pending
+ *
+ * If there is a client to this netdev, call the client with close
+ **/
+void iavf_notify_client_close(struct iavf_vsi *vsi, bool reset)
+{
+	struct iavf_adapter *adapter = vsi->back;
+	struct iavf_client_instance *cinst = adapter->cinst;
+
+	if (!cinst || !cinst->client || !cinst->client->ops ||
+	    !cinst->client->ops->close) {
+		dev_dbg(&vsi->back->pdev->dev,
+			"Cannot locate client instance close function\n");
+		return;
+	}
+	cinst->client->ops->close(&cinst->lan_info, cinst->client, reset);
+	iavf_client_release_qvlist(&cinst->lan_info);
+	clear_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state);
+}
+
+/**
+ * iavf_client_add_instance - add a client instance to the instance list
+ * @adapter: pointer to the board struct
+ *
+ * Returns cinst ptr on success, NULL on failure
+ **/
+static struct iavf_client_instance *
+iavf_client_add_instance(struct iavf_adapter *adapter)
+{
+	struct iavf_client_instance *cinst = NULL;
+	struct iavf_vsi *vsi = &adapter->vsi;
+	struct netdev_hw_addr *mac = NULL;
+	struct iavf_params params;
+
+	if (!vf_registered_client)
+		goto out;
+
+	if (adapter->cinst) {
+		cinst = adapter->cinst;
+		goto out;
+	}
+
+	cinst = kzalloc(sizeof(*cinst), GFP_KERNEL);
+	if (!cinst)
+		goto out;
+
+	cinst->lan_info.vf = (void *)adapter;
+	cinst->lan_info.netdev = vsi->netdev;
+	cinst->lan_info.pcidev = adapter->pdev;
+	cinst->lan_info.fid = 0;
+	cinst->lan_info.ftype = IAVF_CLIENT_FTYPE_VF;
+	cinst->lan_info.hw_addr = adapter->hw.hw_addr;
+	cinst->lan_info.ops = &iavf_lan_ops;
+	cinst->lan_info.version.major = IAVF_CLIENT_VERSION_MAJOR;
+	cinst->lan_info.version.minor = IAVF_CLIENT_VERSION_MINOR;
+	cinst->lan_info.version.build = IAVF_CLIENT_VERSION_BUILD;
+	iavf_client_get_params(vsi, &params);
+	cinst->lan_info.params = params;
+	set_bit(__IAVF_CLIENT_INSTANCE_NONE, &cinst->state);
+
+	cinst->lan_info.msix_count = adapter->num_rdma_msix;
+	cinst->lan_info.msix_entries =
+			&adapter->msix_entries[adapter->rdma_base_vector];
+
+	mac = list_first_entry(&cinst->lan_info.netdev->dev_addrs.list,
+			       struct netdev_hw_addr, list);
+	if (mac)
+		ether_addr_copy(cinst->lan_info.lanmac, mac->addr);
+	else
+		dev_err(&adapter->pdev->dev, "MAC address list is empty!\n");
+
+	cinst->client = vf_registered_client;
+	adapter->cinst = cinst;
+out:
+	return cinst;
+}
+
+/**
+ * iavf_client_del_instance - removes a client instance from the list
+ * @adapter: pointer to the board struct
+ *
+ **/
+static
+void iavf_client_del_instance(struct iavf_adapter *adapter)
+{
+	kfree(adapter->cinst);
+	adapter->cinst = NULL;
+}
+
+/**
+ * iavf_client_subtask - client maintenance work
+ * @adapter: board private structure
+ **/
+void iavf_client_subtask(struct iavf_adapter *adapter)
+{
+	struct iavf_client *client = vf_registered_client;
+	struct iavf_client_instance *cinst;
+	int ret = 0;
+
+	if (adapter->state < __IAVF_DOWN)
+		return;
+
+	/* first check client is registered */
+	if (!client)
+		return;
+
+	/* Add the client instance to the instance list */
+	cinst = iavf_client_add_instance(adapter);
+	if (!cinst)
+		return;
+
+	dev_info(&adapter->pdev->dev, "Added instance of Client %s\n",
+		 client->name);
+
+	if (!test_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state)) {
+		/* Send an Open request to the client */
+
+		if (client->ops && client->ops->open)
+			ret = client->ops->open(&cinst->lan_info, client);
+		if (!ret)
+			set_bit(__IAVF_CLIENT_INSTANCE_OPENED,
+				&cinst->state);
+		else
+			/* remove client instance */
+			iavf_client_del_instance(adapter);
+	}
+}
+
+/**
+ * iavf_lan_add_device - add a lan device struct to the list of lan devices
+ * @adapter: pointer to the board struct
+ *
+ * Returns 0 on success or none 0 on error
+ **/
+int iavf_lan_add_device(struct iavf_adapter *adapter)
+{
+	struct iavf_device *ldev;
+	int ret = 0;
+
+	mutex_lock(&iavf_device_mutex);
+	list_for_each_entry(ldev, &iavf_devices, list) {
+		if (ldev->vf == adapter) {
+			ret = -EEXIST;
+			goto out;
+		}
+	}
+	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+	if (!ldev) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	ldev->vf = adapter;
+	INIT_LIST_HEAD(&ldev->list);
+	list_add(&ldev->list, &iavf_devices);
+	dev_info(&adapter->pdev->dev, "Added LAN device bus=0x%02x dev=0x%02x func=0x%02x\n",
+		 adapter->hw.bus.bus_id, adapter->hw.bus.device,
+		 adapter->hw.bus.func);
+
+	/* Since in some cases register may have happened before a device gets
+	 * added, we can schedule a subtask to go initiate the clients.
+	 */
+	adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED;
+
+out:
+	mutex_unlock(&iavf_device_mutex);
+	return ret;
+}
+
+/**
+ * iavf_lan_del_device - removes a lan device from the device list
+ * @adapter: pointer to the board struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int iavf_lan_del_device(struct iavf_adapter *adapter)
+{
+	struct iavf_device *ldev, *tmp;
+	int ret = -ENODEV;
+
+	mutex_lock(&iavf_device_mutex);
+	list_for_each_entry_safe(ldev, tmp, &iavf_devices, list) {
+		if (ldev->vf == adapter) {
+			dev_info(&adapter->pdev->dev,
+				 "Deleted LAN device bus=0x%02x dev=0x%02x func=0x%02x\n",
+				 adapter->hw.bus.bus_id, adapter->hw.bus.device,
+				 adapter->hw.bus.func);
+			list_del(&ldev->list);
+			kfree(ldev);
+			ret = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&iavf_device_mutex);
+	return ret;
+}
+
+/**
+ * iavf_client_release - release client specific resources
+ * @client: pointer to the registered client
+ *
+ **/
+static void iavf_client_release(struct iavf_client *client)
+{
+	struct iavf_client_instance *cinst;
+	struct iavf_device *ldev;
+	struct iavf_adapter *adapter;
+
+	mutex_lock(&iavf_device_mutex);
+	list_for_each_entry(ldev, &iavf_devices, list) {
+		adapter = ldev->vf;
+		cinst = adapter->cinst;
+		if (!cinst)
+			continue;
+		if (test_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state)) {
+			if (client->ops && client->ops->close)
+				client->ops->close(&cinst->lan_info, client,
+						   false);
+			iavf_client_release_qvlist(&cinst->lan_info);
+			clear_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state);
+
+			dev_warn(&adapter->pdev->dev,
+				 "Client %s instance closed\n", client->name);
+		}
+		/* delete the client instance */
+		iavf_client_del_instance(adapter);
+		dev_info(&adapter->pdev->dev, "Deleted client instance of Client %s\n",
+			 client->name);
+	}
+	mutex_unlock(&iavf_device_mutex);
+}
+
+/**
+ * iavf_client_prepare - prepare client specific resources
+ * @client: pointer to the registered client
+ *
+ **/
+static void iavf_client_prepare(struct iavf_client *client)
+{
+	struct iavf_device *ldev;
+	struct iavf_adapter *adapter;
+
+	mutex_lock(&iavf_device_mutex);
+	list_for_each_entry(ldev, &iavf_devices, list) {
+		adapter = ldev->vf;
+		/* Signal the watchdog to service the client */
+		adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED;
+	}
+	mutex_unlock(&iavf_device_mutex);
+}
+
+/**
+ * iavf_client_virtchnl_send - send a message to the PF instance
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @msg: pointer to message buffer
+ * @len: message length
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static u32 iavf_client_virtchnl_send(struct iavf_info *ldev,
+				     struct iavf_client *client,
+				     u8 *msg, u16 len)
+{
+	struct iavf_adapter *adapter = ldev->vf;
+	enum iavf_status err;
+
+	if (adapter->aq_required)
+		return -EAGAIN;
+
+	err = iavf_aq_send_msg_to_pf(&adapter->hw, VIRTCHNL_OP_RDMA,
+				     IAVF_SUCCESS, msg, len, NULL);
+	if (err)
+		dev_err(&adapter->pdev->dev, "Unable to send RDMA message to PF, error %d, aq status %d\n",
+			err, adapter->hw.aq.asq_last_status);
+
+	return err;
+}
+
+/**
+ * iavf_client_setup_qvlist - send a message to the PF to setup rdma qv map
+ * @ldev: pointer to L2 context.
+ * @client: Client pointer.
+ * @qvlist_info: queue and vector list
+ *
+ * Return 0 on success or < 0 on error
+ **/
+static int iavf_client_setup_qvlist(struct iavf_info *ldev,
+				    struct iavf_client *client,
+				    struct iavf_qvlist_info *qvlist_info)
+{
+	struct virtchnl_rdma_qvlist_info *v_qvlist_info;
+	struct iavf_adapter *adapter = ldev->vf;
+	struct iavf_qv_info *qv_info;
+	enum iavf_status err;
+	u32 v_idx, i;
+	size_t msg_size;
+
+	if (adapter->aq_required)
+		return -EAGAIN;
+
+	/* A quick check on whether the vectors belong to the client */
+	for (i = 0; i < qvlist_info->num_vectors; i++) {
+		qv_info = &qvlist_info->qv_info[i];
+		if (!qv_info)
+			continue;
+		v_idx = qv_info->v_idx;
+		if ((v_idx >=
+		    (adapter->rdma_base_vector + adapter->num_rdma_msix)) ||
+		    (v_idx < adapter->rdma_base_vector))
+			return -EINVAL;
+	}
+
+	v_qvlist_info = (struct virtchnl_rdma_qvlist_info *)qvlist_info;
+	msg_size = virtchnl_struct_size(v_qvlist_info, qv_info,
+					v_qvlist_info->num_vectors);
+
+	adapter->client_pending |= BIT(VIRTCHNL_OP_CONFIG_RDMA_IRQ_MAP);
+	err = iavf_aq_send_msg_to_pf(&adapter->hw,
+				VIRTCHNL_OP_CONFIG_RDMA_IRQ_MAP, IAVF_SUCCESS,
+				(u8 *)v_qvlist_info, msg_size, NULL);
+
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"Unable to send RDMA vector config message to PF, error %d, aq status %d\n",
+			err, adapter->hw.aq.asq_last_status);
+		goto out;
+	}
+
+	err = -EBUSY;
+	for (i = 0; i < 5; i++) {
+		msleep(100);
+		if (!(adapter->client_pending &
+		      BIT(VIRTCHNL_OP_CONFIG_RDMA_IRQ_MAP))) {
+			err = 0;
+			break;
+		}
+	}
+out:
+	return err;
+}
+
+/**
+ * iavf_register_client - Register a iavf client driver with the L2 driver
+ * @client: pointer to the iavf_client struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int iavf_register_client(struct iavf_client *client)
+{
+	int ret = 0;
+
+	if (!client) {
+		ret = -EIO;
+		goto out;
+	}
+
+	if (strlen(client->name) == 0) {
+		pr_info("iavf: Failed to register client with no name\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	if (vf_registered_client) {
+		pr_info("iavf: Client %s has already been registered!\n",
+			client->name);
+		ret = -EEXIST;
+		goto out;
+	}
+
+	if ((client->version.major != IAVF_CLIENT_VERSION_MAJOR) ||
+	    (client->version.minor != IAVF_CLIENT_VERSION_MINOR)) {
+		pr_info("iavf: Failed to register client %s due to mismatched client interface version\n",
+			client->name);
+		pr_info("Client is using version: %02d.%02d.%02d while LAN driver supports %s\n",
+			client->version.major, client->version.minor,
+			client->version.build,
+			iavf_client_interface_version_str);
+		ret = -EIO;
+		goto out;
+	}
+
+	vf_registered_client = client;
+
+	iavf_client_prepare(client);
+
+	pr_info("iavf: Registered client %s with return code %d\n",
+		client->name, ret);
+out:
+	return ret;
+}
+EXPORT_SYMBOL(iavf_register_client);
+
+/**
+ * iavf_unregister_client - Unregister a iavf client driver with the L2 driver
+ * @client: pointer to the iavf_client struct
+ *
+ * Returns 0 on success or non-0 on error
+ **/
+int iavf_unregister_client(struct iavf_client *client)
+{
+	int ret = 0;
+
+	/* When a unregister request comes through we would have to send
+	 * a close for each of the client instances that were opened.
+	 * client_release function is called to handle this.
+	 */
+	iavf_client_release(client);
+
+	if (vf_registered_client != client) {
+		pr_info("iavf: Client %s has not been registered\n",
+			client->name);
+		ret = -ENODEV;
+		goto out;
+	}
+	vf_registered_client = NULL;
+	pr_info("iavf: Unregistered client %s\n", client->name);
+out:
+	return ret;
+}
+EXPORT_SYMBOL(iavf_unregister_client);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_client.h b/drivers/net/ethernet/intel/iavf/iavf_client.h
new file mode 100644
index 0000000000..500269bc0f
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_client.h
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_CLIENT_H_
+#define _IAVF_CLIENT_H_
+
+#define IAVF_CLIENT_STR_LENGTH 10
+
+/* Client interface version should be updated anytime there is a change in the
+ * existing APIs or data structures.
+ */
+#define IAVF_CLIENT_VERSION_MAJOR 0
+#define IAVF_CLIENT_VERSION_MINOR 01
+#define IAVF_CLIENT_VERSION_BUILD 00
+#define IAVF_CLIENT_VERSION_STR     \
+	__stringify(IAVF_CLIENT_VERSION_MAJOR) "." \
+	__stringify(IAVF_CLIENT_VERSION_MINOR) "." \
+	__stringify(IAVF_CLIENT_VERSION_BUILD)
+
+struct iavf_client_version {
+	u8 major;
+	u8 minor;
+	u8 build;
+	u8 rsvd;
+};
+
+enum iavf_client_state {
+	__IAVF_CLIENT_NULL,
+	__IAVF_CLIENT_REGISTERED
+};
+
+enum iavf_client_instance_state {
+	__IAVF_CLIENT_INSTANCE_NONE,
+	__IAVF_CLIENT_INSTANCE_OPENED,
+};
+
+struct iavf_ops;
+struct iavf_client;
+
+/* HW does not define a type value for AEQ; only for RX/TX and CEQ.
+ * In order for us to keep the interface simple, SW will define a
+ * unique type value for AEQ.
+ */
+#define IAVF_QUEUE_TYPE_PE_AEQ	0x80
+#define IAVF_QUEUE_INVALID_IDX	0xFFFF
+
+struct iavf_qv_info {
+	u32 v_idx; /* msix_vector */
+	u16 ceq_idx;
+	u16 aeq_idx;
+	u8 itr_idx;
+};
+
+struct iavf_qvlist_info {
+	u32 num_vectors;
+	struct iavf_qv_info qv_info[];
+};
+
+#define IAVF_CLIENT_MSIX_ALL 0xFFFFFFFF
+
+/* set of LAN parameters useful for clients managed by LAN */
+
+/* Struct to hold per priority info */
+struct iavf_prio_qos_params {
+	u16 qs_handle; /* qs handle for prio */
+	u8 tc; /* TC mapped to prio */
+	u8 reserved;
+};
+
+#define IAVF_CLIENT_MAX_USER_PRIORITY	8
+/* Struct to hold Client QoS */
+struct iavf_qos_params {
+	struct iavf_prio_qos_params prio_qos[IAVF_CLIENT_MAX_USER_PRIORITY];
+};
+
+struct iavf_params {
+	struct iavf_qos_params qos;
+	u16 mtu;
+	u16 link_up; /* boolean */
+};
+
+/* Structure to hold LAN device info for a client device */
+struct iavf_info {
+	struct iavf_client_version version;
+	u8 lanmac[6];
+	struct net_device *netdev;
+	struct pci_dev *pcidev;
+	u8 __iomem *hw_addr;
+	u8 fid;	/* function id, PF id or VF id */
+#define IAVF_CLIENT_FTYPE_PF 0
+#define IAVF_CLIENT_FTYPE_VF 1
+	u8 ftype; /* function type, PF or VF */
+	void *vf; /* cast to iavf_adapter */
+
+	/* All L2 params that could change during the life span of the device
+	 * and needs to be communicated to the client when they change
+	 */
+	struct iavf_params params;
+	struct iavf_ops *ops;
+
+	u16 msix_count;	 /* number of msix vectors*/
+	/* Array down below will be dynamically allocated based on msix_count */
+	struct msix_entry *msix_entries;
+	u16 itr_index; /* Which ITR index the PE driver is suppose to use */
+};
+
+struct iavf_ops {
+	/* setup_q_vector_list enables queues with a particular vector */
+	int (*setup_qvlist)(struct iavf_info *ldev, struct iavf_client *client,
+			    struct iavf_qvlist_info *qv_info);
+
+	u32 (*virtchnl_send)(struct iavf_info *ldev, struct iavf_client *client,
+			     u8 *msg, u16 len);
+
+	/* If the PE Engine is unresponsive, RDMA driver can request a reset.*/
+	void (*request_reset)(struct iavf_info *ldev,
+			      struct iavf_client *client);
+};
+
+struct iavf_client_ops {
+	/* Should be called from register_client() or whenever the driver is
+	 * ready to create a specific client instance.
+	 */
+	int (*open)(struct iavf_info *ldev, struct iavf_client *client);
+
+	/* Should be closed when netdev is unavailable or when unregister
+	 * call comes in. If the close happens due to a reset, set the reset
+	 * bit to true.
+	 */
+	void (*close)(struct iavf_info *ldev, struct iavf_client *client,
+		      bool reset);
+
+	/* called when some l2 managed parameters changes - mss */
+	void (*l2_param_change)(struct iavf_info *ldev,
+				struct iavf_client *client,
+				struct iavf_params *params);
+
+	/* called when a message is received from the PF */
+	int (*virtchnl_receive)(struct iavf_info *ldev,
+				struct iavf_client *client,
+				u8 *msg, u16 len);
+};
+
+/* Client device */
+struct iavf_client_instance {
+	struct list_head list;
+	struct iavf_info lan_info;
+	struct iavf_client *client;
+	unsigned long  state;
+};
+
+struct iavf_client {
+	struct list_head list;		/* list of registered clients */
+	char name[IAVF_CLIENT_STR_LENGTH];
+	struct iavf_client_version version;
+	unsigned long state;		/* client state */
+	atomic_t ref_cnt;  /* Count of all the client devices of this kind */
+	u32 flags;
+#define IAVF_CLIENT_FLAGS_LAUNCH_ON_PROBE	BIT(0)
+#define IAVF_TX_FLAGS_NOTIFY_OTHER_EVENTS	BIT(2)
+	u8 type;
+#define IAVF_CLIENT_RDMA 0
+	struct iavf_client_ops *ops;	/* client ops provided by the client */
+};
+
+/* used by clients */
+int iavf_register_client(struct iavf_client *client);
+int iavf_unregister_client(struct iavf_client *client);
+#endif /* _IAVF_CLIENT_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c
new file mode 100644
index 0000000000..1afd761d80
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_common.c
@@ -0,0 +1,799 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "iavf_type.h"
+#include "iavf_adminq.h"
+#include "iavf_prototype.h"
+#include <linux/avf/virtchnl.h>
+
+/**
+ * iavf_set_mac_type - Sets MAC type
+ * @hw: pointer to the HW structure
+ *
+ * This function sets the mac type of the adapter based on the
+ * vendor ID and device ID stored in the hw structure.
+ **/
+enum iavf_status iavf_set_mac_type(struct iavf_hw *hw)
+{
+	enum iavf_status status = 0;
+
+	if (hw->vendor_id == PCI_VENDOR_ID_INTEL) {
+		switch (hw->device_id) {
+		case IAVF_DEV_ID_X722_VF:
+			hw->mac.type = IAVF_MAC_X722_VF;
+			break;
+		case IAVF_DEV_ID_VF:
+		case IAVF_DEV_ID_VF_HV:
+		case IAVF_DEV_ID_ADAPTIVE_VF:
+			hw->mac.type = IAVF_MAC_VF;
+			break;
+		default:
+			hw->mac.type = IAVF_MAC_GENERIC;
+			break;
+		}
+	} else {
+		status = IAVF_ERR_DEVICE_NOT_SUPPORTED;
+	}
+
+	return status;
+}
+
+/**
+ * iavf_aq_str - convert AQ err code to a string
+ * @hw: pointer to the HW structure
+ * @aq_err: the AQ error code to convert
+ **/
+const char *iavf_aq_str(struct iavf_hw *hw, enum iavf_admin_queue_err aq_err)
+{
+	switch (aq_err) {
+	case IAVF_AQ_RC_OK:
+		return "OK";
+	case IAVF_AQ_RC_EPERM:
+		return "IAVF_AQ_RC_EPERM";
+	case IAVF_AQ_RC_ENOENT:
+		return "IAVF_AQ_RC_ENOENT";
+	case IAVF_AQ_RC_ESRCH:
+		return "IAVF_AQ_RC_ESRCH";
+	case IAVF_AQ_RC_EINTR:
+		return "IAVF_AQ_RC_EINTR";
+	case IAVF_AQ_RC_EIO:
+		return "IAVF_AQ_RC_EIO";
+	case IAVF_AQ_RC_ENXIO:
+		return "IAVF_AQ_RC_ENXIO";
+	case IAVF_AQ_RC_E2BIG:
+		return "IAVF_AQ_RC_E2BIG";
+	case IAVF_AQ_RC_EAGAIN:
+		return "IAVF_AQ_RC_EAGAIN";
+	case IAVF_AQ_RC_ENOMEM:
+		return "IAVF_AQ_RC_ENOMEM";
+	case IAVF_AQ_RC_EACCES:
+		return "IAVF_AQ_RC_EACCES";
+	case IAVF_AQ_RC_EFAULT:
+		return "IAVF_AQ_RC_EFAULT";
+	case IAVF_AQ_RC_EBUSY:
+		return "IAVF_AQ_RC_EBUSY";
+	case IAVF_AQ_RC_EEXIST:
+		return "IAVF_AQ_RC_EEXIST";
+	case IAVF_AQ_RC_EINVAL:
+		return "IAVF_AQ_RC_EINVAL";
+	case IAVF_AQ_RC_ENOTTY:
+		return "IAVF_AQ_RC_ENOTTY";
+	case IAVF_AQ_RC_ENOSPC:
+		return "IAVF_AQ_RC_ENOSPC";
+	case IAVF_AQ_RC_ENOSYS:
+		return "IAVF_AQ_RC_ENOSYS";
+	case IAVF_AQ_RC_ERANGE:
+		return "IAVF_AQ_RC_ERANGE";
+	case IAVF_AQ_RC_EFLUSHED:
+		return "IAVF_AQ_RC_EFLUSHED";
+	case IAVF_AQ_RC_BAD_ADDR:
+		return "IAVF_AQ_RC_BAD_ADDR";
+	case IAVF_AQ_RC_EMODE:
+		return "IAVF_AQ_RC_EMODE";
+	case IAVF_AQ_RC_EFBIG:
+		return "IAVF_AQ_RC_EFBIG";
+	}
+
+	snprintf(hw->err_str, sizeof(hw->err_str), "%d", aq_err);
+	return hw->err_str;
+}
+
+/**
+ * iavf_stat_str - convert status err code to a string
+ * @hw: pointer to the HW structure
+ * @stat_err: the status error code to convert
+ **/
+const char *iavf_stat_str(struct iavf_hw *hw, enum iavf_status stat_err)
+{
+	switch (stat_err) {
+	case 0:
+		return "OK";
+	case IAVF_ERR_NVM:
+		return "IAVF_ERR_NVM";
+	case IAVF_ERR_NVM_CHECKSUM:
+		return "IAVF_ERR_NVM_CHECKSUM";
+	case IAVF_ERR_PHY:
+		return "IAVF_ERR_PHY";
+	case IAVF_ERR_CONFIG:
+		return "IAVF_ERR_CONFIG";
+	case IAVF_ERR_PARAM:
+		return "IAVF_ERR_PARAM";
+	case IAVF_ERR_MAC_TYPE:
+		return "IAVF_ERR_MAC_TYPE";
+	case IAVF_ERR_UNKNOWN_PHY:
+		return "IAVF_ERR_UNKNOWN_PHY";
+	case IAVF_ERR_LINK_SETUP:
+		return "IAVF_ERR_LINK_SETUP";
+	case IAVF_ERR_ADAPTER_STOPPED:
+		return "IAVF_ERR_ADAPTER_STOPPED";
+	case IAVF_ERR_INVALID_MAC_ADDR:
+		return "IAVF_ERR_INVALID_MAC_ADDR";
+	case IAVF_ERR_DEVICE_NOT_SUPPORTED:
+		return "IAVF_ERR_DEVICE_NOT_SUPPORTED";
+	case IAVF_ERR_PRIMARY_REQUESTS_PENDING:
+		return "IAVF_ERR_PRIMARY_REQUESTS_PENDING";
+	case IAVF_ERR_INVALID_LINK_SETTINGS:
+		return "IAVF_ERR_INVALID_LINK_SETTINGS";
+	case IAVF_ERR_AUTONEG_NOT_COMPLETE:
+		return "IAVF_ERR_AUTONEG_NOT_COMPLETE";
+	case IAVF_ERR_RESET_FAILED:
+		return "IAVF_ERR_RESET_FAILED";
+	case IAVF_ERR_SWFW_SYNC:
+		return "IAVF_ERR_SWFW_SYNC";
+	case IAVF_ERR_NO_AVAILABLE_VSI:
+		return "IAVF_ERR_NO_AVAILABLE_VSI";
+	case IAVF_ERR_NO_MEMORY:
+		return "IAVF_ERR_NO_MEMORY";
+	case IAVF_ERR_BAD_PTR:
+		return "IAVF_ERR_BAD_PTR";
+	case IAVF_ERR_RING_FULL:
+		return "IAVF_ERR_RING_FULL";
+	case IAVF_ERR_INVALID_PD_ID:
+		return "IAVF_ERR_INVALID_PD_ID";
+	case IAVF_ERR_INVALID_QP_ID:
+		return "IAVF_ERR_INVALID_QP_ID";
+	case IAVF_ERR_INVALID_CQ_ID:
+		return "IAVF_ERR_INVALID_CQ_ID";
+	case IAVF_ERR_INVALID_CEQ_ID:
+		return "IAVF_ERR_INVALID_CEQ_ID";
+	case IAVF_ERR_INVALID_AEQ_ID:
+		return "IAVF_ERR_INVALID_AEQ_ID";
+	case IAVF_ERR_INVALID_SIZE:
+		return "IAVF_ERR_INVALID_SIZE";
+	case IAVF_ERR_INVALID_ARP_INDEX:
+		return "IAVF_ERR_INVALID_ARP_INDEX";
+	case IAVF_ERR_INVALID_FPM_FUNC_ID:
+		return "IAVF_ERR_INVALID_FPM_FUNC_ID";
+	case IAVF_ERR_QP_INVALID_MSG_SIZE:
+		return "IAVF_ERR_QP_INVALID_MSG_SIZE";
+	case IAVF_ERR_QP_TOOMANY_WRS_POSTED:
+		return "IAVF_ERR_QP_TOOMANY_WRS_POSTED";
+	case IAVF_ERR_INVALID_FRAG_COUNT:
+		return "IAVF_ERR_INVALID_FRAG_COUNT";
+	case IAVF_ERR_QUEUE_EMPTY:
+		return "IAVF_ERR_QUEUE_EMPTY";
+	case IAVF_ERR_INVALID_ALIGNMENT:
+		return "IAVF_ERR_INVALID_ALIGNMENT";
+	case IAVF_ERR_FLUSHED_QUEUE:
+		return "IAVF_ERR_FLUSHED_QUEUE";
+	case IAVF_ERR_INVALID_PUSH_PAGE_INDEX:
+		return "IAVF_ERR_INVALID_PUSH_PAGE_INDEX";
+	case IAVF_ERR_INVALID_IMM_DATA_SIZE:
+		return "IAVF_ERR_INVALID_IMM_DATA_SIZE";
+	case IAVF_ERR_TIMEOUT:
+		return "IAVF_ERR_TIMEOUT";
+	case IAVF_ERR_OPCODE_MISMATCH:
+		return "IAVF_ERR_OPCODE_MISMATCH";
+	case IAVF_ERR_CQP_COMPL_ERROR:
+		return "IAVF_ERR_CQP_COMPL_ERROR";
+	case IAVF_ERR_INVALID_VF_ID:
+		return "IAVF_ERR_INVALID_VF_ID";
+	case IAVF_ERR_INVALID_HMCFN_ID:
+		return "IAVF_ERR_INVALID_HMCFN_ID";
+	case IAVF_ERR_BACKING_PAGE_ERROR:
+		return "IAVF_ERR_BACKING_PAGE_ERROR";
+	case IAVF_ERR_NO_PBLCHUNKS_AVAILABLE:
+		return "IAVF_ERR_NO_PBLCHUNKS_AVAILABLE";
+	case IAVF_ERR_INVALID_PBLE_INDEX:
+		return "IAVF_ERR_INVALID_PBLE_INDEX";
+	case IAVF_ERR_INVALID_SD_INDEX:
+		return "IAVF_ERR_INVALID_SD_INDEX";
+	case IAVF_ERR_INVALID_PAGE_DESC_INDEX:
+		return "IAVF_ERR_INVALID_PAGE_DESC_INDEX";
+	case IAVF_ERR_INVALID_SD_TYPE:
+		return "IAVF_ERR_INVALID_SD_TYPE";
+	case IAVF_ERR_MEMCPY_FAILED:
+		return "IAVF_ERR_MEMCPY_FAILED";
+	case IAVF_ERR_INVALID_HMC_OBJ_INDEX:
+		return "IAVF_ERR_INVALID_HMC_OBJ_INDEX";
+	case IAVF_ERR_INVALID_HMC_OBJ_COUNT:
+		return "IAVF_ERR_INVALID_HMC_OBJ_COUNT";
+	case IAVF_ERR_INVALID_SRQ_ARM_LIMIT:
+		return "IAVF_ERR_INVALID_SRQ_ARM_LIMIT";
+	case IAVF_ERR_SRQ_ENABLED:
+		return "IAVF_ERR_SRQ_ENABLED";
+	case IAVF_ERR_ADMIN_QUEUE_ERROR:
+		return "IAVF_ERR_ADMIN_QUEUE_ERROR";
+	case IAVF_ERR_ADMIN_QUEUE_TIMEOUT:
+		return "IAVF_ERR_ADMIN_QUEUE_TIMEOUT";
+	case IAVF_ERR_BUF_TOO_SHORT:
+		return "IAVF_ERR_BUF_TOO_SHORT";
+	case IAVF_ERR_ADMIN_QUEUE_FULL:
+		return "IAVF_ERR_ADMIN_QUEUE_FULL";
+	case IAVF_ERR_ADMIN_QUEUE_NO_WORK:
+		return "IAVF_ERR_ADMIN_QUEUE_NO_WORK";
+	case IAVF_ERR_BAD_RDMA_CQE:
+		return "IAVF_ERR_BAD_RDMA_CQE";
+	case IAVF_ERR_NVM_BLANK_MODE:
+		return "IAVF_ERR_NVM_BLANK_MODE";
+	case IAVF_ERR_NOT_IMPLEMENTED:
+		return "IAVF_ERR_NOT_IMPLEMENTED";
+	case IAVF_ERR_PE_DOORBELL_NOT_ENABLED:
+		return "IAVF_ERR_PE_DOORBELL_NOT_ENABLED";
+	case IAVF_ERR_DIAG_TEST_FAILED:
+		return "IAVF_ERR_DIAG_TEST_FAILED";
+	case IAVF_ERR_NOT_READY:
+		return "IAVF_ERR_NOT_READY";
+	case IAVF_NOT_SUPPORTED:
+		return "IAVF_NOT_SUPPORTED";
+	case IAVF_ERR_FIRMWARE_API_VERSION:
+		return "IAVF_ERR_FIRMWARE_API_VERSION";
+	case IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR:
+		return "IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR";
+	}
+
+	snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
+	return hw->err_str;
+}
+
+/**
+ * iavf_debug_aq
+ * @hw: debug mask related to admin queue
+ * @mask: debug mask
+ * @desc: pointer to admin queue descriptor
+ * @buffer: pointer to command buffer
+ * @buf_len: max length of buffer
+ *
+ * Dumps debug log about adminq command with descriptor contents.
+ **/
+void iavf_debug_aq(struct iavf_hw *hw, enum iavf_debug_mask mask, void *desc,
+		   void *buffer, u16 buf_len)
+{
+	struct iavf_aq_desc *aq_desc = (struct iavf_aq_desc *)desc;
+	u8 *buf = (u8 *)buffer;
+
+	if ((!(mask & hw->debug_mask)) || !desc)
+		return;
+
+	iavf_debug(hw, mask,
+		   "AQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
+		   le16_to_cpu(aq_desc->opcode),
+		   le16_to_cpu(aq_desc->flags),
+		   le16_to_cpu(aq_desc->datalen),
+		   le16_to_cpu(aq_desc->retval));
+	iavf_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n",
+		   le32_to_cpu(aq_desc->cookie_high),
+		   le32_to_cpu(aq_desc->cookie_low));
+	iavf_debug(hw, mask, "\tparam (0,1)  0x%08X 0x%08X\n",
+		   le32_to_cpu(aq_desc->params.internal.param0),
+		   le32_to_cpu(aq_desc->params.internal.param1));
+	iavf_debug(hw, mask, "\taddr (h,l)   0x%08X 0x%08X\n",
+		   le32_to_cpu(aq_desc->params.external.addr_high),
+		   le32_to_cpu(aq_desc->params.external.addr_low));
+
+	if (buffer && aq_desc->datalen) {
+		u16 len = le16_to_cpu(aq_desc->datalen);
+
+		iavf_debug(hw, mask, "AQ CMD Buffer:\n");
+		if (buf_len < len)
+			len = buf_len;
+		/* write the full 16-byte chunks */
+		if (hw->debug_mask & mask) {
+			char prefix[27];
+
+			snprintf(prefix, sizeof(prefix),
+				 "iavf %02x:%02x.%x: \t0x",
+				 hw->bus.bus_id,
+				 hw->bus.device,
+				 hw->bus.func);
+
+			print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET,
+				       16, 1, buf, len, false);
+		}
+	}
+}
+
+/**
+ * iavf_check_asq_alive
+ * @hw: pointer to the hw struct
+ *
+ * Returns true if Queue is enabled else false.
+ **/
+bool iavf_check_asq_alive(struct iavf_hw *hw)
+{
+	if (hw->aq.asq.len)
+		return !!(rd32(hw, hw->aq.asq.len) &
+			  IAVF_VF_ATQLEN1_ATQENABLE_MASK);
+	else
+		return false;
+}
+
+/**
+ * iavf_aq_queue_shutdown
+ * @hw: pointer to the hw struct
+ * @unloading: is the driver unloading itself
+ *
+ * Tell the Firmware that we're shutting down the AdminQ and whether
+ * or not the driver is unloading as well.
+ **/
+enum iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading)
+{
+	struct iavf_aq_desc desc;
+	struct iavf_aqc_queue_shutdown *cmd =
+		(struct iavf_aqc_queue_shutdown *)&desc.params.raw;
+	enum iavf_status status;
+
+	iavf_fill_default_direct_cmd_desc(&desc, iavf_aqc_opc_queue_shutdown);
+
+	if (unloading)
+		cmd->driver_unloading = cpu_to_le32(IAVF_AQ_DRIVER_UNLOADING);
+	status = iavf_asq_send_command(hw, &desc, NULL, 0, NULL);
+
+	return status;
+}
+
+/**
+ * iavf_aq_get_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: vsi fw index
+ * @pf_lut: for PF table set true, for VSI table set false
+ * @lut: pointer to the lut buffer provided by the caller
+ * @lut_size: size of the lut buffer
+ * @set: set true to set the table, false to get the table
+ *
+ * Internal function to get or set RSS look up table
+ **/
+static enum iavf_status iavf_aq_get_set_rss_lut(struct iavf_hw *hw,
+						u16 vsi_id, bool pf_lut,
+						u8 *lut, u16 lut_size,
+						bool set)
+{
+	enum iavf_status status;
+	struct iavf_aq_desc desc;
+	struct iavf_aqc_get_set_rss_lut *cmd_resp =
+		   (struct iavf_aqc_get_set_rss_lut *)&desc.params.raw;
+
+	if (set)
+		iavf_fill_default_direct_cmd_desc(&desc,
+						  iavf_aqc_opc_set_rss_lut);
+	else
+		iavf_fill_default_direct_cmd_desc(&desc,
+						  iavf_aqc_opc_get_rss_lut);
+
+	/* Indirect command */
+	desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_BUF);
+	desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_RD);
+
+	cmd_resp->vsi_id =
+			cpu_to_le16((u16)((vsi_id <<
+					  IAVF_AQC_SET_RSS_LUT_VSI_ID_SHIFT) &
+					  IAVF_AQC_SET_RSS_LUT_VSI_ID_MASK));
+	cmd_resp->vsi_id |= cpu_to_le16((u16)IAVF_AQC_SET_RSS_LUT_VSI_VALID);
+
+	if (pf_lut)
+		cmd_resp->flags |= cpu_to_le16((u16)
+					((IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_PF <<
+					IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) &
+					IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK));
+	else
+		cmd_resp->flags |= cpu_to_le16((u16)
+					((IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_VSI <<
+					IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) &
+					IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK));
+
+	status = iavf_asq_send_command(hw, &desc, lut, lut_size, NULL);
+
+	return status;
+}
+
+/**
+ * iavf_aq_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @vsi_id: vsi fw index
+ * @pf_lut: for PF table set true, for VSI table set false
+ * @lut: pointer to the lut buffer provided by the caller
+ * @lut_size: size of the lut buffer
+ *
+ * set the RSS lookup table, PF or VSI type
+ **/
+enum iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 vsi_id,
+				     bool pf_lut, u8 *lut, u16 lut_size)
+{
+	return iavf_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size, true);
+}
+
+/**
+ * iavf_aq_get_set_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: vsi fw index
+ * @key: pointer to key info struct
+ * @set: set true to set the key, false to get the key
+ *
+ * get the RSS key per VSI
+ **/
+static enum
+iavf_status iavf_aq_get_set_rss_key(struct iavf_hw *hw, u16 vsi_id,
+				    struct iavf_aqc_get_set_rss_key_data *key,
+				    bool set)
+{
+	enum iavf_status status;
+	struct iavf_aq_desc desc;
+	struct iavf_aqc_get_set_rss_key *cmd_resp =
+			(struct iavf_aqc_get_set_rss_key *)&desc.params.raw;
+	u16 key_size = sizeof(struct iavf_aqc_get_set_rss_key_data);
+
+	if (set)
+		iavf_fill_default_direct_cmd_desc(&desc,
+						  iavf_aqc_opc_set_rss_key);
+	else
+		iavf_fill_default_direct_cmd_desc(&desc,
+						  iavf_aqc_opc_get_rss_key);
+
+	/* Indirect command */
+	desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_BUF);
+	desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_RD);
+
+	cmd_resp->vsi_id =
+			cpu_to_le16((u16)((vsi_id <<
+					  IAVF_AQC_SET_RSS_KEY_VSI_ID_SHIFT) &
+					  IAVF_AQC_SET_RSS_KEY_VSI_ID_MASK));
+	cmd_resp->vsi_id |= cpu_to_le16((u16)IAVF_AQC_SET_RSS_KEY_VSI_VALID);
+
+	status = iavf_asq_send_command(hw, &desc, key, key_size, NULL);
+
+	return status;
+}
+
+/**
+ * iavf_aq_set_rss_key
+ * @hw: pointer to the hw struct
+ * @vsi_id: vsi fw index
+ * @key: pointer to key info struct
+ *
+ * set the RSS key per VSI
+ **/
+enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id,
+				     struct iavf_aqc_get_set_rss_key_data *key)
+{
+	return iavf_aq_get_set_rss_key(hw, vsi_id, key, true);
+}
+
+/* The iavf_ptype_lookup table is used to convert from the 8-bit ptype in the
+ * hardware to a bit-field that can be used by SW to more easily determine the
+ * packet type.
+ *
+ * Macros are used to shorten the table lines and make this table human
+ * readable.
+ *
+ * We store the PTYPE in the top byte of the bit field - this is just so that
+ * we can check that the table doesn't have a row missing, as the index into
+ * the table should be the PTYPE.
+ *
+ * Typical work flow:
+ *
+ * IF NOT iavf_ptype_lookup[ptype].known
+ * THEN
+ *      Packet is unknown
+ * ELSE IF iavf_ptype_lookup[ptype].outer_ip == IAVF_RX_PTYPE_OUTER_IP
+ *      Use the rest of the fields to look at the tunnels, inner protocols, etc
+ * ELSE
+ *      Use the enum iavf_rx_l2_ptype to decode the packet type
+ * ENDIF
+ */
+
+/* macro to make the table lines short, use explicit indexing with [PTYPE] */
+#define IAVF_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
+	[PTYPE] = { \
+		1, \
+		IAVF_RX_PTYPE_OUTER_##OUTER_IP, \
+		IAVF_RX_PTYPE_OUTER_##OUTER_IP_VER, \
+		IAVF_RX_PTYPE_##OUTER_FRAG, \
+		IAVF_RX_PTYPE_TUNNEL_##T, \
+		IAVF_RX_PTYPE_TUNNEL_END_##TE, \
+		IAVF_RX_PTYPE_##TEF, \
+		IAVF_RX_PTYPE_INNER_PROT_##I, \
+		IAVF_RX_PTYPE_PAYLOAD_LAYER_##PL }
+
+#define IAVF_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+/* shorter macros makes the table fit but are terse */
+#define IAVF_RX_PTYPE_NOF		IAVF_RX_PTYPE_NOT_FRAG
+#define IAVF_RX_PTYPE_FRG		IAVF_RX_PTYPE_FRAG
+#define IAVF_RX_PTYPE_INNER_PROT_TS	IAVF_RX_PTYPE_INNER_PROT_TIMESYNC
+
+/* Lookup table mapping the 8-bit HW PTYPE to the bit field for decoding */
+struct iavf_rx_ptype_decoded iavf_ptype_lookup[BIT(8)] = {
+	/* L2 Packet types */
+	IAVF_PTT_UNUSED_ENTRY(0),
+	IAVF_PTT(1,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	IAVF_PTT(2,  L2, NONE, NOF, NONE, NONE, NOF, TS,   PAY2),
+	IAVF_PTT(3,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	IAVF_PTT_UNUSED_ENTRY(4),
+	IAVF_PTT_UNUSED_ENTRY(5),
+	IAVF_PTT(6,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	IAVF_PTT(7,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	IAVF_PTT_UNUSED_ENTRY(8),
+	IAVF_PTT_UNUSED_ENTRY(9),
+	IAVF_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	IAVF_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+	IAVF_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+
+	/* Non Tunneled IPv4 */
+	IAVF_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(25),
+	IAVF_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP,  PAY4),
+	IAVF_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4),
+	IAVF_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+	/* IPv4 --> IPv4 */
+	IAVF_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(32),
+	IAVF_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> IPv6 */
+	IAVF_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(39),
+	IAVF_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT */
+	IAVF_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 --> GRE/NAT --> IPv4 */
+	IAVF_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(47),
+	IAVF_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> IPv6 */
+	IAVF_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(54),
+	IAVF_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC */
+	IAVF_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 --> GRE/NAT --> MAC --> IPv4 */
+	IAVF_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(62),
+	IAVF_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT -> MAC --> IPv6 */
+	IAVF_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(69),
+	IAVF_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC/VLAN */
+	IAVF_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */
+	IAVF_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(77),
+	IAVF_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */
+	IAVF_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(84),
+	IAVF_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+	/* Non Tunneled IPv6 */
+	IAVF_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
+	IAVF_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(91),
+	IAVF_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP,  PAY4),
+	IAVF_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),
+	IAVF_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+	/* IPv6 --> IPv4 */
+	IAVF_PTT(95,  IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(96,  IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(97,  IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(98),
+	IAVF_PTT(99,  IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> IPv6 */
+	IAVF_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(105),
+	IAVF_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT */
+	IAVF_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> IPv4 */
+	IAVF_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(113),
+	IAVF_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> IPv6 */
+	IAVF_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(120),
+	IAVF_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC */
+	IAVF_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> MAC -> IPv4 */
+	IAVF_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(128),
+	IAVF_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC -> IPv6 */
+	IAVF_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(135),
+	IAVF_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN */
+	IAVF_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */
+	IAVF_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+	IAVF_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+	IAVF_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(143),
+	IAVF_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
+	IAVF_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+	IAVF_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */
+	IAVF_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+	IAVF_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+	IAVF_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
+	IAVF_PTT_UNUSED_ENTRY(150),
+	IAVF_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
+	IAVF_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+	IAVF_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+	/* unused entries */
+	[154 ... 255] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+};
+
+/**
+ * iavf_aq_send_msg_to_pf
+ * @hw: pointer to the hardware structure
+ * @v_opcode: opcodes for VF-PF communication
+ * @v_retval: return error code
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @cmd_details: pointer to command details
+ *
+ * Send message to PF driver using admin queue. By default, this message
+ * is sent asynchronously, i.e. iavf_asq_send_command() does not wait for
+ * completion before returning.
+ **/
+enum iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw,
+					enum virtchnl_ops v_opcode,
+					enum iavf_status v_retval,
+					u8 *msg, u16 msglen,
+					struct iavf_asq_cmd_details *cmd_details)
+{
+	struct iavf_asq_cmd_details details;
+	struct iavf_aq_desc desc;
+	enum iavf_status status;
+
+	iavf_fill_default_direct_cmd_desc(&desc, iavf_aqc_opc_send_msg_to_pf);
+	desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_SI);
+	desc.cookie_high = cpu_to_le32(v_opcode);
+	desc.cookie_low = cpu_to_le32(v_retval);
+	if (msglen) {
+		desc.flags |= cpu_to_le16((u16)(IAVF_AQ_FLAG_BUF
+						| IAVF_AQ_FLAG_RD));
+		if (msglen > IAVF_AQ_LARGE_BUF)
+			desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_LB);
+		desc.datalen = cpu_to_le16(msglen);
+	}
+	if (!cmd_details) {
+		memset(&details, 0, sizeof(details));
+		details.async = true;
+		cmd_details = &details;
+	}
+	status = iavf_asq_send_command(hw, &desc, msg, msglen, cmd_details);
+	return status;
+}
+
+/**
+ * iavf_vf_parse_hw_config
+ * @hw: pointer to the hardware structure
+ * @msg: pointer to the virtual channel VF resource structure
+ *
+ * Given a VF resource message from the PF, populate the hw struct
+ * with appropriate information.
+ **/
+void iavf_vf_parse_hw_config(struct iavf_hw *hw,
+			     struct virtchnl_vf_resource *msg)
+{
+	struct virtchnl_vsi_resource *vsi_res;
+	int i;
+
+	vsi_res = &msg->vsi_res[0];
+
+	hw->dev_caps.num_vsis = msg->num_vsis;
+	hw->dev_caps.num_rx_qp = msg->num_queue_pairs;
+	hw->dev_caps.num_tx_qp = msg->num_queue_pairs;
+	hw->dev_caps.num_msix_vectors_vf = msg->max_vectors;
+	hw->dev_caps.dcb = msg->vf_cap_flags &
+			   VIRTCHNL_VF_OFFLOAD_L2;
+	hw->dev_caps.fcoe = 0;
+	for (i = 0; i < msg->num_vsis; i++) {
+		if (vsi_res->vsi_type == VIRTCHNL_VSI_SRIOV) {
+			ether_addr_copy(hw->mac.perm_addr,
+					vsi_res->default_mac_addr);
+			ether_addr_copy(hw->mac.addr,
+					vsi_res->default_mac_addr);
+		}
+		vsi_res++;
+	}
+}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_devids.h b/drivers/net/ethernet/intel/iavf/iavf_devids.h
new file mode 100644
index 0000000000..8eb7b697e9
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_devids.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_DEVIDS_H_
+#define _IAVF_DEVIDS_H_
+
+/* Device IDs for the VF driver */
+#define IAVF_DEV_ID_VF			0x154C
+#define IAVF_DEV_ID_VF_HV		0x1571
+#define IAVF_DEV_ID_ADAPTIVE_VF		0x1889
+#define IAVF_DEV_ID_X722_VF		0x37CD
+#endif /* _IAVF_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
new file mode 100644
index 0000000000..892c6a4f03
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -0,0 +1,2012 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+/* ethtool support for iavf */
+#include "iavf.h"
+
+#include <linux/uaccess.h>
+
+/* ethtool statistics helpers */
+
+/**
+ * struct iavf_stats - definition for an ethtool statistic
+ * @stat_string: statistic name to display in ethtool -S output
+ * @sizeof_stat: the sizeof() the stat, must be no greater than sizeof(u64)
+ * @stat_offset: offsetof() the stat from a base pointer
+ *
+ * This structure defines a statistic to be added to the ethtool stats buffer.
+ * It defines a statistic as offset from a common base pointer. Stats should
+ * be defined in constant arrays using the IAVF_STAT macro, with every element
+ * of the array using the same _type for calculating the sizeof_stat and
+ * stat_offset.
+ *
+ * The @sizeof_stat is expected to be sizeof(u8), sizeof(u16), sizeof(u32) or
+ * sizeof(u64). Other sizes are not expected and will produce a WARN_ONCE from
+ * the iavf_add_ethtool_stat() helper function.
+ *
+ * The @stat_string is interpreted as a format string, allowing formatted
+ * values to be inserted while looping over multiple structures for a given
+ * statistics array. Thus, every statistic string in an array should have the
+ * same type and number of format specifiers, to be formatted by variadic
+ * arguments to the iavf_add_stat_string() helper function.
+ **/
+struct iavf_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+/* Helper macro to define an iavf_stat structure with proper size and type.
+ * Use this when defining constant statistics arrays. Note that @_type expects
+ * only a type name and is used multiple times.
+ */
+#define IAVF_STAT(_type, _name, _stat) { \
+	.stat_string = _name, \
+	.sizeof_stat = sizeof_field(_type, _stat), \
+	.stat_offset = offsetof(_type, _stat) \
+}
+
+/* Helper macro for defining some statistics related to queues */
+#define IAVF_QUEUE_STAT(_name, _stat) \
+	IAVF_STAT(struct iavf_ring, _name, _stat)
+
+/* Stats associated with a Tx or Rx ring */
+static const struct iavf_stats iavf_gstrings_queue_stats[] = {
+	IAVF_QUEUE_STAT("%s-%u.packets", stats.packets),
+	IAVF_QUEUE_STAT("%s-%u.bytes", stats.bytes),
+};
+
+/**
+ * iavf_add_one_ethtool_stat - copy the stat into the supplied buffer
+ * @data: location to store the stat value
+ * @pointer: basis for where to copy from
+ * @stat: the stat definition
+ *
+ * Copies the stat data defined by the pointer and stat structure pair into
+ * the memory supplied as data. Used to implement iavf_add_ethtool_stats and
+ * iavf_add_queue_stats. If the pointer is null, data will be zero'd.
+ */
+static void
+iavf_add_one_ethtool_stat(u64 *data, void *pointer,
+			  const struct iavf_stats *stat)
+{
+	char *p;
+
+	if (!pointer) {
+		/* ensure that the ethtool data buffer is zero'd for any stats
+		 * which don't have a valid pointer.
+		 */
+		*data = 0;
+		return;
+	}
+
+	p = (char *)pointer + stat->stat_offset;
+	switch (stat->sizeof_stat) {
+	case sizeof(u64):
+		*data = *((u64 *)p);
+		break;
+	case sizeof(u32):
+		*data = *((u32 *)p);
+		break;
+	case sizeof(u16):
+		*data = *((u16 *)p);
+		break;
+	case sizeof(u8):
+		*data = *((u8 *)p);
+		break;
+	default:
+		WARN_ONCE(1, "unexpected stat size for %s",
+			  stat->stat_string);
+		*data = 0;
+	}
+}
+
+/**
+ * __iavf_add_ethtool_stats - copy stats into the ethtool supplied buffer
+ * @data: ethtool stats buffer
+ * @pointer: location to copy stats from
+ * @stats: array of stats to copy
+ * @size: the size of the stats definition
+ *
+ * Copy the stats defined by the stats array using the pointer as a base into
+ * the data buffer supplied by ethtool. Updates the data pointer to point to
+ * the next empty location for successive calls to __iavf_add_ethtool_stats.
+ * If pointer is null, set the data values to zero and update the pointer to
+ * skip these stats.
+ **/
+static void
+__iavf_add_ethtool_stats(u64 **data, void *pointer,
+			 const struct iavf_stats stats[],
+			 const unsigned int size)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++)
+		iavf_add_one_ethtool_stat((*data)++, pointer, &stats[i]);
+}
+
+/**
+ * iavf_add_ethtool_stats - copy stats into ethtool supplied buffer
+ * @data: ethtool stats buffer
+ * @pointer: location where stats are stored
+ * @stats: static const array of stat definitions
+ *
+ * Macro to ease the use of __iavf_add_ethtool_stats by taking a static
+ * constant stats array and passing the ARRAY_SIZE(). This avoids typos by
+ * ensuring that we pass the size associated with the given stats array.
+ *
+ * The parameter @stats is evaluated twice, so parameters with side effects
+ * should be avoided.
+ **/
+#define iavf_add_ethtool_stats(data, pointer, stats) \
+	__iavf_add_ethtool_stats(data, pointer, stats, ARRAY_SIZE(stats))
+
+/**
+ * iavf_add_queue_stats - copy queue statistics into supplied buffer
+ * @data: ethtool stats buffer
+ * @ring: the ring to copy
+ *
+ * Queue statistics must be copied while protected by
+ * u64_stats_fetch_begin, so we can't directly use iavf_add_ethtool_stats.
+ * Assumes that queue stats are defined in iavf_gstrings_queue_stats. If the
+ * ring pointer is null, zero out the queue stat values and update the data
+ * pointer. Otherwise safely copy the stats from the ring into the supplied
+ * buffer and update the data pointer when finished.
+ *
+ * This function expects to be called while under rcu_read_lock().
+ **/
+static void
+iavf_add_queue_stats(u64 **data, struct iavf_ring *ring)
+{
+	const unsigned int size = ARRAY_SIZE(iavf_gstrings_queue_stats);
+	const struct iavf_stats *stats = iavf_gstrings_queue_stats;
+	unsigned int start;
+	unsigned int i;
+
+	/* To avoid invalid statistics values, ensure that we keep retrying
+	 * the copy until we get a consistent value according to
+	 * u64_stats_fetch_retry. But first, make sure our ring is
+	 * non-null before attempting to access its syncp.
+	 */
+	do {
+		start = !ring ? 0 : u64_stats_fetch_begin(&ring->syncp);
+		for (i = 0; i < size; i++)
+			iavf_add_one_ethtool_stat(&(*data)[i], ring, &stats[i]);
+	} while (ring && u64_stats_fetch_retry(&ring->syncp, start));
+
+	/* Once we successfully copy the stats in, update the data pointer */
+	*data += size;
+}
+
+/**
+ * __iavf_add_stat_strings - copy stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ * @size: size of the stats array
+ *
+ * Format and copy the strings described by stats into the buffer pointed at
+ * by p.
+ **/
+static void __iavf_add_stat_strings(u8 **p, const struct iavf_stats stats[],
+				    const unsigned int size, ...)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++) {
+		va_list args;
+
+		va_start(args, size);
+		vsnprintf(*p, ETH_GSTRING_LEN, stats[i].stat_string, args);
+		*p += ETH_GSTRING_LEN;
+		va_end(args);
+	}
+}
+
+/**
+ * iavf_add_stat_strings - copy stat strings into ethtool buffer
+ * @p: ethtool supplied buffer
+ * @stats: stat definitions array
+ *
+ * Format and copy the strings described by the const static stats value into
+ * the buffer pointed at by p.
+ *
+ * The parameter @stats is evaluated twice, so parameters with side effects
+ * should be avoided. Additionally, stats must be an array such that
+ * ARRAY_SIZE can be called on it.
+ **/
+#define iavf_add_stat_strings(p, stats, ...) \
+	__iavf_add_stat_strings(p, stats, ARRAY_SIZE(stats), ## __VA_ARGS__)
+
+#define VF_STAT(_name, _stat) \
+	IAVF_STAT(struct iavf_adapter, _name, _stat)
+
+static const struct iavf_stats iavf_gstrings_stats[] = {
+	VF_STAT("rx_bytes", current_stats.rx_bytes),
+	VF_STAT("rx_unicast", current_stats.rx_unicast),
+	VF_STAT("rx_multicast", current_stats.rx_multicast),
+	VF_STAT("rx_broadcast", current_stats.rx_broadcast),
+	VF_STAT("rx_discards", current_stats.rx_discards),
+	VF_STAT("rx_unknown_protocol", current_stats.rx_unknown_protocol),
+	VF_STAT("tx_bytes", current_stats.tx_bytes),
+	VF_STAT("tx_unicast", current_stats.tx_unicast),
+	VF_STAT("tx_multicast", current_stats.tx_multicast),
+	VF_STAT("tx_broadcast", current_stats.tx_broadcast),
+	VF_STAT("tx_discards", current_stats.tx_discards),
+	VF_STAT("tx_errors", current_stats.tx_errors),
+};
+
+#define IAVF_STATS_LEN	ARRAY_SIZE(iavf_gstrings_stats)
+
+#define IAVF_QUEUE_STATS_LEN	ARRAY_SIZE(iavf_gstrings_queue_stats)
+
+/* For now we have one and only one private flag and it is only defined
+ * when we have support for the SKIP_CPU_SYNC DMA attribute.  Instead
+ * of leaving all this code sitting around empty we will strip it unless
+ * our one private flag is actually available.
+ */
+struct iavf_priv_flags {
+	char flag_string[ETH_GSTRING_LEN];
+	u32 flag;
+	bool read_only;
+};
+
+#define IAVF_PRIV_FLAG(_name, _flag, _read_only) { \
+	.flag_string = _name, \
+	.flag = _flag, \
+	.read_only = _read_only, \
+}
+
+static const struct iavf_priv_flags iavf_gstrings_priv_flags[] = {
+	IAVF_PRIV_FLAG("legacy-rx", IAVF_FLAG_LEGACY_RX, 0),
+};
+
+#define IAVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(iavf_gstrings_priv_flags)
+
+/**
+ * iavf_get_link_ksettings - Get Link Speed and Duplex settings
+ * @netdev: network interface device structure
+ * @cmd: ethtool command
+ *
+ * Reports speed/duplex settings. Because this is a VF, we don't know what
+ * kind of link we really have, so we fake it.
+ **/
+static int iavf_get_link_ksettings(struct net_device *netdev,
+				   struct ethtool_link_ksettings *cmd)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	ethtool_link_ksettings_zero_link_mode(cmd, supported);
+	cmd->base.autoneg = AUTONEG_DISABLE;
+	cmd->base.port = PORT_NONE;
+	cmd->base.duplex = DUPLEX_FULL;
+
+	if (ADV_LINK_SUPPORT(adapter)) {
+		if (adapter->link_speed_mbps &&
+		    adapter->link_speed_mbps < U32_MAX)
+			cmd->base.speed = adapter->link_speed_mbps;
+		else
+			cmd->base.speed = SPEED_UNKNOWN;
+
+		return 0;
+	}
+
+	switch (adapter->link_speed) {
+	case VIRTCHNL_LINK_SPEED_40GB:
+		cmd->base.speed = SPEED_40000;
+		break;
+	case VIRTCHNL_LINK_SPEED_25GB:
+		cmd->base.speed = SPEED_25000;
+		break;
+	case VIRTCHNL_LINK_SPEED_20GB:
+		cmd->base.speed = SPEED_20000;
+		break;
+	case VIRTCHNL_LINK_SPEED_10GB:
+		cmd->base.speed = SPEED_10000;
+		break;
+	case VIRTCHNL_LINK_SPEED_5GB:
+		cmd->base.speed = SPEED_5000;
+		break;
+	case VIRTCHNL_LINK_SPEED_2_5GB:
+		cmd->base.speed = SPEED_2500;
+		break;
+	case VIRTCHNL_LINK_SPEED_1GB:
+		cmd->base.speed = SPEED_1000;
+		break;
+	case VIRTCHNL_LINK_SPEED_100MB:
+		cmd->base.speed = SPEED_100;
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_get_sset_count - Get length of string set
+ * @netdev: network interface device structure
+ * @sset: id of string set
+ *
+ * Reports size of various string tables.
+ **/
+static int iavf_get_sset_count(struct net_device *netdev, int sset)
+{
+	/* Report the maximum number queues, even if not every queue is
+	 * currently configured. Since allocation of queues is in pairs,
+	 * use netdev->real_num_tx_queues * 2. The real_num_tx_queues is set
+	 * at device creation and never changes.
+	 */
+
+	if (sset == ETH_SS_STATS)
+		return IAVF_STATS_LEN +
+			(IAVF_QUEUE_STATS_LEN * 2 *
+			 netdev->real_num_tx_queues);
+	else if (sset == ETH_SS_PRIV_FLAGS)
+		return IAVF_PRIV_FLAGS_STR_LEN;
+	else
+		return -EINVAL;
+}
+
+/**
+ * iavf_get_ethtool_stats - report device statistics
+ * @netdev: network interface device structure
+ * @stats: ethtool statistics structure
+ * @data: pointer to data buffer
+ *
+ * All statistics are added to the data buffer as an array of u64.
+ **/
+static void iavf_get_ethtool_stats(struct net_device *netdev,
+				   struct ethtool_stats *stats, u64 *data)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	unsigned int i;
+
+	/* Explicitly request stats refresh */
+	iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_REQUEST_STATS);
+
+	iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats);
+
+	rcu_read_lock();
+	/* As num_active_queues describe both tx and rx queues, we can use
+	 * it to iterate over rings' stats.
+	 */
+	for (i = 0; i < adapter->num_active_queues; i++) {
+		struct iavf_ring *ring;
+
+		/* Tx rings stats */
+		ring = &adapter->tx_rings[i];
+		iavf_add_queue_stats(&data, ring);
+
+		/* Rx rings stats */
+		ring = &adapter->rx_rings[i];
+		iavf_add_queue_stats(&data, ring);
+	}
+	rcu_read_unlock();
+}
+
+/**
+ * iavf_get_priv_flag_strings - Get private flag strings
+ * @netdev: network interface device structure
+ * @data: buffer for string data
+ *
+ * Builds the private flags string table
+ **/
+static void iavf_get_priv_flag_strings(struct net_device *netdev, u8 *data)
+{
+	unsigned int i;
+
+	for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) {
+		snprintf(data, ETH_GSTRING_LEN, "%s",
+			 iavf_gstrings_priv_flags[i].flag_string);
+		data += ETH_GSTRING_LEN;
+	}
+}
+
+/**
+ * iavf_get_stat_strings - Get stat strings
+ * @netdev: network interface device structure
+ * @data: buffer for string data
+ *
+ * Builds the statistics string table
+ **/
+static void iavf_get_stat_strings(struct net_device *netdev, u8 *data)
+{
+	unsigned int i;
+
+	iavf_add_stat_strings(&data, iavf_gstrings_stats);
+
+	/* Queues are always allocated in pairs, so we just use
+	 * real_num_tx_queues for both Tx and Rx queues.
+	 */
+	for (i = 0; i < netdev->real_num_tx_queues; i++) {
+		iavf_add_stat_strings(&data, iavf_gstrings_queue_stats,
+				      "tx", i);
+		iavf_add_stat_strings(&data, iavf_gstrings_queue_stats,
+				      "rx", i);
+	}
+}
+
+/**
+ * iavf_get_strings - Get string set
+ * @netdev: network interface device structure
+ * @sset: id of string set
+ * @data: buffer for string data
+ *
+ * Builds string tables for various string sets
+ **/
+static void iavf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		iavf_get_stat_strings(netdev, data);
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		iavf_get_priv_flag_strings(netdev, data);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * iavf_get_priv_flags - report device private flags
+ * @netdev: network interface device structure
+ *
+ * The get string set count and the string set should be matched for each
+ * flag returned.  Add new strings for each flag to the iavf_gstrings_priv_flags
+ * array.
+ *
+ * Returns a u32 bitmap of flags.
+ **/
+static u32 iavf_get_priv_flags(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u32 i, ret_flags = 0;
+
+	for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) {
+		const struct iavf_priv_flags *priv_flags;
+
+		priv_flags = &iavf_gstrings_priv_flags[i];
+
+		if (priv_flags->flag & adapter->flags)
+			ret_flags |= BIT(i);
+	}
+
+	return ret_flags;
+}
+
+/**
+ * iavf_set_priv_flags - set private flags
+ * @netdev: network interface device structure
+ * @flags: bit flags to be set
+ **/
+static int iavf_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u32 orig_flags, new_flags, changed_flags;
+	int ret = 0;
+	u32 i;
+
+	orig_flags = READ_ONCE(adapter->flags);
+	new_flags = orig_flags;
+
+	for (i = 0; i < IAVF_PRIV_FLAGS_STR_LEN; i++) {
+		const struct iavf_priv_flags *priv_flags;
+
+		priv_flags = &iavf_gstrings_priv_flags[i];
+
+		if (flags & BIT(i))
+			new_flags |= priv_flags->flag;
+		else
+			new_flags &= ~(priv_flags->flag);
+
+		if (priv_flags->read_only &&
+		    ((orig_flags ^ new_flags) & ~BIT(i)))
+			return -EOPNOTSUPP;
+	}
+
+	/* Before we finalize any flag changes, any checks which we need to
+	 * perform to determine if the new flags will be supported should go
+	 * here...
+	 */
+
+	/* Compare and exchange the new flags into place. If we failed, that
+	 * is if cmpxchg returns anything but the old value, this means
+	 * something else must have modified the flags variable since we
+	 * copied it. We'll just punt with an error and log something in the
+	 * message buffer.
+	 */
+	if (cmpxchg(&adapter->flags, orig_flags, new_flags) != orig_flags) {
+		dev_warn(&adapter->pdev->dev,
+			 "Unable to update adapter->flags as it was modified by another thread...\n");
+		return -EAGAIN;
+	}
+
+	changed_flags = orig_flags ^ new_flags;
+
+	/* Process any additional changes needed as a result of flag changes.
+	 * The changed_flags value reflects the list of bits that were changed
+	 * in the code above.
+	 */
+
+	/* issue a reset to force legacy-rx change to take effect */
+	if (changed_flags & IAVF_FLAG_LEGACY_RX) {
+		if (netif_running(netdev)) {
+			iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+			ret = iavf_wait_for_reset(adapter);
+			if (ret)
+				netdev_warn(netdev, "Changing private flags timeout or interrupted waiting for reset");
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * iavf_get_msglevel - Get debug message level
+ * @netdev: network interface device structure
+ *
+ * Returns current debug message level.
+ **/
+static u32 iavf_get_msglevel(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->msg_enable;
+}
+
+/**
+ * iavf_set_msglevel - Set debug message level
+ * @netdev: network interface device structure
+ * @data: message level
+ *
+ * Set current debug message level. Higher values cause the driver to
+ * be noisier.
+ **/
+static void iavf_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	if (IAVF_DEBUG_USER & data)
+		adapter->hw.debug_mask = data;
+	adapter->msg_enable = data;
+}
+
+/**
+ * iavf_get_drvinfo - Get driver info
+ * @netdev: network interface device structure
+ * @drvinfo: ethool driver info structure
+ *
+ * Returns information about the driver and device for display to the user.
+ **/
+static void iavf_get_drvinfo(struct net_device *netdev,
+			     struct ethtool_drvinfo *drvinfo)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	strscpy(drvinfo->driver, iavf_driver_name, 32);
+	strscpy(drvinfo->fw_version, "N/A", 4);
+	strscpy(drvinfo->bus_info, pci_name(adapter->pdev), 32);
+	drvinfo->n_priv_flags = IAVF_PRIV_FLAGS_STR_LEN;
+}
+
+/**
+ * iavf_get_ringparam - Get ring parameters
+ * @netdev: network interface device structure
+ * @ring: ethtool ringparam structure
+ * @kernel_ring: ethtool extenal ringparam structure
+ * @extack: netlink extended ACK report struct
+ *
+ * Returns current ring parameters. TX and RX rings are reported separately,
+ * but the number of rings is not reported.
+ **/
+static void iavf_get_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ring,
+			       struct kernel_ethtool_ringparam *kernel_ring,
+			       struct netlink_ext_ack *extack)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	ring->rx_max_pending = IAVF_MAX_RXD;
+	ring->tx_max_pending = IAVF_MAX_TXD;
+	ring->rx_pending = adapter->rx_desc_count;
+	ring->tx_pending = adapter->tx_desc_count;
+}
+
+/**
+ * iavf_set_ringparam - Set ring parameters
+ * @netdev: network interface device structure
+ * @ring: ethtool ringparam structure
+ * @kernel_ring: ethtool external ringparam structure
+ * @extack: netlink extended ACK report struct
+ *
+ * Sets ring parameters. TX and RX rings are controlled separately, but the
+ * number of rings is not specified, so all rings get the same settings.
+ **/
+static int iavf_set_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ring,
+			      struct kernel_ethtool_ringparam *kernel_ring,
+			      struct netlink_ext_ack *extack)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u32 new_rx_count, new_tx_count;
+	int ret = 0;
+
+	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+		return -EINVAL;
+
+	if (ring->tx_pending > IAVF_MAX_TXD ||
+	    ring->tx_pending < IAVF_MIN_TXD ||
+	    ring->rx_pending > IAVF_MAX_RXD ||
+	    ring->rx_pending < IAVF_MIN_RXD) {
+		netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d] (increment %d)\n",
+			   ring->tx_pending, ring->rx_pending, IAVF_MIN_TXD,
+			   IAVF_MAX_RXD, IAVF_REQ_DESCRIPTOR_MULTIPLE);
+		return -EINVAL;
+	}
+
+	new_tx_count = ALIGN(ring->tx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE);
+	if (new_tx_count != ring->tx_pending)
+		netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n",
+			    new_tx_count);
+
+	new_rx_count = ALIGN(ring->rx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE);
+	if (new_rx_count != ring->rx_pending)
+		netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n",
+			    new_rx_count);
+
+	/* if nothing to do return success */
+	if ((new_tx_count == adapter->tx_desc_count) &&
+	    (new_rx_count == adapter->rx_desc_count)) {
+		netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n");
+		return 0;
+	}
+
+	if (new_tx_count != adapter->tx_desc_count) {
+		netdev_dbg(netdev, "Changing Tx descriptor count from %d to %d\n",
+			   adapter->tx_desc_count, new_tx_count);
+		adapter->tx_desc_count = new_tx_count;
+	}
+
+	if (new_rx_count != adapter->rx_desc_count) {
+		netdev_dbg(netdev, "Changing Rx descriptor count from %d to %d\n",
+			   adapter->rx_desc_count, new_rx_count);
+		adapter->rx_desc_count = new_rx_count;
+	}
+
+	if (netif_running(netdev)) {
+		iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+		ret = iavf_wait_for_reset(adapter);
+		if (ret)
+			netdev_warn(netdev, "Changing ring parameters timeout or interrupted waiting for reset");
+	}
+
+	return ret;
+}
+
+/**
+ * __iavf_get_coalesce - get per-queue coalesce settings
+ * @netdev: the netdev to check
+ * @ec: ethtool coalesce data structure
+ * @queue: which queue to pick
+ *
+ * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs
+ * are per queue. If queue is <0 then we default to queue 0 as the
+ * representative value.
+ **/
+static int __iavf_get_coalesce(struct net_device *netdev,
+			       struct ethtool_coalesce *ec, int queue)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_ring *rx_ring, *tx_ring;
+
+	/* Rx and Tx usecs per queue value. If user doesn't specify the
+	 * queue, return queue 0's value to represent.
+	 */
+	if (queue < 0)
+		queue = 0;
+	else if (queue >= adapter->num_active_queues)
+		return -EINVAL;
+
+	rx_ring = &adapter->rx_rings[queue];
+	tx_ring = &adapter->tx_rings[queue];
+
+	if (ITR_IS_DYNAMIC(rx_ring->itr_setting))
+		ec->use_adaptive_rx_coalesce = 1;
+
+	if (ITR_IS_DYNAMIC(tx_ring->itr_setting))
+		ec->use_adaptive_tx_coalesce = 1;
+
+	ec->rx_coalesce_usecs = rx_ring->itr_setting & ~IAVF_ITR_DYNAMIC;
+	ec->tx_coalesce_usecs = tx_ring->itr_setting & ~IAVF_ITR_DYNAMIC;
+
+	return 0;
+}
+
+/**
+ * iavf_get_coalesce - Get interrupt coalescing settings
+ * @netdev: network interface device structure
+ * @ec: ethtool coalesce structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
+ *
+ * Returns current coalescing settings. This is referred to elsewhere in the
+ * driver as Interrupt Throttle Rate, as this is how the hardware describes
+ * this functionality. Note that if per-queue settings have been modified this
+ * only represents the settings of queue 0.
+ **/
+static int iavf_get_coalesce(struct net_device *netdev,
+			     struct ethtool_coalesce *ec,
+			     struct kernel_ethtool_coalesce *kernel_coal,
+			     struct netlink_ext_ack *extack)
+{
+	return __iavf_get_coalesce(netdev, ec, -1);
+}
+
+/**
+ * iavf_get_per_queue_coalesce - get coalesce values for specific queue
+ * @netdev: netdev to read
+ * @ec: coalesce settings from ethtool
+ * @queue: the queue to read
+ *
+ * Read specific queue's coalesce settings.
+ **/
+static int iavf_get_per_queue_coalesce(struct net_device *netdev, u32 queue,
+				       struct ethtool_coalesce *ec)
+{
+	return __iavf_get_coalesce(netdev, ec, queue);
+}
+
+/**
+ * iavf_set_itr_per_queue - set ITR values for specific queue
+ * @adapter: the VF adapter struct to set values for
+ * @ec: coalesce settings from ethtool
+ * @queue: the queue to modify
+ *
+ * Change the ITR settings for a specific queue.
+ **/
+static int iavf_set_itr_per_queue(struct iavf_adapter *adapter,
+				  struct ethtool_coalesce *ec, int queue)
+{
+	struct iavf_ring *rx_ring = &adapter->rx_rings[queue];
+	struct iavf_ring *tx_ring = &adapter->tx_rings[queue];
+	struct iavf_q_vector *q_vector;
+	u16 itr_setting;
+
+	itr_setting = rx_ring->itr_setting & ~IAVF_ITR_DYNAMIC;
+
+	if (ec->rx_coalesce_usecs != itr_setting &&
+	    ec->use_adaptive_rx_coalesce) {
+		netif_info(adapter, drv, adapter->netdev,
+			   "Rx interrupt throttling cannot be changed if adaptive-rx is enabled\n");
+		return -EINVAL;
+	}
+
+	itr_setting = tx_ring->itr_setting & ~IAVF_ITR_DYNAMIC;
+
+	if (ec->tx_coalesce_usecs != itr_setting &&
+	    ec->use_adaptive_tx_coalesce) {
+		netif_info(adapter, drv, adapter->netdev,
+			   "Tx interrupt throttling cannot be changed if adaptive-tx is enabled\n");
+		return -EINVAL;
+	}
+
+	rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
+	tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
+
+	rx_ring->itr_setting |= IAVF_ITR_DYNAMIC;
+	if (!ec->use_adaptive_rx_coalesce)
+		rx_ring->itr_setting ^= IAVF_ITR_DYNAMIC;
+
+	tx_ring->itr_setting |= IAVF_ITR_DYNAMIC;
+	if (!ec->use_adaptive_tx_coalesce)
+		tx_ring->itr_setting ^= IAVF_ITR_DYNAMIC;
+
+	q_vector = rx_ring->q_vector;
+	q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
+
+	q_vector = tx_ring->q_vector;
+	q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
+
+	/* The interrupt handler itself will take care of programming
+	 * the Tx and Rx ITR values based on the values we have entered
+	 * into the q_vector, no need to write the values now.
+	 */
+	return 0;
+}
+
+/**
+ * __iavf_set_coalesce - set coalesce settings for particular queue
+ * @netdev: the netdev to change
+ * @ec: ethtool coalesce settings
+ * @queue: the queue to change
+ *
+ * Sets the coalesce settings for a particular queue.
+ **/
+static int __iavf_set_coalesce(struct net_device *netdev,
+			       struct ethtool_coalesce *ec, int queue)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	if (ec->rx_coalesce_usecs > IAVF_MAX_ITR) {
+		netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
+		return -EINVAL;
+	} else if (ec->tx_coalesce_usecs > IAVF_MAX_ITR) {
+		netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
+		return -EINVAL;
+	}
+
+	/* Rx and Tx usecs has per queue value. If user doesn't specify the
+	 * queue, apply to all queues.
+	 */
+	if (queue < 0) {
+		for (i = 0; i < adapter->num_active_queues; i++)
+			if (iavf_set_itr_per_queue(adapter, ec, i))
+				return -EINVAL;
+	} else if (queue < adapter->num_active_queues) {
+		if (iavf_set_itr_per_queue(adapter, ec, queue))
+			return -EINVAL;
+	} else {
+		netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
+			   adapter->num_active_queues - 1);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_set_coalesce - Set interrupt coalescing settings
+ * @netdev: network interface device structure
+ * @ec: ethtool coalesce structure
+ * @kernel_coal: ethtool CQE mode setting structure
+ * @extack: extack for reporting error messages
+ *
+ * Change current coalescing settings for every queue.
+ **/
+static int iavf_set_coalesce(struct net_device *netdev,
+			     struct ethtool_coalesce *ec,
+			     struct kernel_ethtool_coalesce *kernel_coal,
+			     struct netlink_ext_ack *extack)
+{
+	return __iavf_set_coalesce(netdev, ec, -1);
+}
+
+/**
+ * iavf_set_per_queue_coalesce - set specific queue's coalesce settings
+ * @netdev: the netdev to change
+ * @ec: ethtool's coalesce settings
+ * @queue: the queue to modify
+ *
+ * Modifies a specific queue's coalesce settings.
+ */
+static int iavf_set_per_queue_coalesce(struct net_device *netdev, u32 queue,
+				       struct ethtool_coalesce *ec)
+{
+	return __iavf_set_coalesce(netdev, ec, queue);
+}
+
+/**
+ * iavf_fltr_to_ethtool_flow - convert filter type values to ethtool
+ * flow type values
+ * @flow: filter type to be converted
+ *
+ * Returns the corresponding ethtool flow type.
+ */
+static int iavf_fltr_to_ethtool_flow(enum iavf_fdir_flow_type flow)
+{
+	switch (flow) {
+	case IAVF_FDIR_FLOW_IPV4_TCP:
+		return TCP_V4_FLOW;
+	case IAVF_FDIR_FLOW_IPV4_UDP:
+		return UDP_V4_FLOW;
+	case IAVF_FDIR_FLOW_IPV4_SCTP:
+		return SCTP_V4_FLOW;
+	case IAVF_FDIR_FLOW_IPV4_AH:
+		return AH_V4_FLOW;
+	case IAVF_FDIR_FLOW_IPV4_ESP:
+		return ESP_V4_FLOW;
+	case IAVF_FDIR_FLOW_IPV4_OTHER:
+		return IPV4_USER_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_TCP:
+		return TCP_V6_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_UDP:
+		return UDP_V6_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_SCTP:
+		return SCTP_V6_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_AH:
+		return AH_V6_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_ESP:
+		return ESP_V6_FLOW;
+	case IAVF_FDIR_FLOW_IPV6_OTHER:
+		return IPV6_USER_FLOW;
+	case IAVF_FDIR_FLOW_NON_IP_L2:
+		return ETHER_FLOW;
+	default:
+		/* 0 is undefined ethtool flow */
+		return 0;
+	}
+}
+
+/**
+ * iavf_ethtool_flow_to_fltr - convert ethtool flow type to filter enum
+ * @eth: Ethtool flow type to be converted
+ *
+ * Returns flow enum
+ */
+static enum iavf_fdir_flow_type iavf_ethtool_flow_to_fltr(int eth)
+{
+	switch (eth) {
+	case TCP_V4_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_TCP;
+	case UDP_V4_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_UDP;
+	case SCTP_V4_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_SCTP;
+	case AH_V4_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_AH;
+	case ESP_V4_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_ESP;
+	case IPV4_USER_FLOW:
+		return IAVF_FDIR_FLOW_IPV4_OTHER;
+	case TCP_V6_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_TCP;
+	case UDP_V6_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_UDP;
+	case SCTP_V6_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_SCTP;
+	case AH_V6_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_AH;
+	case ESP_V6_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_ESP;
+	case IPV6_USER_FLOW:
+		return IAVF_FDIR_FLOW_IPV6_OTHER;
+	case ETHER_FLOW:
+		return IAVF_FDIR_FLOW_NON_IP_L2;
+	default:
+		return IAVF_FDIR_FLOW_NONE;
+	}
+}
+
+/**
+ * iavf_is_mask_valid - check mask field set
+ * @mask: full mask to check
+ * @field: field for which mask should be valid
+ *
+ * If the mask is fully set return true. If it is not valid for field return
+ * false.
+ */
+static bool iavf_is_mask_valid(u64 mask, u64 field)
+{
+	return (mask & field) == field;
+}
+
+/**
+ * iavf_parse_rx_flow_user_data - deconstruct user-defined data
+ * @fsp: pointer to ethtool Rx flow specification
+ * @fltr: pointer to Flow Director filter for userdef data storage
+ *
+ * Returns 0 on success, negative error value on failure
+ */
+static int
+iavf_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
+			     struct iavf_fdir_fltr *fltr)
+{
+	struct iavf_flex_word *flex;
+	int i, cnt = 0;
+
+	if (!(fsp->flow_type & FLOW_EXT))
+		return 0;
+
+	for (i = 0; i < IAVF_FLEX_WORD_NUM; i++) {
+#define IAVF_USERDEF_FLEX_WORD_M	GENMASK(15, 0)
+#define IAVF_USERDEF_FLEX_OFFS_S	16
+#define IAVF_USERDEF_FLEX_OFFS_M	GENMASK(31, IAVF_USERDEF_FLEX_OFFS_S)
+#define IAVF_USERDEF_FLEX_FLTR_M	GENMASK(31, 0)
+		u32 value = be32_to_cpu(fsp->h_ext.data[i]);
+		u32 mask = be32_to_cpu(fsp->m_ext.data[i]);
+
+		if (!value || !mask)
+			continue;
+
+		if (!iavf_is_mask_valid(mask, IAVF_USERDEF_FLEX_FLTR_M))
+			return -EINVAL;
+
+		/* 504 is the maximum value for offsets, and offset is measured
+		 * from the start of the MAC address.
+		 */
+#define IAVF_USERDEF_FLEX_MAX_OFFS_VAL 504
+		flex = &fltr->flex_words[cnt++];
+		flex->word = value & IAVF_USERDEF_FLEX_WORD_M;
+		flex->offset = (value & IAVF_USERDEF_FLEX_OFFS_M) >>
+			     IAVF_USERDEF_FLEX_OFFS_S;
+		if (flex->offset > IAVF_USERDEF_FLEX_MAX_OFFS_VAL)
+			return -EINVAL;
+	}
+
+	fltr->flex_cnt = cnt;
+
+	return 0;
+}
+
+/**
+ * iavf_fill_rx_flow_ext_data - fill the additional data
+ * @fsp: pointer to ethtool Rx flow specification
+ * @fltr: pointer to Flow Director filter to get additional data
+ */
+static void
+iavf_fill_rx_flow_ext_data(struct ethtool_rx_flow_spec *fsp,
+			   struct iavf_fdir_fltr *fltr)
+{
+	if (!fltr->ext_mask.usr_def[0] && !fltr->ext_mask.usr_def[1])
+		return;
+
+	fsp->flow_type |= FLOW_EXT;
+
+	memcpy(fsp->h_ext.data, fltr->ext_data.usr_def, sizeof(fsp->h_ext.data));
+	memcpy(fsp->m_ext.data, fltr->ext_mask.usr_def, sizeof(fsp->m_ext.data));
+}
+
+/**
+ * iavf_get_ethtool_fdir_entry - fill ethtool structure with Flow Director filter data
+ * @adapter: the VF adapter structure that contains filter list
+ * @cmd: ethtool command data structure to receive the filter data
+ *
+ * Returns 0 as expected for success by ethtool
+ */
+static int
+iavf_get_ethtool_fdir_entry(struct iavf_adapter *adapter,
+			    struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct iavf_fdir_fltr *rule = NULL;
+	int ret = 0;
+
+	if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED))
+		return -EOPNOTSUPP;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+
+	rule = iavf_find_fdir_fltr_by_loc(adapter, fsp->location);
+	if (!rule) {
+		ret = -EINVAL;
+		goto release_lock;
+	}
+
+	fsp->flow_type = iavf_fltr_to_ethtool_flow(rule->flow_type);
+
+	memset(&fsp->m_u, 0, sizeof(fsp->m_u));
+	memset(&fsp->m_ext, 0, sizeof(fsp->m_ext));
+
+	switch (fsp->flow_type) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+		fsp->h_u.tcp_ip4_spec.ip4src = rule->ip_data.v4_addrs.src_ip;
+		fsp->h_u.tcp_ip4_spec.ip4dst = rule->ip_data.v4_addrs.dst_ip;
+		fsp->h_u.tcp_ip4_spec.psrc = rule->ip_data.src_port;
+		fsp->h_u.tcp_ip4_spec.pdst = rule->ip_data.dst_port;
+		fsp->h_u.tcp_ip4_spec.tos = rule->ip_data.tos;
+		fsp->m_u.tcp_ip4_spec.ip4src = rule->ip_mask.v4_addrs.src_ip;
+		fsp->m_u.tcp_ip4_spec.ip4dst = rule->ip_mask.v4_addrs.dst_ip;
+		fsp->m_u.tcp_ip4_spec.psrc = rule->ip_mask.src_port;
+		fsp->m_u.tcp_ip4_spec.pdst = rule->ip_mask.dst_port;
+		fsp->m_u.tcp_ip4_spec.tos = rule->ip_mask.tos;
+		break;
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+		fsp->h_u.ah_ip4_spec.ip4src = rule->ip_data.v4_addrs.src_ip;
+		fsp->h_u.ah_ip4_spec.ip4dst = rule->ip_data.v4_addrs.dst_ip;
+		fsp->h_u.ah_ip4_spec.spi = rule->ip_data.spi;
+		fsp->h_u.ah_ip4_spec.tos = rule->ip_data.tos;
+		fsp->m_u.ah_ip4_spec.ip4src = rule->ip_mask.v4_addrs.src_ip;
+		fsp->m_u.ah_ip4_spec.ip4dst = rule->ip_mask.v4_addrs.dst_ip;
+		fsp->m_u.ah_ip4_spec.spi = rule->ip_mask.spi;
+		fsp->m_u.ah_ip4_spec.tos = rule->ip_mask.tos;
+		break;
+	case IPV4_USER_FLOW:
+		fsp->h_u.usr_ip4_spec.ip4src = rule->ip_data.v4_addrs.src_ip;
+		fsp->h_u.usr_ip4_spec.ip4dst = rule->ip_data.v4_addrs.dst_ip;
+		fsp->h_u.usr_ip4_spec.l4_4_bytes = rule->ip_data.l4_header;
+		fsp->h_u.usr_ip4_spec.tos = rule->ip_data.tos;
+		fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+		fsp->h_u.usr_ip4_spec.proto = rule->ip_data.proto;
+		fsp->m_u.usr_ip4_spec.ip4src = rule->ip_mask.v4_addrs.src_ip;
+		fsp->m_u.usr_ip4_spec.ip4dst = rule->ip_mask.v4_addrs.dst_ip;
+		fsp->m_u.usr_ip4_spec.l4_4_bytes = rule->ip_mask.l4_header;
+		fsp->m_u.usr_ip4_spec.tos = rule->ip_mask.tos;
+		fsp->m_u.usr_ip4_spec.ip_ver = 0xFF;
+		fsp->m_u.usr_ip4_spec.proto = rule->ip_mask.proto;
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		memcpy(fsp->h_u.usr_ip6_spec.ip6src, &rule->ip_data.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->h_u.usr_ip6_spec.ip6dst, &rule->ip_data.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->h_u.tcp_ip6_spec.psrc = rule->ip_data.src_port;
+		fsp->h_u.tcp_ip6_spec.pdst = rule->ip_data.dst_port;
+		fsp->h_u.tcp_ip6_spec.tclass = rule->ip_data.tclass;
+		memcpy(fsp->m_u.usr_ip6_spec.ip6src, &rule->ip_mask.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.usr_ip6_spec.ip6dst, &rule->ip_mask.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->m_u.tcp_ip6_spec.psrc = rule->ip_mask.src_port;
+		fsp->m_u.tcp_ip6_spec.pdst = rule->ip_mask.dst_port;
+		fsp->m_u.tcp_ip6_spec.tclass = rule->ip_mask.tclass;
+		break;
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+		memcpy(fsp->h_u.ah_ip6_spec.ip6src, &rule->ip_data.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->h_u.ah_ip6_spec.ip6dst, &rule->ip_data.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->h_u.ah_ip6_spec.spi = rule->ip_data.spi;
+		fsp->h_u.ah_ip6_spec.tclass = rule->ip_data.tclass;
+		memcpy(fsp->m_u.ah_ip6_spec.ip6src, &rule->ip_mask.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.ah_ip6_spec.ip6dst, &rule->ip_mask.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->m_u.ah_ip6_spec.spi = rule->ip_mask.spi;
+		fsp->m_u.ah_ip6_spec.tclass = rule->ip_mask.tclass;
+		break;
+	case IPV6_USER_FLOW:
+		memcpy(fsp->h_u.usr_ip6_spec.ip6src, &rule->ip_data.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->h_u.usr_ip6_spec.ip6dst, &rule->ip_data.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->h_u.usr_ip6_spec.l4_4_bytes = rule->ip_data.l4_header;
+		fsp->h_u.usr_ip6_spec.tclass = rule->ip_data.tclass;
+		fsp->h_u.usr_ip6_spec.l4_proto = rule->ip_data.proto;
+		memcpy(fsp->m_u.usr_ip6_spec.ip6src, &rule->ip_mask.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.usr_ip6_spec.ip6dst, &rule->ip_mask.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->m_u.usr_ip6_spec.l4_4_bytes = rule->ip_mask.l4_header;
+		fsp->m_u.usr_ip6_spec.tclass = rule->ip_mask.tclass;
+		fsp->m_u.usr_ip6_spec.l4_proto = rule->ip_mask.proto;
+		break;
+	case ETHER_FLOW:
+		fsp->h_u.ether_spec.h_proto = rule->eth_data.etype;
+		fsp->m_u.ether_spec.h_proto = rule->eth_mask.etype;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	iavf_fill_rx_flow_ext_data(fsp, rule);
+
+	if (rule->action == VIRTCHNL_ACTION_DROP)
+		fsp->ring_cookie = RX_CLS_FLOW_DISC;
+	else
+		fsp->ring_cookie = rule->q_index;
+
+release_lock:
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+	return ret;
+}
+
+/**
+ * iavf_get_fdir_fltr_ids - fill buffer with filter IDs of active filters
+ * @adapter: the VF adapter structure containing the filter list
+ * @cmd: ethtool command data structure
+ * @rule_locs: ethtool array passed in from OS to receive filter IDs
+ *
+ * Returns 0 as expected for success by ethtool
+ */
+static int
+iavf_get_fdir_fltr_ids(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd,
+		       u32 *rule_locs)
+{
+	struct iavf_fdir_fltr *fltr;
+	unsigned int cnt = 0;
+	int val = 0;
+
+	if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED))
+		return -EOPNOTSUPP;
+
+	cmd->data = IAVF_MAX_FDIR_FILTERS;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+
+	list_for_each_entry(fltr, &adapter->fdir_list_head, list) {
+		if (cnt == cmd->rule_cnt) {
+			val = -EMSGSIZE;
+			goto release_lock;
+		}
+		rule_locs[cnt] = fltr->loc;
+		cnt++;
+	}
+
+release_lock:
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+	if (!val)
+		cmd->rule_cnt = cnt;
+
+	return val;
+}
+
+/**
+ * iavf_add_fdir_fltr_info - Set the input set for Flow Director filter
+ * @adapter: pointer to the VF adapter structure
+ * @fsp: pointer to ethtool Rx flow specification
+ * @fltr: filter structure
+ */
+static int
+iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spec *fsp,
+			struct iavf_fdir_fltr *fltr)
+{
+	u32 flow_type, q_index = 0;
+	enum virtchnl_action act;
+	int err;
+
+	if (fsp->ring_cookie == RX_CLS_FLOW_DISC) {
+		act = VIRTCHNL_ACTION_DROP;
+	} else {
+		q_index = fsp->ring_cookie;
+		if (q_index >= adapter->num_active_queues)
+			return -EINVAL;
+
+		act = VIRTCHNL_ACTION_QUEUE;
+	}
+
+	fltr->action = act;
+	fltr->loc = fsp->location;
+	fltr->q_index = q_index;
+
+	if (fsp->flow_type & FLOW_EXT) {
+		memcpy(fltr->ext_data.usr_def, fsp->h_ext.data,
+		       sizeof(fltr->ext_data.usr_def));
+		memcpy(fltr->ext_mask.usr_def, fsp->m_ext.data,
+		       sizeof(fltr->ext_mask.usr_def));
+	}
+
+	flow_type = fsp->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS);
+	fltr->flow_type = iavf_ethtool_flow_to_fltr(flow_type);
+
+	switch (flow_type) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+		fltr->ip_data.v4_addrs.src_ip = fsp->h_u.tcp_ip4_spec.ip4src;
+		fltr->ip_data.v4_addrs.dst_ip = fsp->h_u.tcp_ip4_spec.ip4dst;
+		fltr->ip_data.src_port = fsp->h_u.tcp_ip4_spec.psrc;
+		fltr->ip_data.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+		fltr->ip_data.tos = fsp->h_u.tcp_ip4_spec.tos;
+		fltr->ip_mask.v4_addrs.src_ip = fsp->m_u.tcp_ip4_spec.ip4src;
+		fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.tcp_ip4_spec.ip4dst;
+		fltr->ip_mask.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+		fltr->ip_mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+		fltr->ip_mask.tos = fsp->m_u.tcp_ip4_spec.tos;
+		fltr->ip_ver = 4;
+		break;
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+		fltr->ip_data.v4_addrs.src_ip = fsp->h_u.ah_ip4_spec.ip4src;
+		fltr->ip_data.v4_addrs.dst_ip = fsp->h_u.ah_ip4_spec.ip4dst;
+		fltr->ip_data.spi = fsp->h_u.ah_ip4_spec.spi;
+		fltr->ip_data.tos = fsp->h_u.ah_ip4_spec.tos;
+		fltr->ip_mask.v4_addrs.src_ip = fsp->m_u.ah_ip4_spec.ip4src;
+		fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.ah_ip4_spec.ip4dst;
+		fltr->ip_mask.spi = fsp->m_u.ah_ip4_spec.spi;
+		fltr->ip_mask.tos = fsp->m_u.ah_ip4_spec.tos;
+		fltr->ip_ver = 4;
+		break;
+	case IPV4_USER_FLOW:
+		fltr->ip_data.v4_addrs.src_ip = fsp->h_u.usr_ip4_spec.ip4src;
+		fltr->ip_data.v4_addrs.dst_ip = fsp->h_u.usr_ip4_spec.ip4dst;
+		fltr->ip_data.l4_header = fsp->h_u.usr_ip4_spec.l4_4_bytes;
+		fltr->ip_data.tos = fsp->h_u.usr_ip4_spec.tos;
+		fltr->ip_data.proto = fsp->h_u.usr_ip4_spec.proto;
+		fltr->ip_mask.v4_addrs.src_ip = fsp->m_u.usr_ip4_spec.ip4src;
+		fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.usr_ip4_spec.ip4dst;
+		fltr->ip_mask.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes;
+		fltr->ip_mask.tos = fsp->m_u.usr_ip4_spec.tos;
+		fltr->ip_mask.proto = fsp->m_u.usr_ip4_spec.proto;
+		fltr->ip_ver = 4;
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_data.v6_addrs.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_data.src_port = fsp->h_u.tcp_ip6_spec.psrc;
+		fltr->ip_data.dst_port = fsp->h_u.tcp_ip6_spec.pdst;
+		fltr->ip_data.tclass = fsp->h_u.tcp_ip6_spec.tclass;
+		memcpy(&fltr->ip_mask.v6_addrs.src_ip, fsp->m_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_mask.v6_addrs.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_mask.src_port = fsp->m_u.tcp_ip6_spec.psrc;
+		fltr->ip_mask.dst_port = fsp->m_u.tcp_ip6_spec.pdst;
+		fltr->ip_mask.tclass = fsp->m_u.tcp_ip6_spec.tclass;
+		fltr->ip_ver = 6;
+		break;
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+		memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.ah_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_data.v6_addrs.dst_ip, fsp->h_u.ah_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_data.spi = fsp->h_u.ah_ip6_spec.spi;
+		fltr->ip_data.tclass = fsp->h_u.ah_ip6_spec.tclass;
+		memcpy(&fltr->ip_mask.v6_addrs.src_ip, fsp->m_u.ah_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_mask.v6_addrs.dst_ip, fsp->m_u.ah_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_mask.spi = fsp->m_u.ah_ip6_spec.spi;
+		fltr->ip_mask.tclass = fsp->m_u.ah_ip6_spec.tclass;
+		fltr->ip_ver = 6;
+		break;
+	case IPV6_USER_FLOW:
+		memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_data.v6_addrs.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_data.l4_header = fsp->h_u.usr_ip6_spec.l4_4_bytes;
+		fltr->ip_data.tclass = fsp->h_u.usr_ip6_spec.tclass;
+		fltr->ip_data.proto = fsp->h_u.usr_ip6_spec.l4_proto;
+		memcpy(&fltr->ip_mask.v6_addrs.src_ip, fsp->m_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip_mask.v6_addrs.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		fltr->ip_mask.l4_header = fsp->m_u.usr_ip6_spec.l4_4_bytes;
+		fltr->ip_mask.tclass = fsp->m_u.usr_ip6_spec.tclass;
+		fltr->ip_mask.proto = fsp->m_u.usr_ip6_spec.l4_proto;
+		fltr->ip_ver = 6;
+		break;
+	case ETHER_FLOW:
+		fltr->eth_data.etype = fsp->h_u.ether_spec.h_proto;
+		fltr->eth_mask.etype = fsp->m_u.ether_spec.h_proto;
+		break;
+	default:
+		/* not doing un-parsed flow types */
+		return -EINVAL;
+	}
+
+	err = iavf_validate_fdir_fltr_masks(adapter, fltr);
+	if (err)
+		return err;
+
+	if (iavf_fdir_is_dup_fltr(adapter, fltr))
+		return -EEXIST;
+
+	err = iavf_parse_rx_flow_user_data(fsp, fltr);
+	if (err)
+		return err;
+
+	return iavf_fill_fdir_add_msg(adapter, fltr);
+}
+
+/**
+ * iavf_add_fdir_ethtool - add Flow Director filter
+ * @adapter: pointer to the VF adapter structure
+ * @cmd: command to add Flow Director filter
+ *
+ * Returns 0 on success and negative values for failure
+ */
+static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp = &cmd->fs;
+	struct iavf_fdir_fltr *fltr;
+	int count = 50;
+	int err;
+
+	if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED))
+		return -EOPNOTSUPP;
+
+	if (fsp->flow_type & FLOW_MAC_EXT)
+		return -EINVAL;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	if (adapter->fdir_active_fltr >= IAVF_MAX_FDIR_FILTERS) {
+		spin_unlock_bh(&adapter->fdir_fltr_lock);
+		dev_err(&adapter->pdev->dev,
+			"Unable to add Flow Director filter because VF reached the limit of max allowed filters (%u)\n",
+			IAVF_MAX_FDIR_FILTERS);
+		return -ENOSPC;
+	}
+
+	if (iavf_find_fdir_fltr_by_loc(adapter, fsp->location)) {
+		dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, it already exists\n");
+		spin_unlock_bh(&adapter->fdir_fltr_lock);
+		return -EEXIST;
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	fltr = kzalloc(sizeof(*fltr), GFP_KERNEL);
+	if (!fltr)
+		return -ENOMEM;
+
+	while (!mutex_trylock(&adapter->crit_lock)) {
+		if (--count == 0) {
+			kfree(fltr);
+			return -EINVAL;
+		}
+		udelay(1);
+	}
+
+	err = iavf_add_fdir_fltr_info(adapter, fsp, fltr);
+	if (err)
+		goto ret;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	iavf_fdir_list_add_fltr(adapter, fltr);
+	adapter->fdir_active_fltr++;
+	if (adapter->link_up) {
+		fltr->state = IAVF_FDIR_FLTR_ADD_REQUEST;
+		adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER;
+	} else {
+		fltr->state = IAVF_FDIR_FLTR_INACTIVE;
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	if (adapter->link_up)
+		mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
+ret:
+	if (err && fltr)
+		kfree(fltr);
+
+	mutex_unlock(&adapter->crit_lock);
+	return err;
+}
+
+/**
+ * iavf_del_fdir_ethtool - delete Flow Director filter
+ * @adapter: pointer to the VF adapter structure
+ * @cmd: command to delete Flow Director filter
+ *
+ * Returns 0 on success and negative values for failure
+ */
+static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct iavf_fdir_fltr *fltr = NULL;
+	int err = 0;
+
+	if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED))
+		return -EOPNOTSUPP;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	fltr = iavf_find_fdir_fltr_by_loc(adapter, fsp->location);
+	if (fltr) {
+		if (fltr->state == IAVF_FDIR_FLTR_ACTIVE) {
+			fltr->state = IAVF_FDIR_FLTR_DEL_REQUEST;
+			adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER;
+		} else if (fltr->state == IAVF_FDIR_FLTR_INACTIVE) {
+			list_del(&fltr->list);
+			kfree(fltr);
+			adapter->fdir_active_fltr--;
+			fltr = NULL;
+		} else {
+			err = -EBUSY;
+		}
+	} else if (adapter->fdir_active_fltr) {
+		err = -EINVAL;
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	if (fltr && fltr->state == IAVF_FDIR_FLTR_DEL_REQUEST)
+		mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
+
+	return err;
+}
+
+/**
+ * iavf_adv_rss_parse_hdrs - parses headers from RSS hash input
+ * @cmd: ethtool rxnfc command
+ *
+ * This function parses the rxnfc command and returns intended
+ * header types for RSS configuration
+ */
+static u32 iavf_adv_rss_parse_hdrs(struct ethtool_rxnfc *cmd)
+{
+	u32 hdrs = IAVF_ADV_RSS_FLOW_SEG_HDR_NONE;
+
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_TCP |
+			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4;
+		break;
+	case UDP_V4_FLOW:
+		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_UDP |
+			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4;
+		break;
+	case SCTP_V4_FLOW:
+		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP |
+			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV4;
+		break;
+	case TCP_V6_FLOW:
+		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_TCP |
+			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6;
+		break;
+	case UDP_V6_FLOW:
+		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_UDP |
+			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6;
+		break;
+	case SCTP_V6_FLOW:
+		hdrs |= IAVF_ADV_RSS_FLOW_SEG_HDR_SCTP |
+			IAVF_ADV_RSS_FLOW_SEG_HDR_IPV6;
+		break;
+	default:
+		break;
+	}
+
+	return hdrs;
+}
+
+/**
+ * iavf_adv_rss_parse_hash_flds - parses hash fields from RSS hash input
+ * @cmd: ethtool rxnfc command
+ *
+ * This function parses the rxnfc command and returns intended hash fields for
+ * RSS configuration
+ */
+static u64 iavf_adv_rss_parse_hash_flds(struct ethtool_rxnfc *cmd)
+{
+	u64 hfld = IAVF_ADV_RSS_HASH_INVALID;
+
+	if (cmd->data & RXH_IP_SRC || cmd->data & RXH_IP_DST) {
+		switch (cmd->flow_type) {
+		case TCP_V4_FLOW:
+		case UDP_V4_FLOW:
+		case SCTP_V4_FLOW:
+			if (cmd->data & RXH_IP_SRC)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_IPV4_SA;
+			if (cmd->data & RXH_IP_DST)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_IPV4_DA;
+			break;
+		case TCP_V6_FLOW:
+		case UDP_V6_FLOW:
+		case SCTP_V6_FLOW:
+			if (cmd->data & RXH_IP_SRC)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_IPV6_SA;
+			if (cmd->data & RXH_IP_DST)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_IPV6_DA;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (cmd->data & RXH_L4_B_0_1 || cmd->data & RXH_L4_B_2_3) {
+		switch (cmd->flow_type) {
+		case TCP_V4_FLOW:
+		case TCP_V6_FLOW:
+			if (cmd->data & RXH_L4_B_0_1)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT;
+			if (cmd->data & RXH_L4_B_2_3)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT;
+			break;
+		case UDP_V4_FLOW:
+		case UDP_V6_FLOW:
+			if (cmd->data & RXH_L4_B_0_1)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT;
+			if (cmd->data & RXH_L4_B_2_3)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT;
+			break;
+		case SCTP_V4_FLOW:
+		case SCTP_V6_FLOW:
+			if (cmd->data & RXH_L4_B_0_1)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_SCTP_SRC_PORT;
+			if (cmd->data & RXH_L4_B_2_3)
+				hfld |= IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return hfld;
+}
+
+/**
+ * iavf_set_adv_rss_hash_opt - Enable/Disable flow types for RSS hash
+ * @adapter: pointer to the VF adapter structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns Success if the flow input set is supported.
+ */
+static int
+iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter,
+			  struct ethtool_rxnfc *cmd)
+{
+	struct iavf_adv_rss *rss_old, *rss_new;
+	bool rss_new_add = false;
+	int count = 50, err = 0;
+	u64 hash_flds;
+	u32 hdrs;
+
+	if (!ADV_RSS_SUPPORT(adapter))
+		return -EOPNOTSUPP;
+
+	hdrs = iavf_adv_rss_parse_hdrs(cmd);
+	if (hdrs == IAVF_ADV_RSS_FLOW_SEG_HDR_NONE)
+		return -EINVAL;
+
+	hash_flds = iavf_adv_rss_parse_hash_flds(cmd);
+	if (hash_flds == IAVF_ADV_RSS_HASH_INVALID)
+		return -EINVAL;
+
+	rss_new = kzalloc(sizeof(*rss_new), GFP_KERNEL);
+	if (!rss_new)
+		return -ENOMEM;
+
+	if (iavf_fill_adv_rss_cfg_msg(&rss_new->cfg_msg, hdrs, hash_flds)) {
+		kfree(rss_new);
+		return -EINVAL;
+	}
+
+	while (!mutex_trylock(&adapter->crit_lock)) {
+		if (--count == 0) {
+			kfree(rss_new);
+			return -EINVAL;
+		}
+
+		udelay(1);
+	}
+
+	spin_lock_bh(&adapter->adv_rss_lock);
+	rss_old = iavf_find_adv_rss_cfg_by_hdrs(adapter, hdrs);
+	if (rss_old) {
+		if (rss_old->state != IAVF_ADV_RSS_ACTIVE) {
+			err = -EBUSY;
+		} else if (rss_old->hash_flds != hash_flds) {
+			rss_old->state = IAVF_ADV_RSS_ADD_REQUEST;
+			rss_old->hash_flds = hash_flds;
+			memcpy(&rss_old->cfg_msg, &rss_new->cfg_msg,
+			       sizeof(rss_new->cfg_msg));
+			adapter->aq_required |= IAVF_FLAG_AQ_ADD_ADV_RSS_CFG;
+		} else {
+			err = -EEXIST;
+		}
+	} else {
+		rss_new_add = true;
+		rss_new->state = IAVF_ADV_RSS_ADD_REQUEST;
+		rss_new->packet_hdrs = hdrs;
+		rss_new->hash_flds = hash_flds;
+		list_add_tail(&rss_new->list, &adapter->adv_rss_list_head);
+		adapter->aq_required |= IAVF_FLAG_AQ_ADD_ADV_RSS_CFG;
+	}
+	spin_unlock_bh(&adapter->adv_rss_lock);
+
+	if (!err)
+		mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
+
+	mutex_unlock(&adapter->crit_lock);
+
+	if (!rss_new_add)
+		kfree(rss_new);
+
+	return err;
+}
+
+/**
+ * iavf_get_adv_rss_hash_opt - Retrieve hash fields for a given flow-type
+ * @adapter: pointer to the VF adapter structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns Success if the flow input set is supported.
+ */
+static int
+iavf_get_adv_rss_hash_opt(struct iavf_adapter *adapter,
+			  struct ethtool_rxnfc *cmd)
+{
+	struct iavf_adv_rss *rss;
+	u64 hash_flds;
+	u32 hdrs;
+
+	if (!ADV_RSS_SUPPORT(adapter))
+		return -EOPNOTSUPP;
+
+	cmd->data = 0;
+
+	hdrs = iavf_adv_rss_parse_hdrs(cmd);
+	if (hdrs == IAVF_ADV_RSS_FLOW_SEG_HDR_NONE)
+		return -EINVAL;
+
+	spin_lock_bh(&adapter->adv_rss_lock);
+	rss = iavf_find_adv_rss_cfg_by_hdrs(adapter, hdrs);
+	if (rss)
+		hash_flds = rss->hash_flds;
+	else
+		hash_flds = IAVF_ADV_RSS_HASH_INVALID;
+	spin_unlock_bh(&adapter->adv_rss_lock);
+
+	if (hash_flds == IAVF_ADV_RSS_HASH_INVALID)
+		return -EINVAL;
+
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_IPV4_SA |
+			 IAVF_ADV_RSS_HASH_FLD_IPV6_SA))
+		cmd->data |= (u64)RXH_IP_SRC;
+
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_IPV4_DA |
+			 IAVF_ADV_RSS_HASH_FLD_IPV6_DA))
+		cmd->data |= (u64)RXH_IP_DST;
+
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_SRC_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_UDP_SRC_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_SCTP_SRC_PORT))
+		cmd->data |= (u64)RXH_L4_B_0_1;
+
+	if (hash_flds & (IAVF_ADV_RSS_HASH_FLD_TCP_DST_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_UDP_DST_PORT |
+			 IAVF_ADV_RSS_HASH_FLD_SCTP_DST_PORT))
+		cmd->data |= (u64)RXH_L4_B_2_3;
+
+	return 0;
+}
+
+/**
+ * iavf_set_rxnfc - command to set Rx flow rules.
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns 0 for success and negative values for errors
+ */
+static int iavf_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		ret = iavf_add_fdir_ethtool(adapter, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		ret = iavf_del_fdir_ethtool(adapter, cmd);
+		break;
+	case ETHTOOL_SRXFH:
+		ret = iavf_set_adv_rss_hash_opt(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/**
+ * iavf_get_rxnfc - command to get RX flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ * @rule_locs: pointer to store rule locations
+ *
+ * Returns Success if the command is supported.
+ **/
+static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+			  u32 *rule_locs)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = adapter->num_active_queues;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		if (!(adapter->flags & IAVF_FLAG_FDIR_ENABLED))
+			break;
+		spin_lock_bh(&adapter->fdir_fltr_lock);
+		cmd->rule_cnt = adapter->fdir_active_fltr;
+		spin_unlock_bh(&adapter->fdir_fltr_lock);
+		cmd->data = IAVF_MAX_FDIR_FILTERS;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = iavf_get_ethtool_fdir_entry(adapter, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		ret = iavf_get_fdir_fltr_ids(adapter, cmd, (u32 *)rule_locs);
+		break;
+	case ETHTOOL_GRXFH:
+		ret = iavf_get_adv_rss_hash_opt(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+/**
+ * iavf_get_channels: get the number of channels supported by the device
+ * @netdev: network interface device structure
+ * @ch: channel information structure
+ *
+ * For the purposes of our device, we only use combined channels, i.e. a tx/rx
+ * queue pair. Report one extra channel to match our "other" MSI-X vector.
+ **/
+static void iavf_get_channels(struct net_device *netdev,
+			      struct ethtool_channels *ch)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	/* Report maximum channels */
+	ch->max_combined = adapter->vsi_res->num_queue_pairs;
+
+	ch->max_other = NONQ_VECS;
+	ch->other_count = NONQ_VECS;
+
+	ch->combined_count = adapter->num_active_queues;
+}
+
+/**
+ * iavf_set_channels: set the new channel count
+ * @netdev: network interface device structure
+ * @ch: channel information structure
+ *
+ * Negotiate a new number of channels with the PF then do a reset.  During
+ * reset we'll realloc queues and fix the RSS table.  Returns 0 on success,
+ * negative on failure.
+ **/
+static int iavf_set_channels(struct net_device *netdev,
+			     struct ethtool_channels *ch)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u32 num_req = ch->combined_count;
+	int ret = 0;
+
+	if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+	    adapter->num_tc) {
+		dev_info(&adapter->pdev->dev, "Cannot set channels since ADq is enabled.\n");
+		return -EINVAL;
+	}
+
+	/* All of these should have already been checked by ethtool before this
+	 * even gets to us, but just to be sure.
+	 */
+	if (num_req == 0 || num_req > adapter->vsi_res->num_queue_pairs)
+		return -EINVAL;
+
+	if (num_req == adapter->num_active_queues)
+		return 0;
+
+	if (ch->rx_count || ch->tx_count || ch->other_count != NONQ_VECS)
+		return -EINVAL;
+
+	adapter->num_req_queues = num_req;
+	adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
+	iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+
+	ret = iavf_wait_for_reset(adapter);
+	if (ret)
+		netdev_warn(netdev, "Changing channel count timeout or interrupted waiting for reset");
+
+	return ret;
+}
+
+/**
+ * iavf_get_rxfh_key_size - get the RSS hash key size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size.
+ **/
+static u32 iavf_get_rxfh_key_size(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->rss_key_size;
+}
+
+/**
+ * iavf_get_rxfh_indir_size - get the rx flow hash indirection table size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size.
+ **/
+static u32 iavf_get_rxfh_indir_size(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->rss_lut_size;
+}
+
+/**
+ * iavf_get_rxfh - get the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function in use
+ *
+ * Reads the indirection table directly from the hardware. Always returns 0.
+ **/
+static int iavf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+			 u8 *hfunc)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u16 i;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+	if (key)
+		memcpy(key, adapter->rss_key, adapter->rss_key_size);
+
+	if (indir)
+		/* Each 32 bits pointed by 'indir' is stored with a lut entry */
+		for (i = 0; i < adapter->rss_lut_size; i++)
+			indir[i] = (u32)adapter->rss_lut[i];
+
+	return 0;
+}
+
+/**
+ * iavf_set_rxfh - set the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function to use
+ *
+ * Returns -EINVAL if the table specifies an invalid queue id, otherwise
+ * returns 0 after programming the table.
+ **/
+static int iavf_set_rxfh(struct net_device *netdev, const u32 *indir,
+			 const u8 *key, const u8 hfunc)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u16 i;
+
+	/* Only support toeplitz hash function */
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+		return -EOPNOTSUPP;
+
+	if (!key && !indir)
+		return 0;
+
+	if (key)
+		memcpy(adapter->rss_key, key, adapter->rss_key_size);
+
+	if (indir) {
+		/* Each 32 bits pointed by 'indir' is stored with a lut entry */
+		for (i = 0; i < adapter->rss_lut_size; i++)
+			adapter->rss_lut[i] = (u8)(indir[i]);
+	}
+
+	return iavf_config_rss(adapter);
+}
+
+static const struct ethtool_ops iavf_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+				     ETHTOOL_COALESCE_USE_ADAPTIVE,
+	.get_drvinfo		= iavf_get_drvinfo,
+	.get_link		= ethtool_op_get_link,
+	.get_ringparam		= iavf_get_ringparam,
+	.set_ringparam		= iavf_set_ringparam,
+	.get_strings		= iavf_get_strings,
+	.get_ethtool_stats	= iavf_get_ethtool_stats,
+	.get_sset_count		= iavf_get_sset_count,
+	.get_priv_flags		= iavf_get_priv_flags,
+	.set_priv_flags		= iavf_set_priv_flags,
+	.get_msglevel		= iavf_get_msglevel,
+	.set_msglevel		= iavf_set_msglevel,
+	.get_coalesce		= iavf_get_coalesce,
+	.set_coalesce		= iavf_set_coalesce,
+	.get_per_queue_coalesce = iavf_get_per_queue_coalesce,
+	.set_per_queue_coalesce = iavf_set_per_queue_coalesce,
+	.set_rxnfc		= iavf_set_rxnfc,
+	.get_rxnfc		= iavf_get_rxnfc,
+	.get_rxfh_indir_size	= iavf_get_rxfh_indir_size,
+	.get_rxfh		= iavf_get_rxfh,
+	.set_rxfh		= iavf_set_rxfh,
+	.get_channels		= iavf_get_channels,
+	.set_channels		= iavf_set_channels,
+	.get_rxfh_key_size	= iavf_get_rxfh_key_size,
+	.get_link_ksettings	= iavf_get_link_ksettings,
+};
+
+/**
+ * iavf_set_ethtool_ops - Initialize ethtool ops struct
+ * @netdev: network interface device structure
+ *
+ * Sets ethtool ops struct in our netdev so that ethtool can call
+ * our functions.
+ **/
+void iavf_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &iavf_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
new file mode 100644
index 0000000000..03e774bd2a
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
@@ -0,0 +1,853 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Intel Corporation. */
+
+/* flow director ethtool support for iavf */
+
+#include "iavf.h"
+
+#define GTPU_PORT	2152
+#define NAT_T_ESP_PORT	4500
+#define PFCP_PORT	8805
+
+static const struct in6_addr ipv6_addr_full_mask = {
+	.in6_u = {
+		.u6_addr8 = {
+			0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+			0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+		}
+	}
+};
+
+static const struct in6_addr ipv6_addr_zero_mask = {
+	.in6_u = {
+		.u6_addr8 = {
+			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		}
+	}
+};
+
+/**
+ * iavf_validate_fdir_fltr_masks - validate Flow Director filter fields masks
+ * @adapter: pointer to the VF adapter structure
+ * @fltr: Flow Director filter data structure
+ *
+ * Returns 0 if all masks of packet fields are either full or empty. Returns
+ * error on at least one partial mask.
+ */
+int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter,
+				  struct iavf_fdir_fltr *fltr)
+{
+	if (fltr->eth_mask.etype && fltr->eth_mask.etype != htons(U16_MAX))
+		goto partial_mask;
+
+	if (fltr->ip_ver == 4) {
+		if (fltr->ip_mask.v4_addrs.src_ip &&
+		    fltr->ip_mask.v4_addrs.src_ip != htonl(U32_MAX))
+			goto partial_mask;
+
+		if (fltr->ip_mask.v4_addrs.dst_ip &&
+		    fltr->ip_mask.v4_addrs.dst_ip != htonl(U32_MAX))
+			goto partial_mask;
+
+		if (fltr->ip_mask.tos && fltr->ip_mask.tos != U8_MAX)
+			goto partial_mask;
+	} else if (fltr->ip_ver == 6) {
+		if (memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_zero_mask,
+			   sizeof(struct in6_addr)) &&
+		    memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_full_mask,
+			   sizeof(struct in6_addr)))
+			goto partial_mask;
+
+		if (memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_zero_mask,
+			   sizeof(struct in6_addr)) &&
+		    memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_full_mask,
+			   sizeof(struct in6_addr)))
+			goto partial_mask;
+
+		if (fltr->ip_mask.tclass && fltr->ip_mask.tclass != U8_MAX)
+			goto partial_mask;
+	}
+
+	if (fltr->ip_mask.proto && fltr->ip_mask.proto != U8_MAX)
+		goto partial_mask;
+
+	if (fltr->ip_mask.src_port && fltr->ip_mask.src_port != htons(U16_MAX))
+		goto partial_mask;
+
+	if (fltr->ip_mask.dst_port && fltr->ip_mask.dst_port != htons(U16_MAX))
+		goto partial_mask;
+
+	if (fltr->ip_mask.spi && fltr->ip_mask.spi != htonl(U32_MAX))
+		goto partial_mask;
+
+	if (fltr->ip_mask.l4_header &&
+	    fltr->ip_mask.l4_header != htonl(U32_MAX))
+		goto partial_mask;
+
+	return 0;
+
+partial_mask:
+	dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, partial masks are not supported\n");
+	return -EOPNOTSUPP;
+}
+
+/**
+ * iavf_pkt_udp_no_pay_len - the length of UDP packet without payload
+ * @fltr: Flow Director filter data structure
+ */
+static u16 iavf_pkt_udp_no_pay_len(struct iavf_fdir_fltr *fltr)
+{
+	return sizeof(struct ethhdr) +
+	       (fltr->ip_ver == 4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) +
+	       sizeof(struct udphdr);
+}
+
+/**
+ * iavf_fill_fdir_gtpu_hdr - fill the GTP-U protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the GTP-U protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_gtpu_hdr(struct iavf_fdir_fltr *fltr,
+			struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *uhdr = &proto_hdrs->proto_hdr[proto_hdrs->count - 1];
+	struct virtchnl_proto_hdr *ghdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct virtchnl_proto_hdr *ehdr = NULL; /* Extension Header if it exists */
+	u16 adj_offs, hdr_offs;
+	int i;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(ghdr, GTPU_IP);
+
+	adj_offs = iavf_pkt_udp_no_pay_len(fltr);
+
+	for (i = 0; i < fltr->flex_cnt; i++) {
+#define IAVF_GTPU_HDR_TEID_OFFS0	4
+#define IAVF_GTPU_HDR_TEID_OFFS1	6
+#define IAVF_GTPU_HDR_N_PDU_AND_NEXT_EXTHDR_OFFS	10
+#define IAVF_GTPU_HDR_NEXT_EXTHDR_TYPE_MASK		0x00FF /* skip N_PDU */
+/* PDU Session Container Extension Header (PSC) */
+#define IAVF_GTPU_PSC_EXTHDR_TYPE			0x85
+#define IAVF_GTPU_HDR_PSC_PDU_TYPE_AND_QFI_OFFS		13
+#define IAVF_GTPU_HDR_PSC_PDU_QFI_MASK			0x3F /* skip Type */
+#define IAVF_GTPU_EH_QFI_IDX				1
+
+		if (fltr->flex_words[i].offset < adj_offs)
+			return -EINVAL;
+
+		hdr_offs = fltr->flex_words[i].offset - adj_offs;
+
+		switch (hdr_offs) {
+		case IAVF_GTPU_HDR_TEID_OFFS0:
+		case IAVF_GTPU_HDR_TEID_OFFS1: {
+			__be16 *pay_word = (__be16 *)ghdr->buffer;
+
+			pay_word[hdr_offs >> 1] = htons(fltr->flex_words[i].word);
+			VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(ghdr, GTPU_IP, TEID);
+			}
+			break;
+		case IAVF_GTPU_HDR_N_PDU_AND_NEXT_EXTHDR_OFFS:
+			if ((fltr->flex_words[i].word &
+			     IAVF_GTPU_HDR_NEXT_EXTHDR_TYPE_MASK) !=
+						IAVF_GTPU_PSC_EXTHDR_TYPE)
+				return -EOPNOTSUPP;
+			if (!ehdr)
+				ehdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+			VIRTCHNL_SET_PROTO_HDR_TYPE(ehdr, GTPU_EH);
+			break;
+		case IAVF_GTPU_HDR_PSC_PDU_TYPE_AND_QFI_OFFS:
+			if (!ehdr)
+				return -EINVAL;
+			ehdr->buffer[IAVF_GTPU_EH_QFI_IDX] =
+					fltr->flex_words[i].word &
+						IAVF_GTPU_HDR_PSC_PDU_QFI_MASK;
+			VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(ehdr, GTPU_EH, QFI);
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	uhdr->field_selector = 0; /* The PF ignores the UDP header fields */
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_pfcp_hdr - fill the PFCP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the PFCP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_pfcp_hdr(struct iavf_fdir_fltr *fltr,
+			struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *uhdr = &proto_hdrs->proto_hdr[proto_hdrs->count - 1];
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	u16 adj_offs, hdr_offs;
+	int i;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, PFCP);
+
+	adj_offs = iavf_pkt_udp_no_pay_len(fltr);
+
+	for (i = 0; i < fltr->flex_cnt; i++) {
+#define IAVF_PFCP_HDR_SFIELD_AND_MSG_TYPE_OFFS	0
+		if (fltr->flex_words[i].offset < adj_offs)
+			return -EINVAL;
+
+		hdr_offs = fltr->flex_words[i].offset - adj_offs;
+
+		switch (hdr_offs) {
+		case IAVF_PFCP_HDR_SFIELD_AND_MSG_TYPE_OFFS:
+			hdr->buffer[0] = (fltr->flex_words[i].word >> 8) & 0xff;
+			VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, PFCP, S_FIELD);
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	uhdr->field_selector = 0; /* The PF ignores the UDP header fields */
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_nat_t_esp_hdr - fill the NAT-T-ESP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the NAT-T-ESP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_nat_t_esp_hdr(struct iavf_fdir_fltr *fltr,
+			     struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *uhdr = &proto_hdrs->proto_hdr[proto_hdrs->count - 1];
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	u16 adj_offs, hdr_offs;
+	u32 spi = 0;
+	int i;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, ESP);
+
+	adj_offs = iavf_pkt_udp_no_pay_len(fltr);
+
+	for (i = 0; i < fltr->flex_cnt; i++) {
+#define IAVF_NAT_T_ESP_SPI_OFFS0	0
+#define IAVF_NAT_T_ESP_SPI_OFFS1	2
+		if (fltr->flex_words[i].offset < adj_offs)
+			return -EINVAL;
+
+		hdr_offs = fltr->flex_words[i].offset - adj_offs;
+
+		switch (hdr_offs) {
+		case IAVF_NAT_T_ESP_SPI_OFFS0:
+			spi |= fltr->flex_words[i].word << 16;
+			break;
+		case IAVF_NAT_T_ESP_SPI_OFFS1:
+			spi |= fltr->flex_words[i].word;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	if (!spi)
+		return -EOPNOTSUPP; /* Not support IKE Header Format with SPI 0 */
+
+	*(__be32 *)hdr->buffer = htonl(spi);
+	VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, ESP, SPI);
+
+	uhdr->field_selector = 0; /* The PF ignores the UDP header fields */
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_udp_flex_pay_hdr - fill the UDP payload header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the UDP payload defined protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_udp_flex_pay_hdr(struct iavf_fdir_fltr *fltr,
+				struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	int err;
+
+	switch (ntohs(fltr->ip_data.dst_port)) {
+	case GTPU_PORT:
+		err = iavf_fill_fdir_gtpu_hdr(fltr, proto_hdrs);
+		break;
+	case NAT_T_ESP_PORT:
+		err = iavf_fill_fdir_nat_t_esp_hdr(fltr, proto_hdrs);
+		break;
+	case PFCP_PORT:
+		err = iavf_fill_fdir_pfcp_hdr(fltr, proto_hdrs);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+/**
+ * iavf_fill_fdir_ip4_hdr - fill the IPv4 protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the IPv4 protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_ip4_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct iphdr *iph = (struct iphdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, IPV4);
+
+	if (fltr->ip_mask.tos == U8_MAX) {
+		iph->tos = fltr->ip_data.tos;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DSCP);
+	}
+
+	if (fltr->ip_mask.proto == U8_MAX) {
+		iph->protocol = fltr->ip_data.proto;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, PROT);
+	}
+
+	if (fltr->ip_mask.v4_addrs.src_ip == htonl(U32_MAX)) {
+		iph->saddr = fltr->ip_data.v4_addrs.src_ip;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, SRC);
+	}
+
+	if (fltr->ip_mask.v4_addrs.dst_ip == htonl(U32_MAX)) {
+		iph->daddr = fltr->ip_data.v4_addrs.dst_ip;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DST);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_ip6_hdr - fill the IPv6 protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the IPv6 protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_ip6_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct ipv6hdr *iph = (struct ipv6hdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, IPV6);
+
+	if (fltr->ip_mask.tclass == U8_MAX) {
+		iph->priority = (fltr->ip_data.tclass >> 4) & 0xF;
+		iph->flow_lbl[0] = (fltr->ip_data.tclass << 4) & 0xF0;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, TC);
+	}
+
+	if (fltr->ip_mask.proto == U8_MAX) {
+		iph->nexthdr = fltr->ip_data.proto;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, PROT);
+	}
+
+	if (!memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_full_mask,
+		    sizeof(struct in6_addr))) {
+		memcpy(&iph->saddr, &fltr->ip_data.v6_addrs.src_ip,
+		       sizeof(struct in6_addr));
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, SRC);
+	}
+
+	if (!memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_full_mask,
+		    sizeof(struct in6_addr))) {
+		memcpy(&iph->daddr, &fltr->ip_data.v6_addrs.dst_ip,
+		       sizeof(struct in6_addr));
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, DST);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_tcp_hdr - fill the TCP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the TCP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_tcp_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct tcphdr *tcph = (struct tcphdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, TCP);
+
+	if (fltr->ip_mask.src_port == htons(U16_MAX)) {
+		tcph->source = fltr->ip_data.src_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, TCP, SRC_PORT);
+	}
+
+	if (fltr->ip_mask.dst_port == htons(U16_MAX)) {
+		tcph->dest = fltr->ip_data.dst_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, TCP, DST_PORT);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_udp_hdr - fill the UDP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the UDP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_udp_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct udphdr *udph = (struct udphdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, UDP);
+
+	if (fltr->ip_mask.src_port == htons(U16_MAX)) {
+		udph->source = fltr->ip_data.src_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, SRC_PORT);
+	}
+
+	if (fltr->ip_mask.dst_port == htons(U16_MAX)) {
+		udph->dest = fltr->ip_data.dst_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, UDP, DST_PORT);
+	}
+
+	if (!fltr->flex_cnt)
+		return 0;
+
+	return iavf_fill_fdir_udp_flex_pay_hdr(fltr, proto_hdrs);
+}
+
+/**
+ * iavf_fill_fdir_sctp_hdr - fill the SCTP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the SCTP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_sctp_hdr(struct iavf_fdir_fltr *fltr,
+			struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct sctphdr *sctph = (struct sctphdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, SCTP);
+
+	if (fltr->ip_mask.src_port == htons(U16_MAX)) {
+		sctph->source = fltr->ip_data.src_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, SCTP, SRC_PORT);
+	}
+
+	if (fltr->ip_mask.dst_port == htons(U16_MAX)) {
+		sctph->dest = fltr->ip_data.dst_port;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, SCTP, DST_PORT);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_ah_hdr - fill the AH protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the AH protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_ah_hdr(struct iavf_fdir_fltr *fltr,
+		      struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct ip_auth_hdr *ah = (struct ip_auth_hdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, AH);
+
+	if (fltr->ip_mask.spi == htonl(U32_MAX)) {
+		ah->spi = fltr->ip_data.spi;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, AH, SPI);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_esp_hdr - fill the ESP protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the ESP protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_esp_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct ip_esp_hdr *esph = (struct ip_esp_hdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, ESP);
+
+	if (fltr->ip_mask.spi == htonl(U32_MAX)) {
+		esph->spi = fltr->ip_data.spi;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, ESP, SPI);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_l4_hdr - fill the L4 protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the L4 protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_l4_hdr(struct iavf_fdir_fltr *fltr,
+		      struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr;
+	__be32 *l4_4_data;
+
+	if (!fltr->ip_mask.proto) /* IPv4/IPv6 header only */
+		return 0;
+
+	hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	l4_4_data = (__be32 *)hdr->buffer;
+
+	/* L2TPv3 over IP with 'Session ID' */
+	if (fltr->ip_data.proto == 115 && fltr->ip_mask.l4_header == htonl(U32_MAX)) {
+		VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, L2TPV3);
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, L2TPV3, SESS_ID);
+
+		*l4_4_data = fltr->ip_data.l4_header;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_eth_hdr - fill the Ethernet protocol header
+ * @fltr: Flow Director filter data structure
+ * @proto_hdrs: Flow Director protocol headers data structure
+ *
+ * Returns 0 if the Ethernet protocol header is set successfully
+ */
+static int
+iavf_fill_fdir_eth_hdr(struct iavf_fdir_fltr *fltr,
+		       struct virtchnl_proto_hdrs *proto_hdrs)
+{
+	struct virtchnl_proto_hdr *hdr = &proto_hdrs->proto_hdr[proto_hdrs->count++];
+	struct ethhdr *ehdr = (struct ethhdr *)hdr->buffer;
+
+	VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, ETH);
+
+	if (fltr->eth_mask.etype == htons(U16_MAX)) {
+		if (fltr->eth_data.etype == htons(ETH_P_IP) ||
+		    fltr->eth_data.etype == htons(ETH_P_IPV6))
+			return -EOPNOTSUPP;
+
+		ehdr->h_proto = fltr->eth_data.etype;
+		VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, ETH, ETHERTYPE);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_fill_fdir_add_msg - fill the Flow Director filter into virtchnl message
+ * @adapter: pointer to the VF adapter structure
+ * @fltr: Flow Director filter data structure
+ *
+ * Returns 0 if the add Flow Director virtchnl message is filled successfully
+ */
+int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
+{
+	struct virtchnl_fdir_add *vc_msg = &fltr->vc_add_msg;
+	struct virtchnl_proto_hdrs *proto_hdrs;
+	int err;
+
+	proto_hdrs = &vc_msg->rule_cfg.proto_hdrs;
+
+	err = iavf_fill_fdir_eth_hdr(fltr, proto_hdrs); /* L2 always exists */
+	if (err)
+		return err;
+
+	switch (fltr->flow_type) {
+	case IAVF_FDIR_FLOW_IPV4_TCP:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_tcp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV4_UDP:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_udp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV4_SCTP:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_sctp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV4_AH:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_ah_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV4_ESP:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_esp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV4_OTHER:
+		err = iavf_fill_fdir_ip4_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_l4_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_TCP:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_tcp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_UDP:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_udp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_SCTP:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_sctp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_AH:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_ah_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_ESP:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_esp_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_IPV6_OTHER:
+		err = iavf_fill_fdir_ip6_hdr(fltr, proto_hdrs) |
+		      iavf_fill_fdir_l4_hdr(fltr, proto_hdrs);
+		break;
+	case IAVF_FDIR_FLOW_NON_IP_L2:
+		break;
+	default:
+		err = -EINVAL;
+		break;
+	}
+
+	if (err)
+		return err;
+
+	vc_msg->vsi_id = adapter->vsi.id;
+	vc_msg->rule_cfg.action_set.count = 1;
+	vc_msg->rule_cfg.action_set.actions[0].type = fltr->action;
+	vc_msg->rule_cfg.action_set.actions[0].act_conf.queue.index = fltr->q_index;
+
+	return 0;
+}
+
+/**
+ * iavf_fdir_flow_proto_name - get the flow protocol name
+ * @flow_type: Flow Director filter flow type
+ **/
+static const char *iavf_fdir_flow_proto_name(enum iavf_fdir_flow_type flow_type)
+{
+	switch (flow_type) {
+	case IAVF_FDIR_FLOW_IPV4_TCP:
+	case IAVF_FDIR_FLOW_IPV6_TCP:
+		return "TCP";
+	case IAVF_FDIR_FLOW_IPV4_UDP:
+	case IAVF_FDIR_FLOW_IPV6_UDP:
+		return "UDP";
+	case IAVF_FDIR_FLOW_IPV4_SCTP:
+	case IAVF_FDIR_FLOW_IPV6_SCTP:
+		return "SCTP";
+	case IAVF_FDIR_FLOW_IPV4_AH:
+	case IAVF_FDIR_FLOW_IPV6_AH:
+		return "AH";
+	case IAVF_FDIR_FLOW_IPV4_ESP:
+	case IAVF_FDIR_FLOW_IPV6_ESP:
+		return "ESP";
+	case IAVF_FDIR_FLOW_IPV4_OTHER:
+	case IAVF_FDIR_FLOW_IPV6_OTHER:
+		return "Other";
+	case IAVF_FDIR_FLOW_NON_IP_L2:
+		return "Ethernet";
+	default:
+		return NULL;
+	}
+}
+
+/**
+ * iavf_print_fdir_fltr
+ * @adapter: adapter structure
+ * @fltr: Flow Director filter to print
+ *
+ * Print the Flow Director filter
+ **/
+void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
+{
+	const char *proto = iavf_fdir_flow_proto_name(fltr->flow_type);
+
+	if (!proto)
+		return;
+
+	switch (fltr->flow_type) {
+	case IAVF_FDIR_FLOW_IPV4_TCP:
+	case IAVF_FDIR_FLOW_IPV4_UDP:
+	case IAVF_FDIR_FLOW_IPV4_SCTP:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI4 src_ip %pI4 %s: dst_port %hu src_port %hu\n",
+			 fltr->loc,
+			 &fltr->ip_data.v4_addrs.dst_ip,
+			 &fltr->ip_data.v4_addrs.src_ip,
+			 proto,
+			 ntohs(fltr->ip_data.dst_port),
+			 ntohs(fltr->ip_data.src_port));
+		break;
+	case IAVF_FDIR_FLOW_IPV4_AH:
+	case IAVF_FDIR_FLOW_IPV4_ESP:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI4 src_ip %pI4 %s: SPI %u\n",
+			 fltr->loc,
+			 &fltr->ip_data.v4_addrs.dst_ip,
+			 &fltr->ip_data.v4_addrs.src_ip,
+			 proto,
+			 ntohl(fltr->ip_data.spi));
+		break;
+	case IAVF_FDIR_FLOW_IPV4_OTHER:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI4 src_ip %pI4 proto: %u L4_bytes: 0x%x\n",
+			 fltr->loc,
+			 &fltr->ip_data.v4_addrs.dst_ip,
+			 &fltr->ip_data.v4_addrs.src_ip,
+			 fltr->ip_data.proto,
+			 ntohl(fltr->ip_data.l4_header));
+		break;
+	case IAVF_FDIR_FLOW_IPV6_TCP:
+	case IAVF_FDIR_FLOW_IPV6_UDP:
+	case IAVF_FDIR_FLOW_IPV6_SCTP:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI6 src_ip %pI6 %s: dst_port %hu src_port %hu\n",
+			 fltr->loc,
+			 &fltr->ip_data.v6_addrs.dst_ip,
+			 &fltr->ip_data.v6_addrs.src_ip,
+			 proto,
+			 ntohs(fltr->ip_data.dst_port),
+			 ntohs(fltr->ip_data.src_port));
+		break;
+	case IAVF_FDIR_FLOW_IPV6_AH:
+	case IAVF_FDIR_FLOW_IPV6_ESP:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI6 src_ip %pI6 %s: SPI %u\n",
+			 fltr->loc,
+			 &fltr->ip_data.v6_addrs.dst_ip,
+			 &fltr->ip_data.v6_addrs.src_ip,
+			 proto,
+			 ntohl(fltr->ip_data.spi));
+		break;
+	case IAVF_FDIR_FLOW_IPV6_OTHER:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u dst_ip: %pI6 src_ip %pI6 proto: %u L4_bytes: 0x%x\n",
+			 fltr->loc,
+			 &fltr->ip_data.v6_addrs.dst_ip,
+			 &fltr->ip_data.v6_addrs.src_ip,
+			 fltr->ip_data.proto,
+			 ntohl(fltr->ip_data.l4_header));
+		break;
+	case IAVF_FDIR_FLOW_NON_IP_L2:
+		dev_info(&adapter->pdev->dev, "Rule ID: %u eth_type: 0x%x\n",
+			 fltr->loc,
+			 ntohs(fltr->eth_data.etype));
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * iavf_fdir_is_dup_fltr - test if filter is already in list
+ * @adapter: pointer to the VF adapter structure
+ * @fltr: Flow Director filter data structure
+ *
+ * Returns true if the filter is found in the list
+ */
+bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
+{
+	struct iavf_fdir_fltr *tmp;
+	bool ret = false;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry(tmp, &adapter->fdir_list_head, list) {
+		if (tmp->flow_type != fltr->flow_type)
+			continue;
+
+		if (!memcmp(&tmp->eth_data, &fltr->eth_data,
+			    sizeof(fltr->eth_data)) &&
+		    !memcmp(&tmp->ip_data, &fltr->ip_data,
+			    sizeof(fltr->ip_data)) &&
+		    !memcmp(&tmp->ext_data, &fltr->ext_data,
+			    sizeof(fltr->ext_data))) {
+			ret = true;
+			break;
+		}
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	return ret;
+}
+
+/**
+ * iavf_find_fdir_fltr_by_loc - find filter with location
+ * @adapter: pointer to the VF adapter structure
+ * @loc: location to find.
+ *
+ * Returns pointer to Flow Director filter if found or null
+ */
+struct iavf_fdir_fltr *iavf_find_fdir_fltr_by_loc(struct iavf_adapter *adapter, u32 loc)
+{
+	struct iavf_fdir_fltr *rule;
+
+	list_for_each_entry(rule, &adapter->fdir_list_head, list)
+		if (rule->loc == loc)
+			return rule;
+
+	return NULL;
+}
+
+/**
+ * iavf_fdir_list_add_fltr - add a new node to the flow director filter list
+ * @adapter: pointer to the VF adapter structure
+ * @fltr: filter node to add to structure
+ */
+void iavf_fdir_list_add_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
+{
+	struct iavf_fdir_fltr *rule, *parent = NULL;
+
+	list_for_each_entry(rule, &adapter->fdir_list_head, list) {
+		if (rule->loc >= fltr->loc)
+			break;
+		parent = rule;
+	}
+
+	if (parent)
+		list_add(&fltr->list, &parent->list);
+	else
+		list_add(&fltr->list, &adapter->fdir_list_head);
+}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
new file mode 100644
index 0000000000..d31bd923ba
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021, Intel Corporation. */
+
+#ifndef _IAVF_FDIR_H_
+#define _IAVF_FDIR_H_
+
+struct iavf_adapter;
+
+/* State of Flow Director filter
+ *
+ * *_REQUEST states are used to mark filter to be sent to PF driver to perform
+ * an action (either add or delete filter). *_PENDING states are an indication
+ * that request was sent to PF and the driver is waiting for response.
+ *
+ * Both DELETE and DISABLE states are being used to delete a filter in PF.
+ * The difference is that after a successful response filter in DEL_PENDING
+ * state is being deleted from VF driver as well and filter in DIS_PENDING state
+ * is being changed to INACTIVE state.
+ */
+enum iavf_fdir_fltr_state_t {
+	IAVF_FDIR_FLTR_ADD_REQUEST,	/* User requests to add filter */
+	IAVF_FDIR_FLTR_ADD_PENDING,	/* Filter pending add by the PF */
+	IAVF_FDIR_FLTR_DEL_REQUEST,	/* User requests to delete filter */
+	IAVF_FDIR_FLTR_DEL_PENDING,	/* Filter pending delete by the PF */
+	IAVF_FDIR_FLTR_DIS_REQUEST,	/* Filter scheduled to be disabled */
+	IAVF_FDIR_FLTR_DIS_PENDING,	/* Filter pending disable by the PF */
+	IAVF_FDIR_FLTR_INACTIVE,	/* Filter inactive on link down */
+	IAVF_FDIR_FLTR_ACTIVE,		/* Filter is active */
+};
+
+enum iavf_fdir_flow_type {
+	/* NONE - used for undef/error */
+	IAVF_FDIR_FLOW_NONE = 0,
+	IAVF_FDIR_FLOW_IPV4_TCP,
+	IAVF_FDIR_FLOW_IPV4_UDP,
+	IAVF_FDIR_FLOW_IPV4_SCTP,
+	IAVF_FDIR_FLOW_IPV4_AH,
+	IAVF_FDIR_FLOW_IPV4_ESP,
+	IAVF_FDIR_FLOW_IPV4_OTHER,
+	IAVF_FDIR_FLOW_IPV6_TCP,
+	IAVF_FDIR_FLOW_IPV6_UDP,
+	IAVF_FDIR_FLOW_IPV6_SCTP,
+	IAVF_FDIR_FLOW_IPV6_AH,
+	IAVF_FDIR_FLOW_IPV6_ESP,
+	IAVF_FDIR_FLOW_IPV6_OTHER,
+	IAVF_FDIR_FLOW_NON_IP_L2,
+	/* MAX - this must be last and add anything new just above it */
+	IAVF_FDIR_FLOW_PTYPE_MAX,
+};
+
+/* Must not exceed the array element number of '__be32 data[2]' in the ethtool
+ * 'struct ethtool_rx_flow_spec.m_ext.data[2]' to express the flex-byte (word).
+ */
+#define IAVF_FLEX_WORD_NUM	2
+
+struct iavf_flex_word {
+	u16 offset;
+	u16 word;
+};
+
+struct iavf_ipv4_addrs {
+	__be32 src_ip;
+	__be32 dst_ip;
+};
+
+struct iavf_ipv6_addrs {
+	struct in6_addr src_ip;
+	struct in6_addr dst_ip;
+};
+
+struct iavf_fdir_eth {
+	__be16 etype;
+};
+
+struct iavf_fdir_ip {
+	union {
+		struct iavf_ipv4_addrs v4_addrs;
+		struct iavf_ipv6_addrs v6_addrs;
+	};
+	__be16 src_port;
+	__be16 dst_port;
+	__be32 l4_header;	/* first 4 bytes of the layer 4 header */
+	__be32 spi;		/* security parameter index for AH/ESP */
+	union {
+		u8 tos;
+		u8 tclass;
+	};
+	u8 proto;
+};
+
+struct iavf_fdir_extra {
+	u32 usr_def[IAVF_FLEX_WORD_NUM];
+};
+
+/* bookkeeping of Flow Director filters */
+struct iavf_fdir_fltr {
+	enum iavf_fdir_fltr_state_t state;
+	struct list_head list;
+
+	enum iavf_fdir_flow_type flow_type;
+
+	struct iavf_fdir_eth eth_data;
+	struct iavf_fdir_eth eth_mask;
+
+	struct iavf_fdir_ip ip_data;
+	struct iavf_fdir_ip ip_mask;
+
+	struct iavf_fdir_extra ext_data;
+	struct iavf_fdir_extra ext_mask;
+
+	enum virtchnl_action action;
+
+	/* flex byte filter data */
+	u8 ip_ver; /* used to adjust the flex offset, 4 : IPv4, 6 : IPv6 */
+	u8 flex_cnt;
+	struct iavf_flex_word flex_words[IAVF_FLEX_WORD_NUM];
+
+	u32 flow_id;
+
+	u32 loc;	/* Rule location inside the flow table */
+	u32 q_index;
+
+	struct virtchnl_fdir_add vc_add_msg;
+};
+
+int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter,
+				  struct iavf_fdir_fltr *fltr);
+int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
+void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
+bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
+void iavf_fdir_list_add_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
+struct iavf_fdir_fltr *iavf_find_fdir_fltr_by_loc(struct iavf_adapter *adapter, u32 loc);
+#endif /* _IAVF_FDIR_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
new file mode 100644
index 0000000000..257865647c
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -0,0 +1,5323 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "iavf.h"
+#include "iavf_prototype.h"
+#include "iavf_client.h"
+/* All iavf tracepoints are defined by the include below, which must
+ * be included exactly once across the whole kernel with
+ * CREATE_TRACE_POINTS defined
+ */
+#define CREATE_TRACE_POINTS
+#include "iavf_trace.h"
+
+static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter);
+static int iavf_setup_all_rx_resources(struct iavf_adapter *adapter);
+static int iavf_close(struct net_device *netdev);
+static void iavf_init_get_resources(struct iavf_adapter *adapter);
+static int iavf_check_reset_complete(struct iavf_hw *hw);
+
+char iavf_driver_name[] = "iavf";
+static const char iavf_driver_string[] =
+	"Intel(R) Ethernet Adaptive Virtual Function Network Driver";
+
+static const char iavf_copyright[] =
+	"Copyright (c) 2013 - 2018 Intel Corporation.";
+
+/* iavf_pci_tbl - PCI Device ID Table
+ *
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ *   Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id iavf_pci_tbl[] = {
+	{PCI_VDEVICE(INTEL, IAVF_DEV_ID_VF), 0},
+	{PCI_VDEVICE(INTEL, IAVF_DEV_ID_VF_HV), 0},
+	{PCI_VDEVICE(INTEL, IAVF_DEV_ID_X722_VF), 0},
+	{PCI_VDEVICE(INTEL, IAVF_DEV_ID_ADAPTIVE_VF), 0},
+	/* required last entry */
+	{0, }
+};
+
+MODULE_DEVICE_TABLE(pci, iavf_pci_tbl);
+
+MODULE_ALIAS("i40evf");
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION("Intel(R) Ethernet Adaptive Virtual Function Network Driver");
+MODULE_LICENSE("GPL v2");
+
+static const struct net_device_ops iavf_netdev_ops;
+
+int iavf_status_to_errno(enum iavf_status status)
+{
+	switch (status) {
+	case IAVF_SUCCESS:
+		return 0;
+	case IAVF_ERR_PARAM:
+	case IAVF_ERR_MAC_TYPE:
+	case IAVF_ERR_INVALID_MAC_ADDR:
+	case IAVF_ERR_INVALID_LINK_SETTINGS:
+	case IAVF_ERR_INVALID_PD_ID:
+	case IAVF_ERR_INVALID_QP_ID:
+	case IAVF_ERR_INVALID_CQ_ID:
+	case IAVF_ERR_INVALID_CEQ_ID:
+	case IAVF_ERR_INVALID_AEQ_ID:
+	case IAVF_ERR_INVALID_SIZE:
+	case IAVF_ERR_INVALID_ARP_INDEX:
+	case IAVF_ERR_INVALID_FPM_FUNC_ID:
+	case IAVF_ERR_QP_INVALID_MSG_SIZE:
+	case IAVF_ERR_INVALID_FRAG_COUNT:
+	case IAVF_ERR_INVALID_ALIGNMENT:
+	case IAVF_ERR_INVALID_PUSH_PAGE_INDEX:
+	case IAVF_ERR_INVALID_IMM_DATA_SIZE:
+	case IAVF_ERR_INVALID_VF_ID:
+	case IAVF_ERR_INVALID_HMCFN_ID:
+	case IAVF_ERR_INVALID_PBLE_INDEX:
+	case IAVF_ERR_INVALID_SD_INDEX:
+	case IAVF_ERR_INVALID_PAGE_DESC_INDEX:
+	case IAVF_ERR_INVALID_SD_TYPE:
+	case IAVF_ERR_INVALID_HMC_OBJ_INDEX:
+	case IAVF_ERR_INVALID_HMC_OBJ_COUNT:
+	case IAVF_ERR_INVALID_SRQ_ARM_LIMIT:
+		return -EINVAL;
+	case IAVF_ERR_NVM:
+	case IAVF_ERR_NVM_CHECKSUM:
+	case IAVF_ERR_PHY:
+	case IAVF_ERR_CONFIG:
+	case IAVF_ERR_UNKNOWN_PHY:
+	case IAVF_ERR_LINK_SETUP:
+	case IAVF_ERR_ADAPTER_STOPPED:
+	case IAVF_ERR_PRIMARY_REQUESTS_PENDING:
+	case IAVF_ERR_AUTONEG_NOT_COMPLETE:
+	case IAVF_ERR_RESET_FAILED:
+	case IAVF_ERR_BAD_PTR:
+	case IAVF_ERR_SWFW_SYNC:
+	case IAVF_ERR_QP_TOOMANY_WRS_POSTED:
+	case IAVF_ERR_QUEUE_EMPTY:
+	case IAVF_ERR_FLUSHED_QUEUE:
+	case IAVF_ERR_OPCODE_MISMATCH:
+	case IAVF_ERR_CQP_COMPL_ERROR:
+	case IAVF_ERR_BACKING_PAGE_ERROR:
+	case IAVF_ERR_NO_PBLCHUNKS_AVAILABLE:
+	case IAVF_ERR_MEMCPY_FAILED:
+	case IAVF_ERR_SRQ_ENABLED:
+	case IAVF_ERR_ADMIN_QUEUE_ERROR:
+	case IAVF_ERR_ADMIN_QUEUE_FULL:
+	case IAVF_ERR_BAD_RDMA_CQE:
+	case IAVF_ERR_NVM_BLANK_MODE:
+	case IAVF_ERR_PE_DOORBELL_NOT_ENABLED:
+	case IAVF_ERR_DIAG_TEST_FAILED:
+	case IAVF_ERR_FIRMWARE_API_VERSION:
+	case IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR:
+		return -EIO;
+	case IAVF_ERR_DEVICE_NOT_SUPPORTED:
+		return -ENODEV;
+	case IAVF_ERR_NO_AVAILABLE_VSI:
+	case IAVF_ERR_RING_FULL:
+		return -ENOSPC;
+	case IAVF_ERR_NO_MEMORY:
+		return -ENOMEM;
+	case IAVF_ERR_TIMEOUT:
+	case IAVF_ERR_ADMIN_QUEUE_TIMEOUT:
+		return -ETIMEDOUT;
+	case IAVF_ERR_NOT_IMPLEMENTED:
+	case IAVF_NOT_SUPPORTED:
+		return -EOPNOTSUPP;
+	case IAVF_ERR_ADMIN_QUEUE_NO_WORK:
+		return -EALREADY;
+	case IAVF_ERR_NOT_READY:
+		return -EBUSY;
+	case IAVF_ERR_BUF_TOO_SHORT:
+		return -EMSGSIZE;
+	}
+
+	return -EIO;
+}
+
+int virtchnl_status_to_errno(enum virtchnl_status_code v_status)
+{
+	switch (v_status) {
+	case VIRTCHNL_STATUS_SUCCESS:
+		return 0;
+	case VIRTCHNL_STATUS_ERR_PARAM:
+	case VIRTCHNL_STATUS_ERR_INVALID_VF_ID:
+		return -EINVAL;
+	case VIRTCHNL_STATUS_ERR_NO_MEMORY:
+		return -ENOMEM;
+	case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH:
+	case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR:
+	case VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR:
+		return -EIO;
+	case VIRTCHNL_STATUS_ERR_NOT_SUPPORTED:
+		return -EOPNOTSUPP;
+	}
+
+	return -EIO;
+}
+
+/**
+ * iavf_pdev_to_adapter - go from pci_dev to adapter
+ * @pdev: pci_dev pointer
+ */
+static struct iavf_adapter *iavf_pdev_to_adapter(struct pci_dev *pdev)
+{
+	return netdev_priv(pci_get_drvdata(pdev));
+}
+
+/**
+ * iavf_is_reset_in_progress - Check if a reset is in progress
+ * @adapter: board private structure
+ */
+static bool iavf_is_reset_in_progress(struct iavf_adapter *adapter)
+{
+	if (adapter->state == __IAVF_RESETTING ||
+	    adapter->flags & (IAVF_FLAG_RESET_PENDING |
+			      IAVF_FLAG_RESET_NEEDED))
+		return true;
+
+	return false;
+}
+
+/**
+ * iavf_wait_for_reset - Wait for reset to finish.
+ * @adapter: board private structure
+ *
+ * Returns 0 if reset finished successfully, negative on timeout or interrupt.
+ */
+int iavf_wait_for_reset(struct iavf_adapter *adapter)
+{
+	int ret = wait_event_interruptible_timeout(adapter->reset_waitqueue,
+					!iavf_is_reset_in_progress(adapter),
+					msecs_to_jiffies(5000));
+
+	/* If ret < 0 then it means wait was interrupted.
+	 * If ret == 0 then it means we got a timeout while waiting
+	 * for reset to finish.
+	 * If ret > 0 it means reset has finished.
+	 */
+	if (ret > 0)
+		return 0;
+	else if (ret < 0)
+		return -EINTR;
+	else
+		return -EBUSY;
+}
+
+/**
+ * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to fill out
+ * @size: size of memory requested
+ * @alignment: what to align the allocation to
+ **/
+enum iavf_status iavf_allocate_dma_mem_d(struct iavf_hw *hw,
+					 struct iavf_dma_mem *mem,
+					 u64 size, u32 alignment)
+{
+	struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back;
+
+	if (!mem)
+		return IAVF_ERR_PARAM;
+
+	mem->size = ALIGN(size, alignment);
+	mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
+				     (dma_addr_t *)&mem->pa, GFP_KERNEL);
+	if (mem->va)
+		return 0;
+	else
+		return IAVF_ERR_NO_MEMORY;
+}
+
+/**
+ * iavf_free_dma_mem - wrapper for DMA memory freeing
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to free
+ **/
+enum iavf_status iavf_free_dma_mem(struct iavf_hw *hw, struct iavf_dma_mem *mem)
+{
+	struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back;
+
+	if (!mem || !mem->va)
+		return IAVF_ERR_PARAM;
+	dma_free_coherent(&adapter->pdev->dev, mem->size,
+			  mem->va, (dma_addr_t)mem->pa);
+	return 0;
+}
+
+/**
+ * iavf_allocate_virt_mem - virt memory alloc wrapper
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to fill out
+ * @size: size of memory requested
+ **/
+enum iavf_status iavf_allocate_virt_mem(struct iavf_hw *hw,
+					struct iavf_virt_mem *mem, u32 size)
+{
+	if (!mem)
+		return IAVF_ERR_PARAM;
+
+	mem->size = size;
+	mem->va = kzalloc(size, GFP_KERNEL);
+
+	if (mem->va)
+		return 0;
+	else
+		return IAVF_ERR_NO_MEMORY;
+}
+
+/**
+ * iavf_free_virt_mem - virt memory free wrapper
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to free
+ **/
+void iavf_free_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem)
+{
+	kfree(mem->va);
+}
+
+/**
+ * iavf_schedule_reset - Set the flags and schedule a reset event
+ * @adapter: board private structure
+ * @flags: IAVF_FLAG_RESET_PENDING or IAVF_FLAG_RESET_NEEDED
+ **/
+void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags)
+{
+	if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) &&
+	    !(adapter->flags &
+	    (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) {
+		adapter->flags |= flags;
+		queue_work(adapter->wq, &adapter->reset_task);
+	}
+}
+
+/**
+ * iavf_schedule_aq_request - Set the flags and schedule aq request
+ * @adapter: board private structure
+ * @flags: requested aq flags
+ **/
+void iavf_schedule_aq_request(struct iavf_adapter *adapter, u64 flags)
+{
+	adapter->aq_required |= flags;
+	mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
+}
+
+/**
+ * iavf_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ * @txqueue: queue number that is timing out
+ **/
+static void iavf_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	adapter->tx_timeout_count++;
+	iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+}
+
+/**
+ * iavf_misc_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ **/
+static void iavf_misc_irq_disable(struct iavf_adapter *adapter)
+{
+	struct iavf_hw *hw = &adapter->hw;
+
+	if (!adapter->msix_entries)
+		return;
+
+	wr32(hw, IAVF_VFINT_DYN_CTL01, 0);
+
+	iavf_flush(hw);
+
+	synchronize_irq(adapter->msix_entries[0].vector);
+}
+
+/**
+ * iavf_misc_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ **/
+static void iavf_misc_irq_enable(struct iavf_adapter *adapter)
+{
+	struct iavf_hw *hw = &adapter->hw;
+
+	wr32(hw, IAVF_VFINT_DYN_CTL01, IAVF_VFINT_DYN_CTL01_INTENA_MASK |
+				       IAVF_VFINT_DYN_CTL01_ITR_INDX_MASK);
+	wr32(hw, IAVF_VFINT_ICR0_ENA1, IAVF_VFINT_ICR0_ENA1_ADMINQ_MASK);
+
+	iavf_flush(hw);
+}
+
+/**
+ * iavf_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ **/
+static void iavf_irq_disable(struct iavf_adapter *adapter)
+{
+	int i;
+	struct iavf_hw *hw = &adapter->hw;
+
+	if (!adapter->msix_entries)
+		return;
+
+	for (i = 1; i < adapter->num_msix_vectors; i++) {
+		wr32(hw, IAVF_VFINT_DYN_CTLN1(i - 1), 0);
+		synchronize_irq(adapter->msix_entries[i].vector);
+	}
+	iavf_flush(hw);
+}
+
+/**
+ * iavf_irq_enable_queues - Enable interrupt for all queues
+ * @adapter: board private structure
+ **/
+static void iavf_irq_enable_queues(struct iavf_adapter *adapter)
+{
+	struct iavf_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 1; i < adapter->num_msix_vectors; i++) {
+		wr32(hw, IAVF_VFINT_DYN_CTLN1(i - 1),
+		     IAVF_VFINT_DYN_CTLN1_INTENA_MASK |
+		     IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK);
+	}
+}
+
+/**
+ * iavf_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ * @flush: boolean value whether to run rd32()
+ **/
+void iavf_irq_enable(struct iavf_adapter *adapter, bool flush)
+{
+	struct iavf_hw *hw = &adapter->hw;
+
+	iavf_misc_irq_enable(adapter);
+	iavf_irq_enable_queues(adapter);
+
+	if (flush)
+		iavf_flush(hw);
+}
+
+/**
+ * iavf_msix_aq - Interrupt handler for vector 0
+ * @irq: interrupt number
+ * @data: pointer to netdev
+ **/
+static irqreturn_t iavf_msix_aq(int irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_hw *hw = &adapter->hw;
+
+	/* handle non-queue interrupts, these reads clear the registers */
+	rd32(hw, IAVF_VFINT_ICR01);
+	rd32(hw, IAVF_VFINT_ICR0_ENA1);
+
+	if (adapter->state != __IAVF_REMOVE)
+		/* schedule work on the private workqueue */
+		queue_work(adapter->wq, &adapter->adminq_task);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * iavf_msix_clean_rings - MSIX mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ **/
+static irqreturn_t iavf_msix_clean_rings(int irq, void *data)
+{
+	struct iavf_q_vector *q_vector = data;
+
+	if (!q_vector->tx.ring && !q_vector->rx.ring)
+		return IRQ_HANDLED;
+
+	napi_schedule_irqoff(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * iavf_map_vector_to_rxq - associate irqs with rx queues
+ * @adapter: board private structure
+ * @v_idx: interrupt number
+ * @r_idx: queue number
+ **/
+static void
+iavf_map_vector_to_rxq(struct iavf_adapter *adapter, int v_idx, int r_idx)
+{
+	struct iavf_q_vector *q_vector = &adapter->q_vectors[v_idx];
+	struct iavf_ring *rx_ring = &adapter->rx_rings[r_idx];
+	struct iavf_hw *hw = &adapter->hw;
+
+	rx_ring->q_vector = q_vector;
+	rx_ring->next = q_vector->rx.ring;
+	rx_ring->vsi = &adapter->vsi;
+	q_vector->rx.ring = rx_ring;
+	q_vector->rx.count++;
+	q_vector->rx.next_update = jiffies + 1;
+	q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
+	q_vector->ring_mask |= BIT(r_idx);
+	wr32(hw, IAVF_VFINT_ITRN1(IAVF_RX_ITR, q_vector->reg_idx),
+	     q_vector->rx.current_itr >> 1);
+	q_vector->rx.current_itr = q_vector->rx.target_itr;
+}
+
+/**
+ * iavf_map_vector_to_txq - associate irqs with tx queues
+ * @adapter: board private structure
+ * @v_idx: interrupt number
+ * @t_idx: queue number
+ **/
+static void
+iavf_map_vector_to_txq(struct iavf_adapter *adapter, int v_idx, int t_idx)
+{
+	struct iavf_q_vector *q_vector = &adapter->q_vectors[v_idx];
+	struct iavf_ring *tx_ring = &adapter->tx_rings[t_idx];
+	struct iavf_hw *hw = &adapter->hw;
+
+	tx_ring->q_vector = q_vector;
+	tx_ring->next = q_vector->tx.ring;
+	tx_ring->vsi = &adapter->vsi;
+	q_vector->tx.ring = tx_ring;
+	q_vector->tx.count++;
+	q_vector->tx.next_update = jiffies + 1;
+	q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
+	q_vector->num_ringpairs++;
+	wr32(hw, IAVF_VFINT_ITRN1(IAVF_TX_ITR, q_vector->reg_idx),
+	     q_vector->tx.target_itr >> 1);
+	q_vector->tx.current_itr = q_vector->tx.target_itr;
+}
+
+/**
+ * iavf_map_rings_to_vectors - Maps descriptor rings to vectors
+ * @adapter: board private structure to initialize
+ *
+ * This function maps descriptor rings to the queue-specific vectors
+ * we were allotted through the MSI-X enabling code.  Ideally, we'd have
+ * one vector per ring/queue, but on a constrained vector budget, we
+ * group the rings as "efficiently" as possible.  You would add new
+ * mapping configurations in here.
+ **/
+static void iavf_map_rings_to_vectors(struct iavf_adapter *adapter)
+{
+	int rings_remaining = adapter->num_active_queues;
+	int ridx = 0, vidx = 0;
+	int q_vectors;
+
+	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+
+	for (; ridx < rings_remaining; ridx++) {
+		iavf_map_vector_to_rxq(adapter, vidx, ridx);
+		iavf_map_vector_to_txq(adapter, vidx, ridx);
+
+		/* In the case where we have more queues than vectors, continue
+		 * round-robin on vectors until all queues are mapped.
+		 */
+		if (++vidx >= q_vectors)
+			vidx = 0;
+	}
+
+	adapter->aq_required |= IAVF_FLAG_AQ_MAP_VECTORS;
+}
+
+/**
+ * iavf_irq_affinity_notify - Callback for affinity changes
+ * @notify: context as to what irq was changed
+ * @mask: the new affinity mask
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * so that we may register to receive changes to the irq affinity masks.
+ **/
+static void iavf_irq_affinity_notify(struct irq_affinity_notify *notify,
+				     const cpumask_t *mask)
+{
+	struct iavf_q_vector *q_vector =
+		container_of(notify, struct iavf_q_vector, affinity_notify);
+
+	cpumask_copy(&q_vector->affinity_mask, mask);
+}
+
+/**
+ * iavf_irq_affinity_release - Callback for affinity notifier release
+ * @ref: internal core kernel usage
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * to inform the current notification subscriber that they will no longer
+ * receive notifications.
+ **/
+static void iavf_irq_affinity_release(struct kref *ref) {}
+
+/**
+ * iavf_request_traffic_irqs - Initialize MSI-X interrupts
+ * @adapter: board private structure
+ * @basename: device basename
+ *
+ * Allocates MSI-X vectors for tx and rx handling, and requests
+ * interrupts from the kernel.
+ **/
+static int
+iavf_request_traffic_irqs(struct iavf_adapter *adapter, char *basename)
+{
+	unsigned int vector, q_vectors;
+	unsigned int rx_int_idx = 0, tx_int_idx = 0;
+	int irq_num, err;
+	int cpu;
+
+	iavf_irq_disable(adapter);
+	/* Decrement for Other and TCP Timer vectors */
+	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+
+	for (vector = 0; vector < q_vectors; vector++) {
+		struct iavf_q_vector *q_vector = &adapter->q_vectors[vector];
+
+		irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
+
+		if (q_vector->tx.ring && q_vector->rx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "iavf-%s-TxRx-%u", basename, rx_int_idx++);
+			tx_int_idx++;
+		} else if (q_vector->rx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "iavf-%s-rx-%u", basename, rx_int_idx++);
+		} else if (q_vector->tx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "iavf-%s-tx-%u", basename, tx_int_idx++);
+		} else {
+			/* skip this unused q_vector */
+			continue;
+		}
+		err = request_irq(irq_num,
+				  iavf_msix_clean_rings,
+				  0,
+				  q_vector->name,
+				  q_vector);
+		if (err) {
+			dev_info(&adapter->pdev->dev,
+				 "Request_irq failed, error: %d\n", err);
+			goto free_queue_irqs;
+		}
+		/* register for affinity change notifications */
+		q_vector->affinity_notify.notify = iavf_irq_affinity_notify;
+		q_vector->affinity_notify.release =
+						   iavf_irq_affinity_release;
+		irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
+		/* Spread the IRQ affinity hints across online CPUs. Note that
+		 * get_cpu_mask returns a mask with a permanent lifetime so
+		 * it's safe to use as a hint for irq_update_affinity_hint.
+		 */
+		cpu = cpumask_local_spread(q_vector->v_idx, -1);
+		irq_update_affinity_hint(irq_num, get_cpu_mask(cpu));
+	}
+
+	return 0;
+
+free_queue_irqs:
+	while (vector) {
+		vector--;
+		irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
+		irq_set_affinity_notifier(irq_num, NULL);
+		irq_update_affinity_hint(irq_num, NULL);
+		free_irq(irq_num, &adapter->q_vectors[vector]);
+	}
+	return err;
+}
+
+/**
+ * iavf_request_misc_irq - Initialize MSI-X interrupts
+ * @adapter: board private structure
+ *
+ * Allocates MSI-X vector 0 and requests interrupts from the kernel. This
+ * vector is only for the admin queue, and stays active even when the netdev
+ * is closed.
+ **/
+static int iavf_request_misc_irq(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int err;
+
+	snprintf(adapter->misc_vector_name,
+		 sizeof(adapter->misc_vector_name) - 1, "iavf-%s:mbx",
+		 dev_name(&adapter->pdev->dev));
+	err = request_irq(adapter->msix_entries[0].vector,
+			  &iavf_msix_aq, 0,
+			  adapter->misc_vector_name, netdev);
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"request_irq for %s failed: %d\n",
+			adapter->misc_vector_name, err);
+		free_irq(adapter->msix_entries[0].vector, netdev);
+	}
+	return err;
+}
+
+/**
+ * iavf_free_traffic_irqs - Free MSI-X interrupts
+ * @adapter: board private structure
+ *
+ * Frees all MSI-X vectors other than 0.
+ **/
+static void iavf_free_traffic_irqs(struct iavf_adapter *adapter)
+{
+	int vector, irq_num, q_vectors;
+
+	if (!adapter->msix_entries)
+		return;
+
+	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+
+	for (vector = 0; vector < q_vectors; vector++) {
+		irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
+		irq_set_affinity_notifier(irq_num, NULL);
+		irq_update_affinity_hint(irq_num, NULL);
+		free_irq(irq_num, &adapter->q_vectors[vector]);
+	}
+}
+
+/**
+ * iavf_free_misc_irq - Free MSI-X miscellaneous vector
+ * @adapter: board private structure
+ *
+ * Frees MSI-X vector 0.
+ **/
+static void iavf_free_misc_irq(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	if (!adapter->msix_entries)
+		return;
+
+	free_irq(adapter->msix_entries[0].vector, netdev);
+}
+
+/**
+ * iavf_configure_tx - Configure Transmit Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+static void iavf_configure_tx(struct iavf_adapter *adapter)
+{
+	struct iavf_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 0; i < adapter->num_active_queues; i++)
+		adapter->tx_rings[i].tail = hw->hw_addr + IAVF_QTX_TAIL1(i);
+}
+
+/**
+ * iavf_configure_rx - Configure Receive Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+static void iavf_configure_rx(struct iavf_adapter *adapter)
+{
+	unsigned int rx_buf_len = IAVF_RXBUFFER_2048;
+	struct iavf_hw *hw = &adapter->hw;
+	int i;
+
+	/* Legacy Rx will always default to a 2048 buffer size. */
+#if (PAGE_SIZE < 8192)
+	if (!(adapter->flags & IAVF_FLAG_LEGACY_RX)) {
+		struct net_device *netdev = adapter->netdev;
+
+		/* For jumbo frames on systems with 4K pages we have to use
+		 * an order 1 page, so we might as well increase the size
+		 * of our Rx buffer to make better use of the available space
+		 */
+		rx_buf_len = IAVF_RXBUFFER_3072;
+
+		/* We use a 1536 buffer size for configurations with
+		 * standard Ethernet mtu.  On x86 this gives us enough room
+		 * for shared info and 192 bytes of padding.
+		 */
+		if (!IAVF_2K_TOO_SMALL_WITH_PADDING &&
+		    (netdev->mtu <= ETH_DATA_LEN))
+			rx_buf_len = IAVF_RXBUFFER_1536 - NET_IP_ALIGN;
+	}
+#endif
+
+	for (i = 0; i < adapter->num_active_queues; i++) {
+		adapter->rx_rings[i].tail = hw->hw_addr + IAVF_QRX_TAIL1(i);
+		adapter->rx_rings[i].rx_buf_len = rx_buf_len;
+
+		if (adapter->flags & IAVF_FLAG_LEGACY_RX)
+			clear_ring_build_skb_enabled(&adapter->rx_rings[i]);
+		else
+			set_ring_build_skb_enabled(&adapter->rx_rings[i]);
+	}
+}
+
+/**
+ * iavf_find_vlan - Search filter list for specific vlan filter
+ * @adapter: board private structure
+ * @vlan: vlan tag
+ *
+ * Returns ptr to the filter object or NULL. Must be called while holding the
+ * mac_vlan_list_lock.
+ **/
+static struct
+iavf_vlan_filter *iavf_find_vlan(struct iavf_adapter *adapter,
+				 struct iavf_vlan vlan)
+{
+	struct iavf_vlan_filter *f;
+
+	list_for_each_entry(f, &adapter->vlan_filter_list, list) {
+		if (f->vlan.vid == vlan.vid &&
+		    f->vlan.tpid == vlan.tpid)
+			return f;
+	}
+
+	return NULL;
+}
+
+/**
+ * iavf_add_vlan - Add a vlan filter to the list
+ * @adapter: board private structure
+ * @vlan: VLAN tag
+ *
+ * Returns ptr to the filter object or NULL when no memory available.
+ **/
+static struct
+iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter,
+				struct iavf_vlan vlan)
+{
+	struct iavf_vlan_filter *f = NULL;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	f = iavf_find_vlan(adapter, vlan);
+	if (!f) {
+		f = kzalloc(sizeof(*f), GFP_ATOMIC);
+		if (!f)
+			goto clearout;
+
+		f->vlan = vlan;
+
+		list_add_tail(&f->list, &adapter->vlan_filter_list);
+		f->state = IAVF_VLAN_ADD;
+		adapter->num_vlan_filters++;
+		iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_ADD_VLAN_FILTER);
+	}
+
+clearout:
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+	return f;
+}
+
+/**
+ * iavf_del_vlan - Remove a vlan filter from the list
+ * @adapter: board private structure
+ * @vlan: VLAN tag
+ **/
+static void iavf_del_vlan(struct iavf_adapter *adapter, struct iavf_vlan vlan)
+{
+	struct iavf_vlan_filter *f;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	f = iavf_find_vlan(adapter, vlan);
+	if (f) {
+		f->state = IAVF_VLAN_REMOVE;
+		iavf_schedule_aq_request(adapter, IAVF_FLAG_AQ_DEL_VLAN_FILTER);
+	}
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+}
+
+/**
+ * iavf_restore_filters
+ * @adapter: board private structure
+ *
+ * Restore existing non MAC filters when VF netdev comes back up
+ **/
+static void iavf_restore_filters(struct iavf_adapter *adapter)
+{
+	struct iavf_vlan_filter *f;
+
+	/* re-add all VLAN filters */
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	list_for_each_entry(f, &adapter->vlan_filter_list, list) {
+		if (f->state == IAVF_VLAN_INACTIVE)
+			f->state = IAVF_VLAN_ADD;
+	}
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+	adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER;
+}
+
+/**
+ * iavf_get_num_vlans_added - get number of VLANs added
+ * @adapter: board private structure
+ */
+u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter)
+{
+	return adapter->num_vlan_filters;
+}
+
+/**
+ * iavf_get_max_vlans_allowed - get maximum VLANs allowed for this VF
+ * @adapter: board private structure
+ *
+ * This depends on the negotiated VLAN capability. For VIRTCHNL_VF_OFFLOAD_VLAN,
+ * do not impose a limit as that maintains current behavior and for
+ * VIRTCHNL_VF_OFFLOAD_VLAN_V2, use the maximum allowed sent from the PF.
+ **/
+static u16 iavf_get_max_vlans_allowed(struct iavf_adapter *adapter)
+{
+	/* don't impose any limit for VIRTCHNL_VF_OFFLOAD_VLAN since there has
+	 * never been a limit on the VF driver side
+	 */
+	if (VLAN_ALLOWED(adapter))
+		return VLAN_N_VID;
+	else if (VLAN_V2_ALLOWED(adapter))
+		return adapter->vlan_v2_caps.filtering.max_filters;
+
+	return 0;
+}
+
+/**
+ * iavf_max_vlans_added - check if maximum VLANs allowed already exist
+ * @adapter: board private structure
+ **/
+static bool iavf_max_vlans_added(struct iavf_adapter *adapter)
+{
+	if (iavf_get_num_vlans_added(adapter) <
+	    iavf_get_max_vlans_allowed(adapter))
+		return false;
+
+	return true;
+}
+
+/**
+ * iavf_vlan_rx_add_vid - Add a VLAN filter to a device
+ * @netdev: network device struct
+ * @proto: unused protocol data
+ * @vid: VLAN tag
+ **/
+static int iavf_vlan_rx_add_vid(struct net_device *netdev,
+				__always_unused __be16 proto, u16 vid)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	/* Do not track VLAN 0 filter, always added by the PF on VF init */
+	if (!vid)
+		return 0;
+
+	if (!VLAN_FILTERING_ALLOWED(adapter))
+		return -EIO;
+
+	if (iavf_max_vlans_added(adapter)) {
+		netdev_err(netdev, "Max allowed VLAN filters %u. Remove existing VLANs or disable filtering via Ethtool if supported.\n",
+			   iavf_get_max_vlans_allowed(adapter));
+		return -EIO;
+	}
+
+	if (!iavf_add_vlan(adapter, IAVF_VLAN(vid, be16_to_cpu(proto))))
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * iavf_vlan_rx_kill_vid - Remove a VLAN filter from a device
+ * @netdev: network device struct
+ * @proto: unused protocol data
+ * @vid: VLAN tag
+ **/
+static int iavf_vlan_rx_kill_vid(struct net_device *netdev,
+				 __always_unused __be16 proto, u16 vid)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	/* We do not track VLAN 0 filter */
+	if (!vid)
+		return 0;
+
+	iavf_del_vlan(adapter, IAVF_VLAN(vid, be16_to_cpu(proto)));
+	return 0;
+}
+
+/**
+ * iavf_find_filter - Search filter list for specific mac filter
+ * @adapter: board private structure
+ * @macaddr: the MAC address
+ *
+ * Returns ptr to the filter object or NULL. Must be called while holding the
+ * mac_vlan_list_lock.
+ **/
+static struct
+iavf_mac_filter *iavf_find_filter(struct iavf_adapter *adapter,
+				  const u8 *macaddr)
+{
+	struct iavf_mac_filter *f;
+
+	if (!macaddr)
+		return NULL;
+
+	list_for_each_entry(f, &adapter->mac_filter_list, list) {
+		if (ether_addr_equal(macaddr, f->macaddr))
+			return f;
+	}
+	return NULL;
+}
+
+/**
+ * iavf_add_filter - Add a mac filter to the filter list
+ * @adapter: board private structure
+ * @macaddr: the MAC address
+ *
+ * Returns ptr to the filter object or NULL when no memory available.
+ **/
+struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
+					const u8 *macaddr)
+{
+	struct iavf_mac_filter *f;
+
+	if (!macaddr)
+		return NULL;
+
+	f = iavf_find_filter(adapter, macaddr);
+	if (!f) {
+		f = kzalloc(sizeof(*f), GFP_ATOMIC);
+		if (!f)
+			return f;
+
+		ether_addr_copy(f->macaddr, macaddr);
+
+		list_add_tail(&f->list, &adapter->mac_filter_list);
+		f->add = true;
+		f->add_handled = false;
+		f->is_new_mac = true;
+		f->is_primary = ether_addr_equal(macaddr, adapter->hw.mac.addr);
+		adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
+	} else {
+		f->remove = false;
+	}
+
+	return f;
+}
+
+/**
+ * iavf_replace_primary_mac - Replace current primary address
+ * @adapter: board private structure
+ * @new_mac: new MAC address to be applied
+ *
+ * Replace current dev_addr and send request to PF for removal of previous
+ * primary MAC address filter and addition of new primary MAC filter.
+ * Return 0 for success, -ENOMEM for failure.
+ *
+ * Do not call this with mac_vlan_list_lock!
+ **/
+static int iavf_replace_primary_mac(struct iavf_adapter *adapter,
+				    const u8 *new_mac)
+{
+	struct iavf_hw *hw = &adapter->hw;
+	struct iavf_mac_filter *new_f;
+	struct iavf_mac_filter *old_f;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	new_f = iavf_add_filter(adapter, new_mac);
+	if (!new_f) {
+		spin_unlock_bh(&adapter->mac_vlan_list_lock);
+		return -ENOMEM;
+	}
+
+	old_f = iavf_find_filter(adapter, hw->mac.addr);
+	if (old_f) {
+		old_f->is_primary = false;
+		old_f->remove = true;
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER;
+	}
+	/* Always send the request to add if changing primary MAC,
+	 * even if filter is already present on the list
+	 */
+	new_f->is_primary = true;
+	new_f->add = true;
+	adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
+	ether_addr_copy(hw->mac.addr, new_mac);
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	/* schedule the watchdog task to immediately process the request */
+	mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
+	return 0;
+}
+
+/**
+ * iavf_is_mac_set_handled - wait for a response to set MAC from PF
+ * @netdev: network interface device structure
+ * @macaddr: MAC address to set
+ *
+ * Returns true on success, false on failure
+ */
+static bool iavf_is_mac_set_handled(struct net_device *netdev,
+				    const u8 *macaddr)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_mac_filter *f;
+	bool ret = false;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	f = iavf_find_filter(adapter, macaddr);
+
+	if (!f || (!f->add && f->add_handled))
+		ret = true;
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	return ret;
+}
+
+/**
+ * iavf_set_mac - NDO callback to set port MAC address
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int iavf_set_mac(struct net_device *netdev, void *p)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct sockaddr *addr = p;
+	int ret;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	ret = iavf_replace_primary_mac(adapter, addr->sa_data);
+
+	if (ret)
+		return ret;
+
+	ret = wait_event_interruptible_timeout(adapter->vc_waitqueue,
+					       iavf_is_mac_set_handled(netdev, addr->sa_data),
+					       msecs_to_jiffies(2500));
+
+	/* If ret < 0 then it means wait was interrupted.
+	 * If ret == 0 then it means we got a timeout.
+	 * else it means we got response for set MAC from PF,
+	 * check if netdev MAC was updated to requested MAC,
+	 * if yes then set MAC succeeded otherwise it failed return -EACCES
+	 */
+	if (ret < 0)
+		return ret;
+
+	if (!ret)
+		return -EAGAIN;
+
+	if (!ether_addr_equal(netdev->dev_addr, addr->sa_data))
+		return -EACCES;
+
+	return 0;
+}
+
+/**
+ * iavf_addr_sync - Callback for dev_(mc|uc)_sync to add address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int iavf_addr_sync(struct net_device *netdev, const u8 *addr)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	if (iavf_add_filter(adapter, addr))
+		return 0;
+	else
+		return -ENOMEM;
+}
+
+/**
+ * iavf_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int iavf_addr_unsync(struct net_device *netdev, const u8 *addr)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_mac_filter *f;
+
+	/* Under some circumstances, we might receive a request to delete
+	 * our own device address from our uc list. Because we store the
+	 * device address in the VSI's MAC/VLAN filter list, we need to ignore
+	 * such requests and not delete our device address from this list.
+	 */
+	if (ether_addr_equal(addr, netdev->dev_addr))
+		return 0;
+
+	f = iavf_find_filter(adapter, addr);
+	if (f) {
+		f->remove = true;
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER;
+	}
+	return 0;
+}
+
+/**
+ * iavf_promiscuous_mode_changed - check if promiscuous mode bits changed
+ * @adapter: device specific adapter
+ */
+bool iavf_promiscuous_mode_changed(struct iavf_adapter *adapter)
+{
+	return (adapter->current_netdev_promisc_flags ^ adapter->netdev->flags) &
+		(IFF_PROMISC | IFF_ALLMULTI);
+}
+
+/**
+ * iavf_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ **/
+static void iavf_set_rx_mode(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+	__dev_uc_sync(netdev, iavf_addr_sync, iavf_addr_unsync);
+	__dev_mc_sync(netdev, iavf_addr_sync, iavf_addr_unsync);
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	spin_lock_bh(&adapter->current_netdev_promisc_flags_lock);
+	if (iavf_promiscuous_mode_changed(adapter))
+		adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE;
+	spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock);
+}
+
+/**
+ * iavf_napi_enable_all - enable NAPI on all queue vectors
+ * @adapter: board private structure
+ **/
+static void iavf_napi_enable_all(struct iavf_adapter *adapter)
+{
+	int q_idx;
+	struct iavf_q_vector *q_vector;
+	int q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+
+	for (q_idx = 0; q_idx < q_vectors; q_idx++) {
+		struct napi_struct *napi;
+
+		q_vector = &adapter->q_vectors[q_idx];
+		napi = &q_vector->napi;
+		napi_enable(napi);
+	}
+}
+
+/**
+ * iavf_napi_disable_all - disable NAPI on all queue vectors
+ * @adapter: board private structure
+ **/
+static void iavf_napi_disable_all(struct iavf_adapter *adapter)
+{
+	int q_idx;
+	struct iavf_q_vector *q_vector;
+	int q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+
+	for (q_idx = 0; q_idx < q_vectors; q_idx++) {
+		q_vector = &adapter->q_vectors[q_idx];
+		napi_disable(&q_vector->napi);
+	}
+}
+
+/**
+ * iavf_configure - set up transmit and receive data structures
+ * @adapter: board private structure
+ **/
+static void iavf_configure(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int i;
+
+	iavf_set_rx_mode(netdev);
+
+	iavf_configure_tx(adapter);
+	iavf_configure_rx(adapter);
+	adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_QUEUES;
+
+	for (i = 0; i < adapter->num_active_queues; i++) {
+		struct iavf_ring *ring = &adapter->rx_rings[i];
+
+		iavf_alloc_rx_buffers(ring, IAVF_DESC_UNUSED(ring));
+	}
+}
+
+/**
+ * iavf_up_complete - Finish the last steps of bringing up a connection
+ * @adapter: board private structure
+ *
+ * Expects to be called while holding the __IAVF_IN_CRITICAL_TASK bit lock.
+ **/
+static void iavf_up_complete(struct iavf_adapter *adapter)
+{
+	iavf_change_state(adapter, __IAVF_RUNNING);
+	clear_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+
+	iavf_napi_enable_all(adapter);
+
+	adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_QUEUES;
+	if (CLIENT_ENABLED(adapter))
+		adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_OPEN;
+	mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
+}
+
+/**
+ * iavf_clear_mac_vlan_filters - Remove mac and vlan filters not sent to PF
+ * yet and mark other to be removed.
+ * @adapter: board private structure
+ **/
+static void iavf_clear_mac_vlan_filters(struct iavf_adapter *adapter)
+{
+	struct iavf_vlan_filter *vlf, *vlftmp;
+	struct iavf_mac_filter *f, *ftmp;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+	/* clear the sync flag on all filters */
+	__dev_uc_unsync(adapter->netdev, NULL);
+	__dev_mc_unsync(adapter->netdev, NULL);
+
+	/* remove all MAC filters */
+	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list,
+				 list) {
+		if (f->add) {
+			list_del(&f->list);
+			kfree(f);
+		} else {
+			f->remove = true;
+		}
+	}
+
+	/* disable all VLAN filters */
+	list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list,
+				 list)
+		vlf->state = IAVF_VLAN_DISABLE;
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+}
+
+/**
+ * iavf_clear_cloud_filters - Remove cloud filters not sent to PF yet and
+ * mark other to be removed.
+ * @adapter: board private structure
+ **/
+static void iavf_clear_cloud_filters(struct iavf_adapter *adapter)
+{
+	struct iavf_cloud_filter *cf, *cftmp;
+
+	/* remove all cloud filters */
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
+				 list) {
+		if (cf->add) {
+			list_del(&cf->list);
+			kfree(cf);
+			adapter->num_cloud_filters--;
+		} else {
+			cf->del = true;
+		}
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+}
+
+/**
+ * iavf_clear_fdir_filters - Remove fdir filters not sent to PF yet and mark
+ * other to be removed.
+ * @adapter: board private structure
+ **/
+static void iavf_clear_fdir_filters(struct iavf_adapter *adapter)
+{
+	struct iavf_fdir_fltr *fdir;
+
+	/* remove all Flow Director filters */
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry(fdir, &adapter->fdir_list_head, list) {
+		if (fdir->state == IAVF_FDIR_FLTR_ADD_REQUEST) {
+			/* Cancel a request, keep filter as inactive */
+			fdir->state = IAVF_FDIR_FLTR_INACTIVE;
+		} else if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING ||
+			 fdir->state == IAVF_FDIR_FLTR_ACTIVE) {
+			/* Disable filters which are active or have a pending
+			 * request to PF to be added
+			 */
+			fdir->state = IAVF_FDIR_FLTR_DIS_REQUEST;
+		}
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+}
+
+/**
+ * iavf_clear_adv_rss_conf - Remove adv rss conf not sent to PF yet and mark
+ * other to be removed.
+ * @adapter: board private structure
+ **/
+static void iavf_clear_adv_rss_conf(struct iavf_adapter *adapter)
+{
+	struct iavf_adv_rss *rss, *rsstmp;
+
+	/* remove all advance RSS configuration */
+	spin_lock_bh(&adapter->adv_rss_lock);
+	list_for_each_entry_safe(rss, rsstmp, &adapter->adv_rss_list_head,
+				 list) {
+		if (rss->state == IAVF_ADV_RSS_ADD_REQUEST) {
+			list_del(&rss->list);
+			kfree(rss);
+		} else {
+			rss->state = IAVF_ADV_RSS_DEL_REQUEST;
+		}
+	}
+	spin_unlock_bh(&adapter->adv_rss_lock);
+}
+
+/**
+ * iavf_down - Shutdown the connection processing
+ * @adapter: board private structure
+ *
+ * Expects to be called while holding the __IAVF_IN_CRITICAL_TASK bit lock.
+ **/
+void iavf_down(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	if (adapter->state <= __IAVF_DOWN_PENDING)
+		return;
+
+	netif_carrier_off(netdev);
+	netif_tx_disable(netdev);
+	adapter->link_up = false;
+	iavf_napi_disable_all(adapter);
+	iavf_irq_disable(adapter);
+
+	iavf_clear_mac_vlan_filters(adapter);
+	iavf_clear_cloud_filters(adapter);
+	iavf_clear_fdir_filters(adapter);
+	iavf_clear_adv_rss_conf(adapter);
+
+	if (!(adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) &&
+	    !(test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))) {
+		/* cancel any current operation */
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+		/* Schedule operations to close down the HW. Don't wait
+		 * here for this to complete. The watchdog is still running
+		 * and it will take care of this.
+		 */
+		if (!list_empty(&adapter->mac_filter_list))
+			adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER;
+		if (!list_empty(&adapter->vlan_filter_list))
+			adapter->aq_required |= IAVF_FLAG_AQ_DEL_VLAN_FILTER;
+		if (!list_empty(&adapter->cloud_filter_list))
+			adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER;
+		if (!list_empty(&adapter->fdir_list_head))
+			adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER;
+		if (!list_empty(&adapter->adv_rss_list_head))
+			adapter->aq_required |= IAVF_FLAG_AQ_DEL_ADV_RSS_CFG;
+	}
+
+	adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES;
+	mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
+}
+
+/**
+ * iavf_acquire_msix_vectors - Setup the MSIX capability
+ * @adapter: board private structure
+ * @vectors: number of vectors to request
+ *
+ * Work with the OS to set up the MSIX vectors needed.
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int
+iavf_acquire_msix_vectors(struct iavf_adapter *adapter, int vectors)
+{
+	int err, vector_threshold;
+
+	/* We'll want at least 3 (vector_threshold):
+	 * 0) Other (Admin Queue and link, mostly)
+	 * 1) TxQ[0] Cleanup
+	 * 2) RxQ[0] Cleanup
+	 */
+	vector_threshold = MIN_MSIX_COUNT;
+
+	/* The more we get, the more we will assign to Tx/Rx Cleanup
+	 * for the separate queues...where Rx Cleanup >= Tx Cleanup.
+	 * Right now, we simply care about how many we'll get; we'll
+	 * set them up later while requesting irq's.
+	 */
+	err = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
+				    vector_threshold, vectors);
+	if (err < 0) {
+		dev_err(&adapter->pdev->dev, "Unable to allocate MSI-X interrupts\n");
+		kfree(adapter->msix_entries);
+		adapter->msix_entries = NULL;
+		return err;
+	}
+
+	/* Adjust for only the vectors we'll use, which is minimum
+	 * of max_msix_q_vectors + NONQ_VECS, or the number of
+	 * vectors we were allocated.
+	 */
+	adapter->num_msix_vectors = err;
+	return 0;
+}
+
+/**
+ * iavf_free_queues - Free memory for all rings
+ * @adapter: board private structure to initialize
+ *
+ * Free all of the memory associated with queue pairs.
+ **/
+static void iavf_free_queues(struct iavf_adapter *adapter)
+{
+	if (!adapter->vsi_res)
+		return;
+	adapter->num_active_queues = 0;
+	kfree(adapter->tx_rings);
+	adapter->tx_rings = NULL;
+	kfree(adapter->rx_rings);
+	adapter->rx_rings = NULL;
+}
+
+/**
+ * iavf_set_queue_vlan_tag_loc - set location for VLAN tag offload
+ * @adapter: board private structure
+ *
+ * Based on negotiated capabilities, the VLAN tag needs to be inserted and/or
+ * stripped in certain descriptor fields. Instead of checking the offload
+ * capability bits in the hot path, cache the location the ring specific
+ * flags.
+ */
+void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_active_queues; i++) {
+		struct iavf_ring *tx_ring = &adapter->tx_rings[i];
+		struct iavf_ring *rx_ring = &adapter->rx_rings[i];
+
+		/* prevent multiple L2TAG bits being set after VFR */
+		tx_ring->flags &=
+			~(IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 |
+			  IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2);
+		rx_ring->flags &=
+			~(IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 |
+			  IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2);
+
+		if (VLAN_ALLOWED(adapter)) {
+			tx_ring->flags |= IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1;
+			rx_ring->flags |= IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1;
+		} else if (VLAN_V2_ALLOWED(adapter)) {
+			struct virtchnl_vlan_supported_caps *stripping_support;
+			struct virtchnl_vlan_supported_caps *insertion_support;
+
+			stripping_support =
+				&adapter->vlan_v2_caps.offloads.stripping_support;
+			insertion_support =
+				&adapter->vlan_v2_caps.offloads.insertion_support;
+
+			if (stripping_support->outer) {
+				if (stripping_support->outer &
+				    VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1)
+					rx_ring->flags |=
+						IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1;
+				else if (stripping_support->outer &
+					 VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2)
+					rx_ring->flags |=
+						IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2;
+			} else if (stripping_support->inner) {
+				if (stripping_support->inner &
+				    VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1)
+					rx_ring->flags |=
+						IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1;
+				else if (stripping_support->inner &
+					 VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2)
+					rx_ring->flags |=
+						IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2;
+			}
+
+			if (insertion_support->outer) {
+				if (insertion_support->outer &
+				    VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1)
+					tx_ring->flags |=
+						IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1;
+				else if (insertion_support->outer &
+					 VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2)
+					tx_ring->flags |=
+						IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2;
+			} else if (insertion_support->inner) {
+				if (insertion_support->inner &
+				    VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1)
+					tx_ring->flags |=
+						IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1;
+				else if (insertion_support->inner &
+					 VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2)
+					tx_ring->flags |=
+						IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2;
+			}
+		}
+	}
+}
+
+/**
+ * iavf_alloc_queues - Allocate memory for all rings
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one ring per queue at run-time since we don't know the
+ * number of queues at compile-time.  The polling_netdev array is
+ * intended for Multiqueue, but should work fine with a single queue.
+ **/
+static int iavf_alloc_queues(struct iavf_adapter *adapter)
+{
+	int i, num_active_queues;
+
+	/* If we're in reset reallocating queues we don't actually know yet for
+	 * certain the PF gave us the number of queues we asked for but we'll
+	 * assume it did.  Once basic reset is finished we'll confirm once we
+	 * start negotiating config with PF.
+	 */
+	if (adapter->num_req_queues)
+		num_active_queues = adapter->num_req_queues;
+	else if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+		 adapter->num_tc)
+		num_active_queues = adapter->ch_config.total_qps;
+	else
+		num_active_queues = min_t(int,
+					  adapter->vsi_res->num_queue_pairs,
+					  (int)(num_online_cpus()));
+
+
+	adapter->tx_rings = kcalloc(num_active_queues,
+				    sizeof(struct iavf_ring), GFP_KERNEL);
+	if (!adapter->tx_rings)
+		goto err_out;
+	adapter->rx_rings = kcalloc(num_active_queues,
+				    sizeof(struct iavf_ring), GFP_KERNEL);
+	if (!adapter->rx_rings)
+		goto err_out;
+
+	for (i = 0; i < num_active_queues; i++) {
+		struct iavf_ring *tx_ring;
+		struct iavf_ring *rx_ring;
+
+		tx_ring = &adapter->tx_rings[i];
+
+		tx_ring->queue_index = i;
+		tx_ring->netdev = adapter->netdev;
+		tx_ring->dev = &adapter->pdev->dev;
+		tx_ring->count = adapter->tx_desc_count;
+		tx_ring->itr_setting = IAVF_ITR_TX_DEF;
+		if (adapter->flags & IAVF_FLAG_WB_ON_ITR_CAPABLE)
+			tx_ring->flags |= IAVF_TXR_FLAGS_WB_ON_ITR;
+
+		rx_ring = &adapter->rx_rings[i];
+		rx_ring->queue_index = i;
+		rx_ring->netdev = adapter->netdev;
+		rx_ring->dev = &adapter->pdev->dev;
+		rx_ring->count = adapter->rx_desc_count;
+		rx_ring->itr_setting = IAVF_ITR_RX_DEF;
+	}
+
+	adapter->num_active_queues = num_active_queues;
+
+	iavf_set_queue_vlan_tag_loc(adapter);
+
+	return 0;
+
+err_out:
+	iavf_free_queues(adapter);
+	return -ENOMEM;
+}
+
+/**
+ * iavf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * @adapter: board private structure to initialize
+ *
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
+ **/
+static int iavf_set_interrupt_capability(struct iavf_adapter *adapter)
+{
+	int vector, v_budget;
+	int pairs = 0;
+	int err = 0;
+
+	if (!adapter->vsi_res) {
+		err = -EIO;
+		goto out;
+	}
+	pairs = adapter->num_active_queues;
+
+	/* It's easy to be greedy for MSI-X vectors, but it really doesn't do
+	 * us much good if we have more vectors than CPUs. However, we already
+	 * limit the total number of queues by the number of CPUs so we do not
+	 * need any further limiting here.
+	 */
+	v_budget = min_t(int, pairs + NONQ_VECS,
+			 (int)adapter->vf_res->max_vectors);
+
+	adapter->msix_entries = kcalloc(v_budget,
+					sizeof(struct msix_entry), GFP_KERNEL);
+	if (!adapter->msix_entries) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	for (vector = 0; vector < v_budget; vector++)
+		adapter->msix_entries[vector].entry = vector;
+
+	err = iavf_acquire_msix_vectors(adapter, v_budget);
+	if (!err)
+		iavf_schedule_finish_config(adapter);
+
+out:
+	return err;
+}
+
+/**
+ * iavf_config_rss_aq - Configure RSS keys and lut by using AQ commands
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int iavf_config_rss_aq(struct iavf_adapter *adapter)
+{
+	struct iavf_aqc_get_set_rss_key_data *rss_key =
+		(struct iavf_aqc_get_set_rss_key_data *)adapter->rss_key;
+	struct iavf_hw *hw = &adapter->hw;
+	enum iavf_status status;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot configure RSS, command %d pending\n",
+			adapter->current_op);
+		return -EBUSY;
+	}
+
+	status = iavf_aq_set_rss_key(hw, adapter->vsi.id, rss_key);
+	if (status) {
+		dev_err(&adapter->pdev->dev, "Cannot set RSS key, err %s aq_err %s\n",
+			iavf_stat_str(hw, status),
+			iavf_aq_str(hw, hw->aq.asq_last_status));
+		return iavf_status_to_errno(status);
+
+	}
+
+	status = iavf_aq_set_rss_lut(hw, adapter->vsi.id, false,
+				     adapter->rss_lut, adapter->rss_lut_size);
+	if (status) {
+		dev_err(&adapter->pdev->dev, "Cannot set RSS lut, err %s aq_err %s\n",
+			iavf_stat_str(hw, status),
+			iavf_aq_str(hw, hw->aq.asq_last_status));
+		return iavf_status_to_errno(status);
+	}
+
+	return 0;
+
+}
+
+/**
+ * iavf_config_rss_reg - Configure RSS keys and lut by writing registers
+ * @adapter: board private structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int iavf_config_rss_reg(struct iavf_adapter *adapter)
+{
+	struct iavf_hw *hw = &adapter->hw;
+	u32 *dw;
+	u16 i;
+
+	dw = (u32 *)adapter->rss_key;
+	for (i = 0; i <= adapter->rss_key_size / 4; i++)
+		wr32(hw, IAVF_VFQF_HKEY(i), dw[i]);
+
+	dw = (u32 *)adapter->rss_lut;
+	for (i = 0; i <= adapter->rss_lut_size / 4; i++)
+		wr32(hw, IAVF_VFQF_HLUT(i), dw[i]);
+
+	iavf_flush(hw);
+
+	return 0;
+}
+
+/**
+ * iavf_config_rss - Configure RSS keys and lut
+ * @adapter: board private structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int iavf_config_rss(struct iavf_adapter *adapter)
+{
+
+	if (RSS_PF(adapter)) {
+		adapter->aq_required |= IAVF_FLAG_AQ_SET_RSS_LUT |
+					IAVF_FLAG_AQ_SET_RSS_KEY;
+		return 0;
+	} else if (RSS_AQ(adapter)) {
+		return iavf_config_rss_aq(adapter);
+	} else {
+		return iavf_config_rss_reg(adapter);
+	}
+}
+
+/**
+ * iavf_fill_rss_lut - Fill the lut with default values
+ * @adapter: board private structure
+ **/
+static void iavf_fill_rss_lut(struct iavf_adapter *adapter)
+{
+	u16 i;
+
+	for (i = 0; i < adapter->rss_lut_size; i++)
+		adapter->rss_lut[i] = i % adapter->num_active_queues;
+}
+
+/**
+ * iavf_init_rss - Prepare for RSS
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int iavf_init_rss(struct iavf_adapter *adapter)
+{
+	struct iavf_hw *hw = &adapter->hw;
+
+	if (!RSS_PF(adapter)) {
+		/* Enable PCTYPES for RSS, TCP/UDP with IPv4/IPv6 */
+		if (adapter->vf_res->vf_cap_flags &
+		    VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
+			adapter->hena = IAVF_DEFAULT_RSS_HENA_EXPANDED;
+		else
+			adapter->hena = IAVF_DEFAULT_RSS_HENA;
+
+		wr32(hw, IAVF_VFQF_HENA(0), (u32)adapter->hena);
+		wr32(hw, IAVF_VFQF_HENA(1), (u32)(adapter->hena >> 32));
+	}
+
+	iavf_fill_rss_lut(adapter);
+	netdev_rss_key_fill((void *)adapter->rss_key, adapter->rss_key_size);
+
+	return iavf_config_rss(adapter);
+}
+
+/**
+ * iavf_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one q_vector per queue interrupt.  If allocation fails we
+ * return -ENOMEM.
+ **/
+static int iavf_alloc_q_vectors(struct iavf_adapter *adapter)
+{
+	int q_idx = 0, num_q_vectors;
+	struct iavf_q_vector *q_vector;
+
+	num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+	adapter->q_vectors = kcalloc(num_q_vectors, sizeof(*q_vector),
+				     GFP_KERNEL);
+	if (!adapter->q_vectors)
+		return -ENOMEM;
+
+	for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
+		q_vector = &adapter->q_vectors[q_idx];
+		q_vector->adapter = adapter;
+		q_vector->vsi = &adapter->vsi;
+		q_vector->v_idx = q_idx;
+		q_vector->reg_idx = q_idx;
+		cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask);
+		netif_napi_add(adapter->netdev, &q_vector->napi,
+			       iavf_napi_poll);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_free_q_vectors - Free memory allocated for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * This function frees the memory allocated to the q_vectors.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void iavf_free_q_vectors(struct iavf_adapter *adapter)
+{
+	int q_idx, num_q_vectors;
+
+	if (!adapter->q_vectors)
+		return;
+
+	num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+
+	for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
+		struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx];
+
+		netif_napi_del(&q_vector->napi);
+	}
+	kfree(adapter->q_vectors);
+	adapter->q_vectors = NULL;
+}
+
+/**
+ * iavf_reset_interrupt_capability - Reset MSIX setup
+ * @adapter: board private structure
+ *
+ **/
+static void iavf_reset_interrupt_capability(struct iavf_adapter *adapter)
+{
+	if (!adapter->msix_entries)
+		return;
+
+	pci_disable_msix(adapter->pdev);
+	kfree(adapter->msix_entries);
+	adapter->msix_entries = NULL;
+}
+
+/**
+ * iavf_init_interrupt_scheme - Determine if MSIX is supported and init
+ * @adapter: board private structure to initialize
+ *
+ **/
+static int iavf_init_interrupt_scheme(struct iavf_adapter *adapter)
+{
+	int err;
+
+	err = iavf_alloc_queues(adapter);
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"Unable to allocate memory for queues\n");
+		goto err_alloc_queues;
+	}
+
+	err = iavf_set_interrupt_capability(adapter);
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"Unable to setup interrupt capabilities\n");
+		goto err_set_interrupt;
+	}
+
+	err = iavf_alloc_q_vectors(adapter);
+	if (err) {
+		dev_err(&adapter->pdev->dev,
+			"Unable to allocate memory for queue vectors\n");
+		goto err_alloc_q_vectors;
+	}
+
+	/* If we've made it so far while ADq flag being ON, then we haven't
+	 * bailed out anywhere in middle. And ADq isn't just enabled but actual
+	 * resources have been allocated in the reset path.
+	 * Now we can truly claim that ADq is enabled.
+	 */
+	if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+	    adapter->num_tc)
+		dev_info(&adapter->pdev->dev, "ADq Enabled, %u TCs created",
+			 adapter->num_tc);
+
+	dev_info(&adapter->pdev->dev, "Multiqueue %s: Queue pair count = %u",
+		 (adapter->num_active_queues > 1) ? "Enabled" : "Disabled",
+		 adapter->num_active_queues);
+
+	return 0;
+err_alloc_q_vectors:
+	iavf_reset_interrupt_capability(adapter);
+err_set_interrupt:
+	iavf_free_queues(adapter);
+err_alloc_queues:
+	return err;
+}
+
+/**
+ * iavf_free_rss - Free memory used by RSS structs
+ * @adapter: board private structure
+ **/
+static void iavf_free_rss(struct iavf_adapter *adapter)
+{
+	kfree(adapter->rss_key);
+	adapter->rss_key = NULL;
+
+	kfree(adapter->rss_lut);
+	adapter->rss_lut = NULL;
+}
+
+/**
+ * iavf_reinit_interrupt_scheme - Reallocate queues and vectors
+ * @adapter: board private structure
+ * @running: true if adapter->state == __IAVF_RUNNING
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool running)
+{
+	struct net_device *netdev = adapter->netdev;
+	int err;
+
+	if (running)
+		iavf_free_traffic_irqs(adapter);
+	iavf_free_misc_irq(adapter);
+	iavf_reset_interrupt_capability(adapter);
+	iavf_free_q_vectors(adapter);
+	iavf_free_queues(adapter);
+
+	err =  iavf_init_interrupt_scheme(adapter);
+	if (err)
+		goto err;
+
+	netif_tx_stop_all_queues(netdev);
+
+	err = iavf_request_misc_irq(adapter);
+	if (err)
+		goto err;
+
+	set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+
+	iavf_map_rings_to_vectors(adapter);
+err:
+	return err;
+}
+
+/**
+ * iavf_finish_config - do all netdev work that needs RTNL
+ * @work: our work_struct
+ *
+ * Do work that needs both RTNL and crit_lock.
+ **/
+static void iavf_finish_config(struct work_struct *work)
+{
+	struct iavf_adapter *adapter;
+	int pairs, err;
+
+	adapter = container_of(work, struct iavf_adapter, finish_config);
+
+	/* Always take RTNL first to prevent circular lock dependency */
+	rtnl_lock();
+	mutex_lock(&adapter->crit_lock);
+
+	if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
+	    adapter->netdev_registered &&
+	    !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) {
+		netdev_update_features(adapter->netdev);
+		adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
+	}
+
+	switch (adapter->state) {
+	case __IAVF_DOWN:
+		if (!adapter->netdev_registered) {
+			err = register_netdevice(adapter->netdev);
+			if (err) {
+				dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n",
+					err);
+
+				/* go back and try again.*/
+				iavf_free_rss(adapter);
+				iavf_free_misc_irq(adapter);
+				iavf_reset_interrupt_capability(adapter);
+				iavf_change_state(adapter,
+						  __IAVF_INIT_CONFIG_ADAPTER);
+				goto out;
+			}
+			adapter->netdev_registered = true;
+		}
+
+		/* Set the real number of queues when reset occurs while
+		 * state == __IAVF_DOWN
+		 */
+		fallthrough;
+	case __IAVF_RUNNING:
+		pairs = adapter->num_active_queues;
+		netif_set_real_num_rx_queues(adapter->netdev, pairs);
+		netif_set_real_num_tx_queues(adapter->netdev, pairs);
+		break;
+
+	default:
+		break;
+	}
+
+out:
+	mutex_unlock(&adapter->crit_lock);
+	rtnl_unlock();
+}
+
+/**
+ * iavf_schedule_finish_config - Set the flags and schedule a reset event
+ * @adapter: board private structure
+ **/
+void iavf_schedule_finish_config(struct iavf_adapter *adapter)
+{
+	if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
+		queue_work(adapter->wq, &adapter->finish_config);
+}
+
+/**
+ * iavf_process_aq_command - process aq_required flags
+ * and sends aq command
+ * @adapter: pointer to iavf adapter structure
+ *
+ * Returns 0 on success
+ * Returns error code if no command was sent
+ * or error code if the command failed.
+ **/
+static int iavf_process_aq_command(struct iavf_adapter *adapter)
+{
+	if (adapter->aq_required & IAVF_FLAG_AQ_GET_CONFIG)
+		return iavf_send_vf_config_msg(adapter);
+	if (adapter->aq_required & IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS)
+		return iavf_send_vf_offload_vlan_v2_msg(adapter);
+	if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_QUEUES) {
+		iavf_disable_queues(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_MAP_VECTORS) {
+		iavf_map_queues(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_ADD_MAC_FILTER) {
+		iavf_add_ether_addrs(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_ADD_VLAN_FILTER) {
+		iavf_add_vlans(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_DEL_MAC_FILTER) {
+		iavf_del_ether_addrs(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_DEL_VLAN_FILTER) {
+		iavf_del_vlans(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING) {
+		iavf_enable_vlan_stripping(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING) {
+		iavf_disable_vlan_stripping(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_QUEUES) {
+		iavf_configure_queues(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_QUEUES) {
+		iavf_enable_queues(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_RSS) {
+		/* This message goes straight to the firmware, not the
+		 * PF, so we don't have to set current_op as we will
+		 * not get a response through the ARQ.
+		 */
+		adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_RSS;
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_GET_HENA) {
+		iavf_get_hena(adapter);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_SET_HENA) {
+		iavf_set_hena(adapter);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_SET_RSS_KEY) {
+		iavf_set_rss_key(adapter);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_SET_RSS_LUT) {
+		iavf_set_rss_lut(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE) {
+		iavf_set_promiscuous(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_CHANNELS) {
+		iavf_enable_channels(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_CHANNELS) {
+		iavf_disable_channels(adapter);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_ADD_CLOUD_FILTER) {
+		iavf_add_cloud_filter(adapter);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_DEL_CLOUD_FILTER) {
+		iavf_del_cloud_filter(adapter);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_DEL_CLOUD_FILTER) {
+		iavf_del_cloud_filter(adapter);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_ADD_CLOUD_FILTER) {
+		iavf_add_cloud_filter(adapter);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_ADD_FDIR_FILTER) {
+		iavf_add_fdir_filter(adapter);
+		return IAVF_SUCCESS;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_DEL_FDIR_FILTER) {
+		iavf_del_fdir_filter(adapter);
+		return IAVF_SUCCESS;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_ADD_ADV_RSS_CFG) {
+		iavf_add_adv_rss_cfg(adapter);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_DEL_ADV_RSS_CFG) {
+		iavf_del_adv_rss_cfg(adapter);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_CTAG_VLAN_STRIPPING) {
+		iavf_disable_vlan_stripping_v2(adapter, ETH_P_8021Q);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_STAG_VLAN_STRIPPING) {
+		iavf_disable_vlan_stripping_v2(adapter, ETH_P_8021AD);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_CTAG_VLAN_STRIPPING) {
+		iavf_enable_vlan_stripping_v2(adapter, ETH_P_8021Q);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_STAG_VLAN_STRIPPING) {
+		iavf_enable_vlan_stripping_v2(adapter, ETH_P_8021AD);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_CTAG_VLAN_INSERTION) {
+		iavf_disable_vlan_insertion_v2(adapter, ETH_P_8021Q);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_STAG_VLAN_INSERTION) {
+		iavf_disable_vlan_insertion_v2(adapter, ETH_P_8021AD);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_CTAG_VLAN_INSERTION) {
+		iavf_enable_vlan_insertion_v2(adapter, ETH_P_8021Q);
+		return 0;
+	}
+	if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_STAG_VLAN_INSERTION) {
+		iavf_enable_vlan_insertion_v2(adapter, ETH_P_8021AD);
+		return 0;
+	}
+
+	if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_STATS) {
+		iavf_request_stats(adapter);
+		return 0;
+	}
+
+	return -EAGAIN;
+}
+
+/**
+ * iavf_set_vlan_offload_features - set VLAN offload configuration
+ * @adapter: board private structure
+ * @prev_features: previous features used for comparison
+ * @features: updated features used for configuration
+ *
+ * Set the aq_required bit(s) based on the requested features passed in to
+ * configure VLAN stripping and/or VLAN insertion if supported. Also, schedule
+ * the watchdog if any changes are requested to expedite the request via
+ * virtchnl.
+ **/
+static void
+iavf_set_vlan_offload_features(struct iavf_adapter *adapter,
+			       netdev_features_t prev_features,
+			       netdev_features_t features)
+{
+	bool enable_stripping = true, enable_insertion = true;
+	u16 vlan_ethertype = 0;
+	u64 aq_required = 0;
+
+	/* keep cases separate because one ethertype for offloads can be
+	 * disabled at the same time as another is disabled, so check for an
+	 * enabled ethertype first, then check for disabled. Default to
+	 * ETH_P_8021Q so an ethertype is specified if disabling insertion and
+	 * stripping.
+	 */
+	if (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))
+		vlan_ethertype = ETH_P_8021AD;
+	else if (features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX))
+		vlan_ethertype = ETH_P_8021Q;
+	else if (prev_features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))
+		vlan_ethertype = ETH_P_8021AD;
+	else if (prev_features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX))
+		vlan_ethertype = ETH_P_8021Q;
+	else
+		vlan_ethertype = ETH_P_8021Q;
+
+	if (!(features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_CTAG_RX)))
+		enable_stripping = false;
+	if (!(features & (NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_CTAG_TX)))
+		enable_insertion = false;
+
+	if (VLAN_ALLOWED(adapter)) {
+		/* VIRTCHNL_VF_OFFLOAD_VLAN only has support for toggling VLAN
+		 * stripping via virtchnl. VLAN insertion can be toggled on the
+		 * netdev, but it doesn't require a virtchnl message
+		 */
+		if (enable_stripping)
+			aq_required |= IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING;
+		else
+			aq_required |= IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING;
+
+	} else if (VLAN_V2_ALLOWED(adapter)) {
+		switch (vlan_ethertype) {
+		case ETH_P_8021Q:
+			if (enable_stripping)
+				aq_required |= IAVF_FLAG_AQ_ENABLE_CTAG_VLAN_STRIPPING;
+			else
+				aq_required |= IAVF_FLAG_AQ_DISABLE_CTAG_VLAN_STRIPPING;
+
+			if (enable_insertion)
+				aq_required |= IAVF_FLAG_AQ_ENABLE_CTAG_VLAN_INSERTION;
+			else
+				aq_required |= IAVF_FLAG_AQ_DISABLE_CTAG_VLAN_INSERTION;
+			break;
+		case ETH_P_8021AD:
+			if (enable_stripping)
+				aq_required |= IAVF_FLAG_AQ_ENABLE_STAG_VLAN_STRIPPING;
+			else
+				aq_required |= IAVF_FLAG_AQ_DISABLE_STAG_VLAN_STRIPPING;
+
+			if (enable_insertion)
+				aq_required |= IAVF_FLAG_AQ_ENABLE_STAG_VLAN_INSERTION;
+			else
+				aq_required |= IAVF_FLAG_AQ_DISABLE_STAG_VLAN_INSERTION;
+			break;
+		}
+	}
+
+	if (aq_required) {
+		adapter->aq_required |= aq_required;
+		mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
+	}
+}
+
+/**
+ * iavf_startup - first step of driver startup
+ * @adapter: board private structure
+ *
+ * Function process __IAVF_STARTUP driver state.
+ * When success the state is changed to __IAVF_INIT_VERSION_CHECK
+ * when fails the state is changed to __IAVF_INIT_FAILED
+ **/
+static void iavf_startup(struct iavf_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct iavf_hw *hw = &adapter->hw;
+	enum iavf_status status;
+	int ret;
+
+	WARN_ON(adapter->state != __IAVF_STARTUP);
+
+	/* driver loaded, probe complete */
+	adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
+	adapter->flags &= ~IAVF_FLAG_RESET_PENDING;
+	status = iavf_set_mac_type(hw);
+	if (status) {
+		dev_err(&pdev->dev, "Failed to set MAC type (%d)\n", status);
+		goto err;
+	}
+
+	ret = iavf_check_reset_complete(hw);
+	if (ret) {
+		dev_info(&pdev->dev, "Device is still in reset (%d), retrying\n",
+			 ret);
+		goto err;
+	}
+	hw->aq.num_arq_entries = IAVF_AQ_LEN;
+	hw->aq.num_asq_entries = IAVF_AQ_LEN;
+	hw->aq.arq_buf_size = IAVF_MAX_AQ_BUF_SIZE;
+	hw->aq.asq_buf_size = IAVF_MAX_AQ_BUF_SIZE;
+
+	status = iavf_init_adminq(hw);
+	if (status) {
+		dev_err(&pdev->dev, "Failed to init Admin Queue (%d)\n",
+			status);
+		goto err;
+	}
+	ret = iavf_send_api_ver(adapter);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to send to PF (%d)\n", ret);
+		iavf_shutdown_adminq(hw);
+		goto err;
+	}
+	iavf_change_state(adapter, __IAVF_INIT_VERSION_CHECK);
+	return;
+err:
+	iavf_change_state(adapter, __IAVF_INIT_FAILED);
+}
+
+/**
+ * iavf_init_version_check - second step of driver startup
+ * @adapter: board private structure
+ *
+ * Function process __IAVF_INIT_VERSION_CHECK driver state.
+ * When success the state is changed to __IAVF_INIT_GET_RESOURCES
+ * when fails the state is changed to __IAVF_INIT_FAILED
+ **/
+static void iavf_init_version_check(struct iavf_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct iavf_hw *hw = &adapter->hw;
+	int err = -EAGAIN;
+
+	WARN_ON(adapter->state != __IAVF_INIT_VERSION_CHECK);
+
+	if (!iavf_asq_done(hw)) {
+		dev_err(&pdev->dev, "Admin queue command never completed\n");
+		iavf_shutdown_adminq(hw);
+		iavf_change_state(adapter, __IAVF_STARTUP);
+		goto err;
+	}
+
+	/* aq msg sent, awaiting reply */
+	err = iavf_verify_api_ver(adapter);
+	if (err) {
+		if (err == -EALREADY)
+			err = iavf_send_api_ver(adapter);
+		else
+			dev_err(&pdev->dev, "Unsupported PF API version %d.%d, expected %d.%d\n",
+				adapter->pf_version.major,
+				adapter->pf_version.minor,
+				VIRTCHNL_VERSION_MAJOR,
+				VIRTCHNL_VERSION_MINOR);
+		goto err;
+	}
+	err = iavf_send_vf_config_msg(adapter);
+	if (err) {
+		dev_err(&pdev->dev, "Unable to send config request (%d)\n",
+			err);
+		goto err;
+	}
+	iavf_change_state(adapter, __IAVF_INIT_GET_RESOURCES);
+	return;
+err:
+	iavf_change_state(adapter, __IAVF_INIT_FAILED);
+}
+
+/**
+ * iavf_parse_vf_resource_msg - parse response from VIRTCHNL_OP_GET_VF_RESOURCES
+ * @adapter: board private structure
+ */
+int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter)
+{
+	int i, num_req_queues = adapter->num_req_queues;
+	struct iavf_vsi *vsi = &adapter->vsi;
+
+	for (i = 0; i < adapter->vf_res->num_vsis; i++) {
+		if (adapter->vf_res->vsi_res[i].vsi_type == VIRTCHNL_VSI_SRIOV)
+			adapter->vsi_res = &adapter->vf_res->vsi_res[i];
+	}
+	if (!adapter->vsi_res) {
+		dev_err(&adapter->pdev->dev, "No LAN VSI found\n");
+		return -ENODEV;
+	}
+
+	if (num_req_queues &&
+	    num_req_queues > adapter->vsi_res->num_queue_pairs) {
+		/* Problem.  The PF gave us fewer queues than what we had
+		 * negotiated in our request.  Need a reset to see if we can't
+		 * get back to a working state.
+		 */
+		dev_err(&adapter->pdev->dev,
+			"Requested %d queues, but PF only gave us %d.\n",
+			num_req_queues,
+			adapter->vsi_res->num_queue_pairs);
+		adapter->flags |= IAVF_FLAG_REINIT_MSIX_NEEDED;
+		adapter->num_req_queues = adapter->vsi_res->num_queue_pairs;
+		iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+
+		return -EAGAIN;
+	}
+	adapter->num_req_queues = 0;
+	adapter->vsi.id = adapter->vsi_res->vsi_id;
+
+	adapter->vsi.back = adapter;
+	adapter->vsi.base_vector = 1;
+	vsi->netdev = adapter->netdev;
+	vsi->qs_handle = adapter->vsi_res->qset_handle;
+	if (adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
+		adapter->rss_key_size = adapter->vf_res->rss_key_size;
+		adapter->rss_lut_size = adapter->vf_res->rss_lut_size;
+	} else {
+		adapter->rss_key_size = IAVF_HKEY_ARRAY_SIZE;
+		adapter->rss_lut_size = IAVF_HLUT_ARRAY_SIZE;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_init_get_resources - third step of driver startup
+ * @adapter: board private structure
+ *
+ * Function process __IAVF_INIT_GET_RESOURCES driver state and
+ * finishes driver initialization procedure.
+ * When success the state is changed to __IAVF_DOWN
+ * when fails the state is changed to __IAVF_INIT_FAILED
+ **/
+static void iavf_init_get_resources(struct iavf_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct iavf_hw *hw = &adapter->hw;
+	int err;
+
+	WARN_ON(adapter->state != __IAVF_INIT_GET_RESOURCES);
+	/* aq msg sent, awaiting reply */
+	if (!adapter->vf_res) {
+		adapter->vf_res = kzalloc(IAVF_VIRTCHNL_VF_RESOURCE_SIZE,
+					  GFP_KERNEL);
+		if (!adapter->vf_res) {
+			err = -ENOMEM;
+			goto err;
+		}
+	}
+	err = iavf_get_vf_config(adapter);
+	if (err == -EALREADY) {
+		err = iavf_send_vf_config_msg(adapter);
+		goto err;
+	} else if (err == -EINVAL) {
+		/* We only get -EINVAL if the device is in a very bad
+		 * state or if we've been disabled for previous bad
+		 * behavior. Either way, we're done now.
+		 */
+		iavf_shutdown_adminq(hw);
+		dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n");
+		return;
+	}
+	if (err) {
+		dev_err(&pdev->dev, "Unable to get VF config (%d)\n", err);
+		goto err_alloc;
+	}
+
+	err = iavf_parse_vf_resource_msg(adapter);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to parse VF resource message from PF (%d)\n",
+			err);
+		goto err_alloc;
+	}
+	/* Some features require additional messages to negotiate extended
+	 * capabilities. These are processed in sequence by the
+	 * __IAVF_INIT_EXTENDED_CAPS driver state.
+	 */
+	adapter->extended_caps = IAVF_EXTENDED_CAPS;
+
+	iavf_change_state(adapter, __IAVF_INIT_EXTENDED_CAPS);
+	return;
+
+err_alloc:
+	kfree(adapter->vf_res);
+	adapter->vf_res = NULL;
+err:
+	iavf_change_state(adapter, __IAVF_INIT_FAILED);
+}
+
+/**
+ * iavf_init_send_offload_vlan_v2_caps - part of initializing VLAN V2 caps
+ * @adapter: board private structure
+ *
+ * Function processes send of the extended VLAN V2 capability message to the
+ * PF. Must clear IAVF_EXTENDED_CAP_RECV_VLAN_V2 if the message is not sent,
+ * e.g. due to PF not negotiating VIRTCHNL_VF_OFFLOAD_VLAN_V2.
+ */
+static void iavf_init_send_offload_vlan_v2_caps(struct iavf_adapter *adapter)
+{
+	int ret;
+
+	WARN_ON(!(adapter->extended_caps & IAVF_EXTENDED_CAP_SEND_VLAN_V2));
+
+	ret = iavf_send_vf_offload_vlan_v2_msg(adapter);
+	if (ret && ret == -EOPNOTSUPP) {
+		/* PF does not support VIRTCHNL_VF_OFFLOAD_V2. In this case,
+		 * we did not send the capability exchange message and do not
+		 * expect a response.
+		 */
+		adapter->extended_caps &= ~IAVF_EXTENDED_CAP_RECV_VLAN_V2;
+	}
+
+	/* We sent the message, so move on to the next step */
+	adapter->extended_caps &= ~IAVF_EXTENDED_CAP_SEND_VLAN_V2;
+}
+
+/**
+ * iavf_init_recv_offload_vlan_v2_caps - part of initializing VLAN V2 caps
+ * @adapter: board private structure
+ *
+ * Function processes receipt of the extended VLAN V2 capability message from
+ * the PF.
+ **/
+static void iavf_init_recv_offload_vlan_v2_caps(struct iavf_adapter *adapter)
+{
+	int ret;
+
+	WARN_ON(!(adapter->extended_caps & IAVF_EXTENDED_CAP_RECV_VLAN_V2));
+
+	memset(&adapter->vlan_v2_caps, 0, sizeof(adapter->vlan_v2_caps));
+
+	ret = iavf_get_vf_vlan_v2_caps(adapter);
+	if (ret)
+		goto err;
+
+	/* We've processed receipt of the VLAN V2 caps message */
+	adapter->extended_caps &= ~IAVF_EXTENDED_CAP_RECV_VLAN_V2;
+	return;
+err:
+	/* We didn't receive a reply. Make sure we try sending again when
+	 * __IAVF_INIT_FAILED attempts to recover.
+	 */
+	adapter->extended_caps |= IAVF_EXTENDED_CAP_SEND_VLAN_V2;
+	iavf_change_state(adapter, __IAVF_INIT_FAILED);
+}
+
+/**
+ * iavf_init_process_extended_caps - Part of driver startup
+ * @adapter: board private structure
+ *
+ * Function processes __IAVF_INIT_EXTENDED_CAPS driver state. This state
+ * handles negotiating capabilities for features which require an additional
+ * message.
+ *
+ * Once all extended capabilities exchanges are finished, the driver will
+ * transition into __IAVF_INIT_CONFIG_ADAPTER.
+ */
+static void iavf_init_process_extended_caps(struct iavf_adapter *adapter)
+{
+	WARN_ON(adapter->state != __IAVF_INIT_EXTENDED_CAPS);
+
+	/* Process capability exchange for VLAN V2 */
+	if (adapter->extended_caps & IAVF_EXTENDED_CAP_SEND_VLAN_V2) {
+		iavf_init_send_offload_vlan_v2_caps(adapter);
+		return;
+	} else if (adapter->extended_caps & IAVF_EXTENDED_CAP_RECV_VLAN_V2) {
+		iavf_init_recv_offload_vlan_v2_caps(adapter);
+		return;
+	}
+
+	/* When we reach here, no further extended capabilities exchanges are
+	 * necessary, so we finally transition into __IAVF_INIT_CONFIG_ADAPTER
+	 */
+	iavf_change_state(adapter, __IAVF_INIT_CONFIG_ADAPTER);
+}
+
+/**
+ * iavf_init_config_adapter - last part of driver startup
+ * @adapter: board private structure
+ *
+ * After all the supported capabilities are negotiated, then the
+ * __IAVF_INIT_CONFIG_ADAPTER state will finish driver initialization.
+ */
+static void iavf_init_config_adapter(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	int err;
+
+	WARN_ON(adapter->state != __IAVF_INIT_CONFIG_ADAPTER);
+
+	if (iavf_process_config(adapter))
+		goto err;
+
+	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+
+	adapter->flags |= IAVF_FLAG_RX_CSUM_ENABLED;
+
+	netdev->netdev_ops = &iavf_netdev_ops;
+	iavf_set_ethtool_ops(netdev);
+	netdev->watchdog_timeo = 5 * HZ;
+
+	/* MTU range: 68 - 9710 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = IAVF_MAX_RXBUFFER - IAVF_PACKET_HDR_PAD;
+
+	if (!is_valid_ether_addr(adapter->hw.mac.addr)) {
+		dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n",
+			 adapter->hw.mac.addr);
+		eth_hw_addr_random(netdev);
+		ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
+	} else {
+		eth_hw_addr_set(netdev, adapter->hw.mac.addr);
+		ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr);
+	}
+
+	adapter->tx_desc_count = IAVF_DEFAULT_TXD;
+	adapter->rx_desc_count = IAVF_DEFAULT_RXD;
+	err = iavf_init_interrupt_scheme(adapter);
+	if (err)
+		goto err_sw_init;
+	iavf_map_rings_to_vectors(adapter);
+	if (adapter->vf_res->vf_cap_flags &
+		VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
+		adapter->flags |= IAVF_FLAG_WB_ON_ITR_CAPABLE;
+
+	err = iavf_request_misc_irq(adapter);
+	if (err)
+		goto err_sw_init;
+
+	netif_carrier_off(netdev);
+	adapter->link_up = false;
+	netif_tx_stop_all_queues(netdev);
+
+	if (CLIENT_ALLOWED(adapter)) {
+		err = iavf_lan_add_device(adapter);
+		if (err)
+			dev_info(&pdev->dev, "Failed to add VF to client API service list: %d\n",
+				 err);
+	}
+	dev_info(&pdev->dev, "MAC address: %pM\n", adapter->hw.mac.addr);
+	if (netdev->features & NETIF_F_GRO)
+		dev_info(&pdev->dev, "GRO is enabled\n");
+
+	iavf_change_state(adapter, __IAVF_DOWN);
+	set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+
+	iavf_misc_irq_enable(adapter);
+	wake_up(&adapter->down_waitqueue);
+
+	adapter->rss_key = kzalloc(adapter->rss_key_size, GFP_KERNEL);
+	adapter->rss_lut = kzalloc(adapter->rss_lut_size, GFP_KERNEL);
+	if (!adapter->rss_key || !adapter->rss_lut) {
+		err = -ENOMEM;
+		goto err_mem;
+	}
+	if (RSS_AQ(adapter))
+		adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS;
+	else
+		iavf_init_rss(adapter);
+
+	if (VLAN_V2_ALLOWED(adapter))
+		/* request initial VLAN offload settings */
+		iavf_set_vlan_offload_features(adapter, 0, netdev->features);
+
+	iavf_schedule_finish_config(adapter);
+	return;
+
+err_mem:
+	iavf_free_rss(adapter);
+	iavf_free_misc_irq(adapter);
+err_sw_init:
+	iavf_reset_interrupt_capability(adapter);
+err:
+	iavf_change_state(adapter, __IAVF_INIT_FAILED);
+}
+
+/**
+ * iavf_watchdog_task - Periodic call-back task
+ * @work: pointer to work_struct
+ **/
+static void iavf_watchdog_task(struct work_struct *work)
+{
+	struct iavf_adapter *adapter = container_of(work,
+						    struct iavf_adapter,
+						    watchdog_task.work);
+	struct iavf_hw *hw = &adapter->hw;
+	u32 reg_val;
+
+	if (!mutex_trylock(&adapter->crit_lock)) {
+		if (adapter->state == __IAVF_REMOVE)
+			return;
+
+		goto restart_watchdog;
+	}
+
+	if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+		iavf_change_state(adapter, __IAVF_COMM_FAILED);
+
+	switch (adapter->state) {
+	case __IAVF_STARTUP:
+		iavf_startup(adapter);
+		mutex_unlock(&adapter->crit_lock);
+		queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+				   msecs_to_jiffies(30));
+		return;
+	case __IAVF_INIT_VERSION_CHECK:
+		iavf_init_version_check(adapter);
+		mutex_unlock(&adapter->crit_lock);
+		queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+				   msecs_to_jiffies(30));
+		return;
+	case __IAVF_INIT_GET_RESOURCES:
+		iavf_init_get_resources(adapter);
+		mutex_unlock(&adapter->crit_lock);
+		queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+				   msecs_to_jiffies(1));
+		return;
+	case __IAVF_INIT_EXTENDED_CAPS:
+		iavf_init_process_extended_caps(adapter);
+		mutex_unlock(&adapter->crit_lock);
+		queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+				   msecs_to_jiffies(1));
+		return;
+	case __IAVF_INIT_CONFIG_ADAPTER:
+		iavf_init_config_adapter(adapter);
+		mutex_unlock(&adapter->crit_lock);
+		queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+				   msecs_to_jiffies(1));
+		return;
+	case __IAVF_INIT_FAILED:
+		if (test_bit(__IAVF_IN_REMOVE_TASK,
+			     &adapter->crit_section)) {
+			/* Do not update the state and do not reschedule
+			 * watchdog task, iavf_remove should handle this state
+			 * as it can loop forever
+			 */
+			mutex_unlock(&adapter->crit_lock);
+			return;
+		}
+		if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
+			dev_err(&adapter->pdev->dev,
+				"Failed to communicate with PF; waiting before retry\n");
+			adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED;
+			iavf_shutdown_adminq(hw);
+			mutex_unlock(&adapter->crit_lock);
+			queue_delayed_work(adapter->wq,
+					   &adapter->watchdog_task, (5 * HZ));
+			return;
+		}
+		/* Try again from failed step*/
+		iavf_change_state(adapter, adapter->last_state);
+		mutex_unlock(&adapter->crit_lock);
+		queue_delayed_work(adapter->wq, &adapter->watchdog_task, HZ);
+		return;
+	case __IAVF_COMM_FAILED:
+		if (test_bit(__IAVF_IN_REMOVE_TASK,
+			     &adapter->crit_section)) {
+			/* Set state to __IAVF_INIT_FAILED and perform remove
+			 * steps. Remove IAVF_FLAG_PF_COMMS_FAILED so the task
+			 * doesn't bring the state back to __IAVF_COMM_FAILED.
+			 */
+			iavf_change_state(adapter, __IAVF_INIT_FAILED);
+			adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
+			mutex_unlock(&adapter->crit_lock);
+			return;
+		}
+		reg_val = rd32(hw, IAVF_VFGEN_RSTAT) &
+			  IAVF_VFGEN_RSTAT_VFR_STATE_MASK;
+		if (reg_val == VIRTCHNL_VFR_VFACTIVE ||
+		    reg_val == VIRTCHNL_VFR_COMPLETED) {
+			/* A chance for redemption! */
+			dev_err(&adapter->pdev->dev,
+				"Hardware came out of reset. Attempting reinit.\n");
+			/* When init task contacts the PF and
+			 * gets everything set up again, it'll restart the
+			 * watchdog for us. Down, boy. Sit. Stay. Woof.
+			 */
+			iavf_change_state(adapter, __IAVF_STARTUP);
+			adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
+		}
+		adapter->aq_required = 0;
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+		mutex_unlock(&adapter->crit_lock);
+		queue_delayed_work(adapter->wq,
+				   &adapter->watchdog_task,
+				   msecs_to_jiffies(10));
+		return;
+	case __IAVF_RESETTING:
+		mutex_unlock(&adapter->crit_lock);
+		queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+				   HZ * 2);
+		return;
+	case __IAVF_DOWN:
+	case __IAVF_DOWN_PENDING:
+	case __IAVF_TESTING:
+	case __IAVF_RUNNING:
+		if (adapter->current_op) {
+			if (!iavf_asq_done(hw)) {
+				dev_dbg(&adapter->pdev->dev,
+					"Admin queue timeout\n");
+				iavf_send_api_ver(adapter);
+			}
+		} else {
+			int ret = iavf_process_aq_command(adapter);
+
+			/* An error will be returned if no commands were
+			 * processed; use this opportunity to update stats
+			 * if the error isn't -ENOTSUPP
+			 */
+			if (ret && ret != -EOPNOTSUPP &&
+			    adapter->state == __IAVF_RUNNING)
+				iavf_request_stats(adapter);
+		}
+		if (adapter->state == __IAVF_RUNNING)
+			iavf_detect_recover_hung(&adapter->vsi);
+		break;
+	case __IAVF_REMOVE:
+	default:
+		mutex_unlock(&adapter->crit_lock);
+		return;
+	}
+
+	/* check for hw reset */
+	reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK;
+	if (!reg_val) {
+		adapter->aq_required = 0;
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+		dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
+		iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
+		mutex_unlock(&adapter->crit_lock);
+		queue_delayed_work(adapter->wq,
+				   &adapter->watchdog_task, HZ * 2);
+		return;
+	}
+
+	schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
+	mutex_unlock(&adapter->crit_lock);
+restart_watchdog:
+	if (adapter->state >= __IAVF_DOWN)
+		queue_work(adapter->wq, &adapter->adminq_task);
+	if (adapter->aq_required)
+		queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+				   msecs_to_jiffies(20));
+	else
+		queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+				   HZ * 2);
+}
+
+/**
+ * iavf_disable_vf - disable VF
+ * @adapter: board private structure
+ *
+ * Set communication failed flag and free all resources.
+ * NOTE: This function is expected to be called with crit_lock being held.
+ **/
+static void iavf_disable_vf(struct iavf_adapter *adapter)
+{
+	struct iavf_mac_filter *f, *ftmp;
+	struct iavf_vlan_filter *fv, *fvtmp;
+	struct iavf_cloud_filter *cf, *cftmp;
+
+	adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED;
+
+	/* We don't use netif_running() because it may be true prior to
+	 * ndo_open() returning, so we can't assume it means all our open
+	 * tasks have finished, since we're not holding the rtnl_lock here.
+	 */
+	if (adapter->state == __IAVF_RUNNING) {
+		set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+		netif_carrier_off(adapter->netdev);
+		netif_tx_disable(adapter->netdev);
+		adapter->link_up = false;
+		iavf_napi_disable_all(adapter);
+		iavf_irq_disable(adapter);
+		iavf_free_traffic_irqs(adapter);
+		iavf_free_all_tx_resources(adapter);
+		iavf_free_all_rx_resources(adapter);
+	}
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	/* Delete all of the filters */
+	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
+		list_del(&f->list);
+		kfree(f);
+	}
+
+	list_for_each_entry_safe(fv, fvtmp, &adapter->vlan_filter_list, list) {
+		list_del(&fv->list);
+		kfree(fv);
+	}
+	adapter->num_vlan_filters = 0;
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+		list_del(&cf->list);
+		kfree(cf);
+		adapter->num_cloud_filters--;
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+	iavf_free_misc_irq(adapter);
+	iavf_reset_interrupt_capability(adapter);
+	iavf_free_q_vectors(adapter);
+	iavf_free_queues(adapter);
+	memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE);
+	iavf_shutdown_adminq(&adapter->hw);
+	adapter->flags &= ~IAVF_FLAG_RESET_PENDING;
+	iavf_change_state(adapter, __IAVF_DOWN);
+	wake_up(&adapter->down_waitqueue);
+	dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n");
+}
+
+/**
+ * iavf_reset_task - Call-back task to handle hardware reset
+ * @work: pointer to work_struct
+ *
+ * During reset we need to shut down and reinitialize the admin queue
+ * before we can use it to communicate with the PF again. We also clear
+ * and reinit the rings because that context is lost as well.
+ **/
+static void iavf_reset_task(struct work_struct *work)
+{
+	struct iavf_adapter *adapter = container_of(work,
+						      struct iavf_adapter,
+						      reset_task);
+	struct virtchnl_vf_resource *vfres = adapter->vf_res;
+	struct net_device *netdev = adapter->netdev;
+	struct iavf_hw *hw = &adapter->hw;
+	struct iavf_mac_filter *f, *ftmp;
+	struct iavf_cloud_filter *cf;
+	enum iavf_status status;
+	u32 reg_val;
+	int i = 0, err;
+	bool running;
+
+	/* When device is being removed it doesn't make sense to run the reset
+	 * task, just return in such a case.
+	 */
+	if (!mutex_trylock(&adapter->crit_lock)) {
+		if (adapter->state != __IAVF_REMOVE)
+			queue_work(adapter->wq, &adapter->reset_task);
+
+		return;
+	}
+
+	while (!mutex_trylock(&adapter->client_lock))
+		usleep_range(500, 1000);
+	if (CLIENT_ENABLED(adapter)) {
+		adapter->flags &= ~(IAVF_FLAG_CLIENT_NEEDS_OPEN |
+				    IAVF_FLAG_CLIENT_NEEDS_CLOSE |
+				    IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS |
+				    IAVF_FLAG_SERVICE_CLIENT_REQUESTED);
+		cancel_delayed_work_sync(&adapter->client_task);
+		iavf_notify_client_close(&adapter->vsi, true);
+	}
+	iavf_misc_irq_disable(adapter);
+	if (adapter->flags & IAVF_FLAG_RESET_NEEDED) {
+		adapter->flags &= ~IAVF_FLAG_RESET_NEEDED;
+		/* Restart the AQ here. If we have been reset but didn't
+		 * detect it, or if the PF had to reinit, our AQ will be hosed.
+		 */
+		iavf_shutdown_adminq(hw);
+		iavf_init_adminq(hw);
+		iavf_request_reset(adapter);
+	}
+	adapter->flags |= IAVF_FLAG_RESET_PENDING;
+
+	/* poll until we see the reset actually happen */
+	for (i = 0; i < IAVF_RESET_WAIT_DETECTED_COUNT; i++) {
+		reg_val = rd32(hw, IAVF_VF_ARQLEN1) &
+			  IAVF_VF_ARQLEN1_ARQENABLE_MASK;
+		if (!reg_val)
+			break;
+		usleep_range(5000, 10000);
+	}
+	if (i == IAVF_RESET_WAIT_DETECTED_COUNT) {
+		dev_info(&adapter->pdev->dev, "Never saw reset\n");
+		goto continue_reset; /* act like the reset happened */
+	}
+
+	/* wait until the reset is complete and the PF is responding to us */
+	for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) {
+		/* sleep first to make sure a minimum wait time is met */
+		msleep(IAVF_RESET_WAIT_MS);
+
+		reg_val = rd32(hw, IAVF_VFGEN_RSTAT) &
+			  IAVF_VFGEN_RSTAT_VFR_STATE_MASK;
+		if (reg_val == VIRTCHNL_VFR_VFACTIVE)
+			break;
+	}
+
+	pci_set_master(adapter->pdev);
+	pci_restore_msi_state(adapter->pdev);
+
+	if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) {
+		dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n",
+			reg_val);
+		iavf_disable_vf(adapter);
+		mutex_unlock(&adapter->client_lock);
+		mutex_unlock(&adapter->crit_lock);
+		return; /* Do not attempt to reinit. It's dead, Jim. */
+	}
+
+continue_reset:
+	/* We don't use netif_running() because it may be true prior to
+	 * ndo_open() returning, so we can't assume it means all our open
+	 * tasks have finished, since we're not holding the rtnl_lock here.
+	 */
+	running = adapter->state == __IAVF_RUNNING;
+
+	if (running) {
+		netif_carrier_off(netdev);
+		netif_tx_stop_all_queues(netdev);
+		adapter->link_up = false;
+		iavf_napi_disable_all(adapter);
+	}
+	iavf_irq_disable(adapter);
+
+	iavf_change_state(adapter, __IAVF_RESETTING);
+	adapter->flags &= ~IAVF_FLAG_RESET_PENDING;
+
+	/* free the Tx/Rx rings and descriptors, might be better to just
+	 * re-use them sometime in the future
+	 */
+	iavf_free_all_rx_resources(adapter);
+	iavf_free_all_tx_resources(adapter);
+
+	adapter->flags |= IAVF_FLAG_QUEUES_DISABLED;
+	/* kill and reinit the admin queue */
+	iavf_shutdown_adminq(hw);
+	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+	status = iavf_init_adminq(hw);
+	if (status) {
+		dev_info(&adapter->pdev->dev, "Failed to init adminq: %d\n",
+			 status);
+		goto reset_err;
+	}
+	adapter->aq_required = 0;
+
+	if ((adapter->flags & IAVF_FLAG_REINIT_MSIX_NEEDED) ||
+	    (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED)) {
+		err = iavf_reinit_interrupt_scheme(adapter, running);
+		if (err)
+			goto reset_err;
+	}
+
+	if (RSS_AQ(adapter)) {
+		adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS;
+	} else {
+		err = iavf_init_rss(adapter);
+		if (err)
+			goto reset_err;
+	}
+
+	adapter->aq_required |= IAVF_FLAG_AQ_GET_CONFIG;
+	/* always set since VIRTCHNL_OP_GET_VF_RESOURCES has not been
+	 * sent/received yet, so VLAN_V2_ALLOWED() cannot is not reliable here,
+	 * however the VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS won't be sent until
+	 * VIRTCHNL_OP_GET_VF_RESOURCES and VIRTCHNL_VF_OFFLOAD_VLAN_V2 have
+	 * been successfully sent and negotiated
+	 */
+	adapter->aq_required |= IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS;
+	adapter->aq_required |= IAVF_FLAG_AQ_MAP_VECTORS;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	/* Delete filter for the current MAC address, it could have
+	 * been changed by the PF via administratively set MAC.
+	 * Will be re-added via VIRTCHNL_OP_GET_VF_RESOURCES.
+	 */
+	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
+		if (ether_addr_equal(f->macaddr, adapter->hw.mac.addr)) {
+			list_del(&f->list);
+			kfree(f);
+		}
+	}
+	/* re-add all MAC filters */
+	list_for_each_entry(f, &adapter->mac_filter_list, list) {
+		f->add = true;
+	}
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	/* check if TCs are running and re-add all cloud filters */
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	if ((vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+	    adapter->num_tc) {
+		list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+			cf->add = true;
+		}
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+	adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
+	adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
+	iavf_misc_irq_enable(adapter);
+
+	mod_delayed_work(adapter->wq, &adapter->watchdog_task, 2);
+
+	/* We were running when the reset started, so we need to restore some
+	 * state here.
+	 */
+	if (running) {
+		/* allocate transmit descriptors */
+		err = iavf_setup_all_tx_resources(adapter);
+		if (err)
+			goto reset_err;
+
+		/* allocate receive descriptors */
+		err = iavf_setup_all_rx_resources(adapter);
+		if (err)
+			goto reset_err;
+
+		if ((adapter->flags & IAVF_FLAG_REINIT_MSIX_NEEDED) ||
+		    (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED)) {
+			err = iavf_request_traffic_irqs(adapter, netdev->name);
+			if (err)
+				goto reset_err;
+
+			adapter->flags &= ~IAVF_FLAG_REINIT_MSIX_NEEDED;
+		}
+
+		iavf_configure(adapter);
+
+		/* iavf_up_complete() will switch device back
+		 * to __IAVF_RUNNING
+		 */
+		iavf_up_complete(adapter);
+
+		iavf_irq_enable(adapter, true);
+	} else {
+		iavf_change_state(adapter, __IAVF_DOWN);
+		wake_up(&adapter->down_waitqueue);
+	}
+
+	adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+
+	wake_up(&adapter->reset_waitqueue);
+	mutex_unlock(&adapter->client_lock);
+	mutex_unlock(&adapter->crit_lock);
+
+	return;
+reset_err:
+	if (running) {
+		set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+		iavf_free_traffic_irqs(adapter);
+	}
+	iavf_disable_vf(adapter);
+
+	mutex_unlock(&adapter->client_lock);
+	mutex_unlock(&adapter->crit_lock);
+	dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
+}
+
+/**
+ * iavf_adminq_task - worker thread to clean the admin queue
+ * @work: pointer to work_struct containing our data
+ **/
+static void iavf_adminq_task(struct work_struct *work)
+{
+	struct iavf_adapter *adapter =
+		container_of(work, struct iavf_adapter, adminq_task);
+	struct iavf_hw *hw = &adapter->hw;
+	struct iavf_arq_event_info event;
+	enum virtchnl_ops v_op;
+	enum iavf_status ret, v_ret;
+	u32 val, oldval;
+	u16 pending;
+
+	if (!mutex_trylock(&adapter->crit_lock)) {
+		if (adapter->state == __IAVF_REMOVE)
+			return;
+
+		queue_work(adapter->wq, &adapter->adminq_task);
+		goto out;
+	}
+
+	if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+		goto unlock;
+
+	event.buf_len = IAVF_MAX_AQ_BUF_SIZE;
+	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
+	if (!event.msg_buf)
+		goto unlock;
+
+	do {
+		ret = iavf_clean_arq_element(hw, &event, &pending);
+		v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high);
+		v_ret = (enum iavf_status)le32_to_cpu(event.desc.cookie_low);
+
+		if (ret || !v_op)
+			break; /* No event to process or error cleaning ARQ */
+
+		iavf_virtchnl_completion(adapter, v_op, v_ret, event.msg_buf,
+					 event.msg_len);
+		if (pending != 0)
+			memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE);
+	} while (pending);
+
+	if (iavf_is_reset_in_progress(adapter))
+		goto freedom;
+
+	/* check for error indications */
+	val = rd32(hw, hw->aq.arq.len);
+	if (val == 0xdeadbeef || val == 0xffffffff) /* device in reset */
+		goto freedom;
+	oldval = val;
+	if (val & IAVF_VF_ARQLEN1_ARQVFE_MASK) {
+		dev_info(&adapter->pdev->dev, "ARQ VF Error detected\n");
+		val &= ~IAVF_VF_ARQLEN1_ARQVFE_MASK;
+	}
+	if (val & IAVF_VF_ARQLEN1_ARQOVFL_MASK) {
+		dev_info(&adapter->pdev->dev, "ARQ Overflow Error detected\n");
+		val &= ~IAVF_VF_ARQLEN1_ARQOVFL_MASK;
+	}
+	if (val & IAVF_VF_ARQLEN1_ARQCRIT_MASK) {
+		dev_info(&adapter->pdev->dev, "ARQ Critical Error detected\n");
+		val &= ~IAVF_VF_ARQLEN1_ARQCRIT_MASK;
+	}
+	if (oldval != val)
+		wr32(hw, hw->aq.arq.len, val);
+
+	val = rd32(hw, hw->aq.asq.len);
+	oldval = val;
+	if (val & IAVF_VF_ATQLEN1_ATQVFE_MASK) {
+		dev_info(&adapter->pdev->dev, "ASQ VF Error detected\n");
+		val &= ~IAVF_VF_ATQLEN1_ATQVFE_MASK;
+	}
+	if (val & IAVF_VF_ATQLEN1_ATQOVFL_MASK) {
+		dev_info(&adapter->pdev->dev, "ASQ Overflow Error detected\n");
+		val &= ~IAVF_VF_ATQLEN1_ATQOVFL_MASK;
+	}
+	if (val & IAVF_VF_ATQLEN1_ATQCRIT_MASK) {
+		dev_info(&adapter->pdev->dev, "ASQ Critical Error detected\n");
+		val &= ~IAVF_VF_ATQLEN1_ATQCRIT_MASK;
+	}
+	if (oldval != val)
+		wr32(hw, hw->aq.asq.len, val);
+
+freedom:
+	kfree(event.msg_buf);
+unlock:
+	mutex_unlock(&adapter->crit_lock);
+out:
+	/* re-enable Admin queue interrupt cause */
+	iavf_misc_irq_enable(adapter);
+}
+
+/**
+ * iavf_client_task - worker thread to perform client work
+ * @work: pointer to work_struct containing our data
+ *
+ * This task handles client interactions. Because client calls can be
+ * reentrant, we can't handle them in the watchdog.
+ **/
+static void iavf_client_task(struct work_struct *work)
+{
+	struct iavf_adapter *adapter =
+		container_of(work, struct iavf_adapter, client_task.work);
+
+	/* If we can't get the client bit, just give up. We'll be rescheduled
+	 * later.
+	 */
+
+	if (!mutex_trylock(&adapter->client_lock))
+		return;
+
+	if (adapter->flags & IAVF_FLAG_SERVICE_CLIENT_REQUESTED) {
+		iavf_client_subtask(adapter);
+		adapter->flags &= ~IAVF_FLAG_SERVICE_CLIENT_REQUESTED;
+		goto out;
+	}
+	if (adapter->flags & IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS) {
+		iavf_notify_client_l2_params(&adapter->vsi);
+		adapter->flags &= ~IAVF_FLAG_CLIENT_NEEDS_L2_PARAMS;
+		goto out;
+	}
+	if (adapter->flags & IAVF_FLAG_CLIENT_NEEDS_CLOSE) {
+		iavf_notify_client_close(&adapter->vsi, false);
+		adapter->flags &= ~IAVF_FLAG_CLIENT_NEEDS_CLOSE;
+		goto out;
+	}
+	if (adapter->flags & IAVF_FLAG_CLIENT_NEEDS_OPEN) {
+		iavf_notify_client_open(&adapter->vsi);
+		adapter->flags &= ~IAVF_FLAG_CLIENT_NEEDS_OPEN;
+	}
+out:
+	mutex_unlock(&adapter->client_lock);
+}
+
+/**
+ * iavf_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ **/
+void iavf_free_all_tx_resources(struct iavf_adapter *adapter)
+{
+	int i;
+
+	if (!adapter->tx_rings)
+		return;
+
+	for (i = 0; i < adapter->num_active_queues; i++)
+		if (adapter->tx_rings[i].desc)
+			iavf_free_tx_resources(&adapter->tx_rings[i]);
+}
+
+/**
+ * iavf_setup_all_tx_resources - allocate all queues Tx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_active_queues; i++) {
+		adapter->tx_rings[i].count = adapter->tx_desc_count;
+		err = iavf_setup_tx_descriptors(&adapter->tx_rings[i]);
+		if (!err)
+			continue;
+		dev_err(&adapter->pdev->dev,
+			"Allocation for Tx Queue %u failed\n", i);
+		break;
+	}
+
+	return err;
+}
+
+/**
+ * iavf_setup_all_rx_resources - allocate all queues Rx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int iavf_setup_all_rx_resources(struct iavf_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_active_queues; i++) {
+		adapter->rx_rings[i].count = adapter->rx_desc_count;
+		err = iavf_setup_rx_descriptors(&adapter->rx_rings[i]);
+		if (!err)
+			continue;
+		dev_err(&adapter->pdev->dev,
+			"Allocation for Rx Queue %u failed\n", i);
+		break;
+	}
+	return err;
+}
+
+/**
+ * iavf_free_all_rx_resources - Free Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ **/
+void iavf_free_all_rx_resources(struct iavf_adapter *adapter)
+{
+	int i;
+
+	if (!adapter->rx_rings)
+		return;
+
+	for (i = 0; i < adapter->num_active_queues; i++)
+		if (adapter->rx_rings[i].desc)
+			iavf_free_rx_resources(&adapter->rx_rings[i]);
+}
+
+/**
+ * iavf_validate_tx_bandwidth - validate the max Tx bandwidth
+ * @adapter: board private structure
+ * @max_tx_rate: max Tx bw for a tc
+ **/
+static int iavf_validate_tx_bandwidth(struct iavf_adapter *adapter,
+				      u64 max_tx_rate)
+{
+	int speed = 0, ret = 0;
+
+	if (ADV_LINK_SUPPORT(adapter)) {
+		if (adapter->link_speed_mbps < U32_MAX) {
+			speed = adapter->link_speed_mbps;
+			goto validate_bw;
+		} else {
+			dev_err(&adapter->pdev->dev, "Unknown link speed\n");
+			return -EINVAL;
+		}
+	}
+
+	switch (adapter->link_speed) {
+	case VIRTCHNL_LINK_SPEED_40GB:
+		speed = SPEED_40000;
+		break;
+	case VIRTCHNL_LINK_SPEED_25GB:
+		speed = SPEED_25000;
+		break;
+	case VIRTCHNL_LINK_SPEED_20GB:
+		speed = SPEED_20000;
+		break;
+	case VIRTCHNL_LINK_SPEED_10GB:
+		speed = SPEED_10000;
+		break;
+	case VIRTCHNL_LINK_SPEED_5GB:
+		speed = SPEED_5000;
+		break;
+	case VIRTCHNL_LINK_SPEED_2_5GB:
+		speed = SPEED_2500;
+		break;
+	case VIRTCHNL_LINK_SPEED_1GB:
+		speed = SPEED_1000;
+		break;
+	case VIRTCHNL_LINK_SPEED_100MB:
+		speed = SPEED_100;
+		break;
+	default:
+		break;
+	}
+
+validate_bw:
+	if (max_tx_rate > speed) {
+		dev_err(&adapter->pdev->dev,
+			"Invalid tx rate specified\n");
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+/**
+ * iavf_validate_ch_config - validate queue mapping info
+ * @adapter: board private structure
+ * @mqprio_qopt: queue parameters
+ *
+ * This function validates if the config provided by the user to
+ * configure queue channels is valid or not. Returns 0 on a valid
+ * config.
+ **/
+static int iavf_validate_ch_config(struct iavf_adapter *adapter,
+				   struct tc_mqprio_qopt_offload *mqprio_qopt)
+{
+	u64 total_max_rate = 0;
+	u32 tx_rate_rem = 0;
+	int i, num_qps = 0;
+	u64 tx_rate = 0;
+	int ret = 0;
+
+	if (mqprio_qopt->qopt.num_tc > IAVF_MAX_TRAFFIC_CLASS ||
+	    mqprio_qopt->qopt.num_tc < 1)
+		return -EINVAL;
+
+	for (i = 0; i <= mqprio_qopt->qopt.num_tc - 1; i++) {
+		if (!mqprio_qopt->qopt.count[i] ||
+		    mqprio_qopt->qopt.offset[i] != num_qps)
+			return -EINVAL;
+		if (mqprio_qopt->min_rate[i]) {
+			dev_err(&adapter->pdev->dev,
+				"Invalid min tx rate (greater than 0) specified for TC%d\n",
+				i);
+			return -EINVAL;
+		}
+
+		/* convert to Mbps */
+		tx_rate = div_u64(mqprio_qopt->max_rate[i],
+				  IAVF_MBPS_DIVISOR);
+
+		if (mqprio_qopt->max_rate[i] &&
+		    tx_rate < IAVF_MBPS_QUANTA) {
+			dev_err(&adapter->pdev->dev,
+				"Invalid max tx rate for TC%d, minimum %dMbps\n",
+				i, IAVF_MBPS_QUANTA);
+			return -EINVAL;
+		}
+
+		(void)div_u64_rem(tx_rate, IAVF_MBPS_QUANTA, &tx_rate_rem);
+
+		if (tx_rate_rem != 0) {
+			dev_err(&adapter->pdev->dev,
+				"Invalid max tx rate for TC%d, not divisible by %d\n",
+				i, IAVF_MBPS_QUANTA);
+			return -EINVAL;
+		}
+
+		total_max_rate += tx_rate;
+		num_qps += mqprio_qopt->qopt.count[i];
+	}
+	if (num_qps > adapter->num_active_queues) {
+		dev_err(&adapter->pdev->dev,
+			"Cannot support requested number of queues\n");
+		return -EINVAL;
+	}
+
+	ret = iavf_validate_tx_bandwidth(adapter, total_max_rate);
+	return ret;
+}
+
+/**
+ * iavf_del_all_cloud_filters - delete all cloud filters on the traffic classes
+ * @adapter: board private structure
+ **/
+static void iavf_del_all_cloud_filters(struct iavf_adapter *adapter)
+{
+	struct iavf_cloud_filter *cf, *cftmp;
+
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
+				 list) {
+		list_del(&cf->list);
+		kfree(cf);
+		adapter->num_cloud_filters--;
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+}
+
+/**
+ * __iavf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type_data: tc offload data
+ *
+ * This function processes the config information provided by the
+ * user to configure traffic classes/queue channels and packages the
+ * information to request the PF to setup traffic classes.
+ *
+ * Returns 0 on success.
+ **/
+static int __iavf_setup_tc(struct net_device *netdev, void *type_data)
+{
+	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct virtchnl_vf_resource *vfres = adapter->vf_res;
+	u8 num_tc = 0, total_qps = 0;
+	int ret = 0, netdev_tc = 0;
+	u64 max_tx_rate;
+	u16 mode;
+	int i;
+
+	num_tc = mqprio_qopt->qopt.num_tc;
+	mode = mqprio_qopt->mode;
+
+	/* delete queue_channel */
+	if (!mqprio_qopt->qopt.hw) {
+		if (adapter->ch_config.state == __IAVF_TC_RUNNING) {
+			/* reset the tc configuration */
+			netdev_reset_tc(netdev);
+			adapter->num_tc = 0;
+			netif_tx_stop_all_queues(netdev);
+			netif_tx_disable(netdev);
+			iavf_del_all_cloud_filters(adapter);
+			adapter->aq_required = IAVF_FLAG_AQ_DISABLE_CHANNELS;
+			total_qps = adapter->orig_num_active_queues;
+			goto exit;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	/* add queue channel */
+	if (mode == TC_MQPRIO_MODE_CHANNEL) {
+		if (!(vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)) {
+			dev_err(&adapter->pdev->dev, "ADq not supported\n");
+			return -EOPNOTSUPP;
+		}
+		if (adapter->ch_config.state != __IAVF_TC_INVALID) {
+			dev_err(&adapter->pdev->dev, "TC configuration already exists\n");
+			return -EINVAL;
+		}
+
+		ret = iavf_validate_ch_config(adapter, mqprio_qopt);
+		if (ret)
+			return ret;
+		/* Return if same TC config is requested */
+		if (adapter->num_tc == num_tc)
+			return 0;
+		adapter->num_tc = num_tc;
+
+		for (i = 0; i < IAVF_MAX_TRAFFIC_CLASS; i++) {
+			if (i < num_tc) {
+				adapter->ch_config.ch_info[i].count =
+					mqprio_qopt->qopt.count[i];
+				adapter->ch_config.ch_info[i].offset =
+					mqprio_qopt->qopt.offset[i];
+				total_qps += mqprio_qopt->qopt.count[i];
+				max_tx_rate = mqprio_qopt->max_rate[i];
+				/* convert to Mbps */
+				max_tx_rate = div_u64(max_tx_rate,
+						      IAVF_MBPS_DIVISOR);
+				adapter->ch_config.ch_info[i].max_tx_rate =
+					max_tx_rate;
+			} else {
+				adapter->ch_config.ch_info[i].count = 1;
+				adapter->ch_config.ch_info[i].offset = 0;
+			}
+		}
+
+		/* Take snapshot of original config such as "num_active_queues"
+		 * It is used later when delete ADQ flow is exercised, so that
+		 * once delete ADQ flow completes, VF shall go back to its
+		 * original queue configuration
+		 */
+
+		adapter->orig_num_active_queues = adapter->num_active_queues;
+
+		/* Store queue info based on TC so that VF gets configured
+		 * with correct number of queues when VF completes ADQ config
+		 * flow
+		 */
+		adapter->ch_config.total_qps = total_qps;
+
+		netif_tx_stop_all_queues(netdev);
+		netif_tx_disable(netdev);
+		adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_CHANNELS;
+		netdev_reset_tc(netdev);
+		/* Report the tc mapping up the stack */
+		netdev_set_num_tc(adapter->netdev, num_tc);
+		for (i = 0; i < IAVF_MAX_TRAFFIC_CLASS; i++) {
+			u16 qcount = mqprio_qopt->qopt.count[i];
+			u16 qoffset = mqprio_qopt->qopt.offset[i];
+
+			if (i < num_tc)
+				netdev_set_tc_queue(netdev, netdev_tc++, qcount,
+						    qoffset);
+		}
+	}
+exit:
+	if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
+		return 0;
+
+	netif_set_real_num_rx_queues(netdev, total_qps);
+	netif_set_real_num_tx_queues(netdev, total_qps);
+
+	return ret;
+}
+
+/**
+ * iavf_parse_cls_flower - Parse tc flower filters provided by kernel
+ * @adapter: board private structure
+ * @f: pointer to struct flow_cls_offload
+ * @filter: pointer to cloud filter structure
+ */
+static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
+				 struct flow_cls_offload *f,
+				 struct iavf_cloud_filter *filter)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct flow_dissector *dissector = rule->match.dissector;
+	u16 n_proto_mask = 0;
+	u16 n_proto_key = 0;
+	u8 field_flags = 0;
+	u16 addr_type = 0;
+	u16 n_proto = 0;
+	int i = 0;
+	struct virtchnl_filter *vf = &filter->f;
+
+	if (dissector->used_keys &
+	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
+		dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%llx\n",
+			dissector->used_keys);
+		return -EOPNOTSUPP;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid match;
+
+		flow_rule_match_enc_keyid(rule, &match);
+		if (match.mask->keyid != 0)
+			field_flags |= IAVF_CLOUD_FIELD_TEN_ID;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+		n_proto_key = ntohs(match.key->n_proto);
+		n_proto_mask = ntohs(match.mask->n_proto);
+
+		if (n_proto_key == ETH_P_ALL) {
+			n_proto_key = 0;
+			n_proto_mask = 0;
+		}
+		n_proto = n_proto_key & n_proto_mask;
+		if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6)
+			return -EINVAL;
+		if (n_proto == ETH_P_IPV6) {
+			/* specify flow type as TCP IPv6 */
+			vf->flow_type = VIRTCHNL_TCP_V6_FLOW;
+		}
+
+		if (match.key->ip_proto != IPPROTO_TCP) {
+			dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n");
+			return -EINVAL;
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
+
+		flow_rule_match_eth_addrs(rule, &match);
+
+		/* use is_broadcast and is_zero to check for all 0xf or 0 */
+		if (!is_zero_ether_addr(match.mask->dst)) {
+			if (is_broadcast_ether_addr(match.mask->dst)) {
+				field_flags |= IAVF_CLOUD_FIELD_OMAC;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n",
+					match.mask->dst);
+				return -EINVAL;
+			}
+		}
+
+		if (!is_zero_ether_addr(match.mask->src)) {
+			if (is_broadcast_ether_addr(match.mask->src)) {
+				field_flags |= IAVF_CLOUD_FIELD_IMAC;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n",
+					match.mask->src);
+				return -EINVAL;
+			}
+		}
+
+		if (!is_zero_ether_addr(match.key->dst))
+			if (is_valid_ether_addr(match.key->dst) ||
+			    is_multicast_ether_addr(match.key->dst)) {
+				/* set the mask if a valid dst_mac address */
+				for (i = 0; i < ETH_ALEN; i++)
+					vf->mask.tcp_spec.dst_mac[i] |= 0xff;
+				ether_addr_copy(vf->data.tcp_spec.dst_mac,
+						match.key->dst);
+			}
+
+		if (!is_zero_ether_addr(match.key->src))
+			if (is_valid_ether_addr(match.key->src) ||
+			    is_multicast_ether_addr(match.key->src)) {
+				/* set the mask if a valid dst_mac address */
+				for (i = 0; i < ETH_ALEN; i++)
+					vf->mask.tcp_spec.src_mac[i] |= 0xff;
+				ether_addr_copy(vf->data.tcp_spec.src_mac,
+						match.key->src);
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
+
+		flow_rule_match_vlan(rule, &match);
+		if (match.mask->vlan_id) {
+			if (match.mask->vlan_id == VLAN_VID_MASK) {
+				field_flags |= IAVF_CLOUD_FIELD_IVLAN;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n",
+					match.mask->vlan_id);
+				return -EINVAL;
+			}
+		}
+		vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
+		vf->data.tcp_spec.vlan_id = cpu_to_be16(match.key->vlan_id);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
+
+		flow_rule_match_control(rule, &match);
+		addr_type = match.key->addr_type;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		struct flow_match_ipv4_addrs match;
+
+		flow_rule_match_ipv4_addrs(rule, &match);
+		if (match.mask->dst) {
+			if (match.mask->dst == cpu_to_be32(0xffffffff)) {
+				field_flags |= IAVF_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n",
+					be32_to_cpu(match.mask->dst));
+				return -EINVAL;
+			}
+		}
+
+		if (match.mask->src) {
+			if (match.mask->src == cpu_to_be32(0xffffffff)) {
+				field_flags |= IAVF_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n",
+					be32_to_cpu(match.mask->src));
+				return -EINVAL;
+			}
+		}
+
+		if (field_flags & IAVF_CLOUD_FIELD_TEN_ID) {
+			dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n");
+			return -EINVAL;
+		}
+		if (match.key->dst) {
+			vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff);
+			vf->data.tcp_spec.dst_ip[0] = match.key->dst;
+		}
+		if (match.key->src) {
+			vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff);
+			vf->data.tcp_spec.src_ip[0] = match.key->src;
+		}
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		struct flow_match_ipv6_addrs match;
+
+		flow_rule_match_ipv6_addrs(rule, &match);
+
+		/* validate mask, make sure it is not IPV6_ADDR_ANY */
+		if (ipv6_addr_any(&match.mask->dst)) {
+			dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n",
+				IPV6_ADDR_ANY);
+			return -EINVAL;
+		}
+
+		/* src and dest IPv6 address should not be LOOPBACK
+		 * (0:0:0:0:0:0:0:1) which can be represented as ::1
+		 */
+		if (ipv6_addr_loopback(&match.key->dst) ||
+		    ipv6_addr_loopback(&match.key->src)) {
+			dev_err(&adapter->pdev->dev,
+				"ipv6 addr should not be loopback\n");
+			return -EINVAL;
+		}
+		if (!ipv6_addr_any(&match.mask->dst) ||
+		    !ipv6_addr_any(&match.mask->src))
+			field_flags |= IAVF_CLOUD_FIELD_IIP;
+
+		for (i = 0; i < 4; i++)
+			vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff);
+		memcpy(&vf->data.tcp_spec.dst_ip, &match.key->dst.s6_addr32,
+		       sizeof(vf->data.tcp_spec.dst_ip));
+		for (i = 0; i < 4; i++)
+			vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff);
+		memcpy(&vf->data.tcp_spec.src_ip, &match.key->src.s6_addr32,
+		       sizeof(vf->data.tcp_spec.src_ip));
+	}
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(rule, &match);
+		if (match.mask->src) {
+			if (match.mask->src == cpu_to_be16(0xffff)) {
+				field_flags |= IAVF_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad src port mask %u\n",
+					be16_to_cpu(match.mask->src));
+				return -EINVAL;
+			}
+		}
+
+		if (match.mask->dst) {
+			if (match.mask->dst == cpu_to_be16(0xffff)) {
+				field_flags |= IAVF_CLOUD_FIELD_IIP;
+			} else {
+				dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n",
+					be16_to_cpu(match.mask->dst));
+				return -EINVAL;
+			}
+		}
+		if (match.key->dst) {
+			vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
+			vf->data.tcp_spec.dst_port = match.key->dst;
+		}
+
+		if (match.key->src) {
+			vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
+			vf->data.tcp_spec.src_port = match.key->src;
+		}
+	}
+	vf->field_flags = field_flags;
+
+	return 0;
+}
+
+/**
+ * iavf_handle_tclass - Forward to a traffic class on the device
+ * @adapter: board private structure
+ * @tc: traffic class index on the device
+ * @filter: pointer to cloud filter structure
+ */
+static int iavf_handle_tclass(struct iavf_adapter *adapter, u32 tc,
+			      struct iavf_cloud_filter *filter)
+{
+	if (tc == 0)
+		return 0;
+	if (tc < adapter->num_tc) {
+		if (!filter->f.data.tcp_spec.dst_port) {
+			dev_err(&adapter->pdev->dev,
+				"Specify destination port to redirect to traffic class other than TC0\n");
+			return -EINVAL;
+		}
+	}
+	/* redirect to a traffic class on the same device */
+	filter->f.action = VIRTCHNL_ACTION_TC_REDIRECT;
+	filter->f.action_meta = tc;
+	return 0;
+}
+
+/**
+ * iavf_find_cf - Find the cloud filter in the list
+ * @adapter: Board private structure
+ * @cookie: filter specific cookie
+ *
+ * Returns ptr to the filter object or NULL. Must be called while holding the
+ * cloud_filter_list_lock.
+ */
+static struct iavf_cloud_filter *iavf_find_cf(struct iavf_adapter *adapter,
+					      unsigned long *cookie)
+{
+	struct iavf_cloud_filter *filter = NULL;
+
+	if (!cookie)
+		return NULL;
+
+	list_for_each_entry(filter, &adapter->cloud_filter_list, list) {
+		if (!memcmp(cookie, &filter->cookie, sizeof(filter->cookie)))
+			return filter;
+	}
+	return NULL;
+}
+
+/**
+ * iavf_configure_clsflower - Add tc flower filters
+ * @adapter: board private structure
+ * @cls_flower: Pointer to struct flow_cls_offload
+ */
+static int iavf_configure_clsflower(struct iavf_adapter *adapter,
+				    struct flow_cls_offload *cls_flower)
+{
+	int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
+	struct iavf_cloud_filter *filter = NULL;
+	int err = -EINVAL, count = 50;
+
+	if (tc < 0) {
+		dev_err(&adapter->pdev->dev, "Invalid traffic class\n");
+		return -EINVAL;
+	}
+
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
+		return -ENOMEM;
+
+	while (!mutex_trylock(&adapter->crit_lock)) {
+		if (--count == 0) {
+			kfree(filter);
+			return err;
+		}
+		udelay(1);
+	}
+
+	filter->cookie = cls_flower->cookie;
+
+	/* bail out here if filter already exists */
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	if (iavf_find_cf(adapter, &cls_flower->cookie)) {
+		dev_err(&adapter->pdev->dev, "Failed to add TC Flower filter, it already exists\n");
+		err = -EEXIST;
+		goto spin_unlock;
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+	/* set the mask to all zeroes to begin with */
+	memset(&filter->f.mask.tcp_spec, 0, sizeof(struct virtchnl_l4_spec));
+	/* start out with flow type and eth type IPv4 to begin with */
+	filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW;
+	err = iavf_parse_cls_flower(adapter, cls_flower, filter);
+	if (err)
+		goto err;
+
+	err = iavf_handle_tclass(adapter, tc, filter);
+	if (err)
+		goto err;
+
+	/* add filter to the list */
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_add_tail(&filter->list, &adapter->cloud_filter_list);
+	adapter->num_cloud_filters++;
+	filter->add = true;
+	adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
+spin_unlock:
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+err:
+	if (err)
+		kfree(filter);
+
+	mutex_unlock(&adapter->crit_lock);
+	return err;
+}
+
+/**
+ * iavf_delete_clsflower - Remove tc flower filters
+ * @adapter: board private structure
+ * @cls_flower: Pointer to struct flow_cls_offload
+ */
+static int iavf_delete_clsflower(struct iavf_adapter *adapter,
+				 struct flow_cls_offload *cls_flower)
+{
+	struct iavf_cloud_filter *filter = NULL;
+	int err = 0;
+
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	filter = iavf_find_cf(adapter, &cls_flower->cookie);
+	if (filter) {
+		filter->del = true;
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_CLOUD_FILTER;
+	} else {
+		err = -EINVAL;
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+	return err;
+}
+
+/**
+ * iavf_setup_tc_cls_flower - flower classifier offloads
+ * @adapter: board private structure
+ * @cls_flower: pointer to flow_cls_offload struct with flow info
+ */
+static int iavf_setup_tc_cls_flower(struct iavf_adapter *adapter,
+				    struct flow_cls_offload *cls_flower)
+{
+	switch (cls_flower->command) {
+	case FLOW_CLS_REPLACE:
+		return iavf_configure_clsflower(adapter, cls_flower);
+	case FLOW_CLS_DESTROY:
+		return iavf_delete_clsflower(adapter, cls_flower);
+	case FLOW_CLS_STATS:
+		return -EOPNOTSUPP;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * iavf_setup_tc_block_cb - block callback for tc
+ * @type: type of offload
+ * @type_data: offload data
+ * @cb_priv:
+ *
+ * This function is the block callback for traffic classes
+ **/
+static int iavf_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+				  void *cb_priv)
+{
+	struct iavf_adapter *adapter = cb_priv;
+
+	if (!tc_cls_can_offload_and_chain0(adapter->netdev, type_data))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return iavf_setup_tc_cls_flower(cb_priv, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static LIST_HEAD(iavf_block_cb_list);
+
+/**
+ * iavf_setup_tc - configure multiple traffic classes
+ * @netdev: network interface device structure
+ * @type: type of offload
+ * @type_data: tc offload data
+ *
+ * This function is the callback to ndo_setup_tc in the
+ * netdev_ops.
+ *
+ * Returns 0 on success
+ **/
+static int iavf_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+			 void *type_data)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	switch (type) {
+	case TC_SETUP_QDISC_MQPRIO:
+		return __iavf_setup_tc(netdev, type_data);
+	case TC_SETUP_BLOCK:
+		return flow_block_cb_setup_simple(type_data,
+						  &iavf_block_cb_list,
+						  iavf_setup_tc_block_cb,
+						  adapter, adapter, true);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * iavf_restore_fdir_filters
+ * @adapter: board private structure
+ *
+ * Restore existing FDIR filters when VF netdev comes back up.
+ **/
+static void iavf_restore_fdir_filters(struct iavf_adapter *adapter)
+{
+	struct iavf_fdir_fltr *f;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry(f, &adapter->fdir_list_head, list) {
+		if (f->state == IAVF_FDIR_FLTR_DIS_REQUEST) {
+			/* Cancel a request, keep filter as active */
+			f->state = IAVF_FDIR_FLTR_ACTIVE;
+		} else if (f->state == IAVF_FDIR_FLTR_DIS_PENDING ||
+			   f->state == IAVF_FDIR_FLTR_INACTIVE) {
+			/* Add filters which are inactive or have a pending
+			 * request to PF to be deleted
+			 */
+			f->state = IAVF_FDIR_FLTR_ADD_REQUEST;
+			adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER;
+		}
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+}
+
+/**
+ * iavf_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog is started,
+ * and the stack is notified that the interface is ready.
+ **/
+static int iavf_open(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	int err;
+
+	if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) {
+		dev_err(&adapter->pdev->dev, "Unable to open device due to PF driver failure.\n");
+		return -EIO;
+	}
+
+	while (!mutex_trylock(&adapter->crit_lock)) {
+		/* If we are in __IAVF_INIT_CONFIG_ADAPTER state the crit_lock
+		 * is already taken and iavf_open is called from an upper
+		 * device's notifier reacting on NETDEV_REGISTER event.
+		 * We have to leave here to avoid dead lock.
+		 */
+		if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER)
+			return -EBUSY;
+
+		usleep_range(500, 1000);
+	}
+
+	if (adapter->state != __IAVF_DOWN) {
+		err = -EBUSY;
+		goto err_unlock;
+	}
+
+	if (adapter->state == __IAVF_RUNNING &&
+	    !test_bit(__IAVF_VSI_DOWN, adapter->vsi.state)) {
+		dev_dbg(&adapter->pdev->dev, "VF is already open.\n");
+		err = 0;
+		goto err_unlock;
+	}
+
+	/* allocate transmit descriptors */
+	err = iavf_setup_all_tx_resources(adapter);
+	if (err)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	err = iavf_setup_all_rx_resources(adapter);
+	if (err)
+		goto err_setup_rx;
+
+	/* clear any pending interrupts, may auto mask */
+	err = iavf_request_traffic_irqs(adapter, netdev->name);
+	if (err)
+		goto err_req_irq;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	iavf_add_filter(adapter, adapter->hw.mac.addr);
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	/* Restore filters that were removed with IFF_DOWN */
+	iavf_restore_filters(adapter);
+	iavf_restore_fdir_filters(adapter);
+
+	iavf_configure(adapter);
+
+	iavf_up_complete(adapter);
+
+	iavf_irq_enable(adapter, true);
+
+	mutex_unlock(&adapter->crit_lock);
+
+	return 0;
+
+err_req_irq:
+	iavf_down(adapter);
+	iavf_free_traffic_irqs(adapter);
+err_setup_rx:
+	iavf_free_all_rx_resources(adapter);
+err_setup_tx:
+	iavf_free_all_tx_resources(adapter);
+err_unlock:
+	mutex_unlock(&adapter->crit_lock);
+
+	return err;
+}
+
+/**
+ * iavf_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the drivers control, but
+ * needs to be disabled. All IRQs except vector 0 (reserved for admin queue)
+ * are freed, along with all transmit and receive resources.
+ **/
+static int iavf_close(struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	u64 aq_to_restore;
+	int status;
+
+	mutex_lock(&adapter->crit_lock);
+
+	if (adapter->state <= __IAVF_DOWN_PENDING) {
+		mutex_unlock(&adapter->crit_lock);
+		return 0;
+	}
+
+	set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+	if (CLIENT_ENABLED(adapter))
+		adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_CLOSE;
+	/* We cannot send IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS before
+	 * IAVF_FLAG_AQ_DISABLE_QUEUES because in such case there is rtnl
+	 * deadlock with adminq_task() until iavf_close timeouts. We must send
+	 * IAVF_FLAG_AQ_GET_CONFIG before IAVF_FLAG_AQ_DISABLE_QUEUES to make
+	 * disable queues possible for vf. Give only necessary flags to
+	 * iavf_down and save other to set them right before iavf_close()
+	 * returns, when IAVF_FLAG_AQ_DISABLE_QUEUES will be already sent and
+	 * iavf will be in DOWN state.
+	 */
+	aq_to_restore = adapter->aq_required;
+	adapter->aq_required &= IAVF_FLAG_AQ_GET_CONFIG;
+
+	/* Remove flags which we do not want to send after close or we want to
+	 * send before disable queues.
+	 */
+	aq_to_restore &= ~(IAVF_FLAG_AQ_GET_CONFIG		|
+			   IAVF_FLAG_AQ_ENABLE_QUEUES		|
+			   IAVF_FLAG_AQ_CONFIGURE_QUEUES	|
+			   IAVF_FLAG_AQ_ADD_VLAN_FILTER		|
+			   IAVF_FLAG_AQ_ADD_MAC_FILTER		|
+			   IAVF_FLAG_AQ_ADD_CLOUD_FILTER	|
+			   IAVF_FLAG_AQ_ADD_FDIR_FILTER		|
+			   IAVF_FLAG_AQ_ADD_ADV_RSS_CFG);
+
+	iavf_down(adapter);
+	iavf_change_state(adapter, __IAVF_DOWN_PENDING);
+	iavf_free_traffic_irqs(adapter);
+
+	mutex_unlock(&adapter->crit_lock);
+
+	/* We explicitly don't free resources here because the hardware is
+	 * still active and can DMA into memory. Resources are cleared in
+	 * iavf_virtchnl_completion() after we get confirmation from the PF
+	 * driver that the rings have been stopped.
+	 *
+	 * Also, we wait for state to transition to __IAVF_DOWN before
+	 * returning. State change occurs in iavf_virtchnl_completion() after
+	 * VF resources are released (which occurs after PF driver processes and
+	 * responds to admin queue commands).
+	 */
+
+	status = wait_event_timeout(adapter->down_waitqueue,
+				    adapter->state == __IAVF_DOWN,
+				    msecs_to_jiffies(500));
+	if (!status)
+		netdev_warn(netdev, "Device resources not yet released\n");
+
+	mutex_lock(&adapter->crit_lock);
+	adapter->aq_required |= aq_to_restore;
+	mutex_unlock(&adapter->crit_lock);
+	return 0;
+}
+
+/**
+ * iavf_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	int ret = 0;
+
+	netdev_dbg(netdev, "changing MTU from %d to %d\n",
+		   netdev->mtu, new_mtu);
+	netdev->mtu = new_mtu;
+	if (CLIENT_ENABLED(adapter)) {
+		iavf_notify_client_l2_params(&adapter->vsi);
+		adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED;
+	}
+
+	if (netif_running(netdev)) {
+		iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+		ret = iavf_wait_for_reset(adapter);
+		if (ret < 0)
+			netdev_warn(netdev, "MTU change interrupted waiting for reset");
+		else if (ret)
+			netdev_warn(netdev, "MTU change timed out waiting for reset");
+	}
+
+	return ret;
+}
+
+/**
+ * iavf_disable_fdir - disable Flow Director and clear existing filters
+ * @adapter: board private structure
+ **/
+static void iavf_disable_fdir(struct iavf_adapter *adapter)
+{
+	struct iavf_fdir_fltr *fdir, *fdirtmp;
+	bool del_filters = false;
+
+	adapter->flags &= ~IAVF_FLAG_FDIR_ENABLED;
+
+	/* remove all Flow Director filters */
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry_safe(fdir, fdirtmp, &adapter->fdir_list_head,
+				 list) {
+		if (fdir->state == IAVF_FDIR_FLTR_ADD_REQUEST ||
+		    fdir->state == IAVF_FDIR_FLTR_INACTIVE) {
+			/* Delete filters not registered in PF */
+			list_del(&fdir->list);
+			kfree(fdir);
+			adapter->fdir_active_fltr--;
+		} else if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING ||
+			   fdir->state == IAVF_FDIR_FLTR_DIS_REQUEST ||
+			   fdir->state == IAVF_FDIR_FLTR_ACTIVE) {
+			/* Filters registered in PF, schedule their deletion */
+			fdir->state = IAVF_FDIR_FLTR_DEL_REQUEST;
+			del_filters = true;
+		} else if (fdir->state == IAVF_FDIR_FLTR_DIS_PENDING) {
+			/* Request to delete filter already sent to PF, change
+			 * state to DEL_PENDING to delete filter after PF's
+			 * response, not set as INACTIVE
+			 */
+			fdir->state = IAVF_FDIR_FLTR_DEL_PENDING;
+		}
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	if (del_filters) {
+		adapter->aq_required |= IAVF_FLAG_AQ_DEL_FDIR_FILTER;
+		mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0);
+	}
+}
+
+#define NETIF_VLAN_OFFLOAD_FEATURES	(NETIF_F_HW_VLAN_CTAG_RX | \
+					 NETIF_F_HW_VLAN_CTAG_TX | \
+					 NETIF_F_HW_VLAN_STAG_RX | \
+					 NETIF_F_HW_VLAN_STAG_TX)
+
+/**
+ * iavf_set_features - set the netdev feature flags
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ * Note: expects to be called while under rtnl_lock()
+ **/
+static int iavf_set_features(struct net_device *netdev,
+			     netdev_features_t features)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	/* trigger update on any VLAN feature change */
+	if ((netdev->features & NETIF_VLAN_OFFLOAD_FEATURES) ^
+	    (features & NETIF_VLAN_OFFLOAD_FEATURES))
+		iavf_set_vlan_offload_features(adapter, netdev->features,
+					       features);
+
+	if ((netdev->features & NETIF_F_NTUPLE) ^ (features & NETIF_F_NTUPLE)) {
+		if (features & NETIF_F_NTUPLE)
+			adapter->flags |= IAVF_FLAG_FDIR_ENABLED;
+		else
+			iavf_disable_fdir(adapter);
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_features_check - Validate encapsulated packet conforms to limits
+ * @skb: skb buff
+ * @dev: This physical port's netdev
+ * @features: Offload features that the stack believes apply
+ **/
+static netdev_features_t iavf_features_check(struct sk_buff *skb,
+					     struct net_device *dev,
+					     netdev_features_t features)
+{
+	size_t len;
+
+	/* No point in doing any of this if neither checksum nor GSO are
+	 * being requested for this frame.  We can rule out both by just
+	 * checking for CHECKSUM_PARTIAL
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return features;
+
+	/* We cannot support GSO if the MSS is going to be less than
+	 * 64 bytes.  If it is then we need to drop support for GSO.
+	 */
+	if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
+		features &= ~NETIF_F_GSO_MASK;
+
+	/* MACLEN can support at most 63 words */
+	len = skb_network_header(skb) - skb->data;
+	if (len & ~(63 * 2))
+		goto out_err;
+
+	/* IPLEN and EIPLEN can support at most 127 dwords */
+	len = skb_transport_header(skb) - skb_network_header(skb);
+	if (len & ~(127 * 4))
+		goto out_err;
+
+	if (skb->encapsulation) {
+		/* L4TUNLEN can support 127 words */
+		len = skb_inner_network_header(skb) - skb_transport_header(skb);
+		if (len & ~(127 * 2))
+			goto out_err;
+
+		/* IPLEN can support at most 127 dwords */
+		len = skb_inner_transport_header(skb) -
+		      skb_inner_network_header(skb);
+		if (len & ~(127 * 4))
+			goto out_err;
+	}
+
+	/* No need to validate L4LEN as TCP is the only protocol with a
+	 * flexible value and we support all possible values supported
+	 * by TCP, which is at most 15 dwords
+	 */
+
+	return features;
+out_err:
+	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+/**
+ * iavf_get_netdev_vlan_hw_features - get NETDEV VLAN features that can toggle on/off
+ * @adapter: board private structure
+ *
+ * Depending on whether VIRTHCNL_VF_OFFLOAD_VLAN or VIRTCHNL_VF_OFFLOAD_VLAN_V2
+ * were negotiated determine the VLAN features that can be toggled on and off.
+ **/
+static netdev_features_t
+iavf_get_netdev_vlan_hw_features(struct iavf_adapter *adapter)
+{
+	netdev_features_t hw_features = 0;
+
+	if (!adapter->vf_res || !adapter->vf_res->vf_cap_flags)
+		return hw_features;
+
+	/* Enable VLAN features if supported */
+	if (VLAN_ALLOWED(adapter)) {
+		hw_features |= (NETIF_F_HW_VLAN_CTAG_TX |
+				NETIF_F_HW_VLAN_CTAG_RX);
+	} else if (VLAN_V2_ALLOWED(adapter)) {
+		struct virtchnl_vlan_caps *vlan_v2_caps =
+			&adapter->vlan_v2_caps;
+		struct virtchnl_vlan_supported_caps *stripping_support =
+			&vlan_v2_caps->offloads.stripping_support;
+		struct virtchnl_vlan_supported_caps *insertion_support =
+			&vlan_v2_caps->offloads.insertion_support;
+
+		if (stripping_support->outer != VIRTCHNL_VLAN_UNSUPPORTED &&
+		    stripping_support->outer & VIRTCHNL_VLAN_TOGGLE) {
+			if (stripping_support->outer &
+			    VIRTCHNL_VLAN_ETHERTYPE_8100)
+				hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+			if (stripping_support->outer &
+			    VIRTCHNL_VLAN_ETHERTYPE_88A8)
+				hw_features |= NETIF_F_HW_VLAN_STAG_RX;
+		} else if (stripping_support->inner !=
+			   VIRTCHNL_VLAN_UNSUPPORTED &&
+			   stripping_support->inner & VIRTCHNL_VLAN_TOGGLE) {
+			if (stripping_support->inner &
+			    VIRTCHNL_VLAN_ETHERTYPE_8100)
+				hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+		}
+
+		if (insertion_support->outer != VIRTCHNL_VLAN_UNSUPPORTED &&
+		    insertion_support->outer & VIRTCHNL_VLAN_TOGGLE) {
+			if (insertion_support->outer &
+			    VIRTCHNL_VLAN_ETHERTYPE_8100)
+				hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
+			if (insertion_support->outer &
+			    VIRTCHNL_VLAN_ETHERTYPE_88A8)
+				hw_features |= NETIF_F_HW_VLAN_STAG_TX;
+		} else if (insertion_support->inner &&
+			   insertion_support->inner & VIRTCHNL_VLAN_TOGGLE) {
+			if (insertion_support->inner &
+			    VIRTCHNL_VLAN_ETHERTYPE_8100)
+				hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
+		}
+	}
+
+	return hw_features;
+}
+
+/**
+ * iavf_get_netdev_vlan_features - get the enabled NETDEV VLAN fetures
+ * @adapter: board private structure
+ *
+ * Depending on whether VIRTHCNL_VF_OFFLOAD_VLAN or VIRTCHNL_VF_OFFLOAD_VLAN_V2
+ * were negotiated determine the VLAN features that are enabled by default.
+ **/
+static netdev_features_t
+iavf_get_netdev_vlan_features(struct iavf_adapter *adapter)
+{
+	netdev_features_t features = 0;
+
+	if (!adapter->vf_res || !adapter->vf_res->vf_cap_flags)
+		return features;
+
+	if (VLAN_ALLOWED(adapter)) {
+		features |= NETIF_F_HW_VLAN_CTAG_FILTER |
+			NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX;
+	} else if (VLAN_V2_ALLOWED(adapter)) {
+		struct virtchnl_vlan_caps *vlan_v2_caps =
+			&adapter->vlan_v2_caps;
+		struct virtchnl_vlan_supported_caps *filtering_support =
+			&vlan_v2_caps->filtering.filtering_support;
+		struct virtchnl_vlan_supported_caps *stripping_support =
+			&vlan_v2_caps->offloads.stripping_support;
+		struct virtchnl_vlan_supported_caps *insertion_support =
+			&vlan_v2_caps->offloads.insertion_support;
+		u32 ethertype_init;
+
+		/* give priority to outer stripping and don't support both outer
+		 * and inner stripping
+		 */
+		ethertype_init = vlan_v2_caps->offloads.ethertype_init;
+		if (stripping_support->outer != VIRTCHNL_VLAN_UNSUPPORTED) {
+			if (stripping_support->outer &
+			    VIRTCHNL_VLAN_ETHERTYPE_8100 &&
+			    ethertype_init & VIRTCHNL_VLAN_ETHERTYPE_8100)
+				features |= NETIF_F_HW_VLAN_CTAG_RX;
+			else if (stripping_support->outer &
+				 VIRTCHNL_VLAN_ETHERTYPE_88A8 &&
+				 ethertype_init & VIRTCHNL_VLAN_ETHERTYPE_88A8)
+				features |= NETIF_F_HW_VLAN_STAG_RX;
+		} else if (stripping_support->inner !=
+			   VIRTCHNL_VLAN_UNSUPPORTED) {
+			if (stripping_support->inner &
+			    VIRTCHNL_VLAN_ETHERTYPE_8100 &&
+			    ethertype_init & VIRTCHNL_VLAN_ETHERTYPE_8100)
+				features |= NETIF_F_HW_VLAN_CTAG_RX;
+		}
+
+		/* give priority to outer insertion and don't support both outer
+		 * and inner insertion
+		 */
+		if (insertion_support->outer != VIRTCHNL_VLAN_UNSUPPORTED) {
+			if (insertion_support->outer &
+			    VIRTCHNL_VLAN_ETHERTYPE_8100 &&
+			    ethertype_init & VIRTCHNL_VLAN_ETHERTYPE_8100)
+				features |= NETIF_F_HW_VLAN_CTAG_TX;
+			else if (insertion_support->outer &
+				 VIRTCHNL_VLAN_ETHERTYPE_88A8 &&
+				 ethertype_init & VIRTCHNL_VLAN_ETHERTYPE_88A8)
+				features |= NETIF_F_HW_VLAN_STAG_TX;
+		} else if (insertion_support->inner !=
+			   VIRTCHNL_VLAN_UNSUPPORTED) {
+			if (insertion_support->inner &
+			    VIRTCHNL_VLAN_ETHERTYPE_8100 &&
+			    ethertype_init & VIRTCHNL_VLAN_ETHERTYPE_8100)
+				features |= NETIF_F_HW_VLAN_CTAG_TX;
+		}
+
+		/* give priority to outer filtering and don't bother if both
+		 * outer and inner filtering are enabled
+		 */
+		ethertype_init = vlan_v2_caps->filtering.ethertype_init;
+		if (filtering_support->outer != VIRTCHNL_VLAN_UNSUPPORTED) {
+			if (filtering_support->outer &
+			    VIRTCHNL_VLAN_ETHERTYPE_8100 &&
+			    ethertype_init & VIRTCHNL_VLAN_ETHERTYPE_8100)
+				features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+			if (filtering_support->outer &
+			    VIRTCHNL_VLAN_ETHERTYPE_88A8 &&
+			    ethertype_init & VIRTCHNL_VLAN_ETHERTYPE_88A8)
+				features |= NETIF_F_HW_VLAN_STAG_FILTER;
+		} else if (filtering_support->inner !=
+			   VIRTCHNL_VLAN_UNSUPPORTED) {
+			if (filtering_support->inner &
+			    VIRTCHNL_VLAN_ETHERTYPE_8100 &&
+			    ethertype_init & VIRTCHNL_VLAN_ETHERTYPE_8100)
+				features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+			if (filtering_support->inner &
+			    VIRTCHNL_VLAN_ETHERTYPE_88A8 &&
+			    ethertype_init & VIRTCHNL_VLAN_ETHERTYPE_88A8)
+				features |= NETIF_F_HW_VLAN_STAG_FILTER;
+		}
+	}
+
+	return features;
+}
+
+#define IAVF_NETDEV_VLAN_FEATURE_ALLOWED(requested, allowed, feature_bit) \
+	(!(((requested) & (feature_bit)) && \
+	   !((allowed) & (feature_bit))))
+
+/**
+ * iavf_fix_netdev_vlan_features - fix NETDEV VLAN features based on support
+ * @adapter: board private structure
+ * @requested_features: stack requested NETDEV features
+ **/
+static netdev_features_t
+iavf_fix_netdev_vlan_features(struct iavf_adapter *adapter,
+			      netdev_features_t requested_features)
+{
+	netdev_features_t allowed_features;
+
+	allowed_features = iavf_get_netdev_vlan_hw_features(adapter) |
+		iavf_get_netdev_vlan_features(adapter);
+
+	if (!IAVF_NETDEV_VLAN_FEATURE_ALLOWED(requested_features,
+					      allowed_features,
+					      NETIF_F_HW_VLAN_CTAG_TX))
+		requested_features &= ~NETIF_F_HW_VLAN_CTAG_TX;
+
+	if (!IAVF_NETDEV_VLAN_FEATURE_ALLOWED(requested_features,
+					      allowed_features,
+					      NETIF_F_HW_VLAN_CTAG_RX))
+		requested_features &= ~NETIF_F_HW_VLAN_CTAG_RX;
+
+	if (!IAVF_NETDEV_VLAN_FEATURE_ALLOWED(requested_features,
+					      allowed_features,
+					      NETIF_F_HW_VLAN_STAG_TX))
+		requested_features &= ~NETIF_F_HW_VLAN_STAG_TX;
+	if (!IAVF_NETDEV_VLAN_FEATURE_ALLOWED(requested_features,
+					      allowed_features,
+					      NETIF_F_HW_VLAN_STAG_RX))
+		requested_features &= ~NETIF_F_HW_VLAN_STAG_RX;
+
+	if (!IAVF_NETDEV_VLAN_FEATURE_ALLOWED(requested_features,
+					      allowed_features,
+					      NETIF_F_HW_VLAN_CTAG_FILTER))
+		requested_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+
+	if (!IAVF_NETDEV_VLAN_FEATURE_ALLOWED(requested_features,
+					      allowed_features,
+					      NETIF_F_HW_VLAN_STAG_FILTER))
+		requested_features &= ~NETIF_F_HW_VLAN_STAG_FILTER;
+
+	if ((requested_features &
+	     (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX)) &&
+	    (requested_features &
+	     (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX)) &&
+	    adapter->vlan_v2_caps.offloads.ethertype_match ==
+	    VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION) {
+		netdev_warn(adapter->netdev, "cannot support CTAG and STAG VLAN stripping and/or insertion simultaneously since CTAG and STAG offloads are mutually exclusive, clearing STAG offload settings\n");
+		requested_features &= ~(NETIF_F_HW_VLAN_STAG_RX |
+					NETIF_F_HW_VLAN_STAG_TX);
+	}
+
+	return requested_features;
+}
+
+/**
+ * iavf_fix_features - fix up the netdev feature bits
+ * @netdev: our net device
+ * @features: desired feature bits
+ *
+ * Returns fixed-up features bits
+ **/
+static netdev_features_t iavf_fix_features(struct net_device *netdev,
+					   netdev_features_t features)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	if (!FDIR_FLTR_SUPPORT(adapter))
+		features &= ~NETIF_F_NTUPLE;
+
+	return iavf_fix_netdev_vlan_features(adapter, features);
+}
+
+static const struct net_device_ops iavf_netdev_ops = {
+	.ndo_open		= iavf_open,
+	.ndo_stop		= iavf_close,
+	.ndo_start_xmit		= iavf_xmit_frame,
+	.ndo_set_rx_mode	= iavf_set_rx_mode,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_set_mac_address	= iavf_set_mac,
+	.ndo_change_mtu		= iavf_change_mtu,
+	.ndo_tx_timeout		= iavf_tx_timeout,
+	.ndo_vlan_rx_add_vid	= iavf_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= iavf_vlan_rx_kill_vid,
+	.ndo_features_check	= iavf_features_check,
+	.ndo_fix_features	= iavf_fix_features,
+	.ndo_set_features	= iavf_set_features,
+	.ndo_setup_tc		= iavf_setup_tc,
+};
+
+/**
+ * iavf_check_reset_complete - check that VF reset is complete
+ * @hw: pointer to hw struct
+ *
+ * Returns 0 if device is ready to use, or -EBUSY if it's in reset.
+ **/
+static int iavf_check_reset_complete(struct iavf_hw *hw)
+{
+	u32 rstat;
+	int i;
+
+	for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) {
+		rstat = rd32(hw, IAVF_VFGEN_RSTAT) &
+			     IAVF_VFGEN_RSTAT_VFR_STATE_MASK;
+		if ((rstat == VIRTCHNL_VFR_VFACTIVE) ||
+		    (rstat == VIRTCHNL_VFR_COMPLETED))
+			return 0;
+		usleep_range(10, 20);
+	}
+	return -EBUSY;
+}
+
+/**
+ * iavf_process_config - Process the config information we got from the PF
+ * @adapter: board private structure
+ *
+ * Verify that we have a valid config struct, and set up our netdev features
+ * and our VSI struct.
+ **/
+int iavf_process_config(struct iavf_adapter *adapter)
+{
+	struct virtchnl_vf_resource *vfres = adapter->vf_res;
+	netdev_features_t hw_vlan_features, vlan_features;
+	struct net_device *netdev = adapter->netdev;
+	netdev_features_t hw_enc_features;
+	netdev_features_t hw_features;
+
+	hw_enc_features = NETIF_F_SG			|
+			  NETIF_F_IP_CSUM		|
+			  NETIF_F_IPV6_CSUM		|
+			  NETIF_F_HIGHDMA		|
+			  NETIF_F_SOFT_FEATURES	|
+			  NETIF_F_TSO			|
+			  NETIF_F_TSO_ECN		|
+			  NETIF_F_TSO6			|
+			  NETIF_F_SCTP_CRC		|
+			  NETIF_F_RXHASH		|
+			  NETIF_F_RXCSUM		|
+			  0;
+
+	/* advertise to stack only if offloads for encapsulated packets is
+	 * supported
+	 */
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ENCAP) {
+		hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL	|
+				   NETIF_F_GSO_GRE		|
+				   NETIF_F_GSO_GRE_CSUM		|
+				   NETIF_F_GSO_IPXIP4		|
+				   NETIF_F_GSO_IPXIP6		|
+				   NETIF_F_GSO_UDP_TUNNEL_CSUM	|
+				   NETIF_F_GSO_PARTIAL		|
+				   0;
+
+		if (!(vfres->vf_cap_flags &
+		      VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM))
+			netdev->gso_partial_features |=
+				NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+		netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
+		netdev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
+		netdev->hw_enc_features |= hw_enc_features;
+	}
+	/* record features VLANs can make use of */
+	netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
+
+	/* Write features and hw_features separately to avoid polluting
+	 * with, or dropping, features that are set when we registered.
+	 */
+	hw_features = hw_enc_features;
+
+	/* get HW VLAN features that can be toggled */
+	hw_vlan_features = iavf_get_netdev_vlan_hw_features(adapter);
+
+	/* Enable cloud filter if ADQ is supported */
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ)
+		hw_features |= NETIF_F_HW_TC;
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_USO)
+		hw_features |= NETIF_F_GSO_UDP_L4;
+
+	netdev->hw_features |= hw_features | hw_vlan_features;
+	vlan_features = iavf_get_netdev_vlan_features(adapter);
+
+	netdev->features |= hw_features | vlan_features;
+
+	if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
+		netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+	if (FDIR_FLTR_SUPPORT(adapter)) {
+		netdev->hw_features |= NETIF_F_NTUPLE;
+		netdev->features |= NETIF_F_NTUPLE;
+		adapter->flags |= IAVF_FLAG_FDIR_ENABLED;
+	}
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+
+	/* Do not turn on offloads when they are requested to be turned off.
+	 * TSO needs minimum 576 bytes to work correctly.
+	 */
+	if (netdev->wanted_features) {
+		if (!(netdev->wanted_features & NETIF_F_TSO) ||
+		    netdev->mtu < 576)
+			netdev->features &= ~NETIF_F_TSO;
+		if (!(netdev->wanted_features & NETIF_F_TSO6) ||
+		    netdev->mtu < 576)
+			netdev->features &= ~NETIF_F_TSO6;
+		if (!(netdev->wanted_features & NETIF_F_TSO_ECN))
+			netdev->features &= ~NETIF_F_TSO_ECN;
+		if (!(netdev->wanted_features & NETIF_F_GRO))
+			netdev->features &= ~NETIF_F_GRO;
+		if (!(netdev->wanted_features & NETIF_F_GSO))
+			netdev->features &= ~NETIF_F_GSO;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in iavf_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * iavf_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct net_device *netdev;
+	struct iavf_adapter *adapter = NULL;
+	struct iavf_hw *hw = NULL;
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_err(&pdev->dev,
+			"DMA configuration failed: 0x%x\n", err);
+		goto err_dma;
+	}
+
+	err = pci_request_regions(pdev, iavf_driver_name);
+	if (err) {
+		dev_err(&pdev->dev,
+			"pci_request_regions failed 0x%x\n", err);
+		goto err_pci_reg;
+	}
+
+	pci_set_master(pdev);
+
+	netdev = alloc_etherdev_mq(sizeof(struct iavf_adapter),
+				   IAVF_MAX_REQ_QUEUES);
+	if (!netdev) {
+		err = -ENOMEM;
+		goto err_alloc_etherdev;
+	}
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	pci_set_drvdata(pdev, netdev);
+	adapter = netdev_priv(netdev);
+
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+
+	hw = &adapter->hw;
+	hw->back = adapter;
+
+	adapter->wq = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM,
+					      iavf_driver_name);
+	if (!adapter->wq) {
+		err = -ENOMEM;
+		goto err_alloc_wq;
+	}
+
+	adapter->msg_enable = BIT(DEFAULT_DEBUG_LEVEL_SHIFT) - 1;
+	iavf_change_state(adapter, __IAVF_STARTUP);
+
+	/* Call save state here because it relies on the adapter struct. */
+	pci_save_state(pdev);
+
+	hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
+			      pci_resource_len(pdev, 0));
+	if (!hw->hw_addr) {
+		err = -EIO;
+		goto err_ioremap;
+	}
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+	hw->bus.device = PCI_SLOT(pdev->devfn);
+	hw->bus.func = PCI_FUNC(pdev->devfn);
+	hw->bus.bus_id = pdev->bus->number;
+
+	/* set up the locks for the AQ, do this only once in probe
+	 * and destroy them only once in remove
+	 */
+	mutex_init(&adapter->crit_lock);
+	mutex_init(&adapter->client_lock);
+	mutex_init(&hw->aq.asq_mutex);
+	mutex_init(&hw->aq.arq_mutex);
+
+	spin_lock_init(&adapter->mac_vlan_list_lock);
+	spin_lock_init(&adapter->cloud_filter_list_lock);
+	spin_lock_init(&adapter->fdir_fltr_lock);
+	spin_lock_init(&adapter->adv_rss_lock);
+	spin_lock_init(&adapter->current_netdev_promisc_flags_lock);
+
+	INIT_LIST_HEAD(&adapter->mac_filter_list);
+	INIT_LIST_HEAD(&adapter->vlan_filter_list);
+	INIT_LIST_HEAD(&adapter->cloud_filter_list);
+	INIT_LIST_HEAD(&adapter->fdir_list_head);
+	INIT_LIST_HEAD(&adapter->adv_rss_list_head);
+
+	INIT_WORK(&adapter->reset_task, iavf_reset_task);
+	INIT_WORK(&adapter->adminq_task, iavf_adminq_task);
+	INIT_WORK(&adapter->finish_config, iavf_finish_config);
+	INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task);
+	INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task);
+
+	/* Setup the wait queue for indicating transition to down status */
+	init_waitqueue_head(&adapter->down_waitqueue);
+
+	/* Setup the wait queue for indicating transition to running state */
+	init_waitqueue_head(&adapter->reset_waitqueue);
+
+	/* Setup the wait queue for indicating virtchannel events */
+	init_waitqueue_head(&adapter->vc_waitqueue);
+
+	queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+			   msecs_to_jiffies(5 * (pdev->devfn & 0x07)));
+	/* Initialization goes on in the work. Do not add more of it below. */
+	return 0;
+
+err_ioremap:
+	destroy_workqueue(adapter->wq);
+err_alloc_wq:
+	free_netdev(netdev);
+err_alloc_etherdev:
+	pci_release_regions(pdev);
+err_pci_reg:
+err_dma:
+	pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * iavf_suspend - Power management suspend routine
+ * @dev_d: device info pointer
+ *
+ * Called when the system (VM) is entering sleep/suspend.
+ **/
+static int __maybe_unused iavf_suspend(struct device *dev_d)
+{
+	struct net_device *netdev = dev_get_drvdata(dev_d);
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+
+	netif_device_detach(netdev);
+
+	while (!mutex_trylock(&adapter->crit_lock))
+		usleep_range(500, 1000);
+
+	if (netif_running(netdev)) {
+		rtnl_lock();
+		iavf_down(adapter);
+		rtnl_unlock();
+	}
+	iavf_free_misc_irq(adapter);
+	iavf_reset_interrupt_capability(adapter);
+
+	mutex_unlock(&adapter->crit_lock);
+
+	return 0;
+}
+
+/**
+ * iavf_resume - Power management resume routine
+ * @dev_d: device info pointer
+ *
+ * Called when the system (VM) is resumed from sleep/suspend.
+ **/
+static int __maybe_unused iavf_resume(struct device *dev_d)
+{
+	struct pci_dev *pdev = to_pci_dev(dev_d);
+	struct iavf_adapter *adapter;
+	u32 err;
+
+	adapter = iavf_pdev_to_adapter(pdev);
+
+	pci_set_master(pdev);
+
+	rtnl_lock();
+	err = iavf_set_interrupt_capability(adapter);
+	if (err) {
+		rtnl_unlock();
+		dev_err(&pdev->dev, "Cannot enable MSI-X interrupts.\n");
+		return err;
+	}
+	err = iavf_request_misc_irq(adapter);
+	rtnl_unlock();
+	if (err) {
+		dev_err(&pdev->dev, "Cannot get interrupt vector.\n");
+		return err;
+	}
+
+	queue_work(adapter->wq, &adapter->reset_task);
+
+	netif_device_attach(adapter->netdev);
+
+	return err;
+}
+
+/**
+ * iavf_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * iavf_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.  The could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void iavf_remove(struct pci_dev *pdev)
+{
+	struct iavf_fdir_fltr *fdir, *fdirtmp;
+	struct iavf_vlan_filter *vlf, *vlftmp;
+	struct iavf_cloud_filter *cf, *cftmp;
+	struct iavf_adv_rss *rss, *rsstmp;
+	struct iavf_mac_filter *f, *ftmp;
+	struct iavf_adapter *adapter;
+	struct net_device *netdev;
+	struct iavf_hw *hw;
+	int err;
+
+	/* Don't proceed with remove if netdev is already freed */
+	netdev = pci_get_drvdata(pdev);
+	if (!netdev)
+		return;
+
+	adapter = iavf_pdev_to_adapter(pdev);
+	hw = &adapter->hw;
+
+	if (test_and_set_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
+		return;
+
+	/* Wait until port initialization is complete.
+	 * There are flows where register/unregister netdev may race.
+	 */
+	while (1) {
+		mutex_lock(&adapter->crit_lock);
+		if (adapter->state == __IAVF_RUNNING ||
+		    adapter->state == __IAVF_DOWN ||
+		    adapter->state == __IAVF_INIT_FAILED) {
+			mutex_unlock(&adapter->crit_lock);
+			break;
+		}
+		/* Simply return if we already went through iavf_shutdown */
+		if (adapter->state == __IAVF_REMOVE) {
+			mutex_unlock(&adapter->crit_lock);
+			return;
+		}
+
+		mutex_unlock(&adapter->crit_lock);
+		usleep_range(500, 1000);
+	}
+	cancel_delayed_work_sync(&adapter->watchdog_task);
+	cancel_work_sync(&adapter->finish_config);
+
+	rtnl_lock();
+	if (adapter->netdev_registered) {
+		unregister_netdevice(netdev);
+		adapter->netdev_registered = false;
+	}
+	rtnl_unlock();
+
+	if (CLIENT_ALLOWED(adapter)) {
+		err = iavf_lan_del_device(adapter);
+		if (err)
+			dev_warn(&pdev->dev, "Failed to delete client device: %d\n",
+				 err);
+	}
+
+	mutex_lock(&adapter->crit_lock);
+	dev_info(&adapter->pdev->dev, "Removing device\n");
+	iavf_change_state(adapter, __IAVF_REMOVE);
+
+	iavf_request_reset(adapter);
+	msleep(50);
+	/* If the FW isn't responding, kick it once, but only once. */
+	if (!iavf_asq_done(hw)) {
+		iavf_request_reset(adapter);
+		msleep(50);
+	}
+
+	iavf_misc_irq_disable(adapter);
+	/* Shut down all the garbage mashers on the detention level */
+	cancel_work_sync(&adapter->reset_task);
+	cancel_delayed_work_sync(&adapter->watchdog_task);
+	cancel_work_sync(&adapter->adminq_task);
+	cancel_delayed_work_sync(&adapter->client_task);
+
+	adapter->aq_required = 0;
+	adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+
+	iavf_free_all_tx_resources(adapter);
+	iavf_free_all_rx_resources(adapter);
+	iavf_free_misc_irq(adapter);
+
+	iavf_reset_interrupt_capability(adapter);
+	iavf_free_q_vectors(adapter);
+
+	iavf_free_rss(adapter);
+
+	if (hw->aq.asq.count)
+		iavf_shutdown_adminq(hw);
+
+	/* destroy the locks only once, here */
+	mutex_destroy(&hw->aq.arq_mutex);
+	mutex_destroy(&hw->aq.asq_mutex);
+	mutex_destroy(&adapter->client_lock);
+	mutex_unlock(&adapter->crit_lock);
+	mutex_destroy(&adapter->crit_lock);
+
+	iounmap(hw->hw_addr);
+	pci_release_regions(pdev);
+	iavf_free_queues(adapter);
+	kfree(adapter->vf_res);
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+	/* If we got removed before an up/down sequence, we've got a filter
+	 * hanging out there that we need to get rid of.
+	 */
+	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
+		list_del(&f->list);
+		kfree(f);
+	}
+	list_for_each_entry_safe(vlf, vlftmp, &adapter->vlan_filter_list,
+				 list) {
+		list_del(&vlf->list);
+		kfree(vlf);
+	}
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	spin_lock_bh(&adapter->cloud_filter_list_lock);
+	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+		list_del(&cf->list);
+		kfree(cf);
+	}
+	spin_unlock_bh(&adapter->cloud_filter_list_lock);
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry_safe(fdir, fdirtmp, &adapter->fdir_list_head, list) {
+		list_del(&fdir->list);
+		kfree(fdir);
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	spin_lock_bh(&adapter->adv_rss_lock);
+	list_for_each_entry_safe(rss, rsstmp, &adapter->adv_rss_list_head,
+				 list) {
+		list_del(&rss->list);
+		kfree(rss);
+	}
+	spin_unlock_bh(&adapter->adv_rss_lock);
+
+	destroy_workqueue(adapter->wq);
+
+	pci_set_drvdata(pdev, NULL);
+
+	free_netdev(netdev);
+
+	pci_disable_device(pdev);
+}
+
+/**
+ * iavf_shutdown - Shutdown the device in preparation for a reboot
+ * @pdev: pci device structure
+ **/
+static void iavf_shutdown(struct pci_dev *pdev)
+{
+	iavf_remove(pdev);
+
+	if (system_state == SYSTEM_POWER_OFF)
+		pci_set_power_state(pdev, PCI_D3hot);
+}
+
+static SIMPLE_DEV_PM_OPS(iavf_pm_ops, iavf_suspend, iavf_resume);
+
+static struct pci_driver iavf_driver = {
+	.name      = iavf_driver_name,
+	.id_table  = iavf_pci_tbl,
+	.probe     = iavf_probe,
+	.remove    = iavf_remove,
+	.driver.pm = &iavf_pm_ops,
+	.shutdown  = iavf_shutdown,
+};
+
+/**
+ * iavf_init_module - Driver Registration Routine
+ *
+ * iavf_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ **/
+static int __init iavf_init_module(void)
+{
+	pr_info("iavf: %s\n", iavf_driver_string);
+
+	pr_info("%s\n", iavf_copyright);
+
+	return pci_register_driver(&iavf_driver);
+}
+
+module_init(iavf_init_module);
+
+/**
+ * iavf_exit_module - Driver Exit Cleanup Routine
+ *
+ * iavf_exit_module is called just before the driver is removed
+ * from memory.
+ **/
+static void __exit iavf_exit_module(void)
+{
+	pci_unregister_driver(&iavf_driver);
+}
+
+module_exit(iavf_exit_module);
+
+/* iavf_main.c */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_osdep.h b/drivers/net/ethernet/intel/iavf/iavf_osdep.h
new file mode 100644
index 0000000000..77d33deaab
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_osdep.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_OSDEP_H_
+#define _IAVF_OSDEP_H_
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/tcp.h>
+#include <linux/pci.h>
+
+/* get readq/writeq support for 32 bit kernels, use the low-first version */
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#define wr32(a, reg, value)	writel((value), ((a)->hw_addr + (reg)))
+#define rd32(a, reg)		readl((a)->hw_addr + (reg))
+
+#define wr64(a, reg, value)	writeq((value), ((a)->hw_addr + (reg)))
+#define rd64(a, reg)		readq((a)->hw_addr + (reg))
+#define iavf_flush(a)		readl((a)->hw_addr + IAVF_VFGEN_RSTAT)
+
+/* memory allocation tracking */
+struct iavf_dma_mem {
+	void *va;
+	dma_addr_t pa;
+	u32 size;
+};
+
+#define iavf_allocate_dma_mem(h, m, unused, s, a) \
+	iavf_allocate_dma_mem_d(h, m, s, a)
+
+struct iavf_virt_mem {
+	void *va;
+	u32 size;
+};
+
+#define iavf_debug(h, m, s, ...)				\
+do {								\
+	if (((m) & (h)->debug_mask))				\
+		pr_info("iavf %02x:%02x.%x " s,			\
+			(h)->bus.bus_id, (h)->bus.device,	\
+			(h)->bus.func, ##__VA_ARGS__);		\
+} while (0)
+
+#endif /* _IAVF_OSDEP_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_prototype.h b/drivers/net/ethernet/intel/iavf/iavf_prototype.h
new file mode 100644
index 0000000000..940cb4203f
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_prototype.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_PROTOTYPE_H_
+#define _IAVF_PROTOTYPE_H_
+
+#include "iavf_type.h"
+#include "iavf_alloc.h"
+#include <linux/avf/virtchnl.h>
+
+/* Prototypes for shared code functions that are not in
+ * the standard function pointer structures.  These are
+ * mostly because they are needed even before the init
+ * has happened and will assist in the early SW and FW
+ * setup.
+ */
+
+/* adminq functions */
+enum iavf_status iavf_init_adminq(struct iavf_hw *hw);
+enum iavf_status iavf_shutdown_adminq(struct iavf_hw *hw);
+void iavf_adminq_init_ring_data(struct iavf_hw *hw);
+enum iavf_status iavf_clean_arq_element(struct iavf_hw *hw,
+					struct iavf_arq_event_info *e,
+					u16 *events_pending);
+enum iavf_status iavf_asq_send_command(struct iavf_hw *hw,
+				       struct iavf_aq_desc *desc,
+				       void *buff, /* can be NULL */
+				       u16 buff_size,
+				       struct iavf_asq_cmd_details *cmd_details);
+bool iavf_asq_done(struct iavf_hw *hw);
+
+/* debug function for adminq */
+void iavf_debug_aq(struct iavf_hw *hw, enum iavf_debug_mask mask,
+		   void *desc, void *buffer, u16 buf_len);
+
+void iavf_idle_aq(struct iavf_hw *hw);
+void iavf_resume_aq(struct iavf_hw *hw);
+bool iavf_check_asq_alive(struct iavf_hw *hw);
+enum iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading);
+const char *iavf_aq_str(struct iavf_hw *hw, enum iavf_admin_queue_err aq_err);
+const char *iavf_stat_str(struct iavf_hw *hw, enum iavf_status stat_err);
+
+enum iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 seid,
+				     bool pf_lut, u8 *lut, u16 lut_size);
+enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 seid,
+				     struct iavf_aqc_get_set_rss_key_data *key);
+
+enum iavf_status iavf_set_mac_type(struct iavf_hw *hw);
+
+extern struct iavf_rx_ptype_decoded iavf_ptype_lookup[];
+
+static inline struct iavf_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype)
+{
+	return iavf_ptype_lookup[ptype];
+}
+
+void iavf_vf_parse_hw_config(struct iavf_hw *hw,
+			     struct virtchnl_vf_resource *msg);
+enum iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw,
+					enum virtchnl_ops v_opcode,
+					enum iavf_status v_retval,
+					u8 *msg, u16 msglen,
+					struct iavf_asq_cmd_details *cmd_details);
+#endif /* _IAVF_PROTOTYPE_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_register.h b/drivers/net/ethernet/intel/iavf/iavf_register.h
new file mode 100644
index 0000000000..a19e88898a
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_register.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_REGISTER_H_
+#define _IAVF_REGISTER_H_
+
+#define IAVF_VF_ARQBAH1 0x00006000 /* Reset: EMPR */
+#define IAVF_VF_ARQBAL1 0x00006C00 /* Reset: EMPR */
+#define IAVF_VF_ARQH1 0x00007400 /* Reset: EMPR */
+#define IAVF_VF_ARQH1_ARQH_SHIFT 0
+#define IAVF_VF_ARQH1_ARQH_MASK IAVF_MASK(0x3FF, IAVF_VF_ARQH1_ARQH_SHIFT)
+#define IAVF_VF_ARQLEN1 0x00008000 /* Reset: EMPR */
+#define IAVF_VF_ARQLEN1_ARQVFE_SHIFT 28
+#define IAVF_VF_ARQLEN1_ARQVFE_MASK IAVF_MASK(0x1, IAVF_VF_ARQLEN1_ARQVFE_SHIFT)
+#define IAVF_VF_ARQLEN1_ARQOVFL_SHIFT 29
+#define IAVF_VF_ARQLEN1_ARQOVFL_MASK IAVF_MASK(0x1, IAVF_VF_ARQLEN1_ARQOVFL_SHIFT)
+#define IAVF_VF_ARQLEN1_ARQCRIT_SHIFT 30
+#define IAVF_VF_ARQLEN1_ARQCRIT_MASK IAVF_MASK(0x1, IAVF_VF_ARQLEN1_ARQCRIT_SHIFT)
+#define IAVF_VF_ARQLEN1_ARQENABLE_SHIFT 31
+#define IAVF_VF_ARQLEN1_ARQENABLE_MASK IAVF_MASK(0x1, IAVF_VF_ARQLEN1_ARQENABLE_SHIFT)
+#define IAVF_VF_ARQT1 0x00007000 /* Reset: EMPR */
+#define IAVF_VF_ATQBAH1 0x00007800 /* Reset: EMPR */
+#define IAVF_VF_ATQBAL1 0x00007C00 /* Reset: EMPR */
+#define IAVF_VF_ATQH1 0x00006400 /* Reset: EMPR */
+#define IAVF_VF_ATQLEN1 0x00006800 /* Reset: EMPR */
+#define IAVF_VF_ATQLEN1_ATQVFE_SHIFT 28
+#define IAVF_VF_ATQLEN1_ATQVFE_MASK IAVF_MASK(0x1, IAVF_VF_ATQLEN1_ATQVFE_SHIFT)
+#define IAVF_VF_ATQLEN1_ATQOVFL_SHIFT 29
+#define IAVF_VF_ATQLEN1_ATQOVFL_MASK IAVF_MASK(0x1, IAVF_VF_ATQLEN1_ATQOVFL_SHIFT)
+#define IAVF_VF_ATQLEN1_ATQCRIT_SHIFT 30
+#define IAVF_VF_ATQLEN1_ATQCRIT_MASK IAVF_MASK(0x1, IAVF_VF_ATQLEN1_ATQCRIT_SHIFT)
+#define IAVF_VF_ATQLEN1_ATQENABLE_SHIFT 31
+#define IAVF_VF_ATQLEN1_ATQENABLE_MASK IAVF_MASK(0x1, IAVF_VF_ATQLEN1_ATQENABLE_SHIFT)
+#define IAVF_VF_ATQT1 0x00008400 /* Reset: EMPR */
+#define IAVF_VFGEN_RSTAT 0x00008800 /* Reset: VFR */
+#define IAVF_VFGEN_RSTAT_VFR_STATE_SHIFT 0
+#define IAVF_VFGEN_RSTAT_VFR_STATE_MASK IAVF_MASK(0x3, IAVF_VFGEN_RSTAT_VFR_STATE_SHIFT)
+#define IAVF_VFINT_DYN_CTL01 0x00005C00 /* Reset: VFR */
+#define IAVF_VFINT_DYN_CTL01_INTENA_SHIFT 0
+#define IAVF_VFINT_DYN_CTL01_INTENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTL01_INTENA_SHIFT)
+#define IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT 3
+#define IAVF_VFINT_DYN_CTL01_ITR_INDX_MASK IAVF_MASK(0x3, IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT)
+#define IAVF_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...63 */ /* Reset: VFR */
+#define IAVF_VFINT_DYN_CTLN1_INTENA_SHIFT 0
+#define IAVF_VFINT_DYN_CTLN1_INTENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTLN1_INTENA_SHIFT)
+#define IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_SHIFT 2
+#define IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_SHIFT)
+#define IAVF_VFINT_DYN_CTLN1_ITR_INDX_SHIFT 3
+#define IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK IAVF_MASK(0x3, IAVF_VFINT_DYN_CTLN1_ITR_INDX_SHIFT)
+#define IAVF_VFINT_DYN_CTLN1_INTERVAL_SHIFT 5
+#define IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_SHIFT 24
+#define IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_SHIFT)
+#define IAVF_VFINT_ICR0_ENA1 0x00005000 /* Reset: CORER */
+#define IAVF_VFINT_ICR0_ENA1_ADMINQ_SHIFT 30
+#define IAVF_VFINT_ICR0_ENA1_ADMINQ_MASK IAVF_MASK(0x1, IAVF_VFINT_ICR0_ENA1_ADMINQ_SHIFT)
+#define IAVF_VFINT_ICR0_ENA1_RSVD_SHIFT 31
+#define IAVF_VFINT_ICR01 0x00004800 /* Reset: CORER */
+#define IAVF_VFINT_ITRN1(_i, _INTVF) (0x00002800 + ((_i) * 64 + (_INTVF) * 4)) /* _i=0...2, _INTVF=0...15 */ /* Reset: VFR */
+#define IAVF_QRX_TAIL1(_Q) (0x00002000 + ((_Q) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define IAVF_QTX_TAIL1(_Q) (0x00000000 + ((_Q) * 4)) /* _i=0...15 */ /* Reset: PFR */
+#define IAVF_VFQF_HENA(_i) (0x0000C400 + ((_i) * 4)) /* _i=0...1 */ /* Reset: CORER */
+#define IAVF_VFQF_HKEY(_i) (0x0000CC00 + ((_i) * 4)) /* _i=0...12 */ /* Reset: CORER */
+#define IAVF_VFQF_HKEY_MAX_INDEX 12
+#define IAVF_VFQF_HLUT(_i) (0x0000D000 + ((_i) * 4)) /* _i=0...15 */ /* Reset: CORER */
+#define IAVF_VFQF_HLUT_MAX_INDEX 15
+#define IAVF_VFINT_DYN_CTLN1_WB_ON_ITR_SHIFT 30
+#define IAVF_VFINT_DYN_CTLN1_WB_ON_ITR_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTLN1_WB_ON_ITR_SHIFT)
+#endif /* _IAVF_REGISTER_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_status.h b/drivers/net/ethernet/intel/iavf/iavf_status.h
new file mode 100644
index 0000000000..0e493ee9e9
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_status.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_STATUS_H_
+#define _IAVF_STATUS_H_
+
+/* Error Codes */
+enum iavf_status {
+	IAVF_SUCCESS				= 0,
+	IAVF_ERR_NVM				= -1,
+	IAVF_ERR_NVM_CHECKSUM			= -2,
+	IAVF_ERR_PHY				= -3,
+	IAVF_ERR_CONFIG				= -4,
+	IAVF_ERR_PARAM				= -5,
+	IAVF_ERR_MAC_TYPE			= -6,
+	IAVF_ERR_UNKNOWN_PHY			= -7,
+	IAVF_ERR_LINK_SETUP			= -8,
+	IAVF_ERR_ADAPTER_STOPPED		= -9,
+	IAVF_ERR_INVALID_MAC_ADDR		= -10,
+	IAVF_ERR_DEVICE_NOT_SUPPORTED		= -11,
+	IAVF_ERR_PRIMARY_REQUESTS_PENDING	= -12,
+	IAVF_ERR_INVALID_LINK_SETTINGS		= -13,
+	IAVF_ERR_AUTONEG_NOT_COMPLETE		= -14,
+	IAVF_ERR_RESET_FAILED			= -15,
+	IAVF_ERR_SWFW_SYNC			= -16,
+	IAVF_ERR_NO_AVAILABLE_VSI		= -17,
+	IAVF_ERR_NO_MEMORY			= -18,
+	IAVF_ERR_BAD_PTR			= -19,
+	IAVF_ERR_RING_FULL			= -20,
+	IAVF_ERR_INVALID_PD_ID			= -21,
+	IAVF_ERR_INVALID_QP_ID			= -22,
+	IAVF_ERR_INVALID_CQ_ID			= -23,
+	IAVF_ERR_INVALID_CEQ_ID			= -24,
+	IAVF_ERR_INVALID_AEQ_ID			= -25,
+	IAVF_ERR_INVALID_SIZE			= -26,
+	IAVF_ERR_INVALID_ARP_INDEX		= -27,
+	IAVF_ERR_INVALID_FPM_FUNC_ID		= -28,
+	IAVF_ERR_QP_INVALID_MSG_SIZE		= -29,
+	IAVF_ERR_QP_TOOMANY_WRS_POSTED		= -30,
+	IAVF_ERR_INVALID_FRAG_COUNT		= -31,
+	IAVF_ERR_QUEUE_EMPTY			= -32,
+	IAVF_ERR_INVALID_ALIGNMENT		= -33,
+	IAVF_ERR_FLUSHED_QUEUE			= -34,
+	IAVF_ERR_INVALID_PUSH_PAGE_INDEX	= -35,
+	IAVF_ERR_INVALID_IMM_DATA_SIZE		= -36,
+	IAVF_ERR_TIMEOUT			= -37,
+	IAVF_ERR_OPCODE_MISMATCH		= -38,
+	IAVF_ERR_CQP_COMPL_ERROR		= -39,
+	IAVF_ERR_INVALID_VF_ID			= -40,
+	IAVF_ERR_INVALID_HMCFN_ID		= -41,
+	IAVF_ERR_BACKING_PAGE_ERROR		= -42,
+	IAVF_ERR_NO_PBLCHUNKS_AVAILABLE		= -43,
+	IAVF_ERR_INVALID_PBLE_INDEX		= -44,
+	IAVF_ERR_INVALID_SD_INDEX		= -45,
+	IAVF_ERR_INVALID_PAGE_DESC_INDEX	= -46,
+	IAVF_ERR_INVALID_SD_TYPE		= -47,
+	IAVF_ERR_MEMCPY_FAILED			= -48,
+	IAVF_ERR_INVALID_HMC_OBJ_INDEX		= -49,
+	IAVF_ERR_INVALID_HMC_OBJ_COUNT		= -50,
+	IAVF_ERR_INVALID_SRQ_ARM_LIMIT		= -51,
+	IAVF_ERR_SRQ_ENABLED			= -52,
+	IAVF_ERR_ADMIN_QUEUE_ERROR		= -53,
+	IAVF_ERR_ADMIN_QUEUE_TIMEOUT		= -54,
+	IAVF_ERR_BUF_TOO_SHORT			= -55,
+	IAVF_ERR_ADMIN_QUEUE_FULL		= -56,
+	IAVF_ERR_ADMIN_QUEUE_NO_WORK		= -57,
+	IAVF_ERR_BAD_RDMA_CQE			= -58,
+	IAVF_ERR_NVM_BLANK_MODE			= -59,
+	IAVF_ERR_NOT_IMPLEMENTED		= -60,
+	IAVF_ERR_PE_DOORBELL_NOT_ENABLED	= -61,
+	IAVF_ERR_DIAG_TEST_FAILED		= -62,
+	IAVF_ERR_NOT_READY			= -63,
+	IAVF_NOT_SUPPORTED			= -64,
+	IAVF_ERR_FIRMWARE_API_VERSION		= -65,
+	IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR	= -66,
+};
+
+#endif /* _IAVF_STATUS_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_trace.h b/drivers/net/ethernet/intel/iavf/iavf_trace.h
new file mode 100644
index 0000000000..82fda6f5ab
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_trace.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+/* Modeled on trace-events-sample.h */
+
+/* The trace subsystem name for iavf will be "iavf".
+ *
+ * This file is named iavf_trace.h.
+ *
+ * Since this include file's name is different from the trace
+ * subsystem name, we'll have to define TRACE_INCLUDE_FILE at the end
+ * of this file.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM iavf
+
+/* See trace-events-sample.h for a detailed description of why this
+ * guard clause is different from most normal include files.
+ */
+#if !defined(_IAVF_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _IAVF_TRACE_H_
+
+#include <linux/tracepoint.h>
+
+/*
+ * iavf_trace() macro enables shared code to refer to trace points
+ * like:
+ *
+ * trace_iavf{,vf}_example(args...)
+ *
+ * ... as:
+ *
+ * iavf_trace(example, args...)
+ *
+ * ... to resolve to the PF or VF version of the tracepoint without
+ * ifdefs, and to allow tracepoints to be disabled entirely at build
+ * time.
+ *
+ * Trace point should always be referred to in the driver via this
+ * macro.
+ *
+ * Similarly, iavf_trace_enabled(trace_name) wraps references to
+ * trace_iavf{,vf}_<trace_name>_enabled() functions.
+ */
+#define _IAVF_TRACE_NAME(trace_name) (trace_ ## iavf ## _ ## trace_name)
+#define IAVF_TRACE_NAME(trace_name) _IAVF_TRACE_NAME(trace_name)
+
+#define iavf_trace(trace_name, args...) IAVF_TRACE_NAME(trace_name)(args)
+
+#define iavf_trace_enabled(trace_name) IAVF_TRACE_NAME(trace_name##_enabled)()
+
+/* Events common to PF and VF. Corresponding versions will be defined
+ * for both, named trace_iavf_* and trace_iavf_*. The iavf_trace()
+ * macro above will select the right trace point name for the driver
+ * being built from shared code.
+ */
+
+/* Events related to a vsi & ring */
+DECLARE_EVENT_CLASS(
+	iavf_tx_template,
+
+	TP_PROTO(struct iavf_ring *ring,
+		 struct iavf_tx_desc *desc,
+		 struct iavf_tx_buffer *buf),
+
+	TP_ARGS(ring, desc, buf),
+
+	/* The convention here is to make the first fields in the
+	 * TP_STRUCT match the TP_PROTO exactly. This enables the use
+	 * of the args struct generated by the tplist tool (from the
+	 * bcc-tools package) to be used for those fields. To access
+	 * fields other than the tracepoint args will require the
+	 * tplist output to be adjusted.
+	 */
+	TP_STRUCT__entry(
+		__field(void*, ring)
+		__field(void*, desc)
+		__field(void*, buf)
+		__string(devname, ring->netdev->name)
+	),
+
+	TP_fast_assign(
+		__entry->ring = ring;
+		__entry->desc = desc;
+		__entry->buf = buf;
+		__assign_str(devname, ring->netdev->name);
+	),
+
+	TP_printk(
+		"netdev: %s ring: %p desc: %p buf %p",
+		__get_str(devname), __entry->ring,
+		__entry->desc, __entry->buf)
+);
+
+DEFINE_EVENT(
+	iavf_tx_template, iavf_clean_tx_irq,
+	TP_PROTO(struct iavf_ring *ring,
+		 struct iavf_tx_desc *desc,
+		 struct iavf_tx_buffer *buf),
+
+	TP_ARGS(ring, desc, buf));
+
+DEFINE_EVENT(
+	iavf_tx_template, iavf_clean_tx_irq_unmap,
+	TP_PROTO(struct iavf_ring *ring,
+		 struct iavf_tx_desc *desc,
+		 struct iavf_tx_buffer *buf),
+
+	TP_ARGS(ring, desc, buf));
+
+DECLARE_EVENT_CLASS(
+	iavf_rx_template,
+
+	TP_PROTO(struct iavf_ring *ring,
+		 union iavf_32byte_rx_desc *desc,
+		 struct sk_buff *skb),
+
+	TP_ARGS(ring, desc, skb),
+
+	TP_STRUCT__entry(
+		__field(void*, ring)
+		__field(void*, desc)
+		__field(void*, skb)
+		__string(devname, ring->netdev->name)
+	),
+
+	TP_fast_assign(
+		__entry->ring = ring;
+		__entry->desc = desc;
+		__entry->skb = skb;
+		__assign_str(devname, ring->netdev->name);
+	),
+
+	TP_printk(
+		"netdev: %s ring: %p desc: %p skb %p",
+		__get_str(devname), __entry->ring,
+		__entry->desc, __entry->skb)
+);
+
+DEFINE_EVENT(
+	iavf_rx_template, iavf_clean_rx_irq,
+	TP_PROTO(struct iavf_ring *ring,
+		 union iavf_32byte_rx_desc *desc,
+		 struct sk_buff *skb),
+
+	TP_ARGS(ring, desc, skb));
+
+DEFINE_EVENT(
+	iavf_rx_template, iavf_clean_rx_irq_rx,
+	TP_PROTO(struct iavf_ring *ring,
+		 union iavf_32byte_rx_desc *desc,
+		 struct sk_buff *skb),
+
+	TP_ARGS(ring, desc, skb));
+
+DECLARE_EVENT_CLASS(
+	iavf_xmit_template,
+
+	TP_PROTO(struct sk_buff *skb,
+		 struct iavf_ring *ring),
+
+	TP_ARGS(skb, ring),
+
+	TP_STRUCT__entry(
+		__field(void*, skb)
+		__field(void*, ring)
+		__string(devname, ring->netdev->name)
+	),
+
+	TP_fast_assign(
+		__entry->skb = skb;
+		__entry->ring = ring;
+		__assign_str(devname, ring->netdev->name);
+	),
+
+	TP_printk(
+		"netdev: %s skb: %p ring: %p",
+		__get_str(devname), __entry->skb,
+		__entry->ring)
+);
+
+DEFINE_EVENT(
+	iavf_xmit_template, iavf_xmit_frame_ring,
+	TP_PROTO(struct sk_buff *skb,
+		 struct iavf_ring *ring),
+
+	TP_ARGS(skb, ring));
+
+DEFINE_EVENT(
+	iavf_xmit_template, iavf_xmit_frame_ring_drop,
+	TP_PROTO(struct sk_buff *skb,
+		 struct iavf_ring *ring),
+
+	TP_ARGS(skb, ring));
+
+/* Events unique to the VF. */
+
+#endif /* _IAVF_TRACE_H_ */
+/* This must be outside ifdef _IAVF_TRACE_H */
+
+/* This trace include file is not located in the .../include/trace
+ * with the kernel tracepoint definitions, because we're a loadable
+ * module.
+ */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE iavf_trace
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
new file mode 100644
index 0000000000..8c5f6096b0
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
@@ -0,0 +1,2537 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include <linux/prefetch.h>
+
+#include "iavf.h"
+#include "iavf_trace.h"
+#include "iavf_prototype.h"
+
+static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
+				u32 td_tag)
+{
+	return cpu_to_le64(IAVF_TX_DESC_DTYPE_DATA |
+			   ((u64)td_cmd  << IAVF_TXD_QW1_CMD_SHIFT) |
+			   ((u64)td_offset << IAVF_TXD_QW1_OFFSET_SHIFT) |
+			   ((u64)size  << IAVF_TXD_QW1_TX_BUF_SZ_SHIFT) |
+			   ((u64)td_tag  << IAVF_TXD_QW1_L2TAG1_SHIFT));
+}
+
+#define IAVF_TXD_CMD (IAVF_TX_DESC_CMD_EOP | IAVF_TX_DESC_CMD_RS)
+
+/**
+ * iavf_unmap_and_free_tx_resource - Release a Tx buffer
+ * @ring:      the ring that owns the buffer
+ * @tx_buffer: the buffer to free
+ **/
+static void iavf_unmap_and_free_tx_resource(struct iavf_ring *ring,
+					    struct iavf_tx_buffer *tx_buffer)
+{
+	if (tx_buffer->skb) {
+		if (tx_buffer->tx_flags & IAVF_TX_FLAGS_FD_SB)
+			kfree(tx_buffer->raw_buf);
+		else
+			dev_kfree_skb_any(tx_buffer->skb);
+		if (dma_unmap_len(tx_buffer, len))
+			dma_unmap_single(ring->dev,
+					 dma_unmap_addr(tx_buffer, dma),
+					 dma_unmap_len(tx_buffer, len),
+					 DMA_TO_DEVICE);
+	} else if (dma_unmap_len(tx_buffer, len)) {
+		dma_unmap_page(ring->dev,
+			       dma_unmap_addr(tx_buffer, dma),
+			       dma_unmap_len(tx_buffer, len),
+			       DMA_TO_DEVICE);
+	}
+
+	tx_buffer->next_to_watch = NULL;
+	tx_buffer->skb = NULL;
+	dma_unmap_len_set(tx_buffer, len, 0);
+	/* tx_buffer must be completely set up in the transmit path */
+}
+
+/**
+ * iavf_clean_tx_ring - Free any empty Tx buffers
+ * @tx_ring: ring to be cleaned
+ **/
+static void iavf_clean_tx_ring(struct iavf_ring *tx_ring)
+{
+	unsigned long bi_size;
+	u16 i;
+
+	/* ring already cleared, nothing to do */
+	if (!tx_ring->tx_bi)
+		return;
+
+	/* Free all the Tx ring sk_buffs */
+	for (i = 0; i < tx_ring->count; i++)
+		iavf_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
+
+	bi_size = sizeof(struct iavf_tx_buffer) * tx_ring->count;
+	memset(tx_ring->tx_bi, 0, bi_size);
+
+	/* Zero out the descriptor ring */
+	memset(tx_ring->desc, 0, tx_ring->size);
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+	if (!tx_ring->netdev)
+		return;
+
+	/* cleanup Tx queue statistics */
+	netdev_tx_reset_queue(txring_txq(tx_ring));
+}
+
+/**
+ * iavf_free_tx_resources - Free Tx resources per queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ **/
+void iavf_free_tx_resources(struct iavf_ring *tx_ring)
+{
+	iavf_clean_tx_ring(tx_ring);
+	kfree(tx_ring->tx_bi);
+	tx_ring->tx_bi = NULL;
+
+	if (tx_ring->desc) {
+		dma_free_coherent(tx_ring->dev, tx_ring->size,
+				  tx_ring->desc, tx_ring->dma);
+		tx_ring->desc = NULL;
+	}
+}
+
+/**
+ * iavf_get_tx_pending - how many Tx descriptors not processed
+ * @ring: the ring of descriptors
+ * @in_sw: is tx_pending being checked in SW or HW
+ *
+ * Since there is no access to the ring head register
+ * in XL710, we need to use our local copies
+ **/
+static u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw)
+{
+	u32 head, tail;
+
+	/* underlying hardware might not allow access and/or always return
+	 * 0 for the head/tail registers so just use the cached values
+	 */
+	head = ring->next_to_clean;
+	tail = ring->next_to_use;
+
+	if (head != tail)
+		return (head < tail) ?
+			tail - head : (tail + ring->count - head);
+
+	return 0;
+}
+
+/**
+ * iavf_force_wb - Issue SW Interrupt so HW does a wb
+ * @vsi: the VSI we care about
+ * @q_vector: the vector on which to force writeback
+ **/
+static void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector)
+{
+	u32 val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK |
+		  IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */
+		  IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK |
+		  IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK
+		  /* allow 00 to be written to the index */;
+
+	wr32(&vsi->back->hw,
+	     IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx),
+	     val);
+}
+
+/**
+ * iavf_detect_recover_hung - Function to detect and recover hung_queues
+ * @vsi:  pointer to vsi struct with tx queues
+ *
+ * VSI has netdev and netdev has TX queues. This function is to check each of
+ * those TX queues if they are hung, trigger recovery by issuing SW interrupt.
+ **/
+void iavf_detect_recover_hung(struct iavf_vsi *vsi)
+{
+	struct iavf_ring *tx_ring = NULL;
+	struct net_device *netdev;
+	unsigned int i;
+	int packets;
+
+	if (!vsi)
+		return;
+
+	if (test_bit(__IAVF_VSI_DOWN, vsi->state))
+		return;
+
+	netdev = vsi->netdev;
+	if (!netdev)
+		return;
+
+	if (!netif_carrier_ok(netdev))
+		return;
+
+	for (i = 0; i < vsi->back->num_active_queues; i++) {
+		tx_ring = &vsi->back->tx_rings[i];
+		if (tx_ring && tx_ring->desc) {
+			/* If packet counter has not changed the queue is
+			 * likely stalled, so force an interrupt for this
+			 * queue.
+			 *
+			 * prev_pkt_ctr would be negative if there was no
+			 * pending work.
+			 */
+			packets = tx_ring->stats.packets & INT_MAX;
+			if (tx_ring->tx_stats.prev_pkt_ctr == packets) {
+				iavf_force_wb(vsi, tx_ring->q_vector);
+				continue;
+			}
+
+			/* Memory barrier between read of packet count and call
+			 * to iavf_get_tx_pending()
+			 */
+			smp_rmb();
+			tx_ring->tx_stats.prev_pkt_ctr =
+			  iavf_get_tx_pending(tx_ring, true) ? packets : -1;
+		}
+	}
+}
+
+#define WB_STRIDE 4
+
+/**
+ * iavf_clean_tx_irq - Reclaim resources after transmit completes
+ * @vsi: the VSI we care about
+ * @tx_ring: Tx ring to clean
+ * @napi_budget: Used to determine if we are in netpoll
+ *
+ * Returns true if there's any budget left (e.g. the clean is finished)
+ **/
+static bool iavf_clean_tx_irq(struct iavf_vsi *vsi,
+			      struct iavf_ring *tx_ring, int napi_budget)
+{
+	int i = tx_ring->next_to_clean;
+	struct iavf_tx_buffer *tx_buf;
+	struct iavf_tx_desc *tx_desc;
+	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int budget = IAVF_DEFAULT_IRQ_WORK;
+
+	tx_buf = &tx_ring->tx_bi[i];
+	tx_desc = IAVF_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		struct iavf_tx_desc *eop_desc = tx_buf->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		iavf_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf);
+		/* if the descriptor isn't done, no work yet to do */
+		if (!(eop_desc->cmd_type_offset_bsz &
+		      cpu_to_le64(IAVF_TX_DESC_DTYPE_DESC_DONE)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buf->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buf->bytecount;
+		total_packets += tx_buf->gso_segs;
+
+		/* free the skb */
+		napi_consume_skb(tx_buf->skb, napi_budget);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buf, dma),
+				 dma_unmap_len(tx_buf, len),
+				 DMA_TO_DEVICE);
+
+		/* clear tx_buffer data */
+		tx_buf->skb = NULL;
+		dma_unmap_len_set(tx_buf, len, 0);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			iavf_trace(clean_tx_irq_unmap,
+				   tx_ring, tx_desc, tx_buf);
+
+			tx_buf++;
+			tx_desc++;
+			i++;
+			if (unlikely(!i)) {
+				i -= tx_ring->count;
+				tx_buf = tx_ring->tx_bi;
+				tx_desc = IAVF_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buf, len)) {
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buf, dma),
+					       dma_unmap_len(tx_buf, len),
+					       DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buf, len, 0);
+			}
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buf++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buf = tx_ring->tx_bi;
+			tx_desc = IAVF_TX_DESC(tx_ring, 0);
+		}
+
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->syncp);
+	tx_ring->q_vector->tx.total_bytes += total_bytes;
+	tx_ring->q_vector->tx.total_packets += total_packets;
+
+	if (tx_ring->flags & IAVF_TXR_FLAGS_WB_ON_ITR) {
+		/* check to see if there are < 4 descriptors
+		 * waiting to be written back, then kick the hardware to force
+		 * them to be written back in case we stay in NAPI.
+		 * In this mode on X722 we do not enable Interrupt.
+		 */
+		unsigned int j = iavf_get_tx_pending(tx_ring, false);
+
+		if (budget &&
+		    ((j / WB_STRIDE) == 0) && (j > 0) &&
+		    !test_bit(__IAVF_VSI_DOWN, vsi->state) &&
+		    (IAVF_DESC_UNUSED(tx_ring) != tx_ring->count))
+			tx_ring->arm_wb = true;
+	}
+
+	/* notify netdev of completed buffers */
+	netdev_tx_completed_queue(txring_txq(tx_ring),
+				  total_packets, total_bytes);
+
+#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
+	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
+		     (IAVF_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (__netif_subqueue_stopped(tx_ring->netdev,
+					     tx_ring->queue_index) &&
+		   !test_bit(__IAVF_VSI_DOWN, vsi->state)) {
+			netif_wake_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+			++tx_ring->tx_stats.restart_queue;
+		}
+	}
+
+	return !!budget;
+}
+
+/**
+ * iavf_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
+ * @vsi: the VSI we care about
+ * @q_vector: the vector on which to enable writeback
+ *
+ **/
+static void iavf_enable_wb_on_itr(struct iavf_vsi *vsi,
+				  struct iavf_q_vector *q_vector)
+{
+	u16 flags = q_vector->tx.ring[0].flags;
+	u32 val;
+
+	if (!(flags & IAVF_TXR_FLAGS_WB_ON_ITR))
+		return;
+
+	if (q_vector->arm_wb_state)
+		return;
+
+	val = IAVF_VFINT_DYN_CTLN1_WB_ON_ITR_MASK |
+	      IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK; /* set noitr */
+
+	wr32(&vsi->back->hw,
+	     IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx), val);
+	q_vector->arm_wb_state = true;
+}
+
+static inline bool iavf_container_is_rx(struct iavf_q_vector *q_vector,
+					struct iavf_ring_container *rc)
+{
+	return &q_vector->rx == rc;
+}
+
+#define IAVF_AIM_MULTIPLIER_100G	2560
+#define IAVF_AIM_MULTIPLIER_50G		1280
+#define IAVF_AIM_MULTIPLIER_40G		1024
+#define IAVF_AIM_MULTIPLIER_20G		512
+#define IAVF_AIM_MULTIPLIER_10G		256
+#define IAVF_AIM_MULTIPLIER_1G		32
+
+static unsigned int iavf_mbps_itr_multiplier(u32 speed_mbps)
+{
+	switch (speed_mbps) {
+	case SPEED_100000:
+		return IAVF_AIM_MULTIPLIER_100G;
+	case SPEED_50000:
+		return IAVF_AIM_MULTIPLIER_50G;
+	case SPEED_40000:
+		return IAVF_AIM_MULTIPLIER_40G;
+	case SPEED_25000:
+	case SPEED_20000:
+		return IAVF_AIM_MULTIPLIER_20G;
+	case SPEED_10000:
+	default:
+		return IAVF_AIM_MULTIPLIER_10G;
+	case SPEED_1000:
+	case SPEED_100:
+		return IAVF_AIM_MULTIPLIER_1G;
+	}
+}
+
+static unsigned int
+iavf_virtchnl_itr_multiplier(enum virtchnl_link_speed speed_virtchnl)
+{
+	switch (speed_virtchnl) {
+	case VIRTCHNL_LINK_SPEED_40GB:
+		return IAVF_AIM_MULTIPLIER_40G;
+	case VIRTCHNL_LINK_SPEED_25GB:
+	case VIRTCHNL_LINK_SPEED_20GB:
+		return IAVF_AIM_MULTIPLIER_20G;
+	case VIRTCHNL_LINK_SPEED_10GB:
+	default:
+		return IAVF_AIM_MULTIPLIER_10G;
+	case VIRTCHNL_LINK_SPEED_1GB:
+	case VIRTCHNL_LINK_SPEED_100MB:
+		return IAVF_AIM_MULTIPLIER_1G;
+	}
+}
+
+static unsigned int iavf_itr_divisor(struct iavf_adapter *adapter)
+{
+	if (ADV_LINK_SUPPORT(adapter))
+		return IAVF_ITR_ADAPTIVE_MIN_INC *
+			iavf_mbps_itr_multiplier(adapter->link_speed_mbps);
+	else
+		return IAVF_ITR_ADAPTIVE_MIN_INC *
+			iavf_virtchnl_itr_multiplier(adapter->link_speed);
+}
+
+/**
+ * iavf_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
+ * @rc: structure containing ring performance data
+ *
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt.  The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern.  Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
+ * while increasing bulk throughput.
+ **/
+static void iavf_update_itr(struct iavf_q_vector *q_vector,
+			    struct iavf_ring_container *rc)
+{
+	unsigned int avg_wire_size, packets, bytes, itr;
+	unsigned long next_update = jiffies;
+
+	/* If we don't have any rings just leave ourselves set for maximum
+	 * possible latency so we take ourselves out of the equation.
+	 */
+	if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
+		return;
+
+	/* For Rx we want to push the delay up and default to low latency.
+	 * for Tx we want to pull the delay down and default to high latency.
+	 */
+	itr = iavf_container_is_rx(q_vector, rc) ?
+	      IAVF_ITR_ADAPTIVE_MIN_USECS | IAVF_ITR_ADAPTIVE_LATENCY :
+	      IAVF_ITR_ADAPTIVE_MAX_USECS | IAVF_ITR_ADAPTIVE_LATENCY;
+
+	/* If we didn't update within up to 1 - 2 jiffies we can assume
+	 * that either packets are coming in so slow there hasn't been
+	 * any work, or that there is so much work that NAPI is dealing
+	 * with interrupt moderation and we don't need to do anything.
+	 */
+	if (time_after(next_update, rc->next_update))
+		goto clear_counts;
+
+	/* If itr_countdown is set it means we programmed an ITR within
+	 * the last 4 interrupt cycles. This has a side effect of us
+	 * potentially firing an early interrupt. In order to work around
+	 * this we need to throw out any data received for a few
+	 * interrupts following the update.
+	 */
+	if (q_vector->itr_countdown) {
+		itr = rc->target_itr;
+		goto clear_counts;
+	}
+
+	packets = rc->total_packets;
+	bytes = rc->total_bytes;
+
+	if (iavf_container_is_rx(q_vector, rc)) {
+		/* If Rx there are 1 to 4 packets and bytes are less than
+		 * 9000 assume insufficient data to use bulk rate limiting
+		 * approach unless Tx is already in bulk rate limiting. We
+		 * are likely latency driven.
+		 */
+		if (packets && packets < 4 && bytes < 9000 &&
+		    (q_vector->tx.target_itr & IAVF_ITR_ADAPTIVE_LATENCY)) {
+			itr = IAVF_ITR_ADAPTIVE_LATENCY;
+			goto adjust_by_size;
+		}
+	} else if (packets < 4) {
+		/* If we have Tx and Rx ITR maxed and Tx ITR is running in
+		 * bulk mode and we are receiving 4 or fewer packets just
+		 * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
+		 * that the Rx can relax.
+		 */
+		if (rc->target_itr == IAVF_ITR_ADAPTIVE_MAX_USECS &&
+		    (q_vector->rx.target_itr & IAVF_ITR_MASK) ==
+		     IAVF_ITR_ADAPTIVE_MAX_USECS)
+			goto clear_counts;
+	} else if (packets > 32) {
+		/* If we have processed over 32 packets in a single interrupt
+		 * for Tx assume we need to switch over to "bulk" mode.
+		 */
+		rc->target_itr &= ~IAVF_ITR_ADAPTIVE_LATENCY;
+	}
+
+	/* We have no packets to actually measure against. This means
+	 * either one of the other queues on this vector is active or
+	 * we are a Tx queue doing TSO with too high of an interrupt rate.
+	 *
+	 * Between 4 and 56 we can assume that our current interrupt delay
+	 * is only slightly too low. As such we should increase it by a small
+	 * fixed amount.
+	 */
+	if (packets < 56) {
+		itr = rc->target_itr + IAVF_ITR_ADAPTIVE_MIN_INC;
+		if ((itr & IAVF_ITR_MASK) > IAVF_ITR_ADAPTIVE_MAX_USECS) {
+			itr &= IAVF_ITR_ADAPTIVE_LATENCY;
+			itr += IAVF_ITR_ADAPTIVE_MAX_USECS;
+		}
+		goto clear_counts;
+	}
+
+	if (packets <= 256) {
+		itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
+		itr &= IAVF_ITR_MASK;
+
+		/* Between 56 and 112 is our "goldilocks" zone where we are
+		 * working out "just right". Just report that our current
+		 * ITR is good for us.
+		 */
+		if (packets <= 112)
+			goto clear_counts;
+
+		/* If packet count is 128 or greater we are likely looking
+		 * at a slight overrun of the delay we want. Try halving
+		 * our delay to see if that will cut the number of packets
+		 * in half per interrupt.
+		 */
+		itr /= 2;
+		itr &= IAVF_ITR_MASK;
+		if (itr < IAVF_ITR_ADAPTIVE_MIN_USECS)
+			itr = IAVF_ITR_ADAPTIVE_MIN_USECS;
+
+		goto clear_counts;
+	}
+
+	/* The paths below assume we are dealing with a bulk ITR since
+	 * number of packets is greater than 256. We are just going to have
+	 * to compute a value and try to bring the count under control,
+	 * though for smaller packet sizes there isn't much we can do as
+	 * NAPI polling will likely be kicking in sooner rather than later.
+	 */
+	itr = IAVF_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+	/* If packet counts are 256 or greater we can assume we have a gross
+	 * overestimation of what the rate should be. Instead of trying to fine
+	 * tune it just use the formula below to try and dial in an exact value
+	 * give the current packet size of the frame.
+	 */
+	avg_wire_size = bytes / packets;
+
+	/* The following is a crude approximation of:
+	 *  wmem_default / (size + overhead) = desired_pkts_per_int
+	 *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+	 *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+	 *
+	 * Assuming wmem_default is 212992 and overhead is 640 bytes per
+	 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+	 * formula down to
+	 *
+	 *  (170 * (size + 24)) / (size + 640) = ITR
+	 *
+	 * We first do some math on the packet size and then finally bitshift
+	 * by 8 after rounding up. We also have to account for PCIe link speed
+	 * difference as ITR scales based on this.
+	 */
+	if (avg_wire_size <= 60) {
+		/* Start at 250k ints/sec */
+		avg_wire_size = 4096;
+	} else if (avg_wire_size <= 380) {
+		/* 250K ints/sec to 60K ints/sec */
+		avg_wire_size *= 40;
+		avg_wire_size += 1696;
+	} else if (avg_wire_size <= 1084) {
+		/* 60K ints/sec to 36K ints/sec */
+		avg_wire_size *= 15;
+		avg_wire_size += 11452;
+	} else if (avg_wire_size <= 1980) {
+		/* 36K ints/sec to 30K ints/sec */
+		avg_wire_size *= 5;
+		avg_wire_size += 22420;
+	} else {
+		/* plateau at a limit of 30K ints/sec */
+		avg_wire_size = 32256;
+	}
+
+	/* If we are in low latency mode halve our delay which doubles the
+	 * rate to somewhere between 100K to 16K ints/sec
+	 */
+	if (itr & IAVF_ITR_ADAPTIVE_LATENCY)
+		avg_wire_size /= 2;
+
+	/* Resultant value is 256 times larger than it needs to be. This
+	 * gives us room to adjust the value as needed to either increase
+	 * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+	 *
+	 * Use addition as we have already recorded the new latency flag
+	 * for the ITR value.
+	 */
+	itr += DIV_ROUND_UP(avg_wire_size,
+			    iavf_itr_divisor(q_vector->adapter)) *
+		IAVF_ITR_ADAPTIVE_MIN_INC;
+
+	if ((itr & IAVF_ITR_MASK) > IAVF_ITR_ADAPTIVE_MAX_USECS) {
+		itr &= IAVF_ITR_ADAPTIVE_LATENCY;
+		itr += IAVF_ITR_ADAPTIVE_MAX_USECS;
+	}
+
+clear_counts:
+	/* write back value */
+	rc->target_itr = itr;
+
+	/* next update should occur within next jiffy */
+	rc->next_update = next_update + 1;
+
+	rc->total_bytes = 0;
+	rc->total_packets = 0;
+}
+
+/**
+ * iavf_setup_tx_descriptors - Allocate the Tx descriptors
+ * @tx_ring: the tx ring to set up
+ *
+ * Return 0 on success, negative on error
+ **/
+int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring)
+{
+	struct device *dev = tx_ring->dev;
+	int bi_size;
+
+	if (!dev)
+		return -ENOMEM;
+
+	/* warn if we are about to overwrite the pointer */
+	WARN_ON(tx_ring->tx_bi);
+	bi_size = sizeof(struct iavf_tx_buffer) * tx_ring->count;
+	tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
+	if (!tx_ring->tx_bi)
+		goto err;
+
+	/* round up to nearest 4K */
+	tx_ring->size = tx_ring->count * sizeof(struct iavf_tx_desc);
+	tx_ring->size = ALIGN(tx_ring->size, 4096);
+	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+					   &tx_ring->dma, GFP_KERNEL);
+	if (!tx_ring->desc) {
+		dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
+			 tx_ring->size);
+		goto err;
+	}
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+	tx_ring->tx_stats.prev_pkt_ctr = -1;
+	return 0;
+
+err:
+	kfree(tx_ring->tx_bi);
+	tx_ring->tx_bi = NULL;
+	return -ENOMEM;
+}
+
+/**
+ * iavf_clean_rx_ring - Free Rx buffers
+ * @rx_ring: ring to be cleaned
+ **/
+static void iavf_clean_rx_ring(struct iavf_ring *rx_ring)
+{
+	unsigned long bi_size;
+	u16 i;
+
+	/* ring already cleared, nothing to do */
+	if (!rx_ring->rx_bi)
+		return;
+
+	if (rx_ring->skb) {
+		dev_kfree_skb(rx_ring->skb);
+		rx_ring->skb = NULL;
+	}
+
+	/* Free all the Rx ring sk_buffs */
+	for (i = 0; i < rx_ring->count; i++) {
+		struct iavf_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
+
+		if (!rx_bi->page)
+			continue;
+
+		/* Invalidate cache lines that may have been written to by
+		 * device so that we avoid corrupting memory.
+		 */
+		dma_sync_single_range_for_cpu(rx_ring->dev,
+					      rx_bi->dma,
+					      rx_bi->page_offset,
+					      rx_ring->rx_buf_len,
+					      DMA_FROM_DEVICE);
+
+		/* free resources associated with mapping */
+		dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma,
+				     iavf_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE,
+				     IAVF_RX_DMA_ATTR);
+
+		__page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias);
+
+		rx_bi->page = NULL;
+		rx_bi->page_offset = 0;
+	}
+
+	bi_size = sizeof(struct iavf_rx_buffer) * rx_ring->count;
+	memset(rx_ring->rx_bi, 0, bi_size);
+
+	/* Zero out the descriptor ring */
+	memset(rx_ring->desc, 0, rx_ring->size);
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+}
+
+/**
+ * iavf_free_rx_resources - Free Rx resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+void iavf_free_rx_resources(struct iavf_ring *rx_ring)
+{
+	iavf_clean_rx_ring(rx_ring);
+	kfree(rx_ring->rx_bi);
+	rx_ring->rx_bi = NULL;
+
+	if (rx_ring->desc) {
+		dma_free_coherent(rx_ring->dev, rx_ring->size,
+				  rx_ring->desc, rx_ring->dma);
+		rx_ring->desc = NULL;
+	}
+}
+
+/**
+ * iavf_setup_rx_descriptors - Allocate Rx descriptors
+ * @rx_ring: Rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring)
+{
+	struct device *dev = rx_ring->dev;
+	int bi_size;
+
+	/* warn if we are about to overwrite the pointer */
+	WARN_ON(rx_ring->rx_bi);
+	bi_size = sizeof(struct iavf_rx_buffer) * rx_ring->count;
+	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
+	if (!rx_ring->rx_bi)
+		goto err;
+
+	u64_stats_init(&rx_ring->syncp);
+
+	/* Round up to nearest 4K */
+	rx_ring->size = rx_ring->count * sizeof(union iavf_32byte_rx_desc);
+	rx_ring->size = ALIGN(rx_ring->size, 4096);
+	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+					   &rx_ring->dma, GFP_KERNEL);
+
+	if (!rx_ring->desc) {
+		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
+			 rx_ring->size);
+		goto err;
+	}
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+
+	return 0;
+err:
+	kfree(rx_ring->rx_bi);
+	rx_ring->rx_bi = NULL;
+	return -ENOMEM;
+}
+
+/**
+ * iavf_release_rx_desc - Store the new tail and head values
+ * @rx_ring: ring to bump
+ * @val: new head index
+ **/
+static inline void iavf_release_rx_desc(struct iavf_ring *rx_ring, u32 val)
+{
+	rx_ring->next_to_use = val;
+
+	/* update next to alloc since we have filled the ring */
+	rx_ring->next_to_alloc = val;
+
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.  (Only
+	 * applicable for weak-ordered memory model archs,
+	 * such as IA-64).
+	 */
+	wmb();
+	writel(val, rx_ring->tail);
+}
+
+/**
+ * iavf_rx_offset - Return expected offset into page to access data
+ * @rx_ring: Ring we are requesting offset of
+ *
+ * Returns the offset value for ring into the data buffer.
+ */
+static inline unsigned int iavf_rx_offset(struct iavf_ring *rx_ring)
+{
+	return ring_uses_build_skb(rx_ring) ? IAVF_SKB_PAD : 0;
+}
+
+/**
+ * iavf_alloc_mapped_page - recycle or make a new page
+ * @rx_ring: ring to use
+ * @bi: rx_buffer struct to modify
+ *
+ * Returns true if the page was successfully allocated or
+ * reused.
+ **/
+static bool iavf_alloc_mapped_page(struct iavf_ring *rx_ring,
+				   struct iavf_rx_buffer *bi)
+{
+	struct page *page = bi->page;
+	dma_addr_t dma;
+
+	/* since we are recycling buffers we should seldom need to alloc */
+	if (likely(page)) {
+		rx_ring->rx_stats.page_reuse_count++;
+		return true;
+	}
+
+	/* alloc new page for storage */
+	page = dev_alloc_pages(iavf_rx_pg_order(rx_ring));
+	if (unlikely(!page)) {
+		rx_ring->rx_stats.alloc_page_failed++;
+		return false;
+	}
+
+	/* map page for use */
+	dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+				 iavf_rx_pg_size(rx_ring),
+				 DMA_FROM_DEVICE,
+				 IAVF_RX_DMA_ATTR);
+
+	/* if mapping failed free memory back to system since
+	 * there isn't much point in holding memory we can't use
+	 */
+	if (dma_mapping_error(rx_ring->dev, dma)) {
+		__free_pages(page, iavf_rx_pg_order(rx_ring));
+		rx_ring->rx_stats.alloc_page_failed++;
+		return false;
+	}
+
+	bi->dma = dma;
+	bi->page = page;
+	bi->page_offset = iavf_rx_offset(rx_ring);
+
+	/* initialize pagecnt_bias to 1 representing we fully own page */
+	bi->pagecnt_bias = 1;
+
+	return true;
+}
+
+/**
+ * iavf_receive_skb - Send a completed packet up the stack
+ * @rx_ring:  rx ring in play
+ * @skb: packet to send up
+ * @vlan_tag: vlan tag for packet
+ **/
+static void iavf_receive_skb(struct iavf_ring *rx_ring,
+			     struct sk_buff *skb, u16 vlan_tag)
+{
+	struct iavf_q_vector *q_vector = rx_ring->q_vector;
+
+	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+	    (vlan_tag & VLAN_VID_MASK))
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+	else if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_STAG_RX) &&
+		 vlan_tag & VLAN_VID_MASK)
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), vlan_tag);
+
+	napi_gro_receive(&q_vector->napi, skb);
+}
+
+/**
+ * iavf_alloc_rx_buffers - Replace used receive buffers
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ *
+ * Returns false if all allocations were successful, true if any fail
+ **/
+bool iavf_alloc_rx_buffers(struct iavf_ring *rx_ring, u16 cleaned_count)
+{
+	u16 ntu = rx_ring->next_to_use;
+	union iavf_rx_desc *rx_desc;
+	struct iavf_rx_buffer *bi;
+
+	/* do nothing if no valid netdev defined */
+	if (!rx_ring->netdev || !cleaned_count)
+		return false;
+
+	rx_desc = IAVF_RX_DESC(rx_ring, ntu);
+	bi = &rx_ring->rx_bi[ntu];
+
+	do {
+		if (!iavf_alloc_mapped_page(rx_ring, bi))
+			goto no_buffers;
+
+		/* sync the buffer for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+						 bi->page_offset,
+						 rx_ring->rx_buf_len,
+						 DMA_FROM_DEVICE);
+
+		/* Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+		rx_desc++;
+		bi++;
+		ntu++;
+		if (unlikely(ntu == rx_ring->count)) {
+			rx_desc = IAVF_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_bi;
+			ntu = 0;
+		}
+
+		/* clear the status bits for the next_to_use descriptor */
+		rx_desc->wb.qword1.status_error_len = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	if (rx_ring->next_to_use != ntu)
+		iavf_release_rx_desc(rx_ring, ntu);
+
+	return false;
+
+no_buffers:
+	if (rx_ring->next_to_use != ntu)
+		iavf_release_rx_desc(rx_ring, ntu);
+
+	/* make sure to come back via polling to try again after
+	 * allocation failure
+	 */
+	return true;
+}
+
+/**
+ * iavf_rx_checksum - Indicate in skb if hw indicated a good cksum
+ * @vsi: the VSI we care about
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ **/
+static inline void iavf_rx_checksum(struct iavf_vsi *vsi,
+				    struct sk_buff *skb,
+				    union iavf_rx_desc *rx_desc)
+{
+	struct iavf_rx_ptype_decoded decoded;
+	u32 rx_error, rx_status;
+	bool ipv4, ipv6;
+	u8 ptype;
+	u64 qword;
+
+	qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+	ptype = (qword & IAVF_RXD_QW1_PTYPE_MASK) >> IAVF_RXD_QW1_PTYPE_SHIFT;
+	rx_error = (qword & IAVF_RXD_QW1_ERROR_MASK) >>
+		   IAVF_RXD_QW1_ERROR_SHIFT;
+	rx_status = (qword & IAVF_RXD_QW1_STATUS_MASK) >>
+		    IAVF_RXD_QW1_STATUS_SHIFT;
+	decoded = decode_rx_desc_ptype(ptype);
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	skb_checksum_none_assert(skb);
+
+	/* Rx csum enabled and ip headers found? */
+	if (!(vsi->netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* did the hardware decode the packet and checksum? */
+	if (!(rx_status & BIT(IAVF_RX_DESC_STATUS_L3L4P_SHIFT)))
+		return;
+
+	/* both known and outer_ip must be set for the below code to work */
+	if (!(decoded.known && decoded.outer_ip))
+		return;
+
+	ipv4 = (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == IAVF_RX_PTYPE_OUTER_IPV4);
+	ipv6 = (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == IAVF_RX_PTYPE_OUTER_IPV6);
+
+	if (ipv4 &&
+	    (rx_error & (BIT(IAVF_RX_DESC_ERROR_IPE_SHIFT) |
+			 BIT(IAVF_RX_DESC_ERROR_EIPE_SHIFT))))
+		goto checksum_fail;
+
+	/* likely incorrect csum if alternate IP extension headers found */
+	if (ipv6 &&
+	    rx_status & BIT(IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT))
+		/* don't increment checksum err here, non-fatal err */
+		return;
+
+	/* there was some L4 error, count error and punt packet to the stack */
+	if (rx_error & BIT(IAVF_RX_DESC_ERROR_L4E_SHIFT))
+		goto checksum_fail;
+
+	/* handle packets that were not able to be checksummed due
+	 * to arrival speed, in this case the stack can compute
+	 * the csum.
+	 */
+	if (rx_error & BIT(IAVF_RX_DESC_ERROR_PPRS_SHIFT))
+		return;
+
+	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
+	switch (decoded.inner_prot) {
+	case IAVF_RX_PTYPE_INNER_PROT_TCP:
+	case IAVF_RX_PTYPE_INNER_PROT_UDP:
+	case IAVF_RX_PTYPE_INNER_PROT_SCTP:
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		fallthrough;
+	default:
+		break;
+	}
+
+	return;
+
+checksum_fail:
+	vsi->back->hw_csum_rx_error++;
+}
+
+/**
+ * iavf_ptype_to_htype - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns a hash type to be used by skb_set_hash
+ **/
+static inline int iavf_ptype_to_htype(u8 ptype)
+{
+	struct iavf_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
+
+	if (!decoded.known)
+		return PKT_HASH_TYPE_NONE;
+
+	if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP &&
+	    decoded.payload_layer == IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY4)
+		return PKT_HASH_TYPE_L4;
+	else if (decoded.outer_ip == IAVF_RX_PTYPE_OUTER_IP &&
+		 decoded.payload_layer == IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY3)
+		return PKT_HASH_TYPE_L3;
+	else
+		return PKT_HASH_TYPE_L2;
+}
+
+/**
+ * iavf_rx_hash - set the hash value in the skb
+ * @ring: descriptor ring
+ * @rx_desc: specific descriptor
+ * @skb: skb currently being received and modified
+ * @rx_ptype: Rx packet type
+ **/
+static inline void iavf_rx_hash(struct iavf_ring *ring,
+				union iavf_rx_desc *rx_desc,
+				struct sk_buff *skb,
+				u8 rx_ptype)
+{
+	u32 hash;
+	const __le64 rss_mask =
+		cpu_to_le64((u64)IAVF_RX_DESC_FLTSTAT_RSS_HASH <<
+			    IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT);
+
+	if (!(ring->netdev->features & NETIF_F_RXHASH))
+		return;
+
+	if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
+		hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
+		skb_set_hash(skb, hash, iavf_ptype_to_htype(rx_ptype));
+	}
+}
+
+/**
+ * iavf_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ * @rx_ptype: the packet type decoded by hardware
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ **/
+static inline
+void iavf_process_skb_fields(struct iavf_ring *rx_ring,
+			     union iavf_rx_desc *rx_desc, struct sk_buff *skb,
+			     u8 rx_ptype)
+{
+	iavf_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
+
+	iavf_rx_checksum(rx_ring->vsi, skb, rx_desc);
+
+	skb_record_rx_queue(skb, rx_ring->queue_index);
+
+	/* modifies the skb - consumes the enet header */
+	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+}
+
+/**
+ * iavf_cleanup_headers - Correct empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being fixed
+ *
+ * Also address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+static bool iavf_cleanup_headers(struct iavf_ring *rx_ring, struct sk_buff *skb)
+{
+	/* if eth_skb_pad returns an error the skb was freed */
+	if (eth_skb_pad(skb))
+		return true;
+
+	return false;
+}
+
+/**
+ * iavf_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ **/
+static void iavf_reuse_rx_page(struct iavf_ring *rx_ring,
+			       struct iavf_rx_buffer *old_buff)
+{
+	struct iavf_rx_buffer *new_buff;
+	u16 nta = rx_ring->next_to_alloc;
+
+	new_buff = &rx_ring->rx_bi[nta];
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* transfer page from old buffer to new buffer */
+	new_buff->dma		= old_buff->dma;
+	new_buff->page		= old_buff->page;
+	new_buff->page_offset	= old_buff->page_offset;
+	new_buff->pagecnt_bias	= old_buff->pagecnt_bias;
+}
+
+/**
+ * iavf_can_reuse_rx_page - Determine if this page can be reused by
+ * the adapter for another receive
+ *
+ * @rx_buffer: buffer containing the page
+ *
+ * If page is reusable, rx_buffer->page_offset is adjusted to point to
+ * an unused region in the page.
+ *
+ * For small pages, @truesize will be a constant value, half the size
+ * of the memory at page.  We'll attempt to alternate between high and
+ * low halves of the page, with one half ready for use by the hardware
+ * and the other half being consumed by the stack.  We use the page
+ * ref count to determine whether the stack has finished consuming the
+ * portion of this page that was passed up with a previous packet.  If
+ * the page ref count is >1, we'll assume the "other" half page is
+ * still busy, and this page cannot be reused.
+ *
+ * For larger pages, @truesize will be the actual space used by the
+ * received packet (adjusted upward to an even multiple of the cache
+ * line size).  This will advance through the page by the amount
+ * actually consumed by the received packets while there is still
+ * space for a buffer.  Each region of larger pages will be used at
+ * most once, after which the page will not be reused.
+ *
+ * In either case, if the page is reusable its refcount is increased.
+ **/
+static bool iavf_can_reuse_rx_page(struct iavf_rx_buffer *rx_buffer)
+{
+	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+	struct page *page = rx_buffer->page;
+
+	/* Is any reuse possible? */
+	if (!dev_page_is_reusable(page))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely((page_count(page) - pagecnt_bias) > 1))
+		return false;
+#else
+#define IAVF_LAST_OFFSET \
+	(SKB_WITH_OVERHEAD(PAGE_SIZE) - IAVF_RXBUFFER_2048)
+	if (rx_buffer->page_offset > IAVF_LAST_OFFSET)
+		return false;
+#endif
+
+	/* If we have drained the page fragment pool we need to update
+	 * the pagecnt_bias and page count so that we fully restock the
+	 * number of references the driver holds.
+	 */
+	if (unlikely(!pagecnt_bias)) {
+		page_ref_add(page, USHRT_MAX);
+		rx_buffer->pagecnt_bias = USHRT_MAX;
+	}
+
+	return true;
+}
+
+/**
+ * iavf_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @skb: sk_buff to place the data into
+ * @size: packet length from rx_desc
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ * It will just attach the page as a frag to the skb.
+ *
+ * The function will then update the page offset.
+ **/
+static void iavf_add_rx_frag(struct iavf_ring *rx_ring,
+			     struct iavf_rx_buffer *rx_buffer,
+			     struct sk_buff *skb,
+			     unsigned int size)
+{
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(size + iavf_rx_offset(rx_ring));
+#endif
+
+	if (!size)
+		return;
+
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+			rx_buffer->page_offset, size, truesize);
+
+	/* page is being used so we must update the page offset */
+#if (PAGE_SIZE < 8192)
+	rx_buffer->page_offset ^= truesize;
+#else
+	rx_buffer->page_offset += truesize;
+#endif
+}
+
+/**
+ * iavf_get_rx_buffer - Fetch Rx buffer and synchronize data for use
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @size: size of buffer to add to skb
+ *
+ * This function will pull an Rx buffer from the ring and synchronize it
+ * for use by the CPU.
+ */
+static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring,
+						 const unsigned int size)
+{
+	struct iavf_rx_buffer *rx_buffer;
+
+	rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
+	prefetchw(rx_buffer->page);
+	if (!size)
+		return rx_buffer;
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      rx_buffer->dma,
+				      rx_buffer->page_offset,
+				      size,
+				      DMA_FROM_DEVICE);
+
+	/* We have pulled a buffer for use, so decrement pagecnt_bias */
+	rx_buffer->pagecnt_bias--;
+
+	return rx_buffer;
+}
+
+/**
+ * iavf_construct_skb - Allocate skb and populate it
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: rx buffer to pull data from
+ * @size: size of buffer to add to skb
+ *
+ * This function allocates an skb.  It then populates it with the page
+ * data from the current receive descriptor, taking care to set up the
+ * skb correctly.
+ */
+static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring,
+					  struct iavf_rx_buffer *rx_buffer,
+					  unsigned int size)
+{
+	void *va;
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(size);
+#endif
+	unsigned int headlen;
+	struct sk_buff *skb;
+
+	if (!rx_buffer)
+		return NULL;
+	/* prefetch first cache line of first page */
+	va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+	net_prefetch(va);
+
+	/* allocate a skb to store the frags */
+	skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+			       IAVF_RX_HDR_SIZE,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* Determine available headroom for copy */
+	headlen = size;
+	if (headlen > IAVF_RX_HDR_SIZE)
+		headlen = eth_get_headlen(skb->dev, va, IAVF_RX_HDR_SIZE);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+
+	/* update all of the pointers */
+	size -= headlen;
+	if (size) {
+		skb_add_rx_frag(skb, 0, rx_buffer->page,
+				rx_buffer->page_offset + headlen,
+				size, truesize);
+
+		/* buffer is used by skb, update page_offset */
+#if (PAGE_SIZE < 8192)
+		rx_buffer->page_offset ^= truesize;
+#else
+		rx_buffer->page_offset += truesize;
+#endif
+	} else {
+		/* buffer is unused, reset bias back to rx_buffer */
+		rx_buffer->pagecnt_bias++;
+	}
+
+	return skb;
+}
+
+/**
+ * iavf_build_skb - Build skb around an existing buffer
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buffer: Rx buffer to pull data from
+ * @size: size of buffer to add to skb
+ *
+ * This function builds an skb around an existing Rx buffer, taking care
+ * to set up the skb correctly and avoid any memcpy overhead.
+ */
+static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring,
+				      struct iavf_rx_buffer *rx_buffer,
+				      unsigned int size)
+{
+	void *va;
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+				SKB_DATA_ALIGN(IAVF_SKB_PAD + size);
+#endif
+	struct sk_buff *skb;
+
+	if (!rx_buffer || !size)
+		return NULL;
+	/* prefetch first cache line of first page */
+	va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+	net_prefetch(va);
+
+	/* build an skb around the page buffer */
+	skb = napi_build_skb(va - IAVF_SKB_PAD, truesize);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* update pointers within the skb to store the data */
+	skb_reserve(skb, IAVF_SKB_PAD);
+	__skb_put(skb, size);
+
+	/* buffer is used by skb, update page_offset */
+#if (PAGE_SIZE < 8192)
+	rx_buffer->page_offset ^= truesize;
+#else
+	rx_buffer->page_offset += truesize;
+#endif
+
+	return skb;
+}
+
+/**
+ * iavf_put_rx_buffer - Clean up used buffer and either recycle or free
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: rx buffer to pull data from
+ *
+ * This function will clean up the contents of the rx_buffer.  It will
+ * either recycle the buffer or unmap it and free the associated resources.
+ */
+static void iavf_put_rx_buffer(struct iavf_ring *rx_ring,
+			       struct iavf_rx_buffer *rx_buffer)
+{
+	if (!rx_buffer)
+		return;
+
+	if (iavf_can_reuse_rx_page(rx_buffer)) {
+		/* hand second half of page back to the ring */
+		iavf_reuse_rx_page(rx_ring, rx_buffer);
+		rx_ring->rx_stats.page_reuse_count++;
+	} else {
+		/* we are not reusing the buffer so unmap it */
+		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+				     iavf_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE, IAVF_RX_DMA_ATTR);
+		__page_frag_cache_drain(rx_buffer->page,
+					rx_buffer->pagecnt_bias);
+	}
+
+	/* clear contents of buffer_info */
+	rx_buffer->page = NULL;
+}
+
+/**
+ * iavf_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ * @skb: Current socket buffer containing buffer in progress
+ *
+ * This function updates next to clean.  If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ **/
+static bool iavf_is_non_eop(struct iavf_ring *rx_ring,
+			    union iavf_rx_desc *rx_desc,
+			    struct sk_buff *skb)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
+	prefetch(IAVF_RX_DESC(rx_ring, ntc));
+
+	/* if we are the last buffer then there is nothing else to do */
+#define IAVF_RXD_EOF BIT(IAVF_RX_DESC_STATUS_EOF_SHIFT)
+	if (likely(iavf_test_staterr(rx_desc, IAVF_RXD_EOF)))
+		return false;
+
+	rx_ring->rx_stats.non_eop_descs++;
+
+	return true;
+}
+
+/**
+ * iavf_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @budget: Total limit on number of packets to process
+ *
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing.  The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the system.
+ *
+ * Returns amount of work completed
+ **/
+static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+	struct sk_buff *skb = rx_ring->skb;
+	u16 cleaned_count = IAVF_DESC_UNUSED(rx_ring);
+	bool failure = false;
+
+	while (likely(total_rx_packets < (unsigned int)budget)) {
+		struct iavf_rx_buffer *rx_buffer;
+		union iavf_rx_desc *rx_desc;
+		unsigned int size;
+		u16 vlan_tag = 0;
+		u8 rx_ptype;
+		u64 qword;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= IAVF_RX_BUFFER_WRITE) {
+			failure = failure ||
+				  iavf_alloc_rx_buffers(rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = IAVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
+		/* status_error_len will always be zero for unused descriptors
+		 * because it's cleared in cleanup, and overlaps with hdr_addr
+		 * which is always zero because packet split isn't used, if the
+		 * hardware wrote DD then the length will be non-zero
+		 */
+		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we have
+		 * verified the descriptor has been written back.
+		 */
+		dma_rmb();
+#define IAVF_RXD_DD BIT(IAVF_RX_DESC_STATUS_DD_SHIFT)
+		if (!iavf_test_staterr(rx_desc, IAVF_RXD_DD))
+			break;
+
+		size = (qword & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >>
+		       IAVF_RXD_QW1_LENGTH_PBUF_SHIFT;
+
+		iavf_trace(clean_rx_irq, rx_ring, rx_desc, skb);
+		rx_buffer = iavf_get_rx_buffer(rx_ring, size);
+
+		/* retrieve a buffer from the ring */
+		if (skb)
+			iavf_add_rx_frag(rx_ring, rx_buffer, skb, size);
+		else if (ring_uses_build_skb(rx_ring))
+			skb = iavf_build_skb(rx_ring, rx_buffer, size);
+		else
+			skb = iavf_construct_skb(rx_ring, rx_buffer, size);
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb) {
+			rx_ring->rx_stats.alloc_buff_failed++;
+			if (rx_buffer && size)
+				rx_buffer->pagecnt_bias++;
+			break;
+		}
+
+		iavf_put_rx_buffer(rx_ring, rx_buffer);
+		cleaned_count++;
+
+		if (iavf_is_non_eop(rx_ring, rx_desc, skb))
+			continue;
+
+		/* ERR_MASK will only have valid bits if EOP set, and
+		 * what we are doing here is actually checking
+		 * IAVF_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
+		 * the error field
+		 */
+		if (unlikely(iavf_test_staterr(rx_desc, BIT(IAVF_RXD_QW1_ERROR_SHIFT)))) {
+			dev_kfree_skb_any(skb);
+			skb = NULL;
+			continue;
+		}
+
+		if (iavf_cleanup_headers(rx_ring, skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		/* probably a little skewed due to removing CRC */
+		total_rx_bytes += skb->len;
+
+		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+		rx_ptype = (qword & IAVF_RXD_QW1_PTYPE_MASK) >>
+			   IAVF_RXD_QW1_PTYPE_SHIFT;
+
+		/* populate checksum, VLAN, and protocol */
+		iavf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+
+		if (qword & BIT(IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT) &&
+		    rx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1)
+			vlan_tag = le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1);
+		if (rx_desc->wb.qword2.ext_status &
+		    cpu_to_le16(BIT(IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) &&
+		    rx_ring->flags & IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2)
+			vlan_tag = le16_to_cpu(rx_desc->wb.qword2.l2tag2_2);
+
+		iavf_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb);
+		iavf_receive_skb(rx_ring, skb, vlan_tag);
+		skb = NULL;
+
+		/* update budget accounting */
+		total_rx_packets++;
+	}
+
+	rx_ring->skb = skb;
+
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->stats.packets += total_rx_packets;
+	rx_ring->stats.bytes += total_rx_bytes;
+	u64_stats_update_end(&rx_ring->syncp);
+	rx_ring->q_vector->rx.total_packets += total_rx_packets;
+	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+
+	/* guarantee a trip back through this routine if there was a failure */
+	return failure ? budget : (int)total_rx_packets;
+}
+
+static inline u32 iavf_buildreg_itr(const int type, u16 itr)
+{
+	u32 val;
+
+	/* We don't bother with setting the CLEARPBA bit as the data sheet
+	 * points out doing so is "meaningless since it was already
+	 * auto-cleared". The auto-clearing happens when the interrupt is
+	 * asserted.
+	 *
+	 * Hardware errata 28 for also indicates that writing to a
+	 * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear
+	 * an event in the PBA anyway so we need to rely on the automask
+	 * to hold pending events for us until the interrupt is re-enabled
+	 *
+	 * The itr value is reported in microseconds, and the register
+	 * value is recorded in 2 microsecond units. For this reason we
+	 * only need to shift by the interval shift - 1 instead of the
+	 * full value.
+	 */
+	itr &= IAVF_ITR_MASK;
+
+	val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK |
+	      (type << IAVF_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
+	      (itr << (IAVF_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1));
+
+	return val;
+}
+
+/* a small macro to shorten up some long lines */
+#define INTREG IAVF_VFINT_DYN_CTLN1
+
+/* The act of updating the ITR will cause it to immediately trigger. In order
+ * to prevent this from throwing off adaptive update statistics we defer the
+ * update so that it can only happen so often. So after either Tx or Rx are
+ * updated we make the adaptive scheme wait until either the ITR completely
+ * expires via the next_update expiration or we have been through at least
+ * 3 interrupts.
+ */
+#define ITR_COUNTDOWN_START 3
+
+/**
+ * iavf_update_enable_itr - Update itr and re-enable MSIX interrupt
+ * @vsi: the VSI we care about
+ * @q_vector: q_vector for which itr is being updated and interrupt enabled
+ *
+ **/
+static inline void iavf_update_enable_itr(struct iavf_vsi *vsi,
+					  struct iavf_q_vector *q_vector)
+{
+	struct iavf_hw *hw = &vsi->back->hw;
+	u32 intval;
+
+	/* These will do nothing if dynamic updates are not enabled */
+	iavf_update_itr(q_vector, &q_vector->tx);
+	iavf_update_itr(q_vector, &q_vector->rx);
+
+	/* This block of logic allows us to get away with only updating
+	 * one ITR value with each interrupt. The idea is to perform a
+	 * pseudo-lazy update with the following criteria.
+	 *
+	 * 1. Rx is given higher priority than Tx if both are in same state
+	 * 2. If we must reduce an ITR that is given highest priority.
+	 * 3. We then give priority to increasing ITR based on amount.
+	 */
+	if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
+		/* Rx ITR needs to be reduced, this is highest priority */
+		intval = iavf_buildreg_itr(IAVF_RX_ITR,
+					   q_vector->rx.target_itr);
+		q_vector->rx.current_itr = q_vector->rx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
+		   ((q_vector->rx.target_itr - q_vector->rx.current_itr) <
+		    (q_vector->tx.target_itr - q_vector->tx.current_itr))) {
+		/* Tx ITR needs to be reduced, this is second priority
+		 * Tx ITR needs to be increased more than Rx, fourth priority
+		 */
+		intval = iavf_buildreg_itr(IAVF_TX_ITR,
+					   q_vector->tx.target_itr);
+		q_vector->tx.current_itr = q_vector->tx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
+		/* Rx ITR needs to be increased, third priority */
+		intval = iavf_buildreg_itr(IAVF_RX_ITR,
+					   q_vector->rx.target_itr);
+		q_vector->rx.current_itr = q_vector->rx.target_itr;
+		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	} else {
+		/* No ITR update, lowest priority */
+		intval = iavf_buildreg_itr(IAVF_ITR_NONE, 0);
+		if (q_vector->itr_countdown)
+			q_vector->itr_countdown--;
+	}
+
+	if (!test_bit(__IAVF_VSI_DOWN, vsi->state))
+		wr32(hw, INTREG(q_vector->reg_idx), intval);
+}
+
+/**
+ * iavf_napi_poll - NAPI polling Rx/Tx cleanup routine
+ * @napi: napi struct with our devices info in it
+ * @budget: amount of work driver is allowed to do this pass, in packets
+ *
+ * This function will clean all queues associated with a q_vector.
+ *
+ * Returns the amount of work done
+ **/
+int iavf_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct iavf_q_vector *q_vector =
+			       container_of(napi, struct iavf_q_vector, napi);
+	struct iavf_vsi *vsi = q_vector->vsi;
+	struct iavf_ring *ring;
+	bool clean_complete = true;
+	bool arm_wb = false;
+	int budget_per_ring;
+	int work_done = 0;
+
+	if (test_bit(__IAVF_VSI_DOWN, vsi->state)) {
+		napi_complete(napi);
+		return 0;
+	}
+
+	/* Since the actual Tx work is minimal, we can give the Tx a larger
+	 * budget and be more aggressive about cleaning up the Tx descriptors.
+	 */
+	iavf_for_each_ring(ring, q_vector->tx) {
+		if (!iavf_clean_tx_irq(vsi, ring, budget)) {
+			clean_complete = false;
+			continue;
+		}
+		arm_wb |= ring->arm_wb;
+		ring->arm_wb = false;
+	}
+
+	/* Handle case where we are called by netpoll with a budget of 0 */
+	if (budget <= 0)
+		goto tx_only;
+
+	/* We attempt to distribute budget to each Rx queue fairly, but don't
+	 * allow the budget to go below 1 because that would exit polling early.
+	 */
+	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
+
+	iavf_for_each_ring(ring, q_vector->rx) {
+		int cleaned = iavf_clean_rx_irq(ring, budget_per_ring);
+
+		work_done += cleaned;
+		/* if we clean as many as budgeted, we must not be done */
+		if (cleaned >= budget_per_ring)
+			clean_complete = false;
+	}
+
+	/* If work not completed, return budget and polling will return */
+	if (!clean_complete) {
+		int cpu_id = smp_processor_id();
+
+		/* It is possible that the interrupt affinity has changed but,
+		 * if the cpu is pegged at 100%, polling will never exit while
+		 * traffic continues and the interrupt will be stuck on this
+		 * cpu.  We check to make sure affinity is correct before we
+		 * continue to poll, otherwise we must stop polling so the
+		 * interrupt can move to the correct cpu.
+		 */
+		if (!cpumask_test_cpu(cpu_id, &q_vector->affinity_mask)) {
+			/* Tell napi that we are done polling */
+			napi_complete_done(napi, work_done);
+
+			/* Force an interrupt */
+			iavf_force_wb(vsi, q_vector);
+
+			/* Return budget-1 so that polling stops */
+			return budget - 1;
+		}
+tx_only:
+		if (arm_wb) {
+			q_vector->tx.ring[0].tx_stats.tx_force_wb++;
+			iavf_enable_wb_on_itr(vsi, q_vector);
+		}
+		return budget;
+	}
+
+	if (vsi->back->flags & IAVF_TXR_FLAGS_WB_ON_ITR)
+		q_vector->arm_wb_state = false;
+
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done)))
+		iavf_update_enable_itr(vsi, q_vector);
+
+	return min_t(int, work_done, budget - 1);
+}
+
+/**
+ * iavf_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
+ * @skb:     send buffer
+ * @tx_ring: ring to send buffer on
+ * @flags:   the tx flags to be set
+ *
+ * Checks the skb and set up correspondingly several generic transmit flags
+ * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
+ *
+ * Returns error code indicate the frame should be dropped upon error and the
+ * otherwise  returns 0 to indicate the flags has been set properly.
+ **/
+static void iavf_tx_prepare_vlan_flags(struct sk_buff *skb,
+				       struct iavf_ring *tx_ring, u32 *flags)
+{
+	u32  tx_flags = 0;
+
+
+	/* stack will only request hardware VLAN insertion offload for protocols
+	 * that the driver supports and has enabled
+	 */
+	if (!skb_vlan_tag_present(skb))
+		return;
+
+	tx_flags |= skb_vlan_tag_get(skb) << IAVF_TX_FLAGS_VLAN_SHIFT;
+	if (tx_ring->flags & IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2) {
+		tx_flags |= IAVF_TX_FLAGS_HW_OUTER_SINGLE_VLAN;
+	} else if (tx_ring->flags & IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1) {
+		tx_flags |= IAVF_TX_FLAGS_HW_VLAN;
+	} else {
+		dev_dbg(tx_ring->dev, "Unsupported Tx VLAN tag location requested\n");
+		return;
+	}
+
+	*flags = tx_flags;
+}
+
+/**
+ * iavf_tso - set up the tso context descriptor
+ * @first:    pointer to first Tx buffer for xmit
+ * @hdr_len:  ptr to the size of the packet header
+ * @cd_type_cmd_tso_mss: Quad Word 1
+ *
+ * Returns 0 if no TSO can happen, 1 if tso is going, or error
+ **/
+static int iavf_tso(struct iavf_tx_buffer *first, u8 *hdr_len,
+		    u64 *cd_type_cmd_tso_mss)
+{
+	struct sk_buff *skb = first->skb;
+	u64 cd_cmd, cd_tso_len, cd_mss;
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		struct udphdr *udp;
+		unsigned char *hdr;
+	} l4;
+	u32 paylen, l4_offset;
+	u16 gso_segs, gso_size;
+	int err;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	err = skb_cow_head(skb, 0);
+	if (err < 0)
+		return err;
+
+	ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_transport_header(skb);
+
+	/* initialize outer IP header fields */
+	if (ip.v4->version == 4) {
+		ip.v4->tot_len = 0;
+		ip.v4->check = 0;
+	} else {
+		ip.v6->payload_len = 0;
+	}
+
+	if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
+					 SKB_GSO_GRE_CSUM |
+					 SKB_GSO_IPXIP4 |
+					 SKB_GSO_IPXIP6 |
+					 SKB_GSO_UDP_TUNNEL |
+					 SKB_GSO_UDP_TUNNEL_CSUM)) {
+		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
+		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) {
+			l4.udp->len = 0;
+
+			/* determine offset of outer transport header */
+			l4_offset = l4.hdr - skb->data;
+
+			/* remove payload length from outer checksum */
+			paylen = skb->len - l4_offset;
+			csum_replace_by_diff(&l4.udp->check,
+					     (__force __wsum)htonl(paylen));
+		}
+
+		/* reset pointers to inner headers */
+		ip.hdr = skb_inner_network_header(skb);
+		l4.hdr = skb_inner_transport_header(skb);
+
+		/* initialize inner IP header fields */
+		if (ip.v4->version == 4) {
+			ip.v4->tot_len = 0;
+			ip.v4->check = 0;
+		} else {
+			ip.v6->payload_len = 0;
+		}
+	}
+
+	/* determine offset of inner transport header */
+	l4_offset = l4.hdr - skb->data;
+	/* remove payload length from inner checksum */
+	paylen = skb->len - l4_offset;
+
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+		csum_replace_by_diff(&l4.udp->check,
+				     (__force __wsum)htonl(paylen));
+		/* compute length of UDP segmentation header */
+		*hdr_len = (u8)sizeof(l4.udp) + l4_offset;
+	} else {
+		csum_replace_by_diff(&l4.tcp->check,
+				     (__force __wsum)htonl(paylen));
+		/* compute length of TCP segmentation header */
+		*hdr_len = (u8)((l4.tcp->doff * 4) + l4_offset);
+	}
+
+	/* pull values out of skb_shinfo */
+	gso_size = skb_shinfo(skb)->gso_size;
+	gso_segs = skb_shinfo(skb)->gso_segs;
+
+	/* update GSO size and bytecount with header size */
+	first->gso_segs = gso_segs;
+	first->bytecount += (first->gso_segs - 1) * *hdr_len;
+
+	/* find the field values */
+	cd_cmd = IAVF_TX_CTX_DESC_TSO;
+	cd_tso_len = skb->len - *hdr_len;
+	cd_mss = gso_size;
+	*cd_type_cmd_tso_mss |= (cd_cmd << IAVF_TXD_CTX_QW1_CMD_SHIFT) |
+				(cd_tso_len << IAVF_TXD_CTX_QW1_TSO_LEN_SHIFT) |
+				(cd_mss << IAVF_TXD_CTX_QW1_MSS_SHIFT);
+	return 1;
+}
+
+/**
+ * iavf_tx_enable_csum - Enable Tx checksum offloads
+ * @skb: send buffer
+ * @tx_flags: pointer to Tx flags currently set
+ * @td_cmd: Tx descriptor command bits to set
+ * @td_offset: Tx descriptor header offsets to set
+ * @tx_ring: Tx descriptor ring
+ * @cd_tunneling: ptr to context desc bits
+ **/
+static int iavf_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
+			       u32 *td_cmd, u32 *td_offset,
+			       struct iavf_ring *tx_ring,
+			       u32 *cd_tunneling)
+{
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		struct udphdr *udp;
+		unsigned char *hdr;
+	} l4;
+	unsigned char *exthdr;
+	u32 offset, cmd = 0;
+	__be16 frag_off;
+	u8 l4_proto = 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_transport_header(skb);
+
+	/* compute outer L2 header size */
+	offset = ((ip.hdr - skb->data) / 2) << IAVF_TX_DESC_LENGTH_MACLEN_SHIFT;
+
+	if (skb->encapsulation) {
+		u32 tunnel = 0;
+		/* define outer network header type */
+		if (*tx_flags & IAVF_TX_FLAGS_IPV4) {
+			tunnel |= (*tx_flags & IAVF_TX_FLAGS_TSO) ?
+				  IAVF_TX_CTX_EXT_IP_IPV4 :
+				  IAVF_TX_CTX_EXT_IP_IPV4_NO_CSUM;
+
+			l4_proto = ip.v4->protocol;
+		} else if (*tx_flags & IAVF_TX_FLAGS_IPV6) {
+			tunnel |= IAVF_TX_CTX_EXT_IP_IPV6;
+
+			exthdr = ip.hdr + sizeof(*ip.v6);
+			l4_proto = ip.v6->nexthdr;
+			if (l4.hdr != exthdr)
+				ipv6_skip_exthdr(skb, exthdr - skb->data,
+						 &l4_proto, &frag_off);
+		}
+
+		/* define outer transport */
+		switch (l4_proto) {
+		case IPPROTO_UDP:
+			tunnel |= IAVF_TXD_CTX_UDP_TUNNELING;
+			*tx_flags |= IAVF_TX_FLAGS_VXLAN_TUNNEL;
+			break;
+		case IPPROTO_GRE:
+			tunnel |= IAVF_TXD_CTX_GRE_TUNNELING;
+			*tx_flags |= IAVF_TX_FLAGS_VXLAN_TUNNEL;
+			break;
+		case IPPROTO_IPIP:
+		case IPPROTO_IPV6:
+			*tx_flags |= IAVF_TX_FLAGS_VXLAN_TUNNEL;
+			l4.hdr = skb_inner_network_header(skb);
+			break;
+		default:
+			if (*tx_flags & IAVF_TX_FLAGS_TSO)
+				return -1;
+
+			skb_checksum_help(skb);
+			return 0;
+		}
+
+		/* compute outer L3 header size */
+		tunnel |= ((l4.hdr - ip.hdr) / 4) <<
+			  IAVF_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
+
+		/* switch IP header pointer from outer to inner header */
+		ip.hdr = skb_inner_network_header(skb);
+
+		/* compute tunnel header size */
+		tunnel |= ((ip.hdr - l4.hdr) / 2) <<
+			  IAVF_TXD_CTX_QW0_NATLEN_SHIFT;
+
+		/* indicate if we need to offload outer UDP header */
+		if ((*tx_flags & IAVF_TX_FLAGS_TSO) &&
+		    !(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
+		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
+			tunnel |= IAVF_TXD_CTX_QW0_L4T_CS_MASK;
+
+		/* record tunnel offload values */
+		*cd_tunneling |= tunnel;
+
+		/* switch L4 header pointer from outer to inner */
+		l4.hdr = skb_inner_transport_header(skb);
+		l4_proto = 0;
+
+		/* reset type as we transition from outer to inner headers */
+		*tx_flags &= ~(IAVF_TX_FLAGS_IPV4 | IAVF_TX_FLAGS_IPV6);
+		if (ip.v4->version == 4)
+			*tx_flags |= IAVF_TX_FLAGS_IPV4;
+		if (ip.v6->version == 6)
+			*tx_flags |= IAVF_TX_FLAGS_IPV6;
+	}
+
+	/* Enable IP checksum offloads */
+	if (*tx_flags & IAVF_TX_FLAGS_IPV4) {
+		l4_proto = ip.v4->protocol;
+		/* the stack computes the IP header already, the only time we
+		 * need the hardware to recompute it is in the case of TSO.
+		 */
+		cmd |= (*tx_flags & IAVF_TX_FLAGS_TSO) ?
+		       IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM :
+		       IAVF_TX_DESC_CMD_IIPT_IPV4;
+	} else if (*tx_flags & IAVF_TX_FLAGS_IPV6) {
+		cmd |= IAVF_TX_DESC_CMD_IIPT_IPV6;
+
+		exthdr = ip.hdr + sizeof(*ip.v6);
+		l4_proto = ip.v6->nexthdr;
+		if (l4.hdr != exthdr)
+			ipv6_skip_exthdr(skb, exthdr - skb->data,
+					 &l4_proto, &frag_off);
+	}
+
+	/* compute inner L3 header size */
+	offset |= ((l4.hdr - ip.hdr) / 4) << IAVF_TX_DESC_LENGTH_IPLEN_SHIFT;
+
+	/* Enable L4 checksum offloads */
+	switch (l4_proto) {
+	case IPPROTO_TCP:
+		/* enable checksum offloads */
+		cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_TCP;
+		offset |= l4.tcp->doff << IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+		break;
+	case IPPROTO_SCTP:
+		/* enable SCTP checksum offload */
+		cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_SCTP;
+		offset |= (sizeof(struct sctphdr) >> 2) <<
+			  IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+		break;
+	case IPPROTO_UDP:
+		/* enable UDP checksum offload */
+		cmd |= IAVF_TX_DESC_CMD_L4T_EOFT_UDP;
+		offset |= (sizeof(struct udphdr) >> 2) <<
+			  IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+		break;
+	default:
+		if (*tx_flags & IAVF_TX_FLAGS_TSO)
+			return -1;
+		skb_checksum_help(skb);
+		return 0;
+	}
+
+	*td_cmd |= cmd;
+	*td_offset |= offset;
+
+	return 1;
+}
+
+/**
+ * iavf_create_tx_ctx - Build the Tx context descriptor
+ * @tx_ring:  ring to create the descriptor on
+ * @cd_type_cmd_tso_mss: Quad Word 1
+ * @cd_tunneling: Quad Word 0 - bits 0-31
+ * @cd_l2tag2: Quad Word 0 - bits 32-63
+ **/
+static void iavf_create_tx_ctx(struct iavf_ring *tx_ring,
+			       const u64 cd_type_cmd_tso_mss,
+			       const u32 cd_tunneling, const u32 cd_l2tag2)
+{
+	struct iavf_tx_context_desc *context_desc;
+	int i = tx_ring->next_to_use;
+
+	if ((cd_type_cmd_tso_mss == IAVF_TX_DESC_DTYPE_CONTEXT) &&
+	    !cd_tunneling && !cd_l2tag2)
+		return;
+
+	/* grab the next descriptor */
+	context_desc = IAVF_TX_CTXTDESC(tx_ring, i);
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	/* cpu_to_le32 and assign to struct fields */
+	context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
+	context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
+	context_desc->rsvd = cpu_to_le16(0);
+	context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
+}
+
+/**
+ * __iavf_chk_linearize - Check if there are more than 8 buffers per packet
+ * @skb:      send buffer
+ *
+ * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire
+ * and so we need to figure out the cases where we need to linearize the skb.
+ *
+ * For TSO we need to count the TSO header and segment payload separately.
+ * As such we need to check cases where we have 7 fragments or more as we
+ * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
+ * the segment payload in the first descriptor, and another 7 for the
+ * fragments.
+ **/
+bool __iavf_chk_linearize(struct sk_buff *skb)
+{
+	const skb_frag_t *frag, *stale;
+	int nr_frags, sum;
+
+	/* no need to check if number of frags is less than 7 */
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	if (nr_frags < (IAVF_MAX_BUFFER_TXD - 1))
+		return false;
+
+	/* We need to walk through the list and validate that each group
+	 * of 6 fragments totals at least gso_size.
+	 */
+	nr_frags -= IAVF_MAX_BUFFER_TXD - 2;
+	frag = &skb_shinfo(skb)->frags[0];
+
+	/* Initialize size to the negative value of gso_size minus 1.  We
+	 * use this as the worst case scenerio in which the frag ahead
+	 * of us only provides one byte which is why we are limited to 6
+	 * descriptors for a single transmit as the header and previous
+	 * fragment are already consuming 2 descriptors.
+	 */
+	sum = 1 - skb_shinfo(skb)->gso_size;
+
+	/* Add size of frags 0 through 4 to create our initial sum */
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+
+	/* Walk through fragments adding latest fragment, testing it, and
+	 * then removing stale fragments from the sum.
+	 */
+	for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
+		int stale_size = skb_frag_size(stale);
+
+		sum += skb_frag_size(frag++);
+
+		/* The stale fragment may present us with a smaller
+		 * descriptor than the actual fragment size. To account
+		 * for that we need to remove all the data on the front and
+		 * figure out what the remainder would be in the last
+		 * descriptor associated with the fragment.
+		 */
+		if (stale_size > IAVF_MAX_DATA_PER_TXD) {
+			int align_pad = -(skb_frag_off(stale)) &
+					(IAVF_MAX_READ_REQ_SIZE - 1);
+
+			sum -= align_pad;
+			stale_size -= align_pad;
+
+			do {
+				sum -= IAVF_MAX_DATA_PER_TXD_ALIGNED;
+				stale_size -= IAVF_MAX_DATA_PER_TXD_ALIGNED;
+			} while (stale_size > IAVF_MAX_DATA_PER_TXD);
+		}
+
+		/* if sum is negative we failed to make sufficient progress */
+		if (sum < 0)
+			return true;
+
+		if (!nr_frags--)
+			break;
+
+		sum -= stale_size;
+	}
+
+	return false;
+}
+
+/**
+ * __iavf_maybe_stop_tx - 2nd level check for tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size:    the size buffer we want to assure is available
+ *
+ * Returns -EBUSY if a stop is needed, else 0
+ **/
+int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size)
+{
+	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+	/* Memory barrier before checking head and tail */
+	smp_mb();
+
+	/* Check again in a case another CPU has just made room available. */
+	if (likely(IAVF_DESC_UNUSED(tx_ring) < size))
+		return -EBUSY;
+
+	/* A reprieve! - use start_queue because it doesn't call schedule */
+	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
+	++tx_ring->tx_stats.restart_queue;
+	return 0;
+}
+
+/**
+ * iavf_tx_map - Build the Tx descriptor
+ * @tx_ring:  ring to send buffer on
+ * @skb:      send buffer
+ * @first:    first buffer info buffer to use
+ * @tx_flags: collected send information
+ * @hdr_len:  size of the packet header
+ * @td_cmd:   the command field in the descriptor
+ * @td_offset: offset for checksum or crc
+ **/
+static inline void iavf_tx_map(struct iavf_ring *tx_ring, struct sk_buff *skb,
+			       struct iavf_tx_buffer *first, u32 tx_flags,
+			       const u8 hdr_len, u32 td_cmd, u32 td_offset)
+{
+	unsigned int data_len = skb->data_len;
+	unsigned int size = skb_headlen(skb);
+	skb_frag_t *frag;
+	struct iavf_tx_buffer *tx_bi;
+	struct iavf_tx_desc *tx_desc;
+	u16 i = tx_ring->next_to_use;
+	u32 td_tag = 0;
+	dma_addr_t dma;
+
+	if (tx_flags & IAVF_TX_FLAGS_HW_VLAN) {
+		td_cmd |= IAVF_TX_DESC_CMD_IL2TAG1;
+		td_tag = (tx_flags & IAVF_TX_FLAGS_VLAN_MASK) >>
+			 IAVF_TX_FLAGS_VLAN_SHIFT;
+	}
+
+	first->tx_flags = tx_flags;
+
+	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+	tx_desc = IAVF_TX_DESC(tx_ring, i);
+	tx_bi = first;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		unsigned int max_data = IAVF_MAX_DATA_PER_TXD_ALIGNED;
+
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_error;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_bi, len, size);
+		dma_unmap_addr_set(tx_bi, dma, dma);
+
+		/* align size to end of page */
+		max_data += -dma & (IAVF_MAX_READ_REQ_SIZE - 1);
+		tx_desc->buffer_addr = cpu_to_le64(dma);
+
+		while (unlikely(size > IAVF_MAX_DATA_PER_TXD)) {
+			tx_desc->cmd_type_offset_bsz =
+				build_ctob(td_cmd, td_offset,
+					   max_data, td_tag);
+
+			tx_desc++;
+			i++;
+
+			if (i == tx_ring->count) {
+				tx_desc = IAVF_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+
+			dma += max_data;
+			size -= max_data;
+
+			max_data = IAVF_MAX_DATA_PER_TXD_ALIGNED;
+			tx_desc->buffer_addr = cpu_to_le64(dma);
+		}
+
+		if (likely(!data_len))
+			break;
+
+		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
+							  size, td_tag);
+
+		tx_desc++;
+		i++;
+
+		if (i == tx_ring->count) {
+			tx_desc = IAVF_TX_DESC(tx_ring, 0);
+			i = 0;
+		}
+
+		size = skb_frag_size(frag);
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+				       DMA_TO_DEVICE);
+
+		tx_bi = &tx_ring->tx_bi[i];
+	}
+
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
+
+	tx_ring->next_to_use = i;
+
+	iavf_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	/* write last descriptor with RS and EOP bits */
+	td_cmd |= IAVF_TXD_CMD;
+	tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag);
+
+	skb_tx_timestamp(skb);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.
+	 *
+	 * We also use this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	/* notify HW of packet */
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
+		writel(i, tx_ring->tail);
+	}
+
+	return;
+
+dma_error:
+	dev_info(tx_ring->dev, "TX DMA map failed\n");
+
+	/* clear dma mappings for failed tx_bi map */
+	for (;;) {
+		tx_bi = &tx_ring->tx_bi[i];
+		iavf_unmap_and_free_tx_resource(tx_ring, tx_bi);
+		if (tx_bi == first)
+			break;
+		if (i == 0)
+			i = tx_ring->count;
+		i--;
+	}
+
+	tx_ring->next_to_use = i;
+}
+
+/**
+ * iavf_xmit_frame_ring - Sends buffer on Tx ring
+ * @skb:     send buffer
+ * @tx_ring: ring to send buffer on
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ **/
+static netdev_tx_t iavf_xmit_frame_ring(struct sk_buff *skb,
+					struct iavf_ring *tx_ring)
+{
+	u64 cd_type_cmd_tso_mss = IAVF_TX_DESC_DTYPE_CONTEXT;
+	u32 cd_tunneling = 0, cd_l2tag2 = 0;
+	struct iavf_tx_buffer *first;
+	u32 td_offset = 0;
+	u32 tx_flags = 0;
+	__be16 protocol;
+	u32 td_cmd = 0;
+	u8 hdr_len = 0;
+	int tso, count;
+
+	/* prefetch the data, we'll need it later */
+	prefetch(skb->data);
+
+	iavf_trace(xmit_frame_ring, skb, tx_ring);
+
+	count = iavf_xmit_descriptor_count(skb);
+	if (iavf_chk_linearize(skb, count)) {
+		if (__skb_linearize(skb)) {
+			dev_kfree_skb_any(skb);
+			return NETDEV_TX_OK;
+		}
+		count = iavf_txd_use_count(skb->len);
+		tx_ring->tx_stats.tx_linearize++;
+	}
+
+	/* need: 1 descriptor per page * PAGE_SIZE/IAVF_MAX_DATA_PER_TXD,
+	 *       + 1 desc for skb_head_len/IAVF_MAX_DATA_PER_TXD,
+	 *       + 4 desc gap to avoid the cache line where head is,
+	 *       + 1 desc for context descriptor,
+	 * otherwise try next time
+	 */
+	if (iavf_maybe_stop_tx(tx_ring, count + 4 + 1)) {
+		tx_ring->tx_stats.tx_busy++;
+		return NETDEV_TX_BUSY;
+	}
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_bi[tx_ring->next_to_use];
+	first->skb = skb;
+	first->bytecount = skb->len;
+	first->gso_segs = 1;
+
+	/* prepare the xmit flags */
+	iavf_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags);
+	if (tx_flags & IAVF_TX_FLAGS_HW_OUTER_SINGLE_VLAN) {
+		cd_type_cmd_tso_mss |= IAVF_TX_CTX_DESC_IL2TAG2 <<
+			IAVF_TXD_CTX_QW1_CMD_SHIFT;
+		cd_l2tag2 = (tx_flags & IAVF_TX_FLAGS_VLAN_MASK) >>
+			IAVF_TX_FLAGS_VLAN_SHIFT;
+	}
+
+	/* obtain protocol of skb */
+	protocol = vlan_get_protocol(skb);
+
+	/* setup IPv4/IPv6 offloads */
+	if (protocol == htons(ETH_P_IP))
+		tx_flags |= IAVF_TX_FLAGS_IPV4;
+	else if (protocol == htons(ETH_P_IPV6))
+		tx_flags |= IAVF_TX_FLAGS_IPV6;
+
+	tso = iavf_tso(first, &hdr_len, &cd_type_cmd_tso_mss);
+
+	if (tso < 0)
+		goto out_drop;
+	else if (tso)
+		tx_flags |= IAVF_TX_FLAGS_TSO;
+
+	/* Always offload the checksum, since it's in the data descriptor */
+	tso = iavf_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
+				  tx_ring, &cd_tunneling);
+	if (tso < 0)
+		goto out_drop;
+
+	/* always enable CRC insertion offload */
+	td_cmd |= IAVF_TX_DESC_CMD_ICRC;
+
+	iavf_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
+			   cd_tunneling, cd_l2tag2);
+
+	iavf_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
+		    td_cmd, td_offset);
+
+	return NETDEV_TX_OK;
+
+out_drop:
+	iavf_trace(xmit_frame_ring_drop, first->skb, tx_ring);
+	dev_kfree_skb_any(first->skb);
+	first->skb = NULL;
+	return NETDEV_TX_OK;
+}
+
+/**
+ * iavf_xmit_frame - Selects the correct VSI and Tx queue to send buffer
+ * @skb:    send buffer
+ * @netdev: network interface device structure
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ **/
+netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct iavf_adapter *adapter = netdev_priv(netdev);
+	struct iavf_ring *tx_ring = &adapter->tx_rings[skb->queue_mapping];
+
+	/* hardware can't handle really short frames, hardware padding works
+	 * beyond this point
+	 */
+	if (unlikely(skb->len < IAVF_MIN_TX_LEN)) {
+		if (skb_pad(skb, IAVF_MIN_TX_LEN - skb->len))
+			return NETDEV_TX_OK;
+		skb->len = IAVF_MIN_TX_LEN;
+		skb_set_tail_pointer(skb, IAVF_MIN_TX_LEN);
+	}
+
+	return iavf_xmit_frame_ring(skb, tx_ring);
+}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
new file mode 100644
index 0000000000..10ba36602c
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
@@ -0,0 +1,522 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_TXRX_H_
+#define _IAVF_TXRX_H_
+
+/* Interrupt Throttling and Rate Limiting Goodies */
+#define IAVF_DEFAULT_IRQ_WORK      256
+
+/* The datasheet for the X710 and XL710 indicate that the maximum value for
+ * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec
+ * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing
+ * the register value which is divided by 2 lets use the actual values and
+ * avoid an excessive amount of translation.
+ */
+#define IAVF_ITR_DYNAMIC	0x8000	/* use top bit as a flag */
+#define IAVF_ITR_MASK		0x1FFE	/* mask for ITR register value */
+#define IAVF_ITR_100K		    10	/* all values below must be even */
+#define IAVF_ITR_50K		    20
+#define IAVF_ITR_20K		    50
+#define IAVF_ITR_18K		    60
+#define IAVF_ITR_8K		   122
+#define IAVF_MAX_ITR		  8160	/* maximum value as per datasheet */
+#define ITR_TO_REG(setting) ((setting) & ~IAVF_ITR_DYNAMIC)
+#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~IAVF_ITR_MASK)
+#define ITR_IS_DYNAMIC(setting) (!!((setting) & IAVF_ITR_DYNAMIC))
+
+#define IAVF_ITR_RX_DEF		(IAVF_ITR_20K | IAVF_ITR_DYNAMIC)
+#define IAVF_ITR_TX_DEF		(IAVF_ITR_20K | IAVF_ITR_DYNAMIC)
+
+/* 0x40 is the enable bit for interrupt rate limiting, and must be set if
+ * the value of the rate limit is non-zero
+ */
+#define INTRL_ENA                  BIT(6)
+#define IAVF_MAX_INTRL             0x3B    /* reg uses 4 usec resolution */
+#define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2)
+#define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0)
+#define IAVF_INTRL_8K              125     /* 8000 ints/sec */
+#define IAVF_INTRL_62K             16      /* 62500 ints/sec */
+#define IAVF_INTRL_83K             12      /* 83333 ints/sec */
+
+#define IAVF_QUEUE_END_OF_LIST 0x7FF
+
+/* this enum matches hardware bits and is meant to be used by DYN_CTLN
+ * registers and QINT registers or more generally anywhere in the manual
+ * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any
+ * register but instead is a special value meaning "don't update" ITR0/1/2.
+ */
+enum iavf_dyn_idx_t {
+	IAVF_IDX_ITR0 = 0,
+	IAVF_IDX_ITR1 = 1,
+	IAVF_IDX_ITR2 = 2,
+	IAVF_ITR_NONE = 3	/* ITR_NONE must not be used as an index */
+};
+
+/* these are indexes into ITRN registers */
+#define IAVF_RX_ITR    IAVF_IDX_ITR0
+#define IAVF_TX_ITR    IAVF_IDX_ITR1
+#define IAVF_PE_ITR    IAVF_IDX_ITR2
+
+/* Supported RSS offloads */
+#define IAVF_DEFAULT_RSS_HENA ( \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV4_UDP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV4_SCTP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV4_TCP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV4_OTHER) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_FRAG_IPV4) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV6_UDP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV6_TCP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV6_SCTP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV6_OTHER) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_FRAG_IPV6) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_L2_PAYLOAD))
+
+#define IAVF_DEFAULT_RSS_HENA_EXPANDED (IAVF_DEFAULT_RSS_HENA | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \
+	BIT_ULL(IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP))
+
+/* Supported Rx Buffer Sizes (a multiple of 128) */
+#define IAVF_RXBUFFER_256   256
+#define IAVF_RXBUFFER_1536  1536  /* 128B aligned standard Ethernet frame */
+#define IAVF_RXBUFFER_2048  2048
+#define IAVF_RXBUFFER_3072  3072  /* Used for large frames w/ padding */
+#define IAVF_MAX_RXBUFFER   9728  /* largest size for single descriptor */
+
+/* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we
+ * reserve 2 more, and skb_shared_info adds an additional 384 bytes more,
+ * this adds up to 512 bytes of extra data meaning the smallest allocation
+ * we could have is 1K.
+ * i.e. RXBUFFER_256 --> 960 byte skb (size-1024 slab)
+ * i.e. RXBUFFER_512 --> 1216 byte skb (size-2048 slab)
+ */
+#define IAVF_RX_HDR_SIZE IAVF_RXBUFFER_256
+#define IAVF_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
+#define iavf_rx_desc iavf_32byte_rx_desc
+
+#define IAVF_RX_DMA_ATTR \
+	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+/* Attempt to maximize the headroom available for incoming frames.  We
+ * use a 2K buffer for receives and need 1536/1534 to store the data for
+ * the frame.  This leaves us with 512 bytes of room.  From that we need
+ * to deduct the space needed for the shared info and the padding needed
+ * to IP align the frame.
+ *
+ * Note: For cache line sizes 256 or larger this value is going to end
+ *	 up negative.  In these cases we should fall back to the legacy
+ *	 receive path.
+ */
+#if (PAGE_SIZE < 8192)
+#define IAVF_2K_TOO_SMALL_WITH_PADDING \
+((NET_SKB_PAD + IAVF_RXBUFFER_1536) > SKB_WITH_OVERHEAD(IAVF_RXBUFFER_2048))
+
+static inline int iavf_compute_pad(int rx_buf_len)
+{
+	int page_size, pad_size;
+
+	page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
+	pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
+
+	return pad_size;
+}
+
+static inline int iavf_skb_pad(void)
+{
+	int rx_buf_len;
+
+	/* If a 2K buffer cannot handle a standard Ethernet frame then
+	 * optimize padding for a 3K buffer instead of a 1.5K buffer.
+	 *
+	 * For a 3K buffer we need to add enough padding to allow for
+	 * tailroom due to NET_IP_ALIGN possibly shifting us out of
+	 * cache-line alignment.
+	 */
+	if (IAVF_2K_TOO_SMALL_WITH_PADDING)
+		rx_buf_len = IAVF_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
+	else
+		rx_buf_len = IAVF_RXBUFFER_1536;
+
+	/* if needed make room for NET_IP_ALIGN */
+	rx_buf_len -= NET_IP_ALIGN;
+
+	return iavf_compute_pad(rx_buf_len);
+}
+
+#define IAVF_SKB_PAD iavf_skb_pad()
+#else
+#define IAVF_2K_TOO_SMALL_WITH_PADDING false
+#define IAVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#endif
+
+/**
+ * iavf_test_staterr - tests bits in Rx descriptor status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static inline bool iavf_test_staterr(union iavf_rx_desc *rx_desc,
+				     const u64 stat_err_bits)
+{
+	return !!(rx_desc->wb.qword1.status_error_len &
+		  cpu_to_le64(stat_err_bits));
+}
+
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define IAVF_RX_INCREMENT(r, i) \
+	do {					\
+		(i)++;				\
+		if ((i) == (r)->count)		\
+			i = 0;			\
+		r->next_to_clean = i;		\
+	} while (0)
+
+#define IAVF_RX_NEXT_DESC(r, i, n)		\
+	do {					\
+		(i)++;				\
+		if ((i) == (r)->count)		\
+			i = 0;			\
+		(n) = IAVF_RX_DESC((r), (i));	\
+	} while (0)
+
+#define IAVF_RX_NEXT_DESC_PREFETCH(r, i, n)		\
+	do {						\
+		IAVF_RX_NEXT_DESC((r), (i), (n));	\
+		prefetch((n));				\
+	} while (0)
+
+#define IAVF_MAX_BUFFER_TXD	8
+#define IAVF_MIN_TX_LEN		17
+
+/* The size limit for a transmit buffer in a descriptor is (16K - 1).
+ * In order to align with the read requests we will align the value to
+ * the nearest 4K which represents our maximum read request size.
+ */
+#define IAVF_MAX_READ_REQ_SIZE		4096
+#define IAVF_MAX_DATA_PER_TXD		(16 * 1024 - 1)
+#define IAVF_MAX_DATA_PER_TXD_ALIGNED \
+	(IAVF_MAX_DATA_PER_TXD & ~(IAVF_MAX_READ_REQ_SIZE - 1))
+
+/**
+ * iavf_txd_use_count  - estimate the number of descriptors needed for Tx
+ * @size: transmit request size in bytes
+ *
+ * Due to hardware alignment restrictions (4K alignment), we need to
+ * assume that we can have no more than 12K of data per descriptor, even
+ * though each descriptor can take up to 16K - 1 bytes of aligned memory.
+ * Thus, we need to divide by 12K. But division is slow! Instead,
+ * we decompose the operation into shifts and one relatively cheap
+ * multiply operation.
+ *
+ * To divide by 12K, we first divide by 4K, then divide by 3:
+ *     To divide by 4K, shift right by 12 bits
+ *     To divide by 3, multiply by 85, then divide by 256
+ *     (Divide by 256 is done by shifting right by 8 bits)
+ * Finally, we add one to round up. Because 256 isn't an exact multiple of
+ * 3, we'll underestimate near each multiple of 12K. This is actually more
+ * accurate as we have 4K - 1 of wiggle room that we can fit into the last
+ * segment.  For our purposes this is accurate out to 1M which is orders of
+ * magnitude greater than our largest possible GSO size.
+ *
+ * This would then be implemented as:
+ *     return (((size >> 12) * 85) >> 8) + 1;
+ *
+ * Since multiplication and division are commutative, we can reorder
+ * operations into:
+ *     return ((size * 85) >> 20) + 1;
+ */
+static inline unsigned int iavf_txd_use_count(unsigned int size)
+{
+	return ((size * 85) >> 20) + 1;
+}
+
+/* Tx Descriptors needed, worst case */
+#define DESC_NEEDED (MAX_SKB_FRAGS + 6)
+#define IAVF_MIN_DESC_PENDING	4
+
+#define IAVF_TX_FLAGS_HW_VLAN			BIT(1)
+#define IAVF_TX_FLAGS_SW_VLAN			BIT(2)
+#define IAVF_TX_FLAGS_TSO			BIT(3)
+#define IAVF_TX_FLAGS_IPV4			BIT(4)
+#define IAVF_TX_FLAGS_IPV6			BIT(5)
+#define IAVF_TX_FLAGS_FCCRC			BIT(6)
+#define IAVF_TX_FLAGS_FSO			BIT(7)
+#define IAVF_TX_FLAGS_FD_SB			BIT(9)
+#define IAVF_TX_FLAGS_VXLAN_TUNNEL		BIT(10)
+#define IAVF_TX_FLAGS_HW_OUTER_SINGLE_VLAN	BIT(11)
+#define IAVF_TX_FLAGS_VLAN_MASK			0xffff0000
+#define IAVF_TX_FLAGS_VLAN_PRIO_MASK		0xe0000000
+#define IAVF_TX_FLAGS_VLAN_PRIO_SHIFT		29
+#define IAVF_TX_FLAGS_VLAN_SHIFT		16
+
+struct iavf_tx_buffer {
+	struct iavf_tx_desc *next_to_watch;
+	union {
+		struct sk_buff *skb;
+		void *raw_buf;
+	};
+	unsigned int bytecount;
+	unsigned short gso_segs;
+
+	DEFINE_DMA_UNMAP_ADDR(dma);
+	DEFINE_DMA_UNMAP_LEN(len);
+	u32 tx_flags;
+};
+
+struct iavf_rx_buffer {
+	dma_addr_t dma;
+	struct page *page;
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+	__u32 page_offset;
+#else
+	__u16 page_offset;
+#endif
+	__u16 pagecnt_bias;
+};
+
+struct iavf_queue_stats {
+	u64 packets;
+	u64 bytes;
+};
+
+struct iavf_tx_queue_stats {
+	u64 restart_queue;
+	u64 tx_busy;
+	u64 tx_done_old;
+	u64 tx_linearize;
+	u64 tx_force_wb;
+	int prev_pkt_ctr;
+	u64 tx_lost_interrupt;
+};
+
+struct iavf_rx_queue_stats {
+	u64 non_eop_descs;
+	u64 alloc_page_failed;
+	u64 alloc_buff_failed;
+	u64 page_reuse_count;
+	u64 realloc_count;
+};
+
+enum iavf_ring_state_t {
+	__IAVF_TX_FDIR_INIT_DONE,
+	__IAVF_TX_XPS_INIT_DONE,
+	__IAVF_RING_STATE_NBITS /* must be last */
+};
+
+/* some useful defines for virtchannel interface, which
+ * is the only remaining user of header split
+ */
+#define IAVF_RX_DTYPE_NO_SPLIT      0
+#define IAVF_RX_DTYPE_HEADER_SPLIT  1
+#define IAVF_RX_DTYPE_SPLIT_ALWAYS  2
+#define IAVF_RX_SPLIT_L2      0x1
+#define IAVF_RX_SPLIT_IP      0x2
+#define IAVF_RX_SPLIT_TCP_UDP 0x4
+#define IAVF_RX_SPLIT_SCTP    0x8
+
+/* struct that defines a descriptor ring, associated with a VSI */
+struct iavf_ring {
+	struct iavf_ring *next;		/* pointer to next ring in q_vector */
+	void *desc;			/* Descriptor ring memory */
+	struct device *dev;		/* Used for DMA mapping */
+	struct net_device *netdev;	/* netdev ring maps to */
+	union {
+		struct iavf_tx_buffer *tx_bi;
+		struct iavf_rx_buffer *rx_bi;
+	};
+	DECLARE_BITMAP(state, __IAVF_RING_STATE_NBITS);
+	u16 queue_index;		/* Queue number of ring */
+	u8 dcb_tc;			/* Traffic class of ring */
+	u8 __iomem *tail;
+
+	/* high bit set means dynamic, use accessors routines to read/write.
+	 * hardware only supports 2us resolution for the ITR registers.
+	 * these values always store the USER setting, and must be converted
+	 * before programming to a register.
+	 */
+	u16 itr_setting;
+
+	u16 count;			/* Number of descriptors */
+	u16 reg_idx;			/* HW register index of the ring */
+	u16 rx_buf_len;
+
+	/* used in interrupt processing */
+	u16 next_to_use;
+	u16 next_to_clean;
+
+	u8 atr_sample_rate;
+	u8 atr_count;
+
+	bool ring_active;		/* is ring online or not */
+	bool arm_wb;		/* do something to arm write back */
+	u8 packet_stride;
+
+	u16 flags;
+#define IAVF_TXR_FLAGS_WB_ON_ITR		BIT(0)
+#define IAVF_RXR_FLAGS_BUILD_SKB_ENABLED	BIT(1)
+#define IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1	BIT(3)
+#define IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2	BIT(4)
+#define IAVF_RXR_FLAGS_VLAN_TAG_LOC_L2TAG2_2	BIT(5)
+
+	/* stats structs */
+	struct iavf_queue_stats	stats;
+	struct u64_stats_sync syncp;
+	union {
+		struct iavf_tx_queue_stats tx_stats;
+		struct iavf_rx_queue_stats rx_stats;
+	};
+
+	unsigned int size;		/* length of descriptor ring in bytes */
+	dma_addr_t dma;			/* physical address of ring */
+
+	struct iavf_vsi *vsi;		/* Backreference to associated VSI */
+	struct iavf_q_vector *q_vector;	/* Backreference to associated vector */
+
+	struct rcu_head rcu;		/* to avoid race on free */
+	u16 next_to_alloc;
+	struct sk_buff *skb;		/* When iavf_clean_rx_ring_irq() must
+					 * return before it sees the EOP for
+					 * the current packet, we save that skb
+					 * here and resume receiving this
+					 * packet the next time
+					 * iavf_clean_rx_ring_irq() is called
+					 * for this ring.
+					 */
+} ____cacheline_internodealigned_in_smp;
+
+static inline bool ring_uses_build_skb(struct iavf_ring *ring)
+{
+	return !!(ring->flags & IAVF_RXR_FLAGS_BUILD_SKB_ENABLED);
+}
+
+static inline void set_ring_build_skb_enabled(struct iavf_ring *ring)
+{
+	ring->flags |= IAVF_RXR_FLAGS_BUILD_SKB_ENABLED;
+}
+
+static inline void clear_ring_build_skb_enabled(struct iavf_ring *ring)
+{
+	ring->flags &= ~IAVF_RXR_FLAGS_BUILD_SKB_ENABLED;
+}
+
+#define IAVF_ITR_ADAPTIVE_MIN_INC	0x0002
+#define IAVF_ITR_ADAPTIVE_MIN_USECS	0x0002
+#define IAVF_ITR_ADAPTIVE_MAX_USECS	0x007e
+#define IAVF_ITR_ADAPTIVE_LATENCY	0x8000
+#define IAVF_ITR_ADAPTIVE_BULK		0x0000
+#define ITR_IS_BULK(x) (!((x) & IAVF_ITR_ADAPTIVE_LATENCY))
+
+struct iavf_ring_container {
+	struct iavf_ring *ring;		/* pointer to linked list of ring(s) */
+	unsigned long next_update;	/* jiffies value of next update */
+	unsigned int total_bytes;	/* total bytes processed this int */
+	unsigned int total_packets;	/* total packets processed this int */
+	u16 count;
+	u16 target_itr;			/* target ITR setting for ring(s) */
+	u16 current_itr;		/* current ITR setting for ring(s) */
+};
+
+/* iterator for handling rings in ring container */
+#define iavf_for_each_ring(pos, head) \
+	for (pos = (head).ring; pos != NULL; pos = pos->next)
+
+static inline unsigned int iavf_rx_pg_order(struct iavf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring->rx_buf_len > (PAGE_SIZE / 2))
+		return 1;
+#endif
+	return 0;
+}
+
+#define iavf_rx_pg_size(_ring) (PAGE_SIZE << iavf_rx_pg_order(_ring))
+
+bool iavf_alloc_rx_buffers(struct iavf_ring *rxr, u16 cleaned_count);
+netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring);
+int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring);
+void iavf_free_tx_resources(struct iavf_ring *tx_ring);
+void iavf_free_rx_resources(struct iavf_ring *rx_ring);
+int iavf_napi_poll(struct napi_struct *napi, int budget);
+void iavf_detect_recover_hung(struct iavf_vsi *vsi);
+int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size);
+bool __iavf_chk_linearize(struct sk_buff *skb);
+
+/**
+ * iavf_xmit_descriptor_count - calculate number of Tx descriptors needed
+ * @skb:     send buffer
+ *
+ * Returns number of data descriptors needed for this skb. Returns 0 to indicate
+ * there is not enough descriptors available in this ring since we need at least
+ * one descriptor.
+ **/
+static inline int iavf_xmit_descriptor_count(struct sk_buff *skb)
+{
+	const skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+	int count = 0, size = skb_headlen(skb);
+
+	for (;;) {
+		count += iavf_txd_use_count(size);
+
+		if (!nr_frags--)
+			break;
+
+		size = skb_frag_size(frag++);
+	}
+
+	return count;
+}
+
+/**
+ * iavf_maybe_stop_tx - 1st level check for Tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size:    the size buffer we want to assure is available
+ *
+ * Returns 0 if stop is not needed
+ **/
+static inline int iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size)
+{
+	if (likely(IAVF_DESC_UNUSED(tx_ring) >= size))
+		return 0;
+	return __iavf_maybe_stop_tx(tx_ring, size);
+}
+
+/**
+ * iavf_chk_linearize - Check if there are more than 8 fragments per packet
+ * @skb:      send buffer
+ * @count:    number of buffers used
+ *
+ * Note: Our HW can't scatter-gather more than 8 fragments to build
+ * a packet on the wire and so we need to figure out the cases where we
+ * need to linearize the skb.
+ **/
+static inline bool iavf_chk_linearize(struct sk_buff *skb, int count)
+{
+	/* Both TSO and single send will work if count is less than 8 */
+	if (likely(count < IAVF_MAX_BUFFER_TXD))
+		return false;
+
+	if (skb_is_gso(skb))
+		return __iavf_chk_linearize(skb);
+
+	/* we can support up to 8 data buffers for a single send */
+	return count != IAVF_MAX_BUFFER_TXD;
+}
+/**
+ * txring_txq - helper to convert from a ring to a queue
+ * @ring: Tx ring to find the netdev equivalent of
+ **/
+static inline struct netdev_queue *txring_txq(const struct iavf_ring *ring)
+{
+	return netdev_get_tx_queue(ring->netdev, ring->queue_index);
+}
+#endif /* _IAVF_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h
new file mode 100644
index 0000000000..9f1f523807
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_type.h
@@ -0,0 +1,679 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#ifndef _IAVF_TYPE_H_
+#define _IAVF_TYPE_H_
+
+#include "iavf_status.h"
+#include "iavf_osdep.h"
+#include "iavf_register.h"
+#include "iavf_adminq.h"
+#include "iavf_devids.h"
+
+#define IAVF_RXQ_CTX_DBUFF_SHIFT 7
+
+/* IAVF_MASK is a macro used on 32 bit registers */
+#define IAVF_MASK(mask, shift) ((u32)(mask) << (shift))
+
+#define IAVF_MAX_VSI_QP			16
+#define IAVF_MAX_VF_VSI			3
+#define IAVF_MAX_CHAINED_RX_BUFFERS	5
+
+/* forward declaration */
+struct iavf_hw;
+typedef void (*IAVF_ADMINQ_CALLBACK)(struct iavf_hw *, struct iavf_aq_desc *);
+
+/* Data type manipulation macros. */
+
+#define IAVF_DESC_UNUSED(R)	\
+	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+	(R)->next_to_clean - (R)->next_to_use - 1)
+
+/* bitfields for Tx queue mapping in QTX_CTL */
+#define IAVF_QTX_CTL_VF_QUEUE	0x0
+#define IAVF_QTX_CTL_VM_QUEUE	0x1
+#define IAVF_QTX_CTL_PF_QUEUE	0x2
+
+/* debug masks - set these bits in hw->debug_mask to control output */
+enum iavf_debug_mask {
+	IAVF_DEBUG_INIT			= 0x00000001,
+	IAVF_DEBUG_RELEASE		= 0x00000002,
+
+	IAVF_DEBUG_LINK			= 0x00000010,
+	IAVF_DEBUG_PHY			= 0x00000020,
+	IAVF_DEBUG_HMC			= 0x00000040,
+	IAVF_DEBUG_NVM			= 0x00000080,
+	IAVF_DEBUG_LAN			= 0x00000100,
+	IAVF_DEBUG_FLOW			= 0x00000200,
+	IAVF_DEBUG_DCB			= 0x00000400,
+	IAVF_DEBUG_DIAG			= 0x00000800,
+	IAVF_DEBUG_FD			= 0x00001000,
+	IAVF_DEBUG_PACKAGE		= 0x00002000,
+
+	IAVF_DEBUG_AQ_MESSAGE		= 0x01000000,
+	IAVF_DEBUG_AQ_DESCRIPTOR	= 0x02000000,
+	IAVF_DEBUG_AQ_DESC_BUFFER	= 0x04000000,
+	IAVF_DEBUG_AQ_COMMAND		= 0x06000000,
+	IAVF_DEBUG_AQ			= 0x0F000000,
+
+	IAVF_DEBUG_USER			= 0xF0000000,
+
+	IAVF_DEBUG_ALL			= 0xFFFFFFFF
+};
+
+/* These are structs for managing the hardware information and the operations.
+ * The structures of function pointers are filled out at init time when we
+ * know for sure exactly which hardware we're working with.  This gives us the
+ * flexibility of using the same main driver code but adapting to slightly
+ * different hardware needs as new parts are developed.  For this architecture,
+ * the Firmware and AdminQ are intended to insulate the driver from most of the
+ * future changes, but these structures will also do part of the job.
+ */
+enum iavf_mac_type {
+	IAVF_MAC_UNKNOWN = 0,
+	IAVF_MAC_XL710,
+	IAVF_MAC_VF,
+	IAVF_MAC_X722,
+	IAVF_MAC_X722_VF,
+	IAVF_MAC_GENERIC,
+};
+
+enum iavf_vsi_type {
+	IAVF_VSI_MAIN	= 0,
+	IAVF_VSI_VMDQ1	= 1,
+	IAVF_VSI_VMDQ2	= 2,
+	IAVF_VSI_CTRL	= 3,
+	IAVF_VSI_FCOE	= 4,
+	IAVF_VSI_MIRROR	= 5,
+	IAVF_VSI_SRIOV	= 6,
+	IAVF_VSI_FDIR	= 7,
+	IAVF_VSI_TYPE_UNKNOWN
+};
+
+enum iavf_queue_type {
+	IAVF_QUEUE_TYPE_RX = 0,
+	IAVF_QUEUE_TYPE_TX,
+	IAVF_QUEUE_TYPE_PE_CEQ,
+	IAVF_QUEUE_TYPE_UNKNOWN
+};
+
+#define IAVF_HW_CAP_MAX_GPIO		30
+/* Capabilities of a PF or a VF or the whole device */
+struct iavf_hw_capabilities {
+	bool dcb;
+	bool fcoe;
+	u32 num_vsis;
+	u32 num_rx_qp;
+	u32 num_tx_qp;
+	u32 base_queue;
+	u32 num_msix_vectors_vf;
+};
+
+struct iavf_mac_info {
+	enum iavf_mac_type type;
+	u8 addr[ETH_ALEN];
+	u8 perm_addr[ETH_ALEN];
+	u8 san_addr[ETH_ALEN];
+	u16 max_fcoeq;
+};
+
+/* PCI bus types */
+enum iavf_bus_type {
+	iavf_bus_type_unknown = 0,
+	iavf_bus_type_pci,
+	iavf_bus_type_pcix,
+	iavf_bus_type_pci_express,
+	iavf_bus_type_reserved
+};
+
+/* PCI bus speeds */
+enum iavf_bus_speed {
+	iavf_bus_speed_unknown	= 0,
+	iavf_bus_speed_33	= 33,
+	iavf_bus_speed_66	= 66,
+	iavf_bus_speed_100	= 100,
+	iavf_bus_speed_120	= 120,
+	iavf_bus_speed_133	= 133,
+	iavf_bus_speed_2500	= 2500,
+	iavf_bus_speed_5000	= 5000,
+	iavf_bus_speed_8000	= 8000,
+	iavf_bus_speed_reserved
+};
+
+/* PCI bus widths */
+enum iavf_bus_width {
+	iavf_bus_width_unknown	= 0,
+	iavf_bus_width_pcie_x1	= 1,
+	iavf_bus_width_pcie_x2	= 2,
+	iavf_bus_width_pcie_x4	= 4,
+	iavf_bus_width_pcie_x8	= 8,
+	iavf_bus_width_32	= 32,
+	iavf_bus_width_64	= 64,
+	iavf_bus_width_reserved
+};
+
+/* Bus parameters */
+struct iavf_bus_info {
+	enum iavf_bus_speed speed;
+	enum iavf_bus_width width;
+	enum iavf_bus_type type;
+
+	u16 func;
+	u16 device;
+	u16 lan_id;
+	u16 bus_id;
+};
+
+#define IAVF_MAX_USER_PRIORITY		8
+/* Port hardware description */
+struct iavf_hw {
+	u8 __iomem *hw_addr;
+	void *back;
+
+	/* subsystem structs */
+	struct iavf_mac_info mac;
+	struct iavf_bus_info bus;
+
+	/* pci info */
+	u16 device_id;
+	u16 vendor_id;
+	u16 subsystem_device_id;
+	u16 subsystem_vendor_id;
+	u8 revision_id;
+
+	/* capabilities for entire device and PCI func */
+	struct iavf_hw_capabilities dev_caps;
+
+	/* Admin Queue info */
+	struct iavf_adminq_info aq;
+
+	/* debug mask */
+	u32 debug_mask;
+	char err_str[16];
+};
+
+/* RX Descriptors */
+union iavf_16byte_rx_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+	} read;
+	struct {
+		struct {
+			struct {
+				union {
+					__le16 mirroring_status;
+					__le16 fcoe_ctx_id;
+				} mirr_fcoe;
+				__le16 l2tag1;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				__le32 fd_id; /* Flow director filter id */
+				__le32 fcoe_param; /* FCoE DDP Context id */
+			} hi_dword;
+		} qword0;
+		struct {
+			/* ext status/error/pktype/length */
+			__le64 status_error_len;
+		} qword1;
+	} wb;  /* writeback */
+};
+
+union iavf_32byte_rx_desc {
+	struct {
+		__le64  pkt_addr; /* Packet buffer address */
+		__le64  hdr_addr; /* Header buffer address */
+			/* bit 0 of hdr_buffer_addr is DD bit */
+		__le64  rsvd1;
+		__le64  rsvd2;
+	} read;
+	struct {
+		struct {
+			struct {
+				union {
+					__le16 mirroring_status;
+					__le16 fcoe_ctx_id;
+				} mirr_fcoe;
+				__le16 l2tag1;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				__le32 fcoe_param; /* FCoE DDP Context id */
+				/* Flow director filter id in case of
+				 * Programming status desc WB
+				 */
+				__le32 fd_id;
+			} hi_dword;
+		} qword0;
+		struct {
+			/* status/error/pktype/length */
+			__le64 status_error_len;
+		} qword1;
+		struct {
+			__le16 ext_status; /* extended status */
+			__le16 rsvd;
+			__le16 l2tag2_1;
+			__le16 l2tag2_2;
+		} qword2;
+		struct {
+			union {
+				__le32 flex_bytes_lo;
+				__le32 pe_status;
+			} lo_dword;
+			union {
+				__le32 flex_bytes_hi;
+				__le32 fd_id;
+			} hi_dword;
+		} qword3;
+	} wb;  /* writeback */
+};
+
+enum iavf_rx_desc_status_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_DESC_STATUS_DD_SHIFT		= 0,
+	IAVF_RX_DESC_STATUS_EOF_SHIFT		= 1,
+	IAVF_RX_DESC_STATUS_L2TAG1P_SHIFT	= 2,
+	IAVF_RX_DESC_STATUS_L3L4P_SHIFT		= 3,
+	IAVF_RX_DESC_STATUS_CRCP_SHIFT		= 4,
+	IAVF_RX_DESC_STATUS_TSYNINDX_SHIFT	= 5, /* 2 BITS */
+	IAVF_RX_DESC_STATUS_TSYNVALID_SHIFT	= 7,
+	/* Note: Bit 8 is reserved in X710 and XL710 */
+	IAVF_RX_DESC_STATUS_EXT_UDP_0_SHIFT	= 8,
+	IAVF_RX_DESC_STATUS_UMBCAST_SHIFT	= 9, /* 2 BITS */
+	IAVF_RX_DESC_STATUS_FLM_SHIFT		= 11,
+	IAVF_RX_DESC_STATUS_FLTSTAT_SHIFT	= 12, /* 2 BITS */
+	IAVF_RX_DESC_STATUS_LPBK_SHIFT		= 14,
+	IAVF_RX_DESC_STATUS_IPV6EXADD_SHIFT	= 15,
+	IAVF_RX_DESC_STATUS_RESERVED_SHIFT	= 16, /* 2 BITS */
+	/* Note: For non-tunnel packets INT_UDP_0 is the right status for
+	 * UDP header
+	 */
+	IAVF_RX_DESC_STATUS_INT_UDP_0_SHIFT	= 18,
+	IAVF_RX_DESC_STATUS_LAST /* this entry must be last!!! */
+};
+
+#define IAVF_RXD_QW1_STATUS_SHIFT	0
+#define IAVF_RXD_QW1_STATUS_MASK	((BIT(IAVF_RX_DESC_STATUS_LAST) - 1) \
+					 << IAVF_RXD_QW1_STATUS_SHIFT)
+
+#define IAVF_RXD_QW1_STATUS_TSYNINDX_SHIFT IAVF_RX_DESC_STATUS_TSYNINDX_SHIFT
+#define IAVF_RXD_QW1_STATUS_TSYNINDX_MASK  (0x3UL << \
+					    IAVF_RXD_QW1_STATUS_TSYNINDX_SHIFT)
+
+#define IAVF_RXD_QW1_STATUS_TSYNVALID_SHIFT IAVF_RX_DESC_STATUS_TSYNVALID_SHIFT
+#define IAVF_RXD_QW1_STATUS_TSYNVALID_MASK \
+				    BIT_ULL(IAVF_RXD_QW1_STATUS_TSYNVALID_SHIFT)
+
+enum iavf_rx_desc_fltstat_values {
+	IAVF_RX_DESC_FLTSTAT_NO_DATA	= 0,
+	IAVF_RX_DESC_FLTSTAT_RSV_FD_ID	= 1, /* 16byte desc? FD_ID : RSV */
+	IAVF_RX_DESC_FLTSTAT_RSV	= 2,
+	IAVF_RX_DESC_FLTSTAT_RSS_HASH	= 3,
+};
+
+#define IAVF_RXD_QW1_ERROR_SHIFT	19
+#define IAVF_RXD_QW1_ERROR_MASK		(0xFFUL << IAVF_RXD_QW1_ERROR_SHIFT)
+
+enum iavf_rx_desc_error_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_DESC_ERROR_RXE_SHIFT		= 0,
+	IAVF_RX_DESC_ERROR_RECIPE_SHIFT		= 1,
+	IAVF_RX_DESC_ERROR_HBO_SHIFT		= 2,
+	IAVF_RX_DESC_ERROR_L3L4E_SHIFT		= 3, /* 3 BITS */
+	IAVF_RX_DESC_ERROR_IPE_SHIFT		= 3,
+	IAVF_RX_DESC_ERROR_L4E_SHIFT		= 4,
+	IAVF_RX_DESC_ERROR_EIPE_SHIFT		= 5,
+	IAVF_RX_DESC_ERROR_OVERSIZE_SHIFT	= 6,
+	IAVF_RX_DESC_ERROR_PPRS_SHIFT		= 7
+};
+
+enum iavf_rx_desc_error_l3l4e_fcoe_masks {
+	IAVF_RX_DESC_ERROR_L3L4E_NONE		= 0,
+	IAVF_RX_DESC_ERROR_L3L4E_PROT		= 1,
+	IAVF_RX_DESC_ERROR_L3L4E_FC		= 2,
+	IAVF_RX_DESC_ERROR_L3L4E_DMAC_ERR	= 3,
+	IAVF_RX_DESC_ERROR_L3L4E_DMAC_WARN	= 4
+};
+
+#define IAVF_RXD_QW1_PTYPE_SHIFT	30
+#define IAVF_RXD_QW1_PTYPE_MASK		(0xFFULL << IAVF_RXD_QW1_PTYPE_SHIFT)
+
+/* Packet type non-ip values */
+enum iavf_rx_l2_ptype {
+	IAVF_RX_PTYPE_L2_RESERVED			= 0,
+	IAVF_RX_PTYPE_L2_MAC_PAY2			= 1,
+	IAVF_RX_PTYPE_L2_TIMESYNC_PAY2			= 2,
+	IAVF_RX_PTYPE_L2_FIP_PAY2			= 3,
+	IAVF_RX_PTYPE_L2_OUI_PAY2			= 4,
+	IAVF_RX_PTYPE_L2_MACCNTRL_PAY2			= 5,
+	IAVF_RX_PTYPE_L2_LLDP_PAY2			= 6,
+	IAVF_RX_PTYPE_L2_ECP_PAY2			= 7,
+	IAVF_RX_PTYPE_L2_EVB_PAY2			= 8,
+	IAVF_RX_PTYPE_L2_QCN_PAY2			= 9,
+	IAVF_RX_PTYPE_L2_EAPOL_PAY2			= 10,
+	IAVF_RX_PTYPE_L2_ARP				= 11,
+	IAVF_RX_PTYPE_L2_FCOE_PAY3			= 12,
+	IAVF_RX_PTYPE_L2_FCOE_FCDATA_PAY3		= 13,
+	IAVF_RX_PTYPE_L2_FCOE_FCRDY_PAY3		= 14,
+	IAVF_RX_PTYPE_L2_FCOE_FCRSP_PAY3		= 15,
+	IAVF_RX_PTYPE_L2_FCOE_FCOTHER_PA		= 16,
+	IAVF_RX_PTYPE_L2_FCOE_VFT_PAY3			= 17,
+	IAVF_RX_PTYPE_L2_FCOE_VFT_FCDATA		= 18,
+	IAVF_RX_PTYPE_L2_FCOE_VFT_FCRDY			= 19,
+	IAVF_RX_PTYPE_L2_FCOE_VFT_FCRSP			= 20,
+	IAVF_RX_PTYPE_L2_FCOE_VFT_FCOTHER		= 21,
+	IAVF_RX_PTYPE_GRENAT4_MAC_PAY3			= 58,
+	IAVF_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4	= 87,
+	IAVF_RX_PTYPE_GRENAT6_MAC_PAY3			= 124,
+	IAVF_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4	= 153
+};
+
+struct iavf_rx_ptype_decoded {
+	u32 known:1;
+	u32 outer_ip:1;
+	u32 outer_ip_ver:1;
+	u32 outer_frag:1;
+	u32 tunnel_type:3;
+	u32 tunnel_end_prot:2;
+	u32 tunnel_end_frag:1;
+	u32 inner_prot:4;
+	u32 payload_layer:3;
+};
+
+enum iavf_rx_ptype_outer_ip {
+	IAVF_RX_PTYPE_OUTER_L2	= 0,
+	IAVF_RX_PTYPE_OUTER_IP	= 1
+};
+
+enum iavf_rx_ptype_outer_ip_ver {
+	IAVF_RX_PTYPE_OUTER_NONE	= 0,
+	IAVF_RX_PTYPE_OUTER_IPV4	= 0,
+	IAVF_RX_PTYPE_OUTER_IPV6	= 1
+};
+
+enum iavf_rx_ptype_outer_fragmented {
+	IAVF_RX_PTYPE_NOT_FRAG	= 0,
+	IAVF_RX_PTYPE_FRAG	= 1
+};
+
+enum iavf_rx_ptype_tunnel_type {
+	IAVF_RX_PTYPE_TUNNEL_NONE		= 0,
+	IAVF_RX_PTYPE_TUNNEL_IP_IP		= 1,
+	IAVF_RX_PTYPE_TUNNEL_IP_GRENAT		= 2,
+	IAVF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC	= 3,
+	IAVF_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN	= 4,
+};
+
+enum iavf_rx_ptype_tunnel_end_prot {
+	IAVF_RX_PTYPE_TUNNEL_END_NONE	= 0,
+	IAVF_RX_PTYPE_TUNNEL_END_IPV4	= 1,
+	IAVF_RX_PTYPE_TUNNEL_END_IPV6	= 2,
+};
+
+enum iavf_rx_ptype_inner_prot {
+	IAVF_RX_PTYPE_INNER_PROT_NONE		= 0,
+	IAVF_RX_PTYPE_INNER_PROT_UDP		= 1,
+	IAVF_RX_PTYPE_INNER_PROT_TCP		= 2,
+	IAVF_RX_PTYPE_INNER_PROT_SCTP		= 3,
+	IAVF_RX_PTYPE_INNER_PROT_ICMP		= 4,
+	IAVF_RX_PTYPE_INNER_PROT_TIMESYNC	= 5
+};
+
+enum iavf_rx_ptype_payload_layer {
+	IAVF_RX_PTYPE_PAYLOAD_LAYER_NONE	= 0,
+	IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY2	= 1,
+	IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY3	= 2,
+	IAVF_RX_PTYPE_PAYLOAD_LAYER_PAY4	= 3,
+};
+
+#define IAVF_RXD_QW1_LENGTH_PBUF_SHIFT	38
+#define IAVF_RXD_QW1_LENGTH_PBUF_MASK	(0x3FFFULL << \
+					 IAVF_RXD_QW1_LENGTH_PBUF_SHIFT)
+
+#define IAVF_RXD_QW1_LENGTH_HBUF_SHIFT	52
+#define IAVF_RXD_QW1_LENGTH_HBUF_MASK	(0x7FFULL << \
+					 IAVF_RXD_QW1_LENGTH_HBUF_SHIFT)
+
+#define IAVF_RXD_QW1_LENGTH_SPH_SHIFT	63
+#define IAVF_RXD_QW1_LENGTH_SPH_MASK	BIT_ULL(IAVF_RXD_QW1_LENGTH_SPH_SHIFT)
+
+enum iavf_rx_desc_ext_status_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT	= 0,
+	IAVF_RX_DESC_EXT_STATUS_L2TAG3P_SHIFT	= 1,
+	IAVF_RX_DESC_EXT_STATUS_FLEXBL_SHIFT	= 2, /* 2 BITS */
+	IAVF_RX_DESC_EXT_STATUS_FLEXBH_SHIFT	= 4, /* 2 BITS */
+	IAVF_RX_DESC_EXT_STATUS_FDLONGB_SHIFT	= 9,
+	IAVF_RX_DESC_EXT_STATUS_FCOELONGB_SHIFT	= 10,
+	IAVF_RX_DESC_EXT_STATUS_PELONGB_SHIFT	= 11,
+};
+
+enum iavf_rx_desc_pe_status_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_DESC_PE_STATUS_QPID_SHIFT	= 0, /* 18 BITS */
+	IAVF_RX_DESC_PE_STATUS_L4PORT_SHIFT	= 0, /* 16 BITS */
+	IAVF_RX_DESC_PE_STATUS_IPINDEX_SHIFT	= 16, /* 8 BITS */
+	IAVF_RX_DESC_PE_STATUS_QPIDHIT_SHIFT	= 24,
+	IAVF_RX_DESC_PE_STATUS_APBVTHIT_SHIFT	= 25,
+	IAVF_RX_DESC_PE_STATUS_PORTV_SHIFT	= 26,
+	IAVF_RX_DESC_PE_STATUS_URG_SHIFT	= 27,
+	IAVF_RX_DESC_PE_STATUS_IPFRAG_SHIFT	= 28,
+	IAVF_RX_DESC_PE_STATUS_IPOPT_SHIFT	= 29
+};
+
+#define IAVF_RX_PROG_STATUS_DESC_LENGTH_SHIFT		38
+#define IAVF_RX_PROG_STATUS_DESC_LENGTH			0x2000000
+
+#define IAVF_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT	2
+#define IAVF_RX_PROG_STATUS_DESC_QW1_PROGID_MASK	(0x7UL << \
+				IAVF_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT)
+
+#define IAVF_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT	19
+#define IAVF_RX_PROG_STATUS_DESC_QW1_ERROR_MASK		(0x3FUL << \
+				IAVF_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT)
+
+enum iavf_rx_prog_status_desc_status_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_PROG_STATUS_DESC_DD_SHIFT	= 0,
+	IAVF_RX_PROG_STATUS_DESC_PROG_ID_SHIFT	= 2 /* 3 BITS */
+};
+
+enum iavf_rx_prog_status_desc_prog_id_masks {
+	IAVF_RX_PROG_STATUS_DESC_FD_FILTER_STATUS	= 1,
+	IAVF_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS	= 2,
+	IAVF_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS	= 4,
+};
+
+enum iavf_rx_prog_status_desc_error_bits {
+	/* Note: These are predefined bit offsets */
+	IAVF_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT	= 0,
+	IAVF_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT	= 1,
+	IAVF_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT	= 2,
+	IAVF_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT	= 3
+};
+
+/* TX Descriptor */
+struct iavf_tx_desc {
+	__le64 buffer_addr; /* Address of descriptor's data buf */
+	__le64 cmd_type_offset_bsz;
+};
+
+#define IAVF_TXD_QW1_DTYPE_SHIFT	0
+#define IAVF_TXD_QW1_DTYPE_MASK		(0xFUL << IAVF_TXD_QW1_DTYPE_SHIFT)
+
+enum iavf_tx_desc_dtype_value {
+	IAVF_TX_DESC_DTYPE_DATA		= 0x0,
+	IAVF_TX_DESC_DTYPE_NOP		= 0x1, /* same as Context desc */
+	IAVF_TX_DESC_DTYPE_CONTEXT	= 0x1,
+	IAVF_TX_DESC_DTYPE_FCOE_CTX	= 0x2,
+	IAVF_TX_DESC_DTYPE_FILTER_PROG	= 0x8,
+	IAVF_TX_DESC_DTYPE_DDP_CTX	= 0x9,
+	IAVF_TX_DESC_DTYPE_FLEX_DATA	= 0xB,
+	IAVF_TX_DESC_DTYPE_FLEX_CTX_1	= 0xC,
+	IAVF_TX_DESC_DTYPE_FLEX_CTX_2	= 0xD,
+	IAVF_TX_DESC_DTYPE_DESC_DONE	= 0xF
+};
+
+#define IAVF_TXD_QW1_CMD_SHIFT	4
+#define IAVF_TXD_QW1_CMD_MASK	(0x3FFUL << IAVF_TXD_QW1_CMD_SHIFT)
+
+enum iavf_tx_desc_cmd_bits {
+	IAVF_TX_DESC_CMD_EOP			= 0x0001,
+	IAVF_TX_DESC_CMD_RS			= 0x0002,
+	IAVF_TX_DESC_CMD_ICRC			= 0x0004,
+	IAVF_TX_DESC_CMD_IL2TAG1		= 0x0008,
+	IAVF_TX_DESC_CMD_DUMMY			= 0x0010,
+	IAVF_TX_DESC_CMD_IIPT_NONIP		= 0x0000, /* 2 BITS */
+	IAVF_TX_DESC_CMD_IIPT_IPV6		= 0x0020, /* 2 BITS */
+	IAVF_TX_DESC_CMD_IIPT_IPV4		= 0x0040, /* 2 BITS */
+	IAVF_TX_DESC_CMD_IIPT_IPV4_CSUM		= 0x0060, /* 2 BITS */
+	IAVF_TX_DESC_CMD_FCOET			= 0x0080,
+	IAVF_TX_DESC_CMD_L4T_EOFT_UNK		= 0x0000, /* 2 BITS */
+	IAVF_TX_DESC_CMD_L4T_EOFT_TCP		= 0x0100, /* 2 BITS */
+	IAVF_TX_DESC_CMD_L4T_EOFT_SCTP		= 0x0200, /* 2 BITS */
+	IAVF_TX_DESC_CMD_L4T_EOFT_UDP		= 0x0300, /* 2 BITS */
+	IAVF_TX_DESC_CMD_L4T_EOFT_EOF_N		= 0x0000, /* 2 BITS */
+	IAVF_TX_DESC_CMD_L4T_EOFT_EOF_T		= 0x0100, /* 2 BITS */
+	IAVF_TX_DESC_CMD_L4T_EOFT_EOF_NI	= 0x0200, /* 2 BITS */
+	IAVF_TX_DESC_CMD_L4T_EOFT_EOF_A		= 0x0300, /* 2 BITS */
+};
+
+#define IAVF_TXD_QW1_OFFSET_SHIFT	16
+#define IAVF_TXD_QW1_OFFSET_MASK	(0x3FFFFULL << \
+					 IAVF_TXD_QW1_OFFSET_SHIFT)
+
+enum iavf_tx_desc_length_fields {
+	/* Note: These are predefined bit offsets */
+	IAVF_TX_DESC_LENGTH_MACLEN_SHIFT	= 0, /* 7 BITS */
+	IAVF_TX_DESC_LENGTH_IPLEN_SHIFT		= 7, /* 7 BITS */
+	IAVF_TX_DESC_LENGTH_L4_FC_LEN_SHIFT	= 14 /* 4 BITS */
+};
+
+#define IAVF_TXD_QW1_TX_BUF_SZ_SHIFT	34
+#define IAVF_TXD_QW1_TX_BUF_SZ_MASK	(0x3FFFULL << \
+					 IAVF_TXD_QW1_TX_BUF_SZ_SHIFT)
+
+#define IAVF_TXD_QW1_L2TAG1_SHIFT	48
+#define IAVF_TXD_QW1_L2TAG1_MASK	(0xFFFFULL << IAVF_TXD_QW1_L2TAG1_SHIFT)
+
+/* Context descriptors */
+struct iavf_tx_context_desc {
+	__le32 tunneling_params;
+	__le16 l2tag2;
+	__le16 rsvd;
+	__le64 type_cmd_tso_mss;
+};
+
+#define IAVF_TXD_CTX_QW1_CMD_SHIFT	4
+#define IAVF_TXD_CTX_QW1_CMD_MASK	(0xFFFFUL << IAVF_TXD_CTX_QW1_CMD_SHIFT)
+
+enum iavf_tx_ctx_desc_cmd_bits {
+	IAVF_TX_CTX_DESC_TSO		= 0x01,
+	IAVF_TX_CTX_DESC_TSYN		= 0x02,
+	IAVF_TX_CTX_DESC_IL2TAG2	= 0x04,
+	IAVF_TX_CTX_DESC_IL2TAG2_IL2H	= 0x08,
+	IAVF_TX_CTX_DESC_SWTCH_NOTAG	= 0x00,
+	IAVF_TX_CTX_DESC_SWTCH_UPLINK	= 0x10,
+	IAVF_TX_CTX_DESC_SWTCH_LOCAL	= 0x20,
+	IAVF_TX_CTX_DESC_SWTCH_VSI	= 0x30,
+	IAVF_TX_CTX_DESC_SWPE		= 0x40
+};
+
+/* Packet Classifier Types for filters */
+enum iavf_filter_pctype {
+	/* Note: Values 0-28 are reserved for future use.
+	 * Value 29, 30, 32 are not supported on XL710 and X710.
+	 */
+	IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP	= 29,
+	IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP	= 30,
+	IAVF_FILTER_PCTYPE_NONF_IPV4_UDP		= 31,
+	IAVF_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK	= 32,
+	IAVF_FILTER_PCTYPE_NONF_IPV4_TCP		= 33,
+	IAVF_FILTER_PCTYPE_NONF_IPV4_SCTP		= 34,
+	IAVF_FILTER_PCTYPE_NONF_IPV4_OTHER		= 35,
+	IAVF_FILTER_PCTYPE_FRAG_IPV4			= 36,
+	/* Note: Values 37-38 are reserved for future use.
+	 * Value 39, 40, 42 are not supported on XL710 and X710.
+	 */
+	IAVF_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP	= 39,
+	IAVF_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP	= 40,
+	IAVF_FILTER_PCTYPE_NONF_IPV6_UDP		= 41,
+	IAVF_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK	= 42,
+	IAVF_FILTER_PCTYPE_NONF_IPV6_TCP		= 43,
+	IAVF_FILTER_PCTYPE_NONF_IPV6_SCTP		= 44,
+	IAVF_FILTER_PCTYPE_NONF_IPV6_OTHER		= 45,
+	IAVF_FILTER_PCTYPE_FRAG_IPV6			= 46,
+	/* Note: Value 47 is reserved for future use */
+	IAVF_FILTER_PCTYPE_FCOE_OX			= 48,
+	IAVF_FILTER_PCTYPE_FCOE_RX			= 49,
+	IAVF_FILTER_PCTYPE_FCOE_OTHER			= 50,
+	/* Note: Values 51-62 are reserved for future use */
+	IAVF_FILTER_PCTYPE_L2_PAYLOAD			= 63,
+};
+
+#define IAVF_TXD_CTX_QW1_TSO_LEN_SHIFT	30
+#define IAVF_TXD_CTX_QW1_TSO_LEN_MASK	(0x3FFFFULL << \
+					 IAVF_TXD_CTX_QW1_TSO_LEN_SHIFT)
+
+#define IAVF_TXD_CTX_QW1_MSS_SHIFT	50
+#define IAVF_TXD_CTX_QW1_MSS_MASK	(0x3FFFULL << \
+					 IAVF_TXD_CTX_QW1_MSS_SHIFT)
+
+#define IAVF_TXD_CTX_QW1_VSI_SHIFT	50
+#define IAVF_TXD_CTX_QW1_VSI_MASK	(0x1FFULL << IAVF_TXD_CTX_QW1_VSI_SHIFT)
+
+#define IAVF_TXD_CTX_QW0_EXT_IP_SHIFT	0
+#define IAVF_TXD_CTX_QW0_EXT_IP_MASK	(0x3ULL << \
+					 IAVF_TXD_CTX_QW0_EXT_IP_SHIFT)
+
+enum iavf_tx_ctx_desc_eipt_offload {
+	IAVF_TX_CTX_EXT_IP_NONE		= 0x0,
+	IAVF_TX_CTX_EXT_IP_IPV6		= 0x1,
+	IAVF_TX_CTX_EXT_IP_IPV4_NO_CSUM	= 0x2,
+	IAVF_TX_CTX_EXT_IP_IPV4		= 0x3
+};
+
+#define IAVF_TXD_CTX_QW0_EXT_IPLEN_SHIFT	2
+#define IAVF_TXD_CTX_QW0_EXT_IPLEN_MASK	(0x3FULL << \
+					 IAVF_TXD_CTX_QW0_EXT_IPLEN_SHIFT)
+
+#define IAVF_TXD_CTX_QW0_NATT_SHIFT	9
+#define IAVF_TXD_CTX_QW0_NATT_MASK	(0x3ULL << IAVF_TXD_CTX_QW0_NATT_SHIFT)
+
+#define IAVF_TXD_CTX_UDP_TUNNELING	BIT_ULL(IAVF_TXD_CTX_QW0_NATT_SHIFT)
+#define IAVF_TXD_CTX_GRE_TUNNELING	(0x2ULL << IAVF_TXD_CTX_QW0_NATT_SHIFT)
+
+#define IAVF_TXD_CTX_QW0_EIP_NOINC_SHIFT	11
+#define IAVF_TXD_CTX_QW0_EIP_NOINC_MASK \
+				       BIT_ULL(IAVF_TXD_CTX_QW0_EIP_NOINC_SHIFT)
+
+#define IAVF_TXD_CTX_EIP_NOINC_IPID_CONST	IAVF_TXD_CTX_QW0_EIP_NOINC_MASK
+
+#define IAVF_TXD_CTX_QW0_NATLEN_SHIFT	12
+#define IAVF_TXD_CTX_QW0_NATLEN_MASK	(0X7FULL << \
+					 IAVF_TXD_CTX_QW0_NATLEN_SHIFT)
+
+#define IAVF_TXD_CTX_QW0_DECTTL_SHIFT	19
+#define IAVF_TXD_CTX_QW0_DECTTL_MASK	(0xFULL << \
+					 IAVF_TXD_CTX_QW0_DECTTL_SHIFT)
+
+#define IAVF_TXD_CTX_QW0_L4T_CS_SHIFT	23
+#define IAVF_TXD_CTX_QW0_L4T_CS_MASK	BIT_ULL(IAVF_TXD_CTX_QW0_L4T_CS_SHIFT)
+
+/* Statistics collected by each port, VSI, VEB, and S-channel */
+struct iavf_eth_stats {
+	u64 rx_bytes;			/* gorc */
+	u64 rx_unicast;			/* uprc */
+	u64 rx_multicast;		/* mprc */
+	u64 rx_broadcast;		/* bprc */
+	u64 rx_discards;		/* rdpc */
+	u64 rx_unknown_protocol;	/* rupp */
+	u64 tx_bytes;			/* gotc */
+	u64 tx_unicast;			/* uptc */
+	u64 tx_multicast;		/* mptc */
+	u64 tx_broadcast;		/* bptc */
+	u64 tx_discards;		/* tdpc */
+	u64 tx_errors;			/* tepc */
+};
+#endif /* _IAVF_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
new file mode 100644
index 0000000000..b95a4f9032
--- /dev/null
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -0,0 +1,2550 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2013 - 2018 Intel Corporation. */
+
+#include "iavf.h"
+#include "iavf_prototype.h"
+#include "iavf_client.h"
+
+/**
+ * iavf_send_pf_msg
+ * @adapter: adapter structure
+ * @op: virtual channel opcode
+ * @msg: pointer to message buffer
+ * @len: message length
+ *
+ * Send message to PF and print status if failure.
+ **/
+static int iavf_send_pf_msg(struct iavf_adapter *adapter,
+			    enum virtchnl_ops op, u8 *msg, u16 len)
+{
+	struct iavf_hw *hw = &adapter->hw;
+	enum iavf_status status;
+
+	if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+		return 0; /* nothing to see here, move along */
+
+	status = iavf_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL);
+	if (status)
+		dev_dbg(&adapter->pdev->dev, "Unable to send opcode %d to PF, status %s, aq_err %s\n",
+			op, iavf_stat_str(hw, status),
+			iavf_aq_str(hw, hw->aq.asq_last_status));
+	return iavf_status_to_errno(status);
+}
+
+/**
+ * iavf_send_api_ver
+ * @adapter: adapter structure
+ *
+ * Send API version admin queue message to the PF. The reply is not checked
+ * in this function. Returns 0 if the message was successfully
+ * sent, or one of the IAVF_ADMIN_QUEUE_ERROR_ statuses if not.
+ **/
+int iavf_send_api_ver(struct iavf_adapter *adapter)
+{
+	struct virtchnl_version_info vvi;
+
+	vvi.major = VIRTCHNL_VERSION_MAJOR;
+	vvi.minor = VIRTCHNL_VERSION_MINOR;
+
+	return iavf_send_pf_msg(adapter, VIRTCHNL_OP_VERSION, (u8 *)&vvi,
+				sizeof(vvi));
+}
+
+/**
+ * iavf_poll_virtchnl_msg
+ * @hw: HW configuration structure
+ * @event: event to populate on success
+ * @op_to_poll: requested virtchnl op to poll for
+ *
+ * Initialize poll for virtchnl msg matching the requested_op. Returns 0
+ * if a message of the correct opcode is in the queue or an error code
+ * if no message matching the op code is waiting and other failures.
+ */
+static int
+iavf_poll_virtchnl_msg(struct iavf_hw *hw, struct iavf_arq_event_info *event,
+		       enum virtchnl_ops op_to_poll)
+{
+	enum virtchnl_ops received_op;
+	enum iavf_status status;
+	u32 v_retval;
+
+	while (1) {
+		/* When the AQ is empty, iavf_clean_arq_element will return
+		 * nonzero and this loop will terminate.
+		 */
+		status = iavf_clean_arq_element(hw, event, NULL);
+		if (status != IAVF_SUCCESS)
+			return iavf_status_to_errno(status);
+		received_op =
+		    (enum virtchnl_ops)le32_to_cpu(event->desc.cookie_high);
+		if (op_to_poll == received_op)
+			break;
+	}
+
+	v_retval = le32_to_cpu(event->desc.cookie_low);
+	return virtchnl_status_to_errno((enum virtchnl_status_code)v_retval);
+}
+
+/**
+ * iavf_verify_api_ver
+ * @adapter: adapter structure
+ *
+ * Compare API versions with the PF. Must be called after admin queue is
+ * initialized. Returns 0 if API versions match, -EIO if they do not,
+ * IAVF_ERR_ADMIN_QUEUE_NO_WORK if the admin queue is empty, and any errors
+ * from the firmware are propagated.
+ **/
+int iavf_verify_api_ver(struct iavf_adapter *adapter)
+{
+	struct iavf_arq_event_info event;
+	int err;
+
+	event.buf_len = IAVF_MAX_AQ_BUF_SIZE;
+	event.msg_buf = kzalloc(IAVF_MAX_AQ_BUF_SIZE, GFP_KERNEL);
+	if (!event.msg_buf)
+		return -ENOMEM;
+
+	err = iavf_poll_virtchnl_msg(&adapter->hw, &event, VIRTCHNL_OP_VERSION);
+	if (!err) {
+		struct virtchnl_version_info *pf_vvi =
+			(struct virtchnl_version_info *)event.msg_buf;
+		adapter->pf_version = *pf_vvi;
+
+		if (pf_vvi->major > VIRTCHNL_VERSION_MAJOR ||
+		    (pf_vvi->major == VIRTCHNL_VERSION_MAJOR &&
+		     pf_vvi->minor > VIRTCHNL_VERSION_MINOR))
+			err = -EIO;
+	}
+
+	kfree(event.msg_buf);
+
+	return err;
+}
+
+/**
+ * iavf_send_vf_config_msg
+ * @adapter: adapter structure
+ *
+ * Send VF configuration request admin queue message to the PF. The reply
+ * is not checked in this function. Returns 0 if the message was
+ * successfully sent, or one of the IAVF_ADMIN_QUEUE_ERROR_ statuses if not.
+ **/
+int iavf_send_vf_config_msg(struct iavf_adapter *adapter)
+{
+	u32 caps;
+
+	caps = VIRTCHNL_VF_OFFLOAD_L2 |
+	       VIRTCHNL_VF_OFFLOAD_RSS_PF |
+	       VIRTCHNL_VF_OFFLOAD_RSS_AQ |
+	       VIRTCHNL_VF_OFFLOAD_RSS_REG |
+	       VIRTCHNL_VF_OFFLOAD_VLAN |
+	       VIRTCHNL_VF_OFFLOAD_WB_ON_ITR |
+	       VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 |
+	       VIRTCHNL_VF_OFFLOAD_ENCAP |
+	       VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
+	       VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM |
+	       VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
+	       VIRTCHNL_VF_OFFLOAD_ADQ |
+	       VIRTCHNL_VF_OFFLOAD_USO |
+	       VIRTCHNL_VF_OFFLOAD_FDIR_PF |
+	       VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF |
+	       VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
+
+	adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES;
+	adapter->aq_required &= ~IAVF_FLAG_AQ_GET_CONFIG;
+	if (PF_IS_V11(adapter))
+		return iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_VF_RESOURCES,
+					(u8 *)&caps, sizeof(caps));
+	else
+		return iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_VF_RESOURCES,
+					NULL, 0);
+}
+
+int iavf_send_vf_offload_vlan_v2_msg(struct iavf_adapter *adapter)
+{
+	adapter->aq_required &= ~IAVF_FLAG_AQ_GET_OFFLOAD_VLAN_V2_CAPS;
+
+	if (!VLAN_V2_ALLOWED(adapter))
+		return -EOPNOTSUPP;
+
+	adapter->current_op = VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS;
+
+	return iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS,
+				NULL, 0);
+}
+
+/**
+ * iavf_validate_num_queues
+ * @adapter: adapter structure
+ *
+ * Validate that the number of queues the PF has sent in
+ * VIRTCHNL_OP_GET_VF_RESOURCES is not larger than the VF can handle.
+ **/
+static void iavf_validate_num_queues(struct iavf_adapter *adapter)
+{
+	if (adapter->vf_res->num_queue_pairs > IAVF_MAX_REQ_QUEUES) {
+		struct virtchnl_vsi_resource *vsi_res;
+		int i;
+
+		dev_info(&adapter->pdev->dev, "Received %d queues, but can only have a max of %d\n",
+			 adapter->vf_res->num_queue_pairs,
+			 IAVF_MAX_REQ_QUEUES);
+		dev_info(&adapter->pdev->dev, "Fixing by reducing queues to %d\n",
+			 IAVF_MAX_REQ_QUEUES);
+		adapter->vf_res->num_queue_pairs = IAVF_MAX_REQ_QUEUES;
+		for (i = 0; i < adapter->vf_res->num_vsis; i++) {
+			vsi_res = &adapter->vf_res->vsi_res[i];
+			vsi_res->num_queue_pairs = IAVF_MAX_REQ_QUEUES;
+		}
+	}
+}
+
+/**
+ * iavf_get_vf_config
+ * @adapter: private adapter structure
+ *
+ * Get VF configuration from PF and populate hw structure. Must be called after
+ * admin queue is initialized. Busy waits until response is received from PF,
+ * with maximum timeout. Response from PF is returned in the buffer for further
+ * processing by the caller.
+ **/
+int iavf_get_vf_config(struct iavf_adapter *adapter)
+{
+	struct iavf_hw *hw = &adapter->hw;
+	struct iavf_arq_event_info event;
+	u16 len;
+	int err;
+
+	len = IAVF_VIRTCHNL_VF_RESOURCE_SIZE;
+	event.buf_len = len;
+	event.msg_buf = kzalloc(len, GFP_KERNEL);
+	if (!event.msg_buf)
+		return -ENOMEM;
+
+	err = iavf_poll_virtchnl_msg(hw, &event, VIRTCHNL_OP_GET_VF_RESOURCES);
+	memcpy(adapter->vf_res, event.msg_buf, min(event.msg_len, len));
+
+	/* some PFs send more queues than we should have so validate that
+	 * we aren't getting too many queues
+	 */
+	if (!err)
+		iavf_validate_num_queues(adapter);
+	iavf_vf_parse_hw_config(hw, adapter->vf_res);
+
+	kfree(event.msg_buf);
+
+	return err;
+}
+
+int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter)
+{
+	struct iavf_arq_event_info event;
+	int err;
+	u16 len;
+
+	len = sizeof(struct virtchnl_vlan_caps);
+	event.buf_len = len;
+	event.msg_buf = kzalloc(len, GFP_KERNEL);
+	if (!event.msg_buf)
+		return -ENOMEM;
+
+	err = iavf_poll_virtchnl_msg(&adapter->hw, &event,
+				     VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS);
+	if (!err)
+		memcpy(&adapter->vlan_v2_caps, event.msg_buf,
+		       min(event.msg_len, len));
+
+	kfree(event.msg_buf);
+
+	return err;
+}
+
+/**
+ * iavf_configure_queues
+ * @adapter: adapter structure
+ *
+ * Request that the PF set up our (previously allocated) queues.
+ **/
+void iavf_configure_queues(struct iavf_adapter *adapter)
+{
+	struct virtchnl_vsi_queue_config_info *vqci;
+	int i, max_frame = adapter->vf_res->max_mtu;
+	int pairs = adapter->num_active_queues;
+	struct virtchnl_queue_pair_info *vqpi;
+	size_t len;
+
+	if (max_frame > IAVF_MAX_RXBUFFER || !max_frame)
+		max_frame = IAVF_MAX_RXBUFFER;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot configure queues, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_CONFIG_VSI_QUEUES;
+	len = virtchnl_struct_size(vqci, qpair, pairs);
+	vqci = kzalloc(len, GFP_KERNEL);
+	if (!vqci)
+		return;
+
+	/* Limit maximum frame size when jumbo frames is not enabled */
+	if (!(adapter->flags & IAVF_FLAG_LEGACY_RX) &&
+	    (adapter->netdev->mtu <= ETH_DATA_LEN))
+		max_frame = IAVF_RXBUFFER_1536 - NET_IP_ALIGN;
+
+	vqci->vsi_id = adapter->vsi_res->vsi_id;
+	vqci->num_queue_pairs = pairs;
+	vqpi = vqci->qpair;
+	/* Size check is not needed here - HW max is 16 queue pairs, and we
+	 * can fit info for 31 of them into the AQ buffer before it overflows.
+	 */
+	for (i = 0; i < pairs; i++) {
+		vqpi->txq.vsi_id = vqci->vsi_id;
+		vqpi->txq.queue_id = i;
+		vqpi->txq.ring_len = adapter->tx_rings[i].count;
+		vqpi->txq.dma_ring_addr = adapter->tx_rings[i].dma;
+		vqpi->rxq.vsi_id = vqci->vsi_id;
+		vqpi->rxq.queue_id = i;
+		vqpi->rxq.ring_len = adapter->rx_rings[i].count;
+		vqpi->rxq.dma_ring_addr = adapter->rx_rings[i].dma;
+		vqpi->rxq.max_pkt_size = max_frame;
+		vqpi->rxq.databuffer_size =
+			ALIGN(adapter->rx_rings[i].rx_buf_len,
+			      BIT_ULL(IAVF_RXQ_CTX_DBUFF_SHIFT));
+		vqpi++;
+	}
+
+	adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_QUEUES;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+			 (u8 *)vqci, len);
+	kfree(vqci);
+}
+
+/**
+ * iavf_enable_queues
+ * @adapter: adapter structure
+ *
+ * Request that the PF enable all of our queues.
+ **/
+void iavf_enable_queues(struct iavf_adapter *adapter)
+{
+	struct virtchnl_queue_select vqs;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot enable queues, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_ENABLE_QUEUES;
+	vqs.vsi_id = adapter->vsi_res->vsi_id;
+	vqs.tx_queues = BIT(adapter->num_active_queues) - 1;
+	vqs.rx_queues = vqs.tx_queues;
+	adapter->aq_required &= ~IAVF_FLAG_AQ_ENABLE_QUEUES;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_QUEUES,
+			 (u8 *)&vqs, sizeof(vqs));
+}
+
+/**
+ * iavf_disable_queues
+ * @adapter: adapter structure
+ *
+ * Request that the PF disable all of our queues.
+ **/
+void iavf_disable_queues(struct iavf_adapter *adapter)
+{
+	struct virtchnl_queue_select vqs;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot disable queues, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_DISABLE_QUEUES;
+	vqs.vsi_id = adapter->vsi_res->vsi_id;
+	vqs.tx_queues = BIT(adapter->num_active_queues) - 1;
+	vqs.rx_queues = vqs.tx_queues;
+	adapter->aq_required &= ~IAVF_FLAG_AQ_DISABLE_QUEUES;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_QUEUES,
+			 (u8 *)&vqs, sizeof(vqs));
+}
+
+/**
+ * iavf_map_queues
+ * @adapter: adapter structure
+ *
+ * Request that the PF map queues to interrupt vectors. Misc causes, including
+ * admin queue, are always mapped to vector 0.
+ **/
+void iavf_map_queues(struct iavf_adapter *adapter)
+{
+	struct virtchnl_irq_map_info *vimi;
+	struct virtchnl_vector_map *vecmap;
+	struct iavf_q_vector *q_vector;
+	int v_idx, q_vectors;
+	size_t len;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot map queues to vectors, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_CONFIG_IRQ_MAP;
+
+	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+
+	len = virtchnl_struct_size(vimi, vecmap, adapter->num_msix_vectors);
+	vimi = kzalloc(len, GFP_KERNEL);
+	if (!vimi)
+		return;
+
+	vimi->num_vectors = adapter->num_msix_vectors;
+	/* Queue vectors first */
+	for (v_idx = 0; v_idx < q_vectors; v_idx++) {
+		q_vector = &adapter->q_vectors[v_idx];
+		vecmap = &vimi->vecmap[v_idx];
+
+		vecmap->vsi_id = adapter->vsi_res->vsi_id;
+		vecmap->vector_id = v_idx + NONQ_VECS;
+		vecmap->txq_map = q_vector->ring_mask;
+		vecmap->rxq_map = q_vector->ring_mask;
+		vecmap->rxitr_idx = IAVF_RX_ITR;
+		vecmap->txitr_idx = IAVF_TX_ITR;
+	}
+	/* Misc vector last - this is only for AdminQ messages */
+	vecmap = &vimi->vecmap[v_idx];
+	vecmap->vsi_id = adapter->vsi_res->vsi_id;
+	vecmap->vector_id = 0;
+	vecmap->txq_map = 0;
+	vecmap->rxq_map = 0;
+
+	adapter->aq_required &= ~IAVF_FLAG_AQ_MAP_VECTORS;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP,
+			 (u8 *)vimi, len);
+	kfree(vimi);
+}
+
+/**
+ * iavf_set_mac_addr_type - Set the correct request type from the filter type
+ * @virtchnl_ether_addr: pointer to requested list element
+ * @filter: pointer to requested filter
+ **/
+static void
+iavf_set_mac_addr_type(struct virtchnl_ether_addr *virtchnl_ether_addr,
+		       const struct iavf_mac_filter *filter)
+{
+	virtchnl_ether_addr->type = filter->is_primary ?
+		VIRTCHNL_ETHER_ADDR_PRIMARY :
+		VIRTCHNL_ETHER_ADDR_EXTRA;
+}
+
+/**
+ * iavf_add_ether_addrs
+ * @adapter: adapter structure
+ *
+ * Request that the PF add one or more addresses to our filters.
+ **/
+void iavf_add_ether_addrs(struct iavf_adapter *adapter)
+{
+	struct virtchnl_ether_addr_list *veal;
+	struct iavf_mac_filter *f;
+	int i = 0, count = 0;
+	bool more = false;
+	size_t len;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot add filters, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	list_for_each_entry(f, &adapter->mac_filter_list, list) {
+		if (f->add)
+			count++;
+	}
+	if (!count) {
+		adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_MAC_FILTER;
+		spin_unlock_bh(&adapter->mac_vlan_list_lock);
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_ADD_ETH_ADDR;
+
+	len = virtchnl_struct_size(veal, list, count);
+	if (len > IAVF_MAX_AQ_BUF_SIZE) {
+		dev_warn(&adapter->pdev->dev, "Too many add MAC changes in one request\n");
+		while (len > IAVF_MAX_AQ_BUF_SIZE)
+			len = virtchnl_struct_size(veal, list, --count);
+		more = true;
+	}
+
+	veal = kzalloc(len, GFP_ATOMIC);
+	if (!veal) {
+		spin_unlock_bh(&adapter->mac_vlan_list_lock);
+		return;
+	}
+
+	veal->vsi_id = adapter->vsi_res->vsi_id;
+	veal->num_elements = count;
+	list_for_each_entry(f, &adapter->mac_filter_list, list) {
+		if (f->add) {
+			ether_addr_copy(veal->list[i].addr, f->macaddr);
+			iavf_set_mac_addr_type(&veal->list[i], f);
+			i++;
+			f->add = false;
+			if (i == count)
+				break;
+		}
+	}
+	if (!more)
+		adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_MAC_FILTER;
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_ETH_ADDR, (u8 *)veal, len);
+	kfree(veal);
+}
+
+/**
+ * iavf_del_ether_addrs
+ * @adapter: adapter structure
+ *
+ * Request that the PF remove one or more addresses from our filters.
+ **/
+void iavf_del_ether_addrs(struct iavf_adapter *adapter)
+{
+	struct virtchnl_ether_addr_list *veal;
+	struct iavf_mac_filter *f, *ftmp;
+	int i = 0, count = 0;
+	bool more = false;
+	size_t len;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot remove filters, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	list_for_each_entry(f, &adapter->mac_filter_list, list) {
+		if (f->remove)
+			count++;
+	}
+	if (!count) {
+		adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_MAC_FILTER;
+		spin_unlock_bh(&adapter->mac_vlan_list_lock);
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_DEL_ETH_ADDR;
+
+	len = virtchnl_struct_size(veal, list, count);
+	if (len > IAVF_MAX_AQ_BUF_SIZE) {
+		dev_warn(&adapter->pdev->dev, "Too many delete MAC changes in one request\n");
+		while (len > IAVF_MAX_AQ_BUF_SIZE)
+			len = virtchnl_struct_size(veal, list, --count);
+		more = true;
+	}
+	veal = kzalloc(len, GFP_ATOMIC);
+	if (!veal) {
+		spin_unlock_bh(&adapter->mac_vlan_list_lock);
+		return;
+	}
+
+	veal->vsi_id = adapter->vsi_res->vsi_id;
+	veal->num_elements = count;
+	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
+		if (f->remove) {
+			ether_addr_copy(veal->list[i].addr, f->macaddr);
+			iavf_set_mac_addr_type(&veal->list[i], f);
+			i++;
+			list_del(&f->list);
+			kfree(f);
+			if (i == count)
+				break;
+		}
+	}
+	if (!more)
+		adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_MAC_FILTER;
+
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_ETH_ADDR, (u8 *)veal, len);
+	kfree(veal);
+}
+
+/**
+ * iavf_mac_add_ok
+ * @adapter: adapter structure
+ *
+ * Submit list of filters based on PF response.
+ **/
+static void iavf_mac_add_ok(struct iavf_adapter *adapter)
+{
+	struct iavf_mac_filter *f, *ftmp;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
+		f->is_new_mac = false;
+		if (!f->add && !f->add_handled)
+			f->add_handled = true;
+	}
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+}
+
+/**
+ * iavf_mac_add_reject
+ * @adapter: adapter structure
+ *
+ * Remove filters from list based on PF response.
+ **/
+static void iavf_mac_add_reject(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct iavf_mac_filter *f, *ftmp;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
+		if (f->remove && ether_addr_equal(f->macaddr, netdev->dev_addr))
+			f->remove = false;
+
+		if (!f->add && !f->add_handled)
+			f->add_handled = true;
+
+		if (f->is_new_mac) {
+			list_del(&f->list);
+			kfree(f);
+		}
+	}
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+}
+
+/**
+ * iavf_vlan_add_reject
+ * @adapter: adapter structure
+ *
+ * Remove VLAN filters from list based on PF response.
+ **/
+static void iavf_vlan_add_reject(struct iavf_adapter *adapter)
+{
+	struct iavf_vlan_filter *f, *ftmp;
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+	list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
+		if (f->state == IAVF_VLAN_IS_NEW) {
+			list_del(&f->list);
+			kfree(f);
+			adapter->num_vlan_filters--;
+		}
+	}
+	spin_unlock_bh(&adapter->mac_vlan_list_lock);
+}
+
+/**
+ * iavf_add_vlans
+ * @adapter: adapter structure
+ *
+ * Request that the PF add one or more VLAN filters to our VSI.
+ **/
+void iavf_add_vlans(struct iavf_adapter *adapter)
+{
+	int len, i = 0, count = 0;
+	struct iavf_vlan_filter *f;
+	bool more = false;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot add VLANs, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	list_for_each_entry(f, &adapter->vlan_filter_list, list) {
+		if (f->state == IAVF_VLAN_ADD)
+			count++;
+	}
+	if (!count || !VLAN_FILTERING_ALLOWED(adapter)) {
+		adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_VLAN_FILTER;
+		spin_unlock_bh(&adapter->mac_vlan_list_lock);
+		return;
+	}
+
+	if (VLAN_ALLOWED(adapter)) {
+		struct virtchnl_vlan_filter_list *vvfl;
+
+		adapter->current_op = VIRTCHNL_OP_ADD_VLAN;
+
+		len = virtchnl_struct_size(vvfl, vlan_id, count);
+		if (len > IAVF_MAX_AQ_BUF_SIZE) {
+			dev_warn(&adapter->pdev->dev, "Too many add VLAN changes in one request\n");
+			while (len > IAVF_MAX_AQ_BUF_SIZE)
+				len = virtchnl_struct_size(vvfl, vlan_id,
+							   --count);
+			more = true;
+		}
+		vvfl = kzalloc(len, GFP_ATOMIC);
+		if (!vvfl) {
+			spin_unlock_bh(&adapter->mac_vlan_list_lock);
+			return;
+		}
+
+		vvfl->vsi_id = adapter->vsi_res->vsi_id;
+		vvfl->num_elements = count;
+		list_for_each_entry(f, &adapter->vlan_filter_list, list) {
+			if (f->state == IAVF_VLAN_ADD) {
+				vvfl->vlan_id[i] = f->vlan.vid;
+				i++;
+				f->state = IAVF_VLAN_IS_NEW;
+				if (i == count)
+					break;
+			}
+		}
+		if (!more)
+			adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_VLAN_FILTER;
+
+		spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+		iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len);
+		kfree(vvfl);
+	} else {
+		u16 max_vlans = adapter->vlan_v2_caps.filtering.max_filters;
+		u16 current_vlans = iavf_get_num_vlans_added(adapter);
+		struct virtchnl_vlan_filter_list_v2 *vvfl_v2;
+
+		adapter->current_op = VIRTCHNL_OP_ADD_VLAN_V2;
+
+		if ((count + current_vlans) > max_vlans &&
+		    current_vlans < max_vlans) {
+			count = max_vlans - iavf_get_num_vlans_added(adapter);
+			more = true;
+		}
+
+		len = virtchnl_struct_size(vvfl_v2, filters, count);
+		if (len > IAVF_MAX_AQ_BUF_SIZE) {
+			dev_warn(&adapter->pdev->dev, "Too many add VLAN changes in one request\n");
+			while (len > IAVF_MAX_AQ_BUF_SIZE)
+				len = virtchnl_struct_size(vvfl_v2, filters,
+							   --count);
+			more = true;
+		}
+
+		vvfl_v2 = kzalloc(len, GFP_ATOMIC);
+		if (!vvfl_v2) {
+			spin_unlock_bh(&adapter->mac_vlan_list_lock);
+			return;
+		}
+
+		vvfl_v2->vport_id = adapter->vsi_res->vsi_id;
+		vvfl_v2->num_elements = count;
+		list_for_each_entry(f, &adapter->vlan_filter_list, list) {
+			if (f->state == IAVF_VLAN_ADD) {
+				struct virtchnl_vlan_supported_caps *filtering_support =
+					&adapter->vlan_v2_caps.filtering.filtering_support;
+				struct virtchnl_vlan *vlan;
+
+				if (i == count)
+					break;
+
+				/* give priority over outer if it's enabled */
+				if (filtering_support->outer)
+					vlan = &vvfl_v2->filters[i].outer;
+				else
+					vlan = &vvfl_v2->filters[i].inner;
+
+				vlan->tci = f->vlan.vid;
+				vlan->tpid = f->vlan.tpid;
+
+				i++;
+				f->state = IAVF_VLAN_IS_NEW;
+			}
+		}
+
+		if (!more)
+			adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_VLAN_FILTER;
+
+		spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+		iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_VLAN_V2,
+				 (u8 *)vvfl_v2, len);
+		kfree(vvfl_v2);
+	}
+}
+
+/**
+ * iavf_del_vlans
+ * @adapter: adapter structure
+ *
+ * Request that the PF remove one or more VLAN filters from our VSI.
+ **/
+void iavf_del_vlans(struct iavf_adapter *adapter)
+{
+	struct iavf_vlan_filter *f, *ftmp;
+	int len, i = 0, count = 0;
+	bool more = false;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot remove VLANs, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+	list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
+		/* since VLAN capabilities are not allowed, we dont want to send
+		 * a VLAN delete request because it will most likely fail and
+		 * create unnecessary errors/noise, so just free the VLAN
+		 * filters marked for removal to enable bailing out before
+		 * sending a virtchnl message
+		 */
+		if (f->state == IAVF_VLAN_REMOVE &&
+		    !VLAN_FILTERING_ALLOWED(adapter)) {
+			list_del(&f->list);
+			kfree(f);
+			adapter->num_vlan_filters--;
+		} else if (f->state == IAVF_VLAN_DISABLE &&
+		    !VLAN_FILTERING_ALLOWED(adapter)) {
+			f->state = IAVF_VLAN_INACTIVE;
+		} else if (f->state == IAVF_VLAN_REMOVE ||
+			   f->state == IAVF_VLAN_DISABLE) {
+			count++;
+		}
+	}
+	if (!count || !VLAN_FILTERING_ALLOWED(adapter)) {
+		adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_VLAN_FILTER;
+		spin_unlock_bh(&adapter->mac_vlan_list_lock);
+		return;
+	}
+
+	if (VLAN_ALLOWED(adapter)) {
+		struct virtchnl_vlan_filter_list *vvfl;
+
+		adapter->current_op = VIRTCHNL_OP_DEL_VLAN;
+
+		len = virtchnl_struct_size(vvfl, vlan_id, count);
+		if (len > IAVF_MAX_AQ_BUF_SIZE) {
+			dev_warn(&adapter->pdev->dev, "Too many delete VLAN changes in one request\n");
+			while (len > IAVF_MAX_AQ_BUF_SIZE)
+				len = virtchnl_struct_size(vvfl, vlan_id,
+							   --count);
+			more = true;
+		}
+		vvfl = kzalloc(len, GFP_ATOMIC);
+		if (!vvfl) {
+			spin_unlock_bh(&adapter->mac_vlan_list_lock);
+			return;
+		}
+
+		vvfl->vsi_id = adapter->vsi_res->vsi_id;
+		vvfl->num_elements = count;
+		list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
+			if (f->state == IAVF_VLAN_DISABLE) {
+				vvfl->vlan_id[i] = f->vlan.vid;
+				f->state = IAVF_VLAN_INACTIVE;
+				i++;
+				if (i == count)
+					break;
+			} else if (f->state == IAVF_VLAN_REMOVE) {
+				vvfl->vlan_id[i] = f->vlan.vid;
+				list_del(&f->list);
+				kfree(f);
+				adapter->num_vlan_filters--;
+				i++;
+				if (i == count)
+					break;
+			}
+		}
+
+		if (!more)
+			adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_VLAN_FILTER;
+
+		spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+		iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_VLAN, (u8 *)vvfl, len);
+		kfree(vvfl);
+	} else {
+		struct virtchnl_vlan_filter_list_v2 *vvfl_v2;
+
+		adapter->current_op = VIRTCHNL_OP_DEL_VLAN_V2;
+
+		len = virtchnl_struct_size(vvfl_v2, filters, count);
+		if (len > IAVF_MAX_AQ_BUF_SIZE) {
+			dev_warn(&adapter->pdev->dev, "Too many add VLAN changes in one request\n");
+			while (len > IAVF_MAX_AQ_BUF_SIZE)
+				len = virtchnl_struct_size(vvfl_v2, filters,
+							   --count);
+			more = true;
+		}
+
+		vvfl_v2 = kzalloc(len, GFP_ATOMIC);
+		if (!vvfl_v2) {
+			spin_unlock_bh(&adapter->mac_vlan_list_lock);
+			return;
+		}
+
+		vvfl_v2->vport_id = adapter->vsi_res->vsi_id;
+		vvfl_v2->num_elements = count;
+		list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
+			if (f->state == IAVF_VLAN_DISABLE ||
+			    f->state == IAVF_VLAN_REMOVE) {
+				struct virtchnl_vlan_supported_caps *filtering_support =
+					&adapter->vlan_v2_caps.filtering.filtering_support;
+				struct virtchnl_vlan *vlan;
+
+				/* give priority over outer if it's enabled */
+				if (filtering_support->outer)
+					vlan = &vvfl_v2->filters[i].outer;
+				else
+					vlan = &vvfl_v2->filters[i].inner;
+
+				vlan->tci = f->vlan.vid;
+				vlan->tpid = f->vlan.tpid;
+
+				if (f->state == IAVF_VLAN_DISABLE) {
+					f->state = IAVF_VLAN_INACTIVE;
+				} else {
+					list_del(&f->list);
+					kfree(f);
+					adapter->num_vlan_filters--;
+				}
+				i++;
+				if (i == count)
+					break;
+			}
+		}
+
+		if (!more)
+			adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_VLAN_FILTER;
+
+		spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+		iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_VLAN_V2,
+				 (u8 *)vvfl_v2, len);
+		kfree(vvfl_v2);
+	}
+}
+
+/**
+ * iavf_set_promiscuous
+ * @adapter: adapter structure
+ *
+ * Request that the PF enable promiscuous mode for our VSI.
+ **/
+void iavf_set_promiscuous(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct virtchnl_promisc_info vpi;
+	unsigned int flags;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot set promiscuous mode, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	/* prevent changes to promiscuous flags */
+	spin_lock_bh(&adapter->current_netdev_promisc_flags_lock);
+
+	/* sanity check to prevent duplicate AQ calls */
+	if (!iavf_promiscuous_mode_changed(adapter)) {
+		adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE;
+		dev_dbg(&adapter->pdev->dev, "No change in promiscuous mode\n");
+		/* allow changes to promiscuous flags */
+		spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock);
+		return;
+	}
+
+	/* there are 2 bits, but only 3 states */
+	if (!(netdev->flags & IFF_PROMISC) &&
+	    netdev->flags & IFF_ALLMULTI) {
+		/* State 1  - only multicast promiscuous mode enabled
+		 * - !IFF_PROMISC && IFF_ALLMULTI
+		 */
+		flags = FLAG_VF_MULTICAST_PROMISC;
+		adapter->current_netdev_promisc_flags |= IFF_ALLMULTI;
+		adapter->current_netdev_promisc_flags &= ~IFF_PROMISC;
+		dev_info(&adapter->pdev->dev, "Entering multicast promiscuous mode\n");
+	} else if (!(netdev->flags & IFF_PROMISC) &&
+		   !(netdev->flags & IFF_ALLMULTI)) {
+		/* State 2 - unicast/multicast promiscuous mode disabled
+		 * - !IFF_PROMISC && !IFF_ALLMULTI
+		 */
+		flags = 0;
+		adapter->current_netdev_promisc_flags &=
+			~(IFF_PROMISC | IFF_ALLMULTI);
+		dev_info(&adapter->pdev->dev, "Leaving promiscuous mode\n");
+	} else {
+		/* State 3 - unicast/multicast promiscuous mode enabled
+		 * - IFF_PROMISC && IFF_ALLMULTI
+		 * - IFF_PROMISC && !IFF_ALLMULTI
+		 */
+		flags = FLAG_VF_UNICAST_PROMISC | FLAG_VF_MULTICAST_PROMISC;
+		adapter->current_netdev_promisc_flags |= IFF_PROMISC;
+		if (netdev->flags & IFF_ALLMULTI)
+			adapter->current_netdev_promisc_flags |= IFF_ALLMULTI;
+		else
+			adapter->current_netdev_promisc_flags &= ~IFF_ALLMULTI;
+
+		dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n");
+	}
+
+	adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE;
+
+	/* allow changes to promiscuous flags */
+	spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock);
+
+	adapter->current_op = VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE;
+	vpi.vsi_id = adapter->vsi_res->vsi_id;
+	vpi.flags = flags;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+			 (u8 *)&vpi, sizeof(vpi));
+}
+
+/**
+ * iavf_request_stats
+ * @adapter: adapter structure
+ *
+ * Request VSI statistics from PF.
+ **/
+void iavf_request_stats(struct iavf_adapter *adapter)
+{
+	struct virtchnl_queue_select vqs;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* no error message, this isn't crucial */
+		return;
+	}
+
+	adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_STATS;
+	adapter->current_op = VIRTCHNL_OP_GET_STATS;
+	vqs.vsi_id = adapter->vsi_res->vsi_id;
+	/* queue maps are ignored for this message - only the vsi is used */
+	if (iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_STATS, (u8 *)&vqs,
+			     sizeof(vqs)))
+		/* if the request failed, don't lock out others */
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+}
+
+/**
+ * iavf_get_hena
+ * @adapter: adapter structure
+ *
+ * Request hash enable capabilities from PF
+ **/
+void iavf_get_hena(struct iavf_adapter *adapter)
+{
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot get RSS hash capabilities, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_GET_RSS_HENA_CAPS;
+	adapter->aq_required &= ~IAVF_FLAG_AQ_GET_HENA;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_GET_RSS_HENA_CAPS, NULL, 0);
+}
+
+/**
+ * iavf_set_hena
+ * @adapter: adapter structure
+ *
+ * Request the PF to set our RSS hash capabilities
+ **/
+void iavf_set_hena(struct iavf_adapter *adapter)
+{
+	struct virtchnl_rss_hena vrh;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot set RSS hash enable, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+	vrh.hena = adapter->hena;
+	adapter->current_op = VIRTCHNL_OP_SET_RSS_HENA;
+	adapter->aq_required &= ~IAVF_FLAG_AQ_SET_HENA;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_SET_RSS_HENA, (u8 *)&vrh,
+			 sizeof(vrh));
+}
+
+/**
+ * iavf_set_rss_key
+ * @adapter: adapter structure
+ *
+ * Request the PF to set our RSS hash key
+ **/
+void iavf_set_rss_key(struct iavf_adapter *adapter)
+{
+	struct virtchnl_rss_key *vrk;
+	int len;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot set RSS key, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+	len = virtchnl_struct_size(vrk, key, adapter->rss_key_size);
+	vrk = kzalloc(len, GFP_KERNEL);
+	if (!vrk)
+		return;
+	vrk->vsi_id = adapter->vsi.id;
+	vrk->key_len = adapter->rss_key_size;
+	memcpy(vrk->key, adapter->rss_key, adapter->rss_key_size);
+
+	adapter->current_op = VIRTCHNL_OP_CONFIG_RSS_KEY;
+	adapter->aq_required &= ~IAVF_FLAG_AQ_SET_RSS_KEY;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_RSS_KEY, (u8 *)vrk, len);
+	kfree(vrk);
+}
+
+/**
+ * iavf_set_rss_lut
+ * @adapter: adapter structure
+ *
+ * Request the PF to set our RSS lookup table
+ **/
+void iavf_set_rss_lut(struct iavf_adapter *adapter)
+{
+	struct virtchnl_rss_lut *vrl;
+	int len;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot set RSS LUT, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+	len = virtchnl_struct_size(vrl, lut, adapter->rss_lut_size);
+	vrl = kzalloc(len, GFP_KERNEL);
+	if (!vrl)
+		return;
+	vrl->vsi_id = adapter->vsi.id;
+	vrl->lut_entries = adapter->rss_lut_size;
+	memcpy(vrl->lut, adapter->rss_lut, adapter->rss_lut_size);
+	adapter->current_op = VIRTCHNL_OP_CONFIG_RSS_LUT;
+	adapter->aq_required &= ~IAVF_FLAG_AQ_SET_RSS_LUT;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_RSS_LUT, (u8 *)vrl, len);
+	kfree(vrl);
+}
+
+/**
+ * iavf_enable_vlan_stripping
+ * @adapter: adapter structure
+ *
+ * Request VLAN header stripping to be enabled
+ **/
+void iavf_enable_vlan_stripping(struct iavf_adapter *adapter)
+{
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot enable stripping, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_ENABLE_VLAN_STRIPPING;
+	adapter->aq_required &= ~IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING, NULL, 0);
+}
+
+/**
+ * iavf_disable_vlan_stripping
+ * @adapter: adapter structure
+ *
+ * Request VLAN header stripping to be disabled
+ **/
+void iavf_disable_vlan_stripping(struct iavf_adapter *adapter)
+{
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot disable stripping, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_DISABLE_VLAN_STRIPPING;
+	adapter->aq_required &= ~IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING, NULL, 0);
+}
+
+/**
+ * iavf_tpid_to_vc_ethertype - transform from VLAN TPID to virtchnl ethertype
+ * @tpid: VLAN TPID (i.e. 0x8100, 0x88a8, etc.)
+ */
+static u32 iavf_tpid_to_vc_ethertype(u16 tpid)
+{
+	switch (tpid) {
+	case ETH_P_8021Q:
+		return VIRTCHNL_VLAN_ETHERTYPE_8100;
+	case ETH_P_8021AD:
+		return VIRTCHNL_VLAN_ETHERTYPE_88A8;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_set_vc_offload_ethertype - set virtchnl ethertype for offload message
+ * @adapter: adapter structure
+ * @msg: message structure used for updating offloads over virtchnl to update
+ * @tpid: VLAN TPID (i.e. 0x8100, 0x88a8, etc.)
+ * @offload_op: opcode used to determine which support structure to check
+ */
+static int
+iavf_set_vc_offload_ethertype(struct iavf_adapter *adapter,
+			      struct virtchnl_vlan_setting *msg, u16 tpid,
+			      enum virtchnl_ops offload_op)
+{
+	struct virtchnl_vlan_supported_caps *offload_support;
+	u16 vc_ethertype = iavf_tpid_to_vc_ethertype(tpid);
+
+	/* reference the correct offload support structure */
+	switch (offload_op) {
+	case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2:
+	case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2:
+		offload_support =
+			&adapter->vlan_v2_caps.offloads.stripping_support;
+		break;
+	case VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2:
+	case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2:
+		offload_support =
+			&adapter->vlan_v2_caps.offloads.insertion_support;
+		break;
+	default:
+		dev_err(&adapter->pdev->dev, "Invalid opcode %d for setting virtchnl ethertype to enable/disable VLAN offloads\n",
+			offload_op);
+		return -EINVAL;
+	}
+
+	/* make sure ethertype is supported */
+	if (offload_support->outer & vc_ethertype &&
+	    offload_support->outer & VIRTCHNL_VLAN_TOGGLE) {
+		msg->outer_ethertype_setting = vc_ethertype;
+	} else if (offload_support->inner & vc_ethertype &&
+		   offload_support->inner & VIRTCHNL_VLAN_TOGGLE) {
+		msg->inner_ethertype_setting = vc_ethertype;
+	} else {
+		dev_dbg(&adapter->pdev->dev, "opcode %d unsupported for VLAN TPID 0x%04x\n",
+			offload_op, tpid);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * iavf_clear_offload_v2_aq_required - clear AQ required bit for offload request
+ * @adapter: adapter structure
+ * @tpid: VLAN TPID
+ * @offload_op: opcode used to determine which AQ required bit to clear
+ */
+static void
+iavf_clear_offload_v2_aq_required(struct iavf_adapter *adapter, u16 tpid,
+				  enum virtchnl_ops offload_op)
+{
+	switch (offload_op) {
+	case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2:
+		if (tpid == ETH_P_8021Q)
+			adapter->aq_required &=
+				~IAVF_FLAG_AQ_ENABLE_CTAG_VLAN_STRIPPING;
+		else if (tpid == ETH_P_8021AD)
+			adapter->aq_required &=
+				~IAVF_FLAG_AQ_ENABLE_STAG_VLAN_STRIPPING;
+		break;
+	case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2:
+		if (tpid == ETH_P_8021Q)
+			adapter->aq_required &=
+				~IAVF_FLAG_AQ_DISABLE_CTAG_VLAN_STRIPPING;
+		else if (tpid == ETH_P_8021AD)
+			adapter->aq_required &=
+				~IAVF_FLAG_AQ_DISABLE_STAG_VLAN_STRIPPING;
+		break;
+	case VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2:
+		if (tpid == ETH_P_8021Q)
+			adapter->aq_required &=
+				~IAVF_FLAG_AQ_ENABLE_CTAG_VLAN_INSERTION;
+		else if (tpid == ETH_P_8021AD)
+			adapter->aq_required &=
+				~IAVF_FLAG_AQ_ENABLE_STAG_VLAN_INSERTION;
+		break;
+	case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2:
+		if (tpid == ETH_P_8021Q)
+			adapter->aq_required &=
+				~IAVF_FLAG_AQ_DISABLE_CTAG_VLAN_INSERTION;
+		else if (tpid == ETH_P_8021AD)
+			adapter->aq_required &=
+				~IAVF_FLAG_AQ_DISABLE_STAG_VLAN_INSERTION;
+		break;
+	default:
+		dev_err(&adapter->pdev->dev, "Unsupported opcode %d specified for clearing aq_required bits for VIRTCHNL_VF_OFFLOAD_VLAN_V2 offload request\n",
+			offload_op);
+	}
+}
+
+/**
+ * iavf_send_vlan_offload_v2 - send offload enable/disable over virtchnl
+ * @adapter: adapter structure
+ * @tpid: VLAN TPID used for the command (i.e. 0x8100 or 0x88a8)
+ * @offload_op: offload_op used to make the request over virtchnl
+ */
+static void
+iavf_send_vlan_offload_v2(struct iavf_adapter *adapter, u16 tpid,
+			  enum virtchnl_ops offload_op)
+{
+	struct virtchnl_vlan_setting *msg;
+	int len = sizeof(*msg);
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot send %d, command %d pending\n",
+			offload_op, adapter->current_op);
+		return;
+	}
+
+	adapter->current_op = offload_op;
+
+	msg = kzalloc(len, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	msg->vport_id = adapter->vsi_res->vsi_id;
+
+	/* always clear to prevent unsupported and endless requests */
+	iavf_clear_offload_v2_aq_required(adapter, tpid, offload_op);
+
+	/* only send valid offload requests */
+	if (!iavf_set_vc_offload_ethertype(adapter, msg, tpid, offload_op))
+		iavf_send_pf_msg(adapter, offload_op, (u8 *)msg, len);
+	else
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+
+	kfree(msg);
+}
+
+/**
+ * iavf_enable_vlan_stripping_v2 - enable VLAN stripping
+ * @adapter: adapter structure
+ * @tpid: VLAN TPID used to enable VLAN stripping
+ */
+void iavf_enable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid)
+{
+	iavf_send_vlan_offload_v2(adapter, tpid,
+				  VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2);
+}
+
+/**
+ * iavf_disable_vlan_stripping_v2 - disable VLAN stripping
+ * @adapter: adapter structure
+ * @tpid: VLAN TPID used to disable VLAN stripping
+ */
+void iavf_disable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid)
+{
+	iavf_send_vlan_offload_v2(adapter, tpid,
+				  VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2);
+}
+
+/**
+ * iavf_enable_vlan_insertion_v2 - enable VLAN insertion
+ * @adapter: adapter structure
+ * @tpid: VLAN TPID used to enable VLAN insertion
+ */
+void iavf_enable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid)
+{
+	iavf_send_vlan_offload_v2(adapter, tpid,
+				  VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2);
+}
+
+/**
+ * iavf_disable_vlan_insertion_v2 - disable VLAN insertion
+ * @adapter: adapter structure
+ * @tpid: VLAN TPID used to disable VLAN insertion
+ */
+void iavf_disable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid)
+{
+	iavf_send_vlan_offload_v2(adapter, tpid,
+				  VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2);
+}
+
+#define IAVF_MAX_SPEED_STRLEN	13
+
+/**
+ * iavf_print_link_message - print link up or down
+ * @adapter: adapter structure
+ *
+ * Log a message telling the world of our wonderous link status
+ */
+static void iavf_print_link_message(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int link_speed_mbps;
+	char *speed;
+
+	if (!adapter->link_up) {
+		netdev_info(netdev, "NIC Link is Down\n");
+		return;
+	}
+
+	speed = kzalloc(IAVF_MAX_SPEED_STRLEN, GFP_KERNEL);
+	if (!speed)
+		return;
+
+	if (ADV_LINK_SUPPORT(adapter)) {
+		link_speed_mbps = adapter->link_speed_mbps;
+		goto print_link_msg;
+	}
+
+	switch (adapter->link_speed) {
+	case VIRTCHNL_LINK_SPEED_40GB:
+		link_speed_mbps = SPEED_40000;
+		break;
+	case VIRTCHNL_LINK_SPEED_25GB:
+		link_speed_mbps = SPEED_25000;
+		break;
+	case VIRTCHNL_LINK_SPEED_20GB:
+		link_speed_mbps = SPEED_20000;
+		break;
+	case VIRTCHNL_LINK_SPEED_10GB:
+		link_speed_mbps = SPEED_10000;
+		break;
+	case VIRTCHNL_LINK_SPEED_5GB:
+		link_speed_mbps = SPEED_5000;
+		break;
+	case VIRTCHNL_LINK_SPEED_2_5GB:
+		link_speed_mbps = SPEED_2500;
+		break;
+	case VIRTCHNL_LINK_SPEED_1GB:
+		link_speed_mbps = SPEED_1000;
+		break;
+	case VIRTCHNL_LINK_SPEED_100MB:
+		link_speed_mbps = SPEED_100;
+		break;
+	default:
+		link_speed_mbps = SPEED_UNKNOWN;
+		break;
+	}
+
+print_link_msg:
+	if (link_speed_mbps > SPEED_1000) {
+		if (link_speed_mbps == SPEED_2500)
+			snprintf(speed, IAVF_MAX_SPEED_STRLEN, "2.5 Gbps");
+		else
+			/* convert to Gbps inline */
+			snprintf(speed, IAVF_MAX_SPEED_STRLEN, "%d %s",
+				 link_speed_mbps / 1000, "Gbps");
+	} else if (link_speed_mbps == SPEED_UNKNOWN) {
+		snprintf(speed, IAVF_MAX_SPEED_STRLEN, "%s", "Unknown Mbps");
+	} else {
+		snprintf(speed, IAVF_MAX_SPEED_STRLEN, "%d %s",
+			 link_speed_mbps, "Mbps");
+	}
+
+	netdev_info(netdev, "NIC Link is Up Speed is %s Full Duplex\n", speed);
+	kfree(speed);
+}
+
+/**
+ * iavf_get_vpe_link_status
+ * @adapter: adapter structure
+ * @vpe: virtchnl_pf_event structure
+ *
+ * Helper function for determining the link status
+ **/
+static bool
+iavf_get_vpe_link_status(struct iavf_adapter *adapter,
+			 struct virtchnl_pf_event *vpe)
+{
+	if (ADV_LINK_SUPPORT(adapter))
+		return vpe->event_data.link_event_adv.link_status;
+	else
+		return vpe->event_data.link_event.link_status;
+}
+
+/**
+ * iavf_set_adapter_link_speed_from_vpe
+ * @adapter: adapter structure for which we are setting the link speed
+ * @vpe: virtchnl_pf_event structure that contains the link speed we are setting
+ *
+ * Helper function for setting iavf_adapter link speed
+ **/
+static void
+iavf_set_adapter_link_speed_from_vpe(struct iavf_adapter *adapter,
+				     struct virtchnl_pf_event *vpe)
+{
+	if (ADV_LINK_SUPPORT(adapter))
+		adapter->link_speed_mbps =
+			vpe->event_data.link_event_adv.link_speed;
+	else
+		adapter->link_speed = vpe->event_data.link_event.link_speed;
+}
+
+/**
+ * iavf_enable_channels
+ * @adapter: adapter structure
+ *
+ * Request that the PF enable channels as specified by
+ * the user via tc tool.
+ **/
+void iavf_enable_channels(struct iavf_adapter *adapter)
+{
+	struct virtchnl_tc_info *vti = NULL;
+	size_t len;
+	int i;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	len = virtchnl_struct_size(vti, list, adapter->num_tc);
+	vti = kzalloc(len, GFP_KERNEL);
+	if (!vti)
+		return;
+	vti->num_tc = adapter->num_tc;
+	for (i = 0; i < vti->num_tc; i++) {
+		vti->list[i].count = adapter->ch_config.ch_info[i].count;
+		vti->list[i].offset = adapter->ch_config.ch_info[i].offset;
+		vti->list[i].pad = 0;
+		vti->list[i].max_tx_rate =
+				adapter->ch_config.ch_info[i].max_tx_rate;
+	}
+
+	adapter->ch_config.state = __IAVF_TC_RUNNING;
+	adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
+	adapter->current_op = VIRTCHNL_OP_ENABLE_CHANNELS;
+	adapter->aq_required &= ~IAVF_FLAG_AQ_ENABLE_CHANNELS;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_ENABLE_CHANNELS, (u8 *)vti, len);
+	kfree(vti);
+}
+
+/**
+ * iavf_disable_channels
+ * @adapter: adapter structure
+ *
+ * Request that the PF disable channels that are configured
+ **/
+void iavf_disable_channels(struct iavf_adapter *adapter)
+{
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot configure mqprio, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	adapter->ch_config.state = __IAVF_TC_INVALID;
+	adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
+	adapter->current_op = VIRTCHNL_OP_DISABLE_CHANNELS;
+	adapter->aq_required &= ~IAVF_FLAG_AQ_DISABLE_CHANNELS;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_DISABLE_CHANNELS, NULL, 0);
+}
+
+/**
+ * iavf_print_cloud_filter
+ * @adapter: adapter structure
+ * @f: cloud filter to print
+ *
+ * Print the cloud filter
+ **/
+static void iavf_print_cloud_filter(struct iavf_adapter *adapter,
+				    struct virtchnl_filter *f)
+{
+	switch (f->flow_type) {
+	case VIRTCHNL_TCP_V4_FLOW:
+		dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n",
+			 &f->data.tcp_spec.dst_mac,
+			 &f->data.tcp_spec.src_mac,
+			 ntohs(f->data.tcp_spec.vlan_id),
+			 &f->data.tcp_spec.dst_ip[0],
+			 &f->data.tcp_spec.src_ip[0],
+			 ntohs(f->data.tcp_spec.dst_port),
+			 ntohs(f->data.tcp_spec.src_port));
+		break;
+	case VIRTCHNL_TCP_V6_FLOW:
+		dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n",
+			 &f->data.tcp_spec.dst_mac,
+			 &f->data.tcp_spec.src_mac,
+			 ntohs(f->data.tcp_spec.vlan_id),
+			 &f->data.tcp_spec.dst_ip,
+			 &f->data.tcp_spec.src_ip,
+			 ntohs(f->data.tcp_spec.dst_port),
+			 ntohs(f->data.tcp_spec.src_port));
+		break;
+	}
+}
+
+/**
+ * iavf_add_cloud_filter
+ * @adapter: adapter structure
+ *
+ * Request that the PF add cloud filters as specified
+ * by the user via tc tool.
+ **/
+void iavf_add_cloud_filter(struct iavf_adapter *adapter)
+{
+	struct iavf_cloud_filter *cf;
+	struct virtchnl_filter *f;
+	int len = 0, count = 0;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot add cloud filter, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+	list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+		if (cf->add) {
+			count++;
+			break;
+		}
+	}
+	if (!count) {
+		adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_ADD_CLOUD_FILTER;
+
+	len = sizeof(struct virtchnl_filter);
+	f = kzalloc(len, GFP_KERNEL);
+	if (!f)
+		return;
+
+	list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+		if (cf->add) {
+			memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
+			cf->add = false;
+			cf->state = __IAVF_CF_ADD_PENDING;
+			iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_CLOUD_FILTER,
+					 (u8 *)f, len);
+		}
+	}
+	kfree(f);
+}
+
+/**
+ * iavf_del_cloud_filter
+ * @adapter: adapter structure
+ *
+ * Request that the PF delete cloud filters as specified
+ * by the user via tc tool.
+ **/
+void iavf_del_cloud_filter(struct iavf_adapter *adapter)
+{
+	struct iavf_cloud_filter *cf, *cftmp;
+	struct virtchnl_filter *f;
+	int len = 0, count = 0;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot remove cloud filter, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+	list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+		if (cf->del) {
+			count++;
+			break;
+		}
+	}
+	if (!count) {
+		adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_CLOUD_FILTER;
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_DEL_CLOUD_FILTER;
+
+	len = sizeof(struct virtchnl_filter);
+	f = kzalloc(len, GFP_KERNEL);
+	if (!f)
+		return;
+
+	list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list, list) {
+		if (cf->del) {
+			memcpy(f, &cf->f, sizeof(struct virtchnl_filter));
+			cf->del = false;
+			cf->state = __IAVF_CF_DEL_PENDING;
+			iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_CLOUD_FILTER,
+					 (u8 *)f, len);
+		}
+	}
+	kfree(f);
+}
+
+/**
+ * iavf_add_fdir_filter
+ * @adapter: the VF adapter structure
+ *
+ * Request that the PF add Flow Director filters as specified
+ * by the user via ethtool.
+ **/
+void iavf_add_fdir_filter(struct iavf_adapter *adapter)
+{
+	struct iavf_fdir_fltr *fdir;
+	struct virtchnl_fdir_add *f;
+	bool process_fltr = false;
+	int len;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot add Flow Director filter, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	len = sizeof(struct virtchnl_fdir_add);
+	f = kzalloc(len, GFP_KERNEL);
+	if (!f)
+		return;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry(fdir, &adapter->fdir_list_head, list) {
+		if (fdir->state == IAVF_FDIR_FLTR_ADD_REQUEST) {
+			process_fltr = true;
+			fdir->state = IAVF_FDIR_FLTR_ADD_PENDING;
+			memcpy(f, &fdir->vc_add_msg, len);
+			break;
+		}
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	if (!process_fltr) {
+		/* prevent iavf_add_fdir_filter() from being called when there
+		 * are no filters to add
+		 */
+		adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_FDIR_FILTER;
+		kfree(f);
+		return;
+	}
+	adapter->current_op = VIRTCHNL_OP_ADD_FDIR_FILTER;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_FDIR_FILTER, (u8 *)f, len);
+	kfree(f);
+}
+
+/**
+ * iavf_del_fdir_filter
+ * @adapter: the VF adapter structure
+ *
+ * Request that the PF delete Flow Director filters as specified
+ * by the user via ethtool.
+ **/
+void iavf_del_fdir_filter(struct iavf_adapter *adapter)
+{
+	struct virtchnl_fdir_del f = {};
+	struct iavf_fdir_fltr *fdir;
+	bool process_fltr = false;
+	int len;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot remove Flow Director filter, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	len = sizeof(struct virtchnl_fdir_del);
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry(fdir, &adapter->fdir_list_head, list) {
+		if (fdir->state == IAVF_FDIR_FLTR_DEL_REQUEST) {
+			process_fltr = true;
+			f.vsi_id = fdir->vc_add_msg.vsi_id;
+			f.flow_id = fdir->flow_id;
+			fdir->state = IAVF_FDIR_FLTR_DEL_PENDING;
+			break;
+		} else if (fdir->state == IAVF_FDIR_FLTR_DIS_REQUEST) {
+			process_fltr = true;
+			f.vsi_id = fdir->vc_add_msg.vsi_id;
+			f.flow_id = fdir->flow_id;
+			fdir->state = IAVF_FDIR_FLTR_DIS_PENDING;
+			break;
+		}
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	if (!process_fltr) {
+		adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_FDIR_FILTER;
+		return;
+	}
+
+	adapter->current_op = VIRTCHNL_OP_DEL_FDIR_FILTER;
+	iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_FDIR_FILTER, (u8 *)&f, len);
+}
+
+/**
+ * iavf_add_adv_rss_cfg
+ * @adapter: the VF adapter structure
+ *
+ * Request that the PF add RSS configuration as specified
+ * by the user via ethtool.
+ **/
+void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter)
+{
+	struct virtchnl_rss_cfg *rss_cfg;
+	struct iavf_adv_rss *rss;
+	bool process_rss = false;
+	int len;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot add RSS configuration, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	len = sizeof(struct virtchnl_rss_cfg);
+	rss_cfg = kzalloc(len, GFP_KERNEL);
+	if (!rss_cfg)
+		return;
+
+	spin_lock_bh(&adapter->adv_rss_lock);
+	list_for_each_entry(rss, &adapter->adv_rss_list_head, list) {
+		if (rss->state == IAVF_ADV_RSS_ADD_REQUEST) {
+			process_rss = true;
+			rss->state = IAVF_ADV_RSS_ADD_PENDING;
+			memcpy(rss_cfg, &rss->cfg_msg, len);
+			iavf_print_adv_rss_cfg(adapter, rss,
+					       "Input set change for",
+					       "is pending");
+			break;
+		}
+	}
+	spin_unlock_bh(&adapter->adv_rss_lock);
+
+	if (process_rss) {
+		adapter->current_op = VIRTCHNL_OP_ADD_RSS_CFG;
+		iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_RSS_CFG,
+				 (u8 *)rss_cfg, len);
+	} else {
+		adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_ADV_RSS_CFG;
+	}
+
+	kfree(rss_cfg);
+}
+
+/**
+ * iavf_del_adv_rss_cfg
+ * @adapter: the VF adapter structure
+ *
+ * Request that the PF delete RSS configuration as specified
+ * by the user via ethtool.
+ **/
+void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter)
+{
+	struct virtchnl_rss_cfg *rss_cfg;
+	struct iavf_adv_rss *rss;
+	bool process_rss = false;
+	int len;
+
+	if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+		/* bail because we already have a command pending */
+		dev_err(&adapter->pdev->dev, "Cannot remove RSS configuration, command %d pending\n",
+			adapter->current_op);
+		return;
+	}
+
+	len = sizeof(struct virtchnl_rss_cfg);
+	rss_cfg = kzalloc(len, GFP_KERNEL);
+	if (!rss_cfg)
+		return;
+
+	spin_lock_bh(&adapter->adv_rss_lock);
+	list_for_each_entry(rss, &adapter->adv_rss_list_head, list) {
+		if (rss->state == IAVF_ADV_RSS_DEL_REQUEST) {
+			process_rss = true;
+			rss->state = IAVF_ADV_RSS_DEL_PENDING;
+			memcpy(rss_cfg, &rss->cfg_msg, len);
+			break;
+		}
+	}
+	spin_unlock_bh(&adapter->adv_rss_lock);
+
+	if (process_rss) {
+		adapter->current_op = VIRTCHNL_OP_DEL_RSS_CFG;
+		iavf_send_pf_msg(adapter, VIRTCHNL_OP_DEL_RSS_CFG,
+				 (u8 *)rss_cfg, len);
+	} else {
+		adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_ADV_RSS_CFG;
+	}
+
+	kfree(rss_cfg);
+}
+
+/**
+ * iavf_request_reset
+ * @adapter: adapter structure
+ *
+ * Request that the PF reset this VF. No response is expected.
+ **/
+int iavf_request_reset(struct iavf_adapter *adapter)
+{
+	int err;
+	/* Don't check CURRENT_OP - this is always higher priority */
+	err = iavf_send_pf_msg(adapter, VIRTCHNL_OP_RESET_VF, NULL, 0);
+	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+	return err;
+}
+
+/**
+ * iavf_netdev_features_vlan_strip_set - update vlan strip status
+ * @netdev: ptr to netdev being adjusted
+ * @enable: enable or disable vlan strip
+ *
+ * Helper function to change vlan strip status in netdev->features.
+ */
+static void iavf_netdev_features_vlan_strip_set(struct net_device *netdev,
+						const bool enable)
+{
+	if (enable)
+		netdev->features |= NETIF_F_HW_VLAN_CTAG_RX;
+	else
+		netdev->features &= ~NETIF_F_HW_VLAN_CTAG_RX;
+}
+
+/**
+ * iavf_activate_fdir_filters - Reactivate all FDIR filters after a reset
+ * @adapter: private adapter structure
+ *
+ * Called after a reset to re-add all FDIR filters and delete some of them
+ * if they were pending to be deleted.
+ */
+static void iavf_activate_fdir_filters(struct iavf_adapter *adapter)
+{
+	struct iavf_fdir_fltr *f, *ftmp;
+	bool add_filters = false;
+
+	spin_lock_bh(&adapter->fdir_fltr_lock);
+	list_for_each_entry_safe(f, ftmp, &adapter->fdir_list_head, list) {
+		if (f->state == IAVF_FDIR_FLTR_ADD_REQUEST ||
+		    f->state == IAVF_FDIR_FLTR_ADD_PENDING ||
+		    f->state == IAVF_FDIR_FLTR_ACTIVE) {
+			/* All filters and requests have been removed in PF,
+			 * restore them
+			 */
+			f->state = IAVF_FDIR_FLTR_ADD_REQUEST;
+			add_filters = true;
+		} else if (f->state == IAVF_FDIR_FLTR_DIS_REQUEST ||
+			   f->state == IAVF_FDIR_FLTR_DIS_PENDING) {
+			/* Link down state, leave filters as inactive */
+			f->state = IAVF_FDIR_FLTR_INACTIVE;
+		} else if (f->state == IAVF_FDIR_FLTR_DEL_REQUEST ||
+			   f->state == IAVF_FDIR_FLTR_DEL_PENDING) {
+			/* Delete filters that were pending to be deleted, the
+			 * list on PF is already cleared after a reset
+			 */
+			list_del(&f->list);
+			kfree(f);
+			adapter->fdir_active_fltr--;
+		}
+	}
+	spin_unlock_bh(&adapter->fdir_fltr_lock);
+
+	if (add_filters)
+		adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER;
+}
+
+/**
+ * iavf_virtchnl_completion
+ * @adapter: adapter structure
+ * @v_opcode: opcode sent by PF
+ * @v_retval: retval sent by PF
+ * @msg: message sent by PF
+ * @msglen: message length
+ *
+ * Asynchronous completion function for admin queue messages. Rather than busy
+ * wait, we fire off our requests and assume that no errors will be returned.
+ * This function handles the reply messages.
+ **/
+void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+			      enum virtchnl_ops v_opcode,
+			      enum iavf_status v_retval, u8 *msg, u16 msglen)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	if (v_opcode == VIRTCHNL_OP_EVENT) {
+		struct virtchnl_pf_event *vpe =
+			(struct virtchnl_pf_event *)msg;
+		bool link_up = iavf_get_vpe_link_status(adapter, vpe);
+
+		switch (vpe->event) {
+		case VIRTCHNL_EVENT_LINK_CHANGE:
+			iavf_set_adapter_link_speed_from_vpe(adapter, vpe);
+
+			/* we've already got the right link status, bail */
+			if (adapter->link_up == link_up)
+				break;
+
+			if (link_up) {
+				/* If we get link up message and start queues
+				 * before our queues are configured it will
+				 * trigger a TX hang. In that case, just ignore
+				 * the link status message,we'll get another one
+				 * after we enable queues and actually prepared
+				 * to send traffic.
+				 */
+				if (adapter->state != __IAVF_RUNNING)
+					break;
+
+				/* For ADq enabled VF, we reconfigure VSIs and
+				 * re-allocate queues. Hence wait till all
+				 * queues are enabled.
+				 */
+				if (adapter->flags &
+				    IAVF_FLAG_QUEUES_DISABLED)
+					break;
+			}
+
+			adapter->link_up = link_up;
+			if (link_up) {
+				netif_tx_start_all_queues(netdev);
+				netif_carrier_on(netdev);
+			} else {
+				netif_tx_stop_all_queues(netdev);
+				netif_carrier_off(netdev);
+			}
+			iavf_print_link_message(adapter);
+			break;
+		case VIRTCHNL_EVENT_RESET_IMPENDING:
+			dev_info(&adapter->pdev->dev, "Reset indication received from the PF\n");
+			if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) {
+				dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
+				iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
+			}
+			break;
+		default:
+			dev_err(&adapter->pdev->dev, "Unknown event %d from PF\n",
+				vpe->event);
+			break;
+		}
+		return;
+	}
+	if (v_retval) {
+		switch (v_opcode) {
+		case VIRTCHNL_OP_ADD_VLAN:
+			dev_err(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n",
+				iavf_stat_str(&adapter->hw, v_retval));
+			break;
+		case VIRTCHNL_OP_ADD_ETH_ADDR:
+			dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n",
+				iavf_stat_str(&adapter->hw, v_retval));
+			iavf_mac_add_reject(adapter);
+			/* restore administratively set MAC address */
+			ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
+			wake_up(&adapter->vc_waitqueue);
+			break;
+		case VIRTCHNL_OP_DEL_VLAN:
+			dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n",
+				iavf_stat_str(&adapter->hw, v_retval));
+			break;
+		case VIRTCHNL_OP_DEL_ETH_ADDR:
+			dev_err(&adapter->pdev->dev, "Failed to delete MAC filter, error %s\n",
+				iavf_stat_str(&adapter->hw, v_retval));
+			break;
+		case VIRTCHNL_OP_ENABLE_CHANNELS:
+			dev_err(&adapter->pdev->dev, "Failed to configure queue channels, error %s\n",
+				iavf_stat_str(&adapter->hw, v_retval));
+			adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+			adapter->ch_config.state = __IAVF_TC_INVALID;
+			netdev_reset_tc(netdev);
+			netif_tx_start_all_queues(netdev);
+			break;
+		case VIRTCHNL_OP_DISABLE_CHANNELS:
+			dev_err(&adapter->pdev->dev, "Failed to disable queue channels, error %s\n",
+				iavf_stat_str(&adapter->hw, v_retval));
+			adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+			adapter->ch_config.state = __IAVF_TC_RUNNING;
+			netif_tx_start_all_queues(netdev);
+			break;
+		case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
+			struct iavf_cloud_filter *cf, *cftmp;
+
+			list_for_each_entry_safe(cf, cftmp,
+						 &adapter->cloud_filter_list,
+						 list) {
+				if (cf->state == __IAVF_CF_ADD_PENDING) {
+					cf->state = __IAVF_CF_INVALID;
+					dev_info(&adapter->pdev->dev, "Failed to add cloud filter, error %s\n",
+						 iavf_stat_str(&adapter->hw,
+							       v_retval));
+					iavf_print_cloud_filter(adapter,
+								&cf->f);
+					list_del(&cf->list);
+					kfree(cf);
+					adapter->num_cloud_filters--;
+				}
+			}
+			}
+			break;
+		case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
+			struct iavf_cloud_filter *cf;
+
+			list_for_each_entry(cf, &adapter->cloud_filter_list,
+					    list) {
+				if (cf->state == __IAVF_CF_DEL_PENDING) {
+					cf->state = __IAVF_CF_ACTIVE;
+					dev_info(&adapter->pdev->dev, "Failed to del cloud filter, error %s\n",
+						 iavf_stat_str(&adapter->hw,
+							       v_retval));
+					iavf_print_cloud_filter(adapter,
+								&cf->f);
+				}
+			}
+			}
+			break;
+		case VIRTCHNL_OP_ADD_FDIR_FILTER: {
+			struct iavf_fdir_fltr *fdir, *fdir_tmp;
+
+			spin_lock_bh(&adapter->fdir_fltr_lock);
+			list_for_each_entry_safe(fdir, fdir_tmp,
+						 &adapter->fdir_list_head,
+						 list) {
+				if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING) {
+					dev_info(&adapter->pdev->dev, "Failed to add Flow Director filter, error %s\n",
+						 iavf_stat_str(&adapter->hw,
+							       v_retval));
+					iavf_print_fdir_fltr(adapter, fdir);
+					if (msglen)
+						dev_err(&adapter->pdev->dev,
+							"%s\n", msg);
+					list_del(&fdir->list);
+					kfree(fdir);
+					adapter->fdir_active_fltr--;
+				}
+			}
+			spin_unlock_bh(&adapter->fdir_fltr_lock);
+			}
+			break;
+		case VIRTCHNL_OP_DEL_FDIR_FILTER: {
+			struct iavf_fdir_fltr *fdir;
+
+			spin_lock_bh(&adapter->fdir_fltr_lock);
+			list_for_each_entry(fdir, &adapter->fdir_list_head,
+					    list) {
+				if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING ||
+				    fdir->state == IAVF_FDIR_FLTR_DIS_PENDING) {
+					fdir->state = IAVF_FDIR_FLTR_ACTIVE;
+					dev_info(&adapter->pdev->dev, "Failed to del Flow Director filter, error %s\n",
+						 iavf_stat_str(&adapter->hw,
+							       v_retval));
+					iavf_print_fdir_fltr(adapter, fdir);
+				}
+			}
+			spin_unlock_bh(&adapter->fdir_fltr_lock);
+			}
+			break;
+		case VIRTCHNL_OP_ADD_RSS_CFG: {
+			struct iavf_adv_rss *rss, *rss_tmp;
+
+			spin_lock_bh(&adapter->adv_rss_lock);
+			list_for_each_entry_safe(rss, rss_tmp,
+						 &adapter->adv_rss_list_head,
+						 list) {
+				if (rss->state == IAVF_ADV_RSS_ADD_PENDING) {
+					iavf_print_adv_rss_cfg(adapter, rss,
+							       "Failed to change the input set for",
+							       NULL);
+					list_del(&rss->list);
+					kfree(rss);
+				}
+			}
+			spin_unlock_bh(&adapter->adv_rss_lock);
+			}
+			break;
+		case VIRTCHNL_OP_DEL_RSS_CFG: {
+			struct iavf_adv_rss *rss;
+
+			spin_lock_bh(&adapter->adv_rss_lock);
+			list_for_each_entry(rss, &adapter->adv_rss_list_head,
+					    list) {
+				if (rss->state == IAVF_ADV_RSS_DEL_PENDING) {
+					rss->state = IAVF_ADV_RSS_ACTIVE;
+					dev_err(&adapter->pdev->dev, "Failed to delete RSS configuration, error %s\n",
+						iavf_stat_str(&adapter->hw,
+							      v_retval));
+				}
+			}
+			spin_unlock_bh(&adapter->adv_rss_lock);
+			}
+			break;
+		case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
+			dev_warn(&adapter->pdev->dev, "Changing VLAN Stripping is not allowed when Port VLAN is configured\n");
+			/* Vlan stripping could not be enabled by ethtool.
+			 * Disable it in netdev->features.
+			 */
+			iavf_netdev_features_vlan_strip_set(netdev, false);
+			break;
+		case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
+			dev_warn(&adapter->pdev->dev, "Changing VLAN Stripping is not allowed when Port VLAN is configured\n");
+			/* Vlan stripping could not be disabled by ethtool.
+			 * Enable it in netdev->features.
+			 */
+			iavf_netdev_features_vlan_strip_set(netdev, true);
+			break;
+		case VIRTCHNL_OP_ADD_VLAN_V2:
+			iavf_vlan_add_reject(adapter);
+			dev_warn(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n",
+				 iavf_stat_str(&adapter->hw, v_retval));
+			break;
+		default:
+			dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
+				v_retval, iavf_stat_str(&adapter->hw, v_retval),
+				v_opcode);
+		}
+	}
+	switch (v_opcode) {
+	case VIRTCHNL_OP_ADD_ETH_ADDR:
+		if (!v_retval)
+			iavf_mac_add_ok(adapter);
+		if (!ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr))
+			if (!ether_addr_equal(netdev->dev_addr,
+					      adapter->hw.mac.addr)) {
+				netif_addr_lock_bh(netdev);
+				eth_hw_addr_set(netdev, adapter->hw.mac.addr);
+				netif_addr_unlock_bh(netdev);
+			}
+		wake_up(&adapter->vc_waitqueue);
+		break;
+	case VIRTCHNL_OP_GET_STATS: {
+		struct iavf_eth_stats *stats =
+			(struct iavf_eth_stats *)msg;
+		netdev->stats.rx_packets = stats->rx_unicast +
+					   stats->rx_multicast +
+					   stats->rx_broadcast;
+		netdev->stats.tx_packets = stats->tx_unicast +
+					   stats->tx_multicast +
+					   stats->tx_broadcast;
+		netdev->stats.rx_bytes = stats->rx_bytes;
+		netdev->stats.tx_bytes = stats->tx_bytes;
+		netdev->stats.tx_errors = stats->tx_errors;
+		netdev->stats.rx_dropped = stats->rx_discards;
+		netdev->stats.tx_dropped = stats->tx_discards;
+		adapter->current_stats = *stats;
+		}
+		break;
+	case VIRTCHNL_OP_GET_VF_RESOURCES: {
+		u16 len = IAVF_VIRTCHNL_VF_RESOURCE_SIZE;
+
+		memcpy(adapter->vf_res, msg, min(msglen, len));
+		iavf_validate_num_queues(adapter);
+		iavf_vf_parse_hw_config(&adapter->hw, adapter->vf_res);
+		if (is_zero_ether_addr(adapter->hw.mac.addr)) {
+			/* restore current mac address */
+			ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
+		} else {
+			netif_addr_lock_bh(netdev);
+			/* refresh current mac address if changed */
+			ether_addr_copy(netdev->perm_addr,
+					adapter->hw.mac.addr);
+			netif_addr_unlock_bh(netdev);
+		}
+		spin_lock_bh(&adapter->mac_vlan_list_lock);
+		iavf_add_filter(adapter, adapter->hw.mac.addr);
+
+		if (VLAN_ALLOWED(adapter)) {
+			if (!list_empty(&adapter->vlan_filter_list)) {
+				struct iavf_vlan_filter *vlf;
+
+				/* re-add all VLAN filters over virtchnl */
+				list_for_each_entry(vlf,
+						    &adapter->vlan_filter_list,
+						    list)
+					vlf->state = IAVF_VLAN_ADD;
+
+				adapter->aq_required |=
+					IAVF_FLAG_AQ_ADD_VLAN_FILTER;
+			}
+		}
+
+		spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+		iavf_activate_fdir_filters(adapter);
+
+		iavf_parse_vf_resource_msg(adapter);
+
+		/* negotiated VIRTCHNL_VF_OFFLOAD_VLAN_V2, so wait for the
+		 * response to VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS to finish
+		 * configuration
+		 */
+		if (VLAN_V2_ALLOWED(adapter))
+			break;
+		/* fallthrough and finish config if VIRTCHNL_VF_OFFLOAD_VLAN_V2
+		 * wasn't successfully negotiated with the PF
+		 */
+		}
+		fallthrough;
+	case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS: {
+		struct iavf_mac_filter *f;
+		bool was_mac_changed;
+		u64 aq_required = 0;
+
+		if (v_opcode == VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS)
+			memcpy(&adapter->vlan_v2_caps, msg,
+			       min_t(u16, msglen,
+				     sizeof(adapter->vlan_v2_caps)));
+
+		iavf_process_config(adapter);
+		adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES;
+		iavf_schedule_finish_config(adapter);
+
+		iavf_set_queue_vlan_tag_loc(adapter);
+
+		was_mac_changed = !ether_addr_equal(netdev->dev_addr,
+						    adapter->hw.mac.addr);
+
+		spin_lock_bh(&adapter->mac_vlan_list_lock);
+
+		/* re-add all MAC filters */
+		list_for_each_entry(f, &adapter->mac_filter_list, list) {
+			if (was_mac_changed &&
+			    ether_addr_equal(netdev->dev_addr, f->macaddr))
+				ether_addr_copy(f->macaddr,
+						adapter->hw.mac.addr);
+
+			f->is_new_mac = true;
+			f->add = true;
+			f->add_handled = false;
+			f->remove = false;
+		}
+
+		/* re-add all VLAN filters */
+		if (VLAN_FILTERING_ALLOWED(adapter)) {
+			struct iavf_vlan_filter *vlf;
+
+			if (!list_empty(&adapter->vlan_filter_list)) {
+				list_for_each_entry(vlf,
+						    &adapter->vlan_filter_list,
+						    list)
+					vlf->state = IAVF_VLAN_ADD;
+
+				aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER;
+			}
+		}
+
+		spin_unlock_bh(&adapter->mac_vlan_list_lock);
+
+		netif_addr_lock_bh(netdev);
+		eth_hw_addr_set(netdev, adapter->hw.mac.addr);
+		netif_addr_unlock_bh(netdev);
+
+		adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER |
+			aq_required;
+		}
+		break;
+	case VIRTCHNL_OP_ENABLE_QUEUES:
+		/* enable transmits */
+		iavf_irq_enable(adapter, true);
+		wake_up(&adapter->reset_waitqueue);
+		adapter->flags &= ~IAVF_FLAG_QUEUES_DISABLED;
+		break;
+	case VIRTCHNL_OP_DISABLE_QUEUES:
+		iavf_free_all_tx_resources(adapter);
+		iavf_free_all_rx_resources(adapter);
+		if (adapter->state == __IAVF_DOWN_PENDING) {
+			iavf_change_state(adapter, __IAVF_DOWN);
+			wake_up(&adapter->down_waitqueue);
+		}
+		break;
+	case VIRTCHNL_OP_VERSION:
+	case VIRTCHNL_OP_CONFIG_IRQ_MAP:
+		/* Don't display an error if we get these out of sequence.
+		 * If the firmware needed to get kicked, we'll get these and
+		 * it's no problem.
+		 */
+		if (v_opcode != adapter->current_op)
+			return;
+		break;
+	case VIRTCHNL_OP_RDMA:
+		/* Gobble zero-length replies from the PF. They indicate that
+		 * a previous message was received OK, and the client doesn't
+		 * care about that.
+		 */
+		if (msglen && CLIENT_ENABLED(adapter))
+			iavf_notify_client_message(&adapter->vsi, msg, msglen);
+		break;
+
+	case VIRTCHNL_OP_CONFIG_RDMA_IRQ_MAP:
+		adapter->client_pending &=
+				~(BIT(VIRTCHNL_OP_CONFIG_RDMA_IRQ_MAP));
+		break;
+	case VIRTCHNL_OP_GET_RSS_HENA_CAPS: {
+		struct virtchnl_rss_hena *vrh = (struct virtchnl_rss_hena *)msg;
+
+		if (msglen == sizeof(*vrh))
+			adapter->hena = vrh->hena;
+		else
+			dev_warn(&adapter->pdev->dev,
+				 "Invalid message %d from PF\n", v_opcode);
+		}
+		break;
+	case VIRTCHNL_OP_REQUEST_QUEUES: {
+		struct virtchnl_vf_res_request *vfres =
+			(struct virtchnl_vf_res_request *)msg;
+
+		if (vfres->num_queue_pairs != adapter->num_req_queues) {
+			dev_info(&adapter->pdev->dev,
+				 "Requested %d queues, PF can support %d\n",
+				 adapter->num_req_queues,
+				 vfres->num_queue_pairs);
+			adapter->num_req_queues = 0;
+			adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+		}
+		}
+		break;
+	case VIRTCHNL_OP_ADD_CLOUD_FILTER: {
+		struct iavf_cloud_filter *cf;
+
+		list_for_each_entry(cf, &adapter->cloud_filter_list, list) {
+			if (cf->state == __IAVF_CF_ADD_PENDING)
+				cf->state = __IAVF_CF_ACTIVE;
+		}
+		}
+		break;
+	case VIRTCHNL_OP_DEL_CLOUD_FILTER: {
+		struct iavf_cloud_filter *cf, *cftmp;
+
+		list_for_each_entry_safe(cf, cftmp, &adapter->cloud_filter_list,
+					 list) {
+			if (cf->state == __IAVF_CF_DEL_PENDING) {
+				cf->state = __IAVF_CF_INVALID;
+				list_del(&cf->list);
+				kfree(cf);
+				adapter->num_cloud_filters--;
+			}
+		}
+		}
+		break;
+	case VIRTCHNL_OP_ADD_FDIR_FILTER: {
+		struct virtchnl_fdir_add *add_fltr = (struct virtchnl_fdir_add *)msg;
+		struct iavf_fdir_fltr *fdir, *fdir_tmp;
+
+		spin_lock_bh(&adapter->fdir_fltr_lock);
+		list_for_each_entry_safe(fdir, fdir_tmp,
+					 &adapter->fdir_list_head,
+					 list) {
+			if (fdir->state == IAVF_FDIR_FLTR_ADD_PENDING) {
+				if (add_fltr->status == VIRTCHNL_FDIR_SUCCESS) {
+					dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is added\n",
+						 fdir->loc);
+					fdir->state = IAVF_FDIR_FLTR_ACTIVE;
+					fdir->flow_id = add_fltr->flow_id;
+				} else {
+					dev_info(&adapter->pdev->dev, "Failed to add Flow Director filter with status: %d\n",
+						 add_fltr->status);
+					iavf_print_fdir_fltr(adapter, fdir);
+					list_del(&fdir->list);
+					kfree(fdir);
+					adapter->fdir_active_fltr--;
+				}
+			}
+		}
+		spin_unlock_bh(&adapter->fdir_fltr_lock);
+		}
+		break;
+	case VIRTCHNL_OP_DEL_FDIR_FILTER: {
+		struct virtchnl_fdir_del *del_fltr = (struct virtchnl_fdir_del *)msg;
+		struct iavf_fdir_fltr *fdir, *fdir_tmp;
+
+		spin_lock_bh(&adapter->fdir_fltr_lock);
+		list_for_each_entry_safe(fdir, fdir_tmp, &adapter->fdir_list_head,
+					 list) {
+			if (fdir->state == IAVF_FDIR_FLTR_DEL_PENDING) {
+				if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS ||
+				    del_fltr->status ==
+				    VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST) {
+					dev_info(&adapter->pdev->dev, "Flow Director filter with location %u is deleted\n",
+						 fdir->loc);
+					list_del(&fdir->list);
+					kfree(fdir);
+					adapter->fdir_active_fltr--;
+				} else {
+					fdir->state = IAVF_FDIR_FLTR_ACTIVE;
+					dev_info(&adapter->pdev->dev, "Failed to delete Flow Director filter with status: %d\n",
+						 del_fltr->status);
+					iavf_print_fdir_fltr(adapter, fdir);
+				}
+			} else if (fdir->state == IAVF_FDIR_FLTR_DIS_PENDING) {
+				if (del_fltr->status == VIRTCHNL_FDIR_SUCCESS ||
+				    del_fltr->status ==
+				    VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST) {
+					fdir->state = IAVF_FDIR_FLTR_INACTIVE;
+				} else {
+					fdir->state = IAVF_FDIR_FLTR_ACTIVE;
+					dev_info(&adapter->pdev->dev, "Failed to disable Flow Director filter with status: %d\n",
+						 del_fltr->status);
+					iavf_print_fdir_fltr(adapter, fdir);
+				}
+			}
+		}
+		spin_unlock_bh(&adapter->fdir_fltr_lock);
+		}
+		break;
+	case VIRTCHNL_OP_ADD_RSS_CFG: {
+		struct iavf_adv_rss *rss;
+
+		spin_lock_bh(&adapter->adv_rss_lock);
+		list_for_each_entry(rss, &adapter->adv_rss_list_head, list) {
+			if (rss->state == IAVF_ADV_RSS_ADD_PENDING) {
+				iavf_print_adv_rss_cfg(adapter, rss,
+						       "Input set change for",
+						       "successful");
+				rss->state = IAVF_ADV_RSS_ACTIVE;
+			}
+		}
+		spin_unlock_bh(&adapter->adv_rss_lock);
+		}
+		break;
+	case VIRTCHNL_OP_DEL_RSS_CFG: {
+		struct iavf_adv_rss *rss, *rss_tmp;
+
+		spin_lock_bh(&adapter->adv_rss_lock);
+		list_for_each_entry_safe(rss, rss_tmp,
+					 &adapter->adv_rss_list_head, list) {
+			if (rss->state == IAVF_ADV_RSS_DEL_PENDING) {
+				list_del(&rss->list);
+				kfree(rss);
+			}
+		}
+		spin_unlock_bh(&adapter->adv_rss_lock);
+		}
+		break;
+	case VIRTCHNL_OP_ADD_VLAN_V2: {
+		struct iavf_vlan_filter *f;
+
+		spin_lock_bh(&adapter->mac_vlan_list_lock);
+		list_for_each_entry(f, &adapter->vlan_filter_list, list) {
+			if (f->state == IAVF_VLAN_IS_NEW)
+				f->state = IAVF_VLAN_ACTIVE;
+		}
+		spin_unlock_bh(&adapter->mac_vlan_list_lock);
+		}
+		break;
+	case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
+		/* PF enabled vlan strip on this VF.
+		 * Update netdev->features if needed to be in sync with ethtool.
+		 */
+		if (!v_retval)
+			iavf_netdev_features_vlan_strip_set(netdev, true);
+		break;
+	case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
+		/* PF disabled vlan strip on this VF.
+		 * Update netdev->features if needed to be in sync with ethtool.
+		 */
+		if (!v_retval)
+			iavf_netdev_features_vlan_strip_set(netdev, false);
+		break;
+	default:
+		if (adapter->current_op && (v_opcode != adapter->current_op))
+			dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",
+				 adapter->current_op, v_opcode);
+		break;
+	} /* switch v_opcode */
+	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+}
diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
new file mode 100644
index 0000000000..960277d78e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018, Intel Corporation.
+
+#
+# Makefile for the Intel(R) Ethernet Connection E800 Series Linux Driver
+#
+
+obj-$(CONFIG_ICE) += ice.o
+
+ice-y := ice_main.o	\
+	 ice_controlq.o	\
+	 ice_common.o	\
+	 ice_nvm.o	\
+	 ice_switch.o	\
+	 ice_sched.o	\
+	 ice_base.o	\
+	 ice_lib.o	\
+	 ice_txrx_lib.o	\
+	 ice_txrx.o	\
+	 ice_fltr.o	\
+	 ice_irq.o	\
+	 ice_pf_vsi_vlan_ops.o \
+	 ice_vsi_vlan_ops.o \
+	 ice_vsi_vlan_lib.o \
+	 ice_fdir.o	\
+	 ice_ethtool_fdir.o \
+	 ice_vlan_mode.o \
+	 ice_flex_pipe.o \
+	 ice_flow.o	\
+	 ice_idc.o	\
+	 ice_devlink.o	\
+	 ice_ddp.o	\
+	 ice_fw_update.o \
+	 ice_lag.o	\
+	 ice_ethtool.o  \
+	 ice_repr.o	\
+	 ice_tc_lib.o
+ice-$(CONFIG_PCI_IOV) +=	\
+	ice_sriov.o		\
+	ice_virtchnl.o		\
+	ice_virtchnl_allowlist.o \
+	ice_virtchnl_fdir.o	\
+	ice_vf_mbx.o		\
+	ice_vf_vsi_vlan_ops.o	\
+	ice_vf_lib.o
+ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o
+ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
+ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o
+ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
+ice-$(CONFIG_ICE_SWITCHDEV) += ice_eswitch.o ice_eswitch_br.o
+ice-$(CONFIG_GNSS) += ice_gnss.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
new file mode 100644
index 0000000000..5022b036ca
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -0,0 +1,971 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_H_
+#define _ICE_H_
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/firmware.h>
+#include <linux/netdevice.h>
+#include <linux/compiler.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/cpumask.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_vlan.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ethtool.h>
+#include <linux/timer.h>
+#include <linux/delay.h>
+#include <linux/bitmap.h>
+#include <linux/log2.h>
+#include <linux/ip.h>
+#include <linux/sctp.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_sched.h>
+#include <linux/if_bridge.h>
+#include <linux/ctype.h>
+#include <linux/linkmode.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/auxiliary_bus.h>
+#include <linux/avf/virtchnl.h>
+#include <linux/cpu_rmap.h>
+#include <linux/dim.h>
+#include <linux/gnss.h>
+#include <net/pkt_cls.h>
+#include <net/pkt_sched.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/ip.h>
+#include <net/devlink.h>
+#include <net/ipv6.h>
+#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
+#include <net/geneve.h>
+#include <net/gre.h>
+#include <net/udp_tunnel.h>
+#include <net/vxlan.h>
+#include <net/gtp.h>
+#include <linux/ppp_defs.h>
+#include "ice_devids.h"
+#include "ice_type.h"
+#include "ice_txrx.h"
+#include "ice_dcb.h"
+#include "ice_switch.h"
+#include "ice_common.h"
+#include "ice_flow.h"
+#include "ice_sched.h"
+#include "ice_idc_int.h"
+#include "ice_sriov.h"
+#include "ice_vf_mbx.h"
+#include "ice_ptp.h"
+#include "ice_fdir.h"
+#include "ice_xsk.h"
+#include "ice_arfs.h"
+#include "ice_repr.h"
+#include "ice_eswitch.h"
+#include "ice_lag.h"
+#include "ice_vsi_vlan_ops.h"
+#include "ice_gnss.h"
+#include "ice_irq.h"
+
+#define ICE_BAR0		0
+#define ICE_REQ_DESC_MULTIPLE	32
+#define ICE_MIN_NUM_DESC	64
+#define ICE_MAX_NUM_DESC	8160
+#define ICE_DFLT_MIN_RX_DESC	512
+#define ICE_DFLT_NUM_TX_DESC	256
+#define ICE_DFLT_NUM_RX_DESC	2048
+
+#define ICE_DFLT_TRAFFIC_CLASS	BIT(0)
+#define ICE_INT_NAME_STR_LEN	(IFNAMSIZ + 16)
+#define ICE_AQ_LEN		192
+#define ICE_MBXSQ_LEN		64
+#define ICE_SBQ_LEN		64
+#define ICE_MIN_LAN_TXRX_MSIX	1
+#define ICE_MIN_LAN_OICR_MSIX	1
+#define ICE_MIN_MSIX		(ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX)
+#define ICE_FDIR_MSIX		2
+#define ICE_RDMA_NUM_AEQ_MSIX	4
+#define ICE_MIN_RDMA_MSIX	2
+#define ICE_ESWITCH_MSIX	1
+#define ICE_NO_VSI		0xffff
+#define ICE_VSI_MAP_CONTIG	0
+#define ICE_VSI_MAP_SCATTER	1
+#define ICE_MAX_SCATTER_TXQS	16
+#define ICE_MAX_SCATTER_RXQS	16
+#define ICE_Q_WAIT_RETRY_LIMIT	10
+#define ICE_Q_WAIT_MAX_RETRY	(5 * ICE_Q_WAIT_RETRY_LIMIT)
+#define ICE_MAX_LG_RSS_QS	256
+#define ICE_INVAL_Q_INDEX	0xffff
+
+#define ICE_MAX_RXQS_PER_TC		256	/* Used when setting VSI context per TC Rx queues */
+
+#define ICE_CHNL_START_TC		1
+
+#define ICE_MAX_RESET_WAIT		20
+
+#define ICE_VSIQF_HKEY_ARRAY_SIZE	((VSIQF_HKEY_MAX_INDEX + 1) *	4)
+
+#define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
+
+#define ICE_MAX_MTU	(ICE_AQ_SET_MAC_FRAME_SIZE_MAX - ICE_ETH_PKT_HDR_PAD)
+
+#define ICE_MAX_TSO_SIZE 131072
+
+#define ICE_UP_TABLE_TRANSLATE(val, i) \
+		(((val) << ICE_AQ_VSI_UP_TABLE_UP##i##_S) & \
+		  ICE_AQ_VSI_UP_TABLE_UP##i##_M)
+
+#define ICE_TX_DESC(R, i) (&(((struct ice_tx_desc *)((R)->desc))[i]))
+#define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i]))
+#define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i]))
+#define ICE_TX_FDIRDESC(R, i) (&(((struct ice_fltr_desc *)((R)->desc))[i]))
+
+/* Minimum BW limit is 500 Kbps for any scheduler node */
+#define ICE_MIN_BW_LIMIT		500
+/* User can specify BW in either Kbit/Mbit/Gbit and OS converts it in bytes.
+ * use it to convert user specified BW limit into Kbps
+ */
+#define ICE_BW_KBPS_DIVISOR		125
+
+/* Default recipes have priority 4 and below, hence priority values between 5..7
+ * can be used as filter priority for advanced switch filter (advanced switch
+ * filters need new recipe to be created for specified extraction sequence
+ * because default recipe extraction sequence does not represent custom
+ * extraction)
+ */
+#define ICE_SWITCH_FLTR_PRIO_QUEUE	7
+/* prio 6 is reserved for future use (e.g. switch filter with L3 fields +
+ * (Optional: IP TOS/TTL) + L4 fields + (optionally: TCP fields such as
+ * SYN/FIN/RST))
+ */
+#define ICE_SWITCH_FLTR_PRIO_RSVD	6
+#define ICE_SWITCH_FLTR_PRIO_VSI	5
+#define ICE_SWITCH_FLTR_PRIO_QGRP	ICE_SWITCH_FLTR_PRIO_VSI
+
+/* Macro for each VSI in a PF */
+#define ice_for_each_vsi(pf, i) \
+	for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++)
+
+/* Macros for each Tx/Xdp/Rx ring in a VSI */
+#define ice_for_each_txq(vsi, i) \
+	for ((i) = 0; (i) < (vsi)->num_txq; (i)++)
+
+#define ice_for_each_xdp_txq(vsi, i) \
+	for ((i) = 0; (i) < (vsi)->num_xdp_txq; (i)++)
+
+#define ice_for_each_rxq(vsi, i) \
+	for ((i) = 0; (i) < (vsi)->num_rxq; (i)++)
+
+/* Macros for each allocated Tx/Rx ring whether used or not in a VSI */
+#define ice_for_each_alloc_txq(vsi, i) \
+	for ((i) = 0; (i) < (vsi)->alloc_txq; (i)++)
+
+#define ice_for_each_alloc_rxq(vsi, i) \
+	for ((i) = 0; (i) < (vsi)->alloc_rxq; (i)++)
+
+#define ice_for_each_q_vector(vsi, i) \
+	for ((i) = 0; (i) < (vsi)->num_q_vectors; (i)++)
+
+#define ice_for_each_chnl_tc(i)	\
+	for ((i) = ICE_CHNL_START_TC; (i) < ICE_CHNL_MAX_TC; (i)++)
+
+#define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_UCAST_RX)
+
+#define ICE_UCAST_VLAN_PROMISC_BITS (ICE_PROMISC_UCAST_TX | \
+				     ICE_PROMISC_UCAST_RX | \
+				     ICE_PROMISC_VLAN_TX  | \
+				     ICE_PROMISC_VLAN_RX)
+
+#define ICE_MCAST_PROMISC_BITS (ICE_PROMISC_MCAST_TX | ICE_PROMISC_MCAST_RX)
+
+#define ICE_MCAST_VLAN_PROMISC_BITS (ICE_PROMISC_MCAST_TX | \
+				     ICE_PROMISC_MCAST_RX | \
+				     ICE_PROMISC_VLAN_TX  | \
+				     ICE_PROMISC_VLAN_RX)
+
+#define ice_pf_to_dev(pf) (&((pf)->pdev->dev))
+
+enum ice_feature {
+	ICE_F_DSCP,
+	ICE_F_PTP_EXTTS,
+	ICE_F_SMA_CTRL,
+	ICE_F_GNSS,
+	ICE_F_ROCE_LAG,
+	ICE_F_SRIOV_LAG,
+	ICE_F_MAX
+};
+
+DECLARE_STATIC_KEY_FALSE(ice_xdp_locking_key);
+
+struct ice_channel {
+	struct list_head list;
+	u8 type;
+	u16 sw_id;
+	u16 base_q;
+	u16 num_rxq;
+	u16 num_txq;
+	u16 vsi_num;
+	u8 ena_tc;
+	struct ice_aqc_vsi_props info;
+	u64 max_tx_rate;
+	u64 min_tx_rate;
+	atomic_t num_sb_fltr;
+	struct ice_vsi *ch_vsi;
+};
+
+struct ice_txq_meta {
+	u32 q_teid;	/* Tx-scheduler element identifier */
+	u16 q_id;	/* Entry in VSI's txq_map bitmap */
+	u16 q_handle;	/* Relative index of Tx queue within TC */
+	u16 vsi_idx;	/* VSI index that Tx queue belongs to */
+	u8 tc;		/* TC number that Tx queue belongs to */
+};
+
+struct ice_tc_info {
+	u16 qoffset;
+	u16 qcount_tx;
+	u16 qcount_rx;
+	u8 netdev_tc;
+};
+
+struct ice_tc_cfg {
+	u8 numtc; /* Total number of enabled TCs */
+	u16 ena_tc; /* Tx map */
+	struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS];
+};
+
+struct ice_qs_cfg {
+	struct mutex *qs_mutex;  /* will be assigned to &pf->avail_q_mutex */
+	unsigned long *pf_map;
+	unsigned long pf_map_size;
+	unsigned int q_count;
+	unsigned int scatter_count;
+	u16 *vsi_map;
+	u16 vsi_map_offset;
+	u8 mapping_mode;
+};
+
+struct ice_sw {
+	struct ice_pf *pf;
+	u16 sw_id;		/* switch ID for this switch */
+	u16 bridge_mode;	/* VEB/VEPA/Port Virtualizer */
+};
+
+enum ice_pf_state {
+	ICE_TESTING,
+	ICE_DOWN,
+	ICE_NEEDS_RESTART,
+	ICE_PREPARED_FOR_RESET,	/* set by driver when prepared */
+	ICE_RESET_OICR_RECV,		/* set by driver after rcv reset OICR */
+	ICE_PFR_REQ,		/* set by driver */
+	ICE_CORER_REQ,		/* set by driver */
+	ICE_GLOBR_REQ,		/* set by driver */
+	ICE_CORER_RECV,		/* set by OICR handler */
+	ICE_GLOBR_RECV,		/* set by OICR handler */
+	ICE_EMPR_RECV,		/* set by OICR handler */
+	ICE_SUSPENDED,		/* set on module remove path */
+	ICE_RESET_FAILED,		/* set by reset/rebuild */
+	/* When checking for the PF to be in a nominal operating state, the
+	 * bits that are grouped at the beginning of the list need to be
+	 * checked. Bits occurring before ICE_STATE_NOMINAL_CHECK_BITS will
+	 * be checked. If you need to add a bit into consideration for nominal
+	 * operating state, it must be added before
+	 * ICE_STATE_NOMINAL_CHECK_BITS. Do not move this entry's position
+	 * without appropriate consideration.
+	 */
+	ICE_STATE_NOMINAL_CHECK_BITS,
+	ICE_ADMINQ_EVENT_PENDING,
+	ICE_MAILBOXQ_EVENT_PENDING,
+	ICE_SIDEBANDQ_EVENT_PENDING,
+	ICE_MDD_EVENT_PENDING,
+	ICE_VFLR_EVENT_PENDING,
+	ICE_FLTR_OVERFLOW_PROMISC,
+	ICE_VF_DIS,
+	ICE_CFG_BUSY,
+	ICE_SERVICE_SCHED,
+	ICE_SERVICE_DIS,
+	ICE_FD_FLUSH_REQ,
+	ICE_OICR_INTR_DIS,		/* Global OICR interrupt disabled */
+	ICE_MDD_VF_PRINT_PENDING,	/* set when MDD event handle */
+	ICE_VF_RESETS_DISABLED,	/* disable resets during ice_remove */
+	ICE_LINK_DEFAULT_OVERRIDE_PENDING,
+	ICE_PHY_INIT_COMPLETE,
+	ICE_FD_VF_FLUSH_CTX,		/* set at FD Rx IRQ or timeout */
+	ICE_AUX_ERR_PENDING,
+	ICE_STATE_NBITS		/* must be last */
+};
+
+enum ice_vsi_state {
+	ICE_VSI_DOWN,
+	ICE_VSI_NEEDS_RESTART,
+	ICE_VSI_NETDEV_ALLOCD,
+	ICE_VSI_NETDEV_REGISTERED,
+	ICE_VSI_UMAC_FLTR_CHANGED,
+	ICE_VSI_MMAC_FLTR_CHANGED,
+	ICE_VSI_PROMISC_CHANGED,
+	ICE_VSI_STATE_NBITS		/* must be last */
+};
+
+struct ice_vsi_stats {
+	struct ice_ring_stats **tx_ring_stats;  /* Tx ring stats array */
+	struct ice_ring_stats **rx_ring_stats;  /* Rx ring stats array */
+};
+
+/* struct that defines a VSI, associated with a dev */
+struct ice_vsi {
+	struct net_device *netdev;
+	struct ice_sw *vsw;		 /* switch this VSI is on */
+	struct ice_pf *back;		 /* back pointer to PF */
+	struct ice_port_info *port_info; /* back pointer to port_info */
+	struct ice_rx_ring **rx_rings;	 /* Rx ring array */
+	struct ice_tx_ring **tx_rings;	 /* Tx ring array */
+	struct ice_q_vector **q_vectors; /* q_vector array */
+
+	irqreturn_t (*irq_handler)(int irq, void *data);
+
+	u64 tx_linearize;
+	DECLARE_BITMAP(state, ICE_VSI_STATE_NBITS);
+	unsigned int current_netdev_flags;
+	u32 tx_restart;
+	u32 tx_busy;
+	u32 rx_buf_failed;
+	u32 rx_page_failed;
+	u16 num_q_vectors;
+	/* tell if only dynamic irq allocation is allowed */
+	bool irq_dyn_alloc;
+
+	enum ice_vsi_type type;
+	u16 vsi_num;			/* HW (absolute) index of this VSI */
+	u16 idx;			/* software index in pf->vsi[] */
+
+	struct ice_vf *vf;		/* VF associated with this VSI */
+
+	u16 num_gfltr;
+	u16 num_bfltr;
+
+	/* RSS config */
+	u16 rss_table_size;	/* HW RSS table size */
+	u16 rss_size;		/* Allocated RSS queues */
+	u8 *rss_hkey_user;	/* User configured hash keys */
+	u8 *rss_lut_user;	/* User configured lookup table entries */
+	u8 rss_lut_type;	/* used to configure Get/Set RSS LUT AQ call */
+
+	/* aRFS members only allocated for the PF VSI */
+#define ICE_MAX_ARFS_LIST	1024
+#define ICE_ARFS_LST_MASK	(ICE_MAX_ARFS_LIST - 1)
+	struct hlist_head *arfs_fltr_list;
+	struct ice_arfs_active_fltr_cntrs *arfs_fltr_cntrs;
+	spinlock_t arfs_lock;	/* protects aRFS hash table and filter state */
+	atomic_t *arfs_last_fltr_id;
+
+	u16 max_frame;
+	u16 rx_buf_len;
+
+	struct ice_aqc_vsi_props info;	 /* VSI properties */
+	struct ice_vsi_vlan_info vlan_info;	/* vlan config to be restored */
+
+	/* VSI stats */
+	struct rtnl_link_stats64 net_stats;
+	struct rtnl_link_stats64 net_stats_prev;
+	struct ice_eth_stats eth_stats;
+	struct ice_eth_stats eth_stats_prev;
+
+	struct list_head tmp_sync_list;		/* MAC filters to be synced */
+	struct list_head tmp_unsync_list;	/* MAC filters to be unsynced */
+
+	u8 irqs_ready:1;
+	u8 current_isup:1;		 /* Sync 'link up' logging */
+	u8 stat_offsets_loaded:1;
+	struct ice_vsi_vlan_ops inner_vlan_ops;
+	struct ice_vsi_vlan_ops outer_vlan_ops;
+	u16 num_vlan;
+
+	/* queue information */
+	u8 tx_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+	u8 rx_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+	u16 *txq_map;			 /* index in pf->avail_txqs */
+	u16 *rxq_map;			 /* index in pf->avail_rxqs */
+	u16 alloc_txq;			 /* Allocated Tx queues */
+	u16 num_txq;			 /* Used Tx queues */
+	u16 alloc_rxq;			 /* Allocated Rx queues */
+	u16 num_rxq;			 /* Used Rx queues */
+	u16 req_txq;			 /* User requested Tx queues */
+	u16 req_rxq;			 /* User requested Rx queues */
+	u16 num_rx_desc;
+	u16 num_tx_desc;
+	u16 qset_handle[ICE_MAX_TRAFFIC_CLASS];
+	struct ice_tc_cfg tc_cfg;
+	struct bpf_prog *xdp_prog;
+	struct ice_tx_ring **xdp_rings;	 /* XDP ring array */
+	unsigned long *af_xdp_zc_qps;	 /* tracks AF_XDP ZC enabled qps */
+	u16 num_xdp_txq;		 /* Used XDP queues */
+	u8 xdp_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
+
+	struct net_device **target_netdevs;
+
+	struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */
+
+	/* Channel Specific Fields */
+	struct ice_vsi *tc_map_vsi[ICE_CHNL_MAX_TC];
+	u16 cnt_q_avail;
+	u16 next_base_q;	/* next queue to be used for channel setup */
+	struct list_head ch_list;
+	u16 num_chnl_rxq;
+	u16 num_chnl_txq;
+	u16 ch_rss_size;
+	u16 num_chnl_fltr;
+	/* store away rss size info before configuring ADQ channels so that,
+	 * it can be used after tc-qdisc delete, to get back RSS setting as
+	 * they were before
+	 */
+	u16 orig_rss_size;
+	/* this keeps tracks of all enabled TC with and without DCB
+	 * and inclusive of ADQ, vsi->mqprio_opt keeps track of queue
+	 * information
+	 */
+	u8 all_numtc;
+	u16 all_enatc;
+
+	/* store away TC info, to be used for rebuild logic */
+	u8 old_numtc;
+	u16 old_ena_tc;
+
+	struct ice_channel *ch;
+
+	/* setup back reference, to which aggregator node this VSI
+	 * corresponds to
+	 */
+	struct ice_agg_node *agg_node;
+} ____cacheline_internodealigned_in_smp;
+
+/* struct that defines an interrupt vector */
+struct ice_q_vector {
+	struct ice_vsi *vsi;
+
+	u16 v_idx;			/* index in the vsi->q_vector array. */
+	u16 reg_idx;
+	u8 num_ring_rx;			/* total number of Rx rings in vector */
+	u8 num_ring_tx;			/* total number of Tx rings in vector */
+	u8 wb_on_itr:1;			/* if true, WB on ITR is enabled */
+	/* in usecs, need to use ice_intrl_to_usecs_reg() before writing this
+	 * value to the device
+	 */
+	u8 intrl;
+
+	struct napi_struct napi;
+
+	struct ice_ring_container rx;
+	struct ice_ring_container tx;
+
+	cpumask_t affinity_mask;
+	struct irq_affinity_notify affinity_notify;
+
+	struct ice_channel *ch;
+
+	char name[ICE_INT_NAME_STR_LEN];
+
+	u16 total_events;	/* net_dim(): number of interrupts processed */
+	struct msi_map irq;
+} ____cacheline_internodealigned_in_smp;
+
+enum ice_pf_flags {
+	ICE_FLAG_FLTR_SYNC,
+	ICE_FLAG_RDMA_ENA,
+	ICE_FLAG_RSS_ENA,
+	ICE_FLAG_SRIOV_ENA,
+	ICE_FLAG_SRIOV_CAPABLE,
+	ICE_FLAG_DCB_CAPABLE,
+	ICE_FLAG_DCB_ENA,
+	ICE_FLAG_FD_ENA,
+	ICE_FLAG_PTP_SUPPORTED,		/* PTP is supported by NVM */
+	ICE_FLAG_PTP,			/* PTP is enabled by software */
+	ICE_FLAG_ADV_FEATURES,
+	ICE_FLAG_TC_MQPRIO,		/* support for Multi queue TC */
+	ICE_FLAG_CLS_FLOWER,
+	ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
+	ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
+	ICE_FLAG_NO_MEDIA,
+	ICE_FLAG_FW_LLDP_AGENT,
+	ICE_FLAG_MOD_POWER_UNSUPPORTED,
+	ICE_FLAG_PHY_FW_LOAD_FAILED,
+	ICE_FLAG_ETHTOOL_CTXT,		/* set when ethtool holds RTNL lock */
+	ICE_FLAG_LEGACY_RX,
+	ICE_FLAG_VF_TRUE_PROMISC_ENA,
+	ICE_FLAG_MDD_AUTO_RESET_VF,
+	ICE_FLAG_VF_VLAN_PRUNING,
+	ICE_FLAG_LINK_LENIENT_MODE_ENA,
+	ICE_FLAG_PLUG_AUX_DEV,
+	ICE_FLAG_UNPLUG_AUX_DEV,
+	ICE_FLAG_MTU_CHANGED,
+	ICE_FLAG_GNSS,			/* GNSS successfully initialized */
+	ICE_PF_FLAGS_NBITS		/* must be last */
+};
+
+enum ice_misc_thread_tasks {
+	ICE_MISC_THREAD_EXTTS_EVENT,
+	ICE_MISC_THREAD_TX_TSTAMP,
+	ICE_MISC_THREAD_NBITS		/* must be last */
+};
+
+struct ice_switchdev_info {
+	struct ice_vsi *control_vsi;
+	struct ice_vsi *uplink_vsi;
+	struct ice_esw_br_offloads *br_offloads;
+	bool is_running;
+};
+
+struct ice_agg_node {
+	u32 agg_id;
+#define ICE_MAX_VSIS_IN_AGG_NODE	64
+	u32 num_vsis;
+	u8 valid;
+};
+
+struct ice_pf {
+	struct pci_dev *pdev;
+
+	struct devlink_region *nvm_region;
+	struct devlink_region *sram_region;
+	struct devlink_region *devcaps_region;
+
+	/* devlink port data */
+	struct devlink_port devlink_port;
+
+	/* OS reserved IRQ details */
+	struct msix_entry *msix_entries;
+	struct ice_irq_tracker irq_tracker;
+	/* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the
+	 * number of MSIX vectors needed for all SR-IOV VFs from the number of
+	 * MSIX vectors allowed on this PF.
+	 */
+	u16 sriov_base_vector;
+
+	u16 ctrl_vsi_idx;		/* control VSI index in pf->vsi array */
+
+	struct ice_vsi **vsi;		/* VSIs created by the driver */
+	struct ice_vsi_stats **vsi_stats;
+	struct ice_sw *first_sw;	/* first switch created by firmware */
+	u16 eswitch_mode;		/* current mode of eswitch */
+	struct ice_vfs vfs;
+	DECLARE_BITMAP(features, ICE_F_MAX);
+	DECLARE_BITMAP(state, ICE_STATE_NBITS);
+	DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS);
+	DECLARE_BITMAP(misc_thread, ICE_MISC_THREAD_NBITS);
+	unsigned long *avail_txqs;	/* bitmap to track PF Tx queue usage */
+	unsigned long *avail_rxqs;	/* bitmap to track PF Rx queue usage */
+	unsigned long serv_tmr_period;
+	unsigned long serv_tmr_prev;
+	struct timer_list serv_tmr;
+	struct work_struct serv_task;
+	struct mutex avail_q_mutex;	/* protects access to avail_[rx|tx]qs */
+	struct mutex sw_mutex;		/* lock for protecting VSI alloc flow */
+	struct mutex tc_mutex;		/* lock to protect TC changes */
+	struct mutex adev_mutex;	/* lock to protect aux device access */
+	struct mutex lag_mutex;		/* protect ice_lag struct in PF */
+	u32 msg_enable;
+	struct ice_ptp ptp;
+	struct gnss_serial *gnss_serial;
+	struct gnss_device *gnss_dev;
+	u16 num_rdma_msix;		/* Total MSIX vectors for RDMA driver */
+	u16 rdma_base_vector;
+
+	/* spinlock to protect the AdminQ wait list */
+	spinlock_t aq_wait_lock;
+	struct hlist_head aq_wait_list;
+	wait_queue_head_t aq_wait_queue;
+	bool fw_emp_reset_disabled;
+
+	wait_queue_head_t reset_wait_queue;
+
+	u32 hw_csum_rx_error;
+	u32 oicr_err_reg;
+	struct msi_map oicr_irq;	/* Other interrupt cause MSIX vector */
+	u16 max_pf_txqs;	/* Total Tx queues PF wide */
+	u16 max_pf_rxqs;	/* Total Rx queues PF wide */
+	u16 num_lan_msix;	/* Total MSIX vectors for base driver */
+	u16 num_lan_tx;		/* num LAN Tx queues setup */
+	u16 num_lan_rx;		/* num LAN Rx queues setup */
+	u16 next_vsi;		/* Next free slot in pf->vsi[] - 0-based! */
+	u16 num_alloc_vsi;
+	u16 corer_count;	/* Core reset count */
+	u16 globr_count;	/* Global reset count */
+	u16 empr_count;		/* EMP reset count */
+	u16 pfr_count;		/* PF reset count */
+
+	u8 wol_ena : 1;		/* software state of WoL */
+	u32 wakeup_reason;	/* last wakeup reason */
+	struct ice_hw_port_stats stats;
+	struct ice_hw_port_stats stats_prev;
+	struct ice_hw hw;
+	u8 stat_prev_loaded:1; /* has previous stats been loaded */
+	u8 rdma_mode;
+	u16 dcbx_cap;
+	u32 tx_timeout_count;
+	unsigned long tx_timeout_last_recovery;
+	u32 tx_timeout_recovery_level;
+	char int_name[ICE_INT_NAME_STR_LEN];
+	struct auxiliary_device *adev;
+	int aux_idx;
+	u32 sw_int_count;
+	/* count of tc_flower filters specific to channel (aka where filter
+	 * action is "hw_tc <tc_num>")
+	 */
+	u16 num_dmac_chnl_fltrs;
+	struct hlist_head tc_flower_fltr_list;
+
+	u64 supported_rxdids;
+
+	__le64 nvm_phy_type_lo; /* NVM PHY type low */
+	__le64 nvm_phy_type_hi; /* NVM PHY type high */
+	struct ice_link_default_override_tlv link_dflt_override;
+	struct ice_lag *lag; /* Link Aggregation information */
+
+	struct ice_switchdev_info switchdev;
+	struct ice_esw_br_port *br_port;
+
+#define ICE_INVALID_AGG_NODE_ID		0
+#define ICE_PF_AGG_NODE_ID_START	1
+#define ICE_MAX_PF_AGG_NODES		32
+	struct ice_agg_node pf_agg_node[ICE_MAX_PF_AGG_NODES];
+#define ICE_VF_AGG_NODE_ID_START	65
+#define ICE_MAX_VF_AGG_NODES		32
+	struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES];
+};
+
+extern struct workqueue_struct *ice_lag_wq;
+
+struct ice_netdev_priv {
+	struct ice_vsi *vsi;
+	struct ice_repr *repr;
+	/* indirect block callbacks on registered higher level devices
+	 * (e.g. tunnel devices)
+	 *
+	 * tc_indr_block_cb_priv_list is used to look up indirect callback
+	 * private data
+	 */
+	struct list_head tc_indr_block_priv_list;
+};
+
+/**
+ * ice_vector_ch_enabled
+ * @qv: pointer to q_vector, can be NULL
+ *
+ * This function returns true if vector is channel enabled otherwise false
+ */
+static inline bool ice_vector_ch_enabled(struct ice_q_vector *qv)
+{
+	return !!qv->ch; /* Enable it to run with TC */
+}
+
+/**
+ * ice_irq_dynamic_ena - Enable default interrupt generation settings
+ * @hw: pointer to HW struct
+ * @vsi: pointer to VSI struct, can be NULL
+ * @q_vector: pointer to q_vector, can be NULL
+ */
+static inline void
+ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
+		    struct ice_q_vector *q_vector)
+{
+	u32 vector = (vsi && q_vector) ? q_vector->reg_idx :
+				((struct ice_pf *)hw->back)->oicr_irq.index;
+	int itr = ICE_ITR_NONE;
+	u32 val;
+
+	/* clear the PBA here, as this function is meant to clean out all
+	 * previous interrupts and enable the interrupt
+	 */
+	val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
+	      (itr << GLINT_DYN_CTL_ITR_INDX_S);
+	if (vsi)
+		if (test_bit(ICE_VSI_DOWN, vsi->state))
+			return;
+	wr32(hw, GLINT_DYN_CTL(vector), val);
+}
+
+/**
+ * ice_netdev_to_pf - Retrieve the PF struct associated with a netdev
+ * @netdev: pointer to the netdev struct
+ */
+static inline struct ice_pf *ice_netdev_to_pf(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+
+	return np->vsi->back;
+}
+
+static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi)
+{
+	return !!READ_ONCE(vsi->xdp_prog);
+}
+
+static inline void ice_set_ring_xdp(struct ice_tx_ring *ring)
+{
+	ring->flags |= ICE_TX_FLAGS_RING_XDP;
+}
+
+/**
+ * ice_xsk_pool - get XSK buffer pool bound to a ring
+ * @ring: Rx ring to use
+ *
+ * Returns a pointer to xsk_buff_pool structure if there is a buffer pool
+ * present, NULL otherwise.
+ */
+static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring)
+{
+	struct ice_vsi *vsi = ring->vsi;
+	u16 qid = ring->q_index;
+
+	if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps))
+		return NULL;
+
+	return xsk_get_pool_from_qid(vsi->netdev, qid);
+}
+
+/**
+ * ice_tx_xsk_pool - assign XSK buff pool to XDP ring
+ * @vsi: pointer to VSI
+ * @qid: index of a queue to look at XSK buff pool presence
+ *
+ * Sets XSK buff pool pointer on XDP ring.
+ *
+ * XDP ring is picked from Rx ring, whereas Rx ring is picked based on provided
+ * queue id. Reason for doing so is that queue vectors might have assigned more
+ * than one XDP ring, e.g. when user reduced the queue count on netdev; Rx ring
+ * carries a pointer to one of these XDP rings for its own purposes, such as
+ * handling XDP_TX action, therefore we can piggyback here on the
+ * rx_ring->xdp_ring assignment that was done during XDP rings initialization.
+ */
+static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid)
+{
+	struct ice_tx_ring *ring;
+
+	ring = vsi->rx_rings[qid]->xdp_ring;
+	if (!ring)
+		return;
+
+	if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) {
+		ring->xsk_pool = NULL;
+		return;
+	}
+
+	ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid);
+}
+
+/**
+ * ice_get_main_vsi - Get the PF VSI
+ * @pf: PF instance
+ *
+ * returns pf->vsi[0], which by definition is the PF VSI
+ */
+static inline struct ice_vsi *ice_get_main_vsi(struct ice_pf *pf)
+{
+	if (pf->vsi)
+		return pf->vsi[0];
+
+	return NULL;
+}
+
+/**
+ * ice_get_netdev_priv_vsi - return VSI associated with netdev priv.
+ * @np: private netdev structure
+ */
+static inline struct ice_vsi *ice_get_netdev_priv_vsi(struct ice_netdev_priv *np)
+{
+	/* In case of port representor return source port VSI. */
+	if (np->repr)
+		return np->repr->src_vsi;
+	else
+		return np->vsi;
+}
+
+/**
+ * ice_get_ctrl_vsi - Get the control VSI
+ * @pf: PF instance
+ */
+static inline struct ice_vsi *ice_get_ctrl_vsi(struct ice_pf *pf)
+{
+	/* if pf->ctrl_vsi_idx is ICE_NO_VSI, control VSI was not set up */
+	if (!pf->vsi || pf->ctrl_vsi_idx == ICE_NO_VSI)
+		return NULL;
+
+	return pf->vsi[pf->ctrl_vsi_idx];
+}
+
+/**
+ * ice_find_vsi - Find the VSI from VSI ID
+ * @pf: The PF pointer to search in
+ * @vsi_num: The VSI ID to search for
+ */
+static inline struct ice_vsi *ice_find_vsi(struct ice_pf *pf, u16 vsi_num)
+{
+	int i;
+
+	ice_for_each_vsi(pf, i)
+		if (pf->vsi[i] && pf->vsi[i]->vsi_num == vsi_num)
+			return  pf->vsi[i];
+	return NULL;
+}
+
+/**
+ * ice_is_switchdev_running - check if switchdev is configured
+ * @pf: pointer to PF structure
+ *
+ * Returns true if eswitch mode is set to DEVLINK_ESWITCH_MODE_SWITCHDEV
+ * and switchdev is configured, false otherwise.
+ */
+static inline bool ice_is_switchdev_running(struct ice_pf *pf)
+{
+	return pf->switchdev.is_running;
+}
+
+#define ICE_FD_STAT_CTR_BLOCK_COUNT	256
+#define ICE_FD_STAT_PF_IDX(base_idx) \
+			((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT)
+#define ICE_FD_SB_STAT_IDX(base_idx) ICE_FD_STAT_PF_IDX(base_idx)
+#define ICE_FD_STAT_CH			1
+#define ICE_FD_CH_STAT_IDX(base_idx) \
+			(ICE_FD_STAT_PF_IDX(base_idx) + ICE_FD_STAT_CH)
+
+/**
+ * ice_is_adq_active - any active ADQs
+ * @pf: pointer to PF
+ *
+ * This function returns true if there are any ADQs configured (which is
+ * determined by looking at VSI type (which should be VSI_PF), numtc, and
+ * TC_MQPRIO flag) otherwise return false
+ */
+static inline bool ice_is_adq_active(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return false;
+
+	/* is ADQ configured */
+	if (vsi->tc_cfg.numtc > ICE_CHNL_START_TC &&
+	    test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+		return true;
+
+	return false;
+}
+
+bool netif_is_ice(const struct net_device *dev);
+int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
+int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
+int ice_vsi_open_ctrl(struct ice_vsi *vsi);
+int ice_vsi_open(struct ice_vsi *vsi);
+void ice_set_ethtool_ops(struct net_device *netdev);
+void ice_set_ethtool_repr_ops(struct net_device *netdev);
+void ice_set_ethtool_safe_mode_ops(struct net_device *netdev);
+u16 ice_get_avail_txq_count(struct ice_pf *pf);
+u16 ice_get_avail_rxq_count(struct ice_pf *pf);
+int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked);
+void ice_update_vsi_stats(struct ice_vsi *vsi);
+void ice_update_pf_stats(struct ice_pf *pf);
+void
+ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp,
+			     struct ice_q_stats stats, u64 *pkts, u64 *bytes);
+int ice_up(struct ice_vsi *vsi);
+int ice_down(struct ice_vsi *vsi);
+int ice_down_up(struct ice_vsi *vsi);
+int ice_vsi_cfg_lan(struct ice_vsi *vsi);
+struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
+int ice_vsi_determine_xdp_res(struct ice_vsi *vsi);
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog);
+int ice_destroy_xdp_rings(struct ice_vsi *vsi);
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+	     u32 flags);
+int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size);
+int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size);
+int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed);
+int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed);
+void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
+int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
+void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
+int ice_plug_aux_dev(struct ice_pf *pf);
+void ice_unplug_aux_dev(struct ice_pf *pf);
+int ice_init_rdma(struct ice_pf *pf);
+void ice_deinit_rdma(struct ice_pf *pf);
+const char *ice_aq_str(enum ice_aq_err aq_err);
+bool ice_is_wol_supported(struct ice_hw *hw);
+void ice_fdir_del_all_fltrs(struct ice_vsi *vsi);
+int
+ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add,
+		    bool is_tun);
+void ice_vsi_manage_fdir(struct ice_vsi *vsi, bool ena);
+int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd);
+int ice_del_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd);
+int ice_get_ethtool_fdir_entry(struct ice_hw *hw, struct ethtool_rxnfc *cmd);
+int
+ice_get_fdir_fltr_ids(struct ice_hw *hw, struct ethtool_rxnfc *cmd,
+		      u32 *rule_locs);
+void ice_fdir_rem_adq_chnl(struct ice_hw *hw, u16 vsi_idx);
+void ice_fdir_release_flows(struct ice_hw *hw);
+void ice_fdir_replay_flows(struct ice_hw *hw);
+void ice_fdir_replay_fltrs(struct ice_pf *pf);
+int ice_fdir_create_dflt_rules(struct ice_pf *pf);
+
+enum ice_aq_task_state {
+	ICE_AQ_TASK_NOT_PREPARED,
+	ICE_AQ_TASK_WAITING,
+	ICE_AQ_TASK_COMPLETE,
+	ICE_AQ_TASK_CANCELED,
+};
+
+struct ice_aq_task {
+	struct hlist_node entry;
+	struct ice_rq_event_info event;
+	enum ice_aq_task_state state;
+	u16 opcode;
+};
+
+void ice_aq_prep_for_event(struct ice_pf *pf, struct ice_aq_task *task,
+			   u16 opcode);
+int ice_aq_wait_for_event(struct ice_pf *pf, struct ice_aq_task *task,
+			  unsigned long timeout);
+int ice_open(struct net_device *netdev);
+int ice_open_internal(struct net_device *netdev);
+int ice_stop(struct net_device *netdev);
+void ice_service_task_schedule(struct ice_pf *pf);
+int ice_load(struct ice_pf *pf);
+void ice_unload(struct ice_pf *pf);
+
+/**
+ * ice_set_rdma_cap - enable RDMA support
+ * @pf: PF struct
+ */
+static inline void ice_set_rdma_cap(struct ice_pf *pf)
+{
+	if (pf->hw.func_caps.common_cap.rdma && pf->num_rdma_msix) {
+		set_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+		set_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags);
+	}
+}
+
+/**
+ * ice_clear_rdma_cap - disable RDMA support
+ * @pf: PF struct
+ */
+static inline void ice_clear_rdma_cap(struct ice_pf *pf)
+{
+	/* defer unplug to service task to avoid RTNL lock and
+	 * clear PLUG bit so that pending plugs don't interfere
+	 */
+	clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags);
+	set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
+	clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+}
+#endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
new file mode 100644
index 0000000000..29f7a9852a
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -0,0 +1,2426 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_ADMINQ_CMD_H_
+#define _ICE_ADMINQ_CMD_H_
+
+/* This header file defines the Admin Queue commands, error codes and
+ * descriptor format. It is shared between Firmware and Software.
+ */
+
+#define ICE_MAX_VSI			768
+#define ICE_AQC_TOPO_MAX_LEVEL_NUM	0x9
+#define ICE_AQ_SET_MAC_FRAME_SIZE_MAX	9728
+
+struct ice_aqc_generic {
+	__le32 param0;
+	__le32 param1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Get version (direct 0x0001) */
+struct ice_aqc_get_ver {
+	__le32 rom_ver;
+	__le32 fw_build;
+	u8 fw_branch;
+	u8 fw_major;
+	u8 fw_minor;
+	u8 fw_patch;
+	u8 api_branch;
+	u8 api_major;
+	u8 api_minor;
+	u8 api_patch;
+};
+
+/* Send driver version (indirect 0x0002) */
+struct ice_aqc_driver_ver {
+	u8 major_ver;
+	u8 minor_ver;
+	u8 build_ver;
+	u8 subbuild_ver;
+	u8 reserved[4];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Queue Shutdown (direct 0x0003) */
+struct ice_aqc_q_shutdown {
+	u8 driver_unloading;
+#define ICE_AQC_DRIVER_UNLOADING	BIT(0)
+	u8 reserved[15];
+};
+
+/* Request resource ownership (direct 0x0008)
+ * Release resource ownership (direct 0x0009)
+ */
+struct ice_aqc_req_res {
+	__le16 res_id;
+#define ICE_AQC_RES_ID_NVM		1
+#define ICE_AQC_RES_ID_SDP		2
+#define ICE_AQC_RES_ID_CHNG_LOCK	3
+#define ICE_AQC_RES_ID_GLBL_LOCK	4
+	__le16 access_type;
+#define ICE_AQC_RES_ACCESS_READ		1
+#define ICE_AQC_RES_ACCESS_WRITE	2
+
+	/* Upon successful completion, FW writes this value and driver is
+	 * expected to release resource before timeout. This value is provided
+	 * in milliseconds.
+	 */
+	__le32 timeout;
+#define ICE_AQ_RES_NVM_READ_DFLT_TIMEOUT_MS	3000
+#define ICE_AQ_RES_NVM_WRITE_DFLT_TIMEOUT_MS	180000
+#define ICE_AQ_RES_CHNG_LOCK_DFLT_TIMEOUT_MS	1000
+#define ICE_AQ_RES_GLBL_LOCK_DFLT_TIMEOUT_MS	3000
+	/* For SDP: pin ID of the SDP */
+	__le32 res_number;
+	/* Status is only used for ICE_AQC_RES_ID_GLBL_LOCK */
+	__le16 status;
+#define ICE_AQ_RES_GLBL_SUCCESS		0
+#define ICE_AQ_RES_GLBL_IN_PROG		1
+#define ICE_AQ_RES_GLBL_DONE		2
+	u8 reserved[2];
+};
+
+/* Get function capabilities (indirect 0x000A)
+ * Get device capabilities (indirect 0x000B)
+ */
+struct ice_aqc_list_caps {
+	u8 cmd_flags;
+	u8 pf_index;
+	u8 reserved[2];
+	__le32 count;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Device/Function buffer entry, repeated per reported capability */
+struct ice_aqc_list_caps_elem {
+	__le16 cap;
+#define ICE_AQC_CAPS_VALID_FUNCTIONS			0x0005
+#define ICE_AQC_CAPS_SRIOV				0x0012
+#define ICE_AQC_CAPS_VF					0x0013
+#define ICE_AQC_CAPS_VSI				0x0017
+#define ICE_AQC_CAPS_DCB				0x0018
+#define ICE_AQC_CAPS_RSS				0x0040
+#define ICE_AQC_CAPS_RXQS				0x0041
+#define ICE_AQC_CAPS_TXQS				0x0042
+#define ICE_AQC_CAPS_MSIX				0x0043
+#define ICE_AQC_CAPS_FD					0x0045
+#define ICE_AQC_CAPS_1588				0x0046
+#define ICE_AQC_CAPS_MAX_MTU				0x0047
+#define ICE_AQC_CAPS_NVM_VER				0x0048
+#define ICE_AQC_CAPS_PENDING_NVM_VER			0x0049
+#define ICE_AQC_CAPS_OROM_VER				0x004A
+#define ICE_AQC_CAPS_PENDING_OROM_VER			0x004B
+#define ICE_AQC_CAPS_NET_VER				0x004C
+#define ICE_AQC_CAPS_PENDING_NET_VER			0x004D
+#define ICE_AQC_CAPS_RDMA				0x0051
+#define ICE_AQC_CAPS_PCIE_RESET_AVOIDANCE		0x0076
+#define ICE_AQC_CAPS_POST_UPDATE_RESET_RESTRICT		0x0077
+#define ICE_AQC_CAPS_NVM_MGMT				0x0080
+#define ICE_AQC_CAPS_FW_LAG_SUPPORT			0x0092
+#define ICE_AQC_BIT_ROCEV2_LAG				0x01
+#define ICE_AQC_BIT_SRIOV_LAG				0x02
+
+	u8 major_ver;
+	u8 minor_ver;
+	/* Number of resources described by this capability */
+	__le32 number;
+	/* Only meaningful for some types of resources */
+	__le32 logical_id;
+	/* Only meaningful for some types of resources */
+	__le32 phys_id;
+	__le64 rsvd1;
+	__le64 rsvd2;
+};
+
+/* Manage MAC address, read command - indirect (0x0107)
+ * This struct is also used for the response
+ */
+struct ice_aqc_manage_mac_read {
+	__le16 flags; /* Zeroed by device driver */
+#define ICE_AQC_MAN_MAC_LAN_ADDR_VALID		BIT(4)
+#define ICE_AQC_MAN_MAC_SAN_ADDR_VALID		BIT(5)
+#define ICE_AQC_MAN_MAC_PORT_ADDR_VALID		BIT(6)
+#define ICE_AQC_MAN_MAC_WOL_ADDR_VALID		BIT(7)
+#define ICE_AQC_MAN_MAC_READ_S			4
+#define ICE_AQC_MAN_MAC_READ_M			(0xF << ICE_AQC_MAN_MAC_READ_S)
+	u8 rsvd[2];
+	u8 num_addr; /* Used in response */
+	u8 rsvd1[3];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Response buffer format for manage MAC read command */
+struct ice_aqc_manage_mac_read_resp {
+	u8 lport_num;
+	u8 addr_type;
+#define ICE_AQC_MAN_MAC_ADDR_TYPE_LAN		0
+#define ICE_AQC_MAN_MAC_ADDR_TYPE_WOL		1
+	u8 mac_addr[ETH_ALEN];
+};
+
+/* Manage MAC address, write command - direct (0x0108) */
+struct ice_aqc_manage_mac_write {
+	u8 rsvd;
+	u8 flags;
+#define ICE_AQC_MAN_MAC_WR_MC_MAG_EN		BIT(0)
+#define ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP	BIT(1)
+#define ICE_AQC_MAN_MAC_WR_S		6
+#define ICE_AQC_MAN_MAC_WR_M		ICE_M(3, ICE_AQC_MAN_MAC_WR_S)
+#define ICE_AQC_MAN_MAC_UPDATE_LAA	0
+#define ICE_AQC_MAN_MAC_UPDATE_LAA_WOL	BIT(ICE_AQC_MAN_MAC_WR_S)
+	/* byte stream in network order */
+	u8 mac_addr[ETH_ALEN];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Clear PXE Command and response (direct 0x0110) */
+struct ice_aqc_clear_pxe {
+	u8 rx_cnt;
+#define ICE_AQC_CLEAR_PXE_RX_CNT		0x2
+	u8 reserved[15];
+};
+
+/* Get switch configuration (0x0200) */
+struct ice_aqc_get_sw_cfg {
+	/* Reserved for command and copy of request flags for response */
+	__le16 flags;
+	/* First desc in case of command and next_elem in case of response
+	 * In case of response, if it is not zero, means all the configuration
+	 * was not returned and new command shall be sent with this value in
+	 * the 'first desc' field
+	 */
+	__le16 element;
+	/* Reserved for command, only used for response */
+	__le16 num_elems;
+	__le16 rsvd;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Each entry in the response buffer is of the following type: */
+struct ice_aqc_get_sw_cfg_resp_elem {
+	/* VSI/Port Number */
+	__le16 vsi_port_num;
+#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S	0
+#define ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M	\
+			(0x3FF << ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_S)
+#define ICE_AQC_GET_SW_CONF_RESP_TYPE_S	14
+#define ICE_AQC_GET_SW_CONF_RESP_TYPE_M	(0x3 << ICE_AQC_GET_SW_CONF_RESP_TYPE_S)
+#define ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT	0
+#define ICE_AQC_GET_SW_CONF_RESP_VIRT_PORT	1
+#define ICE_AQC_GET_SW_CONF_RESP_VSI		2
+
+	/* SWID VSI/Port belongs to */
+	__le16 swid;
+
+	/* Bit 14..0 : PF/VF number VSI belongs to
+	 * Bit 15 : VF indication bit
+	 */
+	__le16 pf_vf_num;
+#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S	0
+#define ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M	\
+				(0x7FFF << ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_S)
+#define ICE_AQC_GET_SW_CONF_RESP_IS_VF		BIT(15)
+};
+
+/* Set Port parameters, (direct, 0x0203) */
+struct ice_aqc_set_port_params {
+	__le16 cmd_flags;
+#define ICE_AQC_SET_P_PARAMS_DOUBLE_VLAN_ENA	BIT(2)
+	__le16 bad_frame_vsi;
+	__le16 swid;
+#define ICE_AQC_PORT_SWID_VALID			BIT(15)
+#define ICE_AQC_PORT_SWID_M			0xFF
+	u8 reserved[10];
+};
+
+/* These resource type defines are used for all switch resource
+ * commands where a resource type is required, such as:
+ * Get Resource Allocation command (indirect 0x0204)
+ * Allocate Resources command (indirect 0x0208)
+ * Free Resources command (indirect 0x0209)
+ * Get Allocated Resource Descriptors Command (indirect 0x020A)
+ * Share Resource command (indirect 0x020B)
+ */
+#define ICE_AQC_RES_TYPE_VSI_LIST_REP			0x03
+#define ICE_AQC_RES_TYPE_VSI_LIST_PRUNE			0x04
+#define ICE_AQC_RES_TYPE_RECIPE				0x05
+#define ICE_AQC_RES_TYPE_SWID				0x07
+#define ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK		0x21
+#define ICE_AQC_RES_TYPE_FDIR_GUARANTEED_ENTRIES	0x22
+#define ICE_AQC_RES_TYPE_FDIR_SHARED_ENTRIES		0x23
+#define ICE_AQC_RES_TYPE_FD_PROF_BLDR_PROFID		0x58
+#define ICE_AQC_RES_TYPE_FD_PROF_BLDR_TCAM		0x59
+#define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_PROFID		0x60
+#define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_TCAM		0x61
+
+#define ICE_AQC_RES_TYPE_FLAG_SHARED			BIT(7)
+#define ICE_AQC_RES_TYPE_FLAG_SCAN_BOTTOM		BIT(12)
+#define ICE_AQC_RES_TYPE_FLAG_IGNORE_INDEX		BIT(13)
+
+#define ICE_AQC_RES_TYPE_FLAG_DEDICATED			0x00
+
+#define ICE_AQC_RES_TYPE_S	0
+#define ICE_AQC_RES_TYPE_M	(0x07F << ICE_AQC_RES_TYPE_S)
+
+/* Allocate Resources command (indirect 0x0208)
+ * Free Resources command (indirect 0x0209)
+ * Share Resource command (indirect 0x020B)
+ */
+struct ice_aqc_alloc_free_res_cmd {
+	__le16 num_entries; /* Number of Resource entries */
+	u8 reserved[6];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Resource descriptor */
+struct ice_aqc_res_elem {
+	union {
+		__le16 sw_resp;
+		__le16 flu_resp;
+	} e;
+};
+
+/* Buffer for Allocate/Free Resources commands */
+struct ice_aqc_alloc_free_res_elem {
+	__le16 res_type; /* Types defined above cmd 0x0204 */
+#define ICE_AQC_RES_TYPE_SHARED_S	7
+#define ICE_AQC_RES_TYPE_SHARED_M	(0x1 << ICE_AQC_RES_TYPE_SHARED_S)
+#define ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_S	8
+#define ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_M	\
+				(0xF << ICE_AQC_RES_TYPE_VSI_PRUNE_LIST_S)
+	__le16 num_elems;
+	struct ice_aqc_res_elem elem[];
+};
+
+/* Request buffer for Set VLAN Mode AQ command (indirect 0x020C) */
+struct ice_aqc_set_vlan_mode {
+	u8 reserved;
+	u8 l2tag_prio_tagging;
+#define ICE_AQ_VLAN_PRIO_TAG_S			0
+#define ICE_AQ_VLAN_PRIO_TAG_M			(0x7 << ICE_AQ_VLAN_PRIO_TAG_S)
+#define ICE_AQ_VLAN_PRIO_TAG_NOT_SUPPORTED	0x0
+#define ICE_AQ_VLAN_PRIO_TAG_STAG		0x1
+#define ICE_AQ_VLAN_PRIO_TAG_OUTER_CTAG		0x2
+#define ICE_AQ_VLAN_PRIO_TAG_OUTER_VLAN		0x3
+#define ICE_AQ_VLAN_PRIO_TAG_INNER_CTAG		0x4
+#define ICE_AQ_VLAN_PRIO_TAG_MAX		0x4
+#define ICE_AQ_VLAN_PRIO_TAG_ERROR		0x7
+	u8 l2tag_reserved[64];
+	u8 rdma_packet;
+#define ICE_AQ_VLAN_RDMA_TAG_S			0
+#define ICE_AQ_VLAN_RDMA_TAG_M			(0x3F << ICE_AQ_VLAN_RDMA_TAG_S)
+#define ICE_AQ_SVM_VLAN_RDMA_PKT_FLAG_SETTING	0x10
+#define ICE_AQ_DVM_VLAN_RDMA_PKT_FLAG_SETTING	0x1A
+	u8 rdma_reserved[2];
+	u8 mng_vlan_prot_id;
+#define ICE_AQ_VLAN_MNG_PROTOCOL_ID_OUTER	0x10
+#define ICE_AQ_VLAN_MNG_PROTOCOL_ID_INNER	0x11
+	u8 prot_id_reserved[30];
+};
+
+/* Response buffer for Get VLAN Mode AQ command (indirect 0x020D) */
+struct ice_aqc_get_vlan_mode {
+	u8 vlan_mode;
+#define ICE_AQ_VLAN_MODE_DVM_ENA	BIT(0)
+	u8 l2tag_prio_tagging;
+	u8 reserved[98];
+};
+
+/* Add VSI (indirect 0x0210)
+ * Update VSI (indirect 0x0211)
+ * Get VSI (indirect 0x0212)
+ * Free VSI (indirect 0x0213)
+ */
+struct ice_aqc_add_get_update_free_vsi {
+	__le16 vsi_num;
+#define ICE_AQ_VSI_NUM_S	0
+#define ICE_AQ_VSI_NUM_M	(0x03FF << ICE_AQ_VSI_NUM_S)
+#define ICE_AQ_VSI_IS_VALID	BIT(15)
+	__le16 cmd_flags;
+#define ICE_AQ_VSI_KEEP_ALLOC	0x1
+	u8 vf_id;
+	u8 reserved;
+	__le16 vsi_flags;
+#define ICE_AQ_VSI_TYPE_S	0
+#define ICE_AQ_VSI_TYPE_M	(0x3 << ICE_AQ_VSI_TYPE_S)
+#define ICE_AQ_VSI_TYPE_VF	0x0
+#define ICE_AQ_VSI_TYPE_VMDQ2	0x1
+#define ICE_AQ_VSI_TYPE_PF	0x2
+#define ICE_AQ_VSI_TYPE_EMP_MNG	0x3
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Response descriptor for:
+ * Add VSI (indirect 0x0210)
+ * Update VSI (indirect 0x0211)
+ * Free VSI (indirect 0x0213)
+ */
+struct ice_aqc_add_update_free_vsi_resp {
+	__le16 vsi_num;
+	__le16 ext_status;
+	__le16 vsi_used;
+	__le16 vsi_free;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_vsi_props {
+	__le16 valid_sections;
+#define ICE_AQ_VSI_PROP_SW_VALID		BIT(0)
+#define ICE_AQ_VSI_PROP_SECURITY_VALID		BIT(1)
+#define ICE_AQ_VSI_PROP_VLAN_VALID		BIT(2)
+#define ICE_AQ_VSI_PROP_OUTER_TAG_VALID		BIT(3)
+#define ICE_AQ_VSI_PROP_INGRESS_UP_VALID	BIT(4)
+#define ICE_AQ_VSI_PROP_EGRESS_UP_VALID		BIT(5)
+#define ICE_AQ_VSI_PROP_RXQ_MAP_VALID		BIT(6)
+#define ICE_AQ_VSI_PROP_Q_OPT_VALID		BIT(7)
+#define ICE_AQ_VSI_PROP_OUTER_UP_VALID		BIT(8)
+#define ICE_AQ_VSI_PROP_FLOW_DIR_VALID		BIT(11)
+#define ICE_AQ_VSI_PROP_PASID_VALID		BIT(12)
+	/* switch section */
+	u8 sw_id;
+	u8 sw_flags;
+#define ICE_AQ_VSI_SW_FLAG_ALLOW_LB		BIT(5)
+#define ICE_AQ_VSI_SW_FLAG_LOCAL_LB		BIT(6)
+#define ICE_AQ_VSI_SW_FLAG_SRC_PRUNE		BIT(7)
+	u8 sw_flags2;
+#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S	0
+#define ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M	(0xF << ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_S)
+#define ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA	BIT(0)
+#define ICE_AQ_VSI_SW_FLAG_LAN_ENA		BIT(4)
+	u8 veb_stat_id;
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_S		0
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_M		(0x1F << ICE_AQ_VSI_SW_VEB_STAT_ID_S)
+#define ICE_AQ_VSI_SW_VEB_STAT_ID_VALID		BIT(5)
+	/* security section */
+	u8 sec_flags;
+#define ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD	BIT(0)
+#define ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF	BIT(2)
+#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S		4
+#define ICE_AQ_VSI_SEC_TX_PRUNE_ENA_M		(0xF << ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S)
+#define ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA	BIT(0)
+	u8 sec_reserved;
+	/* VLAN section */
+	__le16 port_based_inner_vlan; /* VLANS include priority bits */
+	u8 inner_vlan_reserved[2];
+	u8 inner_vlan_flags;
+#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_S		0
+#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_M		(0x3 << ICE_AQ_VSI_INNER_VLAN_TX_MODE_S)
+#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_ACCEPTUNTAGGED	0x1
+#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_ACCEPTTAGGED	0x2
+#define ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL	0x3
+#define ICE_AQ_VSI_INNER_VLAN_INSERT_PVID	BIT(2)
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_S		3
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_M		(0x3 << ICE_AQ_VSI_INNER_VLAN_EMODE_S)
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH	(0x0 << ICE_AQ_VSI_INNER_VLAN_EMODE_S)
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR_UP	(0x1 << ICE_AQ_VSI_INNER_VLAN_EMODE_S)
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_STR		(0x2 << ICE_AQ_VSI_INNER_VLAN_EMODE_S)
+#define ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING	(0x3 << ICE_AQ_VSI_INNER_VLAN_EMODE_S)
+	u8 inner_vlan_reserved2[3];
+	/* ingress egress up sections */
+	__le32 ingress_table; /* bitmap, 3 bits per up */
+#define ICE_AQ_VSI_UP_TABLE_UP0_S		0
+#define ICE_AQ_VSI_UP_TABLE_UP0_M		(0x7 << ICE_AQ_VSI_UP_TABLE_UP0_S)
+#define ICE_AQ_VSI_UP_TABLE_UP1_S		3
+#define ICE_AQ_VSI_UP_TABLE_UP1_M		(0x7 << ICE_AQ_VSI_UP_TABLE_UP1_S)
+#define ICE_AQ_VSI_UP_TABLE_UP2_S		6
+#define ICE_AQ_VSI_UP_TABLE_UP2_M		(0x7 << ICE_AQ_VSI_UP_TABLE_UP2_S)
+#define ICE_AQ_VSI_UP_TABLE_UP3_S		9
+#define ICE_AQ_VSI_UP_TABLE_UP3_M		(0x7 << ICE_AQ_VSI_UP_TABLE_UP3_S)
+#define ICE_AQ_VSI_UP_TABLE_UP4_S		12
+#define ICE_AQ_VSI_UP_TABLE_UP4_M		(0x7 << ICE_AQ_VSI_UP_TABLE_UP4_S)
+#define ICE_AQ_VSI_UP_TABLE_UP5_S		15
+#define ICE_AQ_VSI_UP_TABLE_UP5_M		(0x7 << ICE_AQ_VSI_UP_TABLE_UP5_S)
+#define ICE_AQ_VSI_UP_TABLE_UP6_S		18
+#define ICE_AQ_VSI_UP_TABLE_UP6_M		(0x7 << ICE_AQ_VSI_UP_TABLE_UP6_S)
+#define ICE_AQ_VSI_UP_TABLE_UP7_S		21
+#define ICE_AQ_VSI_UP_TABLE_UP7_M		(0x7 << ICE_AQ_VSI_UP_TABLE_UP7_S)
+	__le32 egress_table;   /* same defines as for ingress table */
+	/* outer tags section */
+	__le16 port_based_outer_vlan;
+	u8 outer_vlan_flags;
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_S		0
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_M		(0x3 << ICE_AQ_VSI_OUTER_VLAN_EMODE_S)
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW_BOTH	0x0
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW_UP	0x1
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW	0x2
+#define ICE_AQ_VSI_OUTER_VLAN_EMODE_NOTHING	0x3
+#define ICE_AQ_VSI_OUTER_TAG_TYPE_S		2
+#define ICE_AQ_VSI_OUTER_TAG_TYPE_M		(0x3 << ICE_AQ_VSI_OUTER_TAG_TYPE_S)
+#define ICE_AQ_VSI_OUTER_TAG_NONE		0x0
+#define ICE_AQ_VSI_OUTER_TAG_STAG		0x1
+#define ICE_AQ_VSI_OUTER_TAG_VLAN_8100		0x2
+#define ICE_AQ_VSI_OUTER_TAG_VLAN_9100		0x3
+#define ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT		BIT(4)
+#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S			5
+#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M			(0x3 << ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S)
+#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ACCEPTUNTAGGED	0x1
+#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ACCEPTTAGGED	0x2
+#define ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL		0x3
+#define ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC		BIT(7)
+	u8 outer_vlan_reserved;
+	/* queue mapping section */
+	__le16 mapping_flags;
+#define ICE_AQ_VSI_Q_MAP_CONTIG			0x0
+#define ICE_AQ_VSI_Q_MAP_NONCONTIG		BIT(0)
+	__le16 q_mapping[16];
+#define ICE_AQ_VSI_Q_S				0
+#define ICE_AQ_VSI_Q_M				(0x7FF << ICE_AQ_VSI_Q_S)
+	__le16 tc_mapping[8];
+#define ICE_AQ_VSI_TC_Q_OFFSET_S		0
+#define ICE_AQ_VSI_TC_Q_OFFSET_M		(0x7FF << ICE_AQ_VSI_TC_Q_OFFSET_S)
+#define ICE_AQ_VSI_TC_Q_NUM_S			11
+#define ICE_AQ_VSI_TC_Q_NUM_M			(0xF << ICE_AQ_VSI_TC_Q_NUM_S)
+	/* queueing option section */
+	u8 q_opt_rss;
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_S		0
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_M		(0x3 << ICE_AQ_VSI_Q_OPT_RSS_LUT_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI		0x0
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_PF		0x2
+#define ICE_AQ_VSI_Q_OPT_RSS_LUT_GBL		0x3
+#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S		2
+#define ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M		(0xF << ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_HASH_S		6
+#define ICE_AQ_VSI_Q_OPT_RSS_HASH_M		(0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_TPLZ		(0x0 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_SYM_TPLZ		(0x1 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_XOR		(0x2 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+#define ICE_AQ_VSI_Q_OPT_RSS_JHASH		(0x3 << ICE_AQ_VSI_Q_OPT_RSS_HASH_S)
+	u8 q_opt_tc;
+#define ICE_AQ_VSI_Q_OPT_TC_OVR_S		0
+#define ICE_AQ_VSI_Q_OPT_TC_OVR_M		(0x1F << ICE_AQ_VSI_Q_OPT_TC_OVR_S)
+#define ICE_AQ_VSI_Q_OPT_PROF_TC_OVR		BIT(7)
+	u8 q_opt_flags;
+#define ICE_AQ_VSI_Q_OPT_PE_FLTR_EN		BIT(0)
+	u8 q_opt_reserved[3];
+	/* outer up section */
+	__le32 outer_up_table; /* same structure and defines as ingress tbl */
+	/* section 10 */
+	__le16 sect_10_reserved;
+	/* flow director section */
+	__le16 fd_options;
+#define ICE_AQ_VSI_FD_ENABLE			BIT(0)
+#define ICE_AQ_VSI_FD_TX_AUTO_ENABLE		BIT(1)
+#define ICE_AQ_VSI_FD_PROG_ENABLE		BIT(3)
+	__le16 max_fd_fltr_dedicated;
+	__le16 max_fd_fltr_shared;
+	__le16 fd_def_q;
+#define ICE_AQ_VSI_FD_DEF_Q_S			0
+#define ICE_AQ_VSI_FD_DEF_Q_M			(0x7FF << ICE_AQ_VSI_FD_DEF_Q_S)
+#define ICE_AQ_VSI_FD_DEF_GRP_S			12
+#define ICE_AQ_VSI_FD_DEF_GRP_M			(0x7 << ICE_AQ_VSI_FD_DEF_GRP_S)
+	__le16 fd_report_opt;
+#define ICE_AQ_VSI_FD_REPORT_Q_S		0
+#define ICE_AQ_VSI_FD_REPORT_Q_M		(0x7FF << ICE_AQ_VSI_FD_REPORT_Q_S)
+#define ICE_AQ_VSI_FD_DEF_PRIORITY_S		12
+#define ICE_AQ_VSI_FD_DEF_PRIORITY_M		(0x7 << ICE_AQ_VSI_FD_DEF_PRIORITY_S)
+#define ICE_AQ_VSI_FD_DEF_DROP			BIT(15)
+	/* PASID section */
+	__le32 pasid_id;
+#define ICE_AQ_VSI_PASID_ID_S			0
+#define ICE_AQ_VSI_PASID_ID_M			(0xFFFFF << ICE_AQ_VSI_PASID_ID_S)
+#define ICE_AQ_VSI_PASID_ID_VALID		BIT(31)
+	u8 reserved[24];
+};
+
+#define ICE_MAX_NUM_RECIPES 64
+
+/* Add/Get Recipe (indirect 0x0290/0x0292) */
+struct ice_aqc_add_get_recipe {
+	__le16 num_sub_recipes;	/* Input in Add cmd, Output in Get cmd */
+	__le16 return_index;	/* Input, used for Get cmd only */
+	u8 reserved[4];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_recipe_content {
+	u8 rid;
+#define ICE_AQ_RECIPE_ID_S		0
+#define ICE_AQ_RECIPE_ID_M		(0x3F << ICE_AQ_RECIPE_ID_S)
+#define ICE_AQ_RECIPE_ID_IS_ROOT	BIT(7)
+#define ICE_AQ_SW_ID_LKUP_IDX		0
+	u8 lkup_indx[5];
+#define ICE_AQ_RECIPE_LKUP_DATA_S	0
+#define ICE_AQ_RECIPE_LKUP_DATA_M	(0x3F << ICE_AQ_RECIPE_LKUP_DATA_S)
+#define ICE_AQ_RECIPE_LKUP_IGNORE	BIT(7)
+#define ICE_AQ_SW_ID_LKUP_MASK		0x00FF
+	__le16 mask[5];
+	u8 result_indx;
+#define ICE_AQ_RECIPE_RESULT_DATA_S	0
+#define ICE_AQ_RECIPE_RESULT_DATA_M	(0x3F << ICE_AQ_RECIPE_RESULT_DATA_S)
+#define ICE_AQ_RECIPE_RESULT_EN		BIT(7)
+	u8 rsvd0[3];
+	u8 act_ctrl_join_priority;
+	u8 act_ctrl_fwd_priority;
+#define ICE_AQ_RECIPE_FWD_PRIORITY_S	0
+#define ICE_AQ_RECIPE_FWD_PRIORITY_M	(0xF << ICE_AQ_RECIPE_FWD_PRIORITY_S)
+	u8 act_ctrl;
+#define ICE_AQ_RECIPE_ACT_NEED_PASS_L2	BIT(0)
+#define ICE_AQ_RECIPE_ACT_ALLOW_PASS_L2	BIT(1)
+#define ICE_AQ_RECIPE_ACT_INV_ACT	BIT(2)
+#define ICE_AQ_RECIPE_ACT_PRUNE_INDX_S	4
+#define ICE_AQ_RECIPE_ACT_PRUNE_INDX_M	(0x3 << ICE_AQ_RECIPE_ACT_PRUNE_INDX_S)
+	u8 rsvd1;
+	__le32 dflt_act;
+#define ICE_AQ_RECIPE_DFLT_ACT_S	0
+#define ICE_AQ_RECIPE_DFLT_ACT_M	(0x7FFFF << ICE_AQ_RECIPE_DFLT_ACT_S)
+#define ICE_AQ_RECIPE_DFLT_ACT_VALID	BIT(31)
+};
+
+struct ice_aqc_recipe_data_elem {
+	u8 recipe_indx;
+	u8 resp_bits;
+#define ICE_AQ_RECIPE_WAS_UPDATED	BIT(0)
+	u8 rsvd0[2];
+	u8 recipe_bitmap[8];
+	u8 rsvd1[4];
+	struct ice_aqc_recipe_content content;
+	u8 rsvd2[20];
+};
+
+/* Set/Get Recipes to Profile Association (direct 0x0291/0x0293) */
+struct ice_aqc_recipe_to_profile {
+	__le16 profile_id;
+	u8 rsvd[6];
+	DECLARE_BITMAP(recipe_assoc, ICE_MAX_NUM_RECIPES);
+};
+
+/* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3)
+ */
+struct ice_aqc_sw_rules {
+	/* ops: add switch rules, referring the number of rules.
+	 * ops: update switch rules, referring the number of filters
+	 * ops: remove switch rules, referring the entry index.
+	 * ops: get switch rules, referring to the number of filters.
+	 */
+	__le16 num_rules_fltr_entry_index;
+	u8 reserved[6];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Add switch rule response:
+ * Content of return buffer is same as the input buffer. The status field and
+ * LUT index are updated as part of the response
+ */
+struct ice_aqc_sw_rules_elem_hdr {
+	__le16 type; /* Switch rule type, one of T_... */
+#define ICE_AQC_SW_RULES_T_LKUP_RX		0x0
+#define ICE_AQC_SW_RULES_T_LKUP_TX		0x1
+#define ICE_AQC_SW_RULES_T_LG_ACT		0x2
+#define ICE_AQC_SW_RULES_T_VSI_LIST_SET		0x3
+#define ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR	0x4
+#define ICE_AQC_SW_RULES_T_PRUNE_LIST_SET	0x5
+#define ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR	0x6
+	__le16 status;
+} __packed __aligned(sizeof(__le16));
+
+/* Add/Update/Get/Remove lookup Rx/Tx command/response entry
+ * This structures describes the lookup rules and associated actions. "index"
+ * is returned as part of a response to a successful Add command, and can be
+ * used to identify the rule for Update/Get/Remove commands.
+ */
+struct ice_sw_rule_lkup_rx_tx {
+	struct ice_aqc_sw_rules_elem_hdr hdr;
+
+	__le16 recipe_id;
+#define ICE_SW_RECIPE_LOGICAL_PORT_FWD		10
+	/* Source port for LOOKUP_RX and source VSI in case of LOOKUP_TX */
+	__le16 src;
+	__le32 act;
+
+	/* Bit 0:1 - Action type */
+#define ICE_SINGLE_ACT_TYPE_S	0x00
+#define ICE_SINGLE_ACT_TYPE_M	(0x3 << ICE_SINGLE_ACT_TYPE_S)
+
+	/* Bit 2 - Loop back enable
+	 * Bit 3 - LAN enable
+	 */
+#define ICE_SINGLE_ACT_LB_ENABLE	BIT(2)
+#define ICE_SINGLE_ACT_LAN_ENABLE	BIT(3)
+
+	/* Action type = 0 - Forward to VSI or VSI list */
+#define ICE_SINGLE_ACT_VSI_FORWARDING	0x0
+
+#define ICE_SINGLE_ACT_VSI_ID_S		4
+#define ICE_SINGLE_ACT_VSI_ID_M		(0x3FF << ICE_SINGLE_ACT_VSI_ID_S)
+#define ICE_SINGLE_ACT_VSI_LIST_ID_S	4
+#define ICE_SINGLE_ACT_VSI_LIST_ID_M	(0x3FF << ICE_SINGLE_ACT_VSI_LIST_ID_S)
+	/* This bit needs to be set if action is forward to VSI list */
+#define ICE_SINGLE_ACT_VSI_LIST		BIT(14)
+#define ICE_SINGLE_ACT_VALID_BIT	BIT(17)
+#define ICE_SINGLE_ACT_DROP		BIT(18)
+
+	/* Action type = 1 - Forward to Queue of Queue group */
+#define ICE_SINGLE_ACT_TO_Q		0x1
+#define ICE_SINGLE_ACT_Q_INDEX_S	4
+#define ICE_SINGLE_ACT_Q_INDEX_M	(0x7FF << ICE_SINGLE_ACT_Q_INDEX_S)
+#define ICE_SINGLE_ACT_Q_REGION_S	15
+#define ICE_SINGLE_ACT_Q_REGION_M	(0x7 << ICE_SINGLE_ACT_Q_REGION_S)
+#define ICE_SINGLE_ACT_Q_PRIORITY	BIT(18)
+
+	/* Action type = 2 - Prune */
+#define ICE_SINGLE_ACT_PRUNE		0x2
+#define ICE_SINGLE_ACT_EGRESS		BIT(15)
+#define ICE_SINGLE_ACT_INGRESS		BIT(16)
+#define ICE_SINGLE_ACT_PRUNET		BIT(17)
+	/* Bit 18 should be set to 0 for this action */
+
+	/* Action type = 2 - Pointer */
+#define ICE_SINGLE_ACT_PTR		0x2
+#define ICE_SINGLE_ACT_PTR_VAL_S	4
+#define ICE_SINGLE_ACT_PTR_VAL_M	(0x1FFF << ICE_SINGLE_ACT_PTR_VAL_S)
+	/* Bit 18 should be set to 1 */
+#define ICE_SINGLE_ACT_PTR_BIT		BIT(18)
+
+	/* Action type = 3 - Other actions. Last two bits
+	 * are other action identifier
+	 */
+#define ICE_SINGLE_ACT_OTHER_ACTS		0x3
+#define ICE_SINGLE_OTHER_ACT_IDENTIFIER_S	17
+#define ICE_SINGLE_OTHER_ACT_IDENTIFIER_M	\
+				(0x3 << ICE_SINGLE_OTHER_ACT_IDENTIFIER_S)
+
+	/* Bit 17:18 - Defines other actions */
+	/* Other action = 0 - Mirror VSI */
+#define ICE_SINGLE_OTHER_ACT_MIRROR		0
+#define ICE_SINGLE_ACT_MIRROR_VSI_ID_S	4
+#define ICE_SINGLE_ACT_MIRROR_VSI_ID_M	\
+				(0x3FF << ICE_SINGLE_ACT_MIRROR_VSI_ID_S)
+
+	/* Other action = 3 - Set Stat count */
+#define ICE_SINGLE_OTHER_ACT_STAT_COUNT		3
+#define ICE_SINGLE_ACT_STAT_COUNT_INDEX_S	4
+#define ICE_SINGLE_ACT_STAT_COUNT_INDEX_M	\
+				(0x7F << ICE_SINGLE_ACT_STAT_COUNT_INDEX_S)
+
+	__le16 index; /* The index of the rule in the lookup table */
+	/* Length and values of the header to be matched per recipe or
+	 * lookup-type
+	 */
+	__le16 hdr_len;
+	u8 hdr_data[];
+} __packed __aligned(sizeof(__le16));
+
+/* Add/Update/Remove large action command/response entry
+ * "index" is returned as part of a response to a successful Add command, and
+ * can be used to identify the action for Update/Get/Remove commands.
+ */
+struct ice_sw_rule_lg_act {
+	struct ice_aqc_sw_rules_elem_hdr hdr;
+
+	__le16 index; /* Index in large action table */
+	__le16 size;
+	/* Max number of large actions */
+#define ICE_MAX_LG_ACT	4
+	/* Bit 0:1 - Action type */
+#define ICE_LG_ACT_TYPE_S	0
+#define ICE_LG_ACT_TYPE_M	(0x7 << ICE_LG_ACT_TYPE_S)
+
+	/* Action type = 0 - Forward to VSI or VSI list */
+#define ICE_LG_ACT_VSI_FORWARDING	0
+#define ICE_LG_ACT_VSI_ID_S		3
+#define ICE_LG_ACT_VSI_ID_M		(0x3FF << ICE_LG_ACT_VSI_ID_S)
+#define ICE_LG_ACT_VSI_LIST_ID_S	3
+#define ICE_LG_ACT_VSI_LIST_ID_M	(0x3FF << ICE_LG_ACT_VSI_LIST_ID_S)
+	/* This bit needs to be set if action is forward to VSI list */
+#define ICE_LG_ACT_VSI_LIST		BIT(13)
+
+#define ICE_LG_ACT_VALID_BIT		BIT(16)
+
+	/* Action type = 1 - Forward to Queue of Queue group */
+#define ICE_LG_ACT_TO_Q			0x1
+#define ICE_LG_ACT_Q_INDEX_S		3
+#define ICE_LG_ACT_Q_INDEX_M		(0x7FF << ICE_LG_ACT_Q_INDEX_S)
+#define ICE_LG_ACT_Q_REGION_S		14
+#define ICE_LG_ACT_Q_REGION_M		(0x7 << ICE_LG_ACT_Q_REGION_S)
+#define ICE_LG_ACT_Q_PRIORITY_SET	BIT(17)
+
+	/* Action type = 2 - Prune */
+#define ICE_LG_ACT_PRUNE		0x2
+#define ICE_LG_ACT_EGRESS		BIT(14)
+#define ICE_LG_ACT_INGRESS		BIT(15)
+#define ICE_LG_ACT_PRUNET		BIT(16)
+
+	/* Action type = 3 - Mirror VSI */
+#define ICE_LG_OTHER_ACT_MIRROR		0x3
+#define ICE_LG_ACT_MIRROR_VSI_ID_S	3
+#define ICE_LG_ACT_MIRROR_VSI_ID_M	(0x3FF << ICE_LG_ACT_MIRROR_VSI_ID_S)
+
+	/* Action type = 5 - Generic Value */
+#define ICE_LG_ACT_GENERIC		0x5
+#define ICE_LG_ACT_GENERIC_VALUE_S	3
+#define ICE_LG_ACT_GENERIC_VALUE_M	(0xFFFF << ICE_LG_ACT_GENERIC_VALUE_S)
+#define ICE_LG_ACT_GENERIC_OFFSET_S	19
+#define ICE_LG_ACT_GENERIC_OFFSET_M	(0x7 << ICE_LG_ACT_GENERIC_OFFSET_S)
+#define ICE_LG_ACT_GENERIC_PRIORITY_S	22
+#define ICE_LG_ACT_GENERIC_PRIORITY_M	(0x7 << ICE_LG_ACT_GENERIC_PRIORITY_S)
+#define ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX	7
+
+	/* Action = 7 - Set Stat count */
+#define ICE_LG_ACT_STAT_COUNT		0x7
+#define ICE_LG_ACT_STAT_COUNT_S		3
+#define ICE_LG_ACT_STAT_COUNT_M		(0x7F << ICE_LG_ACT_STAT_COUNT_S)
+	__le32 act[]; /* array of size for actions */
+} __packed __aligned(sizeof(__le16));
+
+/* Add/Update/Remove VSI list command/response entry
+ * "index" is returned as part of a response to a successful Add command, and
+ * can be used to identify the VSI list for Update/Get/Remove commands.
+ */
+struct ice_sw_rule_vsi_list {
+	struct ice_aqc_sw_rules_elem_hdr hdr;
+
+	__le16 index; /* Index of VSI/Prune list */
+	__le16 number_vsi;
+	__le16 vsi[]; /* Array of number_vsi VSI numbers */
+} __packed __aligned(sizeof(__le16));
+
+/* Query PFC Mode (direct 0x0302)
+ * Set PFC Mode (direct 0x0303)
+ */
+struct ice_aqc_set_query_pfc_mode {
+	u8	pfc_mode;
+/* For Query Command response, reserved in all other cases */
+#define ICE_AQC_PFC_VLAN_BASED_PFC	1
+#define ICE_AQC_PFC_DSCP_BASED_PFC	2
+	u8	rsvd[15];
+};
+/* Get Default Topology (indirect 0x0400) */
+struct ice_aqc_get_topo {
+	u8 port_num;
+	u8 num_branches;
+	__le16 reserved1;
+	__le32 reserved2;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Update TSE (indirect 0x0403)
+ * Get TSE (indirect 0x0404)
+ * Add TSE (indirect 0x0401)
+ * Delete TSE (indirect 0x040F)
+ * Move TSE (indirect 0x0408)
+ * Suspend Nodes (indirect 0x0409)
+ * Resume Nodes (indirect 0x040A)
+ */
+struct ice_aqc_sched_elem_cmd {
+	__le16 num_elem_req;	/* Used by commands */
+	__le16 num_elem_resp;	/* Used by responses */
+	__le32 reserved;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_txsched_move_grp_info_hdr {
+	__le32 src_parent_teid;
+	__le32 dest_parent_teid;
+	__le16 num_elems;
+	u8 mode;
+#define ICE_AQC_MOVE_ELEM_MODE_SAME_PF		0x0
+#define ICE_AQC_MOVE_ELEM_MODE_GIVE_OWN		0x1
+#define ICE_AQC_MOVE_ELEM_MODE_KEEP_OWN		0x2
+	u8 reserved;
+};
+
+struct ice_aqc_move_elem {
+	struct ice_aqc_txsched_move_grp_info_hdr hdr;
+	__le32 teid[];
+};
+
+struct ice_aqc_elem_info_bw {
+	__le16 bw_profile_idx;
+	__le16 bw_alloc;
+};
+
+struct ice_aqc_txsched_elem {
+	u8 elem_type; /* Special field, reserved for some aq calls */
+#define ICE_AQC_ELEM_TYPE_UNDEFINED		0x0
+#define ICE_AQC_ELEM_TYPE_ROOT_PORT		0x1
+#define ICE_AQC_ELEM_TYPE_TC			0x2
+#define ICE_AQC_ELEM_TYPE_SE_GENERIC		0x3
+#define ICE_AQC_ELEM_TYPE_ENTRY_POINT		0x4
+#define ICE_AQC_ELEM_TYPE_LEAF			0x5
+#define ICE_AQC_ELEM_TYPE_SE_PADDED		0x6
+	u8 valid_sections;
+#define ICE_AQC_ELEM_VALID_GENERIC		BIT(0)
+#define ICE_AQC_ELEM_VALID_CIR			BIT(1)
+#define ICE_AQC_ELEM_VALID_EIR			BIT(2)
+#define ICE_AQC_ELEM_VALID_SHARED		BIT(3)
+	u8 generic;
+#define ICE_AQC_ELEM_GENERIC_MODE_M		0x1
+#define ICE_AQC_ELEM_GENERIC_PRIO_S		0x1
+#define ICE_AQC_ELEM_GENERIC_PRIO_M	        GENMASK(3, 1)
+#define ICE_AQC_ELEM_GENERIC_SP_S		0x4
+#define ICE_AQC_ELEM_GENERIC_SP_M	        GENMASK(4, 4)
+#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S	0x5
+#define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_M	\
+	(0x3 << ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S)
+	u8 flags; /* Special field, reserved for some aq calls */
+#define ICE_AQC_ELEM_FLAG_SUSPEND_M		0x1
+	struct ice_aqc_elem_info_bw cir_bw;
+	struct ice_aqc_elem_info_bw eir_bw;
+	__le16 srl_id;
+	__le16 reserved2;
+};
+
+struct ice_aqc_txsched_elem_data {
+	__le32 parent_teid;
+	__le32 node_teid;
+	struct ice_aqc_txsched_elem data;
+};
+
+struct ice_aqc_txsched_topo_grp_info_hdr {
+	__le32 parent_teid;
+	__le16 num_elems;
+	__le16 reserved2;
+};
+
+struct ice_aqc_add_elem {
+	struct ice_aqc_txsched_topo_grp_info_hdr hdr;
+	struct ice_aqc_txsched_elem_data generic[];
+};
+
+struct ice_aqc_get_topo_elem {
+	struct ice_aqc_txsched_topo_grp_info_hdr hdr;
+	struct ice_aqc_txsched_elem_data
+		generic[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+};
+
+struct ice_aqc_delete_elem {
+	struct ice_aqc_txsched_topo_grp_info_hdr hdr;
+	__le32 teid[];
+};
+
+/* Query Port ETS (indirect 0x040E)
+ *
+ * This indirect command is used to query port TC node configuration.
+ */
+struct ice_aqc_query_port_ets {
+	__le32 port_teid;
+	__le32 reserved;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_port_ets_elem {
+	u8 tc_valid_bits;
+	u8 reserved[3];
+	/* 3 bits for UP per TC 0-7, 4th byte reserved */
+	__le32 up2tc;
+	u8 tc_bw_share[8];
+	__le32 port_eir_prof_id;
+	__le32 port_cir_prof_id;
+	/* 3 bits per Node priority to TC 0-7, 4th byte reserved */
+	__le32 tc_node_prio;
+#define ICE_TC_NODE_PRIO_S	0x4
+	u8 reserved1[4];
+	__le32 tc_node_teid[8]; /* Used for response, reserved in command */
+};
+
+/* Rate limiting profile for
+ * Add RL profile (indirect 0x0410)
+ * Query RL profile (indirect 0x0411)
+ * Remove RL profile (indirect 0x0415)
+ * These indirect commands acts on single or multiple
+ * RL profiles with specified data.
+ */
+struct ice_aqc_rl_profile {
+	__le16 num_profiles;
+	__le16 num_processed; /* Only for response. Reserved in Command. */
+	u8 reserved[4];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_rl_profile_elem {
+	u8 level;
+	u8 flags;
+#define ICE_AQC_RL_PROFILE_TYPE_S	0x0
+#define ICE_AQC_RL_PROFILE_TYPE_M	(0x3 << ICE_AQC_RL_PROFILE_TYPE_S)
+#define ICE_AQC_RL_PROFILE_TYPE_CIR	0
+#define ICE_AQC_RL_PROFILE_TYPE_EIR	1
+#define ICE_AQC_RL_PROFILE_TYPE_SRL	2
+/* The following flag is used for Query RL Profile Data */
+#define ICE_AQC_RL_PROFILE_INVAL_S	0x7
+#define ICE_AQC_RL_PROFILE_INVAL_M	(0x1 << ICE_AQC_RL_PROFILE_INVAL_S)
+
+	__le16 profile_id;
+	__le16 max_burst_size;
+	__le16 rl_multiply;
+	__le16 wake_up_calc;
+	__le16 rl_encode;
+};
+
+/* Query Scheduler Resource Allocation (indirect 0x0412)
+ * This indirect command retrieves the scheduler resources allocated by
+ * EMP Firmware to the given PF.
+ */
+struct ice_aqc_query_txsched_res {
+	u8 reserved[8];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_generic_sched_props {
+	__le16 phys_levels;
+	__le16 logical_levels;
+	u8 flattening_bitmap;
+	u8 max_device_cgds;
+	u8 max_pf_cgds;
+	u8 rsvd0;
+	__le16 rdma_qsets;
+	u8 rsvd1[22];
+};
+
+struct ice_aqc_layer_props {
+	u8 logical_layer;
+	u8 chunk_size;
+	__le16 max_device_nodes;
+	__le16 max_pf_nodes;
+	u8 rsvd0[4];
+	__le16 max_sibl_grp_sz;
+	__le16 max_cir_rl_profiles;
+	__le16 max_eir_rl_profiles;
+	__le16 max_srl_profiles;
+	u8 rsvd1[14];
+};
+
+struct ice_aqc_query_txsched_res_resp {
+	struct ice_aqc_generic_sched_props sched_props;
+	struct ice_aqc_layer_props layer_props[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+};
+
+/* Get PHY capabilities (indirect 0x0600) */
+struct ice_aqc_get_phy_caps {
+	u8 lport_num;
+	u8 reserved;
+	__le16 param0;
+	/* 18.0 - Report qualified modules */
+#define ICE_AQC_GET_PHY_RQM		BIT(0)
+	/* 18.1 - 18.3 : Report mode
+	 * 000b - Report NVM capabilities
+	 * 001b - Report topology capabilities
+	 * 010b - Report SW configured
+	 * 100b - Report default capabilities
+	 */
+#define ICE_AQC_REPORT_MODE_S			1
+#define ICE_AQC_REPORT_MODE_M			(7 << ICE_AQC_REPORT_MODE_S)
+#define ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA	0
+#define ICE_AQC_REPORT_TOPO_CAP_MEDIA		BIT(1)
+#define ICE_AQC_REPORT_ACTIVE_CFG		BIT(2)
+#define ICE_AQC_REPORT_DFLT_CFG		BIT(3)
+	__le32 reserved1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* This is #define of PHY type (Extended):
+ * The first set of defines is for phy_type_low.
+ */
+#define ICE_PHY_TYPE_LOW_100BASE_TX		BIT_ULL(0)
+#define ICE_PHY_TYPE_LOW_100M_SGMII		BIT_ULL(1)
+#define ICE_PHY_TYPE_LOW_1000BASE_T		BIT_ULL(2)
+#define ICE_PHY_TYPE_LOW_1000BASE_SX		BIT_ULL(3)
+#define ICE_PHY_TYPE_LOW_1000BASE_LX		BIT_ULL(4)
+#define ICE_PHY_TYPE_LOW_1000BASE_KX		BIT_ULL(5)
+#define ICE_PHY_TYPE_LOW_1G_SGMII		BIT_ULL(6)
+#define ICE_PHY_TYPE_LOW_2500BASE_T		BIT_ULL(7)
+#define ICE_PHY_TYPE_LOW_2500BASE_X		BIT_ULL(8)
+#define ICE_PHY_TYPE_LOW_2500BASE_KX		BIT_ULL(9)
+#define ICE_PHY_TYPE_LOW_5GBASE_T		BIT_ULL(10)
+#define ICE_PHY_TYPE_LOW_5GBASE_KR		BIT_ULL(11)
+#define ICE_PHY_TYPE_LOW_10GBASE_T		BIT_ULL(12)
+#define ICE_PHY_TYPE_LOW_10G_SFI_DA		BIT_ULL(13)
+#define ICE_PHY_TYPE_LOW_10GBASE_SR		BIT_ULL(14)
+#define ICE_PHY_TYPE_LOW_10GBASE_LR		BIT_ULL(15)
+#define ICE_PHY_TYPE_LOW_10GBASE_KR_CR1		BIT_ULL(16)
+#define ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC	BIT_ULL(17)
+#define ICE_PHY_TYPE_LOW_10G_SFI_C2C		BIT_ULL(18)
+#define ICE_PHY_TYPE_LOW_25GBASE_T		BIT_ULL(19)
+#define ICE_PHY_TYPE_LOW_25GBASE_CR		BIT_ULL(20)
+#define ICE_PHY_TYPE_LOW_25GBASE_CR_S		BIT_ULL(21)
+#define ICE_PHY_TYPE_LOW_25GBASE_CR1		BIT_ULL(22)
+#define ICE_PHY_TYPE_LOW_25GBASE_SR		BIT_ULL(23)
+#define ICE_PHY_TYPE_LOW_25GBASE_LR		BIT_ULL(24)
+#define ICE_PHY_TYPE_LOW_25GBASE_KR		BIT_ULL(25)
+#define ICE_PHY_TYPE_LOW_25GBASE_KR_S		BIT_ULL(26)
+#define ICE_PHY_TYPE_LOW_25GBASE_KR1		BIT_ULL(27)
+#define ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC	BIT_ULL(28)
+#define ICE_PHY_TYPE_LOW_25G_AUI_C2C		BIT_ULL(29)
+#define ICE_PHY_TYPE_LOW_40GBASE_CR4		BIT_ULL(30)
+#define ICE_PHY_TYPE_LOW_40GBASE_SR4		BIT_ULL(31)
+#define ICE_PHY_TYPE_LOW_40GBASE_LR4		BIT_ULL(32)
+#define ICE_PHY_TYPE_LOW_40GBASE_KR4		BIT_ULL(33)
+#define ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC	BIT_ULL(34)
+#define ICE_PHY_TYPE_LOW_40G_XLAUI		BIT_ULL(35)
+#define ICE_PHY_TYPE_LOW_50GBASE_CR2		BIT_ULL(36)
+#define ICE_PHY_TYPE_LOW_50GBASE_SR2		BIT_ULL(37)
+#define ICE_PHY_TYPE_LOW_50GBASE_LR2		BIT_ULL(38)
+#define ICE_PHY_TYPE_LOW_50GBASE_KR2		BIT_ULL(39)
+#define ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC	BIT_ULL(40)
+#define ICE_PHY_TYPE_LOW_50G_LAUI2		BIT_ULL(41)
+#define ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC	BIT_ULL(42)
+#define ICE_PHY_TYPE_LOW_50G_AUI2		BIT_ULL(43)
+#define ICE_PHY_TYPE_LOW_50GBASE_CP		BIT_ULL(44)
+#define ICE_PHY_TYPE_LOW_50GBASE_SR		BIT_ULL(45)
+#define ICE_PHY_TYPE_LOW_50GBASE_FR		BIT_ULL(46)
+#define ICE_PHY_TYPE_LOW_50GBASE_LR		BIT_ULL(47)
+#define ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4	BIT_ULL(48)
+#define ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC	BIT_ULL(49)
+#define ICE_PHY_TYPE_LOW_50G_AUI1		BIT_ULL(50)
+#define ICE_PHY_TYPE_LOW_100GBASE_CR4		BIT_ULL(51)
+#define ICE_PHY_TYPE_LOW_100GBASE_SR4		BIT_ULL(52)
+#define ICE_PHY_TYPE_LOW_100GBASE_LR4		BIT_ULL(53)
+#define ICE_PHY_TYPE_LOW_100GBASE_KR4		BIT_ULL(54)
+#define ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC	BIT_ULL(55)
+#define ICE_PHY_TYPE_LOW_100G_CAUI4		BIT_ULL(56)
+#define ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC	BIT_ULL(57)
+#define ICE_PHY_TYPE_LOW_100G_AUI4		BIT_ULL(58)
+#define ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4	BIT_ULL(59)
+#define ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4	BIT_ULL(60)
+#define ICE_PHY_TYPE_LOW_100GBASE_CP2		BIT_ULL(61)
+#define ICE_PHY_TYPE_LOW_100GBASE_SR2		BIT_ULL(62)
+#define ICE_PHY_TYPE_LOW_100GBASE_DR		BIT_ULL(63)
+#define ICE_PHY_TYPE_LOW_MAX_INDEX		63
+/* The second set of defines is for phy_type_high. */
+#define ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4	BIT_ULL(0)
+#define ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC	BIT_ULL(1)
+#define ICE_PHY_TYPE_HIGH_100G_CAUI2		BIT_ULL(2)
+#define ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC	BIT_ULL(3)
+#define ICE_PHY_TYPE_HIGH_100G_AUI2		BIT_ULL(4)
+#define ICE_PHY_TYPE_HIGH_MAX_INDEX		4
+
+struct ice_aqc_get_phy_caps_data {
+	__le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
+	__le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */
+	u8 caps;
+#define ICE_AQC_PHY_EN_TX_LINK_PAUSE			BIT(0)
+#define ICE_AQC_PHY_EN_RX_LINK_PAUSE			BIT(1)
+#define ICE_AQC_PHY_LOW_POWER_MODE			BIT(2)
+#define ICE_AQC_PHY_EN_LINK				BIT(3)
+#define ICE_AQC_PHY_AN_MODE				BIT(4)
+#define ICE_AQC_GET_PHY_EN_MOD_QUAL			BIT(5)
+#define ICE_AQC_PHY_EN_AUTO_FEC				BIT(7)
+#define ICE_AQC_PHY_CAPS_MASK				ICE_M(0xff, 0)
+	u8 low_power_ctrl_an;
+#define ICE_AQC_PHY_EN_D3COLD_LOW_POWER_AUTONEG		BIT(0)
+#define ICE_AQC_PHY_AN_EN_CLAUSE28			BIT(1)
+#define ICE_AQC_PHY_AN_EN_CLAUSE73			BIT(2)
+#define ICE_AQC_PHY_AN_EN_CLAUSE37			BIT(3)
+	__le16 eee_cap;
+#define ICE_AQC_PHY_EEE_EN_100BASE_TX			BIT(0)
+#define ICE_AQC_PHY_EEE_EN_1000BASE_T			BIT(1)
+#define ICE_AQC_PHY_EEE_EN_10GBASE_T			BIT(2)
+#define ICE_AQC_PHY_EEE_EN_1000BASE_KX			BIT(3)
+#define ICE_AQC_PHY_EEE_EN_10GBASE_KR			BIT(4)
+#define ICE_AQC_PHY_EEE_EN_25GBASE_KR			BIT(5)
+#define ICE_AQC_PHY_EEE_EN_40GBASE_KR4			BIT(6)
+	__le16 eeer_value;
+	u8 phy_id_oui[4]; /* PHY/Module ID connected on the port */
+	u8 phy_fw_ver[8];
+	u8 link_fec_options;
+#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN		BIT(0)
+#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ		BIT(1)
+#define ICE_AQC_PHY_FEC_25G_RS_528_REQ			BIT(2)
+#define ICE_AQC_PHY_FEC_25G_KR_REQ			BIT(3)
+#define ICE_AQC_PHY_FEC_25G_RS_544_REQ			BIT(4)
+#define ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN		BIT(6)
+#define ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN		BIT(7)
+#define ICE_AQC_PHY_FEC_MASK				ICE_M(0xdf, 0)
+	u8 module_compliance_enforcement;
+#define ICE_AQC_MOD_ENFORCE_STRICT_MODE			BIT(0)
+	u8 extended_compliance_code;
+#define ICE_MODULE_TYPE_TOTAL_BYTE			3
+	u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE];
+#define ICE_AQC_MOD_TYPE_BYTE0_SFP_PLUS			0xA0
+#define ICE_AQC_MOD_TYPE_BYTE0_QSFP_PLUS		0x80
+#define ICE_AQC_MOD_TYPE_IDENT				1
+#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE	BIT(0)
+#define ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE	BIT(1)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_SR		BIT(4)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_LR		BIT(5)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_LRM		BIT(6)
+#define ICE_AQC_MOD_TYPE_BYTE1_10G_BASE_ER		BIT(7)
+#define ICE_AQC_MOD_TYPE_BYTE2_SFP_PLUS			0xA0
+#define ICE_AQC_MOD_TYPE_BYTE2_QSFP_PLUS		0x86
+	u8 qualified_module_count;
+	u8 rsvd2[7];	/* Bytes 47:41 reserved */
+#define ICE_AQC_QUAL_MOD_COUNT_MAX			16
+	struct {
+		u8 v_oui[3];
+		u8 rsvd3;
+		u8 v_part[16];
+		__le32 v_rev;
+		__le64 rsvd4;
+	} qual_modules[ICE_AQC_QUAL_MOD_COUNT_MAX];
+};
+
+/* Set PHY capabilities (direct 0x0601)
+ * NOTE: This command must be followed by setup link and restart auto-neg
+ */
+struct ice_aqc_set_phy_cfg {
+	u8 lport_num;
+	u8 reserved[7];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Set PHY config command data structure */
+struct ice_aqc_set_phy_cfg_data {
+	__le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
+	__le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */
+	u8 caps;
+#define ICE_AQ_PHY_ENA_VALID_MASK	ICE_M(0xef, 0)
+#define ICE_AQ_PHY_ENA_TX_PAUSE_ABILITY	BIT(0)
+#define ICE_AQ_PHY_ENA_RX_PAUSE_ABILITY	BIT(1)
+#define ICE_AQ_PHY_ENA_LOW_POWER	BIT(2)
+#define ICE_AQ_PHY_ENA_LINK		BIT(3)
+#define ICE_AQ_PHY_ENA_AUTO_LINK_UPDT	BIT(5)
+#define ICE_AQ_PHY_ENA_LESM		BIT(6)
+#define ICE_AQ_PHY_ENA_AUTO_FEC		BIT(7)
+	u8 low_power_ctrl_an;
+	__le16 eee_cap; /* Value from ice_aqc_get_phy_caps */
+	__le16 eeer_value;
+	u8 link_fec_opt; /* Use defines from ice_aqc_get_phy_caps */
+	u8 module_compliance_enforcement;
+};
+
+/* Set MAC Config command data structure (direct 0x0603) */
+struct ice_aqc_set_mac_cfg {
+	__le16 max_frame_size;
+	u8 params;
+#define ICE_AQ_SET_MAC_PACE_S		3
+#define ICE_AQ_SET_MAC_PACE_M		(0xF << ICE_AQ_SET_MAC_PACE_S)
+#define ICE_AQ_SET_MAC_PACE_TYPE_M	BIT(7)
+#define ICE_AQ_SET_MAC_PACE_TYPE_RATE	0
+#define ICE_AQ_SET_MAC_PACE_TYPE_FIXED	ICE_AQ_SET_MAC_PACE_TYPE_M
+	u8 tx_tmr_priority;
+	__le16 tx_tmr_value;
+	__le16 fc_refresh_threshold;
+	u8 drop_opts;
+#define ICE_AQ_SET_MAC_AUTO_DROP_MASK		BIT(0)
+#define ICE_AQ_SET_MAC_AUTO_DROP_NONE		0
+#define ICE_AQ_SET_MAC_AUTO_DROP_BLOCKING_PKTS	BIT(0)
+	u8 reserved[7];
+};
+
+/* Restart AN command data structure (direct 0x0605)
+ * Also used for response, with only the lport_num field present.
+ */
+struct ice_aqc_restart_an {
+	u8 lport_num;
+	u8 reserved;
+	u8 cmd_flags;
+#define ICE_AQC_RESTART_AN_LINK_RESTART	BIT(1)
+#define ICE_AQC_RESTART_AN_LINK_ENABLE	BIT(2)
+	u8 reserved2[13];
+};
+
+/* Get link status (indirect 0x0607), also used for Link Status Event */
+struct ice_aqc_get_link_status {
+	u8 lport_num;
+	u8 reserved;
+	__le16 cmd_flags;
+#define ICE_AQ_LSE_M			0x3
+#define ICE_AQ_LSE_NOP			0x0
+#define ICE_AQ_LSE_DIS			0x2
+#define ICE_AQ_LSE_ENA			0x3
+	/* only response uses this flag */
+#define ICE_AQ_LSE_IS_ENABLED		0x1
+	__le32 reserved2;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Get link status response data structure, also used for Link Status Event */
+struct ice_aqc_get_link_status_data {
+	u8 topo_media_conflict;
+#define ICE_AQ_LINK_TOPO_CONFLICT	BIT(0)
+#define ICE_AQ_LINK_MEDIA_CONFLICT	BIT(1)
+#define ICE_AQ_LINK_TOPO_CORRUPT	BIT(2)
+#define ICE_AQ_LINK_TOPO_UNREACH_PRT	BIT(4)
+#define ICE_AQ_LINK_TOPO_UNDRUTIL_PRT	BIT(5)
+#define ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA	BIT(6)
+#define ICE_AQ_LINK_TOPO_UNSUPP_MEDIA	BIT(7)
+	u8 link_cfg_err;
+#define ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED	BIT(5)
+#define ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE	BIT(6)
+#define ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT	BIT(7)
+	u8 link_info;
+#define ICE_AQ_LINK_UP			BIT(0)	/* Link Status */
+#define ICE_AQ_LINK_FAULT		BIT(1)
+#define ICE_AQ_LINK_FAULT_TX		BIT(2)
+#define ICE_AQ_LINK_FAULT_RX		BIT(3)
+#define ICE_AQ_LINK_FAULT_REMOTE	BIT(4)
+#define ICE_AQ_LINK_UP_PORT		BIT(5)	/* External Port Link Status */
+#define ICE_AQ_MEDIA_AVAILABLE		BIT(6)
+#define ICE_AQ_SIGNAL_DETECT		BIT(7)
+	u8 an_info;
+#define ICE_AQ_AN_COMPLETED		BIT(0)
+#define ICE_AQ_LP_AN_ABILITY		BIT(1)
+#define ICE_AQ_PD_FAULT			BIT(2)	/* Parallel Detection Fault */
+#define ICE_AQ_FEC_EN			BIT(3)
+#define ICE_AQ_PHY_LOW_POWER		BIT(4)	/* Low Power State */
+#define ICE_AQ_LINK_PAUSE_TX		BIT(5)
+#define ICE_AQ_LINK_PAUSE_RX		BIT(6)
+#define ICE_AQ_QUALIFIED_MODULE		BIT(7)
+	u8 ext_info;
+#define ICE_AQ_LINK_PHY_TEMP_ALARM	BIT(0)
+#define ICE_AQ_LINK_EXCESSIVE_ERRORS	BIT(1)	/* Excessive Link Errors */
+	/* Port Tx Suspended */
+#define ICE_AQ_LINK_TX_S		2
+#define ICE_AQ_LINK_TX_M		(0x03 << ICE_AQ_LINK_TX_S)
+#define ICE_AQ_LINK_TX_ACTIVE		0
+#define ICE_AQ_LINK_TX_DRAINED		1
+#define ICE_AQ_LINK_TX_FLUSHED		3
+	u8 reserved2;
+	__le16 max_frame_size;
+	u8 cfg;
+#define ICE_AQ_LINK_25G_KR_FEC_EN	BIT(0)
+#define ICE_AQ_LINK_25G_RS_528_FEC_EN	BIT(1)
+#define ICE_AQ_LINK_25G_RS_544_FEC_EN	BIT(2)
+#define ICE_AQ_FEC_MASK			ICE_M(0x7, 0)
+	/* Pacing Config */
+#define ICE_AQ_CFG_PACING_S		3
+#define ICE_AQ_CFG_PACING_M		(0xF << ICE_AQ_CFG_PACING_S)
+#define ICE_AQ_CFG_PACING_TYPE_M	BIT(7)
+#define ICE_AQ_CFG_PACING_TYPE_AVG	0
+#define ICE_AQ_CFG_PACING_TYPE_FIXED	ICE_AQ_CFG_PACING_TYPE_M
+	/* External Device Power Ability */
+	u8 power_desc;
+#define ICE_AQ_PWR_CLASS_M		0x3F
+#define ICE_AQ_LINK_PWR_BASET_LOW_HIGH	0
+#define ICE_AQ_LINK_PWR_BASET_HIGH	1
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_1	0
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_2	1
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_3	2
+#define ICE_AQ_LINK_PWR_QSFP_CLASS_4	3
+	__le16 link_speed;
+#define ICE_AQ_LINK_SPEED_M		0x7FF
+#define ICE_AQ_LINK_SPEED_10MB		BIT(0)
+#define ICE_AQ_LINK_SPEED_100MB		BIT(1)
+#define ICE_AQ_LINK_SPEED_1000MB	BIT(2)
+#define ICE_AQ_LINK_SPEED_2500MB	BIT(3)
+#define ICE_AQ_LINK_SPEED_5GB		BIT(4)
+#define ICE_AQ_LINK_SPEED_10GB		BIT(5)
+#define ICE_AQ_LINK_SPEED_20GB		BIT(6)
+#define ICE_AQ_LINK_SPEED_25GB		BIT(7)
+#define ICE_AQ_LINK_SPEED_40GB		BIT(8)
+#define ICE_AQ_LINK_SPEED_50GB		BIT(9)
+#define ICE_AQ_LINK_SPEED_100GB		BIT(10)
+#define ICE_AQ_LINK_SPEED_UNKNOWN	BIT(15)
+	__le32 reserved3; /* Aligns next field to 8-byte boundary */
+	__le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
+	__le64 phy_type_high; /* Use values from ICE_PHY_TYPE_HIGH_* */
+};
+
+/* Set event mask command (direct 0x0613) */
+struct ice_aqc_set_event_mask {
+	u8	lport_num;
+	u8	reserved[7];
+	__le16	event_mask;
+#define ICE_AQ_LINK_EVENT_UPDOWN		BIT(1)
+#define ICE_AQ_LINK_EVENT_MEDIA_NA		BIT(2)
+#define ICE_AQ_LINK_EVENT_LINK_FAULT		BIT(3)
+#define ICE_AQ_LINK_EVENT_PHY_TEMP_ALARM	BIT(4)
+#define ICE_AQ_LINK_EVENT_EXCESSIVE_ERRORS	BIT(5)
+#define ICE_AQ_LINK_EVENT_SIGNAL_DETECT		BIT(6)
+#define ICE_AQ_LINK_EVENT_AN_COMPLETED		BIT(7)
+#define ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL	BIT(8)
+#define ICE_AQ_LINK_EVENT_PORT_TX_SUSPENDED	BIT(9)
+#define ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL	BIT(12)
+	u8	reserved1[6];
+};
+
+/* Set MAC Loopback command (direct 0x0620) */
+struct ice_aqc_set_mac_lb {
+	u8 lb_mode;
+#define ICE_AQ_MAC_LB_EN		BIT(0)
+#define ICE_AQ_MAC_LB_OSC_CLK		BIT(1)
+	u8 reserved[15];
+};
+
+struct ice_aqc_link_topo_params {
+	u8 lport_num;
+	u8 lport_num_valid;
+#define ICE_AQC_LINK_TOPO_PORT_NUM_VALID	BIT(0)
+	u8 node_type_ctx;
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_S		0
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_M	(0xF << ICE_AQC_LINK_TOPO_NODE_TYPE_S)
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_PHY		0
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL	1
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_MUX_CTRL	2
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_LED_CTRL	3
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_LED		4
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_THERMAL	5
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_CAGE	6
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_MEZZ	7
+#define ICE_AQC_LINK_TOPO_NODE_TYPE_ID_EEPROM	8
+#define ICE_AQC_LINK_TOPO_NODE_CTX_S		4
+#define ICE_AQC_LINK_TOPO_NODE_CTX_M		\
+				(0xF << ICE_AQC_LINK_TOPO_NODE_CTX_S)
+#define ICE_AQC_LINK_TOPO_NODE_CTX_GLOBAL	0
+#define ICE_AQC_LINK_TOPO_NODE_CTX_BOARD	1
+#define ICE_AQC_LINK_TOPO_NODE_CTX_PORT		2
+#define ICE_AQC_LINK_TOPO_NODE_CTX_NODE		3
+#define ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED	4
+#define ICE_AQC_LINK_TOPO_NODE_CTX_OVERRIDE	5
+	u8 index;
+};
+
+struct ice_aqc_link_topo_addr {
+	struct ice_aqc_link_topo_params topo_params;
+	__le16 handle;
+#define ICE_AQC_LINK_TOPO_HANDLE_S	0
+#define ICE_AQC_LINK_TOPO_HANDLE_M	(0x3FF << ICE_AQC_LINK_TOPO_HANDLE_S)
+/* Used to decode the handle field */
+#define ICE_AQC_LINK_TOPO_HANDLE_BRD_TYPE_M	BIT(9)
+#define ICE_AQC_LINK_TOPO_HANDLE_BRD_TYPE_LOM	BIT(9)
+#define ICE_AQC_LINK_TOPO_HANDLE_BRD_TYPE_MEZZ	0
+#define ICE_AQC_LINK_TOPO_HANDLE_NODE_S		0
+/* In case of a Mezzanine type */
+#define ICE_AQC_LINK_TOPO_HANDLE_MEZZ_NODE_M	\
+				(0x3F << ICE_AQC_LINK_TOPO_HANDLE_NODE_S)
+#define ICE_AQC_LINK_TOPO_HANDLE_MEZZ_S	6
+#define ICE_AQC_LINK_TOPO_HANDLE_MEZZ_M	(0x7 << ICE_AQC_LINK_TOPO_HANDLE_MEZZ_S)
+/* In case of a LOM type */
+#define ICE_AQC_LINK_TOPO_HANDLE_LOM_NODE_M	\
+				(0x1FF << ICE_AQC_LINK_TOPO_HANDLE_NODE_S)
+};
+
+/* Get Link Topology Handle (direct, 0x06E0) */
+struct ice_aqc_get_link_topo {
+	struct ice_aqc_link_topo_addr addr;
+	u8 node_part_num;
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575	0x21
+#define ICE_AQC_GET_LINK_TOPO_NODE_NR_C827	0x31
+	u8 rsvd[9];
+};
+
+/* Read/Write I2C (direct, 0x06E2/0x06E3) */
+struct ice_aqc_i2c {
+	struct ice_aqc_link_topo_addr topo_addr;
+	__le16 i2c_addr;
+	u8 i2c_params;
+#define ICE_AQC_I2C_DATA_SIZE_M		GENMASK(3, 0)
+#define ICE_AQC_I2C_USE_REPEATED_START	BIT(7)
+
+	u8 rsvd;
+	__le16 i2c_bus_addr;
+	u8 i2c_data[4]; /* Used only by write command, reserved in read. */
+};
+
+/* Read I2C Response (direct, 0x06E2) */
+struct ice_aqc_read_i2c_resp {
+	u8 i2c_data[16];
+};
+
+/* Set Port Identification LED (direct, 0x06E9) */
+struct ice_aqc_set_port_id_led {
+	u8 lport_num;
+	u8 lport_num_valid;
+	u8 ident_mode;
+#define ICE_AQC_PORT_IDENT_LED_BLINK	BIT(0)
+#define ICE_AQC_PORT_IDENT_LED_ORIG	0
+	u8 rsvd[13];
+};
+
+/* Get Port Options (indirect, 0x06EA) */
+struct ice_aqc_get_port_options {
+	u8 lport_num;
+	u8 lport_num_valid;
+	u8 port_options_count;
+#define ICE_AQC_PORT_OPT_COUNT_M	GENMASK(3, 0)
+#define ICE_AQC_PORT_OPT_MAX		16
+
+	u8 innermost_phy_index;
+	u8 port_options;
+#define ICE_AQC_PORT_OPT_ACTIVE_M	GENMASK(3, 0)
+#define ICE_AQC_PORT_OPT_VALID		BIT(7)
+
+	u8 pending_port_option_status;
+#define ICE_AQC_PENDING_PORT_OPT_IDX_M	GENMASK(3, 0)
+#define ICE_AQC_PENDING_PORT_OPT_VALID	BIT(7)
+
+	u8 rsvd[2];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_get_port_options_elem {
+	u8 pmd;
+#define ICE_AQC_PORT_OPT_PMD_COUNT_M	GENMASK(3, 0)
+
+	u8 max_lane_speed;
+#define ICE_AQC_PORT_OPT_MAX_LANE_M	GENMASK(3, 0)
+#define ICE_AQC_PORT_OPT_MAX_LANE_100M	0
+#define ICE_AQC_PORT_OPT_MAX_LANE_1G	1
+#define ICE_AQC_PORT_OPT_MAX_LANE_2500M	2
+#define ICE_AQC_PORT_OPT_MAX_LANE_5G	3
+#define ICE_AQC_PORT_OPT_MAX_LANE_10G	4
+#define ICE_AQC_PORT_OPT_MAX_LANE_25G	5
+#define ICE_AQC_PORT_OPT_MAX_LANE_50G	6
+#define ICE_AQC_PORT_OPT_MAX_LANE_100G	7
+
+	u8 global_scid[2];
+	u8 phy_scid[2];
+	u8 pf2port_cid[2];
+};
+
+/* Set Port Option (direct, 0x06EB) */
+struct ice_aqc_set_port_option {
+	u8 lport_num;
+	u8 lport_num_valid;
+	u8 selected_port_option;
+	u8 rsvd[13];
+};
+
+/* Set/Get GPIO (direct, 0x06EC/0x06ED) */
+struct ice_aqc_gpio {
+	__le16 gpio_ctrl_handle;
+#define ICE_AQC_GPIO_HANDLE_S	0
+#define ICE_AQC_GPIO_HANDLE_M	(0x3FF << ICE_AQC_GPIO_HANDLE_S)
+	u8 gpio_num;
+	u8 gpio_val;
+	u8 rsvd[12];
+};
+
+/* Read/Write SFF EEPROM command (indirect 0x06EE) */
+struct ice_aqc_sff_eeprom {
+	u8 lport_num;
+	u8 lport_num_valid;
+#define ICE_AQC_SFF_PORT_NUM_VALID	BIT(0)
+	__le16 i2c_bus_addr;
+#define ICE_AQC_SFF_I2CBUS_7BIT_M	0x7F
+#define ICE_AQC_SFF_I2CBUS_10BIT_M	0x3FF
+#define ICE_AQC_SFF_I2CBUS_TYPE_M	BIT(10)
+#define ICE_AQC_SFF_I2CBUS_TYPE_7BIT	0
+#define ICE_AQC_SFF_I2CBUS_TYPE_10BIT	ICE_AQC_SFF_I2CBUS_TYPE_M
+#define ICE_AQC_SFF_SET_EEPROM_PAGE_S	11
+#define ICE_AQC_SFF_SET_EEPROM_PAGE_M	(0x3 << ICE_AQC_SFF_SET_EEPROM_PAGE_S)
+#define ICE_AQC_SFF_NO_PAGE_CHANGE	0
+#define ICE_AQC_SFF_SET_23_ON_MISMATCH	1
+#define ICE_AQC_SFF_SET_22_ON_MISMATCH	2
+#define ICE_AQC_SFF_IS_WRITE		BIT(15)
+	__le16 i2c_mem_addr;
+	__le16 eeprom_page;
+#define  ICE_AQC_SFF_EEPROM_BANK_S 0
+#define  ICE_AQC_SFF_EEPROM_BANK_M (0xFF << ICE_AQC_SFF_EEPROM_BANK_S)
+#define  ICE_AQC_SFF_EEPROM_PAGE_S 8
+#define  ICE_AQC_SFF_EEPROM_PAGE_M (0xFF << ICE_AQC_SFF_EEPROM_PAGE_S)
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* NVM Read command (indirect 0x0701)
+ * NVM Erase commands (direct 0x0702)
+ * NVM Update commands (indirect 0x0703)
+ */
+struct ice_aqc_nvm {
+#define ICE_AQC_NVM_MAX_OFFSET		0xFFFFFF
+	__le16 offset_low;
+	u8 offset_high;
+	u8 cmd_flags;
+#define ICE_AQC_NVM_LAST_CMD		BIT(0)
+#define ICE_AQC_NVM_PCIR_REQ		BIT(0)	/* Used by NVM Update reply */
+#define ICE_AQC_NVM_PRESERVATION_S	1
+#define ICE_AQC_NVM_PRESERVATION_M	(3 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_NO_PRESERVATION	(0 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_PRESERVE_ALL	BIT(1)
+#define ICE_AQC_NVM_FACTORY_DEFAULT	(2 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_PRESERVE_SELECTED	(3 << ICE_AQC_NVM_PRESERVATION_S)
+#define ICE_AQC_NVM_ACTIV_SEL_NVM	BIT(3) /* Write Activate/SR Dump only */
+#define ICE_AQC_NVM_ACTIV_SEL_OROM	BIT(4)
+#define ICE_AQC_NVM_ACTIV_SEL_NETLIST	BIT(5)
+#define ICE_AQC_NVM_SPECIAL_UPDATE	BIT(6)
+#define ICE_AQC_NVM_REVERT_LAST_ACTIV	BIT(6) /* Write Activate only */
+#define ICE_AQC_NVM_ACTIV_SEL_MASK	ICE_M(0x7, 3)
+#define ICE_AQC_NVM_FLASH_ONLY		BIT(7)
+#define ICE_AQC_NVM_RESET_LVL_M		ICE_M(0x3, 0) /* Write reply only */
+#define ICE_AQC_NVM_POR_FLAG		0
+#define ICE_AQC_NVM_PERST_FLAG		1
+#define ICE_AQC_NVM_EMPR_FLAG		2
+#define ICE_AQC_NVM_EMPR_ENA		BIT(0) /* Write Activate reply only */
+	/* For Write Activate, several flags are sent as part of a separate
+	 * flags2 field using a separate byte. For simplicity of the software
+	 * interface, we pass the flags as a 16 bit value so these flags are
+	 * all offset by 8 bits
+	 */
+#define ICE_AQC_NVM_ACTIV_REQ_EMPR	BIT(8) /* NVM Write Activate only */
+	__le16 module_typeid;
+	__le16 length;
+#define ICE_AQC_NVM_ERASE_LEN	0xFFFF
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+#define ICE_AQC_NVM_START_POINT			0
+
+/* NVM Checksum Command (direct, 0x0706) */
+struct ice_aqc_nvm_checksum {
+	u8 flags;
+#define ICE_AQC_NVM_CHECKSUM_VERIFY	BIT(0)
+#define ICE_AQC_NVM_CHECKSUM_RECALC	BIT(1)
+	u8 rsvd;
+	__le16 checksum; /* Used only by response */
+#define ICE_AQC_NVM_CHECKSUM_CORRECT	0xBABA
+	u8 rsvd2[12];
+};
+
+/* Used for NVM Set Package Data command - 0x070A */
+struct ice_aqc_nvm_pkg_data {
+	u8 reserved[3];
+	u8 cmd_flags;
+#define ICE_AQC_NVM_PKG_DELETE		BIT(0) /* used for command call */
+#define ICE_AQC_NVM_PKG_SKIPPED		BIT(0) /* used for command response */
+
+	u32 reserved1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Used for Pass Component Table command - 0x070B */
+struct ice_aqc_nvm_pass_comp_tbl {
+	u8 component_response; /* Response only */
+#define ICE_AQ_NVM_PASS_COMP_CAN_BE_UPDATED		0x0
+#define ICE_AQ_NVM_PASS_COMP_CAN_MAY_BE_UPDATEABLE	0x1
+#define ICE_AQ_NVM_PASS_COMP_CAN_NOT_BE_UPDATED		0x2
+	u8 component_response_code; /* Response only */
+#define ICE_AQ_NVM_PASS_COMP_CAN_BE_UPDATED_CODE	0x0
+#define ICE_AQ_NVM_PASS_COMP_STAMP_IDENTICAL_CODE	0x1
+#define ICE_AQ_NVM_PASS_COMP_STAMP_LOWER		0x2
+#define ICE_AQ_NVM_PASS_COMP_INVALID_STAMP_CODE		0x3
+#define ICE_AQ_NVM_PASS_COMP_CONFLICT_CODE		0x4
+#define ICE_AQ_NVM_PASS_COMP_PRE_REQ_NOT_MET_CODE	0x5
+#define ICE_AQ_NVM_PASS_COMP_NOT_SUPPORTED_CODE		0x6
+#define ICE_AQ_NVM_PASS_COMP_CANNOT_DOWNGRADE_CODE	0x7
+#define ICE_AQ_NVM_PASS_COMP_INCOMPLETE_IMAGE_CODE	0x8
+#define ICE_AQ_NVM_PASS_COMP_VER_STR_IDENTICAL_CODE	0xA
+#define ICE_AQ_NVM_PASS_COMP_VER_STR_LOWER_CODE		0xB
+	u8 reserved;
+	u8 transfer_flag;
+#define ICE_AQ_NVM_PASS_COMP_TBL_START			0x1
+#define ICE_AQ_NVM_PASS_COMP_TBL_MIDDLE			0x2
+#define ICE_AQ_NVM_PASS_COMP_TBL_END			0x4
+#define ICE_AQ_NVM_PASS_COMP_TBL_START_AND_END		0x5
+	__le32 reserved1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_nvm_comp_tbl {
+	__le16 comp_class;
+#define NVM_COMP_CLASS_ALL_FW	0x000A
+
+	__le16 comp_id;
+#define NVM_COMP_ID_OROM	0x5
+#define NVM_COMP_ID_NVM		0x6
+#define NVM_COMP_ID_NETLIST	0x8
+
+	u8 comp_class_idx;
+#define FWU_COMP_CLASS_IDX_NOT_USE 0x0
+
+	__le32 comp_cmp_stamp;
+	u8 cvs_type;
+#define NVM_CVS_TYPE_ASCII	0x1
+
+	u8 cvs_len;
+	u8 cvs[]; /* Component Version String */
+} __packed;
+
+/* Send to PF command (indirect 0x0801) ID is only used by PF
+ *
+ * Send to VF command (indirect 0x0802) ID is only used by PF
+ *
+ */
+struct ice_aqc_pf_vf_msg {
+	__le32 id;
+	u32 reserved;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Get LLDP MIB (indirect 0x0A00)
+ * Note: This is also used by the LLDP MIB Change Event (0x0A01)
+ * as the format is the same.
+ */
+struct ice_aqc_lldp_get_mib {
+	u8 type;
+#define ICE_AQ_LLDP_MIB_TYPE_S			0
+#define ICE_AQ_LLDP_MIB_TYPE_M			(0x3 << ICE_AQ_LLDP_MIB_TYPE_S)
+#define ICE_AQ_LLDP_MIB_LOCAL			0
+#define ICE_AQ_LLDP_MIB_REMOTE			1
+#define ICE_AQ_LLDP_MIB_LOCAL_AND_REMOTE	2
+#define ICE_AQ_LLDP_BRID_TYPE_S			2
+#define ICE_AQ_LLDP_BRID_TYPE_M			(0x3 << ICE_AQ_LLDP_BRID_TYPE_S)
+#define ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID	0
+#define ICE_AQ_LLDP_BRID_TYPE_NON_TPMR		1
+/* Tx pause flags in the 0xA01 event use ICE_AQ_LLDP_TX_* */
+#define ICE_AQ_LLDP_TX_S			0x4
+#define ICE_AQ_LLDP_TX_M			(0x03 << ICE_AQ_LLDP_TX_S)
+#define ICE_AQ_LLDP_TX_ACTIVE			0
+#define ICE_AQ_LLDP_TX_SUSPENDED		1
+#define ICE_AQ_LLDP_TX_FLUSHED			3
+/* DCBX mode */
+#define ICE_AQ_LLDP_DCBX_M			GENMASK(7, 6)
+#define ICE_AQ_LLDP_DCBX_NA			0
+#define ICE_AQ_LLDP_DCBX_CEE			1
+#define ICE_AQ_LLDP_DCBX_IEEE			2
+
+	u8 state;
+#define ICE_AQ_LLDP_MIB_CHANGE_STATE_M		BIT(0)
+#define ICE_AQ_LLDP_MIB_CHANGE_EXECUTED		0
+#define ICE_AQ_LLDP_MIB_CHANGE_PENDING		1
+
+/* The following bytes are reserved for the Get LLDP MIB command (0x0A00)
+ * and in the LLDP MIB Change Event (0x0A01). They are valid for the
+ * Get LLDP MIB (0x0A00) response only.
+ */
+	__le16 local_len;
+	__le16 remote_len;
+	u8 reserved[2];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Configure LLDP MIB Change Event (direct 0x0A01) */
+/* For MIB Change Event use ice_aqc_lldp_get_mib structure above */
+struct ice_aqc_lldp_set_mib_change {
+	u8 command;
+#define ICE_AQ_LLDP_MIB_UPDATE_ENABLE		0x0
+#define ICE_AQ_LLDP_MIB_UPDATE_DIS		0x1
+#define ICE_AQ_LLDP_MIB_PENDING_M		BIT(1)
+#define ICE_AQ_LLDP_MIB_PENDING_DISABLE		0
+#define ICE_AQ_LLDP_MIB_PENDING_ENABLE		1
+	u8 reserved[15];
+};
+
+/* Stop LLDP (direct 0x0A05) */
+struct ice_aqc_lldp_stop {
+	u8 command;
+#define ICE_AQ_LLDP_AGENT_STATE_MASK	BIT(0)
+#define ICE_AQ_LLDP_AGENT_STOP		0x0
+#define ICE_AQ_LLDP_AGENT_SHUTDOWN	ICE_AQ_LLDP_AGENT_STATE_MASK
+#define ICE_AQ_LLDP_AGENT_PERSIST_DIS	BIT(1)
+	u8 reserved[15];
+};
+
+/* Start LLDP (direct 0x0A06) */
+struct ice_aqc_lldp_start {
+	u8 command;
+#define ICE_AQ_LLDP_AGENT_START		BIT(0)
+#define ICE_AQ_LLDP_AGENT_PERSIST_ENA	BIT(1)
+	u8 reserved[15];
+};
+
+/* Get CEE DCBX Oper Config (0x0A07)
+ * The command uses the generic descriptor struct and
+ * returns the struct below as an indirect response.
+ */
+struct ice_aqc_get_cee_dcb_cfg_resp {
+	u8 oper_num_tc;
+	u8 oper_prio_tc[4];
+	u8 oper_tc_bw[8];
+	u8 oper_pfc_en;
+	__le16 oper_app_prio;
+#define ICE_AQC_CEE_APP_FCOE_S		0
+#define ICE_AQC_CEE_APP_FCOE_M		(0x7 << ICE_AQC_CEE_APP_FCOE_S)
+#define ICE_AQC_CEE_APP_ISCSI_S		3
+#define ICE_AQC_CEE_APP_ISCSI_M		(0x7 << ICE_AQC_CEE_APP_ISCSI_S)
+#define ICE_AQC_CEE_APP_FIP_S		8
+#define ICE_AQC_CEE_APP_FIP_M		(0x7 << ICE_AQC_CEE_APP_FIP_S)
+	__le32 tlv_status;
+#define ICE_AQC_CEE_PG_STATUS_S		0
+#define ICE_AQC_CEE_PG_STATUS_M		(0x7 << ICE_AQC_CEE_PG_STATUS_S)
+#define ICE_AQC_CEE_PFC_STATUS_S	3
+#define ICE_AQC_CEE_PFC_STATUS_M	(0x7 << ICE_AQC_CEE_PFC_STATUS_S)
+#define ICE_AQC_CEE_FCOE_STATUS_S	8
+#define ICE_AQC_CEE_FCOE_STATUS_M	(0x7 << ICE_AQC_CEE_FCOE_STATUS_S)
+#define ICE_AQC_CEE_ISCSI_STATUS_S	11
+#define ICE_AQC_CEE_ISCSI_STATUS_M	(0x7 << ICE_AQC_CEE_ISCSI_STATUS_S)
+#define ICE_AQC_CEE_FIP_STATUS_S	16
+#define ICE_AQC_CEE_FIP_STATUS_M	(0x7 << ICE_AQC_CEE_FIP_STATUS_S)
+	u8 reserved[12];
+};
+
+/* Set Local LLDP MIB (indirect 0x0A08)
+ * Used to replace the local MIB of a given LLDP agent. e.g. DCBX
+ */
+struct ice_aqc_lldp_set_local_mib {
+	u8 type;
+#define SET_LOCAL_MIB_TYPE_DCBX_M		BIT(0)
+#define SET_LOCAL_MIB_TYPE_LOCAL_MIB		0
+#define SET_LOCAL_MIB_TYPE_CEE_M		BIT(1)
+#define SET_LOCAL_MIB_TYPE_CEE_WILLING		0
+#define SET_LOCAL_MIB_TYPE_CEE_NON_WILLING	SET_LOCAL_MIB_TYPE_CEE_M
+	u8 reserved0;
+	__le16 length;
+	u8 reserved1[4];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Stop/Start LLDP Agent (direct 0x0A09)
+ * Used for stopping/starting specific LLDP agent. e.g. DCBX.
+ * The same structure is used for the response, with the command field
+ * being used as the status field.
+ */
+struct ice_aqc_lldp_stop_start_specific_agent {
+	u8 command;
+#define ICE_AQC_START_STOP_AGENT_M		BIT(0)
+#define ICE_AQC_START_STOP_AGENT_STOP_DCBX	0
+#define ICE_AQC_START_STOP_AGENT_START_DCBX	ICE_AQC_START_STOP_AGENT_M
+	u8 reserved[15];
+};
+
+/* LLDP Filter Control (direct 0x0A0A) */
+struct ice_aqc_lldp_filter_ctrl {
+	u8 cmd_flags;
+#define ICE_AQC_LLDP_FILTER_ACTION_ADD		0x0
+#define ICE_AQC_LLDP_FILTER_ACTION_DELETE	0x1
+	u8 reserved1;
+	__le16 vsi_num;
+	u8 reserved2[12];
+};
+
+#define ICE_AQC_RSS_VSI_VALID BIT(15)
+
+/* Get/Set RSS key (indirect 0x0B04/0x0B02) */
+struct ice_aqc_get_set_rss_key {
+	__le16 vsi_id;
+	u8 reserved[6];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+#define ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE	0x28
+#define ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE	0xC
+#define ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE \
+				(ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE + \
+				 ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE)
+
+struct ice_aqc_get_set_rss_keys {
+	u8 standard_rss_key[ICE_AQC_GET_SET_RSS_KEY_DATA_RSS_KEY_SIZE];
+	u8 extended_hash_key[ICE_AQC_GET_SET_RSS_KEY_DATA_HASH_KEY_SIZE];
+};
+
+enum ice_lut_type {
+	ICE_LUT_VSI = 0,
+	ICE_LUT_PF = 1,
+	ICE_LUT_GLOBAL = 2,
+};
+
+enum ice_lut_size {
+	ICE_LUT_VSI_SIZE = 64,
+	ICE_LUT_GLOBAL_SIZE = 512,
+	ICE_LUT_PF_SIZE = 2048,
+};
+
+/* enum ice_aqc_lut_flags combines constants used to fill
+ * &ice_aqc_get_set_rss_lut ::flags, which is an amalgamation of global LUT ID,
+ * LUT size and LUT type, last of which does not need neither shift nor mask.
+ */
+enum ice_aqc_lut_flags {
+	ICE_AQC_LUT_SIZE_SMALL = 0, /* size = 64 or 128 */
+	ICE_AQC_LUT_SIZE_512 = BIT(2),
+	ICE_AQC_LUT_SIZE_2K = BIT(3),
+
+	ICE_AQC_LUT_GLOBAL_IDX = GENMASK(7, 4),
+};
+
+/* Get/Set RSS LUT (indirect 0x0B05/0x0B03) */
+struct ice_aqc_get_set_rss_lut {
+	__le16 vsi_id;
+	__le16 flags;
+	__le32 reserved;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Sideband Control Interface Commands */
+/* Neighbor Device Request (indirect 0x0C00); also used for the response. */
+struct ice_aqc_neigh_dev_req {
+	__le16 sb_data_len;
+	u8 reserved[6];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Add Tx LAN Queues (indirect 0x0C30) */
+struct ice_aqc_add_txqs {
+	u8 num_qgrps;
+	u8 reserved[3];
+	__le32 reserved1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* This is the descriptor of each queue entry for the Add Tx LAN Queues
+ * command (0x0C30). Only used within struct ice_aqc_add_tx_qgrp.
+ */
+struct ice_aqc_add_txqs_perq {
+	__le16 txq_id;
+	u8 rsvd[2];
+	__le32 q_teid;
+	u8 txq_ctx[22];
+	u8 rsvd2[2];
+	struct ice_aqc_txsched_elem info;
+};
+
+/* The format of the command buffer for Add Tx LAN Queues (0x0C30)
+ * is an array of the following structs. Please note that the length of
+ * each struct ice_aqc_add_tx_qgrp is variable due
+ * to the variable number of queues in each group!
+ */
+struct ice_aqc_add_tx_qgrp {
+	__le32 parent_teid;
+	u8 num_txqs;
+	u8 rsvd[3];
+	struct ice_aqc_add_txqs_perq txqs[];
+};
+
+/* Disable Tx LAN Queues (indirect 0x0C31) */
+struct ice_aqc_dis_txqs {
+	u8 cmd_type;
+#define ICE_AQC_Q_DIS_CMD_S		0
+#define ICE_AQC_Q_DIS_CMD_M		(0x3 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_NO_FUNC_RESET	(0 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_VM_RESET	BIT(ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_VF_RESET	(2 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_PF_RESET	(3 << ICE_AQC_Q_DIS_CMD_S)
+#define ICE_AQC_Q_DIS_CMD_SUBSEQ_CALL	BIT(2)
+#define ICE_AQC_Q_DIS_CMD_FLUSH_PIPE	BIT(3)
+	u8 num_entries;
+	__le16 vmvf_and_timeout;
+#define ICE_AQC_Q_DIS_VMVF_NUM_S	0
+#define ICE_AQC_Q_DIS_VMVF_NUM_M	(0x3FF << ICE_AQC_Q_DIS_VMVF_NUM_S)
+#define ICE_AQC_Q_DIS_TIMEOUT_S		10
+#define ICE_AQC_Q_DIS_TIMEOUT_M		(0x3F << ICE_AQC_Q_DIS_TIMEOUT_S)
+	__le32 blocked_cgds;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* The buffer for Disable Tx LAN Queues (indirect 0x0C31)
+ * contains the following structures, arrayed one after the
+ * other.
+ * Note: Since the q_id is 16 bits wide, if the
+ * number of queues is even, then 2 bytes of alignment MUST be
+ * added before the start of the next group, to allow correct
+ * alignment of the parent_teid field.
+ */
+struct ice_aqc_dis_txq_item {
+	__le32 parent_teid;
+	u8 num_qs;
+	u8 rsvd;
+	/* The length of the q_id array varies according to num_qs */
+#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S		15
+#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_LAN_Q	\
+			(0 << ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S)
+#define ICE_AQC_Q_DIS_BUF_ELEM_TYPE_RDMA_QSET	\
+			(1 << ICE_AQC_Q_DIS_BUF_ELEM_TYPE_S)
+	__le16 q_id[];
+} __packed;
+
+/* Move/Reconfigure Tx queue (indirect 0x0C32) */
+struct ice_aqc_cfg_txqs {
+	u8 cmd_type;
+#define ICE_AQC_Q_CFG_MOVE_NODE		0x1
+#define ICE_AQC_Q_CFG_TC_CHNG		0x2
+#define ICE_AQC_Q_CFG_MOVE_TC_CHNG	0x3
+#define ICE_AQC_Q_CFG_SUBSEQ_CALL	BIT(2)
+#define ICE_AQC_Q_CFG_FLUSH		BIT(3)
+	u8 num_qs;
+	u8 port_num_chng;
+#define ICE_AQC_Q_CFG_SRC_PRT_M		0x7
+#define ICE_AQC_Q_CFG_DST_PRT_S		3
+#define ICE_AQC_Q_CFG_DST_PRT_M		(0x7 << ICE_AQC_Q_CFG_DST_PRT_S)
+	u8 time_out;
+#define ICE_AQC_Q_CFG_TIMEOUT_S		2
+#define ICE_AQC_Q_CFG_TIMEOUT_M		(0x1F << ICE_AQC_Q_CFG_TIMEOUT_S)
+	__le32 blocked_cgds;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Per Q struct for Move/Reconfigure Tx LAN Queues (indirect 0x0C32) */
+struct ice_aqc_cfg_txq_perq {
+	__le16 q_handle;
+	u8 tc;
+	u8 rsvd;
+	__le32 q_teid;
+};
+
+/* The buffer for Move/Reconfigure Tx LAN Queues (indirect 0x0C32) */
+struct ice_aqc_cfg_txqs_buf {
+	__le32 src_parent_teid;
+	__le32 dst_parent_teid;
+	struct ice_aqc_cfg_txq_perq queue_info[];
+};
+
+/* Add Tx RDMA Queue Set (indirect 0x0C33) */
+struct ice_aqc_add_rdma_qset {
+	u8 num_qset_grps;
+	u8 reserved[7];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* This is the descriptor of each Qset entry for the Add Tx RDMA Queue Set
+ * command (0x0C33). Only used within struct ice_aqc_add_rdma_qset.
+ */
+struct ice_aqc_add_tx_rdma_qset_entry {
+	__le16 tx_qset_id;
+	u8 rsvd[2];
+	__le32 qset_teid;
+	struct ice_aqc_txsched_elem info;
+};
+
+/* The format of the command buffer for Add Tx RDMA Queue Set(0x0C33)
+ * is an array of the following structs. Please note that the length of
+ * each struct ice_aqc_add_rdma_qset is variable due to the variable
+ * number of queues in each group!
+ */
+struct ice_aqc_add_rdma_qset_data {
+	__le32 parent_teid;
+	__le16 num_qsets;
+	u8 rsvd[2];
+	struct ice_aqc_add_tx_rdma_qset_entry rdma_qsets[];
+};
+
+/* Configure Firmware Logging Command (indirect 0xFF09)
+ * Logging Information Read Response (indirect 0xFF10)
+ * Note: The 0xFF10 command has no input parameters.
+ */
+struct ice_aqc_fw_logging {
+	u8 log_ctrl;
+#define ICE_AQC_FW_LOG_AQ_EN		BIT(0)
+#define ICE_AQC_FW_LOG_UART_EN		BIT(1)
+	u8 rsvd0;
+	u8 log_ctrl_valid; /* Not used by 0xFF10 Response */
+#define ICE_AQC_FW_LOG_AQ_VALID		BIT(0)
+#define ICE_AQC_FW_LOG_UART_VALID	BIT(1)
+	u8 rsvd1[5];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+enum ice_aqc_fw_logging_mod {
+	ICE_AQC_FW_LOG_ID_GENERAL = 0,
+	ICE_AQC_FW_LOG_ID_CTRL,
+	ICE_AQC_FW_LOG_ID_LINK,
+	ICE_AQC_FW_LOG_ID_LINK_TOPO,
+	ICE_AQC_FW_LOG_ID_DNL,
+	ICE_AQC_FW_LOG_ID_I2C,
+	ICE_AQC_FW_LOG_ID_SDP,
+	ICE_AQC_FW_LOG_ID_MDIO,
+	ICE_AQC_FW_LOG_ID_ADMINQ,
+	ICE_AQC_FW_LOG_ID_HDMA,
+	ICE_AQC_FW_LOG_ID_LLDP,
+	ICE_AQC_FW_LOG_ID_DCBX,
+	ICE_AQC_FW_LOG_ID_DCB,
+	ICE_AQC_FW_LOG_ID_NETPROXY,
+	ICE_AQC_FW_LOG_ID_NVM,
+	ICE_AQC_FW_LOG_ID_AUTH,
+	ICE_AQC_FW_LOG_ID_VPD,
+	ICE_AQC_FW_LOG_ID_IOSF,
+	ICE_AQC_FW_LOG_ID_PARSER,
+	ICE_AQC_FW_LOG_ID_SW,
+	ICE_AQC_FW_LOG_ID_SCHEDULER,
+	ICE_AQC_FW_LOG_ID_TXQ,
+	ICE_AQC_FW_LOG_ID_RSVD,
+	ICE_AQC_FW_LOG_ID_POST,
+	ICE_AQC_FW_LOG_ID_WATCHDOG,
+	ICE_AQC_FW_LOG_ID_TASK_DISPATCH,
+	ICE_AQC_FW_LOG_ID_MNG,
+	ICE_AQC_FW_LOG_ID_MAX,
+};
+
+/* Defines for both above FW logging command/response buffers */
+#define ICE_AQC_FW_LOG_ID_S		0
+#define ICE_AQC_FW_LOG_ID_M		(0xFFF << ICE_AQC_FW_LOG_ID_S)
+
+#define ICE_AQC_FW_LOG_CONF_SUCCESS	0	/* Used by response */
+#define ICE_AQC_FW_LOG_CONF_BAD_INDX	BIT(12)	/* Used by response */
+
+#define ICE_AQC_FW_LOG_EN_S		12
+#define ICE_AQC_FW_LOG_EN_M		(0xF << ICE_AQC_FW_LOG_EN_S)
+#define ICE_AQC_FW_LOG_INFO_EN		BIT(12)	/* Used by command */
+#define ICE_AQC_FW_LOG_INIT_EN		BIT(13)	/* Used by command */
+#define ICE_AQC_FW_LOG_FLOW_EN		BIT(14)	/* Used by command */
+#define ICE_AQC_FW_LOG_ERR_EN		BIT(15)	/* Used by command */
+
+/* Get/Clear FW Log (indirect 0xFF11) */
+struct ice_aqc_get_clear_fw_log {
+	u8 flags;
+#define ICE_AQC_FW_LOG_CLEAR		BIT(0)
+#define ICE_AQC_FW_LOG_MORE_DATA_AVAIL	BIT(1)
+	u8 rsvd1[7];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Download Package (indirect 0x0C40) */
+/* Also used for Update Package (indirect 0x0C41 and 0x0C42) */
+struct ice_aqc_download_pkg {
+	u8 flags;
+#define ICE_AQC_DOWNLOAD_PKG_LAST_BUF	0x01
+	u8 reserved[3];
+	__le32 reserved1;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_aqc_download_pkg_resp {
+	__le32 error_offset;
+	__le32 error_info;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Get Package Info List (indirect 0x0C43) */
+struct ice_aqc_get_pkg_info_list {
+	__le32 reserved1;
+	__le32 reserved2;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+/* Version format for packages */
+struct ice_pkg_ver {
+	u8 major;
+	u8 minor;
+	u8 update;
+	u8 draft;
+};
+
+#define ICE_PKG_NAME_SIZE	32
+#define ICE_SEG_ID_SIZE		28
+#define ICE_SEG_NAME_SIZE	28
+
+struct ice_aqc_get_pkg_info {
+	struct ice_pkg_ver ver;
+	char name[ICE_SEG_NAME_SIZE];
+	__le32 track_id;
+	u8 is_in_nvm;
+	u8 is_active;
+	u8 is_active_at_boot;
+	u8 is_modified;
+};
+
+/* Get Package Info List response buffer format (0x0C43) */
+struct ice_aqc_get_pkg_info_resp {
+	__le32 count;
+	struct ice_aqc_get_pkg_info pkg_info[];
+};
+
+/* Driver Shared Parameters (direct, 0x0C90) */
+struct ice_aqc_driver_shared_params {
+	u8 set_or_get_op;
+#define ICE_AQC_DRIVER_PARAM_OP_MASK		BIT(0)
+#define ICE_AQC_DRIVER_PARAM_SET		0
+#define ICE_AQC_DRIVER_PARAM_GET		1
+	u8 param_indx;
+#define ICE_AQC_DRIVER_PARAM_MAX_IDX		15
+	u8 rsvd[2];
+	__le32 param_val;
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+enum ice_aqc_driver_params {
+	/* OS clock index for PTP timer Domain 0 */
+	ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0 = 0,
+	/* OS clock index for PTP timer Domain 1 */
+	ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1,
+
+	/* Add new parameters above */
+	ICE_AQC_DRIVER_PARAM_MAX = 16,
+};
+
+/* Lan Queue Overflow Event (direct, 0x1001) */
+struct ice_aqc_event_lan_overflow {
+	__le32 prtdcb_ruptq;
+	__le32 qtx_ctl;
+	u8 reserved[8];
+};
+
+/**
+ * struct ice_aq_desc - Admin Queue (AQ) descriptor
+ * @flags: ICE_AQ_FLAG_* flags
+ * @opcode: AQ command opcode
+ * @datalen: length in bytes of indirect/external data buffer
+ * @retval: return value from firmware
+ * @cookie_high: opaque data high-half
+ * @cookie_low: opaque data low-half
+ * @params: command-specific parameters
+ *
+ * Descriptor format for commands the driver posts on the Admin Transmit Queue
+ * (ATQ). The firmware writes back onto the command descriptor and returns
+ * the result of the command. Asynchronous events that are not an immediate
+ * result of the command are written to the Admin Receive Queue (ARQ) using
+ * the same descriptor format. Descriptors are in little-endian notation with
+ * 32-bit words.
+ */
+struct ice_aq_desc {
+	__le16 flags;
+	__le16 opcode;
+	__le16 datalen;
+	__le16 retval;
+	__le32 cookie_high;
+	__le32 cookie_low;
+	union {
+		u8 raw[16];
+		struct ice_aqc_generic generic;
+		struct ice_aqc_get_ver get_ver;
+		struct ice_aqc_driver_ver driver_ver;
+		struct ice_aqc_q_shutdown q_shutdown;
+		struct ice_aqc_req_res res_owner;
+		struct ice_aqc_manage_mac_read mac_read;
+		struct ice_aqc_manage_mac_write mac_write;
+		struct ice_aqc_clear_pxe clear_pxe;
+		struct ice_aqc_list_caps get_cap;
+		struct ice_aqc_get_phy_caps get_phy;
+		struct ice_aqc_set_phy_cfg set_phy;
+		struct ice_aqc_restart_an restart_an;
+		struct ice_aqc_gpio read_write_gpio;
+		struct ice_aqc_sff_eeprom read_write_sff_param;
+		struct ice_aqc_set_port_id_led set_port_id_led;
+		struct ice_aqc_get_port_options get_port_options;
+		struct ice_aqc_set_port_option set_port_option;
+		struct ice_aqc_get_sw_cfg get_sw_conf;
+		struct ice_aqc_set_port_params set_port_params;
+		struct ice_aqc_sw_rules sw_rules;
+		struct ice_aqc_add_get_recipe add_get_recipe;
+		struct ice_aqc_recipe_to_profile recipe_to_profile;
+		struct ice_aqc_get_topo get_topo;
+		struct ice_aqc_sched_elem_cmd sched_elem_cmd;
+		struct ice_aqc_query_txsched_res query_sched_res;
+		struct ice_aqc_query_port_ets port_ets;
+		struct ice_aqc_rl_profile rl_profile;
+		struct ice_aqc_nvm nvm;
+		struct ice_aqc_nvm_checksum nvm_checksum;
+		struct ice_aqc_nvm_pkg_data pkg_data;
+		struct ice_aqc_nvm_pass_comp_tbl pass_comp_tbl;
+		struct ice_aqc_pf_vf_msg virt;
+		struct ice_aqc_set_query_pfc_mode set_query_pfc_mode;
+		struct ice_aqc_lldp_get_mib lldp_get_mib;
+		struct ice_aqc_lldp_set_mib_change lldp_set_event;
+		struct ice_aqc_lldp_stop lldp_stop;
+		struct ice_aqc_lldp_start lldp_start;
+		struct ice_aqc_lldp_set_local_mib lldp_set_mib;
+		struct ice_aqc_lldp_stop_start_specific_agent lldp_agent_ctrl;
+		struct ice_aqc_lldp_filter_ctrl lldp_filter_ctrl;
+		struct ice_aqc_get_set_rss_lut get_set_rss_lut;
+		struct ice_aqc_get_set_rss_key get_set_rss_key;
+		struct ice_aqc_neigh_dev_req neigh_dev;
+		struct ice_aqc_add_txqs add_txqs;
+		struct ice_aqc_dis_txqs dis_txqs;
+		struct ice_aqc_cfg_txqs cfg_txqs;
+		struct ice_aqc_add_rdma_qset add_rdma_qset;
+		struct ice_aqc_add_get_update_free_vsi vsi_cmd;
+		struct ice_aqc_add_update_free_vsi_resp add_update_free_vsi_res;
+		struct ice_aqc_fw_logging fw_logging;
+		struct ice_aqc_get_clear_fw_log get_clear_fw_log;
+		struct ice_aqc_download_pkg download_pkg;
+		struct ice_aqc_driver_shared_params drv_shared_params;
+		struct ice_aqc_set_mac_lb set_mac_lb;
+		struct ice_aqc_alloc_free_res_cmd sw_res_ctrl;
+		struct ice_aqc_set_mac_cfg set_mac_cfg;
+		struct ice_aqc_set_event_mask set_event_mask;
+		struct ice_aqc_get_link_status get_link_status;
+		struct ice_aqc_event_lan_overflow lan_overflow;
+		struct ice_aqc_get_link_topo get_link_topo;
+		struct ice_aqc_i2c read_write_i2c;
+		struct ice_aqc_read_i2c_resp read_i2c_resp;
+	} params;
+};
+
+/* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */
+#define ICE_AQ_LG_BUF	512
+
+#define ICE_AQ_FLAG_ERR_S	2
+#define ICE_AQ_FLAG_LB_S	9
+#define ICE_AQ_FLAG_RD_S	10
+#define ICE_AQ_FLAG_BUF_S	12
+#define ICE_AQ_FLAG_SI_S	13
+
+#define ICE_AQ_FLAG_ERR		BIT(ICE_AQ_FLAG_ERR_S) /* 0x4    */
+#define ICE_AQ_FLAG_LB		BIT(ICE_AQ_FLAG_LB_S)  /* 0x200  */
+#define ICE_AQ_FLAG_RD		BIT(ICE_AQ_FLAG_RD_S)  /* 0x400  */
+#define ICE_AQ_FLAG_BUF		BIT(ICE_AQ_FLAG_BUF_S) /* 0x1000 */
+#define ICE_AQ_FLAG_SI		BIT(ICE_AQ_FLAG_SI_S)  /* 0x2000 */
+
+/* error codes */
+enum ice_aq_err {
+	ICE_AQ_RC_OK		= 0,  /* Success */
+	ICE_AQ_RC_EPERM		= 1,  /* Operation not permitted */
+	ICE_AQ_RC_ENOENT	= 2,  /* No such element */
+	ICE_AQ_RC_ENOMEM	= 9,  /* Out of memory */
+	ICE_AQ_RC_EBUSY		= 12, /* Device or resource busy */
+	ICE_AQ_RC_EEXIST	= 13, /* Object already exists */
+	ICE_AQ_RC_EINVAL	= 14, /* Invalid argument */
+	ICE_AQ_RC_ENOSPC	= 16, /* No space left or allocation failure */
+	ICE_AQ_RC_ENOSYS	= 17, /* Function not implemented */
+	ICE_AQ_RC_EMODE		= 21, /* Op not allowed in current dev mode */
+	ICE_AQ_RC_ENOSEC	= 24, /* Missing security manifest */
+	ICE_AQ_RC_EBADSIG	= 25, /* Bad RSA signature */
+	ICE_AQ_RC_ESVN		= 26, /* SVN number prohibits this package */
+	ICE_AQ_RC_EBADMAN	= 27, /* Manifest hash mismatch */
+	ICE_AQ_RC_EBADBUF	= 28, /* Buffer hash mismatches manifest */
+};
+
+/* Admin Queue command opcodes */
+enum ice_adminq_opc {
+	/* AQ commands */
+	ice_aqc_opc_get_ver				= 0x0001,
+	ice_aqc_opc_driver_ver				= 0x0002,
+	ice_aqc_opc_q_shutdown				= 0x0003,
+
+	/* resource ownership */
+	ice_aqc_opc_req_res				= 0x0008,
+	ice_aqc_opc_release_res				= 0x0009,
+
+	/* device/function capabilities */
+	ice_aqc_opc_list_func_caps			= 0x000A,
+	ice_aqc_opc_list_dev_caps			= 0x000B,
+
+	/* manage MAC address */
+	ice_aqc_opc_manage_mac_read			= 0x0107,
+	ice_aqc_opc_manage_mac_write			= 0x0108,
+
+	/* PXE */
+	ice_aqc_opc_clear_pxe_mode			= 0x0110,
+
+	/* internal switch commands */
+	ice_aqc_opc_get_sw_cfg				= 0x0200,
+	ice_aqc_opc_set_port_params			= 0x0203,
+
+	/* Alloc/Free/Get Resources */
+	ice_aqc_opc_alloc_res				= 0x0208,
+	ice_aqc_opc_free_res				= 0x0209,
+	ice_aqc_opc_share_res				= 0x020B,
+	ice_aqc_opc_set_vlan_mode_parameters		= 0x020C,
+	ice_aqc_opc_get_vlan_mode_parameters		= 0x020D,
+
+	/* VSI commands */
+	ice_aqc_opc_add_vsi				= 0x0210,
+	ice_aqc_opc_update_vsi				= 0x0211,
+	ice_aqc_opc_free_vsi				= 0x0213,
+
+	/* recipe commands */
+	ice_aqc_opc_add_recipe				= 0x0290,
+	ice_aqc_opc_recipe_to_profile			= 0x0291,
+	ice_aqc_opc_get_recipe				= 0x0292,
+	ice_aqc_opc_get_recipe_to_profile		= 0x0293,
+
+	/* switch rules population commands */
+	ice_aqc_opc_add_sw_rules			= 0x02A0,
+	ice_aqc_opc_update_sw_rules			= 0x02A1,
+	ice_aqc_opc_remove_sw_rules			= 0x02A2,
+
+	ice_aqc_opc_clear_pf_cfg			= 0x02A4,
+
+	/* DCB commands */
+	ice_aqc_opc_query_pfc_mode			= 0x0302,
+	ice_aqc_opc_set_pfc_mode			= 0x0303,
+
+	/* transmit scheduler commands */
+	ice_aqc_opc_get_dflt_topo			= 0x0400,
+	ice_aqc_opc_add_sched_elems			= 0x0401,
+	ice_aqc_opc_cfg_sched_elems			= 0x0403,
+	ice_aqc_opc_get_sched_elems			= 0x0404,
+	ice_aqc_opc_move_sched_elems			= 0x0408,
+	ice_aqc_opc_suspend_sched_elems			= 0x0409,
+	ice_aqc_opc_resume_sched_elems			= 0x040A,
+	ice_aqc_opc_query_port_ets			= 0x040E,
+	ice_aqc_opc_delete_sched_elems			= 0x040F,
+	ice_aqc_opc_add_rl_profiles			= 0x0410,
+	ice_aqc_opc_query_sched_res			= 0x0412,
+	ice_aqc_opc_remove_rl_profiles			= 0x0415,
+
+	/* PHY commands */
+	ice_aqc_opc_get_phy_caps			= 0x0600,
+	ice_aqc_opc_set_phy_cfg				= 0x0601,
+	ice_aqc_opc_set_mac_cfg				= 0x0603,
+	ice_aqc_opc_restart_an				= 0x0605,
+	ice_aqc_opc_get_link_status			= 0x0607,
+	ice_aqc_opc_set_event_mask			= 0x0613,
+	ice_aqc_opc_set_mac_lb				= 0x0620,
+	ice_aqc_opc_get_link_topo			= 0x06E0,
+	ice_aqc_opc_read_i2c				= 0x06E2,
+	ice_aqc_opc_write_i2c				= 0x06E3,
+	ice_aqc_opc_set_port_id_led			= 0x06E9,
+	ice_aqc_opc_get_port_options			= 0x06EA,
+	ice_aqc_opc_set_port_option			= 0x06EB,
+	ice_aqc_opc_set_gpio				= 0x06EC,
+	ice_aqc_opc_get_gpio				= 0x06ED,
+	ice_aqc_opc_sff_eeprom				= 0x06EE,
+
+	/* NVM commands */
+	ice_aqc_opc_nvm_read				= 0x0701,
+	ice_aqc_opc_nvm_erase				= 0x0702,
+	ice_aqc_opc_nvm_write				= 0x0703,
+	ice_aqc_opc_nvm_checksum			= 0x0706,
+	ice_aqc_opc_nvm_write_activate			= 0x0707,
+	ice_aqc_opc_nvm_update_empr			= 0x0709,
+	ice_aqc_opc_nvm_pkg_data			= 0x070A,
+	ice_aqc_opc_nvm_pass_component_tbl		= 0x070B,
+
+	/* PF/VF mailbox commands */
+	ice_mbx_opc_send_msg_to_pf			= 0x0801,
+	ice_mbx_opc_send_msg_to_vf			= 0x0802,
+	/* LLDP commands */
+	ice_aqc_opc_lldp_get_mib			= 0x0A00,
+	ice_aqc_opc_lldp_set_mib_change			= 0x0A01,
+	ice_aqc_opc_lldp_stop				= 0x0A05,
+	ice_aqc_opc_lldp_start				= 0x0A06,
+	ice_aqc_opc_get_cee_dcb_cfg			= 0x0A07,
+	ice_aqc_opc_lldp_set_local_mib			= 0x0A08,
+	ice_aqc_opc_lldp_stop_start_specific_agent	= 0x0A09,
+	ice_aqc_opc_lldp_filter_ctrl			= 0x0A0A,
+	ice_aqc_opc_lldp_execute_pending_mib		= 0x0A0B,
+
+	/* RSS commands */
+	ice_aqc_opc_set_rss_key				= 0x0B02,
+	ice_aqc_opc_set_rss_lut				= 0x0B03,
+	ice_aqc_opc_get_rss_key				= 0x0B04,
+	ice_aqc_opc_get_rss_lut				= 0x0B05,
+
+	/* Sideband Control Interface commands */
+	ice_aqc_opc_neighbour_device_request		= 0x0C00,
+
+	/* Tx queue handling commands/events */
+	ice_aqc_opc_add_txqs				= 0x0C30,
+	ice_aqc_opc_dis_txqs				= 0x0C31,
+	ice_aqc_opc_cfg_txqs				= 0x0C32,
+	ice_aqc_opc_add_rdma_qset			= 0x0C33,
+
+	/* package commands */
+	ice_aqc_opc_download_pkg			= 0x0C40,
+	ice_aqc_opc_upload_section			= 0x0C41,
+	ice_aqc_opc_update_pkg				= 0x0C42,
+	ice_aqc_opc_get_pkg_info_list			= 0x0C43,
+
+	ice_aqc_opc_driver_shared_params		= 0x0C90,
+
+	/* Standalone Commands/Events */
+	ice_aqc_opc_event_lan_overflow			= 0x1001,
+
+	/* debug commands */
+	ice_aqc_opc_fw_logging				= 0xFF09,
+	ice_aqc_opc_fw_logging_info			= 0xFF10,
+};
+
+#endif /* _ICE_ADMINQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c
new file mode 100644
index 0000000000..cca0e753f3
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
@@ -0,0 +1,655 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#include "ice.h"
+
+/**
+ * ice_is_arfs_active - helper to check is aRFS is active
+ * @vsi: VSI to check
+ */
+static bool ice_is_arfs_active(struct ice_vsi *vsi)
+{
+	return !!vsi->arfs_fltr_list;
+}
+
+/**
+ * ice_is_arfs_using_perfect_flow - check if aRFS has active perfect filters
+ * @hw: pointer to the HW structure
+ * @flow_type: flow type as Flow Director understands it
+ *
+ * Flow Director will query this function to see if aRFS is currently using
+ * the specified flow_type for perfect (4-tuple) filters.
+ */
+bool
+ice_is_arfs_using_perfect_flow(struct ice_hw *hw, enum ice_fltr_ptype flow_type)
+{
+	struct ice_arfs_active_fltr_cntrs *arfs_fltr_cntrs;
+	struct ice_pf *pf = hw->back;
+	struct ice_vsi *vsi;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return false;
+
+	arfs_fltr_cntrs = vsi->arfs_fltr_cntrs;
+
+	/* active counters can be updated by multiple CPUs */
+	smp_mb__before_atomic();
+	switch (flow_type) {
+	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		return atomic_read(&arfs_fltr_cntrs->active_udpv4_cnt) > 0;
+	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		return atomic_read(&arfs_fltr_cntrs->active_udpv6_cnt) > 0;
+	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		return atomic_read(&arfs_fltr_cntrs->active_tcpv4_cnt) > 0;
+	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		return atomic_read(&arfs_fltr_cntrs->active_tcpv6_cnt) > 0;
+	default:
+		return false;
+	}
+}
+
+/**
+ * ice_arfs_update_active_fltr_cntrs - update active filter counters for aRFS
+ * @vsi: VSI that aRFS is active on
+ * @entry: aRFS entry used to change counters
+ * @add: true to increment counter, false to decrement
+ */
+static void
+ice_arfs_update_active_fltr_cntrs(struct ice_vsi *vsi,
+				  struct ice_arfs_entry *entry, bool add)
+{
+	struct ice_arfs_active_fltr_cntrs *fltr_cntrs = vsi->arfs_fltr_cntrs;
+
+	switch (entry->fltr_info.flow_type) {
+	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		if (add)
+			atomic_inc(&fltr_cntrs->active_tcpv4_cnt);
+		else
+			atomic_dec(&fltr_cntrs->active_tcpv4_cnt);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		if (add)
+			atomic_inc(&fltr_cntrs->active_tcpv6_cnt);
+		else
+			atomic_dec(&fltr_cntrs->active_tcpv6_cnt);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		if (add)
+			atomic_inc(&fltr_cntrs->active_udpv4_cnt);
+		else
+			atomic_dec(&fltr_cntrs->active_udpv4_cnt);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		if (add)
+			atomic_inc(&fltr_cntrs->active_udpv6_cnt);
+		else
+			atomic_dec(&fltr_cntrs->active_udpv6_cnt);
+		break;
+	default:
+		dev_err(ice_pf_to_dev(vsi->back), "aRFS: Failed to update filter counters, invalid filter type %d\n",
+			entry->fltr_info.flow_type);
+	}
+}
+
+/**
+ * ice_arfs_del_flow_rules - delete the rules passed in from HW
+ * @vsi: VSI for the flow rules that need to be deleted
+ * @del_list_head: head of the list of ice_arfs_entry(s) for rule deletion
+ *
+ * Loop through the delete list passed in and remove the rules from HW. After
+ * each rule is deleted, disconnect and free the ice_arfs_entry because it is no
+ * longer being referenced by the aRFS hash table.
+ */
+static void
+ice_arfs_del_flow_rules(struct ice_vsi *vsi, struct hlist_head *del_list_head)
+{
+	struct ice_arfs_entry *e;
+	struct hlist_node *n;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(vsi->back);
+
+	hlist_for_each_entry_safe(e, n, del_list_head, list_entry) {
+		int result;
+
+		result = ice_fdir_write_fltr(vsi->back, &e->fltr_info, false,
+					     false);
+		if (!result)
+			ice_arfs_update_active_fltr_cntrs(vsi, e, false);
+		else
+			dev_dbg(dev, "Unable to delete aRFS entry, err %d fltr_state %d fltr_id %d flow_id %d Q %d\n",
+				result, e->fltr_state, e->fltr_info.fltr_id,
+				e->flow_id, e->fltr_info.q_index);
+
+		/* The aRFS hash table is no longer referencing this entry */
+		hlist_del(&e->list_entry);
+		devm_kfree(dev, e);
+	}
+}
+
+/**
+ * ice_arfs_add_flow_rules - add the rules passed in from HW
+ * @vsi: VSI for the flow rules that need to be added
+ * @add_list_head: head of the list of ice_arfs_entry_ptr(s) for rule addition
+ *
+ * Loop through the add list passed in and remove the rules from HW. After each
+ * rule is added, disconnect and free the ice_arfs_entry_ptr node. Don't free
+ * the ice_arfs_entry(s) because they are still being referenced in the aRFS
+ * hash table.
+ */
+static void
+ice_arfs_add_flow_rules(struct ice_vsi *vsi, struct hlist_head *add_list_head)
+{
+	struct ice_arfs_entry_ptr *ep;
+	struct hlist_node *n;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(vsi->back);
+
+	hlist_for_each_entry_safe(ep, n, add_list_head, list_entry) {
+		int result;
+
+		result = ice_fdir_write_fltr(vsi->back,
+					     &ep->arfs_entry->fltr_info, true,
+					     false);
+		if (!result)
+			ice_arfs_update_active_fltr_cntrs(vsi, ep->arfs_entry,
+							  true);
+		else
+			dev_dbg(dev, "Unable to add aRFS entry, err %d fltr_state %d fltr_id %d flow_id %d Q %d\n",
+				result, ep->arfs_entry->fltr_state,
+				ep->arfs_entry->fltr_info.fltr_id,
+				ep->arfs_entry->flow_id,
+				ep->arfs_entry->fltr_info.q_index);
+
+		hlist_del(&ep->list_entry);
+		devm_kfree(dev, ep);
+	}
+}
+
+/**
+ * ice_arfs_is_flow_expired - check if the aRFS entry has expired
+ * @vsi: VSI containing the aRFS entry
+ * @arfs_entry: aRFS entry that's being checked for expiration
+ *
+ * Return true if the flow has expired, else false. This function should be used
+ * to determine whether or not an aRFS entry should be removed from the hardware
+ * and software structures.
+ */
+static bool
+ice_arfs_is_flow_expired(struct ice_vsi *vsi, struct ice_arfs_entry *arfs_entry)
+{
+#define ICE_ARFS_TIME_DELTA_EXPIRATION	msecs_to_jiffies(5000)
+	if (rps_may_expire_flow(vsi->netdev, arfs_entry->fltr_info.q_index,
+				arfs_entry->flow_id,
+				arfs_entry->fltr_info.fltr_id))
+		return true;
+
+	/* expiration timer only used for UDP filters */
+	if (arfs_entry->fltr_info.flow_type != ICE_FLTR_PTYPE_NONF_IPV4_UDP &&
+	    arfs_entry->fltr_info.flow_type != ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+		return false;
+
+	return time_in_range64(arfs_entry->time_activated +
+			       ICE_ARFS_TIME_DELTA_EXPIRATION,
+			       arfs_entry->time_activated, get_jiffies_64());
+}
+
+/**
+ * ice_arfs_update_flow_rules - add/delete aRFS rules in HW
+ * @vsi: the VSI to be forwarded to
+ * @idx: index into the table of aRFS filter lists. Obtained from skb->hash
+ * @add_list: list to populate with filters to be added to Flow Director
+ * @del_list: list to populate with filters to be deleted from Flow Director
+ *
+ * Iterate over the hlist at the index given in the aRFS hash table and
+ * determine if there are any aRFS entries that need to be either added or
+ * deleted in the HW. If the aRFS entry is marked as ICE_ARFS_INACTIVE the
+ * filter needs to be added to HW, else if it's marked as ICE_ARFS_ACTIVE and
+ * the flow has expired delete the filter from HW. The caller of this function
+ * is expected to add/delete rules on the add_list/del_list respectively.
+ */
+static void
+ice_arfs_update_flow_rules(struct ice_vsi *vsi, u16 idx,
+			   struct hlist_head *add_list,
+			   struct hlist_head *del_list)
+{
+	struct ice_arfs_entry *e;
+	struct hlist_node *n;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(vsi->back);
+
+	/* go through the aRFS hlist at this idx and check for needed updates */
+	hlist_for_each_entry_safe(e, n, &vsi->arfs_fltr_list[idx], list_entry)
+		/* check if filter needs to be added to HW */
+		if (e->fltr_state == ICE_ARFS_INACTIVE) {
+			enum ice_fltr_ptype flow_type = e->fltr_info.flow_type;
+			struct ice_arfs_entry_ptr *ep =
+				devm_kzalloc(dev, sizeof(*ep), GFP_ATOMIC);
+
+			if (!ep)
+				continue;
+			INIT_HLIST_NODE(&ep->list_entry);
+			/* reference aRFS entry to add HW filter */
+			ep->arfs_entry = e;
+			hlist_add_head(&ep->list_entry, add_list);
+			e->fltr_state = ICE_ARFS_ACTIVE;
+			/* expiration timer only used for UDP flows */
+			if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+			    flow_type == ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+				e->time_activated = get_jiffies_64();
+		} else if (e->fltr_state == ICE_ARFS_ACTIVE) {
+			/* check if filter needs to be removed from HW */
+			if (ice_arfs_is_flow_expired(vsi, e)) {
+				/* remove aRFS entry from hash table for delete
+				 * and to prevent referencing it the next time
+				 * through this hlist index
+				 */
+				hlist_del(&e->list_entry);
+				e->fltr_state = ICE_ARFS_TODEL;
+				/* save reference to aRFS entry for delete */
+				hlist_add_head(&e->list_entry, del_list);
+			}
+		}
+}
+
+/**
+ * ice_sync_arfs_fltrs - update all aRFS filters
+ * @pf: board private structure
+ */
+void ice_sync_arfs_fltrs(struct ice_pf *pf)
+{
+	HLIST_HEAD(tmp_del_list);
+	HLIST_HEAD(tmp_add_list);
+	struct ice_vsi *pf_vsi;
+	unsigned int i;
+
+	pf_vsi = ice_get_main_vsi(pf);
+	if (!pf_vsi)
+		return;
+
+	if (!ice_is_arfs_active(pf_vsi))
+		return;
+
+	spin_lock_bh(&pf_vsi->arfs_lock);
+	/* Once we process aRFS for the PF VSI get out */
+	for (i = 0; i < ICE_MAX_ARFS_LIST; i++)
+		ice_arfs_update_flow_rules(pf_vsi, i, &tmp_add_list,
+					   &tmp_del_list);
+	spin_unlock_bh(&pf_vsi->arfs_lock);
+
+	/* use list of ice_arfs_entry(s) for delete */
+	ice_arfs_del_flow_rules(pf_vsi, &tmp_del_list);
+
+	/* use list of ice_arfs_entry_ptr(s) for add */
+	ice_arfs_add_flow_rules(pf_vsi, &tmp_add_list);
+}
+
+/**
+ * ice_arfs_build_entry - builds an aRFS entry based on input
+ * @vsi: destination VSI for this flow
+ * @fk: flow dissector keys for creating the tuple
+ * @rxq_idx: Rx queue to steer this flow to
+ * @flow_id: passed down from the stack and saved for flow expiration
+ *
+ * returns an aRFS entry on success and NULL on failure
+ */
+static struct ice_arfs_entry *
+ice_arfs_build_entry(struct ice_vsi *vsi, const struct flow_keys *fk,
+		     u16 rxq_idx, u32 flow_id)
+{
+	struct ice_arfs_entry *arfs_entry;
+	struct ice_fdir_fltr *fltr_info;
+	u8 ip_proto;
+
+	arfs_entry = devm_kzalloc(ice_pf_to_dev(vsi->back),
+				  sizeof(*arfs_entry),
+				  GFP_ATOMIC | __GFP_NOWARN);
+	if (!arfs_entry)
+		return NULL;
+
+	fltr_info = &arfs_entry->fltr_info;
+	fltr_info->q_index = rxq_idx;
+	fltr_info->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX;
+	fltr_info->dest_vsi = vsi->idx;
+	ip_proto = fk->basic.ip_proto;
+
+	if (fk->basic.n_proto == htons(ETH_P_IP)) {
+		fltr_info->ip.v4.proto = ip_proto;
+		fltr_info->flow_type = (ip_proto == IPPROTO_TCP) ?
+			ICE_FLTR_PTYPE_NONF_IPV4_TCP :
+			ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+		fltr_info->ip.v4.src_ip = fk->addrs.v4addrs.src;
+		fltr_info->ip.v4.dst_ip = fk->addrs.v4addrs.dst;
+		fltr_info->ip.v4.src_port = fk->ports.src;
+		fltr_info->ip.v4.dst_port = fk->ports.dst;
+	} else { /* ETH_P_IPV6 */
+		fltr_info->ip.v6.proto = ip_proto;
+		fltr_info->flow_type = (ip_proto == IPPROTO_TCP) ?
+			ICE_FLTR_PTYPE_NONF_IPV6_TCP :
+			ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+		memcpy(&fltr_info->ip.v6.src_ip, &fk->addrs.v6addrs.src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr_info->ip.v6.dst_ip, &fk->addrs.v6addrs.dst,
+		       sizeof(struct in6_addr));
+		fltr_info->ip.v6.src_port = fk->ports.src;
+		fltr_info->ip.v6.dst_port = fk->ports.dst;
+	}
+
+	arfs_entry->flow_id = flow_id;
+	fltr_info->fltr_id =
+		atomic_inc_return(vsi->arfs_last_fltr_id) % RPS_NO_FILTER;
+
+	return arfs_entry;
+}
+
+/**
+ * ice_arfs_is_perfect_flow_set - Check to see if perfect flow is set
+ * @hw: pointer to HW structure
+ * @l3_proto: ETH_P_IP or ETH_P_IPV6 in network order
+ * @l4_proto: IPPROTO_UDP or IPPROTO_TCP
+ *
+ * We only support perfect (4-tuple) filters for aRFS. This function allows aRFS
+ * to check if perfect (4-tuple) flow rules are currently in place by Flow
+ * Director.
+ */
+static bool
+ice_arfs_is_perfect_flow_set(struct ice_hw *hw, __be16 l3_proto, u8 l4_proto)
+{
+	unsigned long *perfect_fltr = hw->fdir_perfect_fltr;
+
+	/* advanced Flow Director disabled, perfect filters always supported */
+	if (!perfect_fltr)
+		return true;
+
+	if (l3_proto == htons(ETH_P_IP) && l4_proto == IPPROTO_UDP)
+		return test_bit(ICE_FLTR_PTYPE_NONF_IPV4_UDP, perfect_fltr);
+	else if (l3_proto == htons(ETH_P_IP) && l4_proto == IPPROTO_TCP)
+		return test_bit(ICE_FLTR_PTYPE_NONF_IPV4_TCP, perfect_fltr);
+	else if (l3_proto == htons(ETH_P_IPV6) && l4_proto == IPPROTO_UDP)
+		return test_bit(ICE_FLTR_PTYPE_NONF_IPV6_UDP, perfect_fltr);
+	else if (l3_proto == htons(ETH_P_IPV6) && l4_proto == IPPROTO_TCP)
+		return test_bit(ICE_FLTR_PTYPE_NONF_IPV6_TCP, perfect_fltr);
+
+	return false;
+}
+
+/**
+ * ice_rx_flow_steer - steer the Rx flow to where application is being run
+ * @netdev: ptr to the netdev being adjusted
+ * @skb: buffer with required header information
+ * @rxq_idx: queue to which the flow needs to move
+ * @flow_id: flow identifier provided by the netdev
+ *
+ * Based on the skb, rxq_idx, and flow_id passed in add/update an entry in the
+ * aRFS hash table. Iterate over one of the hlists in the aRFS hash table and
+ * if the flow_id already exists in the hash table but the rxq_idx has changed
+ * mark the entry as ICE_ARFS_INACTIVE so it can get updated in HW, else
+ * if the entry is marked as ICE_ARFS_TODEL delete it from the aRFS hash table.
+ * If neither of the previous conditions are true then add a new entry in the
+ * aRFS hash table, which gets set to ICE_ARFS_INACTIVE by default so it can be
+ * added to HW.
+ */
+int
+ice_rx_flow_steer(struct net_device *netdev, const struct sk_buff *skb,
+		  u16 rxq_idx, u32 flow_id)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_arfs_entry *arfs_entry;
+	struct ice_vsi *vsi = np->vsi;
+	struct flow_keys fk;
+	struct ice_pf *pf;
+	__be16 n_proto;
+	u8 ip_proto;
+	u16 idx;
+	int ret;
+
+	/* failed to allocate memory for aRFS so don't crash */
+	if (unlikely(!vsi->arfs_fltr_list))
+		return -ENODEV;
+
+	pf = vsi->back;
+
+	if (skb->encapsulation)
+		return -EPROTONOSUPPORT;
+
+	if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+		return -EPROTONOSUPPORT;
+
+	n_proto = fk.basic.n_proto;
+	/* Support only IPV4 and IPV6 */
+	if ((n_proto == htons(ETH_P_IP) && !ip_is_fragment(ip_hdr(skb))) ||
+	    n_proto == htons(ETH_P_IPV6))
+		ip_proto = fk.basic.ip_proto;
+	else
+		return -EPROTONOSUPPORT;
+
+	/* Support only TCP and UDP */
+	if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP)
+		return -EPROTONOSUPPORT;
+
+	/* only support 4-tuple filters for aRFS */
+	if (!ice_arfs_is_perfect_flow_set(&pf->hw, n_proto, ip_proto))
+		return -EOPNOTSUPP;
+
+	/* choose the aRFS list bucket based on skb hash */
+	idx = skb_get_hash_raw(skb) & ICE_ARFS_LST_MASK;
+	/* search for entry in the bucket */
+	spin_lock_bh(&vsi->arfs_lock);
+	hlist_for_each_entry(arfs_entry, &vsi->arfs_fltr_list[idx],
+			     list_entry) {
+		struct ice_fdir_fltr *fltr_info;
+
+		/* keep searching for the already existing arfs_entry flow */
+		if (arfs_entry->flow_id != flow_id)
+			continue;
+
+		fltr_info = &arfs_entry->fltr_info;
+		ret = fltr_info->fltr_id;
+
+		if (fltr_info->q_index == rxq_idx ||
+		    arfs_entry->fltr_state != ICE_ARFS_ACTIVE)
+			goto out;
+
+		/* update the queue to forward to on an already existing flow */
+		fltr_info->q_index = rxq_idx;
+		arfs_entry->fltr_state = ICE_ARFS_INACTIVE;
+		ice_arfs_update_active_fltr_cntrs(vsi, arfs_entry, false);
+		goto out_schedule_service_task;
+	}
+
+	arfs_entry = ice_arfs_build_entry(vsi, &fk, rxq_idx, flow_id);
+	if (!arfs_entry) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = arfs_entry->fltr_info.fltr_id;
+	INIT_HLIST_NODE(&arfs_entry->list_entry);
+	hlist_add_head(&arfs_entry->list_entry, &vsi->arfs_fltr_list[idx]);
+out_schedule_service_task:
+	ice_service_task_schedule(pf);
+out:
+	spin_unlock_bh(&vsi->arfs_lock);
+	return ret;
+}
+
+/**
+ * ice_init_arfs_cntrs - initialize aRFS counter values
+ * @vsi: VSI that aRFS counters need to be initialized on
+ */
+static int ice_init_arfs_cntrs(struct ice_vsi *vsi)
+{
+	if (!vsi || vsi->type != ICE_VSI_PF)
+		return -EINVAL;
+
+	vsi->arfs_fltr_cntrs = kzalloc(sizeof(*vsi->arfs_fltr_cntrs),
+				       GFP_KERNEL);
+	if (!vsi->arfs_fltr_cntrs)
+		return -ENOMEM;
+
+	vsi->arfs_last_fltr_id = kzalloc(sizeof(*vsi->arfs_last_fltr_id),
+					 GFP_KERNEL);
+	if (!vsi->arfs_last_fltr_id) {
+		kfree(vsi->arfs_fltr_cntrs);
+		vsi->arfs_fltr_cntrs = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_init_arfs - initialize aRFS resources
+ * @vsi: the VSI to be forwarded to
+ */
+void ice_init_arfs(struct ice_vsi *vsi)
+{
+	struct hlist_head *arfs_fltr_list;
+	unsigned int i;
+
+	if (!vsi || vsi->type != ICE_VSI_PF)
+		return;
+
+	arfs_fltr_list = kcalloc(ICE_MAX_ARFS_LIST, sizeof(*arfs_fltr_list),
+				 GFP_KERNEL);
+	if (!arfs_fltr_list)
+		return;
+
+	if (ice_init_arfs_cntrs(vsi))
+		goto free_arfs_fltr_list;
+
+	for (i = 0; i < ICE_MAX_ARFS_LIST; i++)
+		INIT_HLIST_HEAD(&arfs_fltr_list[i]);
+
+	spin_lock_init(&vsi->arfs_lock);
+
+	vsi->arfs_fltr_list = arfs_fltr_list;
+
+	return;
+
+free_arfs_fltr_list:
+	kfree(arfs_fltr_list);
+}
+
+/**
+ * ice_clear_arfs - clear the aRFS hash table and any memory used for aRFS
+ * @vsi: the VSI to be forwarded to
+ */
+void ice_clear_arfs(struct ice_vsi *vsi)
+{
+	struct device *dev;
+	unsigned int i;
+
+	if (!vsi || vsi->type != ICE_VSI_PF || !vsi->back ||
+	    !vsi->arfs_fltr_list)
+		return;
+
+	dev = ice_pf_to_dev(vsi->back);
+	for (i = 0; i < ICE_MAX_ARFS_LIST; i++) {
+		struct ice_arfs_entry *r;
+		struct hlist_node *n;
+
+		spin_lock_bh(&vsi->arfs_lock);
+		hlist_for_each_entry_safe(r, n, &vsi->arfs_fltr_list[i],
+					  list_entry) {
+			hlist_del(&r->list_entry);
+			devm_kfree(dev, r);
+		}
+		spin_unlock_bh(&vsi->arfs_lock);
+	}
+
+	kfree(vsi->arfs_fltr_list);
+	vsi->arfs_fltr_list = NULL;
+	kfree(vsi->arfs_last_fltr_id);
+	vsi->arfs_last_fltr_id = NULL;
+	kfree(vsi->arfs_fltr_cntrs);
+	vsi->arfs_fltr_cntrs = NULL;
+}
+
+/**
+ * ice_free_cpu_rx_rmap - free setup CPU reverse map
+ * @vsi: the VSI to be forwarded to
+ */
+void ice_free_cpu_rx_rmap(struct ice_vsi *vsi)
+{
+	struct net_device *netdev;
+
+	if (!vsi || vsi->type != ICE_VSI_PF)
+		return;
+
+	netdev = vsi->netdev;
+	if (!netdev || !netdev->rx_cpu_rmap)
+		return;
+
+	free_irq_cpu_rmap(netdev->rx_cpu_rmap);
+	netdev->rx_cpu_rmap = NULL;
+}
+
+/**
+ * ice_set_cpu_rx_rmap - setup CPU reverse map for each queue
+ * @vsi: the VSI to be forwarded to
+ */
+int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
+{
+	struct net_device *netdev;
+	struct ice_pf *pf;
+	int i;
+
+	if (!vsi || vsi->type != ICE_VSI_PF)
+		return 0;
+
+	pf = vsi->back;
+	netdev = vsi->netdev;
+	if (!pf || !netdev || !vsi->num_q_vectors)
+		return -EINVAL;
+
+	netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s, q_vectors %d\n",
+		   vsi->type, netdev->name, vsi->num_q_vectors);
+
+	netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(vsi->num_q_vectors);
+	if (unlikely(!netdev->rx_cpu_rmap))
+		return -EINVAL;
+
+	ice_for_each_q_vector(vsi, i)
+		if (irq_cpu_rmap_add(netdev->rx_cpu_rmap,
+				     vsi->q_vectors[i]->irq.virq)) {
+			ice_free_cpu_rx_rmap(vsi);
+			return -EINVAL;
+		}
+
+	return 0;
+}
+
+/**
+ * ice_remove_arfs - remove/clear all aRFS resources
+ * @pf: device private structure
+ */
+void ice_remove_arfs(struct ice_pf *pf)
+{
+	struct ice_vsi *pf_vsi;
+
+	pf_vsi = ice_get_main_vsi(pf);
+	if (!pf_vsi)
+		return;
+
+	ice_clear_arfs(pf_vsi);
+}
+
+/**
+ * ice_rebuild_arfs - remove/clear all aRFS resources and rebuild after reset
+ * @pf: device private structure
+ */
+void ice_rebuild_arfs(struct ice_pf *pf)
+{
+	struct ice_vsi *pf_vsi;
+
+	pf_vsi = ice_get_main_vsi(pf);
+	if (!pf_vsi)
+		return;
+
+	ice_remove_arfs(pf);
+	ice_init_arfs(pf_vsi);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.h b/drivers/net/ethernet/intel/ice/ice_arfs.h
new file mode 100644
index 0000000000..9669ad9bf7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#ifndef _ICE_ARFS_H_
+#define _ICE_ARFS_H_
+
+#include "ice_fdir.h"
+
+enum ice_arfs_fltr_state {
+	ICE_ARFS_INACTIVE,
+	ICE_ARFS_ACTIVE,
+	ICE_ARFS_TODEL,
+};
+
+struct ice_arfs_entry {
+	struct ice_fdir_fltr fltr_info;
+	struct hlist_node list_entry;
+	u64 time_activated;	/* only valid for UDP flows */
+	u32 flow_id;
+	/* fltr_state = 0 - ICE_ARFS_INACTIVE:
+	 *	filter needs to be updated or programmed in HW.
+	 * fltr_state = 1 - ICE_ARFS_ACTIVE:
+	 *	filter is active and programmed in HW.
+	 * fltr_state = 2 - ICE_ARFS_TODEL:
+	 *	filter has been deleted from HW and needs to be removed from
+	 *	the aRFS hash table.
+	 */
+	u8 fltr_state;
+};
+
+struct ice_arfs_entry_ptr {
+	struct ice_arfs_entry *arfs_entry;
+	struct hlist_node list_entry;
+};
+
+struct ice_arfs_active_fltr_cntrs {
+	atomic_t active_tcpv4_cnt;
+	atomic_t active_tcpv6_cnt;
+	atomic_t active_udpv4_cnt;
+	atomic_t active_udpv6_cnt;
+};
+
+#ifdef CONFIG_RFS_ACCEL
+int
+ice_rx_flow_steer(struct net_device *netdev, const struct sk_buff *skb,
+		  u16 rxq_idx, u32 flow_id);
+void ice_clear_arfs(struct ice_vsi *vsi);
+void ice_free_cpu_rx_rmap(struct ice_vsi *vsi);
+void ice_init_arfs(struct ice_vsi *vsi);
+void ice_sync_arfs_fltrs(struct ice_pf *pf);
+int ice_set_cpu_rx_rmap(struct ice_vsi *vsi);
+void ice_remove_arfs(struct ice_pf *pf);
+void ice_rebuild_arfs(struct ice_pf *pf);
+bool
+ice_is_arfs_using_perfect_flow(struct ice_hw *hw,
+			       enum ice_fltr_ptype flow_type);
+#else
+static inline void ice_clear_arfs(struct ice_vsi *vsi) { }
+static inline void ice_free_cpu_rx_rmap(struct ice_vsi *vsi) { }
+static inline void ice_init_arfs(struct ice_vsi *vsi) { }
+static inline void ice_sync_arfs_fltrs(struct ice_pf *pf) { }
+static inline void ice_remove_arfs(struct ice_pf *pf) { }
+static inline void ice_rebuild_arfs(struct ice_pf *pf) { }
+
+static inline int ice_set_cpu_rx_rmap(struct ice_vsi __always_unused *vsi)
+{
+	return 0;
+}
+
+static inline int
+ice_rx_flow_steer(struct net_device __always_unused *netdev,
+		  const struct sk_buff __always_unused *skb,
+		  u16 __always_unused rxq_idx, u32 __always_unused flow_id)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline bool
+ice_is_arfs_using_perfect_flow(struct ice_hw __always_unused *hw,
+			       enum ice_fltr_ptype __always_unused flow_type)
+{
+	return false;
+}
+#endif /* CONFIG_RFS_ACCEL */
+#endif /* _ICE_ARFS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
new file mode 100644
index 0000000000..4f3e65b47c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -0,0 +1,1068 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <net/xdp_sock_drv.h>
+#include "ice_base.h"
+#include "ice_lib.h"
+#include "ice_dcb_lib.h"
+#include "ice_sriov.h"
+
+/**
+ * __ice_vsi_get_qs_contig - Assign a contiguous chunk of queues to VSI
+ * @qs_cfg: gathered variables needed for PF->VSI queues assignment
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
+{
+	unsigned int offset, i;
+
+	mutex_lock(qs_cfg->qs_mutex);
+	offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
+					    0, qs_cfg->q_count, 0);
+	if (offset >= qs_cfg->pf_map_size) {
+		mutex_unlock(qs_cfg->qs_mutex);
+		return -ENOMEM;
+	}
+
+	bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
+	for (i = 0; i < qs_cfg->q_count; i++)
+		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = (u16)(i + offset);
+	mutex_unlock(qs_cfg->qs_mutex);
+
+	return 0;
+}
+
+/**
+ * __ice_vsi_get_qs_sc - Assign a scattered queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
+{
+	unsigned int i, index = 0;
+
+	mutex_lock(qs_cfg->qs_mutex);
+	for (i = 0; i < qs_cfg->q_count; i++) {
+		index = find_next_zero_bit(qs_cfg->pf_map,
+					   qs_cfg->pf_map_size, index);
+		if (index >= qs_cfg->pf_map_size)
+			goto err_scatter;
+		set_bit(index, qs_cfg->pf_map);
+		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = (u16)index;
+	}
+	mutex_unlock(qs_cfg->qs_mutex);
+
+	return 0;
+err_scatter:
+	for (index = 0; index < i; index++) {
+		clear_bit(qs_cfg->vsi_map[index], qs_cfg->pf_map);
+		qs_cfg->vsi_map[index + qs_cfg->vsi_map_offset] = 0;
+	}
+	mutex_unlock(qs_cfg->qs_mutex);
+
+	return -ENOMEM;
+}
+
+/**
+ * ice_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
+ * @pf: the PF being configured
+ * @pf_q: the PF queue
+ * @ena: enable or disable state of the queue
+ *
+ * This routine will wait for the given Rx queue of the PF to reach the
+ * enabled or disabled state.
+ * Returns -ETIMEDOUT in case of failing to reach the requested state after
+ * multiple retries; else will return 0 in case of success.
+ */
+static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
+{
+	int i;
+
+	for (i = 0; i < ICE_Q_WAIT_MAX_RETRY; i++) {
+		if (ena == !!(rd32(&pf->hw, QRX_CTRL(pf_q)) &
+			      QRX_CTRL_QENA_STAT_M))
+			return 0;
+
+		usleep_range(20, 40);
+	}
+
+	return -ETIMEDOUT;
+}
+
+/**
+ * ice_vsi_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @vsi: the VSI being configured
+ * @v_idx: index of the vector in the VSI struct
+ *
+ * We allocate one q_vector and set default value for ITR setting associated
+ * with this q_vector. If allocation fails we return -ENOMEM.
+ */
+static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_q_vector *q_vector;
+	int err;
+
+	/* allocate q_vector */
+	q_vector = kzalloc(sizeof(*q_vector), GFP_KERNEL);
+	if (!q_vector)
+		return -ENOMEM;
+
+	q_vector->vsi = vsi;
+	q_vector->v_idx = v_idx;
+	q_vector->tx.itr_setting = ICE_DFLT_TX_ITR;
+	q_vector->rx.itr_setting = ICE_DFLT_RX_ITR;
+	q_vector->tx.itr_mode = ITR_DYNAMIC;
+	q_vector->rx.itr_mode = ITR_DYNAMIC;
+	q_vector->tx.type = ICE_TX_CONTAINER;
+	q_vector->rx.type = ICE_RX_CONTAINER;
+	q_vector->irq.index = -ENOENT;
+
+	if (vsi->type == ICE_VSI_VF) {
+		q_vector->reg_idx = ice_calc_vf_reg_idx(vsi->vf, q_vector);
+		goto out;
+	} else if (vsi->type == ICE_VSI_CTRL && vsi->vf) {
+		struct ice_vsi *ctrl_vsi = ice_get_vf_ctrl_vsi(pf, vsi);
+
+		if (ctrl_vsi) {
+			if (unlikely(!ctrl_vsi->q_vectors)) {
+				err = -ENOENT;
+				goto err_free_q_vector;
+			}
+
+			q_vector->irq = ctrl_vsi->q_vectors[0]->irq;
+			goto skip_alloc;
+		}
+	}
+
+	q_vector->irq = ice_alloc_irq(pf, vsi->irq_dyn_alloc);
+	if (q_vector->irq.index < 0) {
+		err = -ENOMEM;
+		goto err_free_q_vector;
+	}
+
+skip_alloc:
+	q_vector->reg_idx = q_vector->irq.index;
+
+	/* only set affinity_mask if the CPU is online */
+	if (cpu_online(v_idx))
+		cpumask_set_cpu(v_idx, &q_vector->affinity_mask);
+
+	/* This will not be called in the driver load path because the netdev
+	 * will not be created yet. All other cases with register the NAPI
+	 * handler here (i.e. resume, reset/rebuild, etc.)
+	 */
+	if (vsi->netdev)
+		netif_napi_add(vsi->netdev, &q_vector->napi, ice_napi_poll);
+
+out:
+	/* tie q_vector and VSI together */
+	vsi->q_vectors[v_idx] = q_vector;
+
+	return 0;
+
+err_free_q_vector:
+	kfree(q_vector);
+
+	return err;
+}
+
+/**
+ * ice_free_q_vector - Free memory allocated for a specific interrupt vector
+ * @vsi: VSI having the memory freed
+ * @v_idx: index of the vector to be freed
+ */
+static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx)
+{
+	struct ice_q_vector *q_vector;
+	struct ice_pf *pf = vsi->back;
+	struct ice_tx_ring *tx_ring;
+	struct ice_rx_ring *rx_ring;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+	if (!vsi->q_vectors[v_idx]) {
+		dev_dbg(dev, "Queue vector at index %d not found\n", v_idx);
+		return;
+	}
+	q_vector = vsi->q_vectors[v_idx];
+
+	ice_for_each_tx_ring(tx_ring, q_vector->tx)
+		tx_ring->q_vector = NULL;
+	ice_for_each_rx_ring(rx_ring, q_vector->rx)
+		rx_ring->q_vector = NULL;
+
+	/* only VSI with an associated netdev is set up with NAPI */
+	if (vsi->netdev)
+		netif_napi_del(&q_vector->napi);
+
+	/* release MSIX interrupt if q_vector had interrupt allocated */
+	if (q_vector->irq.index < 0)
+		goto free_q_vector;
+
+	/* only free last VF ctrl vsi interrupt */
+	if (vsi->type == ICE_VSI_CTRL && vsi->vf &&
+	    ice_get_vf_ctrl_vsi(pf, vsi))
+		goto free_q_vector;
+
+	ice_free_irq(pf, q_vector->irq);
+
+free_q_vector:
+	kfree(q_vector);
+	vsi->q_vectors[v_idx] = NULL;
+}
+
+/**
+ * ice_cfg_itr_gran - set the ITR granularity to 2 usecs if not already set
+ * @hw: board specific structure
+ */
+static void ice_cfg_itr_gran(struct ice_hw *hw)
+{
+	u32 regval = rd32(hw, GLINT_CTL);
+
+	/* no need to update global register if ITR gran is already set */
+	if (!(regval & GLINT_CTL_DIS_AUTOMASK_M) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_200_M) >>
+	     GLINT_CTL_ITR_GRAN_200_S) == ICE_ITR_GRAN_US) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_100_M) >>
+	     GLINT_CTL_ITR_GRAN_100_S) == ICE_ITR_GRAN_US) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_50_M) >>
+	     GLINT_CTL_ITR_GRAN_50_S) == ICE_ITR_GRAN_US) &&
+	    (((regval & GLINT_CTL_ITR_GRAN_25_M) >>
+	      GLINT_CTL_ITR_GRAN_25_S) == ICE_ITR_GRAN_US))
+		return;
+
+	regval = ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_200_S) &
+		  GLINT_CTL_ITR_GRAN_200_M) |
+		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_100_S) &
+		  GLINT_CTL_ITR_GRAN_100_M) |
+		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_50_S) &
+		  GLINT_CTL_ITR_GRAN_50_M) |
+		 ((ICE_ITR_GRAN_US << GLINT_CTL_ITR_GRAN_25_S) &
+		  GLINT_CTL_ITR_GRAN_25_M);
+	wr32(hw, GLINT_CTL, regval);
+}
+
+/**
+ * ice_calc_txq_handle - calculate the queue handle
+ * @vsi: VSI that ring belongs to
+ * @ring: ring to get the absolute queue index
+ * @tc: traffic class number
+ */
+static u16 ice_calc_txq_handle(struct ice_vsi *vsi, struct ice_tx_ring *ring, u8 tc)
+{
+	WARN_ONCE(ice_ring_is_xdp(ring) && tc, "XDP ring can't belong to TC other than 0\n");
+
+	if (ring->ch)
+		return ring->q_index - ring->ch->base_q;
+
+	/* Idea here for calculation is that we subtract the number of queue
+	 * count from TC that ring belongs to from it's absolute queue index
+	 * and as a result we get the queue's index within TC.
+	 */
+	return ring->q_index - vsi->tc_cfg.tc_info[tc].qoffset;
+}
+
+/**
+ * ice_eswitch_calc_txq_handle
+ * @ring: pointer to ring which unique index is needed
+ *
+ * To correctly work with many netdevs ring->q_index of Tx rings on switchdev
+ * VSI can repeat. Hardware ring setup requires unique q_index. Calculate it
+ * here by finding index in vsi->tx_rings of this ring.
+ *
+ * Return ICE_INVAL_Q_INDEX when index wasn't found. Should never happen,
+ * because VSI is get from ring->vsi, so it has to be present in this VSI.
+ */
+static u16 ice_eswitch_calc_txq_handle(struct ice_tx_ring *ring)
+{
+	struct ice_vsi *vsi = ring->vsi;
+	int i;
+
+	ice_for_each_txq(vsi, i) {
+		if (vsi->tx_rings[i] == ring)
+			return i;
+	}
+
+	return ICE_INVAL_Q_INDEX;
+}
+
+/**
+ * ice_cfg_xps_tx_ring - Configure XPS for a Tx ring
+ * @ring: The Tx ring to configure
+ *
+ * This enables/disables XPS for a given Tx descriptor ring
+ * based on the TCs enabled for the VSI that ring belongs to.
+ */
+static void ice_cfg_xps_tx_ring(struct ice_tx_ring *ring)
+{
+	if (!ring->q_vector || !ring->netdev)
+		return;
+
+	/* We only initialize XPS once, so as not to overwrite user settings */
+	if (test_and_set_bit(ICE_TX_XPS_INIT_DONE, ring->xps_state))
+		return;
+
+	netif_set_xps_queue(ring->netdev, &ring->q_vector->affinity_mask,
+			    ring->q_index);
+}
+
+/**
+ * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance
+ * @ring: The Tx ring to configure
+ * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized
+ * @pf_q: queue index in the PF space
+ *
+ * Configure the Tx descriptor ring in TLAN context.
+ */
+static void
+ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
+{
+	struct ice_vsi *vsi = ring->vsi;
+	struct ice_hw *hw = &vsi->back->hw;
+
+	tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S;
+
+	tlan_ctx->port_num = vsi->port_info->lport;
+
+	/* Transmit Queue Length */
+	tlan_ctx->qlen = ring->count;
+
+	ice_set_cgd_num(tlan_ctx, ring->dcb_tc);
+
+	/* PF number */
+	tlan_ctx->pf_num = hw->pf_id;
+
+	/* queue belongs to a specific VSI type
+	 * VF / VM index should be programmed per vmvf_type setting:
+	 * for vmvf_type = VF, it is VF number between 0-256
+	 * for vmvf_type = VM, it is VM number between 0-767
+	 * for PF or EMP this field should be set to zero
+	 */
+	switch (vsi->type) {
+	case ICE_VSI_LB:
+	case ICE_VSI_CTRL:
+	case ICE_VSI_PF:
+		if (ring->ch)
+			tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ;
+		else
+			tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
+		break;
+	case ICE_VSI_VF:
+		/* Firmware expects vmvf_num to be absolute VF ID */
+		tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf->vf_id;
+		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF;
+		break;
+	case ICE_VSI_SWITCHDEV_CTRL:
+		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ;
+		break;
+	default:
+		return;
+	}
+
+	/* make sure the context is associated with the right VSI */
+	if (ring->ch)
+		tlan_ctx->src_vsi = ring->ch->vsi_num;
+	else
+		tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx);
+
+	/* Restrict Tx timestamps to the PF VSI */
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		tlan_ctx->tsyn_ena = 1;
+		break;
+	default:
+		break;
+	}
+
+	tlan_ctx->tso_ena = ICE_TX_LEGACY;
+	tlan_ctx->tso_qnum = pf_q;
+
+	/* Legacy or Advanced Host Interface:
+	 * 0: Advanced Host Interface
+	 * 1: Legacy Host Interface
+	 */
+	tlan_ctx->legacy_int = ICE_TX_LEGACY;
+}
+
+/**
+ * ice_rx_offset - Return expected offset into page to access data
+ * @rx_ring: Ring we are requesting offset of
+ *
+ * Returns the offset value for ring into the data buffer.
+ */
+static unsigned int ice_rx_offset(struct ice_rx_ring *rx_ring)
+{
+	if (ice_ring_uses_build_skb(rx_ring))
+		return ICE_SKB_PAD;
+	return 0;
+}
+
+/**
+ * ice_setup_rx_ctx - Configure a receive ring context
+ * @ring: The Rx ring to configure
+ *
+ * Configure the Rx descriptor ring in RLAN context.
+ */
+static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
+{
+	struct ice_vsi *vsi = ring->vsi;
+	u32 rxdid = ICE_RXDID_FLEX_NIC;
+	struct ice_rlan_ctx rlan_ctx;
+	struct ice_hw *hw;
+	u16 pf_q;
+	int err;
+
+	hw = &vsi->back->hw;
+
+	/* what is Rx queue number in global space of 2K Rx queues */
+	pf_q = vsi->rxq_map[ring->q_index];
+
+	/* clear the context structure first */
+	memset(&rlan_ctx, 0, sizeof(rlan_ctx));
+
+	/* Receive Queue Base Address.
+	 * Indicates the starting address of the descriptor queue defined in
+	 * 128 Byte units.
+	 */
+	rlan_ctx.base = ring->dma >> ICE_RLAN_BASE_S;
+
+	rlan_ctx.qlen = ring->count;
+
+	/* Receive Packet Data Buffer Size.
+	 * The Packet Data Buffer Size is defined in 128 byte units.
+	 */
+	rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len,
+				     BIT_ULL(ICE_RLAN_CTX_DBUF_S));
+
+	/* use 32 byte descriptors */
+	rlan_ctx.dsize = 1;
+
+	/* Strip the Ethernet CRC bytes before the packet is posted to host
+	 * memory.
+	 */
+	rlan_ctx.crcstrip = !(ring->flags & ICE_RX_FLAGS_CRC_STRIP_DIS);
+
+	/* L2TSEL flag defines the reported L2 Tags in the receive descriptor
+	 * and it needs to remain 1 for non-DVM capable configurations to not
+	 * break backward compatibility for VF drivers. Setting this field to 0
+	 * will cause the single/outer VLAN tag to be stripped to the L2TAG2_2ND
+	 * field in the Rx descriptor. Setting it to 1 allows the VLAN tag to
+	 * be stripped in L2TAG1 of the Rx descriptor, which is where VFs will
+	 * check for the tag
+	 */
+	if (ice_is_dvm_ena(hw))
+		if (vsi->type == ICE_VSI_VF &&
+		    ice_vf_is_port_vlan_ena(vsi->vf))
+			rlan_ctx.l2tsel = 1;
+		else
+			rlan_ctx.l2tsel = 0;
+	else
+		rlan_ctx.l2tsel = 1;
+
+	rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
+	rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
+	rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
+
+	/* This controls whether VLAN is stripped from inner headers
+	 * The VLAN in the inner L2 header is stripped to the receive
+	 * descriptor if enabled by this flag.
+	 */
+	rlan_ctx.showiv = 0;
+
+	/* Max packet size for this queue - must not be set to a larger value
+	 * than 5 x DBUF
+	 */
+	rlan_ctx.rxmax = min_t(u32, vsi->max_frame,
+			       ICE_MAX_CHAINED_RX_BUFS * ring->rx_buf_len);
+
+	/* Rx queue threshold in units of 64 */
+	rlan_ctx.lrxqthresh = 1;
+
+	/* Enable Flexible Descriptors in the queue context which
+	 * allows this driver to select a specific receive descriptor format
+	 * increasing context priority to pick up profile ID; default is 0x01;
+	 * setting to 0x03 to ensure profile is programming if prev context is
+	 * of same priority
+	 */
+	if (vsi->type != ICE_VSI_VF)
+		ice_write_qrxflxp_cntxt(hw, pf_q, rxdid, 0x3, true);
+	else
+		ice_write_qrxflxp_cntxt(hw, pf_q, ICE_RXDID_LEGACY_1, 0x3,
+					false);
+
+	/* Absolute queue number out of 2K needs to be passed */
+	err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
+	if (err) {
+		dev_err(ice_pf_to_dev(vsi->back), "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
+			pf_q, err);
+		return -EIO;
+	}
+
+	if (vsi->type == ICE_VSI_VF)
+		return 0;
+
+	/* configure Rx buffer alignment */
+	if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+		ice_clear_ring_build_skb_ena(ring);
+	else
+		ice_set_ring_build_skb_ena(ring);
+
+	ring->rx_offset = ice_rx_offset(ring);
+
+	/* init queue specific tail register */
+	ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
+	writel(0, ring->tail);
+
+	return 0;
+}
+
+/**
+ * ice_vsi_cfg_rxq - Configure an Rx queue
+ * @ring: the ring being configured
+ *
+ * Return 0 on success and a negative value on error.
+ */
+int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
+{
+	struct device *dev = ice_pf_to_dev(ring->vsi->back);
+	u32 num_bufs = ICE_RX_DESC_UNUSED(ring);
+	int err;
+
+	ring->rx_buf_len = ring->vsi->rx_buf_len;
+
+	if (ring->vsi->type == ICE_VSI_PF) {
+		if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
+			err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+						 ring->q_index,
+						 ring->q_vector->napi.napi_id,
+						 ring->rx_buf_len);
+			if (err)
+				return err;
+		}
+
+		ring->xsk_pool = ice_xsk_pool(ring);
+		if (ring->xsk_pool) {
+			xdp_rxq_info_unreg(&ring->xdp_rxq);
+
+			ring->rx_buf_len =
+				xsk_pool_get_rx_frame_size(ring->xsk_pool);
+			err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+						 ring->q_index,
+						 ring->q_vector->napi.napi_id,
+						 ring->rx_buf_len);
+			if (err)
+				return err;
+			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+							 MEM_TYPE_XSK_BUFF_POOL,
+							 NULL);
+			if (err)
+				return err;
+			xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
+
+			dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
+				 ring->q_index);
+		} else {
+			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
+				err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
+							 ring->q_index,
+							 ring->q_vector->napi.napi_id,
+							 ring->rx_buf_len);
+				if (err)
+					return err;
+			}
+
+			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+							 MEM_TYPE_PAGE_SHARED,
+							 NULL);
+			if (err)
+				return err;
+		}
+	}
+
+	xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq);
+	ring->xdp.data = NULL;
+	err = ice_setup_rx_ctx(ring);
+	if (err) {
+		dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
+			ring->q_index, err);
+		return err;
+	}
+
+	if (ring->xsk_pool) {
+		bool ok;
+
+		if (!xsk_buff_can_alloc(ring->xsk_pool, num_bufs)) {
+			dev_warn(dev, "XSK buffer pool does not provide enough addresses to fill %d buffers on Rx ring %d\n",
+				 num_bufs, ring->q_index);
+			dev_warn(dev, "Change Rx ring/fill queue size to avoid performance issues\n");
+
+			return 0;
+		}
+
+		ok = ice_alloc_rx_bufs_zc(ring, num_bufs);
+		if (!ok) {
+			u16 pf_q = ring->vsi->rxq_map[ring->q_index];
+
+			dev_info(dev, "Failed to allocate some buffers on XSK buffer pool enabled Rx ring %d (pf_q %d)\n",
+				 ring->q_index, pf_q);
+		}
+
+		return 0;
+	}
+
+	ice_alloc_rx_bufs(ring, num_bufs);
+
+	return 0;
+}
+
+/**
+ * __ice_vsi_get_qs - helper function for assigning queues from PF to VSI
+ * @qs_cfg: gathered variables needed for pf->vsi queues assignment
+ *
+ * This function first tries to find contiguous space. If it is not successful,
+ * it tries with the scatter approach.
+ *
+ * Return 0 on success and -ENOMEM in case of no left space in PF queue bitmap
+ */
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
+{
+	int ret = 0;
+
+	ret = __ice_vsi_get_qs_contig(qs_cfg);
+	if (ret) {
+		/* contig failed, so try with scatter approach */
+		qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
+		qs_cfg->q_count = min_t(unsigned int, qs_cfg->q_count,
+					qs_cfg->scatter_count);
+		ret = __ice_vsi_get_qs_sc(qs_cfg);
+	}
+	return ret;
+}
+
+/**
+ * ice_vsi_ctrl_one_rx_ring - start/stop VSI's Rx ring with no busy wait
+ * @vsi: the VSI being configured
+ * @ena: start or stop the Rx ring
+ * @rxq_idx: 0-based Rx queue index for the VSI passed in
+ * @wait: wait or don't wait for configuration to finish in hardware
+ *
+ * Return 0 on success and negative on error.
+ */
+int
+ice_vsi_ctrl_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx, bool wait)
+{
+	int pf_q = vsi->rxq_map[rxq_idx];
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 rx_reg;
+
+	rx_reg = rd32(hw, QRX_CTRL(pf_q));
+
+	/* Skip if the queue is already in the requested state */
+	if (ena == !!(rx_reg & QRX_CTRL_QENA_STAT_M))
+		return 0;
+
+	/* turn on/off the queue */
+	if (ena)
+		rx_reg |= QRX_CTRL_QENA_REQ_M;
+	else
+		rx_reg &= ~QRX_CTRL_QENA_REQ_M;
+	wr32(hw, QRX_CTRL(pf_q), rx_reg);
+
+	if (!wait)
+		return 0;
+
+	ice_flush(hw);
+	return ice_pf_rxq_wait(pf, pf_q, ena);
+}
+
+/**
+ * ice_vsi_wait_one_rx_ring - wait for a VSI's Rx ring to be stopped/started
+ * @vsi: the VSI being configured
+ * @ena: true/false to verify Rx ring has been enabled/disabled respectively
+ * @rxq_idx: 0-based Rx queue index for the VSI passed in
+ *
+ * This routine will wait for the given Rx queue of the VSI to reach the
+ * enabled or disabled state. Returns -ETIMEDOUT in case of failing to reach
+ * the requested state after multiple retries; else will return 0 in case of
+ * success.
+ */
+int ice_vsi_wait_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
+{
+	int pf_q = vsi->rxq_map[rxq_idx];
+	struct ice_pf *pf = vsi->back;
+
+	return ice_pf_rxq_wait(pf, pf_q, ena);
+}
+
+/**
+ * ice_vsi_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ */
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
+{
+	struct device *dev = ice_pf_to_dev(vsi->back);
+	u16 v_idx;
+	int err;
+
+	if (vsi->q_vectors[0]) {
+		dev_dbg(dev, "VSI %d has existing q_vectors\n", vsi->vsi_num);
+		return -EEXIST;
+	}
+
+	for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++) {
+		err = ice_vsi_alloc_q_vector(vsi, v_idx);
+		if (err)
+			goto err_out;
+	}
+
+	return 0;
+
+err_out:
+	while (v_idx--)
+		ice_free_q_vector(vsi, v_idx);
+
+	dev_err(dev, "Failed to allocate %d q_vector for VSI %d, ret=%d\n",
+		vsi->num_q_vectors, vsi->vsi_num, err);
+	vsi->num_q_vectors = 0;
+	return err;
+}
+
+/**
+ * ice_vsi_map_rings_to_vectors - Map VSI rings to interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * This function maps descriptor rings to the queue-specific vectors allotted
+ * through the MSI-X enabling code. On a constrained vector budget, we map Tx
+ * and Rx rings to the vector as "efficiently" as possible.
+ */
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
+{
+	int q_vectors = vsi->num_q_vectors;
+	u16 tx_rings_rem, rx_rings_rem;
+	int v_id;
+
+	/* initially assigning remaining rings count to VSIs num queue value */
+	tx_rings_rem = vsi->num_txq;
+	rx_rings_rem = vsi->num_rxq;
+
+	for (v_id = 0; v_id < q_vectors; v_id++) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
+		u8 tx_rings_per_v, rx_rings_per_v;
+		u16 q_id, q_base;
+
+		/* Tx rings mapping to vector */
+		tx_rings_per_v = (u8)DIV_ROUND_UP(tx_rings_rem,
+						  q_vectors - v_id);
+		q_vector->num_ring_tx = tx_rings_per_v;
+		q_vector->tx.tx_ring = NULL;
+		q_vector->tx.itr_idx = ICE_TX_ITR;
+		q_base = vsi->num_txq - tx_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) {
+			struct ice_tx_ring *tx_ring = vsi->tx_rings[q_id];
+
+			tx_ring->q_vector = q_vector;
+			tx_ring->next = q_vector->tx.tx_ring;
+			q_vector->tx.tx_ring = tx_ring;
+		}
+		tx_rings_rem -= tx_rings_per_v;
+
+		/* Rx rings mapping to vector */
+		rx_rings_per_v = (u8)DIV_ROUND_UP(rx_rings_rem,
+						  q_vectors - v_id);
+		q_vector->num_ring_rx = rx_rings_per_v;
+		q_vector->rx.rx_ring = NULL;
+		q_vector->rx.itr_idx = ICE_RX_ITR;
+		q_base = vsi->num_rxq - rx_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) {
+			struct ice_rx_ring *rx_ring = vsi->rx_rings[q_id];
+
+			rx_ring->q_vector = q_vector;
+			rx_ring->next = q_vector->rx.rx_ring;
+			q_vector->rx.rx_ring = rx_ring;
+		}
+		rx_rings_rem -= rx_rings_per_v;
+	}
+}
+
+/**
+ * ice_vsi_free_q_vectors - Free memory allocated for interrupt vectors
+ * @vsi: the VSI having memory freed
+ */
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
+{
+	int v_idx;
+
+	ice_for_each_q_vector(vsi, v_idx)
+		ice_free_q_vector(vsi, v_idx);
+
+	vsi->num_q_vectors = 0;
+}
+
+/**
+ * ice_vsi_cfg_txq - Configure single Tx queue
+ * @vsi: the VSI that queue belongs to
+ * @ring: Tx ring to be configured
+ * @qg_buf: queue group buffer
+ */
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring,
+		struct ice_aqc_add_tx_qgrp *qg_buf)
+{
+	u8 buf_len = struct_size(qg_buf, txqs, 1);
+	struct ice_tlan_ctx tlan_ctx = { 0 };
+	struct ice_aqc_add_txqs_perq *txq;
+	struct ice_channel *ch = ring->ch;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	int status;
+	u16 pf_q;
+	u8 tc;
+
+	/* Configure XPS */
+	ice_cfg_xps_tx_ring(ring);
+
+	pf_q = ring->reg_idx;
+	ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
+	/* copy context contents into the qg_buf */
+	qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
+	ice_set_ctx(hw, (u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
+		    ice_tlan_ctx_info);
+
+	/* init queue specific tail reg. It is referred as
+	 * transmit comm scheduler queue doorbell.
+	 */
+	ring->tail = hw->hw_addr + QTX_COMM_DBELL(pf_q);
+
+	if (IS_ENABLED(CONFIG_DCB))
+		tc = ring->dcb_tc;
+	else
+		tc = 0;
+
+	/* Add unique software queue handle of the Tx queue per
+	 * TC into the VSI Tx ring
+	 */
+	if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) {
+		ring->q_handle = ice_eswitch_calc_txq_handle(ring);
+
+		if (ring->q_handle == ICE_INVAL_Q_INDEX)
+			return -ENODEV;
+	} else {
+		ring->q_handle = ice_calc_txq_handle(vsi, ring, tc);
+	}
+
+	if (ch)
+		status = ice_ena_vsi_txq(vsi->port_info, ch->ch_vsi->idx, 0,
+					 ring->q_handle, 1, qg_buf, buf_len,
+					 NULL);
+	else
+		status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc,
+					 ring->q_handle, 1, qg_buf, buf_len,
+					 NULL);
+	if (status) {
+		dev_err(ice_pf_to_dev(pf), "Failed to set LAN Tx queue context, error: %d\n",
+			status);
+		return status;
+	}
+
+	/* Add Tx Queue TEID into the VSI Tx ring from the
+	 * response. This will complete configuring and
+	 * enabling the queue.
+	 */
+	txq = &qg_buf->txqs[0];
+	if (pf_q == le16_to_cpu(txq->txq_id))
+		ring->txq_teid = le32_to_cpu(txq->q_teid);
+
+	return 0;
+}
+
+/**
+ * ice_cfg_itr - configure the initial interrupt throttle values
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector that's being configured
+ *
+ * Configure interrupt throttling values for the ring containers that are
+ * associated with the interrupt vector passed in.
+ */
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+	ice_cfg_itr_gran(hw);
+
+	if (q_vector->num_ring_rx)
+		ice_write_itr(&q_vector->rx, q_vector->rx.itr_setting);
+
+	if (q_vector->num_ring_tx)
+		ice_write_itr(&q_vector->tx, q_vector->tx.itr_setting);
+
+	ice_write_intrl(q_vector, q_vector->intrl);
+}
+
+/**
+ * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * @vsi: the VSI being configured
+ * @txq: Tx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+
+	val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
+	      ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+
+	wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+		wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]),
+		     val);
+	}
+	ice_flush(hw);
+}
+
+/**
+ * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
+ * @vsi: the VSI being configured
+ * @rxq: Rx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
+ * within the function space.
+ */
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
+
+	val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
+	      ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
+
+	wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_trigger_sw_intr - trigger a software interrupt
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector to trigger the software interrupt for
+ */
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+	wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
+	     (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
+	     GLINT_DYN_CTL_SWINT_TRIG_M |
+	     GLINT_DYN_CTL_INTENA_M);
+}
+
+/**
+ * ice_vsi_stop_tx_ring - Disable single Tx ring
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ * @ring: Tx ring to be stopped
+ * @txq_meta: Meta data of Tx ring to be stopped
+ */
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		     u16 rel_vmvf_num, struct ice_tx_ring *ring,
+		     struct ice_txq_meta *txq_meta)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_q_vector *q_vector;
+	struct ice_hw *hw = &pf->hw;
+	int status;
+	u32 val;
+
+	/* clear cause_ena bit for disabled queues */
+	val = rd32(hw, QINT_TQCTL(ring->reg_idx));
+	val &= ~QINT_TQCTL_CAUSE_ENA_M;
+	wr32(hw, QINT_TQCTL(ring->reg_idx), val);
+
+	/* software is expected to wait for 100 ns */
+	ndelay(100);
+
+	/* trigger a software interrupt for the vector
+	 * associated to the queue to schedule NAPI handler
+	 */
+	q_vector = ring->q_vector;
+	if (q_vector && !(vsi->vf && ice_is_vf_disabled(vsi->vf)))
+		ice_trigger_sw_intr(hw, q_vector);
+
+	status = ice_dis_vsi_txq(vsi->port_info, txq_meta->vsi_idx,
+				 txq_meta->tc, 1, &txq_meta->q_handle,
+				 &txq_meta->q_id, &txq_meta->q_teid, rst_src,
+				 rel_vmvf_num, NULL);
+
+	/* if the disable queue command was exercised during an
+	 * active reset flow, -EBUSY is returned.
+	 * This is not an error as the reset operation disables
+	 * queues at the hardware level anyway.
+	 */
+	if (status == -EBUSY) {
+		dev_dbg(ice_pf_to_dev(vsi->back), "Reset in progress. LAN Tx queues already disabled\n");
+	} else if (status == -ENOENT) {
+		dev_dbg(ice_pf_to_dev(vsi->back), "LAN Tx queues do not exist, nothing to disable\n");
+	} else if (status) {
+		dev_dbg(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %d\n",
+			status);
+		return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_fill_txq_meta - Prepare the Tx queue's meta data
+ * @vsi: VSI that ring belongs to
+ * @ring: ring that txq_meta will be based on
+ * @txq_meta: a helper struct that wraps Tx queue's information
+ *
+ * Set up a helper struct that will contain all the necessary fields that
+ * are needed for stopping Tx queue
+ */
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring,
+		  struct ice_txq_meta *txq_meta)
+{
+	struct ice_channel *ch = ring->ch;
+	u8 tc;
+
+	if (IS_ENABLED(CONFIG_DCB))
+		tc = ring->dcb_tc;
+	else
+		tc = 0;
+
+	txq_meta->q_id = ring->reg_idx;
+	txq_meta->q_teid = ring->txq_teid;
+	txq_meta->q_handle = ring->q_handle;
+	if (ch) {
+		txq_meta->vsi_idx = ch->ch_vsi->idx;
+		txq_meta->tc = 0;
+	} else {
+		txq_meta->vsi_idx = vsi->idx;
+		txq_meta->tc = tc;
+	}
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h
new file mode 100644
index 0000000000..b67dca417a
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_base.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_BASE_H_
+#define _ICE_BASE_H_
+
+#include "ice.h"
+
+int ice_vsi_cfg_rxq(struct ice_rx_ring *ring);
+int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg);
+int
+ice_vsi_ctrl_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx, bool wait);
+int ice_vsi_wait_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx);
+int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi);
+void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi);
+void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
+int
+ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring,
+		struct ice_aqc_add_tx_qgrp *qg_buf);
+void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+int
+ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		     u16 rel_vmvf_num, struct ice_tx_ring *ring,
+		     struct ice_txq_meta *txq_meta);
+void
+ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring,
+		  struct ice_txq_meta *txq_meta);
+#endif /* _ICE_BASE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_cgu_regs.h b/drivers/net/ethernet/intel/ice/ice_cgu_regs.h
new file mode 100644
index 0000000000..57abd52386
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_cgu_regs.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+#ifndef _ICE_CGU_REGS_H_
+#define _ICE_CGU_REGS_H_
+
+#define NAC_CGU_DWORD9 0x24
+union nac_cgu_dword9 {
+	struct {
+		u32 time_ref_freq_sel : 3;
+		u32 clk_eref1_en : 1;
+		u32 clk_eref0_en : 1;
+		u32 time_ref_en : 1;
+		u32 time_sync_en : 1;
+		u32 one_pps_out_en : 1;
+		u32 clk_ref_synce_en : 1;
+		u32 clk_synce1_en : 1;
+		u32 clk_synce0_en : 1;
+		u32 net_clk_ref1_en : 1;
+		u32 net_clk_ref0_en : 1;
+		u32 clk_synce1_amp : 2;
+		u32 misc6 : 1;
+		u32 clk_synce0_amp : 2;
+		u32 one_pps_out_amp : 2;
+		u32 misc24 : 12;
+	} field;
+	u32 val;
+};
+
+#define NAC_CGU_DWORD19 0x4c
+union nac_cgu_dword19 {
+	struct {
+		u32 tspll_fbdiv_intgr : 8;
+		u32 fdpll_ulck_thr : 5;
+		u32 misc15 : 3;
+		u32 tspll_ndivratio : 4;
+		u32 tspll_iref_ndivratio : 3;
+		u32 misc19 : 1;
+		u32 japll_ndivratio : 4;
+		u32 japll_iref_ndivratio : 3;
+		u32 misc27 : 1;
+	} field;
+	u32 val;
+};
+
+#define NAC_CGU_DWORD22 0x58
+union nac_cgu_dword22 {
+	struct {
+		u32 fdpll_frac_div_out_nc : 2;
+		u32 fdpll_lock_int_for : 1;
+		u32 synce_hdov_int_for : 1;
+		u32 synce_lock_int_for : 1;
+		u32 fdpll_phlead_slip_nc : 1;
+		u32 fdpll_acc1_ovfl_nc : 1;
+		u32 fdpll_acc2_ovfl_nc : 1;
+		u32 synce_status_nc : 6;
+		u32 fdpll_acc1f_ovfl : 1;
+		u32 misc18 : 1;
+		u32 fdpllclk_div : 4;
+		u32 time1588clk_div : 4;
+		u32 synceclk_div : 4;
+		u32 synceclk_sel_div2 : 1;
+		u32 fdpllclk_sel_div2 : 1;
+		u32 time1588clk_sel_div2 : 1;
+		u32 misc3 : 1;
+	} field;
+	u32 val;
+};
+
+#define NAC_CGU_DWORD24 0x60
+union nac_cgu_dword24 {
+	struct {
+		u32 tspll_fbdiv_frac : 22;
+		u32 misc20 : 2;
+		u32 ts_pll_enable : 1;
+		u32 time_sync_tspll_align_sel : 1;
+		u32 ext_synce_sel : 1;
+		u32 ref1588_ck_div : 4;
+		u32 time_ref_sel : 1;
+	} field;
+	u32 val;
+};
+
+#define TSPLL_CNTR_BIST_SETTINGS 0x344
+union tspll_cntr_bist_settings {
+	struct {
+		u32 i_irefgen_settling_time_cntr_7_0 : 8;
+		u32 i_irefgen_settling_time_ro_standby_1_0 : 2;
+		u32 reserved195 : 5;
+		u32 i_plllock_sel_0 : 1;
+		u32 i_plllock_sel_1 : 1;
+		u32 i_plllock_cnt_6_0 : 7;
+		u32 i_plllock_cnt_10_7 : 4;
+		u32 reserved200 : 4;
+	} field;
+	u32 val;
+};
+
+#define TSPLL_RO_BWM_LF 0x370
+union tspll_ro_bwm_lf {
+	struct {
+		u32 bw_freqov_high_cri_7_0 : 8;
+		u32 bw_freqov_high_cri_9_8 : 2;
+		u32 biascaldone_cri : 1;
+		u32 plllock_gain_tran_cri : 1;
+		u32 plllock_true_lock_cri : 1;
+		u32 pllunlock_flag_cri : 1;
+		u32 afcerr_cri : 1;
+		u32 afcdone_cri : 1;
+		u32 feedfwrdgain_cal_cri_7_0 : 8;
+		u32 m2fbdivmod_cri_7_0 : 8;
+	} field;
+	u32 val;
+};
+
+#endif /* _ICE_CGU_REGS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
new file mode 100644
index 0000000000..80deca45ab
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -0,0 +1,5660 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_sched.h"
+#include "ice_adminq_cmd.h"
+#include "ice_flow.h"
+#include "ice_ptp_hw.h"
+
+#define ICE_PF_RESET_WAIT_COUNT	300
+
+static const char * const ice_link_mode_str_low[] = {
+	[0] = "100BASE_TX",
+	[1] = "100M_SGMII",
+	[2] = "1000BASE_T",
+	[3] = "1000BASE_SX",
+	[4] = "1000BASE_LX",
+	[5] = "1000BASE_KX",
+	[6] = "1G_SGMII",
+	[7] = "2500BASE_T",
+	[8] = "2500BASE_X",
+	[9] = "2500BASE_KX",
+	[10] = "5GBASE_T",
+	[11] = "5GBASE_KR",
+	[12] = "10GBASE_T",
+	[13] = "10G_SFI_DA",
+	[14] = "10GBASE_SR",
+	[15] = "10GBASE_LR",
+	[16] = "10GBASE_KR_CR1",
+	[17] = "10G_SFI_AOC_ACC",
+	[18] = "10G_SFI_C2C",
+	[19] = "25GBASE_T",
+	[20] = "25GBASE_CR",
+	[21] = "25GBASE_CR_S",
+	[22] = "25GBASE_CR1",
+	[23] = "25GBASE_SR",
+	[24] = "25GBASE_LR",
+	[25] = "25GBASE_KR",
+	[26] = "25GBASE_KR_S",
+	[27] = "25GBASE_KR1",
+	[28] = "25G_AUI_AOC_ACC",
+	[29] = "25G_AUI_C2C",
+	[30] = "40GBASE_CR4",
+	[31] = "40GBASE_SR4",
+	[32] = "40GBASE_LR4",
+	[33] = "40GBASE_KR4",
+	[34] = "40G_XLAUI_AOC_ACC",
+	[35] = "40G_XLAUI",
+	[36] = "50GBASE_CR2",
+	[37] = "50GBASE_SR2",
+	[38] = "50GBASE_LR2",
+	[39] = "50GBASE_KR2",
+	[40] = "50G_LAUI2_AOC_ACC",
+	[41] = "50G_LAUI2",
+	[42] = "50G_AUI2_AOC_ACC",
+	[43] = "50G_AUI2",
+	[44] = "50GBASE_CP",
+	[45] = "50GBASE_SR",
+	[46] = "50GBASE_FR",
+	[47] = "50GBASE_LR",
+	[48] = "50GBASE_KR_PAM4",
+	[49] = "50G_AUI1_AOC_ACC",
+	[50] = "50G_AUI1",
+	[51] = "100GBASE_CR4",
+	[52] = "100GBASE_SR4",
+	[53] = "100GBASE_LR4",
+	[54] = "100GBASE_KR4",
+	[55] = "100G_CAUI4_AOC_ACC",
+	[56] = "100G_CAUI4",
+	[57] = "100G_AUI4_AOC_ACC",
+	[58] = "100G_AUI4",
+	[59] = "100GBASE_CR_PAM4",
+	[60] = "100GBASE_KR_PAM4",
+	[61] = "100GBASE_CP2",
+	[62] = "100GBASE_SR2",
+	[63] = "100GBASE_DR",
+};
+
+static const char * const ice_link_mode_str_high[] = {
+	[0] = "100GBASE_KR2_PAM4",
+	[1] = "100G_CAUI2_AOC_ACC",
+	[2] = "100G_CAUI2",
+	[3] = "100G_AUI2_AOC_ACC",
+	[4] = "100G_AUI2",
+};
+
+/**
+ * ice_dump_phy_type - helper function to dump phy_type
+ * @hw: pointer to the HW structure
+ * @low: 64 bit value for phy_type_low
+ * @high: 64 bit value for phy_type_high
+ * @prefix: prefix string to differentiate multiple dumps
+ */
+static void
+ice_dump_phy_type(struct ice_hw *hw, u64 low, u64 high, const char *prefix)
+{
+	ice_debug(hw, ICE_DBG_PHY, "%s: phy_type_low: 0x%016llx\n", prefix, low);
+
+	for (u32 i = 0; i < BITS_PER_TYPE(typeof(low)); i++) {
+		if (low & BIT_ULL(i))
+			ice_debug(hw, ICE_DBG_PHY, "%s:   bit(%d): %s\n",
+				  prefix, i, ice_link_mode_str_low[i]);
+	}
+
+	ice_debug(hw, ICE_DBG_PHY, "%s: phy_type_high: 0x%016llx\n", prefix, high);
+
+	for (u32 i = 0; i < BITS_PER_TYPE(typeof(high)); i++) {
+		if (high & BIT_ULL(i))
+			ice_debug(hw, ICE_DBG_PHY, "%s:   bit(%d): %s\n",
+				  prefix, i, ice_link_mode_str_high[i]);
+	}
+}
+
+/**
+ * ice_set_mac_type - Sets MAC type
+ * @hw: pointer to the HW structure
+ *
+ * This function sets the MAC type of the adapter based on the
+ * vendor ID and device ID stored in the HW structure.
+ */
+static int ice_set_mac_type(struct ice_hw *hw)
+{
+	if (hw->vendor_id != PCI_VENDOR_ID_INTEL)
+		return -ENODEV;
+
+	switch (hw->device_id) {
+	case ICE_DEV_ID_E810C_BACKPLANE:
+	case ICE_DEV_ID_E810C_QSFP:
+	case ICE_DEV_ID_E810C_SFP:
+	case ICE_DEV_ID_E810_XXV_BACKPLANE:
+	case ICE_DEV_ID_E810_XXV_QSFP:
+	case ICE_DEV_ID_E810_XXV_SFP:
+		hw->mac_type = ICE_MAC_E810;
+		break;
+	case ICE_DEV_ID_E823C_10G_BASE_T:
+	case ICE_DEV_ID_E823C_BACKPLANE:
+	case ICE_DEV_ID_E823C_QSFP:
+	case ICE_DEV_ID_E823C_SFP:
+	case ICE_DEV_ID_E823C_SGMII:
+	case ICE_DEV_ID_E822C_10G_BASE_T:
+	case ICE_DEV_ID_E822C_BACKPLANE:
+	case ICE_DEV_ID_E822C_QSFP:
+	case ICE_DEV_ID_E822C_SFP:
+	case ICE_DEV_ID_E822C_SGMII:
+	case ICE_DEV_ID_E822L_10G_BASE_T:
+	case ICE_DEV_ID_E822L_BACKPLANE:
+	case ICE_DEV_ID_E822L_SFP:
+	case ICE_DEV_ID_E822L_SGMII:
+	case ICE_DEV_ID_E823L_10G_BASE_T:
+	case ICE_DEV_ID_E823L_1GBE:
+	case ICE_DEV_ID_E823L_BACKPLANE:
+	case ICE_DEV_ID_E823L_QSFP:
+	case ICE_DEV_ID_E823L_SFP:
+		hw->mac_type = ICE_MAC_GENERIC;
+		break;
+	default:
+		hw->mac_type = ICE_MAC_UNKNOWN;
+		break;
+	}
+
+	ice_debug(hw, ICE_DBG_INIT, "mac_type: %d\n", hw->mac_type);
+	return 0;
+}
+
+/**
+ * ice_is_e810
+ * @hw: pointer to the hardware structure
+ *
+ * returns true if the device is E810 based, false if not.
+ */
+bool ice_is_e810(struct ice_hw *hw)
+{
+	return hw->mac_type == ICE_MAC_E810;
+}
+
+/**
+ * ice_is_e810t
+ * @hw: pointer to the hardware structure
+ *
+ * returns true if the device is E810T based, false if not.
+ */
+bool ice_is_e810t(struct ice_hw *hw)
+{
+	switch (hw->device_id) {
+	case ICE_DEV_ID_E810C_SFP:
+		switch (hw->subsystem_device_id) {
+		case ICE_SUBDEV_ID_E810T:
+		case ICE_SUBDEV_ID_E810T2:
+		case ICE_SUBDEV_ID_E810T3:
+		case ICE_SUBDEV_ID_E810T4:
+		case ICE_SUBDEV_ID_E810T6:
+		case ICE_SUBDEV_ID_E810T7:
+			return true;
+		}
+		break;
+	case ICE_DEV_ID_E810C_QSFP:
+		switch (hw->subsystem_device_id) {
+		case ICE_SUBDEV_ID_E810T2:
+		case ICE_SUBDEV_ID_E810T3:
+		case ICE_SUBDEV_ID_E810T5:
+			return true;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return false;
+}
+
+/**
+ * ice_is_e823
+ * @hw: pointer to the hardware structure
+ *
+ * returns true if the device is E823-L or E823-C based, false if not.
+ */
+bool ice_is_e823(struct ice_hw *hw)
+{
+	switch (hw->device_id) {
+	case ICE_DEV_ID_E823L_BACKPLANE:
+	case ICE_DEV_ID_E823L_SFP:
+	case ICE_DEV_ID_E823L_10G_BASE_T:
+	case ICE_DEV_ID_E823L_1GBE:
+	case ICE_DEV_ID_E823L_QSFP:
+	case ICE_DEV_ID_E823C_BACKPLANE:
+	case ICE_DEV_ID_E823C_QSFP:
+	case ICE_DEV_ID_E823C_SFP:
+	case ICE_DEV_ID_E823C_10G_BASE_T:
+	case ICE_DEV_ID_E823C_SGMII:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/**
+ * ice_clear_pf_cfg - Clear PF configuration
+ * @hw: pointer to the hardware structure
+ *
+ * Clears any existing PF configuration (VSIs, VSI lists, switch rules, port
+ * configuration, flow director filters, etc.).
+ */
+int ice_clear_pf_cfg(struct ice_hw *hw)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pf_cfg);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_manage_mac_read - manage MAC address read command
+ * @hw: pointer to the HW struct
+ * @buf: a virtual buffer to hold the manage MAC read response
+ * @buf_size: Size of the virtual buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function is used to return per PF station MAC address (0x0107).
+ * NOTE: Upon successful completion of this command, MAC address information
+ * is returned in user specified buffer. Please interpret user specified
+ * buffer as "manage_mac_read" response.
+ * Response such as various MAC addresses are stored in HW struct (port.mac)
+ * ice_discover_dev_caps is expected to be called before this function is
+ * called.
+ */
+static int
+ice_aq_manage_mac_read(struct ice_hw *hw, void *buf, u16 buf_size,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aqc_manage_mac_read_resp *resp;
+	struct ice_aqc_manage_mac_read *cmd;
+	struct ice_aq_desc desc;
+	int status;
+	u16 flags;
+	u8 i;
+
+	cmd = &desc.params.mac_read;
+
+	if (buf_size < sizeof(*resp))
+		return -EINVAL;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_read);
+
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (status)
+		return status;
+
+	resp = buf;
+	flags = le16_to_cpu(cmd->flags) & ICE_AQC_MAN_MAC_READ_M;
+
+	if (!(flags & ICE_AQC_MAN_MAC_LAN_ADDR_VALID)) {
+		ice_debug(hw, ICE_DBG_LAN, "got invalid MAC address\n");
+		return -EIO;
+	}
+
+	/* A single port can report up to two (LAN and WoL) addresses */
+	for (i = 0; i < cmd->num_addr; i++)
+		if (resp[i].addr_type == ICE_AQC_MAN_MAC_ADDR_TYPE_LAN) {
+			ether_addr_copy(hw->port_info->mac.lan_addr,
+					resp[i].mac_addr);
+			ether_addr_copy(hw->port_info->mac.perm_addr,
+					resp[i].mac_addr);
+			break;
+		}
+
+	return 0;
+}
+
+/**
+ * ice_aq_get_phy_caps - returns PHY capabilities
+ * @pi: port information structure
+ * @qual_mods: report qualified modules
+ * @report_mode: report mode capabilities
+ * @pcaps: structure for PHY capabilities to be filled
+ * @cd: pointer to command details structure or NULL
+ *
+ * Returns the various PHY capabilities supported on the Port (0x0600)
+ */
+int
+ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
+		    struct ice_aqc_get_phy_caps_data *pcaps,
+		    struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_phy_caps *cmd;
+	u16 pcaps_size = sizeof(*pcaps);
+	struct ice_aq_desc desc;
+	const char *prefix;
+	struct ice_hw *hw;
+	int status;
+
+	cmd = &desc.params.get_phy;
+
+	if (!pcaps || (report_mode & ~ICE_AQC_REPORT_MODE_M) || !pi)
+		return -EINVAL;
+	hw = pi->hw;
+
+	if (report_mode == ICE_AQC_REPORT_DFLT_CFG &&
+	    !ice_fw_supports_report_dflt_cfg(hw))
+		return -EINVAL;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_caps);
+
+	if (qual_mods)
+		cmd->param0 |= cpu_to_le16(ICE_AQC_GET_PHY_RQM);
+
+	cmd->param0 |= cpu_to_le16(report_mode);
+	status = ice_aq_send_cmd(hw, &desc, pcaps, pcaps_size, cd);
+
+	ice_debug(hw, ICE_DBG_LINK, "get phy caps dump\n");
+
+	switch (report_mode) {
+	case ICE_AQC_REPORT_TOPO_CAP_MEDIA:
+		prefix = "phy_caps_media";
+		break;
+	case ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA:
+		prefix = "phy_caps_no_media";
+		break;
+	case ICE_AQC_REPORT_ACTIVE_CFG:
+		prefix = "phy_caps_active";
+		break;
+	case ICE_AQC_REPORT_DFLT_CFG:
+		prefix = "phy_caps_default";
+		break;
+	default:
+		prefix = "phy_caps_invalid";
+	}
+
+	ice_dump_phy_type(hw, le64_to_cpu(pcaps->phy_type_low),
+			  le64_to_cpu(pcaps->phy_type_high), prefix);
+
+	ice_debug(hw, ICE_DBG_LINK, "%s: report_mode = 0x%x\n",
+		  prefix, report_mode);
+	ice_debug(hw, ICE_DBG_LINK, "%s: caps = 0x%x\n", prefix, pcaps->caps);
+	ice_debug(hw, ICE_DBG_LINK, "%s: low_power_ctrl_an = 0x%x\n", prefix,
+		  pcaps->low_power_ctrl_an);
+	ice_debug(hw, ICE_DBG_LINK, "%s: eee_cap = 0x%x\n", prefix,
+		  pcaps->eee_cap);
+	ice_debug(hw, ICE_DBG_LINK, "%s: eeer_value = 0x%x\n", prefix,
+		  pcaps->eeer_value);
+	ice_debug(hw, ICE_DBG_LINK, "%s: link_fec_options = 0x%x\n", prefix,
+		  pcaps->link_fec_options);
+	ice_debug(hw, ICE_DBG_LINK, "%s: module_compliance_enforcement = 0x%x\n",
+		  prefix, pcaps->module_compliance_enforcement);
+	ice_debug(hw, ICE_DBG_LINK, "%s: extended_compliance_code = 0x%x\n",
+		  prefix, pcaps->extended_compliance_code);
+	ice_debug(hw, ICE_DBG_LINK, "%s: module_type[0] = 0x%x\n", prefix,
+		  pcaps->module_type[0]);
+	ice_debug(hw, ICE_DBG_LINK, "%s: module_type[1] = 0x%x\n", prefix,
+		  pcaps->module_type[1]);
+	ice_debug(hw, ICE_DBG_LINK, "%s: module_type[2] = 0x%x\n", prefix,
+		  pcaps->module_type[2]);
+
+	if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP_MEDIA) {
+		pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low);
+		pi->phy.phy_type_high = le64_to_cpu(pcaps->phy_type_high);
+		memcpy(pi->phy.link_info.module_type, &pcaps->module_type,
+		       sizeof(pi->phy.link_info.module_type));
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_get_link_topo_handle - get link topology node return status
+ * @pi: port information structure
+ * @node_type: requested node type
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get link topology node return status for specified node type (0x06E0)
+ *
+ * Node type cage can be used to determine if cage is present. If AQC
+ * returns error (ENOENT), then no cage present. If no cage present, then
+ * connection type is backplane or BASE-T.
+ */
+static int
+ice_aq_get_link_topo_handle(struct ice_port_info *pi, u8 node_type,
+			    struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_link_topo *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.get_link_topo;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo);
+
+	cmd->addr.topo_params.node_type_ctx =
+		(ICE_AQC_LINK_TOPO_NODE_CTX_PORT <<
+		 ICE_AQC_LINK_TOPO_NODE_CTX_S);
+
+	/* set node type */
+	cmd->addr.topo_params.node_type_ctx |=
+		(ICE_AQC_LINK_TOPO_NODE_TYPE_M & node_type);
+
+	return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_is_media_cage_present
+ * @pi: port information structure
+ *
+ * Returns true if media cage is present, else false. If no cage, then
+ * media type is backplane or BASE-T.
+ */
+static bool ice_is_media_cage_present(struct ice_port_info *pi)
+{
+	/* Node type cage can be used to determine if cage is present. If AQC
+	 * returns error (ENOENT), then no cage present. If no cage present then
+	 * connection type is backplane or BASE-T.
+	 */
+	return !ice_aq_get_link_topo_handle(pi,
+					    ICE_AQC_LINK_TOPO_NODE_TYPE_CAGE,
+					    NULL);
+}
+
+/**
+ * ice_get_media_type - Gets media type
+ * @pi: port information structure
+ */
+static enum ice_media_type ice_get_media_type(struct ice_port_info *pi)
+{
+	struct ice_link_status *hw_link_info;
+
+	if (!pi)
+		return ICE_MEDIA_UNKNOWN;
+
+	hw_link_info = &pi->phy.link_info;
+	if (hw_link_info->phy_type_low && hw_link_info->phy_type_high)
+		/* If more than one media type is selected, report unknown */
+		return ICE_MEDIA_UNKNOWN;
+
+	if (hw_link_info->phy_type_low) {
+		/* 1G SGMII is a special case where some DA cable PHYs
+		 * may show this as an option when it really shouldn't
+		 * be since SGMII is meant to be between a MAC and a PHY
+		 * in a backplane. Try to detect this case and handle it
+		 */
+		if (hw_link_info->phy_type_low == ICE_PHY_TYPE_LOW_1G_SGMII &&
+		    (hw_link_info->module_type[ICE_AQC_MOD_TYPE_IDENT] ==
+		    ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE ||
+		    hw_link_info->module_type[ICE_AQC_MOD_TYPE_IDENT] ==
+		    ICE_AQC_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE))
+			return ICE_MEDIA_DA;
+
+		switch (hw_link_info->phy_type_low) {
+		case ICE_PHY_TYPE_LOW_1000BASE_SX:
+		case ICE_PHY_TYPE_LOW_1000BASE_LX:
+		case ICE_PHY_TYPE_LOW_10GBASE_SR:
+		case ICE_PHY_TYPE_LOW_10GBASE_LR:
+		case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
+		case ICE_PHY_TYPE_LOW_25GBASE_SR:
+		case ICE_PHY_TYPE_LOW_25GBASE_LR:
+		case ICE_PHY_TYPE_LOW_40GBASE_SR4:
+		case ICE_PHY_TYPE_LOW_40GBASE_LR4:
+		case ICE_PHY_TYPE_LOW_50GBASE_SR2:
+		case ICE_PHY_TYPE_LOW_50GBASE_LR2:
+		case ICE_PHY_TYPE_LOW_50GBASE_SR:
+		case ICE_PHY_TYPE_LOW_50GBASE_FR:
+		case ICE_PHY_TYPE_LOW_50GBASE_LR:
+		case ICE_PHY_TYPE_LOW_100GBASE_SR4:
+		case ICE_PHY_TYPE_LOW_100GBASE_LR4:
+		case ICE_PHY_TYPE_LOW_100GBASE_SR2:
+		case ICE_PHY_TYPE_LOW_100GBASE_DR:
+		case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
+		case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
+		case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
+		case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
+		case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
+		case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
+		case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
+		case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
+			return ICE_MEDIA_FIBER;
+		case ICE_PHY_TYPE_LOW_100BASE_TX:
+		case ICE_PHY_TYPE_LOW_1000BASE_T:
+		case ICE_PHY_TYPE_LOW_2500BASE_T:
+		case ICE_PHY_TYPE_LOW_5GBASE_T:
+		case ICE_PHY_TYPE_LOW_10GBASE_T:
+		case ICE_PHY_TYPE_LOW_25GBASE_T:
+			return ICE_MEDIA_BASET;
+		case ICE_PHY_TYPE_LOW_10G_SFI_DA:
+		case ICE_PHY_TYPE_LOW_25GBASE_CR:
+		case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
+		case ICE_PHY_TYPE_LOW_25GBASE_CR1:
+		case ICE_PHY_TYPE_LOW_40GBASE_CR4:
+		case ICE_PHY_TYPE_LOW_50GBASE_CR2:
+		case ICE_PHY_TYPE_LOW_50GBASE_CP:
+		case ICE_PHY_TYPE_LOW_100GBASE_CR4:
+		case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
+		case ICE_PHY_TYPE_LOW_100GBASE_CP2:
+			return ICE_MEDIA_DA;
+		case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
+		case ICE_PHY_TYPE_LOW_40G_XLAUI:
+		case ICE_PHY_TYPE_LOW_50G_LAUI2:
+		case ICE_PHY_TYPE_LOW_50G_AUI2:
+		case ICE_PHY_TYPE_LOW_50G_AUI1:
+		case ICE_PHY_TYPE_LOW_100G_AUI4:
+		case ICE_PHY_TYPE_LOW_100G_CAUI4:
+			if (ice_is_media_cage_present(pi))
+				return ICE_MEDIA_DA;
+			fallthrough;
+		case ICE_PHY_TYPE_LOW_1000BASE_KX:
+		case ICE_PHY_TYPE_LOW_2500BASE_KX:
+		case ICE_PHY_TYPE_LOW_2500BASE_X:
+		case ICE_PHY_TYPE_LOW_5GBASE_KR:
+		case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
+		case ICE_PHY_TYPE_LOW_25GBASE_KR:
+		case ICE_PHY_TYPE_LOW_25GBASE_KR1:
+		case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
+		case ICE_PHY_TYPE_LOW_40GBASE_KR4:
+		case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
+		case ICE_PHY_TYPE_LOW_50GBASE_KR2:
+		case ICE_PHY_TYPE_LOW_100GBASE_KR4:
+		case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
+			return ICE_MEDIA_BACKPLANE;
+		}
+	} else {
+		switch (hw_link_info->phy_type_high) {
+		case ICE_PHY_TYPE_HIGH_100G_AUI2:
+		case ICE_PHY_TYPE_HIGH_100G_CAUI2:
+			if (ice_is_media_cage_present(pi))
+				return ICE_MEDIA_DA;
+			fallthrough;
+		case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
+			return ICE_MEDIA_BACKPLANE;
+		case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
+		case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
+			return ICE_MEDIA_FIBER;
+		}
+	}
+	return ICE_MEDIA_UNKNOWN;
+}
+
+/**
+ * ice_aq_get_link_info
+ * @pi: port information structure
+ * @ena_lse: enable/disable LinkStatusEvent reporting
+ * @link: pointer to link status structure - optional
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get Link Status (0x607). Returns the link status of the adapter.
+ */
+int
+ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
+		     struct ice_link_status *link, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_link_status_data link_data = { 0 };
+	struct ice_aqc_get_link_status *resp;
+	struct ice_link_status *li_old, *li;
+	enum ice_media_type *hw_media_type;
+	struct ice_fc_info *hw_fc_info;
+	bool tx_pause, rx_pause;
+	struct ice_aq_desc desc;
+	struct ice_hw *hw;
+	u16 cmd_flags;
+	int status;
+
+	if (!pi)
+		return -EINVAL;
+	hw = pi->hw;
+	li_old = &pi->phy.link_info_old;
+	hw_media_type = &pi->phy.media_type;
+	li = &pi->phy.link_info;
+	hw_fc_info = &pi->fc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_status);
+	cmd_flags = (ena_lse) ? ICE_AQ_LSE_ENA : ICE_AQ_LSE_DIS;
+	resp = &desc.params.get_link_status;
+	resp->cmd_flags = cpu_to_le16(cmd_flags);
+	resp->lport_num = pi->lport;
+
+	status = ice_aq_send_cmd(hw, &desc, &link_data, sizeof(link_data), cd);
+
+	if (status)
+		return status;
+
+	/* save off old link status information */
+	*li_old = *li;
+
+	/* update current link status information */
+	li->link_speed = le16_to_cpu(link_data.link_speed);
+	li->phy_type_low = le64_to_cpu(link_data.phy_type_low);
+	li->phy_type_high = le64_to_cpu(link_data.phy_type_high);
+	*hw_media_type = ice_get_media_type(pi);
+	li->link_info = link_data.link_info;
+	li->link_cfg_err = link_data.link_cfg_err;
+	li->an_info = link_data.an_info;
+	li->ext_info = link_data.ext_info;
+	li->max_frame_size = le16_to_cpu(link_data.max_frame_size);
+	li->fec_info = link_data.cfg & ICE_AQ_FEC_MASK;
+	li->topo_media_conflict = link_data.topo_media_conflict;
+	li->pacing = link_data.cfg & (ICE_AQ_CFG_PACING_M |
+				      ICE_AQ_CFG_PACING_TYPE_M);
+
+	/* update fc info */
+	tx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_TX);
+	rx_pause = !!(link_data.an_info & ICE_AQ_LINK_PAUSE_RX);
+	if (tx_pause && rx_pause)
+		hw_fc_info->current_mode = ICE_FC_FULL;
+	else if (tx_pause)
+		hw_fc_info->current_mode = ICE_FC_TX_PAUSE;
+	else if (rx_pause)
+		hw_fc_info->current_mode = ICE_FC_RX_PAUSE;
+	else
+		hw_fc_info->current_mode = ICE_FC_NONE;
+
+	li->lse_ena = !!(resp->cmd_flags & cpu_to_le16(ICE_AQ_LSE_IS_ENABLED));
+
+	ice_debug(hw, ICE_DBG_LINK, "get link info\n");
+	ice_debug(hw, ICE_DBG_LINK, "	link_speed = 0x%x\n", li->link_speed);
+	ice_debug(hw, ICE_DBG_LINK, "	phy_type_low = 0x%llx\n",
+		  (unsigned long long)li->phy_type_low);
+	ice_debug(hw, ICE_DBG_LINK, "	phy_type_high = 0x%llx\n",
+		  (unsigned long long)li->phy_type_high);
+	ice_debug(hw, ICE_DBG_LINK, "	media_type = 0x%x\n", *hw_media_type);
+	ice_debug(hw, ICE_DBG_LINK, "	link_info = 0x%x\n", li->link_info);
+	ice_debug(hw, ICE_DBG_LINK, "	link_cfg_err = 0x%x\n", li->link_cfg_err);
+	ice_debug(hw, ICE_DBG_LINK, "	an_info = 0x%x\n", li->an_info);
+	ice_debug(hw, ICE_DBG_LINK, "	ext_info = 0x%x\n", li->ext_info);
+	ice_debug(hw, ICE_DBG_LINK, "	fec_info = 0x%x\n", li->fec_info);
+	ice_debug(hw, ICE_DBG_LINK, "	lse_ena = 0x%x\n", li->lse_ena);
+	ice_debug(hw, ICE_DBG_LINK, "	max_frame = 0x%x\n",
+		  li->max_frame_size);
+	ice_debug(hw, ICE_DBG_LINK, "	pacing = 0x%x\n", li->pacing);
+
+	/* save link status information */
+	if (link)
+		*link = *li;
+
+	/* flag cleared so calling functions don't call AQ again */
+	pi->phy.get_link_info = false;
+
+	return 0;
+}
+
+/**
+ * ice_fill_tx_timer_and_fc_thresh
+ * @hw: pointer to the HW struct
+ * @cmd: pointer to MAC cfg structure
+ *
+ * Add Tx timer and FC refresh threshold info to Set MAC Config AQ command
+ * descriptor
+ */
+static void
+ice_fill_tx_timer_and_fc_thresh(struct ice_hw *hw,
+				struct ice_aqc_set_mac_cfg *cmd)
+{
+	u16 fc_thres_val, tx_timer_val;
+	u32 val;
+
+	/* We read back the transmit timer and FC threshold value of
+	 * LFC. Thus, we will use index =
+	 * PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_MAX_INDEX.
+	 *
+	 * Also, because we are operating on transmit timer and FC
+	 * threshold of LFC, we don't turn on any bit in tx_tmr_priority
+	 */
+#define IDX_OF_LFC PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_MAX_INDEX
+
+	/* Retrieve the transmit timer */
+	val = rd32(hw, PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA(IDX_OF_LFC));
+	tx_timer_val = val &
+		PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_HSEC_CTL_TX_PAUSE_QUANTA_M;
+	cmd->tx_tmr_value = cpu_to_le16(tx_timer_val);
+
+	/* Retrieve the FC threshold */
+	val = rd32(hw, PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(IDX_OF_LFC));
+	fc_thres_val = val & PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_M;
+
+	cmd->fc_refresh_threshold = cpu_to_le16(fc_thres_val);
+}
+
+/**
+ * ice_aq_set_mac_cfg
+ * @hw: pointer to the HW struct
+ * @max_frame_size: Maximum Frame Size to be supported
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set MAC configuration (0x0603)
+ */
+int
+ice_aq_set_mac_cfg(struct ice_hw *hw, u16 max_frame_size, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_mac_cfg *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.set_mac_cfg;
+
+	if (max_frame_size == 0)
+		return -EINVAL;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_mac_cfg);
+
+	cmd->max_frame_size = cpu_to_le16(max_frame_size);
+
+	ice_fill_tx_timer_and_fc_thresh(hw, cmd);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_init_fltr_mgmt_struct - initializes filter management list and locks
+ * @hw: pointer to the HW struct
+ */
+static int ice_init_fltr_mgmt_struct(struct ice_hw *hw)
+{
+	struct ice_switch_info *sw;
+	int status;
+
+	hw->switch_info = devm_kzalloc(ice_hw_to_dev(hw),
+				       sizeof(*hw->switch_info), GFP_KERNEL);
+	sw = hw->switch_info;
+
+	if (!sw)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&sw->vsi_list_map_head);
+	sw->prof_res_bm_init = 0;
+
+	status = ice_init_def_sw_recp(hw);
+	if (status) {
+		devm_kfree(ice_hw_to_dev(hw), hw->switch_info);
+		return status;
+	}
+	return 0;
+}
+
+/**
+ * ice_cleanup_fltr_mgmt_struct - cleanup filter management list and locks
+ * @hw: pointer to the HW struct
+ */
+static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_vsi_list_map_info *v_pos_map;
+	struct ice_vsi_list_map_info *v_tmp_map;
+	struct ice_sw_recipe *recps;
+	u8 i;
+
+	list_for_each_entry_safe(v_pos_map, v_tmp_map, &sw->vsi_list_map_head,
+				 list_entry) {
+		list_del(&v_pos_map->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), v_pos_map);
+	}
+	recps = sw->recp_list;
+	for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) {
+		struct ice_recp_grp_entry *rg_entry, *tmprg_entry;
+
+		recps[i].root_rid = i;
+		list_for_each_entry_safe(rg_entry, tmprg_entry,
+					 &recps[i].rg_list, l_entry) {
+			list_del(&rg_entry->l_entry);
+			devm_kfree(ice_hw_to_dev(hw), rg_entry);
+		}
+
+		if (recps[i].adv_rule) {
+			struct ice_adv_fltr_mgmt_list_entry *tmp_entry;
+			struct ice_adv_fltr_mgmt_list_entry *lst_itr;
+
+			mutex_destroy(&recps[i].filt_rule_lock);
+			list_for_each_entry_safe(lst_itr, tmp_entry,
+						 &recps[i].filt_rules,
+						 list_entry) {
+				list_del(&lst_itr->list_entry);
+				devm_kfree(ice_hw_to_dev(hw), lst_itr->lkups);
+				devm_kfree(ice_hw_to_dev(hw), lst_itr);
+			}
+		} else {
+			struct ice_fltr_mgmt_list_entry *lst_itr, *tmp_entry;
+
+			mutex_destroy(&recps[i].filt_rule_lock);
+			list_for_each_entry_safe(lst_itr, tmp_entry,
+						 &recps[i].filt_rules,
+						 list_entry) {
+				list_del(&lst_itr->list_entry);
+				devm_kfree(ice_hw_to_dev(hw), lst_itr);
+			}
+		}
+		devm_kfree(ice_hw_to_dev(hw), recps[i].root_buf);
+	}
+	ice_rm_all_sw_replay_rule_info(hw);
+	devm_kfree(ice_hw_to_dev(hw), sw->recp_list);
+	devm_kfree(ice_hw_to_dev(hw), sw);
+}
+
+/**
+ * ice_get_fw_log_cfg - get FW logging configuration
+ * @hw: pointer to the HW struct
+ */
+static int ice_get_fw_log_cfg(struct ice_hw *hw)
+{
+	struct ice_aq_desc desc;
+	__le16 *config;
+	int status;
+	u16 size;
+
+	size = sizeof(*config) * ICE_AQC_FW_LOG_ID_MAX;
+	config = kzalloc(size, GFP_KERNEL);
+	if (!config)
+		return -ENOMEM;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging_info);
+
+	status = ice_aq_send_cmd(hw, &desc, config, size, NULL);
+	if (!status) {
+		u16 i;
+
+		/* Save FW logging information into the HW structure */
+		for (i = 0; i < ICE_AQC_FW_LOG_ID_MAX; i++) {
+			u16 v, m, flgs;
+
+			v = le16_to_cpu(config[i]);
+			m = (v & ICE_AQC_FW_LOG_ID_M) >> ICE_AQC_FW_LOG_ID_S;
+			flgs = (v & ICE_AQC_FW_LOG_EN_M) >> ICE_AQC_FW_LOG_EN_S;
+
+			if (m < ICE_AQC_FW_LOG_ID_MAX)
+				hw->fw_log.evnts[m].cur = flgs;
+		}
+	}
+
+	kfree(config);
+
+	return status;
+}
+
+/**
+ * ice_cfg_fw_log - configure FW logging
+ * @hw: pointer to the HW struct
+ * @enable: enable certain FW logging events if true, disable all if false
+ *
+ * This function enables/disables the FW logging via Rx CQ events and a UART
+ * port based on predetermined configurations. FW logging via the Rx CQ can be
+ * enabled/disabled for individual PF's. However, FW logging via the UART can
+ * only be enabled/disabled for all PFs on the same device.
+ *
+ * To enable overall FW logging, the "cq_en" and "uart_en" enable bits in
+ * hw->fw_log need to be set accordingly, e.g. based on user-provided input,
+ * before initializing the device.
+ *
+ * When re/configuring FW logging, callers need to update the "cfg" elements of
+ * the hw->fw_log.evnts array with the desired logging event configurations for
+ * modules of interest. When disabling FW logging completely, the callers can
+ * just pass false in the "enable" parameter. On completion, the function will
+ * update the "cur" element of the hw->fw_log.evnts array with the resulting
+ * logging event configurations of the modules that are being re/configured. FW
+ * logging modules that are not part of a reconfiguration operation retain their
+ * previous states.
+ *
+ * Before resetting the device, it is recommended that the driver disables FW
+ * logging before shutting down the control queue. When disabling FW logging
+ * ("enable" = false), the latest configurations of FW logging events stored in
+ * hw->fw_log.evnts[] are not overridden to allow them to be reconfigured after
+ * a device reset.
+ *
+ * When enabling FW logging to emit log messages via the Rx CQ during the
+ * device's initialization phase, a mechanism alternative to interrupt handlers
+ * needs to be used to extract FW log messages from the Rx CQ periodically and
+ * to prevent the Rx CQ from being full and stalling other types of control
+ * messages from FW to SW. Interrupts are typically disabled during the device's
+ * initialization phase.
+ */
+static int ice_cfg_fw_log(struct ice_hw *hw, bool enable)
+{
+	struct ice_aqc_fw_logging *cmd;
+	u16 i, chgs = 0, len = 0;
+	struct ice_aq_desc desc;
+	__le16 *data = NULL;
+	u8 actv_evnts = 0;
+	void *buf = NULL;
+	int status = 0;
+
+	if (!hw->fw_log.cq_en && !hw->fw_log.uart_en)
+		return 0;
+
+	/* Disable FW logging only when the control queue is still responsive */
+	if (!enable &&
+	    (!hw->fw_log.actv_evnts || !ice_check_sq_alive(hw, &hw->adminq)))
+		return 0;
+
+	/* Get current FW log settings */
+	status = ice_get_fw_log_cfg(hw);
+	if (status)
+		return status;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging);
+	cmd = &desc.params.fw_logging;
+
+	/* Indicate which controls are valid */
+	if (hw->fw_log.cq_en)
+		cmd->log_ctrl_valid |= ICE_AQC_FW_LOG_AQ_VALID;
+
+	if (hw->fw_log.uart_en)
+		cmd->log_ctrl_valid |= ICE_AQC_FW_LOG_UART_VALID;
+
+	if (enable) {
+		/* Fill in an array of entries with FW logging modules and
+		 * logging events being reconfigured.
+		 */
+		for (i = 0; i < ICE_AQC_FW_LOG_ID_MAX; i++) {
+			u16 val;
+
+			/* Keep track of enabled event types */
+			actv_evnts |= hw->fw_log.evnts[i].cfg;
+
+			if (hw->fw_log.evnts[i].cfg == hw->fw_log.evnts[i].cur)
+				continue;
+
+			if (!data) {
+				data = devm_kcalloc(ice_hw_to_dev(hw),
+						    ICE_AQC_FW_LOG_ID_MAX,
+						    sizeof(*data),
+						    GFP_KERNEL);
+				if (!data)
+					return -ENOMEM;
+			}
+
+			val = i << ICE_AQC_FW_LOG_ID_S;
+			val |= hw->fw_log.evnts[i].cfg << ICE_AQC_FW_LOG_EN_S;
+			data[chgs++] = cpu_to_le16(val);
+		}
+
+		/* Only enable FW logging if at least one module is specified.
+		 * If FW logging is currently enabled but all modules are not
+		 * enabled to emit log messages, disable FW logging altogether.
+		 */
+		if (actv_evnts) {
+			/* Leave if there is effectively no change */
+			if (!chgs)
+				goto out;
+
+			if (hw->fw_log.cq_en)
+				cmd->log_ctrl |= ICE_AQC_FW_LOG_AQ_EN;
+
+			if (hw->fw_log.uart_en)
+				cmd->log_ctrl |= ICE_AQC_FW_LOG_UART_EN;
+
+			buf = data;
+			len = sizeof(*data) * chgs;
+			desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+		}
+	}
+
+	status = ice_aq_send_cmd(hw, &desc, buf, len, NULL);
+	if (!status) {
+		/* Update the current configuration to reflect events enabled.
+		 * hw->fw_log.cq_en and hw->fw_log.uart_en indicate if the FW
+		 * logging mode is enabled for the device. They do not reflect
+		 * actual modules being enabled to emit log messages. So, their
+		 * values remain unchanged even when all modules are disabled.
+		 */
+		u16 cnt = enable ? chgs : (u16)ICE_AQC_FW_LOG_ID_MAX;
+
+		hw->fw_log.actv_evnts = actv_evnts;
+		for (i = 0; i < cnt; i++) {
+			u16 v, m;
+
+			if (!enable) {
+				/* When disabling all FW logging events as part
+				 * of device's de-initialization, the original
+				 * configurations are retained, and can be used
+				 * to reconfigure FW logging later if the device
+				 * is re-initialized.
+				 */
+				hw->fw_log.evnts[i].cur = 0;
+				continue;
+			}
+
+			v = le16_to_cpu(data[i]);
+			m = (v & ICE_AQC_FW_LOG_ID_M) >> ICE_AQC_FW_LOG_ID_S;
+			hw->fw_log.evnts[m].cur = hw->fw_log.evnts[m].cfg;
+		}
+	}
+
+out:
+	devm_kfree(ice_hw_to_dev(hw), data);
+
+	return status;
+}
+
+/**
+ * ice_output_fw_log
+ * @hw: pointer to the HW struct
+ * @desc: pointer to the AQ message descriptor
+ * @buf: pointer to the buffer accompanying the AQ message
+ *
+ * Formats a FW Log message and outputs it via the standard driver logs.
+ */
+void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf)
+{
+	ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg Start ]\n");
+	ice_debug_array(hw, ICE_DBG_FW_LOG, 16, 1, (u8 *)buf,
+			le16_to_cpu(desc->datalen));
+	ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg End ]\n");
+}
+
+/**
+ * ice_get_itr_intrl_gran
+ * @hw: pointer to the HW struct
+ *
+ * Determines the ITR/INTRL granularities based on the maximum aggregate
+ * bandwidth according to the device's configuration during power-on.
+ */
+static void ice_get_itr_intrl_gran(struct ice_hw *hw)
+{
+	u8 max_agg_bw = (rd32(hw, GL_PWR_MODE_CTL) &
+			 GL_PWR_MODE_CTL_CAR_MAX_BW_M) >>
+			GL_PWR_MODE_CTL_CAR_MAX_BW_S;
+
+	switch (max_agg_bw) {
+	case ICE_MAX_AGG_BW_200G:
+	case ICE_MAX_AGG_BW_100G:
+	case ICE_MAX_AGG_BW_50G:
+		hw->itr_gran = ICE_ITR_GRAN_ABOVE_25;
+		hw->intrl_gran = ICE_INTRL_GRAN_ABOVE_25;
+		break;
+	case ICE_MAX_AGG_BW_25G:
+		hw->itr_gran = ICE_ITR_GRAN_MAX_25;
+		hw->intrl_gran = ICE_INTRL_GRAN_MAX_25;
+		break;
+	}
+}
+
+/**
+ * ice_init_hw - main hardware initialization routine
+ * @hw: pointer to the hardware structure
+ */
+int ice_init_hw(struct ice_hw *hw)
+{
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	u16 mac_buf_len;
+	void *mac_buf;
+	int status;
+
+	/* Set MAC type based on DeviceID */
+	status = ice_set_mac_type(hw);
+	if (status)
+		return status;
+
+	hw->pf_id = (u8)(rd32(hw, PF_FUNC_RID) &
+			 PF_FUNC_RID_FUNC_NUM_M) >>
+		PF_FUNC_RID_FUNC_NUM_S;
+
+	status = ice_reset(hw, ICE_RESET_PFR);
+	if (status)
+		return status;
+
+	ice_get_itr_intrl_gran(hw);
+
+	status = ice_create_all_ctrlq(hw);
+	if (status)
+		goto err_unroll_cqinit;
+
+	/* Enable FW logging. Not fatal if this fails. */
+	status = ice_cfg_fw_log(hw, true);
+	if (status)
+		ice_debug(hw, ICE_DBG_INIT, "Failed to enable FW logging.\n");
+
+	status = ice_clear_pf_cfg(hw);
+	if (status)
+		goto err_unroll_cqinit;
+
+	/* Set bit to enable Flow Director filters */
+	wr32(hw, PFQF_FD_ENA, PFQF_FD_ENA_FD_ENA_M);
+	INIT_LIST_HEAD(&hw->fdir_list_head);
+
+	ice_clear_pxe_mode(hw);
+
+	status = ice_init_nvm(hw);
+	if (status)
+		goto err_unroll_cqinit;
+
+	status = ice_get_caps(hw);
+	if (status)
+		goto err_unroll_cqinit;
+
+	if (!hw->port_info)
+		hw->port_info = devm_kzalloc(ice_hw_to_dev(hw),
+					     sizeof(*hw->port_info),
+					     GFP_KERNEL);
+	if (!hw->port_info) {
+		status = -ENOMEM;
+		goto err_unroll_cqinit;
+	}
+
+	/* set the back pointer to HW */
+	hw->port_info->hw = hw;
+
+	/* Initialize port_info struct with switch configuration data */
+	status = ice_get_initial_sw_cfg(hw);
+	if (status)
+		goto err_unroll_alloc;
+
+	hw->evb_veb = true;
+
+	/* init xarray for identifying scheduling nodes uniquely */
+	xa_init_flags(&hw->port_info->sched_node_ids, XA_FLAGS_ALLOC);
+
+	/* Query the allocated resources for Tx scheduler */
+	status = ice_sched_query_res_alloc(hw);
+	if (status) {
+		ice_debug(hw, ICE_DBG_SCHED, "Failed to get scheduler allocated resources\n");
+		goto err_unroll_alloc;
+	}
+	ice_sched_get_psm_clk_freq(hw);
+
+	/* Initialize port_info struct with scheduler data */
+	status = ice_sched_init_port(hw->port_info);
+	if (status)
+		goto err_unroll_sched;
+
+	pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps) {
+		status = -ENOMEM;
+		goto err_unroll_sched;
+	}
+
+	/* Initialize port_info struct with PHY capabilities */
+	status = ice_aq_get_phy_caps(hw->port_info, false,
+				     ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps,
+				     NULL);
+	devm_kfree(ice_hw_to_dev(hw), pcaps);
+	if (status)
+		dev_warn(ice_hw_to_dev(hw), "Get PHY capabilities failed status = %d, continuing anyway\n",
+			 status);
+
+	/* Initialize port_info struct with link information */
+	status = ice_aq_get_link_info(hw->port_info, false, NULL, NULL);
+	if (status)
+		goto err_unroll_sched;
+
+	/* need a valid SW entry point to build a Tx tree */
+	if (!hw->sw_entry_point_layer) {
+		ice_debug(hw, ICE_DBG_SCHED, "invalid sw entry point\n");
+		status = -EIO;
+		goto err_unroll_sched;
+	}
+	INIT_LIST_HEAD(&hw->agg_list);
+	/* Initialize max burst size */
+	if (!hw->max_burst_size)
+		ice_cfg_rl_burst_size(hw, ICE_SCHED_DFLT_BURST_SIZE);
+
+	status = ice_init_fltr_mgmt_struct(hw);
+	if (status)
+		goto err_unroll_sched;
+
+	/* Get MAC information */
+	/* A single port can report up to two (LAN and WoL) addresses */
+	mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2,
+			       sizeof(struct ice_aqc_manage_mac_read_resp),
+			       GFP_KERNEL);
+	mac_buf_len = 2 * sizeof(struct ice_aqc_manage_mac_read_resp);
+
+	if (!mac_buf) {
+		status = -ENOMEM;
+		goto err_unroll_fltr_mgmt_struct;
+	}
+
+	status = ice_aq_manage_mac_read(hw, mac_buf, mac_buf_len, NULL);
+	devm_kfree(ice_hw_to_dev(hw), mac_buf);
+
+	if (status)
+		goto err_unroll_fltr_mgmt_struct;
+	/* enable jumbo frame support at MAC level */
+	status = ice_aq_set_mac_cfg(hw, ICE_AQ_SET_MAC_FRAME_SIZE_MAX, NULL);
+	if (status)
+		goto err_unroll_fltr_mgmt_struct;
+	/* Obtain counter base index which would be used by flow director */
+	status = ice_alloc_fd_res_cntr(hw, &hw->fd_ctr_base);
+	if (status)
+		goto err_unroll_fltr_mgmt_struct;
+	status = ice_init_hw_tbls(hw);
+	if (status)
+		goto err_unroll_fltr_mgmt_struct;
+	mutex_init(&hw->tnl_lock);
+	return 0;
+
+err_unroll_fltr_mgmt_struct:
+	ice_cleanup_fltr_mgmt_struct(hw);
+err_unroll_sched:
+	ice_sched_cleanup_all(hw);
+err_unroll_alloc:
+	devm_kfree(ice_hw_to_dev(hw), hw->port_info);
+err_unroll_cqinit:
+	ice_destroy_all_ctrlq(hw);
+	return status;
+}
+
+/**
+ * ice_deinit_hw - unroll initialization operations done by ice_init_hw
+ * @hw: pointer to the hardware structure
+ *
+ * This should be called only during nominal operation, not as a result of
+ * ice_init_hw() failing since ice_init_hw() will take care of unrolling
+ * applicable initializations if it fails for any reason.
+ */
+void ice_deinit_hw(struct ice_hw *hw)
+{
+	ice_free_fd_res_cntr(hw, hw->fd_ctr_base);
+	ice_cleanup_fltr_mgmt_struct(hw);
+
+	ice_sched_cleanup_all(hw);
+	ice_sched_clear_agg(hw);
+	ice_free_seg(hw);
+	ice_free_hw_tbls(hw);
+	mutex_destroy(&hw->tnl_lock);
+
+	/* Attempt to disable FW logging before shutting down control queues */
+	ice_cfg_fw_log(hw, false);
+	ice_destroy_all_ctrlq(hw);
+
+	/* Clear VSI contexts if not already cleared */
+	ice_clear_all_vsi_ctx(hw);
+}
+
+/**
+ * ice_check_reset - Check to see if a global reset is complete
+ * @hw: pointer to the hardware structure
+ */
+int ice_check_reset(struct ice_hw *hw)
+{
+	u32 cnt, reg = 0, grst_timeout, uld_mask;
+
+	/* Poll for Device Active state in case a recent CORER, GLOBR,
+	 * or EMPR has occurred. The grst delay value is in 100ms units.
+	 * Add 1sec for outstanding AQ commands that can take a long time.
+	 */
+	grst_timeout = ((rd32(hw, GLGEN_RSTCTL) & GLGEN_RSTCTL_GRSTDEL_M) >>
+			GLGEN_RSTCTL_GRSTDEL_S) + 10;
+
+	for (cnt = 0; cnt < grst_timeout; cnt++) {
+		mdelay(100);
+		reg = rd32(hw, GLGEN_RSTAT);
+		if (!(reg & GLGEN_RSTAT_DEVSTATE_M))
+			break;
+	}
+
+	if (cnt == grst_timeout) {
+		ice_debug(hw, ICE_DBG_INIT, "Global reset polling failed to complete.\n");
+		return -EIO;
+	}
+
+#define ICE_RESET_DONE_MASK	(GLNVM_ULD_PCIER_DONE_M |\
+				 GLNVM_ULD_PCIER_DONE_1_M |\
+				 GLNVM_ULD_CORER_DONE_M |\
+				 GLNVM_ULD_GLOBR_DONE_M |\
+				 GLNVM_ULD_POR_DONE_M |\
+				 GLNVM_ULD_POR_DONE_1_M |\
+				 GLNVM_ULD_PCIER_DONE_2_M)
+
+	uld_mask = ICE_RESET_DONE_MASK | (hw->func_caps.common_cap.rdma ?
+					  GLNVM_ULD_PE_DONE_M : 0);
+
+	/* Device is Active; check Global Reset processes are done */
+	for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) {
+		reg = rd32(hw, GLNVM_ULD) & uld_mask;
+		if (reg == uld_mask) {
+			ice_debug(hw, ICE_DBG_INIT, "Global reset processes done. %d\n", cnt);
+			break;
+		}
+		mdelay(10);
+	}
+
+	if (cnt == ICE_PF_RESET_WAIT_COUNT) {
+		ice_debug(hw, ICE_DBG_INIT, "Wait for Reset Done timed out. GLNVM_ULD = 0x%x\n",
+			  reg);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_pf_reset - Reset the PF
+ * @hw: pointer to the hardware structure
+ *
+ * If a global reset has been triggered, this function checks
+ * for its completion and then issues the PF reset
+ */
+static int ice_pf_reset(struct ice_hw *hw)
+{
+	u32 cnt, reg;
+
+	/* If at function entry a global reset was already in progress, i.e.
+	 * state is not 'device active' or any of the reset done bits are not
+	 * set in GLNVM_ULD, there is no need for a PF Reset; poll until the
+	 * global reset is done.
+	 */
+	if ((rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_DEVSTATE_M) ||
+	    (rd32(hw, GLNVM_ULD) & ICE_RESET_DONE_MASK) ^ ICE_RESET_DONE_MASK) {
+		/* poll on global reset currently in progress until done */
+		if (ice_check_reset(hw))
+			return -EIO;
+
+		return 0;
+	}
+
+	/* Reset the PF */
+	reg = rd32(hw, PFGEN_CTRL);
+
+	wr32(hw, PFGEN_CTRL, (reg | PFGEN_CTRL_PFSWR_M));
+
+	/* Wait for the PFR to complete. The wait time is the global config lock
+	 * timeout plus the PFR timeout which will account for a possible reset
+	 * that is occurring during a download package operation.
+	 */
+	for (cnt = 0; cnt < ICE_GLOBAL_CFG_LOCK_TIMEOUT +
+	     ICE_PF_RESET_WAIT_COUNT; cnt++) {
+		reg = rd32(hw, PFGEN_CTRL);
+		if (!(reg & PFGEN_CTRL_PFSWR_M))
+			break;
+
+		mdelay(1);
+	}
+
+	if (cnt == ICE_PF_RESET_WAIT_COUNT) {
+		ice_debug(hw, ICE_DBG_INIT, "PF reset polling failed to complete.\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_reset - Perform different types of reset
+ * @hw: pointer to the hardware structure
+ * @req: reset request
+ *
+ * This function triggers a reset as specified by the req parameter.
+ *
+ * Note:
+ * If anything other than a PF reset is triggered, PXE mode is restored.
+ * This has to be cleared using ice_clear_pxe_mode again, once the AQ
+ * interface has been restored in the rebuild flow.
+ */
+int ice_reset(struct ice_hw *hw, enum ice_reset_req req)
+{
+	u32 val = 0;
+
+	switch (req) {
+	case ICE_RESET_PFR:
+		return ice_pf_reset(hw);
+	case ICE_RESET_CORER:
+		ice_debug(hw, ICE_DBG_INIT, "CoreR requested\n");
+		val = GLGEN_RTRIG_CORER_M;
+		break;
+	case ICE_RESET_GLOBR:
+		ice_debug(hw, ICE_DBG_INIT, "GlobalR requested\n");
+		val = GLGEN_RTRIG_GLOBR_M;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	val |= rd32(hw, GLGEN_RTRIG);
+	wr32(hw, GLGEN_RTRIG, val);
+	ice_flush(hw);
+
+	/* wait for the FW to be ready */
+	return ice_check_reset(hw);
+}
+
+/**
+ * ice_copy_rxq_ctx_to_hw
+ * @hw: pointer to the hardware structure
+ * @ice_rxq_ctx: pointer to the rxq context
+ * @rxq_index: the index of the Rx queue
+ *
+ * Copies rxq context from dense structure to HW register space
+ */
+static int
+ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, u8 *ice_rxq_ctx, u32 rxq_index)
+{
+	u8 i;
+
+	if (!ice_rxq_ctx)
+		return -EINVAL;
+
+	if (rxq_index > QRX_CTRL_MAX_INDEX)
+		return -EINVAL;
+
+	/* Copy each dword separately to HW */
+	for (i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) {
+		wr32(hw, QRX_CONTEXT(i, rxq_index),
+		     *((u32 *)(ice_rxq_ctx + (i * sizeof(u32)))));
+
+		ice_debug(hw, ICE_DBG_QCTX, "qrxdata[%d]: %08X\n", i,
+			  *((u32 *)(ice_rxq_ctx + (i * sizeof(u32)))));
+	}
+
+	return 0;
+}
+
+/* LAN Rx Queue Context */
+static const struct ice_ctx_ele ice_rlan_ctx_info[] = {
+	/* Field		Width	LSB */
+	ICE_CTX_STORE(ice_rlan_ctx, head,		13,	0),
+	ICE_CTX_STORE(ice_rlan_ctx, cpuid,		8,	13),
+	ICE_CTX_STORE(ice_rlan_ctx, base,		57,	32),
+	ICE_CTX_STORE(ice_rlan_ctx, qlen,		13,	89),
+	ICE_CTX_STORE(ice_rlan_ctx, dbuf,		7,	102),
+	ICE_CTX_STORE(ice_rlan_ctx, hbuf,		5,	109),
+	ICE_CTX_STORE(ice_rlan_ctx, dtype,		2,	114),
+	ICE_CTX_STORE(ice_rlan_ctx, dsize,		1,	116),
+	ICE_CTX_STORE(ice_rlan_ctx, crcstrip,		1,	117),
+	ICE_CTX_STORE(ice_rlan_ctx, l2tsel,		1,	119),
+	ICE_CTX_STORE(ice_rlan_ctx, hsplit_0,		4,	120),
+	ICE_CTX_STORE(ice_rlan_ctx, hsplit_1,		2,	124),
+	ICE_CTX_STORE(ice_rlan_ctx, showiv,		1,	127),
+	ICE_CTX_STORE(ice_rlan_ctx, rxmax,		14,	174),
+	ICE_CTX_STORE(ice_rlan_ctx, tphrdesc_ena,	1,	193),
+	ICE_CTX_STORE(ice_rlan_ctx, tphwdesc_ena,	1,	194),
+	ICE_CTX_STORE(ice_rlan_ctx, tphdata_ena,	1,	195),
+	ICE_CTX_STORE(ice_rlan_ctx, tphhead_ena,	1,	196),
+	ICE_CTX_STORE(ice_rlan_ctx, lrxqthresh,		3,	198),
+	ICE_CTX_STORE(ice_rlan_ctx, prefena,		1,	201),
+	{ 0 }
+};
+
+/**
+ * ice_write_rxq_ctx
+ * @hw: pointer to the hardware structure
+ * @rlan_ctx: pointer to the rxq context
+ * @rxq_index: the index of the Rx queue
+ *
+ * Converts rxq context from sparse to dense structure and then writes
+ * it to HW register space and enables the hardware to prefetch descriptors
+ * instead of only fetching them on demand
+ */
+int
+ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
+		  u32 rxq_index)
+{
+	u8 ctx_buf[ICE_RXQ_CTX_SZ] = { 0 };
+
+	if (!rlan_ctx)
+		return -EINVAL;
+
+	rlan_ctx->prefena = 1;
+
+	ice_set_ctx(hw, (u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info);
+	return ice_copy_rxq_ctx_to_hw(hw, ctx_buf, rxq_index);
+}
+
+/* LAN Tx Queue Context */
+const struct ice_ctx_ele ice_tlan_ctx_info[] = {
+				    /* Field			Width	LSB */
+	ICE_CTX_STORE(ice_tlan_ctx, base,			57,	0),
+	ICE_CTX_STORE(ice_tlan_ctx, port_num,			3,	57),
+	ICE_CTX_STORE(ice_tlan_ctx, cgd_num,			5,	60),
+	ICE_CTX_STORE(ice_tlan_ctx, pf_num,			3,	65),
+	ICE_CTX_STORE(ice_tlan_ctx, vmvf_num,			10,	68),
+	ICE_CTX_STORE(ice_tlan_ctx, vmvf_type,			2,	78),
+	ICE_CTX_STORE(ice_tlan_ctx, src_vsi,			10,	80),
+	ICE_CTX_STORE(ice_tlan_ctx, tsyn_ena,			1,	90),
+	ICE_CTX_STORE(ice_tlan_ctx, internal_usage_flag,	1,	91),
+	ICE_CTX_STORE(ice_tlan_ctx, alt_vlan,			1,	92),
+	ICE_CTX_STORE(ice_tlan_ctx, cpuid,			8,	93),
+	ICE_CTX_STORE(ice_tlan_ctx, wb_mode,			1,	101),
+	ICE_CTX_STORE(ice_tlan_ctx, tphrd_desc,			1,	102),
+	ICE_CTX_STORE(ice_tlan_ctx, tphrd,			1,	103),
+	ICE_CTX_STORE(ice_tlan_ctx, tphwr_desc,			1,	104),
+	ICE_CTX_STORE(ice_tlan_ctx, cmpq_id,			9,	105),
+	ICE_CTX_STORE(ice_tlan_ctx, qnum_in_func,		14,	114),
+	ICE_CTX_STORE(ice_tlan_ctx, itr_notification_mode,	1,	128),
+	ICE_CTX_STORE(ice_tlan_ctx, adjust_prof_id,		6,	129),
+	ICE_CTX_STORE(ice_tlan_ctx, qlen,			13,	135),
+	ICE_CTX_STORE(ice_tlan_ctx, quanta_prof_idx,		4,	148),
+	ICE_CTX_STORE(ice_tlan_ctx, tso_ena,			1,	152),
+	ICE_CTX_STORE(ice_tlan_ctx, tso_qnum,			11,	153),
+	ICE_CTX_STORE(ice_tlan_ctx, legacy_int,			1,	164),
+	ICE_CTX_STORE(ice_tlan_ctx, drop_ena,			1,	165),
+	ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx,		2,	166),
+	ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx,	3,	168),
+	ICE_CTX_STORE(ice_tlan_ctx, int_q_state,		122,	171),
+	{ 0 }
+};
+
+/* Sideband Queue command wrappers */
+
+/**
+ * ice_sbq_send_cmd - send Sideband Queue command to Sideband Queue
+ * @hw: pointer to the HW struct
+ * @desc: descriptor describing the command
+ * @buf: buffer to use for indirect commands (NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (0 for direct commands)
+ * @cd: pointer to command details structure
+ */
+static int
+ice_sbq_send_cmd(struct ice_hw *hw, struct ice_sbq_cmd_desc *desc,
+		 void *buf, u16 buf_size, struct ice_sq_cd *cd)
+{
+	return ice_sq_send_cmd(hw, ice_get_sbq(hw),
+			       (struct ice_aq_desc *)desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_sbq_rw_reg - Fill Sideband Queue command
+ * @hw: pointer to the HW struct
+ * @in: message info to be filled in descriptor
+ */
+int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in)
+{
+	struct ice_sbq_cmd_desc desc = {0};
+	struct ice_sbq_msg_req msg = {0};
+	u16 msg_len;
+	int status;
+
+	msg_len = sizeof(msg);
+
+	msg.dest_dev = in->dest_dev;
+	msg.opcode = in->opcode;
+	msg.flags = ICE_SBQ_MSG_FLAGS;
+	msg.sbe_fbe = ICE_SBQ_MSG_SBE_FBE;
+	msg.msg_addr_low = cpu_to_le16(in->msg_addr_low);
+	msg.msg_addr_high = cpu_to_le32(in->msg_addr_high);
+
+	if (in->opcode)
+		msg.data = cpu_to_le32(in->data);
+	else
+		/* data read comes back in completion, so shorten the struct by
+		 * sizeof(msg.data)
+		 */
+		msg_len -= sizeof(msg.data);
+
+	desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD);
+	desc.opcode = cpu_to_le16(ice_sbq_opc_neigh_dev_req);
+	desc.param0.cmd_len = cpu_to_le16(msg_len);
+	status = ice_sbq_send_cmd(hw, &desc, &msg, msg_len, NULL);
+	if (!status && !in->opcode)
+		in->data = le32_to_cpu
+			(((struct ice_sbq_msg_cmpl *)&msg)->data);
+	return status;
+}
+
+/* FW Admin Queue command wrappers */
+
+/* Software lock/mutex that is meant to be held while the Global Config Lock
+ * in firmware is acquired by the software to prevent most (but not all) types
+ * of AQ commands from being sent to FW
+ */
+DEFINE_MUTEX(ice_global_cfg_lock_sw);
+
+/**
+ * ice_should_retry_sq_send_cmd
+ * @opcode: AQ opcode
+ *
+ * Decide if we should retry the send command routine for the ATQ, depending
+ * on the opcode.
+ */
+static bool ice_should_retry_sq_send_cmd(u16 opcode)
+{
+	switch (opcode) {
+	case ice_aqc_opc_get_link_topo:
+	case ice_aqc_opc_lldp_stop:
+	case ice_aqc_opc_lldp_start:
+	case ice_aqc_opc_lldp_filter_ctrl:
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_sq_send_cmd_retry - send command to Control Queue (ATQ)
+ * @hw: pointer to the HW struct
+ * @cq: pointer to the specific Control queue
+ * @desc: prefilled descriptor describing the command
+ * @buf: buffer to use for indirect commands (or NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (or 0 for direct commands)
+ * @cd: pointer to command details structure
+ *
+ * Retry sending the FW Admin Queue command, multiple times, to the FW Admin
+ * Queue if the EBUSY AQ error is returned.
+ */
+static int
+ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+		      struct ice_aq_desc *desc, void *buf, u16 buf_size,
+		      struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc_cpy;
+	bool is_cmd_for_retry;
+	u8 idx = 0;
+	u16 opcode;
+	int status;
+
+	opcode = le16_to_cpu(desc->opcode);
+	is_cmd_for_retry = ice_should_retry_sq_send_cmd(opcode);
+	memset(&desc_cpy, 0, sizeof(desc_cpy));
+
+	if (is_cmd_for_retry) {
+		/* All retryable cmds are direct, without buf. */
+		WARN_ON(buf);
+
+		memcpy(&desc_cpy, desc, sizeof(desc_cpy));
+	}
+
+	do {
+		status = ice_sq_send_cmd(hw, cq, desc, buf, buf_size, cd);
+
+		if (!is_cmd_for_retry || !status ||
+		    hw->adminq.sq_last_status != ICE_AQ_RC_EBUSY)
+			break;
+
+		memcpy(desc, &desc_cpy, sizeof(desc_cpy));
+
+		msleep(ICE_SQ_SEND_DELAY_TIME_MS);
+
+	} while (++idx < ICE_SQ_SEND_MAX_EXECUTE);
+
+	return status;
+}
+
+/**
+ * ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue
+ * @hw: pointer to the HW struct
+ * @desc: descriptor describing the command
+ * @buf: buffer to use for indirect commands (NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (0 for direct commands)
+ * @cd: pointer to command details structure
+ *
+ * Helper function to send FW Admin Queue commands to the FW Admin Queue.
+ */
+int
+ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf,
+		u16 buf_size, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_req_res *cmd = &desc->params.res_owner;
+	bool lock_acquired = false;
+	int status;
+
+	/* When a package download is in process (i.e. when the firmware's
+	 * Global Configuration Lock resource is held), only the Download
+	 * Package, Get Version, Get Package Info List, Upload Section,
+	 * Update Package, Set Port Parameters, Get/Set VLAN Mode Parameters,
+	 * Add Recipe, Set Recipes to Profile Association, Get Recipe, and Get
+	 * Recipes to Profile Association, and Release Resource (with resource
+	 * ID set to Global Config Lock) AdminQ commands are allowed; all others
+	 * must block until the package download completes and the Global Config
+	 * Lock is released.  See also ice_acquire_global_cfg_lock().
+	 */
+	switch (le16_to_cpu(desc->opcode)) {
+	case ice_aqc_opc_download_pkg:
+	case ice_aqc_opc_get_pkg_info_list:
+	case ice_aqc_opc_get_ver:
+	case ice_aqc_opc_upload_section:
+	case ice_aqc_opc_update_pkg:
+	case ice_aqc_opc_set_port_params:
+	case ice_aqc_opc_get_vlan_mode_parameters:
+	case ice_aqc_opc_set_vlan_mode_parameters:
+	case ice_aqc_opc_add_recipe:
+	case ice_aqc_opc_recipe_to_profile:
+	case ice_aqc_opc_get_recipe:
+	case ice_aqc_opc_get_recipe_to_profile:
+		break;
+	case ice_aqc_opc_release_res:
+		if (le16_to_cpu(cmd->res_id) == ICE_AQC_RES_ID_GLBL_LOCK)
+			break;
+		fallthrough;
+	default:
+		mutex_lock(&ice_global_cfg_lock_sw);
+		lock_acquired = true;
+		break;
+	}
+
+	status = ice_sq_send_cmd_retry(hw, &hw->adminq, desc, buf, buf_size, cd);
+	if (lock_acquired)
+		mutex_unlock(&ice_global_cfg_lock_sw);
+
+	return status;
+}
+
+/**
+ * ice_aq_get_fw_ver
+ * @hw: pointer to the HW struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get the firmware version (0x0001) from the admin queue commands
+ */
+int ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_ver *resp;
+	struct ice_aq_desc desc;
+	int status;
+
+	resp = &desc.params.get_ver;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_ver);
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+
+	if (!status) {
+		hw->fw_branch = resp->fw_branch;
+		hw->fw_maj_ver = resp->fw_major;
+		hw->fw_min_ver = resp->fw_minor;
+		hw->fw_patch = resp->fw_patch;
+		hw->fw_build = le32_to_cpu(resp->fw_build);
+		hw->api_branch = resp->api_branch;
+		hw->api_maj_ver = resp->api_major;
+		hw->api_min_ver = resp->api_minor;
+		hw->api_patch = resp->api_patch;
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_send_driver_ver
+ * @hw: pointer to the HW struct
+ * @dv: driver's major, minor version
+ * @cd: pointer to command details structure or NULL
+ *
+ * Send the driver version (0x0002) to the firmware
+ */
+int
+ice_aq_send_driver_ver(struct ice_hw *hw, struct ice_driver_ver *dv,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aqc_driver_ver *cmd;
+	struct ice_aq_desc desc;
+	u16 len;
+
+	cmd = &desc.params.driver_ver;
+
+	if (!dv)
+		return -EINVAL;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_driver_ver);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	cmd->major_ver = dv->major_ver;
+	cmd->minor_ver = dv->minor_ver;
+	cmd->build_ver = dv->build_ver;
+	cmd->subbuild_ver = dv->subbuild_ver;
+
+	len = 0;
+	while (len < sizeof(dv->driver_string) &&
+	       isascii(dv->driver_string[len]) && dv->driver_string[len])
+		len++;
+
+	return ice_aq_send_cmd(hw, &desc, dv->driver_string, len, cd);
+}
+
+/**
+ * ice_aq_q_shutdown
+ * @hw: pointer to the HW struct
+ * @unloading: is the driver unloading itself
+ *
+ * Tell the Firmware that we're shutting down the AdminQ and whether
+ * or not the driver is unloading as well (0x0003).
+ */
+int ice_aq_q_shutdown(struct ice_hw *hw, bool unloading)
+{
+	struct ice_aqc_q_shutdown *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.q_shutdown;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_q_shutdown);
+
+	if (unloading)
+		cmd->driver_unloading = ICE_AQC_DRIVER_UNLOADING;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_req_res
+ * @hw: pointer to the HW struct
+ * @res: resource ID
+ * @access: access type
+ * @sdp_number: resource number
+ * @timeout: the maximum time in ms that the driver may hold the resource
+ * @cd: pointer to command details structure or NULL
+ *
+ * Requests common resource using the admin queue commands (0x0008).
+ * When attempting to acquire the Global Config Lock, the driver can
+ * learn of three states:
+ *  1) 0 -         acquired lock, and can perform download package
+ *  2) -EIO -      did not get lock, driver should fail to load
+ *  3) -EALREADY - did not get lock, but another driver has
+ *                 successfully downloaded the package; the driver does
+ *                 not have to download the package and can continue
+ *                 loading
+ *
+ * Note that if the caller is in an acquire lock, perform action, release lock
+ * phase of operation, it is possible that the FW may detect a timeout and issue
+ * a CORER. In this case, the driver will receive a CORER interrupt and will
+ * have to determine its cause. The calling thread that is handling this flow
+ * will likely get an error propagated back to it indicating the Download
+ * Package, Update Package or the Release Resource AQ commands timed out.
+ */
+static int
+ice_aq_req_res(struct ice_hw *hw, enum ice_aq_res_ids res,
+	       enum ice_aq_res_access_type access, u8 sdp_number, u32 *timeout,
+	       struct ice_sq_cd *cd)
+{
+	struct ice_aqc_req_res *cmd_resp;
+	struct ice_aq_desc desc;
+	int status;
+
+	cmd_resp = &desc.params.res_owner;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_req_res);
+
+	cmd_resp->res_id = cpu_to_le16(res);
+	cmd_resp->access_type = cpu_to_le16(access);
+	cmd_resp->res_number = cpu_to_le32(sdp_number);
+	cmd_resp->timeout = cpu_to_le32(*timeout);
+	*timeout = 0;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+
+	/* The completion specifies the maximum time in ms that the driver
+	 * may hold the resource in the Timeout field.
+	 */
+
+	/* Global config lock response utilizes an additional status field.
+	 *
+	 * If the Global config lock resource is held by some other driver, the
+	 * command completes with ICE_AQ_RES_GLBL_IN_PROG in the status field
+	 * and the timeout field indicates the maximum time the current owner
+	 * of the resource has to free it.
+	 */
+	if (res == ICE_GLOBAL_CFG_LOCK_RES_ID) {
+		if (le16_to_cpu(cmd_resp->status) == ICE_AQ_RES_GLBL_SUCCESS) {
+			*timeout = le32_to_cpu(cmd_resp->timeout);
+			return 0;
+		} else if (le16_to_cpu(cmd_resp->status) ==
+			   ICE_AQ_RES_GLBL_IN_PROG) {
+			*timeout = le32_to_cpu(cmd_resp->timeout);
+			return -EIO;
+		} else if (le16_to_cpu(cmd_resp->status) ==
+			   ICE_AQ_RES_GLBL_DONE) {
+			return -EALREADY;
+		}
+
+		/* invalid FW response, force a timeout immediately */
+		*timeout = 0;
+		return -EIO;
+	}
+
+	/* If the resource is held by some other driver, the command completes
+	 * with a busy return value and the timeout field indicates the maximum
+	 * time the current owner of the resource has to free it.
+	 */
+	if (!status || hw->adminq.sq_last_status == ICE_AQ_RC_EBUSY)
+		*timeout = le32_to_cpu(cmd_resp->timeout);
+
+	return status;
+}
+
+/**
+ * ice_aq_release_res
+ * @hw: pointer to the HW struct
+ * @res: resource ID
+ * @sdp_number: resource number
+ * @cd: pointer to command details structure or NULL
+ *
+ * release common resource using the admin queue commands (0x0009)
+ */
+static int
+ice_aq_release_res(struct ice_hw *hw, enum ice_aq_res_ids res, u8 sdp_number,
+		   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_req_res *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.res_owner;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_release_res);
+
+	cmd->res_id = cpu_to_le16(res);
+	cmd->res_number = cpu_to_le32(sdp_number);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_acquire_res
+ * @hw: pointer to the HW structure
+ * @res: resource ID
+ * @access: access type (read or write)
+ * @timeout: timeout in milliseconds
+ *
+ * This function will attempt to acquire the ownership of a resource.
+ */
+int
+ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
+		enum ice_aq_res_access_type access, u32 timeout)
+{
+#define ICE_RES_POLLING_DELAY_MS	10
+	u32 delay = ICE_RES_POLLING_DELAY_MS;
+	u32 time_left = timeout;
+	int status;
+
+	status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL);
+
+	/* A return code of -EALREADY means that another driver has
+	 * previously acquired the resource and performed any necessary updates;
+	 * in this case the caller does not obtain the resource and has no
+	 * further work to do.
+	 */
+	if (status == -EALREADY)
+		goto ice_acquire_res_exit;
+
+	if (status)
+		ice_debug(hw, ICE_DBG_RES, "resource %d acquire type %d failed.\n", res, access);
+
+	/* If necessary, poll until the current lock owner timeouts */
+	timeout = time_left;
+	while (status && timeout && time_left) {
+		mdelay(delay);
+		timeout = (timeout > delay) ? timeout - delay : 0;
+		status = ice_aq_req_res(hw, res, access, 0, &time_left, NULL);
+
+		if (status == -EALREADY)
+			/* lock free, but no work to do */
+			break;
+
+		if (!status)
+			/* lock acquired */
+			break;
+	}
+	if (status && status != -EALREADY)
+		ice_debug(hw, ICE_DBG_RES, "resource acquire timed out.\n");
+
+ice_acquire_res_exit:
+	if (status == -EALREADY) {
+		if (access == ICE_RES_WRITE)
+			ice_debug(hw, ICE_DBG_RES, "resource indicates no work to do.\n");
+		else
+			ice_debug(hw, ICE_DBG_RES, "Warning: -EALREADY not expected\n");
+	}
+	return status;
+}
+
+/**
+ * ice_release_res
+ * @hw: pointer to the HW structure
+ * @res: resource ID
+ *
+ * This function will release a resource using the proper Admin Command.
+ */
+void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res)
+{
+	unsigned long timeout;
+	int status;
+
+	/* there are some rare cases when trying to release the resource
+	 * results in an admin queue timeout, so handle them correctly
+	 */
+	timeout = jiffies + 10 * ICE_CTL_Q_SQ_CMD_TIMEOUT;
+	do {
+		status = ice_aq_release_res(hw, res, 0, NULL);
+		if (status != -EIO)
+			break;
+		usleep_range(1000, 2000);
+	} while (time_before(jiffies, timeout));
+}
+
+/**
+ * ice_aq_alloc_free_res - command to allocate/free resources
+ * @hw: pointer to the HW struct
+ * @buf: Indirect buffer to hold data parameters and response
+ * @buf_size: size of buffer for indirect commands
+ * @opc: pass in the command opcode
+ *
+ * Helper function to allocate/free resources using the admin queue commands
+ */
+int ice_aq_alloc_free_res(struct ice_hw *hw,
+			  struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size,
+			  enum ice_adminq_opc opc)
+{
+	struct ice_aqc_alloc_free_res_cmd *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.sw_res_ctrl;
+
+	if (!buf || buf_size < flex_array_size(buf, elem, 1))
+		return -EINVAL;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opc);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	cmd->num_entries = cpu_to_le16(1);
+
+	return ice_aq_send_cmd(hw, &desc, buf, buf_size, NULL);
+}
+
+/**
+ * ice_alloc_hw_res - allocate resource
+ * @hw: pointer to the HW struct
+ * @type: type of resource
+ * @num: number of resources to allocate
+ * @btm: allocate from bottom
+ * @res: pointer to array that will receive the resources
+ */
+int
+ice_alloc_hw_res(struct ice_hw *hw, u16 type, u16 num, bool btm, u16 *res)
+{
+	struct ice_aqc_alloc_free_res_elem *buf;
+	u16 buf_len;
+	int status;
+
+	buf_len = struct_size(buf, elem, num);
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* Prepare buffer to allocate resource. */
+	buf->num_elems = cpu_to_le16(num);
+	buf->res_type = cpu_to_le16(type | ICE_AQC_RES_TYPE_FLAG_DEDICATED |
+				    ICE_AQC_RES_TYPE_FLAG_IGNORE_INDEX);
+	if (btm)
+		buf->res_type |= cpu_to_le16(ICE_AQC_RES_TYPE_FLAG_SCAN_BOTTOM);
+
+	status = ice_aq_alloc_free_res(hw, buf, buf_len, ice_aqc_opc_alloc_res);
+	if (status)
+		goto ice_alloc_res_exit;
+
+	memcpy(res, buf->elem, sizeof(*buf->elem) * num);
+
+ice_alloc_res_exit:
+	kfree(buf);
+	return status;
+}
+
+/**
+ * ice_free_hw_res - free allocated HW resource
+ * @hw: pointer to the HW struct
+ * @type: type of resource to free
+ * @num: number of resources
+ * @res: pointer to array that contains the resources to free
+ */
+int ice_free_hw_res(struct ice_hw *hw, u16 type, u16 num, u16 *res)
+{
+	struct ice_aqc_alloc_free_res_elem *buf;
+	u16 buf_len;
+	int status;
+
+	buf_len = struct_size(buf, elem, num);
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* Prepare buffer to free resource. */
+	buf->num_elems = cpu_to_le16(num);
+	buf->res_type = cpu_to_le16(type);
+	memcpy(buf->elem, res, sizeof(*buf->elem) * num);
+
+	status = ice_aq_alloc_free_res(hw, buf, buf_len, ice_aqc_opc_free_res);
+	if (status)
+		ice_debug(hw, ICE_DBG_SW, "CQ CMD Buffer:\n");
+
+	kfree(buf);
+	return status;
+}
+
+/**
+ * ice_get_num_per_func - determine number of resources per PF
+ * @hw: pointer to the HW structure
+ * @max: value to be evenly split between each PF
+ *
+ * Determine the number of valid functions by going through the bitmap returned
+ * from parsing capabilities and use this to calculate the number of resources
+ * per PF based on the max value passed in.
+ */
+static u32 ice_get_num_per_func(struct ice_hw *hw, u32 max)
+{
+	u8 funcs;
+
+#define ICE_CAPS_VALID_FUNCS_M	0xFF
+	funcs = hweight8(hw->dev_caps.common_cap.valid_functions &
+			 ICE_CAPS_VALID_FUNCS_M);
+
+	if (!funcs)
+		return 0;
+
+	return max / funcs;
+}
+
+/**
+ * ice_parse_common_caps - parse common device/function capabilities
+ * @hw: pointer to the HW struct
+ * @caps: pointer to common capabilities structure
+ * @elem: the capability element to parse
+ * @prefix: message prefix for tracing capabilities
+ *
+ * Given a capability element, extract relevant details into the common
+ * capability structure.
+ *
+ * Returns: true if the capability matches one of the common capability ids,
+ * false otherwise.
+ */
+static bool
+ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps,
+		      struct ice_aqc_list_caps_elem *elem, const char *prefix)
+{
+	u32 logical_id = le32_to_cpu(elem->logical_id);
+	u32 phys_id = le32_to_cpu(elem->phys_id);
+	u32 number = le32_to_cpu(elem->number);
+	u16 cap = le16_to_cpu(elem->cap);
+	bool found = true;
+
+	switch (cap) {
+	case ICE_AQC_CAPS_VALID_FUNCTIONS:
+		caps->valid_functions = number;
+		ice_debug(hw, ICE_DBG_INIT, "%s: valid_functions (bitmap) = %d\n", prefix,
+			  caps->valid_functions);
+		break;
+	case ICE_AQC_CAPS_SRIOV:
+		caps->sr_iov_1_1 = (number == 1);
+		ice_debug(hw, ICE_DBG_INIT, "%s: sr_iov_1_1 = %d\n", prefix,
+			  caps->sr_iov_1_1);
+		break;
+	case ICE_AQC_CAPS_DCB:
+		caps->dcb = (number == 1);
+		caps->active_tc_bitmap = logical_id;
+		caps->maxtc = phys_id;
+		ice_debug(hw, ICE_DBG_INIT, "%s: dcb = %d\n", prefix, caps->dcb);
+		ice_debug(hw, ICE_DBG_INIT, "%s: active_tc_bitmap = %d\n", prefix,
+			  caps->active_tc_bitmap);
+		ice_debug(hw, ICE_DBG_INIT, "%s: maxtc = %d\n", prefix, caps->maxtc);
+		break;
+	case ICE_AQC_CAPS_RSS:
+		caps->rss_table_size = number;
+		caps->rss_table_entry_width = logical_id;
+		ice_debug(hw, ICE_DBG_INIT, "%s: rss_table_size = %d\n", prefix,
+			  caps->rss_table_size);
+		ice_debug(hw, ICE_DBG_INIT, "%s: rss_table_entry_width = %d\n", prefix,
+			  caps->rss_table_entry_width);
+		break;
+	case ICE_AQC_CAPS_RXQS:
+		caps->num_rxq = number;
+		caps->rxq_first_id = phys_id;
+		ice_debug(hw, ICE_DBG_INIT, "%s: num_rxq = %d\n", prefix,
+			  caps->num_rxq);
+		ice_debug(hw, ICE_DBG_INIT, "%s: rxq_first_id = %d\n", prefix,
+			  caps->rxq_first_id);
+		break;
+	case ICE_AQC_CAPS_TXQS:
+		caps->num_txq = number;
+		caps->txq_first_id = phys_id;
+		ice_debug(hw, ICE_DBG_INIT, "%s: num_txq = %d\n", prefix,
+			  caps->num_txq);
+		ice_debug(hw, ICE_DBG_INIT, "%s: txq_first_id = %d\n", prefix,
+			  caps->txq_first_id);
+		break;
+	case ICE_AQC_CAPS_MSIX:
+		caps->num_msix_vectors = number;
+		caps->msix_vector_first_id = phys_id;
+		ice_debug(hw, ICE_DBG_INIT, "%s: num_msix_vectors = %d\n", prefix,
+			  caps->num_msix_vectors);
+		ice_debug(hw, ICE_DBG_INIT, "%s: msix_vector_first_id = %d\n", prefix,
+			  caps->msix_vector_first_id);
+		break;
+	case ICE_AQC_CAPS_PENDING_NVM_VER:
+		caps->nvm_update_pending_nvm = true;
+		ice_debug(hw, ICE_DBG_INIT, "%s: update_pending_nvm\n", prefix);
+		break;
+	case ICE_AQC_CAPS_PENDING_OROM_VER:
+		caps->nvm_update_pending_orom = true;
+		ice_debug(hw, ICE_DBG_INIT, "%s: update_pending_orom\n", prefix);
+		break;
+	case ICE_AQC_CAPS_PENDING_NET_VER:
+		caps->nvm_update_pending_netlist = true;
+		ice_debug(hw, ICE_DBG_INIT, "%s: update_pending_netlist\n", prefix);
+		break;
+	case ICE_AQC_CAPS_NVM_MGMT:
+		caps->nvm_unified_update =
+			(number & ICE_NVM_MGMT_UNIFIED_UPD_SUPPORT) ?
+			true : false;
+		ice_debug(hw, ICE_DBG_INIT, "%s: nvm_unified_update = %d\n", prefix,
+			  caps->nvm_unified_update);
+		break;
+	case ICE_AQC_CAPS_RDMA:
+		caps->rdma = (number == 1);
+		ice_debug(hw, ICE_DBG_INIT, "%s: rdma = %d\n", prefix, caps->rdma);
+		break;
+	case ICE_AQC_CAPS_MAX_MTU:
+		caps->max_mtu = number;
+		ice_debug(hw, ICE_DBG_INIT, "%s: max_mtu = %d\n",
+			  prefix, caps->max_mtu);
+		break;
+	case ICE_AQC_CAPS_PCIE_RESET_AVOIDANCE:
+		caps->pcie_reset_avoidance = (number > 0);
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: pcie_reset_avoidance = %d\n", prefix,
+			  caps->pcie_reset_avoidance);
+		break;
+	case ICE_AQC_CAPS_POST_UPDATE_RESET_RESTRICT:
+		caps->reset_restrict_support = (number == 1);
+		ice_debug(hw, ICE_DBG_INIT,
+			  "%s: reset_restrict_support = %d\n", prefix,
+			  caps->reset_restrict_support);
+		break;
+	case ICE_AQC_CAPS_FW_LAG_SUPPORT:
+		caps->roce_lag = !!(number & ICE_AQC_BIT_ROCEV2_LAG);
+		ice_debug(hw, ICE_DBG_INIT, "%s: roce_lag = %u\n",
+			  prefix, caps->roce_lag);
+		caps->sriov_lag = !!(number & ICE_AQC_BIT_SRIOV_LAG);
+		ice_debug(hw, ICE_DBG_INIT, "%s: sriov_lag = %u\n",
+			  prefix, caps->sriov_lag);
+		break;
+	default:
+		/* Not one of the recognized common capabilities */
+		found = false;
+	}
+
+	return found;
+}
+
+/**
+ * ice_recalc_port_limited_caps - Recalculate port limited capabilities
+ * @hw: pointer to the HW structure
+ * @caps: pointer to capabilities structure to fix
+ *
+ * Re-calculate the capabilities that are dependent on the number of physical
+ * ports; i.e. some features are not supported or function differently on
+ * devices with more than 4 ports.
+ */
+static void
+ice_recalc_port_limited_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps)
+{
+	/* This assumes device capabilities are always scanned before function
+	 * capabilities during the initialization flow.
+	 */
+	if (hw->dev_caps.num_funcs > 4) {
+		/* Max 4 TCs per port */
+		caps->maxtc = 4;
+		ice_debug(hw, ICE_DBG_INIT, "reducing maxtc to %d (based on #ports)\n",
+			  caps->maxtc);
+		if (caps->rdma) {
+			ice_debug(hw, ICE_DBG_INIT, "forcing RDMA off\n");
+			caps->rdma = 0;
+		}
+
+		/* print message only when processing device capabilities
+		 * during initialization.
+		 */
+		if (caps == &hw->dev_caps.common_cap)
+			dev_info(ice_hw_to_dev(hw), "RDMA functionality is not available with the current device configuration.\n");
+	}
+}
+
+/**
+ * ice_parse_vf_func_caps - Parse ICE_AQC_CAPS_VF function caps
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ * @cap: pointer to the capability element to parse
+ *
+ * Extract function capabilities for ICE_AQC_CAPS_VF.
+ */
+static void
+ice_parse_vf_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
+		       struct ice_aqc_list_caps_elem *cap)
+{
+	u32 logical_id = le32_to_cpu(cap->logical_id);
+	u32 number = le32_to_cpu(cap->number);
+
+	func_p->num_allocd_vfs = number;
+	func_p->vf_base_id = logical_id;
+	ice_debug(hw, ICE_DBG_INIT, "func caps: num_allocd_vfs = %d\n",
+		  func_p->num_allocd_vfs);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: vf_base_id = %d\n",
+		  func_p->vf_base_id);
+}
+
+/**
+ * ice_parse_vsi_func_caps - Parse ICE_AQC_CAPS_VSI function caps
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ * @cap: pointer to the capability element to parse
+ *
+ * Extract function capabilities for ICE_AQC_CAPS_VSI.
+ */
+static void
+ice_parse_vsi_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
+			struct ice_aqc_list_caps_elem *cap)
+{
+	func_p->guar_num_vsi = ice_get_num_per_func(hw, ICE_MAX_VSI);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: guar_num_vsi (fw) = %d\n",
+		  le32_to_cpu(cap->number));
+	ice_debug(hw, ICE_DBG_INIT, "func caps: guar_num_vsi = %d\n",
+		  func_p->guar_num_vsi);
+}
+
+/**
+ * ice_parse_1588_func_caps - Parse ICE_AQC_CAPS_1588 function caps
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ * @cap: pointer to the capability element to parse
+ *
+ * Extract function capabilities for ICE_AQC_CAPS_1588.
+ */
+static void
+ice_parse_1588_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
+			 struct ice_aqc_list_caps_elem *cap)
+{
+	struct ice_ts_func_info *info = &func_p->ts_func_info;
+	u32 number = le32_to_cpu(cap->number);
+
+	info->ena = ((number & ICE_TS_FUNC_ENA_M) != 0);
+	func_p->common_cap.ieee_1588 = info->ena;
+
+	info->src_tmr_owned = ((number & ICE_TS_SRC_TMR_OWND_M) != 0);
+	info->tmr_ena = ((number & ICE_TS_TMR_ENA_M) != 0);
+	info->tmr_index_owned = ((number & ICE_TS_TMR_IDX_OWND_M) != 0);
+	info->tmr_index_assoc = ((number & ICE_TS_TMR_IDX_ASSOC_M) != 0);
+
+	info->clk_freq = (number & ICE_TS_CLK_FREQ_M) >> ICE_TS_CLK_FREQ_S;
+	info->clk_src = ((number & ICE_TS_CLK_SRC_M) != 0);
+
+	if (info->clk_freq < NUM_ICE_TIME_REF_FREQ) {
+		info->time_ref = (enum ice_time_ref_freq)info->clk_freq;
+	} else {
+		/* Unknown clock frequency, so assume a (probably incorrect)
+		 * default to avoid out-of-bounds look ups of frequency
+		 * related information.
+		 */
+		ice_debug(hw, ICE_DBG_INIT, "1588 func caps: unknown clock frequency %u\n",
+			  info->clk_freq);
+		info->time_ref = ICE_TIME_REF_FREQ_25_000;
+	}
+
+	ice_debug(hw, ICE_DBG_INIT, "func caps: ieee_1588 = %u\n",
+		  func_p->common_cap.ieee_1588);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: src_tmr_owned = %u\n",
+		  info->src_tmr_owned);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: tmr_ena = %u\n",
+		  info->tmr_ena);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: tmr_index_owned = %u\n",
+		  info->tmr_index_owned);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: tmr_index_assoc = %u\n",
+		  info->tmr_index_assoc);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: clk_freq = %u\n",
+		  info->clk_freq);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: clk_src = %u\n",
+		  info->clk_src);
+}
+
+/**
+ * ice_parse_fdir_func_caps - Parse ICE_AQC_CAPS_FD function caps
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ *
+ * Extract function capabilities for ICE_AQC_CAPS_FD.
+ */
+static void
+ice_parse_fdir_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p)
+{
+	u32 reg_val, val;
+
+	reg_val = rd32(hw, GLQF_FD_SIZE);
+	val = (reg_val & GLQF_FD_SIZE_FD_GSIZE_M) >>
+		GLQF_FD_SIZE_FD_GSIZE_S;
+	func_p->fd_fltr_guar =
+		ice_get_num_per_func(hw, val);
+	val = (reg_val & GLQF_FD_SIZE_FD_BSIZE_M) >>
+		GLQF_FD_SIZE_FD_BSIZE_S;
+	func_p->fd_fltr_best_effort = val;
+
+	ice_debug(hw, ICE_DBG_INIT, "func caps: fd_fltr_guar = %d\n",
+		  func_p->fd_fltr_guar);
+	ice_debug(hw, ICE_DBG_INIT, "func caps: fd_fltr_best_effort = %d\n",
+		  func_p->fd_fltr_best_effort);
+}
+
+/**
+ * ice_parse_func_caps - Parse function capabilities
+ * @hw: pointer to the HW struct
+ * @func_p: pointer to function capabilities structure
+ * @buf: buffer containing the function capability records
+ * @cap_count: the number of capabilities
+ *
+ * Helper function to parse function (0x000A) capabilities list. For
+ * capabilities shared between device and function, this relies on
+ * ice_parse_common_caps.
+ *
+ * Loop through the list of provided capabilities and extract the relevant
+ * data into the function capabilities structured.
+ */
+static void
+ice_parse_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_p,
+		    void *buf, u32 cap_count)
+{
+	struct ice_aqc_list_caps_elem *cap_resp;
+	u32 i;
+
+	cap_resp = buf;
+
+	memset(func_p, 0, sizeof(*func_p));
+
+	for (i = 0; i < cap_count; i++) {
+		u16 cap = le16_to_cpu(cap_resp[i].cap);
+		bool found;
+
+		found = ice_parse_common_caps(hw, &func_p->common_cap,
+					      &cap_resp[i], "func caps");
+
+		switch (cap) {
+		case ICE_AQC_CAPS_VF:
+			ice_parse_vf_func_caps(hw, func_p, &cap_resp[i]);
+			break;
+		case ICE_AQC_CAPS_VSI:
+			ice_parse_vsi_func_caps(hw, func_p, &cap_resp[i]);
+			break;
+		case ICE_AQC_CAPS_1588:
+			ice_parse_1588_func_caps(hw, func_p, &cap_resp[i]);
+			break;
+		case ICE_AQC_CAPS_FD:
+			ice_parse_fdir_func_caps(hw, func_p);
+			break;
+		default:
+			/* Don't list common capabilities as unknown */
+			if (!found)
+				ice_debug(hw, ICE_DBG_INIT, "func caps: unknown capability[%d]: 0x%x\n",
+					  i, cap);
+			break;
+		}
+	}
+
+	ice_recalc_port_limited_caps(hw, &func_p->common_cap);
+}
+
+/**
+ * ice_parse_valid_functions_cap - Parse ICE_AQC_CAPS_VALID_FUNCTIONS caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_VALID_FUNCTIONS for device capabilities.
+ */
+static void
+ice_parse_valid_functions_cap(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+			      struct ice_aqc_list_caps_elem *cap)
+{
+	u32 number = le32_to_cpu(cap->number);
+
+	dev_p->num_funcs = hweight32(number);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: num_funcs = %d\n",
+		  dev_p->num_funcs);
+}
+
+/**
+ * ice_parse_vf_dev_caps - Parse ICE_AQC_CAPS_VF device caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_VF for device capabilities.
+ */
+static void
+ice_parse_vf_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+		      struct ice_aqc_list_caps_elem *cap)
+{
+	u32 number = le32_to_cpu(cap->number);
+
+	dev_p->num_vfs_exposed = number;
+	ice_debug(hw, ICE_DBG_INIT, "dev_caps: num_vfs_exposed = %d\n",
+		  dev_p->num_vfs_exposed);
+}
+
+/**
+ * ice_parse_vsi_dev_caps - Parse ICE_AQC_CAPS_VSI device caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_VSI for device capabilities.
+ */
+static void
+ice_parse_vsi_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+		       struct ice_aqc_list_caps_elem *cap)
+{
+	u32 number = le32_to_cpu(cap->number);
+
+	dev_p->num_vsi_allocd_to_host = number;
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: num_vsi_allocd_to_host = %d\n",
+		  dev_p->num_vsi_allocd_to_host);
+}
+
+/**
+ * ice_parse_1588_dev_caps - Parse ICE_AQC_CAPS_1588 device caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_1588 for device capabilities.
+ */
+static void
+ice_parse_1588_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+			struct ice_aqc_list_caps_elem *cap)
+{
+	struct ice_ts_dev_info *info = &dev_p->ts_dev_info;
+	u32 logical_id = le32_to_cpu(cap->logical_id);
+	u32 phys_id = le32_to_cpu(cap->phys_id);
+	u32 number = le32_to_cpu(cap->number);
+
+	info->ena = ((number & ICE_TS_DEV_ENA_M) != 0);
+	dev_p->common_cap.ieee_1588 = info->ena;
+
+	info->tmr0_owner = number & ICE_TS_TMR0_OWNR_M;
+	info->tmr0_owned = ((number & ICE_TS_TMR0_OWND_M) != 0);
+	info->tmr0_ena = ((number & ICE_TS_TMR0_ENA_M) != 0);
+
+	info->tmr1_owner = (number & ICE_TS_TMR1_OWNR_M) >> ICE_TS_TMR1_OWNR_S;
+	info->tmr1_owned = ((number & ICE_TS_TMR1_OWND_M) != 0);
+	info->tmr1_ena = ((number & ICE_TS_TMR1_ENA_M) != 0);
+
+	info->ts_ll_read = ((number & ICE_TS_LL_TX_TS_READ_M) != 0);
+
+	info->ena_ports = logical_id;
+	info->tmr_own_map = phys_id;
+
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: ieee_1588 = %u\n",
+		  dev_p->common_cap.ieee_1588);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr0_owner = %u\n",
+		  info->tmr0_owner);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr0_owned = %u\n",
+		  info->tmr0_owned);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr0_ena = %u\n",
+		  info->tmr0_ena);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr1_owner = %u\n",
+		  info->tmr1_owner);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr1_owned = %u\n",
+		  info->tmr1_owned);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr1_ena = %u\n",
+		  info->tmr1_ena);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: ts_ll_read = %u\n",
+		  info->ts_ll_read);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: ieee_1588 ena_ports = %u\n",
+		  info->ena_ports);
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: tmr_own_map = %u\n",
+		  info->tmr_own_map);
+}
+
+/**
+ * ice_parse_fdir_dev_caps - Parse ICE_AQC_CAPS_FD device caps
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @cap: capability element to parse
+ *
+ * Parse ICE_AQC_CAPS_FD for device capabilities.
+ */
+static void
+ice_parse_fdir_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+			struct ice_aqc_list_caps_elem *cap)
+{
+	u32 number = le32_to_cpu(cap->number);
+
+	dev_p->num_flow_director_fltr = number;
+	ice_debug(hw, ICE_DBG_INIT, "dev caps: num_flow_director_fltr = %d\n",
+		  dev_p->num_flow_director_fltr);
+}
+
+/**
+ * ice_parse_dev_caps - Parse device capabilities
+ * @hw: pointer to the HW struct
+ * @dev_p: pointer to device capabilities structure
+ * @buf: buffer containing the device capability records
+ * @cap_count: the number of capabilities
+ *
+ * Helper device to parse device (0x000B) capabilities list. For
+ * capabilities shared between device and function, this relies on
+ * ice_parse_common_caps.
+ *
+ * Loop through the list of provided capabilities and extract the relevant
+ * data into the device capabilities structured.
+ */
+static void
+ice_parse_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_p,
+		   void *buf, u32 cap_count)
+{
+	struct ice_aqc_list_caps_elem *cap_resp;
+	u32 i;
+
+	cap_resp = buf;
+
+	memset(dev_p, 0, sizeof(*dev_p));
+
+	for (i = 0; i < cap_count; i++) {
+		u16 cap = le16_to_cpu(cap_resp[i].cap);
+		bool found;
+
+		found = ice_parse_common_caps(hw, &dev_p->common_cap,
+					      &cap_resp[i], "dev caps");
+
+		switch (cap) {
+		case ICE_AQC_CAPS_VALID_FUNCTIONS:
+			ice_parse_valid_functions_cap(hw, dev_p, &cap_resp[i]);
+			break;
+		case ICE_AQC_CAPS_VF:
+			ice_parse_vf_dev_caps(hw, dev_p, &cap_resp[i]);
+			break;
+		case ICE_AQC_CAPS_VSI:
+			ice_parse_vsi_dev_caps(hw, dev_p, &cap_resp[i]);
+			break;
+		case ICE_AQC_CAPS_1588:
+			ice_parse_1588_dev_caps(hw, dev_p, &cap_resp[i]);
+			break;
+		case  ICE_AQC_CAPS_FD:
+			ice_parse_fdir_dev_caps(hw, dev_p, &cap_resp[i]);
+			break;
+		default:
+			/* Don't list common capabilities as unknown */
+			if (!found)
+				ice_debug(hw, ICE_DBG_INIT, "dev caps: unknown capability[%d]: 0x%x\n",
+					  i, cap);
+			break;
+		}
+	}
+
+	ice_recalc_port_limited_caps(hw, &dev_p->common_cap);
+}
+
+/**
+ * ice_aq_get_netlist_node
+ * @hw: pointer to the hw struct
+ * @cmd: get_link_topo AQ structure
+ * @node_part_number: output node part number if node found
+ * @node_handle: output node handle parameter if node found
+ */
+static int
+ice_aq_get_netlist_node(struct ice_hw *hw, struct ice_aqc_get_link_topo *cmd,
+			u8 *node_part_number, u16 *node_handle)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo);
+	desc.params.get_link_topo = *cmd;
+
+	if (ice_aq_send_cmd(hw, &desc, NULL, 0, NULL))
+		return -EIO;
+
+	if (node_handle)
+		*node_handle = le16_to_cpu(desc.params.get_link_topo.addr.handle);
+	if (node_part_number)
+		*node_part_number = desc.params.get_link_topo.node_part_num;
+
+	return 0;
+}
+
+/**
+ * ice_is_pf_c827 - check if pf contains c827 phy
+ * @hw: pointer to the hw struct
+ */
+bool ice_is_pf_c827(struct ice_hw *hw)
+{
+	struct ice_aqc_get_link_topo cmd = {};
+	u8 node_part_number;
+	u16 node_handle;
+	int status;
+
+	if (hw->mac_type != ICE_MAC_E810)
+		return false;
+
+	if (hw->device_id != ICE_DEV_ID_E810C_QSFP)
+		return true;
+
+	cmd.addr.topo_params.node_type_ctx =
+		FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_TYPE_M, ICE_AQC_LINK_TOPO_NODE_TYPE_PHY) |
+		FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M, ICE_AQC_LINK_TOPO_NODE_CTX_PORT);
+	cmd.addr.topo_params.index = 0;
+
+	status = ice_aq_get_netlist_node(hw, &cmd, &node_part_number,
+					 &node_handle);
+
+	if (status || node_part_number != ICE_AQC_GET_LINK_TOPO_NODE_NR_C827)
+		return false;
+
+	if (node_handle == E810C_QSFP_C827_0_HANDLE || node_handle == E810C_QSFP_C827_1_HANDLE)
+		return true;
+
+	return false;
+}
+
+/**
+ * ice_aq_list_caps - query function/device capabilities
+ * @hw: pointer to the HW struct
+ * @buf: a buffer to hold the capabilities
+ * @buf_size: size of the buffer
+ * @cap_count: if not NULL, set to the number of capabilities reported
+ * @opc: capabilities type to discover, device or function
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get the function (0x000A) or device (0x000B) capabilities description from
+ * firmware and store it in the buffer.
+ *
+ * If the cap_count pointer is not NULL, then it is set to the number of
+ * capabilities firmware will report. Note that if the buffer size is too
+ * small, it is possible the command will return ICE_AQ_ERR_ENOMEM. The
+ * cap_count will still be updated in this case. It is recommended that the
+ * buffer size be set to ICE_AQ_MAX_BUF_LEN (the largest possible buffer that
+ * firmware could return) to avoid this.
+ */
+int
+ice_aq_list_caps(struct ice_hw *hw, void *buf, u16 buf_size, u32 *cap_count,
+		 enum ice_adminq_opc opc, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_list_caps *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	cmd = &desc.params.get_cap;
+
+	if (opc != ice_aqc_opc_list_func_caps &&
+	    opc != ice_aqc_opc_list_dev_caps)
+		return -EINVAL;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opc);
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+
+	if (cap_count)
+		*cap_count = le32_to_cpu(cmd->count);
+
+	return status;
+}
+
+/**
+ * ice_discover_dev_caps - Read and extract device capabilities
+ * @hw: pointer to the hardware structure
+ * @dev_caps: pointer to device capabilities structure
+ *
+ * Read the device capabilities and extract them into the dev_caps structure
+ * for later use.
+ */
+int
+ice_discover_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_caps)
+{
+	u32 cap_count = 0;
+	void *cbuf;
+	int status;
+
+	cbuf = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
+	if (!cbuf)
+		return -ENOMEM;
+
+	/* Although the driver doesn't know the number of capabilities the
+	 * device will return, we can simply send a 4KB buffer, the maximum
+	 * possible size that firmware can return.
+	 */
+	cap_count = ICE_AQ_MAX_BUF_LEN / sizeof(struct ice_aqc_list_caps_elem);
+
+	status = ice_aq_list_caps(hw, cbuf, ICE_AQ_MAX_BUF_LEN, &cap_count,
+				  ice_aqc_opc_list_dev_caps, NULL);
+	if (!status)
+		ice_parse_dev_caps(hw, dev_caps, cbuf, cap_count);
+	kfree(cbuf);
+
+	return status;
+}
+
+/**
+ * ice_discover_func_caps - Read and extract function capabilities
+ * @hw: pointer to the hardware structure
+ * @func_caps: pointer to function capabilities structure
+ *
+ * Read the function capabilities and extract them into the func_caps structure
+ * for later use.
+ */
+static int
+ice_discover_func_caps(struct ice_hw *hw, struct ice_hw_func_caps *func_caps)
+{
+	u32 cap_count = 0;
+	void *cbuf;
+	int status;
+
+	cbuf = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
+	if (!cbuf)
+		return -ENOMEM;
+
+	/* Although the driver doesn't know the number of capabilities the
+	 * device will return, we can simply send a 4KB buffer, the maximum
+	 * possible size that firmware can return.
+	 */
+	cap_count = ICE_AQ_MAX_BUF_LEN / sizeof(struct ice_aqc_list_caps_elem);
+
+	status = ice_aq_list_caps(hw, cbuf, ICE_AQ_MAX_BUF_LEN, &cap_count,
+				  ice_aqc_opc_list_func_caps, NULL);
+	if (!status)
+		ice_parse_func_caps(hw, func_caps, cbuf, cap_count);
+	kfree(cbuf);
+
+	return status;
+}
+
+/**
+ * ice_set_safe_mode_caps - Override dev/func capabilities when in safe mode
+ * @hw: pointer to the hardware structure
+ */
+void ice_set_safe_mode_caps(struct ice_hw *hw)
+{
+	struct ice_hw_func_caps *func_caps = &hw->func_caps;
+	struct ice_hw_dev_caps *dev_caps = &hw->dev_caps;
+	struct ice_hw_common_caps cached_caps;
+	u32 num_funcs;
+
+	/* cache some func_caps values that should be restored after memset */
+	cached_caps = func_caps->common_cap;
+
+	/* unset func capabilities */
+	memset(func_caps, 0, sizeof(*func_caps));
+
+#define ICE_RESTORE_FUNC_CAP(name) \
+	func_caps->common_cap.name = cached_caps.name
+
+	/* restore cached values */
+	ICE_RESTORE_FUNC_CAP(valid_functions);
+	ICE_RESTORE_FUNC_CAP(txq_first_id);
+	ICE_RESTORE_FUNC_CAP(rxq_first_id);
+	ICE_RESTORE_FUNC_CAP(msix_vector_first_id);
+	ICE_RESTORE_FUNC_CAP(max_mtu);
+	ICE_RESTORE_FUNC_CAP(nvm_unified_update);
+	ICE_RESTORE_FUNC_CAP(nvm_update_pending_nvm);
+	ICE_RESTORE_FUNC_CAP(nvm_update_pending_orom);
+	ICE_RESTORE_FUNC_CAP(nvm_update_pending_netlist);
+
+	/* one Tx and one Rx queue in safe mode */
+	func_caps->common_cap.num_rxq = 1;
+	func_caps->common_cap.num_txq = 1;
+
+	/* two MSIX vectors, one for traffic and one for misc causes */
+	func_caps->common_cap.num_msix_vectors = 2;
+	func_caps->guar_num_vsi = 1;
+
+	/* cache some dev_caps values that should be restored after memset */
+	cached_caps = dev_caps->common_cap;
+	num_funcs = dev_caps->num_funcs;
+
+	/* unset dev capabilities */
+	memset(dev_caps, 0, sizeof(*dev_caps));
+
+#define ICE_RESTORE_DEV_CAP(name) \
+	dev_caps->common_cap.name = cached_caps.name
+
+	/* restore cached values */
+	ICE_RESTORE_DEV_CAP(valid_functions);
+	ICE_RESTORE_DEV_CAP(txq_first_id);
+	ICE_RESTORE_DEV_CAP(rxq_first_id);
+	ICE_RESTORE_DEV_CAP(msix_vector_first_id);
+	ICE_RESTORE_DEV_CAP(max_mtu);
+	ICE_RESTORE_DEV_CAP(nvm_unified_update);
+	ICE_RESTORE_DEV_CAP(nvm_update_pending_nvm);
+	ICE_RESTORE_DEV_CAP(nvm_update_pending_orom);
+	ICE_RESTORE_DEV_CAP(nvm_update_pending_netlist);
+	dev_caps->num_funcs = num_funcs;
+
+	/* one Tx and one Rx queue per function in safe mode */
+	dev_caps->common_cap.num_rxq = num_funcs;
+	dev_caps->common_cap.num_txq = num_funcs;
+
+	/* two MSIX vectors per function */
+	dev_caps->common_cap.num_msix_vectors = 2 * num_funcs;
+}
+
+/**
+ * ice_get_caps - get info about the HW
+ * @hw: pointer to the hardware structure
+ */
+int ice_get_caps(struct ice_hw *hw)
+{
+	int status;
+
+	status = ice_discover_dev_caps(hw, &hw->dev_caps);
+	if (status)
+		return status;
+
+	return ice_discover_func_caps(hw, &hw->func_caps);
+}
+
+/**
+ * ice_aq_manage_mac_write - manage MAC address write command
+ * @hw: pointer to the HW struct
+ * @mac_addr: MAC address to be written as LAA/LAA+WoL/Port address
+ * @flags: flags to control write behavior
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function is used to write MAC address to the NVM (0x0108).
+ */
+int
+ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags,
+			struct ice_sq_cd *cd)
+{
+	struct ice_aqc_manage_mac_write *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.mac_write;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_write);
+
+	cmd->flags = flags;
+	ether_addr_copy(cmd->mac_addr, mac_addr);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_clear_pxe_mode
+ * @hw: pointer to the HW struct
+ *
+ * Tell the firmware that the driver is taking over from PXE (0x0110).
+ */
+static int ice_aq_clear_pxe_mode(struct ice_hw *hw)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_clear_pxe_mode);
+	desc.params.clear_pxe.rx_cnt = ICE_AQC_CLEAR_PXE_RX_CNT;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_clear_pxe_mode - clear pxe operations mode
+ * @hw: pointer to the HW struct
+ *
+ * Make sure all PXE mode settings are cleared, including things
+ * like descriptor fetch/write-back mode.
+ */
+void ice_clear_pxe_mode(struct ice_hw *hw)
+{
+	if (ice_check_sq_alive(hw, &hw->adminq))
+		ice_aq_clear_pxe_mode(hw);
+}
+
+/**
+ * ice_aq_set_port_params - set physical port parameters.
+ * @pi: pointer to the port info struct
+ * @double_vlan: if set double VLAN is enabled
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set Physical port parameters (0x0203)
+ */
+int
+ice_aq_set_port_params(struct ice_port_info *pi, bool double_vlan,
+		       struct ice_sq_cd *cd)
+
+{
+	struct ice_aqc_set_port_params *cmd;
+	struct ice_hw *hw = pi->hw;
+	struct ice_aq_desc desc;
+	u16 cmd_flags = 0;
+
+	cmd = &desc.params.set_port_params;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_params);
+	if (double_vlan)
+		cmd_flags |= ICE_AQC_SET_P_PARAMS_DOUBLE_VLAN_ENA;
+	cmd->cmd_flags = cpu_to_le16(cmd_flags);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_is_100m_speed_supported
+ * @hw: pointer to the HW struct
+ *
+ * returns true if 100M speeds are supported by the device,
+ * false otherwise.
+ */
+bool ice_is_100m_speed_supported(struct ice_hw *hw)
+{
+	switch (hw->device_id) {
+	case ICE_DEV_ID_E822C_SGMII:
+	case ICE_DEV_ID_E822L_SGMII:
+	case ICE_DEV_ID_E823L_1GBE:
+	case ICE_DEV_ID_E823C_SGMII:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/**
+ * ice_get_link_speed_based_on_phy_type - returns link speed
+ * @phy_type_low: lower part of phy_type
+ * @phy_type_high: higher part of phy_type
+ *
+ * This helper function will convert an entry in PHY type structure
+ * [phy_type_low, phy_type_high] to its corresponding link speed.
+ * Note: In the structure of [phy_type_low, phy_type_high], there should
+ * be one bit set, as this function will convert one PHY type to its
+ * speed.
+ * If no bit gets set, ICE_AQ_LINK_SPEED_UNKNOWN will be returned
+ * If more than one bit gets set, ICE_AQ_LINK_SPEED_UNKNOWN will be returned
+ */
+static u16
+ice_get_link_speed_based_on_phy_type(u64 phy_type_low, u64 phy_type_high)
+{
+	u16 speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN;
+	u16 speed_phy_type_low = ICE_AQ_LINK_SPEED_UNKNOWN;
+
+	switch (phy_type_low) {
+	case ICE_PHY_TYPE_LOW_100BASE_TX:
+	case ICE_PHY_TYPE_LOW_100M_SGMII:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_100MB;
+		break;
+	case ICE_PHY_TYPE_LOW_1000BASE_T:
+	case ICE_PHY_TYPE_LOW_1000BASE_SX:
+	case ICE_PHY_TYPE_LOW_1000BASE_LX:
+	case ICE_PHY_TYPE_LOW_1000BASE_KX:
+	case ICE_PHY_TYPE_LOW_1G_SGMII:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_1000MB;
+		break;
+	case ICE_PHY_TYPE_LOW_2500BASE_T:
+	case ICE_PHY_TYPE_LOW_2500BASE_X:
+	case ICE_PHY_TYPE_LOW_2500BASE_KX:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_2500MB;
+		break;
+	case ICE_PHY_TYPE_LOW_5GBASE_T:
+	case ICE_PHY_TYPE_LOW_5GBASE_KR:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_5GB;
+		break;
+	case ICE_PHY_TYPE_LOW_10GBASE_T:
+	case ICE_PHY_TYPE_LOW_10G_SFI_DA:
+	case ICE_PHY_TYPE_LOW_10GBASE_SR:
+	case ICE_PHY_TYPE_LOW_10GBASE_LR:
+	case ICE_PHY_TYPE_LOW_10GBASE_KR_CR1:
+	case ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_10G_SFI_C2C:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_10GB;
+		break;
+	case ICE_PHY_TYPE_LOW_25GBASE_T:
+	case ICE_PHY_TYPE_LOW_25GBASE_CR:
+	case ICE_PHY_TYPE_LOW_25GBASE_CR_S:
+	case ICE_PHY_TYPE_LOW_25GBASE_CR1:
+	case ICE_PHY_TYPE_LOW_25GBASE_SR:
+	case ICE_PHY_TYPE_LOW_25GBASE_LR:
+	case ICE_PHY_TYPE_LOW_25GBASE_KR:
+	case ICE_PHY_TYPE_LOW_25GBASE_KR_S:
+	case ICE_PHY_TYPE_LOW_25GBASE_KR1:
+	case ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_25G_AUI_C2C:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_25GB;
+		break;
+	case ICE_PHY_TYPE_LOW_40GBASE_CR4:
+	case ICE_PHY_TYPE_LOW_40GBASE_SR4:
+	case ICE_PHY_TYPE_LOW_40GBASE_LR4:
+	case ICE_PHY_TYPE_LOW_40GBASE_KR4:
+	case ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_40G_XLAUI:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_40GB;
+		break;
+	case ICE_PHY_TYPE_LOW_50GBASE_CR2:
+	case ICE_PHY_TYPE_LOW_50GBASE_SR2:
+	case ICE_PHY_TYPE_LOW_50GBASE_LR2:
+	case ICE_PHY_TYPE_LOW_50GBASE_KR2:
+	case ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_50G_LAUI2:
+	case ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_50G_AUI2:
+	case ICE_PHY_TYPE_LOW_50GBASE_CP:
+	case ICE_PHY_TYPE_LOW_50GBASE_SR:
+	case ICE_PHY_TYPE_LOW_50GBASE_FR:
+	case ICE_PHY_TYPE_LOW_50GBASE_LR:
+	case ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4:
+	case ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_50G_AUI1:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_50GB;
+		break;
+	case ICE_PHY_TYPE_LOW_100GBASE_CR4:
+	case ICE_PHY_TYPE_LOW_100GBASE_SR4:
+	case ICE_PHY_TYPE_LOW_100GBASE_LR4:
+	case ICE_PHY_TYPE_LOW_100GBASE_KR4:
+	case ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_100G_CAUI4:
+	case ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC:
+	case ICE_PHY_TYPE_LOW_100G_AUI4:
+	case ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4:
+	case ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4:
+	case ICE_PHY_TYPE_LOW_100GBASE_CP2:
+	case ICE_PHY_TYPE_LOW_100GBASE_SR2:
+	case ICE_PHY_TYPE_LOW_100GBASE_DR:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_100GB;
+		break;
+	default:
+		speed_phy_type_low = ICE_AQ_LINK_SPEED_UNKNOWN;
+		break;
+	}
+
+	switch (phy_type_high) {
+	case ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4:
+	case ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC:
+	case ICE_PHY_TYPE_HIGH_100G_CAUI2:
+	case ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC:
+	case ICE_PHY_TYPE_HIGH_100G_AUI2:
+		speed_phy_type_high = ICE_AQ_LINK_SPEED_100GB;
+		break;
+	default:
+		speed_phy_type_high = ICE_AQ_LINK_SPEED_UNKNOWN;
+		break;
+	}
+
+	if (speed_phy_type_low == ICE_AQ_LINK_SPEED_UNKNOWN &&
+	    speed_phy_type_high == ICE_AQ_LINK_SPEED_UNKNOWN)
+		return ICE_AQ_LINK_SPEED_UNKNOWN;
+	else if (speed_phy_type_low != ICE_AQ_LINK_SPEED_UNKNOWN &&
+		 speed_phy_type_high != ICE_AQ_LINK_SPEED_UNKNOWN)
+		return ICE_AQ_LINK_SPEED_UNKNOWN;
+	else if (speed_phy_type_low != ICE_AQ_LINK_SPEED_UNKNOWN &&
+		 speed_phy_type_high == ICE_AQ_LINK_SPEED_UNKNOWN)
+		return speed_phy_type_low;
+	else
+		return speed_phy_type_high;
+}
+
+/**
+ * ice_update_phy_type
+ * @phy_type_low: pointer to the lower part of phy_type
+ * @phy_type_high: pointer to the higher part of phy_type
+ * @link_speeds_bitmap: targeted link speeds bitmap
+ *
+ * Note: For the link_speeds_bitmap structure, you can check it at
+ * [ice_aqc_get_link_status->link_speed]. Caller can pass in
+ * link_speeds_bitmap include multiple speeds.
+ *
+ * Each entry in this [phy_type_low, phy_type_high] structure will
+ * present a certain link speed. This helper function will turn on bits
+ * in [phy_type_low, phy_type_high] structure based on the value of
+ * link_speeds_bitmap input parameter.
+ */
+void
+ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high,
+		    u16 link_speeds_bitmap)
+{
+	u64 pt_high;
+	u64 pt_low;
+	int index;
+	u16 speed;
+
+	/* We first check with low part of phy_type */
+	for (index = 0; index <= ICE_PHY_TYPE_LOW_MAX_INDEX; index++) {
+		pt_low = BIT_ULL(index);
+		speed = ice_get_link_speed_based_on_phy_type(pt_low, 0);
+
+		if (link_speeds_bitmap & speed)
+			*phy_type_low |= BIT_ULL(index);
+	}
+
+	/* We then check with high part of phy_type */
+	for (index = 0; index <= ICE_PHY_TYPE_HIGH_MAX_INDEX; index++) {
+		pt_high = BIT_ULL(index);
+		speed = ice_get_link_speed_based_on_phy_type(0, pt_high);
+
+		if (link_speeds_bitmap & speed)
+			*phy_type_high |= BIT_ULL(index);
+	}
+}
+
+/**
+ * ice_aq_set_phy_cfg
+ * @hw: pointer to the HW struct
+ * @pi: port info structure of the interested logical port
+ * @cfg: structure with PHY configuration data to be set
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set the various PHY configuration parameters supported on the Port.
+ * One or more of the Set PHY config parameters may be ignored in an MFP
+ * mode as the PF may not have the privilege to set some of the PHY Config
+ * parameters. This status will be indicated by the command response (0x0601).
+ */
+int
+ice_aq_set_phy_cfg(struct ice_hw *hw, struct ice_port_info *pi,
+		   struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+	int status;
+
+	if (!cfg)
+		return -EINVAL;
+
+	/* Ensure that only valid bits of cfg->caps can be turned on. */
+	if (cfg->caps & ~ICE_AQ_PHY_ENA_VALID_MASK) {
+		ice_debug(hw, ICE_DBG_PHY, "Invalid bit is set in ice_aqc_set_phy_cfg_data->caps : 0x%x\n",
+			  cfg->caps);
+
+		cfg->caps &= ICE_AQ_PHY_ENA_VALID_MASK;
+	}
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_phy_cfg);
+	desc.params.set_phy.lport_num = pi->lport;
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	ice_debug(hw, ICE_DBG_LINK, "set phy cfg\n");
+	ice_debug(hw, ICE_DBG_LINK, "	phy_type_low = 0x%llx\n",
+		  (unsigned long long)le64_to_cpu(cfg->phy_type_low));
+	ice_debug(hw, ICE_DBG_LINK, "	phy_type_high = 0x%llx\n",
+		  (unsigned long long)le64_to_cpu(cfg->phy_type_high));
+	ice_debug(hw, ICE_DBG_LINK, "	caps = 0x%x\n", cfg->caps);
+	ice_debug(hw, ICE_DBG_LINK, "	low_power_ctrl_an = 0x%x\n",
+		  cfg->low_power_ctrl_an);
+	ice_debug(hw, ICE_DBG_LINK, "	eee_cap = 0x%x\n", cfg->eee_cap);
+	ice_debug(hw, ICE_DBG_LINK, "	eeer_value = 0x%x\n", cfg->eeer_value);
+	ice_debug(hw, ICE_DBG_LINK, "	link_fec_opt = 0x%x\n",
+		  cfg->link_fec_opt);
+
+	status = ice_aq_send_cmd(hw, &desc, cfg, sizeof(*cfg), cd);
+	if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE)
+		status = 0;
+
+	if (!status)
+		pi->phy.curr_user_phy_cfg = *cfg;
+
+	return status;
+}
+
+/**
+ * ice_update_link_info - update status of the HW network link
+ * @pi: port info structure of the interested logical port
+ */
+int ice_update_link_info(struct ice_port_info *pi)
+{
+	struct ice_link_status *li;
+	int status;
+
+	if (!pi)
+		return -EINVAL;
+
+	li = &pi->phy.link_info;
+
+	status = ice_aq_get_link_info(pi, true, NULL, NULL);
+	if (status)
+		return status;
+
+	if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) {
+		struct ice_aqc_get_phy_caps_data *pcaps;
+		struct ice_hw *hw;
+
+		hw = pi->hw;
+		pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps),
+				     GFP_KERNEL);
+		if (!pcaps)
+			return -ENOMEM;
+
+		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+					     pcaps, NULL);
+
+		devm_kfree(ice_hw_to_dev(hw), pcaps);
+	}
+
+	return status;
+}
+
+/**
+ * ice_cache_phy_user_req
+ * @pi: port information structure
+ * @cache_data: PHY logging data
+ * @cache_mode: PHY logging mode
+ *
+ * Log the user request on (FC, FEC, SPEED) for later use.
+ */
+static void
+ice_cache_phy_user_req(struct ice_port_info *pi,
+		       struct ice_phy_cache_mode_data cache_data,
+		       enum ice_phy_cache_mode cache_mode)
+{
+	if (!pi)
+		return;
+
+	switch (cache_mode) {
+	case ICE_FC_MODE:
+		pi->phy.curr_user_fc_req = cache_data.data.curr_user_fc_req;
+		break;
+	case ICE_SPEED_MODE:
+		pi->phy.curr_user_speed_req =
+			cache_data.data.curr_user_speed_req;
+		break;
+	case ICE_FEC_MODE:
+		pi->phy.curr_user_fec_req = cache_data.data.curr_user_fec_req;
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * ice_caps_to_fc_mode
+ * @caps: PHY capabilities
+ *
+ * Convert PHY FC capabilities to ice FC mode
+ */
+enum ice_fc_mode ice_caps_to_fc_mode(u8 caps)
+{
+	if (caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE &&
+	    caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
+		return ICE_FC_FULL;
+
+	if (caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
+		return ICE_FC_TX_PAUSE;
+
+	if (caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
+		return ICE_FC_RX_PAUSE;
+
+	return ICE_FC_NONE;
+}
+
+/**
+ * ice_caps_to_fec_mode
+ * @caps: PHY capabilities
+ * @fec_options: Link FEC options
+ *
+ * Convert PHY FEC capabilities to ice FEC mode
+ */
+enum ice_fec_mode ice_caps_to_fec_mode(u8 caps, u8 fec_options)
+{
+	if (caps & ICE_AQC_PHY_EN_AUTO_FEC)
+		return ICE_FEC_AUTO;
+
+	if (fec_options & (ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN |
+			   ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
+			   ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN |
+			   ICE_AQC_PHY_FEC_25G_KR_REQ))
+		return ICE_FEC_BASER;
+
+	if (fec_options & (ICE_AQC_PHY_FEC_25G_RS_528_REQ |
+			   ICE_AQC_PHY_FEC_25G_RS_544_REQ |
+			   ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN))
+		return ICE_FEC_RS;
+
+	return ICE_FEC_NONE;
+}
+
+/**
+ * ice_cfg_phy_fc - Configure PHY FC data based on FC mode
+ * @pi: port information structure
+ * @cfg: PHY configuration data to set FC mode
+ * @req_mode: FC mode to configure
+ */
+int
+ice_cfg_phy_fc(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+	       enum ice_fc_mode req_mode)
+{
+	struct ice_phy_cache_mode_data cache_data;
+	u8 pause_mask = 0x0;
+
+	if (!pi || !cfg)
+		return -EINVAL;
+
+	switch (req_mode) {
+	case ICE_FC_FULL:
+		pause_mask |= ICE_AQC_PHY_EN_TX_LINK_PAUSE;
+		pause_mask |= ICE_AQC_PHY_EN_RX_LINK_PAUSE;
+		break;
+	case ICE_FC_RX_PAUSE:
+		pause_mask |= ICE_AQC_PHY_EN_RX_LINK_PAUSE;
+		break;
+	case ICE_FC_TX_PAUSE:
+		pause_mask |= ICE_AQC_PHY_EN_TX_LINK_PAUSE;
+		break;
+	default:
+		break;
+	}
+
+	/* clear the old pause settings */
+	cfg->caps &= ~(ICE_AQC_PHY_EN_TX_LINK_PAUSE |
+		ICE_AQC_PHY_EN_RX_LINK_PAUSE);
+
+	/* set the new capabilities */
+	cfg->caps |= pause_mask;
+
+	/* Cache user FC request */
+	cache_data.data.curr_user_fc_req = req_mode;
+	ice_cache_phy_user_req(pi, cache_data, ICE_FC_MODE);
+
+	return 0;
+}
+
+/**
+ * ice_set_fc
+ * @pi: port information structure
+ * @aq_failures: pointer to status code, specific to ice_set_fc routine
+ * @ena_auto_link_update: enable automatic link update
+ *
+ * Set the requested flow control mode.
+ */
+int
+ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
+{
+	struct ice_aqc_set_phy_cfg_data cfg = { 0 };
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_hw *hw;
+	int status;
+
+	if (!pi || !aq_failures)
+		return -EINVAL;
+
+	*aq_failures = 0;
+	hw = pi->hw;
+
+	pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return -ENOMEM;
+
+	/* Get the current PHY config */
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
+				     pcaps, NULL);
+	if (status) {
+		*aq_failures = ICE_SET_FC_AQ_FAIL_GET;
+		goto out;
+	}
+
+	ice_copy_phy_caps_to_cfg(pi, pcaps, &cfg);
+
+	/* Configure the set PHY data */
+	status = ice_cfg_phy_fc(pi, &cfg, pi->fc.req_mode);
+	if (status)
+		goto out;
+
+	/* If the capabilities have changed, then set the new config */
+	if (cfg.caps != pcaps->caps) {
+		int retry_count, retry_max = 10;
+
+		/* Auto restart link so settings take effect */
+		if (ena_auto_link_update)
+			cfg.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+		status = ice_aq_set_phy_cfg(hw, pi, &cfg, NULL);
+		if (status) {
+			*aq_failures = ICE_SET_FC_AQ_FAIL_SET;
+			goto out;
+		}
+
+		/* Update the link info
+		 * It sometimes takes a really long time for link to
+		 * come back from the atomic reset. Thus, we wait a
+		 * little bit.
+		 */
+		for (retry_count = 0; retry_count < retry_max; retry_count++) {
+			status = ice_update_link_info(pi);
+
+			if (!status)
+				break;
+
+			mdelay(100);
+		}
+
+		if (status)
+			*aq_failures = ICE_SET_FC_AQ_FAIL_UPDATE;
+	}
+
+out:
+	devm_kfree(ice_hw_to_dev(hw), pcaps);
+	return status;
+}
+
+/**
+ * ice_phy_caps_equals_cfg
+ * @phy_caps: PHY capabilities
+ * @phy_cfg: PHY configuration
+ *
+ * Helper function to determine if PHY capabilities matches PHY
+ * configuration
+ */
+bool
+ice_phy_caps_equals_cfg(struct ice_aqc_get_phy_caps_data *phy_caps,
+			struct ice_aqc_set_phy_cfg_data *phy_cfg)
+{
+	u8 caps_mask, cfg_mask;
+
+	if (!phy_caps || !phy_cfg)
+		return false;
+
+	/* These bits are not common between capabilities and configuration.
+	 * Do not use them to determine equality.
+	 */
+	caps_mask = ICE_AQC_PHY_CAPS_MASK & ~(ICE_AQC_PHY_AN_MODE |
+					      ICE_AQC_GET_PHY_EN_MOD_QUAL);
+	cfg_mask = ICE_AQ_PHY_ENA_VALID_MASK & ~ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+	if (phy_caps->phy_type_low != phy_cfg->phy_type_low ||
+	    phy_caps->phy_type_high != phy_cfg->phy_type_high ||
+	    ((phy_caps->caps & caps_mask) != (phy_cfg->caps & cfg_mask)) ||
+	    phy_caps->low_power_ctrl_an != phy_cfg->low_power_ctrl_an ||
+	    phy_caps->eee_cap != phy_cfg->eee_cap ||
+	    phy_caps->eeer_value != phy_cfg->eeer_value ||
+	    phy_caps->link_fec_options != phy_cfg->link_fec_opt)
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_copy_phy_caps_to_cfg - Copy PHY ability data to configuration data
+ * @pi: port information structure
+ * @caps: PHY ability structure to copy date from
+ * @cfg: PHY configuration structure to copy data to
+ *
+ * Helper function to copy AQC PHY get ability data to PHY set configuration
+ * data structure
+ */
+void
+ice_copy_phy_caps_to_cfg(struct ice_port_info *pi,
+			 struct ice_aqc_get_phy_caps_data *caps,
+			 struct ice_aqc_set_phy_cfg_data *cfg)
+{
+	if (!pi || !caps || !cfg)
+		return;
+
+	memset(cfg, 0, sizeof(*cfg));
+	cfg->phy_type_low = caps->phy_type_low;
+	cfg->phy_type_high = caps->phy_type_high;
+	cfg->caps = caps->caps;
+	cfg->low_power_ctrl_an = caps->low_power_ctrl_an;
+	cfg->eee_cap = caps->eee_cap;
+	cfg->eeer_value = caps->eeer_value;
+	cfg->link_fec_opt = caps->link_fec_options;
+	cfg->module_compliance_enforcement =
+		caps->module_compliance_enforcement;
+}
+
+/**
+ * ice_cfg_phy_fec - Configure PHY FEC data based on FEC mode
+ * @pi: port information structure
+ * @cfg: PHY configuration data to set FEC mode
+ * @fec: FEC mode to configure
+ */
+int
+ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+		enum ice_fec_mode fec)
+{
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_hw *hw;
+	int status;
+
+	if (!pi || !cfg)
+		return -EINVAL;
+
+	hw = pi->hw;
+
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return -ENOMEM;
+
+	status = ice_aq_get_phy_caps(pi, false,
+				     (ice_fw_supports_report_dflt_cfg(hw) ?
+				      ICE_AQC_REPORT_DFLT_CFG :
+				      ICE_AQC_REPORT_TOPO_CAP_MEDIA), pcaps, NULL);
+	if (status)
+		goto out;
+
+	cfg->caps |= pcaps->caps & ICE_AQC_PHY_EN_AUTO_FEC;
+	cfg->link_fec_opt = pcaps->link_fec_options;
+
+	switch (fec) {
+	case ICE_FEC_BASER:
+		/* Clear RS bits, and AND BASE-R ability
+		 * bits and OR request bits.
+		 */
+		cfg->link_fec_opt &= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN |
+			ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN;
+		cfg->link_fec_opt |= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
+			ICE_AQC_PHY_FEC_25G_KR_REQ;
+		break;
+	case ICE_FEC_RS:
+		/* Clear BASE-R bits, and AND RS ability
+		 * bits and OR request bits.
+		 */
+		cfg->link_fec_opt &= ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN;
+		cfg->link_fec_opt |= ICE_AQC_PHY_FEC_25G_RS_528_REQ |
+			ICE_AQC_PHY_FEC_25G_RS_544_REQ;
+		break;
+	case ICE_FEC_NONE:
+		/* Clear all FEC option bits. */
+		cfg->link_fec_opt &= ~ICE_AQC_PHY_FEC_MASK;
+		break;
+	case ICE_FEC_AUTO:
+		/* AND auto FEC bit, and all caps bits. */
+		cfg->caps &= ICE_AQC_PHY_CAPS_MASK;
+		cfg->link_fec_opt |= pcaps->link_fec_options;
+		break;
+	default:
+		status = -EINVAL;
+		break;
+	}
+
+	if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(hw) &&
+	    !ice_fw_supports_report_dflt_cfg(hw)) {
+		struct ice_link_default_override_tlv tlv = { 0 };
+
+		status = ice_get_link_default_override(&tlv, pi);
+		if (status)
+			goto out;
+
+		if (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE) &&
+		    (tlv.options & ICE_LINK_OVERRIDE_EN))
+			cfg->link_fec_opt = tlv.fec_options;
+	}
+
+out:
+	kfree(pcaps);
+
+	return status;
+}
+
+/**
+ * ice_get_link_status - get status of the HW network link
+ * @pi: port information structure
+ * @link_up: pointer to bool (true/false = linkup/linkdown)
+ *
+ * Variable link_up is true if link is up, false if link is down.
+ * The variable link_up is invalid if status is non zero. As a
+ * result of this call, link status reporting becomes enabled
+ */
+int ice_get_link_status(struct ice_port_info *pi, bool *link_up)
+{
+	struct ice_phy_info *phy_info;
+	int status = 0;
+
+	if (!pi || !link_up)
+		return -EINVAL;
+
+	phy_info = &pi->phy;
+
+	if (phy_info->get_link_info) {
+		status = ice_update_link_info(pi);
+
+		if (status)
+			ice_debug(pi->hw, ICE_DBG_LINK, "get link status error, status = %d\n",
+				  status);
+	}
+
+	*link_up = phy_info->link_info.link_info & ICE_AQ_LINK_UP;
+
+	return status;
+}
+
+/**
+ * ice_aq_set_link_restart_an
+ * @pi: pointer to the port information structure
+ * @ena_link: if true: enable link, if false: disable link
+ * @cd: pointer to command details structure or NULL
+ *
+ * Sets up the link and restarts the Auto-Negotiation over the link.
+ */
+int
+ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
+			   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_restart_an *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.restart_an;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_restart_an);
+
+	cmd->cmd_flags = ICE_AQC_RESTART_AN_LINK_RESTART;
+	cmd->lport_num = pi->lport;
+	if (ena_link)
+		cmd->cmd_flags |= ICE_AQC_RESTART_AN_LINK_ENABLE;
+	else
+		cmd->cmd_flags &= ~ICE_AQC_RESTART_AN_LINK_ENABLE;
+
+	return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_set_event_mask
+ * @hw: pointer to the HW struct
+ * @port_num: port number of the physical function
+ * @mask: event mask to be set
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set event mask (0x0613)
+ */
+int
+ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
+		      struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_event_mask *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.set_event_mask;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_event_mask);
+
+	cmd->lport_num = port_num;
+
+	cmd->event_mask = cpu_to_le16(mask);
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_set_mac_loopback
+ * @hw: pointer to the HW struct
+ * @ena_lpbk: Enable or Disable loopback
+ * @cd: pointer to command details structure or NULL
+ *
+ * Enable/disable loopback on a given port
+ */
+int
+ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_mac_lb *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.set_mac_lb;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_mac_lb);
+	if (ena_lpbk)
+		cmd->lb_mode = ICE_AQ_MAC_LB_EN;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_set_port_id_led
+ * @pi: pointer to the port information
+ * @is_orig_mode: is this LED set to original mode (by the net-list)
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set LED value for the given port (0x06e9)
+ */
+int
+ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_port_id_led *cmd;
+	struct ice_hw *hw = pi->hw;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.set_port_id_led;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_id_led);
+
+	if (is_orig_mode)
+		cmd->ident_mode = ICE_AQC_PORT_IDENT_LED_ORIG;
+	else
+		cmd->ident_mode = ICE_AQC_PORT_IDENT_LED_BLINK;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_get_port_options
+ * @hw: pointer to the HW struct
+ * @options: buffer for the resultant port options
+ * @option_count: input - size of the buffer in port options structures,
+ *                output - number of returned port options
+ * @lport: logical port to call the command with (optional)
+ * @lport_valid: when false, FW uses port owned by the PF instead of lport,
+ *               when PF owns more than 1 port it must be true
+ * @active_option_idx: index of active port option in returned buffer
+ * @active_option_valid: active option in returned buffer is valid
+ * @pending_option_idx: index of pending port option in returned buffer
+ * @pending_option_valid: pending option in returned buffer is valid
+ *
+ * Calls Get Port Options AQC (0x06ea) and verifies result.
+ */
+int
+ice_aq_get_port_options(struct ice_hw *hw,
+			struct ice_aqc_get_port_options_elem *options,
+			u8 *option_count, u8 lport, bool lport_valid,
+			u8 *active_option_idx, bool *active_option_valid,
+			u8 *pending_option_idx, bool *pending_option_valid)
+{
+	struct ice_aqc_get_port_options *cmd;
+	struct ice_aq_desc desc;
+	int status;
+	u8 i;
+
+	/* options buffer shall be able to hold max returned options */
+	if (*option_count < ICE_AQC_PORT_OPT_COUNT_M)
+		return -EINVAL;
+
+	cmd = &desc.params.get_port_options;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_port_options);
+
+	if (lport_valid)
+		cmd->lport_num = lport;
+	cmd->lport_num_valid = lport_valid;
+
+	status = ice_aq_send_cmd(hw, &desc, options,
+				 *option_count * sizeof(*options), NULL);
+	if (status)
+		return status;
+
+	/* verify direct FW response & set output parameters */
+	*option_count = FIELD_GET(ICE_AQC_PORT_OPT_COUNT_M,
+				  cmd->port_options_count);
+	ice_debug(hw, ICE_DBG_PHY, "options: %x\n", *option_count);
+	*active_option_valid = FIELD_GET(ICE_AQC_PORT_OPT_VALID,
+					 cmd->port_options);
+	if (*active_option_valid) {
+		*active_option_idx = FIELD_GET(ICE_AQC_PORT_OPT_ACTIVE_M,
+					       cmd->port_options);
+		if (*active_option_idx > (*option_count - 1))
+			return -EIO;
+		ice_debug(hw, ICE_DBG_PHY, "active idx: %x\n",
+			  *active_option_idx);
+	}
+
+	*pending_option_valid = FIELD_GET(ICE_AQC_PENDING_PORT_OPT_VALID,
+					  cmd->pending_port_option_status);
+	if (*pending_option_valid) {
+		*pending_option_idx = FIELD_GET(ICE_AQC_PENDING_PORT_OPT_IDX_M,
+						cmd->pending_port_option_status);
+		if (*pending_option_idx > (*option_count - 1))
+			return -EIO;
+		ice_debug(hw, ICE_DBG_PHY, "pending idx: %x\n",
+			  *pending_option_idx);
+	}
+
+	/* mask output options fields */
+	for (i = 0; i < *option_count; i++) {
+		options[i].pmd = FIELD_GET(ICE_AQC_PORT_OPT_PMD_COUNT_M,
+					   options[i].pmd);
+		options[i].max_lane_speed = FIELD_GET(ICE_AQC_PORT_OPT_MAX_LANE_M,
+						      options[i].max_lane_speed);
+		ice_debug(hw, ICE_DBG_PHY, "pmds: %x max speed: %x\n",
+			  options[i].pmd, options[i].max_lane_speed);
+	}
+
+	return 0;
+}
+
+/**
+ * ice_aq_set_port_option
+ * @hw: pointer to the HW struct
+ * @lport: logical port to call the command with
+ * @lport_valid: when false, FW uses port owned by the PF instead of lport,
+ *               when PF owns more than 1 port it must be true
+ * @new_option: new port option to be written
+ *
+ * Calls Set Port Options AQC (0x06eb).
+ */
+int
+ice_aq_set_port_option(struct ice_hw *hw, u8 lport, u8 lport_valid,
+		       u8 new_option)
+{
+	struct ice_aqc_set_port_option *cmd;
+	struct ice_aq_desc desc;
+
+	if (new_option > ICE_AQC_PORT_OPT_COUNT_M)
+		return -EINVAL;
+
+	cmd = &desc.params.set_port_option;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_option);
+
+	if (lport_valid)
+		cmd->lport_num = lport;
+
+	cmd->lport_num_valid = lport_valid;
+	cmd->selected_port_option = new_option;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_aq_sff_eeprom
+ * @hw: pointer to the HW struct
+ * @lport: bits [7:0] = logical port, bit [8] = logical port valid
+ * @bus_addr: I2C bus address of the eeprom (typically 0xA0, 0=topo default)
+ * @mem_addr: I2C offset. lower 8 bits for address, 8 upper bits zero padding.
+ * @page: QSFP page
+ * @set_page: set or ignore the page
+ * @data: pointer to data buffer to be read/written to the I2C device.
+ * @length: 1-16 for read, 1 for write.
+ * @write: 0 read, 1 for write.
+ * @cd: pointer to command details structure or NULL
+ *
+ * Read/Write SFF EEPROM (0x06EE)
+ */
+int
+ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
+		  u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length,
+		  bool write, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_sff_eeprom *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	if (!data || (mem_addr & 0xff00))
+		return -EINVAL;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_sff_eeprom);
+	cmd = &desc.params.read_write_sff_param;
+	desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD);
+	cmd->lport_num = (u8)(lport & 0xff);
+	cmd->lport_num_valid = (u8)((lport >> 8) & 0x01);
+	cmd->i2c_bus_addr = cpu_to_le16(((bus_addr >> 1) &
+					 ICE_AQC_SFF_I2CBUS_7BIT_M) |
+					((set_page <<
+					  ICE_AQC_SFF_SET_EEPROM_PAGE_S) &
+					 ICE_AQC_SFF_SET_EEPROM_PAGE_M));
+	cmd->i2c_mem_addr = cpu_to_le16(mem_addr & 0xff);
+	cmd->eeprom_page = cpu_to_le16((u16)page << ICE_AQC_SFF_EEPROM_PAGE_S);
+	if (write)
+		cmd->i2c_bus_addr |= cpu_to_le16(ICE_AQC_SFF_IS_WRITE);
+
+	status = ice_aq_send_cmd(hw, &desc, data, length, cd);
+	return status;
+}
+
+static enum ice_lut_size ice_lut_type_to_size(enum ice_lut_type type)
+{
+	switch (type) {
+	case ICE_LUT_VSI:
+		return ICE_LUT_VSI_SIZE;
+	case ICE_LUT_GLOBAL:
+		return ICE_LUT_GLOBAL_SIZE;
+	case ICE_LUT_PF:
+		return ICE_LUT_PF_SIZE;
+	}
+	WARN_ONCE(1, "incorrect type passed");
+	return ICE_LUT_VSI_SIZE;
+}
+
+static enum ice_aqc_lut_flags ice_lut_size_to_flag(enum ice_lut_size size)
+{
+	switch (size) {
+	case ICE_LUT_VSI_SIZE:
+		return ICE_AQC_LUT_SIZE_SMALL;
+	case ICE_LUT_GLOBAL_SIZE:
+		return ICE_AQC_LUT_SIZE_512;
+	case ICE_LUT_PF_SIZE:
+		return ICE_AQC_LUT_SIZE_2K;
+	}
+	WARN_ONCE(1, "incorrect size passed");
+	return 0;
+}
+
+/**
+ * __ice_aq_get_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @params: RSS LUT parameters
+ * @set: set true to set the table, false to get the table
+ *
+ * Internal function to get (0x0B05) or set (0x0B03) RSS look up table
+ */
+static int
+__ice_aq_get_set_rss_lut(struct ice_hw *hw,
+			 struct ice_aq_get_set_rss_lut_params *params, bool set)
+{
+	u16 opcode, vsi_id, vsi_handle = params->vsi_handle, glob_lut_idx = 0;
+	enum ice_lut_type lut_type = params->lut_type;
+	struct ice_aqc_get_set_rss_lut *desc_params;
+	enum ice_aqc_lut_flags flags;
+	enum ice_lut_size lut_size;
+	struct ice_aq_desc desc;
+	u8 *lut = params->lut;
+
+
+	if (!lut || !ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+
+	lut_size = ice_lut_type_to_size(lut_type);
+	if (lut_size > params->lut_size)
+		return -EINVAL;
+	else if (set && lut_size != params->lut_size)
+		return -EINVAL;
+
+	opcode = set ? ice_aqc_opc_set_rss_lut : ice_aqc_opc_get_rss_lut;
+	ice_fill_dflt_direct_cmd_desc(&desc, opcode);
+	if (set)
+		desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	desc_params = &desc.params.get_set_rss_lut;
+	vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+	desc_params->vsi_id = cpu_to_le16(vsi_id | ICE_AQC_RSS_VSI_VALID);
+
+	if (lut_type == ICE_LUT_GLOBAL)
+		glob_lut_idx = FIELD_PREP(ICE_AQC_LUT_GLOBAL_IDX,
+					  params->global_lut_id);
+
+	flags = lut_type | glob_lut_idx | ice_lut_size_to_flag(lut_size);
+	desc_params->flags = cpu_to_le16(flags);
+
+	return ice_aq_send_cmd(hw, &desc, lut, lut_size, NULL);
+}
+
+/**
+ * ice_aq_get_rss_lut
+ * @hw: pointer to the hardware structure
+ * @get_params: RSS LUT parameters used to specify which RSS LUT to get
+ *
+ * get the RSS lookup table, PF or VSI type
+ */
+int
+ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params)
+{
+	return __ice_aq_get_set_rss_lut(hw, get_params, false);
+}
+
+/**
+ * ice_aq_set_rss_lut
+ * @hw: pointer to the hardware structure
+ * @set_params: RSS LUT parameters used to specify how to set the RSS LUT
+ *
+ * set the RSS lookup table, PF or VSI type
+ */
+int
+ice_aq_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *set_params)
+{
+	return __ice_aq_get_set_rss_lut(hw, set_params, true);
+}
+
+/**
+ * __ice_aq_get_set_rss_key
+ * @hw: pointer to the HW struct
+ * @vsi_id: VSI FW index
+ * @key: pointer to key info struct
+ * @set: set true to set the key, false to get the key
+ *
+ * get (0x0B04) or set (0x0B02) the RSS key per VSI
+ */
+static int
+__ice_aq_get_set_rss_key(struct ice_hw *hw, u16 vsi_id,
+			 struct ice_aqc_get_set_rss_keys *key, bool set)
+{
+	struct ice_aqc_get_set_rss_key *desc_params;
+	u16 key_size = sizeof(*key);
+	struct ice_aq_desc desc;
+
+	if (set) {
+		ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_rss_key);
+		desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	} else {
+		ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_rss_key);
+	}
+
+	desc_params = &desc.params.get_set_rss_key;
+	desc_params->vsi_id = cpu_to_le16(vsi_id | ICE_AQC_RSS_VSI_VALID);
+
+	return ice_aq_send_cmd(hw, &desc, key, key_size, NULL);
+}
+
+/**
+ * ice_aq_get_rss_key
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ * @key: pointer to key info struct
+ *
+ * get the RSS key per VSI
+ */
+int
+ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_handle,
+		   struct ice_aqc_get_set_rss_keys *key)
+{
+	if (!ice_is_vsi_valid(hw, vsi_handle) || !key)
+		return -EINVAL;
+
+	return __ice_aq_get_set_rss_key(hw, ice_get_hw_vsi_num(hw, vsi_handle),
+					key, false);
+}
+
+/**
+ * ice_aq_set_rss_key
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ * @keys: pointer to key info struct
+ *
+ * set the RSS key per VSI
+ */
+int
+ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_handle,
+		   struct ice_aqc_get_set_rss_keys *keys)
+{
+	if (!ice_is_vsi_valid(hw, vsi_handle) || !keys)
+		return -EINVAL;
+
+	return __ice_aq_get_set_rss_key(hw, ice_get_hw_vsi_num(hw, vsi_handle),
+					keys, true);
+}
+
+/**
+ * ice_aq_add_lan_txq
+ * @hw: pointer to the hardware structure
+ * @num_qgrps: Number of added queue groups
+ * @qg_list: list of queue groups to be added
+ * @buf_size: size of buffer for indirect command
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add Tx LAN queue (0x0C30)
+ *
+ * NOTE:
+ * Prior to calling add Tx LAN queue:
+ * Initialize the following as part of the Tx queue context:
+ * Completion queue ID if the queue uses Completion queue, Quanta profile,
+ * Cache profile and Packet shaper profile.
+ *
+ * After add Tx LAN queue AQ command is completed:
+ * Interrupts should be associated with specific queues,
+ * Association of Tx queue to Doorbell queue is not part of Add LAN Tx queue
+ * flow.
+ */
+static int
+ice_aq_add_lan_txq(struct ice_hw *hw, u8 num_qgrps,
+		   struct ice_aqc_add_tx_qgrp *qg_list, u16 buf_size,
+		   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_tx_qgrp *list;
+	struct ice_aqc_add_txqs *cmd;
+	struct ice_aq_desc desc;
+	u16 i, sum_size = 0;
+
+	cmd = &desc.params.add_txqs;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_txqs);
+
+	if (!qg_list)
+		return -EINVAL;
+
+	if (num_qgrps > ICE_LAN_TXQ_MAX_QGRPS)
+		return -EINVAL;
+
+	for (i = 0, list = qg_list; i < num_qgrps; i++) {
+		sum_size += struct_size(list, txqs, list->num_txqs);
+		list = (struct ice_aqc_add_tx_qgrp *)(list->txqs +
+						      list->num_txqs);
+	}
+
+	if (buf_size != sum_size)
+		return -EINVAL;
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	cmd->num_qgrps = num_qgrps;
+
+	return ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd);
+}
+
+/**
+ * ice_aq_dis_lan_txq
+ * @hw: pointer to the hardware structure
+ * @num_qgrps: number of groups in the list
+ * @qg_list: the list of groups to disable
+ * @buf_size: the total size of the qg_list buffer in bytes
+ * @rst_src: if called due to reset, specifies the reset source
+ * @vmvf_num: the relative VM or VF number that is undergoing the reset
+ * @cd: pointer to command details structure or NULL
+ *
+ * Disable LAN Tx queue (0x0C31)
+ */
+static int
+ice_aq_dis_lan_txq(struct ice_hw *hw, u8 num_qgrps,
+		   struct ice_aqc_dis_txq_item *qg_list, u16 buf_size,
+		   enum ice_disq_rst_src rst_src, u16 vmvf_num,
+		   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_dis_txq_item *item;
+	struct ice_aqc_dis_txqs *cmd;
+	struct ice_aq_desc desc;
+	u16 i, sz = 0;
+	int status;
+
+	cmd = &desc.params.dis_txqs;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_dis_txqs);
+
+	/* qg_list can be NULL only in VM/VF reset flow */
+	if (!qg_list && !rst_src)
+		return -EINVAL;
+
+	if (num_qgrps > ICE_LAN_TXQ_MAX_QGRPS)
+		return -EINVAL;
+
+	cmd->num_entries = num_qgrps;
+
+	cmd->vmvf_and_timeout = cpu_to_le16((5 << ICE_AQC_Q_DIS_TIMEOUT_S) &
+					    ICE_AQC_Q_DIS_TIMEOUT_M);
+
+	switch (rst_src) {
+	case ICE_VM_RESET:
+		cmd->cmd_type = ICE_AQC_Q_DIS_CMD_VM_RESET;
+		cmd->vmvf_and_timeout |=
+			cpu_to_le16(vmvf_num & ICE_AQC_Q_DIS_VMVF_NUM_M);
+		break;
+	case ICE_VF_RESET:
+		cmd->cmd_type = ICE_AQC_Q_DIS_CMD_VF_RESET;
+		/* In this case, FW expects vmvf_num to be absolute VF ID */
+		cmd->vmvf_and_timeout |=
+			cpu_to_le16((vmvf_num + hw->func_caps.vf_base_id) &
+				    ICE_AQC_Q_DIS_VMVF_NUM_M);
+		break;
+	case ICE_NO_RESET:
+	default:
+		break;
+	}
+
+	/* flush pipe on time out */
+	cmd->cmd_type |= ICE_AQC_Q_DIS_CMD_FLUSH_PIPE;
+	/* If no queue group info, we are in a reset flow. Issue the AQ */
+	if (!qg_list)
+		goto do_aq;
+
+	/* set RD bit to indicate that command buffer is provided by the driver
+	 * and it needs to be read by the firmware
+	 */
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	for (i = 0, item = qg_list; i < num_qgrps; i++) {
+		u16 item_size = struct_size(item, q_id, item->num_qs);
+
+		/* If the num of queues is even, add 2 bytes of padding */
+		if ((item->num_qs % 2) == 0)
+			item_size += 2;
+
+		sz += item_size;
+
+		item = (struct ice_aqc_dis_txq_item *)((u8 *)item + item_size);
+	}
+
+	if (buf_size != sz)
+		return -EINVAL;
+
+do_aq:
+	status = ice_aq_send_cmd(hw, &desc, qg_list, buf_size, cd);
+	if (status) {
+		if (!qg_list)
+			ice_debug(hw, ICE_DBG_SCHED, "VM%d disable failed %d\n",
+				  vmvf_num, hw->adminq.sq_last_status);
+		else
+			ice_debug(hw, ICE_DBG_SCHED, "disable queue %d failed %d\n",
+				  le16_to_cpu(qg_list[0].q_id[0]),
+				  hw->adminq.sq_last_status);
+	}
+	return status;
+}
+
+/**
+ * ice_aq_cfg_lan_txq
+ * @hw: pointer to the hardware structure
+ * @buf: buffer for command
+ * @buf_size: size of buffer in bytes
+ * @num_qs: number of queues being configured
+ * @oldport: origination lport
+ * @newport: destination lport
+ * @cd: pointer to command details structure or NULL
+ *
+ * Move/Configure LAN Tx queue (0x0C32)
+ *
+ * There is a better AQ command to use for moving nodes, so only coding
+ * this one for configuring the node.
+ */
+int
+ice_aq_cfg_lan_txq(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *buf,
+		   u16 buf_size, u16 num_qs, u8 oldport, u8 newport,
+		   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_cfg_txqs *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	cmd = &desc.params.cfg_txqs;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_cfg_txqs);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	if (!buf)
+		return -EINVAL;
+
+	cmd->cmd_type = ICE_AQC_Q_CFG_TC_CHNG;
+	cmd->num_qs = num_qs;
+	cmd->port_num_chng = (oldport & ICE_AQC_Q_CFG_SRC_PRT_M);
+	cmd->port_num_chng |= (newport << ICE_AQC_Q_CFG_DST_PRT_S) &
+			      ICE_AQC_Q_CFG_DST_PRT_M;
+	cmd->time_out = (5 << ICE_AQC_Q_CFG_TIMEOUT_S) &
+			ICE_AQC_Q_CFG_TIMEOUT_M;
+	cmd->blocked_cgds = 0;
+
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (status)
+		ice_debug(hw, ICE_DBG_SCHED, "Failed to reconfigure nodes %d\n",
+			  hw->adminq.sq_last_status);
+	return status;
+}
+
+/**
+ * ice_aq_add_rdma_qsets
+ * @hw: pointer to the hardware structure
+ * @num_qset_grps: Number of RDMA Qset groups
+ * @qset_list: list of Qset groups to be added
+ * @buf_size: size of buffer for indirect command
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add Tx RDMA Qsets (0x0C33)
+ */
+static int
+ice_aq_add_rdma_qsets(struct ice_hw *hw, u8 num_qset_grps,
+		      struct ice_aqc_add_rdma_qset_data *qset_list,
+		      u16 buf_size, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_rdma_qset_data *list;
+	struct ice_aqc_add_rdma_qset *cmd;
+	struct ice_aq_desc desc;
+	u16 i, sum_size = 0;
+
+	cmd = &desc.params.add_rdma_qset;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_rdma_qset);
+
+	if (num_qset_grps > ICE_LAN_TXQ_MAX_QGRPS)
+		return -EINVAL;
+
+	for (i = 0, list = qset_list; i < num_qset_grps; i++) {
+		u16 num_qsets = le16_to_cpu(list->num_qsets);
+
+		sum_size += struct_size(list, rdma_qsets, num_qsets);
+		list = (struct ice_aqc_add_rdma_qset_data *)(list->rdma_qsets +
+							     num_qsets);
+	}
+
+	if (buf_size != sum_size)
+		return -EINVAL;
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	cmd->num_qset_grps = num_qset_grps;
+
+	return ice_aq_send_cmd(hw, &desc, qset_list, buf_size, cd);
+}
+
+/* End of FW Admin Queue command wrappers */
+
+/**
+ * ice_write_byte - write a byte to a packed context structure
+ * @src_ctx:  the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info:  a description of the struct to be filled
+ */
+static void
+ice_write_byte(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+{
+	u8 src_byte, dest_byte, mask;
+	u8 *from, *dest;
+	u16 shift_width;
+
+	/* copy from the next struct field */
+	from = src_ctx + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+	mask = (u8)(BIT(ce_info->width) - 1);
+
+	src_byte = *from;
+	src_byte &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_byte <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = dest_ctx + (ce_info->lsb / 8);
+
+	memcpy(&dest_byte, dest, sizeof(dest_byte));
+
+	dest_byte &= ~mask;	/* get the bits not changing */
+	dest_byte |= src_byte;	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_byte, sizeof(dest_byte));
+}
+
+/**
+ * ice_write_word - write a word to a packed context structure
+ * @src_ctx:  the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info:  a description of the struct to be filled
+ */
+static void
+ice_write_word(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+{
+	u16 src_word, mask;
+	__le16 dest_word;
+	u8 *from, *dest;
+	u16 shift_width;
+
+	/* copy from the next struct field */
+	from = src_ctx + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+	mask = BIT(ce_info->width) - 1;
+
+	/* don't swizzle the bits until after the mask because the mask bits
+	 * will be in a different bit position on big endian machines
+	 */
+	src_word = *(u16 *)from;
+	src_word &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_word <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = dest_ctx + (ce_info->lsb / 8);
+
+	memcpy(&dest_word, dest, sizeof(dest_word));
+
+	dest_word &= ~(cpu_to_le16(mask));	/* get the bits not changing */
+	dest_word |= cpu_to_le16(src_word);	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_word, sizeof(dest_word));
+}
+
+/**
+ * ice_write_dword - write a dword to a packed context structure
+ * @src_ctx:  the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info:  a description of the struct to be filled
+ */
+static void
+ice_write_dword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+{
+	u32 src_dword, mask;
+	__le32 dest_dword;
+	u8 *from, *dest;
+	u16 shift_width;
+
+	/* copy from the next struct field */
+	from = src_ctx + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+
+	/* if the field width is exactly 32 on an x86 machine, then the shift
+	 * operation will not work because the SHL instructions count is masked
+	 * to 5 bits so the shift will do nothing
+	 */
+	if (ce_info->width < 32)
+		mask = BIT(ce_info->width) - 1;
+	else
+		mask = (u32)~0;
+
+	/* don't swizzle the bits until after the mask because the mask bits
+	 * will be in a different bit position on big endian machines
+	 */
+	src_dword = *(u32 *)from;
+	src_dword &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_dword <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = dest_ctx + (ce_info->lsb / 8);
+
+	memcpy(&dest_dword, dest, sizeof(dest_dword));
+
+	dest_dword &= ~(cpu_to_le32(mask));	/* get the bits not changing */
+	dest_dword |= cpu_to_le32(src_dword);	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_dword, sizeof(dest_dword));
+}
+
+/**
+ * ice_write_qword - write a qword to a packed context structure
+ * @src_ctx:  the context structure to read from
+ * @dest_ctx: the context to be written to
+ * @ce_info:  a description of the struct to be filled
+ */
+static void
+ice_write_qword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+{
+	u64 src_qword, mask;
+	__le64 dest_qword;
+	u8 *from, *dest;
+	u16 shift_width;
+
+	/* copy from the next struct field */
+	from = src_ctx + ce_info->offset;
+
+	/* prepare the bits and mask */
+	shift_width = ce_info->lsb % 8;
+
+	/* if the field width is exactly 64 on an x86 machine, then the shift
+	 * operation will not work because the SHL instructions count is masked
+	 * to 6 bits so the shift will do nothing
+	 */
+	if (ce_info->width < 64)
+		mask = BIT_ULL(ce_info->width) - 1;
+	else
+		mask = (u64)~0;
+
+	/* don't swizzle the bits until after the mask because the mask bits
+	 * will be in a different bit position on big endian machines
+	 */
+	src_qword = *(u64 *)from;
+	src_qword &= mask;
+
+	/* shift to correct alignment */
+	mask <<= shift_width;
+	src_qword <<= shift_width;
+
+	/* get the current bits from the target bit string */
+	dest = dest_ctx + (ce_info->lsb / 8);
+
+	memcpy(&dest_qword, dest, sizeof(dest_qword));
+
+	dest_qword &= ~(cpu_to_le64(mask));	/* get the bits not changing */
+	dest_qword |= cpu_to_le64(src_qword);	/* add in the new bits */
+
+	/* put it all back */
+	memcpy(dest, &dest_qword, sizeof(dest_qword));
+}
+
+/**
+ * ice_set_ctx - set context bits in packed structure
+ * @hw: pointer to the hardware structure
+ * @src_ctx:  pointer to a generic non-packed context structure
+ * @dest_ctx: pointer to memory for the packed structure
+ * @ce_info:  a description of the structure to be transformed
+ */
+int
+ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
+	    const struct ice_ctx_ele *ce_info)
+{
+	int f;
+
+	for (f = 0; ce_info[f].width; f++) {
+		/* We have to deal with each element of the FW response
+		 * using the correct size so that we are correct regardless
+		 * of the endianness of the machine.
+		 */
+		if (ce_info[f].width > (ce_info[f].size_of * BITS_PER_BYTE)) {
+			ice_debug(hw, ICE_DBG_QCTX, "Field %d width of %d bits larger than size of %d byte(s) ... skipping write\n",
+				  f, ce_info[f].width, ce_info[f].size_of);
+			continue;
+		}
+		switch (ce_info[f].size_of) {
+		case sizeof(u8):
+			ice_write_byte(src_ctx, dest_ctx, &ce_info[f]);
+			break;
+		case sizeof(u16):
+			ice_write_word(src_ctx, dest_ctx, &ce_info[f]);
+			break;
+		case sizeof(u32):
+			ice_write_dword(src_ctx, dest_ctx, &ce_info[f]);
+			break;
+		case sizeof(u64):
+			ice_write_qword(src_ctx, dest_ctx, &ce_info[f]);
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_lan_q_ctx - get the LAN queue context for the given VSI and TC
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @q_handle: software queue handle
+ */
+struct ice_q_ctx *
+ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle)
+{
+	struct ice_vsi_ctx *vsi;
+	struct ice_q_ctx *q_ctx;
+
+	vsi = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi)
+		return NULL;
+	if (q_handle >= vsi->num_lan_q_entries[tc])
+		return NULL;
+	if (!vsi->lan_q_ctx[tc])
+		return NULL;
+	q_ctx = vsi->lan_q_ctx[tc];
+	return &q_ctx[q_handle];
+}
+
+/**
+ * ice_ena_vsi_txq
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @q_handle: software queue handle
+ * @num_qgrps: Number of added queue groups
+ * @buf: list of queue groups to be added
+ * @buf_size: size of buffer for indirect command
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function adds one LAN queue
+ */
+int
+ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle,
+		u8 num_qgrps, struct ice_aqc_add_tx_qgrp *buf, u16 buf_size,
+		struct ice_sq_cd *cd)
+{
+	struct ice_aqc_txsched_elem_data node = { 0 };
+	struct ice_sched_node *parent;
+	struct ice_q_ctx *q_ctx;
+	struct ice_hw *hw;
+	int status;
+
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return -EIO;
+
+	if (num_qgrps > 1 || buf->num_txqs > 1)
+		return -ENOSPC;
+
+	hw = pi->hw;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+
+	mutex_lock(&pi->sched_lock);
+
+	q_ctx = ice_get_lan_q_ctx(hw, vsi_handle, tc, q_handle);
+	if (!q_ctx) {
+		ice_debug(hw, ICE_DBG_SCHED, "Enaq: invalid queue handle %d\n",
+			  q_handle);
+		status = -EINVAL;
+		goto ena_txq_exit;
+	}
+
+	/* find a parent node */
+	parent = ice_sched_get_free_qparent(pi, vsi_handle, tc,
+					    ICE_SCHED_NODE_OWNER_LAN);
+	if (!parent) {
+		status = -EINVAL;
+		goto ena_txq_exit;
+	}
+
+	buf->parent_teid = parent->info.node_teid;
+	node.parent_teid = parent->info.node_teid;
+	/* Mark that the values in the "generic" section as valid. The default
+	 * value in the "generic" section is zero. This means that :
+	 * - Scheduling mode is Bytes Per Second (BPS), indicated by Bit 0.
+	 * - 0 priority among siblings, indicated by Bit 1-3.
+	 * - WFQ, indicated by Bit 4.
+	 * - 0 Adjustment value is used in PSM credit update flow, indicated by
+	 * Bit 5-6.
+	 * - Bit 7 is reserved.
+	 * Without setting the generic section as valid in valid_sections, the
+	 * Admin queue command will fail with error code ICE_AQ_RC_EINVAL.
+	 */
+	buf->txqs[0].info.valid_sections =
+		ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
+		ICE_AQC_ELEM_VALID_EIR;
+	buf->txqs[0].info.generic = 0;
+	buf->txqs[0].info.cir_bw.bw_profile_idx =
+		cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+	buf->txqs[0].info.cir_bw.bw_alloc =
+		cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+	buf->txqs[0].info.eir_bw.bw_profile_idx =
+		cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+	buf->txqs[0].info.eir_bw.bw_alloc =
+		cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+
+	/* add the LAN queue */
+	status = ice_aq_add_lan_txq(hw, num_qgrps, buf, buf_size, cd);
+	if (status) {
+		ice_debug(hw, ICE_DBG_SCHED, "enable queue %d failed %d\n",
+			  le16_to_cpu(buf->txqs[0].txq_id),
+			  hw->adminq.sq_last_status);
+		goto ena_txq_exit;
+	}
+
+	node.node_teid = buf->txqs[0].q_teid;
+	node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF;
+	q_ctx->q_handle = q_handle;
+	q_ctx->q_teid = le32_to_cpu(node.node_teid);
+
+	/* add a leaf node into scheduler tree queue layer */
+	status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node, NULL);
+	if (!status)
+		status = ice_sched_replay_q_bw(pi, q_ctx);
+
+ena_txq_exit:
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_dis_vsi_txq
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @num_queues: number of queues
+ * @q_handles: pointer to software queue handle array
+ * @q_ids: pointer to the q_id array
+ * @q_teids: pointer to queue node teids
+ * @rst_src: if called due to reset, specifies the reset source
+ * @vmvf_num: the relative VM or VF number that is undergoing the reset
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function removes queues and their corresponding nodes in SW DB
+ */
+int
+ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
+		u16 *q_handles, u16 *q_ids, u32 *q_teids,
+		enum ice_disq_rst_src rst_src, u16 vmvf_num,
+		struct ice_sq_cd *cd)
+{
+	struct ice_aqc_dis_txq_item *qg_list;
+	struct ice_q_ctx *q_ctx;
+	int status = -ENOENT;
+	struct ice_hw *hw;
+	u16 i, buf_size;
+
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return -EIO;
+
+	hw = pi->hw;
+
+	if (!num_queues) {
+		/* if queue is disabled already yet the disable queue command
+		 * has to be sent to complete the VF reset, then call
+		 * ice_aq_dis_lan_txq without any queue information
+		 */
+		if (rst_src)
+			return ice_aq_dis_lan_txq(hw, 0, NULL, 0, rst_src,
+						  vmvf_num, NULL);
+		return -EIO;
+	}
+
+	buf_size = struct_size(qg_list, q_id, 1);
+	qg_list = kzalloc(buf_size, GFP_KERNEL);
+	if (!qg_list)
+		return -ENOMEM;
+
+	mutex_lock(&pi->sched_lock);
+
+	for (i = 0; i < num_queues; i++) {
+		struct ice_sched_node *node;
+
+		node = ice_sched_find_node_by_teid(pi->root, q_teids[i]);
+		if (!node)
+			continue;
+		q_ctx = ice_get_lan_q_ctx(hw, vsi_handle, tc, q_handles[i]);
+		if (!q_ctx) {
+			ice_debug(hw, ICE_DBG_SCHED, "invalid queue handle%d\n",
+				  q_handles[i]);
+			continue;
+		}
+		if (q_ctx->q_handle != q_handles[i]) {
+			ice_debug(hw, ICE_DBG_SCHED, "Err:handles %d %d\n",
+				  q_ctx->q_handle, q_handles[i]);
+			continue;
+		}
+		qg_list->parent_teid = node->info.parent_teid;
+		qg_list->num_qs = 1;
+		qg_list->q_id[0] = cpu_to_le16(q_ids[i]);
+		status = ice_aq_dis_lan_txq(hw, 1, qg_list, buf_size, rst_src,
+					    vmvf_num, cd);
+
+		if (status)
+			break;
+		ice_free_sched_node(pi, node);
+		q_ctx->q_handle = ICE_INVAL_Q_HANDLE;
+		q_ctx->q_teid = ICE_INVAL_TEID;
+	}
+	mutex_unlock(&pi->sched_lock);
+	kfree(qg_list);
+	return status;
+}
+
+/**
+ * ice_cfg_vsi_qs - configure the new/existing VSI queues
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap
+ * @maxqs: max queues array per TC
+ * @owner: LAN or RDMA
+ *
+ * This function adds/updates the VSI queues per TC.
+ */
+static int
+ice_cfg_vsi_qs(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap,
+	       u16 *maxqs, u8 owner)
+{
+	int status = 0;
+	u8 i;
+
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return -EIO;
+
+	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+		return -EINVAL;
+
+	mutex_lock(&pi->sched_lock);
+
+	ice_for_each_traffic_class(i) {
+		/* configuration is possible only if TC node is present */
+		if (!ice_sched_get_tc_node(pi, i))
+			continue;
+
+		status = ice_sched_cfg_vsi(pi, vsi_handle, i, maxqs[i], owner,
+					   ice_is_tc_ena(tc_bitmap, i));
+		if (status)
+			break;
+	}
+
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_cfg_vsi_lan - configure VSI LAN queues
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap
+ * @max_lanqs: max LAN queues array per TC
+ *
+ * This function adds/updates the VSI LAN queues per TC.
+ */
+int
+ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap,
+		u16 *max_lanqs)
+{
+	return ice_cfg_vsi_qs(pi, vsi_handle, tc_bitmap, max_lanqs,
+			      ICE_SCHED_NODE_OWNER_LAN);
+}
+
+/**
+ * ice_cfg_vsi_rdma - configure the VSI RDMA queues
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap
+ * @max_rdmaqs: max RDMA queues array per TC
+ *
+ * This function adds/updates the VSI RDMA queues per TC.
+ */
+int
+ice_cfg_vsi_rdma(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap,
+		 u16 *max_rdmaqs)
+{
+	return ice_cfg_vsi_qs(pi, vsi_handle, tc_bitmap, max_rdmaqs,
+			      ICE_SCHED_NODE_OWNER_RDMA);
+}
+
+/**
+ * ice_ena_vsi_rdma_qset
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @rdma_qset: pointer to RDMA Qset
+ * @num_qsets: number of RDMA Qsets
+ * @qset_teid: pointer to Qset node TEIDs
+ *
+ * This function adds RDMA Qset
+ */
+int
+ice_ena_vsi_rdma_qset(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		      u16 *rdma_qset, u16 num_qsets, u32 *qset_teid)
+{
+	struct ice_aqc_txsched_elem_data node = { 0 };
+	struct ice_aqc_add_rdma_qset_data *buf;
+	struct ice_sched_node *parent;
+	struct ice_hw *hw;
+	u16 i, buf_size;
+	int ret;
+
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return -EIO;
+	hw = pi->hw;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+
+	buf_size = struct_size(buf, rdma_qsets, num_qsets);
+	buf = kzalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	mutex_lock(&pi->sched_lock);
+
+	parent = ice_sched_get_free_qparent(pi, vsi_handle, tc,
+					    ICE_SCHED_NODE_OWNER_RDMA);
+	if (!parent) {
+		ret = -EINVAL;
+		goto rdma_error_exit;
+	}
+	buf->parent_teid = parent->info.node_teid;
+	node.parent_teid = parent->info.node_teid;
+
+	buf->num_qsets = cpu_to_le16(num_qsets);
+	for (i = 0; i < num_qsets; i++) {
+		buf->rdma_qsets[i].tx_qset_id = cpu_to_le16(rdma_qset[i]);
+		buf->rdma_qsets[i].info.valid_sections =
+			ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
+			ICE_AQC_ELEM_VALID_EIR;
+		buf->rdma_qsets[i].info.generic = 0;
+		buf->rdma_qsets[i].info.cir_bw.bw_profile_idx =
+			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+		buf->rdma_qsets[i].info.cir_bw.bw_alloc =
+			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+		buf->rdma_qsets[i].info.eir_bw.bw_profile_idx =
+			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+		buf->rdma_qsets[i].info.eir_bw.bw_alloc =
+			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+	}
+	ret = ice_aq_add_rdma_qsets(hw, 1, buf, buf_size, NULL);
+	if (ret) {
+		ice_debug(hw, ICE_DBG_RDMA, "add RDMA qset failed\n");
+		goto rdma_error_exit;
+	}
+	node.data.elem_type = ICE_AQC_ELEM_TYPE_LEAF;
+	for (i = 0; i < num_qsets; i++) {
+		node.node_teid = buf->rdma_qsets[i].qset_teid;
+		ret = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1,
+					 &node, NULL);
+		if (ret)
+			break;
+		qset_teid[i] = le32_to_cpu(node.node_teid);
+	}
+rdma_error_exit:
+	mutex_unlock(&pi->sched_lock);
+	kfree(buf);
+	return ret;
+}
+
+/**
+ * ice_dis_vsi_rdma_qset - free RDMA resources
+ * @pi: port_info struct
+ * @count: number of RDMA Qsets to free
+ * @qset_teid: TEID of Qset node
+ * @q_id: list of queue IDs being disabled
+ */
+int
+ice_dis_vsi_rdma_qset(struct ice_port_info *pi, u16 count, u32 *qset_teid,
+		      u16 *q_id)
+{
+	struct ice_aqc_dis_txq_item *qg_list;
+	struct ice_hw *hw;
+	int status = 0;
+	u16 qg_size;
+	int i;
+
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return -EIO;
+
+	hw = pi->hw;
+
+	qg_size = struct_size(qg_list, q_id, 1);
+	qg_list = kzalloc(qg_size, GFP_KERNEL);
+	if (!qg_list)
+		return -ENOMEM;
+
+	mutex_lock(&pi->sched_lock);
+
+	for (i = 0; i < count; i++) {
+		struct ice_sched_node *node;
+
+		node = ice_sched_find_node_by_teid(pi->root, qset_teid[i]);
+		if (!node)
+			continue;
+
+		qg_list->parent_teid = node->info.parent_teid;
+		qg_list->num_qs = 1;
+		qg_list->q_id[0] =
+			cpu_to_le16(q_id[i] |
+				    ICE_AQC_Q_DIS_BUF_ELEM_TYPE_RDMA_QSET);
+
+		status = ice_aq_dis_lan_txq(hw, 1, qg_list, qg_size,
+					    ICE_NO_RESET, 0, NULL);
+		if (status)
+			break;
+
+		ice_free_sched_node(pi, node);
+	}
+
+	mutex_unlock(&pi->sched_lock);
+	kfree(qg_list);
+	return status;
+}
+
+/**
+ * ice_replay_pre_init - replay pre initialization
+ * @hw: pointer to the HW struct
+ *
+ * Initializes required config data for VSI, FD, ACL, and RSS before replay.
+ */
+static int ice_replay_pre_init(struct ice_hw *hw)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	u8 i;
+
+	/* Delete old entries from replay filter list head if there is any */
+	ice_rm_all_sw_replay_rule_info(hw);
+	/* In start of replay, move entries into replay_rules list, it
+	 * will allow adding rules entries back to filt_rules list,
+	 * which is operational list.
+	 */
+	for (i = 0; i < ICE_MAX_NUM_RECIPES; i++)
+		list_replace_init(&sw->recp_list[i].filt_rules,
+				  &sw->recp_list[i].filt_replay_rules);
+	ice_sched_replay_agg_vsi_preinit(hw);
+
+	return 0;
+}
+
+/**
+ * ice_replay_vsi - replay VSI configuration
+ * @hw: pointer to the HW struct
+ * @vsi_handle: driver VSI handle
+ *
+ * Restore all VSI configuration after reset. It is required to call this
+ * function with main VSI first.
+ */
+int ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle)
+{
+	int status;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+
+	/* Replay pre-initialization if there is any */
+	if (vsi_handle == ICE_MAIN_VSI_HANDLE) {
+		status = ice_replay_pre_init(hw);
+		if (status)
+			return status;
+	}
+	/* Replay per VSI all RSS configurations */
+	status = ice_replay_rss_cfg(hw, vsi_handle);
+	if (status)
+		return status;
+	/* Replay per VSI all filters */
+	status = ice_replay_vsi_all_fltr(hw, vsi_handle);
+	if (!status)
+		status = ice_replay_vsi_agg(hw, vsi_handle);
+	return status;
+}
+
+/**
+ * ice_replay_post - post replay configuration cleanup
+ * @hw: pointer to the HW struct
+ *
+ * Post replay cleanup.
+ */
+void ice_replay_post(struct ice_hw *hw)
+{
+	/* Delete old entries from replay filter list head */
+	ice_rm_all_sw_replay_rule_info(hw);
+	ice_sched_replay_agg(hw);
+}
+
+/**
+ * ice_stat_update40 - read 40 bit stat from the chip and update stat values
+ * @hw: ptr to the hardware info
+ * @reg: offset of 64 bit HW register to read from
+ * @prev_stat_loaded: bool to specify if previous stats are loaded
+ * @prev_stat: ptr to previous loaded stat value
+ * @cur_stat: ptr to current stat value
+ */
+void
+ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+		  u64 *prev_stat, u64 *cur_stat)
+{
+	u64 new_data = rd64(hw, reg) & (BIT_ULL(40) - 1);
+
+	/* device stats are not reset at PFR, they likely will not be zeroed
+	 * when the driver starts. Thus, save the value from the first read
+	 * without adding to the statistic value so that we report stats which
+	 * count up from zero.
+	 */
+	if (!prev_stat_loaded) {
+		*prev_stat = new_data;
+		return;
+	}
+
+	/* Calculate the difference between the new and old values, and then
+	 * add it to the software stat value.
+	 */
+	if (new_data >= *prev_stat)
+		*cur_stat += new_data - *prev_stat;
+	else
+		/* to manage the potential roll-over */
+		*cur_stat += (new_data + BIT_ULL(40)) - *prev_stat;
+
+	/* Update the previously stored value to prepare for next read */
+	*prev_stat = new_data;
+}
+
+/**
+ * ice_stat_update32 - read 32 bit stat from the chip and update stat values
+ * @hw: ptr to the hardware info
+ * @reg: offset of HW register to read from
+ * @prev_stat_loaded: bool to specify if previous stats are loaded
+ * @prev_stat: ptr to previous loaded stat value
+ * @cur_stat: ptr to current stat value
+ */
+void
+ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+		  u64 *prev_stat, u64 *cur_stat)
+{
+	u32 new_data;
+
+	new_data = rd32(hw, reg);
+
+	/* device stats are not reset at PFR, they likely will not be zeroed
+	 * when the driver starts. Thus, save the value from the first read
+	 * without adding to the statistic value so that we report stats which
+	 * count up from zero.
+	 */
+	if (!prev_stat_loaded) {
+		*prev_stat = new_data;
+		return;
+	}
+
+	/* Calculate the difference between the new and old values, and then
+	 * add it to the software stat value.
+	 */
+	if (new_data >= *prev_stat)
+		*cur_stat += new_data - *prev_stat;
+	else
+		/* to manage the potential roll-over */
+		*cur_stat += (new_data + BIT_ULL(32)) - *prev_stat;
+
+	/* Update the previously stored value to prepare for next read */
+	*prev_stat = new_data;
+}
+
+/**
+ * ice_sched_query_elem - query element information from HW
+ * @hw: pointer to the HW struct
+ * @node_teid: node TEID to be queried
+ * @buf: buffer to element information
+ *
+ * This function queries HW element information
+ */
+int
+ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
+		     struct ice_aqc_txsched_elem_data *buf)
+{
+	u16 buf_size, num_elem_ret = 0;
+	int status;
+
+	buf_size = sizeof(*buf);
+	memset(buf, 0, buf_size);
+	buf->node_teid = cpu_to_le32(node_teid);
+	status = ice_aq_query_sched_elems(hw, 1, buf, buf_size, &num_elem_ret,
+					  NULL);
+	if (status || num_elem_ret != 1)
+		ice_debug(hw, ICE_DBG_SCHED, "query element failed\n");
+	return status;
+}
+
+/**
+ * ice_aq_read_i2c
+ * @hw: pointer to the hw struct
+ * @topo_addr: topology address for a device to communicate with
+ * @bus_addr: 7-bit I2C bus address
+ * @addr: I2C memory address (I2C offset) with up to 16 bits
+ * @params: I2C parameters: bit [7] - Repeated start,
+ *			    bits [6:5] data offset size,
+ *			    bit [4] - I2C address type,
+ *			    bits [3:0] - data size to read (0-16 bytes)
+ * @data: pointer to data (0 to 16 bytes) to be read from the I2C device
+ * @cd: pointer to command details structure or NULL
+ *
+ * Read I2C (0x06E2)
+ */
+int
+ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr,
+		u16 bus_addr, __le16 addr, u8 params, u8 *data,
+		struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc = { 0 };
+	struct ice_aqc_i2c *cmd;
+	u8 data_size;
+	int status;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_read_i2c);
+	cmd = &desc.params.read_write_i2c;
+
+	if (!data)
+		return -EINVAL;
+
+	data_size = FIELD_GET(ICE_AQC_I2C_DATA_SIZE_M, params);
+
+	cmd->i2c_bus_addr = cpu_to_le16(bus_addr);
+	cmd->topo_addr = topo_addr;
+	cmd->i2c_params = params;
+	cmd->i2c_addr = addr;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+	if (!status) {
+		struct ice_aqc_read_i2c_resp *resp;
+		u8 i;
+
+		resp = &desc.params.read_i2c_resp;
+		for (i = 0; i < data_size; i++) {
+			*data = resp->i2c_data[i];
+			data++;
+		}
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_write_i2c
+ * @hw: pointer to the hw struct
+ * @topo_addr: topology address for a device to communicate with
+ * @bus_addr: 7-bit I2C bus address
+ * @addr: I2C memory address (I2C offset) with up to 16 bits
+ * @params: I2C parameters: bit [4] - I2C address type, bits [3:0] - data size to write (0-7 bytes)
+ * @data: pointer to data (0 to 4 bytes) to be written to the I2C device
+ * @cd: pointer to command details structure or NULL
+ *
+ * Write I2C (0x06E3)
+ *
+ * * Return:
+ * * 0             - Successful write to the i2c device
+ * * -EINVAL       - Data size greater than 4 bytes
+ * * -EIO          - FW error
+ */
+int
+ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr,
+		 u16 bus_addr, __le16 addr, u8 params, const u8 *data,
+		 struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc = { 0 };
+	struct ice_aqc_i2c *cmd;
+	u8 data_size;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_write_i2c);
+	cmd = &desc.params.read_write_i2c;
+
+	data_size = FIELD_GET(ICE_AQC_I2C_DATA_SIZE_M, params);
+
+	/* data_size limited to 4 */
+	if (data_size > 4)
+		return -EINVAL;
+
+	cmd->i2c_bus_addr = cpu_to_le16(bus_addr);
+	cmd->topo_addr = topo_addr;
+	cmd->i2c_params = params;
+	cmd->i2c_addr = addr;
+
+	memcpy(cmd->i2c_data, data, data_size);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_set_driver_param - Set driver parameter to share via firmware
+ * @hw: pointer to the HW struct
+ * @idx: parameter index to set
+ * @value: the value to set the parameter to
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set the value of one of the software defined parameters. All PFs connected
+ * to this device can read the value using ice_aq_get_driver_param.
+ *
+ * Note that firmware provides no synchronization or locking, and will not
+ * save the parameter value during a device reset. It is expected that
+ * a single PF will write the parameter value, while all other PFs will only
+ * read it.
+ */
+int
+ice_aq_set_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx,
+			u32 value, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_driver_shared_params *cmd;
+	struct ice_aq_desc desc;
+
+	if (idx >= ICE_AQC_DRIVER_PARAM_MAX)
+		return -EIO;
+
+	cmd = &desc.params.drv_shared_params;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_driver_shared_params);
+
+	cmd->set_or_get_op = ICE_AQC_DRIVER_PARAM_SET;
+	cmd->param_indx = idx;
+	cmd->param_val = cpu_to_le32(value);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_get_driver_param - Get driver parameter shared via firmware
+ * @hw: pointer to the HW struct
+ * @idx: parameter index to set
+ * @value: storage to return the shared parameter
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get the value of one of the software defined parameters.
+ *
+ * Note that firmware provides no synchronization or locking. It is expected
+ * that only a single PF will write a given parameter.
+ */
+int
+ice_aq_get_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx,
+			u32 *value, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_driver_shared_params *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	if (idx >= ICE_AQC_DRIVER_PARAM_MAX)
+		return -EIO;
+
+	cmd = &desc.params.drv_shared_params;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_driver_shared_params);
+
+	cmd->set_or_get_op = ICE_AQC_DRIVER_PARAM_GET;
+	cmd->param_indx = idx;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+	if (status)
+		return status;
+
+	*value = le32_to_cpu(cmd->param_val);
+
+	return 0;
+}
+
+/**
+ * ice_aq_set_gpio
+ * @hw: pointer to the hw struct
+ * @gpio_ctrl_handle: GPIO controller node handle
+ * @pin_idx: IO Number of the GPIO that needs to be set
+ * @value: SW provide IO value to set in the LSB
+ * @cd: pointer to command details structure or NULL
+ *
+ * Sends 0x06EC AQ command to set the GPIO pin state that's part of the topology
+ */
+int
+ice_aq_set_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, bool value,
+		struct ice_sq_cd *cd)
+{
+	struct ice_aqc_gpio *cmd;
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_gpio);
+	cmd = &desc.params.read_write_gpio;
+	cmd->gpio_ctrl_handle = cpu_to_le16(gpio_ctrl_handle);
+	cmd->gpio_num = pin_idx;
+	cmd->gpio_val = value ? 1 : 0;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_get_gpio
+ * @hw: pointer to the hw struct
+ * @gpio_ctrl_handle: GPIO controller node handle
+ * @pin_idx: IO Number of the GPIO that needs to be set
+ * @value: IO value read
+ * @cd: pointer to command details structure or NULL
+ *
+ * Sends 0x06ED AQ command to get the value of a GPIO signal which is part of
+ * the topology
+ */
+int
+ice_aq_get_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx,
+		bool *value, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_gpio *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_gpio);
+	cmd = &desc.params.read_write_gpio;
+	cmd->gpio_ctrl_handle = cpu_to_le16(gpio_ctrl_handle);
+	cmd->gpio_num = pin_idx;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+	if (status)
+		return status;
+
+	*value = !!cmd->gpio_val;
+	return 0;
+}
+
+/**
+ * ice_is_fw_api_min_ver
+ * @hw: pointer to the hardware structure
+ * @maj: major version
+ * @min: minor version
+ * @patch: patch version
+ *
+ * Checks if the firmware API is minimum version
+ */
+static bool ice_is_fw_api_min_ver(struct ice_hw *hw, u8 maj, u8 min, u8 patch)
+{
+	if (hw->api_maj_ver == maj) {
+		if (hw->api_min_ver > min)
+			return true;
+		if (hw->api_min_ver == min && hw->api_patch >= patch)
+			return true;
+	} else if (hw->api_maj_ver > maj) {
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_fw_supports_link_override
+ * @hw: pointer to the hardware structure
+ *
+ * Checks if the firmware supports link override
+ */
+bool ice_fw_supports_link_override(struct ice_hw *hw)
+{
+	return ice_is_fw_api_min_ver(hw, ICE_FW_API_LINK_OVERRIDE_MAJ,
+				     ICE_FW_API_LINK_OVERRIDE_MIN,
+				     ICE_FW_API_LINK_OVERRIDE_PATCH);
+}
+
+/**
+ * ice_get_link_default_override
+ * @ldo: pointer to the link default override struct
+ * @pi: pointer to the port info struct
+ *
+ * Gets the link default override for a port
+ */
+int
+ice_get_link_default_override(struct ice_link_default_override_tlv *ldo,
+			      struct ice_port_info *pi)
+{
+	u16 i, tlv, tlv_len, tlv_start, buf, offset;
+	struct ice_hw *hw = pi->hw;
+	int status;
+
+	status = ice_get_pfa_module_tlv(hw, &tlv, &tlv_len,
+					ICE_SR_LINK_DEFAULT_OVERRIDE_PTR);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read link override TLV.\n");
+		return status;
+	}
+
+	/* Each port has its own config; calculate for our port */
+	tlv_start = tlv + pi->lport * ICE_SR_PFA_LINK_OVERRIDE_WORDS +
+		ICE_SR_PFA_LINK_OVERRIDE_OFFSET;
+
+	/* link options first */
+	status = ice_read_sr_word(hw, tlv_start, &buf);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read override link options.\n");
+		return status;
+	}
+	ldo->options = buf & ICE_LINK_OVERRIDE_OPT_M;
+	ldo->phy_config = (buf & ICE_LINK_OVERRIDE_PHY_CFG_M) >>
+		ICE_LINK_OVERRIDE_PHY_CFG_S;
+
+	/* link PHY config */
+	offset = tlv_start + ICE_SR_PFA_LINK_OVERRIDE_FEC_OFFSET;
+	status = ice_read_sr_word(hw, offset, &buf);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read override phy config.\n");
+		return status;
+	}
+	ldo->fec_options = buf & ICE_LINK_OVERRIDE_FEC_OPT_M;
+
+	/* PHY types low */
+	offset = tlv_start + ICE_SR_PFA_LINK_OVERRIDE_PHY_OFFSET;
+	for (i = 0; i < ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS; i++) {
+		status = ice_read_sr_word(hw, (offset + i), &buf);
+		if (status) {
+			ice_debug(hw, ICE_DBG_INIT, "Failed to read override link options.\n");
+			return status;
+		}
+		/* shift 16 bits at a time to fill 64 bits */
+		ldo->phy_type_low |= ((u64)buf << (i * 16));
+	}
+
+	/* PHY types high */
+	offset = tlv_start + ICE_SR_PFA_LINK_OVERRIDE_PHY_OFFSET +
+		ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS;
+	for (i = 0; i < ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS; i++) {
+		status = ice_read_sr_word(hw, (offset + i), &buf);
+		if (status) {
+			ice_debug(hw, ICE_DBG_INIT, "Failed to read override link options.\n");
+			return status;
+		}
+		/* shift 16 bits at a time to fill 64 bits */
+		ldo->phy_type_high |= ((u64)buf << (i * 16));
+	}
+
+	return status;
+}
+
+/**
+ * ice_is_phy_caps_an_enabled - check if PHY capabilities autoneg is enabled
+ * @caps: get PHY capability data
+ */
+bool ice_is_phy_caps_an_enabled(struct ice_aqc_get_phy_caps_data *caps)
+{
+	if (caps->caps & ICE_AQC_PHY_AN_MODE ||
+	    caps->low_power_ctrl_an & (ICE_AQC_PHY_AN_EN_CLAUSE28 |
+				       ICE_AQC_PHY_AN_EN_CLAUSE73 |
+				       ICE_AQC_PHY_AN_EN_CLAUSE37))
+		return true;
+
+	return false;
+}
+
+/**
+ * ice_aq_set_lldp_mib - Set the LLDP MIB
+ * @hw: pointer to the HW struct
+ * @mib_type: Local, Remote or both Local and Remote MIBs
+ * @buf: pointer to the caller-supplied buffer to store the MIB block
+ * @buf_size: size of the buffer (in bytes)
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set the LLDP MIB. (0x0A08)
+ */
+int
+ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
+		    struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_set_local_mib *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.lldp_set_mib;
+
+	if (buf_size == 0 || !buf)
+		return -EINVAL;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_set_local_mib);
+
+	desc.flags |= cpu_to_le16((u16)ICE_AQ_FLAG_RD);
+	desc.datalen = cpu_to_le16(buf_size);
+
+	cmd->type = mib_type;
+	cmd->length = cpu_to_le16(buf_size);
+
+	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_fw_supports_lldp_fltr_ctrl - check NVM version supports lldp_fltr_ctrl
+ * @hw: pointer to HW struct
+ */
+bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw)
+{
+	if (hw->mac_type != ICE_MAC_E810)
+		return false;
+
+	return ice_is_fw_api_min_ver(hw, ICE_FW_API_LLDP_FLTR_MAJ,
+				     ICE_FW_API_LLDP_FLTR_MIN,
+				     ICE_FW_API_LLDP_FLTR_PATCH);
+}
+
+/**
+ * ice_lldp_fltr_add_remove - add or remove a LLDP Rx switch filter
+ * @hw: pointer to HW struct
+ * @vsi_num: absolute HW index for VSI
+ * @add: boolean for if adding or removing a filter
+ */
+int
+ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add)
+{
+	struct ice_aqc_lldp_filter_ctrl *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.lldp_filter_ctrl;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_filter_ctrl);
+
+	if (add)
+		cmd->cmd_flags = ICE_AQC_LLDP_FILTER_ACTION_ADD;
+	else
+		cmd->cmd_flags = ICE_AQC_LLDP_FILTER_ACTION_DELETE;
+
+	cmd->vsi_num = cpu_to_le16(vsi_num);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_lldp_execute_pending_mib - execute LLDP pending MIB request
+ * @hw: pointer to HW struct
+ */
+int ice_lldp_execute_pending_mib(struct ice_hw *hw)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_execute_pending_mib);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/**
+ * ice_fw_supports_report_dflt_cfg
+ * @hw: pointer to the hardware structure
+ *
+ * Checks if the firmware supports report default configuration
+ */
+bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw)
+{
+	return ice_is_fw_api_min_ver(hw, ICE_FW_API_REPORT_DFLT_CFG_MAJ,
+				     ICE_FW_API_REPORT_DFLT_CFG_MIN,
+				     ICE_FW_API_REPORT_DFLT_CFG_PATCH);
+}
+
+/* each of the indexes into the following array match the speed of a return
+ * value from the list of AQ returned speeds like the range:
+ * ICE_AQ_LINK_SPEED_10MB .. ICE_AQ_LINK_SPEED_100GB excluding
+ * ICE_AQ_LINK_SPEED_UNKNOWN which is BIT(15) and maps to BIT(14) in this
+ * array. The array is defined as 15 elements long because the link_speed
+ * returned by the firmware is a 16 bit * value, but is indexed
+ * by [fls(speed) - 1]
+ */
+static const u32 ice_aq_to_link_speed[] = {
+	SPEED_10,	/* BIT(0) */
+	SPEED_100,
+	SPEED_1000,
+	SPEED_2500,
+	SPEED_5000,
+	SPEED_10000,
+	SPEED_20000,
+	SPEED_25000,
+	SPEED_40000,
+	SPEED_50000,
+	SPEED_100000,	/* BIT(10) */
+};
+
+/**
+ * ice_get_link_speed - get integer speed from table
+ * @index: array index from fls(aq speed) - 1
+ *
+ * Returns: u32 value containing integer speed
+ */
+u32 ice_get_link_speed(u16 index)
+{
+	if (index >= ARRAY_SIZE(ice_aq_to_link_speed))
+		return 0;
+
+	return ice_aq_to_link_speed[index];
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
new file mode 100644
index 0000000000..226b81f97a
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_COMMON_H_
+#define _ICE_COMMON_H_
+
+#include <linux/bitfield.h>
+
+#include "ice_type.h"
+#include "ice_nvm.h"
+#include "ice_flex_pipe.h"
+#include <linux/avf/virtchnl.h>
+#include "ice_switch.h"
+#include "ice_fdir.h"
+
+#define ICE_SQ_SEND_DELAY_TIME_MS	10
+#define ICE_SQ_SEND_MAX_EXECUTE		3
+
+int ice_init_hw(struct ice_hw *hw);
+void ice_deinit_hw(struct ice_hw *hw);
+int ice_check_reset(struct ice_hw *hw);
+int ice_reset(struct ice_hw *hw, enum ice_reset_req req);
+int ice_create_all_ctrlq(struct ice_hw *hw);
+int ice_init_all_ctrlq(struct ice_hw *hw);
+void ice_shutdown_all_ctrlq(struct ice_hw *hw);
+void ice_destroy_all_ctrlq(struct ice_hw *hw);
+int
+ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+		  struct ice_rq_event_info *e, u16 *pending);
+int
+ice_get_link_status(struct ice_port_info *pi, bool *link_up);
+int ice_update_link_info(struct ice_port_info *pi);
+int
+ice_acquire_res(struct ice_hw *hw, enum ice_aq_res_ids res,
+		enum ice_aq_res_access_type access, u32 timeout);
+void ice_release_res(struct ice_hw *hw, enum ice_aq_res_ids res);
+int
+ice_alloc_hw_res(struct ice_hw *hw, u16 type, u16 num, bool btm, u16 *res);
+int
+ice_free_hw_res(struct ice_hw *hw, u16 type, u16 num, u16 *res);
+int ice_aq_alloc_free_res(struct ice_hw *hw,
+			  struct ice_aqc_alloc_free_res_elem *buf, u16 buf_size,
+			  enum ice_adminq_opc opc);
+bool ice_is_sbq_supported(struct ice_hw *hw);
+struct ice_ctl_q_info *ice_get_sbq(struct ice_hw *hw);
+int
+ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+		struct ice_aq_desc *desc, void *buf, u16 buf_size,
+		struct ice_sq_cd *cd);
+void ice_clear_pxe_mode(struct ice_hw *hw);
+int ice_get_caps(struct ice_hw *hw);
+
+void ice_set_safe_mode_caps(struct ice_hw *hw);
+
+int
+ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
+		  u32 rxq_index);
+
+int
+ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params);
+int
+ice_aq_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *set_params);
+int
+ice_aq_get_rss_key(struct ice_hw *hw, u16 vsi_handle,
+		   struct ice_aqc_get_set_rss_keys *keys);
+int
+ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_handle,
+		   struct ice_aqc_get_set_rss_keys *keys);
+
+bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq);
+int ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
+void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
+extern const struct ice_ctx_ele ice_tlan_ctx_info[];
+int
+ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
+	    const struct ice_ctx_ele *ce_info);
+
+extern struct mutex ice_global_cfg_lock_sw;
+
+int
+ice_aq_send_cmd(struct ice_hw *hw, struct ice_aq_desc *desc,
+		void *buf, u16 buf_size, struct ice_sq_cd *cd);
+int ice_aq_get_fw_ver(struct ice_hw *hw, struct ice_sq_cd *cd);
+
+int
+ice_aq_send_driver_ver(struct ice_hw *hw, struct ice_driver_ver *dv,
+		       struct ice_sq_cd *cd);
+int
+ice_aq_set_port_params(struct ice_port_info *pi, bool double_vlan,
+		       struct ice_sq_cd *cd);
+int
+ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode,
+		    struct ice_aqc_get_phy_caps_data *caps,
+		    struct ice_sq_cd *cd);
+bool ice_is_pf_c827(struct ice_hw *hw);
+int
+ice_aq_list_caps(struct ice_hw *hw, void *buf, u16 buf_size, u32 *cap_count,
+		 enum ice_adminq_opc opc, struct ice_sq_cd *cd);
+int
+ice_discover_dev_caps(struct ice_hw *hw, struct ice_hw_dev_caps *dev_caps);
+void
+ice_update_phy_type(u64 *phy_type_low, u64 *phy_type_high,
+		    u16 link_speeds_bitmap);
+int
+ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags,
+			struct ice_sq_cd *cd);
+bool ice_is_e810(struct ice_hw *hw);
+int ice_clear_pf_cfg(struct ice_hw *hw);
+int
+ice_aq_set_phy_cfg(struct ice_hw *hw, struct ice_port_info *pi,
+		   struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd);
+bool ice_fw_supports_link_override(struct ice_hw *hw);
+int
+ice_get_link_default_override(struct ice_link_default_override_tlv *ldo,
+			      struct ice_port_info *pi);
+bool ice_is_phy_caps_an_enabled(struct ice_aqc_get_phy_caps_data *caps);
+
+enum ice_fc_mode ice_caps_to_fc_mode(u8 caps);
+enum ice_fec_mode ice_caps_to_fec_mode(u8 caps, u8 fec_options);
+int
+ice_set_fc(struct ice_port_info *pi, u8 *aq_failures,
+	   bool ena_auto_link_update);
+int
+ice_cfg_phy_fc(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+	       enum ice_fc_mode req_mode);
+bool
+ice_phy_caps_equals_cfg(struct ice_aqc_get_phy_caps_data *caps,
+			struct ice_aqc_set_phy_cfg_data *cfg);
+void
+ice_copy_phy_caps_to_cfg(struct ice_port_info *pi,
+			 struct ice_aqc_get_phy_caps_data *caps,
+			 struct ice_aqc_set_phy_cfg_data *cfg);
+int
+ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
+		enum ice_fec_mode fec);
+int
+ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
+			   struct ice_sq_cd *cd);
+int
+ice_aq_set_mac_cfg(struct ice_hw *hw, u16 max_frame_size, struct ice_sq_cd *cd);
+int
+ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
+		     struct ice_link_status *link, struct ice_sq_cd *cd);
+int
+ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
+		      struct ice_sq_cd *cd);
+int
+ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd);
+
+int
+ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
+		       struct ice_sq_cd *cd);
+int
+ice_aq_get_port_options(struct ice_hw *hw,
+			struct ice_aqc_get_port_options_elem *options,
+			u8 *option_count, u8 lport, bool lport_valid,
+			u8 *active_option_idx, bool *active_option_valid,
+			u8 *pending_option_idx, bool *pending_option_valid);
+int
+ice_aq_set_port_option(struct ice_hw *hw, u8 lport, u8 lport_valid,
+		       u8 new_option);
+int
+ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr,
+		  u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length,
+		  bool write, struct ice_sq_cd *cd);
+u32 ice_get_link_speed(u16 index);
+
+int
+ice_cfg_vsi_rdma(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap,
+		 u16 *max_rdmaqs);
+int
+ice_ena_vsi_rdma_qset(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		      u16 *rdma_qset, u16 num_qsets, u32 *qset_teid);
+int
+ice_dis_vsi_rdma_qset(struct ice_port_info *pi, u16 count, u32 *qset_teid,
+		      u16 *q_id);
+int
+ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
+		u16 *q_handle, u16 *q_ids, u32 *q_teids,
+		enum ice_disq_rst_src rst_src, u16 vmvf_num,
+		struct ice_sq_cd *cd);
+int
+ice_cfg_vsi_lan(struct ice_port_info *pi, u16 vsi_handle, u8 tc_bitmap,
+		u16 *max_lanqs);
+int
+ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle,
+		u8 num_qgrps, struct ice_aqc_add_tx_qgrp *buf, u16 buf_size,
+		struct ice_sq_cd *cd);
+int
+ice_aq_cfg_lan_txq(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *buf,
+		   u16 buf_size, u16 num_qs, u8 oldport, u8 newport,
+		   struct ice_sq_cd *cd);
+int ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle);
+void ice_replay_post(struct ice_hw *hw);
+void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf);
+struct ice_q_ctx *
+ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle);
+int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in);
+void
+ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+		  u64 *prev_stat, u64 *cur_stat);
+void
+ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded,
+		  u64 *prev_stat, u64 *cur_stat);
+bool ice_is_e810t(struct ice_hw *hw);
+bool ice_is_e823(struct ice_hw *hw);
+int
+ice_sched_query_elem(struct ice_hw *hw, u32 node_teid,
+		     struct ice_aqc_txsched_elem_data *buf);
+int
+ice_aq_set_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx,
+			u32 value, struct ice_sq_cd *cd);
+int
+ice_aq_get_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx,
+			u32 *value, struct ice_sq_cd *cd);
+int
+ice_aq_set_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, bool value,
+		struct ice_sq_cd *cd);
+int
+ice_aq_get_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx,
+		bool *value, struct ice_sq_cd *cd);
+bool ice_is_100m_speed_supported(struct ice_hw *hw);
+int
+ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
+		    struct ice_sq_cd *cd);
+bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw);
+int
+ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add);
+int ice_lldp_execute_pending_mib(struct ice_hw *hw);
+int
+ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr,
+		u16 bus_addr, __le16 addr, u8 params, u8 *data,
+		struct ice_sq_cd *cd);
+int
+ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr,
+		 u16 bus_addr, __le16 addr, u8 params, const u8 *data,
+		 struct ice_sq_cd *cd);
+bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw);
+#endif /* _ICE_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
new file mode 100644
index 0000000000..e7d2474c43
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -0,0 +1,1244 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+
+#define ICE_CQ_INIT_REGS(qinfo, prefix)				\
+do {								\
+	(qinfo)->sq.head = prefix##_ATQH;			\
+	(qinfo)->sq.tail = prefix##_ATQT;			\
+	(qinfo)->sq.len = prefix##_ATQLEN;			\
+	(qinfo)->sq.bah = prefix##_ATQBAH;			\
+	(qinfo)->sq.bal = prefix##_ATQBAL;			\
+	(qinfo)->sq.len_mask = prefix##_ATQLEN_ATQLEN_M;	\
+	(qinfo)->sq.len_ena_mask = prefix##_ATQLEN_ATQENABLE_M;	\
+	(qinfo)->sq.len_crit_mask = prefix##_ATQLEN_ATQCRIT_M;	\
+	(qinfo)->sq.head_mask = prefix##_ATQH_ATQH_M;		\
+	(qinfo)->rq.head = prefix##_ARQH;			\
+	(qinfo)->rq.tail = prefix##_ARQT;			\
+	(qinfo)->rq.len = prefix##_ARQLEN;			\
+	(qinfo)->rq.bah = prefix##_ARQBAH;			\
+	(qinfo)->rq.bal = prefix##_ARQBAL;			\
+	(qinfo)->rq.len_mask = prefix##_ARQLEN_ARQLEN_M;	\
+	(qinfo)->rq.len_ena_mask = prefix##_ARQLEN_ARQENABLE_M;	\
+	(qinfo)->rq.len_crit_mask = prefix##_ARQLEN_ARQCRIT_M;	\
+	(qinfo)->rq.head_mask = prefix##_ARQH_ARQH_M;		\
+} while (0)
+
+/**
+ * ice_adminq_init_regs - Initialize AdminQ registers
+ * @hw: pointer to the hardware structure
+ *
+ * This assumes the alloc_sq and alloc_rq functions have already been called
+ */
+static void ice_adminq_init_regs(struct ice_hw *hw)
+{
+	struct ice_ctl_q_info *cq = &hw->adminq;
+
+	ICE_CQ_INIT_REGS(cq, PF_FW);
+}
+
+/**
+ * ice_mailbox_init_regs - Initialize Mailbox registers
+ * @hw: pointer to the hardware structure
+ *
+ * This assumes the alloc_sq and alloc_rq functions have already been called
+ */
+static void ice_mailbox_init_regs(struct ice_hw *hw)
+{
+	struct ice_ctl_q_info *cq = &hw->mailboxq;
+
+	ICE_CQ_INIT_REGS(cq, PF_MBX);
+}
+
+/**
+ * ice_sb_init_regs - Initialize Sideband registers
+ * @hw: pointer to the hardware structure
+ *
+ * This assumes the alloc_sq and alloc_rq functions have already been called
+ */
+static void ice_sb_init_regs(struct ice_hw *hw)
+{
+	struct ice_ctl_q_info *cq = &hw->sbq;
+
+	ICE_CQ_INIT_REGS(cq, PF_SB);
+}
+
+/**
+ * ice_check_sq_alive
+ * @hw: pointer to the HW struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Returns true if Queue is enabled else false.
+ */
+bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	/* check both queue-length and queue-enable fields */
+	if (cq->sq.len && cq->sq.len_mask && cq->sq.len_ena_mask)
+		return (rd32(hw, cq->sq.len) & (cq->sq.len_mask |
+						cq->sq.len_ena_mask)) ==
+			(cq->num_sq_entries | cq->sq.len_ena_mask);
+
+	return false;
+}
+
+/**
+ * ice_alloc_ctrlq_sq_ring - Allocate Control Transmit Queue (ATQ) rings
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static int
+ice_alloc_ctrlq_sq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	size_t size = cq->num_sq_entries * sizeof(struct ice_aq_desc);
+
+	cq->sq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size,
+						 &cq->sq.desc_buf.pa,
+						 GFP_KERNEL | __GFP_ZERO);
+	if (!cq->sq.desc_buf.va)
+		return -ENOMEM;
+	cq->sq.desc_buf.size = size;
+
+	cq->sq.cmd_buf = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries,
+				      sizeof(struct ice_sq_cd), GFP_KERNEL);
+	if (!cq->sq.cmd_buf) {
+		dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.desc_buf.size,
+				   cq->sq.desc_buf.va, cq->sq.desc_buf.pa);
+		cq->sq.desc_buf.va = NULL;
+		cq->sq.desc_buf.pa = 0;
+		cq->sq.desc_buf.size = 0;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_alloc_ctrlq_rq_ring - Allocate Control Receive Queue (ARQ) rings
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static int
+ice_alloc_ctrlq_rq_ring(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	size_t size = cq->num_rq_entries * sizeof(struct ice_aq_desc);
+
+	cq->rq.desc_buf.va = dmam_alloc_coherent(ice_hw_to_dev(hw), size,
+						 &cq->rq.desc_buf.pa,
+						 GFP_KERNEL | __GFP_ZERO);
+	if (!cq->rq.desc_buf.va)
+		return -ENOMEM;
+	cq->rq.desc_buf.size = size;
+	return 0;
+}
+
+/**
+ * ice_free_cq_ring - Free control queue ring
+ * @hw: pointer to the hardware structure
+ * @ring: pointer to the specific control queue ring
+ *
+ * This assumes the posted buffers have already been cleaned
+ * and de-allocated
+ */
+static void ice_free_cq_ring(struct ice_hw *hw, struct ice_ctl_q_ring *ring)
+{
+	dmam_free_coherent(ice_hw_to_dev(hw), ring->desc_buf.size,
+			   ring->desc_buf.va, ring->desc_buf.pa);
+	ring->desc_buf.va = NULL;
+	ring->desc_buf.pa = 0;
+	ring->desc_buf.size = 0;
+}
+
+/**
+ * ice_alloc_rq_bufs - Allocate pre-posted buffers for the ARQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static int
+ice_alloc_rq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	int i;
+
+	/* We'll be allocating the buffer info memory first, then we can
+	 * allocate the mapped buffers for the event processing
+	 */
+	cq->rq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_rq_entries,
+				       sizeof(cq->rq.desc_buf), GFP_KERNEL);
+	if (!cq->rq.dma_head)
+		return -ENOMEM;
+	cq->rq.r.rq_bi = (struct ice_dma_mem *)cq->rq.dma_head;
+
+	/* allocate the mapped buffers */
+	for (i = 0; i < cq->num_rq_entries; i++) {
+		struct ice_aq_desc *desc;
+		struct ice_dma_mem *bi;
+
+		bi = &cq->rq.r.rq_bi[i];
+		bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw),
+					     cq->rq_buf_size, &bi->pa,
+					     GFP_KERNEL | __GFP_ZERO);
+		if (!bi->va)
+			goto unwind_alloc_rq_bufs;
+		bi->size = cq->rq_buf_size;
+
+		/* now configure the descriptors for use */
+		desc = ICE_CTL_Q_DESC(cq->rq, i);
+
+		desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF);
+		if (cq->rq_buf_size > ICE_AQ_LG_BUF)
+			desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
+		desc->opcode = 0;
+		/* This is in accordance with Admin queue design, there is no
+		 * register for buffer size configuration
+		 */
+		desc->datalen = cpu_to_le16(bi->size);
+		desc->retval = 0;
+		desc->cookie_high = 0;
+		desc->cookie_low = 0;
+		desc->params.generic.addr_high =
+			cpu_to_le32(upper_32_bits(bi->pa));
+		desc->params.generic.addr_low =
+			cpu_to_le32(lower_32_bits(bi->pa));
+		desc->params.generic.param0 = 0;
+		desc->params.generic.param1 = 0;
+	}
+	return 0;
+
+unwind_alloc_rq_bufs:
+	/* don't try to free the one that failed... */
+	i--;
+	for (; i >= 0; i--) {
+		dmam_free_coherent(ice_hw_to_dev(hw), cq->rq.r.rq_bi[i].size,
+				   cq->rq.r.rq_bi[i].va, cq->rq.r.rq_bi[i].pa);
+		cq->rq.r.rq_bi[i].va = NULL;
+		cq->rq.r.rq_bi[i].pa = 0;
+		cq->rq.r.rq_bi[i].size = 0;
+	}
+	cq->rq.r.rq_bi = NULL;
+	devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head);
+	cq->rq.dma_head = NULL;
+
+	return -ENOMEM;
+}
+
+/**
+ * ice_alloc_sq_bufs - Allocate empty buffer structs for the ATQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ */
+static int
+ice_alloc_sq_bufs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	int i;
+
+	/* No mapped memory needed yet, just the buffer info structures */
+	cq->sq.dma_head = devm_kcalloc(ice_hw_to_dev(hw), cq->num_sq_entries,
+				       sizeof(cq->sq.desc_buf), GFP_KERNEL);
+	if (!cq->sq.dma_head)
+		return -ENOMEM;
+	cq->sq.r.sq_bi = (struct ice_dma_mem *)cq->sq.dma_head;
+
+	/* allocate the mapped buffers */
+	for (i = 0; i < cq->num_sq_entries; i++) {
+		struct ice_dma_mem *bi;
+
+		bi = &cq->sq.r.sq_bi[i];
+		bi->va = dmam_alloc_coherent(ice_hw_to_dev(hw),
+					     cq->sq_buf_size, &bi->pa,
+					     GFP_KERNEL | __GFP_ZERO);
+		if (!bi->va)
+			goto unwind_alloc_sq_bufs;
+		bi->size = cq->sq_buf_size;
+	}
+	return 0;
+
+unwind_alloc_sq_bufs:
+	/* don't try to free the one that failed... */
+	i--;
+	for (; i >= 0; i--) {
+		dmam_free_coherent(ice_hw_to_dev(hw), cq->sq.r.sq_bi[i].size,
+				   cq->sq.r.sq_bi[i].va, cq->sq.r.sq_bi[i].pa);
+		cq->sq.r.sq_bi[i].va = NULL;
+		cq->sq.r.sq_bi[i].pa = 0;
+		cq->sq.r.sq_bi[i].size = 0;
+	}
+	cq->sq.r.sq_bi = NULL;
+	devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head);
+	cq->sq.dma_head = NULL;
+
+	return -ENOMEM;
+}
+
+static int
+ice_cfg_cq_regs(struct ice_hw *hw, struct ice_ctl_q_ring *ring, u16 num_entries)
+{
+	/* Clear Head and Tail */
+	wr32(hw, ring->head, 0);
+	wr32(hw, ring->tail, 0);
+
+	/* set starting point */
+	wr32(hw, ring->len, (num_entries | ring->len_ena_mask));
+	wr32(hw, ring->bal, lower_32_bits(ring->desc_buf.pa));
+	wr32(hw, ring->bah, upper_32_bits(ring->desc_buf.pa));
+
+	/* Check one register to verify that config was applied */
+	if (rd32(hw, ring->bal) != lower_32_bits(ring->desc_buf.pa))
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * ice_cfg_sq_regs - configure Control ATQ registers
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * Configure base address and length registers for the transmit queue
+ */
+static int ice_cfg_sq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	return ice_cfg_cq_regs(hw, &cq->sq, cq->num_sq_entries);
+}
+
+/**
+ * ice_cfg_rq_regs - configure Control ARQ register
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * Configure base address and length registers for the receive (event queue)
+ */
+static int ice_cfg_rq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	int status;
+
+	status = ice_cfg_cq_regs(hw, &cq->rq, cq->num_rq_entries);
+	if (status)
+		return status;
+
+	/* Update tail in the HW to post pre-allocated buffers */
+	wr32(hw, cq->rq.tail, (u32)(cq->num_rq_entries - 1));
+
+	return 0;
+}
+
+#define ICE_FREE_CQ_BUFS(hw, qi, ring)					\
+do {									\
+	/* free descriptors */						\
+	if ((qi)->ring.r.ring##_bi) {					\
+		int i;							\
+									\
+		for (i = 0; i < (qi)->num_##ring##_entries; i++)	\
+			if ((qi)->ring.r.ring##_bi[i].pa) {		\
+				dmam_free_coherent(ice_hw_to_dev(hw),	\
+					(qi)->ring.r.ring##_bi[i].size,	\
+					(qi)->ring.r.ring##_bi[i].va,	\
+					(qi)->ring.r.ring##_bi[i].pa);	\
+					(qi)->ring.r.ring##_bi[i].va = NULL;\
+					(qi)->ring.r.ring##_bi[i].pa = 0;\
+					(qi)->ring.r.ring##_bi[i].size = 0;\
+		}							\
+	}								\
+	/* free the buffer info list */					\
+	devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf);		\
+	/* free DMA head */						\
+	devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head);		\
+} while (0)
+
+/**
+ * ice_init_sq - main initialization routine for Control ATQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * This is the main initialization routine for the Control Send Queue
+ * Prior to calling this function, the driver *MUST* set the following fields
+ * in the cq->structure:
+ *     - cq->num_sq_entries
+ *     - cq->sq_buf_size
+ *
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ */
+static int ice_init_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	int ret_code;
+
+	if (cq->sq.count > 0) {
+		/* queue already initialized */
+		ret_code = -EBUSY;
+		goto init_ctrlq_exit;
+	}
+
+	/* verify input for valid configuration */
+	if (!cq->num_sq_entries || !cq->sq_buf_size) {
+		ret_code = -EIO;
+		goto init_ctrlq_exit;
+	}
+
+	cq->sq.next_to_use = 0;
+	cq->sq.next_to_clean = 0;
+
+	/* allocate the ring memory */
+	ret_code = ice_alloc_ctrlq_sq_ring(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_exit;
+
+	/* allocate buffers in the rings */
+	ret_code = ice_alloc_sq_bufs(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_free_rings;
+
+	/* initialize base registers */
+	ret_code = ice_cfg_sq_regs(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_free_rings;
+
+	/* success! */
+	cq->sq.count = cq->num_sq_entries;
+	goto init_ctrlq_exit;
+
+init_ctrlq_free_rings:
+	ICE_FREE_CQ_BUFS(hw, cq, sq);
+	ice_free_cq_ring(hw, &cq->sq);
+
+init_ctrlq_exit:
+	return ret_code;
+}
+
+/**
+ * ice_init_rq - initialize ARQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * The main initialization routine for the Admin Receive (Event) Queue.
+ * Prior to calling this function, the driver *MUST* set the following fields
+ * in the cq->structure:
+ *     - cq->num_rq_entries
+ *     - cq->rq_buf_size
+ *
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ */
+static int ice_init_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	int ret_code;
+
+	if (cq->rq.count > 0) {
+		/* queue already initialized */
+		ret_code = -EBUSY;
+		goto init_ctrlq_exit;
+	}
+
+	/* verify input for valid configuration */
+	if (!cq->num_rq_entries || !cq->rq_buf_size) {
+		ret_code = -EIO;
+		goto init_ctrlq_exit;
+	}
+
+	cq->rq.next_to_use = 0;
+	cq->rq.next_to_clean = 0;
+
+	/* allocate the ring memory */
+	ret_code = ice_alloc_ctrlq_rq_ring(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_exit;
+
+	/* allocate buffers in the rings */
+	ret_code = ice_alloc_rq_bufs(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_free_rings;
+
+	/* initialize base registers */
+	ret_code = ice_cfg_rq_regs(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_free_rings;
+
+	/* success! */
+	cq->rq.count = cq->num_rq_entries;
+	goto init_ctrlq_exit;
+
+init_ctrlq_free_rings:
+	ICE_FREE_CQ_BUFS(hw, cq, rq);
+	ice_free_cq_ring(hw, &cq->rq);
+
+init_ctrlq_exit:
+	return ret_code;
+}
+
+/**
+ * ice_shutdown_sq - shutdown the Control ATQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * The main shutdown routine for the Control Transmit Queue
+ */
+static int ice_shutdown_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	int ret_code = 0;
+
+	mutex_lock(&cq->sq_lock);
+
+	if (!cq->sq.count) {
+		ret_code = -EBUSY;
+		goto shutdown_sq_out;
+	}
+
+	/* Stop firmware AdminQ processing */
+	wr32(hw, cq->sq.head, 0);
+	wr32(hw, cq->sq.tail, 0);
+	wr32(hw, cq->sq.len, 0);
+	wr32(hw, cq->sq.bal, 0);
+	wr32(hw, cq->sq.bah, 0);
+
+	cq->sq.count = 0;	/* to indicate uninitialized queue */
+
+	/* free ring buffers and the ring itself */
+	ICE_FREE_CQ_BUFS(hw, cq, sq);
+	ice_free_cq_ring(hw, &cq->sq);
+
+shutdown_sq_out:
+	mutex_unlock(&cq->sq_lock);
+	return ret_code;
+}
+
+/**
+ * ice_aq_ver_check - Check the reported AQ API version.
+ * @hw: pointer to the hardware structure
+ *
+ * Checks if the driver should load on a given AQ API version.
+ *
+ * Return: 'true' iff the driver should attempt to load. 'false' otherwise.
+ */
+static bool ice_aq_ver_check(struct ice_hw *hw)
+{
+	if (hw->api_maj_ver > EXP_FW_API_VER_MAJOR) {
+		/* Major API version is newer than expected, don't load */
+		dev_warn(ice_hw_to_dev(hw),
+			 "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n");
+		return false;
+	} else if (hw->api_maj_ver == EXP_FW_API_VER_MAJOR) {
+		if (hw->api_min_ver > (EXP_FW_API_VER_MINOR + 2))
+			dev_info(ice_hw_to_dev(hw),
+				 "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n");
+		else if ((hw->api_min_ver + 2) < EXP_FW_API_VER_MINOR)
+			dev_info(ice_hw_to_dev(hw),
+				 "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
+	} else {
+		/* Major API version is older than expected, log a warning */
+		dev_info(ice_hw_to_dev(hw),
+			 "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
+	}
+	return true;
+}
+
+/**
+ * ice_shutdown_rq - shutdown Control ARQ
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * The main shutdown routine for the Control Receive Queue
+ */
+static int ice_shutdown_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	int ret_code = 0;
+
+	mutex_lock(&cq->rq_lock);
+
+	if (!cq->rq.count) {
+		ret_code = -EBUSY;
+		goto shutdown_rq_out;
+	}
+
+	/* Stop Control Queue processing */
+	wr32(hw, cq->rq.head, 0);
+	wr32(hw, cq->rq.tail, 0);
+	wr32(hw, cq->rq.len, 0);
+	wr32(hw, cq->rq.bal, 0);
+	wr32(hw, cq->rq.bah, 0);
+
+	/* set rq.count to 0 to indicate uninitialized queue */
+	cq->rq.count = 0;
+
+	/* free ring buffers and the ring itself */
+	ICE_FREE_CQ_BUFS(hw, cq, rq);
+	ice_free_cq_ring(hw, &cq->rq);
+
+shutdown_rq_out:
+	mutex_unlock(&cq->rq_lock);
+	return ret_code;
+}
+
+/**
+ * ice_init_check_adminq - Check version for Admin Queue to know if its alive
+ * @hw: pointer to the hardware structure
+ */
+static int ice_init_check_adminq(struct ice_hw *hw)
+{
+	struct ice_ctl_q_info *cq = &hw->adminq;
+	int status;
+
+	status = ice_aq_get_fw_ver(hw, NULL);
+	if (status)
+		goto init_ctrlq_free_rq;
+
+	if (!ice_aq_ver_check(hw)) {
+		status = -EIO;
+		goto init_ctrlq_free_rq;
+	}
+
+	return 0;
+
+init_ctrlq_free_rq:
+	ice_shutdown_rq(hw, cq);
+	ice_shutdown_sq(hw, cq);
+	return status;
+}
+
+/**
+ * ice_init_ctrlq - main initialization routine for any control Queue
+ * @hw: pointer to the hardware structure
+ * @q_type: specific Control queue type
+ *
+ * Prior to calling this function, the driver *MUST* set the following fields
+ * in the cq->structure:
+ *     - cq->num_sq_entries
+ *     - cq->num_rq_entries
+ *     - cq->rq_buf_size
+ *     - cq->sq_buf_size
+ *
+ * NOTE: this function does not initialize the controlq locks
+ */
+static int ice_init_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
+{
+	struct ice_ctl_q_info *cq;
+	int ret_code;
+
+	switch (q_type) {
+	case ICE_CTL_Q_ADMIN:
+		ice_adminq_init_regs(hw);
+		cq = &hw->adminq;
+		break;
+	case ICE_CTL_Q_SB:
+		ice_sb_init_regs(hw);
+		cq = &hw->sbq;
+		break;
+	case ICE_CTL_Q_MAILBOX:
+		ice_mailbox_init_regs(hw);
+		cq = &hw->mailboxq;
+		break;
+	default:
+		return -EINVAL;
+	}
+	cq->qtype = q_type;
+
+	/* verify input for valid configuration */
+	if (!cq->num_rq_entries || !cq->num_sq_entries ||
+	    !cq->rq_buf_size || !cq->sq_buf_size) {
+		return -EIO;
+	}
+
+	/* allocate the ATQ */
+	ret_code = ice_init_sq(hw, cq);
+	if (ret_code)
+		return ret_code;
+
+	/* allocate the ARQ */
+	ret_code = ice_init_rq(hw, cq);
+	if (ret_code)
+		goto init_ctrlq_free_sq;
+
+	/* success! */
+	return 0;
+
+init_ctrlq_free_sq:
+	ice_shutdown_sq(hw, cq);
+	return ret_code;
+}
+
+/**
+ * ice_is_sbq_supported - is the sideband queue supported
+ * @hw: pointer to the hardware structure
+ *
+ * Returns true if the sideband control queue interface is
+ * supported for the device, false otherwise
+ */
+bool ice_is_sbq_supported(struct ice_hw *hw)
+{
+	/* The device sideband queue is only supported on devices with the
+	 * generic MAC type.
+	 */
+	return hw->mac_type == ICE_MAC_GENERIC;
+}
+
+/**
+ * ice_get_sbq - returns the right control queue to use for sideband
+ * @hw: pointer to the hardware structure
+ */
+struct ice_ctl_q_info *ice_get_sbq(struct ice_hw *hw)
+{
+	if (ice_is_sbq_supported(hw))
+		return &hw->sbq;
+	return &hw->adminq;
+}
+
+/**
+ * ice_shutdown_ctrlq - shutdown routine for any control queue
+ * @hw: pointer to the hardware structure
+ * @q_type: specific Control queue type
+ *
+ * NOTE: this function does not destroy the control queue locks.
+ */
+static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
+{
+	struct ice_ctl_q_info *cq;
+
+	switch (q_type) {
+	case ICE_CTL_Q_ADMIN:
+		cq = &hw->adminq;
+		if (ice_check_sq_alive(hw, cq))
+			ice_aq_q_shutdown(hw, true);
+		break;
+	case ICE_CTL_Q_SB:
+		cq = &hw->sbq;
+		break;
+	case ICE_CTL_Q_MAILBOX:
+		cq = &hw->mailboxq;
+		break;
+	default:
+		return;
+	}
+
+	ice_shutdown_sq(hw, cq);
+	ice_shutdown_rq(hw, cq);
+}
+
+/**
+ * ice_shutdown_all_ctrlq - shutdown routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * NOTE: this function does not destroy the control queue locks. The driver
+ * may call this at runtime to shutdown and later restart control queues, such
+ * as in response to a reset event.
+ */
+void ice_shutdown_all_ctrlq(struct ice_hw *hw)
+{
+	/* Shutdown FW admin queue */
+	ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+	/* Shutdown PHY Sideband */
+	if (ice_is_sbq_supported(hw))
+		ice_shutdown_ctrlq(hw, ICE_CTL_Q_SB);
+	/* Shutdown PF-VF Mailbox */
+	ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
+}
+
+/**
+ * ice_init_all_ctrlq - main initialization routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * Prior to calling this function, the driver MUST* set the following fields
+ * in the cq->structure for all control queues:
+ *     - cq->num_sq_entries
+ *     - cq->num_rq_entries
+ *     - cq->rq_buf_size
+ *     - cq->sq_buf_size
+ *
+ * NOTE: this function does not initialize the controlq locks.
+ */
+int ice_init_all_ctrlq(struct ice_hw *hw)
+{
+	u32 retry = 0;
+	int status;
+
+	/* Init FW admin queue */
+	do {
+		status = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
+		if (status)
+			return status;
+
+		status = ice_init_check_adminq(hw);
+		if (status != -EIO)
+			break;
+
+		ice_debug(hw, ICE_DBG_AQ_MSG, "Retry Admin Queue init due to FW critical error\n");
+		ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+		msleep(ICE_CTL_Q_ADMIN_INIT_MSEC);
+	} while (retry++ < ICE_CTL_Q_ADMIN_INIT_TIMEOUT);
+
+	if (status)
+		return status;
+	/* sideband control queue (SBQ) interface is not supported on some
+	 * devices. Initialize if supported, else fallback to the admin queue
+	 * interface
+	 */
+	if (ice_is_sbq_supported(hw)) {
+		status = ice_init_ctrlq(hw, ICE_CTL_Q_SB);
+		if (status)
+			return status;
+	}
+	/* Init Mailbox queue */
+	return ice_init_ctrlq(hw, ICE_CTL_Q_MAILBOX);
+}
+
+/**
+ * ice_init_ctrlq_locks - Initialize locks for a control queue
+ * @cq: pointer to the control queue
+ *
+ * Initializes the send and receive queue locks for a given control queue.
+ */
+static void ice_init_ctrlq_locks(struct ice_ctl_q_info *cq)
+{
+	mutex_init(&cq->sq_lock);
+	mutex_init(&cq->rq_lock);
+}
+
+/**
+ * ice_create_all_ctrlq - main initialization routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * Prior to calling this function, the driver *MUST* set the following fields
+ * in the cq->structure for all control queues:
+ *     - cq->num_sq_entries
+ *     - cq->num_rq_entries
+ *     - cq->rq_buf_size
+ *     - cq->sq_buf_size
+ *
+ * This function creates all the control queue locks and then calls
+ * ice_init_all_ctrlq. It should be called once during driver load. If the
+ * driver needs to re-initialize control queues at run time it should call
+ * ice_init_all_ctrlq instead.
+ */
+int ice_create_all_ctrlq(struct ice_hw *hw)
+{
+	ice_init_ctrlq_locks(&hw->adminq);
+	if (ice_is_sbq_supported(hw))
+		ice_init_ctrlq_locks(&hw->sbq);
+	ice_init_ctrlq_locks(&hw->mailboxq);
+
+	return ice_init_all_ctrlq(hw);
+}
+
+/**
+ * ice_destroy_ctrlq_locks - Destroy locks for a control queue
+ * @cq: pointer to the control queue
+ *
+ * Destroys the send and receive queue locks for a given control queue.
+ */
+static void ice_destroy_ctrlq_locks(struct ice_ctl_q_info *cq)
+{
+	mutex_destroy(&cq->sq_lock);
+	mutex_destroy(&cq->rq_lock);
+}
+
+/**
+ * ice_destroy_all_ctrlq - exit routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * This function shuts down all the control queues and then destroys the
+ * control queue locks. It should be called once during driver unload. The
+ * driver should call ice_shutdown_all_ctrlq if it needs to shut down and
+ * reinitialize control queues, such as in response to a reset event.
+ */
+void ice_destroy_all_ctrlq(struct ice_hw *hw)
+{
+	/* shut down all the control queues first */
+	ice_shutdown_all_ctrlq(hw);
+
+	ice_destroy_ctrlq_locks(&hw->adminq);
+	if (ice_is_sbq_supported(hw))
+		ice_destroy_ctrlq_locks(&hw->sbq);
+	ice_destroy_ctrlq_locks(&hw->mailboxq);
+}
+
+/**
+ * ice_clean_sq - cleans Admin send queue (ATQ)
+ * @hw: pointer to the hardware structure
+ * @cq: pointer to the specific Control queue
+ *
+ * returns the number of free desc
+ */
+static u16 ice_clean_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	struct ice_ctl_q_ring *sq = &cq->sq;
+	u16 ntc = sq->next_to_clean;
+	struct ice_sq_cd *details;
+	struct ice_aq_desc *desc;
+
+	desc = ICE_CTL_Q_DESC(*sq, ntc);
+	details = ICE_CTL_Q_DETAILS(*sq, ntc);
+
+	while (rd32(hw, cq->sq.head) != ntc) {
+		ice_debug(hw, ICE_DBG_AQ_MSG, "ntc %d head %d.\n", ntc, rd32(hw, cq->sq.head));
+		memset(desc, 0, sizeof(*desc));
+		memset(details, 0, sizeof(*details));
+		ntc++;
+		if (ntc == sq->count)
+			ntc = 0;
+		desc = ICE_CTL_Q_DESC(*sq, ntc);
+		details = ICE_CTL_Q_DETAILS(*sq, ntc);
+	}
+
+	sq->next_to_clean = ntc;
+
+	return ICE_CTL_Q_DESC_UNUSED(sq);
+}
+
+/**
+ * ice_debug_cq
+ * @hw: pointer to the hardware structure
+ * @desc: pointer to control queue descriptor
+ * @buf: pointer to command buffer
+ * @buf_len: max length of buf
+ *
+ * Dumps debug log about control command with descriptor contents.
+ */
+static void ice_debug_cq(struct ice_hw *hw, void *desc, void *buf, u16 buf_len)
+{
+	struct ice_aq_desc *cq_desc = desc;
+	u16 len;
+
+	if (!IS_ENABLED(CONFIG_DYNAMIC_DEBUG) &&
+	    !((ICE_DBG_AQ_DESC | ICE_DBG_AQ_DESC_BUF) & hw->debug_mask))
+		return;
+
+	if (!desc)
+		return;
+
+	len = le16_to_cpu(cq_desc->datalen);
+
+	ice_debug(hw, ICE_DBG_AQ_DESC, "CQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
+		  le16_to_cpu(cq_desc->opcode),
+		  le16_to_cpu(cq_desc->flags),
+		  le16_to_cpu(cq_desc->datalen), le16_to_cpu(cq_desc->retval));
+	ice_debug(hw, ICE_DBG_AQ_DESC, "\tcookie (h,l) 0x%08X 0x%08X\n",
+		  le32_to_cpu(cq_desc->cookie_high),
+		  le32_to_cpu(cq_desc->cookie_low));
+	ice_debug(hw, ICE_DBG_AQ_DESC, "\tparam (0,1)  0x%08X 0x%08X\n",
+		  le32_to_cpu(cq_desc->params.generic.param0),
+		  le32_to_cpu(cq_desc->params.generic.param1));
+	ice_debug(hw, ICE_DBG_AQ_DESC, "\taddr (h,l)   0x%08X 0x%08X\n",
+		  le32_to_cpu(cq_desc->params.generic.addr_high),
+		  le32_to_cpu(cq_desc->params.generic.addr_low));
+	if (buf && cq_desc->datalen != 0) {
+		ice_debug(hw, ICE_DBG_AQ_DESC_BUF, "Buffer:\n");
+		if (buf_len < len)
+			len = buf_len;
+
+		ice_debug_array(hw, ICE_DBG_AQ_DESC_BUF, 16, 1, buf, len);
+	}
+}
+
+/**
+ * ice_sq_done - check if FW has processed the Admin Send Queue (ATQ)
+ * @hw: pointer to the HW struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Returns true if the firmware has processed all descriptors on the
+ * admin send queue. Returns false if there are still requests pending.
+ */
+static bool ice_sq_done(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	/* AQ designers suggest use of head for better
+	 * timing reliability than DD bit
+	 */
+	return rd32(hw, cq->sq.head) == cq->sq.next_to_use;
+}
+
+/**
+ * ice_sq_send_cmd - send command to Control Queue (ATQ)
+ * @hw: pointer to the HW struct
+ * @cq: pointer to the specific Control queue
+ * @desc: prefilled descriptor describing the command
+ * @buf: buffer to use for indirect commands (or NULL for direct commands)
+ * @buf_size: size of buffer for indirect commands (or 0 for direct commands)
+ * @cd: pointer to command details structure
+ *
+ * This is the main send command routine for the ATQ. It runs the queue,
+ * cleans the queue, etc.
+ */
+int
+ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+		struct ice_aq_desc *desc, void *buf, u16 buf_size,
+		struct ice_sq_cd *cd)
+{
+	struct ice_dma_mem *dma_buf = NULL;
+	struct ice_aq_desc *desc_on_ring;
+	bool cmd_completed = false;
+	struct ice_sq_cd *details;
+	unsigned long timeout;
+	int status = 0;
+	u16 retval = 0;
+	u32 val = 0;
+
+	/* if reset is in progress return a soft error */
+	if (hw->reset_ongoing)
+		return -EBUSY;
+	mutex_lock(&cq->sq_lock);
+
+	cq->sq_last_status = ICE_AQ_RC_OK;
+
+	if (!cq->sq.count) {
+		ice_debug(hw, ICE_DBG_AQ_MSG, "Control Send queue not initialized.\n");
+		status = -EIO;
+		goto sq_send_command_error;
+	}
+
+	if ((buf && !buf_size) || (!buf && buf_size)) {
+		status = -EINVAL;
+		goto sq_send_command_error;
+	}
+
+	if (buf) {
+		if (buf_size > cq->sq_buf_size) {
+			ice_debug(hw, ICE_DBG_AQ_MSG, "Invalid buffer size for Control Send queue: %d.\n",
+				  buf_size);
+			status = -EINVAL;
+			goto sq_send_command_error;
+		}
+
+		desc->flags |= cpu_to_le16(ICE_AQ_FLAG_BUF);
+		if (buf_size > ICE_AQ_LG_BUF)
+			desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
+	}
+
+	val = rd32(hw, cq->sq.head);
+	if (val >= cq->num_sq_entries) {
+		ice_debug(hw, ICE_DBG_AQ_MSG, "head overrun at %d in the Control Send Queue ring\n",
+			  val);
+		status = -EIO;
+		goto sq_send_command_error;
+	}
+
+	details = ICE_CTL_Q_DETAILS(cq->sq, cq->sq.next_to_use);
+	if (cd)
+		*details = *cd;
+	else
+		memset(details, 0, sizeof(*details));
+
+	/* Call clean and check queue available function to reclaim the
+	 * descriptors that were processed by FW/MBX; the function returns the
+	 * number of desc available. The clean function called here could be
+	 * called in a separate thread in case of asynchronous completions.
+	 */
+	if (ice_clean_sq(hw, cq) == 0) {
+		ice_debug(hw, ICE_DBG_AQ_MSG, "Error: Control Send Queue is full.\n");
+		status = -ENOSPC;
+		goto sq_send_command_error;
+	}
+
+	/* initialize the temp desc pointer with the right desc */
+	desc_on_ring = ICE_CTL_Q_DESC(cq->sq, cq->sq.next_to_use);
+
+	/* if the desc is available copy the temp desc to the right place */
+	memcpy(desc_on_ring, desc, sizeof(*desc_on_ring));
+
+	/* if buf is not NULL assume indirect command */
+	if (buf) {
+		dma_buf = &cq->sq.r.sq_bi[cq->sq.next_to_use];
+		/* copy the user buf into the respective DMA buf */
+		memcpy(dma_buf->va, buf, buf_size);
+		desc_on_ring->datalen = cpu_to_le16(buf_size);
+
+		/* Update the address values in the desc with the pa value
+		 * for respective buffer
+		 */
+		desc_on_ring->params.generic.addr_high =
+			cpu_to_le32(upper_32_bits(dma_buf->pa));
+		desc_on_ring->params.generic.addr_low =
+			cpu_to_le32(lower_32_bits(dma_buf->pa));
+	}
+
+	/* Debug desc and buffer */
+	ice_debug(hw, ICE_DBG_AQ_DESC, "ATQ: Control Send queue desc and buffer:\n");
+
+	ice_debug_cq(hw, (void *)desc_on_ring, buf, buf_size);
+
+	(cq->sq.next_to_use)++;
+	if (cq->sq.next_to_use == cq->sq.count)
+		cq->sq.next_to_use = 0;
+	wr32(hw, cq->sq.tail, cq->sq.next_to_use);
+	ice_flush(hw);
+
+	/* Wait a short time before initial ice_sq_done() check, to allow
+	 * hardware time for completion.
+	 */
+	udelay(5);
+
+	timeout = jiffies + ICE_CTL_Q_SQ_CMD_TIMEOUT;
+	do {
+		if (ice_sq_done(hw, cq))
+			break;
+
+		usleep_range(100, 150);
+	} while (time_before(jiffies, timeout));
+
+	/* if ready, copy the desc back to temp */
+	if (ice_sq_done(hw, cq)) {
+		memcpy(desc, desc_on_ring, sizeof(*desc));
+		if (buf) {
+			/* get returned length to copy */
+			u16 copy_size = le16_to_cpu(desc->datalen);
+
+			if (copy_size > buf_size) {
+				ice_debug(hw, ICE_DBG_AQ_MSG, "Return len %d > than buf len %d\n",
+					  copy_size, buf_size);
+				status = -EIO;
+			} else {
+				memcpy(buf, dma_buf->va, copy_size);
+			}
+		}
+		retval = le16_to_cpu(desc->retval);
+		if (retval) {
+			ice_debug(hw, ICE_DBG_AQ_MSG, "Control Send Queue command 0x%04X completed with error 0x%X\n",
+				  le16_to_cpu(desc->opcode),
+				  retval);
+
+			/* strip off FW internal code */
+			retval &= 0xff;
+		}
+		cmd_completed = true;
+		if (!status && retval != ICE_AQ_RC_OK)
+			status = -EIO;
+		cq->sq_last_status = (enum ice_aq_err)retval;
+	}
+
+	ice_debug(hw, ICE_DBG_AQ_MSG, "ATQ: desc and buffer writeback:\n");
+
+	ice_debug_cq(hw, (void *)desc, buf, buf_size);
+
+	/* save writeback AQ if requested */
+	if (details->wb_desc)
+		memcpy(details->wb_desc, desc_on_ring,
+		       sizeof(*details->wb_desc));
+
+	/* update the error if time out occurred */
+	if (!cmd_completed) {
+		if (rd32(hw, cq->rq.len) & cq->rq.len_crit_mask ||
+		    rd32(hw, cq->sq.len) & cq->sq.len_crit_mask) {
+			ice_debug(hw, ICE_DBG_AQ_MSG, "Critical FW error.\n");
+			status = -EIO;
+		} else {
+			ice_debug(hw, ICE_DBG_AQ_MSG, "Control Send Queue Writeback timeout.\n");
+			status = -EIO;
+		}
+	}
+
+sq_send_command_error:
+	mutex_unlock(&cq->sq_lock);
+	return status;
+}
+
+/**
+ * ice_fill_dflt_direct_cmd_desc - AQ descriptor helper function
+ * @desc: pointer to the temp descriptor (non DMA mem)
+ * @opcode: the opcode can be used to decide which flags to turn off or on
+ *
+ * Fill the desc with default values
+ */
+void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode)
+{
+	/* zero out the desc */
+	memset(desc, 0, sizeof(*desc));
+	desc->opcode = cpu_to_le16(opcode);
+	desc->flags = cpu_to_le16(ICE_AQ_FLAG_SI);
+}
+
+/**
+ * ice_clean_rq_elem
+ * @hw: pointer to the HW struct
+ * @cq: pointer to the specific Control queue
+ * @e: event info from the receive descriptor, includes any buffers
+ * @pending: number of events that could be left to process
+ *
+ * This function cleans one Admin Receive Queue element and returns
+ * the contents through e. It can also return how many events are
+ * left to process through 'pending'.
+ */
+int
+ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
+		  struct ice_rq_event_info *e, u16 *pending)
+{
+	u16 ntc = cq->rq.next_to_clean;
+	enum ice_aq_err rq_last_status;
+	struct ice_aq_desc *desc;
+	struct ice_dma_mem *bi;
+	int ret_code = 0;
+	u16 desc_idx;
+	u16 datalen;
+	u16 flags;
+	u16 ntu;
+
+	/* pre-clean the event info */
+	memset(&e->desc, 0, sizeof(e->desc));
+
+	/* take the lock before we start messing with the ring */
+	mutex_lock(&cq->rq_lock);
+
+	if (!cq->rq.count) {
+		ice_debug(hw, ICE_DBG_AQ_MSG, "Control Receive queue not initialized.\n");
+		ret_code = -EIO;
+		goto clean_rq_elem_err;
+	}
+
+	/* set next_to_use to head */
+	ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
+
+	if (ntu == ntc) {
+		/* nothing to do - shouldn't need to update ring's values */
+		ret_code = -EALREADY;
+		goto clean_rq_elem_out;
+	}
+
+	/* now clean the next descriptor */
+	desc = ICE_CTL_Q_DESC(cq->rq, ntc);
+	desc_idx = ntc;
+
+	rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval);
+	flags = le16_to_cpu(desc->flags);
+	if (flags & ICE_AQ_FLAG_ERR) {
+		ret_code = -EIO;
+		ice_debug(hw, ICE_DBG_AQ_MSG, "Control Receive Queue Event 0x%04X received with error 0x%X\n",
+			  le16_to_cpu(desc->opcode), rq_last_status);
+	}
+	memcpy(&e->desc, desc, sizeof(e->desc));
+	datalen = le16_to_cpu(desc->datalen);
+	e->msg_len = min_t(u16, datalen, e->buf_len);
+	if (e->msg_buf && e->msg_len)
+		memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len);
+
+	ice_debug(hw, ICE_DBG_AQ_DESC, "ARQ: desc and buffer:\n");
+
+	ice_debug_cq(hw, (void *)desc, e->msg_buf, cq->rq_buf_size);
+
+	/* Restore the original datalen and buffer address in the desc,
+	 * FW updates datalen to indicate the event message size
+	 */
+	bi = &cq->rq.r.rq_bi[ntc];
+	memset(desc, 0, sizeof(*desc));
+
+	desc->flags = cpu_to_le16(ICE_AQ_FLAG_BUF);
+	if (cq->rq_buf_size > ICE_AQ_LG_BUF)
+		desc->flags |= cpu_to_le16(ICE_AQ_FLAG_LB);
+	desc->datalen = cpu_to_le16(bi->size);
+	desc->params.generic.addr_high = cpu_to_le32(upper_32_bits(bi->pa));
+	desc->params.generic.addr_low = cpu_to_le32(lower_32_bits(bi->pa));
+
+	/* set tail = the last cleaned desc index. */
+	wr32(hw, cq->rq.tail, ntc);
+	/* ntc is updated to tail + 1 */
+	ntc++;
+	if (ntc == cq->num_rq_entries)
+		ntc = 0;
+	cq->rq.next_to_clean = ntc;
+	cq->rq.next_to_use = ntu;
+
+clean_rq_elem_out:
+	/* Set pending if needed, unlock and return */
+	if (pending) {
+		/* re-read HW head to calculate actual pending messages */
+		ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
+		*pending = (u16)((ntc > ntu ? cq->rq.count : 0) + (ntu - ntc));
+	}
+clean_rq_elem_err:
+	mutex_unlock(&cq->rq_lock);
+
+	return ret_code;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
new file mode 100644
index 0000000000..8f2fd1613a
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_CONTROLQ_H_
+#define _ICE_CONTROLQ_H_
+
+#include "ice_adminq_cmd.h"
+
+/* Maximum buffer lengths for all control queue types */
+#define ICE_AQ_MAX_BUF_LEN 4096
+#define ICE_MBXQ_MAX_BUF_LEN 4096
+#define ICE_SBQ_MAX_BUF_LEN 512
+
+#define ICE_CTL_Q_DESC(R, i) \
+	(&(((struct ice_aq_desc *)((R).desc_buf.va))[i]))
+
+#define ICE_CTL_Q_DESC_UNUSED(R) \
+	((u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+	       (R)->next_to_clean - (R)->next_to_use - 1))
+
+/* Defines that help manage the driver vs FW API checks.
+ * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage.
+ */
+#define EXP_FW_API_VER_BRANCH		0x00
+#define EXP_FW_API_VER_MAJOR		0x01
+#define EXP_FW_API_VER_MINOR		0x05
+
+/* Different control queue types: These are mainly for SW consumption. */
+enum ice_ctl_q {
+	ICE_CTL_Q_UNKNOWN = 0,
+	ICE_CTL_Q_ADMIN,
+	ICE_CTL_Q_MAILBOX,
+	ICE_CTL_Q_SB,
+};
+
+/* Control Queue timeout settings - max delay 1s */
+#define ICE_CTL_Q_SQ_CMD_TIMEOUT	HZ    /* Wait max 1s */
+#define ICE_CTL_Q_ADMIN_INIT_TIMEOUT	10    /* Count 10 times */
+#define ICE_CTL_Q_ADMIN_INIT_MSEC	100   /* Check every 100msec */
+
+struct ice_ctl_q_ring {
+	void *dma_head;			/* Virtual address to DMA head */
+	struct ice_dma_mem desc_buf;	/* descriptor ring memory */
+	void *cmd_buf;			/* command buffer memory */
+
+	union {
+		struct ice_dma_mem *sq_bi;
+		struct ice_dma_mem *rq_bi;
+	} r;
+
+	u16 count;		/* Number of descriptors */
+
+	/* used for interrupt processing */
+	u16 next_to_use;
+	u16 next_to_clean;
+
+	/* used for queue tracking */
+	u32 head;
+	u32 tail;
+	u32 len;
+	u32 bah;
+	u32 bal;
+	u32 len_mask;
+	u32 len_ena_mask;
+	u32 len_crit_mask;
+	u32 head_mask;
+};
+
+/* sq transaction details */
+struct ice_sq_cd {
+	struct ice_aq_desc *wb_desc;
+};
+
+#define ICE_CTL_Q_DETAILS(R, i) (&(((struct ice_sq_cd *)((R).cmd_buf))[i]))
+
+/* rq event information */
+struct ice_rq_event_info {
+	struct ice_aq_desc desc;
+	u16 msg_len;
+	u16 buf_len;
+	u8 *msg_buf;
+};
+
+/* Control Queue information */
+struct ice_ctl_q_info {
+	enum ice_ctl_q qtype;
+	struct ice_ctl_q_ring rq;	/* receive queue */
+	struct ice_ctl_q_ring sq;	/* send queue */
+	u16 num_rq_entries;		/* receive queue depth */
+	u16 num_sq_entries;		/* send queue depth */
+	u16 rq_buf_size;		/* receive queue buffer size */
+	u16 sq_buf_size;		/* send queue buffer size */
+	enum ice_aq_err sq_last_status;	/* last status on send queue */
+	struct mutex sq_lock;		/* Send queue lock */
+	struct mutex rq_lock;		/* Receive queue lock */
+};
+
+#endif /* _ICE_CONTROLQ_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
new file mode 100644
index 0000000000..396e555023
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -0,0 +1,1656 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_sched.h"
+#include "ice_dcb.h"
+
+/**
+ * ice_aq_get_lldp_mib
+ * @hw: pointer to the HW struct
+ * @bridge_type: type of bridge requested
+ * @mib_type: Local, Remote or both Local and Remote MIBs
+ * @buf: pointer to the caller-supplied buffer to store the MIB block
+ * @buf_size: size of the buffer (in bytes)
+ * @local_len: length of the returned Local LLDP MIB
+ * @remote_len: length of the returned Remote LLDP MIB
+ * @cd: pointer to command details structure or NULL
+ *
+ * Requests the complete LLDP MIB (entire packet). (0x0A00)
+ */
+static int
+ice_aq_get_lldp_mib(struct ice_hw *hw, u8 bridge_type, u8 mib_type, void *buf,
+		    u16 buf_size, u16 *local_len, u16 *remote_len,
+		    struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_get_mib *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	cmd = &desc.params.lldp_get_mib;
+
+	if (buf_size == 0 || !buf)
+		return -EINVAL;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_get_mib);
+
+	cmd->type = mib_type & ICE_AQ_LLDP_MIB_TYPE_M;
+	cmd->type |= (bridge_type << ICE_AQ_LLDP_BRID_TYPE_S) &
+		ICE_AQ_LLDP_BRID_TYPE_M;
+
+	desc.datalen = cpu_to_le16(buf_size);
+
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status) {
+		if (local_len)
+			*local_len = le16_to_cpu(cmd->local_len);
+		if (remote_len)
+			*remote_len = le16_to_cpu(cmd->remote_len);
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_cfg_lldp_mib_change
+ * @hw: pointer to the HW struct
+ * @ena_update: Enable or Disable event posting
+ * @cd: pointer to command details structure or NULL
+ *
+ * Enable or Disable posting of an event on ARQ when LLDP MIB
+ * associated with the interface changes (0x0A01)
+ */
+static int
+ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update,
+			   struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_set_mib_change *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.lldp_set_event;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_set_mib_change);
+
+	if (!ena_update)
+		cmd->command |= ICE_AQ_LLDP_MIB_UPDATE_DIS;
+	else
+		cmd->command |= FIELD_PREP(ICE_AQ_LLDP_MIB_PENDING_M,
+					   ICE_AQ_LLDP_MIB_PENDING_ENABLE);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_stop_lldp
+ * @hw: pointer to the HW struct
+ * @shutdown_lldp_agent: True if LLDP Agent needs to be Shutdown
+ *			 False if LLDP Agent needs to be Stopped
+ * @persist: True if Stop/Shutdown of LLDP Agent needs to be persistent across
+ *	     reboots
+ * @cd: pointer to command details structure or NULL
+ *
+ * Stop or Shutdown the embedded LLDP Agent (0x0A05)
+ */
+int
+ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist,
+		 struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_stop *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.lldp_stop;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_stop);
+
+	if (shutdown_lldp_agent)
+		cmd->command |= ICE_AQ_LLDP_AGENT_SHUTDOWN;
+
+	if (persist)
+		cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_DIS;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_start_lldp
+ * @hw: pointer to the HW struct
+ * @persist: True if Start of LLDP Agent needs to be persistent across reboots
+ * @cd: pointer to command details structure or NULL
+ *
+ * Start the embedded LLDP Agent on all ports. (0x0A06)
+ */
+int ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_start *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.lldp_start;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_start);
+
+	cmd->command = ICE_AQ_LLDP_AGENT_START;
+
+	if (persist)
+		cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_ENA;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_get_dcbx_status
+ * @hw: pointer to the HW struct
+ *
+ * Get the DCBX status from the Firmware
+ */
+static u8 ice_get_dcbx_status(struct ice_hw *hw)
+{
+	u32 reg;
+
+	reg = rd32(hw, PRTDCB_GENS);
+	return (u8)((reg & PRTDCB_GENS_DCBX_STATUS_M) >>
+		    PRTDCB_GENS_DCBX_STATUS_S);
+}
+
+/**
+ * ice_parse_ieee_ets_common_tlv
+ * @buf: Data buffer to be parsed for ETS CFG/REC data
+ * @ets_cfg: Container to store parsed data
+ *
+ * Parses the common data of IEEE 802.1Qaz ETS CFG/REC TLV
+ */
+static void
+ice_parse_ieee_ets_common_tlv(u8 *buf, struct ice_dcb_ets_cfg *ets_cfg)
+{
+	u8 offset = 0;
+	int i;
+
+	/* Priority Assignment Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 */
+	for (i = 0; i < 4; i++) {
+		ets_cfg->prio_table[i * 2] =
+			((buf[offset] & ICE_IEEE_ETS_PRIO_1_M) >>
+			 ICE_IEEE_ETS_PRIO_1_S);
+		ets_cfg->prio_table[i * 2 + 1] =
+			((buf[offset] & ICE_IEEE_ETS_PRIO_0_M) >>
+			 ICE_IEEE_ETS_PRIO_0_S);
+		offset++;
+	}
+
+	/* TC Bandwidth Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 *
+	 * TSA Assignment Table (8 octets)
+	 * Octets:| 9 | 10| 11| 12| 13| 14| 15| 16|
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 */
+	ice_for_each_traffic_class(i) {
+		ets_cfg->tcbwtable[i] = buf[offset];
+		ets_cfg->tsatable[i] = buf[ICE_MAX_TRAFFIC_CLASS + offset++];
+	}
+}
+
+/**
+ * ice_parse_ieee_etscfg_tlv
+ * @tlv: IEEE 802.1Qaz ETS CFG TLV
+ * @dcbcfg: Local store to update ETS CFG data
+ *
+ * Parses IEEE 802.1Qaz ETS CFG TLV
+ */
+static void
+ice_parse_ieee_etscfg_tlv(struct ice_lldp_org_tlv *tlv,
+			  struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *etscfg;
+	u8 *buf = tlv->tlvinfo;
+
+	/* First Octet post subtype
+	 * --------------------------
+	 * |will-|CBS  | Re-  | Max |
+	 * |ing  |     |served| TCs |
+	 * --------------------------
+	 * |1bit | 1bit|3 bits|3bits|
+	 */
+	etscfg = &dcbcfg->etscfg;
+	etscfg->willing = ((buf[0] & ICE_IEEE_ETS_WILLING_M) >>
+			   ICE_IEEE_ETS_WILLING_S);
+	etscfg->cbs = ((buf[0] & ICE_IEEE_ETS_CBS_M) >> ICE_IEEE_ETS_CBS_S);
+	etscfg->maxtcs = ((buf[0] & ICE_IEEE_ETS_MAXTC_M) >>
+			  ICE_IEEE_ETS_MAXTC_S);
+
+	/* Begin parsing at Priority Assignment Table (offset 1 in buf) */
+	ice_parse_ieee_ets_common_tlv(&buf[1], etscfg);
+}
+
+/**
+ * ice_parse_ieee_etsrec_tlv
+ * @tlv: IEEE 802.1Qaz ETS REC TLV
+ * @dcbcfg: Local store to update ETS REC data
+ *
+ * Parses IEEE 802.1Qaz ETS REC TLV
+ */
+static void
+ice_parse_ieee_etsrec_tlv(struct ice_lldp_org_tlv *tlv,
+			  struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+
+	/* Begin parsing at Priority Assignment Table (offset 1 in buf) */
+	ice_parse_ieee_ets_common_tlv(&buf[1], &dcbcfg->etsrec);
+}
+
+/**
+ * ice_parse_ieee_pfccfg_tlv
+ * @tlv: IEEE 802.1Qaz PFC CFG TLV
+ * @dcbcfg: Local store to update PFC CFG data
+ *
+ * Parses IEEE 802.1Qaz PFC CFG TLV
+ */
+static void
+ice_parse_ieee_pfccfg_tlv(struct ice_lldp_org_tlv *tlv,
+			  struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+
+	/* ----------------------------------------
+	 * |will-|MBC  | Re-  | PFC |  PFC Enable  |
+	 * |ing  |     |served| cap |              |
+	 * -----------------------------------------
+	 * |1bit | 1bit|2 bits|4bits| 1 octet      |
+	 */
+	dcbcfg->pfc.willing = ((buf[0] & ICE_IEEE_PFC_WILLING_M) >>
+			       ICE_IEEE_PFC_WILLING_S);
+	dcbcfg->pfc.mbc = ((buf[0] & ICE_IEEE_PFC_MBC_M) >> ICE_IEEE_PFC_MBC_S);
+	dcbcfg->pfc.pfccap = ((buf[0] & ICE_IEEE_PFC_CAP_M) >>
+			      ICE_IEEE_PFC_CAP_S);
+	dcbcfg->pfc.pfcena = buf[1];
+}
+
+/**
+ * ice_parse_ieee_app_tlv
+ * @tlv: IEEE 802.1Qaz APP TLV
+ * @dcbcfg: Local store to update APP PRIO data
+ *
+ * Parses IEEE 802.1Qaz APP PRIO TLV
+ */
+static void
+ice_parse_ieee_app_tlv(struct ice_lldp_org_tlv *tlv,
+		       struct ice_dcbx_cfg *dcbcfg)
+{
+	u16 offset = 0;
+	u16 typelen;
+	int i = 0;
+	u16 len;
+	u8 *buf;
+
+	typelen = ntohs(tlv->typelen);
+	len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+	buf = tlv->tlvinfo;
+
+	/* Removing sizeof(ouisubtype) and reserved byte from len.
+	 * Remaining len div 3 is number of APP TLVs.
+	 */
+	len -= (sizeof(tlv->ouisubtype) + 1);
+
+	/* Move offset to App Priority Table */
+	offset++;
+
+	/* Application Priority Table (3 octets)
+	 * Octets:|         1          |    2    |    3    |
+	 *        -----------------------------------------
+	 *        |Priority|Rsrvd| Sel |    Protocol ID    |
+	 *        -----------------------------------------
+	 *   Bits:|23    21|20 19|18 16|15                0|
+	 *        -----------------------------------------
+	 */
+	while (offset < len) {
+		dcbcfg->app[i].priority = ((buf[offset] &
+					    ICE_IEEE_APP_PRIO_M) >>
+					   ICE_IEEE_APP_PRIO_S);
+		dcbcfg->app[i].selector = ((buf[offset] &
+					    ICE_IEEE_APP_SEL_M) >>
+					   ICE_IEEE_APP_SEL_S);
+		dcbcfg->app[i].prot_id = (buf[offset + 1] << 0x8) |
+			buf[offset + 2];
+		/* Move to next app */
+		offset += 3;
+		i++;
+		if (i >= ICE_DCBX_MAX_APPS)
+			break;
+	}
+
+	dcbcfg->numapps = i;
+}
+
+/**
+ * ice_parse_ieee_tlv
+ * @tlv: IEEE 802.1Qaz TLV
+ * @dcbcfg: Local store to update ETS REC data
+ *
+ * Get the TLV subtype and send it to parsing function
+ * based on the subtype value
+ */
+static void
+ice_parse_ieee_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u32 ouisubtype;
+	u8 subtype;
+
+	ouisubtype = ntohl(tlv->ouisubtype);
+	subtype = (u8)((ouisubtype & ICE_LLDP_TLV_SUBTYPE_M) >>
+		       ICE_LLDP_TLV_SUBTYPE_S);
+	switch (subtype) {
+	case ICE_IEEE_SUBTYPE_ETS_CFG:
+		ice_parse_ieee_etscfg_tlv(tlv, dcbcfg);
+		break;
+	case ICE_IEEE_SUBTYPE_ETS_REC:
+		ice_parse_ieee_etsrec_tlv(tlv, dcbcfg);
+		break;
+	case ICE_IEEE_SUBTYPE_PFC_CFG:
+		ice_parse_ieee_pfccfg_tlv(tlv, dcbcfg);
+		break;
+	case ICE_IEEE_SUBTYPE_APP_PRI:
+		ice_parse_ieee_app_tlv(tlv, dcbcfg);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * ice_parse_cee_pgcfg_tlv
+ * @tlv: CEE DCBX PG CFG TLV
+ * @dcbcfg: Local store to update ETS CFG data
+ *
+ * Parses CEE DCBX PG CFG TLV
+ */
+static void
+ice_parse_cee_pgcfg_tlv(struct ice_cee_feat_tlv *tlv,
+			struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *etscfg;
+	u8 *buf = tlv->tlvinfo;
+	u16 offset = 0;
+	int i;
+
+	etscfg = &dcbcfg->etscfg;
+
+	if (tlv->en_will_err & ICE_CEE_FEAT_TLV_WILLING_M)
+		etscfg->willing = 1;
+
+	etscfg->cbs = 0;
+	/* Priority Group Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 */
+	for (i = 0; i < 4; i++) {
+		etscfg->prio_table[i * 2] =
+			((buf[offset] & ICE_CEE_PGID_PRIO_1_M) >>
+			 ICE_CEE_PGID_PRIO_1_S);
+		etscfg->prio_table[i * 2 + 1] =
+			((buf[offset] & ICE_CEE_PGID_PRIO_0_M) >>
+			 ICE_CEE_PGID_PRIO_0_S);
+		offset++;
+	}
+
+	/* PG Percentage Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |pg0|pg1|pg2|pg3|pg4|pg5|pg6|pg7|
+	 *        ---------------------------------
+	 */
+	ice_for_each_traffic_class(i) {
+		etscfg->tcbwtable[i] = buf[offset++];
+
+		if (etscfg->prio_table[i] == ICE_CEE_PGID_STRICT)
+			dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_STRICT;
+		else
+			dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_ETS;
+	}
+
+	/* Number of TCs supported (1 octet) */
+	etscfg->maxtcs = buf[offset];
+}
+
+/**
+ * ice_parse_cee_pfccfg_tlv
+ * @tlv: CEE DCBX PFC CFG TLV
+ * @dcbcfg: Local store to update PFC CFG data
+ *
+ * Parses CEE DCBX PFC CFG TLV
+ */
+static void
+ice_parse_cee_pfccfg_tlv(struct ice_cee_feat_tlv *tlv,
+			 struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+
+	if (tlv->en_will_err & ICE_CEE_FEAT_TLV_WILLING_M)
+		dcbcfg->pfc.willing = 1;
+
+	/* ------------------------
+	 * | PFC Enable | PFC TCs |
+	 * ------------------------
+	 * | 1 octet    | 1 octet |
+	 */
+	dcbcfg->pfc.pfcena = buf[0];
+	dcbcfg->pfc.pfccap = buf[1];
+}
+
+/**
+ * ice_parse_cee_app_tlv
+ * @tlv: CEE DCBX APP TLV
+ * @dcbcfg: Local store to update APP PRIO data
+ *
+ * Parses CEE DCBX APP PRIO TLV
+ */
+static void
+ice_parse_cee_app_tlv(struct ice_cee_feat_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u16 len, typelen, offset = 0;
+	struct ice_cee_app_prio *app;
+	u8 i;
+
+	typelen = ntohs(tlv->hdr.typelen);
+	len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+
+	dcbcfg->numapps = len / sizeof(*app);
+	if (!dcbcfg->numapps)
+		return;
+	if (dcbcfg->numapps > ICE_DCBX_MAX_APPS)
+		dcbcfg->numapps = ICE_DCBX_MAX_APPS;
+
+	for (i = 0; i < dcbcfg->numapps; i++) {
+		u8 up, selector;
+
+		app = (struct ice_cee_app_prio *)(tlv->tlvinfo + offset);
+		for (up = 0; up < ICE_MAX_USER_PRIORITY; up++)
+			if (app->prio_map & BIT(up))
+				break;
+
+		dcbcfg->app[i].priority = up;
+
+		/* Get Selector from lower 2 bits, and convert to IEEE */
+		selector = (app->upper_oui_sel & ICE_CEE_APP_SELECTOR_M);
+		switch (selector) {
+		case ICE_CEE_APP_SEL_ETHTYPE:
+			dcbcfg->app[i].selector = ICE_APP_SEL_ETHTYPE;
+			break;
+		case ICE_CEE_APP_SEL_TCPIP:
+			dcbcfg->app[i].selector = ICE_APP_SEL_TCPIP;
+			break;
+		default:
+			/* Keep selector as it is for unknown types */
+			dcbcfg->app[i].selector = selector;
+		}
+
+		dcbcfg->app[i].prot_id = ntohs(app->protocol);
+		/* Move to next app */
+		offset += sizeof(*app);
+	}
+}
+
+/**
+ * ice_parse_cee_tlv
+ * @tlv: CEE DCBX TLV
+ * @dcbcfg: Local store to update DCBX config data
+ *
+ * Get the TLV subtype and send it to parsing function
+ * based on the subtype value
+ */
+static void
+ice_parse_cee_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_cee_feat_tlv *sub_tlv;
+	u8 subtype, feat_tlv_count = 0;
+	u16 len, tlvlen, typelen;
+	u32 ouisubtype;
+
+	ouisubtype = ntohl(tlv->ouisubtype);
+	subtype = (u8)((ouisubtype & ICE_LLDP_TLV_SUBTYPE_M) >>
+		       ICE_LLDP_TLV_SUBTYPE_S);
+	/* Return if not CEE DCBX */
+	if (subtype != ICE_CEE_DCBX_TYPE)
+		return;
+
+	typelen = ntohs(tlv->typelen);
+	tlvlen = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+	len = sizeof(tlv->typelen) + sizeof(ouisubtype) +
+		sizeof(struct ice_cee_ctrl_tlv);
+	/* Return if no CEE DCBX Feature TLVs */
+	if (tlvlen <= len)
+		return;
+
+	sub_tlv = (struct ice_cee_feat_tlv *)((char *)tlv + len);
+	while (feat_tlv_count < ICE_CEE_MAX_FEAT_TYPE) {
+		u16 sublen;
+
+		typelen = ntohs(sub_tlv->hdr.typelen);
+		sublen = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+		subtype = (u8)((typelen & ICE_LLDP_TLV_TYPE_M) >>
+			       ICE_LLDP_TLV_TYPE_S);
+		switch (subtype) {
+		case ICE_CEE_SUBTYPE_PG_CFG:
+			ice_parse_cee_pgcfg_tlv(sub_tlv, dcbcfg);
+			break;
+		case ICE_CEE_SUBTYPE_PFC_CFG:
+			ice_parse_cee_pfccfg_tlv(sub_tlv, dcbcfg);
+			break;
+		case ICE_CEE_SUBTYPE_APP_PRI:
+			ice_parse_cee_app_tlv(sub_tlv, dcbcfg);
+			break;
+		default:
+			return;	/* Invalid Sub-type return */
+		}
+		feat_tlv_count++;
+		/* Move to next sub TLV */
+		sub_tlv = (struct ice_cee_feat_tlv *)
+			  ((char *)sub_tlv + sizeof(sub_tlv->hdr.typelen) +
+			   sublen);
+	}
+}
+
+/**
+ * ice_parse_org_tlv
+ * @tlv: Organization specific TLV
+ * @dcbcfg: Local store to update ETS REC data
+ *
+ * Currently IEEE 802.1Qaz and CEE DCBX TLV are supported, others
+ * will be returned
+ */
+static void
+ice_parse_org_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u32 ouisubtype;
+	u32 oui;
+
+	ouisubtype = ntohl(tlv->ouisubtype);
+	oui = ((ouisubtype & ICE_LLDP_TLV_OUI_M) >> ICE_LLDP_TLV_OUI_S);
+	switch (oui) {
+	case ICE_IEEE_8021QAZ_OUI:
+		ice_parse_ieee_tlv(tlv, dcbcfg);
+		break;
+	case ICE_CEE_DCBX_OUI:
+		ice_parse_cee_tlv(tlv, dcbcfg);
+		break;
+	default:
+		break; /* Other OUIs not supported */
+	}
+}
+
+/**
+ * ice_lldp_to_dcb_cfg
+ * @lldpmib: LLDPDU to be parsed
+ * @dcbcfg: store for LLDPDU data
+ *
+ * Parse DCB configuration from the LLDPDU
+ */
+static int ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_lldp_org_tlv *tlv;
+	u16 offset = 0;
+	int ret = 0;
+	u16 typelen;
+	u16 type;
+	u16 len;
+
+	if (!lldpmib || !dcbcfg)
+		return -EINVAL;
+
+	/* set to the start of LLDPDU */
+	lldpmib += ETH_HLEN;
+	tlv = (struct ice_lldp_org_tlv *)lldpmib;
+	while (1) {
+		typelen = ntohs(tlv->typelen);
+		type = ((typelen & ICE_LLDP_TLV_TYPE_M) >> ICE_LLDP_TLV_TYPE_S);
+		len = ((typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S);
+		offset += sizeof(typelen) + len;
+
+		/* END TLV or beyond LLDPDU size */
+		if (type == ICE_TLV_TYPE_END || offset > ICE_LLDPDU_SIZE)
+			break;
+
+		switch (type) {
+		case ICE_TLV_TYPE_ORG:
+			ice_parse_org_tlv(tlv, dcbcfg);
+			break;
+		default:
+			break;
+		}
+
+		/* Move to next TLV */
+		tlv = (struct ice_lldp_org_tlv *)
+		      ((char *)tlv + sizeof(tlv->typelen) + len);
+	}
+
+	return ret;
+}
+
+/**
+ * ice_aq_get_dcb_cfg
+ * @hw: pointer to the HW struct
+ * @mib_type: MIB type for the query
+ * @bridgetype: bridge type for the query (remote)
+ * @dcbcfg: store for LLDPDU data
+ *
+ * Query DCB configuration from the firmware
+ */
+int
+ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
+		   struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *lldpmib;
+	int ret;
+
+	/* Allocate the LLDPDU */
+	lldpmib = devm_kzalloc(ice_hw_to_dev(hw), ICE_LLDPDU_SIZE, GFP_KERNEL);
+	if (!lldpmib)
+		return -ENOMEM;
+
+	ret = ice_aq_get_lldp_mib(hw, bridgetype, mib_type, (void *)lldpmib,
+				  ICE_LLDPDU_SIZE, NULL, NULL, NULL);
+
+	if (!ret)
+		/* Parse LLDP MIB to get DCB configuration */
+		ret = ice_lldp_to_dcb_cfg(lldpmib, dcbcfg);
+
+	devm_kfree(ice_hw_to_dev(hw), lldpmib);
+
+	return ret;
+}
+
+/**
+ * ice_aq_start_stop_dcbx - Start/Stop DCBX service in FW
+ * @hw: pointer to the HW struct
+ * @start_dcbx_agent: True if DCBX Agent needs to be started
+ *		      False if DCBX Agent needs to be stopped
+ * @dcbx_agent_status: FW indicates back the DCBX agent status
+ *		       True if DCBX Agent is active
+ *		       False if DCBX Agent is stopped
+ * @cd: pointer to command details structure or NULL
+ *
+ * Start/Stop the embedded dcbx Agent. In case that this wrapper function
+ * returns 0, caller will need to check if FW returns back the same
+ * value as stated in dcbx_agent_status, and react accordingly. (0x0A09)
+ */
+int
+ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent,
+		       bool *dcbx_agent_status, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_lldp_stop_start_specific_agent *cmd;
+	struct ice_aq_desc desc;
+	u16 opcode;
+	int status;
+
+	cmd = &desc.params.lldp_agent_ctrl;
+
+	opcode = ice_aqc_opc_lldp_stop_start_specific_agent;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opcode);
+
+	if (start_dcbx_agent)
+		cmd->command = ICE_AQC_START_STOP_AGENT_START_DCBX;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+
+	*dcbx_agent_status = false;
+
+	if (!status &&
+	    cmd->command == ICE_AQC_START_STOP_AGENT_START_DCBX)
+		*dcbx_agent_status = true;
+
+	return status;
+}
+
+/**
+ * ice_aq_get_cee_dcb_cfg
+ * @hw: pointer to the HW struct
+ * @buff: response buffer that stores CEE operational configuration
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get CEE DCBX mode operational configuration from firmware (0x0A07)
+ */
+static int
+ice_aq_get_cee_dcb_cfg(struct ice_hw *hw,
+		       struct ice_aqc_get_cee_dcb_cfg_resp *buff,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_cee_dcb_cfg);
+
+	return ice_aq_send_cmd(hw, &desc, (void *)buff, sizeof(*buff), cd);
+}
+
+/**
+ * ice_aq_set_pfc_mode - Set PFC mode
+ * @hw: pointer to the HW struct
+ * @pfc_mode: value of PFC mode to set
+ * @cd: pointer to command details structure or NULL
+ *
+ * This AQ call configures the PFC mode to DSCP-based PFC mode or
+ * VLAN-based PFC (0x0303)
+ */
+int ice_aq_set_pfc_mode(struct ice_hw *hw, u8 pfc_mode, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_query_pfc_mode *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	if (pfc_mode > ICE_AQC_PFC_DSCP_BASED_PFC)
+		return -EINVAL;
+
+	cmd = &desc.params.set_query_pfc_mode;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_pfc_mode);
+
+	cmd->pfc_mode = pfc_mode;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+	if (status)
+		return status;
+
+	/* FW will write the PFC mode set back into cmd->pfc_mode, but if DCB is
+	 * disabled, FW will write back 0 to cmd->pfc_mode. After the AQ has
+	 * been executed, check if cmd->pfc_mode is what was requested. If not,
+	 * return an error.
+	 */
+	if (cmd->pfc_mode != pfc_mode)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/**
+ * ice_cee_to_dcb_cfg
+ * @cee_cfg: pointer to CEE configuration struct
+ * @pi: port information structure
+ *
+ * Convert CEE configuration from firmware to DCB configuration
+ */
+static void
+ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg,
+		   struct ice_port_info *pi)
+{
+	u32 status, tlv_status = le32_to_cpu(cee_cfg->tlv_status);
+	u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift, j;
+	u8 i, err, sync, oper, app_index, ice_app_sel_type;
+	u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio);
+	u16 ice_aqc_cee_app_mask, ice_aqc_cee_app_shift;
+	struct ice_dcbx_cfg *cmp_dcbcfg, *dcbcfg;
+	u16 ice_app_prot_id_type;
+
+	dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
+	dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE;
+	dcbcfg->tlv_status = tlv_status;
+
+	/* CEE PG data */
+	dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc;
+
+	/* Note that the FW creates the oper_prio_tc nibbles reversed
+	 * from those in the CEE Priority Group sub-TLV.
+	 */
+	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS / 2; i++) {
+		dcbcfg->etscfg.prio_table[i * 2] =
+			((cee_cfg->oper_prio_tc[i] & ICE_CEE_PGID_PRIO_0_M) >>
+			 ICE_CEE_PGID_PRIO_0_S);
+		dcbcfg->etscfg.prio_table[i * 2 + 1] =
+			((cee_cfg->oper_prio_tc[i] & ICE_CEE_PGID_PRIO_1_M) >>
+			 ICE_CEE_PGID_PRIO_1_S);
+	}
+
+	ice_for_each_traffic_class(i) {
+		dcbcfg->etscfg.tcbwtable[i] = cee_cfg->oper_tc_bw[i];
+
+		if (dcbcfg->etscfg.prio_table[i] == ICE_CEE_PGID_STRICT) {
+			/* Map it to next empty TC */
+			dcbcfg->etscfg.prio_table[i] = cee_cfg->oper_num_tc - 1;
+			dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_STRICT;
+		} else {
+			dcbcfg->etscfg.tsatable[i] = ICE_IEEE_TSA_ETS;
+		}
+	}
+
+	/* CEE PFC data */
+	dcbcfg->pfc.pfcena = cee_cfg->oper_pfc_en;
+	dcbcfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS;
+
+	/* CEE APP TLV data */
+	if (dcbcfg->app_mode == ICE_DCBX_APPS_NON_WILLING)
+		cmp_dcbcfg = &pi->qos_cfg.desired_dcbx_cfg;
+	else
+		cmp_dcbcfg = &pi->qos_cfg.remote_dcbx_cfg;
+
+	app_index = 0;
+	for (i = 0; i < 3; i++) {
+		if (i == 0) {
+			/* FCoE APP */
+			ice_aqc_cee_status_mask = ICE_AQC_CEE_FCOE_STATUS_M;
+			ice_aqc_cee_status_shift = ICE_AQC_CEE_FCOE_STATUS_S;
+			ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FCOE_M;
+			ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FCOE_S;
+			ice_app_sel_type = ICE_APP_SEL_ETHTYPE;
+			ice_app_prot_id_type = ETH_P_FCOE;
+		} else if (i == 1) {
+			/* iSCSI APP */
+			ice_aqc_cee_status_mask = ICE_AQC_CEE_ISCSI_STATUS_M;
+			ice_aqc_cee_status_shift = ICE_AQC_CEE_ISCSI_STATUS_S;
+			ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_ISCSI_M;
+			ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_ISCSI_S;
+			ice_app_sel_type = ICE_APP_SEL_TCPIP;
+			ice_app_prot_id_type = ISCSI_LISTEN_PORT;
+
+			for (j = 0; j < cmp_dcbcfg->numapps; j++) {
+				u16 prot_id = cmp_dcbcfg->app[j].prot_id;
+				u8 sel = cmp_dcbcfg->app[j].selector;
+
+				if  (sel == ICE_APP_SEL_TCPIP &&
+				     (prot_id == ISCSI_LISTEN_PORT ||
+				      prot_id == ICE_APP_PROT_ID_ISCSI_860)) {
+					ice_app_prot_id_type = prot_id;
+					break;
+				}
+			}
+		} else {
+			/* FIP APP */
+			ice_aqc_cee_status_mask = ICE_AQC_CEE_FIP_STATUS_M;
+			ice_aqc_cee_status_shift = ICE_AQC_CEE_FIP_STATUS_S;
+			ice_aqc_cee_app_mask = ICE_AQC_CEE_APP_FIP_M;
+			ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_FIP_S;
+			ice_app_sel_type = ICE_APP_SEL_ETHTYPE;
+			ice_app_prot_id_type = ETH_P_FIP;
+		}
+
+		status = (tlv_status & ice_aqc_cee_status_mask) >>
+			 ice_aqc_cee_status_shift;
+		err = (status & ICE_TLV_STATUS_ERR) ? 1 : 0;
+		sync = (status & ICE_TLV_STATUS_SYNC) ? 1 : 0;
+		oper = (status & ICE_TLV_STATUS_OPER) ? 1 : 0;
+		/* Add FCoE/iSCSI/FIP APP if Error is False and
+		 * Oper/Sync is True
+		 */
+		if (!err && sync && oper) {
+			dcbcfg->app[app_index].priority =
+				(app_prio & ice_aqc_cee_app_mask) >>
+				ice_aqc_cee_app_shift;
+			dcbcfg->app[app_index].selector = ice_app_sel_type;
+			dcbcfg->app[app_index].prot_id = ice_app_prot_id_type;
+			app_index++;
+		}
+	}
+
+	dcbcfg->numapps = app_index;
+}
+
+/**
+ * ice_get_ieee_or_cee_dcb_cfg
+ * @pi: port information structure
+ * @dcbx_mode: mode of DCBX (IEEE or CEE)
+ *
+ * Get IEEE or CEE mode DCB configuration from the Firmware
+ */
+static int ice_get_ieee_or_cee_dcb_cfg(struct ice_port_info *pi, u8 dcbx_mode)
+{
+	struct ice_dcbx_cfg *dcbx_cfg = NULL;
+	int ret;
+
+	if (!pi)
+		return -EINVAL;
+
+	if (dcbx_mode == ICE_DCBX_MODE_IEEE)
+		dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
+	else if (dcbx_mode == ICE_DCBX_MODE_CEE)
+		dcbx_cfg = &pi->qos_cfg.desired_dcbx_cfg;
+
+	/* Get Local DCB Config in case of ICE_DCBX_MODE_IEEE
+	 * or get CEE DCB Desired Config in case of ICE_DCBX_MODE_CEE
+	 */
+	ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_LOCAL,
+				 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbx_cfg);
+	if (ret)
+		goto out;
+
+	/* Get Remote DCB Config */
+	dcbx_cfg = &pi->qos_cfg.remote_dcbx_cfg;
+	ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
+				 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, dcbx_cfg);
+	/* Don't treat ENOENT as an error for Remote MIBs */
+	if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)
+		ret = 0;
+
+out:
+	return ret;
+}
+
+/**
+ * ice_get_dcb_cfg
+ * @pi: port information structure
+ *
+ * Get DCB configuration from the Firmware
+ */
+int ice_get_dcb_cfg(struct ice_port_info *pi)
+{
+	struct ice_aqc_get_cee_dcb_cfg_resp cee_cfg;
+	struct ice_dcbx_cfg *dcbx_cfg;
+	int ret;
+
+	if (!pi)
+		return -EINVAL;
+
+	ret = ice_aq_get_cee_dcb_cfg(pi->hw, &cee_cfg, NULL);
+	if (!ret) {
+		/* CEE mode */
+		ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_CEE);
+		ice_cee_to_dcb_cfg(&cee_cfg, pi);
+	} else if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) {
+		/* CEE mode not enabled try querying IEEE data */
+		dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
+		dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_IEEE;
+		ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_IEEE);
+	}
+
+	return ret;
+}
+
+/**
+ * ice_get_dcb_cfg_from_mib_change
+ * @pi: port information structure
+ * @event: pointer to the admin queue receive event
+ *
+ * Set DCB configuration from received MIB Change event
+ */
+void ice_get_dcb_cfg_from_mib_change(struct ice_port_info *pi,
+				     struct ice_rq_event_info *event)
+{
+	struct ice_dcbx_cfg *dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
+	struct ice_aqc_lldp_get_mib *mib;
+	u8 change_type, dcbx_mode;
+
+	mib = (struct ice_aqc_lldp_get_mib *)&event->desc.params.raw;
+
+	change_type = FIELD_GET(ICE_AQ_LLDP_MIB_TYPE_M,  mib->type);
+	if (change_type == ICE_AQ_LLDP_MIB_REMOTE)
+		dcbx_cfg = &pi->qos_cfg.remote_dcbx_cfg;
+
+	dcbx_mode = FIELD_GET(ICE_AQ_LLDP_DCBX_M, mib->type);
+
+	switch (dcbx_mode) {
+	case ICE_AQ_LLDP_DCBX_IEEE:
+		dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_IEEE;
+		ice_lldp_to_dcb_cfg(event->msg_buf, dcbx_cfg);
+		break;
+
+	case ICE_AQ_LLDP_DCBX_CEE:
+		pi->qos_cfg.desired_dcbx_cfg = pi->qos_cfg.local_dcbx_cfg;
+		ice_cee_to_dcb_cfg((struct ice_aqc_get_cee_dcb_cfg_resp *)
+				   event->msg_buf, pi);
+		break;
+	}
+}
+
+/**
+ * ice_init_dcb
+ * @hw: pointer to the HW struct
+ * @enable_mib_change: enable MIB change event
+ *
+ * Update DCB configuration from the Firmware
+ */
+int ice_init_dcb(struct ice_hw *hw, bool enable_mib_change)
+{
+	struct ice_qos_cfg *qos_cfg = &hw->port_info->qos_cfg;
+	int ret = 0;
+
+	if (!hw->func_caps.common_cap.dcb)
+		return -EOPNOTSUPP;
+
+	qos_cfg->is_sw_lldp = true;
+
+	/* Get DCBX status */
+	qos_cfg->dcbx_status = ice_get_dcbx_status(hw);
+
+	if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DONE ||
+	    qos_cfg->dcbx_status == ICE_DCBX_STATUS_IN_PROGRESS ||
+	    qos_cfg->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) {
+		/* Get current DCBX configuration */
+		ret = ice_get_dcb_cfg(hw->port_info);
+		if (ret)
+			return ret;
+		qos_cfg->is_sw_lldp = false;
+	} else if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DIS) {
+		return -EBUSY;
+	}
+
+	/* Configure the LLDP MIB change event */
+	if (enable_mib_change) {
+		ret = ice_aq_cfg_lldp_mib_change(hw, true, NULL);
+		if (ret)
+			qos_cfg->is_sw_lldp = true;
+	}
+
+	return ret;
+}
+
+/**
+ * ice_cfg_lldp_mib_change
+ * @hw: pointer to the HW struct
+ * @ena_mib: enable/disable MIB change event
+ *
+ * Configure (disable/enable) MIB
+ */
+int ice_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_mib)
+{
+	struct ice_qos_cfg *qos_cfg = &hw->port_info->qos_cfg;
+	int ret;
+
+	if (!hw->func_caps.common_cap.dcb)
+		return -EOPNOTSUPP;
+
+	/* Get DCBX status */
+	qos_cfg->dcbx_status = ice_get_dcbx_status(hw);
+
+	if (qos_cfg->dcbx_status == ICE_DCBX_STATUS_DIS)
+		return -EBUSY;
+
+	ret = ice_aq_cfg_lldp_mib_change(hw, ena_mib, NULL);
+	if (!ret)
+		qos_cfg->is_sw_lldp = !ena_mib;
+
+	return ret;
+}
+
+/**
+ * ice_add_ieee_ets_common_tlv
+ * @buf: Data buffer to be populated with ice_dcb_ets_cfg data
+ * @ets_cfg: Container for ice_dcb_ets_cfg data
+ *
+ * Populate the TLV buffer with ice_dcb_ets_cfg data
+ */
+static void
+ice_add_ieee_ets_common_tlv(u8 *buf, struct ice_dcb_ets_cfg *ets_cfg)
+{
+	u8 priority0, priority1;
+	u8 offset = 0;
+	int i;
+
+	/* Priority Assignment Table (4 octets)
+	 * Octets:|    1    |    2    |    3    |    4    |
+	 *        -----------------------------------------
+	 *        |pri0|pri1|pri2|pri3|pri4|pri5|pri6|pri7|
+	 *        -----------------------------------------
+	 *   Bits:|7  4|3  0|7  4|3  0|7  4|3  0|7  4|3  0|
+	 *        -----------------------------------------
+	 */
+	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS / 2; i++) {
+		priority0 = ets_cfg->prio_table[i * 2] & 0xF;
+		priority1 = ets_cfg->prio_table[i * 2 + 1] & 0xF;
+		buf[offset] = (priority0 << ICE_IEEE_ETS_PRIO_1_S) | priority1;
+		offset++;
+	}
+
+	/* TC Bandwidth Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 *
+	 * TSA Assignment Table (8 octets)
+	 * Octets:| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+	 *        ---------------------------------
+	 *        |tc0|tc1|tc2|tc3|tc4|tc5|tc6|tc7|
+	 *        ---------------------------------
+	 */
+	ice_for_each_traffic_class(i) {
+		buf[offset] = ets_cfg->tcbwtable[i];
+		buf[ICE_MAX_TRAFFIC_CLASS + offset] = ets_cfg->tsatable[i];
+		offset++;
+	}
+}
+
+/**
+ * ice_add_ieee_ets_tlv - Prepare ETS TLV in IEEE format
+ * @tlv: Fill the ETS config data in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Prepare IEEE 802.1Qaz ETS CFG TLV
+ */
+static void
+ice_add_ieee_ets_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *etscfg;
+	u8 *buf = tlv->tlvinfo;
+	u8 maxtcwilling = 0;
+	u32 ouisubtype;
+	u16 typelen;
+
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_IEEE_ETS_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_ETS_CFG);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* First Octet post subtype
+	 * --------------------------
+	 * |will-|CBS  | Re-  | Max |
+	 * |ing  |     |served| TCs |
+	 * --------------------------
+	 * |1bit | 1bit|3 bits|3bits|
+	 */
+	etscfg = &dcbcfg->etscfg;
+	if (etscfg->willing)
+		maxtcwilling = BIT(ICE_IEEE_ETS_WILLING_S);
+	maxtcwilling |= etscfg->maxtcs & ICE_IEEE_ETS_MAXTC_M;
+	buf[0] = maxtcwilling;
+
+	/* Begin adding at Priority Assignment Table (offset 1 in buf) */
+	ice_add_ieee_ets_common_tlv(&buf[1], etscfg);
+}
+
+/**
+ * ice_add_ieee_etsrec_tlv - Prepare ETS Recommended TLV in IEEE format
+ * @tlv: Fill ETS Recommended TLV in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Prepare IEEE 802.1Qaz ETS REC TLV
+ */
+static void
+ice_add_ieee_etsrec_tlv(struct ice_lldp_org_tlv *tlv,
+			struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *etsrec;
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+	u16 typelen;
+
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_IEEE_ETS_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_ETS_REC);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	etsrec = &dcbcfg->etsrec;
+
+	/* First Octet is reserved */
+	/* Begin adding at Priority Assignment Table (offset 1 in buf) */
+	ice_add_ieee_ets_common_tlv(&buf[1], etsrec);
+}
+
+/**
+ * ice_add_ieee_pfc_tlv - Prepare PFC TLV in IEEE format
+ * @tlv: Fill PFC TLV in IEEE format
+ * @dcbcfg: Local store which holds the PFC CFG data
+ *
+ * Prepare IEEE 802.1Qaz PFC CFG TLV
+ */
+static void
+ice_add_ieee_pfc_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+	u16 typelen;
+
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_IEEE_PFC_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_PFC_CFG);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* ----------------------------------------
+	 * |will-|MBC  | Re-  | PFC |  PFC Enable  |
+	 * |ing  |     |served| cap |              |
+	 * -----------------------------------------
+	 * |1bit | 1bit|2 bits|4bits| 1 octet      |
+	 */
+	if (dcbcfg->pfc.willing)
+		buf[0] = BIT(ICE_IEEE_PFC_WILLING_S);
+
+	if (dcbcfg->pfc.mbc)
+		buf[0] |= BIT(ICE_IEEE_PFC_MBC_S);
+
+	buf[0] |= dcbcfg->pfc.pfccap & 0xF;
+	buf[1] = dcbcfg->pfc.pfcena;
+}
+
+/**
+ * ice_add_ieee_app_pri_tlv -  Prepare APP TLV in IEEE format
+ * @tlv: Fill APP TLV in IEEE format
+ * @dcbcfg: Local store which holds the APP CFG data
+ *
+ * Prepare IEEE 802.1Qaz APP CFG TLV
+ */
+static void
+ice_add_ieee_app_pri_tlv(struct ice_lldp_org_tlv *tlv,
+			 struct ice_dcbx_cfg *dcbcfg)
+{
+	u16 typelen, len, offset = 0;
+	u8 priority, selector, i = 0;
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+
+	/* No APP TLVs then just return */
+	if (dcbcfg->numapps == 0)
+		return;
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_APP_PRI);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* Move offset to App Priority Table */
+	offset++;
+	/* Application Priority Table (3 octets)
+	 * Octets:|         1          |    2    |    3    |
+	 *        -----------------------------------------
+	 *        |Priority|Rsrvd| Sel |    Protocol ID    |
+	 *        -----------------------------------------
+	 *   Bits:|23    21|20 19|18 16|15                0|
+	 *        -----------------------------------------
+	 */
+	while (i < dcbcfg->numapps) {
+		priority = dcbcfg->app[i].priority & 0x7;
+		selector = dcbcfg->app[i].selector & 0x7;
+		buf[offset] = (priority << ICE_IEEE_APP_PRIO_S) | selector;
+		buf[offset + 1] = (dcbcfg->app[i].prot_id >> 0x8) & 0xFF;
+		buf[offset + 2] = dcbcfg->app[i].prot_id & 0xFF;
+		/* Move to next app */
+		offset += 3;
+		i++;
+		if (i >= ICE_DCBX_MAX_APPS)
+			break;
+	}
+	/* len includes size of ouisubtype + 1 reserved + 3*numapps */
+	len = sizeof(tlv->ouisubtype) + 1 + (i * 3);
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) | (len & 0x1FF));
+	tlv->typelen = htons(typelen);
+}
+
+/**
+ * ice_add_dscp_up_tlv - Prepare DSCP to UP TLV
+ * @tlv: location to build the TLV data
+ * @dcbcfg: location of data to convert to TLV
+ */
+static void
+ice_add_dscp_up_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+	u16 typelen;
+	int i;
+
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_DSCP_UP_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = (u32)((ICE_DSCP_OUI << ICE_LLDP_TLV_OUI_S) |
+			   ICE_DSCP_SUBTYPE_DSCP2UP);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* bytes 0 - 63 - IPv4 DSCP2UP LUT */
+	for (i = 0; i < ICE_DSCP_NUM_VAL; i++) {
+		/* IPv4 mapping */
+		buf[i] = dcbcfg->dscp_map[i];
+		/* IPv6 mapping */
+		buf[i + ICE_DSCP_IPV6_OFFSET] = dcbcfg->dscp_map[i];
+	}
+
+	/* byte 64 - IPv4 untagged traffic */
+	buf[i] = 0;
+
+	/* byte 144 - IPv6 untagged traffic */
+	buf[i + ICE_DSCP_IPV6_OFFSET] = 0;
+}
+
+#define ICE_BYTES_PER_TC	8
+/**
+ * ice_add_dscp_enf_tlv - Prepare DSCP Enforcement TLV
+ * @tlv: location to build the TLV data
+ */
+static void
+ice_add_dscp_enf_tlv(struct ice_lldp_org_tlv *tlv)
+{
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+	u16 typelen;
+
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_DSCP_ENF_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = (u32)((ICE_DSCP_OUI << ICE_LLDP_TLV_OUI_S) |
+			   ICE_DSCP_SUBTYPE_ENFORCE);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* Allow all DSCP values to be valid for all TC's (IPv4 and IPv6) */
+	memset(buf, 0, 2 * (ICE_MAX_TRAFFIC_CLASS * ICE_BYTES_PER_TC));
+}
+
+/**
+ * ice_add_dscp_tc_bw_tlv - Prepare DSCP BW for TC TLV
+ * @tlv: location to build the TLV data
+ * @dcbcfg: location of the data to convert to TLV
+ */
+static void
+ice_add_dscp_tc_bw_tlv(struct ice_lldp_org_tlv *tlv,
+		       struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *etscfg;
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+	u8 offset = 0;
+	u16 typelen;
+	int i;
+
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_DSCP_TC_BW_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = (u32)((ICE_DSCP_OUI << ICE_LLDP_TLV_OUI_S) |
+			   ICE_DSCP_SUBTYPE_TCBW);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* First Octect after subtype
+	 * ----------------------------
+	 * | RSV | CBS | RSV | Max TCs |
+	 * | 1b  | 1b  | 3b  | 3b      |
+	 * ----------------------------
+	 */
+	etscfg = &dcbcfg->etscfg;
+	buf[0] = etscfg->maxtcs & ICE_IEEE_ETS_MAXTC_M;
+
+	/* bytes 1 - 4 reserved */
+	offset = 5;
+
+	/* TC BW table
+	 * bytes 0 - 7 for TC 0 - 7
+	 *
+	 * TSA Assignment table
+	 * bytes 8 - 15 for TC 0 - 7
+	 */
+	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+		buf[offset] = etscfg->tcbwtable[i];
+		buf[offset + ICE_MAX_TRAFFIC_CLASS] = etscfg->tsatable[i];
+		offset++;
+	}
+}
+
+/**
+ * ice_add_dscp_pfc_tlv - Prepare DSCP PFC TLV
+ * @tlv: Fill PFC TLV in IEEE format
+ * @dcbcfg: Local store which holds the PFC CFG data
+ */
+static void
+ice_add_dscp_pfc_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 *buf = tlv->tlvinfo;
+	u32 ouisubtype;
+	u16 typelen;
+
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_DSCP_PFC_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = (u32)((ICE_DSCP_OUI << ICE_LLDP_TLV_OUI_S) |
+			   ICE_DSCP_SUBTYPE_PFC);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	buf[0] = dcbcfg->pfc.pfccap & 0xF;
+	buf[1] = dcbcfg->pfc.pfcena;
+}
+
+/**
+ * ice_add_dcb_tlv - Add all IEEE or DSCP TLVs
+ * @tlv: Fill TLV data in IEEE format
+ * @dcbcfg: Local store which holds the DCB Config
+ * @tlvid: Type of IEEE TLV
+ *
+ * Add tlv information
+ */
+static void
+ice_add_dcb_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg,
+		u16 tlvid)
+{
+	if (dcbcfg->pfc_mode == ICE_QOS_MODE_VLAN) {
+		switch (tlvid) {
+		case ICE_IEEE_TLV_ID_ETS_CFG:
+			ice_add_ieee_ets_tlv(tlv, dcbcfg);
+			break;
+		case ICE_IEEE_TLV_ID_ETS_REC:
+			ice_add_ieee_etsrec_tlv(tlv, dcbcfg);
+			break;
+		case ICE_IEEE_TLV_ID_PFC_CFG:
+			ice_add_ieee_pfc_tlv(tlv, dcbcfg);
+			break;
+		case ICE_IEEE_TLV_ID_APP_PRI:
+			ice_add_ieee_app_pri_tlv(tlv, dcbcfg);
+			break;
+		default:
+			break;
+		}
+	} else {
+		/* pfc_mode == ICE_QOS_MODE_DSCP */
+		switch (tlvid) {
+		case ICE_TLV_ID_DSCP_UP:
+			ice_add_dscp_up_tlv(tlv, dcbcfg);
+			break;
+		case ICE_TLV_ID_DSCP_ENF:
+			ice_add_dscp_enf_tlv(tlv);
+			break;
+		case ICE_TLV_ID_DSCP_TC_BW:
+			ice_add_dscp_tc_bw_tlv(tlv, dcbcfg);
+			break;
+		case ICE_TLV_ID_DSCP_TO_PFC:
+			ice_add_dscp_pfc_tlv(tlv, dcbcfg);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/**
+ * ice_dcb_cfg_to_lldp - Convert DCB configuration to MIB format
+ * @lldpmib: pointer to the HW struct
+ * @miblen: length of LLDP MIB
+ * @dcbcfg: Local store which holds the DCB Config
+ *
+ * Convert the DCB configuration to MIB format
+ */
+static void
+ice_dcb_cfg_to_lldp(u8 *lldpmib, u16 *miblen, struct ice_dcbx_cfg *dcbcfg)
+{
+	u16 len, offset = 0, tlvid = ICE_TLV_ID_START;
+	struct ice_lldp_org_tlv *tlv;
+	u16 typelen;
+
+	tlv = (struct ice_lldp_org_tlv *)lldpmib;
+	while (1) {
+		ice_add_dcb_tlv(tlv, dcbcfg, tlvid++);
+		typelen = ntohs(tlv->typelen);
+		len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
+		if (len)
+			offset += len + 2;
+		/* END TLV or beyond LLDPDU size */
+		if (tlvid >= ICE_TLV_ID_END_OF_LLDPPDU ||
+		    offset > ICE_LLDPDU_SIZE)
+			break;
+		/* Move to next TLV */
+		if (len)
+			tlv = (struct ice_lldp_org_tlv *)
+				((char *)tlv + sizeof(tlv->typelen) + len);
+	}
+	*miblen = offset;
+}
+
+/**
+ * ice_set_dcb_cfg - Set the local LLDP MIB to FW
+ * @pi: port information structure
+ *
+ * Set DCB configuration to the Firmware
+ */
+int ice_set_dcb_cfg(struct ice_port_info *pi)
+{
+	u8 mib_type, *lldpmib = NULL;
+	struct ice_dcbx_cfg *dcbcfg;
+	struct ice_hw *hw;
+	u16 miblen;
+	int ret;
+
+	if (!pi)
+		return -EINVAL;
+
+	hw = pi->hw;
+
+	/* update the HW local config */
+	dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
+	/* Allocate the LLDPDU */
+	lldpmib = devm_kzalloc(ice_hw_to_dev(hw), ICE_LLDPDU_SIZE, GFP_KERNEL);
+	if (!lldpmib)
+		return -ENOMEM;
+
+	mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
+	if (dcbcfg->app_mode == ICE_DCBX_APPS_NON_WILLING)
+		mib_type |= SET_LOCAL_MIB_TYPE_CEE_NON_WILLING;
+
+	ice_dcb_cfg_to_lldp(lldpmib, &miblen, dcbcfg);
+	ret = ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, miblen,
+				  NULL);
+
+	devm_kfree(ice_hw_to_dev(hw), lldpmib);
+
+	return ret;
+}
+
+/**
+ * ice_aq_query_port_ets - query port ETS configuration
+ * @pi: port information structure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @cd: pointer to command details structure or NULL
+ *
+ * query current port ETS configuration
+ */
+static int
+ice_aq_query_port_ets(struct ice_port_info *pi,
+		      struct ice_aqc_port_ets_elem *buf, u16 buf_size,
+		      struct ice_sq_cd *cd)
+{
+	struct ice_aqc_query_port_ets *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	if (!pi)
+		return -EINVAL;
+	cmd = &desc.params.port_ets;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_port_ets);
+	cmd->port_teid = pi->root->info.node_teid;
+
+	status = ice_aq_send_cmd(pi->hw, &desc, buf, buf_size, cd);
+	return status;
+}
+
+/**
+ * ice_update_port_tc_tree_cfg - update TC tree configuration
+ * @pi: port information structure
+ * @buf: pointer to buffer
+ *
+ * update the SW DB with the new TC changes
+ */
+static int
+ice_update_port_tc_tree_cfg(struct ice_port_info *pi,
+			    struct ice_aqc_port_ets_elem *buf)
+{
+	struct ice_sched_node *node, *tc_node;
+	struct ice_aqc_txsched_elem_data elem;
+	u32 teid1, teid2;
+	int status = 0;
+	u8 i, j;
+
+	if (!pi)
+		return -EINVAL;
+	/* suspend the missing TC nodes */
+	for (i = 0; i < pi->root->num_children; i++) {
+		teid1 = le32_to_cpu(pi->root->children[i]->info.node_teid);
+		ice_for_each_traffic_class(j) {
+			teid2 = le32_to_cpu(buf->tc_node_teid[j]);
+			if (teid1 == teid2)
+				break;
+		}
+		if (j < ICE_MAX_TRAFFIC_CLASS)
+			continue;
+		/* TC is missing */
+		pi->root->children[i]->in_use = false;
+	}
+	/* add the new TC nodes */
+	ice_for_each_traffic_class(j) {
+		teid2 = le32_to_cpu(buf->tc_node_teid[j]);
+		if (teid2 == ICE_INVAL_TEID)
+			continue;
+		/* Is it already present in the tree ? */
+		for (i = 0; i < pi->root->num_children; i++) {
+			tc_node = pi->root->children[i];
+			if (!tc_node)
+				continue;
+			teid1 = le32_to_cpu(tc_node->info.node_teid);
+			if (teid1 == teid2) {
+				tc_node->tc_num = j;
+				tc_node->in_use = true;
+				break;
+			}
+		}
+		if (i < pi->root->num_children)
+			continue;
+		/* new TC */
+		status = ice_sched_query_elem(pi->hw, teid2, &elem);
+		if (!status)
+			status = ice_sched_add_node(pi, 1, &elem, NULL);
+		if (status)
+			break;
+		/* update the TC number */
+		node = ice_sched_find_node_by_teid(pi->root, teid2);
+		if (node)
+			node->tc_num = j;
+	}
+	return status;
+}
+
+/**
+ * ice_query_port_ets - query port ETS configuration
+ * @pi: port information structure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @cd: pointer to command details structure or NULL
+ *
+ * query current port ETS configuration and update the
+ * SW DB with the TC changes
+ */
+int
+ice_query_port_ets(struct ice_port_info *pi,
+		   struct ice_aqc_port_ets_elem *buf, u16 buf_size,
+		   struct ice_sq_cd *cd)
+{
+	int status;
+
+	mutex_lock(&pi->sched_lock);
+	status = ice_aq_query_port_ets(pi, buf, buf_size, cd);
+	if (!status)
+		status = ice_update_port_tc_tree_cfg(pi, buf);
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.h b/drivers/net/ethernet/intel/ice/ice_dcb.h
new file mode 100644
index 0000000000..be34650a77
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.h
@@ -0,0 +1,200 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_H_
+#define _ICE_DCB_H_
+
+#include "ice_type.h"
+#include <scsi/iscsi_proto.h>
+
+#define ICE_DCBX_STATUS_NOT_STARTED	0
+#define ICE_DCBX_STATUS_IN_PROGRESS	1
+#define ICE_DCBX_STATUS_DONE		2
+#define ICE_DCBX_STATUS_DIS		7
+
+#define ICE_TLV_TYPE_END		0
+#define ICE_TLV_TYPE_ORG		127
+
+#define ICE_IEEE_8021QAZ_OUI		0x0080C2
+#define ICE_IEEE_SUBTYPE_ETS_CFG	9
+#define ICE_IEEE_SUBTYPE_ETS_REC	10
+#define ICE_IEEE_SUBTYPE_PFC_CFG	11
+#define ICE_IEEE_SUBTYPE_APP_PRI	12
+
+#define ICE_CEE_DCBX_OUI		0x001B21
+#define ICE_CEE_DCBX_TYPE		2
+
+#define ICE_DSCP_OUI			0xFFFFFF
+#define ICE_DSCP_SUBTYPE_DSCP2UP	0x41
+#define ICE_DSCP_SUBTYPE_ENFORCE	0x42
+#define ICE_DSCP_SUBTYPE_TCBW		0x43
+#define ICE_DSCP_SUBTYPE_PFC		0x44
+#define ICE_DSCP_IPV6_OFFSET		80
+
+#define ICE_CEE_SUBTYPE_PG_CFG		2
+#define ICE_CEE_SUBTYPE_PFC_CFG		3
+#define ICE_CEE_SUBTYPE_APP_PRI		4
+#define ICE_CEE_MAX_FEAT_TYPE		3
+/* Defines for LLDP TLV header */
+#define ICE_LLDP_TLV_LEN_S		0
+#define ICE_LLDP_TLV_LEN_M		(0x01FF << ICE_LLDP_TLV_LEN_S)
+#define ICE_LLDP_TLV_TYPE_S		9
+#define ICE_LLDP_TLV_TYPE_M		(0x7F << ICE_LLDP_TLV_TYPE_S)
+#define ICE_LLDP_TLV_SUBTYPE_S		0
+#define ICE_LLDP_TLV_SUBTYPE_M		(0xFF << ICE_LLDP_TLV_SUBTYPE_S)
+#define ICE_LLDP_TLV_OUI_S		8
+#define ICE_LLDP_TLV_OUI_M		(0xFFFFFFUL << ICE_LLDP_TLV_OUI_S)
+
+/* Defines for IEEE ETS TLV */
+#define ICE_IEEE_ETS_MAXTC_S	0
+#define ICE_IEEE_ETS_MAXTC_M		(0x7 << ICE_IEEE_ETS_MAXTC_S)
+#define ICE_IEEE_ETS_CBS_S		6
+#define ICE_IEEE_ETS_CBS_M		BIT(ICE_IEEE_ETS_CBS_S)
+#define ICE_IEEE_ETS_WILLING_S		7
+#define ICE_IEEE_ETS_WILLING_M		BIT(ICE_IEEE_ETS_WILLING_S)
+#define ICE_IEEE_ETS_PRIO_0_S		0
+#define ICE_IEEE_ETS_PRIO_0_M		(0x7 << ICE_IEEE_ETS_PRIO_0_S)
+#define ICE_IEEE_ETS_PRIO_1_S		4
+#define ICE_IEEE_ETS_PRIO_1_M		(0x7 << ICE_IEEE_ETS_PRIO_1_S)
+#define ICE_CEE_PGID_PRIO_0_S		0
+#define ICE_CEE_PGID_PRIO_0_M		(0xF << ICE_CEE_PGID_PRIO_0_S)
+#define ICE_CEE_PGID_PRIO_1_S		4
+#define ICE_CEE_PGID_PRIO_1_M		(0xF << ICE_CEE_PGID_PRIO_1_S)
+#define ICE_CEE_PGID_STRICT		15
+
+/* Defines for IEEE TSA types */
+#define ICE_IEEE_TSA_STRICT		0
+#define ICE_IEEE_TSA_ETS		2
+
+/* Defines for IEEE PFC TLV */
+#define ICE_IEEE_PFC_CAP_S		0
+#define ICE_IEEE_PFC_CAP_M		(0xF << ICE_IEEE_PFC_CAP_S)
+#define ICE_IEEE_PFC_MBC_S		6
+#define ICE_IEEE_PFC_MBC_M		BIT(ICE_IEEE_PFC_MBC_S)
+#define ICE_IEEE_PFC_WILLING_S		7
+#define ICE_IEEE_PFC_WILLING_M		BIT(ICE_IEEE_PFC_WILLING_S)
+
+/* Defines for IEEE APP TLV */
+#define ICE_IEEE_APP_SEL_S		0
+#define ICE_IEEE_APP_SEL_M		(0x7 << ICE_IEEE_APP_SEL_S)
+#define ICE_IEEE_APP_PRIO_S		5
+#define ICE_IEEE_APP_PRIO_M		(0x7 << ICE_IEEE_APP_PRIO_S)
+
+/* TLV definitions for preparing MIB */
+#define ICE_IEEE_TLV_ID_ETS_CFG		3
+#define ICE_IEEE_TLV_ID_ETS_REC		4
+#define ICE_IEEE_TLV_ID_PFC_CFG		5
+#define ICE_IEEE_TLV_ID_APP_PRI		6
+#define ICE_TLV_ID_END_OF_LLDPPDU	7
+#define ICE_TLV_ID_START		ICE_IEEE_TLV_ID_ETS_CFG
+#define ICE_TLV_ID_DSCP_UP		3
+#define ICE_TLV_ID_DSCP_ENF		4
+#define ICE_TLV_ID_DSCP_TC_BW		5
+#define ICE_TLV_ID_DSCP_TO_PFC		6
+
+#define ICE_IEEE_ETS_TLV_LEN		25
+#define ICE_IEEE_PFC_TLV_LEN		6
+#define ICE_IEEE_APP_TLV_LEN		11
+
+#define ICE_DSCP_UP_TLV_LEN		148
+#define ICE_DSCP_ENF_TLV_LEN		132
+#define ICE_DSCP_TC_BW_TLV_LEN		25
+#define ICE_DSCP_PFC_TLV_LEN		6
+
+/* IEEE 802.1AB LLDP Organization specific TLV */
+struct ice_lldp_org_tlv {
+	__be16 typelen;
+	__be32 ouisubtype;
+	u8 tlvinfo[];
+} __packed;
+
+struct ice_cee_tlv_hdr {
+	__be16 typelen;
+	u8 operver;
+	u8 maxver;
+};
+
+struct ice_cee_ctrl_tlv {
+	struct ice_cee_tlv_hdr hdr;
+	__be32 seqno;
+	__be32 ackno;
+};
+
+struct ice_cee_feat_tlv {
+	struct ice_cee_tlv_hdr hdr;
+	u8 en_will_err; /* Bits: |En|Will|Err|Reserved(5)| */
+#define ICE_CEE_FEAT_TLV_ENA_M		0x80
+#define ICE_CEE_FEAT_TLV_WILLING_M	0x40
+#define ICE_CEE_FEAT_TLV_ERR_M		0x20
+	u8 subtype;
+	u8 tlvinfo[];
+};
+
+struct ice_cee_app_prio {
+	__be16 protocol;
+	u8 upper_oui_sel; /* Bits: |Upper OUI(6)|Selector(2)| */
+#define ICE_CEE_APP_SELECTOR_M	0x03
+	__be16 lower_oui;
+	u8 prio_map;
+} __packed;
+
+int ice_aq_set_pfc_mode(struct ice_hw *hw, u8 pfc_mode, struct ice_sq_cd *cd);
+int
+ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
+		   struct ice_dcbx_cfg *dcbcfg);
+int ice_get_dcb_cfg(struct ice_port_info *pi);
+int ice_set_dcb_cfg(struct ice_port_info *pi);
+void ice_get_dcb_cfg_from_mib_change(struct ice_port_info *pi,
+				     struct ice_rq_event_info *event);
+int ice_init_dcb(struct ice_hw *hw, bool enable_mib_change);
+int
+ice_query_port_ets(struct ice_port_info *pi,
+		   struct ice_aqc_port_ets_elem *buf, u16 buf_size,
+		   struct ice_sq_cd *cmd_details);
+#ifdef CONFIG_DCB
+int
+ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist,
+		 struct ice_sq_cd *cd);
+int ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd);
+int
+ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent,
+		       bool *dcbx_agent_status, struct ice_sq_cd *cd);
+int ice_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_mib);
+#else /* CONFIG_DCB */
+static inline int
+ice_aq_stop_lldp(struct ice_hw __always_unused *hw,
+		 bool __always_unused shutdown_lldp_agent,
+		 bool __always_unused persist,
+		 struct ice_sq_cd __always_unused *cd)
+{
+	return 0;
+}
+
+static inline int
+ice_aq_start_lldp(struct ice_hw __always_unused *hw,
+		  bool __always_unused persist,
+		  struct ice_sq_cd __always_unused *cd)
+{
+	return 0;
+}
+
+static inline int
+ice_aq_start_stop_dcbx(struct ice_hw __always_unused *hw,
+		       bool __always_unused start_dcbx_agent,
+		       bool *dcbx_agent_status,
+		       struct ice_sq_cd __always_unused *cd)
+{
+	*dcbx_agent_status = false;
+
+	return 0;
+}
+
+static inline int
+ice_cfg_lldp_mib_change(struct ice_hw __always_unused *hw,
+			bool __always_unused ena_mib)
+{
+	return 0;
+}
+
+#endif /* CONFIG_DCB */
+#endif /* _ICE_DCB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
new file mode 100644
index 0000000000..850db8e0e6
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -0,0 +1,1095 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
+#include "ice_devlink.h"
+
+/**
+ * ice_dcb_get_ena_tc - return bitmap of enabled TCs
+ * @dcbcfg: DCB config to evaluate for enabled TCs
+ */
+static u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg)
+{
+	u8 i, num_tc, ena_tc = 1;
+
+	num_tc = ice_dcb_get_num_tc(dcbcfg);
+
+	for (i = 0; i < num_tc; i++)
+		ena_tc |= BIT(i);
+
+	return ena_tc;
+}
+
+/**
+ * ice_is_pfc_causing_hung_q
+ * @pf: pointer to PF structure
+ * @txqueue: Tx queue which is supposedly hung queue
+ *
+ * find if PFC is causing the hung queue, if yes return true else false
+ */
+bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue)
+{
+	u8 num_tcs = 0, i, tc, up_mapped_tc, up_in_tc = 0;
+	u64 ref_prio_xoff[ICE_MAX_UP];
+	struct ice_vsi *vsi;
+	u32 up2tc;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return false;
+
+	ice_for_each_traffic_class(i)
+		if (vsi->tc_cfg.ena_tc & BIT(i))
+			num_tcs++;
+
+	/* first find out the TC to which the hung queue belongs to */
+	for (tc = 0; tc < num_tcs - 1; tc++)
+		if (ice_find_q_in_range(vsi->tc_cfg.tc_info[tc].qoffset,
+					vsi->tc_cfg.tc_info[tc + 1].qoffset,
+					txqueue))
+			break;
+
+	/* Build a bit map of all UPs associated to the suspect hung queue TC,
+	 * so that we check for its counter increment.
+	 */
+	up2tc = rd32(&pf->hw, PRTDCB_TUP2TC);
+	for (i = 0; i < ICE_MAX_UP; i++) {
+		up_mapped_tc = (up2tc >> (i * 3)) & 0x7;
+		if (up_mapped_tc == tc)
+			up_in_tc |= BIT(i);
+	}
+
+	/* Now that we figured out that hung queue is PFC enabled, still the
+	 * Tx timeout can be legitimate. So to make sure Tx timeout is
+	 * absolutely caused by PFC storm, check if the counters are
+	 * incrementing.
+	 */
+	for (i = 0; i < ICE_MAX_UP; i++)
+		if (up_in_tc & BIT(i))
+			ref_prio_xoff[i] = pf->stats.priority_xoff_rx[i];
+
+	ice_update_dcb_stats(pf);
+
+	for (i = 0; i < ICE_MAX_UP; i++)
+		if (up_in_tc & BIT(i))
+			if (pf->stats.priority_xoff_rx[i] > ref_prio_xoff[i])
+				return true;
+
+	return false;
+}
+
+/**
+ * ice_dcb_get_mode - gets the DCB mode
+ * @port_info: pointer to port info structure
+ * @host: if set it's HOST if not it's MANAGED
+ */
+static u8 ice_dcb_get_mode(struct ice_port_info *port_info, bool host)
+{
+	u8 mode;
+
+	if (host)
+		mode = DCB_CAP_DCBX_HOST;
+	else
+		mode = DCB_CAP_DCBX_LLD_MANAGED;
+
+	if (port_info->qos_cfg.local_dcbx_cfg.dcbx_mode & ICE_DCBX_MODE_CEE)
+		return mode | DCB_CAP_DCBX_VER_CEE;
+	else
+		return mode | DCB_CAP_DCBX_VER_IEEE;
+}
+
+/**
+ * ice_dcb_get_num_tc - Get the number of TCs from DCBX config
+ * @dcbcfg: config to retrieve number of TCs from
+ */
+u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg)
+{
+	bool tc_unused = false;
+	u8 num_tc = 0;
+	u8 ret = 0;
+	int i;
+
+	/* Scan the ETS Config Priority Table to find traffic classes
+	 * enabled and create a bitmask of enabled TCs
+	 */
+	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
+		num_tc |= BIT(dcbcfg->etscfg.prio_table[i]);
+
+	/* Scan bitmask for contiguous TCs starting with TC0 */
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		if (num_tc & BIT(i)) {
+			if (!tc_unused) {
+				ret++;
+			} else {
+				pr_err("Non-contiguous TCs - Disabling DCB\n");
+				return 1;
+			}
+		} else {
+			tc_unused = true;
+		}
+	}
+
+	/* There is always at least 1 TC */
+	if (!ret)
+		ret = 1;
+
+	return ret;
+}
+
+/**
+ * ice_get_first_droptc - returns number of first droptc
+ * @vsi: used to find the first droptc
+ *
+ * This function returns the value of first_droptc.
+ * When DCB is enabled, first droptc information is derived from enabled_tc
+ * and PFC enabled bits. otherwise this function returns 0 as there is one
+ * TC without DCB (tc0)
+ */
+static u8 ice_get_first_droptc(struct ice_vsi *vsi)
+{
+	struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg;
+	struct device *dev = ice_pf_to_dev(vsi->back);
+	u8 num_tc, ena_tc_map, pfc_ena_map;
+	u8 i;
+
+	num_tc = ice_dcb_get_num_tc(cfg);
+
+	/* get bitmap of enabled TCs */
+	ena_tc_map = ice_dcb_get_ena_tc(cfg);
+
+	/* get bitmap of PFC enabled TCs */
+	pfc_ena_map = cfg->pfc.pfcena;
+
+	/* get first TC that is not PFC enabled */
+	for (i = 0; i < num_tc; i++) {
+		if ((ena_tc_map & BIT(i)) && (!(pfc_ena_map & BIT(i)))) {
+			dev_dbg(dev, "first drop tc = %d\n", i);
+			return i;
+		}
+	}
+
+	dev_dbg(dev, "first drop tc = 0\n");
+	return 0;
+}
+
+/**
+ * ice_vsi_set_dcb_tc_cfg - Set VSI's TC based on DCB configuration
+ * @vsi: pointer to the VSI instance
+ */
+void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi)
+{
+	struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg;
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+		vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg);
+		vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg);
+		break;
+	case ICE_VSI_CHNL:
+		vsi->tc_cfg.ena_tc = BIT(ice_get_first_droptc(vsi));
+		vsi->tc_cfg.numtc = 1;
+		break;
+	case ICE_VSI_CTRL:
+	case ICE_VSI_LB:
+	default:
+		vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS;
+		vsi->tc_cfg.numtc = 1;
+	}
+}
+
+/**
+ * ice_dcb_get_tc - Get the TC associated with the queue
+ * @vsi: ptr to the VSI
+ * @queue_index: queue number associated with VSI
+ */
+u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index)
+{
+	return vsi->tx_rings[queue_index]->dcb_tc;
+}
+
+/**
+ * ice_vsi_cfg_dcb_rings - Update rings to reflect DCB TC
+ * @vsi: VSI owner of rings being updated
+ */
+void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi)
+{
+	struct ice_tx_ring *tx_ring;
+	struct ice_rx_ring *rx_ring;
+	u16 qoffset, qcount;
+	int i, n;
+
+	if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) {
+		/* Reset the TC information */
+		ice_for_each_txq(vsi, i) {
+			tx_ring = vsi->tx_rings[i];
+			tx_ring->dcb_tc = 0;
+		}
+		ice_for_each_rxq(vsi, i) {
+			rx_ring = vsi->rx_rings[i];
+			rx_ring->dcb_tc = 0;
+		}
+		return;
+	}
+
+	ice_for_each_traffic_class(n) {
+		if (!(vsi->tc_cfg.ena_tc & BIT(n)))
+			break;
+
+		qoffset = vsi->tc_cfg.tc_info[n].qoffset;
+		qcount = vsi->tc_cfg.tc_info[n].qcount_tx;
+		for (i = qoffset; i < (qoffset + qcount); i++)
+			vsi->tx_rings[i]->dcb_tc = n;
+
+		qcount = vsi->tc_cfg.tc_info[n].qcount_rx;
+		for (i = qoffset; i < (qoffset + qcount); i++)
+			vsi->rx_rings[i]->dcb_tc = n;
+	}
+	/* applicable only if "all_enatc" is set, which will be set from
+	 * setup_tc method as part of configuring channels
+	 */
+	if (vsi->all_enatc) {
+		u8 first_droptc = ice_get_first_droptc(vsi);
+
+		/* When DCB is configured, TC for ADQ queues (which are really
+		 * PF queues) should be the first drop TC of the main VSI
+		 */
+		ice_for_each_chnl_tc(n) {
+			if (!(vsi->all_enatc & BIT(n)))
+				break;
+
+			qoffset = vsi->mqprio_qopt.qopt.offset[n];
+			qcount = vsi->mqprio_qopt.qopt.count[n];
+			for (i = qoffset; i < (qoffset + qcount); i++) {
+				vsi->tx_rings[i]->dcb_tc = first_droptc;
+				vsi->rx_rings[i]->dcb_tc = first_droptc;
+			}
+		}
+	}
+}
+
+/**
+ * ice_dcb_ena_dis_vsi - disable certain VSIs for DCB config/reconfig
+ * @pf: pointer to the PF instance
+ * @ena: true to enable VSIs, false to disable
+ * @locked: true if caller holds RTNL lock, false otherwise
+ *
+ * Before a new DCB configuration can be applied, VSIs of type PF, SWITCHDEV
+ * and CHNL need to be brought down. Following completion of DCB configuration
+ * the VSIs that were downed need to be brought up again. This helper function
+ * does both.
+ */
+static void ice_dcb_ena_dis_vsi(struct ice_pf *pf, bool ena, bool locked)
+{
+	int i;
+
+	ice_for_each_vsi(pf, i) {
+		struct ice_vsi *vsi = pf->vsi[i];
+
+		if (!vsi)
+			continue;
+
+		switch (vsi->type) {
+		case ICE_VSI_CHNL:
+		case ICE_VSI_SWITCHDEV_CTRL:
+		case ICE_VSI_PF:
+			if (ena)
+				ice_ena_vsi(vsi, locked);
+			else
+				ice_dis_vsi(vsi, locked);
+			break;
+		default:
+			continue;
+		}
+	}
+}
+
+/**
+ * ice_dcb_bwchk - check if ETS bandwidth input parameters are correct
+ * @pf: pointer to the PF struct
+ * @dcbcfg: pointer to DCB config structure
+ */
+int ice_dcb_bwchk(struct ice_pf *pf, struct ice_dcbx_cfg *dcbcfg)
+{
+	struct ice_dcb_ets_cfg *etscfg = &dcbcfg->etscfg;
+	u8 num_tc, total_bw = 0;
+	int i;
+
+	/* returns number of contigous TCs and 1 TC for non-contigous TCs,
+	 * since at least 1 TC has to be configured
+	 */
+	num_tc = ice_dcb_get_num_tc(dcbcfg);
+
+	/* no bandwidth checks required if there's only one TC, so assign
+	 * all bandwidth to TC0 and return
+	 */
+	if (num_tc == 1) {
+		etscfg->tcbwtable[0] = ICE_TC_MAX_BW;
+		return 0;
+	}
+
+	for (i = 0; i < num_tc; i++)
+		total_bw += etscfg->tcbwtable[i];
+
+	if (!total_bw) {
+		etscfg->tcbwtable[0] = ICE_TC_MAX_BW;
+	} else if (total_bw != ICE_TC_MAX_BW) {
+		dev_err(ice_pf_to_dev(pf), "Invalid config, total bandwidth must equal 100\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_pf_dcb_cfg - Apply new DCB configuration
+ * @pf: pointer to the PF struct
+ * @new_cfg: DCBX config to apply
+ * @locked: is the RTNL held
+ */
+int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
+{
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	struct ice_dcbx_cfg *old_cfg, *curr_cfg;
+	struct device *dev = ice_pf_to_dev(pf);
+	int ret = ICE_DCB_NO_HW_CHG;
+	struct iidc_event *event;
+	struct ice_vsi *pf_vsi;
+
+	curr_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+	/* FW does not care if change happened */
+	if (!pf->hw.port_info->qos_cfg.is_sw_lldp)
+		ret = ICE_DCB_HW_CHG_RST;
+
+	/* Enable DCB tagging only when more than one TC */
+	if (ice_dcb_get_num_tc(new_cfg) > 1) {
+		dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n");
+		if (pf->hw.port_info->is_custom_tx_enabled) {
+			dev_err(dev, "Custom Tx scheduler feature enabled, can't configure DCB\n");
+			return -EBUSY;
+		}
+		ice_tear_down_devlink_rate_tree(pf);
+
+		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	} else {
+		dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n");
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	}
+
+	if (!memcmp(new_cfg, curr_cfg, sizeof(*new_cfg))) {
+		dev_dbg(dev, "No change in DCB config required\n");
+		return ret;
+	}
+
+	if (ice_dcb_bwchk(pf, new_cfg))
+		return -EINVAL;
+
+	/* Store old config in case FW config fails */
+	old_cfg = kmemdup(curr_cfg, sizeof(*old_cfg), GFP_KERNEL);
+	if (!old_cfg)
+		return -ENOMEM;
+
+	dev_info(dev, "Commit DCB Configuration to the hardware\n");
+	pf_vsi = ice_get_main_vsi(pf);
+	if (!pf_vsi) {
+		dev_dbg(dev, "PF VSI doesn't exist\n");
+		ret = -EINVAL;
+		goto free_cfg;
+	}
+
+	/* Notify AUX drivers about impending change to TCs */
+	event = kzalloc(sizeof(*event), GFP_KERNEL);
+	if (!event) {
+		ret = -ENOMEM;
+		goto free_cfg;
+	}
+
+	set_bit(IIDC_EVENT_BEFORE_TC_CHANGE, event->type);
+	ice_send_event_to_aux(pf, event);
+	kfree(event);
+
+	/* avoid race conditions by holding the lock while disabling and
+	 * re-enabling the VSI
+	 */
+	if (!locked)
+		rtnl_lock();
+
+	/* disable VSIs affected by DCB changes */
+	ice_dcb_ena_dis_vsi(pf, false, true);
+
+	memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg));
+	memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
+	memcpy(&new_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec));
+
+	/* Only send new config to HW if we are in SW LLDP mode. Otherwise,
+	 * the new config came from the HW in the first place.
+	 */
+	if (pf->hw.port_info->qos_cfg.is_sw_lldp) {
+		ret = ice_set_dcb_cfg(pf->hw.port_info);
+		if (ret) {
+			dev_err(dev, "Set DCB Config failed\n");
+			/* Restore previous settings to local config */
+			memcpy(curr_cfg, old_cfg, sizeof(*curr_cfg));
+			goto out;
+		}
+	}
+
+	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(dev, "Query Port ETS failed\n");
+		goto out;
+	}
+
+	ice_pf_dcb_recfg(pf, false);
+
+out:
+	/* enable previously downed VSIs */
+	ice_dcb_ena_dis_vsi(pf, true, true);
+	if (!locked)
+		rtnl_unlock();
+free_cfg:
+	kfree(old_cfg);
+	return ret;
+}
+
+/**
+ * ice_cfg_etsrec_defaults - Set default ETS recommended DCB config
+ * @pi: port information structure
+ */
+static void ice_cfg_etsrec_defaults(struct ice_port_info *pi)
+{
+	struct ice_dcbx_cfg *dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
+	u8 i;
+
+	/* Ensure ETS recommended DCB configuration is not already set */
+	if (dcbcfg->etsrec.maxtcs)
+		return;
+
+	/* In CEE mode, set the default to 1 TC */
+	dcbcfg->etsrec.maxtcs = 1;
+	for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) {
+		dcbcfg->etsrec.tcbwtable[i] = i ? 0 : 100;
+		dcbcfg->etsrec.tsatable[i] = i ? ICE_IEEE_TSA_STRICT :
+						 ICE_IEEE_TSA_ETS;
+	}
+}
+
+/**
+ * ice_dcb_need_recfg - Check if DCB needs reconfig
+ * @pf: board private structure
+ * @old_cfg: current DCB config
+ * @new_cfg: new DCB config
+ */
+static bool
+ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+		   struct ice_dcbx_cfg *new_cfg)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	bool need_reconfig = false;
+
+	/* Check if ETS configuration has changed */
+	if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg,
+		   sizeof(new_cfg->etscfg))) {
+		/* If Priority Table has changed reconfig is needed */
+		if (memcmp(&new_cfg->etscfg.prio_table,
+			   &old_cfg->etscfg.prio_table,
+			   sizeof(new_cfg->etscfg.prio_table))) {
+			need_reconfig = true;
+			dev_dbg(dev, "ETS UP2TC changed.\n");
+		}
+
+		if (memcmp(&new_cfg->etscfg.tcbwtable,
+			   &old_cfg->etscfg.tcbwtable,
+			   sizeof(new_cfg->etscfg.tcbwtable)))
+			dev_dbg(dev, "ETS TC BW Table changed.\n");
+
+		if (memcmp(&new_cfg->etscfg.tsatable,
+			   &old_cfg->etscfg.tsatable,
+			   sizeof(new_cfg->etscfg.tsatable)))
+			dev_dbg(dev, "ETS TSA Table changed.\n");
+	}
+
+	/* Check if PFC configuration has changed */
+	if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
+		need_reconfig = true;
+		dev_dbg(dev, "PFC config change detected.\n");
+	}
+
+	/* Check if APP Table has changed */
+	if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app))) {
+		need_reconfig = true;
+		dev_dbg(dev, "APP Table change detected.\n");
+	}
+
+	dev_dbg(dev, "dcb need_reconfig=%d\n", need_reconfig);
+	return need_reconfig;
+}
+
+/**
+ * ice_dcb_rebuild - rebuild DCB post reset
+ * @pf: physical function instance
+ */
+void ice_dcb_rebuild(struct ice_pf *pf)
+{
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_dcbx_cfg *err_cfg;
+	int ret;
+
+	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(dev, "Query Port ETS failed\n");
+		goto dcb_error;
+	}
+
+	mutex_lock(&pf->tc_mutex);
+
+	if (!pf->hw.port_info->qos_cfg.is_sw_lldp)
+		ice_cfg_etsrec_defaults(pf->hw.port_info);
+
+	ret = ice_set_dcb_cfg(pf->hw.port_info);
+	if (ret) {
+		dev_err(dev, "Failed to set DCB config in rebuild\n");
+		goto dcb_error;
+	}
+
+	if (!pf->hw.port_info->qos_cfg.is_sw_lldp) {
+		ret = ice_cfg_lldp_mib_change(&pf->hw, true);
+		if (ret && !pf->hw.port_info->qos_cfg.is_sw_lldp) {
+			dev_err(dev, "Failed to register for MIB changes\n");
+			goto dcb_error;
+		}
+	}
+
+	dev_info(dev, "DCB info restored\n");
+	ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(dev, "Query Port ETS failed\n");
+		goto dcb_error;
+	}
+
+	mutex_unlock(&pf->tc_mutex);
+
+	return;
+
+dcb_error:
+	dev_err(dev, "Disabling DCB until new settings occur\n");
+	err_cfg = kzalloc(sizeof(*err_cfg), GFP_KERNEL);
+	if (!err_cfg) {
+		mutex_unlock(&pf->tc_mutex);
+		return;
+	}
+
+	err_cfg->etscfg.willing = true;
+	err_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW;
+	err_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
+	memcpy(&err_cfg->etsrec, &err_cfg->etscfg, sizeof(err_cfg->etsrec));
+	/* Coverity warns the return code of ice_pf_dcb_cfg() is not checked
+	 * here as is done for other calls to that function. That check is
+	 * not necessary since this is in this function's error cleanup path.
+	 * Suppress the Coverity warning with the following comment...
+	 */
+	/* coverity[check_return] */
+	ice_pf_dcb_cfg(pf, err_cfg, false);
+	kfree(err_cfg);
+
+	mutex_unlock(&pf->tc_mutex);
+}
+
+/**
+ * ice_dcb_init_cfg - set the initial DCB config in SW
+ * @pf: PF to apply config to
+ * @locked: Is the RTNL held
+ */
+static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked)
+{
+	struct ice_dcbx_cfg *newcfg;
+	struct ice_port_info *pi;
+	int ret = 0;
+
+	pi = pf->hw.port_info;
+	newcfg = kmemdup(&pi->qos_cfg.local_dcbx_cfg, sizeof(*newcfg),
+			 GFP_KERNEL);
+	if (!newcfg)
+		return -ENOMEM;
+
+	memset(&pi->qos_cfg.local_dcbx_cfg, 0, sizeof(*newcfg));
+
+	dev_info(ice_pf_to_dev(pf), "Configuring initial DCB values\n");
+	if (ice_pf_dcb_cfg(pf, newcfg, locked))
+		ret = -EINVAL;
+
+	kfree(newcfg);
+
+	return ret;
+}
+
+/**
+ * ice_dcb_sw_dflt_cfg - Apply a default DCB config
+ * @pf: PF to apply config to
+ * @ets_willing: configure ETS willing
+ * @locked: was this function called with RTNL held
+ */
+int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked)
+{
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	struct ice_dcbx_cfg *dcbcfg;
+	struct ice_port_info *pi;
+	struct ice_hw *hw;
+	int ret;
+
+	hw = &pf->hw;
+	pi = hw->port_info;
+	dcbcfg = kzalloc(sizeof(*dcbcfg), GFP_KERNEL);
+	if (!dcbcfg)
+		return -ENOMEM;
+
+	memset(&pi->qos_cfg.local_dcbx_cfg, 0, sizeof(*dcbcfg));
+
+	dcbcfg->etscfg.willing = ets_willing ? 1 : 0;
+	dcbcfg->etscfg.maxtcs = hw->func_caps.common_cap.maxtc;
+	dcbcfg->etscfg.tcbwtable[0] = 100;
+	dcbcfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
+
+	memcpy(&dcbcfg->etsrec, &dcbcfg->etscfg,
+	       sizeof(dcbcfg->etsrec));
+	dcbcfg->etsrec.willing = 0;
+
+	dcbcfg->pfc.willing = 1;
+	dcbcfg->pfc.pfccap = hw->func_caps.common_cap.maxtc;
+
+	dcbcfg->numapps = 1;
+	dcbcfg->app[0].selector = ICE_APP_SEL_ETHTYPE;
+	dcbcfg->app[0].priority = 3;
+	dcbcfg->app[0].prot_id = ETH_P_FCOE;
+
+	ret = ice_pf_dcb_cfg(pf, dcbcfg, locked);
+	kfree(dcbcfg);
+	if (ret)
+		return ret;
+
+	return ice_query_port_ets(pi, &buf, sizeof(buf), NULL);
+}
+
+/**
+ * ice_dcb_tc_contig - Check that TCs are contiguous
+ * @prio_table: pointer to priority table
+ *
+ * Check if TCs begin with TC0 and are contiguous
+ */
+static bool ice_dcb_tc_contig(u8 *prio_table)
+{
+	bool found_empty = false;
+	u8 used_tc = 0;
+	int i;
+
+	/* Create a bitmap of used TCs */
+	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
+		used_tc |= BIT(prio_table[i]);
+
+	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) {
+		if (used_tc & BIT(i)) {
+			if (found_empty)
+				return false;
+		} else {
+			found_empty = true;
+		}
+	}
+
+	return true;
+}
+
+/**
+ * ice_dcb_noncontig_cfg - Configure DCB for non-contiguous TCs
+ * @pf: pointer to the PF struct
+ *
+ * If non-contiguous TCs, then configure SW DCB with TC0 and ETS non-willing
+ */
+static int ice_dcb_noncontig_cfg(struct ice_pf *pf)
+{
+	struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+	struct device *dev = ice_pf_to_dev(pf);
+	int ret;
+
+	/* Configure SW DCB default with ETS non-willing */
+	ret = ice_dcb_sw_dflt_cfg(pf, false, true);
+	if (ret) {
+		dev_err(dev, "Failed to set local DCB config %d\n", ret);
+		return ret;
+	}
+
+	/* Reconfigure with ETS willing so that FW will send LLDP MIB event */
+	dcbcfg->etscfg.willing = 1;
+	ret = ice_set_dcb_cfg(pf->hw.port_info);
+	if (ret)
+		dev_err(dev, "Failed to set DCB to unwilling\n");
+
+	return ret;
+}
+
+/**
+ * ice_pf_dcb_recfg - Reconfigure all VEBs and VSIs
+ * @pf: pointer to the PF struct
+ * @locked: is adev device lock held
+ *
+ * Assumed caller has already disabled all VSIs before
+ * calling this function. Reconfiguring DCB based on
+ * local_dcbx_cfg.
+ */
+void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked)
+{
+	struct ice_dcbx_cfg *dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+	struct iidc_event *event;
+	u8 tc_map = 0;
+	int v, ret;
+
+	/* Update each VSI */
+	ice_for_each_vsi(pf, v) {
+		struct ice_vsi *vsi = pf->vsi[v];
+
+		if (!vsi)
+			continue;
+
+		if (vsi->type == ICE_VSI_PF) {
+			tc_map = ice_dcb_get_ena_tc(dcbcfg);
+
+			/* If DCBX request non-contiguous TC, then configure
+			 * default TC
+			 */
+			if (!ice_dcb_tc_contig(dcbcfg->etscfg.prio_table)) {
+				tc_map = ICE_DFLT_TRAFFIC_CLASS;
+				ice_dcb_noncontig_cfg(pf);
+			}
+		} else if (vsi->type == ICE_VSI_CHNL) {
+			tc_map = BIT(ice_get_first_droptc(vsi));
+		} else {
+			tc_map = ICE_DFLT_TRAFFIC_CLASS;
+		}
+
+		ret = ice_vsi_cfg_tc(vsi, tc_map);
+		if (ret) {
+			dev_err(ice_pf_to_dev(pf), "Failed to config TC for VSI index: %d\n",
+				vsi->idx);
+			continue;
+		}
+		/* no need to proceed with remaining cfg if it is CHNL
+		 * or switchdev VSI
+		 */
+		if (vsi->type == ICE_VSI_CHNL ||
+		    vsi->type == ICE_VSI_SWITCHDEV_CTRL)
+			continue;
+
+		ice_vsi_map_rings_to_vectors(vsi);
+		if (vsi->type == ICE_VSI_PF)
+			ice_dcbnl_set_all(vsi);
+	}
+	if (!locked) {
+		/* Notify the AUX drivers that TC change is finished */
+		event = kzalloc(sizeof(*event), GFP_KERNEL);
+		if (!event)
+			return;
+
+		set_bit(IIDC_EVENT_AFTER_TC_CHANGE, event->type);
+		ice_send_event_to_aux(pf, event);
+		kfree(event);
+	}
+}
+
+/**
+ * ice_init_pf_dcb - initialize DCB for a PF
+ * @pf: PF to initialize DCB for
+ * @locked: Was function called with RTNL held
+ */
+int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_port_info *port_info;
+	struct ice_hw *hw = &pf->hw;
+	int err;
+
+	port_info = hw->port_info;
+
+	err = ice_init_dcb(hw, false);
+	if (err && !port_info->qos_cfg.is_sw_lldp) {
+		dev_err(dev, "Error initializing DCB %d\n", err);
+		goto dcb_init_err;
+	}
+
+	dev_info(dev, "DCB is enabled in the hardware, max number of TCs supported on this port are %d\n",
+		 pf->hw.func_caps.common_cap.maxtc);
+	if (err) {
+		struct ice_vsi *pf_vsi;
+
+		/* FW LLDP is disabled, activate SW DCBX/LLDP mode */
+		dev_info(dev, "FW LLDP is disabled, DCBx/LLDP in SW mode.\n");
+		clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
+		err = ice_aq_set_pfc_mode(&pf->hw, ICE_AQC_PFC_VLAN_BASED_PFC,
+					  NULL);
+		if (err)
+			dev_info(dev, "Failed to set VLAN PFC mode\n");
+
+		err = ice_dcb_sw_dflt_cfg(pf, true, locked);
+		if (err) {
+			dev_err(dev, "Failed to set local DCB config %d\n",
+				err);
+			err = -EIO;
+			goto dcb_init_err;
+		}
+
+		/* If the FW DCBX engine is not running then Rx LLDP packets
+		 * need to be redirected up the stack.
+		 */
+		pf_vsi = ice_get_main_vsi(pf);
+		if (!pf_vsi) {
+			dev_err(dev, "Failed to set local DCB config\n");
+			err = -EIO;
+			goto dcb_init_err;
+		}
+
+		ice_cfg_sw_lldp(pf_vsi, false, true);
+
+		pf->dcbx_cap = ice_dcb_get_mode(port_info, true);
+		return 0;
+	}
+
+	set_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
+
+	/* DCBX/LLDP enabled in FW, set DCBNL mode advertisement */
+	pf->dcbx_cap = ice_dcb_get_mode(port_info, false);
+
+	err = ice_dcb_init_cfg(pf, locked);
+	if (err)
+		goto dcb_init_err;
+
+	return 0;
+
+dcb_init_err:
+	dev_err(dev, "DCB init failed\n");
+	return err;
+}
+
+/**
+ * ice_update_dcb_stats - Update DCB stats counters
+ * @pf: PF whose stats needs to be updated
+ */
+void ice_update_dcb_stats(struct ice_pf *pf)
+{
+	struct ice_hw_port_stats *prev_ps, *cur_ps;
+	struct ice_hw *hw = &pf->hw;
+	u8 port;
+	int i;
+
+	port = hw->port_info->lport;
+	prev_ps = &pf->stats_prev;
+	cur_ps = &pf->stats;
+
+	if (ice_is_reset_in_progress(pf->state))
+		pf->stat_prev_loaded = false;
+
+	for (i = 0; i < 8; i++) {
+		ice_stat_update32(hw, GLPRT_PXOFFRXC(port, i),
+				  pf->stat_prev_loaded,
+				  &prev_ps->priority_xoff_rx[i],
+				  &cur_ps->priority_xoff_rx[i]);
+		ice_stat_update32(hw, GLPRT_PXONRXC(port, i),
+				  pf->stat_prev_loaded,
+				  &prev_ps->priority_xon_rx[i],
+				  &cur_ps->priority_xon_rx[i]);
+		ice_stat_update32(hw, GLPRT_PXONTXC(port, i),
+				  pf->stat_prev_loaded,
+				  &prev_ps->priority_xon_tx[i],
+				  &cur_ps->priority_xon_tx[i]);
+		ice_stat_update32(hw, GLPRT_PXOFFTXC(port, i),
+				  pf->stat_prev_loaded,
+				  &prev_ps->priority_xoff_tx[i],
+				  &cur_ps->priority_xoff_tx[i]);
+		ice_stat_update32(hw, GLPRT_RXON2OFFCNT(port, i),
+				  pf->stat_prev_loaded,
+				  &prev_ps->priority_xon_2_xoff[i],
+				  &cur_ps->priority_xon_2_xoff[i]);
+	}
+}
+
+/**
+ * ice_tx_prepare_vlan_flags_dcb - prepare VLAN tagging for DCB
+ * @tx_ring: ring to send buffer on
+ * @first: pointer to struct ice_tx_buf
+ *
+ * This should not be called if the outer VLAN is software offloaded as the VLAN
+ * tag will already be configured with the correct ID and priority bits
+ */
+void
+ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring,
+			      struct ice_tx_buf *first)
+{
+	struct sk_buff *skb = first->skb;
+
+	if (!test_bit(ICE_FLAG_DCB_ENA, tx_ring->vsi->back->flags))
+		return;
+
+	/* Insert 802.1p priority into VLAN header */
+	if ((first->tx_flags & ICE_TX_FLAGS_HW_VLAN ||
+	     first->tx_flags & ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN) ||
+	    skb->priority != TC_PRIO_CONTROL) {
+		first->vid &= ~VLAN_PRIO_MASK;
+		/* Mask the lower 3 bits to set the 802.1p priority */
+		first->vid |= (skb->priority << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
+		/* if this is not already set it means a VLAN 0 + priority needs
+		 * to be offloaded
+		 */
+		if (tx_ring->flags & ICE_TX_FLAGS_RING_VLAN_L2TAG2)
+			first->tx_flags |= ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN;
+		else
+			first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
+	}
+}
+
+/**
+ * ice_dcb_is_mib_change_pending - Check if MIB change is pending
+ * @state: MIB change state
+ */
+static bool ice_dcb_is_mib_change_pending(u8 state)
+{
+	return ICE_AQ_LLDP_MIB_CHANGE_PENDING ==
+		FIELD_GET(ICE_AQ_LLDP_MIB_CHANGE_STATE_M, state);
+}
+
+/**
+ * ice_dcb_process_lldp_set_mib_change - Process MIB change
+ * @pf: ptr to ice_pf
+ * @event: pointer to the admin queue receive event
+ */
+void
+ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
+				    struct ice_rq_event_info *event)
+{
+	struct ice_aqc_port_ets_elem buf = { 0 };
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_aqc_lldp_get_mib *mib;
+	struct ice_dcbx_cfg tmp_dcbx_cfg;
+	bool pending_handled = true;
+	bool need_reconfig = false;
+	struct ice_port_info *pi;
+	u8 mib_type;
+	int ret;
+
+	/* Not DCB capable or capability disabled */
+	if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)))
+		return;
+
+	if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) {
+		dev_dbg(dev, "MIB Change Event in HOST mode\n");
+		return;
+	}
+
+	pi = pf->hw.port_info;
+	mib = (struct ice_aqc_lldp_get_mib *)&event->desc.params.raw;
+
+	/* Ignore if event is not for Nearest Bridge */
+	mib_type = FIELD_GET(ICE_AQ_LLDP_BRID_TYPE_M, mib->type);
+	dev_dbg(dev, "LLDP event MIB bridge type 0x%x\n", mib_type);
+	if (mib_type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
+		return;
+
+	/* A pending change event contains accurate config information, and
+	 * the FW setting has not been updaed yet, so detect if change is
+	 * pending to determine where to pull config information from
+	 * (FW vs event)
+	 */
+	if (ice_dcb_is_mib_change_pending(mib->state))
+		pending_handled = false;
+
+	/* Check MIB Type and return if event for Remote MIB update */
+	mib_type = FIELD_GET(ICE_AQ_LLDP_MIB_TYPE_M, mib->type);
+	dev_dbg(dev, "LLDP event mib type %s\n", mib_type ? "remote" : "local");
+	if (mib_type == ICE_AQ_LLDP_MIB_REMOTE) {
+		/* Update the remote cached instance and return */
+		if (!pending_handled) {
+			ice_get_dcb_cfg_from_mib_change(pi, event);
+		} else {
+			ret =
+			  ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
+					     ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
+					     &pi->qos_cfg.remote_dcbx_cfg);
+			if (ret)
+				dev_dbg(dev, "Failed to get remote DCB config\n");
+		}
+		return;
+	}
+
+	/* That a DCB change has happened is now determined */
+	mutex_lock(&pf->tc_mutex);
+
+	/* store the old configuration */
+	tmp_dcbx_cfg = pi->qos_cfg.local_dcbx_cfg;
+
+	/* Reset the old DCBX configuration data */
+	memset(&pi->qos_cfg.local_dcbx_cfg, 0,
+	       sizeof(pi->qos_cfg.local_dcbx_cfg));
+
+	/* Get updated DCBX data from firmware */
+	if (!pending_handled) {
+		ice_get_dcb_cfg_from_mib_change(pi, event);
+	} else {
+		ret = ice_get_dcb_cfg(pi);
+		if (ret) {
+			dev_err(dev, "Failed to get DCB config\n");
+			goto out;
+		}
+	}
+
+	/* No change detected in DCBX configs */
+	if (!memcmp(&tmp_dcbx_cfg, &pi->qos_cfg.local_dcbx_cfg,
+		    sizeof(tmp_dcbx_cfg))) {
+		dev_dbg(dev, "No change detected in DCBX configuration.\n");
+		goto out;
+	}
+
+	pf->dcbx_cap = ice_dcb_get_mode(pi, false);
+
+	need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg,
+					   &pi->qos_cfg.local_dcbx_cfg);
+	ice_dcbnl_flush_apps(pf, &tmp_dcbx_cfg, &pi->qos_cfg.local_dcbx_cfg);
+	if (!need_reconfig)
+		goto out;
+
+	/* Enable DCB tagging only when more than one TC */
+	if (ice_dcb_get_num_tc(&pi->qos_cfg.local_dcbx_cfg) > 1) {
+		dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n");
+		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	} else {
+		dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n");
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	}
+
+	/* Send Execute Pending MIB Change event if it is a Pending event */
+	if (!pending_handled) {
+		ice_lldp_execute_pending_mib(&pf->hw);
+		pending_handled = true;
+	}
+
+	rtnl_lock();
+	/* disable VSIs affected by DCB changes */
+	ice_dcb_ena_dis_vsi(pf, false, true);
+
+	ret = ice_query_port_ets(pi, &buf, sizeof(buf), NULL);
+	if (ret) {
+		dev_err(dev, "Query Port ETS failed\n");
+		goto unlock_rtnl;
+	}
+
+	/* changes in configuration update VSI */
+	ice_pf_dcb_recfg(pf, false);
+
+	/* enable previously downed VSIs */
+	ice_dcb_ena_dis_vsi(pf, true, true);
+unlock_rtnl:
+	rtnl_unlock();
+out:
+	mutex_unlock(&pf->tc_mutex);
+
+	/* Send Execute Pending MIB Change event if it is a Pending event */
+	if (!pending_handled)
+		ice_lldp_execute_pending_mib(&pf->hw);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
new file mode 100644
index 0000000000..800879a88c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_LIB_H_
+#define _ICE_DCB_LIB_H_
+
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+
+#ifdef CONFIG_DCB
+#define ICE_TC_MAX_BW		100 /* Default Max BW percentage */
+#define ICE_DCB_HW_CHG_RST	0 /* DCB configuration changed with reset */
+#define ICE_DCB_NO_HW_CHG	1 /* DCB configuration did not change */
+#define ICE_DCB_HW_CHG		2 /* DCB configuration changed, no reset */
+
+void ice_dcb_rebuild(struct ice_pf *pf);
+int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked);
+u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
+void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi);
+bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue);
+u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index);
+int
+ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked);
+int ice_dcb_bwchk(struct ice_pf *pf, struct ice_dcbx_cfg *dcbcfg);
+void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked);
+void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi);
+int ice_init_pf_dcb(struct ice_pf *pf, bool locked);
+void ice_update_dcb_stats(struct ice_pf *pf);
+void
+ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring,
+			      struct ice_tx_buf *first);
+void
+ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
+				    struct ice_rq_event_info *event);
+/**
+ * ice_find_q_in_range
+ * @low: start of queue range for a TC i.e. offset of TC
+ * @high: start of queue for next TC
+ * @tx_q: hung_queue/tx_queue
+ *
+ * finds if queue 'tx_q' falls between the two offsets of any given TC
+ */
+static inline bool ice_find_q_in_range(u16 low, u16 high, unsigned int tx_q)
+{
+	return (tx_q >= low) && (tx_q < high);
+}
+
+static inline void
+ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, u8 dcb_tc)
+{
+	tlan_ctx->cgd_num = dcb_tc;
+}
+
+static inline bool ice_is_dcb_active(struct ice_pf *pf)
+{
+	return (test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags) ||
+		test_bit(ICE_FLAG_DCB_ENA, pf->flags));
+}
+
+static inline u8 ice_get_pfc_mode(struct ice_pf *pf)
+{
+	return pf->hw.port_info->qos_cfg.local_dcbx_cfg.pfc_mode;
+}
+
+#else
+static inline void ice_dcb_rebuild(struct ice_pf *pf) { }
+
+static inline void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi)
+{
+	vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS;
+	vsi->tc_cfg.numtc = 1;
+}
+
+static inline u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
+{
+	return ICE_DFLT_TRAFFIC_CLASS;
+}
+
+static inline u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
+{
+	return 1;
+}
+
+static inline u8
+ice_dcb_get_tc(struct ice_vsi __always_unused *vsi,
+	       int __always_unused queue_index)
+{
+	return 0;
+}
+
+static inline int
+ice_init_pf_dcb(struct ice_pf *pf, bool __always_unused locked)
+{
+	dev_dbg(ice_pf_to_dev(pf), "DCB not supported\n");
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_pf_dcb_cfg(struct ice_pf __always_unused *pf,
+	       struct ice_dcbx_cfg __always_unused *new_cfg,
+	       bool __always_unused locked)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring __always_unused *tx_ring,
+			      struct ice_tx_buf __always_unused *first)
+{
+	return 0;
+}
+
+static inline bool ice_is_dcb_active(struct ice_pf __always_unused *pf)
+{
+	return false;
+}
+
+static inline bool
+ice_is_pfc_causing_hung_q(struct ice_pf __always_unused *pf,
+			  unsigned int __always_unused txqueue)
+{
+	return false;
+}
+
+static inline u8 ice_get_pfc_mode(struct ice_pf *pf)
+{
+	return 0;
+}
+
+static inline void ice_pf_dcb_recfg(struct ice_pf *pf, bool locked) { }
+static inline void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) { }
+static inline void ice_update_dcb_stats(struct ice_pf *pf) { }
+static inline void
+ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event) { }
+static inline void ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, u8 dcb_tc) { }
+#endif /* CONFIG_DCB */
+#endif /* _ICE_DCB_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
new file mode 100644
index 0000000000..e1fbc6de45
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
@@ -0,0 +1,1146 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_dcb.h"
+#include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
+#include <net/dcbnl.h>
+
+/**
+ * ice_dcbnl_devreset - perform enough of a ifdown/ifup to sync DCBNL info
+ * @netdev: device associated with interface that needs reset
+ */
+static void ice_dcbnl_devreset(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	while (ice_is_reset_in_progress(pf->state))
+		usleep_range(1000, 2000);
+
+	dev_close(netdev);
+	netdev_state_change(netdev);
+	dev_open(netdev, NULL);
+	netdev_state_change(netdev);
+}
+
+/**
+ * ice_dcbnl_getets - retrieve local ETS configuration
+ * @netdev: the relevant netdev
+ * @ets: struct to hold ETS configuration
+ */
+static int ice_dcbnl_getets(struct net_device *netdev, struct ieee_ets *ets)
+{
+	struct ice_dcbx_cfg *dcbxcfg;
+	struct ice_pf *pf;
+
+	pf = ice_netdev_to_pf(netdev);
+	dcbxcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+	ets->willing = dcbxcfg->etscfg.willing;
+	ets->ets_cap = dcbxcfg->etscfg.maxtcs;
+	ets->cbs = dcbxcfg->etscfg.cbs;
+	memcpy(ets->tc_tx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_tx_bw));
+	memcpy(ets->tc_rx_bw, dcbxcfg->etscfg.tcbwtable, sizeof(ets->tc_rx_bw));
+	memcpy(ets->tc_tsa, dcbxcfg->etscfg.tsatable, sizeof(ets->tc_tsa));
+	memcpy(ets->prio_tc, dcbxcfg->etscfg.prio_table, sizeof(ets->prio_tc));
+	memcpy(ets->tc_reco_bw, dcbxcfg->etsrec.tcbwtable,
+	       sizeof(ets->tc_reco_bw));
+	memcpy(ets->tc_reco_tsa, dcbxcfg->etsrec.tsatable,
+	       sizeof(ets->tc_reco_tsa));
+	memcpy(ets->reco_prio_tc, dcbxcfg->etscfg.prio_table,
+	       sizeof(ets->reco_prio_tc));
+
+	return 0;
+}
+
+/**
+ * ice_dcbnl_setets - set IEEE ETS configuration
+ * @netdev: pointer to relevant netdev
+ * @ets: struct to hold ETS configuration
+ */
+static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int bwcfg = 0, bwrec = 0;
+	int err, i;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	if (pf->lag && pf->lag->bonded) {
+		netdev_err(netdev, "DCB changes not allowed when in a bond\n");
+		return -EINVAL;
+	}
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+	mutex_lock(&pf->tc_mutex);
+
+	new_cfg->etscfg.willing = ets->willing;
+	new_cfg->etscfg.cbs = ets->cbs;
+	ice_for_each_traffic_class(i) {
+		new_cfg->etscfg.tcbwtable[i] = ets->tc_tx_bw[i];
+		bwcfg += ets->tc_tx_bw[i];
+		new_cfg->etscfg.tsatable[i] = ets->tc_tsa[i];
+		if (new_cfg->pfc_mode == ICE_QOS_MODE_VLAN) {
+			/* in DSCP mode up->tc mapping cannot change */
+			new_cfg->etscfg.prio_table[i] = ets->prio_tc[i];
+			new_cfg->etsrec.prio_table[i] = ets->reco_prio_tc[i];
+		}
+		new_cfg->etsrec.tcbwtable[i] = ets->tc_reco_bw[i];
+		bwrec += ets->tc_reco_bw[i];
+		new_cfg->etsrec.tsatable[i] = ets->tc_reco_tsa[i];
+	}
+
+	if (ice_dcb_bwchk(pf, new_cfg)) {
+		err = -EINVAL;
+		goto ets_out;
+	}
+
+	new_cfg->etscfg.maxtcs = pf->hw.func_caps.common_cap.maxtc;
+
+	if (!bwcfg)
+		new_cfg->etscfg.tcbwtable[0] = 100;
+
+	if (!bwrec)
+		new_cfg->etsrec.tcbwtable[0] = 100;
+
+	err = ice_pf_dcb_cfg(pf, new_cfg, true);
+	/* return of zero indicates new cfg applied */
+	if (err == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+	if (err == ICE_DCB_NO_HW_CHG)
+		err = ICE_DCB_HW_CHG_RST;
+
+ets_out:
+	mutex_unlock(&pf->tc_mutex);
+	return err;
+}
+
+/**
+ * ice_dcbnl_getnumtcs - Get max number of traffic classes supported
+ * @dev: pointer to netdev struct
+ * @tcid: TC ID
+ * @num: total number of TCs supported by the adapter
+ *
+ * Return the total number of TCs supported
+ */
+static int
+ice_dcbnl_getnumtcs(struct net_device *dev, int __always_unused tcid, u8 *num)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(dev);
+
+	if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
+		return -EINVAL;
+
+	*num = pf->hw.func_caps.common_cap.maxtc;
+	return 0;
+}
+
+/**
+ * ice_dcbnl_getdcbx - retrieve current DCBX capability
+ * @netdev: pointer to the netdev struct
+ */
+static u8 ice_dcbnl_getdcbx(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	return pf->dcbx_cap;
+}
+
+/**
+ * ice_dcbnl_setdcbx - set required DCBX capability
+ * @netdev: the corresponding netdev
+ * @mode: required mode
+ */
+static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_qos_cfg *qos_cfg;
+
+	/* if FW LLDP agent is running, DCBNL not allowed to change mode */
+	if (test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
+		return ICE_DCB_NO_HW_CHG;
+
+	/* No support for LLD_MANAGED modes or CEE+IEEE */
+	if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    ((mode & DCB_CAP_DCBX_VER_IEEE) && (mode & DCB_CAP_DCBX_VER_CEE)) ||
+	    !(mode & DCB_CAP_DCBX_HOST))
+		return ICE_DCB_NO_HW_CHG;
+
+	/* Already set to the given mode no change */
+	if (mode == pf->dcbx_cap)
+		return ICE_DCB_NO_HW_CHG;
+
+	if (pf->lag && pf->lag->bonded) {
+		netdev_err(netdev, "DCB changes not allowed when in a bond\n");
+		return ICE_DCB_NO_HW_CHG;
+	}
+
+	qos_cfg = &pf->hw.port_info->qos_cfg;
+
+	/* DSCP configuration is not DCBx negotiated */
+	if (qos_cfg->local_dcbx_cfg.pfc_mode == ICE_QOS_MODE_DSCP)
+		return ICE_DCB_NO_HW_CHG;
+
+	pf->dcbx_cap = mode;
+
+	if (mode & DCB_CAP_DCBX_VER_CEE)
+		qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE;
+	else
+		qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE;
+
+	dev_info(ice_pf_to_dev(pf), "DCBx mode = 0x%x\n", mode);
+	return ICE_DCB_HW_CHG_RST;
+}
+
+/**
+ * ice_dcbnl_get_perm_hw_addr - MAC address used by DCBX
+ * @netdev: pointer to netdev struct
+ * @perm_addr: buffer to return permanent MAC address
+ */
+static void ice_dcbnl_get_perm_hw_addr(struct net_device *netdev, u8 *perm_addr)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+	int i, j;
+
+	memset(perm_addr, 0xff, MAX_ADDR_LEN);
+
+	for (i = 0; i < netdev->addr_len; i++)
+		perm_addr[i] = pi->mac.perm_addr[i];
+
+	for (j = 0; j < netdev->addr_len; j++, i++)
+		perm_addr[i] = pi->mac.perm_addr[j];
+}
+
+/**
+ * ice_get_pfc_delay - Retrieve PFC Link Delay
+ * @hw: pointer to HW struct
+ * @delay: holds the PFC Link Delay value
+ */
+static void ice_get_pfc_delay(struct ice_hw *hw, u16 *delay)
+{
+	u32 val;
+
+	val = rd32(hw, PRTDCB_GENC);
+	*delay = (u16)((val & PRTDCB_GENC_PFCLDA_M) >> PRTDCB_GENC_PFCLDA_S);
+}
+
+/**
+ * ice_dcbnl_getpfc - retrieve local IEEE PFC config
+ * @netdev: pointer to netdev struct
+ * @pfc: struct to hold PFC info
+ */
+static int ice_dcbnl_getpfc(struct net_device *netdev, struct ieee_pfc *pfc)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+	struct ice_dcbx_cfg *dcbxcfg;
+	int i;
+
+	dcbxcfg = &pi->qos_cfg.local_dcbx_cfg;
+	pfc->pfc_cap = dcbxcfg->pfc.pfccap;
+	pfc->pfc_en = dcbxcfg->pfc.pfcena;
+	pfc->mbc = dcbxcfg->pfc.mbc;
+	ice_get_pfc_delay(&pf->hw, &pfc->delay);
+
+	ice_for_each_traffic_class(i) {
+		pfc->requests[i] = pf->stats.priority_xoff_tx[i];
+		pfc->indications[i] = pf->stats.priority_xoff_rx[i];
+	}
+
+	return 0;
+}
+
+/**
+ * ice_dcbnl_setpfc - set local IEEE PFC config
+ * @netdev: pointer to relevant netdev
+ * @pfc: pointer to struct holding PFC config
+ */
+static int ice_dcbnl_setpfc(struct net_device *netdev, struct ieee_pfc *pfc)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int err;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	if (pf->lag && pf->lag->bonded) {
+		netdev_err(netdev, "DCB changes not allowed when in a bond\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&pf->tc_mutex);
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+	if (pfc->pfc_cap)
+		new_cfg->pfc.pfccap = pfc->pfc_cap;
+	else
+		new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc;
+
+	new_cfg->pfc.pfcena = pfc->pfc_en;
+
+	err = ice_pf_dcb_cfg(pf, new_cfg, true);
+	if (err == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+	if (err == ICE_DCB_NO_HW_CHG)
+		err = ICE_DCB_HW_CHG_RST;
+	mutex_unlock(&pf->tc_mutex);
+	return err;
+}
+
+/**
+ * ice_dcbnl_get_pfc_cfg - Get CEE PFC config
+ * @netdev: pointer to netdev struct
+ * @prio: corresponding user priority
+ * @setting: the PFC setting for given priority
+ */
+static void
+ice_dcbnl_get_pfc_cfg(struct net_device *netdev, int prio, u8 *setting)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	*setting = (pi->qos_cfg.local_dcbx_cfg.pfc.pfcena >> prio) & 0x1;
+	dev_dbg(ice_pf_to_dev(pf), "Get PFC Config up=%d, setting=%d, pfcenable=0x%x\n",
+		prio, *setting, pi->qos_cfg.local_dcbx_cfg.pfc.pfcena);
+}
+
+/**
+ * ice_dcbnl_set_pfc_cfg - Set CEE PFC config
+ * @netdev: the corresponding netdev
+ * @prio: User Priority
+ * @set: PFC setting to apply
+ */
+static void ice_dcbnl_set_pfc_cfg(struct net_device *netdev, int prio, u8 set)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	if (pf->lag && pf->lag->bonded) {
+		netdev_err(netdev, "DCB changes not allowed when in a bond\n");
+		return;
+	}
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+	new_cfg->pfc.pfccap = pf->hw.func_caps.common_cap.maxtc;
+	if (set)
+		new_cfg->pfc.pfcena |= BIT(prio);
+	else
+		new_cfg->pfc.pfcena &= ~BIT(prio);
+
+	dev_dbg(ice_pf_to_dev(pf), "Set PFC config UP:%d set:%d pfcena:0x%x\n",
+		prio, set, new_cfg->pfc.pfcena);
+}
+
+/**
+ * ice_dcbnl_getpfcstate - get CEE PFC mode
+ * @netdev: pointer to netdev struct
+ */
+static u8 ice_dcbnl_getpfcstate(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	/* Return enabled if any UP enabled for PFC */
+	if (pi->qos_cfg.local_dcbx_cfg.pfc.pfcena)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * ice_dcbnl_getstate - get DCB enabled state
+ * @netdev: pointer to netdev struct
+ */
+static u8 ice_dcbnl_getstate(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	u8 state = 0;
+
+	state = test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+
+	dev_dbg(ice_pf_to_dev(pf), "DCB enabled state = %d\n", state);
+	return state;
+}
+
+/**
+ * ice_dcbnl_setstate - Set CEE DCB state
+ * @netdev: pointer to relevant netdev
+ * @state: state value to set
+ */
+static u8 ice_dcbnl_setstate(struct net_device *netdev, u8 state)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return ICE_DCB_NO_HW_CHG;
+
+	if (pf->lag && pf->lag->bonded) {
+		netdev_err(netdev, "DCB changes not allowed when in a bond\n");
+		return ICE_DCB_NO_HW_CHG;
+	}
+
+	/* Nothing to do */
+	if (!!state == test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+		return ICE_DCB_NO_HW_CHG;
+
+	if (state) {
+		set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+		memcpy(&pf->hw.port_info->qos_cfg.desired_dcbx_cfg,
+		       &pf->hw.port_info->qos_cfg.local_dcbx_cfg,
+		       sizeof(struct ice_dcbx_cfg));
+	} else {
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	}
+
+	return ICE_DCB_HW_CHG;
+}
+
+/**
+ * ice_dcbnl_get_pg_tc_cfg_tx - get CEE PG Tx config
+ * @netdev: pointer to netdev struct
+ * @prio: the corresponding user priority
+ * @prio_type: traffic priority type
+ * @pgid: the BW group ID the traffic class belongs to
+ * @bw_pct: BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int prio,
+			   u8 __always_unused *prio_type, u8 *pgid,
+			   u8 __always_unused *bw_pct,
+			   u8 __always_unused *up_map)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	*pgid = pi->qos_cfg.local_dcbx_cfg.etscfg.prio_table[prio];
+	dev_dbg(ice_pf_to_dev(pf), "Get PG config prio=%d tc=%d\n", prio,
+		*pgid);
+}
+
+/**
+ * ice_dcbnl_set_pg_tc_cfg_tx - set CEE PG Tx config
+ * @netdev: pointer to relevant netdev
+ * @tc: the corresponding traffic class
+ * @prio_type: the traffic priority type
+ * @bwg_id: the BW group ID the TC belongs to
+ * @bw_pct: the BW perventage for the BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
+			   u8 __always_unused prio_type,
+			   u8 __always_unused bwg_id,
+			   u8 __always_unused bw_pct, u8 up_map)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int i;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (tc >= ICE_MAX_TRAFFIC_CLASS)
+		return;
+
+	if (pf->lag && pf->lag->bonded) {
+		netdev_err(netdev, "DCB changes not allowed when in a bond\n");
+		return;
+	}
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+	/* prio_type, bwg_id and bw_pct per UP are not supported */
+
+	ice_for_each_traffic_class(i) {
+		if (up_map & BIT(i))
+			new_cfg->etscfg.prio_table[i] = tc;
+	}
+	new_cfg->etscfg.tsatable[tc] = ICE_IEEE_TSA_ETS;
+}
+
+/**
+ * ice_dcbnl_get_pg_bwg_cfg_tx - Get CEE PGBW config
+ * @netdev: pointer to the netdev struct
+ * @pgid: corresponding traffic class
+ * @bw_pct: the BW percentage for the corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 *bw_pct)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (pgid >= ICE_MAX_TRAFFIC_CLASS)
+		return;
+
+	*bw_pct = pi->qos_cfg.local_dcbx_cfg.etscfg.tcbwtable[pgid];
+	dev_dbg(ice_pf_to_dev(pf), "Get PG BW config tc=%d bw_pct=%d\n",
+		pgid, *bw_pct);
+}
+
+/**
+ * ice_dcbnl_set_pg_bwg_cfg_tx - set CEE PG Tx BW config
+ * @netdev: the corresponding netdev
+ * @pgid: Correspongind traffic class
+ * @bw_pct: the BW percentage for the specified TC
+ */
+static void
+ice_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, u8 bw_pct)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (pgid >= ICE_MAX_TRAFFIC_CLASS)
+		return;
+
+	if (pf->lag && pf->lag->bonded) {
+		netdev_err(netdev, "DCB changes not allowed when in a bond\n");
+		return;
+	}
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+	new_cfg->etscfg.tcbwtable[pgid] = bw_pct;
+}
+
+/**
+ * ice_dcbnl_get_pg_tc_cfg_rx - Get CEE PG Rx config
+ * @netdev: pointer to netdev struct
+ * @prio: the corresponding user priority
+ * @prio_type: the traffic priority type
+ * @pgid: the PG ID
+ * @bw_pct: the BW percentage for the corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int prio,
+			   u8 __always_unused *prio_type, u8 *pgid,
+			   u8 __always_unused *bw_pct,
+			   u8 __always_unused *up_map)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_port_info *pi = pf->hw.port_info;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	if (prio >= ICE_MAX_USER_PRIORITY)
+		return;
+
+	*pgid = pi->qos_cfg.local_dcbx_cfg.etscfg.prio_table[prio];
+}
+
+/**
+ * ice_dcbnl_set_pg_tc_cfg_rx
+ * @netdev: relevant netdev struct
+ * @prio: corresponding user priority
+ * @prio_type: the traffic priority type
+ * @pgid: the PG ID
+ * @bw_pct: BW percentage for corresponding BWG
+ * @up_map: prio mapped to corresponding TC
+ *
+ * lldpad requires this function pointer to be non-NULL to complete CEE config.
+ */
+static void
+ice_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev,
+			   int __always_unused prio,
+			   u8 __always_unused prio_type,
+			   u8 __always_unused pgid,
+			   u8 __always_unused bw_pct,
+			   u8 __always_unused up_map)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	dev_dbg(ice_pf_to_dev(pf), "Rx TC PG Config Not Supported.\n");
+}
+
+/**
+ * ice_dcbnl_get_pg_bwg_cfg_rx - Get CEE PG BW Rx config
+ * @netdev: pointer to netdev struct
+ * @pgid: the corresponding traffic class
+ * @bw_pct: the BW percentage for the corresponding TC
+ */
+static void
+ice_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int __always_unused pgid,
+			    u8 *bw_pct)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return;
+
+	*bw_pct = 0;
+}
+
+/**
+ * ice_dcbnl_set_pg_bwg_cfg_rx
+ * @netdev: the corresponding netdev
+ * @pgid: corresponding TC
+ * @bw_pct: BW percentage for given TC
+ *
+ * lldpad requires this function pointer to be non-NULL to complete CEE config.
+ */
+static void
+ice_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int __always_unused pgid,
+			    u8 __always_unused bw_pct)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	dev_dbg(ice_pf_to_dev(pf), "Rx BWG PG Config Not Supported.\n");
+}
+
+/**
+ * ice_dcbnl_get_cap - Get DCBX capabilities of adapter
+ * @netdev: pointer to netdev struct
+ * @capid: the capability type
+ * @cap: the capability value
+ */
+static u8 ice_dcbnl_get_cap(struct net_device *netdev, int capid, u8 *cap)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)))
+		return ICE_DCB_NO_HW_CHG;
+
+	switch (capid) {
+	case DCB_CAP_ATTR_PG:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_PFC:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_UP2TC:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_PG_TCS:
+		*cap = 0x80;
+		break;
+	case DCB_CAP_ATTR_PFC_TCS:
+		*cap = 0x80;
+		break;
+	case DCB_CAP_ATTR_GSP:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_BCN:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_DCBX:
+		*cap = pf->dcbx_cap;
+		break;
+	default:
+		*cap = false;
+		break;
+	}
+
+	dev_dbg(ice_pf_to_dev(pf), "DCBX Get Capability cap=%d capval=0x%x\n",
+		capid, *cap);
+	return 0;
+}
+
+/**
+ * ice_dcbnl_getapp - get CEE APP
+ * @netdev: pointer to netdev struct
+ * @idtype: the App selector
+ * @id: the App ethtype or port number
+ */
+static int ice_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct dcb_app app = {
+				.selector = idtype,
+				.protocol = id,
+			     };
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return -EINVAL;
+
+	return dcb_getapp(netdev, &app);
+}
+
+/**
+ * ice_dcbnl_find_app - Search for APP in given DCB config
+ * @cfg: struct to hold DCBX config
+ * @app: struct to hold app data to look for
+ */
+static bool
+ice_dcbnl_find_app(struct ice_dcbx_cfg *cfg,
+		   struct ice_dcb_app_priority_table *app)
+{
+	unsigned int i;
+
+	for (i = 0; i < cfg->numapps; i++) {
+		if (app->selector == cfg->app[i].selector &&
+		    app->prot_id == cfg->app[i].prot_id &&
+		    app->priority == cfg->app[i].priority)
+			return true;
+	}
+
+	return false;
+}
+
+#define ICE_BYTES_PER_DSCP_VAL		8
+
+/**
+ * ice_dcbnl_setapp - set local IEEE App config
+ * @netdev: relevant netdev struct
+ * @app: struct to hold app config info
+ */
+static int ice_dcbnl_setapp(struct net_device *netdev, struct dcb_app *app)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcb_app_priority_table new_app;
+	struct ice_dcbx_cfg *old_cfg, *new_cfg;
+	u8 max_tc;
+	int ret;
+
+	/* ONLY DSCP APP TLVs have operational significance */
+	if (app->selector != IEEE_8021QAZ_APP_SEL_DSCP)
+		return -EINVAL;
+
+	/* only allow APP TLVs in SW Mode */
+	if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) {
+		netdev_err(netdev, "can't do DSCP QoS when FW DCB agent active\n");
+		return -EINVAL;
+	}
+
+	if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	if (!ice_is_feature_supported(pf, ICE_F_DSCP))
+		return -EOPNOTSUPP;
+
+	if (app->protocol >= ICE_DSCP_NUM_VAL) {
+		netdev_err(netdev, "DSCP value 0x%04X out of range\n",
+			   app->protocol);
+		return -EINVAL;
+	}
+
+	if (pf->lag && pf->lag->bonded) {
+		netdev_err(netdev, "DCB changes not allowed when in a bond\n");
+		return -EINVAL;
+	}
+
+	max_tc = pf->hw.func_caps.common_cap.maxtc;
+	if (app->priority >= max_tc) {
+		netdev_err(netdev, "TC %d out of range, max TC %d\n",
+			   app->priority, max_tc);
+		return -EINVAL;
+	}
+
+	/* grab TC mutex */
+	mutex_lock(&pf->tc_mutex);
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+	old_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+	ret = dcb_ieee_setapp(netdev, app);
+	if (ret)
+		goto setapp_out;
+
+	if (test_and_set_bit(app->protocol, new_cfg->dscp_mapped)) {
+		netdev_err(netdev, "DSCP value 0x%04X already user mapped\n",
+			   app->protocol);
+		ret = dcb_ieee_delapp(netdev, app);
+		if (ret)
+			netdev_err(netdev, "Failed to delete re-mapping TLV\n");
+		ret = -EINVAL;
+		goto setapp_out;
+	}
+
+	new_app.selector = app->selector;
+	new_app.prot_id = app->protocol;
+	new_app.priority = app->priority;
+
+	/* If port is not in DSCP mode, need to set */
+	if (old_cfg->pfc_mode == ICE_QOS_MODE_VLAN) {
+		int i, j;
+
+		/* set DSCP mode */
+		ret = ice_aq_set_pfc_mode(&pf->hw, ICE_AQC_PFC_DSCP_BASED_PFC,
+					  NULL);
+		if (ret) {
+			netdev_err(netdev, "Failed to set DSCP PFC mode %d\n",
+				   ret);
+			goto setapp_out;
+		}
+		netdev_info(netdev, "Switched QoS to L3 DSCP mode\n");
+
+		new_cfg->pfc_mode = ICE_QOS_MODE_DSCP;
+
+		/* set default DSCP QoS values */
+		new_cfg->etscfg.willing = 0;
+		new_cfg->pfc.pfccap = max_tc;
+		new_cfg->pfc.willing = 0;
+
+		for (i = 0; i < max_tc; i++)
+			for (j = 0; j < ICE_BYTES_PER_DSCP_VAL; j++) {
+				int dscp, offset;
+
+				dscp = (i * max_tc) + j;
+				offset = max_tc * ICE_BYTES_PER_DSCP_VAL;
+
+				new_cfg->dscp_map[dscp] = i;
+				/* if less that 8 TCs supported */
+				if (max_tc < ICE_MAX_TRAFFIC_CLASS)
+					new_cfg->dscp_map[dscp + offset] = i;
+			}
+
+		new_cfg->etscfg.tcbwtable[0] = 100;
+		new_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
+		new_cfg->etscfg.prio_table[0] = 0;
+
+		for (i = 1; i < max_tc; i++) {
+			new_cfg->etscfg.tcbwtable[i] = 0;
+			new_cfg->etscfg.tsatable[i] = ICE_IEEE_TSA_ETS;
+			new_cfg->etscfg.prio_table[i] = i;
+		}
+	} /* end of switching to DSCP mode */
+
+	/* apply new mapping for this DSCP value */
+	new_cfg->dscp_map[app->protocol] = app->priority;
+	new_cfg->app[new_cfg->numapps++] = new_app;
+
+	ret = ice_pf_dcb_cfg(pf, new_cfg, true);
+	/* return of zero indicates new cfg applied */
+	if (ret == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+	else
+		ret = ICE_DCB_NO_HW_CHG;
+
+setapp_out:
+	mutex_unlock(&pf->tc_mutex);
+	return ret;
+}
+
+/**
+ * ice_dcbnl_delapp - Delete local IEEE App config
+ * @netdev: relevant netdev
+ * @app: struct to hold app too delete
+ *
+ * Will not delete first application required by the FW
+ */
+static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *old_cfg, *new_cfg;
+	unsigned int i, j;
+	int ret = 0;
+
+	if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) {
+		netdev_err(netdev, "can't delete DSCP netlink app when FW DCB agent is active\n");
+		return -EINVAL;
+	}
+
+	if (pf->lag && pf->lag->bonded) {
+		netdev_err(netdev, "DCB changes not allowed when in a bond\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&pf->tc_mutex);
+	old_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+	ret = dcb_ieee_delapp(netdev, app);
+	if (ret)
+		goto delapp_out;
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+	for (i = 0; i < new_cfg->numapps; i++) {
+		if (app->selector == new_cfg->app[i].selector &&
+		    app->protocol == new_cfg->app[i].prot_id &&
+		    app->priority == new_cfg->app[i].priority) {
+			new_cfg->app[i].selector = 0;
+			new_cfg->app[i].prot_id = 0;
+			new_cfg->app[i].priority = 0;
+			break;
+		}
+	}
+
+	/* Did not find DCB App */
+	if (i == new_cfg->numapps) {
+		ret = -EINVAL;
+		goto delapp_out;
+	}
+
+	new_cfg->numapps--;
+
+	for (j = i; j < new_cfg->numapps; j++) {
+		new_cfg->app[j].selector = old_cfg->app[j + 1].selector;
+		new_cfg->app[j].prot_id = old_cfg->app[j + 1].prot_id;
+		new_cfg->app[j].priority = old_cfg->app[j + 1].priority;
+	}
+
+	/* if not a DSCP APP TLV or DSCP is not supported, we are done */
+	if (app->selector != IEEE_8021QAZ_APP_SEL_DSCP ||
+	    !ice_is_feature_supported(pf, ICE_F_DSCP)) {
+		ret = ICE_DCB_HW_CHG;
+		goto delapp_out;
+	}
+
+	/* if DSCP TLV, then need to address change in mapping */
+	clear_bit(app->protocol, new_cfg->dscp_mapped);
+	/* remap this DSCP value to default value */
+	new_cfg->dscp_map[app->protocol] = app->protocol %
+					   ICE_BYTES_PER_DSCP_VAL;
+
+	/* if the last DSCP mapping just got deleted, need to switch
+	 * to L2 VLAN QoS mode
+	 */
+	if (bitmap_empty(new_cfg->dscp_mapped, ICE_DSCP_NUM_VAL) &&
+	    new_cfg->pfc_mode == ICE_QOS_MODE_DSCP) {
+		ret = ice_aq_set_pfc_mode(&pf->hw,
+					  ICE_AQC_PFC_VLAN_BASED_PFC,
+					  NULL);
+		if (ret) {
+			netdev_info(netdev, "Failed to set VLAN PFC mode %d\n",
+				    ret);
+			goto delapp_out;
+		}
+		netdev_info(netdev, "Switched QoS to L2 VLAN mode\n");
+
+		new_cfg->pfc_mode = ICE_QOS_MODE_VLAN;
+
+		ret = ice_dcb_sw_dflt_cfg(pf, true, true);
+	} else {
+		ret = ice_pf_dcb_cfg(pf, new_cfg, true);
+	}
+
+	/* return of ICE_DCB_HW_CHG_RST indicates new cfg applied
+	 * and reset needs to be performed
+	 */
+	if (ret == ICE_DCB_HW_CHG_RST)
+		ice_dcbnl_devreset(netdev);
+
+	/* if the change was not siginificant enough to actually call
+	 * the reconfiguration flow, we still need to tell caller that
+	 * their request was successfully handled
+	 */
+	if (ret == ICE_DCB_NO_HW_CHG)
+		ret = ICE_DCB_HW_CHG;
+
+delapp_out:
+	mutex_unlock(&pf->tc_mutex);
+	return ret;
+}
+
+/**
+ * ice_dcbnl_cee_set_all - Commit CEE DCB settings to HW
+ * @netdev: the corresponding netdev
+ */
+static u8 ice_dcbnl_cee_set_all(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *new_cfg;
+	int err;
+
+	if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    !(pf->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return ICE_DCB_NO_HW_CHG;
+
+	if (pf->lag && pf->lag->bonded) {
+		netdev_err(netdev, "DCB changes not allowed when in a bond\n");
+		return ICE_DCB_NO_HW_CHG;
+	}
+
+	new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg;
+
+	mutex_lock(&pf->tc_mutex);
+
+	err = ice_pf_dcb_cfg(pf, new_cfg, true);
+
+	mutex_unlock(&pf->tc_mutex);
+	return (err != ICE_DCB_HW_CHG_RST) ? ICE_DCB_NO_HW_CHG : err;
+}
+
+static const struct dcbnl_rtnl_ops dcbnl_ops = {
+	/* IEEE 802.1Qaz std */
+	.ieee_getets = ice_dcbnl_getets,
+	.ieee_setets = ice_dcbnl_setets,
+	.ieee_getpfc = ice_dcbnl_getpfc,
+	.ieee_setpfc = ice_dcbnl_setpfc,
+	.ieee_setapp = ice_dcbnl_setapp,
+	.ieee_delapp = ice_dcbnl_delapp,
+
+	/* CEE std */
+	.getstate = ice_dcbnl_getstate,
+	.setstate = ice_dcbnl_setstate,
+	.getpermhwaddr = ice_dcbnl_get_perm_hw_addr,
+	.setpgtccfgtx = ice_dcbnl_set_pg_tc_cfg_tx,
+	.setpgbwgcfgtx = ice_dcbnl_set_pg_bwg_cfg_tx,
+	.setpgtccfgrx = ice_dcbnl_set_pg_tc_cfg_rx,
+	.setpgbwgcfgrx = ice_dcbnl_set_pg_bwg_cfg_rx,
+	.getpgtccfgtx = ice_dcbnl_get_pg_tc_cfg_tx,
+	.getpgbwgcfgtx = ice_dcbnl_get_pg_bwg_cfg_tx,
+	.getpgtccfgrx = ice_dcbnl_get_pg_tc_cfg_rx,
+	.getpgbwgcfgrx = ice_dcbnl_get_pg_bwg_cfg_rx,
+	.setpfccfg = ice_dcbnl_set_pfc_cfg,
+	.getpfccfg = ice_dcbnl_get_pfc_cfg,
+	.setall = ice_dcbnl_cee_set_all,
+	.getcap = ice_dcbnl_get_cap,
+	.getnumtcs = ice_dcbnl_getnumtcs,
+	.getpfcstate = ice_dcbnl_getpfcstate,
+	.getapp = ice_dcbnl_getapp,
+
+	/* DCBX configuration */
+	.getdcbx = ice_dcbnl_getdcbx,
+	.setdcbx = ice_dcbnl_setdcbx,
+};
+
+/**
+ * ice_dcbnl_set_all - set all the apps and ieee data from DCBX config
+ * @vsi: pointer to VSI struct
+ */
+void ice_dcbnl_set_all(struct ice_vsi *vsi)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct ice_dcbx_cfg *dcbxcfg;
+	struct ice_port_info *pi;
+	struct dcb_app sapp;
+	struct ice_pf *pf;
+	unsigned int i;
+
+	if (!netdev)
+		return;
+
+	pf = ice_netdev_to_pf(netdev);
+	pi = pf->hw.port_info;
+
+	/* SW DCB taken care of by SW Default Config */
+	if (pf->dcbx_cap & DCB_CAP_DCBX_HOST)
+		return;
+
+	/* DCB not enabled */
+	if (!test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+		return;
+
+	dcbxcfg = &pi->qos_cfg.local_dcbx_cfg;
+
+	for (i = 0; i < dcbxcfg->numapps; i++) {
+		u8 prio, tc_map;
+
+		prio = dcbxcfg->app[i].priority;
+		tc_map = BIT(dcbxcfg->etscfg.prio_table[prio]);
+
+		/* Add APP only if the TC is enabled for this VSI */
+		if (tc_map & vsi->tc_cfg.ena_tc) {
+			sapp.selector = dcbxcfg->app[i].selector;
+			sapp.protocol = dcbxcfg->app[i].prot_id;
+			sapp.priority = prio;
+			dcb_ieee_setapp(netdev, &sapp);
+		}
+	}
+	/* Notify user-space of the changes */
+	dcbnl_ieee_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_SET, 0, 0);
+}
+
+/**
+ * ice_dcbnl_vsi_del_app - Delete APP on all VSIs
+ * @vsi: pointer to the main VSI
+ * @app: APP to delete
+ *
+ * Delete given APP from all the VSIs for given PF
+ */
+static void
+ice_dcbnl_vsi_del_app(struct ice_vsi *vsi,
+		      struct ice_dcb_app_priority_table *app)
+{
+	struct dcb_app sapp;
+	int err;
+
+	sapp.selector = app->selector;
+	sapp.protocol = app->prot_id;
+	sapp.priority = app->priority;
+	err = ice_dcbnl_delapp(vsi->netdev, &sapp);
+	dev_dbg(ice_pf_to_dev(vsi->back), "Deleting app for VSI idx=%d err=%d sel=%d proto=0x%x, prio=%d\n",
+		vsi->idx, err, app->selector, app->prot_id, app->priority);
+}
+
+/**
+ * ice_dcbnl_flush_apps - Delete all removed APPs
+ * @pf: the corresponding PF
+ * @old_cfg: old DCBX configuration data
+ * @new_cfg: new DCBX configuration data
+ *
+ * Find and delete all APPS that are not present in the passed
+ * DCB configuration
+ */
+void
+ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+		     struct ice_dcbx_cfg *new_cfg)
+{
+	struct ice_vsi *main_vsi = ice_get_main_vsi(pf);
+	unsigned int i;
+
+	if (!main_vsi)
+		return;
+
+	for (i = 0; i < old_cfg->numapps; i++) {
+		struct ice_dcb_app_priority_table app = old_cfg->app[i];
+
+		/* The APP is not available anymore delete it */
+		if (!ice_dcbnl_find_app(new_cfg, &app))
+			ice_dcbnl_vsi_del_app(main_vsi, &app);
+	}
+}
+
+/**
+ * ice_dcbnl_setup - setup DCBNL
+ * @vsi: VSI to get associated netdev from
+ */
+void ice_dcbnl_setup(struct ice_vsi *vsi)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct ice_pf *pf;
+
+	pf = ice_netdev_to_pf(netdev);
+	if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
+		return;
+
+	netdev->dcbnl_ops = &dcbnl_ops;
+	ice_dcbnl_set_all(vsi);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.h b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h
new file mode 100644
index 0000000000..eac2f34bdc
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DCB_NL_H_
+#define _ICE_DCB_NL_H_
+
+#ifdef CONFIG_DCB
+void ice_dcbnl_setup(struct ice_vsi *vsi);
+void ice_dcbnl_set_all(struct ice_vsi *vsi);
+void
+ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+		     struct ice_dcbx_cfg *new_cfg);
+#else
+static inline void ice_dcbnl_setup(struct ice_vsi *vsi) { }
+static inline void ice_dcbnl_set_all(struct ice_vsi *vsi) { }
+static inline void
+ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+		     struct ice_dcbx_cfg *new_cfg) { }
+#endif /* CONFIG_DCB */
+#endif /* _ICE_DCB_NL_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c
new file mode 100644
index 0000000000..b27ec93638
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ddp.c
@@ -0,0 +1,1899 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice.h"
+#include "ice_ddp.h"
+
+/* For supporting double VLAN mode, it is necessary to enable or disable certain
+ * boost tcam entries. The metadata labels names that match the following
+ * prefixes will be saved to allow enabling double VLAN mode.
+ */
+#define ICE_DVM_PRE "BOOST_MAC_VLAN_DVM" /* enable these entries */
+#define ICE_SVM_PRE "BOOST_MAC_VLAN_SVM" /* disable these entries */
+
+/* To support tunneling entries by PF, the package will append the PF number to
+ * the label; for example TNL_VXLAN_PF0, TNL_VXLAN_PF1, TNL_VXLAN_PF2, etc.
+ */
+#define ICE_TNL_PRE "TNL_"
+static const struct ice_tunnel_type_scan tnls[] = {
+	{ TNL_VXLAN, "TNL_VXLAN_PF" },
+	{ TNL_GENEVE, "TNL_GENEVE_PF" },
+	{ TNL_LAST, "" }
+};
+
+/**
+ * ice_verify_pkg - verify package
+ * @pkg: pointer to the package buffer
+ * @len: size of the package buffer
+ *
+ * Verifies various attributes of the package file, including length, format
+ * version, and the requirement of at least one segment.
+ */
+static enum ice_ddp_state ice_verify_pkg(struct ice_pkg_hdr *pkg, u32 len)
+{
+	u32 seg_count;
+	u32 i;
+
+	if (len < struct_size(pkg, seg_offset, 1))
+		return ICE_DDP_PKG_INVALID_FILE;
+
+	if (pkg->pkg_format_ver.major != ICE_PKG_FMT_VER_MAJ ||
+	    pkg->pkg_format_ver.minor != ICE_PKG_FMT_VER_MNR ||
+	    pkg->pkg_format_ver.update != ICE_PKG_FMT_VER_UPD ||
+	    pkg->pkg_format_ver.draft != ICE_PKG_FMT_VER_DFT)
+		return ICE_DDP_PKG_INVALID_FILE;
+
+	/* pkg must have at least one segment */
+	seg_count = le32_to_cpu(pkg->seg_count);
+	if (seg_count < 1)
+		return ICE_DDP_PKG_INVALID_FILE;
+
+	/* make sure segment array fits in package length */
+	if (len < struct_size(pkg, seg_offset, seg_count))
+		return ICE_DDP_PKG_INVALID_FILE;
+
+	/* all segments must fit within length */
+	for (i = 0; i < seg_count; i++) {
+		u32 off = le32_to_cpu(pkg->seg_offset[i]);
+		struct ice_generic_seg_hdr *seg;
+
+		/* segment header must fit */
+		if (len < off + sizeof(*seg))
+			return ICE_DDP_PKG_INVALID_FILE;
+
+		seg = (struct ice_generic_seg_hdr *)((u8 *)pkg + off);
+
+		/* segment body must fit */
+		if (len < off + le32_to_cpu(seg->seg_size))
+			return ICE_DDP_PKG_INVALID_FILE;
+	}
+
+	return ICE_DDP_PKG_SUCCESS;
+}
+
+/**
+ * ice_free_seg - free package segment pointer
+ * @hw: pointer to the hardware structure
+ *
+ * Frees the package segment pointer in the proper manner, depending on if the
+ * segment was allocated or just the passed in pointer was stored.
+ */
+void ice_free_seg(struct ice_hw *hw)
+{
+	if (hw->pkg_copy) {
+		devm_kfree(ice_hw_to_dev(hw), hw->pkg_copy);
+		hw->pkg_copy = NULL;
+		hw->pkg_size = 0;
+	}
+	hw->seg = NULL;
+}
+
+/**
+ * ice_chk_pkg_version - check package version for compatibility with driver
+ * @pkg_ver: pointer to a version structure to check
+ *
+ * Check to make sure that the package about to be downloaded is compatible with
+ * the driver. To be compatible, the major and minor components of the package
+ * version must match our ICE_PKG_SUPP_VER_MAJ and ICE_PKG_SUPP_VER_MNR
+ * definitions.
+ */
+static enum ice_ddp_state ice_chk_pkg_version(struct ice_pkg_ver *pkg_ver)
+{
+	if (pkg_ver->major > ICE_PKG_SUPP_VER_MAJ ||
+	    (pkg_ver->major == ICE_PKG_SUPP_VER_MAJ &&
+	     pkg_ver->minor > ICE_PKG_SUPP_VER_MNR))
+		return ICE_DDP_PKG_FILE_VERSION_TOO_HIGH;
+	else if (pkg_ver->major < ICE_PKG_SUPP_VER_MAJ ||
+		 (pkg_ver->major == ICE_PKG_SUPP_VER_MAJ &&
+		  pkg_ver->minor < ICE_PKG_SUPP_VER_MNR))
+		return ICE_DDP_PKG_FILE_VERSION_TOO_LOW;
+
+	return ICE_DDP_PKG_SUCCESS;
+}
+
+/**
+ * ice_pkg_val_buf
+ * @buf: pointer to the ice buffer
+ *
+ * This helper function validates a buffer's header.
+ */
+static struct ice_buf_hdr *ice_pkg_val_buf(struct ice_buf *buf)
+{
+	struct ice_buf_hdr *hdr;
+	u16 section_count;
+	u16 data_end;
+
+	hdr = (struct ice_buf_hdr *)buf->buf;
+	/* verify data */
+	section_count = le16_to_cpu(hdr->section_count);
+	if (section_count < ICE_MIN_S_COUNT || section_count > ICE_MAX_S_COUNT)
+		return NULL;
+
+	data_end = le16_to_cpu(hdr->data_end);
+	if (data_end < ICE_MIN_S_DATA_END || data_end > ICE_MAX_S_DATA_END)
+		return NULL;
+
+	return hdr;
+}
+
+/**
+ * ice_find_buf_table
+ * @ice_seg: pointer to the ice segment
+ *
+ * Returns the address of the buffer table within the ice segment.
+ */
+static struct ice_buf_table *ice_find_buf_table(struct ice_seg *ice_seg)
+{
+	struct ice_nvm_table *nvms = (struct ice_nvm_table *)
+		(ice_seg->device_table + le32_to_cpu(ice_seg->device_table_count));
+
+	return (__force struct ice_buf_table *)(nvms->vers +
+						le32_to_cpu(nvms->table_count));
+}
+
+/**
+ * ice_pkg_enum_buf
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ *
+ * This function will enumerate all the buffers in the ice segment. The first
+ * call is made with the ice_seg parameter non-NULL; on subsequent calls,
+ * ice_seg is set to NULL which continues the enumeration. When the function
+ * returns a NULL pointer, then the end of the buffers has been reached, or an
+ * unexpected value has been detected (for example an invalid section count or
+ * an invalid buffer end value).
+ */
+static struct ice_buf_hdr *ice_pkg_enum_buf(struct ice_seg *ice_seg,
+					    struct ice_pkg_enum *state)
+{
+	if (ice_seg) {
+		state->buf_table = ice_find_buf_table(ice_seg);
+		if (!state->buf_table)
+			return NULL;
+
+		state->buf_idx = 0;
+		return ice_pkg_val_buf(state->buf_table->buf_array);
+	}
+
+	if (++state->buf_idx < le32_to_cpu(state->buf_table->buf_count))
+		return ice_pkg_val_buf(state->buf_table->buf_array +
+				       state->buf_idx);
+	else
+		return NULL;
+}
+
+/**
+ * ice_pkg_advance_sect
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ *
+ * This helper function will advance the section within the ice segment,
+ * also advancing the buffer if needed.
+ */
+static bool ice_pkg_advance_sect(struct ice_seg *ice_seg,
+				 struct ice_pkg_enum *state)
+{
+	if (!ice_seg && !state->buf)
+		return false;
+
+	if (!ice_seg && state->buf)
+		if (++state->sect_idx < le16_to_cpu(state->buf->section_count))
+			return true;
+
+	state->buf = ice_pkg_enum_buf(ice_seg, state);
+	if (!state->buf)
+		return false;
+
+	/* start of new buffer, reset section index */
+	state->sect_idx = 0;
+	return true;
+}
+
+/**
+ * ice_pkg_enum_section
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ * @sect_type: section type to enumerate
+ *
+ * This function will enumerate all the sections of a particular type in the
+ * ice segment. The first call is made with the ice_seg parameter non-NULL;
+ * on subsequent calls, ice_seg is set to NULL which continues the enumeration.
+ * When the function returns a NULL pointer, then the end of the matching
+ * sections has been reached.
+ */
+void *ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state,
+			   u32 sect_type)
+{
+	u16 offset, size;
+
+	if (ice_seg)
+		state->type = sect_type;
+
+	if (!ice_pkg_advance_sect(ice_seg, state))
+		return NULL;
+
+	/* scan for next matching section */
+	while (state->buf->section_entry[state->sect_idx].type !=
+	       cpu_to_le32(state->type))
+		if (!ice_pkg_advance_sect(NULL, state))
+			return NULL;
+
+	/* validate section */
+	offset = le16_to_cpu(state->buf->section_entry[state->sect_idx].offset);
+	if (offset < ICE_MIN_S_OFF || offset > ICE_MAX_S_OFF)
+		return NULL;
+
+	size = le16_to_cpu(state->buf->section_entry[state->sect_idx].size);
+	if (size < ICE_MIN_S_SZ || size > ICE_MAX_S_SZ)
+		return NULL;
+
+	/* make sure the section fits in the buffer */
+	if (offset + size > ICE_PKG_BUF_SIZE)
+		return NULL;
+
+	state->sect_type =
+		le32_to_cpu(state->buf->section_entry[state->sect_idx].type);
+
+	/* calc pointer to this section */
+	state->sect =
+		((u8 *)state->buf) +
+		le16_to_cpu(state->buf->section_entry[state->sect_idx].offset);
+
+	return state->sect;
+}
+
+/**
+ * ice_pkg_enum_entry
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ * @sect_type: section type to enumerate
+ * @offset: pointer to variable that receives the offset in the table (optional)
+ * @handler: function that handles access to the entries into the section type
+ *
+ * This function will enumerate all the entries in particular section type in
+ * the ice segment. The first call is made with the ice_seg parameter non-NULL;
+ * on subsequent calls, ice_seg is set to NULL which continues the enumeration.
+ * When the function returns a NULL pointer, then the end of the entries has
+ * been reached.
+ *
+ * Since each section may have a different header and entry size, the handler
+ * function is needed to determine the number and location entries in each
+ * section.
+ *
+ * The offset parameter is optional, but should be used for sections that
+ * contain an offset for each section table. For such cases, the section handler
+ * function must return the appropriate offset + index to give the absolution
+ * offset for each entry. For example, if the base for a section's header
+ * indicates a base offset of 10, and the index for the entry is 2, then
+ * section handler function should set the offset to 10 + 2 = 12.
+ */
+static void *ice_pkg_enum_entry(struct ice_seg *ice_seg,
+				struct ice_pkg_enum *state, u32 sect_type,
+				u32 *offset,
+				void *(*handler)(u32 sect_type, void *section,
+						 u32 index, u32 *offset))
+{
+	void *entry;
+
+	if (ice_seg) {
+		if (!handler)
+			return NULL;
+
+		if (!ice_pkg_enum_section(ice_seg, state, sect_type))
+			return NULL;
+
+		state->entry_idx = 0;
+		state->handler = handler;
+	} else {
+		state->entry_idx++;
+	}
+
+	if (!state->handler)
+		return NULL;
+
+	/* get entry */
+	entry = state->handler(state->sect_type, state->sect, state->entry_idx,
+			       offset);
+	if (!entry) {
+		/* end of a section, look for another section of this type */
+		if (!ice_pkg_enum_section(NULL, state, 0))
+			return NULL;
+
+		state->entry_idx = 0;
+		entry = state->handler(state->sect_type, state->sect,
+				       state->entry_idx, offset);
+	}
+
+	return entry;
+}
+
+/**
+ * ice_sw_fv_handler
+ * @sect_type: section type
+ * @section: pointer to section
+ * @index: index of the field vector entry to be returned
+ * @offset: ptr to variable that receives the offset in the field vector table
+ *
+ * This is a callback function that can be passed to ice_pkg_enum_entry.
+ * This function treats the given section as of type ice_sw_fv_section and
+ * enumerates offset field. "offset" is an index into the field vector table.
+ */
+static void *ice_sw_fv_handler(u32 sect_type, void *section, u32 index,
+			       u32 *offset)
+{
+	struct ice_sw_fv_section *fv_section = section;
+
+	if (!section || sect_type != ICE_SID_FLD_VEC_SW)
+		return NULL;
+	if (index >= le16_to_cpu(fv_section->count))
+		return NULL;
+	if (offset)
+		/* "index" passed in to this function is relative to a given
+		 * 4k block. To get to the true index into the field vector
+		 * table need to add the relative index to the base_offset
+		 * field of this section
+		 */
+		*offset = le16_to_cpu(fv_section->base_offset) + index;
+	return fv_section->fv + index;
+}
+
+/**
+ * ice_get_prof_index_max - get the max profile index for used profile
+ * @hw: pointer to the HW struct
+ *
+ * Calling this function will get the max profile index for used profile
+ * and store the index number in struct ice_switch_info *switch_info
+ * in HW for following use.
+ */
+static int ice_get_prof_index_max(struct ice_hw *hw)
+{
+	u16 prof_index = 0, j, max_prof_index = 0;
+	struct ice_pkg_enum state;
+	struct ice_seg *ice_seg;
+	bool flag = false;
+	struct ice_fv *fv;
+	u32 offset;
+
+	memset(&state, 0, sizeof(state));
+
+	if (!hw->seg)
+		return -EINVAL;
+
+	ice_seg = hw->seg;
+
+	do {
+		fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW,
+					&offset, ice_sw_fv_handler);
+		if (!fv)
+			break;
+		ice_seg = NULL;
+
+		/* in the profile that not be used, the prot_id is set to 0xff
+		 * and the off is set to 0x1ff for all the field vectors.
+		 */
+		for (j = 0; j < hw->blk[ICE_BLK_SW].es.fvw; j++)
+			if (fv->ew[j].prot_id != ICE_PROT_INVALID ||
+			    fv->ew[j].off != ICE_FV_OFFSET_INVAL)
+				flag = true;
+		if (flag && prof_index > max_prof_index)
+			max_prof_index = prof_index;
+
+		prof_index++;
+		flag = false;
+	} while (fv);
+
+	hw->switch_info->max_used_prof_index = max_prof_index;
+
+	return 0;
+}
+
+/**
+ * ice_get_ddp_pkg_state - get DDP pkg state after download
+ * @hw: pointer to the HW struct
+ * @already_loaded: indicates if pkg was already loaded onto the device
+ */
+static enum ice_ddp_state ice_get_ddp_pkg_state(struct ice_hw *hw,
+						bool already_loaded)
+{
+	if (hw->pkg_ver.major == hw->active_pkg_ver.major &&
+	    hw->pkg_ver.minor == hw->active_pkg_ver.minor &&
+	    hw->pkg_ver.update == hw->active_pkg_ver.update &&
+	    hw->pkg_ver.draft == hw->active_pkg_ver.draft &&
+	    !memcmp(hw->pkg_name, hw->active_pkg_name, sizeof(hw->pkg_name))) {
+		if (already_loaded)
+			return ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED;
+		else
+			return ICE_DDP_PKG_SUCCESS;
+	} else if (hw->active_pkg_ver.major != ICE_PKG_SUPP_VER_MAJ ||
+		   hw->active_pkg_ver.minor != ICE_PKG_SUPP_VER_MNR) {
+		return ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED;
+	} else if (hw->active_pkg_ver.major == ICE_PKG_SUPP_VER_MAJ &&
+		   hw->active_pkg_ver.minor == ICE_PKG_SUPP_VER_MNR) {
+		return ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED;
+	} else {
+		return ICE_DDP_PKG_ERR;
+	}
+}
+
+/**
+ * ice_init_pkg_regs - initialize additional package registers
+ * @hw: pointer to the hardware structure
+ */
+static void ice_init_pkg_regs(struct ice_hw *hw)
+{
+#define ICE_SW_BLK_INP_MASK_L 0xFFFFFFFF
+#define ICE_SW_BLK_INP_MASK_H 0x0000FFFF
+#define ICE_SW_BLK_IDX 0
+
+	/* setup Switch block input mask, which is 48-bits in two parts */
+	wr32(hw, GL_PREEXT_L2_PMASK0(ICE_SW_BLK_IDX), ICE_SW_BLK_INP_MASK_L);
+	wr32(hw, GL_PREEXT_L2_PMASK1(ICE_SW_BLK_IDX), ICE_SW_BLK_INP_MASK_H);
+}
+
+/**
+ * ice_marker_ptype_tcam_handler
+ * @sect_type: section type
+ * @section: pointer to section
+ * @index: index of the Marker PType TCAM entry to be returned
+ * @offset: pointer to receive absolute offset, always 0 for ptype TCAM sections
+ *
+ * This is a callback function that can be passed to ice_pkg_enum_entry.
+ * Handles enumeration of individual Marker PType TCAM entries.
+ */
+static void *ice_marker_ptype_tcam_handler(u32 sect_type, void *section,
+					   u32 index, u32 *offset)
+{
+	struct ice_marker_ptype_tcam_section *marker_ptype;
+
+	if (sect_type != ICE_SID_RXPARSER_MARKER_PTYPE)
+		return NULL;
+
+	if (index > ICE_MAX_MARKER_PTYPE_TCAMS_IN_BUF)
+		return NULL;
+
+	if (offset)
+		*offset = 0;
+
+	marker_ptype = section;
+	if (index >= le16_to_cpu(marker_ptype->count))
+		return NULL;
+
+	return marker_ptype->tcam + index;
+}
+
+/**
+ * ice_add_dvm_hint
+ * @hw: pointer to the HW structure
+ * @val: value of the boost entry
+ * @enable: true if entry needs to be enabled, or false if needs to be disabled
+ */
+static void ice_add_dvm_hint(struct ice_hw *hw, u16 val, bool enable)
+{
+	if (hw->dvm_upd.count < ICE_DVM_MAX_ENTRIES) {
+		hw->dvm_upd.tbl[hw->dvm_upd.count].boost_addr = val;
+		hw->dvm_upd.tbl[hw->dvm_upd.count].enable = enable;
+		hw->dvm_upd.count++;
+	}
+}
+
+/**
+ * ice_add_tunnel_hint
+ * @hw: pointer to the HW structure
+ * @label_name: label text
+ * @val: value of the tunnel port boost entry
+ */
+static void ice_add_tunnel_hint(struct ice_hw *hw, char *label_name, u16 val)
+{
+	if (hw->tnl.count < ICE_TUNNEL_MAX_ENTRIES) {
+		u16 i;
+
+		for (i = 0; tnls[i].type != TNL_LAST; i++) {
+			size_t len = strlen(tnls[i].label_prefix);
+
+			/* Look for matching label start, before continuing */
+			if (strncmp(label_name, tnls[i].label_prefix, len))
+				continue;
+
+			/* Make sure this label matches our PF. Note that the PF
+			 * character ('0' - '7') will be located where our
+			 * prefix string's null terminator is located.
+			 */
+			if ((label_name[len] - '0') == hw->pf_id) {
+				hw->tnl.tbl[hw->tnl.count].type = tnls[i].type;
+				hw->tnl.tbl[hw->tnl.count].valid = false;
+				hw->tnl.tbl[hw->tnl.count].boost_addr = val;
+				hw->tnl.tbl[hw->tnl.count].port = 0;
+				hw->tnl.count++;
+				break;
+			}
+		}
+	}
+}
+
+/**
+ * ice_label_enum_handler
+ * @sect_type: section type
+ * @section: pointer to section
+ * @index: index of the label entry to be returned
+ * @offset: pointer to receive absolute offset, always zero for label sections
+ *
+ * This is a callback function that can be passed to ice_pkg_enum_entry.
+ * Handles enumeration of individual label entries.
+ */
+static void *ice_label_enum_handler(u32 __always_unused sect_type,
+				    void *section, u32 index, u32 *offset)
+{
+	struct ice_label_section *labels;
+
+	if (!section)
+		return NULL;
+
+	if (index > ICE_MAX_LABELS_IN_BUF)
+		return NULL;
+
+	if (offset)
+		*offset = 0;
+
+	labels = section;
+	if (index >= le16_to_cpu(labels->count))
+		return NULL;
+
+	return labels->label + index;
+}
+
+/**
+ * ice_enum_labels
+ * @ice_seg: pointer to the ice segment (NULL on subsequent calls)
+ * @type: the section type that will contain the label (0 on subsequent calls)
+ * @state: ice_pkg_enum structure that will hold the state of the enumeration
+ * @value: pointer to a value that will return the label's value if found
+ *
+ * Enumerates a list of labels in the package. The caller will call
+ * ice_enum_labels(ice_seg, type, ...) to start the enumeration, then call
+ * ice_enum_labels(NULL, 0, ...) to continue. When the function returns a NULL
+ * the end of the list has been reached.
+ */
+static char *ice_enum_labels(struct ice_seg *ice_seg, u32 type,
+			     struct ice_pkg_enum *state, u16 *value)
+{
+	struct ice_label *label;
+
+	/* Check for valid label section on first call */
+	if (type && !(type >= ICE_SID_LBL_FIRST && type <= ICE_SID_LBL_LAST))
+		return NULL;
+
+	label = ice_pkg_enum_entry(ice_seg, state, type, NULL,
+				   ice_label_enum_handler);
+	if (!label)
+		return NULL;
+
+	*value = le16_to_cpu(label->value);
+	return label->name;
+}
+
+/**
+ * ice_boost_tcam_handler
+ * @sect_type: section type
+ * @section: pointer to section
+ * @index: index of the boost TCAM entry to be returned
+ * @offset: pointer to receive absolute offset, always 0 for boost TCAM sections
+ *
+ * This is a callback function that can be passed to ice_pkg_enum_entry.
+ * Handles enumeration of individual boost TCAM entries.
+ */
+static void *ice_boost_tcam_handler(u32 sect_type, void *section, u32 index,
+				    u32 *offset)
+{
+	struct ice_boost_tcam_section *boost;
+
+	if (!section)
+		return NULL;
+
+	if (sect_type != ICE_SID_RXPARSER_BOOST_TCAM)
+		return NULL;
+
+	if (index > ICE_MAX_BST_TCAMS_IN_BUF)
+		return NULL;
+
+	if (offset)
+		*offset = 0;
+
+	boost = section;
+	if (index >= le16_to_cpu(boost->count))
+		return NULL;
+
+	return boost->tcam + index;
+}
+
+/**
+ * ice_find_boost_entry
+ * @ice_seg: pointer to the ice segment (non-NULL)
+ * @addr: Boost TCAM address of entry to search for
+ * @entry: returns pointer to the entry
+ *
+ * Finds a particular Boost TCAM entry and returns a pointer to that entry
+ * if it is found. The ice_seg parameter must not be NULL since the first call
+ * to ice_pkg_enum_entry requires a pointer to an actual ice_segment structure.
+ */
+static int ice_find_boost_entry(struct ice_seg *ice_seg, u16 addr,
+				struct ice_boost_tcam_entry **entry)
+{
+	struct ice_boost_tcam_entry *tcam;
+	struct ice_pkg_enum state;
+
+	memset(&state, 0, sizeof(state));
+
+	if (!ice_seg)
+		return -EINVAL;
+
+	do {
+		tcam = ice_pkg_enum_entry(ice_seg, &state,
+					  ICE_SID_RXPARSER_BOOST_TCAM, NULL,
+					  ice_boost_tcam_handler);
+		if (tcam && le16_to_cpu(tcam->addr) == addr) {
+			*entry = tcam;
+			return 0;
+		}
+
+		ice_seg = NULL;
+	} while (tcam);
+
+	*entry = NULL;
+	return -EIO;
+}
+
+/**
+ * ice_is_init_pkg_successful - check if DDP init was successful
+ * @state: state of the DDP pkg after download
+ */
+bool ice_is_init_pkg_successful(enum ice_ddp_state state)
+{
+	switch (state) {
+	case ICE_DDP_PKG_SUCCESS:
+	case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
+	case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/**
+ * ice_pkg_buf_alloc
+ * @hw: pointer to the HW structure
+ *
+ * Allocates a package buffer and returns a pointer to the buffer header.
+ * Note: all package contents must be in Little Endian form.
+ */
+struct ice_buf_build *ice_pkg_buf_alloc(struct ice_hw *hw)
+{
+	struct ice_buf_build *bld;
+	struct ice_buf_hdr *buf;
+
+	bld = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*bld), GFP_KERNEL);
+	if (!bld)
+		return NULL;
+
+	buf = (struct ice_buf_hdr *)bld;
+	buf->data_end =
+		cpu_to_le16(offsetof(struct ice_buf_hdr, section_entry));
+	return bld;
+}
+
+static bool ice_is_gtp_u_profile(u16 prof_idx)
+{
+	return (prof_idx >= ICE_PROFID_IPV6_GTPU_TEID &&
+		prof_idx <= ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER) ||
+	       prof_idx == ICE_PROFID_IPV4_GTPU_TEID;
+}
+
+static bool ice_is_gtp_c_profile(u16 prof_idx)
+{
+	switch (prof_idx) {
+	case ICE_PROFID_IPV4_GTPC_TEID:
+	case ICE_PROFID_IPV4_GTPC_NO_TEID:
+	case ICE_PROFID_IPV6_GTPC_TEID:
+	case ICE_PROFID_IPV6_GTPC_NO_TEID:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/**
+ * ice_get_sw_prof_type - determine switch profile type
+ * @hw: pointer to the HW structure
+ * @fv: pointer to the switch field vector
+ * @prof_idx: profile index to check
+ */
+static enum ice_prof_type ice_get_sw_prof_type(struct ice_hw *hw,
+					       struct ice_fv *fv, u32 prof_idx)
+{
+	u16 i;
+
+	if (ice_is_gtp_c_profile(prof_idx))
+		return ICE_PROF_TUN_GTPC;
+
+	if (ice_is_gtp_u_profile(prof_idx))
+		return ICE_PROF_TUN_GTPU;
+
+	for (i = 0; i < hw->blk[ICE_BLK_SW].es.fvw; i++) {
+		/* UDP tunnel will have UDP_OF protocol ID and VNI offset */
+		if (fv->ew[i].prot_id == (u8)ICE_PROT_UDP_OF &&
+		    fv->ew[i].off == ICE_VNI_OFFSET)
+			return ICE_PROF_TUN_UDP;
+
+		/* GRE tunnel will have GRE protocol */
+		if (fv->ew[i].prot_id == (u8)ICE_PROT_GRE_OF)
+			return ICE_PROF_TUN_GRE;
+	}
+
+	return ICE_PROF_NON_TUN;
+}
+
+/**
+ * ice_get_sw_fv_bitmap - Get switch field vector bitmap based on profile type
+ * @hw: pointer to hardware structure
+ * @req_profs: type of profiles requested
+ * @bm: pointer to memory for returning the bitmap of field vectors
+ */
+void ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type req_profs,
+			  unsigned long *bm)
+{
+	struct ice_pkg_enum state;
+	struct ice_seg *ice_seg;
+	struct ice_fv *fv;
+
+	if (req_profs == ICE_PROF_ALL) {
+		bitmap_set(bm, 0, ICE_MAX_NUM_PROFILES);
+		return;
+	}
+
+	memset(&state, 0, sizeof(state));
+	bitmap_zero(bm, ICE_MAX_NUM_PROFILES);
+	ice_seg = hw->seg;
+	do {
+		enum ice_prof_type prof_type;
+		u32 offset;
+
+		fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW,
+					&offset, ice_sw_fv_handler);
+		ice_seg = NULL;
+
+		if (fv) {
+			/* Determine field vector type */
+			prof_type = ice_get_sw_prof_type(hw, fv, offset);
+
+			if (req_profs & prof_type)
+				set_bit((u16)offset, bm);
+		}
+	} while (fv);
+}
+
+/**
+ * ice_get_sw_fv_list
+ * @hw: pointer to the HW structure
+ * @lkups: list of protocol types
+ * @bm: bitmap of field vectors to consider
+ * @fv_list: Head of a list
+ *
+ * Finds all the field vector entries from switch block that contain
+ * a given protocol ID and offset and returns a list of structures of type
+ * "ice_sw_fv_list_entry". Every structure in the list has a field vector
+ * definition and profile ID information
+ * NOTE: The caller of the function is responsible for freeing the memory
+ * allocated for every list entry.
+ */
+int ice_get_sw_fv_list(struct ice_hw *hw, struct ice_prot_lkup_ext *lkups,
+		       unsigned long *bm, struct list_head *fv_list)
+{
+	struct ice_sw_fv_list_entry *fvl;
+	struct ice_sw_fv_list_entry *tmp;
+	struct ice_pkg_enum state;
+	struct ice_seg *ice_seg;
+	struct ice_fv *fv;
+	u32 offset;
+
+	memset(&state, 0, sizeof(state));
+
+	if (!lkups->n_val_words || !hw->seg)
+		return -EINVAL;
+
+	ice_seg = hw->seg;
+	do {
+		u16 i;
+
+		fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW,
+					&offset, ice_sw_fv_handler);
+		if (!fv)
+			break;
+		ice_seg = NULL;
+
+		/* If field vector is not in the bitmap list, then skip this
+		 * profile.
+		 */
+		if (!test_bit((u16)offset, bm))
+			continue;
+
+		for (i = 0; i < lkups->n_val_words; i++) {
+			int j;
+
+			for (j = 0; j < hw->blk[ICE_BLK_SW].es.fvw; j++)
+				if (fv->ew[j].prot_id ==
+					    lkups->fv_words[i].prot_id &&
+				    fv->ew[j].off == lkups->fv_words[i].off)
+					break;
+			if (j >= hw->blk[ICE_BLK_SW].es.fvw)
+				break;
+			if (i + 1 == lkups->n_val_words) {
+				fvl = devm_kzalloc(ice_hw_to_dev(hw),
+						   sizeof(*fvl), GFP_KERNEL);
+				if (!fvl)
+					goto err;
+				fvl->fv_ptr = fv;
+				fvl->profile_id = offset;
+				list_add(&fvl->list_entry, fv_list);
+				break;
+			}
+		}
+	} while (fv);
+	if (list_empty(fv_list)) {
+		dev_warn(ice_hw_to_dev(hw),
+			 "Required profiles not found in currently loaded DDP package");
+		return -EIO;
+	}
+
+	return 0;
+
+err:
+	list_for_each_entry_safe(fvl, tmp, fv_list, list_entry) {
+		list_del(&fvl->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), fvl);
+	}
+
+	return -ENOMEM;
+}
+
+/**
+ * ice_init_prof_result_bm - Initialize the profile result index bitmap
+ * @hw: pointer to hardware structure
+ */
+void ice_init_prof_result_bm(struct ice_hw *hw)
+{
+	struct ice_pkg_enum state;
+	struct ice_seg *ice_seg;
+	struct ice_fv *fv;
+
+	memset(&state, 0, sizeof(state));
+
+	if (!hw->seg)
+		return;
+
+	ice_seg = hw->seg;
+	do {
+		u32 off;
+		u16 i;
+
+		fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW,
+					&off, ice_sw_fv_handler);
+		ice_seg = NULL;
+		if (!fv)
+			break;
+
+		bitmap_zero(hw->switch_info->prof_res_bm[off],
+			    ICE_MAX_FV_WORDS);
+
+		/* Determine empty field vector indices, these can be
+		 * used for recipe results. Skip index 0, since it is
+		 * always used for Switch ID.
+		 */
+		for (i = 1; i < ICE_MAX_FV_WORDS; i++)
+			if (fv->ew[i].prot_id == ICE_PROT_INVALID &&
+			    fv->ew[i].off == ICE_FV_OFFSET_INVAL)
+				set_bit(i, hw->switch_info->prof_res_bm[off]);
+	} while (fv);
+}
+
+/**
+ * ice_pkg_buf_free
+ * @hw: pointer to the HW structure
+ * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
+ *
+ * Frees a package buffer
+ */
+void ice_pkg_buf_free(struct ice_hw *hw, struct ice_buf_build *bld)
+{
+	devm_kfree(ice_hw_to_dev(hw), bld);
+}
+
+/**
+ * ice_pkg_buf_reserve_section
+ * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
+ * @count: the number of sections to reserve
+ *
+ * Reserves one or more section table entries in a package buffer. This routine
+ * can be called multiple times as long as they are made before calling
+ * ice_pkg_buf_alloc_section(). Once ice_pkg_buf_alloc_section()
+ * is called once, the number of sections that can be allocated will not be able
+ * to be increased; not using all reserved sections is fine, but this will
+ * result in some wasted space in the buffer.
+ * Note: all package contents must be in Little Endian form.
+ */
+int ice_pkg_buf_reserve_section(struct ice_buf_build *bld, u16 count)
+{
+	struct ice_buf_hdr *buf;
+	u16 section_count;
+	u16 data_end;
+
+	if (!bld)
+		return -EINVAL;
+
+	buf = (struct ice_buf_hdr *)&bld->buf;
+
+	/* already an active section, can't increase table size */
+	section_count = le16_to_cpu(buf->section_count);
+	if (section_count > 0)
+		return -EIO;
+
+	if (bld->reserved_section_table_entries + count > ICE_MAX_S_COUNT)
+		return -EIO;
+	bld->reserved_section_table_entries += count;
+
+	data_end = le16_to_cpu(buf->data_end) +
+		   flex_array_size(buf, section_entry, count);
+	buf->data_end = cpu_to_le16(data_end);
+
+	return 0;
+}
+
+/**
+ * ice_pkg_buf_alloc_section
+ * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
+ * @type: the section type value
+ * @size: the size of the section to reserve (in bytes)
+ *
+ * Reserves memory in the buffer for a section's content and updates the
+ * buffers' status accordingly. This routine returns a pointer to the first
+ * byte of the section start within the buffer, which is used to fill in the
+ * section contents.
+ * Note: all package contents must be in Little Endian form.
+ */
+void *ice_pkg_buf_alloc_section(struct ice_buf_build *bld, u32 type, u16 size)
+{
+	struct ice_buf_hdr *buf;
+	u16 sect_count;
+	u16 data_end;
+
+	if (!bld || !type || !size)
+		return NULL;
+
+	buf = (struct ice_buf_hdr *)&bld->buf;
+
+	/* check for enough space left in buffer */
+	data_end = le16_to_cpu(buf->data_end);
+
+	/* section start must align on 4 byte boundary */
+	data_end = ALIGN(data_end, 4);
+
+	if ((data_end + size) > ICE_MAX_S_DATA_END)
+		return NULL;
+
+	/* check for more available section table entries */
+	sect_count = le16_to_cpu(buf->section_count);
+	if (sect_count < bld->reserved_section_table_entries) {
+		void *section_ptr = ((u8 *)buf) + data_end;
+
+		buf->section_entry[sect_count].offset = cpu_to_le16(data_end);
+		buf->section_entry[sect_count].size = cpu_to_le16(size);
+		buf->section_entry[sect_count].type = cpu_to_le32(type);
+
+		data_end += size;
+		buf->data_end = cpu_to_le16(data_end);
+
+		buf->section_count = cpu_to_le16(sect_count + 1);
+		return section_ptr;
+	}
+
+	/* no free section table entries */
+	return NULL;
+}
+
+/**
+ * ice_pkg_buf_alloc_single_section
+ * @hw: pointer to the HW structure
+ * @type: the section type value
+ * @size: the size of the section to reserve (in bytes)
+ * @section: returns pointer to the section
+ *
+ * Allocates a package buffer with a single section.
+ * Note: all package contents must be in Little Endian form.
+ */
+struct ice_buf_build *ice_pkg_buf_alloc_single_section(struct ice_hw *hw,
+						       u32 type, u16 size,
+						       void **section)
+{
+	struct ice_buf_build *buf;
+
+	if (!section)
+		return NULL;
+
+	buf = ice_pkg_buf_alloc(hw);
+	if (!buf)
+		return NULL;
+
+	if (ice_pkg_buf_reserve_section(buf, 1))
+		goto ice_pkg_buf_alloc_single_section_err;
+
+	*section = ice_pkg_buf_alloc_section(buf, type, size);
+	if (!*section)
+		goto ice_pkg_buf_alloc_single_section_err;
+
+	return buf;
+
+ice_pkg_buf_alloc_single_section_err:
+	ice_pkg_buf_free(hw, buf);
+	return NULL;
+}
+
+/**
+ * ice_pkg_buf_get_active_sections
+ * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
+ *
+ * Returns the number of active sections. Before using the package buffer
+ * in an update package command, the caller should make sure that there is at
+ * least one active section - otherwise, the buffer is not legal and should
+ * not be used.
+ * Note: all package contents must be in Little Endian form.
+ */
+u16 ice_pkg_buf_get_active_sections(struct ice_buf_build *bld)
+{
+	struct ice_buf_hdr *buf;
+
+	if (!bld)
+		return 0;
+
+	buf = (struct ice_buf_hdr *)&bld->buf;
+	return le16_to_cpu(buf->section_count);
+}
+
+/**
+ * ice_pkg_buf
+ * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc())
+ *
+ * Return a pointer to the buffer's header
+ */
+struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld)
+{
+	if (!bld)
+		return NULL;
+
+	return &bld->buf;
+}
+
+static enum ice_ddp_state ice_map_aq_err_to_ddp_state(enum ice_aq_err aq_err)
+{
+	switch (aq_err) {
+	case ICE_AQ_RC_ENOSEC:
+	case ICE_AQ_RC_EBADSIG:
+		return ICE_DDP_PKG_FILE_SIGNATURE_INVALID;
+	case ICE_AQ_RC_ESVN:
+		return ICE_DDP_PKG_FILE_REVISION_TOO_LOW;
+	case ICE_AQ_RC_EBADMAN:
+	case ICE_AQ_RC_EBADBUF:
+		return ICE_DDP_PKG_LOAD_ERROR;
+	default:
+		return ICE_DDP_PKG_ERR;
+	}
+}
+
+/**
+ * ice_acquire_global_cfg_lock
+ * @hw: pointer to the HW structure
+ * @access: access type (read or write)
+ *
+ * This function will request ownership of the global config lock for reading
+ * or writing of the package. When attempting to obtain write access, the
+ * caller must check for the following two return values:
+ *
+ * 0         -  Means the caller has acquired the global config lock
+ *              and can perform writing of the package.
+ * -EALREADY - Indicates another driver has already written the
+ *             package or has found that no update was necessary; in
+ *             this case, the caller can just skip performing any
+ *             update of the package.
+ */
+static int ice_acquire_global_cfg_lock(struct ice_hw *hw,
+				       enum ice_aq_res_access_type access)
+{
+	int status;
+
+	status = ice_acquire_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID, access,
+				 ICE_GLOBAL_CFG_LOCK_TIMEOUT);
+
+	if (!status)
+		mutex_lock(&ice_global_cfg_lock_sw);
+	else if (status == -EALREADY)
+		ice_debug(hw, ICE_DBG_PKG,
+			  "Global config lock: No work to do\n");
+
+	return status;
+}
+
+/**
+ * ice_release_global_cfg_lock
+ * @hw: pointer to the HW structure
+ *
+ * This function will release the global config lock.
+ */
+static void ice_release_global_cfg_lock(struct ice_hw *hw)
+{
+	mutex_unlock(&ice_global_cfg_lock_sw);
+	ice_release_res(hw, ICE_GLOBAL_CFG_LOCK_RES_ID);
+}
+
+/**
+ * ice_aq_download_pkg
+ * @hw: pointer to the hardware structure
+ * @pkg_buf: the package buffer to transfer
+ * @buf_size: the size of the package buffer
+ * @last_buf: last buffer indicator
+ * @error_offset: returns error offset
+ * @error_info: returns error information
+ * @cd: pointer to command details structure or NULL
+ *
+ * Download Package (0x0C40)
+ */
+static int
+ice_aq_download_pkg(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf,
+		    u16 buf_size, bool last_buf, u32 *error_offset,
+		    u32 *error_info, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_download_pkg *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	if (error_offset)
+		*error_offset = 0;
+	if (error_info)
+		*error_info = 0;
+
+	cmd = &desc.params.download_pkg;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_download_pkg);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	if (last_buf)
+		cmd->flags |= ICE_AQC_DOWNLOAD_PKG_LAST_BUF;
+
+	status = ice_aq_send_cmd(hw, &desc, pkg_buf, buf_size, cd);
+	if (status == -EIO) {
+		/* Read error from buffer only when the FW returned an error */
+		struct ice_aqc_download_pkg_resp *resp;
+
+		resp = (struct ice_aqc_download_pkg_resp *)pkg_buf;
+		if (error_offset)
+			*error_offset = le32_to_cpu(resp->error_offset);
+		if (error_info)
+			*error_info = le32_to_cpu(resp->error_info);
+	}
+
+	return status;
+}
+
+/**
+ * ice_dwnld_cfg_bufs
+ * @hw: pointer to the hardware structure
+ * @bufs: pointer to an array of buffers
+ * @count: the number of buffers in the array
+ *
+ * Obtains global config lock and downloads the package configuration buffers
+ * to the firmware. Metadata buffers are skipped, and the first metadata buffer
+ * found indicates that the rest of the buffers are all metadata buffers.
+ */
+static enum ice_ddp_state ice_dwnld_cfg_bufs(struct ice_hw *hw,
+					     struct ice_buf *bufs, u32 count)
+{
+	enum ice_ddp_state state = ICE_DDP_PKG_SUCCESS;
+	struct ice_buf_hdr *bh;
+	enum ice_aq_err err;
+	u32 offset, info, i;
+	int status;
+
+	if (!bufs || !count)
+		return ICE_DDP_PKG_ERR;
+
+	/* If the first buffer's first section has its metadata bit set
+	 * then there are no buffers to be downloaded, and the operation is
+	 * considered a success.
+	 */
+	bh = (struct ice_buf_hdr *)bufs;
+	if (le32_to_cpu(bh->section_entry[0].type) & ICE_METADATA_BUF)
+		return ICE_DDP_PKG_SUCCESS;
+
+	status = ice_acquire_global_cfg_lock(hw, ICE_RES_WRITE);
+	if (status) {
+		if (status == -EALREADY)
+			return ICE_DDP_PKG_ALREADY_LOADED;
+		return ice_map_aq_err_to_ddp_state(hw->adminq.sq_last_status);
+	}
+
+	for (i = 0; i < count; i++) {
+		bool last = ((i + 1) == count);
+
+		if (!last) {
+			/* check next buffer for metadata flag */
+			bh = (struct ice_buf_hdr *)(bufs + i + 1);
+
+			/* A set metadata flag in the next buffer will signal
+			 * that the current buffer will be the last buffer
+			 * downloaded
+			 */
+			if (le16_to_cpu(bh->section_count))
+				if (le32_to_cpu(bh->section_entry[0].type) &
+				    ICE_METADATA_BUF)
+					last = true;
+		}
+
+		bh = (struct ice_buf_hdr *)(bufs + i);
+
+		status = ice_aq_download_pkg(hw, bh, ICE_PKG_BUF_SIZE, last,
+					     &offset, &info, NULL);
+
+		/* Save AQ status from download package */
+		if (status) {
+			ice_debug(hw, ICE_DBG_PKG,
+				  "Pkg download failed: err %d off %d inf %d\n",
+				  status, offset, info);
+			err = hw->adminq.sq_last_status;
+			state = ice_map_aq_err_to_ddp_state(err);
+			break;
+		}
+
+		if (last)
+			break;
+	}
+
+	if (!status) {
+		status = ice_set_vlan_mode(hw);
+		if (status)
+			ice_debug(hw, ICE_DBG_PKG,
+				  "Failed to set VLAN mode: err %d\n", status);
+	}
+
+	ice_release_global_cfg_lock(hw);
+
+	return state;
+}
+
+/**
+ * ice_aq_get_pkg_info_list
+ * @hw: pointer to the hardware structure
+ * @pkg_info: the buffer which will receive the information list
+ * @buf_size: the size of the pkg_info information buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get Package Info List (0x0C43)
+ */
+static int ice_aq_get_pkg_info_list(struct ice_hw *hw,
+				    struct ice_aqc_get_pkg_info_resp *pkg_info,
+				    u16 buf_size, struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_pkg_info_list);
+
+	return ice_aq_send_cmd(hw, &desc, pkg_info, buf_size, cd);
+}
+
+/**
+ * ice_download_pkg
+ * @hw: pointer to the hardware structure
+ * @ice_seg: pointer to the segment of the package to be downloaded
+ *
+ * Handles the download of a complete package.
+ */
+static enum ice_ddp_state ice_download_pkg(struct ice_hw *hw,
+					   struct ice_seg *ice_seg)
+{
+	struct ice_buf_table *ice_buf_tbl;
+	int status;
+
+	ice_debug(hw, ICE_DBG_PKG, "Segment format version: %d.%d.%d.%d\n",
+		  ice_seg->hdr.seg_format_ver.major,
+		  ice_seg->hdr.seg_format_ver.minor,
+		  ice_seg->hdr.seg_format_ver.update,
+		  ice_seg->hdr.seg_format_ver.draft);
+
+	ice_debug(hw, ICE_DBG_PKG, "Seg: type 0x%X, size %d, name %s\n",
+		  le32_to_cpu(ice_seg->hdr.seg_type),
+		  le32_to_cpu(ice_seg->hdr.seg_size), ice_seg->hdr.seg_id);
+
+	ice_buf_tbl = ice_find_buf_table(ice_seg);
+
+	ice_debug(hw, ICE_DBG_PKG, "Seg buf count: %d\n",
+		  le32_to_cpu(ice_buf_tbl->buf_count));
+
+	status = ice_dwnld_cfg_bufs(hw, ice_buf_tbl->buf_array,
+				    le32_to_cpu(ice_buf_tbl->buf_count));
+
+	ice_post_pkg_dwnld_vlan_mode_cfg(hw);
+
+	return status;
+}
+
+/**
+ * ice_aq_update_pkg
+ * @hw: pointer to the hardware structure
+ * @pkg_buf: the package cmd buffer
+ * @buf_size: the size of the package cmd buffer
+ * @last_buf: last buffer indicator
+ * @error_offset: returns error offset
+ * @error_info: returns error information
+ * @cd: pointer to command details structure or NULL
+ *
+ * Update Package (0x0C42)
+ */
+static int ice_aq_update_pkg(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf,
+			     u16 buf_size, bool last_buf, u32 *error_offset,
+			     u32 *error_info, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_download_pkg *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	if (error_offset)
+		*error_offset = 0;
+	if (error_info)
+		*error_info = 0;
+
+	cmd = &desc.params.download_pkg;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_update_pkg);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	if (last_buf)
+		cmd->flags |= ICE_AQC_DOWNLOAD_PKG_LAST_BUF;
+
+	status = ice_aq_send_cmd(hw, &desc, pkg_buf, buf_size, cd);
+	if (status == -EIO) {
+		/* Read error from buffer only when the FW returned an error */
+		struct ice_aqc_download_pkg_resp *resp;
+
+		resp = (struct ice_aqc_download_pkg_resp *)pkg_buf;
+		if (error_offset)
+			*error_offset = le32_to_cpu(resp->error_offset);
+		if (error_info)
+			*error_info = le32_to_cpu(resp->error_info);
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_upload_section
+ * @hw: pointer to the hardware structure
+ * @pkg_buf: the package buffer which will receive the section
+ * @buf_size: the size of the package buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * Upload Section (0x0C41)
+ */
+int ice_aq_upload_section(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf,
+			  u16 buf_size, struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_upload_section);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	return ice_aq_send_cmd(hw, &desc, pkg_buf, buf_size, cd);
+}
+
+/**
+ * ice_update_pkg_no_lock
+ * @hw: pointer to the hardware structure
+ * @bufs: pointer to an array of buffers
+ * @count: the number of buffers in the array
+ */
+int ice_update_pkg_no_lock(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
+{
+	int status = 0;
+	u32 i;
+
+	for (i = 0; i < count; i++) {
+		struct ice_buf_hdr *bh = (struct ice_buf_hdr *)(bufs + i);
+		bool last = ((i + 1) == count);
+		u32 offset, info;
+
+		status = ice_aq_update_pkg(hw, bh, le16_to_cpu(bh->data_end),
+					   last, &offset, &info, NULL);
+
+		if (status) {
+			ice_debug(hw, ICE_DBG_PKG,
+				  "Update pkg failed: err %d off %d inf %d\n",
+				  status, offset, info);
+			break;
+		}
+	}
+
+	return status;
+}
+
+/**
+ * ice_update_pkg
+ * @hw: pointer to the hardware structure
+ * @bufs: pointer to an array of buffers
+ * @count: the number of buffers in the array
+ *
+ * Obtains change lock and updates package.
+ */
+int ice_update_pkg(struct ice_hw *hw, struct ice_buf *bufs, u32 count)
+{
+	int status;
+
+	status = ice_acquire_change_lock(hw, ICE_RES_WRITE);
+	if (status)
+		return status;
+
+	status = ice_update_pkg_no_lock(hw, bufs, count);
+
+	ice_release_change_lock(hw);
+
+	return status;
+}
+
+/**
+ * ice_find_seg_in_pkg
+ * @hw: pointer to the hardware structure
+ * @seg_type: the segment type to search for (i.e., SEGMENT_TYPE_CPK)
+ * @pkg_hdr: pointer to the package header to be searched
+ *
+ * This function searches a package file for a particular segment type. On
+ * success it returns a pointer to the segment header, otherwise it will
+ * return NULL.
+ */
+static struct ice_generic_seg_hdr *
+ice_find_seg_in_pkg(struct ice_hw *hw, u32 seg_type,
+		    struct ice_pkg_hdr *pkg_hdr)
+{
+	u32 i;
+
+	ice_debug(hw, ICE_DBG_PKG, "Package format version: %d.%d.%d.%d\n",
+		  pkg_hdr->pkg_format_ver.major, pkg_hdr->pkg_format_ver.minor,
+		  pkg_hdr->pkg_format_ver.update,
+		  pkg_hdr->pkg_format_ver.draft);
+
+	/* Search all package segments for the requested segment type */
+	for (i = 0; i < le32_to_cpu(pkg_hdr->seg_count); i++) {
+		struct ice_generic_seg_hdr *seg;
+
+		seg = (struct ice_generic_seg_hdr
+			       *)((u8 *)pkg_hdr +
+				  le32_to_cpu(pkg_hdr->seg_offset[i]));
+
+		if (le32_to_cpu(seg->seg_type) == seg_type)
+			return seg;
+	}
+
+	return NULL;
+}
+
+/**
+ * ice_init_pkg_info
+ * @hw: pointer to the hardware structure
+ * @pkg_hdr: pointer to the driver's package hdr
+ *
+ * Saves off the package details into the HW structure.
+ */
+static enum ice_ddp_state ice_init_pkg_info(struct ice_hw *hw,
+					    struct ice_pkg_hdr *pkg_hdr)
+{
+	struct ice_generic_seg_hdr *seg_hdr;
+
+	if (!pkg_hdr)
+		return ICE_DDP_PKG_ERR;
+
+	seg_hdr = ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE, pkg_hdr);
+	if (seg_hdr) {
+		struct ice_meta_sect *meta;
+		struct ice_pkg_enum state;
+
+		memset(&state, 0, sizeof(state));
+
+		/* Get package information from the Metadata Section */
+		meta = ice_pkg_enum_section((struct ice_seg *)seg_hdr, &state,
+					    ICE_SID_METADATA);
+		if (!meta) {
+			ice_debug(hw, ICE_DBG_INIT,
+				  "Did not find ice metadata section in package\n");
+			return ICE_DDP_PKG_INVALID_FILE;
+		}
+
+		hw->pkg_ver = meta->ver;
+		memcpy(hw->pkg_name, meta->name, sizeof(meta->name));
+
+		ice_debug(hw, ICE_DBG_PKG, "Pkg: %d.%d.%d.%d, %s\n",
+			  meta->ver.major, meta->ver.minor, meta->ver.update,
+			  meta->ver.draft, meta->name);
+
+		hw->ice_seg_fmt_ver = seg_hdr->seg_format_ver;
+		memcpy(hw->ice_seg_id, seg_hdr->seg_id, sizeof(hw->ice_seg_id));
+
+		ice_debug(hw, ICE_DBG_PKG, "Ice Seg: %d.%d.%d.%d, %s\n",
+			  seg_hdr->seg_format_ver.major,
+			  seg_hdr->seg_format_ver.minor,
+			  seg_hdr->seg_format_ver.update,
+			  seg_hdr->seg_format_ver.draft, seg_hdr->seg_id);
+	} else {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "Did not find ice segment in driver package\n");
+		return ICE_DDP_PKG_INVALID_FILE;
+	}
+
+	return ICE_DDP_PKG_SUCCESS;
+}
+
+/**
+ * ice_get_pkg_info
+ * @hw: pointer to the hardware structure
+ *
+ * Store details of the package currently loaded in HW into the HW structure.
+ */
+static enum ice_ddp_state ice_get_pkg_info(struct ice_hw *hw)
+{
+	enum ice_ddp_state state = ICE_DDP_PKG_SUCCESS;
+	struct ice_aqc_get_pkg_info_resp *pkg_info;
+	u16 size;
+	u32 i;
+
+	size = struct_size(pkg_info, pkg_info, ICE_PKG_CNT);
+	pkg_info = kzalloc(size, GFP_KERNEL);
+	if (!pkg_info)
+		return ICE_DDP_PKG_ERR;
+
+	if (ice_aq_get_pkg_info_list(hw, pkg_info, size, NULL)) {
+		state = ICE_DDP_PKG_ERR;
+		goto init_pkg_free_alloc;
+	}
+
+	for (i = 0; i < le32_to_cpu(pkg_info->count); i++) {
+#define ICE_PKG_FLAG_COUNT 4
+		char flags[ICE_PKG_FLAG_COUNT + 1] = { 0 };
+		u8 place = 0;
+
+		if (pkg_info->pkg_info[i].is_active) {
+			flags[place++] = 'A';
+			hw->active_pkg_ver = pkg_info->pkg_info[i].ver;
+			hw->active_track_id =
+				le32_to_cpu(pkg_info->pkg_info[i].track_id);
+			memcpy(hw->active_pkg_name, pkg_info->pkg_info[i].name,
+			       sizeof(pkg_info->pkg_info[i].name));
+			hw->active_pkg_in_nvm = pkg_info->pkg_info[i].is_in_nvm;
+		}
+		if (pkg_info->pkg_info[i].is_active_at_boot)
+			flags[place++] = 'B';
+		if (pkg_info->pkg_info[i].is_modified)
+			flags[place++] = 'M';
+		if (pkg_info->pkg_info[i].is_in_nvm)
+			flags[place++] = 'N';
+
+		ice_debug(hw, ICE_DBG_PKG, "Pkg[%d]: %d.%d.%d.%d,%s,%s\n", i,
+			  pkg_info->pkg_info[i].ver.major,
+			  pkg_info->pkg_info[i].ver.minor,
+			  pkg_info->pkg_info[i].ver.update,
+			  pkg_info->pkg_info[i].ver.draft,
+			  pkg_info->pkg_info[i].name, flags);
+	}
+
+init_pkg_free_alloc:
+	kfree(pkg_info);
+
+	return state;
+}
+
+/**
+ * ice_chk_pkg_compat
+ * @hw: pointer to the hardware structure
+ * @ospkg: pointer to the package hdr
+ * @seg: pointer to the package segment hdr
+ *
+ * This function checks the package version compatibility with driver and NVM
+ */
+static enum ice_ddp_state ice_chk_pkg_compat(struct ice_hw *hw,
+					     struct ice_pkg_hdr *ospkg,
+					     struct ice_seg **seg)
+{
+	struct ice_aqc_get_pkg_info_resp *pkg;
+	enum ice_ddp_state state;
+	u16 size;
+	u32 i;
+
+	/* Check package version compatibility */
+	state = ice_chk_pkg_version(&hw->pkg_ver);
+	if (state) {
+		ice_debug(hw, ICE_DBG_INIT, "Package version check failed.\n");
+		return state;
+	}
+
+	/* find ICE segment in given package */
+	*seg = (struct ice_seg *)ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE,
+						     ospkg);
+	if (!*seg) {
+		ice_debug(hw, ICE_DBG_INIT, "no ice segment in package.\n");
+		return ICE_DDP_PKG_INVALID_FILE;
+	}
+
+	/* Check if FW is compatible with the OS package */
+	size = struct_size(pkg, pkg_info, ICE_PKG_CNT);
+	pkg = kzalloc(size, GFP_KERNEL);
+	if (!pkg)
+		return ICE_DDP_PKG_ERR;
+
+	if (ice_aq_get_pkg_info_list(hw, pkg, size, NULL)) {
+		state = ICE_DDP_PKG_LOAD_ERROR;
+		goto fw_ddp_compat_free_alloc;
+	}
+
+	for (i = 0; i < le32_to_cpu(pkg->count); i++) {
+		/* loop till we find the NVM package */
+		if (!pkg->pkg_info[i].is_in_nvm)
+			continue;
+		if ((*seg)->hdr.seg_format_ver.major !=
+			    pkg->pkg_info[i].ver.major ||
+		    (*seg)->hdr.seg_format_ver.minor >
+			    pkg->pkg_info[i].ver.minor) {
+			state = ICE_DDP_PKG_FW_MISMATCH;
+			ice_debug(hw, ICE_DBG_INIT,
+				  "OS package is not compatible with NVM.\n");
+		}
+		/* done processing NVM package so break */
+		break;
+	}
+fw_ddp_compat_free_alloc:
+	kfree(pkg);
+	return state;
+}
+
+/**
+ * ice_init_pkg_hints
+ * @hw: pointer to the HW structure
+ * @ice_seg: pointer to the segment of the package scan (non-NULL)
+ *
+ * This function will scan the package and save off relevant information
+ * (hints or metadata) for driver use. The ice_seg parameter must not be NULL
+ * since the first call to ice_enum_labels requires a pointer to an actual
+ * ice_seg structure.
+ */
+static void ice_init_pkg_hints(struct ice_hw *hw, struct ice_seg *ice_seg)
+{
+	struct ice_pkg_enum state;
+	char *label_name;
+	u16 val;
+	int i;
+
+	memset(&hw->tnl, 0, sizeof(hw->tnl));
+	memset(&state, 0, sizeof(state));
+
+	if (!ice_seg)
+		return;
+
+	label_name = ice_enum_labels(ice_seg, ICE_SID_LBL_RXPARSER_TMEM, &state,
+				     &val);
+
+	while (label_name) {
+		if (!strncmp(label_name, ICE_TNL_PRE, strlen(ICE_TNL_PRE)))
+			/* check for a tunnel entry */
+			ice_add_tunnel_hint(hw, label_name, val);
+
+		/* check for a dvm mode entry */
+		else if (!strncmp(label_name, ICE_DVM_PRE, strlen(ICE_DVM_PRE)))
+			ice_add_dvm_hint(hw, val, true);
+
+		/* check for a svm mode entry */
+		else if (!strncmp(label_name, ICE_SVM_PRE, strlen(ICE_SVM_PRE)))
+			ice_add_dvm_hint(hw, val, false);
+
+		label_name = ice_enum_labels(NULL, 0, &state, &val);
+	}
+
+	/* Cache the appropriate boost TCAM entry pointers for tunnels */
+	for (i = 0; i < hw->tnl.count; i++) {
+		ice_find_boost_entry(ice_seg, hw->tnl.tbl[i].boost_addr,
+				     &hw->tnl.tbl[i].boost_entry);
+		if (hw->tnl.tbl[i].boost_entry) {
+			hw->tnl.tbl[i].valid = true;
+			if (hw->tnl.tbl[i].type < __TNL_TYPE_CNT)
+				hw->tnl.valid_count[hw->tnl.tbl[i].type]++;
+		}
+	}
+
+	/* Cache the appropriate boost TCAM entry pointers for DVM and SVM */
+	for (i = 0; i < hw->dvm_upd.count; i++)
+		ice_find_boost_entry(ice_seg, hw->dvm_upd.tbl[i].boost_addr,
+				     &hw->dvm_upd.tbl[i].boost_entry);
+}
+
+/**
+ * ice_fill_hw_ptype - fill the enabled PTYPE bit information
+ * @hw: pointer to the HW structure
+ */
+static void ice_fill_hw_ptype(struct ice_hw *hw)
+{
+	struct ice_marker_ptype_tcam_entry *tcam;
+	struct ice_seg *seg = hw->seg;
+	struct ice_pkg_enum state;
+
+	bitmap_zero(hw->hw_ptype, ICE_FLOW_PTYPE_MAX);
+	if (!seg)
+		return;
+
+	memset(&state, 0, sizeof(state));
+
+	do {
+		tcam = ice_pkg_enum_entry(seg, &state,
+					  ICE_SID_RXPARSER_MARKER_PTYPE, NULL,
+					  ice_marker_ptype_tcam_handler);
+		if (tcam &&
+		    le16_to_cpu(tcam->addr) < ICE_MARKER_PTYPE_TCAM_ADDR_MAX &&
+		    le16_to_cpu(tcam->ptype) < ICE_FLOW_PTYPE_MAX)
+			set_bit(le16_to_cpu(tcam->ptype), hw->hw_ptype);
+
+		seg = NULL;
+	} while (tcam);
+}
+
+/**
+ * ice_init_pkg - initialize/download package
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the package buffer
+ * @len: size of the package buffer
+ *
+ * This function initializes a package. The package contains HW tables
+ * required to do packet processing. First, the function extracts package
+ * information such as version. Then it finds the ice configuration segment
+ * within the package; this function then saves a copy of the segment pointer
+ * within the supplied package buffer. Next, the function will cache any hints
+ * from the package, followed by downloading the package itself. Note, that if
+ * a previous PF driver has already downloaded the package successfully, then
+ * the current driver will not have to download the package again.
+ *
+ * The local package contents will be used to query default behavior and to
+ * update specific sections of the HW's version of the package (e.g. to update
+ * the parse graph to understand new protocols).
+ *
+ * This function stores a pointer to the package buffer memory, and it is
+ * expected that the supplied buffer will not be freed immediately. If the
+ * package buffer needs to be freed, such as when read from a file, use
+ * ice_copy_and_init_pkg() instead of directly calling ice_init_pkg() in this
+ * case.
+ */
+enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len)
+{
+	bool already_loaded = false;
+	enum ice_ddp_state state;
+	struct ice_pkg_hdr *pkg;
+	struct ice_seg *seg;
+
+	if (!buf || !len)
+		return ICE_DDP_PKG_ERR;
+
+	pkg = (struct ice_pkg_hdr *)buf;
+	state = ice_verify_pkg(pkg, len);
+	if (state) {
+		ice_debug(hw, ICE_DBG_INIT, "failed to verify pkg (err: %d)\n",
+			  state);
+		return state;
+	}
+
+	/* initialize package info */
+	state = ice_init_pkg_info(hw, pkg);
+	if (state)
+		return state;
+
+	/* before downloading the package, check package version for
+	 * compatibility with driver
+	 */
+	state = ice_chk_pkg_compat(hw, pkg, &seg);
+	if (state)
+		return state;
+
+	/* initialize package hints and then download package */
+	ice_init_pkg_hints(hw, seg);
+	state = ice_download_pkg(hw, seg);
+	if (state == ICE_DDP_PKG_ALREADY_LOADED) {
+		ice_debug(hw, ICE_DBG_INIT,
+			  "package previously loaded - no work.\n");
+		already_loaded = true;
+	}
+
+	/* Get information on the package currently loaded in HW, then make sure
+	 * the driver is compatible with this version.
+	 */
+	if (!state || state == ICE_DDP_PKG_ALREADY_LOADED) {
+		state = ice_get_pkg_info(hw);
+		if (!state)
+			state = ice_get_ddp_pkg_state(hw, already_loaded);
+	}
+
+	if (ice_is_init_pkg_successful(state)) {
+		hw->seg = seg;
+		/* on successful package download update other required
+		 * registers to support the package and fill HW tables
+		 * with package content.
+		 */
+		ice_init_pkg_regs(hw);
+		ice_fill_blk_tbls(hw);
+		ice_fill_hw_ptype(hw);
+		ice_get_prof_index_max(hw);
+	} else {
+		ice_debug(hw, ICE_DBG_INIT, "package load failed, %d\n", state);
+	}
+
+	return state;
+}
+
+/**
+ * ice_copy_and_init_pkg - initialize/download a copy of the package
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the package buffer
+ * @len: size of the package buffer
+ *
+ * This function copies the package buffer, and then calls ice_init_pkg() to
+ * initialize the copied package contents.
+ *
+ * The copying is necessary if the package buffer supplied is constant, or if
+ * the memory may disappear shortly after calling this function.
+ *
+ * If the package buffer resides in the data segment and can be modified, the
+ * caller is free to use ice_init_pkg() instead of ice_copy_and_init_pkg().
+ *
+ * However, if the package buffer needs to be copied first, such as when being
+ * read from a file, the caller should use ice_copy_and_init_pkg().
+ *
+ * This function will first copy the package buffer, before calling
+ * ice_init_pkg(). The caller is free to immediately destroy the original
+ * package buffer, as the new copy will be managed by this function and
+ * related routines.
+ */
+enum ice_ddp_state ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf,
+					 u32 len)
+{
+	enum ice_ddp_state state;
+	u8 *buf_copy;
+
+	if (!buf || !len)
+		return ICE_DDP_PKG_ERR;
+
+	buf_copy = devm_kmemdup(ice_hw_to_dev(hw), buf, len, GFP_KERNEL);
+
+	state = ice_init_pkg(hw, buf_copy, len);
+	if (!ice_is_init_pkg_successful(state)) {
+		/* Free the copy, since we failed to initialize the package */
+		devm_kfree(ice_hw_to_dev(hw), buf_copy);
+	} else {
+		/* Track the copied pkg so we can free it later */
+		hw->pkg_copy = buf_copy;
+		hw->pkg_size = len;
+	}
+
+	return state;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.h b/drivers/net/ethernet/intel/ice/ice_ddp.h
new file mode 100644
index 0000000000..abb5f32f2e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ddp.h
@@ -0,0 +1,434 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2022, Intel Corporation. */
+
+#ifndef _ICE_DDP_H_
+#define _ICE_DDP_H_
+
+#include "ice_type.h"
+
+/* Package minimal version supported */
+#define ICE_PKG_SUPP_VER_MAJ 1
+#define ICE_PKG_SUPP_VER_MNR 3
+
+/* Package format version */
+#define ICE_PKG_FMT_VER_MAJ 1
+#define ICE_PKG_FMT_VER_MNR 0
+#define ICE_PKG_FMT_VER_UPD 0
+#define ICE_PKG_FMT_VER_DFT 0
+
+#define ICE_PKG_CNT 4
+
+#define ICE_FV_OFFSET_INVAL 0x1FF
+
+/* Extraction Sequence (Field Vector) Table */
+struct ice_fv_word {
+	u8 prot_id;
+	u16 off; /* Offset within the protocol header */
+	u8 resvrd;
+} __packed;
+
+#define ICE_MAX_NUM_PROFILES 256
+
+#define ICE_MAX_FV_WORDS 48
+struct ice_fv {
+	struct ice_fv_word ew[ICE_MAX_FV_WORDS];
+};
+
+enum ice_ddp_state {
+	/* Indicates that this call to ice_init_pkg
+	 * successfully loaded the requested DDP package
+	 */
+	ICE_DDP_PKG_SUCCESS = 0,
+
+	/* Generic error for already loaded errors, it is mapped later to
+	 * the more specific one (one of the next 3)
+	 */
+	ICE_DDP_PKG_ALREADY_LOADED = -1,
+
+	/* Indicates that a DDP package of the same version has already been
+	 * loaded onto the device by a previous call or by another PF
+	 */
+	ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED = -2,
+
+	/* The device has a DDP package that is not supported by the driver */
+	ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED = -3,
+
+	/* The device has a compatible package
+	 * (but different from the request) already loaded
+	 */
+	ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED = -4,
+
+	/* The firmware loaded on the device is not compatible with
+	 * the DDP package loaded
+	 */
+	ICE_DDP_PKG_FW_MISMATCH = -5,
+
+	/* The DDP package file is invalid */
+	ICE_DDP_PKG_INVALID_FILE = -6,
+
+	/* The version of the DDP package provided is higher than
+	 * the driver supports
+	 */
+	ICE_DDP_PKG_FILE_VERSION_TOO_HIGH = -7,
+
+	/* The version of the DDP package provided is lower than the
+	 * driver supports
+	 */
+	ICE_DDP_PKG_FILE_VERSION_TOO_LOW = -8,
+
+	/* The signature of the DDP package file provided is invalid */
+	ICE_DDP_PKG_FILE_SIGNATURE_INVALID = -9,
+
+	/* The DDP package file security revision is too low and not
+	 * supported by firmware
+	 */
+	ICE_DDP_PKG_FILE_REVISION_TOO_LOW = -10,
+
+	/* An error occurred in firmware while loading the DDP package */
+	ICE_DDP_PKG_LOAD_ERROR = -11,
+
+	/* Other errors */
+	ICE_DDP_PKG_ERR = -12
+};
+
+/* Package and segment headers and tables */
+struct ice_pkg_hdr {
+	struct ice_pkg_ver pkg_format_ver;
+	__le32 seg_count;
+	__le32 seg_offset[];
+};
+
+/* generic segment */
+struct ice_generic_seg_hdr {
+#define SEGMENT_TYPE_METADATA 0x00000001
+#define SEGMENT_TYPE_ICE 0x00000010
+	__le32 seg_type;
+	struct ice_pkg_ver seg_format_ver;
+	__le32 seg_size;
+	char seg_id[ICE_PKG_NAME_SIZE];
+};
+
+/* ice specific segment */
+
+union ice_device_id {
+	struct {
+		__le16 device_id;
+		__le16 vendor_id;
+	} dev_vend_id;
+	__le32 id;
+};
+
+struct ice_device_id_entry {
+	union ice_device_id device;
+	union ice_device_id sub_device;
+};
+
+struct ice_seg {
+	struct ice_generic_seg_hdr hdr;
+	__le32 device_table_count;
+	struct ice_device_id_entry device_table[];
+};
+
+struct ice_nvm_table {
+	__le32 table_count;
+	__le32 vers[];
+};
+
+struct ice_buf {
+#define ICE_PKG_BUF_SIZE 4096
+	u8 buf[ICE_PKG_BUF_SIZE];
+};
+
+struct ice_buf_table {
+	__le32 buf_count;
+	struct ice_buf buf_array[];
+};
+
+struct ice_run_time_cfg_seg {
+	struct ice_generic_seg_hdr hdr;
+	u8 rsvd[8];
+	struct ice_buf_table buf_table;
+};
+
+/* global metadata specific segment */
+struct ice_global_metadata_seg {
+	struct ice_generic_seg_hdr hdr;
+	struct ice_pkg_ver pkg_ver;
+	__le32 rsvd;
+	char pkg_name[ICE_PKG_NAME_SIZE];
+};
+
+#define ICE_MIN_S_OFF 12
+#define ICE_MAX_S_OFF 4095
+#define ICE_MIN_S_SZ 1
+#define ICE_MAX_S_SZ 4084
+
+/* section information */
+struct ice_section_entry {
+	__le32 type;
+	__le16 offset;
+	__le16 size;
+};
+
+#define ICE_MIN_S_COUNT 1
+#define ICE_MAX_S_COUNT 511
+#define ICE_MIN_S_DATA_END 12
+#define ICE_MAX_S_DATA_END 4096
+
+#define ICE_METADATA_BUF 0x80000000
+
+struct ice_buf_hdr {
+	__le16 section_count;
+	__le16 data_end;
+	struct ice_section_entry section_entry[];
+};
+
+#define ICE_MAX_ENTRIES_IN_BUF(hd_sz, ent_sz)                                 \
+	((ICE_PKG_BUF_SIZE -                                                  \
+	  struct_size_t(struct ice_buf_hdr,  section_entry, 1) - (hd_sz)) / \
+	 (ent_sz))
+
+/* ice package section IDs */
+#define ICE_SID_METADATA 1
+#define ICE_SID_XLT0_SW 10
+#define ICE_SID_XLT_KEY_BUILDER_SW 11
+#define ICE_SID_XLT1_SW 12
+#define ICE_SID_XLT2_SW 13
+#define ICE_SID_PROFID_TCAM_SW 14
+#define ICE_SID_PROFID_REDIR_SW 15
+#define ICE_SID_FLD_VEC_SW 16
+#define ICE_SID_CDID_KEY_BUILDER_SW 17
+
+struct ice_meta_sect {
+	struct ice_pkg_ver ver;
+#define ICE_META_SECT_NAME_SIZE 28
+	char name[ICE_META_SECT_NAME_SIZE];
+	__le32 track_id;
+};
+
+#define ICE_SID_CDID_REDIR_SW 18
+
+#define ICE_SID_XLT0_ACL 20
+#define ICE_SID_XLT_KEY_BUILDER_ACL 21
+#define ICE_SID_XLT1_ACL 22
+#define ICE_SID_XLT2_ACL 23
+#define ICE_SID_PROFID_TCAM_ACL 24
+#define ICE_SID_PROFID_REDIR_ACL 25
+#define ICE_SID_FLD_VEC_ACL 26
+#define ICE_SID_CDID_KEY_BUILDER_ACL 27
+#define ICE_SID_CDID_REDIR_ACL 28
+
+#define ICE_SID_XLT0_FD 30
+#define ICE_SID_XLT_KEY_BUILDER_FD 31
+#define ICE_SID_XLT1_FD 32
+#define ICE_SID_XLT2_FD 33
+#define ICE_SID_PROFID_TCAM_FD 34
+#define ICE_SID_PROFID_REDIR_FD 35
+#define ICE_SID_FLD_VEC_FD 36
+#define ICE_SID_CDID_KEY_BUILDER_FD 37
+#define ICE_SID_CDID_REDIR_FD 38
+
+#define ICE_SID_XLT0_RSS 40
+#define ICE_SID_XLT_KEY_BUILDER_RSS 41
+#define ICE_SID_XLT1_RSS 42
+#define ICE_SID_XLT2_RSS 43
+#define ICE_SID_PROFID_TCAM_RSS 44
+#define ICE_SID_PROFID_REDIR_RSS 45
+#define ICE_SID_FLD_VEC_RSS 46
+#define ICE_SID_CDID_KEY_BUILDER_RSS 47
+#define ICE_SID_CDID_REDIR_RSS 48
+
+#define ICE_SID_RXPARSER_MARKER_PTYPE 55
+#define ICE_SID_RXPARSER_BOOST_TCAM 56
+#define ICE_SID_RXPARSER_METADATA_INIT 58
+#define ICE_SID_TXPARSER_BOOST_TCAM 66
+
+#define ICE_SID_XLT0_PE 80
+#define ICE_SID_XLT_KEY_BUILDER_PE 81
+#define ICE_SID_XLT1_PE 82
+#define ICE_SID_XLT2_PE 83
+#define ICE_SID_PROFID_TCAM_PE 84
+#define ICE_SID_PROFID_REDIR_PE 85
+#define ICE_SID_FLD_VEC_PE 86
+#define ICE_SID_CDID_KEY_BUILDER_PE 87
+#define ICE_SID_CDID_REDIR_PE 88
+
+/* Label Metadata section IDs */
+#define ICE_SID_LBL_FIRST 0x80000010
+#define ICE_SID_LBL_RXPARSER_TMEM 0x80000018
+/* The following define MUST be updated to reflect the last label section ID */
+#define ICE_SID_LBL_LAST 0x80000038
+
+/* Label ICE runtime configuration section IDs */
+#define ICE_SID_TX_5_LAYER_TOPO 0x10
+
+enum ice_block {
+	ICE_BLK_SW = 0,
+	ICE_BLK_ACL,
+	ICE_BLK_FD,
+	ICE_BLK_RSS,
+	ICE_BLK_PE,
+	ICE_BLK_COUNT
+};
+
+enum ice_sect {
+	ICE_XLT0 = 0,
+	ICE_XLT_KB,
+	ICE_XLT1,
+	ICE_XLT2,
+	ICE_PROF_TCAM,
+	ICE_PROF_REDIR,
+	ICE_VEC_TBL,
+	ICE_CDID_KB,
+	ICE_CDID_REDIR,
+	ICE_SECT_COUNT
+};
+
+/* package labels */
+struct ice_label {
+	__le16 value;
+#define ICE_PKG_LABEL_SIZE 64
+	char name[ICE_PKG_LABEL_SIZE];
+};
+
+struct ice_label_section {
+	__le16 count;
+	struct ice_label label[];
+};
+
+#define ICE_MAX_LABELS_IN_BUF                                             \
+	ICE_MAX_ENTRIES_IN_BUF(struct_size_t(struct ice_label_section,  \
+					   label, 1) -                    \
+				       sizeof(struct ice_label),          \
+			       sizeof(struct ice_label))
+
+struct ice_sw_fv_section {
+	__le16 count;
+	__le16 base_offset;
+	struct ice_fv fv[];
+};
+
+struct ice_sw_fv_list_entry {
+	struct list_head list_entry;
+	u32 profile_id;
+	struct ice_fv *fv_ptr;
+};
+
+/* The BOOST TCAM stores the match packet header in reverse order, meaning
+ * the fields are reversed; in addition, this means that the normally big endian
+ * fields of the packet are now little endian.
+ */
+struct ice_boost_key_value {
+#define ICE_BOOST_REMAINING_HV_KEY 15
+	u8 remaining_hv_key[ICE_BOOST_REMAINING_HV_KEY];
+	__le16 hv_dst_port_key;
+	__le16 hv_src_port_key;
+	u8 tcam_search_key;
+} __packed;
+
+struct ice_boost_key {
+	struct ice_boost_key_value key;
+	struct ice_boost_key_value key2;
+};
+
+/* package Boost TCAM entry */
+struct ice_boost_tcam_entry {
+	__le16 addr;
+	__le16 reserved;
+	/* break up the 40 bytes of key into different fields */
+	struct ice_boost_key key;
+	u8 boost_hit_index_group;
+	/* The following contains bitfields which are not on byte boundaries.
+	 * These fields are currently unused by driver software.
+	 */
+#define ICE_BOOST_BIT_FIELDS 43
+	u8 bit_fields[ICE_BOOST_BIT_FIELDS];
+};
+
+struct ice_boost_tcam_section {
+	__le16 count;
+	__le16 reserved;
+	struct ice_boost_tcam_entry tcam[];
+};
+
+#define ICE_MAX_BST_TCAMS_IN_BUF                                               \
+	ICE_MAX_ENTRIES_IN_BUF(struct_size_t(struct ice_boost_tcam_section,  \
+					   tcam, 1) -                          \
+				       sizeof(struct ice_boost_tcam_entry),    \
+			       sizeof(struct ice_boost_tcam_entry))
+
+/* package Marker Ptype TCAM entry */
+struct ice_marker_ptype_tcam_entry {
+#define ICE_MARKER_PTYPE_TCAM_ADDR_MAX 1024
+	__le16 addr;
+	__le16 ptype;
+	u8 keys[20];
+};
+
+struct ice_marker_ptype_tcam_section {
+	__le16 count;
+	__le16 reserved;
+	struct ice_marker_ptype_tcam_entry tcam[];
+};
+
+#define ICE_MAX_MARKER_PTYPE_TCAMS_IN_BUF                                    \
+	ICE_MAX_ENTRIES_IN_BUF(struct_size_t(struct ice_marker_ptype_tcam_section,  tcam, \
+			    1) -                                             \
+			sizeof(struct ice_marker_ptype_tcam_entry),          \
+		sizeof(struct ice_marker_ptype_tcam_entry))
+
+struct ice_xlt1_section {
+	__le16 count;
+	__le16 offset;
+	u8 value[];
+};
+
+struct ice_xlt2_section {
+	__le16 count;
+	__le16 offset;
+	__le16 value[];
+};
+
+struct ice_prof_redir_section {
+	__le16 count;
+	__le16 offset;
+	u8 redir_value[];
+};
+
+/* package buffer building */
+
+struct ice_buf_build {
+	struct ice_buf buf;
+	u16 reserved_section_table_entries;
+};
+
+struct ice_pkg_enum {
+	struct ice_buf_table *buf_table;
+	u32 buf_idx;
+
+	u32 type;
+	struct ice_buf_hdr *buf;
+	u32 sect_idx;
+	void *sect;
+	u32 sect_type;
+
+	u32 entry_idx;
+	void *(*handler)(u32 sect_type, void *section, u32 index, u32 *offset);
+};
+
+int ice_aq_upload_section(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf,
+			  u16 buf_size, struct ice_sq_cd *cd);
+
+void *ice_pkg_buf_alloc_section(struct ice_buf_build *bld, u32 type, u16 size);
+
+struct ice_buf_build *ice_pkg_buf_alloc(struct ice_hw *hw);
+
+int ice_update_pkg_no_lock(struct ice_hw *hw, struct ice_buf *bufs, u32 count);
+int ice_update_pkg(struct ice_hw *hw, struct ice_buf *bufs, u32 count);
+
+int ice_pkg_buf_reserve_section(struct ice_buf_build *bld, u16 count);
+u16 ice_pkg_buf_get_active_sections(struct ice_buf_build *bld);
+void *ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state,
+			   u32 sect_type);
+
+#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h
new file mode 100644
index 0000000000..6d560d1c74
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_devids.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_DEVIDS_H_
+#define _ICE_DEVIDS_H_
+
+/* Device IDs */
+#define ICE_DEV_ID_E822_SI_DFLT         0x1888
+/* Intel(R) Ethernet Connection E823-L for backplane */
+#define ICE_DEV_ID_E823L_BACKPLANE	0x124C
+/* Intel(R) Ethernet Connection E823-L for SFP */
+#define ICE_DEV_ID_E823L_SFP		0x124D
+/* Intel(R) Ethernet Connection E823-L/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_E823L_10G_BASE_T	0x124E
+/* Intel(R) Ethernet Connection E823-L 1GbE */
+#define ICE_DEV_ID_E823L_1GBE		0x124F
+/* Intel(R) Ethernet Connection E823-L for QSFP */
+#define ICE_DEV_ID_E823L_QSFP		0x151D
+/* Intel(R) Ethernet Controller E810-C for backplane */
+#define ICE_DEV_ID_E810C_BACKPLANE	0x1591
+/* Intel(R) Ethernet Controller E810-C for QSFP */
+#define ICE_DEV_ID_E810C_QSFP		0x1592
+/* Intel(R) Ethernet Controller E810-C for SFP */
+#define ICE_DEV_ID_E810C_SFP		0x1593
+#define ICE_SUBDEV_ID_E810T		0x000E
+#define ICE_SUBDEV_ID_E810T2		0x000F
+#define ICE_SUBDEV_ID_E810T3		0x0010
+#define ICE_SUBDEV_ID_E810T4		0x0011
+#define ICE_SUBDEV_ID_E810T5		0x0012
+#define ICE_SUBDEV_ID_E810T6		0x02E9
+#define ICE_SUBDEV_ID_E810T7		0x02EA
+/* Intel(R) Ethernet Controller E810-XXV for backplane */
+#define ICE_DEV_ID_E810_XXV_BACKPLANE	0x1599
+/* Intel(R) Ethernet Controller E810-XXV for QSFP */
+#define ICE_DEV_ID_E810_XXV_QSFP	0x159A
+/* Intel(R) Ethernet Controller E810-XXV for SFP */
+#define ICE_DEV_ID_E810_XXV_SFP		0x159B
+/* Intel(R) Ethernet Connection E823-C for backplane */
+#define ICE_DEV_ID_E823C_BACKPLANE	0x188A
+/* Intel(R) Ethernet Connection E823-C for QSFP */
+#define ICE_DEV_ID_E823C_QSFP		0x188B
+/* Intel(R) Ethernet Connection E823-C for SFP */
+#define ICE_DEV_ID_E823C_SFP		0x188C
+/* Intel(R) Ethernet Connection E823-C/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_E823C_10G_BASE_T	0x188D
+/* Intel(R) Ethernet Connection E823-C 1GbE */
+#define ICE_DEV_ID_E823C_SGMII		0x188E
+/* Intel(R) Ethernet Connection E822-C for backplane */
+#define ICE_DEV_ID_E822C_BACKPLANE	0x1890
+/* Intel(R) Ethernet Connection E822-C for QSFP */
+#define ICE_DEV_ID_E822C_QSFP		0x1891
+/* Intel(R) Ethernet Connection E822-C for SFP */
+#define ICE_DEV_ID_E822C_SFP		0x1892
+/* Intel(R) Ethernet Connection E822-C/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_E822C_10G_BASE_T	0x1893
+/* Intel(R) Ethernet Connection E822-C 1GbE */
+#define ICE_DEV_ID_E822C_SGMII		0x1894
+/* Intel(R) Ethernet Connection E822-L for backplane */
+#define ICE_DEV_ID_E822L_BACKPLANE	0x1897
+/* Intel(R) Ethernet Connection E822-L for SFP */
+#define ICE_DEV_ID_E822L_SFP		0x1898
+/* Intel(R) Ethernet Connection E822-L/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_E822L_10G_BASE_T	0x1899
+/* Intel(R) Ethernet Connection E822-L 1GbE */
+#define ICE_DEV_ID_E822L_SGMII		0x189A
+
+#endif /* _ICE_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
new file mode 100644
index 0000000000..80dc5445b5
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
@@ -0,0 +1,1917 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Intel Corporation. */
+
+#include <linux/vmalloc.h>
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_devlink.h"
+#include "ice_eswitch.h"
+#include "ice_fw_update.h"
+#include "ice_dcb_lib.h"
+
+static int ice_active_port_option = -1;
+
+/* context for devlink info version reporting */
+struct ice_info_ctx {
+	char buf[128];
+	struct ice_orom_info pending_orom;
+	struct ice_nvm_info pending_nvm;
+	struct ice_netlist_info pending_netlist;
+	struct ice_hw_dev_caps dev_caps;
+};
+
+/* The following functions are used to format specific strings for various
+ * devlink info versions. The ctx parameter is used to provide the storage
+ * buffer, as well as any ancillary information calculated when the info
+ * request was made.
+ *
+ * If a version does not exist, for example when attempting to get the
+ * inactive version of flash when there is no pending update, the function
+ * should leave the buffer in the ctx structure empty.
+ */
+
+static void ice_info_get_dsn(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+	u8 dsn[8];
+
+	/* Copy the DSN into an array in Big Endian format */
+	put_unaligned_be64(pci_get_dsn(pf->pdev), dsn);
+
+	snprintf(ctx->buf, sizeof(ctx->buf), "%8phD", dsn);
+}
+
+static void ice_info_pba(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+	struct ice_hw *hw = &pf->hw;
+	int status;
+
+	status = ice_read_pba_string(hw, (u8 *)ctx->buf, sizeof(ctx->buf));
+	if (status)
+		/* We failed to locate the PBA, so just skip this entry */
+		dev_dbg(ice_pf_to_dev(pf), "Failed to read Product Board Assembly string, status %d\n",
+			status);
+}
+
+static void ice_info_fw_mgmt(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u",
+		 hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch);
+}
+
+static void ice_info_fw_api(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", hw->api_maj_ver,
+		 hw->api_min_ver, hw->api_patch);
+}
+
+static void ice_info_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", hw->fw_build);
+}
+
+static void ice_info_orom_ver(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+	struct ice_orom_info *orom = &pf->hw.flash.orom;
+
+	snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u",
+		 orom->major, orom->build, orom->patch);
+}
+
+static void
+ice_info_pending_orom_ver(struct ice_pf __always_unused *pf,
+			  struct ice_info_ctx *ctx)
+{
+	struct ice_orom_info *orom = &ctx->pending_orom;
+
+	if (ctx->dev_caps.common_cap.nvm_update_pending_orom)
+		snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u",
+			 orom->major, orom->build, orom->patch);
+}
+
+static void ice_info_nvm_ver(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+	struct ice_nvm_info *nvm = &pf->hw.flash.nvm;
+
+	snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", nvm->major, nvm->minor);
+}
+
+static void
+ice_info_pending_nvm_ver(struct ice_pf __always_unused *pf,
+			 struct ice_info_ctx *ctx)
+{
+	struct ice_nvm_info *nvm = &ctx->pending_nvm;
+
+	if (ctx->dev_caps.common_cap.nvm_update_pending_nvm)
+		snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x",
+			 nvm->major, nvm->minor);
+}
+
+static void ice_info_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+	struct ice_nvm_info *nvm = &pf->hw.flash.nvm;
+
+	snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack);
+}
+
+static void
+ice_info_pending_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+	struct ice_nvm_info *nvm = &ctx->pending_nvm;
+
+	if (ctx->dev_caps.common_cap.nvm_update_pending_nvm)
+		snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack);
+}
+
+static void ice_info_ddp_pkg_name(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	snprintf(ctx->buf, sizeof(ctx->buf), "%s", hw->active_pkg_name);
+}
+
+static void
+ice_info_ddp_pkg_version(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+	struct ice_pkg_ver *pkg = &pf->hw.active_pkg_ver;
+
+	snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u.%u",
+		 pkg->major, pkg->minor, pkg->update, pkg->draft);
+}
+
+static void
+ice_info_ddp_pkg_bundle_id(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+	snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", pf->hw.active_track_id);
+}
+
+static void ice_info_netlist_ver(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+	struct ice_netlist_info *netlist = &pf->hw.flash.netlist;
+
+	/* The netlist version fields are BCD formatted */
+	snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x",
+		 netlist->major, netlist->minor,
+		 netlist->type >> 16, netlist->type & 0xFFFF,
+		 netlist->rev, netlist->cust_ver);
+}
+
+static void ice_info_netlist_build(struct ice_pf *pf, struct ice_info_ctx *ctx)
+{
+	struct ice_netlist_info *netlist = &pf->hw.flash.netlist;
+
+	snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash);
+}
+
+static void
+ice_info_pending_netlist_ver(struct ice_pf __always_unused *pf,
+			     struct ice_info_ctx *ctx)
+{
+	struct ice_netlist_info *netlist = &ctx->pending_netlist;
+
+	/* The netlist version fields are BCD formatted */
+	if (ctx->dev_caps.common_cap.nvm_update_pending_netlist)
+		snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x",
+			 netlist->major, netlist->minor,
+			 netlist->type >> 16, netlist->type & 0xFFFF,
+			 netlist->rev, netlist->cust_ver);
+}
+
+static void
+ice_info_pending_netlist_build(struct ice_pf __always_unused *pf,
+			       struct ice_info_ctx *ctx)
+{
+	struct ice_netlist_info *netlist = &ctx->pending_netlist;
+
+	if (ctx->dev_caps.common_cap.nvm_update_pending_netlist)
+		snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash);
+}
+
+#define fixed(key, getter) { ICE_VERSION_FIXED, key, getter, NULL }
+#define running(key, getter) { ICE_VERSION_RUNNING, key, getter, NULL }
+#define stored(key, getter, fallback) { ICE_VERSION_STORED, key, getter, fallback }
+
+/* The combined() macro inserts both the running entry as well as a stored
+ * entry. The running entry will always report the version from the active
+ * handler. The stored entry will first try the pending handler, and fallback
+ * to the active handler if the pending function does not report a version.
+ * The pending handler should check the status of a pending update for the
+ * relevant flash component. It should only fill in the buffer in the case
+ * where a valid pending version is available. This ensures that the related
+ * stored and running versions remain in sync, and that stored versions are
+ * correctly reported as expected.
+ */
+#define combined(key, active, pending) \
+	running(key, active), \
+	stored(key, pending, active)
+
+enum ice_version_type {
+	ICE_VERSION_FIXED,
+	ICE_VERSION_RUNNING,
+	ICE_VERSION_STORED,
+};
+
+static const struct ice_devlink_version {
+	enum ice_version_type type;
+	const char *key;
+	void (*getter)(struct ice_pf *pf, struct ice_info_ctx *ctx);
+	void (*fallback)(struct ice_pf *pf, struct ice_info_ctx *ctx);
+} ice_devlink_versions[] = {
+	fixed(DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, ice_info_pba),
+	running(DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, ice_info_fw_mgmt),
+	running("fw.mgmt.api", ice_info_fw_api),
+	running("fw.mgmt.build", ice_info_fw_build),
+	combined(DEVLINK_INFO_VERSION_GENERIC_FW_UNDI, ice_info_orom_ver, ice_info_pending_orom_ver),
+	combined("fw.psid.api", ice_info_nvm_ver, ice_info_pending_nvm_ver),
+	combined(DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, ice_info_eetrack, ice_info_pending_eetrack),
+	running("fw.app.name", ice_info_ddp_pkg_name),
+	running(DEVLINK_INFO_VERSION_GENERIC_FW_APP, ice_info_ddp_pkg_version),
+	running("fw.app.bundle_id", ice_info_ddp_pkg_bundle_id),
+	combined("fw.netlist", ice_info_netlist_ver, ice_info_pending_netlist_ver),
+	combined("fw.netlist.build", ice_info_netlist_build, ice_info_pending_netlist_build),
+};
+
+/**
+ * ice_devlink_info_get - .info_get devlink handler
+ * @devlink: devlink instance structure
+ * @req: the devlink info request
+ * @extack: extended netdev ack structure
+ *
+ * Callback for the devlink .info_get operation. Reports information about the
+ * device.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+static int ice_devlink_info_get(struct devlink *devlink,
+				struct devlink_info_req *req,
+				struct netlink_ext_ack *extack)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	struct ice_info_ctx *ctx;
+	size_t i;
+	int err;
+
+	err = ice_wait_for_reset(pf, 10 * HZ);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Device is busy resetting");
+		return err;
+	}
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	/* discover capabilities first */
+	err = ice_discover_dev_caps(hw, &ctx->dev_caps);
+	if (err) {
+		dev_dbg(dev, "Failed to discover device capabilities, status %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Unable to discover device capabilities");
+		goto out_free_ctx;
+	}
+
+	if (ctx->dev_caps.common_cap.nvm_update_pending_orom) {
+		err = ice_get_inactive_orom_ver(hw, &ctx->pending_orom);
+		if (err) {
+			dev_dbg(dev, "Unable to read inactive Option ROM version data, status %d aq_err %s\n",
+				err, ice_aq_str(hw->adminq.sq_last_status));
+
+			/* disable display of pending Option ROM */
+			ctx->dev_caps.common_cap.nvm_update_pending_orom = false;
+		}
+	}
+
+	if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) {
+		err = ice_get_inactive_nvm_ver(hw, &ctx->pending_nvm);
+		if (err) {
+			dev_dbg(dev, "Unable to read inactive NVM version data, status %d aq_err %s\n",
+				err, ice_aq_str(hw->adminq.sq_last_status));
+
+			/* disable display of pending Option ROM */
+			ctx->dev_caps.common_cap.nvm_update_pending_nvm = false;
+		}
+	}
+
+	if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) {
+		err = ice_get_inactive_netlist_ver(hw, &ctx->pending_netlist);
+		if (err) {
+			dev_dbg(dev, "Unable to read inactive Netlist version data, status %d aq_err %s\n",
+				err, ice_aq_str(hw->adminq.sq_last_status));
+
+			/* disable display of pending Option ROM */
+			ctx->dev_caps.common_cap.nvm_update_pending_netlist = false;
+		}
+	}
+
+	ice_info_get_dsn(pf, ctx);
+
+	err = devlink_info_serial_number_put(req, ctx->buf);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Unable to set serial number");
+		goto out_free_ctx;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(ice_devlink_versions); i++) {
+		enum ice_version_type type = ice_devlink_versions[i].type;
+		const char *key = ice_devlink_versions[i].key;
+
+		memset(ctx->buf, 0, sizeof(ctx->buf));
+
+		ice_devlink_versions[i].getter(pf, ctx);
+
+		/* If the default getter doesn't report a version, use the
+		 * fallback function. This is primarily useful in the case of
+		 * "stored" versions that want to report the same value as the
+		 * running version in the normal case of no pending update.
+		 */
+		if (ctx->buf[0] == '\0' && ice_devlink_versions[i].fallback)
+			ice_devlink_versions[i].fallback(pf, ctx);
+
+		/* Do not report missing versions */
+		if (ctx->buf[0] == '\0')
+			continue;
+
+		switch (type) {
+		case ICE_VERSION_FIXED:
+			err = devlink_info_version_fixed_put(req, key, ctx->buf);
+			if (err) {
+				NL_SET_ERR_MSG_MOD(extack, "Unable to set fixed version");
+				goto out_free_ctx;
+			}
+			break;
+		case ICE_VERSION_RUNNING:
+			err = devlink_info_version_running_put(req, key, ctx->buf);
+			if (err) {
+				NL_SET_ERR_MSG_MOD(extack, "Unable to set running version");
+				goto out_free_ctx;
+			}
+			break;
+		case ICE_VERSION_STORED:
+			err = devlink_info_version_stored_put(req, key, ctx->buf);
+			if (err) {
+				NL_SET_ERR_MSG_MOD(extack, "Unable to set stored version");
+				goto out_free_ctx;
+			}
+			break;
+		}
+	}
+
+out_free_ctx:
+	kfree(ctx);
+	return err;
+}
+
+/**
+ * ice_devlink_reload_empr_start - Start EMP reset to activate new firmware
+ * @pf: pointer to the pf instance
+ * @extack: netlink extended ACK structure
+ *
+ * Allow user to activate new Embedded Management Processor firmware by
+ * issuing device specific EMP reset. Called in response to
+ * a DEVLINK_CMD_RELOAD with the DEVLINK_RELOAD_ACTION_FW_ACTIVATE.
+ *
+ * Note that teardown and rebuild of the driver state happens automatically as
+ * part of an interrupt and watchdog task. This is because all physical
+ * functions on the device must be able to reset when an EMP reset occurs from
+ * any source.
+ */
+static int
+ice_devlink_reload_empr_start(struct ice_pf *pf,
+			      struct netlink_ext_ack *extack)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	u8 pending;
+	int err;
+
+	err = ice_get_pending_updates(pf, &pending, extack);
+	if (err)
+		return err;
+
+	/* pending is a bitmask of which flash banks have a pending update,
+	 * including the main NVM bank, the Option ROM bank, and the netlist
+	 * bank. If any of these bits are set, then there is a pending update
+	 * waiting to be activated.
+	 */
+	if (!pending) {
+		NL_SET_ERR_MSG_MOD(extack, "No pending firmware update");
+		return -ECANCELED;
+	}
+
+	if (pf->fw_emp_reset_disabled) {
+		NL_SET_ERR_MSG_MOD(extack, "EMP reset is not available. To activate firmware, a reboot or power cycle is needed");
+		return -ECANCELED;
+	}
+
+	dev_dbg(dev, "Issuing device EMP reset to activate firmware\n");
+
+	err = ice_aq_nvm_update_empr(hw);
+	if (err) {
+		dev_err(dev, "Failed to trigger EMP device reset to reload firmware, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to trigger EMP device reset to reload firmware");
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_devlink_reload_down - prepare for reload
+ * @devlink: pointer to the devlink instance to reload
+ * @netns_change: if true, the network namespace is changing
+ * @action: the action to perform
+ * @limit: limits on what reload should do, such as not resetting
+ * @extack: netlink extended ACK structure
+ */
+static int
+ice_devlink_reload_down(struct devlink *devlink, bool netns_change,
+			enum devlink_reload_action action,
+			enum devlink_reload_limit limit,
+			struct netlink_ext_ack *extack)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+
+	switch (action) {
+	case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+		if (ice_is_eswitch_mode_switchdev(pf)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Go to legacy mode before doing reinit\n");
+			return -EOPNOTSUPP;
+		}
+		if (ice_is_adq_active(pf)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Turn off ADQ before doing reinit\n");
+			return -EOPNOTSUPP;
+		}
+		if (ice_has_vfs(pf)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Remove all VFs before doing reinit\n");
+			return -EOPNOTSUPP;
+		}
+		ice_unload(pf);
+		return 0;
+	case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
+		return ice_devlink_reload_empr_start(pf, extack);
+	default:
+		WARN_ON(1);
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * ice_devlink_reload_empr_finish - Wait for EMP reset to finish
+ * @pf: pointer to the pf instance
+ * @extack: netlink extended ACK structure
+ *
+ * Wait for driver to finish rebuilding after EMP reset is completed. This
+ * includes time to wait for both the actual device reset as well as the time
+ * for the driver's rebuild to complete.
+ */
+static int
+ice_devlink_reload_empr_finish(struct ice_pf *pf,
+			       struct netlink_ext_ack *extack)
+{
+	int err;
+
+	err = ice_wait_for_reset(pf, 60 * HZ);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Device still resetting after 1 minute");
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_devlink_port_opt_speed_str - convert speed to a string
+ * @speed: speed value
+ */
+static const char *ice_devlink_port_opt_speed_str(u8 speed)
+{
+	switch (speed & ICE_AQC_PORT_OPT_MAX_LANE_M) {
+	case ICE_AQC_PORT_OPT_MAX_LANE_100M:
+		return "0.1";
+	case ICE_AQC_PORT_OPT_MAX_LANE_1G:
+		return "1";
+	case ICE_AQC_PORT_OPT_MAX_LANE_2500M:
+		return "2.5";
+	case ICE_AQC_PORT_OPT_MAX_LANE_5G:
+		return "5";
+	case ICE_AQC_PORT_OPT_MAX_LANE_10G:
+		return "10";
+	case ICE_AQC_PORT_OPT_MAX_LANE_25G:
+		return "25";
+	case ICE_AQC_PORT_OPT_MAX_LANE_50G:
+		return "50";
+	case ICE_AQC_PORT_OPT_MAX_LANE_100G:
+		return "100";
+	}
+
+	return "-";
+}
+
+#define ICE_PORT_OPT_DESC_LEN	50
+/**
+ * ice_devlink_port_options_print - Print available port split options
+ * @pf: the PF to print split port options
+ *
+ * Prints a table with available port split options and max port speeds
+ */
+static void ice_devlink_port_options_print(struct ice_pf *pf)
+{
+	u8 i, j, options_count, cnt, speed, pending_idx, active_idx;
+	struct ice_aqc_get_port_options_elem *options, *opt;
+	struct device *dev = ice_pf_to_dev(pf);
+	bool active_valid, pending_valid;
+	char desc[ICE_PORT_OPT_DESC_LEN];
+	const char *str;
+	int status;
+
+	options = kcalloc(ICE_AQC_PORT_OPT_MAX * ICE_MAX_PORT_PER_PCI_DEV,
+			  sizeof(*options), GFP_KERNEL);
+	if (!options)
+		return;
+
+	for (i = 0; i < ICE_MAX_PORT_PER_PCI_DEV; i++) {
+		opt = options + i * ICE_AQC_PORT_OPT_MAX;
+		options_count = ICE_AQC_PORT_OPT_MAX;
+		active_valid = 0;
+
+		status = ice_aq_get_port_options(&pf->hw, opt, &options_count,
+						 i, true, &active_idx,
+						 &active_valid, &pending_idx,
+						 &pending_valid);
+		if (status) {
+			dev_dbg(dev, "Couldn't read port option for port %d, err %d\n",
+				i, status);
+			goto err;
+		}
+	}
+
+	dev_dbg(dev, "Available port split options and max port speeds (Gbps):\n");
+	dev_dbg(dev, "Status  Split      Quad 0          Quad 1\n");
+	dev_dbg(dev, "        count  L0  L1  L2  L3  L4  L5  L6  L7\n");
+
+	for (i = 0; i < options_count; i++) {
+		cnt = 0;
+
+		if (i == ice_active_port_option)
+			str = "Active";
+		else if ((i == pending_idx) && pending_valid)
+			str = "Pending";
+		else
+			str = "";
+
+		cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt,
+				"%-8s", str);
+
+		cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt,
+				"%-6u", options[i].pmd);
+
+		for (j = 0; j < ICE_MAX_PORT_PER_PCI_DEV; ++j) {
+			speed = options[i + j * ICE_AQC_PORT_OPT_MAX].max_lane_speed;
+			str = ice_devlink_port_opt_speed_str(speed);
+			cnt += snprintf(&desc[cnt], ICE_PORT_OPT_DESC_LEN - cnt,
+					"%3s ", str);
+		}
+
+		dev_dbg(dev, "%s\n", desc);
+	}
+
+err:
+	kfree(options);
+}
+
+/**
+ * ice_devlink_aq_set_port_option - Send set port option admin queue command
+ * @pf: the PF to print split port options
+ * @option_idx: selected port option
+ * @extack: extended netdev ack structure
+ *
+ * Sends set port option admin queue command with selected port option and
+ * calls NVM write activate.
+ */
+static int
+ice_devlink_aq_set_port_option(struct ice_pf *pf, u8 option_idx,
+			       struct netlink_ext_ack *extack)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	int status;
+
+	status = ice_aq_set_port_option(&pf->hw, 0, true, option_idx);
+	if (status) {
+		dev_dbg(dev, "ice_aq_set_port_option, err %d aq_err %d\n",
+			status, pf->hw.adminq.sq_last_status);
+		NL_SET_ERR_MSG_MOD(extack, "Port split request failed");
+		return -EIO;
+	}
+
+	status = ice_acquire_nvm(&pf->hw, ICE_RES_WRITE);
+	if (status) {
+		dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
+			status, pf->hw.adminq.sq_last_status);
+		NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
+		return -EIO;
+	}
+
+	status = ice_nvm_write_activate(&pf->hw, ICE_AQC_NVM_ACTIV_REQ_EMPR, NULL);
+	if (status) {
+		dev_dbg(dev, "ice_nvm_write_activate failed, err %d aq_err %d\n",
+			status, pf->hw.adminq.sq_last_status);
+		NL_SET_ERR_MSG_MOD(extack, "Port split request failed to save data");
+		ice_release_nvm(&pf->hw);
+		return -EIO;
+	}
+
+	ice_release_nvm(&pf->hw);
+
+	NL_SET_ERR_MSG_MOD(extack, "Reboot required to finish port split");
+	return 0;
+}
+
+/**
+ * ice_devlink_port_split - .port_split devlink handler
+ * @devlink: devlink instance structure
+ * @port: devlink port structure
+ * @count: number of ports to split to
+ * @extack: extended netdev ack structure
+ *
+ * Callback for the devlink .port_split operation.
+ *
+ * Unfortunately, the devlink expression of available options is limited
+ * to just a number, so search for an FW port option which supports
+ * the specified number. As there could be multiple FW port options with
+ * the same port split count, allow switching between them. When the same
+ * port split count request is issued again, switch to the next FW port
+ * option with the same port split count.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+static int
+ice_devlink_port_split(struct devlink *devlink, struct devlink_port *port,
+		       unsigned int count, struct netlink_ext_ack *extack)
+{
+	struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX];
+	u8 i, j, active_idx, pending_idx, new_option;
+	struct ice_pf *pf = devlink_priv(devlink);
+	u8 option_count = ICE_AQC_PORT_OPT_MAX;
+	struct device *dev = ice_pf_to_dev(pf);
+	bool active_valid, pending_valid;
+	int status;
+
+	status = ice_aq_get_port_options(&pf->hw, options, &option_count,
+					 0, true, &active_idx, &active_valid,
+					 &pending_idx, &pending_valid);
+	if (status) {
+		dev_dbg(dev, "Couldn't read port split options, err = %d\n",
+			status);
+		NL_SET_ERR_MSG_MOD(extack, "Failed to get available port split options");
+		return -EIO;
+	}
+
+	new_option = ICE_AQC_PORT_OPT_MAX;
+	active_idx = pending_valid ? pending_idx : active_idx;
+	for (i = 1; i <= option_count; i++) {
+		/* In order to allow switching between FW port options with
+		 * the same port split count, search for a new option starting
+		 * from the active/pending option (with array wrap around).
+		 */
+		j = (active_idx + i) % option_count;
+
+		if (count == options[j].pmd) {
+			new_option = j;
+			break;
+		}
+	}
+
+	if (new_option == active_idx) {
+		dev_dbg(dev, "request to split: count: %u is already set and there are no other options\n",
+			count);
+		NL_SET_ERR_MSG_MOD(extack, "Requested split count is already set");
+		ice_devlink_port_options_print(pf);
+		return -EINVAL;
+	}
+
+	if (new_option == ICE_AQC_PORT_OPT_MAX) {
+		dev_dbg(dev, "request to split: count: %u not found\n", count);
+		NL_SET_ERR_MSG_MOD(extack, "Port split requested unsupported port config");
+		ice_devlink_port_options_print(pf);
+		return -EINVAL;
+	}
+
+	status = ice_devlink_aq_set_port_option(pf, new_option, extack);
+	if (status)
+		return status;
+
+	ice_devlink_port_options_print(pf);
+
+	return 0;
+}
+
+/**
+ * ice_devlink_port_unsplit - .port_unsplit devlink handler
+ * @devlink: devlink instance structure
+ * @port: devlink port structure
+ * @extack: extended netdev ack structure
+ *
+ * Callback for the devlink .port_unsplit operation.
+ * Calls ice_devlink_port_split with split count set to 1.
+ * There could be no FW option available with split count 1.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+static int
+ice_devlink_port_unsplit(struct devlink *devlink, struct devlink_port *port,
+			 struct netlink_ext_ack *extack)
+{
+	return ice_devlink_port_split(devlink, port, 1, extack);
+}
+
+/**
+ * ice_tear_down_devlink_rate_tree - removes devlink-rate exported tree
+ * @pf: pf struct
+ *
+ * This function tears down tree exported during VF's creation.
+ */
+void ice_tear_down_devlink_rate_tree(struct ice_pf *pf)
+{
+	struct devlink *devlink;
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	devlink = priv_to_devlink(pf);
+
+	devl_lock(devlink);
+	mutex_lock(&pf->vfs.table_lock);
+	ice_for_each_vf(pf, bkt, vf) {
+		if (vf->devlink_port.devlink_rate)
+			devl_rate_leaf_destroy(&vf->devlink_port);
+	}
+	mutex_unlock(&pf->vfs.table_lock);
+
+	devl_rate_nodes_destroy(devlink);
+	devl_unlock(devlink);
+}
+
+/**
+ * ice_enable_custom_tx - try to enable custom Tx feature
+ * @pf: pf struct
+ *
+ * This function tries to enable custom Tx feature,
+ * it's not possible to enable it, if DCB or ADQ is active.
+ */
+static bool ice_enable_custom_tx(struct ice_pf *pf)
+{
+	struct ice_port_info *pi = ice_get_main_vsi(pf)->port_info;
+	struct device *dev = ice_pf_to_dev(pf);
+
+	if (pi->is_custom_tx_enabled)
+		/* already enabled, return true */
+		return true;
+
+	if (ice_is_adq_active(pf)) {
+		dev_err(dev, "ADQ active, can't modify Tx scheduler tree\n");
+		return false;
+	}
+
+	if (ice_is_dcb_active(pf)) {
+		dev_err(dev, "DCB active, can't modify Tx scheduler tree\n");
+		return false;
+	}
+
+	pi->is_custom_tx_enabled = true;
+
+	return true;
+}
+
+/**
+ * ice_traverse_tx_tree - traverse Tx scheduler tree
+ * @devlink: devlink struct
+ * @node: current node, used for recursion
+ * @tc_node: tc_node struct, that is treated as a root
+ * @pf: pf struct
+ *
+ * This function traverses Tx scheduler tree and exports
+ * entire structure to the devlink-rate.
+ */
+static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node *node,
+				 struct ice_sched_node *tc_node, struct ice_pf *pf)
+{
+	struct devlink_rate *rate_node = NULL;
+	struct ice_vf *vf;
+	int i;
+
+	if (node->parent == tc_node) {
+		/* create root node */
+		rate_node = devl_rate_node_create(devlink, node, node->name, NULL);
+	} else if (node->vsi_handle &&
+		   pf->vsi[node->vsi_handle]->vf) {
+		vf = pf->vsi[node->vsi_handle]->vf;
+		if (!vf->devlink_port.devlink_rate)
+			/* leaf nodes doesn't have children
+			 * so we don't set rate_node
+			 */
+			devl_rate_leaf_create(&vf->devlink_port, node,
+					      node->parent->rate_node);
+	} else if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF &&
+		   node->parent->rate_node) {
+		rate_node = devl_rate_node_create(devlink, node, node->name,
+						  node->parent->rate_node);
+	}
+
+	if (rate_node && !IS_ERR(rate_node))
+		node->rate_node = rate_node;
+
+	for (i = 0; i < node->num_children; i++)
+		ice_traverse_tx_tree(devlink, node->children[i], tc_node, pf);
+}
+
+/**
+ * ice_devlink_rate_init_tx_topology - export Tx scheduler tree to devlink rate
+ * @devlink: devlink struct
+ * @vsi: main vsi struct
+ *
+ * This function finds a root node, then calls ice_traverse_tx tree, which
+ * traverses the tree and exports it's contents to devlink rate.
+ */
+int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi)
+{
+	struct ice_port_info *pi = vsi->port_info;
+	struct ice_sched_node *tc_node;
+	struct ice_pf *pf = vsi->back;
+	int i;
+
+	tc_node = pi->root->children[0];
+	mutex_lock(&pi->sched_lock);
+	devl_lock(devlink);
+	for (i = 0; i < tc_node->num_children; i++)
+		ice_traverse_tx_tree(devlink, tc_node->children[i], tc_node, pf);
+	devl_unlock(devlink);
+	mutex_unlock(&pi->sched_lock);
+
+	return 0;
+}
+
+/**
+ * ice_set_object_tx_share - sets node scheduling parameter
+ * @pi: devlink struct instance
+ * @node: node struct instance
+ * @bw: bandwidth in bytes per second
+ * @extack: extended netdev ack structure
+ *
+ * This function sets ICE_MIN_BW scheduling BW limit.
+ */
+static int ice_set_object_tx_share(struct ice_port_info *pi, struct ice_sched_node *node,
+				   u64 bw, struct netlink_ext_ack *extack)
+{
+	int status;
+
+	mutex_lock(&pi->sched_lock);
+	/* converts bytes per second to kilo bits per second */
+	node->tx_share = div_u64(bw, 125);
+	status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW, node->tx_share);
+	mutex_unlock(&pi->sched_lock);
+
+	if (status)
+		NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_share");
+
+	return status;
+}
+
+/**
+ * ice_set_object_tx_max - sets node scheduling parameter
+ * @pi: devlink struct instance
+ * @node: node struct instance
+ * @bw: bandwidth in bytes per second
+ * @extack: extended netdev ack structure
+ *
+ * This function sets ICE_MAX_BW scheduling BW limit.
+ */
+static int ice_set_object_tx_max(struct ice_port_info *pi, struct ice_sched_node *node,
+				 u64 bw, struct netlink_ext_ack *extack)
+{
+	int status;
+
+	mutex_lock(&pi->sched_lock);
+	/* converts bytes per second value to kilo bits per second */
+	node->tx_max = div_u64(bw, 125);
+	status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW, node->tx_max);
+	mutex_unlock(&pi->sched_lock);
+
+	if (status)
+		NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_max");
+
+	return status;
+}
+
+/**
+ * ice_set_object_tx_priority - sets node scheduling parameter
+ * @pi: devlink struct instance
+ * @node: node struct instance
+ * @priority: value representing priority for strict priority arbitration
+ * @extack: extended netdev ack structure
+ *
+ * This function sets priority of node among siblings.
+ */
+static int ice_set_object_tx_priority(struct ice_port_info *pi, struct ice_sched_node *node,
+				      u32 priority, struct netlink_ext_ack *extack)
+{
+	int status;
+
+	if (priority >= 8) {
+		NL_SET_ERR_MSG_MOD(extack, "Priority should be less than 8");
+		return -EINVAL;
+	}
+
+	mutex_lock(&pi->sched_lock);
+	node->tx_priority = priority;
+	status = ice_sched_set_node_priority(pi, node, node->tx_priority);
+	mutex_unlock(&pi->sched_lock);
+
+	if (status)
+		NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_priority");
+
+	return status;
+}
+
+/**
+ * ice_set_object_tx_weight - sets node scheduling parameter
+ * @pi: devlink struct instance
+ * @node: node struct instance
+ * @weight: value represeting relative weight for WFQ arbitration
+ * @extack: extended netdev ack structure
+ *
+ * This function sets node weight for WFQ algorithm.
+ */
+static int ice_set_object_tx_weight(struct ice_port_info *pi, struct ice_sched_node *node,
+				    u32 weight, struct netlink_ext_ack *extack)
+{
+	int status;
+
+	if (weight > 200 || weight < 1) {
+		NL_SET_ERR_MSG_MOD(extack, "Weight must be between 1 and 200");
+		return -EINVAL;
+	}
+
+	mutex_lock(&pi->sched_lock);
+	node->tx_weight = weight;
+	status = ice_sched_set_node_weight(pi, node, node->tx_weight);
+	mutex_unlock(&pi->sched_lock);
+
+	if (status)
+		NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_weight");
+
+	return status;
+}
+
+/**
+ * ice_get_pi_from_dev_rate - get port info from devlink_rate
+ * @rate_node: devlink struct instance
+ *
+ * This function returns corresponding port_info struct of devlink_rate
+ */
+static struct ice_port_info *ice_get_pi_from_dev_rate(struct devlink_rate *rate_node)
+{
+	struct ice_pf *pf = devlink_priv(rate_node->devlink);
+
+	return ice_get_main_vsi(pf)->port_info;
+}
+
+static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+				     struct netlink_ext_ack *extack)
+{
+	struct ice_sched_node *node;
+	struct ice_port_info *pi;
+
+	pi = ice_get_pi_from_dev_rate(rate_node);
+
+	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
+		return -EBUSY;
+
+	/* preallocate memory for ice_sched_node */
+	node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL);
+	*priv = node;
+
+	return 0;
+}
+
+static int ice_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+				     struct netlink_ext_ack *extack)
+{
+	struct ice_sched_node *node, *tc_node;
+	struct ice_port_info *pi;
+
+	pi = ice_get_pi_from_dev_rate(rate_node);
+	tc_node = pi->root->children[0];
+	node = priv;
+
+	if (!rate_node->parent || !node || tc_node == node || !extack)
+		return 0;
+
+	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
+		return -EBUSY;
+
+	/* can't allow to delete a node with children */
+	if (node->num_children)
+		return -EINVAL;
+
+	mutex_lock(&pi->sched_lock);
+	ice_free_sched_node(pi, node);
+	mutex_unlock(&pi->sched_lock);
+
+	return 0;
+}
+
+static int ice_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
+					    u64 tx_max, struct netlink_ext_ack *extack)
+{
+	struct ice_sched_node *node = priv;
+
+	if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink)))
+		return -EBUSY;
+
+	if (!node)
+		return 0;
+
+	return ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_leaf),
+				     node, tx_max, extack);
+}
+
+static int ice_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
+					      u64 tx_share, struct netlink_ext_ack *extack)
+{
+	struct ice_sched_node *node = priv;
+
+	if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink)))
+		return -EBUSY;
+
+	if (!node)
+		return 0;
+
+	return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_leaf), node,
+				       tx_share, extack);
+}
+
+static int ice_devlink_rate_leaf_tx_priority_set(struct devlink_rate *rate_leaf, void *priv,
+						 u32 tx_priority, struct netlink_ext_ack *extack)
+{
+	struct ice_sched_node *node = priv;
+
+	if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink)))
+		return -EBUSY;
+
+	if (!node)
+		return 0;
+
+	return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_leaf), node,
+					  tx_priority, extack);
+}
+
+static int ice_devlink_rate_leaf_tx_weight_set(struct devlink_rate *rate_leaf, void *priv,
+					       u32 tx_weight, struct netlink_ext_ack *extack)
+{
+	struct ice_sched_node *node = priv;
+
+	if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink)))
+		return -EBUSY;
+
+	if (!node)
+		return 0;
+
+	return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_leaf), node,
+					tx_weight, extack);
+}
+
+static int ice_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
+					    u64 tx_max, struct netlink_ext_ack *extack)
+{
+	struct ice_sched_node *node = priv;
+
+	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
+		return -EBUSY;
+
+	if (!node)
+		return 0;
+
+	return ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_node),
+				     node, tx_max, extack);
+}
+
+static int ice_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
+					      u64 tx_share, struct netlink_ext_ack *extack)
+{
+	struct ice_sched_node *node = priv;
+
+	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
+		return -EBUSY;
+
+	if (!node)
+		return 0;
+
+	return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_node),
+				       node, tx_share, extack);
+}
+
+static int ice_devlink_rate_node_tx_priority_set(struct devlink_rate *rate_node, void *priv,
+						 u32 tx_priority, struct netlink_ext_ack *extack)
+{
+	struct ice_sched_node *node = priv;
+
+	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
+		return -EBUSY;
+
+	if (!node)
+		return 0;
+
+	return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_node),
+					  node, tx_priority, extack);
+}
+
+static int ice_devlink_rate_node_tx_weight_set(struct devlink_rate *rate_node, void *priv,
+					       u32 tx_weight, struct netlink_ext_ack *extack)
+{
+	struct ice_sched_node *node = priv;
+
+	if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink)))
+		return -EBUSY;
+
+	if (!node)
+		return 0;
+
+	return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_node),
+					node, tx_weight, extack);
+}
+
+static int ice_devlink_set_parent(struct devlink_rate *devlink_rate,
+				  struct devlink_rate *parent,
+				  void *priv, void *parent_priv,
+				  struct netlink_ext_ack *extack)
+{
+	struct ice_port_info *pi = ice_get_pi_from_dev_rate(devlink_rate);
+	struct ice_sched_node *tc_node, *node, *parent_node;
+	u16 num_nodes_added;
+	u32 first_node_teid;
+	u32 node_teid;
+	int status;
+
+	tc_node = pi->root->children[0];
+	node = priv;
+
+	if (!extack)
+		return 0;
+
+	if (!ice_enable_custom_tx(devlink_priv(devlink_rate->devlink)))
+		return -EBUSY;
+
+	if (!parent) {
+		if (!node || tc_node == node || node->num_children)
+			return -EINVAL;
+
+		mutex_lock(&pi->sched_lock);
+		ice_free_sched_node(pi, node);
+		mutex_unlock(&pi->sched_lock);
+
+		return 0;
+	}
+
+	parent_node = parent_priv;
+
+	/* if the node doesn't exist, create it */
+	if (!node->parent) {
+		mutex_lock(&pi->sched_lock);
+		status = ice_sched_add_elems(pi, tc_node, parent_node,
+					     parent_node->tx_sched_layer + 1,
+					     1, &num_nodes_added, &first_node_teid,
+					     &node);
+		mutex_unlock(&pi->sched_lock);
+
+		if (status) {
+			NL_SET_ERR_MSG_MOD(extack, "Can't add a new node");
+			return status;
+		}
+
+		if (devlink_rate->tx_share)
+			ice_set_object_tx_share(pi, node, devlink_rate->tx_share, extack);
+		if (devlink_rate->tx_max)
+			ice_set_object_tx_max(pi, node, devlink_rate->tx_max, extack);
+		if (devlink_rate->tx_priority)
+			ice_set_object_tx_priority(pi, node, devlink_rate->tx_priority, extack);
+		if (devlink_rate->tx_weight)
+			ice_set_object_tx_weight(pi, node, devlink_rate->tx_weight, extack);
+	} else {
+		node_teid = le32_to_cpu(node->info.node_teid);
+		mutex_lock(&pi->sched_lock);
+		status = ice_sched_move_nodes(pi, parent_node, 1, &node_teid);
+		mutex_unlock(&pi->sched_lock);
+
+		if (status)
+			NL_SET_ERR_MSG_MOD(extack, "Can't move existing node to a new parent");
+	}
+
+	return status;
+}
+
+/**
+ * ice_devlink_reload_up - do reload up after reinit
+ * @devlink: pointer to the devlink instance reloading
+ * @action: the action requested
+ * @limit: limits imposed by userspace, such as not resetting
+ * @actions_performed: on return, indicate what actions actually performed
+ * @extack: netlink extended ACK structure
+ */
+static int
+ice_devlink_reload_up(struct devlink *devlink,
+		      enum devlink_reload_action action,
+		      enum devlink_reload_limit limit,
+		      u32 *actions_performed,
+		      struct netlink_ext_ack *extack)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+
+	switch (action) {
+	case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
+		*actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
+		return ice_load(pf);
+	case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
+		*actions_performed = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE);
+		return ice_devlink_reload_empr_finish(pf, extack);
+	default:
+		WARN_ON(1);
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct devlink_ops ice_devlink_ops = {
+	.supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK,
+	.reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
+			  BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE),
+	.reload_down = ice_devlink_reload_down,
+	.reload_up = ice_devlink_reload_up,
+	.eswitch_mode_get = ice_eswitch_mode_get,
+	.eswitch_mode_set = ice_eswitch_mode_set,
+	.info_get = ice_devlink_info_get,
+	.flash_update = ice_devlink_flash_update,
+
+	.rate_node_new = ice_devlink_rate_node_new,
+	.rate_node_del = ice_devlink_rate_node_del,
+
+	.rate_leaf_tx_max_set = ice_devlink_rate_leaf_tx_max_set,
+	.rate_leaf_tx_share_set = ice_devlink_rate_leaf_tx_share_set,
+	.rate_leaf_tx_priority_set = ice_devlink_rate_leaf_tx_priority_set,
+	.rate_leaf_tx_weight_set = ice_devlink_rate_leaf_tx_weight_set,
+
+	.rate_node_tx_max_set = ice_devlink_rate_node_tx_max_set,
+	.rate_node_tx_share_set = ice_devlink_rate_node_tx_share_set,
+	.rate_node_tx_priority_set = ice_devlink_rate_node_tx_priority_set,
+	.rate_node_tx_weight_set = ice_devlink_rate_node_tx_weight_set,
+
+	.rate_leaf_parent_set = ice_devlink_set_parent,
+	.rate_node_parent_set = ice_devlink_set_parent,
+};
+
+static int
+ice_devlink_enable_roce_get(struct devlink *devlink, u32 id,
+			    struct devlink_param_gset_ctx *ctx)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+
+	ctx->val.vbool = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? true : false;
+
+	return 0;
+}
+
+static int
+ice_devlink_enable_roce_set(struct devlink *devlink, u32 id,
+			    struct devlink_param_gset_ctx *ctx)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+	bool roce_ena = ctx->val.vbool;
+	int ret;
+
+	if (!roce_ena) {
+		ice_unplug_aux_dev(pf);
+		pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_ROCEV2;
+		return 0;
+	}
+
+	pf->rdma_mode |= IIDC_RDMA_PROTOCOL_ROCEV2;
+	ret = ice_plug_aux_dev(pf);
+	if (ret)
+		pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_ROCEV2;
+
+	return ret;
+}
+
+static int
+ice_devlink_enable_roce_validate(struct devlink *devlink, u32 id,
+				 union devlink_param_value val,
+				 struct netlink_ext_ack *extack)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+
+	if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags))
+		return -EOPNOTSUPP;
+
+	if (pf->rdma_mode & IIDC_RDMA_PROTOCOL_IWARP) {
+		NL_SET_ERR_MSG_MOD(extack, "iWARP is currently enabled. This device cannot enable iWARP and RoCEv2 simultaneously");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int
+ice_devlink_enable_iw_get(struct devlink *devlink, u32 id,
+			  struct devlink_param_gset_ctx *ctx)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+
+	ctx->val.vbool = pf->rdma_mode & IIDC_RDMA_PROTOCOL_IWARP;
+
+	return 0;
+}
+
+static int
+ice_devlink_enable_iw_set(struct devlink *devlink, u32 id,
+			  struct devlink_param_gset_ctx *ctx)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+	bool iw_ena = ctx->val.vbool;
+	int ret;
+
+	if (!iw_ena) {
+		ice_unplug_aux_dev(pf);
+		pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_IWARP;
+		return 0;
+	}
+
+	pf->rdma_mode |= IIDC_RDMA_PROTOCOL_IWARP;
+	ret = ice_plug_aux_dev(pf);
+	if (ret)
+		pf->rdma_mode &= ~IIDC_RDMA_PROTOCOL_IWARP;
+
+	return ret;
+}
+
+static int
+ice_devlink_enable_iw_validate(struct devlink *devlink, u32 id,
+			       union devlink_param_value val,
+			       struct netlink_ext_ack *extack)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+
+	if (!test_bit(ICE_FLAG_RDMA_ENA, pf->flags))
+		return -EOPNOTSUPP;
+
+	if (pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2) {
+		NL_SET_ERR_MSG_MOD(extack, "RoCEv2 is currently enabled. This device cannot enable iWARP and RoCEv2 simultaneously");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static const struct devlink_param ice_devlink_params[] = {
+	DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+			      ice_devlink_enable_roce_get,
+			      ice_devlink_enable_roce_set,
+			      ice_devlink_enable_roce_validate),
+	DEVLINK_PARAM_GENERIC(ENABLE_IWARP, BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+			      ice_devlink_enable_iw_get,
+			      ice_devlink_enable_iw_set,
+			      ice_devlink_enable_iw_validate),
+
+};
+
+static void ice_devlink_free(void *devlink_ptr)
+{
+	devlink_free((struct devlink *)devlink_ptr);
+}
+
+/**
+ * ice_allocate_pf - Allocate devlink and return PF structure pointer
+ * @dev: the device to allocate for
+ *
+ * Allocate a devlink instance for this device and return the private area as
+ * the PF structure. The devlink memory is kept track of through devres by
+ * adding an action to remove it when unwinding.
+ */
+struct ice_pf *ice_allocate_pf(struct device *dev)
+{
+	struct devlink *devlink;
+
+	devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev);
+	if (!devlink)
+		return NULL;
+
+	/* Add an action to teardown the devlink when unwinding the driver */
+	if (devm_add_action_or_reset(dev, ice_devlink_free, devlink))
+		return NULL;
+
+	return devlink_priv(devlink);
+}
+
+/**
+ * ice_devlink_register - Register devlink interface for this PF
+ * @pf: the PF to register the devlink for.
+ *
+ * Register the devlink instance associated with this physical function.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+void ice_devlink_register(struct ice_pf *pf)
+{
+	struct devlink *devlink = priv_to_devlink(pf);
+
+	devlink_register(devlink);
+}
+
+/**
+ * ice_devlink_unregister - Unregister devlink resources for this PF.
+ * @pf: the PF structure to cleanup
+ *
+ * Releases resources used by devlink and cleans up associated memory.
+ */
+void ice_devlink_unregister(struct ice_pf *pf)
+{
+	devlink_unregister(priv_to_devlink(pf));
+}
+
+/**
+ * ice_devlink_set_switch_id - Set unique switch id based on pci dsn
+ * @pf: the PF to create a devlink port for
+ * @ppid: struct with switch id information
+ */
+static void
+ice_devlink_set_switch_id(struct ice_pf *pf, struct netdev_phys_item_id *ppid)
+{
+	struct pci_dev *pdev = pf->pdev;
+	u64 id;
+
+	id = pci_get_dsn(pdev);
+
+	ppid->id_len = sizeof(id);
+	put_unaligned_be64(id, &ppid->id);
+}
+
+int ice_devlink_register_params(struct ice_pf *pf)
+{
+	struct devlink *devlink = priv_to_devlink(pf);
+
+	return devlink_params_register(devlink, ice_devlink_params,
+				       ARRAY_SIZE(ice_devlink_params));
+}
+
+void ice_devlink_unregister_params(struct ice_pf *pf)
+{
+	devlink_params_unregister(priv_to_devlink(pf), ice_devlink_params,
+				  ARRAY_SIZE(ice_devlink_params));
+}
+
+/**
+ * ice_devlink_set_port_split_options - Set port split options
+ * @pf: the PF to set port split options
+ * @attrs: devlink attributes
+ *
+ * Sets devlink port split options based on available FW port options
+ */
+static void
+ice_devlink_set_port_split_options(struct ice_pf *pf,
+				   struct devlink_port_attrs *attrs)
+{
+	struct ice_aqc_get_port_options_elem options[ICE_AQC_PORT_OPT_MAX];
+	u8 i, active_idx, pending_idx, option_count = ICE_AQC_PORT_OPT_MAX;
+	bool active_valid, pending_valid;
+	int status;
+
+	status = ice_aq_get_port_options(&pf->hw, options, &option_count,
+					 0, true, &active_idx, &active_valid,
+					 &pending_idx, &pending_valid);
+	if (status) {
+		dev_dbg(ice_pf_to_dev(pf), "Couldn't read port split options, err = %d\n",
+			status);
+		return;
+	}
+
+	/* find the biggest available port split count */
+	for (i = 0; i < option_count; i++)
+		attrs->lanes = max_t(int, attrs->lanes, options[i].pmd);
+
+	attrs->splittable = attrs->lanes ? 1 : 0;
+	ice_active_port_option = active_idx;
+}
+
+static const struct devlink_port_ops ice_devlink_port_ops = {
+	.port_split = ice_devlink_port_split,
+	.port_unsplit = ice_devlink_port_unsplit,
+};
+
+/**
+ * ice_devlink_create_pf_port - Create a devlink port for this PF
+ * @pf: the PF to create a devlink port for
+ *
+ * Create and register a devlink_port for this PF.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+int ice_devlink_create_pf_port(struct ice_pf *pf)
+{
+	struct devlink_port_attrs attrs = {};
+	struct devlink_port *devlink_port;
+	struct devlink *devlink;
+	struct ice_vsi *vsi;
+	struct device *dev;
+	int err;
+
+	dev = ice_pf_to_dev(pf);
+
+	devlink_port = &pf->devlink_port;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return -EIO;
+
+	attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+	attrs.phys.port_number = pf->hw.bus.func;
+
+	/* As FW supports only port split options for whole device,
+	 * set port split options only for first PF.
+	 */
+	if (pf->hw.pf_id == 0)
+		ice_devlink_set_port_split_options(pf, &attrs);
+
+	ice_devlink_set_switch_id(pf, &attrs.switch_id);
+
+	devlink_port_attrs_set(devlink_port, &attrs);
+	devlink = priv_to_devlink(pf);
+
+	err = devlink_port_register_with_ops(devlink, devlink_port, vsi->idx,
+					     &ice_devlink_port_ops);
+	if (err) {
+		dev_err(dev, "Failed to create devlink port for PF %d, error %d\n",
+			pf->hw.pf_id, err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_devlink_destroy_pf_port - Destroy the devlink_port for this PF
+ * @pf: the PF to cleanup
+ *
+ * Unregisters the devlink_port structure associated with this PF.
+ */
+void ice_devlink_destroy_pf_port(struct ice_pf *pf)
+{
+	devlink_port_unregister(&pf->devlink_port);
+}
+
+/**
+ * ice_devlink_create_vf_port - Create a devlink port for this VF
+ * @vf: the VF to create a port for
+ *
+ * Create and register a devlink_port for this VF.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+int ice_devlink_create_vf_port(struct ice_vf *vf)
+{
+	struct devlink_port_attrs attrs = {};
+	struct devlink_port *devlink_port;
+	struct devlink *devlink;
+	struct ice_vsi *vsi;
+	struct device *dev;
+	struct ice_pf *pf;
+	int err;
+
+	pf = vf->pf;
+	dev = ice_pf_to_dev(pf);
+	devlink_port = &vf->devlink_port;
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi)
+		return -EINVAL;
+
+	attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF;
+	attrs.pci_vf.pf = pf->hw.bus.func;
+	attrs.pci_vf.vf = vf->vf_id;
+
+	ice_devlink_set_switch_id(pf, &attrs.switch_id);
+
+	devlink_port_attrs_set(devlink_port, &attrs);
+	devlink = priv_to_devlink(pf);
+
+	err = devlink_port_register(devlink, devlink_port, vsi->idx);
+	if (err) {
+		dev_err(dev, "Failed to create devlink port for VF %d, error %d\n",
+			vf->vf_id, err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_devlink_destroy_vf_port - Destroy the devlink_port for this VF
+ * @vf: the VF to cleanup
+ *
+ * Unregisters the devlink_port structure associated with this VF.
+ */
+void ice_devlink_destroy_vf_port(struct ice_vf *vf)
+{
+	devl_rate_leaf_destroy(&vf->devlink_port);
+	devlink_port_unregister(&vf->devlink_port);
+}
+
+#define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024)
+
+static const struct devlink_region_ops ice_nvm_region_ops;
+static const struct devlink_region_ops ice_sram_region_ops;
+
+/**
+ * ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents
+ * @devlink: the devlink instance
+ * @ops: the devlink region to snapshot
+ * @extack: extended ACK response structure
+ * @data: on exit points to snapshot data buffer
+ *
+ * This function is called in response to a DEVLINK_CMD_REGION_NEW for either
+ * the nvm-flash or shadow-ram region.
+ *
+ * It captures a snapshot of the NVM or Shadow RAM flash contents. This
+ * snapshot can then later be viewed via the DEVLINK_CMD_REGION_READ netlink
+ * interface.
+ *
+ * @returns zero on success, and updates the data pointer. Returns a non-zero
+ * error code on failure.
+ */
+static int ice_devlink_nvm_snapshot(struct devlink *devlink,
+				    const struct devlink_region_ops *ops,
+				    struct netlink_ext_ack *extack, u8 **data)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	bool read_shadow_ram;
+	u8 *nvm_data, *tmp, i;
+	u32 nvm_size, left;
+	s8 num_blks;
+	int status;
+
+	if (ops == &ice_nvm_region_ops) {
+		read_shadow_ram = false;
+		nvm_size = hw->flash.flash_size;
+	} else if (ops == &ice_sram_region_ops) {
+		read_shadow_ram = true;
+		nvm_size = hw->flash.sr_words * 2u;
+	} else {
+		NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function");
+		return -EOPNOTSUPP;
+	}
+
+	nvm_data = vzalloc(nvm_size);
+	if (!nvm_data)
+		return -ENOMEM;
+
+	num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE);
+	tmp = nvm_data;
+	left = nvm_size;
+
+	/* Some systems take longer to read the NVM than others which causes the
+	 * FW to reclaim the NVM lock before the entire NVM has been read. Fix
+	 * this by breaking the reads of the NVM into smaller chunks that will
+	 * probably not take as long. This has some overhead since we are
+	 * increasing the number of AQ commands, but it should always work
+	 */
+	for (i = 0; i < num_blks; i++) {
+		u32 read_sz = min_t(u32, ICE_DEVLINK_READ_BLK_SIZE, left);
+
+		status = ice_acquire_nvm(hw, ICE_RES_READ);
+		if (status) {
+			dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
+				status, hw->adminq.sq_last_status);
+			NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
+			vfree(nvm_data);
+			return -EIO;
+		}
+
+		status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE,
+					   &read_sz, tmp, read_shadow_ram);
+		if (status) {
+			dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n",
+				read_sz, status, hw->adminq.sq_last_status);
+			NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents");
+			ice_release_nvm(hw);
+			vfree(nvm_data);
+			return -EIO;
+		}
+		ice_release_nvm(hw);
+
+		tmp += read_sz;
+		left -= read_sz;
+	}
+
+	*data = nvm_data;
+
+	return 0;
+}
+
+/**
+ * ice_devlink_nvm_read - Read a portion of NVM flash contents
+ * @devlink: the devlink instance
+ * @ops: the devlink region to snapshot
+ * @extack: extended ACK response structure
+ * @offset: the offset to start at
+ * @size: the amount to read
+ * @data: the data buffer to read into
+ *
+ * This function is called in response to DEVLINK_CMD_REGION_READ to directly
+ * read a section of the NVM contents.
+ *
+ * It reads from either the nvm-flash or shadow-ram region contents.
+ *
+ * @returns zero on success, and updates the data pointer. Returns a non-zero
+ * error code on failure.
+ */
+static int ice_devlink_nvm_read(struct devlink *devlink,
+				const struct devlink_region_ops *ops,
+				struct netlink_ext_ack *extack,
+				u64 offset, u32 size, u8 *data)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	bool read_shadow_ram;
+	u64 nvm_size;
+	int status;
+
+	if (ops == &ice_nvm_region_ops) {
+		read_shadow_ram = false;
+		nvm_size = hw->flash.flash_size;
+	} else if (ops == &ice_sram_region_ops) {
+		read_shadow_ram = true;
+		nvm_size = hw->flash.sr_words * 2u;
+	} else {
+		NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function");
+		return -EOPNOTSUPP;
+	}
+
+	if (offset + size >= nvm_size) {
+		NL_SET_ERR_MSG_MOD(extack, "Cannot read beyond the region size");
+		return -ERANGE;
+	}
+
+	status = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (status) {
+		dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
+			status, hw->adminq.sq_last_status);
+		NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
+		return -EIO;
+	}
+
+	status = ice_read_flat_nvm(hw, (u32)offset, &size, data,
+				   read_shadow_ram);
+	if (status) {
+		dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n",
+			size, status, hw->adminq.sq_last_status);
+		NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents");
+		ice_release_nvm(hw);
+		return -EIO;
+	}
+	ice_release_nvm(hw);
+
+	return 0;
+}
+
+/**
+ * ice_devlink_devcaps_snapshot - Capture snapshot of device capabilities
+ * @devlink: the devlink instance
+ * @ops: the devlink region being snapshotted
+ * @extack: extended ACK response structure
+ * @data: on exit points to snapshot data buffer
+ *
+ * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for
+ * the device-caps devlink region. It captures a snapshot of the device
+ * capabilities reported by firmware.
+ *
+ * @returns zero on success, and updates the data pointer. Returns a non-zero
+ * error code on failure.
+ */
+static int
+ice_devlink_devcaps_snapshot(struct devlink *devlink,
+			     const struct devlink_region_ops *ops,
+			     struct netlink_ext_ack *extack, u8 **data)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	void *devcaps;
+	int status;
+
+	devcaps = vzalloc(ICE_AQ_MAX_BUF_LEN);
+	if (!devcaps)
+		return -ENOMEM;
+
+	status = ice_aq_list_caps(hw, devcaps, ICE_AQ_MAX_BUF_LEN, NULL,
+				  ice_aqc_opc_list_dev_caps, NULL);
+	if (status) {
+		dev_dbg(dev, "ice_aq_list_caps: failed to read device capabilities, err %d aq_err %d\n",
+			status, hw->adminq.sq_last_status);
+		NL_SET_ERR_MSG_MOD(extack, "Failed to read device capabilities");
+		vfree(devcaps);
+		return status;
+	}
+
+	*data = (u8 *)devcaps;
+
+	return 0;
+}
+
+static const struct devlink_region_ops ice_nvm_region_ops = {
+	.name = "nvm-flash",
+	.destructor = vfree,
+	.snapshot = ice_devlink_nvm_snapshot,
+	.read = ice_devlink_nvm_read,
+};
+
+static const struct devlink_region_ops ice_sram_region_ops = {
+	.name = "shadow-ram",
+	.destructor = vfree,
+	.snapshot = ice_devlink_nvm_snapshot,
+	.read = ice_devlink_nvm_read,
+};
+
+static const struct devlink_region_ops ice_devcaps_region_ops = {
+	.name = "device-caps",
+	.destructor = vfree,
+	.snapshot = ice_devlink_devcaps_snapshot,
+};
+
+/**
+ * ice_devlink_init_regions - Initialize devlink regions
+ * @pf: the PF device structure
+ *
+ * Create devlink regions used to enable access to dump the contents of the
+ * flash memory on the device.
+ */
+void ice_devlink_init_regions(struct ice_pf *pf)
+{
+	struct devlink *devlink = priv_to_devlink(pf);
+	struct device *dev = ice_pf_to_dev(pf);
+	u64 nvm_size, sram_size;
+
+	nvm_size = pf->hw.flash.flash_size;
+	pf->nvm_region = devlink_region_create(devlink, &ice_nvm_region_ops, 1,
+					       nvm_size);
+	if (IS_ERR(pf->nvm_region)) {
+		dev_err(dev, "failed to create NVM devlink region, err %ld\n",
+			PTR_ERR(pf->nvm_region));
+		pf->nvm_region = NULL;
+	}
+
+	sram_size = pf->hw.flash.sr_words * 2u;
+	pf->sram_region = devlink_region_create(devlink, &ice_sram_region_ops,
+						1, sram_size);
+	if (IS_ERR(pf->sram_region)) {
+		dev_err(dev, "failed to create shadow-ram devlink region, err %ld\n",
+			PTR_ERR(pf->sram_region));
+		pf->sram_region = NULL;
+	}
+
+	pf->devcaps_region = devlink_region_create(devlink,
+						   &ice_devcaps_region_ops, 10,
+						   ICE_AQ_MAX_BUF_LEN);
+	if (IS_ERR(pf->devcaps_region)) {
+		dev_err(dev, "failed to create device-caps devlink region, err %ld\n",
+			PTR_ERR(pf->devcaps_region));
+		pf->devcaps_region = NULL;
+	}
+}
+
+/**
+ * ice_devlink_destroy_regions - Destroy devlink regions
+ * @pf: the PF device structure
+ *
+ * Remove previously created regions for this PF.
+ */
+void ice_devlink_destroy_regions(struct ice_pf *pf)
+{
+	if (pf->nvm_region)
+		devlink_region_destroy(pf->nvm_region);
+
+	if (pf->sram_region)
+		devlink_region_destroy(pf->sram_region);
+
+	if (pf->devcaps_region)
+		devlink_region_destroy(pf->devcaps_region);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.h b/drivers/net/ethernet/intel/ice/ice_devlink.h
new file mode 100644
index 0000000000..6ec96779f5
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_DEVLINK_H_
+#define _ICE_DEVLINK_H_
+
+struct ice_pf *ice_allocate_pf(struct device *dev);
+
+void ice_devlink_register(struct ice_pf *pf);
+void ice_devlink_unregister(struct ice_pf *pf);
+int ice_devlink_register_params(struct ice_pf *pf);
+void ice_devlink_unregister_params(struct ice_pf *pf);
+int ice_devlink_create_pf_port(struct ice_pf *pf);
+void ice_devlink_destroy_pf_port(struct ice_pf *pf);
+int ice_devlink_create_vf_port(struct ice_vf *vf);
+void ice_devlink_destroy_vf_port(struct ice_vf *vf);
+
+void ice_devlink_init_regions(struct ice_pf *pf);
+void ice_devlink_destroy_regions(struct ice_pf *pf);
+
+int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi);
+void ice_tear_down_devlink_rate_tree(struct ice_pf *pf);
+
+#endif /* _ICE_DEVLINK_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c
new file mode 100644
index 0000000000..a655d499ab
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c
@@ -0,0 +1,722 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_eswitch.h"
+#include "ice_eswitch_br.h"
+#include "ice_fltr.h"
+#include "ice_repr.h"
+#include "ice_devlink.h"
+#include "ice_tc_lib.h"
+
+/**
+ * ice_eswitch_add_vf_sp_rule - add adv rule with VF's VSI index
+ * @pf: pointer to PF struct
+ * @vf: pointer to VF struct
+ *
+ * This function adds advanced rule that forwards packets with
+ * VF's VSI index to the corresponding switchdev ctrl VSI queue.
+ */
+static int
+ice_eswitch_add_vf_sp_rule(struct ice_pf *pf, struct ice_vf *vf)
+{
+	struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+	struct ice_adv_rule_info rule_info = { 0 };
+	struct ice_adv_lkup_elem *list;
+	struct ice_hw *hw = &pf->hw;
+	const u16 lkups_cnt = 1;
+	int err;
+
+	list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
+	if (!list)
+		return -ENOMEM;
+
+	ice_rule_add_src_vsi_metadata(list);
+
+	rule_info.sw_act.flag = ICE_FLTR_TX;
+	rule_info.sw_act.vsi_handle = ctrl_vsi->idx;
+	rule_info.sw_act.fltr_act = ICE_FWD_TO_Q;
+	rule_info.sw_act.fwd_id.q_id = hw->func_caps.common_cap.rxq_first_id +
+				       ctrl_vsi->rxq_map[vf->vf_id];
+	rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE;
+	rule_info.flags_info.act_valid = true;
+	rule_info.tun_type = ICE_SW_TUN_AND_NON_TUN;
+	rule_info.src_vsi = vf->lan_vsi_idx;
+
+	err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info,
+			       &vf->repr->sp_rule);
+	if (err)
+		dev_err(ice_pf_to_dev(pf), "Unable to add VF slow-path rule in switchdev mode for VF %d",
+			vf->vf_id);
+
+	kfree(list);
+	return err;
+}
+
+/**
+ * ice_eswitch_del_vf_sp_rule - delete adv rule with VF's VSI index
+ * @vf: pointer to the VF struct
+ *
+ * Delete the advanced rule that was used to forward packets with the VF's VSI
+ * index to the corresponding switchdev ctrl VSI queue.
+ */
+static void ice_eswitch_del_vf_sp_rule(struct ice_vf *vf)
+{
+	if (!vf->repr)
+		return;
+
+	ice_rem_adv_rule_by_id(&vf->pf->hw, &vf->repr->sp_rule);
+}
+
+/**
+ * ice_eswitch_setup_env - configure switchdev HW filters
+ * @pf: pointer to PF struct
+ *
+ * This function adds HW filters configuration specific for switchdev
+ * mode.
+ */
+static int ice_eswitch_setup_env(struct ice_pf *pf)
+{
+	struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi;
+	struct net_device *uplink_netdev = uplink_vsi->netdev;
+	struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+	struct ice_vsi_vlan_ops *vlan_ops;
+	bool rule_added = false;
+
+	ice_remove_vsi_fltr(&pf->hw, uplink_vsi->idx);
+
+	netif_addr_lock_bh(uplink_netdev);
+	__dev_uc_unsync(uplink_netdev, NULL);
+	__dev_mc_unsync(uplink_netdev, NULL);
+	netif_addr_unlock_bh(uplink_netdev);
+
+	if (ice_vsi_add_vlan_zero(uplink_vsi))
+		goto err_def_rx;
+
+	if (!ice_is_dflt_vsi_in_use(uplink_vsi->port_info)) {
+		if (ice_set_dflt_vsi(uplink_vsi))
+			goto err_def_rx;
+		rule_added = true;
+	}
+
+	vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi);
+	if (vlan_ops->dis_rx_filtering(uplink_vsi))
+		goto err_dis_rx;
+
+	if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override))
+		goto err_override_uplink;
+
+	if (ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_set_allow_override))
+		goto err_override_control;
+
+	if (ice_vsi_update_local_lb(uplink_vsi, true))
+		goto err_override_local_lb;
+
+	return 0;
+
+err_override_local_lb:
+	ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override);
+err_override_control:
+	ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
+err_override_uplink:
+	vlan_ops->ena_rx_filtering(uplink_vsi);
+err_dis_rx:
+	if (rule_added)
+		ice_clear_dflt_vsi(uplink_vsi);
+err_def_rx:
+	ice_fltr_add_mac_and_broadcast(uplink_vsi,
+				       uplink_vsi->port_info->mac.perm_addr,
+				       ICE_FWD_TO_VSI);
+	return -ENODEV;
+}
+
+/**
+ * ice_eswitch_remap_rings_to_vectors - reconfigure rings of switchdev ctrl VSI
+ * @pf: pointer to PF struct
+ *
+ * In switchdev number of allocated Tx/Rx rings is equal.
+ *
+ * This function fills q_vectors structures associated with representor and
+ * move each ring pairs to port representor netdevs. Each port representor
+ * will have dedicated 1 Tx/Rx ring pair, so number of rings pair is equal to
+ * number of VFs.
+ */
+static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi = pf->switchdev.control_vsi;
+	int q_id;
+
+	ice_for_each_txq(vsi, q_id) {
+		struct ice_q_vector *q_vector;
+		struct ice_tx_ring *tx_ring;
+		struct ice_rx_ring *rx_ring;
+		struct ice_repr *repr;
+		struct ice_vf *vf;
+
+		vf = ice_get_vf_by_id(pf, q_id);
+		if (WARN_ON(!vf))
+			continue;
+
+		repr = vf->repr;
+		q_vector = repr->q_vector;
+		tx_ring = vsi->tx_rings[q_id];
+		rx_ring = vsi->rx_rings[q_id];
+
+		q_vector->vsi = vsi;
+		q_vector->reg_idx = vsi->q_vectors[0]->reg_idx;
+
+		q_vector->num_ring_tx = 1;
+		q_vector->tx.tx_ring = tx_ring;
+		tx_ring->q_vector = q_vector;
+		tx_ring->next = NULL;
+		tx_ring->netdev = repr->netdev;
+		/* In switchdev mode, from OS stack perspective, there is only
+		 * one queue for given netdev, so it needs to be indexed as 0.
+		 */
+		tx_ring->q_index = 0;
+
+		q_vector->num_ring_rx = 1;
+		q_vector->rx.rx_ring = rx_ring;
+		rx_ring->q_vector = q_vector;
+		rx_ring->next = NULL;
+		rx_ring->netdev = repr->netdev;
+
+		ice_put_vf(vf);
+	}
+}
+
+/**
+ * ice_eswitch_release_reprs - clear PR VSIs configuration
+ * @pf: poiner to PF struct
+ * @ctrl_vsi: pointer to switchdev control VSI
+ */
+static void
+ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi)
+{
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	lockdep_assert_held(&pf->vfs.table_lock);
+
+	ice_for_each_vf(pf, bkt, vf) {
+		struct ice_vsi *vsi = vf->repr->src_vsi;
+
+		/* Skip VFs that aren't configured */
+		if (!vf->repr->dst)
+			continue;
+
+		ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
+		metadata_dst_free(vf->repr->dst);
+		vf->repr->dst = NULL;
+		ice_eswitch_del_vf_sp_rule(vf);
+		ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr,
+					       ICE_FWD_TO_VSI);
+
+		netif_napi_del(&vf->repr->q_vector->napi);
+	}
+}
+
+/**
+ * ice_eswitch_setup_reprs - configure port reprs to run in switchdev mode
+ * @pf: pointer to PF struct
+ */
+static int ice_eswitch_setup_reprs(struct ice_pf *pf)
+{
+	struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+	int max_vsi_num = 0;
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	lockdep_assert_held(&pf->vfs.table_lock);
+
+	ice_for_each_vf(pf, bkt, vf) {
+		struct ice_vsi *vsi = vf->repr->src_vsi;
+
+		ice_remove_vsi_fltr(&pf->hw, vsi->idx);
+		vf->repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
+						   GFP_KERNEL);
+		if (!vf->repr->dst) {
+			ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr,
+						       ICE_FWD_TO_VSI);
+			goto err;
+		}
+
+		if (ice_eswitch_add_vf_sp_rule(pf, vf)) {
+			ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr,
+						       ICE_FWD_TO_VSI);
+			goto err;
+		}
+
+		if (ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof)) {
+			ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr,
+						       ICE_FWD_TO_VSI);
+			ice_eswitch_del_vf_sp_rule(vf);
+			metadata_dst_free(vf->repr->dst);
+			vf->repr->dst = NULL;
+			goto err;
+		}
+
+		if (ice_vsi_add_vlan_zero(vsi)) {
+			ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr,
+						       ICE_FWD_TO_VSI);
+			ice_eswitch_del_vf_sp_rule(vf);
+			metadata_dst_free(vf->repr->dst);
+			vf->repr->dst = NULL;
+			ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof);
+			goto err;
+		}
+
+		if (max_vsi_num < vsi->vsi_num)
+			max_vsi_num = vsi->vsi_num;
+
+		netif_napi_add(vf->repr->netdev, &vf->repr->q_vector->napi,
+			       ice_napi_poll);
+
+		netif_keep_dst(vf->repr->netdev);
+	}
+
+	ice_for_each_vf(pf, bkt, vf) {
+		struct ice_repr *repr = vf->repr;
+		struct ice_vsi *vsi = repr->src_vsi;
+		struct metadata_dst *dst;
+
+		dst = repr->dst;
+		dst->u.port_info.port_id = vsi->vsi_num;
+		dst->u.port_info.lower_dev = repr->netdev;
+		ice_repr_set_traffic_vsi(repr, ctrl_vsi);
+	}
+
+	return 0;
+
+err:
+	ice_eswitch_release_reprs(pf, ctrl_vsi);
+
+	return -ENODEV;
+}
+
+/**
+ * ice_eswitch_update_repr - reconfigure VF port representor
+ * @vsi: VF VSI for which port representor is configured
+ */
+void ice_eswitch_update_repr(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_repr *repr;
+	struct ice_vf *vf;
+	int ret;
+
+	if (!ice_is_switchdev_running(pf))
+		return;
+
+	vf = vsi->vf;
+	repr = vf->repr;
+	repr->src_vsi = vsi;
+	repr->dst->u.port_info.port_id = vsi->vsi_num;
+
+	if (repr->br_port)
+		repr->br_port->vsi = vsi;
+
+	ret = ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof);
+	if (ret) {
+		ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI);
+		dev_err(ice_pf_to_dev(pf), "Failed to update VF %d port representor",
+			vsi->vf->vf_id);
+	}
+}
+
+/**
+ * ice_eswitch_port_start_xmit - callback for packets transmit
+ * @skb: send buffer
+ * @netdev: network interface device structure
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+netdev_tx_t
+ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct ice_netdev_priv *np;
+	struct ice_repr *repr;
+	struct ice_vsi *vsi;
+
+	np = netdev_priv(netdev);
+	vsi = np->vsi;
+
+	if (!vsi || !ice_is_switchdev_running(vsi->back))
+		return NETDEV_TX_BUSY;
+
+	if (ice_is_reset_in_progress(vsi->back->state) ||
+	    test_bit(ICE_VF_DIS, vsi->back->state))
+		return NETDEV_TX_BUSY;
+
+	repr = ice_netdev_to_repr(netdev);
+	skb_dst_drop(skb);
+	dst_hold((struct dst_entry *)repr->dst);
+	skb_dst_set(skb, (struct dst_entry *)repr->dst);
+	skb->queue_mapping = repr->vf->vf_id;
+
+	return ice_start_xmit(skb, netdev);
+}
+
+/**
+ * ice_eswitch_set_target_vsi - set switchdev context in Tx context descriptor
+ * @skb: pointer to send buffer
+ * @off: pointer to offload struct
+ */
+void
+ice_eswitch_set_target_vsi(struct sk_buff *skb,
+			   struct ice_tx_offload_params *off)
+{
+	struct metadata_dst *dst = skb_metadata_dst(skb);
+	u64 cd_cmd, dst_vsi;
+
+	if (!dst) {
+		cd_cmd = ICE_TX_CTX_DESC_SWTCH_UPLINK << ICE_TXD_CTX_QW1_CMD_S;
+		off->cd_qw1 |= (cd_cmd | ICE_TX_DESC_DTYPE_CTX);
+	} else {
+		cd_cmd = ICE_TX_CTX_DESC_SWTCH_VSI << ICE_TXD_CTX_QW1_CMD_S;
+		dst_vsi = ((u64)dst->u.port_info.port_id <<
+			   ICE_TXD_CTX_QW1_VSI_S) & ICE_TXD_CTX_QW1_VSI_M;
+		off->cd_qw1 = cd_cmd | dst_vsi | ICE_TX_DESC_DTYPE_CTX;
+	}
+}
+
+/**
+ * ice_eswitch_release_env - clear switchdev HW filters
+ * @pf: pointer to PF struct
+ *
+ * This function removes HW filters configuration specific for switchdev
+ * mode and restores default legacy mode settings.
+ */
+static void ice_eswitch_release_env(struct ice_pf *pf)
+{
+	struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi;
+	struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+	struct ice_vsi_vlan_ops *vlan_ops;
+
+	vlan_ops = ice_get_compat_vsi_vlan_ops(uplink_vsi);
+
+	ice_vsi_update_local_lb(uplink_vsi, false);
+	ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override);
+	ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override);
+	vlan_ops->ena_rx_filtering(uplink_vsi);
+	ice_clear_dflt_vsi(uplink_vsi);
+	ice_fltr_add_mac_and_broadcast(uplink_vsi,
+				       uplink_vsi->port_info->mac.perm_addr,
+				       ICE_FWD_TO_VSI);
+}
+
+/**
+ * ice_eswitch_vsi_setup - configure switchdev control VSI
+ * @pf: pointer to PF structure
+ * @pi: pointer to port_info structure
+ */
+static struct ice_vsi *
+ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
+{
+	struct ice_vsi_cfg_params params = {};
+
+	params.type = ICE_VSI_SWITCHDEV_CTRL;
+	params.pi = pi;
+	params.flags = ICE_VSI_FLAG_INIT;
+
+	return ice_vsi_setup(pf, &params);
+}
+
+/**
+ * ice_eswitch_napi_del - remove NAPI handle for all port representors
+ * @pf: pointer to PF structure
+ */
+static void ice_eswitch_napi_del(struct ice_pf *pf)
+{
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	lockdep_assert_held(&pf->vfs.table_lock);
+
+	ice_for_each_vf(pf, bkt, vf)
+		netif_napi_del(&vf->repr->q_vector->napi);
+}
+
+/**
+ * ice_eswitch_napi_enable - enable NAPI for all port representors
+ * @pf: pointer to PF structure
+ */
+static void ice_eswitch_napi_enable(struct ice_pf *pf)
+{
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	lockdep_assert_held(&pf->vfs.table_lock);
+
+	ice_for_each_vf(pf, bkt, vf)
+		napi_enable(&vf->repr->q_vector->napi);
+}
+
+/**
+ * ice_eswitch_napi_disable - disable NAPI for all port representors
+ * @pf: pointer to PF structure
+ */
+static void ice_eswitch_napi_disable(struct ice_pf *pf)
+{
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	lockdep_assert_held(&pf->vfs.table_lock);
+
+	ice_for_each_vf(pf, bkt, vf)
+		napi_disable(&vf->repr->q_vector->napi);
+}
+
+/**
+ * ice_eswitch_enable_switchdev - configure eswitch in switchdev mode
+ * @pf: pointer to PF structure
+ */
+static int ice_eswitch_enable_switchdev(struct ice_pf *pf)
+{
+	struct ice_vsi *ctrl_vsi, *uplink_vsi;
+
+	uplink_vsi = ice_get_main_vsi(pf);
+	if (!uplink_vsi)
+		return -ENODEV;
+
+	if (netif_is_any_bridge_port(uplink_vsi->netdev)) {
+		dev_err(ice_pf_to_dev(pf),
+			"Uplink port cannot be a bridge port\n");
+		return -EINVAL;
+	}
+
+	pf->switchdev.control_vsi = ice_eswitch_vsi_setup(pf, pf->hw.port_info);
+	if (!pf->switchdev.control_vsi)
+		return -ENODEV;
+
+	ctrl_vsi = pf->switchdev.control_vsi;
+	pf->switchdev.uplink_vsi = uplink_vsi;
+
+	if (ice_eswitch_setup_env(pf))
+		goto err_vsi;
+
+	if (ice_repr_add_for_all_vfs(pf))
+		goto err_repr_add;
+
+	if (ice_eswitch_setup_reprs(pf))
+		goto err_setup_reprs;
+
+	ice_eswitch_remap_rings_to_vectors(pf);
+
+	if (ice_vsi_open(ctrl_vsi))
+		goto err_setup_reprs;
+
+	if (ice_eswitch_br_offloads_init(pf))
+		goto err_br_offloads;
+
+	ice_eswitch_napi_enable(pf);
+
+	return 0;
+
+err_br_offloads:
+	ice_vsi_close(ctrl_vsi);
+err_setup_reprs:
+	ice_repr_rem_from_all_vfs(pf);
+err_repr_add:
+	ice_eswitch_release_env(pf);
+err_vsi:
+	ice_vsi_release(ctrl_vsi);
+	return -ENODEV;
+}
+
+/**
+ * ice_eswitch_disable_switchdev - disable switchdev resources
+ * @pf: pointer to PF structure
+ */
+static void ice_eswitch_disable_switchdev(struct ice_pf *pf)
+{
+	struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+
+	ice_eswitch_napi_disable(pf);
+	ice_eswitch_br_offloads_deinit(pf);
+	ice_eswitch_release_env(pf);
+	ice_eswitch_release_reprs(pf, ctrl_vsi);
+	ice_vsi_release(ctrl_vsi);
+	ice_repr_rem_from_all_vfs(pf);
+}
+
+/**
+ * ice_eswitch_mode_set - set new eswitch mode
+ * @devlink: pointer to devlink structure
+ * @mode: eswitch mode to switch to
+ * @extack: pointer to extack structure
+ */
+int
+ice_eswitch_mode_set(struct devlink *devlink, u16 mode,
+		     struct netlink_ext_ack *extack)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+
+	if (pf->eswitch_mode == mode)
+		return 0;
+
+	if (ice_has_vfs(pf)) {
+		dev_info(ice_pf_to_dev(pf), "Changing eswitch mode is allowed only if there is no VFs created");
+		NL_SET_ERR_MSG_MOD(extack, "Changing eswitch mode is allowed only if there is no VFs created");
+		return -EOPNOTSUPP;
+	}
+
+	switch (mode) {
+	case DEVLINK_ESWITCH_MODE_LEGACY:
+		dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to legacy",
+			 pf->hw.pf_id);
+		NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to legacy");
+		break;
+	case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+	{
+		if (ice_is_adq_active(pf)) {
+			dev_err(ice_pf_to_dev(pf), "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root");
+			NL_SET_ERR_MSG_MOD(extack, "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root");
+			return -EOPNOTSUPP;
+		}
+
+		dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev",
+			 pf->hw.pf_id);
+		NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev");
+		break;
+	}
+	default:
+		NL_SET_ERR_MSG_MOD(extack, "Unknown eswitch mode");
+		return -EINVAL;
+	}
+
+	pf->eswitch_mode = mode;
+	return 0;
+}
+
+/**
+ * ice_eswitch_mode_get - get current eswitch mode
+ * @devlink: pointer to devlink structure
+ * @mode: output parameter for current eswitch mode
+ */
+int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+
+	*mode = pf->eswitch_mode;
+	return 0;
+}
+
+/**
+ * ice_is_eswitch_mode_switchdev - check if eswitch mode is set to switchdev
+ * @pf: pointer to PF structure
+ *
+ * Returns true if eswitch mode is set to DEVLINK_ESWITCH_MODE_SWITCHDEV,
+ * false otherwise.
+ */
+bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf)
+{
+	return pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV;
+}
+
+/**
+ * ice_eswitch_release - cleanup eswitch
+ * @pf: pointer to PF structure
+ */
+void ice_eswitch_release(struct ice_pf *pf)
+{
+	if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY)
+		return;
+
+	ice_eswitch_disable_switchdev(pf);
+	pf->switchdev.is_running = false;
+}
+
+/**
+ * ice_eswitch_configure - configure eswitch
+ * @pf: pointer to PF structure
+ */
+int ice_eswitch_configure(struct ice_pf *pf)
+{
+	int status;
+
+	if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY || pf->switchdev.is_running)
+		return 0;
+
+	status = ice_eswitch_enable_switchdev(pf);
+	if (status)
+		return status;
+
+	pf->switchdev.is_running = true;
+	return 0;
+}
+
+/**
+ * ice_eswitch_start_all_tx_queues - start Tx queues of all port representors
+ * @pf: pointer to PF structure
+ */
+static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf)
+{
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	lockdep_assert_held(&pf->vfs.table_lock);
+
+	if (test_bit(ICE_DOWN, pf->state))
+		return;
+
+	ice_for_each_vf(pf, bkt, vf) {
+		if (vf->repr)
+			ice_repr_start_tx_queues(vf->repr);
+	}
+}
+
+/**
+ * ice_eswitch_stop_all_tx_queues - stop Tx queues of all port representors
+ * @pf: pointer to PF structure
+ */
+void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf)
+{
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	lockdep_assert_held(&pf->vfs.table_lock);
+
+	if (test_bit(ICE_DOWN, pf->state))
+		return;
+
+	ice_for_each_vf(pf, bkt, vf) {
+		if (vf->repr)
+			ice_repr_stop_tx_queues(vf->repr);
+	}
+}
+
+/**
+ * ice_eswitch_rebuild - rebuild eswitch
+ * @pf: pointer to PF structure
+ */
+int ice_eswitch_rebuild(struct ice_pf *pf)
+{
+	struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi;
+	int status;
+
+	ice_eswitch_napi_disable(pf);
+	ice_eswitch_napi_del(pf);
+
+	status = ice_eswitch_setup_env(pf);
+	if (status)
+		return status;
+
+	status = ice_eswitch_setup_reprs(pf);
+	if (status)
+		return status;
+
+	ice_eswitch_remap_rings_to_vectors(pf);
+
+	ice_replay_tc_fltrs(pf);
+
+	status = ice_vsi_open(ctrl_vsi);
+	if (status)
+		return status;
+
+	ice_eswitch_napi_enable(pf);
+	ice_eswitch_start_all_tx_queues(pf);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h
new file mode 100644
index 0000000000..b18bf83a2f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_ESWITCH_H_
+#define _ICE_ESWITCH_H_
+
+#include <net/devlink.h>
+
+#ifdef CONFIG_ICE_SWITCHDEV
+void ice_eswitch_release(struct ice_pf *pf);
+int ice_eswitch_configure(struct ice_pf *pf);
+int ice_eswitch_rebuild(struct ice_pf *pf);
+
+int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode);
+int
+ice_eswitch_mode_set(struct devlink *devlink, u16 mode,
+		     struct netlink_ext_ack *extack);
+bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf);
+
+void ice_eswitch_update_repr(struct ice_vsi *vsi);
+
+void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf);
+
+void ice_eswitch_set_target_vsi(struct sk_buff *skb,
+				struct ice_tx_offload_params *off);
+netdev_tx_t
+ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev);
+#else /* CONFIG_ICE_SWITCHDEV */
+static inline void ice_eswitch_release(struct ice_pf *pf) { }
+
+static inline void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) { }
+
+static inline void
+ice_eswitch_set_target_vsi(struct sk_buff *skb,
+			   struct ice_tx_offload_params *off) { }
+
+static inline void ice_eswitch_update_repr(struct ice_vsi *vsi) { }
+
+static inline int ice_eswitch_configure(struct ice_pf *pf)
+{
+	return 0;
+}
+
+static inline int ice_eswitch_rebuild(struct ice_pf *pf)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+	return DEVLINK_ESWITCH_MODE_LEGACY;
+}
+
+static inline int
+ice_eswitch_mode_set(struct devlink *devlink, u16 mode,
+		     struct netlink_ext_ack *extack)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf)
+{
+	return false;
+}
+
+static inline netdev_tx_t
+ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	return NETDEV_TX_BUSY;
+}
+#endif /* CONFIG_ICE_SWITCHDEV */
+#endif /* _ICE_ESWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch_br.c b/drivers/net/ethernet/intel/ice/ice_eswitch_br.c
new file mode 100644
index 0000000000..67bfd1f61c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch_br.c
@@ -0,0 +1,1346 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_eswitch_br.h"
+#include "ice_repr.h"
+#include "ice_switch.h"
+#include "ice_vlan.h"
+#include "ice_vf_vsi_vlan_ops.h"
+#include "ice_trace.h"
+
+#define ICE_ESW_BRIDGE_UPDATE_INTERVAL msecs_to_jiffies(1000)
+
+static const struct rhashtable_params ice_fdb_ht_params = {
+	.key_offset = offsetof(struct ice_esw_br_fdb_entry, data),
+	.key_len = sizeof(struct ice_esw_br_fdb_data),
+	.head_offset = offsetof(struct ice_esw_br_fdb_entry, ht_node),
+	.automatic_shrinking = true,
+};
+
+static bool ice_eswitch_br_is_dev_valid(const struct net_device *dev)
+{
+	/* Accept only PF netdev, PRs and LAG */
+	return ice_is_port_repr_netdev(dev) || netif_is_ice(dev) ||
+		netif_is_lag_master(dev);
+}
+
+static struct net_device *
+ice_eswitch_br_get_uplink_from_lag(struct net_device *lag_dev)
+{
+	struct net_device *lower;
+	struct list_head *iter;
+
+	netdev_for_each_lower_dev(lag_dev, lower, iter) {
+		if (netif_is_ice(lower))
+			return lower;
+	}
+
+	return NULL;
+}
+
+static struct ice_esw_br_port *
+ice_eswitch_br_netdev_to_port(struct net_device *dev)
+{
+	if (ice_is_port_repr_netdev(dev)) {
+		struct ice_repr *repr = ice_netdev_to_repr(dev);
+
+		return repr->br_port;
+	} else if (netif_is_ice(dev) || netif_is_lag_master(dev)) {
+		struct net_device *ice_dev;
+		struct ice_pf *pf;
+
+		if (netif_is_lag_master(dev))
+			ice_dev = ice_eswitch_br_get_uplink_from_lag(dev);
+		else
+			ice_dev = dev;
+
+		if (!ice_dev)
+			return NULL;
+
+		pf = ice_netdev_to_pf(ice_dev);
+
+		return pf->br_port;
+	}
+
+	return NULL;
+}
+
+static void
+ice_eswitch_br_ingress_rule_setup(struct ice_adv_rule_info *rule_info,
+				  u8 pf_id, u16 vf_vsi_idx)
+{
+	rule_info->sw_act.vsi_handle = vf_vsi_idx;
+	rule_info->sw_act.flag |= ICE_FLTR_RX;
+	rule_info->sw_act.src = pf_id;
+	rule_info->priority = 5;
+}
+
+static void
+ice_eswitch_br_egress_rule_setup(struct ice_adv_rule_info *rule_info,
+				 u16 pf_vsi_idx)
+{
+	rule_info->sw_act.vsi_handle = pf_vsi_idx;
+	rule_info->sw_act.flag |= ICE_FLTR_TX;
+	rule_info->flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE;
+	rule_info->flags_info.act_valid = true;
+	rule_info->priority = 5;
+}
+
+static int
+ice_eswitch_br_rule_delete(struct ice_hw *hw, struct ice_rule_query_data *rule)
+{
+	int err;
+
+	if (!rule)
+		return -EINVAL;
+
+	err = ice_rem_adv_rule_by_id(hw, rule);
+	kfree(rule);
+
+	return err;
+}
+
+static u16
+ice_eswitch_br_get_lkups_cnt(u16 vid)
+{
+	return ice_eswitch_br_is_vid_valid(vid) ? 2 : 1;
+}
+
+static void
+ice_eswitch_br_add_vlan_lkup(struct ice_adv_lkup_elem *list, u16 vid)
+{
+	if (ice_eswitch_br_is_vid_valid(vid)) {
+		list[1].type = ICE_VLAN_OFOS;
+		list[1].h_u.vlan_hdr.vlan = cpu_to_be16(vid & VLAN_VID_MASK);
+		list[1].m_u.vlan_hdr.vlan = cpu_to_be16(0xFFFF);
+	}
+}
+
+static struct ice_rule_query_data *
+ice_eswitch_br_fwd_rule_create(struct ice_hw *hw, int vsi_idx, int port_type,
+			       const unsigned char *mac, u16 vid)
+{
+	struct ice_adv_rule_info rule_info = { 0 };
+	struct ice_rule_query_data *rule;
+	struct ice_adv_lkup_elem *list;
+	u16 lkups_cnt;
+	int err;
+
+	lkups_cnt = ice_eswitch_br_get_lkups_cnt(vid);
+
+	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+	if (!rule)
+		return ERR_PTR(-ENOMEM);
+
+	list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
+	if (!list) {
+		err = -ENOMEM;
+		goto err_list_alloc;
+	}
+
+	switch (port_type) {
+	case ICE_ESWITCH_BR_UPLINK_PORT:
+		ice_eswitch_br_egress_rule_setup(&rule_info, vsi_idx);
+		break;
+	case ICE_ESWITCH_BR_VF_REPR_PORT:
+		ice_eswitch_br_ingress_rule_setup(&rule_info, hw->pf_id,
+						  vsi_idx);
+		break;
+	default:
+		err = -EINVAL;
+		goto err_add_rule;
+	}
+
+	list[0].type = ICE_MAC_OFOS;
+	ether_addr_copy(list[0].h_u.eth_hdr.dst_addr, mac);
+	eth_broadcast_addr(list[0].m_u.eth_hdr.dst_addr);
+
+	ice_eswitch_br_add_vlan_lkup(list, vid);
+
+	rule_info.need_pass_l2 = true;
+
+	rule_info.sw_act.fltr_act = ICE_FWD_TO_VSI;
+
+	err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, rule);
+	if (err)
+		goto err_add_rule;
+
+	kfree(list);
+
+	return rule;
+
+err_add_rule:
+	kfree(list);
+err_list_alloc:
+	kfree(rule);
+
+	return ERR_PTR(err);
+}
+
+static struct ice_rule_query_data *
+ice_eswitch_br_guard_rule_create(struct ice_hw *hw, u16 vsi_idx,
+				 const unsigned char *mac, u16 vid)
+{
+	struct ice_adv_rule_info rule_info = { 0 };
+	struct ice_rule_query_data *rule;
+	struct ice_adv_lkup_elem *list;
+	int err = -ENOMEM;
+	u16 lkups_cnt;
+
+	lkups_cnt = ice_eswitch_br_get_lkups_cnt(vid);
+
+	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+	if (!rule)
+		goto err_exit;
+
+	list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
+	if (!list)
+		goto err_list_alloc;
+
+	list[0].type = ICE_MAC_OFOS;
+	ether_addr_copy(list[0].h_u.eth_hdr.src_addr, mac);
+	eth_broadcast_addr(list[0].m_u.eth_hdr.src_addr);
+
+	ice_eswitch_br_add_vlan_lkup(list, vid);
+
+	rule_info.allow_pass_l2 = true;
+	rule_info.sw_act.vsi_handle = vsi_idx;
+	rule_info.sw_act.fltr_act = ICE_NOP;
+	rule_info.priority = 5;
+
+	err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, rule);
+	if (err)
+		goto err_add_rule;
+
+	kfree(list);
+
+	return rule;
+
+err_add_rule:
+	kfree(list);
+err_list_alloc:
+	kfree(rule);
+err_exit:
+	return ERR_PTR(err);
+}
+
+static struct ice_esw_br_flow *
+ice_eswitch_br_flow_create(struct device *dev, struct ice_hw *hw, int vsi_idx,
+			   int port_type, const unsigned char *mac, u16 vid)
+{
+	struct ice_rule_query_data *fwd_rule, *guard_rule;
+	struct ice_esw_br_flow *flow;
+	int err;
+
+	flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+	if (!flow)
+		return ERR_PTR(-ENOMEM);
+
+	fwd_rule = ice_eswitch_br_fwd_rule_create(hw, vsi_idx, port_type, mac,
+						  vid);
+	err = PTR_ERR_OR_ZERO(fwd_rule);
+	if (err) {
+		dev_err(dev, "Failed to create eswitch bridge %sgress forward rule, err: %d\n",
+			port_type == ICE_ESWITCH_BR_UPLINK_PORT ? "e" : "in",
+			err);
+		goto err_fwd_rule;
+	}
+
+	guard_rule = ice_eswitch_br_guard_rule_create(hw, vsi_idx, mac, vid);
+	err = PTR_ERR_OR_ZERO(guard_rule);
+	if (err) {
+		dev_err(dev, "Failed to create eswitch bridge %sgress guard rule, err: %d\n",
+			port_type == ICE_ESWITCH_BR_UPLINK_PORT ? "e" : "in",
+			err);
+		goto err_guard_rule;
+	}
+
+	flow->fwd_rule = fwd_rule;
+	flow->guard_rule = guard_rule;
+
+	return flow;
+
+err_guard_rule:
+	ice_eswitch_br_rule_delete(hw, fwd_rule);
+err_fwd_rule:
+	kfree(flow);
+
+	return ERR_PTR(err);
+}
+
+static struct ice_esw_br_fdb_entry *
+ice_eswitch_br_fdb_find(struct ice_esw_br *bridge, const unsigned char *mac,
+			u16 vid)
+{
+	struct ice_esw_br_fdb_data data = {
+		.vid = vid,
+	};
+
+	ether_addr_copy(data.addr, mac);
+	return rhashtable_lookup_fast(&bridge->fdb_ht, &data,
+				      ice_fdb_ht_params);
+}
+
+static void
+ice_eswitch_br_flow_delete(struct ice_pf *pf, struct ice_esw_br_flow *flow)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	int err;
+
+	err = ice_eswitch_br_rule_delete(&pf->hw, flow->fwd_rule);
+	if (err)
+		dev_err(dev, "Failed to delete FDB forward rule, err: %d\n",
+			err);
+
+	err = ice_eswitch_br_rule_delete(&pf->hw, flow->guard_rule);
+	if (err)
+		dev_err(dev, "Failed to delete FDB guard rule, err: %d\n",
+			err);
+
+	kfree(flow);
+}
+
+static struct ice_esw_br_vlan *
+ice_esw_br_port_vlan_lookup(struct ice_esw_br *bridge, u16 vsi_idx, u16 vid)
+{
+	struct ice_pf *pf = bridge->br_offloads->pf;
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_esw_br_port *port;
+	struct ice_esw_br_vlan *vlan;
+
+	port = xa_load(&bridge->ports, vsi_idx);
+	if (!port) {
+		dev_info(dev, "Bridge port lookup failed (vsi=%u)\n", vsi_idx);
+		return ERR_PTR(-EINVAL);
+	}
+
+	vlan = xa_load(&port->vlans, vid);
+	if (!vlan) {
+		dev_info(dev, "Bridge port vlan metadata lookup failed (vsi=%u)\n",
+			 vsi_idx);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return vlan;
+}
+
+static void
+ice_eswitch_br_fdb_entry_delete(struct ice_esw_br *bridge,
+				struct ice_esw_br_fdb_entry *fdb_entry)
+{
+	struct ice_pf *pf = bridge->br_offloads->pf;
+
+	rhashtable_remove_fast(&bridge->fdb_ht, &fdb_entry->ht_node,
+			       ice_fdb_ht_params);
+	list_del(&fdb_entry->list);
+
+	ice_eswitch_br_flow_delete(pf, fdb_entry->flow);
+
+	kfree(fdb_entry);
+}
+
+static void
+ice_eswitch_br_fdb_offload_notify(struct net_device *dev,
+				  const unsigned char *mac, u16 vid,
+				  unsigned long val)
+{
+	struct switchdev_notifier_fdb_info fdb_info = {
+		.addr = mac,
+		.vid = vid,
+		.offloaded = true,
+	};
+
+	call_switchdev_notifiers(val, dev, &fdb_info.info, NULL);
+}
+
+static void
+ice_eswitch_br_fdb_entry_notify_and_cleanup(struct ice_esw_br *bridge,
+					    struct ice_esw_br_fdb_entry *entry)
+{
+	if (!(entry->flags & ICE_ESWITCH_BR_FDB_ADDED_BY_USER))
+		ice_eswitch_br_fdb_offload_notify(entry->dev, entry->data.addr,
+						  entry->data.vid,
+						  SWITCHDEV_FDB_DEL_TO_BRIDGE);
+	ice_eswitch_br_fdb_entry_delete(bridge, entry);
+}
+
+static void
+ice_eswitch_br_fdb_entry_find_and_delete(struct ice_esw_br *bridge,
+					 const unsigned char *mac, u16 vid)
+{
+	struct ice_pf *pf = bridge->br_offloads->pf;
+	struct ice_esw_br_fdb_entry *fdb_entry;
+	struct device *dev = ice_pf_to_dev(pf);
+
+	fdb_entry = ice_eswitch_br_fdb_find(bridge, mac, vid);
+	if (!fdb_entry) {
+		dev_err(dev, "FDB entry with mac: %pM and vid: %u not found\n",
+			mac, vid);
+		return;
+	}
+
+	trace_ice_eswitch_br_fdb_entry_find_and_delete(fdb_entry);
+	ice_eswitch_br_fdb_entry_notify_and_cleanup(bridge, fdb_entry);
+}
+
+static void
+ice_eswitch_br_fdb_entry_create(struct net_device *netdev,
+				struct ice_esw_br_port *br_port,
+				bool added_by_user,
+				const unsigned char *mac, u16 vid)
+{
+	struct ice_esw_br *bridge = br_port->bridge;
+	struct ice_pf *pf = bridge->br_offloads->pf;
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_esw_br_fdb_entry *fdb_entry;
+	struct ice_esw_br_flow *flow;
+	struct ice_esw_br_vlan *vlan;
+	struct ice_hw *hw = &pf->hw;
+	unsigned long event;
+	int err;
+
+	/* untagged filtering is not yet supported */
+	if (!(bridge->flags & ICE_ESWITCH_BR_VLAN_FILTERING) && vid)
+		return;
+
+	if ((bridge->flags & ICE_ESWITCH_BR_VLAN_FILTERING)) {
+		vlan = ice_esw_br_port_vlan_lookup(bridge, br_port->vsi_idx,
+						   vid);
+		if (IS_ERR(vlan)) {
+			dev_err(dev, "Failed to find vlan lookup, err: %ld\n",
+				PTR_ERR(vlan));
+			return;
+		}
+	}
+
+	fdb_entry = ice_eswitch_br_fdb_find(bridge, mac, vid);
+	if (fdb_entry)
+		ice_eswitch_br_fdb_entry_notify_and_cleanup(bridge, fdb_entry);
+
+	fdb_entry = kzalloc(sizeof(*fdb_entry), GFP_KERNEL);
+	if (!fdb_entry) {
+		err = -ENOMEM;
+		goto err_exit;
+	}
+
+	flow = ice_eswitch_br_flow_create(dev, hw, br_port->vsi_idx,
+					  br_port->type, mac, vid);
+	if (IS_ERR(flow)) {
+		err = PTR_ERR(flow);
+		goto err_add_flow;
+	}
+
+	ether_addr_copy(fdb_entry->data.addr, mac);
+	fdb_entry->data.vid = vid;
+	fdb_entry->br_port = br_port;
+	fdb_entry->flow = flow;
+	fdb_entry->dev = netdev;
+	fdb_entry->last_use = jiffies;
+	event = SWITCHDEV_FDB_ADD_TO_BRIDGE;
+
+	if (added_by_user) {
+		fdb_entry->flags |= ICE_ESWITCH_BR_FDB_ADDED_BY_USER;
+		event = SWITCHDEV_FDB_OFFLOADED;
+	}
+
+	err = rhashtable_insert_fast(&bridge->fdb_ht, &fdb_entry->ht_node,
+				     ice_fdb_ht_params);
+	if (err)
+		goto err_fdb_insert;
+
+	list_add(&fdb_entry->list, &bridge->fdb_list);
+	trace_ice_eswitch_br_fdb_entry_create(fdb_entry);
+
+	ice_eswitch_br_fdb_offload_notify(netdev, mac, vid, event);
+
+	return;
+
+err_fdb_insert:
+	ice_eswitch_br_flow_delete(pf, flow);
+err_add_flow:
+	kfree(fdb_entry);
+err_exit:
+	dev_err(dev, "Failed to create fdb entry, err: %d\n", err);
+}
+
+static void
+ice_eswitch_br_fdb_work_dealloc(struct ice_esw_br_fdb_work *fdb_work)
+{
+	kfree(fdb_work->fdb_info.addr);
+	kfree(fdb_work);
+}
+
+static void
+ice_eswitch_br_fdb_event_work(struct work_struct *work)
+{
+	struct ice_esw_br_fdb_work *fdb_work = ice_work_to_fdb_work(work);
+	bool added_by_user = fdb_work->fdb_info.added_by_user;
+	const unsigned char *mac = fdb_work->fdb_info.addr;
+	u16 vid = fdb_work->fdb_info.vid;
+	struct ice_esw_br_port *br_port;
+
+	rtnl_lock();
+
+	br_port = ice_eswitch_br_netdev_to_port(fdb_work->dev);
+	if (!br_port)
+		goto err_exit;
+
+	switch (fdb_work->event) {
+	case SWITCHDEV_FDB_ADD_TO_DEVICE:
+		ice_eswitch_br_fdb_entry_create(fdb_work->dev, br_port,
+						added_by_user, mac, vid);
+		break;
+	case SWITCHDEV_FDB_DEL_TO_DEVICE:
+		ice_eswitch_br_fdb_entry_find_and_delete(br_port->bridge,
+							 mac, vid);
+		break;
+	default:
+		goto err_exit;
+	}
+
+err_exit:
+	rtnl_unlock();
+	dev_put(fdb_work->dev);
+	ice_eswitch_br_fdb_work_dealloc(fdb_work);
+}
+
+static struct ice_esw_br_fdb_work *
+ice_eswitch_br_fdb_work_alloc(struct switchdev_notifier_fdb_info *fdb_info,
+			      struct net_device *dev,
+			      unsigned long event)
+{
+	struct ice_esw_br_fdb_work *work;
+	unsigned char *mac;
+
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_WORK(&work->work, ice_eswitch_br_fdb_event_work);
+	memcpy(&work->fdb_info, fdb_info, sizeof(work->fdb_info));
+
+	mac = kzalloc(ETH_ALEN, GFP_ATOMIC);
+	if (!mac) {
+		kfree(work);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ether_addr_copy(mac, fdb_info->addr);
+	work->fdb_info.addr = mac;
+	work->event = event;
+	work->dev = dev;
+
+	return work;
+}
+
+static int
+ice_eswitch_br_switchdev_event(struct notifier_block *nb,
+			       unsigned long event, void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+	struct switchdev_notifier_fdb_info *fdb_info;
+	struct switchdev_notifier_info *info = ptr;
+	struct ice_esw_br_offloads *br_offloads;
+	struct ice_esw_br_fdb_work *work;
+	struct netlink_ext_ack *extack;
+	struct net_device *upper;
+
+	br_offloads = ice_nb_to_br_offloads(nb, switchdev_nb);
+	extack = switchdev_notifier_info_to_extack(ptr);
+
+	upper = netdev_master_upper_dev_get_rcu(dev);
+	if (!upper)
+		return NOTIFY_DONE;
+
+	if (!netif_is_bridge_master(upper))
+		return NOTIFY_DONE;
+
+	if (!ice_eswitch_br_is_dev_valid(dev))
+		return NOTIFY_DONE;
+
+	if (!ice_eswitch_br_netdev_to_port(dev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case SWITCHDEV_FDB_ADD_TO_DEVICE:
+	case SWITCHDEV_FDB_DEL_TO_DEVICE:
+		fdb_info = container_of(info, typeof(*fdb_info), info);
+
+		work = ice_eswitch_br_fdb_work_alloc(fdb_info, dev, event);
+		if (IS_ERR(work)) {
+			NL_SET_ERR_MSG_MOD(extack, "Failed to init switchdev fdb work");
+			return notifier_from_errno(PTR_ERR(work));
+		}
+		dev_hold(dev);
+
+		queue_work(br_offloads->wq, &work->work);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static void ice_eswitch_br_fdb_flush(struct ice_esw_br *bridge)
+{
+	struct ice_esw_br_fdb_entry *entry, *tmp;
+
+	list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list)
+		ice_eswitch_br_fdb_entry_notify_and_cleanup(bridge, entry);
+}
+
+static void
+ice_eswitch_br_vlan_filtering_set(struct ice_esw_br *bridge, bool enable)
+{
+	if (enable == !!(bridge->flags & ICE_ESWITCH_BR_VLAN_FILTERING))
+		return;
+
+	ice_eswitch_br_fdb_flush(bridge);
+	if (enable)
+		bridge->flags |= ICE_ESWITCH_BR_VLAN_FILTERING;
+	else
+		bridge->flags &= ~ICE_ESWITCH_BR_VLAN_FILTERING;
+}
+
+static void
+ice_eswitch_br_clear_pvid(struct ice_esw_br_port *port)
+{
+	struct ice_vlan port_vlan = ICE_VLAN(ETH_P_8021Q, port->pvid, 0);
+	struct ice_vsi_vlan_ops *vlan_ops;
+
+	vlan_ops = ice_get_compat_vsi_vlan_ops(port->vsi);
+
+	vlan_ops->del_vlan(port->vsi, &port_vlan);
+	vlan_ops->clear_port_vlan(port->vsi);
+
+	ice_vf_vsi_disable_port_vlan(port->vsi);
+
+	port->pvid = 0;
+}
+
+static void
+ice_eswitch_br_vlan_cleanup(struct ice_esw_br_port *port,
+			    struct ice_esw_br_vlan *vlan)
+{
+	struct ice_esw_br_fdb_entry *fdb_entry, *tmp;
+	struct ice_esw_br *bridge = port->bridge;
+
+	trace_ice_eswitch_br_vlan_cleanup(vlan);
+
+	list_for_each_entry_safe(fdb_entry, tmp, &bridge->fdb_list, list) {
+		if (vlan->vid == fdb_entry->data.vid)
+			ice_eswitch_br_fdb_entry_delete(bridge, fdb_entry);
+	}
+
+	xa_erase(&port->vlans, vlan->vid);
+	if (port->pvid == vlan->vid)
+		ice_eswitch_br_clear_pvid(port);
+	kfree(vlan);
+}
+
+static void ice_eswitch_br_port_vlans_flush(struct ice_esw_br_port *port)
+{
+	struct ice_esw_br_vlan *vlan;
+	unsigned long index;
+
+	xa_for_each(&port->vlans, index, vlan)
+		ice_eswitch_br_vlan_cleanup(port, vlan);
+}
+
+static int
+ice_eswitch_br_set_pvid(struct ice_esw_br_port *port,
+			struct ice_esw_br_vlan *vlan)
+{
+	struct ice_vlan port_vlan = ICE_VLAN(ETH_P_8021Q, vlan->vid, 0);
+	struct device *dev = ice_pf_to_dev(port->vsi->back);
+	struct ice_vsi_vlan_ops *vlan_ops;
+	int err;
+
+	if (port->pvid == vlan->vid || vlan->vid == 1)
+		return 0;
+
+	/* Setting port vlan on uplink isn't supported by hw */
+	if (port->type == ICE_ESWITCH_BR_UPLINK_PORT)
+		return -EOPNOTSUPP;
+
+	if (port->pvid) {
+		dev_info(dev,
+			 "Port VLAN (vsi=%u, vid=%u) already exists on the port, remove it before adding new one\n",
+			 port->vsi_idx, port->pvid);
+		return -EEXIST;
+	}
+
+	ice_vf_vsi_enable_port_vlan(port->vsi);
+
+	vlan_ops = ice_get_compat_vsi_vlan_ops(port->vsi);
+	err = vlan_ops->set_port_vlan(port->vsi, &port_vlan);
+	if (err)
+		return err;
+
+	err = vlan_ops->add_vlan(port->vsi, &port_vlan);
+	if (err)
+		return err;
+
+	ice_eswitch_br_port_vlans_flush(port);
+	port->pvid = vlan->vid;
+
+	return 0;
+}
+
+static struct ice_esw_br_vlan *
+ice_eswitch_br_vlan_create(u16 vid, u16 flags, struct ice_esw_br_port *port)
+{
+	struct device *dev = ice_pf_to_dev(port->vsi->back);
+	struct ice_esw_br_vlan *vlan;
+	int err;
+
+	vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
+	if (!vlan)
+		return ERR_PTR(-ENOMEM);
+
+	vlan->vid = vid;
+	vlan->flags = flags;
+	if ((flags & BRIDGE_VLAN_INFO_PVID) &&
+	    (flags & BRIDGE_VLAN_INFO_UNTAGGED)) {
+		err = ice_eswitch_br_set_pvid(port, vlan);
+		if (err)
+			goto err_set_pvid;
+	} else if ((flags & BRIDGE_VLAN_INFO_PVID) ||
+		   (flags & BRIDGE_VLAN_INFO_UNTAGGED)) {
+		dev_info(dev, "VLAN push and pop are supported only simultaneously\n");
+		err = -EOPNOTSUPP;
+		goto err_set_pvid;
+	}
+
+	err = xa_insert(&port->vlans, vlan->vid, vlan, GFP_KERNEL);
+	if (err)
+		goto err_insert;
+
+	trace_ice_eswitch_br_vlan_create(vlan);
+
+	return vlan;
+
+err_insert:
+	if (port->pvid)
+		ice_eswitch_br_clear_pvid(port);
+err_set_pvid:
+	kfree(vlan);
+	return ERR_PTR(err);
+}
+
+static int
+ice_eswitch_br_port_vlan_add(struct ice_esw_br *bridge, u16 vsi_idx, u16 vid,
+			     u16 flags, struct netlink_ext_ack *extack)
+{
+	struct ice_esw_br_port *port;
+	struct ice_esw_br_vlan *vlan;
+
+	port = xa_load(&bridge->ports, vsi_idx);
+	if (!port)
+		return -EINVAL;
+
+	if (port->pvid) {
+		dev_info(ice_pf_to_dev(port->vsi->back),
+			 "Port VLAN (vsi=%u, vid=%d) exists on the port, remove it to add trunk VLANs\n",
+			 port->vsi_idx, port->pvid);
+		return -EEXIST;
+	}
+
+	vlan = xa_load(&port->vlans, vid);
+	if (vlan) {
+		if (vlan->flags == flags)
+			return 0;
+
+		ice_eswitch_br_vlan_cleanup(port, vlan);
+	}
+
+	vlan = ice_eswitch_br_vlan_create(vid, flags, port);
+	if (IS_ERR(vlan)) {
+		NL_SET_ERR_MSG_FMT_MOD(extack, "Failed to create VLAN entry, vid: %u, vsi: %u",
+				       vid, vsi_idx);
+		return PTR_ERR(vlan);
+	}
+
+	return 0;
+}
+
+static void
+ice_eswitch_br_port_vlan_del(struct ice_esw_br *bridge, u16 vsi_idx, u16 vid)
+{
+	struct ice_esw_br_port *port;
+	struct ice_esw_br_vlan *vlan;
+
+	port = xa_load(&bridge->ports, vsi_idx);
+	if (!port)
+		return;
+
+	vlan = xa_load(&port->vlans, vid);
+	if (!vlan)
+		return;
+
+	ice_eswitch_br_vlan_cleanup(port, vlan);
+}
+
+static int
+ice_eswitch_br_port_obj_add(struct net_device *netdev, const void *ctx,
+			    const struct switchdev_obj *obj,
+			    struct netlink_ext_ack *extack)
+{
+	struct ice_esw_br_port *br_port = ice_eswitch_br_netdev_to_port(netdev);
+	struct switchdev_obj_port_vlan *vlan;
+	int err;
+
+	if (!br_port)
+		return -EINVAL;
+
+	switch (obj->id) {
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+		err = ice_eswitch_br_port_vlan_add(br_port->bridge,
+						   br_port->vsi_idx, vlan->vid,
+						   vlan->flags, extack);
+		return err;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int
+ice_eswitch_br_port_obj_del(struct net_device *netdev, const void *ctx,
+			    const struct switchdev_obj *obj)
+{
+	struct ice_esw_br_port *br_port = ice_eswitch_br_netdev_to_port(netdev);
+	struct switchdev_obj_port_vlan *vlan;
+
+	if (!br_port)
+		return -EINVAL;
+
+	switch (obj->id) {
+	case SWITCHDEV_OBJ_ID_PORT_VLAN:
+		vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
+		ice_eswitch_br_port_vlan_del(br_port->bridge, br_port->vsi_idx,
+					     vlan->vid);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int
+ice_eswitch_br_port_obj_attr_set(struct net_device *netdev, const void *ctx,
+				 const struct switchdev_attr *attr,
+				 struct netlink_ext_ack *extack)
+{
+	struct ice_esw_br_port *br_port = ice_eswitch_br_netdev_to_port(netdev);
+
+	if (!br_port)
+		return -EINVAL;
+
+	switch (attr->id) {
+	case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+		ice_eswitch_br_vlan_filtering_set(br_port->bridge,
+						  attr->u.vlan_filtering);
+		return 0;
+	case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+		br_port->bridge->ageing_time =
+			clock_t_to_jiffies(attr->u.ageing_time);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int
+ice_eswitch_br_event_blocking(struct notifier_block *nb, unsigned long event,
+			      void *ptr)
+{
+	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+	int err;
+
+	switch (event) {
+	case SWITCHDEV_PORT_OBJ_ADD:
+		err = switchdev_handle_port_obj_add(dev, ptr,
+						    ice_eswitch_br_is_dev_valid,
+						    ice_eswitch_br_port_obj_add);
+		break;
+	case SWITCHDEV_PORT_OBJ_DEL:
+		err = switchdev_handle_port_obj_del(dev, ptr,
+						    ice_eswitch_br_is_dev_valid,
+						    ice_eswitch_br_port_obj_del);
+		break;
+	case SWITCHDEV_PORT_ATTR_SET:
+		err = switchdev_handle_port_attr_set(dev, ptr,
+						     ice_eswitch_br_is_dev_valid,
+						     ice_eswitch_br_port_obj_attr_set);
+		break;
+	default:
+		err = 0;
+	}
+
+	return notifier_from_errno(err);
+}
+
+static void
+ice_eswitch_br_port_deinit(struct ice_esw_br *bridge,
+			   struct ice_esw_br_port *br_port)
+{
+	struct ice_esw_br_fdb_entry *fdb_entry, *tmp;
+	struct ice_vsi *vsi = br_port->vsi;
+
+	list_for_each_entry_safe(fdb_entry, tmp, &bridge->fdb_list, list) {
+		if (br_port == fdb_entry->br_port)
+			ice_eswitch_br_fdb_entry_delete(bridge, fdb_entry);
+	}
+
+	if (br_port->type == ICE_ESWITCH_BR_UPLINK_PORT && vsi->back)
+		vsi->back->br_port = NULL;
+	else if (vsi->vf && vsi->vf->repr)
+		vsi->vf->repr->br_port = NULL;
+
+	xa_erase(&bridge->ports, br_port->vsi_idx);
+	ice_eswitch_br_port_vlans_flush(br_port);
+	kfree(br_port);
+}
+
+static struct ice_esw_br_port *
+ice_eswitch_br_port_init(struct ice_esw_br *bridge)
+{
+	struct ice_esw_br_port *br_port;
+
+	br_port = kzalloc(sizeof(*br_port), GFP_KERNEL);
+	if (!br_port)
+		return ERR_PTR(-ENOMEM);
+
+	xa_init(&br_port->vlans);
+
+	br_port->bridge = bridge;
+
+	return br_port;
+}
+
+static int
+ice_eswitch_br_vf_repr_port_init(struct ice_esw_br *bridge,
+				 struct ice_repr *repr)
+{
+	struct ice_esw_br_port *br_port;
+	int err;
+
+	br_port = ice_eswitch_br_port_init(bridge);
+	if (IS_ERR(br_port))
+		return PTR_ERR(br_port);
+
+	br_port->vsi = repr->src_vsi;
+	br_port->vsi_idx = br_port->vsi->idx;
+	br_port->type = ICE_ESWITCH_BR_VF_REPR_PORT;
+	repr->br_port = br_port;
+
+	err = xa_insert(&bridge->ports, br_port->vsi_idx, br_port, GFP_KERNEL);
+	if (err) {
+		ice_eswitch_br_port_deinit(bridge, br_port);
+		return err;
+	}
+
+	return 0;
+}
+
+static int
+ice_eswitch_br_uplink_port_init(struct ice_esw_br *bridge, struct ice_pf *pf)
+{
+	struct ice_vsi *vsi = pf->switchdev.uplink_vsi;
+	struct ice_esw_br_port *br_port;
+	int err;
+
+	br_port = ice_eswitch_br_port_init(bridge);
+	if (IS_ERR(br_port))
+		return PTR_ERR(br_port);
+
+	br_port->vsi = vsi;
+	br_port->vsi_idx = br_port->vsi->idx;
+	br_port->type = ICE_ESWITCH_BR_UPLINK_PORT;
+	pf->br_port = br_port;
+
+	err = xa_insert(&bridge->ports, br_port->vsi_idx, br_port, GFP_KERNEL);
+	if (err) {
+		ice_eswitch_br_port_deinit(bridge, br_port);
+		return err;
+	}
+
+	return 0;
+}
+
+static void
+ice_eswitch_br_ports_flush(struct ice_esw_br *bridge)
+{
+	struct ice_esw_br_port *port;
+	unsigned long i;
+
+	xa_for_each(&bridge->ports, i, port)
+		ice_eswitch_br_port_deinit(bridge, port);
+}
+
+static void
+ice_eswitch_br_deinit(struct ice_esw_br_offloads *br_offloads,
+		      struct ice_esw_br *bridge)
+{
+	if (!bridge)
+		return;
+
+	/* Cleanup all the ports that were added asynchronously
+	 * through NETDEV_CHANGEUPPER event.
+	 */
+	ice_eswitch_br_ports_flush(bridge);
+	WARN_ON(!xa_empty(&bridge->ports));
+	xa_destroy(&bridge->ports);
+	rhashtable_destroy(&bridge->fdb_ht);
+
+	br_offloads->bridge = NULL;
+	kfree(bridge);
+}
+
+static struct ice_esw_br *
+ice_eswitch_br_init(struct ice_esw_br_offloads *br_offloads, int ifindex)
+{
+	struct ice_esw_br *bridge;
+	int err;
+
+	bridge = kzalloc(sizeof(*bridge), GFP_KERNEL);
+	if (!bridge)
+		return ERR_PTR(-ENOMEM);
+
+	err = rhashtable_init(&bridge->fdb_ht, &ice_fdb_ht_params);
+	if (err) {
+		kfree(bridge);
+		return ERR_PTR(err);
+	}
+
+	INIT_LIST_HEAD(&bridge->fdb_list);
+	bridge->br_offloads = br_offloads;
+	bridge->ifindex = ifindex;
+	bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME);
+	xa_init(&bridge->ports);
+	br_offloads->bridge = bridge;
+
+	return bridge;
+}
+
+static struct ice_esw_br *
+ice_eswitch_br_get(struct ice_esw_br_offloads *br_offloads, int ifindex,
+		   struct netlink_ext_ack *extack)
+{
+	struct ice_esw_br *bridge = br_offloads->bridge;
+
+	if (bridge) {
+		if (bridge->ifindex != ifindex) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Only one bridge is supported per eswitch");
+			return ERR_PTR(-EOPNOTSUPP);
+		}
+		return bridge;
+	}
+
+	/* Create the bridge if it doesn't exist yet */
+	bridge = ice_eswitch_br_init(br_offloads, ifindex);
+	if (IS_ERR(bridge))
+		NL_SET_ERR_MSG_MOD(extack, "Failed to init the bridge");
+
+	return bridge;
+}
+
+static void
+ice_eswitch_br_verify_deinit(struct ice_esw_br_offloads *br_offloads,
+			     struct ice_esw_br *bridge)
+{
+	/* Remove the bridge if it exists and there are no ports left */
+	if (!bridge || !xa_empty(&bridge->ports))
+		return;
+
+	ice_eswitch_br_deinit(br_offloads, bridge);
+}
+
+static int
+ice_eswitch_br_port_unlink(struct ice_esw_br_offloads *br_offloads,
+			   struct net_device *dev, int ifindex,
+			   struct netlink_ext_ack *extack)
+{
+	struct ice_esw_br_port *br_port = ice_eswitch_br_netdev_to_port(dev);
+	struct ice_esw_br *bridge;
+
+	if (!br_port) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Port representor is not attached to any bridge");
+		return -EINVAL;
+	}
+
+	if (br_port->bridge->ifindex != ifindex) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Port representor is attached to another bridge");
+		return -EINVAL;
+	}
+
+	bridge = br_port->bridge;
+
+	trace_ice_eswitch_br_port_unlink(br_port);
+	ice_eswitch_br_port_deinit(br_port->bridge, br_port);
+	ice_eswitch_br_verify_deinit(br_offloads, bridge);
+
+	return 0;
+}
+
+static int
+ice_eswitch_br_port_link(struct ice_esw_br_offloads *br_offloads,
+			 struct net_device *dev, int ifindex,
+			 struct netlink_ext_ack *extack)
+{
+	struct ice_esw_br *bridge;
+	int err;
+
+	if (ice_eswitch_br_netdev_to_port(dev)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Port is already attached to the bridge");
+		return -EINVAL;
+	}
+
+	bridge = ice_eswitch_br_get(br_offloads, ifindex, extack);
+	if (IS_ERR(bridge))
+		return PTR_ERR(bridge);
+
+	if (ice_is_port_repr_netdev(dev)) {
+		struct ice_repr *repr = ice_netdev_to_repr(dev);
+
+		err = ice_eswitch_br_vf_repr_port_init(bridge, repr);
+		trace_ice_eswitch_br_port_link(repr->br_port);
+	} else {
+		struct net_device *ice_dev;
+		struct ice_pf *pf;
+
+		if (netif_is_lag_master(dev))
+			ice_dev = ice_eswitch_br_get_uplink_from_lag(dev);
+		else
+			ice_dev = dev;
+
+		if (!ice_dev)
+			return 0;
+
+		pf = ice_netdev_to_pf(ice_dev);
+
+		err = ice_eswitch_br_uplink_port_init(bridge, pf);
+		trace_ice_eswitch_br_port_link(pf->br_port);
+	}
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to init bridge port");
+		goto err_port_init;
+	}
+
+	return 0;
+
+err_port_init:
+	ice_eswitch_br_verify_deinit(br_offloads, bridge);
+	return err;
+}
+
+static int
+ice_eswitch_br_port_changeupper(struct notifier_block *nb, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct netdev_notifier_changeupper_info *info = ptr;
+	struct ice_esw_br_offloads *br_offloads;
+	struct netlink_ext_ack *extack;
+	struct net_device *upper;
+
+	br_offloads = ice_nb_to_br_offloads(nb, netdev_nb);
+
+	if (!ice_eswitch_br_is_dev_valid(dev))
+		return 0;
+
+	upper = info->upper_dev;
+	if (!netif_is_bridge_master(upper))
+		return 0;
+
+	extack = netdev_notifier_info_to_extack(&info->info);
+
+	if (info->linking)
+		return ice_eswitch_br_port_link(br_offloads, dev,
+						upper->ifindex, extack);
+	else
+		return ice_eswitch_br_port_unlink(br_offloads, dev,
+						  upper->ifindex, extack);
+}
+
+static int
+ice_eswitch_br_port_event(struct notifier_block *nb,
+			  unsigned long event, void *ptr)
+{
+	int err = 0;
+
+	switch (event) {
+	case NETDEV_CHANGEUPPER:
+		err = ice_eswitch_br_port_changeupper(nb, ptr);
+		break;
+	}
+
+	return notifier_from_errno(err);
+}
+
+static void
+ice_eswitch_br_offloads_dealloc(struct ice_pf *pf)
+{
+	struct ice_esw_br_offloads *br_offloads = pf->switchdev.br_offloads;
+
+	ASSERT_RTNL();
+
+	if (!br_offloads)
+		return;
+
+	ice_eswitch_br_deinit(br_offloads, br_offloads->bridge);
+
+	pf->switchdev.br_offloads = NULL;
+	kfree(br_offloads);
+}
+
+static struct ice_esw_br_offloads *
+ice_eswitch_br_offloads_alloc(struct ice_pf *pf)
+{
+	struct ice_esw_br_offloads *br_offloads;
+
+	ASSERT_RTNL();
+
+	if (pf->switchdev.br_offloads)
+		return ERR_PTR(-EEXIST);
+
+	br_offloads = kzalloc(sizeof(*br_offloads), GFP_KERNEL);
+	if (!br_offloads)
+		return ERR_PTR(-ENOMEM);
+
+	pf->switchdev.br_offloads = br_offloads;
+	br_offloads->pf = pf;
+
+	return br_offloads;
+}
+
+void
+ice_eswitch_br_offloads_deinit(struct ice_pf *pf)
+{
+	struct ice_esw_br_offloads *br_offloads;
+
+	br_offloads = pf->switchdev.br_offloads;
+	if (!br_offloads)
+		return;
+
+	cancel_delayed_work_sync(&br_offloads->update_work);
+	unregister_netdevice_notifier(&br_offloads->netdev_nb);
+	unregister_switchdev_blocking_notifier(&br_offloads->switchdev_blk);
+	unregister_switchdev_notifier(&br_offloads->switchdev_nb);
+	destroy_workqueue(br_offloads->wq);
+	/* Although notifier block is unregistered just before,
+	 * so we don't get any new events, some events might be
+	 * already in progress. Hold the rtnl lock and wait for
+	 * them to finished.
+	 */
+	rtnl_lock();
+	ice_eswitch_br_offloads_dealloc(pf);
+	rtnl_unlock();
+}
+
+static void ice_eswitch_br_update(struct ice_esw_br_offloads *br_offloads)
+{
+	struct ice_esw_br *bridge = br_offloads->bridge;
+	struct ice_esw_br_fdb_entry *entry, *tmp;
+
+	if (!bridge)
+		return;
+
+	rtnl_lock();
+	list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) {
+		if (entry->flags & ICE_ESWITCH_BR_FDB_ADDED_BY_USER)
+			continue;
+
+		if (time_is_after_eq_jiffies(entry->last_use +
+					     bridge->ageing_time))
+			continue;
+
+		ice_eswitch_br_fdb_entry_notify_and_cleanup(bridge, entry);
+	}
+	rtnl_unlock();
+}
+
+static void ice_eswitch_br_update_work(struct work_struct *work)
+{
+	struct ice_esw_br_offloads *br_offloads;
+
+	br_offloads = ice_work_to_br_offloads(work);
+
+	ice_eswitch_br_update(br_offloads);
+
+	queue_delayed_work(br_offloads->wq, &br_offloads->update_work,
+			   ICE_ESW_BRIDGE_UPDATE_INTERVAL);
+}
+
+int
+ice_eswitch_br_offloads_init(struct ice_pf *pf)
+{
+	struct ice_esw_br_offloads *br_offloads;
+	struct device *dev = ice_pf_to_dev(pf);
+	int err;
+
+	rtnl_lock();
+	br_offloads = ice_eswitch_br_offloads_alloc(pf);
+	rtnl_unlock();
+	if (IS_ERR(br_offloads)) {
+		dev_err(dev, "Failed to init eswitch bridge\n");
+		return PTR_ERR(br_offloads);
+	}
+
+	br_offloads->wq = alloc_ordered_workqueue("ice_bridge_wq", 0);
+	if (!br_offloads->wq) {
+		err = -ENOMEM;
+		dev_err(dev, "Failed to allocate bridge workqueue\n");
+		goto err_alloc_wq;
+	}
+
+	br_offloads->switchdev_nb.notifier_call =
+		ice_eswitch_br_switchdev_event;
+	err = register_switchdev_notifier(&br_offloads->switchdev_nb);
+	if (err) {
+		dev_err(dev,
+			"Failed to register switchdev notifier\n");
+		goto err_reg_switchdev_nb;
+	}
+
+	br_offloads->switchdev_blk.notifier_call =
+		ice_eswitch_br_event_blocking;
+	err = register_switchdev_blocking_notifier(&br_offloads->switchdev_blk);
+	if (err) {
+		dev_err(dev,
+			"Failed to register bridge blocking switchdev notifier\n");
+		goto err_reg_switchdev_blk;
+	}
+
+	br_offloads->netdev_nb.notifier_call = ice_eswitch_br_port_event;
+	err = register_netdevice_notifier(&br_offloads->netdev_nb);
+	if (err) {
+		dev_err(dev,
+			"Failed to register bridge port event notifier\n");
+		goto err_reg_netdev_nb;
+	}
+
+	INIT_DELAYED_WORK(&br_offloads->update_work,
+			  ice_eswitch_br_update_work);
+	queue_delayed_work(br_offloads->wq, &br_offloads->update_work,
+			   ICE_ESW_BRIDGE_UPDATE_INTERVAL);
+
+	return 0;
+
+err_reg_netdev_nb:
+	unregister_switchdev_blocking_notifier(&br_offloads->switchdev_blk);
+err_reg_switchdev_blk:
+	unregister_switchdev_notifier(&br_offloads->switchdev_nb);
+err_reg_switchdev_nb:
+	destroy_workqueue(br_offloads->wq);
+err_alloc_wq:
+	rtnl_lock();
+	ice_eswitch_br_offloads_dealloc(pf);
+	rtnl_unlock();
+
+	return err;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch_br.h b/drivers/net/ethernet/intel/ice/ice_eswitch_br.h
new file mode 100644
index 0000000000..85a8fadb29
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch_br.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2023, Intel Corporation. */
+
+#ifndef _ICE_ESWITCH_BR_H_
+#define _ICE_ESWITCH_BR_H_
+
+#include <linux/rhashtable.h>
+#include <linux/workqueue.h>
+
+struct ice_esw_br_fdb_data {
+	unsigned char addr[ETH_ALEN];
+	u16 vid;
+};
+
+struct ice_esw_br_flow {
+	struct ice_rule_query_data *fwd_rule;
+	struct ice_rule_query_data *guard_rule;
+};
+
+enum {
+	ICE_ESWITCH_BR_FDB_ADDED_BY_USER = BIT(0),
+};
+
+struct ice_esw_br_fdb_entry {
+	struct ice_esw_br_fdb_data data;
+	struct rhash_head ht_node;
+	struct list_head list;
+
+	int flags;
+
+	struct net_device *dev;
+	struct ice_esw_br_port *br_port;
+	struct ice_esw_br_flow *flow;
+
+	unsigned long last_use;
+};
+
+enum ice_esw_br_port_type {
+	ICE_ESWITCH_BR_UPLINK_PORT = 0,
+	ICE_ESWITCH_BR_VF_REPR_PORT = 1,
+};
+
+struct ice_esw_br_port {
+	struct ice_esw_br *bridge;
+	struct ice_vsi *vsi;
+	enum ice_esw_br_port_type type;
+	u16 vsi_idx;
+	u16 pvid;
+	struct xarray vlans;
+};
+
+enum {
+	ICE_ESWITCH_BR_VLAN_FILTERING = BIT(0),
+};
+
+struct ice_esw_br {
+	struct ice_esw_br_offloads *br_offloads;
+	struct xarray ports;
+
+	struct rhashtable fdb_ht;
+	struct list_head fdb_list;
+
+	int ifindex;
+	u32 flags;
+	unsigned long ageing_time;
+};
+
+struct ice_esw_br_offloads {
+	struct ice_pf *pf;
+	struct ice_esw_br *bridge;
+	struct notifier_block netdev_nb;
+	struct notifier_block switchdev_blk;
+	struct notifier_block switchdev_nb;
+
+	struct workqueue_struct *wq;
+	struct delayed_work update_work;
+};
+
+struct ice_esw_br_fdb_work {
+	struct work_struct work;
+	struct switchdev_notifier_fdb_info fdb_info;
+	struct net_device *dev;
+	unsigned long event;
+};
+
+struct ice_esw_br_vlan {
+	u16 vid;
+	u16 flags;
+};
+
+#define ice_nb_to_br_offloads(nb, nb_name) \
+	container_of(nb, \
+		     struct ice_esw_br_offloads, \
+		     nb_name)
+
+#define ice_work_to_br_offloads(w) \
+	container_of(w, \
+		     struct ice_esw_br_offloads, \
+		     update_work.work)
+
+#define ice_work_to_fdb_work(w) \
+	container_of(w, \
+		     struct ice_esw_br_fdb_work, \
+		     work)
+
+static inline bool ice_eswitch_br_is_vid_valid(u16 vid)
+{
+	/* In trunk VLAN mode, for untagged traffic the bridge sends requests
+	 * to offload VLAN 1 with pvid and untagged flags set. Since these
+	 * flags are not supported, add a MAC filter instead.
+	 */
+	return vid > 1;
+}
+
+void
+ice_eswitch_br_offloads_deinit(struct ice_pf *pf);
+int
+ice_eswitch_br_offloads_init(struct ice_pf *pf);
+
+#endif /* _ICE_ESWITCH_BR_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
new file mode 100644
index 0000000000..9be13e9840
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -0,0 +1,4241 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* ethtool support for ice */
+
+#include "ice.h"
+#include "ice_ethtool.h"
+#include "ice_flow.h"
+#include "ice_fltr.h"
+#include "ice_lib.h"
+#include "ice_dcb_lib.h"
+#include <net/dcbnl.h>
+
+struct ice_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+#define ICE_STAT(_type, _name, _stat) { \
+	.stat_string = _name, \
+	.sizeof_stat = sizeof_field(_type, _stat), \
+	.stat_offset = offsetof(_type, _stat) \
+}
+
+#define ICE_VSI_STAT(_name, _stat) \
+		ICE_STAT(struct ice_vsi, _name, _stat)
+#define ICE_PF_STAT(_name, _stat) \
+		ICE_STAT(struct ice_pf, _name, _stat)
+
+static int ice_q_stats_len(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+
+	return ((np->vsi->alloc_txq + np->vsi->alloc_rxq) *
+		(sizeof(struct ice_q_stats) / sizeof(u64)));
+}
+
+#define ICE_PF_STATS_LEN	ARRAY_SIZE(ice_gstrings_pf_stats)
+#define ICE_VSI_STATS_LEN	ARRAY_SIZE(ice_gstrings_vsi_stats)
+
+#define ICE_PFC_STATS_LEN ( \
+		(sizeof_field(struct ice_pf, stats.priority_xoff_rx) + \
+		 sizeof_field(struct ice_pf, stats.priority_xon_rx) + \
+		 sizeof_field(struct ice_pf, stats.priority_xoff_tx) + \
+		 sizeof_field(struct ice_pf, stats.priority_xon_tx)) \
+		 / sizeof(u64))
+#define ICE_ALL_STATS_LEN(n)	(ICE_PF_STATS_LEN + ICE_PFC_STATS_LEN + \
+				 ICE_VSI_STATS_LEN + ice_q_stats_len(n))
+
+static const struct ice_stats ice_gstrings_vsi_stats[] = {
+	ICE_VSI_STAT("rx_unicast", eth_stats.rx_unicast),
+	ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
+	ICE_VSI_STAT("rx_multicast", eth_stats.rx_multicast),
+	ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast),
+	ICE_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast),
+	ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
+	ICE_VSI_STAT("rx_bytes", eth_stats.rx_bytes),
+	ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes),
+	ICE_VSI_STAT("rx_dropped", eth_stats.rx_discards),
+	ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
+	ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed),
+	ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
+	ICE_VSI_STAT("tx_errors", eth_stats.tx_errors),
+	ICE_VSI_STAT("tx_linearize", tx_linearize),
+	ICE_VSI_STAT("tx_busy", tx_busy),
+	ICE_VSI_STAT("tx_restart", tx_restart),
+};
+
+enum ice_ethtool_test_id {
+	ICE_ETH_TEST_REG = 0,
+	ICE_ETH_TEST_EEPROM,
+	ICE_ETH_TEST_INTR,
+	ICE_ETH_TEST_LOOP,
+	ICE_ETH_TEST_LINK,
+};
+
+static const char ice_gstrings_test[][ETH_GSTRING_LEN] = {
+	"Register test  (offline)",
+	"EEPROM test    (offline)",
+	"Interrupt test (offline)",
+	"Loopback test  (offline)",
+	"Link test   (on/offline)",
+};
+
+#define ICE_TEST_LEN (sizeof(ice_gstrings_test) / ETH_GSTRING_LEN)
+
+/* These PF_STATs might look like duplicates of some NETDEV_STATs,
+ * but they aren't. This device is capable of supporting multiple
+ * VSIs/netdevs on a single PF. The NETDEV_STATs are for individual
+ * netdevs whereas the PF_STATs are for the physical function that's
+ * hosting these netdevs.
+ *
+ * The PF_STATs are appended to the netdev stats only when ethtool -S
+ * is queried on the base PF netdev.
+ */
+static const struct ice_stats ice_gstrings_pf_stats[] = {
+	ICE_PF_STAT("rx_bytes.nic", stats.eth.rx_bytes),
+	ICE_PF_STAT("tx_bytes.nic", stats.eth.tx_bytes),
+	ICE_PF_STAT("rx_unicast.nic", stats.eth.rx_unicast),
+	ICE_PF_STAT("tx_unicast.nic", stats.eth.tx_unicast),
+	ICE_PF_STAT("rx_multicast.nic", stats.eth.rx_multicast),
+	ICE_PF_STAT("tx_multicast.nic", stats.eth.tx_multicast),
+	ICE_PF_STAT("rx_broadcast.nic", stats.eth.rx_broadcast),
+	ICE_PF_STAT("tx_broadcast.nic", stats.eth.tx_broadcast),
+	ICE_PF_STAT("tx_errors.nic", stats.eth.tx_errors),
+	ICE_PF_STAT("tx_timeout.nic", tx_timeout_count),
+	ICE_PF_STAT("rx_size_64.nic", stats.rx_size_64),
+	ICE_PF_STAT("tx_size_64.nic", stats.tx_size_64),
+	ICE_PF_STAT("rx_size_127.nic", stats.rx_size_127),
+	ICE_PF_STAT("tx_size_127.nic", stats.tx_size_127),
+	ICE_PF_STAT("rx_size_255.nic", stats.rx_size_255),
+	ICE_PF_STAT("tx_size_255.nic", stats.tx_size_255),
+	ICE_PF_STAT("rx_size_511.nic", stats.rx_size_511),
+	ICE_PF_STAT("tx_size_511.nic", stats.tx_size_511),
+	ICE_PF_STAT("rx_size_1023.nic", stats.rx_size_1023),
+	ICE_PF_STAT("tx_size_1023.nic", stats.tx_size_1023),
+	ICE_PF_STAT("rx_size_1522.nic", stats.rx_size_1522),
+	ICE_PF_STAT("tx_size_1522.nic", stats.tx_size_1522),
+	ICE_PF_STAT("rx_size_big.nic", stats.rx_size_big),
+	ICE_PF_STAT("tx_size_big.nic", stats.tx_size_big),
+	ICE_PF_STAT("link_xon_rx.nic", stats.link_xon_rx),
+	ICE_PF_STAT("link_xon_tx.nic", stats.link_xon_tx),
+	ICE_PF_STAT("link_xoff_rx.nic", stats.link_xoff_rx),
+	ICE_PF_STAT("link_xoff_tx.nic", stats.link_xoff_tx),
+	ICE_PF_STAT("tx_dropped_link_down.nic", stats.tx_dropped_link_down),
+	ICE_PF_STAT("rx_undersize.nic", stats.rx_undersize),
+	ICE_PF_STAT("rx_fragments.nic", stats.rx_fragments),
+	ICE_PF_STAT("rx_oversize.nic", stats.rx_oversize),
+	ICE_PF_STAT("rx_jabber.nic", stats.rx_jabber),
+	ICE_PF_STAT("rx_csum_bad.nic", hw_csum_rx_error),
+	ICE_PF_STAT("rx_length_errors.nic", stats.rx_len_errors),
+	ICE_PF_STAT("rx_dropped.nic", stats.eth.rx_discards),
+	ICE_PF_STAT("rx_crc_errors.nic", stats.crc_errors),
+	ICE_PF_STAT("illegal_bytes.nic", stats.illegal_bytes),
+	ICE_PF_STAT("mac_local_faults.nic", stats.mac_local_faults),
+	ICE_PF_STAT("mac_remote_faults.nic", stats.mac_remote_faults),
+	ICE_PF_STAT("fdir_sb_match.nic", stats.fd_sb_match),
+	ICE_PF_STAT("fdir_sb_status.nic", stats.fd_sb_status),
+	ICE_PF_STAT("tx_hwtstamp_skipped", ptp.tx_hwtstamp_skipped),
+	ICE_PF_STAT("tx_hwtstamp_timeouts", ptp.tx_hwtstamp_timeouts),
+	ICE_PF_STAT("tx_hwtstamp_flushed", ptp.tx_hwtstamp_flushed),
+	ICE_PF_STAT("tx_hwtstamp_discarded", ptp.tx_hwtstamp_discarded),
+	ICE_PF_STAT("late_cached_phc_updates", ptp.late_cached_phc_updates),
+};
+
+static const u32 ice_regs_dump_list[] = {
+	PFGEN_STATE,
+	PRTGEN_STATUS,
+	QRX_CTRL(0),
+	QINT_TQCTL(0),
+	QINT_RQCTL(0),
+	PFINT_OICR_ENA,
+	QRX_ITR(0),
+#define GLDCB_TLPM_PCI_DM			0x000A0180
+	GLDCB_TLPM_PCI_DM,
+#define GLDCB_TLPM_TC2PFC			0x000A0194
+	GLDCB_TLPM_TC2PFC,
+#define TCDCB_TLPM_WAIT_DM(_i)			(0x000A0080 + ((_i) * 4))
+	TCDCB_TLPM_WAIT_DM(0),
+	TCDCB_TLPM_WAIT_DM(1),
+	TCDCB_TLPM_WAIT_DM(2),
+	TCDCB_TLPM_WAIT_DM(3),
+	TCDCB_TLPM_WAIT_DM(4),
+	TCDCB_TLPM_WAIT_DM(5),
+	TCDCB_TLPM_WAIT_DM(6),
+	TCDCB_TLPM_WAIT_DM(7),
+	TCDCB_TLPM_WAIT_DM(8),
+	TCDCB_TLPM_WAIT_DM(9),
+	TCDCB_TLPM_WAIT_DM(10),
+	TCDCB_TLPM_WAIT_DM(11),
+	TCDCB_TLPM_WAIT_DM(12),
+	TCDCB_TLPM_WAIT_DM(13),
+	TCDCB_TLPM_WAIT_DM(14),
+	TCDCB_TLPM_WAIT_DM(15),
+	TCDCB_TLPM_WAIT_DM(16),
+	TCDCB_TLPM_WAIT_DM(17),
+	TCDCB_TLPM_WAIT_DM(18),
+	TCDCB_TLPM_WAIT_DM(19),
+	TCDCB_TLPM_WAIT_DM(20),
+	TCDCB_TLPM_WAIT_DM(21),
+	TCDCB_TLPM_WAIT_DM(22),
+	TCDCB_TLPM_WAIT_DM(23),
+	TCDCB_TLPM_WAIT_DM(24),
+	TCDCB_TLPM_WAIT_DM(25),
+	TCDCB_TLPM_WAIT_DM(26),
+	TCDCB_TLPM_WAIT_DM(27),
+	TCDCB_TLPM_WAIT_DM(28),
+	TCDCB_TLPM_WAIT_DM(29),
+	TCDCB_TLPM_WAIT_DM(30),
+	TCDCB_TLPM_WAIT_DM(31),
+#define GLPCI_WATMK_CLNT_PIPEMON		0x000BFD90
+	GLPCI_WATMK_CLNT_PIPEMON,
+#define GLPCI_CUR_CLNT_COMMON			0x000BFD84
+	GLPCI_CUR_CLNT_COMMON,
+#define GLPCI_CUR_CLNT_PIPEMON			0x000BFD88
+	GLPCI_CUR_CLNT_PIPEMON,
+#define GLPCI_PCIERR				0x0009DEB0
+	GLPCI_PCIERR,
+#define GLPSM_DEBUG_CTL_STATUS			0x000B0600
+	GLPSM_DEBUG_CTL_STATUS,
+#define GLPSM0_DEBUG_FIFO_OVERFLOW_DETECT	0x000B0680
+	GLPSM0_DEBUG_FIFO_OVERFLOW_DETECT,
+#define GLPSM0_DEBUG_FIFO_UNDERFLOW_DETECT	0x000B0684
+	GLPSM0_DEBUG_FIFO_UNDERFLOW_DETECT,
+#define GLPSM0_DEBUG_DT_OUT_OF_WINDOW		0x000B0688
+	GLPSM0_DEBUG_DT_OUT_OF_WINDOW,
+#define GLPSM0_DEBUG_INTF_HW_ERROR_DETECT	0x000B069C
+	GLPSM0_DEBUG_INTF_HW_ERROR_DETECT,
+#define GLPSM0_DEBUG_MISC_HW_ERROR_DETECT	0x000B06A0
+	GLPSM0_DEBUG_MISC_HW_ERROR_DETECT,
+#define GLPSM1_DEBUG_FIFO_OVERFLOW_DETECT	0x000B0E80
+	GLPSM1_DEBUG_FIFO_OVERFLOW_DETECT,
+#define GLPSM1_DEBUG_FIFO_UNDERFLOW_DETECT	0x000B0E84
+	GLPSM1_DEBUG_FIFO_UNDERFLOW_DETECT,
+#define GLPSM1_DEBUG_SRL_FIFO_OVERFLOW_DETECT	0x000B0E88
+	GLPSM1_DEBUG_SRL_FIFO_OVERFLOW_DETECT,
+#define GLPSM1_DEBUG_SRL_FIFO_UNDERFLOW_DETECT  0x000B0E8C
+	GLPSM1_DEBUG_SRL_FIFO_UNDERFLOW_DETECT,
+#define GLPSM1_DEBUG_MISC_HW_ERROR_DETECT       0x000B0E90
+	GLPSM1_DEBUG_MISC_HW_ERROR_DETECT,
+#define GLPSM2_DEBUG_FIFO_OVERFLOW_DETECT       0x000B1680
+	GLPSM2_DEBUG_FIFO_OVERFLOW_DETECT,
+#define GLPSM2_DEBUG_FIFO_UNDERFLOW_DETECT      0x000B1684
+	GLPSM2_DEBUG_FIFO_UNDERFLOW_DETECT,
+#define GLPSM2_DEBUG_MISC_HW_ERROR_DETECT       0x000B1688
+	GLPSM2_DEBUG_MISC_HW_ERROR_DETECT,
+#define GLTDPU_TCLAN_COMP_BOB(_i)               (0x00049ADC + ((_i) * 4))
+	GLTDPU_TCLAN_COMP_BOB(1),
+	GLTDPU_TCLAN_COMP_BOB(2),
+	GLTDPU_TCLAN_COMP_BOB(3),
+	GLTDPU_TCLAN_COMP_BOB(4),
+	GLTDPU_TCLAN_COMP_BOB(5),
+	GLTDPU_TCLAN_COMP_BOB(6),
+	GLTDPU_TCLAN_COMP_BOB(7),
+	GLTDPU_TCLAN_COMP_BOB(8),
+#define GLTDPU_TCB_CMD_BOB(_i)                  (0x0004975C + ((_i) * 4))
+	GLTDPU_TCB_CMD_BOB(1),
+	GLTDPU_TCB_CMD_BOB(2),
+	GLTDPU_TCB_CMD_BOB(3),
+	GLTDPU_TCB_CMD_BOB(4),
+	GLTDPU_TCB_CMD_BOB(5),
+	GLTDPU_TCB_CMD_BOB(6),
+	GLTDPU_TCB_CMD_BOB(7),
+	GLTDPU_TCB_CMD_BOB(8),
+#define GLTDPU_PSM_UPDATE_BOB(_i)               (0x00049B5C + ((_i) * 4))
+	GLTDPU_PSM_UPDATE_BOB(1),
+	GLTDPU_PSM_UPDATE_BOB(2),
+	GLTDPU_PSM_UPDATE_BOB(3),
+	GLTDPU_PSM_UPDATE_BOB(4),
+	GLTDPU_PSM_UPDATE_BOB(5),
+	GLTDPU_PSM_UPDATE_BOB(6),
+	GLTDPU_PSM_UPDATE_BOB(7),
+	GLTDPU_PSM_UPDATE_BOB(8),
+#define GLTCB_CMD_IN_BOB(_i)                    (0x000AE288 + ((_i) * 4))
+	GLTCB_CMD_IN_BOB(1),
+	GLTCB_CMD_IN_BOB(2),
+	GLTCB_CMD_IN_BOB(3),
+	GLTCB_CMD_IN_BOB(4),
+	GLTCB_CMD_IN_BOB(5),
+	GLTCB_CMD_IN_BOB(6),
+	GLTCB_CMD_IN_BOB(7),
+	GLTCB_CMD_IN_BOB(8),
+#define GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(_i)   (0x000FC148 + ((_i) * 4))
+	GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(1),
+	GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(2),
+	GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(3),
+	GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(4),
+	GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(5),
+	GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(6),
+	GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(7),
+	GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(8),
+#define GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(_i) (0x000FC248 + ((_i) * 4))
+	GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(1),
+	GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(2),
+	GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(3),
+	GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(4),
+	GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(5),
+	GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(6),
+	GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(7),
+	GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(8),
+#define GLLAN_TCLAN_CACHE_CTL_BOB_CTL(_i)       (0x000FC1C8 + ((_i) * 4))
+	GLLAN_TCLAN_CACHE_CTL_BOB_CTL(1),
+	GLLAN_TCLAN_CACHE_CTL_BOB_CTL(2),
+	GLLAN_TCLAN_CACHE_CTL_BOB_CTL(3),
+	GLLAN_TCLAN_CACHE_CTL_BOB_CTL(4),
+	GLLAN_TCLAN_CACHE_CTL_BOB_CTL(5),
+	GLLAN_TCLAN_CACHE_CTL_BOB_CTL(6),
+	GLLAN_TCLAN_CACHE_CTL_BOB_CTL(7),
+	GLLAN_TCLAN_CACHE_CTL_BOB_CTL(8),
+#define GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(_i)  (0x000FC188 + ((_i) * 4))
+	GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(1),
+	GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(2),
+	GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(3),
+	GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(4),
+	GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(5),
+	GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(6),
+	GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(7),
+	GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(8),
+#define GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(_i) (0x000FC288 + ((_i) * 4))
+	GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(1),
+	GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(2),
+	GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(3),
+	GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(4),
+	GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(5),
+	GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(6),
+	GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(7),
+	GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(8),
+#define PRTDCB_TCUPM_REG_CM(_i)			(0x000BC360 + ((_i) * 4))
+	PRTDCB_TCUPM_REG_CM(0),
+	PRTDCB_TCUPM_REG_CM(1),
+	PRTDCB_TCUPM_REG_CM(2),
+	PRTDCB_TCUPM_REG_CM(3),
+#define PRTDCB_TCUPM_REG_DM(_i)			(0x000BC3A0 + ((_i) * 4))
+	PRTDCB_TCUPM_REG_DM(0),
+	PRTDCB_TCUPM_REG_DM(1),
+	PRTDCB_TCUPM_REG_DM(2),
+	PRTDCB_TCUPM_REG_DM(3),
+#define PRTDCB_TLPM_REG_DM(_i)			(0x000A0000 + ((_i) * 4))
+	PRTDCB_TLPM_REG_DM(0),
+	PRTDCB_TLPM_REG_DM(1),
+	PRTDCB_TLPM_REG_DM(2),
+	PRTDCB_TLPM_REG_DM(3),
+};
+
+struct ice_priv_flag {
+	char name[ETH_GSTRING_LEN];
+	u32 bitno;			/* bit position in pf->flags */
+};
+
+#define ICE_PRIV_FLAG(_name, _bitno) { \
+	.name = _name, \
+	.bitno = _bitno, \
+}
+
+static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
+	ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
+	ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT),
+	ICE_PRIV_FLAG("vf-true-promisc-support",
+		      ICE_FLAG_VF_TRUE_PROMISC_ENA),
+	ICE_PRIV_FLAG("mdd-auto-reset-vf", ICE_FLAG_MDD_AUTO_RESET_VF),
+	ICE_PRIV_FLAG("vf-vlan-pruning", ICE_FLAG_VF_VLAN_PRUNING),
+	ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX),
+};
+
+#define ICE_PRIV_FLAG_ARRAY_SIZE	ARRAY_SIZE(ice_gstrings_priv_flags)
+
+static void
+__ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo,
+		  struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_orom_info *orom;
+	struct ice_nvm_info *nvm;
+
+	nvm = &hw->flash.nvm;
+	orom = &hw->flash.orom;
+
+	strscpy(drvinfo->driver, KBUILD_MODNAME, sizeof(drvinfo->driver));
+
+	/* Display NVM version (from which the firmware version can be
+	 * determined) which contains more pertinent information.
+	 */
+	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+		 "%x.%02x 0x%x %d.%d.%d", nvm->major, nvm->minor,
+		 nvm->eetrack, orom->major, orom->build, orom->patch);
+
+	strscpy(drvinfo->bus_info, pci_name(pf->pdev),
+		sizeof(drvinfo->bus_info));
+}
+
+static void
+ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+
+	__ice_get_drvinfo(netdev, drvinfo, np->vsi);
+	drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE;
+}
+
+static int ice_get_regs_len(struct net_device __always_unused *netdev)
+{
+	return sizeof(ice_regs_dump_list);
+}
+
+static void
+ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 *regs_buf = (u32 *)p;
+	unsigned int i;
+
+	regs->version = 1;
+
+	for (i = 0; i < ARRAY_SIZE(ice_regs_dump_list); ++i)
+		regs_buf[i] = rd32(hw, ice_regs_dump_list[i]);
+}
+
+static u32 ice_get_msglevel(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+	if (pf->hw.debug_mask)
+		netdev_info(netdev, "hw debug_mask: 0x%llX\n",
+			    pf->hw.debug_mask);
+#endif /* !CONFIG_DYNAMIC_DEBUG */
+
+	return pf->msg_enable;
+}
+
+static void ice_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+	if (ICE_DBG_USER & data)
+		pf->hw.debug_mask = data;
+	else
+		pf->msg_enable = data;
+#else
+	pf->msg_enable = data;
+#endif /* !CONFIG_DYNAMIC_DEBUG */
+}
+
+static int ice_get_eeprom_len(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	return (int)pf->hw.flash.flash_size;
+}
+
+static int
+ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom,
+	       u8 *bytes)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	struct device *dev;
+	int ret;
+	u8 *buf;
+
+	dev = ice_pf_to_dev(pf);
+
+	eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+	netdev_dbg(netdev, "GEEPROM cmd 0x%08x, offset 0x%08x, len 0x%08x\n",
+		   eeprom->cmd, eeprom->offset, eeprom->len);
+
+	buf = kzalloc(eeprom->len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (ret) {
+		dev_err(dev, "ice_acquire_nvm failed, err %d aq_err %s\n",
+			ret, ice_aq_str(hw->adminq.sq_last_status));
+		goto out;
+	}
+
+	ret = ice_read_flat_nvm(hw, eeprom->offset, &eeprom->len, buf,
+				false);
+	if (ret) {
+		dev_err(dev, "ice_read_flat_nvm failed, err %d aq_err %s\n",
+			ret, ice_aq_str(hw->adminq.sq_last_status));
+		goto release;
+	}
+
+	memcpy(bytes, buf, eeprom->len);
+release:
+	ice_release_nvm(hw);
+out:
+	kfree(buf);
+	return ret;
+}
+
+/**
+ * ice_active_vfs - check if there are any active VFs
+ * @pf: board private structure
+ *
+ * Returns true if an active VF is found, otherwise returns false
+ */
+static bool ice_active_vfs(struct ice_pf *pf)
+{
+	bool active = false;
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	rcu_read_lock();
+	ice_for_each_vf_rcu(pf, bkt, vf) {
+		if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+			active = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return active;
+}
+
+/**
+ * ice_link_test - perform a link test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_link_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	bool link_up = false;
+	int status;
+
+	netdev_info(netdev, "link test\n");
+	status = ice_get_link_status(np->vsi->port_info, &link_up);
+	if (status) {
+		netdev_err(netdev, "link query error, status = %d\n",
+			   status);
+		return 1;
+	}
+
+	if (!link_up)
+		return 2;
+
+	return 0;
+}
+
+/**
+ * ice_eeprom_test - perform an EEPROM test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_eeprom_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	netdev_info(netdev, "EEPROM test\n");
+	return !!(ice_nvm_validate_checksum(&pf->hw));
+}
+
+/**
+ * ice_reg_pattern_test
+ * @hw: pointer to the HW struct
+ * @reg: reg to be tested
+ * @mask: bits to be touched
+ */
+static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
+{
+	struct ice_pf *pf = (struct ice_pf *)hw->back;
+	struct device *dev = ice_pf_to_dev(pf);
+	static const u32 patterns[] = {
+		0x5A5A5A5A, 0xA5A5A5A5,
+		0x00000000, 0xFFFFFFFF
+	};
+	u32 val, orig_val;
+	unsigned int i;
+
+	orig_val = rd32(hw, reg);
+	for (i = 0; i < ARRAY_SIZE(patterns); ++i) {
+		u32 pattern = patterns[i] & mask;
+
+		wr32(hw, reg, pattern);
+		val = rd32(hw, reg);
+		if (val == pattern)
+			continue;
+		dev_err(dev, "%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n"
+			, __func__, reg, pattern, val);
+		return 1;
+	}
+
+	wr32(hw, reg, orig_val);
+	val = rd32(hw, reg);
+	if (val != orig_val) {
+		dev_err(dev, "%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n"
+			, __func__, reg, orig_val, val);
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_reg_test - perform a register test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_reg_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_hw *hw = np->vsi->port_info->hw;
+	u32 int_elements = hw->func_caps.common_cap.num_msix_vectors ?
+		hw->func_caps.common_cap.num_msix_vectors - 1 : 1;
+	struct ice_diag_reg_test_info {
+		u32 address;
+		u32 mask;
+		u32 elem_num;
+		u32 elem_size;
+	} ice_reg_list[] = {
+		{GLINT_ITR(0, 0), 0x00000fff, int_elements,
+			GLINT_ITR(0, 1) - GLINT_ITR(0, 0)},
+		{GLINT_ITR(1, 0), 0x00000fff, int_elements,
+			GLINT_ITR(1, 1) - GLINT_ITR(1, 0)},
+		{GLINT_ITR(0, 0), 0x00000fff, int_elements,
+			GLINT_ITR(2, 1) - GLINT_ITR(2, 0)},
+		{GLINT_CTL, 0xffff0001, 1, 0}
+	};
+	unsigned int i;
+
+	netdev_dbg(netdev, "Register test\n");
+	for (i = 0; i < ARRAY_SIZE(ice_reg_list); ++i) {
+		u32 j;
+
+		for (j = 0; j < ice_reg_list[i].elem_num; ++j) {
+			u32 mask = ice_reg_list[i].mask;
+			u32 reg = ice_reg_list[i].address +
+				(j * ice_reg_list[i].elem_size);
+
+			/* bail on failure (non-zero return) */
+			if (ice_reg_pattern_test(hw, reg, mask))
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_lbtest_prepare_rings - configure Tx/Rx test rings
+ * @vsi: pointer to the VSI structure
+ *
+ * Function configures rings of a VSI for loopback test without
+ * enabling interrupts or informing the kernel about new queues.
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_lbtest_prepare_rings(struct ice_vsi *vsi)
+{
+	int status;
+
+	status = ice_vsi_setup_tx_rings(vsi);
+	if (status)
+		goto err_setup_tx_ring;
+
+	status = ice_vsi_setup_rx_rings(vsi);
+	if (status)
+		goto err_setup_rx_ring;
+
+	status = ice_vsi_cfg_lan(vsi);
+	if (status)
+		goto err_setup_rx_ring;
+
+	status = ice_vsi_start_all_rx_rings(vsi);
+	if (status)
+		goto err_start_rx_ring;
+
+	return 0;
+
+err_start_rx_ring:
+	ice_vsi_free_rx_rings(vsi);
+err_setup_rx_ring:
+	ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+err_setup_tx_ring:
+	ice_vsi_free_tx_rings(vsi);
+
+	return status;
+}
+
+/**
+ * ice_lbtest_disable_rings - disable Tx/Rx test rings after loopback test
+ * @vsi: pointer to the VSI structure
+ *
+ * Function stops and frees VSI rings after a loopback test.
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_lbtest_disable_rings(struct ice_vsi *vsi)
+{
+	int status;
+
+	status = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+	if (status)
+		netdev_err(vsi->netdev, "Failed to stop Tx rings, VSI %d error %d\n",
+			   vsi->vsi_num, status);
+
+	status = ice_vsi_stop_all_rx_rings(vsi);
+	if (status)
+		netdev_err(vsi->netdev, "Failed to stop Rx rings, VSI %d error %d\n",
+			   vsi->vsi_num, status);
+
+	ice_vsi_free_tx_rings(vsi);
+	ice_vsi_free_rx_rings(vsi);
+
+	return status;
+}
+
+/**
+ * ice_lbtest_create_frame - create test packet
+ * @pf: pointer to the PF structure
+ * @ret_data: allocated frame buffer
+ * @size: size of the packet data
+ *
+ * Function allocates a frame with a test pattern on specific offsets.
+ * Returns 0 on success, non-zero on failure.
+ */
+static int ice_lbtest_create_frame(struct ice_pf *pf, u8 **ret_data, u16 size)
+{
+	u8 *data;
+
+	if (!pf)
+		return -EINVAL;
+
+	data = devm_kzalloc(ice_pf_to_dev(pf), size, GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* Since the ethernet test frame should always be at least
+	 * 64 bytes long, fill some octets in the payload with test data.
+	 */
+	memset(data, 0xFF, size);
+	data[32] = 0xDE;
+	data[42] = 0xAD;
+	data[44] = 0xBE;
+	data[46] = 0xEF;
+
+	*ret_data = data;
+
+	return 0;
+}
+
+/**
+ * ice_lbtest_check_frame - verify received loopback frame
+ * @frame: pointer to the raw packet data
+ *
+ * Function verifies received test frame with a pattern.
+ * Returns true if frame matches the pattern, false otherwise.
+ */
+static bool ice_lbtest_check_frame(u8 *frame)
+{
+	/* Validate bytes of a frame under offsets chosen earlier */
+	if (frame[32] == 0xDE &&
+	    frame[42] == 0xAD &&
+	    frame[44] == 0xBE &&
+	    frame[46] == 0xEF &&
+	    frame[48] == 0xFF)
+		return true;
+
+	return false;
+}
+
+/**
+ * ice_diag_send - send test frames to the test ring
+ * @tx_ring: pointer to the transmit ring
+ * @data: pointer to the raw packet data
+ * @size: size of the packet to send
+ *
+ * Function sends loopback packets on a test Tx ring.
+ */
+static int ice_diag_send(struct ice_tx_ring *tx_ring, u8 *data, u16 size)
+{
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	dma_addr_t dma;
+	u64 td_cmd;
+
+	tx_desc = ICE_TX_DESC(tx_ring, tx_ring->next_to_use);
+	tx_buf = &tx_ring->tx_buf[tx_ring->next_to_use];
+
+	dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(tx_ring->dev, dma))
+		return -EINVAL;
+
+	tx_desc->buf_addr = cpu_to_le64(dma);
+
+	/* These flags are required for a descriptor to be pushed out */
+	td_cmd = (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
+	tx_desc->cmd_type_offset_bsz =
+		cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+			    (td_cmd << ICE_TXD_QW1_CMD_S) |
+			    ((u64)0 << ICE_TXD_QW1_OFFSET_S) |
+			    ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+			    ((u64)0 << ICE_TXD_QW1_L2TAG1_S));
+
+	tx_buf->next_to_watch = tx_desc;
+
+	/* Force memory write to complete before letting h/w know
+	 * there are new descriptors to fetch.
+	 */
+	wmb();
+
+	tx_ring->next_to_use++;
+	if (tx_ring->next_to_use >= tx_ring->count)
+		tx_ring->next_to_use = 0;
+
+	writel_relaxed(tx_ring->next_to_use, tx_ring->tail);
+
+	/* Wait until the packets get transmitted to the receive queue. */
+	usleep_range(1000, 2000);
+	dma_unmap_single(tx_ring->dev, dma, size, DMA_TO_DEVICE);
+
+	return 0;
+}
+
+#define ICE_LB_FRAME_SIZE 64
+/**
+ * ice_lbtest_receive_frames - receive and verify test frames
+ * @rx_ring: pointer to the receive ring
+ *
+ * Function receives loopback packets and verify their correctness.
+ * Returns number of received valid frames.
+ */
+static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring)
+{
+	struct ice_rx_buf *rx_buf;
+	int valid_frames, i;
+	u8 *received_buf;
+
+	valid_frames = 0;
+
+	for (i = 0; i < rx_ring->count; i++) {
+		union ice_32b_rx_flex_desc *rx_desc;
+
+		rx_desc = ICE_RX_DESC(rx_ring, i);
+
+		if (!(rx_desc->wb.status_error0 &
+		    (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) |
+		     cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)))))
+			continue;
+
+		rx_buf = &rx_ring->rx_buf[i];
+		received_buf = page_address(rx_buf->page) + rx_buf->page_offset;
+
+		if (ice_lbtest_check_frame(received_buf))
+			valid_frames++;
+	}
+
+	return valid_frames;
+}
+
+/**
+ * ice_loopback_test - perform a loopback test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_loopback_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *orig_vsi = np->vsi, *test_vsi;
+	struct ice_pf *pf = orig_vsi->back;
+	u8 broadcast[ETH_ALEN], ret = 0;
+	int num_frames, valid_frames;
+	struct ice_tx_ring *tx_ring;
+	struct ice_rx_ring *rx_ring;
+	struct device *dev;
+	u8 *tx_frame;
+	int i;
+
+	dev = ice_pf_to_dev(pf);
+	netdev_info(netdev, "loopback test\n");
+
+	test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info);
+	if (!test_vsi) {
+		netdev_err(netdev, "Failed to create a VSI for the loopback test\n");
+		return 1;
+	}
+
+	test_vsi->netdev = netdev;
+	tx_ring = test_vsi->tx_rings[0];
+	rx_ring = test_vsi->rx_rings[0];
+
+	if (ice_lbtest_prepare_rings(test_vsi)) {
+		ret = 2;
+		goto lbtest_vsi_close;
+	}
+
+	if (ice_alloc_rx_bufs(rx_ring, rx_ring->count)) {
+		ret = 3;
+		goto lbtest_rings_dis;
+	}
+
+	/* Enable MAC loopback in firmware */
+	if (ice_aq_set_mac_loopback(&pf->hw, true, NULL)) {
+		ret = 4;
+		goto lbtest_mac_dis;
+	}
+
+	/* Test VSI needs to receive broadcast packets */
+	eth_broadcast_addr(broadcast);
+	if (ice_fltr_add_mac(test_vsi, broadcast, ICE_FWD_TO_VSI)) {
+		ret = 5;
+		goto lbtest_mac_dis;
+	}
+
+	if (ice_lbtest_create_frame(pf, &tx_frame, ICE_LB_FRAME_SIZE)) {
+		ret = 7;
+		goto remove_mac_filters;
+	}
+
+	num_frames = min_t(int, tx_ring->count, 32);
+	for (i = 0; i < num_frames; i++) {
+		if (ice_diag_send(tx_ring, tx_frame, ICE_LB_FRAME_SIZE)) {
+			ret = 8;
+			goto lbtest_free_frame;
+		}
+	}
+
+	valid_frames = ice_lbtest_receive_frames(rx_ring);
+	if (!valid_frames)
+		ret = 9;
+	else if (valid_frames != num_frames)
+		ret = 10;
+
+lbtest_free_frame:
+	devm_kfree(dev, tx_frame);
+remove_mac_filters:
+	if (ice_fltr_remove_mac(test_vsi, broadcast, ICE_FWD_TO_VSI))
+		netdev_err(netdev, "Could not remove MAC filter for the test VSI\n");
+lbtest_mac_dis:
+	/* Disable MAC loopback after the test is completed. */
+	if (ice_aq_set_mac_loopback(&pf->hw, false, NULL))
+		netdev_err(netdev, "Could not disable MAC loopback\n");
+lbtest_rings_dis:
+	if (ice_lbtest_disable_rings(test_vsi))
+		netdev_err(netdev, "Could not disable test rings\n");
+lbtest_vsi_close:
+	test_vsi->netdev = NULL;
+	if (ice_vsi_release(test_vsi))
+		netdev_err(netdev, "Failed to remove the test VSI\n");
+
+	return ret;
+}
+
+/**
+ * ice_intr_test - perform an interrupt test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_intr_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	u16 swic_old = pf->sw_int_count;
+
+	netdev_info(netdev, "interrupt test\n");
+
+	wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_irq.index),
+	     GLINT_DYN_CTL_SW_ITR_INDX_M |
+	     GLINT_DYN_CTL_INTENA_MSK_M |
+	     GLINT_DYN_CTL_SWINT_TRIG_M);
+
+	usleep_range(1000, 2000);
+	return (swic_old == pf->sw_int_count);
+}
+
+/**
+ * ice_self_test - handler function for performing a self-test by ethtool
+ * @netdev: network interface device structure
+ * @eth_test: ethtool_test structure
+ * @data: required by ethtool.self_test
+ *
+ * This function is called after invoking 'ethtool -t devname' command where
+ * devname is the name of the network device on which ethtool should operate.
+ * It performs a set of self-tests to check if a device works properly.
+ */
+static void
+ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
+	      u64 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	bool if_running = netif_running(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		netdev_info(netdev, "offline testing starting\n");
+
+		set_bit(ICE_TESTING, pf->state);
+
+		if (ice_active_vfs(pf)) {
+			dev_warn(dev, "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
+			data[ICE_ETH_TEST_REG] = 1;
+			data[ICE_ETH_TEST_EEPROM] = 1;
+			data[ICE_ETH_TEST_INTR] = 1;
+			data[ICE_ETH_TEST_LOOP] = 1;
+			data[ICE_ETH_TEST_LINK] = 1;
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+			clear_bit(ICE_TESTING, pf->state);
+			goto skip_ol_tests;
+		}
+		/* If the device is online then take it offline */
+		if (if_running)
+			/* indicate we're in test mode */
+			ice_stop(netdev);
+
+		data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+		data[ICE_ETH_TEST_EEPROM] = ice_eeprom_test(netdev);
+		data[ICE_ETH_TEST_INTR] = ice_intr_test(netdev);
+		data[ICE_ETH_TEST_LOOP] = ice_loopback_test(netdev);
+		data[ICE_ETH_TEST_REG] = ice_reg_test(netdev);
+
+		if (data[ICE_ETH_TEST_LINK] ||
+		    data[ICE_ETH_TEST_EEPROM] ||
+		    data[ICE_ETH_TEST_LOOP] ||
+		    data[ICE_ETH_TEST_INTR] ||
+		    data[ICE_ETH_TEST_REG])
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		clear_bit(ICE_TESTING, pf->state);
+
+		if (if_running) {
+			int status = ice_open(netdev);
+
+			if (status) {
+				dev_err(dev, "Could not open device %s, err %d\n",
+					pf->int_name, status);
+			}
+		}
+	} else {
+		/* Online tests */
+		netdev_info(netdev, "online testing starting\n");
+
+		data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+		if (data[ICE_ETH_TEST_LINK])
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* Offline only tests, not run in online; pass by default */
+		data[ICE_ETH_TEST_REG] = 0;
+		data[ICE_ETH_TEST_EEPROM] = 0;
+		data[ICE_ETH_TEST_INTR] = 0;
+		data[ICE_ETH_TEST_LOOP] = 0;
+	}
+
+skip_ol_tests:
+	netdev_info(netdev, "testing finished\n");
+}
+
+static void
+__ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data,
+		  struct ice_vsi *vsi)
+{
+	unsigned int i;
+	u8 *p = data;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < ICE_VSI_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					ice_gstrings_vsi_stats[i].stat_string);
+
+		if (ice_is_port_repr_netdev(netdev))
+			return;
+
+		ice_for_each_alloc_txq(vsi, i) {
+			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
+		}
+
+		ice_for_each_alloc_rxq(vsi, i) {
+			ethtool_sprintf(&p, "rx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
+		}
+
+		if (vsi->type != ICE_VSI_PF)
+			return;
+
+		for (i = 0; i < ICE_PF_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					ice_gstrings_pf_stats[i].stat_string);
+
+		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+			ethtool_sprintf(&p, "tx_priority_%u_xon.nic", i);
+			ethtool_sprintf(&p, "tx_priority_%u_xoff.nic", i);
+		}
+		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+			ethtool_sprintf(&p, "rx_priority_%u_xon.nic", i);
+			ethtool_sprintf(&p, "rx_priority_%u_xoff.nic", i);
+		}
+		break;
+	case ETH_SS_TEST:
+		memcpy(data, ice_gstrings_test, ICE_TEST_LEN * ETH_GSTRING_LEN);
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++)
+			ethtool_sprintf(&p, ice_gstrings_priv_flags[i].name);
+		break;
+	default:
+		break;
+	}
+}
+
+static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+
+	__ice_get_strings(netdev, stringset, data, np->vsi);
+}
+
+static int
+ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	bool led_active;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		led_active = true;
+		break;
+	case ETHTOOL_ID_INACTIVE:
+		led_active = false;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (ice_aq_set_port_id_led(np->vsi->port_info, !led_active, NULL))
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * ice_set_fec_cfg - Set link FEC options
+ * @netdev: network interface device structure
+ * @req_fec: FEC mode to configure
+ */
+static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_set_phy_cfg_data config = { 0 };
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_port_info *pi;
+
+	pi = vsi->port_info;
+	if (!pi)
+		return -EOPNOTSUPP;
+
+	/* Changing the FEC parameters is not supported if not the PF VSI */
+	if (vsi->type != ICE_VSI_PF) {
+		netdev_info(netdev, "Changing FEC parameters only supported for PF VSI\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* Proceed only if requesting different FEC mode */
+	if (pi->phy.curr_user_fec_req == req_fec)
+		return 0;
+
+	/* Copy the current user PHY configuration. The current user PHY
+	 * configuration is initialized during probe from PHY capabilities
+	 * software mode, and updated on set PHY configuration.
+	 */
+	memcpy(&config, &pi->phy.curr_user_phy_cfg, sizeof(config));
+
+	ice_cfg_phy_fec(pi, &config, req_fec);
+	config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+	if (ice_aq_set_phy_cfg(pi->hw, pi, &config, NULL))
+		return -EAGAIN;
+
+	/* Save requested FEC config */
+	pi->phy.curr_user_fec_req = req_fec;
+
+	return 0;
+}
+
+/**
+ * ice_set_fecparam - Set FEC link options
+ * @netdev: network interface device structure
+ * @fecparam: Ethtool structure to retrieve FEC parameters
+ */
+static int
+ice_set_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	enum ice_fec_mode fec;
+
+	switch (fecparam->fec) {
+	case ETHTOOL_FEC_AUTO:
+		fec = ICE_FEC_AUTO;
+		break;
+	case ETHTOOL_FEC_RS:
+		fec = ICE_FEC_RS;
+		break;
+	case ETHTOOL_FEC_BASER:
+		fec = ICE_FEC_BASER;
+		break;
+	case ETHTOOL_FEC_OFF:
+	case ETHTOOL_FEC_NONE:
+		fec = ICE_FEC_NONE;
+		break;
+	default:
+		dev_warn(ice_pf_to_dev(vsi->back), "Unsupported FEC mode: %d\n",
+			 fecparam->fec);
+		return -EINVAL;
+	}
+
+	return ice_set_fec_cfg(netdev, fec);
+}
+
+/**
+ * ice_get_fecparam - Get link FEC options
+ * @netdev: network interface device structure
+ * @fecparam: Ethtool structure to retrieve FEC parameters
+ */
+static int
+ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_get_phy_caps_data *caps;
+	struct ice_link_status *link_info;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_port_info *pi;
+	int err;
+
+	pi = vsi->port_info;
+
+	if (!pi)
+		return -EOPNOTSUPP;
+	link_info = &pi->phy.link_info;
+
+	/* Set FEC mode based on negotiated link info */
+	switch (link_info->fec_info) {
+	case ICE_AQ_LINK_25G_KR_FEC_EN:
+		fecparam->active_fec = ETHTOOL_FEC_BASER;
+		break;
+	case ICE_AQ_LINK_25G_RS_528_FEC_EN:
+	case ICE_AQ_LINK_25G_RS_544_FEC_EN:
+		fecparam->active_fec = ETHTOOL_FEC_RS;
+		break;
+	default:
+		fecparam->active_fec = ETHTOOL_FEC_OFF;
+		break;
+	}
+
+	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return -ENOMEM;
+
+	err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+				  caps, NULL);
+	if (err)
+		goto done;
+
+	/* Set supported/configured FEC modes based on PHY capability */
+	if (caps->caps & ICE_AQC_PHY_EN_AUTO_FEC)
+		fecparam->fec |= ETHTOOL_FEC_AUTO;
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+		fecparam->fec |= ETHTOOL_FEC_BASER;
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN)
+		fecparam->fec |= ETHTOOL_FEC_RS;
+	if (caps->link_fec_options == 0)
+		fecparam->fec |= ETHTOOL_FEC_OFF;
+
+done:
+	kfree(caps);
+	return err;
+}
+
+/**
+ * ice_nway_reset - restart autonegotiation
+ * @netdev: network interface device structure
+ */
+static int ice_nway_reset(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	int err;
+
+	/* If VSI state is up, then restart autoneg with link up */
+	if (!test_bit(ICE_DOWN, vsi->back->state))
+		err = ice_set_link(vsi, true);
+	else
+		err = ice_set_link(vsi, false);
+
+	return err;
+}
+
+/**
+ * ice_get_priv_flags - report device private flags
+ * @netdev: network interface device structure
+ *
+ * The get string set count and the string set should be matched for each
+ * flag returned.  Add new strings for each flag to the ice_gstrings_priv_flags
+ * array.
+ *
+ * Returns a u32 bitmap of flags.
+ */
+static u32 ice_get_priv_flags(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u32 i, ret_flags = 0;
+
+	for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
+		const struct ice_priv_flag *priv_flag;
+
+		priv_flag = &ice_gstrings_priv_flags[i];
+
+		if (test_bit(priv_flag->bitno, pf->flags))
+			ret_flags |= BIT(i);
+	}
+
+	return ret_flags;
+}
+
+/**
+ * ice_set_priv_flags - set private flags
+ * @netdev: network interface device structure
+ * @flags: bit flags to be set
+ */
+static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	DECLARE_BITMAP(change_flags, ICE_PF_FLAGS_NBITS);
+	DECLARE_BITMAP(orig_flags, ICE_PF_FLAGS_NBITS);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	int ret = 0;
+	u32 i;
+
+	if (flags > BIT(ICE_PRIV_FLAG_ARRAY_SIZE))
+		return -EINVAL;
+
+	dev = ice_pf_to_dev(pf);
+	set_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
+
+	bitmap_copy(orig_flags, pf->flags, ICE_PF_FLAGS_NBITS);
+	for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
+		const struct ice_priv_flag *priv_flag;
+
+		priv_flag = &ice_gstrings_priv_flags[i];
+
+		if (flags & BIT(i))
+			set_bit(priv_flag->bitno, pf->flags);
+		else
+			clear_bit(priv_flag->bitno, pf->flags);
+	}
+
+	bitmap_xor(change_flags, pf->flags, orig_flags, ICE_PF_FLAGS_NBITS);
+
+	/* Do not allow change to link-down-on-close when Total Port Shutdown
+	 * is enabled.
+	 */
+	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, change_flags) &&
+	    test_bit(ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags)) {
+		dev_err(dev, "Setting link-down-on-close not supported on this port\n");
+		set_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags);
+		ret = -EINVAL;
+		goto ethtool_exit;
+	}
+
+	if (test_bit(ICE_FLAG_FW_LLDP_AGENT, change_flags)) {
+		if (!test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) {
+			int status;
+
+			/* Disable FW LLDP engine */
+			status = ice_cfg_lldp_mib_change(&pf->hw, false);
+
+			/* If unregistering for LLDP events fails, this is
+			 * not an error state, as there shouldn't be any
+			 * events to respond to.
+			 */
+			if (status)
+				dev_info(dev, "Failed to unreg for LLDP events\n");
+
+			/* The AQ call to stop the FW LLDP agent will generate
+			 * an error if the agent is already stopped.
+			 */
+			status = ice_aq_stop_lldp(&pf->hw, true, true, NULL);
+			if (status)
+				dev_warn(dev, "Fail to stop LLDP agent\n");
+			/* Use case for having the FW LLDP agent stopped
+			 * will likely not need DCB, so failure to init is
+			 * not a concern of ethtool
+			 */
+			status = ice_init_pf_dcb(pf, true);
+			if (status)
+				dev_warn(dev, "Fail to init DCB\n");
+
+			pf->dcbx_cap &= ~DCB_CAP_DCBX_LLD_MANAGED;
+			pf->dcbx_cap |= DCB_CAP_DCBX_HOST;
+		} else {
+			bool dcbx_agent_status;
+			int status;
+
+			if (ice_get_pfc_mode(pf) == ICE_QOS_MODE_DSCP) {
+				clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags);
+				dev_err(dev, "QoS in L3 DSCP mode, FW Agent not allowed to start\n");
+				ret = -EOPNOTSUPP;
+				goto ethtool_exit;
+			}
+
+			/* Remove rule to direct LLDP packets to default VSI.
+			 * The FW LLDP engine will now be consuming them.
+			 */
+			ice_cfg_sw_lldp(vsi, false, false);
+
+			/* AQ command to start FW LLDP agent will return an
+			 * error if the agent is already started
+			 */
+			status = ice_aq_start_lldp(&pf->hw, true, NULL);
+			if (status)
+				dev_warn(dev, "Fail to start LLDP Agent\n");
+
+			/* AQ command to start FW DCBX agent will fail if
+			 * the agent is already started
+			 */
+			status = ice_aq_start_stop_dcbx(&pf->hw, true,
+							&dcbx_agent_status,
+							NULL);
+			if (status)
+				dev_dbg(dev, "Failed to start FW DCBX\n");
+
+			dev_info(dev, "FW DCBX agent is %s\n",
+				 dcbx_agent_status ? "ACTIVE" : "DISABLED");
+
+			/* Failure to configure MIB change or init DCB is not
+			 * relevant to ethtool.  Print notification that
+			 * registration/init failed but do not return error
+			 * state to ethtool
+			 */
+			status = ice_init_pf_dcb(pf, true);
+			if (status)
+				dev_dbg(dev, "Fail to init DCB\n");
+
+			/* Register for MIB change events */
+			status = ice_cfg_lldp_mib_change(&pf->hw, true);
+			if (status)
+				dev_dbg(dev, "Fail to enable MIB change events\n");
+
+			pf->dcbx_cap &= ~DCB_CAP_DCBX_HOST;
+			pf->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED;
+
+			ice_nway_reset(netdev);
+		}
+	}
+	if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) {
+		/* down and up VSI so that changes of Rx cfg are reflected. */
+		ice_down_up(vsi);
+	}
+	/* don't allow modification of this flag when a single VF is in
+	 * promiscuous mode because it's not supported
+	 */
+	if (test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, change_flags) &&
+	    ice_is_any_vf_in_unicast_promisc(pf)) {
+		dev_err(dev, "Changing vf-true-promisc-support flag while VF(s) are in promiscuous mode not supported\n");
+		/* toggle bit back to previous state */
+		change_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags);
+		ret = -EAGAIN;
+	}
+
+	if (test_bit(ICE_FLAG_VF_VLAN_PRUNING, change_flags) &&
+	    ice_has_vfs(pf)) {
+		dev_err(dev, "vf-vlan-pruning: VLAN pruning cannot be changed while VFs are active.\n");
+		/* toggle bit back to previous state */
+		change_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags);
+		ret = -EOPNOTSUPP;
+	}
+ethtool_exit:
+	clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
+	return ret;
+}
+
+static int ice_get_sset_count(struct net_device *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		/* The number (and order) of strings reported *must* remain
+		 * constant for a given netdevice. This function must not
+		 * report a different number based on run time parameters
+		 * (such as the number of queues in use, or the setting of
+		 * a private ethtool flag). This is due to the nature of the
+		 * ethtool stats API.
+		 *
+		 * Userspace programs such as ethtool must make 3 separate
+		 * ioctl requests, one for size, one for the strings, and
+		 * finally one for the stats. Since these cross into
+		 * userspace, changes to the number or size could result in
+		 * undefined memory access or incorrect string<->value
+		 * correlations for statistics.
+		 *
+		 * Even if it appears to be safe, changes to the size or
+		 * order of strings will suffer from race conditions and are
+		 * not safe.
+		 */
+		return ICE_ALL_STATS_LEN(netdev);
+	case ETH_SS_TEST:
+		return ICE_TEST_LEN;
+	case ETH_SS_PRIV_FLAGS:
+		return ICE_PRIV_FLAG_ARRAY_SIZE;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void
+__ice_get_ethtool_stats(struct net_device *netdev,
+			struct ethtool_stats __always_unused *stats, u64 *data,
+			struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_tx_ring *tx_ring;
+	struct ice_rx_ring *rx_ring;
+	unsigned int j;
+	int i = 0;
+	char *p;
+
+	ice_update_pf_stats(pf);
+	ice_update_vsi_stats(vsi);
+
+	for (j = 0; j < ICE_VSI_STATS_LEN; j++) {
+		p = (char *)vsi + ice_gstrings_vsi_stats[j].stat_offset;
+		data[i++] = (ice_gstrings_vsi_stats[j].sizeof_stat ==
+			     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+
+	if (ice_is_port_repr_netdev(netdev))
+		return;
+
+	/* populate per queue stats */
+	rcu_read_lock();
+
+	ice_for_each_alloc_txq(vsi, j) {
+		tx_ring = READ_ONCE(vsi->tx_rings[j]);
+		if (tx_ring && tx_ring->ring_stats) {
+			data[i++] = tx_ring->ring_stats->stats.pkts;
+			data[i++] = tx_ring->ring_stats->stats.bytes;
+		} else {
+			data[i++] = 0;
+			data[i++] = 0;
+		}
+	}
+
+	ice_for_each_alloc_rxq(vsi, j) {
+		rx_ring = READ_ONCE(vsi->rx_rings[j]);
+		if (rx_ring && rx_ring->ring_stats) {
+			data[i++] = rx_ring->ring_stats->stats.pkts;
+			data[i++] = rx_ring->ring_stats->stats.bytes;
+		} else {
+			data[i++] = 0;
+			data[i++] = 0;
+		}
+	}
+
+	rcu_read_unlock();
+
+	if (vsi->type != ICE_VSI_PF)
+		return;
+
+	for (j = 0; j < ICE_PF_STATS_LEN; j++) {
+		p = (char *)pf + ice_gstrings_pf_stats[j].stat_offset;
+		data[i++] = (ice_gstrings_pf_stats[j].sizeof_stat ==
+			     sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+
+	for (j = 0; j < ICE_MAX_USER_PRIORITY; j++) {
+		data[i++] = pf->stats.priority_xon_tx[j];
+		data[i++] = pf->stats.priority_xoff_tx[j];
+	}
+
+	for (j = 0; j < ICE_MAX_USER_PRIORITY; j++) {
+		data[i++] = pf->stats.priority_xon_rx[j];
+		data[i++] = pf->stats.priority_xoff_rx[j];
+	}
+}
+
+static void
+ice_get_ethtool_stats(struct net_device *netdev,
+		      struct ethtool_stats __always_unused *stats, u64 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+
+	__ice_get_ethtool_stats(netdev, stats, data, np->vsi);
+}
+
+#define ICE_PHY_TYPE_LOW_MASK_MIN_1G	(ICE_PHY_TYPE_LOW_100BASE_TX | \
+					 ICE_PHY_TYPE_LOW_100M_SGMII)
+
+#define ICE_PHY_TYPE_LOW_MASK_MIN_25G	(ICE_PHY_TYPE_LOW_MASK_MIN_1G | \
+					 ICE_PHY_TYPE_LOW_1000BASE_T | \
+					 ICE_PHY_TYPE_LOW_1000BASE_SX | \
+					 ICE_PHY_TYPE_LOW_1000BASE_LX | \
+					 ICE_PHY_TYPE_LOW_1000BASE_KX | \
+					 ICE_PHY_TYPE_LOW_1G_SGMII | \
+					 ICE_PHY_TYPE_LOW_2500BASE_T | \
+					 ICE_PHY_TYPE_LOW_2500BASE_X | \
+					 ICE_PHY_TYPE_LOW_2500BASE_KX | \
+					 ICE_PHY_TYPE_LOW_5GBASE_T | \
+					 ICE_PHY_TYPE_LOW_5GBASE_KR | \
+					 ICE_PHY_TYPE_LOW_10GBASE_T | \
+					 ICE_PHY_TYPE_LOW_10G_SFI_DA | \
+					 ICE_PHY_TYPE_LOW_10GBASE_SR | \
+					 ICE_PHY_TYPE_LOW_10GBASE_LR | \
+					 ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 | \
+					 ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC | \
+					 ICE_PHY_TYPE_LOW_10G_SFI_C2C)
+
+#define ICE_PHY_TYPE_LOW_MASK_100G	(ICE_PHY_TYPE_LOW_100GBASE_CR4 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_SR4 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_LR4 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_KR4 | \
+					 ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC | \
+					 ICE_PHY_TYPE_LOW_100G_CAUI4 | \
+					 ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC | \
+					 ICE_PHY_TYPE_LOW_100G_AUI4 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_CP2 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_SR2 | \
+					 ICE_PHY_TYPE_LOW_100GBASE_DR)
+
+#define ICE_PHY_TYPE_HIGH_MASK_100G	(ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4 | \
+					 ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC |\
+					 ICE_PHY_TYPE_HIGH_100G_CAUI2 | \
+					 ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC | \
+					 ICE_PHY_TYPE_HIGH_100G_AUI2)
+
+/**
+ * ice_mask_min_supported_speeds
+ * @hw: pointer to the HW structure
+ * @phy_types_high: PHY type high
+ * @phy_types_low: PHY type low to apply minimum supported speeds mask
+ *
+ * Apply minimum supported speeds mask to PHY type low. These are the speeds
+ * for ethtool supported link mode.
+ */
+static void
+ice_mask_min_supported_speeds(struct ice_hw *hw,
+			      u64 phy_types_high, u64 *phy_types_low)
+{
+	/* if QSFP connection with 100G speed, minimum supported speed is 25G */
+	if (*phy_types_low & ICE_PHY_TYPE_LOW_MASK_100G ||
+	    phy_types_high & ICE_PHY_TYPE_HIGH_MASK_100G)
+		*phy_types_low &= ~ICE_PHY_TYPE_LOW_MASK_MIN_25G;
+	else if (!ice_is_100m_speed_supported(hw))
+		*phy_types_low &= ~ICE_PHY_TYPE_LOW_MASK_MIN_1G;
+}
+
+/**
+ * ice_linkmode_set_bit - set link mode bit
+ * @phy_to_ethtool: PHY type to ethtool link mode struct to set
+ * @ks: ethtool link ksettings struct to fill out
+ * @req_speeds: speed requested by user
+ * @advert_phy_type: advertised PHY type
+ * @phy_type: PHY type
+ */
+static void
+ice_linkmode_set_bit(const struct ice_phy_type_to_ethtool *phy_to_ethtool,
+		     struct ethtool_link_ksettings *ks, u32 req_speeds,
+		     u64 advert_phy_type, u32 phy_type)
+{
+	linkmode_set_bit(phy_to_ethtool->link_mode, ks->link_modes.supported);
+
+	if (req_speeds & phy_to_ethtool->aq_link_speed ||
+	    (!req_speeds && advert_phy_type & BIT(phy_type)))
+		linkmode_set_bit(phy_to_ethtool->link_mode,
+				 ks->link_modes.advertising);
+}
+
+/**
+ * ice_phy_type_to_ethtool - convert the phy_types to ethtool link modes
+ * @netdev: network interface device structure
+ * @ks: ethtool link ksettings struct to fill out
+ */
+static void
+ice_phy_type_to_ethtool(struct net_device *netdev,
+			struct ethtool_link_ksettings *ks)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u64 advert_phy_type_lo = 0;
+	u64 advert_phy_type_hi = 0;
+	u64 phy_types_high = 0;
+	u64 phy_types_low = 0;
+	u32 req_speeds;
+	u32 i;
+
+	req_speeds = vsi->port_info->phy.link_info.req_speeds;
+
+	/* Check if lenient mode is supported and enabled, or in strict mode.
+	 *
+	 * In lenient mode the Supported link modes are the PHY types without
+	 * media. The Advertising link mode is either 1. the user requested
+	 * speed, 2. the override PHY mask, or 3. the PHY types with media.
+	 *
+	 * In strict mode Supported link mode are the PHY type with media,
+	 * and Advertising link modes are the media PHY type or the speed
+	 * requested by user.
+	 */
+	if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) {
+		phy_types_low = le64_to_cpu(pf->nvm_phy_type_lo);
+		phy_types_high = le64_to_cpu(pf->nvm_phy_type_hi);
+
+		ice_mask_min_supported_speeds(&pf->hw, phy_types_high,
+					      &phy_types_low);
+		/* determine advertised modes based on link override only
+		 * if it's supported and if the FW doesn't abstract the
+		 * driver from having to account for link overrides
+		 */
+		if (ice_fw_supports_link_override(&pf->hw) &&
+		    !ice_fw_supports_report_dflt_cfg(&pf->hw)) {
+			struct ice_link_default_override_tlv *ldo;
+
+			ldo = &pf->link_dflt_override;
+			/* If override enabled and PHY mask set, then
+			 * Advertising link mode is the intersection of the PHY
+			 * types without media and the override PHY mask.
+			 */
+			if (ldo->options & ICE_LINK_OVERRIDE_EN &&
+			    (ldo->phy_type_low || ldo->phy_type_high)) {
+				advert_phy_type_lo =
+					le64_to_cpu(pf->nvm_phy_type_lo) &
+					ldo->phy_type_low;
+				advert_phy_type_hi =
+					le64_to_cpu(pf->nvm_phy_type_hi) &
+					ldo->phy_type_high;
+			}
+		}
+	} else {
+		/* strict mode */
+		phy_types_low = vsi->port_info->phy.phy_type_low;
+		phy_types_high = vsi->port_info->phy.phy_type_high;
+	}
+
+	/* If Advertising link mode PHY type is not using override PHY type,
+	 * then use PHY type with media.
+	 */
+	if (!advert_phy_type_lo && !advert_phy_type_hi) {
+		advert_phy_type_lo = vsi->port_info->phy.phy_type_low;
+		advert_phy_type_hi = vsi->port_info->phy.phy_type_high;
+	}
+
+	linkmode_zero(ks->link_modes.supported);
+	linkmode_zero(ks->link_modes.advertising);
+
+	for (i = 0; i < ARRAY_SIZE(phy_type_low_lkup); i++) {
+		if (phy_types_low & BIT_ULL(i))
+			ice_linkmode_set_bit(&phy_type_low_lkup[i], ks,
+					     req_speeds, advert_phy_type_lo,
+					     i);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(phy_type_high_lkup); i++) {
+		if (phy_types_high & BIT_ULL(i))
+			ice_linkmode_set_bit(&phy_type_high_lkup[i], ks,
+					     req_speeds, advert_phy_type_hi,
+					     i);
+	}
+}
+
+#define TEST_SET_BITS_TIMEOUT	50
+#define TEST_SET_BITS_SLEEP_MAX	2000
+#define TEST_SET_BITS_SLEEP_MIN	1000
+
+/**
+ * ice_get_settings_link_up - Get Link settings for when link is up
+ * @ks: ethtool ksettings to fill in
+ * @netdev: network interface device structure
+ */
+static void
+ice_get_settings_link_up(struct ethtool_link_ksettings *ks,
+			 struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_port_info *pi = np->vsi->port_info;
+	struct ice_link_status *link_info;
+	struct ice_vsi *vsi = np->vsi;
+
+	link_info = &vsi->port_info->phy.link_info;
+
+	/* Get supported and advertised settings from PHY ability with media */
+	ice_phy_type_to_ethtool(netdev, ks);
+
+	switch (link_info->link_speed) {
+	case ICE_AQ_LINK_SPEED_100GB:
+		ks->base.speed = SPEED_100000;
+		break;
+	case ICE_AQ_LINK_SPEED_50GB:
+		ks->base.speed = SPEED_50000;
+		break;
+	case ICE_AQ_LINK_SPEED_40GB:
+		ks->base.speed = SPEED_40000;
+		break;
+	case ICE_AQ_LINK_SPEED_25GB:
+		ks->base.speed = SPEED_25000;
+		break;
+	case ICE_AQ_LINK_SPEED_20GB:
+		ks->base.speed = SPEED_20000;
+		break;
+	case ICE_AQ_LINK_SPEED_10GB:
+		ks->base.speed = SPEED_10000;
+		break;
+	case ICE_AQ_LINK_SPEED_5GB:
+		ks->base.speed = SPEED_5000;
+		break;
+	case ICE_AQ_LINK_SPEED_2500MB:
+		ks->base.speed = SPEED_2500;
+		break;
+	case ICE_AQ_LINK_SPEED_1000MB:
+		ks->base.speed = SPEED_1000;
+		break;
+	case ICE_AQ_LINK_SPEED_100MB:
+		ks->base.speed = SPEED_100;
+		break;
+	default:
+		netdev_info(netdev, "WARNING: Unrecognized link_speed (0x%x).\n",
+			    link_info->link_speed);
+		break;
+	}
+	ks->base.duplex = DUPLEX_FULL;
+
+	if (link_info->an_info & ICE_AQ_AN_COMPLETED)
+		ethtool_link_ksettings_add_link_mode(ks, lp_advertising,
+						     Autoneg);
+
+	/* Set flow control negotiated Rx/Tx pause */
+	switch (pi->fc.current_mode) {
+	case ICE_FC_FULL:
+		ethtool_link_ksettings_add_link_mode(ks, lp_advertising, Pause);
+		break;
+	case ICE_FC_TX_PAUSE:
+		ethtool_link_ksettings_add_link_mode(ks, lp_advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(ks, lp_advertising,
+						     Asym_Pause);
+		break;
+	case ICE_FC_RX_PAUSE:
+		ethtool_link_ksettings_add_link_mode(ks, lp_advertising,
+						     Asym_Pause);
+		break;
+	case ICE_FC_PFC:
+	default:
+		ethtool_link_ksettings_del_link_mode(ks, lp_advertising, Pause);
+		ethtool_link_ksettings_del_link_mode(ks, lp_advertising,
+						     Asym_Pause);
+		break;
+	}
+}
+
+/**
+ * ice_get_settings_link_down - Get the Link settings when link is down
+ * @ks: ethtool ksettings to fill in
+ * @netdev: network interface device structure
+ *
+ * Reports link settings that can be determined when link is down
+ */
+static void
+ice_get_settings_link_down(struct ethtool_link_ksettings *ks,
+			   struct net_device *netdev)
+{
+	/* link is down and the driver needs to fall back on
+	 * supported PHY types to figure out what info to display
+	 */
+	ice_phy_type_to_ethtool(netdev, ks);
+
+	/* With no link, speed and duplex are unknown */
+	ks->base.speed = SPEED_UNKNOWN;
+	ks->base.duplex = DUPLEX_UNKNOWN;
+}
+
+/**
+ * ice_get_link_ksettings - Get Link Speed and Duplex settings
+ * @netdev: network interface device structure
+ * @ks: ethtool ksettings
+ *
+ * Reports speed/duplex settings based on media_type
+ */
+static int
+ice_get_link_ksettings(struct net_device *netdev,
+		       struct ethtool_link_ksettings *ks)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_get_phy_caps_data *caps;
+	struct ice_link_status *hw_link_info;
+	struct ice_vsi *vsi = np->vsi;
+	int err;
+
+	ethtool_link_ksettings_zero_link_mode(ks, supported);
+	ethtool_link_ksettings_zero_link_mode(ks, advertising);
+	ethtool_link_ksettings_zero_link_mode(ks, lp_advertising);
+	hw_link_info = &vsi->port_info->phy.link_info;
+
+	/* set speed and duplex */
+	if (hw_link_info->link_info & ICE_AQ_LINK_UP)
+		ice_get_settings_link_up(ks, netdev);
+	else
+		ice_get_settings_link_down(ks, netdev);
+
+	/* set autoneg settings */
+	ks->base.autoneg = (hw_link_info->an_info & ICE_AQ_AN_COMPLETED) ?
+		AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+	/* set media type settings */
+	switch (vsi->port_info->phy.media_type) {
+	case ICE_MEDIA_FIBER:
+		ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+		ks->base.port = PORT_FIBRE;
+		break;
+	case ICE_MEDIA_BASET:
+		ethtool_link_ksettings_add_link_mode(ks, supported, TP);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, TP);
+		ks->base.port = PORT_TP;
+		break;
+	case ICE_MEDIA_BACKPLANE:
+		ethtool_link_ksettings_add_link_mode(ks, supported, Backplane);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Backplane);
+		ks->base.port = PORT_NONE;
+		break;
+	case ICE_MEDIA_DA:
+		ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE);
+		ks->base.port = PORT_DA;
+		break;
+	default:
+		ks->base.port = PORT_OTHER;
+		break;
+	}
+
+	/* flow control is symmetric and always supported */
+	ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
+
+	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return -ENOMEM;
+
+	err = ice_aq_get_phy_caps(vsi->port_info, false,
+				  ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
+	if (err)
+		goto done;
+
+	/* Set the advertised flow control based on the PHY capability */
+	if ((caps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) &&
+	    (caps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)) {
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Asym_Pause);
+	} else if (caps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE) {
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Asym_Pause);
+	} else if (caps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE) {
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     Asym_Pause);
+	} else {
+		ethtool_link_ksettings_del_link_mode(ks, advertising, Pause);
+		ethtool_link_ksettings_del_link_mode(ks, advertising,
+						     Asym_Pause);
+	}
+
+	/* Set advertised FEC modes based on PHY capability */
+	ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE);
+
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+		ethtool_link_ksettings_add_link_mode(ks, advertising,
+						     FEC_BASER);
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
+		ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
+
+	err = ice_aq_get_phy_caps(vsi->port_info, false,
+				  ICE_AQC_REPORT_TOPO_CAP_MEDIA, caps, NULL);
+	if (err)
+		goto done;
+
+	/* Set supported FEC modes based on PHY capability */
+	ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE);
+
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN)
+		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN)
+		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
+
+	/* Set supported and advertised autoneg */
+	if (ice_is_phy_caps_an_enabled(caps)) {
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+	}
+
+done:
+	kfree(caps);
+	return err;
+}
+
+/**
+ * ice_ksettings_find_adv_link_speed - Find advertising link speed
+ * @ks: ethtool ksettings
+ */
+static u16
+ice_ksettings_find_adv_link_speed(const struct ethtool_link_ksettings *ks)
+{
+	u16 adv_link_speed = 0;
+
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100baseT_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_100MB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseX_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseT_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseKX_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_1000MB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  2500baseT_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  2500baseX_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_2500MB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  5000baseT_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_5GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseT_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseKR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseSR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  10000baseLR_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_10GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  25000baseCR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  25000baseSR_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  25000baseKR_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_25GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseCR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseSR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseLR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  40000baseKR4_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_40GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  50000baseCR2_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  50000baseKR2_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  50000baseSR2_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_50GB;
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100000baseCR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100000baseSR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100000baseLR4_ER4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100000baseKR4_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100000baseCR2_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100000baseSR2_Full) ||
+	    ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  100000baseKR2_Full))
+		adv_link_speed |= ICE_AQ_LINK_SPEED_100GB;
+
+	return adv_link_speed;
+}
+
+/**
+ * ice_setup_autoneg
+ * @p: port info
+ * @ks: ethtool_link_ksettings
+ * @config: configuration that will be sent down to FW
+ * @autoneg_enabled: autonegotiation is enabled or not
+ * @autoneg_changed: will there a change in autonegotiation
+ * @netdev: network interface device structure
+ *
+ * Setup PHY autonegotiation feature
+ */
+static int
+ice_setup_autoneg(struct ice_port_info *p, struct ethtool_link_ksettings *ks,
+		  struct ice_aqc_set_phy_cfg_data *config,
+		  u8 autoneg_enabled, u8 *autoneg_changed,
+		  struct net_device *netdev)
+{
+	int err = 0;
+
+	*autoneg_changed = 0;
+
+	/* Check autoneg */
+	if (autoneg_enabled == AUTONEG_ENABLE) {
+		/* If autoneg was not already enabled */
+		if (!(p->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)) {
+			/* If autoneg is not supported, return error */
+			if (!ethtool_link_ksettings_test_link_mode(ks,
+								   supported,
+								   Autoneg)) {
+				netdev_info(netdev, "Autoneg not supported on this phy.\n");
+				err = -EINVAL;
+			} else {
+				/* Autoneg is allowed to change */
+				config->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+				*autoneg_changed = 1;
+			}
+		}
+	} else {
+		/* If autoneg is currently enabled */
+		if (p->phy.link_info.an_info & ICE_AQ_AN_COMPLETED) {
+			/* If autoneg is supported 10GBASE_T is the only PHY
+			 * that can disable it, so otherwise return error
+			 */
+			if (ethtool_link_ksettings_test_link_mode(ks,
+								  supported,
+								  Autoneg)) {
+				netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
+				err = -EINVAL;
+			} else {
+				/* Autoneg is allowed to change */
+				config->caps &= ~ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+				*autoneg_changed = 1;
+			}
+		}
+	}
+
+	return err;
+}
+
+/**
+ * ice_set_phy_type_from_speed - set phy_types based on speeds
+ * and advertised modes
+ * @ks: ethtool link ksettings struct
+ * @phy_type_low: pointer to the lower part of phy_type
+ * @phy_type_high: pointer to the higher part of phy_type
+ * @adv_link_speed: targeted link speeds bitmap
+ */
+static void
+ice_set_phy_type_from_speed(const struct ethtool_link_ksettings *ks,
+			    u64 *phy_type_low, u64 *phy_type_high,
+			    u16 adv_link_speed)
+{
+	/* Handle 1000M speed in a special way because ice_update_phy_type
+	 * enables all link modes, but having mixed copper and optical
+	 * standards is not supported.
+	 */
+	adv_link_speed &= ~ICE_AQ_LINK_SPEED_1000MB;
+
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseT_Full))
+		*phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_T |
+				 ICE_PHY_TYPE_LOW_1G_SGMII;
+
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseKX_Full))
+		*phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_KX;
+
+	if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+						  1000baseX_Full))
+		*phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_SX |
+				 ICE_PHY_TYPE_LOW_1000BASE_LX;
+
+	ice_update_phy_type(phy_type_low, phy_type_high, adv_link_speed);
+}
+
+/**
+ * ice_set_link_ksettings - Set Speed and Duplex
+ * @netdev: network interface device structure
+ * @ks: ethtool ksettings
+ *
+ * Set speed/duplex per media_types advertised/forced
+ */
+static int
+ice_set_link_ksettings(struct net_device *netdev,
+		       const struct ethtool_link_ksettings *ks)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT;
+	struct ethtool_link_ksettings copy_ks = *ks;
+	struct ethtool_link_ksettings safe_ks = {};
+	struct ice_aqc_get_phy_caps_data *phy_caps;
+	struct ice_aqc_set_phy_cfg_data config;
+	u16 adv_link_speed, curr_link_speed;
+	struct ice_pf *pf = np->vsi->back;
+	struct ice_port_info *pi;
+	u8 autoneg_changed = 0;
+	u64 phy_type_high = 0;
+	u64 phy_type_low = 0;
+	bool linkup;
+	int err;
+
+	pi = np->vsi->port_info;
+
+	if (!pi)
+		return -EIO;
+
+	if (pi->phy.media_type != ICE_MEDIA_BASET &&
+	    pi->phy.media_type != ICE_MEDIA_FIBER &&
+	    pi->phy.media_type != ICE_MEDIA_BACKPLANE &&
+	    pi->phy.media_type != ICE_MEDIA_DA &&
+	    pi->phy.link_info.link_info & ICE_AQ_LINK_UP)
+		return -EOPNOTSUPP;
+
+	phy_caps = kzalloc(sizeof(*phy_caps), GFP_KERNEL);
+	if (!phy_caps)
+		return -ENOMEM;
+
+	/* Get the PHY capabilities based on media */
+	if (ice_fw_supports_report_dflt_cfg(pi->hw))
+		err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
+					  phy_caps, NULL);
+	else
+		err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+					  phy_caps, NULL);
+	if (err)
+		goto done;
+
+	/* save autoneg out of ksettings */
+	autoneg = copy_ks.base.autoneg;
+
+	/* Get link modes supported by hardware.*/
+	ice_phy_type_to_ethtool(netdev, &safe_ks);
+
+	/* and check against modes requested by user.
+	 * Return an error if unsupported mode was set.
+	 */
+	if (!bitmap_subset(copy_ks.link_modes.advertising,
+			   safe_ks.link_modes.supported,
+			   __ETHTOOL_LINK_MODE_MASK_NBITS)) {
+		if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags))
+			netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
+		err = -EOPNOTSUPP;
+		goto done;
+	}
+
+	/* get our own copy of the bits to check against */
+	memset(&safe_ks, 0, sizeof(safe_ks));
+	safe_ks.base.cmd = copy_ks.base.cmd;
+	safe_ks.base.link_mode_masks_nwords =
+		copy_ks.base.link_mode_masks_nwords;
+	ice_get_link_ksettings(netdev, &safe_ks);
+
+	/* set autoneg back to what it currently is */
+	copy_ks.base.autoneg = safe_ks.base.autoneg;
+	/* we don't compare the speed */
+	copy_ks.base.speed = safe_ks.base.speed;
+
+	/* If copy_ks.base and safe_ks.base are not the same now, then they are
+	 * trying to set something that we do not support.
+	 */
+	if (memcmp(&copy_ks.base, &safe_ks.base, sizeof(copy_ks.base))) {
+		err = -EOPNOTSUPP;
+		goto done;
+	}
+
+	while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
+		timeout--;
+		if (!timeout) {
+			err = -EBUSY;
+			goto done;
+		}
+		usleep_range(TEST_SET_BITS_SLEEP_MIN, TEST_SET_BITS_SLEEP_MAX);
+	}
+
+	/* Copy the current user PHY configuration. The current user PHY
+	 * configuration is initialized during probe from PHY capabilities
+	 * software mode, and updated on set PHY configuration.
+	 */
+	config = pi->phy.curr_user_phy_cfg;
+
+	config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+	/* Check autoneg */
+	err = ice_setup_autoneg(pi, &safe_ks, &config, autoneg, &autoneg_changed,
+				netdev);
+
+	if (err)
+		goto done;
+
+	/* Call to get the current link speed */
+	pi->phy.get_link_info = true;
+	err = ice_get_link_status(pi, &linkup);
+	if (err)
+		goto done;
+
+	curr_link_speed = pi->phy.curr_user_speed_req;
+	adv_link_speed = ice_ksettings_find_adv_link_speed(ks);
+
+	/* If speed didn't get set, set it to what it currently is.
+	 * This is needed because if advertise is 0 (as it is when autoneg
+	 * is disabled) then speed won't get set.
+	 */
+	if (!adv_link_speed)
+		adv_link_speed = curr_link_speed;
+
+	/* Convert the advertise link speeds to their corresponded PHY_TYPE */
+	ice_set_phy_type_from_speed(ks, &phy_type_low, &phy_type_high,
+				    adv_link_speed);
+
+	if (!autoneg_changed && adv_link_speed == curr_link_speed) {
+		netdev_info(netdev, "Nothing changed, exiting without setting anything.\n");
+		goto done;
+	}
+
+	/* save the requested speeds */
+	pi->phy.link_info.req_speeds = adv_link_speed;
+
+	/* set link and auto negotiation so changes take effect */
+	config.caps |= ICE_AQ_PHY_ENA_LINK;
+
+	/* check if there is a PHY type for the requested advertised speed */
+	if (!(phy_type_low || phy_type_high)) {
+		netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
+		err = -EOPNOTSUPP;
+		goto done;
+	}
+
+	/* intersect requested advertised speed PHY types with media PHY types
+	 * for set PHY configuration
+	 */
+	config.phy_type_high = cpu_to_le64(phy_type_high) &
+			phy_caps->phy_type_high;
+	config.phy_type_low = cpu_to_le64(phy_type_low) &
+			phy_caps->phy_type_low;
+
+	if (!(config.phy_type_high || config.phy_type_low)) {
+		/* If there is no intersection and lenient mode is enabled, then
+		 * intersect the requested advertised speed with NVM media type
+		 * PHY types.
+		 */
+		if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) {
+			config.phy_type_high = cpu_to_le64(phy_type_high) &
+					       pf->nvm_phy_type_hi;
+			config.phy_type_low = cpu_to_le64(phy_type_low) &
+					      pf->nvm_phy_type_lo;
+		} else {
+			netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
+			err = -EOPNOTSUPP;
+			goto done;
+		}
+	}
+
+	/* If link is up put link down */
+	if (pi->phy.link_info.link_info & ICE_AQ_LINK_UP) {
+		/* Tell the OS link is going down, the link will go
+		 * back up when fw says it is ready asynchronously
+		 */
+		ice_print_link_msg(np->vsi, false);
+		netif_carrier_off(netdev);
+		netif_tx_stop_all_queues(netdev);
+	}
+
+	/* make the aq call */
+	err = ice_aq_set_phy_cfg(&pf->hw, pi, &config, NULL);
+	if (err) {
+		netdev_info(netdev, "Set phy config failed,\n");
+		goto done;
+	}
+
+	/* Save speed request */
+	pi->phy.curr_user_speed_req = adv_link_speed;
+done:
+	kfree(phy_caps);
+	clear_bit(ICE_CFG_BUSY, pf->state);
+
+	return err;
+}
+
+/**
+ * ice_parse_hdrs - parses headers from RSS hash input
+ * @nfc: ethtool rxnfc command
+ *
+ * This function parses the rxnfc command and returns intended
+ * header types for RSS configuration
+ */
+static u32 ice_parse_hdrs(struct ethtool_rxnfc *nfc)
+{
+	u32 hdrs = ICE_FLOW_SEG_HDR_NONE;
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+		hdrs |= ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV4;
+		break;
+	case UDP_V4_FLOW:
+		hdrs |= ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV4;
+		break;
+	case SCTP_V4_FLOW:
+		hdrs |= ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4;
+		break;
+	case TCP_V6_FLOW:
+		hdrs |= ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6;
+		break;
+	case UDP_V6_FLOW:
+		hdrs |= ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV6;
+		break;
+	case SCTP_V6_FLOW:
+		hdrs |= ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV6;
+		break;
+	default:
+		break;
+	}
+	return hdrs;
+}
+
+#define ICE_FLOW_HASH_FLD_IPV4_SA	BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)
+#define ICE_FLOW_HASH_FLD_IPV6_SA	BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)
+#define ICE_FLOW_HASH_FLD_IPV4_DA	BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)
+#define ICE_FLOW_HASH_FLD_IPV6_DA	BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)
+#define ICE_FLOW_HASH_FLD_TCP_SRC_PORT	BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)
+#define ICE_FLOW_HASH_FLD_TCP_DST_PORT	BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)
+#define ICE_FLOW_HASH_FLD_UDP_SRC_PORT	BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)
+#define ICE_FLOW_HASH_FLD_UDP_DST_PORT	BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)
+#define ICE_FLOW_HASH_FLD_SCTP_SRC_PORT	\
+	BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)
+#define ICE_FLOW_HASH_FLD_SCTP_DST_PORT	\
+	BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)
+
+/**
+ * ice_parse_hash_flds - parses hash fields from RSS hash input
+ * @nfc: ethtool rxnfc command
+ *
+ * This function parses the rxnfc command and returns intended
+ * hash fields for RSS configuration
+ */
+static u64 ice_parse_hash_flds(struct ethtool_rxnfc *nfc)
+{
+	u64 hfld = ICE_HASH_INVALID;
+
+	if (nfc->data & RXH_IP_SRC || nfc->data & RXH_IP_DST) {
+		switch (nfc->flow_type) {
+		case TCP_V4_FLOW:
+		case UDP_V4_FLOW:
+		case SCTP_V4_FLOW:
+			if (nfc->data & RXH_IP_SRC)
+				hfld |= ICE_FLOW_HASH_FLD_IPV4_SA;
+			if (nfc->data & RXH_IP_DST)
+				hfld |= ICE_FLOW_HASH_FLD_IPV4_DA;
+			break;
+		case TCP_V6_FLOW:
+		case UDP_V6_FLOW:
+		case SCTP_V6_FLOW:
+			if (nfc->data & RXH_IP_SRC)
+				hfld |= ICE_FLOW_HASH_FLD_IPV6_SA;
+			if (nfc->data & RXH_IP_DST)
+				hfld |= ICE_FLOW_HASH_FLD_IPV6_DA;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (nfc->data & RXH_L4_B_0_1 || nfc->data & RXH_L4_B_2_3) {
+		switch (nfc->flow_type) {
+		case TCP_V4_FLOW:
+		case TCP_V6_FLOW:
+			if (nfc->data & RXH_L4_B_0_1)
+				hfld |= ICE_FLOW_HASH_FLD_TCP_SRC_PORT;
+			if (nfc->data & RXH_L4_B_2_3)
+				hfld |= ICE_FLOW_HASH_FLD_TCP_DST_PORT;
+			break;
+		case UDP_V4_FLOW:
+		case UDP_V6_FLOW:
+			if (nfc->data & RXH_L4_B_0_1)
+				hfld |= ICE_FLOW_HASH_FLD_UDP_SRC_PORT;
+			if (nfc->data & RXH_L4_B_2_3)
+				hfld |= ICE_FLOW_HASH_FLD_UDP_DST_PORT;
+			break;
+		case SCTP_V4_FLOW:
+		case SCTP_V6_FLOW:
+			if (nfc->data & RXH_L4_B_0_1)
+				hfld |= ICE_FLOW_HASH_FLD_SCTP_SRC_PORT;
+			if (nfc->data & RXH_L4_B_2_3)
+				hfld |= ICE_FLOW_HASH_FLD_SCTP_DST_PORT;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return hfld;
+}
+
+/**
+ * ice_set_rss_hash_opt - Enable/Disable flow types for RSS hash
+ * @vsi: the VSI being configured
+ * @nfc: ethtool rxnfc command
+ *
+ * Returns Success if the flow input set is supported.
+ */
+static int
+ice_set_rss_hash_opt(struct ice_vsi *vsi, struct ethtool_rxnfc *nfc)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	u64 hashed_flds;
+	int status;
+	u32 hdrs;
+
+	dev = ice_pf_to_dev(pf);
+	if (ice_is_safe_mode(pf)) {
+		dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
+			vsi->vsi_num);
+		return -EINVAL;
+	}
+
+	hashed_flds = ice_parse_hash_flds(nfc);
+	if (hashed_flds == ICE_HASH_INVALID) {
+		dev_dbg(dev, "Invalid hash fields, vsi num = %d\n",
+			vsi->vsi_num);
+		return -EINVAL;
+	}
+
+	hdrs = ice_parse_hdrs(nfc);
+	if (hdrs == ICE_FLOW_SEG_HDR_NONE) {
+		dev_dbg(dev, "Header type is not valid, vsi num = %d\n",
+			vsi->vsi_num);
+		return -EINVAL;
+	}
+
+	status = ice_add_rss_cfg(&pf->hw, vsi->idx, hashed_flds, hdrs);
+	if (status) {
+		dev_dbg(dev, "ice_add_rss_cfg failed, vsi num = %d, error = %d\n",
+			vsi->vsi_num, status);
+		return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_rss_hash_opt - Retrieve hash fields for a given flow-type
+ * @vsi: the VSI being configured
+ * @nfc: ethtool rxnfc command
+ */
+static void
+ice_get_rss_hash_opt(struct ice_vsi *vsi, struct ethtool_rxnfc *nfc)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	u64 hash_flds;
+	u32 hdrs;
+
+	dev = ice_pf_to_dev(pf);
+
+	nfc->data = 0;
+	if (ice_is_safe_mode(pf)) {
+		dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
+			vsi->vsi_num);
+		return;
+	}
+
+	hdrs = ice_parse_hdrs(nfc);
+	if (hdrs == ICE_FLOW_SEG_HDR_NONE) {
+		dev_dbg(dev, "Header type is not valid, vsi num = %d\n",
+			vsi->vsi_num);
+		return;
+	}
+
+	hash_flds = ice_get_rss_cfg(&pf->hw, vsi->idx, hdrs);
+	if (hash_flds == ICE_HASH_INVALID) {
+		dev_dbg(dev, "No hash fields found for the given header type, vsi num = %d\n",
+			vsi->vsi_num);
+		return;
+	}
+
+	if (hash_flds & ICE_FLOW_HASH_FLD_IPV4_SA ||
+	    hash_flds & ICE_FLOW_HASH_FLD_IPV6_SA)
+		nfc->data |= (u64)RXH_IP_SRC;
+
+	if (hash_flds & ICE_FLOW_HASH_FLD_IPV4_DA ||
+	    hash_flds & ICE_FLOW_HASH_FLD_IPV6_DA)
+		nfc->data |= (u64)RXH_IP_DST;
+
+	if (hash_flds & ICE_FLOW_HASH_FLD_TCP_SRC_PORT ||
+	    hash_flds & ICE_FLOW_HASH_FLD_UDP_SRC_PORT ||
+	    hash_flds & ICE_FLOW_HASH_FLD_SCTP_SRC_PORT)
+		nfc->data |= (u64)RXH_L4_B_0_1;
+
+	if (hash_flds & ICE_FLOW_HASH_FLD_TCP_DST_PORT ||
+	    hash_flds & ICE_FLOW_HASH_FLD_UDP_DST_PORT ||
+	    hash_flds & ICE_FLOW_HASH_FLD_SCTP_DST_PORT)
+		nfc->data |= (u64)RXH_L4_B_2_3;
+}
+
+/**
+ * ice_set_rxnfc - command to set Rx flow rules.
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Returns 0 for success and negative values for errors
+ */
+static int ice_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		return ice_add_fdir_ethtool(vsi, cmd);
+	case ETHTOOL_SRXCLSRLDEL:
+		return ice_del_fdir_ethtool(vsi, cmd);
+	case ETHTOOL_SRXFH:
+		return ice_set_rss_hash_opt(vsi, cmd);
+	default:
+		break;
+	}
+	return -EOPNOTSUPP;
+}
+
+/**
+ * ice_get_rxnfc - command to get Rx flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ * @rule_locs: buffer to rturn Rx flow classification rules
+ *
+ * Returns Success if the command is supported.
+ */
+static int
+ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+	      u32 __always_unused *rule_locs)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	int ret = -EOPNOTSUPP;
+	struct ice_hw *hw;
+
+	hw = &vsi->back->hw;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = vsi->rss_size;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = hw->fdir_active_fltr;
+		/* report total rule count */
+		cmd->data = ice_get_fdir_cnt_all(hw);
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = ice_get_ethtool_fdir_entry(hw, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		ret = ice_get_fdir_fltr_ids(hw, cmd, (u32 *)rule_locs);
+		break;
+	case ETHTOOL_GRXFH:
+		ice_get_rss_hash_opt(vsi, cmd);
+		ret = 0;
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static void
+ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
+		  struct kernel_ethtool_ringparam *kernel_ring,
+		  struct netlink_ext_ack *extack)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	ring->rx_max_pending = ICE_MAX_NUM_DESC;
+	ring->tx_max_pending = ICE_MAX_NUM_DESC;
+	if (vsi->tx_rings && vsi->rx_rings) {
+		ring->rx_pending = vsi->rx_rings[0]->count;
+		ring->tx_pending = vsi->tx_rings[0]->count;
+	} else {
+		ring->rx_pending = 0;
+		ring->tx_pending = 0;
+	}
+
+	/* Rx mini and jumbo rings are not supported */
+	ring->rx_mini_max_pending = 0;
+	ring->rx_jumbo_max_pending = 0;
+	ring->rx_mini_pending = 0;
+	ring->rx_jumbo_pending = 0;
+}
+
+static int
+ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
+		  struct kernel_ethtool_ringparam *kernel_ring,
+		  struct netlink_ext_ack *extack)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_tx_ring *xdp_rings = NULL;
+	struct ice_tx_ring *tx_rings = NULL;
+	struct ice_rx_ring *rx_rings = NULL;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	int i, timeout = 50, err = 0;
+	u16 new_rx_cnt, new_tx_cnt;
+
+	if (ring->tx_pending > ICE_MAX_NUM_DESC ||
+	    ring->tx_pending < ICE_MIN_NUM_DESC ||
+	    ring->rx_pending > ICE_MAX_NUM_DESC ||
+	    ring->rx_pending < ICE_MIN_NUM_DESC) {
+		netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d] (increment %d)\n",
+			   ring->tx_pending, ring->rx_pending,
+			   ICE_MIN_NUM_DESC, ICE_MAX_NUM_DESC,
+			   ICE_REQ_DESC_MULTIPLE);
+		return -EINVAL;
+	}
+
+	/* Return if there is no rings (device is reloading) */
+	if (!vsi->tx_rings || !vsi->rx_rings)
+		return -EBUSY;
+
+	new_tx_cnt = ALIGN(ring->tx_pending, ICE_REQ_DESC_MULTIPLE);
+	if (new_tx_cnt != ring->tx_pending)
+		netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n",
+			    new_tx_cnt);
+	new_rx_cnt = ALIGN(ring->rx_pending, ICE_REQ_DESC_MULTIPLE);
+	if (new_rx_cnt != ring->rx_pending)
+		netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n",
+			    new_rx_cnt);
+
+	/* if nothing to do return success */
+	if (new_tx_cnt == vsi->tx_rings[0]->count &&
+	    new_rx_cnt == vsi->rx_rings[0]->count) {
+		netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n");
+		return 0;
+	}
+
+	/* If there is a AF_XDP UMEM attached to any of Rx rings,
+	 * disallow changing the number of descriptors -- regardless
+	 * if the netdev is running or not.
+	 */
+	if (ice_xsk_any_rx_ring_ena(vsi))
+		return -EBUSY;
+
+	while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(1000, 2000);
+	}
+
+	/* set for the next time the netdev is started */
+	if (!netif_running(vsi->netdev)) {
+		ice_for_each_alloc_txq(vsi, i)
+			vsi->tx_rings[i]->count = new_tx_cnt;
+		ice_for_each_alloc_rxq(vsi, i)
+			vsi->rx_rings[i]->count = new_rx_cnt;
+		if (ice_is_xdp_ena_vsi(vsi))
+			ice_for_each_xdp_txq(vsi, i)
+				vsi->xdp_rings[i]->count = new_tx_cnt;
+		vsi->num_tx_desc = (u16)new_tx_cnt;
+		vsi->num_rx_desc = (u16)new_rx_cnt;
+		netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
+		goto done;
+	}
+
+	if (new_tx_cnt == vsi->tx_rings[0]->count)
+		goto process_rx;
+
+	/* alloc updated Tx resources */
+	netdev_info(netdev, "Changing Tx descriptor count from %d to %d\n",
+		    vsi->tx_rings[0]->count, new_tx_cnt);
+
+	tx_rings = kcalloc(vsi->num_txq, sizeof(*tx_rings), GFP_KERNEL);
+	if (!tx_rings) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	ice_for_each_txq(vsi, i) {
+		/* clone ring and setup updated count */
+		tx_rings[i] = *vsi->tx_rings[i];
+		tx_rings[i].count = new_tx_cnt;
+		tx_rings[i].desc = NULL;
+		tx_rings[i].tx_buf = NULL;
+		tx_rings[i].tx_tstamps = &pf->ptp.port.tx;
+		err = ice_setup_tx_ring(&tx_rings[i]);
+		if (err) {
+			while (i--)
+				ice_clean_tx_ring(&tx_rings[i]);
+			kfree(tx_rings);
+			goto done;
+		}
+	}
+
+	if (!ice_is_xdp_ena_vsi(vsi))
+		goto process_rx;
+
+	/* alloc updated XDP resources */
+	netdev_info(netdev, "Changing XDP descriptor count from %d to %d\n",
+		    vsi->xdp_rings[0]->count, new_tx_cnt);
+
+	xdp_rings = kcalloc(vsi->num_xdp_txq, sizeof(*xdp_rings), GFP_KERNEL);
+	if (!xdp_rings) {
+		err = -ENOMEM;
+		goto free_tx;
+	}
+
+	ice_for_each_xdp_txq(vsi, i) {
+		/* clone ring and setup updated count */
+		xdp_rings[i] = *vsi->xdp_rings[i];
+		xdp_rings[i].count = new_tx_cnt;
+		xdp_rings[i].desc = NULL;
+		xdp_rings[i].tx_buf = NULL;
+		err = ice_setup_tx_ring(&xdp_rings[i]);
+		if (err) {
+			while (i--)
+				ice_clean_tx_ring(&xdp_rings[i]);
+			kfree(xdp_rings);
+			goto free_tx;
+		}
+		ice_set_ring_xdp(&xdp_rings[i]);
+	}
+
+process_rx:
+	if (new_rx_cnt == vsi->rx_rings[0]->count)
+		goto process_link;
+
+	/* alloc updated Rx resources */
+	netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n",
+		    vsi->rx_rings[0]->count, new_rx_cnt);
+
+	rx_rings = kcalloc(vsi->num_rxq, sizeof(*rx_rings), GFP_KERNEL);
+	if (!rx_rings) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	ice_for_each_rxq(vsi, i) {
+		/* clone ring and setup updated count */
+		rx_rings[i] = *vsi->rx_rings[i];
+		rx_rings[i].count = new_rx_cnt;
+		rx_rings[i].cached_phctime = pf->ptp.cached_phc_time;
+		rx_rings[i].desc = NULL;
+		rx_rings[i].rx_buf = NULL;
+		/* this is to allow wr32 to have something to write to
+		 * during early allocation of Rx buffers
+		 */
+		rx_rings[i].tail = vsi->back->hw.hw_addr + PRTGEN_STATUS;
+
+		err = ice_setup_rx_ring(&rx_rings[i]);
+		if (err)
+			goto rx_unwind;
+
+		/* allocate Rx buffers */
+		err = ice_alloc_rx_bufs(&rx_rings[i],
+					ICE_RX_DESC_UNUSED(&rx_rings[i]));
+rx_unwind:
+		if (err) {
+			while (i) {
+				i--;
+				ice_free_rx_ring(&rx_rings[i]);
+			}
+			kfree(rx_rings);
+			err = -ENOMEM;
+			goto free_tx;
+		}
+	}
+
+process_link:
+	/* Bring interface down, copy in the new ring info, then restore the
+	 * interface. if VSI is up, bring it down and then back up
+	 */
+	if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
+		ice_down(vsi);
+
+		if (tx_rings) {
+			ice_for_each_txq(vsi, i) {
+				ice_free_tx_ring(vsi->tx_rings[i]);
+				*vsi->tx_rings[i] = tx_rings[i];
+			}
+			kfree(tx_rings);
+		}
+
+		if (rx_rings) {
+			ice_for_each_rxq(vsi, i) {
+				ice_free_rx_ring(vsi->rx_rings[i]);
+				/* copy the real tail offset */
+				rx_rings[i].tail = vsi->rx_rings[i]->tail;
+				/* this is to fake out the allocation routine
+				 * into thinking it has to realloc everything
+				 * but the recycling logic will let us re-use
+				 * the buffers allocated above
+				 */
+				rx_rings[i].next_to_use = 0;
+				rx_rings[i].next_to_clean = 0;
+				rx_rings[i].next_to_alloc = 0;
+				*vsi->rx_rings[i] = rx_rings[i];
+			}
+			kfree(rx_rings);
+		}
+
+		if (xdp_rings) {
+			ice_for_each_xdp_txq(vsi, i) {
+				ice_free_tx_ring(vsi->xdp_rings[i]);
+				*vsi->xdp_rings[i] = xdp_rings[i];
+			}
+			kfree(xdp_rings);
+		}
+
+		vsi->num_tx_desc = new_tx_cnt;
+		vsi->num_rx_desc = new_rx_cnt;
+		ice_up(vsi);
+	}
+	goto done;
+
+free_tx:
+	/* error cleanup if the Rx allocations failed after getting Tx */
+	if (tx_rings) {
+		ice_for_each_txq(vsi, i)
+			ice_free_tx_ring(&tx_rings[i]);
+		kfree(tx_rings);
+	}
+
+done:
+	clear_bit(ICE_CFG_BUSY, pf->state);
+	return err;
+}
+
+/**
+ * ice_get_pauseparam - Get Flow Control status
+ * @netdev: network interface device structure
+ * @pause: ethernet pause (flow control) parameters
+ *
+ * Get requested flow control status from PHY capability.
+ * If autoneg is true, then ethtool will send the ETHTOOL_GSET ioctl which
+ * is handled by ice_get_link_ksettings. ice_get_link_ksettings will report
+ * the negotiated Rx/Tx pause via lp_advertising.
+ */
+static void
+ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_port_info *pi = np->vsi->port_info;
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_dcbx_cfg *dcbx_cfg;
+	int status;
+
+	/* Initialize pause params */
+	pause->rx_pause = 0;
+	pause->tx_pause = 0;
+
+	dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
+
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return;
+
+	/* Get current PHY config */
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
+				     NULL);
+	if (status)
+		goto out;
+
+	pause->autoneg = ice_is_phy_caps_an_enabled(pcaps) ? AUTONEG_ENABLE :
+							     AUTONEG_DISABLE;
+
+	if (dcbx_cfg->pfc.pfcena)
+		/* PFC enabled so report LFC as off */
+		goto out;
+
+	if (pcaps->caps & ICE_AQC_PHY_EN_TX_LINK_PAUSE)
+		pause->tx_pause = 1;
+	if (pcaps->caps & ICE_AQC_PHY_EN_RX_LINK_PAUSE)
+		pause->rx_pause = 1;
+
+out:
+	kfree(pcaps);
+}
+
+/**
+ * ice_set_pauseparam - Set Flow Control parameter
+ * @netdev: network interface device structure
+ * @pause: return Tx/Rx flow control status
+ */
+static int
+ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_link_status *hw_link_info;
+	struct ice_pf *pf = np->vsi->back;
+	struct ice_dcbx_cfg *dcbx_cfg;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_port_info *pi;
+	u8 aq_failures;
+	bool link_up;
+	u32 is_an;
+	int err;
+
+	pi = vsi->port_info;
+	hw_link_info = &pi->phy.link_info;
+	dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
+	link_up = hw_link_info->link_info & ICE_AQ_LINK_UP;
+
+	/* Changing the port's flow control is not supported if this isn't the
+	 * PF VSI
+	 */
+	if (vsi->type != ICE_VSI_PF) {
+		netdev_info(netdev, "Changing flow control parameters only supported for PF VSI\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* Get pause param reports configured and negotiated flow control pause
+	 * when ETHTOOL_GLINKSETTINGS is defined. Since ETHTOOL_GLINKSETTINGS is
+	 * defined get pause param pause->autoneg reports SW configured setting,
+	 * so compare pause->autoneg with SW configured to prevent the user from
+	 * using set pause param to chance autoneg.
+	 */
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return -ENOMEM;
+
+	/* Get current PHY config */
+	err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
+				  NULL);
+	if (err) {
+		kfree(pcaps);
+		return err;
+	}
+
+	is_an = ice_is_phy_caps_an_enabled(pcaps) ? AUTONEG_ENABLE :
+						    AUTONEG_DISABLE;
+
+	kfree(pcaps);
+
+	if (pause->autoneg != is_an) {
+		netdev_info(netdev, "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* If we have link and don't have autoneg */
+	if (!test_bit(ICE_DOWN, pf->state) &&
+	    !(hw_link_info->an_info & ICE_AQ_AN_COMPLETED)) {
+		/* Send message that it might not necessarily work*/
+		netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n");
+	}
+
+	if (dcbx_cfg->pfc.pfcena) {
+		netdev_info(netdev, "Priority flow control enabled. Cannot set link flow control.\n");
+		return -EOPNOTSUPP;
+	}
+	if (pause->rx_pause && pause->tx_pause)
+		pi->fc.req_mode = ICE_FC_FULL;
+	else if (pause->rx_pause && !pause->tx_pause)
+		pi->fc.req_mode = ICE_FC_RX_PAUSE;
+	else if (!pause->rx_pause && pause->tx_pause)
+		pi->fc.req_mode = ICE_FC_TX_PAUSE;
+	else if (!pause->rx_pause && !pause->tx_pause)
+		pi->fc.req_mode = ICE_FC_NONE;
+	else
+		return -EINVAL;
+
+	/* Set the FC mode and only restart AN if link is up */
+	err = ice_set_fc(pi, &aq_failures, link_up);
+
+	if (aq_failures & ICE_SET_FC_AQ_FAIL_GET) {
+		netdev_info(netdev, "Set fc failed on the get_phy_capabilities call with err %d aq_err %s\n",
+			    err, ice_aq_str(hw->adminq.sq_last_status));
+		err = -EAGAIN;
+	} else if (aq_failures & ICE_SET_FC_AQ_FAIL_SET) {
+		netdev_info(netdev, "Set fc failed on the set_phy_config call with err %d aq_err %s\n",
+			    err, ice_aq_str(hw->adminq.sq_last_status));
+		err = -EAGAIN;
+	} else if (aq_failures & ICE_SET_FC_AQ_FAIL_UPDATE) {
+		netdev_info(netdev, "Set fc failed on the get_link_info call with err %d aq_err %s\n",
+			    err, ice_aq_str(hw->adminq.sq_last_status));
+		err = -EAGAIN;
+	}
+
+	return err;
+}
+
+/**
+ * ice_get_rxfh_key_size - get the RSS hash key size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size.
+ */
+static u32 ice_get_rxfh_key_size(struct net_device __always_unused *netdev)
+{
+	return ICE_VSIQF_HKEY_ARRAY_SIZE;
+}
+
+/**
+ * ice_get_rxfh_indir_size - get the Rx flow hash indirection table size
+ * @netdev: network interface device structure
+ *
+ * Returns the table size.
+ */
+static u32 ice_get_rxfh_indir_size(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+
+	return np->vsi->rss_table_size;
+}
+
+static int
+ice_get_rxfh_context(struct net_device *netdev, u32 *indir,
+		     u8 *key, u8 *hfunc, u32 rss_context)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u16 qcount, offset;
+	int err, num_tc, i;
+	u8 *lut;
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+		netdev_warn(netdev, "RSS is not supported on this VSI!\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (rss_context && !ice_is_adq_active(pf)) {
+		netdev_err(netdev, "RSS context cannot be non-zero when ADQ is not configured.\n");
+		return -EINVAL;
+	}
+
+	qcount = vsi->mqprio_qopt.qopt.count[rss_context];
+	offset = vsi->mqprio_qopt.qopt.offset[rss_context];
+
+	if (rss_context && ice_is_adq_active(pf)) {
+		num_tc = vsi->mqprio_qopt.qopt.num_tc;
+		if (rss_context >= num_tc) {
+			netdev_err(netdev, "RSS context:%d  > num_tc:%d\n",
+				   rss_context, num_tc);
+			return -EINVAL;
+		}
+		/* Use channel VSI of given TC */
+		vsi = vsi->tc_map_vsi[rss_context];
+	}
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	if (!indir)
+		return 0;
+
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	err = ice_get_rss_key(vsi, key);
+	if (err)
+		goto out;
+
+	err = ice_get_rss_lut(vsi, lut, vsi->rss_table_size);
+	if (err)
+		goto out;
+
+	if (ice_is_adq_active(pf)) {
+		for (i = 0; i < vsi->rss_table_size; i++)
+			indir[i] = offset + lut[i] % qcount;
+		goto out;
+	}
+
+	for (i = 0; i < vsi->rss_table_size; i++)
+		indir[i] = lut[i];
+
+out:
+	kfree(lut);
+	return err;
+}
+
+/**
+ * ice_get_rxfh - get the Rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function
+ *
+ * Reads the indirection table directly from the hardware.
+ */
+static int
+ice_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc)
+{
+	return ice_get_rxfh_context(netdev, indir, key, hfunc, 0);
+}
+
+/**
+ * ice_set_rxfh - set the Rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function
+ *
+ * Returns -EINVAL if the table specifies an invalid queue ID, otherwise
+ * returns 0 after programming the table.
+ */
+static int
+ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key,
+	     const u8 hfunc)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	int err;
+
+	dev = ice_pf_to_dev(pf);
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+		return -EOPNOTSUPP;
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+		/* RSS not supported return error here */
+		netdev_warn(netdev, "RSS is not configured on this VSI!\n");
+		return -EIO;
+	}
+
+	if (ice_is_adq_active(pf)) {
+		netdev_err(netdev, "Cannot change RSS params with ADQ configured.\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (key) {
+		if (!vsi->rss_hkey_user) {
+			vsi->rss_hkey_user =
+				devm_kzalloc(dev, ICE_VSIQF_HKEY_ARRAY_SIZE,
+					     GFP_KERNEL);
+			if (!vsi->rss_hkey_user)
+				return -ENOMEM;
+		}
+		memcpy(vsi->rss_hkey_user, key, ICE_VSIQF_HKEY_ARRAY_SIZE);
+
+		err = ice_set_rss_key(vsi, vsi->rss_hkey_user);
+		if (err)
+			return err;
+	}
+
+	if (!vsi->rss_lut_user) {
+		vsi->rss_lut_user = devm_kzalloc(dev, vsi->rss_table_size,
+						 GFP_KERNEL);
+		if (!vsi->rss_lut_user)
+			return -ENOMEM;
+	}
+
+	/* Each 32 bits pointed by 'indir' is stored with a lut entry */
+	if (indir) {
+		int i;
+
+		for (i = 0; i < vsi->rss_table_size; i++)
+			vsi->rss_lut_user[i] = (u8)(indir[i]);
+	} else {
+		ice_fill_rss_lut(vsi->rss_lut_user, vsi->rss_table_size,
+				 vsi->rss_size);
+	}
+
+	err = ice_set_rss_lut(vsi, vsi->rss_lut_user, vsi->rss_table_size);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int
+ice_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(dev);
+
+	/* only report timestamping if PTP is enabled */
+	if (!test_bit(ICE_FLAG_PTP, pf->flags))
+		return ethtool_op_get_ts_info(dev, info);
+
+	info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+				SOF_TIMESTAMPING_RX_SOFTWARE |
+				SOF_TIMESTAMPING_SOFTWARE |
+				SOF_TIMESTAMPING_TX_HARDWARE |
+				SOF_TIMESTAMPING_RX_HARDWARE |
+				SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	info->phc_index = ice_get_ptp_clock_index(pf);
+
+	info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON);
+
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL);
+
+	return 0;
+}
+
+/**
+ * ice_get_max_txq - return the maximum number of Tx queues for in a PF
+ * @pf: PF structure
+ */
+static int ice_get_max_txq(struct ice_pf *pf)
+{
+	return min3(pf->num_lan_msix, (u16)num_online_cpus(),
+		    (u16)pf->hw.func_caps.common_cap.num_txq);
+}
+
+/**
+ * ice_get_max_rxq - return the maximum number of Rx queues for in a PF
+ * @pf: PF structure
+ */
+static int ice_get_max_rxq(struct ice_pf *pf)
+{
+	return min3(pf->num_lan_msix, (u16)num_online_cpus(),
+		    (u16)pf->hw.func_caps.common_cap.num_rxq);
+}
+
+/**
+ * ice_get_combined_cnt - return the current number of combined channels
+ * @vsi: PF VSI pointer
+ *
+ * Go through all queue vectors and count ones that have both Rx and Tx ring
+ * attached
+ */
+static u32 ice_get_combined_cnt(struct ice_vsi *vsi)
+{
+	u32 combined = 0;
+	int q_idx;
+
+	ice_for_each_q_vector(vsi, q_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+		if (q_vector->rx.rx_ring && q_vector->tx.tx_ring)
+			combined++;
+	}
+
+	return combined;
+}
+
+/**
+ * ice_get_channels - get the current and max supported channels
+ * @dev: network interface device structure
+ * @ch: ethtool channel data structure
+ */
+static void
+ice_get_channels(struct net_device *dev, struct ethtool_channels *ch)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+
+	/* report maximum channels */
+	ch->max_rx = ice_get_max_rxq(pf);
+	ch->max_tx = ice_get_max_txq(pf);
+	ch->max_combined = min_t(int, ch->max_rx, ch->max_tx);
+
+	/* report current channels */
+	ch->combined_count = ice_get_combined_cnt(vsi);
+	ch->rx_count = vsi->num_rxq - ch->combined_count;
+	ch->tx_count = vsi->num_txq - ch->combined_count;
+
+	/* report other queues */
+	ch->other_count = test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1 : 0;
+	ch->max_other = ch->other_count;
+}
+
+/**
+ * ice_get_valid_rss_size - return valid number of RSS queues
+ * @hw: pointer to the HW structure
+ * @new_size: requested RSS queues
+ */
+static int ice_get_valid_rss_size(struct ice_hw *hw, int new_size)
+{
+	struct ice_hw_common_caps *caps = &hw->func_caps.common_cap;
+
+	return min_t(int, new_size, BIT(caps->rss_table_entry_width));
+}
+
+/**
+ * ice_vsi_set_dflt_rss_lut - set default RSS LUT with requested RSS size
+ * @vsi: VSI to reconfigure RSS LUT on
+ * @req_rss_size: requested range of queue numbers for hashing
+ *
+ * Set the VSI's RSS parameters, configure the RSS LUT based on these.
+ */
+static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	struct ice_hw *hw;
+	int err;
+	u8 *lut;
+
+	dev = ice_pf_to_dev(pf);
+	hw = &pf->hw;
+
+	if (!req_rss_size)
+		return -EINVAL;
+
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	/* set RSS LUT parameters */
+	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+		vsi->rss_size = 1;
+	else
+		vsi->rss_size = ice_get_valid_rss_size(hw, req_rss_size);
+
+	/* create/set RSS LUT */
+	ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
+	err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
+	if (err)
+		dev_err(dev, "Cannot set RSS lut, err %d aq_err %s\n", err,
+			ice_aq_str(hw->adminq.sq_last_status));
+
+	kfree(lut);
+	return err;
+}
+
+/**
+ * ice_set_channels - set the number channels
+ * @dev: network interface device structure
+ * @ch: ethtool channel data structure
+ */
+static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	int new_rx = 0, new_tx = 0;
+	bool locked = false;
+	u32 curr_combined;
+	int ret = 0;
+
+	/* do not support changing channels in Safe Mode */
+	if (ice_is_safe_mode(pf)) {
+		netdev_err(dev, "Changing channel in Safe Mode is not supported\n");
+		return -EOPNOTSUPP;
+	}
+	/* do not support changing other_count */
+	if (ch->other_count != (test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1U : 0U))
+		return -EINVAL;
+
+	if (ice_is_adq_active(pf)) {
+		netdev_err(dev, "Cannot set channels with ADQ configured.\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (test_bit(ICE_FLAG_FD_ENA, pf->flags) && pf->hw.fdir_active_fltr) {
+		netdev_err(dev, "Cannot set channels when Flow Director filters are active\n");
+		return -EOPNOTSUPP;
+	}
+
+	curr_combined = ice_get_combined_cnt(vsi);
+
+	/* these checks are for cases where user didn't specify a particular
+	 * value on cmd line but we get non-zero value anyway via
+	 * get_channels(); look at ethtool.c in ethtool repository (the user
+	 * space part), particularly, do_schannels() routine
+	 */
+	if (ch->rx_count == vsi->num_rxq - curr_combined)
+		ch->rx_count = 0;
+	if (ch->tx_count == vsi->num_txq - curr_combined)
+		ch->tx_count = 0;
+	if (ch->combined_count == curr_combined)
+		ch->combined_count = 0;
+
+	if (!(ch->combined_count || (ch->rx_count && ch->tx_count))) {
+		netdev_err(dev, "Please specify at least 1 Rx and 1 Tx channel\n");
+		return -EINVAL;
+	}
+
+	new_rx = ch->combined_count + ch->rx_count;
+	new_tx = ch->combined_count + ch->tx_count;
+
+	if (new_rx < vsi->tc_cfg.numtc) {
+		netdev_err(dev, "Cannot set less Rx channels, than Traffic Classes you have (%u)\n",
+			   vsi->tc_cfg.numtc);
+		return -EINVAL;
+	}
+	if (new_tx < vsi->tc_cfg.numtc) {
+		netdev_err(dev, "Cannot set less Tx channels, than Traffic Classes you have (%u)\n",
+			   vsi->tc_cfg.numtc);
+		return -EINVAL;
+	}
+	if (new_rx > ice_get_max_rxq(pf)) {
+		netdev_err(dev, "Maximum allowed Rx channels is %d\n",
+			   ice_get_max_rxq(pf));
+		return -EINVAL;
+	}
+	if (new_tx > ice_get_max_txq(pf)) {
+		netdev_err(dev, "Maximum allowed Tx channels is %d\n",
+			   ice_get_max_txq(pf));
+		return -EINVAL;
+	}
+
+	if (pf->adev) {
+		mutex_lock(&pf->adev_mutex);
+		device_lock(&pf->adev->dev);
+		locked = true;
+		if (pf->adev->dev.driver) {
+			netdev_err(dev, "Cannot change channels when RDMA is active\n");
+			ret = -EBUSY;
+			goto adev_unlock;
+		}
+	}
+
+	ice_vsi_recfg_qs(vsi, new_rx, new_tx, locked);
+
+	if (!netif_is_rxfh_configured(dev)) {
+		ret = ice_vsi_set_dflt_rss_lut(vsi, new_rx);
+		goto adev_unlock;
+	}
+
+	/* Update rss_size due to change in Rx queues */
+	vsi->rss_size = ice_get_valid_rss_size(&pf->hw, new_rx);
+
+adev_unlock:
+	if (locked) {
+		device_unlock(&pf->adev->dev);
+		mutex_unlock(&pf->adev_mutex);
+	}
+	return ret;
+}
+
+/**
+ * ice_get_wol - get current Wake on LAN configuration
+ * @netdev: network interface device structure
+ * @wol: Ethtool structure to retrieve WoL settings
+ */
+static void ice_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	if (np->vsi->type != ICE_VSI_PF)
+		netdev_warn(netdev, "Wake on LAN is not supported on this interface!\n");
+
+	/* Get WoL settings based on the HW capability */
+	if (ice_is_wol_supported(&pf->hw)) {
+		wol->supported = WAKE_MAGIC;
+		wol->wolopts = pf->wol_ena ? WAKE_MAGIC : 0;
+	} else {
+		wol->supported = 0;
+		wol->wolopts = 0;
+	}
+}
+
+/**
+ * ice_set_wol - set Wake on LAN on supported device
+ * @netdev: network interface device structure
+ * @wol: Ethtool structure to set WoL
+ */
+static int ice_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+
+	if (vsi->type != ICE_VSI_PF || !ice_is_wol_supported(&pf->hw))
+		return -EOPNOTSUPP;
+
+	/* only magic packet is supported */
+	if (wol->wolopts && wol->wolopts != WAKE_MAGIC)
+		return -EOPNOTSUPP;
+
+	/* Set WoL only if there is a new value */
+	if (pf->wol_ena != !!wol->wolopts) {
+		pf->wol_ena = !!wol->wolopts;
+		device_set_wakeup_enable(ice_pf_to_dev(pf), pf->wol_ena);
+		netdev_dbg(netdev, "WoL magic packet %sabled\n",
+			   pf->wol_ena ? "en" : "dis");
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_rc_coalesce - get ITR values for specific ring container
+ * @ec: ethtool structure to fill with driver's coalesce settings
+ * @rc: ring container that the ITR values will come from
+ *
+ * Query the device for ice_ring_container specific ITR values. This is
+ * done per ice_ring_container because each q_vector can have 1 or more rings
+ * and all of said ring(s) will have the same ITR values.
+ *
+ * Returns 0 on success, negative otherwise.
+ */
+static int
+ice_get_rc_coalesce(struct ethtool_coalesce *ec, struct ice_ring_container *rc)
+{
+	if (!rc->rx_ring)
+		return -EINVAL;
+
+	switch (rc->type) {
+	case ICE_RX_CONTAINER:
+		ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc);
+		ec->rx_coalesce_usecs = rc->itr_setting;
+		ec->rx_coalesce_usecs_high = rc->rx_ring->q_vector->intrl;
+		break;
+	case ICE_TX_CONTAINER:
+		ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc);
+		ec->tx_coalesce_usecs = rc->itr_setting;
+		break;
+	default:
+		dev_dbg(ice_pf_to_dev(rc->rx_ring->vsi->back), "Invalid c_type %d\n", rc->type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_q_coalesce - get a queue's ITR/INTRL (coalesce) settings
+ * @vsi: VSI associated to the queue for getting ITR/INTRL (coalesce) settings
+ * @ec: coalesce settings to program the device with
+ * @q_num: update ITR/INTRL (coalesce) settings for this queue number/index
+ *
+ * Return 0 on success, and negative under the following conditions:
+ * 1. Getting Tx or Rx ITR/INTRL (coalesce) settings failed.
+ * 2. The q_num passed in is not a valid number/index for Tx and Rx rings.
+ */
+static int
+ice_get_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num)
+{
+	if (q_num < vsi->num_rxq && q_num < vsi->num_txq) {
+		if (ice_get_rc_coalesce(ec,
+					&vsi->rx_rings[q_num]->q_vector->rx))
+			return -EINVAL;
+		if (ice_get_rc_coalesce(ec,
+					&vsi->tx_rings[q_num]->q_vector->tx))
+			return -EINVAL;
+	} else if (q_num < vsi->num_rxq) {
+		if (ice_get_rc_coalesce(ec,
+					&vsi->rx_rings[q_num]->q_vector->rx))
+			return -EINVAL;
+	} else if (q_num < vsi->num_txq) {
+		if (ice_get_rc_coalesce(ec,
+					&vsi->tx_rings[q_num]->q_vector->tx))
+			return -EINVAL;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * __ice_get_coalesce - get ITR/INTRL values for the device
+ * @netdev: pointer to the netdev associated with this query
+ * @ec: ethtool structure to fill with driver's coalesce settings
+ * @q_num: queue number to get the coalesce settings for
+ *
+ * If the caller passes in a negative q_num then we return coalesce settings
+ * based on queue number 0, else use the actual q_num passed in.
+ */
+static int
+__ice_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
+		   int q_num)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (q_num < 0)
+		q_num = 0;
+
+	if (ice_get_q_coalesce(vsi, ec, q_num))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int ice_get_coalesce(struct net_device *netdev,
+			    struct ethtool_coalesce *ec,
+			    struct kernel_ethtool_coalesce *kernel_coal,
+			    struct netlink_ext_ack *extack)
+{
+	return __ice_get_coalesce(netdev, ec, -1);
+}
+
+static int
+ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num,
+		       struct ethtool_coalesce *ec)
+{
+	return __ice_get_coalesce(netdev, ec, q_num);
+}
+
+/**
+ * ice_set_rc_coalesce - set ITR values for specific ring container
+ * @ec: ethtool structure from user to update ITR settings
+ * @rc: ring container that the ITR values will come from
+ * @vsi: VSI associated to the ring container
+ *
+ * Set specific ITR values. This is done per ice_ring_container because each
+ * q_vector can have 1 or more rings and all of said ring(s) will have the same
+ * ITR values.
+ *
+ * Returns 0 on success, negative otherwise.
+ */
+static int
+ice_set_rc_coalesce(struct ethtool_coalesce *ec,
+		    struct ice_ring_container *rc, struct ice_vsi *vsi)
+{
+	const char *c_type_str = (rc->type == ICE_RX_CONTAINER) ? "rx" : "tx";
+	u32 use_adaptive_coalesce, coalesce_usecs;
+	struct ice_pf *pf = vsi->back;
+	u16 itr_setting;
+
+	if (!rc->rx_ring)
+		return -EINVAL;
+
+	switch (rc->type) {
+	case ICE_RX_CONTAINER:
+	{
+		struct ice_q_vector *q_vector = rc->rx_ring->q_vector;
+
+		if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL ||
+		    (ec->rx_coalesce_usecs_high &&
+		     ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) {
+			netdev_info(vsi->netdev, "Invalid value, %s-usecs-high valid values are 0 (disabled), %d-%d\n",
+				    c_type_str, pf->hw.intrl_gran,
+				    ICE_MAX_INTRL);
+			return -EINVAL;
+		}
+		if (ec->rx_coalesce_usecs_high != q_vector->intrl &&
+		    (ec->use_adaptive_rx_coalesce || ec->use_adaptive_tx_coalesce)) {
+			netdev_info(vsi->netdev, "Invalid value, %s-usecs-high cannot be changed if adaptive-tx or adaptive-rx is enabled\n",
+				    c_type_str);
+			return -EINVAL;
+		}
+		if (ec->rx_coalesce_usecs_high != q_vector->intrl)
+			q_vector->intrl = ec->rx_coalesce_usecs_high;
+
+		use_adaptive_coalesce = ec->use_adaptive_rx_coalesce;
+		coalesce_usecs = ec->rx_coalesce_usecs;
+
+		break;
+	}
+	case ICE_TX_CONTAINER:
+		use_adaptive_coalesce = ec->use_adaptive_tx_coalesce;
+		coalesce_usecs = ec->tx_coalesce_usecs;
+
+		break;
+	default:
+		dev_dbg(ice_pf_to_dev(pf), "Invalid container type %d\n",
+			rc->type);
+		return -EINVAL;
+	}
+
+	itr_setting = rc->itr_setting;
+	if (coalesce_usecs != itr_setting && use_adaptive_coalesce) {
+		netdev_info(vsi->netdev, "%s interrupt throttling cannot be changed if adaptive-%s is enabled\n",
+			    c_type_str, c_type_str);
+		return -EINVAL;
+	}
+
+	if (coalesce_usecs > ICE_ITR_MAX) {
+		netdev_info(vsi->netdev, "Invalid value, %s-usecs range is 0-%d\n",
+			    c_type_str, ICE_ITR_MAX);
+		return -EINVAL;
+	}
+
+	if (use_adaptive_coalesce) {
+		rc->itr_mode = ITR_DYNAMIC;
+	} else {
+		rc->itr_mode = ITR_STATIC;
+		/* store user facing value how it was set */
+		rc->itr_setting = coalesce_usecs;
+		/* write the change to the register */
+		ice_write_itr(rc, coalesce_usecs);
+		/* force writes to take effect immediately, the flush shouldn't
+		 * be done in the functions above because the intent is for
+		 * them to do lazy writes.
+		 */
+		ice_flush(&pf->hw);
+	}
+
+	return 0;
+}
+
+/**
+ * ice_set_q_coalesce - set a queue's ITR/INTRL (coalesce) settings
+ * @vsi: VSI associated to the queue that need updating
+ * @ec: coalesce settings to program the device with
+ * @q_num: update ITR/INTRL (coalesce) settings for this queue number/index
+ *
+ * Return 0 on success, and negative under the following conditions:
+ * 1. Setting Tx or Rx ITR/INTRL (coalesce) settings failed.
+ * 2. The q_num passed in is not a valid number/index for Tx and Rx rings.
+ */
+static int
+ice_set_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num)
+{
+	if (q_num < vsi->num_rxq && q_num < vsi->num_txq) {
+		if (ice_set_rc_coalesce(ec,
+					&vsi->rx_rings[q_num]->q_vector->rx,
+					vsi))
+			return -EINVAL;
+
+		if (ice_set_rc_coalesce(ec,
+					&vsi->tx_rings[q_num]->q_vector->tx,
+					vsi))
+			return -EINVAL;
+	} else if (q_num < vsi->num_rxq) {
+		if (ice_set_rc_coalesce(ec,
+					&vsi->rx_rings[q_num]->q_vector->rx,
+					vsi))
+			return -EINVAL;
+	} else if (q_num < vsi->num_txq) {
+		if (ice_set_rc_coalesce(ec,
+					&vsi->tx_rings[q_num]->q_vector->tx,
+					vsi))
+			return -EINVAL;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_print_if_odd_usecs - print message if user tries to set odd [tx|rx]-usecs
+ * @netdev: netdev used for print
+ * @itr_setting: previous user setting
+ * @use_adaptive_coalesce: if adaptive coalesce is enabled or being enabled
+ * @coalesce_usecs: requested value of [tx|rx]-usecs
+ * @c_type_str: either "rx" or "tx" to match user set field of [tx|rx]-usecs
+ */
+static void
+ice_print_if_odd_usecs(struct net_device *netdev, u16 itr_setting,
+		       u32 use_adaptive_coalesce, u32 coalesce_usecs,
+		       const char *c_type_str)
+{
+	if (use_adaptive_coalesce)
+		return;
+
+	if (itr_setting != coalesce_usecs && (coalesce_usecs % 2))
+		netdev_info(netdev, "User set %s-usecs to %d, device only supports even values. Rounding down and attempting to set %s-usecs to %d\n",
+			    c_type_str, coalesce_usecs, c_type_str,
+			    ITR_REG_ALIGN(coalesce_usecs));
+}
+
+/**
+ * __ice_set_coalesce - set ITR/INTRL values for the device
+ * @netdev: pointer to the netdev associated with this query
+ * @ec: ethtool structure to fill with driver's coalesce settings
+ * @q_num: queue number to get the coalesce settings for
+ *
+ * If the caller passes in a negative q_num then we set the coalesce settings
+ * for all Tx/Rx queues, else use the actual q_num passed in.
+ */
+static int
+__ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
+		   int q_num)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (q_num < 0) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[0];
+		int v_idx;
+
+		if (q_vector) {
+			ice_print_if_odd_usecs(netdev, q_vector->rx.itr_setting,
+					       ec->use_adaptive_rx_coalesce,
+					       ec->rx_coalesce_usecs, "rx");
+
+			ice_print_if_odd_usecs(netdev, q_vector->tx.itr_setting,
+					       ec->use_adaptive_tx_coalesce,
+					       ec->tx_coalesce_usecs, "tx");
+		}
+
+		ice_for_each_q_vector(vsi, v_idx) {
+			/* In some cases if DCB is configured the num_[rx|tx]q
+			 * can be less than vsi->num_q_vectors. This check
+			 * accounts for that so we don't report a false failure
+			 */
+			if (v_idx >= vsi->num_rxq && v_idx >= vsi->num_txq)
+				goto set_complete;
+
+			if (ice_set_q_coalesce(vsi, ec, v_idx))
+				return -EINVAL;
+
+			ice_set_q_vector_intrl(vsi->q_vectors[v_idx]);
+		}
+		goto set_complete;
+	}
+
+	if (ice_set_q_coalesce(vsi, ec, q_num))
+		return -EINVAL;
+
+	ice_set_q_vector_intrl(vsi->q_vectors[q_num]);
+
+set_complete:
+	return 0;
+}
+
+static int ice_set_coalesce(struct net_device *netdev,
+			    struct ethtool_coalesce *ec,
+			    struct kernel_ethtool_coalesce *kernel_coal,
+			    struct netlink_ext_ack *extack)
+{
+	return __ice_set_coalesce(netdev, ec, -1);
+}
+
+static int
+ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num,
+		       struct ethtool_coalesce *ec)
+{
+	return __ice_set_coalesce(netdev, ec, q_num);
+}
+
+static void
+ice_repr_get_drvinfo(struct net_device *netdev,
+		     struct ethtool_drvinfo *drvinfo)
+{
+	struct ice_repr *repr = ice_netdev_to_repr(netdev);
+
+	if (ice_check_vf_ready_for_cfg(repr->vf))
+		return;
+
+	__ice_get_drvinfo(netdev, drvinfo, repr->src_vsi);
+}
+
+static void
+ice_repr_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+	struct ice_repr *repr = ice_netdev_to_repr(netdev);
+
+	/* for port representors only ETH_SS_STATS is supported */
+	if (ice_check_vf_ready_for_cfg(repr->vf) ||
+	    stringset != ETH_SS_STATS)
+		return;
+
+	__ice_get_strings(netdev, stringset, data, repr->src_vsi);
+}
+
+static void
+ice_repr_get_ethtool_stats(struct net_device *netdev,
+			   struct ethtool_stats __always_unused *stats,
+			   u64 *data)
+{
+	struct ice_repr *repr = ice_netdev_to_repr(netdev);
+
+	if (ice_check_vf_ready_for_cfg(repr->vf))
+		return;
+
+	__ice_get_ethtool_stats(netdev, stats, data, repr->src_vsi);
+}
+
+static int ice_repr_get_sset_count(struct net_device *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return ICE_VSI_STATS_LEN;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+#define ICE_I2C_EEPROM_DEV_ADDR		0xA0
+#define ICE_I2C_EEPROM_DEV_ADDR2	0xA2
+#define ICE_MODULE_TYPE_SFP		0x03
+#define ICE_MODULE_TYPE_QSFP_PLUS	0x0D
+#define ICE_MODULE_TYPE_QSFP28		0x11
+#define ICE_MODULE_SFF_ADDR_MODE	0x04
+#define ICE_MODULE_SFF_DIAG_CAPAB	0x40
+#define ICE_MODULE_REVISION_ADDR	0x01
+#define ICE_MODULE_SFF_8472_COMP	0x5E
+#define ICE_MODULE_SFF_8472_SWAP	0x5C
+#define ICE_MODULE_QSFP_MAX_LEN		640
+
+/**
+ * ice_get_module_info - get SFF module type and revision information
+ * @netdev: network interface device structure
+ * @modinfo: module EEPROM size and layout information structure
+ */
+static int
+ice_get_module_info(struct net_device *netdev,
+		    struct ethtool_modinfo *modinfo)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u8 sff8472_comp = 0;
+	u8 sff8472_swap = 0;
+	u8 sff8636_rev = 0;
+	u8 value = 0;
+	int status;
+
+	status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR, 0x00, 0x00,
+				   0, &value, 1, 0, NULL);
+	if (status)
+		return status;
+
+	switch (value) {
+	case ICE_MODULE_TYPE_SFP:
+		status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+					   ICE_MODULE_SFF_8472_COMP, 0x00, 0,
+					   &sff8472_comp, 1, 0, NULL);
+		if (status)
+			return status;
+		status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+					   ICE_MODULE_SFF_8472_SWAP, 0x00, 0,
+					   &sff8472_swap, 1, 0, NULL);
+		if (status)
+			return status;
+
+		if (sff8472_swap & ICE_MODULE_SFF_ADDR_MODE) {
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		} else if (sff8472_comp &&
+			   (sff8472_swap & ICE_MODULE_SFF_DIAG_CAPAB)) {
+			modinfo->type = ETH_MODULE_SFF_8472;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8079;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+		}
+		break;
+	case ICE_MODULE_TYPE_QSFP_PLUS:
+	case ICE_MODULE_TYPE_QSFP28:
+		status = ice_aq_sff_eeprom(hw, 0, ICE_I2C_EEPROM_DEV_ADDR,
+					   ICE_MODULE_REVISION_ADDR, 0x00, 0,
+					   &sff8636_rev, 1, 0, NULL);
+		if (status)
+			return status;
+		/* Check revision compliance */
+		if (sff8636_rev > 0x02) {
+			/* Module is SFF-8636 compliant */
+			modinfo->type = ETH_MODULE_SFF_8636;
+			modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8436;
+			modinfo->eeprom_len = ICE_MODULE_QSFP_MAX_LEN;
+		}
+		break;
+	default:
+		netdev_warn(netdev, "SFF Module Type not recognized.\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * ice_get_module_eeprom - fill buffer with SFF EEPROM contents
+ * @netdev: network interface device structure
+ * @ee: EEPROM dump request structure
+ * @data: buffer to be filled with EEPROM contents
+ */
+static int
+ice_get_module_eeprom(struct net_device *netdev,
+		      struct ethtool_eeprom *ee, u8 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+#define SFF_READ_BLOCK_SIZE 8
+	u8 value[SFF_READ_BLOCK_SIZE] = { 0 };
+	u8 addr = ICE_I2C_EEPROM_DEV_ADDR;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	bool is_sfp = false;
+	unsigned int i, j;
+	u16 offset = 0;
+	u8 page = 0;
+	int status;
+
+	if (!ee || !ee->len || !data)
+		return -EINVAL;
+
+	status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, value, 1, 0,
+				   NULL);
+	if (status)
+		return status;
+
+	if (value[0] == ICE_MODULE_TYPE_SFP)
+		is_sfp = true;
+
+	memset(data, 0, ee->len);
+	for (i = 0; i < ee->len; i += SFF_READ_BLOCK_SIZE) {
+		offset = i + ee->offset;
+		page = 0;
+
+		/* Check if we need to access the other memory page */
+		if (is_sfp) {
+			if (offset >= ETH_MODULE_SFF_8079_LEN) {
+				offset -= ETH_MODULE_SFF_8079_LEN;
+				addr = ICE_I2C_EEPROM_DEV_ADDR2;
+			}
+		} else {
+			while (offset >= ETH_MODULE_SFF_8436_LEN) {
+				/* Compute memory page number and offset. */
+				offset -= ETH_MODULE_SFF_8436_LEN / 2;
+				page++;
+			}
+		}
+
+		/* Bit 2 of EEPROM address 0x02 declares upper
+		 * pages are disabled on QSFP modules.
+		 * SFP modules only ever use page 0.
+		 */
+		if (page == 0 || !(data[0x2] & 0x4)) {
+			u32 copy_len;
+
+			/* If i2c bus is busy due to slow page change or
+			 * link management access, call can fail. This is normal.
+			 * So we retry this a few times.
+			 */
+			for (j = 0; j < 4; j++) {
+				status = ice_aq_sff_eeprom(hw, 0, addr, offset, page,
+							   !is_sfp, value,
+							   SFF_READ_BLOCK_SIZE,
+							   0, NULL);
+				netdev_dbg(netdev, "SFF %02X %02X %02X %X = %02X%02X%02X%02X.%02X%02X%02X%02X (%X)\n",
+					   addr, offset, page, is_sfp,
+					   value[0], value[1], value[2], value[3],
+					   value[4], value[5], value[6], value[7],
+					   status);
+				if (status) {
+					usleep_range(1500, 2500);
+					memset(value, 0, SFF_READ_BLOCK_SIZE);
+					continue;
+				}
+				break;
+			}
+
+			/* Make sure we have enough room for the new block */
+			copy_len = min_t(u32, SFF_READ_BLOCK_SIZE, ee->len - i);
+			memcpy(data + i, value, copy_len);
+		}
+	}
+	return 0;
+}
+
+static const struct ethtool_ops ice_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
+				     ETHTOOL_COALESCE_USE_ADAPTIVE |
+				     ETHTOOL_COALESCE_RX_USECS_HIGH,
+	.get_link_ksettings	= ice_get_link_ksettings,
+	.set_link_ksettings	= ice_set_link_ksettings,
+	.get_drvinfo		= ice_get_drvinfo,
+	.get_regs_len		= ice_get_regs_len,
+	.get_regs		= ice_get_regs,
+	.get_wol		= ice_get_wol,
+	.set_wol		= ice_set_wol,
+	.get_msglevel		= ice_get_msglevel,
+	.set_msglevel		= ice_set_msglevel,
+	.self_test		= ice_self_test,
+	.get_link		= ethtool_op_get_link,
+	.get_eeprom_len		= ice_get_eeprom_len,
+	.get_eeprom		= ice_get_eeprom,
+	.get_coalesce		= ice_get_coalesce,
+	.set_coalesce		= ice_set_coalesce,
+	.get_strings		= ice_get_strings,
+	.set_phys_id		= ice_set_phys_id,
+	.get_ethtool_stats      = ice_get_ethtool_stats,
+	.get_priv_flags		= ice_get_priv_flags,
+	.set_priv_flags		= ice_set_priv_flags,
+	.get_sset_count		= ice_get_sset_count,
+	.get_rxnfc		= ice_get_rxnfc,
+	.set_rxnfc		= ice_set_rxnfc,
+	.get_ringparam		= ice_get_ringparam,
+	.set_ringparam		= ice_set_ringparam,
+	.nway_reset		= ice_nway_reset,
+	.get_pauseparam		= ice_get_pauseparam,
+	.set_pauseparam		= ice_set_pauseparam,
+	.get_rxfh_key_size	= ice_get_rxfh_key_size,
+	.get_rxfh_indir_size	= ice_get_rxfh_indir_size,
+	.get_rxfh_context	= ice_get_rxfh_context,
+	.get_rxfh		= ice_get_rxfh,
+	.set_rxfh		= ice_set_rxfh,
+	.get_channels		= ice_get_channels,
+	.set_channels		= ice_set_channels,
+	.get_ts_info		= ice_get_ts_info,
+	.get_per_queue_coalesce	= ice_get_per_q_coalesce,
+	.set_per_queue_coalesce	= ice_set_per_q_coalesce,
+	.get_fecparam		= ice_get_fecparam,
+	.set_fecparam		= ice_set_fecparam,
+	.get_module_info	= ice_get_module_info,
+	.get_module_eeprom	= ice_get_module_eeprom,
+};
+
+static const struct ethtool_ops ice_ethtool_safe_mode_ops = {
+	.get_link_ksettings	= ice_get_link_ksettings,
+	.set_link_ksettings	= ice_set_link_ksettings,
+	.get_drvinfo		= ice_get_drvinfo,
+	.get_regs_len		= ice_get_regs_len,
+	.get_regs		= ice_get_regs,
+	.get_wol		= ice_get_wol,
+	.set_wol		= ice_set_wol,
+	.get_msglevel		= ice_get_msglevel,
+	.set_msglevel		= ice_set_msglevel,
+	.get_link		= ethtool_op_get_link,
+	.get_eeprom_len		= ice_get_eeprom_len,
+	.get_eeprom		= ice_get_eeprom,
+	.get_strings		= ice_get_strings,
+	.get_ethtool_stats	= ice_get_ethtool_stats,
+	.get_sset_count		= ice_get_sset_count,
+	.get_ringparam		= ice_get_ringparam,
+	.set_ringparam		= ice_set_ringparam,
+	.nway_reset		= ice_nway_reset,
+	.get_channels		= ice_get_channels,
+};
+
+/**
+ * ice_set_ethtool_safe_mode_ops - setup safe mode ethtool ops
+ * @netdev: network interface device structure
+ */
+void ice_set_ethtool_safe_mode_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &ice_ethtool_safe_mode_ops;
+}
+
+static const struct ethtool_ops ice_ethtool_repr_ops = {
+	.get_drvinfo		= ice_repr_get_drvinfo,
+	.get_link		= ethtool_op_get_link,
+	.get_strings		= ice_repr_get_strings,
+	.get_ethtool_stats      = ice_repr_get_ethtool_stats,
+	.get_sset_count		= ice_repr_get_sset_count,
+};
+
+/**
+ * ice_set_ethtool_repr_ops - setup VF's port representor ethtool ops
+ * @netdev: network interface device structure
+ */
+void ice_set_ethtool_repr_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &ice_ethtool_repr_ops;
+}
+
+/**
+ * ice_set_ethtool_ops - setup netdev ethtool ops
+ * @netdev: network interface device structure
+ *
+ * setup netdev ethtool ops with ice specific ops
+ */
+void ice_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &ice_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.h b/drivers/net/ethernet/intel/ice/ice_ethtool.h
new file mode 100644
index 0000000000..b403ee79cd
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (C) 2023 Intel Corporation */
+
+#ifndef _ICE_ETHTOOL_H_
+#define _ICE_ETHTOOL_H_
+
+struct ice_phy_type_to_ethtool {
+	u64 aq_link_speed;
+	u8 link_mode;
+};
+
+/* Macro to make PHY type to Ethtool link mode table entry.
+ * The index is the PHY type.
+ */
+#define ICE_PHY_TYPE(LINK_SPEED, ETHTOOL_LINK_MODE) {\
+	.aq_link_speed = ICE_AQ_LINK_SPEED_##LINK_SPEED, \
+	.link_mode = ETHTOOL_LINK_MODE_##ETHTOOL_LINK_MODE##_BIT, \
+}
+
+/* Lookup table mapping PHY type low to link speed and Ethtool link modes.
+ * Array index corresponds to HW PHY type bit, see
+ * ice_adminq_cmd.h:ICE_PHY_TYPE_LOW_*.
+ */
+static const struct ice_phy_type_to_ethtool
+phy_type_low_lkup[] = {
+	[0] = ICE_PHY_TYPE(100MB, 100baseT_Full),
+	[1] = ICE_PHY_TYPE(100MB, 100baseT_Full),
+	[2] = ICE_PHY_TYPE(1000MB, 1000baseT_Full),
+	[3] = ICE_PHY_TYPE(1000MB, 1000baseX_Full),
+	[4] = ICE_PHY_TYPE(1000MB, 1000baseX_Full),
+	[5] = ICE_PHY_TYPE(1000MB, 1000baseKX_Full),
+	[6] = ICE_PHY_TYPE(1000MB, 1000baseT_Full),
+	[7] = ICE_PHY_TYPE(2500MB, 2500baseT_Full),
+	[8] = ICE_PHY_TYPE(2500MB, 2500baseX_Full),
+	[9] = ICE_PHY_TYPE(2500MB, 2500baseX_Full),
+	[10] = ICE_PHY_TYPE(5GB, 5000baseT_Full),
+	[11] = ICE_PHY_TYPE(5GB, 5000baseT_Full),
+	[12] = ICE_PHY_TYPE(10GB, 10000baseT_Full),
+	[13] = ICE_PHY_TYPE(10GB, 10000baseCR_Full),
+	[14] = ICE_PHY_TYPE(10GB, 10000baseSR_Full),
+	[15] = ICE_PHY_TYPE(10GB, 10000baseLR_Full),
+	[16] = ICE_PHY_TYPE(10GB, 10000baseKR_Full),
+	[17] = ICE_PHY_TYPE(10GB, 10000baseCR_Full),
+	[18] = ICE_PHY_TYPE(10GB, 10000baseKR_Full),
+	[19] = ICE_PHY_TYPE(25GB, 25000baseCR_Full),
+	[20] = ICE_PHY_TYPE(25GB, 25000baseCR_Full),
+	[21] = ICE_PHY_TYPE(25GB, 25000baseCR_Full),
+	[22] = ICE_PHY_TYPE(25GB, 25000baseCR_Full),
+	[23] = ICE_PHY_TYPE(25GB, 25000baseSR_Full),
+	[24] = ICE_PHY_TYPE(25GB, 25000baseSR_Full),
+	[25] = ICE_PHY_TYPE(25GB, 25000baseKR_Full),
+	[26] = ICE_PHY_TYPE(25GB, 25000baseKR_Full),
+	[27] = ICE_PHY_TYPE(25GB, 25000baseKR_Full),
+	[28] = ICE_PHY_TYPE(25GB, 25000baseSR_Full),
+	[29] = ICE_PHY_TYPE(25GB, 25000baseCR_Full),
+	[30] = ICE_PHY_TYPE(40GB, 40000baseCR4_Full),
+	[31] = ICE_PHY_TYPE(40GB, 40000baseSR4_Full),
+	[32] = ICE_PHY_TYPE(40GB, 40000baseLR4_Full),
+	[33] = ICE_PHY_TYPE(40GB, 40000baseKR4_Full),
+	[34] = ICE_PHY_TYPE(40GB, 40000baseSR4_Full),
+	[35] = ICE_PHY_TYPE(40GB, 40000baseCR4_Full),
+	[36] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full),
+	[37] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full),
+	[38] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full),
+	[39] = ICE_PHY_TYPE(50GB, 50000baseKR2_Full),
+	[40] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full),
+	[41] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full),
+	[42] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full),
+	[43] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full),
+	[44] = ICE_PHY_TYPE(50GB, 50000baseCR_Full),
+	[45] = ICE_PHY_TYPE(50GB, 50000baseSR_Full),
+	[46] = ICE_PHY_TYPE(50GB, 50000baseLR_ER_FR_Full),
+	[47] = ICE_PHY_TYPE(50GB, 50000baseLR_ER_FR_Full),
+	[48] = ICE_PHY_TYPE(50GB, 50000baseKR_Full),
+	[49] = ICE_PHY_TYPE(50GB, 50000baseSR_Full),
+	[50] = ICE_PHY_TYPE(50GB, 50000baseCR_Full),
+	[51] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full),
+	[52] = ICE_PHY_TYPE(100GB, 100000baseSR4_Full),
+	[53] = ICE_PHY_TYPE(100GB, 100000baseLR4_ER4_Full),
+	[54] = ICE_PHY_TYPE(100GB, 100000baseKR4_Full),
+	[55] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full),
+	[56] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full),
+	[57] = ICE_PHY_TYPE(100GB, 100000baseSR4_Full),
+	[58] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full),
+	[59] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full),
+	[60] = ICE_PHY_TYPE(100GB, 100000baseKR4_Full),
+	[61] = ICE_PHY_TYPE(100GB, 100000baseCR2_Full),
+	[62] = ICE_PHY_TYPE(100GB, 100000baseSR2_Full),
+	[63] = ICE_PHY_TYPE(100GB, 100000baseLR4_ER4_Full),
+};
+
+/* Lookup table mapping PHY type high to link speed and Ethtool link modes.
+ * Array index corresponds to HW PHY type bit, see
+ * ice_adminq_cmd.h:ICE_PHY_TYPE_HIGH_*
+ */
+static const struct ice_phy_type_to_ethtool
+phy_type_high_lkup[] = {
+	[0] = ICE_PHY_TYPE(100GB, 100000baseKR2_Full),
+	[1] = ICE_PHY_TYPE(100GB, 100000baseSR2_Full),
+	[2] = ICE_PHY_TYPE(100GB, 100000baseCR2_Full),
+	[3] = ICE_PHY_TYPE(100GB, 100000baseSR2_Full),
+	[4] = ICE_PHY_TYPE(100GB, 100000baseCR2_Full),
+};
+
+#endif /* !_ICE_ETHTOOL_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
new file mode 100644
index 0000000000..8c6e13f87b
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -0,0 +1,1939 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+/* flow director ethtool support for ice */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_fdir.h"
+#include "ice_flow.h"
+
+static struct in6_addr full_ipv6_addr_mask = {
+	.in6_u = {
+		.u6_addr8 = {
+			0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+			0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+		}
+	}
+};
+
+static struct in6_addr zero_ipv6_addr_mask = {
+	.in6_u = {
+		.u6_addr8 = {
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		}
+	}
+};
+
+/* calls to ice_flow_add_prof require the number of segments in the array
+ * for segs_cnt. In this code that is one more than the index.
+ */
+#define TNL_SEG_CNT(_TNL_) ((_TNL_) + 1)
+
+/**
+ * ice_fltr_to_ethtool_flow - convert filter type values to ethtool
+ * flow type values
+ * @flow: filter type to be converted
+ *
+ * Returns the corresponding ethtool flow type.
+ */
+static int ice_fltr_to_ethtool_flow(enum ice_fltr_ptype flow)
+{
+	switch (flow) {
+	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		return TCP_V4_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		return UDP_V4_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
+		return SCTP_V4_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
+		return IPV4_USER_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		return TCP_V6_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		return UDP_V6_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
+		return SCTP_V6_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
+		return IPV6_USER_FLOW;
+	default:
+		/* 0 is undefined ethtool flow */
+		return 0;
+	}
+}
+
+/**
+ * ice_ethtool_flow_to_fltr - convert ethtool flow type to filter enum
+ * @eth: Ethtool flow type to be converted
+ *
+ * Returns flow enum
+ */
+static enum ice_fltr_ptype ice_ethtool_flow_to_fltr(int eth)
+{
+	switch (eth) {
+	case TCP_V4_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV4_TCP;
+	case UDP_V4_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+	case SCTP_V4_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV4_SCTP;
+	case IPV4_USER_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
+	case TCP_V6_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV6_TCP;
+	case UDP_V6_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+	case SCTP_V6_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV6_SCTP;
+	case IPV6_USER_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV6_OTHER;
+	default:
+		return ICE_FLTR_PTYPE_NONF_NONE;
+	}
+}
+
+/**
+ * ice_is_mask_valid - check mask field set
+ * @mask: full mask to check
+ * @field: field for which mask should be valid
+ *
+ * If the mask is fully set return true. If it is not valid for field return
+ * false.
+ */
+static bool ice_is_mask_valid(u64 mask, u64 field)
+{
+	return (mask & field) == field;
+}
+
+/**
+ * ice_get_ethtool_fdir_entry - fill ethtool structure with fdir filter data
+ * @hw: hardware structure that contains filter list
+ * @cmd: ethtool command data structure to receive the filter data
+ *
+ * Returns 0 on success and -EINVAL on failure
+ */
+int ice_get_ethtool_fdir_entry(struct ice_hw *hw, struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp;
+	struct ice_fdir_fltr *rule;
+	int ret = 0;
+	u16 idx;
+
+	fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+	mutex_lock(&hw->fdir_fltr_lock);
+
+	rule = ice_fdir_find_fltr_by_idx(hw, fsp->location);
+
+	if (!rule || fsp->location != rule->fltr_id) {
+		ret = -EINVAL;
+		goto release_lock;
+	}
+
+	fsp->flow_type = ice_fltr_to_ethtool_flow(rule->flow_type);
+
+	memset(&fsp->m_u, 0, sizeof(fsp->m_u));
+	memset(&fsp->m_ext, 0, sizeof(fsp->m_ext));
+
+	switch (fsp->flow_type) {
+	case IPV4_USER_FLOW:
+		fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+		fsp->h_u.usr_ip4_spec.proto = 0;
+		fsp->h_u.usr_ip4_spec.l4_4_bytes = rule->ip.v4.l4_header;
+		fsp->h_u.usr_ip4_spec.tos = rule->ip.v4.tos;
+		fsp->h_u.usr_ip4_spec.ip4src = rule->ip.v4.src_ip;
+		fsp->h_u.usr_ip4_spec.ip4dst = rule->ip.v4.dst_ip;
+		fsp->m_u.usr_ip4_spec.ip4src = rule->mask.v4.src_ip;
+		fsp->m_u.usr_ip4_spec.ip4dst = rule->mask.v4.dst_ip;
+		fsp->m_u.usr_ip4_spec.ip_ver = 0xFF;
+		fsp->m_u.usr_ip4_spec.proto = 0;
+		fsp->m_u.usr_ip4_spec.l4_4_bytes = rule->mask.v4.l4_header;
+		fsp->m_u.usr_ip4_spec.tos = rule->mask.v4.tos;
+		break;
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+		fsp->h_u.tcp_ip4_spec.psrc = rule->ip.v4.src_port;
+		fsp->h_u.tcp_ip4_spec.pdst = rule->ip.v4.dst_port;
+		fsp->h_u.tcp_ip4_spec.ip4src = rule->ip.v4.src_ip;
+		fsp->h_u.tcp_ip4_spec.ip4dst = rule->ip.v4.dst_ip;
+		fsp->m_u.tcp_ip4_spec.psrc = rule->mask.v4.src_port;
+		fsp->m_u.tcp_ip4_spec.pdst = rule->mask.v4.dst_port;
+		fsp->m_u.tcp_ip4_spec.ip4src = rule->mask.v4.src_ip;
+		fsp->m_u.tcp_ip4_spec.ip4dst = rule->mask.v4.dst_ip;
+		break;
+	case IPV6_USER_FLOW:
+		fsp->h_u.usr_ip6_spec.l4_4_bytes = rule->ip.v6.l4_header;
+		fsp->h_u.usr_ip6_spec.tclass = rule->ip.v6.tc;
+		fsp->h_u.usr_ip6_spec.l4_proto = rule->ip.v6.proto;
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6src, rule->ip.v6.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, rule->ip.v6.dst_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.tcp_ip6_spec.ip6src, rule->mask.v6.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.tcp_ip6_spec.ip6dst, rule->mask.v6.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->m_u.usr_ip6_spec.l4_4_bytes = rule->mask.v6.l4_header;
+		fsp->m_u.usr_ip6_spec.tclass = rule->mask.v6.tc;
+		fsp->m_u.usr_ip6_spec.l4_proto = rule->mask.v6.proto;
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6src, rule->ip.v6.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, rule->ip.v6.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->h_u.tcp_ip6_spec.psrc = rule->ip.v6.src_port;
+		fsp->h_u.tcp_ip6_spec.pdst = rule->ip.v6.dst_port;
+		memcpy(fsp->m_u.tcp_ip6_spec.ip6src,
+		       rule->mask.v6.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.tcp_ip6_spec.ip6dst,
+		       rule->mask.v6.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->m_u.tcp_ip6_spec.psrc = rule->mask.v6.src_port;
+		fsp->m_u.tcp_ip6_spec.pdst = rule->mask.v6.dst_port;
+		fsp->h_u.tcp_ip6_spec.tclass = rule->ip.v6.tc;
+		fsp->m_u.tcp_ip6_spec.tclass = rule->mask.v6.tc;
+		break;
+	default:
+		break;
+	}
+
+	if (rule->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DROP_PKT)
+		fsp->ring_cookie = RX_CLS_FLOW_DISC;
+	else
+		fsp->ring_cookie = rule->orig_q_index;
+
+	idx = ice_ethtool_flow_to_fltr(fsp->flow_type);
+	if (idx == ICE_FLTR_PTYPE_NONF_NONE) {
+		dev_err(ice_hw_to_dev(hw), "Missing input index for flow_type %d\n",
+			rule->flow_type);
+		ret = -EINVAL;
+	}
+
+release_lock:
+	mutex_unlock(&hw->fdir_fltr_lock);
+	return ret;
+}
+
+/**
+ * ice_get_fdir_fltr_ids - fill buffer with filter IDs of active filters
+ * @hw: hardware structure containing the filter list
+ * @cmd: ethtool command data structure
+ * @rule_locs: ethtool array passed in from OS to receive filter IDs
+ *
+ * Returns 0 as expected for success by ethtool
+ */
+int
+ice_get_fdir_fltr_ids(struct ice_hw *hw, struct ethtool_rxnfc *cmd,
+		      u32 *rule_locs)
+{
+	struct ice_fdir_fltr *f_rule;
+	unsigned int cnt = 0;
+	int val = 0;
+
+	/* report total rule count */
+	cmd->data = ice_get_fdir_cnt_all(hw);
+
+	mutex_lock(&hw->fdir_fltr_lock);
+
+	list_for_each_entry(f_rule, &hw->fdir_list_head, fltr_node) {
+		if (cnt == cmd->rule_cnt) {
+			val = -EMSGSIZE;
+			goto release_lock;
+		}
+		rule_locs[cnt] = f_rule->fltr_id;
+		cnt++;
+	}
+
+release_lock:
+	mutex_unlock(&hw->fdir_fltr_lock);
+	if (!val)
+		cmd->rule_cnt = cnt;
+	return val;
+}
+
+/**
+ * ice_fdir_remap_entries - update the FDir entries in profile
+ * @prof: FDir structure pointer
+ * @tun: tunneled or non-tunneled packet
+ * @idx: FDir entry index
+ */
+static void
+ice_fdir_remap_entries(struct ice_fd_hw_prof *prof, int tun, int idx)
+{
+	if (idx != prof->cnt && tun < ICE_FD_HW_SEG_MAX) {
+		int i;
+
+		for (i = idx; i < (prof->cnt - 1); i++) {
+			u64 old_entry_h;
+
+			old_entry_h = prof->entry_h[i + 1][tun];
+			prof->entry_h[i][tun] = old_entry_h;
+			prof->vsi_h[i] = prof->vsi_h[i + 1];
+		}
+
+		prof->entry_h[i][tun] = 0;
+		prof->vsi_h[i] = 0;
+	}
+}
+
+/**
+ * ice_fdir_rem_adq_chnl - remove an ADQ channel from HW filter rules
+ * @hw: hardware structure containing filter list
+ * @vsi_idx: VSI handle
+ */
+void ice_fdir_rem_adq_chnl(struct ice_hw *hw, u16 vsi_idx)
+{
+	int status, flow;
+
+	if (!hw->fdir_prof)
+		return;
+
+	for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++) {
+		struct ice_fd_hw_prof *prof = hw->fdir_prof[flow];
+		int tun, i;
+
+		if (!prof || !prof->cnt)
+			continue;
+
+		for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+			u64 prof_id;
+
+			prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+
+			for (i = 0; i < prof->cnt; i++) {
+				if (prof->vsi_h[i] != vsi_idx)
+					continue;
+
+				prof->entry_h[i][tun] = 0;
+				prof->vsi_h[i] = 0;
+				break;
+			}
+
+			/* after clearing FDir entries update the remaining */
+			ice_fdir_remap_entries(prof, tun, i);
+
+			/* find flow profile corresponding to prof_id and clear
+			 * vsi_idx from bitmap.
+			 */
+			status = ice_flow_rem_vsi_prof(hw, vsi_idx, prof_id);
+			if (status) {
+				dev_err(ice_hw_to_dev(hw), "ice_flow_rem_vsi_prof() failed status=%d\n",
+					status);
+			}
+		}
+		prof->cnt--;
+	}
+}
+
+/**
+ * ice_fdir_get_hw_prof - return the ice_fd_hw_proc associated with a flow
+ * @hw: hardware structure containing the filter list
+ * @blk: hardware block
+ * @flow: FDir flow type to release
+ */
+static struct ice_fd_hw_prof *
+ice_fdir_get_hw_prof(struct ice_hw *hw, enum ice_block blk, int flow)
+{
+	if (blk == ICE_BLK_FD && hw->fdir_prof)
+		return hw->fdir_prof[flow];
+
+	return NULL;
+}
+
+/**
+ * ice_fdir_erase_flow_from_hw - remove a flow from the HW profile tables
+ * @hw: hardware structure containing the filter list
+ * @blk: hardware block
+ * @flow: FDir flow type to release
+ */
+static void
+ice_fdir_erase_flow_from_hw(struct ice_hw *hw, enum ice_block blk, int flow)
+{
+	struct ice_fd_hw_prof *prof = ice_fdir_get_hw_prof(hw, blk, flow);
+	int tun;
+
+	if (!prof)
+		return;
+
+	for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+		u64 prof_id;
+		int j;
+
+		prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+		for (j = 0; j < prof->cnt; j++) {
+			u16 vsi_num;
+
+			if (!prof->entry_h[j][tun] || !prof->vsi_h[j])
+				continue;
+			vsi_num = ice_get_hw_vsi_num(hw, prof->vsi_h[j]);
+			ice_rem_prof_id_flow(hw, blk, vsi_num, prof_id);
+			ice_flow_rem_entry(hw, blk, prof->entry_h[j][tun]);
+			prof->entry_h[j][tun] = 0;
+		}
+		ice_flow_rem_prof(hw, blk, prof_id);
+	}
+}
+
+/**
+ * ice_fdir_rem_flow - release the ice_flow structures for a filter type
+ * @hw: hardware structure containing the filter list
+ * @blk: hardware block
+ * @flow_type: FDir flow type to release
+ */
+static void
+ice_fdir_rem_flow(struct ice_hw *hw, enum ice_block blk,
+		  enum ice_fltr_ptype flow_type)
+{
+	int flow = (int)flow_type & ~FLOW_EXT;
+	struct ice_fd_hw_prof *prof;
+	int tun, i;
+
+	prof = ice_fdir_get_hw_prof(hw, blk, flow);
+	if (!prof)
+		return;
+
+	ice_fdir_erase_flow_from_hw(hw, blk, flow);
+	for (i = 0; i < prof->cnt; i++)
+		prof->vsi_h[i] = 0;
+	for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+		if (!prof->fdir_seg[tun])
+			continue;
+		devm_kfree(ice_hw_to_dev(hw), prof->fdir_seg[tun]);
+		prof->fdir_seg[tun] = NULL;
+	}
+	prof->cnt = 0;
+}
+
+/**
+ * ice_fdir_release_flows - release all flows in use for later replay
+ * @hw: pointer to HW instance
+ */
+void ice_fdir_release_flows(struct ice_hw *hw)
+{
+	int flow;
+
+	/* release Flow Director HW table entries */
+	for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++)
+		ice_fdir_erase_flow_from_hw(hw, ICE_BLK_FD, flow);
+}
+
+/**
+ * ice_fdir_replay_flows - replay HW Flow Director filter info
+ * @hw: pointer to HW instance
+ */
+void ice_fdir_replay_flows(struct ice_hw *hw)
+{
+	int flow;
+
+	for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++) {
+		int tun;
+
+		if (!hw->fdir_prof[flow] || !hw->fdir_prof[flow]->cnt)
+			continue;
+		for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+			struct ice_flow_prof *hw_prof;
+			struct ice_fd_hw_prof *prof;
+			u64 prof_id;
+			int j;
+
+			prof = hw->fdir_prof[flow];
+			prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+			ice_flow_add_prof(hw, ICE_BLK_FD, ICE_FLOW_RX, prof_id,
+					  prof->fdir_seg[tun], TNL_SEG_CNT(tun),
+					  &hw_prof);
+			for (j = 0; j < prof->cnt; j++) {
+				enum ice_flow_priority prio;
+				u64 entry_h = 0;
+				int err;
+
+				prio = ICE_FLOW_PRIO_NORMAL;
+				err = ice_flow_add_entry(hw, ICE_BLK_FD,
+							 prof_id,
+							 prof->vsi_h[0],
+							 prof->vsi_h[j],
+							 prio, prof->fdir_seg,
+							 &entry_h);
+				if (err) {
+					dev_err(ice_hw_to_dev(hw), "Could not replay Flow Director, flow type %d\n",
+						flow);
+					continue;
+				}
+				prof->entry_h[j][tun] = entry_h;
+			}
+		}
+	}
+}
+
+/**
+ * ice_parse_rx_flow_user_data - deconstruct user-defined data
+ * @fsp: pointer to ethtool Rx flow specification
+ * @data: pointer to userdef data structure for storage
+ *
+ * Returns 0 on success, negative error value on failure
+ */
+static int
+ice_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
+			    struct ice_rx_flow_userdef *data)
+{
+	u64 value, mask;
+
+	memset(data, 0, sizeof(*data));
+	if (!(fsp->flow_type & FLOW_EXT))
+		return 0;
+
+	value = be64_to_cpu(*((__force __be64 *)fsp->h_ext.data));
+	mask = be64_to_cpu(*((__force __be64 *)fsp->m_ext.data));
+	if (!mask)
+		return 0;
+
+#define ICE_USERDEF_FLEX_WORD_M	GENMASK_ULL(15, 0)
+#define ICE_USERDEF_FLEX_OFFS_S	16
+#define ICE_USERDEF_FLEX_OFFS_M	GENMASK_ULL(31, ICE_USERDEF_FLEX_OFFS_S)
+#define ICE_USERDEF_FLEX_FLTR_M	GENMASK_ULL(31, 0)
+
+	/* 0x1fe is the maximum value for offsets stored in the internal
+	 * filtering tables.
+	 */
+#define ICE_USERDEF_FLEX_MAX_OFFS_VAL 0x1fe
+
+	if (!ice_is_mask_valid(mask, ICE_USERDEF_FLEX_FLTR_M) ||
+	    value > ICE_USERDEF_FLEX_FLTR_M)
+		return -EINVAL;
+
+	data->flex_word = value & ICE_USERDEF_FLEX_WORD_M;
+	data->flex_offset = (value & ICE_USERDEF_FLEX_OFFS_M) >>
+			     ICE_USERDEF_FLEX_OFFS_S;
+	if (data->flex_offset > ICE_USERDEF_FLEX_MAX_OFFS_VAL)
+		return -EINVAL;
+
+	data->flex_fltr = true;
+
+	return 0;
+}
+
+/**
+ * ice_fdir_num_avail_fltr - return the number of unused flow director filters
+ * @hw: pointer to hardware structure
+ * @vsi: software VSI structure
+ *
+ * There are 2 filter pools: guaranteed and best effort(shared). Each VSI can
+ * use filters from either pool. The guaranteed pool is divided between VSIs.
+ * The best effort filter pool is common to all VSIs and is a device shared
+ * resource pool. The number of filters available to this VSI is the sum of
+ * the VSIs guaranteed filter pool and the global available best effort
+ * filter pool.
+ *
+ * Returns the number of available flow director filters to this VSI
+ */
+static int ice_fdir_num_avail_fltr(struct ice_hw *hw, struct ice_vsi *vsi)
+{
+	u16 vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
+	u16 num_guar;
+	u16 num_be;
+
+	/* total guaranteed filters assigned to this VSI */
+	num_guar = vsi->num_gfltr;
+
+	/* minus the guaranteed filters programed by this VSI */
+	num_guar -= (rd32(hw, VSIQF_FD_CNT(vsi_num)) &
+		     VSIQF_FD_CNT_FD_GCNT_M) >> VSIQF_FD_CNT_FD_GCNT_S;
+
+	/* total global best effort filters */
+	num_be = hw->func_caps.fd_fltr_best_effort;
+
+	/* minus the global best effort filters programmed */
+	num_be -= (rd32(hw, GLQF_FD_CNT) & GLQF_FD_CNT_FD_BCNT_M) >>
+		   GLQF_FD_CNT_FD_BCNT_S;
+
+	return num_guar + num_be;
+}
+
+/**
+ * ice_fdir_alloc_flow_prof - allocate FDir flow profile structure(s)
+ * @hw: HW structure containing the FDir flow profile structure(s)
+ * @flow: flow type to allocate the flow profile for
+ *
+ * Allocate the fdir_prof and fdir_prof[flow] if not already created. Return 0
+ * on success and negative on error.
+ */
+static int
+ice_fdir_alloc_flow_prof(struct ice_hw *hw, enum ice_fltr_ptype flow)
+{
+	if (!hw)
+		return -EINVAL;
+
+	if (!hw->fdir_prof) {
+		hw->fdir_prof = devm_kcalloc(ice_hw_to_dev(hw),
+					     ICE_FLTR_PTYPE_MAX,
+					     sizeof(*hw->fdir_prof),
+					     GFP_KERNEL);
+		if (!hw->fdir_prof)
+			return -ENOMEM;
+	}
+
+	if (!hw->fdir_prof[flow]) {
+		hw->fdir_prof[flow] = devm_kzalloc(ice_hw_to_dev(hw),
+						   sizeof(**hw->fdir_prof),
+						   GFP_KERNEL);
+		if (!hw->fdir_prof[flow])
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_fdir_prof_vsi_idx - find or insert a vsi_idx in structure
+ * @prof: pointer to flow director HW profile
+ * @vsi_idx: vsi_idx to locate
+ *
+ * return the index of the vsi_idx. if vsi_idx is not found insert it
+ * into the vsi_h table.
+ */
+static u16
+ice_fdir_prof_vsi_idx(struct ice_fd_hw_prof *prof, int vsi_idx)
+{
+	u16 idx = 0;
+
+	for (idx = 0; idx < prof->cnt; idx++)
+		if (prof->vsi_h[idx] == vsi_idx)
+			return idx;
+
+	if (idx == prof->cnt)
+		prof->vsi_h[prof->cnt++] = vsi_idx;
+	return idx;
+}
+
+/**
+ * ice_fdir_set_hw_fltr_rule - Configure HW tables to generate a FDir rule
+ * @pf: pointer to the PF structure
+ * @seg: protocol header description pointer
+ * @flow: filter enum
+ * @tun: FDir segment to program
+ */
+static int
+ice_fdir_set_hw_fltr_rule(struct ice_pf *pf, struct ice_flow_seg_info *seg,
+			  enum ice_fltr_ptype flow, enum ice_fd_hw_seg tun)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_vsi *main_vsi, *ctrl_vsi;
+	struct ice_flow_seg_info *old_seg;
+	struct ice_flow_prof *prof = NULL;
+	struct ice_fd_hw_prof *hw_prof;
+	struct ice_hw *hw = &pf->hw;
+	u64 entry1_h = 0;
+	u64 entry2_h = 0;
+	bool del_last;
+	u64 prof_id;
+	int err;
+	int idx;
+
+	main_vsi = ice_get_main_vsi(pf);
+	if (!main_vsi)
+		return -EINVAL;
+
+	ctrl_vsi = ice_get_ctrl_vsi(pf);
+	if (!ctrl_vsi)
+		return -EINVAL;
+
+	err = ice_fdir_alloc_flow_prof(hw, flow);
+	if (err)
+		return err;
+
+	hw_prof = hw->fdir_prof[flow];
+	old_seg = hw_prof->fdir_seg[tun];
+	if (old_seg) {
+		/* This flow_type already has a changed input set.
+		 * If it matches the requested input set then we are
+		 * done. Or, if it's different then it's an error.
+		 */
+		if (!memcmp(old_seg, seg, sizeof(*seg)))
+			return -EEXIST;
+
+		/* if there are FDir filters using this flow,
+		 * then return error.
+		 */
+		if (hw->fdir_fltr_cnt[flow]) {
+			dev_err(dev, "Failed to add filter.  Flow director filters on each port must have the same input set.\n");
+			return -EINVAL;
+		}
+
+		if (ice_is_arfs_using_perfect_flow(hw, flow)) {
+			dev_err(dev, "aRFS using perfect flow type %d, cannot change input set\n",
+				flow);
+			return -EINVAL;
+		}
+
+		/* remove HW filter definition */
+		ice_fdir_rem_flow(hw, ICE_BLK_FD, flow);
+	}
+
+	/* Adding a profile, but there is only one header supported.
+	 * That is the final parameters are 1 header (segment), no
+	 * actions (NULL) and zero actions 0.
+	 */
+	prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+	err = ice_flow_add_prof(hw, ICE_BLK_FD, ICE_FLOW_RX, prof_id, seg,
+				TNL_SEG_CNT(tun), &prof);
+	if (err)
+		return err;
+	err = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, main_vsi->idx,
+				 main_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+				 seg, &entry1_h);
+	if (err)
+		goto err_prof;
+	err = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, main_vsi->idx,
+				 ctrl_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+				 seg, &entry2_h);
+	if (err)
+		goto err_entry;
+
+	hw_prof->fdir_seg[tun] = seg;
+	hw_prof->entry_h[0][tun] = entry1_h;
+	hw_prof->entry_h[1][tun] = entry2_h;
+	hw_prof->vsi_h[0] = main_vsi->idx;
+	hw_prof->vsi_h[1] = ctrl_vsi->idx;
+	if (!hw_prof->cnt)
+		hw_prof->cnt = 2;
+
+	for (idx = 1; idx < ICE_CHNL_MAX_TC; idx++) {
+		u16 vsi_idx;
+		u16 vsi_h;
+
+		if (!ice_is_adq_active(pf) || !main_vsi->tc_map_vsi[idx])
+			continue;
+
+		entry1_h = 0;
+		vsi_h = main_vsi->tc_map_vsi[idx]->idx;
+		err = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id,
+					 main_vsi->idx, vsi_h,
+					 ICE_FLOW_PRIO_NORMAL, seg,
+					 &entry1_h);
+		if (err) {
+			dev_err(dev, "Could not add Channel VSI %d to flow group\n",
+				idx);
+			goto err_unroll;
+		}
+
+		vsi_idx = ice_fdir_prof_vsi_idx(hw_prof,
+						main_vsi->tc_map_vsi[idx]->idx);
+		hw_prof->entry_h[vsi_idx][tun] = entry1_h;
+	}
+
+	return 0;
+
+err_unroll:
+	entry1_h = 0;
+	hw_prof->fdir_seg[tun] = NULL;
+
+	/* The variable del_last will be used to determine when to clean up
+	 * the VSI group data. The VSI data is not needed if there are no
+	 * segments.
+	 */
+	del_last = true;
+	for (idx = 0; idx < ICE_FD_HW_SEG_MAX; idx++)
+		if (hw_prof->fdir_seg[idx]) {
+			del_last = false;
+			break;
+		}
+
+	for (idx = 0; idx < hw_prof->cnt; idx++) {
+		u16 vsi_num = ice_get_hw_vsi_num(hw, hw_prof->vsi_h[idx]);
+
+		if (!hw_prof->entry_h[idx][tun])
+			continue;
+		ice_rem_prof_id_flow(hw, ICE_BLK_FD, vsi_num, prof_id);
+		ice_flow_rem_entry(hw, ICE_BLK_FD, hw_prof->entry_h[idx][tun]);
+		hw_prof->entry_h[idx][tun] = 0;
+		if (del_last)
+			hw_prof->vsi_h[idx] = 0;
+	}
+	if (del_last)
+		hw_prof->cnt = 0;
+err_entry:
+	ice_rem_prof_id_flow(hw, ICE_BLK_FD,
+			     ice_get_hw_vsi_num(hw, main_vsi->idx), prof_id);
+	ice_flow_rem_entry(hw, ICE_BLK_FD, entry1_h);
+err_prof:
+	ice_flow_rem_prof(hw, ICE_BLK_FD, prof_id);
+	dev_err(dev, "Failed to add filter.  Flow director filters on each port must have the same input set.\n");
+
+	return err;
+}
+
+/**
+ * ice_set_init_fdir_seg
+ * @seg: flow segment for programming
+ * @l3_proto: ICE_FLOW_SEG_HDR_IPV4 or ICE_FLOW_SEG_HDR_IPV6
+ * @l4_proto: ICE_FLOW_SEG_HDR_TCP or ICE_FLOW_SEG_HDR_UDP
+ *
+ * Set the configuration for perfect filters to the provided flow segment for
+ * programming the HW filter. This is to be called only when initializing
+ * filters as this function it assumes no filters exist.
+ */
+static int
+ice_set_init_fdir_seg(struct ice_flow_seg_info *seg,
+		      enum ice_flow_seg_hdr l3_proto,
+		      enum ice_flow_seg_hdr l4_proto)
+{
+	enum ice_flow_field src_addr, dst_addr, src_port, dst_port;
+
+	if (!seg)
+		return -EINVAL;
+
+	if (l3_proto == ICE_FLOW_SEG_HDR_IPV4) {
+		src_addr = ICE_FLOW_FIELD_IDX_IPV4_SA;
+		dst_addr = ICE_FLOW_FIELD_IDX_IPV4_DA;
+	} else if (l3_proto == ICE_FLOW_SEG_HDR_IPV6) {
+		src_addr = ICE_FLOW_FIELD_IDX_IPV6_SA;
+		dst_addr = ICE_FLOW_FIELD_IDX_IPV6_DA;
+	} else {
+		return -EINVAL;
+	}
+
+	if (l4_proto == ICE_FLOW_SEG_HDR_TCP) {
+		src_port = ICE_FLOW_FIELD_IDX_TCP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_TCP_DST_PORT;
+	} else if (l4_proto == ICE_FLOW_SEG_HDR_UDP) {
+		src_port = ICE_FLOW_FIELD_IDX_UDP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_UDP_DST_PORT;
+	} else {
+		return -EINVAL;
+	}
+
+	ICE_FLOW_SET_HDRS(seg, l3_proto | l4_proto);
+
+	/* IP source address */
+	ice_flow_set_fld(seg, src_addr, ICE_FLOW_FLD_OFF_INVAL,
+			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
+
+	/* IP destination address */
+	ice_flow_set_fld(seg, dst_addr, ICE_FLOW_FLD_OFF_INVAL,
+			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
+
+	/* Layer 4 source port */
+	ice_flow_set_fld(seg, src_port, ICE_FLOW_FLD_OFF_INVAL,
+			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
+
+	/* Layer 4 destination port */
+	ice_flow_set_fld(seg, dst_port, ICE_FLOW_FLD_OFF_INVAL,
+			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
+
+	return 0;
+}
+
+/**
+ * ice_create_init_fdir_rule
+ * @pf: PF structure
+ * @flow: filter enum
+ *
+ * Return error value or 0 on success.
+ */
+static int
+ice_create_init_fdir_rule(struct ice_pf *pf, enum ice_fltr_ptype flow)
+{
+	struct ice_flow_seg_info *seg, *tun_seg;
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	int ret;
+
+	/* if there is already a filter rule for kind return -EINVAL */
+	if (hw->fdir_prof && hw->fdir_prof[flow] &&
+	    hw->fdir_prof[flow]->fdir_seg[0])
+		return -EINVAL;
+
+	seg = devm_kzalloc(dev, sizeof(*seg), GFP_KERNEL);
+	if (!seg)
+		return -ENOMEM;
+
+	tun_seg = devm_kcalloc(dev, ICE_FD_HW_SEG_MAX, sizeof(*tun_seg),
+			       GFP_KERNEL);
+	if (!tun_seg) {
+		devm_kfree(dev, seg);
+		return -ENOMEM;
+	}
+
+	if (flow == ICE_FLTR_PTYPE_NONF_IPV4_TCP)
+		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV4,
+					    ICE_FLOW_SEG_HDR_TCP);
+	else if (flow == ICE_FLTR_PTYPE_NONF_IPV4_UDP)
+		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV4,
+					    ICE_FLOW_SEG_HDR_UDP);
+	else if (flow == ICE_FLTR_PTYPE_NONF_IPV6_TCP)
+		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV6,
+					    ICE_FLOW_SEG_HDR_TCP);
+	else if (flow == ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV6,
+					    ICE_FLOW_SEG_HDR_UDP);
+	else
+		ret = -EINVAL;
+	if (ret)
+		goto err_exit;
+
+	/* add filter for outer headers */
+	ret = ice_fdir_set_hw_fltr_rule(pf, seg, flow, ICE_FD_HW_SEG_NON_TUN);
+	if (ret)
+		/* could not write filter, free memory */
+		goto err_exit;
+
+	/* make tunneled filter HW entries if possible */
+	memcpy(&tun_seg[1], seg, sizeof(*seg));
+	ret = ice_fdir_set_hw_fltr_rule(pf, tun_seg, flow, ICE_FD_HW_SEG_TUN);
+	if (ret)
+		/* could not write tunnel filter, but outer header filter
+		 * exists
+		 */
+		devm_kfree(dev, tun_seg);
+
+	set_bit(flow, hw->fdir_perfect_fltr);
+	return ret;
+err_exit:
+	devm_kfree(dev, tun_seg);
+	devm_kfree(dev, seg);
+
+	return -EOPNOTSUPP;
+}
+
+/**
+ * ice_set_fdir_ip4_seg
+ * @seg: flow segment for programming
+ * @tcp_ip4_spec: mask data from ethtool
+ * @l4_proto: Layer 4 protocol to program
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ *		  false if not
+ *
+ * Set the mask data into the flow segment to be used to program HW
+ * table based on provided L4 protocol for IPv4
+ */
+static int
+ice_set_fdir_ip4_seg(struct ice_flow_seg_info *seg,
+		     struct ethtool_tcpip4_spec *tcp_ip4_spec,
+		     enum ice_flow_seg_hdr l4_proto, bool *perfect_fltr)
+{
+	enum ice_flow_field src_port, dst_port;
+
+	/* make sure we don't have any empty rule */
+	if (!tcp_ip4_spec->psrc && !tcp_ip4_spec->ip4src &&
+	    !tcp_ip4_spec->pdst && !tcp_ip4_spec->ip4dst)
+		return -EINVAL;
+
+	/* filtering on TOS not supported */
+	if (tcp_ip4_spec->tos)
+		return -EOPNOTSUPP;
+
+	if (l4_proto == ICE_FLOW_SEG_HDR_TCP) {
+		src_port = ICE_FLOW_FIELD_IDX_TCP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_TCP_DST_PORT;
+	} else if (l4_proto == ICE_FLOW_SEG_HDR_UDP) {
+		src_port = ICE_FLOW_FIELD_IDX_UDP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_UDP_DST_PORT;
+	} else if (l4_proto == ICE_FLOW_SEG_HDR_SCTP) {
+		src_port = ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_SCTP_DST_PORT;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	*perfect_fltr = true;
+	ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4 | l4_proto);
+
+	/* IP source address */
+	if (tcp_ip4_spec->ip4src == htonl(0xFFFFFFFF))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_SA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!tcp_ip4_spec->ip4src)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* IP destination address */
+	if (tcp_ip4_spec->ip4dst == htonl(0xFFFFFFFF))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_DA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!tcp_ip4_spec->ip4dst)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* Layer 4 source port */
+	if (tcp_ip4_spec->psrc == htons(0xFFFF))
+		ice_flow_set_fld(seg, src_port, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 false);
+	else if (!tcp_ip4_spec->psrc)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* Layer 4 destination port */
+	if (tcp_ip4_spec->pdst == htons(0xFFFF))
+		ice_flow_set_fld(seg, dst_port, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 false);
+	else if (!tcp_ip4_spec->pdst)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/**
+ * ice_set_fdir_ip4_usr_seg
+ * @seg: flow segment for programming
+ * @usr_ip4_spec: ethtool userdef packet offset
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ *		  false if not
+ *
+ * Set the offset data into the flow segment to be used to program HW
+ * table for IPv4
+ */
+static int
+ice_set_fdir_ip4_usr_seg(struct ice_flow_seg_info *seg,
+			 struct ethtool_usrip4_spec *usr_ip4_spec,
+			 bool *perfect_fltr)
+{
+	/* first 4 bytes of Layer 4 header */
+	if (usr_ip4_spec->l4_4_bytes)
+		return -EINVAL;
+	if (usr_ip4_spec->tos)
+		return -EINVAL;
+	if (usr_ip4_spec->ip_ver)
+		return -EINVAL;
+	/* Filtering on Layer 4 protocol not supported */
+	if (usr_ip4_spec->proto)
+		return -EOPNOTSUPP;
+	/* empty rules are not valid */
+	if (!usr_ip4_spec->ip4src && !usr_ip4_spec->ip4dst)
+		return -EINVAL;
+
+	*perfect_fltr = true;
+	ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4);
+
+	/* IP source address */
+	if (usr_ip4_spec->ip4src == htonl(0xFFFFFFFF))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_SA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!usr_ip4_spec->ip4src)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* IP destination address */
+	if (usr_ip4_spec->ip4dst == htonl(0xFFFFFFFF))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_DA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!usr_ip4_spec->ip4dst)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/**
+ * ice_set_fdir_ip6_seg
+ * @seg: flow segment for programming
+ * @tcp_ip6_spec: mask data from ethtool
+ * @l4_proto: Layer 4 protocol to program
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ *		  false if not
+ *
+ * Set the mask data into the flow segment to be used to program HW
+ * table based on provided L4 protocol for IPv6
+ */
+static int
+ice_set_fdir_ip6_seg(struct ice_flow_seg_info *seg,
+		     struct ethtool_tcpip6_spec *tcp_ip6_spec,
+		     enum ice_flow_seg_hdr l4_proto, bool *perfect_fltr)
+{
+	enum ice_flow_field src_port, dst_port;
+
+	/* make sure we don't have any empty rule */
+	if (!memcmp(tcp_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+		    sizeof(struct in6_addr)) &&
+	    !memcmp(tcp_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+		    sizeof(struct in6_addr)) &&
+	    !tcp_ip6_spec->psrc && !tcp_ip6_spec->pdst)
+		return -EINVAL;
+
+	/* filtering on TC not supported */
+	if (tcp_ip6_spec->tclass)
+		return -EOPNOTSUPP;
+
+	if (l4_proto == ICE_FLOW_SEG_HDR_TCP) {
+		src_port = ICE_FLOW_FIELD_IDX_TCP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_TCP_DST_PORT;
+	} else if (l4_proto == ICE_FLOW_SEG_HDR_UDP) {
+		src_port = ICE_FLOW_FIELD_IDX_UDP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_UDP_DST_PORT;
+	} else if (l4_proto == ICE_FLOW_SEG_HDR_SCTP) {
+		src_port = ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_SCTP_DST_PORT;
+	} else {
+		return -EINVAL;
+	}
+
+	*perfect_fltr = true;
+	ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV6 | l4_proto);
+
+	if (!memcmp(tcp_ip6_spec->ip6src, &full_ipv6_addr_mask,
+		    sizeof(struct in6_addr)))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_SA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!memcmp(tcp_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+			 sizeof(struct in6_addr)))
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	if (!memcmp(tcp_ip6_spec->ip6dst, &full_ipv6_addr_mask,
+		    sizeof(struct in6_addr)))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_DA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!memcmp(tcp_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+			 sizeof(struct in6_addr)))
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* Layer 4 source port */
+	if (tcp_ip6_spec->psrc == htons(0xFFFF))
+		ice_flow_set_fld(seg, src_port, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 false);
+	else if (!tcp_ip6_spec->psrc)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* Layer 4 destination port */
+	if (tcp_ip6_spec->pdst == htons(0xFFFF))
+		ice_flow_set_fld(seg, dst_port, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 false);
+	else if (!tcp_ip6_spec->pdst)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/**
+ * ice_set_fdir_ip6_usr_seg
+ * @seg: flow segment for programming
+ * @usr_ip6_spec: ethtool userdef packet offset
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ *		  false if not
+ *
+ * Set the offset data into the flow segment to be used to program HW
+ * table for IPv6
+ */
+static int
+ice_set_fdir_ip6_usr_seg(struct ice_flow_seg_info *seg,
+			 struct ethtool_usrip6_spec *usr_ip6_spec,
+			 bool *perfect_fltr)
+{
+	/* filtering on Layer 4 bytes not supported */
+	if (usr_ip6_spec->l4_4_bytes)
+		return -EOPNOTSUPP;
+	/* filtering on TC not supported */
+	if (usr_ip6_spec->tclass)
+		return -EOPNOTSUPP;
+	/* filtering on Layer 4 protocol not supported */
+	if (usr_ip6_spec->l4_proto)
+		return -EOPNOTSUPP;
+	/* empty rules are not valid */
+	if (!memcmp(usr_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+		    sizeof(struct in6_addr)) &&
+	    !memcmp(usr_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+		    sizeof(struct in6_addr)))
+		return -EINVAL;
+
+	*perfect_fltr = true;
+	ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV6);
+
+	if (!memcmp(usr_ip6_spec->ip6src, &full_ipv6_addr_mask,
+		    sizeof(struct in6_addr)))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_SA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!memcmp(usr_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+			 sizeof(struct in6_addr)))
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	if (!memcmp(usr_ip6_spec->ip6dst, &full_ipv6_addr_mask,
+		    sizeof(struct in6_addr)))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_DA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!memcmp(usr_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+			 sizeof(struct in6_addr)))
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/**
+ * ice_cfg_fdir_xtrct_seq - Configure extraction sequence for the given filter
+ * @pf: PF structure
+ * @fsp: pointer to ethtool Rx flow specification
+ * @user: user defined data from flow specification
+ *
+ * Returns 0 on success.
+ */
+static int
+ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp,
+		       struct ice_rx_flow_userdef *user)
+{
+	struct ice_flow_seg_info *seg, *tun_seg;
+	struct device *dev = ice_pf_to_dev(pf);
+	enum ice_fltr_ptype fltr_idx;
+	struct ice_hw *hw = &pf->hw;
+	bool perfect_filter;
+	int ret;
+
+	seg = devm_kzalloc(dev, sizeof(*seg), GFP_KERNEL);
+	if (!seg)
+		return -ENOMEM;
+
+	tun_seg = devm_kcalloc(dev, ICE_FD_HW_SEG_MAX, sizeof(*tun_seg),
+			       GFP_KERNEL);
+	if (!tun_seg) {
+		devm_kfree(dev, seg);
+		return -ENOMEM;
+	}
+
+	switch (fsp->flow_type & ~FLOW_EXT) {
+	case TCP_V4_FLOW:
+		ret = ice_set_fdir_ip4_seg(seg, &fsp->m_u.tcp_ip4_spec,
+					   ICE_FLOW_SEG_HDR_TCP,
+					   &perfect_filter);
+		break;
+	case UDP_V4_FLOW:
+		ret = ice_set_fdir_ip4_seg(seg, &fsp->m_u.tcp_ip4_spec,
+					   ICE_FLOW_SEG_HDR_UDP,
+					   &perfect_filter);
+		break;
+	case SCTP_V4_FLOW:
+		ret = ice_set_fdir_ip4_seg(seg, &fsp->m_u.tcp_ip4_spec,
+					   ICE_FLOW_SEG_HDR_SCTP,
+					   &perfect_filter);
+		break;
+	case IPV4_USER_FLOW:
+		ret = ice_set_fdir_ip4_usr_seg(seg, &fsp->m_u.usr_ip4_spec,
+					       &perfect_filter);
+		break;
+	case TCP_V6_FLOW:
+		ret = ice_set_fdir_ip6_seg(seg, &fsp->m_u.tcp_ip6_spec,
+					   ICE_FLOW_SEG_HDR_TCP,
+					   &perfect_filter);
+		break;
+	case UDP_V6_FLOW:
+		ret = ice_set_fdir_ip6_seg(seg, &fsp->m_u.tcp_ip6_spec,
+					   ICE_FLOW_SEG_HDR_UDP,
+					   &perfect_filter);
+		break;
+	case SCTP_V6_FLOW:
+		ret = ice_set_fdir_ip6_seg(seg, &fsp->m_u.tcp_ip6_spec,
+					   ICE_FLOW_SEG_HDR_SCTP,
+					   &perfect_filter);
+		break;
+	case IPV6_USER_FLOW:
+		ret = ice_set_fdir_ip6_usr_seg(seg, &fsp->m_u.usr_ip6_spec,
+					       &perfect_filter);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	if (ret)
+		goto err_exit;
+
+	/* tunnel segments are shifted up one. */
+	memcpy(&tun_seg[1], seg, sizeof(*seg));
+
+	if (user && user->flex_fltr) {
+		perfect_filter = false;
+		ice_flow_add_fld_raw(seg, user->flex_offset,
+				     ICE_FLTR_PRGM_FLEX_WORD_SIZE,
+				     ICE_FLOW_FLD_OFF_INVAL,
+				     ICE_FLOW_FLD_OFF_INVAL);
+		ice_flow_add_fld_raw(&tun_seg[1], user->flex_offset,
+				     ICE_FLTR_PRGM_FLEX_WORD_SIZE,
+				     ICE_FLOW_FLD_OFF_INVAL,
+				     ICE_FLOW_FLD_OFF_INVAL);
+	}
+
+	fltr_idx = ice_ethtool_flow_to_fltr(fsp->flow_type & ~FLOW_EXT);
+
+	assign_bit(fltr_idx, hw->fdir_perfect_fltr, perfect_filter);
+
+	/* add filter for outer headers */
+	ret = ice_fdir_set_hw_fltr_rule(pf, seg, fltr_idx,
+					ICE_FD_HW_SEG_NON_TUN);
+	if (ret == -EEXIST) {
+		/* Rule already exists, free memory and count as success */
+		ret = 0;
+		goto err_exit;
+	} else if (ret) {
+		/* could not write filter, free memory */
+		goto err_exit;
+	}
+
+	/* make tunneled filter HW entries if possible */
+	memcpy(&tun_seg[1], seg, sizeof(*seg));
+	ret = ice_fdir_set_hw_fltr_rule(pf, tun_seg, fltr_idx,
+					ICE_FD_HW_SEG_TUN);
+	if (ret == -EEXIST) {
+		/* Rule already exists, free memory and count as success */
+		devm_kfree(dev, tun_seg);
+		ret = 0;
+	} else if (ret) {
+		/* could not write tunnel filter, but outer filter exists */
+		devm_kfree(dev, tun_seg);
+	}
+
+	return ret;
+
+err_exit:
+	devm_kfree(dev, tun_seg);
+	devm_kfree(dev, seg);
+
+	return ret;
+}
+
+/**
+ * ice_update_per_q_fltr
+ * @vsi: ptr to VSI
+ * @q_index: queue index
+ * @inc: true to increment or false to decrement per queue filter count
+ *
+ * This function is used to keep track of per queue sideband filters
+ */
+static void ice_update_per_q_fltr(struct ice_vsi *vsi, u32 q_index, bool inc)
+{
+	struct ice_rx_ring *rx_ring;
+
+	if (!vsi->num_rxq || q_index >= vsi->num_rxq)
+		return;
+
+	rx_ring = vsi->rx_rings[q_index];
+	if (!rx_ring || !rx_ring->ch)
+		return;
+
+	if (inc)
+		atomic_inc(&rx_ring->ch->num_sb_fltr);
+	else
+		atomic_dec_if_positive(&rx_ring->ch->num_sb_fltr);
+}
+
+/**
+ * ice_fdir_write_fltr - send a flow director filter to the hardware
+ * @pf: PF data structure
+ * @input: filter structure
+ * @add: true adds filter and false removed filter
+ * @is_tun: true adds inner filter on tunnel and false outer headers
+ *
+ * returns 0 on success and negative value on error
+ */
+int
+ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add,
+		    bool is_tun)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	struct ice_fltr_desc desc;
+	struct ice_vsi *ctrl_vsi;
+	u8 *pkt, *frag_pkt;
+	bool has_frag;
+	int err;
+
+	ctrl_vsi = ice_get_ctrl_vsi(pf);
+	if (!ctrl_vsi)
+		return -EINVAL;
+
+	pkt = devm_kzalloc(dev, ICE_FDIR_MAX_RAW_PKT_SIZE, GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
+	frag_pkt = devm_kzalloc(dev, ICE_FDIR_MAX_RAW_PKT_SIZE, GFP_KERNEL);
+	if (!frag_pkt) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	ice_fdir_get_prgm_desc(hw, input, &desc, add);
+	err = ice_fdir_get_gen_prgm_pkt(hw, input, pkt, false, is_tun);
+	if (err)
+		goto err_free_all;
+	err = ice_prgm_fdir_fltr(ctrl_vsi, &desc, pkt);
+	if (err)
+		goto err_free_all;
+
+	/* repeat for fragment packet */
+	has_frag = ice_fdir_has_frag(input->flow_type);
+	if (has_frag) {
+		/* does not return error */
+		ice_fdir_get_prgm_desc(hw, input, &desc, add);
+		err = ice_fdir_get_gen_prgm_pkt(hw, input, frag_pkt, true,
+						is_tun);
+		if (err)
+			goto err_frag;
+		err = ice_prgm_fdir_fltr(ctrl_vsi, &desc, frag_pkt);
+		if (err)
+			goto err_frag;
+	} else {
+		devm_kfree(dev, frag_pkt);
+	}
+
+	return 0;
+
+err_free_all:
+	devm_kfree(dev, frag_pkt);
+err_free:
+	devm_kfree(dev, pkt);
+	return err;
+
+err_frag:
+	devm_kfree(dev, frag_pkt);
+	return err;
+}
+
+/**
+ * ice_fdir_write_all_fltr - send a flow director filter to the hardware
+ * @pf: PF data structure
+ * @input: filter structure
+ * @add: true adds filter and false removed filter
+ *
+ * returns 0 on success and negative value on error
+ */
+static int
+ice_fdir_write_all_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input,
+			bool add)
+{
+	u16 port_num;
+	int tun;
+
+	for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+		bool is_tun = tun == ICE_FD_HW_SEG_TUN;
+		int err;
+
+		if (is_tun && !ice_get_open_tunnel_port(&pf->hw, &port_num, TNL_ALL))
+			continue;
+		err = ice_fdir_write_fltr(pf, input, add, is_tun);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/**
+ * ice_fdir_replay_fltrs - replay filters from the HW filter list
+ * @pf: board private structure
+ */
+void ice_fdir_replay_fltrs(struct ice_pf *pf)
+{
+	struct ice_fdir_fltr *f_rule;
+	struct ice_hw *hw = &pf->hw;
+
+	list_for_each_entry(f_rule, &hw->fdir_list_head, fltr_node) {
+		int err = ice_fdir_write_all_fltr(pf, f_rule, true);
+
+		if (err)
+			dev_dbg(ice_pf_to_dev(pf), "Flow Director error %d, could not reprogram filter %d\n",
+				err, f_rule->fltr_id);
+	}
+}
+
+/**
+ * ice_fdir_create_dflt_rules - create default perfect filters
+ * @pf: PF data structure
+ *
+ * Returns 0 for success or error.
+ */
+int ice_fdir_create_dflt_rules(struct ice_pf *pf)
+{
+	int err;
+
+	/* Create perfect TCP and UDP rules in hardware. */
+	err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_TCP);
+	if (err)
+		return err;
+
+	err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_UDP);
+	if (err)
+		return err;
+
+	err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_TCP);
+	if (err)
+		return err;
+
+	err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_UDP);
+
+	return err;
+}
+
+/**
+ * ice_fdir_del_all_fltrs - Delete all flow director filters
+ * @vsi: the VSI being changed
+ *
+ * This function needs to be called while holding hw->fdir_fltr_lock
+ */
+void ice_fdir_del_all_fltrs(struct ice_vsi *vsi)
+{
+	struct ice_fdir_fltr *f_rule, *tmp;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+
+	list_for_each_entry_safe(f_rule, tmp, &hw->fdir_list_head, fltr_node) {
+		ice_fdir_write_all_fltr(pf, f_rule, false);
+		ice_fdir_update_cntrs(hw, f_rule->flow_type, false);
+		list_del(&f_rule->fltr_node);
+		devm_kfree(ice_pf_to_dev(pf), f_rule);
+	}
+}
+
+/**
+ * ice_vsi_manage_fdir - turn on/off flow director
+ * @vsi: the VSI being changed
+ * @ena: boolean value indicating if this is an enable or disable request
+ */
+void ice_vsi_manage_fdir(struct ice_vsi *vsi, bool ena)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_fltr_ptype flow;
+
+	if (ena) {
+		set_bit(ICE_FLAG_FD_ENA, pf->flags);
+		ice_fdir_create_dflt_rules(pf);
+		return;
+	}
+
+	mutex_lock(&hw->fdir_fltr_lock);
+	if (!test_and_clear_bit(ICE_FLAG_FD_ENA, pf->flags))
+		goto release_lock;
+
+	ice_fdir_del_all_fltrs(vsi);
+
+	if (hw->fdir_prof)
+		for (flow = ICE_FLTR_PTYPE_NONF_NONE; flow < ICE_FLTR_PTYPE_MAX;
+		     flow++)
+			if (hw->fdir_prof[flow])
+				ice_fdir_rem_flow(hw, ICE_BLK_FD, flow);
+
+release_lock:
+	mutex_unlock(&hw->fdir_fltr_lock);
+}
+
+/**
+ * ice_fdir_do_rem_flow - delete flow and possibly add perfect flow
+ * @pf: PF structure
+ * @flow_type: FDir flow type to release
+ */
+static void
+ice_fdir_do_rem_flow(struct ice_pf *pf, enum ice_fltr_ptype flow_type)
+{
+	struct ice_hw *hw = &pf->hw;
+	bool need_perfect = false;
+
+	if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+	    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+	    flow_type == ICE_FLTR_PTYPE_NONF_IPV6_TCP ||
+	    flow_type == ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+		need_perfect = true;
+
+	if (need_perfect && test_bit(flow_type, hw->fdir_perfect_fltr))
+		return;
+
+	ice_fdir_rem_flow(hw, ICE_BLK_FD, flow_type);
+	if (need_perfect)
+		ice_create_init_fdir_rule(pf, flow_type);
+}
+
+/**
+ * ice_fdir_update_list_entry - add or delete a filter from the filter list
+ * @pf: PF structure
+ * @input: filter structure
+ * @fltr_idx: ethtool index of filter to modify
+ *
+ * returns 0 on success and negative on errors
+ */
+static int
+ice_fdir_update_list_entry(struct ice_pf *pf, struct ice_fdir_fltr *input,
+			   int fltr_idx)
+{
+	struct ice_fdir_fltr *old_fltr;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_vsi *vsi;
+	int err = -ENOENT;
+
+	/* Do not update filters during reset */
+	if (ice_is_reset_in_progress(pf->state))
+		return -EBUSY;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return -EINVAL;
+
+	old_fltr = ice_fdir_find_fltr_by_idx(hw, fltr_idx);
+	if (old_fltr) {
+		err = ice_fdir_write_all_fltr(pf, old_fltr, false);
+		if (err)
+			return err;
+		ice_fdir_update_cntrs(hw, old_fltr->flow_type, false);
+		/* update sb-filters count, specific to ring->channel */
+		ice_update_per_q_fltr(vsi, old_fltr->orig_q_index, false);
+		if (!input && !hw->fdir_fltr_cnt[old_fltr->flow_type])
+			/* we just deleted the last filter of flow_type so we
+			 * should also delete the HW filter info.
+			 */
+			ice_fdir_do_rem_flow(pf, old_fltr->flow_type);
+		list_del(&old_fltr->fltr_node);
+		devm_kfree(ice_hw_to_dev(hw), old_fltr);
+	}
+	if (!input)
+		return err;
+	ice_fdir_list_add_fltr(hw, input);
+	/* update sb-filters count, specific to ring->channel */
+	ice_update_per_q_fltr(vsi, input->orig_q_index, true);
+	ice_fdir_update_cntrs(hw, input->flow_type, true);
+	return 0;
+}
+
+/**
+ * ice_del_fdir_ethtool - delete Flow Director filter
+ * @vsi: pointer to target VSI
+ * @cmd: command to add or delete Flow Director filter
+ *
+ * Returns 0 on success and negative values for failure
+ */
+int ice_del_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	int val;
+
+	if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		return -EOPNOTSUPP;
+
+	/* Do not delete filters during reset */
+	if (ice_is_reset_in_progress(pf->state)) {
+		dev_err(ice_pf_to_dev(pf), "Device is resetting - deleting Flow Director filters not supported during reset\n");
+		return -EBUSY;
+	}
+
+	if (test_bit(ICE_FD_FLUSH_REQ, pf->state))
+		return -EBUSY;
+
+	mutex_lock(&hw->fdir_fltr_lock);
+	val = ice_fdir_update_list_entry(pf, NULL, fsp->location);
+	mutex_unlock(&hw->fdir_fltr_lock);
+
+	return val;
+}
+
+/**
+ * ice_update_ring_dest_vsi - update dest ring and dest VSI
+ * @vsi: pointer to target VSI
+ * @dest_vsi: ptr to dest VSI index
+ * @ring: ptr to dest ring
+ *
+ * This function updates destination VSI and queue if user specifies
+ * target queue which falls in channel's (aka ADQ) queue region
+ */
+static void
+ice_update_ring_dest_vsi(struct ice_vsi *vsi, u16 *dest_vsi, u32 *ring)
+{
+	struct ice_channel *ch;
+
+	list_for_each_entry(ch, &vsi->ch_list, list) {
+		if (!ch->ch_vsi)
+			continue;
+
+		/* make sure to locate corresponding channel based on "queue"
+		 * specified
+		 */
+		if ((*ring < ch->base_q) ||
+		    (*ring >= (ch->base_q + ch->num_rxq)))
+			continue;
+
+		/* update the dest_vsi based on channel */
+		*dest_vsi = ch->ch_vsi->idx;
+
+		/* update the "ring" to be correct based on channel */
+		*ring -= ch->base_q;
+	}
+}
+
+/**
+ * ice_set_fdir_input_set - Set the input set for Flow Director
+ * @vsi: pointer to target VSI
+ * @fsp: pointer to ethtool Rx flow specification
+ * @input: filter structure
+ */
+static int
+ice_set_fdir_input_set(struct ice_vsi *vsi, struct ethtool_rx_flow_spec *fsp,
+		       struct ice_fdir_fltr *input)
+{
+	u16 dest_vsi, q_index = 0;
+	u16 orig_q_index = 0;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int flow_type;
+	u8 dest_ctl;
+
+	if (!vsi || !fsp || !input)
+		return -EINVAL;
+
+	pf = vsi->back;
+	hw = &pf->hw;
+
+	dest_vsi = vsi->idx;
+	if (fsp->ring_cookie == RX_CLS_FLOW_DISC) {
+		dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DROP_PKT;
+	} else {
+		u32 ring = ethtool_get_flow_spec_ring(fsp->ring_cookie);
+		u8 vf = ethtool_get_flow_spec_ring_vf(fsp->ring_cookie);
+
+		if (vf) {
+			dev_err(ice_pf_to_dev(pf), "Failed to add filter. Flow director filters are not supported on VF queues.\n");
+			return -EINVAL;
+		}
+
+		if (ring >= vsi->num_rxq)
+			return -EINVAL;
+
+		orig_q_index = ring;
+		ice_update_ring_dest_vsi(vsi, &dest_vsi, &ring);
+		dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX;
+		q_index = ring;
+	}
+
+	input->fltr_id = fsp->location;
+	input->q_index = q_index;
+	flow_type = fsp->flow_type & ~FLOW_EXT;
+
+	/* Record the original queue index as specified by user.
+	 * with channel configuration 'q_index' becomes relative
+	 * to TC (channel).
+	 */
+	input->orig_q_index = orig_q_index;
+	input->dest_vsi = dest_vsi;
+	input->dest_ctl = dest_ctl;
+	input->fltr_status = ICE_FLTR_PRGM_DESC_FD_STATUS_FD_ID;
+	input->cnt_index = ICE_FD_SB_STAT_IDX(hw->fd_ctr_base);
+	input->flow_type = ice_ethtool_flow_to_fltr(flow_type);
+
+	if (fsp->flow_type & FLOW_EXT) {
+		memcpy(input->ext_data.usr_def, fsp->h_ext.data,
+		       sizeof(input->ext_data.usr_def));
+		input->ext_data.vlan_type = fsp->h_ext.vlan_etype;
+		input->ext_data.vlan_tag = fsp->h_ext.vlan_tci;
+		memcpy(input->ext_mask.usr_def, fsp->m_ext.data,
+		       sizeof(input->ext_mask.usr_def));
+		input->ext_mask.vlan_type = fsp->m_ext.vlan_etype;
+		input->ext_mask.vlan_tag = fsp->m_ext.vlan_tci;
+	}
+
+	switch (flow_type) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+		input->ip.v4.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+		input->ip.v4.src_port = fsp->h_u.tcp_ip4_spec.psrc;
+		input->ip.v4.dst_ip = fsp->h_u.tcp_ip4_spec.ip4dst;
+		input->ip.v4.src_ip = fsp->h_u.tcp_ip4_spec.ip4src;
+		input->mask.v4.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+		input->mask.v4.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+		input->mask.v4.dst_ip = fsp->m_u.tcp_ip4_spec.ip4dst;
+		input->mask.v4.src_ip = fsp->m_u.tcp_ip4_spec.ip4src;
+		break;
+	case IPV4_USER_FLOW:
+		input->ip.v4.dst_ip = fsp->h_u.usr_ip4_spec.ip4dst;
+		input->ip.v4.src_ip = fsp->h_u.usr_ip4_spec.ip4src;
+		input->ip.v4.l4_header = fsp->h_u.usr_ip4_spec.l4_4_bytes;
+		input->ip.v4.proto = fsp->h_u.usr_ip4_spec.proto;
+		input->ip.v4.ip_ver = fsp->h_u.usr_ip4_spec.ip_ver;
+		input->ip.v4.tos = fsp->h_u.usr_ip4_spec.tos;
+		input->mask.v4.dst_ip = fsp->m_u.usr_ip4_spec.ip4dst;
+		input->mask.v4.src_ip = fsp->m_u.usr_ip4_spec.ip4src;
+		input->mask.v4.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes;
+		input->mask.v4.proto = fsp->m_u.usr_ip4_spec.proto;
+		input->mask.v4.ip_ver = fsp->m_u.usr_ip4_spec.ip_ver;
+		input->mask.v4.tos = fsp->m_u.usr_ip4_spec.tos;
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		memcpy(input->ip.v6.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		memcpy(input->ip.v6.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		input->ip.v6.dst_port = fsp->h_u.tcp_ip6_spec.pdst;
+		input->ip.v6.src_port = fsp->h_u.tcp_ip6_spec.psrc;
+		input->ip.v6.tc = fsp->h_u.tcp_ip6_spec.tclass;
+		memcpy(input->mask.v6.dst_ip, fsp->m_u.tcp_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		memcpy(input->mask.v6.src_ip, fsp->m_u.tcp_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		input->mask.v6.dst_port = fsp->m_u.tcp_ip6_spec.pdst;
+		input->mask.v6.src_port = fsp->m_u.tcp_ip6_spec.psrc;
+		input->mask.v6.tc = fsp->m_u.tcp_ip6_spec.tclass;
+		break;
+	case IPV6_USER_FLOW:
+		memcpy(input->ip.v6.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		memcpy(input->ip.v6.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		input->ip.v6.l4_header = fsp->h_u.usr_ip6_spec.l4_4_bytes;
+		input->ip.v6.tc = fsp->h_u.usr_ip6_spec.tclass;
+
+		/* if no protocol requested, use IPPROTO_NONE */
+		if (!fsp->m_u.usr_ip6_spec.l4_proto)
+			input->ip.v6.proto = IPPROTO_NONE;
+		else
+			input->ip.v6.proto = fsp->h_u.usr_ip6_spec.l4_proto;
+
+		memcpy(input->mask.v6.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		memcpy(input->mask.v6.src_ip, fsp->m_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		input->mask.v6.l4_header = fsp->m_u.usr_ip6_spec.l4_4_bytes;
+		input->mask.v6.tc = fsp->m_u.usr_ip6_spec.tclass;
+		input->mask.v6.proto = fsp->m_u.usr_ip6_spec.l4_proto;
+		break;
+	default:
+		/* not doing un-parsed flow types */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_add_fdir_ethtool - Add/Remove Flow Director filter
+ * @vsi: pointer to target VSI
+ * @cmd: command to add or delete Flow Director filter
+ *
+ * Returns 0 on success and negative values for failure
+ */
+int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
+{
+	struct ice_rx_flow_userdef userdata;
+	struct ethtool_rx_flow_spec *fsp;
+	struct ice_fdir_fltr *input;
+	struct device *dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int fltrs_needed;
+	u16 tunnel_port;
+	int ret;
+
+	if (!vsi)
+		return -EINVAL;
+
+	pf = vsi->back;
+	hw = &pf->hw;
+	dev = ice_pf_to_dev(pf);
+
+	if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		return -EOPNOTSUPP;
+
+	/* Do not program filters during reset */
+	if (ice_is_reset_in_progress(pf->state)) {
+		dev_err(dev, "Device is resetting - adding Flow Director filters not supported during reset\n");
+		return -EBUSY;
+	}
+
+	fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+	if (ice_parse_rx_flow_user_data(fsp, &userdata))
+		return -EINVAL;
+
+	if (fsp->flow_type & FLOW_MAC_EXT)
+		return -EINVAL;
+
+	ret = ice_cfg_fdir_xtrct_seq(pf, fsp, &userdata);
+	if (ret)
+		return ret;
+
+	if (fsp->location >= ice_get_fdir_cnt_all(hw)) {
+		dev_err(dev, "Failed to add filter.  The maximum number of flow director filters has been reached.\n");
+		return -ENOSPC;
+	}
+
+	/* return error if not an update and no available filters */
+	fltrs_needed = ice_get_open_tunnel_port(hw, &tunnel_port, TNL_ALL) ? 2 : 1;
+	if (!ice_fdir_find_fltr_by_idx(hw, fsp->location) &&
+	    ice_fdir_num_avail_fltr(hw, pf->vsi[vsi->idx]) < fltrs_needed) {
+		dev_err(dev, "Failed to add filter.  The maximum number of flow director filters has been reached.\n");
+		return -ENOSPC;
+	}
+
+	input = devm_kzalloc(dev, sizeof(*input), GFP_KERNEL);
+	if (!input)
+		return -ENOMEM;
+
+	ret = ice_set_fdir_input_set(vsi, fsp, input);
+	if (ret)
+		goto free_input;
+
+	mutex_lock(&hw->fdir_fltr_lock);
+	if (ice_fdir_is_dup_fltr(hw, input)) {
+		ret = -EINVAL;
+		goto release_lock;
+	}
+
+	if (userdata.flex_fltr) {
+		input->flex_fltr = true;
+		input->flex_word = cpu_to_be16(userdata.flex_word);
+		input->flex_offset = userdata.flex_offset;
+	}
+
+	input->cnt_ena = ICE_FXD_FLTR_QW0_STAT_ENA_PKTS;
+	input->fdid_prio = ICE_FXD_FLTR_QW1_FDID_PRI_THREE;
+	input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
+
+	/* input struct is added to the HW filter list */
+	ret = ice_fdir_update_list_entry(pf, input, fsp->location);
+	if (ret)
+		goto release_lock;
+
+	ret = ice_fdir_write_all_fltr(pf, input, true);
+	if (ret)
+		goto remove_sw_rule;
+
+	goto release_lock;
+
+remove_sw_rule:
+	ice_fdir_update_cntrs(hw, input->flow_type, false);
+	/* update sb-filters count, specific to ring->channel */
+	ice_update_per_q_fltr(vsi, input->orig_q_index, false);
+	list_del(&input->fltr_node);
+release_lock:
+	mutex_unlock(&hw->fdir_fltr_lock);
+free_input:
+	if (ret)
+		devm_kfree(dev, input);
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
new file mode 100644
index 0000000000..ae089d32ee
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -0,0 +1,1302 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#include "ice_common.h"
+
+/* These are training packet headers used to program flow director filters. */
+static const u8 ice_fdir_tcpv4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x28, 0x00, 0x01, 0x00, 0x00, 0x40, 0x06,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00,
+	0x20, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static const u8 ice_fdir_udpv4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x1C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctpv4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x20, 0x00, 0x00, 0x40, 0x00, 0x40, 0x84,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x10,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00
+};
+
+static const u8 ice_fdir_udp4_gtpu4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x4c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+	0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+	0x00, 0x1c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_tcp4_gtpu4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x58, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+	0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+	0x00, 0x28, 0x00, 0x00, 0x40, 0x00, 0x40, 0x06,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_icmp4_gtpu4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x4c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+	0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+	0x00, 0x1c, 0x00, 0x00, 0x40, 0x00, 0x40, 0x01,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_gtpu4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x44, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x08, 0x68, 0x08, 0x68, 0x00, 0x00,
+	0x00, 0x00, 0x34, 0xff, 0x00, 0x28, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0x02, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x00,
+	0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_l2tpv3_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x73,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_l2tpv3_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x73, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_esp_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x32,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00
+};
+
+static const u8 ice_fdir_ipv6_esp_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x32, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_ah_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x33,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00
+};
+
+static const u8 ice_fdir_ipv6_ah_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x33, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_nat_t_esp_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x1C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x11, 0x94, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_nat_t_esp_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x08, 0x11, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x11, 0x94, 0x00, 0x00, 0x00, 0x08,
+};
+
+static const u8 ice_fdir_ipv4_pfcp_node_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x2C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x22, 0x65, 0x22, 0x65, 0x00, 0x00,
+	0x00, 0x00, 0x20, 0x00, 0x00, 0x10, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_pfcp_session_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x2C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x22, 0x65, 0x22, 0x65, 0x00, 0x00,
+	0x00, 0x00, 0x21, 0x00, 0x00, 0x10, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_pfcp_node_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x18, 0x11, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x65,
+	0x22, 0x65, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00,
+	0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_pfcp_session_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x18, 0x11, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x65,
+	0x22, 0x65, 0x00, 0x00, 0x00, 0x00, 0x21, 0x00,
+	0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_non_ip_l2_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_tcpv6_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x14, 0x06, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x50, 0x00, 0x20, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_udpv6_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x08, 0x11, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x08, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctpv6_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x0C, 0x84, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x3B, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_tcp4_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x5a, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+	0x45, 0x00, 0x00, 0x28, 0x00, 0x00, 0x40, 0x00,
+	0x40, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x50, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_udp4_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x4e, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+	0x45, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x40, 0x00,
+	0x40, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctp4_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x52, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+	0x45, 0x00, 0x00, 0x20, 0x00, 0x01, 0x00, 0x00,
+	0x40, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ip4_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x46, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+	0x45, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00,
+	0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_tcp6_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x6e, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+	0x60, 0x00, 0x00, 0x00, 0x00, 0x14, 0x06, 0x40,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x20, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_udp6_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x62, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+	0x60, 0x00, 0x00, 0x00, 0x00, 0x08, 0x11, 0x40,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctp6_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x66, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+	0x60, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x84, 0x40,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ip6_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x5a, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+	0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3b, 0x40,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+/* Flow Director no-op training packet table */
+static const struct ice_fdir_base_pkt ice_fdir_pkt[] = {
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_TCP,
+		sizeof(ice_fdir_tcpv4_pkt), ice_fdir_tcpv4_pkt,
+		sizeof(ice_fdir_tcp4_tun_pkt), ice_fdir_tcp4_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_UDP,
+		sizeof(ice_fdir_udpv4_pkt), ice_fdir_udpv4_pkt,
+		sizeof(ice_fdir_udp4_tun_pkt), ice_fdir_udp4_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_SCTP,
+		sizeof(ice_fdir_sctpv4_pkt), ice_fdir_sctpv4_pkt,
+		sizeof(ice_fdir_sctp4_tun_pkt), ice_fdir_sctp4_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_OTHER,
+		sizeof(ice_fdir_ipv4_pkt), ice_fdir_ipv4_pkt,
+		sizeof(ice_fdir_ip4_tun_pkt), ice_fdir_ip4_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP,
+		sizeof(ice_fdir_udp4_gtpu4_pkt),
+		ice_fdir_udp4_gtpu4_pkt,
+		sizeof(ice_fdir_udp4_gtpu4_pkt),
+		ice_fdir_udp4_gtpu4_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP,
+		sizeof(ice_fdir_tcp4_gtpu4_pkt),
+		ice_fdir_tcp4_gtpu4_pkt,
+		sizeof(ice_fdir_tcp4_gtpu4_pkt),
+		ice_fdir_tcp4_gtpu4_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP,
+		sizeof(ice_fdir_icmp4_gtpu4_pkt),
+		ice_fdir_icmp4_gtpu4_pkt,
+		sizeof(ice_fdir_icmp4_gtpu4_pkt),
+		ice_fdir_icmp4_gtpu4_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER,
+		sizeof(ice_fdir_ipv4_gtpu4_pkt),
+		ice_fdir_ipv4_gtpu4_pkt,
+		sizeof(ice_fdir_ipv4_gtpu4_pkt),
+		ice_fdir_ipv4_gtpu4_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3,
+		sizeof(ice_fdir_ipv4_l2tpv3_pkt), ice_fdir_ipv4_l2tpv3_pkt,
+		sizeof(ice_fdir_ipv4_l2tpv3_pkt), ice_fdir_ipv4_l2tpv3_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3,
+		sizeof(ice_fdir_ipv6_l2tpv3_pkt), ice_fdir_ipv6_l2tpv3_pkt,
+		sizeof(ice_fdir_ipv6_l2tpv3_pkt), ice_fdir_ipv6_l2tpv3_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_ESP,
+		sizeof(ice_fdir_ipv4_esp_pkt), ice_fdir_ipv4_esp_pkt,
+		sizeof(ice_fdir_ipv4_esp_pkt), ice_fdir_ipv4_esp_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_ESP,
+		sizeof(ice_fdir_ipv6_esp_pkt), ice_fdir_ipv6_esp_pkt,
+		sizeof(ice_fdir_ipv6_esp_pkt), ice_fdir_ipv6_esp_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_AH,
+		sizeof(ice_fdir_ipv4_ah_pkt), ice_fdir_ipv4_ah_pkt,
+		sizeof(ice_fdir_ipv4_ah_pkt), ice_fdir_ipv4_ah_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_AH,
+		sizeof(ice_fdir_ipv6_ah_pkt), ice_fdir_ipv6_ah_pkt,
+		sizeof(ice_fdir_ipv6_ah_pkt), ice_fdir_ipv6_ah_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP,
+		sizeof(ice_fdir_ipv4_nat_t_esp_pkt),
+		ice_fdir_ipv4_nat_t_esp_pkt,
+		sizeof(ice_fdir_ipv4_nat_t_esp_pkt),
+		ice_fdir_ipv4_nat_t_esp_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP,
+		sizeof(ice_fdir_ipv6_nat_t_esp_pkt),
+		ice_fdir_ipv6_nat_t_esp_pkt,
+		sizeof(ice_fdir_ipv6_nat_t_esp_pkt),
+		ice_fdir_ipv6_nat_t_esp_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE,
+		sizeof(ice_fdir_ipv4_pfcp_node_pkt),
+		ice_fdir_ipv4_pfcp_node_pkt,
+		sizeof(ice_fdir_ipv4_pfcp_node_pkt),
+		ice_fdir_ipv4_pfcp_node_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION,
+		sizeof(ice_fdir_ipv4_pfcp_session_pkt),
+		ice_fdir_ipv4_pfcp_session_pkt,
+		sizeof(ice_fdir_ipv4_pfcp_session_pkt),
+		ice_fdir_ipv4_pfcp_session_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE,
+		sizeof(ice_fdir_ipv6_pfcp_node_pkt),
+		ice_fdir_ipv6_pfcp_node_pkt,
+		sizeof(ice_fdir_ipv6_pfcp_node_pkt),
+		ice_fdir_ipv6_pfcp_node_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION,
+		sizeof(ice_fdir_ipv6_pfcp_session_pkt),
+		ice_fdir_ipv6_pfcp_session_pkt,
+		sizeof(ice_fdir_ipv6_pfcp_session_pkt),
+		ice_fdir_ipv6_pfcp_session_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NON_IP_L2,
+		sizeof(ice_fdir_non_ip_l2_pkt), ice_fdir_non_ip_l2_pkt,
+		sizeof(ice_fdir_non_ip_l2_pkt), ice_fdir_non_ip_l2_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_TCP,
+		sizeof(ice_fdir_tcpv6_pkt), ice_fdir_tcpv6_pkt,
+		sizeof(ice_fdir_tcp6_tun_pkt), ice_fdir_tcp6_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_UDP,
+		sizeof(ice_fdir_udpv6_pkt), ice_fdir_udpv6_pkt,
+		sizeof(ice_fdir_udp6_tun_pkt), ice_fdir_udp6_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_SCTP,
+		sizeof(ice_fdir_sctpv6_pkt), ice_fdir_sctpv6_pkt,
+		sizeof(ice_fdir_sctp6_tun_pkt), ice_fdir_sctp6_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_OTHER,
+		sizeof(ice_fdir_ipv6_pkt), ice_fdir_ipv6_pkt,
+		sizeof(ice_fdir_ip6_tun_pkt), ice_fdir_ip6_tun_pkt,
+	},
+};
+
+#define ICE_FDIR_NUM_PKT ARRAY_SIZE(ice_fdir_pkt)
+
+/**
+ * ice_set_dflt_val_fd_desc
+ * @fd_fltr_ctx: pointer to fd filter descriptor
+ */
+static void ice_set_dflt_val_fd_desc(struct ice_fd_fltr_desc_ctx *fd_fltr_ctx)
+{
+	fd_fltr_ctx->comp_q = ICE_FXD_FLTR_QW0_COMP_Q_ZERO;
+	fd_fltr_ctx->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
+	fd_fltr_ctx->fd_space = ICE_FXD_FLTR_QW0_FD_SPACE_GUAR_BEST;
+	fd_fltr_ctx->cnt_ena = ICE_FXD_FLTR_QW0_STAT_ENA_PKTS;
+	fd_fltr_ctx->evict_ena = ICE_FXD_FLTR_QW0_EVICT_ENA_TRUE;
+	fd_fltr_ctx->toq = ICE_FXD_FLTR_QW0_TO_Q_EQUALS_QINDEX;
+	fd_fltr_ctx->toq_prio = ICE_FXD_FLTR_QW0_TO_Q_PRIO1;
+	fd_fltr_ctx->dpu_recipe = ICE_FXD_FLTR_QW0_DPU_RECIPE_DFLT;
+	fd_fltr_ctx->drop = ICE_FXD_FLTR_QW0_DROP_NO;
+	fd_fltr_ctx->flex_prio = ICE_FXD_FLTR_QW0_FLEX_PRI_NONE;
+	fd_fltr_ctx->flex_mdid = ICE_FXD_FLTR_QW0_FLEX_MDID0;
+	fd_fltr_ctx->flex_val = ICE_FXD_FLTR_QW0_FLEX_VAL0;
+	fd_fltr_ctx->dtype = ICE_TX_DESC_DTYPE_FLTR_PROG;
+	fd_fltr_ctx->desc_prof_prio = ICE_FXD_FLTR_QW1_PROF_PRIO_ZERO;
+	fd_fltr_ctx->desc_prof = ICE_FXD_FLTR_QW1_PROF_ZERO;
+	fd_fltr_ctx->swap = ICE_FXD_FLTR_QW1_SWAP_SET;
+	fd_fltr_ctx->fdid_prio = ICE_FXD_FLTR_QW1_FDID_PRI_ONE;
+	fd_fltr_ctx->fdid_mdid = ICE_FXD_FLTR_QW1_FDID_MDID_FD;
+	fd_fltr_ctx->fdid = ICE_FXD_FLTR_QW1_FDID_ZERO;
+}
+
+/**
+ * ice_set_fd_desc_val
+ * @ctx: pointer to fd filter descriptor context
+ * @fdir_desc: populated with fd filter descriptor values
+ */
+static void
+ice_set_fd_desc_val(struct ice_fd_fltr_desc_ctx *ctx,
+		    struct ice_fltr_desc *fdir_desc)
+{
+	u64 qword;
+
+	/* prep QW0 of FD filter programming desc */
+	qword = ((u64)ctx->qindex << ICE_FXD_FLTR_QW0_QINDEX_S) &
+		ICE_FXD_FLTR_QW0_QINDEX_M;
+	qword |= ((u64)ctx->comp_q << ICE_FXD_FLTR_QW0_COMP_Q_S) &
+		 ICE_FXD_FLTR_QW0_COMP_Q_M;
+	qword |= ((u64)ctx->comp_report << ICE_FXD_FLTR_QW0_COMP_REPORT_S) &
+		 ICE_FXD_FLTR_QW0_COMP_REPORT_M;
+	qword |= ((u64)ctx->fd_space << ICE_FXD_FLTR_QW0_FD_SPACE_S) &
+		 ICE_FXD_FLTR_QW0_FD_SPACE_M;
+	qword |= ((u64)ctx->cnt_index << ICE_FXD_FLTR_QW0_STAT_CNT_S) &
+		 ICE_FXD_FLTR_QW0_STAT_CNT_M;
+	qword |= ((u64)ctx->cnt_ena << ICE_FXD_FLTR_QW0_STAT_ENA_S) &
+		 ICE_FXD_FLTR_QW0_STAT_ENA_M;
+	qword |= ((u64)ctx->evict_ena << ICE_FXD_FLTR_QW0_EVICT_ENA_S) &
+		 ICE_FXD_FLTR_QW0_EVICT_ENA_M;
+	qword |= ((u64)ctx->toq << ICE_FXD_FLTR_QW0_TO_Q_S) &
+		 ICE_FXD_FLTR_QW0_TO_Q_M;
+	qword |= ((u64)ctx->toq_prio << ICE_FXD_FLTR_QW0_TO_Q_PRI_S) &
+		 ICE_FXD_FLTR_QW0_TO_Q_PRI_M;
+	qword |= ((u64)ctx->dpu_recipe << ICE_FXD_FLTR_QW0_DPU_RECIPE_S) &
+		 ICE_FXD_FLTR_QW0_DPU_RECIPE_M;
+	qword |= ((u64)ctx->drop << ICE_FXD_FLTR_QW0_DROP_S) &
+		 ICE_FXD_FLTR_QW0_DROP_M;
+	qword |= ((u64)ctx->flex_prio << ICE_FXD_FLTR_QW0_FLEX_PRI_S) &
+		 ICE_FXD_FLTR_QW0_FLEX_PRI_M;
+	qword |= ((u64)ctx->flex_mdid << ICE_FXD_FLTR_QW0_FLEX_MDID_S) &
+		 ICE_FXD_FLTR_QW0_FLEX_MDID_M;
+	qword |= ((u64)ctx->flex_val << ICE_FXD_FLTR_QW0_FLEX_VAL_S) &
+		 ICE_FXD_FLTR_QW0_FLEX_VAL_M;
+	fdir_desc->qidx_compq_space_stat = cpu_to_le64(qword);
+
+	/* prep QW1 of FD filter programming desc */
+	qword = ((u64)ctx->dtype << ICE_FXD_FLTR_QW1_DTYPE_S) &
+		ICE_FXD_FLTR_QW1_DTYPE_M;
+	qword |= ((u64)ctx->pcmd << ICE_FXD_FLTR_QW1_PCMD_S) &
+		 ICE_FXD_FLTR_QW1_PCMD_M;
+	qword |= ((u64)ctx->desc_prof_prio << ICE_FXD_FLTR_QW1_PROF_PRI_S) &
+		 ICE_FXD_FLTR_QW1_PROF_PRI_M;
+	qword |= ((u64)ctx->desc_prof << ICE_FXD_FLTR_QW1_PROF_S) &
+		 ICE_FXD_FLTR_QW1_PROF_M;
+	qword |= ((u64)ctx->fd_vsi << ICE_FXD_FLTR_QW1_FD_VSI_S) &
+		 ICE_FXD_FLTR_QW1_FD_VSI_M;
+	qword |= ((u64)ctx->swap << ICE_FXD_FLTR_QW1_SWAP_S) &
+		 ICE_FXD_FLTR_QW1_SWAP_M;
+	qword |= ((u64)ctx->fdid_prio << ICE_FXD_FLTR_QW1_FDID_PRI_S) &
+		 ICE_FXD_FLTR_QW1_FDID_PRI_M;
+	qword |= ((u64)ctx->fdid_mdid << ICE_FXD_FLTR_QW1_FDID_MDID_S) &
+		 ICE_FXD_FLTR_QW1_FDID_MDID_M;
+	qword |= ((u64)ctx->fdid << ICE_FXD_FLTR_QW1_FDID_S) &
+		 ICE_FXD_FLTR_QW1_FDID_M;
+	fdir_desc->dtype_cmd_vsi_fdid = cpu_to_le64(qword);
+}
+
+/**
+ * ice_fdir_get_prgm_desc - set a fdir descriptor from a fdir filter struct
+ * @hw: pointer to the hardware structure
+ * @input: filter
+ * @fdesc: filter descriptor
+ * @add: if add is true, this is an add operation, false implies delete
+ */
+void
+ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input,
+		       struct ice_fltr_desc *fdesc, bool add)
+{
+	struct ice_fd_fltr_desc_ctx fdir_fltr_ctx = { 0 };
+
+	/* set default context info */
+	ice_set_dflt_val_fd_desc(&fdir_fltr_ctx);
+
+	/* change sideband filtering values */
+	fdir_fltr_ctx.fdid = input->fltr_id;
+	if (input->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DROP_PKT) {
+		fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_YES;
+		fdir_fltr_ctx.qindex = 0;
+	} else if (input->dest_ctl ==
+		   ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER) {
+		fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_NO;
+		fdir_fltr_ctx.qindex = 0;
+	} else {
+		if (input->dest_ctl ==
+		    ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP)
+			fdir_fltr_ctx.toq = input->q_region;
+		fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_NO;
+		fdir_fltr_ctx.qindex = input->q_index;
+	}
+	fdir_fltr_ctx.cnt_ena = input->cnt_ena;
+	fdir_fltr_ctx.cnt_index = input->cnt_index;
+	fdir_fltr_ctx.fd_vsi = ice_get_hw_vsi_num(hw, input->dest_vsi);
+	fdir_fltr_ctx.evict_ena = ICE_FXD_FLTR_QW0_EVICT_ENA_FALSE;
+	if (input->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER)
+		fdir_fltr_ctx.toq_prio = 0;
+	else
+		fdir_fltr_ctx.toq_prio = 3;
+	fdir_fltr_ctx.pcmd = add ? ICE_FXD_FLTR_QW1_PCMD_ADD :
+		ICE_FXD_FLTR_QW1_PCMD_REMOVE;
+	fdir_fltr_ctx.swap = ICE_FXD_FLTR_QW1_SWAP_NOT_SET;
+	fdir_fltr_ctx.comp_q = ICE_FXD_FLTR_QW0_COMP_Q_ZERO;
+	fdir_fltr_ctx.comp_report = input->comp_report;
+	fdir_fltr_ctx.fdid_prio = input->fdid_prio;
+	fdir_fltr_ctx.desc_prof = 1;
+	fdir_fltr_ctx.desc_prof_prio = 3;
+	ice_set_fd_desc_val(&fdir_fltr_ctx, fdesc);
+}
+
+/**
+ * ice_alloc_fd_res_cntr - obtain counter resource for FD type
+ * @hw: pointer to the hardware structure
+ * @cntr_id: returns counter index
+ */
+int ice_alloc_fd_res_cntr(struct ice_hw *hw, u16 *cntr_id)
+{
+	return ice_alloc_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK,
+				  ICE_AQC_RES_TYPE_FLAG_DEDICATED, 1, cntr_id);
+}
+
+/**
+ * ice_free_fd_res_cntr - Free counter resource for FD type
+ * @hw: pointer to the hardware structure
+ * @cntr_id: counter index to be freed
+ */
+int ice_free_fd_res_cntr(struct ice_hw *hw, u16 cntr_id)
+{
+	return ice_free_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK,
+				 ICE_AQC_RES_TYPE_FLAG_DEDICATED, 1, cntr_id);
+}
+
+/**
+ * ice_alloc_fd_guar_item - allocate resource for FD guaranteed entries
+ * @hw: pointer to the hardware structure
+ * @cntr_id: returns counter index
+ * @num_fltr: number of filter entries to be allocated
+ */
+int ice_alloc_fd_guar_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr)
+{
+	return ice_alloc_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_GUARANTEED_ENTRIES,
+				  ICE_AQC_RES_TYPE_FLAG_DEDICATED, num_fltr,
+				  cntr_id);
+}
+
+/**
+ * ice_alloc_fd_shrd_item - allocate resource for flow director shared entries
+ * @hw: pointer to the hardware structure
+ * @cntr_id: returns counter index
+ * @num_fltr: number of filter entries to be allocated
+ */
+int ice_alloc_fd_shrd_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr)
+{
+	return ice_alloc_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_SHARED_ENTRIES,
+				  ICE_AQC_RES_TYPE_FLAG_DEDICATED, num_fltr,
+				  cntr_id);
+}
+
+/**
+ * ice_get_fdir_cnt_all - get the number of Flow Director filters
+ * @hw: hardware data structure
+ *
+ * Returns the number of filters available on device
+ */
+int ice_get_fdir_cnt_all(struct ice_hw *hw)
+{
+	return hw->func_caps.fd_fltr_guar + hw->func_caps.fd_fltr_best_effort;
+}
+
+/**
+ * ice_pkt_insert_ipv6_addr - insert a be32 IPv6 address into a memory buffer
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @addr: IPv6 address to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_ipv6_addr(u8 *pkt, int offset, __be32 *addr)
+{
+	int idx;
+
+	for (idx = 0; idx < ICE_IPV6_ADDR_LEN_AS_U32; idx++)
+		memcpy(pkt + offset + idx * sizeof(*addr), &addr[idx],
+		       sizeof(*addr));
+}
+
+/**
+ * ice_pkt_insert_u6_qfi - insert a u6 value QFI into a memory buffer for GTPU
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 8 bit value to convert and insert into pkt at offset
+ *
+ * This function is designed for inserting QFI (6 bits) for GTPU.
+ */
+static void ice_pkt_insert_u6_qfi(u8 *pkt, int offset, u8 data)
+{
+	u8 ret;
+
+	ret = (data & 0x3F) + (*(pkt + offset) & 0xC0);
+	memcpy(pkt + offset, &ret, sizeof(ret));
+}
+
+/**
+ * ice_pkt_insert_u8 - insert a u8 value into a memory buffer.
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 8 bit value to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_u8(u8 *pkt, int offset, u8 data)
+{
+	memcpy(pkt + offset, &data, sizeof(data));
+}
+
+/**
+ * ice_pkt_insert_u8_tc - insert a u8 value into a memory buffer for TC ipv6.
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 8 bit value to convert and insert into pkt at offset
+ *
+ * This function is designed for inserting Traffic Class (TC) for IPv6,
+ * since that TC is not aligned in number of bytes. Here we split it out
+ * into two part and fill each byte with data copy from pkt, then insert
+ * the two bytes data one by one.
+ */
+static void ice_pkt_insert_u8_tc(u8 *pkt, int offset, u8 data)
+{
+	u8 high, low;
+
+	high = (data >> 4) + (*(pkt + offset) & 0xF0);
+	memcpy(pkt + offset, &high, sizeof(high));
+
+	low = (*(pkt + offset + 1) & 0x0F) + ((data & 0x0F) << 4);
+	memcpy(pkt + offset + 1, &low, sizeof(low));
+}
+
+/**
+ * ice_pkt_insert_u16 - insert a be16 value into a memory buffer
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 16 bit value to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_u16(u8 *pkt, int offset, __be16 data)
+{
+	memcpy(pkt + offset, &data, sizeof(data));
+}
+
+/**
+ * ice_pkt_insert_u32 - insert a be32 value into a memory buffer
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 32 bit value to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_u32(u8 *pkt, int offset, __be32 data)
+{
+	memcpy(pkt + offset, &data, sizeof(data));
+}
+
+/**
+ * ice_pkt_insert_mac_addr - insert a MAC addr into a memory buffer.
+ * @pkt: packet buffer
+ * @addr: MAC address to convert and insert into pkt at offset
+ */
+static void ice_pkt_insert_mac_addr(u8 *pkt, u8 *addr)
+{
+	ether_addr_copy(pkt, addr);
+}
+
+/**
+ * ice_fdir_get_gen_prgm_pkt - generate a training packet
+ * @hw: pointer to the hardware structure
+ * @input: flow director filter data structure
+ * @pkt: pointer to return filter packet
+ * @frag: generate a fragment packet
+ * @tun: true implies generate a tunnel packet
+ */
+int
+ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
+			  u8 *pkt, bool frag, bool tun)
+{
+	enum ice_fltr_ptype flow;
+	u16 tnl_port;
+	u8 *loc;
+	u16 idx;
+
+	if (input->flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER) {
+		switch (input->ip.v4.proto) {
+		case IPPROTO_TCP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV4_TCP;
+			break;
+		case IPPROTO_UDP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+			break;
+		case IPPROTO_SCTP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV4_SCTP;
+			break;
+		default:
+			flow = ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
+			break;
+		}
+	} else if (input->flow_type == ICE_FLTR_PTYPE_NONF_IPV6_OTHER) {
+		switch (input->ip.v6.proto) {
+		case IPPROTO_TCP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV6_TCP;
+			break;
+		case IPPROTO_UDP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+			break;
+		case IPPROTO_SCTP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV6_SCTP;
+			break;
+		default:
+			flow = ICE_FLTR_PTYPE_NONF_IPV6_OTHER;
+			break;
+		}
+	} else {
+		flow = input->flow_type;
+	}
+
+	for (idx = 0; idx < ICE_FDIR_NUM_PKT; idx++)
+		if (ice_fdir_pkt[idx].flow == flow)
+			break;
+	if (idx == ICE_FDIR_NUM_PKT)
+		return -EINVAL;
+	if (!tun) {
+		memcpy(pkt, ice_fdir_pkt[idx].pkt, ice_fdir_pkt[idx].pkt_len);
+		loc = pkt;
+	} else {
+		if (!ice_get_open_tunnel_port(hw, &tnl_port, TNL_ALL))
+			return -ENOENT;
+		if (!ice_fdir_pkt[idx].tun_pkt)
+			return -EINVAL;
+		memcpy(pkt, ice_fdir_pkt[idx].tun_pkt,
+		       ice_fdir_pkt[idx].tun_pkt_len);
+		ice_pkt_insert_u16(pkt, ICE_IPV4_UDP_DST_PORT_OFFSET,
+				   htons(tnl_port));
+		loc = &pkt[ICE_FDIR_TUN_PKT_OFF];
+	}
+
+	/* Reverse the src and dst, since the HW expects them to be from Tx
+	 * perspective. The input from user is from Rx filter perspective.
+	 */
+	switch (flow) {
+	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.v4.src_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_TCP_DST_PORT_OFFSET,
+				   input->ip.v4.src_port);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.v4.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_TCP_SRC_PORT_OFFSET,
+				   input->ip.v4.dst_port);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+		if (frag)
+			loc[20] = ICE_FDIR_IPV4_PKT_FLAG_MF;
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.v4.src_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_UDP_DST_PORT_OFFSET,
+				   input->ip.v4.src_port);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.v4.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_UDP_SRC_PORT_OFFSET,
+				   input->ip.v4.dst_port);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+		ice_pkt_insert_mac_addr(loc + ETH_ALEN,
+					input->ext_data.src_mac);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.v4.src_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_SCTP_DST_PORT_OFFSET,
+				   input->ip.v4.src_port);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.v4.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_SCTP_SRC_PORT_OFFSET,
+				   input->ip.v4.dst_port);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.v4.src_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.v4.dst_ip);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TOS_OFFSET, input->ip.v4.tos);
+		ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl);
+		ice_pkt_insert_u8(loc, ICE_IPV4_PROTO_OFFSET,
+				  input->ip.v4.proto);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.v4.src_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.v4.dst_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV4_GTPU_TEID_OFFSET,
+				   input->gtpu_data.teid);
+		ice_pkt_insert_u6_qfi(loc, ICE_IPV4_GTPU_QFI_OFFSET,
+				      input->gtpu_data.qfi);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3:
+		ice_pkt_insert_u32(loc, ICE_IPV4_L2TPV3_SESS_ID_OFFSET,
+				   input->l2tpv3_data.session_id);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3:
+		ice_pkt_insert_u32(loc, ICE_IPV6_L2TPV3_SESS_ID_OFFSET,
+				   input->l2tpv3_data.session_id);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_ESP:
+		ice_pkt_insert_u32(loc, ICE_IPV4_ESP_SPI_OFFSET,
+				   input->ip.v4.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_ESP:
+		ice_pkt_insert_u32(loc, ICE_IPV6_ESP_SPI_OFFSET,
+				   input->ip.v6.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_AH:
+		ice_pkt_insert_u32(loc, ICE_IPV4_AH_SPI_OFFSET,
+				   input->ip.v4.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_AH:
+		ice_pkt_insert_u32(loc, ICE_IPV6_AH_SPI_OFFSET,
+				   input->ip.v6.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.v4.src_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.v4.dst_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV4_NAT_T_ESP_SPI_OFFSET,
+				   input->ip.v4.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP:
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+					 input->ip.v6.src_ip);
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+					 input->ip.v6.dst_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV6_NAT_T_ESP_SPI_OFFSET,
+				   input->ip.v6.sec_parm_idx);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE:
+	case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION:
+		ice_pkt_insert_u16(loc, ICE_IPV4_UDP_SRC_PORT_OFFSET,
+				   input->ip.v4.dst_port);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE:
+	case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION:
+		ice_pkt_insert_u16(loc, ICE_IPV6_UDP_SRC_PORT_OFFSET,
+				   input->ip.v6.dst_port);
+		break;
+	case ICE_FLTR_PTYPE_NON_IP_L2:
+		ice_pkt_insert_u16(loc, ICE_MAC_ETHTYPE_OFFSET,
+				   input->ext_data.ether_type);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+					 input->ip.v6.src_ip);
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+					 input->ip.v6.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV6_TCP_DST_PORT_OFFSET,
+				   input->ip.v6.src_port);
+		ice_pkt_insert_u16(loc, ICE_IPV6_TCP_SRC_PORT_OFFSET,
+				   input->ip.v6.dst_port);
+		ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+		ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+					 input->ip.v6.src_ip);
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+					 input->ip.v6.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV6_UDP_DST_PORT_OFFSET,
+				   input->ip.v6.src_port);
+		ice_pkt_insert_u16(loc, ICE_IPV6_UDP_SRC_PORT_OFFSET,
+				   input->ip.v6.dst_port);
+		ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+		ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+					 input->ip.v6.src_ip);
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+					 input->ip.v6.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV6_SCTP_DST_PORT_OFFSET,
+				   input->ip.v6.src_port);
+		ice_pkt_insert_u16(loc, ICE_IPV6_SCTP_SRC_PORT_OFFSET,
+				   input->ip.v6.dst_port);
+		ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+		ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+					 input->ip.v6.src_ip);
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+					 input->ip.v6.dst_ip);
+		ice_pkt_insert_u8_tc(loc, ICE_IPV6_TC_OFFSET, input->ip.v6.tc);
+		ice_pkt_insert_u8(loc, ICE_IPV6_HLIM_OFFSET, input->ip.v6.hlim);
+		ice_pkt_insert_u8(loc, ICE_IPV6_PROTO_OFFSET,
+				  input->ip.v6.proto);
+		ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (input->flex_fltr)
+		ice_pkt_insert_u16(loc, input->flex_offset, input->flex_word);
+
+	return 0;
+}
+
+/**
+ * ice_fdir_has_frag - does flow type have 2 ptypes
+ * @flow: flow ptype
+ *
+ * returns true is there is a fragment packet for this ptype
+ */
+bool ice_fdir_has_frag(enum ice_fltr_ptype flow)
+{
+	if (flow == ICE_FLTR_PTYPE_NONF_IPV4_OTHER)
+		return true;
+	else
+		return false;
+}
+
+/**
+ * ice_fdir_find_fltr_by_idx - find filter with idx
+ * @hw: pointer to hardware structure
+ * @fltr_idx: index to find.
+ *
+ * Returns pointer to filter if found or null
+ */
+struct ice_fdir_fltr *
+ice_fdir_find_fltr_by_idx(struct ice_hw *hw, u32 fltr_idx)
+{
+	struct ice_fdir_fltr *rule;
+
+	list_for_each_entry(rule, &hw->fdir_list_head, fltr_node) {
+		/* rule ID found in the list */
+		if (fltr_idx == rule->fltr_id)
+			return rule;
+		if (fltr_idx < rule->fltr_id)
+			break;
+	}
+	return NULL;
+}
+
+/**
+ * ice_fdir_list_add_fltr - add a new node to the flow director filter list
+ * @hw: hardware structure
+ * @fltr: filter node to add to structure
+ */
+void ice_fdir_list_add_fltr(struct ice_hw *hw, struct ice_fdir_fltr *fltr)
+{
+	struct ice_fdir_fltr *rule, *parent = NULL;
+
+	list_for_each_entry(rule, &hw->fdir_list_head, fltr_node) {
+		/* rule ID found or pass its spot in the list */
+		if (rule->fltr_id >= fltr->fltr_id)
+			break;
+		parent = rule;
+	}
+
+	if (parent)
+		list_add(&fltr->fltr_node, &parent->fltr_node);
+	else
+		list_add(&fltr->fltr_node, &hw->fdir_list_head);
+}
+
+/**
+ * ice_fdir_update_cntrs - increment / decrement filter counter
+ * @hw: pointer to hardware structure
+ * @flow: filter flow type
+ * @add: true implies filters added
+ */
+void
+ice_fdir_update_cntrs(struct ice_hw *hw, enum ice_fltr_ptype flow, bool add)
+{
+	int incr;
+
+	incr = add ? 1 : -1;
+	hw->fdir_active_fltr += incr;
+
+	if (flow == ICE_FLTR_PTYPE_NONF_NONE || flow >= ICE_FLTR_PTYPE_MAX)
+		ice_debug(hw, ICE_DBG_SW, "Unknown filter type %d\n", flow);
+	else
+		hw->fdir_fltr_cnt[flow] += incr;
+}
+
+/**
+ * ice_cmp_ipv6_addr - compare 2 IP v6 addresses
+ * @a: IP v6 address
+ * @b: IP v6 address
+ *
+ * Returns 0 on equal, returns non-0 if different
+ */
+static int ice_cmp_ipv6_addr(__be32 *a, __be32 *b)
+{
+	return memcmp(a, b, 4 * sizeof(__be32));
+}
+
+/**
+ * ice_fdir_comp_rules - compare 2 filters
+ * @a: a Flow Director filter data structure
+ * @b: a Flow Director filter data structure
+ * @v6: bool true if v6 filter
+ *
+ * Returns true if the filters match
+ */
+static bool
+ice_fdir_comp_rules(struct ice_fdir_fltr *a,  struct ice_fdir_fltr *b, bool v6)
+{
+	enum ice_fltr_ptype flow_type = a->flow_type;
+
+	/* The calling function already checks that the two filters have the
+	 * same flow_type.
+	 */
+	if (!v6) {
+		if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_SCTP) {
+			if (a->ip.v4.dst_ip == b->ip.v4.dst_ip &&
+			    a->ip.v4.src_ip == b->ip.v4.src_ip &&
+			    a->ip.v4.dst_port == b->ip.v4.dst_port &&
+			    a->ip.v4.src_port == b->ip.v4.src_port)
+				return true;
+		} else if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER) {
+			if (a->ip.v4.dst_ip == b->ip.v4.dst_ip &&
+			    a->ip.v4.src_ip == b->ip.v4.src_ip &&
+			    a->ip.v4.l4_header == b->ip.v4.l4_header &&
+			    a->ip.v4.proto == b->ip.v4.proto &&
+			    a->ip.v4.ip_ver == b->ip.v4.ip_ver &&
+			    a->ip.v4.tos == b->ip.v4.tos)
+				return true;
+		}
+	} else {
+		if (flow_type == ICE_FLTR_PTYPE_NONF_IPV6_UDP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV6_TCP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV6_SCTP) {
+			if (a->ip.v6.dst_port == b->ip.v6.dst_port &&
+			    a->ip.v6.src_port == b->ip.v6.src_port &&
+			    !ice_cmp_ipv6_addr(a->ip.v6.dst_ip,
+					       b->ip.v6.dst_ip) &&
+			    !ice_cmp_ipv6_addr(a->ip.v6.src_ip,
+					       b->ip.v6.src_ip))
+				return true;
+		} else if (flow_type == ICE_FLTR_PTYPE_NONF_IPV6_OTHER) {
+			if (a->ip.v6.dst_port == b->ip.v6.dst_port &&
+			    a->ip.v6.src_port == b->ip.v6.src_port)
+				return true;
+		}
+	}
+
+	return false;
+}
+
+/**
+ * ice_fdir_is_dup_fltr - test if filter is already in list for PF
+ * @hw: hardware data structure
+ * @input: Flow Director filter data structure
+ *
+ * Returns true if the filter is found in the list
+ */
+bool ice_fdir_is_dup_fltr(struct ice_hw *hw, struct ice_fdir_fltr *input)
+{
+	struct ice_fdir_fltr *rule;
+	bool ret = false;
+
+	list_for_each_entry(rule, &hw->fdir_list_head, fltr_node) {
+		enum ice_fltr_ptype flow_type;
+
+		if (rule->flow_type != input->flow_type)
+			continue;
+
+		flow_type = input->flow_type;
+		if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_SCTP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER)
+			ret = ice_fdir_comp_rules(rule, input, false);
+		else
+			ret = ice_fdir_comp_rules(rule, input, true);
+		if (ret) {
+			if (rule->fltr_id == input->fltr_id &&
+			    rule->q_index != input->q_index)
+				ret = false;
+			else
+				break;
+		}
+	}
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
new file mode 100644
index 0000000000..1b9b844906
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#ifndef _ICE_FDIR_H_
+#define _ICE_FDIR_H_
+
+#define ICE_FDIR_TUN_PKT_OFF		50
+#define ICE_FDIR_MAX_RAW_PKT_SIZE	(512 + ICE_FDIR_TUN_PKT_OFF)
+
+/* macros for offsets into packets for flow director programming */
+#define ICE_IPV4_SRC_ADDR_OFFSET	26
+#define ICE_IPV4_DST_ADDR_OFFSET	30
+#define ICE_IPV4_TCP_SRC_PORT_OFFSET	34
+#define ICE_IPV4_TCP_DST_PORT_OFFSET	36
+#define ICE_IPV4_UDP_SRC_PORT_OFFSET	34
+#define ICE_IPV4_UDP_DST_PORT_OFFSET	36
+#define ICE_IPV4_SCTP_SRC_PORT_OFFSET	34
+#define ICE_IPV4_SCTP_DST_PORT_OFFSET	36
+#define ICE_IPV4_PROTO_OFFSET		23
+#define ICE_IPV6_SRC_ADDR_OFFSET	22
+#define ICE_IPV6_DST_ADDR_OFFSET	38
+#define ICE_IPV6_TCP_SRC_PORT_OFFSET	54
+#define ICE_IPV6_TCP_DST_PORT_OFFSET	56
+#define ICE_IPV6_UDP_SRC_PORT_OFFSET	54
+#define ICE_IPV6_UDP_DST_PORT_OFFSET	56
+#define ICE_IPV6_SCTP_SRC_PORT_OFFSET	54
+#define ICE_IPV6_SCTP_DST_PORT_OFFSET	56
+#define ICE_MAC_ETHTYPE_OFFSET		12
+#define ICE_IPV4_TOS_OFFSET		15
+#define ICE_IPV4_TTL_OFFSET		22
+#define ICE_IPV6_TC_OFFSET		14
+#define ICE_IPV6_HLIM_OFFSET		21
+#define ICE_IPV6_PROTO_OFFSET		20
+#define ICE_IPV4_GTPU_TEID_OFFSET	46
+#define ICE_IPV4_GTPU_QFI_OFFSET	56
+#define ICE_IPV4_L2TPV3_SESS_ID_OFFSET	34
+#define ICE_IPV6_L2TPV3_SESS_ID_OFFSET	54
+#define ICE_IPV4_ESP_SPI_OFFSET		34
+#define ICE_IPV6_ESP_SPI_OFFSET		54
+#define ICE_IPV4_AH_SPI_OFFSET		38
+#define ICE_IPV6_AH_SPI_OFFSET		58
+#define ICE_IPV4_NAT_T_ESP_SPI_OFFSET	42
+#define ICE_IPV6_NAT_T_ESP_SPI_OFFSET	62
+
+#define ICE_FDIR_MAX_FLTRS		16384
+
+/* IP v4 has 2 flag bits that enable fragment processing: DF and MF. DF
+ * requests that the packet not be fragmented. MF indicates that a packet has
+ * been fragmented.
+ */
+#define ICE_FDIR_IPV4_PKT_FLAG_MF		0x20
+
+enum ice_fltr_prgm_desc_dest {
+	ICE_FLTR_PRGM_DESC_DEST_DROP_PKT,
+	ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX,
+	ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP,
+	ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER,
+};
+
+enum ice_fltr_prgm_desc_fd_status {
+	ICE_FLTR_PRGM_DESC_FD_STATUS_NONE,
+	ICE_FLTR_PRGM_DESC_FD_STATUS_FD_ID,
+};
+
+/* Flow Director (FD) Filter Programming descriptor */
+struct ice_fd_fltr_desc_ctx {
+	u32 fdid;
+	u16 qindex;
+	u16 cnt_index;
+	u16 fd_vsi;
+	u16 flex_val;
+	u8 comp_q;
+	u8 comp_report;
+	u8 fd_space;
+	u8 cnt_ena;
+	u8 evict_ena;
+	u8 toq;
+	u8 toq_prio;
+	u8 dpu_recipe;
+	u8 drop;
+	u8 flex_prio;
+	u8 flex_mdid;
+	u8 dtype;
+	u8 pcmd;
+	u8 desc_prof_prio;
+	u8 desc_prof;
+	u8 swap;
+	u8 fdid_prio;
+	u8 fdid_mdid;
+};
+
+#define ICE_FLTR_PRGM_FLEX_WORD_SIZE	sizeof(__be16)
+
+struct ice_rx_flow_userdef {
+	u16 flex_word;
+	u16 flex_offset;
+	u16 flex_fltr;
+};
+
+struct ice_fdir_v4 {
+	__be32 dst_ip;
+	__be32 src_ip;
+	__be16 dst_port;
+	__be16 src_port;
+	__be32 l4_header;
+	__be32 sec_parm_idx;	/* security parameter index */
+	u8 tos;
+	u8 ip_ver;
+	u8 proto;
+	u8 ttl;
+};
+
+#define ICE_IPV6_ADDR_LEN_AS_U32		4
+
+struct ice_fdir_v6 {
+	__be32 dst_ip[ICE_IPV6_ADDR_LEN_AS_U32];
+	__be32 src_ip[ICE_IPV6_ADDR_LEN_AS_U32];
+	__be16 dst_port;
+	__be16 src_port;
+	__be32 l4_header; /* next header */
+	__be32 sec_parm_idx; /* security parameter index */
+	u8 tc;
+	u8 proto;
+	u8 hlim;
+};
+
+struct ice_fdir_udp_gtp {
+	u8 flags;
+	u8 msg_type;
+	__be16 rsrvd_len;
+	__be32 teid;
+	__be16 rsrvd_seq_nbr;
+	u8 rsrvd_n_pdu_nbr;
+	u8 rsrvd_next_ext_type;
+	u8 rsvrd_ext_len;
+	u8	pdu_type:4,
+		spare:4;
+	u8	ppp:1,
+		rqi:1,
+		qfi:6;
+	u32 rsvrd;
+	u8 next_ext;
+};
+
+struct ice_fdir_l2tpv3 {
+	__be32 session_id;
+};
+
+struct ice_fdir_extra {
+	u8 dst_mac[ETH_ALEN];	/* dest MAC address */
+	u8 src_mac[ETH_ALEN];	/* src MAC address */
+	__be16 ether_type;	/* for NON_IP_L2 */
+	u32 usr_def[2];		/* user data */
+	__be16 vlan_type;	/* VLAN ethertype */
+	__be16 vlan_tag;	/* VLAN tag info */
+};
+
+struct ice_fdir_fltr {
+	struct list_head fltr_node;
+	enum ice_fltr_ptype flow_type;
+
+	union {
+		struct ice_fdir_v4 v4;
+		struct ice_fdir_v6 v6;
+	} ip, mask;
+
+	struct ice_fdir_udp_gtp gtpu_data;
+	struct ice_fdir_udp_gtp gtpu_mask;
+
+	struct ice_fdir_l2tpv3 l2tpv3_data;
+	struct ice_fdir_l2tpv3 l2tpv3_mask;
+
+	struct ice_fdir_extra ext_data;
+	struct ice_fdir_extra ext_mask;
+
+	/* flex byte filter data */
+	__be16 flex_word;
+	/* queue region size (=2^q_region) */
+	u8 q_region;
+	u16 flex_offset;
+	u16 flex_fltr;
+
+	/* filter control */
+	u16 q_index;
+	u16 orig_q_index;
+	u16 dest_vsi;
+	u8 dest_ctl;
+	u8 cnt_ena;
+	u8 fltr_status;
+	u16 cnt_index;
+	u32 fltr_id;
+	u8 fdid_prio;
+	u8 comp_report;
+};
+
+/* Dummy packet filter definition structure */
+struct ice_fdir_base_pkt {
+	enum ice_fltr_ptype flow;
+	u16 pkt_len;
+	const u8 *pkt;
+	u16 tun_pkt_len;
+	const u8 *tun_pkt;
+};
+
+int ice_alloc_fd_res_cntr(struct ice_hw *hw, u16 *cntr_id);
+int ice_free_fd_res_cntr(struct ice_hw *hw, u16 cntr_id);
+int ice_alloc_fd_guar_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr);
+int ice_alloc_fd_shrd_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr);
+void
+ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input,
+		       struct ice_fltr_desc *fdesc, bool add);
+int
+ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
+			  u8 *pkt, bool frag, bool tun);
+int ice_get_fdir_cnt_all(struct ice_hw *hw);
+bool ice_fdir_is_dup_fltr(struct ice_hw *hw, struct ice_fdir_fltr *input);
+bool ice_fdir_has_frag(enum ice_fltr_ptype flow);
+struct ice_fdir_fltr *
+ice_fdir_find_fltr_by_idx(struct ice_hw *hw, u32 fltr_idx);
+void
+ice_fdir_update_cntrs(struct ice_hw *hw, enum ice_fltr_ptype flow, bool add);
+void ice_fdir_list_add_fltr(struct ice_hw *hw, struct ice_fdir_fltr *input);
+#endif /* _ICE_FDIR_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
new file mode 100644
index 0000000000..5ce4139659
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -0,0 +1,4225 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_flex_pipe.h"
+#include "ice_flow.h"
+#include "ice.h"
+
+static const u32 ice_sect_lkup[ICE_BLK_COUNT][ICE_SECT_COUNT] = {
+	/* SWITCH */
+	{
+		ICE_SID_XLT0_SW,
+		ICE_SID_XLT_KEY_BUILDER_SW,
+		ICE_SID_XLT1_SW,
+		ICE_SID_XLT2_SW,
+		ICE_SID_PROFID_TCAM_SW,
+		ICE_SID_PROFID_REDIR_SW,
+		ICE_SID_FLD_VEC_SW,
+		ICE_SID_CDID_KEY_BUILDER_SW,
+		ICE_SID_CDID_REDIR_SW
+	},
+
+	/* ACL */
+	{
+		ICE_SID_XLT0_ACL,
+		ICE_SID_XLT_KEY_BUILDER_ACL,
+		ICE_SID_XLT1_ACL,
+		ICE_SID_XLT2_ACL,
+		ICE_SID_PROFID_TCAM_ACL,
+		ICE_SID_PROFID_REDIR_ACL,
+		ICE_SID_FLD_VEC_ACL,
+		ICE_SID_CDID_KEY_BUILDER_ACL,
+		ICE_SID_CDID_REDIR_ACL
+	},
+
+	/* FD */
+	{
+		ICE_SID_XLT0_FD,
+		ICE_SID_XLT_KEY_BUILDER_FD,
+		ICE_SID_XLT1_FD,
+		ICE_SID_XLT2_FD,
+		ICE_SID_PROFID_TCAM_FD,
+		ICE_SID_PROFID_REDIR_FD,
+		ICE_SID_FLD_VEC_FD,
+		ICE_SID_CDID_KEY_BUILDER_FD,
+		ICE_SID_CDID_REDIR_FD
+	},
+
+	/* RSS */
+	{
+		ICE_SID_XLT0_RSS,
+		ICE_SID_XLT_KEY_BUILDER_RSS,
+		ICE_SID_XLT1_RSS,
+		ICE_SID_XLT2_RSS,
+		ICE_SID_PROFID_TCAM_RSS,
+		ICE_SID_PROFID_REDIR_RSS,
+		ICE_SID_FLD_VEC_RSS,
+		ICE_SID_CDID_KEY_BUILDER_RSS,
+		ICE_SID_CDID_REDIR_RSS
+	},
+
+	/* PE */
+	{
+		ICE_SID_XLT0_PE,
+		ICE_SID_XLT_KEY_BUILDER_PE,
+		ICE_SID_XLT1_PE,
+		ICE_SID_XLT2_PE,
+		ICE_SID_PROFID_TCAM_PE,
+		ICE_SID_PROFID_REDIR_PE,
+		ICE_SID_FLD_VEC_PE,
+		ICE_SID_CDID_KEY_BUILDER_PE,
+		ICE_SID_CDID_REDIR_PE
+	}
+};
+
+/**
+ * ice_sect_id - returns section ID
+ * @blk: block type
+ * @sect: section type
+ *
+ * This helper function returns the proper section ID given a block type and a
+ * section type.
+ */
+static u32 ice_sect_id(enum ice_block blk, enum ice_sect sect)
+{
+	return ice_sect_lkup[blk][sect];
+}
+
+/**
+ * ice_hw_ptype_ena - check if the PTYPE is enabled or not
+ * @hw: pointer to the HW structure
+ * @ptype: the hardware PTYPE
+ */
+bool ice_hw_ptype_ena(struct ice_hw *hw, u16 ptype)
+{
+	return ptype < ICE_FLOW_PTYPE_MAX &&
+	       test_bit(ptype, hw->hw_ptype);
+}
+
+/* Key creation */
+
+#define ICE_DC_KEY	0x1	/* don't care */
+#define ICE_DC_KEYINV	0x1
+#define ICE_NM_KEY	0x0	/* never match */
+#define ICE_NM_KEYINV	0x0
+#define ICE_0_KEY	0x1	/* match 0 */
+#define ICE_0_KEYINV	0x0
+#define ICE_1_KEY	0x0	/* match 1 */
+#define ICE_1_KEYINV	0x1
+
+/**
+ * ice_gen_key_word - generate 16-bits of a key/mask word
+ * @val: the value
+ * @valid: valid bits mask (change only the valid bits)
+ * @dont_care: don't care mask
+ * @nvr_mtch: never match mask
+ * @key: pointer to an array of where the resulting key portion
+ * @key_inv: pointer to an array of where the resulting key invert portion
+ *
+ * This function generates 16-bits from a 8-bit value, an 8-bit don't care mask
+ * and an 8-bit never match mask. The 16-bits of output are divided into 8 bits
+ * of key and 8 bits of key invert.
+ *
+ *     '0' =    b01, always match a 0 bit
+ *     '1' =    b10, always match a 1 bit
+ *     '?' =    b11, don't care bit (always matches)
+ *     '~' =    b00, never match bit
+ *
+ * Input:
+ *          val:         b0  1  0  1  0  1
+ *          dont_care:   b0  0  1  1  0  0
+ *          never_mtch:  b0  0  0  0  1  1
+ *          ------------------------------
+ * Result:  key:        b01 10 11 11 00 00
+ */
+static int
+ice_gen_key_word(u8 val, u8 valid, u8 dont_care, u8 nvr_mtch, u8 *key,
+		 u8 *key_inv)
+{
+	u8 in_key = *key, in_key_inv = *key_inv;
+	u8 i;
+
+	/* 'dont_care' and 'nvr_mtch' masks cannot overlap */
+	if ((dont_care ^ nvr_mtch) != (dont_care | nvr_mtch))
+		return -EIO;
+
+	*key = 0;
+	*key_inv = 0;
+
+	/* encode the 8 bits into 8-bit key and 8-bit key invert */
+	for (i = 0; i < 8; i++) {
+		*key >>= 1;
+		*key_inv >>= 1;
+
+		if (!(valid & 0x1)) { /* change only valid bits */
+			*key |= (in_key & 0x1) << 7;
+			*key_inv |= (in_key_inv & 0x1) << 7;
+		} else if (dont_care & 0x1) { /* don't care bit */
+			*key |= ICE_DC_KEY << 7;
+			*key_inv |= ICE_DC_KEYINV << 7;
+		} else if (nvr_mtch & 0x1) { /* never match bit */
+			*key |= ICE_NM_KEY << 7;
+			*key_inv |= ICE_NM_KEYINV << 7;
+		} else if (val & 0x01) { /* exact 1 match */
+			*key |= ICE_1_KEY << 7;
+			*key_inv |= ICE_1_KEYINV << 7;
+		} else { /* exact 0 match */
+			*key |= ICE_0_KEY << 7;
+			*key_inv |= ICE_0_KEYINV << 7;
+		}
+
+		dont_care >>= 1;
+		nvr_mtch >>= 1;
+		valid >>= 1;
+		val >>= 1;
+		in_key >>= 1;
+		in_key_inv >>= 1;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_bits_max_set - determine if the number of bits set is within a maximum
+ * @mask: pointer to the byte array which is the mask
+ * @size: the number of bytes in the mask
+ * @max: the max number of set bits
+ *
+ * This function determines if there are at most 'max' number of bits set in an
+ * array. Returns true if the number for bits set is <= max or will return false
+ * otherwise.
+ */
+static bool ice_bits_max_set(const u8 *mask, u16 size, u16 max)
+{
+	u16 count = 0;
+	u16 i;
+
+	/* check each byte */
+	for (i = 0; i < size; i++) {
+		/* if 0, go to next byte */
+		if (!mask[i])
+			continue;
+
+		/* We know there is at least one set bit in this byte because of
+		 * the above check; if we already have found 'max' number of
+		 * bits set, then we can return failure now.
+		 */
+		if (count == max)
+			return false;
+
+		/* count the bits in this byte, checking threshold */
+		count += hweight8(mask[i]);
+		if (count > max)
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * ice_set_key - generate a variable sized key with multiples of 16-bits
+ * @key: pointer to where the key will be stored
+ * @size: the size of the complete key in bytes (must be even)
+ * @val: array of 8-bit values that makes up the value portion of the key
+ * @upd: array of 8-bit masks that determine what key portion to update
+ * @dc: array of 8-bit masks that make up the don't care mask
+ * @nm: array of 8-bit masks that make up the never match mask
+ * @off: the offset of the first byte in the key to update
+ * @len: the number of bytes in the key update
+ *
+ * This function generates a key from a value, a don't care mask and a never
+ * match mask.
+ * upd, dc, and nm are optional parameters, and can be NULL:
+ *	upd == NULL --> upd mask is all 1's (update all bits)
+ *	dc == NULL --> dc mask is all 0's (no don't care bits)
+ *	nm == NULL --> nm mask is all 0's (no never match bits)
+ */
+static int
+ice_set_key(u8 *key, u16 size, u8 *val, u8 *upd, u8 *dc, u8 *nm, u16 off,
+	    u16 len)
+{
+	u16 half_size;
+	u16 i;
+
+	/* size must be a multiple of 2 bytes. */
+	if (size % 2)
+		return -EIO;
+
+	half_size = size / 2;
+	if (off + len > half_size)
+		return -EIO;
+
+	/* Make sure at most one bit is set in the never match mask. Having more
+	 * than one never match mask bit set will cause HW to consume excessive
+	 * power otherwise; this is a power management efficiency check.
+	 */
+#define ICE_NVR_MTCH_BITS_MAX	1
+	if (nm && !ice_bits_max_set(nm, len, ICE_NVR_MTCH_BITS_MAX))
+		return -EIO;
+
+	for (i = 0; i < len; i++)
+		if (ice_gen_key_word(val[i], upd ? upd[i] : 0xff,
+				     dc ? dc[i] : 0, nm ? nm[i] : 0,
+				     key + off + i, key + half_size + off + i))
+			return -EIO;
+
+	return 0;
+}
+
+/**
+ * ice_acquire_change_lock
+ * @hw: pointer to the HW structure
+ * @access: access type (read or write)
+ *
+ * This function will request ownership of the change lock.
+ */
+int
+ice_acquire_change_lock(struct ice_hw *hw, enum ice_aq_res_access_type access)
+{
+	return ice_acquire_res(hw, ICE_CHANGE_LOCK_RES_ID, access,
+			       ICE_CHANGE_LOCK_TIMEOUT);
+}
+
+/**
+ * ice_release_change_lock
+ * @hw: pointer to the HW structure
+ *
+ * This function will release the change lock using the proper Admin Command.
+ */
+void ice_release_change_lock(struct ice_hw *hw)
+{
+	ice_release_res(hw, ICE_CHANGE_LOCK_RES_ID);
+}
+
+/**
+ * ice_get_open_tunnel_port - retrieve an open tunnel port
+ * @hw: pointer to the HW structure
+ * @port: returns open port
+ * @type: type of tunnel, can be TNL_LAST if it doesn't matter
+ */
+bool
+ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port,
+			 enum ice_tunnel_type type)
+{
+	bool res = false;
+	u16 i;
+
+	mutex_lock(&hw->tnl_lock);
+
+	for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++)
+		if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].port &&
+		    (type == TNL_LAST || type == hw->tnl.tbl[i].type)) {
+			*port = hw->tnl.tbl[i].port;
+			res = true;
+			break;
+		}
+
+	mutex_unlock(&hw->tnl_lock);
+
+	return res;
+}
+
+/**
+ * ice_upd_dvm_boost_entry
+ * @hw: pointer to the HW structure
+ * @entry: pointer to double vlan boost entry info
+ */
+static int
+ice_upd_dvm_boost_entry(struct ice_hw *hw, struct ice_dvm_entry *entry)
+{
+	struct ice_boost_tcam_section *sect_rx, *sect_tx;
+	int status = -ENOSPC;
+	struct ice_buf_build *bld;
+	u8 val, dc, nm;
+
+	bld = ice_pkg_buf_alloc(hw);
+	if (!bld)
+		return -ENOMEM;
+
+	/* allocate 2 sections, one for Rx parser, one for Tx parser */
+	if (ice_pkg_buf_reserve_section(bld, 2))
+		goto ice_upd_dvm_boost_entry_err;
+
+	sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM,
+					    struct_size(sect_rx, tcam, 1));
+	if (!sect_rx)
+		goto ice_upd_dvm_boost_entry_err;
+	sect_rx->count = cpu_to_le16(1);
+
+	sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM,
+					    struct_size(sect_tx, tcam, 1));
+	if (!sect_tx)
+		goto ice_upd_dvm_boost_entry_err;
+	sect_tx->count = cpu_to_le16(1);
+
+	/* copy original boost entry to update package buffer */
+	memcpy(sect_rx->tcam, entry->boost_entry, sizeof(*sect_rx->tcam));
+
+	/* re-write the don't care and never match bits accordingly */
+	if (entry->enable) {
+		/* all bits are don't care */
+		val = 0x00;
+		dc = 0xFF;
+		nm = 0x00;
+	} else {
+		/* disable, one never match bit, the rest are don't care */
+		val = 0x00;
+		dc = 0xF7;
+		nm = 0x08;
+	}
+
+	ice_set_key((u8 *)&sect_rx->tcam[0].key, sizeof(sect_rx->tcam[0].key),
+		    &val, NULL, &dc, &nm, 0, sizeof(u8));
+
+	/* exact copy of entry to Tx section entry */
+	memcpy(sect_tx->tcam, sect_rx->tcam, sizeof(*sect_tx->tcam));
+
+	status = ice_update_pkg_no_lock(hw, ice_pkg_buf(bld), 1);
+
+ice_upd_dvm_boost_entry_err:
+	ice_pkg_buf_free(hw, bld);
+
+	return status;
+}
+
+/**
+ * ice_set_dvm_boost_entries
+ * @hw: pointer to the HW structure
+ *
+ * Enable double vlan by updating the appropriate boost tcam entries.
+ */
+int ice_set_dvm_boost_entries(struct ice_hw *hw)
+{
+	u16 i;
+
+	for (i = 0; i < hw->dvm_upd.count; i++) {
+		int status;
+
+		status = ice_upd_dvm_boost_entry(hw, &hw->dvm_upd.tbl[i]);
+		if (status)
+			return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_tunnel_idx_to_entry - convert linear index to the sparse one
+ * @hw: pointer to the HW structure
+ * @type: type of tunnel
+ * @idx: linear index
+ *
+ * Stack assumes we have 2 linear tables with indexes [0, count_valid),
+ * but really the port table may be sprase, and types are mixed, so convert
+ * the stack index into the device index.
+ */
+static u16 ice_tunnel_idx_to_entry(struct ice_hw *hw, enum ice_tunnel_type type,
+				   u16 idx)
+{
+	u16 i;
+
+	for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++)
+		if (hw->tnl.tbl[i].valid &&
+		    hw->tnl.tbl[i].type == type &&
+		    idx-- == 0)
+			return i;
+
+	WARN_ON_ONCE(1);
+	return 0;
+}
+
+/**
+ * ice_create_tunnel
+ * @hw: pointer to the HW structure
+ * @index: device table entry
+ * @type: type of tunnel
+ * @port: port of tunnel to create
+ *
+ * Create a tunnel by updating the parse graph in the parser. We do that by
+ * creating a package buffer with the tunnel info and issuing an update package
+ * command.
+ */
+static int
+ice_create_tunnel(struct ice_hw *hw, u16 index,
+		  enum ice_tunnel_type type, u16 port)
+{
+	struct ice_boost_tcam_section *sect_rx, *sect_tx;
+	struct ice_buf_build *bld;
+	int status = -ENOSPC;
+
+	mutex_lock(&hw->tnl_lock);
+
+	bld = ice_pkg_buf_alloc(hw);
+	if (!bld) {
+		status = -ENOMEM;
+		goto ice_create_tunnel_end;
+	}
+
+	/* allocate 2 sections, one for Rx parser, one for Tx parser */
+	if (ice_pkg_buf_reserve_section(bld, 2))
+		goto ice_create_tunnel_err;
+
+	sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM,
+					    struct_size(sect_rx, tcam, 1));
+	if (!sect_rx)
+		goto ice_create_tunnel_err;
+	sect_rx->count = cpu_to_le16(1);
+
+	sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM,
+					    struct_size(sect_tx, tcam, 1));
+	if (!sect_tx)
+		goto ice_create_tunnel_err;
+	sect_tx->count = cpu_to_le16(1);
+
+	/* copy original boost entry to update package buffer */
+	memcpy(sect_rx->tcam, hw->tnl.tbl[index].boost_entry,
+	       sizeof(*sect_rx->tcam));
+
+	/* over-write the never-match dest port key bits with the encoded port
+	 * bits
+	 */
+	ice_set_key((u8 *)&sect_rx->tcam[0].key, sizeof(sect_rx->tcam[0].key),
+		    (u8 *)&port, NULL, NULL, NULL,
+		    (u16)offsetof(struct ice_boost_key_value, hv_dst_port_key),
+		    sizeof(sect_rx->tcam[0].key.key.hv_dst_port_key));
+
+	/* exact copy of entry to Tx section entry */
+	memcpy(sect_tx->tcam, sect_rx->tcam, sizeof(*sect_tx->tcam));
+
+	status = ice_update_pkg(hw, ice_pkg_buf(bld), 1);
+	if (!status)
+		hw->tnl.tbl[index].port = port;
+
+ice_create_tunnel_err:
+	ice_pkg_buf_free(hw, bld);
+
+ice_create_tunnel_end:
+	mutex_unlock(&hw->tnl_lock);
+
+	return status;
+}
+
+/**
+ * ice_destroy_tunnel
+ * @hw: pointer to the HW structure
+ * @index: device table entry
+ * @type: type of tunnel
+ * @port: port of tunnel to destroy (ignored if the all parameter is true)
+ *
+ * Destroys a tunnel or all tunnels by creating an update package buffer
+ * targeting the specific updates requested and then performing an update
+ * package.
+ */
+static int
+ice_destroy_tunnel(struct ice_hw *hw, u16 index, enum ice_tunnel_type type,
+		   u16 port)
+{
+	struct ice_boost_tcam_section *sect_rx, *sect_tx;
+	struct ice_buf_build *bld;
+	int status = -ENOSPC;
+
+	mutex_lock(&hw->tnl_lock);
+
+	if (WARN_ON(!hw->tnl.tbl[index].valid ||
+		    hw->tnl.tbl[index].type != type ||
+		    hw->tnl.tbl[index].port != port)) {
+		status = -EIO;
+		goto ice_destroy_tunnel_end;
+	}
+
+	bld = ice_pkg_buf_alloc(hw);
+	if (!bld) {
+		status = -ENOMEM;
+		goto ice_destroy_tunnel_end;
+	}
+
+	/* allocate 2 sections, one for Rx parser, one for Tx parser */
+	if (ice_pkg_buf_reserve_section(bld, 2))
+		goto ice_destroy_tunnel_err;
+
+	sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM,
+					    struct_size(sect_rx, tcam, 1));
+	if (!sect_rx)
+		goto ice_destroy_tunnel_err;
+	sect_rx->count = cpu_to_le16(1);
+
+	sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM,
+					    struct_size(sect_tx, tcam, 1));
+	if (!sect_tx)
+		goto ice_destroy_tunnel_err;
+	sect_tx->count = cpu_to_le16(1);
+
+	/* copy original boost entry to update package buffer, one copy to Rx
+	 * section, another copy to the Tx section
+	 */
+	memcpy(sect_rx->tcam, hw->tnl.tbl[index].boost_entry,
+	       sizeof(*sect_rx->tcam));
+	memcpy(sect_tx->tcam, hw->tnl.tbl[index].boost_entry,
+	       sizeof(*sect_tx->tcam));
+
+	status = ice_update_pkg(hw, ice_pkg_buf(bld), 1);
+	if (!status)
+		hw->tnl.tbl[index].port = 0;
+
+ice_destroy_tunnel_err:
+	ice_pkg_buf_free(hw, bld);
+
+ice_destroy_tunnel_end:
+	mutex_unlock(&hw->tnl_lock);
+
+	return status;
+}
+
+int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table,
+			    unsigned int idx, struct udp_tunnel_info *ti)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	enum ice_tunnel_type tnl_type;
+	int status;
+	u16 index;
+
+	tnl_type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? TNL_VXLAN : TNL_GENEVE;
+	index = ice_tunnel_idx_to_entry(&pf->hw, tnl_type, idx);
+
+	status = ice_create_tunnel(&pf->hw, index, tnl_type, ntohs(ti->port));
+	if (status) {
+		netdev_err(netdev, "Error adding UDP tunnel - %d\n",
+			   status);
+		return -EIO;
+	}
+
+	udp_tunnel_nic_set_port_priv(netdev, table, idx, index);
+	return 0;
+}
+
+int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table,
+			      unsigned int idx, struct udp_tunnel_info *ti)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	enum ice_tunnel_type tnl_type;
+	int status;
+
+	tnl_type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? TNL_VXLAN : TNL_GENEVE;
+
+	status = ice_destroy_tunnel(&pf->hw, ti->hw_priv, tnl_type,
+				    ntohs(ti->port));
+	if (status) {
+		netdev_err(netdev, "Error removing UDP tunnel - %d\n",
+			   status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_find_prot_off - find prot ID and offset pair, based on prof and FV index
+ * @hw: pointer to the hardware structure
+ * @blk: hardware block
+ * @prof: profile ID
+ * @fv_idx: field vector word index
+ * @prot: variable to receive the protocol ID
+ * @off: variable to receive the protocol offset
+ */
+int
+ice_find_prot_off(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 fv_idx,
+		  u8 *prot, u16 *off)
+{
+	struct ice_fv_word *fv_ext;
+
+	if (prof >= hw->blk[blk].es.count)
+		return -EINVAL;
+
+	if (fv_idx >= hw->blk[blk].es.fvw)
+		return -EINVAL;
+
+	fv_ext = hw->blk[blk].es.t + (prof * hw->blk[blk].es.fvw);
+
+	*prot = fv_ext[fv_idx].prot_id;
+	*off = fv_ext[fv_idx].off;
+
+	return 0;
+}
+
+/* PTG Management */
+
+/**
+ * ice_ptg_find_ptype - Search for packet type group using packet type (ptype)
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptype: the ptype to search for
+ * @ptg: pointer to variable that receives the PTG
+ *
+ * This function will search the PTGs for a particular ptype, returning the
+ * PTG ID that contains it through the PTG parameter, with the value of
+ * ICE_DEFAULT_PTG (0) meaning it is part the default PTG.
+ */
+static int
+ice_ptg_find_ptype(struct ice_hw *hw, enum ice_block blk, u16 ptype, u8 *ptg)
+{
+	if (ptype >= ICE_XLT1_CNT || !ptg)
+		return -EINVAL;
+
+	*ptg = hw->blk[blk].xlt1.ptypes[ptype].ptg;
+	return 0;
+}
+
+/**
+ * ice_ptg_alloc_val - Allocates a new packet type group ID by value
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptg: the PTG to allocate
+ *
+ * This function allocates a given packet type group ID specified by the PTG
+ * parameter.
+ */
+static void ice_ptg_alloc_val(struct ice_hw *hw, enum ice_block blk, u8 ptg)
+{
+	hw->blk[blk].xlt1.ptg_tbl[ptg].in_use = true;
+}
+
+/**
+ * ice_ptg_remove_ptype - Removes ptype from a particular packet type group
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptype: the ptype to remove
+ * @ptg: the PTG to remove the ptype from
+ *
+ * This function will remove the ptype from the specific PTG, and move it to
+ * the default PTG (ICE_DEFAULT_PTG).
+ */
+static int
+ice_ptg_remove_ptype(struct ice_hw *hw, enum ice_block blk, u16 ptype, u8 ptg)
+{
+	struct ice_ptg_ptype **ch;
+	struct ice_ptg_ptype *p;
+
+	if (ptype > ICE_XLT1_CNT - 1)
+		return -EINVAL;
+
+	if (!hw->blk[blk].xlt1.ptg_tbl[ptg].in_use)
+		return -ENOENT;
+
+	/* Should not happen if .in_use is set, bad config */
+	if (!hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype)
+		return -EIO;
+
+	/* find the ptype within this PTG, and bypass the link over it */
+	p = hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype;
+	ch = &hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype;
+	while (p) {
+		if (ptype == (p - hw->blk[blk].xlt1.ptypes)) {
+			*ch = p->next_ptype;
+			break;
+		}
+
+		ch = &p->next_ptype;
+		p = p->next_ptype;
+	}
+
+	hw->blk[blk].xlt1.ptypes[ptype].ptg = ICE_DEFAULT_PTG;
+	hw->blk[blk].xlt1.ptypes[ptype].next_ptype = NULL;
+
+	return 0;
+}
+
+/**
+ * ice_ptg_add_mv_ptype - Adds/moves ptype to a particular packet type group
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @ptype: the ptype to add or move
+ * @ptg: the PTG to add or move the ptype to
+ *
+ * This function will either add or move a ptype to a particular PTG depending
+ * on if the ptype is already part of another group. Note that using a
+ * destination PTG ID of ICE_DEFAULT_PTG (0) will move the ptype to the
+ * default PTG.
+ */
+static int
+ice_ptg_add_mv_ptype(struct ice_hw *hw, enum ice_block blk, u16 ptype, u8 ptg)
+{
+	u8 original_ptg;
+	int status;
+
+	if (ptype > ICE_XLT1_CNT - 1)
+		return -EINVAL;
+
+	if (!hw->blk[blk].xlt1.ptg_tbl[ptg].in_use && ptg != ICE_DEFAULT_PTG)
+		return -ENOENT;
+
+	status = ice_ptg_find_ptype(hw, blk, ptype, &original_ptg);
+	if (status)
+		return status;
+
+	/* Is ptype already in the correct PTG? */
+	if (original_ptg == ptg)
+		return 0;
+
+	/* Remove from original PTG and move back to the default PTG */
+	if (original_ptg != ICE_DEFAULT_PTG)
+		ice_ptg_remove_ptype(hw, blk, ptype, original_ptg);
+
+	/* Moving to default PTG? Then we're done with this request */
+	if (ptg == ICE_DEFAULT_PTG)
+		return 0;
+
+	/* Add ptype to PTG at beginning of list */
+	hw->blk[blk].xlt1.ptypes[ptype].next_ptype =
+		hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype;
+	hw->blk[blk].xlt1.ptg_tbl[ptg].first_ptype =
+		&hw->blk[blk].xlt1.ptypes[ptype];
+
+	hw->blk[blk].xlt1.ptypes[ptype].ptg = ptg;
+	hw->blk[blk].xlt1.t[ptype] = ptg;
+
+	return 0;
+}
+
+/* Block / table size info */
+struct ice_blk_size_details {
+	u16 xlt1;			/* # XLT1 entries */
+	u16 xlt2;			/* # XLT2 entries */
+	u16 prof_tcam;			/* # profile ID TCAM entries */
+	u16 prof_id;			/* # profile IDs */
+	u8 prof_cdid_bits;		/* # CDID one-hot bits used in key */
+	u16 prof_redir;			/* # profile redirection entries */
+	u16 es;				/* # extraction sequence entries */
+	u16 fvw;			/* # field vector words */
+	u8 overwrite;			/* overwrite existing entries allowed */
+	u8 reverse;			/* reverse FV order */
+};
+
+static const struct ice_blk_size_details blk_sizes[ICE_BLK_COUNT] = {
+	/**
+	 * Table Definitions
+	 * XLT1 - Number of entries in XLT1 table
+	 * XLT2 - Number of entries in XLT2 table
+	 * TCAM - Number of entries Profile ID TCAM table
+	 * CDID - Control Domain ID of the hardware block
+	 * PRED - Number of entries in the Profile Redirection Table
+	 * FV   - Number of entries in the Field Vector
+	 * FVW  - Width (in WORDs) of the Field Vector
+	 * OVR  - Overwrite existing table entries
+	 * REV  - Reverse FV
+	 */
+	/*          XLT1        , XLT2        ,TCAM, PID,CDID,PRED,   FV, FVW */
+	/*          Overwrite   , Reverse FV */
+	/* SW  */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 256,   0,  256, 256,  48,
+		    false, false },
+	/* ACL */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 128,   0,  128, 128,  32,
+		    false, false },
+	/* FD  */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 128,   0,  128, 128,  24,
+		    false, true  },
+	/* RSS */ { ICE_XLT1_CNT, ICE_XLT2_CNT, 512, 128,   0,  128, 128,  24,
+		    true,  true  },
+	/* PE  */ { ICE_XLT1_CNT, ICE_XLT2_CNT,  64,  32,   0,   32,  32,  24,
+		    false, false },
+};
+
+enum ice_sid_all {
+	ICE_SID_XLT1_OFF = 0,
+	ICE_SID_XLT2_OFF,
+	ICE_SID_PR_OFF,
+	ICE_SID_PR_REDIR_OFF,
+	ICE_SID_ES_OFF,
+	ICE_SID_OFF_COUNT,
+};
+
+/* Characteristic handling */
+
+/**
+ * ice_match_prop_lst - determine if properties of two lists match
+ * @list1: first properties list
+ * @list2: second properties list
+ *
+ * Count, cookies and the order must match in order to be considered equivalent.
+ */
+static bool
+ice_match_prop_lst(struct list_head *list1, struct list_head *list2)
+{
+	struct ice_vsig_prof *tmp1;
+	struct ice_vsig_prof *tmp2;
+	u16 chk_count = 0;
+	u16 count = 0;
+
+	/* compare counts */
+	list_for_each_entry(tmp1, list1, list)
+		count++;
+	list_for_each_entry(tmp2, list2, list)
+		chk_count++;
+	if (!count || count != chk_count)
+		return false;
+
+	tmp1 = list_first_entry(list1, struct ice_vsig_prof, list);
+	tmp2 = list_first_entry(list2, struct ice_vsig_prof, list);
+
+	/* profile cookies must compare, and in the exact same order to take
+	 * into account priority
+	 */
+	while (count--) {
+		if (tmp2->profile_cookie != tmp1->profile_cookie)
+			return false;
+
+		tmp1 = list_next_entry(tmp1, list);
+		tmp2 = list_next_entry(tmp2, list);
+	}
+
+	return true;
+}
+
+/* VSIG Management */
+
+/**
+ * ice_vsig_find_vsi - find a VSIG that contains a specified VSI
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsi: VSI of interest
+ * @vsig: pointer to receive the VSI group
+ *
+ * This function will lookup the VSI entry in the XLT2 list and return
+ * the VSI group its associated with.
+ */
+static int
+ice_vsig_find_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 *vsig)
+{
+	if (!vsig || vsi >= ICE_MAX_VSI)
+		return -EINVAL;
+
+	/* As long as there's a default or valid VSIG associated with the input
+	 * VSI, the functions returns a success. Any handling of VSIG will be
+	 * done by the following add, update or remove functions.
+	 */
+	*vsig = hw->blk[blk].xlt2.vsis[vsi].vsig;
+
+	return 0;
+}
+
+/**
+ * ice_vsig_alloc_val - allocate a new VSIG by value
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsig: the VSIG to allocate
+ *
+ * This function will allocate a given VSIG specified by the VSIG parameter.
+ */
+static u16 ice_vsig_alloc_val(struct ice_hw *hw, enum ice_block blk, u16 vsig)
+{
+	u16 idx = vsig & ICE_VSIG_IDX_M;
+
+	if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use) {
+		INIT_LIST_HEAD(&hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst);
+		hw->blk[blk].xlt2.vsig_tbl[idx].in_use = true;
+	}
+
+	return ICE_VSIG_VALUE(idx, hw->pf_id);
+}
+
+/**
+ * ice_vsig_alloc - Finds a free entry and allocates a new VSIG
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ *
+ * This function will iterate through the VSIG list and mark the first
+ * unused entry for the new VSIG entry as used and return that value.
+ */
+static u16 ice_vsig_alloc(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 i;
+
+	for (i = 1; i < ICE_MAX_VSIGS; i++)
+		if (!hw->blk[blk].xlt2.vsig_tbl[i].in_use)
+			return ice_vsig_alloc_val(hw, blk, i);
+
+	return ICE_DEFAULT_VSIG;
+}
+
+/**
+ * ice_find_dup_props_vsig - find VSI group with a specified set of properties
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @chs: characteristic list
+ * @vsig: returns the VSIG with the matching profiles, if found
+ *
+ * Each VSIG is associated with a characteristic set; i.e. all VSIs under
+ * a group have the same characteristic set. To check if there exists a VSIG
+ * which has the same characteristics as the input characteristics; this
+ * function will iterate through the XLT2 list and return the VSIG that has a
+ * matching configuration. In order to make sure that priorities are accounted
+ * for, the list must match exactly, including the order in which the
+ * characteristics are listed.
+ */
+static int
+ice_find_dup_props_vsig(struct ice_hw *hw, enum ice_block blk,
+			struct list_head *chs, u16 *vsig)
+{
+	struct ice_xlt2 *xlt2 = &hw->blk[blk].xlt2;
+	u16 i;
+
+	for (i = 0; i < xlt2->count; i++)
+		if (xlt2->vsig_tbl[i].in_use &&
+		    ice_match_prop_lst(chs, &xlt2->vsig_tbl[i].prop_lst)) {
+			*vsig = ICE_VSIG_VALUE(i, hw->pf_id);
+			return 0;
+		}
+
+	return -ENOENT;
+}
+
+/**
+ * ice_vsig_free - free VSI group
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsig: VSIG to remove
+ *
+ * The function will remove all VSIs associated with the input VSIG and move
+ * them to the DEFAULT_VSIG and mark the VSIG available.
+ */
+static int ice_vsig_free(struct ice_hw *hw, enum ice_block blk, u16 vsig)
+{
+	struct ice_vsig_prof *dtmp, *del;
+	struct ice_vsig_vsi *vsi_cur;
+	u16 idx;
+
+	idx = vsig & ICE_VSIG_IDX_M;
+	if (idx >= ICE_MAX_VSIGS)
+		return -EINVAL;
+
+	if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use)
+		return -ENOENT;
+
+	hw->blk[blk].xlt2.vsig_tbl[idx].in_use = false;
+
+	vsi_cur = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+	/* If the VSIG has at least 1 VSI then iterate through the
+	 * list and remove the VSIs before deleting the group.
+	 */
+	if (vsi_cur) {
+		/* remove all vsis associated with this VSIG XLT2 entry */
+		do {
+			struct ice_vsig_vsi *tmp = vsi_cur->next_vsi;
+
+			vsi_cur->vsig = ICE_DEFAULT_VSIG;
+			vsi_cur->changed = 1;
+			vsi_cur->next_vsi = NULL;
+			vsi_cur = tmp;
+		} while (vsi_cur);
+
+		/* NULL terminate head of VSI list */
+		hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi = NULL;
+	}
+
+	/* free characteristic list */
+	list_for_each_entry_safe(del, dtmp,
+				 &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+				 list) {
+		list_del(&del->list);
+		devm_kfree(ice_hw_to_dev(hw), del);
+	}
+
+	/* if VSIG characteristic list was cleared for reset
+	 * re-initialize the list head
+	 */
+	INIT_LIST_HEAD(&hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst);
+
+	return 0;
+}
+
+/**
+ * ice_vsig_remove_vsi - remove VSI from VSIG
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsi: VSI to remove
+ * @vsig: VSI group to remove from
+ *
+ * The function will remove the input VSI from its VSI group and move it
+ * to the DEFAULT_VSIG.
+ */
+static int
+ice_vsig_remove_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig)
+{
+	struct ice_vsig_vsi **vsi_head, *vsi_cur, *vsi_tgt;
+	u16 idx;
+
+	idx = vsig & ICE_VSIG_IDX_M;
+
+	if (vsi >= ICE_MAX_VSI || idx >= ICE_MAX_VSIGS)
+		return -EINVAL;
+
+	if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use)
+		return -ENOENT;
+
+	/* entry already in default VSIG, don't have to remove */
+	if (idx == ICE_DEFAULT_VSIG)
+		return 0;
+
+	vsi_head = &hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+	if (!(*vsi_head))
+		return -EIO;
+
+	vsi_tgt = &hw->blk[blk].xlt2.vsis[vsi];
+	vsi_cur = (*vsi_head);
+
+	/* iterate the VSI list, skip over the entry to be removed */
+	while (vsi_cur) {
+		if (vsi_tgt == vsi_cur) {
+			(*vsi_head) = vsi_cur->next_vsi;
+			break;
+		}
+		vsi_head = &vsi_cur->next_vsi;
+		vsi_cur = vsi_cur->next_vsi;
+	}
+
+	/* verify if VSI was removed from group list */
+	if (!vsi_cur)
+		return -ENOENT;
+
+	vsi_cur->vsig = ICE_DEFAULT_VSIG;
+	vsi_cur->changed = 1;
+	vsi_cur->next_vsi = NULL;
+
+	return 0;
+}
+
+/**
+ * ice_vsig_add_mv_vsi - add or move a VSI to a VSI group
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsi: VSI to move
+ * @vsig: destination VSI group
+ *
+ * This function will move or add the input VSI to the target VSIG.
+ * The function will find the original VSIG the VSI belongs to and
+ * move the entry to the DEFAULT_VSIG, update the original VSIG and
+ * then move entry to the new VSIG.
+ */
+static int
+ice_vsig_add_mv_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig)
+{
+	struct ice_vsig_vsi *tmp;
+	u16 orig_vsig, idx;
+	int status;
+
+	idx = vsig & ICE_VSIG_IDX_M;
+
+	if (vsi >= ICE_MAX_VSI || idx >= ICE_MAX_VSIGS)
+		return -EINVAL;
+
+	/* if VSIG not in use and VSIG is not default type this VSIG
+	 * doesn't exist.
+	 */
+	if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use &&
+	    vsig != ICE_DEFAULT_VSIG)
+		return -ENOENT;
+
+	status = ice_vsig_find_vsi(hw, blk, vsi, &orig_vsig);
+	if (status)
+		return status;
+
+	/* no update required if vsigs match */
+	if (orig_vsig == vsig)
+		return 0;
+
+	if (orig_vsig != ICE_DEFAULT_VSIG) {
+		/* remove entry from orig_vsig and add to default VSIG */
+		status = ice_vsig_remove_vsi(hw, blk, vsi, orig_vsig);
+		if (status)
+			return status;
+	}
+
+	if (idx == ICE_DEFAULT_VSIG)
+		return 0;
+
+	/* Create VSI entry and add VSIG and prop_mask values */
+	hw->blk[blk].xlt2.vsis[vsi].vsig = vsig;
+	hw->blk[blk].xlt2.vsis[vsi].changed = 1;
+
+	/* Add new entry to the head of the VSIG list */
+	tmp = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+	hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi =
+		&hw->blk[blk].xlt2.vsis[vsi];
+	hw->blk[blk].xlt2.vsis[vsi].next_vsi = tmp;
+	hw->blk[blk].xlt2.t[vsi] = vsig;
+
+	return 0;
+}
+
+/**
+ * ice_prof_has_mask_idx - determine if profile index masking is identical
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @prof: profile to check
+ * @idx: profile index to check
+ * @mask: mask to match
+ */
+static bool
+ice_prof_has_mask_idx(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 idx,
+		      u16 mask)
+{
+	bool expect_no_mask = false;
+	bool found = false;
+	bool match = false;
+	u16 i;
+
+	/* If mask is 0x0000 or 0xffff, then there is no masking */
+	if (mask == 0 || mask == 0xffff)
+		expect_no_mask = true;
+
+	/* Scan the enabled masks on this profile, for the specified idx */
+	for (i = hw->blk[blk].masks.first; i < hw->blk[blk].masks.first +
+	     hw->blk[blk].masks.count; i++)
+		if (hw->blk[blk].es.mask_ena[prof] & BIT(i))
+			if (hw->blk[blk].masks.masks[i].in_use &&
+			    hw->blk[blk].masks.masks[i].idx == idx) {
+				found = true;
+				if (hw->blk[blk].masks.masks[i].mask == mask)
+					match = true;
+				break;
+			}
+
+	if (expect_no_mask) {
+		if (found)
+			return false;
+	} else {
+		if (!match)
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * ice_prof_has_mask - determine if profile masking is identical
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @prof: profile to check
+ * @masks: masks to match
+ */
+static bool
+ice_prof_has_mask(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 *masks)
+{
+	u16 i;
+
+	/* es->mask_ena[prof] will have the mask */
+	for (i = 0; i < hw->blk[blk].es.fvw; i++)
+		if (!ice_prof_has_mask_idx(hw, blk, prof, i, masks[i]))
+			return false;
+
+	return true;
+}
+
+/**
+ * ice_find_prof_id_with_mask - find profile ID for a given field vector
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @fv: field vector to search for
+ * @masks: masks for FV
+ * @prof_id: receives the profile ID
+ */
+static int
+ice_find_prof_id_with_mask(struct ice_hw *hw, enum ice_block blk,
+			   struct ice_fv_word *fv, u16 *masks, u8 *prof_id)
+{
+	struct ice_es *es = &hw->blk[blk].es;
+	u8 i;
+
+	/* For FD, we don't want to re-use a existed profile with the same
+	 * field vector and mask. This will cause rule interference.
+	 */
+	if (blk == ICE_BLK_FD)
+		return -ENOENT;
+
+	for (i = 0; i < (u8)es->count; i++) {
+		u16 off = i * es->fvw;
+
+		if (memcmp(&es->t[off], fv, es->fvw * sizeof(*fv)))
+			continue;
+
+		/* check if masks settings are the same for this profile */
+		if (masks && !ice_prof_has_mask(hw, blk, i, masks))
+			continue;
+
+		*prof_id = i;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/**
+ * ice_prof_id_rsrc_type - get profile ID resource type for a block type
+ * @blk: the block type
+ * @rsrc_type: pointer to variable to receive the resource type
+ */
+static bool ice_prof_id_rsrc_type(enum ice_block blk, u16 *rsrc_type)
+{
+	switch (blk) {
+	case ICE_BLK_FD:
+		*rsrc_type = ICE_AQC_RES_TYPE_FD_PROF_BLDR_PROFID;
+		break;
+	case ICE_BLK_RSS:
+		*rsrc_type = ICE_AQC_RES_TYPE_HASH_PROF_BLDR_PROFID;
+		break;
+	default:
+		return false;
+	}
+	return true;
+}
+
+/**
+ * ice_tcam_ent_rsrc_type - get TCAM entry resource type for a block type
+ * @blk: the block type
+ * @rsrc_type: pointer to variable to receive the resource type
+ */
+static bool ice_tcam_ent_rsrc_type(enum ice_block blk, u16 *rsrc_type)
+{
+	switch (blk) {
+	case ICE_BLK_FD:
+		*rsrc_type = ICE_AQC_RES_TYPE_FD_PROF_BLDR_TCAM;
+		break;
+	case ICE_BLK_RSS:
+		*rsrc_type = ICE_AQC_RES_TYPE_HASH_PROF_BLDR_TCAM;
+		break;
+	default:
+		return false;
+	}
+	return true;
+}
+
+/**
+ * ice_alloc_tcam_ent - allocate hardware TCAM entry
+ * @hw: pointer to the HW struct
+ * @blk: the block to allocate the TCAM for
+ * @btm: true to allocate from bottom of table, false to allocate from top
+ * @tcam_idx: pointer to variable to receive the TCAM entry
+ *
+ * This function allocates a new entry in a Profile ID TCAM for a specific
+ * block.
+ */
+static int
+ice_alloc_tcam_ent(struct ice_hw *hw, enum ice_block blk, bool btm,
+		   u16 *tcam_idx)
+{
+	u16 res_type;
+
+	if (!ice_tcam_ent_rsrc_type(blk, &res_type))
+		return -EINVAL;
+
+	return ice_alloc_hw_res(hw, res_type, 1, btm, tcam_idx);
+}
+
+/**
+ * ice_free_tcam_ent - free hardware TCAM entry
+ * @hw: pointer to the HW struct
+ * @blk: the block from which to free the TCAM entry
+ * @tcam_idx: the TCAM entry to free
+ *
+ * This function frees an entry in a Profile ID TCAM for a specific block.
+ */
+static int
+ice_free_tcam_ent(struct ice_hw *hw, enum ice_block blk, u16 tcam_idx)
+{
+	u16 res_type;
+
+	if (!ice_tcam_ent_rsrc_type(blk, &res_type))
+		return -EINVAL;
+
+	return ice_free_hw_res(hw, res_type, 1, &tcam_idx);
+}
+
+/**
+ * ice_alloc_prof_id - allocate profile ID
+ * @hw: pointer to the HW struct
+ * @blk: the block to allocate the profile ID for
+ * @prof_id: pointer to variable to receive the profile ID
+ *
+ * This function allocates a new profile ID, which also corresponds to a Field
+ * Vector (Extraction Sequence) entry.
+ */
+static int ice_alloc_prof_id(struct ice_hw *hw, enum ice_block blk, u8 *prof_id)
+{
+	u16 res_type;
+	u16 get_prof;
+	int status;
+
+	if (!ice_prof_id_rsrc_type(blk, &res_type))
+		return -EINVAL;
+
+	status = ice_alloc_hw_res(hw, res_type, 1, false, &get_prof);
+	if (!status)
+		*prof_id = (u8)get_prof;
+
+	return status;
+}
+
+/**
+ * ice_free_prof_id - free profile ID
+ * @hw: pointer to the HW struct
+ * @blk: the block from which to free the profile ID
+ * @prof_id: the profile ID to free
+ *
+ * This function frees a profile ID, which also corresponds to a Field Vector.
+ */
+static int ice_free_prof_id(struct ice_hw *hw, enum ice_block blk, u8 prof_id)
+{
+	u16 tmp_prof_id = (u16)prof_id;
+	u16 res_type;
+
+	if (!ice_prof_id_rsrc_type(blk, &res_type))
+		return -EINVAL;
+
+	return ice_free_hw_res(hw, res_type, 1, &tmp_prof_id);
+}
+
+/**
+ * ice_prof_inc_ref - increment reference count for profile
+ * @hw: pointer to the HW struct
+ * @blk: the block from which to free the profile ID
+ * @prof_id: the profile ID for which to increment the reference count
+ */
+static int ice_prof_inc_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id)
+{
+	if (prof_id > hw->blk[blk].es.count)
+		return -EINVAL;
+
+	hw->blk[blk].es.ref_count[prof_id]++;
+
+	return 0;
+}
+
+/**
+ * ice_write_prof_mask_reg - write profile mask register
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @mask_idx: mask index
+ * @idx: index of the FV which will use the mask
+ * @mask: the 16-bit mask
+ */
+static void
+ice_write_prof_mask_reg(struct ice_hw *hw, enum ice_block blk, u16 mask_idx,
+			u16 idx, u16 mask)
+{
+	u32 offset;
+	u32 val;
+
+	switch (blk) {
+	case ICE_BLK_RSS:
+		offset = GLQF_HMASK(mask_idx);
+		val = (idx << GLQF_HMASK_MSK_INDEX_S) & GLQF_HMASK_MSK_INDEX_M;
+		val |= (mask << GLQF_HMASK_MASK_S) & GLQF_HMASK_MASK_M;
+		break;
+	case ICE_BLK_FD:
+		offset = GLQF_FDMASK(mask_idx);
+		val = (idx << GLQF_FDMASK_MSK_INDEX_S) & GLQF_FDMASK_MSK_INDEX_M;
+		val |= (mask << GLQF_FDMASK_MASK_S) & GLQF_FDMASK_MASK_M;
+		break;
+	default:
+		ice_debug(hw, ICE_DBG_PKG, "No profile masks for block %d\n",
+			  blk);
+		return;
+	}
+
+	wr32(hw, offset, val);
+	ice_debug(hw, ICE_DBG_PKG, "write mask, blk %d (%d): %x = %x\n",
+		  blk, idx, offset, val);
+}
+
+/**
+ * ice_write_prof_mask_enable_res - write profile mask enable register
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @prof_id: profile ID
+ * @enable_mask: enable mask
+ */
+static void
+ice_write_prof_mask_enable_res(struct ice_hw *hw, enum ice_block blk,
+			       u16 prof_id, u32 enable_mask)
+{
+	u32 offset;
+
+	switch (blk) {
+	case ICE_BLK_RSS:
+		offset = GLQF_HMASK_SEL(prof_id);
+		break;
+	case ICE_BLK_FD:
+		offset = GLQF_FDMASK_SEL(prof_id);
+		break;
+	default:
+		ice_debug(hw, ICE_DBG_PKG, "No profile masks for block %d\n",
+			  blk);
+		return;
+	}
+
+	wr32(hw, offset, enable_mask);
+	ice_debug(hw, ICE_DBG_PKG, "write mask enable, blk %d (%d): %x = %x\n",
+		  blk, prof_id, offset, enable_mask);
+}
+
+/**
+ * ice_init_prof_masks - initial prof masks
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ */
+static void ice_init_prof_masks(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 per_pf;
+	u16 i;
+
+	mutex_init(&hw->blk[blk].masks.lock);
+
+	per_pf = ICE_PROF_MASK_COUNT / hw->dev_caps.num_funcs;
+
+	hw->blk[blk].masks.count = per_pf;
+	hw->blk[blk].masks.first = hw->pf_id * per_pf;
+
+	memset(hw->blk[blk].masks.masks, 0, sizeof(hw->blk[blk].masks.masks));
+
+	for (i = hw->blk[blk].masks.first;
+	     i < hw->blk[blk].masks.first + hw->blk[blk].masks.count; i++)
+		ice_write_prof_mask_reg(hw, blk, i, 0, 0);
+}
+
+/**
+ * ice_init_all_prof_masks - initialize all prof masks
+ * @hw: pointer to the HW struct
+ */
+static void ice_init_all_prof_masks(struct ice_hw *hw)
+{
+	ice_init_prof_masks(hw, ICE_BLK_RSS);
+	ice_init_prof_masks(hw, ICE_BLK_FD);
+}
+
+/**
+ * ice_alloc_prof_mask - allocate profile mask
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @idx: index of FV which will use the mask
+ * @mask: the 16-bit mask
+ * @mask_idx: variable to receive the mask index
+ */
+static int
+ice_alloc_prof_mask(struct ice_hw *hw, enum ice_block blk, u16 idx, u16 mask,
+		    u16 *mask_idx)
+{
+	bool found_unused = false, found_copy = false;
+	u16 unused_idx = 0, copy_idx = 0;
+	int status = -ENOSPC;
+	u16 i;
+
+	if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+		return -EINVAL;
+
+	mutex_lock(&hw->blk[blk].masks.lock);
+
+	for (i = hw->blk[blk].masks.first;
+	     i < hw->blk[blk].masks.first + hw->blk[blk].masks.count; i++)
+		if (hw->blk[blk].masks.masks[i].in_use) {
+			/* if mask is in use and it exactly duplicates the
+			 * desired mask and index, then in can be reused
+			 */
+			if (hw->blk[blk].masks.masks[i].mask == mask &&
+			    hw->blk[blk].masks.masks[i].idx == idx) {
+				found_copy = true;
+				copy_idx = i;
+				break;
+			}
+		} else {
+			/* save off unused index, but keep searching in case
+			 * there is an exact match later on
+			 */
+			if (!found_unused) {
+				found_unused = true;
+				unused_idx = i;
+			}
+		}
+
+	if (found_copy)
+		i = copy_idx;
+	else if (found_unused)
+		i = unused_idx;
+	else
+		goto err_ice_alloc_prof_mask;
+
+	/* update mask for a new entry */
+	if (found_unused) {
+		hw->blk[blk].masks.masks[i].in_use = true;
+		hw->blk[blk].masks.masks[i].mask = mask;
+		hw->blk[blk].masks.masks[i].idx = idx;
+		hw->blk[blk].masks.masks[i].ref = 0;
+		ice_write_prof_mask_reg(hw, blk, i, idx, mask);
+	}
+
+	hw->blk[blk].masks.masks[i].ref++;
+	*mask_idx = i;
+	status = 0;
+
+err_ice_alloc_prof_mask:
+	mutex_unlock(&hw->blk[blk].masks.lock);
+
+	return status;
+}
+
+/**
+ * ice_free_prof_mask - free profile mask
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @mask_idx: index of mask
+ */
+static int
+ice_free_prof_mask(struct ice_hw *hw, enum ice_block blk, u16 mask_idx)
+{
+	if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+		return -EINVAL;
+
+	if (!(mask_idx >= hw->blk[blk].masks.first &&
+	      mask_idx < hw->blk[blk].masks.first + hw->blk[blk].masks.count))
+		return -ENOENT;
+
+	mutex_lock(&hw->blk[blk].masks.lock);
+
+	if (!hw->blk[blk].masks.masks[mask_idx].in_use)
+		goto exit_ice_free_prof_mask;
+
+	if (hw->blk[blk].masks.masks[mask_idx].ref > 1) {
+		hw->blk[blk].masks.masks[mask_idx].ref--;
+		goto exit_ice_free_prof_mask;
+	}
+
+	/* remove mask */
+	hw->blk[blk].masks.masks[mask_idx].in_use = false;
+	hw->blk[blk].masks.masks[mask_idx].mask = 0;
+	hw->blk[blk].masks.masks[mask_idx].idx = 0;
+
+	/* update mask as unused entry */
+	ice_debug(hw, ICE_DBG_PKG, "Free mask, blk %d, mask %d\n", blk,
+		  mask_idx);
+	ice_write_prof_mask_reg(hw, blk, mask_idx, 0, 0);
+
+exit_ice_free_prof_mask:
+	mutex_unlock(&hw->blk[blk].masks.lock);
+
+	return 0;
+}
+
+/**
+ * ice_free_prof_masks - free all profile masks for a profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @prof_id: profile ID
+ */
+static int
+ice_free_prof_masks(struct ice_hw *hw, enum ice_block blk, u16 prof_id)
+{
+	u32 mask_bm;
+	u16 i;
+
+	if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+		return -EINVAL;
+
+	mask_bm = hw->blk[blk].es.mask_ena[prof_id];
+	for (i = 0; i < BITS_PER_BYTE * sizeof(mask_bm); i++)
+		if (mask_bm & BIT(i))
+			ice_free_prof_mask(hw, blk, i);
+
+	return 0;
+}
+
+/**
+ * ice_shutdown_prof_masks - releases lock for masking
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ *
+ * This should be called before unloading the driver
+ */
+static void ice_shutdown_prof_masks(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 i;
+
+	mutex_lock(&hw->blk[blk].masks.lock);
+
+	for (i = hw->blk[blk].masks.first;
+	     i < hw->blk[blk].masks.first + hw->blk[blk].masks.count; i++) {
+		ice_write_prof_mask_reg(hw, blk, i, 0, 0);
+
+		hw->blk[blk].masks.masks[i].in_use = false;
+		hw->blk[blk].masks.masks[i].idx = 0;
+		hw->blk[blk].masks.masks[i].mask = 0;
+	}
+
+	mutex_unlock(&hw->blk[blk].masks.lock);
+	mutex_destroy(&hw->blk[blk].masks.lock);
+}
+
+/**
+ * ice_shutdown_all_prof_masks - releases all locks for masking
+ * @hw: pointer to the HW struct
+ *
+ * This should be called before unloading the driver
+ */
+static void ice_shutdown_all_prof_masks(struct ice_hw *hw)
+{
+	ice_shutdown_prof_masks(hw, ICE_BLK_RSS);
+	ice_shutdown_prof_masks(hw, ICE_BLK_FD);
+}
+
+/**
+ * ice_update_prof_masking - set registers according to masking
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @prof_id: profile ID
+ * @masks: masks
+ */
+static int
+ice_update_prof_masking(struct ice_hw *hw, enum ice_block blk, u16 prof_id,
+			u16 *masks)
+{
+	bool err = false;
+	u32 ena_mask = 0;
+	u16 idx;
+	u16 i;
+
+	/* Only support FD and RSS masking, otherwise nothing to be done */
+	if (blk != ICE_BLK_RSS && blk != ICE_BLK_FD)
+		return 0;
+
+	for (i = 0; i < hw->blk[blk].es.fvw; i++)
+		if (masks[i] && masks[i] != 0xFFFF) {
+			if (!ice_alloc_prof_mask(hw, blk, i, masks[i], &idx)) {
+				ena_mask |= BIT(idx);
+			} else {
+				/* not enough bitmaps */
+				err = true;
+				break;
+			}
+		}
+
+	if (err) {
+		/* free any bitmaps we have allocated */
+		for (i = 0; i < BITS_PER_BYTE * sizeof(ena_mask); i++)
+			if (ena_mask & BIT(i))
+				ice_free_prof_mask(hw, blk, i);
+
+		return -EIO;
+	}
+
+	/* enable the masks for this profile */
+	ice_write_prof_mask_enable_res(hw, blk, prof_id, ena_mask);
+
+	/* store enabled masks with profile so that they can be freed later */
+	hw->blk[blk].es.mask_ena[prof_id] = ena_mask;
+
+	return 0;
+}
+
+/**
+ * ice_write_es - write an extraction sequence to hardware
+ * @hw: pointer to the HW struct
+ * @blk: the block in which to write the extraction sequence
+ * @prof_id: the profile ID to write
+ * @fv: pointer to the extraction sequence to write - NULL to clear extraction
+ */
+static void
+ice_write_es(struct ice_hw *hw, enum ice_block blk, u8 prof_id,
+	     struct ice_fv_word *fv)
+{
+	u16 off;
+
+	off = prof_id * hw->blk[blk].es.fvw;
+	if (!fv) {
+		memset(&hw->blk[blk].es.t[off], 0,
+		       hw->blk[blk].es.fvw * sizeof(*fv));
+		hw->blk[blk].es.written[prof_id] = false;
+	} else {
+		memcpy(&hw->blk[blk].es.t[off], fv,
+		       hw->blk[blk].es.fvw * sizeof(*fv));
+	}
+}
+
+/**
+ * ice_prof_dec_ref - decrement reference count for profile
+ * @hw: pointer to the HW struct
+ * @blk: the block from which to free the profile ID
+ * @prof_id: the profile ID for which to decrement the reference count
+ */
+static int
+ice_prof_dec_ref(struct ice_hw *hw, enum ice_block blk, u8 prof_id)
+{
+	if (prof_id > hw->blk[blk].es.count)
+		return -EINVAL;
+
+	if (hw->blk[blk].es.ref_count[prof_id] > 0) {
+		if (!--hw->blk[blk].es.ref_count[prof_id]) {
+			ice_write_es(hw, blk, prof_id, NULL);
+			ice_free_prof_masks(hw, blk, prof_id);
+			return ice_free_prof_id(hw, blk, prof_id);
+		}
+	}
+
+	return 0;
+}
+
+/* Block / table section IDs */
+static const u32 ice_blk_sids[ICE_BLK_COUNT][ICE_SID_OFF_COUNT] = {
+	/* SWITCH */
+	{	ICE_SID_XLT1_SW,
+		ICE_SID_XLT2_SW,
+		ICE_SID_PROFID_TCAM_SW,
+		ICE_SID_PROFID_REDIR_SW,
+		ICE_SID_FLD_VEC_SW
+	},
+
+	/* ACL */
+	{	ICE_SID_XLT1_ACL,
+		ICE_SID_XLT2_ACL,
+		ICE_SID_PROFID_TCAM_ACL,
+		ICE_SID_PROFID_REDIR_ACL,
+		ICE_SID_FLD_VEC_ACL
+	},
+
+	/* FD */
+	{	ICE_SID_XLT1_FD,
+		ICE_SID_XLT2_FD,
+		ICE_SID_PROFID_TCAM_FD,
+		ICE_SID_PROFID_REDIR_FD,
+		ICE_SID_FLD_VEC_FD
+	},
+
+	/* RSS */
+	{	ICE_SID_XLT1_RSS,
+		ICE_SID_XLT2_RSS,
+		ICE_SID_PROFID_TCAM_RSS,
+		ICE_SID_PROFID_REDIR_RSS,
+		ICE_SID_FLD_VEC_RSS
+	},
+
+	/* PE */
+	{	ICE_SID_XLT1_PE,
+		ICE_SID_XLT2_PE,
+		ICE_SID_PROFID_TCAM_PE,
+		ICE_SID_PROFID_REDIR_PE,
+		ICE_SID_FLD_VEC_PE
+	}
+};
+
+/**
+ * ice_init_sw_xlt1_db - init software XLT1 database from HW tables
+ * @hw: pointer to the hardware structure
+ * @blk: the HW block to initialize
+ */
+static void ice_init_sw_xlt1_db(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 pt;
+
+	for (pt = 0; pt < hw->blk[blk].xlt1.count; pt++) {
+		u8 ptg;
+
+		ptg = hw->blk[blk].xlt1.t[pt];
+		if (ptg != ICE_DEFAULT_PTG) {
+			ice_ptg_alloc_val(hw, blk, ptg);
+			ice_ptg_add_mv_ptype(hw, blk, pt, ptg);
+		}
+	}
+}
+
+/**
+ * ice_init_sw_xlt2_db - init software XLT2 database from HW tables
+ * @hw: pointer to the hardware structure
+ * @blk: the HW block to initialize
+ */
+static void ice_init_sw_xlt2_db(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 vsi;
+
+	for (vsi = 0; vsi < hw->blk[blk].xlt2.count; vsi++) {
+		u16 vsig;
+
+		vsig = hw->blk[blk].xlt2.t[vsi];
+		if (vsig) {
+			ice_vsig_alloc_val(hw, blk, vsig);
+			ice_vsig_add_mv_vsi(hw, blk, vsi, vsig);
+			/* no changes at this time, since this has been
+			 * initialized from the original package
+			 */
+			hw->blk[blk].xlt2.vsis[vsi].changed = 0;
+		}
+	}
+}
+
+/**
+ * ice_init_sw_db - init software database from HW tables
+ * @hw: pointer to the hardware structure
+ */
+static void ice_init_sw_db(struct ice_hw *hw)
+{
+	u16 i;
+
+	for (i = 0; i < ICE_BLK_COUNT; i++) {
+		ice_init_sw_xlt1_db(hw, (enum ice_block)i);
+		ice_init_sw_xlt2_db(hw, (enum ice_block)i);
+	}
+}
+
+/**
+ * ice_fill_tbl - Reads content of a single table type into database
+ * @hw: pointer to the hardware structure
+ * @block_id: Block ID of the table to copy
+ * @sid: Section ID of the table to copy
+ *
+ * Will attempt to read the entire content of a given table of a single block
+ * into the driver database. We assume that the buffer will always
+ * be as large or larger than the data contained in the package. If
+ * this condition is not met, there is most likely an error in the package
+ * contents.
+ */
+static void ice_fill_tbl(struct ice_hw *hw, enum ice_block block_id, u32 sid)
+{
+	u32 dst_len, sect_len, offset = 0;
+	struct ice_prof_redir_section *pr;
+	struct ice_prof_id_section *pid;
+	struct ice_xlt1_section *xlt1;
+	struct ice_xlt2_section *xlt2;
+	struct ice_sw_fv_section *es;
+	struct ice_pkg_enum state;
+	u8 *src, *dst;
+	void *sect;
+
+	/* if the HW segment pointer is null then the first iteration of
+	 * ice_pkg_enum_section() will fail. In this case the HW tables will
+	 * not be filled and return success.
+	 */
+	if (!hw->seg) {
+		ice_debug(hw, ICE_DBG_PKG, "hw->seg is NULL, tables are not filled\n");
+		return;
+	}
+
+	memset(&state, 0, sizeof(state));
+
+	sect = ice_pkg_enum_section(hw->seg, &state, sid);
+
+	while (sect) {
+		switch (sid) {
+		case ICE_SID_XLT1_SW:
+		case ICE_SID_XLT1_FD:
+		case ICE_SID_XLT1_RSS:
+		case ICE_SID_XLT1_ACL:
+		case ICE_SID_XLT1_PE:
+			xlt1 = sect;
+			src = xlt1->value;
+			sect_len = le16_to_cpu(xlt1->count) *
+				sizeof(*hw->blk[block_id].xlt1.t);
+			dst = hw->blk[block_id].xlt1.t;
+			dst_len = hw->blk[block_id].xlt1.count *
+				sizeof(*hw->blk[block_id].xlt1.t);
+			break;
+		case ICE_SID_XLT2_SW:
+		case ICE_SID_XLT2_FD:
+		case ICE_SID_XLT2_RSS:
+		case ICE_SID_XLT2_ACL:
+		case ICE_SID_XLT2_PE:
+			xlt2 = sect;
+			src = (__force u8 *)xlt2->value;
+			sect_len = le16_to_cpu(xlt2->count) *
+				sizeof(*hw->blk[block_id].xlt2.t);
+			dst = (u8 *)hw->blk[block_id].xlt2.t;
+			dst_len = hw->blk[block_id].xlt2.count *
+				sizeof(*hw->blk[block_id].xlt2.t);
+			break;
+		case ICE_SID_PROFID_TCAM_SW:
+		case ICE_SID_PROFID_TCAM_FD:
+		case ICE_SID_PROFID_TCAM_RSS:
+		case ICE_SID_PROFID_TCAM_ACL:
+		case ICE_SID_PROFID_TCAM_PE:
+			pid = sect;
+			src = (u8 *)pid->entry;
+			sect_len = le16_to_cpu(pid->count) *
+				sizeof(*hw->blk[block_id].prof.t);
+			dst = (u8 *)hw->blk[block_id].prof.t;
+			dst_len = hw->blk[block_id].prof.count *
+				sizeof(*hw->blk[block_id].prof.t);
+			break;
+		case ICE_SID_PROFID_REDIR_SW:
+		case ICE_SID_PROFID_REDIR_FD:
+		case ICE_SID_PROFID_REDIR_RSS:
+		case ICE_SID_PROFID_REDIR_ACL:
+		case ICE_SID_PROFID_REDIR_PE:
+			pr = sect;
+			src = pr->redir_value;
+			sect_len = le16_to_cpu(pr->count) *
+				sizeof(*hw->blk[block_id].prof_redir.t);
+			dst = hw->blk[block_id].prof_redir.t;
+			dst_len = hw->blk[block_id].prof_redir.count *
+				sizeof(*hw->blk[block_id].prof_redir.t);
+			break;
+		case ICE_SID_FLD_VEC_SW:
+		case ICE_SID_FLD_VEC_FD:
+		case ICE_SID_FLD_VEC_RSS:
+		case ICE_SID_FLD_VEC_ACL:
+		case ICE_SID_FLD_VEC_PE:
+			es = sect;
+			src = (u8 *)es->fv;
+			sect_len = (u32)(le16_to_cpu(es->count) *
+					 hw->blk[block_id].es.fvw) *
+				sizeof(*hw->blk[block_id].es.t);
+			dst = (u8 *)hw->blk[block_id].es.t;
+			dst_len = (u32)(hw->blk[block_id].es.count *
+					hw->blk[block_id].es.fvw) *
+				sizeof(*hw->blk[block_id].es.t);
+			break;
+		default:
+			return;
+		}
+
+		/* if the section offset exceeds destination length, terminate
+		 * table fill.
+		 */
+		if (offset > dst_len)
+			return;
+
+		/* if the sum of section size and offset exceed destination size
+		 * then we are out of bounds of the HW table size for that PF.
+		 * Changing section length to fill the remaining table space
+		 * of that PF.
+		 */
+		if ((offset + sect_len) > dst_len)
+			sect_len = dst_len - offset;
+
+		memcpy(dst + offset, src, sect_len);
+		offset += sect_len;
+		sect = ice_pkg_enum_section(NULL, &state, sid);
+	}
+}
+
+/**
+ * ice_fill_blk_tbls - Read package context for tables
+ * @hw: pointer to the hardware structure
+ *
+ * Reads the current package contents and populates the driver
+ * database with the data iteratively for all advanced feature
+ * blocks. Assume that the HW tables have been allocated.
+ */
+void ice_fill_blk_tbls(struct ice_hw *hw)
+{
+	u8 i;
+
+	for (i = 0; i < ICE_BLK_COUNT; i++) {
+		enum ice_block blk_id = (enum ice_block)i;
+
+		ice_fill_tbl(hw, blk_id, hw->blk[blk_id].xlt1.sid);
+		ice_fill_tbl(hw, blk_id, hw->blk[blk_id].xlt2.sid);
+		ice_fill_tbl(hw, blk_id, hw->blk[blk_id].prof.sid);
+		ice_fill_tbl(hw, blk_id, hw->blk[blk_id].prof_redir.sid);
+		ice_fill_tbl(hw, blk_id, hw->blk[blk_id].es.sid);
+	}
+
+	ice_init_sw_db(hw);
+}
+
+/**
+ * ice_free_prof_map - free profile map
+ * @hw: pointer to the hardware structure
+ * @blk_idx: HW block index
+ */
+static void ice_free_prof_map(struct ice_hw *hw, u8 blk_idx)
+{
+	struct ice_es *es = &hw->blk[blk_idx].es;
+	struct ice_prof_map *del, *tmp;
+
+	mutex_lock(&es->prof_map_lock);
+	list_for_each_entry_safe(del, tmp, &es->prof_map, list) {
+		list_del(&del->list);
+		devm_kfree(ice_hw_to_dev(hw), del);
+	}
+	INIT_LIST_HEAD(&es->prof_map);
+	mutex_unlock(&es->prof_map_lock);
+}
+
+/**
+ * ice_free_flow_profs - free flow profile entries
+ * @hw: pointer to the hardware structure
+ * @blk_idx: HW block index
+ */
+static void ice_free_flow_profs(struct ice_hw *hw, u8 blk_idx)
+{
+	struct ice_flow_prof *p, *tmp;
+
+	mutex_lock(&hw->fl_profs_locks[blk_idx]);
+	list_for_each_entry_safe(p, tmp, &hw->fl_profs[blk_idx], l_entry) {
+		struct ice_flow_entry *e, *t;
+
+		list_for_each_entry_safe(e, t, &p->entries, l_entry)
+			ice_flow_rem_entry(hw, (enum ice_block)blk_idx,
+					   ICE_FLOW_ENTRY_HNDL(e));
+
+		list_del(&p->l_entry);
+
+		mutex_destroy(&p->entries_lock);
+		devm_kfree(ice_hw_to_dev(hw), p);
+	}
+	mutex_unlock(&hw->fl_profs_locks[blk_idx]);
+
+	/* if driver is in reset and tables are being cleared
+	 * re-initialize the flow profile list heads
+	 */
+	INIT_LIST_HEAD(&hw->fl_profs[blk_idx]);
+}
+
+/**
+ * ice_free_vsig_tbl - free complete VSIG table entries
+ * @hw: pointer to the hardware structure
+ * @blk: the HW block on which to free the VSIG table entries
+ */
+static void ice_free_vsig_tbl(struct ice_hw *hw, enum ice_block blk)
+{
+	u16 i;
+
+	if (!hw->blk[blk].xlt2.vsig_tbl)
+		return;
+
+	for (i = 1; i < ICE_MAX_VSIGS; i++)
+		if (hw->blk[blk].xlt2.vsig_tbl[i].in_use)
+			ice_vsig_free(hw, blk, i);
+}
+
+/**
+ * ice_free_hw_tbls - free hardware table memory
+ * @hw: pointer to the hardware structure
+ */
+void ice_free_hw_tbls(struct ice_hw *hw)
+{
+	struct ice_rss_cfg *r, *rt;
+	u8 i;
+
+	for (i = 0; i < ICE_BLK_COUNT; i++) {
+		if (hw->blk[i].is_list_init) {
+			struct ice_es *es = &hw->blk[i].es;
+
+			ice_free_prof_map(hw, i);
+			mutex_destroy(&es->prof_map_lock);
+
+			ice_free_flow_profs(hw, i);
+			mutex_destroy(&hw->fl_profs_locks[i]);
+
+			hw->blk[i].is_list_init = false;
+		}
+		ice_free_vsig_tbl(hw, (enum ice_block)i);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.ptypes);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.ptg_tbl);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt1.t);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt2.t);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt2.vsig_tbl);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].xlt2.vsis);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].prof.t);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].prof_redir.t);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.t);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.ref_count);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.written);
+		devm_kfree(ice_hw_to_dev(hw), hw->blk[i].es.mask_ena);
+	}
+
+	list_for_each_entry_safe(r, rt, &hw->rss_list_head, l_entry) {
+		list_del(&r->l_entry);
+		devm_kfree(ice_hw_to_dev(hw), r);
+	}
+	mutex_destroy(&hw->rss_locks);
+	ice_shutdown_all_prof_masks(hw);
+	memset(hw->blk, 0, sizeof(hw->blk));
+}
+
+/**
+ * ice_init_flow_profs - init flow profile locks and list heads
+ * @hw: pointer to the hardware structure
+ * @blk_idx: HW block index
+ */
+static void ice_init_flow_profs(struct ice_hw *hw, u8 blk_idx)
+{
+	mutex_init(&hw->fl_profs_locks[blk_idx]);
+	INIT_LIST_HEAD(&hw->fl_profs[blk_idx]);
+}
+
+/**
+ * ice_clear_hw_tbls - clear HW tables and flow profiles
+ * @hw: pointer to the hardware structure
+ */
+void ice_clear_hw_tbls(struct ice_hw *hw)
+{
+	u8 i;
+
+	for (i = 0; i < ICE_BLK_COUNT; i++) {
+		struct ice_prof_redir *prof_redir = &hw->blk[i].prof_redir;
+		struct ice_prof_tcam *prof = &hw->blk[i].prof;
+		struct ice_xlt1 *xlt1 = &hw->blk[i].xlt1;
+		struct ice_xlt2 *xlt2 = &hw->blk[i].xlt2;
+		struct ice_es *es = &hw->blk[i].es;
+
+		if (hw->blk[i].is_list_init) {
+			ice_free_prof_map(hw, i);
+			ice_free_flow_profs(hw, i);
+		}
+
+		ice_free_vsig_tbl(hw, (enum ice_block)i);
+
+		memset(xlt1->ptypes, 0, xlt1->count * sizeof(*xlt1->ptypes));
+		memset(xlt1->ptg_tbl, 0,
+		       ICE_MAX_PTGS * sizeof(*xlt1->ptg_tbl));
+		memset(xlt1->t, 0, xlt1->count * sizeof(*xlt1->t));
+
+		memset(xlt2->vsis, 0, xlt2->count * sizeof(*xlt2->vsis));
+		memset(xlt2->vsig_tbl, 0,
+		       xlt2->count * sizeof(*xlt2->vsig_tbl));
+		memset(xlt2->t, 0, xlt2->count * sizeof(*xlt2->t));
+
+		memset(prof->t, 0, prof->count * sizeof(*prof->t));
+		memset(prof_redir->t, 0,
+		       prof_redir->count * sizeof(*prof_redir->t));
+
+		memset(es->t, 0, es->count * sizeof(*es->t) * es->fvw);
+		memset(es->ref_count, 0, es->count * sizeof(*es->ref_count));
+		memset(es->written, 0, es->count * sizeof(*es->written));
+		memset(es->mask_ena, 0, es->count * sizeof(*es->mask_ena));
+	}
+}
+
+/**
+ * ice_init_hw_tbls - init hardware table memory
+ * @hw: pointer to the hardware structure
+ */
+int ice_init_hw_tbls(struct ice_hw *hw)
+{
+	u8 i;
+
+	mutex_init(&hw->rss_locks);
+	INIT_LIST_HEAD(&hw->rss_list_head);
+	ice_init_all_prof_masks(hw);
+	for (i = 0; i < ICE_BLK_COUNT; i++) {
+		struct ice_prof_redir *prof_redir = &hw->blk[i].prof_redir;
+		struct ice_prof_tcam *prof = &hw->blk[i].prof;
+		struct ice_xlt1 *xlt1 = &hw->blk[i].xlt1;
+		struct ice_xlt2 *xlt2 = &hw->blk[i].xlt2;
+		struct ice_es *es = &hw->blk[i].es;
+		u16 j;
+
+		if (hw->blk[i].is_list_init)
+			continue;
+
+		ice_init_flow_profs(hw, i);
+		mutex_init(&es->prof_map_lock);
+		INIT_LIST_HEAD(&es->prof_map);
+		hw->blk[i].is_list_init = true;
+
+		hw->blk[i].overwrite = blk_sizes[i].overwrite;
+		es->reverse = blk_sizes[i].reverse;
+
+		xlt1->sid = ice_blk_sids[i][ICE_SID_XLT1_OFF];
+		xlt1->count = blk_sizes[i].xlt1;
+
+		xlt1->ptypes = devm_kcalloc(ice_hw_to_dev(hw), xlt1->count,
+					    sizeof(*xlt1->ptypes), GFP_KERNEL);
+
+		if (!xlt1->ptypes)
+			goto err;
+
+		xlt1->ptg_tbl = devm_kcalloc(ice_hw_to_dev(hw), ICE_MAX_PTGS,
+					     sizeof(*xlt1->ptg_tbl),
+					     GFP_KERNEL);
+
+		if (!xlt1->ptg_tbl)
+			goto err;
+
+		xlt1->t = devm_kcalloc(ice_hw_to_dev(hw), xlt1->count,
+				       sizeof(*xlt1->t), GFP_KERNEL);
+		if (!xlt1->t)
+			goto err;
+
+		xlt2->sid = ice_blk_sids[i][ICE_SID_XLT2_OFF];
+		xlt2->count = blk_sizes[i].xlt2;
+
+		xlt2->vsis = devm_kcalloc(ice_hw_to_dev(hw), xlt2->count,
+					  sizeof(*xlt2->vsis), GFP_KERNEL);
+
+		if (!xlt2->vsis)
+			goto err;
+
+		xlt2->vsig_tbl = devm_kcalloc(ice_hw_to_dev(hw), xlt2->count,
+					      sizeof(*xlt2->vsig_tbl),
+					      GFP_KERNEL);
+		if (!xlt2->vsig_tbl)
+			goto err;
+
+		for (j = 0; j < xlt2->count; j++)
+			INIT_LIST_HEAD(&xlt2->vsig_tbl[j].prop_lst);
+
+		xlt2->t = devm_kcalloc(ice_hw_to_dev(hw), xlt2->count,
+				       sizeof(*xlt2->t), GFP_KERNEL);
+		if (!xlt2->t)
+			goto err;
+
+		prof->sid = ice_blk_sids[i][ICE_SID_PR_OFF];
+		prof->count = blk_sizes[i].prof_tcam;
+		prof->max_prof_id = blk_sizes[i].prof_id;
+		prof->cdid_bits = blk_sizes[i].prof_cdid_bits;
+		prof->t = devm_kcalloc(ice_hw_to_dev(hw), prof->count,
+				       sizeof(*prof->t), GFP_KERNEL);
+
+		if (!prof->t)
+			goto err;
+
+		prof_redir->sid = ice_blk_sids[i][ICE_SID_PR_REDIR_OFF];
+		prof_redir->count = blk_sizes[i].prof_redir;
+		prof_redir->t = devm_kcalloc(ice_hw_to_dev(hw),
+					     prof_redir->count,
+					     sizeof(*prof_redir->t),
+					     GFP_KERNEL);
+
+		if (!prof_redir->t)
+			goto err;
+
+		es->sid = ice_blk_sids[i][ICE_SID_ES_OFF];
+		es->count = blk_sizes[i].es;
+		es->fvw = blk_sizes[i].fvw;
+		es->t = devm_kcalloc(ice_hw_to_dev(hw),
+				     (u32)(es->count * es->fvw),
+				     sizeof(*es->t), GFP_KERNEL);
+		if (!es->t)
+			goto err;
+
+		es->ref_count = devm_kcalloc(ice_hw_to_dev(hw), es->count,
+					     sizeof(*es->ref_count),
+					     GFP_KERNEL);
+		if (!es->ref_count)
+			goto err;
+
+		es->written = devm_kcalloc(ice_hw_to_dev(hw), es->count,
+					   sizeof(*es->written), GFP_KERNEL);
+		if (!es->written)
+			goto err;
+
+		es->mask_ena = devm_kcalloc(ice_hw_to_dev(hw), es->count,
+					    sizeof(*es->mask_ena), GFP_KERNEL);
+		if (!es->mask_ena)
+			goto err;
+	}
+	return 0;
+
+err:
+	ice_free_hw_tbls(hw);
+	return -ENOMEM;
+}
+
+/**
+ * ice_prof_gen_key - generate profile ID key
+ * @hw: pointer to the HW struct
+ * @blk: the block in which to write profile ID to
+ * @ptg: packet type group (PTG) portion of key
+ * @vsig: VSIG portion of key
+ * @cdid: CDID portion of key
+ * @flags: flag portion of key
+ * @vl_msk: valid mask
+ * @dc_msk: don't care mask
+ * @nm_msk: never match mask
+ * @key: output of profile ID key
+ */
+static int
+ice_prof_gen_key(struct ice_hw *hw, enum ice_block blk, u8 ptg, u16 vsig,
+		 u8 cdid, u16 flags, u8 vl_msk[ICE_TCAM_KEY_VAL_SZ],
+		 u8 dc_msk[ICE_TCAM_KEY_VAL_SZ], u8 nm_msk[ICE_TCAM_KEY_VAL_SZ],
+		 u8 key[ICE_TCAM_KEY_SZ])
+{
+	struct ice_prof_id_key inkey;
+
+	inkey.xlt1 = ptg;
+	inkey.xlt2_cdid = cpu_to_le16(vsig);
+	inkey.flags = cpu_to_le16(flags);
+
+	switch (hw->blk[blk].prof.cdid_bits) {
+	case 0:
+		break;
+	case 2:
+#define ICE_CD_2_M 0xC000U
+#define ICE_CD_2_S 14
+		inkey.xlt2_cdid &= ~cpu_to_le16(ICE_CD_2_M);
+		inkey.xlt2_cdid |= cpu_to_le16(BIT(cdid) << ICE_CD_2_S);
+		break;
+	case 4:
+#define ICE_CD_4_M 0xF000U
+#define ICE_CD_4_S 12
+		inkey.xlt2_cdid &= ~cpu_to_le16(ICE_CD_4_M);
+		inkey.xlt2_cdid |= cpu_to_le16(BIT(cdid) << ICE_CD_4_S);
+		break;
+	case 8:
+#define ICE_CD_8_M 0xFF00U
+#define ICE_CD_8_S 16
+		inkey.xlt2_cdid &= ~cpu_to_le16(ICE_CD_8_M);
+		inkey.xlt2_cdid |= cpu_to_le16(BIT(cdid) << ICE_CD_8_S);
+		break;
+	default:
+		ice_debug(hw, ICE_DBG_PKG, "Error in profile config\n");
+		break;
+	}
+
+	return ice_set_key(key, ICE_TCAM_KEY_SZ, (u8 *)&inkey, vl_msk, dc_msk,
+			   nm_msk, 0, ICE_TCAM_KEY_SZ / 2);
+}
+
+/**
+ * ice_tcam_write_entry - write TCAM entry
+ * @hw: pointer to the HW struct
+ * @blk: the block in which to write profile ID to
+ * @idx: the entry index to write to
+ * @prof_id: profile ID
+ * @ptg: packet type group (PTG) portion of key
+ * @vsig: VSIG portion of key
+ * @cdid: CDID portion of key
+ * @flags: flag portion of key
+ * @vl_msk: valid mask
+ * @dc_msk: don't care mask
+ * @nm_msk: never match mask
+ */
+static int
+ice_tcam_write_entry(struct ice_hw *hw, enum ice_block blk, u16 idx,
+		     u8 prof_id, u8 ptg, u16 vsig, u8 cdid, u16 flags,
+		     u8 vl_msk[ICE_TCAM_KEY_VAL_SZ],
+		     u8 dc_msk[ICE_TCAM_KEY_VAL_SZ],
+		     u8 nm_msk[ICE_TCAM_KEY_VAL_SZ])
+{
+	struct ice_prof_tcam_entry;
+	int status;
+
+	status = ice_prof_gen_key(hw, blk, ptg, vsig, cdid, flags, vl_msk,
+				  dc_msk, nm_msk, hw->blk[blk].prof.t[idx].key);
+	if (!status) {
+		hw->blk[blk].prof.t[idx].addr = cpu_to_le16(idx);
+		hw->blk[blk].prof.t[idx].prof_id = prof_id;
+	}
+
+	return status;
+}
+
+/**
+ * ice_vsig_get_ref - returns number of VSIs belong to a VSIG
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsig: VSIG to query
+ * @refs: pointer to variable to receive the reference count
+ */
+static int
+ice_vsig_get_ref(struct ice_hw *hw, enum ice_block blk, u16 vsig, u16 *refs)
+{
+	u16 idx = vsig & ICE_VSIG_IDX_M;
+	struct ice_vsig_vsi *ptr;
+
+	*refs = 0;
+
+	if (!hw->blk[blk].xlt2.vsig_tbl[idx].in_use)
+		return -ENOENT;
+
+	ptr = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+	while (ptr) {
+		(*refs)++;
+		ptr = ptr->next_vsi;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_has_prof_vsig - check to see if VSIG has a specific profile
+ * @hw: pointer to the hardware structure
+ * @blk: HW block
+ * @vsig: VSIG to check against
+ * @hdl: profile handle
+ */
+static bool
+ice_has_prof_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl)
+{
+	u16 idx = vsig & ICE_VSIG_IDX_M;
+	struct ice_vsig_prof *ent;
+
+	list_for_each_entry(ent, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+			    list)
+		if (ent->profile_cookie == hdl)
+			return true;
+
+	ice_debug(hw, ICE_DBG_INIT, "Characteristic list for VSI group %d not found.\n",
+		  vsig);
+	return false;
+}
+
+/**
+ * ice_prof_bld_es - build profile ID extraction sequence changes
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @bld: the update package buffer build to add to
+ * @chgs: the list of changes to make in hardware
+ */
+static int
+ice_prof_bld_es(struct ice_hw *hw, enum ice_block blk,
+		struct ice_buf_build *bld, struct list_head *chgs)
+{
+	u16 vec_size = hw->blk[blk].es.fvw * sizeof(struct ice_fv_word);
+	struct ice_chs_chg *tmp;
+
+	list_for_each_entry(tmp, chgs, list_entry)
+		if (tmp->type == ICE_PTG_ES_ADD && tmp->add_prof) {
+			u16 off = tmp->prof_id * hw->blk[blk].es.fvw;
+			struct ice_pkg_es *p;
+			u32 id;
+
+			id = ice_sect_id(blk, ICE_VEC_TBL);
+			p = ice_pkg_buf_alloc_section(bld, id,
+						      struct_size(p, es, 1) +
+						      vec_size -
+						      sizeof(p->es[0]));
+
+			if (!p)
+				return -ENOSPC;
+
+			p->count = cpu_to_le16(1);
+			p->offset = cpu_to_le16(tmp->prof_id);
+
+			memcpy(p->es, &hw->blk[blk].es.t[off], vec_size);
+		}
+
+	return 0;
+}
+
+/**
+ * ice_prof_bld_tcam - build profile ID TCAM changes
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @bld: the update package buffer build to add to
+ * @chgs: the list of changes to make in hardware
+ */
+static int
+ice_prof_bld_tcam(struct ice_hw *hw, enum ice_block blk,
+		  struct ice_buf_build *bld, struct list_head *chgs)
+{
+	struct ice_chs_chg *tmp;
+
+	list_for_each_entry(tmp, chgs, list_entry)
+		if (tmp->type == ICE_TCAM_ADD && tmp->add_tcam_idx) {
+			struct ice_prof_id_section *p;
+			u32 id;
+
+			id = ice_sect_id(blk, ICE_PROF_TCAM);
+			p = ice_pkg_buf_alloc_section(bld, id,
+						      struct_size(p, entry, 1));
+
+			if (!p)
+				return -ENOSPC;
+
+			p->count = cpu_to_le16(1);
+			p->entry[0].addr = cpu_to_le16(tmp->tcam_idx);
+			p->entry[0].prof_id = tmp->prof_id;
+
+			memcpy(p->entry[0].key,
+			       &hw->blk[blk].prof.t[tmp->tcam_idx].key,
+			       sizeof(hw->blk[blk].prof.t->key));
+		}
+
+	return 0;
+}
+
+/**
+ * ice_prof_bld_xlt1 - build XLT1 changes
+ * @blk: hardware block
+ * @bld: the update package buffer build to add to
+ * @chgs: the list of changes to make in hardware
+ */
+static int
+ice_prof_bld_xlt1(enum ice_block blk, struct ice_buf_build *bld,
+		  struct list_head *chgs)
+{
+	struct ice_chs_chg *tmp;
+
+	list_for_each_entry(tmp, chgs, list_entry)
+		if (tmp->type == ICE_PTG_ES_ADD && tmp->add_ptg) {
+			struct ice_xlt1_section *p;
+			u32 id;
+
+			id = ice_sect_id(blk, ICE_XLT1);
+			p = ice_pkg_buf_alloc_section(bld, id,
+						      struct_size(p, value, 1));
+
+			if (!p)
+				return -ENOSPC;
+
+			p->count = cpu_to_le16(1);
+			p->offset = cpu_to_le16(tmp->ptype);
+			p->value[0] = tmp->ptg;
+		}
+
+	return 0;
+}
+
+/**
+ * ice_prof_bld_xlt2 - build XLT2 changes
+ * @blk: hardware block
+ * @bld: the update package buffer build to add to
+ * @chgs: the list of changes to make in hardware
+ */
+static int
+ice_prof_bld_xlt2(enum ice_block blk, struct ice_buf_build *bld,
+		  struct list_head *chgs)
+{
+	struct ice_chs_chg *tmp;
+
+	list_for_each_entry(tmp, chgs, list_entry) {
+		struct ice_xlt2_section *p;
+		u32 id;
+
+		switch (tmp->type) {
+		case ICE_VSIG_ADD:
+		case ICE_VSI_MOVE:
+		case ICE_VSIG_REM:
+			id = ice_sect_id(blk, ICE_XLT2);
+			p = ice_pkg_buf_alloc_section(bld, id,
+						      struct_size(p, value, 1));
+
+			if (!p)
+				return -ENOSPC;
+
+			p->count = cpu_to_le16(1);
+			p->offset = cpu_to_le16(tmp->vsi);
+			p->value[0] = cpu_to_le16(tmp->vsig);
+			break;
+		default:
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_upd_prof_hw - update hardware using the change list
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @chgs: the list of changes to make in hardware
+ */
+static int
+ice_upd_prof_hw(struct ice_hw *hw, enum ice_block blk,
+		struct list_head *chgs)
+{
+	struct ice_buf_build *b;
+	struct ice_chs_chg *tmp;
+	u16 pkg_sects;
+	u16 xlt1 = 0;
+	u16 xlt2 = 0;
+	u16 tcam = 0;
+	u16 es = 0;
+	int status;
+	u16 sects;
+
+	/* count number of sections we need */
+	list_for_each_entry(tmp, chgs, list_entry) {
+		switch (tmp->type) {
+		case ICE_PTG_ES_ADD:
+			if (tmp->add_ptg)
+				xlt1++;
+			if (tmp->add_prof)
+				es++;
+			break;
+		case ICE_TCAM_ADD:
+			tcam++;
+			break;
+		case ICE_VSIG_ADD:
+		case ICE_VSI_MOVE:
+		case ICE_VSIG_REM:
+			xlt2++;
+			break;
+		default:
+			break;
+		}
+	}
+	sects = xlt1 + xlt2 + tcam + es;
+
+	if (!sects)
+		return 0;
+
+	/* Build update package buffer */
+	b = ice_pkg_buf_alloc(hw);
+	if (!b)
+		return -ENOMEM;
+
+	status = ice_pkg_buf_reserve_section(b, sects);
+	if (status)
+		goto error_tmp;
+
+	/* Preserve order of table update: ES, TCAM, PTG, VSIG */
+	if (es) {
+		status = ice_prof_bld_es(hw, blk, b, chgs);
+		if (status)
+			goto error_tmp;
+	}
+
+	if (tcam) {
+		status = ice_prof_bld_tcam(hw, blk, b, chgs);
+		if (status)
+			goto error_tmp;
+	}
+
+	if (xlt1) {
+		status = ice_prof_bld_xlt1(blk, b, chgs);
+		if (status)
+			goto error_tmp;
+	}
+
+	if (xlt2) {
+		status = ice_prof_bld_xlt2(blk, b, chgs);
+		if (status)
+			goto error_tmp;
+	}
+
+	/* After package buffer build check if the section count in buffer is
+	 * non-zero and matches the number of sections detected for package
+	 * update.
+	 */
+	pkg_sects = ice_pkg_buf_get_active_sections(b);
+	if (!pkg_sects || pkg_sects != sects) {
+		status = -EINVAL;
+		goto error_tmp;
+	}
+
+	/* update package */
+	status = ice_update_pkg(hw, ice_pkg_buf(b), 1);
+	if (status == -EIO)
+		ice_debug(hw, ICE_DBG_INIT, "Unable to update HW profile\n");
+
+error_tmp:
+	ice_pkg_buf_free(hw, b);
+	return status;
+}
+
+/**
+ * ice_update_fd_mask - set Flow Director Field Vector mask for a profile
+ * @hw: pointer to the HW struct
+ * @prof_id: profile ID
+ * @mask_sel: mask select
+ *
+ * This function enable any of the masks selected by the mask select parameter
+ * for the profile specified.
+ */
+static void ice_update_fd_mask(struct ice_hw *hw, u16 prof_id, u32 mask_sel)
+{
+	wr32(hw, GLQF_FDMASK_SEL(prof_id), mask_sel);
+
+	ice_debug(hw, ICE_DBG_INIT, "fd mask(%d): %x = %x\n", prof_id,
+		  GLQF_FDMASK_SEL(prof_id), mask_sel);
+}
+
+struct ice_fd_src_dst_pair {
+	u8 prot_id;
+	u8 count;
+	u16 off;
+};
+
+static const struct ice_fd_src_dst_pair ice_fd_pairs[] = {
+	/* These are defined in pairs */
+	{ ICE_PROT_IPV4_OF_OR_S, 2, 12 },
+	{ ICE_PROT_IPV4_OF_OR_S, 2, 16 },
+
+	{ ICE_PROT_IPV4_IL, 2, 12 },
+	{ ICE_PROT_IPV4_IL, 2, 16 },
+
+	{ ICE_PROT_IPV6_OF_OR_S, 8, 8 },
+	{ ICE_PROT_IPV6_OF_OR_S, 8, 24 },
+
+	{ ICE_PROT_IPV6_IL, 8, 8 },
+	{ ICE_PROT_IPV6_IL, 8, 24 },
+
+	{ ICE_PROT_TCP_IL, 1, 0 },
+	{ ICE_PROT_TCP_IL, 1, 2 },
+
+	{ ICE_PROT_UDP_OF, 1, 0 },
+	{ ICE_PROT_UDP_OF, 1, 2 },
+
+	{ ICE_PROT_UDP_IL_OR_S, 1, 0 },
+	{ ICE_PROT_UDP_IL_OR_S, 1, 2 },
+
+	{ ICE_PROT_SCTP_IL, 1, 0 },
+	{ ICE_PROT_SCTP_IL, 1, 2 }
+};
+
+#define ICE_FD_SRC_DST_PAIR_COUNT	ARRAY_SIZE(ice_fd_pairs)
+
+/**
+ * ice_update_fd_swap - set register appropriately for a FD FV extraction
+ * @hw: pointer to the HW struct
+ * @prof_id: profile ID
+ * @es: extraction sequence (length of array is determined by the block)
+ */
+static int
+ice_update_fd_swap(struct ice_hw *hw, u16 prof_id, struct ice_fv_word *es)
+{
+	DECLARE_BITMAP(pair_list, ICE_FD_SRC_DST_PAIR_COUNT);
+	u8 pair_start[ICE_FD_SRC_DST_PAIR_COUNT] = { 0 };
+#define ICE_FD_FV_NOT_FOUND (-2)
+	s8 first_free = ICE_FD_FV_NOT_FOUND;
+	u8 used[ICE_MAX_FV_WORDS] = { 0 };
+	s8 orig_free, si;
+	u32 mask_sel = 0;
+	u8 i, j, k;
+
+	bitmap_zero(pair_list, ICE_FD_SRC_DST_PAIR_COUNT);
+
+	/* This code assumes that the Flow Director field vectors are assigned
+	 * from the end of the FV indexes working towards the zero index, that
+	 * only complete fields will be included and will be consecutive, and
+	 * that there are no gaps between valid indexes.
+	 */
+
+	/* Determine swap fields present */
+	for (i = 0; i < hw->blk[ICE_BLK_FD].es.fvw; i++) {
+		/* Find the first free entry, assuming right to left population.
+		 * This is where we can start adding additional pairs if needed.
+		 */
+		if (first_free == ICE_FD_FV_NOT_FOUND && es[i].prot_id !=
+		    ICE_PROT_INVALID)
+			first_free = i - 1;
+
+		for (j = 0; j < ICE_FD_SRC_DST_PAIR_COUNT; j++)
+			if (es[i].prot_id == ice_fd_pairs[j].prot_id &&
+			    es[i].off == ice_fd_pairs[j].off) {
+				__set_bit(j, pair_list);
+				pair_start[j] = i;
+			}
+	}
+
+	orig_free = first_free;
+
+	/* determine missing swap fields that need to be added */
+	for (i = 0; i < ICE_FD_SRC_DST_PAIR_COUNT; i += 2) {
+		u8 bit1 = test_bit(i + 1, pair_list);
+		u8 bit0 = test_bit(i, pair_list);
+
+		if (bit0 ^ bit1) {
+			u8 index;
+
+			/* add the appropriate 'paired' entry */
+			if (!bit0)
+				index = i;
+			else
+				index = i + 1;
+
+			/* check for room */
+			if (first_free + 1 < (s8)ice_fd_pairs[index].count)
+				return -ENOSPC;
+
+			/* place in extraction sequence */
+			for (k = 0; k < ice_fd_pairs[index].count; k++) {
+				es[first_free - k].prot_id =
+					ice_fd_pairs[index].prot_id;
+				es[first_free - k].off =
+					ice_fd_pairs[index].off + (k * 2);
+
+				if (k > first_free)
+					return -EIO;
+
+				/* keep track of non-relevant fields */
+				mask_sel |= BIT(first_free - k);
+			}
+
+			pair_start[index] = first_free;
+			first_free -= ice_fd_pairs[index].count;
+		}
+	}
+
+	/* fill in the swap array */
+	si = hw->blk[ICE_BLK_FD].es.fvw - 1;
+	while (si >= 0) {
+		u8 indexes_used = 1;
+
+		/* assume flat at this index */
+#define ICE_SWAP_VALID	0x80
+		used[si] = si | ICE_SWAP_VALID;
+
+		if (orig_free == ICE_FD_FV_NOT_FOUND || si <= orig_free) {
+			si -= indexes_used;
+			continue;
+		}
+
+		/* check for a swap location */
+		for (j = 0; j < ICE_FD_SRC_DST_PAIR_COUNT; j++)
+			if (es[si].prot_id == ice_fd_pairs[j].prot_id &&
+			    es[si].off == ice_fd_pairs[j].off) {
+				u8 idx;
+
+				/* determine the appropriate matching field */
+				idx = j + ((j % 2) ? -1 : 1);
+
+				indexes_used = ice_fd_pairs[idx].count;
+				for (k = 0; k < indexes_used; k++) {
+					used[si - k] = (pair_start[idx] - k) |
+						ICE_SWAP_VALID;
+				}
+
+				break;
+			}
+
+		si -= indexes_used;
+	}
+
+	/* for each set of 4 swap and 4 inset indexes, write the appropriate
+	 * register
+	 */
+	for (j = 0; j < hw->blk[ICE_BLK_FD].es.fvw / 4; j++) {
+		u32 raw_swap = 0;
+		u32 raw_in = 0;
+
+		for (k = 0; k < 4; k++) {
+			u8 idx;
+
+			idx = (j * 4) + k;
+			if (used[idx] && !(mask_sel & BIT(idx))) {
+				raw_swap |= used[idx] << (k * BITS_PER_BYTE);
+#define ICE_INSET_DFLT 0x9f
+				raw_in |= ICE_INSET_DFLT << (k * BITS_PER_BYTE);
+			}
+		}
+
+		/* write the appropriate swap register set */
+		wr32(hw, GLQF_FDSWAP(prof_id, j), raw_swap);
+
+		ice_debug(hw, ICE_DBG_INIT, "swap wr(%d, %d): %x = %08x\n",
+			  prof_id, j, GLQF_FDSWAP(prof_id, j), raw_swap);
+
+		/* write the appropriate inset register set */
+		wr32(hw, GLQF_FDINSET(prof_id, j), raw_in);
+
+		ice_debug(hw, ICE_DBG_INIT, "inset wr(%d, %d): %x = %08x\n",
+			  prof_id, j, GLQF_FDINSET(prof_id, j), raw_in);
+	}
+
+	/* initially clear the mask select for this profile */
+	ice_update_fd_mask(hw, prof_id, 0);
+
+	return 0;
+}
+
+/* The entries here needs to match the order of enum ice_ptype_attrib */
+static const struct ice_ptype_attrib_info ice_ptype_attributes[] = {
+	{ ICE_GTP_PDU_EH,	ICE_GTP_PDU_FLAG_MASK },
+	{ ICE_GTP_SESSION,	ICE_GTP_FLAGS_MASK },
+	{ ICE_GTP_DOWNLINK,	ICE_GTP_FLAGS_MASK },
+	{ ICE_GTP_UPLINK,	ICE_GTP_FLAGS_MASK },
+};
+
+/**
+ * ice_get_ptype_attrib_info - get PTYPE attribute information
+ * @type: attribute type
+ * @info: pointer to variable to the attribute information
+ */
+static void
+ice_get_ptype_attrib_info(enum ice_ptype_attrib_type type,
+			  struct ice_ptype_attrib_info *info)
+{
+	*info = ice_ptype_attributes[type];
+}
+
+/**
+ * ice_add_prof_attrib - add any PTG with attributes to profile
+ * @prof: pointer to the profile to which PTG entries will be added
+ * @ptg: PTG to be added
+ * @ptype: PTYPE that needs to be looked up
+ * @attr: array of attributes that will be considered
+ * @attr_cnt: number of elements in the attribute array
+ */
+static int
+ice_add_prof_attrib(struct ice_prof_map *prof, u8 ptg, u16 ptype,
+		    const struct ice_ptype_attributes *attr, u16 attr_cnt)
+{
+	bool found = false;
+	u16 i;
+
+	for (i = 0; i < attr_cnt; i++)
+		if (attr[i].ptype == ptype) {
+			found = true;
+
+			prof->ptg[prof->ptg_cnt] = ptg;
+			ice_get_ptype_attrib_info(attr[i].attrib,
+						  &prof->attr[prof->ptg_cnt]);
+
+			if (++prof->ptg_cnt >= ICE_MAX_PTG_PER_PROFILE)
+				return -ENOSPC;
+		}
+
+	if (!found)
+		return -ENOENT;
+
+	return 0;
+}
+
+/**
+ * ice_add_prof - add profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @id: profile tracking ID
+ * @ptypes: array of bitmaps indicating ptypes (ICE_FLOW_PTYPE_MAX bits)
+ * @attr: array of attributes
+ * @attr_cnt: number of elements in attr array
+ * @es: extraction sequence (length of array is determined by the block)
+ * @masks: mask for extraction sequence
+ *
+ * This function registers a profile, which matches a set of PTYPES with a
+ * particular extraction sequence. While the hardware profile is allocated
+ * it will not be written until the first call to ice_add_flow that specifies
+ * the ID value used here.
+ */
+int
+ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
+	     const struct ice_ptype_attributes *attr, u16 attr_cnt,
+	     struct ice_fv_word *es, u16 *masks)
+{
+	u32 bytes = DIV_ROUND_UP(ICE_FLOW_PTYPE_MAX, BITS_PER_BYTE);
+	DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT);
+	struct ice_prof_map *prof;
+	u8 byte = 0;
+	u8 prof_id;
+	int status;
+
+	bitmap_zero(ptgs_used, ICE_XLT1_CNT);
+
+	mutex_lock(&hw->blk[blk].es.prof_map_lock);
+
+	/* search for existing profile */
+	status = ice_find_prof_id_with_mask(hw, blk, es, masks, &prof_id);
+	if (status) {
+		/* allocate profile ID */
+		status = ice_alloc_prof_id(hw, blk, &prof_id);
+		if (status)
+			goto err_ice_add_prof;
+		if (blk == ICE_BLK_FD) {
+			/* For Flow Director block, the extraction sequence may
+			 * need to be altered in the case where there are paired
+			 * fields that have no match. This is necessary because
+			 * for Flow Director, src and dest fields need to paired
+			 * for filter programming and these values are swapped
+			 * during Tx.
+			 */
+			status = ice_update_fd_swap(hw, prof_id, es);
+			if (status)
+				goto err_ice_add_prof;
+		}
+		status = ice_update_prof_masking(hw, blk, prof_id, masks);
+		if (status)
+			goto err_ice_add_prof;
+
+		/* and write new es */
+		ice_write_es(hw, blk, prof_id, es);
+	}
+
+	ice_prof_inc_ref(hw, blk, prof_id);
+
+	/* add profile info */
+	prof = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*prof), GFP_KERNEL);
+	if (!prof) {
+		status = -ENOMEM;
+		goto err_ice_add_prof;
+	}
+
+	prof->profile_cookie = id;
+	prof->prof_id = prof_id;
+	prof->ptg_cnt = 0;
+	prof->context = 0;
+
+	/* build list of ptgs */
+	while (bytes && prof->ptg_cnt < ICE_MAX_PTG_PER_PROFILE) {
+		u8 bit;
+
+		if (!ptypes[byte]) {
+			bytes--;
+			byte++;
+			continue;
+		}
+
+		/* Examine 8 bits per byte */
+		for_each_set_bit(bit, (unsigned long *)&ptypes[byte],
+				 BITS_PER_BYTE) {
+			u16 ptype;
+			u8 ptg;
+
+			ptype = byte * BITS_PER_BYTE + bit;
+
+			/* The package should place all ptypes in a non-zero
+			 * PTG, so the following call should never fail.
+			 */
+			if (ice_ptg_find_ptype(hw, blk, ptype, &ptg))
+				continue;
+
+			/* If PTG is already added, skip and continue */
+			if (test_bit(ptg, ptgs_used))
+				continue;
+
+			__set_bit(ptg, ptgs_used);
+			/* Check to see there are any attributes for
+			 * this PTYPE, and add them if found.
+			 */
+			status = ice_add_prof_attrib(prof, ptg, ptype,
+						     attr, attr_cnt);
+			if (status == -ENOSPC)
+				break;
+			if (status) {
+				/* This is simple a PTYPE/PTG with no
+				 * attribute
+				 */
+				prof->ptg[prof->ptg_cnt] = ptg;
+				prof->attr[prof->ptg_cnt].flags = 0;
+				prof->attr[prof->ptg_cnt].mask = 0;
+
+				if (++prof->ptg_cnt >=
+				    ICE_MAX_PTG_PER_PROFILE)
+					break;
+			}
+		}
+
+		bytes--;
+		byte++;
+	}
+
+	list_add(&prof->list, &hw->blk[blk].es.prof_map);
+	status = 0;
+
+err_ice_add_prof:
+	mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+	return status;
+}
+
+/**
+ * ice_search_prof_id - Search for a profile tracking ID
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @id: profile tracking ID
+ *
+ * This will search for a profile tracking ID which was previously added.
+ * The profile map lock should be held before calling this function.
+ */
+static struct ice_prof_map *
+ice_search_prof_id(struct ice_hw *hw, enum ice_block blk, u64 id)
+{
+	struct ice_prof_map *entry = NULL;
+	struct ice_prof_map *map;
+
+	list_for_each_entry(map, &hw->blk[blk].es.prof_map, list)
+		if (map->profile_cookie == id) {
+			entry = map;
+			break;
+		}
+
+	return entry;
+}
+
+/**
+ * ice_vsig_prof_id_count - count profiles in a VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: VSIG to remove the profile from
+ */
+static u16
+ice_vsig_prof_id_count(struct ice_hw *hw, enum ice_block blk, u16 vsig)
+{
+	u16 idx = vsig & ICE_VSIG_IDX_M, count = 0;
+	struct ice_vsig_prof *p;
+
+	list_for_each_entry(p, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+			    list)
+		count++;
+
+	return count;
+}
+
+/**
+ * ice_rel_tcam_idx - release a TCAM index
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @idx: the index to release
+ */
+static int ice_rel_tcam_idx(struct ice_hw *hw, enum ice_block blk, u16 idx)
+{
+	/* Masks to invoke a never match entry */
+	u8 vl_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+	u8 dc_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFE, 0xFF, 0xFF, 0xFF, 0xFF };
+	u8 nm_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x01, 0x00, 0x00, 0x00, 0x00 };
+	int status;
+
+	/* write the TCAM entry */
+	status = ice_tcam_write_entry(hw, blk, idx, 0, 0, 0, 0, 0, vl_msk,
+				      dc_msk, nm_msk);
+	if (status)
+		return status;
+
+	/* release the TCAM entry */
+	status = ice_free_tcam_ent(hw, blk, idx);
+
+	return status;
+}
+
+/**
+ * ice_rem_prof_id - remove one profile from a VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @prof: pointer to profile structure to remove
+ */
+static int
+ice_rem_prof_id(struct ice_hw *hw, enum ice_block blk,
+		struct ice_vsig_prof *prof)
+{
+	int status;
+	u16 i;
+
+	for (i = 0; i < prof->tcam_count; i++)
+		if (prof->tcam[i].in_use) {
+			prof->tcam[i].in_use = false;
+			status = ice_rel_tcam_idx(hw, blk,
+						  prof->tcam[i].tcam_idx);
+			if (status)
+				return -EIO;
+		}
+
+	return 0;
+}
+
+/**
+ * ice_rem_vsig - remove VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: the VSIG to remove
+ * @chg: the change list
+ */
+static int
+ice_rem_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig,
+	     struct list_head *chg)
+{
+	u16 idx = vsig & ICE_VSIG_IDX_M;
+	struct ice_vsig_vsi *vsi_cur;
+	struct ice_vsig_prof *d, *t;
+
+	/* remove TCAM entries */
+	list_for_each_entry_safe(d, t,
+				 &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+				 list) {
+		int status;
+
+		status = ice_rem_prof_id(hw, blk, d);
+		if (status)
+			return status;
+
+		list_del(&d->list);
+		devm_kfree(ice_hw_to_dev(hw), d);
+	}
+
+	/* Move all VSIS associated with this VSIG to the default VSIG */
+	vsi_cur = hw->blk[blk].xlt2.vsig_tbl[idx].first_vsi;
+	/* If the VSIG has at least 1 VSI then iterate through the list
+	 * and remove the VSIs before deleting the group.
+	 */
+	if (vsi_cur)
+		do {
+			struct ice_vsig_vsi *tmp = vsi_cur->next_vsi;
+			struct ice_chs_chg *p;
+
+			p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p),
+					 GFP_KERNEL);
+			if (!p)
+				return -ENOMEM;
+
+			p->type = ICE_VSIG_REM;
+			p->orig_vsig = vsig;
+			p->vsig = ICE_DEFAULT_VSIG;
+			p->vsi = vsi_cur - hw->blk[blk].xlt2.vsis;
+
+			list_add(&p->list_entry, chg);
+
+			vsi_cur = tmp;
+		} while (vsi_cur);
+
+	return ice_vsig_free(hw, blk, vsig);
+}
+
+/**
+ * ice_rem_prof_id_vsig - remove a specific profile from a VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: VSIG to remove the profile from
+ * @hdl: profile handle indicating which profile to remove
+ * @chg: list to receive a record of changes
+ */
+static int
+ice_rem_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl,
+		     struct list_head *chg)
+{
+	u16 idx = vsig & ICE_VSIG_IDX_M;
+	struct ice_vsig_prof *p, *t;
+
+	list_for_each_entry_safe(p, t,
+				 &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+				 list)
+		if (p->profile_cookie == hdl) {
+			int status;
+
+			if (ice_vsig_prof_id_count(hw, blk, vsig) == 1)
+				/* this is the last profile, remove the VSIG */
+				return ice_rem_vsig(hw, blk, vsig, chg);
+
+			status = ice_rem_prof_id(hw, blk, p);
+			if (!status) {
+				list_del(&p->list);
+				devm_kfree(ice_hw_to_dev(hw), p);
+			}
+			return status;
+		}
+
+	return -ENOENT;
+}
+
+/**
+ * ice_rem_flow_all - remove all flows with a particular profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @id: profile tracking ID
+ */
+static int ice_rem_flow_all(struct ice_hw *hw, enum ice_block blk, u64 id)
+{
+	struct ice_chs_chg *del, *tmp;
+	struct list_head chg;
+	int status;
+	u16 i;
+
+	INIT_LIST_HEAD(&chg);
+
+	for (i = 1; i < ICE_MAX_VSIGS; i++)
+		if (hw->blk[blk].xlt2.vsig_tbl[i].in_use) {
+			if (ice_has_prof_vsig(hw, blk, i, id)) {
+				status = ice_rem_prof_id_vsig(hw, blk, i, id,
+							      &chg);
+				if (status)
+					goto err_ice_rem_flow_all;
+			}
+		}
+
+	status = ice_upd_prof_hw(hw, blk, &chg);
+
+err_ice_rem_flow_all:
+	list_for_each_entry_safe(del, tmp, &chg, list_entry) {
+		list_del(&del->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), del);
+	}
+
+	return status;
+}
+
+/**
+ * ice_rem_prof - remove profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @id: profile tracking ID
+ *
+ * This will remove the profile specified by the ID parameter, which was
+ * previously created through ice_add_prof. If any existing entries
+ * are associated with this profile, they will be removed as well.
+ */
+int ice_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 id)
+{
+	struct ice_prof_map *pmap;
+	int status;
+
+	mutex_lock(&hw->blk[blk].es.prof_map_lock);
+
+	pmap = ice_search_prof_id(hw, blk, id);
+	if (!pmap) {
+		status = -ENOENT;
+		goto err_ice_rem_prof;
+	}
+
+	/* remove all flows with this profile */
+	status = ice_rem_flow_all(hw, blk, pmap->profile_cookie);
+	if (status)
+		goto err_ice_rem_prof;
+
+	/* dereference profile, and possibly remove */
+	ice_prof_dec_ref(hw, blk, pmap->prof_id);
+
+	list_del(&pmap->list);
+	devm_kfree(ice_hw_to_dev(hw), pmap);
+
+err_ice_rem_prof:
+	mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+	return status;
+}
+
+/**
+ * ice_get_prof - get profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @hdl: profile handle
+ * @chg: change list
+ */
+static int
+ice_get_prof(struct ice_hw *hw, enum ice_block blk, u64 hdl,
+	     struct list_head *chg)
+{
+	struct ice_prof_map *map;
+	struct ice_chs_chg *p;
+	int status = 0;
+	u16 i;
+
+	mutex_lock(&hw->blk[blk].es.prof_map_lock);
+	/* Get the details on the profile specified by the handle ID */
+	map = ice_search_prof_id(hw, blk, hdl);
+	if (!map) {
+		status = -ENOENT;
+		goto err_ice_get_prof;
+	}
+
+	for (i = 0; i < map->ptg_cnt; i++)
+		if (!hw->blk[blk].es.written[map->prof_id]) {
+			/* add ES to change list */
+			p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p),
+					 GFP_KERNEL);
+			if (!p) {
+				status = -ENOMEM;
+				goto err_ice_get_prof;
+			}
+
+			p->type = ICE_PTG_ES_ADD;
+			p->ptype = 0;
+			p->ptg = map->ptg[i];
+			p->add_ptg = 0;
+
+			p->add_prof = 1;
+			p->prof_id = map->prof_id;
+
+			hw->blk[blk].es.written[map->prof_id] = true;
+
+			list_add(&p->list_entry, chg);
+		}
+
+err_ice_get_prof:
+	mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+	/* let caller clean up the change list */
+	return status;
+}
+
+/**
+ * ice_get_profs_vsig - get a copy of the list of profiles from a VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: VSIG from which to copy the list
+ * @lst: output list
+ *
+ * This routine makes a copy of the list of profiles in the specified VSIG.
+ */
+static int
+ice_get_profs_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig,
+		   struct list_head *lst)
+{
+	struct ice_vsig_prof *ent1, *ent2;
+	u16 idx = vsig & ICE_VSIG_IDX_M;
+
+	list_for_each_entry(ent1, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+			    list) {
+		struct ice_vsig_prof *p;
+
+		/* copy to the input list */
+		p = devm_kmemdup(ice_hw_to_dev(hw), ent1, sizeof(*p),
+				 GFP_KERNEL);
+		if (!p)
+			goto err_ice_get_profs_vsig;
+
+		list_add_tail(&p->list, lst);
+	}
+
+	return 0;
+
+err_ice_get_profs_vsig:
+	list_for_each_entry_safe(ent1, ent2, lst, list) {
+		list_del(&ent1->list);
+		devm_kfree(ice_hw_to_dev(hw), ent1);
+	}
+
+	return -ENOMEM;
+}
+
+/**
+ * ice_add_prof_to_lst - add profile entry to a list
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @lst: the list to be added to
+ * @hdl: profile handle of entry to add
+ */
+static int
+ice_add_prof_to_lst(struct ice_hw *hw, enum ice_block blk,
+		    struct list_head *lst, u64 hdl)
+{
+	struct ice_prof_map *map;
+	struct ice_vsig_prof *p;
+	int status = 0;
+	u16 i;
+
+	mutex_lock(&hw->blk[blk].es.prof_map_lock);
+	map = ice_search_prof_id(hw, blk, hdl);
+	if (!map) {
+		status = -ENOENT;
+		goto err_ice_add_prof_to_lst;
+	}
+
+	p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL);
+	if (!p) {
+		status = -ENOMEM;
+		goto err_ice_add_prof_to_lst;
+	}
+
+	p->profile_cookie = map->profile_cookie;
+	p->prof_id = map->prof_id;
+	p->tcam_count = map->ptg_cnt;
+
+	for (i = 0; i < map->ptg_cnt; i++) {
+		p->tcam[i].prof_id = map->prof_id;
+		p->tcam[i].tcam_idx = ICE_INVALID_TCAM;
+		p->tcam[i].ptg = map->ptg[i];
+	}
+
+	list_add(&p->list, lst);
+
+err_ice_add_prof_to_lst:
+	mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+	return status;
+}
+
+/**
+ * ice_move_vsi - move VSI to another VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsi: the VSI to move
+ * @vsig: the VSIG to move the VSI to
+ * @chg: the change list
+ */
+static int
+ice_move_vsi(struct ice_hw *hw, enum ice_block blk, u16 vsi, u16 vsig,
+	     struct list_head *chg)
+{
+	struct ice_chs_chg *p;
+	u16 orig_vsig;
+	int status;
+
+	p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	status = ice_vsig_find_vsi(hw, blk, vsi, &orig_vsig);
+	if (!status)
+		status = ice_vsig_add_mv_vsi(hw, blk, vsi, vsig);
+
+	if (status) {
+		devm_kfree(ice_hw_to_dev(hw), p);
+		return status;
+	}
+
+	p->type = ICE_VSI_MOVE;
+	p->vsi = vsi;
+	p->orig_vsig = orig_vsig;
+	p->vsig = vsig;
+
+	list_add(&p->list_entry, chg);
+
+	return 0;
+}
+
+/**
+ * ice_rem_chg_tcam_ent - remove a specific TCAM entry from change list
+ * @hw: pointer to the HW struct
+ * @idx: the index of the TCAM entry to remove
+ * @chg: the list of change structures to search
+ */
+static void
+ice_rem_chg_tcam_ent(struct ice_hw *hw, u16 idx, struct list_head *chg)
+{
+	struct ice_chs_chg *pos, *tmp;
+
+	list_for_each_entry_safe(tmp, pos, chg, list_entry)
+		if (tmp->type == ICE_TCAM_ADD && tmp->tcam_idx == idx) {
+			list_del(&tmp->list_entry);
+			devm_kfree(ice_hw_to_dev(hw), tmp);
+		}
+}
+
+/**
+ * ice_prof_tcam_ena_dis - add enable or disable TCAM change
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @enable: true to enable, false to disable
+ * @vsig: the VSIG of the TCAM entry
+ * @tcam: pointer the TCAM info structure of the TCAM to disable
+ * @chg: the change list
+ *
+ * This function appends an enable or disable TCAM entry in the change log
+ */
+static int
+ice_prof_tcam_ena_dis(struct ice_hw *hw, enum ice_block blk, bool enable,
+		      u16 vsig, struct ice_tcam_inf *tcam,
+		      struct list_head *chg)
+{
+	struct ice_chs_chg *p;
+	int status;
+
+	u8 vl_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+	u8 dc_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0x00, 0x00, 0x00 };
+	u8 nm_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x00, 0x00, 0x00, 0x00, 0x00 };
+
+	/* if disabling, free the TCAM */
+	if (!enable) {
+		status = ice_rel_tcam_idx(hw, blk, tcam->tcam_idx);
+
+		/* if we have already created a change for this TCAM entry, then
+		 * we need to remove that entry, in order to prevent writing to
+		 * a TCAM entry we no longer will have ownership of.
+		 */
+		ice_rem_chg_tcam_ent(hw, tcam->tcam_idx, chg);
+		tcam->tcam_idx = 0;
+		tcam->in_use = 0;
+		return status;
+	}
+
+	/* for re-enabling, reallocate a TCAM */
+	/* for entries with empty attribute masks, allocate entry from
+	 * the bottom of the TCAM table; otherwise, allocate from the
+	 * top of the table in order to give it higher priority
+	 */
+	status = ice_alloc_tcam_ent(hw, blk, tcam->attr.mask == 0,
+				    &tcam->tcam_idx);
+	if (status)
+		return status;
+
+	/* add TCAM to change list */
+	p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	status = ice_tcam_write_entry(hw, blk, tcam->tcam_idx, tcam->prof_id,
+				      tcam->ptg, vsig, 0, tcam->attr.flags,
+				      vl_msk, dc_msk, nm_msk);
+	if (status)
+		goto err_ice_prof_tcam_ena_dis;
+
+	tcam->in_use = 1;
+
+	p->type = ICE_TCAM_ADD;
+	p->add_tcam_idx = true;
+	p->prof_id = tcam->prof_id;
+	p->ptg = tcam->ptg;
+	p->vsig = 0;
+	p->tcam_idx = tcam->tcam_idx;
+
+	/* log change */
+	list_add(&p->list_entry, chg);
+
+	return 0;
+
+err_ice_prof_tcam_ena_dis:
+	devm_kfree(ice_hw_to_dev(hw), p);
+	return status;
+}
+
+/**
+ * ice_adj_prof_priorities - adjust profile based on priorities
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: the VSIG for which to adjust profile priorities
+ * @chg: the change list
+ */
+static int
+ice_adj_prof_priorities(struct ice_hw *hw, enum ice_block blk, u16 vsig,
+			struct list_head *chg)
+{
+	DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT);
+	struct ice_vsig_prof *t;
+	int status;
+	u16 idx;
+
+	bitmap_zero(ptgs_used, ICE_XLT1_CNT);
+	idx = vsig & ICE_VSIG_IDX_M;
+
+	/* Priority is based on the order in which the profiles are added. The
+	 * newest added profile has highest priority and the oldest added
+	 * profile has the lowest priority. Since the profile property list for
+	 * a VSIG is sorted from newest to oldest, this code traverses the list
+	 * in order and enables the first of each PTG that it finds (that is not
+	 * already enabled); it also disables any duplicate PTGs that it finds
+	 * in the older profiles (that are currently enabled).
+	 */
+
+	list_for_each_entry(t, &hw->blk[blk].xlt2.vsig_tbl[idx].prop_lst,
+			    list) {
+		u16 i;
+
+		for (i = 0; i < t->tcam_count; i++) {
+			/* Scan the priorities from newest to oldest.
+			 * Make sure that the newest profiles take priority.
+			 */
+			if (test_bit(t->tcam[i].ptg, ptgs_used) &&
+			    t->tcam[i].in_use) {
+				/* need to mark this PTG as never match, as it
+				 * was already in use and therefore duplicate
+				 * (and lower priority)
+				 */
+				status = ice_prof_tcam_ena_dis(hw, blk, false,
+							       vsig,
+							       &t->tcam[i],
+							       chg);
+				if (status)
+					return status;
+			} else if (!test_bit(t->tcam[i].ptg, ptgs_used) &&
+				   !t->tcam[i].in_use) {
+				/* need to enable this PTG, as it in not in use
+				 * and not enabled (highest priority)
+				 */
+				status = ice_prof_tcam_ena_dis(hw, blk, true,
+							       vsig,
+							       &t->tcam[i],
+							       chg);
+				if (status)
+					return status;
+			}
+
+			/* keep track of used ptgs */
+			__set_bit(t->tcam[i].ptg, ptgs_used);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_add_prof_id_vsig - add profile to VSIG
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsig: the VSIG to which this profile is to be added
+ * @hdl: the profile handle indicating the profile to add
+ * @rev: true to add entries to the end of the list
+ * @chg: the change list
+ */
+static int
+ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl,
+		     bool rev, struct list_head *chg)
+{
+	/* Masks that ignore flags */
+	u8 vl_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
+	u8 dc_msk[ICE_TCAM_KEY_VAL_SZ] = { 0xFF, 0xFF, 0x00, 0x00, 0x00 };
+	u8 nm_msk[ICE_TCAM_KEY_VAL_SZ] = { 0x00, 0x00, 0x00, 0x00, 0x00 };
+	struct ice_prof_map *map;
+	struct ice_vsig_prof *t;
+	struct ice_chs_chg *p;
+	u16 vsig_idx, i;
+	int status = 0;
+
+	/* Error, if this VSIG already has this profile */
+	if (ice_has_prof_vsig(hw, blk, vsig, hdl))
+		return -EEXIST;
+
+	/* new VSIG profile structure */
+	t = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*t), GFP_KERNEL);
+	if (!t)
+		return -ENOMEM;
+
+	mutex_lock(&hw->blk[blk].es.prof_map_lock);
+	/* Get the details on the profile specified by the handle ID */
+	map = ice_search_prof_id(hw, blk, hdl);
+	if (!map) {
+		status = -ENOENT;
+		goto err_ice_add_prof_id_vsig;
+	}
+
+	t->profile_cookie = map->profile_cookie;
+	t->prof_id = map->prof_id;
+	t->tcam_count = map->ptg_cnt;
+
+	/* create TCAM entries */
+	for (i = 0; i < map->ptg_cnt; i++) {
+		u16 tcam_idx;
+
+		/* add TCAM to change list */
+		p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL);
+		if (!p) {
+			status = -ENOMEM;
+			goto err_ice_add_prof_id_vsig;
+		}
+
+		/* allocate the TCAM entry index */
+		/* for entries with empty attribute masks, allocate entry from
+		 * the bottom of the TCAM table; otherwise, allocate from the
+		 * top of the table in order to give it higher priority
+		 */
+		status = ice_alloc_tcam_ent(hw, blk, map->attr[i].mask == 0,
+					    &tcam_idx);
+		if (status) {
+			devm_kfree(ice_hw_to_dev(hw), p);
+			goto err_ice_add_prof_id_vsig;
+		}
+
+		t->tcam[i].ptg = map->ptg[i];
+		t->tcam[i].prof_id = map->prof_id;
+		t->tcam[i].tcam_idx = tcam_idx;
+		t->tcam[i].attr = map->attr[i];
+		t->tcam[i].in_use = true;
+
+		p->type = ICE_TCAM_ADD;
+		p->add_tcam_idx = true;
+		p->prof_id = t->tcam[i].prof_id;
+		p->ptg = t->tcam[i].ptg;
+		p->vsig = vsig;
+		p->tcam_idx = t->tcam[i].tcam_idx;
+
+		/* write the TCAM entry */
+		status = ice_tcam_write_entry(hw, blk, t->tcam[i].tcam_idx,
+					      t->tcam[i].prof_id,
+					      t->tcam[i].ptg, vsig, 0, 0,
+					      vl_msk, dc_msk, nm_msk);
+		if (status) {
+			devm_kfree(ice_hw_to_dev(hw), p);
+			goto err_ice_add_prof_id_vsig;
+		}
+
+		/* log change */
+		list_add(&p->list_entry, chg);
+	}
+
+	/* add profile to VSIG */
+	vsig_idx = vsig & ICE_VSIG_IDX_M;
+	if (rev)
+		list_add_tail(&t->list,
+			      &hw->blk[blk].xlt2.vsig_tbl[vsig_idx].prop_lst);
+	else
+		list_add(&t->list,
+			 &hw->blk[blk].xlt2.vsig_tbl[vsig_idx].prop_lst);
+
+	mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+	return status;
+
+err_ice_add_prof_id_vsig:
+	mutex_unlock(&hw->blk[blk].es.prof_map_lock);
+	/* let caller clean up the change list */
+	devm_kfree(ice_hw_to_dev(hw), t);
+	return status;
+}
+
+/**
+ * ice_create_prof_id_vsig - add a new VSIG with a single profile
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsi: the initial VSI that will be in VSIG
+ * @hdl: the profile handle of the profile that will be added to the VSIG
+ * @chg: the change list
+ */
+static int
+ice_create_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl,
+			struct list_head *chg)
+{
+	struct ice_chs_chg *p;
+	u16 new_vsig;
+	int status;
+
+	p = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	new_vsig = ice_vsig_alloc(hw, blk);
+	if (!new_vsig) {
+		status = -EIO;
+		goto err_ice_create_prof_id_vsig;
+	}
+
+	status = ice_move_vsi(hw, blk, vsi, new_vsig, chg);
+	if (status)
+		goto err_ice_create_prof_id_vsig;
+
+	status = ice_add_prof_id_vsig(hw, blk, new_vsig, hdl, false, chg);
+	if (status)
+		goto err_ice_create_prof_id_vsig;
+
+	p->type = ICE_VSIG_ADD;
+	p->vsi = vsi;
+	p->orig_vsig = ICE_DEFAULT_VSIG;
+	p->vsig = new_vsig;
+
+	list_add(&p->list_entry, chg);
+
+	return 0;
+
+err_ice_create_prof_id_vsig:
+	/* let caller clean up the change list */
+	devm_kfree(ice_hw_to_dev(hw), p);
+	return status;
+}
+
+/**
+ * ice_create_vsig_from_lst - create a new VSIG with a list of profiles
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsi: the initial VSI that will be in VSIG
+ * @lst: the list of profile that will be added to the VSIG
+ * @new_vsig: return of new VSIG
+ * @chg: the change list
+ */
+static int
+ice_create_vsig_from_lst(struct ice_hw *hw, enum ice_block blk, u16 vsi,
+			 struct list_head *lst, u16 *new_vsig,
+			 struct list_head *chg)
+{
+	struct ice_vsig_prof *t;
+	int status;
+	u16 vsig;
+
+	vsig = ice_vsig_alloc(hw, blk);
+	if (!vsig)
+		return -EIO;
+
+	status = ice_move_vsi(hw, blk, vsi, vsig, chg);
+	if (status)
+		return status;
+
+	list_for_each_entry(t, lst, list) {
+		/* Reverse the order here since we are copying the list */
+		status = ice_add_prof_id_vsig(hw, blk, vsig, t->profile_cookie,
+					      true, chg);
+		if (status)
+			return status;
+	}
+
+	*new_vsig = vsig;
+
+	return 0;
+}
+
+/**
+ * ice_find_prof_vsig - find a VSIG with a specific profile handle
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @hdl: the profile handle of the profile to search for
+ * @vsig: returns the VSIG with the matching profile
+ */
+static bool
+ice_find_prof_vsig(struct ice_hw *hw, enum ice_block blk, u64 hdl, u16 *vsig)
+{
+	struct ice_vsig_prof *t;
+	struct list_head lst;
+	int status;
+
+	INIT_LIST_HEAD(&lst);
+
+	t = kzalloc(sizeof(*t), GFP_KERNEL);
+	if (!t)
+		return false;
+
+	t->profile_cookie = hdl;
+	list_add(&t->list, &lst);
+
+	status = ice_find_dup_props_vsig(hw, blk, &lst, vsig);
+
+	list_del(&t->list);
+	kfree(t);
+
+	return !status;
+}
+
+/**
+ * ice_add_prof_id_flow - add profile flow
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsi: the VSI to enable with the profile specified by ID
+ * @hdl: profile handle
+ *
+ * Calling this function will update the hardware tables to enable the
+ * profile indicated by the ID parameter for the VSIs specified in the VSI
+ * array. Once successfully called, the flow will be enabled.
+ */
+int
+ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl)
+{
+	struct ice_vsig_prof *tmp1, *del1;
+	struct ice_chs_chg *tmp, *del;
+	struct list_head union_lst;
+	struct list_head chg;
+	int status;
+	u16 vsig;
+
+	INIT_LIST_HEAD(&union_lst);
+	INIT_LIST_HEAD(&chg);
+
+	/* Get profile */
+	status = ice_get_prof(hw, blk, hdl, &chg);
+	if (status)
+		return status;
+
+	/* determine if VSI is already part of a VSIG */
+	status = ice_vsig_find_vsi(hw, blk, vsi, &vsig);
+	if (!status && vsig) {
+		bool only_vsi;
+		u16 or_vsig;
+		u16 ref;
+
+		/* found in VSIG */
+		or_vsig = vsig;
+
+		/* make sure that there is no overlap/conflict between the new
+		 * characteristics and the existing ones; we don't support that
+		 * scenario
+		 */
+		if (ice_has_prof_vsig(hw, blk, vsig, hdl)) {
+			status = -EEXIST;
+			goto err_ice_add_prof_id_flow;
+		}
+
+		/* last VSI in the VSIG? */
+		status = ice_vsig_get_ref(hw, blk, vsig, &ref);
+		if (status)
+			goto err_ice_add_prof_id_flow;
+		only_vsi = (ref == 1);
+
+		/* create a union of the current profiles and the one being
+		 * added
+		 */
+		status = ice_get_profs_vsig(hw, blk, vsig, &union_lst);
+		if (status)
+			goto err_ice_add_prof_id_flow;
+
+		status = ice_add_prof_to_lst(hw, blk, &union_lst, hdl);
+		if (status)
+			goto err_ice_add_prof_id_flow;
+
+		/* search for an existing VSIG with an exact charc match */
+		status = ice_find_dup_props_vsig(hw, blk, &union_lst, &vsig);
+		if (!status) {
+			/* move VSI to the VSIG that matches */
+			status = ice_move_vsi(hw, blk, vsi, vsig, &chg);
+			if (status)
+				goto err_ice_add_prof_id_flow;
+
+			/* VSI has been moved out of or_vsig. If the or_vsig had
+			 * only that VSI it is now empty and can be removed.
+			 */
+			if (only_vsi) {
+				status = ice_rem_vsig(hw, blk, or_vsig, &chg);
+				if (status)
+					goto err_ice_add_prof_id_flow;
+			}
+		} else if (only_vsi) {
+			/* If the original VSIG only contains one VSI, then it
+			 * will be the requesting VSI. In this case the VSI is
+			 * not sharing entries and we can simply add the new
+			 * profile to the VSIG.
+			 */
+			status = ice_add_prof_id_vsig(hw, blk, vsig, hdl, false,
+						      &chg);
+			if (status)
+				goto err_ice_add_prof_id_flow;
+
+			/* Adjust priorities */
+			status = ice_adj_prof_priorities(hw, blk, vsig, &chg);
+			if (status)
+				goto err_ice_add_prof_id_flow;
+		} else {
+			/* No match, so we need a new VSIG */
+			status = ice_create_vsig_from_lst(hw, blk, vsi,
+							  &union_lst, &vsig,
+							  &chg);
+			if (status)
+				goto err_ice_add_prof_id_flow;
+
+			/* Adjust priorities */
+			status = ice_adj_prof_priorities(hw, blk, vsig, &chg);
+			if (status)
+				goto err_ice_add_prof_id_flow;
+		}
+	} else {
+		/* need to find or add a VSIG */
+		/* search for an existing VSIG with an exact charc match */
+		if (ice_find_prof_vsig(hw, blk, hdl, &vsig)) {
+			/* found an exact match */
+			/* add or move VSI to the VSIG that matches */
+			status = ice_move_vsi(hw, blk, vsi, vsig, &chg);
+			if (status)
+				goto err_ice_add_prof_id_flow;
+		} else {
+			/* we did not find an exact match */
+			/* we need to add a VSIG */
+			status = ice_create_prof_id_vsig(hw, blk, vsi, hdl,
+							 &chg);
+			if (status)
+				goto err_ice_add_prof_id_flow;
+		}
+	}
+
+	/* update hardware */
+	if (!status)
+		status = ice_upd_prof_hw(hw, blk, &chg);
+
+err_ice_add_prof_id_flow:
+	list_for_each_entry_safe(del, tmp, &chg, list_entry) {
+		list_del(&del->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), del);
+	}
+
+	list_for_each_entry_safe(del1, tmp1, &union_lst, list) {
+		list_del(&del1->list);
+		devm_kfree(ice_hw_to_dev(hw), del1);
+	}
+
+	return status;
+}
+
+/**
+ * ice_rem_prof_from_list - remove a profile from list
+ * @hw: pointer to the HW struct
+ * @lst: list to remove the profile from
+ * @hdl: the profile handle indicating the profile to remove
+ */
+static int
+ice_rem_prof_from_list(struct ice_hw *hw, struct list_head *lst, u64 hdl)
+{
+	struct ice_vsig_prof *ent, *tmp;
+
+	list_for_each_entry_safe(ent, tmp, lst, list)
+		if (ent->profile_cookie == hdl) {
+			list_del(&ent->list);
+			devm_kfree(ice_hw_to_dev(hw), ent);
+			return 0;
+		}
+
+	return -ENOENT;
+}
+
+/**
+ * ice_rem_prof_id_flow - remove flow
+ * @hw: pointer to the HW struct
+ * @blk: hardware block
+ * @vsi: the VSI from which to remove the profile specified by ID
+ * @hdl: profile tracking handle
+ *
+ * Calling this function will update the hardware tables to remove the
+ * profile indicated by the ID parameter for the VSIs specified in the VSI
+ * array. Once successfully called, the flow will be disabled.
+ */
+int
+ice_rem_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl)
+{
+	struct ice_vsig_prof *tmp1, *del1;
+	struct ice_chs_chg *tmp, *del;
+	struct list_head chg, copy;
+	int status;
+	u16 vsig;
+
+	INIT_LIST_HEAD(&copy);
+	INIT_LIST_HEAD(&chg);
+
+	/* determine if VSI is already part of a VSIG */
+	status = ice_vsig_find_vsi(hw, blk, vsi, &vsig);
+	if (!status && vsig) {
+		bool last_profile;
+		bool only_vsi;
+		u16 ref;
+
+		/* found in VSIG */
+		last_profile = ice_vsig_prof_id_count(hw, blk, vsig) == 1;
+		status = ice_vsig_get_ref(hw, blk, vsig, &ref);
+		if (status)
+			goto err_ice_rem_prof_id_flow;
+		only_vsi = (ref == 1);
+
+		if (only_vsi) {
+			/* If the original VSIG only contains one reference,
+			 * which will be the requesting VSI, then the VSI is not
+			 * sharing entries and we can simply remove the specific
+			 * characteristics from the VSIG.
+			 */
+
+			if (last_profile) {
+				/* If there are no profiles left for this VSIG,
+				 * then simply remove the VSIG.
+				 */
+				status = ice_rem_vsig(hw, blk, vsig, &chg);
+				if (status)
+					goto err_ice_rem_prof_id_flow;
+			} else {
+				status = ice_rem_prof_id_vsig(hw, blk, vsig,
+							      hdl, &chg);
+				if (status)
+					goto err_ice_rem_prof_id_flow;
+
+				/* Adjust priorities */
+				status = ice_adj_prof_priorities(hw, blk, vsig,
+								 &chg);
+				if (status)
+					goto err_ice_rem_prof_id_flow;
+			}
+
+		} else {
+			/* Make a copy of the VSIG's list of Profiles */
+			status = ice_get_profs_vsig(hw, blk, vsig, &copy);
+			if (status)
+				goto err_ice_rem_prof_id_flow;
+
+			/* Remove specified profile entry from the list */
+			status = ice_rem_prof_from_list(hw, &copy, hdl);
+			if (status)
+				goto err_ice_rem_prof_id_flow;
+
+			if (list_empty(&copy)) {
+				status = ice_move_vsi(hw, blk, vsi,
+						      ICE_DEFAULT_VSIG, &chg);
+				if (status)
+					goto err_ice_rem_prof_id_flow;
+
+			} else if (!ice_find_dup_props_vsig(hw, blk, &copy,
+							    &vsig)) {
+				/* found an exact match */
+				/* add or move VSI to the VSIG that matches */
+				/* Search for a VSIG with a matching profile
+				 * list
+				 */
+
+				/* Found match, move VSI to the matching VSIG */
+				status = ice_move_vsi(hw, blk, vsi, vsig, &chg);
+				if (status)
+					goto err_ice_rem_prof_id_flow;
+			} else {
+				/* since no existing VSIG supports this
+				 * characteristic pattern, we need to create a
+				 * new VSIG and TCAM entries
+				 */
+				status = ice_create_vsig_from_lst(hw, blk, vsi,
+								  &copy, &vsig,
+								  &chg);
+				if (status)
+					goto err_ice_rem_prof_id_flow;
+
+				/* Adjust priorities */
+				status = ice_adj_prof_priorities(hw, blk, vsig,
+								 &chg);
+				if (status)
+					goto err_ice_rem_prof_id_flow;
+			}
+		}
+	} else {
+		status = -ENOENT;
+	}
+
+	/* update hardware tables */
+	if (!status)
+		status = ice_upd_prof_hw(hw, blk, &chg);
+
+err_ice_rem_prof_id_flow:
+	list_for_each_entry_safe(del, tmp, &chg, list_entry) {
+		list_del(&del->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), del);
+	}
+
+	list_for_each_entry_safe(del1, tmp1, &copy, list) {
+		list_del(&del1->list);
+		devm_kfree(ice_hw_to_dev(hw), del1);
+	}
+
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
new file mode 100644
index 0000000000..7af7c8e9aa
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_FLEX_PIPE_H_
+#define _ICE_FLEX_PIPE_H_
+
+#include "ice_type.h"
+
+int
+ice_acquire_change_lock(struct ice_hw *hw, enum ice_aq_res_access_type access);
+void ice_release_change_lock(struct ice_hw *hw);
+int
+ice_find_prot_off(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 fv_idx,
+		  u8 *prot, u16 *off);
+void
+ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type type,
+		     unsigned long *bm);
+void
+ice_init_prof_result_bm(struct ice_hw *hw);
+int
+ice_get_sw_fv_list(struct ice_hw *hw, struct ice_prot_lkup_ext *lkups,
+		   unsigned long *bm, struct list_head *fv_list);
+int
+ice_pkg_buf_unreserve_section(struct ice_buf_build *bld, u16 count);
+u16 ice_pkg_buf_get_free_space(struct ice_buf_build *bld);
+int
+ice_aq_upload_section(struct ice_hw *hw, struct ice_buf_hdr *pkg_buf,
+		      u16 buf_size, struct ice_sq_cd *cd);
+bool
+ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port,
+			 enum ice_tunnel_type type);
+int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table,
+			    unsigned int idx, struct udp_tunnel_info *ti);
+int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table,
+			      unsigned int idx, struct udp_tunnel_info *ti);
+int ice_set_dvm_boost_entries(struct ice_hw *hw);
+
+/* Rx parser PTYPE functions */
+bool ice_hw_ptype_ena(struct ice_hw *hw, u16 ptype);
+
+/* XLT2/VSI group functions */
+int
+ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
+	     const struct ice_ptype_attributes *attr, u16 attr_cnt,
+	     struct ice_fv_word *es, u16 *masks);
+int
+ice_add_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl);
+int
+ice_rem_prof_id_flow(struct ice_hw *hw, enum ice_block blk, u16 vsi, u64 hdl);
+enum ice_ddp_state ice_init_pkg(struct ice_hw *hw, u8 *buff, u32 len);
+enum ice_ddp_state
+ice_copy_and_init_pkg(struct ice_hw *hw, const u8 *buf, u32 len);
+bool ice_is_init_pkg_successful(enum ice_ddp_state state);
+int ice_init_hw_tbls(struct ice_hw *hw);
+void ice_free_seg(struct ice_hw *hw);
+void ice_fill_blk_tbls(struct ice_hw *hw);
+void ice_clear_hw_tbls(struct ice_hw *hw);
+void ice_free_hw_tbls(struct ice_hw *hw);
+int ice_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 id);
+struct ice_buf_build *
+ice_pkg_buf_alloc_single_section(struct ice_hw *hw, u32 type, u16 size,
+				 void **section);
+struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld);
+void ice_pkg_buf_free(struct ice_hw *hw, struct ice_buf_build *bld);
+
+#endif /* _ICE_FLEX_PIPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h
new file mode 100644
index 0000000000..4f42e14ed3
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h
@@ -0,0 +1,383 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_FLEX_TYPE_H_
+#define _ICE_FLEX_TYPE_H_
+#include "ice_ddp.h"
+
+/* Packet Type (PTYPE) values */
+#define ICE_PTYPE_MAC_PAY		1
+#define ICE_PTYPE_IPV4_PAY		23
+#define ICE_PTYPE_IPV4_UDP_PAY		24
+#define ICE_PTYPE_IPV4_TCP_PAY		26
+#define ICE_PTYPE_IPV4_SCTP_PAY		27
+#define ICE_PTYPE_IPV6_PAY		89
+#define ICE_PTYPE_IPV6_UDP_PAY		90
+#define ICE_PTYPE_IPV6_TCP_PAY		92
+#define ICE_PTYPE_IPV6_SCTP_PAY		93
+#define ICE_MAC_IPV4_ESP		160
+#define ICE_MAC_IPV6_ESP		161
+#define ICE_MAC_IPV4_AH			162
+#define ICE_MAC_IPV6_AH			163
+#define ICE_MAC_IPV4_NAT_T_ESP		164
+#define ICE_MAC_IPV6_NAT_T_ESP		165
+#define ICE_MAC_IPV4_GTPU		329
+#define ICE_MAC_IPV6_GTPU		330
+#define ICE_MAC_IPV4_GTPU_IPV4_FRAG	331
+#define ICE_MAC_IPV4_GTPU_IPV4_PAY	332
+#define ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY	333
+#define ICE_MAC_IPV4_GTPU_IPV4_TCP	334
+#define ICE_MAC_IPV4_GTPU_IPV4_ICMP	335
+#define ICE_MAC_IPV6_GTPU_IPV4_FRAG	336
+#define ICE_MAC_IPV6_GTPU_IPV4_PAY	337
+#define ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY	338
+#define ICE_MAC_IPV6_GTPU_IPV4_TCP	339
+#define ICE_MAC_IPV6_GTPU_IPV4_ICMP	340
+#define ICE_MAC_IPV4_GTPU_IPV6_FRAG	341
+#define ICE_MAC_IPV4_GTPU_IPV6_PAY	342
+#define ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY	343
+#define ICE_MAC_IPV4_GTPU_IPV6_TCP	344
+#define ICE_MAC_IPV4_GTPU_IPV6_ICMPV6	345
+#define ICE_MAC_IPV6_GTPU_IPV6_FRAG	346
+#define ICE_MAC_IPV6_GTPU_IPV6_PAY	347
+#define ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY	348
+#define ICE_MAC_IPV6_GTPU_IPV6_TCP	349
+#define ICE_MAC_IPV6_GTPU_IPV6_ICMPV6	350
+#define ICE_MAC_IPV4_PFCP_SESSION	352
+#define ICE_MAC_IPV6_PFCP_SESSION	354
+#define ICE_MAC_IPV4_L2TPV3		360
+#define ICE_MAC_IPV6_L2TPV3		361
+
+/* Attributes that can modify PTYPE definitions.
+ *
+ * These values will represent special attributes for PTYPEs, which will
+ * resolve into metadata packet flags definitions that can be used in the TCAM
+ * for identifying a PTYPE with specific characteristics.
+ */
+enum ice_ptype_attrib_type {
+	/* GTP PTYPEs */
+	ICE_PTYPE_ATTR_GTP_PDU_EH,
+	ICE_PTYPE_ATTR_GTP_SESSION,
+	ICE_PTYPE_ATTR_GTP_DOWNLINK,
+	ICE_PTYPE_ATTR_GTP_UPLINK,
+};
+
+struct ice_ptype_attrib_info {
+	u16 flags;
+	u16 mask;
+};
+
+/* TCAM flag definitions */
+#define ICE_GTP_PDU			BIT(14)
+#define ICE_GTP_PDU_LINK		BIT(13)
+
+/* GTP attributes */
+#define ICE_GTP_PDU_FLAG_MASK		(ICE_GTP_PDU)
+#define ICE_GTP_PDU_EH			ICE_GTP_PDU
+
+#define ICE_GTP_FLAGS_MASK		(ICE_GTP_PDU | ICE_GTP_PDU_LINK)
+#define ICE_GTP_SESSION			0
+#define ICE_GTP_DOWNLINK		ICE_GTP_PDU
+#define ICE_GTP_UPLINK			(ICE_GTP_PDU | ICE_GTP_PDU_LINK)
+
+struct ice_ptype_attributes {
+	u16 ptype;
+	enum ice_ptype_attrib_type attrib;
+};
+
+/* Tunnel enabling */
+
+enum ice_tunnel_type {
+	TNL_VXLAN = 0,
+	TNL_GENEVE,
+	TNL_GRETAP,
+	TNL_GTPC,
+	TNL_GTPU,
+	__TNL_TYPE_CNT,
+	TNL_LAST = 0xFF,
+	TNL_ALL = 0xFF,
+};
+
+struct ice_tunnel_type_scan {
+	enum ice_tunnel_type type;
+	const char *label_prefix;
+};
+
+struct ice_tunnel_entry {
+	enum ice_tunnel_type type;
+	u16 boost_addr;
+	u16 port;
+	struct ice_boost_tcam_entry *boost_entry;
+	u8 valid;
+};
+
+#define ICE_TUNNEL_MAX_ENTRIES	16
+
+struct ice_tunnel_table {
+	struct ice_tunnel_entry tbl[ICE_TUNNEL_MAX_ENTRIES];
+	u16 count;
+	u16 valid_count[__TNL_TYPE_CNT];
+};
+
+struct ice_dvm_entry {
+	u16 boost_addr;
+	u16 enable;
+	struct ice_boost_tcam_entry *boost_entry;
+};
+
+#define ICE_DVM_MAX_ENTRIES	48
+
+struct ice_dvm_table {
+	struct ice_dvm_entry tbl[ICE_DVM_MAX_ENTRIES];
+	u16 count;
+};
+
+struct ice_pkg_es {
+	__le16 count;
+	__le16 offset;
+	struct ice_fv_word es[];
+};
+
+struct ice_es {
+	u32 sid;
+	u16 count;
+	u16 fvw;
+	u16 *ref_count;
+	u32 *mask_ena;
+	struct list_head prof_map;
+	struct ice_fv_word *t;
+	struct mutex prof_map_lock;	/* protect access to profiles list */
+	u8 *written;
+	u8 reverse; /* set to true to reverse FV order */
+};
+
+/* PTYPE Group management */
+
+/* Note: XLT1 table takes 13-bit as input, and results in an 8-bit packet type
+ * group (PTG) ID as output.
+ *
+ * Note: PTG 0 is the default packet type group and it is assumed that all PTYPE
+ * are a part of this group until moved to a new PTG.
+ */
+#define ICE_DEFAULT_PTG	0
+
+struct ice_ptg_entry {
+	struct ice_ptg_ptype *first_ptype;
+	u8 in_use;
+};
+
+struct ice_ptg_ptype {
+	struct ice_ptg_ptype *next_ptype;
+	u8 ptg;
+};
+
+#define ICE_MAX_TCAM_PER_PROFILE	32
+#define ICE_MAX_PTG_PER_PROFILE		32
+
+struct ice_prof_map {
+	struct list_head list;
+	u64 profile_cookie;
+	u64 context;
+	u8 prof_id;
+	u8 ptg_cnt;
+	u8 ptg[ICE_MAX_PTG_PER_PROFILE];
+	struct ice_ptype_attrib_info attr[ICE_MAX_PTG_PER_PROFILE];
+};
+
+#define ICE_INVALID_TCAM	0xFFFF
+
+struct ice_tcam_inf {
+	u16 tcam_idx;
+	struct ice_ptype_attrib_info attr;
+	u8 ptg;
+	u8 prof_id;
+	u8 in_use;
+};
+
+struct ice_vsig_prof {
+	struct list_head list;
+	u64 profile_cookie;
+	u8 prof_id;
+	u8 tcam_count;
+	struct ice_tcam_inf tcam[ICE_MAX_TCAM_PER_PROFILE];
+};
+
+struct ice_vsig_entry {
+	struct list_head prop_lst;
+	struct ice_vsig_vsi *first_vsi;
+	u8 in_use;
+};
+
+struct ice_vsig_vsi {
+	struct ice_vsig_vsi *next_vsi;
+	u32 prop_mask;
+	u16 changed;
+	u16 vsig;
+};
+
+#define ICE_XLT1_CNT	1024
+#define ICE_MAX_PTGS	256
+
+/* XLT1 Table */
+struct ice_xlt1 {
+	struct ice_ptg_entry *ptg_tbl;
+	struct ice_ptg_ptype *ptypes;
+	u8 *t;
+	u32 sid;
+	u16 count;
+};
+
+#define ICE_XLT2_CNT	768
+#define ICE_MAX_VSIGS	768
+
+/* VSIG bit layout:
+ * [0:12]: incremental VSIG index 1 to ICE_MAX_VSIGS
+ * [13:15]: PF number of device
+ */
+#define ICE_VSIG_IDX_M	(0x1FFF)
+#define ICE_PF_NUM_S	13
+#define ICE_PF_NUM_M	(0x07 << ICE_PF_NUM_S)
+#define ICE_VSIG_VALUE(vsig, pf_id) \
+	((u16)((((u16)(vsig)) & ICE_VSIG_IDX_M) | \
+	       (((u16)(pf_id) << ICE_PF_NUM_S) & ICE_PF_NUM_M)))
+#define ICE_DEFAULT_VSIG	0
+
+/* XLT2 Table */
+struct ice_xlt2 {
+	struct ice_vsig_entry *vsig_tbl;
+	struct ice_vsig_vsi *vsis;
+	u16 *t;
+	u32 sid;
+	u16 count;
+};
+
+/* Profile ID Management */
+struct ice_prof_id_key {
+	__le16 flags;
+	u8 xlt1;
+	__le16 xlt2_cdid;
+} __packed;
+
+/* Keys are made up of two values, each one-half the size of the key.
+ * For TCAM, the entire key is 80 bits wide (or 2, 40-bit wide values)
+ */
+#define ICE_TCAM_KEY_VAL_SZ	5
+#define ICE_TCAM_KEY_SZ		(2 * ICE_TCAM_KEY_VAL_SZ)
+
+struct ice_prof_tcam_entry {
+	__le16 addr;
+	u8 key[ICE_TCAM_KEY_SZ];
+	u8 prof_id;
+} __packed;
+
+struct ice_prof_id_section {
+	__le16 count;
+	struct ice_prof_tcam_entry entry[];
+};
+
+struct ice_prof_tcam {
+	u32 sid;
+	u16 count;
+	u16 max_prof_id;
+	struct ice_prof_tcam_entry *t;
+	u8 cdid_bits; /* # CDID bits to use in key, 0, 2, 4, or 8 */
+};
+
+struct ice_prof_redir {
+	u8 *t;
+	u32 sid;
+	u16 count;
+};
+
+struct ice_mask {
+	u16 mask;	/* 16-bit mask */
+	u16 idx;	/* index */
+	u16 ref;	/* reference count */
+	u8 in_use;	/* non-zero if used */
+};
+
+struct ice_masks {
+	struct mutex lock; /* lock to protect this structure */
+	u16 first;	/* first mask owned by the PF */
+	u16 count;	/* number of masks owned by the PF */
+#define ICE_PROF_MASK_COUNT 32
+	struct ice_mask masks[ICE_PROF_MASK_COUNT];
+};
+
+/* Tables per block */
+struct ice_blk_info {
+	struct ice_xlt1 xlt1;
+	struct ice_xlt2 xlt2;
+	struct ice_prof_tcam prof;
+	struct ice_prof_redir prof_redir;
+	struct ice_es es;
+	struct ice_masks masks;
+	u8 overwrite; /* set to true to allow overwrite of table entries */
+	u8 is_list_init;
+};
+
+enum ice_chg_type {
+	ICE_TCAM_NONE = 0,
+	ICE_PTG_ES_ADD,
+	ICE_TCAM_ADD,
+	ICE_VSIG_ADD,
+	ICE_VSIG_REM,
+	ICE_VSI_MOVE,
+};
+
+struct ice_chs_chg {
+	struct list_head list_entry;
+	enum ice_chg_type type;
+
+	u8 add_ptg;
+	u8 add_vsig;
+	u8 add_tcam_idx;
+	u8 add_prof;
+	u16 ptype;
+	u8 ptg;
+	u8 prof_id;
+	u16 vsi;
+	u16 vsig;
+	u16 orig_vsig;
+	u16 tcam_idx;
+	struct ice_ptype_attrib_info attr;
+};
+
+#define ICE_FLOW_PTYPE_MAX		ICE_XLT1_CNT
+
+enum ice_prof_type {
+	ICE_PROF_NON_TUN = 0x1,
+	ICE_PROF_TUN_UDP = 0x2,
+	ICE_PROF_TUN_GRE = 0x4,
+	ICE_PROF_TUN_GTPU = 0x8,
+	ICE_PROF_TUN_GTPC = 0x10,
+	ICE_PROF_TUN_ALL = 0x1E,
+	ICE_PROF_ALL = 0xFF,
+};
+
+/* Number of bits/bytes contained in meta init entry. Note, this should be a
+ * multiple of 32 bits.
+ */
+#define ICE_META_INIT_BITS	192
+#define ICE_META_INIT_DW_CNT	(ICE_META_INIT_BITS / (sizeof(__le32) * \
+				 BITS_PER_BYTE))
+
+/* The meta init Flag field starts at this bit */
+#define ICE_META_FLAGS_ST		123
+
+/* The entry and bit to check for Double VLAN Mode (DVM) support */
+#define ICE_META_VLAN_MODE_ENTRY	0
+#define ICE_META_FLAG_VLAN_MODE		60
+#define ICE_META_VLAN_MODE_BIT		(ICE_META_FLAGS_ST + \
+					 ICE_META_FLAG_VLAN_MODE)
+
+struct ice_meta_init_entry {
+	__le32 bm[ICE_META_INIT_DW_CNT];
+};
+
+struct ice_meta_init_section {
+	__le16 count;
+	__le16 offset;
+	struct ice_meta_init_entry entry;
+};
+#endif /* _ICE_FLEX_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
new file mode 100644
index 0000000000..85cca572c2
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -0,0 +1,2457 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_flow.h"
+#include <net/gre.h>
+
+/* Describe properties of a protocol header field */
+struct ice_flow_field_info {
+	enum ice_flow_seg_hdr hdr;
+	s16 off;	/* Offset from start of a protocol header, in bits */
+	u16 size;	/* Size of fields in bits */
+	u16 mask;	/* 16-bit mask for field */
+};
+
+#define ICE_FLOW_FLD_INFO(_hdr, _offset_bytes, _size_bytes) { \
+	.hdr = _hdr, \
+	.off = (_offset_bytes) * BITS_PER_BYTE, \
+	.size = (_size_bytes) * BITS_PER_BYTE, \
+	.mask = 0, \
+}
+
+#define ICE_FLOW_FLD_INFO_MSK(_hdr, _offset_bytes, _size_bytes, _mask) { \
+	.hdr = _hdr, \
+	.off = (_offset_bytes) * BITS_PER_BYTE, \
+	.size = (_size_bytes) * BITS_PER_BYTE, \
+	.mask = _mask, \
+}
+
+/* Table containing properties of supported protocol header fields */
+static const
+struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = {
+	/* Ether */
+	/* ICE_FLOW_FIELD_IDX_ETH_DA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, 0, ETH_ALEN),
+	/* ICE_FLOW_FIELD_IDX_ETH_SA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, ETH_ALEN, ETH_ALEN),
+	/* ICE_FLOW_FIELD_IDX_S_VLAN */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_VLAN, 12, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_C_VLAN */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_VLAN, 14, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_ETH_TYPE */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ETH, 0, sizeof(__be16)),
+	/* IPv4 / IPv6 */
+	/* ICE_FLOW_FIELD_IDX_IPV4_DSCP */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_IPV4, 0, 1, 0x00fc),
+	/* ICE_FLOW_FIELD_IDX_IPV6_DSCP */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_IPV6, 0, 1, 0x0ff0),
+	/* ICE_FLOW_FIELD_IDX_IPV4_TTL */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 8, 1, 0xff00),
+	/* ICE_FLOW_FIELD_IDX_IPV4_PROT */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 8, 1, 0x00ff),
+	/* ICE_FLOW_FIELD_IDX_IPV6_TTL */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 6, 1, 0x00ff),
+	/* ICE_FLOW_FIELD_IDX_IPV6_PROT */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_NONE, 6, 1, 0xff00),
+	/* ICE_FLOW_FIELD_IDX_IPV4_SA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 12, sizeof(struct in_addr)),
+	/* ICE_FLOW_FIELD_IDX_IPV4_DA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV4, 16, sizeof(struct in_addr)),
+	/* ICE_FLOW_FIELD_IDX_IPV6_SA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 8, sizeof(struct in6_addr)),
+	/* ICE_FLOW_FIELD_IDX_IPV6_DA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_IPV6, 24, sizeof(struct in6_addr)),
+	/* Transport */
+	/* ICE_FLOW_FIELD_IDX_TCP_SRC_PORT */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 0, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_TCP_DST_PORT */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 2, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_UDP_SRC_PORT */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_UDP, 0, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_UDP_DST_PORT */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_UDP, 2, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 0, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_SCTP_DST_PORT */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 2, sizeof(__be16)),
+	/* ICE_FLOW_FIELD_IDX_TCP_FLAGS */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_TCP, 13, 1),
+	/* ARP */
+	/* ICE_FLOW_FIELD_IDX_ARP_SIP */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 14, sizeof(struct in_addr)),
+	/* ICE_FLOW_FIELD_IDX_ARP_DIP */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 24, sizeof(struct in_addr)),
+	/* ICE_FLOW_FIELD_IDX_ARP_SHA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 8, ETH_ALEN),
+	/* ICE_FLOW_FIELD_IDX_ARP_DHA */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 18, ETH_ALEN),
+	/* ICE_FLOW_FIELD_IDX_ARP_OP */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ARP, 6, sizeof(__be16)),
+	/* ICMP */
+	/* ICE_FLOW_FIELD_IDX_ICMP_TYPE */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ICMP, 0, 1),
+	/* ICE_FLOW_FIELD_IDX_ICMP_CODE */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ICMP, 1, 1),
+	/* GRE */
+	/* ICE_FLOW_FIELD_IDX_GRE_KEYID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GRE, 12,
+			  sizeof_field(struct gre_full_hdr, key)),
+	/* GTP */
+	/* ICE_FLOW_FIELD_IDX_GTPC_TEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPC_TEID, 12, sizeof(__be32)),
+	/* ICE_FLOW_FIELD_IDX_GTPU_IP_TEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_IP, 12, sizeof(__be32)),
+	/* ICE_FLOW_FIELD_IDX_GTPU_EH_TEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_EH, 12, sizeof(__be32)),
+	/* ICE_FLOW_FIELD_IDX_GTPU_EH_QFI */
+	ICE_FLOW_FLD_INFO_MSK(ICE_FLOW_SEG_HDR_GTPU_EH, 22, sizeof(__be16),
+			      0x3f00),
+	/* ICE_FLOW_FIELD_IDX_GTPU_UP_TEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_UP, 12, sizeof(__be32)),
+	/* ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GTPU_DWN, 12, sizeof(__be32)),
+	/* PPPoE */
+	/* ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_PPPOE, 2, sizeof(__be16)),
+	/* PFCP */
+	/* ICE_FLOW_FIELD_IDX_PFCP_SEID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_PFCP_SESSION, 12, sizeof(__be64)),
+	/* L2TPv3 */
+	/* ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_L2TPV3, 0, sizeof(__be32)),
+	/* ESP */
+	/* ICE_FLOW_FIELD_IDX_ESP_SPI */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_ESP, 0, sizeof(__be32)),
+	/* AH */
+	/* ICE_FLOW_FIELD_IDX_AH_SPI */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_AH, 4, sizeof(__be32)),
+	/* NAT_T_ESP */
+	/* ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_NAT_T_ESP, 8, sizeof(__be32)),
+};
+
+/* Bitmaps indicating relevant packet types for a particular protocol header
+ *
+ * Packet types for packets with an Outer/First/Single MAC header
+ */
+static const u32 ice_ptypes_mac_ofos[] = {
+	0xFDC00846, 0xBFBF7F7E, 0xF70001DF, 0xFEFDFDFB,
+	0x0000077E, 0x00000000, 0x00000000, 0x00000000,
+	0x00400000, 0x03FFF000, 0x7FFFFFE0, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last MAC VLAN header */
+static const u32 ice_ptypes_macvlan_il[] = {
+	0x00000000, 0xBC000000, 0x000001DF, 0xF0000000,
+	0x0000077E, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv4 header, does NOT
+ * include IPv4 other PTYPEs
+ */
+static const u32 ice_ptypes_ipv4_ofos[] = {
+	0x1DC00000, 0x04000800, 0x00000000, 0x00000000,
+	0x00000000, 0x00000155, 0x00000000, 0x00000000,
+	0x00000000, 0x000FC000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv4 header, includes
+ * IPv4 other PTYPEs
+ */
+static const u32 ice_ptypes_ipv4_ofos_all[] = {
+	0x1DC00000, 0x04000800, 0x00000000, 0x00000000,
+	0x00000000, 0x00000155, 0x00000000, 0x00000000,
+	0x00000000, 0x000FC000, 0x83E0F800, 0x00000101,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last IPv4 header */
+static const u32 ice_ptypes_ipv4_il[] = {
+	0xE0000000, 0xB807700E, 0x80000003, 0xE01DC03B,
+	0x0000000E, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x001FF800, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv6 header, does NOT
+ * include IPv6 other PTYPEs
+ */
+static const u32 ice_ptypes_ipv6_ofos[] = {
+	0x00000000, 0x00000000, 0x77000000, 0x10002000,
+	0x00000000, 0x000002AA, 0x00000000, 0x00000000,
+	0x00000000, 0x03F00000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv6 header, includes
+ * IPv6 other PTYPEs
+ */
+static const u32 ice_ptypes_ipv6_ofos_all[] = {
+	0x00000000, 0x00000000, 0x77000000, 0x10002000,
+	0x00000000, 0x000002AA, 0x00000000, 0x00000000,
+	0x00080F00, 0x03F00000, 0x7C1F0000, 0x00000206,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last IPv6 header */
+static const u32 ice_ptypes_ipv6_il[] = {
+	0x00000000, 0x03B80770, 0x000001DC, 0x0EE00000,
+	0x00000770, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x7FE00000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv4 header - no L4 */
+static const u32 ice_ptypes_ipv4_ofos_no_l4[] = {
+	0x10C00000, 0x04000800, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outermost/First ARP header */
+static const u32 ice_ptypes_arp_of[] = {
+	0x00000800, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last IPv4 header - no L4 */
+static const u32 ice_ptypes_ipv4_il_no_l4[] = {
+	0x60000000, 0x18043008, 0x80000002, 0x6010c021,
+	0x00000008, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outer/First/Single IPv6 header - no L4 */
+static const u32 ice_ptypes_ipv6_ofos_no_l4[] = {
+	0x00000000, 0x00000000, 0x43000000, 0x10002000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last IPv6 header - no L4 */
+static const u32 ice_ptypes_ipv6_il_no_l4[] = {
+	0x00000000, 0x02180430, 0x0000010c, 0x086010c0,
+	0x00000430, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* UDP Packet types for non-tunneled packets or tunneled
+ * packets with inner UDP.
+ */
+static const u32 ice_ptypes_udp_il[] = {
+	0x81000000, 0x20204040, 0x04000010, 0x80810102,
+	0x00000040, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00410000, 0x90842000, 0x00000007,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last TCP header */
+static const u32 ice_ptypes_tcp_il[] = {
+	0x04000000, 0x80810102, 0x10000040, 0x02040408,
+	0x00000102, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00820000, 0x21084000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last SCTP header */
+static const u32 ice_ptypes_sctp_il[] = {
+	0x08000000, 0x01020204, 0x20000081, 0x04080810,
+	0x00000204, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x01040000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outermost/First ICMP header */
+static const u32 ice_ptypes_icmp_of[] = {
+	0x10000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last ICMP header */
+static const u32 ice_ptypes_icmp_il[] = {
+	0x00000000, 0x02040408, 0x40000102, 0x08101020,
+	0x00000408, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x42108000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Outermost/First GRE header */
+static const u32 ice_ptypes_gre_of[] = {
+	0x00000000, 0xBFBF7800, 0x000001DF, 0xFEFDE000,
+	0x0000017E, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with an Innermost/Last MAC header */
+static const u32 ice_ptypes_mac_il[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for GTPC */
+static const u32 ice_ptypes_gtpc[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000180, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for GTPC with TEID */
+static const u32 ice_ptypes_gtpc_tid[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000060, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for GTPU */
+static const struct ice_ptype_attributes ice_attr_gtpu_eh[] = {
+	{ ICE_MAC_IPV4_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV4_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_PDU_EH },
+	{ ICE_MAC_IPV6_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_PDU_EH },
+};
+
+static const struct ice_ptype_attributes ice_attr_gtpu_down[] = {
+	{ ICE_MAC_IPV4_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_DOWNLINK },
+};
+
+static const struct ice_ptype_attributes ice_attr_gtpu_up[] = {
+	{ ICE_MAC_IPV4_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_FRAG,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_PAY,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_TCP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV4_ICMP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV4_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_FRAG,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_PAY,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_UDP_PAY, ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_TCP,	  ICE_PTYPE_ATTR_GTP_UPLINK },
+	{ ICE_MAC_IPV6_GTPU_IPV6_ICMPV6,  ICE_PTYPE_ATTR_GTP_UPLINK },
+};
+
+static const u32 ice_ptypes_gtpu[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x7FFFFE00, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for PPPoE */
+static const u32 ice_ptypes_pppoe[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x03ffe000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with PFCP NODE header */
+static const u32 ice_ptypes_pfcp_node[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x80000000, 0x00000002,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with PFCP SESSION header */
+static const u32 ice_ptypes_pfcp_session[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000005,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for L2TPv3 */
+static const u32 ice_ptypes_l2tpv3[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000300,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for ESP */
+static const u32 ice_ptypes_esp[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000003, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for AH */
+static const u32 ice_ptypes_ah[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x0000000C, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Packet types for packets with NAT_T ESP header */
+static const u32 ice_ptypes_nat_t_esp[] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000030, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+static const u32 ice_ptypes_mac_non_ip_ofos[] = {
+	0x00000846, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00400000, 0x03FFF000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/* Manage parameters and info. used during the creation of a flow profile */
+struct ice_flow_prof_params {
+	enum ice_block blk;
+	u16 entry_length; /* # of bytes formatted entry will require */
+	u8 es_cnt;
+	struct ice_flow_prof *prof;
+
+	/* For ACL, the es[0] will have the data of ICE_RX_MDID_PKT_FLAGS_15_0
+	 * This will give us the direction flags.
+	 */
+	struct ice_fv_word es[ICE_MAX_FV_WORDS];
+	/* attributes can be used to add attributes to a particular PTYPE */
+	const struct ice_ptype_attributes *attr;
+	u16 attr_cnt;
+
+	u16 mask[ICE_MAX_FV_WORDS];
+	DECLARE_BITMAP(ptypes, ICE_FLOW_PTYPE_MAX);
+};
+
+#define ICE_FLOW_RSS_HDRS_INNER_MASK \
+	(ICE_FLOW_SEG_HDR_PPPOE | ICE_FLOW_SEG_HDR_GTPC | \
+	ICE_FLOW_SEG_HDR_GTPC_TEID | ICE_FLOW_SEG_HDR_GTPU | \
+	ICE_FLOW_SEG_HDR_PFCP_SESSION | ICE_FLOW_SEG_HDR_L2TPV3 | \
+	ICE_FLOW_SEG_HDR_ESP | ICE_FLOW_SEG_HDR_AH | \
+	ICE_FLOW_SEG_HDR_NAT_T_ESP)
+
+#define ICE_FLOW_SEG_HDRS_L3_MASK	\
+	(ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6 | ICE_FLOW_SEG_HDR_ARP)
+#define ICE_FLOW_SEG_HDRS_L4_MASK	\
+	(ICE_FLOW_SEG_HDR_ICMP | ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | \
+	 ICE_FLOW_SEG_HDR_SCTP)
+/* mask for L4 protocols that are NOT part of IPv4/6 OTHER PTYPE groups */
+#define ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER	\
+	(ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP)
+
+/**
+ * ice_flow_val_hdrs - validates packet segments for valid protocol headers
+ * @segs: array of one or more packet segments that describe the flow
+ * @segs_cnt: number of packet segments provided
+ */
+static int ice_flow_val_hdrs(struct ice_flow_seg_info *segs, u8 segs_cnt)
+{
+	u8 i;
+
+	for (i = 0; i < segs_cnt; i++) {
+		/* Multiple L3 headers */
+		if (segs[i].hdrs & ICE_FLOW_SEG_HDRS_L3_MASK &&
+		    !is_power_of_2(segs[i].hdrs & ICE_FLOW_SEG_HDRS_L3_MASK))
+			return -EINVAL;
+
+		/* Multiple L4 headers */
+		if (segs[i].hdrs & ICE_FLOW_SEG_HDRS_L4_MASK &&
+		    !is_power_of_2(segs[i].hdrs & ICE_FLOW_SEG_HDRS_L4_MASK))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Sizes of fixed known protocol headers without header options */
+#define ICE_FLOW_PROT_HDR_SZ_MAC	14
+#define ICE_FLOW_PROT_HDR_SZ_MAC_VLAN	(ICE_FLOW_PROT_HDR_SZ_MAC + 2)
+#define ICE_FLOW_PROT_HDR_SZ_IPV4	20
+#define ICE_FLOW_PROT_HDR_SZ_IPV6	40
+#define ICE_FLOW_PROT_HDR_SZ_ARP	28
+#define ICE_FLOW_PROT_HDR_SZ_ICMP	8
+#define ICE_FLOW_PROT_HDR_SZ_TCP	20
+#define ICE_FLOW_PROT_HDR_SZ_UDP	8
+#define ICE_FLOW_PROT_HDR_SZ_SCTP	12
+
+/**
+ * ice_flow_calc_seg_sz - calculates size of a packet segment based on headers
+ * @params: information about the flow to be processed
+ * @seg: index of packet segment whose header size is to be determined
+ */
+static u16 ice_flow_calc_seg_sz(struct ice_flow_prof_params *params, u8 seg)
+{
+	u16 sz;
+
+	/* L2 headers */
+	sz = (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_VLAN) ?
+		ICE_FLOW_PROT_HDR_SZ_MAC_VLAN : ICE_FLOW_PROT_HDR_SZ_MAC;
+
+	/* L3 headers */
+	if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV4)
+		sz += ICE_FLOW_PROT_HDR_SZ_IPV4;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV6)
+		sz += ICE_FLOW_PROT_HDR_SZ_IPV6;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_ARP)
+		sz += ICE_FLOW_PROT_HDR_SZ_ARP;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDRS_L4_MASK)
+		/* An L3 header is required if L4 is specified */
+		return 0;
+
+	/* L4 headers */
+	if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_ICMP)
+		sz += ICE_FLOW_PROT_HDR_SZ_ICMP;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_TCP)
+		sz += ICE_FLOW_PROT_HDR_SZ_TCP;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_UDP)
+		sz += ICE_FLOW_PROT_HDR_SZ_UDP;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_SCTP)
+		sz += ICE_FLOW_PROT_HDR_SZ_SCTP;
+
+	return sz;
+}
+
+/**
+ * ice_flow_proc_seg_hdrs - process protocol headers present in pkt segments
+ * @params: information about the flow to be processed
+ *
+ * This function identifies the packet types associated with the protocol
+ * headers being present in packet segments of the specified flow profile.
+ */
+static int ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
+{
+	struct ice_flow_prof *prof;
+	u8 i;
+
+	memset(params->ptypes, 0xff, sizeof(params->ptypes));
+
+	prof = params->prof;
+
+	for (i = 0; i < params->prof->segs_cnt; i++) {
+		const unsigned long *src;
+		u32 hdrs;
+
+		hdrs = prof->segs[i].hdrs;
+
+		if (hdrs & ICE_FLOW_SEG_HDR_ETH) {
+			src = !i ? (const unsigned long *)ice_ptypes_mac_ofos :
+				(const unsigned long *)ice_ptypes_mac_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		}
+
+		if (i && hdrs & ICE_FLOW_SEG_HDR_VLAN) {
+			src = (const unsigned long *)ice_ptypes_macvlan_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		}
+
+		if (!i && hdrs & ICE_FLOW_SEG_HDR_ARP) {
+			bitmap_and(params->ptypes, params->ptypes,
+				   (const unsigned long *)ice_ptypes_arp_of,
+				   ICE_FLOW_PTYPE_MAX);
+		}
+
+		if ((hdrs & ICE_FLOW_SEG_HDR_IPV4) &&
+		    (hdrs & ICE_FLOW_SEG_HDR_IPV_OTHER)) {
+			src = i ? (const unsigned long *)ice_ptypes_ipv4_il :
+				(const unsigned long *)ice_ptypes_ipv4_ofos_all;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if ((hdrs & ICE_FLOW_SEG_HDR_IPV6) &&
+			   (hdrs & ICE_FLOW_SEG_HDR_IPV_OTHER)) {
+			src = i ? (const unsigned long *)ice_ptypes_ipv6_il :
+				(const unsigned long *)ice_ptypes_ipv6_ofos_all;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if ((hdrs & ICE_FLOW_SEG_HDR_IPV4) &&
+			   !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER)) {
+			src = !i ? (const unsigned long *)ice_ptypes_ipv4_ofos_no_l4 :
+				(const unsigned long *)ice_ptypes_ipv4_il_no_l4;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_IPV4) {
+			src = !i ? (const unsigned long *)ice_ptypes_ipv4_ofos :
+				(const unsigned long *)ice_ptypes_ipv4_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if ((hdrs & ICE_FLOW_SEG_HDR_IPV6) &&
+			   !(hdrs & ICE_FLOW_SEG_HDRS_L4_MASK_NO_OTHER)) {
+			src = !i ? (const unsigned long *)ice_ptypes_ipv6_ofos_no_l4 :
+				(const unsigned long *)ice_ptypes_ipv6_il_no_l4;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_IPV6) {
+			src = !i ? (const unsigned long *)ice_ptypes_ipv6_ofos :
+				(const unsigned long *)ice_ptypes_ipv6_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		}
+
+		if (hdrs & ICE_FLOW_SEG_HDR_ETH_NON_IP) {
+			src = (const unsigned long *)ice_ptypes_mac_non_ip_ofos;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_PPPOE) {
+			src = (const unsigned long *)ice_ptypes_pppoe;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else {
+			src = (const unsigned long *)ice_ptypes_pppoe;
+			bitmap_andnot(params->ptypes, params->ptypes, src,
+				      ICE_FLOW_PTYPE_MAX);
+		}
+
+		if (hdrs & ICE_FLOW_SEG_HDR_UDP) {
+			src = (const unsigned long *)ice_ptypes_udp_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_TCP) {
+			bitmap_and(params->ptypes, params->ptypes,
+				   (const unsigned long *)ice_ptypes_tcp_il,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_SCTP) {
+			src = (const unsigned long *)ice_ptypes_sctp_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		}
+
+		if (hdrs & ICE_FLOW_SEG_HDR_ICMP) {
+			src = !i ? (const unsigned long *)ice_ptypes_icmp_of :
+				(const unsigned long *)ice_ptypes_icmp_il;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GRE) {
+			if (!i) {
+				src = (const unsigned long *)ice_ptypes_gre_of;
+				bitmap_and(params->ptypes, params->ptypes,
+					   src, ICE_FLOW_PTYPE_MAX);
+			}
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPC) {
+			src = (const unsigned long *)ice_ptypes_gtpc;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPC_TEID) {
+			src = (const unsigned long *)ice_ptypes_gtpc_tid;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_DWN) {
+			src = (const unsigned long *)ice_ptypes_gtpu;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+
+			/* Attributes for GTP packet with downlink */
+			params->attr = ice_attr_gtpu_down;
+			params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_down);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_UP) {
+			src = (const unsigned long *)ice_ptypes_gtpu;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+
+			/* Attributes for GTP packet with uplink */
+			params->attr = ice_attr_gtpu_up;
+			params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_up);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_EH) {
+			src = (const unsigned long *)ice_ptypes_gtpu;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+
+			/* Attributes for GTP packet with Extension Header */
+			params->attr = ice_attr_gtpu_eh;
+			params->attr_cnt = ARRAY_SIZE(ice_attr_gtpu_eh);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GTPU_IP) {
+			src = (const unsigned long *)ice_ptypes_gtpu;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_L2TPV3) {
+			src = (const unsigned long *)ice_ptypes_l2tpv3;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_ESP) {
+			src = (const unsigned long *)ice_ptypes_esp;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_AH) {
+			src = (const unsigned long *)ice_ptypes_ah;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_NAT_T_ESP) {
+			src = (const unsigned long *)ice_ptypes_nat_t_esp;
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		}
+
+		if (hdrs & ICE_FLOW_SEG_HDR_PFCP) {
+			if (hdrs & ICE_FLOW_SEG_HDR_PFCP_NODE)
+				src = (const unsigned long *)ice_ptypes_pfcp_node;
+			else
+				src = (const unsigned long *)ice_ptypes_pfcp_session;
+
+			bitmap_and(params->ptypes, params->ptypes, src,
+				   ICE_FLOW_PTYPE_MAX);
+		} else {
+			src = (const unsigned long *)ice_ptypes_pfcp_node;
+			bitmap_andnot(params->ptypes, params->ptypes, src,
+				      ICE_FLOW_PTYPE_MAX);
+
+			src = (const unsigned long *)ice_ptypes_pfcp_session;
+			bitmap_andnot(params->ptypes, params->ptypes, src,
+				      ICE_FLOW_PTYPE_MAX);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_flow_xtract_fld - Create an extraction sequence entry for the given field
+ * @hw: pointer to the HW struct
+ * @params: information about the flow to be processed
+ * @seg: packet segment index of the field to be extracted
+ * @fld: ID of field to be extracted
+ * @match: bit field of all fields
+ *
+ * This function determines the protocol ID, offset, and size of the given
+ * field. It then allocates one or more extraction sequence entries for the
+ * given field, and fill the entries with protocol ID and offset information.
+ */
+static int
+ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
+		    u8 seg, enum ice_flow_field fld, u64 match)
+{
+	enum ice_flow_field sib = ICE_FLOW_FIELD_IDX_MAX;
+	enum ice_prot_id prot_id = ICE_PROT_ID_INVAL;
+	u8 fv_words = hw->blk[params->blk].es.fvw;
+	struct ice_flow_fld_info *flds;
+	u16 cnt, ese_bits, i;
+	u16 sib_mask = 0;
+	u16 mask;
+	u16 off;
+
+	flds = params->prof->segs[seg].fields;
+
+	switch (fld) {
+	case ICE_FLOW_FIELD_IDX_ETH_DA:
+	case ICE_FLOW_FIELD_IDX_ETH_SA:
+	case ICE_FLOW_FIELD_IDX_S_VLAN:
+	case ICE_FLOW_FIELD_IDX_C_VLAN:
+		prot_id = seg == 0 ? ICE_PROT_MAC_OF_OR_S : ICE_PROT_MAC_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_ETH_TYPE:
+		prot_id = seg == 0 ? ICE_PROT_ETYPE_OL : ICE_PROT_ETYPE_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_IPV4_DSCP:
+		prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_IPV6_DSCP:
+		prot_id = seg == 0 ? ICE_PROT_IPV6_OF_OR_S : ICE_PROT_IPV6_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_IPV4_TTL:
+	case ICE_FLOW_FIELD_IDX_IPV4_PROT:
+		prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL;
+
+		/* TTL and PROT share the same extraction seq. entry.
+		 * Each is considered a sibling to the other in terms of sharing
+		 * the same extraction sequence entry.
+		 */
+		if (fld == ICE_FLOW_FIELD_IDX_IPV4_TTL)
+			sib = ICE_FLOW_FIELD_IDX_IPV4_PROT;
+		else if (fld == ICE_FLOW_FIELD_IDX_IPV4_PROT)
+			sib = ICE_FLOW_FIELD_IDX_IPV4_TTL;
+
+		/* If the sibling field is also included, that field's
+		 * mask needs to be included.
+		 */
+		if (match & BIT(sib))
+			sib_mask = ice_flds_info[sib].mask;
+		break;
+	case ICE_FLOW_FIELD_IDX_IPV6_TTL:
+	case ICE_FLOW_FIELD_IDX_IPV6_PROT:
+		prot_id = seg == 0 ? ICE_PROT_IPV6_OF_OR_S : ICE_PROT_IPV6_IL;
+
+		/* TTL and PROT share the same extraction seq. entry.
+		 * Each is considered a sibling to the other in terms of sharing
+		 * the same extraction sequence entry.
+		 */
+		if (fld == ICE_FLOW_FIELD_IDX_IPV6_TTL)
+			sib = ICE_FLOW_FIELD_IDX_IPV6_PROT;
+		else if (fld == ICE_FLOW_FIELD_IDX_IPV6_PROT)
+			sib = ICE_FLOW_FIELD_IDX_IPV6_TTL;
+
+		/* If the sibling field is also included, that field's
+		 * mask needs to be included.
+		 */
+		if (match & BIT(sib))
+			sib_mask = ice_flds_info[sib].mask;
+		break;
+	case ICE_FLOW_FIELD_IDX_IPV4_SA:
+	case ICE_FLOW_FIELD_IDX_IPV4_DA:
+		prot_id = seg == 0 ? ICE_PROT_IPV4_OF_OR_S : ICE_PROT_IPV4_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_IPV6_SA:
+	case ICE_FLOW_FIELD_IDX_IPV6_DA:
+		prot_id = seg == 0 ? ICE_PROT_IPV6_OF_OR_S : ICE_PROT_IPV6_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_TCP_SRC_PORT:
+	case ICE_FLOW_FIELD_IDX_TCP_DST_PORT:
+	case ICE_FLOW_FIELD_IDX_TCP_FLAGS:
+		prot_id = ICE_PROT_TCP_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_UDP_SRC_PORT:
+	case ICE_FLOW_FIELD_IDX_UDP_DST_PORT:
+		prot_id = ICE_PROT_UDP_IL_OR_S;
+		break;
+	case ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT:
+	case ICE_FLOW_FIELD_IDX_SCTP_DST_PORT:
+		prot_id = ICE_PROT_SCTP_IL;
+		break;
+	case ICE_FLOW_FIELD_IDX_GTPC_TEID:
+	case ICE_FLOW_FIELD_IDX_GTPU_IP_TEID:
+	case ICE_FLOW_FIELD_IDX_GTPU_UP_TEID:
+	case ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID:
+	case ICE_FLOW_FIELD_IDX_GTPU_EH_TEID:
+	case ICE_FLOW_FIELD_IDX_GTPU_EH_QFI:
+		/* GTP is accessed through UDP OF protocol */
+		prot_id = ICE_PROT_UDP_OF;
+		break;
+	case ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID:
+		prot_id = ICE_PROT_PPPOE;
+		break;
+	case ICE_FLOW_FIELD_IDX_PFCP_SEID:
+		prot_id = ICE_PROT_UDP_IL_OR_S;
+		break;
+	case ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID:
+		prot_id = ICE_PROT_L2TPV3;
+		break;
+	case ICE_FLOW_FIELD_IDX_ESP_SPI:
+		prot_id = ICE_PROT_ESP_F;
+		break;
+	case ICE_FLOW_FIELD_IDX_AH_SPI:
+		prot_id = ICE_PROT_ESP_2;
+		break;
+	case ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI:
+		prot_id = ICE_PROT_UDP_IL_OR_S;
+		break;
+	case ICE_FLOW_FIELD_IDX_ARP_SIP:
+	case ICE_FLOW_FIELD_IDX_ARP_DIP:
+	case ICE_FLOW_FIELD_IDX_ARP_SHA:
+	case ICE_FLOW_FIELD_IDX_ARP_DHA:
+	case ICE_FLOW_FIELD_IDX_ARP_OP:
+		prot_id = ICE_PROT_ARP_OF;
+		break;
+	case ICE_FLOW_FIELD_IDX_ICMP_TYPE:
+	case ICE_FLOW_FIELD_IDX_ICMP_CODE:
+		/* ICMP type and code share the same extraction seq. entry */
+		prot_id = (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV4) ?
+				ICE_PROT_ICMP_IL : ICE_PROT_ICMPV6_IL;
+		sib = fld == ICE_FLOW_FIELD_IDX_ICMP_TYPE ?
+			ICE_FLOW_FIELD_IDX_ICMP_CODE :
+			ICE_FLOW_FIELD_IDX_ICMP_TYPE;
+		break;
+	case ICE_FLOW_FIELD_IDX_GRE_KEYID:
+		prot_id = ICE_PROT_GRE_OF;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	/* Each extraction sequence entry is a word in size, and extracts a
+	 * word-aligned offset from a protocol header.
+	 */
+	ese_bits = ICE_FLOW_FV_EXTRACT_SZ * BITS_PER_BYTE;
+
+	flds[fld].xtrct.prot_id = prot_id;
+	flds[fld].xtrct.off = (ice_flds_info[fld].off / ese_bits) *
+		ICE_FLOW_FV_EXTRACT_SZ;
+	flds[fld].xtrct.disp = (u8)(ice_flds_info[fld].off % ese_bits);
+	flds[fld].xtrct.idx = params->es_cnt;
+	flds[fld].xtrct.mask = ice_flds_info[fld].mask;
+
+	/* Adjust the next field-entry index after accommodating the number of
+	 * entries this field consumes
+	 */
+	cnt = DIV_ROUND_UP(flds[fld].xtrct.disp + ice_flds_info[fld].size,
+			   ese_bits);
+
+	/* Fill in the extraction sequence entries needed for this field */
+	off = flds[fld].xtrct.off;
+	mask = flds[fld].xtrct.mask;
+	for (i = 0; i < cnt; i++) {
+		/* Only consume an extraction sequence entry if there is no
+		 * sibling field associated with this field or the sibling entry
+		 * already extracts the word shared with this field.
+		 */
+		if (sib == ICE_FLOW_FIELD_IDX_MAX ||
+		    flds[sib].xtrct.prot_id == ICE_PROT_ID_INVAL ||
+		    flds[sib].xtrct.off != off) {
+			u8 idx;
+
+			/* Make sure the number of extraction sequence required
+			 * does not exceed the block's capability
+			 */
+			if (params->es_cnt >= fv_words)
+				return -ENOSPC;
+
+			/* some blocks require a reversed field vector layout */
+			if (hw->blk[params->blk].es.reverse)
+				idx = fv_words - params->es_cnt - 1;
+			else
+				idx = params->es_cnt;
+
+			params->es[idx].prot_id = prot_id;
+			params->es[idx].off = off;
+			params->mask[idx] = mask | sib_mask;
+			params->es_cnt++;
+		}
+
+		off += ICE_FLOW_FV_EXTRACT_SZ;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_flow_xtract_raws - Create extract sequence entries for raw bytes
+ * @hw: pointer to the HW struct
+ * @params: information about the flow to be processed
+ * @seg: index of packet segment whose raw fields are to be extracted
+ */
+static int
+ice_flow_xtract_raws(struct ice_hw *hw, struct ice_flow_prof_params *params,
+		     u8 seg)
+{
+	u16 fv_words;
+	u16 hdrs_sz;
+	u8 i;
+
+	if (!params->prof->segs[seg].raws_cnt)
+		return 0;
+
+	if (params->prof->segs[seg].raws_cnt >
+	    ARRAY_SIZE(params->prof->segs[seg].raws))
+		return -ENOSPC;
+
+	/* Offsets within the segment headers are not supported */
+	hdrs_sz = ice_flow_calc_seg_sz(params, seg);
+	if (!hdrs_sz)
+		return -EINVAL;
+
+	fv_words = hw->blk[params->blk].es.fvw;
+
+	for (i = 0; i < params->prof->segs[seg].raws_cnt; i++) {
+		struct ice_flow_seg_fld_raw *raw;
+		u16 off, cnt, j;
+
+		raw = &params->prof->segs[seg].raws[i];
+
+		/* Storing extraction information */
+		raw->info.xtrct.prot_id = ICE_PROT_MAC_OF_OR_S;
+		raw->info.xtrct.off = (raw->off / ICE_FLOW_FV_EXTRACT_SZ) *
+			ICE_FLOW_FV_EXTRACT_SZ;
+		raw->info.xtrct.disp = (raw->off % ICE_FLOW_FV_EXTRACT_SZ) *
+			BITS_PER_BYTE;
+		raw->info.xtrct.idx = params->es_cnt;
+
+		/* Determine the number of field vector entries this raw field
+		 * consumes.
+		 */
+		cnt = DIV_ROUND_UP(raw->info.xtrct.disp +
+				   (raw->info.src.last * BITS_PER_BYTE),
+				   (ICE_FLOW_FV_EXTRACT_SZ * BITS_PER_BYTE));
+		off = raw->info.xtrct.off;
+		for (j = 0; j < cnt; j++) {
+			u16 idx;
+
+			/* Make sure the number of extraction sequence required
+			 * does not exceed the block's capability
+			 */
+			if (params->es_cnt >= hw->blk[params->blk].es.count ||
+			    params->es_cnt >= ICE_MAX_FV_WORDS)
+				return -ENOSPC;
+
+			/* some blocks require a reversed field vector layout */
+			if (hw->blk[params->blk].es.reverse)
+				idx = fv_words - params->es_cnt - 1;
+			else
+				idx = params->es_cnt;
+
+			params->es[idx].prot_id = raw->info.xtrct.prot_id;
+			params->es[idx].off = off;
+			params->es_cnt++;
+			off += ICE_FLOW_FV_EXTRACT_SZ;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_flow_create_xtrct_seq - Create an extraction sequence for given segments
+ * @hw: pointer to the HW struct
+ * @params: information about the flow to be processed
+ *
+ * This function iterates through all matched fields in the given segments, and
+ * creates an extraction sequence for the fields.
+ */
+static int
+ice_flow_create_xtrct_seq(struct ice_hw *hw,
+			  struct ice_flow_prof_params *params)
+{
+	struct ice_flow_prof *prof = params->prof;
+	int status = 0;
+	u8 i;
+
+	for (i = 0; i < prof->segs_cnt; i++) {
+		u64 match = params->prof->segs[i].match;
+		enum ice_flow_field j;
+
+		for_each_set_bit(j, (unsigned long *)&match,
+				 ICE_FLOW_FIELD_IDX_MAX) {
+			status = ice_flow_xtract_fld(hw, params, i, j, match);
+			if (status)
+				return status;
+			clear_bit(j, (unsigned long *)&match);
+		}
+
+		/* Process raw matching bytes */
+		status = ice_flow_xtract_raws(hw, params, i);
+		if (status)
+			return status;
+	}
+
+	return status;
+}
+
+/**
+ * ice_flow_proc_segs - process all packet segments associated with a profile
+ * @hw: pointer to the HW struct
+ * @params: information about the flow to be processed
+ */
+static int
+ice_flow_proc_segs(struct ice_hw *hw, struct ice_flow_prof_params *params)
+{
+	int status;
+
+	status = ice_flow_proc_seg_hdrs(params);
+	if (status)
+		return status;
+
+	status = ice_flow_create_xtrct_seq(hw, params);
+	if (status)
+		return status;
+
+	switch (params->blk) {
+	case ICE_BLK_FD:
+	case ICE_BLK_RSS:
+		status = 0;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return status;
+}
+
+#define ICE_FLOW_FIND_PROF_CHK_FLDS	0x00000001
+#define ICE_FLOW_FIND_PROF_CHK_VSI	0x00000002
+#define ICE_FLOW_FIND_PROF_NOT_CHK_DIR	0x00000004
+
+/**
+ * ice_flow_find_prof_conds - Find a profile matching headers and conditions
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @dir: flow direction
+ * @segs: array of one or more packet segments that describe the flow
+ * @segs_cnt: number of packet segments provided
+ * @vsi_handle: software VSI handle to check VSI (ICE_FLOW_FIND_PROF_CHK_VSI)
+ * @conds: additional conditions to be checked (ICE_FLOW_FIND_PROF_CHK_*)
+ */
+static struct ice_flow_prof *
+ice_flow_find_prof_conds(struct ice_hw *hw, enum ice_block blk,
+			 enum ice_flow_dir dir, struct ice_flow_seg_info *segs,
+			 u8 segs_cnt, u16 vsi_handle, u32 conds)
+{
+	struct ice_flow_prof *p, *prof = NULL;
+
+	mutex_lock(&hw->fl_profs_locks[blk]);
+	list_for_each_entry(p, &hw->fl_profs[blk], l_entry)
+		if ((p->dir == dir || conds & ICE_FLOW_FIND_PROF_NOT_CHK_DIR) &&
+		    segs_cnt && segs_cnt == p->segs_cnt) {
+			u8 i;
+
+			/* Check for profile-VSI association if specified */
+			if ((conds & ICE_FLOW_FIND_PROF_CHK_VSI) &&
+			    ice_is_vsi_valid(hw, vsi_handle) &&
+			    !test_bit(vsi_handle, p->vsis))
+				continue;
+
+			/* Protocol headers must be checked. Matched fields are
+			 * checked if specified.
+			 */
+			for (i = 0; i < segs_cnt; i++)
+				if (segs[i].hdrs != p->segs[i].hdrs ||
+				    ((conds & ICE_FLOW_FIND_PROF_CHK_FLDS) &&
+				     segs[i].match != p->segs[i].match))
+					break;
+
+			/* A match is found if all segments are matched */
+			if (i == segs_cnt) {
+				prof = p;
+				break;
+			}
+		}
+	mutex_unlock(&hw->fl_profs_locks[blk]);
+
+	return prof;
+}
+
+/**
+ * ice_flow_find_prof_id - Look up a profile with given profile ID
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @prof_id: unique ID to identify this flow profile
+ */
+static struct ice_flow_prof *
+ice_flow_find_prof_id(struct ice_hw *hw, enum ice_block blk, u64 prof_id)
+{
+	struct ice_flow_prof *p;
+
+	list_for_each_entry(p, &hw->fl_profs[blk], l_entry)
+		if (p->id == prof_id)
+			return p;
+
+	return NULL;
+}
+
+/**
+ * ice_flow_rem_entry_sync - Remove a flow entry
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @entry: flow entry to be removed
+ */
+static int
+ice_flow_rem_entry_sync(struct ice_hw *hw, enum ice_block __always_unused blk,
+			struct ice_flow_entry *entry)
+{
+	if (!entry)
+		return -EINVAL;
+
+	list_del(&entry->l_entry);
+
+	devm_kfree(ice_hw_to_dev(hw), entry->entry);
+	devm_kfree(ice_hw_to_dev(hw), entry);
+
+	return 0;
+}
+
+/**
+ * ice_flow_add_prof_sync - Add a flow profile for packet segments and fields
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @dir: flow direction
+ * @prof_id: unique ID to identify this flow profile
+ * @segs: array of one or more packet segments that describe the flow
+ * @segs_cnt: number of packet segments provided
+ * @prof: stores the returned flow profile added
+ *
+ * Assumption: the caller has acquired the lock to the profile list
+ */
+static int
+ice_flow_add_prof_sync(struct ice_hw *hw, enum ice_block blk,
+		       enum ice_flow_dir dir, u64 prof_id,
+		       struct ice_flow_seg_info *segs, u8 segs_cnt,
+		       struct ice_flow_prof **prof)
+{
+	struct ice_flow_prof_params *params;
+	int status;
+	u8 i;
+
+	if (!prof)
+		return -EINVAL;
+
+	params = kzalloc(sizeof(*params), GFP_KERNEL);
+	if (!params)
+		return -ENOMEM;
+
+	params->prof = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*params->prof),
+				    GFP_KERNEL);
+	if (!params->prof) {
+		status = -ENOMEM;
+		goto free_params;
+	}
+
+	/* initialize extraction sequence to all invalid (0xff) */
+	for (i = 0; i < ICE_MAX_FV_WORDS; i++) {
+		params->es[i].prot_id = ICE_PROT_INVALID;
+		params->es[i].off = ICE_FV_OFFSET_INVAL;
+	}
+
+	params->blk = blk;
+	params->prof->id = prof_id;
+	params->prof->dir = dir;
+	params->prof->segs_cnt = segs_cnt;
+
+	/* Make a copy of the segments that need to be persistent in the flow
+	 * profile instance
+	 */
+	for (i = 0; i < segs_cnt; i++)
+		memcpy(&params->prof->segs[i], &segs[i], sizeof(*segs));
+
+	status = ice_flow_proc_segs(hw, params);
+	if (status) {
+		ice_debug(hw, ICE_DBG_FLOW, "Error processing a flow's packet segments\n");
+		goto out;
+	}
+
+	/* Add a HW profile for this flow profile */
+	status = ice_add_prof(hw, blk, prof_id, (u8 *)params->ptypes,
+			      params->attr, params->attr_cnt, params->es,
+			      params->mask);
+	if (status) {
+		ice_debug(hw, ICE_DBG_FLOW, "Error adding a HW flow profile\n");
+		goto out;
+	}
+
+	INIT_LIST_HEAD(&params->prof->entries);
+	mutex_init(&params->prof->entries_lock);
+	*prof = params->prof;
+
+out:
+	if (status)
+		devm_kfree(ice_hw_to_dev(hw), params->prof);
+free_params:
+	kfree(params);
+
+	return status;
+}
+
+/**
+ * ice_flow_rem_prof_sync - remove a flow profile
+ * @hw: pointer to the hardware structure
+ * @blk: classification stage
+ * @prof: pointer to flow profile to remove
+ *
+ * Assumption: the caller has acquired the lock to the profile list
+ */
+static int
+ice_flow_rem_prof_sync(struct ice_hw *hw, enum ice_block blk,
+		       struct ice_flow_prof *prof)
+{
+	int status;
+
+	/* Remove all remaining flow entries before removing the flow profile */
+	if (!list_empty(&prof->entries)) {
+		struct ice_flow_entry *e, *t;
+
+		mutex_lock(&prof->entries_lock);
+
+		list_for_each_entry_safe(e, t, &prof->entries, l_entry) {
+			status = ice_flow_rem_entry_sync(hw, blk, e);
+			if (status)
+				break;
+		}
+
+		mutex_unlock(&prof->entries_lock);
+	}
+
+	/* Remove all hardware profiles associated with this flow profile */
+	status = ice_rem_prof(hw, blk, prof->id);
+	if (!status) {
+		list_del(&prof->l_entry);
+		mutex_destroy(&prof->entries_lock);
+		devm_kfree(ice_hw_to_dev(hw), prof);
+	}
+
+	return status;
+}
+
+/**
+ * ice_flow_assoc_prof - associate a VSI with a flow profile
+ * @hw: pointer to the hardware structure
+ * @blk: classification stage
+ * @prof: pointer to flow profile
+ * @vsi_handle: software VSI handle
+ *
+ * Assumption: the caller has acquired the lock to the profile list
+ * and the software VSI handle has been validated
+ */
+static int
+ice_flow_assoc_prof(struct ice_hw *hw, enum ice_block blk,
+		    struct ice_flow_prof *prof, u16 vsi_handle)
+{
+	int status = 0;
+
+	if (!test_bit(vsi_handle, prof->vsis)) {
+		status = ice_add_prof_id_flow(hw, blk,
+					      ice_get_hw_vsi_num(hw,
+								 vsi_handle),
+					      prof->id);
+		if (!status)
+			set_bit(vsi_handle, prof->vsis);
+		else
+			ice_debug(hw, ICE_DBG_FLOW, "HW profile add failed, %d\n",
+				  status);
+	}
+
+	return status;
+}
+
+/**
+ * ice_flow_disassoc_prof - disassociate a VSI from a flow profile
+ * @hw: pointer to the hardware structure
+ * @blk: classification stage
+ * @prof: pointer to flow profile
+ * @vsi_handle: software VSI handle
+ *
+ * Assumption: the caller has acquired the lock to the profile list
+ * and the software VSI handle has been validated
+ */
+static int
+ice_flow_disassoc_prof(struct ice_hw *hw, enum ice_block blk,
+		       struct ice_flow_prof *prof, u16 vsi_handle)
+{
+	int status = 0;
+
+	if (test_bit(vsi_handle, prof->vsis)) {
+		status = ice_rem_prof_id_flow(hw, blk,
+					      ice_get_hw_vsi_num(hw,
+								 vsi_handle),
+					      prof->id);
+		if (!status)
+			clear_bit(vsi_handle, prof->vsis);
+		else
+			ice_debug(hw, ICE_DBG_FLOW, "HW profile remove failed, %d\n",
+				  status);
+	}
+
+	return status;
+}
+
+/**
+ * ice_flow_add_prof - Add a flow profile for packet segments and matched fields
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @dir: flow direction
+ * @prof_id: unique ID to identify this flow profile
+ * @segs: array of one or more packet segments that describe the flow
+ * @segs_cnt: number of packet segments provided
+ * @prof: stores the returned flow profile added
+ */
+int
+ice_flow_add_prof(struct ice_hw *hw, enum ice_block blk, enum ice_flow_dir dir,
+		  u64 prof_id, struct ice_flow_seg_info *segs, u8 segs_cnt,
+		  struct ice_flow_prof **prof)
+{
+	int status;
+
+	if (segs_cnt > ICE_FLOW_SEG_MAX)
+		return -ENOSPC;
+
+	if (!segs_cnt)
+		return -EINVAL;
+
+	if (!segs)
+		return -EINVAL;
+
+	status = ice_flow_val_hdrs(segs, segs_cnt);
+	if (status)
+		return status;
+
+	mutex_lock(&hw->fl_profs_locks[blk]);
+
+	status = ice_flow_add_prof_sync(hw, blk, dir, prof_id, segs, segs_cnt,
+					prof);
+	if (!status)
+		list_add(&(*prof)->l_entry, &hw->fl_profs[blk]);
+
+	mutex_unlock(&hw->fl_profs_locks[blk]);
+
+	return status;
+}
+
+/**
+ * ice_flow_rem_prof - Remove a flow profile and all entries associated with it
+ * @hw: pointer to the HW struct
+ * @blk: the block for which the flow profile is to be removed
+ * @prof_id: unique ID of the flow profile to be removed
+ */
+int ice_flow_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 prof_id)
+{
+	struct ice_flow_prof *prof;
+	int status;
+
+	mutex_lock(&hw->fl_profs_locks[blk]);
+
+	prof = ice_flow_find_prof_id(hw, blk, prof_id);
+	if (!prof) {
+		status = -ENOENT;
+		goto out;
+	}
+
+	/* prof becomes invalid after the call */
+	status = ice_flow_rem_prof_sync(hw, blk, prof);
+
+out:
+	mutex_unlock(&hw->fl_profs_locks[blk]);
+
+	return status;
+}
+
+/**
+ * ice_flow_add_entry - Add a flow entry
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @prof_id: ID of the profile to add a new flow entry to
+ * @entry_id: unique ID to identify this flow entry
+ * @vsi_handle: software VSI handle for the flow entry
+ * @prio: priority of the flow entry
+ * @data: pointer to a data buffer containing flow entry's match values/masks
+ * @entry_h: pointer to buffer that receives the new flow entry's handle
+ */
+int
+ice_flow_add_entry(struct ice_hw *hw, enum ice_block blk, u64 prof_id,
+		   u64 entry_id, u16 vsi_handle, enum ice_flow_priority prio,
+		   void *data, u64 *entry_h)
+{
+	struct ice_flow_entry *e = NULL;
+	struct ice_flow_prof *prof;
+	int status;
+
+	/* No flow entry data is expected for RSS */
+	if (!entry_h || (!data && blk != ICE_BLK_RSS))
+		return -EINVAL;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+
+	mutex_lock(&hw->fl_profs_locks[blk]);
+
+	prof = ice_flow_find_prof_id(hw, blk, prof_id);
+	if (!prof) {
+		status = -ENOENT;
+	} else {
+		/* Allocate memory for the entry being added and associate
+		 * the VSI to the found flow profile
+		 */
+		e = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*e), GFP_KERNEL);
+		if (!e)
+			status = -ENOMEM;
+		else
+			status = ice_flow_assoc_prof(hw, blk, prof, vsi_handle);
+	}
+
+	mutex_unlock(&hw->fl_profs_locks[blk]);
+	if (status)
+		goto out;
+
+	e->id = entry_id;
+	e->vsi_handle = vsi_handle;
+	e->prof = prof;
+	e->priority = prio;
+
+	switch (blk) {
+	case ICE_BLK_FD:
+	case ICE_BLK_RSS:
+		break;
+	default:
+		status = -EOPNOTSUPP;
+		goto out;
+	}
+
+	mutex_lock(&prof->entries_lock);
+	list_add(&e->l_entry, &prof->entries);
+	mutex_unlock(&prof->entries_lock);
+
+	*entry_h = ICE_FLOW_ENTRY_HNDL(e);
+
+out:
+	if (status && e) {
+		devm_kfree(ice_hw_to_dev(hw), e->entry);
+		devm_kfree(ice_hw_to_dev(hw), e);
+	}
+
+	return status;
+}
+
+/**
+ * ice_flow_rem_entry - Remove a flow entry
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @entry_h: handle to the flow entry to be removed
+ */
+int ice_flow_rem_entry(struct ice_hw *hw, enum ice_block blk, u64 entry_h)
+{
+	struct ice_flow_entry *entry;
+	struct ice_flow_prof *prof;
+	int status = 0;
+
+	if (entry_h == ICE_FLOW_ENTRY_HANDLE_INVAL)
+		return -EINVAL;
+
+	entry = ICE_FLOW_ENTRY_PTR(entry_h);
+
+	/* Retain the pointer to the flow profile as the entry will be freed */
+	prof = entry->prof;
+
+	if (prof) {
+		mutex_lock(&prof->entries_lock);
+		status = ice_flow_rem_entry_sync(hw, blk, entry);
+		mutex_unlock(&prof->entries_lock);
+	}
+
+	return status;
+}
+
+/**
+ * ice_flow_set_fld_ext - specifies locations of field from entry's input buffer
+ * @seg: packet segment the field being set belongs to
+ * @fld: field to be set
+ * @field_type: type of the field
+ * @val_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of the value to match from
+ *           entry's input buffer
+ * @mask_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of mask value from entry's
+ *            input buffer
+ * @last_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of last/upper value from
+ *            entry's input buffer
+ *
+ * This helper function stores information of a field being matched, including
+ * the type of the field and the locations of the value to match, the mask, and
+ * the upper-bound value in the start of the input buffer for a flow entry.
+ * This function should only be used for fixed-size data structures.
+ *
+ * This function also opportunistically determines the protocol headers to be
+ * present based on the fields being set. Some fields cannot be used alone to
+ * determine the protocol headers present. Sometimes, fields for particular
+ * protocol headers are not matched. In those cases, the protocol headers
+ * must be explicitly set.
+ */
+static void
+ice_flow_set_fld_ext(struct ice_flow_seg_info *seg, enum ice_flow_field fld,
+		     enum ice_flow_fld_match_type field_type, u16 val_loc,
+		     u16 mask_loc, u16 last_loc)
+{
+	u64 bit = BIT_ULL(fld);
+
+	seg->match |= bit;
+	if (field_type == ICE_FLOW_FLD_TYPE_RANGE)
+		seg->range |= bit;
+
+	seg->fields[fld].type = field_type;
+	seg->fields[fld].src.val = val_loc;
+	seg->fields[fld].src.mask = mask_loc;
+	seg->fields[fld].src.last = last_loc;
+
+	ICE_FLOW_SET_HDRS(seg, ice_flds_info[fld].hdr);
+}
+
+/**
+ * ice_flow_set_fld - specifies locations of field from entry's input buffer
+ * @seg: packet segment the field being set belongs to
+ * @fld: field to be set
+ * @val_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of the value to match from
+ *           entry's input buffer
+ * @mask_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of mask value from entry's
+ *            input buffer
+ * @last_loc: if not ICE_FLOW_FLD_OFF_INVAL, location of last/upper value from
+ *            entry's input buffer
+ * @range: indicate if field being matched is to be in a range
+ *
+ * This function specifies the locations, in the form of byte offsets from the
+ * start of the input buffer for a flow entry, from where the value to match,
+ * the mask value, and upper value can be extracted. These locations are then
+ * stored in the flow profile. When adding a flow entry associated with the
+ * flow profile, these locations will be used to quickly extract the values and
+ * create the content of a match entry. This function should only be used for
+ * fixed-size data structures.
+ */
+void
+ice_flow_set_fld(struct ice_flow_seg_info *seg, enum ice_flow_field fld,
+		 u16 val_loc, u16 mask_loc, u16 last_loc, bool range)
+{
+	enum ice_flow_fld_match_type t = range ?
+		ICE_FLOW_FLD_TYPE_RANGE : ICE_FLOW_FLD_TYPE_REG;
+
+	ice_flow_set_fld_ext(seg, fld, t, val_loc, mask_loc, last_loc);
+}
+
+/**
+ * ice_flow_add_fld_raw - sets locations of a raw field from entry's input buf
+ * @seg: packet segment the field being set belongs to
+ * @off: offset of the raw field from the beginning of the segment in bytes
+ * @len: length of the raw pattern to be matched
+ * @val_loc: location of the value to match from entry's input buffer
+ * @mask_loc: location of mask value from entry's input buffer
+ *
+ * This function specifies the offset of the raw field to be match from the
+ * beginning of the specified packet segment, and the locations, in the form of
+ * byte offsets from the start of the input buffer for a flow entry, from where
+ * the value to match and the mask value to be extracted. These locations are
+ * then stored in the flow profile. When adding flow entries to the associated
+ * flow profile, these locations can be used to quickly extract the values to
+ * create the content of a match entry. This function should only be used for
+ * fixed-size data structures.
+ */
+void
+ice_flow_add_fld_raw(struct ice_flow_seg_info *seg, u16 off, u8 len,
+		     u16 val_loc, u16 mask_loc)
+{
+	if (seg->raws_cnt < ICE_FLOW_SEG_RAW_FLD_MAX) {
+		seg->raws[seg->raws_cnt].off = off;
+		seg->raws[seg->raws_cnt].info.type = ICE_FLOW_FLD_TYPE_SIZE;
+		seg->raws[seg->raws_cnt].info.src.val = val_loc;
+		seg->raws[seg->raws_cnt].info.src.mask = mask_loc;
+		/* The "last" field is used to store the length of the field */
+		seg->raws[seg->raws_cnt].info.src.last = len;
+	}
+
+	/* Overflows of "raws" will be handled as an error condition later in
+	 * the flow when this information is processed.
+	 */
+	seg->raws_cnt++;
+}
+
+/**
+ * ice_flow_rem_vsi_prof - remove VSI from flow profile
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @prof_id: unique ID to identify this flow profile
+ *
+ * This function removes the flow entries associated to the input
+ * VSI handle and disassociate the VSI from the flow profile.
+ */
+int ice_flow_rem_vsi_prof(struct ice_hw *hw, u16 vsi_handle, u64 prof_id)
+{
+	struct ice_flow_prof *prof;
+	int status = 0;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+
+	/* find flow profile pointer with input package block and profile ID */
+	prof = ice_flow_find_prof_id(hw, ICE_BLK_FD, prof_id);
+	if (!prof) {
+		ice_debug(hw, ICE_DBG_PKG, "Cannot find flow profile id=%llu\n",
+			  prof_id);
+		return -ENOENT;
+	}
+
+	/* Remove all remaining flow entries before removing the flow profile */
+	if (!list_empty(&prof->entries)) {
+		struct ice_flow_entry *e, *t;
+
+		mutex_lock(&prof->entries_lock);
+		list_for_each_entry_safe(e, t, &prof->entries, l_entry) {
+			if (e->vsi_handle != vsi_handle)
+				continue;
+
+			status = ice_flow_rem_entry_sync(hw, ICE_BLK_FD, e);
+			if (status)
+				break;
+		}
+		mutex_unlock(&prof->entries_lock);
+	}
+	if (status)
+		return status;
+
+	/* disassociate the flow profile from sw VSI handle */
+	status = ice_flow_disassoc_prof(hw, ICE_BLK_FD, prof, vsi_handle);
+	if (status)
+		ice_debug(hw, ICE_DBG_PKG, "ice_flow_disassoc_prof() failed with status=%d\n",
+			  status);
+	return status;
+}
+
+#define ICE_FLOW_RSS_SEG_HDR_L2_MASKS \
+	(ICE_FLOW_SEG_HDR_ETH | ICE_FLOW_SEG_HDR_VLAN)
+
+#define ICE_FLOW_RSS_SEG_HDR_L3_MASKS \
+	(ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6)
+
+#define ICE_FLOW_RSS_SEG_HDR_L4_MASKS \
+	(ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_SCTP)
+
+#define ICE_FLOW_RSS_SEG_HDR_VAL_MASKS \
+	(ICE_FLOW_RSS_SEG_HDR_L2_MASKS | \
+	 ICE_FLOW_RSS_SEG_HDR_L3_MASKS | \
+	 ICE_FLOW_RSS_SEG_HDR_L4_MASKS)
+
+/**
+ * ice_flow_set_rss_seg_info - setup packet segments for RSS
+ * @segs: pointer to the flow field segment(s)
+ * @hash_fields: fields to be hashed on for the segment(s)
+ * @flow_hdr: protocol header fields within a packet segment
+ *
+ * Helper function to extract fields from hash bitmap and use flow
+ * header value to set flow field segment for further use in flow
+ * profile entry or removal.
+ */
+static int
+ice_flow_set_rss_seg_info(struct ice_flow_seg_info *segs, u64 hash_fields,
+			  u32 flow_hdr)
+{
+	u64 val;
+	u8 i;
+
+	for_each_set_bit(i, (unsigned long *)&hash_fields,
+			 ICE_FLOW_FIELD_IDX_MAX)
+		ice_flow_set_fld(segs, (enum ice_flow_field)i,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+
+	ICE_FLOW_SET_HDRS(segs, flow_hdr);
+
+	if (segs->hdrs & ~ICE_FLOW_RSS_SEG_HDR_VAL_MASKS &
+	    ~ICE_FLOW_RSS_HDRS_INNER_MASK & ~ICE_FLOW_SEG_HDR_IPV_OTHER)
+		return -EINVAL;
+
+	val = (u64)(segs->hdrs & ICE_FLOW_RSS_SEG_HDR_L3_MASKS);
+	if (val && !is_power_of_2(val))
+		return -EIO;
+
+	val = (u64)(segs->hdrs & ICE_FLOW_RSS_SEG_HDR_L4_MASKS);
+	if (val && !is_power_of_2(val))
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * ice_rem_vsi_rss_list - remove VSI from RSS list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ *
+ * Remove the VSI from all RSS configurations in the list.
+ */
+void ice_rem_vsi_rss_list(struct ice_hw *hw, u16 vsi_handle)
+{
+	struct ice_rss_cfg *r, *tmp;
+
+	if (list_empty(&hw->rss_list_head))
+		return;
+
+	mutex_lock(&hw->rss_locks);
+	list_for_each_entry_safe(r, tmp, &hw->rss_list_head, l_entry)
+		if (test_and_clear_bit(vsi_handle, r->vsis))
+			if (bitmap_empty(r->vsis, ICE_MAX_VSI)) {
+				list_del(&r->l_entry);
+				devm_kfree(ice_hw_to_dev(hw), r);
+			}
+	mutex_unlock(&hw->rss_locks);
+}
+
+/**
+ * ice_rem_vsi_rss_cfg - remove RSS configurations associated with VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ *
+ * This function will iterate through all flow profiles and disassociate
+ * the VSI from that profile. If the flow profile has no VSIs it will
+ * be removed.
+ */
+int ice_rem_vsi_rss_cfg(struct ice_hw *hw, u16 vsi_handle)
+{
+	const enum ice_block blk = ICE_BLK_RSS;
+	struct ice_flow_prof *p, *t;
+	int status = 0;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+
+	if (list_empty(&hw->fl_profs[blk]))
+		return 0;
+
+	mutex_lock(&hw->rss_locks);
+	list_for_each_entry_safe(p, t, &hw->fl_profs[blk], l_entry)
+		if (test_bit(vsi_handle, p->vsis)) {
+			status = ice_flow_disassoc_prof(hw, blk, p, vsi_handle);
+			if (status)
+				break;
+
+			if (bitmap_empty(p->vsis, ICE_MAX_VSI)) {
+				status = ice_flow_rem_prof(hw, blk, p->id);
+				if (status)
+					break;
+			}
+		}
+	mutex_unlock(&hw->rss_locks);
+
+	return status;
+}
+
+/**
+ * ice_rem_rss_list - remove RSS configuration from list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @prof: pointer to flow profile
+ *
+ * Assumption: lock has already been acquired for RSS list
+ */
+static void
+ice_rem_rss_list(struct ice_hw *hw, u16 vsi_handle, struct ice_flow_prof *prof)
+{
+	struct ice_rss_cfg *r, *tmp;
+
+	/* Search for RSS hash fields associated to the VSI that match the
+	 * hash configurations associated to the flow profile. If found
+	 * remove from the RSS entry list of the VSI context and delete entry.
+	 */
+	list_for_each_entry_safe(r, tmp, &hw->rss_list_head, l_entry)
+		if (r->hashed_flds == prof->segs[prof->segs_cnt - 1].match &&
+		    r->packet_hdr == prof->segs[prof->segs_cnt - 1].hdrs) {
+			clear_bit(vsi_handle, r->vsis);
+			if (bitmap_empty(r->vsis, ICE_MAX_VSI)) {
+				list_del(&r->l_entry);
+				devm_kfree(ice_hw_to_dev(hw), r);
+			}
+			return;
+		}
+}
+
+/**
+ * ice_add_rss_list - add RSS configuration to list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @prof: pointer to flow profile
+ *
+ * Assumption: lock has already been acquired for RSS list
+ */
+static int
+ice_add_rss_list(struct ice_hw *hw, u16 vsi_handle, struct ice_flow_prof *prof)
+{
+	struct ice_rss_cfg *r, *rss_cfg;
+
+	list_for_each_entry(r, &hw->rss_list_head, l_entry)
+		if (r->hashed_flds == prof->segs[prof->segs_cnt - 1].match &&
+		    r->packet_hdr == prof->segs[prof->segs_cnt - 1].hdrs) {
+			set_bit(vsi_handle, r->vsis);
+			return 0;
+		}
+
+	rss_cfg = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rss_cfg),
+			       GFP_KERNEL);
+	if (!rss_cfg)
+		return -ENOMEM;
+
+	rss_cfg->hashed_flds = prof->segs[prof->segs_cnt - 1].match;
+	rss_cfg->packet_hdr = prof->segs[prof->segs_cnt - 1].hdrs;
+	set_bit(vsi_handle, rss_cfg->vsis);
+
+	list_add_tail(&rss_cfg->l_entry, &hw->rss_list_head);
+
+	return 0;
+}
+
+#define ICE_FLOW_PROF_HASH_S	0
+#define ICE_FLOW_PROF_HASH_M	(0xFFFFFFFFULL << ICE_FLOW_PROF_HASH_S)
+#define ICE_FLOW_PROF_HDR_S	32
+#define ICE_FLOW_PROF_HDR_M	(0x3FFFFFFFULL << ICE_FLOW_PROF_HDR_S)
+#define ICE_FLOW_PROF_ENCAP_S	63
+#define ICE_FLOW_PROF_ENCAP_M	(BIT_ULL(ICE_FLOW_PROF_ENCAP_S))
+
+#define ICE_RSS_OUTER_HEADERS	1
+#define ICE_RSS_INNER_HEADERS	2
+
+/* Flow profile ID format:
+ * [0:31] - Packet match fields
+ * [32:62] - Protocol header
+ * [63] - Encapsulation flag, 0 if non-tunneled, 1 if tunneled
+ */
+#define ICE_FLOW_GEN_PROFID(hash, hdr, segs_cnt) \
+	((u64)(((u64)(hash) & ICE_FLOW_PROF_HASH_M) | \
+	       (((u64)(hdr) << ICE_FLOW_PROF_HDR_S) & ICE_FLOW_PROF_HDR_M) | \
+	       ((u8)((segs_cnt) - 1) ? ICE_FLOW_PROF_ENCAP_M : 0)))
+
+/**
+ * ice_add_rss_cfg_sync - add an RSS configuration
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @hashed_flds: hash bit fields (ICE_FLOW_HASH_*) to configure
+ * @addl_hdrs: protocol header fields
+ * @segs_cnt: packet segment count
+ *
+ * Assumption: lock has already been acquired for RSS list
+ */
+static int
+ice_add_rss_cfg_sync(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+		     u32 addl_hdrs, u8 segs_cnt)
+{
+	const enum ice_block blk = ICE_BLK_RSS;
+	struct ice_flow_prof *prof = NULL;
+	struct ice_flow_seg_info *segs;
+	int status;
+
+	if (!segs_cnt || segs_cnt > ICE_FLOW_SEG_MAX)
+		return -EINVAL;
+
+	segs = kcalloc(segs_cnt, sizeof(*segs), GFP_KERNEL);
+	if (!segs)
+		return -ENOMEM;
+
+	/* Construct the packet segment info from the hashed fields */
+	status = ice_flow_set_rss_seg_info(&segs[segs_cnt - 1], hashed_flds,
+					   addl_hdrs);
+	if (status)
+		goto exit;
+
+	/* Search for a flow profile that has matching headers, hash fields
+	 * and has the input VSI associated to it. If found, no further
+	 * operations required and exit.
+	 */
+	prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt,
+					vsi_handle,
+					ICE_FLOW_FIND_PROF_CHK_FLDS |
+					ICE_FLOW_FIND_PROF_CHK_VSI);
+	if (prof)
+		goto exit;
+
+	/* Check if a flow profile exists with the same protocol headers and
+	 * associated with the input VSI. If so disassociate the VSI from
+	 * this profile. The VSI will be added to a new profile created with
+	 * the protocol header and new hash field configuration.
+	 */
+	prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt,
+					vsi_handle, ICE_FLOW_FIND_PROF_CHK_VSI);
+	if (prof) {
+		status = ice_flow_disassoc_prof(hw, blk, prof, vsi_handle);
+		if (!status)
+			ice_rem_rss_list(hw, vsi_handle, prof);
+		else
+			goto exit;
+
+		/* Remove profile if it has no VSIs associated */
+		if (bitmap_empty(prof->vsis, ICE_MAX_VSI)) {
+			status = ice_flow_rem_prof(hw, blk, prof->id);
+			if (status)
+				goto exit;
+		}
+	}
+
+	/* Search for a profile that has same match fields only. If this
+	 * exists then associate the VSI to this profile.
+	 */
+	prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt,
+					vsi_handle,
+					ICE_FLOW_FIND_PROF_CHK_FLDS);
+	if (prof) {
+		status = ice_flow_assoc_prof(hw, blk, prof, vsi_handle);
+		if (!status)
+			status = ice_add_rss_list(hw, vsi_handle, prof);
+		goto exit;
+	}
+
+	/* Create a new flow profile with generated profile and packet
+	 * segment information.
+	 */
+	status = ice_flow_add_prof(hw, blk, ICE_FLOW_RX,
+				   ICE_FLOW_GEN_PROFID(hashed_flds,
+						       segs[segs_cnt - 1].hdrs,
+						       segs_cnt),
+				   segs, segs_cnt, &prof);
+	if (status)
+		goto exit;
+
+	status = ice_flow_assoc_prof(hw, blk, prof, vsi_handle);
+	/* If association to a new flow profile failed then this profile can
+	 * be removed.
+	 */
+	if (status) {
+		ice_flow_rem_prof(hw, blk, prof->id);
+		goto exit;
+	}
+
+	status = ice_add_rss_list(hw, vsi_handle, prof);
+
+exit:
+	kfree(segs);
+	return status;
+}
+
+/**
+ * ice_add_rss_cfg - add an RSS configuration with specified hashed fields
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @hashed_flds: hash bit fields (ICE_FLOW_HASH_*) to configure
+ * @addl_hdrs: protocol header fields
+ *
+ * This function will generate a flow profile based on fields associated with
+ * the input fields to hash on, the flow type and use the VSI number to add
+ * a flow entry to the profile.
+ */
+int
+ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+		u32 addl_hdrs)
+{
+	int status;
+
+	if (hashed_flds == ICE_HASH_INVALID ||
+	    !ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+
+	mutex_lock(&hw->rss_locks);
+	status = ice_add_rss_cfg_sync(hw, vsi_handle, hashed_flds, addl_hdrs,
+				      ICE_RSS_OUTER_HEADERS);
+	if (!status)
+		status = ice_add_rss_cfg_sync(hw, vsi_handle, hashed_flds,
+					      addl_hdrs, ICE_RSS_INNER_HEADERS);
+	mutex_unlock(&hw->rss_locks);
+
+	return status;
+}
+
+/**
+ * ice_rem_rss_cfg_sync - remove an existing RSS configuration
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @hashed_flds: Packet hash types (ICE_FLOW_HASH_*) to remove
+ * @addl_hdrs: Protocol header fields within a packet segment
+ * @segs_cnt: packet segment count
+ *
+ * Assumption: lock has already been acquired for RSS list
+ */
+static int
+ice_rem_rss_cfg_sync(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+		     u32 addl_hdrs, u8 segs_cnt)
+{
+	const enum ice_block blk = ICE_BLK_RSS;
+	struct ice_flow_seg_info *segs;
+	struct ice_flow_prof *prof;
+	int status;
+
+	segs = kcalloc(segs_cnt, sizeof(*segs), GFP_KERNEL);
+	if (!segs)
+		return -ENOMEM;
+
+	/* Construct the packet segment info from the hashed fields */
+	status = ice_flow_set_rss_seg_info(&segs[segs_cnt - 1], hashed_flds,
+					   addl_hdrs);
+	if (status)
+		goto out;
+
+	prof = ice_flow_find_prof_conds(hw, blk, ICE_FLOW_RX, segs, segs_cnt,
+					vsi_handle,
+					ICE_FLOW_FIND_PROF_CHK_FLDS);
+	if (!prof) {
+		status = -ENOENT;
+		goto out;
+	}
+
+	status = ice_flow_disassoc_prof(hw, blk, prof, vsi_handle);
+	if (status)
+		goto out;
+
+	/* Remove RSS configuration from VSI context before deleting
+	 * the flow profile.
+	 */
+	ice_rem_rss_list(hw, vsi_handle, prof);
+
+	if (bitmap_empty(prof->vsis, ICE_MAX_VSI))
+		status = ice_flow_rem_prof(hw, blk, prof->id);
+
+out:
+	kfree(segs);
+	return status;
+}
+
+/**
+ * ice_rem_rss_cfg - remove an existing RSS config with matching hashed fields
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @hashed_flds: Packet hash types (ICE_FLOW_HASH_*) to remove
+ * @addl_hdrs: Protocol header fields within a packet segment
+ *
+ * This function will lookup the flow profile based on the input
+ * hash field bitmap, iterate through the profile entry list of
+ * that profile and find entry associated with input VSI to be
+ * removed. Calls are made to underlying flow s which will APIs
+ * turn build or update buffers for RSS XLT1 section.
+ */
+int __maybe_unused
+ice_rem_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+		u32 addl_hdrs)
+{
+	int status;
+
+	if (hashed_flds == ICE_HASH_INVALID ||
+	    !ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+
+	mutex_lock(&hw->rss_locks);
+	status = ice_rem_rss_cfg_sync(hw, vsi_handle, hashed_flds, addl_hdrs,
+				      ICE_RSS_OUTER_HEADERS);
+	if (!status)
+		status = ice_rem_rss_cfg_sync(hw, vsi_handle, hashed_flds,
+					      addl_hdrs, ICE_RSS_INNER_HEADERS);
+	mutex_unlock(&hw->rss_locks);
+
+	return status;
+}
+
+/* Mapping of AVF hash bit fields to an L3-L4 hash combination.
+ * As the ice_flow_avf_hdr_field represent individual bit shifts in a hash,
+ * convert its values to their appropriate flow L3, L4 values.
+ */
+#define ICE_FLOW_AVF_RSS_IPV4_MASKS \
+	(BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_OTHER) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV4))
+#define ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS \
+	(BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP_SYN_NO_ACK) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP))
+#define ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS \
+	(BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV4_UDP) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV4_UDP) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_UDP))
+#define ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS \
+	(ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS | ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS | \
+	 ICE_FLOW_AVF_RSS_IPV4_MASKS | BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP))
+
+#define ICE_FLOW_AVF_RSS_IPV6_MASKS \
+	(BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_OTHER) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV6))
+#define ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS \
+	(BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_UDP))
+#define ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS \
+	(BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP_SYN_NO_ACK) | \
+	 BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP))
+#define ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS \
+	(ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS | ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS | \
+	 ICE_FLOW_AVF_RSS_IPV6_MASKS | BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP))
+
+/**
+ * ice_add_avf_rss_cfg - add an RSS configuration for AVF driver
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @avf_hash: hash bit fields (ICE_AVF_FLOW_FIELD_*) to configure
+ *
+ * This function will take the hash bitmap provided by the AVF driver via a
+ * message, convert it to ICE-compatible values, and configure RSS flow
+ * profiles.
+ */
+int ice_add_avf_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 avf_hash)
+{
+	int status = 0;
+	u64 hash_flds;
+
+	if (avf_hash == ICE_AVF_FLOW_FIELD_INVALID ||
+	    !ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+
+	/* Make sure no unsupported bits are specified */
+	if (avf_hash & ~(ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS |
+			 ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS))
+		return -EIO;
+
+	hash_flds = avf_hash;
+
+	/* Always create an L3 RSS configuration for any L4 RSS configuration */
+	if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS)
+		hash_flds |= ICE_FLOW_AVF_RSS_IPV4_MASKS;
+
+	if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS)
+		hash_flds |= ICE_FLOW_AVF_RSS_IPV6_MASKS;
+
+	/* Create the corresponding RSS configuration for each valid hash bit */
+	while (hash_flds) {
+		u64 rss_hash = ICE_HASH_INVALID;
+
+		if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV4_MASKS) {
+			if (hash_flds & ICE_FLOW_AVF_RSS_IPV4_MASKS) {
+				rss_hash = ICE_FLOW_HASH_IPV4;
+				hash_flds &= ~ICE_FLOW_AVF_RSS_IPV4_MASKS;
+			} else if (hash_flds &
+				   ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS) {
+				rss_hash = ICE_FLOW_HASH_IPV4 |
+					ICE_FLOW_HASH_TCP_PORT;
+				hash_flds &= ~ICE_FLOW_AVF_RSS_TCP_IPV4_MASKS;
+			} else if (hash_flds &
+				   ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS) {
+				rss_hash = ICE_FLOW_HASH_IPV4 |
+					ICE_FLOW_HASH_UDP_PORT;
+				hash_flds &= ~ICE_FLOW_AVF_RSS_UDP_IPV4_MASKS;
+			} else if (hash_flds &
+				   BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP)) {
+				rss_hash = ICE_FLOW_HASH_IPV4 |
+					ICE_FLOW_HASH_SCTP_PORT;
+				hash_flds &=
+					~BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP);
+			}
+		} else if (hash_flds & ICE_FLOW_AVF_RSS_ALL_IPV6_MASKS) {
+			if (hash_flds & ICE_FLOW_AVF_RSS_IPV6_MASKS) {
+				rss_hash = ICE_FLOW_HASH_IPV6;
+				hash_flds &= ~ICE_FLOW_AVF_RSS_IPV6_MASKS;
+			} else if (hash_flds &
+				   ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS) {
+				rss_hash = ICE_FLOW_HASH_IPV6 |
+					ICE_FLOW_HASH_TCP_PORT;
+				hash_flds &= ~ICE_FLOW_AVF_RSS_TCP_IPV6_MASKS;
+			} else if (hash_flds &
+				   ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS) {
+				rss_hash = ICE_FLOW_HASH_IPV6 |
+					ICE_FLOW_HASH_UDP_PORT;
+				hash_flds &= ~ICE_FLOW_AVF_RSS_UDP_IPV6_MASKS;
+			} else if (hash_flds &
+				   BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP)) {
+				rss_hash = ICE_FLOW_HASH_IPV6 |
+					ICE_FLOW_HASH_SCTP_PORT;
+				hash_flds &=
+					~BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP);
+			}
+		}
+
+		if (rss_hash == ICE_HASH_INVALID)
+			return -EIO;
+
+		status = ice_add_rss_cfg(hw, vsi_handle, rss_hash,
+					 ICE_FLOW_SEG_HDR_NONE);
+		if (status)
+			break;
+	}
+
+	return status;
+}
+
+/**
+ * ice_replay_rss_cfg - replay RSS configurations associated with VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ */
+int ice_replay_rss_cfg(struct ice_hw *hw, u16 vsi_handle)
+{
+	struct ice_rss_cfg *r;
+	int status = 0;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+
+	mutex_lock(&hw->rss_locks);
+	list_for_each_entry(r, &hw->rss_list_head, l_entry) {
+		if (test_bit(vsi_handle, r->vsis)) {
+			status = ice_add_rss_cfg_sync(hw, vsi_handle,
+						      r->hashed_flds,
+						      r->packet_hdr,
+						      ICE_RSS_OUTER_HEADERS);
+			if (status)
+				break;
+			status = ice_add_rss_cfg_sync(hw, vsi_handle,
+						      r->hashed_flds,
+						      r->packet_hdr,
+						      ICE_RSS_INNER_HEADERS);
+			if (status)
+				break;
+		}
+	}
+	mutex_unlock(&hw->rss_locks);
+
+	return status;
+}
+
+/**
+ * ice_get_rss_cfg - returns hashed fields for the given header types
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: software VSI handle
+ * @hdrs: protocol header type
+ *
+ * This function will return the match fields of the first instance of flow
+ * profile having the given header types and containing input VSI
+ */
+u64 ice_get_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u32 hdrs)
+{
+	u64 rss_hash = ICE_HASH_INVALID;
+	struct ice_rss_cfg *r;
+
+	/* verify if the protocol header is non zero and VSI is valid */
+	if (hdrs == ICE_FLOW_SEG_HDR_NONE || !ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_HASH_INVALID;
+
+	mutex_lock(&hw->rss_locks);
+	list_for_each_entry(r, &hw->rss_list_head, l_entry)
+		if (test_bit(vsi_handle, r->vsis) &&
+		    r->packet_hdr == hdrs) {
+			rss_hash = r->hashed_flds;
+			break;
+		}
+	mutex_unlock(&hw->rss_locks);
+
+	return rss_hash;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h
new file mode 100644
index 0000000000..b465d27d9b
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_flow.h
@@ -0,0 +1,416 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_FLOW_H_
+#define _ICE_FLOW_H_
+
+#include "ice_flex_type.h"
+
+#define ICE_FLOW_ENTRY_HANDLE_INVAL	0
+#define ICE_FLOW_FLD_OFF_INVAL		0xffff
+
+/* Generate flow hash field from flow field type(s) */
+#define ICE_FLOW_HASH_ETH	\
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_DA) | \
+	 BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_SA))
+#define ICE_FLOW_HASH_IPV4	\
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | \
+	 BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA))
+#define ICE_FLOW_HASH_IPV6	\
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) | \
+	 BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA))
+#define ICE_FLOW_HASH_TCP_PORT	\
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT) | \
+	 BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT))
+#define ICE_FLOW_HASH_UDP_PORT	\
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT) | \
+	 BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT))
+#define ICE_FLOW_HASH_SCTP_PORT	\
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT) | \
+	 BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT))
+
+#define ICE_HASH_INVALID	0
+#define ICE_HASH_TCP_IPV4	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_TCP_PORT)
+#define ICE_HASH_TCP_IPV6	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_TCP_PORT)
+#define ICE_HASH_UDP_IPV4	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_UDP_PORT)
+#define ICE_HASH_UDP_IPV6	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_UDP_PORT)
+
+#define ICE_FLOW_HASH_GTP_TEID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_GTPC_TEID))
+
+#define ICE_FLOW_HASH_GTP_IPV4_TEID \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_TEID)
+#define ICE_FLOW_HASH_GTP_IPV6_TEID \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_TEID)
+
+#define ICE_FLOW_HASH_GTP_U_TEID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID))
+
+#define ICE_FLOW_HASH_GTP_U_IPV4_TEID \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_U_TEID)
+#define ICE_FLOW_HASH_GTP_U_IPV6_TEID \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_U_TEID)
+
+#define ICE_FLOW_HASH_GTP_U_EH_TEID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_EH_TEID))
+
+#define ICE_FLOW_HASH_GTP_U_EH_QFI \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_EH_QFI))
+
+#define ICE_FLOW_HASH_GTP_U_IPV4_EH \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_GTP_U_EH_TEID | \
+	 ICE_FLOW_HASH_GTP_U_EH_QFI)
+#define ICE_FLOW_HASH_GTP_U_IPV6_EH \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_GTP_U_EH_TEID | \
+	 ICE_FLOW_HASH_GTP_U_EH_QFI)
+
+#define ICE_FLOW_HASH_PPPOE_SESS_ID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID))
+
+#define ICE_FLOW_HASH_PPPOE_SESS_ID_ETH \
+	(ICE_FLOW_HASH_ETH | ICE_FLOW_HASH_PPPOE_SESS_ID)
+#define ICE_FLOW_HASH_PPPOE_TCP_ID \
+	(ICE_FLOW_HASH_TCP_PORT | ICE_FLOW_HASH_PPPOE_SESS_ID)
+#define ICE_FLOW_HASH_PPPOE_UDP_ID \
+	(ICE_FLOW_HASH_UDP_PORT | ICE_FLOW_HASH_PPPOE_SESS_ID)
+
+#define ICE_FLOW_HASH_PFCP_SEID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID))
+#define ICE_FLOW_HASH_PFCP_IPV4_SEID \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_PFCP_SEID)
+#define ICE_FLOW_HASH_PFCP_IPV6_SEID \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_PFCP_SEID)
+
+#define ICE_FLOW_HASH_L2TPV3_SESS_ID \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID))
+#define ICE_FLOW_HASH_L2TPV3_IPV4_SESS_ID \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_L2TPV3_SESS_ID)
+#define ICE_FLOW_HASH_L2TPV3_IPV6_SESS_ID \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_L2TPV3_SESS_ID)
+
+#define ICE_FLOW_HASH_ESP_SPI \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI))
+#define ICE_FLOW_HASH_ESP_IPV4_SPI \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_ESP_SPI)
+#define ICE_FLOW_HASH_ESP_IPV6_SPI \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_ESP_SPI)
+
+#define ICE_FLOW_HASH_AH_SPI \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI))
+#define ICE_FLOW_HASH_AH_IPV4_SPI \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_AH_SPI)
+#define ICE_FLOW_HASH_AH_IPV6_SPI \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_AH_SPI)
+
+#define ICE_FLOW_HASH_NAT_T_ESP_SPI \
+	(BIT_ULL(ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI))
+#define ICE_FLOW_HASH_NAT_T_ESP_IPV4_SPI \
+	(ICE_FLOW_HASH_IPV4 | ICE_FLOW_HASH_NAT_T_ESP_SPI)
+#define ICE_FLOW_HASH_NAT_T_ESP_IPV6_SPI \
+	(ICE_FLOW_HASH_IPV6 | ICE_FLOW_HASH_NAT_T_ESP_SPI)
+
+/* Protocol header fields within a packet segment. A segment consists of one or
+ * more protocol headers that make up a logical group of protocol headers. Each
+ * logical group of protocol headers encapsulates or is encapsulated using/by
+ * tunneling or encapsulation protocols for network virtualization such as GRE,
+ * VxLAN, etc.
+ */
+enum ice_flow_seg_hdr {
+	ICE_FLOW_SEG_HDR_NONE		= 0x00000000,
+	ICE_FLOW_SEG_HDR_ETH		= 0x00000001,
+	ICE_FLOW_SEG_HDR_VLAN		= 0x00000002,
+	ICE_FLOW_SEG_HDR_IPV4		= 0x00000004,
+	ICE_FLOW_SEG_HDR_IPV6		= 0x00000008,
+	ICE_FLOW_SEG_HDR_ARP		= 0x00000010,
+	ICE_FLOW_SEG_HDR_ICMP		= 0x00000020,
+	ICE_FLOW_SEG_HDR_TCP		= 0x00000040,
+	ICE_FLOW_SEG_HDR_UDP		= 0x00000080,
+	ICE_FLOW_SEG_HDR_SCTP		= 0x00000100,
+	ICE_FLOW_SEG_HDR_GRE		= 0x00000200,
+	ICE_FLOW_SEG_HDR_GTPC		= 0x00000400,
+	ICE_FLOW_SEG_HDR_GTPC_TEID	= 0x00000800,
+	ICE_FLOW_SEG_HDR_GTPU_IP	= 0x00001000,
+	ICE_FLOW_SEG_HDR_GTPU_EH	= 0x00002000,
+	ICE_FLOW_SEG_HDR_GTPU_DWN	= 0x00004000,
+	ICE_FLOW_SEG_HDR_GTPU_UP	= 0x00008000,
+	ICE_FLOW_SEG_HDR_PPPOE		= 0x00010000,
+	ICE_FLOW_SEG_HDR_PFCP_NODE	= 0x00020000,
+	ICE_FLOW_SEG_HDR_PFCP_SESSION	= 0x00040000,
+	ICE_FLOW_SEG_HDR_L2TPV3		= 0x00080000,
+	ICE_FLOW_SEG_HDR_ESP		= 0x00100000,
+	ICE_FLOW_SEG_HDR_AH		= 0x00200000,
+	ICE_FLOW_SEG_HDR_NAT_T_ESP	= 0x00400000,
+	ICE_FLOW_SEG_HDR_ETH_NON_IP	= 0x00800000,
+	/* The following is an additive bit for ICE_FLOW_SEG_HDR_IPV4 and
+	 * ICE_FLOW_SEG_HDR_IPV6 which include the IPV4 other PTYPEs
+	 */
+	ICE_FLOW_SEG_HDR_IPV_OTHER      = 0x20000000,
+};
+
+/* These segments all have the same PTYPES, but are otherwise distinguished by
+ * the value of the gtp_eh_pdu and gtp_eh_pdu_link flags:
+ *
+ *                                gtp_eh_pdu     gtp_eh_pdu_link
+ * ICE_FLOW_SEG_HDR_GTPU_IP           0              0
+ * ICE_FLOW_SEG_HDR_GTPU_EH           1              don't care
+ * ICE_FLOW_SEG_HDR_GTPU_DWN          1              0
+ * ICE_FLOW_SEG_HDR_GTPU_UP           1              1
+ */
+#define ICE_FLOW_SEG_HDR_GTPU (ICE_FLOW_SEG_HDR_GTPU_IP | \
+			       ICE_FLOW_SEG_HDR_GTPU_EH | \
+			       ICE_FLOW_SEG_HDR_GTPU_DWN | \
+			       ICE_FLOW_SEG_HDR_GTPU_UP)
+#define ICE_FLOW_SEG_HDR_PFCP (ICE_FLOW_SEG_HDR_PFCP_NODE | \
+			       ICE_FLOW_SEG_HDR_PFCP_SESSION)
+
+enum ice_flow_field {
+	/* L2 */
+	ICE_FLOW_FIELD_IDX_ETH_DA,
+	ICE_FLOW_FIELD_IDX_ETH_SA,
+	ICE_FLOW_FIELD_IDX_S_VLAN,
+	ICE_FLOW_FIELD_IDX_C_VLAN,
+	ICE_FLOW_FIELD_IDX_ETH_TYPE,
+	/* L3 */
+	ICE_FLOW_FIELD_IDX_IPV4_DSCP,
+	ICE_FLOW_FIELD_IDX_IPV6_DSCP,
+	ICE_FLOW_FIELD_IDX_IPV4_TTL,
+	ICE_FLOW_FIELD_IDX_IPV4_PROT,
+	ICE_FLOW_FIELD_IDX_IPV6_TTL,
+	ICE_FLOW_FIELD_IDX_IPV6_PROT,
+	ICE_FLOW_FIELD_IDX_IPV4_SA,
+	ICE_FLOW_FIELD_IDX_IPV4_DA,
+	ICE_FLOW_FIELD_IDX_IPV6_SA,
+	ICE_FLOW_FIELD_IDX_IPV6_DA,
+	/* L4 */
+	ICE_FLOW_FIELD_IDX_TCP_SRC_PORT,
+	ICE_FLOW_FIELD_IDX_TCP_DST_PORT,
+	ICE_FLOW_FIELD_IDX_UDP_SRC_PORT,
+	ICE_FLOW_FIELD_IDX_UDP_DST_PORT,
+	ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT,
+	ICE_FLOW_FIELD_IDX_SCTP_DST_PORT,
+	ICE_FLOW_FIELD_IDX_TCP_FLAGS,
+	/* ARP */
+	ICE_FLOW_FIELD_IDX_ARP_SIP,
+	ICE_FLOW_FIELD_IDX_ARP_DIP,
+	ICE_FLOW_FIELD_IDX_ARP_SHA,
+	ICE_FLOW_FIELD_IDX_ARP_DHA,
+	ICE_FLOW_FIELD_IDX_ARP_OP,
+	/* ICMP */
+	ICE_FLOW_FIELD_IDX_ICMP_TYPE,
+	ICE_FLOW_FIELD_IDX_ICMP_CODE,
+	/* GRE */
+	ICE_FLOW_FIELD_IDX_GRE_KEYID,
+	/* GTPC_TEID */
+	ICE_FLOW_FIELD_IDX_GTPC_TEID,
+	/* GTPU_IP */
+	ICE_FLOW_FIELD_IDX_GTPU_IP_TEID,
+	/* GTPU_EH */
+	ICE_FLOW_FIELD_IDX_GTPU_EH_TEID,
+	ICE_FLOW_FIELD_IDX_GTPU_EH_QFI,
+	/* GTPU_UP */
+	ICE_FLOW_FIELD_IDX_GTPU_UP_TEID,
+	/* GTPU_DWN */
+	ICE_FLOW_FIELD_IDX_GTPU_DWN_TEID,
+	/* PPPoE */
+	ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID,
+	/* PFCP */
+	ICE_FLOW_FIELD_IDX_PFCP_SEID,
+	/* L2TPv3 */
+	ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID,
+	/* ESP */
+	ICE_FLOW_FIELD_IDX_ESP_SPI,
+	/* AH */
+	ICE_FLOW_FIELD_IDX_AH_SPI,
+	/* NAT_T ESP */
+	ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI,
+	 /* The total number of enums must not exceed 64 */
+	ICE_FLOW_FIELD_IDX_MAX
+};
+
+/* Flow headers and fields for AVF support */
+enum ice_flow_avf_hdr_field {
+	/* Values 0 - 28 are reserved for future use */
+	ICE_AVF_FLOW_FIELD_INVALID		= 0,
+	ICE_AVF_FLOW_FIELD_UNICAST_IPV4_UDP	= 29,
+	ICE_AVF_FLOW_FIELD_MULTICAST_IPV4_UDP,
+	ICE_AVF_FLOW_FIELD_IPV4_UDP,
+	ICE_AVF_FLOW_FIELD_IPV4_TCP_SYN_NO_ACK,
+	ICE_AVF_FLOW_FIELD_IPV4_TCP,
+	ICE_AVF_FLOW_FIELD_IPV4_SCTP,
+	ICE_AVF_FLOW_FIELD_IPV4_OTHER,
+	ICE_AVF_FLOW_FIELD_FRAG_IPV4,
+	/* Values 37-38 are reserved */
+	ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP	= 39,
+	ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP,
+	ICE_AVF_FLOW_FIELD_IPV6_UDP,
+	ICE_AVF_FLOW_FIELD_IPV6_TCP_SYN_NO_ACK,
+	ICE_AVF_FLOW_FIELD_IPV6_TCP,
+	ICE_AVF_FLOW_FIELD_IPV6_SCTP,
+	ICE_AVF_FLOW_FIELD_IPV6_OTHER,
+	ICE_AVF_FLOW_FIELD_FRAG_IPV6,
+	ICE_AVF_FLOW_FIELD_RSVD47,
+	ICE_AVF_FLOW_FIELD_FCOE_OX,
+	ICE_AVF_FLOW_FIELD_FCOE_RX,
+	ICE_AVF_FLOW_FIELD_FCOE_OTHER,
+	/* Values 51-62 are reserved */
+	ICE_AVF_FLOW_FIELD_L2_PAYLOAD		= 63,
+	ICE_AVF_FLOW_FIELD_MAX
+};
+
+/* Supported RSS offloads  This macro is defined to support
+ * VIRTCHNL_OP_GET_RSS_HENA_CAPS ops. PF driver sends the RSS hardware
+ * capabilities to the caller of this ops.
+ */
+#define ICE_DEFAULT_RSS_HENA ( \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_UDP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_SCTP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_OTHER) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV4) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_UDP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_SCTP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_OTHER) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_FRAG_IPV6) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV4_TCP_SYN_NO_ACK) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV4_UDP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV4_UDP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_IPV6_TCP_SYN_NO_ACK) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_UNICAST_IPV6_UDP) | \
+	BIT_ULL(ICE_AVF_FLOW_FIELD_MULTICAST_IPV6_UDP))
+
+enum ice_flow_dir {
+	ICE_FLOW_RX		= 0x02,
+};
+
+enum ice_flow_priority {
+	ICE_FLOW_PRIO_LOW,
+	ICE_FLOW_PRIO_NORMAL,
+	ICE_FLOW_PRIO_HIGH
+};
+
+#define ICE_FLOW_SEG_MAX		2
+#define ICE_FLOW_SEG_RAW_FLD_MAX	2
+#define ICE_FLOW_FV_EXTRACT_SZ		2
+
+#define ICE_FLOW_SET_HDRS(seg, val)	((seg)->hdrs |= (u32)(val))
+
+struct ice_flow_seg_xtrct {
+	u8 prot_id;	/* Protocol ID of extracted header field */
+	u16 off;	/* Starting offset of the field in header in bytes */
+	u8 idx;		/* Index of FV entry used */
+	u8 disp;	/* Displacement of field in bits fr. FV entry's start */
+	u16 mask;	/* Mask for field */
+};
+
+enum ice_flow_fld_match_type {
+	ICE_FLOW_FLD_TYPE_REG,		/* Value, mask */
+	ICE_FLOW_FLD_TYPE_RANGE,	/* Value, mask, last (upper bound) */
+	ICE_FLOW_FLD_TYPE_PREFIX,	/* IP address, prefix, size of prefix */
+	ICE_FLOW_FLD_TYPE_SIZE,		/* Value, mask, size of match */
+};
+
+struct ice_flow_fld_loc {
+	/* Describe offsets of field information relative to the beginning of
+	 * input buffer provided when adding flow entries.
+	 */
+	u16 val;	/* Offset where the value is located */
+	u16 mask;	/* Offset where the mask/prefix value is located */
+	u16 last;	/* Length or offset where the upper value is located */
+};
+
+struct ice_flow_fld_info {
+	enum ice_flow_fld_match_type type;
+	/* Location where to retrieve data from an input buffer */
+	struct ice_flow_fld_loc src;
+	/* Location where to put the data into the final entry buffer */
+	struct ice_flow_fld_loc entry;
+	struct ice_flow_seg_xtrct xtrct;
+};
+
+struct ice_flow_seg_fld_raw {
+	struct ice_flow_fld_info info;
+	u16 off;	/* Offset from the start of the segment */
+};
+
+struct ice_flow_seg_info {
+	u32 hdrs;	/* Bitmask indicating protocol headers present */
+	u64 match;	/* Bitmask indicating header fields to be matched */
+	u64 range;	/* Bitmask indicating header fields matched as ranges */
+
+	struct ice_flow_fld_info fields[ICE_FLOW_FIELD_IDX_MAX];
+
+	u8 raws_cnt;	/* Number of raw fields to be matched */
+	struct ice_flow_seg_fld_raw raws[ICE_FLOW_SEG_RAW_FLD_MAX];
+};
+
+/* This structure describes a flow entry, and is tracked only in this file */
+struct ice_flow_entry {
+	struct list_head l_entry;
+
+	u64 id;
+	struct ice_flow_prof *prof;
+	/* Flow entry's content */
+	void *entry;
+	enum ice_flow_priority priority;
+	u16 vsi_handle;
+	u16 entry_sz;
+};
+
+#define ICE_FLOW_ENTRY_HNDL(e)	((u64)(uintptr_t)e)
+#define ICE_FLOW_ENTRY_PTR(h)	((struct ice_flow_entry *)(uintptr_t)(h))
+
+struct ice_flow_prof {
+	struct list_head l_entry;
+
+	u64 id;
+	enum ice_flow_dir dir;
+	u8 segs_cnt;
+
+	/* Keep track of flow entries associated with this flow profile */
+	struct mutex entries_lock;
+	struct list_head entries;
+
+	struct ice_flow_seg_info segs[ICE_FLOW_SEG_MAX];
+
+	/* software VSI handles referenced by this flow profile */
+	DECLARE_BITMAP(vsis, ICE_MAX_VSI);
+};
+
+struct ice_rss_cfg {
+	struct list_head l_entry;
+	/* bitmap of VSIs added to the RSS entry */
+	DECLARE_BITMAP(vsis, ICE_MAX_VSI);
+	u64 hashed_flds;
+	u32 packet_hdr;
+};
+
+int
+ice_flow_add_prof(struct ice_hw *hw, enum ice_block blk, enum ice_flow_dir dir,
+		  u64 prof_id, struct ice_flow_seg_info *segs, u8 segs_cnt,
+		  struct ice_flow_prof **prof);
+int ice_flow_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 prof_id);
+int
+ice_flow_add_entry(struct ice_hw *hw, enum ice_block blk, u64 prof_id,
+		   u64 entry_id, u16 vsi, enum ice_flow_priority prio,
+		   void *data, u64 *entry_h);
+int ice_flow_rem_entry(struct ice_hw *hw, enum ice_block blk, u64 entry_h);
+void
+ice_flow_set_fld(struct ice_flow_seg_info *seg, enum ice_flow_field fld,
+		 u16 val_loc, u16 mask_loc, u16 last_loc, bool range);
+void
+ice_flow_add_fld_raw(struct ice_flow_seg_info *seg, u16 off, u8 len,
+		     u16 val_loc, u16 mask_loc);
+int ice_flow_rem_vsi_prof(struct ice_hw *hw, u16 vsi_handle, u64 prof_id);
+void ice_rem_vsi_rss_list(struct ice_hw *hw, u16 vsi_handle);
+int ice_replay_rss_cfg(struct ice_hw *hw, u16 vsi_handle);
+int ice_add_avf_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds);
+int ice_rem_vsi_rss_cfg(struct ice_hw *hw, u16 vsi_handle);
+int
+ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+		u32 addl_hdrs);
+int
+ice_rem_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
+		u32 addl_hdrs);
+u64 ice_get_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u32 hdrs);
+#endif /* _ICE_FLOW_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c
new file mode 100644
index 0000000000..aff7a141c3
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fltr.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_fltr.h"
+
+/**
+ * ice_fltr_free_list - free filter lists helper
+ * @dev: pointer to the device struct
+ * @h: pointer to the list head to be freed
+ *
+ * Helper function to free filter lists previously created using
+ * ice_fltr_add_mac_to_list
+ */
+void ice_fltr_free_list(struct device *dev, struct list_head *h)
+{
+	struct ice_fltr_list_entry *e, *tmp;
+
+	list_for_each_entry_safe(e, tmp, h, list_entry) {
+		list_del(&e->list_entry);
+		devm_kfree(dev, e);
+	}
+}
+
+/**
+ * ice_fltr_add_entry_to_list - allocate and add filter entry to list
+ * @dev: pointer to device needed by alloc function
+ * @info: filter info struct that gets added to the passed in list
+ * @list: pointer to the list which contains MAC filters entry
+ */
+static int
+ice_fltr_add_entry_to_list(struct device *dev, struct ice_fltr_info *info,
+			   struct list_head *list)
+{
+	struct ice_fltr_list_entry *entry;
+
+	entry = devm_kzalloc(dev, sizeof(*entry), GFP_ATOMIC);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->fltr_info = *info;
+
+	INIT_LIST_HEAD(&entry->list_entry);
+	list_add(&entry->list_entry, list);
+
+	return 0;
+}
+
+/**
+ * ice_fltr_set_vlan_vsi_promisc
+ * @hw: pointer to the hardware structure
+ * @vsi: the VSI being configured
+ * @promisc_mask: mask of promiscuous config bits
+ *
+ * Set VSI with all associated VLANs to given promiscuous mode(s)
+ */
+int
+ice_fltr_set_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi,
+			      u8 promisc_mask)
+{
+	struct ice_pf *pf = hw->back;
+	int result;
+
+	result = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_mask, false);
+	if (result && result != -EEXIST)
+		dev_err(ice_pf_to_dev(pf),
+			"Error setting promisc mode on VSI %i (rc=%d)\n",
+			vsi->vsi_num, result);
+
+	return result;
+}
+
+/**
+ * ice_fltr_clear_vlan_vsi_promisc
+ * @hw: pointer to the hardware structure
+ * @vsi: the VSI being configured
+ * @promisc_mask: mask of promiscuous config bits
+ *
+ * Clear VSI with all associated VLANs to given promiscuous mode(s)
+ */
+int
+ice_fltr_clear_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi,
+				u8 promisc_mask)
+{
+	struct ice_pf *pf = hw->back;
+	int result;
+
+	result = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_mask, true);
+	if (result && result != -EEXIST)
+		dev_err(ice_pf_to_dev(pf),
+			"Error clearing promisc mode on VSI %i (rc=%d)\n",
+			vsi->vsi_num, result);
+
+	return result;
+}
+
+/**
+ * ice_fltr_clear_vsi_promisc - clear specified promiscuous mode(s)
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to clear mode
+ * @promisc_mask: mask of promiscuous config bits to clear
+ * @vid: VLAN ID to clear VLAN promiscuous
+ */
+int
+ice_fltr_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+			   u16 vid)
+{
+	struct ice_pf *pf = hw->back;
+	int result;
+
+	result = ice_clear_vsi_promisc(hw, vsi_handle, promisc_mask, vid);
+	if (result && result != -EEXIST)
+		dev_err(ice_pf_to_dev(pf),
+			"Error clearing promisc mode on VSI %i for VID %u (rc=%d)\n",
+			ice_get_hw_vsi_num(hw, vsi_handle), vid, result);
+
+	return result;
+}
+
+/**
+ * ice_fltr_set_vsi_promisc - set given VSI to given promiscuous mode(s)
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to configure
+ * @promisc_mask: mask of promiscuous config bits
+ * @vid: VLAN ID to set VLAN promiscuous
+ */
+int
+ice_fltr_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+			 u16 vid)
+{
+	struct ice_pf *pf = hw->back;
+	int result;
+
+	result = ice_set_vsi_promisc(hw, vsi_handle, promisc_mask, vid);
+	if (result && result != -EEXIST)
+		dev_err(ice_pf_to_dev(pf),
+			"Error setting promisc mode on VSI %i for VID %u (rc=%d)\n",
+			ice_get_hw_vsi_num(hw, vsi_handle), vid, result);
+
+	return result;
+}
+
+/**
+ * ice_fltr_add_mac_list - add list of MAC filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+int ice_fltr_add_mac_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_add_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_mac_list - remove list of MAC filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+int ice_fltr_remove_mac_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_remove_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_add_vlan_list - add list of VLAN filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static int ice_fltr_add_vlan_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_add_vlan(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_vlan_list - remove list of VLAN filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static int
+ice_fltr_remove_vlan_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_remove_vlan(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_add_eth_list - add list of ethertype filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static int ice_fltr_add_eth_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_add_eth_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_eth_list - remove list of ethertype filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static int ice_fltr_remove_eth_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_remove_eth_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_all - remove all filters associated with VSI
+ * @vsi: pointer to VSI struct
+ */
+void ice_fltr_remove_all(struct ice_vsi *vsi)
+{
+	ice_remove_vsi_fltr(&vsi->back->hw, vsi->idx);
+	/* sync netdev filters if exist */
+	if (vsi->netdev) {
+		__dev_uc_unsync(vsi->netdev, NULL);
+		__dev_mc_unsync(vsi->netdev, NULL);
+	}
+}
+
+/**
+ * ice_fltr_add_mac_to_list - add MAC filter info to exsisting list
+ * @vsi: pointer to VSI struct
+ * @list: list to add filter info to
+ * @mac: MAC address to add
+ * @action: filter action
+ */
+int
+ice_fltr_add_mac_to_list(struct ice_vsi *vsi, struct list_head *list,
+			 const u8 *mac, enum ice_sw_fwd_act_type action)
+{
+	struct ice_fltr_info info = { 0 };
+
+	info.flag = ICE_FLTR_TX;
+	info.src_id = ICE_SRC_ID_VSI;
+	info.lkup_type = ICE_SW_LKUP_MAC;
+	info.fltr_act = action;
+	info.vsi_handle = vsi->idx;
+
+	ether_addr_copy(info.l_data.mac.mac_addr, mac);
+
+	return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info,
+					  list);
+}
+
+/**
+ * ice_fltr_add_vlan_to_list - add VLAN filter info to exsisting list
+ * @vsi: pointer to VSI struct
+ * @list: list to add filter info to
+ * @vlan: VLAN filter details
+ */
+static int
+ice_fltr_add_vlan_to_list(struct ice_vsi *vsi, struct list_head *list,
+			  struct ice_vlan *vlan)
+{
+	struct ice_fltr_info info = { 0 };
+
+	info.flag = ICE_FLTR_TX;
+	info.src_id = ICE_SRC_ID_VSI;
+	info.lkup_type = ICE_SW_LKUP_VLAN;
+	info.fltr_act = ICE_FWD_TO_VSI;
+	info.vsi_handle = vsi->idx;
+	info.l_data.vlan.vlan_id = vlan->vid;
+	info.l_data.vlan.tpid = vlan->tpid;
+	info.l_data.vlan.tpid_valid = true;
+
+	return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info,
+					  list);
+}
+
+/**
+ * ice_fltr_add_eth_to_list - add ethertype filter info to exsisting list
+ * @vsi: pointer to VSI struct
+ * @list: list to add filter info to
+ * @ethertype: ethertype of packet that matches filter
+ * @flag: filter direction, Tx or Rx
+ * @action: filter action
+ */
+static int
+ice_fltr_add_eth_to_list(struct ice_vsi *vsi, struct list_head *list,
+			 u16 ethertype, u16 flag,
+			 enum ice_sw_fwd_act_type action)
+{
+	struct ice_fltr_info info = { 0 };
+
+	info.flag = flag;
+	info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
+	info.fltr_act = action;
+	info.vsi_handle = vsi->idx;
+	info.l_data.ethertype_mac.ethertype = ethertype;
+
+	if (flag == ICE_FLTR_TX)
+		info.src_id = ICE_SRC_ID_VSI;
+	else
+		info.src_id = ICE_SRC_ID_LPORT;
+
+	return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info,
+					  list);
+}
+
+/**
+ * ice_fltr_prepare_mac - add or remove MAC rule
+ * @vsi: pointer to VSI struct
+ * @mac: MAC address to add
+ * @action: action to be performed on filter match
+ * @mac_action: pointer to add or remove MAC function
+ */
+static int
+ice_fltr_prepare_mac(struct ice_vsi *vsi, const u8 *mac,
+		     enum ice_sw_fwd_act_type action,
+		     int (*mac_action)(struct ice_vsi *, struct list_head *))
+{
+	LIST_HEAD(tmp_list);
+	int result;
+
+	if (ice_fltr_add_mac_to_list(vsi, &tmp_list, mac, action)) {
+		ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+		return -ENOMEM;
+	}
+
+	result = mac_action(vsi, &tmp_list);
+	ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+	return result;
+}
+
+/**
+ * ice_fltr_prepare_mac_and_broadcast - add or remove MAC and broadcast filter
+ * @vsi: pointer to VSI struct
+ * @mac: MAC address to add
+ * @action: action to be performed on filter match
+ * @mac_action: pointer to add or remove MAC function
+ */
+static int
+ice_fltr_prepare_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac,
+				   enum ice_sw_fwd_act_type action,
+				   int(*mac_action)
+				   (struct ice_vsi *, struct list_head *))
+{
+	u8 broadcast[ETH_ALEN];
+	LIST_HEAD(tmp_list);
+	int result;
+
+	eth_broadcast_addr(broadcast);
+	if (ice_fltr_add_mac_to_list(vsi, &tmp_list, mac, action) ||
+	    ice_fltr_add_mac_to_list(vsi, &tmp_list, broadcast, action)) {
+		ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+		return -ENOMEM;
+	}
+
+	result = mac_action(vsi, &tmp_list);
+	ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+	return result;
+}
+
+/**
+ * ice_fltr_prepare_vlan - add or remove VLAN filter
+ * @vsi: pointer to VSI struct
+ * @vlan: VLAN filter details
+ * @vlan_action: pointer to add or remove VLAN function
+ */
+static int
+ice_fltr_prepare_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan,
+		      int (*vlan_action)(struct ice_vsi *, struct list_head *))
+{
+	LIST_HEAD(tmp_list);
+	int result;
+
+	if (ice_fltr_add_vlan_to_list(vsi, &tmp_list, vlan))
+		return -ENOMEM;
+
+	result = vlan_action(vsi, &tmp_list);
+	ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+	return result;
+}
+
+/**
+ * ice_fltr_prepare_eth - add or remove ethertype filter
+ * @vsi: pointer to VSI struct
+ * @ethertype: ethertype of packet to be filtered
+ * @flag: direction of packet, Tx or Rx
+ * @action: action to be performed on filter match
+ * @eth_action: pointer to add or remove ethertype function
+ */
+static int
+ice_fltr_prepare_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+		     enum ice_sw_fwd_act_type action,
+		     int (*eth_action)(struct ice_vsi *, struct list_head *))
+{
+	LIST_HEAD(tmp_list);
+	int result;
+
+	if (ice_fltr_add_eth_to_list(vsi, &tmp_list, ethertype, flag, action))
+		return -ENOMEM;
+
+	result = eth_action(vsi, &tmp_list);
+	ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+	return result;
+}
+
+/**
+ * ice_fltr_add_mac - add single MAC filter
+ * @vsi: pointer to VSI struct
+ * @mac: MAC to add
+ * @action: action to be performed on filter match
+ */
+int ice_fltr_add_mac(struct ice_vsi *vsi, const u8 *mac,
+		     enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_mac(vsi, mac, action, ice_fltr_add_mac_list);
+}
+
+/**
+ * ice_fltr_add_mac_and_broadcast - add single MAC and broadcast
+ * @vsi: pointer to VSI struct
+ * @mac: MAC to add
+ * @action: action to be performed on filter match
+ */
+int
+ice_fltr_add_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac,
+			       enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_mac_and_broadcast(vsi, mac, action,
+						  ice_fltr_add_mac_list);
+}
+
+/**
+ * ice_fltr_remove_mac - remove MAC filter
+ * @vsi: pointer to VSI struct
+ * @mac: filter MAC to remove
+ * @action: action to remove
+ */
+int ice_fltr_remove_mac(struct ice_vsi *vsi, const u8 *mac,
+			enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_mac(vsi, mac, action, ice_fltr_remove_mac_list);
+}
+
+/**
+ * ice_fltr_add_vlan - add single VLAN filter
+ * @vsi: pointer to VSI struct
+ * @vlan: VLAN filter details
+ */
+int ice_fltr_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+	return ice_fltr_prepare_vlan(vsi, vlan, ice_fltr_add_vlan_list);
+}
+
+/**
+ * ice_fltr_remove_vlan - remove VLAN filter
+ * @vsi: pointer to VSI struct
+ * @vlan: VLAN filter details
+ */
+int ice_fltr_remove_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+	return ice_fltr_prepare_vlan(vsi, vlan, ice_fltr_remove_vlan_list);
+}
+
+/**
+ * ice_fltr_add_eth - add specyfic ethertype filter
+ * @vsi: pointer to VSI struct
+ * @ethertype: ethertype of filter
+ * @flag: direction of packet to be filtered, Tx or Rx
+ * @action: action to be performed on filter match
+ */
+int ice_fltr_add_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+		     enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_eth(vsi, ethertype, flag, action,
+				    ice_fltr_add_eth_list);
+}
+
+/**
+ * ice_fltr_remove_eth - remove ethertype filter
+ * @vsi: pointer to VSI struct
+ * @ethertype: ethertype of filter
+ * @flag: direction of filter
+ * @action: action to remove
+ */
+int ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+			enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_eth(vsi, ethertype, flag, action,
+				    ice_fltr_remove_eth_list);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.h b/drivers/net/ethernet/intel/ice/ice_fltr.h
new file mode 100644
index 0000000000..0f3dbc308e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fltr.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#ifndef _ICE_FLTR_H_
+#define _ICE_FLTR_H_
+
+#include "ice_vlan.h"
+
+void ice_fltr_free_list(struct device *dev, struct list_head *h);
+int
+ice_fltr_set_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi,
+			      u8 promisc_mask);
+int
+ice_fltr_clear_vlan_vsi_promisc(struct ice_hw *hw, struct ice_vsi *vsi,
+				u8 promisc_mask);
+int
+ice_fltr_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+			   u16 vid);
+int
+ice_fltr_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+			 u16 vid);
+int
+ice_fltr_add_mac_to_list(struct ice_vsi *vsi, struct list_head *list,
+			 const u8 *mac, enum ice_sw_fwd_act_type action);
+int
+ice_fltr_add_mac(struct ice_vsi *vsi, const u8 *mac,
+		 enum ice_sw_fwd_act_type action);
+int
+ice_fltr_add_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac,
+			       enum ice_sw_fwd_act_type action);
+int ice_fltr_add_mac_list(struct ice_vsi *vsi, struct list_head *list);
+int
+ice_fltr_remove_mac(struct ice_vsi *vsi, const u8 *mac,
+		    enum ice_sw_fwd_act_type action);
+int ice_fltr_remove_mac_list(struct ice_vsi *vsi, struct list_head *list);
+
+int ice_fltr_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+int ice_fltr_remove_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+
+int
+ice_fltr_add_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+		 enum ice_sw_fwd_act_type action);
+int
+ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+		    enum ice_sw_fwd_act_type action);
+void ice_fltr_remove_all(struct ice_vsi *vsi);
+
+int
+ice_fltr_update_flags(struct ice_vsi *vsi, u16 rule_id, u16 recipe_id,
+		      u32 new_flags);
+#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c
new file mode 100644
index 0000000000..319a2d6fe2
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c
@@ -0,0 +1,1054 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2019, Intel Corporation. */
+
+#include <asm/unaligned.h>
+#include <linux/uuid.h>
+#include <linux/crc32.h>
+#include <linux/pldmfw.h>
+#include "ice.h"
+#include "ice_fw_update.h"
+
+struct ice_fwu_priv {
+	struct pldmfw context;
+
+	struct ice_pf *pf;
+	struct netlink_ext_ack *extack;
+
+	/* Track which NVM banks to activate at the end of the update */
+	u8 activate_flags;
+
+	/* Track the firmware response of the required reset to complete the
+	 * flash update.
+	 *
+	 * 0 - ICE_AQC_NVM_POR_FLAG - A full power on is required
+	 * 1 - ICE_AQC_NVM_PERST_FLAG - A cold PCIe reset is required
+	 * 2 - ICE_AQC_NVM_EMPR_FLAG - An EMP reset is required
+	 */
+	u8 reset_level;
+
+	/* Track if EMP reset is available */
+	u8 emp_reset_available;
+};
+
+/**
+ * ice_send_package_data - Send record package data to firmware
+ * @context: PLDM fw update structure
+ * @data: pointer to the package data
+ * @length: length of the package data
+ *
+ * Send a copy of the package data associated with the PLDM record matching
+ * this device to the firmware.
+ *
+ * Note that this function sends an AdminQ command that will fail unless the
+ * NVM resource has been acquired.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_send_package_data(struct pldmfw *context, const u8 *data, u16 length)
+{
+	struct ice_fwu_priv *priv = container_of(context, struct ice_fwu_priv, context);
+	struct netlink_ext_ack *extack = priv->extack;
+	struct device *dev = context->dev;
+	struct ice_pf *pf = priv->pf;
+	struct ice_hw *hw = &pf->hw;
+	u8 *package_data;
+	int status;
+
+	dev_dbg(dev, "Sending PLDM record package data to firmware\n");
+
+	package_data = kmemdup(data, length, GFP_KERNEL);
+	if (!package_data)
+		return -ENOMEM;
+
+	status = ice_nvm_set_pkg_data(hw, false, package_data, length, NULL);
+
+	kfree(package_data);
+
+	if (status) {
+		dev_err(dev, "Failed to send record package data to firmware, err %d aq_err %s\n",
+			status, ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to record package data to firmware");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_check_component_response - Report firmware response to a component
+ * @pf: device private data structure
+ * @id: component id being checked
+ * @response: indicates whether this component can be updated
+ * @code: code indicating reason for response
+ * @extack: netlink extended ACK structure
+ *
+ * Check whether firmware indicates if this component can be updated. Report
+ * a suitable error message over the netlink extended ACK if the component
+ * cannot be updated.
+ *
+ * Returns: zero if the component can be updated, or -ECANCELED of the
+ * firmware indicates the component cannot be updated.
+ */
+static int
+ice_check_component_response(struct ice_pf *pf, u16 id, u8 response, u8 code,
+			     struct netlink_ext_ack *extack)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	const char *component;
+
+	switch (id) {
+	case NVM_COMP_ID_OROM:
+		component = "fw.undi";
+		break;
+	case NVM_COMP_ID_NVM:
+		component = "fw.mgmt";
+		break;
+	case NVM_COMP_ID_NETLIST:
+		component = "fw.netlist";
+		break;
+	default:
+		WARN(1, "Unexpected unknown component identifier 0x%02x", id);
+		return -EINVAL;
+	}
+
+	dev_dbg(dev, "%s: firmware response 0x%x, code 0x%x\n",
+		component, response, code);
+
+	switch (response) {
+	case ICE_AQ_NVM_PASS_COMP_CAN_BE_UPDATED:
+		/* firmware indicated this update is good to proceed */
+		return 0;
+	case ICE_AQ_NVM_PASS_COMP_CAN_MAY_BE_UPDATEABLE:
+		dev_warn(dev, "firmware recommends not updating %s, as it may result in a downgrade. continuing anyways\n", component);
+		return 0;
+	case ICE_AQ_NVM_PASS_COMP_CAN_NOT_BE_UPDATED:
+		dev_info(dev, "firmware has rejected updating %s\n", component);
+		break;
+	}
+
+	switch (code) {
+	case ICE_AQ_NVM_PASS_COMP_STAMP_IDENTICAL_CODE:
+		dev_err(dev, "Component comparison stamp for %s is identical to the running image\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component comparison stamp is identical to running image");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_STAMP_LOWER:
+		dev_err(dev, "Component comparison stamp for %s is lower than the running image\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component comparison stamp is lower than running image");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_INVALID_STAMP_CODE:
+		dev_err(dev, "Component comparison stamp for %s is invalid\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component comparison stamp is invalid");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_CONFLICT_CODE:
+		dev_err(dev, "%s conflicts with a previous component table\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component table conflict occurred");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_PRE_REQ_NOT_MET_CODE:
+		dev_err(dev, "Pre-requisites for component %s have not been met\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component pre-requisites not met");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_NOT_SUPPORTED_CODE:
+		dev_err(dev, "%s is not a supported component\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component not supported");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_CANNOT_DOWNGRADE_CODE:
+		dev_err(dev, "Security restrictions prevent %s from being downgraded\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component cannot be downgraded");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_INCOMPLETE_IMAGE_CODE:
+		dev_err(dev, "Received an incomplete component image for %s\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Incomplete component image");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_VER_STR_IDENTICAL_CODE:
+		dev_err(dev, "Component version for %s is identical to the running image\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component version is identical to running image");
+		break;
+	case ICE_AQ_NVM_PASS_COMP_VER_STR_LOWER_CODE:
+		dev_err(dev, "Component version for %s is lower than the running image\n",
+			component);
+		NL_SET_ERR_MSG_MOD(extack, "Component version is lower than the running image");
+		break;
+	default:
+		dev_err(dev, "Unexpected response code 0x02%x for %s\n",
+			code, component);
+		NL_SET_ERR_MSG_MOD(extack, "Received unexpected response code from firmware");
+		break;
+	}
+
+	return -ECANCELED;
+}
+
+/**
+ * ice_send_component_table - Send PLDM component table to firmware
+ * @context: PLDM fw update structure
+ * @component: the component to process
+ * @transfer_flag: relative transfer order of this component
+ *
+ * Read relevant data from the component and forward it to the device
+ * firmware. Check the response to determine if the firmware indicates that
+ * the update can proceed.
+ *
+ * This function sends AdminQ commands related to the NVM, and assumes that
+ * the NVM resource has been acquired.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_send_component_table(struct pldmfw *context, struct pldmfw_component *component,
+			 u8 transfer_flag)
+{
+	struct ice_fwu_priv *priv = container_of(context, struct ice_fwu_priv, context);
+	struct netlink_ext_ack *extack = priv->extack;
+	struct ice_aqc_nvm_comp_tbl *comp_tbl;
+	u8 comp_response, comp_response_code;
+	struct device *dev = context->dev;
+	struct ice_pf *pf = priv->pf;
+	struct ice_hw *hw = &pf->hw;
+	size_t length;
+	int status;
+
+	switch (component->identifier) {
+	case NVM_COMP_ID_OROM:
+	case NVM_COMP_ID_NVM:
+	case NVM_COMP_ID_NETLIST:
+		break;
+	default:
+		dev_err(dev, "Unable to update due to a firmware component with unknown ID %u\n",
+			component->identifier);
+		NL_SET_ERR_MSG_MOD(extack, "Unable to update due to unknown firmware component");
+		return -EOPNOTSUPP;
+	}
+
+	length = struct_size(comp_tbl, cvs, component->version_len);
+	comp_tbl = kzalloc(length, GFP_KERNEL);
+	if (!comp_tbl)
+		return -ENOMEM;
+
+	comp_tbl->comp_class = cpu_to_le16(component->classification);
+	comp_tbl->comp_id = cpu_to_le16(component->identifier);
+	comp_tbl->comp_class_idx = FWU_COMP_CLASS_IDX_NOT_USE;
+	comp_tbl->comp_cmp_stamp = cpu_to_le32(component->comparison_stamp);
+	comp_tbl->cvs_type = component->version_type;
+	comp_tbl->cvs_len = component->version_len;
+	memcpy(comp_tbl->cvs, component->version_string, component->version_len);
+
+	dev_dbg(dev, "Sending component table to firmware:\n");
+
+	status = ice_nvm_pass_component_tbl(hw, (u8 *)comp_tbl, length,
+					    transfer_flag, &comp_response,
+					    &comp_response_code, NULL);
+
+	kfree(comp_tbl);
+
+	if (status) {
+		dev_err(dev, "Failed to transfer component table to firmware, err %d aq_err %s\n",
+			status, ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to transfer component table to firmware");
+		return -EIO;
+	}
+
+	return ice_check_component_response(pf, component->identifier, comp_response,
+					    comp_response_code, extack);
+}
+
+/**
+ * ice_write_one_nvm_block - Write an NVM block and await completion response
+ * @pf: the PF data structure
+ * @module: the module to write to
+ * @offset: offset in bytes
+ * @block_size: size of the block to write, up to 4k
+ * @block: pointer to block of data to write
+ * @last_cmd: whether this is the last command
+ * @reset_level: storage for reset level required
+ * @extack: netlink extended ACK structure
+ *
+ * Write a block of data to a flash module, and await for the completion
+ * response message from firmware.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * On successful return, reset level indicates the device reset required to
+ * complete the update.
+ *
+ *   0 - ICE_AQC_NVM_POR_FLAG - A full power on is required
+ *   1 - ICE_AQC_NVM_PERST_FLAG - A cold PCIe reset is required
+ *   2 - ICE_AQC_NVM_EMPR_FLAG - An EMP reset is required
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_write_one_nvm_block(struct ice_pf *pf, u16 module, u32 offset,
+			u16 block_size, u8 *block, bool last_cmd,
+			u8 *reset_level, struct netlink_ext_ack *extack)
+{
+	u16 completion_module, completion_retval;
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_aq_task task = {};
+	struct ice_hw *hw = &pf->hw;
+	struct ice_aq_desc *desc;
+	u32 completion_offset;
+	int err;
+
+	dev_dbg(dev, "Writing block of %u bytes for module 0x%02x at offset %u\n",
+		block_size, module, offset);
+
+	ice_aq_prep_for_event(pf, &task, ice_aqc_opc_nvm_write);
+
+	err = ice_aq_update_nvm(hw, module, offset, block_size, block,
+				last_cmd, 0, NULL);
+	if (err) {
+		dev_err(dev, "Failed to flash module 0x%02x with block of size %u at offset %u, err %d aq_err %s\n",
+			module, block_size, offset, err,
+			ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to program flash module");
+		return -EIO;
+	}
+
+	/* In most cases, firmware reports a write completion within a few
+	 * milliseconds. However, it has been observed that a completion might
+	 * take more than a second to complete in some cases. The timeout here
+	 * is conservative and is intended to prevent failure to update when
+	 * firmware is slow to respond.
+	 */
+	err = ice_aq_wait_for_event(pf, &task, 15 * HZ);
+	if (err) {
+		dev_err(dev, "Timed out while trying to flash module 0x%02x with block of size %u at offset %u, err %d\n",
+			module, block_size, offset, err);
+		NL_SET_ERR_MSG_MOD(extack, "Timed out waiting for firmware");
+		return -EIO;
+	}
+
+	desc = &task.event.desc;
+	completion_module = le16_to_cpu(desc->params.nvm.module_typeid);
+	completion_retval = le16_to_cpu(desc->retval);
+
+	completion_offset = le16_to_cpu(desc->params.nvm.offset_low);
+	completion_offset |= desc->params.nvm.offset_high << 16;
+
+	if (completion_module != module) {
+		dev_err(dev, "Unexpected module_typeid in write completion: got 0x%x, expected 0x%x\n",
+			completion_module, module);
+		NL_SET_ERR_MSG_MOD(extack, "Unexpected firmware response");
+		return -EIO;
+	}
+
+	if (completion_offset != offset) {
+		dev_err(dev, "Unexpected offset in write completion: got %u, expected %u\n",
+			completion_offset, offset);
+		NL_SET_ERR_MSG_MOD(extack, "Unexpected firmware response");
+		return -EIO;
+	}
+
+	if (completion_retval) {
+		dev_err(dev, "Firmware failed to flash module 0x%02x with block of size %u at offset %u, err %s\n",
+			module, block_size, offset,
+			ice_aq_str((enum ice_aq_err)completion_retval));
+		NL_SET_ERR_MSG_MOD(extack, "Firmware failed to program flash module");
+		return -EIO;
+	}
+
+	/* For the last command to write the NVM bank, newer versions of
+	 * firmware indicate the required level of reset to complete
+	 * activation of firmware. If the firmware supports this, cache the
+	 * response for indicating to the user later. Otherwise, assume that
+	 * a full power cycle is required.
+	 */
+	if (reset_level && last_cmd && module == ICE_SR_1ST_NVM_BANK_PTR) {
+		if (hw->dev_caps.common_cap.pcie_reset_avoidance) {
+			*reset_level = desc->params.nvm.cmd_flags &
+				       ICE_AQC_NVM_RESET_LVL_M;
+			dev_dbg(dev, "Firmware reported required reset level as %u\n",
+				*reset_level);
+		} else {
+			*reset_level = ICE_AQC_NVM_POR_FLAG;
+			dev_dbg(dev, "Firmware doesn't support indicating required reset level. Assuming a power cycle is required\n");
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_write_nvm_module - Write data to an NVM module
+ * @pf: the PF driver structure
+ * @module: the module id to program
+ * @component: the name of the component being updated
+ * @image: buffer of image data to write to the NVM
+ * @length: length of the buffer
+ * @reset_level: storage for reset level required
+ * @extack: netlink extended ACK structure
+ *
+ * Loop over the data for a given NVM module and program it in 4 Kb
+ * blocks. Notify devlink core of progress after each block is programmed.
+ * Loops over a block of data and programs the NVM in 4k block chunks.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_write_nvm_module(struct ice_pf *pf, u16 module, const char *component,
+		     const u8 *image, u32 length, u8 *reset_level,
+		     struct netlink_ext_ack *extack)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct devlink *devlink;
+	u32 offset = 0;
+	bool last_cmd;
+	u8 *block;
+	int err;
+
+	dev_dbg(dev, "Beginning write of flash component '%s', module 0x%02x\n", component, module);
+
+	devlink = priv_to_devlink(pf);
+
+	devlink_flash_update_status_notify(devlink, "Flashing",
+					   component, 0, length);
+
+	block = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
+	if (!block)
+		return -ENOMEM;
+
+	do {
+		u32 block_size;
+
+		block_size = min_t(u32, ICE_AQ_MAX_BUF_LEN, length - offset);
+		last_cmd = !(offset + block_size < length);
+
+		/* ice_aq_update_nvm may copy the firmware response into the
+		 * buffer, so we must make a copy since the source data is
+		 * constant.
+		 */
+		memcpy(block, image + offset, block_size);
+
+		err = ice_write_one_nvm_block(pf, module, offset, block_size,
+					      block, last_cmd, reset_level,
+					      extack);
+		if (err)
+			break;
+
+		offset += block_size;
+
+		devlink_flash_update_status_notify(devlink, "Flashing",
+						   component, offset, length);
+	} while (!last_cmd);
+
+	dev_dbg(dev, "Completed write of flash component '%s', module 0x%02x\n", component, module);
+
+	if (err)
+		devlink_flash_update_status_notify(devlink, "Flashing failed",
+						   component, length, length);
+	else
+		devlink_flash_update_status_notify(devlink, "Flashing done",
+						   component, length, length);
+
+	kfree(block);
+	return err;
+}
+
+/* Length in seconds to wait before timing out when erasing a flash module.
+ * Yes, erasing really can take minutes to complete.
+ */
+#define ICE_FW_ERASE_TIMEOUT 300
+
+/**
+ * ice_erase_nvm_module - Erase an NVM module and await firmware completion
+ * @pf: the PF data structure
+ * @module: the module to erase
+ * @component: name of the component being updated
+ * @extack: netlink extended ACK structure
+ *
+ * Erase the inactive NVM bank associated with this module, and await for
+ * a completion response message from firmware.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_erase_nvm_module(struct ice_pf *pf, u16 module, const char *component,
+		     struct netlink_ext_ack *extack)
+{
+	u16 completion_module, completion_retval;
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_aq_task task = {};
+	struct ice_hw *hw = &pf->hw;
+	struct ice_aq_desc *desc;
+	struct devlink *devlink;
+	int err;
+
+	dev_dbg(dev, "Beginning erase of flash component '%s', module 0x%02x\n", component, module);
+
+	devlink = priv_to_devlink(pf);
+
+	devlink_flash_update_timeout_notify(devlink, "Erasing", component, ICE_FW_ERASE_TIMEOUT);
+
+	ice_aq_prep_for_event(pf, &task, ice_aqc_opc_nvm_erase);
+
+	err = ice_aq_erase_nvm(hw, module, NULL);
+	if (err) {
+		dev_err(dev, "Failed to erase %s (module 0x%02x), err %d aq_err %s\n",
+			component, module, err,
+			ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to erase flash module");
+		err = -EIO;
+		goto out_notify_devlink;
+	}
+
+	err = ice_aq_wait_for_event(pf, &task, ICE_FW_ERASE_TIMEOUT * HZ);
+	if (err) {
+		dev_err(dev, "Timed out waiting for firmware to respond with erase completion for %s (module 0x%02x), err %d\n",
+			component, module, err);
+		NL_SET_ERR_MSG_MOD(extack, "Timed out waiting for firmware");
+		goto out_notify_devlink;
+	}
+
+	desc = &task.event.desc;
+	completion_module = le16_to_cpu(desc->params.nvm.module_typeid);
+	completion_retval = le16_to_cpu(desc->retval);
+
+	if (completion_module != module) {
+		dev_err(dev, "Unexpected module_typeid in erase completion for %s: got 0x%x, expected 0x%x\n",
+			component, completion_module, module);
+		NL_SET_ERR_MSG_MOD(extack, "Unexpected firmware response");
+		err = -EIO;
+		goto out_notify_devlink;
+	}
+
+	if (completion_retval) {
+		dev_err(dev, "Firmware failed to erase %s (module 0x02%x), aq_err %s\n",
+			component, module,
+			ice_aq_str((enum ice_aq_err)completion_retval));
+		NL_SET_ERR_MSG_MOD(extack, "Firmware failed to erase flash");
+		err = -EIO;
+		goto out_notify_devlink;
+	}
+
+	dev_dbg(dev, "Completed erase of flash component '%s', module 0x%02x\n", component, module);
+
+out_notify_devlink:
+	if (err)
+		devlink_flash_update_status_notify(devlink, "Erasing failed",
+						   component, 0, 0);
+	else
+		devlink_flash_update_status_notify(devlink, "Erasing done",
+						   component, 0, 0);
+
+	return err;
+}
+
+/**
+ * ice_switch_flash_banks - Tell firmware to switch NVM banks
+ * @pf: Pointer to the PF data structure
+ * @activate_flags: flags used for the activation command
+ * @emp_reset_available: on return, indicates if EMP reset is available
+ * @extack: netlink extended ACK structure
+ *
+ * Notify firmware to activate the newly written flash banks, and wait for the
+ * firmware response.
+ *
+ * Returns: zero on success or an error code on failure.
+ */
+static int
+ice_switch_flash_banks(struct ice_pf *pf, u8 activate_flags,
+		       u8 *emp_reset_available, struct netlink_ext_ack *extack)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_aq_task task = {};
+	struct ice_hw *hw = &pf->hw;
+	u16 completion_retval;
+	u8 response_flags;
+	int err;
+
+	ice_aq_prep_for_event(pf, &task, ice_aqc_opc_nvm_write_activate);
+
+	err = ice_nvm_write_activate(hw, activate_flags, &response_flags);
+	if (err) {
+		dev_err(dev, "Failed to switch active flash banks, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to switch active flash banks");
+		return -EIO;
+	}
+
+	/* Newer versions of firmware have support to indicate whether an EMP
+	 * reset to reload firmware is available. For older firmware, EMP
+	 * reset is always available.
+	 */
+	if (emp_reset_available) {
+		if (hw->dev_caps.common_cap.reset_restrict_support) {
+			*emp_reset_available = response_flags & ICE_AQC_NVM_EMPR_ENA;
+			dev_dbg(dev, "Firmware indicated that EMP reset is %s\n",
+				*emp_reset_available ?
+				"available" : "not available");
+		} else {
+			*emp_reset_available = ICE_AQC_NVM_EMPR_ENA;
+			dev_dbg(dev, "Firmware does not support restricting EMP reset availability\n");
+		}
+	}
+
+	err = ice_aq_wait_for_event(pf, &task, 30 * HZ);
+	if (err) {
+		dev_err(dev, "Timed out waiting for firmware to switch active flash banks, err %d\n",
+			err);
+		NL_SET_ERR_MSG_MOD(extack, "Timed out waiting for firmware");
+		return err;
+	}
+
+	completion_retval = le16_to_cpu(task.event.desc.retval);
+	if (completion_retval) {
+		dev_err(dev, "Firmware failed to switch active flash banks aq_err %s\n",
+			ice_aq_str((enum ice_aq_err)completion_retval));
+		NL_SET_ERR_MSG_MOD(extack, "Firmware failed to switch active flash banks");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_flash_component - Flash a component of the NVM
+ * @context: PLDM fw update structure
+ * @component: the component table to program
+ *
+ * Program the flash contents for a given component. First, determine the
+ * module id. Then, erase the secondary bank for this module. Finally, write
+ * the contents of the component to the NVM.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_flash_component(struct pldmfw *context, struct pldmfw_component *component)
+{
+	struct ice_fwu_priv *priv = container_of(context, struct ice_fwu_priv, context);
+	struct netlink_ext_ack *extack = priv->extack;
+	struct ice_pf *pf = priv->pf;
+	const char *name;
+	u8 *reset_level;
+	u16 module;
+	u8 flag;
+	int err;
+
+	switch (component->identifier) {
+	case NVM_COMP_ID_OROM:
+		module = ICE_SR_1ST_OROM_BANK_PTR;
+		flag = ICE_AQC_NVM_ACTIV_SEL_OROM;
+		reset_level = NULL;
+		name = "fw.undi";
+		break;
+	case NVM_COMP_ID_NVM:
+		module = ICE_SR_1ST_NVM_BANK_PTR;
+		flag = ICE_AQC_NVM_ACTIV_SEL_NVM;
+		reset_level = &priv->reset_level;
+		name = "fw.mgmt";
+		break;
+	case NVM_COMP_ID_NETLIST:
+		module = ICE_SR_NETLIST_BANK_PTR;
+		flag = ICE_AQC_NVM_ACTIV_SEL_NETLIST;
+		reset_level = NULL;
+		name = "fw.netlist";
+		break;
+	default:
+		/* This should not trigger, since we check the id before
+		 * sending the component table to firmware.
+		 */
+		WARN(1, "Unexpected unknown component identifier 0x%02x",
+		     component->identifier);
+		return -EINVAL;
+	}
+
+	/* Mark this component for activating at the end */
+	priv->activate_flags |= flag;
+
+	err = ice_erase_nvm_module(pf, module, name, extack);
+	if (err)
+		return err;
+
+	return ice_write_nvm_module(pf, module, name, component->component_data,
+				    component->component_size, reset_level,
+				    extack);
+}
+
+/**
+ * ice_finalize_update - Perform last steps to complete device update
+ * @context: PLDM fw update structure
+ *
+ * Called as the last step of the update process. Complete the update by
+ * telling the firmware to switch active banks, and perform a reset of
+ * configured.
+ *
+ * Returns: 0 on success, or an error code on failure.
+ */
+static int ice_finalize_update(struct pldmfw *context)
+{
+	struct ice_fwu_priv *priv = container_of(context, struct ice_fwu_priv, context);
+	struct netlink_ext_ack *extack = priv->extack;
+	struct ice_pf *pf = priv->pf;
+	struct devlink *devlink;
+	int err;
+
+	/* Finally, notify firmware to activate the written NVM banks */
+	err = ice_switch_flash_banks(pf, priv->activate_flags,
+				     &priv->emp_reset_available, extack);
+	if (err)
+		return err;
+
+	devlink = priv_to_devlink(pf);
+
+	/* If the required reset is EMPR, but EMPR is disabled, report that
+	 * a reboot is required instead.
+	 */
+	if (priv->reset_level == ICE_AQC_NVM_EMPR_FLAG &&
+	    !priv->emp_reset_available) {
+		dev_dbg(ice_pf_to_dev(pf), "Firmware indicated EMP reset as sufficient, but EMP reset is disabled\n");
+		priv->reset_level = ICE_AQC_NVM_PERST_FLAG;
+	}
+
+	switch (priv->reset_level) {
+	case ICE_AQC_NVM_EMPR_FLAG:
+		devlink_flash_update_status_notify(devlink,
+						   "Activate new firmware by devlink reload",
+						   NULL, 0, 0);
+		break;
+	case ICE_AQC_NVM_PERST_FLAG:
+		devlink_flash_update_status_notify(devlink,
+						   "Activate new firmware by rebooting the system",
+						   NULL, 0, 0);
+		break;
+	case ICE_AQC_NVM_POR_FLAG:
+	default:
+		devlink_flash_update_status_notify(devlink,
+						   "Activate new firmware by power cycling the system",
+						   NULL, 0, 0);
+		break;
+	}
+
+	pf->fw_emp_reset_disabled = !priv->emp_reset_available;
+
+	return 0;
+}
+
+struct ice_pldm_pci_record_id {
+	u32 vendor;
+	u32 device;
+	u32 subsystem_vendor;
+	u32 subsystem_device;
+};
+
+/**
+ * ice_op_pci_match_record - Check if a PCI device matches the record
+ * @context: PLDM fw update structure
+ * @record: list of records extracted from the PLDM image
+ *
+ * Determine if the PCI device associated with this device matches the record
+ * data provided.
+ *
+ * Searches the descriptor TLVs and extracts the relevant descriptor data into
+ * a pldm_pci_record_id. This is then compared against the PCI device ID
+ * information.
+ *
+ * Returns: true if the device matches the record, false otherwise.
+ */
+static bool
+ice_op_pci_match_record(struct pldmfw *context, struct pldmfw_record *record)
+{
+	struct pci_dev *pdev = to_pci_dev(context->dev);
+	struct ice_pldm_pci_record_id id = {
+		.vendor = PCI_ANY_ID,
+		.device = PCI_ANY_ID,
+		.subsystem_vendor = PCI_ANY_ID,
+		.subsystem_device = PCI_ANY_ID,
+	};
+	struct pldmfw_desc_tlv *desc;
+
+	list_for_each_entry(desc, &record->descs, entry) {
+		u16 value;
+		int *ptr;
+
+		switch (desc->type) {
+		case PLDM_DESC_ID_PCI_VENDOR_ID:
+			ptr = &id.vendor;
+			break;
+		case PLDM_DESC_ID_PCI_DEVICE_ID:
+			ptr = &id.device;
+			break;
+		case PLDM_DESC_ID_PCI_SUBVENDOR_ID:
+			ptr = &id.subsystem_vendor;
+			break;
+		case PLDM_DESC_ID_PCI_SUBDEV_ID:
+			ptr = &id.subsystem_device;
+			break;
+		default:
+			/* Skip unrelated TLVs */
+			continue;
+		}
+
+		value = get_unaligned_le16(desc->data);
+		/* A value of zero for one of the descriptors is sometimes
+		 * used when the record should ignore this field when matching
+		 * device. For example if the record applies to any subsystem
+		 * device or vendor.
+		 */
+		if (value)
+			*ptr = value;
+		else
+			*ptr = PCI_ANY_ID;
+	}
+
+	/* the E822 device can have a generic device ID so check for that */
+	if ((id.vendor == PCI_ANY_ID || id.vendor == pdev->vendor) &&
+	    (id.device == PCI_ANY_ID || id.device == pdev->device ||
+	    id.device == ICE_DEV_ID_E822_SI_DFLT) &&
+	    (id.subsystem_vendor == PCI_ANY_ID ||
+	    id.subsystem_vendor == pdev->subsystem_vendor) &&
+	    (id.subsystem_device == PCI_ANY_ID ||
+	    id.subsystem_device == pdev->subsystem_device))
+		return true;
+
+	return false;
+}
+
+static const struct pldmfw_ops ice_fwu_ops_e810 = {
+	.match_record = &pldmfw_op_pci_match_record,
+	.send_package_data = &ice_send_package_data,
+	.send_component_table = &ice_send_component_table,
+	.flash_component = &ice_flash_component,
+	.finalize_update = &ice_finalize_update,
+};
+
+static const struct pldmfw_ops ice_fwu_ops_e822 = {
+	.match_record = &ice_op_pci_match_record,
+	.send_package_data = &ice_send_package_data,
+	.send_component_table = &ice_send_component_table,
+	.flash_component = &ice_flash_component,
+	.finalize_update = &ice_finalize_update,
+};
+
+/**
+ * ice_get_pending_updates - Check if the component has a pending update
+ * @pf: the PF driver structure
+ * @pending: on return, bitmap of updates pending
+ * @extack: Netlink extended ACK
+ *
+ * Check if the device has any pending updates on any flash components.
+ *
+ * Returns: zero on success, or a negative error code on failure. Updates
+ * pending with the bitmap of pending updates.
+ */
+int ice_get_pending_updates(struct ice_pf *pf, u8 *pending,
+			    struct netlink_ext_ack *extack)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw_dev_caps *dev_caps;
+	struct ice_hw *hw = &pf->hw;
+	int err;
+
+	dev_caps = kzalloc(sizeof(*dev_caps), GFP_KERNEL);
+	if (!dev_caps)
+		return -ENOMEM;
+
+	/* Read the most recent device capabilities from firmware. Do not use
+	 * the cached values in hw->dev_caps, because the pending update flag
+	 * may have changed, e.g. if an update was previously completed and
+	 * the system has not yet rebooted.
+	 */
+	err = ice_discover_dev_caps(hw, dev_caps);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Unable to read device capabilities");
+		kfree(dev_caps);
+		return err;
+	}
+
+	*pending = 0;
+
+	if (dev_caps->common_cap.nvm_update_pending_nvm) {
+		dev_info(dev, "The fw.mgmt flash component has a pending update\n");
+		*pending |= ICE_AQC_NVM_ACTIV_SEL_NVM;
+	}
+
+	if (dev_caps->common_cap.nvm_update_pending_orom) {
+		dev_info(dev, "The fw.undi flash component has a pending update\n");
+		*pending |= ICE_AQC_NVM_ACTIV_SEL_OROM;
+	}
+
+	if (dev_caps->common_cap.nvm_update_pending_netlist) {
+		dev_info(dev, "The fw.netlist flash component has a pending update\n");
+		*pending |= ICE_AQC_NVM_ACTIV_SEL_NETLIST;
+	}
+
+	kfree(dev_caps);
+
+	return 0;
+}
+
+/**
+ * ice_cancel_pending_update - Cancel any pending update for a component
+ * @pf: the PF driver structure
+ * @component: if not NULL, the name of the component being updated
+ * @extack: Netlink extended ACK structure
+ *
+ * Cancel any pending update for the specified component. If component is
+ * NULL, all device updates will be canceled.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+static int
+ice_cancel_pending_update(struct ice_pf *pf, const char *component,
+			  struct netlink_ext_ack *extack)
+{
+	struct devlink *devlink = priv_to_devlink(pf);
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	u8 pending;
+	int err;
+
+	err = ice_get_pending_updates(pf, &pending, extack);
+	if (err)
+		return err;
+
+	/* If the flash_update request is for a specific component, ignore all
+	 * of the other components.
+	 */
+	if (component) {
+		if (strcmp(component, "fw.mgmt") == 0)
+			pending &= ICE_AQC_NVM_ACTIV_SEL_NVM;
+		else if (strcmp(component, "fw.undi") == 0)
+			pending &= ICE_AQC_NVM_ACTIV_SEL_OROM;
+		else if (strcmp(component, "fw.netlist") == 0)
+			pending &= ICE_AQC_NVM_ACTIV_SEL_NETLIST;
+		else
+			WARN(1, "Unexpected flash component %s", component);
+	}
+
+	/* There is no previous pending update, so this request may continue */
+	if (!pending)
+		return 0;
+
+	/* In order to allow overwriting a previous pending update, notify
+	 * firmware to cancel that update by issuing the appropriate command.
+	 */
+	devlink_flash_update_status_notify(devlink,
+					   "Canceling previous pending update",
+					   component, 0, 0);
+
+	err = ice_acquire_nvm(hw, ICE_RES_WRITE);
+	if (err) {
+		dev_err(dev, "Failed to acquire device flash lock, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to acquire device flash lock");
+		return err;
+	}
+
+	pending |= ICE_AQC_NVM_REVERT_LAST_ACTIV;
+	err = ice_switch_flash_banks(pf, pending, NULL, extack);
+
+	ice_release_nvm(hw);
+
+	/* Since we've canceled the pending update, we no longer know if EMP
+	 * reset is restricted.
+	 */
+	pf->fw_emp_reset_disabled = false;
+
+	return err;
+}
+
+/**
+ * ice_devlink_flash_update - Write a firmware image to the device
+ * @devlink: pointer to devlink associated with the device to update
+ * @params: devlink flash update parameters
+ * @extack: netlink extended ACK structure
+ *
+ * Parse the data for a given firmware file, verifying that it is a valid PLDM
+ * formatted image that matches this device.
+ *
+ * Extract the device record Package Data and Component Tables and send them
+ * to the firmware. Extract and write the flash data for each of the three
+ * main flash components, "fw.mgmt", "fw.undi", and "fw.netlist". Notify
+ * firmware once the data is written to the inactive banks.
+ *
+ * Returns: zero on success or a negative error code on failure.
+ */
+int ice_devlink_flash_update(struct devlink *devlink,
+			     struct devlink_flash_update_params *params,
+			     struct netlink_ext_ack *extack)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	struct ice_fwu_priv priv;
+	u8 preservation;
+	int err;
+
+	if (!params->overwrite_mask) {
+		/* preserve all settings and identifiers */
+		preservation = ICE_AQC_NVM_PRESERVE_ALL;
+	} else if (params->overwrite_mask == DEVLINK_FLASH_OVERWRITE_SETTINGS) {
+		/* overwrite settings, but preserve the vital device identifiers */
+		preservation = ICE_AQC_NVM_PRESERVE_SELECTED;
+	} else if (params->overwrite_mask == (DEVLINK_FLASH_OVERWRITE_SETTINGS |
+					      DEVLINK_FLASH_OVERWRITE_IDENTIFIERS)) {
+		/* overwrite both settings and identifiers, preserve nothing */
+		preservation = ICE_AQC_NVM_NO_PRESERVATION;
+	} else {
+		NL_SET_ERR_MSG_MOD(extack, "Requested overwrite mask is not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (!hw->dev_caps.common_cap.nvm_unified_update) {
+		NL_SET_ERR_MSG_MOD(extack, "Current firmware does not support unified update");
+		return -EOPNOTSUPP;
+	}
+
+	memset(&priv, 0, sizeof(priv));
+
+	/* the E822 device needs a slightly different ops */
+	if (hw->mac_type == ICE_MAC_GENERIC)
+		priv.context.ops = &ice_fwu_ops_e822;
+	else
+		priv.context.ops = &ice_fwu_ops_e810;
+	priv.context.dev = dev;
+	priv.extack = extack;
+	priv.pf = pf;
+	priv.activate_flags = preservation;
+
+	devlink_flash_update_status_notify(devlink, "Preparing to flash", NULL, 0, 0);
+
+	err = ice_cancel_pending_update(pf, NULL, extack);
+	if (err)
+		return err;
+
+	err = ice_acquire_nvm(hw, ICE_RES_WRITE);
+	if (err) {
+		dev_err(dev, "Failed to acquire device flash lock, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to acquire device flash lock");
+		return err;
+	}
+
+	err = pldmfw_flash_image(&priv.context, params->fw);
+	if (err == -ENOENT) {
+		dev_err(dev, "Firmware image has no record matching this device\n");
+		NL_SET_ERR_MSG_MOD(extack, "Firmware image has no record matching this device");
+	} else if (err) {
+		/* Do not set a generic extended ACK message here. A more
+		 * specific message may already have been set by one of our
+		 * ops.
+		 */
+		dev_err(dev, "Failed to flash PLDM image, err %d", err);
+	}
+
+	ice_release_nvm(hw);
+
+	return err;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.h b/drivers/net/ethernet/intel/ice/ice_fw_update.h
new file mode 100644
index 0000000000..7505748857
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fw_update.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2019, Intel Corporation. */
+
+#ifndef _ICE_FW_UPDATE_H_
+#define _ICE_FW_UPDATE_H_
+
+int ice_devlink_flash_update(struct devlink *devlink,
+			     struct devlink_flash_update_params *params,
+			     struct netlink_ext_ack *extack);
+int ice_get_pending_updates(struct ice_pf *pf, u8 *pending,
+			    struct netlink_ext_ack *extack);
+
+#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c
new file mode 100644
index 0000000000..75c9de675f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_gnss.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021-2022, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_lib.h"
+
+/**
+ * ice_gnss_do_write - Write data to internal GNSS receiver
+ * @pf: board private structure
+ * @buf: command buffer
+ * @size: command buffer size
+ *
+ * Write UBX command data to the GNSS receiver
+ *
+ * Return:
+ * * number of bytes written - success
+ * * negative - error code
+ */
+static int
+ice_gnss_do_write(struct ice_pf *pf, const unsigned char *buf, unsigned int size)
+{
+	struct ice_aqc_link_topo_addr link_topo;
+	struct ice_hw *hw = &pf->hw;
+	unsigned int offset = 0;
+	int err = 0;
+
+	memset(&link_topo, 0, sizeof(struct ice_aqc_link_topo_addr));
+	link_topo.topo_params.index = ICE_E810T_GNSS_I2C_BUS;
+	link_topo.topo_params.node_type_ctx |=
+		FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M,
+			   ICE_AQC_LINK_TOPO_NODE_CTX_OVERRIDE);
+
+	/* It's not possible to write a single byte to u-blox.
+	 * Write all bytes in a loop until there are 6 or less bytes left. If
+	 * there are exactly 6 bytes left, the last write would be only a byte.
+	 * In this case, do 4+2 bytes writes instead of 5+1. Otherwise, do the
+	 * last 2 to 5 bytes write.
+	 */
+	while (size - offset > ICE_GNSS_UBX_WRITE_BYTES + 1) {
+		err = ice_aq_write_i2c(hw, link_topo, ICE_GNSS_UBX_I2C_BUS_ADDR,
+				       cpu_to_le16(buf[offset]),
+				       ICE_MAX_I2C_WRITE_BYTES,
+				       &buf[offset + 1], NULL);
+		if (err)
+			goto err_out;
+
+		offset += ICE_GNSS_UBX_WRITE_BYTES;
+	}
+
+	/* Single byte would be written. Write 4 bytes instead of 5. */
+	if (size - offset == ICE_GNSS_UBX_WRITE_BYTES + 1) {
+		err = ice_aq_write_i2c(hw, link_topo, ICE_GNSS_UBX_I2C_BUS_ADDR,
+				       cpu_to_le16(buf[offset]),
+				       ICE_MAX_I2C_WRITE_BYTES - 1,
+				       &buf[offset + 1], NULL);
+		if (err)
+			goto err_out;
+
+		offset += ICE_GNSS_UBX_WRITE_BYTES - 1;
+	}
+
+	/* Do the last write, 2 to 5 bytes. */
+	err = ice_aq_write_i2c(hw, link_topo, ICE_GNSS_UBX_I2C_BUS_ADDR,
+			       cpu_to_le16(buf[offset]), size - offset - 1,
+			       &buf[offset + 1], NULL);
+	if (err)
+		goto err_out;
+
+	return size;
+
+err_out:
+	dev_err(ice_pf_to_dev(pf), "GNSS failed to write, offset=%u, size=%u, err=%d\n",
+		offset, size, err);
+
+	return err;
+}
+
+/**
+ * ice_gnss_read - Read data from internal GNSS module
+ * @work: GNSS read work structure
+ *
+ * Read the data from internal GNSS receiver, write it to gnss_dev.
+ */
+static void ice_gnss_read(struct kthread_work *work)
+{
+	struct gnss_serial *gnss = container_of(work, struct gnss_serial,
+						read_work.work);
+	unsigned long delay = ICE_GNSS_POLL_DATA_DELAY_TIME;
+	unsigned int i, bytes_read, data_len, count;
+	struct ice_aqc_link_topo_addr link_topo;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	__be16 data_len_b;
+	char *buf = NULL;
+	u8 i2c_params;
+	int err = 0;
+
+	pf = gnss->back;
+	if (!pf || !test_bit(ICE_FLAG_GNSS, pf->flags))
+		return;
+
+	hw = &pf->hw;
+
+	memset(&link_topo, 0, sizeof(struct ice_aqc_link_topo_addr));
+	link_topo.topo_params.index = ICE_E810T_GNSS_I2C_BUS;
+	link_topo.topo_params.node_type_ctx |=
+		FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M,
+			   ICE_AQC_LINK_TOPO_NODE_CTX_OVERRIDE);
+
+	i2c_params = ICE_GNSS_UBX_DATA_LEN_WIDTH |
+		     ICE_AQC_I2C_USE_REPEATED_START;
+
+	err = ice_aq_read_i2c(hw, link_topo, ICE_GNSS_UBX_I2C_BUS_ADDR,
+			      cpu_to_le16(ICE_GNSS_UBX_DATA_LEN_H),
+			      i2c_params, (u8 *)&data_len_b, NULL);
+	if (err)
+		goto requeue;
+
+	data_len = be16_to_cpu(data_len_b);
+	if (data_len == 0 || data_len == U16_MAX)
+		goto requeue;
+
+	/* The u-blox has data_len bytes for us to read */
+
+	data_len = min_t(typeof(data_len), data_len, PAGE_SIZE);
+
+	buf = (char *)get_zeroed_page(GFP_KERNEL);
+	if (!buf) {
+		err = -ENOMEM;
+		goto requeue;
+	}
+
+	/* Read received data */
+	for (i = 0; i < data_len; i += bytes_read) {
+		unsigned int bytes_left = data_len - i;
+
+		bytes_read = min_t(typeof(bytes_left), bytes_left,
+				   ICE_MAX_I2C_DATA_SIZE);
+
+		err = ice_aq_read_i2c(hw, link_topo, ICE_GNSS_UBX_I2C_BUS_ADDR,
+				      cpu_to_le16(ICE_GNSS_UBX_EMPTY_DATA),
+				      bytes_read, &buf[i], NULL);
+		if (err)
+			goto free_buf;
+	}
+
+	count = gnss_insert_raw(pf->gnss_dev, buf, i);
+	if (count != i)
+		dev_warn(ice_pf_to_dev(pf),
+			 "gnss_insert_raw ret=%d size=%d\n",
+			 count, i);
+	delay = ICE_GNSS_TIMER_DELAY_TIME;
+free_buf:
+	free_page((unsigned long)buf);
+requeue:
+	kthread_queue_delayed_work(gnss->kworker, &gnss->read_work, delay);
+	if (err)
+		dev_dbg(ice_pf_to_dev(pf), "GNSS failed to read err=%d\n", err);
+}
+
+/**
+ * ice_gnss_struct_init - Initialize GNSS receiver
+ * @pf: Board private structure
+ *
+ * Initialize GNSS structures and workers.
+ *
+ * Return:
+ * * pointer to initialized gnss_serial struct - success
+ * * NULL - error
+ */
+static struct gnss_serial *ice_gnss_struct_init(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct kthread_worker *kworker;
+	struct gnss_serial *gnss;
+
+	gnss = kzalloc(sizeof(*gnss), GFP_KERNEL);
+	if (!gnss)
+		return NULL;
+
+	gnss->back = pf;
+	pf->gnss_serial = gnss;
+
+	kthread_init_delayed_work(&gnss->read_work, ice_gnss_read);
+	kworker = kthread_create_worker(0, "ice-gnss-%s", dev_name(dev));
+	if (IS_ERR(kworker)) {
+		kfree(gnss);
+		return NULL;
+	}
+
+	gnss->kworker = kworker;
+
+	return gnss;
+}
+
+/**
+ * ice_gnss_open - Open GNSS device
+ * @gdev: pointer to the gnss device struct
+ *
+ * Open GNSS device and start filling the read buffer for consumer.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - error code
+ */
+static int ice_gnss_open(struct gnss_device *gdev)
+{
+	struct ice_pf *pf = gnss_get_drvdata(gdev);
+	struct gnss_serial *gnss;
+
+	if (!pf)
+		return -EFAULT;
+
+	if (!test_bit(ICE_FLAG_GNSS, pf->flags))
+		return -EFAULT;
+
+	gnss = pf->gnss_serial;
+	if (!gnss)
+		return -ENODEV;
+
+	kthread_queue_delayed_work(gnss->kworker, &gnss->read_work, 0);
+
+	return 0;
+}
+
+/**
+ * ice_gnss_close - Close GNSS device
+ * @gdev: pointer to the gnss device struct
+ *
+ * Close GNSS device, cancel worker, stop filling the read buffer.
+ */
+static void ice_gnss_close(struct gnss_device *gdev)
+{
+	struct ice_pf *pf = gnss_get_drvdata(gdev);
+	struct gnss_serial *gnss;
+
+	if (!pf)
+		return;
+
+	gnss = pf->gnss_serial;
+	if (!gnss)
+		return;
+
+	kthread_cancel_delayed_work_sync(&gnss->read_work);
+}
+
+/**
+ * ice_gnss_write - Write to GNSS device
+ * @gdev: pointer to the gnss device struct
+ * @buf: pointer to the user data
+ * @count: size of the buffer to be sent to the GNSS device
+ *
+ * Return:
+ * * number of written bytes - success
+ * * negative - error code
+ */
+static int
+ice_gnss_write(struct gnss_device *gdev, const unsigned char *buf,
+	       size_t count)
+{
+	struct ice_pf *pf = gnss_get_drvdata(gdev);
+	struct gnss_serial *gnss;
+
+	/* We cannot write a single byte using our I2C implementation. */
+	if (count <= 1 || count > ICE_GNSS_TTY_WRITE_BUF)
+		return -EINVAL;
+
+	if (!pf)
+		return -EFAULT;
+
+	if (!test_bit(ICE_FLAG_GNSS, pf->flags))
+		return -EFAULT;
+
+	gnss = pf->gnss_serial;
+	if (!gnss)
+		return -ENODEV;
+
+	return ice_gnss_do_write(pf, buf, count);
+}
+
+static const struct gnss_operations ice_gnss_ops = {
+	.open = ice_gnss_open,
+	.close = ice_gnss_close,
+	.write_raw = ice_gnss_write,
+};
+
+/**
+ * ice_gnss_register - Register GNSS receiver
+ * @pf: Board private structure
+ *
+ * Allocate and register GNSS receiver in the Linux GNSS subsystem.
+ *
+ * Return:
+ * * 0 - success
+ * * negative - error code
+ */
+static int ice_gnss_register(struct ice_pf *pf)
+{
+	struct gnss_device *gdev;
+	int ret;
+
+	gdev = gnss_allocate_device(ice_pf_to_dev(pf));
+	if (!gdev) {
+		dev_err(ice_pf_to_dev(pf),
+			"gnss_allocate_device returns NULL\n");
+		return -ENOMEM;
+	}
+
+	gdev->ops = &ice_gnss_ops;
+	gdev->type = GNSS_TYPE_UBX;
+	gnss_set_drvdata(gdev, pf);
+	ret = gnss_register_device(gdev);
+	if (ret) {
+		dev_err(ice_pf_to_dev(pf), "gnss_register_device err=%d\n",
+			ret);
+		gnss_put_device(gdev);
+	} else {
+		pf->gnss_dev = gdev;
+	}
+
+	return ret;
+}
+
+/**
+ * ice_gnss_deregister - Deregister GNSS receiver
+ * @pf: Board private structure
+ *
+ * Deregister GNSS receiver from the Linux GNSS subsystem,
+ * release its resources.
+ */
+static void ice_gnss_deregister(struct ice_pf *pf)
+{
+	if (pf->gnss_dev) {
+		gnss_deregister_device(pf->gnss_dev);
+		gnss_put_device(pf->gnss_dev);
+		pf->gnss_dev = NULL;
+	}
+}
+
+/**
+ * ice_gnss_init - Initialize GNSS support
+ * @pf: Board private structure
+ */
+void ice_gnss_init(struct ice_pf *pf)
+{
+	int ret;
+
+	pf->gnss_serial = ice_gnss_struct_init(pf);
+	if (!pf->gnss_serial)
+		return;
+
+	ret = ice_gnss_register(pf);
+	if (!ret) {
+		set_bit(ICE_FLAG_GNSS, pf->flags);
+		dev_info(ice_pf_to_dev(pf), "GNSS init successful\n");
+	} else {
+		ice_gnss_exit(pf);
+		dev_err(ice_pf_to_dev(pf), "GNSS init failure\n");
+	}
+}
+
+/**
+ * ice_gnss_exit - Disable GNSS TTY support
+ * @pf: Board private structure
+ */
+void ice_gnss_exit(struct ice_pf *pf)
+{
+	ice_gnss_deregister(pf);
+	clear_bit(ICE_FLAG_GNSS, pf->flags);
+
+	if (pf->gnss_serial) {
+		struct gnss_serial *gnss = pf->gnss_serial;
+
+		kthread_cancel_delayed_work_sync(&gnss->read_work);
+		kthread_destroy_worker(gnss->kworker);
+		gnss->kworker = NULL;
+
+		kfree(gnss);
+		pf->gnss_serial = NULL;
+	}
+}
+
+/**
+ * ice_gnss_is_gps_present - Check if GPS HW is present
+ * @hw: pointer to HW struct
+ */
+bool ice_gnss_is_gps_present(struct ice_hw *hw)
+{
+	if (!hw->func_caps.ts_func_info.src_tmr_owned)
+		return false;
+
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+	if (ice_is_e810t(hw)) {
+		int err;
+		u8 data;
+
+		err = ice_read_pca9575_reg_e810t(hw, ICE_PCA9575_P0_IN, &data);
+		if (err || !!(data & ICE_E810T_P0_GNSS_PRSNT_N))
+			return false;
+	} else {
+		return false;
+	}
+#else
+	if (!ice_is_e810t(hw))
+		return false;
+#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
+
+	return true;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.h b/drivers/net/ethernet/intel/ice/ice_gnss.h
new file mode 100644
index 0000000000..75e567ad70
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_gnss.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021-2022, Intel Corporation. */
+
+#ifndef _ICE_GNSS_H_
+#define _ICE_GNSS_H_
+
+#define ICE_E810T_GNSS_I2C_BUS		0x2
+#define ICE_GNSS_POLL_DATA_DELAY_TIME	(HZ / 50) /* poll every 20 ms */
+#define ICE_GNSS_TIMER_DELAY_TIME	(HZ / 10) /* 0.1 second per message */
+#define ICE_GNSS_TTY_WRITE_BUF		250
+#define ICE_MAX_I2C_DATA_SIZE		FIELD_MAX(ICE_AQC_I2C_DATA_SIZE_M)
+#define ICE_MAX_I2C_WRITE_BYTES		4
+
+/* u-blox ZED-F9T specific definitions */
+#define ICE_GNSS_UBX_I2C_BUS_ADDR	0x42
+/* Data length register is big endian */
+#define ICE_GNSS_UBX_DATA_LEN_H		0xFD
+#define ICE_GNSS_UBX_DATA_LEN_WIDTH	2
+#define ICE_GNSS_UBX_EMPTY_DATA		0xFF
+/* For u-blox writes are performed without address so the first byte to write is
+ * passed as I2C addr parameter.
+ */
+#define ICE_GNSS_UBX_WRITE_BYTES	(ICE_MAX_I2C_WRITE_BYTES + 1)
+
+/**
+ * struct gnss_serial - data used to initialize GNSS TTY port
+ * @back: back pointer to PF
+ * @kworker: kwork thread for handling periodic work
+ * @read_work: read_work function for handling GNSS reads
+ */
+struct gnss_serial {
+	struct ice_pf *back;
+	struct kthread_worker *kworker;
+	struct kthread_delayed_work read_work;
+};
+
+#if IS_ENABLED(CONFIG_GNSS)
+void ice_gnss_init(struct ice_pf *pf);
+void ice_gnss_exit(struct ice_pf *pf);
+bool ice_gnss_is_gps_present(struct ice_hw *hw);
+#else
+static inline void ice_gnss_init(struct ice_pf *pf) { }
+static inline void ice_gnss_exit(struct ice_pf *pf) { }
+static inline bool ice_gnss_is_gps_present(struct ice_hw *hw)
+{
+	return false;
+}
+#endif /* IS_ENABLED(CONFIG_GNSS) */
+#endif /* _ICE_GNSS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
new file mode 100644
index 0000000000..531cc21947
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -0,0 +1,506 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* Machine-generated file */
+
+#ifndef _ICE_HW_AUTOGEN_H_
+#define _ICE_HW_AUTOGEN_H_
+
+#define QTX_COMM_DBELL(_DBQM)			(0x002C0000 + ((_DBQM) * 4))
+#define QTX_COMM_HEAD(_DBQM)			(0x000E0000 + ((_DBQM) * 4))
+#define QTX_COMM_HEAD_HEAD_S			0
+#define QTX_COMM_HEAD_HEAD_M			ICE_M(0x1FFF, 0)
+#define PF_FW_ARQBAH				0x00080180
+#define PF_FW_ARQBAL				0x00080080
+#define PF_FW_ARQH				0x00080380
+#define PF_FW_ARQH_ARQH_M			ICE_M(0x3FF, 0)
+#define PF_FW_ARQLEN				0x00080280
+#define PF_FW_ARQLEN_ARQLEN_M			ICE_M(0x3FF, 0)
+#define PF_FW_ARQLEN_ARQVFE_M			BIT(28)
+#define PF_FW_ARQLEN_ARQOVFL_M			BIT(29)
+#define PF_FW_ARQLEN_ARQCRIT_M			BIT(30)
+#define PF_FW_ARQLEN_ARQENABLE_M		BIT(31)
+#define PF_FW_ARQT				0x00080480
+#define PF_FW_ATQBAH				0x00080100
+#define PF_FW_ATQBAL				0x00080000
+#define PF_FW_ATQH				0x00080300
+#define PF_FW_ATQH_ATQH_M			ICE_M(0x3FF, 0)
+#define PF_FW_ATQLEN				0x00080200
+#define PF_FW_ATQLEN_ATQLEN_M			ICE_M(0x3FF, 0)
+#define PF_FW_ATQLEN_ATQVFE_M			BIT(28)
+#define PF_FW_ATQLEN_ATQOVFL_M			BIT(29)
+#define PF_FW_ATQLEN_ATQCRIT_M			BIT(30)
+#define VF_MBX_ARQLEN(_VF)			(0x0022BC00 + ((_VF) * 4))
+#define VF_MBX_ATQLEN(_VF)			(0x0022A800 + ((_VF) * 4))
+#define PF_FW_ATQLEN_ATQENABLE_M		BIT(31)
+#define PF_FW_ATQT				0x00080400
+#define PF_MBX_ARQBAH				0x0022E400
+#define PF_MBX_ARQBAL				0x0022E380
+#define PF_MBX_ARQH				0x0022E500
+#define PF_MBX_ARQH_ARQH_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ARQLEN				0x0022E480
+#define PF_MBX_ARQLEN_ARQLEN_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ARQLEN_ARQCRIT_M			BIT(30)
+#define PF_MBX_ARQLEN_ARQENABLE_M		BIT(31)
+#define PF_MBX_ARQT				0x0022E580
+#define PF_MBX_ATQBAH				0x0022E180
+#define PF_MBX_ATQBAL				0x0022E100
+#define PF_MBX_ATQH				0x0022E280
+#define PF_MBX_ATQH_ATQH_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ATQLEN				0x0022E200
+#define PF_MBX_ATQLEN_ATQLEN_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ATQLEN_ATQCRIT_M			BIT(30)
+#define PF_MBX_ATQLEN_ATQENABLE_M		BIT(31)
+#define PF_MBX_ATQT				0x0022E300
+#define PF_SB_ARQBAH				0x0022FF00
+#define PF_SB_ARQBAH_ARQBAH_S			0
+#define PF_SB_ARQBAH_ARQBAH_M			ICE_M(0xFFFFFFFF, 0)
+#define PF_SB_ARQBAL				0x0022FE80
+#define PF_SB_ARQBAL_ARQBAL_LSB_S		0
+#define PF_SB_ARQBAL_ARQBAL_LSB_M		ICE_M(0x3F, 0)
+#define PF_SB_ARQBAL_ARQBAL_S			6
+#define PF_SB_ARQBAL_ARQBAL_M			ICE_M(0x3FFFFFF, 6)
+#define PF_SB_ARQH				0x00230000
+#define PF_SB_ARQH_ARQH_S			0
+#define PF_SB_ARQH_ARQH_M			ICE_M(0x3FF, 0)
+#define PF_SB_ARQLEN				0x0022FF80
+#define PF_SB_ARQLEN_ARQLEN_S			0
+#define PF_SB_ARQLEN_ARQLEN_M			ICE_M(0x3FF, 0)
+#define PF_SB_ARQLEN_ARQVFE_S			28
+#define PF_SB_ARQLEN_ARQVFE_M			BIT(28)
+#define PF_SB_ARQLEN_ARQOVFL_S			29
+#define PF_SB_ARQLEN_ARQOVFL_M			BIT(29)
+#define PF_SB_ARQLEN_ARQCRIT_S			30
+#define PF_SB_ARQLEN_ARQCRIT_M			BIT(30)
+#define PF_SB_ARQLEN_ARQENABLE_S		31
+#define PF_SB_ARQLEN_ARQENABLE_M		BIT(31)
+#define PF_SB_ARQT				0x00230080
+#define PF_SB_ARQT_ARQT_S			0
+#define PF_SB_ARQT_ARQT_M			ICE_M(0x3FF, 0)
+#define PF_SB_ATQBAH				0x0022FC80
+#define PF_SB_ATQBAH_ATQBAH_S			0
+#define PF_SB_ATQBAH_ATQBAH_M			ICE_M(0xFFFFFFFF, 0)
+#define PF_SB_ATQBAL				0x0022FC00
+#define PF_SB_ATQBAL_ATQBAL_S			6
+#define PF_SB_ATQBAL_ATQBAL_M			ICE_M(0x3FFFFFF, 6)
+#define PF_SB_ATQH				0x0022FD80
+#define PF_SB_ATQH_ATQH_S			0
+#define PF_SB_ATQH_ATQH_M			ICE_M(0x3FF, 0)
+#define PF_SB_ATQLEN				0x0022FD00
+#define PF_SB_ATQLEN_ATQLEN_S			0
+#define PF_SB_ATQLEN_ATQLEN_M			ICE_M(0x3FF, 0)
+#define PF_SB_ATQLEN_ATQVFE_S			28
+#define PF_SB_ATQLEN_ATQVFE_M			BIT(28)
+#define PF_SB_ATQLEN_ATQOVFL_S			29
+#define PF_SB_ATQLEN_ATQOVFL_M			BIT(29)
+#define PF_SB_ATQLEN_ATQCRIT_S			30
+#define PF_SB_ATQLEN_ATQCRIT_M			BIT(30)
+#define PF_SB_ATQLEN_ATQENABLE_S		31
+#define PF_SB_ATQLEN_ATQENABLE_M		BIT(31)
+#define PF_SB_ATQT				0x0022FE00
+#define PF_SB_ATQT_ATQT_S			0
+#define PF_SB_ATQT_ATQT_M			ICE_M(0x3FF, 0)
+#define PF_SB_REM_DEV_CTL			0x002300F0
+#define PRTDCB_GENC				0x00083000
+#define PRTDCB_GENC_PFCLDA_S			16
+#define PRTDCB_GENC_PFCLDA_M			ICE_M(0xFFFF, 16)
+#define PRTDCB_GENS				0x00083020
+#define PRTDCB_GENS_DCBX_STATUS_S		0
+#define PRTDCB_GENS_DCBX_STATUS_M		ICE_M(0x7, 0)
+#define PRTDCB_TUP2TC				0x001D26C0
+#define GL_PREEXT_L2_PMASK0(_i)			(0x0020F0FC + ((_i) * 4))
+#define GL_PREEXT_L2_PMASK1(_i)			(0x0020F108 + ((_i) * 4))
+#define GLFLXP_RXDID_FLAGS(_i, _j)              (0x0045D000 + ((_i) * 4 + (_j) * 256))
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S       0
+#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M       ICE_M(0x3F, 0)
+#define GLFLXP_RXDID_FLX_WRD_0(_i)		(0x0045c800 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S	0
+#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_M	ICE_M(0xFF, 0)
+#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_S	30
+#define GLFLXP_RXDID_FLX_WRD_0_RXDID_OPCODE_M	ICE_M(0x3, 30)
+#define GLFLXP_RXDID_FLX_WRD_1(_i)		(0x0045c900 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_S	0
+#define GLFLXP_RXDID_FLX_WRD_1_PROT_MDID_M	ICE_M(0xFF, 0)
+#define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_S	30
+#define GLFLXP_RXDID_FLX_WRD_1_RXDID_OPCODE_M	ICE_M(0x3, 30)
+#define GLFLXP_RXDID_FLX_WRD_2(_i)		(0x0045ca00 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_S	0
+#define GLFLXP_RXDID_FLX_WRD_2_PROT_MDID_M	ICE_M(0xFF, 0)
+#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_S	30
+#define GLFLXP_RXDID_FLX_WRD_2_RXDID_OPCODE_M	ICE_M(0x3, 30)
+#define GLFLXP_RXDID_FLX_WRD_3(_i)		(0x0045cb00 + ((_i) * 4))
+#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_S	0
+#define GLFLXP_RXDID_FLX_WRD_3_PROT_MDID_M	ICE_M(0xFF, 0)
+#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_S	30
+#define GLFLXP_RXDID_FLX_WRD_3_RXDID_OPCODE_M	ICE_M(0x3, 30)
+#define QRXFLXP_CNTXT(_QRX)			(0x00480000 + ((_QRX) * 4))
+#define QRXFLXP_CNTXT_RXDID_IDX_S		0
+#define QRXFLXP_CNTXT_RXDID_IDX_M		ICE_M(0x3F, 0)
+#define QRXFLXP_CNTXT_RXDID_PRIO_S		8
+#define QRXFLXP_CNTXT_RXDID_PRIO_M		ICE_M(0x7, 8)
+#define QRXFLXP_CNTXT_TS_M			BIT(11)
+#define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S		4
+#define GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M		ICE_M(0x3, 4)
+#define GLGEN_CLKSTAT_SRC			0x000B826C
+#define GLGEN_GPIO_CTL(_i)			(0x000880C8 + ((_i) * 4))
+#define GLGEN_GPIO_CTL_PIN_DIR_M		BIT(4)
+#define GLGEN_GPIO_CTL_PIN_FUNC_S		8
+#define GLGEN_GPIO_CTL_PIN_FUNC_M		ICE_M(0xF, 8)
+#define GLGEN_RSTAT				0x000B8188
+#define GLGEN_RSTAT_DEVSTATE_M			ICE_M(0x3, 0)
+#define GLGEN_RSTCTL				0x000B8180
+#define GLGEN_RSTCTL_GRSTDEL_S			0
+#define GLGEN_RSTCTL_GRSTDEL_M			ICE_M(0x3F, GLGEN_RSTCTL_GRSTDEL_S)
+#define GLGEN_RSTAT_RESET_TYPE_S		2
+#define GLGEN_RSTAT_RESET_TYPE_M		ICE_M(0x3, 2)
+#define GLGEN_RTRIG				0x000B8190
+#define GLGEN_RTRIG_CORER_M			BIT(0)
+#define GLGEN_RTRIG_GLOBR_M			BIT(1)
+#define GLGEN_STAT				0x000B612C
+#define GLGEN_VFLRSTAT(_i)			(0x00093A04 + ((_i) * 4))
+#define PFGEN_CTRL				0x00091000
+#define PFGEN_CTRL_PFSWR_M			BIT(0)
+#define PFGEN_STATE				0x00088000
+#define PRTGEN_STATUS				0x000B8100
+#define VFGEN_RSTAT(_VF)			(0x00074000 + ((_VF) * 4))
+#define VPGEN_VFRSTAT(_VF)			(0x00090800 + ((_VF) * 4))
+#define VPGEN_VFRSTAT_VFRD_M			BIT(0)
+#define VPGEN_VFRTRIG(_VF)			(0x00090000 + ((_VF) * 4))
+#define VPGEN_VFRTRIG_VFSWR_M			BIT(0)
+#define GLINT_CTL				0x0016CC54
+#define GLINT_CTL_DIS_AUTOMASK_M		BIT(0)
+#define GLINT_CTL_ITR_GRAN_200_S		16
+#define GLINT_CTL_ITR_GRAN_200_M		ICE_M(0xF, 16)
+#define GLINT_CTL_ITR_GRAN_100_S		20
+#define GLINT_CTL_ITR_GRAN_100_M		ICE_M(0xF, 20)
+#define GLINT_CTL_ITR_GRAN_50_S			24
+#define GLINT_CTL_ITR_GRAN_50_M			ICE_M(0xF, 24)
+#define GLINT_CTL_ITR_GRAN_25_S			28
+#define GLINT_CTL_ITR_GRAN_25_M			ICE_M(0xF, 28)
+#define GLINT_DYN_CTL(_INT)			(0x00160000 + ((_INT) * 4))
+#define GLINT_DYN_CTL_INTENA_M			BIT(0)
+#define GLINT_DYN_CTL_CLEARPBA_M		BIT(1)
+#define GLINT_DYN_CTL_SWINT_TRIG_M		BIT(2)
+#define GLINT_DYN_CTL_ITR_INDX_S		3
+#define GLINT_DYN_CTL_ITR_INDX_M		ICE_M(0x3, 3)
+#define GLINT_DYN_CTL_INTERVAL_S		5
+#define GLINT_DYN_CTL_INTERVAL_M		ICE_M(0xFFF, 5)
+#define GLINT_DYN_CTL_SW_ITR_INDX_ENA_M		BIT(24)
+#define GLINT_DYN_CTL_SW_ITR_INDX_S		25
+#define GLINT_DYN_CTL_SW_ITR_INDX_M		ICE_M(0x3, 25)
+#define GLINT_DYN_CTL_WB_ON_ITR_M		BIT(30)
+#define GLINT_DYN_CTL_INTENA_MSK_M		BIT(31)
+#define GLINT_ITR(_i, _INT)			(0x00154000 + ((_i) * 8192 + (_INT) * 4))
+#define GLINT_RATE(_INT)			(0x0015A000 + ((_INT) * 4))
+#define GLINT_RATE_INTRL_ENA_M			BIT(6)
+#define GLINT_VECT2FUNC(_INT)			(0x00162000 + ((_INT) * 4))
+#define GLINT_VECT2FUNC_VF_NUM_S		0
+#define GLINT_VECT2FUNC_VF_NUM_M		ICE_M(0xFF, 0)
+#define GLINT_VECT2FUNC_PF_NUM_S		12
+#define GLINT_VECT2FUNC_PF_NUM_M		ICE_M(0x7, 12)
+#define GLINT_VECT2FUNC_IS_PF_S			16
+#define GLINT_VECT2FUNC_IS_PF_M			BIT(16)
+#define PFINT_FW_CTL				0x0016C800
+#define PFINT_FW_CTL_MSIX_INDX_M		ICE_M(0x7FF, 0)
+#define PFINT_FW_CTL_ITR_INDX_S			11
+#define PFINT_FW_CTL_ITR_INDX_M			ICE_M(0x3, 11)
+#define PFINT_FW_CTL_CAUSE_ENA_M		BIT(30)
+#define PFINT_MBX_CTL				0x0016B280
+#define PFINT_MBX_CTL_MSIX_INDX_M		ICE_M(0x7FF, 0)
+#define PFINT_MBX_CTL_ITR_INDX_S		11
+#define PFINT_MBX_CTL_ITR_INDX_M		ICE_M(0x3, 11)
+#define PFINT_MBX_CTL_CAUSE_ENA_M		BIT(30)
+#define PFINT_OICR				0x0016CA00
+#define PFINT_OICR_TSYN_TX_M			BIT(11)
+#define PFINT_OICR_TSYN_EVNT_M			BIT(12)
+#define PFINT_OICR_ECC_ERR_M			BIT(16)
+#define PFINT_OICR_MAL_DETECT_M			BIT(19)
+#define PFINT_OICR_GRST_M			BIT(20)
+#define PFINT_OICR_PCI_EXCEPTION_M		BIT(21)
+#define PFINT_OICR_HMC_ERR_M			BIT(26)
+#define PFINT_OICR_PE_PUSH_M			BIT(27)
+#define PFINT_OICR_PE_CRITERR_M			BIT(28)
+#define PFINT_OICR_VFLR_M			BIT(29)
+#define PFINT_OICR_SWINT_M			BIT(31)
+#define PFINT_OICR_CTL				0x0016CA80
+#define PFINT_OICR_CTL_MSIX_INDX_M		ICE_M(0x7FF, 0)
+#define PFINT_OICR_CTL_ITR_INDX_S		11
+#define PFINT_OICR_CTL_ITR_INDX_M		ICE_M(0x3, 11)
+#define PFINT_OICR_CTL_CAUSE_ENA_M		BIT(30)
+#define PFINT_OICR_ENA				0x0016C900
+#define PFINT_SB_CTL				0x0016B600
+#define PFINT_SB_CTL_MSIX_INDX_M		ICE_M(0x7FF, 0)
+#define PFINT_SB_CTL_CAUSE_ENA_M		BIT(30)
+#define QINT_RQCTL(_QRX)			(0x00150000 + ((_QRX) * 4))
+#define QINT_RQCTL_MSIX_INDX_S			0
+#define QINT_RQCTL_MSIX_INDX_M			ICE_M(0x7FF, 0)
+#define QINT_RQCTL_ITR_INDX_S			11
+#define QINT_RQCTL_ITR_INDX_M			ICE_M(0x3, 11)
+#define QINT_RQCTL_CAUSE_ENA_M			BIT(30)
+#define QINT_TQCTL(_DBQM)			(0x00140000 + ((_DBQM) * 4))
+#define QINT_TQCTL_MSIX_INDX_S			0
+#define QINT_TQCTL_MSIX_INDX_M			ICE_M(0x7FF, 0)
+#define QINT_TQCTL_ITR_INDX_S			11
+#define QINT_TQCTL_ITR_INDX_M			ICE_M(0x3, 11)
+#define QINT_TQCTL_CAUSE_ENA_M			BIT(30)
+#define VPINT_ALLOC(_VF)			(0x001D1000 + ((_VF) * 4))
+#define VPINT_ALLOC_FIRST_S			0
+#define VPINT_ALLOC_FIRST_M			ICE_M(0x7FF, 0)
+#define VPINT_ALLOC_LAST_S			12
+#define VPINT_ALLOC_LAST_M			ICE_M(0x7FF, 12)
+#define VPINT_ALLOC_VALID_M			BIT(31)
+#define VPINT_ALLOC_PCI(_VF)			(0x0009D000 + ((_VF) * 4))
+#define VPINT_ALLOC_PCI_FIRST_S			0
+#define VPINT_ALLOC_PCI_FIRST_M			ICE_M(0x7FF, 0)
+#define VPINT_ALLOC_PCI_LAST_S			12
+#define VPINT_ALLOC_PCI_LAST_M			ICE_M(0x7FF, 12)
+#define VPINT_ALLOC_PCI_VALID_M			BIT(31)
+#define VPINT_MBX_CTL(_VSI)			(0x0016A000 + ((_VSI) * 4))
+#define VPINT_MBX_CTL_CAUSE_ENA_M		BIT(30)
+#define GLLAN_RCTL_0				0x002941F8
+#define QRX_CONTEXT(_i, _QRX)			(0x00280000 + ((_i) * 8192 + (_QRX) * 4))
+#define QRX_CTRL(_QRX)				(0x00120000 + ((_QRX) * 4))
+#define QRX_CTRL_MAX_INDEX			2047
+#define QRX_CTRL_QENA_REQ_S			0
+#define QRX_CTRL_QENA_REQ_M			BIT(0)
+#define QRX_CTRL_QENA_STAT_S			2
+#define QRX_CTRL_QENA_STAT_M			BIT(2)
+#define QRX_ITR(_QRX)				(0x00292000 + ((_QRX) * 4))
+#define QRX_TAIL(_QRX)				(0x00290000 + ((_QRX) * 4))
+#define QRX_TAIL_MAX_INDEX			2047
+#define QRX_TAIL_TAIL_S				0
+#define QRX_TAIL_TAIL_M				ICE_M(0x1FFF, 0)
+#define VPLAN_RX_QBASE(_VF)			(0x00072000 + ((_VF) * 4))
+#define VPLAN_RX_QBASE_VFFIRSTQ_S		0
+#define VPLAN_RX_QBASE_VFFIRSTQ_M		ICE_M(0x7FF, 0)
+#define VPLAN_RX_QBASE_VFNUMQ_S			16
+#define VPLAN_RX_QBASE_VFNUMQ_M			ICE_M(0xFF, 16)
+#define VPLAN_RXQ_MAPENA(_VF)			(0x00073000 + ((_VF) * 4))
+#define VPLAN_RXQ_MAPENA_RX_ENA_M		BIT(0)
+#define VPLAN_TX_QBASE(_VF)			(0x001D1800 + ((_VF) * 4))
+#define VPLAN_TX_QBASE_VFFIRSTQ_S		0
+#define VPLAN_TX_QBASE_VFFIRSTQ_M		ICE_M(0x3FFF, 0)
+#define VPLAN_TX_QBASE_VFNUMQ_S			16
+#define VPLAN_TX_QBASE_VFNUMQ_M			ICE_M(0xFF, 16)
+#define VPLAN_TXQ_MAPENA(_VF)			(0x00073800 + ((_VF) * 4))
+#define VPLAN_TXQ_MAPENA_TX_ENA_M		BIT(0)
+#define PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA(_i)	(0x001E36E0 + ((_i) * 32))
+#define PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_MAX_INDEX 8
+#define PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_HSEC_CTL_TX_PAUSE_QUANTA_M ICE_M(0xFFFF, 0)
+#define PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(_i) (0x001E3800 + ((_i) * 32))
+#define PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_M ICE_M(0xFFFF, 0)
+#define GL_MDCK_TX_TDPU				0x00049348
+#define GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M BIT(1)
+#define GL_MDET_RX				0x00294C00
+#define GL_MDET_RX_QNUM_S			0
+#define GL_MDET_RX_QNUM_M			ICE_M(0x7FFF, 0)
+#define GL_MDET_RX_VF_NUM_S			15
+#define GL_MDET_RX_VF_NUM_M			ICE_M(0xFF, 15)
+#define GL_MDET_RX_PF_NUM_S			23
+#define GL_MDET_RX_PF_NUM_M			ICE_M(0x7, 23)
+#define GL_MDET_RX_MAL_TYPE_S			26
+#define GL_MDET_RX_MAL_TYPE_M			ICE_M(0x1F, 26)
+#define GL_MDET_RX_VALID_M			BIT(31)
+#define GL_MDET_TX_PQM				0x002D2E00
+#define GL_MDET_TX_PQM_PF_NUM_S			0
+#define GL_MDET_TX_PQM_PF_NUM_M			ICE_M(0x7, 0)
+#define GL_MDET_TX_PQM_VF_NUM_S			4
+#define GL_MDET_TX_PQM_VF_NUM_M			ICE_M(0xFF, 4)
+#define GL_MDET_TX_PQM_QNUM_S			12
+#define GL_MDET_TX_PQM_QNUM_M			ICE_M(0x3FFF, 12)
+#define GL_MDET_TX_PQM_MAL_TYPE_S		26
+#define GL_MDET_TX_PQM_MAL_TYPE_M		ICE_M(0x1F, 26)
+#define GL_MDET_TX_PQM_VALID_M			BIT(31)
+#define GL_MDET_TX_TCLAN			0x000FC068
+#define GL_MDET_TX_TCLAN_QNUM_S			0
+#define GL_MDET_TX_TCLAN_QNUM_M			ICE_M(0x7FFF, 0)
+#define GL_MDET_TX_TCLAN_VF_NUM_S		15
+#define GL_MDET_TX_TCLAN_VF_NUM_M		ICE_M(0xFF, 15)
+#define GL_MDET_TX_TCLAN_PF_NUM_S		23
+#define GL_MDET_TX_TCLAN_PF_NUM_M		ICE_M(0x7, 23)
+#define GL_MDET_TX_TCLAN_MAL_TYPE_S		26
+#define GL_MDET_TX_TCLAN_MAL_TYPE_M		ICE_M(0x1F, 26)
+#define GL_MDET_TX_TCLAN_VALID_M		BIT(31)
+#define PF_MDET_RX				0x00294280
+#define PF_MDET_RX_VALID_M			BIT(0)
+#define PF_MDET_TX_PQM				0x002D2C80
+#define PF_MDET_TX_PQM_VALID_M			BIT(0)
+#define PF_MDET_TX_TCLAN			0x000FC000
+#define PF_MDET_TX_TCLAN_VALID_M		BIT(0)
+#define VP_MDET_RX(_VF)				(0x00294400 + ((_VF) * 4))
+#define VP_MDET_RX_VALID_M			BIT(0)
+#define VP_MDET_TX_PQM(_VF)			(0x002D2000 + ((_VF) * 4))
+#define VP_MDET_TX_PQM_VALID_M			BIT(0)
+#define VP_MDET_TX_TCLAN(_VF)			(0x000FB800 + ((_VF) * 4))
+#define VP_MDET_TX_TCLAN_VALID_M		BIT(0)
+#define VP_MDET_TX_TDPU(_VF)			(0x00040000 + ((_VF) * 4))
+#define VP_MDET_TX_TDPU_VALID_M			BIT(0)
+#define GL_MNG_FWSM				0x000B6134
+#define GL_MNG_FWSM_FW_LOADING_M		BIT(30)
+#define GLNVM_FLA				0x000B6108
+#define GLNVM_FLA_LOCKED_M			BIT(6)
+#define GLNVM_GENS				0x000B6100
+#define GLNVM_GENS_SR_SIZE_S			5
+#define GLNVM_GENS_SR_SIZE_M			ICE_M(0x7, 5)
+#define GLNVM_ULD				0x000B6008
+#define GLNVM_ULD_PCIER_DONE_M			BIT(0)
+#define GLNVM_ULD_PCIER_DONE_1_M		BIT(1)
+#define GLNVM_ULD_CORER_DONE_M			BIT(3)
+#define GLNVM_ULD_GLOBR_DONE_M			BIT(4)
+#define GLNVM_ULD_POR_DONE_M			BIT(5)
+#define GLNVM_ULD_POR_DONE_1_M			BIT(8)
+#define GLNVM_ULD_PCIER_DONE_2_M		BIT(9)
+#define GLNVM_ULD_PE_DONE_M			BIT(10)
+#define GLPCI_CNF2				0x000BE004
+#define GLPCI_CNF2_CACHELINE_SIZE_M		BIT(1)
+#define PF_FUNC_RID				0x0009E880
+#define PF_FUNC_RID_FUNC_NUM_S			0
+#define PF_FUNC_RID_FUNC_NUM_M			ICE_M(0x7, 0)
+#define PF_PCI_CIAA				0x0009E580
+#define PF_PCI_CIAA_VF_NUM_S			12
+#define PF_PCI_CIAD				0x0009E500
+#define GL_PWR_MODE_CTL				0x000B820C
+#define GL_PWR_MODE_CTL_CAR_MAX_BW_S		30
+#define GL_PWR_MODE_CTL_CAR_MAX_BW_M		ICE_M(0x3, 30)
+#define GLQF_FD_CNT				0x00460018
+#define GLQF_FD_CNT_FD_BCNT_S			16
+#define GLQF_FD_CNT_FD_BCNT_M			ICE_M(0x7FFF, 16)
+#define GLQF_FD_SIZE				0x00460010
+#define GLQF_FD_SIZE_FD_GSIZE_S			0
+#define GLQF_FD_SIZE_FD_GSIZE_M			ICE_M(0x7FFF, 0)
+#define GLQF_FD_SIZE_FD_BSIZE_S			16
+#define GLQF_FD_SIZE_FD_BSIZE_M			ICE_M(0x7FFF, 16)
+#define GLQF_FDINSET(_i, _j)			(0x00412000 + ((_i) * 4 + (_j) * 512))
+#define GLQF_FDMASK(_i)				(0x00410800 + ((_i) * 4))
+#define GLQF_FDMASK_MAX_INDEX			31
+#define GLQF_FDMASK_MSK_INDEX_S			0
+#define GLQF_FDMASK_MSK_INDEX_M			ICE_M(0x1F, 0)
+#define GLQF_FDMASK_MASK_S			16
+#define GLQF_FDMASK_MASK_M			ICE_M(0xFFFF, 16)
+#define GLQF_FDMASK_SEL(_i)			(0x00410400 + ((_i) * 4))
+#define GLQF_FDSWAP(_i, _j)			(0x00413000 + ((_i) * 4 + (_j) * 512))
+#define GLQF_HMASK(_i)				(0x0040FC00 + ((_i) * 4))
+#define GLQF_HMASK_MAX_INDEX			31
+#define GLQF_HMASK_MSK_INDEX_S			0
+#define GLQF_HMASK_MSK_INDEX_M			ICE_M(0x1F, 0)
+#define GLQF_HMASK_MASK_S			16
+#define GLQF_HMASK_MASK_M			ICE_M(0xFFFF, 16)
+#define GLQF_HMASK_SEL(_i)			(0x00410000 + ((_i) * 4))
+#define GLQF_HMASK_SEL_MAX_INDEX		127
+#define GLQF_HMASK_SEL_MASK_SEL_S		0
+#define PFQF_FD_ENA				0x0043A000
+#define PFQF_FD_ENA_FD_ENA_M			BIT(0)
+#define PFQF_FD_SIZE				0x00460100
+#define GLDCB_RTCTQ_RXQNUM_S			0
+#define GLDCB_RTCTQ_RXQNUM_M			ICE_M(0x7FF, 0)
+#define GLPRT_BPRCL(_i)				(0x00381380 + ((_i) * 8))
+#define GLPRT_BPTCL(_i)				(0x00381240 + ((_i) * 8))
+#define GLPRT_CRCERRS(_i)			(0x00380100 + ((_i) * 8))
+#define GLPRT_GORCL(_i)				(0x00380000 + ((_i) * 8))
+#define GLPRT_GOTCL(_i)				(0x00380B40 + ((_i) * 8))
+#define GLPRT_ILLERRC(_i)			(0x003801C0 + ((_i) * 8))
+#define GLPRT_LXOFFRXC(_i)			(0x003802C0 + ((_i) * 8))
+#define GLPRT_LXOFFTXC(_i)			(0x00381180 + ((_i) * 8))
+#define GLPRT_LXONRXC(_i)			(0x00380280 + ((_i) * 8))
+#define GLPRT_LXONTXC(_i)			(0x00381140 + ((_i) * 8))
+#define GLPRT_MLFC(_i)				(0x00380040 + ((_i) * 8))
+#define GLPRT_MPRCL(_i)				(0x00381340 + ((_i) * 8))
+#define GLPRT_MPTCL(_i)				(0x00381200 + ((_i) * 8))
+#define GLPRT_MRFC(_i)				(0x00380080 + ((_i) * 8))
+#define GLPRT_PRC1023L(_i)			(0x00380A00 + ((_i) * 8))
+#define GLPRT_PRC127L(_i)			(0x00380940 + ((_i) * 8))
+#define GLPRT_PRC1522L(_i)			(0x00380A40 + ((_i) * 8))
+#define GLPRT_PRC255L(_i)			(0x00380980 + ((_i) * 8))
+#define GLPRT_PRC511L(_i)			(0x003809C0 + ((_i) * 8))
+#define GLPRT_PRC64L(_i)			(0x00380900 + ((_i) * 8))
+#define GLPRT_PRC9522L(_i)			(0x00380A80 + ((_i) * 8))
+#define GLPRT_PTC1023L(_i)			(0x00380C80 + ((_i) * 8))
+#define GLPRT_PTC127L(_i)			(0x00380BC0 + ((_i) * 8))
+#define GLPRT_PTC1522L(_i)			(0x00380CC0 + ((_i) * 8))
+#define GLPRT_PTC255L(_i)			(0x00380C00 + ((_i) * 8))
+#define GLPRT_PTC511L(_i)			(0x00380C40 + ((_i) * 8))
+#define GLPRT_PTC64L(_i)			(0x00380B80 + ((_i) * 8))
+#define GLPRT_PTC9522L(_i)			(0x00380D00 + ((_i) * 8))
+#define GLPRT_PXOFFRXC(_i, _j)			(0x00380500 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_PXOFFTXC(_i, _j)			(0x00380F40 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_PXONRXC(_i, _j)			(0x00380300 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_PXONTXC(_i, _j)			(0x00380D40 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_RFC(_i)				(0x00380AC0 + ((_i) * 8))
+#define GLPRT_RJC(_i)				(0x00380B00 + ((_i) * 8))
+#define GLPRT_RLEC(_i)				(0x00380140 + ((_i) * 8))
+#define GLPRT_ROC(_i)				(0x00380240 + ((_i) * 8))
+#define GLPRT_RUC(_i)				(0x00380200 + ((_i) * 8))
+#define GLPRT_RXON2OFFCNT(_i, _j)		(0x00380700 + ((_i) * 8 + (_j) * 64))
+#define GLPRT_TDOLD(_i)				(0x00381280 + ((_i) * 8))
+#define GLPRT_UPRCL(_i)				(0x00381300 + ((_i) * 8))
+#define GLPRT_UPTCL(_i)				(0x003811C0 + ((_i) * 8))
+#define GLSTAT_FD_CNT0L(_i)			(0x003A0000 + ((_i) * 8))
+#define GLV_BPRCL(_i)				(0x003B6000 + ((_i) * 8))
+#define GLV_BPTCL(_i)				(0x0030E000 + ((_i) * 8))
+#define GLV_GORCL(_i)				(0x003B0000 + ((_i) * 8))
+#define GLV_GOTCL(_i)				(0x00300000 + ((_i) * 8))
+#define GLV_MPRCL(_i)				(0x003B4000 + ((_i) * 8))
+#define GLV_MPTCL(_i)				(0x0030C000 + ((_i) * 8))
+#define GLV_RDPC(_i)				(0x00294C04 + ((_i) * 4))
+#define GLV_TEPC(_VSI)				(0x00312000 + ((_VSI) * 4))
+#define GLV_UPRCL(_i)				(0x003B2000 + ((_i) * 8))
+#define GLV_UPTCL(_i)				(0x0030A000 + ((_i) * 8))
+#define PRTRPB_RDPC				0x000AC260
+#define GLHH_ART_CTL				0x000A41D4
+#define GLHH_ART_CTL_ACTIVE_M			BIT(0)
+#define GLHH_ART_TIME_H				0x000A41D8
+#define GLHH_ART_TIME_L				0x000A41DC
+#define GLTSYN_AUX_IN_0(_i)			(0x000889D8 + ((_i) * 4))
+#define GLTSYN_AUX_IN_0_INT_ENA_M		BIT(4)
+#define GLTSYN_AUX_OUT_0(_i)			(0x00088998 + ((_i) * 4))
+#define GLTSYN_AUX_OUT_0_OUT_ENA_M		BIT(0)
+#define GLTSYN_AUX_OUT_0_OUTMOD_M		ICE_M(0x3, 1)
+#define GLTSYN_CLKO_0(_i)			(0x000889B8 + ((_i) * 4))
+#define GLTSYN_CMD				0x00088810
+#define GLTSYN_CMD_SYNC				0x00088814
+#define GLTSYN_ENA(_i)				(0x00088808 + ((_i) * 4))
+#define GLTSYN_ENA_TSYN_ENA_M			BIT(0)
+#define GLTSYN_EVNT_H_0(_i)			(0x00088970 + ((_i) * 4))
+#define GLTSYN_EVNT_L_0(_i)			(0x00088968 + ((_i) * 4))
+#define GLTSYN_HHTIME_H(_i)			(0x00088900 + ((_i) * 4))
+#define GLTSYN_HHTIME_L(_i)			(0x000888F8 + ((_i) * 4))
+#define GLTSYN_INCVAL_H(_i)			(0x00088920 + ((_i) * 4))
+#define GLTSYN_INCVAL_L(_i)			(0x00088918 + ((_i) * 4))
+#define GLTSYN_SHADJ_H(_i)			(0x00088910 + ((_i) * 4))
+#define GLTSYN_SHADJ_L(_i)			(0x00088908 + ((_i) * 4))
+#define GLTSYN_SHTIME_0(_i)			(0x000888E0 + ((_i) * 4))
+#define GLTSYN_SHTIME_H(_i)			(0x000888F0 + ((_i) * 4))
+#define GLTSYN_SHTIME_L(_i)			(0x000888E8 + ((_i) * 4))
+#define GLTSYN_STAT(_i)				(0x000888C0 + ((_i) * 4))
+#define GLTSYN_STAT_EVENT0_M			BIT(0)
+#define GLTSYN_STAT_EVENT1_M			BIT(1)
+#define GLTSYN_STAT_EVENT2_M			BIT(2)
+#define GLTSYN_SYNC_DLAY			0x00088818
+#define GLTSYN_TGT_H_0(_i)			(0x00088930 + ((_i) * 4))
+#define GLTSYN_TGT_L_0(_i)			(0x00088928 + ((_i) * 4))
+#define GLTSYN_TIME_H(_i)			(0x000888D8 + ((_i) * 4))
+#define GLTSYN_TIME_L(_i)			(0x000888D0 + ((_i) * 4))
+#define PFHH_SEM				0x000A4200 /* Reset Source: PFR */
+#define PFHH_SEM_BUSY_M				BIT(0)
+#define PFTSYN_SEM				0x00088880
+#define PFTSYN_SEM_BUSY_M			BIT(0)
+#define VSIQF_FD_CNT(_VSI)			(0x00464000 + ((_VSI) * 4))
+#define VSIQF_FD_CNT_FD_GCNT_S			0
+#define VSIQF_FD_CNT_FD_GCNT_M			ICE_M(0x3FFF, 0)
+#define VSIQF_FD_CNT_FD_BCNT_S			16
+#define VSIQF_FD_CNT_FD_BCNT_M			ICE_M(0x3FFF, 16)
+#define VSIQF_FD_SIZE(_VSI)			(0x00462000 + ((_VSI) * 4))
+#define VSIQF_HKEY_MAX_INDEX			12
+#define PFPM_APM				0x000B8080
+#define PFPM_APM_APME_M				BIT(0)
+#define PFPM_WUFC				0x0009DC00
+#define PFPM_WUFC_MAG_M				BIT(1)
+#define PFPM_WUS				0x0009DB80
+#define PFPM_WUS_LNKC_M				BIT(0)
+#define PFPM_WUS_MAG_M				BIT(1)
+#define PFPM_WUS_MNG_M				BIT(3)
+#define PFPM_WUS_FW_RST_WK_M			BIT(31)
+#define VFINT_DYN_CTLN(_i)			(0x00003800 + ((_i) * 4))
+#define VFINT_DYN_CTLN_CLEARPBA_M		BIT(1)
+
+#endif /* _ICE_HW_AUTOGEN_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c
new file mode 100644
index 0000000000..145b27f2a4
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_idc.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+/* Inter-Driver Communication */
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_dcb_lib.h"
+
+static DEFINE_XARRAY_ALLOC1(ice_aux_id);
+
+/**
+ * ice_get_auxiliary_drv - retrieve iidc_auxiliary_drv struct
+ * @pf: pointer to PF struct
+ *
+ * This function has to be called with a device_lock on the
+ * pf->adev.dev to avoid race conditions.
+ */
+static struct iidc_auxiliary_drv *ice_get_auxiliary_drv(struct ice_pf *pf)
+{
+	struct auxiliary_device *adev;
+
+	adev = pf->adev;
+	if (!adev || !adev->dev.driver)
+		return NULL;
+
+	return container_of(adev->dev.driver, struct iidc_auxiliary_drv,
+			    adrv.driver);
+}
+
+/**
+ * ice_send_event_to_aux - send event to RDMA AUX driver
+ * @pf: pointer to PF struct
+ * @event: event struct
+ */
+void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event)
+{
+	struct iidc_auxiliary_drv *iadrv;
+
+	if (WARN_ON_ONCE(!in_task()))
+		return;
+
+	mutex_lock(&pf->adev_mutex);
+	if (!pf->adev)
+		goto finish;
+
+	device_lock(&pf->adev->dev);
+	iadrv = ice_get_auxiliary_drv(pf);
+	if (iadrv && iadrv->event_handler)
+		iadrv->event_handler(pf, event);
+	device_unlock(&pf->adev->dev);
+finish:
+	mutex_unlock(&pf->adev_mutex);
+}
+
+/**
+ * ice_add_rdma_qset - Add Leaf Node for RDMA Qset
+ * @pf: PF struct
+ * @qset: Resource to be allocated
+ */
+int ice_add_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset)
+{
+	u16 max_rdmaqs[ICE_MAX_TRAFFIC_CLASS];
+	struct ice_vsi *vsi;
+	struct device *dev;
+	u32 qset_teid;
+	u16 qs_handle;
+	int status;
+	int i;
+
+	if (WARN_ON(!pf || !qset))
+		return -EINVAL;
+
+	dev = ice_pf_to_dev(pf);
+
+	if (!ice_is_rdma_ena(pf))
+		return -EINVAL;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi) {
+		dev_err(dev, "RDMA QSet invalid VSI\n");
+		return -EINVAL;
+	}
+
+	ice_for_each_traffic_class(i)
+		max_rdmaqs[i] = 0;
+
+	max_rdmaqs[qset->tc]++;
+	qs_handle = qset->qs_handle;
+
+	status = ice_cfg_vsi_rdma(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+				  max_rdmaqs);
+	if (status) {
+		dev_err(dev, "Failed VSI RDMA Qset config\n");
+		return status;
+	}
+
+	status = ice_ena_vsi_rdma_qset(vsi->port_info, vsi->idx, qset->tc,
+				       &qs_handle, 1, &qset_teid);
+	if (status) {
+		dev_err(dev, "Failed VSI RDMA Qset enable\n");
+		return status;
+	}
+	vsi->qset_handle[qset->tc] = qset->qs_handle;
+	qset->teid = qset_teid;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ice_add_rdma_qset);
+
+/**
+ * ice_del_rdma_qset - Delete leaf node for RDMA Qset
+ * @pf: PF struct
+ * @qset: Resource to be freed
+ */
+int ice_del_rdma_qset(struct ice_pf *pf, struct iidc_rdma_qset_params *qset)
+{
+	struct ice_vsi *vsi;
+	u32 teid;
+	u16 q_id;
+
+	if (WARN_ON(!pf || !qset))
+		return -EINVAL;
+
+	vsi = ice_find_vsi(pf, qset->vport_id);
+	if (!vsi) {
+		dev_err(ice_pf_to_dev(pf), "RDMA Invalid VSI\n");
+		return -EINVAL;
+	}
+
+	q_id = qset->qs_handle;
+	teid = qset->teid;
+
+	vsi->qset_handle[qset->tc] = 0;
+
+	return ice_dis_vsi_rdma_qset(vsi->port_info, 1, &teid, &q_id);
+}
+EXPORT_SYMBOL_GPL(ice_del_rdma_qset);
+
+/**
+ * ice_rdma_request_reset - accept request from RDMA to perform a reset
+ * @pf: struct for PF
+ * @reset_type: type of reset
+ */
+int ice_rdma_request_reset(struct ice_pf *pf, enum iidc_reset_type reset_type)
+{
+	enum ice_reset_req reset;
+
+	if (WARN_ON(!pf))
+		return -EINVAL;
+
+	switch (reset_type) {
+	case IIDC_PFR:
+		reset = ICE_RESET_PFR;
+		break;
+	case IIDC_CORER:
+		reset = ICE_RESET_CORER;
+		break;
+	case IIDC_GLOBR:
+		reset = ICE_RESET_GLOBR;
+		break;
+	default:
+		dev_err(ice_pf_to_dev(pf), "incorrect reset request\n");
+		return -EINVAL;
+	}
+
+	return ice_schedule_reset(pf, reset);
+}
+EXPORT_SYMBOL_GPL(ice_rdma_request_reset);
+
+/**
+ * ice_rdma_update_vsi_filter - update main VSI filters for RDMA
+ * @pf: pointer to struct for PF
+ * @vsi_id: VSI HW idx to update filter on
+ * @enable: bool whether to enable or disable filters
+ */
+int ice_rdma_update_vsi_filter(struct ice_pf *pf, u16 vsi_id, bool enable)
+{
+	struct ice_vsi *vsi;
+	int status;
+
+	if (WARN_ON(!pf))
+		return -EINVAL;
+
+	vsi = ice_find_vsi(pf, vsi_id);
+	if (!vsi)
+		return -EINVAL;
+
+	status = ice_cfg_rdma_fltr(&pf->hw, vsi->idx, enable);
+	if (status) {
+		dev_err(ice_pf_to_dev(pf), "Failed to  %sable RDMA filtering\n",
+			enable ? "en" : "dis");
+	} else {
+		if (enable)
+			vsi->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+		else
+			vsi->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+	}
+
+	return status;
+}
+EXPORT_SYMBOL_GPL(ice_rdma_update_vsi_filter);
+
+/**
+ * ice_get_qos_params - parse QoS params for RDMA consumption
+ * @pf: pointer to PF struct
+ * @qos: set of QoS values
+ */
+void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos)
+{
+	struct ice_dcbx_cfg *dcbx_cfg;
+	unsigned int i;
+	u32 up2tc;
+
+	dcbx_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+	up2tc = rd32(&pf->hw, PRTDCB_TUP2TC);
+
+	qos->num_tc = ice_dcb_get_num_tc(dcbx_cfg);
+	for (i = 0; i < IIDC_MAX_USER_PRIORITY; i++)
+		qos->up2tc[i] = (up2tc >> (i * 3)) & 0x7;
+
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+		qos->tc_info[i].rel_bw = dcbx_cfg->etscfg.tcbwtable[i];
+
+	qos->pfc_mode = dcbx_cfg->pfc_mode;
+	if (qos->pfc_mode == IIDC_DSCP_PFC_MODE)
+		for (i = 0; i < IIDC_MAX_DSCP_MAPPING; i++)
+			qos->dscp_map[i] = dcbx_cfg->dscp_map[i];
+}
+EXPORT_SYMBOL_GPL(ice_get_qos_params);
+
+/**
+ * ice_alloc_rdma_qvectors - Allocate vector resources for RDMA driver
+ * @pf: board private structure to initialize
+ */
+static int ice_alloc_rdma_qvectors(struct ice_pf *pf)
+{
+	if (ice_is_rdma_ena(pf)) {
+		int i;
+
+		pf->msix_entries = kcalloc(pf->num_rdma_msix,
+					   sizeof(*pf->msix_entries),
+						  GFP_KERNEL);
+		if (!pf->msix_entries)
+			return -ENOMEM;
+
+		/* RDMA is the only user of pf->msix_entries array */
+		pf->rdma_base_vector = 0;
+
+		for (i = 0; i < pf->num_rdma_msix; i++) {
+			struct msix_entry *entry = &pf->msix_entries[i];
+			struct msi_map map;
+
+			map = ice_alloc_irq(pf, false);
+			if (map.index < 0)
+				break;
+
+			entry->entry = map.index;
+			entry->vector = map.virq;
+		}
+	}
+	return 0;
+}
+
+/**
+ * ice_free_rdma_qvector - free vector resources reserved for RDMA driver
+ * @pf: board private structure to initialize
+ */
+static void ice_free_rdma_qvector(struct ice_pf *pf)
+{
+	int i;
+
+	if (!pf->msix_entries)
+		return;
+
+	for (i = 0; i < pf->num_rdma_msix; i++) {
+		struct msi_map map;
+
+		map.index = pf->msix_entries[i].entry;
+		map.virq = pf->msix_entries[i].vector;
+		ice_free_irq(pf, map);
+	}
+
+	kfree(pf->msix_entries);
+	pf->msix_entries = NULL;
+}
+
+/**
+ * ice_adev_release - function to be mapped to AUX dev's release op
+ * @dev: pointer to device to free
+ */
+static void ice_adev_release(struct device *dev)
+{
+	struct iidc_auxiliary_dev *iadev;
+
+	iadev = container_of(dev, struct iidc_auxiliary_dev, adev.dev);
+	kfree(iadev);
+}
+
+/**
+ * ice_plug_aux_dev - allocate and register AUX device
+ * @pf: pointer to pf struct
+ */
+int ice_plug_aux_dev(struct ice_pf *pf)
+{
+	struct iidc_auxiliary_dev *iadev;
+	struct auxiliary_device *adev;
+	int ret;
+
+	/* if this PF doesn't support a technology that requires auxiliary
+	 * devices, then gracefully exit
+	 */
+	if (!ice_is_rdma_ena(pf))
+		return 0;
+
+	iadev = kzalloc(sizeof(*iadev), GFP_KERNEL);
+	if (!iadev)
+		return -ENOMEM;
+
+	adev = &iadev->adev;
+	iadev->pf = pf;
+
+	adev->id = pf->aux_idx;
+	adev->dev.release = ice_adev_release;
+	adev->dev.parent = &pf->pdev->dev;
+	adev->name = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? "roce" : "iwarp";
+
+	ret = auxiliary_device_init(adev);
+	if (ret) {
+		kfree(iadev);
+		return ret;
+	}
+
+	ret = auxiliary_device_add(adev);
+	if (ret) {
+		auxiliary_device_uninit(adev);
+		return ret;
+	}
+
+	mutex_lock(&pf->adev_mutex);
+	pf->adev = adev;
+	mutex_unlock(&pf->adev_mutex);
+
+	return 0;
+}
+
+/* ice_unplug_aux_dev - unregister and free AUX device
+ * @pf: pointer to pf struct
+ */
+void ice_unplug_aux_dev(struct ice_pf *pf)
+{
+	struct auxiliary_device *adev;
+
+	mutex_lock(&pf->adev_mutex);
+	adev = pf->adev;
+	pf->adev = NULL;
+	mutex_unlock(&pf->adev_mutex);
+
+	if (adev) {
+		auxiliary_device_delete(adev);
+		auxiliary_device_uninit(adev);
+	}
+}
+
+/**
+ * ice_init_rdma - initializes PF for RDMA use
+ * @pf: ptr to ice_pf
+ */
+int ice_init_rdma(struct ice_pf *pf)
+{
+	struct device *dev = &pf->pdev->dev;
+	int ret;
+
+	if (!ice_is_rdma_ena(pf)) {
+		dev_warn(dev, "RDMA is not supported on this device\n");
+		return 0;
+	}
+
+	ret = xa_alloc(&ice_aux_id, &pf->aux_idx, NULL, XA_LIMIT(1, INT_MAX),
+		       GFP_KERNEL);
+	if (ret) {
+		dev_err(dev, "Failed to allocate device ID for AUX driver\n");
+		return -ENOMEM;
+	}
+
+	/* Reserve vector resources */
+	ret = ice_alloc_rdma_qvectors(pf);
+	if (ret < 0) {
+		dev_err(dev, "failed to reserve vectors for RDMA\n");
+		goto err_reserve_rdma_qvector;
+	}
+	pf->rdma_mode |= IIDC_RDMA_PROTOCOL_ROCEV2;
+	ret = ice_plug_aux_dev(pf);
+	if (ret)
+		goto err_plug_aux_dev;
+	return 0;
+
+err_plug_aux_dev:
+	ice_free_rdma_qvector(pf);
+err_reserve_rdma_qvector:
+	pf->adev = NULL;
+	xa_erase(&ice_aux_id, pf->aux_idx);
+	return ret;
+}
+
+/**
+ * ice_deinit_rdma - deinitialize RDMA on PF
+ * @pf: ptr to ice_pf
+ */
+void ice_deinit_rdma(struct ice_pf *pf)
+{
+	if (!ice_is_rdma_ena(pf))
+		return;
+
+	ice_unplug_aux_dev(pf);
+	ice_free_rdma_qvector(pf);
+	xa_erase(&ice_aux_id, pf->aux_idx);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_idc_int.h b/drivers/net/ethernet/intel/ice/ice_idc_int.h
new file mode 100644
index 0000000000..4b0c86757d
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_idc_int.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021, Intel Corporation. */
+
+#ifndef _ICE_IDC_INT_H_
+#define _ICE_IDC_INT_H_
+
+#include <linux/net/intel/iidc.h>
+
+struct ice_pf;
+
+void ice_send_event_to_aux(struct ice_pf *pf, struct iidc_event *event);
+
+#endif /* !_ICE_IDC_INT_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_irq.c b/drivers/net/ethernet/intel/ice/ice_irq.c
new file mode 100644
index 0000000000..ad82ff7d19
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_irq.c
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_irq.h"
+
+/**
+ * ice_init_irq_tracker - initialize interrupt tracker
+ * @pf: board private structure
+ * @max_vectors: maximum number of vectors that tracker can hold
+ * @num_static: number of preallocated interrupts
+ */
+static void
+ice_init_irq_tracker(struct ice_pf *pf, unsigned int max_vectors,
+		     unsigned int num_static)
+{
+	pf->irq_tracker.num_entries = max_vectors;
+	pf->irq_tracker.num_static = num_static;
+	xa_init_flags(&pf->irq_tracker.entries, XA_FLAGS_ALLOC);
+}
+
+/**
+ * ice_deinit_irq_tracker - free xarray tracker
+ * @pf: board private structure
+ */
+static void ice_deinit_irq_tracker(struct ice_pf *pf)
+{
+	xa_destroy(&pf->irq_tracker.entries);
+}
+
+/**
+ * ice_free_irq_res - free a block of resources
+ * @pf: board private structure
+ * @index: starting index previously returned by ice_get_res
+ */
+static void ice_free_irq_res(struct ice_pf *pf, u16 index)
+{
+	struct ice_irq_entry *entry;
+
+	entry = xa_erase(&pf->irq_tracker.entries, index);
+	kfree(entry);
+}
+
+/**
+ * ice_get_irq_res - get an interrupt resource
+ * @pf: board private structure
+ * @dyn_only: force entry to be dynamically allocated
+ *
+ * Allocate new irq entry in the free slot of the tracker. Since xarray
+ * is used, always allocate new entry at the lowest possible index. Set
+ * proper allocation limit for maximum tracker entries.
+ *
+ * Returns allocated irq entry or NULL on failure.
+ */
+static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf, bool dyn_only)
+{
+	struct xa_limit limit = { .max = pf->irq_tracker.num_entries,
+				  .min = 0 };
+	unsigned int num_static = pf->irq_tracker.num_static;
+	struct ice_irq_entry *entry;
+	unsigned int index;
+	int ret;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return NULL;
+
+	/* skip preallocated entries if the caller says so */
+	if (dyn_only)
+		limit.min = num_static;
+
+	ret = xa_alloc(&pf->irq_tracker.entries, &index, entry, limit,
+		       GFP_KERNEL);
+
+	if (ret) {
+		kfree(entry);
+		entry = NULL;
+	} else {
+		entry->index = index;
+		entry->dynamic = index >= num_static;
+	}
+
+	return entry;
+}
+
+/**
+ * ice_reduce_msix_usage - Reduce usage of MSI-X vectors
+ * @pf: board private structure
+ * @v_remain: number of remaining MSI-X vectors to be distributed
+ *
+ * Reduce the usage of MSI-X vectors when entire request cannot be fulfilled.
+ * pf->num_lan_msix and pf->num_rdma_msix values are set based on number of
+ * remaining vectors.
+ */
+static void ice_reduce_msix_usage(struct ice_pf *pf, int v_remain)
+{
+	int v_rdma;
+
+	if (!ice_is_rdma_ena(pf)) {
+		pf->num_lan_msix = v_remain;
+		return;
+	}
+
+	/* RDMA needs at least 1 interrupt in addition to AEQ MSIX */
+	v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1;
+
+	if (v_remain < ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_RDMA_MSIX) {
+		dev_warn(ice_pf_to_dev(pf), "Not enough MSI-X vectors to support RDMA.\n");
+		clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+
+		pf->num_rdma_msix = 0;
+		pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX;
+	} else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) ||
+		   (v_remain - v_rdma < v_rdma)) {
+		/* Support minimum RDMA and give remaining vectors to LAN MSIX
+		 */
+		pf->num_rdma_msix = ICE_MIN_RDMA_MSIX;
+		pf->num_lan_msix = v_remain - ICE_MIN_RDMA_MSIX;
+	} else {
+		/* Split remaining MSIX with RDMA after accounting for AEQ MSIX
+		 */
+		pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 +
+				    ICE_RDMA_NUM_AEQ_MSIX;
+		pf->num_lan_msix = v_remain - pf->num_rdma_msix;
+	}
+}
+
+/**
+ * ice_ena_msix_range - Request a range of MSIX vectors from the OS
+ * @pf: board private structure
+ *
+ * Compute the number of MSIX vectors wanted and request from the OS. Adjust
+ * device usage if there are not enough vectors. Return the number of vectors
+ * reserved or negative on failure.
+ */
+static int ice_ena_msix_range(struct ice_pf *pf)
+{
+	int num_cpus, hw_num_msix, v_other, v_wanted, v_actual;
+	struct device *dev = ice_pf_to_dev(pf);
+	int err;
+
+	hw_num_msix = pf->hw.func_caps.common_cap.num_msix_vectors;
+	num_cpus = num_online_cpus();
+
+	/* LAN miscellaneous handler */
+	v_other = ICE_MIN_LAN_OICR_MSIX;
+
+	/* Flow Director */
+	if (test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		v_other += ICE_FDIR_MSIX;
+
+	/* switchdev */
+	v_other += ICE_ESWITCH_MSIX;
+
+	v_wanted = v_other;
+
+	/* LAN traffic */
+	pf->num_lan_msix = num_cpus;
+	v_wanted += pf->num_lan_msix;
+
+	/* RDMA auxiliary driver */
+	if (ice_is_rdma_ena(pf)) {
+		pf->num_rdma_msix = num_cpus + ICE_RDMA_NUM_AEQ_MSIX;
+		v_wanted += pf->num_rdma_msix;
+	}
+
+	if (v_wanted > hw_num_msix) {
+		int v_remain;
+
+		dev_warn(dev, "not enough device MSI-X vectors. wanted = %d, available = %d\n",
+			 v_wanted, hw_num_msix);
+
+		if (hw_num_msix < ICE_MIN_MSIX) {
+			err = -ERANGE;
+			goto exit_err;
+		}
+
+		v_remain = hw_num_msix - v_other;
+		if (v_remain < ICE_MIN_LAN_TXRX_MSIX) {
+			v_other = ICE_MIN_MSIX - ICE_MIN_LAN_TXRX_MSIX;
+			v_remain = ICE_MIN_LAN_TXRX_MSIX;
+		}
+
+		ice_reduce_msix_usage(pf, v_remain);
+		v_wanted = pf->num_lan_msix + pf->num_rdma_msix + v_other;
+
+		dev_notice(dev, "Reducing request to %d MSI-X vectors for LAN traffic.\n",
+			   pf->num_lan_msix);
+		if (ice_is_rdma_ena(pf))
+			dev_notice(dev, "Reducing request to %d MSI-X vectors for RDMA.\n",
+				   pf->num_rdma_msix);
+	}
+
+	/* actually reserve the vectors */
+	v_actual = pci_alloc_irq_vectors(pf->pdev, ICE_MIN_MSIX, v_wanted,
+					 PCI_IRQ_MSIX);
+	if (v_actual < 0) {
+		dev_err(dev, "unable to reserve MSI-X vectors\n");
+		err = v_actual;
+		goto exit_err;
+	}
+
+	if (v_actual < v_wanted) {
+		dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
+			 v_wanted, v_actual);
+
+		if (v_actual < ICE_MIN_MSIX) {
+			/* error if we can't get minimum vectors */
+			pci_free_irq_vectors(pf->pdev);
+			err = -ERANGE;
+			goto exit_err;
+		} else {
+			int v_remain = v_actual - v_other;
+
+			if (v_remain < ICE_MIN_LAN_TXRX_MSIX)
+				v_remain = ICE_MIN_LAN_TXRX_MSIX;
+
+			ice_reduce_msix_usage(pf, v_remain);
+
+			dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n",
+				   pf->num_lan_msix);
+
+			if (ice_is_rdma_ena(pf))
+				dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n",
+					   pf->num_rdma_msix);
+		}
+	}
+
+	return v_actual;
+
+exit_err:
+	pf->num_rdma_msix = 0;
+	pf->num_lan_msix = 0;
+	return err;
+}
+
+/**
+ * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme
+ * @pf: board private structure
+ */
+void ice_clear_interrupt_scheme(struct ice_pf *pf)
+{
+	pci_free_irq_vectors(pf->pdev);
+	ice_deinit_irq_tracker(pf);
+}
+
+/**
+ * ice_init_interrupt_scheme - Determine proper interrupt scheme
+ * @pf: board private structure to initialize
+ */
+int ice_init_interrupt_scheme(struct ice_pf *pf)
+{
+	int total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors;
+	int vectors, max_vectors;
+
+	vectors = ice_ena_msix_range(pf);
+
+	if (vectors < 0)
+		return -ENOMEM;
+
+	if (pci_msix_can_alloc_dyn(pf->pdev))
+		max_vectors = total_vectors;
+	else
+		max_vectors = vectors;
+
+	ice_init_irq_tracker(pf, max_vectors, vectors);
+
+	return 0;
+}
+
+/**
+ * ice_alloc_irq - Allocate new interrupt vector
+ * @pf: board private structure
+ * @dyn_only: force dynamic allocation of the interrupt
+ *
+ * Allocate new interrupt vector for a given owner id.
+ * return struct msi_map with interrupt details and track
+ * allocated interrupt appropriately.
+ *
+ * This function reserves new irq entry from the irq_tracker.
+ * if according to the tracker information all interrupts that
+ * were allocated with ice_pci_alloc_irq_vectors are already used
+ * and dynamically allocated interrupts are supported then new
+ * interrupt will be allocated with pci_msix_alloc_irq_at.
+ *
+ * Some callers may only support dynamically allocated interrupts.
+ * This is indicated with dyn_only flag.
+ *
+ * On failure, return map with negative .index. The caller
+ * is expected to check returned map index.
+ *
+ */
+struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_only)
+{
+	int sriov_base_vector = pf->sriov_base_vector;
+	struct msi_map map = { .index = -ENOENT };
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_irq_entry *entry;
+
+	entry = ice_get_irq_res(pf, dyn_only);
+	if (!entry)
+		return map;
+
+	/* fail if we're about to violate SRIOV vectors space */
+	if (sriov_base_vector && entry->index >= sriov_base_vector)
+		goto exit_free_res;
+
+	if (pci_msix_can_alloc_dyn(pf->pdev) && entry->dynamic) {
+		map = pci_msix_alloc_irq_at(pf->pdev, entry->index, NULL);
+		if (map.index < 0)
+			goto exit_free_res;
+		dev_dbg(dev, "allocated new irq at index %d\n", map.index);
+	} else {
+		map.index = entry->index;
+		map.virq = pci_irq_vector(pf->pdev, map.index);
+	}
+
+	return map;
+
+exit_free_res:
+	dev_err(dev, "Could not allocate irq at idx %d\n", entry->index);
+	ice_free_irq_res(pf, entry->index);
+	return map;
+}
+
+/**
+ * ice_free_irq - Free interrupt vector
+ * @pf: board private structure
+ * @map: map with interrupt details
+ *
+ * Remove allocated interrupt from the interrupt tracker. If interrupt was
+ * allocated dynamically, free respective interrupt vector.
+ */
+void ice_free_irq(struct ice_pf *pf, struct msi_map map)
+{
+	struct ice_irq_entry *entry;
+
+	entry = xa_load(&pf->irq_tracker.entries, map.index);
+
+	if (!entry) {
+		dev_err(ice_pf_to_dev(pf), "Failed to get MSIX interrupt entry at index %d",
+			map.index);
+		return;
+	}
+
+	dev_dbg(ice_pf_to_dev(pf), "Free irq at index %d\n", map.index);
+
+	if (entry->dynamic)
+		pci_msix_free_irq(pf->pdev, map);
+
+	ice_free_irq_res(pf, map.index);
+}
+
+/**
+ * ice_get_max_used_msix_vector - Get the max used interrupt vector
+ * @pf: board private structure
+ *
+ * Return index of maximum used interrupt vectors with respect to the
+ * beginning of the MSIX table. Take into account that some interrupts
+ * may have been dynamically allocated after MSIX was initially enabled.
+ */
+int ice_get_max_used_msix_vector(struct ice_pf *pf)
+{
+	unsigned long start, index, max_idx;
+	void *entry;
+
+	/* Treat all preallocated interrupts as used */
+	start = pf->irq_tracker.num_static;
+	max_idx = start - 1;
+
+	xa_for_each_start(&pf->irq_tracker.entries, index, entry, start) {
+		if (index > max_idx)
+			max_idx = index;
+	}
+
+	return max_idx;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_irq.h b/drivers/net/ethernet/intel/ice/ice_irq.h
new file mode 100644
index 0000000000..f35efc0857
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_irq.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2023, Intel Corporation. */
+
+#ifndef _ICE_IRQ_H_
+#define _ICE_IRQ_H_
+
+struct ice_irq_entry {
+	unsigned int index;
+	bool dynamic;	/* allocation type flag */
+};
+
+struct ice_irq_tracker {
+	struct xarray entries;
+	u16 num_entries;	/* total vectors available */
+	u16 num_static;	/* preallocated entries */
+};
+
+int ice_init_interrupt_scheme(struct ice_pf *pf);
+void ice_clear_interrupt_scheme(struct ice_pf *pf);
+
+struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_only);
+void ice_free_irq(struct ice_pf *pf, struct msi_map map);
+int ice_get_max_used_msix_vector(struct ice_pf *pf);
+
+#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c
new file mode 100644
index 0000000000..23e197c3d0
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lag.c
@@ -0,0 +1,2145 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+/* Link Aggregation code */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_lag.h"
+
+#define ICE_LAG_RES_SHARED	BIT(14)
+#define ICE_LAG_RES_VALID	BIT(15)
+
+#define LACP_TRAIN_PKT_LEN		16
+static const u8 lacp_train_pkt[LACP_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0,
+						       0, 0, 0, 0, 0, 0,
+						       0x88, 0x09, 0, 0 };
+
+#define ICE_RECIPE_LEN			64
+static const u8 ice_dflt_vsi_rcp[ICE_RECIPE_LEN] = {
+	0x05, 0, 0, 0, 0x20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0x85, 0, 0x01, 0, 0, 0, 0xff, 0xff, 0x08, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+/**
+ * ice_lag_set_primary - set PF LAG state as Primary
+ * @lag: LAG info struct
+ */
+static void ice_lag_set_primary(struct ice_lag *lag)
+{
+	struct ice_pf *pf = lag->pf;
+
+	if (!pf)
+		return;
+
+	if (lag->role != ICE_LAG_UNSET && lag->role != ICE_LAG_BACKUP) {
+		dev_warn(ice_pf_to_dev(pf), "%s: Attempt to be Primary, but incompatible state.\n",
+			 netdev_name(lag->netdev));
+		return;
+	}
+
+	lag->role = ICE_LAG_PRIMARY;
+}
+
+/**
+ * ice_lag_set_backup - set PF LAG state to Backup
+ * @lag: LAG info struct
+ */
+static void ice_lag_set_backup(struct ice_lag *lag)
+{
+	struct ice_pf *pf = lag->pf;
+
+	if (!pf)
+		return;
+
+	if (lag->role != ICE_LAG_UNSET && lag->role != ICE_LAG_PRIMARY) {
+		dev_dbg(ice_pf_to_dev(pf), "%s: Attempt to be Backup, but incompatible state\n",
+			netdev_name(lag->netdev));
+		return;
+	}
+
+	lag->role = ICE_LAG_BACKUP;
+}
+
+/**
+ * netif_is_same_ice - determine if netdev is on the same ice NIC as local PF
+ * @pf: local PF struct
+ * @netdev: netdev we are evaluating
+ */
+static bool netif_is_same_ice(struct ice_pf *pf, struct net_device *netdev)
+{
+	struct ice_netdev_priv *np;
+	struct ice_pf *test_pf;
+	struct ice_vsi *vsi;
+
+	if (!netif_is_ice(netdev))
+		return false;
+
+	np = netdev_priv(netdev);
+	if (!np)
+		return false;
+
+	vsi = np->vsi;
+	if (!vsi)
+		return false;
+
+	test_pf = vsi->back;
+	if (!test_pf)
+		return false;
+
+	if (pf->pdev->bus != test_pf->pdev->bus ||
+	    pf->pdev->slot != test_pf->pdev->slot)
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_netdev_to_lag - return pointer to associated lag struct from netdev
+ * @netdev: pointer to net_device struct to query
+ */
+static struct ice_lag *ice_netdev_to_lag(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np;
+	struct ice_vsi *vsi;
+
+	if (!netif_is_ice(netdev))
+		return NULL;
+
+	np = netdev_priv(netdev);
+	if (!np)
+		return NULL;
+
+	vsi = np->vsi;
+	if (!vsi)
+		return NULL;
+
+	return vsi->back->lag;
+}
+
+/**
+ * ice_lag_find_hw_by_lport - return an hw struct from bond members lport
+ * @lag: lag struct
+ * @lport: lport value to search for
+ */
+static struct ice_hw *
+ice_lag_find_hw_by_lport(struct ice_lag *lag, u8 lport)
+{
+	struct ice_lag_netdev_list *entry;
+	struct net_device *tmp_netdev;
+	struct ice_netdev_priv *np;
+	struct ice_hw *hw;
+
+	list_for_each_entry(entry, lag->netdev_head, node) {
+		tmp_netdev = entry->netdev;
+		if (!tmp_netdev || !netif_is_ice(tmp_netdev))
+			continue;
+
+		np = netdev_priv(tmp_netdev);
+		if (!np || !np->vsi)
+			continue;
+
+		hw = &np->vsi->back->hw;
+		if (hw->port_info->lport == lport)
+			return hw;
+	}
+
+	return NULL;
+}
+
+/**
+ * ice_lag_find_primary - returns pointer to primary interfaces lag struct
+ * @lag: local interfaces lag struct
+ */
+static struct ice_lag *ice_lag_find_primary(struct ice_lag *lag)
+{
+	struct ice_lag *primary_lag = NULL;
+	struct list_head *tmp;
+
+	list_for_each(tmp, lag->netdev_head) {
+		struct ice_lag_netdev_list *entry;
+		struct ice_lag *tmp_lag;
+
+		entry = list_entry(tmp, struct ice_lag_netdev_list, node);
+		tmp_lag = ice_netdev_to_lag(entry->netdev);
+		if (tmp_lag && tmp_lag->primary) {
+			primary_lag = tmp_lag;
+			break;
+		}
+	}
+
+	return primary_lag;
+}
+
+/**
+ * ice_lag_cfg_dflt_fltr - Add/Remove default VSI rule for LAG
+ * @lag: lag struct for local interface
+ * @add: boolean on whether we are adding filters
+ */
+static int
+ice_lag_cfg_dflt_fltr(struct ice_lag *lag, bool add)
+{
+	struct ice_sw_rule_lkup_rx_tx *s_rule;
+	u16 s_rule_sz, vsi_num;
+	struct ice_hw *hw;
+	u32 act, opc;
+	u8 *eth_hdr;
+	int err;
+
+	hw = &lag->pf->hw;
+	vsi_num = ice_get_hw_vsi_num(hw, 0);
+
+	s_rule_sz = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule);
+	s_rule = kzalloc(s_rule_sz, GFP_KERNEL);
+	if (!s_rule) {
+		dev_err(ice_pf_to_dev(lag->pf), "error allocating rule for LAG default VSI\n");
+		return -ENOMEM;
+	}
+
+	if (add) {
+		eth_hdr = s_rule->hdr_data;
+		ice_fill_eth_hdr(eth_hdr);
+
+		act = (vsi_num << ICE_SINGLE_ACT_VSI_ID_S) &
+			ICE_SINGLE_ACT_VSI_ID_M;
+		act |= ICE_SINGLE_ACT_VSI_FORWARDING |
+			ICE_SINGLE_ACT_VALID_BIT | ICE_SINGLE_ACT_LAN_ENABLE;
+
+		s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX);
+		s_rule->recipe_id = cpu_to_le16(lag->pf_recipe);
+		s_rule->src = cpu_to_le16(hw->port_info->lport);
+		s_rule->act = cpu_to_le32(act);
+		s_rule->hdr_len = cpu_to_le16(DUMMY_ETH_HDR_LEN);
+		opc = ice_aqc_opc_add_sw_rules;
+	} else {
+		s_rule->index = cpu_to_le16(lag->pf_rule_id);
+		opc = ice_aqc_opc_remove_sw_rules;
+	}
+
+	err = ice_aq_sw_rules(&lag->pf->hw, s_rule, s_rule_sz, 1, opc, NULL);
+	if (err)
+		goto dflt_fltr_free;
+
+	if (add)
+		lag->pf_rule_id = le16_to_cpu(s_rule->index);
+	else
+		lag->pf_rule_id = 0;
+
+dflt_fltr_free:
+	kfree(s_rule);
+	return err;
+}
+
+/**
+ * ice_lag_cfg_pf_fltrs - set filters up for new active port
+ * @lag: local interfaces lag struct
+ * @ptr: opaque data containing notifier event
+ */
+static void
+ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr)
+{
+	struct netdev_notifier_bonding_info *info;
+	struct netdev_bonding_info *bonding_info;
+	struct net_device *event_netdev;
+	struct device *dev;
+
+	event_netdev = netdev_notifier_info_to_dev(ptr);
+	/* not for this netdev */
+	if (event_netdev != lag->netdev)
+		return;
+
+	info = (struct netdev_notifier_bonding_info *)ptr;
+	bonding_info = &info->bonding_info;
+	dev = ice_pf_to_dev(lag->pf);
+
+	/* interface not active - remove old default VSI rule */
+	if (bonding_info->slave.state && lag->pf_rule_id) {
+		if (ice_lag_cfg_dflt_fltr(lag, false))
+			dev_err(dev, "Error removing old default VSI filter\n");
+		return;
+	}
+
+	/* interface becoming active - add new default VSI rule */
+	if (!bonding_info->slave.state && !lag->pf_rule_id)
+		if (ice_lag_cfg_dflt_fltr(lag, true))
+			dev_err(dev, "Error adding new default VSI filter\n");
+}
+
+/**
+ * ice_display_lag_info - print LAG info
+ * @lag: LAG info struct
+ */
+static void ice_display_lag_info(struct ice_lag *lag)
+{
+	const char *name, *upper, *role, *bonded, *primary;
+	struct device *dev = &lag->pf->pdev->dev;
+
+	name = lag->netdev ? netdev_name(lag->netdev) : "unset";
+	upper = lag->upper_netdev ? netdev_name(lag->upper_netdev) : "unset";
+	primary = lag->primary ? "TRUE" : "FALSE";
+	bonded = lag->bonded ? "BONDED" : "UNBONDED";
+
+	switch (lag->role) {
+	case ICE_LAG_NONE:
+		role = "NONE";
+		break;
+	case ICE_LAG_PRIMARY:
+		role = "PRIMARY";
+		break;
+	case ICE_LAG_BACKUP:
+		role = "BACKUP";
+		break;
+	case ICE_LAG_UNSET:
+		role = "UNSET";
+		break;
+	default:
+		role = "ERROR";
+	}
+
+	dev_dbg(dev, "%s %s, upper:%s, role:%s, primary:%s\n", name, bonded,
+		upper, role, primary);
+}
+
+/**
+ * ice_lag_qbuf_recfg - generate a buffer of queues for a reconfigure command
+ * @hw: HW struct that contains the queue contexts
+ * @qbuf: pointer to buffer to populate
+ * @vsi_num: index of the VSI in PF space
+ * @numq: number of queues to search for
+ * @tc: traffic class that contains the queues
+ *
+ * function returns the number of valid queues in buffer
+ */
+static u16
+ice_lag_qbuf_recfg(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *qbuf,
+		   u16 vsi_num, u16 numq, u8 tc)
+{
+	struct ice_q_ctx *q_ctx;
+	u16 qid, count = 0;
+	struct ice_pf *pf;
+	int i;
+
+	pf = hw->back;
+	for (i = 0; i < numq; i++) {
+		q_ctx = ice_get_lan_q_ctx(hw, vsi_num, tc, i);
+		if (!q_ctx) {
+			dev_dbg(ice_hw_to_dev(hw), "%s queue %d NO Q CONTEXT\n",
+				__func__, i);
+			continue;
+		}
+		if (q_ctx->q_teid == ICE_INVAL_TEID) {
+			dev_dbg(ice_hw_to_dev(hw), "%s queue %d INVAL TEID\n",
+				__func__, i);
+			continue;
+		}
+		if (q_ctx->q_handle == ICE_INVAL_Q_HANDLE) {
+			dev_dbg(ice_hw_to_dev(hw), "%s queue %d INVAL Q HANDLE\n",
+				__func__, i);
+			continue;
+		}
+
+		qid = pf->vsi[vsi_num]->txq_map[q_ctx->q_handle];
+		qbuf->queue_info[count].q_handle = cpu_to_le16(qid);
+		qbuf->queue_info[count].tc = tc;
+		qbuf->queue_info[count].q_teid = cpu_to_le32(q_ctx->q_teid);
+		count++;
+	}
+
+	return count;
+}
+
+/**
+ * ice_lag_get_sched_parent - locate or create a sched node parent
+ * @hw: HW struct for getting parent in
+ * @tc: traffic class on parent/node
+ */
+static struct ice_sched_node *
+ice_lag_get_sched_parent(struct ice_hw *hw, u8 tc)
+{
+	struct ice_sched_node *tc_node, *aggnode, *parent = NULL;
+	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+	struct ice_port_info *pi = hw->port_info;
+	struct device *dev;
+	u8 aggl, vsil;
+	int n;
+
+	dev = ice_hw_to_dev(hw);
+
+	tc_node = ice_sched_get_tc_node(pi, tc);
+	if (!tc_node) {
+		dev_warn(dev, "Failure to find TC node for LAG move\n");
+		return parent;
+	}
+
+	aggnode = ice_sched_get_agg_node(pi, tc_node, ICE_DFLT_AGG_ID);
+	if (!aggnode) {
+		dev_warn(dev, "Failure to find aggregate node for LAG move\n");
+		return parent;
+	}
+
+	aggl = ice_sched_get_agg_layer(hw);
+	vsil = ice_sched_get_vsi_layer(hw);
+
+	for (n = aggl + 1; n < vsil; n++)
+		num_nodes[n] = 1;
+
+	for (n = 0; n < aggnode->num_children; n++) {
+		parent = ice_sched_get_free_vsi_parent(hw, aggnode->children[n],
+						       num_nodes);
+		if (parent)
+			return parent;
+	}
+
+	/* if free parent not found - add one */
+	parent = aggnode;
+	for (n = aggl + 1; n < vsil; n++) {
+		u16 num_nodes_added;
+		u32 first_teid;
+		int err;
+
+		err = ice_sched_add_nodes_to_layer(pi, tc_node, parent, n,
+						   num_nodes[n], &first_teid,
+						   &num_nodes_added);
+		if (err || num_nodes[n] != num_nodes_added)
+			return NULL;
+
+		if (num_nodes_added)
+			parent = ice_sched_find_node_by_teid(tc_node,
+							     first_teid);
+		else
+			parent = parent->children[0];
+		if (!parent) {
+			dev_warn(dev, "Failure to add new parent for LAG move\n");
+			return parent;
+		}
+	}
+
+	return parent;
+}
+
+/**
+ * ice_lag_move_vf_node_tc - move scheduling nodes for one VF on one TC
+ * @lag: lag info struct
+ * @oldport: lport of previous nodes location
+ * @newport: lport of destination nodes location
+ * @vsi_num: array index of VSI in PF space
+ * @tc: traffic class to move
+ */
+static void
+ice_lag_move_vf_node_tc(struct ice_lag *lag, u8 oldport, u8 newport,
+			u16 vsi_num, u8 tc)
+{
+	u16 numq, valq, buf_size, num_moved, qbuf_size;
+	struct device *dev = ice_pf_to_dev(lag->pf);
+	struct ice_aqc_cfg_txqs_buf *qbuf;
+	struct ice_aqc_move_elem *buf;
+	struct ice_sched_node *n_prt;
+	struct ice_hw *new_hw = NULL;
+	__le32 teid, parent_teid;
+	struct ice_vsi_ctx *ctx;
+	u32 tmp_teid;
+
+	ctx = ice_get_vsi_ctx(&lag->pf->hw, vsi_num);
+	if (!ctx) {
+		dev_warn(dev, "Unable to locate VSI context for LAG failover\n");
+		return;
+	}
+
+	/* check to see if this VF is enabled on this TC */
+	if (!ctx->sched.vsi_node[tc])
+		return;
+
+	/* locate HW struct for destination port */
+	new_hw = ice_lag_find_hw_by_lport(lag, newport);
+	if (!new_hw) {
+		dev_warn(dev, "Unable to locate HW struct for LAG node destination\n");
+		return;
+	}
+
+	numq = ctx->num_lan_q_entries[tc];
+	teid = ctx->sched.vsi_node[tc]->info.node_teid;
+	tmp_teid = le32_to_cpu(teid);
+	parent_teid = ctx->sched.vsi_node[tc]->info.parent_teid;
+	/* if no teid assigned or numq == 0, then this TC is not active */
+	if (!tmp_teid || !numq)
+		return;
+
+	/* suspend VSI subtree for Traffic Class "tc" on
+	 * this VF's VSI
+	 */
+	if (ice_sched_suspend_resume_elems(&lag->pf->hw, 1, &tmp_teid, true))
+		dev_dbg(dev, "Problem suspending traffic for LAG node move\n");
+
+	/* reconfigure all VF's queues on this Traffic Class
+	 * to new port
+	 */
+	qbuf_size = struct_size(qbuf, queue_info, numq);
+	qbuf = kzalloc(qbuf_size, GFP_KERNEL);
+	if (!qbuf) {
+		dev_warn(dev, "Failure allocating memory for VF queue recfg buffer\n");
+		goto resume_traffic;
+	}
+
+	/* add the per queue info for the reconfigure command buffer */
+	valq = ice_lag_qbuf_recfg(&lag->pf->hw, qbuf, vsi_num, numq, tc);
+	if (!valq) {
+		dev_dbg(dev, "No valid queues found for LAG failover\n");
+		goto qbuf_none;
+	}
+
+	if (ice_aq_cfg_lan_txq(&lag->pf->hw, qbuf, qbuf_size, valq, oldport,
+			       newport, NULL)) {
+		dev_warn(dev, "Failure to configure queues for LAG failover\n");
+		goto qbuf_err;
+	}
+
+qbuf_none:
+	kfree(qbuf);
+
+	/* find new parent in destination port's tree for VF VSI node on this
+	 * Traffic Class
+	 */
+	n_prt = ice_lag_get_sched_parent(new_hw, tc);
+	if (!n_prt)
+		goto resume_traffic;
+
+	/* Move Vf's VSI node for this TC to newport's scheduler tree */
+	buf_size = struct_size(buf, teid, 1);
+	buf = kzalloc(buf_size, GFP_KERNEL);
+	if (!buf) {
+		dev_warn(dev, "Failure to alloc memory for VF node failover\n");
+		goto resume_traffic;
+	}
+
+	buf->hdr.src_parent_teid = parent_teid;
+	buf->hdr.dest_parent_teid = n_prt->info.node_teid;
+	buf->hdr.num_elems = cpu_to_le16(1);
+	buf->hdr.mode = ICE_AQC_MOVE_ELEM_MODE_KEEP_OWN;
+	buf->teid[0] = teid;
+
+	if (ice_aq_move_sched_elems(&lag->pf->hw, 1, buf, buf_size, &num_moved,
+				    NULL))
+		dev_warn(dev, "Failure to move VF nodes for failover\n");
+	else
+		ice_sched_update_parent(n_prt, ctx->sched.vsi_node[tc]);
+
+	kfree(buf);
+	goto resume_traffic;
+
+qbuf_err:
+	kfree(qbuf);
+
+resume_traffic:
+	/* restart traffic for VSI node */
+	if (ice_sched_suspend_resume_elems(&lag->pf->hw, 1, &tmp_teid, false))
+		dev_dbg(dev, "Problem restarting traffic for LAG node move\n");
+}
+
+/**
+ * ice_lag_build_netdev_list - populate the lag struct's netdev list
+ * @lag: local lag struct
+ * @ndlist: pointer to netdev list to populate
+ */
+static void ice_lag_build_netdev_list(struct ice_lag *lag,
+				      struct ice_lag_netdev_list *ndlist)
+{
+	struct ice_lag_netdev_list *nl;
+	struct net_device *tmp_nd;
+
+	INIT_LIST_HEAD(&ndlist->node);
+	rcu_read_lock();
+	for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) {
+		nl = kzalloc(sizeof(*nl), GFP_ATOMIC);
+		if (!nl)
+			break;
+
+		nl->netdev = tmp_nd;
+		list_add(&nl->node, &ndlist->node);
+	}
+	rcu_read_unlock();
+	lag->netdev_head = &ndlist->node;
+}
+
+/**
+ * ice_lag_destroy_netdev_list - free lag struct's netdev list
+ * @lag: pointer to local lag struct
+ * @ndlist: pointer to lag struct netdev list
+ */
+static void ice_lag_destroy_netdev_list(struct ice_lag *lag,
+					struct ice_lag_netdev_list *ndlist)
+{
+	struct ice_lag_netdev_list *entry, *n;
+
+	rcu_read_lock();
+	list_for_each_entry_safe(entry, n, &ndlist->node, node) {
+		list_del(&entry->node);
+		kfree(entry);
+	}
+	rcu_read_unlock();
+	lag->netdev_head = NULL;
+}
+
+/**
+ * ice_lag_move_single_vf_nodes - Move Tx scheduling nodes for single VF
+ * @lag: primary interface LAG struct
+ * @oldport: lport of previous interface
+ * @newport: lport of destination interface
+ * @vsi_num: SW index of VF's VSI
+ */
+static void
+ice_lag_move_single_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport,
+			     u16 vsi_num)
+{
+	u8 tc;
+
+	ice_for_each_traffic_class(tc)
+		ice_lag_move_vf_node_tc(lag, oldport, newport, vsi_num, tc);
+}
+
+/**
+ * ice_lag_move_new_vf_nodes - Move Tx scheduling nodes for a VF if required
+ * @vf: the VF to move Tx nodes for
+ *
+ * Called just after configuring new VF queues. Check whether the VF Tx
+ * scheduling nodes need to be updated to fail over to the active port. If so,
+ * move them now.
+ */
+void ice_lag_move_new_vf_nodes(struct ice_vf *vf)
+{
+	struct ice_lag_netdev_list ndlist;
+	u8 pri_port, act_port;
+	struct ice_lag *lag;
+	struct ice_vsi *vsi;
+	struct ice_pf *pf;
+
+	vsi = ice_get_vf_vsi(vf);
+
+	if (WARN_ON(!vsi))
+		return;
+
+	if (WARN_ON(vsi->type != ICE_VSI_VF))
+		return;
+
+	pf = vf->pf;
+	lag = pf->lag;
+
+	mutex_lock(&pf->lag_mutex);
+	if (!lag->bonded)
+		goto new_vf_unlock;
+
+	pri_port = pf->hw.port_info->lport;
+	act_port = lag->active_port;
+
+	if (lag->upper_netdev)
+		ice_lag_build_netdev_list(lag, &ndlist);
+
+	if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) &&
+	    lag->bonded && lag->primary && pri_port != act_port &&
+	    !list_empty(lag->netdev_head))
+		ice_lag_move_single_vf_nodes(lag, pri_port, act_port, vsi->idx);
+
+	ice_lag_destroy_netdev_list(lag, &ndlist);
+
+new_vf_unlock:
+	mutex_unlock(&pf->lag_mutex);
+}
+
+/**
+ * ice_lag_move_vf_nodes - move Tx scheduling nodes for all VFs to new port
+ * @lag: lag info struct
+ * @oldport: lport of previous interface
+ * @newport: lport of destination interface
+ */
+static void ice_lag_move_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport)
+{
+	struct ice_pf *pf;
+	int i;
+
+	if (!lag->primary)
+		return;
+
+	pf = lag->pf;
+	ice_for_each_vsi(pf, i)
+		if (pf->vsi[i] && (pf->vsi[i]->type == ICE_VSI_VF ||
+				   pf->vsi[i]->type == ICE_VSI_SWITCHDEV_CTRL))
+			ice_lag_move_single_vf_nodes(lag, oldport, newport, i);
+}
+
+/**
+ * ice_lag_move_vf_nodes_cfg - move vf nodes outside LAG netdev event context
+ * @lag: local lag struct
+ * @src_prt: lport value for source port
+ * @dst_prt: lport value for destination port
+ *
+ * This function is used to move nodes during an out-of-netdev-event situation,
+ * primarily when the driver needs to reconfigure or recreate resources.
+ *
+ * Must be called while holding the lag_mutex to avoid lag events from
+ * processing while out-of-sync moves are happening.  Also, paired moves,
+ * such as used in a reset flow, should both be called under the same mutex
+ * lock to avoid changes between start of reset and end of reset.
+ */
+void ice_lag_move_vf_nodes_cfg(struct ice_lag *lag, u8 src_prt, u8 dst_prt)
+{
+	struct ice_lag_netdev_list ndlist;
+
+	ice_lag_build_netdev_list(lag, &ndlist);
+	ice_lag_move_vf_nodes(lag, src_prt, dst_prt);
+	ice_lag_destroy_netdev_list(lag, &ndlist);
+}
+
+#define ICE_LAG_SRIOV_CP_RECIPE		10
+#define ICE_LAG_SRIOV_TRAIN_PKT_LEN	16
+
+/**
+ * ice_lag_cfg_cp_fltr - configure filter for control packets
+ * @lag: local interface's lag struct
+ * @add: add or remove rule
+ */
+static void
+ice_lag_cfg_cp_fltr(struct ice_lag *lag, bool add)
+{
+	struct ice_sw_rule_lkup_rx_tx *s_rule = NULL;
+	struct ice_vsi *vsi;
+	u16 buf_len, opc;
+
+	vsi = lag->pf->vsi[0];
+
+	buf_len = ICE_SW_RULE_RX_TX_HDR_SIZE(s_rule,
+					     ICE_LAG_SRIOV_TRAIN_PKT_LEN);
+	s_rule = kzalloc(buf_len, GFP_KERNEL);
+	if (!s_rule) {
+		netdev_warn(lag->netdev, "-ENOMEM error configuring CP filter\n");
+		return;
+	}
+
+	if (add) {
+		s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX);
+		s_rule->recipe_id = cpu_to_le16(ICE_LAG_SRIOV_CP_RECIPE);
+		s_rule->src = cpu_to_le16(vsi->port_info->lport);
+		s_rule->act = cpu_to_le32(ICE_FWD_TO_VSI |
+					  ICE_SINGLE_ACT_LAN_ENABLE |
+					  ICE_SINGLE_ACT_VALID_BIT |
+					  ((vsi->vsi_num <<
+					    ICE_SINGLE_ACT_VSI_ID_S) &
+					   ICE_SINGLE_ACT_VSI_ID_M));
+		s_rule->hdr_len = cpu_to_le16(ICE_LAG_SRIOV_TRAIN_PKT_LEN);
+		memcpy(s_rule->hdr_data, lacp_train_pkt, LACP_TRAIN_PKT_LEN);
+		opc = ice_aqc_opc_add_sw_rules;
+	} else {
+		opc = ice_aqc_opc_remove_sw_rules;
+		s_rule->index = cpu_to_le16(lag->cp_rule_idx);
+	}
+	if (ice_aq_sw_rules(&lag->pf->hw, s_rule, buf_len, 1, opc, NULL)) {
+		netdev_warn(lag->netdev, "Error %s CP rule for fail-over\n",
+			    add ? "ADDING" : "REMOVING");
+		goto cp_free;
+	}
+
+	if (add)
+		lag->cp_rule_idx = le16_to_cpu(s_rule->index);
+	else
+		lag->cp_rule_idx = 0;
+
+cp_free:
+	kfree(s_rule);
+}
+
+/**
+ * ice_lag_info_event - handle NETDEV_BONDING_INFO event
+ * @lag: LAG info struct
+ * @ptr: opaque data pointer
+ *
+ * ptr is to be cast to (netdev_notifier_bonding_info *)
+ */
+static void ice_lag_info_event(struct ice_lag *lag, void *ptr)
+{
+	struct netdev_notifier_bonding_info *info;
+	struct netdev_bonding_info *bonding_info;
+	struct net_device *event_netdev;
+	const char *lag_netdev_name;
+
+	event_netdev = netdev_notifier_info_to_dev(ptr);
+	info = ptr;
+	lag_netdev_name = netdev_name(lag->netdev);
+	bonding_info = &info->bonding_info;
+
+	if (event_netdev != lag->netdev || !lag->bonded || !lag->upper_netdev)
+		return;
+
+	if (bonding_info->master.bond_mode != BOND_MODE_ACTIVEBACKUP) {
+		netdev_dbg(lag->netdev, "Bonding event recv, but mode not active/backup\n");
+		goto lag_out;
+	}
+
+	if (strcmp(bonding_info->slave.slave_name, lag_netdev_name)) {
+		netdev_dbg(lag->netdev, "Bonding event recv, but secondary info not for us\n");
+		goto lag_out;
+	}
+
+	if (bonding_info->slave.state)
+		ice_lag_set_backup(lag);
+	else
+		ice_lag_set_primary(lag);
+
+lag_out:
+	ice_display_lag_info(lag);
+}
+
+/**
+ * ice_lag_reclaim_vf_tc - move scheduling nodes back to primary interface
+ * @lag: primary interface lag struct
+ * @src_hw: HW struct current node location
+ * @vsi_num: VSI index in PF space
+ * @tc: traffic class to move
+ */
+static void
+ice_lag_reclaim_vf_tc(struct ice_lag *lag, struct ice_hw *src_hw, u16 vsi_num,
+		      u8 tc)
+{
+	u16 numq, valq, buf_size, num_moved, qbuf_size;
+	struct device *dev = ice_pf_to_dev(lag->pf);
+	struct ice_aqc_cfg_txqs_buf *qbuf;
+	struct ice_aqc_move_elem *buf;
+	struct ice_sched_node *n_prt;
+	__le32 teid, parent_teid;
+	struct ice_vsi_ctx *ctx;
+	struct ice_hw *hw;
+	u32 tmp_teid;
+
+	hw = &lag->pf->hw;
+	ctx = ice_get_vsi_ctx(hw, vsi_num);
+	if (!ctx) {
+		dev_warn(dev, "Unable to locate VSI context for LAG reclaim\n");
+		return;
+	}
+
+	/* check to see if this VF is enabled on this TC */
+	if (!ctx->sched.vsi_node[tc])
+		return;
+
+	numq = ctx->num_lan_q_entries[tc];
+	teid = ctx->sched.vsi_node[tc]->info.node_teid;
+	tmp_teid = le32_to_cpu(teid);
+	parent_teid = ctx->sched.vsi_node[tc]->info.parent_teid;
+
+	/* if !teid or !numq, then this TC is not active */
+	if (!tmp_teid || !numq)
+		return;
+
+	/* suspend traffic */
+	if (ice_sched_suspend_resume_elems(hw, 1, &tmp_teid, true))
+		dev_dbg(dev, "Problem suspending traffic for LAG node move\n");
+
+	/* reconfig queues for new port */
+	qbuf_size = struct_size(qbuf, queue_info, numq);
+	qbuf = kzalloc(qbuf_size, GFP_KERNEL);
+	if (!qbuf) {
+		dev_warn(dev, "Failure allocating memory for VF queue recfg buffer\n");
+		goto resume_reclaim;
+	}
+
+	/* add the per queue info for the reconfigure command buffer */
+	valq = ice_lag_qbuf_recfg(hw, qbuf, vsi_num, numq, tc);
+	if (!valq) {
+		dev_dbg(dev, "No valid queues found for LAG reclaim\n");
+		goto reclaim_none;
+	}
+
+	if (ice_aq_cfg_lan_txq(hw, qbuf, qbuf_size, numq,
+			       src_hw->port_info->lport, hw->port_info->lport,
+			       NULL)) {
+		dev_warn(dev, "Failure to configure queues for LAG failover\n");
+		goto reclaim_qerr;
+	}
+
+reclaim_none:
+	kfree(qbuf);
+
+	/* find parent in primary tree */
+	n_prt = ice_lag_get_sched_parent(hw, tc);
+	if (!n_prt)
+		goto resume_reclaim;
+
+	/* Move node to new parent */
+	buf_size = struct_size(buf, teid, 1);
+	buf = kzalloc(buf_size, GFP_KERNEL);
+	if (!buf) {
+		dev_warn(dev, "Failure to alloc memory for VF node failover\n");
+		goto resume_reclaim;
+	}
+
+	buf->hdr.src_parent_teid = parent_teid;
+	buf->hdr.dest_parent_teid = n_prt->info.node_teid;
+	buf->hdr.num_elems = cpu_to_le16(1);
+	buf->hdr.mode = ICE_AQC_MOVE_ELEM_MODE_KEEP_OWN;
+	buf->teid[0] = teid;
+
+	if (ice_aq_move_sched_elems(&lag->pf->hw, 1, buf, buf_size, &num_moved,
+				    NULL))
+		dev_warn(dev, "Failure to move VF nodes for LAG reclaim\n");
+	else
+		ice_sched_update_parent(n_prt, ctx->sched.vsi_node[tc]);
+
+	kfree(buf);
+	goto resume_reclaim;
+
+reclaim_qerr:
+	kfree(qbuf);
+
+resume_reclaim:
+	/* restart traffic */
+	if (ice_sched_suspend_resume_elems(hw, 1, &tmp_teid, false))
+		dev_warn(dev, "Problem restarting traffic for LAG node reclaim\n");
+}
+
+/**
+ * ice_lag_reclaim_vf_nodes - When interface leaving bond primary reclaims nodes
+ * @lag: primary interface lag struct
+ * @src_hw: HW struct for current node location
+ */
+static void
+ice_lag_reclaim_vf_nodes(struct ice_lag *lag, struct ice_hw *src_hw)
+{
+	struct ice_pf *pf;
+	int i, tc;
+
+	if (!lag->primary || !src_hw)
+		return;
+
+	pf = lag->pf;
+	ice_for_each_vsi(pf, i)
+		if (pf->vsi[i] && (pf->vsi[i]->type == ICE_VSI_VF ||
+				   pf->vsi[i]->type == ICE_VSI_SWITCHDEV_CTRL))
+			ice_for_each_traffic_class(tc)
+				ice_lag_reclaim_vf_tc(lag, src_hw, i, tc);
+}
+
+/**
+ * ice_lag_link - handle LAG link event
+ * @lag: LAG info struct
+ */
+static void ice_lag_link(struct ice_lag *lag)
+{
+	struct ice_pf *pf = lag->pf;
+
+	if (lag->bonded)
+		dev_warn(ice_pf_to_dev(pf), "%s Already part of a bond\n",
+			 netdev_name(lag->netdev));
+
+	lag->bonded = true;
+	lag->role = ICE_LAG_UNSET;
+	netdev_info(lag->netdev, "Shared SR-IOV resources in bond are active\n");
+}
+
+/**
+ * ice_lag_unlink - handle unlink event
+ * @lag: LAG info struct
+ */
+static void ice_lag_unlink(struct ice_lag *lag)
+{
+	u8 pri_port, act_port, loc_port;
+	struct ice_pf *pf = lag->pf;
+
+	if (!lag->bonded) {
+		netdev_dbg(lag->netdev, "bonding unlink event on non-LAG netdev\n");
+		return;
+	}
+
+	if (lag->primary) {
+		act_port = lag->active_port;
+		pri_port = lag->pf->hw.port_info->lport;
+		if (act_port != pri_port && act_port != ICE_LAG_INVALID_PORT)
+			ice_lag_move_vf_nodes(lag, act_port, pri_port);
+		lag->primary = false;
+		lag->active_port = ICE_LAG_INVALID_PORT;
+	} else {
+		struct ice_lag *primary_lag;
+
+		primary_lag = ice_lag_find_primary(lag);
+		if (primary_lag) {
+			act_port = primary_lag->active_port;
+			pri_port = primary_lag->pf->hw.port_info->lport;
+			loc_port = pf->hw.port_info->lport;
+			if (act_port == loc_port &&
+			    act_port != ICE_LAG_INVALID_PORT) {
+				ice_lag_reclaim_vf_nodes(primary_lag,
+							 &lag->pf->hw);
+				primary_lag->active_port = ICE_LAG_INVALID_PORT;
+			}
+		}
+	}
+
+	lag->bonded = false;
+	lag->role = ICE_LAG_NONE;
+	lag->upper_netdev = NULL;
+}
+
+/**
+ * ice_lag_link_unlink - helper function to call lag_link/unlink
+ * @lag: lag info struct
+ * @ptr: opaque pointer data
+ */
+static void ice_lag_link_unlink(struct ice_lag *lag, void *ptr)
+{
+	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+	struct netdev_notifier_changeupper_info *info = ptr;
+
+	if (netdev != lag->netdev)
+		return;
+
+	if (info->linking)
+		ice_lag_link(lag);
+	else
+		ice_lag_unlink(lag);
+}
+
+/**
+ * ice_lag_set_swid - set the SWID on secondary interface
+ * @primary_swid: primary interface's SWID
+ * @local_lag: local interfaces LAG struct
+ * @link: Is this a linking activity
+ *
+ * If link is false, then primary_swid should be expected to not be valid
+ * This function should never be called in interrupt context.
+ */
+static void
+ice_lag_set_swid(u16 primary_swid, struct ice_lag *local_lag,
+		 bool link)
+{
+	struct ice_aqc_alloc_free_res_elem *buf;
+	struct ice_aqc_set_port_params *cmd;
+	struct ice_aq_desc desc;
+	u16 buf_len, swid;
+	int status, i;
+
+	buf_len = struct_size(buf, elem, 1);
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf) {
+		dev_err(ice_pf_to_dev(local_lag->pf), "-ENOMEM error setting SWID\n");
+		return;
+	}
+
+	buf->num_elems = cpu_to_le16(1);
+	buf->res_type = cpu_to_le16(ICE_AQC_RES_TYPE_SWID);
+	/* if unlinnking need to free the shared resource */
+	if (!link && local_lag->bond_swid) {
+		buf->elem[0].e.sw_resp = cpu_to_le16(local_lag->bond_swid);
+		status = ice_aq_alloc_free_res(&local_lag->pf->hw, buf,
+					       buf_len, ice_aqc_opc_free_res);
+		if (status)
+			dev_err(ice_pf_to_dev(local_lag->pf), "Error freeing SWID during LAG unlink\n");
+		local_lag->bond_swid = 0;
+	}
+
+	if (link) {
+		buf->res_type |=  cpu_to_le16(ICE_LAG_RES_SHARED |
+					      ICE_LAG_RES_VALID);
+		/* store the primary's SWID in case it leaves bond first */
+		local_lag->bond_swid = primary_swid;
+		buf->elem[0].e.sw_resp = cpu_to_le16(local_lag->bond_swid);
+	} else {
+		buf->elem[0].e.sw_resp =
+			cpu_to_le16(local_lag->pf->hw.port_info->sw_id);
+	}
+
+	status = ice_aq_alloc_free_res(&local_lag->pf->hw, buf, buf_len,
+				       ice_aqc_opc_alloc_res);
+	if (status)
+		dev_err(ice_pf_to_dev(local_lag->pf), "Error subscribing to SWID 0x%04X\n",
+			local_lag->bond_swid);
+
+	kfree(buf);
+
+	/* Configure port param SWID to correct value */
+	if (link)
+		swid = primary_swid;
+	else
+		swid = local_lag->pf->hw.port_info->sw_id;
+
+	cmd = &desc.params.set_port_params;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_port_params);
+
+	cmd->swid = cpu_to_le16(ICE_AQC_PORT_SWID_VALID | swid);
+	/* If this is happening in reset context, it is possible that the
+	 * primary interface has not finished setting its SWID to SHARED
+	 * yet.  Allow retries to account for this timing issue between
+	 * interfaces.
+	 */
+	for (i = 0; i < ICE_LAG_RESET_RETRIES; i++) {
+		status = ice_aq_send_cmd(&local_lag->pf->hw, &desc, NULL, 0,
+					 NULL);
+		if (!status)
+			break;
+
+		usleep_range(1000, 2000);
+	}
+
+	if (status)
+		dev_err(ice_pf_to_dev(local_lag->pf), "Error setting SWID in port params %d\n",
+			status);
+}
+
+/**
+ * ice_lag_primary_swid - set/clear the SHARED attrib of primary's SWID
+ * @lag: primary interface's lag struct
+ * @link: is this a linking activity
+ *
+ * Implement setting primary SWID as shared using 0x020B
+ */
+static void ice_lag_primary_swid(struct ice_lag *lag, bool link)
+{
+	struct ice_hw *hw;
+	u16 swid;
+
+	hw = &lag->pf->hw;
+	swid = hw->port_info->sw_id;
+
+	if (ice_share_res(hw, ICE_AQC_RES_TYPE_SWID, link, swid))
+		dev_warn(ice_pf_to_dev(lag->pf), "Failure to set primary interface shared status\n");
+}
+
+/**
+ * ice_lag_add_prune_list - Adds event_pf's VSI to primary's prune list
+ * @lag: lag info struct
+ * @event_pf: PF struct for VSI we are adding to primary's prune list
+ */
+static void ice_lag_add_prune_list(struct ice_lag *lag, struct ice_pf *event_pf)
+{
+	u16 num_vsi, rule_buf_sz, vsi_list_id, event_vsi_num, prim_vsi_idx;
+	struct ice_sw_rule_vsi_list *s_rule = NULL;
+	struct device *dev;
+
+	num_vsi = 1;
+
+	dev = ice_pf_to_dev(lag->pf);
+	event_vsi_num = event_pf->vsi[0]->vsi_num;
+	prim_vsi_idx = lag->pf->vsi[0]->idx;
+
+	if (!ice_find_vsi_list_entry(&lag->pf->hw, ICE_SW_LKUP_VLAN,
+				     prim_vsi_idx, &vsi_list_id)) {
+		dev_warn(dev, "Could not locate prune list when setting up SRIOV LAG\n");
+		return;
+	}
+
+	rule_buf_sz = (u16)ICE_SW_RULE_VSI_LIST_SIZE(s_rule, num_vsi);
+	s_rule = kzalloc(rule_buf_sz, GFP_KERNEL);
+	if (!s_rule) {
+		dev_warn(dev, "Error allocating space for prune list when configuring SRIOV LAG\n");
+		return;
+	}
+
+	s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_PRUNE_LIST_SET);
+	s_rule->index = cpu_to_le16(vsi_list_id);
+	s_rule->number_vsi = cpu_to_le16(num_vsi);
+	s_rule->vsi[0] = cpu_to_le16(event_vsi_num);
+
+	if (ice_aq_sw_rules(&event_pf->hw, s_rule, rule_buf_sz, 1,
+			    ice_aqc_opc_update_sw_rules, NULL))
+		dev_warn(dev, "Error adding VSI prune list\n");
+	kfree(s_rule);
+}
+
+/**
+ * ice_lag_del_prune_list - Remove secondary's vsi from primary's prune list
+ * @lag: primary interface's ice_lag struct
+ * @event_pf: PF struct for unlinking interface
+ */
+static void ice_lag_del_prune_list(struct ice_lag *lag, struct ice_pf *event_pf)
+{
+	u16 num_vsi, vsi_num, vsi_idx, rule_buf_sz, vsi_list_id;
+	struct ice_sw_rule_vsi_list *s_rule = NULL;
+	struct device *dev;
+
+	num_vsi = 1;
+
+	dev = ice_pf_to_dev(lag->pf);
+	vsi_num = event_pf->vsi[0]->vsi_num;
+	vsi_idx = lag->pf->vsi[0]->idx;
+
+	if (!ice_find_vsi_list_entry(&lag->pf->hw, ICE_SW_LKUP_VLAN,
+				     vsi_idx, &vsi_list_id)) {
+		dev_warn(dev, "Could not locate prune list when unwinding SRIOV LAG\n");
+		return;
+	}
+
+	rule_buf_sz = (u16)ICE_SW_RULE_VSI_LIST_SIZE(s_rule, num_vsi);
+	s_rule = kzalloc(rule_buf_sz, GFP_KERNEL);
+	if (!s_rule) {
+		dev_warn(dev, "Error allocating prune list when unwinding SRIOV LAG\n");
+		return;
+	}
+
+	s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR);
+	s_rule->index = cpu_to_le16(vsi_list_id);
+	s_rule->number_vsi = cpu_to_le16(num_vsi);
+	s_rule->vsi[0] = cpu_to_le16(vsi_num);
+
+	if (ice_aq_sw_rules(&event_pf->hw, (struct ice_aqc_sw_rules *)s_rule,
+			    rule_buf_sz, 1, ice_aqc_opc_update_sw_rules, NULL))
+		dev_warn(dev, "Error clearing VSI prune list\n");
+
+	kfree(s_rule);
+}
+
+/**
+ * ice_lag_init_feature_support_flag - Check for NVM support for LAG
+ * @pf: PF struct
+ */
+static void ice_lag_init_feature_support_flag(struct ice_pf *pf)
+{
+	struct ice_hw_common_caps *caps;
+
+	caps = &pf->hw.dev_caps.common_cap;
+	if (caps->roce_lag)
+		ice_set_feature_support(pf, ICE_F_ROCE_LAG);
+	else
+		ice_clear_feature_support(pf, ICE_F_ROCE_LAG);
+
+	if (caps->sriov_lag)
+		ice_set_feature_support(pf, ICE_F_SRIOV_LAG);
+	else
+		ice_clear_feature_support(pf, ICE_F_SRIOV_LAG);
+}
+
+/**
+ * ice_lag_changeupper_event - handle LAG changeupper event
+ * @lag: LAG info struct
+ * @ptr: opaque pointer data
+ */
+static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr)
+{
+	struct netdev_notifier_changeupper_info *info;
+	struct ice_lag *primary_lag;
+	struct net_device *netdev;
+
+	info = ptr;
+	netdev = netdev_notifier_info_to_dev(ptr);
+
+	/* not for this netdev */
+	if (netdev != lag->netdev)
+		return;
+
+	primary_lag = ice_lag_find_primary(lag);
+	if (info->linking) {
+		lag->upper_netdev = info->upper_dev;
+		/* If there is not already a primary interface in the LAG,
+		 * then mark this one as primary.
+		 */
+		if (!primary_lag) {
+			lag->primary = true;
+			/* Configure primary's SWID to be shared */
+			ice_lag_primary_swid(lag, true);
+			primary_lag = lag;
+		} else {
+			u16 swid;
+
+			swid = primary_lag->pf->hw.port_info->sw_id;
+			ice_lag_set_swid(swid, lag, true);
+			ice_lag_add_prune_list(primary_lag, lag->pf);
+		}
+		/* add filter for primary control packets */
+		ice_lag_cfg_cp_fltr(lag, true);
+	} else {
+		if (!primary_lag && lag->primary)
+			primary_lag = lag;
+
+		if (!lag->primary) {
+			ice_lag_set_swid(0, lag, false);
+		} else {
+			if (primary_lag && lag->primary) {
+				ice_lag_primary_swid(lag, false);
+				ice_lag_del_prune_list(primary_lag, lag->pf);
+			}
+		}
+		/* remove filter for control packets */
+		ice_lag_cfg_cp_fltr(lag, false);
+	}
+}
+
+/**
+ * ice_lag_monitor_link - monitor interfaces entering/leaving the aggregate
+ * @lag: lag info struct
+ * @ptr: opaque data containing notifier event
+ *
+ * This function only operates after a primary has been set.
+ */
+static void ice_lag_monitor_link(struct ice_lag *lag, void *ptr)
+{
+	struct netdev_notifier_changeupper_info *info;
+	struct ice_hw *prim_hw, *active_hw;
+	struct net_device *event_netdev;
+	struct ice_pf *pf;
+	u8 prim_port;
+
+	if (!lag->primary)
+		return;
+
+	event_netdev = netdev_notifier_info_to_dev(ptr);
+	if (!netif_is_same_ice(lag->pf, event_netdev))
+		return;
+
+	pf = lag->pf;
+	prim_hw = &pf->hw;
+	prim_port = prim_hw->port_info->lport;
+
+	info = (struct netdev_notifier_changeupper_info *)ptr;
+	if (info->upper_dev != lag->upper_netdev)
+		return;
+
+	if (!info->linking) {
+		/* Since there are only two interfaces allowed in SRIOV+LAG, if
+		 * one port is leaving, then nodes need to be on primary
+		 * interface.
+		 */
+		if (prim_port != lag->active_port &&
+		    lag->active_port != ICE_LAG_INVALID_PORT) {
+			active_hw = ice_lag_find_hw_by_lport(lag,
+							     lag->active_port);
+			ice_lag_reclaim_vf_nodes(lag, active_hw);
+			lag->active_port = ICE_LAG_INVALID_PORT;
+		}
+	}
+}
+
+/**
+ * ice_lag_monitor_active - main PF keep track of which port is active
+ * @lag: lag info struct
+ * @ptr: opaque data containing notifier event
+ *
+ * This function is for the primary PF to monitor changes in which port is
+ * active and handle changes for SRIOV VF functionality
+ */
+static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr)
+{
+	struct net_device *event_netdev, *event_upper;
+	struct netdev_notifier_bonding_info *info;
+	struct netdev_bonding_info *bonding_info;
+	struct ice_netdev_priv *event_np;
+	struct ice_pf *pf, *event_pf;
+	u8 prim_port, event_port;
+
+	if (!lag->primary)
+		return;
+
+	pf = lag->pf;
+	if (!pf)
+		return;
+
+	event_netdev = netdev_notifier_info_to_dev(ptr);
+	rcu_read_lock();
+	event_upper = netdev_master_upper_dev_get_rcu(event_netdev);
+	rcu_read_unlock();
+	if (!netif_is_ice(event_netdev) || event_upper != lag->upper_netdev)
+		return;
+
+	event_np = netdev_priv(event_netdev);
+	event_pf = event_np->vsi->back;
+	event_port = event_pf->hw.port_info->lport;
+	prim_port = pf->hw.port_info->lport;
+
+	info = (struct netdev_notifier_bonding_info *)ptr;
+	bonding_info = &info->bonding_info;
+
+	if (!bonding_info->slave.state) {
+		/* if no port is currently active, then nodes and filters exist
+		 * on primary port, check if we need to move them
+		 */
+		if (lag->active_port == ICE_LAG_INVALID_PORT) {
+			if (event_port != prim_port)
+				ice_lag_move_vf_nodes(lag, prim_port,
+						      event_port);
+			lag->active_port = event_port;
+			return;
+		}
+
+		/* active port is already set and is current event port */
+		if (lag->active_port == event_port)
+			return;
+		/* new active port */
+		ice_lag_move_vf_nodes(lag, lag->active_port, event_port);
+		lag->active_port = event_port;
+	} else {
+		/* port not set as currently active (e.g. new active port
+		 * has already claimed the nodes and filters
+		 */
+		if (lag->active_port != event_port)
+			return;
+		/* This is the case when neither port is active (both link down)
+		 * Link down on the bond - set active port to invalid and move
+		 * nodes and filters back to primary if not already there
+		 */
+		if (event_port != prim_port)
+			ice_lag_move_vf_nodes(lag, event_port, prim_port);
+		lag->active_port = ICE_LAG_INVALID_PORT;
+	}
+}
+
+/**
+ * ice_lag_chk_comp - evaluate bonded interface for feature support
+ * @lag: lag info struct
+ * @ptr: opaque data for netdev event info
+ */
+static bool
+ice_lag_chk_comp(struct ice_lag *lag, void *ptr)
+{
+	struct net_device *event_netdev, *event_upper;
+	struct netdev_notifier_bonding_info *info;
+	struct netdev_bonding_info *bonding_info;
+	struct list_head *tmp;
+	struct device *dev;
+	int count = 0;
+
+	if (!lag->primary)
+		return true;
+
+	event_netdev = netdev_notifier_info_to_dev(ptr);
+	rcu_read_lock();
+	event_upper = netdev_master_upper_dev_get_rcu(event_netdev);
+	rcu_read_unlock();
+	if (event_upper != lag->upper_netdev)
+		return true;
+
+	dev = ice_pf_to_dev(lag->pf);
+
+	/* only supporting switchdev mode for SRIOV VF LAG.
+	 * primary interface has to be in switchdev mode
+	 */
+	if (!ice_is_switchdev_running(lag->pf)) {
+		dev_info(dev, "Primary interface not in switchdev mode - VF LAG disabled\n");
+		return false;
+	}
+
+	info = (struct netdev_notifier_bonding_info *)ptr;
+	bonding_info = &info->bonding_info;
+	lag->bond_mode = bonding_info->master.bond_mode;
+	if (lag->bond_mode != BOND_MODE_ACTIVEBACKUP) {
+		dev_info(dev, "Bond Mode not ACTIVE-BACKUP - VF LAG disabled\n");
+		return false;
+	}
+
+	list_for_each(tmp, lag->netdev_head) {
+		struct ice_dcbx_cfg *dcb_cfg, *peer_dcb_cfg;
+		struct ice_lag_netdev_list *entry;
+		struct ice_netdev_priv *peer_np;
+		struct net_device *peer_netdev;
+		struct ice_vsi *vsi, *peer_vsi;
+		struct ice_pf *peer_pf;
+
+		entry = list_entry(tmp, struct ice_lag_netdev_list, node);
+		peer_netdev = entry->netdev;
+		if (!netif_is_ice(peer_netdev)) {
+			dev_info(dev, "Found %s non-ice netdev in LAG - VF LAG disabled\n",
+				 netdev_name(peer_netdev));
+			return false;
+		}
+
+		count++;
+		if (count > 2) {
+			dev_info(dev, "Found more than two netdevs in LAG - VF LAG disabled\n");
+			return false;
+		}
+
+		peer_np = netdev_priv(peer_netdev);
+		vsi = ice_get_main_vsi(lag->pf);
+		peer_vsi = peer_np->vsi;
+		if (lag->pf->pdev->bus != peer_vsi->back->pdev->bus ||
+		    lag->pf->pdev->slot != peer_vsi->back->pdev->slot) {
+			dev_info(dev, "Found %s on different device in LAG - VF LAG disabled\n",
+				 netdev_name(peer_netdev));
+			return false;
+		}
+
+		dcb_cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg;
+		peer_dcb_cfg = &peer_vsi->port_info->qos_cfg.local_dcbx_cfg;
+		if (memcmp(dcb_cfg, peer_dcb_cfg,
+			   sizeof(struct ice_dcbx_cfg))) {
+			dev_info(dev, "Found %s with different DCB in LAG - VF LAG disabled\n",
+				 netdev_name(peer_netdev));
+			return false;
+		}
+
+		peer_pf = peer_vsi->back;
+		if (test_bit(ICE_FLAG_FW_LLDP_AGENT, peer_pf->flags)) {
+			dev_warn(dev, "Found %s with FW LLDP agent active - VF LAG disabled\n",
+				 netdev_name(peer_netdev));
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/**
+ * ice_lag_unregister - handle netdev unregister events
+ * @lag: LAG info struct
+ * @event_netdev: netdev struct for target of notifier event
+ */
+static void
+ice_lag_unregister(struct ice_lag *lag, struct net_device *event_netdev)
+{
+	struct ice_netdev_priv *np;
+	struct ice_pf *event_pf;
+	struct ice_lag *p_lag;
+
+	p_lag = ice_lag_find_primary(lag);
+	np = netdev_priv(event_netdev);
+	event_pf = np->vsi->back;
+
+	if (p_lag) {
+		if (p_lag->active_port != p_lag->pf->hw.port_info->lport &&
+		    p_lag->active_port != ICE_LAG_INVALID_PORT) {
+			struct ice_hw *active_hw;
+
+			active_hw = ice_lag_find_hw_by_lport(lag,
+							     p_lag->active_port);
+			if (active_hw)
+				ice_lag_reclaim_vf_nodes(p_lag, active_hw);
+			lag->active_port = ICE_LAG_INVALID_PORT;
+		}
+	}
+
+	/* primary processing for primary */
+	if (lag->primary && lag->netdev == event_netdev)
+		ice_lag_primary_swid(lag, false);
+
+	/* primary processing for secondary */
+	if (lag->primary && lag->netdev != event_netdev)
+		ice_lag_del_prune_list(lag, event_pf);
+
+	/* secondary processing for secondary */
+	if (!lag->primary && lag->netdev == event_netdev)
+		ice_lag_set_swid(0, lag, false);
+}
+
+/**
+ * ice_lag_monitor_rdma - set and clear rdma functionality
+ * @lag: pointer to lag struct
+ * @ptr: opaque data for netdev event info
+ */
+static void
+ice_lag_monitor_rdma(struct ice_lag *lag, void *ptr)
+{
+	struct netdev_notifier_changeupper_info *info;
+	struct net_device *netdev;
+
+	info = ptr;
+	netdev = netdev_notifier_info_to_dev(ptr);
+
+	if (netdev != lag->netdev)
+		return;
+
+	if (info->linking)
+		ice_clear_rdma_cap(lag->pf);
+	else
+		ice_set_rdma_cap(lag->pf);
+}
+
+/**
+ * ice_lag_chk_disabled_bond - monitor interfaces entering/leaving disabled bond
+ * @lag: lag info struct
+ * @ptr: opaque data containing event
+ *
+ * as interfaces enter a bond - determine if the bond is currently
+ * SRIOV LAG compliant and flag if not.  As interfaces leave the
+ * bond, reset their compliant status.
+ */
+static void ice_lag_chk_disabled_bond(struct ice_lag *lag, void *ptr)
+{
+	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+	struct netdev_notifier_changeupper_info *info = ptr;
+	struct ice_lag *prim_lag;
+
+	if (netdev != lag->netdev)
+		return;
+
+	if (info->linking) {
+		prim_lag = ice_lag_find_primary(lag);
+		if (prim_lag &&
+		    !ice_is_feature_supported(prim_lag->pf, ICE_F_SRIOV_LAG)) {
+			ice_clear_feature_support(lag->pf, ICE_F_SRIOV_LAG);
+			netdev_info(netdev, "Interface added to non-compliant SRIOV LAG aggregate\n");
+		}
+	} else {
+		ice_lag_init_feature_support_flag(lag->pf);
+	}
+}
+
+/**
+ * ice_lag_disable_sriov_bond - set members of bond as not supporting SRIOV LAG
+ * @lag: primary interfaces lag struct
+ */
+static void ice_lag_disable_sriov_bond(struct ice_lag *lag)
+{
+	struct ice_netdev_priv *np;
+	struct ice_pf *pf;
+
+	np = netdev_priv(lag->netdev);
+	pf = np->vsi->back;
+	ice_clear_feature_support(pf, ICE_F_SRIOV_LAG);
+}
+
+/**
+ * ice_lag_process_event - process a task assigned to the lag_wq
+ * @work: pointer to work_struct
+ */
+static void ice_lag_process_event(struct work_struct *work)
+{
+	struct netdev_notifier_changeupper_info *info;
+	struct ice_lag_work *lag_work;
+	struct net_device *netdev;
+	struct list_head *tmp, *n;
+	struct ice_pf *pf;
+
+	lag_work = container_of(work, struct ice_lag_work, lag_task);
+	pf = lag_work->lag->pf;
+
+	mutex_lock(&pf->lag_mutex);
+	lag_work->lag->netdev_head = &lag_work->netdev_list.node;
+
+	switch (lag_work->event) {
+	case NETDEV_CHANGEUPPER:
+		info = &lag_work->info.changeupper_info;
+		ice_lag_chk_disabled_bond(lag_work->lag, info);
+		if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) {
+			ice_lag_monitor_link(lag_work->lag, info);
+			ice_lag_changeupper_event(lag_work->lag, info);
+			ice_lag_link_unlink(lag_work->lag, info);
+		}
+		ice_lag_monitor_rdma(lag_work->lag, info);
+		break;
+	case NETDEV_BONDING_INFO:
+		if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) {
+			if (!ice_lag_chk_comp(lag_work->lag,
+					      &lag_work->info.bonding_info)) {
+				netdev = lag_work->info.bonding_info.info.dev;
+				ice_lag_disable_sriov_bond(lag_work->lag);
+				ice_lag_unregister(lag_work->lag, netdev);
+				goto lag_cleanup;
+			}
+			ice_lag_monitor_active(lag_work->lag,
+					       &lag_work->info.bonding_info);
+			ice_lag_cfg_pf_fltrs(lag_work->lag,
+					     &lag_work->info.bonding_info);
+		}
+		ice_lag_info_event(lag_work->lag, &lag_work->info.bonding_info);
+		break;
+	case NETDEV_UNREGISTER:
+		if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) {
+			netdev = lag_work->info.bonding_info.info.dev;
+			if ((netdev == lag_work->lag->netdev ||
+			     lag_work->lag->primary) && lag_work->lag->bonded)
+				ice_lag_unregister(lag_work->lag, netdev);
+		}
+		break;
+	default:
+		break;
+	}
+
+lag_cleanup:
+	/* cleanup resources allocated for this work item */
+	list_for_each_safe(tmp, n, &lag_work->netdev_list.node) {
+		struct ice_lag_netdev_list *entry;
+
+		entry = list_entry(tmp, struct ice_lag_netdev_list, node);
+		list_del(&entry->node);
+		kfree(entry);
+	}
+	lag_work->lag->netdev_head = NULL;
+
+	mutex_unlock(&pf->lag_mutex);
+
+	kfree(lag_work);
+}
+
+/**
+ * ice_lag_event_handler - handle LAG events from netdev
+ * @notif_blk: notifier block registered by this netdev
+ * @event: event type
+ * @ptr: opaque data containing notifier event
+ */
+static int
+ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event,
+		      void *ptr)
+{
+	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+	struct net_device *upper_netdev;
+	struct ice_lag_work *lag_work;
+	struct ice_lag *lag;
+
+	if (!netif_is_ice(netdev))
+		return NOTIFY_DONE;
+
+	if (event != NETDEV_CHANGEUPPER && event != NETDEV_BONDING_INFO &&
+	    event != NETDEV_UNREGISTER)
+		return NOTIFY_DONE;
+
+	if (!(netdev->priv_flags & IFF_BONDING))
+		return NOTIFY_DONE;
+
+	lag = container_of(notif_blk, struct ice_lag, notif_block);
+	if (!lag->netdev)
+		return NOTIFY_DONE;
+
+	if (!net_eq(dev_net(netdev), &init_net))
+		return NOTIFY_DONE;
+
+	/* This memory will be freed at the end of ice_lag_process_event */
+	lag_work = kzalloc(sizeof(*lag_work), GFP_KERNEL);
+	if (!lag_work)
+		return -ENOMEM;
+
+	lag_work->event_netdev = netdev;
+	lag_work->lag = lag;
+	lag_work->event = event;
+	if (event == NETDEV_CHANGEUPPER) {
+		struct netdev_notifier_changeupper_info *info;
+
+		info = ptr;
+		upper_netdev = info->upper_dev;
+	} else {
+		upper_netdev = netdev_master_upper_dev_get(netdev);
+	}
+
+	INIT_LIST_HEAD(&lag_work->netdev_list.node);
+	if (upper_netdev) {
+		struct ice_lag_netdev_list *nd_list;
+		struct net_device *tmp_nd;
+
+		rcu_read_lock();
+		for_each_netdev_in_bond_rcu(upper_netdev, tmp_nd) {
+			nd_list = kzalloc(sizeof(*nd_list), GFP_ATOMIC);
+			if (!nd_list)
+				break;
+
+			nd_list->netdev = tmp_nd;
+			list_add(&nd_list->node, &lag_work->netdev_list.node);
+		}
+		rcu_read_unlock();
+	}
+
+	switch (event) {
+	case NETDEV_CHANGEUPPER:
+		lag_work->info.changeupper_info =
+			*((struct netdev_notifier_changeupper_info *)ptr);
+		break;
+	case NETDEV_BONDING_INFO:
+		lag_work->info.bonding_info =
+			*((struct netdev_notifier_bonding_info *)ptr);
+		break;
+	default:
+		lag_work->info.notifier_info =
+			*((struct netdev_notifier_info *)ptr);
+		break;
+	}
+
+	INIT_WORK(&lag_work->lag_task, ice_lag_process_event);
+	queue_work(ice_lag_wq, &lag_work->lag_task);
+
+	return NOTIFY_DONE;
+}
+
+/**
+ * ice_register_lag_handler - register LAG handler on netdev
+ * @lag: LAG struct
+ */
+static int ice_register_lag_handler(struct ice_lag *lag)
+{
+	struct device *dev = ice_pf_to_dev(lag->pf);
+	struct notifier_block *notif_blk;
+
+	notif_blk = &lag->notif_block;
+
+	if (!notif_blk->notifier_call) {
+		notif_blk->notifier_call = ice_lag_event_handler;
+		if (register_netdevice_notifier(notif_blk)) {
+			notif_blk->notifier_call = NULL;
+			dev_err(dev, "FAIL register LAG event handler!\n");
+			return -EINVAL;
+		}
+		dev_dbg(dev, "LAG event handler registered\n");
+	}
+	return 0;
+}
+
+/**
+ * ice_unregister_lag_handler - unregister LAG handler on netdev
+ * @lag: LAG struct
+ */
+static void ice_unregister_lag_handler(struct ice_lag *lag)
+{
+	struct device *dev = ice_pf_to_dev(lag->pf);
+	struct notifier_block *notif_blk;
+
+	notif_blk = &lag->notif_block;
+	if (notif_blk->notifier_call) {
+		unregister_netdevice_notifier(notif_blk);
+		dev_dbg(dev, "LAG event handler unregistered\n");
+	}
+}
+
+/**
+ * ice_create_lag_recipe
+ * @hw: pointer to HW struct
+ * @rid: pointer to u16 to pass back recipe index
+ * @base_recipe: recipe to base the new recipe on
+ * @prio: priority for new recipe
+ *
+ * function returns 0 on error
+ */
+static int ice_create_lag_recipe(struct ice_hw *hw, u16 *rid,
+				 const u8 *base_recipe, u8 prio)
+{
+	struct ice_aqc_recipe_data_elem *new_rcp;
+	int err;
+
+	err = ice_alloc_recipe(hw, rid);
+	if (err)
+		return err;
+
+	new_rcp = kzalloc(ICE_RECIPE_LEN * ICE_MAX_NUM_RECIPES, GFP_KERNEL);
+	if (!new_rcp)
+		return -ENOMEM;
+
+	memcpy(new_rcp, base_recipe, ICE_RECIPE_LEN);
+	new_rcp->content.act_ctrl_fwd_priority = prio;
+	new_rcp->content.rid = *rid | ICE_AQ_RECIPE_ID_IS_ROOT;
+	new_rcp->recipe_indx = *rid;
+	bitmap_zero((unsigned long *)new_rcp->recipe_bitmap,
+		    ICE_MAX_NUM_RECIPES);
+	set_bit(*rid, (unsigned long *)new_rcp->recipe_bitmap);
+
+	err = ice_aq_add_recipe(hw, new_rcp, 1, NULL);
+	if (err)
+		*rid = 0;
+
+	kfree(new_rcp);
+	return err;
+}
+
+/**
+ * ice_lag_move_vf_nodes_tc_sync - move a VF's nodes for a tc during reset
+ * @lag: primary interfaces lag struct
+ * @dest_hw: HW struct for destination's interface
+ * @vsi_num: VSI index in PF space
+ * @tc: traffic class to move
+ */
+static void
+ice_lag_move_vf_nodes_tc_sync(struct ice_lag *lag, struct ice_hw *dest_hw,
+			      u16 vsi_num, u8 tc)
+{
+	u16 numq, valq, buf_size, num_moved, qbuf_size;
+	struct device *dev = ice_pf_to_dev(lag->pf);
+	struct ice_aqc_cfg_txqs_buf *qbuf;
+	struct ice_aqc_move_elem *buf;
+	struct ice_sched_node *n_prt;
+	__le32 teid, parent_teid;
+	struct ice_vsi_ctx *ctx;
+	struct ice_hw *hw;
+	u32 tmp_teid;
+
+	hw = &lag->pf->hw;
+	ctx = ice_get_vsi_ctx(hw, vsi_num);
+	if (!ctx) {
+		dev_warn(dev, "LAG rebuild failed after reset due to VSI Context failure\n");
+		return;
+	}
+
+	if (!ctx->sched.vsi_node[tc])
+		return;
+
+	numq = ctx->num_lan_q_entries[tc];
+	teid = ctx->sched.vsi_node[tc]->info.node_teid;
+	tmp_teid = le32_to_cpu(teid);
+	parent_teid = ctx->sched.vsi_node[tc]->info.parent_teid;
+
+	if (!tmp_teid || !numq)
+		return;
+
+	if (ice_sched_suspend_resume_elems(hw, 1, &tmp_teid, true))
+		dev_dbg(dev, "Problem suspending traffic during reset rebuild\n");
+
+	/* reconfig queues for new port */
+	qbuf_size = struct_size(qbuf, queue_info, numq);
+	qbuf = kzalloc(qbuf_size, GFP_KERNEL);
+	if (!qbuf) {
+		dev_warn(dev, "Failure allocating VF queue recfg buffer for reset rebuild\n");
+		goto resume_sync;
+	}
+
+	/* add the per queue info for the reconfigure command buffer */
+	valq = ice_lag_qbuf_recfg(hw, qbuf, vsi_num, numq, tc);
+	if (!valq) {
+		dev_warn(dev, "Failure to reconfig queues for LAG reset rebuild\n");
+		goto sync_none;
+	}
+
+	if (ice_aq_cfg_lan_txq(hw, qbuf, qbuf_size, numq, hw->port_info->lport,
+			       dest_hw->port_info->lport, NULL)) {
+		dev_warn(dev, "Failure to configure queues for LAG reset rebuild\n");
+		goto sync_qerr;
+	}
+
+sync_none:
+	kfree(qbuf);
+
+	/* find parent in destination tree */
+	n_prt = ice_lag_get_sched_parent(dest_hw, tc);
+	if (!n_prt)
+		goto resume_sync;
+
+	/* Move node to new parent */
+	buf_size = struct_size(buf, teid, 1);
+	buf = kzalloc(buf_size, GFP_KERNEL);
+	if (!buf) {
+		dev_warn(dev, "Failure to alloc for VF node move in reset rebuild\n");
+		goto resume_sync;
+	}
+
+	buf->hdr.src_parent_teid = parent_teid;
+	buf->hdr.dest_parent_teid = n_prt->info.node_teid;
+	buf->hdr.num_elems = cpu_to_le16(1);
+	buf->hdr.mode = ICE_AQC_MOVE_ELEM_MODE_KEEP_OWN;
+	buf->teid[0] = teid;
+
+	if (ice_aq_move_sched_elems(&lag->pf->hw, 1, buf, buf_size, &num_moved,
+				    NULL))
+		dev_warn(dev, "Failure to move VF nodes for LAG reset rebuild\n");
+	else
+		ice_sched_update_parent(n_prt, ctx->sched.vsi_node[tc]);
+
+	kfree(buf);
+	goto resume_sync;
+
+sync_qerr:
+	kfree(qbuf);
+
+resume_sync:
+	if (ice_sched_suspend_resume_elems(hw, 1, &tmp_teid, false))
+		dev_warn(dev, "Problem restarting traffic for LAG node reset rebuild\n");
+}
+
+/**
+ * ice_lag_move_vf_nodes_sync - move vf nodes to active interface
+ * @lag: primary interfaces lag struct
+ * @dest_hw: lport value for currently active port
+ *
+ * This function is used in a reset context, outside of event handling,
+ * to move the VF nodes to the secondary interface when that interface
+ * is the active interface during a reset rebuild
+ */
+static void
+ice_lag_move_vf_nodes_sync(struct ice_lag *lag, struct ice_hw *dest_hw)
+{
+	struct ice_pf *pf;
+	int i, tc;
+
+	if (!lag->primary || !dest_hw)
+		return;
+
+	pf = lag->pf;
+	ice_for_each_vsi(pf, i)
+		if (pf->vsi[i] && (pf->vsi[i]->type == ICE_VSI_VF ||
+				   pf->vsi[i]->type == ICE_VSI_SWITCHDEV_CTRL))
+			ice_for_each_traffic_class(tc)
+				ice_lag_move_vf_nodes_tc_sync(lag, dest_hw, i,
+							      tc);
+}
+
+/**
+ * ice_init_lag - initialize support for LAG
+ * @pf: PF struct
+ *
+ * Alloc memory for LAG structs and initialize the elements.
+ * Memory will be freed in ice_deinit_lag
+ */
+int ice_init_lag(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_lag *lag;
+	struct ice_vsi *vsi;
+	u64 recipe_bits = 0;
+	int n, err;
+
+	ice_lag_init_feature_support_flag(pf);
+	if (!ice_is_feature_supported(pf, ICE_F_SRIOV_LAG))
+		return 0;
+
+	pf->lag = kzalloc(sizeof(*lag), GFP_KERNEL);
+	if (!pf->lag)
+		return -ENOMEM;
+	lag = pf->lag;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi) {
+		dev_err(dev, "couldn't get main vsi, link aggregation init fail\n");
+		err = -EIO;
+		goto lag_error;
+	}
+
+	lag->pf = pf;
+	lag->netdev = vsi->netdev;
+	lag->role = ICE_LAG_NONE;
+	lag->active_port = ICE_LAG_INVALID_PORT;
+	lag->bonded = false;
+	lag->upper_netdev = NULL;
+	lag->notif_block.notifier_call = NULL;
+
+	err = ice_register_lag_handler(lag);
+	if (err) {
+		dev_warn(dev, "INIT LAG: Failed to register event handler\n");
+		goto lag_error;
+	}
+
+	err = ice_create_lag_recipe(&pf->hw, &lag->pf_recipe, ice_dflt_vsi_rcp,
+				    1);
+	if (err)
+		goto lag_error;
+
+	/* associate recipes to profiles */
+	for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) {
+		err = ice_aq_get_recipe_to_profile(&pf->hw, n,
+						   (u8 *)&recipe_bits, NULL);
+		if (err)
+			continue;
+
+		if (recipe_bits & BIT(ICE_SW_LKUP_DFLT)) {
+			recipe_bits |= BIT(lag->pf_recipe);
+			ice_aq_map_recipe_to_profile(&pf->hw, n,
+						     (u8 *)&recipe_bits, NULL);
+		}
+	}
+
+	ice_display_lag_info(lag);
+
+	dev_dbg(dev, "INIT LAG complete\n");
+	return 0;
+
+lag_error:
+	kfree(lag);
+	pf->lag = NULL;
+	return err;
+}
+
+/**
+ * ice_deinit_lag - Clean up LAG
+ * @pf: PF struct
+ *
+ * Clean up kernel LAG info and free memory
+ * This function is meant to only be called on driver remove/shutdown
+ */
+void ice_deinit_lag(struct ice_pf *pf)
+{
+	struct ice_lag *lag;
+
+	lag = pf->lag;
+
+	if (!lag)
+		return;
+
+	if (lag->pf)
+		ice_unregister_lag_handler(lag);
+
+	flush_workqueue(ice_lag_wq);
+
+	ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1,
+			&pf->lag->pf_recipe);
+
+	kfree(lag);
+
+	pf->lag = NULL;
+}
+
+/**
+ * ice_lag_rebuild - rebuild lag resources after reset
+ * @pf: pointer to local pf struct
+ *
+ * PF resets are promoted to CORER resets when interface in an aggregate.  This
+ * means that we need to rebuild the PF resources for the interface.  Since
+ * this will happen outside the normal event processing, need to acquire the lag
+ * lock.
+ *
+ * This function will also evaluate the VF resources if this is the primary
+ * interface.
+ */
+void ice_lag_rebuild(struct ice_pf *pf)
+{
+	struct ice_lag_netdev_list ndlist;
+	struct ice_lag *lag, *prim_lag;
+	u8 act_port, loc_port;
+
+	if (!pf->lag || !pf->lag->bonded)
+		return;
+
+	mutex_lock(&pf->lag_mutex);
+
+	lag = pf->lag;
+	if (lag->primary) {
+		prim_lag = lag;
+	} else {
+		ice_lag_build_netdev_list(lag, &ndlist);
+		prim_lag = ice_lag_find_primary(lag);
+	}
+
+	if (!prim_lag) {
+		dev_dbg(ice_pf_to_dev(pf), "No primary interface in aggregate, can't rebuild\n");
+		goto lag_rebuild_out;
+	}
+
+	act_port = prim_lag->active_port;
+	loc_port = lag->pf->hw.port_info->lport;
+
+	/* configure SWID for this port */
+	if (lag->primary) {
+		ice_lag_primary_swid(lag, true);
+	} else {
+		ice_lag_set_swid(prim_lag->pf->hw.port_info->sw_id, lag, true);
+		ice_lag_add_prune_list(prim_lag, pf);
+		if (act_port == loc_port)
+			ice_lag_move_vf_nodes_sync(prim_lag, &pf->hw);
+	}
+
+	ice_lag_cfg_cp_fltr(lag, true);
+
+	if (lag->pf_rule_id)
+		if (ice_lag_cfg_dflt_fltr(lag, true))
+			dev_err(ice_pf_to_dev(pf), "Error adding default VSI rule in rebuild\n");
+
+	ice_clear_rdma_cap(pf);
+lag_rebuild_out:
+	ice_lag_destroy_netdev_list(lag, &ndlist);
+	mutex_unlock(&pf->lag_mutex);
+}
+
+/**
+ * ice_lag_is_switchdev_running
+ * @pf: pointer to PF structure
+ *
+ * Check if switchdev is running on any of the interfaces connected to lag.
+ */
+bool ice_lag_is_switchdev_running(struct ice_pf *pf)
+{
+	struct ice_lag *lag = pf->lag;
+	struct net_device *tmp_nd;
+
+	if (!ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) || !lag)
+		return false;
+
+	rcu_read_lock();
+	for_each_netdev_in_bond_rcu(lag->upper_netdev, tmp_nd) {
+		struct ice_netdev_priv *priv = netdev_priv(tmp_nd);
+
+		if (!netif_is_ice(tmp_nd) || !priv || !priv->vsi ||
+		    !priv->vsi->back)
+			continue;
+
+		if (ice_is_switchdev_running(priv->vsi->back)) {
+			rcu_read_unlock();
+			return true;
+		}
+	}
+	rcu_read_unlock();
+
+	return false;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h
new file mode 100644
index 0000000000..7f22987675
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lag.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+#ifndef _ICE_LAG_H_
+#define _ICE_LAG_H_
+
+#include <linux/netdevice.h>
+
+/* LAG roles for netdev */
+enum ice_lag_role {
+	ICE_LAG_NONE,
+	ICE_LAG_PRIMARY,
+	ICE_LAG_BACKUP,
+	ICE_LAG_UNSET
+};
+
+#define ICE_LAG_INVALID_PORT 0xFF
+
+#define ICE_LAG_RESET_RETRIES		5
+
+struct ice_pf;
+struct ice_vf;
+
+struct ice_lag_netdev_list {
+	struct list_head node;
+	struct net_device *netdev;
+};
+
+/* LAG info struct */
+struct ice_lag {
+	struct ice_pf *pf; /* backlink to PF struct */
+	struct net_device *netdev; /* this PF's netdev */
+	struct net_device *upper_netdev; /* upper bonding netdev */
+	struct list_head *netdev_head;
+	struct notifier_block notif_block;
+	s32 bond_mode;
+	u16 bond_swid; /* swid for primary interface */
+	u8 active_port; /* lport value for the current active port */
+	u8 bonded:1; /* currently bonded */
+	u8 primary:1; /* this is primary */
+	u16 pf_recipe;
+	u16 pf_rule_id;
+	u16 cp_rule_idx;
+	u8 role;
+};
+
+/* LAG workqueue struct */
+struct ice_lag_work {
+	struct work_struct lag_task;
+	struct ice_lag_netdev_list netdev_list;
+	struct ice_lag *lag;
+	unsigned long event;
+	struct net_device *event_netdev;
+	union {
+		struct netdev_notifier_changeupper_info changeupper_info;
+		struct netdev_notifier_bonding_info bonding_info;
+		struct netdev_notifier_info notifier_info;
+	} info;
+};
+
+void ice_lag_move_new_vf_nodes(struct ice_vf *vf);
+int ice_init_lag(struct ice_pf *pf);
+void ice_deinit_lag(struct ice_pf *pf);
+void ice_lag_rebuild(struct ice_pf *pf);
+bool ice_lag_is_switchdev_running(struct ice_pf *pf);
+void ice_lag_move_vf_nodes_cfg(struct ice_lag *lag, u8 src_prt, u8 dst_prt);
+#endif /* _ICE_LAG_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
new file mode 100644
index 0000000000..89f986a75c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -0,0 +1,912 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_LAN_TX_RX_H_
+#define _ICE_LAN_TX_RX_H_
+
+union ice_32byte_rx_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+			/* bit 0 of hdr_addr is DD bit */
+		__le64 rsvd1;
+		__le64 rsvd2;
+	} read;
+	struct {
+		struct {
+			struct {
+				__le16 mirroring_status;
+				__le16 l2tag1;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				__le32 fd_id; /* Flow Director filter ID */
+			} hi_dword;
+		} qword0;
+		struct {
+			/* status/error/PTYPE/length */
+			__le64 status_error_len;
+		} qword1;
+		struct {
+			__le16 ext_status; /* extended status */
+			__le16 rsvd;
+			__le16 l2tag2_1;
+			__le16 l2tag2_2;
+		} qword2;
+		struct {
+			__le32 reserved;
+			__le32 fd_id;
+		} qword3;
+	} wb; /* writeback */
+};
+
+struct ice_fltr_desc {
+	__le64 qidx_compq_space_stat;
+	__le64 dtype_cmd_vsi_fdid;
+};
+
+#define ICE_FXD_FLTR_QW0_QINDEX_S	0
+#define ICE_FXD_FLTR_QW0_QINDEX_M	(0x7FFULL << ICE_FXD_FLTR_QW0_QINDEX_S)
+#define ICE_FXD_FLTR_QW0_COMP_Q_S	11
+#define ICE_FXD_FLTR_QW0_COMP_Q_M	BIT_ULL(ICE_FXD_FLTR_QW0_COMP_Q_S)
+#define ICE_FXD_FLTR_QW0_COMP_Q_ZERO	0x0ULL
+
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_S	12
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_M	\
+				(0x3ULL << ICE_FXD_FLTR_QW0_COMP_REPORT_S)
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL	0x1ULL
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_SW		0x2ULL
+
+#define ICE_FXD_FLTR_QW0_FD_SPACE_S	14
+#define ICE_FXD_FLTR_QW0_FD_SPACE_M	(0x3ULL << ICE_FXD_FLTR_QW0_FD_SPACE_S)
+#define ICE_FXD_FLTR_QW0_FD_SPACE_GUAR_BEST		0x2ULL
+
+#define ICE_FXD_FLTR_QW0_STAT_CNT_S	16
+#define ICE_FXD_FLTR_QW0_STAT_CNT_M	\
+				(0x1FFFULL << ICE_FXD_FLTR_QW0_STAT_CNT_S)
+#define ICE_FXD_FLTR_QW0_STAT_ENA_S	29
+#define ICE_FXD_FLTR_QW0_STAT_ENA_M	(0x3ULL << ICE_FXD_FLTR_QW0_STAT_ENA_S)
+#define ICE_FXD_FLTR_QW0_STAT_ENA_PKTS		0x1ULL
+
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_S	31
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_M	BIT_ULL(ICE_FXD_FLTR_QW0_EVICT_ENA_S)
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_FALSE	0x0ULL
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_TRUE		0x1ULL
+
+#define ICE_FXD_FLTR_QW0_TO_Q_S		32
+#define ICE_FXD_FLTR_QW0_TO_Q_M		(0x7ULL << ICE_FXD_FLTR_QW0_TO_Q_S)
+#define ICE_FXD_FLTR_QW0_TO_Q_EQUALS_QINDEX	0x0ULL
+
+#define ICE_FXD_FLTR_QW0_TO_Q_PRI_S	35
+#define ICE_FXD_FLTR_QW0_TO_Q_PRI_M	(0x7ULL << ICE_FXD_FLTR_QW0_TO_Q_PRI_S)
+#define ICE_FXD_FLTR_QW0_TO_Q_PRIO1	0x1ULL
+
+#define ICE_FXD_FLTR_QW0_DPU_RECIPE_S	38
+#define ICE_FXD_FLTR_QW0_DPU_RECIPE_M	\
+			(0x3ULL << ICE_FXD_FLTR_QW0_DPU_RECIPE_S)
+#define ICE_FXD_FLTR_QW0_DPU_RECIPE_DFLT	0x0ULL
+
+#define ICE_FXD_FLTR_QW0_DROP_S		40
+#define ICE_FXD_FLTR_QW0_DROP_M		BIT_ULL(ICE_FXD_FLTR_QW0_DROP_S)
+#define ICE_FXD_FLTR_QW0_DROP_NO	0x0ULL
+#define ICE_FXD_FLTR_QW0_DROP_YES	0x1ULL
+
+#define ICE_FXD_FLTR_QW0_FLEX_PRI_S	41
+#define ICE_FXD_FLTR_QW0_FLEX_PRI_M	(0x7ULL << ICE_FXD_FLTR_QW0_FLEX_PRI_S)
+#define ICE_FXD_FLTR_QW0_FLEX_PRI_NONE	0x0ULL
+
+#define ICE_FXD_FLTR_QW0_FLEX_MDID_S	44
+#define ICE_FXD_FLTR_QW0_FLEX_MDID_M	(0xFULL << ICE_FXD_FLTR_QW0_FLEX_MDID_S)
+#define ICE_FXD_FLTR_QW0_FLEX_MDID0	0x0ULL
+
+#define ICE_FXD_FLTR_QW0_FLEX_VAL_S	48
+#define ICE_FXD_FLTR_QW0_FLEX_VAL_M	\
+				(0xFFFFULL << ICE_FXD_FLTR_QW0_FLEX_VAL_S)
+#define ICE_FXD_FLTR_QW0_FLEX_VAL0	0x0ULL
+
+#define ICE_FXD_FLTR_QW1_DTYPE_S	0
+#define ICE_FXD_FLTR_QW1_DTYPE_M	(0xFULL << ICE_FXD_FLTR_QW1_DTYPE_S)
+#define ICE_FXD_FLTR_QW1_PCMD_S		4
+#define ICE_FXD_FLTR_QW1_PCMD_M		BIT_ULL(ICE_FXD_FLTR_QW1_PCMD_S)
+#define ICE_FXD_FLTR_QW1_PCMD_ADD	0x0ULL
+#define ICE_FXD_FLTR_QW1_PCMD_REMOVE	0x1ULL
+
+#define ICE_FXD_FLTR_QW1_PROF_PRI_S	5
+#define ICE_FXD_FLTR_QW1_PROF_PRI_M	(0x7ULL << ICE_FXD_FLTR_QW1_PROF_PRI_S)
+#define ICE_FXD_FLTR_QW1_PROF_PRIO_ZERO	0x0ULL
+
+#define ICE_FXD_FLTR_QW1_PROF_S		8
+#define ICE_FXD_FLTR_QW1_PROF_M		(0x3FULL << ICE_FXD_FLTR_QW1_PROF_S)
+#define ICE_FXD_FLTR_QW1_PROF_ZERO	0x0ULL
+
+#define ICE_FXD_FLTR_QW1_FD_VSI_S	14
+#define ICE_FXD_FLTR_QW1_FD_VSI_M	(0x3FFULL << ICE_FXD_FLTR_QW1_FD_VSI_S)
+#define ICE_FXD_FLTR_QW1_SWAP_S		24
+#define ICE_FXD_FLTR_QW1_SWAP_M		BIT_ULL(ICE_FXD_FLTR_QW1_SWAP_S)
+#define ICE_FXD_FLTR_QW1_SWAP_NOT_SET	0x0ULL
+#define ICE_FXD_FLTR_QW1_SWAP_SET	0x1ULL
+
+#define ICE_FXD_FLTR_QW1_FDID_PRI_S	25
+#define ICE_FXD_FLTR_QW1_FDID_PRI_M	(0x7ULL << ICE_FXD_FLTR_QW1_FDID_PRI_S)
+#define ICE_FXD_FLTR_QW1_FDID_PRI_ONE	0x1ULL
+#define ICE_FXD_FLTR_QW1_FDID_PRI_THREE	0x3ULL
+
+#define ICE_FXD_FLTR_QW1_FDID_MDID_S	28
+#define ICE_FXD_FLTR_QW1_FDID_MDID_M	(0xFULL << ICE_FXD_FLTR_QW1_FDID_MDID_S)
+#define ICE_FXD_FLTR_QW1_FDID_MDID_FD	0x05ULL
+
+#define ICE_FXD_FLTR_QW1_FDID_S		32
+#define ICE_FXD_FLTR_QW1_FDID_M		\
+			(0xFFFFFFFFULL << ICE_FXD_FLTR_QW1_FDID_S)
+#define ICE_FXD_FLTR_QW1_FDID_ZERO	0x0ULL
+
+/* definition for FD filter programming status descriptor WB format */
+#define ICE_FXD_FLTR_WB_QW1_DD_S	0
+#define ICE_FXD_FLTR_WB_QW1_DD_M	(0x1ULL << ICE_FXD_FLTR_WB_QW1_DD_S)
+#define ICE_FXD_FLTR_WB_QW1_DD_YES	0x1ULL
+
+#define ICE_FXD_FLTR_WB_QW1_PROG_ID_S	1
+#define ICE_FXD_FLTR_WB_QW1_PROG_ID_M	\
+				(0x3ULL << ICE_FXD_FLTR_WB_QW1_PROG_ID_S)
+#define ICE_FXD_FLTR_WB_QW1_PROG_ADD	0x0ULL
+#define ICE_FXD_FLTR_WB_QW1_PROG_DEL	0x1ULL
+
+#define ICE_FXD_FLTR_WB_QW1_FAIL_S	4
+#define ICE_FXD_FLTR_WB_QW1_FAIL_M	(0x1ULL << ICE_FXD_FLTR_WB_QW1_FAIL_S)
+#define ICE_FXD_FLTR_WB_QW1_FAIL_YES	0x1ULL
+
+#define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S	5
+#define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_M	\
+				(0x1ULL << ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S)
+#define ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES	0x1ULL
+
+struct ice_rx_ptype_decoded {
+	u32 known:1;
+	u32 outer_ip:1;
+	u32 outer_ip_ver:2;
+	u32 outer_frag:1;
+	u32 tunnel_type:3;
+	u32 tunnel_end_prot:2;
+	u32 tunnel_end_frag:1;
+	u32 inner_prot:4;
+	u32 payload_layer:3;
+};
+
+enum ice_rx_ptype_outer_ip {
+	ICE_RX_PTYPE_OUTER_L2	= 0,
+	ICE_RX_PTYPE_OUTER_IP	= 1,
+};
+
+enum ice_rx_ptype_outer_ip_ver {
+	ICE_RX_PTYPE_OUTER_NONE	= 0,
+	ICE_RX_PTYPE_OUTER_IPV4	= 1,
+	ICE_RX_PTYPE_OUTER_IPV6	= 2,
+};
+
+enum ice_rx_ptype_outer_fragmented {
+	ICE_RX_PTYPE_NOT_FRAG	= 0,
+	ICE_RX_PTYPE_FRAG	= 1,
+};
+
+enum ice_rx_ptype_tunnel_type {
+	ICE_RX_PTYPE_TUNNEL_NONE		= 0,
+	ICE_RX_PTYPE_TUNNEL_IP_IP		= 1,
+	ICE_RX_PTYPE_TUNNEL_IP_GRENAT		= 2,
+	ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC	= 3,
+	ICE_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN	= 4,
+};
+
+enum ice_rx_ptype_tunnel_end_prot {
+	ICE_RX_PTYPE_TUNNEL_END_NONE	= 0,
+	ICE_RX_PTYPE_TUNNEL_END_IPV4	= 1,
+	ICE_RX_PTYPE_TUNNEL_END_IPV6	= 2,
+};
+
+enum ice_rx_ptype_inner_prot {
+	ICE_RX_PTYPE_INNER_PROT_NONE		= 0,
+	ICE_RX_PTYPE_INNER_PROT_UDP		= 1,
+	ICE_RX_PTYPE_INNER_PROT_TCP		= 2,
+	ICE_RX_PTYPE_INNER_PROT_SCTP		= 3,
+	ICE_RX_PTYPE_INNER_PROT_ICMP		= 4,
+	ICE_RX_PTYPE_INNER_PROT_TIMESYNC	= 5,
+};
+
+enum ice_rx_ptype_payload_layer {
+	ICE_RX_PTYPE_PAYLOAD_LAYER_NONE	= 0,
+	ICE_RX_PTYPE_PAYLOAD_LAYER_PAY2	= 1,
+	ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3	= 2,
+	ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4	= 3,
+};
+
+/* Rx Flex Descriptor
+ * This descriptor is used instead of the legacy version descriptor when
+ * ice_rlan_ctx.adv_desc is set
+ */
+union ice_32b_rx_flex_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+				 /* bit 0 of hdr_addr is DD bit */
+		__le64 rsvd1;
+		__le64 rsvd2;
+	} read;
+	struct {
+		/* Qword 0 */
+		u8 rxdid; /* descriptor builder profile ID */
+		u8 mir_id_umb_cast; /* mirror=[5:0], umb=[7:6] */
+		__le16 ptype_flex_flags0; /* ptype=[9:0], ff0=[15:10] */
+		__le16 pkt_len; /* [15:14] are reserved */
+		__le16 hdr_len_sph_flex_flags1; /* header=[10:0] */
+						/* sph=[11:11] */
+						/* ff1/ext=[15:12] */
+
+		/* Qword 1 */
+		__le16 status_error0;
+		__le16 l2tag1;
+		__le16 flex_meta0;
+		__le16 flex_meta1;
+
+		/* Qword 2 */
+		__le16 status_error1;
+		u8 flex_flags2;
+		u8 time_stamp_low;
+		__le16 l2tag2_1st;
+		__le16 l2tag2_2nd;
+
+		/* Qword 3 */
+		__le16 flex_meta2;
+		__le16 flex_meta3;
+		union {
+			struct {
+				__le16 flex_meta4;
+				__le16 flex_meta5;
+			} flex;
+			__le32 ts_high;
+		} flex_ts;
+	} wb; /* writeback */
+};
+
+/* Rx Flex Descriptor NIC Profile
+ * This descriptor corresponds to RxDID 2 which contains
+ * metadata fields for RSS, flow ID and timestamp info
+ */
+struct ice_32b_rx_flex_desc_nic {
+	/* Qword 0 */
+	u8 rxdid;
+	u8 mir_id_umb_cast;
+	__le16 ptype_flexi_flags0;
+	__le16 pkt_len;
+	__le16 hdr_len_sph_flex_flags1;
+
+	/* Qword 1 */
+	__le16 status_error0;
+	__le16 l2tag1;
+	__le32 rss_hash;
+
+	/* Qword 2 */
+	__le16 status_error1;
+	u8 flexi_flags2;
+	u8 ts_low;
+	__le16 l2tag2_1st;
+	__le16 l2tag2_2nd;
+
+	/* Qword 3 */
+	__le32 flow_id;
+	union {
+		struct {
+			__le16 vlan_id;
+			__le16 flow_id_ipv6;
+		} flex;
+		__le32 ts_high;
+	} flex_ts;
+};
+
+/* Rx Flex Descriptor NIC Profile
+ * RxDID Profile ID 6
+ * Flex-field 0: RSS hash lower 16-bits
+ * Flex-field 1: RSS hash upper 16-bits
+ * Flex-field 2: Flow ID lower 16-bits
+ * Flex-field 3: Source VSI
+ * Flex-field 4: reserved, VLAN ID taken from L2Tag
+ */
+struct ice_32b_rx_flex_desc_nic_2 {
+	/* Qword 0 */
+	u8 rxdid;
+	u8 mir_id_umb_cast;
+	__le16 ptype_flexi_flags0;
+	__le16 pkt_len;
+	__le16 hdr_len_sph_flex_flags1;
+
+	/* Qword 1 */
+	__le16 status_error0;
+	__le16 l2tag1;
+	__le32 rss_hash;
+
+	/* Qword 2 */
+	__le16 status_error1;
+	u8 flexi_flags2;
+	u8 ts_low;
+	__le16 l2tag2_1st;
+	__le16 l2tag2_2nd;
+
+	/* Qword 3 */
+	__le16 flow_id;
+	__le16 src_vsi;
+	union {
+		struct {
+			__le16 rsvd;
+			__le16 flow_id_ipv6;
+		} flex;
+		__le32 ts_high;
+	} flex_ts;
+};
+
+/* Receive Flex Descriptor profile IDs: There are a total
+ * of 64 profiles where profile IDs 0/1 are for legacy; and
+ * profiles 2-63 are flex profiles that can be programmed
+ * with a specific metadata (profile 7 reserved for HW)
+ */
+enum ice_rxdid {
+	ICE_RXDID_LEGACY_0		= 0,
+	ICE_RXDID_LEGACY_1		= 1,
+	ICE_RXDID_FLEX_NIC		= 2,
+	ICE_RXDID_FLEX_NIC_2		= 6,
+	ICE_RXDID_HW			= 7,
+	ICE_RXDID_LAST			= 63,
+};
+
+/* Receive Flex Descriptor Rx opcode values */
+#define ICE_RX_OPC_MDID		0x01
+
+/* Receive Descriptor MDID values that access packet flags */
+enum ice_flex_mdid_pkt_flags {
+	ICE_RX_MDID_PKT_FLAGS_15_0	= 20,
+	ICE_RX_MDID_PKT_FLAGS_31_16,
+	ICE_RX_MDID_PKT_FLAGS_47_32,
+	ICE_RX_MDID_PKT_FLAGS_63_48,
+};
+
+/* Receive Descriptor MDID values */
+enum ice_flex_rx_mdid {
+	ICE_RX_MDID_FLOW_ID_LOWER	= 5,
+	ICE_RX_MDID_FLOW_ID_HIGH,
+	ICE_RX_MDID_SRC_VSI		= 19,
+	ICE_RX_MDID_HASH_LOW		= 56,
+	ICE_RX_MDID_HASH_HIGH,
+};
+
+/* Rx/Tx Flag64 packet flag bits */
+enum ice_flg64_bits {
+	ICE_FLG_PKT_DSI		= 0,
+	ICE_FLG_EVLAN_x8100	= 14,
+	ICE_FLG_EVLAN_x9100,
+	ICE_FLG_VLAN_x8100,
+	ICE_FLG_TNL_MAC		= 22,
+	ICE_FLG_TNL_VLAN,
+	ICE_FLG_PKT_FRG,
+	ICE_FLG_FIN		= 32,
+	ICE_FLG_SYN,
+	ICE_FLG_RST,
+	ICE_FLG_TNL0		= 38,
+	ICE_FLG_TNL1,
+	ICE_FLG_TNL2,
+	ICE_FLG_UDP_GRE,
+	ICE_FLG_RSVD		= 63
+};
+
+/* for ice_32byte_rx_flex_desc.ptype_flexi_flags0 member */
+#define ICE_RX_FLEX_DESC_PTYPE_M	(0x3FF) /* 10-bits */
+
+/* for ice_32byte_rx_flex_desc.pkt_length member */
+#define ICE_RX_FLX_DESC_PKT_LEN_M	(0x3FFF) /* 14-bits */
+
+enum ice_rx_flex_desc_status_error_0_bits {
+	/* Note: These are predefined bit offsets */
+	ICE_RX_FLEX_DESC_STATUS0_DD_S = 0,
+	ICE_RX_FLEX_DESC_STATUS0_EOF_S,
+	ICE_RX_FLEX_DESC_STATUS0_HBO_S,
+	ICE_RX_FLEX_DESC_STATUS0_L3L4P_S,
+	ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S,
+	ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S,
+	ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S,
+	ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S,
+	ICE_RX_FLEX_DESC_STATUS0_LPBK_S,
+	ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S,
+	ICE_RX_FLEX_DESC_STATUS0_RXE_S,
+	ICE_RX_FLEX_DESC_STATUS0_CRCP_S,
+	ICE_RX_FLEX_DESC_STATUS0_RSS_VALID_S,
+	ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S,
+	ICE_RX_FLEX_DESC_STATUS0_XTRMD0_VALID_S,
+	ICE_RX_FLEX_DESC_STATUS0_XTRMD1_VALID_S,
+	ICE_RX_FLEX_DESC_STATUS0_LAST /* this entry must be last!!! */
+};
+
+enum ice_rx_flex_desc_status_error_1_bits {
+	/* Note: These are predefined bit offsets */
+	ICE_RX_FLEX_DESC_STATUS1_NAT_S = 4,
+	 /* [10:5] reserved */
+	ICE_RX_FLEX_DESC_STATUS1_L2TAG2P_S = 11,
+	ICE_RX_FLEX_DESC_STATUS1_LAST /* this entry must be last!!! */
+};
+
+#define ICE_RXQ_CTX_SIZE_DWORDS		8
+#define ICE_RXQ_CTX_SZ			(ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32))
+#define ICE_TX_CMPLTNQ_CTX_SIZE_DWORDS	22
+#define ICE_TX_DRBELL_Q_CTX_SIZE_DWORDS	5
+#define GLTCLAN_CQ_CNTX(i, CQ)		(GLTCLAN_CQ_CNTX0(CQ) + ((i) * 0x0800))
+
+/* RLAN Rx queue context data
+ *
+ * The sizes of the variables may be larger than needed due to crossing byte
+ * boundaries. If we do not have the width of the variable set to the correct
+ * size then we could end up shifting bits off the top of the variable when the
+ * variable is at the top of a byte and crosses over into the next byte.
+ */
+struct ice_rlan_ctx {
+	u16 head;
+	u16 cpuid; /* bigger than needed, see above for reason */
+#define ICE_RLAN_BASE_S 7
+	u64 base;
+	u16 qlen;
+#define ICE_RLAN_CTX_DBUF_S 7
+	u16 dbuf; /* bigger than needed, see above for reason */
+#define ICE_RLAN_CTX_HBUF_S 6
+	u16 hbuf; /* bigger than needed, see above for reason */
+	u8 dtype;
+	u8 dsize;
+	u8 crcstrip;
+	u8 l2tsel;
+	u8 hsplit_0;
+	u8 hsplit_1;
+	u8 showiv;
+	u32 rxmax; /* bigger than needed, see above for reason */
+	u8 tphrdesc_ena;
+	u8 tphwdesc_ena;
+	u8 tphdata_ena;
+	u8 tphhead_ena;
+	u16 lrxqthresh; /* bigger than needed, see above for reason */
+	u8 prefena;	/* NOTE: normally must be set to 1 at init */
+};
+
+struct ice_ctx_ele {
+	u16 offset;
+	u16 size_of;
+	u16 width;
+	u16 lsb;
+};
+
+#define ICE_CTX_STORE(_struct, _ele, _width, _lsb) {	\
+	.offset = offsetof(struct _struct, _ele),	\
+	.size_of = sizeof_field(struct _struct, _ele),	\
+	.width = _width,				\
+	.lsb = _lsb,					\
+}
+
+/* for hsplit_0 field of Rx RLAN context */
+enum ice_rlan_ctx_rx_hsplit_0 {
+	ICE_RLAN_RX_HSPLIT_0_NO_SPLIT		= 0,
+	ICE_RLAN_RX_HSPLIT_0_SPLIT_L2		= 1,
+	ICE_RLAN_RX_HSPLIT_0_SPLIT_IP		= 2,
+	ICE_RLAN_RX_HSPLIT_0_SPLIT_TCP_UDP	= 4,
+	ICE_RLAN_RX_HSPLIT_0_SPLIT_SCTP		= 8,
+};
+
+/* for hsplit_1 field of Rx RLAN context */
+enum ice_rlan_ctx_rx_hsplit_1 {
+	ICE_RLAN_RX_HSPLIT_1_NO_SPLIT		= 0,
+	ICE_RLAN_RX_HSPLIT_1_SPLIT_L2		= 1,
+	ICE_RLAN_RX_HSPLIT_1_SPLIT_ALWAYS	= 2,
+};
+
+/* Tx Descriptor */
+struct ice_tx_desc {
+	__le64 buf_addr; /* Address of descriptor's data buf */
+	__le64 cmd_type_offset_bsz;
+};
+
+enum ice_tx_desc_dtype_value {
+	ICE_TX_DESC_DTYPE_DATA		= 0x0,
+	ICE_TX_DESC_DTYPE_CTX		= 0x1,
+	ICE_TX_DESC_DTYPE_FLTR_PROG	= 0x8,
+	/* DESC_DONE - HW has completed write-back of descriptor */
+	ICE_TX_DESC_DTYPE_DESC_DONE	= 0xF,
+};
+
+#define ICE_TXD_QW1_CMD_S	4
+#define ICE_TXD_QW1_CMD_M	(0xFFFUL << ICE_TXD_QW1_CMD_S)
+
+enum ice_tx_desc_cmd_bits {
+	ICE_TX_DESC_CMD_EOP			= 0x0001,
+	ICE_TX_DESC_CMD_RS			= 0x0002,
+	ICE_TX_DESC_CMD_IL2TAG1			= 0x0008,
+	ICE_TX_DESC_CMD_DUMMY			= 0x0010,
+	ICE_TX_DESC_CMD_IIPT_IPV6		= 0x0020,
+	ICE_TX_DESC_CMD_IIPT_IPV4		= 0x0040,
+	ICE_TX_DESC_CMD_IIPT_IPV4_CSUM		= 0x0060,
+	ICE_TX_DESC_CMD_L4T_EOFT_TCP		= 0x0100,
+	ICE_TX_DESC_CMD_L4T_EOFT_SCTP		= 0x0200,
+	ICE_TX_DESC_CMD_L4T_EOFT_UDP		= 0x0300,
+	ICE_TX_DESC_CMD_RE			= 0x0400,
+};
+
+#define ICE_TXD_QW1_OFFSET_S	16
+#define ICE_TXD_QW1_OFFSET_M	(0x3FFFFULL << ICE_TXD_QW1_OFFSET_S)
+
+enum ice_tx_desc_len_fields {
+	/* Note: These are predefined bit offsets */
+	ICE_TX_DESC_LEN_MACLEN_S	= 0, /* 7 BITS */
+	ICE_TX_DESC_LEN_IPLEN_S	= 7, /* 7 BITS */
+	ICE_TX_DESC_LEN_L4_LEN_S	= 14 /* 4 BITS */
+};
+
+#define ICE_TXD_QW1_MACLEN_M (0x7FUL << ICE_TX_DESC_LEN_MACLEN_S)
+#define ICE_TXD_QW1_IPLEN_M  (0x7FUL << ICE_TX_DESC_LEN_IPLEN_S)
+#define ICE_TXD_QW1_L4LEN_M  (0xFUL << ICE_TX_DESC_LEN_L4_LEN_S)
+
+/* Tx descriptor field limits in bytes */
+#define ICE_TXD_MACLEN_MAX ((ICE_TXD_QW1_MACLEN_M >> \
+			     ICE_TX_DESC_LEN_MACLEN_S) * ICE_BYTES_PER_WORD)
+#define ICE_TXD_IPLEN_MAX ((ICE_TXD_QW1_IPLEN_M >> \
+			    ICE_TX_DESC_LEN_IPLEN_S) * ICE_BYTES_PER_DWORD)
+#define ICE_TXD_L4LEN_MAX ((ICE_TXD_QW1_L4LEN_M >> \
+			    ICE_TX_DESC_LEN_L4_LEN_S) * ICE_BYTES_PER_DWORD)
+
+#define ICE_TXD_QW1_TX_BUF_SZ_S	34
+#define ICE_TXD_QW1_L2TAG1_S	48
+
+/* Context descriptors */
+struct ice_tx_ctx_desc {
+	__le32 tunneling_params;
+	__le16 l2tag2;
+	__le16 rsvd;
+	__le64 qw1;
+};
+
+#define ICE_TXD_CTX_QW1_CMD_S	4
+#define ICE_TXD_CTX_QW1_CMD_M	(0x7FUL << ICE_TXD_CTX_QW1_CMD_S)
+
+#define ICE_TXD_CTX_QW1_TSO_LEN_S	30
+#define ICE_TXD_CTX_QW1_TSO_LEN_M	\
+			(0x3FFFFULL << ICE_TXD_CTX_QW1_TSO_LEN_S)
+
+#define ICE_TXD_CTX_QW1_MSS_S	50
+#define ICE_TXD_CTX_MIN_MSS	64
+
+#define ICE_TXD_CTX_QW1_VSI_S	50
+#define ICE_TXD_CTX_QW1_VSI_M	(0x3FFULL << ICE_TXD_CTX_QW1_VSI_S)
+
+enum ice_tx_ctx_desc_cmd_bits {
+	ICE_TX_CTX_DESC_TSO		= 0x01,
+	ICE_TX_CTX_DESC_TSYN		= 0x02,
+	ICE_TX_CTX_DESC_IL2TAG2		= 0x04,
+	ICE_TX_CTX_DESC_IL2TAG2_IL2H	= 0x08,
+	ICE_TX_CTX_DESC_SWTCH_NOTAG	= 0x00,
+	ICE_TX_CTX_DESC_SWTCH_UPLINK	= 0x10,
+	ICE_TX_CTX_DESC_SWTCH_LOCAL	= 0x20,
+	ICE_TX_CTX_DESC_SWTCH_VSI	= 0x30,
+	ICE_TX_CTX_DESC_RESERVED	= 0x40
+};
+
+enum ice_tx_ctx_desc_eipt_offload {
+	ICE_TX_CTX_EIPT_NONE		= 0x0,
+	ICE_TX_CTX_EIPT_IPV6		= 0x1,
+	ICE_TX_CTX_EIPT_IPV4_NO_CSUM	= 0x2,
+	ICE_TX_CTX_EIPT_IPV4		= 0x3
+};
+
+#define ICE_TXD_CTX_QW0_EIPLEN_S	2
+
+#define ICE_TXD_CTX_QW0_L4TUNT_S	9
+
+#define ICE_TXD_CTX_UDP_TUNNELING	BIT_ULL(ICE_TXD_CTX_QW0_L4TUNT_S)
+#define ICE_TXD_CTX_GRE_TUNNELING	(0x2ULL << ICE_TXD_CTX_QW0_L4TUNT_S)
+
+#define ICE_TXD_CTX_QW0_NATLEN_S	12
+
+#define ICE_TXD_CTX_QW0_L4T_CS_S	23
+#define ICE_TXD_CTX_QW0_L4T_CS_M	BIT_ULL(ICE_TXD_CTX_QW0_L4T_CS_S)
+
+#define ICE_LAN_TXQ_MAX_QGRPS	127
+#define ICE_LAN_TXQ_MAX_QDIS	1023
+
+/* Tx queue context data
+ *
+ * The sizes of the variables may be larger than needed due to crossing byte
+ * boundaries. If we do not have the width of the variable set to the correct
+ * size then we could end up shifting bits off the top of the variable when the
+ * variable is at the top of a byte and crosses over into the next byte.
+ */
+struct ice_tlan_ctx {
+#define ICE_TLAN_CTX_BASE_S	7
+	u64 base;		/* base is defined in 128-byte units */
+	u8 port_num;
+	u16 cgd_num;		/* bigger than needed, see above for reason */
+	u8 pf_num;
+	u16 vmvf_num;
+	u8 vmvf_type;
+#define ICE_TLAN_CTX_VMVF_TYPE_VF	0
+#define ICE_TLAN_CTX_VMVF_TYPE_VMQ	1
+#define ICE_TLAN_CTX_VMVF_TYPE_PF	2
+	u16 src_vsi;
+	u8 tsyn_ena;
+	u8 internal_usage_flag;
+	u8 alt_vlan;
+	u16 cpuid;		/* bigger than needed, see above for reason */
+	u8 wb_mode;
+	u8 tphrd_desc;
+	u8 tphrd;
+	u8 tphwr_desc;
+	u16 cmpq_id;
+	u16 qnum_in_func;
+	u8 itr_notification_mode;
+	u8 adjust_prof_id;
+	u32 qlen;		/* bigger than needed, see above for reason */
+	u8 quanta_prof_idx;
+	u8 tso_ena;
+	u16 tso_qnum;
+	u8 legacy_int;
+	u8 drop_ena;
+	u8 cache_prof_idx;
+	u8 pkt_shaper_prof_idx;
+	u8 int_q_state;	/* width not needed - internal - DO NOT WRITE!!! */
+};
+
+/* The ice_ptype_lkup table is used to convert from the 10-bit ptype in the
+ * hardware to a bit-field that can be used by SW to more easily determine the
+ * packet type.
+ *
+ * Macros are used to shorten the table lines and make this table human
+ * readable.
+ *
+ * We store the PTYPE in the top byte of the bit field - this is just so that
+ * we can check that the table doesn't have a row missing, as the index into
+ * the table should be the PTYPE.
+ *
+ * Typical work flow:
+ *
+ * IF NOT ice_ptype_lkup[ptype].known
+ * THEN
+ *      Packet is unknown
+ * ELSE IF ice_ptype_lkup[ptype].outer_ip == ICE_RX_PTYPE_OUTER_IP
+ *      Use the rest of the fields to look at the tunnels, inner protocols, etc
+ * ELSE
+ *      Use the enum ice_rx_l2_ptype to decode the packet type
+ * ENDIF
+ */
+
+/* macro to make the table lines short, use explicit indexing with [PTYPE] */
+#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
+	[PTYPE] = { \
+		1, \
+		ICE_RX_PTYPE_OUTER_##OUTER_IP, \
+		ICE_RX_PTYPE_OUTER_##OUTER_IP_VER, \
+		ICE_RX_PTYPE_##OUTER_FRAG, \
+		ICE_RX_PTYPE_TUNNEL_##T, \
+		ICE_RX_PTYPE_TUNNEL_END_##TE, \
+		ICE_RX_PTYPE_##TEF, \
+		ICE_RX_PTYPE_INNER_PROT_##I, \
+		ICE_RX_PTYPE_PAYLOAD_LAYER_##PL }
+
+#define ICE_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+/* shorter macros makes the table fit but are terse */
+#define ICE_RX_PTYPE_NOF		ICE_RX_PTYPE_NOT_FRAG
+#define ICE_RX_PTYPE_FRG		ICE_RX_PTYPE_FRAG
+
+/* Lookup table mapping in the 10-bit HW PTYPE to the bit field for decoding */
+static const struct ice_rx_ptype_decoded ice_ptype_lkup[BIT(10)] = {
+	/* L2 Packet types */
+	ICE_PTT_UNUSED_ENTRY(0),
+	ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	ICE_PTT_UNUSED_ENTRY(2),
+	ICE_PTT_UNUSED_ENTRY(3),
+	ICE_PTT_UNUSED_ENTRY(4),
+	ICE_PTT_UNUSED_ENTRY(5),
+	ICE_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+	ICE_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+	ICE_PTT_UNUSED_ENTRY(8),
+	ICE_PTT_UNUSED_ENTRY(9),
+	ICE_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+	ICE_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+	ICE_PTT_UNUSED_ENTRY(12),
+	ICE_PTT_UNUSED_ENTRY(13),
+	ICE_PTT_UNUSED_ENTRY(14),
+	ICE_PTT_UNUSED_ENTRY(15),
+	ICE_PTT_UNUSED_ENTRY(16),
+	ICE_PTT_UNUSED_ENTRY(17),
+	ICE_PTT_UNUSED_ENTRY(18),
+	ICE_PTT_UNUSED_ENTRY(19),
+	ICE_PTT_UNUSED_ENTRY(20),
+	ICE_PTT_UNUSED_ENTRY(21),
+
+	/* Non Tunneled IPv4 */
+	ICE_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3),
+	ICE_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3),
+	ICE_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(25),
+	ICE_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP,  PAY4),
+	ICE_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4),
+	ICE_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+	/* IPv4 --> IPv4 */
+	ICE_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(32),
+	ICE_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> IPv6 */
+	ICE_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(39),
+	ICE_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT */
+	ICE_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 --> GRE/NAT --> IPv4 */
+	ICE_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(47),
+	ICE_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> IPv6 */
+	ICE_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(54),
+	ICE_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC */
+	ICE_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 --> GRE/NAT --> MAC --> IPv4 */
+	ICE_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(62),
+	ICE_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT -> MAC --> IPv6 */
+	ICE_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(69),
+	ICE_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC/VLAN */
+	ICE_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */
+	ICE_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(77),
+	ICE_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */
+	ICE_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(84),
+	ICE_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+	/* Non Tunneled IPv6 */
+	ICE_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
+	ICE_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
+	ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(91),
+	ICE_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP,  PAY4),
+	ICE_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),
+	ICE_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+	/* IPv6 --> IPv4 */
+	ICE_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(98),
+	ICE_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> IPv6 */
+	ICE_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(105),
+	ICE_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT */
+	ICE_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> IPv4 */
+	ICE_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(113),
+	ICE_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> IPv6 */
+	ICE_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(120),
+	ICE_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC */
+	ICE_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> MAC -> IPv4 */
+	ICE_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(128),
+	ICE_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC -> IPv6 */
+	ICE_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(135),
+	ICE_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN */
+	ICE_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */
+	ICE_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+	ICE_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+	ICE_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(143),
+	ICE_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
+	ICE_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+	ICE_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */
+	ICE_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+	ICE_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+	ICE_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
+	ICE_PTT_UNUSED_ENTRY(150),
+	ICE_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
+	ICE_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+	ICE_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+	/* unused entries */
+	[154 ... 1023] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+};
+
+static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype)
+{
+	return ice_ptype_lkup[ptype];
+}
+
+
+#endif /* _ICE_LAN_TX_RX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
new file mode 100644
index 0000000000..a66c3b6cce
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -0,0 +1,4089 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_flow.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+#include "ice_dcb_lib.h"
+#include "ice_devlink.h"
+#include "ice_vsi_vlan_ops.h"
+
+/**
+ * ice_vsi_type_str - maps VSI type enum to string equivalents
+ * @vsi_type: VSI type enum
+ */
+const char *ice_vsi_type_str(enum ice_vsi_type vsi_type)
+{
+	switch (vsi_type) {
+	case ICE_VSI_PF:
+		return "ICE_VSI_PF";
+	case ICE_VSI_VF:
+		return "ICE_VSI_VF";
+	case ICE_VSI_CTRL:
+		return "ICE_VSI_CTRL";
+	case ICE_VSI_CHNL:
+		return "ICE_VSI_CHNL";
+	case ICE_VSI_LB:
+		return "ICE_VSI_LB";
+	case ICE_VSI_SWITCHDEV_CTRL:
+		return "ICE_VSI_SWITCHDEV_CTRL";
+	default:
+		return "unknown";
+	}
+}
+
+/**
+ * ice_vsi_ctrl_all_rx_rings - Start or stop a VSI's Rx rings
+ * @vsi: the VSI being configured
+ * @ena: start or stop the Rx rings
+ *
+ * First enable/disable all of the Rx rings, flush any remaining writes, and
+ * then verify that they have all been enabled/disabled successfully. This will
+ * let all of the register writes complete when enabling/disabling the Rx rings
+ * before waiting for the change in hardware to complete.
+ */
+static int ice_vsi_ctrl_all_rx_rings(struct ice_vsi *vsi, bool ena)
+{
+	int ret = 0;
+	u16 i;
+
+	ice_for_each_rxq(vsi, i)
+		ice_vsi_ctrl_one_rx_ring(vsi, ena, i, false);
+
+	ice_flush(&vsi->back->hw);
+
+	ice_for_each_rxq(vsi, i) {
+		ret = ice_vsi_wait_one_rx_ring(vsi, ena, i);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/**
+ * ice_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the VSI
+ * @vsi: VSI pointer
+ *
+ * On error: returns error code (negative)
+ * On success: returns 0
+ */
+static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+	if (vsi->type == ICE_VSI_CHNL)
+		return 0;
+
+	/* allocate memory for both Tx and Rx ring pointers */
+	vsi->tx_rings = devm_kcalloc(dev, vsi->alloc_txq,
+				     sizeof(*vsi->tx_rings), GFP_KERNEL);
+	if (!vsi->tx_rings)
+		return -ENOMEM;
+
+	vsi->rx_rings = devm_kcalloc(dev, vsi->alloc_rxq,
+				     sizeof(*vsi->rx_rings), GFP_KERNEL);
+	if (!vsi->rx_rings)
+		goto err_rings;
+
+	/* txq_map needs to have enough space to track both Tx (stack) rings
+	 * and XDP rings; at this point vsi->num_xdp_txq might not be set,
+	 * so use num_possible_cpus() as we want to always provide XDP ring
+	 * per CPU, regardless of queue count settings from user that might
+	 * have come from ethtool's set_channels() callback;
+	 */
+	vsi->txq_map = devm_kcalloc(dev, (vsi->alloc_txq + num_possible_cpus()),
+				    sizeof(*vsi->txq_map), GFP_KERNEL);
+
+	if (!vsi->txq_map)
+		goto err_txq_map;
+
+	vsi->rxq_map = devm_kcalloc(dev, vsi->alloc_rxq,
+				    sizeof(*vsi->rxq_map), GFP_KERNEL);
+	if (!vsi->rxq_map)
+		goto err_rxq_map;
+
+	/* There is no need to allocate q_vectors for a loopback VSI. */
+	if (vsi->type == ICE_VSI_LB)
+		return 0;
+
+	/* allocate memory for q_vector pointers */
+	vsi->q_vectors = devm_kcalloc(dev, vsi->num_q_vectors,
+				      sizeof(*vsi->q_vectors), GFP_KERNEL);
+	if (!vsi->q_vectors)
+		goto err_vectors;
+
+	vsi->af_xdp_zc_qps = bitmap_zalloc(max_t(int, vsi->alloc_txq, vsi->alloc_rxq), GFP_KERNEL);
+	if (!vsi->af_xdp_zc_qps)
+		goto err_zc_qps;
+
+	return 0;
+
+err_zc_qps:
+	devm_kfree(dev, vsi->q_vectors);
+err_vectors:
+	devm_kfree(dev, vsi->rxq_map);
+err_rxq_map:
+	devm_kfree(dev, vsi->txq_map);
+err_txq_map:
+	devm_kfree(dev, vsi->rx_rings);
+err_rings:
+	devm_kfree(dev, vsi->tx_rings);
+	return -ENOMEM;
+}
+
+/**
+ * ice_vsi_set_num_desc - Set number of descriptors for queues on this VSI
+ * @vsi: the VSI being configured
+ */
+static void ice_vsi_set_num_desc(struct ice_vsi *vsi)
+{
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+	case ICE_VSI_SWITCHDEV_CTRL:
+	case ICE_VSI_CTRL:
+	case ICE_VSI_LB:
+		/* a user could change the values of num_[tr]x_desc using
+		 * ethtool -G so we should keep those values instead of
+		 * overwriting them with the defaults.
+		 */
+		if (!vsi->num_rx_desc)
+			vsi->num_rx_desc = ICE_DFLT_NUM_RX_DESC;
+		if (!vsi->num_tx_desc)
+			vsi->num_tx_desc = ICE_DFLT_NUM_TX_DESC;
+		break;
+	default:
+		dev_dbg(ice_pf_to_dev(vsi->back), "Not setting number of Tx/Rx descriptors for VSI type %d\n",
+			vsi->type);
+		break;
+	}
+}
+
+/**
+ * ice_vsi_set_num_qs - Set number of queues, descriptors and vectors for a VSI
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ */
+static void ice_vsi_set_num_qs(struct ice_vsi *vsi)
+{
+	enum ice_vsi_type vsi_type = vsi->type;
+	struct ice_pf *pf = vsi->back;
+	struct ice_vf *vf = vsi->vf;
+
+	if (WARN_ON(vsi_type == ICE_VSI_VF && !vf))
+		return;
+
+	switch (vsi_type) {
+	case ICE_VSI_PF:
+		if (vsi->req_txq) {
+			vsi->alloc_txq = vsi->req_txq;
+			vsi->num_txq = vsi->req_txq;
+		} else {
+			vsi->alloc_txq = min3(pf->num_lan_msix,
+					      ice_get_avail_txq_count(pf),
+					      (u16)num_online_cpus());
+		}
+
+		pf->num_lan_tx = vsi->alloc_txq;
+
+		/* only 1 Rx queue unless RSS is enabled */
+		if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+			vsi->alloc_rxq = 1;
+		} else {
+			if (vsi->req_rxq) {
+				vsi->alloc_rxq = vsi->req_rxq;
+				vsi->num_rxq = vsi->req_rxq;
+			} else {
+				vsi->alloc_rxq = min3(pf->num_lan_msix,
+						      ice_get_avail_rxq_count(pf),
+						      (u16)num_online_cpus());
+			}
+		}
+
+		pf->num_lan_rx = vsi->alloc_rxq;
+
+		vsi->num_q_vectors = min_t(int, pf->num_lan_msix,
+					   max_t(int, vsi->alloc_rxq,
+						 vsi->alloc_txq));
+		break;
+	case ICE_VSI_SWITCHDEV_CTRL:
+		/* The number of queues for ctrl VSI is equal to number of VFs.
+		 * Each ring is associated to the corresponding VF_PR netdev.
+		 */
+		vsi->alloc_txq = ice_get_num_vfs(pf);
+		vsi->alloc_rxq = vsi->alloc_txq;
+		vsi->num_q_vectors = 1;
+		break;
+	case ICE_VSI_VF:
+		if (vf->num_req_qs)
+			vf->num_vf_qs = vf->num_req_qs;
+		vsi->alloc_txq = vf->num_vf_qs;
+		vsi->alloc_rxq = vf->num_vf_qs;
+		/* pf->vfs.num_msix_per includes (VF miscellaneous vector +
+		 * data queue interrupts). Since vsi->num_q_vectors is number
+		 * of queues vectors, subtract 1 (ICE_NONQ_VECS_VF) from the
+		 * original vector count
+		 */
+		vsi->num_q_vectors = pf->vfs.num_msix_per - ICE_NONQ_VECS_VF;
+		break;
+	case ICE_VSI_CTRL:
+		vsi->alloc_txq = 1;
+		vsi->alloc_rxq = 1;
+		vsi->num_q_vectors = 1;
+		break;
+	case ICE_VSI_CHNL:
+		vsi->alloc_txq = 0;
+		vsi->alloc_rxq = 0;
+		break;
+	case ICE_VSI_LB:
+		vsi->alloc_txq = 1;
+		vsi->alloc_rxq = 1;
+		break;
+	default:
+		dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n", vsi_type);
+		break;
+	}
+
+	ice_vsi_set_num_desc(vsi);
+}
+
+/**
+ * ice_get_free_slot - get the next non-NULL location index in array
+ * @array: array to search
+ * @size: size of the array
+ * @curr: last known occupied index to be used as a search hint
+ *
+ * void * is being used to keep the functionality generic. This lets us use this
+ * function on any array of pointers.
+ */
+static int ice_get_free_slot(void *array, int size, int curr)
+{
+	int **tmp_array = (int **)array;
+	int next;
+
+	if (curr < (size - 1) && !tmp_array[curr + 1]) {
+		next = curr + 1;
+	} else {
+		int i = 0;
+
+		while ((i < size) && (tmp_array[i]))
+			i++;
+		if (i == size)
+			next = ICE_NO_VSI;
+		else
+			next = i;
+	}
+	return next;
+}
+
+/**
+ * ice_vsi_delete_from_hw - delete a VSI from the switch
+ * @vsi: pointer to VSI being removed
+ */
+static void ice_vsi_delete_from_hw(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_vsi_ctx *ctxt;
+	int status;
+
+	ice_fltr_remove_all(vsi);
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return;
+
+	if (vsi->type == ICE_VSI_VF)
+		ctxt->vf_num = vsi->vf->vf_id;
+	ctxt->vsi_num = vsi->vsi_num;
+
+	memcpy(&ctxt->info, &vsi->info, sizeof(ctxt->info));
+
+	status = ice_free_vsi(&pf->hw, vsi->idx, ctxt, false, NULL);
+	if (status)
+		dev_err(ice_pf_to_dev(pf), "Failed to delete VSI %i in FW - error: %d\n",
+			vsi->vsi_num, status);
+
+	kfree(ctxt);
+}
+
+/**
+ * ice_vsi_free_arrays - De-allocate queue and vector pointer arrays for the VSI
+ * @vsi: pointer to VSI being cleared
+ */
+static void ice_vsi_free_arrays(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+
+	bitmap_free(vsi->af_xdp_zc_qps);
+	vsi->af_xdp_zc_qps = NULL;
+	/* free the ring and vector containers */
+	devm_kfree(dev, vsi->q_vectors);
+	vsi->q_vectors = NULL;
+	devm_kfree(dev, vsi->tx_rings);
+	vsi->tx_rings = NULL;
+	devm_kfree(dev, vsi->rx_rings);
+	vsi->rx_rings = NULL;
+	devm_kfree(dev, vsi->txq_map);
+	vsi->txq_map = NULL;
+	devm_kfree(dev, vsi->rxq_map);
+	vsi->rxq_map = NULL;
+}
+
+/**
+ * ice_vsi_free_stats - Free the ring statistics structures
+ * @vsi: VSI pointer
+ */
+static void ice_vsi_free_stats(struct ice_vsi *vsi)
+{
+	struct ice_vsi_stats *vsi_stat;
+	struct ice_pf *pf = vsi->back;
+	int i;
+
+	if (vsi->type == ICE_VSI_CHNL)
+		return;
+	if (!pf->vsi_stats)
+		return;
+
+	vsi_stat = pf->vsi_stats[vsi->idx];
+	if (!vsi_stat)
+		return;
+
+	ice_for_each_alloc_txq(vsi, i) {
+		if (vsi_stat->tx_ring_stats[i]) {
+			kfree_rcu(vsi_stat->tx_ring_stats[i], rcu);
+			WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL);
+		}
+	}
+
+	ice_for_each_alloc_rxq(vsi, i) {
+		if (vsi_stat->rx_ring_stats[i]) {
+			kfree_rcu(vsi_stat->rx_ring_stats[i], rcu);
+			WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL);
+		}
+	}
+
+	kfree(vsi_stat->tx_ring_stats);
+	kfree(vsi_stat->rx_ring_stats);
+	kfree(vsi_stat);
+	pf->vsi_stats[vsi->idx] = NULL;
+}
+
+/**
+ * ice_vsi_alloc_ring_stats - Allocates Tx and Rx ring stats for the VSI
+ * @vsi: VSI which is having stats allocated
+ */
+static int ice_vsi_alloc_ring_stats(struct ice_vsi *vsi)
+{
+	struct ice_ring_stats **tx_ring_stats;
+	struct ice_ring_stats **rx_ring_stats;
+	struct ice_vsi_stats *vsi_stats;
+	struct ice_pf *pf = vsi->back;
+	u16 i;
+
+	vsi_stats = pf->vsi_stats[vsi->idx];
+	tx_ring_stats = vsi_stats->tx_ring_stats;
+	rx_ring_stats = vsi_stats->rx_ring_stats;
+
+	/* Allocate Tx ring stats */
+	ice_for_each_alloc_txq(vsi, i) {
+		struct ice_ring_stats *ring_stats;
+		struct ice_tx_ring *ring;
+
+		ring = vsi->tx_rings[i];
+		ring_stats = tx_ring_stats[i];
+
+		if (!ring_stats) {
+			ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL);
+			if (!ring_stats)
+				goto err_out;
+
+			WRITE_ONCE(tx_ring_stats[i], ring_stats);
+		}
+
+		ring->ring_stats = ring_stats;
+	}
+
+	/* Allocate Rx ring stats */
+	ice_for_each_alloc_rxq(vsi, i) {
+		struct ice_ring_stats *ring_stats;
+		struct ice_rx_ring *ring;
+
+		ring = vsi->rx_rings[i];
+		ring_stats = rx_ring_stats[i];
+
+		if (!ring_stats) {
+			ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL);
+			if (!ring_stats)
+				goto err_out;
+
+			WRITE_ONCE(rx_ring_stats[i], ring_stats);
+		}
+
+		ring->ring_stats = ring_stats;
+	}
+
+	return 0;
+
+err_out:
+	ice_vsi_free_stats(vsi);
+	return -ENOMEM;
+}
+
+/**
+ * ice_vsi_free - clean up and deallocate the provided VSI
+ * @vsi: pointer to VSI being cleared
+ *
+ * This deallocates the VSI's queue resources, removes it from the PF's
+ * VSI array if necessary, and deallocates the VSI
+ */
+static void ice_vsi_free(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = NULL;
+	struct device *dev;
+
+	if (!vsi || !vsi->back)
+		return;
+
+	pf = vsi->back;
+	dev = ice_pf_to_dev(pf);
+
+	if (!pf->vsi[vsi->idx] || pf->vsi[vsi->idx] != vsi) {
+		dev_dbg(dev, "vsi does not exist at pf->vsi[%d]\n", vsi->idx);
+		return;
+	}
+
+	mutex_lock(&pf->sw_mutex);
+	/* updates the PF for this cleared VSI */
+
+	pf->vsi[vsi->idx] = NULL;
+	pf->next_vsi = vsi->idx;
+
+	ice_vsi_free_stats(vsi);
+	ice_vsi_free_arrays(vsi);
+	mutex_unlock(&pf->sw_mutex);
+	devm_kfree(dev, vsi);
+}
+
+void ice_vsi_delete(struct ice_vsi *vsi)
+{
+	ice_vsi_delete_from_hw(vsi);
+	ice_vsi_free(vsi);
+}
+
+/**
+ * ice_msix_clean_ctrl_vsi - MSIX mode interrupt handler for ctrl VSI
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t ice_msix_clean_ctrl_vsi(int __always_unused irq, void *data)
+{
+	struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
+
+	if (!q_vector->tx.tx_ring)
+		return IRQ_HANDLED;
+
+#define FDIR_RX_DESC_CLEAN_BUDGET 64
+	ice_clean_rx_irq(q_vector->rx.rx_ring, FDIR_RX_DESC_CLEAN_BUDGET);
+	ice_clean_ctrl_tx_irq(q_vector->tx.tx_ring);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ice_msix_clean_rings - MSIX mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)
+{
+	struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
+
+	if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring)
+		return IRQ_HANDLED;
+
+	q_vector->total_events++;
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *data)
+{
+	struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
+	struct ice_pf *pf = q_vector->vsi->back;
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring)
+		return IRQ_HANDLED;
+
+	rcu_read_lock();
+	ice_for_each_vf_rcu(pf, bkt, vf)
+		napi_schedule(&vf->repr->q_vector->napi);
+	rcu_read_unlock();
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ice_vsi_alloc_stat_arrays - Allocate statistics arrays
+ * @vsi: VSI pointer
+ */
+static int ice_vsi_alloc_stat_arrays(struct ice_vsi *vsi)
+{
+	struct ice_vsi_stats *vsi_stat;
+	struct ice_pf *pf = vsi->back;
+
+	if (vsi->type == ICE_VSI_CHNL)
+		return 0;
+	if (!pf->vsi_stats)
+		return -ENOENT;
+
+	if (pf->vsi_stats[vsi->idx])
+	/* realloc will happen in rebuild path */
+		return 0;
+
+	vsi_stat = kzalloc(sizeof(*vsi_stat), GFP_KERNEL);
+	if (!vsi_stat)
+		return -ENOMEM;
+
+	vsi_stat->tx_ring_stats =
+		kcalloc(vsi->alloc_txq, sizeof(*vsi_stat->tx_ring_stats),
+			GFP_KERNEL);
+	if (!vsi_stat->tx_ring_stats)
+		goto err_alloc_tx;
+
+	vsi_stat->rx_ring_stats =
+		kcalloc(vsi->alloc_rxq, sizeof(*vsi_stat->rx_ring_stats),
+			GFP_KERNEL);
+	if (!vsi_stat->rx_ring_stats)
+		goto err_alloc_rx;
+
+	pf->vsi_stats[vsi->idx] = vsi_stat;
+
+	return 0;
+
+err_alloc_rx:
+	kfree(vsi_stat->rx_ring_stats);
+err_alloc_tx:
+	kfree(vsi_stat->tx_ring_stats);
+	kfree(vsi_stat);
+	pf->vsi_stats[vsi->idx] = NULL;
+	return -ENOMEM;
+}
+
+/**
+ * ice_vsi_alloc_def - set default values for already allocated VSI
+ * @vsi: ptr to VSI
+ * @ch: ptr to channel
+ */
+static int
+ice_vsi_alloc_def(struct ice_vsi *vsi, struct ice_channel *ch)
+{
+	if (vsi->type != ICE_VSI_CHNL) {
+		ice_vsi_set_num_qs(vsi);
+		if (ice_vsi_alloc_arrays(vsi))
+			return -ENOMEM;
+	}
+
+	switch (vsi->type) {
+	case ICE_VSI_SWITCHDEV_CTRL:
+		/* Setup eswitch MSIX irq handler for VSI */
+		vsi->irq_handler = ice_eswitch_msix_clean_rings;
+		break;
+	case ICE_VSI_PF:
+		/* Setup default MSIX irq handler for VSI */
+		vsi->irq_handler = ice_msix_clean_rings;
+		break;
+	case ICE_VSI_CTRL:
+		/* Setup ctrl VSI MSIX irq handler */
+		vsi->irq_handler = ice_msix_clean_ctrl_vsi;
+		break;
+	case ICE_VSI_CHNL:
+		if (!ch)
+			return -EINVAL;
+
+		vsi->num_rxq = ch->num_rxq;
+		vsi->num_txq = ch->num_txq;
+		vsi->next_base_q = ch->base_q;
+		break;
+	case ICE_VSI_VF:
+	case ICE_VSI_LB:
+		break;
+	default:
+		ice_vsi_free_arrays(vsi);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vsi_alloc - Allocates the next available struct VSI in the PF
+ * @pf: board private structure
+ *
+ * Reserves a VSI index from the PF and allocates an empty VSI structure
+ * without a type. The VSI structure must later be initialized by calling
+ * ice_vsi_cfg().
+ *
+ * returns a pointer to a VSI on success, NULL on failure.
+ */
+static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_vsi *vsi = NULL;
+
+	/* Need to protect the allocation of the VSIs at the PF level */
+	mutex_lock(&pf->sw_mutex);
+
+	/* If we have already allocated our maximum number of VSIs,
+	 * pf->next_vsi will be ICE_NO_VSI. If not, pf->next_vsi index
+	 * is available to be populated
+	 */
+	if (pf->next_vsi == ICE_NO_VSI) {
+		dev_dbg(dev, "out of VSI slots!\n");
+		goto unlock_pf;
+	}
+
+	vsi = devm_kzalloc(dev, sizeof(*vsi), GFP_KERNEL);
+	if (!vsi)
+		goto unlock_pf;
+
+	vsi->back = pf;
+	set_bit(ICE_VSI_DOWN, vsi->state);
+
+	/* fill slot and make note of the index */
+	vsi->idx = pf->next_vsi;
+	pf->vsi[pf->next_vsi] = vsi;
+
+	/* prepare pf->next_vsi for next use */
+	pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi,
+					 pf->next_vsi);
+
+unlock_pf:
+	mutex_unlock(&pf->sw_mutex);
+	return vsi;
+}
+
+/**
+ * ice_alloc_fd_res - Allocate FD resource for a VSI
+ * @vsi: pointer to the ice_vsi
+ *
+ * This allocates the FD resources
+ *
+ * Returns 0 on success, -EPERM on no-op or -EIO on failure
+ */
+static int ice_alloc_fd_res(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	u32 g_val, b_val;
+
+	/* Flow Director filters are only allocated/assigned to the PF VSI or
+	 * CHNL VSI which passes the traffic. The CTRL VSI is only used to
+	 * add/delete filters so resources are not allocated to it
+	 */
+	if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		return -EPERM;
+
+	if (!(vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF ||
+	      vsi->type == ICE_VSI_CHNL))
+		return -EPERM;
+
+	/* FD filters from guaranteed pool per VSI */
+	g_val = pf->hw.func_caps.fd_fltr_guar;
+	if (!g_val)
+		return -EPERM;
+
+	/* FD filters from best effort pool */
+	b_val = pf->hw.func_caps.fd_fltr_best_effort;
+	if (!b_val)
+		return -EPERM;
+
+	/* PF main VSI gets only 64 FD resources from guaranteed pool
+	 * when ADQ is configured.
+	 */
+#define ICE_PF_VSI_GFLTR	64
+
+	/* determine FD filter resources per VSI from shared(best effort) and
+	 * dedicated pool
+	 */
+	if (vsi->type == ICE_VSI_PF) {
+		vsi->num_gfltr = g_val;
+		/* if MQPRIO is configured, main VSI doesn't get all FD
+		 * resources from guaranteed pool. PF VSI gets 64 FD resources
+		 */
+		if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
+			if (g_val < ICE_PF_VSI_GFLTR)
+				return -EPERM;
+			/* allow bare minimum entries for PF VSI */
+			vsi->num_gfltr = ICE_PF_VSI_GFLTR;
+		}
+
+		/* each VSI gets same "best_effort" quota */
+		vsi->num_bfltr = b_val;
+	} else if (vsi->type == ICE_VSI_VF) {
+		vsi->num_gfltr = 0;
+
+		/* each VSI gets same "best_effort" quota */
+		vsi->num_bfltr = b_val;
+	} else {
+		struct ice_vsi *main_vsi;
+		int numtc;
+
+		main_vsi = ice_get_main_vsi(pf);
+		if (!main_vsi)
+			return -EPERM;
+
+		if (!main_vsi->all_numtc)
+			return -EINVAL;
+
+		/* figure out ADQ numtc */
+		numtc = main_vsi->all_numtc - ICE_CHNL_START_TC;
+
+		/* only one TC but still asking resources for channels,
+		 * invalid config
+		 */
+		if (numtc < ICE_CHNL_START_TC)
+			return -EPERM;
+
+		g_val -= ICE_PF_VSI_GFLTR;
+		/* channel VSIs gets equal share from guaranteed pool */
+		vsi->num_gfltr = g_val / numtc;
+
+		/* each VSI gets same "best_effort" quota */
+		vsi->num_bfltr = b_val;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vsi_get_qs - Assign queues from PF to VSI
+ * @vsi: the VSI to assign queues to
+ *
+ * Returns 0 on success and a negative value on error
+ */
+static int ice_vsi_get_qs(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_qs_cfg tx_qs_cfg = {
+		.qs_mutex = &pf->avail_q_mutex,
+		.pf_map = pf->avail_txqs,
+		.pf_map_size = pf->max_pf_txqs,
+		.q_count = vsi->alloc_txq,
+		.scatter_count = ICE_MAX_SCATTER_TXQS,
+		.vsi_map = vsi->txq_map,
+		.vsi_map_offset = 0,
+		.mapping_mode = ICE_VSI_MAP_CONTIG
+	};
+	struct ice_qs_cfg rx_qs_cfg = {
+		.qs_mutex = &pf->avail_q_mutex,
+		.pf_map = pf->avail_rxqs,
+		.pf_map_size = pf->max_pf_rxqs,
+		.q_count = vsi->alloc_rxq,
+		.scatter_count = ICE_MAX_SCATTER_RXQS,
+		.vsi_map = vsi->rxq_map,
+		.vsi_map_offset = 0,
+		.mapping_mode = ICE_VSI_MAP_CONTIG
+	};
+	int ret;
+
+	if (vsi->type == ICE_VSI_CHNL)
+		return 0;
+
+	ret = __ice_vsi_get_qs(&tx_qs_cfg);
+	if (ret)
+		return ret;
+	vsi->tx_mapping_mode = tx_qs_cfg.mapping_mode;
+
+	ret = __ice_vsi_get_qs(&rx_qs_cfg);
+	if (ret)
+		return ret;
+	vsi->rx_mapping_mode = rx_qs_cfg.mapping_mode;
+
+	return 0;
+}
+
+/**
+ * ice_vsi_put_qs - Release queues from VSI to PF
+ * @vsi: the VSI that is going to release queues
+ */
+static void ice_vsi_put_qs(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int i;
+
+	mutex_lock(&pf->avail_q_mutex);
+
+	ice_for_each_alloc_txq(vsi, i) {
+		clear_bit(vsi->txq_map[i], pf->avail_txqs);
+		vsi->txq_map[i] = ICE_INVAL_Q_INDEX;
+	}
+
+	ice_for_each_alloc_rxq(vsi, i) {
+		clear_bit(vsi->rxq_map[i], pf->avail_rxqs);
+		vsi->rxq_map[i] = ICE_INVAL_Q_INDEX;
+	}
+
+	mutex_unlock(&pf->avail_q_mutex);
+}
+
+/**
+ * ice_is_safe_mode
+ * @pf: pointer to the PF struct
+ *
+ * returns true if driver is in safe mode, false otherwise
+ */
+bool ice_is_safe_mode(struct ice_pf *pf)
+{
+	return !test_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
+}
+
+/**
+ * ice_is_rdma_ena
+ * @pf: pointer to the PF struct
+ *
+ * returns true if RDMA is currently supported, false otherwise
+ */
+bool ice_is_rdma_ena(struct ice_pf *pf)
+{
+	return test_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+}
+
+/**
+ * ice_vsi_clean_rss_flow_fld - Delete RSS configuration
+ * @vsi: the VSI being cleaned up
+ *
+ * This function deletes RSS input set for all flows that were configured
+ * for this VSI
+ */
+static void ice_vsi_clean_rss_flow_fld(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int status;
+
+	if (ice_is_safe_mode(pf))
+		return;
+
+	status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx);
+	if (status)
+		dev_dbg(ice_pf_to_dev(pf), "ice_rem_vsi_rss_cfg failed for vsi = %d, error = %d\n",
+			vsi->vsi_num, status);
+}
+
+/**
+ * ice_rss_clean - Delete RSS related VSI structures and configuration
+ * @vsi: the VSI being removed
+ */
+static void ice_rss_clean(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+
+	devm_kfree(dev, vsi->rss_hkey_user);
+	devm_kfree(dev, vsi->rss_lut_user);
+
+	ice_vsi_clean_rss_flow_fld(vsi);
+	/* remove RSS replay list */
+	if (!ice_is_safe_mode(pf))
+		ice_rem_vsi_rss_list(&pf->hw, vsi->idx);
+}
+
+/**
+ * ice_vsi_set_rss_params - Setup RSS capabilities per VSI type
+ * @vsi: the VSI being configured
+ */
+static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
+{
+	struct ice_hw_common_caps *cap;
+	struct ice_pf *pf = vsi->back;
+	u16 max_rss_size;
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+		vsi->rss_size = 1;
+		return;
+	}
+
+	cap = &pf->hw.func_caps.common_cap;
+	max_rss_size = BIT(cap->rss_table_entry_width);
+	switch (vsi->type) {
+	case ICE_VSI_CHNL:
+	case ICE_VSI_PF:
+		/* PF VSI will inherit RSS instance of PF */
+		vsi->rss_table_size = (u16)cap->rss_table_size;
+		if (vsi->type == ICE_VSI_CHNL)
+			vsi->rss_size = min_t(u16, vsi->num_rxq, max_rss_size);
+		else
+			vsi->rss_size = min_t(u16, num_online_cpus(),
+					      max_rss_size);
+		vsi->rss_lut_type = ICE_LUT_PF;
+		break;
+	case ICE_VSI_SWITCHDEV_CTRL:
+		vsi->rss_table_size = ICE_LUT_VSI_SIZE;
+		vsi->rss_size = min_t(u16, num_online_cpus(), max_rss_size);
+		vsi->rss_lut_type = ICE_LUT_VSI;
+		break;
+	case ICE_VSI_VF:
+		/* VF VSI will get a small RSS table.
+		 * For VSI_LUT, LUT size should be set to 64 bytes.
+		 */
+		vsi->rss_table_size = ICE_LUT_VSI_SIZE;
+		vsi->rss_size = ICE_MAX_RSS_QS_PER_VF;
+		vsi->rss_lut_type = ICE_LUT_VSI;
+		break;
+	case ICE_VSI_LB:
+		break;
+	default:
+		dev_dbg(ice_pf_to_dev(pf), "Unsupported VSI type %s\n",
+			ice_vsi_type_str(vsi->type));
+		break;
+	}
+}
+
+/**
+ * ice_set_dflt_vsi_ctx - Set default VSI context before adding a VSI
+ * @hw: HW structure used to determine the VLAN mode of the device
+ * @ctxt: the VSI context being set
+ *
+ * This initializes a default VSI context for all sections except the Queues.
+ */
+static void ice_set_dflt_vsi_ctx(struct ice_hw *hw, struct ice_vsi_ctx *ctxt)
+{
+	u32 table = 0;
+
+	memset(&ctxt->info, 0, sizeof(ctxt->info));
+	/* VSI's should be allocated from shared pool */
+	ctxt->alloc_from_pool = true;
+	/* Src pruning enabled by default */
+	ctxt->info.sw_flags = ICE_AQ_VSI_SW_FLAG_SRC_PRUNE;
+	/* Traffic from VSI can be sent to LAN */
+	ctxt->info.sw_flags2 = ICE_AQ_VSI_SW_FLAG_LAN_ENA;
+	/* allow all untagged/tagged packets by default on Tx */
+	ctxt->info.inner_vlan_flags = ((ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL &
+				  ICE_AQ_VSI_INNER_VLAN_TX_MODE_M) >>
+				 ICE_AQ_VSI_INNER_VLAN_TX_MODE_S);
+	/* SVM - by default bits 3 and 4 in inner_vlan_flags are 0's which
+	 * results in legacy behavior (show VLAN, DEI, and UP) in descriptor.
+	 *
+	 * DVM - leave inner VLAN in packet by default
+	 */
+	if (ice_is_dvm_ena(hw)) {
+		ctxt->info.inner_vlan_flags |=
+			ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING;
+		ctxt->info.outer_vlan_flags =
+			(ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL <<
+			 ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) &
+			ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M;
+		ctxt->info.outer_vlan_flags |=
+			(ICE_AQ_VSI_OUTER_TAG_VLAN_8100 <<
+			 ICE_AQ_VSI_OUTER_TAG_TYPE_S) &
+			ICE_AQ_VSI_OUTER_TAG_TYPE_M;
+		ctxt->info.outer_vlan_flags |=
+			FIELD_PREP(ICE_AQ_VSI_OUTER_VLAN_EMODE_M,
+				   ICE_AQ_VSI_OUTER_VLAN_EMODE_NOTHING);
+	}
+	/* Have 1:1 UP mapping for both ingress/egress tables */
+	table |= ICE_UP_TABLE_TRANSLATE(0, 0);
+	table |= ICE_UP_TABLE_TRANSLATE(1, 1);
+	table |= ICE_UP_TABLE_TRANSLATE(2, 2);
+	table |= ICE_UP_TABLE_TRANSLATE(3, 3);
+	table |= ICE_UP_TABLE_TRANSLATE(4, 4);
+	table |= ICE_UP_TABLE_TRANSLATE(5, 5);
+	table |= ICE_UP_TABLE_TRANSLATE(6, 6);
+	table |= ICE_UP_TABLE_TRANSLATE(7, 7);
+	ctxt->info.ingress_table = cpu_to_le32(table);
+	ctxt->info.egress_table = cpu_to_le32(table);
+	/* Have 1:1 UP mapping for outer to inner UP table */
+	ctxt->info.outer_up_table = cpu_to_le32(table);
+	/* No Outer tag support outer_tag_flags remains to zero */
+}
+
+/**
+ * ice_vsi_setup_q_map - Setup a VSI queue map
+ * @vsi: the VSI being configured
+ * @ctxt: VSI context structure
+ */
+static int ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
+{
+	u16 offset = 0, qmap = 0, tx_count = 0, rx_count = 0, pow = 0;
+	u16 num_txq_per_tc, num_rxq_per_tc;
+	u16 qcount_tx = vsi->alloc_txq;
+	u16 qcount_rx = vsi->alloc_rxq;
+	u8 netdev_tc = 0;
+	int i;
+
+	if (!vsi->tc_cfg.numtc) {
+		/* at least TC0 should be enabled by default */
+		vsi->tc_cfg.numtc = 1;
+		vsi->tc_cfg.ena_tc = 1;
+	}
+
+	num_rxq_per_tc = min_t(u16, qcount_rx / vsi->tc_cfg.numtc, ICE_MAX_RXQS_PER_TC);
+	if (!num_rxq_per_tc)
+		num_rxq_per_tc = 1;
+	num_txq_per_tc = qcount_tx / vsi->tc_cfg.numtc;
+	if (!num_txq_per_tc)
+		num_txq_per_tc = 1;
+
+	/* find the (rounded up) power-of-2 of qcount */
+	pow = (u16)order_base_2(num_rxq_per_tc);
+
+	/* TC mapping is a function of the number of Rx queues assigned to the
+	 * VSI for each traffic class and the offset of these queues.
+	 * The first 10 bits are for queue offset for TC0, next 4 bits for no:of
+	 * queues allocated to TC0. No:of queues is a power-of-2.
+	 *
+	 * If TC is not enabled, the queue offset is set to 0, and allocate one
+	 * queue, this way, traffic for the given TC will be sent to the default
+	 * queue.
+	 *
+	 * Setup number and offset of Rx queues for all TCs for the VSI
+	 */
+	ice_for_each_traffic_class(i) {
+		if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
+			/* TC is not enabled */
+			vsi->tc_cfg.tc_info[i].qoffset = 0;
+			vsi->tc_cfg.tc_info[i].qcount_rx = 1;
+			vsi->tc_cfg.tc_info[i].qcount_tx = 1;
+			vsi->tc_cfg.tc_info[i].netdev_tc = 0;
+			ctxt->info.tc_mapping[i] = 0;
+			continue;
+		}
+
+		/* TC is enabled */
+		vsi->tc_cfg.tc_info[i].qoffset = offset;
+		vsi->tc_cfg.tc_info[i].qcount_rx = num_rxq_per_tc;
+		vsi->tc_cfg.tc_info[i].qcount_tx = num_txq_per_tc;
+		vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++;
+
+		qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
+			ICE_AQ_VSI_TC_Q_OFFSET_M) |
+			((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
+			 ICE_AQ_VSI_TC_Q_NUM_M);
+		offset += num_rxq_per_tc;
+		tx_count += num_txq_per_tc;
+		ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
+	}
+
+	/* if offset is non-zero, means it is calculated correctly based on
+	 * enabled TCs for a given VSI otherwise qcount_rx will always
+	 * be correct and non-zero because it is based off - VSI's
+	 * allocated Rx queues which is at least 1 (hence qcount_tx will be
+	 * at least 1)
+	 */
+	if (offset)
+		rx_count = offset;
+	else
+		rx_count = num_rxq_per_tc;
+
+	if (rx_count > vsi->alloc_rxq) {
+		dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Rx queues (%u), than were allocated (%u)!\n",
+			rx_count, vsi->alloc_rxq);
+		return -EINVAL;
+	}
+
+	if (tx_count > vsi->alloc_txq) {
+		dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Tx queues (%u), than were allocated (%u)!\n",
+			tx_count, vsi->alloc_txq);
+		return -EINVAL;
+	}
+
+	vsi->num_txq = tx_count;
+	vsi->num_rxq = rx_count;
+
+	if (vsi->type == ICE_VSI_VF && vsi->num_txq != vsi->num_rxq) {
+		dev_dbg(ice_pf_to_dev(vsi->back), "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n");
+		/* since there is a chance that num_rxq could have been changed
+		 * in the above for loop, make num_txq equal to num_rxq.
+		 */
+		vsi->num_txq = vsi->num_rxq;
+	}
+
+	/* Rx queue mapping */
+	ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG);
+	/* q_mapping buffer holds the info for the first queue allocated for
+	 * this VSI in the PF space and also the number of queues associated
+	 * with this VSI.
+	 */
+	ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]);
+	ctxt->info.q_mapping[1] = cpu_to_le16(vsi->num_rxq);
+
+	return 0;
+}
+
+/**
+ * ice_set_fd_vsi_ctx - Set FD VSI context before adding a VSI
+ * @ctxt: the VSI context being set
+ * @vsi: the VSI being configured
+ */
+static void ice_set_fd_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
+{
+	u8 dflt_q_group, dflt_q_prio;
+	u16 dflt_q, report_q, val;
+
+	if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL &&
+	    vsi->type != ICE_VSI_VF && vsi->type != ICE_VSI_CHNL)
+		return;
+
+	val = ICE_AQ_VSI_PROP_FLOW_DIR_VALID;
+	ctxt->info.valid_sections |= cpu_to_le16(val);
+	dflt_q = 0;
+	dflt_q_group = 0;
+	report_q = 0;
+	dflt_q_prio = 0;
+
+	/* enable flow director filtering/programming */
+	val = ICE_AQ_VSI_FD_ENABLE | ICE_AQ_VSI_FD_PROG_ENABLE;
+	ctxt->info.fd_options = cpu_to_le16(val);
+	/* max of allocated flow director filters */
+	ctxt->info.max_fd_fltr_dedicated =
+			cpu_to_le16(vsi->num_gfltr);
+	/* max of shared flow director filters any VSI may program */
+	ctxt->info.max_fd_fltr_shared =
+			cpu_to_le16(vsi->num_bfltr);
+	/* default queue index within the VSI of the default FD */
+	val = ((dflt_q << ICE_AQ_VSI_FD_DEF_Q_S) &
+	       ICE_AQ_VSI_FD_DEF_Q_M);
+	/* target queue or queue group to the FD filter */
+	val |= ((dflt_q_group << ICE_AQ_VSI_FD_DEF_GRP_S) &
+		ICE_AQ_VSI_FD_DEF_GRP_M);
+	ctxt->info.fd_def_q = cpu_to_le16(val);
+	/* queue index on which FD filter completion is reported */
+	val = ((report_q << ICE_AQ_VSI_FD_REPORT_Q_S) &
+	       ICE_AQ_VSI_FD_REPORT_Q_M);
+	/* priority of the default qindex action */
+	val |= ((dflt_q_prio << ICE_AQ_VSI_FD_DEF_PRIORITY_S) &
+		ICE_AQ_VSI_FD_DEF_PRIORITY_M);
+	ctxt->info.fd_report_opt = cpu_to_le16(val);
+}
+
+/**
+ * ice_set_rss_vsi_ctx - Set RSS VSI context before adding a VSI
+ * @ctxt: the VSI context being set
+ * @vsi: the VSI being configured
+ */
+static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
+{
+	u8 lut_type, hash_type;
+	struct device *dev;
+	struct ice_pf *pf;
+
+	pf = vsi->back;
+	dev = ice_pf_to_dev(pf);
+
+	switch (vsi->type) {
+	case ICE_VSI_CHNL:
+	case ICE_VSI_PF:
+		/* PF VSI will inherit RSS instance of PF */
+		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF;
+		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
+		break;
+	case ICE_VSI_VF:
+		/* VF VSI will gets a small RSS table which is a VSI LUT type */
+		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
+		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
+		break;
+	default:
+		dev_dbg(dev, "Unsupported VSI type %s\n",
+			ice_vsi_type_str(vsi->type));
+		return;
+	}
+
+	ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
+				ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
+				(hash_type & ICE_AQ_VSI_Q_OPT_RSS_HASH_M);
+}
+
+static void
+ice_chnl_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
+{
+	struct ice_pf *pf = vsi->back;
+	u16 qcount, qmap;
+	u8 offset = 0;
+	int pow;
+
+	qcount = min_t(int, vsi->num_rxq, pf->num_lan_msix);
+
+	pow = order_base_2(qcount);
+	qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
+		 ICE_AQ_VSI_TC_Q_OFFSET_M) |
+		 ((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
+		   ICE_AQ_VSI_TC_Q_NUM_M);
+
+	ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+	ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG);
+	ctxt->info.q_mapping[0] = cpu_to_le16(vsi->next_base_q);
+	ctxt->info.q_mapping[1] = cpu_to_le16(qcount);
+}
+
+/**
+ * ice_vsi_is_vlan_pruning_ena - check if VLAN pruning is enabled or not
+ * @vsi: VSI to check whether or not VLAN pruning is enabled.
+ *
+ * returns true if Rx VLAN pruning is enabled and false otherwise.
+ */
+static bool ice_vsi_is_vlan_pruning_ena(struct ice_vsi *vsi)
+{
+	return vsi->info.sw_flags2 & ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+}
+
+/**
+ * ice_vsi_init - Create and initialize a VSI
+ * @vsi: the VSI being configured
+ * @vsi_flags: VSI configuration flags
+ *
+ * Set ICE_FLAG_VSI_INIT to initialize a new VSI context, clear it to
+ * reconfigure an existing context.
+ *
+ * This initializes a VSI context depending on the VSI type to be added and
+ * passes it down to the add_vsi aq command to create a new VSI.
+ */
+static int ice_vsi_init(struct ice_vsi *vsi, u32 vsi_flags)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_vsi_ctx *ctxt;
+	struct device *dev;
+	int ret = 0;
+
+	dev = ice_pf_to_dev(pf);
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	switch (vsi->type) {
+	case ICE_VSI_CTRL:
+	case ICE_VSI_LB:
+	case ICE_VSI_PF:
+		ctxt->flags = ICE_AQ_VSI_TYPE_PF;
+		break;
+	case ICE_VSI_SWITCHDEV_CTRL:
+	case ICE_VSI_CHNL:
+		ctxt->flags = ICE_AQ_VSI_TYPE_VMDQ2;
+		break;
+	case ICE_VSI_VF:
+		ctxt->flags = ICE_AQ_VSI_TYPE_VF;
+		/* VF number here is the absolute VF number (0-255) */
+		ctxt->vf_num = vsi->vf->vf_id + hw->func_caps.vf_base_id;
+		break;
+	default:
+		ret = -ENODEV;
+		goto out;
+	}
+
+	/* Handle VLAN pruning for channel VSI if main VSI has VLAN
+	 * prune enabled
+	 */
+	if (vsi->type == ICE_VSI_CHNL) {
+		struct ice_vsi *main_vsi;
+
+		main_vsi = ice_get_main_vsi(pf);
+		if (main_vsi && ice_vsi_is_vlan_pruning_ena(main_vsi))
+			ctxt->info.sw_flags2 |=
+				ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+		else
+			ctxt->info.sw_flags2 &=
+				~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+	}
+
+	ice_set_dflt_vsi_ctx(hw, ctxt);
+	if (test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		ice_set_fd_vsi_ctx(ctxt, vsi);
+	/* if the switch is in VEB mode, allow VSI loopback */
+	if (vsi->vsw->bridge_mode == BRIDGE_MODE_VEB)
+		ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
+
+	/* Set LUT type and HASH type if RSS is enabled */
+	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags) &&
+	    vsi->type != ICE_VSI_CTRL) {
+		ice_set_rss_vsi_ctx(ctxt, vsi);
+		/* if updating VSI context, make sure to set valid_section:
+		 * to indicate which section of VSI context being updated
+		 */
+		if (!(vsi_flags & ICE_VSI_FLAG_INIT))
+			ctxt->info.valid_sections |=
+				cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
+	}
+
+	ctxt->info.sw_id = vsi->port_info->sw_id;
+	if (vsi->type == ICE_VSI_CHNL) {
+		ice_chnl_vsi_setup_q_map(vsi, ctxt);
+	} else {
+		ret = ice_vsi_setup_q_map(vsi, ctxt);
+		if (ret)
+			goto out;
+
+		if (!(vsi_flags & ICE_VSI_FLAG_INIT))
+			/* means VSI being updated */
+			/* must to indicate which section of VSI context are
+			 * being modified
+			 */
+			ctxt->info.valid_sections |=
+				cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
+	}
+
+	/* Allow control frames out of main VSI */
+	if (vsi->type == ICE_VSI_PF) {
+		ctxt->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
+		ctxt->info.valid_sections |=
+			cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+	}
+
+	if (vsi_flags & ICE_VSI_FLAG_INIT) {
+		ret = ice_add_vsi(hw, vsi->idx, ctxt, NULL);
+		if (ret) {
+			dev_err(dev, "Add VSI failed, err %d\n", ret);
+			ret = -EIO;
+			goto out;
+		}
+	} else {
+		ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+		if (ret) {
+			dev_err(dev, "Update VSI failed, err %d\n", ret);
+			ret = -EIO;
+			goto out;
+		}
+	}
+
+	/* keep context for update VSI operations */
+	vsi->info = ctxt->info;
+
+	/* record VSI number returned */
+	vsi->vsi_num = ctxt->vsi_num;
+
+out:
+	kfree(ctxt);
+	return ret;
+}
+
+/**
+ * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI
+ * @vsi: the VSI having rings deallocated
+ */
+static void ice_vsi_clear_rings(struct ice_vsi *vsi)
+{
+	int i;
+
+	/* Avoid stale references by clearing map from vector to ring */
+	if (vsi->q_vectors) {
+		ice_for_each_q_vector(vsi, i) {
+			struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+			if (q_vector) {
+				q_vector->tx.tx_ring = NULL;
+				q_vector->rx.rx_ring = NULL;
+			}
+		}
+	}
+
+	if (vsi->tx_rings) {
+		ice_for_each_alloc_txq(vsi, i) {
+			if (vsi->tx_rings[i]) {
+				kfree_rcu(vsi->tx_rings[i], rcu);
+				WRITE_ONCE(vsi->tx_rings[i], NULL);
+			}
+		}
+	}
+	if (vsi->rx_rings) {
+		ice_for_each_alloc_rxq(vsi, i) {
+			if (vsi->rx_rings[i]) {
+				kfree_rcu(vsi->rx_rings[i], rcu);
+				WRITE_ONCE(vsi->rx_rings[i], NULL);
+			}
+		}
+	}
+}
+
+/**
+ * ice_vsi_alloc_rings - Allocates Tx and Rx rings for the VSI
+ * @vsi: VSI which is having rings allocated
+ */
+static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
+{
+	bool dvm_ena = ice_is_dvm_ena(&vsi->back->hw);
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	u16 i;
+
+	dev = ice_pf_to_dev(pf);
+	/* Allocate Tx rings */
+	ice_for_each_alloc_txq(vsi, i) {
+		struct ice_tx_ring *ring;
+
+		/* allocate with kzalloc(), free with kfree_rcu() */
+		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+
+		if (!ring)
+			goto err_out;
+
+		ring->q_index = i;
+		ring->reg_idx = vsi->txq_map[i];
+		ring->vsi = vsi;
+		ring->tx_tstamps = &pf->ptp.port.tx;
+		ring->dev = dev;
+		ring->count = vsi->num_tx_desc;
+		ring->txq_teid = ICE_INVAL_TEID;
+		if (dvm_ena)
+			ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG2;
+		else
+			ring->flags |= ICE_TX_FLAGS_RING_VLAN_L2TAG1;
+		WRITE_ONCE(vsi->tx_rings[i], ring);
+	}
+
+	/* Allocate Rx rings */
+	ice_for_each_alloc_rxq(vsi, i) {
+		struct ice_rx_ring *ring;
+
+		/* allocate with kzalloc(), free with kfree_rcu() */
+		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+		if (!ring)
+			goto err_out;
+
+		ring->q_index = i;
+		ring->reg_idx = vsi->rxq_map[i];
+		ring->vsi = vsi;
+		ring->netdev = vsi->netdev;
+		ring->dev = dev;
+		ring->count = vsi->num_rx_desc;
+		ring->cached_phctime = pf->ptp.cached_phc_time;
+		WRITE_ONCE(vsi->rx_rings[i], ring);
+	}
+
+	return 0;
+
+err_out:
+	ice_vsi_clear_rings(vsi);
+	return -ENOMEM;
+}
+
+/**
+ * ice_vsi_manage_rss_lut - disable/enable RSS
+ * @vsi: the VSI being changed
+ * @ena: boolean value indicating if this is an enable or disable request
+ *
+ * In the event of disable request for RSS, this function will zero out RSS
+ * LUT, while in the event of enable request for RSS, it will reconfigure RSS
+ * LUT.
+ */
+void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena)
+{
+	u8 *lut;
+
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return;
+
+	if (ena) {
+		if (vsi->rss_lut_user)
+			memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+		else
+			ice_fill_rss_lut(lut, vsi->rss_table_size,
+					 vsi->rss_size);
+	}
+
+	ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
+	kfree(lut);
+}
+
+/**
+ * ice_vsi_cfg_crc_strip - Configure CRC stripping for a VSI
+ * @vsi: VSI to be configured
+ * @disable: set to true to have FCS / CRC in the frame data
+ */
+void ice_vsi_cfg_crc_strip(struct ice_vsi *vsi, bool disable)
+{
+	int i;
+
+	ice_for_each_rxq(vsi, i)
+		if (disable)
+			vsi->rx_rings[i]->flags |= ICE_RX_FLAGS_CRC_STRIP_DIS;
+		else
+			vsi->rx_rings[i]->flags &= ~ICE_RX_FLAGS_CRC_STRIP_DIS;
+}
+
+/**
+ * ice_vsi_cfg_rss_lut_key - Configure RSS params for a VSI
+ * @vsi: VSI to be configured
+ */
+int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	u8 *lut, *key;
+	int err;
+
+	dev = ice_pf_to_dev(pf);
+	if (vsi->type == ICE_VSI_PF && vsi->ch_rss_size &&
+	    (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))) {
+		vsi->rss_size = min_t(u16, vsi->rss_size, vsi->ch_rss_size);
+	} else {
+		vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq);
+
+		/* If orig_rss_size is valid and it is less than determined
+		 * main VSI's rss_size, update main VSI's rss_size to be
+		 * orig_rss_size so that when tc-qdisc is deleted, main VSI
+		 * RSS table gets programmed to be correct (whatever it was
+		 * to begin with (prior to setup-tc for ADQ config)
+		 */
+		if (vsi->orig_rss_size && vsi->rss_size < vsi->orig_rss_size &&
+		    vsi->orig_rss_size <= vsi->num_rxq) {
+			vsi->rss_size = vsi->orig_rss_size;
+			/* now orig_rss_size is used, reset it to zero */
+			vsi->orig_rss_size = 0;
+		}
+	}
+
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+
+	if (vsi->rss_lut_user)
+		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+	else
+		ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
+
+	err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
+	if (err) {
+		dev_err(dev, "set_rss_lut failed, error %d\n", err);
+		goto ice_vsi_cfg_rss_exit;
+	}
+
+	key = kzalloc(ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE, GFP_KERNEL);
+	if (!key) {
+		err = -ENOMEM;
+		goto ice_vsi_cfg_rss_exit;
+	}
+
+	if (vsi->rss_hkey_user)
+		memcpy(key, vsi->rss_hkey_user, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
+	else
+		netdev_rss_key_fill((void *)key, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
+
+	err = ice_set_rss_key(vsi, key);
+	if (err)
+		dev_err(dev, "set_rss_key failed, error %d\n", err);
+
+	kfree(key);
+ice_vsi_cfg_rss_exit:
+	kfree(lut);
+	return err;
+}
+
+/**
+ * ice_vsi_set_vf_rss_flow_fld - Sets VF VSI RSS input set for different flows
+ * @vsi: VSI to be configured
+ *
+ * This function will only be called during the VF VSI setup. Upon successful
+ * completion of package download, this function will configure default RSS
+ * input sets for VF VSI.
+ */
+static void ice_vsi_set_vf_rss_flow_fld(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	int status;
+
+	dev = ice_pf_to_dev(pf);
+	if (ice_is_safe_mode(pf)) {
+		dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
+			vsi->vsi_num);
+		return;
+	}
+
+	status = ice_add_avf_rss_cfg(&pf->hw, vsi->idx, ICE_DEFAULT_RSS_HENA);
+	if (status)
+		dev_dbg(dev, "ice_add_avf_rss_cfg failed for vsi = %d, error = %d\n",
+			vsi->vsi_num, status);
+}
+
+/**
+ * ice_vsi_set_rss_flow_fld - Sets RSS input set for different flows
+ * @vsi: VSI to be configured
+ *
+ * This function will only be called after successful download package call
+ * during initialization of PF. Since the downloaded package will erase the
+ * RSS section, this function will configure RSS input sets for different
+ * flow types. The last profile added has the highest priority, therefore 2
+ * tuple profiles (i.e. IPv4 src/dst) are added before 4 tuple profiles
+ * (i.e. IPv4 src/dst TCP src/dst port).
+ */
+static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi)
+{
+	u16 vsi_handle = vsi->idx, vsi_num = vsi->vsi_num;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	struct device *dev;
+	int status;
+
+	dev = ice_pf_to_dev(pf);
+	if (ice_is_safe_mode(pf)) {
+		dev_dbg(dev, "Advanced RSS disabled. Package download failed, vsi num = %d\n",
+			vsi_num);
+		return;
+	}
+	/* configure RSS for IPv4 with input set IP src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4,
+				 ICE_FLOW_SEG_HDR_IPV4);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for ipv4 flow, vsi = %d, error = %d\n",
+			vsi_num, status);
+
+	/* configure RSS for IPv6 with input set IPv6 src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6,
+				 ICE_FLOW_SEG_HDR_IPV6);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for ipv6 flow, vsi = %d, error = %d\n",
+			vsi_num, status);
+
+	/* configure RSS for tcp4 with input set IP src/dst, TCP src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV4,
+				 ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV4);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for tcp4 flow, vsi = %d, error = %d\n",
+			vsi_num, status);
+
+	/* configure RSS for udp4 with input set IP src/dst, UDP src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV4,
+				 ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV4);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for udp4 flow, vsi = %d, error = %d\n",
+			vsi_num, status);
+
+	/* configure RSS for sctp4 with input set IP src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4,
+				 ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for sctp4 flow, vsi = %d, error = %d\n",
+			vsi_num, status);
+
+	/* configure RSS for tcp6 with input set IPv6 src/dst, TCP src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV6,
+				 ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for tcp6 flow, vsi = %d, error = %d\n",
+			vsi_num, status);
+
+	/* configure RSS for udp6 with input set IPv6 src/dst, UDP src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV6,
+				 ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV6);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for udp6 flow, vsi = %d, error = %d\n",
+			vsi_num, status);
+
+	/* configure RSS for sctp6 with input set IPv6 src/dst */
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6,
+				 ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV6);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for sctp6 flow, vsi = %d, error = %d\n",
+			vsi_num, status);
+
+	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_ESP_SPI,
+				 ICE_FLOW_SEG_HDR_ESP);
+	if (status)
+		dev_dbg(dev, "ice_add_rss_cfg failed for esp/spi flow, vsi = %d, error = %d\n",
+			vsi_num, status);
+}
+
+/**
+ * ice_vsi_cfg_frame_size - setup max frame size and Rx buffer length
+ * @vsi: VSI
+ */
+static void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
+{
+	if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
+		vsi->max_frame = ICE_MAX_FRAME_LEGACY_RX;
+		vsi->rx_buf_len = ICE_RXBUF_1664;
+#if (PAGE_SIZE < 8192)
+	} else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
+		   (vsi->netdev->mtu <= ETH_DATA_LEN)) {
+		vsi->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN;
+		vsi->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN;
+#endif
+	} else {
+		vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
+		vsi->rx_buf_len = ICE_RXBUF_3072;
+	}
+}
+
+/**
+ * ice_pf_state_is_nominal - checks the PF for nominal state
+ * @pf: pointer to PF to check
+ *
+ * Check the PF's state for a collection of bits that would indicate
+ * the PF is in a state that would inhibit normal operation for
+ * driver functionality.
+ *
+ * Returns true if PF is in a nominal state, false otherwise
+ */
+bool ice_pf_state_is_nominal(struct ice_pf *pf)
+{
+	DECLARE_BITMAP(check_bits, ICE_STATE_NBITS) = { 0 };
+
+	if (!pf)
+		return false;
+
+	bitmap_set(check_bits, 0, ICE_STATE_NOMINAL_CHECK_BITS);
+	if (bitmap_intersects(pf->state, check_bits, ICE_STATE_NBITS))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_update_eth_stats - Update VSI-specific ethernet statistics counters
+ * @vsi: the VSI to be updated
+ */
+void ice_update_eth_stats(struct ice_vsi *vsi)
+{
+	struct ice_eth_stats *prev_es, *cur_es;
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_pf *pf = vsi->back;
+	u16 vsi_num = vsi->vsi_num;    /* HW absolute index of a VSI */
+
+	prev_es = &vsi->eth_stats_prev;
+	cur_es = &vsi->eth_stats;
+
+	if (ice_is_reset_in_progress(pf->state))
+		vsi->stat_offsets_loaded = false;
+
+	ice_stat_update40(hw, GLV_GORCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_bytes, &cur_es->rx_bytes);
+
+	ice_stat_update40(hw, GLV_UPRCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_unicast, &cur_es->rx_unicast);
+
+	ice_stat_update40(hw, GLV_MPRCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_multicast, &cur_es->rx_multicast);
+
+	ice_stat_update40(hw, GLV_BPRCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_broadcast, &cur_es->rx_broadcast);
+
+	ice_stat_update32(hw, GLV_RDPC(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->rx_discards, &cur_es->rx_discards);
+
+	ice_stat_update40(hw, GLV_GOTCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_bytes, &cur_es->tx_bytes);
+
+	ice_stat_update40(hw, GLV_UPTCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_unicast, &cur_es->tx_unicast);
+
+	ice_stat_update40(hw, GLV_MPTCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_multicast, &cur_es->tx_multicast);
+
+	ice_stat_update40(hw, GLV_BPTCL(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_broadcast, &cur_es->tx_broadcast);
+
+	ice_stat_update32(hw, GLV_TEPC(vsi_num), vsi->stat_offsets_loaded,
+			  &prev_es->tx_errors, &cur_es->tx_errors);
+
+	vsi->stat_offsets_loaded = true;
+}
+
+/**
+ * ice_write_qrxflxp_cntxt - write/configure QRXFLXP_CNTXT register
+ * @hw: HW pointer
+ * @pf_q: index of the Rx queue in the PF's queue space
+ * @rxdid: flexible descriptor RXDID
+ * @prio: priority for the RXDID for this queue
+ * @ena_ts: true to enable timestamp and false to disable timestamp
+ */
+void
+ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio,
+			bool ena_ts)
+{
+	int regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
+
+	/* clear any previous values */
+	regval &= ~(QRXFLXP_CNTXT_RXDID_IDX_M |
+		    QRXFLXP_CNTXT_RXDID_PRIO_M |
+		    QRXFLXP_CNTXT_TS_M);
+
+	regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
+		QRXFLXP_CNTXT_RXDID_IDX_M;
+
+	regval |= (prio << QRXFLXP_CNTXT_RXDID_PRIO_S) &
+		QRXFLXP_CNTXT_RXDID_PRIO_M;
+
+	if (ena_ts)
+		/* Enable TimeSync on this queue */
+		regval |= QRXFLXP_CNTXT_TS_M;
+
+	wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
+}
+
+int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx)
+{
+	if (q_idx >= vsi->num_rxq)
+		return -EINVAL;
+
+	return ice_vsi_cfg_rxq(vsi->rx_rings[q_idx]);
+}
+
+int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_tx_ring **tx_rings, u16 q_idx)
+{
+	struct ice_aqc_add_tx_qgrp *qg_buf;
+	int err;
+
+	if (q_idx >= vsi->alloc_txq || !tx_rings || !tx_rings[q_idx])
+		return -EINVAL;
+
+	qg_buf = kzalloc(struct_size(qg_buf, txqs, 1), GFP_KERNEL);
+	if (!qg_buf)
+		return -ENOMEM;
+
+	qg_buf->num_txqs = 1;
+
+	err = ice_vsi_cfg_txq(vsi, tx_rings[q_idx], qg_buf);
+	kfree(qg_buf);
+	return err;
+}
+
+/**
+ * ice_vsi_cfg_rxqs - Configure the VSI for Rx
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Rx VSI for operation.
+ */
+int ice_vsi_cfg_rxqs(struct ice_vsi *vsi)
+{
+	u16 i;
+
+	if (vsi->type == ICE_VSI_VF)
+		goto setup_rings;
+
+	ice_vsi_cfg_frame_size(vsi);
+setup_rings:
+	/* set up individual rings */
+	ice_for_each_rxq(vsi, i) {
+		int err = ice_vsi_cfg_rxq(vsi->rx_rings[i]);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vsi_cfg_txqs - Configure the VSI for Tx
+ * @vsi: the VSI being configured
+ * @rings: Tx ring array to be configured
+ * @count: number of Tx ring array elements
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx VSI for operation.
+ */
+static int
+ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_tx_ring **rings, u16 count)
+{
+	struct ice_aqc_add_tx_qgrp *qg_buf;
+	u16 q_idx = 0;
+	int err = 0;
+
+	qg_buf = kzalloc(struct_size(qg_buf, txqs, 1), GFP_KERNEL);
+	if (!qg_buf)
+		return -ENOMEM;
+
+	qg_buf->num_txqs = 1;
+
+	for (q_idx = 0; q_idx < count; q_idx++) {
+		err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
+		if (err)
+			goto err_cfg_txqs;
+	}
+
+err_cfg_txqs:
+	kfree(qg_buf);
+	return err;
+}
+
+/**
+ * ice_vsi_cfg_lan_txqs - Configure the VSI for Tx
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx VSI for operation.
+ */
+int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
+{
+	return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, vsi->num_txq);
+}
+
+/**
+ * ice_vsi_cfg_xdp_txqs - Configure Tx queues dedicated for XDP in given VSI
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and a negative value on error
+ * Configure the Tx queues dedicated for XDP in given VSI for operation.
+ */
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
+{
+	int ret;
+	int i;
+
+	ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings, vsi->num_xdp_txq);
+	if (ret)
+		return ret;
+
+	ice_for_each_rxq(vsi, i)
+		ice_tx_xsk_pool(vsi, i);
+
+	return 0;
+}
+
+/**
+ * ice_intrl_usec_to_reg - convert interrupt rate limit to register value
+ * @intrl: interrupt rate limit in usecs
+ * @gran: interrupt rate limit granularity in usecs
+ *
+ * This function converts a decimal interrupt rate limit in usecs to the format
+ * expected by firmware.
+ */
+static u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran)
+{
+	u32 val = intrl / gran;
+
+	if (val)
+		return val | GLINT_RATE_INTRL_ENA_M;
+	return 0;
+}
+
+/**
+ * ice_write_intrl - write throttle rate limit to interrupt specific register
+ * @q_vector: pointer to interrupt specific structure
+ * @intrl: throttle rate limit in microseconds to write
+ */
+void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl)
+{
+	struct ice_hw *hw = &q_vector->vsi->back->hw;
+
+	wr32(hw, GLINT_RATE(q_vector->reg_idx),
+	     ice_intrl_usec_to_reg(intrl, ICE_INTRL_GRAN_ABOVE_25));
+}
+
+static struct ice_q_vector *ice_pull_qvec_from_rc(struct ice_ring_container *rc)
+{
+	switch (rc->type) {
+	case ICE_RX_CONTAINER:
+		if (rc->rx_ring)
+			return rc->rx_ring->q_vector;
+		break;
+	case ICE_TX_CONTAINER:
+		if (rc->tx_ring)
+			return rc->tx_ring->q_vector;
+		break;
+	default:
+		break;
+	}
+
+	return NULL;
+}
+
+/**
+ * __ice_write_itr - write throttle rate to register
+ * @q_vector: pointer to interrupt data structure
+ * @rc: pointer to ring container
+ * @itr: throttle rate in microseconds to write
+ */
+static void __ice_write_itr(struct ice_q_vector *q_vector,
+			    struct ice_ring_container *rc, u16 itr)
+{
+	struct ice_hw *hw = &q_vector->vsi->back->hw;
+
+	wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
+	     ITR_REG_ALIGN(itr) >> ICE_ITR_GRAN_S);
+}
+
+/**
+ * ice_write_itr - write throttle rate to queue specific register
+ * @rc: pointer to ring container
+ * @itr: throttle rate in microseconds to write
+ */
+void ice_write_itr(struct ice_ring_container *rc, u16 itr)
+{
+	struct ice_q_vector *q_vector;
+
+	q_vector = ice_pull_qvec_from_rc(rc);
+	if (!q_vector)
+		return;
+
+	__ice_write_itr(q_vector, rc, itr);
+}
+
+/**
+ * ice_set_q_vector_intrl - set up interrupt rate limiting
+ * @q_vector: the vector to be configured
+ *
+ * Interrupt rate limiting is local to the vector, not per-queue so we must
+ * detect if either ring container has dynamic moderation enabled to decide
+ * what to set the interrupt rate limit to via INTRL settings. In the case that
+ * dynamic moderation is disabled on both, write the value with the cached
+ * setting to make sure INTRL register matches the user visible value.
+ */
+void ice_set_q_vector_intrl(struct ice_q_vector *q_vector)
+{
+	if (ITR_IS_DYNAMIC(&q_vector->tx) || ITR_IS_DYNAMIC(&q_vector->rx)) {
+		/* in the case of dynamic enabled, cap each vector to no more
+		 * than (4 us) 250,000 ints/sec, which allows low latency
+		 * but still less than 500,000 interrupts per second, which
+		 * reduces CPU a bit in the case of the lowest latency
+		 * setting. The 4 here is a value in microseconds.
+		 */
+		ice_write_intrl(q_vector, 4);
+	} else {
+		ice_write_intrl(q_vector, q_vector->intrl);
+	}
+}
+
+/**
+ * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW
+ * @vsi: the VSI being configured
+ *
+ * This configures MSIX mode interrupts for the PF VSI, and should not be used
+ * for the VF VSI.
+ */
+void ice_vsi_cfg_msix(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u16 txq = 0, rxq = 0;
+	int i, q;
+
+	ice_for_each_q_vector(vsi, i) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+		u16 reg_idx = q_vector->reg_idx;
+
+		ice_cfg_itr(hw, q_vector);
+
+		/* Both Transmit Queue Interrupt Cause Control register
+		 * and Receive Queue Interrupt Cause control register
+		 * expects MSIX_INDX field to be the vector index
+		 * within the function space and not the absolute
+		 * vector index across PF or across device.
+		 * For SR-IOV VF VSIs queue vector index always starts
+		 * with 1 since first vector index(0) is used for OICR
+		 * in VF space. Since VMDq and other PF VSIs are within
+		 * the PF function space, use the vector index that is
+		 * tracked for this PF.
+		 */
+		for (q = 0; q < q_vector->num_ring_tx; q++) {
+			ice_cfg_txq_interrupt(vsi, txq, reg_idx,
+					      q_vector->tx.itr_idx);
+			txq++;
+		}
+
+		for (q = 0; q < q_vector->num_ring_rx; q++) {
+			ice_cfg_rxq_interrupt(vsi, rxq, reg_idx,
+					      q_vector->rx.itr_idx);
+			rxq++;
+		}
+	}
+}
+
+/**
+ * ice_vsi_start_all_rx_rings - start/enable all of a VSI's Rx rings
+ * @vsi: the VSI whose rings are to be enabled
+ *
+ * Returns 0 on success and a negative value on error
+ */
+int ice_vsi_start_all_rx_rings(struct ice_vsi *vsi)
+{
+	return ice_vsi_ctrl_all_rx_rings(vsi, true);
+}
+
+/**
+ * ice_vsi_stop_all_rx_rings - stop/disable all of a VSI's Rx rings
+ * @vsi: the VSI whose rings are to be disabled
+ *
+ * Returns 0 on success and a negative value on error
+ */
+int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi)
+{
+	return ice_vsi_ctrl_all_rx_rings(vsi, false);
+}
+
+/**
+ * ice_vsi_stop_tx_rings - Disable Tx rings
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ * @rings: Tx ring array to be stopped
+ * @count: number of Tx ring array elements
+ */
+static int
+ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+		      u16 rel_vmvf_num, struct ice_tx_ring **rings, u16 count)
+{
+	u16 q_idx;
+
+	if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
+		return -EINVAL;
+
+	for (q_idx = 0; q_idx < count; q_idx++) {
+		struct ice_txq_meta txq_meta = { };
+		int status;
+
+		if (!rings || !rings[q_idx])
+			return -EINVAL;
+
+		ice_fill_txq_meta(vsi, rings[q_idx], &txq_meta);
+		status = ice_vsi_stop_tx_ring(vsi, rst_src, rel_vmvf_num,
+					      rings[q_idx], &txq_meta);
+
+		if (status)
+			return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vsi_stop_lan_tx_rings - Disable LAN Tx rings
+ * @vsi: the VSI being configured
+ * @rst_src: reset source
+ * @rel_vmvf_num: Relative ID of VF/VM
+ */
+int
+ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+			  u16 rel_vmvf_num)
+{
+	return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, vsi->num_txq);
+}
+
+/**
+ * ice_vsi_stop_xdp_tx_rings - Disable XDP Tx rings
+ * @vsi: the VSI being configured
+ */
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi)
+{
+	return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings, vsi->num_xdp_txq);
+}
+
+/**
+ * ice_vsi_is_rx_queue_active
+ * @vsi: the VSI being configured
+ *
+ * Return true if at least one queue is active.
+ */
+bool ice_vsi_is_rx_queue_active(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	int i;
+
+	ice_for_each_rxq(vsi, i) {
+		u32 rx_reg;
+		int pf_q;
+
+		pf_q = vsi->rxq_map[i];
+		rx_reg = rd32(hw, QRX_CTRL(pf_q));
+		if (rx_reg & QRX_CTRL_QENA_STAT_M)
+			return true;
+	}
+
+	return false;
+}
+
+static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
+{
+	if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) {
+		vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS;
+		vsi->tc_cfg.numtc = 1;
+		return;
+	}
+
+	/* set VSI TC information based on DCB config */
+	ice_vsi_set_dcb_tc_cfg(vsi);
+}
+
+/**
+ * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling
+ * @vsi: the VSI being configured
+ * @tx: bool to determine Tx or Rx rule
+ * @create: bool to determine create or remove Rule
+ */
+void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
+{
+	int (*eth_fltr)(struct ice_vsi *v, u16 type, u16 flag,
+			enum ice_sw_fwd_act_type act);
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	int status;
+
+	dev = ice_pf_to_dev(pf);
+	eth_fltr = create ? ice_fltr_add_eth : ice_fltr_remove_eth;
+
+	if (tx) {
+		status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_TX,
+				  ICE_DROP_PACKET);
+	} else {
+		if (ice_fw_supports_lldp_fltr_ctrl(&pf->hw)) {
+			status = ice_lldp_fltr_add_remove(&pf->hw, vsi->vsi_num,
+							  create);
+		} else {
+			status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_RX,
+					  ICE_FWD_TO_VSI);
+		}
+	}
+
+	if (status)
+		dev_dbg(dev, "Fail %s %s LLDP rule on VSI %i error: %d\n",
+			create ? "adding" : "removing", tx ? "TX" : "RX",
+			vsi->vsi_num, status);
+}
+
+/**
+ * ice_set_agg_vsi - sets up scheduler aggregator node and move VSI into it
+ * @vsi: pointer to the VSI
+ *
+ * This function will allocate new scheduler aggregator now if needed and will
+ * move specified VSI into it.
+ */
+static void ice_set_agg_vsi(struct ice_vsi *vsi)
+{
+	struct device *dev = ice_pf_to_dev(vsi->back);
+	struct ice_agg_node *agg_node_iter = NULL;
+	u32 agg_id = ICE_INVALID_AGG_NODE_ID;
+	struct ice_agg_node *agg_node = NULL;
+	int node_offset, max_agg_nodes = 0;
+	struct ice_port_info *port_info;
+	struct ice_pf *pf = vsi->back;
+	u32 agg_node_id_start = 0;
+	int status;
+
+	/* create (as needed) scheduler aggregator node and move VSI into
+	 * corresponding aggregator node
+	 * - PF aggregator node to contains VSIs of type _PF and _CTRL
+	 * - VF aggregator nodes will contain VF VSI
+	 */
+	port_info = pf->hw.port_info;
+	if (!port_info)
+		return;
+
+	switch (vsi->type) {
+	case ICE_VSI_CTRL:
+	case ICE_VSI_CHNL:
+	case ICE_VSI_LB:
+	case ICE_VSI_PF:
+	case ICE_VSI_SWITCHDEV_CTRL:
+		max_agg_nodes = ICE_MAX_PF_AGG_NODES;
+		agg_node_id_start = ICE_PF_AGG_NODE_ID_START;
+		agg_node_iter = &pf->pf_agg_node[0];
+		break;
+	case ICE_VSI_VF:
+		/* user can create 'n' VFs on a given PF, but since max children
+		 * per aggregator node can be only 64. Following code handles
+		 * aggregator(s) for VF VSIs, either selects a agg_node which
+		 * was already created provided num_vsis < 64, otherwise
+		 * select next available node, which will be created
+		 */
+		max_agg_nodes = ICE_MAX_VF_AGG_NODES;
+		agg_node_id_start = ICE_VF_AGG_NODE_ID_START;
+		agg_node_iter = &pf->vf_agg_node[0];
+		break;
+	default:
+		/* other VSI type, handle later if needed */
+		dev_dbg(dev, "unexpected VSI type %s\n",
+			ice_vsi_type_str(vsi->type));
+		return;
+	}
+
+	/* find the appropriate aggregator node */
+	for (node_offset = 0; node_offset < max_agg_nodes; node_offset++) {
+		/* see if we can find space in previously created
+		 * node if num_vsis < 64, otherwise skip
+		 */
+		if (agg_node_iter->num_vsis &&
+		    agg_node_iter->num_vsis == ICE_MAX_VSIS_IN_AGG_NODE) {
+			agg_node_iter++;
+			continue;
+		}
+
+		if (agg_node_iter->valid &&
+		    agg_node_iter->agg_id != ICE_INVALID_AGG_NODE_ID) {
+			agg_id = agg_node_iter->agg_id;
+			agg_node = agg_node_iter;
+			break;
+		}
+
+		/* find unclaimed agg_id */
+		if (agg_node_iter->agg_id == ICE_INVALID_AGG_NODE_ID) {
+			agg_id = node_offset + agg_node_id_start;
+			agg_node = agg_node_iter;
+			break;
+		}
+		/* move to next agg_node */
+		agg_node_iter++;
+	}
+
+	if (!agg_node)
+		return;
+
+	/* if selected aggregator node was not created, create it */
+	if (!agg_node->valid) {
+		status = ice_cfg_agg(port_info, agg_id, ICE_AGG_TYPE_AGG,
+				     (u8)vsi->tc_cfg.ena_tc);
+		if (status) {
+			dev_err(dev, "unable to create aggregator node with agg_id %u\n",
+				agg_id);
+			return;
+		}
+		/* aggregator node is created, store the needed info */
+		agg_node->valid = true;
+		agg_node->agg_id = agg_id;
+	}
+
+	/* move VSI to corresponding aggregator node */
+	status = ice_move_vsi_to_agg(port_info, agg_id, vsi->idx,
+				     (u8)vsi->tc_cfg.ena_tc);
+	if (status) {
+		dev_err(dev, "unable to move VSI idx %u into aggregator %u node",
+			vsi->idx, agg_id);
+		return;
+	}
+
+	/* keep active children count for aggregator node */
+	agg_node->num_vsis++;
+
+	/* cache the 'agg_id' in VSI, so that after reset - VSI will be moved
+	 * to aggregator node
+	 */
+	vsi->agg_node = agg_node;
+	dev_dbg(dev, "successfully moved VSI idx %u tc_bitmap 0x%x) into aggregator node %d which has num_vsis %u\n",
+		vsi->idx, vsi->tc_cfg.ena_tc, vsi->agg_node->agg_id,
+		vsi->agg_node->num_vsis);
+}
+
+static int ice_vsi_cfg_tc_lan(struct ice_pf *pf, struct ice_vsi *vsi)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	struct device *dev = ice_pf_to_dev(pf);
+	int ret, i;
+
+	/* configure VSI nodes based on number of queues and TC's */
+	ice_for_each_traffic_class(i) {
+		if (!(vsi->tc_cfg.ena_tc & BIT(i)))
+			continue;
+
+		if (vsi->type == ICE_VSI_CHNL) {
+			if (!vsi->alloc_txq && vsi->num_txq)
+				max_txqs[i] = vsi->num_txq;
+			else
+				max_txqs[i] = pf->num_lan_tx;
+		} else {
+			max_txqs[i] = vsi->alloc_txq;
+		}
+
+		if (vsi->type == ICE_VSI_PF)
+			max_txqs[i] += vsi->num_xdp_txq;
+	}
+
+	dev_dbg(dev, "vsi->tc_cfg.ena_tc = %d\n", vsi->tc_cfg.ena_tc);
+	ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+			      max_txqs);
+	if (ret) {
+		dev_err(dev, "VSI %d failed lan queue config, error %d\n",
+			vsi->vsi_num, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vsi_cfg_def - configure default VSI based on the type
+ * @vsi: pointer to VSI
+ * @params: the parameters to configure this VSI with
+ */
+static int
+ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
+{
+	struct device *dev = ice_pf_to_dev(vsi->back);
+	struct ice_pf *pf = vsi->back;
+	int ret;
+
+	vsi->vsw = pf->first_sw;
+
+	ret = ice_vsi_alloc_def(vsi, params->ch);
+	if (ret)
+		return ret;
+
+	/* allocate memory for Tx/Rx ring stat pointers */
+	ret = ice_vsi_alloc_stat_arrays(vsi);
+	if (ret)
+		goto unroll_vsi_alloc;
+
+	ice_alloc_fd_res(vsi);
+
+	ret = ice_vsi_get_qs(vsi);
+	if (ret) {
+		dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n",
+			vsi->idx);
+		goto unroll_vsi_alloc_stat;
+	}
+
+	/* set RSS capabilities */
+	ice_vsi_set_rss_params(vsi);
+
+	/* set TC configuration */
+	ice_vsi_set_tc_cfg(vsi);
+
+	/* create the VSI */
+	ret = ice_vsi_init(vsi, params->flags);
+	if (ret)
+		goto unroll_get_qs;
+
+	ice_vsi_init_vlan_ops(vsi);
+
+	switch (vsi->type) {
+	case ICE_VSI_CTRL:
+	case ICE_VSI_SWITCHDEV_CTRL:
+	case ICE_VSI_PF:
+		ret = ice_vsi_alloc_q_vectors(vsi);
+		if (ret)
+			goto unroll_vsi_init;
+
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto unroll_vector_base;
+
+		ret = ice_vsi_alloc_ring_stats(vsi);
+		if (ret)
+			goto unroll_vector_base;
+
+		ice_vsi_map_rings_to_vectors(vsi);
+		vsi->stat_offsets_loaded = false;
+
+		if (ice_is_xdp_ena_vsi(vsi)) {
+			ret = ice_vsi_determine_xdp_res(vsi);
+			if (ret)
+				goto unroll_vector_base;
+			ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog);
+			if (ret)
+				goto unroll_vector_base;
+		}
+
+		/* ICE_VSI_CTRL does not need RSS so skip RSS processing */
+		if (vsi->type != ICE_VSI_CTRL)
+			/* Do not exit if configuring RSS had an issue, at
+			 * least receive traffic on first queue. Hence no
+			 * need to capture return value
+			 */
+			if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+				ice_vsi_cfg_rss_lut_key(vsi);
+				ice_vsi_set_rss_flow_fld(vsi);
+			}
+		ice_init_arfs(vsi);
+		break;
+	case ICE_VSI_CHNL:
+		if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+			ice_vsi_cfg_rss_lut_key(vsi);
+			ice_vsi_set_rss_flow_fld(vsi);
+		}
+		break;
+	case ICE_VSI_VF:
+		/* VF driver will take care of creating netdev for this type and
+		 * map queues to vectors through Virtchnl, PF driver only
+		 * creates a VSI and corresponding structures for bookkeeping
+		 * purpose
+		 */
+		ret = ice_vsi_alloc_q_vectors(vsi);
+		if (ret)
+			goto unroll_vsi_init;
+
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto unroll_alloc_q_vector;
+
+		ret = ice_vsi_alloc_ring_stats(vsi);
+		if (ret)
+			goto unroll_vector_base;
+
+		vsi->stat_offsets_loaded = false;
+
+		/* Do not exit if configuring RSS had an issue, at least
+		 * receive traffic on first queue. Hence no need to capture
+		 * return value
+		 */
+		if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+			ice_vsi_cfg_rss_lut_key(vsi);
+			ice_vsi_set_vf_rss_flow_fld(vsi);
+		}
+		break;
+	case ICE_VSI_LB:
+		ret = ice_vsi_alloc_rings(vsi);
+		if (ret)
+			goto unroll_vsi_init;
+
+		ret = ice_vsi_alloc_ring_stats(vsi);
+		if (ret)
+			goto unroll_vector_base;
+
+		break;
+	default:
+		/* clean up the resources and exit */
+		ret = -EINVAL;
+		goto unroll_vsi_init;
+	}
+
+	return 0;
+
+unroll_vector_base:
+	/* reclaim SW interrupts back to the common pool */
+unroll_alloc_q_vector:
+	ice_vsi_free_q_vectors(vsi);
+unroll_vsi_init:
+	ice_vsi_delete_from_hw(vsi);
+unroll_get_qs:
+	ice_vsi_put_qs(vsi);
+unroll_vsi_alloc_stat:
+	ice_vsi_free_stats(vsi);
+unroll_vsi_alloc:
+	ice_vsi_free_arrays(vsi);
+	return ret;
+}
+
+/**
+ * ice_vsi_cfg - configure a previously allocated VSI
+ * @vsi: pointer to VSI
+ * @params: parameters used to configure this VSI
+ */
+int ice_vsi_cfg(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params)
+{
+	struct ice_pf *pf = vsi->back;
+	int ret;
+
+	if (WARN_ON(params->type == ICE_VSI_VF && !params->vf))
+		return -EINVAL;
+
+	vsi->type = params->type;
+	vsi->port_info = params->pi;
+
+	/* For VSIs which don't have a connected VF, this will be NULL */
+	vsi->vf = params->vf;
+
+	ret = ice_vsi_cfg_def(vsi, params);
+	if (ret)
+		return ret;
+
+	ret = ice_vsi_cfg_tc_lan(vsi->back, vsi);
+	if (ret)
+		ice_vsi_decfg(vsi);
+
+	if (vsi->type == ICE_VSI_CTRL) {
+		if (vsi->vf) {
+			WARN_ON(vsi->vf->ctrl_vsi_idx != ICE_NO_VSI);
+			vsi->vf->ctrl_vsi_idx = vsi->idx;
+		} else {
+			WARN_ON(pf->ctrl_vsi_idx != ICE_NO_VSI);
+			pf->ctrl_vsi_idx = vsi->idx;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * ice_vsi_decfg - remove all VSI configuration
+ * @vsi: pointer to VSI
+ */
+void ice_vsi_decfg(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int err;
+
+	/* The Rx rule will only exist to remove if the LLDP FW
+	 * engine is currently stopped
+	 */
+	if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF &&
+	    !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
+		ice_cfg_sw_lldp(vsi, false, false);
+
+	ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
+	err = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx);
+	if (err)
+		dev_err(ice_pf_to_dev(pf), "Failed to remove RDMA scheduler config for VSI %u, err %d\n",
+			vsi->vsi_num, err);
+
+	if (ice_is_xdp_ena_vsi(vsi))
+		/* return value check can be skipped here, it always returns
+		 * 0 if reset is in progress
+		 */
+		ice_destroy_xdp_rings(vsi);
+
+	ice_vsi_clear_rings(vsi);
+	ice_vsi_free_q_vectors(vsi);
+	ice_vsi_put_qs(vsi);
+	ice_vsi_free_arrays(vsi);
+
+	/* SR-IOV determines needed MSIX resources all at once instead of per
+	 * VSI since when VFs are spawned we know how many VFs there are and how
+	 * many interrupts each VF needs. SR-IOV MSIX resources are also
+	 * cleared in the same manner.
+	 */
+
+	if (vsi->type == ICE_VSI_VF &&
+	    vsi->agg_node && vsi->agg_node->valid)
+		vsi->agg_node->num_vsis--;
+}
+
+/**
+ * ice_vsi_setup - Set up a VSI by a given type
+ * @pf: board private structure
+ * @params: parameters to use when creating the VSI
+ *
+ * This allocates the sw VSI structure and its queue resources.
+ *
+ * Returns pointer to the successfully allocated and configured VSI sw struct on
+ * success, NULL on failure.
+ */
+struct ice_vsi *
+ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_vsi *vsi;
+	int ret;
+
+	/* ice_vsi_setup can only initialize a new VSI, and we must have
+	 * a port_info structure for it.
+	 */
+	if (WARN_ON(!(params->flags & ICE_VSI_FLAG_INIT)) ||
+	    WARN_ON(!params->pi))
+		return NULL;
+
+	vsi = ice_vsi_alloc(pf);
+	if (!vsi) {
+		dev_err(dev, "could not allocate VSI\n");
+		return NULL;
+	}
+
+	ret = ice_vsi_cfg(vsi, params);
+	if (ret)
+		goto err_vsi_cfg;
+
+	/* Add switch rule to drop all Tx Flow Control Frames, of look up
+	 * type ETHERTYPE from VSIs, and restrict malicious VF from sending
+	 * out PAUSE or PFC frames. If enabled, FW can still send FC frames.
+	 * The rule is added once for PF VSI in order to create appropriate
+	 * recipe, since VSI/VSI list is ignored with drop action...
+	 * Also add rules to handle LLDP Tx packets.  Tx LLDP packets need to
+	 * be dropped so that VFs cannot send LLDP packets to reconfig DCB
+	 * settings in the HW.
+	 */
+	if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF) {
+		ice_fltr_add_eth(vsi, ETH_P_PAUSE, ICE_FLTR_TX,
+				 ICE_DROP_PACKET);
+		ice_cfg_sw_lldp(vsi, true, true);
+	}
+
+	if (!vsi->agg_node)
+		ice_set_agg_vsi(vsi);
+
+	return vsi;
+
+err_vsi_cfg:
+	ice_vsi_free(vsi);
+
+	return NULL;
+}
+
+/**
+ * ice_vsi_release_msix - Clear the queue to Interrupt mapping in HW
+ * @vsi: the VSI being cleaned up
+ */
+static void ice_vsi_release_msix(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 txq = 0;
+	u32 rxq = 0;
+	int i, q;
+
+	ice_for_each_q_vector(vsi, i) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+		ice_write_intrl(q_vector, 0);
+		for (q = 0; q < q_vector->num_ring_tx; q++) {
+			ice_write_itr(&q_vector->tx, 0);
+			wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
+			if (ice_is_xdp_ena_vsi(vsi)) {
+				u32 xdp_txq = txq + vsi->num_xdp_txq;
+
+				wr32(hw, QINT_TQCTL(vsi->txq_map[xdp_txq]), 0);
+			}
+			txq++;
+		}
+
+		for (q = 0; q < q_vector->num_ring_rx; q++) {
+			ice_write_itr(&q_vector->rx, 0);
+			wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), 0);
+			rxq++;
+		}
+	}
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_vsi_free_irq - Free the IRQ association with the OS
+ * @vsi: the VSI being configured
+ */
+void ice_vsi_free_irq(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int i;
+
+	if (!vsi->q_vectors || !vsi->irqs_ready)
+		return;
+
+	ice_vsi_release_msix(vsi);
+	if (vsi->type == ICE_VSI_VF)
+		return;
+
+	vsi->irqs_ready = false;
+	ice_free_cpu_rx_rmap(vsi);
+
+	ice_for_each_q_vector(vsi, i) {
+		int irq_num;
+
+		irq_num = vsi->q_vectors[i]->irq.virq;
+
+		/* free only the irqs that were actually requested */
+		if (!vsi->q_vectors[i] ||
+		    !(vsi->q_vectors[i]->num_ring_tx ||
+		      vsi->q_vectors[i]->num_ring_rx))
+			continue;
+
+		/* clear the affinity notifier in the IRQ descriptor */
+		if (!IS_ENABLED(CONFIG_RFS_ACCEL))
+			irq_set_affinity_notifier(irq_num, NULL);
+
+		/* clear the affinity_mask in the IRQ descriptor */
+		irq_set_affinity_hint(irq_num, NULL);
+		synchronize_irq(irq_num);
+		devm_free_irq(ice_pf_to_dev(pf), irq_num, vsi->q_vectors[i]);
+	}
+}
+
+/**
+ * ice_vsi_free_tx_rings - Free Tx resources for VSI queues
+ * @vsi: the VSI having resources freed
+ */
+void ice_vsi_free_tx_rings(struct ice_vsi *vsi)
+{
+	int i;
+
+	if (!vsi->tx_rings)
+		return;
+
+	ice_for_each_txq(vsi, i)
+		if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
+			ice_free_tx_ring(vsi->tx_rings[i]);
+}
+
+/**
+ * ice_vsi_free_rx_rings - Free Rx resources for VSI queues
+ * @vsi: the VSI having resources freed
+ */
+void ice_vsi_free_rx_rings(struct ice_vsi *vsi)
+{
+	int i;
+
+	if (!vsi->rx_rings)
+		return;
+
+	ice_for_each_rxq(vsi, i)
+		if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc)
+			ice_free_rx_ring(vsi->rx_rings[i]);
+}
+
+/**
+ * ice_vsi_close - Shut down a VSI
+ * @vsi: the VSI being shut down
+ */
+void ice_vsi_close(struct ice_vsi *vsi)
+{
+	if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state))
+		ice_down(vsi);
+
+	ice_vsi_free_irq(vsi);
+	ice_vsi_free_tx_rings(vsi);
+	ice_vsi_free_rx_rings(vsi);
+}
+
+/**
+ * ice_ena_vsi - resume a VSI
+ * @vsi: the VSI being resume
+ * @locked: is the rtnl_lock already held
+ */
+int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
+{
+	int err = 0;
+
+	if (!test_bit(ICE_VSI_NEEDS_RESTART, vsi->state))
+		return 0;
+
+	clear_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
+
+	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
+		if (netif_running(vsi->netdev)) {
+			if (!locked)
+				rtnl_lock();
+
+			err = ice_open_internal(vsi->netdev);
+
+			if (!locked)
+				rtnl_unlock();
+		}
+	} else if (vsi->type == ICE_VSI_CTRL) {
+		err = ice_vsi_open_ctrl(vsi);
+	}
+
+	return err;
+}
+
+/**
+ * ice_dis_vsi - pause a VSI
+ * @vsi: the VSI being paused
+ * @locked: is the rtnl_lock already held
+ */
+void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
+{
+	if (test_bit(ICE_VSI_DOWN, vsi->state))
+		return;
+
+	set_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
+
+	if (vsi->type == ICE_VSI_PF && vsi->netdev) {
+		if (netif_running(vsi->netdev)) {
+			if (!locked)
+				rtnl_lock();
+
+			ice_vsi_close(vsi);
+
+			if (!locked)
+				rtnl_unlock();
+		} else {
+			ice_vsi_close(vsi);
+		}
+	} else if (vsi->type == ICE_VSI_CTRL ||
+		   vsi->type == ICE_VSI_SWITCHDEV_CTRL) {
+		ice_vsi_close(vsi);
+	}
+}
+
+/**
+ * ice_vsi_dis_irq - Mask off queue interrupt generation on the VSI
+ * @vsi: the VSI being un-configured
+ */
+void ice_vsi_dis_irq(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+	int i;
+
+	/* disable interrupt causation from each queue */
+	if (vsi->tx_rings) {
+		ice_for_each_txq(vsi, i) {
+			if (vsi->tx_rings[i]) {
+				u16 reg;
+
+				reg = vsi->tx_rings[i]->reg_idx;
+				val = rd32(hw, QINT_TQCTL(reg));
+				val &= ~QINT_TQCTL_CAUSE_ENA_M;
+				wr32(hw, QINT_TQCTL(reg), val);
+			}
+		}
+	}
+
+	if (vsi->rx_rings) {
+		ice_for_each_rxq(vsi, i) {
+			if (vsi->rx_rings[i]) {
+				u16 reg;
+
+				reg = vsi->rx_rings[i]->reg_idx;
+				val = rd32(hw, QINT_RQCTL(reg));
+				val &= ~QINT_RQCTL_CAUSE_ENA_M;
+				wr32(hw, QINT_RQCTL(reg), val);
+			}
+		}
+	}
+
+	/* disable each interrupt */
+	ice_for_each_q_vector(vsi, i) {
+		if (!vsi->q_vectors[i])
+			continue;
+		wr32(hw, GLINT_DYN_CTL(vsi->q_vectors[i]->reg_idx), 0);
+	}
+
+	ice_flush(hw);
+
+	/* don't call synchronize_irq() for VF's from the host */
+	if (vsi->type == ICE_VSI_VF)
+		return;
+
+	ice_for_each_q_vector(vsi, i)
+		synchronize_irq(vsi->q_vectors[i]->irq.virq);
+}
+
+/**
+ * ice_vsi_release - Delete a VSI and free its resources
+ * @vsi: the VSI being removed
+ *
+ * Returns 0 on success or < 0 on error
+ */
+int ice_vsi_release(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf;
+
+	if (!vsi->back)
+		return -ENODEV;
+	pf = vsi->back;
+
+	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+		ice_rss_clean(vsi);
+
+	ice_vsi_close(vsi);
+	ice_vsi_decfg(vsi);
+
+	/* retain SW VSI data structure since it is needed to unregister and
+	 * free VSI netdev when PF is not in reset recovery pending state,\
+	 * for ex: during rmmod.
+	 */
+	if (!ice_is_reset_in_progress(pf->state))
+		ice_vsi_delete(vsi);
+
+	return 0;
+}
+
+/**
+ * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors
+ * @vsi: VSI connected with q_vectors
+ * @coalesce: array of struct with stored coalesce
+ *
+ * Returns array size.
+ */
+static int
+ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi,
+			     struct ice_coalesce_stored *coalesce)
+{
+	int i;
+
+	ice_for_each_q_vector(vsi, i) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+		coalesce[i].itr_tx = q_vector->tx.itr_settings;
+		coalesce[i].itr_rx = q_vector->rx.itr_settings;
+		coalesce[i].intrl = q_vector->intrl;
+
+		if (i < vsi->num_txq)
+			coalesce[i].tx_valid = true;
+		if (i < vsi->num_rxq)
+			coalesce[i].rx_valid = true;
+	}
+
+	return vsi->num_q_vectors;
+}
+
+/**
+ * ice_vsi_rebuild_set_coalesce - set coalesce from earlier saved arrays
+ * @vsi: VSI connected with q_vectors
+ * @coalesce: pointer to array of struct with stored coalesce
+ * @size: size of coalesce array
+ *
+ * Before this function, ice_vsi_rebuild_get_coalesce should be called to save
+ * ITR params in arrays. If size is 0 or coalesce wasn't stored set coalesce
+ * to default value.
+ */
+static void
+ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
+			     struct ice_coalesce_stored *coalesce, int size)
+{
+	struct ice_ring_container *rc;
+	int i;
+
+	if ((size && !coalesce) || !vsi)
+		return;
+
+	/* There are a couple of cases that have to be handled here:
+	 *   1. The case where the number of queue vectors stays the same, but
+	 *      the number of Tx or Rx rings changes (the first for loop)
+	 *   2. The case where the number of queue vectors increased (the
+	 *      second for loop)
+	 */
+	for (i = 0; i < size && i < vsi->num_q_vectors; i++) {
+		/* There are 2 cases to handle here and they are the same for
+		 * both Tx and Rx:
+		 *   if the entry was valid previously (coalesce[i].[tr]x_valid
+		 *   and the loop variable is less than the number of rings
+		 *   allocated, then write the previous values
+		 *
+		 *   if the entry was not valid previously, but the number of
+		 *   rings is less than are allocated (this means the number of
+		 *   rings increased from previously), then write out the
+		 *   values in the first element
+		 *
+		 *   Also, always write the ITR, even if in ITR_IS_DYNAMIC
+		 *   as there is no harm because the dynamic algorithm
+		 *   will just overwrite.
+		 */
+		if (i < vsi->alloc_rxq && coalesce[i].rx_valid) {
+			rc = &vsi->q_vectors[i]->rx;
+			rc->itr_settings = coalesce[i].itr_rx;
+			ice_write_itr(rc, rc->itr_setting);
+		} else if (i < vsi->alloc_rxq) {
+			rc = &vsi->q_vectors[i]->rx;
+			rc->itr_settings = coalesce[0].itr_rx;
+			ice_write_itr(rc, rc->itr_setting);
+		}
+
+		if (i < vsi->alloc_txq && coalesce[i].tx_valid) {
+			rc = &vsi->q_vectors[i]->tx;
+			rc->itr_settings = coalesce[i].itr_tx;
+			ice_write_itr(rc, rc->itr_setting);
+		} else if (i < vsi->alloc_txq) {
+			rc = &vsi->q_vectors[i]->tx;
+			rc->itr_settings = coalesce[0].itr_tx;
+			ice_write_itr(rc, rc->itr_setting);
+		}
+
+		vsi->q_vectors[i]->intrl = coalesce[i].intrl;
+		ice_set_q_vector_intrl(vsi->q_vectors[i]);
+	}
+
+	/* the number of queue vectors increased so write whatever is in
+	 * the first element
+	 */
+	for (; i < vsi->num_q_vectors; i++) {
+		/* transmit */
+		rc = &vsi->q_vectors[i]->tx;
+		rc->itr_settings = coalesce[0].itr_tx;
+		ice_write_itr(rc, rc->itr_setting);
+
+		/* receive */
+		rc = &vsi->q_vectors[i]->rx;
+		rc->itr_settings = coalesce[0].itr_rx;
+		ice_write_itr(rc, rc->itr_setting);
+
+		vsi->q_vectors[i]->intrl = coalesce[0].intrl;
+		ice_set_q_vector_intrl(vsi->q_vectors[i]);
+	}
+}
+
+/**
+ * ice_vsi_realloc_stat_arrays - Frees unused stat structures
+ * @vsi: VSI pointer
+ * @prev_txq: Number of Tx rings before ring reallocation
+ * @prev_rxq: Number of Rx rings before ring reallocation
+ */
+static void
+ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq)
+{
+	struct ice_vsi_stats *vsi_stat;
+	struct ice_pf *pf = vsi->back;
+	int i;
+
+	if (!prev_txq || !prev_rxq)
+		return;
+	if (vsi->type == ICE_VSI_CHNL)
+		return;
+
+	vsi_stat = pf->vsi_stats[vsi->idx];
+
+	if (vsi->num_txq < prev_txq) {
+		for (i = vsi->num_txq; i < prev_txq; i++) {
+			if (vsi_stat->tx_ring_stats[i]) {
+				kfree_rcu(vsi_stat->tx_ring_stats[i], rcu);
+				WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL);
+			}
+		}
+	}
+
+	if (vsi->num_rxq < prev_rxq) {
+		for (i = vsi->num_rxq; i < prev_rxq; i++) {
+			if (vsi_stat->rx_ring_stats[i]) {
+				kfree_rcu(vsi_stat->rx_ring_stats[i], rcu);
+				WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL);
+			}
+		}
+	}
+}
+
+/**
+ * ice_vsi_rebuild - Rebuild VSI after reset
+ * @vsi: VSI to be rebuild
+ * @vsi_flags: flags used for VSI rebuild flow
+ *
+ * Set vsi_flags to ICE_VSI_FLAG_INIT to initialize a new VSI, or
+ * ICE_VSI_FLAG_NO_INIT to rebuild an existing VSI in hardware.
+ *
+ * Returns 0 on success and negative value on failure
+ */
+int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags)
+{
+	struct ice_vsi_cfg_params params = {};
+	struct ice_coalesce_stored *coalesce;
+	int ret, prev_txq, prev_rxq;
+	int prev_num_q_vectors = 0;
+	struct ice_pf *pf;
+
+	if (!vsi)
+		return -EINVAL;
+
+	params = ice_vsi_to_params(vsi);
+	params.flags = vsi_flags;
+
+	pf = vsi->back;
+	if (WARN_ON(vsi->type == ICE_VSI_VF && !vsi->vf))
+		return -EINVAL;
+
+	coalesce = kcalloc(vsi->num_q_vectors,
+			   sizeof(struct ice_coalesce_stored), GFP_KERNEL);
+	if (!coalesce)
+		return -ENOMEM;
+
+	prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
+
+	prev_txq = vsi->num_txq;
+	prev_rxq = vsi->num_rxq;
+
+	ice_vsi_decfg(vsi);
+	ret = ice_vsi_cfg_def(vsi, &params);
+	if (ret)
+		goto err_vsi_cfg;
+
+	ret = ice_vsi_cfg_tc_lan(pf, vsi);
+	if (ret) {
+		if (vsi_flags & ICE_VSI_FLAG_INIT) {
+			ret = -EIO;
+			goto err_vsi_cfg_tc_lan;
+		}
+
+		kfree(coalesce);
+		return ice_schedule_reset(pf, ICE_RESET_PFR);
+	}
+
+	ice_vsi_realloc_stat_arrays(vsi, prev_txq, prev_rxq);
+
+	ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors);
+	kfree(coalesce);
+
+	return 0;
+
+err_vsi_cfg_tc_lan:
+	ice_vsi_decfg(vsi);
+err_vsi_cfg:
+	kfree(coalesce);
+	return ret;
+}
+
+/**
+ * ice_is_reset_in_progress - check for a reset in progress
+ * @state: PF state field
+ */
+bool ice_is_reset_in_progress(unsigned long *state)
+{
+	return test_bit(ICE_RESET_OICR_RECV, state) ||
+	       test_bit(ICE_PFR_REQ, state) ||
+	       test_bit(ICE_CORER_REQ, state) ||
+	       test_bit(ICE_GLOBR_REQ, state);
+}
+
+/**
+ * ice_wait_for_reset - Wait for driver to finish reset and rebuild
+ * @pf: pointer to the PF structure
+ * @timeout: length of time to wait, in jiffies
+ *
+ * Wait (sleep) for a short time until the driver finishes cleaning up from
+ * a device reset. The caller must be able to sleep. Use this to delay
+ * operations that could fail while the driver is cleaning up after a device
+ * reset.
+ *
+ * Returns 0 on success, -EBUSY if the reset is not finished within the
+ * timeout, and -ERESTARTSYS if the thread was interrupted.
+ */
+int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout)
+{
+	long ret;
+
+	ret = wait_event_interruptible_timeout(pf->reset_wait_queue,
+					       !ice_is_reset_in_progress(pf->state),
+					       timeout);
+	if (ret < 0)
+		return ret;
+	else if (!ret)
+		return -EBUSY;
+	else
+		return 0;
+}
+
+/**
+ * ice_vsi_update_q_map - update our copy of the VSI info with new queue map
+ * @vsi: VSI being configured
+ * @ctx: the context buffer returned from AQ VSI update command
+ */
+static void ice_vsi_update_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx)
+{
+	vsi->info.mapping_flags = ctx->info.mapping_flags;
+	memcpy(&vsi->info.q_mapping, &ctx->info.q_mapping,
+	       sizeof(vsi->info.q_mapping));
+	memcpy(&vsi->info.tc_mapping, ctx->info.tc_mapping,
+	       sizeof(vsi->info.tc_mapping));
+}
+
+/**
+ * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration
+ * @vsi: the VSI being configured
+ * @ena_tc: TC map to be enabled
+ */
+void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct ice_pf *pf = vsi->back;
+	int numtc = vsi->tc_cfg.numtc;
+	struct ice_dcbx_cfg *dcbcfg;
+	u8 netdev_tc;
+	int i;
+
+	if (!netdev)
+		return;
+
+	/* CHNL VSI doesn't have it's own netdev, hence, no netdev_tc */
+	if (vsi->type == ICE_VSI_CHNL)
+		return;
+
+	if (!ena_tc) {
+		netdev_reset_tc(netdev);
+		return;
+	}
+
+	if (vsi->type == ICE_VSI_PF && ice_is_adq_active(pf))
+		numtc = vsi->all_numtc;
+
+	if (netdev_set_num_tc(netdev, numtc))
+		return;
+
+	dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+
+	ice_for_each_traffic_class(i)
+		if (vsi->tc_cfg.ena_tc & BIT(i))
+			netdev_set_tc_queue(netdev,
+					    vsi->tc_cfg.tc_info[i].netdev_tc,
+					    vsi->tc_cfg.tc_info[i].qcount_tx,
+					    vsi->tc_cfg.tc_info[i].qoffset);
+	/* setup TC queue map for CHNL TCs */
+	ice_for_each_chnl_tc(i) {
+		if (!(vsi->all_enatc & BIT(i)))
+			break;
+		if (!vsi->mqprio_qopt.qopt.count[i])
+			break;
+		netdev_set_tc_queue(netdev, i,
+				    vsi->mqprio_qopt.qopt.count[i],
+				    vsi->mqprio_qopt.qopt.offset[i]);
+	}
+
+	if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+		return;
+
+	for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
+		u8 ets_tc = dcbcfg->etscfg.prio_table[i];
+
+		/* Get the mapped netdev TC# for the UP */
+		netdev_tc = vsi->tc_cfg.tc_info[ets_tc].netdev_tc;
+		netdev_set_prio_tc_map(netdev, i, netdev_tc);
+	}
+}
+
+/**
+ * ice_vsi_setup_q_map_mqprio - Prepares mqprio based tc_config
+ * @vsi: the VSI being configured,
+ * @ctxt: VSI context structure
+ * @ena_tc: number of traffic classes to enable
+ *
+ * Prepares VSI tc_config to have queue configurations based on MQPRIO options.
+ */
+static int
+ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt,
+			   u8 ena_tc)
+{
+	u16 pow, offset = 0, qcount_tx = 0, qcount_rx = 0, qmap;
+	u16 tc0_offset = vsi->mqprio_qopt.qopt.offset[0];
+	int tc0_qcount = vsi->mqprio_qopt.qopt.count[0];
+	u16 new_txq, new_rxq;
+	u8 netdev_tc = 0;
+	int i;
+
+	vsi->tc_cfg.ena_tc = ena_tc ? ena_tc : 1;
+
+	pow = order_base_2(tc0_qcount);
+	qmap = ((tc0_offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
+		ICE_AQ_VSI_TC_Q_OFFSET_M) |
+		((pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M);
+
+	ice_for_each_traffic_class(i) {
+		if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
+			/* TC is not enabled */
+			vsi->tc_cfg.tc_info[i].qoffset = 0;
+			vsi->tc_cfg.tc_info[i].qcount_rx = 1;
+			vsi->tc_cfg.tc_info[i].qcount_tx = 1;
+			vsi->tc_cfg.tc_info[i].netdev_tc = 0;
+			ctxt->info.tc_mapping[i] = 0;
+			continue;
+		}
+
+		offset = vsi->mqprio_qopt.qopt.offset[i];
+		qcount_rx = vsi->mqprio_qopt.qopt.count[i];
+		qcount_tx = vsi->mqprio_qopt.qopt.count[i];
+		vsi->tc_cfg.tc_info[i].qoffset = offset;
+		vsi->tc_cfg.tc_info[i].qcount_rx = qcount_rx;
+		vsi->tc_cfg.tc_info[i].qcount_tx = qcount_tx;
+		vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++;
+	}
+
+	if (vsi->all_numtc && vsi->all_numtc != vsi->tc_cfg.numtc) {
+		ice_for_each_chnl_tc(i) {
+			if (!(vsi->all_enatc & BIT(i)))
+				continue;
+			offset = vsi->mqprio_qopt.qopt.offset[i];
+			qcount_rx = vsi->mqprio_qopt.qopt.count[i];
+			qcount_tx = vsi->mqprio_qopt.qopt.count[i];
+		}
+	}
+
+	new_txq = offset + qcount_tx;
+	if (new_txq > vsi->alloc_txq) {
+		dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Tx queues (%u), than were allocated (%u)!\n",
+			new_txq, vsi->alloc_txq);
+		return -EINVAL;
+	}
+
+	new_rxq = offset + qcount_rx;
+	if (new_rxq > vsi->alloc_rxq) {
+		dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Rx queues (%u), than were allocated (%u)!\n",
+			new_rxq, vsi->alloc_rxq);
+		return -EINVAL;
+	}
+
+	/* Set actual Tx/Rx queue pairs */
+	vsi->num_txq = new_txq;
+	vsi->num_rxq = new_rxq;
+
+	/* Setup queue TC[0].qmap for given VSI context */
+	ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+	ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]);
+	ctxt->info.q_mapping[1] = cpu_to_le16(tc0_qcount);
+
+	/* Find queue count available for channel VSIs and starting offset
+	 * for channel VSIs
+	 */
+	if (tc0_qcount && tc0_qcount < vsi->num_rxq) {
+		vsi->cnt_q_avail = vsi->num_rxq - tc0_qcount;
+		vsi->next_base_q = tc0_qcount;
+	}
+	dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_txq = %d\n",  vsi->num_txq);
+	dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_rxq = %d\n",  vsi->num_rxq);
+	dev_dbg(ice_pf_to_dev(vsi->back), "all_numtc %u, all_enatc: 0x%04x, tc_cfg.numtc %u\n",
+		vsi->all_numtc, vsi->all_enatc, vsi->tc_cfg.numtc);
+
+	return 0;
+}
+
+/**
+ * ice_vsi_cfg_tc - Configure VSI Tx Sched for given TC map
+ * @vsi: VSI to be configured
+ * @ena_tc: TC bitmap
+ *
+ * VSI queues expected to be quiesced before calling this function
+ */
+int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	struct ice_pf *pf = vsi->back;
+	struct ice_tc_cfg old_tc_cfg;
+	struct ice_vsi_ctx *ctx;
+	struct device *dev;
+	int i, ret = 0;
+	u8 num_tc = 0;
+
+	dev = ice_pf_to_dev(pf);
+	if (vsi->tc_cfg.ena_tc == ena_tc &&
+	    vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL)
+		return 0;
+
+	ice_for_each_traffic_class(i) {
+		/* build bitmap of enabled TCs */
+		if (ena_tc & BIT(i))
+			num_tc++;
+		/* populate max_txqs per TC */
+		max_txqs[i] = vsi->alloc_txq;
+		/* Update max_txqs if it is CHNL VSI, because alloc_t[r]xq are
+		 * zero for CHNL VSI, hence use num_txq instead as max_txqs
+		 */
+		if (vsi->type == ICE_VSI_CHNL &&
+		    test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+			max_txqs[i] = vsi->num_txq;
+	}
+
+	memcpy(&old_tc_cfg, &vsi->tc_cfg, sizeof(old_tc_cfg));
+	vsi->tc_cfg.ena_tc = ena_tc;
+	vsi->tc_cfg.numtc = num_tc;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->vf_num = 0;
+	ctx->info = vsi->info;
+
+	if (vsi->type == ICE_VSI_PF &&
+	    test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+		ret = ice_vsi_setup_q_map_mqprio(vsi, ctx, ena_tc);
+	else
+		ret = ice_vsi_setup_q_map(vsi, ctx);
+
+	if (ret) {
+		memcpy(&vsi->tc_cfg, &old_tc_cfg, sizeof(vsi->tc_cfg));
+		goto out;
+	}
+
+	/* must to indicate which section of VSI context are being modified */
+	ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
+	ret = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL);
+	if (ret) {
+		dev_info(dev, "Failed VSI Update\n");
+		goto out;
+	}
+
+	if (vsi->type == ICE_VSI_PF &&
+	    test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+		ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, 1, max_txqs);
+	else
+		ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx,
+				      vsi->tc_cfg.ena_tc, max_txqs);
+
+	if (ret) {
+		dev_err(dev, "VSI %d failed TC config, error %d\n",
+			vsi->vsi_num, ret);
+		goto out;
+	}
+	ice_vsi_update_q_map(vsi, ctx);
+	vsi->info.valid_sections = 0;
+
+	ice_vsi_cfg_netdev_tc(vsi, ena_tc);
+out:
+	kfree(ctx);
+	return ret;
+}
+
+/**
+ * ice_update_ring_stats - Update ring statistics
+ * @stats: stats to be updated
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ *
+ * This function assumes that caller has acquired a u64_stats_sync lock.
+ */
+static void ice_update_ring_stats(struct ice_q_stats *stats, u64 pkts, u64 bytes)
+{
+	stats->bytes += bytes;
+	stats->pkts += pkts;
+}
+
+/**
+ * ice_update_tx_ring_stats - Update Tx ring specific counters
+ * @tx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes)
+{
+	u64_stats_update_begin(&tx_ring->ring_stats->syncp);
+	ice_update_ring_stats(&tx_ring->ring_stats->stats, pkts, bytes);
+	u64_stats_update_end(&tx_ring->ring_stats->syncp);
+}
+
+/**
+ * ice_update_rx_ring_stats - Update Rx ring specific counters
+ * @rx_ring: ring to update
+ * @pkts: number of processed packets
+ * @bytes: number of processed bytes
+ */
+void ice_update_rx_ring_stats(struct ice_rx_ring *rx_ring, u64 pkts, u64 bytes)
+{
+	u64_stats_update_begin(&rx_ring->ring_stats->syncp);
+	ice_update_ring_stats(&rx_ring->ring_stats->stats, pkts, bytes);
+	u64_stats_update_end(&rx_ring->ring_stats->syncp);
+}
+
+/**
+ * ice_is_dflt_vsi_in_use - check if the default forwarding VSI is being used
+ * @pi: port info of the switch with default VSI
+ *
+ * Return true if the there is a single VSI in default forwarding VSI list
+ */
+bool ice_is_dflt_vsi_in_use(struct ice_port_info *pi)
+{
+	bool exists = false;
+
+	ice_check_if_dflt_vsi(pi, 0, &exists);
+	return exists;
+}
+
+/**
+ * ice_is_vsi_dflt_vsi - check if the VSI passed in is the default VSI
+ * @vsi: VSI to compare against default forwarding VSI
+ *
+ * If this VSI passed in is the default forwarding VSI then return true, else
+ * return false
+ */
+bool ice_is_vsi_dflt_vsi(struct ice_vsi *vsi)
+{
+	return ice_check_if_dflt_vsi(vsi->port_info, vsi->idx, NULL);
+}
+
+/**
+ * ice_set_dflt_vsi - set the default forwarding VSI
+ * @vsi: VSI getting set as the default forwarding VSI on the switch
+ *
+ * If the VSI passed in is already the default VSI and it's enabled just return
+ * success.
+ *
+ * Otherwise try to set the VSI passed in as the switch's default VSI and
+ * return the result.
+ */
+int ice_set_dflt_vsi(struct ice_vsi *vsi)
+{
+	struct device *dev;
+	int status;
+
+	if (!vsi)
+		return -EINVAL;
+
+	dev = ice_pf_to_dev(vsi->back);
+
+	if (ice_lag_is_switchdev_running(vsi->back)) {
+		dev_dbg(dev, "VSI %d passed is a part of LAG containing interfaces in switchdev mode, nothing to do\n",
+			vsi->vsi_num);
+		return 0;
+	}
+
+	/* the VSI passed in is already the default VSI */
+	if (ice_is_vsi_dflt_vsi(vsi)) {
+		dev_dbg(dev, "VSI %d passed in is already the default forwarding VSI, nothing to do\n",
+			vsi->vsi_num);
+		return 0;
+	}
+
+	status = ice_cfg_dflt_vsi(vsi->port_info, vsi->idx, true, ICE_FLTR_RX);
+	if (status) {
+		dev_err(dev, "Failed to set VSI %d as the default forwarding VSI, error %d\n",
+			vsi->vsi_num, status);
+		return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_clear_dflt_vsi - clear the default forwarding VSI
+ * @vsi: VSI to remove from filter list
+ *
+ * If the switch has no default VSI or it's not enabled then return error.
+ *
+ * Otherwise try to clear the default VSI and return the result.
+ */
+int ice_clear_dflt_vsi(struct ice_vsi *vsi)
+{
+	struct device *dev;
+	int status;
+
+	if (!vsi)
+		return -EINVAL;
+
+	dev = ice_pf_to_dev(vsi->back);
+
+	/* there is no default VSI configured */
+	if (!ice_is_dflt_vsi_in_use(vsi->port_info))
+		return -ENODEV;
+
+	status = ice_cfg_dflt_vsi(vsi->port_info, vsi->idx, false,
+				  ICE_FLTR_RX);
+	if (status) {
+		dev_err(dev, "Failed to clear the default forwarding VSI %d, error %d\n",
+			vsi->vsi_num, status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_link_speed_mbps - get link speed in Mbps
+ * @vsi: the VSI whose link speed is being queried
+ *
+ * Return current VSI link speed and 0 if the speed is unknown.
+ */
+int ice_get_link_speed_mbps(struct ice_vsi *vsi)
+{
+	unsigned int link_speed;
+
+	link_speed = vsi->port_info->phy.link_info.link_speed;
+
+	return (int)ice_get_link_speed(fls(link_speed) - 1);
+}
+
+/**
+ * ice_get_link_speed_kbps - get link speed in Kbps
+ * @vsi: the VSI whose link speed is being queried
+ *
+ * Return current VSI link speed and 0 if the speed is unknown.
+ */
+int ice_get_link_speed_kbps(struct ice_vsi *vsi)
+{
+	int speed_mbps;
+
+	speed_mbps = ice_get_link_speed_mbps(vsi);
+
+	return speed_mbps * 1000;
+}
+
+/**
+ * ice_set_min_bw_limit - setup minimum BW limit for Tx based on min_tx_rate
+ * @vsi: VSI to be configured
+ * @min_tx_rate: min Tx rate in Kbps to be configured as BW limit
+ *
+ * If the min_tx_rate is specified as 0 that means to clear the minimum BW limit
+ * profile, otherwise a non-zero value will force a minimum BW limit for the VSI
+ * on TC 0.
+ */
+int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	int status;
+	int speed;
+
+	dev = ice_pf_to_dev(pf);
+	if (!vsi->port_info) {
+		dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n",
+			vsi->idx, vsi->type);
+		return -EINVAL;
+	}
+
+	speed = ice_get_link_speed_kbps(vsi);
+	if (min_tx_rate > (u64)speed) {
+		dev_err(dev, "invalid min Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n",
+			min_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx,
+			speed);
+		return -EINVAL;
+	}
+
+	/* Configure min BW for VSI limit */
+	if (min_tx_rate) {
+		status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0,
+						   ICE_MIN_BW, min_tx_rate);
+		if (status) {
+			dev_err(dev, "failed to set min Tx rate(%llu Kbps) for %s %d\n",
+				min_tx_rate, ice_vsi_type_str(vsi->type),
+				vsi->idx);
+			return status;
+		}
+
+		dev_dbg(dev, "set min Tx rate(%llu Kbps) for %s\n",
+			min_tx_rate, ice_vsi_type_str(vsi->type));
+	} else {
+		status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info,
+							vsi->idx, 0,
+							ICE_MIN_BW);
+		if (status) {
+			dev_err(dev, "failed to clear min Tx rate configuration for %s %d\n",
+				ice_vsi_type_str(vsi->type), vsi->idx);
+			return status;
+		}
+
+		dev_dbg(dev, "cleared min Tx rate configuration for %s %d\n",
+			ice_vsi_type_str(vsi->type), vsi->idx);
+	}
+
+	return 0;
+}
+
+/**
+ * ice_set_max_bw_limit - setup maximum BW limit for Tx based on max_tx_rate
+ * @vsi: VSI to be configured
+ * @max_tx_rate: max Tx rate in Kbps to be configured as BW limit
+ *
+ * If the max_tx_rate is specified as 0 that means to clear the maximum BW limit
+ * profile, otherwise a non-zero value will force a maximum BW limit for the VSI
+ * on TC 0.
+ */
+int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	int status;
+	int speed;
+
+	dev = ice_pf_to_dev(pf);
+	if (!vsi->port_info) {
+		dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n",
+			vsi->idx, vsi->type);
+		return -EINVAL;
+	}
+
+	speed = ice_get_link_speed_kbps(vsi);
+	if (max_tx_rate > (u64)speed) {
+		dev_err(dev, "invalid max Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n",
+			max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx,
+			speed);
+		return -EINVAL;
+	}
+
+	/* Configure max BW for VSI limit */
+	if (max_tx_rate) {
+		status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0,
+						   ICE_MAX_BW, max_tx_rate);
+		if (status) {
+			dev_err(dev, "failed setting max Tx rate(%llu Kbps) for %s %d\n",
+				max_tx_rate, ice_vsi_type_str(vsi->type),
+				vsi->idx);
+			return status;
+		}
+
+		dev_dbg(dev, "set max Tx rate(%llu Kbps) for %s %d\n",
+			max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx);
+	} else {
+		status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info,
+							vsi->idx, 0,
+							ICE_MAX_BW);
+		if (status) {
+			dev_err(dev, "failed clearing max Tx rate configuration for %s %d\n",
+				ice_vsi_type_str(vsi->type), vsi->idx);
+			return status;
+		}
+
+		dev_dbg(dev, "cleared max Tx rate configuration for %s %d\n",
+			ice_vsi_type_str(vsi->type), vsi->idx);
+	}
+
+	return 0;
+}
+
+/**
+ * ice_set_link - turn on/off physical link
+ * @vsi: VSI to modify physical link on
+ * @ena: turn on/off physical link
+ */
+int ice_set_link(struct ice_vsi *vsi, bool ena)
+{
+	struct device *dev = ice_pf_to_dev(vsi->back);
+	struct ice_port_info *pi = vsi->port_info;
+	struct ice_hw *hw = pi->hw;
+	int status;
+
+	if (vsi->type != ICE_VSI_PF)
+		return -EINVAL;
+
+	status = ice_aq_set_link_restart_an(pi, ena, NULL);
+
+	/* if link is owned by manageability, FW will return ICE_AQ_RC_EMODE.
+	 * this is not a fatal error, so print a warning message and return
+	 * a success code. Return an error if FW returns an error code other
+	 * than ICE_AQ_RC_EMODE
+	 */
+	if (status == -EIO) {
+		if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE)
+			dev_dbg(dev, "can't set link to %s, err %d aq_err %s. not fatal, continuing\n",
+				(ena ? "ON" : "OFF"), status,
+				ice_aq_str(hw->adminq.sq_last_status));
+	} else if (status) {
+		dev_err(dev, "can't set link to %s, err %d aq_err %s\n",
+			(ena ? "ON" : "OFF"), status,
+			ice_aq_str(hw->adminq.sq_last_status));
+		return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vsi_add_vlan_zero - add VLAN 0 filter(s) for this VSI
+ * @vsi: VSI used to add VLAN filters
+ *
+ * In Single VLAN Mode (SVM), single VLAN filters via ICE_SW_LKUP_VLAN are based
+ * on the inner VLAN ID, so the VLAN TPID (i.e. 0x8100 or 0x888a8) doesn't
+ * matter. In Double VLAN Mode (DVM), outer/single VLAN filters via
+ * ICE_SW_LKUP_VLAN are based on the outer/single VLAN ID + VLAN TPID.
+ *
+ * For both modes add a VLAN 0 + no VLAN TPID filter to handle untagged traffic
+ * when VLAN pruning is enabled. Also, this handles VLAN 0 priority tagged
+ * traffic in SVM, since the VLAN TPID isn't part of filtering.
+ *
+ * If DVM is enabled then an explicit VLAN 0 + VLAN TPID filter needs to be
+ * added to allow VLAN 0 priority tagged traffic in DVM, since the VLAN TPID is
+ * part of filtering.
+ */
+int ice_vsi_add_vlan_zero(struct ice_vsi *vsi)
+{
+	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+	struct ice_vlan vlan;
+	int err;
+
+	vlan = ICE_VLAN(0, 0, 0);
+	err = vlan_ops->add_vlan(vsi, &vlan);
+	if (err && err != -EEXIST)
+		return err;
+
+	/* in SVM both VLAN 0 filters are identical */
+	if (!ice_is_dvm_ena(&vsi->back->hw))
+		return 0;
+
+	vlan = ICE_VLAN(ETH_P_8021Q, 0, 0);
+	err = vlan_ops->add_vlan(vsi, &vlan);
+	if (err && err != -EEXIST)
+		return err;
+
+	return 0;
+}
+
+/**
+ * ice_vsi_del_vlan_zero - delete VLAN 0 filter(s) for this VSI
+ * @vsi: VSI used to add VLAN filters
+ *
+ * Delete the VLAN 0 filters in the same manner that they were added in
+ * ice_vsi_add_vlan_zero.
+ */
+int ice_vsi_del_vlan_zero(struct ice_vsi *vsi)
+{
+	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+	struct ice_vlan vlan;
+	int err;
+
+	vlan = ICE_VLAN(0, 0, 0);
+	err = vlan_ops->del_vlan(vsi, &vlan);
+	if (err && err != -EEXIST)
+		return err;
+
+	/* in SVM both VLAN 0 filters are identical */
+	if (!ice_is_dvm_ena(&vsi->back->hw))
+		return 0;
+
+	vlan = ICE_VLAN(ETH_P_8021Q, 0, 0);
+	err = vlan_ops->del_vlan(vsi, &vlan);
+	if (err && err != -EEXIST)
+		return err;
+
+	/* when deleting the last VLAN filter, make sure to disable the VLAN
+	 * promisc mode so the filter isn't left by accident
+	 */
+	return ice_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+				    ICE_MCAST_VLAN_PROMISC_BITS, 0);
+}
+
+/**
+ * ice_vsi_num_zero_vlans - get number of VLAN 0 filters based on VLAN mode
+ * @vsi: VSI used to get the VLAN mode
+ *
+ * If DVM is enabled then 2 VLAN 0 filters are added, else if SVM is enabled
+ * then 1 VLAN 0 filter is added. See ice_vsi_add_vlan_zero for more details.
+ */
+static u16 ice_vsi_num_zero_vlans(struct ice_vsi *vsi)
+{
+#define ICE_DVM_NUM_ZERO_VLAN_FLTRS	2
+#define ICE_SVM_NUM_ZERO_VLAN_FLTRS	1
+	/* no VLAN 0 filter is created when a port VLAN is active */
+	if (vsi->type == ICE_VSI_VF) {
+		if (WARN_ON(!vsi->vf))
+			return 0;
+
+		if (ice_vf_is_port_vlan_ena(vsi->vf))
+			return 0;
+	}
+
+	if (ice_is_dvm_ena(&vsi->back->hw))
+		return ICE_DVM_NUM_ZERO_VLAN_FLTRS;
+	else
+		return ICE_SVM_NUM_ZERO_VLAN_FLTRS;
+}
+
+/**
+ * ice_vsi_has_non_zero_vlans - check if VSI has any non-zero VLANs
+ * @vsi: VSI used to determine if any non-zero VLANs have been added
+ */
+bool ice_vsi_has_non_zero_vlans(struct ice_vsi *vsi)
+{
+	return (vsi->num_vlan > ice_vsi_num_zero_vlans(vsi));
+}
+
+/**
+ * ice_vsi_num_non_zero_vlans - get the number of non-zero VLANs for this VSI
+ * @vsi: VSI used to get the number of non-zero VLANs added
+ */
+u16 ice_vsi_num_non_zero_vlans(struct ice_vsi *vsi)
+{
+	return (vsi->num_vlan - ice_vsi_num_zero_vlans(vsi));
+}
+
+/**
+ * ice_is_feature_supported
+ * @pf: pointer to the struct ice_pf instance
+ * @f: feature enum to be checked
+ *
+ * returns true if feature is supported, false otherwise
+ */
+bool ice_is_feature_supported(struct ice_pf *pf, enum ice_feature f)
+{
+	if (f < 0 || f >= ICE_F_MAX)
+		return false;
+
+	return test_bit(f, pf->features);
+}
+
+/**
+ * ice_set_feature_support
+ * @pf: pointer to the struct ice_pf instance
+ * @f: feature enum to set
+ */
+void ice_set_feature_support(struct ice_pf *pf, enum ice_feature f)
+{
+	if (f < 0 || f >= ICE_F_MAX)
+		return;
+
+	set_bit(f, pf->features);
+}
+
+/**
+ * ice_clear_feature_support
+ * @pf: pointer to the struct ice_pf instance
+ * @f: feature enum to clear
+ */
+void ice_clear_feature_support(struct ice_pf *pf, enum ice_feature f)
+{
+	if (f < 0 || f >= ICE_F_MAX)
+		return;
+
+	clear_bit(f, pf->features);
+}
+
+/**
+ * ice_init_feature_support
+ * @pf: pointer to the struct ice_pf instance
+ *
+ * called during init to setup supported feature
+ */
+void ice_init_feature_support(struct ice_pf *pf)
+{
+	switch (pf->hw.device_id) {
+	case ICE_DEV_ID_E810C_BACKPLANE:
+	case ICE_DEV_ID_E810C_QSFP:
+	case ICE_DEV_ID_E810C_SFP:
+		ice_set_feature_support(pf, ICE_F_DSCP);
+		ice_set_feature_support(pf, ICE_F_PTP_EXTTS);
+		if (ice_is_e810t(&pf->hw)) {
+			ice_set_feature_support(pf, ICE_F_SMA_CTRL);
+			if (ice_gnss_is_gps_present(&pf->hw))
+				ice_set_feature_support(pf, ICE_F_GNSS);
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * ice_vsi_update_security - update security block in VSI
+ * @vsi: pointer to VSI structure
+ * @fill: function pointer to fill ctx
+ */
+int
+ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *))
+{
+	struct ice_vsi_ctx ctx = { 0 };
+
+	ctx.info = vsi->info;
+	ctx.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+	fill(&ctx);
+
+	if (ice_update_vsi(&vsi->back->hw, vsi->idx, &ctx, NULL))
+		return -ENODEV;
+
+	vsi->info = ctx.info;
+	return 0;
+}
+
+/**
+ * ice_vsi_ctx_set_antispoof - set antispoof function in VSI ctx
+ * @ctx: pointer to VSI ctx structure
+ */
+void ice_vsi_ctx_set_antispoof(struct ice_vsi_ctx *ctx)
+{
+	ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
+			       (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+				ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+}
+
+/**
+ * ice_vsi_ctx_clear_antispoof - clear antispoof function in VSI ctx
+ * @ctx: pointer to VSI ctx structure
+ */
+void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx)
+{
+	ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF &
+			       ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+				 ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+}
+
+/**
+ * ice_vsi_ctx_set_allow_override - allow destination override on VSI
+ * @ctx: pointer to VSI ctx structure
+ */
+void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx)
+{
+	ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
+}
+
+/**
+ * ice_vsi_ctx_clear_allow_override - turn off destination override on VSI
+ * @ctx: pointer to VSI ctx structure
+ */
+void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx)
+{
+	ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD;
+}
+
+/**
+ * ice_vsi_update_local_lb - update sw block in VSI with local loopback bit
+ * @vsi: pointer to VSI structure
+ * @set: set or unset the bit
+ */
+int
+ice_vsi_update_local_lb(struct ice_vsi *vsi, bool set)
+{
+	struct ice_vsi_ctx ctx = {
+		.info	= vsi->info,
+	};
+
+	ctx.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
+	if (set)
+		ctx.info.sw_flags |= ICE_AQ_VSI_SW_FLAG_LOCAL_LB;
+	else
+		ctx.info.sw_flags &= ~ICE_AQ_VSI_SW_FLAG_LOCAL_LB;
+
+	if (ice_update_vsi(&vsi->back->hw, vsi->idx, &ctx, NULL))
+		return -ENODEV;
+
+	vsi->info = ctx.info;
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
new file mode 100644
index 0000000000..f24f5d1e6f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_LIB_H_
+#define _ICE_LIB_H_
+
+#include "ice.h"
+#include "ice_vlan.h"
+
+/* Flags used for VSI configuration and rebuild */
+#define ICE_VSI_FLAG_INIT	BIT(0)
+#define ICE_VSI_FLAG_NO_INIT	0
+
+/**
+ * struct ice_vsi_cfg_params - VSI configuration parameters
+ * @pi: pointer to the port_info instance for the VSI
+ * @ch: pointer to the channel structure for the VSI, may be NULL
+ * @vf: pointer to the VF associated with this VSI, may be NULL
+ * @type: the type of VSI to configure
+ * @flags: VSI flags used for rebuild and configuration
+ *
+ * Parameter structure used when configuring a new VSI.
+ */
+struct ice_vsi_cfg_params {
+	struct ice_port_info *pi;
+	struct ice_channel *ch;
+	struct ice_vf *vf;
+	enum ice_vsi_type type;
+	u32 flags;
+};
+
+/**
+ * ice_vsi_to_params - Get parameters for an existing VSI
+ * @vsi: the VSI to get parameters for
+ *
+ * Fill a parameter structure for reconfiguring a VSI with its current
+ * parameters, such as during a rebuild operation.
+ */
+static inline struct ice_vsi_cfg_params ice_vsi_to_params(struct ice_vsi *vsi)
+{
+	struct ice_vsi_cfg_params params = {};
+
+	params.pi = vsi->port_info;
+	params.ch = vsi->ch;
+	params.vf = vsi->vf;
+	params.type = vsi->type;
+
+	return params;
+}
+
+const char *ice_vsi_type_str(enum ice_vsi_type vsi_type);
+
+bool ice_pf_state_is_nominal(struct ice_pf *pf);
+
+void ice_update_eth_stats(struct ice_vsi *vsi);
+
+int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx);
+
+int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_tx_ring **tx_rings, u16 q_idx);
+
+int ice_vsi_cfg_rxqs(struct ice_vsi *vsi);
+
+int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
+
+void ice_vsi_cfg_msix(struct ice_vsi *vsi);
+
+int ice_vsi_start_all_rx_rings(struct ice_vsi *vsi);
+
+int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi);
+
+int
+ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
+			  u16 rel_vmvf_num);
+
+int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi);
+
+int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi);
+
+void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
+
+int ice_set_link(struct ice_vsi *vsi, bool ena);
+
+void ice_vsi_delete(struct ice_vsi *vsi);
+
+int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc);
+
+int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi);
+
+void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc);
+
+struct ice_vsi *
+ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params);
+
+int ice_vsi_release(struct ice_vsi *vsi);
+
+void ice_vsi_close(struct ice_vsi *vsi);
+
+int ice_ena_vsi(struct ice_vsi *vsi, bool locked);
+
+void ice_vsi_decfg(struct ice_vsi *vsi);
+void ice_dis_vsi(struct ice_vsi *vsi, bool locked);
+
+int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags);
+int ice_vsi_cfg(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params);
+
+bool ice_is_reset_in_progress(unsigned long *state);
+int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout);
+
+void
+ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio,
+			bool ena_ts);
+
+void ice_vsi_dis_irq(struct ice_vsi *vsi);
+
+void ice_vsi_free_irq(struct ice_vsi *vsi);
+
+void ice_vsi_free_rx_rings(struct ice_vsi *vsi);
+
+void ice_vsi_free_tx_rings(struct ice_vsi *vsi);
+
+void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
+
+void ice_vsi_cfg_crc_strip(struct ice_vsi *vsi, bool disable);
+
+void ice_update_tx_ring_stats(struct ice_tx_ring *ring, u64 pkts, u64 bytes);
+
+void ice_update_rx_ring_stats(struct ice_rx_ring *ring, u64 pkts, u64 bytes);
+
+void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl);
+void ice_write_itr(struct ice_ring_container *rc, u16 itr);
+void ice_set_q_vector_intrl(struct ice_q_vector *q_vector);
+
+int ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set);
+
+bool ice_is_safe_mode(struct ice_pf *pf);
+bool ice_is_rdma_ena(struct ice_pf *pf);
+bool ice_is_dflt_vsi_in_use(struct ice_port_info *pi);
+bool ice_is_vsi_dflt_vsi(struct ice_vsi *vsi);
+int ice_set_dflt_vsi(struct ice_vsi *vsi);
+int ice_clear_dflt_vsi(struct ice_vsi *vsi);
+int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate);
+int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate);
+int ice_get_link_speed_kbps(struct ice_vsi *vsi);
+int ice_get_link_speed_mbps(struct ice_vsi *vsi);
+int
+ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *));
+
+void ice_vsi_ctx_set_antispoof(struct ice_vsi_ctx *ctx);
+
+void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx);
+
+void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx);
+
+void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx);
+int ice_vsi_update_local_lb(struct ice_vsi *vsi, bool set);
+int ice_vsi_add_vlan_zero(struct ice_vsi *vsi);
+int ice_vsi_del_vlan_zero(struct ice_vsi *vsi);
+bool ice_vsi_has_non_zero_vlans(struct ice_vsi *vsi);
+u16 ice_vsi_num_non_zero_vlans(struct ice_vsi *vsi);
+bool ice_is_feature_supported(struct ice_pf *pf, enum ice_feature f);
+void ice_set_feature_support(struct ice_pf *pf, enum ice_feature f);
+void ice_clear_feature_support(struct ice_pf *pf, enum ice_feature f);
+void ice_init_feature_support(struct ice_pf *pf);
+bool ice_vsi_is_rx_queue_active(struct ice_vsi *vsi);
+#endif /* !_ICE_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
new file mode 100644
index 0000000000..d8d2aa4c02
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -0,0 +1,9301 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* Intel(R) Ethernet Connection E800 Series Linux Driver */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <generated/utsrelease.h>
+#include <linux/crash_dump.h>
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+#include "ice_dcb_lib.h"
+#include "ice_dcb_nl.h"
+#include "ice_devlink.h"
+/* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the
+ * ice tracepoint functions. This must be done exactly once across the
+ * ice driver.
+ */
+#define CREATE_TRACE_POINTS
+#include "ice_trace.h"
+#include "ice_eswitch.h"
+#include "ice_tc_lib.h"
+#include "ice_vsi_vlan_ops.h"
+#include <net/xdp_sock_drv.h>
+
+#define DRV_SUMMARY	"Intel(R) Ethernet Connection E800 Series Linux Driver"
+static const char ice_driver_string[] = DRV_SUMMARY;
+static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";
+
+/* DDP Package file located in firmware search paths (e.g. /lib/firmware/) */
+#define ICE_DDP_PKG_PATH	"intel/ice/ddp/"
+#define ICE_DDP_PKG_FILE	ICE_DDP_PKG_PATH "ice.pkg"
+
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION(DRV_SUMMARY);
+MODULE_LICENSE("GPL v2");
+MODULE_FIRMWARE(ICE_DDP_PKG_FILE);
+
+static int debug = -1;
+module_param(debug, int, 0644);
+#ifndef CONFIG_DYNAMIC_DEBUG
+MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
+#else
+MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)");
+#endif /* !CONFIG_DYNAMIC_DEBUG */
+
+DEFINE_STATIC_KEY_FALSE(ice_xdp_locking_key);
+EXPORT_SYMBOL(ice_xdp_locking_key);
+
+/**
+ * ice_hw_to_dev - Get device pointer from the hardware structure
+ * @hw: pointer to the device HW structure
+ *
+ * Used to access the device pointer from compilation units which can't easily
+ * include the definition of struct ice_pf without leading to circular header
+ * dependencies.
+ */
+struct device *ice_hw_to_dev(struct ice_hw *hw)
+{
+	struct ice_pf *pf = container_of(hw, struct ice_pf, hw);
+
+	return &pf->pdev->dev;
+}
+
+static struct workqueue_struct *ice_wq;
+struct workqueue_struct *ice_lag_wq;
+static const struct net_device_ops ice_netdev_safe_mode_ops;
+static const struct net_device_ops ice_netdev_ops;
+
+static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type);
+
+static void ice_vsi_release_all(struct ice_pf *pf);
+
+static int ice_rebuild_channels(struct ice_pf *pf);
+static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr);
+
+static int
+ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
+		     void *cb_priv, enum tc_setup_type type, void *type_data,
+		     void *data,
+		     void (*cleanup)(struct flow_block_cb *block_cb));
+
+bool netif_is_ice(const struct net_device *dev)
+{
+	return dev && (dev->netdev_ops == &ice_netdev_ops);
+}
+
+/**
+ * ice_get_tx_pending - returns number of Tx descriptors not processed
+ * @ring: the ring of descriptors
+ */
+static u16 ice_get_tx_pending(struct ice_tx_ring *ring)
+{
+	u16 head, tail;
+
+	head = ring->next_to_clean;
+	tail = ring->next_to_use;
+
+	if (head != tail)
+		return (head < tail) ?
+			tail - head : (tail + ring->count - head);
+	return 0;
+}
+
+/**
+ * ice_check_for_hang_subtask - check for and recover hung queues
+ * @pf: pointer to PF struct
+ */
+static void ice_check_for_hang_subtask(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi = NULL;
+	struct ice_hw *hw;
+	unsigned int i;
+	int packets;
+	u32 v;
+
+	ice_for_each_vsi(pf, v)
+		if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) {
+			vsi = pf->vsi[v];
+			break;
+		}
+
+	if (!vsi || test_bit(ICE_VSI_DOWN, vsi->state))
+		return;
+
+	if (!(vsi->netdev && netif_carrier_ok(vsi->netdev)))
+		return;
+
+	hw = &vsi->back->hw;
+
+	ice_for_each_txq(vsi, i) {
+		struct ice_tx_ring *tx_ring = vsi->tx_rings[i];
+		struct ice_ring_stats *ring_stats;
+
+		if (!tx_ring)
+			continue;
+		if (ice_ring_ch_enabled(tx_ring))
+			continue;
+
+		ring_stats = tx_ring->ring_stats;
+		if (!ring_stats)
+			continue;
+
+		if (tx_ring->desc) {
+			/* If packet counter has not changed the queue is
+			 * likely stalled, so force an interrupt for this
+			 * queue.
+			 *
+			 * prev_pkt would be negative if there was no
+			 * pending work.
+			 */
+			packets = ring_stats->stats.pkts & INT_MAX;
+			if (ring_stats->tx_stats.prev_pkt == packets) {
+				/* Trigger sw interrupt to revive the queue */
+				ice_trigger_sw_intr(hw, tx_ring->q_vector);
+				continue;
+			}
+
+			/* Memory barrier between read of packet count and call
+			 * to ice_get_tx_pending()
+			 */
+			smp_rmb();
+			ring_stats->tx_stats.prev_pkt =
+			    ice_get_tx_pending(tx_ring) ? packets : -1;
+		}
+	}
+}
+
+/**
+ * ice_init_mac_fltr - Set initial MAC filters
+ * @pf: board private structure
+ *
+ * Set initial set of MAC filters for PF VSI; configure filters for permanent
+ * address and broadcast address. If an error is encountered, netdevice will be
+ * unregistered.
+ */
+static int ice_init_mac_fltr(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi;
+	u8 *perm_addr;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return -EINVAL;
+
+	perm_addr = vsi->port_info->mac.perm_addr;
+	return ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI);
+}
+
+/**
+ * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
+ * @netdev: the net device on which the sync is happening
+ * @addr: MAC address to sync
+ *
+ * This is a callback function which is called by the in kernel device sync
+ * functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
+ * populates the tmp_sync_list, which is later used by ice_add_mac to add the
+ * MAC filters from the hardware.
+ */
+static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr,
+				     ICE_FWD_TO_VSI))
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
+ * @netdev: the net device on which the unsync is happening
+ * @addr: MAC address to unsync
+ *
+ * This is a callback function which is called by the in kernel device unsync
+ * functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
+ * populates the tmp_unsync_list, which is later used by ice_remove_mac to
+ * delete the MAC filters from the hardware.
+ */
+static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	/* Under some circumstances, we might receive a request to delete our
+	 * own device address from our uc list. Because we store the device
+	 * address in the VSI's MAC filter list, we need to ignore such
+	 * requests and not delete our device address from this list.
+	 */
+	if (ether_addr_equal(addr, netdev->dev_addr))
+		return 0;
+
+	if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr,
+				     ICE_FWD_TO_VSI))
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * ice_vsi_fltr_changed - check if filter state changed
+ * @vsi: VSI to be checked
+ *
+ * returns true if filter state has changed, false otherwise.
+ */
+static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
+{
+	return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) ||
+	       test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
+}
+
+/**
+ * ice_set_promisc - Enable promiscuous mode for a given PF
+ * @vsi: the VSI being configured
+ * @promisc_m: mask of promiscuous config bits
+ *
+ */
+static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m)
+{
+	int status;
+
+	if (vsi->type != ICE_VSI_PF)
+		return 0;
+
+	if (ice_vsi_has_non_zero_vlans(vsi)) {
+		promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX);
+		status = ice_fltr_set_vlan_vsi_promisc(&vsi->back->hw, vsi,
+						       promisc_m);
+	} else {
+		status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
+						  promisc_m, 0);
+	}
+	if (status && status != -EEXIST)
+		return status;
+
+	netdev_dbg(vsi->netdev, "set promisc filter bits for VSI %i: 0x%x\n",
+		   vsi->vsi_num, promisc_m);
+	return 0;
+}
+
+/**
+ * ice_clear_promisc - Disable promiscuous mode for a given PF
+ * @vsi: the VSI being configured
+ * @promisc_m: mask of promiscuous config bits
+ *
+ */
+static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m)
+{
+	int status;
+
+	if (vsi->type != ICE_VSI_PF)
+		return 0;
+
+	if (ice_vsi_has_non_zero_vlans(vsi)) {
+		promisc_m |= (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX);
+		status = ice_fltr_clear_vlan_vsi_promisc(&vsi->back->hw, vsi,
+							 promisc_m);
+	} else {
+		status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+						    promisc_m, 0);
+	}
+
+	netdev_dbg(vsi->netdev, "clear promisc filter bits for VSI %i: 0x%x\n",
+		   vsi->vsi_num, promisc_m);
+	return status;
+}
+
+/**
+ * ice_vsi_sync_fltr - Update the VSI filter list to the HW
+ * @vsi: ptr to the VSI
+ *
+ * Push any outstanding VSI filter changes through the AdminQ.
+ */
+static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
+{
+	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+	struct device *dev = ice_pf_to_dev(vsi->back);
+	struct net_device *netdev = vsi->netdev;
+	bool promisc_forced_on = false;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 changed_flags = 0;
+	int err;
+
+	if (!vsi->netdev)
+		return -EINVAL;
+
+	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
+		usleep_range(1000, 2000);
+
+	changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
+	vsi->current_netdev_flags = vsi->netdev->flags;
+
+	INIT_LIST_HEAD(&vsi->tmp_sync_list);
+	INIT_LIST_HEAD(&vsi->tmp_unsync_list);
+
+	if (ice_vsi_fltr_changed(vsi)) {
+		clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
+		clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
+
+		/* grab the netdev's addr_list_lock */
+		netif_addr_lock_bh(netdev);
+		__dev_uc_sync(netdev, ice_add_mac_to_sync_list,
+			      ice_add_mac_to_unsync_list);
+		__dev_mc_sync(netdev, ice_add_mac_to_sync_list,
+			      ice_add_mac_to_unsync_list);
+		/* our temp lists are populated. release lock */
+		netif_addr_unlock_bh(netdev);
+	}
+
+	/* Remove MAC addresses in the unsync list */
+	err = ice_fltr_remove_mac_list(vsi, &vsi->tmp_unsync_list);
+	ice_fltr_free_list(dev, &vsi->tmp_unsync_list);
+	if (err) {
+		netdev_err(netdev, "Failed to delete MAC filters\n");
+		/* if we failed because of alloc failures, just bail */
+		if (err == -ENOMEM)
+			goto out;
+	}
+
+	/* Add MAC addresses in the sync list */
+	err = ice_fltr_add_mac_list(vsi, &vsi->tmp_sync_list);
+	ice_fltr_free_list(dev, &vsi->tmp_sync_list);
+	/* If filter is added successfully or already exists, do not go into
+	 * 'if' condition and report it as error. Instead continue processing
+	 * rest of the function.
+	 */
+	if (err && err != -EEXIST) {
+		netdev_err(netdev, "Failed to add MAC filters\n");
+		/* If there is no more space for new umac filters, VSI
+		 * should go into promiscuous mode. There should be some
+		 * space reserved for promiscuous filters.
+		 */
+		if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOSPC &&
+		    !test_and_set_bit(ICE_FLTR_OVERFLOW_PROMISC,
+				      vsi->state)) {
+			promisc_forced_on = true;
+			netdev_warn(netdev, "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
+				    vsi->vsi_num);
+		} else {
+			goto out;
+		}
+	}
+	err = 0;
+	/* check for changes in promiscuous modes */
+	if (changed_flags & IFF_ALLMULTI) {
+		if (vsi->current_netdev_flags & IFF_ALLMULTI) {
+			err = ice_set_promisc(vsi, ICE_MCAST_PROMISC_BITS);
+			if (err) {
+				vsi->current_netdev_flags &= ~IFF_ALLMULTI;
+				goto out_promisc;
+			}
+		} else {
+			/* !(vsi->current_netdev_flags & IFF_ALLMULTI) */
+			err = ice_clear_promisc(vsi, ICE_MCAST_PROMISC_BITS);
+			if (err) {
+				vsi->current_netdev_flags |= IFF_ALLMULTI;
+				goto out_promisc;
+			}
+		}
+	}
+
+	if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
+	    test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) {
+		clear_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
+		if (vsi->current_netdev_flags & IFF_PROMISC) {
+			/* Apply Rx filter rule to get traffic from wire */
+			if (!ice_is_dflt_vsi_in_use(vsi->port_info)) {
+				err = ice_set_dflt_vsi(vsi);
+				if (err && err != -EEXIST) {
+					netdev_err(netdev, "Error %d setting default VSI %i Rx rule\n",
+						   err, vsi->vsi_num);
+					vsi->current_netdev_flags &=
+						~IFF_PROMISC;
+					goto out_promisc;
+				}
+				err = 0;
+				vlan_ops->dis_rx_filtering(vsi);
+
+				/* promiscuous mode implies allmulticast so
+				 * that VSIs that are in promiscuous mode are
+				 * subscribed to multicast packets coming to
+				 * the port
+				 */
+				err = ice_set_promisc(vsi,
+						      ICE_MCAST_PROMISC_BITS);
+				if (err)
+					goto out_promisc;
+			}
+		} else {
+			/* Clear Rx filter to remove traffic from wire */
+			if (ice_is_vsi_dflt_vsi(vsi)) {
+				err = ice_clear_dflt_vsi(vsi);
+				if (err) {
+					netdev_err(netdev, "Error %d clearing default VSI %i Rx rule\n",
+						   err, vsi->vsi_num);
+					vsi->current_netdev_flags |=
+						IFF_PROMISC;
+					goto out_promisc;
+				}
+				if (vsi->netdev->features &
+				    NETIF_F_HW_VLAN_CTAG_FILTER)
+					vlan_ops->ena_rx_filtering(vsi);
+			}
+
+			/* disable allmulti here, but only if allmulti is not
+			 * still enabled for the netdev
+			 */
+			if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) {
+				err = ice_clear_promisc(vsi,
+							ICE_MCAST_PROMISC_BITS);
+				if (err) {
+					netdev_err(netdev, "Error %d clearing multicast promiscuous on VSI %i\n",
+						   err, vsi->vsi_num);
+				}
+			}
+		}
+	}
+	goto exit;
+
+out_promisc:
+	set_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
+	goto exit;
+out:
+	/* if something went wrong then set the changed flag so we try again */
+	set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
+	set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
+exit:
+	clear_bit(ICE_CFG_BUSY, vsi->state);
+	return err;
+}
+
+/**
+ * ice_sync_fltr_subtask - Sync the VSI filter list with HW
+ * @pf: board private structure
+ */
+static void ice_sync_fltr_subtask(struct ice_pf *pf)
+{
+	int v;
+
+	if (!pf || !(test_bit(ICE_FLAG_FLTR_SYNC, pf->flags)))
+		return;
+
+	clear_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
+
+	ice_for_each_vsi(pf, v)
+		if (pf->vsi[v] && ice_vsi_fltr_changed(pf->vsi[v]) &&
+		    ice_vsi_sync_fltr(pf->vsi[v])) {
+			/* come back and try again later */
+			set_bit(ICE_FLAG_FLTR_SYNC, pf->flags);
+			break;
+		}
+}
+
+/**
+ * ice_pf_dis_all_vsi - Pause all VSIs on a PF
+ * @pf: the PF
+ * @locked: is the rtnl_lock already held
+ */
+static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked)
+{
+	int node;
+	int v;
+
+	ice_for_each_vsi(pf, v)
+		if (pf->vsi[v])
+			ice_dis_vsi(pf->vsi[v], locked);
+
+	for (node = 0; node < ICE_MAX_PF_AGG_NODES; node++)
+		pf->pf_agg_node[node].num_vsis = 0;
+
+	for (node = 0; node < ICE_MAX_VF_AGG_NODES; node++)
+		pf->vf_agg_node[node].num_vsis = 0;
+}
+
+/**
+ * ice_clear_sw_switch_recipes - clear switch recipes
+ * @pf: board private structure
+ *
+ * Mark switch recipes as not created in sw structures. There are cases where
+ * rules (especially advanced rules) need to be restored, either re-read from
+ * hardware or added again. For example after the reset. 'recp_created' flag
+ * prevents from doing that and need to be cleared upfront.
+ */
+static void ice_clear_sw_switch_recipes(struct ice_pf *pf)
+{
+	struct ice_sw_recipe *recp;
+	u8 i;
+
+	recp = pf->hw.switch_info->recp_list;
+	for (i = 0; i < ICE_MAX_NUM_RECIPES; i++)
+		recp[i].recp_created = false;
+}
+
+/**
+ * ice_prepare_for_reset - prep for reset
+ * @pf: board private structure
+ * @reset_type: reset type requested
+ *
+ * Inform or close all dependent features in prep for reset.
+ */
+static void
+ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
+{
+	struct ice_hw *hw = &pf->hw;
+	struct ice_vsi *vsi;
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type);
+
+	/* already prepared for reset */
+	if (test_bit(ICE_PREPARED_FOR_RESET, pf->state))
+		return;
+
+	ice_unplug_aux_dev(pf);
+
+	/* Notify VFs of impending reset */
+	if (ice_check_sq_alive(hw, &hw->mailboxq))
+		ice_vc_notify_reset(pf);
+
+	/* Disable VFs until reset is completed */
+	mutex_lock(&pf->vfs.table_lock);
+	ice_for_each_vf(pf, bkt, vf)
+		ice_set_vf_state_dis(vf);
+	mutex_unlock(&pf->vfs.table_lock);
+
+	if (ice_is_eswitch_mode_switchdev(pf)) {
+		if (reset_type != ICE_RESET_PFR)
+			ice_clear_sw_switch_recipes(pf);
+	}
+
+	/* release ADQ specific HW and SW resources */
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		goto skip;
+
+	/* to be on safe side, reset orig_rss_size so that normal flow
+	 * of deciding rss_size can take precedence
+	 */
+	vsi->orig_rss_size = 0;
+
+	if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
+		if (reset_type == ICE_RESET_PFR) {
+			vsi->old_ena_tc = vsi->all_enatc;
+			vsi->old_numtc = vsi->all_numtc;
+		} else {
+			ice_remove_q_channels(vsi, true);
+
+			/* for other reset type, do not support channel rebuild
+			 * hence reset needed info
+			 */
+			vsi->old_ena_tc = 0;
+			vsi->all_enatc = 0;
+			vsi->old_numtc = 0;
+			vsi->all_numtc = 0;
+			vsi->req_txq = 0;
+			vsi->req_rxq = 0;
+			clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
+			memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt));
+		}
+	}
+skip:
+
+	/* clear SW filtering DB */
+	ice_clear_hw_tbls(hw);
+	/* disable the VSIs and their queues that are not already DOWN */
+	ice_pf_dis_all_vsi(pf, false);
+
+	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+		ice_ptp_prepare_for_reset(pf);
+
+	if (ice_is_feature_supported(pf, ICE_F_GNSS))
+		ice_gnss_exit(pf);
+
+	if (hw->port_info)
+		ice_sched_clear_port(hw->port_info);
+
+	ice_shutdown_all_ctrlq(hw);
+
+	set_bit(ICE_PREPARED_FOR_RESET, pf->state);
+}
+
+/**
+ * ice_do_reset - Initiate one of many types of resets
+ * @pf: board private structure
+ * @reset_type: reset type requested before this function was called.
+ */
+static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+
+	dev_dbg(dev, "reset_type 0x%x requested\n", reset_type);
+
+	if (pf->lag && pf->lag->bonded && reset_type == ICE_RESET_PFR) {
+		dev_dbg(dev, "PFR on a bonded interface, promoting to CORER\n");
+		reset_type = ICE_RESET_CORER;
+	}
+
+	ice_prepare_for_reset(pf, reset_type);
+
+	/* trigger the reset */
+	if (ice_reset(hw, reset_type)) {
+		dev_err(dev, "reset %d failed\n", reset_type);
+		set_bit(ICE_RESET_FAILED, pf->state);
+		clear_bit(ICE_RESET_OICR_RECV, pf->state);
+		clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
+		clear_bit(ICE_PFR_REQ, pf->state);
+		clear_bit(ICE_CORER_REQ, pf->state);
+		clear_bit(ICE_GLOBR_REQ, pf->state);
+		wake_up(&pf->reset_wait_queue);
+		return;
+	}
+
+	/* PFR is a bit of a special case because it doesn't result in an OICR
+	 * interrupt. So for PFR, rebuild after the reset and clear the reset-
+	 * associated state bits.
+	 */
+	if (reset_type == ICE_RESET_PFR) {
+		pf->pfr_count++;
+		ice_rebuild(pf, reset_type);
+		clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
+		clear_bit(ICE_PFR_REQ, pf->state);
+		wake_up(&pf->reset_wait_queue);
+		ice_reset_all_vfs(pf);
+	}
+}
+
+/**
+ * ice_reset_subtask - Set up for resetting the device and driver
+ * @pf: board private structure
+ */
+static void ice_reset_subtask(struct ice_pf *pf)
+{
+	enum ice_reset_req reset_type = ICE_RESET_INVAL;
+
+	/* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
+	 * OICR interrupt. The OICR handler (ice_misc_intr) determines what type
+	 * of reset is pending and sets bits in pf->state indicating the reset
+	 * type and ICE_RESET_OICR_RECV. So, if the latter bit is set
+	 * prepare for pending reset if not already (for PF software-initiated
+	 * global resets the software should already be prepared for it as
+	 * indicated by ICE_PREPARED_FOR_RESET; for global resets initiated
+	 * by firmware or software on other PFs, that bit is not set so prepare
+	 * for the reset now), poll for reset done, rebuild and return.
+	 */
+	if (test_bit(ICE_RESET_OICR_RECV, pf->state)) {
+		/* Perform the largest reset requested */
+		if (test_and_clear_bit(ICE_CORER_RECV, pf->state))
+			reset_type = ICE_RESET_CORER;
+		if (test_and_clear_bit(ICE_GLOBR_RECV, pf->state))
+			reset_type = ICE_RESET_GLOBR;
+		if (test_and_clear_bit(ICE_EMPR_RECV, pf->state))
+			reset_type = ICE_RESET_EMPR;
+		/* return if no valid reset type requested */
+		if (reset_type == ICE_RESET_INVAL)
+			return;
+		ice_prepare_for_reset(pf, reset_type);
+
+		/* make sure we are ready to rebuild */
+		if (ice_check_reset(&pf->hw)) {
+			set_bit(ICE_RESET_FAILED, pf->state);
+		} else {
+			/* done with reset. start rebuild */
+			pf->hw.reset_ongoing = false;
+			ice_rebuild(pf, reset_type);
+			/* clear bit to resume normal operations, but
+			 * ICE_NEEDS_RESTART bit is set in case rebuild failed
+			 */
+			clear_bit(ICE_RESET_OICR_RECV, pf->state);
+			clear_bit(ICE_PREPARED_FOR_RESET, pf->state);
+			clear_bit(ICE_PFR_REQ, pf->state);
+			clear_bit(ICE_CORER_REQ, pf->state);
+			clear_bit(ICE_GLOBR_REQ, pf->state);
+			wake_up(&pf->reset_wait_queue);
+			ice_reset_all_vfs(pf);
+		}
+
+		return;
+	}
+
+	/* No pending resets to finish processing. Check for new resets */
+	if (test_bit(ICE_PFR_REQ, pf->state)) {
+		reset_type = ICE_RESET_PFR;
+		if (pf->lag && pf->lag->bonded) {
+			dev_dbg(ice_pf_to_dev(pf), "PFR on a bonded interface, promoting to CORER\n");
+			reset_type = ICE_RESET_CORER;
+		}
+	}
+	if (test_bit(ICE_CORER_REQ, pf->state))
+		reset_type = ICE_RESET_CORER;
+	if (test_bit(ICE_GLOBR_REQ, pf->state))
+		reset_type = ICE_RESET_GLOBR;
+	/* If no valid reset type requested just return */
+	if (reset_type == ICE_RESET_INVAL)
+		return;
+
+	/* reset if not already down or busy */
+	if (!test_bit(ICE_DOWN, pf->state) &&
+	    !test_bit(ICE_CFG_BUSY, pf->state)) {
+		ice_do_reset(pf, reset_type);
+	}
+}
+
+/**
+ * ice_print_topo_conflict - print topology conflict message
+ * @vsi: the VSI whose topology status is being checked
+ */
+static void ice_print_topo_conflict(struct ice_vsi *vsi)
+{
+	switch (vsi->port_info->phy.link_info.topo_media_conflict) {
+	case ICE_AQ_LINK_TOPO_CONFLICT:
+	case ICE_AQ_LINK_MEDIA_CONFLICT:
+	case ICE_AQ_LINK_TOPO_UNREACH_PRT:
+	case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
+	case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
+		netdev_info(vsi->netdev, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n");
+		break;
+	case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
+		if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, vsi->back->flags))
+			netdev_warn(vsi->netdev, "An unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules\n");
+		else
+			netdev_err(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * ice_print_link_msg - print link up or down message
+ * @vsi: the VSI whose link status is being queried
+ * @isup: boolean for if the link is now up or down
+ */
+void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
+{
+	struct ice_aqc_get_phy_caps_data *caps;
+	const char *an_advertised;
+	const char *fec_req;
+	const char *speed;
+	const char *fec;
+	const char *fc;
+	const char *an;
+	int status;
+
+	if (!vsi)
+		return;
+
+	if (vsi->current_isup == isup)
+		return;
+
+	vsi->current_isup = isup;
+
+	if (!isup) {
+		netdev_info(vsi->netdev, "NIC Link is Down\n");
+		return;
+	}
+
+	switch (vsi->port_info->phy.link_info.link_speed) {
+	case ICE_AQ_LINK_SPEED_100GB:
+		speed = "100 G";
+		break;
+	case ICE_AQ_LINK_SPEED_50GB:
+		speed = "50 G";
+		break;
+	case ICE_AQ_LINK_SPEED_40GB:
+		speed = "40 G";
+		break;
+	case ICE_AQ_LINK_SPEED_25GB:
+		speed = "25 G";
+		break;
+	case ICE_AQ_LINK_SPEED_20GB:
+		speed = "20 G";
+		break;
+	case ICE_AQ_LINK_SPEED_10GB:
+		speed = "10 G";
+		break;
+	case ICE_AQ_LINK_SPEED_5GB:
+		speed = "5 G";
+		break;
+	case ICE_AQ_LINK_SPEED_2500MB:
+		speed = "2.5 G";
+		break;
+	case ICE_AQ_LINK_SPEED_1000MB:
+		speed = "1 G";
+		break;
+	case ICE_AQ_LINK_SPEED_100MB:
+		speed = "100 M";
+		break;
+	default:
+		speed = "Unknown ";
+		break;
+	}
+
+	switch (vsi->port_info->fc.current_mode) {
+	case ICE_FC_FULL:
+		fc = "Rx/Tx";
+		break;
+	case ICE_FC_TX_PAUSE:
+		fc = "Tx";
+		break;
+	case ICE_FC_RX_PAUSE:
+		fc = "Rx";
+		break;
+	case ICE_FC_NONE:
+		fc = "None";
+		break;
+	default:
+		fc = "Unknown";
+		break;
+	}
+
+	/* Get FEC mode based on negotiated link info */
+	switch (vsi->port_info->phy.link_info.fec_info) {
+	case ICE_AQ_LINK_25G_RS_528_FEC_EN:
+	case ICE_AQ_LINK_25G_RS_544_FEC_EN:
+		fec = "RS-FEC";
+		break;
+	case ICE_AQ_LINK_25G_KR_FEC_EN:
+		fec = "FC-FEC/BASE-R";
+		break;
+	default:
+		fec = "NONE";
+		break;
+	}
+
+	/* check if autoneg completed, might be false due to not supported */
+	if (vsi->port_info->phy.link_info.an_info & ICE_AQ_AN_COMPLETED)
+		an = "True";
+	else
+		an = "False";
+
+	/* Get FEC mode requested based on PHY caps last SW configuration */
+	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+	if (!caps) {
+		fec_req = "Unknown";
+		an_advertised = "Unknown";
+		goto done;
+	}
+
+	status = ice_aq_get_phy_caps(vsi->port_info, false,
+				     ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
+	if (status)
+		netdev_info(vsi->netdev, "Get phy capability failed.\n");
+
+	an_advertised = ice_is_phy_caps_an_enabled(caps) ? "On" : "Off";
+
+	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+	    caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
+		fec_req = "RS-FEC";
+	else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+		 caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+		fec_req = "FC-FEC/BASE-R";
+	else
+		fec_req = "NONE";
+
+	kfree(caps);
+
+done:
+	netdev_info(vsi->netdev, "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg Advertised: %s, Autoneg Negotiated: %s, Flow Control: %s\n",
+		    speed, fec_req, fec, an_advertised, an, fc);
+	ice_print_topo_conflict(vsi);
+}
+
+/**
+ * ice_vsi_link_event - update the VSI's netdev
+ * @vsi: the VSI on which the link event occurred
+ * @link_up: whether or not the VSI needs to be set up or down
+ */
+static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
+{
+	if (!vsi)
+		return;
+
+	if (test_bit(ICE_VSI_DOWN, vsi->state) || !vsi->netdev)
+		return;
+
+	if (vsi->type == ICE_VSI_PF) {
+		if (link_up == netif_carrier_ok(vsi->netdev))
+			return;
+
+		if (link_up) {
+			netif_carrier_on(vsi->netdev);
+			netif_tx_wake_all_queues(vsi->netdev);
+		} else {
+			netif_carrier_off(vsi->netdev);
+			netif_tx_stop_all_queues(vsi->netdev);
+		}
+	}
+}
+
+/**
+ * ice_set_dflt_mib - send a default config MIB to the FW
+ * @pf: private PF struct
+ *
+ * This function sends a default configuration MIB to the FW.
+ *
+ * If this function errors out at any point, the driver is still able to
+ * function.  The main impact is that LFC may not operate as expected.
+ * Therefore an error state in this function should be treated with a DBG
+ * message and continue on with driver rebuild/reenable.
+ */
+static void ice_set_dflt_mib(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	u8 mib_type, *buf, *lldpmib = NULL;
+	u16 len, typelen, offset = 0;
+	struct ice_lldp_org_tlv *tlv;
+	struct ice_hw *hw = &pf->hw;
+	u32 ouisubtype;
+
+	mib_type = SET_LOCAL_MIB_TYPE_LOCAL_MIB;
+	lldpmib = kzalloc(ICE_LLDPDU_SIZE, GFP_KERNEL);
+	if (!lldpmib) {
+		dev_dbg(dev, "%s Failed to allocate MIB memory\n",
+			__func__);
+		return;
+	}
+
+	/* Add ETS CFG TLV */
+	tlv = (struct ice_lldp_org_tlv *)lldpmib;
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_IEEE_ETS_TLV_LEN);
+	tlv->typelen = htons(typelen);
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_ETS_CFG);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	buf = tlv->tlvinfo;
+	buf[0] = 0;
+
+	/* ETS CFG all UPs map to TC 0. Next 4 (1 - 4) Octets = 0.
+	 * Octets 5 - 12 are BW values, set octet 5 to 100% BW.
+	 * Octets 13 - 20 are TSA values - leave as zeros
+	 */
+	buf[5] = 0x64;
+	len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
+	offset += len + 2;
+	tlv = (struct ice_lldp_org_tlv *)
+		((char *)tlv + sizeof(tlv->typelen) + len);
+
+	/* Add ETS REC TLV */
+	buf = tlv->tlvinfo;
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_ETS_REC);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* First octet of buf is reserved
+	 * Octets 1 - 4 map UP to TC - all UPs map to zero
+	 * Octets 5 - 12 are BW values - set TC 0 to 100%.
+	 * Octets 13 - 20 are TSA value - leave as zeros
+	 */
+	buf[5] = 0x64;
+	offset += len + 2;
+	tlv = (struct ice_lldp_org_tlv *)
+		((char *)tlv + sizeof(tlv->typelen) + len);
+
+	/* Add PFC CFG TLV */
+	typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) |
+		   ICE_IEEE_PFC_TLV_LEN);
+	tlv->typelen = htons(typelen);
+
+	ouisubtype = ((ICE_IEEE_8021QAZ_OUI << ICE_LLDP_TLV_OUI_S) |
+		      ICE_IEEE_SUBTYPE_PFC_CFG);
+	tlv->ouisubtype = htonl(ouisubtype);
+
+	/* Octet 1 left as all zeros - PFC disabled */
+	buf[0] = 0x08;
+	len = (typelen & ICE_LLDP_TLV_LEN_M) >> ICE_LLDP_TLV_LEN_S;
+	offset += len + 2;
+
+	if (ice_aq_set_lldp_mib(hw, mib_type, (void *)lldpmib, offset, NULL))
+		dev_dbg(dev, "%s Failed to set default LLDP MIB\n", __func__);
+
+	kfree(lldpmib);
+}
+
+/**
+ * ice_check_phy_fw_load - check if PHY FW load failed
+ * @pf: pointer to PF struct
+ * @link_cfg_err: bitmap from the link info structure
+ *
+ * check if external PHY FW load failed and print an error message if it did
+ */
+static void ice_check_phy_fw_load(struct ice_pf *pf, u8 link_cfg_err)
+{
+	if (!(link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE)) {
+		clear_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
+		return;
+	}
+
+	if (test_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags))
+		return;
+
+	if (link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE) {
+		dev_err(ice_pf_to_dev(pf), "Device failed to load the FW for the external PHY. Please download and install the latest NVM for your device and try again\n");
+		set_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags);
+	}
+}
+
+/**
+ * ice_check_module_power
+ * @pf: pointer to PF struct
+ * @link_cfg_err: bitmap from the link info structure
+ *
+ * check module power level returned by a previous call to aq_get_link_info
+ * and print error messages if module power level is not supported
+ */
+static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err)
+{
+	/* if module power level is supported, clear the flag */
+	if (!(link_cfg_err & (ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT |
+			      ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED))) {
+		clear_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
+		return;
+	}
+
+	/* if ICE_FLAG_MOD_POWER_UNSUPPORTED was previously set and the
+	 * above block didn't clear this bit, there's nothing to do
+	 */
+	if (test_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags))
+		return;
+
+	if (link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT) {
+		dev_err(ice_pf_to_dev(pf), "The installed module is incompatible with the device's NVM image. Cannot start link\n");
+		set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
+	} else if (link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED) {
+		dev_err(ice_pf_to_dev(pf), "The module's power requirements exceed the device's power supply. Cannot start link\n");
+		set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags);
+	}
+}
+
+/**
+ * ice_check_link_cfg_err - check if link configuration failed
+ * @pf: pointer to the PF struct
+ * @link_cfg_err: bitmap from the link info structure
+ *
+ * print if any link configuration failure happens due to the value in the
+ * link_cfg_err parameter in the link info structure
+ */
+static void ice_check_link_cfg_err(struct ice_pf *pf, u8 link_cfg_err)
+{
+	ice_check_module_power(pf, link_cfg_err);
+	ice_check_phy_fw_load(pf, link_cfg_err);
+}
+
+/**
+ * ice_link_event - process the link event
+ * @pf: PF that the link event is associated with
+ * @pi: port_info for the port that the link event is associated with
+ * @link_up: true if the physical link is up and false if it is down
+ * @link_speed: current link speed received from the link event
+ *
+ * Returns 0 on success and negative on failure
+ */
+static int
+ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
+	       u16 link_speed)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_phy_info *phy_info;
+	struct ice_vsi *vsi;
+	u16 old_link_speed;
+	bool old_link;
+	int status;
+
+	phy_info = &pi->phy;
+	phy_info->link_info_old = phy_info->link_info;
+
+	old_link = !!(phy_info->link_info_old.link_info & ICE_AQ_LINK_UP);
+	old_link_speed = phy_info->link_info_old.link_speed;
+
+	/* update the link info structures and re-enable link events,
+	 * don't bail on failure due to other book keeping needed
+	 */
+	status = ice_update_link_info(pi);
+	if (status)
+		dev_dbg(dev, "Failed to update link status on port %d, err %d aq_err %s\n",
+			pi->lport, status,
+			ice_aq_str(pi->hw->adminq.sq_last_status));
+
+	ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);
+
+	/* Check if the link state is up after updating link info, and treat
+	 * this event as an UP event since the link is actually UP now.
+	 */
+	if (phy_info->link_info.link_info & ICE_AQ_LINK_UP)
+		link_up = true;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi || !vsi->port_info)
+		return -EINVAL;
+
+	/* turn off PHY if media was removed */
+	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) &&
+	    !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
+		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+		ice_set_link(vsi, false);
+	}
+
+	/* if the old link up/down and speed is the same as the new */
+	if (link_up == old_link && link_speed == old_link_speed)
+		return 0;
+
+	ice_ptp_link_change(pf, pf->hw.pf_id, link_up);
+
+	if (ice_is_dcb_active(pf)) {
+		if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+			ice_dcb_rebuild(pf);
+	} else {
+		if (link_up)
+			ice_set_dflt_mib(pf);
+	}
+	ice_vsi_link_event(vsi, link_up);
+	ice_print_link_msg(vsi, link_up);
+
+	ice_vc_notify_link_state(pf);
+
+	return 0;
+}
+
+/**
+ * ice_watchdog_subtask - periodic tasks not using event driven scheduling
+ * @pf: board private structure
+ */
+static void ice_watchdog_subtask(struct ice_pf *pf)
+{
+	int i;
+
+	/* if interface is down do nothing */
+	if (test_bit(ICE_DOWN, pf->state) ||
+	    test_bit(ICE_CFG_BUSY, pf->state))
+		return;
+
+	/* make sure we don't do these things too often */
+	if (time_before(jiffies,
+			pf->serv_tmr_prev + pf->serv_tmr_period))
+		return;
+
+	pf->serv_tmr_prev = jiffies;
+
+	/* Update the stats for active netdevs so the network stack
+	 * can look at updated numbers whenever it cares to
+	 */
+	ice_update_pf_stats(pf);
+	ice_for_each_vsi(pf, i)
+		if (pf->vsi[i] && pf->vsi[i]->netdev)
+			ice_update_vsi_stats(pf->vsi[i]);
+}
+
+/**
+ * ice_init_link_events - enable/initialize link events
+ * @pi: pointer to the port_info instance
+ *
+ * Returns -EIO on failure, 0 on success
+ */
+static int ice_init_link_events(struct ice_port_info *pi)
+{
+	u16 mask;
+
+	mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
+		       ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL |
+		       ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL));
+
+	if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
+		dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n",
+			pi->lport);
+		return -EIO;
+	}
+
+	if (ice_aq_get_link_info(pi, true, NULL, NULL)) {
+		dev_dbg(ice_hw_to_dev(pi->hw), "Failed to enable link events for port %d\n",
+			pi->lport);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_handle_link_event - handle link event via ARQ
+ * @pf: PF that the link event is associated with
+ * @event: event structure containing link status info
+ */
+static int
+ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event)
+{
+	struct ice_aqc_get_link_status_data *link_data;
+	struct ice_port_info *port_info;
+	int status;
+
+	link_data = (struct ice_aqc_get_link_status_data *)event->msg_buf;
+	port_info = pf->hw.port_info;
+	if (!port_info)
+		return -EINVAL;
+
+	status = ice_link_event(pf, port_info,
+				!!(link_data->link_info & ICE_AQ_LINK_UP),
+				le16_to_cpu(link_data->link_speed));
+	if (status)
+		dev_dbg(ice_pf_to_dev(pf), "Could not process link event, error %d\n",
+			status);
+
+	return status;
+}
+
+/**
+ * ice_aq_prep_for_event - Prepare to wait for an AdminQ event from firmware
+ * @pf: pointer to the PF private structure
+ * @task: intermediate helper storage and identifier for waiting
+ * @opcode: the opcode to wait for
+ *
+ * Prepares to wait for a specific AdminQ completion event on the ARQ for
+ * a given PF. Actual wait would be done by a call to ice_aq_wait_for_event().
+ *
+ * Calls are separated to allow caller registering for event before sending
+ * the command, which mitigates a race between registering and FW responding.
+ *
+ * To obtain only the descriptor contents, pass an task->event with null
+ * msg_buf. If the complete data buffer is desired, allocate the
+ * task->event.msg_buf with enough space ahead of time.
+ */
+void ice_aq_prep_for_event(struct ice_pf *pf, struct ice_aq_task *task,
+			   u16 opcode)
+{
+	INIT_HLIST_NODE(&task->entry);
+	task->opcode = opcode;
+	task->state = ICE_AQ_TASK_WAITING;
+
+	spin_lock_bh(&pf->aq_wait_lock);
+	hlist_add_head(&task->entry, &pf->aq_wait_list);
+	spin_unlock_bh(&pf->aq_wait_lock);
+}
+
+/**
+ * ice_aq_wait_for_event - Wait for an AdminQ event from firmware
+ * @pf: pointer to the PF private structure
+ * @task: ptr prepared by ice_aq_prep_for_event()
+ * @timeout: how long to wait, in jiffies
+ *
+ * Waits for a specific AdminQ completion event on the ARQ for a given PF. The
+ * current thread will be put to sleep until the specified event occurs or
+ * until the given timeout is reached.
+ *
+ * Returns: zero on success, or a negative error code on failure.
+ */
+int ice_aq_wait_for_event(struct ice_pf *pf, struct ice_aq_task *task,
+			  unsigned long timeout)
+{
+	enum ice_aq_task_state *state = &task->state;
+	struct device *dev = ice_pf_to_dev(pf);
+	unsigned long start = jiffies;
+	long ret;
+	int err;
+
+	ret = wait_event_interruptible_timeout(pf->aq_wait_queue,
+					       *state != ICE_AQ_TASK_WAITING,
+					       timeout);
+	switch (*state) {
+	case ICE_AQ_TASK_NOT_PREPARED:
+		WARN(1, "call to %s without ice_aq_prep_for_event()", __func__);
+		err = -EINVAL;
+		break;
+	case ICE_AQ_TASK_WAITING:
+		err = ret < 0 ? ret : -ETIMEDOUT;
+		break;
+	case ICE_AQ_TASK_CANCELED:
+		err = ret < 0 ? ret : -ECANCELED;
+		break;
+	case ICE_AQ_TASK_COMPLETE:
+		err = ret < 0 ? ret : 0;
+		break;
+	default:
+		WARN(1, "Unexpected AdminQ wait task state %u", *state);
+		err = -EINVAL;
+		break;
+	}
+
+	dev_dbg(dev, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n",
+		jiffies_to_msecs(jiffies - start),
+		jiffies_to_msecs(timeout),
+		task->opcode);
+
+	spin_lock_bh(&pf->aq_wait_lock);
+	hlist_del(&task->entry);
+	spin_unlock_bh(&pf->aq_wait_lock);
+
+	return err;
+}
+
+/**
+ * ice_aq_check_events - Check if any thread is waiting for an AdminQ event
+ * @pf: pointer to the PF private structure
+ * @opcode: the opcode of the event
+ * @event: the event to check
+ *
+ * Loops over the current list of pending threads waiting for an AdminQ event.
+ * For each matching task, copy the contents of the event into the task
+ * structure and wake up the thread.
+ *
+ * If multiple threads wait for the same opcode, they will all be woken up.
+ *
+ * Note that event->msg_buf will only be duplicated if the event has a buffer
+ * with enough space already allocated. Otherwise, only the descriptor and
+ * message length will be copied.
+ *
+ * Returns: true if an event was found, false otherwise
+ */
+static void ice_aq_check_events(struct ice_pf *pf, u16 opcode,
+				struct ice_rq_event_info *event)
+{
+	struct ice_rq_event_info *task_ev;
+	struct ice_aq_task *task;
+	bool found = false;
+
+	spin_lock_bh(&pf->aq_wait_lock);
+	hlist_for_each_entry(task, &pf->aq_wait_list, entry) {
+		if (task->state != ICE_AQ_TASK_WAITING)
+			continue;
+		if (task->opcode != opcode)
+			continue;
+
+		task_ev = &task->event;
+		memcpy(&task_ev->desc, &event->desc, sizeof(event->desc));
+		task_ev->msg_len = event->msg_len;
+
+		/* Only copy the data buffer if a destination was set */
+		if (task_ev->msg_buf && task_ev->buf_len >= event->buf_len) {
+			memcpy(task_ev->msg_buf, event->msg_buf,
+			       event->buf_len);
+			task_ev->buf_len = event->buf_len;
+		}
+
+		task->state = ICE_AQ_TASK_COMPLETE;
+		found = true;
+	}
+	spin_unlock_bh(&pf->aq_wait_lock);
+
+	if (found)
+		wake_up(&pf->aq_wait_queue);
+}
+
+/**
+ * ice_aq_cancel_waiting_tasks - Immediately cancel all waiting tasks
+ * @pf: the PF private structure
+ *
+ * Set all waiting tasks to ICE_AQ_TASK_CANCELED, and wake up their threads.
+ * This will then cause ice_aq_wait_for_event to exit with -ECANCELED.
+ */
+static void ice_aq_cancel_waiting_tasks(struct ice_pf *pf)
+{
+	struct ice_aq_task *task;
+
+	spin_lock_bh(&pf->aq_wait_lock);
+	hlist_for_each_entry(task, &pf->aq_wait_list, entry)
+		task->state = ICE_AQ_TASK_CANCELED;
+	spin_unlock_bh(&pf->aq_wait_lock);
+
+	wake_up(&pf->aq_wait_queue);
+}
+
+#define ICE_MBX_OVERFLOW_WATERMARK 64
+
+/**
+ * __ice_clean_ctrlq - helper function to clean controlq rings
+ * @pf: ptr to struct ice_pf
+ * @q_type: specific Control queue type
+ */
+static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_rq_event_info event;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_ctl_q_info *cq;
+	u16 pending, i = 0;
+	const char *qtype;
+	u32 oldval, val;
+
+	/* Do not clean control queue if/when PF reset fails */
+	if (test_bit(ICE_RESET_FAILED, pf->state))
+		return 0;
+
+	switch (q_type) {
+	case ICE_CTL_Q_ADMIN:
+		cq = &hw->adminq;
+		qtype = "Admin";
+		break;
+	case ICE_CTL_Q_SB:
+		cq = &hw->sbq;
+		qtype = "Sideband";
+		break;
+	case ICE_CTL_Q_MAILBOX:
+		cq = &hw->mailboxq;
+		qtype = "Mailbox";
+		/* we are going to try to detect a malicious VF, so set the
+		 * state to begin detection
+		 */
+		hw->mbx_snapshot.mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
+		break;
+	default:
+		dev_warn(dev, "Unknown control queue type 0x%x\n", q_type);
+		return 0;
+	}
+
+	/* check for error indications - PF_xx_AxQLEN register layout for
+	 * FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN.
+	 */
+	val = rd32(hw, cq->rq.len);
+	if (val & (PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
+		   PF_FW_ARQLEN_ARQCRIT_M)) {
+		oldval = val;
+		if (val & PF_FW_ARQLEN_ARQVFE_M)
+			dev_dbg(dev, "%s Receive Queue VF Error detected\n",
+				qtype);
+		if (val & PF_FW_ARQLEN_ARQOVFL_M) {
+			dev_dbg(dev, "%s Receive Queue Overflow Error detected\n",
+				qtype);
+		}
+		if (val & PF_FW_ARQLEN_ARQCRIT_M)
+			dev_dbg(dev, "%s Receive Queue Critical Error detected\n",
+				qtype);
+		val &= ~(PF_FW_ARQLEN_ARQVFE_M | PF_FW_ARQLEN_ARQOVFL_M |
+			 PF_FW_ARQLEN_ARQCRIT_M);
+		if (oldval != val)
+			wr32(hw, cq->rq.len, val);
+	}
+
+	val = rd32(hw, cq->sq.len);
+	if (val & (PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
+		   PF_FW_ATQLEN_ATQCRIT_M)) {
+		oldval = val;
+		if (val & PF_FW_ATQLEN_ATQVFE_M)
+			dev_dbg(dev, "%s Send Queue VF Error detected\n",
+				qtype);
+		if (val & PF_FW_ATQLEN_ATQOVFL_M) {
+			dev_dbg(dev, "%s Send Queue Overflow Error detected\n",
+				qtype);
+		}
+		if (val & PF_FW_ATQLEN_ATQCRIT_M)
+			dev_dbg(dev, "%s Send Queue Critical Error detected\n",
+				qtype);
+		val &= ~(PF_FW_ATQLEN_ATQVFE_M | PF_FW_ATQLEN_ATQOVFL_M |
+			 PF_FW_ATQLEN_ATQCRIT_M);
+		if (oldval != val)
+			wr32(hw, cq->sq.len, val);
+	}
+
+	event.buf_len = cq->rq_buf_size;
+	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
+	if (!event.msg_buf)
+		return 0;
+
+	do {
+		struct ice_mbx_data data = {};
+		u16 opcode;
+		int ret;
+
+		ret = ice_clean_rq_elem(hw, cq, &event, &pending);
+		if (ret == -EALREADY)
+			break;
+		if (ret) {
+			dev_err(dev, "%s Receive Queue event error %d\n", qtype,
+				ret);
+			break;
+		}
+
+		opcode = le16_to_cpu(event.desc.opcode);
+
+		/* Notify any thread that might be waiting for this event */
+		ice_aq_check_events(pf, opcode, &event);
+
+		switch (opcode) {
+		case ice_aqc_opc_get_link_status:
+			if (ice_handle_link_event(pf, &event))
+				dev_err(dev, "Could not handle link event\n");
+			break;
+		case ice_aqc_opc_event_lan_overflow:
+			ice_vf_lan_overflow_event(pf, &event);
+			break;
+		case ice_mbx_opc_send_msg_to_pf:
+			data.num_msg_proc = i;
+			data.num_pending_arq = pending;
+			data.max_num_msgs_mbx = hw->mailboxq.num_rq_entries;
+			data.async_watermark_val = ICE_MBX_OVERFLOW_WATERMARK;
+
+			ice_vc_process_vf_msg(pf, &event, &data);
+			break;
+		case ice_aqc_opc_fw_logging:
+			ice_output_fw_log(hw, &event.desc, event.msg_buf);
+			break;
+		case ice_aqc_opc_lldp_set_mib_change:
+			ice_dcb_process_lldp_set_mib_change(pf, &event);
+			break;
+		default:
+			dev_dbg(dev, "%s Receive Queue unknown event 0x%04x ignored\n",
+				qtype, opcode);
+			break;
+		}
+	} while (pending && (i++ < ICE_DFLT_IRQ_WORK));
+
+	kfree(event.msg_buf);
+
+	return pending && (i == ICE_DFLT_IRQ_WORK);
+}
+
+/**
+ * ice_ctrlq_pending - check if there is a difference between ntc and ntu
+ * @hw: pointer to hardware info
+ * @cq: control queue information
+ *
+ * returns true if there are pending messages in a queue, false if there aren't
+ */
+static bool ice_ctrlq_pending(struct ice_hw *hw, struct ice_ctl_q_info *cq)
+{
+	u16 ntu;
+
+	ntu = (u16)(rd32(hw, cq->rq.head) & cq->rq.head_mask);
+	return cq->rq.next_to_clean != ntu;
+}
+
+/**
+ * ice_clean_adminq_subtask - clean the AdminQ rings
+ * @pf: board private structure
+ */
+static void ice_clean_adminq_subtask(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	if (!test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
+		return;
+
+	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN))
+		return;
+
+	clear_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);
+
+	/* There might be a situation where new messages arrive to a control
+	 * queue between processing the last message and clearing the
+	 * EVENT_PENDING bit. So before exiting, check queue head again (using
+	 * ice_ctrlq_pending) and process new messages if any.
+	 */
+	if (ice_ctrlq_pending(hw, &hw->adminq))
+		__ice_clean_ctrlq(pf, ICE_CTL_Q_ADMIN);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_clean_mailboxq_subtask - clean the MailboxQ rings
+ * @pf: board private structure
+ */
+static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	if (!test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state))
+		return;
+
+	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX))
+		return;
+
+	clear_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);
+
+	if (ice_ctrlq_pending(hw, &hw->mailboxq))
+		__ice_clean_ctrlq(pf, ICE_CTL_Q_MAILBOX);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_clean_sbq_subtask - clean the Sideband Queue rings
+ * @pf: board private structure
+ */
+static void ice_clean_sbq_subtask(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	/* Nothing to do here if sideband queue is not supported */
+	if (!ice_is_sbq_supported(hw)) {
+		clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
+		return;
+	}
+
+	if (!test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state))
+		return;
+
+	if (__ice_clean_ctrlq(pf, ICE_CTL_Q_SB))
+		return;
+
+	clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
+
+	if (ice_ctrlq_pending(hw, &hw->sbq))
+		__ice_clean_ctrlq(pf, ICE_CTL_Q_SB);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_service_task_schedule - schedule the service task to wake up
+ * @pf: board private structure
+ *
+ * If not already scheduled, this puts the task into the work queue.
+ */
+void ice_service_task_schedule(struct ice_pf *pf)
+{
+	if (!test_bit(ICE_SERVICE_DIS, pf->state) &&
+	    !test_and_set_bit(ICE_SERVICE_SCHED, pf->state) &&
+	    !test_bit(ICE_NEEDS_RESTART, pf->state))
+		queue_work(ice_wq, &pf->serv_task);
+}
+
+/**
+ * ice_service_task_complete - finish up the service task
+ * @pf: board private structure
+ */
+static void ice_service_task_complete(struct ice_pf *pf)
+{
+	WARN_ON(!test_bit(ICE_SERVICE_SCHED, pf->state));
+
+	/* force memory (pf->state) to sync before next service task */
+	smp_mb__before_atomic();
+	clear_bit(ICE_SERVICE_SCHED, pf->state);
+}
+
+/**
+ * ice_service_task_stop - stop service task and cancel works
+ * @pf: board private structure
+ *
+ * Return 0 if the ICE_SERVICE_DIS bit was not already set,
+ * 1 otherwise.
+ */
+static int ice_service_task_stop(struct ice_pf *pf)
+{
+	int ret;
+
+	ret = test_and_set_bit(ICE_SERVICE_DIS, pf->state);
+
+	if (pf->serv_tmr.function)
+		del_timer_sync(&pf->serv_tmr);
+	if (pf->serv_task.func)
+		cancel_work_sync(&pf->serv_task);
+
+	clear_bit(ICE_SERVICE_SCHED, pf->state);
+	return ret;
+}
+
+/**
+ * ice_service_task_restart - restart service task and schedule works
+ * @pf: board private structure
+ *
+ * This function is needed for suspend and resume works (e.g WoL scenario)
+ */
+static void ice_service_task_restart(struct ice_pf *pf)
+{
+	clear_bit(ICE_SERVICE_DIS, pf->state);
+	ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_service_timer - timer callback to schedule service task
+ * @t: pointer to timer_list
+ */
+static void ice_service_timer(struct timer_list *t)
+{
+	struct ice_pf *pf = from_timer(pf, t, serv_tmr);
+
+	mod_timer(&pf->serv_tmr, round_jiffies(pf->serv_tmr_period + jiffies));
+	ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_handle_mdd_event - handle malicious driver detect event
+ * @pf: pointer to the PF structure
+ *
+ * Called from service task. OICR interrupt handler indicates MDD event.
+ * VF MDD logging is guarded by net_ratelimit. Additional PF and VF log
+ * messages are wrapped by netif_msg_[rx|tx]_err. Since VF Rx MDD events
+ * disable the queue, the PF can be configured to reset the VF using ethtool
+ * private flag mdd-auto-reset-vf.
+ */
+static void ice_handle_mdd_event(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	struct ice_vf *vf;
+	unsigned int bkt;
+	u32 reg;
+
+	if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING, pf->state)) {
+		/* Since the VF MDD event logging is rate limited, check if
+		 * there are pending MDD events.
+		 */
+		ice_print_vfs_mdd_events(pf);
+		return;
+	}
+
+	/* find what triggered an MDD event */
+	reg = rd32(hw, GL_MDET_TX_PQM);
+	if (reg & GL_MDET_TX_PQM_VALID_M) {
+		u8 pf_num = (reg & GL_MDET_TX_PQM_PF_NUM_M) >>
+				GL_MDET_TX_PQM_PF_NUM_S;
+		u16 vf_num = (reg & GL_MDET_TX_PQM_VF_NUM_M) >>
+				GL_MDET_TX_PQM_VF_NUM_S;
+		u8 event = (reg & GL_MDET_TX_PQM_MAL_TYPE_M) >>
+				GL_MDET_TX_PQM_MAL_TYPE_S;
+		u16 queue = ((reg & GL_MDET_TX_PQM_QNUM_M) >>
+				GL_MDET_TX_PQM_QNUM_S);
+
+		if (netif_msg_tx_err(pf))
+			dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
+				 event, queue, pf_num, vf_num);
+		wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
+	}
+
+	reg = rd32(hw, GL_MDET_TX_TCLAN);
+	if (reg & GL_MDET_TX_TCLAN_VALID_M) {
+		u8 pf_num = (reg & GL_MDET_TX_TCLAN_PF_NUM_M) >>
+				GL_MDET_TX_TCLAN_PF_NUM_S;
+		u16 vf_num = (reg & GL_MDET_TX_TCLAN_VF_NUM_M) >>
+				GL_MDET_TX_TCLAN_VF_NUM_S;
+		u8 event = (reg & GL_MDET_TX_TCLAN_MAL_TYPE_M) >>
+				GL_MDET_TX_TCLAN_MAL_TYPE_S;
+		u16 queue = ((reg & GL_MDET_TX_TCLAN_QNUM_M) >>
+				GL_MDET_TX_TCLAN_QNUM_S);
+
+		if (netif_msg_tx_err(pf))
+			dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
+				 event, queue, pf_num, vf_num);
+		wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff);
+	}
+
+	reg = rd32(hw, GL_MDET_RX);
+	if (reg & GL_MDET_RX_VALID_M) {
+		u8 pf_num = (reg & GL_MDET_RX_PF_NUM_M) >>
+				GL_MDET_RX_PF_NUM_S;
+		u16 vf_num = (reg & GL_MDET_RX_VF_NUM_M) >>
+				GL_MDET_RX_VF_NUM_S;
+		u8 event = (reg & GL_MDET_RX_MAL_TYPE_M) >>
+				GL_MDET_RX_MAL_TYPE_S;
+		u16 queue = ((reg & GL_MDET_RX_QNUM_M) >>
+				GL_MDET_RX_QNUM_S);
+
+		if (netif_msg_rx_err(pf))
+			dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
+				 event, queue, pf_num, vf_num);
+		wr32(hw, GL_MDET_RX, 0xffffffff);
+	}
+
+	/* check to see if this PF caused an MDD event */
+	reg = rd32(hw, PF_MDET_TX_PQM);
+	if (reg & PF_MDET_TX_PQM_VALID_M) {
+		wr32(hw, PF_MDET_TX_PQM, 0xFFFF);
+		if (netif_msg_tx_err(pf))
+			dev_info(dev, "Malicious Driver Detection event TX_PQM detected on PF\n");
+	}
+
+	reg = rd32(hw, PF_MDET_TX_TCLAN);
+	if (reg & PF_MDET_TX_TCLAN_VALID_M) {
+		wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF);
+		if (netif_msg_tx_err(pf))
+			dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on PF\n");
+	}
+
+	reg = rd32(hw, PF_MDET_RX);
+	if (reg & PF_MDET_RX_VALID_M) {
+		wr32(hw, PF_MDET_RX, 0xFFFF);
+		if (netif_msg_rx_err(pf))
+			dev_info(dev, "Malicious Driver Detection event RX detected on PF\n");
+	}
+
+	/* Check to see if one of the VFs caused an MDD event, and then
+	 * increment counters and set print pending
+	 */
+	mutex_lock(&pf->vfs.table_lock);
+	ice_for_each_vf(pf, bkt, vf) {
+		reg = rd32(hw, VP_MDET_TX_PQM(vf->vf_id));
+		if (reg & VP_MDET_TX_PQM_VALID_M) {
+			wr32(hw, VP_MDET_TX_PQM(vf->vf_id), 0xFFFF);
+			vf->mdd_tx_events.count++;
+			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
+			if (netif_msg_tx_err(pf))
+				dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n",
+					 vf->vf_id);
+		}
+
+		reg = rd32(hw, VP_MDET_TX_TCLAN(vf->vf_id));
+		if (reg & VP_MDET_TX_TCLAN_VALID_M) {
+			wr32(hw, VP_MDET_TX_TCLAN(vf->vf_id), 0xFFFF);
+			vf->mdd_tx_events.count++;
+			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
+			if (netif_msg_tx_err(pf))
+				dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n",
+					 vf->vf_id);
+		}
+
+		reg = rd32(hw, VP_MDET_TX_TDPU(vf->vf_id));
+		if (reg & VP_MDET_TX_TDPU_VALID_M) {
+			wr32(hw, VP_MDET_TX_TDPU(vf->vf_id), 0xFFFF);
+			vf->mdd_tx_events.count++;
+			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
+			if (netif_msg_tx_err(pf))
+				dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n",
+					 vf->vf_id);
+		}
+
+		reg = rd32(hw, VP_MDET_RX(vf->vf_id));
+		if (reg & VP_MDET_RX_VALID_M) {
+			wr32(hw, VP_MDET_RX(vf->vf_id), 0xFFFF);
+			vf->mdd_rx_events.count++;
+			set_bit(ICE_MDD_VF_PRINT_PENDING, pf->state);
+			if (netif_msg_rx_err(pf))
+				dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n",
+					 vf->vf_id);
+
+			/* Since the queue is disabled on VF Rx MDD events, the
+			 * PF can be configured to reset the VF through ethtool
+			 * private flag mdd-auto-reset-vf.
+			 */
+			if (test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
+				/* VF MDD event counters will be cleared by
+				 * reset, so print the event prior to reset.
+				 */
+				ice_print_vf_rx_mdd_event(vf);
+				ice_reset_vf(vf, ICE_VF_RESET_LOCK);
+			}
+		}
+	}
+	mutex_unlock(&pf->vfs.table_lock);
+
+	ice_print_vfs_mdd_events(pf);
+}
+
+/**
+ * ice_force_phys_link_state - Force the physical link state
+ * @vsi: VSI to force the physical link state to up/down
+ * @link_up: true/false indicates to set the physical link to up/down
+ *
+ * Force the physical link state by getting the current PHY capabilities from
+ * hardware and setting the PHY config based on the determined capabilities. If
+ * link changes a link event will be triggered because both the Enable Automatic
+ * Link Update and LESM Enable bits are set when setting the PHY capabilities.
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up)
+{
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_aqc_set_phy_cfg_data *cfg;
+	struct ice_port_info *pi;
+	struct device *dev;
+	int retcode;
+
+	if (!vsi || !vsi->port_info || !vsi->back)
+		return -EINVAL;
+	if (vsi->type != ICE_VSI_PF)
+		return 0;
+
+	dev = ice_pf_to_dev(vsi->back);
+
+	pi = vsi->port_info;
+
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return -ENOMEM;
+
+	retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
+				      NULL);
+	if (retcode) {
+		dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n",
+			vsi->vsi_num, retcode);
+		retcode = -EIO;
+		goto out;
+	}
+
+	/* No change in link */
+	if (link_up == !!(pcaps->caps & ICE_AQC_PHY_EN_LINK) &&
+	    link_up == !!(pi->phy.link_info.link_info & ICE_AQ_LINK_UP))
+		goto out;
+
+	/* Use the current user PHY configuration. The current user PHY
+	 * configuration is initialized during probe from PHY capabilities
+	 * software mode, and updated on set PHY configuration.
+	 */
+	cfg = kmemdup(&pi->phy.curr_user_phy_cfg, sizeof(*cfg), GFP_KERNEL);
+	if (!cfg) {
+		retcode = -ENOMEM;
+		goto out;
+	}
+
+	cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+	if (link_up)
+		cfg->caps |= ICE_AQ_PHY_ENA_LINK;
+	else
+		cfg->caps &= ~ICE_AQ_PHY_ENA_LINK;
+
+	retcode = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL);
+	if (retcode) {
+		dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
+			vsi->vsi_num, retcode);
+		retcode = -EIO;
+	}
+
+	kfree(cfg);
+out:
+	kfree(pcaps);
+	return retcode;
+}
+
+/**
+ * ice_init_nvm_phy_type - Initialize the NVM PHY type
+ * @pi: port info structure
+ *
+ * Initialize nvm_phy_type_[low|high] for link lenient mode support
+ */
+static int ice_init_nvm_phy_type(struct ice_port_info *pi)
+{
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_pf *pf = pi->hw->back;
+	int err;
+
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return -ENOMEM;
+
+	err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA,
+				  pcaps, NULL);
+
+	if (err) {
+		dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
+		goto out;
+	}
+
+	pf->nvm_phy_type_hi = pcaps->phy_type_high;
+	pf->nvm_phy_type_lo = pcaps->phy_type_low;
+
+out:
+	kfree(pcaps);
+	return err;
+}
+
+/**
+ * ice_init_link_dflt_override - Initialize link default override
+ * @pi: port info structure
+ *
+ * Initialize link default override and PHY total port shutdown during probe
+ */
+static void ice_init_link_dflt_override(struct ice_port_info *pi)
+{
+	struct ice_link_default_override_tlv *ldo;
+	struct ice_pf *pf = pi->hw->back;
+
+	ldo = &pf->link_dflt_override;
+	if (ice_get_link_default_override(ldo, pi))
+		return;
+
+	if (!(ldo->options & ICE_LINK_OVERRIDE_PORT_DIS))
+		return;
+
+	/* Enable Total Port Shutdown (override/replace link-down-on-close
+	 * ethtool private flag) for ports with Port Disable bit set.
+	 */
+	set_bit(ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags);
+	set_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags);
+}
+
+/**
+ * ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings
+ * @pi: port info structure
+ *
+ * If default override is enabled, initialize the user PHY cfg speed and FEC
+ * settings using the default override mask from the NVM.
+ *
+ * The PHY should only be configured with the default override settings the
+ * first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state
+ * is used to indicate that the user PHY cfg default override is initialized
+ * and the PHY has not been configured with the default override settings. The
+ * state is set here, and cleared in ice_configure_phy the first time the PHY is
+ * configured.
+ *
+ * This function should be called only if the FW doesn't support default
+ * configuration mode, as reported by ice_fw_supports_report_dflt_cfg.
+ */
+static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi)
+{
+	struct ice_link_default_override_tlv *ldo;
+	struct ice_aqc_set_phy_cfg_data *cfg;
+	struct ice_phy_info *phy = &pi->phy;
+	struct ice_pf *pf = pi->hw->back;
+
+	ldo = &pf->link_dflt_override;
+
+	/* If link default override is enabled, use to mask NVM PHY capabilities
+	 * for speed and FEC default configuration.
+	 */
+	cfg = &phy->curr_user_phy_cfg;
+
+	if (ldo->phy_type_low || ldo->phy_type_high) {
+		cfg->phy_type_low = pf->nvm_phy_type_lo &
+				    cpu_to_le64(ldo->phy_type_low);
+		cfg->phy_type_high = pf->nvm_phy_type_hi &
+				     cpu_to_le64(ldo->phy_type_high);
+	}
+	cfg->link_fec_opt = ldo->fec_options;
+	phy->curr_user_fec_req = ICE_FEC_AUTO;
+
+	set_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING, pf->state);
+}
+
+/**
+ * ice_init_phy_user_cfg - Initialize the PHY user configuration
+ * @pi: port info structure
+ *
+ * Initialize the current user PHY configuration, speed, FEC, and FC requested
+ * mode to default. The PHY defaults are from get PHY capabilities topology
+ * with media so call when media is first available. An error is returned if
+ * called when media is not available. The PHY initialization completed state is
+ * set here.
+ *
+ * These configurations are used when setting PHY
+ * configuration. The user PHY configuration is updated on set PHY
+ * configuration. Returns 0 on success, negative on failure
+ */
+static int ice_init_phy_user_cfg(struct ice_port_info *pi)
+{
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_phy_info *phy = &pi->phy;
+	struct ice_pf *pf = pi->hw->back;
+	int err;
+
+	if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
+		return -EIO;
+
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return -ENOMEM;
+
+	if (ice_fw_supports_report_dflt_cfg(pi->hw))
+		err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
+					  pcaps, NULL);
+	else
+		err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+					  pcaps, NULL);
+	if (err) {
+		dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
+		goto err_out;
+	}
+
+	ice_copy_phy_caps_to_cfg(pi, pcaps, &pi->phy.curr_user_phy_cfg);
+
+	/* check if lenient mode is supported and enabled */
+	if (ice_fw_supports_link_override(pi->hw) &&
+	    !(pcaps->module_compliance_enforcement &
+	      ICE_AQC_MOD_ENFORCE_STRICT_MODE)) {
+		set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags);
+
+		/* if the FW supports default PHY configuration mode, then the driver
+		 * does not have to apply link override settings. If not,
+		 * initialize user PHY configuration with link override values
+		 */
+		if (!ice_fw_supports_report_dflt_cfg(pi->hw) &&
+		    (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN)) {
+			ice_init_phy_cfg_dflt_override(pi);
+			goto out;
+		}
+	}
+
+	/* if link default override is not enabled, set user flow control and
+	 * FEC settings based on what get_phy_caps returned
+	 */
+	phy->curr_user_fec_req = ice_caps_to_fec_mode(pcaps->caps,
+						      pcaps->link_fec_options);
+	phy->curr_user_fc_req = ice_caps_to_fc_mode(pcaps->caps);
+
+out:
+	phy->curr_user_speed_req = ICE_AQ_LINK_SPEED_M;
+	set_bit(ICE_PHY_INIT_COMPLETE, pf->state);
+err_out:
+	kfree(pcaps);
+	return err;
+}
+
+/**
+ * ice_configure_phy - configure PHY
+ * @vsi: VSI of PHY
+ *
+ * Set the PHY configuration. If the current PHY configuration is the same as
+ * the curr_user_phy_cfg, then do nothing to avoid link flap. Otherwise
+ * configure the based get PHY capabilities for topology with media.
+ */
+static int ice_configure_phy(struct ice_vsi *vsi)
+{
+	struct device *dev = ice_pf_to_dev(vsi->back);
+	struct ice_port_info *pi = vsi->port_info;
+	struct ice_aqc_get_phy_caps_data *pcaps;
+	struct ice_aqc_set_phy_cfg_data *cfg;
+	struct ice_phy_info *phy = &pi->phy;
+	struct ice_pf *pf = vsi->back;
+	int err;
+
+	/* Ensure we have media as we cannot configure a medialess port */
+	if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
+		return -ENOMEDIUM;
+
+	ice_print_topo_conflict(vsi);
+
+	if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags) &&
+	    phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA)
+		return -EPERM;
+
+	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))
+		return ice_force_phys_link_state(vsi, true);
+
+	pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
+	if (!pcaps)
+		return -ENOMEM;
+
+	/* Get current PHY config */
+	err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
+				  NULL);
+	if (err) {
+		dev_err(dev, "Failed to get PHY configuration, VSI %d error %d\n",
+			vsi->vsi_num, err);
+		goto done;
+	}
+
+	/* If PHY enable link is configured and configuration has not changed,
+	 * there's nothing to do
+	 */
+	if (pcaps->caps & ICE_AQC_PHY_EN_LINK &&
+	    ice_phy_caps_equals_cfg(pcaps, &phy->curr_user_phy_cfg))
+		goto done;
+
+	/* Use PHY topology as baseline for configuration */
+	memset(pcaps, 0, sizeof(*pcaps));
+	if (ice_fw_supports_report_dflt_cfg(pi->hw))
+		err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
+					  pcaps, NULL);
+	else
+		err = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
+					  pcaps, NULL);
+	if (err) {
+		dev_err(dev, "Failed to get PHY caps, VSI %d error %d\n",
+			vsi->vsi_num, err);
+		goto done;
+	}
+
+	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+	if (!cfg) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	ice_copy_phy_caps_to_cfg(pi, pcaps, cfg);
+
+	/* Speed - If default override pending, use curr_user_phy_cfg set in
+	 * ice_init_phy_user_cfg_ldo.
+	 */
+	if (test_and_clear_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING,
+			       vsi->back->state)) {
+		cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low;
+		cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high;
+	} else {
+		u64 phy_low = 0, phy_high = 0;
+
+		ice_update_phy_type(&phy_low, &phy_high,
+				    pi->phy.curr_user_speed_req);
+		cfg->phy_type_low = pcaps->phy_type_low & cpu_to_le64(phy_low);
+		cfg->phy_type_high = pcaps->phy_type_high &
+				     cpu_to_le64(phy_high);
+	}
+
+	/* Can't provide what was requested; use PHY capabilities */
+	if (!cfg->phy_type_low && !cfg->phy_type_high) {
+		cfg->phy_type_low = pcaps->phy_type_low;
+		cfg->phy_type_high = pcaps->phy_type_high;
+	}
+
+	/* FEC */
+	ice_cfg_phy_fec(pi, cfg, phy->curr_user_fec_req);
+
+	/* Can't provide what was requested; use PHY capabilities */
+	if (cfg->link_fec_opt !=
+	    (cfg->link_fec_opt & pcaps->link_fec_options)) {
+		cfg->caps |= pcaps->caps & ICE_AQC_PHY_EN_AUTO_FEC;
+		cfg->link_fec_opt = pcaps->link_fec_options;
+	}
+
+	/* Flow Control - always supported; no need to check against
+	 * capabilities
+	 */
+	ice_cfg_phy_fc(pi, cfg, phy->curr_user_fc_req);
+
+	/* Enable link and link update */
+	cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
+
+	err = ice_aq_set_phy_cfg(&pf->hw, pi, cfg, NULL);
+	if (err)
+		dev_err(dev, "Failed to set phy config, VSI %d error %d\n",
+			vsi->vsi_num, err);
+
+	kfree(cfg);
+done:
+	kfree(pcaps);
+	return err;
+}
+
+/**
+ * ice_check_media_subtask - Check for media
+ * @pf: pointer to PF struct
+ *
+ * If media is available, then initialize PHY user configuration if it is not
+ * been, and configure the PHY if the interface is up.
+ */
+static void ice_check_media_subtask(struct ice_pf *pf)
+{
+	struct ice_port_info *pi;
+	struct ice_vsi *vsi;
+	int err;
+
+	/* No need to check for media if it's already present */
+	if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags))
+		return;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return;
+
+	/* Refresh link info and check if media is present */
+	pi = vsi->port_info;
+	err = ice_update_link_info(pi);
+	if (err)
+		return;
+
+	ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);
+
+	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
+		if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state))
+			ice_init_phy_user_cfg(pi);
+
+		/* PHY settings are reset on media insertion, reconfigure
+		 * PHY to preserve settings.
+		 */
+		if (test_bit(ICE_VSI_DOWN, vsi->state) &&
+		    test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
+			return;
+
+		err = ice_configure_phy(vsi);
+		if (!err)
+			clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+
+		/* A Link Status Event will be generated; the event handler
+		 * will complete bringing the interface up
+		 */
+	}
+}
+
+/**
+ * ice_service_task - manage and run subtasks
+ * @work: pointer to work_struct contained by the PF struct
+ */
+static void ice_service_task(struct work_struct *work)
+{
+	struct ice_pf *pf = container_of(work, struct ice_pf, serv_task);
+	unsigned long start_time = jiffies;
+
+	/* subtasks */
+
+	/* process reset requests first */
+	ice_reset_subtask(pf);
+
+	/* bail if a reset/recovery cycle is pending or rebuild failed */
+	if (ice_is_reset_in_progress(pf->state) ||
+	    test_bit(ICE_SUSPENDED, pf->state) ||
+	    test_bit(ICE_NEEDS_RESTART, pf->state)) {
+		ice_service_task_complete(pf);
+		return;
+	}
+
+	if (test_and_clear_bit(ICE_AUX_ERR_PENDING, pf->state)) {
+		struct iidc_event *event;
+
+		event = kzalloc(sizeof(*event), GFP_KERNEL);
+		if (event) {
+			set_bit(IIDC_EVENT_CRIT_ERR, event->type);
+			/* report the entire OICR value to AUX driver */
+			swap(event->reg, pf->oicr_err_reg);
+			ice_send_event_to_aux(pf, event);
+			kfree(event);
+		}
+	}
+
+	/* unplug aux dev per request, if an unplug request came in
+	 * while processing a plug request, this will handle it
+	 */
+	if (test_and_clear_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags))
+		ice_unplug_aux_dev(pf);
+
+	/* Plug aux device per request */
+	if (test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags))
+		ice_plug_aux_dev(pf);
+
+	if (test_and_clear_bit(ICE_FLAG_MTU_CHANGED, pf->flags)) {
+		struct iidc_event *event;
+
+		event = kzalloc(sizeof(*event), GFP_KERNEL);
+		if (event) {
+			set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type);
+			ice_send_event_to_aux(pf, event);
+			kfree(event);
+		}
+	}
+
+	ice_clean_adminq_subtask(pf);
+	ice_check_media_subtask(pf);
+	ice_check_for_hang_subtask(pf);
+	ice_sync_fltr_subtask(pf);
+	ice_handle_mdd_event(pf);
+	ice_watchdog_subtask(pf);
+
+	if (ice_is_safe_mode(pf)) {
+		ice_service_task_complete(pf);
+		return;
+	}
+
+	ice_process_vflr_event(pf);
+	ice_clean_mailboxq_subtask(pf);
+	ice_clean_sbq_subtask(pf);
+	ice_sync_arfs_fltrs(pf);
+	ice_flush_fdir_ctx(pf);
+
+	/* Clear ICE_SERVICE_SCHED flag to allow scheduling next event */
+	ice_service_task_complete(pf);
+
+	/* If the tasks have taken longer than one service timer period
+	 * or there is more work to be done, reset the service timer to
+	 * schedule the service task now.
+	 */
+	if (time_after(jiffies, (start_time + pf->serv_tmr_period)) ||
+	    test_bit(ICE_MDD_EVENT_PENDING, pf->state) ||
+	    test_bit(ICE_VFLR_EVENT_PENDING, pf->state) ||
+	    test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) ||
+	    test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) ||
+	    test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state) ||
+	    test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state))
+		mod_timer(&pf->serv_tmr, jiffies);
+}
+
+/**
+ * ice_set_ctrlq_len - helper function to set controlq length
+ * @hw: pointer to the HW instance
+ */
+static void ice_set_ctrlq_len(struct ice_hw *hw)
+{
+	hw->adminq.num_rq_entries = ICE_AQ_LEN;
+	hw->adminq.num_sq_entries = ICE_AQ_LEN;
+	hw->adminq.rq_buf_size = ICE_AQ_MAX_BUF_LEN;
+	hw->adminq.sq_buf_size = ICE_AQ_MAX_BUF_LEN;
+	hw->mailboxq.num_rq_entries = PF_MBX_ARQLEN_ARQLEN_M;
+	hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN;
+	hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
+	hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN;
+	hw->sbq.num_rq_entries = ICE_SBQ_LEN;
+	hw->sbq.num_sq_entries = ICE_SBQ_LEN;
+	hw->sbq.rq_buf_size = ICE_SBQ_MAX_BUF_LEN;
+	hw->sbq.sq_buf_size = ICE_SBQ_MAX_BUF_LEN;
+}
+
+/**
+ * ice_schedule_reset - schedule a reset
+ * @pf: board private structure
+ * @reset: reset being requested
+ */
+int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+
+	/* bail out if earlier reset has failed */
+	if (test_bit(ICE_RESET_FAILED, pf->state)) {
+		dev_dbg(dev, "earlier reset has failed\n");
+		return -EIO;
+	}
+	/* bail if reset/recovery already in progress */
+	if (ice_is_reset_in_progress(pf->state)) {
+		dev_dbg(dev, "Reset already in progress\n");
+		return -EBUSY;
+	}
+
+	switch (reset) {
+	case ICE_RESET_PFR:
+		set_bit(ICE_PFR_REQ, pf->state);
+		break;
+	case ICE_RESET_CORER:
+		set_bit(ICE_CORER_REQ, pf->state);
+		break;
+	case ICE_RESET_GLOBR:
+		set_bit(ICE_GLOBR_REQ, pf->state);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ice_service_task_schedule(pf);
+	return 0;
+}
+
+/**
+ * ice_irq_affinity_notify - Callback for affinity changes
+ * @notify: context as to what irq was changed
+ * @mask: the new affinity mask
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * so that we may register to receive changes to the irq affinity masks.
+ */
+static void
+ice_irq_affinity_notify(struct irq_affinity_notify *notify,
+			const cpumask_t *mask)
+{
+	struct ice_q_vector *q_vector =
+		container_of(notify, struct ice_q_vector, affinity_notify);
+
+	cpumask_copy(&q_vector->affinity_mask, mask);
+}
+
+/**
+ * ice_irq_affinity_release - Callback for affinity notifier release
+ * @ref: internal core kernel usage
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * to inform the current notification subscriber that they will no longer
+ * receive notifications.
+ */
+static void ice_irq_affinity_release(struct kref __always_unused *ref) {}
+
+/**
+ * ice_vsi_ena_irq - Enable IRQ for the given VSI
+ * @vsi: the VSI being configured
+ */
+static int ice_vsi_ena_irq(struct ice_vsi *vsi)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	int i;
+
+	ice_for_each_q_vector(vsi, i)
+		ice_irq_dynamic_ena(hw, vsi, vsi->q_vectors[i]);
+
+	ice_flush(hw);
+	return 0;
+}
+
+/**
+ * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI
+ * @vsi: the VSI being configured
+ * @basename: name for the vector
+ */
+static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
+{
+	int q_vectors = vsi->num_q_vectors;
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	int rx_int_idx = 0;
+	int tx_int_idx = 0;
+	int vector, err;
+	int irq_num;
+
+	dev = ice_pf_to_dev(pf);
+	for (vector = 0; vector < q_vectors; vector++) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[vector];
+
+		irq_num = q_vector->irq.virq;
+
+		if (q_vector->tx.tx_ring && q_vector->rx.rx_ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+				 "%s-%s-%d", basename, "TxRx", rx_int_idx++);
+			tx_int_idx++;
+		} else if (q_vector->rx.rx_ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+				 "%s-%s-%d", basename, "rx", rx_int_idx++);
+		} else if (q_vector->tx.tx_ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+				 "%s-%s-%d", basename, "tx", tx_int_idx++);
+		} else {
+			/* skip this unused q_vector */
+			continue;
+		}
+		if (vsi->type == ICE_VSI_CTRL && vsi->vf)
+			err = devm_request_irq(dev, irq_num, vsi->irq_handler,
+					       IRQF_SHARED, q_vector->name,
+					       q_vector);
+		else
+			err = devm_request_irq(dev, irq_num, vsi->irq_handler,
+					       0, q_vector->name, q_vector);
+		if (err) {
+			netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n",
+				   err);
+			goto free_q_irqs;
+		}
+
+		/* register for affinity change notifications */
+		if (!IS_ENABLED(CONFIG_RFS_ACCEL)) {
+			struct irq_affinity_notify *affinity_notify;
+
+			affinity_notify = &q_vector->affinity_notify;
+			affinity_notify->notify = ice_irq_affinity_notify;
+			affinity_notify->release = ice_irq_affinity_release;
+			irq_set_affinity_notifier(irq_num, affinity_notify);
+		}
+
+		/* assign the mask for this irq */
+		irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
+	}
+
+	err = ice_set_cpu_rx_rmap(vsi);
+	if (err) {
+		netdev_err(vsi->netdev, "Failed to setup CPU RMAP on VSI %u: %pe\n",
+			   vsi->vsi_num, ERR_PTR(err));
+		goto free_q_irqs;
+	}
+
+	vsi->irqs_ready = true;
+	return 0;
+
+free_q_irqs:
+	while (vector--) {
+		irq_num = vsi->q_vectors[vector]->irq.virq;
+		if (!IS_ENABLED(CONFIG_RFS_ACCEL))
+			irq_set_affinity_notifier(irq_num, NULL);
+		irq_set_affinity_hint(irq_num, NULL);
+		devm_free_irq(dev, irq_num, &vsi->q_vectors[vector]);
+	}
+	return err;
+}
+
+/**
+ * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
+ * @vsi: VSI to setup Tx rings used by XDP
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
+{
+	struct device *dev = ice_pf_to_dev(vsi->back);
+	struct ice_tx_desc *tx_desc;
+	int i, j;
+
+	ice_for_each_xdp_txq(vsi, i) {
+		u16 xdp_q_idx = vsi->alloc_txq + i;
+		struct ice_ring_stats *ring_stats;
+		struct ice_tx_ring *xdp_ring;
+
+		xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL);
+		if (!xdp_ring)
+			goto free_xdp_rings;
+
+		ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL);
+		if (!ring_stats) {
+			ice_free_tx_ring(xdp_ring);
+			goto free_xdp_rings;
+		}
+
+		xdp_ring->ring_stats = ring_stats;
+		xdp_ring->q_index = xdp_q_idx;
+		xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx];
+		xdp_ring->vsi = vsi;
+		xdp_ring->netdev = NULL;
+		xdp_ring->dev = dev;
+		xdp_ring->count = vsi->num_tx_desc;
+		WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
+		if (ice_setup_tx_ring(xdp_ring))
+			goto free_xdp_rings;
+		ice_set_ring_xdp(xdp_ring);
+		spin_lock_init(&xdp_ring->tx_lock);
+		for (j = 0; j < xdp_ring->count; j++) {
+			tx_desc = ICE_TX_DESC(xdp_ring, j);
+			tx_desc->cmd_type_offset_bsz = 0;
+		}
+	}
+
+	return 0;
+
+free_xdp_rings:
+	for (; i >= 0; i--) {
+		if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) {
+			kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu);
+			vsi->xdp_rings[i]->ring_stats = NULL;
+			ice_free_tx_ring(vsi->xdp_rings[i]);
+		}
+	}
+	return -ENOMEM;
+}
+
+/**
+ * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
+ * @vsi: VSI to set the bpf prog on
+ * @prog: the bpf prog pointer
+ */
+static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+	struct bpf_prog *old_prog;
+	int i;
+
+	old_prog = xchg(&vsi->xdp_prog, prog);
+	ice_for_each_rxq(vsi, i)
+		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+
+	if (old_prog)
+		bpf_prog_put(old_prog);
+}
+
+/**
+ * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
+ * @vsi: VSI to bring up Tx rings used by XDP
+ * @prog: bpf program that will be assigned to VSI
+ *
+ * Return 0 on success and negative value on error
+ */
+int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	int xdp_rings_rem = vsi->num_xdp_txq;
+	struct ice_pf *pf = vsi->back;
+	struct ice_qs_cfg xdp_qs_cfg = {
+		.qs_mutex = &pf->avail_q_mutex,
+		.pf_map = pf->avail_txqs,
+		.pf_map_size = pf->max_pf_txqs,
+		.q_count = vsi->num_xdp_txq,
+		.scatter_count = ICE_MAX_SCATTER_TXQS,
+		.vsi_map = vsi->txq_map,
+		.vsi_map_offset = vsi->alloc_txq,
+		.mapping_mode = ICE_VSI_MAP_CONTIG
+	};
+	struct device *dev;
+	int i, v_idx;
+	int status;
+
+	dev = ice_pf_to_dev(pf);
+	vsi->xdp_rings = devm_kcalloc(dev, vsi->num_xdp_txq,
+				      sizeof(*vsi->xdp_rings), GFP_KERNEL);
+	if (!vsi->xdp_rings)
+		return -ENOMEM;
+
+	vsi->xdp_mapping_mode = xdp_qs_cfg.mapping_mode;
+	if (__ice_vsi_get_qs(&xdp_qs_cfg))
+		goto err_map_xdp;
+
+	if (static_key_enabled(&ice_xdp_locking_key))
+		netdev_warn(vsi->netdev,
+			    "Could not allocate one XDP Tx ring per CPU, XDP_TX/XDP_REDIRECT actions will be slower\n");
+
+	if (ice_xdp_alloc_setup_rings(vsi))
+		goto clear_xdp_rings;
+
+	/* follow the logic from ice_vsi_map_rings_to_vectors */
+	ice_for_each_q_vector(vsi, v_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+		int xdp_rings_per_v, q_id, q_base;
+
+		xdp_rings_per_v = DIV_ROUND_UP(xdp_rings_rem,
+					       vsi->num_q_vectors - v_idx);
+		q_base = vsi->num_xdp_txq - xdp_rings_rem;
+
+		for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) {
+			struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id];
+
+			xdp_ring->q_vector = q_vector;
+			xdp_ring->next = q_vector->tx.tx_ring;
+			q_vector->tx.tx_ring = xdp_ring;
+		}
+		xdp_rings_rem -= xdp_rings_per_v;
+	}
+
+	ice_for_each_rxq(vsi, i) {
+		if (static_key_enabled(&ice_xdp_locking_key)) {
+			vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
+		} else {
+			struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector;
+			struct ice_tx_ring *ring;
+
+			ice_for_each_tx_ring(ring, q_vector->tx) {
+				if (ice_ring_is_xdp(ring)) {
+					vsi->rx_rings[i]->xdp_ring = ring;
+					break;
+				}
+			}
+		}
+		ice_tx_xsk_pool(vsi, i);
+	}
+
+	/* omit the scheduler update if in reset path; XDP queues will be
+	 * taken into account at the end of ice_vsi_rebuild, where
+	 * ice_cfg_vsi_lan is being called
+	 */
+	if (ice_is_reset_in_progress(pf->state))
+		return 0;
+
+	/* tell the Tx scheduler that right now we have
+	 * additional queues
+	 */
+	for (i = 0; i < vsi->tc_cfg.numtc; i++)
+		max_txqs[i] = vsi->num_txq + vsi->num_xdp_txq;
+
+	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+				 max_txqs);
+	if (status) {
+		dev_err(dev, "Failed VSI LAN queue config for XDP, error: %d\n",
+			status);
+		goto clear_xdp_rings;
+	}
+
+	/* assign the prog only when it's not already present on VSI;
+	 * this flow is a subject of both ethtool -L and ndo_bpf flows;
+	 * VSI rebuild that happens under ethtool -L can expose us to
+	 * the bpf_prog refcount issues as we would be swapping same
+	 * bpf_prog pointers from vsi->xdp_prog and calling bpf_prog_put
+	 * on it as it would be treated as an 'old_prog'; for ndo_bpf
+	 * this is not harmful as dev_xdp_install bumps the refcount
+	 * before calling the op exposed by the driver;
+	 */
+	if (!ice_is_xdp_ena_vsi(vsi))
+		ice_vsi_assign_bpf_prog(vsi, prog);
+
+	return 0;
+clear_xdp_rings:
+	ice_for_each_xdp_txq(vsi, i)
+		if (vsi->xdp_rings[i]) {
+			kfree_rcu(vsi->xdp_rings[i], rcu);
+			vsi->xdp_rings[i] = NULL;
+		}
+
+err_map_xdp:
+	mutex_lock(&pf->avail_q_mutex);
+	ice_for_each_xdp_txq(vsi, i) {
+		clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+		vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+	}
+	mutex_unlock(&pf->avail_q_mutex);
+
+	devm_kfree(dev, vsi->xdp_rings);
+	return -ENOMEM;
+}
+
+/**
+ * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
+ * @vsi: VSI to remove XDP rings
+ *
+ * Detach XDP rings from irq vectors, clean up the PF bitmap and free
+ * resources
+ */
+int ice_destroy_xdp_rings(struct ice_vsi *vsi)
+{
+	u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+	struct ice_pf *pf = vsi->back;
+	int i, v_idx;
+
+	/* q_vectors are freed in reset path so there's no point in detaching
+	 * rings; in case of rebuild being triggered not from reset bits
+	 * in pf->state won't be set, so additionally check first q_vector
+	 * against NULL
+	 */
+	if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+		goto free_qmap;
+
+	ice_for_each_q_vector(vsi, v_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[v_idx];
+		struct ice_tx_ring *ring;
+
+		ice_for_each_tx_ring(ring, q_vector->tx)
+			if (!ring->tx_buf || !ice_ring_is_xdp(ring))
+				break;
+
+		/* restore the value of last node prior to XDP setup */
+		q_vector->tx.tx_ring = ring;
+	}
+
+free_qmap:
+	mutex_lock(&pf->avail_q_mutex);
+	ice_for_each_xdp_txq(vsi, i) {
+		clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs);
+		vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX;
+	}
+	mutex_unlock(&pf->avail_q_mutex);
+
+	ice_for_each_xdp_txq(vsi, i)
+		if (vsi->xdp_rings[i]) {
+			if (vsi->xdp_rings[i]->desc) {
+				synchronize_rcu();
+				ice_free_tx_ring(vsi->xdp_rings[i]);
+			}
+			kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu);
+			vsi->xdp_rings[i]->ring_stats = NULL;
+			kfree_rcu(vsi->xdp_rings[i], rcu);
+			vsi->xdp_rings[i] = NULL;
+		}
+
+	devm_kfree(ice_pf_to_dev(pf), vsi->xdp_rings);
+	vsi->xdp_rings = NULL;
+
+	if (static_key_enabled(&ice_xdp_locking_key))
+		static_branch_dec(&ice_xdp_locking_key);
+
+	if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0])
+		return 0;
+
+	ice_vsi_assign_bpf_prog(vsi, NULL);
+
+	/* notify Tx scheduler that we destroyed XDP queues and bring
+	 * back the old number of child nodes
+	 */
+	for (i = 0; i < vsi->tc_cfg.numtc; i++)
+		max_txqs[i] = vsi->num_txq;
+
+	/* change number of XDP Tx queues to 0 */
+	vsi->num_xdp_txq = 0;
+
+	return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+			       max_txqs);
+}
+
+/**
+ * ice_vsi_rx_napi_schedule - Schedule napi on RX queues from VSI
+ * @vsi: VSI to schedule napi on
+ */
+static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi)
+{
+	int i;
+
+	ice_for_each_rxq(vsi, i) {
+		struct ice_rx_ring *rx_ring = vsi->rx_rings[i];
+
+		if (rx_ring->xsk_pool)
+			napi_schedule(&rx_ring->q_vector->napi);
+	}
+}
+
+/**
+ * ice_vsi_determine_xdp_res - figure out how many Tx qs can XDP have
+ * @vsi: VSI to determine the count of XDP Tx qs
+ *
+ * returns 0 if Tx qs count is higher than at least half of CPU count,
+ * -ENOMEM otherwise
+ */
+int ice_vsi_determine_xdp_res(struct ice_vsi *vsi)
+{
+	u16 avail = ice_get_avail_txq_count(vsi->back);
+	u16 cpus = num_possible_cpus();
+
+	if (avail < cpus / 2)
+		return -ENOMEM;
+
+	vsi->num_xdp_txq = min_t(u16, avail, cpus);
+
+	if (vsi->num_xdp_txq < cpus)
+		static_branch_inc(&ice_xdp_locking_key);
+
+	return 0;
+}
+
+/**
+ * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
+ * @vsi: Pointer to VSI structure
+ */
+static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
+{
+	if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
+		return ICE_RXBUF_1664;
+	else
+		return ICE_RXBUF_3072;
+}
+
+/**
+ * ice_xdp_setup_prog - Add or remove XDP eBPF program
+ * @vsi: VSI to setup XDP for
+ * @prog: XDP program
+ * @extack: netlink extended ack
+ */
+static int
+ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
+		   struct netlink_ext_ack *extack)
+{
+	unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
+	bool if_running = netif_running(vsi->netdev);
+	int ret = 0, xdp_ring_err = 0;
+
+	if (prog && !prog->aux->xdp_has_frags) {
+		if (frame_size > ice_max_xdp_frame_size(vsi)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "MTU is too large for linear frames and XDP prog does not support frags");
+			return -EOPNOTSUPP;
+		}
+	}
+
+	/* hot swap progs and avoid toggling link */
+	if (ice_is_xdp_ena_vsi(vsi) == !!prog) {
+		ice_vsi_assign_bpf_prog(vsi, prog);
+		return 0;
+	}
+
+	/* need to stop netdev while setting up the program for Rx rings */
+	if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
+		ret = ice_down(vsi);
+		if (ret) {
+			NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed");
+			return ret;
+		}
+	}
+
+	if (!ice_is_xdp_ena_vsi(vsi) && prog) {
+		xdp_ring_err = ice_vsi_determine_xdp_res(vsi);
+		if (xdp_ring_err) {
+			NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP");
+		} else {
+			xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
+			if (xdp_ring_err)
+				NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
+		}
+		xdp_features_set_redirect_target(vsi->netdev, true);
+		/* reallocate Rx queues that are used for zero-copy */
+		xdp_ring_err = ice_realloc_zc_buf(vsi, true);
+		if (xdp_ring_err)
+			NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed");
+	} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
+		xdp_features_clear_redirect_target(vsi->netdev);
+		xdp_ring_err = ice_destroy_xdp_rings(vsi);
+		if (xdp_ring_err)
+			NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
+		/* reallocate Rx queues that were used for zero-copy */
+		xdp_ring_err = ice_realloc_zc_buf(vsi, false);
+		if (xdp_ring_err)
+			NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed");
+	}
+
+	if (if_running)
+		ret = ice_up(vsi);
+
+	if (!ret && prog)
+		ice_vsi_rx_napi_schedule(vsi);
+
+	return (ret || xdp_ring_err) ? -ENOMEM : 0;
+}
+
+/**
+ * ice_xdp_safe_mode - XDP handler for safe mode
+ * @dev: netdevice
+ * @xdp: XDP command
+ */
+static int ice_xdp_safe_mode(struct net_device __always_unused *dev,
+			     struct netdev_bpf *xdp)
+{
+	NL_SET_ERR_MSG_MOD(xdp->extack,
+			   "Please provide working DDP firmware package in order to use XDP\n"
+			   "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst");
+	return -EOPNOTSUPP;
+}
+
+/**
+ * ice_xdp - implements XDP handler
+ * @dev: netdevice
+ * @xdp: XDP command
+ */
+static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (vsi->type != ICE_VSI_PF) {
+		NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF VSI");
+		return -EINVAL;
+	}
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
+	case XDP_SETUP_XSK_POOL:
+		return ice_xsk_pool_setup(vsi, xdp->xsk.pool,
+					  xdp->xsk.queue_id);
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
+ * ice_ena_misc_vector - enable the non-queue interrupts
+ * @pf: board private structure
+ */
+static void ice_ena_misc_vector(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	u32 val;
+
+	/* Disable anti-spoof detection interrupt to prevent spurious event
+	 * interrupts during a function reset. Anti-spoof functionally is
+	 * still supported.
+	 */
+	val = rd32(hw, GL_MDCK_TX_TDPU);
+	val |= GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M;
+	wr32(hw, GL_MDCK_TX_TDPU, val);
+
+	/* clear things first */
+	wr32(hw, PFINT_OICR_ENA, 0);	/* disable all */
+	rd32(hw, PFINT_OICR);		/* read to clear */
+
+	val = (PFINT_OICR_ECC_ERR_M |
+	       PFINT_OICR_MAL_DETECT_M |
+	       PFINT_OICR_GRST_M |
+	       PFINT_OICR_PCI_EXCEPTION_M |
+	       PFINT_OICR_VFLR_M |
+	       PFINT_OICR_HMC_ERR_M |
+	       PFINT_OICR_PE_PUSH_M |
+	       PFINT_OICR_PE_CRITERR_M);
+
+	wr32(hw, PFINT_OICR_ENA, val);
+
+	/* SW_ITR_IDX = 0, but don't change INTENA */
+	wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index),
+	     GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M);
+}
+
+/**
+ * ice_misc_intr - misc interrupt handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
+{
+	struct ice_pf *pf = (struct ice_pf *)data;
+	struct ice_hw *hw = &pf->hw;
+	struct device *dev;
+	u32 oicr, ena_mask;
+
+	dev = ice_pf_to_dev(pf);
+	set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state);
+	set_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state);
+	set_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
+
+	oicr = rd32(hw, PFINT_OICR);
+	ena_mask = rd32(hw, PFINT_OICR_ENA);
+
+	if (oicr & PFINT_OICR_SWINT_M) {
+		ena_mask &= ~PFINT_OICR_SWINT_M;
+		pf->sw_int_count++;
+	}
+
+	if (oicr & PFINT_OICR_MAL_DETECT_M) {
+		ena_mask &= ~PFINT_OICR_MAL_DETECT_M;
+		set_bit(ICE_MDD_EVENT_PENDING, pf->state);
+	}
+	if (oicr & PFINT_OICR_VFLR_M) {
+		/* disable any further VFLR event notifications */
+		if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
+			u32 reg = rd32(hw, PFINT_OICR_ENA);
+
+			reg &= ~PFINT_OICR_VFLR_M;
+			wr32(hw, PFINT_OICR_ENA, reg);
+		} else {
+			ena_mask &= ~PFINT_OICR_VFLR_M;
+			set_bit(ICE_VFLR_EVENT_PENDING, pf->state);
+		}
+	}
+
+	if (oicr & PFINT_OICR_GRST_M) {
+		u32 reset;
+
+		/* we have a reset warning */
+		ena_mask &= ~PFINT_OICR_GRST_M;
+		reset = (rd32(hw, GLGEN_RSTAT) & GLGEN_RSTAT_RESET_TYPE_M) >>
+			GLGEN_RSTAT_RESET_TYPE_S;
+
+		if (reset == ICE_RESET_CORER)
+			pf->corer_count++;
+		else if (reset == ICE_RESET_GLOBR)
+			pf->globr_count++;
+		else if (reset == ICE_RESET_EMPR)
+			pf->empr_count++;
+		else
+			dev_dbg(dev, "Invalid reset type %d\n", reset);
+
+		/* If a reset cycle isn't already in progress, we set a bit in
+		 * pf->state so that the service task can start a reset/rebuild.
+		 */
+		if (!test_and_set_bit(ICE_RESET_OICR_RECV, pf->state)) {
+			if (reset == ICE_RESET_CORER)
+				set_bit(ICE_CORER_RECV, pf->state);
+			else if (reset == ICE_RESET_GLOBR)
+				set_bit(ICE_GLOBR_RECV, pf->state);
+			else
+				set_bit(ICE_EMPR_RECV, pf->state);
+
+			/* There are couple of different bits at play here.
+			 * hw->reset_ongoing indicates whether the hardware is
+			 * in reset. This is set to true when a reset interrupt
+			 * is received and set back to false after the driver
+			 * has determined that the hardware is out of reset.
+			 *
+			 * ICE_RESET_OICR_RECV in pf->state indicates
+			 * that a post reset rebuild is required before the
+			 * driver is operational again. This is set above.
+			 *
+			 * As this is the start of the reset/rebuild cycle, set
+			 * both to indicate that.
+			 */
+			hw->reset_ongoing = true;
+		}
+	}
+
+	if (oicr & PFINT_OICR_TSYN_TX_M) {
+		ena_mask &= ~PFINT_OICR_TSYN_TX_M;
+		if (!hw->reset_ongoing)
+			set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread);
+	}
+
+	if (oicr & PFINT_OICR_TSYN_EVNT_M) {
+		u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+		u32 gltsyn_stat = rd32(hw, GLTSYN_STAT(tmr_idx));
+
+		ena_mask &= ~PFINT_OICR_TSYN_EVNT_M;
+
+		if (hw->func_caps.ts_func_info.src_tmr_owned) {
+			/* Save EVENTs from GLTSYN register */
+			pf->ptp.ext_ts_irq |= gltsyn_stat &
+					      (GLTSYN_STAT_EVENT0_M |
+					       GLTSYN_STAT_EVENT1_M |
+					       GLTSYN_STAT_EVENT2_M);
+
+			set_bit(ICE_MISC_THREAD_EXTTS_EVENT, pf->misc_thread);
+		}
+	}
+
+#define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M)
+	if (oicr & ICE_AUX_CRIT_ERR) {
+		pf->oicr_err_reg |= oicr;
+		set_bit(ICE_AUX_ERR_PENDING, pf->state);
+		ena_mask &= ~ICE_AUX_CRIT_ERR;
+	}
+
+	/* Report any remaining unexpected interrupts */
+	oicr &= ena_mask;
+	if (oicr) {
+		dev_dbg(dev, "unhandled interrupt oicr=0x%08x\n", oicr);
+		/* If a critical error is pending there is no choice but to
+		 * reset the device.
+		 */
+		if (oicr & (PFINT_OICR_PCI_EXCEPTION_M |
+			    PFINT_OICR_ECC_ERR_M)) {
+			set_bit(ICE_PFR_REQ, pf->state);
+		}
+	}
+
+	return IRQ_WAKE_THREAD;
+}
+
+/**
+ * ice_misc_intr_thread_fn - misc interrupt thread function
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data)
+{
+	struct ice_pf *pf = data;
+	struct ice_hw *hw;
+
+	hw = &pf->hw;
+
+	if (ice_is_reset_in_progress(pf->state))
+		return IRQ_HANDLED;
+
+	ice_service_task_schedule(pf);
+
+	if (test_and_clear_bit(ICE_MISC_THREAD_EXTTS_EVENT, pf->misc_thread))
+		ice_ptp_extts_event(pf);
+
+	if (test_and_clear_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread)) {
+		/* Process outstanding Tx timestamps. If there is more work,
+		 * re-arm the interrupt to trigger again.
+		 */
+		if (ice_ptp_process_ts(pf) == ICE_TX_TSTAMP_WORK_PENDING) {
+			wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M);
+			ice_flush(hw);
+		}
+	}
+
+	ice_irq_dynamic_ena(hw, NULL, NULL);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ice_dis_ctrlq_interrupts - disable control queue interrupts
+ * @hw: pointer to HW structure
+ */
+static void ice_dis_ctrlq_interrupts(struct ice_hw *hw)
+{
+	/* disable Admin queue Interrupt causes */
+	wr32(hw, PFINT_FW_CTL,
+	     rd32(hw, PFINT_FW_CTL) & ~PFINT_FW_CTL_CAUSE_ENA_M);
+
+	/* disable Mailbox queue Interrupt causes */
+	wr32(hw, PFINT_MBX_CTL,
+	     rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M);
+
+	wr32(hw, PFINT_SB_CTL,
+	     rd32(hw, PFINT_SB_CTL) & ~PFINT_SB_CTL_CAUSE_ENA_M);
+
+	/* disable Control queue Interrupt causes */
+	wr32(hw, PFINT_OICR_CTL,
+	     rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_free_irq_msix_misc - Unroll misc vector setup
+ * @pf: board private structure
+ */
+static void ice_free_irq_msix_misc(struct ice_pf *pf)
+{
+	int misc_irq_num = pf->oicr_irq.virq;
+	struct ice_hw *hw = &pf->hw;
+
+	ice_dis_ctrlq_interrupts(hw);
+
+	/* disable OICR interrupt */
+	wr32(hw, PFINT_OICR_ENA, 0);
+	ice_flush(hw);
+
+	synchronize_irq(misc_irq_num);
+	devm_free_irq(ice_pf_to_dev(pf), misc_irq_num, pf);
+
+	ice_free_irq(pf, pf->oicr_irq);
+}
+
+/**
+ * ice_ena_ctrlq_interrupts - enable control queue interrupts
+ * @hw: pointer to HW structure
+ * @reg_idx: HW vector index to associate the control queue interrupts with
+ */
+static void ice_ena_ctrlq_interrupts(struct ice_hw *hw, u16 reg_idx)
+{
+	u32 val;
+
+	val = ((reg_idx & PFINT_OICR_CTL_MSIX_INDX_M) |
+	       PFINT_OICR_CTL_CAUSE_ENA_M);
+	wr32(hw, PFINT_OICR_CTL, val);
+
+	/* enable Admin queue Interrupt causes */
+	val = ((reg_idx & PFINT_FW_CTL_MSIX_INDX_M) |
+	       PFINT_FW_CTL_CAUSE_ENA_M);
+	wr32(hw, PFINT_FW_CTL, val);
+
+	/* enable Mailbox queue Interrupt causes */
+	val = ((reg_idx & PFINT_MBX_CTL_MSIX_INDX_M) |
+	       PFINT_MBX_CTL_CAUSE_ENA_M);
+	wr32(hw, PFINT_MBX_CTL, val);
+
+	/* This enables Sideband queue Interrupt causes */
+	val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) |
+	       PFINT_SB_CTL_CAUSE_ENA_M);
+	wr32(hw, PFINT_SB_CTL, val);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_req_irq_msix_misc - Setup the misc vector to handle non queue events
+ * @pf: board private structure
+ *
+ * This sets up the handler for MSIX 0, which is used to manage the
+ * non-queue interrupts, e.g. AdminQ and errors. This is not used
+ * when in MSI or Legacy interrupt mode.
+ */
+static int ice_req_irq_msix_misc(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	struct msi_map oicr_irq;
+	int err = 0;
+
+	if (!pf->int_name[0])
+		snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc",
+			 dev_driver_string(dev), dev_name(dev));
+
+	/* Do not request IRQ but do enable OICR interrupt since settings are
+	 * lost during reset. Note that this function is called only during
+	 * rebuild path and not while reset is in progress.
+	 */
+	if (ice_is_reset_in_progress(pf->state))
+		goto skip_req_irq;
+
+	/* reserve one vector in irq_tracker for misc interrupts */
+	oicr_irq = ice_alloc_irq(pf, false);
+	if (oicr_irq.index < 0)
+		return oicr_irq.index;
+
+	pf->oicr_irq = oicr_irq;
+	err = devm_request_threaded_irq(dev, pf->oicr_irq.virq, ice_misc_intr,
+					ice_misc_intr_thread_fn, 0,
+					pf->int_name, pf);
+	if (err) {
+		dev_err(dev, "devm_request_threaded_irq for %s failed: %d\n",
+			pf->int_name, err);
+		ice_free_irq(pf, pf->oicr_irq);
+		return err;
+	}
+
+skip_req_irq:
+	ice_ena_misc_vector(pf);
+
+	ice_ena_ctrlq_interrupts(hw, pf->oicr_irq.index);
+	wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_irq.index),
+	     ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);
+
+	ice_flush(hw);
+	ice_irq_dynamic_ena(hw, NULL, NULL);
+
+	return 0;
+}
+
+/**
+ * ice_napi_add - register NAPI handler for the VSI
+ * @vsi: VSI for which NAPI handler is to be registered
+ *
+ * This function is only called in the driver's load path. Registering the NAPI
+ * handler is done in ice_vsi_alloc_q_vector() for all other cases (i.e. resume,
+ * reset/rebuild, etc.)
+ */
+static void ice_napi_add(struct ice_vsi *vsi)
+{
+	int v_idx;
+
+	if (!vsi->netdev)
+		return;
+
+	ice_for_each_q_vector(vsi, v_idx)
+		netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi,
+			       ice_napi_poll);
+}
+
+/**
+ * ice_set_ops - set netdev and ethtools ops for the given netdev
+ * @vsi: the VSI associated with the new netdev
+ */
+static void ice_set_ops(struct ice_vsi *vsi)
+{
+	struct net_device *netdev = vsi->netdev;
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+
+	if (ice_is_safe_mode(pf)) {
+		netdev->netdev_ops = &ice_netdev_safe_mode_ops;
+		ice_set_ethtool_safe_mode_ops(netdev);
+		return;
+	}
+
+	netdev->netdev_ops = &ice_netdev_ops;
+	netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
+	ice_set_ethtool_ops(netdev);
+
+	if (vsi->type != ICE_VSI_PF)
+		return;
+
+	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+			       NETDEV_XDP_ACT_XSK_ZEROCOPY |
+			       NETDEV_XDP_ACT_RX_SG;
+	netdev->xdp_zc_max_segs = ICE_MAX_BUF_TXD;
+}
+
+/**
+ * ice_set_netdev_features - set features for the given netdev
+ * @netdev: netdev instance
+ */
+static void ice_set_netdev_features(struct net_device *netdev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	bool is_dvm_ena = ice_is_dvm_ena(&pf->hw);
+	netdev_features_t csumo_features;
+	netdev_features_t vlano_features;
+	netdev_features_t dflt_features;
+	netdev_features_t tso_features;
+
+	if (ice_is_safe_mode(pf)) {
+		/* safe mode */
+		netdev->features = NETIF_F_SG | NETIF_F_HIGHDMA;
+		netdev->hw_features = netdev->features;
+		return;
+	}
+
+	dflt_features = NETIF_F_SG	|
+			NETIF_F_HIGHDMA	|
+			NETIF_F_NTUPLE	|
+			NETIF_F_RXHASH;
+
+	csumo_features = NETIF_F_RXCSUM	  |
+			 NETIF_F_IP_CSUM  |
+			 NETIF_F_SCTP_CRC |
+			 NETIF_F_IPV6_CSUM;
+
+	vlano_features = NETIF_F_HW_VLAN_CTAG_FILTER |
+			 NETIF_F_HW_VLAN_CTAG_TX     |
+			 NETIF_F_HW_VLAN_CTAG_RX;
+
+	/* Enable CTAG/STAG filtering by default in Double VLAN Mode (DVM) */
+	if (is_dvm_ena)
+		vlano_features |= NETIF_F_HW_VLAN_STAG_FILTER;
+
+	tso_features = NETIF_F_TSO			|
+		       NETIF_F_TSO_ECN			|
+		       NETIF_F_TSO6			|
+		       NETIF_F_GSO_GRE			|
+		       NETIF_F_GSO_UDP_TUNNEL		|
+		       NETIF_F_GSO_GRE_CSUM		|
+		       NETIF_F_GSO_UDP_TUNNEL_CSUM	|
+		       NETIF_F_GSO_PARTIAL		|
+		       NETIF_F_GSO_IPXIP4		|
+		       NETIF_F_GSO_IPXIP6		|
+		       NETIF_F_GSO_UDP_L4;
+
+	netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM |
+					NETIF_F_GSO_GRE_CSUM;
+	/* set features that user can change */
+	netdev->hw_features = dflt_features | csumo_features |
+			      vlano_features | tso_features;
+
+	/* add support for HW_CSUM on packets with MPLS header */
+	netdev->mpls_features =  NETIF_F_HW_CSUM |
+				 NETIF_F_TSO     |
+				 NETIF_F_TSO6;
+
+	/* enable features */
+	netdev->features |= netdev->hw_features;
+
+	netdev->hw_features |= NETIF_F_HW_TC;
+	netdev->hw_features |= NETIF_F_LOOPBACK;
+
+	/* encap and VLAN devices inherit default, csumo and tso features */
+	netdev->hw_enc_features |= dflt_features | csumo_features |
+				   tso_features;
+	netdev->vlan_features |= dflt_features | csumo_features |
+				 tso_features;
+
+	/* advertise support but don't enable by default since only one type of
+	 * VLAN offload can be enabled at a time (i.e. CTAG or STAG). When one
+	 * type turns on the other has to be turned off. This is enforced by the
+	 * ice_fix_features() ndo callback.
+	 */
+	if (is_dvm_ena)
+		netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX |
+			NETIF_F_HW_VLAN_STAG_TX;
+
+	/* Leave CRC / FCS stripping enabled by default, but allow the value to
+	 * be changed at runtime
+	 */
+	netdev->hw_features |= NETIF_F_RXFCS;
+
+	netif_set_tso_max_size(netdev, ICE_MAX_TSO_SIZE);
+}
+
+/**
+ * ice_fill_rss_lut - Fill the RSS lookup table with default values
+ * @lut: Lookup table
+ * @rss_table_size: Lookup table size
+ * @rss_size: Range of queue number for hashing
+ */
+void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
+{
+	u16 i;
+
+	for (i = 0; i < rss_table_size; i++)
+		lut[i] = i % rss_size;
+}
+
+/**
+ * ice_pf_vsi_setup - Set up a PF VSI
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ *
+ * Returns pointer to the successfully allocated VSI software struct
+ * on success, otherwise returns NULL on failure.
+ */
+static struct ice_vsi *
+ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
+{
+	struct ice_vsi_cfg_params params = {};
+
+	params.type = ICE_VSI_PF;
+	params.pi = pi;
+	params.flags = ICE_VSI_FLAG_INIT;
+
+	return ice_vsi_setup(pf, &params);
+}
+
+static struct ice_vsi *
+ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
+		   struct ice_channel *ch)
+{
+	struct ice_vsi_cfg_params params = {};
+
+	params.type = ICE_VSI_CHNL;
+	params.pi = pi;
+	params.ch = ch;
+	params.flags = ICE_VSI_FLAG_INIT;
+
+	return ice_vsi_setup(pf, &params);
+}
+
+/**
+ * ice_ctrl_vsi_setup - Set up a control VSI
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ *
+ * Returns pointer to the successfully allocated VSI software struct
+ * on success, otherwise returns NULL on failure.
+ */
+static struct ice_vsi *
+ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
+{
+	struct ice_vsi_cfg_params params = {};
+
+	params.type = ICE_VSI_CTRL;
+	params.pi = pi;
+	params.flags = ICE_VSI_FLAG_INIT;
+
+	return ice_vsi_setup(pf, &params);
+}
+
+/**
+ * ice_lb_vsi_setup - Set up a loopback VSI
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ *
+ * Returns pointer to the successfully allocated VSI software struct
+ * on success, otherwise returns NULL on failure.
+ */
+struct ice_vsi *
+ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
+{
+	struct ice_vsi_cfg_params params = {};
+
+	params.type = ICE_VSI_LB;
+	params.pi = pi;
+	params.flags = ICE_VSI_FLAG_INIT;
+
+	return ice_vsi_setup(pf, &params);
+}
+
+/**
+ * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
+ * @netdev: network interface to be adjusted
+ * @proto: VLAN TPID
+ * @vid: VLAN ID to be added
+ *
+ * net_device_ops implementation for adding VLAN IDs
+ */
+static int
+ice_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi_vlan_ops *vlan_ops;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_vlan vlan;
+	int ret;
+
+	/* VLAN 0 is added by default during load/reset */
+	if (!vid)
+		return 0;
+
+	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
+		usleep_range(1000, 2000);
+
+	/* Add multicast promisc rule for the VLAN ID to be added if
+	 * all-multicast is currently enabled.
+	 */
+	if (vsi->current_netdev_flags & IFF_ALLMULTI) {
+		ret = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
+					       ICE_MCAST_VLAN_PROMISC_BITS,
+					       vid);
+		if (ret)
+			goto finish;
+	}
+
+	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+
+	/* Add a switch rule for this VLAN ID so its corresponding VLAN tagged
+	 * packets aren't pruned by the device's internal switch on Rx
+	 */
+	vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0);
+	ret = vlan_ops->add_vlan(vsi, &vlan);
+	if (ret)
+		goto finish;
+
+	/* If all-multicast is currently enabled and this VLAN ID is only one
+	 * besides VLAN-0 we have to update look-up type of multicast promisc
+	 * rule for VLAN-0 from ICE_SW_LKUP_PROMISC to ICE_SW_LKUP_PROMISC_VLAN.
+	 */
+	if ((vsi->current_netdev_flags & IFF_ALLMULTI) &&
+	    ice_vsi_num_non_zero_vlans(vsi) == 1) {
+		ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+					   ICE_MCAST_PROMISC_BITS, 0);
+		ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
+					 ICE_MCAST_VLAN_PROMISC_BITS, 0);
+	}
+
+finish:
+	clear_bit(ICE_CFG_BUSY, vsi->state);
+
+	return ret;
+}
+
+/**
+ * ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload
+ * @netdev: network interface to be adjusted
+ * @proto: VLAN TPID
+ * @vid: VLAN ID to be removed
+ *
+ * net_device_ops implementation for removing VLAN IDs
+ */
+static int
+ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi_vlan_ops *vlan_ops;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_vlan vlan;
+	int ret;
+
+	/* don't allow removal of VLAN 0 */
+	if (!vid)
+		return 0;
+
+	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state))
+		usleep_range(1000, 2000);
+
+	ret = ice_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+				    ICE_MCAST_VLAN_PROMISC_BITS, vid);
+	if (ret) {
+		netdev_err(netdev, "Error clearing multicast promiscuous mode on VSI %i\n",
+			   vsi->vsi_num);
+		vsi->current_netdev_flags |= IFF_ALLMULTI;
+	}
+
+	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+
+	/* Make sure VLAN delete is successful before updating VLAN
+	 * information
+	 */
+	vlan = ICE_VLAN(be16_to_cpu(proto), vid, 0);
+	ret = vlan_ops->del_vlan(vsi, &vlan);
+	if (ret)
+		goto finish;
+
+	/* Remove multicast promisc rule for the removed VLAN ID if
+	 * all-multicast is enabled.
+	 */
+	if (vsi->current_netdev_flags & IFF_ALLMULTI)
+		ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+					   ICE_MCAST_VLAN_PROMISC_BITS, vid);
+
+	if (!ice_vsi_has_non_zero_vlans(vsi)) {
+		/* Update look-up type of multicast promisc rule for VLAN 0
+		 * from ICE_SW_LKUP_PROMISC_VLAN to ICE_SW_LKUP_PROMISC when
+		 * all-multicast is enabled and VLAN 0 is the only VLAN rule.
+		 */
+		if (vsi->current_netdev_flags & IFF_ALLMULTI) {
+			ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx,
+						   ICE_MCAST_VLAN_PROMISC_BITS,
+						   0);
+			ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx,
+						 ICE_MCAST_PROMISC_BITS, 0);
+		}
+	}
+
+finish:
+	clear_bit(ICE_CFG_BUSY, vsi->state);
+
+	return ret;
+}
+
+/**
+ * ice_rep_indr_tc_block_unbind
+ * @cb_priv: indirection block private data
+ */
+static void ice_rep_indr_tc_block_unbind(void *cb_priv)
+{
+	struct ice_indr_block_priv *indr_priv = cb_priv;
+
+	list_del(&indr_priv->list);
+	kfree(indr_priv);
+}
+
+/**
+ * ice_tc_indir_block_unregister - Unregister TC indirect block notifications
+ * @vsi: VSI struct which has the netdev
+ */
+static void ice_tc_indir_block_unregister(struct ice_vsi *vsi)
+{
+	struct ice_netdev_priv *np = netdev_priv(vsi->netdev);
+
+	flow_indr_dev_unregister(ice_indr_setup_tc_cb, np,
+				 ice_rep_indr_tc_block_unbind);
+}
+
+/**
+ * ice_tc_indir_block_register - Register TC indirect block notifications
+ * @vsi: VSI struct which has the netdev
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_tc_indir_block_register(struct ice_vsi *vsi)
+{
+	struct ice_netdev_priv *np;
+
+	if (!vsi || !vsi->netdev)
+		return -EINVAL;
+
+	np = netdev_priv(vsi->netdev);
+
+	INIT_LIST_HEAD(&np->tc_indr_block_priv_list);
+	return flow_indr_dev_register(ice_indr_setup_tc_cb, np);
+}
+
+/**
+ * ice_get_avail_q_count - Get count of queues in use
+ * @pf_qmap: bitmap to get queue use count from
+ * @lock: pointer to a mutex that protects access to pf_qmap
+ * @size: size of the bitmap
+ */
+static u16
+ice_get_avail_q_count(unsigned long *pf_qmap, struct mutex *lock, u16 size)
+{
+	unsigned long bit;
+	u16 count = 0;
+
+	mutex_lock(lock);
+	for_each_clear_bit(bit, pf_qmap, size)
+		count++;
+	mutex_unlock(lock);
+
+	return count;
+}
+
+/**
+ * ice_get_avail_txq_count - Get count of Tx queues in use
+ * @pf: pointer to an ice_pf instance
+ */
+u16 ice_get_avail_txq_count(struct ice_pf *pf)
+{
+	return ice_get_avail_q_count(pf->avail_txqs, &pf->avail_q_mutex,
+				     pf->max_pf_txqs);
+}
+
+/**
+ * ice_get_avail_rxq_count - Get count of Rx queues in use
+ * @pf: pointer to an ice_pf instance
+ */
+u16 ice_get_avail_rxq_count(struct ice_pf *pf)
+{
+	return ice_get_avail_q_count(pf->avail_rxqs, &pf->avail_q_mutex,
+				     pf->max_pf_rxqs);
+}
+
+/**
+ * ice_deinit_pf - Unrolls initialziations done by ice_init_pf
+ * @pf: board private structure to initialize
+ */
+static void ice_deinit_pf(struct ice_pf *pf)
+{
+	ice_service_task_stop(pf);
+	mutex_destroy(&pf->lag_mutex);
+	mutex_destroy(&pf->adev_mutex);
+	mutex_destroy(&pf->sw_mutex);
+	mutex_destroy(&pf->tc_mutex);
+	mutex_destroy(&pf->avail_q_mutex);
+	mutex_destroy(&pf->vfs.table_lock);
+
+	if (pf->avail_txqs) {
+		bitmap_free(pf->avail_txqs);
+		pf->avail_txqs = NULL;
+	}
+
+	if (pf->avail_rxqs) {
+		bitmap_free(pf->avail_rxqs);
+		pf->avail_rxqs = NULL;
+	}
+
+	if (pf->ptp.clock)
+		ptp_clock_unregister(pf->ptp.clock);
+}
+
+/**
+ * ice_set_pf_caps - set PFs capability flags
+ * @pf: pointer to the PF instance
+ */
+static void ice_set_pf_caps(struct ice_pf *pf)
+{
+	struct ice_hw_func_caps *func_caps = &pf->hw.func_caps;
+
+	clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+	if (func_caps->common_cap.rdma)
+		set_bit(ICE_FLAG_RDMA_ENA, pf->flags);
+	clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+	if (func_caps->common_cap.dcb)
+		set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+	clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
+	if (func_caps->common_cap.sr_iov_1_1) {
+		set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags);
+		pf->vfs.num_supported = min_t(int, func_caps->num_allocd_vfs,
+					      ICE_MAX_SRIOV_VFS);
+	}
+	clear_bit(ICE_FLAG_RSS_ENA, pf->flags);
+	if (func_caps->common_cap.rss_table_size)
+		set_bit(ICE_FLAG_RSS_ENA, pf->flags);
+
+	clear_bit(ICE_FLAG_FD_ENA, pf->flags);
+	if (func_caps->fd_fltr_guar > 0 || func_caps->fd_fltr_best_effort > 0) {
+		u16 unused;
+
+		/* ctrl_vsi_idx will be set to a valid value when flow director
+		 * is setup by ice_init_fdir
+		 */
+		pf->ctrl_vsi_idx = ICE_NO_VSI;
+		set_bit(ICE_FLAG_FD_ENA, pf->flags);
+		/* force guaranteed filter pool for PF */
+		ice_alloc_fd_guar_item(&pf->hw, &unused,
+				       func_caps->fd_fltr_guar);
+		/* force shared filter pool for PF */
+		ice_alloc_fd_shrd_item(&pf->hw, &unused,
+				       func_caps->fd_fltr_best_effort);
+	}
+
+	clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags);
+	if (func_caps->common_cap.ieee_1588)
+		set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags);
+
+	pf->max_pf_txqs = func_caps->common_cap.num_txq;
+	pf->max_pf_rxqs = func_caps->common_cap.num_rxq;
+}
+
+/**
+ * ice_init_pf - Initialize general software structures (struct ice_pf)
+ * @pf: board private structure to initialize
+ */
+static int ice_init_pf(struct ice_pf *pf)
+{
+	ice_set_pf_caps(pf);
+
+	mutex_init(&pf->sw_mutex);
+	mutex_init(&pf->tc_mutex);
+	mutex_init(&pf->adev_mutex);
+	mutex_init(&pf->lag_mutex);
+
+	INIT_HLIST_HEAD(&pf->aq_wait_list);
+	spin_lock_init(&pf->aq_wait_lock);
+	init_waitqueue_head(&pf->aq_wait_queue);
+
+	init_waitqueue_head(&pf->reset_wait_queue);
+
+	/* setup service timer and periodic service task */
+	timer_setup(&pf->serv_tmr, ice_service_timer, 0);
+	pf->serv_tmr_period = HZ;
+	INIT_WORK(&pf->serv_task, ice_service_task);
+	clear_bit(ICE_SERVICE_SCHED, pf->state);
+
+	mutex_init(&pf->avail_q_mutex);
+	pf->avail_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL);
+	if (!pf->avail_txqs)
+		return -ENOMEM;
+
+	pf->avail_rxqs = bitmap_zalloc(pf->max_pf_rxqs, GFP_KERNEL);
+	if (!pf->avail_rxqs) {
+		bitmap_free(pf->avail_txqs);
+		pf->avail_txqs = NULL;
+		return -ENOMEM;
+	}
+
+	mutex_init(&pf->vfs.table_lock);
+	hash_init(pf->vfs.table);
+	ice_mbx_init_snapshot(&pf->hw);
+
+	return 0;
+}
+
+/**
+ * ice_is_wol_supported - check if WoL is supported
+ * @hw: pointer to hardware info
+ *
+ * Check if WoL is supported based on the HW configuration.
+ * Returns true if NVM supports and enables WoL for this port, false otherwise
+ */
+bool ice_is_wol_supported(struct ice_hw *hw)
+{
+	u16 wol_ctrl;
+
+	/* A bit set to 1 in the NVM Software Reserved Word 2 (WoL control
+	 * word) indicates WoL is not supported on the corresponding PF ID.
+	 */
+	if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, &wol_ctrl))
+		return false;
+
+	return !(BIT(hw->port_info->lport) & wol_ctrl);
+}
+
+/**
+ * ice_vsi_recfg_qs - Change the number of queues on a VSI
+ * @vsi: VSI being changed
+ * @new_rx: new number of Rx queues
+ * @new_tx: new number of Tx queues
+ * @locked: is adev device_lock held
+ *
+ * Only change the number of queues if new_tx, or new_rx is non-0.
+ *
+ * Returns 0 on success.
+ */
+int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked)
+{
+	struct ice_pf *pf = vsi->back;
+	int err = 0, timeout = 50;
+
+	if (!new_rx && !new_tx)
+		return -EINVAL;
+
+	while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(1000, 2000);
+	}
+
+	if (new_tx)
+		vsi->req_txq = (u16)new_tx;
+	if (new_rx)
+		vsi->req_rxq = (u16)new_rx;
+
+	/* set for the next time the netdev is started */
+	if (!netif_running(vsi->netdev)) {
+		ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
+		dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n");
+		goto done;
+	}
+
+	ice_vsi_close(vsi);
+	ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
+	ice_pf_dcb_recfg(pf, locked);
+	ice_vsi_open(vsi);
+done:
+	clear_bit(ICE_CFG_BUSY, pf->state);
+	return err;
+}
+
+/**
+ * ice_set_safe_mode_vlan_cfg - configure PF VSI to allow all VLANs in safe mode
+ * @pf: PF to configure
+ *
+ * No VLAN offloads/filtering are advertised in safe mode so make sure the PF
+ * VSI can still Tx/Rx VLAN tagged packets.
+ */
+static void ice_set_safe_mode_vlan_cfg(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi = ice_get_main_vsi(pf);
+	struct ice_vsi_ctx *ctxt;
+	struct ice_hw *hw;
+	int status;
+
+	if (!vsi)
+		return;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return;
+
+	hw = &pf->hw;
+	ctxt->info = vsi->info;
+
+	ctxt->info.valid_sections =
+		cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
+			    ICE_AQ_VSI_PROP_SECURITY_VALID |
+			    ICE_AQ_VSI_PROP_SW_VALID);
+
+	/* disable VLAN anti-spoof */
+	ctxt->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+				  ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+
+	/* disable VLAN pruning and keep all other settings */
+	ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+
+	/* allow all VLANs on Tx and don't strip on Rx */
+	ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL |
+		ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING;
+
+	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (status) {
+		dev_err(ice_pf_to_dev(vsi->back), "Failed to update VSI for safe mode VLANs, err %d aq_err %s\n",
+			status, ice_aq_str(hw->adminq.sq_last_status));
+	} else {
+		vsi->info.sec_flags = ctxt->info.sec_flags;
+		vsi->info.sw_flags2 = ctxt->info.sw_flags2;
+		vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags;
+	}
+
+	kfree(ctxt);
+}
+
+/**
+ * ice_log_pkg_init - log result of DDP package load
+ * @hw: pointer to hardware info
+ * @state: state of package load
+ */
+static void ice_log_pkg_init(struct ice_hw *hw, enum ice_ddp_state state)
+{
+	struct ice_pf *pf = hw->back;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+
+	switch (state) {
+	case ICE_DDP_PKG_SUCCESS:
+		dev_info(dev, "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n",
+			 hw->active_pkg_name,
+			 hw->active_pkg_ver.major,
+			 hw->active_pkg_ver.minor,
+			 hw->active_pkg_ver.update,
+			 hw->active_pkg_ver.draft);
+		break;
+	case ICE_DDP_PKG_SAME_VERSION_ALREADY_LOADED:
+		dev_info(dev, "DDP package already present on device: %s version %d.%d.%d.%d\n",
+			 hw->active_pkg_name,
+			 hw->active_pkg_ver.major,
+			 hw->active_pkg_ver.minor,
+			 hw->active_pkg_ver.update,
+			 hw->active_pkg_ver.draft);
+		break;
+	case ICE_DDP_PKG_ALREADY_LOADED_NOT_SUPPORTED:
+		dev_err(dev, "The device has a DDP package that is not supported by the driver.  The device has package '%s' version %d.%d.x.x.  The driver requires version %d.%d.x.x.  Entering Safe Mode.\n",
+			hw->active_pkg_name,
+			hw->active_pkg_ver.major,
+			hw->active_pkg_ver.minor,
+			ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
+		break;
+	case ICE_DDP_PKG_COMPATIBLE_ALREADY_LOADED:
+		dev_info(dev, "The driver could not load the DDP package file because a compatible DDP package is already present on the device.  The device has package '%s' version %d.%d.%d.%d.  The package file found by the driver: '%s' version %d.%d.%d.%d.\n",
+			 hw->active_pkg_name,
+			 hw->active_pkg_ver.major,
+			 hw->active_pkg_ver.minor,
+			 hw->active_pkg_ver.update,
+			 hw->active_pkg_ver.draft,
+			 hw->pkg_name,
+			 hw->pkg_ver.major,
+			 hw->pkg_ver.minor,
+			 hw->pkg_ver.update,
+			 hw->pkg_ver.draft);
+		break;
+	case ICE_DDP_PKG_FW_MISMATCH:
+		dev_err(dev, "The firmware loaded on the device is not compatible with the DDP package.  Please update the device's NVM.  Entering safe mode.\n");
+		break;
+	case ICE_DDP_PKG_INVALID_FILE:
+		dev_err(dev, "The DDP package file is invalid. Entering Safe Mode.\n");
+		break;
+	case ICE_DDP_PKG_FILE_VERSION_TOO_HIGH:
+		dev_err(dev, "The DDP package file version is higher than the driver supports.  Please use an updated driver.  Entering Safe Mode.\n");
+		break;
+	case ICE_DDP_PKG_FILE_VERSION_TOO_LOW:
+		dev_err(dev, "The DDP package file version is lower than the driver supports.  The driver requires version %d.%d.x.x.  Please use an updated DDP Package file.  Entering Safe Mode.\n",
+			ICE_PKG_SUPP_VER_MAJ, ICE_PKG_SUPP_VER_MNR);
+		break;
+	case ICE_DDP_PKG_FILE_SIGNATURE_INVALID:
+		dev_err(dev, "The DDP package could not be loaded because its signature is not valid.  Please use a valid DDP Package.  Entering Safe Mode.\n");
+		break;
+	case ICE_DDP_PKG_FILE_REVISION_TOO_LOW:
+		dev_err(dev, "The DDP Package could not be loaded because its security revision is too low.  Please use an updated DDP Package.  Entering Safe Mode.\n");
+		break;
+	case ICE_DDP_PKG_LOAD_ERROR:
+		dev_err(dev, "An error occurred on the device while loading the DDP package.  The device will be reset.\n");
+		/* poll for reset to complete */
+		if (ice_check_reset(hw))
+			dev_err(dev, "Error resetting device. Please reload the driver\n");
+		break;
+	case ICE_DDP_PKG_ERR:
+	default:
+		dev_err(dev, "An unknown error occurred when loading the DDP package.  Entering Safe Mode.\n");
+		break;
+	}
+}
+
+/**
+ * ice_load_pkg - load/reload the DDP Package file
+ * @firmware: firmware structure when firmware requested or NULL for reload
+ * @pf: pointer to the PF instance
+ *
+ * Called on probe and post CORER/GLOBR rebuild to load DDP Package and
+ * initialize HW tables.
+ */
+static void
+ice_load_pkg(const struct firmware *firmware, struct ice_pf *pf)
+{
+	enum ice_ddp_state state = ICE_DDP_PKG_ERR;
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+
+	/* Load DDP Package */
+	if (firmware && !hw->pkg_copy) {
+		state = ice_copy_and_init_pkg(hw, firmware->data,
+					      firmware->size);
+		ice_log_pkg_init(hw, state);
+	} else if (!firmware && hw->pkg_copy) {
+		/* Reload package during rebuild after CORER/GLOBR reset */
+		state = ice_init_pkg(hw, hw->pkg_copy, hw->pkg_size);
+		ice_log_pkg_init(hw, state);
+	} else {
+		dev_err(dev, "The DDP package file failed to load. Entering Safe Mode.\n");
+	}
+
+	if (!ice_is_init_pkg_successful(state)) {
+		/* Safe Mode */
+		clear_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
+		return;
+	}
+
+	/* Successful download package is the precondition for advanced
+	 * features, hence setting the ICE_FLAG_ADV_FEATURES flag
+	 */
+	set_bit(ICE_FLAG_ADV_FEATURES, pf->flags);
+}
+
+/**
+ * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines
+ * @pf: pointer to the PF structure
+ *
+ * There is no error returned here because the driver should be able to handle
+ * 128 Byte cache lines, so we only print a warning in case issues are seen,
+ * specifically with Tx.
+ */
+static void ice_verify_cacheline_size(struct ice_pf *pf)
+{
+	if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M)
+		dev_warn(ice_pf_to_dev(pf), "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
+			 ICE_CACHE_LINE_BYTES);
+}
+
+/**
+ * ice_send_version - update firmware with driver version
+ * @pf: PF struct
+ *
+ * Returns 0 on success, else error code
+ */
+static int ice_send_version(struct ice_pf *pf)
+{
+	struct ice_driver_ver dv;
+
+	dv.major_ver = 0xff;
+	dv.minor_ver = 0xff;
+	dv.build_ver = 0xff;
+	dv.subbuild_ver = 0;
+	strscpy((char *)dv.driver_string, UTS_RELEASE,
+		sizeof(dv.driver_string));
+	return ice_aq_send_driver_ver(&pf->hw, &dv, NULL);
+}
+
+/**
+ * ice_init_fdir - Initialize flow director VSI and configuration
+ * @pf: pointer to the PF instance
+ *
+ * returns 0 on success, negative on error
+ */
+static int ice_init_fdir(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_vsi *ctrl_vsi;
+	int err;
+
+	/* Side Band Flow Director needs to have a control VSI.
+	 * Allocate it and store it in the PF.
+	 */
+	ctrl_vsi = ice_ctrl_vsi_setup(pf, pf->hw.port_info);
+	if (!ctrl_vsi) {
+		dev_dbg(dev, "could not create control VSI\n");
+		return -ENOMEM;
+	}
+
+	err = ice_vsi_open_ctrl(ctrl_vsi);
+	if (err) {
+		dev_dbg(dev, "could not open control VSI\n");
+		goto err_vsi_open;
+	}
+
+	mutex_init(&pf->hw.fdir_fltr_lock);
+
+	err = ice_fdir_create_dflt_rules(pf);
+	if (err)
+		goto err_fdir_rule;
+
+	return 0;
+
+err_fdir_rule:
+	ice_fdir_release_flows(&pf->hw);
+	ice_vsi_close(ctrl_vsi);
+err_vsi_open:
+	ice_vsi_release(ctrl_vsi);
+	if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
+		pf->vsi[pf->ctrl_vsi_idx] = NULL;
+		pf->ctrl_vsi_idx = ICE_NO_VSI;
+	}
+	return err;
+}
+
+static void ice_deinit_fdir(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi = ice_get_ctrl_vsi(pf);
+
+	if (!vsi)
+		return;
+
+	ice_vsi_manage_fdir(vsi, false);
+	ice_vsi_release(vsi);
+	if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
+		pf->vsi[pf->ctrl_vsi_idx] = NULL;
+		pf->ctrl_vsi_idx = ICE_NO_VSI;
+	}
+
+	mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
+}
+
+/**
+ * ice_get_opt_fw_name - return optional firmware file name or NULL
+ * @pf: pointer to the PF instance
+ */
+static char *ice_get_opt_fw_name(struct ice_pf *pf)
+{
+	/* Optional firmware name same as default with additional dash
+	 * followed by a EUI-64 identifier (PCIe Device Serial Number)
+	 */
+	struct pci_dev *pdev = pf->pdev;
+	char *opt_fw_filename;
+	u64 dsn;
+
+	/* Determine the name of the optional file using the DSN (two
+	 * dwords following the start of the DSN Capability).
+	 */
+	dsn = pci_get_dsn(pdev);
+	if (!dsn)
+		return NULL;
+
+	opt_fw_filename = kzalloc(NAME_MAX, GFP_KERNEL);
+	if (!opt_fw_filename)
+		return NULL;
+
+	snprintf(opt_fw_filename, NAME_MAX, "%sice-%016llx.pkg",
+		 ICE_DDP_PKG_PATH, dsn);
+
+	return opt_fw_filename;
+}
+
+/**
+ * ice_request_fw - Device initialization routine
+ * @pf: pointer to the PF instance
+ */
+static void ice_request_fw(struct ice_pf *pf)
+{
+	char *opt_fw_filename = ice_get_opt_fw_name(pf);
+	const struct firmware *firmware = NULL;
+	struct device *dev = ice_pf_to_dev(pf);
+	int err = 0;
+
+	/* optional device-specific DDP (if present) overrides the default DDP
+	 * package file. kernel logs a debug message if the file doesn't exist,
+	 * and warning messages for other errors.
+	 */
+	if (opt_fw_filename) {
+		err = firmware_request_nowarn(&firmware, opt_fw_filename, dev);
+		if (err) {
+			kfree(opt_fw_filename);
+			goto dflt_pkg_load;
+		}
+
+		/* request for firmware was successful. Download to device */
+		ice_load_pkg(firmware, pf);
+		kfree(opt_fw_filename);
+		release_firmware(firmware);
+		return;
+	}
+
+dflt_pkg_load:
+	err = request_firmware(&firmware, ICE_DDP_PKG_FILE, dev);
+	if (err) {
+		dev_err(dev, "The DDP package file was not found or could not be read. Entering Safe Mode\n");
+		return;
+	}
+
+	/* request for firmware was successful. Download to device */
+	ice_load_pkg(firmware, pf);
+	release_firmware(firmware);
+}
+
+/**
+ * ice_print_wake_reason - show the wake up cause in the log
+ * @pf: pointer to the PF struct
+ */
+static void ice_print_wake_reason(struct ice_pf *pf)
+{
+	u32 wus = pf->wakeup_reason;
+	const char *wake_str;
+
+	/* if no wake event, nothing to print */
+	if (!wus)
+		return;
+
+	if (wus & PFPM_WUS_LNKC_M)
+		wake_str = "Link\n";
+	else if (wus & PFPM_WUS_MAG_M)
+		wake_str = "Magic Packet\n";
+	else if (wus & PFPM_WUS_MNG_M)
+		wake_str = "Management\n";
+	else if (wus & PFPM_WUS_FW_RST_WK_M)
+		wake_str = "Firmware Reset\n";
+	else
+		wake_str = "Unknown\n";
+
+	dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str);
+}
+
+/**
+ * ice_register_netdev - register netdev
+ * @vsi: pointer to the VSI struct
+ */
+static int ice_register_netdev(struct ice_vsi *vsi)
+{
+	int err;
+
+	if (!vsi || !vsi->netdev)
+		return -EIO;
+
+	err = register_netdev(vsi->netdev);
+	if (err)
+		return err;
+
+	set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
+	netif_carrier_off(vsi->netdev);
+	netif_tx_stop_all_queues(vsi->netdev);
+
+	return 0;
+}
+
+static void ice_unregister_netdev(struct ice_vsi *vsi)
+{
+	if (!vsi || !vsi->netdev)
+		return;
+
+	unregister_netdev(vsi->netdev);
+	clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
+}
+
+/**
+ * ice_cfg_netdev - Allocate, configure and register a netdev
+ * @vsi: the VSI associated with the new netdev
+ *
+ * Returns 0 on success, negative value on failure
+ */
+static int ice_cfg_netdev(struct ice_vsi *vsi)
+{
+	struct ice_netdev_priv *np;
+	struct net_device *netdev;
+	u8 mac_addr[ETH_ALEN];
+
+	netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq,
+				    vsi->alloc_rxq);
+	if (!netdev)
+		return -ENOMEM;
+
+	set_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
+	vsi->netdev = netdev;
+	np = netdev_priv(netdev);
+	np->vsi = vsi;
+
+	ice_set_netdev_features(netdev);
+	ice_set_ops(vsi);
+
+	if (vsi->type == ICE_VSI_PF) {
+		SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back));
+		ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
+		eth_hw_addr_set(netdev, mac_addr);
+	}
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+
+	/* Setup netdev TC information */
+	ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc);
+
+	netdev->max_mtu = ICE_MAX_MTU;
+
+	return 0;
+}
+
+static void ice_decfg_netdev(struct ice_vsi *vsi)
+{
+	clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
+	free_netdev(vsi->netdev);
+	vsi->netdev = NULL;
+}
+
+static int ice_start_eth(struct ice_vsi *vsi)
+{
+	int err;
+
+	err = ice_init_mac_fltr(vsi->back);
+	if (err)
+		return err;
+
+	err = ice_vsi_open(vsi);
+	if (err)
+		ice_fltr_remove_all(vsi);
+
+	return err;
+}
+
+static void ice_stop_eth(struct ice_vsi *vsi)
+{
+	ice_fltr_remove_all(vsi);
+	ice_vsi_close(vsi);
+}
+
+static int ice_init_eth(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi = ice_get_main_vsi(pf);
+	int err;
+
+	if (!vsi)
+		return -EINVAL;
+
+	/* init channel list */
+	INIT_LIST_HEAD(&vsi->ch_list);
+
+	err = ice_cfg_netdev(vsi);
+	if (err)
+		return err;
+	/* Setup DCB netlink interface */
+	ice_dcbnl_setup(vsi);
+
+	err = ice_init_mac_fltr(pf);
+	if (err)
+		goto err_init_mac_fltr;
+
+	err = ice_devlink_create_pf_port(pf);
+	if (err)
+		goto err_devlink_create_pf_port;
+
+	SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port);
+
+	err = ice_register_netdev(vsi);
+	if (err)
+		goto err_register_netdev;
+
+	err = ice_tc_indir_block_register(vsi);
+	if (err)
+		goto err_tc_indir_block_register;
+
+	ice_napi_add(vsi);
+
+	return 0;
+
+err_tc_indir_block_register:
+	ice_unregister_netdev(vsi);
+err_register_netdev:
+	ice_devlink_destroy_pf_port(pf);
+err_devlink_create_pf_port:
+err_init_mac_fltr:
+	ice_decfg_netdev(vsi);
+	return err;
+}
+
+static void ice_deinit_eth(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi = ice_get_main_vsi(pf);
+
+	if (!vsi)
+		return;
+
+	ice_vsi_close(vsi);
+	ice_unregister_netdev(vsi);
+	ice_devlink_destroy_pf_port(pf);
+	ice_tc_indir_block_unregister(vsi);
+	ice_decfg_netdev(vsi);
+}
+
+/**
+ * ice_wait_for_fw - wait for full FW readiness
+ * @hw: pointer to the hardware structure
+ * @timeout: milliseconds that can elapse before timing out
+ */
+static int ice_wait_for_fw(struct ice_hw *hw, u32 timeout)
+{
+	int fw_loading;
+	u32 elapsed = 0;
+
+	while (elapsed <= timeout) {
+		fw_loading = rd32(hw, GL_MNG_FWSM) & GL_MNG_FWSM_FW_LOADING_M;
+
+		/* firmware was not yet loaded, we have to wait more */
+		if (fw_loading) {
+			elapsed += 100;
+			msleep(100);
+			continue;
+		}
+		return 0;
+	}
+
+	return -ETIMEDOUT;
+}
+
+static int ice_init_dev(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	int err;
+
+	err = ice_init_hw(hw);
+	if (err) {
+		dev_err(dev, "ice_init_hw failed: %d\n", err);
+		return err;
+	}
+
+	/* Some cards require longer initialization times
+	 * due to necessity of loading FW from an external source.
+	 * This can take even half a minute.
+	 */
+	if (ice_is_pf_c827(hw)) {
+		err = ice_wait_for_fw(hw, 30000);
+		if (err) {
+			dev_err(dev, "ice_wait_for_fw timed out");
+			return err;
+		}
+	}
+
+	ice_init_feature_support(pf);
+
+	ice_request_fw(pf);
+
+	/* if ice_request_fw fails, ICE_FLAG_ADV_FEATURES bit won't be
+	 * set in pf->state, which will cause ice_is_safe_mode to return
+	 * true
+	 */
+	if (ice_is_safe_mode(pf)) {
+		/* we already got function/device capabilities but these don't
+		 * reflect what the driver needs to do in safe mode. Instead of
+		 * adding conditional logic everywhere to ignore these
+		 * device/function capabilities, override them.
+		 */
+		ice_set_safe_mode_caps(hw);
+	}
+
+	err = ice_init_pf(pf);
+	if (err) {
+		dev_err(dev, "ice_init_pf failed: %d\n", err);
+		goto err_init_pf;
+	}
+
+	pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port;
+	pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port;
+	pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
+	pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared;
+	if (pf->hw.tnl.valid_count[TNL_VXLAN]) {
+		pf->hw.udp_tunnel_nic.tables[0].n_entries =
+			pf->hw.tnl.valid_count[TNL_VXLAN];
+		pf->hw.udp_tunnel_nic.tables[0].tunnel_types =
+			UDP_TUNNEL_TYPE_VXLAN;
+	}
+	if (pf->hw.tnl.valid_count[TNL_GENEVE]) {
+		pf->hw.udp_tunnel_nic.tables[1].n_entries =
+			pf->hw.tnl.valid_count[TNL_GENEVE];
+		pf->hw.udp_tunnel_nic.tables[1].tunnel_types =
+			UDP_TUNNEL_TYPE_GENEVE;
+	}
+
+	err = ice_init_interrupt_scheme(pf);
+	if (err) {
+		dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err);
+		err = -EIO;
+		goto err_init_interrupt_scheme;
+	}
+
+	/* In case of MSIX we are going to setup the misc vector right here
+	 * to handle admin queue events etc. In case of legacy and MSI
+	 * the misc functionality and queue processing is combined in
+	 * the same vector and that gets setup at open.
+	 */
+	err = ice_req_irq_msix_misc(pf);
+	if (err) {
+		dev_err(dev, "setup of misc vector failed: %d\n", err);
+		goto err_req_irq_msix_misc;
+	}
+
+	return 0;
+
+err_req_irq_msix_misc:
+	ice_clear_interrupt_scheme(pf);
+err_init_interrupt_scheme:
+	ice_deinit_pf(pf);
+err_init_pf:
+	ice_deinit_hw(hw);
+	return err;
+}
+
+static void ice_deinit_dev(struct ice_pf *pf)
+{
+	ice_free_irq_msix_misc(pf);
+	ice_deinit_pf(pf);
+	ice_deinit_hw(&pf->hw);
+
+	/* Service task is already stopped, so call reset directly. */
+	ice_reset(&pf->hw, ICE_RESET_PFR);
+	pci_wait_for_pending_transaction(pf->pdev);
+	ice_clear_interrupt_scheme(pf);
+}
+
+static void ice_init_features(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+
+	if (ice_is_safe_mode(pf))
+		return;
+
+	/* initialize DDP driven features */
+	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+		ice_ptp_init(pf);
+
+	if (ice_is_feature_supported(pf, ICE_F_GNSS))
+		ice_gnss_init(pf);
+
+	/* Note: Flow director init failure is non-fatal to load */
+	if (ice_init_fdir(pf))
+		dev_err(dev, "could not initialize flow director\n");
+
+	/* Note: DCB init failure is non-fatal to load */
+	if (ice_init_pf_dcb(pf, false)) {
+		clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
+		clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
+	} else {
+		ice_cfg_lldp_mib_change(&pf->hw, true);
+	}
+
+	if (ice_init_lag(pf))
+		dev_warn(dev, "Failed to init link aggregation support\n");
+}
+
+static void ice_deinit_features(struct ice_pf *pf)
+{
+	if (ice_is_safe_mode(pf))
+		return;
+
+	ice_deinit_lag(pf);
+	if (test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
+		ice_cfg_lldp_mib_change(&pf->hw, false);
+	ice_deinit_fdir(pf);
+	if (ice_is_feature_supported(pf, ICE_F_GNSS))
+		ice_gnss_exit(pf);
+	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+		ice_ptp_release(pf);
+}
+
+static void ice_init_wakeup(struct ice_pf *pf)
+{
+	/* Save wakeup reason register for later use */
+	pf->wakeup_reason = rd32(&pf->hw, PFPM_WUS);
+
+	/* check for a power management event */
+	ice_print_wake_reason(pf);
+
+	/* clear wake status, all bits */
+	wr32(&pf->hw, PFPM_WUS, U32_MAX);
+
+	/* Disable WoL at init, wait for user to enable */
+	device_set_wakeup_enable(ice_pf_to_dev(pf), false);
+}
+
+static int ice_init_link(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	int err;
+
+	err = ice_init_link_events(pf->hw.port_info);
+	if (err) {
+		dev_err(dev, "ice_init_link_events failed: %d\n", err);
+		return err;
+	}
+
+	/* not a fatal error if this fails */
+	err = ice_init_nvm_phy_type(pf->hw.port_info);
+	if (err)
+		dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err);
+
+	/* not a fatal error if this fails */
+	err = ice_update_link_info(pf->hw.port_info);
+	if (err)
+		dev_err(dev, "ice_update_link_info failed: %d\n", err);
+
+	ice_init_link_dflt_override(pf->hw.port_info);
+
+	ice_check_link_cfg_err(pf,
+			       pf->hw.port_info->phy.link_info.link_cfg_err);
+
+	/* if media available, initialize PHY settings */
+	if (pf->hw.port_info->phy.link_info.link_info &
+	    ICE_AQ_MEDIA_AVAILABLE) {
+		/* not a fatal error if this fails */
+		err = ice_init_phy_user_cfg(pf->hw.port_info);
+		if (err)
+			dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err);
+
+		if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) {
+			struct ice_vsi *vsi = ice_get_main_vsi(pf);
+
+			if (vsi)
+				ice_configure_phy(vsi);
+		}
+	} else {
+		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+	}
+
+	return err;
+}
+
+static int ice_init_pf_sw(struct ice_pf *pf)
+{
+	bool dvm = ice_is_dvm_ena(&pf->hw);
+	struct ice_vsi *vsi;
+	int err;
+
+	/* create switch struct for the switch element created by FW on boot */
+	pf->first_sw = kzalloc(sizeof(*pf->first_sw), GFP_KERNEL);
+	if (!pf->first_sw)
+		return -ENOMEM;
+
+	if (pf->hw.evb_veb)
+		pf->first_sw->bridge_mode = BRIDGE_MODE_VEB;
+	else
+		pf->first_sw->bridge_mode = BRIDGE_MODE_VEPA;
+
+	pf->first_sw->pf = pf;
+
+	/* record the sw_id available for later use */
+	pf->first_sw->sw_id = pf->hw.port_info->sw_id;
+
+	err = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL);
+	if (err)
+		goto err_aq_set_port_params;
+
+	vsi = ice_pf_vsi_setup(pf, pf->hw.port_info);
+	if (!vsi) {
+		err = -ENOMEM;
+		goto err_pf_vsi_setup;
+	}
+
+	return 0;
+
+err_pf_vsi_setup:
+err_aq_set_port_params:
+	kfree(pf->first_sw);
+	return err;
+}
+
+static void ice_deinit_pf_sw(struct ice_pf *pf)
+{
+	struct ice_vsi *vsi = ice_get_main_vsi(pf);
+
+	if (!vsi)
+		return;
+
+	ice_vsi_release(vsi);
+	kfree(pf->first_sw);
+}
+
+static int ice_alloc_vsis(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+
+	pf->num_alloc_vsi = pf->hw.func_caps.guar_num_vsi;
+	if (!pf->num_alloc_vsi)
+		return -EIO;
+
+	if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
+		dev_warn(dev,
+			 "limiting the VSI count due to UDP tunnel limitation %d > %d\n",
+			 pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
+		pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
+	}
+
+	pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi),
+			       GFP_KERNEL);
+	if (!pf->vsi)
+		return -ENOMEM;
+
+	pf->vsi_stats = devm_kcalloc(dev, pf->num_alloc_vsi,
+				     sizeof(*pf->vsi_stats), GFP_KERNEL);
+	if (!pf->vsi_stats) {
+		devm_kfree(dev, pf->vsi);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void ice_dealloc_vsis(struct ice_pf *pf)
+{
+	devm_kfree(ice_pf_to_dev(pf), pf->vsi_stats);
+	pf->vsi_stats = NULL;
+
+	pf->num_alloc_vsi = 0;
+	devm_kfree(ice_pf_to_dev(pf), pf->vsi);
+	pf->vsi = NULL;
+}
+
+static int ice_init_devlink(struct ice_pf *pf)
+{
+	int err;
+
+	err = ice_devlink_register_params(pf);
+	if (err)
+		return err;
+
+	ice_devlink_init_regions(pf);
+	ice_devlink_register(pf);
+
+	return 0;
+}
+
+static void ice_deinit_devlink(struct ice_pf *pf)
+{
+	ice_devlink_unregister(pf);
+	ice_devlink_destroy_regions(pf);
+	ice_devlink_unregister_params(pf);
+}
+
+static int ice_init(struct ice_pf *pf)
+{
+	int err;
+
+	err = ice_init_dev(pf);
+	if (err)
+		return err;
+
+	err = ice_alloc_vsis(pf);
+	if (err)
+		goto err_alloc_vsis;
+
+	err = ice_init_pf_sw(pf);
+	if (err)
+		goto err_init_pf_sw;
+
+	ice_init_wakeup(pf);
+
+	err = ice_init_link(pf);
+	if (err)
+		goto err_init_link;
+
+	err = ice_send_version(pf);
+	if (err)
+		goto err_init_link;
+
+	ice_verify_cacheline_size(pf);
+
+	if (ice_is_safe_mode(pf))
+		ice_set_safe_mode_vlan_cfg(pf);
+	else
+		/* print PCI link speed and width */
+		pcie_print_link_status(pf->pdev);
+
+	/* ready to go, so clear down state bit */
+	clear_bit(ICE_DOWN, pf->state);
+	clear_bit(ICE_SERVICE_DIS, pf->state);
+
+	/* since everything is good, start the service timer */
+	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
+
+	return 0;
+
+err_init_link:
+	ice_deinit_pf_sw(pf);
+err_init_pf_sw:
+	ice_dealloc_vsis(pf);
+err_alloc_vsis:
+	ice_deinit_dev(pf);
+	return err;
+}
+
+static void ice_deinit(struct ice_pf *pf)
+{
+	set_bit(ICE_SERVICE_DIS, pf->state);
+	set_bit(ICE_DOWN, pf->state);
+
+	ice_deinit_pf_sw(pf);
+	ice_dealloc_vsis(pf);
+	ice_deinit_dev(pf);
+}
+
+/**
+ * ice_load - load pf by init hw and starting VSI
+ * @pf: pointer to the pf instance
+ */
+int ice_load(struct ice_pf *pf)
+{
+	struct ice_vsi_cfg_params params = {};
+	struct ice_vsi *vsi;
+	int err;
+
+	err = ice_init_dev(pf);
+	if (err)
+		return err;
+
+	vsi = ice_get_main_vsi(pf);
+
+	params = ice_vsi_to_params(vsi);
+	params.flags = ICE_VSI_FLAG_INIT;
+
+	rtnl_lock();
+	err = ice_vsi_cfg(vsi, &params);
+	if (err)
+		goto err_vsi_cfg;
+
+	err = ice_start_eth(ice_get_main_vsi(pf));
+	if (err)
+		goto err_start_eth;
+	rtnl_unlock();
+
+	err = ice_init_rdma(pf);
+	if (err)
+		goto err_init_rdma;
+
+	ice_init_features(pf);
+	ice_service_task_restart(pf);
+
+	clear_bit(ICE_DOWN, pf->state);
+
+	return 0;
+
+err_init_rdma:
+	ice_vsi_close(ice_get_main_vsi(pf));
+	rtnl_lock();
+err_start_eth:
+	ice_vsi_decfg(ice_get_main_vsi(pf));
+err_vsi_cfg:
+	rtnl_unlock();
+	ice_deinit_dev(pf);
+	return err;
+}
+
+/**
+ * ice_unload - unload pf by stopping VSI and deinit hw
+ * @pf: pointer to the pf instance
+ */
+void ice_unload(struct ice_pf *pf)
+{
+	ice_deinit_features(pf);
+	ice_deinit_rdma(pf);
+	rtnl_lock();
+	ice_stop_eth(ice_get_main_vsi(pf));
+	ice_vsi_decfg(ice_get_main_vsi(pf));
+	rtnl_unlock();
+	ice_deinit_dev(pf);
+}
+
+/**
+ * ice_probe - Device initialization routine
+ * @pdev: PCI device information struct
+ * @ent: entry in ice_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
+{
+	struct device *dev = &pdev->dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int err;
+
+	if (pdev->is_virtfn) {
+		dev_err(dev, "can't probe a virtual function\n");
+		return -EINVAL;
+	}
+
+	/* when under a kdump kernel initiate a reset before enabling the
+	 * device in order to clear out any pending DMA transactions. These
+	 * transactions can cause some systems to machine check when doing
+	 * the pcim_enable_device() below.
+	 */
+	if (is_kdump_kernel()) {
+		pci_save_state(pdev);
+		pci_clear_master(pdev);
+		err = pcie_flr(pdev);
+		if (err)
+			return err;
+		pci_restore_state(pdev);
+	}
+
+	/* this driver uses devres, see
+	 * Documentation/driver-api/driver-model/devres.rst
+	 */
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = pcim_iomap_regions(pdev, BIT(ICE_BAR0), dev_driver_string(dev));
+	if (err) {
+		dev_err(dev, "BAR0 I/O map error %d\n", err);
+		return err;
+	}
+
+	pf = ice_allocate_pf(dev);
+	if (!pf)
+		return -ENOMEM;
+
+	/* initialize Auxiliary index to invalid value */
+	pf->aux_idx = -1;
+
+	/* set up for high or low DMA */
+	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_err(dev, "DMA configuration failed: 0x%x\n", err);
+		return err;
+	}
+
+	pci_set_master(pdev);
+
+	pf->pdev = pdev;
+	pci_set_drvdata(pdev, pf);
+	set_bit(ICE_DOWN, pf->state);
+	/* Disable service task until DOWN bit is cleared */
+	set_bit(ICE_SERVICE_DIS, pf->state);
+
+	hw = &pf->hw;
+	hw->hw_addr = pcim_iomap_table(pdev)[ICE_BAR0];
+	pci_save_state(pdev);
+
+	hw->back = pf;
+	hw->port_info = NULL;
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+	hw->bus.device = PCI_SLOT(pdev->devfn);
+	hw->bus.func = PCI_FUNC(pdev->devfn);
+	ice_set_ctrlq_len(hw);
+
+	pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M);
+
+#ifndef CONFIG_DYNAMIC_DEBUG
+	if (debug < -1)
+		hw->debug_mask = debug;
+#endif
+
+	err = ice_init(pf);
+	if (err)
+		goto err_init;
+
+	err = ice_init_eth(pf);
+	if (err)
+		goto err_init_eth;
+
+	err = ice_init_rdma(pf);
+	if (err)
+		goto err_init_rdma;
+
+	err = ice_init_devlink(pf);
+	if (err)
+		goto err_init_devlink;
+
+	ice_init_features(pf);
+
+	return 0;
+
+err_init_devlink:
+	ice_deinit_rdma(pf);
+err_init_rdma:
+	ice_deinit_eth(pf);
+err_init_eth:
+	ice_deinit(pf);
+err_init:
+	pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * ice_set_wake - enable or disable Wake on LAN
+ * @pf: pointer to the PF struct
+ *
+ * Simple helper for WoL control
+ */
+static void ice_set_wake(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	bool wol = pf->wol_ena;
+
+	/* clear wake state, otherwise new wake events won't fire */
+	wr32(hw, PFPM_WUS, U32_MAX);
+
+	/* enable / disable APM wake up, no RMW needed */
+	wr32(hw, PFPM_APM, wol ? PFPM_APM_APME_M : 0);
+
+	/* set magic packet filter enabled */
+	wr32(hw, PFPM_WUFC, wol ? PFPM_WUFC_MAG_M : 0);
+}
+
+/**
+ * ice_setup_mc_magic_wake - setup device to wake on multicast magic packet
+ * @pf: pointer to the PF struct
+ *
+ * Issue firmware command to enable multicast magic wake, making
+ * sure that any locally administered address (LAA) is used for
+ * wake, and that PF reset doesn't undo the LAA.
+ */
+static void ice_setup_mc_magic_wake(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	u8 mac_addr[ETH_ALEN];
+	struct ice_vsi *vsi;
+	int status;
+	u8 flags;
+
+	if (!pf->wol_ena)
+		return;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return;
+
+	/* Get current MAC address in case it's an LAA */
+	if (vsi->netdev)
+		ether_addr_copy(mac_addr, vsi->netdev->dev_addr);
+	else
+		ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
+
+	flags = ICE_AQC_MAN_MAC_WR_MC_MAG_EN |
+		ICE_AQC_MAN_MAC_UPDATE_LAA_WOL |
+		ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP;
+
+	status = ice_aq_manage_mac_write(hw, mac_addr, flags, NULL);
+	if (status)
+		dev_err(dev, "Failed to enable Multicast Magic Packet wake, err %d aq_err %s\n",
+			status, ice_aq_str(hw->adminq.sq_last_status));
+}
+
+/**
+ * ice_remove - Device removal routine
+ * @pdev: PCI device information struct
+ */
+static void ice_remove(struct pci_dev *pdev)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < ICE_MAX_RESET_WAIT; i++) {
+		if (!ice_is_reset_in_progress(pf->state))
+			break;
+		msleep(100);
+	}
+
+	if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) {
+		set_bit(ICE_VF_RESETS_DISABLED, pf->state);
+		ice_free_vfs(pf);
+	}
+
+	ice_service_task_stop(pf);
+	ice_aq_cancel_waiting_tasks(pf);
+	set_bit(ICE_DOWN, pf->state);
+
+	if (!ice_is_safe_mode(pf))
+		ice_remove_arfs(pf);
+	ice_deinit_features(pf);
+	ice_deinit_devlink(pf);
+	ice_deinit_rdma(pf);
+	ice_deinit_eth(pf);
+	ice_deinit(pf);
+
+	ice_vsi_release_all(pf);
+
+	ice_setup_mc_magic_wake(pf);
+	ice_set_wake(pf);
+
+	pci_disable_device(pdev);
+}
+
+/**
+ * ice_shutdown - PCI callback for shutting down device
+ * @pdev: PCI device information struct
+ */
+static void ice_shutdown(struct pci_dev *pdev)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+
+	ice_remove(pdev);
+
+	if (system_state == SYSTEM_POWER_OFF) {
+		pci_wake_from_d3(pdev, pf->wol_ena);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+}
+
+#ifdef CONFIG_PM
+/**
+ * ice_prepare_for_shutdown - prep for PCI shutdown
+ * @pf: board private structure
+ *
+ * Inform or close all dependent features in prep for PCI device shutdown
+ */
+static void ice_prepare_for_shutdown(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	u32 v;
+
+	/* Notify VFs of impending reset */
+	if (ice_check_sq_alive(hw, &hw->mailboxq))
+		ice_vc_notify_reset(pf);
+
+	dev_dbg(ice_pf_to_dev(pf), "Tearing down internal switch for shutdown\n");
+
+	/* disable the VSIs and their queues that are not already DOWN */
+	ice_pf_dis_all_vsi(pf, false);
+
+	ice_for_each_vsi(pf, v)
+		if (pf->vsi[v])
+			pf->vsi[v]->vsi_num = 0;
+
+	ice_shutdown_all_ctrlq(hw);
+}
+
+/**
+ * ice_reinit_interrupt_scheme - Reinitialize interrupt scheme
+ * @pf: board private structure to reinitialize
+ *
+ * This routine reinitialize interrupt scheme that was cleared during
+ * power management suspend callback.
+ *
+ * This should be called during resume routine to re-allocate the q_vectors
+ * and reacquire interrupts.
+ */
+static int ice_reinit_interrupt_scheme(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	int ret, v;
+
+	/* Since we clear MSIX flag during suspend, we need to
+	 * set it back during resume...
+	 */
+
+	ret = ice_init_interrupt_scheme(pf);
+	if (ret) {
+		dev_err(dev, "Failed to re-initialize interrupt %d\n", ret);
+		return ret;
+	}
+
+	/* Remap vectors and rings, after successful re-init interrupts */
+	ice_for_each_vsi(pf, v) {
+		if (!pf->vsi[v])
+			continue;
+
+		ret = ice_vsi_alloc_q_vectors(pf->vsi[v]);
+		if (ret)
+			goto err_reinit;
+		ice_vsi_map_rings_to_vectors(pf->vsi[v]);
+	}
+
+	ret = ice_req_irq_msix_misc(pf);
+	if (ret) {
+		dev_err(dev, "Setting up misc vector failed after device suspend %d\n",
+			ret);
+		goto err_reinit;
+	}
+
+	return 0;
+
+err_reinit:
+	while (v--)
+		if (pf->vsi[v])
+			ice_vsi_free_q_vectors(pf->vsi[v]);
+
+	return ret;
+}
+
+/**
+ * ice_suspend
+ * @dev: generic device information structure
+ *
+ * Power Management callback to quiesce the device and prepare
+ * for D3 transition.
+ */
+static int __maybe_unused ice_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct ice_pf *pf;
+	int disabled, v;
+
+	pf = pci_get_drvdata(pdev);
+
+	if (!ice_pf_state_is_nominal(pf)) {
+		dev_err(dev, "Device is not ready, no need to suspend it\n");
+		return -EBUSY;
+	}
+
+	/* Stop watchdog tasks until resume completion.
+	 * Even though it is most likely that the service task is
+	 * disabled if the device is suspended or down, the service task's
+	 * state is controlled by a different state bit, and we should
+	 * store and honor whatever state that bit is in at this point.
+	 */
+	disabled = ice_service_task_stop(pf);
+
+	ice_unplug_aux_dev(pf);
+
+	/* Already suspended?, then there is nothing to do */
+	if (test_and_set_bit(ICE_SUSPENDED, pf->state)) {
+		if (!disabled)
+			ice_service_task_restart(pf);
+		return 0;
+	}
+
+	if (test_bit(ICE_DOWN, pf->state) ||
+	    ice_is_reset_in_progress(pf->state)) {
+		dev_err(dev, "can't suspend device in reset or already down\n");
+		if (!disabled)
+			ice_service_task_restart(pf);
+		return 0;
+	}
+
+	ice_setup_mc_magic_wake(pf);
+
+	ice_prepare_for_shutdown(pf);
+
+	ice_set_wake(pf);
+
+	/* Free vectors, clear the interrupt scheme and release IRQs
+	 * for proper hibernation, especially with large number of CPUs.
+	 * Otherwise hibernation might fail when mapping all the vectors back
+	 * to CPU0.
+	 */
+	ice_free_irq_msix_misc(pf);
+	ice_for_each_vsi(pf, v) {
+		if (!pf->vsi[v])
+			continue;
+		ice_vsi_free_q_vectors(pf->vsi[v]);
+	}
+	ice_clear_interrupt_scheme(pf);
+
+	pci_save_state(pdev);
+	pci_wake_from_d3(pdev, pf->wol_ena);
+	pci_set_power_state(pdev, PCI_D3hot);
+	return 0;
+}
+
+/**
+ * ice_resume - PM callback for waking up from D3
+ * @dev: generic device information structure
+ */
+static int __maybe_unused ice_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	enum ice_reset_req reset_type;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int ret;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	pci_save_state(pdev);
+
+	if (!pci_device_is_present(pdev))
+		return -ENODEV;
+
+	ret = pci_enable_device_mem(pdev);
+	if (ret) {
+		dev_err(dev, "Cannot enable device after suspend\n");
+		return ret;
+	}
+
+	pf = pci_get_drvdata(pdev);
+	hw = &pf->hw;
+
+	pf->wakeup_reason = rd32(hw, PFPM_WUS);
+	ice_print_wake_reason(pf);
+
+	/* We cleared the interrupt scheme when we suspended, so we need to
+	 * restore it now to resume device functionality.
+	 */
+	ret = ice_reinit_interrupt_scheme(pf);
+	if (ret)
+		dev_err(dev, "Cannot restore interrupt scheme: %d\n", ret);
+
+	clear_bit(ICE_DOWN, pf->state);
+	/* Now perform PF reset and rebuild */
+	reset_type = ICE_RESET_PFR;
+	/* re-enable service task for reset, but allow reset to schedule it */
+	clear_bit(ICE_SERVICE_DIS, pf->state);
+
+	if (ice_schedule_reset(pf, reset_type))
+		dev_err(dev, "Reset during resume failed.\n");
+
+	clear_bit(ICE_SUSPENDED, pf->state);
+	ice_service_task_restart(pf);
+
+	/* Restart the service task */
+	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
+
+	return 0;
+}
+#endif /* CONFIG_PM */
+
+/**
+ * ice_pci_err_detected - warning that PCI error has been detected
+ * @pdev: PCI device information struct
+ * @err: the type of PCI error
+ *
+ * Called to warn that something happened on the PCI bus and the error handling
+ * is in progress.  Allows the driver to gracefully prepare/handle PCI errors.
+ */
+static pci_ers_result_t
+ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+
+	if (!pf) {
+		dev_err(&pdev->dev, "%s: unrecoverable device error %d\n",
+			__func__, err);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	if (!test_bit(ICE_SUSPENDED, pf->state)) {
+		ice_service_task_stop(pf);
+
+		if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
+			set_bit(ICE_PFR_REQ, pf->state);
+			ice_prepare_for_reset(pf, ICE_RESET_PFR);
+		}
+	}
+
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * ice_pci_err_slot_reset - a PCI slot reset has just happened
+ * @pdev: PCI device information struct
+ *
+ * Called to determine if the driver can recover from the PCI slot reset by
+ * using a register read to determine if the device is recoverable.
+ */
+static pci_ers_result_t ice_pci_err_slot_reset(struct pci_dev *pdev)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+	pci_ers_result_t result;
+	int err;
+	u32 reg;
+
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Cannot re-enable PCI device after reset, error %d\n",
+			err);
+		result = PCI_ERS_RESULT_DISCONNECT;
+	} else {
+		pci_set_master(pdev);
+		pci_restore_state(pdev);
+		pci_save_state(pdev);
+		pci_wake_from_d3(pdev, false);
+
+		/* Check for life */
+		reg = rd32(&pf->hw, GLGEN_RTRIG);
+		if (!reg)
+			result = PCI_ERS_RESULT_RECOVERED;
+		else
+			result = PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	return result;
+}
+
+/**
+ * ice_pci_err_resume - restart operations after PCI error recovery
+ * @pdev: PCI device information struct
+ *
+ * Called to allow the driver to bring things back up after PCI error and/or
+ * reset recovery have finished
+ */
+static void ice_pci_err_resume(struct pci_dev *pdev)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+
+	if (!pf) {
+		dev_err(&pdev->dev, "%s failed, device is unrecoverable\n",
+			__func__);
+		return;
+	}
+
+	if (test_bit(ICE_SUSPENDED, pf->state)) {
+		dev_dbg(&pdev->dev, "%s failed to resume normal operations!\n",
+			__func__);
+		return;
+	}
+
+	ice_restore_all_vfs_msi_state(pdev);
+
+	ice_do_reset(pf, ICE_RESET_PFR);
+	ice_service_task_restart(pf);
+	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
+}
+
+/**
+ * ice_pci_err_reset_prepare - prepare device driver for PCI reset
+ * @pdev: PCI device information struct
+ */
+static void ice_pci_err_reset_prepare(struct pci_dev *pdev)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+
+	if (!test_bit(ICE_SUSPENDED, pf->state)) {
+		ice_service_task_stop(pf);
+
+		if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) {
+			set_bit(ICE_PFR_REQ, pf->state);
+			ice_prepare_for_reset(pf, ICE_RESET_PFR);
+		}
+	}
+}
+
+/**
+ * ice_pci_err_reset_done - PCI reset done, device driver reset can begin
+ * @pdev: PCI device information struct
+ */
+static void ice_pci_err_reset_done(struct pci_dev *pdev)
+{
+	ice_pci_err_resume(pdev);
+}
+
+/* ice_pci_tbl - PCI Device ID Table
+ *
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ *   Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id ice_pci_tbl[] = {
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_BACKPLANE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_QSFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_SFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_BACKPLANE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_QSFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_10G_BASE_T), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_SGMII), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_BACKPLANE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_QSFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_10G_BASE_T), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SGMII), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_BACKPLANE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_10G_BASE_T), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SGMII), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_BACKPLANE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_SFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP), 0 },
+	{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E822_SI_DFLT), 0 },
+	/* required last entry */
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, ice_pci_tbl);
+
+static __maybe_unused SIMPLE_DEV_PM_OPS(ice_pm_ops, ice_suspend, ice_resume);
+
+static const struct pci_error_handlers ice_pci_err_handler = {
+	.error_detected = ice_pci_err_detected,
+	.slot_reset = ice_pci_err_slot_reset,
+	.reset_prepare = ice_pci_err_reset_prepare,
+	.reset_done = ice_pci_err_reset_done,
+	.resume = ice_pci_err_resume
+};
+
+static struct pci_driver ice_driver = {
+	.name = KBUILD_MODNAME,
+	.id_table = ice_pci_tbl,
+	.probe = ice_probe,
+	.remove = ice_remove,
+#ifdef CONFIG_PM
+	.driver.pm = &ice_pm_ops,
+#endif /* CONFIG_PM */
+	.shutdown = ice_shutdown,
+	.sriov_configure = ice_sriov_configure,
+	.err_handler = &ice_pci_err_handler
+};
+
+/**
+ * ice_module_init - Driver registration routine
+ *
+ * ice_module_init is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ */
+static int __init ice_module_init(void)
+{
+	int status = -ENOMEM;
+
+	pr_info("%s\n", ice_driver_string);
+	pr_info("%s\n", ice_copyright);
+
+	ice_wq = alloc_workqueue("%s", 0, 0, KBUILD_MODNAME);
+	if (!ice_wq) {
+		pr_err("Failed to create workqueue\n");
+		return status;
+	}
+
+	ice_lag_wq = alloc_ordered_workqueue("ice_lag_wq", 0);
+	if (!ice_lag_wq) {
+		pr_err("Failed to create LAG workqueue\n");
+		goto err_dest_wq;
+	}
+
+	status = pci_register_driver(&ice_driver);
+	if (status) {
+		pr_err("failed to register PCI driver, err %d\n", status);
+		goto err_dest_lag_wq;
+	}
+
+	return 0;
+
+err_dest_lag_wq:
+	destroy_workqueue(ice_lag_wq);
+err_dest_wq:
+	destroy_workqueue(ice_wq);
+	return status;
+}
+module_init(ice_module_init);
+
+/**
+ * ice_module_exit - Driver exit cleanup routine
+ *
+ * ice_module_exit is called just before the driver is removed
+ * from memory.
+ */
+static void __exit ice_module_exit(void)
+{
+	pci_unregister_driver(&ice_driver);
+	destroy_workqueue(ice_wq);
+	destroy_workqueue(ice_lag_wq);
+	pr_info("module unloaded\n");
+}
+module_exit(ice_module_exit);
+
+/**
+ * ice_set_mac_address - NDO callback to set MAC address
+ * @netdev: network interface device structure
+ * @pi: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_set_mac_address(struct net_device *netdev, void *pi)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	struct sockaddr *addr = pi;
+	u8 old_mac[ETH_ALEN];
+	u8 flags = 0;
+	u8 *mac;
+	int err;
+
+	mac = (u8 *)addr->sa_data;
+
+	if (!is_valid_ether_addr(mac))
+		return -EADDRNOTAVAIL;
+
+	if (test_bit(ICE_DOWN, pf->state) ||
+	    ice_is_reset_in_progress(pf->state)) {
+		netdev_err(netdev, "can't set mac %pM. device not ready\n",
+			   mac);
+		return -EBUSY;
+	}
+
+	if (ice_chnl_dmac_fltr_cnt(pf)) {
+		netdev_err(netdev, "can't set mac %pM. Device has tc-flower filters, delete all of them and try again\n",
+			   mac);
+		return -EAGAIN;
+	}
+
+	netif_addr_lock_bh(netdev);
+	ether_addr_copy(old_mac, netdev->dev_addr);
+	/* change the netdev's MAC address */
+	eth_hw_addr_set(netdev, mac);
+	netif_addr_unlock_bh(netdev);
+
+	/* Clean up old MAC filter. Not an error if old filter doesn't exist */
+	err = ice_fltr_remove_mac(vsi, old_mac, ICE_FWD_TO_VSI);
+	if (err && err != -ENOENT) {
+		err = -EADDRNOTAVAIL;
+		goto err_update_filters;
+	}
+
+	/* Add filter for new MAC. If filter exists, return success */
+	err = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI);
+	if (err == -EEXIST) {
+		/* Although this MAC filter is already present in hardware it's
+		 * possible in some cases (e.g. bonding) that dev_addr was
+		 * modified outside of the driver and needs to be restored back
+		 * to this value.
+		 */
+		netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac);
+
+		return 0;
+	} else if (err) {
+		/* error if the new filter addition failed */
+		err = -EADDRNOTAVAIL;
+	}
+
+err_update_filters:
+	if (err) {
+		netdev_err(netdev, "can't set MAC %pM. filter update failed\n",
+			   mac);
+		netif_addr_lock_bh(netdev);
+		eth_hw_addr_set(netdev, old_mac);
+		netif_addr_unlock_bh(netdev);
+		return err;
+	}
+
+	netdev_dbg(vsi->netdev, "updated MAC address to %pM\n",
+		   netdev->dev_addr);
+
+	/* write new MAC address to the firmware */
+	flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
+	err = ice_aq_manage_mac_write(hw, mac, flags, NULL);
+	if (err) {
+		netdev_err(netdev, "can't set MAC %pM. write to firmware failed error %d\n",
+			   mac, err);
+	}
+	return 0;
+}
+
+/**
+ * ice_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ */
+static void ice_set_rx_mode(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+
+	if (!vsi || ice_is_switchdev_running(vsi->back))
+		return;
+
+	/* Set the flags to synchronize filters
+	 * ndo_set_rx_mode may be triggered even without a change in netdev
+	 * flags
+	 */
+	set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
+	set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
+	set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags);
+
+	/* schedule our worker thread which will take care of
+	 * applying the new filter changes
+	 */
+	ice_service_task_schedule(vsi->back);
+}
+
+/**
+ * ice_set_tx_maxrate - NDO callback to set the maximum per-queue bitrate
+ * @netdev: network interface device structure
+ * @queue_index: Queue ID
+ * @maxrate: maximum bandwidth in Mbps
+ */
+static int
+ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	u16 q_handle;
+	int status;
+	u8 tc;
+
+	/* Validate maxrate requested is within permitted range */
+	if (maxrate && (maxrate > (ICE_SCHED_MAX_BW / 1000))) {
+		netdev_err(netdev, "Invalid max rate %d specified for the queue %d\n",
+			   maxrate, queue_index);
+		return -EINVAL;
+	}
+
+	q_handle = vsi->tx_rings[queue_index]->q_handle;
+	tc = ice_dcb_get_tc(vsi, queue_index);
+
+	vsi = ice_locate_vsi_using_queue(vsi, queue_index);
+	if (!vsi) {
+		netdev_err(netdev, "Invalid VSI for given queue %d\n",
+			   queue_index);
+		return -EINVAL;
+	}
+
+	/* Set BW back to default, when user set maxrate to 0 */
+	if (!maxrate)
+		status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc,
+					       q_handle, ICE_MAX_BW);
+	else
+		status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc,
+					  q_handle, ICE_MAX_BW, maxrate * 1000);
+	if (status)
+		netdev_err(netdev, "Unable to set Tx max rate, error %d\n",
+			   status);
+
+	return status;
+}
+
+/**
+ * ice_fdb_add - add an entry to the hardware database
+ * @ndm: the input from the stack
+ * @tb: pointer to array of nladdr (unused)
+ * @dev: the net device pointer
+ * @addr: the MAC address entry being added
+ * @vid: VLAN ID
+ * @flags: instructions from stack about fdb operation
+ * @extack: netlink extended ack
+ */
+static int
+ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[],
+	    struct net_device *dev, const unsigned char *addr, u16 vid,
+	    u16 flags, struct netlink_ext_ack __always_unused *extack)
+{
+	int err;
+
+	if (vid) {
+		netdev_err(dev, "VLANs aren't supported yet for dev_uc|mc_add()\n");
+		return -EINVAL;
+	}
+	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
+		netdev_err(dev, "FDB only supports static addresses\n");
+		return -EINVAL;
+	}
+
+	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
+		err = dev_uc_add_excl(dev, addr);
+	else if (is_multicast_ether_addr(addr))
+		err = dev_mc_add_excl(dev, addr);
+	else
+		err = -EINVAL;
+
+	/* Only return duplicate errors if NLM_F_EXCL is set */
+	if (err == -EEXIST && !(flags & NLM_F_EXCL))
+		err = 0;
+
+	return err;
+}
+
+/**
+ * ice_fdb_del - delete an entry from the hardware database
+ * @ndm: the input from the stack
+ * @tb: pointer to array of nladdr (unused)
+ * @dev: the net device pointer
+ * @addr: the MAC address entry being added
+ * @vid: VLAN ID
+ * @extack: netlink extended ack
+ */
+static int
+ice_fdb_del(struct ndmsg *ndm, __always_unused struct nlattr *tb[],
+	    struct net_device *dev, const unsigned char *addr,
+	    __always_unused u16 vid, struct netlink_ext_ack *extack)
+{
+	int err;
+
+	if (ndm->ndm_state & NUD_PERMANENT) {
+		netdev_err(dev, "FDB only supports static addresses\n");
+		return -EINVAL;
+	}
+
+	if (is_unicast_ether_addr(addr))
+		err = dev_uc_del(dev, addr);
+	else if (is_multicast_ether_addr(addr))
+		err = dev_mc_del(dev, addr);
+	else
+		err = -EINVAL;
+
+	return err;
+}
+
+#define NETIF_VLAN_OFFLOAD_FEATURES	(NETIF_F_HW_VLAN_CTAG_RX | \
+					 NETIF_F_HW_VLAN_CTAG_TX | \
+					 NETIF_F_HW_VLAN_STAG_RX | \
+					 NETIF_F_HW_VLAN_STAG_TX)
+
+#define NETIF_VLAN_STRIPPING_FEATURES	(NETIF_F_HW_VLAN_CTAG_RX | \
+					 NETIF_F_HW_VLAN_STAG_RX)
+
+#define NETIF_VLAN_FILTERING_FEATURES	(NETIF_F_HW_VLAN_CTAG_FILTER | \
+					 NETIF_F_HW_VLAN_STAG_FILTER)
+
+/**
+ * ice_fix_features - fix the netdev features flags based on device limitations
+ * @netdev: ptr to the netdev that flags are being fixed on
+ * @features: features that need to be checked and possibly fixed
+ *
+ * Make sure any fixups are made to features in this callback. This enables the
+ * driver to not have to check unsupported configurations throughout the driver
+ * because that's the responsiblity of this callback.
+ *
+ * Single VLAN Mode (SVM) Supported Features:
+ *	NETIF_F_HW_VLAN_CTAG_FILTER
+ *	NETIF_F_HW_VLAN_CTAG_RX
+ *	NETIF_F_HW_VLAN_CTAG_TX
+ *
+ * Double VLAN Mode (DVM) Supported Features:
+ *	NETIF_F_HW_VLAN_CTAG_FILTER
+ *	NETIF_F_HW_VLAN_CTAG_RX
+ *	NETIF_F_HW_VLAN_CTAG_TX
+ *
+ *	NETIF_F_HW_VLAN_STAG_FILTER
+ *	NETIF_HW_VLAN_STAG_RX
+ *	NETIF_HW_VLAN_STAG_TX
+ *
+ * Features that need fixing:
+ *	Cannot simultaneously enable CTAG and STAG stripping and/or insertion.
+ *	These are mutually exlusive as the VSI context cannot support multiple
+ *	VLAN ethertypes simultaneously for stripping and/or insertion. If this
+ *	is not done, then default to clearing the requested STAG offload
+ *	settings.
+ *
+ *	All supported filtering has to be enabled or disabled together. For
+ *	example, in DVM, CTAG and STAG filtering have to be enabled and disabled
+ *	together. If this is not done, then default to VLAN filtering disabled.
+ *	These are mutually exclusive as there is currently no way to
+ *	enable/disable VLAN filtering based on VLAN ethertype when using VLAN
+ *	prune rules.
+ */
+static netdev_features_t
+ice_fix_features(struct net_device *netdev, netdev_features_t features)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	netdev_features_t req_vlan_fltr, cur_vlan_fltr;
+	bool cur_ctag, cur_stag, req_ctag, req_stag;
+
+	cur_vlan_fltr = netdev->features & NETIF_VLAN_FILTERING_FEATURES;
+	cur_ctag = cur_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
+	cur_stag = cur_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
+
+	req_vlan_fltr = features & NETIF_VLAN_FILTERING_FEATURES;
+	req_ctag = req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
+	req_stag = req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
+
+	if (req_vlan_fltr != cur_vlan_fltr) {
+		if (ice_is_dvm_ena(&np->vsi->back->hw)) {
+			if (req_ctag && req_stag) {
+				features |= NETIF_VLAN_FILTERING_FEATURES;
+			} else if (!req_ctag && !req_stag) {
+				features &= ~NETIF_VLAN_FILTERING_FEATURES;
+			} else if ((!cur_ctag && req_ctag && !cur_stag) ||
+				   (!cur_stag && req_stag && !cur_ctag)) {
+				features |= NETIF_VLAN_FILTERING_FEATURES;
+				netdev_warn(netdev,  "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been enabled for both types.\n");
+			} else if ((cur_ctag && !req_ctag && cur_stag) ||
+				   (cur_stag && !req_stag && cur_ctag)) {
+				features &= ~NETIF_VLAN_FILTERING_FEATURES;
+				netdev_warn(netdev,  "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been disabled for both types.\n");
+			}
+		} else {
+			if (req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER)
+				netdev_warn(netdev, "cannot support requested 802.1ad filtering setting in SVM mode\n");
+
+			if (req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER)
+				features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+		}
+	}
+
+	if ((features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX)) &&
+	    (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))) {
+		netdev_warn(netdev, "cannot support CTAG and STAG VLAN stripping and/or insertion simultaneously since CTAG and STAG offloads are mutually exclusive, clearing STAG offload settings\n");
+		features &= ~(NETIF_F_HW_VLAN_STAG_RX |
+			      NETIF_F_HW_VLAN_STAG_TX);
+	}
+
+	if (!(netdev->features & NETIF_F_RXFCS) &&
+	    (features & NETIF_F_RXFCS) &&
+	    (features & NETIF_VLAN_STRIPPING_FEATURES) &&
+	    !ice_vsi_has_non_zero_vlans(np->vsi)) {
+		netdev_warn(netdev, "Disabling VLAN stripping as FCS/CRC stripping is also disabled and there is no VLAN configured\n");
+		features &= ~NETIF_VLAN_STRIPPING_FEATURES;
+	}
+
+	return features;
+}
+
+/**
+ * ice_set_vlan_offload_features - set VLAN offload features for the PF VSI
+ * @vsi: PF's VSI
+ * @features: features used to determine VLAN offload settings
+ *
+ * First, determine the vlan_ethertype based on the VLAN offload bits in
+ * features. Then determine if stripping and insertion should be enabled or
+ * disabled. Finally enable or disable VLAN stripping and insertion.
+ */
+static int
+ice_set_vlan_offload_features(struct ice_vsi *vsi, netdev_features_t features)
+{
+	bool enable_stripping = true, enable_insertion = true;
+	struct ice_vsi_vlan_ops *vlan_ops;
+	int strip_err = 0, insert_err = 0;
+	u16 vlan_ethertype = 0;
+
+	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+
+	if (features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_STAG_TX))
+		vlan_ethertype = ETH_P_8021AD;
+	else if (features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX))
+		vlan_ethertype = ETH_P_8021Q;
+
+	if (!(features & (NETIF_F_HW_VLAN_STAG_RX | NETIF_F_HW_VLAN_CTAG_RX)))
+		enable_stripping = false;
+	if (!(features & (NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_CTAG_TX)))
+		enable_insertion = false;
+
+	if (enable_stripping)
+		strip_err = vlan_ops->ena_stripping(vsi, vlan_ethertype);
+	else
+		strip_err = vlan_ops->dis_stripping(vsi);
+
+	if (enable_insertion)
+		insert_err = vlan_ops->ena_insertion(vsi, vlan_ethertype);
+	else
+		insert_err = vlan_ops->dis_insertion(vsi);
+
+	if (strip_err || insert_err)
+		return -EIO;
+
+	return 0;
+}
+
+/**
+ * ice_set_vlan_filtering_features - set VLAN filtering features for the PF VSI
+ * @vsi: PF's VSI
+ * @features: features used to determine VLAN filtering settings
+ *
+ * Enable or disable Rx VLAN filtering based on the VLAN filtering bits in the
+ * features.
+ */
+static int
+ice_set_vlan_filtering_features(struct ice_vsi *vsi, netdev_features_t features)
+{
+	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+	int err = 0;
+
+	/* support Single VLAN Mode (SVM) and Double VLAN Mode (DVM) by checking
+	 * if either bit is set
+	 */
+	if (features &
+	    (NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER))
+		err = vlan_ops->ena_rx_filtering(vsi);
+	else
+		err = vlan_ops->dis_rx_filtering(vsi);
+
+	return err;
+}
+
+/**
+ * ice_set_vlan_features - set VLAN settings based on suggested feature set
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ *
+ * Only update VLAN settings if the requested_vlan_features are different than
+ * the current_vlan_features.
+ */
+static int
+ice_set_vlan_features(struct net_device *netdev, netdev_features_t features)
+{
+	netdev_features_t current_vlan_features, requested_vlan_features;
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	int err;
+
+	current_vlan_features = netdev->features & NETIF_VLAN_OFFLOAD_FEATURES;
+	requested_vlan_features = features & NETIF_VLAN_OFFLOAD_FEATURES;
+	if (current_vlan_features ^ requested_vlan_features) {
+		if ((features & NETIF_F_RXFCS) &&
+		    (features & NETIF_VLAN_STRIPPING_FEATURES)) {
+			dev_err(ice_pf_to_dev(vsi->back),
+				"To enable VLAN stripping, you must first enable FCS/CRC stripping\n");
+			return -EIO;
+		}
+
+		err = ice_set_vlan_offload_features(vsi, features);
+		if (err)
+			return err;
+	}
+
+	current_vlan_features = netdev->features &
+		NETIF_VLAN_FILTERING_FEATURES;
+	requested_vlan_features = features & NETIF_VLAN_FILTERING_FEATURES;
+	if (current_vlan_features ^ requested_vlan_features) {
+		err = ice_set_vlan_filtering_features(vsi, features);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_set_loopback - turn on/off loopback mode on underlying PF
+ * @vsi: ptr to VSI
+ * @ena: flag to indicate the on/off setting
+ */
+static int ice_set_loopback(struct ice_vsi *vsi, bool ena)
+{
+	bool if_running = netif_running(vsi->netdev);
+	int ret;
+
+	if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
+		ret = ice_down(vsi);
+		if (ret) {
+			netdev_err(vsi->netdev, "Preparing device to toggle loopback failed\n");
+			return ret;
+		}
+	}
+	ret = ice_aq_set_mac_loopback(&vsi->back->hw, ena, NULL);
+	if (ret)
+		netdev_err(vsi->netdev, "Failed to toggle loopback state\n");
+	if (if_running)
+		ret = ice_up(vsi);
+
+	return ret;
+}
+
+/**
+ * ice_set_features - set the netdev feature flags
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ */
+static int
+ice_set_features(struct net_device *netdev, netdev_features_t features)
+{
+	netdev_features_t changed = netdev->features ^ features;
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	int ret = 0;
+
+	/* Don't set any netdev advanced features with device in Safe Mode */
+	if (ice_is_safe_mode(pf)) {
+		dev_err(ice_pf_to_dev(pf),
+			"Device is in Safe Mode - not enabling advanced netdev features\n");
+		return ret;
+	}
+
+	/* Do not change setting during reset */
+	if (ice_is_reset_in_progress(pf->state)) {
+		dev_err(ice_pf_to_dev(pf),
+			"Device is resetting, changing advanced netdev features temporarily unavailable.\n");
+		return -EBUSY;
+	}
+
+	/* Multiple features can be changed in one call so keep features in
+	 * separate if/else statements to guarantee each feature is checked
+	 */
+	if (changed & NETIF_F_RXHASH)
+		ice_vsi_manage_rss_lut(vsi, !!(features & NETIF_F_RXHASH));
+
+	ret = ice_set_vlan_features(netdev, features);
+	if (ret)
+		return ret;
+
+	/* Turn on receive of FCS aka CRC, and after setting this
+	 * flag the packet data will have the 4 byte CRC appended
+	 */
+	if (changed & NETIF_F_RXFCS) {
+		if ((features & NETIF_F_RXFCS) &&
+		    (features & NETIF_VLAN_STRIPPING_FEATURES)) {
+			dev_err(ice_pf_to_dev(vsi->back),
+				"To disable FCS/CRC stripping, you must first disable VLAN stripping\n");
+			return -EIO;
+		}
+
+		ice_vsi_cfg_crc_strip(vsi, !!(features & NETIF_F_RXFCS));
+		ret = ice_down_up(vsi);
+		if (ret)
+			return ret;
+	}
+
+	if (changed & NETIF_F_NTUPLE) {
+		bool ena = !!(features & NETIF_F_NTUPLE);
+
+		ice_vsi_manage_fdir(vsi, ena);
+		ena ? ice_init_arfs(vsi) : ice_clear_arfs(vsi);
+	}
+
+	/* don't turn off hw_tc_offload when ADQ is already enabled */
+	if (!(features & NETIF_F_HW_TC) && ice_is_adq_active(pf)) {
+		dev_err(ice_pf_to_dev(pf), "ADQ is active, can't turn hw_tc_offload off\n");
+		return -EACCES;
+	}
+
+	if (changed & NETIF_F_HW_TC) {
+		bool ena = !!(features & NETIF_F_HW_TC);
+
+		ena ? set_bit(ICE_FLAG_CLS_FLOWER, pf->flags) :
+		      clear_bit(ICE_FLAG_CLS_FLOWER, pf->flags);
+	}
+
+	if (changed & NETIF_F_LOOPBACK)
+		ret = ice_set_loopback(vsi, !!(features & NETIF_F_LOOPBACK));
+
+	return ret;
+}
+
+/**
+ * ice_vsi_vlan_setup - Setup VLAN offload properties on a PF VSI
+ * @vsi: VSI to setup VLAN properties for
+ */
+static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
+{
+	int err;
+
+	err = ice_set_vlan_offload_features(vsi, vsi->netdev->features);
+	if (err)
+		return err;
+
+	err = ice_set_vlan_filtering_features(vsi, vsi->netdev->features);
+	if (err)
+		return err;
+
+	return ice_vsi_add_vlan_zero(vsi);
+}
+
+/**
+ * ice_vsi_cfg_lan - Setup the VSI lan related config
+ * @vsi: the VSI being configured
+ *
+ * Return 0 on success and negative value on error
+ */
+int ice_vsi_cfg_lan(struct ice_vsi *vsi)
+{
+	int err;
+
+	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
+		ice_set_rx_mode(vsi->netdev);
+
+		err = ice_vsi_vlan_setup(vsi);
+		if (err)
+			return err;
+	}
+	ice_vsi_cfg_dcb_rings(vsi);
+
+	err = ice_vsi_cfg_lan_txqs(vsi);
+	if (!err && ice_is_xdp_ena_vsi(vsi))
+		err = ice_vsi_cfg_xdp_txqs(vsi);
+	if (!err)
+		err = ice_vsi_cfg_rxqs(vsi);
+
+	return err;
+}
+
+/* THEORY OF MODERATION:
+ * The ice driver hardware works differently than the hardware that DIMLIB was
+ * originally made for. ice hardware doesn't have packet count limits that
+ * can trigger an interrupt, but it *does* have interrupt rate limit support,
+ * which is hard-coded to a limit of 250,000 ints/second.
+ * If not using dynamic moderation, the INTRL value can be modified
+ * by ethtool rx-usecs-high.
+ */
+struct ice_dim {
+	/* the throttle rate for interrupts, basically worst case delay before
+	 * an initial interrupt fires, value is stored in microseconds.
+	 */
+	u16 itr;
+};
+
+/* Make a different profile for Rx that doesn't allow quite so aggressive
+ * moderation at the high end (it maxes out at 126us or about 8k interrupts a
+ * second.
+ */
+static const struct ice_dim rx_profile[] = {
+	{2},    /* 500,000 ints/s, capped at 250K by INTRL */
+	{8},    /* 125,000 ints/s */
+	{16},   /*  62,500 ints/s */
+	{62},   /*  16,129 ints/s */
+	{126}   /*   7,936 ints/s */
+};
+
+/* The transmit profile, which has the same sorts of values
+ * as the previous struct
+ */
+static const struct ice_dim tx_profile[] = {
+	{2},    /* 500,000 ints/s, capped at 250K by INTRL */
+	{8},    /* 125,000 ints/s */
+	{40},   /*  16,125 ints/s */
+	{128},  /*   7,812 ints/s */
+	{256}   /*   3,906 ints/s */
+};
+
+static void ice_tx_dim_work(struct work_struct *work)
+{
+	struct ice_ring_container *rc;
+	struct dim *dim;
+	u16 itr;
+
+	dim = container_of(work, struct dim, work);
+	rc = dim->priv;
+
+	WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile));
+
+	/* look up the values in our local table */
+	itr = tx_profile[dim->profile_ix].itr;
+
+	ice_trace(tx_dim_work, container_of(rc, struct ice_q_vector, tx), dim);
+	ice_write_itr(rc, itr);
+
+	dim->state = DIM_START_MEASURE;
+}
+
+static void ice_rx_dim_work(struct work_struct *work)
+{
+	struct ice_ring_container *rc;
+	struct dim *dim;
+	u16 itr;
+
+	dim = container_of(work, struct dim, work);
+	rc = dim->priv;
+
+	WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile));
+
+	/* look up the values in our local table */
+	itr = rx_profile[dim->profile_ix].itr;
+
+	ice_trace(rx_dim_work, container_of(rc, struct ice_q_vector, rx), dim);
+	ice_write_itr(rc, itr);
+
+	dim->state = DIM_START_MEASURE;
+}
+
+#define ICE_DIM_DEFAULT_PROFILE_IX 1
+
+/**
+ * ice_init_moderation - set up interrupt moderation
+ * @q_vector: the vector containing rings to be configured
+ *
+ * Set up interrupt moderation registers, with the intent to do the right thing
+ * when called from reset or from probe, and whether or not dynamic moderation
+ * is enabled or not. Take special care to write all the registers in both
+ * dynamic moderation mode or not in order to make sure hardware is in a known
+ * state.
+ */
+static void ice_init_moderation(struct ice_q_vector *q_vector)
+{
+	struct ice_ring_container *rc;
+	bool tx_dynamic, rx_dynamic;
+
+	rc = &q_vector->tx;
+	INIT_WORK(&rc->dim.work, ice_tx_dim_work);
+	rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+	rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
+	rc->dim.priv = rc;
+	tx_dynamic = ITR_IS_DYNAMIC(rc);
+
+	/* set the initial TX ITR to match the above */
+	ice_write_itr(rc, tx_dynamic ?
+		      tx_profile[rc->dim.profile_ix].itr : rc->itr_setting);
+
+	rc = &q_vector->rx;
+	INIT_WORK(&rc->dim.work, ice_rx_dim_work);
+	rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+	rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX;
+	rc->dim.priv = rc;
+	rx_dynamic = ITR_IS_DYNAMIC(rc);
+
+	/* set the initial RX ITR to match the above */
+	ice_write_itr(rc, rx_dynamic ? rx_profile[rc->dim.profile_ix].itr :
+				       rc->itr_setting);
+
+	ice_set_q_vector_intrl(q_vector);
+}
+
+/**
+ * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
+ * @vsi: the VSI being configured
+ */
+static void ice_napi_enable_all(struct ice_vsi *vsi)
+{
+	int q_idx;
+
+	if (!vsi->netdev)
+		return;
+
+	ice_for_each_q_vector(vsi, q_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+		ice_init_moderation(q_vector);
+
+		if (q_vector->rx.rx_ring || q_vector->tx.tx_ring)
+			napi_enable(&q_vector->napi);
+	}
+}
+
+/**
+ * ice_up_complete - Finish the last steps of bringing up a connection
+ * @vsi: The VSI being configured
+ *
+ * Return 0 on success and negative value on error
+ */
+static int ice_up_complete(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	int err;
+
+	ice_vsi_cfg_msix(vsi);
+
+	/* Enable only Rx rings, Tx rings were enabled by the FW when the
+	 * Tx queue group list was configured and the context bits were
+	 * programmed using ice_vsi_cfg_txqs
+	 */
+	err = ice_vsi_start_all_rx_rings(vsi);
+	if (err)
+		return err;
+
+	clear_bit(ICE_VSI_DOWN, vsi->state);
+	ice_napi_enable_all(vsi);
+	ice_vsi_ena_irq(vsi);
+
+	if (vsi->port_info &&
+	    (vsi->port_info->phy.link_info.link_info & ICE_AQ_LINK_UP) &&
+	    vsi->netdev && vsi->type == ICE_VSI_PF) {
+		ice_print_link_msg(vsi, true);
+		netif_tx_start_all_queues(vsi->netdev);
+		netif_carrier_on(vsi->netdev);
+		ice_ptp_link_change(pf, pf->hw.pf_id, true);
+	}
+
+	/* Perform an initial read of the statistics registers now to
+	 * set the baseline so counters are ready when interface is up
+	 */
+	ice_update_eth_stats(vsi);
+
+	if (vsi->type == ICE_VSI_PF)
+		ice_service_task_schedule(pf);
+
+	return 0;
+}
+
+/**
+ * ice_up - Bring the connection back up after being down
+ * @vsi: VSI being configured
+ */
+int ice_up(struct ice_vsi *vsi)
+{
+	int err;
+
+	err = ice_vsi_cfg_lan(vsi);
+	if (!err)
+		err = ice_up_complete(vsi);
+
+	return err;
+}
+
+/**
+ * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring
+ * @syncp: pointer to u64_stats_sync
+ * @stats: stats that pkts and bytes count will be taken from
+ * @pkts: packets stats counter
+ * @bytes: bytes stats counter
+ *
+ * This function fetches stats from the ring considering the atomic operations
+ * that needs to be performed to read u64 values in 32 bit machine.
+ */
+void
+ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp,
+			     struct ice_q_stats stats, u64 *pkts, u64 *bytes)
+{
+	unsigned int start;
+
+	do {
+		start = u64_stats_fetch_begin(syncp);
+		*pkts = stats.pkts;
+		*bytes = stats.bytes;
+	} while (u64_stats_fetch_retry(syncp, start));
+}
+
+/**
+ * ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters
+ * @vsi: the VSI to be updated
+ * @vsi_stats: the stats struct to be updated
+ * @rings: rings to work on
+ * @count: number of rings
+ */
+static void
+ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi,
+			     struct rtnl_link_stats64 *vsi_stats,
+			     struct ice_tx_ring **rings, u16 count)
+{
+	u16 i;
+
+	for (i = 0; i < count; i++) {
+		struct ice_tx_ring *ring;
+		u64 pkts = 0, bytes = 0;
+
+		ring = READ_ONCE(rings[i]);
+		if (!ring || !ring->ring_stats)
+			continue;
+		ice_fetch_u64_stats_per_ring(&ring->ring_stats->syncp,
+					     ring->ring_stats->stats, &pkts,
+					     &bytes);
+		vsi_stats->tx_packets += pkts;
+		vsi_stats->tx_bytes += bytes;
+		vsi->tx_restart += ring->ring_stats->tx_stats.restart_q;
+		vsi->tx_busy += ring->ring_stats->tx_stats.tx_busy;
+		vsi->tx_linearize += ring->ring_stats->tx_stats.tx_linearize;
+	}
+}
+
+/**
+ * ice_update_vsi_ring_stats - Update VSI stats counters
+ * @vsi: the VSI to be updated
+ */
+static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
+{
+	struct rtnl_link_stats64 *net_stats, *stats_prev;
+	struct rtnl_link_stats64 *vsi_stats;
+	u64 pkts, bytes;
+	int i;
+
+	vsi_stats = kzalloc(sizeof(*vsi_stats), GFP_ATOMIC);
+	if (!vsi_stats)
+		return;
+
+	/* reset non-netdev (extended) stats */
+	vsi->tx_restart = 0;
+	vsi->tx_busy = 0;
+	vsi->tx_linearize = 0;
+	vsi->rx_buf_failed = 0;
+	vsi->rx_page_failed = 0;
+
+	rcu_read_lock();
+
+	/* update Tx rings counters */
+	ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->tx_rings,
+				     vsi->num_txq);
+
+	/* update Rx rings counters */
+	ice_for_each_rxq(vsi, i) {
+		struct ice_rx_ring *ring = READ_ONCE(vsi->rx_rings[i]);
+		struct ice_ring_stats *ring_stats;
+
+		ring_stats = ring->ring_stats;
+		ice_fetch_u64_stats_per_ring(&ring_stats->syncp,
+					     ring_stats->stats, &pkts,
+					     &bytes);
+		vsi_stats->rx_packets += pkts;
+		vsi_stats->rx_bytes += bytes;
+		vsi->rx_buf_failed += ring_stats->rx_stats.alloc_buf_failed;
+		vsi->rx_page_failed += ring_stats->rx_stats.alloc_page_failed;
+	}
+
+	/* update XDP Tx rings counters */
+	if (ice_is_xdp_ena_vsi(vsi))
+		ice_update_vsi_tx_ring_stats(vsi, vsi_stats, vsi->xdp_rings,
+					     vsi->num_xdp_txq);
+
+	rcu_read_unlock();
+
+	net_stats = &vsi->net_stats;
+	stats_prev = &vsi->net_stats_prev;
+
+	/* clear prev counters after reset */
+	if (vsi_stats->tx_packets < stats_prev->tx_packets ||
+	    vsi_stats->rx_packets < stats_prev->rx_packets) {
+		stats_prev->tx_packets = 0;
+		stats_prev->tx_bytes = 0;
+		stats_prev->rx_packets = 0;
+		stats_prev->rx_bytes = 0;
+	}
+
+	/* update netdev counters */
+	net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets;
+	net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes;
+	net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets;
+	net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes;
+
+	stats_prev->tx_packets = vsi_stats->tx_packets;
+	stats_prev->tx_bytes = vsi_stats->tx_bytes;
+	stats_prev->rx_packets = vsi_stats->rx_packets;
+	stats_prev->rx_bytes = vsi_stats->rx_bytes;
+
+	kfree(vsi_stats);
+}
+
+/**
+ * ice_update_vsi_stats - Update VSI stats counters
+ * @vsi: the VSI to be updated
+ */
+void ice_update_vsi_stats(struct ice_vsi *vsi)
+{
+	struct rtnl_link_stats64 *cur_ns = &vsi->net_stats;
+	struct ice_eth_stats *cur_es = &vsi->eth_stats;
+	struct ice_pf *pf = vsi->back;
+
+	if (test_bit(ICE_VSI_DOWN, vsi->state) ||
+	    test_bit(ICE_CFG_BUSY, pf->state))
+		return;
+
+	/* get stats as recorded by Tx/Rx rings */
+	ice_update_vsi_ring_stats(vsi);
+
+	/* get VSI stats as recorded by the hardware */
+	ice_update_eth_stats(vsi);
+
+	cur_ns->tx_errors = cur_es->tx_errors;
+	cur_ns->rx_dropped = cur_es->rx_discards;
+	cur_ns->tx_dropped = cur_es->tx_discards;
+	cur_ns->multicast = cur_es->rx_multicast;
+
+	/* update some more netdev stats if this is main VSI */
+	if (vsi->type == ICE_VSI_PF) {
+		cur_ns->rx_crc_errors = pf->stats.crc_errors;
+		cur_ns->rx_errors = pf->stats.crc_errors +
+				    pf->stats.illegal_bytes +
+				    pf->stats.rx_len_errors +
+				    pf->stats.rx_undersize +
+				    pf->hw_csum_rx_error +
+				    pf->stats.rx_jabber +
+				    pf->stats.rx_fragments +
+				    pf->stats.rx_oversize;
+		cur_ns->rx_length_errors = pf->stats.rx_len_errors;
+		/* record drops from the port level */
+		cur_ns->rx_missed_errors = pf->stats.eth.rx_discards;
+	}
+}
+
+/**
+ * ice_update_pf_stats - Update PF port stats counters
+ * @pf: PF whose stats needs to be updated
+ */
+void ice_update_pf_stats(struct ice_pf *pf)
+{
+	struct ice_hw_port_stats *prev_ps, *cur_ps;
+	struct ice_hw *hw = &pf->hw;
+	u16 fd_ctr_base;
+	u8 port;
+
+	port = hw->port_info->lport;
+	prev_ps = &pf->stats_prev;
+	cur_ps = &pf->stats;
+
+	if (ice_is_reset_in_progress(pf->state))
+		pf->stat_prev_loaded = false;
+
+	ice_stat_update40(hw, GLPRT_GORCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.rx_bytes,
+			  &cur_ps->eth.rx_bytes);
+
+	ice_stat_update40(hw, GLPRT_UPRCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.rx_unicast,
+			  &cur_ps->eth.rx_unicast);
+
+	ice_stat_update40(hw, GLPRT_MPRCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.rx_multicast,
+			  &cur_ps->eth.rx_multicast);
+
+	ice_stat_update40(hw, GLPRT_BPRCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.rx_broadcast,
+			  &cur_ps->eth.rx_broadcast);
+
+	ice_stat_update32(hw, PRTRPB_RDPC, pf->stat_prev_loaded,
+			  &prev_ps->eth.rx_discards,
+			  &cur_ps->eth.rx_discards);
+
+	ice_stat_update40(hw, GLPRT_GOTCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.tx_bytes,
+			  &cur_ps->eth.tx_bytes);
+
+	ice_stat_update40(hw, GLPRT_UPTCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.tx_unicast,
+			  &cur_ps->eth.tx_unicast);
+
+	ice_stat_update40(hw, GLPRT_MPTCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.tx_multicast,
+			  &cur_ps->eth.tx_multicast);
+
+	ice_stat_update40(hw, GLPRT_BPTCL(port), pf->stat_prev_loaded,
+			  &prev_ps->eth.tx_broadcast,
+			  &cur_ps->eth.tx_broadcast);
+
+	ice_stat_update32(hw, GLPRT_TDOLD(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_dropped_link_down,
+			  &cur_ps->tx_dropped_link_down);
+
+	ice_stat_update40(hw, GLPRT_PRC64L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_64, &cur_ps->rx_size_64);
+
+	ice_stat_update40(hw, GLPRT_PRC127L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_127, &cur_ps->rx_size_127);
+
+	ice_stat_update40(hw, GLPRT_PRC255L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_255, &cur_ps->rx_size_255);
+
+	ice_stat_update40(hw, GLPRT_PRC511L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_511, &cur_ps->rx_size_511);
+
+	ice_stat_update40(hw, GLPRT_PRC1023L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_1023, &cur_ps->rx_size_1023);
+
+	ice_stat_update40(hw, GLPRT_PRC1522L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_1522, &cur_ps->rx_size_1522);
+
+	ice_stat_update40(hw, GLPRT_PRC9522L(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_size_big, &cur_ps->rx_size_big);
+
+	ice_stat_update40(hw, GLPRT_PTC64L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_64, &cur_ps->tx_size_64);
+
+	ice_stat_update40(hw, GLPRT_PTC127L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_127, &cur_ps->tx_size_127);
+
+	ice_stat_update40(hw, GLPRT_PTC255L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_255, &cur_ps->tx_size_255);
+
+	ice_stat_update40(hw, GLPRT_PTC511L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_511, &cur_ps->tx_size_511);
+
+	ice_stat_update40(hw, GLPRT_PTC1023L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_1023, &cur_ps->tx_size_1023);
+
+	ice_stat_update40(hw, GLPRT_PTC1522L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_1522, &cur_ps->tx_size_1522);
+
+	ice_stat_update40(hw, GLPRT_PTC9522L(port), pf->stat_prev_loaded,
+			  &prev_ps->tx_size_big, &cur_ps->tx_size_big);
+
+	fd_ctr_base = hw->fd_ctr_base;
+
+	ice_stat_update40(hw,
+			  GLSTAT_FD_CNT0L(ICE_FD_SB_STAT_IDX(fd_ctr_base)),
+			  pf->stat_prev_loaded, &prev_ps->fd_sb_match,
+			  &cur_ps->fd_sb_match);
+	ice_stat_update32(hw, GLPRT_LXONRXC(port), pf->stat_prev_loaded,
+			  &prev_ps->link_xon_rx, &cur_ps->link_xon_rx);
+
+	ice_stat_update32(hw, GLPRT_LXOFFRXC(port), pf->stat_prev_loaded,
+			  &prev_ps->link_xoff_rx, &cur_ps->link_xoff_rx);
+
+	ice_stat_update32(hw, GLPRT_LXONTXC(port), pf->stat_prev_loaded,
+			  &prev_ps->link_xon_tx, &cur_ps->link_xon_tx);
+
+	ice_stat_update32(hw, GLPRT_LXOFFTXC(port), pf->stat_prev_loaded,
+			  &prev_ps->link_xoff_tx, &cur_ps->link_xoff_tx);
+
+	ice_update_dcb_stats(pf);
+
+	ice_stat_update32(hw, GLPRT_CRCERRS(port), pf->stat_prev_loaded,
+			  &prev_ps->crc_errors, &cur_ps->crc_errors);
+
+	ice_stat_update32(hw, GLPRT_ILLERRC(port), pf->stat_prev_loaded,
+			  &prev_ps->illegal_bytes, &cur_ps->illegal_bytes);
+
+	ice_stat_update32(hw, GLPRT_MLFC(port), pf->stat_prev_loaded,
+			  &prev_ps->mac_local_faults,
+			  &cur_ps->mac_local_faults);
+
+	ice_stat_update32(hw, GLPRT_MRFC(port), pf->stat_prev_loaded,
+			  &prev_ps->mac_remote_faults,
+			  &cur_ps->mac_remote_faults);
+
+	ice_stat_update32(hw, GLPRT_RLEC(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_len_errors, &cur_ps->rx_len_errors);
+
+	ice_stat_update32(hw, GLPRT_RUC(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_undersize, &cur_ps->rx_undersize);
+
+	ice_stat_update32(hw, GLPRT_RFC(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_fragments, &cur_ps->rx_fragments);
+
+	ice_stat_update32(hw, GLPRT_ROC(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_oversize, &cur_ps->rx_oversize);
+
+	ice_stat_update32(hw, GLPRT_RJC(port), pf->stat_prev_loaded,
+			  &prev_ps->rx_jabber, &cur_ps->rx_jabber);
+
+	cur_ps->fd_sb_status = test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1 : 0;
+
+	pf->stat_prev_loaded = true;
+}
+
+/**
+ * ice_get_stats64 - get statistics for network device structure
+ * @netdev: network interface device structure
+ * @stats: main device statistics structure
+ */
+static
+void ice_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct rtnl_link_stats64 *vsi_stats;
+	struct ice_vsi *vsi = np->vsi;
+
+	vsi_stats = &vsi->net_stats;
+
+	if (!vsi->num_txq || !vsi->num_rxq)
+		return;
+
+	/* netdev packet/byte stats come from ring counter. These are obtained
+	 * by summing up ring counters (done by ice_update_vsi_ring_stats).
+	 * But, only call the update routine and read the registers if VSI is
+	 * not down.
+	 */
+	if (!test_bit(ICE_VSI_DOWN, vsi->state))
+		ice_update_vsi_ring_stats(vsi);
+	stats->tx_packets = vsi_stats->tx_packets;
+	stats->tx_bytes = vsi_stats->tx_bytes;
+	stats->rx_packets = vsi_stats->rx_packets;
+	stats->rx_bytes = vsi_stats->rx_bytes;
+
+	/* The rest of the stats can be read from the hardware but instead we
+	 * just return values that the watchdog task has already obtained from
+	 * the hardware.
+	 */
+	stats->multicast = vsi_stats->multicast;
+	stats->tx_errors = vsi_stats->tx_errors;
+	stats->tx_dropped = vsi_stats->tx_dropped;
+	stats->rx_errors = vsi_stats->rx_errors;
+	stats->rx_dropped = vsi_stats->rx_dropped;
+	stats->rx_crc_errors = vsi_stats->rx_crc_errors;
+	stats->rx_length_errors = vsi_stats->rx_length_errors;
+}
+
+/**
+ * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI
+ * @vsi: VSI having NAPI disabled
+ */
+static void ice_napi_disable_all(struct ice_vsi *vsi)
+{
+	int q_idx;
+
+	if (!vsi->netdev)
+		return;
+
+	ice_for_each_q_vector(vsi, q_idx) {
+		struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
+
+		if (q_vector->rx.rx_ring || q_vector->tx.tx_ring)
+			napi_disable(&q_vector->napi);
+
+		cancel_work_sync(&q_vector->tx.dim.work);
+		cancel_work_sync(&q_vector->rx.dim.work);
+	}
+}
+
+/**
+ * ice_down - Shutdown the connection
+ * @vsi: The VSI being stopped
+ *
+ * Caller of this function is expected to set the vsi->state ICE_DOWN bit
+ */
+int ice_down(struct ice_vsi *vsi)
+{
+	int i, tx_err, rx_err, vlan_err = 0;
+
+	WARN_ON(!test_bit(ICE_VSI_DOWN, vsi->state));
+
+	if (vsi->netdev && vsi->type == ICE_VSI_PF) {
+		vlan_err = ice_vsi_del_vlan_zero(vsi);
+		ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false);
+		netif_carrier_off(vsi->netdev);
+		netif_tx_disable(vsi->netdev);
+	} else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) {
+		ice_eswitch_stop_all_tx_queues(vsi->back);
+	}
+
+	ice_vsi_dis_irq(vsi);
+
+	tx_err = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+	if (tx_err)
+		netdev_err(vsi->netdev, "Failed stop Tx rings, VSI %d error %d\n",
+			   vsi->vsi_num, tx_err);
+	if (!tx_err && ice_is_xdp_ena_vsi(vsi)) {
+		tx_err = ice_vsi_stop_xdp_tx_rings(vsi);
+		if (tx_err)
+			netdev_err(vsi->netdev, "Failed stop XDP rings, VSI %d error %d\n",
+				   vsi->vsi_num, tx_err);
+	}
+
+	rx_err = ice_vsi_stop_all_rx_rings(vsi);
+	if (rx_err)
+		netdev_err(vsi->netdev, "Failed stop Rx rings, VSI %d error %d\n",
+			   vsi->vsi_num, rx_err);
+
+	ice_napi_disable_all(vsi);
+
+	ice_for_each_txq(vsi, i)
+		ice_clean_tx_ring(vsi->tx_rings[i]);
+
+	if (ice_is_xdp_ena_vsi(vsi))
+		ice_for_each_xdp_txq(vsi, i)
+			ice_clean_tx_ring(vsi->xdp_rings[i]);
+
+	ice_for_each_rxq(vsi, i)
+		ice_clean_rx_ring(vsi->rx_rings[i]);
+
+	if (tx_err || rx_err || vlan_err) {
+		netdev_err(vsi->netdev, "Failed to close VSI 0x%04X on switch 0x%04X\n",
+			   vsi->vsi_num, vsi->vsw->sw_id);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_down_up - shutdown the VSI connection and bring it up
+ * @vsi: the VSI to be reconnected
+ */
+int ice_down_up(struct ice_vsi *vsi)
+{
+	int ret;
+
+	/* if DOWN already set, nothing to do */
+	if (test_and_set_bit(ICE_VSI_DOWN, vsi->state))
+		return 0;
+
+	ret = ice_down(vsi);
+	if (ret)
+		return ret;
+
+	ret = ice_up(vsi);
+	if (ret) {
+		netdev_err(vsi->netdev, "reallocating resources failed during netdev features change, may need to reload driver\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources
+ * @vsi: VSI having resources allocated
+ *
+ * Return 0 on success, negative on failure
+ */
+int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
+{
+	int i, err = 0;
+
+	if (!vsi->num_txq) {
+		dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Tx queues\n",
+			vsi->vsi_num);
+		return -EINVAL;
+	}
+
+	ice_for_each_txq(vsi, i) {
+		struct ice_tx_ring *ring = vsi->tx_rings[i];
+
+		if (!ring)
+			return -EINVAL;
+
+		if (vsi->netdev)
+			ring->netdev = vsi->netdev;
+		err = ice_setup_tx_ring(ring);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/**
+ * ice_vsi_setup_rx_rings - Allocate VSI Rx queue resources
+ * @vsi: VSI having resources allocated
+ *
+ * Return 0 on success, negative on failure
+ */
+int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
+{
+	int i, err = 0;
+
+	if (!vsi->num_rxq) {
+		dev_err(ice_pf_to_dev(vsi->back), "VSI %d has 0 Rx queues\n",
+			vsi->vsi_num);
+		return -EINVAL;
+	}
+
+	ice_for_each_rxq(vsi, i) {
+		struct ice_rx_ring *ring = vsi->rx_rings[i];
+
+		if (!ring)
+			return -EINVAL;
+
+		if (vsi->netdev)
+			ring->netdev = vsi->netdev;
+		err = ice_setup_rx_ring(ring);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/**
+ * ice_vsi_open_ctrl - open control VSI for use
+ * @vsi: the VSI to open
+ *
+ * Initialization of the Control VSI
+ *
+ * Returns 0 on success, negative value on error
+ */
+int ice_vsi_open_ctrl(struct ice_vsi *vsi)
+{
+	char int_name[ICE_INT_NAME_STR_LEN];
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	int err;
+
+	dev = ice_pf_to_dev(pf);
+	/* allocate descriptors */
+	err = ice_vsi_setup_tx_rings(vsi);
+	if (err)
+		goto err_setup_tx;
+
+	err = ice_vsi_setup_rx_rings(vsi);
+	if (err)
+		goto err_setup_rx;
+
+	err = ice_vsi_cfg_lan(vsi);
+	if (err)
+		goto err_setup_rx;
+
+	snprintf(int_name, sizeof(int_name) - 1, "%s-%s:ctrl",
+		 dev_driver_string(dev), dev_name(dev));
+	err = ice_vsi_req_irq_msix(vsi, int_name);
+	if (err)
+		goto err_setup_rx;
+
+	ice_vsi_cfg_msix(vsi);
+
+	err = ice_vsi_start_all_rx_rings(vsi);
+	if (err)
+		goto err_up_complete;
+
+	clear_bit(ICE_VSI_DOWN, vsi->state);
+	ice_vsi_ena_irq(vsi);
+
+	return 0;
+
+err_up_complete:
+	ice_down(vsi);
+err_setup_rx:
+	ice_vsi_free_rx_rings(vsi);
+err_setup_tx:
+	ice_vsi_free_tx_rings(vsi);
+
+	return err;
+}
+
+/**
+ * ice_vsi_open - Called when a network interface is made active
+ * @vsi: the VSI to open
+ *
+ * Initialization of the VSI
+ *
+ * Returns 0 on success, negative value on error
+ */
+int ice_vsi_open(struct ice_vsi *vsi)
+{
+	char int_name[ICE_INT_NAME_STR_LEN];
+	struct ice_pf *pf = vsi->back;
+	int err;
+
+	/* allocate descriptors */
+	err = ice_vsi_setup_tx_rings(vsi);
+	if (err)
+		goto err_setup_tx;
+
+	err = ice_vsi_setup_rx_rings(vsi);
+	if (err)
+		goto err_setup_rx;
+
+	err = ice_vsi_cfg_lan(vsi);
+	if (err)
+		goto err_setup_rx;
+
+	snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
+		 dev_driver_string(ice_pf_to_dev(pf)), vsi->netdev->name);
+	err = ice_vsi_req_irq_msix(vsi, int_name);
+	if (err)
+		goto err_setup_rx;
+
+	ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc);
+
+	if (vsi->type == ICE_VSI_PF) {
+		/* Notify the stack of the actual queue counts. */
+		err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq);
+		if (err)
+			goto err_set_qs;
+
+		err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq);
+		if (err)
+			goto err_set_qs;
+	}
+
+	err = ice_up_complete(vsi);
+	if (err)
+		goto err_up_complete;
+
+	return 0;
+
+err_up_complete:
+	ice_down(vsi);
+err_set_qs:
+	ice_vsi_free_irq(vsi);
+err_setup_rx:
+	ice_vsi_free_rx_rings(vsi);
+err_setup_tx:
+	ice_vsi_free_tx_rings(vsi);
+
+	return err;
+}
+
+/**
+ * ice_vsi_release_all - Delete all VSIs
+ * @pf: PF from which all VSIs are being removed
+ */
+static void ice_vsi_release_all(struct ice_pf *pf)
+{
+	int err, i;
+
+	if (!pf->vsi)
+		return;
+
+	ice_for_each_vsi(pf, i) {
+		if (!pf->vsi[i])
+			continue;
+
+		if (pf->vsi[i]->type == ICE_VSI_CHNL)
+			continue;
+
+		err = ice_vsi_release(pf->vsi[i]);
+		if (err)
+			dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
+				i, err, pf->vsi[i]->vsi_num);
+	}
+}
+
+/**
+ * ice_vsi_rebuild_by_type - Rebuild VSI of a given type
+ * @pf: pointer to the PF instance
+ * @type: VSI type to rebuild
+ *
+ * Iterates through the pf->vsi array and rebuilds VSIs of the requested type
+ */
+static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	int i, err;
+
+	ice_for_each_vsi(pf, i) {
+		struct ice_vsi *vsi = pf->vsi[i];
+
+		if (!vsi || vsi->type != type)
+			continue;
+
+		/* rebuild the VSI */
+		err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_INIT);
+		if (err) {
+			dev_err(dev, "rebuild VSI failed, err %d, VSI index %d, type %s\n",
+				err, vsi->idx, ice_vsi_type_str(type));
+			return err;
+		}
+
+		/* replay filters for the VSI */
+		err = ice_replay_vsi(&pf->hw, vsi->idx);
+		if (err) {
+			dev_err(dev, "replay VSI failed, error %d, VSI index %d, type %s\n",
+				err, vsi->idx, ice_vsi_type_str(type));
+			return err;
+		}
+
+		/* Re-map HW VSI number, using VSI handle that has been
+		 * previously validated in ice_replay_vsi() call above
+		 */
+		vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
+
+		/* enable the VSI */
+		err = ice_ena_vsi(vsi, false);
+		if (err) {
+			dev_err(dev, "enable VSI failed, err %d, VSI index %d, type %s\n",
+				err, vsi->idx, ice_vsi_type_str(type));
+			return err;
+		}
+
+		dev_info(dev, "VSI rebuilt. VSI index %d, type %s\n", vsi->idx,
+			 ice_vsi_type_str(type));
+	}
+
+	return 0;
+}
+
+/**
+ * ice_update_pf_netdev_link - Update PF netdev link status
+ * @pf: pointer to the PF instance
+ */
+static void ice_update_pf_netdev_link(struct ice_pf *pf)
+{
+	bool link_up;
+	int i;
+
+	ice_for_each_vsi(pf, i) {
+		struct ice_vsi *vsi = pf->vsi[i];
+
+		if (!vsi || vsi->type != ICE_VSI_PF)
+			return;
+
+		ice_get_link_status(pf->vsi[i]->port_info, &link_up);
+		if (link_up) {
+			netif_carrier_on(pf->vsi[i]->netdev);
+			netif_tx_wake_all_queues(pf->vsi[i]->netdev);
+		} else {
+			netif_carrier_off(pf->vsi[i]->netdev);
+			netif_tx_stop_all_queues(pf->vsi[i]->netdev);
+		}
+	}
+}
+
+/**
+ * ice_rebuild - rebuild after reset
+ * @pf: PF to rebuild
+ * @reset_type: type of reset
+ *
+ * Do not rebuild VF VSI in this flow because that is already handled via
+ * ice_reset_all_vfs(). This is because requirements for resetting a VF after a
+ * PFR/CORER/GLOBER/etc. are different than the normal flow. Also, we don't want
+ * to reset/rebuild all the VF VSI twice.
+ */
+static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	bool dvm;
+	int err;
+
+	if (test_bit(ICE_DOWN, pf->state))
+		goto clear_recovery;
+
+	dev_dbg(dev, "rebuilding PF after reset_type=%d\n", reset_type);
+
+#define ICE_EMP_RESET_SLEEP_MS 5000
+	if (reset_type == ICE_RESET_EMPR) {
+		/* If an EMP reset has occurred, any previously pending flash
+		 * update will have completed. We no longer know whether or
+		 * not the NVM update EMP reset is restricted.
+		 */
+		pf->fw_emp_reset_disabled = false;
+
+		msleep(ICE_EMP_RESET_SLEEP_MS);
+	}
+
+	err = ice_init_all_ctrlq(hw);
+	if (err) {
+		dev_err(dev, "control queues init failed %d\n", err);
+		goto err_init_ctrlq;
+	}
+
+	/* if DDP was previously loaded successfully */
+	if (!ice_is_safe_mode(pf)) {
+		/* reload the SW DB of filter tables */
+		if (reset_type == ICE_RESET_PFR)
+			ice_fill_blk_tbls(hw);
+		else
+			/* Reload DDP Package after CORER/GLOBR reset */
+			ice_load_pkg(NULL, pf);
+	}
+
+	err = ice_clear_pf_cfg(hw);
+	if (err) {
+		dev_err(dev, "clear PF configuration failed %d\n", err);
+		goto err_init_ctrlq;
+	}
+
+	ice_clear_pxe_mode(hw);
+
+	err = ice_init_nvm(hw);
+	if (err) {
+		dev_err(dev, "ice_init_nvm failed %d\n", err);
+		goto err_init_ctrlq;
+	}
+
+	err = ice_get_caps(hw);
+	if (err) {
+		dev_err(dev, "ice_get_caps failed %d\n", err);
+		goto err_init_ctrlq;
+	}
+
+	err = ice_aq_set_mac_cfg(hw, ICE_AQ_SET_MAC_FRAME_SIZE_MAX, NULL);
+	if (err) {
+		dev_err(dev, "set_mac_cfg failed %d\n", err);
+		goto err_init_ctrlq;
+	}
+
+	dvm = ice_is_dvm_ena(hw);
+
+	err = ice_aq_set_port_params(pf->hw.port_info, dvm, NULL);
+	if (err)
+		goto err_init_ctrlq;
+
+	err = ice_sched_init_port(hw->port_info);
+	if (err)
+		goto err_sched_init_port;
+
+	/* start misc vector */
+	err = ice_req_irq_msix_misc(pf);
+	if (err) {
+		dev_err(dev, "misc vector setup failed: %d\n", err);
+		goto err_sched_init_port;
+	}
+
+	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
+		wr32(hw, PFQF_FD_ENA, PFQF_FD_ENA_FD_ENA_M);
+		if (!rd32(hw, PFQF_FD_SIZE)) {
+			u16 unused, guar, b_effort;
+
+			guar = hw->func_caps.fd_fltr_guar;
+			b_effort = hw->func_caps.fd_fltr_best_effort;
+
+			/* force guaranteed filter pool for PF */
+			ice_alloc_fd_guar_item(hw, &unused, guar);
+			/* force shared filter pool for PF */
+			ice_alloc_fd_shrd_item(hw, &unused, b_effort);
+		}
+	}
+
+	if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
+		ice_dcb_rebuild(pf);
+
+	/* If the PF previously had enabled PTP, PTP init needs to happen before
+	 * the VSI rebuild. If not, this causes the PTP link status events to
+	 * fail.
+	 */
+	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+		ice_ptp_reset(pf);
+
+	if (ice_is_feature_supported(pf, ICE_F_GNSS))
+		ice_gnss_init(pf);
+
+	/* rebuild PF VSI */
+	err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF);
+	if (err) {
+		dev_err(dev, "PF VSI rebuild failed: %d\n", err);
+		goto err_vsi_rebuild;
+	}
+
+	/* configure PTP timestamping after VSI rebuild */
+	if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags))
+		ice_ptp_cfg_timestamp(pf, false);
+
+	err = ice_vsi_rebuild_by_type(pf, ICE_VSI_SWITCHDEV_CTRL);
+	if (err) {
+		dev_err(dev, "Switchdev CTRL VSI rebuild failed: %d\n", err);
+		goto err_vsi_rebuild;
+	}
+
+	if (reset_type == ICE_RESET_PFR) {
+		err = ice_rebuild_channels(pf);
+		if (err) {
+			dev_err(dev, "failed to rebuild and replay ADQ VSIs, err %d\n",
+				err);
+			goto err_vsi_rebuild;
+		}
+	}
+
+	/* If Flow Director is active */
+	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
+		err = ice_vsi_rebuild_by_type(pf, ICE_VSI_CTRL);
+		if (err) {
+			dev_err(dev, "control VSI rebuild failed: %d\n", err);
+			goto err_vsi_rebuild;
+		}
+
+		/* replay HW Flow Director recipes */
+		if (hw->fdir_prof)
+			ice_fdir_replay_flows(hw);
+
+		/* replay Flow Director filters */
+		ice_fdir_replay_fltrs(pf);
+
+		ice_rebuild_arfs(pf);
+	}
+
+	ice_update_pf_netdev_link(pf);
+
+	/* tell the firmware we are up */
+	err = ice_send_version(pf);
+	if (err) {
+		dev_err(dev, "Rebuild failed due to error sending driver version: %d\n",
+			err);
+		goto err_vsi_rebuild;
+	}
+
+	ice_replay_post(hw);
+
+	/* if we get here, reset flow is successful */
+	clear_bit(ICE_RESET_FAILED, pf->state);
+
+	ice_plug_aux_dev(pf);
+	if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG))
+		ice_lag_rebuild(pf);
+	return;
+
+err_vsi_rebuild:
+err_sched_init_port:
+	ice_sched_cleanup_all(hw);
+err_init_ctrlq:
+	ice_shutdown_all_ctrlq(hw);
+	set_bit(ICE_RESET_FAILED, pf->state);
+clear_recovery:
+	/* set this bit in PF state to control service task scheduling */
+	set_bit(ICE_NEEDS_RESTART, pf->state);
+	dev_err(dev, "Rebuild failed, unload and reload driver\n");
+}
+
+/**
+ * ice_change_mtu - NDO callback to change the MTU
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct bpf_prog *prog;
+	u8 count = 0;
+	int err = 0;
+
+	if (new_mtu == (int)netdev->mtu) {
+		netdev_warn(netdev, "MTU is already %u\n", netdev->mtu);
+		return 0;
+	}
+
+	prog = vsi->xdp_prog;
+	if (prog && !prog->aux->xdp_has_frags) {
+		int frame_size = ice_max_xdp_frame_size(vsi);
+
+		if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
+			netdev_err(netdev, "max MTU for XDP usage is %d\n",
+				   frame_size - ICE_ETH_PKT_HDR_PAD);
+			return -EINVAL;
+		}
+	} else if (test_bit(ICE_FLAG_LEGACY_RX, pf->flags)) {
+		if (new_mtu + ICE_ETH_PKT_HDR_PAD > ICE_MAX_FRAME_LEGACY_RX) {
+			netdev_err(netdev, "Too big MTU for legacy-rx; Max is %d\n",
+				   ICE_MAX_FRAME_LEGACY_RX - ICE_ETH_PKT_HDR_PAD);
+			return -EINVAL;
+		}
+	}
+
+	/* if a reset is in progress, wait for some time for it to complete */
+	do {
+		if (ice_is_reset_in_progress(pf->state)) {
+			count++;
+			usleep_range(1000, 2000);
+		} else {
+			break;
+		}
+
+	} while (count < 100);
+
+	if (count == 100) {
+		netdev_err(netdev, "can't change MTU. Device is busy\n");
+		return -EBUSY;
+	}
+
+	netdev->mtu = (unsigned int)new_mtu;
+	err = ice_down_up(vsi);
+	if (err)
+		return err;
+
+	netdev_dbg(netdev, "changed MTU to %d\n", new_mtu);
+	set_bit(ICE_FLAG_MTU_CHANGED, pf->flags);
+
+	return err;
+}
+
+/**
+ * ice_eth_ioctl - Access the hwtstamp interface
+ * @netdev: network interface device structure
+ * @ifr: interface request data
+ * @cmd: ioctl command
+ */
+static int ice_eth_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	switch (cmd) {
+	case SIOCGHWTSTAMP:
+		return ice_ptp_get_ts_config(pf, ifr);
+	case SIOCSHWTSTAMP:
+		return ice_ptp_set_ts_config(pf, ifr);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * ice_aq_str - convert AQ err code to a string
+ * @aq_err: the AQ error code to convert
+ */
+const char *ice_aq_str(enum ice_aq_err aq_err)
+{
+	switch (aq_err) {
+	case ICE_AQ_RC_OK:
+		return "OK";
+	case ICE_AQ_RC_EPERM:
+		return "ICE_AQ_RC_EPERM";
+	case ICE_AQ_RC_ENOENT:
+		return "ICE_AQ_RC_ENOENT";
+	case ICE_AQ_RC_ENOMEM:
+		return "ICE_AQ_RC_ENOMEM";
+	case ICE_AQ_RC_EBUSY:
+		return "ICE_AQ_RC_EBUSY";
+	case ICE_AQ_RC_EEXIST:
+		return "ICE_AQ_RC_EEXIST";
+	case ICE_AQ_RC_EINVAL:
+		return "ICE_AQ_RC_EINVAL";
+	case ICE_AQ_RC_ENOSPC:
+		return "ICE_AQ_RC_ENOSPC";
+	case ICE_AQ_RC_ENOSYS:
+		return "ICE_AQ_RC_ENOSYS";
+	case ICE_AQ_RC_EMODE:
+		return "ICE_AQ_RC_EMODE";
+	case ICE_AQ_RC_ENOSEC:
+		return "ICE_AQ_RC_ENOSEC";
+	case ICE_AQ_RC_EBADSIG:
+		return "ICE_AQ_RC_EBADSIG";
+	case ICE_AQ_RC_ESVN:
+		return "ICE_AQ_RC_ESVN";
+	case ICE_AQ_RC_EBADMAN:
+		return "ICE_AQ_RC_EBADMAN";
+	case ICE_AQ_RC_EBADBUF:
+		return "ICE_AQ_RC_EBADBUF";
+	}
+
+	return "ICE_AQ_RC_UNKNOWN";
+}
+
+/**
+ * ice_set_rss_lut - Set RSS LUT
+ * @vsi: Pointer to VSI structure
+ * @lut: Lookup table
+ * @lut_size: Lookup table size
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size)
+{
+	struct ice_aq_get_set_rss_lut_params params = {};
+	struct ice_hw *hw = &vsi->back->hw;
+	int status;
+
+	if (!lut)
+		return -EINVAL;
+
+	params.vsi_handle = vsi->idx;
+	params.lut_size = lut_size;
+	params.lut_type = vsi->rss_lut_type;
+	params.lut = lut;
+
+	status = ice_aq_set_rss_lut(hw, &params);
+	if (status)
+		dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS lut, err %d aq_err %s\n",
+			status, ice_aq_str(hw->adminq.sq_last_status));
+
+	return status;
+}
+
+/**
+ * ice_set_rss_key - Set RSS key
+ * @vsi: Pointer to the VSI structure
+ * @seed: RSS hash seed
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	int status;
+
+	if (!seed)
+		return -EINVAL;
+
+	status = ice_aq_set_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed);
+	if (status)
+		dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS key, err %d aq_err %s\n",
+			status, ice_aq_str(hw->adminq.sq_last_status));
+
+	return status;
+}
+
+/**
+ * ice_get_rss_lut - Get RSS LUT
+ * @vsi: Pointer to VSI structure
+ * @lut: Buffer to store the lookup table entries
+ * @lut_size: Size of buffer to store the lookup table entries
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size)
+{
+	struct ice_aq_get_set_rss_lut_params params = {};
+	struct ice_hw *hw = &vsi->back->hw;
+	int status;
+
+	if (!lut)
+		return -EINVAL;
+
+	params.vsi_handle = vsi->idx;
+	params.lut_size = lut_size;
+	params.lut_type = vsi->rss_lut_type;
+	params.lut = lut;
+
+	status = ice_aq_get_rss_lut(hw, &params);
+	if (status)
+		dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS lut, err %d aq_err %s\n",
+			status, ice_aq_str(hw->adminq.sq_last_status));
+
+	return status;
+}
+
+/**
+ * ice_get_rss_key - Get RSS key
+ * @vsi: Pointer to VSI structure
+ * @seed: Buffer to store the key in
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	int status;
+
+	if (!seed)
+		return -EINVAL;
+
+	status = ice_aq_get_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed);
+	if (status)
+		dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS key, err %d aq_err %s\n",
+			status, ice_aq_str(hw->adminq.sq_last_status));
+
+	return status;
+}
+
+/**
+ * ice_bridge_getlink - Get the hardware bridge mode
+ * @skb: skb buff
+ * @pid: process ID
+ * @seq: RTNL message seq
+ * @dev: the netdev being configured
+ * @filter_mask: filter mask passed in
+ * @nlflags: netlink flags passed in
+ *
+ * Return the bridge mode (VEB/VEPA)
+ */
+static int
+ice_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+		   struct net_device *dev, u32 filter_mask, int nlflags)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u16 bmode;
+
+	bmode = pf->first_sw->bridge_mode;
+
+	return ndo_dflt_bridge_getlink(skb, pid, seq, dev, bmode, 0, 0, nlflags,
+				       filter_mask, NULL);
+}
+
+/**
+ * ice_vsi_update_bridge_mode - Update VSI for switching bridge mode (VEB/VEPA)
+ * @vsi: Pointer to VSI structure
+ * @bmode: Hardware bridge mode (VEB/VEPA)
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
+{
+	struct ice_aqc_vsi_props *vsi_props;
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	int ret;
+
+	vsi_props = &vsi->info;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ctxt->info = vsi->info;
+
+	if (bmode == BRIDGE_MODE_VEB)
+		/* change from VEPA to VEB mode */
+		ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
+	else
+		/* change from VEB to VEPA mode */
+		ctxt->info.sw_flags &= ~ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
+	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
+
+	ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (ret) {
+		dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %d aq_err %s\n",
+			bmode, ret, ice_aq_str(hw->adminq.sq_last_status));
+		goto out;
+	}
+	/* Update sw flags for book keeping */
+	vsi_props->sw_flags = ctxt->info.sw_flags;
+
+out:
+	kfree(ctxt);
+	return ret;
+}
+
+/**
+ * ice_bridge_setlink - Set the hardware bridge mode
+ * @dev: the netdev being configured
+ * @nlh: RTNL message
+ * @flags: bridge setlink flags
+ * @extack: netlink extended ack
+ *
+ * Sets the bridge mode (VEB/VEPA) of the switch to which the netdev (VSI) is
+ * hooked up to. Iterates through the PF VSI list and sets the loopback mode (if
+ * not already set for all VSIs connected to this switch. And also update the
+ * unicast switch filter rules for the corresponding switch of the netdev.
+ */
+static int
+ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
+		   u16 __always_unused flags,
+		   struct netlink_ext_ack __always_unused *extack)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	struct ice_pf *pf = np->vsi->back;
+	struct nlattr *attr, *br_spec;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_sw *pf_sw;
+	int rem, v, err = 0;
+
+	pf_sw = pf->first_sw;
+	/* find the attribute in the netlink message */
+	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+
+	nla_for_each_nested(attr, br_spec, rem) {
+		__u16 mode;
+
+		if (nla_type(attr) != IFLA_BRIDGE_MODE)
+			continue;
+		mode = nla_get_u16(attr);
+		if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
+			return -EINVAL;
+		/* Continue  if bridge mode is not being flipped */
+		if (mode == pf_sw->bridge_mode)
+			continue;
+		/* Iterates through the PF VSI list and update the loopback
+		 * mode of the VSI
+		 */
+		ice_for_each_vsi(pf, v) {
+			if (!pf->vsi[v])
+				continue;
+			err = ice_vsi_update_bridge_mode(pf->vsi[v], mode);
+			if (err)
+				return err;
+		}
+
+		hw->evb_veb = (mode == BRIDGE_MODE_VEB);
+		/* Update the unicast switch filter rules for the corresponding
+		 * switch of the netdev
+		 */
+		err = ice_update_sw_rule_bridge_mode(hw);
+		if (err) {
+			netdev_err(dev, "switch rule update failed, mode = %d err %d aq_err %s\n",
+				   mode, err,
+				   ice_aq_str(hw->adminq.sq_last_status));
+			/* revert hw->evb_veb */
+			hw->evb_veb = (pf_sw->bridge_mode == BRIDGE_MODE_VEB);
+			return err;
+		}
+
+		pf_sw->bridge_mode = mode;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ * @txqueue: Tx queue
+ */
+static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_tx_ring *tx_ring = NULL;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u32 i;
+
+	pf->tx_timeout_count++;
+
+	/* Check if PFC is enabled for the TC to which the queue belongs
+	 * to. If yes then Tx timeout is not caused by a hung queue, no
+	 * need to reset and rebuild
+	 */
+	if (ice_is_pfc_causing_hung_q(pf, txqueue)) {
+		dev_info(ice_pf_to_dev(pf), "Fake Tx hang detected on queue %u, timeout caused by PFC storm\n",
+			 txqueue);
+		return;
+	}
+
+	/* now that we have an index, find the tx_ring struct */
+	ice_for_each_txq(vsi, i)
+		if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
+			if (txqueue == vsi->tx_rings[i]->q_index) {
+				tx_ring = vsi->tx_rings[i];
+				break;
+			}
+
+	/* Reset recovery level if enough time has elapsed after last timeout.
+	 * Also ensure no new reset action happens before next timeout period.
+	 */
+	if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ * 20)))
+		pf->tx_timeout_recovery_level = 1;
+	else if (time_before(jiffies, (pf->tx_timeout_last_recovery +
+				       netdev->watchdog_timeo)))
+		return;
+
+	if (tx_ring) {
+		struct ice_hw *hw = &pf->hw;
+		u32 head, val = 0;
+
+		head = (rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue])) &
+			QTX_COMM_HEAD_HEAD_M) >> QTX_COMM_HEAD_HEAD_S;
+		/* Read interrupt register */
+		val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx));
+
+		netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %u, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n",
+			    vsi->vsi_num, txqueue, tx_ring->next_to_clean,
+			    head, tx_ring->next_to_use, val);
+	}
+
+	pf->tx_timeout_last_recovery = jiffies;
+	netdev_info(netdev, "tx_timeout recovery level %d, txqueue %u\n",
+		    pf->tx_timeout_recovery_level, txqueue);
+
+	switch (pf->tx_timeout_recovery_level) {
+	case 1:
+		set_bit(ICE_PFR_REQ, pf->state);
+		break;
+	case 2:
+		set_bit(ICE_CORER_REQ, pf->state);
+		break;
+	case 3:
+		set_bit(ICE_GLOBR_REQ, pf->state);
+		break;
+	default:
+		netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n");
+		set_bit(ICE_DOWN, pf->state);
+		set_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
+		set_bit(ICE_SERVICE_DIS, pf->state);
+		break;
+	}
+
+	ice_service_task_schedule(pf);
+	pf->tx_timeout_recovery_level++;
+}
+
+/**
+ * ice_setup_tc_cls_flower - flower classifier offloads
+ * @np: net device to configure
+ * @filter_dev: device on which filter is added
+ * @cls_flower: offload data
+ */
+static int
+ice_setup_tc_cls_flower(struct ice_netdev_priv *np,
+			struct net_device *filter_dev,
+			struct flow_cls_offload *cls_flower)
+{
+	struct ice_vsi *vsi = np->vsi;
+
+	if (cls_flower->common.chain_index)
+		return -EOPNOTSUPP;
+
+	switch (cls_flower->command) {
+	case FLOW_CLS_REPLACE:
+		return ice_add_cls_flower(filter_dev, vsi, cls_flower);
+	case FLOW_CLS_DESTROY:
+		return ice_del_cls_flower(vsi, cls_flower);
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
+ * ice_setup_tc_block_cb - callback handler registered for TC block
+ * @type: TC SETUP type
+ * @type_data: TC flower offload data that contains user input
+ * @cb_priv: netdev private data
+ */
+static int
+ice_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{
+	struct ice_netdev_priv *np = cb_priv;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return ice_setup_tc_cls_flower(np, np->vsi->netdev,
+					       type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * ice_validate_mqprio_qopt - Validate TCF input parameters
+ * @vsi: Pointer to VSI
+ * @mqprio_qopt: input parameters for mqprio queue configuration
+ *
+ * This function validates MQPRIO params, such as qcount (power of 2 wherever
+ * needed), and make sure user doesn't specify qcount and BW rate limit
+ * for TCs, which are more than "num_tc"
+ */
+static int
+ice_validate_mqprio_qopt(struct ice_vsi *vsi,
+			 struct tc_mqprio_qopt_offload *mqprio_qopt)
+{
+	int non_power_of_2_qcount = 0;
+	struct ice_pf *pf = vsi->back;
+	int max_rss_q_cnt = 0;
+	u64 sum_min_rate = 0;
+	struct device *dev;
+	int i, speed;
+	u8 num_tc;
+
+	if (vsi->type != ICE_VSI_PF)
+		return -EINVAL;
+
+	if (mqprio_qopt->qopt.offset[0] != 0 ||
+	    mqprio_qopt->qopt.num_tc < 1 ||
+	    mqprio_qopt->qopt.num_tc > ICE_CHNL_MAX_TC)
+		return -EINVAL;
+
+	dev = ice_pf_to_dev(pf);
+	vsi->ch_rss_size = 0;
+	num_tc = mqprio_qopt->qopt.num_tc;
+	speed = ice_get_link_speed_kbps(vsi);
+
+	for (i = 0; num_tc; i++) {
+		int qcount = mqprio_qopt->qopt.count[i];
+		u64 max_rate, min_rate, rem;
+
+		if (!qcount)
+			return -EINVAL;
+
+		if (is_power_of_2(qcount)) {
+			if (non_power_of_2_qcount &&
+			    qcount > non_power_of_2_qcount) {
+				dev_err(dev, "qcount[%d] cannot be greater than non power of 2 qcount[%d]\n",
+					qcount, non_power_of_2_qcount);
+				return -EINVAL;
+			}
+			if (qcount > max_rss_q_cnt)
+				max_rss_q_cnt = qcount;
+		} else {
+			if (non_power_of_2_qcount &&
+			    qcount != non_power_of_2_qcount) {
+				dev_err(dev, "Only one non power of 2 qcount allowed[%d,%d]\n",
+					qcount, non_power_of_2_qcount);
+				return -EINVAL;
+			}
+			if (qcount < max_rss_q_cnt) {
+				dev_err(dev, "non power of 2 qcount[%d] cannot be less than other qcount[%d]\n",
+					qcount, max_rss_q_cnt);
+				return -EINVAL;
+			}
+			max_rss_q_cnt = qcount;
+			non_power_of_2_qcount = qcount;
+		}
+
+		/* TC command takes input in K/N/Gbps or K/M/Gbit etc but
+		 * converts the bandwidth rate limit into Bytes/s when
+		 * passing it down to the driver. So convert input bandwidth
+		 * from Bytes/s to Kbps
+		 */
+		max_rate = mqprio_qopt->max_rate[i];
+		max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR);
+
+		/* min_rate is minimum guaranteed rate and it can't be zero */
+		min_rate = mqprio_qopt->min_rate[i];
+		min_rate = div_u64(min_rate, ICE_BW_KBPS_DIVISOR);
+		sum_min_rate += min_rate;
+
+		if (min_rate && min_rate < ICE_MIN_BW_LIMIT) {
+			dev_err(dev, "TC%d: min_rate(%llu Kbps) < %u Kbps\n", i,
+				min_rate, ICE_MIN_BW_LIMIT);
+			return -EINVAL;
+		}
+
+		if (max_rate && max_rate > speed) {
+			dev_err(dev, "TC%d: max_rate(%llu Kbps) > link speed of %u Kbps\n",
+				i, max_rate, speed);
+			return -EINVAL;
+		}
+
+		iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem);
+		if (rem) {
+			dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps",
+				i, ICE_MIN_BW_LIMIT);
+			return -EINVAL;
+		}
+
+		iter_div_u64_rem(max_rate, ICE_MIN_BW_LIMIT, &rem);
+		if (rem) {
+			dev_err(dev, "TC%d: Max Rate not multiple of %u Kbps",
+				i, ICE_MIN_BW_LIMIT);
+			return -EINVAL;
+		}
+
+		/* min_rate can't be more than max_rate, except when max_rate
+		 * is zero (implies max_rate sought is max line rate). In such
+		 * a case min_rate can be more than max.
+		 */
+		if (max_rate && min_rate > max_rate) {
+			dev_err(dev, "min_rate %llu Kbps can't be more than max_rate %llu Kbps\n",
+				min_rate, max_rate);
+			return -EINVAL;
+		}
+
+		if (i >= mqprio_qopt->qopt.num_tc - 1)
+			break;
+		if (mqprio_qopt->qopt.offset[i + 1] !=
+		    (mqprio_qopt->qopt.offset[i] + qcount))
+			return -EINVAL;
+	}
+	if (vsi->num_rxq <
+	    (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
+		return -EINVAL;
+	if (vsi->num_txq <
+	    (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
+		return -EINVAL;
+
+	if (sum_min_rate && sum_min_rate > (u64)speed) {
+		dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n",
+			sum_min_rate, speed);
+		return -EINVAL;
+	}
+
+	/* make sure vsi->ch_rss_size is set correctly based on TC's qcount */
+	vsi->ch_rss_size = max_rss_q_cnt;
+
+	return 0;
+}
+
+/**
+ * ice_add_vsi_to_fdir - add a VSI to the flow director group for PF
+ * @pf: ptr to PF device
+ * @vsi: ptr to VSI
+ */
+static int ice_add_vsi_to_fdir(struct ice_pf *pf, struct ice_vsi *vsi)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	bool added = false;
+	struct ice_hw *hw;
+	int flow;
+
+	if (!(vsi->num_gfltr || vsi->num_bfltr))
+		return -EINVAL;
+
+	hw = &pf->hw;
+	for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++) {
+		struct ice_fd_hw_prof *prof;
+		int tun, status;
+		u64 entry_h;
+
+		if (!(hw->fdir_prof && hw->fdir_prof[flow] &&
+		      hw->fdir_prof[flow]->cnt))
+			continue;
+
+		for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+			enum ice_flow_priority prio;
+			u64 prof_id;
+
+			/* add this VSI to FDir profile for this flow */
+			prio = ICE_FLOW_PRIO_NORMAL;
+			prof = hw->fdir_prof[flow];
+			prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+			status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id,
+						    prof->vsi_h[0], vsi->idx,
+						    prio, prof->fdir_seg[tun],
+						    &entry_h);
+			if (status) {
+				dev_err(dev, "channel VSI idx %d, not able to add to group %d\n",
+					vsi->idx, flow);
+				continue;
+			}
+
+			prof->entry_h[prof->cnt][tun] = entry_h;
+		}
+
+		/* store VSI for filter replay and delete */
+		prof->vsi_h[prof->cnt] = vsi->idx;
+		prof->cnt++;
+
+		added = true;
+		dev_dbg(dev, "VSI idx %d added to fdir group %d\n", vsi->idx,
+			flow);
+	}
+
+	if (!added)
+		dev_dbg(dev, "VSI idx %d not added to fdir groups\n", vsi->idx);
+
+	return 0;
+}
+
+/**
+ * ice_add_channel - add a channel by adding VSI
+ * @pf: ptr to PF device
+ * @sw_id: underlying HW switching element ID
+ * @ch: ptr to channel structure
+ *
+ * Add a channel (VSI) using add_vsi and queue_map
+ */
+static int ice_add_channel(struct ice_pf *pf, u16 sw_id, struct ice_channel *ch)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_vsi *vsi;
+
+	if (ch->type != ICE_VSI_CHNL) {
+		dev_err(dev, "add new VSI failed, ch->type %d\n", ch->type);
+		return -EINVAL;
+	}
+
+	vsi = ice_chnl_vsi_setup(pf, pf->hw.port_info, ch);
+	if (!vsi || vsi->type != ICE_VSI_CHNL) {
+		dev_err(dev, "create chnl VSI failure\n");
+		return -EINVAL;
+	}
+
+	ice_add_vsi_to_fdir(pf, vsi);
+
+	ch->sw_id = sw_id;
+	ch->vsi_num = vsi->vsi_num;
+	ch->info.mapping_flags = vsi->info.mapping_flags;
+	ch->ch_vsi = vsi;
+	/* set the back pointer of channel for newly created VSI */
+	vsi->ch = ch;
+
+	memcpy(&ch->info.q_mapping, &vsi->info.q_mapping,
+	       sizeof(vsi->info.q_mapping));
+	memcpy(&ch->info.tc_mapping, vsi->info.tc_mapping,
+	       sizeof(vsi->info.tc_mapping));
+
+	return 0;
+}
+
+/**
+ * ice_chnl_cfg_res
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ *
+ * Configure channel specific resources such as rings, vector.
+ */
+static void ice_chnl_cfg_res(struct ice_vsi *vsi, struct ice_channel *ch)
+{
+	int i;
+
+	for (i = 0; i < ch->num_txq; i++) {
+		struct ice_q_vector *tx_q_vector, *rx_q_vector;
+		struct ice_ring_container *rc;
+		struct ice_tx_ring *tx_ring;
+		struct ice_rx_ring *rx_ring;
+
+		tx_ring = vsi->tx_rings[ch->base_q + i];
+		rx_ring = vsi->rx_rings[ch->base_q + i];
+		if (!tx_ring || !rx_ring)
+			continue;
+
+		/* setup ring being channel enabled */
+		tx_ring->ch = ch;
+		rx_ring->ch = ch;
+
+		/* following code block sets up vector specific attributes */
+		tx_q_vector = tx_ring->q_vector;
+		rx_q_vector = rx_ring->q_vector;
+		if (!tx_q_vector && !rx_q_vector)
+			continue;
+
+		if (tx_q_vector) {
+			tx_q_vector->ch = ch;
+			/* setup Tx and Rx ITR setting if DIM is off */
+			rc = &tx_q_vector->tx;
+			if (!ITR_IS_DYNAMIC(rc))
+				ice_write_itr(rc, rc->itr_setting);
+		}
+		if (rx_q_vector) {
+			rx_q_vector->ch = ch;
+			/* setup Tx and Rx ITR setting if DIM is off */
+			rc = &rx_q_vector->rx;
+			if (!ITR_IS_DYNAMIC(rc))
+				ice_write_itr(rc, rc->itr_setting);
+		}
+	}
+
+	/* it is safe to assume that, if channel has non-zero num_t[r]xq, then
+	 * GLINT_ITR register would have written to perform in-context
+	 * update, hence perform flush
+	 */
+	if (ch->num_txq || ch->num_rxq)
+		ice_flush(&vsi->back->hw);
+}
+
+/**
+ * ice_cfg_chnl_all_res - configure channel resources
+ * @vsi: pte to main_vsi
+ * @ch: ptr to channel structure
+ *
+ * This function configures channel specific resources such as flow-director
+ * counter index, and other resources such as queues, vectors, ITR settings
+ */
+static void
+ice_cfg_chnl_all_res(struct ice_vsi *vsi, struct ice_channel *ch)
+{
+	/* configure channel (aka ADQ) resources such as queues, vectors,
+	 * ITR settings for channel specific vectors and anything else
+	 */
+	ice_chnl_cfg_res(vsi, ch);
+}
+
+/**
+ * ice_setup_hw_channel - setup new channel
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ * @sw_id: underlying HW switching element ID
+ * @type: type of channel to be created (VMDq2/VF)
+ *
+ * Setup new channel (VSI) based on specified type (VMDq2/VF)
+ * and configures Tx rings accordingly
+ */
+static int
+ice_setup_hw_channel(struct ice_pf *pf, struct ice_vsi *vsi,
+		     struct ice_channel *ch, u16 sw_id, u8 type)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	int ret;
+
+	ch->base_q = vsi->next_base_q;
+	ch->type = type;
+
+	ret = ice_add_channel(pf, sw_id, ch);
+	if (ret) {
+		dev_err(dev, "failed to add_channel using sw_id %u\n", sw_id);
+		return ret;
+	}
+
+	/* configure/setup ADQ specific resources */
+	ice_cfg_chnl_all_res(vsi, ch);
+
+	/* make sure to update the next_base_q so that subsequent channel's
+	 * (aka ADQ) VSI queue map is correct
+	 */
+	vsi->next_base_q = vsi->next_base_q + ch->num_rxq;
+	dev_dbg(dev, "added channel: vsi_num %u, num_rxq %u\n", ch->vsi_num,
+		ch->num_rxq);
+
+	return 0;
+}
+
+/**
+ * ice_setup_channel - setup new channel using uplink element
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ *
+ * Setup new channel (VSI) based on specified type (VMDq2/VF)
+ * and uplink switching element
+ */
+static bool
+ice_setup_channel(struct ice_pf *pf, struct ice_vsi *vsi,
+		  struct ice_channel *ch)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	u16 sw_id;
+	int ret;
+
+	if (vsi->type != ICE_VSI_PF) {
+		dev_err(dev, "unsupported parent VSI type(%d)\n", vsi->type);
+		return false;
+	}
+
+	sw_id = pf->first_sw->sw_id;
+
+	/* create channel (VSI) */
+	ret = ice_setup_hw_channel(pf, vsi, ch, sw_id, ICE_VSI_CHNL);
+	if (ret) {
+		dev_err(dev, "failed to setup hw_channel\n");
+		return false;
+	}
+	dev_dbg(dev, "successfully created channel()\n");
+
+	return ch->ch_vsi ? true : false;
+}
+
+/**
+ * ice_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
+ * @vsi: VSI to be configured
+ * @max_tx_rate: max Tx rate in Kbps to be configured as maximum BW limit
+ * @min_tx_rate: min Tx rate in Kbps to be configured as minimum BW limit
+ */
+static int
+ice_set_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate, u64 min_tx_rate)
+{
+	int err;
+
+	err = ice_set_min_bw_limit(vsi, min_tx_rate);
+	if (err)
+		return err;
+
+	return ice_set_max_bw_limit(vsi, max_tx_rate);
+}
+
+/**
+ * ice_create_q_channel - function to create channel
+ * @vsi: VSI to be configured
+ * @ch: ptr to channel (it contains channel specific params)
+ *
+ * This function creates channel (VSI) using num_queues specified by user,
+ * reconfigs RSS if needed.
+ */
+static int ice_create_q_channel(struct ice_vsi *vsi, struct ice_channel *ch)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+
+	if (!ch)
+		return -EINVAL;
+
+	dev = ice_pf_to_dev(pf);
+	if (!ch->num_txq || !ch->num_rxq) {
+		dev_err(dev, "Invalid num_queues requested: %d\n", ch->num_rxq);
+		return -EINVAL;
+	}
+
+	if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_txq) {
+		dev_err(dev, "cnt_q_avail (%u) less than num_queues %d\n",
+			vsi->cnt_q_avail, ch->num_txq);
+		return -EINVAL;
+	}
+
+	if (!ice_setup_channel(pf, vsi, ch)) {
+		dev_info(dev, "Failed to setup channel\n");
+		return -EINVAL;
+	}
+	/* configure BW rate limit */
+	if (ch->ch_vsi && (ch->max_tx_rate || ch->min_tx_rate)) {
+		int ret;
+
+		ret = ice_set_bw_limit(ch->ch_vsi, ch->max_tx_rate,
+				       ch->min_tx_rate);
+		if (ret)
+			dev_err(dev, "failed to set Tx rate of %llu Kbps for VSI(%u)\n",
+				ch->max_tx_rate, ch->ch_vsi->vsi_num);
+		else
+			dev_dbg(dev, "set Tx rate of %llu Kbps for VSI(%u)\n",
+				ch->max_tx_rate, ch->ch_vsi->vsi_num);
+	}
+
+	vsi->cnt_q_avail -= ch->num_txq;
+
+	return 0;
+}
+
+/**
+ * ice_rem_all_chnl_fltrs - removes all channel filters
+ * @pf: ptr to PF, TC-flower based filter are tracked at PF level
+ *
+ * Remove all advanced switch filters only if they are channel specific
+ * tc-flower based filter
+ */
+static void ice_rem_all_chnl_fltrs(struct ice_pf *pf)
+{
+	struct ice_tc_flower_fltr *fltr;
+	struct hlist_node *node;
+
+	/* to remove all channel filters, iterate an ordered list of filters */
+	hlist_for_each_entry_safe(fltr, node,
+				  &pf->tc_flower_fltr_list,
+				  tc_flower_node) {
+		struct ice_rule_query_data rule;
+		int status;
+
+		/* for now process only channel specific filters */
+		if (!ice_is_chnl_fltr(fltr))
+			continue;
+
+		rule.rid = fltr->rid;
+		rule.rule_id = fltr->rule_id;
+		rule.vsi_handle = fltr->dest_vsi_handle;
+		status = ice_rem_adv_rule_by_id(&pf->hw, &rule);
+		if (status) {
+			if (status == -ENOENT)
+				dev_dbg(ice_pf_to_dev(pf), "TC flower filter (rule_id %u) does not exist\n",
+					rule.rule_id);
+			else
+				dev_err(ice_pf_to_dev(pf), "failed to delete TC flower filter, status %d\n",
+					status);
+		} else if (fltr->dest_vsi) {
+			/* update advanced switch filter count */
+			if (fltr->dest_vsi->type == ICE_VSI_CHNL) {
+				u32 flags = fltr->flags;
+
+				fltr->dest_vsi->num_chnl_fltr--;
+				if (flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+					     ICE_TC_FLWR_FIELD_ENC_DST_MAC))
+					pf->num_dmac_chnl_fltrs--;
+			}
+		}
+
+		hlist_del(&fltr->tc_flower_node);
+		kfree(fltr);
+	}
+}
+
+/**
+ * ice_remove_q_channels - Remove queue channels for the TCs
+ * @vsi: VSI to be configured
+ * @rem_fltr: delete advanced switch filter or not
+ *
+ * Remove queue channels for the TCs
+ */
+static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_fltr)
+{
+	struct ice_channel *ch, *ch_tmp;
+	struct ice_pf *pf = vsi->back;
+	int i;
+
+	/* remove all tc-flower based filter if they are channel filters only */
+	if (rem_fltr)
+		ice_rem_all_chnl_fltrs(pf);
+
+	/* remove ntuple filters since queue configuration is being changed */
+	if  (vsi->netdev->features & NETIF_F_NTUPLE) {
+		struct ice_hw *hw = &pf->hw;
+
+		mutex_lock(&hw->fdir_fltr_lock);
+		ice_fdir_del_all_fltrs(vsi);
+		mutex_unlock(&hw->fdir_fltr_lock);
+	}
+
+	/* perform cleanup for channels if they exist */
+	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+		struct ice_vsi *ch_vsi;
+
+		list_del(&ch->list);
+		ch_vsi = ch->ch_vsi;
+		if (!ch_vsi) {
+			kfree(ch);
+			continue;
+		}
+
+		/* Reset queue contexts */
+		for (i = 0; i < ch->num_rxq; i++) {
+			struct ice_tx_ring *tx_ring;
+			struct ice_rx_ring *rx_ring;
+
+			tx_ring = vsi->tx_rings[ch->base_q + i];
+			rx_ring = vsi->rx_rings[ch->base_q + i];
+			if (tx_ring) {
+				tx_ring->ch = NULL;
+				if (tx_ring->q_vector)
+					tx_ring->q_vector->ch = NULL;
+			}
+			if (rx_ring) {
+				rx_ring->ch = NULL;
+				if (rx_ring->q_vector)
+					rx_ring->q_vector->ch = NULL;
+			}
+		}
+
+		/* Release FD resources for the channel VSI */
+		ice_fdir_rem_adq_chnl(&pf->hw, ch->ch_vsi->idx);
+
+		/* clear the VSI from scheduler tree */
+		ice_rm_vsi_lan_cfg(ch->ch_vsi->port_info, ch->ch_vsi->idx);
+
+		/* Delete VSI from FW, PF and HW VSI arrays */
+		ice_vsi_delete(ch->ch_vsi);
+
+		/* free the channel */
+		kfree(ch);
+	}
+
+	/* clear the channel VSI map which is stored in main VSI */
+	ice_for_each_chnl_tc(i)
+		vsi->tc_map_vsi[i] = NULL;
+
+	/* reset main VSI's all TC information */
+	vsi->all_enatc = 0;
+	vsi->all_numtc = 0;
+}
+
+/**
+ * ice_rebuild_channels - rebuild channel
+ * @pf: ptr to PF
+ *
+ * Recreate channel VSIs and replay filters
+ */
+static int ice_rebuild_channels(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_vsi *main_vsi;
+	bool rem_adv_fltr = true;
+	struct ice_channel *ch;
+	struct ice_vsi *vsi;
+	int tc_idx = 1;
+	int i, err;
+
+	main_vsi = ice_get_main_vsi(pf);
+	if (!main_vsi)
+		return 0;
+
+	if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) ||
+	    main_vsi->old_numtc == 1)
+		return 0; /* nothing to be done */
+
+	/* reconfigure main VSI based on old value of TC and cached values
+	 * for MQPRIO opts
+	 */
+	err = ice_vsi_cfg_tc(main_vsi, main_vsi->old_ena_tc);
+	if (err) {
+		dev_err(dev, "failed configuring TC(ena_tc:0x%02x) for HW VSI=%u\n",
+			main_vsi->old_ena_tc, main_vsi->vsi_num);
+		return err;
+	}
+
+	/* rebuild ADQ VSIs */
+	ice_for_each_vsi(pf, i) {
+		enum ice_vsi_type type;
+
+		vsi = pf->vsi[i];
+		if (!vsi || vsi->type != ICE_VSI_CHNL)
+			continue;
+
+		type = vsi->type;
+
+		/* rebuild ADQ VSI */
+		err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_INIT);
+		if (err) {
+			dev_err(dev, "VSI (type:%s) at index %d rebuild failed, err %d\n",
+				ice_vsi_type_str(type), vsi->idx, err);
+			goto cleanup;
+		}
+
+		/* Re-map HW VSI number, using VSI handle that has been
+		 * previously validated in ice_replay_vsi() call above
+		 */
+		vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
+
+		/* replay filters for the VSI */
+		err = ice_replay_vsi(&pf->hw, vsi->idx);
+		if (err) {
+			dev_err(dev, "VSI (type:%s) replay failed, err %d, VSI index %d\n",
+				ice_vsi_type_str(type), err, vsi->idx);
+			rem_adv_fltr = false;
+			goto cleanup;
+		}
+		dev_info(dev, "VSI (type:%s) at index %d rebuilt successfully\n",
+			 ice_vsi_type_str(type), vsi->idx);
+
+		/* store ADQ VSI at correct TC index in main VSI's
+		 * map of TC to VSI
+		 */
+		main_vsi->tc_map_vsi[tc_idx++] = vsi;
+	}
+
+	/* ADQ VSI(s) has been rebuilt successfully, so setup
+	 * channel for main VSI's Tx and Rx rings
+	 */
+	list_for_each_entry(ch, &main_vsi->ch_list, list) {
+		struct ice_vsi *ch_vsi;
+
+		ch_vsi = ch->ch_vsi;
+		if (!ch_vsi)
+			continue;
+
+		/* reconfig channel resources */
+		ice_cfg_chnl_all_res(main_vsi, ch);
+
+		/* replay BW rate limit if it is non-zero */
+		if (!ch->max_tx_rate && !ch->min_tx_rate)
+			continue;
+
+		err = ice_set_bw_limit(ch_vsi, ch->max_tx_rate,
+				       ch->min_tx_rate);
+		if (err)
+			dev_err(dev, "failed (err:%d) to rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
+				err, ch->max_tx_rate, ch->min_tx_rate,
+				ch_vsi->vsi_num);
+		else
+			dev_dbg(dev, "successfully rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n",
+				ch->max_tx_rate, ch->min_tx_rate,
+				ch_vsi->vsi_num);
+	}
+
+	/* reconfig RSS for main VSI */
+	if (main_vsi->ch_rss_size)
+		ice_vsi_cfg_rss_lut_key(main_vsi);
+
+	return 0;
+
+cleanup:
+	ice_remove_q_channels(main_vsi, rem_adv_fltr);
+	return err;
+}
+
+/**
+ * ice_create_q_channels - Add queue channel for the given TCs
+ * @vsi: VSI to be configured
+ *
+ * Configures queue channel mapping to the given TCs
+ */
+static int ice_create_q_channels(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_channel *ch;
+	int ret = 0, i;
+
+	ice_for_each_chnl_tc(i) {
+		if (!(vsi->all_enatc & BIT(i)))
+			continue;
+
+		ch = kzalloc(sizeof(*ch), GFP_KERNEL);
+		if (!ch) {
+			ret = -ENOMEM;
+			goto err_free;
+		}
+		INIT_LIST_HEAD(&ch->list);
+		ch->num_rxq = vsi->mqprio_qopt.qopt.count[i];
+		ch->num_txq = vsi->mqprio_qopt.qopt.count[i];
+		ch->base_q = vsi->mqprio_qopt.qopt.offset[i];
+		ch->max_tx_rate = vsi->mqprio_qopt.max_rate[i];
+		ch->min_tx_rate = vsi->mqprio_qopt.min_rate[i];
+
+		/* convert to Kbits/s */
+		if (ch->max_tx_rate)
+			ch->max_tx_rate = div_u64(ch->max_tx_rate,
+						  ICE_BW_KBPS_DIVISOR);
+		if (ch->min_tx_rate)
+			ch->min_tx_rate = div_u64(ch->min_tx_rate,
+						  ICE_BW_KBPS_DIVISOR);
+
+		ret = ice_create_q_channel(vsi, ch);
+		if (ret) {
+			dev_err(ice_pf_to_dev(pf),
+				"failed creating channel TC:%d\n", i);
+			kfree(ch);
+			goto err_free;
+		}
+		list_add_tail(&ch->list, &vsi->ch_list);
+		vsi->tc_map_vsi[i] = ch->ch_vsi;
+		dev_dbg(ice_pf_to_dev(pf),
+			"successfully created channel: VSI %pK\n", ch->ch_vsi);
+	}
+	return 0;
+
+err_free:
+	ice_remove_q_channels(vsi, false);
+
+	return ret;
+}
+
+/**
+ * ice_setup_tc_mqprio_qdisc - configure multiple traffic classes
+ * @netdev: net device to configure
+ * @type_data: TC offload data
+ */
+static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data)
+{
+	struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u16 mode, ena_tc_qdisc = 0;
+	int cur_txq, cur_rxq;
+	u8 hw = 0, num_tcf;
+	struct device *dev;
+	int ret, i;
+
+	dev = ice_pf_to_dev(pf);
+	num_tcf = mqprio_qopt->qopt.num_tc;
+	hw = mqprio_qopt->qopt.hw;
+	mode = mqprio_qopt->mode;
+	if (!hw) {
+		clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
+		vsi->ch_rss_size = 0;
+		memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
+		goto config_tcf;
+	}
+
+	/* Generate queue region map for number of TCF requested */
+	for (i = 0; i < num_tcf; i++)
+		ena_tc_qdisc |= BIT(i);
+
+	switch (mode) {
+	case TC_MQPRIO_MODE_CHANNEL:
+
+		if (pf->hw.port_info->is_custom_tx_enabled) {
+			dev_err(dev, "Custom Tx scheduler feature enabled, can't configure ADQ\n");
+			return -EBUSY;
+		}
+		ice_tear_down_devlink_rate_tree(pf);
+
+		ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt);
+		if (ret) {
+			netdev_err(netdev, "failed to validate_mqprio_qopt(), ret %d\n",
+				   ret);
+			return ret;
+		}
+		memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
+		set_bit(ICE_FLAG_TC_MQPRIO, pf->flags);
+		/* don't assume state of hw_tc_offload during driver load
+		 * and set the flag for TC flower filter if hw_tc_offload
+		 * already ON
+		 */
+		if (vsi->netdev->features & NETIF_F_HW_TC)
+			set_bit(ICE_FLAG_CLS_FLOWER, pf->flags);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+config_tcf:
+
+	/* Requesting same TCF configuration as already enabled */
+	if (ena_tc_qdisc == vsi->tc_cfg.ena_tc &&
+	    mode != TC_MQPRIO_MODE_CHANNEL)
+		return 0;
+
+	/* Pause VSI queues */
+	ice_dis_vsi(vsi, true);
+
+	if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
+		ice_remove_q_channels(vsi, true);
+
+	if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
+		vsi->req_txq = min_t(int, ice_get_avail_txq_count(pf),
+				     num_online_cpus());
+		vsi->req_rxq = min_t(int, ice_get_avail_rxq_count(pf),
+				     num_online_cpus());
+	} else {
+		/* logic to rebuild VSI, same like ethtool -L */
+		u16 offset = 0, qcount_tx = 0, qcount_rx = 0;
+
+		for (i = 0; i < num_tcf; i++) {
+			if (!(ena_tc_qdisc & BIT(i)))
+				continue;
+
+			offset = vsi->mqprio_qopt.qopt.offset[i];
+			qcount_rx = vsi->mqprio_qopt.qopt.count[i];
+			qcount_tx = vsi->mqprio_qopt.qopt.count[i];
+		}
+		vsi->req_txq = offset + qcount_tx;
+		vsi->req_rxq = offset + qcount_rx;
+
+		/* store away original rss_size info, so that it gets reused
+		 * form ice_vsi_rebuild during tc-qdisc delete stage - to
+		 * determine, what should be the rss_sizefor main VSI
+		 */
+		vsi->orig_rss_size = vsi->rss_size;
+	}
+
+	/* save current values of Tx and Rx queues before calling VSI rebuild
+	 * for fallback option
+	 */
+	cur_txq = vsi->num_txq;
+	cur_rxq = vsi->num_rxq;
+
+	/* proceed with rebuild main VSI using correct number of queues */
+	ret = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT);
+	if (ret) {
+		/* fallback to current number of queues */
+		dev_info(dev, "Rebuild failed with new queues, try with current number of queues\n");
+		vsi->req_txq = cur_txq;
+		vsi->req_rxq = cur_rxq;
+		clear_bit(ICE_RESET_FAILED, pf->state);
+		if (ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT)) {
+			dev_err(dev, "Rebuild of main VSI failed again\n");
+			return ret;
+		}
+	}
+
+	vsi->all_numtc = num_tcf;
+	vsi->all_enatc = ena_tc_qdisc;
+	ret = ice_vsi_cfg_tc(vsi, ena_tc_qdisc);
+	if (ret) {
+		netdev_err(netdev, "failed configuring TC for VSI id=%d\n",
+			   vsi->vsi_num);
+		goto exit;
+	}
+
+	if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) {
+		u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
+		u64 min_tx_rate = vsi->mqprio_qopt.min_rate[0];
+
+		/* set TC0 rate limit if specified */
+		if (max_tx_rate || min_tx_rate) {
+			/* convert to Kbits/s */
+			if (max_tx_rate)
+				max_tx_rate = div_u64(max_tx_rate, ICE_BW_KBPS_DIVISOR);
+			if (min_tx_rate)
+				min_tx_rate = div_u64(min_tx_rate, ICE_BW_KBPS_DIVISOR);
+
+			ret = ice_set_bw_limit(vsi, max_tx_rate, min_tx_rate);
+			if (!ret) {
+				dev_dbg(dev, "set Tx rate max %llu min %llu for VSI(%u)\n",
+					max_tx_rate, min_tx_rate, vsi->vsi_num);
+			} else {
+				dev_err(dev, "failed to set Tx rate max %llu min %llu for VSI(%u)\n",
+					max_tx_rate, min_tx_rate, vsi->vsi_num);
+				goto exit;
+			}
+		}
+		ret = ice_create_q_channels(vsi);
+		if (ret) {
+			netdev_err(netdev, "failed configuring queue channels\n");
+			goto exit;
+		} else {
+			netdev_dbg(netdev, "successfully configured channels\n");
+		}
+	}
+
+	if (vsi->ch_rss_size)
+		ice_vsi_cfg_rss_lut_key(vsi);
+
+exit:
+	/* if error, reset the all_numtc and all_enatc */
+	if (ret) {
+		vsi->all_numtc = 0;
+		vsi->all_enatc = 0;
+	}
+	/* resume VSI */
+	ice_ena_vsi(vsi, true);
+
+	return ret;
+}
+
+static LIST_HEAD(ice_block_cb_list);
+
+static int
+ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+	     void *type_data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	bool locked = false;
+	int err;
+
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return flow_block_cb_setup_simple(type_data,
+						  &ice_block_cb_list,
+						  ice_setup_tc_block_cb,
+						  np, np, true);
+	case TC_SETUP_QDISC_MQPRIO:
+		if (ice_is_eswitch_mode_switchdev(pf)) {
+			netdev_err(netdev, "TC MQPRIO offload not supported, switchdev is enabled\n");
+			return -EOPNOTSUPP;
+		}
+
+		if (pf->adev) {
+			mutex_lock(&pf->adev_mutex);
+			device_lock(&pf->adev->dev);
+			locked = true;
+			if (pf->adev->dev.driver) {
+				netdev_err(netdev, "Cannot change qdisc when RDMA is active\n");
+				err = -EBUSY;
+				goto adev_unlock;
+			}
+		}
+
+		/* setup traffic classifier for receive side */
+		mutex_lock(&pf->tc_mutex);
+		err = ice_setup_tc_mqprio_qdisc(netdev, type_data);
+		mutex_unlock(&pf->tc_mutex);
+
+adev_unlock:
+		if (locked) {
+			device_unlock(&pf->adev->dev);
+			mutex_unlock(&pf->adev_mutex);
+		}
+		return err;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return -EOPNOTSUPP;
+}
+
+static struct ice_indr_block_priv *
+ice_indr_block_priv_lookup(struct ice_netdev_priv *np,
+			   struct net_device *netdev)
+{
+	struct ice_indr_block_priv *cb_priv;
+
+	list_for_each_entry(cb_priv, &np->tc_indr_block_priv_list, list) {
+		if (!cb_priv->netdev)
+			return NULL;
+		if (cb_priv->netdev == netdev)
+			return cb_priv;
+	}
+	return NULL;
+}
+
+static int
+ice_indr_setup_block_cb(enum tc_setup_type type, void *type_data,
+			void *indr_priv)
+{
+	struct ice_indr_block_priv *priv = indr_priv;
+	struct ice_netdev_priv *np = priv->np;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return ice_setup_tc_cls_flower(np, priv->netdev,
+					       (struct flow_cls_offload *)
+					       type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int
+ice_indr_setup_tc_block(struct net_device *netdev, struct Qdisc *sch,
+			struct ice_netdev_priv *np,
+			struct flow_block_offload *f, void *data,
+			void (*cleanup)(struct flow_block_cb *block_cb))
+{
+	struct ice_indr_block_priv *indr_priv;
+	struct flow_block_cb *block_cb;
+
+	if (!ice_is_tunnel_supported(netdev) &&
+	    !(is_vlan_dev(netdev) &&
+	      vlan_dev_real_dev(netdev) == np->vsi->netdev))
+		return -EOPNOTSUPP;
+
+	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	switch (f->command) {
+	case FLOW_BLOCK_BIND:
+		indr_priv = ice_indr_block_priv_lookup(np, netdev);
+		if (indr_priv)
+			return -EEXIST;
+
+		indr_priv = kzalloc(sizeof(*indr_priv), GFP_KERNEL);
+		if (!indr_priv)
+			return -ENOMEM;
+
+		indr_priv->netdev = netdev;
+		indr_priv->np = np;
+		list_add(&indr_priv->list, &np->tc_indr_block_priv_list);
+
+		block_cb =
+			flow_indr_block_cb_alloc(ice_indr_setup_block_cb,
+						 indr_priv, indr_priv,
+						 ice_rep_indr_tc_block_unbind,
+						 f, netdev, sch, data, np,
+						 cleanup);
+
+		if (IS_ERR(block_cb)) {
+			list_del(&indr_priv->list);
+			kfree(indr_priv);
+			return PTR_ERR(block_cb);
+		}
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, &ice_block_cb_list);
+		break;
+	case FLOW_BLOCK_UNBIND:
+		indr_priv = ice_indr_block_priv_lookup(np, netdev);
+		if (!indr_priv)
+			return -ENOENT;
+
+		block_cb = flow_block_cb_lookup(f->block,
+						ice_indr_setup_block_cb,
+						indr_priv);
+		if (!block_cb)
+			return -ENOENT;
+
+		flow_indr_block_cb_remove(block_cb, f);
+
+		list_del(&block_cb->driver_list);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+static int
+ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch,
+		     void *cb_priv, enum tc_setup_type type, void *type_data,
+		     void *data,
+		     void (*cleanup)(struct flow_block_cb *block_cb))
+{
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return ice_indr_setup_tc_block(netdev, sch, cb_priv, type_data,
+					       data, cleanup);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * ice_open - Called when a network interface becomes active
+ * @netdev: network interface device structure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP). At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the netdev watchdog is enabled,
+ * and the stack is notified that the interface is ready.
+ *
+ * Returns 0 on success, negative value on failure
+ */
+int ice_open(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	if (ice_is_reset_in_progress(pf->state)) {
+		netdev_err(netdev, "can't open net device while reset is in progress");
+		return -EBUSY;
+	}
+
+	return ice_open_internal(netdev);
+}
+
+/**
+ * ice_open_internal - Called when a network interface becomes active
+ * @netdev: network interface device structure
+ *
+ * Internal ice_open implementation. Should not be used directly except for ice_open and reset
+ * handling routine
+ *
+ * Returns 0 on success, negative value on failure
+ */
+int ice_open_internal(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	struct ice_port_info *pi;
+	int err;
+
+	if (test_bit(ICE_NEEDS_RESTART, pf->state)) {
+		netdev_err(netdev, "driver needs to be unloaded and reloaded\n");
+		return -EIO;
+	}
+
+	netif_carrier_off(netdev);
+
+	pi = vsi->port_info;
+	err = ice_update_link_info(pi);
+	if (err) {
+		netdev_err(netdev, "Failed to get link info, error %d\n", err);
+		return err;
+	}
+
+	ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err);
+
+	/* Set PHY if there is media, otherwise, turn off PHY */
+	if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
+		clear_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+		if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) {
+			err = ice_init_phy_user_cfg(pi);
+			if (err) {
+				netdev_err(netdev, "Failed to initialize PHY settings, error %d\n",
+					   err);
+				return err;
+			}
+		}
+
+		err = ice_configure_phy(vsi);
+		if (err) {
+			netdev_err(netdev, "Failed to set physical link up, error %d\n",
+				   err);
+			return err;
+		}
+	} else {
+		set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
+		ice_set_link(vsi, false);
+	}
+
+	err = ice_vsi_open(vsi);
+	if (err)
+		netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n",
+			   vsi->vsi_num, vsi->vsw->sw_id);
+
+	/* Update existing tunnels information */
+	udp_tunnel_get_rx_info(netdev);
+
+	return err;
+}
+
+/**
+ * ice_stop - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * The stop entry point is called when an interface is de-activated by the OS,
+ * and the netdevice enters the DOWN state. The hardware is still under the
+ * driver's control, but the netdev interface is disabled.
+ *
+ * Returns success only - not allowed to fail
+ */
+int ice_stop(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+
+	if (ice_is_reset_in_progress(pf->state)) {
+		netdev_err(netdev, "can't stop net device while reset is in progress");
+		return -EBUSY;
+	}
+
+	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags)) {
+		int link_err = ice_force_phys_link_state(vsi, false);
+
+		if (link_err) {
+			if (link_err == -ENOMEDIUM)
+				netdev_info(vsi->netdev, "Skipping link reconfig - no media attached, VSI %d\n",
+					    vsi->vsi_num);
+			else
+				netdev_err(vsi->netdev, "Failed to set physical link down, VSI %d error %d\n",
+					   vsi->vsi_num, link_err);
+
+			ice_vsi_close(vsi);
+			return -EIO;
+		}
+	}
+
+	ice_vsi_close(vsi);
+
+	return 0;
+}
+
+/**
+ * ice_features_check - Validate encapsulated packet conforms to limits
+ * @skb: skb buffer
+ * @netdev: This port's netdev
+ * @features: Offload features that the stack believes apply
+ */
+static netdev_features_t
+ice_features_check(struct sk_buff *skb,
+		   struct net_device __always_unused *netdev,
+		   netdev_features_t features)
+{
+	bool gso = skb_is_gso(skb);
+	size_t len;
+
+	/* No point in doing any of this if neither checksum nor GSO are
+	 * being requested for this frame. We can rule out both by just
+	 * checking for CHECKSUM_PARTIAL
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return features;
+
+	/* We cannot support GSO if the MSS is going to be less than
+	 * 64 bytes. If it is then we need to drop support for GSO.
+	 */
+	if (gso && (skb_shinfo(skb)->gso_size < ICE_TXD_CTX_MIN_MSS))
+		features &= ~NETIF_F_GSO_MASK;
+
+	len = skb_network_offset(skb);
+	if (len > ICE_TXD_MACLEN_MAX || len & 0x1)
+		goto out_rm_features;
+
+	len = skb_network_header_len(skb);
+	if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
+		goto out_rm_features;
+
+	if (skb->encapsulation) {
+		/* this must work for VXLAN frames AND IPIP/SIT frames, and in
+		 * the case of IPIP frames, the transport header pointer is
+		 * after the inner header! So check to make sure that this
+		 * is a GRE or UDP_TUNNEL frame before doing that math.
+		 */
+		if (gso && (skb_shinfo(skb)->gso_type &
+			    (SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL))) {
+			len = skb_inner_network_header(skb) -
+			      skb_transport_header(skb);
+			if (len > ICE_TXD_L4LEN_MAX || len & 0x1)
+				goto out_rm_features;
+		}
+
+		len = skb_inner_network_header_len(skb);
+		if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
+			goto out_rm_features;
+	}
+
+	return features;
+out_rm_features:
+	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
+static const struct net_device_ops ice_netdev_safe_mode_ops = {
+	.ndo_open = ice_open,
+	.ndo_stop = ice_stop,
+	.ndo_start_xmit = ice_start_xmit,
+	.ndo_set_mac_address = ice_set_mac_address,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_change_mtu = ice_change_mtu,
+	.ndo_get_stats64 = ice_get_stats64,
+	.ndo_tx_timeout = ice_tx_timeout,
+	.ndo_bpf = ice_xdp_safe_mode,
+};
+
+static const struct net_device_ops ice_netdev_ops = {
+	.ndo_open = ice_open,
+	.ndo_stop = ice_stop,
+	.ndo_start_xmit = ice_start_xmit,
+	.ndo_select_queue = ice_select_queue,
+	.ndo_features_check = ice_features_check,
+	.ndo_fix_features = ice_fix_features,
+	.ndo_set_rx_mode = ice_set_rx_mode,
+	.ndo_set_mac_address = ice_set_mac_address,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_change_mtu = ice_change_mtu,
+	.ndo_get_stats64 = ice_get_stats64,
+	.ndo_set_tx_maxrate = ice_set_tx_maxrate,
+	.ndo_eth_ioctl = ice_eth_ioctl,
+	.ndo_set_vf_spoofchk = ice_set_vf_spoofchk,
+	.ndo_set_vf_mac = ice_set_vf_mac,
+	.ndo_get_vf_config = ice_get_vf_cfg,
+	.ndo_set_vf_trust = ice_set_vf_trust,
+	.ndo_set_vf_vlan = ice_set_vf_port_vlan,
+	.ndo_set_vf_link_state = ice_set_vf_link_state,
+	.ndo_get_vf_stats = ice_get_vf_stats,
+	.ndo_set_vf_rate = ice_set_vf_bw,
+	.ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid,
+	.ndo_setup_tc = ice_setup_tc,
+	.ndo_set_features = ice_set_features,
+	.ndo_bridge_getlink = ice_bridge_getlink,
+	.ndo_bridge_setlink = ice_bridge_setlink,
+	.ndo_fdb_add = ice_fdb_add,
+	.ndo_fdb_del = ice_fdb_del,
+#ifdef CONFIG_RFS_ACCEL
+	.ndo_rx_flow_steer = ice_rx_flow_steer,
+#endif
+	.ndo_tx_timeout = ice_tx_timeout,
+	.ndo_bpf = ice_xdp,
+	.ndo_xdp_xmit = ice_xdp_xmit,
+	.ndo_xsk_wakeup = ice_xsk_wakeup,
+};
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
new file mode 100644
index 0000000000..f6f52a2480
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -0,0 +1,1239 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include <linux/vmalloc.h>
+
+#include "ice_common.h"
+
+/**
+ * ice_aq_read_nvm
+ * @hw: pointer to the HW struct
+ * @module_typeid: module pointer location in words from the NVM beginning
+ * @offset: byte offset from the module beginning
+ * @length: length of the section to be read (in bytes from the offset)
+ * @data: command buffer (size [bytes] = length)
+ * @last_command: tells if this is the last command in a series
+ * @read_shadow_ram: tell if this is a shadow RAM read
+ * @cd: pointer to command details structure or NULL
+ *
+ * Read the NVM using the admin queue commands (0x0701)
+ */
+static int
+ice_aq_read_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset, u16 length,
+		void *data, bool last_command, bool read_shadow_ram,
+		struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+	struct ice_aqc_nvm *cmd;
+
+	cmd = &desc.params.nvm;
+
+	if (offset > ICE_AQC_NVM_MAX_OFFSET)
+		return -EINVAL;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_read);
+
+	if (!read_shadow_ram && module_typeid == ICE_AQC_NVM_START_POINT)
+		cmd->cmd_flags |= ICE_AQC_NVM_FLASH_ONLY;
+
+	/* If this is the last command in a series, set the proper flag. */
+	if (last_command)
+		cmd->cmd_flags |= ICE_AQC_NVM_LAST_CMD;
+	cmd->module_typeid = cpu_to_le16(module_typeid);
+	cmd->offset_low = cpu_to_le16(offset & 0xFFFF);
+	cmd->offset_high = (offset >> 16) & 0xFF;
+	cmd->length = cpu_to_le16(length);
+
+	return ice_aq_send_cmd(hw, &desc, data, length, cd);
+}
+
+/**
+ * ice_read_flat_nvm - Read portion of NVM by flat offset
+ * @hw: pointer to the HW struct
+ * @offset: offset from beginning of NVM
+ * @length: (in) number of bytes to read; (out) number of bytes actually read
+ * @data: buffer to return data in (sized to fit the specified length)
+ * @read_shadow_ram: if true, read from shadow RAM instead of NVM
+ *
+ * Reads a portion of the NVM, as a flat memory space. This function correctly
+ * breaks read requests across Shadow RAM sectors and ensures that no single
+ * read request exceeds the maximum 4KB read for a single AdminQ command.
+ *
+ * Returns a status code on failure. Note that the data pointer may be
+ * partially updated if some reads succeed before a failure.
+ */
+int
+ice_read_flat_nvm(struct ice_hw *hw, u32 offset, u32 *length, u8 *data,
+		  bool read_shadow_ram)
+{
+	u32 inlen = *length;
+	u32 bytes_read = 0;
+	bool last_cmd;
+	int status;
+
+	*length = 0;
+
+	/* Verify the length of the read if this is for the Shadow RAM */
+	if (read_shadow_ram && ((offset + inlen) > (hw->flash.sr_words * 2u))) {
+		ice_debug(hw, ICE_DBG_NVM, "NVM error: requested offset is beyond Shadow RAM limit\n");
+		return -EINVAL;
+	}
+
+	do {
+		u32 read_size, sector_offset;
+
+		/* ice_aq_read_nvm cannot read more than 4KB at a time.
+		 * Additionally, a read from the Shadow RAM may not cross over
+		 * a sector boundary. Conveniently, the sector size is also
+		 * 4KB.
+		 */
+		sector_offset = offset % ICE_AQ_MAX_BUF_LEN;
+		read_size = min_t(u32, ICE_AQ_MAX_BUF_LEN - sector_offset,
+				  inlen - bytes_read);
+
+		last_cmd = !(bytes_read + read_size < inlen);
+
+		status = ice_aq_read_nvm(hw, ICE_AQC_NVM_START_POINT,
+					 offset, read_size,
+					 data + bytes_read, last_cmd,
+					 read_shadow_ram, NULL);
+		if (status)
+			break;
+
+		bytes_read += read_size;
+		offset += read_size;
+	} while (!last_cmd);
+
+	*length = bytes_read;
+	return status;
+}
+
+/**
+ * ice_aq_update_nvm
+ * @hw: pointer to the HW struct
+ * @module_typeid: module pointer location in words from the NVM beginning
+ * @offset: byte offset from the module beginning
+ * @length: length of the section to be written (in bytes from the offset)
+ * @data: command buffer (size [bytes] = length)
+ * @last_command: tells if this is the last command in a series
+ * @command_flags: command parameters
+ * @cd: pointer to command details structure or NULL
+ *
+ * Update the NVM using the admin queue commands (0x0703)
+ */
+int
+ice_aq_update_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset,
+		  u16 length, void *data, bool last_command, u8 command_flags,
+		  struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+	struct ice_aqc_nvm *cmd;
+
+	cmd = &desc.params.nvm;
+
+	/* In offset the highest byte must be zeroed. */
+	if (offset & 0xFF000000)
+		return -EINVAL;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_write);
+
+	cmd->cmd_flags |= command_flags;
+
+	/* If this is the last command in a series, set the proper flag. */
+	if (last_command)
+		cmd->cmd_flags |= ICE_AQC_NVM_LAST_CMD;
+	cmd->module_typeid = cpu_to_le16(module_typeid);
+	cmd->offset_low = cpu_to_le16(offset & 0xFFFF);
+	cmd->offset_high = (offset >> 16) & 0xFF;
+	cmd->length = cpu_to_le16(length);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	return ice_aq_send_cmd(hw, &desc, data, length, cd);
+}
+
+/**
+ * ice_aq_erase_nvm
+ * @hw: pointer to the HW struct
+ * @module_typeid: module pointer location in words from the NVM beginning
+ * @cd: pointer to command details structure or NULL
+ *
+ * Erase the NVM sector using the admin queue commands (0x0702)
+ */
+int ice_aq_erase_nvm(struct ice_hw *hw, u16 module_typeid, struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+	struct ice_aqc_nvm *cmd;
+
+	cmd = &desc.params.nvm;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_erase);
+
+	cmd->module_typeid = cpu_to_le16(module_typeid);
+	cmd->length = cpu_to_le16(ICE_AQC_NVM_ERASE_LEN);
+	cmd->offset_low = 0;
+	cmd->offset_high = 0;
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_read_sr_word_aq - Reads Shadow RAM via AQ
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM using ice_read_flat_nvm.
+ */
+static int ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data)
+{
+	u32 bytes = sizeof(u16);
+	__le16 data_local;
+	int status;
+
+	/* Note that ice_read_flat_nvm takes into account the 4Kb AdminQ and
+	 * Shadow RAM sector restrictions necessary when reading from the NVM.
+	 */
+	status = ice_read_flat_nvm(hw, offset * sizeof(u16), &bytes,
+				   (__force u8 *)&data_local, true);
+	if (status)
+		return status;
+
+	*data = le16_to_cpu(data_local);
+	return 0;
+}
+
+/**
+ * ice_acquire_nvm - Generic request for acquiring the NVM ownership
+ * @hw: pointer to the HW structure
+ * @access: NVM access type (read or write)
+ *
+ * This function will request NVM ownership.
+ */
+int ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access)
+{
+	if (hw->flash.blank_nvm_mode)
+		return 0;
+
+	return ice_acquire_res(hw, ICE_NVM_RES_ID, access, ICE_NVM_TIMEOUT);
+}
+
+/**
+ * ice_release_nvm - Generic request for releasing the NVM ownership
+ * @hw: pointer to the HW structure
+ *
+ * This function will release NVM ownership.
+ */
+void ice_release_nvm(struct ice_hw *hw)
+{
+	if (hw->flash.blank_nvm_mode)
+		return;
+
+	ice_release_res(hw, ICE_NVM_RES_ID);
+}
+
+/**
+ * ice_get_flash_bank_offset - Get offset into requested flash bank
+ * @hw: pointer to the HW structure
+ * @bank: whether to read from the active or inactive flash bank
+ * @module: the module to read from
+ *
+ * Based on the module, lookup the module offset from the beginning of the
+ * flash.
+ *
+ * Returns the flash offset. Note that a value of zero is invalid and must be
+ * treated as an error.
+ */
+static u32 ice_get_flash_bank_offset(struct ice_hw *hw, enum ice_bank_select bank, u16 module)
+{
+	struct ice_bank_info *banks = &hw->flash.banks;
+	enum ice_flash_bank active_bank;
+	bool second_bank_active;
+	u32 offset, size;
+
+	switch (module) {
+	case ICE_SR_1ST_NVM_BANK_PTR:
+		offset = banks->nvm_ptr;
+		size = banks->nvm_size;
+		active_bank = banks->nvm_bank;
+		break;
+	case ICE_SR_1ST_OROM_BANK_PTR:
+		offset = banks->orom_ptr;
+		size = banks->orom_size;
+		active_bank = banks->orom_bank;
+		break;
+	case ICE_SR_NETLIST_BANK_PTR:
+		offset = banks->netlist_ptr;
+		size = banks->netlist_size;
+		active_bank = banks->netlist_bank;
+		break;
+	default:
+		ice_debug(hw, ICE_DBG_NVM, "Unexpected value for flash module: 0x%04x\n", module);
+		return 0;
+	}
+
+	switch (active_bank) {
+	case ICE_1ST_FLASH_BANK:
+		second_bank_active = false;
+		break;
+	case ICE_2ND_FLASH_BANK:
+		second_bank_active = true;
+		break;
+	default:
+		ice_debug(hw, ICE_DBG_NVM, "Unexpected value for active flash bank: %u\n",
+			  active_bank);
+		return 0;
+	}
+
+	/* The second flash bank is stored immediately following the first
+	 * bank. Based on whether the 1st or 2nd bank is active, and whether
+	 * we want the active or inactive bank, calculate the desired offset.
+	 */
+	switch (bank) {
+	case ICE_ACTIVE_FLASH_BANK:
+		return offset + (second_bank_active ? size : 0);
+	case ICE_INACTIVE_FLASH_BANK:
+		return offset + (second_bank_active ? 0 : size);
+	}
+
+	ice_debug(hw, ICE_DBG_NVM, "Unexpected value for flash bank selection: %u\n", bank);
+	return 0;
+}
+
+/**
+ * ice_read_flash_module - Read a word from one of the main NVM modules
+ * @hw: pointer to the HW structure
+ * @bank: which bank of the module to read
+ * @module: the module to read
+ * @offset: the offset into the module in bytes
+ * @data: storage for the word read from the flash
+ * @length: bytes of data to read
+ *
+ * Read data from the specified flash module. The bank parameter indicates
+ * whether or not to read from the active bank or the inactive bank of that
+ * module.
+ *
+ * The word will be read using flat NVM access, and relies on the
+ * hw->flash.banks data being setup by ice_determine_active_flash_banks()
+ * during initialization.
+ */
+static int
+ice_read_flash_module(struct ice_hw *hw, enum ice_bank_select bank, u16 module,
+		      u32 offset, u8 *data, u32 length)
+{
+	int status;
+	u32 start;
+
+	start = ice_get_flash_bank_offset(hw, bank, module);
+	if (!start) {
+		ice_debug(hw, ICE_DBG_NVM, "Unable to calculate flash bank offset for module 0x%04x\n",
+			  module);
+		return -EINVAL;
+	}
+
+	status = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (status)
+		return status;
+
+	status = ice_read_flat_nvm(hw, start + offset, &length, data, false);
+
+	ice_release_nvm(hw);
+
+	return status;
+}
+
+/**
+ * ice_read_nvm_module - Read from the active main NVM module
+ * @hw: pointer to the HW structure
+ * @bank: whether to read from active or inactive NVM module
+ * @offset: offset into the NVM module to read, in words
+ * @data: storage for returned word value
+ *
+ * Read the specified word from the active NVM module. This includes the CSS
+ * header at the start of the NVM module.
+ */
+static int
+ice_read_nvm_module(struct ice_hw *hw, enum ice_bank_select bank, u32 offset, u16 *data)
+{
+	__le16 data_local;
+	int status;
+
+	status = ice_read_flash_module(hw, bank, ICE_SR_1ST_NVM_BANK_PTR, offset * sizeof(u16),
+				       (__force u8 *)&data_local, sizeof(u16));
+	if (!status)
+		*data = le16_to_cpu(data_local);
+
+	return status;
+}
+
+/**
+ * ice_read_nvm_sr_copy - Read a word from the Shadow RAM copy in the NVM bank
+ * @hw: pointer to the HW structure
+ * @bank: whether to read from the active or inactive NVM module
+ * @offset: offset into the Shadow RAM copy to read, in words
+ * @data: storage for returned word value
+ *
+ * Read the specified word from the copy of the Shadow RAM found in the
+ * specified NVM module.
+ */
+static int
+ice_read_nvm_sr_copy(struct ice_hw *hw, enum ice_bank_select bank, u32 offset, u16 *data)
+{
+	return ice_read_nvm_module(hw, bank, ICE_NVM_SR_COPY_WORD_OFFSET + offset, data);
+}
+
+/**
+ * ice_read_netlist_module - Read data from the netlist module area
+ * @hw: pointer to the HW structure
+ * @bank: whether to read from the active or inactive module
+ * @offset: offset into the netlist to read from
+ * @data: storage for returned word value
+ *
+ * Read a word from the specified netlist bank.
+ */
+static int
+ice_read_netlist_module(struct ice_hw *hw, enum ice_bank_select bank, u32 offset, u16 *data)
+{
+	__le16 data_local;
+	int status;
+
+	status = ice_read_flash_module(hw, bank, ICE_SR_NETLIST_BANK_PTR, offset * sizeof(u16),
+				       (__force u8 *)&data_local, sizeof(u16));
+	if (!status)
+		*data = le16_to_cpu(data_local);
+
+	return status;
+}
+
+/**
+ * ice_read_sr_word - Reads Shadow RAM word and acquire NVM if necessary
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
+ *
+ * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_word_aq.
+ */
+int ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
+{
+	int status;
+
+	status = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (!status) {
+		status = ice_read_sr_word_aq(hw, offset, data);
+		ice_release_nvm(hw);
+	}
+
+	return status;
+}
+
+/**
+ * ice_get_pfa_module_tlv - Reads sub module TLV from NVM PFA
+ * @hw: pointer to hardware structure
+ * @module_tlv: pointer to module TLV to return
+ * @module_tlv_len: pointer to module TLV length to return
+ * @module_type: module type requested
+ *
+ * Finds the requested sub module TLV type from the Preserved Field
+ * Area (PFA) and returns the TLV pointer and length. The caller can
+ * use these to read the variable length TLV value.
+ */
+int
+ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+		       u16 module_type)
+{
+	u16 pfa_len, pfa_ptr;
+	u16 next_tlv;
+	int status;
+
+	status = ice_read_sr_word(hw, ICE_SR_PFA_PTR, &pfa_ptr);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Preserved Field Array pointer.\n");
+		return status;
+	}
+	status = ice_read_sr_word(hw, pfa_ptr, &pfa_len);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read PFA length.\n");
+		return status;
+	}
+	/* Starting with first TLV after PFA length, iterate through the list
+	 * of TLVs to find the requested one.
+	 */
+	next_tlv = pfa_ptr + 1;
+	while (next_tlv < pfa_ptr + pfa_len) {
+		u16 tlv_sub_module_type;
+		u16 tlv_len;
+
+		/* Read TLV type */
+		status = ice_read_sr_word(hw, next_tlv, &tlv_sub_module_type);
+		if (status) {
+			ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV type.\n");
+			break;
+		}
+		/* Read TLV length */
+		status = ice_read_sr_word(hw, next_tlv + 1, &tlv_len);
+		if (status) {
+			ice_debug(hw, ICE_DBG_INIT, "Failed to read TLV length.\n");
+			break;
+		}
+		if (tlv_sub_module_type == module_type) {
+			if (tlv_len) {
+				*module_tlv = next_tlv;
+				*module_tlv_len = tlv_len;
+				return 0;
+			}
+			return -EINVAL;
+		}
+		/* Check next TLV, i.e. current TLV pointer + length + 2 words
+		 * (for current TLV's type and length)
+		 */
+		next_tlv = next_tlv + tlv_len + 2;
+	}
+	/* Module does not exist */
+	return -ENOENT;
+}
+
+/**
+ * ice_read_pba_string - Reads part number string from NVM
+ * @hw: pointer to hardware structure
+ * @pba_num: stores the part number string from the NVM
+ * @pba_num_size: part number string buffer length
+ *
+ * Reads the part number string from the NVM.
+ */
+int ice_read_pba_string(struct ice_hw *hw, u8 *pba_num, u32 pba_num_size)
+{
+	u16 pba_tlv, pba_tlv_len;
+	u16 pba_word, pba_size;
+	int status;
+	u16 i;
+
+	status = ice_get_pfa_module_tlv(hw, &pba_tlv, &pba_tlv_len,
+					ICE_SR_PBA_BLOCK_PTR);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read PBA Block TLV.\n");
+		return status;
+	}
+
+	/* pba_size is the next word */
+	status = ice_read_sr_word(hw, (pba_tlv + 2), &pba_size);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read PBA Section size.\n");
+		return status;
+	}
+
+	if (pba_tlv_len < pba_size) {
+		ice_debug(hw, ICE_DBG_INIT, "Invalid PBA Block TLV size.\n");
+		return -EINVAL;
+	}
+
+	/* Subtract one to get PBA word count (PBA Size word is included in
+	 * total size)
+	 */
+	pba_size--;
+	if (pba_num_size < (((u32)pba_size * 2) + 1)) {
+		ice_debug(hw, ICE_DBG_INIT, "Buffer too small for PBA data.\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < pba_size; i++) {
+		status = ice_read_sr_word(hw, (pba_tlv + 2 + 1) + i, &pba_word);
+		if (status) {
+			ice_debug(hw, ICE_DBG_INIT, "Failed to read PBA Block word %d.\n", i);
+			return status;
+		}
+
+		pba_num[(i * 2)] = (pba_word >> 8) & 0xFF;
+		pba_num[(i * 2) + 1] = pba_word & 0xFF;
+	}
+	pba_num[(pba_size * 2)] = '\0';
+
+	return status;
+}
+
+/**
+ * ice_get_nvm_ver_info - Read NVM version information
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash bank
+ * @nvm: pointer to NVM info structure
+ *
+ * Read the NVM EETRACK ID and map version of the main NVM image bank, filling
+ * in the NVM info structure.
+ */
+static int
+ice_get_nvm_ver_info(struct ice_hw *hw, enum ice_bank_select bank, struct ice_nvm_info *nvm)
+{
+	u16 eetrack_lo, eetrack_hi, ver;
+	int status;
+
+	status = ice_read_nvm_sr_copy(hw, bank, ICE_SR_NVM_DEV_STARTER_VER, &ver);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM, "Failed to read DEV starter version.\n");
+		return status;
+	}
+
+	nvm->major = (ver & ICE_NVM_VER_HI_MASK) >> ICE_NVM_VER_HI_SHIFT;
+	nvm->minor = (ver & ICE_NVM_VER_LO_MASK) >> ICE_NVM_VER_LO_SHIFT;
+
+	status = ice_read_nvm_sr_copy(hw, bank, ICE_SR_NVM_EETRACK_LO, &eetrack_lo);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM, "Failed to read EETRACK lo.\n");
+		return status;
+	}
+	status = ice_read_nvm_sr_copy(hw, bank, ICE_SR_NVM_EETRACK_HI, &eetrack_hi);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM, "Failed to read EETRACK hi.\n");
+		return status;
+	}
+
+	nvm->eetrack = (eetrack_hi << 16) | eetrack_lo;
+
+	return 0;
+}
+
+/**
+ * ice_get_inactive_nvm_ver - Read Option ROM version from the inactive bank
+ * @hw: pointer to the HW structure
+ * @nvm: storage for Option ROM version information
+ *
+ * Reads the NVM EETRACK ID, Map version, and security revision of the
+ * inactive NVM bank. Used to access version data for a pending update that
+ * has not yet been activated.
+ */
+int ice_get_inactive_nvm_ver(struct ice_hw *hw, struct ice_nvm_info *nvm)
+{
+	return ice_get_nvm_ver_info(hw, ICE_INACTIVE_FLASH_BANK, nvm);
+}
+
+/**
+ * ice_get_orom_civd_data - Get the combo version information from Option ROM
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash module
+ * @civd: storage for the Option ROM CIVD data.
+ *
+ * Searches through the Option ROM flash contents to locate the CIVD data for
+ * the image.
+ */
+static int
+ice_get_orom_civd_data(struct ice_hw *hw, enum ice_bank_select bank,
+		       struct ice_orom_civd_info *civd)
+{
+	u8 *orom_data;
+	int status;
+	u32 offset;
+
+	/* The CIVD section is located in the Option ROM aligned to 512 bytes.
+	 * The first 4 bytes must contain the ASCII characters "$CIV".
+	 * A simple modulo 256 sum of all of the bytes of the structure must
+	 * equal 0.
+	 *
+	 * The exact location is unknown and varies between images but is
+	 * usually somewhere in the middle of the bank. We need to scan the
+	 * Option ROM bank to locate it.
+	 *
+	 * It's significantly faster to read the entire Option ROM up front
+	 * using the maximum page size, than to read each possible location
+	 * with a separate firmware command.
+	 */
+	orom_data = vzalloc(hw->flash.banks.orom_size);
+	if (!orom_data)
+		return -ENOMEM;
+
+	status = ice_read_flash_module(hw, bank, ICE_SR_1ST_OROM_BANK_PTR, 0,
+				       orom_data, hw->flash.banks.orom_size);
+	if (status) {
+		vfree(orom_data);
+		ice_debug(hw, ICE_DBG_NVM, "Unable to read Option ROM data\n");
+		return status;
+	}
+
+	/* Scan the memory buffer to locate the CIVD data section */
+	for (offset = 0; (offset + 512) <= hw->flash.banks.orom_size; offset += 512) {
+		struct ice_orom_civd_info *tmp;
+		u8 sum = 0, i;
+
+		tmp = (struct ice_orom_civd_info *)&orom_data[offset];
+
+		/* Skip forward until we find a matching signature */
+		if (memcmp("$CIV", tmp->signature, sizeof(tmp->signature)) != 0)
+			continue;
+
+		ice_debug(hw, ICE_DBG_NVM, "Found CIVD section at offset %u\n",
+			  offset);
+
+		/* Verify that the simple checksum is zero */
+		for (i = 0; i < sizeof(*tmp); i++)
+			sum += ((u8 *)tmp)[i];
+
+		if (sum) {
+			ice_debug(hw, ICE_DBG_NVM, "Found CIVD data with invalid checksum of %u\n",
+				  sum);
+			goto err_invalid_checksum;
+		}
+
+		*civd = *tmp;
+		vfree(orom_data);
+		return 0;
+	}
+
+	ice_debug(hw, ICE_DBG_NVM, "Unable to locate CIVD data within the Option ROM\n");
+
+err_invalid_checksum:
+	vfree(orom_data);
+	return -EIO;
+}
+
+/**
+ * ice_get_orom_ver_info - Read Option ROM version information
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash module
+ * @orom: pointer to Option ROM info structure
+ *
+ * Read Option ROM version and security revision from the Option ROM flash
+ * section.
+ */
+static int
+ice_get_orom_ver_info(struct ice_hw *hw, enum ice_bank_select bank, struct ice_orom_info *orom)
+{
+	struct ice_orom_civd_info civd;
+	u32 combo_ver;
+	int status;
+
+	status = ice_get_orom_civd_data(hw, bank, &civd);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM, "Failed to locate valid Option ROM CIVD data\n");
+		return status;
+	}
+
+	combo_ver = le32_to_cpu(civd.combo_ver);
+
+	orom->major = (u8)((combo_ver & ICE_OROM_VER_MASK) >> ICE_OROM_VER_SHIFT);
+	orom->patch = (u8)(combo_ver & ICE_OROM_VER_PATCH_MASK);
+	orom->build = (u16)((combo_ver & ICE_OROM_VER_BUILD_MASK) >> ICE_OROM_VER_BUILD_SHIFT);
+
+	return 0;
+}
+
+/**
+ * ice_get_inactive_orom_ver - Read Option ROM version from the inactive bank
+ * @hw: pointer to the HW structure
+ * @orom: storage for Option ROM version information
+ *
+ * Reads the Option ROM version and security revision data for the inactive
+ * section of flash. Used to access version data for a pending update that has
+ * not yet been activated.
+ */
+int ice_get_inactive_orom_ver(struct ice_hw *hw, struct ice_orom_info *orom)
+{
+	return ice_get_orom_ver_info(hw, ICE_INACTIVE_FLASH_BANK, orom);
+}
+
+/**
+ * ice_get_netlist_info
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash bank
+ * @netlist: pointer to netlist version info structure
+ *
+ * Get the netlist version information from the requested bank. Reads the Link
+ * Topology section to find the Netlist ID block and extract the relevant
+ * information into the netlist version structure.
+ */
+static int
+ice_get_netlist_info(struct ice_hw *hw, enum ice_bank_select bank,
+		     struct ice_netlist_info *netlist)
+{
+	u16 module_id, length, node_count, i;
+	u16 *id_blk;
+	int status;
+
+	status = ice_read_netlist_module(hw, bank, ICE_NETLIST_TYPE_OFFSET, &module_id);
+	if (status)
+		return status;
+
+	if (module_id != ICE_NETLIST_LINK_TOPO_MOD_ID) {
+		ice_debug(hw, ICE_DBG_NVM, "Expected netlist module_id ID of 0x%04x, but got 0x%04x\n",
+			  ICE_NETLIST_LINK_TOPO_MOD_ID, module_id);
+		return -EIO;
+	}
+
+	status = ice_read_netlist_module(hw, bank, ICE_LINK_TOPO_MODULE_LEN, &length);
+	if (status)
+		return status;
+
+	/* sanity check that we have at least enough words to store the netlist ID block */
+	if (length < ICE_NETLIST_ID_BLK_SIZE) {
+		ice_debug(hw, ICE_DBG_NVM, "Netlist Link Topology module too small. Expected at least %u words, but got %u words.\n",
+			  ICE_NETLIST_ID_BLK_SIZE, length);
+		return -EIO;
+	}
+
+	status = ice_read_netlist_module(hw, bank, ICE_LINK_TOPO_NODE_COUNT, &node_count);
+	if (status)
+		return status;
+	node_count &= ICE_LINK_TOPO_NODE_COUNT_M;
+
+	id_blk = kcalloc(ICE_NETLIST_ID_BLK_SIZE, sizeof(*id_blk), GFP_KERNEL);
+	if (!id_blk)
+		return -ENOMEM;
+
+	/* Read out the entire Netlist ID Block at once. */
+	status = ice_read_flash_module(hw, bank, ICE_SR_NETLIST_BANK_PTR,
+				       ICE_NETLIST_ID_BLK_OFFSET(node_count) * sizeof(u16),
+				       (u8 *)id_blk, ICE_NETLIST_ID_BLK_SIZE * sizeof(u16));
+	if (status)
+		goto exit_error;
+
+	for (i = 0; i < ICE_NETLIST_ID_BLK_SIZE; i++)
+		id_blk[i] = le16_to_cpu(((__force __le16 *)id_blk)[i]);
+
+	netlist->major = id_blk[ICE_NETLIST_ID_BLK_MAJOR_VER_HIGH] << 16 |
+			 id_blk[ICE_NETLIST_ID_BLK_MAJOR_VER_LOW];
+	netlist->minor = id_blk[ICE_NETLIST_ID_BLK_MINOR_VER_HIGH] << 16 |
+			 id_blk[ICE_NETLIST_ID_BLK_MINOR_VER_LOW];
+	netlist->type = id_blk[ICE_NETLIST_ID_BLK_TYPE_HIGH] << 16 |
+			id_blk[ICE_NETLIST_ID_BLK_TYPE_LOW];
+	netlist->rev = id_blk[ICE_NETLIST_ID_BLK_REV_HIGH] << 16 |
+		       id_blk[ICE_NETLIST_ID_BLK_REV_LOW];
+	netlist->cust_ver = id_blk[ICE_NETLIST_ID_BLK_CUST_VER];
+	/* Read the left most 4 bytes of SHA */
+	netlist->hash = id_blk[ICE_NETLIST_ID_BLK_SHA_HASH_WORD(15)] << 16 |
+			id_blk[ICE_NETLIST_ID_BLK_SHA_HASH_WORD(14)];
+
+exit_error:
+	kfree(id_blk);
+
+	return status;
+}
+
+/**
+ * ice_get_inactive_netlist_ver
+ * @hw: pointer to the HW struct
+ * @netlist: pointer to netlist version info structure
+ *
+ * Read the netlist version data from the inactive netlist bank. Used to
+ * extract version data of a pending flash update in order to display the
+ * version data.
+ */
+int ice_get_inactive_netlist_ver(struct ice_hw *hw, struct ice_netlist_info *netlist)
+{
+	return ice_get_netlist_info(hw, ICE_INACTIVE_FLASH_BANK, netlist);
+}
+
+/**
+ * ice_discover_flash_size - Discover the available flash size.
+ * @hw: pointer to the HW struct
+ *
+ * The device flash could be up to 16MB in size. However, it is possible that
+ * the actual size is smaller. Use bisection to determine the accessible size
+ * of flash memory.
+ */
+static int ice_discover_flash_size(struct ice_hw *hw)
+{
+	u32 min_size = 0, max_size = ICE_AQC_NVM_MAX_OFFSET + 1;
+	int status;
+
+	status = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (status)
+		return status;
+
+	while ((max_size - min_size) > 1) {
+		u32 offset = (max_size + min_size) / 2;
+		u32 len = 1;
+		u8 data;
+
+		status = ice_read_flat_nvm(hw, offset, &len, &data, false);
+		if (status == -EIO &&
+		    hw->adminq.sq_last_status == ICE_AQ_RC_EINVAL) {
+			ice_debug(hw, ICE_DBG_NVM, "%s: New upper bound of %u bytes\n",
+				  __func__, offset);
+			status = 0;
+			max_size = offset;
+		} else if (!status) {
+			ice_debug(hw, ICE_DBG_NVM, "%s: New lower bound of %u bytes\n",
+				  __func__, offset);
+			min_size = offset;
+		} else {
+			/* an unexpected error occurred */
+			goto err_read_flat_nvm;
+		}
+	}
+
+	ice_debug(hw, ICE_DBG_NVM, "Predicted flash size is %u bytes\n", max_size);
+
+	hw->flash.flash_size = max_size;
+
+err_read_flat_nvm:
+	ice_release_nvm(hw);
+
+	return status;
+}
+
+/**
+ * ice_read_sr_pointer - Read the value of a Shadow RAM pointer word
+ * @hw: pointer to the HW structure
+ * @offset: the word offset of the Shadow RAM word to read
+ * @pointer: pointer value read from Shadow RAM
+ *
+ * Read the given Shadow RAM word, and convert it to a pointer value specified
+ * in bytes. This function assumes the specified offset is a valid pointer
+ * word.
+ *
+ * Each pointer word specifies whether it is stored in word size or 4KB
+ * sector size by using the highest bit. The reported pointer value will be in
+ * bytes, intended for flat NVM reads.
+ */
+static int ice_read_sr_pointer(struct ice_hw *hw, u16 offset, u32 *pointer)
+{
+	int status;
+	u16 value;
+
+	status = ice_read_sr_word(hw, offset, &value);
+	if (status)
+		return status;
+
+	/* Determine if the pointer is in 4KB or word units */
+	if (value & ICE_SR_NVM_PTR_4KB_UNITS)
+		*pointer = (value & ~ICE_SR_NVM_PTR_4KB_UNITS) * 4 * 1024;
+	else
+		*pointer = value * 2;
+
+	return 0;
+}
+
+/**
+ * ice_read_sr_area_size - Read an area size from a Shadow RAM word
+ * @hw: pointer to the HW structure
+ * @offset: the word offset of the Shadow RAM to read
+ * @size: size value read from the Shadow RAM
+ *
+ * Read the given Shadow RAM word, and convert it to an area size value
+ * specified in bytes. This function assumes the specified offset is a valid
+ * area size word.
+ *
+ * Each area size word is specified in 4KB sector units. This function reports
+ * the size in bytes, intended for flat NVM reads.
+ */
+static int ice_read_sr_area_size(struct ice_hw *hw, u16 offset, u32 *size)
+{
+	int status;
+	u16 value;
+
+	status = ice_read_sr_word(hw, offset, &value);
+	if (status)
+		return status;
+
+	/* Area sizes are always specified in 4KB units */
+	*size = value * 4 * 1024;
+
+	return 0;
+}
+
+/**
+ * ice_determine_active_flash_banks - Discover active bank for each module
+ * @hw: pointer to the HW struct
+ *
+ * Read the Shadow RAM control word and determine which banks are active for
+ * the NVM, OROM, and Netlist modules. Also read and calculate the associated
+ * pointer and size. These values are then cached into the ice_flash_info
+ * structure for later use in order to calculate the correct offset to read
+ * from the active module.
+ */
+static int ice_determine_active_flash_banks(struct ice_hw *hw)
+{
+	struct ice_bank_info *banks = &hw->flash.banks;
+	u16 ctrl_word;
+	int status;
+
+	status = ice_read_sr_word(hw, ICE_SR_NVM_CTRL_WORD, &ctrl_word);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM, "Failed to read the Shadow RAM control word\n");
+		return status;
+	}
+
+	/* Check that the control word indicates validity */
+	if ((ctrl_word & ICE_SR_CTRL_WORD_1_M) >> ICE_SR_CTRL_WORD_1_S != ICE_SR_CTRL_WORD_VALID) {
+		ice_debug(hw, ICE_DBG_NVM, "Shadow RAM control word is invalid\n");
+		return -EIO;
+	}
+
+	if (!(ctrl_word & ICE_SR_CTRL_WORD_NVM_BANK))
+		banks->nvm_bank = ICE_1ST_FLASH_BANK;
+	else
+		banks->nvm_bank = ICE_2ND_FLASH_BANK;
+
+	if (!(ctrl_word & ICE_SR_CTRL_WORD_OROM_BANK))
+		banks->orom_bank = ICE_1ST_FLASH_BANK;
+	else
+		banks->orom_bank = ICE_2ND_FLASH_BANK;
+
+	if (!(ctrl_word & ICE_SR_CTRL_WORD_NETLIST_BANK))
+		banks->netlist_bank = ICE_1ST_FLASH_BANK;
+	else
+		banks->netlist_bank = ICE_2ND_FLASH_BANK;
+
+	status = ice_read_sr_pointer(hw, ICE_SR_1ST_NVM_BANK_PTR, &banks->nvm_ptr);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM, "Failed to read NVM bank pointer\n");
+		return status;
+	}
+
+	status = ice_read_sr_area_size(hw, ICE_SR_NVM_BANK_SIZE, &banks->nvm_size);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM, "Failed to read NVM bank area size\n");
+		return status;
+	}
+
+	status = ice_read_sr_pointer(hw, ICE_SR_1ST_OROM_BANK_PTR, &banks->orom_ptr);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM, "Failed to read OROM bank pointer\n");
+		return status;
+	}
+
+	status = ice_read_sr_area_size(hw, ICE_SR_OROM_BANK_SIZE, &banks->orom_size);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM, "Failed to read OROM bank area size\n");
+		return status;
+	}
+
+	status = ice_read_sr_pointer(hw, ICE_SR_NETLIST_BANK_PTR, &banks->netlist_ptr);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM, "Failed to read Netlist bank pointer\n");
+		return status;
+	}
+
+	status = ice_read_sr_area_size(hw, ICE_SR_NETLIST_BANK_SIZE, &banks->netlist_size);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM, "Failed to read Netlist bank area size\n");
+		return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_init_nvm - initializes NVM setting
+ * @hw: pointer to the HW struct
+ *
+ * This function reads and populates NVM settings such as Shadow RAM size,
+ * max_timeout, and blank_nvm_mode
+ */
+int ice_init_nvm(struct ice_hw *hw)
+{
+	struct ice_flash_info *flash = &hw->flash;
+	u32 fla, gens_stat;
+	u8 sr_size;
+	int status;
+
+	/* The SR size is stored regardless of the NVM programming mode
+	 * as the blank mode may be used in the factory line.
+	 */
+	gens_stat = rd32(hw, GLNVM_GENS);
+	sr_size = (gens_stat & GLNVM_GENS_SR_SIZE_M) >> GLNVM_GENS_SR_SIZE_S;
+
+	/* Switching to words (sr_size contains power of 2) */
+	flash->sr_words = BIT(sr_size) * ICE_SR_WORDS_IN_1KB;
+
+	/* Check if we are in the normal or blank NVM programming mode */
+	fla = rd32(hw, GLNVM_FLA);
+	if (fla & GLNVM_FLA_LOCKED_M) { /* Normal programming mode */
+		flash->blank_nvm_mode = false;
+	} else {
+		/* Blank programming mode */
+		flash->blank_nvm_mode = true;
+		ice_debug(hw, ICE_DBG_NVM, "NVM init error: unsupported blank mode.\n");
+		return -EIO;
+	}
+
+	status = ice_discover_flash_size(hw);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM, "NVM init error: failed to discover flash size.\n");
+		return status;
+	}
+
+	status = ice_determine_active_flash_banks(hw);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM, "Failed to determine active flash banks.\n");
+		return status;
+	}
+
+	status = ice_get_nvm_ver_info(hw, ICE_ACTIVE_FLASH_BANK, &flash->nvm);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read NVM info.\n");
+		return status;
+	}
+
+	status = ice_get_orom_ver_info(hw, ICE_ACTIVE_FLASH_BANK, &flash->orom);
+	if (status)
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read Option ROM info.\n");
+
+	/* read the netlist version information */
+	status = ice_get_netlist_info(hw, ICE_ACTIVE_FLASH_BANK, &flash->netlist);
+	if (status)
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read netlist info.\n");
+
+	return 0;
+}
+
+/**
+ * ice_nvm_validate_checksum
+ * @hw: pointer to the HW struct
+ *
+ * Verify NVM PFA checksum validity (0x0706)
+ */
+int ice_nvm_validate_checksum(struct ice_hw *hw)
+{
+	struct ice_aqc_nvm_checksum *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	status = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (status)
+		return status;
+
+	cmd = &desc.params.nvm_checksum;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_checksum);
+	cmd->flags = ICE_AQC_NVM_CHECKSUM_VERIFY;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+	ice_release_nvm(hw);
+
+	if (!status)
+		if (le16_to_cpu(cmd->checksum) != ICE_AQC_NVM_CHECKSUM_CORRECT)
+			status = -EIO;
+
+	return status;
+}
+
+/**
+ * ice_nvm_write_activate
+ * @hw: pointer to the HW struct
+ * @cmd_flags: flags for write activate command
+ * @response_flags: response indicators from firmware
+ *
+ * Update the control word with the required banks' validity bits
+ * and dumps the Shadow RAM to flash (0x0707)
+ *
+ * cmd_flags controls which banks to activate, the preservation level to use
+ * when activating the NVM bank, and whether an EMP reset is required for
+ * activation.
+ *
+ * Note that the 16bit cmd_flags value is split between two separate 1 byte
+ * flag values in the descriptor.
+ *
+ * On successful return of the firmware command, the response_flags variable
+ * is updated with the flags reported by firmware indicating certain status,
+ * such as whether EMP reset is enabled.
+ */
+int ice_nvm_write_activate(struct ice_hw *hw, u16 cmd_flags, u8 *response_flags)
+{
+	struct ice_aqc_nvm *cmd;
+	struct ice_aq_desc desc;
+	int err;
+
+	cmd = &desc.params.nvm;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_write_activate);
+
+	cmd->cmd_flags = (u8)(cmd_flags & 0xFF);
+	cmd->offset_high = (u8)((cmd_flags >> 8) & 0xFF);
+
+	err = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+	if (!err && response_flags)
+		*response_flags = cmd->cmd_flags;
+
+	return err;
+}
+
+/**
+ * ice_aq_nvm_update_empr
+ * @hw: pointer to the HW struct
+ *
+ * Update empr (0x0709). This command allows SW to
+ * request an EMPR to activate new FW.
+ */
+int ice_aq_nvm_update_empr(struct ice_hw *hw)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_update_empr);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+}
+
+/* ice_nvm_set_pkg_data
+ * @hw: pointer to the HW struct
+ * @del_pkg_data_flag: If is set then the current pkg_data store by FW
+ *		       is deleted.
+ *		       If bit is set to 1, then buffer should be size 0.
+ * @data: pointer to buffer
+ * @length: length of the buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * Set package data (0x070A). This command is equivalent to the reception
+ * of a PLDM FW Update GetPackageData cmd. This command should be sent
+ * as part of the NVM update as the first cmd in the flow.
+ */
+
+int
+ice_nvm_set_pkg_data(struct ice_hw *hw, bool del_pkg_data_flag, u8 *data,
+		     u16 length, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_nvm_pkg_data *cmd;
+	struct ice_aq_desc desc;
+
+	if (length != 0 && !data)
+		return -EINVAL;
+
+	cmd = &desc.params.pkg_data;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_pkg_data);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	if (del_pkg_data_flag)
+		cmd->cmd_flags |= ICE_AQC_NVM_PKG_DELETE;
+
+	return ice_aq_send_cmd(hw, &desc, data, length, cd);
+}
+
+/* ice_nvm_pass_component_tbl
+ * @hw: pointer to the HW struct
+ * @data: pointer to buffer
+ * @length: length of the buffer
+ * @transfer_flag: parameter for determining stage of the update
+ * @comp_response: a pointer to the response from the 0x070B AQC.
+ * @comp_response_code: a pointer to the response code from the 0x070B AQC.
+ * @cd: pointer to command details structure or NULL
+ *
+ * Pass component table (0x070B). This command is equivalent to the reception
+ * of a PLDM FW Update PassComponentTable cmd. This command should be sent once
+ * per component. It can be only sent after Set Package Data cmd and before
+ * actual update. FW will assume these commands are going to be sent until
+ * the TransferFlag is set to End or StartAndEnd.
+ */
+
+int
+ice_nvm_pass_component_tbl(struct ice_hw *hw, u8 *data, u16 length,
+			   u8 transfer_flag, u8 *comp_response,
+			   u8 *comp_response_code, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_nvm_pass_comp_tbl *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	if (!data || !comp_response || !comp_response_code)
+		return -EINVAL;
+
+	cmd = &desc.params.pass_comp_tbl;
+
+	ice_fill_dflt_direct_cmd_desc(&desc,
+				      ice_aqc_opc_nvm_pass_component_tbl);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	cmd->transfer_flag = transfer_flag;
+	status = ice_aq_send_cmd(hw, &desc, data, length, cd);
+
+	if (!status) {
+		*comp_response = cmd->component_response;
+		*comp_response_code = cmd->component_response_code;
+	}
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h
new file mode 100644
index 0000000000..774c231796
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_NVM_H_
+#define _ICE_NVM_H_
+
+struct ice_orom_civd_info {
+	u8 signature[4];	/* Must match ASCII '$CIV' characters */
+	u8 checksum;		/* Simple modulo 256 sum of all structure bytes must equal 0 */
+	__le32 combo_ver;	/* Combo Image Version number */
+	u8 combo_name_len;	/* Length of the unicode combo image version string, max of 32 */
+	__le16 combo_name[32];	/* Unicode string representing the Combo Image version */
+} __packed;
+
+int ice_acquire_nvm(struct ice_hw *hw, enum ice_aq_res_access_type access);
+void ice_release_nvm(struct ice_hw *hw);
+int
+ice_read_flat_nvm(struct ice_hw *hw, u32 offset, u32 *length, u8 *data,
+		  bool read_shadow_ram);
+int
+ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
+		       u16 module_type);
+int ice_get_inactive_orom_ver(struct ice_hw *hw, struct ice_orom_info *orom);
+int ice_get_inactive_nvm_ver(struct ice_hw *hw, struct ice_nvm_info *nvm);
+int
+ice_get_inactive_netlist_ver(struct ice_hw *hw, struct ice_netlist_info *netlist);
+int ice_read_pba_string(struct ice_hw *hw, u8 *pba_num, u32 pba_num_size);
+int ice_init_nvm(struct ice_hw *hw);
+int ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data);
+int
+ice_aq_update_nvm(struct ice_hw *hw, u16 module_typeid, u32 offset,
+		  u16 length, void *data, bool last_command, u8 command_flags,
+		  struct ice_sq_cd *cd);
+int
+ice_aq_erase_nvm(struct ice_hw *hw, u16 module_typeid, struct ice_sq_cd *cd);
+int ice_nvm_validate_checksum(struct ice_hw *hw);
+int ice_nvm_write_activate(struct ice_hw *hw, u16 cmd_flags, u8 *response_flags);
+int ice_aq_nvm_update_empr(struct ice_hw *hw);
+int
+ice_nvm_set_pkg_data(struct ice_hw *hw, bool del_pkg_data_flag, u8 *data,
+		     u16 length, struct ice_sq_cd *cd);
+int
+ice_nvm_pass_component_tbl(struct ice_hw *hw, u8 *data, u16 length,
+			   u8 transfer_flag, u8 *comp_response,
+			   u8 *comp_response_code, struct ice_sq_cd *cd);
+#endif /* _ICE_NVM_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_osdep.h b/drivers/net/ethernet/intel/ice/ice_osdep.h
new file mode 100644
index 0000000000..82bc54fec7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_osdep.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_OSDEP_H_
+#define _ICE_OSDEP_H_
+
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/bitops.h>
+#include <linux/ethtool.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/pci_ids.h>
+#ifndef CONFIG_64BIT
+#include <linux/io-64-nonatomic-lo-hi.h>
+#endif
+#include <net/udp_tunnel.h>
+
+#define wr32(a, reg, value)	writel((value), ((a)->hw_addr + (reg)))
+#define rd32(a, reg)		readl((a)->hw_addr + (reg))
+#define wr64(a, reg, value)	writeq((value), ((a)->hw_addr + (reg)))
+#define rd64(a, reg)		readq((a)->hw_addr + (reg))
+
+#define ice_flush(a)		rd32((a), GLGEN_STAT)
+#define ICE_M(m, s)		((m) << (s))
+
+struct ice_dma_mem {
+	void *va;
+	dma_addr_t pa;
+	size_t size;
+};
+
+struct ice_hw;
+struct device *ice_hw_to_dev(struct ice_hw *hw);
+
+#ifdef CONFIG_DYNAMIC_DEBUG
+#define ice_debug(hw, type, fmt, args...) \
+	dev_dbg(ice_hw_to_dev(hw), fmt, ##args)
+
+#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
+	print_hex_dump_debug(KBUILD_MODNAME " ",		\
+			     DUMP_PREFIX_OFFSET, rowsize,	\
+			     groupsize, buf, len, false)
+#else
+#define ice_debug(hw, type, fmt, args...)			\
+do {								\
+	if ((type) & (hw)->debug_mask)				\
+		dev_info(ice_hw_to_dev(hw), fmt, ##args);	\
+} while (0)
+
+#ifdef DEBUG
+#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
+do {								\
+	if ((type) & (hw)->debug_mask)				\
+		print_hex_dump_debug(KBUILD_MODNAME,		\
+				     DUMP_PREFIX_OFFSET,	\
+				     rowsize, groupsize, buf,	\
+				     len, false);		\
+} while (0)
+#else
+#define ice_debug_array(hw, type, rowsize, groupsize, buf, len) \
+do {								\
+	struct ice_hw *hw_l = hw;				\
+	if ((type) & (hw_l)->debug_mask) {			\
+		u16 len_l = len;				\
+		u8 *buf_l = buf;				\
+		int i;						\
+		for (i = 0; i < (len_l - 16); i += 16)		\
+			ice_debug(hw_l, type, "0x%04X  %16ph\n",\
+				  i, ((buf_l) + i));		\
+		if (i < len_l)					\
+			ice_debug(hw_l, type, "0x%04X  %*ph\n", \
+				  i, ((len_l) - i), ((buf_l) + i));\
+	}							\
+} while (0)
+#endif /* DEBUG */
+#endif /* CONFIG_DYNAMIC_DEBUG */
+
+#endif /* _ICE_OSDEP_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.c
new file mode 100644
index 0000000000..976a03d3bd
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice_vsi_vlan_ops.h"
+#include "ice_vsi_vlan_lib.h"
+#include "ice_vlan_mode.h"
+#include "ice.h"
+#include "ice_pf_vsi_vlan_ops.h"
+
+void ice_pf_vsi_init_vlan_ops(struct ice_vsi *vsi)
+{
+	struct ice_vsi_vlan_ops *vlan_ops;
+
+	if (ice_is_dvm_ena(&vsi->back->hw)) {
+		vlan_ops = &vsi->outer_vlan_ops;
+
+		vlan_ops->add_vlan = ice_vsi_add_vlan;
+		vlan_ops->del_vlan = ice_vsi_del_vlan;
+		vlan_ops->ena_stripping = ice_vsi_ena_outer_stripping;
+		vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping;
+		vlan_ops->ena_insertion = ice_vsi_ena_outer_insertion;
+		vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion;
+		vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering;
+		vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
+	} else {
+		vlan_ops = &vsi->inner_vlan_ops;
+
+		vlan_ops->add_vlan = ice_vsi_add_vlan;
+		vlan_ops->del_vlan = ice_vsi_del_vlan;
+		vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
+		vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
+		vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
+		vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
+		vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering;
+		vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
+	}
+}
+
diff --git a/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.h b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.h
new file mode 100644
index 0000000000..6741ec8c5f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_pf_vsi_vlan_ops.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_PF_VSI_VLAN_OPS_H_
+#define _ICE_PF_VSI_VLAN_OPS_H_
+
+#include "ice_vsi_vlan_ops.h"
+
+struct ice_vsi;
+
+void ice_pf_vsi_init_vlan_ops(struct ice_vsi *vsi);
+
+#endif /* _ICE_PF_VSI_VLAN_OPS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
new file mode 100644
index 0000000000..f6f27361c3
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
@@ -0,0 +1,468 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_PROTOCOL_TYPE_H_
+#define _ICE_PROTOCOL_TYPE_H_
+#define ICE_IPV6_ADDR_LENGTH 16
+
+/* Each recipe can match up to 5 different fields. Fields to match can be meta-
+ * data, values extracted from packet headers, or results from other recipes.
+ * One of the 5 fields is reserved for matching the switch ID. So, up to 4
+ * recipes can provide intermediate results to another one through chaining,
+ * e.g. recipes 0, 1, 2, and 3 can provide intermediate results to recipe 4.
+ */
+#define ICE_NUM_WORDS_RECIPE 4
+
+/* Max recipes that can be chained */
+#define ICE_MAX_CHAIN_RECIPE 5
+
+/* 1 word reserved for switch ID from allowed 5 words.
+ * So a recipe can have max 4 words. And you can chain 5 such recipes
+ * together. So maximum words that can be programmed for look up is 5 * 4.
+ */
+#define ICE_MAX_CHAIN_WORDS (ICE_NUM_WORDS_RECIPE * ICE_MAX_CHAIN_RECIPE)
+
+/* Field vector index corresponding to chaining */
+#define ICE_CHAIN_FV_INDEX_START 47
+
+enum ice_protocol_type {
+	ICE_MAC_OFOS = 0,
+	ICE_MAC_IL,
+	ICE_ETYPE_OL,
+	ICE_ETYPE_IL,
+	ICE_VLAN_OFOS,
+	ICE_IPV4_OFOS,
+	ICE_IPV4_IL,
+	ICE_IPV6_OFOS,
+	ICE_IPV6_IL,
+	ICE_TCP_IL,
+	ICE_UDP_OF,
+	ICE_UDP_ILOS,
+	ICE_VXLAN,
+	ICE_GENEVE,
+	ICE_NVGRE,
+	ICE_GTP,
+	ICE_GTP_NO_PAY,
+	ICE_PPPOE,
+	ICE_L2TPV3,
+	ICE_VLAN_EX,
+	ICE_VLAN_IN,
+	ICE_HW_METADATA,
+	ICE_VXLAN_GPE,
+	ICE_SCTP_IL,
+	ICE_PROTOCOL_LAST
+};
+
+enum ice_sw_tunnel_type {
+	ICE_NON_TUN = 0,
+	ICE_SW_TUN_AND_NON_TUN,
+	ICE_SW_TUN_VXLAN,
+	ICE_SW_TUN_GENEVE,
+	ICE_SW_TUN_NVGRE,
+	ICE_SW_TUN_GTPU,
+	ICE_SW_TUN_GTPC,
+	ICE_ALL_TUNNELS /* All tunnel types including NVGRE */
+};
+
+/* Decoders for ice_prot_id:
+ * - F: First
+ * - I: Inner
+ * - L: Last
+ * - O: Outer
+ * - S: Single
+ */
+enum ice_prot_id {
+	ICE_PROT_ID_INVAL	= 0,
+	ICE_PROT_MAC_OF_OR_S	= 1,
+	ICE_PROT_MAC_IL		= 4,
+	ICE_PROT_ETYPE_OL	= 9,
+	ICE_PROT_ETYPE_IL	= 10,
+	ICE_PROT_IPV4_OF_OR_S	= 32,
+	ICE_PROT_IPV4_IL	= 33,
+	ICE_PROT_IPV6_OF_OR_S	= 40,
+	ICE_PROT_IPV6_IL	= 41,
+	ICE_PROT_TCP_IL		= 49,
+	ICE_PROT_UDP_OF		= 52,
+	ICE_PROT_UDP_IL_OR_S	= 53,
+	ICE_PROT_GRE_OF		= 64,
+	ICE_PROT_ESP_F		= 88,
+	ICE_PROT_ESP_2		= 89,
+	ICE_PROT_SCTP_IL	= 96,
+	ICE_PROT_ICMP_IL	= 98,
+	ICE_PROT_ICMPV6_IL	= 100,
+	ICE_PROT_PPPOE		= 103,
+	ICE_PROT_L2TPV3		= 104,
+	ICE_PROT_ARP_OF		= 118,
+	ICE_PROT_META_ID	= 255, /* when offset == metadata */
+	ICE_PROT_INVALID	= 255  /* when offset == ICE_FV_OFFSET_INVAL */
+};
+
+#define ICE_VNI_OFFSET		12 /* offset of VNI from ICE_PROT_UDP_OF */
+
+#define ICE_MAC_OFOS_HW		1
+#define ICE_MAC_IL_HW		4
+#define ICE_ETYPE_OL_HW		9
+#define ICE_ETYPE_IL_HW		10
+#define ICE_VLAN_OF_HW		16
+#define ICE_VLAN_OL_HW		17
+#define ICE_IPV4_OFOS_HW	32
+#define ICE_IPV4_IL_HW		33
+#define ICE_IPV6_OFOS_HW	40
+#define ICE_IPV6_IL_HW		41
+#define ICE_TCP_IL_HW		49
+#define ICE_UDP_ILOS_HW		53
+#define ICE_GRE_OF_HW		64
+#define ICE_PPPOE_HW		103
+#define ICE_L2TPV3_HW		104
+
+#define ICE_UDP_OF_HW	52 /* UDP Tunnels */
+
+
+#define ICE_TUN_FLAG_FV_IND 2
+
+/* Mapping of software defined protocol ID to hardware defined protocol ID */
+struct ice_protocol_entry {
+	enum ice_protocol_type type;
+	u8 protocol_id;
+};
+
+struct ice_ether_hdr {
+	u8 dst_addr[ETH_ALEN];
+	u8 src_addr[ETH_ALEN];
+};
+
+struct ice_ethtype_hdr {
+	__be16 ethtype_id;
+};
+
+struct ice_ether_vlan_hdr {
+	u8 dst_addr[ETH_ALEN];
+	u8 src_addr[ETH_ALEN];
+	__be32 vlan_id;
+};
+
+struct ice_vlan_hdr {
+	__be16 type;
+	__be16 vlan;
+};
+
+struct ice_ipv4_hdr {
+	u8 version;
+	u8 tos;
+	__be16 total_length;
+	__be16 id;
+	__be16 frag_off;
+	u8 time_to_live;
+	u8 protocol;
+	__be16 check;
+	__be32 src_addr;
+	__be32 dst_addr;
+};
+
+struct ice_ipv6_hdr {
+	__be32 be_ver_tc_flow;
+	__be16 payload_len;
+	u8 next_hdr;
+	u8 hop_limit;
+	u8 src_addr[ICE_IPV6_ADDR_LENGTH];
+	u8 dst_addr[ICE_IPV6_ADDR_LENGTH];
+};
+
+struct ice_sctp_hdr {
+	__be16 src_port;
+	__be16 dst_port;
+	__be32 verification_tag;
+	__be32 check;
+};
+
+struct ice_l4_hdr {
+	__be16 src_port;
+	__be16 dst_port;
+	__be16 len;
+	__be16 check;
+};
+
+struct ice_udp_tnl_hdr {
+	__be16 field;
+	__be16 proto_type;
+	__be32 vni;     /* only use lower 24-bits */
+};
+
+struct ice_udp_gtp_hdr {
+	u8 flags;
+	u8 msg_type;
+	__be16 rsrvd_len;
+	__be32 teid;
+	__be16 rsrvd_seq_nbr;
+	u8 rsrvd_n_pdu_nbr;
+	u8 rsrvd_next_ext;
+	u8 rsvrd_ext_len;
+	u8 pdu_type;
+	u8 qfi;
+	u8 rsvrd;
+};
+
+struct ice_pppoe_hdr {
+	u8 rsrvd_ver_type;
+	u8 rsrvd_code;
+	__be16 session_id;
+	__be16 length;
+	__be16 ppp_prot_id; /* control and data only */
+};
+
+struct ice_l2tpv3_sess_hdr {
+	__be32 session_id;
+	__be64 cookie;
+};
+
+struct ice_nvgre_hdr {
+	__be16 flags;
+	__be16 protocol;
+	__be32 tni_flow;
+};
+
+/* Metadata information
+ *
+ * Not all MDIDs can be used by switch block. It depends on package version.
+ *
+ * MDID 16 (Rx offset)
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |  A  |   B     |   Reserved    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * A = Source port where the transaction came from (3b).
+ *
+ * B = Destination TC of the packet. The TC is relative to a port (5b).
+ *
+ * MDID 17
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |      PTYPE        | Reserved  |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * PTYPE = Encodes the packet type (10b).
+ *
+ * MDID 18
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Packet length             | R |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Packet length = Length of the packet in bytes
+ *		   (packet always carriers CRC) (14b).
+ * R = Reserved (2b).
+ *
+ * MDID 19
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   Source VSI      | Reserved  |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Source VSI = Source VSI of packet loopbacked in switch (for egress) (10b).
+ */
+#define ICE_MDID_SOURCE_VSI_MASK GENMASK(9, 0)
+
+/*
+ * MDID 20
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |A|B|C|D|E|F|R|R|G|H|I|J|K|L|M|N|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * A = DSI - set for DSI RX pkts.
+ * B = ipsec_decrypted - invalid on NIC.
+ * C = marker - this is a marker packet.
+ * D = from_network - for TX sets to 0
+ *		      for RX:
+ *		        * 1 - packet is from external link
+ *		        * 0 - packet source is from internal
+ * E = source_interface_is_rx - reflect the physical interface from where the
+ *				packet was received:
+ *				* 1 - Rx
+ *				* 0 - Tx
+ * F = from_mng - The bit signals that the packet's origin is the management.
+ * G = ucast - Outer L2 MAC address is unicast.
+ * H = mcast - Outer L2 MAC address is multicast.
+ * I = bcast - Outer L2 MAC address is broadcast.
+ * J = second_outer_mac_present - 2 outer MAC headers are present in the packet.
+ * K = STAG or BVLAN - Outer L2 header has STAG (ethernet type 0x88a8) or
+ *		       BVLAN (ethernet type 0x88a8).
+ * L = ITAG - Outer L2 header has ITAG *ethernet type 0x88e7)
+ * M = EVLAN (0x8100) - Outer L2 header has EVLAN (ethernet type 0x8100)
+ * N = EVLAN (0x9100) - Outer L2 header has EVLAN (ethernet type 0x9100)
+ */
+#define ICE_PKT_FROM_NETWORK	BIT(3)
+#define ICE_PKT_VLAN_STAG	BIT(12)
+#define ICE_PKT_VLAN_ITAG	BIT(13)
+#define ICE_PKT_VLAN_EVLAN	(BIT(14) | BIT(15))
+#define ICE_PKT_VLAN_MASK	(ICE_PKT_VLAN_STAG | ICE_PKT_VLAN_ITAG | \
+				ICE_PKT_VLAN_EVLAN)
+/* MDID 21
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |A|B|C|D|E|F|G|H|I|J|R|R|K|L|M|N|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * A = VLAN (0x8100) - Outer L2 header has VLAN (ethernet type 0x8100)
+ * B = NSHoE - Outer L2 header has NSH (ethernet type 0x894f)
+ * C = MPLS (0x8847) - There is at least 1 MPLS tag in the outer header
+ *		       (ethernet type 0x8847)
+ * D = MPLS (0x8848) - There is at least 1 MPLS tag in the outer header
+ *		       (ethernet type 0x8848)
+ * E = multi MPLS - There is more than a single MPLS tag in the outer header
+ * F = inner MPLS - There is inner MPLS tag in the packet
+ * G = tunneled MAC - Set if the packet includes a tunneled MAC
+ * H = tunneled VLAN - Same as VLAN, but for a tunneled header
+ * I = pkt_is_frag - Packet is fragmented (ipv4 or ipv6)
+ * J = ipv6_ext - The packet has routing or destination ipv6 extension in inner
+ *		  or outer ipv6 headers
+ * K = RoCE - UDP packet detected as RoCEv2
+ * L = UDP_XSUM_0 - Set to 1 if L4 checksum is 0 in a UDP packet
+ * M = ESP - This is a ESP packet
+ * N = NAT_ESP - This is a ESP packet encapsulated in UDP NAT
+ */
+#define ICE_PKT_TUNNEL_MAC	BIT(6)
+#define ICE_PKT_TUNNEL_VLAN	BIT(7)
+#define ICE_PKT_TUNNEL_MASK	(ICE_PKT_TUNNEL_MAC | ICE_PKT_TUNNEL_VLAN)
+
+/* MDID 22
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |A|B|C|D|E|F|  G  |H|I|J| K |L|M|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * A = fin - fin flag in tcp header
+ * B = sync - sync flag in tcp header
+ * C = rst - rst flag in tcp header
+ * D = psh - psh flag in tcp header
+ * E = ack - ack flag in tcp header
+ * F = urg - urg flag in tcp header
+ * G = tunnel type (3b) - Flags used to decode tunnel type:
+ *			  * b000 - not a VXLAN/Geneve/GRE tunnel
+ *			  * b001 - VXLAN-GPE
+ *			  * b010 - VXLAN (non-GPE)
+ *			  * b011 - Geneve
+ *			  * b100 - GRE (no key, no xsum)
+ *			  * b101 - GREK (key, no xsum)
+ *			  * b110 - GREC (no key, xsum)
+ *			  * b111 - GREKC (key, xsum)
+ * H = UDP_GRE - Packet is UDP (VXLAN or VLAN_GPE or Geneve or MPLSoUDP or GRE)
+ *		 tunnel
+ * I = OAM - VXLAN/Geneve/tunneled NSH packet with the OAM bit set
+ * J = tunneled NSH - Packet has NSHoGRE or NSHoUDP
+ * K = switch (2b) - Direction on switch
+ *		     * b00 - normal
+ *		     * b01 - TX force only LAN
+ *		     * b10 - TX disable LAN
+ *		     * b11 - direct to VSI
+ * L = swpe - Represents SWPE bit in TX command
+ * M = sw_cmd - Switch command
+ *
+ * MDID 23
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |A|B|C|D|        R        |E|F|R|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * A = MAC error - Produced by MAC according to L2 error conditions
+ * B = PPRS no offload - FIFO overflow in PPRS or any problematic condition in
+ *			 PPRS ANA
+ * C = abort - Set when malicious packet is detected
+ * D = partial analysis - ANA's analysing got cut in the middle
+ *			 (header > 504B etc.)
+ * E = FLM - Flow director hit indication
+ * F = FDLONG - Flow direector long bucket indication
+ *
+ */
+#define ICE_MDID_SIZE 2
+#define ICE_META_DATA_ID_HW 255
+
+enum ice_hw_metadata_id {
+	ICE_SOURCE_PORT_MDID = 16,
+	ICE_PTYPE_MDID = 17,
+	ICE_PACKET_LENGTH_MDID = 18,
+	ICE_SOURCE_VSI_MDID = 19,
+	ICE_PKT_VLAN_MDID = 20,
+	ICE_PKT_TUNNEL_MDID = 21,
+	ICE_PKT_TCP_MDID = 22,
+	ICE_PKT_ERROR_MDID = 23,
+};
+
+enum ice_hw_metadata_offset {
+	ICE_SOURCE_PORT_MDID_OFFSET = ICE_MDID_SIZE * ICE_SOURCE_PORT_MDID,
+	ICE_PTYPE_MDID_OFFSET = ICE_MDID_SIZE * ICE_PTYPE_MDID,
+	ICE_PACKET_LENGTH_MDID_OFFSET = ICE_MDID_SIZE * ICE_PACKET_LENGTH_MDID,
+	ICE_SOURCE_VSI_MDID_OFFSET = ICE_MDID_SIZE * ICE_SOURCE_VSI_MDID,
+	ICE_PKT_VLAN_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_VLAN_MDID,
+	ICE_PKT_TUNNEL_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_TUNNEL_MDID,
+	ICE_PKT_TCP_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_TCP_MDID,
+	ICE_PKT_ERROR_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_ERROR_MDID,
+};
+
+enum ice_pkt_flags {
+	ICE_PKT_FLAGS_MDID20 = 0,
+	ICE_PKT_FLAGS_MDID21 = 1,
+	ICE_PKT_FLAGS_MDID22 = 2,
+	ICE_PKT_FLAGS_MDID23 = 3,
+};
+
+struct ice_hw_metadata {
+	__be16 source_port;
+	__be16 ptype;
+	__be16 packet_length;
+	__be16 source_vsi;
+	__be16 flags[4];
+};
+
+union ice_prot_hdr {
+	struct ice_ether_hdr eth_hdr;
+	struct ice_ethtype_hdr ethertype;
+	struct ice_vlan_hdr vlan_hdr;
+	struct ice_ipv4_hdr ipv4_hdr;
+	struct ice_ipv6_hdr ipv6_hdr;
+	struct ice_l4_hdr l4_hdr;
+	struct ice_sctp_hdr sctp_hdr;
+	struct ice_udp_tnl_hdr tnl_hdr;
+	struct ice_nvgre_hdr nvgre_hdr;
+	struct ice_udp_gtp_hdr gtp_hdr;
+	struct ice_pppoe_hdr pppoe_hdr;
+	struct ice_l2tpv3_sess_hdr l2tpv3_sess_hdr;
+	struct ice_hw_metadata metadata;
+};
+
+/* This is mapping table entry that maps every word within a given protocol
+ * structure to the real byte offset as per the specification of that
+ * protocol header.
+ * for e.g. dst address is 3 words in ethertype header and corresponding bytes
+ * are 0, 2, 3 in the actual packet header and src address is at 4, 6, 8
+ */
+struct ice_prot_ext_tbl_entry {
+	enum ice_protocol_type prot_type;
+	/* Byte offset into header of given protocol type */
+	u8 offs[sizeof(union ice_prot_hdr)];
+};
+
+/* Extractions to be looked up for a given recipe */
+struct ice_prot_lkup_ext {
+	u16 prot_type;
+	u8 n_val_words;
+	/* create a buffer to hold max words per recipe */
+	u16 field_off[ICE_MAX_CHAIN_WORDS];
+	u16 field_mask[ICE_MAX_CHAIN_WORDS];
+
+	struct ice_fv_word fv_words[ICE_MAX_CHAIN_WORDS];
+
+	/* Indicate field offsets that have field vector indices assigned */
+	DECLARE_BITMAP(done, ICE_MAX_CHAIN_WORDS);
+};
+
+struct ice_pref_recipe_group {
+	u8 n_val_pairs;		/* Number of valid pairs */
+	struct ice_fv_word pairs[ICE_NUM_WORDS_RECIPE];
+	u16 mask[ICE_NUM_WORDS_RECIPE];
+};
+
+struct ice_recp_grp_entry {
+	struct list_head l_entry;
+
+#define ICE_INVAL_CHAIN_IND 0xFF
+	u16 rid;
+	u8 chain_idx;
+	u16 fv_idx[ICE_NUM_WORDS_RECIPE];
+	u16 fv_mask[ICE_NUM_WORDS_RECIPE];
+	struct ice_pref_recipe_group r_group;
+};
+#endif /* _ICE_PROTOCOL_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
new file mode 100644
index 0000000000..c4270708a7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -0,0 +1,2792 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_trace.h"
+
+#define E810_OUT_PROP_DELAY_NS 1
+
+#define UNKNOWN_INCVAL_E822 0x100000000ULL
+
+static const struct ptp_pin_desc ice_pin_desc_e810t[] = {
+	/* name    idx   func         chan */
+	{ "GNSS",  GNSS, PTP_PF_EXTTS, 0, { 0, } },
+	{ "SMA1",  SMA1, PTP_PF_NONE, 1, { 0, } },
+	{ "U.FL1", UFL1, PTP_PF_NONE, 1, { 0, } },
+	{ "SMA2",  SMA2, PTP_PF_NONE, 2, { 0, } },
+	{ "U.FL2", UFL2, PTP_PF_NONE, 2, { 0, } },
+};
+
+/**
+ * ice_get_sma_config_e810t
+ * @hw: pointer to the hw struct
+ * @ptp_pins: pointer to the ptp_pin_desc struture
+ *
+ * Read the configuration of the SMA control logic and put it into the
+ * ptp_pin_desc structure
+ */
+static int
+ice_get_sma_config_e810t(struct ice_hw *hw, struct ptp_pin_desc *ptp_pins)
+{
+	u8 data, i;
+	int status;
+
+	/* Read initial pin state */
+	status = ice_read_sma_ctrl_e810t(hw, &data);
+	if (status)
+		return status;
+
+	/* initialize with defaults */
+	for (i = 0; i < NUM_PTP_PINS_E810T; i++) {
+		snprintf(ptp_pins[i].name, sizeof(ptp_pins[i].name),
+			 "%s", ice_pin_desc_e810t[i].name);
+		ptp_pins[i].index = ice_pin_desc_e810t[i].index;
+		ptp_pins[i].func = ice_pin_desc_e810t[i].func;
+		ptp_pins[i].chan = ice_pin_desc_e810t[i].chan;
+	}
+
+	/* Parse SMA1/UFL1 */
+	switch (data & ICE_SMA1_MASK_E810T) {
+	case ICE_SMA1_MASK_E810T:
+	default:
+		ptp_pins[SMA1].func = PTP_PF_NONE;
+		ptp_pins[UFL1].func = PTP_PF_NONE;
+		break;
+	case ICE_SMA1_DIR_EN_E810T:
+		ptp_pins[SMA1].func = PTP_PF_PEROUT;
+		ptp_pins[UFL1].func = PTP_PF_NONE;
+		break;
+	case ICE_SMA1_TX_EN_E810T:
+		ptp_pins[SMA1].func = PTP_PF_EXTTS;
+		ptp_pins[UFL1].func = PTP_PF_NONE;
+		break;
+	case 0:
+		ptp_pins[SMA1].func = PTP_PF_EXTTS;
+		ptp_pins[UFL1].func = PTP_PF_PEROUT;
+		break;
+	}
+
+	/* Parse SMA2/UFL2 */
+	switch (data & ICE_SMA2_MASK_E810T) {
+	case ICE_SMA2_MASK_E810T:
+	default:
+		ptp_pins[SMA2].func = PTP_PF_NONE;
+		ptp_pins[UFL2].func = PTP_PF_NONE;
+		break;
+	case (ICE_SMA2_TX_EN_E810T | ICE_SMA2_UFL2_RX_DIS_E810T):
+		ptp_pins[SMA2].func = PTP_PF_EXTTS;
+		ptp_pins[UFL2].func = PTP_PF_NONE;
+		break;
+	case (ICE_SMA2_DIR_EN_E810T | ICE_SMA2_UFL2_RX_DIS_E810T):
+		ptp_pins[SMA2].func = PTP_PF_PEROUT;
+		ptp_pins[UFL2].func = PTP_PF_NONE;
+		break;
+	case (ICE_SMA2_DIR_EN_E810T | ICE_SMA2_TX_EN_E810T):
+		ptp_pins[SMA2].func = PTP_PF_NONE;
+		ptp_pins[UFL2].func = PTP_PF_EXTTS;
+		break;
+	case ICE_SMA2_DIR_EN_E810T:
+		ptp_pins[SMA2].func = PTP_PF_PEROUT;
+		ptp_pins[UFL2].func = PTP_PF_EXTTS;
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_set_sma_config_e810t
+ * @hw: pointer to the hw struct
+ * @ptp_pins: pointer to the ptp_pin_desc struture
+ *
+ * Set the configuration of the SMA control logic based on the configuration in
+ * num_pins parameter
+ */
+static int
+ice_ptp_set_sma_config_e810t(struct ice_hw *hw,
+			     const struct ptp_pin_desc *ptp_pins)
+{
+	int status;
+	u8 data;
+
+	/* SMA1 and UFL1 cannot be set to TX at the same time */
+	if (ptp_pins[SMA1].func == PTP_PF_PEROUT &&
+	    ptp_pins[UFL1].func == PTP_PF_PEROUT)
+		return -EINVAL;
+
+	/* SMA2 and UFL2 cannot be set to RX at the same time */
+	if (ptp_pins[SMA2].func == PTP_PF_EXTTS &&
+	    ptp_pins[UFL2].func == PTP_PF_EXTTS)
+		return -EINVAL;
+
+	/* Read initial pin state value */
+	status = ice_read_sma_ctrl_e810t(hw, &data);
+	if (status)
+		return status;
+
+	/* Set the right sate based on the desired configuration */
+	data &= ~ICE_SMA1_MASK_E810T;
+	if (ptp_pins[SMA1].func == PTP_PF_NONE &&
+	    ptp_pins[UFL1].func == PTP_PF_NONE) {
+		dev_info(ice_hw_to_dev(hw), "SMA1 + U.FL1 disabled");
+		data |= ICE_SMA1_MASK_E810T;
+	} else if (ptp_pins[SMA1].func == PTP_PF_EXTTS &&
+		   ptp_pins[UFL1].func == PTP_PF_NONE) {
+		dev_info(ice_hw_to_dev(hw), "SMA1 RX");
+		data |= ICE_SMA1_TX_EN_E810T;
+	} else if (ptp_pins[SMA1].func == PTP_PF_NONE &&
+		   ptp_pins[UFL1].func == PTP_PF_PEROUT) {
+		/* U.FL 1 TX will always enable SMA 1 RX */
+		dev_info(ice_hw_to_dev(hw), "SMA1 RX + U.FL1 TX");
+	} else if (ptp_pins[SMA1].func == PTP_PF_EXTTS &&
+		   ptp_pins[UFL1].func == PTP_PF_PEROUT) {
+		dev_info(ice_hw_to_dev(hw), "SMA1 RX + U.FL1 TX");
+	} else if (ptp_pins[SMA1].func == PTP_PF_PEROUT &&
+		   ptp_pins[UFL1].func == PTP_PF_NONE) {
+		dev_info(ice_hw_to_dev(hw), "SMA1 TX");
+		data |= ICE_SMA1_DIR_EN_E810T;
+	}
+
+	data &= ~ICE_SMA2_MASK_E810T;
+	if (ptp_pins[SMA2].func == PTP_PF_NONE &&
+	    ptp_pins[UFL2].func == PTP_PF_NONE) {
+		dev_info(ice_hw_to_dev(hw), "SMA2 + U.FL2 disabled");
+		data |= ICE_SMA2_MASK_E810T;
+	} else if (ptp_pins[SMA2].func == PTP_PF_EXTTS &&
+			ptp_pins[UFL2].func == PTP_PF_NONE) {
+		dev_info(ice_hw_to_dev(hw), "SMA2 RX");
+		data |= (ICE_SMA2_TX_EN_E810T |
+			 ICE_SMA2_UFL2_RX_DIS_E810T);
+	} else if (ptp_pins[SMA2].func == PTP_PF_NONE &&
+		   ptp_pins[UFL2].func == PTP_PF_EXTTS) {
+		dev_info(ice_hw_to_dev(hw), "UFL2 RX");
+		data |= (ICE_SMA2_DIR_EN_E810T | ICE_SMA2_TX_EN_E810T);
+	} else if (ptp_pins[SMA2].func == PTP_PF_PEROUT &&
+		   ptp_pins[UFL2].func == PTP_PF_NONE) {
+		dev_info(ice_hw_to_dev(hw), "SMA2 TX");
+		data |= (ICE_SMA2_DIR_EN_E810T |
+			 ICE_SMA2_UFL2_RX_DIS_E810T);
+	} else if (ptp_pins[SMA2].func == PTP_PF_PEROUT &&
+		   ptp_pins[UFL2].func == PTP_PF_EXTTS) {
+		dev_info(ice_hw_to_dev(hw), "SMA2 TX + U.FL2 RX");
+		data |= ICE_SMA2_DIR_EN_E810T;
+	}
+
+	return ice_write_sma_ctrl_e810t(hw, data);
+}
+
+/**
+ * ice_ptp_set_sma_e810t
+ * @info: the driver's PTP info structure
+ * @pin: pin index in kernel structure
+ * @func: Pin function to be set (PTP_PF_NONE, PTP_PF_EXTTS or PTP_PF_PEROUT)
+ *
+ * Set the configuration of a single SMA pin
+ */
+static int
+ice_ptp_set_sma_e810t(struct ptp_clock_info *info, unsigned int pin,
+		      enum ptp_pin_function func)
+{
+	struct ptp_pin_desc ptp_pins[NUM_PTP_PINS_E810T];
+	struct ice_pf *pf = ptp_info_to_pf(info);
+	struct ice_hw *hw = &pf->hw;
+	int err;
+
+	if (pin < SMA1 || func > PTP_PF_PEROUT)
+		return -EOPNOTSUPP;
+
+	err = ice_get_sma_config_e810t(hw, ptp_pins);
+	if (err)
+		return err;
+
+	/* Disable the same function on the other pin sharing the channel */
+	if (pin == SMA1 && ptp_pins[UFL1].func == func)
+		ptp_pins[UFL1].func = PTP_PF_NONE;
+	if (pin == UFL1 && ptp_pins[SMA1].func == func)
+		ptp_pins[SMA1].func = PTP_PF_NONE;
+
+	if (pin == SMA2 && ptp_pins[UFL2].func == func)
+		ptp_pins[UFL2].func = PTP_PF_NONE;
+	if (pin == UFL2 && ptp_pins[SMA2].func == func)
+		ptp_pins[SMA2].func = PTP_PF_NONE;
+
+	/* Set up new pin function in the temp table */
+	ptp_pins[pin].func = func;
+
+	return ice_ptp_set_sma_config_e810t(hw, ptp_pins);
+}
+
+/**
+ * ice_verify_pin_e810t
+ * @info: the driver's PTP info structure
+ * @pin: Pin index
+ * @func: Assigned function
+ * @chan: Assigned channel
+ *
+ * Verify if pin supports requested pin function. If the Check pins consistency.
+ * Reconfigure the SMA logic attached to the given pin to enable its
+ * desired functionality
+ */
+static int
+ice_verify_pin_e810t(struct ptp_clock_info *info, unsigned int pin,
+		     enum ptp_pin_function func, unsigned int chan)
+{
+	/* Don't allow channel reassignment */
+	if (chan != ice_pin_desc_e810t[pin].chan)
+		return -EOPNOTSUPP;
+
+	/* Check if functions are properly assigned */
+	switch (func) {
+	case PTP_PF_NONE:
+		break;
+	case PTP_PF_EXTTS:
+		if (pin == UFL1)
+			return -EOPNOTSUPP;
+		break;
+	case PTP_PF_PEROUT:
+		if (pin == UFL2 || pin == GNSS)
+			return -EOPNOTSUPP;
+		break;
+	case PTP_PF_PHYSYNC:
+		return -EOPNOTSUPP;
+	}
+
+	return ice_ptp_set_sma_e810t(info, pin, func);
+}
+
+/**
+ * ice_set_tx_tstamp - Enable or disable Tx timestamping
+ * @pf: The PF pointer to search in
+ * @on: bool value for whether timestamps are enabled or disabled
+ */
+static void ice_set_tx_tstamp(struct ice_pf *pf, bool on)
+{
+	struct ice_vsi *vsi;
+	u32 val;
+	u16 i;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return;
+
+	/* Set the timestamp enable flag for all the Tx rings */
+	ice_for_each_txq(vsi, i) {
+		if (!vsi->tx_rings[i])
+			continue;
+		vsi->tx_rings[i]->ptp_tx = on;
+	}
+
+	/* Configure the Tx timestamp interrupt */
+	val = rd32(&pf->hw, PFINT_OICR_ENA);
+	if (on)
+		val |= PFINT_OICR_TSYN_TX_M;
+	else
+		val &= ~PFINT_OICR_TSYN_TX_M;
+	wr32(&pf->hw, PFINT_OICR_ENA, val);
+
+	pf->ptp.tstamp_config.tx_type = on ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+}
+
+/**
+ * ice_set_rx_tstamp - Enable or disable Rx timestamping
+ * @pf: The PF pointer to search in
+ * @on: bool value for whether timestamps are enabled or disabled
+ */
+static void ice_set_rx_tstamp(struct ice_pf *pf, bool on)
+{
+	struct ice_vsi *vsi;
+	u16 i;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return;
+
+	/* Set the timestamp flag for all the Rx rings */
+	ice_for_each_rxq(vsi, i) {
+		if (!vsi->rx_rings[i])
+			continue;
+		vsi->rx_rings[i]->ptp_rx = on;
+	}
+
+	pf->ptp.tstamp_config.rx_filter = on ? HWTSTAMP_FILTER_ALL :
+					       HWTSTAMP_FILTER_NONE;
+}
+
+/**
+ * ice_ptp_cfg_timestamp - Configure timestamp for init/deinit
+ * @pf: Board private structure
+ * @ena: bool value to enable or disable time stamp
+ *
+ * This function will configure timestamping during PTP initialization
+ * and deinitialization
+ */
+void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena)
+{
+	ice_set_tx_tstamp(pf, ena);
+	ice_set_rx_tstamp(pf, ena);
+}
+
+/**
+ * ice_get_ptp_clock_index - Get the PTP clock index
+ * @pf: the PF pointer
+ *
+ * Determine the clock index of the PTP clock associated with this device. If
+ * this is the PF controlling the clock, just use the local access to the
+ * clock device pointer.
+ *
+ * Otherwise, read from the driver shared parameters to determine the clock
+ * index value.
+ *
+ * Returns: the index of the PTP clock associated with this device, or -1 if
+ * there is no associated clock.
+ */
+int ice_get_ptp_clock_index(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	enum ice_aqc_driver_params param_idx;
+	struct ice_hw *hw = &pf->hw;
+	u8 tmr_idx;
+	u32 value;
+	int err;
+
+	/* Use the ptp_clock structure if we're the main PF */
+	if (pf->ptp.clock)
+		return ptp_clock_index(pf->ptp.clock);
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc;
+	if (!tmr_idx)
+		param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0;
+	else
+		param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1;
+
+	err = ice_aq_get_driver_param(hw, param_idx, &value, NULL);
+	if (err) {
+		dev_err(dev, "Failed to read PTP clock index parameter, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+		return -1;
+	}
+
+	/* The PTP clock index is an integer, and will be between 0 and
+	 * INT_MAX. The highest bit of the driver shared parameter is used to
+	 * indicate whether or not the currently stored clock index is valid.
+	 */
+	if (!(value & PTP_SHARED_CLK_IDX_VALID))
+		return -1;
+
+	return value & ~PTP_SHARED_CLK_IDX_VALID;
+}
+
+/**
+ * ice_set_ptp_clock_index - Set the PTP clock index
+ * @pf: the PF pointer
+ *
+ * Set the PTP clock index for this device into the shared driver parameters,
+ * so that other PFs associated with this device can read it.
+ *
+ * If the PF is unable to store the clock index, it will log an error, but
+ * will continue operating PTP.
+ */
+static void ice_set_ptp_clock_index(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	enum ice_aqc_driver_params param_idx;
+	struct ice_hw *hw = &pf->hw;
+	u8 tmr_idx;
+	u32 value;
+	int err;
+
+	if (!pf->ptp.clock)
+		return;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc;
+	if (!tmr_idx)
+		param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0;
+	else
+		param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1;
+
+	value = (u32)ptp_clock_index(pf->ptp.clock);
+	if (value > INT_MAX) {
+		dev_err(dev, "PTP Clock index is too large to store\n");
+		return;
+	}
+	value |= PTP_SHARED_CLK_IDX_VALID;
+
+	err = ice_aq_set_driver_param(hw, param_idx, value, NULL);
+	if (err) {
+		dev_err(dev, "Failed to set PTP clock index parameter, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+	}
+}
+
+/**
+ * ice_clear_ptp_clock_index - Clear the PTP clock index
+ * @pf: the PF pointer
+ *
+ * Clear the PTP clock index for this device. Must be called when
+ * unregistering the PTP clock, in order to ensure other PFs stop reporting
+ * a clock object that no longer exists.
+ */
+static void ice_clear_ptp_clock_index(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	enum ice_aqc_driver_params param_idx;
+	struct ice_hw *hw = &pf->hw;
+	u8 tmr_idx;
+	int err;
+
+	/* Do not clear the index if we don't own the timer */
+	if (!hw->func_caps.ts_func_info.src_tmr_owned)
+		return;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc;
+	if (!tmr_idx)
+		param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR0;
+	else
+		param_idx = ICE_AQC_DRIVER_PARAM_CLK_IDX_TMR1;
+
+	err = ice_aq_set_driver_param(hw, param_idx, 0, NULL);
+	if (err) {
+		dev_dbg(dev, "Failed to clear PTP clock index parameter, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+	}
+}
+
+/**
+ * ice_ptp_read_src_clk_reg - Read the source clock register
+ * @pf: Board private structure
+ * @sts: Optional parameter for holding a pair of system timestamps from
+ *       the system clock. Will be ignored if NULL is given.
+ */
+static u64
+ice_ptp_read_src_clk_reg(struct ice_pf *pf, struct ptp_system_timestamp *sts)
+{
+	struct ice_hw *hw = &pf->hw;
+	u32 hi, lo, lo2;
+	u8 tmr_idx;
+
+	tmr_idx = ice_get_ptp_src_clock_index(hw);
+	/* Read the system timestamp pre PHC read */
+	ptp_read_system_prets(sts);
+
+	lo = rd32(hw, GLTSYN_TIME_L(tmr_idx));
+
+	/* Read the system timestamp post PHC read */
+	ptp_read_system_postts(sts);
+
+	hi = rd32(hw, GLTSYN_TIME_H(tmr_idx));
+	lo2 = rd32(hw, GLTSYN_TIME_L(tmr_idx));
+
+	if (lo2 < lo) {
+		/* if TIME_L rolled over read TIME_L again and update
+		 * system timestamps
+		 */
+		ptp_read_system_prets(sts);
+		lo = rd32(hw, GLTSYN_TIME_L(tmr_idx));
+		ptp_read_system_postts(sts);
+		hi = rd32(hw, GLTSYN_TIME_H(tmr_idx));
+	}
+
+	return ((u64)hi << 32) | lo;
+}
+
+/**
+ * ice_ptp_extend_32b_ts - Convert a 32b nanoseconds timestamp to 64b
+ * @cached_phc_time: recently cached copy of PHC time
+ * @in_tstamp: Ingress/egress 32b nanoseconds timestamp value
+ *
+ * Hardware captures timestamps which contain only 32 bits of nominal
+ * nanoseconds, as opposed to the 64bit timestamps that the stack expects.
+ * Note that the captured timestamp values may be 40 bits, but the lower
+ * 8 bits are sub-nanoseconds and generally discarded.
+ *
+ * Extend the 32bit nanosecond timestamp using the following algorithm and
+ * assumptions:
+ *
+ * 1) have a recently cached copy of the PHC time
+ * 2) assume that the in_tstamp was captured 2^31 nanoseconds (~2.1
+ *    seconds) before or after the PHC time was captured.
+ * 3) calculate the delta between the cached time and the timestamp
+ * 4) if the delta is smaller than 2^31 nanoseconds, then the timestamp was
+ *    captured after the PHC time. In this case, the full timestamp is just
+ *    the cached PHC time plus the delta.
+ * 5) otherwise, if the delta is larger than 2^31 nanoseconds, then the
+ *    timestamp was captured *before* the PHC time, i.e. because the PHC
+ *    cache was updated after the timestamp was captured by hardware. In this
+ *    case, the full timestamp is the cached time minus the inverse delta.
+ *
+ * This algorithm works even if the PHC time was updated after a Tx timestamp
+ * was requested, but before the Tx timestamp event was reported from
+ * hardware.
+ *
+ * This calculation primarily relies on keeping the cached PHC time up to
+ * date. If the timestamp was captured more than 2^31 nanoseconds after the
+ * PHC time, it is possible that the lower 32bits of PHC time have
+ * overflowed more than once, and we might generate an incorrect timestamp.
+ *
+ * This is prevented by (a) periodically updating the cached PHC time once
+ * a second, and (b) discarding any Tx timestamp packet if it has waited for
+ * a timestamp for more than one second.
+ */
+static u64 ice_ptp_extend_32b_ts(u64 cached_phc_time, u32 in_tstamp)
+{
+	u32 delta, phc_time_lo;
+	u64 ns;
+
+	/* Extract the lower 32 bits of the PHC time */
+	phc_time_lo = (u32)cached_phc_time;
+
+	/* Calculate the delta between the lower 32bits of the cached PHC
+	 * time and the in_tstamp value
+	 */
+	delta = (in_tstamp - phc_time_lo);
+
+	/* Do not assume that the in_tstamp is always more recent than the
+	 * cached PHC time. If the delta is large, it indicates that the
+	 * in_tstamp was taken in the past, and should be converted
+	 * forward.
+	 */
+	if (delta > (U32_MAX / 2)) {
+		/* reverse the delta calculation here */
+		delta = (phc_time_lo - in_tstamp);
+		ns = cached_phc_time - delta;
+	} else {
+		ns = cached_phc_time + delta;
+	}
+
+	return ns;
+}
+
+/**
+ * ice_ptp_extend_40b_ts - Convert a 40b timestamp to 64b nanoseconds
+ * @pf: Board private structure
+ * @in_tstamp: Ingress/egress 40b timestamp value
+ *
+ * The Tx and Rx timestamps are 40 bits wide, including 32 bits of nominal
+ * nanoseconds, 7 bits of sub-nanoseconds, and a valid bit.
+ *
+ *  *--------------------------------------------------------------*
+ *  | 32 bits of nanoseconds | 7 high bits of sub ns underflow | v |
+ *  *--------------------------------------------------------------*
+ *
+ * The low bit is an indicator of whether the timestamp is valid. The next
+ * 7 bits are a capture of the upper 7 bits of the sub-nanosecond underflow,
+ * and the remaining 32 bits are the lower 32 bits of the PHC timer.
+ *
+ * It is assumed that the caller verifies the timestamp is valid prior to
+ * calling this function.
+ *
+ * Extract the 32bit nominal nanoseconds and extend them. Use the cached PHC
+ * time stored in the device private PTP structure as the basis for timestamp
+ * extension.
+ *
+ * See ice_ptp_extend_32b_ts for a detailed explanation of the extension
+ * algorithm.
+ */
+static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp)
+{
+	const u64 mask = GENMASK_ULL(31, 0);
+	unsigned long discard_time;
+
+	/* Discard the hardware timestamp if the cached PHC time is too old */
+	discard_time = pf->ptp.cached_phc_jiffies + msecs_to_jiffies(2000);
+	if (time_is_before_jiffies(discard_time)) {
+		pf->ptp.tx_hwtstamp_discarded++;
+		return 0;
+	}
+
+	return ice_ptp_extend_32b_ts(pf->ptp.cached_phc_time,
+				     (in_tstamp >> 8) & mask);
+}
+
+/**
+ * ice_ptp_is_tx_tracker_up - Check if Tx tracker is ready for new timestamps
+ * @tx: the PTP Tx timestamp tracker to check
+ *
+ * Check that a given PTP Tx timestamp tracker is up, i.e. that it is ready
+ * to accept new timestamp requests.
+ *
+ * Assumes the tx->lock spinlock is already held.
+ */
+static bool
+ice_ptp_is_tx_tracker_up(struct ice_ptp_tx *tx)
+{
+	lockdep_assert_held(&tx->lock);
+
+	return tx->init && !tx->calibrating;
+}
+
+/**
+ * ice_ptp_process_tx_tstamp - Process Tx timestamps for a port
+ * @tx: the PTP Tx timestamp tracker
+ *
+ * Process timestamps captured by the PHY associated with this port. To do
+ * this, loop over each index with a waiting skb.
+ *
+ * If a given index has a valid timestamp, perform the following steps:
+ *
+ * 1) check that the timestamp request is not stale
+ * 2) check that a timestamp is ready and available in the PHY memory bank
+ * 3) read and copy the timestamp out of the PHY register
+ * 4) unlock the index by clearing the associated in_use bit
+ * 5) check if the timestamp is stale, and discard if so
+ * 6) extend the 40 bit timestamp value to get a 64 bit timestamp value
+ * 7) send this 64 bit timestamp to the stack
+ *
+ * Note that we do not hold the tracking lock while reading the Tx timestamp.
+ * This is because reading the timestamp requires taking a mutex that might
+ * sleep.
+ *
+ * The only place where we set in_use is when a new timestamp is initiated
+ * with a slot index. This is only called in the hard xmit routine where an
+ * SKB has a request flag set. The only places where we clear this bit is this
+ * function, or during teardown when the Tx timestamp tracker is being
+ * removed. A timestamp index will never be re-used until the in_use bit for
+ * that index is cleared.
+ *
+ * If a Tx thread starts a new timestamp, we might not begin processing it
+ * right away but we will notice it at the end when we re-queue the task.
+ *
+ * If a Tx thread starts a new timestamp just after this function exits, the
+ * interrupt for that timestamp should re-trigger this function once
+ * a timestamp is ready.
+ *
+ * In cases where the PTP hardware clock was directly adjusted, some
+ * timestamps may not be able to safely use the timestamp extension math. In
+ * this case, software will set the stale bit for any outstanding Tx
+ * timestamps when the clock is adjusted. Then this function will discard
+ * those captured timestamps instead of sending them to the stack.
+ *
+ * If a Tx packet has been waiting for more than 2 seconds, it is not possible
+ * to correctly extend the timestamp using the cached PHC time. It is
+ * extremely unlikely that a packet will ever take this long to timestamp. If
+ * we detect a Tx timestamp request that has waited for this long we assume
+ * the packet will never be sent by hardware and discard it without reading
+ * the timestamp register.
+ */
+static void ice_ptp_process_tx_tstamp(struct ice_ptp_tx *tx)
+{
+	struct ice_ptp_port *ptp_port;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	u64 tstamp_ready;
+	bool link_up;
+	int err;
+	u8 idx;
+
+	if (!tx->init)
+		return;
+
+	ptp_port = container_of(tx, struct ice_ptp_port, tx);
+	pf = ptp_port_to_pf(ptp_port);
+	hw = &pf->hw;
+
+	/* Read the Tx ready status first */
+	err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready);
+	if (err)
+		return;
+
+	/* Drop packets if the link went down */
+	link_up = ptp_port->link_up;
+
+	for_each_set_bit(idx, tx->in_use, tx->len) {
+		struct skb_shared_hwtstamps shhwtstamps = {};
+		u8 phy_idx = idx + tx->offset;
+		u64 raw_tstamp = 0, tstamp;
+		bool drop_ts = !link_up;
+		struct sk_buff *skb;
+
+		/* Drop packets which have waited for more than 2 seconds */
+		if (time_is_before_jiffies(tx->tstamps[idx].start + 2 * HZ)) {
+			drop_ts = true;
+
+			/* Count the number of Tx timestamps that timed out */
+			pf->ptp.tx_hwtstamp_timeouts++;
+		}
+
+		/* Only read a timestamp from the PHY if its marked as ready
+		 * by the tstamp_ready register. This avoids unnecessary
+		 * reading of timestamps which are not yet valid. This is
+		 * important as we must read all timestamps which are valid
+		 * and only timestamps which are valid during each interrupt.
+		 * If we do not, the hardware logic for generating a new
+		 * interrupt can get stuck on some devices.
+		 */
+		if (!(tstamp_ready & BIT_ULL(phy_idx))) {
+			if (drop_ts)
+				goto skip_ts_read;
+
+			continue;
+		}
+
+		ice_trace(tx_tstamp_fw_req, tx->tstamps[idx].skb, idx);
+
+		err = ice_read_phy_tstamp(hw, tx->block, phy_idx, &raw_tstamp);
+		if (err && !drop_ts)
+			continue;
+
+		ice_trace(tx_tstamp_fw_done, tx->tstamps[idx].skb, idx);
+
+		/* For PHYs which don't implement a proper timestamp ready
+		 * bitmap, verify that the timestamp value is different
+		 * from the last cached timestamp. If it is not, skip this for
+		 * now assuming it hasn't yet been captured by hardware.
+		 */
+		if (!drop_ts && tx->verify_cached &&
+		    raw_tstamp == tx->tstamps[idx].cached_tstamp)
+			continue;
+
+		/* Discard any timestamp value without the valid bit set */
+		if (!(raw_tstamp & ICE_PTP_TS_VALID))
+			drop_ts = true;
+
+skip_ts_read:
+		spin_lock(&tx->lock);
+		if (tx->verify_cached && raw_tstamp)
+			tx->tstamps[idx].cached_tstamp = raw_tstamp;
+		clear_bit(idx, tx->in_use);
+		skb = tx->tstamps[idx].skb;
+		tx->tstamps[idx].skb = NULL;
+		if (test_and_clear_bit(idx, tx->stale))
+			drop_ts = true;
+		spin_unlock(&tx->lock);
+
+		/* It is unlikely but possible that the SKB will have been
+		 * flushed at this point due to link change or teardown.
+		 */
+		if (!skb)
+			continue;
+
+		if (drop_ts) {
+			dev_kfree_skb_any(skb);
+			continue;
+		}
+
+		/* Extend the timestamp using cached PHC time */
+		tstamp = ice_ptp_extend_40b_ts(pf, raw_tstamp);
+		if (tstamp) {
+			shhwtstamps.hwtstamp = ns_to_ktime(tstamp);
+			ice_trace(tx_tstamp_complete, skb, idx);
+		}
+
+		skb_tstamp_tx(skb, &shhwtstamps);
+		dev_kfree_skb_any(skb);
+	}
+}
+
+/**
+ * ice_ptp_tx_tstamp - Process Tx timestamps for this function.
+ * @tx: Tx tracking structure to initialize
+ *
+ * Returns: ICE_TX_TSTAMP_WORK_PENDING if there are any outstanding incomplete
+ * Tx timestamps, or ICE_TX_TSTAMP_WORK_DONE otherwise.
+ */
+static enum ice_tx_tstamp_work ice_ptp_tx_tstamp(struct ice_ptp_tx *tx)
+{
+	bool more_timestamps;
+
+	if (!tx->init)
+		return ICE_TX_TSTAMP_WORK_DONE;
+
+	/* Process the Tx timestamp tracker */
+	ice_ptp_process_tx_tstamp(tx);
+
+	/* Check if there are outstanding Tx timestamps */
+	spin_lock(&tx->lock);
+	more_timestamps = tx->init && !bitmap_empty(tx->in_use, tx->len);
+	spin_unlock(&tx->lock);
+
+	if (more_timestamps)
+		return ICE_TX_TSTAMP_WORK_PENDING;
+
+	return ICE_TX_TSTAMP_WORK_DONE;
+}
+
+/**
+ * ice_ptp_alloc_tx_tracker - Initialize tracking for Tx timestamps
+ * @tx: Tx tracking structure to initialize
+ *
+ * Assumes that the length has already been initialized. Do not call directly,
+ * use the ice_ptp_init_tx_* instead.
+ */
+static int
+ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx)
+{
+	unsigned long *in_use, *stale;
+	struct ice_tx_tstamp *tstamps;
+
+	tstamps = kcalloc(tx->len, sizeof(*tstamps), GFP_KERNEL);
+	in_use = bitmap_zalloc(tx->len, GFP_KERNEL);
+	stale = bitmap_zalloc(tx->len, GFP_KERNEL);
+
+	if (!tstamps || !in_use || !stale) {
+		kfree(tstamps);
+		bitmap_free(in_use);
+		bitmap_free(stale);
+
+		return -ENOMEM;
+	}
+
+	tx->tstamps = tstamps;
+	tx->in_use = in_use;
+	tx->stale = stale;
+	tx->init = 1;
+
+	spin_lock_init(&tx->lock);
+
+	return 0;
+}
+
+/**
+ * ice_ptp_flush_tx_tracker - Flush any remaining timestamps from the tracker
+ * @pf: Board private structure
+ * @tx: the tracker to flush
+ *
+ * Called during teardown when a Tx tracker is being removed.
+ */
+static void
+ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
+{
+	struct ice_hw *hw = &pf->hw;
+	u64 tstamp_ready;
+	int err;
+	u8 idx;
+
+	err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready);
+	if (err) {
+		dev_dbg(ice_pf_to_dev(pf), "Failed to get the Tx tstamp ready bitmap for block %u, err %d\n",
+			tx->block, err);
+
+		/* If we fail to read the Tx timestamp ready bitmap just
+		 * skip clearing the PHY timestamps.
+		 */
+		tstamp_ready = 0;
+	}
+
+	for_each_set_bit(idx, tx->in_use, tx->len) {
+		u8 phy_idx = idx + tx->offset;
+		struct sk_buff *skb;
+
+		/* In case this timestamp is ready, we need to clear it. */
+		if (!hw->reset_ongoing && (tstamp_ready & BIT_ULL(phy_idx)))
+			ice_clear_phy_tstamp(hw, tx->block, phy_idx);
+
+		spin_lock(&tx->lock);
+		skb = tx->tstamps[idx].skb;
+		tx->tstamps[idx].skb = NULL;
+		clear_bit(idx, tx->in_use);
+		clear_bit(idx, tx->stale);
+		spin_unlock(&tx->lock);
+
+		/* Count the number of Tx timestamps flushed */
+		pf->ptp.tx_hwtstamp_flushed++;
+
+		/* Free the SKB after we've cleared the bit */
+		dev_kfree_skb_any(skb);
+	}
+}
+
+/**
+ * ice_ptp_mark_tx_tracker_stale - Mark unfinished timestamps as stale
+ * @tx: the tracker to mark
+ *
+ * Mark currently outstanding Tx timestamps as stale. This prevents sending
+ * their timestamp value to the stack. This is required to prevent extending
+ * the 40bit hardware timestamp incorrectly.
+ *
+ * This should be called when the PTP clock is modified such as after a set
+ * time request.
+ */
+static void
+ice_ptp_mark_tx_tracker_stale(struct ice_ptp_tx *tx)
+{
+	spin_lock(&tx->lock);
+	bitmap_or(tx->stale, tx->stale, tx->in_use, tx->len);
+	spin_unlock(&tx->lock);
+}
+
+/**
+ * ice_ptp_release_tx_tracker - Release allocated memory for Tx tracker
+ * @pf: Board private structure
+ * @tx: Tx tracking structure to release
+ *
+ * Free memory associated with the Tx timestamp tracker.
+ */
+static void
+ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx)
+{
+	spin_lock(&tx->lock);
+	tx->init = 0;
+	spin_unlock(&tx->lock);
+
+	/* wait for potentially outstanding interrupt to complete */
+	synchronize_irq(pf->oicr_irq.virq);
+
+	ice_ptp_flush_tx_tracker(pf, tx);
+
+	kfree(tx->tstamps);
+	tx->tstamps = NULL;
+
+	bitmap_free(tx->in_use);
+	tx->in_use = NULL;
+
+	bitmap_free(tx->stale);
+	tx->stale = NULL;
+
+	tx->len = 0;
+}
+
+/**
+ * ice_ptp_init_tx_e822 - Initialize tracking for Tx timestamps
+ * @pf: Board private structure
+ * @tx: the Tx tracking structure to initialize
+ * @port: the port this structure tracks
+ *
+ * Initialize the Tx timestamp tracker for this port. For generic MAC devices,
+ * the timestamp block is shared for all ports in the same quad. To avoid
+ * ports using the same timestamp index, logically break the block of
+ * registers into chunks based on the port number.
+ */
+static int
+ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
+{
+	tx->block = port / ICE_PORTS_PER_QUAD;
+	tx->offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT_E822;
+	tx->len = INDEX_PER_PORT_E822;
+	tx->verify_cached = 0;
+
+	return ice_ptp_alloc_tx_tracker(tx);
+}
+
+/**
+ * ice_ptp_init_tx_e810 - Initialize tracking for Tx timestamps
+ * @pf: Board private structure
+ * @tx: the Tx tracking structure to initialize
+ *
+ * Initialize the Tx timestamp tracker for this PF. For E810 devices, each
+ * port has its own block of timestamps, independent of the other ports.
+ */
+static int
+ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx)
+{
+	tx->block = pf->hw.port_info->lport;
+	tx->offset = 0;
+	tx->len = INDEX_PER_PORT_E810;
+	/* The E810 PHY does not provide a timestamp ready bitmap. Instead,
+	 * verify new timestamps against cached copy of the last read
+	 * timestamp.
+	 */
+	tx->verify_cached = 1;
+
+	return ice_ptp_alloc_tx_tracker(tx);
+}
+
+/**
+ * ice_ptp_update_cached_phctime - Update the cached PHC time values
+ * @pf: Board specific private structure
+ *
+ * This function updates the system time values which are cached in the PF
+ * structure and the Rx rings.
+ *
+ * This function must be called periodically to ensure that the cached value
+ * is never more than 2 seconds old.
+ *
+ * Note that the cached copy in the PF PTP structure is always updated, even
+ * if we can't update the copy in the Rx rings.
+ *
+ * Return:
+ * * 0 - OK, successfully updated
+ * * -EAGAIN - PF was busy, need to reschedule the update
+ */
+static int ice_ptp_update_cached_phctime(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	unsigned long update_before;
+	u64 systime;
+	int i;
+
+	update_before = pf->ptp.cached_phc_jiffies + msecs_to_jiffies(2000);
+	if (pf->ptp.cached_phc_time &&
+	    time_is_before_jiffies(update_before)) {
+		unsigned long time_taken = jiffies - pf->ptp.cached_phc_jiffies;
+
+		dev_warn(dev, "%u msecs passed between update to cached PHC time\n",
+			 jiffies_to_msecs(time_taken));
+		pf->ptp.late_cached_phc_updates++;
+	}
+
+	/* Read the current PHC time */
+	systime = ice_ptp_read_src_clk_reg(pf, NULL);
+
+	/* Update the cached PHC time stored in the PF structure */
+	WRITE_ONCE(pf->ptp.cached_phc_time, systime);
+	WRITE_ONCE(pf->ptp.cached_phc_jiffies, jiffies);
+
+	if (test_and_set_bit(ICE_CFG_BUSY, pf->state))
+		return -EAGAIN;
+
+	ice_for_each_vsi(pf, i) {
+		struct ice_vsi *vsi = pf->vsi[i];
+		int j;
+
+		if (!vsi)
+			continue;
+
+		if (vsi->type != ICE_VSI_PF)
+			continue;
+
+		ice_for_each_rxq(vsi, j) {
+			if (!vsi->rx_rings[j])
+				continue;
+			WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime);
+		}
+	}
+	clear_bit(ICE_CFG_BUSY, pf->state);
+
+	return 0;
+}
+
+/**
+ * ice_ptp_reset_cached_phctime - Reset cached PHC time after an update
+ * @pf: Board specific private structure
+ *
+ * This function must be called when the cached PHC time is no longer valid,
+ * such as after a time adjustment. It marks any currently outstanding Tx
+ * timestamps as stale and updates the cached PHC time for both the PF and Rx
+ * rings.
+ *
+ * If updating the PHC time cannot be done immediately, a warning message is
+ * logged and the work item is scheduled immediately to minimize the window
+ * with a wrong cached timestamp.
+ */
+static void ice_ptp_reset_cached_phctime(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	int err;
+
+	/* Update the cached PHC time immediately if possible, otherwise
+	 * schedule the work item to execute soon.
+	 */
+	err = ice_ptp_update_cached_phctime(pf);
+	if (err) {
+		/* If another thread is updating the Rx rings, we won't
+		 * properly reset them here. This could lead to reporting of
+		 * invalid timestamps, but there isn't much we can do.
+		 */
+		dev_warn(dev, "%s: ICE_CFG_BUSY, unable to immediately update cached PHC time\n",
+			 __func__);
+
+		/* Queue the work item to update the Rx rings when possible */
+		kthread_queue_delayed_work(pf->ptp.kworker, &pf->ptp.work,
+					   msecs_to_jiffies(10));
+	}
+
+	/* Mark any outstanding timestamps as stale, since they might have
+	 * been captured in hardware before the time update. This could lead
+	 * to us extending them with the wrong cached value resulting in
+	 * incorrect timestamp values.
+	 */
+	ice_ptp_mark_tx_tracker_stale(&pf->ptp.port.tx);
+}
+
+/**
+ * ice_ptp_read_time - Read the time from the device
+ * @pf: Board private structure
+ * @ts: timespec structure to hold the current time value
+ * @sts: Optional parameter for holding a pair of system timestamps from
+ *       the system clock. Will be ignored if NULL is given.
+ *
+ * This function reads the source clock registers and stores them in a timespec.
+ * However, since the registers are 64 bits of nanoseconds, we must convert the
+ * result to a timespec before we can return.
+ */
+static void
+ice_ptp_read_time(struct ice_pf *pf, struct timespec64 *ts,
+		  struct ptp_system_timestamp *sts)
+{
+	u64 time_ns = ice_ptp_read_src_clk_reg(pf, sts);
+
+	*ts = ns_to_timespec64(time_ns);
+}
+
+/**
+ * ice_ptp_write_init - Set PHC time to provided value
+ * @pf: Board private structure
+ * @ts: timespec structure that holds the new time value
+ *
+ * Set the PHC time to the specified time provided in the timespec.
+ */
+static int ice_ptp_write_init(struct ice_pf *pf, struct timespec64 *ts)
+{
+	u64 ns = timespec64_to_ns(ts);
+	struct ice_hw *hw = &pf->hw;
+
+	return ice_ptp_init_time(hw, ns);
+}
+
+/**
+ * ice_ptp_write_adj - Adjust PHC clock time atomically
+ * @pf: Board private structure
+ * @adj: Adjustment in nanoseconds
+ *
+ * Perform an atomic adjustment of the PHC time by the specified number of
+ * nanoseconds.
+ */
+static int ice_ptp_write_adj(struct ice_pf *pf, s32 adj)
+{
+	struct ice_hw *hw = &pf->hw;
+
+	return ice_ptp_adj_clock(hw, adj);
+}
+
+/**
+ * ice_base_incval - Get base timer increment value
+ * @pf: Board private structure
+ *
+ * Look up the base timer increment value for this device. The base increment
+ * value is used to define the nominal clock tick rate. This increment value
+ * is programmed during device initialization. It is also used as the basis
+ * for calculating adjustments using scaled_ppm.
+ */
+static u64 ice_base_incval(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	u64 incval;
+
+	if (ice_is_e810(hw))
+		incval = ICE_PTP_NOMINAL_INCVAL_E810;
+	else if (ice_e822_time_ref(hw) < NUM_ICE_TIME_REF_FREQ)
+		incval = ice_e822_nominal_incval(ice_e822_time_ref(hw));
+	else
+		incval = UNKNOWN_INCVAL_E822;
+
+	dev_dbg(ice_pf_to_dev(pf), "PTP: using base increment value of 0x%016llx\n",
+		incval);
+
+	return incval;
+}
+
+/**
+ * ice_ptp_check_tx_fifo - Check whether Tx FIFO is in an OK state
+ * @port: PTP port for which Tx FIFO is checked
+ */
+static int ice_ptp_check_tx_fifo(struct ice_ptp_port *port)
+{
+	int quad = port->port_num / ICE_PORTS_PER_QUAD;
+	int offs = port->port_num % ICE_PORTS_PER_QUAD;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	u32 val, phy_sts;
+	int err;
+
+	pf = ptp_port_to_pf(port);
+	hw = &pf->hw;
+
+	if (port->tx_fifo_busy_cnt == FIFO_OK)
+		return 0;
+
+	/* need to read FIFO state */
+	if (offs == 0 || offs == 1)
+		err = ice_read_quad_reg_e822(hw, quad, Q_REG_FIFO01_STATUS,
+					     &val);
+	else
+		err = ice_read_quad_reg_e822(hw, quad, Q_REG_FIFO23_STATUS,
+					     &val);
+
+	if (err) {
+		dev_err(ice_pf_to_dev(pf), "PTP failed to check port %d Tx FIFO, err %d\n",
+			port->port_num, err);
+		return err;
+	}
+
+	if (offs & 0x1)
+		phy_sts = (val & Q_REG_FIFO13_M) >> Q_REG_FIFO13_S;
+	else
+		phy_sts = (val & Q_REG_FIFO02_M) >> Q_REG_FIFO02_S;
+
+	if (phy_sts & FIFO_EMPTY) {
+		port->tx_fifo_busy_cnt = FIFO_OK;
+		return 0;
+	}
+
+	port->tx_fifo_busy_cnt++;
+
+	dev_dbg(ice_pf_to_dev(pf), "Try %d, port %d FIFO not empty\n",
+		port->tx_fifo_busy_cnt, port->port_num);
+
+	if (port->tx_fifo_busy_cnt == ICE_PTP_FIFO_NUM_CHECKS) {
+		dev_dbg(ice_pf_to_dev(pf),
+			"Port %d Tx FIFO still not empty; resetting quad %d\n",
+			port->port_num, quad);
+		ice_ptp_reset_ts_memory_quad_e822(hw, quad);
+		port->tx_fifo_busy_cnt = FIFO_OK;
+		return 0;
+	}
+
+	return -EAGAIN;
+}
+
+/**
+ * ice_ptp_wait_for_offsets - Check for valid Tx and Rx offsets
+ * @work: Pointer to the kthread_work structure for this task
+ *
+ * Check whether hardware has completed measuring the Tx and Rx offset values
+ * used to configure and enable vernier timestamp calibration.
+ *
+ * Once the offset in either direction is measured, configure the associated
+ * registers with the calibrated offset values and enable timestamping. The Tx
+ * and Rx directions are configured independently as soon as their associated
+ * offsets are known.
+ *
+ * This function reschedules itself until both Tx and Rx calibration have
+ * completed.
+ */
+static void ice_ptp_wait_for_offsets(struct kthread_work *work)
+{
+	struct ice_ptp_port *port;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int tx_err;
+	int rx_err;
+
+	port = container_of(work, struct ice_ptp_port, ov_work.work);
+	pf = ptp_port_to_pf(port);
+	hw = &pf->hw;
+
+	if (ice_is_reset_in_progress(pf->state)) {
+		/* wait for device driver to complete reset */
+		kthread_queue_delayed_work(pf->ptp.kworker,
+					   &port->ov_work,
+					   msecs_to_jiffies(100));
+		return;
+	}
+
+	tx_err = ice_ptp_check_tx_fifo(port);
+	if (!tx_err)
+		tx_err = ice_phy_cfg_tx_offset_e822(hw, port->port_num);
+	rx_err = ice_phy_cfg_rx_offset_e822(hw, port->port_num);
+	if (tx_err || rx_err) {
+		/* Tx and/or Rx offset not yet configured, try again later */
+		kthread_queue_delayed_work(pf->ptp.kworker,
+					   &port->ov_work,
+					   msecs_to_jiffies(100));
+		return;
+	}
+}
+
+/**
+ * ice_ptp_port_phy_stop - Stop timestamping for a PHY port
+ * @ptp_port: PTP port to stop
+ */
+static int
+ice_ptp_port_phy_stop(struct ice_ptp_port *ptp_port)
+{
+	struct ice_pf *pf = ptp_port_to_pf(ptp_port);
+	u8 port = ptp_port->port_num;
+	struct ice_hw *hw = &pf->hw;
+	int err;
+
+	if (ice_is_e810(hw))
+		return 0;
+
+	mutex_lock(&ptp_port->ps_lock);
+
+	kthread_cancel_delayed_work_sync(&ptp_port->ov_work);
+
+	err = ice_stop_phy_timer_e822(hw, port, true);
+	if (err)
+		dev_err(ice_pf_to_dev(pf), "PTP failed to set PHY port %d down, err %d\n",
+			port, err);
+
+	mutex_unlock(&ptp_port->ps_lock);
+
+	return err;
+}
+
+/**
+ * ice_ptp_port_phy_restart - (Re)start and calibrate PHY timestamping
+ * @ptp_port: PTP port for which the PHY start is set
+ *
+ * Start the PHY timestamping block, and initiate Vernier timestamping
+ * calibration. If timestamping cannot be calibrated (such as if link is down)
+ * then disable the timestamping block instead.
+ */
+static int
+ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port)
+{
+	struct ice_pf *pf = ptp_port_to_pf(ptp_port);
+	u8 port = ptp_port->port_num;
+	struct ice_hw *hw = &pf->hw;
+	int err;
+
+	if (ice_is_e810(hw))
+		return 0;
+
+	if (!ptp_port->link_up)
+		return ice_ptp_port_phy_stop(ptp_port);
+
+	mutex_lock(&ptp_port->ps_lock);
+
+	kthread_cancel_delayed_work_sync(&ptp_port->ov_work);
+
+	/* temporarily disable Tx timestamps while calibrating PHY offset */
+	spin_lock(&ptp_port->tx.lock);
+	ptp_port->tx.calibrating = true;
+	spin_unlock(&ptp_port->tx.lock);
+	ptp_port->tx_fifo_busy_cnt = 0;
+
+	/* Start the PHY timer in Vernier mode */
+	err = ice_start_phy_timer_e822(hw, port);
+	if (err)
+		goto out_unlock;
+
+	/* Enable Tx timestamps right away */
+	spin_lock(&ptp_port->tx.lock);
+	ptp_port->tx.calibrating = false;
+	spin_unlock(&ptp_port->tx.lock);
+
+	kthread_queue_delayed_work(pf->ptp.kworker, &ptp_port->ov_work, 0);
+
+out_unlock:
+	if (err)
+		dev_err(ice_pf_to_dev(pf), "PTP failed to set PHY port %d up, err %d\n",
+			port, err);
+
+	mutex_unlock(&ptp_port->ps_lock);
+
+	return err;
+}
+
+/**
+ * ice_ptp_link_change - Reconfigure PTP after link status change
+ * @pf: Board private structure
+ * @port: Port for which the PHY start is set
+ * @linkup: Link is up or down
+ */
+void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
+{
+	struct ice_ptp_port *ptp_port;
+
+	if (!test_bit(ICE_FLAG_PTP, pf->flags))
+		return;
+
+	if (WARN_ON_ONCE(port >= ICE_NUM_EXTERNAL_PORTS))
+		return;
+
+	ptp_port = &pf->ptp.port;
+	if (WARN_ON_ONCE(ptp_port->port_num != port))
+		return;
+
+	/* Update cached link status for this port immediately */
+	ptp_port->link_up = linkup;
+
+	/* E810 devices do not need to reconfigure the PHY */
+	if (ice_is_e810(&pf->hw))
+		return;
+
+	ice_ptp_port_phy_restart(ptp_port);
+}
+
+/**
+ * ice_ptp_tx_ena_intr - Enable or disable the Tx timestamp interrupt
+ * @pf: PF private structure
+ * @ena: bool value to enable or disable interrupt
+ * @threshold: Minimum number of packets at which intr is triggered
+ *
+ * Utility function to enable or disable Tx timestamp interrupt and threshold
+ */
+static int ice_ptp_tx_ena_intr(struct ice_pf *pf, bool ena, u32 threshold)
+{
+	struct ice_hw *hw = &pf->hw;
+	int err = 0;
+	int quad;
+	u32 val;
+
+	ice_ptp_reset_ts_memory(hw);
+
+	for (quad = 0; quad < ICE_MAX_QUAD; quad++) {
+		err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG,
+					     &val);
+		if (err)
+			break;
+
+		if (ena) {
+			val |= Q_REG_TX_MEM_GBL_CFG_INTR_ENA_M;
+			val &= ~Q_REG_TX_MEM_GBL_CFG_INTR_THR_M;
+			val |= ((threshold << Q_REG_TX_MEM_GBL_CFG_INTR_THR_S) &
+				Q_REG_TX_MEM_GBL_CFG_INTR_THR_M);
+		} else {
+			val &= ~Q_REG_TX_MEM_GBL_CFG_INTR_ENA_M;
+		}
+
+		err = ice_write_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG,
+					      val);
+		if (err)
+			break;
+	}
+
+	if (err)
+		dev_err(ice_pf_to_dev(pf), "PTP failed in intr ena, err %d\n",
+			err);
+	return err;
+}
+
+/**
+ * ice_ptp_reset_phy_timestamping - Reset PHY timestamping block
+ * @pf: Board private structure
+ */
+static void ice_ptp_reset_phy_timestamping(struct ice_pf *pf)
+{
+	ice_ptp_port_phy_restart(&pf->ptp.port);
+}
+
+/**
+ * ice_ptp_adjfine - Adjust clock increment rate
+ * @info: the driver's PTP info structure
+ * @scaled_ppm: Parts per million with 16-bit fractional field
+ *
+ * Adjust the frequency of the clock by the indicated scaled ppm from the
+ * base frequency.
+ */
+static int ice_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm)
+{
+	struct ice_pf *pf = ptp_info_to_pf(info);
+	struct ice_hw *hw = &pf->hw;
+	u64 incval;
+	int err;
+
+	incval = adjust_by_scaled_ppm(ice_base_incval(pf), scaled_ppm);
+	err = ice_ptp_write_incval_locked(hw, incval);
+	if (err) {
+		dev_err(ice_pf_to_dev(pf), "PTP failed to set incval, err %d\n",
+			err);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_extts_event - Process PTP external clock event
+ * @pf: Board private structure
+ */
+void ice_ptp_extts_event(struct ice_pf *pf)
+{
+	struct ptp_clock_event event;
+	struct ice_hw *hw = &pf->hw;
+	u8 chan, tmr_idx;
+	u32 hi, lo;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+	/* Event time is captured by one of the two matched registers
+	 *      GLTSYN_EVNT_L: 32 LSB of sampled time event
+	 *      GLTSYN_EVNT_H: 32 MSB of sampled time event
+	 * Event is defined in GLTSYN_EVNT_0 register
+	 */
+	for (chan = 0; chan < GLTSYN_EVNT_H_IDX_MAX; chan++) {
+		/* Check if channel is enabled */
+		if (pf->ptp.ext_ts_irq & (1 << chan)) {
+			lo = rd32(hw, GLTSYN_EVNT_L(chan, tmr_idx));
+			hi = rd32(hw, GLTSYN_EVNT_H(chan, tmr_idx));
+			event.timestamp = (((u64)hi) << 32) | lo;
+			event.type = PTP_CLOCK_EXTTS;
+			event.index = chan;
+
+			/* Fire event */
+			ptp_clock_event(pf->ptp.clock, &event);
+			pf->ptp.ext_ts_irq &= ~(1 << chan);
+		}
+	}
+}
+
+/**
+ * ice_ptp_cfg_extts - Configure EXTTS pin and channel
+ * @pf: Board private structure
+ * @ena: true to enable; false to disable
+ * @chan: GPIO channel (0-3)
+ * @gpio_pin: GPIO pin
+ * @extts_flags: request flags from the ptp_extts_request.flags
+ */
+static int
+ice_ptp_cfg_extts(struct ice_pf *pf, bool ena, unsigned int chan, u32 gpio_pin,
+		  unsigned int extts_flags)
+{
+	u32 func, aux_reg, gpio_reg, irq_reg;
+	struct ice_hw *hw = &pf->hw;
+	u8 tmr_idx;
+
+	if (chan > (unsigned int)pf->ptp.info.n_ext_ts)
+		return -EINVAL;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+	irq_reg = rd32(hw, PFINT_OICR_ENA);
+
+	if (ena) {
+		/* Enable the interrupt */
+		irq_reg |= PFINT_OICR_TSYN_EVNT_M;
+		aux_reg = GLTSYN_AUX_IN_0_INT_ENA_M;
+
+#define GLTSYN_AUX_IN_0_EVNTLVL_RISING_EDGE	BIT(0)
+#define GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE	BIT(1)
+
+		/* set event level to requested edge */
+		if (extts_flags & PTP_FALLING_EDGE)
+			aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_FALLING_EDGE;
+		if (extts_flags & PTP_RISING_EDGE)
+			aux_reg |= GLTSYN_AUX_IN_0_EVNTLVL_RISING_EDGE;
+
+		/* Write GPIO CTL reg.
+		 * 0x1 is input sampled by EVENT register(channel)
+		 * + num_in_channels * tmr_idx
+		 */
+		func = 1 + chan + (tmr_idx * 3);
+		gpio_reg = ((func << GLGEN_GPIO_CTL_PIN_FUNC_S) &
+			    GLGEN_GPIO_CTL_PIN_FUNC_M);
+		pf->ptp.ext_ts_chan |= (1 << chan);
+	} else {
+		/* clear the values we set to reset defaults */
+		aux_reg = 0;
+		gpio_reg = 0;
+		pf->ptp.ext_ts_chan &= ~(1 << chan);
+		if (!pf->ptp.ext_ts_chan)
+			irq_reg &= ~PFINT_OICR_TSYN_EVNT_M;
+	}
+
+	wr32(hw, PFINT_OICR_ENA, irq_reg);
+	wr32(hw, GLTSYN_AUX_IN(chan, tmr_idx), aux_reg);
+	wr32(hw, GLGEN_GPIO_CTL(gpio_pin), gpio_reg);
+
+	return 0;
+}
+
+/**
+ * ice_ptp_cfg_clkout - Configure clock to generate periodic wave
+ * @pf: Board private structure
+ * @chan: GPIO channel (0-3)
+ * @config: desired periodic clk configuration. NULL will disable channel
+ * @store: If set to true the values will be stored
+ *
+ * Configure the internal clock generator modules to generate the clock wave of
+ * specified period.
+ */
+static int ice_ptp_cfg_clkout(struct ice_pf *pf, unsigned int chan,
+			      struct ice_perout_channel *config, bool store)
+{
+	u64 current_time, period, start_time, phase;
+	struct ice_hw *hw = &pf->hw;
+	u32 func, val, gpio_pin;
+	u8 tmr_idx;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+	/* 0. Reset mode & out_en in AUX_OUT */
+	wr32(hw, GLTSYN_AUX_OUT(chan, tmr_idx), 0);
+
+	/* If we're disabling the output, clear out CLKO and TGT and keep
+	 * output level low
+	 */
+	if (!config || !config->ena) {
+		wr32(hw, GLTSYN_CLKO(chan, tmr_idx), 0);
+		wr32(hw, GLTSYN_TGT_L(chan, tmr_idx), 0);
+		wr32(hw, GLTSYN_TGT_H(chan, tmr_idx), 0);
+
+		val = GLGEN_GPIO_CTL_PIN_DIR_M;
+		gpio_pin = pf->ptp.perout_channels[chan].gpio_pin;
+		wr32(hw, GLGEN_GPIO_CTL(gpio_pin), val);
+
+		/* Store the value if requested */
+		if (store)
+			memset(&pf->ptp.perout_channels[chan], 0,
+			       sizeof(struct ice_perout_channel));
+
+		return 0;
+	}
+	period = config->period;
+	start_time = config->start_time;
+	div64_u64_rem(start_time, period, &phase);
+	gpio_pin = config->gpio_pin;
+
+	/* 1. Write clkout with half of required period value */
+	if (period & 0x1) {
+		dev_err(ice_pf_to_dev(pf), "CLK Period must be an even value\n");
+		goto err;
+	}
+
+	period >>= 1;
+
+	/* For proper operation, the GLTSYN_CLKO must be larger than clock tick
+	 */
+#define MIN_PULSE 3
+	if (period <= MIN_PULSE || period > U32_MAX) {
+		dev_err(ice_pf_to_dev(pf), "CLK Period must be > %d && < 2^33",
+			MIN_PULSE * 2);
+		goto err;
+	}
+
+	wr32(hw, GLTSYN_CLKO(chan, tmr_idx), lower_32_bits(period));
+
+	/* Allow time for programming before start_time is hit */
+	current_time = ice_ptp_read_src_clk_reg(pf, NULL);
+
+	/* if start time is in the past start the timer at the nearest second
+	 * maintaining phase
+	 */
+	if (start_time < current_time)
+		start_time = div64_u64(current_time + NSEC_PER_SEC - 1,
+				       NSEC_PER_SEC) * NSEC_PER_SEC + phase;
+
+	if (ice_is_e810(hw))
+		start_time -= E810_OUT_PROP_DELAY_NS;
+	else
+		start_time -= ice_e822_pps_delay(ice_e822_time_ref(hw));
+
+	/* 2. Write TARGET time */
+	wr32(hw, GLTSYN_TGT_L(chan, tmr_idx), lower_32_bits(start_time));
+	wr32(hw, GLTSYN_TGT_H(chan, tmr_idx), upper_32_bits(start_time));
+
+	/* 3. Write AUX_OUT register */
+	val = GLTSYN_AUX_OUT_0_OUT_ENA_M | GLTSYN_AUX_OUT_0_OUTMOD_M;
+	wr32(hw, GLTSYN_AUX_OUT(chan, tmr_idx), val);
+
+	/* 4. write GPIO CTL reg */
+	func = 8 + chan + (tmr_idx * 4);
+	val = GLGEN_GPIO_CTL_PIN_DIR_M |
+	      ((func << GLGEN_GPIO_CTL_PIN_FUNC_S) & GLGEN_GPIO_CTL_PIN_FUNC_M);
+	wr32(hw, GLGEN_GPIO_CTL(gpio_pin), val);
+
+	/* Store the value if requested */
+	if (store) {
+		memcpy(&pf->ptp.perout_channels[chan], config,
+		       sizeof(struct ice_perout_channel));
+		pf->ptp.perout_channels[chan].start_time = phase;
+	}
+
+	return 0;
+err:
+	dev_err(ice_pf_to_dev(pf), "PTP failed to cfg per_clk\n");
+	return -EFAULT;
+}
+
+/**
+ * ice_ptp_disable_all_clkout - Disable all currently configured outputs
+ * @pf: pointer to the PF structure
+ *
+ * Disable all currently configured clock outputs. This is necessary before
+ * certain changes to the PTP hardware clock. Use ice_ptp_enable_all_clkout to
+ * re-enable the clocks again.
+ */
+static void ice_ptp_disable_all_clkout(struct ice_pf *pf)
+{
+	uint i;
+
+	for (i = 0; i < pf->ptp.info.n_per_out; i++)
+		if (pf->ptp.perout_channels[i].ena)
+			ice_ptp_cfg_clkout(pf, i, NULL, false);
+}
+
+/**
+ * ice_ptp_enable_all_clkout - Enable all configured periodic clock outputs
+ * @pf: pointer to the PF structure
+ *
+ * Enable all currently configured clock outputs. Use this after
+ * ice_ptp_disable_all_clkout to reconfigure the output signals according to
+ * their configuration.
+ */
+static void ice_ptp_enable_all_clkout(struct ice_pf *pf)
+{
+	uint i;
+
+	for (i = 0; i < pf->ptp.info.n_per_out; i++)
+		if (pf->ptp.perout_channels[i].ena)
+			ice_ptp_cfg_clkout(pf, i, &pf->ptp.perout_channels[i],
+					   false);
+}
+
+/**
+ * ice_ptp_gpio_enable_e810 - Enable/disable ancillary features of PHC
+ * @info: the driver's PTP info structure
+ * @rq: The requested feature to change
+ * @on: Enable/disable flag
+ */
+static int
+ice_ptp_gpio_enable_e810(struct ptp_clock_info *info,
+			 struct ptp_clock_request *rq, int on)
+{
+	struct ice_pf *pf = ptp_info_to_pf(info);
+	struct ice_perout_channel clk_cfg = {0};
+	bool sma_pres = false;
+	unsigned int chan;
+	u32 gpio_pin;
+	int err;
+
+	if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL))
+		sma_pres = true;
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_PEROUT:
+		chan = rq->perout.index;
+		if (sma_pres) {
+			if (chan == ice_pin_desc_e810t[SMA1].chan)
+				clk_cfg.gpio_pin = GPIO_20;
+			else if (chan == ice_pin_desc_e810t[SMA2].chan)
+				clk_cfg.gpio_pin = GPIO_22;
+			else
+				return -1;
+		} else if (ice_is_e810t(&pf->hw)) {
+			if (chan == 0)
+				clk_cfg.gpio_pin = GPIO_20;
+			else
+				clk_cfg.gpio_pin = GPIO_22;
+		} else if (chan == PPS_CLK_GEN_CHAN) {
+			clk_cfg.gpio_pin = PPS_PIN_INDEX;
+		} else {
+			clk_cfg.gpio_pin = chan;
+		}
+
+		clk_cfg.period = ((rq->perout.period.sec * NSEC_PER_SEC) +
+				   rq->perout.period.nsec);
+		clk_cfg.start_time = ((rq->perout.start.sec * NSEC_PER_SEC) +
+				       rq->perout.start.nsec);
+		clk_cfg.ena = !!on;
+
+		err = ice_ptp_cfg_clkout(pf, chan, &clk_cfg, true);
+		break;
+	case PTP_CLK_REQ_EXTTS:
+		chan = rq->extts.index;
+		if (sma_pres) {
+			if (chan < ice_pin_desc_e810t[SMA2].chan)
+				gpio_pin = GPIO_21;
+			else
+				gpio_pin = GPIO_23;
+		} else if (ice_is_e810t(&pf->hw)) {
+			if (chan == 0)
+				gpio_pin = GPIO_21;
+			else
+				gpio_pin = GPIO_23;
+		} else {
+			gpio_pin = chan;
+		}
+
+		err = ice_ptp_cfg_extts(pf, !!on, chan, gpio_pin,
+					rq->extts.flags);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+/**
+ * ice_ptp_gpio_enable_e823 - Enable/disable ancillary features of PHC
+ * @info: the driver's PTP info structure
+ * @rq: The requested feature to change
+ * @on: Enable/disable flag
+ */
+static int ice_ptp_gpio_enable_e823(struct ptp_clock_info *info,
+				    struct ptp_clock_request *rq, int on)
+{
+	struct ice_pf *pf = ptp_info_to_pf(info);
+	struct ice_perout_channel clk_cfg = {0};
+	int err;
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_PPS:
+		clk_cfg.gpio_pin = PPS_PIN_INDEX;
+		clk_cfg.period = NSEC_PER_SEC;
+		clk_cfg.ena = !!on;
+
+		err = ice_ptp_cfg_clkout(pf, PPS_CLK_GEN_CHAN, &clk_cfg, true);
+		break;
+	case PTP_CLK_REQ_EXTTS:
+		err = ice_ptp_cfg_extts(pf, !!on, rq->extts.index,
+					TIME_SYNC_PIN_INDEX, rq->extts.flags);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+/**
+ * ice_ptp_gettimex64 - Get the time of the clock
+ * @info: the driver's PTP info structure
+ * @ts: timespec64 structure to hold the current time value
+ * @sts: Optional parameter for holding a pair of system timestamps from
+ *       the system clock. Will be ignored if NULL is given.
+ *
+ * Read the device clock and return the correct value on ns, after converting it
+ * into a timespec struct.
+ */
+static int
+ice_ptp_gettimex64(struct ptp_clock_info *info, struct timespec64 *ts,
+		   struct ptp_system_timestamp *sts)
+{
+	struct ice_pf *pf = ptp_info_to_pf(info);
+	struct ice_hw *hw = &pf->hw;
+
+	if (!ice_ptp_lock(hw)) {
+		dev_err(ice_pf_to_dev(pf), "PTP failed to get time\n");
+		return -EBUSY;
+	}
+
+	ice_ptp_read_time(pf, ts, sts);
+	ice_ptp_unlock(hw);
+
+	return 0;
+}
+
+/**
+ * ice_ptp_settime64 - Set the time of the clock
+ * @info: the driver's PTP info structure
+ * @ts: timespec64 structure that holds the new time value
+ *
+ * Set the device clock to the user input value. The conversion from timespec
+ * to ns happens in the write function.
+ */
+static int
+ice_ptp_settime64(struct ptp_clock_info *info, const struct timespec64 *ts)
+{
+	struct ice_pf *pf = ptp_info_to_pf(info);
+	struct timespec64 ts64 = *ts;
+	struct ice_hw *hw = &pf->hw;
+	int err;
+
+	/* For Vernier mode, we need to recalibrate after new settime
+	 * Start with disabling timestamp block
+	 */
+	if (pf->ptp.port.link_up)
+		ice_ptp_port_phy_stop(&pf->ptp.port);
+
+	if (!ice_ptp_lock(hw)) {
+		err = -EBUSY;
+		goto exit;
+	}
+
+	/* Disable periodic outputs */
+	ice_ptp_disable_all_clkout(pf);
+
+	err = ice_ptp_write_init(pf, &ts64);
+	ice_ptp_unlock(hw);
+
+	if (!err)
+		ice_ptp_reset_cached_phctime(pf);
+
+	/* Reenable periodic outputs */
+	ice_ptp_enable_all_clkout(pf);
+
+	/* Recalibrate and re-enable timestamp block */
+	if (pf->ptp.port.link_up)
+		ice_ptp_port_phy_restart(&pf->ptp.port);
+exit:
+	if (err) {
+		dev_err(ice_pf_to_dev(pf), "PTP failed to set time %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_adjtime_nonatomic - Do a non-atomic clock adjustment
+ * @info: the driver's PTP info structure
+ * @delta: Offset in nanoseconds to adjust the time by
+ */
+static int ice_ptp_adjtime_nonatomic(struct ptp_clock_info *info, s64 delta)
+{
+	struct timespec64 now, then;
+	int ret;
+
+	then = ns_to_timespec64(delta);
+	ret = ice_ptp_gettimex64(info, &now, NULL);
+	if (ret)
+		return ret;
+	now = timespec64_add(now, then);
+
+	return ice_ptp_settime64(info, (const struct timespec64 *)&now);
+}
+
+/**
+ * ice_ptp_adjtime - Adjust the time of the clock by the indicated delta
+ * @info: the driver's PTP info structure
+ * @delta: Offset in nanoseconds to adjust the time by
+ */
+static int ice_ptp_adjtime(struct ptp_clock_info *info, s64 delta)
+{
+	struct ice_pf *pf = ptp_info_to_pf(info);
+	struct ice_hw *hw = &pf->hw;
+	struct device *dev;
+	int err;
+
+	dev = ice_pf_to_dev(pf);
+
+	/* Hardware only supports atomic adjustments using signed 32-bit
+	 * integers. For any adjustment outside this range, perform
+	 * a non-atomic get->adjust->set flow.
+	 */
+	if (delta > S32_MAX || delta < S32_MIN) {
+		dev_dbg(dev, "delta = %lld, adjtime non-atomic\n", delta);
+		return ice_ptp_adjtime_nonatomic(info, delta);
+	}
+
+	if (!ice_ptp_lock(hw)) {
+		dev_err(dev, "PTP failed to acquire semaphore in adjtime\n");
+		return -EBUSY;
+	}
+
+	/* Disable periodic outputs */
+	ice_ptp_disable_all_clkout(pf);
+
+	err = ice_ptp_write_adj(pf, delta);
+
+	/* Reenable periodic outputs */
+	ice_ptp_enable_all_clkout(pf);
+
+	ice_ptp_unlock(hw);
+
+	if (err) {
+		dev_err(dev, "PTP failed to adjust time, err %d\n", err);
+		return err;
+	}
+
+	ice_ptp_reset_cached_phctime(pf);
+
+	return 0;
+}
+
+#ifdef CONFIG_ICE_HWTS
+/**
+ * ice_ptp_get_syncdevicetime - Get the cross time stamp info
+ * @device: Current device time
+ * @system: System counter value read synchronously with device time
+ * @ctx: Context provided by timekeeping code
+ *
+ * Read device and system (ART) clock simultaneously and return the corrected
+ * clock values in ns.
+ */
+static int
+ice_ptp_get_syncdevicetime(ktime_t *device,
+			   struct system_counterval_t *system,
+			   void *ctx)
+{
+	struct ice_pf *pf = (struct ice_pf *)ctx;
+	struct ice_hw *hw = &pf->hw;
+	u32 hh_lock, hh_art_ctl;
+	int i;
+
+	/* Get the HW lock */
+	hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
+	if (hh_lock & PFHH_SEM_BUSY_M) {
+		dev_err(ice_pf_to_dev(pf), "PTP failed to get hh lock\n");
+		return -EFAULT;
+	}
+
+	/* Start the ART and device clock sync sequence */
+	hh_art_ctl = rd32(hw, GLHH_ART_CTL);
+	hh_art_ctl = hh_art_ctl | GLHH_ART_CTL_ACTIVE_M;
+	wr32(hw, GLHH_ART_CTL, hh_art_ctl);
+
+#define MAX_HH_LOCK_TRIES 100
+
+	for (i = 0; i < MAX_HH_LOCK_TRIES; i++) {
+		/* Wait for sync to complete */
+		hh_art_ctl = rd32(hw, GLHH_ART_CTL);
+		if (hh_art_ctl & GLHH_ART_CTL_ACTIVE_M) {
+			udelay(1);
+			continue;
+		} else {
+			u32 hh_ts_lo, hh_ts_hi, tmr_idx;
+			u64 hh_ts;
+
+			tmr_idx = hw->func_caps.ts_func_info.tmr_index_assoc;
+			/* Read ART time */
+			hh_ts_lo = rd32(hw, GLHH_ART_TIME_L);
+			hh_ts_hi = rd32(hw, GLHH_ART_TIME_H);
+			hh_ts = ((u64)hh_ts_hi << 32) | hh_ts_lo;
+			*system = convert_art_ns_to_tsc(hh_ts);
+			/* Read Device source clock time */
+			hh_ts_lo = rd32(hw, GLTSYN_HHTIME_L(tmr_idx));
+			hh_ts_hi = rd32(hw, GLTSYN_HHTIME_H(tmr_idx));
+			hh_ts = ((u64)hh_ts_hi << 32) | hh_ts_lo;
+			*device = ns_to_ktime(hh_ts);
+			break;
+		}
+	}
+	/* Release HW lock */
+	hh_lock = rd32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
+	hh_lock = hh_lock & ~PFHH_SEM_BUSY_M;
+	wr32(hw, PFHH_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), hh_lock);
+
+	if (i == MAX_HH_LOCK_TRIES)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+/**
+ * ice_ptp_getcrosststamp_e822 - Capture a device cross timestamp
+ * @info: the driver's PTP info structure
+ * @cts: The memory to fill the cross timestamp info
+ *
+ * Capture a cross timestamp between the ART and the device PTP hardware
+ * clock. Fill the cross timestamp information and report it back to the
+ * caller.
+ *
+ * This is only valid for E822 devices which have support for generating the
+ * cross timestamp via PCIe PTM.
+ *
+ * In order to correctly correlate the ART timestamp back to the TSC time, the
+ * CPU must have X86_FEATURE_TSC_KNOWN_FREQ.
+ */
+static int
+ice_ptp_getcrosststamp_e822(struct ptp_clock_info *info,
+			    struct system_device_crosststamp *cts)
+{
+	struct ice_pf *pf = ptp_info_to_pf(info);
+
+	return get_device_system_crosststamp(ice_ptp_get_syncdevicetime,
+					     pf, NULL, cts);
+}
+#endif /* CONFIG_ICE_HWTS */
+
+/**
+ * ice_ptp_get_ts_config - ioctl interface to read the timestamping config
+ * @pf: Board private structure
+ * @ifr: ioctl data
+ *
+ * Copy the timestamping config to user buffer
+ */
+int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr)
+{
+	struct hwtstamp_config *config;
+
+	if (!test_bit(ICE_FLAG_PTP, pf->flags))
+		return -EIO;
+
+	config = &pf->ptp.tstamp_config;
+
+	return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
+		-EFAULT : 0;
+}
+
+/**
+ * ice_ptp_set_timestamp_mode - Setup driver for requested timestamp mode
+ * @pf: Board private structure
+ * @config: hwtstamp settings requested or saved
+ */
+static int
+ice_ptp_set_timestamp_mode(struct ice_pf *pf, struct hwtstamp_config *config)
+{
+	switch (config->tx_type) {
+	case HWTSTAMP_TX_OFF:
+		ice_set_tx_tstamp(pf, false);
+		break;
+	case HWTSTAMP_TX_ON:
+		ice_set_tx_tstamp(pf, true);
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config->rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		ice_set_rx_tstamp(pf, false);
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_NTP_ALL:
+	case HWTSTAMP_FILTER_ALL:
+		ice_set_rx_tstamp(pf, true);
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_set_ts_config - ioctl interface to control the timestamping
+ * @pf: Board private structure
+ * @ifr: ioctl data
+ *
+ * Get the user config and store it
+ */
+int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr)
+{
+	struct hwtstamp_config config;
+	int err;
+
+	if (!test_bit(ICE_FLAG_PTP, pf->flags))
+		return -EAGAIN;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	err = ice_ptp_set_timestamp_mode(pf, &config);
+	if (err)
+		return err;
+
+	/* Return the actual configuration set */
+	config = pf->ptp.tstamp_config;
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+		-EFAULT : 0;
+}
+
+/**
+ * ice_ptp_rx_hwtstamp - Check for an Rx timestamp
+ * @rx_ring: Ring to get the VSI info
+ * @rx_desc: Receive descriptor
+ * @skb: Particular skb to send timestamp with
+ *
+ * The driver receives a notification in the receive descriptor with timestamp.
+ * The timestamp is in ns, so we must convert the result first.
+ */
+void
+ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
+		    union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb)
+{
+	struct skb_shared_hwtstamps *hwtstamps;
+	u64 ts_ns, cached_time;
+	u32 ts_high;
+
+	if (!(rx_desc->wb.time_stamp_low & ICE_PTP_TS_VALID))
+		return;
+
+	cached_time = READ_ONCE(rx_ring->cached_phctime);
+
+	/* Do not report a timestamp if we don't have a cached PHC time */
+	if (!cached_time)
+		return;
+
+	/* Use ice_ptp_extend_32b_ts directly, using the ring-specific cached
+	 * PHC value, rather than accessing the PF. This also allows us to
+	 * simply pass the upper 32bits of nanoseconds directly. Calling
+	 * ice_ptp_extend_40b_ts is unnecessary as it would just discard these
+	 * bits itself.
+	 */
+	ts_high = le32_to_cpu(rx_desc->wb.flex_ts.ts_high);
+	ts_ns = ice_ptp_extend_32b_ts(cached_time, ts_high);
+
+	hwtstamps = skb_hwtstamps(skb);
+	memset(hwtstamps, 0, sizeof(*hwtstamps));
+	hwtstamps->hwtstamp = ns_to_ktime(ts_ns);
+}
+
+/**
+ * ice_ptp_disable_sma_pins_e810t - Disable E810-T SMA pins
+ * @pf: pointer to the PF structure
+ * @info: PTP clock info structure
+ *
+ * Disable the OS access to the SMA pins. Called to clear out the OS
+ * indications of pin support when we fail to setup the E810-T SMA control
+ * register.
+ */
+static void
+ice_ptp_disable_sma_pins_e810t(struct ice_pf *pf, struct ptp_clock_info *info)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+
+	dev_warn(dev, "Failed to configure E810-T SMA pin control\n");
+
+	info->enable = NULL;
+	info->verify = NULL;
+	info->n_pins = 0;
+	info->n_ext_ts = 0;
+	info->n_per_out = 0;
+}
+
+/**
+ * ice_ptp_setup_sma_pins_e810t - Setup the SMA pins
+ * @pf: pointer to the PF structure
+ * @info: PTP clock info structure
+ *
+ * Finish setting up the SMA pins by allocating pin_config, and setting it up
+ * according to the current status of the SMA. On failure, disable all of the
+ * extended SMA pin support.
+ */
+static void
+ice_ptp_setup_sma_pins_e810t(struct ice_pf *pf, struct ptp_clock_info *info)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	int err;
+
+	/* Allocate memory for kernel pins interface */
+	info->pin_config = devm_kcalloc(dev, info->n_pins,
+					sizeof(*info->pin_config), GFP_KERNEL);
+	if (!info->pin_config) {
+		ice_ptp_disable_sma_pins_e810t(pf, info);
+		return;
+	}
+
+	/* Read current SMA status */
+	err = ice_get_sma_config_e810t(&pf->hw, info->pin_config);
+	if (err)
+		ice_ptp_disable_sma_pins_e810t(pf, info);
+}
+
+/**
+ * ice_ptp_setup_pins_e810 - Setup PTP pins in sysfs
+ * @pf: pointer to the PF instance
+ * @info: PTP clock capabilities
+ */
+static void
+ice_ptp_setup_pins_e810(struct ice_pf *pf, struct ptp_clock_info *info)
+{
+	if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL)) {
+		info->n_ext_ts = N_EXT_TS_E810;
+		info->n_per_out = N_PER_OUT_E810T;
+		info->n_pins = NUM_PTP_PINS_E810T;
+		info->verify = ice_verify_pin_e810t;
+
+		/* Complete setup of the SMA pins */
+		ice_ptp_setup_sma_pins_e810t(pf, info);
+	} else if (ice_is_e810t(&pf->hw)) {
+		info->n_ext_ts = N_EXT_TS_NO_SMA_E810T;
+		info->n_per_out = N_PER_OUT_NO_SMA_E810T;
+	} else {
+		info->n_per_out = N_PER_OUT_E810;
+		info->n_ext_ts = N_EXT_TS_E810;
+	}
+}
+
+/**
+ * ice_ptp_setup_pins_e823 - Setup PTP pins in sysfs
+ * @pf: pointer to the PF instance
+ * @info: PTP clock capabilities
+ */
+static void
+ice_ptp_setup_pins_e823(struct ice_pf *pf, struct ptp_clock_info *info)
+{
+	info->pps = 1;
+	info->n_per_out = 0;
+	info->n_ext_ts = 1;
+}
+
+/**
+ * ice_ptp_set_funcs_e822 - Set specialized functions for E822 support
+ * @pf: Board private structure
+ * @info: PTP info to fill
+ *
+ * Assign functions to the PTP capabiltiies structure for E822 devices.
+ * Functions which operate across all device families should be set directly
+ * in ice_ptp_set_caps. Only add functions here which are distinct for E822
+ * devices.
+ */
+static void
+ice_ptp_set_funcs_e822(struct ice_pf *pf, struct ptp_clock_info *info)
+{
+#ifdef CONFIG_ICE_HWTS
+	if (boot_cpu_has(X86_FEATURE_ART) &&
+	    boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ))
+		info->getcrosststamp = ice_ptp_getcrosststamp_e822;
+#endif /* CONFIG_ICE_HWTS */
+}
+
+/**
+ * ice_ptp_set_funcs_e810 - Set specialized functions for E810 support
+ * @pf: Board private structure
+ * @info: PTP info to fill
+ *
+ * Assign functions to the PTP capabiltiies structure for E810 devices.
+ * Functions which operate across all device families should be set directly
+ * in ice_ptp_set_caps. Only add functions here which are distinct for e810
+ * devices.
+ */
+static void
+ice_ptp_set_funcs_e810(struct ice_pf *pf, struct ptp_clock_info *info)
+{
+	info->enable = ice_ptp_gpio_enable_e810;
+	ice_ptp_setup_pins_e810(pf, info);
+}
+
+/**
+ * ice_ptp_set_funcs_e823 - Set specialized functions for E823 support
+ * @pf: Board private structure
+ * @info: PTP info to fill
+ *
+ * Assign functions to the PTP capabiltiies structure for E823 devices.
+ * Functions which operate across all device families should be set directly
+ * in ice_ptp_set_caps. Only add functions here which are distinct for e823
+ * devices.
+ */
+static void
+ice_ptp_set_funcs_e823(struct ice_pf *pf, struct ptp_clock_info *info)
+{
+	info->enable = ice_ptp_gpio_enable_e823;
+	ice_ptp_setup_pins_e823(pf, info);
+}
+
+/**
+ * ice_ptp_set_caps - Set PTP capabilities
+ * @pf: Board private structure
+ */
+static void ice_ptp_set_caps(struct ice_pf *pf)
+{
+	struct ptp_clock_info *info = &pf->ptp.info;
+	struct device *dev = ice_pf_to_dev(pf);
+
+	snprintf(info->name, sizeof(info->name) - 1, "%s-%s-clk",
+		 dev_driver_string(dev), dev_name(dev));
+	info->owner = THIS_MODULE;
+	info->max_adj = 100000000;
+	info->adjtime = ice_ptp_adjtime;
+	info->adjfine = ice_ptp_adjfine;
+	info->gettimex64 = ice_ptp_gettimex64;
+	info->settime64 = ice_ptp_settime64;
+
+	if (ice_is_e810(&pf->hw))
+		ice_ptp_set_funcs_e810(pf, info);
+	else if (ice_is_e823(&pf->hw))
+		ice_ptp_set_funcs_e823(pf, info);
+	else
+		ice_ptp_set_funcs_e822(pf, info);
+}
+
+/**
+ * ice_ptp_create_clock - Create PTP clock device for userspace
+ * @pf: Board private structure
+ *
+ * This function creates a new PTP clock device. It only creates one if we
+ * don't already have one. Will return error if it can't create one, but success
+ * if we already have a device. Should be used by ice_ptp_init to create clock
+ * initially, and prevent global resets from creating new clock devices.
+ */
+static long ice_ptp_create_clock(struct ice_pf *pf)
+{
+	struct ptp_clock_info *info;
+	struct ptp_clock *clock;
+	struct device *dev;
+
+	/* No need to create a clock device if we already have one */
+	if (pf->ptp.clock)
+		return 0;
+
+	ice_ptp_set_caps(pf);
+
+	info = &pf->ptp.info;
+	dev = ice_pf_to_dev(pf);
+
+	/* Attempt to register the clock before enabling the hardware. */
+	clock = ptp_clock_register(info, dev);
+	if (IS_ERR(clock))
+		return PTR_ERR(clock);
+
+	pf->ptp.clock = clock;
+
+	return 0;
+}
+
+/**
+ * ice_ptp_request_ts - Request an available Tx timestamp index
+ * @tx: the PTP Tx timestamp tracker to request from
+ * @skb: the SKB to associate with this timestamp request
+ */
+s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
+{
+	u8 idx;
+
+	spin_lock(&tx->lock);
+
+	/* Check that this tracker is accepting new timestamp requests */
+	if (!ice_ptp_is_tx_tracker_up(tx)) {
+		spin_unlock(&tx->lock);
+		return -1;
+	}
+
+	/* Find and set the first available index */
+	idx = find_first_zero_bit(tx->in_use, tx->len);
+	if (idx < tx->len) {
+		/* We got a valid index that no other thread could have set. Store
+		 * a reference to the skb and the start time to allow discarding old
+		 * requests.
+		 */
+		set_bit(idx, tx->in_use);
+		clear_bit(idx, tx->stale);
+		tx->tstamps[idx].start = jiffies;
+		tx->tstamps[idx].skb = skb_get(skb);
+		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+		ice_trace(tx_tstamp_request, skb, idx);
+	}
+
+	spin_unlock(&tx->lock);
+
+	/* return the appropriate PHY timestamp register index, -1 if no
+	 * indexes were available.
+	 */
+	if (idx >= tx->len)
+		return -1;
+	else
+		return idx + tx->offset;
+}
+
+/**
+ * ice_ptp_process_ts - Process the PTP Tx timestamps
+ * @pf: Board private structure
+ *
+ * Returns: ICE_TX_TSTAMP_WORK_PENDING if there are any outstanding Tx
+ * timestamps that need processing, and ICE_TX_TSTAMP_WORK_DONE otherwise.
+ */
+enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf)
+{
+	return ice_ptp_tx_tstamp(&pf->ptp.port.tx);
+}
+
+static void ice_ptp_periodic_work(struct kthread_work *work)
+{
+	struct ice_ptp *ptp = container_of(work, struct ice_ptp, work.work);
+	struct ice_pf *pf = container_of(ptp, struct ice_pf, ptp);
+	int err;
+
+	if (!test_bit(ICE_FLAG_PTP, pf->flags))
+		return;
+
+	err = ice_ptp_update_cached_phctime(pf);
+
+	/* Run twice a second or reschedule if phc update failed */
+	kthread_queue_delayed_work(ptp->kworker, &ptp->work,
+				   msecs_to_jiffies(err ? 10 : 500));
+}
+
+/**
+ * ice_ptp_reset - Initialize PTP hardware clock support after reset
+ * @pf: Board private structure
+ */
+void ice_ptp_reset(struct ice_pf *pf)
+{
+	struct ice_ptp *ptp = &pf->ptp;
+	struct ice_hw *hw = &pf->hw;
+	struct timespec64 ts;
+	int err, itr = 1;
+	u64 time_diff;
+
+	if (test_bit(ICE_PFR_REQ, pf->state))
+		goto pfr;
+
+	if (!hw->func_caps.ts_func_info.src_tmr_owned)
+		goto reset_ts;
+
+	err = ice_ptp_init_phc(hw);
+	if (err)
+		goto err;
+
+	/* Acquire the global hardware lock */
+	if (!ice_ptp_lock(hw)) {
+		err = -EBUSY;
+		goto err;
+	}
+
+	/* Write the increment time value to PHY and LAN */
+	err = ice_ptp_write_incval(hw, ice_base_incval(pf));
+	if (err) {
+		ice_ptp_unlock(hw);
+		goto err;
+	}
+
+	/* Write the initial Time value to PHY and LAN using the cached PHC
+	 * time before the reset and time difference between stopping and
+	 * starting the clock.
+	 */
+	if (ptp->cached_phc_time) {
+		time_diff = ktime_get_real_ns() - ptp->reset_time;
+		ts = ns_to_timespec64(ptp->cached_phc_time + time_diff);
+	} else {
+		ts = ktime_to_timespec64(ktime_get_real());
+	}
+	err = ice_ptp_write_init(pf, &ts);
+	if (err) {
+		ice_ptp_unlock(hw);
+		goto err;
+	}
+
+	/* Release the global hardware lock */
+	ice_ptp_unlock(hw);
+
+	if (!ice_is_e810(hw)) {
+		/* Enable quad interrupts */
+		err = ice_ptp_tx_ena_intr(pf, true, itr);
+		if (err)
+			goto err;
+	}
+
+reset_ts:
+	/* Restart the PHY timestamping block */
+	ice_ptp_reset_phy_timestamping(pf);
+
+pfr:
+	/* Init Tx structures */
+	if (ice_is_e810(&pf->hw)) {
+		err = ice_ptp_init_tx_e810(pf, &ptp->port.tx);
+	} else {
+		kthread_init_delayed_work(&ptp->port.ov_work,
+					  ice_ptp_wait_for_offsets);
+		err = ice_ptp_init_tx_e822(pf, &ptp->port.tx,
+					   ptp->port.port_num);
+	}
+	if (err)
+		goto err;
+
+	set_bit(ICE_FLAG_PTP, pf->flags);
+
+	/* Start periodic work going */
+	kthread_queue_delayed_work(ptp->kworker, &ptp->work, 0);
+
+	dev_info(ice_pf_to_dev(pf), "PTP reset successful\n");
+	return;
+
+err:
+	dev_err(ice_pf_to_dev(pf), "PTP reset failed %d\n", err);
+}
+
+/**
+ * ice_ptp_prepare_for_reset - Prepare PTP for reset
+ * @pf: Board private structure
+ */
+void ice_ptp_prepare_for_reset(struct ice_pf *pf)
+{
+	struct ice_ptp *ptp = &pf->ptp;
+	u8 src_tmr;
+
+	clear_bit(ICE_FLAG_PTP, pf->flags);
+
+	/* Disable timestamping for both Tx and Rx */
+	ice_ptp_cfg_timestamp(pf, false);
+
+	kthread_cancel_delayed_work_sync(&ptp->work);
+
+	if (test_bit(ICE_PFR_REQ, pf->state))
+		return;
+
+	ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx);
+
+	/* Disable periodic outputs */
+	ice_ptp_disable_all_clkout(pf);
+
+	src_tmr = ice_get_ptp_src_clock_index(&pf->hw);
+
+	/* Disable source clock */
+	wr32(&pf->hw, GLTSYN_ENA(src_tmr), (u32)~GLTSYN_ENA_TSYN_ENA_M);
+
+	/* Acquire PHC and system timer to restore after reset */
+	ptp->reset_time = ktime_get_real_ns();
+}
+
+/**
+ * ice_ptp_init_owner - Initialize PTP_1588_CLOCK device
+ * @pf: Board private structure
+ *
+ * Setup and initialize a PTP clock device that represents the device hardware
+ * clock. Save the clock index for other functions connected to the same
+ * hardware resource.
+ */
+static int ice_ptp_init_owner(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	struct timespec64 ts;
+	int err, itr = 1;
+
+	err = ice_ptp_init_phc(hw);
+	if (err) {
+		dev_err(ice_pf_to_dev(pf), "Failed to initialize PHC, err %d\n",
+			err);
+		return err;
+	}
+
+	/* Acquire the global hardware lock */
+	if (!ice_ptp_lock(hw)) {
+		err = -EBUSY;
+		goto err_exit;
+	}
+
+	/* Write the increment time value to PHY and LAN */
+	err = ice_ptp_write_incval(hw, ice_base_incval(pf));
+	if (err) {
+		ice_ptp_unlock(hw);
+		goto err_exit;
+	}
+
+	ts = ktime_to_timespec64(ktime_get_real());
+	/* Write the initial Time value to PHY and LAN */
+	err = ice_ptp_write_init(pf, &ts);
+	if (err) {
+		ice_ptp_unlock(hw);
+		goto err_exit;
+	}
+
+	/* Release the global hardware lock */
+	ice_ptp_unlock(hw);
+
+	if (!ice_is_e810(hw)) {
+		/* Enable quad interrupts */
+		err = ice_ptp_tx_ena_intr(pf, true, itr);
+		if (err)
+			goto err_exit;
+	}
+
+	/* Ensure we have a clock device */
+	err = ice_ptp_create_clock(pf);
+	if (err)
+		goto err_clk;
+
+	/* Store the PTP clock index for other PFs */
+	ice_set_ptp_clock_index(pf);
+
+	return 0;
+
+err_clk:
+	pf->ptp.clock = NULL;
+err_exit:
+	return err;
+}
+
+/**
+ * ice_ptp_init_work - Initialize PTP work threads
+ * @pf: Board private structure
+ * @ptp: PF PTP structure
+ */
+static int ice_ptp_init_work(struct ice_pf *pf, struct ice_ptp *ptp)
+{
+	struct kthread_worker *kworker;
+
+	/* Initialize work functions */
+	kthread_init_delayed_work(&ptp->work, ice_ptp_periodic_work);
+
+	/* Allocate a kworker for handling work required for the ports
+	 * connected to the PTP hardware clock.
+	 */
+	kworker = kthread_create_worker(0, "ice-ptp-%s",
+					dev_name(ice_pf_to_dev(pf)));
+	if (IS_ERR(kworker))
+		return PTR_ERR(kworker);
+
+	ptp->kworker = kworker;
+
+	/* Start periodic work going */
+	kthread_queue_delayed_work(ptp->kworker, &ptp->work, 0);
+
+	return 0;
+}
+
+/**
+ * ice_ptp_init_port - Initialize PTP port structure
+ * @pf: Board private structure
+ * @ptp_port: PTP port structure
+ */
+static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port)
+{
+	mutex_init(&ptp_port->ps_lock);
+
+	if (ice_is_e810(&pf->hw))
+		return ice_ptp_init_tx_e810(pf, &ptp_port->tx);
+
+	kthread_init_delayed_work(&ptp_port->ov_work,
+				  ice_ptp_wait_for_offsets);
+	return ice_ptp_init_tx_e822(pf, &ptp_port->tx, ptp_port->port_num);
+}
+
+/**
+ * ice_ptp_init - Initialize PTP hardware clock support
+ * @pf: Board private structure
+ *
+ * Set up the device for interacting with the PTP hardware clock for all
+ * functions, both the function that owns the clock hardware, and the
+ * functions connected to the clock hardware.
+ *
+ * The clock owner will allocate and register a ptp_clock with the
+ * PTP_1588_CLOCK infrastructure. All functions allocate a kthread and work
+ * items used for asynchronous work such as Tx timestamps and periodic work.
+ */
+void ice_ptp_init(struct ice_pf *pf)
+{
+	struct ice_ptp *ptp = &pf->ptp;
+	struct ice_hw *hw = &pf->hw;
+	int err;
+
+	/* If this function owns the clock hardware, it must allocate and
+	 * configure the PTP clock device to represent it.
+	 */
+	if (hw->func_caps.ts_func_info.src_tmr_owned) {
+		err = ice_ptp_init_owner(pf);
+		if (err)
+			goto err;
+	}
+
+	ptp->port.port_num = hw->pf_id;
+	err = ice_ptp_init_port(pf, &ptp->port);
+	if (err)
+		goto err;
+
+	/* Start the PHY timestamping block */
+	ice_ptp_reset_phy_timestamping(pf);
+
+	set_bit(ICE_FLAG_PTP, pf->flags);
+	err = ice_ptp_init_work(pf, ptp);
+	if (err)
+		goto err;
+
+	dev_info(ice_pf_to_dev(pf), "PTP init successful\n");
+	return;
+
+err:
+	/* If we registered a PTP clock, release it */
+	if (pf->ptp.clock) {
+		ptp_clock_unregister(ptp->clock);
+		pf->ptp.clock = NULL;
+	}
+	clear_bit(ICE_FLAG_PTP, pf->flags);
+	dev_err(ice_pf_to_dev(pf), "PTP failed %d\n", err);
+}
+
+/**
+ * ice_ptp_release - Disable the driver/HW support and unregister the clock
+ * @pf: Board private structure
+ *
+ * This function handles the cleanup work required from the initialization by
+ * clearing out the important information and unregistering the clock
+ */
+void ice_ptp_release(struct ice_pf *pf)
+{
+	if (!test_bit(ICE_FLAG_PTP, pf->flags))
+		return;
+
+	/* Disable timestamping for both Tx and Rx */
+	ice_ptp_cfg_timestamp(pf, false);
+
+	ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx);
+
+	clear_bit(ICE_FLAG_PTP, pf->flags);
+
+	kthread_cancel_delayed_work_sync(&pf->ptp.work);
+
+	ice_ptp_port_phy_stop(&pf->ptp.port);
+	mutex_destroy(&pf->ptp.port.ps_lock);
+	if (pf->ptp.kworker) {
+		kthread_destroy_worker(pf->ptp.kworker);
+		pf->ptp.kworker = NULL;
+	}
+
+	if (!pf->ptp.clock)
+		return;
+
+	/* Disable periodic outputs */
+	ice_ptp_disable_all_clkout(pf);
+
+	ice_clear_ptp_clock_index(pf);
+	ptp_clock_unregister(pf->ptp.clock);
+	pf->ptp.clock = NULL;
+
+	dev_info(ice_pf_to_dev(pf), "Removed PTP clock\n");
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
new file mode 100644
index 0000000000..995a57019b
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -0,0 +1,318 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_PTP_H_
+#define _ICE_PTP_H_
+
+#include <linux/ptp_clock_kernel.h>
+#include <linux/kthread.h>
+
+#include "ice_ptp_hw.h"
+
+enum ice_ptp_pin_e810 {
+	GPIO_20 = 0,
+	GPIO_21,
+	GPIO_22,
+	GPIO_23,
+	NUM_PTP_PIN_E810
+};
+
+enum ice_ptp_pin_e810t {
+	GNSS = 0,
+	SMA1,
+	UFL1,
+	SMA2,
+	UFL2,
+	NUM_PTP_PINS_E810T
+};
+
+struct ice_perout_channel {
+	bool ena;
+	u32 gpio_pin;
+	u64 period;
+	u64 start_time;
+};
+
+/* The ice hardware captures Tx hardware timestamps in the PHY. The timestamp
+ * is stored in a buffer of registers. Depending on the specific hardware,
+ * this buffer might be shared across multiple PHY ports.
+ *
+ * On transmit of a packet to be timestamped, software is responsible for
+ * selecting an open index. Hardware makes no attempt to lock or prevent
+ * re-use of an index for multiple packets.
+ *
+ * To handle this, timestamp indexes must be tracked by software to ensure
+ * that an index is not re-used for multiple transmitted packets. The
+ * structures and functions declared in this file track the available Tx
+ * register indexes, as well as provide storage for the SKB pointers.
+ *
+ * To allow multiple ports to access the shared register block independently,
+ * the blocks are split up so that indexes are assigned to each port based on
+ * hardware logical port number.
+ *
+ * The timestamp blocks are handled differently for E810- and E822-based
+ * devices. In E810 devices, each port has its own block of timestamps, while in
+ * E822 there is a need to logically break the block of registers into smaller
+ * chunks based on the port number to avoid collisions.
+ *
+ * Example for port 5 in E810:
+ *  +--------+--------+--------+--------+--------+--------+--------+--------+
+ *  |register|register|register|register|register|register|register|register|
+ *  | block  | block  | block  | block  | block  | block  | block  | block  |
+ *  |  for   |  for   |  for   |  for   |  for   |  for   |  for   |  for   |
+ *  | port 0 | port 1 | port 2 | port 3 | port 4 | port 5 | port 6 | port 7 |
+ *  +--------+--------+--------+--------+--------+--------+--------+--------+
+ *                                               ^^
+ *                                               ||
+ *                                               |---  quad offset is always 0
+ *                                               ---- quad number
+ *
+ * Example for port 5 in E822:
+ * +-----------------------------+-----------------------------+
+ * |  register block for quad 0  |  register block for quad 1  |
+ * |+------+------+------+------+|+------+------+------+------+|
+ * ||port 0|port 1|port 2|port 3|||port 0|port 1|port 2|port 3||
+ * |+------+------+------+------+|+------+------+------+------+|
+ * +-----------------------------+-------^---------------------+
+ *                                ^      |
+ *                                |      --- quad offset*
+ *                                ---- quad number
+ *
+ *   * PHY port 5 is port 1 in quad 1
+ *
+ */
+
+/**
+ * struct ice_tx_tstamp - Tracking for a single Tx timestamp
+ * @skb: pointer to the SKB for this timestamp request
+ * @start: jiffies when the timestamp was first requested
+ * @cached_tstamp: last read timestamp
+ *
+ * This structure tracks a single timestamp request. The SKB pointer is
+ * provided when initiating a request. The start time is used to ensure that
+ * we discard old requests that were not fulfilled within a 2 second time
+ * window.
+ * Timestamp values in the PHY are read only and do not get cleared except at
+ * hardware reset or when a new timestamp value is captured.
+ *
+ * Some PHY types do not provide a "ready" bitmap indicating which timestamp
+ * indexes are valid. In these cases, we use a cached_tstamp to keep track of
+ * the last timestamp we read for a given index. If the current timestamp
+ * value is the same as the cached value, we assume a new timestamp hasn't
+ * been captured. This avoids reporting stale timestamps to the stack. This is
+ * only done if the verify_cached flag is set in ice_ptp_tx structure.
+ */
+struct ice_tx_tstamp {
+	struct sk_buff *skb;
+	unsigned long start;
+	u64 cached_tstamp;
+};
+
+/**
+ * enum ice_tx_tstamp_work - Status of Tx timestamp work function
+ * @ICE_TX_TSTAMP_WORK_DONE: Tx timestamp processing is complete
+ * @ICE_TX_TSTAMP_WORK_PENDING: More Tx timestamps are pending
+ */
+enum ice_tx_tstamp_work {
+	ICE_TX_TSTAMP_WORK_DONE = 0,
+	ICE_TX_TSTAMP_WORK_PENDING,
+};
+
+/**
+ * struct ice_ptp_tx - Tracking structure for all Tx timestamp requests on a port
+ * @lock: lock to prevent concurrent access to fields of this struct
+ * @tstamps: array of len to store outstanding requests
+ * @in_use: bitmap of len to indicate which slots are in use
+ * @stale: bitmap of len to indicate slots which have stale timestamps
+ * @block: which memory block (quad or port) the timestamps are captured in
+ * @offset: offset into timestamp block to get the real index
+ * @len: length of the tstamps and in_use fields.
+ * @init: if true, the tracker is initialized;
+ * @calibrating: if true, the PHY is calibrating the Tx offset. During this
+ *               window, timestamps are temporarily disabled.
+ * @verify_cached: if true, verify new timestamp differs from last read value
+ */
+struct ice_ptp_tx {
+	spinlock_t lock; /* lock protecting in_use bitmap */
+	struct ice_tx_tstamp *tstamps;
+	unsigned long *in_use;
+	unsigned long *stale;
+	u8 block;
+	u8 offset;
+	u8 len;
+	u8 init : 1;
+	u8 calibrating : 1;
+	u8 verify_cached : 1;
+};
+
+/* Quad and port information for initializing timestamp blocks */
+#define INDEX_PER_QUAD			64
+#define INDEX_PER_PORT_E822		16
+#define INDEX_PER_PORT_E810		64
+
+/**
+ * struct ice_ptp_port - data used to initialize an external port for PTP
+ *
+ * This structure contains data indicating whether a single external port is
+ * ready for PTP functionality. It is used to track the port initialization
+ * and determine when the port's PHY offset is valid.
+ *
+ * @tx: Tx timestamp tracking for this port
+ * @ov_work: delayed work task for tracking when PHY offset is valid
+ * @ps_lock: mutex used to protect the overall PTP PHY start procedure
+ * @link_up: indicates whether the link is up
+ * @tx_fifo_busy_cnt: number of times the Tx FIFO was busy
+ * @port_num: the port number this structure represents
+ */
+struct ice_ptp_port {
+	struct ice_ptp_tx tx;
+	struct kthread_delayed_work ov_work;
+	struct mutex ps_lock; /* protects overall PTP PHY start procedure */
+	bool link_up;
+	u8 tx_fifo_busy_cnt;
+	u8 port_num;
+};
+
+#define GLTSYN_TGT_H_IDX_MAX		4
+
+/**
+ * struct ice_ptp - data used for integrating with CONFIG_PTP_1588_CLOCK
+ * @port: data for the PHY port initialization procedure
+ * @work: delayed work function for periodic tasks
+ * @cached_phc_time: a cached copy of the PHC time for timestamp extension
+ * @cached_phc_jiffies: jiffies when cached_phc_time was last updated
+ * @ext_ts_chan: the external timestamp channel in use
+ * @ext_ts_irq: the external timestamp IRQ in use
+ * @kworker: kwork thread for handling periodic work
+ * @perout_channels: periodic output data
+ * @info: structure defining PTP hardware capabilities
+ * @clock: pointer to registered PTP clock device
+ * @tstamp_config: hardware timestamping configuration
+ * @reset_time: kernel time after clock stop on reset
+ * @tx_hwtstamp_skipped: number of Tx time stamp requests skipped
+ * @tx_hwtstamp_timeouts: number of Tx skbs discarded with no time stamp
+ * @tx_hwtstamp_flushed: number of Tx skbs flushed due to interface closed
+ * @tx_hwtstamp_discarded: number of Tx skbs discarded due to cached PHC time
+ *                         being too old to correctly extend timestamp
+ * @late_cached_phc_updates: number of times cached PHC update is late
+ */
+struct ice_ptp {
+	struct ice_ptp_port port;
+	struct kthread_delayed_work work;
+	u64 cached_phc_time;
+	unsigned long cached_phc_jiffies;
+	u8 ext_ts_chan;
+	u8 ext_ts_irq;
+	struct kthread_worker *kworker;
+	struct ice_perout_channel perout_channels[GLTSYN_TGT_H_IDX_MAX];
+	struct ptp_clock_info info;
+	struct ptp_clock *clock;
+	struct hwtstamp_config tstamp_config;
+	u64 reset_time;
+	u32 tx_hwtstamp_skipped;
+	u32 tx_hwtstamp_timeouts;
+	u32 tx_hwtstamp_flushed;
+	u32 tx_hwtstamp_discarded;
+	u32 late_cached_phc_updates;
+};
+
+#define __ptp_port_to_ptp(p) \
+	container_of((p), struct ice_ptp, port)
+#define ptp_port_to_pf(p) \
+	container_of(__ptp_port_to_ptp((p)), struct ice_pf, ptp)
+
+#define __ptp_info_to_ptp(i) \
+	container_of((i), struct ice_ptp, info)
+#define ptp_info_to_pf(i) \
+	container_of(__ptp_info_to_ptp((i)), struct ice_pf, ptp)
+
+#define PFTSYN_SEM_BYTES		4
+#define PTP_SHARED_CLK_IDX_VALID	BIT(31)
+#define TS_CMD_MASK			0xF
+#define SYNC_EXEC_CMD			0x3
+#define ICE_PTP_TS_VALID		BIT(0)
+
+#define FIFO_EMPTY			BIT(2)
+#define FIFO_OK				0xFF
+#define ICE_PTP_FIFO_NUM_CHECKS		5
+/* Per-channel register definitions */
+#define GLTSYN_AUX_OUT(_chan, _idx)	(GLTSYN_AUX_OUT_0(_idx) + ((_chan) * 8))
+#define GLTSYN_AUX_IN(_chan, _idx)	(GLTSYN_AUX_IN_0(_idx) + ((_chan) * 8))
+#define GLTSYN_CLKO(_chan, _idx)	(GLTSYN_CLKO_0(_idx) + ((_chan) * 8))
+#define GLTSYN_TGT_L(_chan, _idx)	(GLTSYN_TGT_L_0(_idx) + ((_chan) * 16))
+#define GLTSYN_TGT_H(_chan, _idx)	(GLTSYN_TGT_H_0(_idx) + ((_chan) * 16))
+#define GLTSYN_EVNT_L(_chan, _idx)	(GLTSYN_EVNT_L_0(_idx) + ((_chan) * 16))
+#define GLTSYN_EVNT_H(_chan, _idx)	(GLTSYN_EVNT_H_0(_idx) + ((_chan) * 16))
+#define GLTSYN_EVNT_H_IDX_MAX		3
+
+/* Pin definitions for PTP PPS out */
+#define PPS_CLK_GEN_CHAN		3
+#define PPS_CLK_SRC_CHAN		2
+#define PPS_PIN_INDEX			5
+#define TIME_SYNC_PIN_INDEX		4
+#define N_EXT_TS_E810			3
+#define N_PER_OUT_E810			4
+#define N_PER_OUT_E810T			3
+#define N_PER_OUT_NO_SMA_E810T		2
+#define N_EXT_TS_NO_SMA_E810T		2
+#define ETH_GLTSYN_ENA(_i)		(0x03000348 + ((_i) * 4))
+
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
+struct ice_pf;
+int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr);
+int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr);
+void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena);
+int ice_get_ptp_clock_index(struct ice_pf *pf);
+
+void ice_ptp_extts_event(struct ice_pf *pf);
+s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb);
+enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf);
+
+void
+ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
+		    union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb);
+void ice_ptp_reset(struct ice_pf *pf);
+void ice_ptp_prepare_for_reset(struct ice_pf *pf);
+void ice_ptp_init(struct ice_pf *pf);
+void ice_ptp_release(struct ice_pf *pf);
+void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup);
+#else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
+static inline int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena) { }
+static inline int ice_get_ptp_clock_index(struct ice_pf *pf)
+{
+	return -1;
+}
+
+static inline void ice_ptp_extts_event(struct ice_pf *pf) { }
+static inline s8
+ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
+{
+	return -1;
+}
+
+static inline bool ice_ptp_process_ts(struct ice_pf *pf)
+{
+	return true;
+}
+static inline void
+ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
+		    union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { }
+static inline void ice_ptp_reset(struct ice_pf *pf) { }
+static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf) { }
+static inline void ice_ptp_init(struct ice_pf *pf) { }
+static inline void ice_ptp_release(struct ice_pf *pf) { }
+static inline void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup)
+{
+}
+#endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */
+#endif /* _ICE_PTP_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_consts.h b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h
new file mode 100644
index 0000000000..4109aa3b2f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h
@@ -0,0 +1,374 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+#ifndef _ICE_PTP_CONSTS_H_
+#define _ICE_PTP_CONSTS_H_
+
+/* Constant definitions related to the hardware clock used for PTP 1588
+ * features and functionality.
+ */
+/* Constants defined for the PTP 1588 clock hardware. */
+
+/* struct ice_time_ref_info_e822
+ *
+ * E822 hardware can use different sources as the reference for the PTP
+ * hardware clock. Each clock has different characteristics such as a slightly
+ * different frequency, etc.
+ *
+ * This lookup table defines several constants that depend on the current time
+ * reference. See the struct ice_time_ref_info_e822 for information about the
+ * meaning of each constant.
+ */
+const struct ice_time_ref_info_e822 e822_time_ref[NUM_ICE_TIME_REF_FREQ] = {
+	/* ICE_TIME_REF_FREQ_25_000 -> 25 MHz */
+	{
+		/* pll_freq */
+		823437500, /* 823.4375 MHz PLL */
+		/* nominal_incval */
+		0x136e44fabULL,
+		/* pps_delay */
+		11,
+	},
+
+	/* ICE_TIME_REF_FREQ_122_880 -> 122.88 MHz */
+	{
+		/* pll_freq */
+		783360000, /* 783.36 MHz */
+		/* nominal_incval */
+		0x146cc2177ULL,
+		/* pps_delay */
+		12,
+	},
+
+	/* ICE_TIME_REF_FREQ_125_000 -> 125 MHz */
+	{
+		/* pll_freq */
+		796875000, /* 796.875 MHz */
+		/* nominal_incval */
+		0x141414141ULL,
+		/* pps_delay */
+		12,
+	},
+
+	/* ICE_TIME_REF_FREQ_153_600 -> 153.6 MHz */
+	{
+		/* pll_freq */
+		816000000, /* 816 MHz */
+		/* nominal_incval */
+		0x139b9b9baULL,
+		/* pps_delay */
+		12,
+	},
+
+	/* ICE_TIME_REF_FREQ_156_250 -> 156.25 MHz */
+	{
+		/* pll_freq */
+		830078125, /* 830.78125 MHz */
+		/* nominal_incval */
+		0x134679aceULL,
+		/* pps_delay */
+		11,
+	},
+
+	/* ICE_TIME_REF_FREQ_245_760 -> 245.76 MHz */
+	{
+		/* pll_freq */
+		783360000, /* 783.36 MHz */
+		/* nominal_incval */
+		0x146cc2177ULL,
+		/* pps_delay */
+		12,
+	},
+};
+
+const struct ice_cgu_pll_params_e822 e822_cgu_params[NUM_ICE_TIME_REF_FREQ] = {
+	/* ICE_TIME_REF_FREQ_25_000 -> 25 MHz */
+	{
+		/* refclk_pre_div */
+		1,
+		/* feedback_div */
+		197,
+		/* frac_n_div */
+		2621440,
+		/* post_pll_div */
+		6,
+	},
+
+	/* ICE_TIME_REF_FREQ_122_880 -> 122.88 MHz */
+	{
+		/* refclk_pre_div */
+		5,
+		/* feedback_div */
+		223,
+		/* frac_n_div */
+		524288,
+		/* post_pll_div */
+		7,
+	},
+
+	/* ICE_TIME_REF_FREQ_125_000 -> 125 MHz */
+	{
+		/* refclk_pre_div */
+		5,
+		/* feedback_div */
+		223,
+		/* frac_n_div */
+		524288,
+		/* post_pll_div */
+		7,
+	},
+
+	/* ICE_TIME_REF_FREQ_153_600 -> 153.6 MHz */
+	{
+		/* refclk_pre_div */
+		5,
+		/* feedback_div */
+		159,
+		/* frac_n_div */
+		1572864,
+		/* post_pll_div */
+		6,
+	},
+
+	/* ICE_TIME_REF_FREQ_156_250 -> 156.25 MHz */
+	{
+		/* refclk_pre_div */
+		5,
+		/* feedback_div */
+		159,
+		/* frac_n_div */
+		1572864,
+		/* post_pll_div */
+		6,
+	},
+
+	/* ICE_TIME_REF_FREQ_245_760 -> 245.76 MHz */
+	{
+		/* refclk_pre_div */
+		10,
+		/* feedback_div */
+		223,
+		/* frac_n_div */
+		524288,
+		/* post_pll_div */
+		7,
+	},
+};
+
+/* struct ice_vernier_info_e822
+ *
+ * E822 hardware calibrates the delay of the timestamp indication from the
+ * actual packet transmission or reception during the initialization of the
+ * PHY. To do this, the hardware mechanism uses some conversions between the
+ * various clocks within the PHY block. This table defines constants used to
+ * calculate the correct conversion ratios in the PHY registers.
+ *
+ * Many of the values relate to the PAR/PCS clock conversion registers. For
+ * these registers, a value of 0 means that the associated register is not
+ * used by this link speed, and that the register should be cleared by writing
+ * 0. Other values specify the clock frequency in Hz.
+ */
+const struct ice_vernier_info_e822 e822_vernier[NUM_ICE_PTP_LNK_SPD] = {
+	/* ICE_PTP_LNK_SPD_1G */
+	{
+		/* tx_par_clk */
+		31250000, /* 31.25 MHz */
+		/* rx_par_clk */
+		31250000, /* 31.25 MHz */
+		/* tx_pcs_clk */
+		125000000, /* 125 MHz */
+		/* rx_pcs_clk */
+		125000000, /* 125 MHz */
+		/* tx_desk_rsgb_par */
+		0, /* unused */
+		/* rx_desk_rsgb_par */
+		0, /* unused */
+		/* tx_desk_rsgb_pcs */
+		0, /* unused */
+		/* rx_desk_rsgb_pcs */
+		0, /* unused */
+		/* tx_fixed_delay */
+		25140,
+		/* pmd_adj_divisor */
+		10000000,
+		/* rx_fixed_delay */
+		17372,
+	},
+	/* ICE_PTP_LNK_SPD_10G */
+	{
+		/* tx_par_clk */
+		257812500, /* 257.8125 MHz */
+		/* rx_par_clk */
+		257812500, /* 257.8125 MHz */
+		/* tx_pcs_clk */
+		156250000, /* 156.25 MHz */
+		/* rx_pcs_clk */
+		156250000, /* 156.25 MHz */
+		/* tx_desk_rsgb_par */
+		0, /* unused */
+		/* rx_desk_rsgb_par */
+		0, /* unused */
+		/* tx_desk_rsgb_pcs */
+		0, /* unused */
+		/* rx_desk_rsgb_pcs */
+		0, /* unused */
+		/* tx_fixed_delay */
+		6938,
+		/* pmd_adj_divisor */
+		82500000,
+		/* rx_fixed_delay */
+		6212,
+	},
+	/* ICE_PTP_LNK_SPD_25G */
+	{
+		/* tx_par_clk */
+		644531250, /* 644.53125 MHZ */
+		/* rx_par_clk */
+		644531250, /* 644.53125 MHz */
+		/* tx_pcs_clk */
+		390625000, /* 390.625 MHz */
+		/* rx_pcs_clk */
+		390625000, /* 390.625 MHz */
+		/* tx_desk_rsgb_par */
+		0, /* unused */
+		/* rx_desk_rsgb_par */
+		0, /* unused */
+		/* tx_desk_rsgb_pcs */
+		0, /* unused */
+		/* rx_desk_rsgb_pcs */
+		0, /* unused */
+		/* tx_fixed_delay */
+		2778,
+		/* pmd_adj_divisor */
+		206250000,
+		/* rx_fixed_delay */
+		2491,
+	},
+	/* ICE_PTP_LNK_SPD_25G_RS */
+	{
+		/* tx_par_clk */
+		0, /* unused */
+		/* rx_par_clk */
+		0, /* unused */
+		/* tx_pcs_clk */
+		0, /* unused */
+		/* rx_pcs_clk */
+		0, /* unused */
+		/* tx_desk_rsgb_par */
+		161132812, /* 162.1328125 MHz Reed Solomon gearbox */
+		/* rx_desk_rsgb_par */
+		161132812, /* 162.1328125 MHz Reed Solomon gearbox */
+		/* tx_desk_rsgb_pcs */
+		97656250, /* 97.62625 MHz Reed Solomon gearbox */
+		/* rx_desk_rsgb_pcs */
+		97656250, /* 97.62625 MHz Reed Solomon gearbox */
+		/* tx_fixed_delay */
+		3928,
+		/* pmd_adj_divisor */
+		206250000,
+		/* rx_fixed_delay */
+		29535,
+	},
+	/* ICE_PTP_LNK_SPD_40G */
+	{
+		/* tx_par_clk */
+		257812500,
+		/* rx_par_clk */
+		257812500,
+		/* tx_pcs_clk */
+		156250000, /* 156.25 MHz */
+		/* rx_pcs_clk */
+		156250000, /* 156.25 MHz */
+		/* tx_desk_rsgb_par */
+		0, /* unused */
+		/* rx_desk_rsgb_par */
+		156250000, /* 156.25 MHz deskew clock */
+		/* tx_desk_rsgb_pcs */
+		0, /* unused */
+		/* rx_desk_rsgb_pcs */
+		156250000, /* 156.25 MHz deskew clock */
+		/* tx_fixed_delay */
+		5666,
+		/* pmd_adj_divisor */
+		82500000,
+		/* rx_fixed_delay */
+		4244,
+	},
+	/* ICE_PTP_LNK_SPD_50G */
+	{
+		/* tx_par_clk */
+		644531250, /* 644.53125 MHZ */
+		/* rx_par_clk */
+		644531250, /* 644.53125 MHZ */
+		/* tx_pcs_clk */
+		390625000, /* 390.625 MHz */
+		/* rx_pcs_clk */
+		390625000, /* 390.625 MHz */
+		/* tx_desk_rsgb_par */
+		0, /* unused */
+		/* rx_desk_rsgb_par */
+		195312500, /* 193.3125 MHz deskew clock */
+		/* tx_desk_rsgb_pcs */
+		0, /* unused */
+		/* rx_desk_rsgb_pcs */
+		195312500, /* 193.3125 MHz deskew clock */
+		/* tx_fixed_delay */
+		2778,
+		/* pmd_adj_divisor */
+		206250000,
+		/* rx_fixed_delay */
+		2868,
+	},
+	/* ICE_PTP_LNK_SPD_50G_RS */
+	{
+		/* tx_par_clk */
+		0, /* unused */
+		/* rx_par_clk */
+		644531250, /* 644.53125 MHz */
+		/* tx_pcs_clk */
+		0, /* unused */
+		/* rx_pcs_clk */
+		644531250, /* 644.53125 MHz */
+		/* tx_desk_rsgb_par */
+		322265625, /* 322.265625 MHz Reed Solomon gearbox */
+		/* rx_desk_rsgb_par */
+		322265625, /* 322.265625 MHz Reed Solomon gearbox */
+		/* tx_desk_rsgb_pcs */
+		644531250, /* 644.53125 MHz Reed Solomon gearbox */
+		/* rx_desk_rsgb_pcs */
+		644531250, /* 644.53125 MHz Reed Solomon gearbox */
+		/* tx_fixed_delay */
+		2095,
+		/* pmd_adj_divisor */
+		206250000,
+		/* rx_fixed_delay */
+		14524,
+	},
+	/* ICE_PTP_LNK_SPD_100G_RS */
+	{
+		/* tx_par_clk */
+		0, /* unused */
+		/* rx_par_clk */
+		644531250, /* 644.53125 MHz */
+		/* tx_pcs_clk */
+		0, /* unused */
+		/* rx_pcs_clk */
+		644531250, /* 644.53125 MHz */
+		/* tx_desk_rsgb_par */
+		644531250, /* 644.53125 MHz Reed Solomon gearbox */
+		/* rx_desk_rsgb_par */
+		644531250, /* 644.53125 MHz Reed Solomon gearbox */
+		/* tx_desk_rsgb_pcs */
+		644531250, /* 644.53125 MHz Reed Solomon gearbox */
+		/* rx_desk_rsgb_pcs */
+		644531250, /* 644.53125 MHz Reed Solomon gearbox */
+		/* tx_fixed_delay */
+		1620,
+		/* pmd_adj_divisor */
+		206250000,
+		/* rx_fixed_delay */
+		7775,
+	},
+};
+
+#endif /* _ICE_PTP_CONSTS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
new file mode 100644
index 0000000000..f818dd215c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
@@ -0,0 +1,3409 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+#include <linux/delay.h>
+#include "ice_common.h"
+#include "ice_ptp_hw.h"
+#include "ice_ptp_consts.h"
+#include "ice_cgu_regs.h"
+
+/* Low level functions for interacting with and managing the device clock used
+ * for the Precision Time Protocol.
+ *
+ * The ice hardware represents the current time using three registers:
+ *
+ *    GLTSYN_TIME_H     GLTSYN_TIME_L     GLTSYN_TIME_R
+ *  +---------------+ +---------------+ +---------------+
+ *  |    32 bits    | |    32 bits    | |    32 bits    |
+ *  +---------------+ +---------------+ +---------------+
+ *
+ * The registers are incremented every clock tick using a 40bit increment
+ * value defined over two registers:
+ *
+ *                     GLTSYN_INCVAL_H   GLTSYN_INCVAL_L
+ *                    +---------------+ +---------------+
+ *                    |    8 bit s    | |    32 bits    |
+ *                    +---------------+ +---------------+
+ *
+ * The increment value is added to the GLSTYN_TIME_R and GLSTYN_TIME_L
+ * registers every clock source tick. Depending on the specific device
+ * configuration, the clock source frequency could be one of a number of
+ * values.
+ *
+ * For E810 devices, the increment frequency is 812.5 MHz
+ *
+ * For E822 devices the clock can be derived from different sources, and the
+ * increment has an effective frequency of one of the following:
+ * - 823.4375 MHz
+ * - 783.36 MHz
+ * - 796.875 MHz
+ * - 816 MHz
+ * - 830.078125 MHz
+ * - 783.36 MHz
+ *
+ * The hardware captures timestamps in the PHY for incoming packets, and for
+ * outgoing packets on request. To support this, the PHY maintains a timer
+ * that matches the lower 64 bits of the global source timer.
+ *
+ * In order to ensure that the PHY timers and the source timer are equivalent,
+ * shadow registers are used to prepare the desired initial values. A special
+ * sync command is issued to trigger copying from the shadow registers into
+ * the appropriate source and PHY registers simultaneously.
+ *
+ * The driver supports devices which have different PHYs with subtly different
+ * mechanisms to program and control the timers. We divide the devices into
+ * families named after the first major device, E810 and similar devices, and
+ * E822 and similar devices.
+ *
+ * - E822 based devices have additional support for fine grained Vernier
+ *   calibration which requires significant setup
+ * - The layout of timestamp data in the PHY register blocks is different
+ * - The way timer synchronization commands are issued is different.
+ *
+ * To support this, very low level functions have an e810 or e822 suffix
+ * indicating what type of device they work on. Higher level abstractions for
+ * tasks that can be done on both devices do not have the suffix and will
+ * correctly look up the appropriate low level function when running.
+ *
+ * Functions which only make sense on a single device family may not have
+ * a suitable generic implementation
+ */
+
+/**
+ * ice_get_ptp_src_clock_index - determine source clock index
+ * @hw: pointer to HW struct
+ *
+ * Determine the source clock index currently in use, based on device
+ * capabilities reported during initialization.
+ */
+u8 ice_get_ptp_src_clock_index(struct ice_hw *hw)
+{
+	return hw->func_caps.ts_func_info.tmr_index_assoc;
+}
+
+/**
+ * ice_ptp_read_src_incval - Read source timer increment value
+ * @hw: pointer to HW struct
+ *
+ * Read the increment value of the source timer and return it.
+ */
+static u64 ice_ptp_read_src_incval(struct ice_hw *hw)
+{
+	u32 lo, hi;
+	u8 tmr_idx;
+
+	tmr_idx = ice_get_ptp_src_clock_index(hw);
+
+	lo = rd32(hw, GLTSYN_INCVAL_L(tmr_idx));
+	hi = rd32(hw, GLTSYN_INCVAL_H(tmr_idx));
+
+	return ((u64)(hi & INCVAL_HIGH_M) << 32) | lo;
+}
+
+/**
+ * ice_ptp_src_cmd - Prepare source timer for a timer command
+ * @hw: pointer to HW structure
+ * @cmd: Timer command
+ *
+ * Prepare the source timer for an upcoming timer sync command.
+ */
+static void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
+{
+	u32 cmd_val;
+	u8 tmr_idx;
+
+	tmr_idx = ice_get_ptp_src_clock_index(hw);
+	cmd_val = tmr_idx << SEL_CPK_SRC;
+
+	switch (cmd) {
+	case INIT_TIME:
+		cmd_val |= GLTSYN_CMD_INIT_TIME;
+		break;
+	case INIT_INCVAL:
+		cmd_val |= GLTSYN_CMD_INIT_INCVAL;
+		break;
+	case ADJ_TIME:
+		cmd_val |= GLTSYN_CMD_ADJ_TIME;
+		break;
+	case ADJ_TIME_AT_TIME:
+		cmd_val |= GLTSYN_CMD_ADJ_INIT_TIME;
+		break;
+	case READ_TIME:
+		cmd_val |= GLTSYN_CMD_READ_TIME;
+		break;
+	case ICE_PTP_NOP:
+		break;
+	}
+
+	wr32(hw, GLTSYN_CMD, cmd_val);
+}
+
+/**
+ * ice_ptp_exec_tmr_cmd - Execute all prepared timer commands
+ * @hw: pointer to HW struct
+ *
+ * Write the SYNC_EXEC_CMD bit to the GLTSYN_CMD_SYNC register, and flush the
+ * write immediately. This triggers the hardware to begin executing all of the
+ * source and PHY timer commands synchronously.
+ */
+static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw)
+{
+	wr32(hw, GLTSYN_CMD_SYNC, SYNC_EXEC_CMD);
+	ice_flush(hw);
+}
+
+/* E822 family functions
+ *
+ * The following functions operate on the E822 family of devices.
+ */
+
+/**
+ * ice_fill_phy_msg_e822 - Fill message data for a PHY register access
+ * @msg: the PHY message buffer to fill in
+ * @port: the port to access
+ * @offset: the register offset
+ */
+static void
+ice_fill_phy_msg_e822(struct ice_sbq_msg_input *msg, u8 port, u16 offset)
+{
+	int phy_port, phy, quadtype;
+
+	phy_port = port % ICE_PORTS_PER_PHY;
+	phy = port / ICE_PORTS_PER_PHY;
+	quadtype = (port / ICE_PORTS_PER_QUAD) % ICE_NUM_QUAD_TYPE;
+
+	if (quadtype == 0) {
+		msg->msg_addr_low = P_Q0_L(P_0_BASE + offset, phy_port);
+		msg->msg_addr_high = P_Q0_H(P_0_BASE + offset, phy_port);
+	} else {
+		msg->msg_addr_low = P_Q1_L(P_4_BASE + offset, phy_port);
+		msg->msg_addr_high = P_Q1_H(P_4_BASE + offset, phy_port);
+	}
+
+	if (phy == 0)
+		msg->dest_dev = rmn_0;
+	else if (phy == 1)
+		msg->dest_dev = rmn_1;
+	else
+		msg->dest_dev = rmn_2;
+}
+
+/**
+ * ice_is_64b_phy_reg_e822 - Check if this is a 64bit PHY register
+ * @low_addr: the low address to check
+ * @high_addr: on return, contains the high address of the 64bit register
+ *
+ * Checks if the provided low address is one of the known 64bit PHY values
+ * represented as two 32bit registers. If it is, return the appropriate high
+ * register offset to use.
+ */
+static bool ice_is_64b_phy_reg_e822(u16 low_addr, u16 *high_addr)
+{
+	switch (low_addr) {
+	case P_REG_PAR_PCS_TX_OFFSET_L:
+		*high_addr = P_REG_PAR_PCS_TX_OFFSET_U;
+		return true;
+	case P_REG_PAR_PCS_RX_OFFSET_L:
+		*high_addr = P_REG_PAR_PCS_RX_OFFSET_U;
+		return true;
+	case P_REG_PAR_TX_TIME_L:
+		*high_addr = P_REG_PAR_TX_TIME_U;
+		return true;
+	case P_REG_PAR_RX_TIME_L:
+		*high_addr = P_REG_PAR_RX_TIME_U;
+		return true;
+	case P_REG_TOTAL_TX_OFFSET_L:
+		*high_addr = P_REG_TOTAL_TX_OFFSET_U;
+		return true;
+	case P_REG_TOTAL_RX_OFFSET_L:
+		*high_addr = P_REG_TOTAL_RX_OFFSET_U;
+		return true;
+	case P_REG_UIX66_10G_40G_L:
+		*high_addr = P_REG_UIX66_10G_40G_U;
+		return true;
+	case P_REG_UIX66_25G_100G_L:
+		*high_addr = P_REG_UIX66_25G_100G_U;
+		return true;
+	case P_REG_TX_CAPTURE_L:
+		*high_addr = P_REG_TX_CAPTURE_U;
+		return true;
+	case P_REG_RX_CAPTURE_L:
+		*high_addr = P_REG_RX_CAPTURE_U;
+		return true;
+	case P_REG_TX_TIMER_INC_PRE_L:
+		*high_addr = P_REG_TX_TIMER_INC_PRE_U;
+		return true;
+	case P_REG_RX_TIMER_INC_PRE_L:
+		*high_addr = P_REG_RX_TIMER_INC_PRE_U;
+		return true;
+	default:
+		return false;
+	}
+}
+
+/**
+ * ice_is_40b_phy_reg_e822 - Check if this is a 40bit PHY register
+ * @low_addr: the low address to check
+ * @high_addr: on return, contains the high address of the 40bit value
+ *
+ * Checks if the provided low address is one of the known 40bit PHY values
+ * split into two registers with the lower 8 bits in the low register and the
+ * upper 32 bits in the high register. If it is, return the appropriate high
+ * register offset to use.
+ */
+static bool ice_is_40b_phy_reg_e822(u16 low_addr, u16 *high_addr)
+{
+	switch (low_addr) {
+	case P_REG_TIMETUS_L:
+		*high_addr = P_REG_TIMETUS_U;
+		return true;
+	case P_REG_PAR_RX_TUS_L:
+		*high_addr = P_REG_PAR_RX_TUS_U;
+		return true;
+	case P_REG_PAR_TX_TUS_L:
+		*high_addr = P_REG_PAR_TX_TUS_U;
+		return true;
+	case P_REG_PCS_RX_TUS_L:
+		*high_addr = P_REG_PCS_RX_TUS_U;
+		return true;
+	case P_REG_PCS_TX_TUS_L:
+		*high_addr = P_REG_PCS_TX_TUS_U;
+		return true;
+	case P_REG_DESK_PAR_RX_TUS_L:
+		*high_addr = P_REG_DESK_PAR_RX_TUS_U;
+		return true;
+	case P_REG_DESK_PAR_TX_TUS_L:
+		*high_addr = P_REG_DESK_PAR_TX_TUS_U;
+		return true;
+	case P_REG_DESK_PCS_RX_TUS_L:
+		*high_addr = P_REG_DESK_PCS_RX_TUS_U;
+		return true;
+	case P_REG_DESK_PCS_TX_TUS_L:
+		*high_addr = P_REG_DESK_PCS_TX_TUS_U;
+		return true;
+	default:
+		return false;
+	}
+}
+
+/**
+ * ice_read_phy_reg_e822 - Read a PHY register
+ * @hw: pointer to the HW struct
+ * @port: PHY port to read from
+ * @offset: PHY register offset to read
+ * @val: on return, the contents read from the PHY
+ *
+ * Read a PHY register for the given port over the device sideband queue.
+ */
+static int
+ice_read_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 *val)
+{
+	struct ice_sbq_msg_input msg = {0};
+	int err;
+
+	ice_fill_phy_msg_e822(&msg, port, offset);
+	msg.opcode = ice_sbq_msg_rd;
+
+	err = ice_sbq_rw_reg(hw, &msg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
+			  err);
+		return err;
+	}
+
+	*val = msg.data;
+
+	return 0;
+}
+
+/**
+ * ice_read_64b_phy_reg_e822 - Read a 64bit value from PHY registers
+ * @hw: pointer to the HW struct
+ * @port: PHY port to read from
+ * @low_addr: offset of the lower register to read from
+ * @val: on return, the contents of the 64bit value from the PHY registers
+ *
+ * Reads the two registers associated with a 64bit value and returns it in the
+ * val pointer. The offset always specifies the lower register offset to use.
+ * The high offset is looked up. This function only operates on registers
+ * known to be two parts of a 64bit value.
+ */
+static int
+ice_read_64b_phy_reg_e822(struct ice_hw *hw, u8 port, u16 low_addr, u64 *val)
+{
+	u32 low, high;
+	u16 high_addr;
+	int err;
+
+	/* Only operate on registers known to be split into two 32bit
+	 * registers.
+	 */
+	if (!ice_is_64b_phy_reg_e822(low_addr, &high_addr)) {
+		ice_debug(hw, ICE_DBG_PTP, "Invalid 64b register addr 0x%08x\n",
+			  low_addr);
+		return -EINVAL;
+	}
+
+	err = ice_read_phy_reg_e822(hw, port, low_addr, &low);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read from low register 0x%08x\n, err %d",
+			  low_addr, err);
+		return err;
+	}
+
+	err = ice_read_phy_reg_e822(hw, port, high_addr, &high);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read from high register 0x%08x\n, err %d",
+			  high_addr, err);
+		return err;
+	}
+
+	*val = (u64)high << 32 | low;
+
+	return 0;
+}
+
+/**
+ * ice_write_phy_reg_e822 - Write a PHY register
+ * @hw: pointer to the HW struct
+ * @port: PHY port to write to
+ * @offset: PHY register offset to write
+ * @val: The value to write to the register
+ *
+ * Write a PHY register for the given port over the device sideband queue.
+ */
+static int
+ice_write_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 val)
+{
+	struct ice_sbq_msg_input msg = {0};
+	int err;
+
+	ice_fill_phy_msg_e822(&msg, port, offset);
+	msg.opcode = ice_sbq_msg_wr;
+	msg.data = val;
+
+	err = ice_sbq_rw_reg(hw, &msg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
+			  err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_write_40b_phy_reg_e822 - Write a 40b value to the PHY
+ * @hw: pointer to the HW struct
+ * @port: port to write to
+ * @low_addr: offset of the low register
+ * @val: 40b value to write
+ *
+ * Write the provided 40b value to the two associated registers by splitting
+ * it up into two chunks, the lower 8 bits and the upper 32 bits.
+ */
+static int
+ice_write_40b_phy_reg_e822(struct ice_hw *hw, u8 port, u16 low_addr, u64 val)
+{
+	u32 low, high;
+	u16 high_addr;
+	int err;
+
+	/* Only operate on registers known to be split into a lower 8 bit
+	 * register and an upper 32 bit register.
+	 */
+	if (!ice_is_40b_phy_reg_e822(low_addr, &high_addr)) {
+		ice_debug(hw, ICE_DBG_PTP, "Invalid 40b register addr 0x%08x\n",
+			  low_addr);
+		return -EINVAL;
+	}
+
+	low = (u32)(val & P_REG_40B_LOW_M);
+	high = (u32)(val >> P_REG_40B_HIGH_S);
+
+	err = ice_write_phy_reg_e822(hw, port, low_addr, low);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write to low register 0x%08x\n, err %d",
+			  low_addr, err);
+		return err;
+	}
+
+	err = ice_write_phy_reg_e822(hw, port, high_addr, high);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write to high register 0x%08x\n, err %d",
+			  high_addr, err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_write_64b_phy_reg_e822 - Write a 64bit value to PHY registers
+ * @hw: pointer to the HW struct
+ * @port: PHY port to read from
+ * @low_addr: offset of the lower register to read from
+ * @val: the contents of the 64bit value to write to PHY
+ *
+ * Write the 64bit value to the two associated 32bit PHY registers. The offset
+ * is always specified as the lower register, and the high address is looked
+ * up. This function only operates on registers known to be two parts of
+ * a 64bit value.
+ */
+static int
+ice_write_64b_phy_reg_e822(struct ice_hw *hw, u8 port, u16 low_addr, u64 val)
+{
+	u32 low, high;
+	u16 high_addr;
+	int err;
+
+	/* Only operate on registers known to be split into two 32bit
+	 * registers.
+	 */
+	if (!ice_is_64b_phy_reg_e822(low_addr, &high_addr)) {
+		ice_debug(hw, ICE_DBG_PTP, "Invalid 64b register addr 0x%08x\n",
+			  low_addr);
+		return -EINVAL;
+	}
+
+	low = lower_32_bits(val);
+	high = upper_32_bits(val);
+
+	err = ice_write_phy_reg_e822(hw, port, low_addr, low);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write to low register 0x%08x\n, err %d",
+			  low_addr, err);
+		return err;
+	}
+
+	err = ice_write_phy_reg_e822(hw, port, high_addr, high);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write to high register 0x%08x\n, err %d",
+			  high_addr, err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_fill_quad_msg_e822 - Fill message data for quad register access
+ * @msg: the PHY message buffer to fill in
+ * @quad: the quad to access
+ * @offset: the register offset
+ *
+ * Fill a message buffer for accessing a register in a quad shared between
+ * multiple PHYs.
+ */
+static void
+ice_fill_quad_msg_e822(struct ice_sbq_msg_input *msg, u8 quad, u16 offset)
+{
+	u32 addr;
+
+	msg->dest_dev = rmn_0;
+
+	if ((quad % ICE_NUM_QUAD_TYPE) == 0)
+		addr = Q_0_BASE + offset;
+	else
+		addr = Q_1_BASE + offset;
+
+	msg->msg_addr_low = lower_16_bits(addr);
+	msg->msg_addr_high = upper_16_bits(addr);
+}
+
+/**
+ * ice_read_quad_reg_e822 - Read a PHY quad register
+ * @hw: pointer to the HW struct
+ * @quad: quad to read from
+ * @offset: quad register offset to read
+ * @val: on return, the contents read from the quad
+ *
+ * Read a quad register over the device sideband queue. Quad registers are
+ * shared between multiple PHYs.
+ */
+int
+ice_read_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 *val)
+{
+	struct ice_sbq_msg_input msg = {0};
+	int err;
+
+	if (quad >= ICE_MAX_QUAD)
+		return -EINVAL;
+
+	ice_fill_quad_msg_e822(&msg, quad, offset);
+	msg.opcode = ice_sbq_msg_rd;
+
+	err = ice_sbq_rw_reg(hw, &msg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
+			  err);
+		return err;
+	}
+
+	*val = msg.data;
+
+	return 0;
+}
+
+/**
+ * ice_write_quad_reg_e822 - Write a PHY quad register
+ * @hw: pointer to the HW struct
+ * @quad: quad to write to
+ * @offset: quad register offset to write
+ * @val: The value to write to the register
+ *
+ * Write a quad register over the device sideband queue. Quad registers are
+ * shared between multiple PHYs.
+ */
+int
+ice_write_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 val)
+{
+	struct ice_sbq_msg_input msg = {0};
+	int err;
+
+	if (quad >= ICE_MAX_QUAD)
+		return -EINVAL;
+
+	ice_fill_quad_msg_e822(&msg, quad, offset);
+	msg.opcode = ice_sbq_msg_wr;
+	msg.data = val;
+
+	err = ice_sbq_rw_reg(hw, &msg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
+			  err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_read_phy_tstamp_e822 - Read a PHY timestamp out of the quad block
+ * @hw: pointer to the HW struct
+ * @quad: the quad to read from
+ * @idx: the timestamp index to read
+ * @tstamp: on return, the 40bit timestamp value
+ *
+ * Read a 40bit timestamp value out of the two associated registers in the
+ * quad memory block that is shared between the internal PHYs of the E822
+ * family of devices.
+ */
+static int
+ice_read_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx, u64 *tstamp)
+{
+	u16 lo_addr, hi_addr;
+	u32 lo, hi;
+	int err;
+
+	lo_addr = (u16)TS_L(Q_REG_TX_MEMORY_BANK_START, idx);
+	hi_addr = (u16)TS_H(Q_REG_TX_MEMORY_BANK_START, idx);
+
+	err = ice_read_quad_reg_e822(hw, quad, lo_addr, &lo);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read low PTP timestamp register, err %d\n",
+			  err);
+		return err;
+	}
+
+	err = ice_read_quad_reg_e822(hw, quad, hi_addr, &hi);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read high PTP timestamp register, err %d\n",
+			  err);
+		return err;
+	}
+
+	/* For E822 based internal PHYs, the timestamp is reported with the
+	 * lower 8 bits in the low register, and the upper 32 bits in the high
+	 * register.
+	 */
+	*tstamp = ((u64)hi) << TS_PHY_HIGH_S | ((u64)lo & TS_PHY_LOW_M);
+
+	return 0;
+}
+
+/**
+ * ice_clear_phy_tstamp_e822 - Clear a timestamp from the quad block
+ * @hw: pointer to the HW struct
+ * @quad: the quad to read from
+ * @idx: the timestamp index to reset
+ *
+ * Clear a timestamp, resetting its valid bit, from the PHY quad block that is
+ * shared between the internal PHYs on the E822 devices.
+ */
+static int
+ice_clear_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx)
+{
+	u16 lo_addr, hi_addr;
+	int err;
+
+	lo_addr = (u16)TS_L(Q_REG_TX_MEMORY_BANK_START, idx);
+	hi_addr = (u16)TS_H(Q_REG_TX_MEMORY_BANK_START, idx);
+
+	err = ice_write_quad_reg_e822(hw, quad, lo_addr, 0);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register, err %d\n",
+			  err);
+		return err;
+	}
+
+	err = ice_write_quad_reg_e822(hw, quad, hi_addr, 0);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register, err %d\n",
+			  err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_reset_ts_memory_quad_e822 - Clear all timestamps from the quad block
+ * @hw: pointer to the HW struct
+ * @quad: the quad to read from
+ *
+ * Clear all timestamps from the PHY quad block that is shared between the
+ * internal PHYs on the E822 devices.
+ */
+void ice_ptp_reset_ts_memory_quad_e822(struct ice_hw *hw, u8 quad)
+{
+	ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, Q_REG_TS_CTRL_M);
+	ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, ~(u32)Q_REG_TS_CTRL_M);
+}
+
+/**
+ * ice_ptp_reset_ts_memory_e822 - Clear all timestamps from all quad blocks
+ * @hw: pointer to the HW struct
+ */
+static void ice_ptp_reset_ts_memory_e822(struct ice_hw *hw)
+{
+	unsigned int quad;
+
+	for (quad = 0; quad < ICE_MAX_QUAD; quad++)
+		ice_ptp_reset_ts_memory_quad_e822(hw, quad);
+}
+
+/**
+ * ice_read_cgu_reg_e822 - Read a CGU register
+ * @hw: pointer to the HW struct
+ * @addr: Register address to read
+ * @val: storage for register value read
+ *
+ * Read the contents of a register of the Clock Generation Unit. Only
+ * applicable to E822 devices.
+ */
+static int
+ice_read_cgu_reg_e822(struct ice_hw *hw, u32 addr, u32 *val)
+{
+	struct ice_sbq_msg_input cgu_msg;
+	int err;
+
+	cgu_msg.opcode = ice_sbq_msg_rd;
+	cgu_msg.dest_dev = cgu;
+	cgu_msg.msg_addr_low = addr;
+	cgu_msg.msg_addr_high = 0x0;
+
+	err = ice_sbq_rw_reg(hw, &cgu_msg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read CGU register 0x%04x, err %d\n",
+			  addr, err);
+		return err;
+	}
+
+	*val = cgu_msg.data;
+
+	return err;
+}
+
+/**
+ * ice_write_cgu_reg_e822 - Write a CGU register
+ * @hw: pointer to the HW struct
+ * @addr: Register address to write
+ * @val: value to write into the register
+ *
+ * Write the specified value to a register of the Clock Generation Unit. Only
+ * applicable to E822 devices.
+ */
+static int
+ice_write_cgu_reg_e822(struct ice_hw *hw, u32 addr, u32 val)
+{
+	struct ice_sbq_msg_input cgu_msg;
+	int err;
+
+	cgu_msg.opcode = ice_sbq_msg_wr;
+	cgu_msg.dest_dev = cgu;
+	cgu_msg.msg_addr_low = addr;
+	cgu_msg.msg_addr_high = 0x0;
+	cgu_msg.data = val;
+
+	err = ice_sbq_rw_reg(hw, &cgu_msg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write CGU register 0x%04x, err %d\n",
+			  addr, err);
+		return err;
+	}
+
+	return err;
+}
+
+/**
+ * ice_clk_freq_str - Convert time_ref_freq to string
+ * @clk_freq: Clock frequency
+ *
+ * Convert the specified TIME_REF clock frequency to a string.
+ */
+static const char *ice_clk_freq_str(u8 clk_freq)
+{
+	switch ((enum ice_time_ref_freq)clk_freq) {
+	case ICE_TIME_REF_FREQ_25_000:
+		return "25 MHz";
+	case ICE_TIME_REF_FREQ_122_880:
+		return "122.88 MHz";
+	case ICE_TIME_REF_FREQ_125_000:
+		return "125 MHz";
+	case ICE_TIME_REF_FREQ_153_600:
+		return "153.6 MHz";
+	case ICE_TIME_REF_FREQ_156_250:
+		return "156.25 MHz";
+	case ICE_TIME_REF_FREQ_245_760:
+		return "245.76 MHz";
+	default:
+		return "Unknown";
+	}
+}
+
+/**
+ * ice_clk_src_str - Convert time_ref_src to string
+ * @clk_src: Clock source
+ *
+ * Convert the specified clock source to its string name.
+ */
+static const char *ice_clk_src_str(u8 clk_src)
+{
+	switch ((enum ice_clk_src)clk_src) {
+	case ICE_CLK_SRC_TCX0:
+		return "TCX0";
+	case ICE_CLK_SRC_TIME_REF:
+		return "TIME_REF";
+	default:
+		return "Unknown";
+	}
+}
+
+/**
+ * ice_cfg_cgu_pll_e822 - Configure the Clock Generation Unit
+ * @hw: pointer to the HW struct
+ * @clk_freq: Clock frequency to program
+ * @clk_src: Clock source to select (TIME_REF, or TCX0)
+ *
+ * Configure the Clock Generation Unit with the desired clock frequency and
+ * time reference, enabling the PLL which drives the PTP hardware clock.
+ */
+static int
+ice_cfg_cgu_pll_e822(struct ice_hw *hw, enum ice_time_ref_freq clk_freq,
+		     enum ice_clk_src clk_src)
+{
+	union tspll_ro_bwm_lf bwm_lf;
+	union nac_cgu_dword19 dw19;
+	union nac_cgu_dword22 dw22;
+	union nac_cgu_dword24 dw24;
+	union nac_cgu_dword9 dw9;
+	int err;
+
+	if (clk_freq >= NUM_ICE_TIME_REF_FREQ) {
+		dev_warn(ice_hw_to_dev(hw), "Invalid TIME_REF frequency %u\n",
+			 clk_freq);
+		return -EINVAL;
+	}
+
+	if (clk_src >= NUM_ICE_CLK_SRC) {
+		dev_warn(ice_hw_to_dev(hw), "Invalid clock source %u\n",
+			 clk_src);
+		return -EINVAL;
+	}
+
+	if (clk_src == ICE_CLK_SRC_TCX0 &&
+	    clk_freq != ICE_TIME_REF_FREQ_25_000) {
+		dev_warn(ice_hw_to_dev(hw),
+			 "TCX0 only supports 25 MHz frequency\n");
+		return -EINVAL;
+	}
+
+	err = ice_read_cgu_reg_e822(hw, NAC_CGU_DWORD9, &dw9.val);
+	if (err)
+		return err;
+
+	err = ice_read_cgu_reg_e822(hw, NAC_CGU_DWORD24, &dw24.val);
+	if (err)
+		return err;
+
+	err = ice_read_cgu_reg_e822(hw, TSPLL_RO_BWM_LF, &bwm_lf.val);
+	if (err)
+		return err;
+
+	/* Log the current clock configuration */
+	ice_debug(hw, ICE_DBG_PTP, "Current CGU configuration -- %s, clk_src %s, clk_freq %s, PLL %s\n",
+		  dw24.field.ts_pll_enable ? "enabled" : "disabled",
+		  ice_clk_src_str(dw24.field.time_ref_sel),
+		  ice_clk_freq_str(dw9.field.time_ref_freq_sel),
+		  bwm_lf.field.plllock_true_lock_cri ? "locked" : "unlocked");
+
+	/* Disable the PLL before changing the clock source or frequency */
+	if (dw24.field.ts_pll_enable) {
+		dw24.field.ts_pll_enable = 0;
+
+		err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD24, dw24.val);
+		if (err)
+			return err;
+	}
+
+	/* Set the frequency */
+	dw9.field.time_ref_freq_sel = clk_freq;
+	err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD9, dw9.val);
+	if (err)
+		return err;
+
+	/* Configure the TS PLL feedback divisor */
+	err = ice_read_cgu_reg_e822(hw, NAC_CGU_DWORD19, &dw19.val);
+	if (err)
+		return err;
+
+	dw19.field.tspll_fbdiv_intgr = e822_cgu_params[clk_freq].feedback_div;
+	dw19.field.tspll_ndivratio = 1;
+
+	err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD19, dw19.val);
+	if (err)
+		return err;
+
+	/* Configure the TS PLL post divisor */
+	err = ice_read_cgu_reg_e822(hw, NAC_CGU_DWORD22, &dw22.val);
+	if (err)
+		return err;
+
+	dw22.field.time1588clk_div = e822_cgu_params[clk_freq].post_pll_div;
+	dw22.field.time1588clk_sel_div2 = 0;
+
+	err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD22, dw22.val);
+	if (err)
+		return err;
+
+	/* Configure the TS PLL pre divisor and clock source */
+	err = ice_read_cgu_reg_e822(hw, NAC_CGU_DWORD24, &dw24.val);
+	if (err)
+		return err;
+
+	dw24.field.ref1588_ck_div = e822_cgu_params[clk_freq].refclk_pre_div;
+	dw24.field.tspll_fbdiv_frac = e822_cgu_params[clk_freq].frac_n_div;
+	dw24.field.time_ref_sel = clk_src;
+
+	err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD24, dw24.val);
+	if (err)
+		return err;
+
+	/* Finally, enable the PLL */
+	dw24.field.ts_pll_enable = 1;
+
+	err = ice_write_cgu_reg_e822(hw, NAC_CGU_DWORD24, dw24.val);
+	if (err)
+		return err;
+
+	/* Wait to verify if the PLL locks */
+	usleep_range(1000, 5000);
+
+	err = ice_read_cgu_reg_e822(hw, TSPLL_RO_BWM_LF, &bwm_lf.val);
+	if (err)
+		return err;
+
+	if (!bwm_lf.field.plllock_true_lock_cri) {
+		dev_warn(ice_hw_to_dev(hw), "CGU PLL failed to lock\n");
+		return -EBUSY;
+	}
+
+	/* Log the current clock configuration */
+	ice_debug(hw, ICE_DBG_PTP, "New CGU configuration -- %s, clk_src %s, clk_freq %s, PLL %s\n",
+		  dw24.field.ts_pll_enable ? "enabled" : "disabled",
+		  ice_clk_src_str(dw24.field.time_ref_sel),
+		  ice_clk_freq_str(dw9.field.time_ref_freq_sel),
+		  bwm_lf.field.plllock_true_lock_cri ? "locked" : "unlocked");
+
+	return 0;
+}
+
+/**
+ * ice_init_cgu_e822 - Initialize CGU with settings from firmware
+ * @hw: pointer to the HW structure
+ *
+ * Initialize the Clock Generation Unit of the E822 device.
+ */
+static int ice_init_cgu_e822(struct ice_hw *hw)
+{
+	struct ice_ts_func_info *ts_info = &hw->func_caps.ts_func_info;
+	union tspll_cntr_bist_settings cntr_bist;
+	int err;
+
+	err = ice_read_cgu_reg_e822(hw, TSPLL_CNTR_BIST_SETTINGS,
+				    &cntr_bist.val);
+	if (err)
+		return err;
+
+	/* Disable sticky lock detection so lock err reported is accurate */
+	cntr_bist.field.i_plllock_sel_0 = 0;
+	cntr_bist.field.i_plllock_sel_1 = 0;
+
+	err = ice_write_cgu_reg_e822(hw, TSPLL_CNTR_BIST_SETTINGS,
+				     cntr_bist.val);
+	if (err)
+		return err;
+
+	/* Configure the CGU PLL using the parameters from the function
+	 * capabilities.
+	 */
+	err = ice_cfg_cgu_pll_e822(hw, ts_info->time_ref,
+				   (enum ice_clk_src)ts_info->clk_src);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/**
+ * ice_ptp_set_vernier_wl - Set the window length for vernier calibration
+ * @hw: pointer to the HW struct
+ *
+ * Set the window length used for the vernier port calibration process.
+ */
+static int ice_ptp_set_vernier_wl(struct ice_hw *hw)
+{
+	u8 port;
+
+	for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) {
+		int err;
+
+		err = ice_write_phy_reg_e822(hw, port, P_REG_WL,
+					     PTP_VERNIER_WL);
+		if (err) {
+			ice_debug(hw, ICE_DBG_PTP, "Failed to set vernier window length for port %u, err %d\n",
+				  port, err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_init_phc_e822 - Perform E822 specific PHC initialization
+ * @hw: pointer to HW struct
+ *
+ * Perform PHC initialization steps specific to E822 devices.
+ */
+static int ice_ptp_init_phc_e822(struct ice_hw *hw)
+{
+	int err;
+	u32 regval;
+
+	/* Enable reading switch and PHY registers over the sideband queue */
+#define PF_SB_REM_DEV_CTL_SWITCH_READ BIT(1)
+#define PF_SB_REM_DEV_CTL_PHY0 BIT(2)
+	regval = rd32(hw, PF_SB_REM_DEV_CTL);
+	regval |= (PF_SB_REM_DEV_CTL_SWITCH_READ |
+		   PF_SB_REM_DEV_CTL_PHY0);
+	wr32(hw, PF_SB_REM_DEV_CTL, regval);
+
+	/* Initialize the Clock Generation Unit */
+	err = ice_init_cgu_e822(hw);
+	if (err)
+		return err;
+
+	/* Set window length for all the ports */
+	return ice_ptp_set_vernier_wl(hw);
+}
+
+/**
+ * ice_ptp_prep_phy_time_e822 - Prepare PHY port with initial time
+ * @hw: pointer to the HW struct
+ * @time: Time to initialize the PHY port clocks to
+ *
+ * Program the PHY port registers with a new initial time value. The port
+ * clock will be initialized once the driver issues an INIT_TIME sync
+ * command. The time value is the upper 32 bits of the PHY timer, usually in
+ * units of nominal nanoseconds.
+ */
+static int
+ice_ptp_prep_phy_time_e822(struct ice_hw *hw, u32 time)
+{
+	u64 phy_time;
+	u8 port;
+	int err;
+
+	/* The time represents the upper 32 bits of the PHY timer, so we need
+	 * to shift to account for this when programming.
+	 */
+	phy_time = (u64)time << 32;
+
+	for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) {
+		/* Tx case */
+		err = ice_write_64b_phy_reg_e822(hw, port,
+						 P_REG_TX_TIMER_INC_PRE_L,
+						 phy_time);
+		if (err)
+			goto exit_err;
+
+		/* Rx case */
+		err = ice_write_64b_phy_reg_e822(hw, port,
+						 P_REG_RX_TIMER_INC_PRE_L,
+						 phy_time);
+		if (err)
+			goto exit_err;
+	}
+
+	return 0;
+
+exit_err:
+	ice_debug(hw, ICE_DBG_PTP, "Failed to write init time for port %u, err %d\n",
+		  port, err);
+
+	return err;
+}
+
+/**
+ * ice_ptp_prep_port_adj_e822 - Prepare a single port for time adjust
+ * @hw: pointer to HW struct
+ * @port: Port number to be programmed
+ * @time: time in cycles to adjust the port Tx and Rx clocks
+ *
+ * Program the port for an atomic adjustment by writing the Tx and Rx timer
+ * registers. The atomic adjustment won't be completed until the driver issues
+ * an ADJ_TIME command.
+ *
+ * Note that time is not in units of nanoseconds. It is in clock time
+ * including the lower sub-nanosecond portion of the port timer.
+ *
+ * Negative adjustments are supported using 2s complement arithmetic.
+ */
+static int
+ice_ptp_prep_port_adj_e822(struct ice_hw *hw, u8 port, s64 time)
+{
+	u32 l_time, u_time;
+	int err;
+
+	l_time = lower_32_bits(time);
+	u_time = upper_32_bits(time);
+
+	/* Tx case */
+	err = ice_write_phy_reg_e822(hw, port, P_REG_TX_TIMER_INC_PRE_L,
+				     l_time);
+	if (err)
+		goto exit_err;
+
+	err = ice_write_phy_reg_e822(hw, port, P_REG_TX_TIMER_INC_PRE_U,
+				     u_time);
+	if (err)
+		goto exit_err;
+
+	/* Rx case */
+	err = ice_write_phy_reg_e822(hw, port, P_REG_RX_TIMER_INC_PRE_L,
+				     l_time);
+	if (err)
+		goto exit_err;
+
+	err = ice_write_phy_reg_e822(hw, port, P_REG_RX_TIMER_INC_PRE_U,
+				     u_time);
+	if (err)
+		goto exit_err;
+
+	return 0;
+
+exit_err:
+	ice_debug(hw, ICE_DBG_PTP, "Failed to write time adjust for port %u, err %d\n",
+		  port, err);
+	return err;
+}
+
+/**
+ * ice_ptp_prep_phy_adj_e822 - Prep PHY ports for a time adjustment
+ * @hw: pointer to HW struct
+ * @adj: adjustment in nanoseconds
+ *
+ * Prepare the PHY ports for an atomic time adjustment by programming the PHY
+ * Tx and Rx port registers. The actual adjustment is completed by issuing an
+ * ADJ_TIME or ADJ_TIME_AT_TIME sync command.
+ */
+static int
+ice_ptp_prep_phy_adj_e822(struct ice_hw *hw, s32 adj)
+{
+	s64 cycles;
+	u8 port;
+
+	/* The port clock supports adjustment of the sub-nanosecond portion of
+	 * the clock. We shift the provided adjustment in nanoseconds to
+	 * calculate the appropriate adjustment to program into the PHY ports.
+	 */
+	if (adj > 0)
+		cycles = (s64)adj << 32;
+	else
+		cycles = -(((s64)-adj) << 32);
+
+	for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) {
+		int err;
+
+		err = ice_ptp_prep_port_adj_e822(hw, port, cycles);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_prep_phy_incval_e822 - Prepare PHY ports for time adjustment
+ * @hw: pointer to HW struct
+ * @incval: new increment value to prepare
+ *
+ * Prepare each of the PHY ports for a new increment value by programming the
+ * port's TIMETUS registers. The new increment value will be updated after
+ * issuing an INIT_INCVAL command.
+ */
+static int
+ice_ptp_prep_phy_incval_e822(struct ice_hw *hw, u64 incval)
+{
+	int err;
+	u8 port;
+
+	for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) {
+		err = ice_write_40b_phy_reg_e822(hw, port, P_REG_TIMETUS_L,
+						 incval);
+		if (err)
+			goto exit_err;
+	}
+
+	return 0;
+
+exit_err:
+	ice_debug(hw, ICE_DBG_PTP, "Failed to write incval for port %u, err %d\n",
+		  port, err);
+
+	return err;
+}
+
+/**
+ * ice_ptp_read_port_capture - Read a port's local time capture
+ * @hw: pointer to HW struct
+ * @port: Port number to read
+ * @tx_ts: on return, the Tx port time capture
+ * @rx_ts: on return, the Rx port time capture
+ *
+ * Read the port's Tx and Rx local time capture values.
+ *
+ * Note this has no equivalent for the E810 devices.
+ */
+static int
+ice_ptp_read_port_capture(struct ice_hw *hw, u8 port, u64 *tx_ts, u64 *rx_ts)
+{
+	int err;
+
+	/* Tx case */
+	err = ice_read_64b_phy_reg_e822(hw, port, P_REG_TX_CAPTURE_L, tx_ts);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read REG_TX_CAPTURE, err %d\n",
+			  err);
+		return err;
+	}
+
+	ice_debug(hw, ICE_DBG_PTP, "tx_init = 0x%016llx\n",
+		  (unsigned long long)*tx_ts);
+
+	/* Rx case */
+	err = ice_read_64b_phy_reg_e822(hw, port, P_REG_RX_CAPTURE_L, rx_ts);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_CAPTURE, err %d\n",
+			  err);
+		return err;
+	}
+
+	ice_debug(hw, ICE_DBG_PTP, "rx_init = 0x%016llx\n",
+		  (unsigned long long)*rx_ts);
+
+	return 0;
+}
+
+/**
+ * ice_ptp_write_port_cmd_e822 - Prepare a single PHY port for a timer command
+ * @hw: pointer to HW struct
+ * @port: Port to which cmd has to be sent
+ * @cmd: Command to be sent to the port
+ *
+ * Prepare the requested port for an upcoming timer sync command.
+ *
+ * Do not use this function directly. If you want to configure exactly one
+ * port, use ice_ptp_one_port_cmd() instead.
+ */
+static int
+ice_ptp_write_port_cmd_e822(struct ice_hw *hw, u8 port, enum ice_ptp_tmr_cmd cmd)
+{
+	u32 cmd_val, val;
+	u8 tmr_idx;
+	int err;
+
+	tmr_idx = ice_get_ptp_src_clock_index(hw);
+	cmd_val = tmr_idx << SEL_PHY_SRC;
+	switch (cmd) {
+	case INIT_TIME:
+		cmd_val |= PHY_CMD_INIT_TIME;
+		break;
+	case INIT_INCVAL:
+		cmd_val |= PHY_CMD_INIT_INCVAL;
+		break;
+	case ADJ_TIME:
+		cmd_val |= PHY_CMD_ADJ_TIME;
+		break;
+	case READ_TIME:
+		cmd_val |= PHY_CMD_READ_TIME;
+		break;
+	case ADJ_TIME_AT_TIME:
+		cmd_val |= PHY_CMD_ADJ_TIME_AT_TIME;
+		break;
+	case ICE_PTP_NOP:
+		break;
+	}
+
+	/* Tx case */
+	/* Read, modify, write */
+	err = ice_read_phy_reg_e822(hw, port, P_REG_TX_TMR_CMD, &val);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_TMR_CMD, err %d\n",
+			  err);
+		return err;
+	}
+
+	/* Modify necessary bits only and perform write */
+	val &= ~TS_CMD_MASK;
+	val |= cmd_val;
+
+	err = ice_write_phy_reg_e822(hw, port, P_REG_TX_TMR_CMD, val);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write back TX_TMR_CMD, err %d\n",
+			  err);
+		return err;
+	}
+
+	/* Rx case */
+	/* Read, modify, write */
+	err = ice_read_phy_reg_e822(hw, port, P_REG_RX_TMR_CMD, &val);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_TMR_CMD, err %d\n",
+			  err);
+		return err;
+	}
+
+	/* Modify necessary bits only and perform write */
+	val &= ~TS_CMD_MASK;
+	val |= cmd_val;
+
+	err = ice_write_phy_reg_e822(hw, port, P_REG_RX_TMR_CMD, val);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write back RX_TMR_CMD, err %d\n",
+			  err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_one_port_cmd - Prepare one port for a timer command
+ * @hw: pointer to the HW struct
+ * @configured_port: the port to configure with configured_cmd
+ * @configured_cmd: timer command to prepare on the configured_port
+ *
+ * Prepare the configured_port for the configured_cmd, and prepare all other
+ * ports for ICE_PTP_NOP. This causes the configured_port to execute the
+ * desired command while all other ports perform no operation.
+ */
+static int
+ice_ptp_one_port_cmd(struct ice_hw *hw, u8 configured_port,
+		     enum ice_ptp_tmr_cmd configured_cmd)
+{
+	u8 port;
+
+	for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) {
+		enum ice_ptp_tmr_cmd cmd;
+		int err;
+
+		if (port == configured_port)
+			cmd = configured_cmd;
+		else
+			cmd = ICE_PTP_NOP;
+
+		err = ice_ptp_write_port_cmd_e822(hw, port, cmd);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_port_cmd_e822 - Prepare all ports for a timer command
+ * @hw: pointer to the HW struct
+ * @cmd: timer command to prepare
+ *
+ * Prepare all ports connected to this device for an upcoming timer sync
+ * command.
+ */
+static int
+ice_ptp_port_cmd_e822(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
+{
+	u8 port;
+
+	for (port = 0; port < ICE_NUM_EXTERNAL_PORTS; port++) {
+		int err;
+
+		err = ice_ptp_write_port_cmd_e822(hw, port, cmd);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* E822 Vernier calibration functions
+ *
+ * The following functions are used as part of the vernier calibration of
+ * a port. This calibration increases the precision of the timestamps on the
+ * port.
+ */
+
+/**
+ * ice_phy_get_speed_and_fec_e822 - Get link speed and FEC based on serdes mode
+ * @hw: pointer to HW struct
+ * @port: the port to read from
+ * @link_out: if non-NULL, holds link speed on success
+ * @fec_out: if non-NULL, holds FEC algorithm on success
+ *
+ * Read the serdes data for the PHY port and extract the link speed and FEC
+ * algorithm.
+ */
+static int
+ice_phy_get_speed_and_fec_e822(struct ice_hw *hw, u8 port,
+			       enum ice_ptp_link_spd *link_out,
+			       enum ice_ptp_fec_mode *fec_out)
+{
+	enum ice_ptp_link_spd link;
+	enum ice_ptp_fec_mode fec;
+	u32 serdes;
+	int err;
+
+	err = ice_read_phy_reg_e822(hw, port, P_REG_LINK_SPEED, &serdes);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read serdes info\n");
+		return err;
+	}
+
+	/* Determine the FEC algorithm */
+	fec = (enum ice_ptp_fec_mode)P_REG_LINK_SPEED_FEC_MODE(serdes);
+
+	serdes &= P_REG_LINK_SPEED_SERDES_M;
+
+	/* Determine the link speed */
+	if (fec == ICE_PTP_FEC_MODE_RS_FEC) {
+		switch (serdes) {
+		case ICE_PTP_SERDES_25G:
+			link = ICE_PTP_LNK_SPD_25G_RS;
+			break;
+		case ICE_PTP_SERDES_50G:
+			link = ICE_PTP_LNK_SPD_50G_RS;
+			break;
+		case ICE_PTP_SERDES_100G:
+			link = ICE_PTP_LNK_SPD_100G_RS;
+			break;
+		default:
+			return -EIO;
+		}
+	} else {
+		switch (serdes) {
+		case ICE_PTP_SERDES_1G:
+			link = ICE_PTP_LNK_SPD_1G;
+			break;
+		case ICE_PTP_SERDES_10G:
+			link = ICE_PTP_LNK_SPD_10G;
+			break;
+		case ICE_PTP_SERDES_25G:
+			link = ICE_PTP_LNK_SPD_25G;
+			break;
+		case ICE_PTP_SERDES_40G:
+			link = ICE_PTP_LNK_SPD_40G;
+			break;
+		case ICE_PTP_SERDES_50G:
+			link = ICE_PTP_LNK_SPD_50G;
+			break;
+		default:
+			return -EIO;
+		}
+	}
+
+	if (link_out)
+		*link_out = link;
+	if (fec_out)
+		*fec_out = fec;
+
+	return 0;
+}
+
+/**
+ * ice_phy_cfg_lane_e822 - Configure PHY quad for single/multi-lane timestamp
+ * @hw: pointer to HW struct
+ * @port: to configure the quad for
+ */
+static void ice_phy_cfg_lane_e822(struct ice_hw *hw, u8 port)
+{
+	enum ice_ptp_link_spd link_spd;
+	int err;
+	u32 val;
+	u8 quad;
+
+	err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, NULL);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to get PHY link speed, err %d\n",
+			  err);
+		return;
+	}
+
+	quad = port / ICE_PORTS_PER_QUAD;
+
+	err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG, &val);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_MEM_GLB_CFG, err %d\n",
+			  err);
+		return;
+	}
+
+	if (link_spd >= ICE_PTP_LNK_SPD_40G)
+		val &= ~Q_REG_TX_MEM_GBL_CFG_LANE_TYPE_M;
+	else
+		val |= Q_REG_TX_MEM_GBL_CFG_LANE_TYPE_M;
+
+	err = ice_write_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG, val);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write back TX_MEM_GBL_CFG, err %d\n",
+			  err);
+		return;
+	}
+}
+
+/**
+ * ice_phy_cfg_uix_e822 - Configure Serdes UI to TU conversion for E822
+ * @hw: pointer to the HW structure
+ * @port: the port to configure
+ *
+ * Program the conversion ration of Serdes clock "unit intervals" (UIs) to PHC
+ * hardware clock time units (TUs). That is, determine the number of TUs per
+ * serdes unit interval, and program the UIX registers with this conversion.
+ *
+ * This conversion is used as part of the calibration process when determining
+ * the additional error of a timestamp vs the real time of transmission or
+ * receipt of the packet.
+ *
+ * Hardware uses the number of TUs per 66 UIs, written to the UIX registers
+ * for the two main serdes clock rates, 10G/40G and 25G/100G serdes clocks.
+ *
+ * To calculate the conversion ratio, we use the following facts:
+ *
+ * a) the clock frequency in Hz (cycles per second)
+ * b) the number of TUs per cycle (the increment value of the clock)
+ * c) 1 second per 1 billion nanoseconds
+ * d) the duration of 66 UIs in nanoseconds
+ *
+ * Given these facts, we can use the following table to work out what ratios
+ * to multiply in order to get the number of TUs per 66 UIs:
+ *
+ * cycles |   1 second   | incval (TUs) | nanoseconds
+ * -------+--------------+--------------+-------------
+ * second | 1 billion ns |    cycle     |   66 UIs
+ *
+ * To perform the multiplication using integers without too much loss of
+ * precision, we can take use the following equation:
+ *
+ * (freq * incval * 6600 LINE_UI ) / ( 100 * 1 billion)
+ *
+ * We scale up to using 6600 UI instead of 66 in order to avoid fractional
+ * nanosecond UIs (66 UI at 10G/40G is 6.4 ns)
+ *
+ * The increment value has a maximum expected range of about 34 bits, while
+ * the frequency value is about 29 bits. Multiplying these values shouldn't
+ * overflow the 64 bits. However, we must then further multiply them again by
+ * the Serdes unit interval duration. To avoid overflow here, we split the
+ * overall divide by 1e11 into a divide by 256 (shift down by 8 bits) and
+ * a divide by 390,625,000. This does lose some precision, but avoids
+ * miscalculation due to arithmetic overflow.
+ */
+static int ice_phy_cfg_uix_e822(struct ice_hw *hw, u8 port)
+{
+	u64 cur_freq, clk_incval, tu_per_sec, uix;
+	int err;
+
+	cur_freq = ice_e822_pll_freq(ice_e822_time_ref(hw));
+	clk_incval = ice_ptp_read_src_incval(hw);
+
+	/* Calculate TUs per second divided by 256 */
+	tu_per_sec = (cur_freq * clk_incval) >> 8;
+
+#define LINE_UI_10G_40G 640 /* 6600 UIs is 640 nanoseconds at 10Gb/40Gb */
+#define LINE_UI_25G_100G 256 /* 6600 UIs is 256 nanoseconds at 25Gb/100Gb */
+
+	/* Program the 10Gb/40Gb conversion ratio */
+	uix = div_u64(tu_per_sec * LINE_UI_10G_40G, 390625000);
+
+	err = ice_write_64b_phy_reg_e822(hw, port, P_REG_UIX66_10G_40G_L,
+					 uix);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write UIX66_10G_40G, err %d\n",
+			  err);
+		return err;
+	}
+
+	/* Program the 25Gb/100Gb conversion ratio */
+	uix = div_u64(tu_per_sec * LINE_UI_25G_100G, 390625000);
+
+	err = ice_write_64b_phy_reg_e822(hw, port, P_REG_UIX66_25G_100G_L,
+					 uix);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write UIX66_25G_100G, err %d\n",
+			  err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_phy_cfg_parpcs_e822 - Configure TUs per PAR/PCS clock cycle
+ * @hw: pointer to the HW struct
+ * @port: port to configure
+ *
+ * Configure the number of TUs for the PAR and PCS clocks used as part of the
+ * timestamp calibration process. This depends on the link speed, as the PHY
+ * uses different markers depending on the speed.
+ *
+ * 1Gb/10Gb/25Gb:
+ * - Tx/Rx PAR/PCS markers
+ *
+ * 25Gb RS:
+ * - Tx/Rx Reed Solomon gearbox PAR/PCS markers
+ *
+ * 40Gb/50Gb:
+ * - Tx/Rx PAR/PCS markers
+ * - Rx Deskew PAR/PCS markers
+ *
+ * 50G RS and 100GB RS:
+ * - Tx/Rx Reed Solomon gearbox PAR/PCS markers
+ * - Rx Deskew PAR/PCS markers
+ * - Tx PAR/PCS markers
+ *
+ * To calculate the conversion, we use the PHC clock frequency (cycles per
+ * second), the increment value (TUs per cycle), and the related PHY clock
+ * frequency to calculate the TUs per unit of the PHY link clock. The
+ * following table shows how the units convert:
+ *
+ * cycles |  TUs  | second
+ * -------+-------+--------
+ * second | cycle | cycles
+ *
+ * For each conversion register, look up the appropriate frequency from the
+ * e822 PAR/PCS table and calculate the TUs per unit of that clock. Program
+ * this to the appropriate register, preparing hardware to perform timestamp
+ * calibration to calculate the total Tx or Rx offset to adjust the timestamp
+ * in order to calibrate for the internal PHY delays.
+ *
+ * Note that the increment value ranges up to ~34 bits, and the clock
+ * frequency is ~29 bits, so multiplying them together should fit within the
+ * 64 bit arithmetic.
+ */
+static int ice_phy_cfg_parpcs_e822(struct ice_hw *hw, u8 port)
+{
+	u64 cur_freq, clk_incval, tu_per_sec, phy_tus;
+	enum ice_ptp_link_spd link_spd;
+	enum ice_ptp_fec_mode fec_mode;
+	int err;
+
+	err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
+	if (err)
+		return err;
+
+	cur_freq = ice_e822_pll_freq(ice_e822_time_ref(hw));
+	clk_incval = ice_ptp_read_src_incval(hw);
+
+	/* Calculate TUs per cycle of the PHC clock */
+	tu_per_sec = cur_freq * clk_incval;
+
+	/* For each PHY conversion register, look up the appropriate link
+	 * speed frequency and determine the TUs per that clock's cycle time.
+	 * Split this into a high and low value and then program the
+	 * appropriate register. If that link speed does not use the
+	 * associated register, write zeros to clear it instead.
+	 */
+
+	/* P_REG_PAR_TX_TUS */
+	if (e822_vernier[link_spd].tx_par_clk)
+		phy_tus = div_u64(tu_per_sec,
+				  e822_vernier[link_spd].tx_par_clk);
+	else
+		phy_tus = 0;
+
+	err = ice_write_40b_phy_reg_e822(hw, port, P_REG_PAR_TX_TUS_L,
+					 phy_tus);
+	if (err)
+		return err;
+
+	/* P_REG_PAR_RX_TUS */
+	if (e822_vernier[link_spd].rx_par_clk)
+		phy_tus = div_u64(tu_per_sec,
+				  e822_vernier[link_spd].rx_par_clk);
+	else
+		phy_tus = 0;
+
+	err = ice_write_40b_phy_reg_e822(hw, port, P_REG_PAR_RX_TUS_L,
+					 phy_tus);
+	if (err)
+		return err;
+
+	/* P_REG_PCS_TX_TUS */
+	if (e822_vernier[link_spd].tx_pcs_clk)
+		phy_tus = div_u64(tu_per_sec,
+				  e822_vernier[link_spd].tx_pcs_clk);
+	else
+		phy_tus = 0;
+
+	err = ice_write_40b_phy_reg_e822(hw, port, P_REG_PCS_TX_TUS_L,
+					 phy_tus);
+	if (err)
+		return err;
+
+	/* P_REG_PCS_RX_TUS */
+	if (e822_vernier[link_spd].rx_pcs_clk)
+		phy_tus = div_u64(tu_per_sec,
+				  e822_vernier[link_spd].rx_pcs_clk);
+	else
+		phy_tus = 0;
+
+	err = ice_write_40b_phy_reg_e822(hw, port, P_REG_PCS_RX_TUS_L,
+					 phy_tus);
+	if (err)
+		return err;
+
+	/* P_REG_DESK_PAR_TX_TUS */
+	if (e822_vernier[link_spd].tx_desk_rsgb_par)
+		phy_tus = div_u64(tu_per_sec,
+				  e822_vernier[link_spd].tx_desk_rsgb_par);
+	else
+		phy_tus = 0;
+
+	err = ice_write_40b_phy_reg_e822(hw, port, P_REG_DESK_PAR_TX_TUS_L,
+					 phy_tus);
+	if (err)
+		return err;
+
+	/* P_REG_DESK_PAR_RX_TUS */
+	if (e822_vernier[link_spd].rx_desk_rsgb_par)
+		phy_tus = div_u64(tu_per_sec,
+				  e822_vernier[link_spd].rx_desk_rsgb_par);
+	else
+		phy_tus = 0;
+
+	err = ice_write_40b_phy_reg_e822(hw, port, P_REG_DESK_PAR_RX_TUS_L,
+					 phy_tus);
+	if (err)
+		return err;
+
+	/* P_REG_DESK_PCS_TX_TUS */
+	if (e822_vernier[link_spd].tx_desk_rsgb_pcs)
+		phy_tus = div_u64(tu_per_sec,
+				  e822_vernier[link_spd].tx_desk_rsgb_pcs);
+	else
+		phy_tus = 0;
+
+	err = ice_write_40b_phy_reg_e822(hw, port, P_REG_DESK_PCS_TX_TUS_L,
+					 phy_tus);
+	if (err)
+		return err;
+
+	/* P_REG_DESK_PCS_RX_TUS */
+	if (e822_vernier[link_spd].rx_desk_rsgb_pcs)
+		phy_tus = div_u64(tu_per_sec,
+				  e822_vernier[link_spd].rx_desk_rsgb_pcs);
+	else
+		phy_tus = 0;
+
+	return ice_write_40b_phy_reg_e822(hw, port, P_REG_DESK_PCS_RX_TUS_L,
+					  phy_tus);
+}
+
+/**
+ * ice_calc_fixed_tx_offset_e822 - Calculated Fixed Tx offset for a port
+ * @hw: pointer to the HW struct
+ * @link_spd: the Link speed to calculate for
+ *
+ * Calculate the fixed offset due to known static latency data.
+ */
+static u64
+ice_calc_fixed_tx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd)
+{
+	u64 cur_freq, clk_incval, tu_per_sec, fixed_offset;
+
+	cur_freq = ice_e822_pll_freq(ice_e822_time_ref(hw));
+	clk_incval = ice_ptp_read_src_incval(hw);
+
+	/* Calculate TUs per second */
+	tu_per_sec = cur_freq * clk_incval;
+
+	/* Calculate number of TUs to add for the fixed Tx latency. Since the
+	 * latency measurement is in 1/100th of a nanosecond, we need to
+	 * multiply by tu_per_sec and then divide by 1e11. This calculation
+	 * overflows 64 bit integer arithmetic, so break it up into two
+	 * divisions by 1e4 first then by 1e7.
+	 */
+	fixed_offset = div_u64(tu_per_sec, 10000);
+	fixed_offset *= e822_vernier[link_spd].tx_fixed_delay;
+	fixed_offset = div_u64(fixed_offset, 10000000);
+
+	return fixed_offset;
+}
+
+/**
+ * ice_phy_cfg_tx_offset_e822 - Configure total Tx timestamp offset
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to configure
+ *
+ * Program the P_REG_TOTAL_TX_OFFSET register with the total number of TUs to
+ * adjust Tx timestamps by. This is calculated by combining some known static
+ * latency along with the Vernier offset computations done by hardware.
+ *
+ * This function will not return successfully until the Tx offset calculations
+ * have been completed, which requires waiting until at least one packet has
+ * been transmitted by the device. It is safe to call this function
+ * periodically until calibration succeeds, as it will only program the offset
+ * once.
+ *
+ * To avoid overflow, when calculating the offset based on the known static
+ * latency values, we use measurements in 1/100th of a nanosecond, and divide
+ * the TUs per second up front. This avoids overflow while allowing
+ * calculation of the adjustment using integer arithmetic.
+ *
+ * Returns zero on success, -EBUSY if the hardware vernier offset
+ * calibration has not completed, or another error code on failure.
+ */
+int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port)
+{
+	enum ice_ptp_link_spd link_spd;
+	enum ice_ptp_fec_mode fec_mode;
+	u64 total_offset, val;
+	int err;
+	u32 reg;
+
+	/* Nothing to do if we've already programmed the offset */
+	err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OR, &reg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OR for port %u, err %d\n",
+			  port, err);
+		return err;
+	}
+
+	if (reg)
+		return 0;
+
+	err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OV_STATUS, &reg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OV_STATUS for port %u, err %d\n",
+			  port, err);
+		return err;
+	}
+
+	if (!(reg & P_REG_TX_OV_STATUS_OV_M))
+		return -EBUSY;
+
+	err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
+	if (err)
+		return err;
+
+	total_offset = ice_calc_fixed_tx_offset_e822(hw, link_spd);
+
+	/* Read the first Vernier offset from the PHY register and add it to
+	 * the total offset.
+	 */
+	if (link_spd == ICE_PTP_LNK_SPD_1G ||
+	    link_spd == ICE_PTP_LNK_SPD_10G ||
+	    link_spd == ICE_PTP_LNK_SPD_25G ||
+	    link_spd == ICE_PTP_LNK_SPD_25G_RS ||
+	    link_spd == ICE_PTP_LNK_SPD_40G ||
+	    link_spd == ICE_PTP_LNK_SPD_50G) {
+		err = ice_read_64b_phy_reg_e822(hw, port,
+						P_REG_PAR_PCS_TX_OFFSET_L,
+						&val);
+		if (err)
+			return err;
+
+		total_offset += val;
+	}
+
+	/* For Tx, we only need to use the second Vernier offset for
+	 * multi-lane link speeds with RS-FEC. The lanes will always be
+	 * aligned.
+	 */
+	if (link_spd == ICE_PTP_LNK_SPD_50G_RS ||
+	    link_spd == ICE_PTP_LNK_SPD_100G_RS) {
+		err = ice_read_64b_phy_reg_e822(hw, port,
+						P_REG_PAR_TX_TIME_L,
+						&val);
+		if (err)
+			return err;
+
+		total_offset += val;
+	}
+
+	/* Now that the total offset has been calculated, program it to the
+	 * PHY and indicate that the Tx offset is ready. After this,
+	 * timestamps will be enabled.
+	 */
+	err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_TX_OFFSET_L,
+					 total_offset);
+	if (err)
+		return err;
+
+	err = ice_write_phy_reg_e822(hw, port, P_REG_TX_OR, 1);
+	if (err)
+		return err;
+
+	dev_info(ice_hw_to_dev(hw), "Port=%d Tx vernier offset calibration complete\n",
+		 port);
+
+	return 0;
+}
+
+/**
+ * ice_phy_calc_pmd_adj_e822 - Calculate PMD adjustment for Rx
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to adjust for
+ * @link_spd: the current link speed of the PHY
+ * @fec_mode: the current FEC mode of the PHY
+ * @pmd_adj: on return, the amount to adjust the Rx total offset by
+ *
+ * Calculates the adjustment to Rx timestamps due to PMD alignment in the PHY.
+ * This varies by link speed and FEC mode. The value calculated accounts for
+ * various delays caused when receiving a packet.
+ */
+static int
+ice_phy_calc_pmd_adj_e822(struct ice_hw *hw, u8 port,
+			  enum ice_ptp_link_spd link_spd,
+			  enum ice_ptp_fec_mode fec_mode, u64 *pmd_adj)
+{
+	u64 cur_freq, clk_incval, tu_per_sec, mult, adj;
+	u8 pmd_align;
+	u32 val;
+	int err;
+
+	err = ice_read_phy_reg_e822(hw, port, P_REG_PMD_ALIGNMENT, &val);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read PMD alignment, err %d\n",
+			  err);
+		return err;
+	}
+
+	pmd_align = (u8)val;
+
+	cur_freq = ice_e822_pll_freq(ice_e822_time_ref(hw));
+	clk_incval = ice_ptp_read_src_incval(hw);
+
+	/* Calculate TUs per second */
+	tu_per_sec = cur_freq * clk_incval;
+
+	/* The PMD alignment adjustment measurement depends on the link speed,
+	 * and whether FEC is enabled. For each link speed, the alignment
+	 * adjustment is calculated by dividing a value by the length of
+	 * a Time Unit in nanoseconds.
+	 *
+	 * 1G: align == 4 ? 10 * 0.8 : (align + 6 % 10) * 0.8
+	 * 10G: align == 65 ? 0 : (align * 0.1 * 32/33)
+	 * 10G w/FEC: align * 0.1 * 32/33
+	 * 25G: align == 65 ? 0 : (align * 0.4 * 32/33)
+	 * 25G w/FEC: align * 0.4 * 32/33
+	 * 40G: align == 65 ? 0 : (align * 0.1 * 32/33)
+	 * 40G w/FEC: align * 0.1 * 32/33
+	 * 50G: align == 65 ? 0 : (align * 0.4 * 32/33)
+	 * 50G w/FEC: align * 0.8 * 32/33
+	 *
+	 * For RS-FEC, if align is < 17 then we must also add 1.6 * 32/33.
+	 *
+	 * To allow for calculating this value using integer arithmetic, we
+	 * instead start with the number of TUs per second, (inverse of the
+	 * length of a Time Unit in nanoseconds), multiply by a value based
+	 * on the PMD alignment register, and then divide by the right value
+	 * calculated based on the table above. To avoid integer overflow this
+	 * division is broken up into a step of dividing by 125 first.
+	 */
+	if (link_spd == ICE_PTP_LNK_SPD_1G) {
+		if (pmd_align == 4)
+			mult = 10;
+		else
+			mult = (pmd_align + 6) % 10;
+	} else if (link_spd == ICE_PTP_LNK_SPD_10G ||
+		   link_spd == ICE_PTP_LNK_SPD_25G ||
+		   link_spd == ICE_PTP_LNK_SPD_40G ||
+		   link_spd == ICE_PTP_LNK_SPD_50G) {
+		/* If Clause 74 FEC, always calculate PMD adjust */
+		if (pmd_align != 65 || fec_mode == ICE_PTP_FEC_MODE_CLAUSE74)
+			mult = pmd_align;
+		else
+			mult = 0;
+	} else if (link_spd == ICE_PTP_LNK_SPD_25G_RS ||
+		   link_spd == ICE_PTP_LNK_SPD_50G_RS ||
+		   link_spd == ICE_PTP_LNK_SPD_100G_RS) {
+		if (pmd_align < 17)
+			mult = pmd_align + 40;
+		else
+			mult = pmd_align;
+	} else {
+		ice_debug(hw, ICE_DBG_PTP, "Unknown link speed %d, skipping PMD adjustment\n",
+			  link_spd);
+		mult = 0;
+	}
+
+	/* In some cases, there's no need to adjust for the PMD alignment */
+	if (!mult) {
+		*pmd_adj = 0;
+		return 0;
+	}
+
+	/* Calculate the adjustment by multiplying TUs per second by the
+	 * appropriate multiplier and divisor. To avoid overflow, we first
+	 * divide by 125, and then handle remaining divisor based on the link
+	 * speed pmd_adj_divisor value.
+	 */
+	adj = div_u64(tu_per_sec, 125);
+	adj *= mult;
+	adj = div_u64(adj, e822_vernier[link_spd].pmd_adj_divisor);
+
+	/* Finally, for 25G-RS and 50G-RS, a further adjustment for the Rx
+	 * cycle count is necessary.
+	 */
+	if (link_spd == ICE_PTP_LNK_SPD_25G_RS) {
+		u64 cycle_adj;
+		u8 rx_cycle;
+
+		err = ice_read_phy_reg_e822(hw, port, P_REG_RX_40_TO_160_CNT,
+					    &val);
+		if (err) {
+			ice_debug(hw, ICE_DBG_PTP, "Failed to read 25G-RS Rx cycle count, err %d\n",
+				  err);
+			return err;
+		}
+
+		rx_cycle = val & P_REG_RX_40_TO_160_CNT_RXCYC_M;
+		if (rx_cycle) {
+			mult = (4 - rx_cycle) * 40;
+
+			cycle_adj = div_u64(tu_per_sec, 125);
+			cycle_adj *= mult;
+			cycle_adj = div_u64(cycle_adj, e822_vernier[link_spd].pmd_adj_divisor);
+
+			adj += cycle_adj;
+		}
+	} else if (link_spd == ICE_PTP_LNK_SPD_50G_RS) {
+		u64 cycle_adj;
+		u8 rx_cycle;
+
+		err = ice_read_phy_reg_e822(hw, port, P_REG_RX_80_TO_160_CNT,
+					    &val);
+		if (err) {
+			ice_debug(hw, ICE_DBG_PTP, "Failed to read 50G-RS Rx cycle count, err %d\n",
+				  err);
+			return err;
+		}
+
+		rx_cycle = val & P_REG_RX_80_TO_160_CNT_RXCYC_M;
+		if (rx_cycle) {
+			mult = rx_cycle * 40;
+
+			cycle_adj = div_u64(tu_per_sec, 125);
+			cycle_adj *= mult;
+			cycle_adj = div_u64(cycle_adj, e822_vernier[link_spd].pmd_adj_divisor);
+
+			adj += cycle_adj;
+		}
+	}
+
+	/* Return the calculated adjustment */
+	*pmd_adj = adj;
+
+	return 0;
+}
+
+/**
+ * ice_calc_fixed_rx_offset_e822 - Calculated the fixed Rx offset for a port
+ * @hw: pointer to HW struct
+ * @link_spd: The Link speed to calculate for
+ *
+ * Determine the fixed Rx latency for a given link speed.
+ */
+static u64
+ice_calc_fixed_rx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd)
+{
+	u64 cur_freq, clk_incval, tu_per_sec, fixed_offset;
+
+	cur_freq = ice_e822_pll_freq(ice_e822_time_ref(hw));
+	clk_incval = ice_ptp_read_src_incval(hw);
+
+	/* Calculate TUs per second */
+	tu_per_sec = cur_freq * clk_incval;
+
+	/* Calculate number of TUs to add for the fixed Rx latency. Since the
+	 * latency measurement is in 1/100th of a nanosecond, we need to
+	 * multiply by tu_per_sec and then divide by 1e11. This calculation
+	 * overflows 64 bit integer arithmetic, so break it up into two
+	 * divisions by 1e4 first then by 1e7.
+	 */
+	fixed_offset = div_u64(tu_per_sec, 10000);
+	fixed_offset *= e822_vernier[link_spd].rx_fixed_delay;
+	fixed_offset = div_u64(fixed_offset, 10000000);
+
+	return fixed_offset;
+}
+
+/**
+ * ice_phy_cfg_rx_offset_e822 - Configure total Rx timestamp offset
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to configure
+ *
+ * Program the P_REG_TOTAL_RX_OFFSET register with the number of Time Units to
+ * adjust Rx timestamps by. This combines calculations from the Vernier offset
+ * measurements taken in hardware with some data about known fixed delay as
+ * well as adjusting for multi-lane alignment delay.
+ *
+ * This function will not return successfully until the Rx offset calculations
+ * have been completed, which requires waiting until at least one packet has
+ * been received by the device. It is safe to call this function periodically
+ * until calibration succeeds, as it will only program the offset once.
+ *
+ * This function must be called only after the offset registers are valid,
+ * i.e. after the Vernier calibration wait has passed, to ensure that the PHY
+ * has measured the offset.
+ *
+ * To avoid overflow, when calculating the offset based on the known static
+ * latency values, we use measurements in 1/100th of a nanosecond, and divide
+ * the TUs per second up front. This avoids overflow while allowing
+ * calculation of the adjustment using integer arithmetic.
+ *
+ * Returns zero on success, -EBUSY if the hardware vernier offset
+ * calibration has not completed, or another error code on failure.
+ */
+int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port)
+{
+	enum ice_ptp_link_spd link_spd;
+	enum ice_ptp_fec_mode fec_mode;
+	u64 total_offset, pmd, val;
+	int err;
+	u32 reg;
+
+	/* Nothing to do if we've already programmed the offset */
+	err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OR, &reg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OR for port %u, err %d\n",
+			  port, err);
+		return err;
+	}
+
+	if (reg)
+		return 0;
+
+	err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OV_STATUS, &reg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OV_STATUS for port %u, err %d\n",
+			  port, err);
+		return err;
+	}
+
+	if (!(reg & P_REG_RX_OV_STATUS_OV_M))
+		return -EBUSY;
+
+	err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode);
+	if (err)
+		return err;
+
+	total_offset = ice_calc_fixed_rx_offset_e822(hw, link_spd);
+
+	/* Read the first Vernier offset from the PHY register and add it to
+	 * the total offset.
+	 */
+	err = ice_read_64b_phy_reg_e822(hw, port,
+					P_REG_PAR_PCS_RX_OFFSET_L,
+					&val);
+	if (err)
+		return err;
+
+	total_offset += val;
+
+	/* For Rx, all multi-lane link speeds include a second Vernier
+	 * calibration, because the lanes might not be aligned.
+	 */
+	if (link_spd == ICE_PTP_LNK_SPD_40G ||
+	    link_spd == ICE_PTP_LNK_SPD_50G ||
+	    link_spd == ICE_PTP_LNK_SPD_50G_RS ||
+	    link_spd == ICE_PTP_LNK_SPD_100G_RS) {
+		err = ice_read_64b_phy_reg_e822(hw, port,
+						P_REG_PAR_RX_TIME_L,
+						&val);
+		if (err)
+			return err;
+
+		total_offset += val;
+	}
+
+	/* In addition, Rx must account for the PMD alignment */
+	err = ice_phy_calc_pmd_adj_e822(hw, port, link_spd, fec_mode, &pmd);
+	if (err)
+		return err;
+
+	/* For RS-FEC, this adjustment adds delay, but for other modes, it
+	 * subtracts delay.
+	 */
+	if (fec_mode == ICE_PTP_FEC_MODE_RS_FEC)
+		total_offset += pmd;
+	else
+		total_offset -= pmd;
+
+	/* Now that the total offset has been calculated, program it to the
+	 * PHY and indicate that the Rx offset is ready. After this,
+	 * timestamps will be enabled.
+	 */
+	err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_RX_OFFSET_L,
+					 total_offset);
+	if (err)
+		return err;
+
+	err = ice_write_phy_reg_e822(hw, port, P_REG_RX_OR, 1);
+	if (err)
+		return err;
+
+	dev_info(ice_hw_to_dev(hw), "Port=%d Rx vernier offset calibration complete\n",
+		 port);
+
+	return 0;
+}
+
+/**
+ * ice_read_phy_and_phc_time_e822 - Simultaneously capture PHC and PHY time
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to read
+ * @phy_time: on return, the 64bit PHY timer value
+ * @phc_time: on return, the lower 64bits of PHC time
+ *
+ * Issue a READ_TIME timer command to simultaneously capture the PHY and PHC
+ * timer values.
+ */
+static int
+ice_read_phy_and_phc_time_e822(struct ice_hw *hw, u8 port, u64 *phy_time,
+			       u64 *phc_time)
+{
+	u64 tx_time, rx_time;
+	u32 zo, lo;
+	u8 tmr_idx;
+	int err;
+
+	tmr_idx = ice_get_ptp_src_clock_index(hw);
+
+	/* Prepare the PHC timer for a READ_TIME capture command */
+	ice_ptp_src_cmd(hw, READ_TIME);
+
+	/* Prepare the PHY timer for a READ_TIME capture command */
+	err = ice_ptp_one_port_cmd(hw, port, READ_TIME);
+	if (err)
+		return err;
+
+	/* Issue the sync to start the READ_TIME capture */
+	ice_ptp_exec_tmr_cmd(hw);
+
+	/* Read the captured PHC time from the shadow time registers */
+	zo = rd32(hw, GLTSYN_SHTIME_0(tmr_idx));
+	lo = rd32(hw, GLTSYN_SHTIME_L(tmr_idx));
+	*phc_time = (u64)lo << 32 | zo;
+
+	/* Read the captured PHY time from the PHY shadow registers */
+	err = ice_ptp_read_port_capture(hw, port, &tx_time, &rx_time);
+	if (err)
+		return err;
+
+	/* If the PHY Tx and Rx timers don't match, log a warning message.
+	 * Note that this should not happen in normal circumstances since the
+	 * driver always programs them together.
+	 */
+	if (tx_time != rx_time)
+		dev_warn(ice_hw_to_dev(hw),
+			 "PHY port %u Tx and Rx timers do not match, tx_time 0x%016llX, rx_time 0x%016llX\n",
+			 port, (unsigned long long)tx_time,
+			 (unsigned long long)rx_time);
+
+	*phy_time = tx_time;
+
+	return 0;
+}
+
+/**
+ * ice_sync_phy_timer_e822 - Synchronize the PHY timer with PHC timer
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to synchronize
+ *
+ * Perform an adjustment to ensure that the PHY and PHC timers are in sync.
+ * This is done by issuing a READ_TIME command which triggers a simultaneous
+ * read of the PHY timer and PHC timer. Then we use the difference to
+ * calculate an appropriate 2s complement addition to add to the PHY timer in
+ * order to ensure it reads the same value as the primary PHC timer.
+ */
+static int ice_sync_phy_timer_e822(struct ice_hw *hw, u8 port)
+{
+	u64 phc_time, phy_time, difference;
+	int err;
+
+	if (!ice_ptp_lock(hw)) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to acquire PTP semaphore\n");
+		return -EBUSY;
+	}
+
+	err = ice_read_phy_and_phc_time_e822(hw, port, &phy_time, &phc_time);
+	if (err)
+		goto err_unlock;
+
+	/* Calculate the amount required to add to the port time in order for
+	 * it to match the PHC time.
+	 *
+	 * Note that the port adjustment is done using 2s complement
+	 * arithmetic. This is convenient since it means that we can simply
+	 * calculate the difference between the PHC time and the port time,
+	 * and it will be interpreted correctly.
+	 */
+	difference = phc_time - phy_time;
+
+	err = ice_ptp_prep_port_adj_e822(hw, port, (s64)difference);
+	if (err)
+		goto err_unlock;
+
+	err = ice_ptp_one_port_cmd(hw, port, ADJ_TIME);
+	if (err)
+		goto err_unlock;
+
+	/* Do not perform any action on the main timer */
+	ice_ptp_src_cmd(hw, ICE_PTP_NOP);
+
+	/* Issue the sync to activate the time adjustment */
+	ice_ptp_exec_tmr_cmd(hw);
+
+	/* Re-capture the timer values to flush the command registers and
+	 * verify that the time was properly adjusted.
+	 */
+	err = ice_read_phy_and_phc_time_e822(hw, port, &phy_time, &phc_time);
+	if (err)
+		goto err_unlock;
+
+	dev_info(ice_hw_to_dev(hw),
+		 "Port %u PHY time synced to PHC: 0x%016llX, 0x%016llX\n",
+		 port, (unsigned long long)phy_time,
+		 (unsigned long long)phc_time);
+
+	ice_ptp_unlock(hw);
+
+	return 0;
+
+err_unlock:
+	ice_ptp_unlock(hw);
+	return err;
+}
+
+/**
+ * ice_stop_phy_timer_e822 - Stop the PHY clock timer
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to stop
+ * @soft_reset: if true, hold the SOFT_RESET bit of P_REG_PS
+ *
+ * Stop the clock of a PHY port. This must be done as part of the flow to
+ * re-calibrate Tx and Rx timestamping offsets whenever the clock time is
+ * initialized or when link speed changes.
+ */
+int
+ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset)
+{
+	int err;
+	u32 val;
+
+	err = ice_write_phy_reg_e822(hw, port, P_REG_TX_OR, 0);
+	if (err)
+		return err;
+
+	err = ice_write_phy_reg_e822(hw, port, P_REG_RX_OR, 0);
+	if (err)
+		return err;
+
+	err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val);
+	if (err)
+		return err;
+
+	val &= ~P_REG_PS_START_M;
+	err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+	if (err)
+		return err;
+
+	val &= ~P_REG_PS_ENA_CLK_M;
+	err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+	if (err)
+		return err;
+
+	if (soft_reset) {
+		val |= P_REG_PS_SFT_RESET_M;
+		err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+		if (err)
+			return err;
+	}
+
+	ice_debug(hw, ICE_DBG_PTP, "Disabled clock on PHY port %u\n", port);
+
+	return 0;
+}
+
+/**
+ * ice_start_phy_timer_e822 - Start the PHY clock timer
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to start
+ *
+ * Start the clock of a PHY port. This must be done as part of the flow to
+ * re-calibrate Tx and Rx timestamping offsets whenever the clock time is
+ * initialized or when link speed changes.
+ *
+ * Hardware will take Vernier measurements on Tx or Rx of packets.
+ */
+int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port)
+{
+	u32 lo, hi, val;
+	u64 incval;
+	u8 tmr_idx;
+	int err;
+
+	tmr_idx = ice_get_ptp_src_clock_index(hw);
+
+	err = ice_stop_phy_timer_e822(hw, port, false);
+	if (err)
+		return err;
+
+	ice_phy_cfg_lane_e822(hw, port);
+
+	err = ice_phy_cfg_uix_e822(hw, port);
+	if (err)
+		return err;
+
+	err = ice_phy_cfg_parpcs_e822(hw, port);
+	if (err)
+		return err;
+
+	lo = rd32(hw, GLTSYN_INCVAL_L(tmr_idx));
+	hi = rd32(hw, GLTSYN_INCVAL_H(tmr_idx));
+	incval = (u64)hi << 32 | lo;
+
+	err = ice_write_40b_phy_reg_e822(hw, port, P_REG_TIMETUS_L, incval);
+	if (err)
+		return err;
+
+	err = ice_ptp_one_port_cmd(hw, port, INIT_INCVAL);
+	if (err)
+		return err;
+
+	/* Do not perform any action on the main timer */
+	ice_ptp_src_cmd(hw, ICE_PTP_NOP);
+
+	ice_ptp_exec_tmr_cmd(hw);
+
+	err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val);
+	if (err)
+		return err;
+
+	val |= P_REG_PS_SFT_RESET_M;
+	err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+	if (err)
+		return err;
+
+	val |= P_REG_PS_START_M;
+	err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+	if (err)
+		return err;
+
+	val &= ~P_REG_PS_SFT_RESET_M;
+	err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+	if (err)
+		return err;
+
+	err = ice_ptp_one_port_cmd(hw, port, INIT_INCVAL);
+	if (err)
+		return err;
+
+	ice_ptp_exec_tmr_cmd(hw);
+
+	val |= P_REG_PS_ENA_CLK_M;
+	err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+	if (err)
+		return err;
+
+	val |= P_REG_PS_LOAD_OFFSET_M;
+	err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val);
+	if (err)
+		return err;
+
+	ice_ptp_exec_tmr_cmd(hw);
+
+	err = ice_sync_phy_timer_e822(hw, port);
+	if (err)
+		return err;
+
+	ice_debug(hw, ICE_DBG_PTP, "Enabled clock on PHY port %u\n", port);
+
+	return 0;
+}
+
+/**
+ * ice_get_phy_tx_tstamp_ready_e822 - Read Tx memory status register
+ * @hw: pointer to the HW struct
+ * @quad: the timestamp quad to read from
+ * @tstamp_ready: contents of the Tx memory status register
+ *
+ * Read the Q_REG_TX_MEMORY_STATUS register indicating which timestamps in
+ * the PHY are ready. A set bit means the corresponding timestamp is valid and
+ * ready to be captured from the PHY timestamp block.
+ */
+static int
+ice_get_phy_tx_tstamp_ready_e822(struct ice_hw *hw, u8 quad, u64 *tstamp_ready)
+{
+	u32 hi, lo;
+	int err;
+
+	err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEMORY_STATUS_U, &hi);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_MEMORY_STATUS_U for quad %u, err %d\n",
+			  quad, err);
+		return err;
+	}
+
+	err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEMORY_STATUS_L, &lo);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_MEMORY_STATUS_L for quad %u, err %d\n",
+			  quad, err);
+		return err;
+	}
+
+	*tstamp_ready = (u64)hi << 32 | (u64)lo;
+
+	return 0;
+}
+
+/* E810 functions
+ *
+ * The following functions operate on the E810 series devices which use
+ * a separate external PHY.
+ */
+
+/**
+ * ice_read_phy_reg_e810 - Read register from external PHY on E810
+ * @hw: pointer to the HW struct
+ * @addr: the address to read from
+ * @val: On return, the value read from the PHY
+ *
+ * Read a register from the external PHY on the E810 device.
+ */
+static int ice_read_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 *val)
+{
+	struct ice_sbq_msg_input msg = {0};
+	int err;
+
+	msg.msg_addr_low = lower_16_bits(addr);
+	msg.msg_addr_high = upper_16_bits(addr);
+	msg.opcode = ice_sbq_msg_rd;
+	msg.dest_dev = rmn_0;
+
+	err = ice_sbq_rw_reg(hw, &msg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
+			  err);
+		return err;
+	}
+
+	*val = msg.data;
+
+	return 0;
+}
+
+/**
+ * ice_write_phy_reg_e810 - Write register on external PHY on E810
+ * @hw: pointer to the HW struct
+ * @addr: the address to writem to
+ * @val: the value to write to the PHY
+ *
+ * Write a value to a register of the external PHY on the E810 device.
+ */
+static int ice_write_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 val)
+{
+	struct ice_sbq_msg_input msg = {0};
+	int err;
+
+	msg.msg_addr_low = lower_16_bits(addr);
+	msg.msg_addr_high = upper_16_bits(addr);
+	msg.opcode = ice_sbq_msg_wr;
+	msg.dest_dev = rmn_0;
+	msg.data = val;
+
+	err = ice_sbq_rw_reg(hw, &msg);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to send message to PHY, err %d\n",
+			  err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_read_phy_tstamp_ll_e810 - Read a PHY timestamp registers through the FW
+ * @hw: pointer to the HW struct
+ * @idx: the timestamp index to read
+ * @hi: 8 bit timestamp high value
+ * @lo: 32 bit timestamp low value
+ *
+ * Read a 8bit timestamp high value and 32 bit timestamp low value out of the
+ * timestamp block of the external PHY on the E810 device using the low latency
+ * timestamp read.
+ */
+static int
+ice_read_phy_tstamp_ll_e810(struct ice_hw *hw, u8 idx, u8 *hi, u32 *lo)
+{
+	u32 val;
+	u8 i;
+
+	/* Write TS index to read to the PF register so the FW can read it */
+	val = FIELD_PREP(TS_LL_READ_TS_IDX, idx) | TS_LL_READ_TS;
+	wr32(hw, PF_SB_ATQBAL, val);
+
+	/* Read the register repeatedly until the FW provides us the TS */
+	for (i = TS_LL_READ_RETRIES; i > 0; i--) {
+		val = rd32(hw, PF_SB_ATQBAL);
+
+		/* When the bit is cleared, the TS is ready in the register */
+		if (!(FIELD_GET(TS_LL_READ_TS, val))) {
+			/* High 8 bit value of the TS is on the bits 16:23 */
+			*hi = FIELD_GET(TS_LL_READ_TS_HIGH, val);
+
+			/* Read the low 32 bit value and set the TS valid bit */
+			*lo = rd32(hw, PF_SB_ATQBAH) | TS_VALID;
+			return 0;
+		}
+
+		udelay(10);
+	}
+
+	/* FW failed to provide the TS in time */
+	ice_debug(hw, ICE_DBG_PTP, "Failed to read PTP timestamp using low latency read\n");
+	return -EINVAL;
+}
+
+/**
+ * ice_read_phy_tstamp_sbq_e810 - Read a PHY timestamp registers through the sbq
+ * @hw: pointer to the HW struct
+ * @lport: the lport to read from
+ * @idx: the timestamp index to read
+ * @hi: 8 bit timestamp high value
+ * @lo: 32 bit timestamp low value
+ *
+ * Read a 8bit timestamp high value and 32 bit timestamp low value out of the
+ * timestamp block of the external PHY on the E810 device using sideband queue.
+ */
+static int
+ice_read_phy_tstamp_sbq_e810(struct ice_hw *hw, u8 lport, u8 idx, u8 *hi,
+			     u32 *lo)
+{
+	u32 hi_addr = TS_EXT(HIGH_TX_MEMORY_BANK_START, lport, idx);
+	u32 lo_addr = TS_EXT(LOW_TX_MEMORY_BANK_START, lport, idx);
+	u32 lo_val, hi_val;
+	int err;
+
+	err = ice_read_phy_reg_e810(hw, lo_addr, &lo_val);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read low PTP timestamp register, err %d\n",
+			  err);
+		return err;
+	}
+
+	err = ice_read_phy_reg_e810(hw, hi_addr, &hi_val);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read high PTP timestamp register, err %d\n",
+			  err);
+		return err;
+	}
+
+	*lo = lo_val;
+	*hi = (u8)hi_val;
+
+	return 0;
+}
+
+/**
+ * ice_read_phy_tstamp_e810 - Read a PHY timestamp out of the external PHY
+ * @hw: pointer to the HW struct
+ * @lport: the lport to read from
+ * @idx: the timestamp index to read
+ * @tstamp: on return, the 40bit timestamp value
+ *
+ * Read a 40bit timestamp value out of the timestamp block of the external PHY
+ * on the E810 device.
+ */
+static int
+ice_read_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx, u64 *tstamp)
+{
+	u32 lo = 0;
+	u8 hi = 0;
+	int err;
+
+	if (hw->dev_caps.ts_dev_info.ts_ll_read)
+		err = ice_read_phy_tstamp_ll_e810(hw, idx, &hi, &lo);
+	else
+		err = ice_read_phy_tstamp_sbq_e810(hw, lport, idx, &hi, &lo);
+
+	if (err)
+		return err;
+
+	/* For E810 devices, the timestamp is reported with the lower 32 bits
+	 * in the low register, and the upper 8 bits in the high register.
+	 */
+	*tstamp = ((u64)hi) << TS_HIGH_S | ((u64)lo & TS_LOW_M);
+
+	return 0;
+}
+
+/**
+ * ice_clear_phy_tstamp_e810 - Clear a timestamp from the external PHY
+ * @hw: pointer to the HW struct
+ * @lport: the lport to read from
+ * @idx: the timestamp index to reset
+ *
+ * Clear a timestamp, resetting its valid bit, from the timestamp block of the
+ * external PHY on the E810 device.
+ */
+static int ice_clear_phy_tstamp_e810(struct ice_hw *hw, u8 lport, u8 idx)
+{
+	u32 lo_addr, hi_addr;
+	int err;
+
+	lo_addr = TS_EXT(LOW_TX_MEMORY_BANK_START, lport, idx);
+	hi_addr = TS_EXT(HIGH_TX_MEMORY_BANK_START, lport, idx);
+
+	err = ice_write_phy_reg_e810(hw, lo_addr, 0);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to clear low PTP timestamp register, err %d\n",
+			  err);
+		return err;
+	}
+
+	err = ice_write_phy_reg_e810(hw, hi_addr, 0);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to clear high PTP timestamp register, err %d\n",
+			  err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_init_phy_e810 - Enable PTP function on the external PHY
+ * @hw: pointer to HW struct
+ *
+ * Enable the timesync PTP functionality for the external PHY connected to
+ * this function.
+ */
+int ice_ptp_init_phy_e810(struct ice_hw *hw)
+{
+	u8 tmr_idx;
+	int err;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+	err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_ENA(tmr_idx),
+				     GLTSYN_ENA_TSYN_ENA_M);
+	if (err)
+		ice_debug(hw, ICE_DBG_PTP, "PTP failed in ena_phy_time_syn %d\n",
+			  err);
+
+	return err;
+}
+
+/**
+ * ice_ptp_init_phc_e810 - Perform E810 specific PHC initialization
+ * @hw: pointer to HW struct
+ *
+ * Perform E810-specific PTP hardware clock initialization steps.
+ */
+static int ice_ptp_init_phc_e810(struct ice_hw *hw)
+{
+	/* Ensure synchronization delay is zero */
+	wr32(hw, GLTSYN_SYNC_DLAY, 0);
+
+	/* Initialize the PHY */
+	return ice_ptp_init_phy_e810(hw);
+}
+
+/**
+ * ice_ptp_prep_phy_time_e810 - Prepare PHY port with initial time
+ * @hw: Board private structure
+ * @time: Time to initialize the PHY port clock to
+ *
+ * Program the PHY port ETH_GLTSYN_SHTIME registers in preparation setting the
+ * initial clock time. The time will not actually be programmed until the
+ * driver issues an INIT_TIME command.
+ *
+ * The time value is the upper 32 bits of the PHY timer, usually in units of
+ * nominal nanoseconds.
+ */
+static int ice_ptp_prep_phy_time_e810(struct ice_hw *hw, u32 time)
+{
+	u8 tmr_idx;
+	int err;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+	err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHTIME_0(tmr_idx), 0);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write SHTIME_0, err %d\n",
+			  err);
+		return err;
+	}
+
+	err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHTIME_L(tmr_idx), time);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write SHTIME_L, err %d\n",
+			  err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_prep_phy_adj_e810 - Prep PHY port for a time adjustment
+ * @hw: pointer to HW struct
+ * @adj: adjustment value to program
+ *
+ * Prepare the PHY port for an atomic adjustment by programming the PHY
+ * ETH_GLTSYN_SHADJ_L and ETH_GLTSYN_SHADJ_H registers. The actual adjustment
+ * is completed by issuing an ADJ_TIME sync command.
+ *
+ * The adjustment value only contains the portion used for the upper 32bits of
+ * the PHY timer, usually in units of nominal nanoseconds. Negative
+ * adjustments are supported using 2s complement arithmetic.
+ */
+static int ice_ptp_prep_phy_adj_e810(struct ice_hw *hw, s32 adj)
+{
+	u8 tmr_idx;
+	int err;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+	/* Adjustments are represented as signed 2's complement values in
+	 * nanoseconds. Sub-nanosecond adjustment is not supported.
+	 */
+	err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_L(tmr_idx), 0);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write adj to PHY SHADJ_L, err %d\n",
+			  err);
+		return err;
+	}
+
+	err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_H(tmr_idx), adj);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write adj to PHY SHADJ_H, err %d\n",
+			  err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_prep_phy_incval_e810 - Prep PHY port increment value change
+ * @hw: pointer to HW struct
+ * @incval: The new 40bit increment value to prepare
+ *
+ * Prepare the PHY port for a new increment value by programming the PHY
+ * ETH_GLTSYN_SHADJ_L and ETH_GLTSYN_SHADJ_H registers. The actual change is
+ * completed by issuing an INIT_INCVAL command.
+ */
+static int ice_ptp_prep_phy_incval_e810(struct ice_hw *hw, u64 incval)
+{
+	u32 high, low;
+	u8 tmr_idx;
+	int err;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+	low = lower_32_bits(incval);
+	high = upper_32_bits(incval);
+
+	err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_L(tmr_idx), low);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write incval to PHY SHADJ_L, err %d\n",
+			  err);
+		return err;
+	}
+
+	err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_SHADJ_H(tmr_idx), high);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write incval PHY SHADJ_H, err %d\n",
+			  err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_ptp_port_cmd_e810 - Prepare all external PHYs for a timer command
+ * @hw: pointer to HW struct
+ * @cmd: Command to be sent to the port
+ *
+ * Prepare the external PHYs connected to this device for a timer sync
+ * command.
+ */
+static int ice_ptp_port_cmd_e810(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
+{
+	u32 cmd_val, val;
+	int err;
+
+	switch (cmd) {
+	case INIT_TIME:
+		cmd_val = GLTSYN_CMD_INIT_TIME;
+		break;
+	case INIT_INCVAL:
+		cmd_val = GLTSYN_CMD_INIT_INCVAL;
+		break;
+	case ADJ_TIME:
+		cmd_val = GLTSYN_CMD_ADJ_TIME;
+		break;
+	case READ_TIME:
+		cmd_val = GLTSYN_CMD_READ_TIME;
+		break;
+	case ADJ_TIME_AT_TIME:
+		cmd_val = GLTSYN_CMD_ADJ_INIT_TIME;
+		break;
+	case ICE_PTP_NOP:
+		return 0;
+	}
+
+	/* Read, modify, write */
+	err = ice_read_phy_reg_e810(hw, ETH_GLTSYN_CMD, &val);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to read GLTSYN_CMD, err %d\n", err);
+		return err;
+	}
+
+	/* Modify necessary bits only and perform write */
+	val &= ~TS_CMD_MASK_E810;
+	val |= cmd_val;
+
+	err = ice_write_phy_reg_e810(hw, ETH_GLTSYN_CMD, val);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to write back GLTSYN_CMD, err %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_get_phy_tx_tstamp_ready_e810 - Read Tx memory status register
+ * @hw: pointer to the HW struct
+ * @port: the PHY port to read
+ * @tstamp_ready: contents of the Tx memory status register
+ *
+ * E810 devices do not use a Tx memory status register. Instead simply
+ * indicate that all timestamps are currently ready.
+ */
+static int
+ice_get_phy_tx_tstamp_ready_e810(struct ice_hw *hw, u8 port, u64 *tstamp_ready)
+{
+	*tstamp_ready = 0xFFFFFFFFFFFFFFFF;
+	return 0;
+}
+
+/* E810T SMA functions
+ *
+ * The following functions operate specifically on E810T hardware and are used
+ * to access the extended GPIOs available.
+ */
+
+/**
+ * ice_get_pca9575_handle
+ * @hw: pointer to the hw struct
+ * @pca9575_handle: GPIO controller's handle
+ *
+ * Find and return the GPIO controller's handle in the netlist.
+ * When found - the value will be cached in the hw structure and following calls
+ * will return cached value
+ */
+static int
+ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle)
+{
+	struct ice_aqc_get_link_topo *cmd;
+	struct ice_aq_desc desc;
+	int status;
+	u8 idx;
+
+	/* If handle was read previously return cached value */
+	if (hw->io_expander_handle) {
+		*pca9575_handle = hw->io_expander_handle;
+		return 0;
+	}
+
+	/* If handle was not detected read it from the netlist */
+	cmd = &desc.params.get_link_topo;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo);
+
+	/* Set node type to GPIO controller */
+	cmd->addr.topo_params.node_type_ctx =
+		(ICE_AQC_LINK_TOPO_NODE_TYPE_M &
+		 ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL);
+
+#define SW_PCA9575_SFP_TOPO_IDX		2
+#define SW_PCA9575_QSFP_TOPO_IDX	1
+
+	/* Check if the SW IO expander controlling SMA exists in the netlist. */
+	if (hw->device_id == ICE_DEV_ID_E810C_SFP)
+		idx = SW_PCA9575_SFP_TOPO_IDX;
+	else if (hw->device_id == ICE_DEV_ID_E810C_QSFP)
+		idx = SW_PCA9575_QSFP_TOPO_IDX;
+	else
+		return -EOPNOTSUPP;
+
+	cmd->addr.topo_params.index = idx;
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+	if (status)
+		return -EOPNOTSUPP;
+
+	/* Verify if we found the right IO expander type */
+	if (desc.params.get_link_topo.node_part_num !=
+		ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575)
+		return -EOPNOTSUPP;
+
+	/* If present save the handle and return it */
+	hw->io_expander_handle =
+		le16_to_cpu(desc.params.get_link_topo.addr.handle);
+	*pca9575_handle = hw->io_expander_handle;
+
+	return 0;
+}
+
+/**
+ * ice_read_sma_ctrl_e810t
+ * @hw: pointer to the hw struct
+ * @data: pointer to data to be read from the GPIO controller
+ *
+ * Read the SMA controller state. It is connected to pins 3-7 of Port 1 of the
+ * PCA9575 expander, so only bits 3-7 in data are valid.
+ */
+int ice_read_sma_ctrl_e810t(struct ice_hw *hw, u8 *data)
+{
+	int status;
+	u16 handle;
+	u8 i;
+
+	status = ice_get_pca9575_handle(hw, &handle);
+	if (status)
+		return status;
+
+	*data = 0;
+
+	for (i = ICE_SMA_MIN_BIT_E810T; i <= ICE_SMA_MAX_BIT_E810T; i++) {
+		bool pin;
+
+		status = ice_aq_get_gpio(hw, handle, i + ICE_PCA9575_P1_OFFSET,
+					 &pin, NULL);
+		if (status)
+			break;
+		*data |= (u8)(!pin) << i;
+	}
+
+	return status;
+}
+
+/**
+ * ice_write_sma_ctrl_e810t
+ * @hw: pointer to the hw struct
+ * @data: data to be written to the GPIO controller
+ *
+ * Write the data to the SMA controller. It is connected to pins 3-7 of Port 1
+ * of the PCA9575 expander, so only bits 3-7 in data are valid.
+ */
+int ice_write_sma_ctrl_e810t(struct ice_hw *hw, u8 data)
+{
+	int status;
+	u16 handle;
+	u8 i;
+
+	status = ice_get_pca9575_handle(hw, &handle);
+	if (status)
+		return status;
+
+	for (i = ICE_SMA_MIN_BIT_E810T; i <= ICE_SMA_MAX_BIT_E810T; i++) {
+		bool pin;
+
+		pin = !(data & (1 << i));
+		status = ice_aq_set_gpio(hw, handle, i + ICE_PCA9575_P1_OFFSET,
+					 pin, NULL);
+		if (status)
+			break;
+	}
+
+	return status;
+}
+
+/**
+ * ice_read_pca9575_reg_e810t
+ * @hw: pointer to the hw struct
+ * @offset: GPIO controller register offset
+ * @data: pointer to data to be read from the GPIO controller
+ *
+ * Read the register from the GPIO controller
+ */
+int ice_read_pca9575_reg_e810t(struct ice_hw *hw, u8 offset, u8 *data)
+{
+	struct ice_aqc_link_topo_addr link_topo;
+	__le16 addr;
+	u16 handle;
+	int err;
+
+	memset(&link_topo, 0, sizeof(link_topo));
+
+	err = ice_get_pca9575_handle(hw, &handle);
+	if (err)
+		return err;
+
+	link_topo.handle = cpu_to_le16(handle);
+	link_topo.topo_params.node_type_ctx =
+		FIELD_PREP(ICE_AQC_LINK_TOPO_NODE_CTX_M,
+			   ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED);
+
+	addr = cpu_to_le16((u16)offset);
+
+	return ice_aq_read_i2c(hw, link_topo, 0, addr, 1, data, NULL);
+}
+
+/* Device agnostic functions
+ *
+ * The following functions implement shared behavior common to both E822 and
+ * E810 devices, possibly calling a device specific implementation where
+ * necessary.
+ */
+
+/**
+ * ice_ptp_lock - Acquire PTP global semaphore register lock
+ * @hw: pointer to the HW struct
+ *
+ * Acquire the global PTP hardware semaphore lock. Returns true if the lock
+ * was acquired, false otherwise.
+ *
+ * The PFTSYN_SEM register sets the busy bit on read, returning the previous
+ * value. If software sees the busy bit cleared, this means that this function
+ * acquired the lock (and the busy bit is now set). If software sees the busy
+ * bit set, it means that another function acquired the lock.
+ *
+ * Software must clear the busy bit with a write to release the lock for other
+ * functions when done.
+ */
+bool ice_ptp_lock(struct ice_hw *hw)
+{
+	u32 hw_lock;
+	int i;
+
+#define MAX_TRIES 15
+
+	for (i = 0; i < MAX_TRIES; i++) {
+		hw_lock = rd32(hw, PFTSYN_SEM + (PFTSYN_SEM_BYTES * hw->pf_id));
+		hw_lock = hw_lock & PFTSYN_SEM_BUSY_M;
+		if (hw_lock) {
+			/* Somebody is holding the lock */
+			usleep_range(5000, 6000);
+			continue;
+		}
+
+		break;
+	}
+
+	return !hw_lock;
+}
+
+/**
+ * ice_ptp_unlock - Release PTP global semaphore register lock
+ * @hw: pointer to the HW struct
+ *
+ * Release the global PTP hardware semaphore lock. This is done by writing to
+ * the PFTSYN_SEM register.
+ */
+void ice_ptp_unlock(struct ice_hw *hw)
+{
+	wr32(hw, PFTSYN_SEM + (PFTSYN_SEM_BYTES * hw->pf_id), 0);
+}
+
+/**
+ * ice_ptp_tmr_cmd - Prepare and trigger a timer sync command
+ * @hw: pointer to HW struct
+ * @cmd: the command to issue
+ *
+ * Prepare the source timer and PHY timers and then trigger the requested
+ * command. This causes the shadow registers previously written in preparation
+ * for the command to be synchronously applied to both the source and PHY
+ * timers.
+ */
+static int ice_ptp_tmr_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
+{
+	int err;
+
+	/* First, prepare the source timer */
+	ice_ptp_src_cmd(hw, cmd);
+
+	/* Next, prepare the ports */
+	if (ice_is_e810(hw))
+		err = ice_ptp_port_cmd_e810(hw, cmd);
+	else
+		err = ice_ptp_port_cmd_e822(hw, cmd);
+	if (err) {
+		ice_debug(hw, ICE_DBG_PTP, "Failed to prepare PHY ports for timer command %u, err %d\n",
+			  cmd, err);
+		return err;
+	}
+
+	/* Write the sync command register to drive both source and PHY timer
+	 * commands synchronously
+	 */
+	ice_ptp_exec_tmr_cmd(hw);
+
+	return 0;
+}
+
+/**
+ * ice_ptp_init_time - Initialize device time to provided value
+ * @hw: pointer to HW struct
+ * @time: 64bits of time (GLTSYN_TIME_L and GLTSYN_TIME_H)
+ *
+ * Initialize the device to the specified time provided. This requires a three
+ * step process:
+ *
+ * 1) write the new init time to the source timer shadow registers
+ * 2) write the new init time to the PHY timer shadow registers
+ * 3) issue an init_time timer command to synchronously switch both the source
+ *    and port timers to the new init time value at the next clock cycle.
+ */
+int ice_ptp_init_time(struct ice_hw *hw, u64 time)
+{
+	u8 tmr_idx;
+	int err;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+	/* Source timers */
+	wr32(hw, GLTSYN_SHTIME_L(tmr_idx), lower_32_bits(time));
+	wr32(hw, GLTSYN_SHTIME_H(tmr_idx), upper_32_bits(time));
+	wr32(hw, GLTSYN_SHTIME_0(tmr_idx), 0);
+
+	/* PHY timers */
+	/* Fill Rx and Tx ports and send msg to PHY */
+	if (ice_is_e810(hw))
+		err = ice_ptp_prep_phy_time_e810(hw, time & 0xFFFFFFFF);
+	else
+		err = ice_ptp_prep_phy_time_e822(hw, time & 0xFFFFFFFF);
+	if (err)
+		return err;
+
+	return ice_ptp_tmr_cmd(hw, INIT_TIME);
+}
+
+/**
+ * ice_ptp_write_incval - Program PHC with new increment value
+ * @hw: pointer to HW struct
+ * @incval: Source timer increment value per clock cycle
+ *
+ * Program the PHC with a new increment value. This requires a three-step
+ * process:
+ *
+ * 1) Write the increment value to the source timer shadow registers
+ * 2) Write the increment value to the PHY timer shadow registers
+ * 3) Issue an INIT_INCVAL timer command to synchronously switch both the
+ *    source and port timers to the new increment value at the next clock
+ *    cycle.
+ */
+int ice_ptp_write_incval(struct ice_hw *hw, u64 incval)
+{
+	u8 tmr_idx;
+	int err;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+	/* Shadow Adjust */
+	wr32(hw, GLTSYN_SHADJ_L(tmr_idx), lower_32_bits(incval));
+	wr32(hw, GLTSYN_SHADJ_H(tmr_idx), upper_32_bits(incval));
+
+	if (ice_is_e810(hw))
+		err = ice_ptp_prep_phy_incval_e810(hw, incval);
+	else
+		err = ice_ptp_prep_phy_incval_e822(hw, incval);
+	if (err)
+		return err;
+
+	return ice_ptp_tmr_cmd(hw, INIT_INCVAL);
+}
+
+/**
+ * ice_ptp_write_incval_locked - Program new incval while holding semaphore
+ * @hw: pointer to HW struct
+ * @incval: Source timer increment value per clock cycle
+ *
+ * Program a new PHC incval while holding the PTP semaphore.
+ */
+int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval)
+{
+	int err;
+
+	if (!ice_ptp_lock(hw))
+		return -EBUSY;
+
+	err = ice_ptp_write_incval(hw, incval);
+
+	ice_ptp_unlock(hw);
+
+	return err;
+}
+
+/**
+ * ice_ptp_adj_clock - Adjust PHC clock time atomically
+ * @hw: pointer to HW struct
+ * @adj: Adjustment in nanoseconds
+ *
+ * Perform an atomic adjustment of the PHC time by the specified number of
+ * nanoseconds. This requires a three-step process:
+ *
+ * 1) Write the adjustment to the source timer shadow registers
+ * 2) Write the adjustment to the PHY timer shadow registers
+ * 3) Issue an ADJ_TIME timer command to synchronously apply the adjustment to
+ *    both the source and port timers at the next clock cycle.
+ */
+int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj)
+{
+	u8 tmr_idx;
+	int err;
+
+	tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+	/* Write the desired clock adjustment into the GLTSYN_SHADJ register.
+	 * For an ADJ_TIME command, this set of registers represents the value
+	 * to add to the clock time. It supports subtraction by interpreting
+	 * the value as a 2's complement integer.
+	 */
+	wr32(hw, GLTSYN_SHADJ_L(tmr_idx), 0);
+	wr32(hw, GLTSYN_SHADJ_H(tmr_idx), adj);
+
+	if (ice_is_e810(hw))
+		err = ice_ptp_prep_phy_adj_e810(hw, adj);
+	else
+		err = ice_ptp_prep_phy_adj_e822(hw, adj);
+	if (err)
+		return err;
+
+	return ice_ptp_tmr_cmd(hw, ADJ_TIME);
+}
+
+/**
+ * ice_read_phy_tstamp - Read a PHY timestamp from the timestamo block
+ * @hw: pointer to the HW struct
+ * @block: the block to read from
+ * @idx: the timestamp index to read
+ * @tstamp: on return, the 40bit timestamp value
+ *
+ * Read a 40bit timestamp value out of the timestamp block. For E822 devices,
+ * the block is the quad to read from. For E810 devices, the block is the
+ * logical port to read from.
+ */
+int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp)
+{
+	if (ice_is_e810(hw))
+		return ice_read_phy_tstamp_e810(hw, block, idx, tstamp);
+	else
+		return ice_read_phy_tstamp_e822(hw, block, idx, tstamp);
+}
+
+/**
+ * ice_clear_phy_tstamp - Clear a timestamp from the timestamp block
+ * @hw: pointer to the HW struct
+ * @block: the block to read from
+ * @idx: the timestamp index to reset
+ *
+ * Clear a timestamp, resetting its valid bit, from the timestamp block. For
+ * E822 devices, the block is the quad to clear from. For E810 devices, the
+ * block is the logical port to clear from.
+ */
+int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx)
+{
+	if (ice_is_e810(hw))
+		return ice_clear_phy_tstamp_e810(hw, block, idx);
+	else
+		return ice_clear_phy_tstamp_e822(hw, block, idx);
+}
+
+/**
+ * ice_ptp_reset_ts_memory - Reset timestamp memory for all blocks
+ * @hw: pointer to the HW struct
+ */
+void ice_ptp_reset_ts_memory(struct ice_hw *hw)
+{
+	if (ice_is_e810(hw))
+		return;
+
+	ice_ptp_reset_ts_memory_e822(hw);
+}
+
+/**
+ * ice_ptp_init_phc - Initialize PTP hardware clock
+ * @hw: pointer to the HW struct
+ *
+ * Perform the steps required to initialize the PTP hardware clock.
+ */
+int ice_ptp_init_phc(struct ice_hw *hw)
+{
+	u8 src_idx = hw->func_caps.ts_func_info.tmr_index_owned;
+
+	/* Enable source clocks */
+	wr32(hw, GLTSYN_ENA(src_idx), GLTSYN_ENA_TSYN_ENA_M);
+
+	/* Clear event err indications for auxiliary pins */
+	(void)rd32(hw, GLTSYN_STAT(src_idx));
+
+	if (ice_is_e810(hw))
+		return ice_ptp_init_phc_e810(hw);
+	else
+		return ice_ptp_init_phc_e822(hw);
+}
+
+/**
+ * ice_get_phy_tx_tstamp_ready - Read PHY Tx memory status indication
+ * @hw: pointer to the HW struct
+ * @block: the timestamp block to check
+ * @tstamp_ready: storage for the PHY Tx memory status information
+ *
+ * Check the PHY for Tx timestamp memory status. This reports a 64 bit value
+ * which indicates which timestamps in the block may be captured. A set bit
+ * means the timestamp can be read. An unset bit means the timestamp is not
+ * ready and software should avoid reading the register.
+ */
+int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready)
+{
+	if (ice_is_e810(hw))
+		return ice_get_phy_tx_tstamp_ready_e810(hw, block,
+							tstamp_ready);
+	else
+		return ice_get_phy_tx_tstamp_ready_e822(hw, block,
+							tstamp_ready);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
new file mode 100644
index 0000000000..9aa10b0426
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
@@ -0,0 +1,464 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_PTP_HW_H_
+#define _ICE_PTP_HW_H_
+
+enum ice_ptp_tmr_cmd {
+	INIT_TIME,
+	INIT_INCVAL,
+	ADJ_TIME,
+	ADJ_TIME_AT_TIME,
+	READ_TIME,
+	ICE_PTP_NOP,
+};
+
+enum ice_ptp_serdes {
+	ICE_PTP_SERDES_1G,
+	ICE_PTP_SERDES_10G,
+	ICE_PTP_SERDES_25G,
+	ICE_PTP_SERDES_40G,
+	ICE_PTP_SERDES_50G,
+	ICE_PTP_SERDES_100G
+};
+
+enum ice_ptp_link_spd {
+	ICE_PTP_LNK_SPD_1G,
+	ICE_PTP_LNK_SPD_10G,
+	ICE_PTP_LNK_SPD_25G,
+	ICE_PTP_LNK_SPD_25G_RS,
+	ICE_PTP_LNK_SPD_40G,
+	ICE_PTP_LNK_SPD_50G,
+	ICE_PTP_LNK_SPD_50G_RS,
+	ICE_PTP_LNK_SPD_100G_RS,
+	NUM_ICE_PTP_LNK_SPD /* Must be last */
+};
+
+enum ice_ptp_fec_mode {
+	ICE_PTP_FEC_MODE_NONE,
+	ICE_PTP_FEC_MODE_CLAUSE74,
+	ICE_PTP_FEC_MODE_RS_FEC
+};
+
+/**
+ * struct ice_time_ref_info_e822
+ * @pll_freq: Frequency of PLL that drives timer ticks in Hz
+ * @nominal_incval: increment to generate nanoseconds in GLTSYN_TIME_L
+ * @pps_delay: propagation delay of the PPS output signal
+ *
+ * Characteristic information for the various TIME_REF sources possible in the
+ * E822 devices
+ */
+struct ice_time_ref_info_e822 {
+	u64 pll_freq;
+	u64 nominal_incval;
+	u8 pps_delay;
+};
+
+/**
+ * struct ice_vernier_info_e822
+ * @tx_par_clk: Frequency used to calculate P_REG_PAR_TX_TUS
+ * @rx_par_clk: Frequency used to calculate P_REG_PAR_RX_TUS
+ * @tx_pcs_clk: Frequency used to calculate P_REG_PCS_TX_TUS
+ * @rx_pcs_clk: Frequency used to calculate P_REG_PCS_RX_TUS
+ * @tx_desk_rsgb_par: Frequency used to calculate P_REG_DESK_PAR_TX_TUS
+ * @rx_desk_rsgb_par: Frequency used to calculate P_REG_DESK_PAR_RX_TUS
+ * @tx_desk_rsgb_pcs: Frequency used to calculate P_REG_DESK_PCS_TX_TUS
+ * @rx_desk_rsgb_pcs: Frequency used to calculate P_REG_DESK_PCS_RX_TUS
+ * @tx_fixed_delay: Fixed Tx latency measured in 1/100th nanoseconds
+ * @pmd_adj_divisor: Divisor used to calculate PDM alignment adjustment
+ * @rx_fixed_delay: Fixed Rx latency measured in 1/100th nanoseconds
+ *
+ * Table of constants used during as part of the Vernier calibration of the Tx
+ * and Rx timestamps. This includes frequency values used to compute TUs per
+ * PAR/PCS clock cycle, and static delay values measured during hardware
+ * design.
+ *
+ * Note that some values are not used for all link speeds, and the
+ * P_REG_DESK_PAR* registers may represent different clock markers at
+ * different link speeds, either the deskew marker for multi-lane link speeds
+ * or the Reed Solomon gearbox marker for RS-FEC.
+ */
+struct ice_vernier_info_e822 {
+	u32 tx_par_clk;
+	u32 rx_par_clk;
+	u32 tx_pcs_clk;
+	u32 rx_pcs_clk;
+	u32 tx_desk_rsgb_par;
+	u32 rx_desk_rsgb_par;
+	u32 tx_desk_rsgb_pcs;
+	u32 rx_desk_rsgb_pcs;
+	u32 tx_fixed_delay;
+	u32 pmd_adj_divisor;
+	u32 rx_fixed_delay;
+};
+
+/**
+ * struct ice_cgu_pll_params_e822
+ * @refclk_pre_div: Reference clock pre-divisor
+ * @feedback_div: Feedback divisor
+ * @frac_n_div: Fractional divisor
+ * @post_pll_div: Post PLL divisor
+ *
+ * Clock Generation Unit parameters used to program the PLL based on the
+ * selected TIME_REF frequency.
+ */
+struct ice_cgu_pll_params_e822 {
+	u32 refclk_pre_div;
+	u32 feedback_div;
+	u32 frac_n_div;
+	u32 post_pll_div;
+};
+
+extern const struct
+ice_cgu_pll_params_e822 e822_cgu_params[NUM_ICE_TIME_REF_FREQ];
+
+#define E810C_QSFP_C827_0_HANDLE 2
+#define E810C_QSFP_C827_1_HANDLE 3
+
+/* Table of constants related to possible TIME_REF sources */
+extern const struct ice_time_ref_info_e822 e822_time_ref[NUM_ICE_TIME_REF_FREQ];
+
+/* Table of constants for Vernier calibration on E822 */
+extern const struct ice_vernier_info_e822 e822_vernier[NUM_ICE_PTP_LNK_SPD];
+
+/* Increment value to generate nanoseconds in the GLTSYN_TIME_L register for
+ * the E810 devices. Based off of a PLL with an 812.5 MHz frequency.
+ */
+#define ICE_PTP_NOMINAL_INCVAL_E810 0x13b13b13bULL
+
+/* Device agnostic functions */
+u8 ice_get_ptp_src_clock_index(struct ice_hw *hw);
+bool ice_ptp_lock(struct ice_hw *hw);
+void ice_ptp_unlock(struct ice_hw *hw);
+int ice_ptp_init_time(struct ice_hw *hw, u64 time);
+int ice_ptp_write_incval(struct ice_hw *hw, u64 incval);
+int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval);
+int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj);
+int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp);
+int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx);
+void ice_ptp_reset_ts_memory(struct ice_hw *hw);
+int ice_ptp_init_phc(struct ice_hw *hw);
+int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready);
+
+/* E822 family functions */
+int ice_read_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 *val);
+int ice_write_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 val);
+void ice_ptp_reset_ts_memory_quad_e822(struct ice_hw *hw, u8 quad);
+
+/**
+ * ice_e822_time_ref - Get the current TIME_REF from capabilities
+ * @hw: pointer to the HW structure
+ *
+ * Returns the current TIME_REF from the capabilities structure.
+ */
+static inline enum ice_time_ref_freq ice_e822_time_ref(struct ice_hw *hw)
+{
+	return hw->func_caps.ts_func_info.time_ref;
+}
+
+/**
+ * ice_set_e822_time_ref - Set new TIME_REF
+ * @hw: pointer to the HW structure
+ * @time_ref: new TIME_REF to set
+ *
+ * Update the TIME_REF in the capabilities structure in response to some
+ * change, such as an update to the CGU registers.
+ */
+static inline void
+ice_set_e822_time_ref(struct ice_hw *hw, enum ice_time_ref_freq time_ref)
+{
+	hw->func_caps.ts_func_info.time_ref = time_ref;
+}
+
+static inline u64 ice_e822_pll_freq(enum ice_time_ref_freq time_ref)
+{
+	return e822_time_ref[time_ref].pll_freq;
+}
+
+static inline u64 ice_e822_nominal_incval(enum ice_time_ref_freq time_ref)
+{
+	return e822_time_ref[time_ref].nominal_incval;
+}
+
+static inline u64 ice_e822_pps_delay(enum ice_time_ref_freq time_ref)
+{
+	return e822_time_ref[time_ref].pps_delay;
+}
+
+/* E822 Vernier calibration functions */
+int ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset);
+int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port);
+int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port);
+int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port);
+
+/* E810 family functions */
+int ice_ptp_init_phy_e810(struct ice_hw *hw);
+int ice_read_sma_ctrl_e810t(struct ice_hw *hw, u8 *data);
+int ice_write_sma_ctrl_e810t(struct ice_hw *hw, u8 data);
+int ice_read_pca9575_reg_e810t(struct ice_hw *hw, u8 offset, u8 *data);
+
+#define PFTSYN_SEM_BYTES	4
+
+#define ICE_PTP_CLOCK_INDEX_0	0x00
+#define ICE_PTP_CLOCK_INDEX_1	0x01
+
+/* PHY timer commands */
+#define SEL_CPK_SRC	8
+#define SEL_PHY_SRC	3
+
+/* Time Sync command Definitions */
+#define GLTSYN_CMD_INIT_TIME		BIT(0)
+#define GLTSYN_CMD_INIT_INCVAL		BIT(1)
+#define GLTSYN_CMD_INIT_TIME_INCVAL	(BIT(0) | BIT(1))
+#define GLTSYN_CMD_ADJ_TIME		BIT(2)
+#define GLTSYN_CMD_ADJ_INIT_TIME	(BIT(2) | BIT(3))
+#define GLTSYN_CMD_READ_TIME		BIT(7)
+
+/* PHY port Time Sync command definitions */
+#define PHY_CMD_INIT_TIME		BIT(0)
+#define PHY_CMD_INIT_INCVAL		BIT(1)
+#define PHY_CMD_ADJ_TIME		(BIT(0) | BIT(1))
+#define PHY_CMD_ADJ_TIME_AT_TIME	(BIT(0) | BIT(2))
+#define PHY_CMD_READ_TIME		(BIT(0) | BIT(1) | BIT(2))
+
+#define TS_CMD_MASK_E810		0xFF
+#define TS_CMD_MASK			0xF
+#define SYNC_EXEC_CMD			0x3
+
+/* Macros to derive port low and high addresses on both quads */
+#define P_Q0_L(a, p) ((((a) + (0x2000 * (p)))) & 0xFFFF)
+#define P_Q0_H(a, p) ((((a) + (0x2000 * (p)))) >> 16)
+#define P_Q1_L(a, p) ((((a) - (0x2000 * ((p) - ICE_PORTS_PER_QUAD)))) & 0xFFFF)
+#define P_Q1_H(a, p) ((((a) - (0x2000 * ((p) - ICE_PORTS_PER_QUAD)))) >> 16)
+
+/* PHY QUAD register base addresses */
+#define Q_0_BASE			0x94000
+#define Q_1_BASE			0x114000
+
+/* Timestamp memory reset registers */
+#define Q_REG_TS_CTRL			0x618
+#define Q_REG_TS_CTRL_S			0
+#define Q_REG_TS_CTRL_M			BIT(0)
+
+/* Timestamp availability status registers */
+#define Q_REG_TX_MEMORY_STATUS_L	0xCF0
+#define Q_REG_TX_MEMORY_STATUS_U	0xCF4
+
+/* Tx FIFO status registers */
+#define Q_REG_FIFO23_STATUS		0xCF8
+#define Q_REG_FIFO01_STATUS		0xCFC
+#define Q_REG_FIFO02_S			0
+#define Q_REG_FIFO02_M			ICE_M(0x3FF, 0)
+#define Q_REG_FIFO13_S			10
+#define Q_REG_FIFO13_M			ICE_M(0x3FF, 10)
+
+/* Interrupt control Config registers */
+#define Q_REG_TX_MEM_GBL_CFG		0xC08
+#define Q_REG_TX_MEM_GBL_CFG_LANE_TYPE_S	0
+#define Q_REG_TX_MEM_GBL_CFG_LANE_TYPE_M	BIT(0)
+#define Q_REG_TX_MEM_GBL_CFG_TX_TYPE_S	1
+#define Q_REG_TX_MEM_GBL_CFG_TX_TYPE_M	ICE_M(0xFF, 1)
+#define Q_REG_TX_MEM_GBL_CFG_INTR_THR_S	9
+#define Q_REG_TX_MEM_GBL_CFG_INTR_THR_M ICE_M(0x3F, 9)
+#define Q_REG_TX_MEM_GBL_CFG_INTR_ENA_S	15
+#define Q_REG_TX_MEM_GBL_CFG_INTR_ENA_M	BIT(15)
+
+/* Tx Timestamp data registers */
+#define Q_REG_TX_MEMORY_BANK_START	0xA00
+
+/* PHY port register base addresses */
+#define P_0_BASE			0x80000
+#define P_4_BASE			0x106000
+
+/* Timestamp init registers */
+#define P_REG_RX_TIMER_INC_PRE_L	0x46C
+#define P_REG_RX_TIMER_INC_PRE_U	0x470
+#define P_REG_TX_TIMER_INC_PRE_L	0x44C
+#define P_REG_TX_TIMER_INC_PRE_U	0x450
+
+/* Timestamp match and adjust target registers */
+#define P_REG_RX_TIMER_CNT_ADJ_L	0x474
+#define P_REG_RX_TIMER_CNT_ADJ_U	0x478
+#define P_REG_TX_TIMER_CNT_ADJ_L	0x454
+#define P_REG_TX_TIMER_CNT_ADJ_U	0x458
+
+/* Timestamp capture registers */
+#define P_REG_RX_CAPTURE_L		0x4D8
+#define P_REG_RX_CAPTURE_U		0x4DC
+#define P_REG_TX_CAPTURE_L		0x4B4
+#define P_REG_TX_CAPTURE_U		0x4B8
+
+/* Timestamp PHY incval registers */
+#define P_REG_TIMETUS_L			0x410
+#define P_REG_TIMETUS_U			0x414
+
+#define P_REG_40B_LOW_M			0xFF
+#define P_REG_40B_HIGH_S		8
+
+/* PHY window length registers */
+#define P_REG_WL			0x40C
+
+#define PTP_VERNIER_WL			0x111ed
+
+/* PHY start registers */
+#define P_REG_PS			0x408
+#define P_REG_PS_START_S		0
+#define P_REG_PS_START_M		BIT(0)
+#define P_REG_PS_BYPASS_MODE_S		1
+#define P_REG_PS_BYPASS_MODE_M		BIT(1)
+#define P_REG_PS_ENA_CLK_S		2
+#define P_REG_PS_ENA_CLK_M		BIT(2)
+#define P_REG_PS_LOAD_OFFSET_S		3
+#define P_REG_PS_LOAD_OFFSET_M		BIT(3)
+#define P_REG_PS_SFT_RESET_S		11
+#define P_REG_PS_SFT_RESET_M		BIT(11)
+
+/* PHY offset valid registers */
+#define P_REG_TX_OV_STATUS		0x4D4
+#define P_REG_TX_OV_STATUS_OV_S		0
+#define P_REG_TX_OV_STATUS_OV_M		BIT(0)
+#define P_REG_RX_OV_STATUS		0x4F8
+#define P_REG_RX_OV_STATUS_OV_S		0
+#define P_REG_RX_OV_STATUS_OV_M		BIT(0)
+
+/* PHY offset ready registers */
+#define P_REG_TX_OR			0x45C
+#define P_REG_RX_OR			0x47C
+
+/* PHY total offset registers */
+#define P_REG_TOTAL_RX_OFFSET_L		0x460
+#define P_REG_TOTAL_RX_OFFSET_U		0x464
+#define P_REG_TOTAL_TX_OFFSET_L		0x440
+#define P_REG_TOTAL_TX_OFFSET_U		0x444
+
+/* Timestamp PAR/PCS registers */
+#define P_REG_UIX66_10G_40G_L		0x480
+#define P_REG_UIX66_10G_40G_U		0x484
+#define P_REG_UIX66_25G_100G_L		0x488
+#define P_REG_UIX66_25G_100G_U		0x48C
+#define P_REG_DESK_PAR_RX_TUS_L		0x490
+#define P_REG_DESK_PAR_RX_TUS_U		0x494
+#define P_REG_DESK_PAR_TX_TUS_L		0x498
+#define P_REG_DESK_PAR_TX_TUS_U		0x49C
+#define P_REG_DESK_PCS_RX_TUS_L		0x4A0
+#define P_REG_DESK_PCS_RX_TUS_U		0x4A4
+#define P_REG_DESK_PCS_TX_TUS_L		0x4A8
+#define P_REG_DESK_PCS_TX_TUS_U		0x4AC
+#define P_REG_PAR_RX_TUS_L		0x420
+#define P_REG_PAR_RX_TUS_U		0x424
+#define P_REG_PAR_TX_TUS_L		0x428
+#define P_REG_PAR_TX_TUS_U		0x42C
+#define P_REG_PCS_RX_TUS_L		0x430
+#define P_REG_PCS_RX_TUS_U		0x434
+#define P_REG_PCS_TX_TUS_L		0x438
+#define P_REG_PCS_TX_TUS_U		0x43C
+#define P_REG_PAR_RX_TIME_L		0x4F0
+#define P_REG_PAR_RX_TIME_U		0x4F4
+#define P_REG_PAR_TX_TIME_L		0x4CC
+#define P_REG_PAR_TX_TIME_U		0x4D0
+#define P_REG_PAR_PCS_RX_OFFSET_L	0x4E8
+#define P_REG_PAR_PCS_RX_OFFSET_U	0x4EC
+#define P_REG_PAR_PCS_TX_OFFSET_L	0x4C4
+#define P_REG_PAR_PCS_TX_OFFSET_U	0x4C8
+#define P_REG_LINK_SPEED		0x4FC
+#define P_REG_LINK_SPEED_SERDES_S	0
+#define P_REG_LINK_SPEED_SERDES_M	ICE_M(0x7, 0)
+#define P_REG_LINK_SPEED_FEC_MODE_S	3
+#define P_REG_LINK_SPEED_FEC_MODE_M	ICE_M(0x3, 3)
+#define P_REG_LINK_SPEED_FEC_MODE(reg)			\
+	(((reg) & P_REG_LINK_SPEED_FEC_MODE_M) >>	\
+	 P_REG_LINK_SPEED_FEC_MODE_S)
+
+/* PHY timestamp related registers */
+#define P_REG_PMD_ALIGNMENT		0x0FC
+#define P_REG_RX_80_TO_160_CNT		0x6FC
+#define P_REG_RX_80_TO_160_CNT_RXCYC_S	0
+#define P_REG_RX_80_TO_160_CNT_RXCYC_M	BIT(0)
+#define P_REG_RX_40_TO_160_CNT		0x8FC
+#define P_REG_RX_40_TO_160_CNT_RXCYC_S	0
+#define P_REG_RX_40_TO_160_CNT_RXCYC_M	ICE_M(0x3, 0)
+
+/* Rx FIFO status registers */
+#define P_REG_RX_OV_FS			0x4F8
+#define P_REG_RX_OV_FS_FIFO_STATUS_S	2
+#define P_REG_RX_OV_FS_FIFO_STATUS_M	ICE_M(0x3FF, 2)
+
+/* Timestamp command registers */
+#define P_REG_TX_TMR_CMD		0x448
+#define P_REG_RX_TMR_CMD		0x468
+
+/* E810 timesync enable register */
+#define ETH_GLTSYN_ENA(_i)		(0x03000348 + ((_i) * 4))
+
+/* E810 shadow init time registers */
+#define ETH_GLTSYN_SHTIME_0(i)		(0x03000368 + ((i) * 32))
+#define ETH_GLTSYN_SHTIME_L(i)		(0x0300036C + ((i) * 32))
+
+/* E810 shadow time adjust registers */
+#define ETH_GLTSYN_SHADJ_L(_i)		(0x03000378 + ((_i) * 32))
+#define ETH_GLTSYN_SHADJ_H(_i)		(0x0300037C + ((_i) * 32))
+
+/* E810 timer command register */
+#define ETH_GLTSYN_CMD			0x03000344
+
+/* Source timer incval macros */
+#define INCVAL_HIGH_M			0xFF
+
+/* Timestamp block macros */
+#define TS_VALID			BIT(0)
+#define TS_LOW_M			0xFFFFFFFF
+#define TS_HIGH_M			0xFF
+#define TS_HIGH_S			32
+
+#define TS_PHY_LOW_M			0xFF
+#define TS_PHY_HIGH_M			0xFFFFFFFF
+#define TS_PHY_HIGH_S			8
+
+#define BYTES_PER_IDX_ADDR_L_U		8
+#define BYTES_PER_IDX_ADDR_L		4
+
+/* Tx timestamp low latency read definitions */
+#define TS_LL_READ_RETRIES		200
+#define TS_LL_READ_TS_HIGH		GENMASK(23, 16)
+#define TS_LL_READ_TS_IDX		GENMASK(29, 24)
+#define TS_LL_READ_TS			BIT(31)
+
+/* Internal PHY timestamp address */
+#define TS_L(a, idx) ((a) + ((idx) * BYTES_PER_IDX_ADDR_L_U))
+#define TS_H(a, idx) ((a) + ((idx) * BYTES_PER_IDX_ADDR_L_U +		\
+			     BYTES_PER_IDX_ADDR_L))
+
+/* External PHY timestamp address */
+#define TS_EXT(a, port, idx) ((a) + (0x1000 * (port)) +			\
+				 ((idx) * BYTES_PER_IDX_ADDR_L_U))
+
+#define LOW_TX_MEMORY_BANK_START	0x03090000
+#define HIGH_TX_MEMORY_BANK_START	0x03090004
+
+/* E810T SMA controller pin control */
+#define ICE_SMA1_DIR_EN_E810T		BIT(4)
+#define ICE_SMA1_TX_EN_E810T		BIT(5)
+#define ICE_SMA2_UFL2_RX_DIS_E810T	BIT(3)
+#define ICE_SMA2_DIR_EN_E810T		BIT(6)
+#define ICE_SMA2_TX_EN_E810T		BIT(7)
+
+#define ICE_SMA1_MASK_E810T	(ICE_SMA1_DIR_EN_E810T | \
+				 ICE_SMA1_TX_EN_E810T)
+#define ICE_SMA2_MASK_E810T	(ICE_SMA2_UFL2_RX_DIS_E810T | \
+				 ICE_SMA2_DIR_EN_E810T | \
+				 ICE_SMA2_TX_EN_E810T)
+#define ICE_ALL_SMA_MASK_E810T	(ICE_SMA1_MASK_E810T | \
+				 ICE_SMA2_MASK_E810T)
+
+#define ICE_SMA_MIN_BIT_E810T	3
+#define ICE_SMA_MAX_BIT_E810T	7
+#define ICE_PCA9575_P1_OFFSET	8
+
+/* E810T PCA9575 IO controller registers */
+#define ICE_PCA9575_P0_IN	0x0
+
+/* E810T PCA9575 IO controller pin control */
+#define ICE_E810T_P0_GNSS_PRSNT_N	BIT(4)
+
+#endif /* _ICE_PTP_HW_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c
new file mode 100644
index 0000000000..c686ac0935
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_repr.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_eswitch.h"
+#include "ice_devlink.h"
+#include "ice_sriov.h"
+#include "ice_tc_lib.h"
+#include "ice_dcb_lib.h"
+
+/**
+ * ice_repr_get_sw_port_id - get port ID associated with representor
+ * @repr: pointer to port representor
+ */
+static int ice_repr_get_sw_port_id(struct ice_repr *repr)
+{
+	return repr->vf->pf->hw.port_info->lport;
+}
+
+/**
+ * ice_repr_get_phys_port_name - get phys port name
+ * @netdev: pointer to port representor netdev
+ * @buf: write here port name
+ * @len: max length of buf
+ */
+static int
+ice_repr_get_phys_port_name(struct net_device *netdev, char *buf, size_t len)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_repr *repr = np->repr;
+	int res;
+
+	/* Devlink port is registered and devlink core is taking care of name formatting. */
+	if (repr->vf->devlink_port.devlink)
+		return -EOPNOTSUPP;
+
+	res = snprintf(buf, len, "pf%dvfr%d", ice_repr_get_sw_port_id(repr),
+		       repr->vf->vf_id);
+	if (res <= 0)
+		return -EOPNOTSUPP;
+	return 0;
+}
+
+/**
+ * ice_repr_get_stats64 - get VF stats for VFPR use
+ * @netdev: pointer to port representor netdev
+ * @stats: pointer to struct where stats can be stored
+ */
+static void
+ice_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_eth_stats *eth_stats;
+	struct ice_vsi *vsi;
+
+	if (ice_is_vf_disabled(np->repr->vf))
+		return;
+	vsi = np->repr->src_vsi;
+
+	ice_update_vsi_stats(vsi);
+	eth_stats = &vsi->eth_stats;
+
+	stats->tx_packets = eth_stats->tx_unicast + eth_stats->tx_broadcast +
+			    eth_stats->tx_multicast;
+	stats->rx_packets = eth_stats->rx_unicast + eth_stats->rx_broadcast +
+			    eth_stats->rx_multicast;
+	stats->tx_bytes = eth_stats->tx_bytes;
+	stats->rx_bytes = eth_stats->rx_bytes;
+	stats->multicast = eth_stats->rx_multicast;
+	stats->tx_errors = eth_stats->tx_errors;
+	stats->tx_dropped = eth_stats->tx_discards;
+	stats->rx_dropped = eth_stats->rx_discards;
+}
+
+/**
+ * ice_netdev_to_repr - Get port representor for given netdevice
+ * @netdev: pointer to port representor netdev
+ */
+struct ice_repr *ice_netdev_to_repr(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+
+	return np->repr;
+}
+
+/**
+ * ice_repr_open - Enable port representor's network interface
+ * @netdev: network interface device structure
+ *
+ * The open entry point is called when a port representor's network
+ * interface is made active by the system (IFF_UP). Corresponding
+ * VF is notified about link status change.
+ *
+ * Returns 0 on success
+ */
+static int ice_repr_open(struct net_device *netdev)
+{
+	struct ice_repr *repr = ice_netdev_to_repr(netdev);
+	struct ice_vf *vf;
+
+	vf = repr->vf;
+	vf->link_forced = true;
+	vf->link_up = true;
+	ice_vc_notify_vf_link_state(vf);
+
+	netif_carrier_on(netdev);
+	netif_tx_start_all_queues(netdev);
+
+	return 0;
+}
+
+/**
+ * ice_repr_stop - Disable port representor's network interface
+ * @netdev: network interface device structure
+ *
+ * The stop entry point is called when a port representor's network
+ * interface is de-activated by the system. Corresponding
+ * VF is notified about link status change.
+ *
+ * Returns 0 on success
+ */
+static int ice_repr_stop(struct net_device *netdev)
+{
+	struct ice_repr *repr = ice_netdev_to_repr(netdev);
+	struct ice_vf *vf;
+
+	vf = repr->vf;
+	vf->link_forced = true;
+	vf->link_up = false;
+	ice_vc_notify_vf_link_state(vf);
+
+	netif_carrier_off(netdev);
+	netif_tx_stop_all_queues(netdev);
+
+	return 0;
+}
+
+/**
+ * ice_repr_sp_stats64 - get slow path stats for port representor
+ * @dev: network interface device structure
+ * @stats: netlink stats structure
+ *
+ * RX/TX stats are being swapped here to be consistent with VF stats. In slow
+ * path, port representor receives data when the corresponding VF is sending it
+ * (and vice versa), TX and RX bytes/packets are effectively swapped on port
+ * representor.
+ */
+static int
+ice_repr_sp_stats64(const struct net_device *dev,
+		    struct rtnl_link_stats64 *stats)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	int vf_id = np->repr->vf->vf_id;
+	struct ice_tx_ring *tx_ring;
+	struct ice_rx_ring *rx_ring;
+	u64 pkts, bytes;
+
+	tx_ring = np->vsi->tx_rings[vf_id];
+	ice_fetch_u64_stats_per_ring(&tx_ring->ring_stats->syncp,
+				     tx_ring->ring_stats->stats,
+				     &pkts, &bytes);
+	stats->rx_packets = pkts;
+	stats->rx_bytes = bytes;
+
+	rx_ring = np->vsi->rx_rings[vf_id];
+	ice_fetch_u64_stats_per_ring(&rx_ring->ring_stats->syncp,
+				     rx_ring->ring_stats->stats,
+				     &pkts, &bytes);
+	stats->tx_packets = pkts;
+	stats->tx_bytes = bytes;
+	stats->tx_dropped = rx_ring->ring_stats->rx_stats.alloc_page_failed +
+			    rx_ring->ring_stats->rx_stats.alloc_buf_failed;
+
+	return 0;
+}
+
+static bool
+ice_repr_ndo_has_offload_stats(const struct net_device *dev, int attr_id)
+{
+	return attr_id == IFLA_OFFLOAD_XSTATS_CPU_HIT;
+}
+
+static int
+ice_repr_ndo_get_offload_stats(int attr_id, const struct net_device *dev,
+			       void *sp)
+{
+	if (attr_id == IFLA_OFFLOAD_XSTATS_CPU_HIT)
+		return ice_repr_sp_stats64(dev, (struct rtnl_link_stats64 *)sp);
+
+	return -EINVAL;
+}
+
+static int
+ice_repr_setup_tc_cls_flower(struct ice_repr *repr,
+			     struct flow_cls_offload *flower)
+{
+	switch (flower->command) {
+	case FLOW_CLS_REPLACE:
+		return ice_add_cls_flower(repr->netdev, repr->src_vsi, flower);
+	case FLOW_CLS_DESTROY:
+		return ice_del_cls_flower(repr->src_vsi, flower);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int
+ice_repr_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+			   void *cb_priv)
+{
+	struct flow_cls_offload *flower = (struct flow_cls_offload *)type_data;
+	struct ice_netdev_priv *np = (struct ice_netdev_priv *)cb_priv;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return ice_repr_setup_tc_cls_flower(np->repr, flower);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static LIST_HEAD(ice_repr_block_cb_list);
+
+static int
+ice_repr_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+		  void *type_data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return flow_block_cb_setup_simple((struct flow_block_offload *)
+						  type_data,
+						  &ice_repr_block_cb_list,
+						  ice_repr_setup_tc_block_cb,
+						  np, np, true);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct net_device_ops ice_repr_netdev_ops = {
+	.ndo_get_phys_port_name = ice_repr_get_phys_port_name,
+	.ndo_get_stats64 = ice_repr_get_stats64,
+	.ndo_open = ice_repr_open,
+	.ndo_stop = ice_repr_stop,
+	.ndo_start_xmit = ice_eswitch_port_start_xmit,
+	.ndo_setup_tc = ice_repr_setup_tc,
+	.ndo_has_offload_stats = ice_repr_ndo_has_offload_stats,
+	.ndo_get_offload_stats = ice_repr_ndo_get_offload_stats,
+};
+
+/**
+ * ice_is_port_repr_netdev - Check if a given netdevice is a port representor netdev
+ * @netdev: pointer to netdev
+ */
+bool ice_is_port_repr_netdev(const struct net_device *netdev)
+{
+	return netdev && (netdev->netdev_ops == &ice_repr_netdev_ops);
+}
+
+/**
+ * ice_repr_reg_netdev - register port representor netdev
+ * @netdev: pointer to port representor netdev
+ */
+static int
+ice_repr_reg_netdev(struct net_device *netdev)
+{
+	eth_hw_addr_random(netdev);
+	netdev->netdev_ops = &ice_repr_netdev_ops;
+	ice_set_ethtool_repr_ops(netdev);
+
+	netdev->hw_features |= NETIF_F_HW_TC;
+
+	netif_carrier_off(netdev);
+	netif_tx_stop_all_queues(netdev);
+
+	return register_netdev(netdev);
+}
+
+/**
+ * ice_repr_add - add representor for VF
+ * @vf: pointer to VF structure
+ */
+static int ice_repr_add(struct ice_vf *vf)
+{
+	struct ice_q_vector *q_vector;
+	struct ice_netdev_priv *np;
+	struct ice_repr *repr;
+	struct ice_vsi *vsi;
+	int err;
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi)
+		return -EINVAL;
+
+	repr = kzalloc(sizeof(*repr), GFP_KERNEL);
+	if (!repr)
+		return -ENOMEM;
+
+	repr->netdev = alloc_etherdev(sizeof(struct ice_netdev_priv));
+	if (!repr->netdev) {
+		err =  -ENOMEM;
+		goto err_alloc;
+	}
+
+	repr->src_vsi = vsi;
+	repr->vf = vf;
+	vf->repr = repr;
+	np = netdev_priv(repr->netdev);
+	np->repr = repr;
+
+	q_vector = kzalloc(sizeof(*q_vector), GFP_KERNEL);
+	if (!q_vector) {
+		err = -ENOMEM;
+		goto err_alloc_q_vector;
+	}
+	repr->q_vector = q_vector;
+
+	err = ice_devlink_create_vf_port(vf);
+	if (err)
+		goto err_devlink;
+
+	repr->netdev->min_mtu = ETH_MIN_MTU;
+	repr->netdev->max_mtu = ICE_MAX_MTU;
+
+	SET_NETDEV_DEV(repr->netdev, ice_pf_to_dev(vf->pf));
+	SET_NETDEV_DEVLINK_PORT(repr->netdev, &vf->devlink_port);
+	err = ice_repr_reg_netdev(repr->netdev);
+	if (err)
+		goto err_netdev;
+
+	ice_virtchnl_set_repr_ops(vf);
+
+	return 0;
+
+err_netdev:
+	ice_devlink_destroy_vf_port(vf);
+err_devlink:
+	kfree(repr->q_vector);
+	vf->repr->q_vector = NULL;
+err_alloc_q_vector:
+	free_netdev(repr->netdev);
+	repr->netdev = NULL;
+err_alloc:
+	kfree(repr);
+	vf->repr = NULL;
+	return err;
+}
+
+/**
+ * ice_repr_rem - remove representor from VF
+ * @vf: pointer to VF structure
+ */
+static void ice_repr_rem(struct ice_vf *vf)
+{
+	if (!vf->repr)
+		return;
+
+	kfree(vf->repr->q_vector);
+	vf->repr->q_vector = NULL;
+	unregister_netdev(vf->repr->netdev);
+	ice_devlink_destroy_vf_port(vf);
+	free_netdev(vf->repr->netdev);
+	vf->repr->netdev = NULL;
+	kfree(vf->repr);
+	vf->repr = NULL;
+
+	ice_virtchnl_set_dflt_ops(vf);
+}
+
+/**
+ * ice_repr_rem_from_all_vfs - remove port representor for all VFs
+ * @pf: pointer to PF structure
+ */
+void ice_repr_rem_from_all_vfs(struct ice_pf *pf)
+{
+	struct devlink *devlink;
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	lockdep_assert_held(&pf->vfs.table_lock);
+
+	ice_for_each_vf(pf, bkt, vf)
+		ice_repr_rem(vf);
+
+	/* since all port representors are destroyed, there is
+	 * no point in keeping the nodes
+	 */
+	devlink = priv_to_devlink(pf);
+	devl_lock(devlink);
+	devl_rate_nodes_destroy(devlink);
+	devl_unlock(devlink);
+}
+
+/**
+ * ice_repr_add_for_all_vfs - add port representor for all VFs
+ * @pf: pointer to PF structure
+ */
+int ice_repr_add_for_all_vfs(struct ice_pf *pf)
+{
+	struct devlink *devlink;
+	struct ice_vf *vf;
+	unsigned int bkt;
+	int err;
+
+	lockdep_assert_held(&pf->vfs.table_lock);
+
+	ice_for_each_vf(pf, bkt, vf) {
+		err = ice_repr_add(vf);
+		if (err)
+			goto err;
+	}
+
+	/* only export if ADQ and DCB disabled */
+	if (ice_is_adq_active(pf) || ice_is_dcb_active(pf))
+		return 0;
+
+	devlink = priv_to_devlink(pf);
+	ice_devlink_rate_init_tx_topology(devlink, ice_get_main_vsi(pf));
+
+	return 0;
+
+err:
+	ice_repr_rem_from_all_vfs(pf);
+
+	return err;
+}
+
+/**
+ * ice_repr_start_tx_queues - start Tx queues of port representor
+ * @repr: pointer to repr structure
+ */
+void ice_repr_start_tx_queues(struct ice_repr *repr)
+{
+	netif_carrier_on(repr->netdev);
+	netif_tx_start_all_queues(repr->netdev);
+}
+
+/**
+ * ice_repr_stop_tx_queues - stop Tx queues of port representor
+ * @repr: pointer to repr structure
+ */
+void ice_repr_stop_tx_queues(struct ice_repr *repr)
+{
+	netif_carrier_off(repr->netdev);
+	netif_tx_stop_all_queues(repr->netdev);
+}
+
+/**
+ * ice_repr_set_traffic_vsi - set traffic VSI for port representor
+ * @repr: repr on with VSI will be set
+ * @vsi: pointer to VSI that will be used by port representor to pass traffic
+ */
+void ice_repr_set_traffic_vsi(struct ice_repr *repr, struct ice_vsi *vsi)
+{
+	struct ice_netdev_priv *np = netdev_priv(repr->netdev);
+
+	np->vsi = vsi;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h
new file mode 100644
index 0000000000..e1ee2d2c1d
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_repr.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_REPR_H_
+#define _ICE_REPR_H_
+
+#include <net/dst_metadata.h>
+
+struct ice_repr {
+	struct ice_vsi *src_vsi;
+	struct ice_vf *vf;
+	struct ice_q_vector *q_vector;
+	struct net_device *netdev;
+	struct metadata_dst *dst;
+	struct ice_esw_br_port *br_port;
+#ifdef CONFIG_ICE_SWITCHDEV
+	/* info about slow path rule */
+	struct ice_rule_query_data sp_rule;
+#endif
+};
+
+int ice_repr_add_for_all_vfs(struct ice_pf *pf);
+void ice_repr_rem_from_all_vfs(struct ice_pf *pf);
+
+void ice_repr_start_tx_queues(struct ice_repr *repr);
+void ice_repr_stop_tx_queues(struct ice_repr *repr);
+
+void ice_repr_set_traffic_vsi(struct ice_repr *repr, struct ice_vsi *vsi);
+
+struct ice_repr *ice_netdev_to_repr(struct net_device *netdev);
+bool ice_is_port_repr_netdev(const struct net_device *netdev);
+#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h
new file mode 100644
index 0000000000..ead75fe2bc
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_SBQ_CMD_H_
+#define _ICE_SBQ_CMD_H_
+
+/* This header file defines the Sideband Queue commands, error codes and
+ * descriptor format. It is shared between Firmware and Software.
+ */
+
+/* Sideband Queue command structure and opcodes */
+enum ice_sbq_opc {
+	/* Sideband Queue commands */
+	ice_sbq_opc_neigh_dev_req			= 0x0C00,
+	ice_sbq_opc_neigh_dev_ev			= 0x0C01
+};
+
+/* Sideband Queue descriptor. Indirect command
+ * and non posted
+ */
+struct ice_sbq_cmd_desc {
+	__le16 flags;
+	__le16 opcode;
+	__le16 datalen;
+	__le16 cmd_retval;
+
+	/* Opaque message data */
+	__le32 cookie_high;
+	__le32 cookie_low;
+
+	union {
+		__le16 cmd_len;
+		__le16 cmpl_len;
+	} param0;
+
+	u8 reserved[6];
+	__le32 addr_high;
+	__le32 addr_low;
+};
+
+struct ice_sbq_evt_desc {
+	__le16 flags;
+	__le16 opcode;
+	__le16 datalen;
+	__le16 cmd_retval;
+	u8 data[24];
+};
+
+enum ice_sbq_msg_dev {
+	rmn_0	= 0x02,
+	rmn_1	= 0x03,
+	rmn_2	= 0x04,
+	cgu	= 0x06
+};
+
+enum ice_sbq_msg_opcode {
+	ice_sbq_msg_rd	= 0x00,
+	ice_sbq_msg_wr	= 0x01
+};
+
+#define ICE_SBQ_MSG_FLAGS	0x40
+#define ICE_SBQ_MSG_SBE_FBE	0x0F
+
+struct ice_sbq_msg_req {
+	u8 dest_dev;
+	u8 src_dev;
+	u8 opcode;
+	u8 flags;
+	u8 sbe_fbe;
+	u8 func_id;
+	__le16 msg_addr_low;
+	__le32 msg_addr_high;
+	__le32 data;
+};
+
+struct ice_sbq_msg_cmpl {
+	u8 dest_dev;
+	u8 src_dev;
+	u8 opcode;
+	u8 flags;
+	__le32 data;
+};
+
+/* Internal struct */
+struct ice_sbq_msg_input {
+	u8 dest_dev;
+	u8 opcode;
+	u16 msg_addr_low;
+	u32 msg_addr_high;
+	u32 data;
+};
+#endif /* _ICE_SBQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
new file mode 100644
index 0000000000..c0533d7b66
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -0,0 +1,4354 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include <net/devlink.h>
+#include "ice_sched.h"
+
+/**
+ * ice_sched_add_root_node - Insert the Tx scheduler root node in SW DB
+ * @pi: port information structure
+ * @info: Scheduler element information from firmware
+ *
+ * This function inserts the root node of the scheduling tree topology
+ * to the SW DB.
+ */
+static int
+ice_sched_add_root_node(struct ice_port_info *pi,
+			struct ice_aqc_txsched_elem_data *info)
+{
+	struct ice_sched_node *root;
+	struct ice_hw *hw;
+
+	if (!pi)
+		return -EINVAL;
+
+	hw = pi->hw;
+
+	root = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*root), GFP_KERNEL);
+	if (!root)
+		return -ENOMEM;
+
+	/* coverity[suspicious_sizeof] */
+	root->children = devm_kcalloc(ice_hw_to_dev(hw), hw->max_children[0],
+				      sizeof(*root), GFP_KERNEL);
+	if (!root->children) {
+		devm_kfree(ice_hw_to_dev(hw), root);
+		return -ENOMEM;
+	}
+
+	memcpy(&root->info, info, sizeof(*info));
+	pi->root = root;
+	return 0;
+}
+
+/**
+ * ice_sched_find_node_by_teid - Find the Tx scheduler node in SW DB
+ * @start_node: pointer to the starting ice_sched_node struct in a sub-tree
+ * @teid: node TEID to search
+ *
+ * This function searches for a node matching the TEID in the scheduling tree
+ * from the SW DB. The search is recursive and is restricted by the number of
+ * layers it has searched through; stopping at the max supported layer.
+ *
+ * This function needs to be called when holding the port_info->sched_lock
+ */
+struct ice_sched_node *
+ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid)
+{
+	u16 i;
+
+	/* The TEID is same as that of the start_node */
+	if (ICE_TXSCHED_GET_NODE_TEID(start_node) == teid)
+		return start_node;
+
+	/* The node has no children or is at the max layer */
+	if (!start_node->num_children ||
+	    start_node->tx_sched_layer >= ICE_AQC_TOPO_MAX_LEVEL_NUM ||
+	    start_node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF)
+		return NULL;
+
+	/* Check if TEID matches to any of the children nodes */
+	for (i = 0; i < start_node->num_children; i++)
+		if (ICE_TXSCHED_GET_NODE_TEID(start_node->children[i]) == teid)
+			return start_node->children[i];
+
+	/* Search within each child's sub-tree */
+	for (i = 0; i < start_node->num_children; i++) {
+		struct ice_sched_node *tmp;
+
+		tmp = ice_sched_find_node_by_teid(start_node->children[i],
+						  teid);
+		if (tmp)
+			return tmp;
+	}
+
+	return NULL;
+}
+
+/**
+ * ice_aqc_send_sched_elem_cmd - send scheduling elements cmd
+ * @hw: pointer to the HW struct
+ * @cmd_opc: cmd opcode
+ * @elems_req: number of elements to request
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_resp: returns total number of elements response
+ * @cd: pointer to command details structure or NULL
+ *
+ * This function sends a scheduling elements cmd (cmd_opc)
+ */
+static int
+ice_aqc_send_sched_elem_cmd(struct ice_hw *hw, enum ice_adminq_opc cmd_opc,
+			    u16 elems_req, void *buf, u16 buf_size,
+			    u16 *elems_resp, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_sched_elem_cmd *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	cmd = &desc.params.sched_elem_cmd;
+	ice_fill_dflt_direct_cmd_desc(&desc, cmd_opc);
+	cmd->num_elem_req = cpu_to_le16(elems_req);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status && elems_resp)
+		*elems_resp = le16_to_cpu(cmd->num_elem_resp);
+
+	return status;
+}
+
+/**
+ * ice_aq_query_sched_elems - query scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to query
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements returned
+ * @cd: pointer to command details structure or NULL
+ *
+ * Query scheduling elements (0x0404)
+ */
+int
+ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
+			 struct ice_aqc_txsched_elem_data *buf, u16 buf_size,
+			 u16 *elems_ret, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_get_sched_elems,
+					   elems_req, (void *)buf, buf_size,
+					   elems_ret, cd);
+}
+
+/**
+ * ice_sched_add_node - Insert the Tx scheduler node in SW DB
+ * @pi: port information structure
+ * @layer: Scheduler layer of the node
+ * @info: Scheduler element information from firmware
+ * @prealloc_node: preallocated ice_sched_node struct for SW DB
+ *
+ * This function inserts a scheduler node to the SW DB.
+ */
+int
+ice_sched_add_node(struct ice_port_info *pi, u8 layer,
+		   struct ice_aqc_txsched_elem_data *info,
+		   struct ice_sched_node *prealloc_node)
+{
+	struct ice_aqc_txsched_elem_data elem;
+	struct ice_sched_node *parent;
+	struct ice_sched_node *node;
+	struct ice_hw *hw;
+	int status;
+
+	if (!pi)
+		return -EINVAL;
+
+	hw = pi->hw;
+
+	/* A valid parent node should be there */
+	parent = ice_sched_find_node_by_teid(pi->root,
+					     le32_to_cpu(info->parent_teid));
+	if (!parent) {
+		ice_debug(hw, ICE_DBG_SCHED, "Parent Node not found for parent_teid=0x%x\n",
+			  le32_to_cpu(info->parent_teid));
+		return -EINVAL;
+	}
+
+	/* query the current node information from FW before adding it
+	 * to the SW DB
+	 */
+	status = ice_sched_query_elem(hw, le32_to_cpu(info->node_teid), &elem);
+	if (status)
+		return status;
+
+	if (prealloc_node)
+		node = prealloc_node;
+	else
+		node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
+	if (hw->max_children[layer]) {
+		/* coverity[suspicious_sizeof] */
+		node->children = devm_kcalloc(ice_hw_to_dev(hw),
+					      hw->max_children[layer],
+					      sizeof(*node), GFP_KERNEL);
+		if (!node->children) {
+			devm_kfree(ice_hw_to_dev(hw), node);
+			return -ENOMEM;
+		}
+	}
+
+	node->in_use = true;
+	node->parent = parent;
+	node->tx_sched_layer = layer;
+	parent->children[parent->num_children++] = node;
+	node->info = elem;
+	return 0;
+}
+
+/**
+ * ice_aq_delete_sched_elems - delete scheduler elements
+ * @hw: pointer to the HW struct
+ * @grps_req: number of groups to delete
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @grps_del: returns total number of elements deleted
+ * @cd: pointer to command details structure or NULL
+ *
+ * Delete scheduling elements (0x040F)
+ */
+static int
+ice_aq_delete_sched_elems(struct ice_hw *hw, u16 grps_req,
+			  struct ice_aqc_delete_elem *buf, u16 buf_size,
+			  u16 *grps_del, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_delete_sched_elems,
+					   grps_req, (void *)buf, buf_size,
+					   grps_del, cd);
+}
+
+/**
+ * ice_sched_remove_elems - remove nodes from HW
+ * @hw: pointer to the HW struct
+ * @parent: pointer to the parent node
+ * @num_nodes: number of nodes
+ * @node_teids: array of node teids to be deleted
+ *
+ * This function remove nodes from HW
+ */
+static int
+ice_sched_remove_elems(struct ice_hw *hw, struct ice_sched_node *parent,
+		       u16 num_nodes, u32 *node_teids)
+{
+	struct ice_aqc_delete_elem *buf;
+	u16 i, num_groups_removed = 0;
+	u16 buf_size;
+	int status;
+
+	buf_size = struct_size(buf, teid, num_nodes);
+	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->hdr.parent_teid = parent->info.node_teid;
+	buf->hdr.num_elems = cpu_to_le16(num_nodes);
+	for (i = 0; i < num_nodes; i++)
+		buf->teid[i] = cpu_to_le32(node_teids[i]);
+
+	status = ice_aq_delete_sched_elems(hw, 1, buf, buf_size,
+					   &num_groups_removed, NULL);
+	if (status || num_groups_removed != 1)
+		ice_debug(hw, ICE_DBG_SCHED, "remove node failed FW error %d\n",
+			  hw->adminq.sq_last_status);
+
+	devm_kfree(ice_hw_to_dev(hw), buf);
+	return status;
+}
+
+/**
+ * ice_sched_get_first_node - get the first node of the given layer
+ * @pi: port information structure
+ * @parent: pointer the base node of the subtree
+ * @layer: layer number
+ *
+ * This function retrieves the first node of the given layer from the subtree
+ */
+static struct ice_sched_node *
+ice_sched_get_first_node(struct ice_port_info *pi,
+			 struct ice_sched_node *parent, u8 layer)
+{
+	return pi->sib_head[parent->tc_num][layer];
+}
+
+/**
+ * ice_sched_get_tc_node - get pointer to TC node
+ * @pi: port information structure
+ * @tc: TC number
+ *
+ * This function returns the TC node pointer
+ */
+struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc)
+{
+	u8 i;
+
+	if (!pi || !pi->root)
+		return NULL;
+	for (i = 0; i < pi->root->num_children; i++)
+		if (pi->root->children[i]->tc_num == tc)
+			return pi->root->children[i];
+	return NULL;
+}
+
+/**
+ * ice_free_sched_node - Free a Tx scheduler node from SW DB
+ * @pi: port information structure
+ * @node: pointer to the ice_sched_node struct
+ *
+ * This function frees up a node from SW DB as well as from HW
+ *
+ * This function needs to be called with the port_info->sched_lock held
+ */
+void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
+{
+	struct ice_sched_node *parent;
+	struct ice_hw *hw = pi->hw;
+	u8 i, j;
+
+	/* Free the children before freeing up the parent node
+	 * The parent array is updated below and that shifts the nodes
+	 * in the array. So always pick the first child if num children > 0
+	 */
+	while (node->num_children)
+		ice_free_sched_node(pi, node->children[0]);
+
+	/* Leaf, TC and root nodes can't be deleted by SW */
+	if (node->tx_sched_layer >= hw->sw_entry_point_layer &&
+	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
+	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT &&
+	    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF) {
+		u32 teid = le32_to_cpu(node->info.node_teid);
+
+		ice_sched_remove_elems(hw, node->parent, 1, &teid);
+	}
+	parent = node->parent;
+	/* root has no parent */
+	if (parent) {
+		struct ice_sched_node *p;
+
+		/* update the parent */
+		for (i = 0; i < parent->num_children; i++)
+			if (parent->children[i] == node) {
+				for (j = i + 1; j < parent->num_children; j++)
+					parent->children[j - 1] =
+						parent->children[j];
+				parent->num_children--;
+				break;
+			}
+
+		p = ice_sched_get_first_node(pi, node, node->tx_sched_layer);
+		while (p) {
+			if (p->sibling == node) {
+				p->sibling = node->sibling;
+				break;
+			}
+			p = p->sibling;
+		}
+
+		/* update the sibling head if head is getting removed */
+		if (pi->sib_head[node->tc_num][node->tx_sched_layer] == node)
+			pi->sib_head[node->tc_num][node->tx_sched_layer] =
+				node->sibling;
+	}
+
+	devm_kfree(ice_hw_to_dev(hw), node->children);
+	kfree(node->name);
+	xa_erase(&pi->sched_node_ids, node->id);
+	devm_kfree(ice_hw_to_dev(hw), node);
+}
+
+/**
+ * ice_aq_get_dflt_topo - gets default scheduler topology
+ * @hw: pointer to the HW struct
+ * @lport: logical port number
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_branches: returns total number of queue to port branches
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get default scheduler topology (0x400)
+ */
+static int
+ice_aq_get_dflt_topo(struct ice_hw *hw, u8 lport,
+		     struct ice_aqc_get_topo_elem *buf, u16 buf_size,
+		     u8 *num_branches, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_topo *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	cmd = &desc.params.get_topo;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_dflt_topo);
+	cmd->port_num = lport;
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status && num_branches)
+		*num_branches = cmd->num_branches;
+
+	return status;
+}
+
+/**
+ * ice_aq_add_sched_elems - adds scheduling element
+ * @hw: pointer to the HW struct
+ * @grps_req: the number of groups that are requested to be added
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @grps_added: returns total number of groups added
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add scheduling elements (0x0401)
+ */
+static int
+ice_aq_add_sched_elems(struct ice_hw *hw, u16 grps_req,
+		       struct ice_aqc_add_elem *buf, u16 buf_size,
+		       u16 *grps_added, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_add_sched_elems,
+					   grps_req, (void *)buf, buf_size,
+					   grps_added, cd);
+}
+
+/**
+ * ice_aq_cfg_sched_elems - configures scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to configure
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_cfgd: returns total number of elements configured
+ * @cd: pointer to command details structure or NULL
+ *
+ * Configure scheduling elements (0x0403)
+ */
+static int
+ice_aq_cfg_sched_elems(struct ice_hw *hw, u16 elems_req,
+		       struct ice_aqc_txsched_elem_data *buf, u16 buf_size,
+		       u16 *elems_cfgd, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_cfg_sched_elems,
+					   elems_req, (void *)buf, buf_size,
+					   elems_cfgd, cd);
+}
+
+/**
+ * ice_aq_move_sched_elems - move scheduler elements
+ * @hw: pointer to the HW struct
+ * @grps_req: number of groups to move
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @grps_movd: returns total number of groups moved
+ * @cd: pointer to command details structure or NULL
+ *
+ * Move scheduling elements (0x0408)
+ */
+int
+ice_aq_move_sched_elems(struct ice_hw *hw, u16 grps_req,
+			struct ice_aqc_move_elem *buf, u16 buf_size,
+			u16 *grps_movd, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_move_sched_elems,
+					   grps_req, (void *)buf, buf_size,
+					   grps_movd, cd);
+}
+
+/**
+ * ice_aq_suspend_sched_elems - suspend scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to suspend
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements suspended
+ * @cd: pointer to command details structure or NULL
+ *
+ * Suspend scheduling elements (0x0409)
+ */
+static int
+ice_aq_suspend_sched_elems(struct ice_hw *hw, u16 elems_req, __le32 *buf,
+			   u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_suspend_sched_elems,
+					   elems_req, (void *)buf, buf_size,
+					   elems_ret, cd);
+}
+
+/**
+ * ice_aq_resume_sched_elems - resume scheduler elements
+ * @hw: pointer to the HW struct
+ * @elems_req: number of elements to resume
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @elems_ret: returns total number of elements resumed
+ * @cd: pointer to command details structure or NULL
+ *
+ * resume scheduling elements (0x040A)
+ */
+static int
+ice_aq_resume_sched_elems(struct ice_hw *hw, u16 elems_req, __le32 *buf,
+			  u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd)
+{
+	return ice_aqc_send_sched_elem_cmd(hw, ice_aqc_opc_resume_sched_elems,
+					   elems_req, (void *)buf, buf_size,
+					   elems_ret, cd);
+}
+
+/**
+ * ice_aq_query_sched_res - query scheduler resource
+ * @hw: pointer to the HW struct
+ * @buf_size: buffer size in bytes
+ * @buf: pointer to buffer
+ * @cd: pointer to command details structure or NULL
+ *
+ * Query scheduler resource allocation (0x0412)
+ */
+static int
+ice_aq_query_sched_res(struct ice_hw *hw, u16 buf_size,
+		       struct ice_aqc_query_txsched_res_resp *buf,
+		       struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_query_sched_res);
+	return ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+}
+
+/**
+ * ice_sched_suspend_resume_elems - suspend or resume HW nodes
+ * @hw: pointer to the HW struct
+ * @num_nodes: number of nodes
+ * @node_teids: array of node teids to be suspended or resumed
+ * @suspend: true means suspend / false means resume
+ *
+ * This function suspends or resumes HW nodes
+ */
+int
+ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
+			       bool suspend)
+{
+	u16 i, buf_size, num_elem_ret = 0;
+	__le32 *buf;
+	int status;
+
+	buf_size = sizeof(*buf) * num_nodes;
+	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	for (i = 0; i < num_nodes; i++)
+		buf[i] = cpu_to_le32(node_teids[i]);
+
+	if (suspend)
+		status = ice_aq_suspend_sched_elems(hw, num_nodes, buf,
+						    buf_size, &num_elem_ret,
+						    NULL);
+	else
+		status = ice_aq_resume_sched_elems(hw, num_nodes, buf,
+						   buf_size, &num_elem_ret,
+						   NULL);
+	if (status || num_elem_ret != num_nodes)
+		ice_debug(hw, ICE_DBG_SCHED, "suspend/resume failed\n");
+
+	devm_kfree(ice_hw_to_dev(hw), buf);
+	return status;
+}
+
+/**
+ * ice_alloc_lan_q_ctx - allocate LAN queue contexts for the given VSI and TC
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ * @tc: TC number
+ * @new_numqs: number of queues
+ */
+static int
+ice_alloc_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
+{
+	struct ice_vsi_ctx *vsi_ctx;
+	struct ice_q_ctx *q_ctx;
+	u16 idx;
+
+	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi_ctx)
+		return -EINVAL;
+	/* allocate LAN queue contexts */
+	if (!vsi_ctx->lan_q_ctx[tc]) {
+		q_ctx = devm_kcalloc(ice_hw_to_dev(hw), new_numqs,
+				     sizeof(*q_ctx), GFP_KERNEL);
+		if (!q_ctx)
+			return -ENOMEM;
+
+		for (idx = 0; idx < new_numqs; idx++) {
+			q_ctx[idx].q_handle = ICE_INVAL_Q_HANDLE;
+			q_ctx[idx].q_teid = ICE_INVAL_TEID;
+		}
+
+		vsi_ctx->lan_q_ctx[tc] = q_ctx;
+		vsi_ctx->num_lan_q_entries[tc] = new_numqs;
+		return 0;
+	}
+	/* num queues are increased, update the queue contexts */
+	if (new_numqs > vsi_ctx->num_lan_q_entries[tc]) {
+		u16 prev_num = vsi_ctx->num_lan_q_entries[tc];
+
+		q_ctx = devm_kcalloc(ice_hw_to_dev(hw), new_numqs,
+				     sizeof(*q_ctx), GFP_KERNEL);
+		if (!q_ctx)
+			return -ENOMEM;
+
+		memcpy(q_ctx, vsi_ctx->lan_q_ctx[tc],
+		       prev_num * sizeof(*q_ctx));
+		devm_kfree(ice_hw_to_dev(hw), vsi_ctx->lan_q_ctx[tc]);
+
+		for (idx = prev_num; idx < new_numqs; idx++) {
+			q_ctx[idx].q_handle = ICE_INVAL_Q_HANDLE;
+			q_ctx[idx].q_teid = ICE_INVAL_TEID;
+		}
+
+		vsi_ctx->lan_q_ctx[tc] = q_ctx;
+		vsi_ctx->num_lan_q_entries[tc] = new_numqs;
+	}
+	return 0;
+}
+
+/**
+ * ice_alloc_rdma_q_ctx - allocate RDMA queue contexts for the given VSI and TC
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ * @tc: TC number
+ * @new_numqs: number of queues
+ */
+static int
+ice_alloc_rdma_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 new_numqs)
+{
+	struct ice_vsi_ctx *vsi_ctx;
+	struct ice_q_ctx *q_ctx;
+
+	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi_ctx)
+		return -EINVAL;
+	/* allocate RDMA queue contexts */
+	if (!vsi_ctx->rdma_q_ctx[tc]) {
+		vsi_ctx->rdma_q_ctx[tc] = devm_kcalloc(ice_hw_to_dev(hw),
+						       new_numqs,
+						       sizeof(*q_ctx),
+						       GFP_KERNEL);
+		if (!vsi_ctx->rdma_q_ctx[tc])
+			return -ENOMEM;
+		vsi_ctx->num_rdma_q_entries[tc] = new_numqs;
+		return 0;
+	}
+	/* num queues are increased, update the queue contexts */
+	if (new_numqs > vsi_ctx->num_rdma_q_entries[tc]) {
+		u16 prev_num = vsi_ctx->num_rdma_q_entries[tc];
+
+		q_ctx = devm_kcalloc(ice_hw_to_dev(hw), new_numqs,
+				     sizeof(*q_ctx), GFP_KERNEL);
+		if (!q_ctx)
+			return -ENOMEM;
+		memcpy(q_ctx, vsi_ctx->rdma_q_ctx[tc],
+		       prev_num * sizeof(*q_ctx));
+		devm_kfree(ice_hw_to_dev(hw), vsi_ctx->rdma_q_ctx[tc]);
+		vsi_ctx->rdma_q_ctx[tc] = q_ctx;
+		vsi_ctx->num_rdma_q_entries[tc] = new_numqs;
+	}
+	return 0;
+}
+
+/**
+ * ice_aq_rl_profile - performs a rate limiting task
+ * @hw: pointer to the HW struct
+ * @opcode: opcode for add, query, or remove profile(s)
+ * @num_profiles: the number of profiles
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_processed: number of processed add or remove profile(s) to return
+ * @cd: pointer to command details structure
+ *
+ * RL profile function to add, query, or remove profile(s)
+ */
+static int
+ice_aq_rl_profile(struct ice_hw *hw, enum ice_adminq_opc opcode,
+		  u16 num_profiles, struct ice_aqc_rl_profile_elem *buf,
+		  u16 buf_size, u16 *num_processed, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_rl_profile *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	cmd = &desc.params.rl_profile;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opcode);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	cmd->num_profiles = cpu_to_le16(num_profiles);
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status && num_processed)
+		*num_processed = le16_to_cpu(cmd->num_processed);
+	return status;
+}
+
+/**
+ * ice_aq_add_rl_profile - adds rate limiting profile(s)
+ * @hw: pointer to the HW struct
+ * @num_profiles: the number of profile(s) to be add
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_profiles_added: total number of profiles added to return
+ * @cd: pointer to command details structure
+ *
+ * Add RL profile (0x0410)
+ */
+static int
+ice_aq_add_rl_profile(struct ice_hw *hw, u16 num_profiles,
+		      struct ice_aqc_rl_profile_elem *buf, u16 buf_size,
+		      u16 *num_profiles_added, struct ice_sq_cd *cd)
+{
+	return ice_aq_rl_profile(hw, ice_aqc_opc_add_rl_profiles, num_profiles,
+				 buf, buf_size, num_profiles_added, cd);
+}
+
+/**
+ * ice_aq_remove_rl_profile - removes RL profile(s)
+ * @hw: pointer to the HW struct
+ * @num_profiles: the number of profile(s) to remove
+ * @buf: pointer to buffer
+ * @buf_size: buffer size in bytes
+ * @num_profiles_removed: total number of profiles removed to return
+ * @cd: pointer to command details structure or NULL
+ *
+ * Remove RL profile (0x0415)
+ */
+static int
+ice_aq_remove_rl_profile(struct ice_hw *hw, u16 num_profiles,
+			 struct ice_aqc_rl_profile_elem *buf, u16 buf_size,
+			 u16 *num_profiles_removed, struct ice_sq_cd *cd)
+{
+	return ice_aq_rl_profile(hw, ice_aqc_opc_remove_rl_profiles,
+				 num_profiles, buf, buf_size,
+				 num_profiles_removed, cd);
+}
+
+/**
+ * ice_sched_del_rl_profile - remove RL profile
+ * @hw: pointer to the HW struct
+ * @rl_info: rate limit profile information
+ *
+ * If the profile ID is not referenced anymore, it removes profile ID with
+ * its associated parameters from HW DB,and locally. The caller needs to
+ * hold scheduler lock.
+ */
+static int
+ice_sched_del_rl_profile(struct ice_hw *hw,
+			 struct ice_aqc_rl_profile_info *rl_info)
+{
+	struct ice_aqc_rl_profile_elem *buf;
+	u16 num_profiles_removed;
+	u16 num_profiles = 1;
+	int status;
+
+	if (rl_info->prof_id_ref != 0)
+		return -EBUSY;
+
+	/* Safe to remove profile ID */
+	buf = &rl_info->profile;
+	status = ice_aq_remove_rl_profile(hw, num_profiles, buf, sizeof(*buf),
+					  &num_profiles_removed, NULL);
+	if (status || num_profiles_removed != num_profiles)
+		return -EIO;
+
+	/* Delete stale entry now */
+	list_del(&rl_info->list_entry);
+	devm_kfree(ice_hw_to_dev(hw), rl_info);
+	return status;
+}
+
+/**
+ * ice_sched_clear_rl_prof - clears RL prof entries
+ * @pi: port information structure
+ *
+ * This function removes all RL profile from HW as well as from SW DB.
+ */
+static void ice_sched_clear_rl_prof(struct ice_port_info *pi)
+{
+	u16 ln;
+
+	for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) {
+		struct ice_aqc_rl_profile_info *rl_prof_elem;
+		struct ice_aqc_rl_profile_info *rl_prof_tmp;
+
+		list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp,
+					 &pi->rl_prof_list[ln], list_entry) {
+			struct ice_hw *hw = pi->hw;
+			int status;
+
+			rl_prof_elem->prof_id_ref = 0;
+			status = ice_sched_del_rl_profile(hw, rl_prof_elem);
+			if (status) {
+				ice_debug(hw, ICE_DBG_SCHED, "Remove rl profile failed\n");
+				/* On error, free mem required */
+				list_del(&rl_prof_elem->list_entry);
+				devm_kfree(ice_hw_to_dev(hw), rl_prof_elem);
+			}
+		}
+	}
+}
+
+/**
+ * ice_sched_clear_agg - clears the aggregator related information
+ * @hw: pointer to the hardware structure
+ *
+ * This function removes aggregator list and free up aggregator related memory
+ * previously allocated.
+ */
+void ice_sched_clear_agg(struct ice_hw *hw)
+{
+	struct ice_sched_agg_info *agg_info;
+	struct ice_sched_agg_info *atmp;
+
+	list_for_each_entry_safe(agg_info, atmp, &hw->agg_list, list_entry) {
+		struct ice_sched_agg_vsi_info *agg_vsi_info;
+		struct ice_sched_agg_vsi_info *vtmp;
+
+		list_for_each_entry_safe(agg_vsi_info, vtmp,
+					 &agg_info->agg_vsi_list, list_entry) {
+			list_del(&agg_vsi_info->list_entry);
+			devm_kfree(ice_hw_to_dev(hw), agg_vsi_info);
+		}
+		list_del(&agg_info->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), agg_info);
+	}
+}
+
+/**
+ * ice_sched_clear_tx_topo - clears the scheduler tree nodes
+ * @pi: port information structure
+ *
+ * This function removes all the nodes from HW as well as from SW DB.
+ */
+static void ice_sched_clear_tx_topo(struct ice_port_info *pi)
+{
+	if (!pi)
+		return;
+	/* remove RL profiles related lists */
+	ice_sched_clear_rl_prof(pi);
+	if (pi->root) {
+		ice_free_sched_node(pi, pi->root);
+		pi->root = NULL;
+	}
+}
+
+/**
+ * ice_sched_clear_port - clear the scheduler elements from SW DB for a port
+ * @pi: port information structure
+ *
+ * Cleanup scheduling elements from SW DB
+ */
+void ice_sched_clear_port(struct ice_port_info *pi)
+{
+	if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
+		return;
+
+	pi->port_state = ICE_SCHED_PORT_STATE_INIT;
+	mutex_lock(&pi->sched_lock);
+	ice_sched_clear_tx_topo(pi);
+	mutex_unlock(&pi->sched_lock);
+	mutex_destroy(&pi->sched_lock);
+}
+
+/**
+ * ice_sched_cleanup_all - cleanup scheduler elements from SW DB for all ports
+ * @hw: pointer to the HW struct
+ *
+ * Cleanup scheduling elements from SW DB for all the ports
+ */
+void ice_sched_cleanup_all(struct ice_hw *hw)
+{
+	if (!hw)
+		return;
+
+	devm_kfree(ice_hw_to_dev(hw), hw->layer_info);
+	hw->layer_info = NULL;
+
+	ice_sched_clear_port(hw->port_info);
+
+	hw->num_tx_sched_layers = 0;
+	hw->num_tx_sched_phys_layers = 0;
+	hw->flattened_layers = 0;
+	hw->max_cgds = 0;
+}
+
+/**
+ * ice_sched_add_elems - add nodes to HW and SW DB
+ * @pi: port information structure
+ * @tc_node: pointer to the branch node
+ * @parent: pointer to the parent node
+ * @layer: layer number to add nodes
+ * @num_nodes: number of nodes
+ * @num_nodes_added: pointer to num nodes added
+ * @first_node_teid: if new nodes are added then return the TEID of first node
+ * @prealloc_nodes: preallocated nodes struct for software DB
+ *
+ * This function add nodes to HW as well as to SW DB for a given layer
+ */
+int
+ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
+		    struct ice_sched_node *parent, u8 layer, u16 num_nodes,
+		    u16 *num_nodes_added, u32 *first_node_teid,
+		    struct ice_sched_node **prealloc_nodes)
+{
+	struct ice_sched_node *prev, *new_node;
+	struct ice_aqc_add_elem *buf;
+	u16 i, num_groups_added = 0;
+	struct ice_hw *hw = pi->hw;
+	size_t buf_size;
+	int status = 0;
+	u32 teid;
+
+	buf_size = struct_size(buf, generic, num_nodes);
+	buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->hdr.parent_teid = parent->info.node_teid;
+	buf->hdr.num_elems = cpu_to_le16(num_nodes);
+	for (i = 0; i < num_nodes; i++) {
+		buf->generic[i].parent_teid = parent->info.node_teid;
+		buf->generic[i].data.elem_type = ICE_AQC_ELEM_TYPE_SE_GENERIC;
+		buf->generic[i].data.valid_sections =
+			ICE_AQC_ELEM_VALID_GENERIC | ICE_AQC_ELEM_VALID_CIR |
+			ICE_AQC_ELEM_VALID_EIR;
+		buf->generic[i].data.generic = 0;
+		buf->generic[i].data.cir_bw.bw_profile_idx =
+			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+		buf->generic[i].data.cir_bw.bw_alloc =
+			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+		buf->generic[i].data.eir_bw.bw_profile_idx =
+			cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+		buf->generic[i].data.eir_bw.bw_alloc =
+			cpu_to_le16(ICE_SCHED_DFLT_BW_WT);
+	}
+
+	status = ice_aq_add_sched_elems(hw, 1, buf, buf_size,
+					&num_groups_added, NULL);
+	if (status || num_groups_added != 1) {
+		ice_debug(hw, ICE_DBG_SCHED, "add node failed FW Error %d\n",
+			  hw->adminq.sq_last_status);
+		devm_kfree(ice_hw_to_dev(hw), buf);
+		return -EIO;
+	}
+
+	*num_nodes_added = num_nodes;
+	/* add nodes to the SW DB */
+	for (i = 0; i < num_nodes; i++) {
+		if (prealloc_nodes)
+			status = ice_sched_add_node(pi, layer, &buf->generic[i], prealloc_nodes[i]);
+		else
+			status = ice_sched_add_node(pi, layer, &buf->generic[i], NULL);
+
+		if (status) {
+			ice_debug(hw, ICE_DBG_SCHED, "add nodes in SW DB failed status =%d\n",
+				  status);
+			break;
+		}
+
+		teid = le32_to_cpu(buf->generic[i].node_teid);
+		new_node = ice_sched_find_node_by_teid(parent, teid);
+		if (!new_node) {
+			ice_debug(hw, ICE_DBG_SCHED, "Node is missing for teid =%d\n", teid);
+			break;
+		}
+
+		new_node->sibling = NULL;
+		new_node->tc_num = tc_node->tc_num;
+		new_node->tx_weight = ICE_SCHED_DFLT_BW_WT;
+		new_node->tx_share = ICE_SCHED_DFLT_BW;
+		new_node->tx_max = ICE_SCHED_DFLT_BW;
+		new_node->name = kzalloc(SCHED_NODE_NAME_MAX_LEN, GFP_KERNEL);
+		if (!new_node->name)
+			return -ENOMEM;
+
+		status = xa_alloc(&pi->sched_node_ids, &new_node->id, NULL, XA_LIMIT(0, UINT_MAX),
+				  GFP_KERNEL);
+		if (status) {
+			ice_debug(hw, ICE_DBG_SCHED, "xa_alloc failed for sched node status =%d\n",
+				  status);
+			break;
+		}
+
+		snprintf(new_node->name, SCHED_NODE_NAME_MAX_LEN, "node_%u", new_node->id);
+
+		/* add it to previous node sibling pointer */
+		/* Note: siblings are not linked across branches */
+		prev = ice_sched_get_first_node(pi, tc_node, layer);
+		if (prev && prev != new_node) {
+			while (prev->sibling)
+				prev = prev->sibling;
+			prev->sibling = new_node;
+		}
+
+		/* initialize the sibling head */
+		if (!pi->sib_head[tc_node->tc_num][layer])
+			pi->sib_head[tc_node->tc_num][layer] = new_node;
+
+		if (i == 0)
+			*first_node_teid = teid;
+	}
+
+	devm_kfree(ice_hw_to_dev(hw), buf);
+	return status;
+}
+
+/**
+ * ice_sched_add_nodes_to_hw_layer - Add nodes to HW layer
+ * @pi: port information structure
+ * @tc_node: pointer to TC node
+ * @parent: pointer to parent node
+ * @layer: layer number to add nodes
+ * @num_nodes: number of nodes to be added
+ * @first_node_teid: pointer to the first node TEID
+ * @num_nodes_added: pointer to number of nodes added
+ *
+ * Add nodes into specific HW layer.
+ */
+static int
+ice_sched_add_nodes_to_hw_layer(struct ice_port_info *pi,
+				struct ice_sched_node *tc_node,
+				struct ice_sched_node *parent, u8 layer,
+				u16 num_nodes, u32 *first_node_teid,
+				u16 *num_nodes_added)
+{
+	u16 max_child_nodes;
+
+	*num_nodes_added = 0;
+
+	if (!num_nodes)
+		return 0;
+
+	if (!parent || layer < pi->hw->sw_entry_point_layer)
+		return -EINVAL;
+
+	/* max children per node per layer */
+	max_child_nodes = pi->hw->max_children[parent->tx_sched_layer];
+
+	/* current number of children + required nodes exceed max children */
+	if ((parent->num_children + num_nodes) > max_child_nodes) {
+		/* Fail if the parent is a TC node */
+		if (parent == tc_node)
+			return -EIO;
+		return -ENOSPC;
+	}
+
+	return ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes,
+				   num_nodes_added, first_node_teid, NULL);
+}
+
+/**
+ * ice_sched_add_nodes_to_layer - Add nodes to a given layer
+ * @pi: port information structure
+ * @tc_node: pointer to TC node
+ * @parent: pointer to parent node
+ * @layer: layer number to add nodes
+ * @num_nodes: number of nodes to be added
+ * @first_node_teid: pointer to the first node TEID
+ * @num_nodes_added: pointer to number of nodes added
+ *
+ * This function add nodes to a given layer.
+ */
+int
+ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
+			     struct ice_sched_node *tc_node,
+			     struct ice_sched_node *parent, u8 layer,
+			     u16 num_nodes, u32 *first_node_teid,
+			     u16 *num_nodes_added)
+{
+	u32 *first_teid_ptr = first_node_teid;
+	u16 new_num_nodes = num_nodes;
+	int status = 0;
+
+	*num_nodes_added = 0;
+	while (*num_nodes_added < num_nodes) {
+		u16 max_child_nodes, num_added = 0;
+		u32 temp;
+
+		status = ice_sched_add_nodes_to_hw_layer(pi, tc_node, parent,
+							 layer,	new_num_nodes,
+							 first_teid_ptr,
+							 &num_added);
+		if (!status)
+			*num_nodes_added += num_added;
+		/* added more nodes than requested ? */
+		if (*num_nodes_added > num_nodes) {
+			ice_debug(pi->hw, ICE_DBG_SCHED, "added extra nodes %d %d\n", num_nodes,
+				  *num_nodes_added);
+			status = -EIO;
+			break;
+		}
+		/* break if all the nodes are added successfully */
+		if (!status && (*num_nodes_added == num_nodes))
+			break;
+		/* break if the error is not max limit */
+		if (status && status != -ENOSPC)
+			break;
+		/* Exceeded the max children */
+		max_child_nodes = pi->hw->max_children[parent->tx_sched_layer];
+		/* utilize all the spaces if the parent is not full */
+		if (parent->num_children < max_child_nodes) {
+			new_num_nodes = max_child_nodes - parent->num_children;
+		} else {
+			/* This parent is full, try the next sibling */
+			parent = parent->sibling;
+			/* Don't modify the first node TEID memory if the
+			 * first node was added already in the above call.
+			 * Instead send some temp memory for all other
+			 * recursive calls.
+			 */
+			if (num_added)
+				first_teid_ptr = &temp;
+
+			new_num_nodes = num_nodes - *num_nodes_added;
+		}
+	}
+	return status;
+}
+
+/**
+ * ice_sched_get_qgrp_layer - get the current queue group layer number
+ * @hw: pointer to the HW struct
+ *
+ * This function returns the current queue group layer number
+ */
+static u8 ice_sched_get_qgrp_layer(struct ice_hw *hw)
+{
+	/* It's always total layers - 1, the array is 0 relative so -2 */
+	return hw->num_tx_sched_layers - ICE_QGRP_LAYER_OFFSET;
+}
+
+/**
+ * ice_sched_get_vsi_layer - get the current VSI layer number
+ * @hw: pointer to the HW struct
+ *
+ * This function returns the current VSI layer number
+ */
+u8 ice_sched_get_vsi_layer(struct ice_hw *hw)
+{
+	/* Num Layers       VSI layer
+	 *     9               6
+	 *     7               4
+	 *     5 or less       sw_entry_point_layer
+	 */
+	/* calculate the VSI layer based on number of layers. */
+	if (hw->num_tx_sched_layers > ICE_VSI_LAYER_OFFSET + 1) {
+		u8 layer = hw->num_tx_sched_layers - ICE_VSI_LAYER_OFFSET;
+
+		if (layer > hw->sw_entry_point_layer)
+			return layer;
+	}
+	return hw->sw_entry_point_layer;
+}
+
+/**
+ * ice_sched_get_agg_layer - get the current aggregator layer number
+ * @hw: pointer to the HW struct
+ *
+ * This function returns the current aggregator layer number
+ */
+u8 ice_sched_get_agg_layer(struct ice_hw *hw)
+{
+	/* Num Layers       aggregator layer
+	 *     9               4
+	 *     7 or less       sw_entry_point_layer
+	 */
+	/* calculate the aggregator layer based on number of layers. */
+	if (hw->num_tx_sched_layers > ICE_AGG_LAYER_OFFSET + 1) {
+		u8 layer = hw->num_tx_sched_layers - ICE_AGG_LAYER_OFFSET;
+
+		if (layer > hw->sw_entry_point_layer)
+			return layer;
+	}
+	return hw->sw_entry_point_layer;
+}
+
+/**
+ * ice_rm_dflt_leaf_node - remove the default leaf node in the tree
+ * @pi: port information structure
+ *
+ * This function removes the leaf node that was created by the FW
+ * during initialization
+ */
+static void ice_rm_dflt_leaf_node(struct ice_port_info *pi)
+{
+	struct ice_sched_node *node;
+
+	node = pi->root;
+	while (node) {
+		if (!node->num_children)
+			break;
+		node = node->children[0];
+	}
+	if (node && node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF) {
+		u32 teid = le32_to_cpu(node->info.node_teid);
+		int status;
+
+		/* remove the default leaf node */
+		status = ice_sched_remove_elems(pi->hw, node->parent, 1, &teid);
+		if (!status)
+			ice_free_sched_node(pi, node);
+	}
+}
+
+/**
+ * ice_sched_rm_dflt_nodes - free the default nodes in the tree
+ * @pi: port information structure
+ *
+ * This function frees all the nodes except root and TC that were created by
+ * the FW during initialization
+ */
+static void ice_sched_rm_dflt_nodes(struct ice_port_info *pi)
+{
+	struct ice_sched_node *node;
+
+	ice_rm_dflt_leaf_node(pi);
+
+	/* remove the default nodes except TC and root nodes */
+	node = pi->root;
+	while (node) {
+		if (node->tx_sched_layer >= pi->hw->sw_entry_point_layer &&
+		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_TC &&
+		    node->info.data.elem_type != ICE_AQC_ELEM_TYPE_ROOT_PORT) {
+			ice_free_sched_node(pi, node);
+			break;
+		}
+
+		if (!node->num_children)
+			break;
+		node = node->children[0];
+	}
+}
+
+/**
+ * ice_sched_init_port - Initialize scheduler by querying information from FW
+ * @pi: port info structure for the tree to cleanup
+ *
+ * This function is the initial call to find the total number of Tx scheduler
+ * resources, default topology created by firmware and storing the information
+ * in SW DB.
+ */
+int ice_sched_init_port(struct ice_port_info *pi)
+{
+	struct ice_aqc_get_topo_elem *buf;
+	struct ice_hw *hw;
+	u8 num_branches;
+	u16 num_elems;
+	int status;
+	u8 i, j;
+
+	if (!pi)
+		return -EINVAL;
+	hw = pi->hw;
+
+	/* Query the Default Topology from FW */
+	buf = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	/* Query default scheduling tree topology */
+	status = ice_aq_get_dflt_topo(hw, pi->lport, buf, ICE_AQ_MAX_BUF_LEN,
+				      &num_branches, NULL);
+	if (status)
+		goto err_init_port;
+
+	/* num_branches should be between 1-8 */
+	if (num_branches < 1 || num_branches > ICE_TXSCHED_MAX_BRANCHES) {
+		ice_debug(hw, ICE_DBG_SCHED, "num_branches unexpected %d\n",
+			  num_branches);
+		status = -EINVAL;
+		goto err_init_port;
+	}
+
+	/* get the number of elements on the default/first branch */
+	num_elems = le16_to_cpu(buf[0].hdr.num_elems);
+
+	/* num_elems should always be between 1-9 */
+	if (num_elems < 1 || num_elems > ICE_AQC_TOPO_MAX_LEVEL_NUM) {
+		ice_debug(hw, ICE_DBG_SCHED, "num_elems unexpected %d\n",
+			  num_elems);
+		status = -EINVAL;
+		goto err_init_port;
+	}
+
+	/* If the last node is a leaf node then the index of the queue group
+	 * layer is two less than the number of elements.
+	 */
+	if (num_elems > 2 && buf[0].generic[num_elems - 1].data.elem_type ==
+	    ICE_AQC_ELEM_TYPE_LEAF)
+		pi->last_node_teid =
+			le32_to_cpu(buf[0].generic[num_elems - 2].node_teid);
+	else
+		pi->last_node_teid =
+			le32_to_cpu(buf[0].generic[num_elems - 1].node_teid);
+
+	/* Insert the Tx Sched root node */
+	status = ice_sched_add_root_node(pi, &buf[0].generic[0]);
+	if (status)
+		goto err_init_port;
+
+	/* Parse the default tree and cache the information */
+	for (i = 0; i < num_branches; i++) {
+		num_elems = le16_to_cpu(buf[i].hdr.num_elems);
+
+		/* Skip root element as already inserted */
+		for (j = 1; j < num_elems; j++) {
+			/* update the sw entry point */
+			if (buf[0].generic[j].data.elem_type ==
+			    ICE_AQC_ELEM_TYPE_ENTRY_POINT)
+				hw->sw_entry_point_layer = j;
+
+			status = ice_sched_add_node(pi, j, &buf[i].generic[j], NULL);
+			if (status)
+				goto err_init_port;
+		}
+	}
+
+	/* Remove the default nodes. */
+	if (pi->root)
+		ice_sched_rm_dflt_nodes(pi);
+
+	/* initialize the port for handling the scheduler tree */
+	pi->port_state = ICE_SCHED_PORT_STATE_READY;
+	mutex_init(&pi->sched_lock);
+	for (i = 0; i < ICE_AQC_TOPO_MAX_LEVEL_NUM; i++)
+		INIT_LIST_HEAD(&pi->rl_prof_list[i]);
+
+err_init_port:
+	if (status && pi->root) {
+		ice_free_sched_node(pi, pi->root);
+		pi->root = NULL;
+	}
+
+	kfree(buf);
+	return status;
+}
+
+/**
+ * ice_sched_query_res_alloc - query the FW for num of logical sched layers
+ * @hw: pointer to the HW struct
+ *
+ * query FW for allocated scheduler resources and store in HW struct
+ */
+int ice_sched_query_res_alloc(struct ice_hw *hw)
+{
+	struct ice_aqc_query_txsched_res_resp *buf;
+	__le16 max_sibl;
+	int status = 0;
+	u16 i;
+
+	if (hw->layer_info)
+		return status;
+
+	buf = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	status = ice_aq_query_sched_res(hw, sizeof(*buf), buf, NULL);
+	if (status)
+		goto sched_query_out;
+
+	hw->num_tx_sched_layers = le16_to_cpu(buf->sched_props.logical_levels);
+	hw->num_tx_sched_phys_layers =
+		le16_to_cpu(buf->sched_props.phys_levels);
+	hw->flattened_layers = buf->sched_props.flattening_bitmap;
+	hw->max_cgds = buf->sched_props.max_pf_cgds;
+
+	/* max sibling group size of current layer refers to the max children
+	 * of the below layer node.
+	 * layer 1 node max children will be layer 2 max sibling group size
+	 * layer 2 node max children will be layer 3 max sibling group size
+	 * and so on. This array will be populated from root (index 0) to
+	 * qgroup layer 7. Leaf node has no children.
+	 */
+	for (i = 0; i < hw->num_tx_sched_layers - 1; i++) {
+		max_sibl = buf->layer_props[i + 1].max_sibl_grp_sz;
+		hw->max_children[i] = le16_to_cpu(max_sibl);
+	}
+
+	hw->layer_info = devm_kmemdup(ice_hw_to_dev(hw), buf->layer_props,
+				      (hw->num_tx_sched_layers *
+				       sizeof(*hw->layer_info)),
+				      GFP_KERNEL);
+	if (!hw->layer_info) {
+		status = -ENOMEM;
+		goto sched_query_out;
+	}
+
+sched_query_out:
+	devm_kfree(ice_hw_to_dev(hw), buf);
+	return status;
+}
+
+/**
+ * ice_sched_get_psm_clk_freq - determine the PSM clock frequency
+ * @hw: pointer to the HW struct
+ *
+ * Determine the PSM clock frequency and store in HW struct
+ */
+void ice_sched_get_psm_clk_freq(struct ice_hw *hw)
+{
+	u32 val, clk_src;
+
+	val = rd32(hw, GLGEN_CLKSTAT_SRC);
+	clk_src = (val & GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_M) >>
+		GLGEN_CLKSTAT_SRC_PSM_CLK_SRC_S;
+
+#define PSM_CLK_SRC_367_MHZ 0x0
+#define PSM_CLK_SRC_416_MHZ 0x1
+#define PSM_CLK_SRC_446_MHZ 0x2
+#define PSM_CLK_SRC_390_MHZ 0x3
+
+	switch (clk_src) {
+	case PSM_CLK_SRC_367_MHZ:
+		hw->psm_clk_freq = ICE_PSM_CLK_367MHZ_IN_HZ;
+		break;
+	case PSM_CLK_SRC_416_MHZ:
+		hw->psm_clk_freq = ICE_PSM_CLK_416MHZ_IN_HZ;
+		break;
+	case PSM_CLK_SRC_446_MHZ:
+		hw->psm_clk_freq = ICE_PSM_CLK_446MHZ_IN_HZ;
+		break;
+	case PSM_CLK_SRC_390_MHZ:
+		hw->psm_clk_freq = ICE_PSM_CLK_390MHZ_IN_HZ;
+		break;
+	default:
+		ice_debug(hw, ICE_DBG_SCHED, "PSM clk_src unexpected %u\n",
+			  clk_src);
+		/* fall back to a safe default */
+		hw->psm_clk_freq = ICE_PSM_CLK_446MHZ_IN_HZ;
+	}
+}
+
+/**
+ * ice_sched_find_node_in_subtree - Find node in part of base node subtree
+ * @hw: pointer to the HW struct
+ * @base: pointer to the base node
+ * @node: pointer to the node to search
+ *
+ * This function checks whether a given node is part of the base node
+ * subtree or not
+ */
+static bool
+ice_sched_find_node_in_subtree(struct ice_hw *hw, struct ice_sched_node *base,
+			       struct ice_sched_node *node)
+{
+	u8 i;
+
+	for (i = 0; i < base->num_children; i++) {
+		struct ice_sched_node *child = base->children[i];
+
+		if (node == child)
+			return true;
+
+		if (child->tx_sched_layer > node->tx_sched_layer)
+			return false;
+
+		/* this recursion is intentional, and wouldn't
+		 * go more than 8 calls
+		 */
+		if (ice_sched_find_node_in_subtree(hw, child, node))
+			return true;
+	}
+	return false;
+}
+
+/**
+ * ice_sched_get_free_qgrp - Scan all queue group siblings and find a free node
+ * @pi: port information structure
+ * @vsi_node: software VSI handle
+ * @qgrp_node: first queue group node identified for scanning
+ * @owner: LAN or RDMA
+ *
+ * This function retrieves a free LAN or RDMA queue group node by scanning
+ * qgrp_node and its siblings for the queue group with the fewest number
+ * of queues currently assigned.
+ */
+static struct ice_sched_node *
+ice_sched_get_free_qgrp(struct ice_port_info *pi,
+			struct ice_sched_node *vsi_node,
+			struct ice_sched_node *qgrp_node, u8 owner)
+{
+	struct ice_sched_node *min_qgrp;
+	u8 min_children;
+
+	if (!qgrp_node)
+		return qgrp_node;
+	min_children = qgrp_node->num_children;
+	if (!min_children)
+		return qgrp_node;
+	min_qgrp = qgrp_node;
+	/* scan all queue groups until find a node which has less than the
+	 * minimum number of children. This way all queue group nodes get
+	 * equal number of shares and active. The bandwidth will be equally
+	 * distributed across all queues.
+	 */
+	while (qgrp_node) {
+		/* make sure the qgroup node is part of the VSI subtree */
+		if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
+			if (qgrp_node->num_children < min_children &&
+			    qgrp_node->owner == owner) {
+				/* replace the new min queue group node */
+				min_qgrp = qgrp_node;
+				min_children = min_qgrp->num_children;
+				/* break if it has no children, */
+				if (!min_children)
+					break;
+			}
+		qgrp_node = qgrp_node->sibling;
+	}
+	return min_qgrp;
+}
+
+/**
+ * ice_sched_get_free_qparent - Get a free LAN or RDMA queue group node
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: branch number
+ * @owner: LAN or RDMA
+ *
+ * This function retrieves a free LAN or RDMA queue group node
+ */
+struct ice_sched_node *
+ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+			   u8 owner)
+{
+	struct ice_sched_node *vsi_node, *qgrp_node;
+	struct ice_vsi_ctx *vsi_ctx;
+	u16 max_children;
+	u8 qgrp_layer;
+
+	qgrp_layer = ice_sched_get_qgrp_layer(pi->hw);
+	max_children = pi->hw->max_children[qgrp_layer];
+
+	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
+	if (!vsi_ctx)
+		return NULL;
+	vsi_node = vsi_ctx->sched.vsi_node[tc];
+	/* validate invalid VSI ID */
+	if (!vsi_node)
+		return NULL;
+
+	/* get the first queue group node from VSI sub-tree */
+	qgrp_node = ice_sched_get_first_node(pi, vsi_node, qgrp_layer);
+	while (qgrp_node) {
+		/* make sure the qgroup node is part of the VSI subtree */
+		if (ice_sched_find_node_in_subtree(pi->hw, vsi_node, qgrp_node))
+			if (qgrp_node->num_children < max_children &&
+			    qgrp_node->owner == owner)
+				break;
+		qgrp_node = qgrp_node->sibling;
+	}
+
+	/* Select the best queue group */
+	return ice_sched_get_free_qgrp(pi, vsi_node, qgrp_node, owner);
+}
+
+/**
+ * ice_sched_get_vsi_node - Get a VSI node based on VSI ID
+ * @pi: pointer to the port information structure
+ * @tc_node: pointer to the TC node
+ * @vsi_handle: software VSI handle
+ *
+ * This function retrieves a VSI node for a given VSI ID from a given
+ * TC branch
+ */
+static struct ice_sched_node *
+ice_sched_get_vsi_node(struct ice_port_info *pi, struct ice_sched_node *tc_node,
+		       u16 vsi_handle)
+{
+	struct ice_sched_node *node;
+	u8 vsi_layer;
+
+	vsi_layer = ice_sched_get_vsi_layer(pi->hw);
+	node = ice_sched_get_first_node(pi, tc_node, vsi_layer);
+
+	/* Check whether it already exists */
+	while (node) {
+		if (node->vsi_handle == vsi_handle)
+			return node;
+		node = node->sibling;
+	}
+
+	return node;
+}
+
+/**
+ * ice_sched_get_agg_node - Get an aggregator node based on aggregator ID
+ * @pi: pointer to the port information structure
+ * @tc_node: pointer to the TC node
+ * @agg_id: aggregator ID
+ *
+ * This function retrieves an aggregator node for a given aggregator ID from
+ * a given TC branch
+ */
+struct ice_sched_node *
+ice_sched_get_agg_node(struct ice_port_info *pi, struct ice_sched_node *tc_node,
+		       u32 agg_id)
+{
+	struct ice_sched_node *node;
+	struct ice_hw *hw = pi->hw;
+	u8 agg_layer;
+
+	if (!hw)
+		return NULL;
+	agg_layer = ice_sched_get_agg_layer(hw);
+	node = ice_sched_get_first_node(pi, tc_node, agg_layer);
+
+	/* Check whether it already exists */
+	while (node) {
+		if (node->agg_id == agg_id)
+			return node;
+		node = node->sibling;
+	}
+
+	return node;
+}
+
+/**
+ * ice_sched_calc_vsi_child_nodes - calculate number of VSI child nodes
+ * @hw: pointer to the HW struct
+ * @num_qs: number of queues
+ * @num_nodes: num nodes array
+ *
+ * This function calculates the number of VSI child nodes based on the
+ * number of queues.
+ */
+static void
+ice_sched_calc_vsi_child_nodes(struct ice_hw *hw, u16 num_qs, u16 *num_nodes)
+{
+	u16 num = num_qs;
+	u8 i, qgl, vsil;
+
+	qgl = ice_sched_get_qgrp_layer(hw);
+	vsil = ice_sched_get_vsi_layer(hw);
+
+	/* calculate num nodes from queue group to VSI layer */
+	for (i = qgl; i > vsil; i--) {
+		/* round to the next integer if there is a remainder */
+		num = DIV_ROUND_UP(num, hw->max_children[i]);
+
+		/* need at least one node */
+		num_nodes[i] = num ? num : 1;
+	}
+}
+
+/**
+ * ice_sched_add_vsi_child_nodes - add VSI child nodes to tree
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_node: pointer to the TC node
+ * @num_nodes: pointer to the num nodes that needs to be added per layer
+ * @owner: node owner (LAN or RDMA)
+ *
+ * This function adds the VSI child nodes to tree. It gets called for
+ * LAN and RDMA separately.
+ */
+static int
+ice_sched_add_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
+			      struct ice_sched_node *tc_node, u16 *num_nodes,
+			      u8 owner)
+{
+	struct ice_sched_node *parent, *node;
+	struct ice_hw *hw = pi->hw;
+	u32 first_node_teid;
+	u16 num_added = 0;
+	u8 i, qgl, vsil;
+
+	qgl = ice_sched_get_qgrp_layer(hw);
+	vsil = ice_sched_get_vsi_layer(hw);
+	parent = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
+	for (i = vsil + 1; i <= qgl; i++) {
+		int status;
+
+		if (!parent)
+			return -EIO;
+
+		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
+						      num_nodes[i],
+						      &first_node_teid,
+						      &num_added);
+		if (status || num_nodes[i] != num_added)
+			return -EIO;
+
+		/* The newly added node can be a new parent for the next
+		 * layer nodes
+		 */
+		if (num_added) {
+			parent = ice_sched_find_node_by_teid(tc_node,
+							     first_node_teid);
+			node = parent;
+			while (node) {
+				node->owner = owner;
+				node = node->sibling;
+			}
+		} else {
+			parent = parent->children[0];
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_sched_calc_vsi_support_nodes - calculate number of VSI support nodes
+ * @pi: pointer to the port info structure
+ * @tc_node: pointer to TC node
+ * @num_nodes: pointer to num nodes array
+ *
+ * This function calculates the number of supported nodes needed to add this
+ * VSI into Tx tree including the VSI, parent and intermediate nodes in below
+ * layers
+ */
+static void
+ice_sched_calc_vsi_support_nodes(struct ice_port_info *pi,
+				 struct ice_sched_node *tc_node, u16 *num_nodes)
+{
+	struct ice_sched_node *node;
+	u8 vsil;
+	int i;
+
+	vsil = ice_sched_get_vsi_layer(pi->hw);
+	for (i = vsil; i >= pi->hw->sw_entry_point_layer; i--)
+		/* Add intermediate nodes if TC has no children and
+		 * need at least one node for VSI
+		 */
+		if (!tc_node->num_children || i == vsil) {
+			num_nodes[i]++;
+		} else {
+			/* If intermediate nodes are reached max children
+			 * then add a new one.
+			 */
+			node = ice_sched_get_first_node(pi, tc_node, (u8)i);
+			/* scan all the siblings */
+			while (node) {
+				if (node->num_children < pi->hw->max_children[i])
+					break;
+				node = node->sibling;
+			}
+
+			/* tree has one intermediate node to add this new VSI.
+			 * So no need to calculate supported nodes for below
+			 * layers.
+			 */
+			if (node)
+				break;
+			/* all the nodes are full, allocate a new one */
+			num_nodes[i]++;
+		}
+}
+
+/**
+ * ice_sched_add_vsi_support_nodes - add VSI supported nodes into Tx tree
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc_node: pointer to TC node
+ * @num_nodes: pointer to num nodes array
+ *
+ * This function adds the VSI supported nodes into Tx tree including the
+ * VSI, its parent and intermediate nodes in below layers
+ */
+static int
+ice_sched_add_vsi_support_nodes(struct ice_port_info *pi, u16 vsi_handle,
+				struct ice_sched_node *tc_node, u16 *num_nodes)
+{
+	struct ice_sched_node *parent = tc_node;
+	u32 first_node_teid;
+	u16 num_added = 0;
+	u8 i, vsil;
+
+	if (!pi)
+		return -EINVAL;
+
+	vsil = ice_sched_get_vsi_layer(pi->hw);
+	for (i = pi->hw->sw_entry_point_layer; i <= vsil; i++) {
+		int status;
+
+		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent,
+						      i, num_nodes[i],
+						      &first_node_teid,
+						      &num_added);
+		if (status || num_nodes[i] != num_added)
+			return -EIO;
+
+		/* The newly added node can be a new parent for the next
+		 * layer nodes
+		 */
+		if (num_added)
+			parent = ice_sched_find_node_by_teid(tc_node,
+							     first_node_teid);
+		else
+			parent = parent->children[0];
+
+		if (!parent)
+			return -EIO;
+
+		if (i == vsil)
+			parent->vsi_handle = vsi_handle;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_sched_add_vsi_to_topo - add a new VSI into tree
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ *
+ * This function adds a new VSI into scheduler tree
+ */
+static int
+ice_sched_add_vsi_to_topo(struct ice_port_info *pi, u16 vsi_handle, u8 tc)
+{
+	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+	struct ice_sched_node *tc_node;
+
+	tc_node = ice_sched_get_tc_node(pi, tc);
+	if (!tc_node)
+		return -EINVAL;
+
+	/* calculate number of supported nodes needed for this VSI */
+	ice_sched_calc_vsi_support_nodes(pi, tc_node, num_nodes);
+
+	/* add VSI supported nodes to TC subtree */
+	return ice_sched_add_vsi_support_nodes(pi, vsi_handle, tc_node,
+					       num_nodes);
+}
+
+/**
+ * ice_sched_update_vsi_child_nodes - update VSI child nodes
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @new_numqs: new number of max queues
+ * @owner: owner of this subtree
+ *
+ * This function updates the VSI child nodes based on the number of queues
+ */
+static int
+ice_sched_update_vsi_child_nodes(struct ice_port_info *pi, u16 vsi_handle,
+				 u8 tc, u16 new_numqs, u8 owner)
+{
+	u16 new_num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+	struct ice_sched_node *vsi_node;
+	struct ice_sched_node *tc_node;
+	struct ice_vsi_ctx *vsi_ctx;
+	struct ice_hw *hw = pi->hw;
+	u16 prev_numqs;
+	int status = 0;
+
+	tc_node = ice_sched_get_tc_node(pi, tc);
+	if (!tc_node)
+		return -EIO;
+
+	vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
+	if (!vsi_node)
+		return -EIO;
+
+	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi_ctx)
+		return -EINVAL;
+
+	if (owner == ICE_SCHED_NODE_OWNER_LAN)
+		prev_numqs = vsi_ctx->sched.max_lanq[tc];
+	else
+		prev_numqs = vsi_ctx->sched.max_rdmaq[tc];
+	/* num queues are not changed or less than the previous number */
+	if (new_numqs <= prev_numqs)
+		return status;
+	if (owner == ICE_SCHED_NODE_OWNER_LAN) {
+		status = ice_alloc_lan_q_ctx(hw, vsi_handle, tc, new_numqs);
+		if (status)
+			return status;
+	} else {
+		status = ice_alloc_rdma_q_ctx(hw, vsi_handle, tc, new_numqs);
+		if (status)
+			return status;
+	}
+
+	if (new_numqs)
+		ice_sched_calc_vsi_child_nodes(hw, new_numqs, new_num_nodes);
+	/* Keep the max number of queue configuration all the time. Update the
+	 * tree only if number of queues > previous number of queues. This may
+	 * leave some extra nodes in the tree if number of queues < previous
+	 * number but that wouldn't harm anything. Removing those extra nodes
+	 * may complicate the code if those nodes are part of SRL or
+	 * individually rate limited.
+	 */
+	status = ice_sched_add_vsi_child_nodes(pi, vsi_handle, tc_node,
+					       new_num_nodes, owner);
+	if (status)
+		return status;
+	if (owner == ICE_SCHED_NODE_OWNER_LAN)
+		vsi_ctx->sched.max_lanq[tc] = new_numqs;
+	else
+		vsi_ctx->sched.max_rdmaq[tc] = new_numqs;
+
+	return 0;
+}
+
+/**
+ * ice_sched_cfg_vsi - configure the new/existing VSI
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: TC number
+ * @maxqs: max number of queues
+ * @owner: LAN or RDMA
+ * @enable: TC enabled or disabled
+ *
+ * This function adds/updates VSI nodes based on the number of queues. If TC is
+ * enabled and VSI is in suspended state then resume the VSI back. If TC is
+ * disabled then suspend the VSI if it is not already.
+ */
+int
+ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
+		  u8 owner, bool enable)
+{
+	struct ice_sched_node *vsi_node, *tc_node;
+	struct ice_vsi_ctx *vsi_ctx;
+	struct ice_hw *hw = pi->hw;
+	int status = 0;
+
+	ice_debug(pi->hw, ICE_DBG_SCHED, "add/config VSI %d\n", vsi_handle);
+	tc_node = ice_sched_get_tc_node(pi, tc);
+	if (!tc_node)
+		return -EINVAL;
+	vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!vsi_ctx)
+		return -EINVAL;
+	vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
+
+	/* suspend the VSI if TC is not enabled */
+	if (!enable) {
+		if (vsi_node && vsi_node->in_use) {
+			u32 teid = le32_to_cpu(vsi_node->info.node_teid);
+
+			status = ice_sched_suspend_resume_elems(hw, 1, &teid,
+								true);
+			if (!status)
+				vsi_node->in_use = false;
+		}
+		return status;
+	}
+
+	/* TC is enabled, if it is a new VSI then add it to the tree */
+	if (!vsi_node) {
+		status = ice_sched_add_vsi_to_topo(pi, vsi_handle, tc);
+		if (status)
+			return status;
+
+		vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
+		if (!vsi_node)
+			return -EIO;
+
+		vsi_ctx->sched.vsi_node[tc] = vsi_node;
+		vsi_node->in_use = true;
+		/* invalidate the max queues whenever VSI gets added first time
+		 * into the scheduler tree (boot or after reset). We need to
+		 * recreate the child nodes all the time in these cases.
+		 */
+		vsi_ctx->sched.max_lanq[tc] = 0;
+		vsi_ctx->sched.max_rdmaq[tc] = 0;
+	}
+
+	/* update the VSI child nodes */
+	status = ice_sched_update_vsi_child_nodes(pi, vsi_handle, tc, maxqs,
+						  owner);
+	if (status)
+		return status;
+
+	/* TC is enabled, resume the VSI if it is in the suspend state */
+	if (!vsi_node->in_use) {
+		u32 teid = le32_to_cpu(vsi_node->info.node_teid);
+
+		status = ice_sched_suspend_resume_elems(hw, 1, &teid, false);
+		if (!status)
+			vsi_node->in_use = true;
+	}
+
+	return status;
+}
+
+/**
+ * ice_sched_rm_agg_vsi_info - remove aggregator related VSI info entry
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ *
+ * This function removes single aggregator VSI info entry from
+ * aggregator list.
+ */
+static void ice_sched_rm_agg_vsi_info(struct ice_port_info *pi, u16 vsi_handle)
+{
+	struct ice_sched_agg_info *agg_info;
+	struct ice_sched_agg_info *atmp;
+
+	list_for_each_entry_safe(agg_info, atmp, &pi->hw->agg_list,
+				 list_entry) {
+		struct ice_sched_agg_vsi_info *agg_vsi_info;
+		struct ice_sched_agg_vsi_info *vtmp;
+
+		list_for_each_entry_safe(agg_vsi_info, vtmp,
+					 &agg_info->agg_vsi_list, list_entry)
+			if (agg_vsi_info->vsi_handle == vsi_handle) {
+				list_del(&agg_vsi_info->list_entry);
+				devm_kfree(ice_hw_to_dev(pi->hw),
+					   agg_vsi_info);
+				return;
+			}
+	}
+}
+
+/**
+ * ice_sched_is_leaf_node_present - check for a leaf node in the sub-tree
+ * @node: pointer to the sub-tree node
+ *
+ * This function checks for a leaf node presence in a given sub-tree node.
+ */
+static bool ice_sched_is_leaf_node_present(struct ice_sched_node *node)
+{
+	u8 i;
+
+	for (i = 0; i < node->num_children; i++)
+		if (ice_sched_is_leaf_node_present(node->children[i]))
+			return true;
+	/* check for a leaf node */
+	return (node->info.data.elem_type == ICE_AQC_ELEM_TYPE_LEAF);
+}
+
+/**
+ * ice_sched_rm_vsi_cfg - remove the VSI and its children nodes
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @owner: LAN or RDMA
+ *
+ * This function removes the VSI and its LAN or RDMA children nodes from the
+ * scheduler tree.
+ */
+static int
+ice_sched_rm_vsi_cfg(struct ice_port_info *pi, u16 vsi_handle, u8 owner)
+{
+	struct ice_vsi_ctx *vsi_ctx;
+	int status = -EINVAL;
+	u8 i;
+
+	ice_debug(pi->hw, ICE_DBG_SCHED, "removing VSI %d\n", vsi_handle);
+	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+		return status;
+	mutex_lock(&pi->sched_lock);
+	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
+	if (!vsi_ctx)
+		goto exit_sched_rm_vsi_cfg;
+
+	ice_for_each_traffic_class(i) {
+		struct ice_sched_node *vsi_node, *tc_node;
+		u8 j = 0;
+
+		tc_node = ice_sched_get_tc_node(pi, i);
+		if (!tc_node)
+			continue;
+
+		vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
+		if (!vsi_node)
+			continue;
+
+		if (ice_sched_is_leaf_node_present(vsi_node)) {
+			ice_debug(pi->hw, ICE_DBG_SCHED, "VSI has leaf nodes in TC %d\n", i);
+			status = -EBUSY;
+			goto exit_sched_rm_vsi_cfg;
+		}
+		while (j < vsi_node->num_children) {
+			if (vsi_node->children[j]->owner == owner) {
+				ice_free_sched_node(pi, vsi_node->children[j]);
+
+				/* reset the counter again since the num
+				 * children will be updated after node removal
+				 */
+				j = 0;
+			} else {
+				j++;
+			}
+		}
+		/* remove the VSI if it has no children */
+		if (!vsi_node->num_children) {
+			ice_free_sched_node(pi, vsi_node);
+			vsi_ctx->sched.vsi_node[i] = NULL;
+
+			/* clean up aggregator related VSI info if any */
+			ice_sched_rm_agg_vsi_info(pi, vsi_handle);
+		}
+		if (owner == ICE_SCHED_NODE_OWNER_LAN)
+			vsi_ctx->sched.max_lanq[i] = 0;
+		else
+			vsi_ctx->sched.max_rdmaq[i] = 0;
+	}
+	status = 0;
+
+exit_sched_rm_vsi_cfg:
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_rm_vsi_lan_cfg - remove VSI and its LAN children nodes
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ *
+ * This function clears the VSI and its LAN children nodes from scheduler tree
+ * for all TCs.
+ */
+int ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle)
+{
+	return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_LAN);
+}
+
+/**
+ * ice_rm_vsi_rdma_cfg - remove VSI and its RDMA children nodes
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ *
+ * This function clears the VSI and its RDMA children nodes from scheduler tree
+ * for all TCs.
+ */
+int ice_rm_vsi_rdma_cfg(struct ice_port_info *pi, u16 vsi_handle)
+{
+	return ice_sched_rm_vsi_cfg(pi, vsi_handle, ICE_SCHED_NODE_OWNER_RDMA);
+}
+
+/**
+ * ice_get_agg_info - get the aggregator ID
+ * @hw: pointer to the hardware structure
+ * @agg_id: aggregator ID
+ *
+ * This function validates aggregator ID. The function returns info if
+ * aggregator ID is present in list otherwise it returns null.
+ */
+static struct ice_sched_agg_info *
+ice_get_agg_info(struct ice_hw *hw, u32 agg_id)
+{
+	struct ice_sched_agg_info *agg_info;
+
+	list_for_each_entry(agg_info, &hw->agg_list, list_entry)
+		if (agg_info->agg_id == agg_id)
+			return agg_info;
+
+	return NULL;
+}
+
+/**
+ * ice_sched_get_free_vsi_parent - Find a free parent node in aggregator subtree
+ * @hw: pointer to the HW struct
+ * @node: pointer to a child node
+ * @num_nodes: num nodes count array
+ *
+ * This function walks through the aggregator subtree to find a free parent
+ * node
+ */
+struct ice_sched_node *
+ice_sched_get_free_vsi_parent(struct ice_hw *hw, struct ice_sched_node *node,
+			      u16 *num_nodes)
+{
+	u8 l = node->tx_sched_layer;
+	u8 vsil, i;
+
+	vsil = ice_sched_get_vsi_layer(hw);
+
+	/* Is it VSI parent layer ? */
+	if (l == vsil - 1)
+		return (node->num_children < hw->max_children[l]) ? node : NULL;
+
+	/* We have intermediate nodes. Let's walk through the subtree. If the
+	 * intermediate node has space to add a new node then clear the count
+	 */
+	if (node->num_children < hw->max_children[l])
+		num_nodes[l] = 0;
+	/* The below recursive call is intentional and wouldn't go more than
+	 * 2 or 3 iterations.
+	 */
+
+	for (i = 0; i < node->num_children; i++) {
+		struct ice_sched_node *parent;
+
+		parent = ice_sched_get_free_vsi_parent(hw, node->children[i],
+						       num_nodes);
+		if (parent)
+			return parent;
+	}
+
+	return NULL;
+}
+
+/**
+ * ice_sched_update_parent - update the new parent in SW DB
+ * @new_parent: pointer to a new parent node
+ * @node: pointer to a child node
+ *
+ * This function removes the child from the old parent and adds it to a new
+ * parent
+ */
+void
+ice_sched_update_parent(struct ice_sched_node *new_parent,
+			struct ice_sched_node *node)
+{
+	struct ice_sched_node *old_parent;
+	u8 i, j;
+
+	old_parent = node->parent;
+
+	/* update the old parent children */
+	for (i = 0; i < old_parent->num_children; i++)
+		if (old_parent->children[i] == node) {
+			for (j = i + 1; j < old_parent->num_children; j++)
+				old_parent->children[j - 1] =
+					old_parent->children[j];
+			old_parent->num_children--;
+			break;
+		}
+
+	/* now move the node to a new parent */
+	new_parent->children[new_parent->num_children++] = node;
+	node->parent = new_parent;
+	node->info.parent_teid = new_parent->info.node_teid;
+}
+
+/**
+ * ice_sched_move_nodes - move child nodes to a given parent
+ * @pi: port information structure
+ * @parent: pointer to parent node
+ * @num_items: number of child nodes to be moved
+ * @list: pointer to child node teids
+ *
+ * This function move the child nodes to a given parent.
+ */
+int
+ice_sched_move_nodes(struct ice_port_info *pi, struct ice_sched_node *parent,
+		     u16 num_items, u32 *list)
+{
+	struct ice_aqc_move_elem *buf;
+	struct ice_sched_node *node;
+	u16 i, grps_movd = 0;
+	struct ice_hw *hw;
+	int status = 0;
+	u16 buf_len;
+
+	hw = pi->hw;
+
+	if (!parent || !num_items)
+		return -EINVAL;
+
+	/* Does parent have enough space */
+	if (parent->num_children + num_items >
+	    hw->max_children[parent->tx_sched_layer])
+		return -ENOSPC;
+
+	buf_len = struct_size(buf, teid, 1);
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	for (i = 0; i < num_items; i++) {
+		node = ice_sched_find_node_by_teid(pi->root, list[i]);
+		if (!node) {
+			status = -EINVAL;
+			goto move_err_exit;
+		}
+
+		buf->hdr.src_parent_teid = node->info.parent_teid;
+		buf->hdr.dest_parent_teid = parent->info.node_teid;
+		buf->teid[0] = node->info.node_teid;
+		buf->hdr.num_elems = cpu_to_le16(1);
+		status = ice_aq_move_sched_elems(hw, 1, buf, buf_len,
+						 &grps_movd, NULL);
+		if (status && grps_movd != 1) {
+			status = -EIO;
+			goto move_err_exit;
+		}
+
+		/* update the SW DB */
+		ice_sched_update_parent(parent, node);
+	}
+
+move_err_exit:
+	kfree(buf);
+	return status;
+}
+
+/**
+ * ice_sched_move_vsi_to_agg - move VSI to aggregator node
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @agg_id: aggregator ID
+ * @tc: TC number
+ *
+ * This function moves a VSI to an aggregator node or its subtree.
+ * Intermediate nodes may be created if required.
+ */
+static int
+ice_sched_move_vsi_to_agg(struct ice_port_info *pi, u16 vsi_handle, u32 agg_id,
+			  u8 tc)
+{
+	struct ice_sched_node *vsi_node, *agg_node, *tc_node, *parent;
+	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+	u32 first_node_teid, vsi_teid;
+	u16 num_nodes_added;
+	u8 aggl, vsil, i;
+	int status;
+
+	tc_node = ice_sched_get_tc_node(pi, tc);
+	if (!tc_node)
+		return -EIO;
+
+	agg_node = ice_sched_get_agg_node(pi, tc_node, agg_id);
+	if (!agg_node)
+		return -ENOENT;
+
+	vsi_node = ice_sched_get_vsi_node(pi, tc_node, vsi_handle);
+	if (!vsi_node)
+		return -ENOENT;
+
+	/* Is this VSI already part of given aggregator? */
+	if (ice_sched_find_node_in_subtree(pi->hw, agg_node, vsi_node))
+		return 0;
+
+	aggl = ice_sched_get_agg_layer(pi->hw);
+	vsil = ice_sched_get_vsi_layer(pi->hw);
+
+	/* set intermediate node count to 1 between aggregator and VSI layers */
+	for (i = aggl + 1; i < vsil; i++)
+		num_nodes[i] = 1;
+
+	/* Check if the aggregator subtree has any free node to add the VSI */
+	for (i = 0; i < agg_node->num_children; i++) {
+		parent = ice_sched_get_free_vsi_parent(pi->hw,
+						       agg_node->children[i],
+						       num_nodes);
+		if (parent)
+			goto move_nodes;
+	}
+
+	/* add new nodes */
+	parent = agg_node;
+	for (i = aggl + 1; i < vsil; i++) {
+		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
+						      num_nodes[i],
+						      &first_node_teid,
+						      &num_nodes_added);
+		if (status || num_nodes[i] != num_nodes_added)
+			return -EIO;
+
+		/* The newly added node can be a new parent for the next
+		 * layer nodes
+		 */
+		if (num_nodes_added)
+			parent = ice_sched_find_node_by_teid(tc_node,
+							     first_node_teid);
+		else
+			parent = parent->children[0];
+
+		if (!parent)
+			return -EIO;
+	}
+
+move_nodes:
+	vsi_teid = le32_to_cpu(vsi_node->info.node_teid);
+	return ice_sched_move_nodes(pi, parent, 1, &vsi_teid);
+}
+
+/**
+ * ice_move_all_vsi_to_dflt_agg - move all VSI(s) to default aggregator
+ * @pi: port information structure
+ * @agg_info: aggregator info
+ * @tc: traffic class number
+ * @rm_vsi_info: true or false
+ *
+ * This function move all the VSI(s) to the default aggregator and delete
+ * aggregator VSI info based on passed in boolean parameter rm_vsi_info. The
+ * caller holds the scheduler lock.
+ */
+static int
+ice_move_all_vsi_to_dflt_agg(struct ice_port_info *pi,
+			     struct ice_sched_agg_info *agg_info, u8 tc,
+			     bool rm_vsi_info)
+{
+	struct ice_sched_agg_vsi_info *agg_vsi_info;
+	struct ice_sched_agg_vsi_info *tmp;
+	int status = 0;
+
+	list_for_each_entry_safe(agg_vsi_info, tmp, &agg_info->agg_vsi_list,
+				 list_entry) {
+		u16 vsi_handle = agg_vsi_info->vsi_handle;
+
+		/* Move VSI to default aggregator */
+		if (!ice_is_tc_ena(agg_vsi_info->tc_bitmap[0], tc))
+			continue;
+
+		status = ice_sched_move_vsi_to_agg(pi, vsi_handle,
+						   ICE_DFLT_AGG_ID, tc);
+		if (status)
+			break;
+
+		clear_bit(tc, agg_vsi_info->tc_bitmap);
+		if (rm_vsi_info && !agg_vsi_info->tc_bitmap[0]) {
+			list_del(&agg_vsi_info->list_entry);
+			devm_kfree(ice_hw_to_dev(pi->hw), agg_vsi_info);
+		}
+	}
+
+	return status;
+}
+
+/**
+ * ice_sched_is_agg_inuse - check whether the aggregator is in use or not
+ * @pi: port information structure
+ * @node: node pointer
+ *
+ * This function checks whether the aggregator is attached with any VSI or not.
+ */
+static bool
+ice_sched_is_agg_inuse(struct ice_port_info *pi, struct ice_sched_node *node)
+{
+	u8 vsil, i;
+
+	vsil = ice_sched_get_vsi_layer(pi->hw);
+	if (node->tx_sched_layer < vsil - 1) {
+		for (i = 0; i < node->num_children; i++)
+			if (ice_sched_is_agg_inuse(pi, node->children[i]))
+				return true;
+		return false;
+	} else {
+		return node->num_children ? true : false;
+	}
+}
+
+/**
+ * ice_sched_rm_agg_cfg - remove the aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @tc: TC number
+ *
+ * This function removes the aggregator node and intermediate nodes if any
+ * from the given TC
+ */
+static int
+ice_sched_rm_agg_cfg(struct ice_port_info *pi, u32 agg_id, u8 tc)
+{
+	struct ice_sched_node *tc_node, *agg_node;
+	struct ice_hw *hw = pi->hw;
+
+	tc_node = ice_sched_get_tc_node(pi, tc);
+	if (!tc_node)
+		return -EIO;
+
+	agg_node = ice_sched_get_agg_node(pi, tc_node, agg_id);
+	if (!agg_node)
+		return -ENOENT;
+
+	/* Can't remove the aggregator node if it has children */
+	if (ice_sched_is_agg_inuse(pi, agg_node))
+		return -EBUSY;
+
+	/* need to remove the whole subtree if aggregator node is the
+	 * only child.
+	 */
+	while (agg_node->tx_sched_layer > hw->sw_entry_point_layer) {
+		struct ice_sched_node *parent = agg_node->parent;
+
+		if (!parent)
+			return -EIO;
+
+		if (parent->num_children > 1)
+			break;
+
+		agg_node = parent;
+	}
+
+	ice_free_sched_node(pi, agg_node);
+	return 0;
+}
+
+/**
+ * ice_rm_agg_cfg_tc - remove aggregator configuration for TC
+ * @pi: port information structure
+ * @agg_info: aggregator ID
+ * @tc: TC number
+ * @rm_vsi_info: bool value true or false
+ *
+ * This function removes aggregator reference to VSI of given TC. It removes
+ * the aggregator configuration completely for requested TC. The caller needs
+ * to hold the scheduler lock.
+ */
+static int
+ice_rm_agg_cfg_tc(struct ice_port_info *pi, struct ice_sched_agg_info *agg_info,
+		  u8 tc, bool rm_vsi_info)
+{
+	int status = 0;
+
+	/* If nothing to remove - return success */
+	if (!ice_is_tc_ena(agg_info->tc_bitmap[0], tc))
+		goto exit_rm_agg_cfg_tc;
+
+	status = ice_move_all_vsi_to_dflt_agg(pi, agg_info, tc, rm_vsi_info);
+	if (status)
+		goto exit_rm_agg_cfg_tc;
+
+	/* Delete aggregator node(s) */
+	status = ice_sched_rm_agg_cfg(pi, agg_info->agg_id, tc);
+	if (status)
+		goto exit_rm_agg_cfg_tc;
+
+	clear_bit(tc, agg_info->tc_bitmap);
+exit_rm_agg_cfg_tc:
+	return status;
+}
+
+/**
+ * ice_save_agg_tc_bitmap - save aggregator TC bitmap
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @tc_bitmap: 8 bits TC bitmap
+ *
+ * Save aggregator TC bitmap. This function needs to be called with scheduler
+ * lock held.
+ */
+static int
+ice_save_agg_tc_bitmap(struct ice_port_info *pi, u32 agg_id,
+		       unsigned long *tc_bitmap)
+{
+	struct ice_sched_agg_info *agg_info;
+
+	agg_info = ice_get_agg_info(pi->hw, agg_id);
+	if (!agg_info)
+		return -EINVAL;
+	bitmap_copy(agg_info->replay_tc_bitmap, tc_bitmap,
+		    ICE_MAX_TRAFFIC_CLASS);
+	return 0;
+}
+
+/**
+ * ice_sched_add_agg_cfg - create an aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @tc: TC number
+ *
+ * This function creates an aggregator node and intermediate nodes if required
+ * for the given TC
+ */
+static int
+ice_sched_add_agg_cfg(struct ice_port_info *pi, u32 agg_id, u8 tc)
+{
+	struct ice_sched_node *parent, *agg_node, *tc_node;
+	u16 num_nodes[ICE_AQC_TOPO_MAX_LEVEL_NUM] = { 0 };
+	struct ice_hw *hw = pi->hw;
+	u32 first_node_teid;
+	u16 num_nodes_added;
+	int status = 0;
+	u8 i, aggl;
+
+	tc_node = ice_sched_get_tc_node(pi, tc);
+	if (!tc_node)
+		return -EIO;
+
+	agg_node = ice_sched_get_agg_node(pi, tc_node, agg_id);
+	/* Does Agg node already exist ? */
+	if (agg_node)
+		return status;
+
+	aggl = ice_sched_get_agg_layer(hw);
+
+	/* need one node in Agg layer */
+	num_nodes[aggl] = 1;
+
+	/* Check whether the intermediate nodes have space to add the
+	 * new aggregator. If they are full, then SW needs to allocate a new
+	 * intermediate node on those layers
+	 */
+	for (i = hw->sw_entry_point_layer; i < aggl; i++) {
+		parent = ice_sched_get_first_node(pi, tc_node, i);
+
+		/* scan all the siblings */
+		while (parent) {
+			if (parent->num_children < hw->max_children[i])
+				break;
+			parent = parent->sibling;
+		}
+
+		/* all the nodes are full, reserve one for this layer */
+		if (!parent)
+			num_nodes[i]++;
+	}
+
+	/* add the aggregator node */
+	parent = tc_node;
+	for (i = hw->sw_entry_point_layer; i <= aggl; i++) {
+		if (!parent)
+			return -EIO;
+
+		status = ice_sched_add_nodes_to_layer(pi, tc_node, parent, i,
+						      num_nodes[i],
+						      &first_node_teid,
+						      &num_nodes_added);
+		if (status || num_nodes[i] != num_nodes_added)
+			return -EIO;
+
+		/* The newly added node can be a new parent for the next
+		 * layer nodes
+		 */
+		if (num_nodes_added) {
+			parent = ice_sched_find_node_by_teid(tc_node,
+							     first_node_teid);
+			/* register aggregator ID with the aggregator node */
+			if (parent && i == aggl)
+				parent->agg_id = agg_id;
+		} else {
+			parent = parent->children[0];
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_sched_cfg_agg - configure aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @agg_type: aggregator type queue, VSI, or aggregator group
+ * @tc_bitmap: bits TC bitmap
+ *
+ * It registers a unique aggregator node into scheduler services. It
+ * allows a user to register with a unique ID to track it's resources.
+ * The aggregator type determines if this is a queue group, VSI group
+ * or aggregator group. It then creates the aggregator node(s) for requested
+ * TC(s) or removes an existing aggregator node including its configuration
+ * if indicated via tc_bitmap. Call ice_rm_agg_cfg to release aggregator
+ * resources and remove aggregator ID.
+ * This function needs to be called with scheduler lock held.
+ */
+static int
+ice_sched_cfg_agg(struct ice_port_info *pi, u32 agg_id,
+		  enum ice_agg_type agg_type, unsigned long *tc_bitmap)
+{
+	struct ice_sched_agg_info *agg_info;
+	struct ice_hw *hw = pi->hw;
+	int status = 0;
+	u8 tc;
+
+	agg_info = ice_get_agg_info(hw, agg_id);
+	if (!agg_info) {
+		/* Create new entry for new aggregator ID */
+		agg_info = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*agg_info),
+					GFP_KERNEL);
+		if (!agg_info)
+			return -ENOMEM;
+
+		agg_info->agg_id = agg_id;
+		agg_info->agg_type = agg_type;
+		agg_info->tc_bitmap[0] = 0;
+
+		/* Initialize the aggregator VSI list head */
+		INIT_LIST_HEAD(&agg_info->agg_vsi_list);
+
+		/* Add new entry in aggregator list */
+		list_add(&agg_info->list_entry, &hw->agg_list);
+	}
+	/* Create aggregator node(s) for requested TC(s) */
+	ice_for_each_traffic_class(tc) {
+		if (!ice_is_tc_ena(*tc_bitmap, tc)) {
+			/* Delete aggregator cfg TC if it exists previously */
+			status = ice_rm_agg_cfg_tc(pi, agg_info, tc, false);
+			if (status)
+				break;
+			continue;
+		}
+
+		/* Check if aggregator node for TC already exists */
+		if (ice_is_tc_ena(agg_info->tc_bitmap[0], tc))
+			continue;
+
+		/* Create new aggregator node for TC */
+		status = ice_sched_add_agg_cfg(pi, agg_id, tc);
+		if (status)
+			break;
+
+		/* Save aggregator node's TC information */
+		set_bit(tc, agg_info->tc_bitmap);
+	}
+
+	return status;
+}
+
+/**
+ * ice_cfg_agg - config aggregator node
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @agg_type: aggregator type queue, VSI, or aggregator group
+ * @tc_bitmap: bits TC bitmap
+ *
+ * This function configures aggregator node(s).
+ */
+int
+ice_cfg_agg(struct ice_port_info *pi, u32 agg_id, enum ice_agg_type agg_type,
+	    u8 tc_bitmap)
+{
+	unsigned long bitmap = tc_bitmap;
+	int status;
+
+	mutex_lock(&pi->sched_lock);
+	status = ice_sched_cfg_agg(pi, agg_id, agg_type, &bitmap);
+	if (!status)
+		status = ice_save_agg_tc_bitmap(pi, agg_id, &bitmap);
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_get_agg_vsi_info - get the aggregator ID
+ * @agg_info: aggregator info
+ * @vsi_handle: software VSI handle
+ *
+ * The function returns aggregator VSI info based on VSI handle. This function
+ * needs to be called with scheduler lock held.
+ */
+static struct ice_sched_agg_vsi_info *
+ice_get_agg_vsi_info(struct ice_sched_agg_info *agg_info, u16 vsi_handle)
+{
+	struct ice_sched_agg_vsi_info *agg_vsi_info;
+
+	list_for_each_entry(agg_vsi_info, &agg_info->agg_vsi_list, list_entry)
+		if (agg_vsi_info->vsi_handle == vsi_handle)
+			return agg_vsi_info;
+
+	return NULL;
+}
+
+/**
+ * ice_get_vsi_agg_info - get the aggregator info of VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: Sw VSI handle
+ *
+ * The function returns aggregator info of VSI represented via vsi_handle. The
+ * VSI has in this case a different aggregator than the default one. This
+ * function needs to be called with scheduler lock held.
+ */
+static struct ice_sched_agg_info *
+ice_get_vsi_agg_info(struct ice_hw *hw, u16 vsi_handle)
+{
+	struct ice_sched_agg_info *agg_info;
+
+	list_for_each_entry(agg_info, &hw->agg_list, list_entry) {
+		struct ice_sched_agg_vsi_info *agg_vsi_info;
+
+		agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+		if (agg_vsi_info)
+			return agg_info;
+	}
+	return NULL;
+}
+
+/**
+ * ice_save_agg_vsi_tc_bitmap - save aggregator VSI TC bitmap
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap of enabled TC(s)
+ *
+ * Save VSI to aggregator TC bitmap. This function needs to call with scheduler
+ * lock held.
+ */
+static int
+ice_save_agg_vsi_tc_bitmap(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle,
+			   unsigned long *tc_bitmap)
+{
+	struct ice_sched_agg_vsi_info *agg_vsi_info;
+	struct ice_sched_agg_info *agg_info;
+
+	agg_info = ice_get_agg_info(pi->hw, agg_id);
+	if (!agg_info)
+		return -EINVAL;
+	/* check if entry already exist */
+	agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+	if (!agg_vsi_info)
+		return -EINVAL;
+	bitmap_copy(agg_vsi_info->replay_tc_bitmap, tc_bitmap,
+		    ICE_MAX_TRAFFIC_CLASS);
+	return 0;
+}
+
+/**
+ * ice_sched_assoc_vsi_to_agg - associate/move VSI to new/default aggregator
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap of enabled TC(s)
+ *
+ * This function moves VSI to a new or default aggregator node. If VSI is
+ * already associated to the aggregator node then no operation is performed on
+ * the tree. This function needs to be called with scheduler lock held.
+ */
+static int
+ice_sched_assoc_vsi_to_agg(struct ice_port_info *pi, u32 agg_id,
+			   u16 vsi_handle, unsigned long *tc_bitmap)
+{
+	struct ice_sched_agg_vsi_info *agg_vsi_info, *iter, *old_agg_vsi_info = NULL;
+	struct ice_sched_agg_info *agg_info, *old_agg_info;
+	struct ice_hw *hw = pi->hw;
+	int status = 0;
+	u8 tc;
+
+	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+		return -EINVAL;
+	agg_info = ice_get_agg_info(hw, agg_id);
+	if (!agg_info)
+		return -EINVAL;
+	/* If the VSI is already part of another aggregator then update
+	 * its VSI info list
+	 */
+	old_agg_info = ice_get_vsi_agg_info(hw, vsi_handle);
+	if (old_agg_info && old_agg_info != agg_info) {
+		struct ice_sched_agg_vsi_info *vtmp;
+
+		list_for_each_entry_safe(iter, vtmp,
+					 &old_agg_info->agg_vsi_list,
+					 list_entry)
+			if (iter->vsi_handle == vsi_handle) {
+				old_agg_vsi_info = iter;
+				break;
+			}
+	}
+
+	/* check if entry already exist */
+	agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+	if (!agg_vsi_info) {
+		/* Create new entry for VSI under aggregator list */
+		agg_vsi_info = devm_kzalloc(ice_hw_to_dev(hw),
+					    sizeof(*agg_vsi_info), GFP_KERNEL);
+		if (!agg_vsi_info)
+			return -EINVAL;
+
+		/* add VSI ID into the aggregator list */
+		agg_vsi_info->vsi_handle = vsi_handle;
+		list_add(&agg_vsi_info->list_entry, &agg_info->agg_vsi_list);
+	}
+	/* Move VSI node to new aggregator node for requested TC(s) */
+	ice_for_each_traffic_class(tc) {
+		if (!ice_is_tc_ena(*tc_bitmap, tc))
+			continue;
+
+		/* Move VSI to new aggregator */
+		status = ice_sched_move_vsi_to_agg(pi, vsi_handle, agg_id, tc);
+		if (status)
+			break;
+
+		set_bit(tc, agg_vsi_info->tc_bitmap);
+		if (old_agg_vsi_info)
+			clear_bit(tc, old_agg_vsi_info->tc_bitmap);
+	}
+	if (old_agg_vsi_info && !old_agg_vsi_info->tc_bitmap[0]) {
+		list_del(&old_agg_vsi_info->list_entry);
+		devm_kfree(ice_hw_to_dev(pi->hw), old_agg_vsi_info);
+	}
+	return status;
+}
+
+/**
+ * ice_sched_rm_unused_rl_prof - remove unused RL profile
+ * @pi: port information structure
+ *
+ * This function removes unused rate limit profiles from the HW and
+ * SW DB. The caller needs to hold scheduler lock.
+ */
+static void ice_sched_rm_unused_rl_prof(struct ice_port_info *pi)
+{
+	u16 ln;
+
+	for (ln = 0; ln < pi->hw->num_tx_sched_layers; ln++) {
+		struct ice_aqc_rl_profile_info *rl_prof_elem;
+		struct ice_aqc_rl_profile_info *rl_prof_tmp;
+
+		list_for_each_entry_safe(rl_prof_elem, rl_prof_tmp,
+					 &pi->rl_prof_list[ln], list_entry) {
+			if (!ice_sched_del_rl_profile(pi->hw, rl_prof_elem))
+				ice_debug(pi->hw, ICE_DBG_SCHED, "Removed rl profile\n");
+		}
+	}
+}
+
+/**
+ * ice_sched_update_elem - update element
+ * @hw: pointer to the HW struct
+ * @node: pointer to node
+ * @info: node info to update
+ *
+ * Update the HW DB, and local SW DB of node. Update the scheduling
+ * parameters of node from argument info data buffer (Info->data buf) and
+ * returns success or error on config sched element failure. The caller
+ * needs to hold scheduler lock.
+ */
+static int
+ice_sched_update_elem(struct ice_hw *hw, struct ice_sched_node *node,
+		      struct ice_aqc_txsched_elem_data *info)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	u16 elem_cfgd = 0;
+	u16 num_elems = 1;
+	int status;
+
+	buf = *info;
+	/* Parent TEID is reserved field in this aq call */
+	buf.parent_teid = 0;
+	/* Element type is reserved field in this aq call */
+	buf.data.elem_type = 0;
+	/* Flags is reserved field in this aq call */
+	buf.data.flags = 0;
+
+	/* Update HW DB */
+	/* Configure element node */
+	status = ice_aq_cfg_sched_elems(hw, num_elems, &buf, sizeof(buf),
+					&elem_cfgd, NULL);
+	if (status || elem_cfgd != num_elems) {
+		ice_debug(hw, ICE_DBG_SCHED, "Config sched elem error\n");
+		return -EIO;
+	}
+
+	/* Config success case */
+	/* Now update local SW DB */
+	/* Only copy the data portion of info buffer */
+	node->info.data = info->data;
+	return status;
+}
+
+/**
+ * ice_sched_cfg_node_bw_alloc - configure node BW weight/alloc params
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @rl_type: rate limit type CIR, EIR, or shared
+ * @bw_alloc: BW weight/allocation
+ *
+ * This function configures node element's BW allocation.
+ */
+static int
+ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node,
+			    enum ice_rl_type rl_type, u16 bw_alloc)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	struct ice_aqc_txsched_elem *data;
+
+	buf = node->info;
+	data = &buf.data;
+	if (rl_type == ICE_MIN_BW) {
+		data->valid_sections |= ICE_AQC_ELEM_VALID_CIR;
+		data->cir_bw.bw_alloc = cpu_to_le16(bw_alloc);
+	} else if (rl_type == ICE_MAX_BW) {
+		data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+		data->eir_bw.bw_alloc = cpu_to_le16(bw_alloc);
+	} else {
+		return -EINVAL;
+	}
+
+	/* Configure element */
+	return ice_sched_update_elem(hw, node, &buf);
+}
+
+/**
+ * ice_move_vsi_to_agg - moves VSI to new or default aggregator
+ * @pi: port information structure
+ * @agg_id: aggregator ID
+ * @vsi_handle: software VSI handle
+ * @tc_bitmap: TC bitmap of enabled TC(s)
+ *
+ * Move or associate VSI to a new or default aggregator node.
+ */
+int
+ice_move_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle,
+		    u8 tc_bitmap)
+{
+	unsigned long bitmap = tc_bitmap;
+	int status;
+
+	mutex_lock(&pi->sched_lock);
+	status = ice_sched_assoc_vsi_to_agg(pi, agg_id, vsi_handle,
+					    (unsigned long *)&bitmap);
+	if (!status)
+		status = ice_save_agg_vsi_tc_bitmap(pi, agg_id, vsi_handle,
+						    (unsigned long *)&bitmap);
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_set_clear_cir_bw - set or clear CIR BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear CIR bandwidth (BW) in the passed param bw_t_info.
+ */
+static void ice_set_clear_cir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+	if (bw == ICE_SCHED_DFLT_BW) {
+		clear_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->cir_bw.bw = 0;
+	} else {
+		/* Save type of BW information */
+		set_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->cir_bw.bw = bw;
+	}
+}
+
+/**
+ * ice_set_clear_eir_bw - set or clear EIR BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear EIR bandwidth (BW) in the passed param bw_t_info.
+ */
+static void ice_set_clear_eir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+	if (bw == ICE_SCHED_DFLT_BW) {
+		clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->eir_bw.bw = 0;
+	} else {
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element.
+		 * First clear earlier saved shared BW information.
+		 */
+		clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+		bw_t_info->shared_bw = 0;
+		/* save EIR BW information */
+		set_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->eir_bw.bw = bw;
+	}
+}
+
+/**
+ * ice_set_clear_shared_bw - set or clear shared BW
+ * @bw_t_info: bandwidth type information structure
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save or clear shared bandwidth (BW) in the passed param bw_t_info.
+ */
+static void ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+{
+	if (bw == ICE_SCHED_DFLT_BW) {
+		clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+		bw_t_info->shared_bw = 0;
+	} else {
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element.
+		 * First clear earlier saved EIR BW information.
+		 */
+		clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
+		bw_t_info->eir_bw.bw = 0;
+		/* save shared BW information */
+		set_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
+		bw_t_info->shared_bw = bw;
+	}
+}
+
+/**
+ * ice_sched_save_vsi_bw - save VSI node's BW information
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save BW information of VSI type node for post replay use.
+ */
+static int
+ice_sched_save_vsi_bw(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		      enum ice_rl_type rl_type, u32 bw)
+{
+	struct ice_vsi_ctx *vsi_ctx;
+
+	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+		return -EINVAL;
+	vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
+	if (!vsi_ctx)
+		return -EINVAL;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		ice_set_clear_cir_bw(&vsi_ctx->sched.bw_t_info[tc], bw);
+		break;
+	case ICE_MAX_BW:
+		ice_set_clear_eir_bw(&vsi_ctx->sched.bw_t_info[tc], bw);
+		break;
+	case ICE_SHARED_BW:
+		ice_set_clear_shared_bw(&vsi_ctx->sched.bw_t_info[tc], bw);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * ice_sched_calc_wakeup - calculate RL profile wakeup parameter
+ * @hw: pointer to the HW struct
+ * @bw: bandwidth in Kbps
+ *
+ * This function calculates the wakeup parameter of RL profile.
+ */
+static u16 ice_sched_calc_wakeup(struct ice_hw *hw, s32 bw)
+{
+	s64 bytes_per_sec, wakeup_int, wakeup_a, wakeup_b, wakeup_f;
+	s32 wakeup_f_int;
+	u16 wakeup = 0;
+
+	/* Get the wakeup integer value */
+	bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE);
+	wakeup_int = div64_long(hw->psm_clk_freq, bytes_per_sec);
+	if (wakeup_int > 63) {
+		wakeup = (u16)((1 << 15) | wakeup_int);
+	} else {
+		/* Calculate fraction value up to 4 decimals
+		 * Convert Integer value to a constant multiplier
+		 */
+		wakeup_b = (s64)ICE_RL_PROF_MULTIPLIER * wakeup_int;
+		wakeup_a = div64_long((s64)ICE_RL_PROF_MULTIPLIER *
+					   hw->psm_clk_freq, bytes_per_sec);
+
+		/* Get Fraction value */
+		wakeup_f = wakeup_a - wakeup_b;
+
+		/* Round up the Fractional value via Ceil(Fractional value) */
+		if (wakeup_f > div64_long(ICE_RL_PROF_MULTIPLIER, 2))
+			wakeup_f += 1;
+
+		wakeup_f_int = (s32)div64_long(wakeup_f * ICE_RL_PROF_FRACTION,
+					       ICE_RL_PROF_MULTIPLIER);
+		wakeup |= (u16)(wakeup_int << 9);
+		wakeup |= (u16)(0x1ff & wakeup_f_int);
+	}
+
+	return wakeup;
+}
+
+/**
+ * ice_sched_bw_to_rl_profile - convert BW to profile parameters
+ * @hw: pointer to the HW struct
+ * @bw: bandwidth in Kbps
+ * @profile: profile parameters to return
+ *
+ * This function converts the BW to profile structure format.
+ */
+static int
+ice_sched_bw_to_rl_profile(struct ice_hw *hw, u32 bw,
+			   struct ice_aqc_rl_profile_elem *profile)
+{
+	s64 bytes_per_sec, ts_rate, mv_tmp;
+	int status = -EINVAL;
+	bool found = false;
+	s32 encode = 0;
+	s64 mv = 0;
+	s32 i;
+
+	/* Bw settings range is from 0.5Mb/sec to 100Gb/sec */
+	if (bw < ICE_SCHED_MIN_BW || bw > ICE_SCHED_MAX_BW)
+		return status;
+
+	/* Bytes per second from Kbps */
+	bytes_per_sec = div64_long(((s64)bw * 1000), BITS_PER_BYTE);
+
+	/* encode is 6 bits but really useful are 5 bits */
+	for (i = 0; i < 64; i++) {
+		u64 pow_result = BIT_ULL(i);
+
+		ts_rate = div64_long((s64)hw->psm_clk_freq,
+				     pow_result * ICE_RL_PROF_TS_MULTIPLIER);
+		if (ts_rate <= 0)
+			continue;
+
+		/* Multiplier value */
+		mv_tmp = div64_long(bytes_per_sec * ICE_RL_PROF_MULTIPLIER,
+				    ts_rate);
+
+		/* Round to the nearest ICE_RL_PROF_MULTIPLIER */
+		mv = round_up_64bit(mv_tmp, ICE_RL_PROF_MULTIPLIER);
+
+		/* First multiplier value greater than the given
+		 * accuracy bytes
+		 */
+		if (mv > ICE_RL_PROF_ACCURACY_BYTES) {
+			encode = i;
+			found = true;
+			break;
+		}
+	}
+	if (found) {
+		u16 wm;
+
+		wm = ice_sched_calc_wakeup(hw, bw);
+		profile->rl_multiply = cpu_to_le16(mv);
+		profile->wake_up_calc = cpu_to_le16(wm);
+		profile->rl_encode = cpu_to_le16(encode);
+		status = 0;
+	} else {
+		status = -ENOENT;
+	}
+
+	return status;
+}
+
+/**
+ * ice_sched_add_rl_profile - add RL profile
+ * @pi: port information structure
+ * @rl_type: type of rate limit BW - min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ * @layer_num: specifies in which layer to create profile
+ *
+ * This function first checks the existing list for corresponding BW
+ * parameter. If it exists, it returns the associated profile otherwise
+ * it creates a new rate limit profile for requested BW, and adds it to
+ * the HW DB and local list. It returns the new profile or null on error.
+ * The caller needs to hold the scheduler lock.
+ */
+static struct ice_aqc_rl_profile_info *
+ice_sched_add_rl_profile(struct ice_port_info *pi,
+			 enum ice_rl_type rl_type, u32 bw, u8 layer_num)
+{
+	struct ice_aqc_rl_profile_info *rl_prof_elem;
+	u16 profiles_added = 0, num_profiles = 1;
+	struct ice_aqc_rl_profile_elem *buf;
+	struct ice_hw *hw;
+	u8 profile_type;
+	int status;
+
+	if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM)
+		return NULL;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR;
+		break;
+	case ICE_MAX_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR;
+		break;
+	case ICE_SHARED_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL;
+		break;
+	default:
+		return NULL;
+	}
+
+	if (!pi)
+		return NULL;
+	hw = pi->hw;
+	list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num],
+			    list_entry)
+		if ((rl_prof_elem->profile.flags & ICE_AQC_RL_PROFILE_TYPE_M) ==
+		    profile_type && rl_prof_elem->bw == bw)
+			/* Return existing profile ID info */
+			return rl_prof_elem;
+
+	/* Create new profile ID */
+	rl_prof_elem = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rl_prof_elem),
+				    GFP_KERNEL);
+
+	if (!rl_prof_elem)
+		return NULL;
+
+	status = ice_sched_bw_to_rl_profile(hw, bw, &rl_prof_elem->profile);
+	if (status)
+		goto exit_add_rl_prof;
+
+	rl_prof_elem->bw = bw;
+	/* layer_num is zero relative, and fw expects level from 1 to 9 */
+	rl_prof_elem->profile.level = layer_num + 1;
+	rl_prof_elem->profile.flags = profile_type;
+	rl_prof_elem->profile.max_burst_size = cpu_to_le16(hw->max_burst_size);
+
+	/* Create new entry in HW DB */
+	buf = &rl_prof_elem->profile;
+	status = ice_aq_add_rl_profile(hw, num_profiles, buf, sizeof(*buf),
+				       &profiles_added, NULL);
+	if (status || profiles_added != num_profiles)
+		goto exit_add_rl_prof;
+
+	/* Good entry - add in the list */
+	rl_prof_elem->prof_id_ref = 0;
+	list_add(&rl_prof_elem->list_entry, &pi->rl_prof_list[layer_num]);
+	return rl_prof_elem;
+
+exit_add_rl_prof:
+	devm_kfree(ice_hw_to_dev(hw), rl_prof_elem);
+	return NULL;
+}
+
+/**
+ * ice_sched_cfg_node_bw_lmt - configure node sched params
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @rl_type: rate limit type CIR, EIR, or shared
+ * @rl_prof_id: rate limit profile ID
+ *
+ * This function configures node element's BW limit.
+ */
+static int
+ice_sched_cfg_node_bw_lmt(struct ice_hw *hw, struct ice_sched_node *node,
+			  enum ice_rl_type rl_type, u16 rl_prof_id)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	struct ice_aqc_txsched_elem *data;
+
+	buf = node->info;
+	data = &buf.data;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		data->valid_sections |= ICE_AQC_ELEM_VALID_CIR;
+		data->cir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id);
+		break;
+	case ICE_MAX_BW:
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element
+		 */
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED)
+			return -EIO;
+		data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+		data->eir_bw.bw_profile_idx = cpu_to_le16(rl_prof_id);
+		break;
+	case ICE_SHARED_BW:
+		/* Check for removing shared BW */
+		if (rl_prof_id == ICE_SCHED_NO_SHARED_RL_PROF_ID) {
+			/* remove shared profile */
+			data->valid_sections &= ~ICE_AQC_ELEM_VALID_SHARED;
+			data->srl_id = 0; /* clear SRL field */
+
+			/* enable back EIR to default profile */
+			data->valid_sections |= ICE_AQC_ELEM_VALID_EIR;
+			data->eir_bw.bw_profile_idx =
+				cpu_to_le16(ICE_SCHED_DFLT_RL_PROF_ID);
+			break;
+		}
+		/* EIR BW and Shared BW profiles are mutually exclusive and
+		 * hence only one of them may be set for any given element
+		 */
+		if ((data->valid_sections & ICE_AQC_ELEM_VALID_EIR) &&
+		    (le16_to_cpu(data->eir_bw.bw_profile_idx) !=
+			    ICE_SCHED_DFLT_RL_PROF_ID))
+			return -EIO;
+		/* EIR BW is set to default, disable it */
+		data->valid_sections &= ~ICE_AQC_ELEM_VALID_EIR;
+		/* Okay to enable shared BW now */
+		data->valid_sections |= ICE_AQC_ELEM_VALID_SHARED;
+		data->srl_id = cpu_to_le16(rl_prof_id);
+		break;
+	default:
+		/* Unknown rate limit type */
+		return -EINVAL;
+	}
+
+	/* Configure element */
+	return ice_sched_update_elem(hw, node, &buf);
+}
+
+/**
+ * ice_sched_get_node_rl_prof_id - get node's rate limit profile ID
+ * @node: sched node
+ * @rl_type: rate limit type
+ *
+ * If existing profile matches, it returns the corresponding rate
+ * limit profile ID, otherwise it returns an invalid ID as error.
+ */
+static u16
+ice_sched_get_node_rl_prof_id(struct ice_sched_node *node,
+			      enum ice_rl_type rl_type)
+{
+	u16 rl_prof_id = ICE_SCHED_INVAL_PROF_ID;
+	struct ice_aqc_txsched_elem *data;
+
+	data = &node->info.data;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_CIR)
+			rl_prof_id = le16_to_cpu(data->cir_bw.bw_profile_idx);
+		break;
+	case ICE_MAX_BW:
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_EIR)
+			rl_prof_id = le16_to_cpu(data->eir_bw.bw_profile_idx);
+		break;
+	case ICE_SHARED_BW:
+		if (data->valid_sections & ICE_AQC_ELEM_VALID_SHARED)
+			rl_prof_id = le16_to_cpu(data->srl_id);
+		break;
+	default:
+		break;
+	}
+
+	return rl_prof_id;
+}
+
+/**
+ * ice_sched_get_rl_prof_layer - selects rate limit profile creation layer
+ * @pi: port information structure
+ * @rl_type: type of rate limit BW - min, max, or shared
+ * @layer_index: layer index
+ *
+ * This function returns requested profile creation layer.
+ */
+static u8
+ice_sched_get_rl_prof_layer(struct ice_port_info *pi, enum ice_rl_type rl_type,
+			    u8 layer_index)
+{
+	struct ice_hw *hw = pi->hw;
+
+	if (layer_index >= hw->num_tx_sched_layers)
+		return ICE_SCHED_INVAL_LAYER_NUM;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		if (hw->layer_info[layer_index].max_cir_rl_profiles)
+			return layer_index;
+		break;
+	case ICE_MAX_BW:
+		if (hw->layer_info[layer_index].max_eir_rl_profiles)
+			return layer_index;
+		break;
+	case ICE_SHARED_BW:
+		/* if current layer doesn't support SRL profile creation
+		 * then try a layer up or down.
+		 */
+		if (hw->layer_info[layer_index].max_srl_profiles)
+			return layer_index;
+		else if (layer_index < hw->num_tx_sched_layers - 1 &&
+			 hw->layer_info[layer_index + 1].max_srl_profiles)
+			return layer_index + 1;
+		else if (layer_index > 0 &&
+			 hw->layer_info[layer_index - 1].max_srl_profiles)
+			return layer_index - 1;
+		break;
+	default:
+		break;
+	}
+	return ICE_SCHED_INVAL_LAYER_NUM;
+}
+
+/**
+ * ice_sched_get_srl_node - get shared rate limit node
+ * @node: tree node
+ * @srl_layer: shared rate limit layer
+ *
+ * This function returns SRL node to be used for shared rate limit purpose.
+ * The caller needs to hold scheduler lock.
+ */
+static struct ice_sched_node *
+ice_sched_get_srl_node(struct ice_sched_node *node, u8 srl_layer)
+{
+	if (srl_layer > node->tx_sched_layer)
+		return node->children[0];
+	else if (srl_layer < node->tx_sched_layer)
+		/* Node can't be created without a parent. It will always
+		 * have a valid parent except root node.
+		 */
+		return node->parent;
+	else
+		return node;
+}
+
+/**
+ * ice_sched_rm_rl_profile - remove RL profile ID
+ * @pi: port information structure
+ * @layer_num: layer number where profiles are saved
+ * @profile_type: profile type like EIR, CIR, or SRL
+ * @profile_id: profile ID to remove
+ *
+ * This function removes rate limit profile from layer 'layer_num' of type
+ * 'profile_type' and profile ID as 'profile_id'. The caller needs to hold
+ * scheduler lock.
+ */
+static int
+ice_sched_rm_rl_profile(struct ice_port_info *pi, u8 layer_num, u8 profile_type,
+			u16 profile_id)
+{
+	struct ice_aqc_rl_profile_info *rl_prof_elem;
+	int status = 0;
+
+	if (layer_num >= ICE_AQC_TOPO_MAX_LEVEL_NUM)
+		return -EINVAL;
+	/* Check the existing list for RL profile */
+	list_for_each_entry(rl_prof_elem, &pi->rl_prof_list[layer_num],
+			    list_entry)
+		if ((rl_prof_elem->profile.flags & ICE_AQC_RL_PROFILE_TYPE_M) ==
+		    profile_type &&
+		    le16_to_cpu(rl_prof_elem->profile.profile_id) ==
+		    profile_id) {
+			if (rl_prof_elem->prof_id_ref)
+				rl_prof_elem->prof_id_ref--;
+
+			/* Remove old profile ID from database */
+			status = ice_sched_del_rl_profile(pi->hw, rl_prof_elem);
+			if (status && status != -EBUSY)
+				ice_debug(pi->hw, ICE_DBG_SCHED, "Remove rl profile failed\n");
+			break;
+		}
+	if (status == -EBUSY)
+		status = 0;
+	return status;
+}
+
+/**
+ * ice_sched_set_node_bw_dflt - set node's bandwidth limit to default
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @rl_type: rate limit type min, max, or shared
+ * @layer_num: layer number where RL profiles are saved
+ *
+ * This function configures node element's BW rate limit profile ID of
+ * type CIR, EIR, or SRL to default. This function needs to be called
+ * with the scheduler lock held.
+ */
+static int
+ice_sched_set_node_bw_dflt(struct ice_port_info *pi,
+			   struct ice_sched_node *node,
+			   enum ice_rl_type rl_type, u8 layer_num)
+{
+	struct ice_hw *hw;
+	u8 profile_type;
+	u16 rl_prof_id;
+	u16 old_id;
+	int status;
+
+	hw = pi->hw;
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_CIR;
+		rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID;
+		break;
+	case ICE_MAX_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_EIR;
+		rl_prof_id = ICE_SCHED_DFLT_RL_PROF_ID;
+		break;
+	case ICE_SHARED_BW:
+		profile_type = ICE_AQC_RL_PROFILE_TYPE_SRL;
+		/* No SRL is configured for default case */
+		rl_prof_id = ICE_SCHED_NO_SHARED_RL_PROF_ID;
+		break;
+	default:
+		return -EINVAL;
+	}
+	/* Save existing RL prof ID for later clean up */
+	old_id = ice_sched_get_node_rl_prof_id(node, rl_type);
+	/* Configure BW scheduling parameters */
+	status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id);
+	if (status)
+		return status;
+
+	/* Remove stale RL profile ID */
+	if (old_id == ICE_SCHED_DFLT_RL_PROF_ID ||
+	    old_id == ICE_SCHED_INVAL_PROF_ID)
+		return 0;
+
+	return ice_sched_rm_rl_profile(pi, layer_num, profile_type, old_id);
+}
+
+/**
+ * ice_sched_set_eir_srl_excl - set EIR/SRL exclusiveness
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @layer_num: layer number where rate limit profiles are saved
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth value
+ *
+ * This function prepares node element's bandwidth to SRL or EIR exclusively.
+ * EIR BW and Shared BW profiles are mutually exclusive and hence only one of
+ * them may be set for any given element. This function needs to be called
+ * with the scheduler lock held.
+ */
+static int
+ice_sched_set_eir_srl_excl(struct ice_port_info *pi,
+			   struct ice_sched_node *node,
+			   u8 layer_num, enum ice_rl_type rl_type, u32 bw)
+{
+	if (rl_type == ICE_SHARED_BW) {
+		/* SRL node passed in this case, it may be different node */
+		if (bw == ICE_SCHED_DFLT_BW)
+			/* SRL being removed, ice_sched_cfg_node_bw_lmt()
+			 * enables EIR to default. EIR is not set in this
+			 * case, so no additional action is required.
+			 */
+			return 0;
+
+		/* SRL being configured, set EIR to default here.
+		 * ice_sched_cfg_node_bw_lmt() disables EIR when it
+		 * configures SRL
+		 */
+		return ice_sched_set_node_bw_dflt(pi, node, ICE_MAX_BW,
+						  layer_num);
+	} else if (rl_type == ICE_MAX_BW &&
+		   node->info.data.valid_sections & ICE_AQC_ELEM_VALID_SHARED) {
+		/* Remove Shared profile. Set default shared BW call
+		 * removes shared profile for a node.
+		 */
+		return ice_sched_set_node_bw_dflt(pi, node,
+						  ICE_SHARED_BW,
+						  layer_num);
+	}
+	return 0;
+}
+
+/**
+ * ice_sched_set_node_bw - set node's bandwidth
+ * @pi: port information structure
+ * @node: tree node
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ * @layer_num: layer number
+ *
+ * This function adds new profile corresponding to requested BW, configures
+ * node's RL profile ID of type CIR, EIR, or SRL, and removes old profile
+ * ID from local database. The caller needs to hold scheduler lock.
+ */
+int
+ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node,
+		      enum ice_rl_type rl_type, u32 bw, u8 layer_num)
+{
+	struct ice_aqc_rl_profile_info *rl_prof_info;
+	struct ice_hw *hw = pi->hw;
+	u16 old_id, rl_prof_id;
+	int status = -EINVAL;
+
+	rl_prof_info = ice_sched_add_rl_profile(pi, rl_type, bw, layer_num);
+	if (!rl_prof_info)
+		return status;
+
+	rl_prof_id = le16_to_cpu(rl_prof_info->profile.profile_id);
+
+	/* Save existing RL prof ID for later clean up */
+	old_id = ice_sched_get_node_rl_prof_id(node, rl_type);
+	/* Configure BW scheduling parameters */
+	status = ice_sched_cfg_node_bw_lmt(hw, node, rl_type, rl_prof_id);
+	if (status)
+		return status;
+
+	/* New changes has been applied */
+	/* Increment the profile ID reference count */
+	rl_prof_info->prof_id_ref++;
+
+	/* Check for old ID removal */
+	if ((old_id == ICE_SCHED_DFLT_RL_PROF_ID && rl_type != ICE_SHARED_BW) ||
+	    old_id == ICE_SCHED_INVAL_PROF_ID || old_id == rl_prof_id)
+		return 0;
+
+	return ice_sched_rm_rl_profile(pi, layer_num,
+				       rl_prof_info->profile.flags &
+				       ICE_AQC_RL_PROFILE_TYPE_M, old_id);
+}
+
+/**
+ * ice_sched_set_node_priority - set node's priority
+ * @pi: port information structure
+ * @node: tree node
+ * @priority: number 0-7 representing priority among siblings
+ *
+ * This function sets priority of a node among it's siblings.
+ */
+int
+ice_sched_set_node_priority(struct ice_port_info *pi, struct ice_sched_node *node,
+			    u16 priority)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	struct ice_aqc_txsched_elem *data;
+
+	buf = node->info;
+	data = &buf.data;
+
+	data->valid_sections |= ICE_AQC_ELEM_VALID_GENERIC;
+	data->generic |= FIELD_PREP(ICE_AQC_ELEM_GENERIC_PRIO_M, priority);
+
+	return ice_sched_update_elem(pi->hw, node, &buf);
+}
+
+/**
+ * ice_sched_set_node_weight - set node's weight
+ * @pi: port information structure
+ * @node: tree node
+ * @weight: number 1-200 representing weight for WFQ
+ *
+ * This function sets weight of the node for WFQ algorithm.
+ */
+int
+ice_sched_set_node_weight(struct ice_port_info *pi, struct ice_sched_node *node, u16 weight)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	struct ice_aqc_txsched_elem *data;
+
+	buf = node->info;
+	data = &buf.data;
+
+	data->valid_sections = ICE_AQC_ELEM_VALID_CIR | ICE_AQC_ELEM_VALID_EIR |
+			       ICE_AQC_ELEM_VALID_GENERIC;
+	data->cir_bw.bw_alloc = cpu_to_le16(weight);
+	data->eir_bw.bw_alloc = cpu_to_le16(weight);
+
+	data->generic |= FIELD_PREP(ICE_AQC_ELEM_GENERIC_SP_M, 0x0);
+
+	return ice_sched_update_elem(pi->hw, node, &buf);
+}
+
+/**
+ * ice_sched_set_node_bw_lmt - set node's BW limit
+ * @pi: port information structure
+ * @node: tree node
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * It updates node's BW limit parameters like BW RL profile ID of type CIR,
+ * EIR, or SRL. The caller needs to hold scheduler lock.
+ */
+int
+ice_sched_set_node_bw_lmt(struct ice_port_info *pi, struct ice_sched_node *node,
+			  enum ice_rl_type rl_type, u32 bw)
+{
+	struct ice_sched_node *cfg_node = node;
+	int status;
+
+	struct ice_hw *hw;
+	u8 layer_num;
+
+	if (!pi)
+		return -EINVAL;
+	hw = pi->hw;
+	/* Remove unused RL profile IDs from HW and SW DB */
+	ice_sched_rm_unused_rl_prof(pi);
+	layer_num = ice_sched_get_rl_prof_layer(pi, rl_type,
+						node->tx_sched_layer);
+	if (layer_num >= hw->num_tx_sched_layers)
+		return -EINVAL;
+
+	if (rl_type == ICE_SHARED_BW) {
+		/* SRL node may be different */
+		cfg_node = ice_sched_get_srl_node(node, layer_num);
+		if (!cfg_node)
+			return -EIO;
+	}
+	/* EIR BW and Shared BW profiles are mutually exclusive and
+	 * hence only one of them may be set for any given element
+	 */
+	status = ice_sched_set_eir_srl_excl(pi, cfg_node, layer_num, rl_type,
+					    bw);
+	if (status)
+		return status;
+	if (bw == ICE_SCHED_DFLT_BW)
+		return ice_sched_set_node_bw_dflt(pi, cfg_node, rl_type,
+						  layer_num);
+	return ice_sched_set_node_bw(pi, cfg_node, rl_type, bw, layer_num);
+}
+
+/**
+ * ice_sched_set_node_bw_dflt_lmt - set node's BW limit to default
+ * @pi: port information structure
+ * @node: pointer to node structure
+ * @rl_type: rate limit type min, max, or shared
+ *
+ * This function configures node element's BW rate limit profile ID of
+ * type CIR, EIR, or SRL to default. This function needs to be called
+ * with the scheduler lock held.
+ */
+static int
+ice_sched_set_node_bw_dflt_lmt(struct ice_port_info *pi,
+			       struct ice_sched_node *node,
+			       enum ice_rl_type rl_type)
+{
+	return ice_sched_set_node_bw_lmt(pi, node, rl_type,
+					 ICE_SCHED_DFLT_BW);
+}
+
+/**
+ * ice_sched_validate_srl_node - Check node for SRL applicability
+ * @node: sched node to configure
+ * @sel_layer: selected SRL layer
+ *
+ * This function checks if the SRL can be applied to a selected layer node on
+ * behalf of the requested node (first argument). This function needs to be
+ * called with scheduler lock held.
+ */
+static int
+ice_sched_validate_srl_node(struct ice_sched_node *node, u8 sel_layer)
+{
+	/* SRL profiles are not available on all layers. Check if the
+	 * SRL profile can be applied to a node above or below the
+	 * requested node. SRL configuration is possible only if the
+	 * selected layer's node has single child.
+	 */
+	if (sel_layer == node->tx_sched_layer ||
+	    ((sel_layer == node->tx_sched_layer + 1) &&
+	    node->num_children == 1) ||
+	    ((sel_layer == node->tx_sched_layer - 1) &&
+	    (node->parent && node->parent->num_children == 1)))
+		return 0;
+
+	return -EIO;
+}
+
+/**
+ * ice_sched_save_q_bw - save queue node's BW information
+ * @q_ctx: queue context structure
+ * @rl_type: rate limit type min, max, or shared
+ * @bw: bandwidth in Kbps - Kilo bits per sec
+ *
+ * Save BW information of queue type node for post replay use.
+ */
+static int
+ice_sched_save_q_bw(struct ice_q_ctx *q_ctx, enum ice_rl_type rl_type, u32 bw)
+{
+	switch (rl_type) {
+	case ICE_MIN_BW:
+		ice_set_clear_cir_bw(&q_ctx->bw_t_info, bw);
+		break;
+	case ICE_MAX_BW:
+		ice_set_clear_eir_bw(&q_ctx->bw_t_info, bw);
+		break;
+	case ICE_SHARED_BW:
+		ice_set_clear_shared_bw(&q_ctx->bw_t_info, bw);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * ice_sched_set_q_bw_lmt - sets queue BW limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ * @bw: bandwidth in Kbps
+ *
+ * This function sets BW limit of queue scheduling node.
+ */
+static int
+ice_sched_set_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		       u16 q_handle, enum ice_rl_type rl_type, u32 bw)
+{
+	struct ice_sched_node *node;
+	struct ice_q_ctx *q_ctx;
+	int status = -EINVAL;
+
+	if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+		return -EINVAL;
+	mutex_lock(&pi->sched_lock);
+	q_ctx = ice_get_lan_q_ctx(pi->hw, vsi_handle, tc, q_handle);
+	if (!q_ctx)
+		goto exit_q_bw_lmt;
+	node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid);
+	if (!node) {
+		ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong q_teid\n");
+		goto exit_q_bw_lmt;
+	}
+
+	/* Return error if it is not a leaf node */
+	if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF)
+		goto exit_q_bw_lmt;
+
+	/* SRL bandwidth layer selection */
+	if (rl_type == ICE_SHARED_BW) {
+		u8 sel_layer; /* selected layer */
+
+		sel_layer = ice_sched_get_rl_prof_layer(pi, rl_type,
+							node->tx_sched_layer);
+		if (sel_layer >= pi->hw->num_tx_sched_layers) {
+			status = -EINVAL;
+			goto exit_q_bw_lmt;
+		}
+		status = ice_sched_validate_srl_node(node, sel_layer);
+		if (status)
+			goto exit_q_bw_lmt;
+	}
+
+	if (bw == ICE_SCHED_DFLT_BW)
+		status = ice_sched_set_node_bw_dflt_lmt(pi, node, rl_type);
+	else
+		status = ice_sched_set_node_bw_lmt(pi, node, rl_type, bw);
+
+	if (!status)
+		status = ice_sched_save_q_bw(q_ctx, rl_type, bw);
+
+exit_q_bw_lmt:
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_cfg_q_bw_lmt - configure queue BW limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ * @bw: bandwidth in Kbps
+ *
+ * This function configures BW limit of queue scheduling node.
+ */
+int
+ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		 u16 q_handle, enum ice_rl_type rl_type, u32 bw)
+{
+	return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type,
+				      bw);
+}
+
+/**
+ * ice_cfg_q_bw_dflt_lmt - configure queue BW default limit
+ * @pi: port information structure
+ * @vsi_handle: sw VSI handle
+ * @tc: traffic class
+ * @q_handle: software queue handle
+ * @rl_type: min, max, or shared
+ *
+ * This function configures BW default limit of queue scheduling node.
+ */
+int
+ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		      u16 q_handle, enum ice_rl_type rl_type)
+{
+	return ice_sched_set_q_bw_lmt(pi, vsi_handle, tc, q_handle, rl_type,
+				      ICE_SCHED_DFLT_BW);
+}
+
+/**
+ * ice_sched_get_node_by_id_type - get node from ID type
+ * @pi: port information structure
+ * @id: identifier
+ * @agg_type: type of aggregator
+ * @tc: traffic class
+ *
+ * This function returns node identified by ID of type aggregator, and
+ * based on traffic class (TC). This function needs to be called with
+ * the scheduler lock held.
+ */
+static struct ice_sched_node *
+ice_sched_get_node_by_id_type(struct ice_port_info *pi, u32 id,
+			      enum ice_agg_type agg_type, u8 tc)
+{
+	struct ice_sched_node *node = NULL;
+
+	switch (agg_type) {
+	case ICE_AGG_TYPE_VSI: {
+		struct ice_vsi_ctx *vsi_ctx;
+		u16 vsi_handle = (u16)id;
+
+		if (!ice_is_vsi_valid(pi->hw, vsi_handle))
+			break;
+		/* Get sched_vsi_info */
+		vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle);
+		if (!vsi_ctx)
+			break;
+		node = vsi_ctx->sched.vsi_node[tc];
+		break;
+	}
+
+	case ICE_AGG_TYPE_AGG: {
+		struct ice_sched_node *tc_node;
+
+		tc_node = ice_sched_get_tc_node(pi, tc);
+		if (tc_node)
+			node = ice_sched_get_agg_node(pi, tc_node, id);
+		break;
+	}
+
+	default:
+		break;
+	}
+
+	return node;
+}
+
+/**
+ * ice_sched_set_node_bw_lmt_per_tc - set node BW limit per TC
+ * @pi: port information structure
+ * @id: ID (software VSI handle or AGG ID)
+ * @agg_type: aggregator type (VSI or AGG type node)
+ * @tc: traffic class
+ * @rl_type: min or max
+ * @bw: bandwidth in Kbps
+ *
+ * This function sets BW limit of VSI or Aggregator scheduling node
+ * based on TC information from passed in argument BW.
+ */
+static int
+ice_sched_set_node_bw_lmt_per_tc(struct ice_port_info *pi, u32 id,
+				 enum ice_agg_type agg_type, u8 tc,
+				 enum ice_rl_type rl_type, u32 bw)
+{
+	struct ice_sched_node *node;
+	int status = -EINVAL;
+
+	if (!pi)
+		return status;
+
+	if (rl_type == ICE_UNKNOWN_BW)
+		return status;
+
+	mutex_lock(&pi->sched_lock);
+	node = ice_sched_get_node_by_id_type(pi, id, agg_type, tc);
+	if (!node) {
+		ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong id, agg type, or tc\n");
+		goto exit_set_node_bw_lmt_per_tc;
+	}
+	if (bw == ICE_SCHED_DFLT_BW)
+		status = ice_sched_set_node_bw_dflt_lmt(pi, node, rl_type);
+	else
+		status = ice_sched_set_node_bw_lmt(pi, node, rl_type, bw);
+
+exit_set_node_bw_lmt_per_tc:
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_cfg_vsi_bw_lmt_per_tc - configure VSI BW limit per TC
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: traffic class
+ * @rl_type: min or max
+ * @bw: bandwidth in Kbps
+ *
+ * This function configures BW limit of VSI scheduling node based on TC
+ * information.
+ */
+int
+ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+			  enum ice_rl_type rl_type, u32 bw)
+{
+	int status;
+
+	status = ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle,
+						  ICE_AGG_TYPE_VSI,
+						  tc, rl_type, bw);
+	if (!status) {
+		mutex_lock(&pi->sched_lock);
+		status = ice_sched_save_vsi_bw(pi, vsi_handle, tc, rl_type, bw);
+		mutex_unlock(&pi->sched_lock);
+	}
+	return status;
+}
+
+/**
+ * ice_cfg_vsi_bw_dflt_lmt_per_tc - configure default VSI BW limit per TC
+ * @pi: port information structure
+ * @vsi_handle: software VSI handle
+ * @tc: traffic class
+ * @rl_type: min or max
+ *
+ * This function configures default BW limit of VSI scheduling node based on TC
+ * information.
+ */
+int
+ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+			       enum ice_rl_type rl_type)
+{
+	int status;
+
+	status = ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle,
+						  ICE_AGG_TYPE_VSI,
+						  tc, rl_type,
+						  ICE_SCHED_DFLT_BW);
+	if (!status) {
+		mutex_lock(&pi->sched_lock);
+		status = ice_sched_save_vsi_bw(pi, vsi_handle, tc, rl_type,
+					       ICE_SCHED_DFLT_BW);
+		mutex_unlock(&pi->sched_lock);
+	}
+	return status;
+}
+
+/**
+ * ice_cfg_rl_burst_size - Set burst size value
+ * @hw: pointer to the HW struct
+ * @bytes: burst size in bytes
+ *
+ * This function configures/set the burst size to requested new value. The new
+ * burst size value is used for future rate limit calls. It doesn't change the
+ * existing or previously created RL profiles.
+ */
+int ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes)
+{
+	u16 burst_size_to_prog;
+
+	if (bytes < ICE_MIN_BURST_SIZE_ALLOWED ||
+	    bytes > ICE_MAX_BURST_SIZE_ALLOWED)
+		return -EINVAL;
+	if (ice_round_to_num(bytes, 64) <=
+	    ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY) {
+		/* 64 byte granularity case */
+		/* Disable MSB granularity bit */
+		burst_size_to_prog = ICE_64_BYTE_GRANULARITY;
+		/* round number to nearest 64 byte granularity */
+		bytes = ice_round_to_num(bytes, 64);
+		/* The value is in 64 byte chunks */
+		burst_size_to_prog |= (u16)(bytes / 64);
+	} else {
+		/* k bytes granularity case */
+		/* Enable MSB granularity bit */
+		burst_size_to_prog = ICE_KBYTE_GRANULARITY;
+		/* round number to nearest 1024 granularity */
+		bytes = ice_round_to_num(bytes, 1024);
+		/* check rounding doesn't go beyond allowed */
+		if (bytes > ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY)
+			bytes = ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY;
+		/* The value is in k bytes */
+		burst_size_to_prog |= (u16)(bytes / 1024);
+	}
+	hw->max_burst_size = burst_size_to_prog;
+	return 0;
+}
+
+/**
+ * ice_sched_replay_node_prio - re-configure node priority
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @priority: priority value
+ *
+ * This function configures node element's priority value. It
+ * needs to be called with scheduler lock held.
+ */
+static int
+ice_sched_replay_node_prio(struct ice_hw *hw, struct ice_sched_node *node,
+			   u8 priority)
+{
+	struct ice_aqc_txsched_elem_data buf;
+	struct ice_aqc_txsched_elem *data;
+	int status;
+
+	buf = node->info;
+	data = &buf.data;
+	data->valid_sections |= ICE_AQC_ELEM_VALID_GENERIC;
+	data->generic = priority;
+
+	/* Configure element */
+	status = ice_sched_update_elem(hw, node, &buf);
+	return status;
+}
+
+/**
+ * ice_sched_replay_node_bw - replay node(s) BW
+ * @hw: pointer to the HW struct
+ * @node: sched node to configure
+ * @bw_t_info: BW type information
+ *
+ * This function restores node's BW from bw_t_info. The caller needs
+ * to hold the scheduler lock.
+ */
+static int
+ice_sched_replay_node_bw(struct ice_hw *hw, struct ice_sched_node *node,
+			 struct ice_bw_type_info *bw_t_info)
+{
+	struct ice_port_info *pi = hw->port_info;
+	int status = -EINVAL;
+	u16 bw_alloc;
+
+	if (!node)
+		return status;
+	if (bitmap_empty(bw_t_info->bw_t_bitmap, ICE_BW_TYPE_CNT))
+		return 0;
+	if (test_bit(ICE_BW_TYPE_PRIO, bw_t_info->bw_t_bitmap)) {
+		status = ice_sched_replay_node_prio(hw, node,
+						    bw_t_info->generic);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap)) {
+		status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW,
+						   bw_t_info->cir_bw.bw);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_CIR_WT, bw_t_info->bw_t_bitmap)) {
+		bw_alloc = bw_t_info->cir_bw.bw_alloc;
+		status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MIN_BW,
+						     bw_alloc);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap)) {
+		status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW,
+						   bw_t_info->eir_bw.bw);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_EIR_WT, bw_t_info->bw_t_bitmap)) {
+		bw_alloc = bw_t_info->eir_bw.bw_alloc;
+		status = ice_sched_cfg_node_bw_alloc(hw, node, ICE_MAX_BW,
+						     bw_alloc);
+		if (status)
+			return status;
+	}
+	if (test_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap))
+		status = ice_sched_set_node_bw_lmt(pi, node, ICE_SHARED_BW,
+						   bw_t_info->shared_bw);
+	return status;
+}
+
+/**
+ * ice_sched_get_ena_tc_bitmap - get enabled TC bitmap
+ * @pi: port info struct
+ * @tc_bitmap: 8 bits TC bitmap to check
+ * @ena_tc_bitmap: 8 bits enabled TC bitmap to return
+ *
+ * This function returns enabled TC bitmap in variable ena_tc_bitmap. Some TCs
+ * may be missing, it returns enabled TCs. This function needs to be called with
+ * scheduler lock held.
+ */
+static void
+ice_sched_get_ena_tc_bitmap(struct ice_port_info *pi,
+			    unsigned long *tc_bitmap,
+			    unsigned long *ena_tc_bitmap)
+{
+	u8 tc;
+
+	/* Some TC(s) may be missing after reset, adjust for replay */
+	ice_for_each_traffic_class(tc)
+		if (ice_is_tc_ena(*tc_bitmap, tc) &&
+		    (ice_sched_get_tc_node(pi, tc)))
+			set_bit(tc, ena_tc_bitmap);
+}
+
+/**
+ * ice_sched_replay_agg - recreate aggregator node(s)
+ * @hw: pointer to the HW struct
+ *
+ * This function recreate aggregator type nodes which are not replayed earlier.
+ * It also replay aggregator BW information. These aggregator nodes are not
+ * associated with VSI type node yet.
+ */
+void ice_sched_replay_agg(struct ice_hw *hw)
+{
+	struct ice_port_info *pi = hw->port_info;
+	struct ice_sched_agg_info *agg_info;
+
+	mutex_lock(&pi->sched_lock);
+	list_for_each_entry(agg_info, &hw->agg_list, list_entry)
+		/* replay aggregator (re-create aggregator node) */
+		if (!bitmap_equal(agg_info->tc_bitmap, agg_info->replay_tc_bitmap,
+				  ICE_MAX_TRAFFIC_CLASS)) {
+			DECLARE_BITMAP(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+			int status;
+
+			bitmap_zero(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+			ice_sched_get_ena_tc_bitmap(pi,
+						    agg_info->replay_tc_bitmap,
+						    replay_bitmap);
+			status = ice_sched_cfg_agg(hw->port_info,
+						   agg_info->agg_id,
+						   ICE_AGG_TYPE_AGG,
+						   replay_bitmap);
+			if (status) {
+				dev_info(ice_hw_to_dev(hw),
+					 "Replay agg id[%d] failed\n",
+					 agg_info->agg_id);
+				/* Move on to next one */
+				continue;
+			}
+		}
+	mutex_unlock(&pi->sched_lock);
+}
+
+/**
+ * ice_sched_replay_agg_vsi_preinit - Agg/VSI replay pre initialization
+ * @hw: pointer to the HW struct
+ *
+ * This function initialize aggregator(s) TC bitmap to zero. A required
+ * preinit step for replaying aggregators.
+ */
+void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw)
+{
+	struct ice_port_info *pi = hw->port_info;
+	struct ice_sched_agg_info *agg_info;
+
+	mutex_lock(&pi->sched_lock);
+	list_for_each_entry(agg_info, &hw->agg_list, list_entry) {
+		struct ice_sched_agg_vsi_info *agg_vsi_info;
+
+		agg_info->tc_bitmap[0] = 0;
+		list_for_each_entry(agg_vsi_info, &agg_info->agg_vsi_list,
+				    list_entry)
+			agg_vsi_info->tc_bitmap[0] = 0;
+	}
+	mutex_unlock(&pi->sched_lock);
+}
+
+/**
+ * ice_sched_replay_vsi_agg - replay aggregator & VSI to aggregator node(s)
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ *
+ * This function replays aggregator node, VSI to aggregator type nodes, and
+ * their node bandwidth information. This function needs to be called with
+ * scheduler lock held.
+ */
+static int ice_sched_replay_vsi_agg(struct ice_hw *hw, u16 vsi_handle)
+{
+	DECLARE_BITMAP(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+	struct ice_sched_agg_vsi_info *agg_vsi_info;
+	struct ice_port_info *pi = hw->port_info;
+	struct ice_sched_agg_info *agg_info;
+	int status;
+
+	bitmap_zero(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+	agg_info = ice_get_vsi_agg_info(hw, vsi_handle);
+	if (!agg_info)
+		return 0; /* Not present in list - default Agg case */
+	agg_vsi_info = ice_get_agg_vsi_info(agg_info, vsi_handle);
+	if (!agg_vsi_info)
+		return 0; /* Not present in list - default Agg case */
+	ice_sched_get_ena_tc_bitmap(pi, agg_info->replay_tc_bitmap,
+				    replay_bitmap);
+	/* Replay aggregator node associated to vsi_handle */
+	status = ice_sched_cfg_agg(hw->port_info, agg_info->agg_id,
+				   ICE_AGG_TYPE_AGG, replay_bitmap);
+	if (status)
+		return status;
+
+	bitmap_zero(replay_bitmap, ICE_MAX_TRAFFIC_CLASS);
+	ice_sched_get_ena_tc_bitmap(pi, agg_vsi_info->replay_tc_bitmap,
+				    replay_bitmap);
+	/* Move this VSI (vsi_handle) to above aggregator */
+	return ice_sched_assoc_vsi_to_agg(pi, agg_info->agg_id, vsi_handle,
+					  replay_bitmap);
+}
+
+/**
+ * ice_replay_vsi_agg - replay VSI to aggregator node
+ * @hw: pointer to the HW struct
+ * @vsi_handle: software VSI handle
+ *
+ * This function replays association of VSI to aggregator type nodes, and
+ * node bandwidth information.
+ */
+int ice_replay_vsi_agg(struct ice_hw *hw, u16 vsi_handle)
+{
+	struct ice_port_info *pi = hw->port_info;
+	int status;
+
+	mutex_lock(&pi->sched_lock);
+	status = ice_sched_replay_vsi_agg(hw, vsi_handle);
+	mutex_unlock(&pi->sched_lock);
+	return status;
+}
+
+/**
+ * ice_sched_replay_q_bw - replay queue type node BW
+ * @pi: port information structure
+ * @q_ctx: queue context structure
+ *
+ * This function replays queue type node bandwidth. This function needs to be
+ * called with scheduler lock held.
+ */
+int ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx)
+{
+	struct ice_sched_node *q_node;
+
+	/* Following also checks the presence of node in tree */
+	q_node = ice_sched_find_node_by_teid(pi->root, q_ctx->q_teid);
+	if (!q_node)
+		return -EINVAL;
+	return ice_sched_replay_node_bw(pi->hw, q_node, &q_ctx->bw_t_info);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h
new file mode 100644
index 0000000000..0055d9330c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sched.h
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_SCHED_H_
+#define _ICE_SCHED_H_
+
+#include "ice_common.h"
+
+#define SCHED_NODE_NAME_MAX_LEN 32
+
+#define ICE_QGRP_LAYER_OFFSET	2
+#define ICE_VSI_LAYER_OFFSET	4
+#define ICE_AGG_LAYER_OFFSET	6
+#define ICE_SCHED_INVAL_LAYER_NUM	0xFF
+/* Burst size is a 12 bits register that is configured while creating the RL
+ * profile(s). MSB is a granularity bit and tells the granularity type
+ * 0 - LSB bits are in 64 bytes granularity
+ * 1 - LSB bits are in 1K bytes granularity
+ */
+#define ICE_64_BYTE_GRANULARITY			0
+#define ICE_KBYTE_GRANULARITY			BIT(11)
+#define ICE_MIN_BURST_SIZE_ALLOWED		64 /* In Bytes */
+#define ICE_MAX_BURST_SIZE_ALLOWED \
+	((BIT(11) - 1) * 1024) /* In Bytes */
+#define ICE_MAX_BURST_SIZE_64_BYTE_GRANULARITY \
+	((BIT(11) - 1) * 64) /* In Bytes */
+#define ICE_MAX_BURST_SIZE_KBYTE_GRANULARITY	ICE_MAX_BURST_SIZE_ALLOWED
+
+#define ICE_RL_PROF_ACCURACY_BYTES 128
+#define ICE_RL_PROF_MULTIPLIER 10000
+#define ICE_RL_PROF_TS_MULTIPLIER 32
+#define ICE_RL_PROF_FRACTION 512
+
+#define ICE_PSM_CLK_367MHZ_IN_HZ 367647059
+#define ICE_PSM_CLK_416MHZ_IN_HZ 416666667
+#define ICE_PSM_CLK_446MHZ_IN_HZ 446428571
+#define ICE_PSM_CLK_390MHZ_IN_HZ 390625000
+
+/* BW rate limit profile parameters list entry along
+ * with bandwidth maintained per layer in port info
+ */
+struct ice_aqc_rl_profile_info {
+	struct ice_aqc_rl_profile_elem profile;
+	struct list_head list_entry;
+	u32 bw;			/* requested */
+	u16 prof_id_ref;	/* profile ID to node association ref count */
+};
+
+struct ice_sched_agg_vsi_info {
+	struct list_head list_entry;
+	DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
+	u16 vsi_handle;
+	/* save aggregator VSI TC bitmap */
+	DECLARE_BITMAP(replay_tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
+};
+
+struct ice_sched_agg_info {
+	struct list_head agg_vsi_list;
+	struct list_head list_entry;
+	DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
+	u32 agg_id;
+	enum ice_agg_type agg_type;
+	/* bw_t_info saves aggregator BW information */
+	struct ice_bw_type_info bw_t_info[ICE_MAX_TRAFFIC_CLASS];
+	/* save aggregator TC bitmap */
+	DECLARE_BITMAP(replay_tc_bitmap, ICE_MAX_TRAFFIC_CLASS);
+};
+
+/* FW AQ command calls */
+int
+ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req,
+			 struct ice_aqc_txsched_elem_data *buf, u16 buf_size,
+			 u16 *elems_ret, struct ice_sq_cd *cd);
+
+int
+ice_sched_set_node_bw_lmt(struct ice_port_info *pi, struct ice_sched_node *node,
+			  enum ice_rl_type rl_type, u32 bw);
+
+int
+ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node,
+		      enum ice_rl_type rl_type, u32 bw, u8 layer_num);
+
+int
+ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node,
+		    struct ice_sched_node *parent, u8 layer, u16 num_nodes,
+		    u16 *num_nodes_added, u32 *first_node_teid,
+		    struct ice_sched_node **prealloc_node);
+
+int
+ice_sched_move_nodes(struct ice_port_info *pi, struct ice_sched_node *parent,
+		     u16 num_items, u32 *list);
+
+int ice_sched_set_node_priority(struct ice_port_info *pi, struct ice_sched_node *node,
+				u16 priority);
+int ice_sched_set_node_weight(struct ice_port_info *pi, struct ice_sched_node *node, u16 weight);
+
+int ice_sched_init_port(struct ice_port_info *pi);
+int ice_sched_query_res_alloc(struct ice_hw *hw);
+void ice_sched_get_psm_clk_freq(struct ice_hw *hw);
+
+void ice_sched_clear_port(struct ice_port_info *pi);
+void ice_sched_cleanup_all(struct ice_hw *hw);
+void ice_sched_clear_agg(struct ice_hw *hw);
+
+struct ice_sched_node *
+ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid);
+int
+ice_sched_add_node(struct ice_port_info *pi, u8 layer,
+		   struct ice_aqc_txsched_elem_data *info,
+		   struct ice_sched_node *prealloc_node);
+void
+ice_sched_update_parent(struct ice_sched_node *new_parent,
+			struct ice_sched_node *node);
+void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node);
+struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc);
+struct ice_sched_node *
+ice_sched_get_free_qparent(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+			   u8 owner);
+int
+ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
+		  u8 owner, bool enable);
+int ice_rm_vsi_lan_cfg(struct ice_port_info *pi, u16 vsi_handle);
+int ice_rm_vsi_rdma_cfg(struct ice_port_info *pi, u16 vsi_handle);
+
+/* Tx scheduler rate limiter functions */
+int
+ice_cfg_agg(struct ice_port_info *pi, u32 agg_id,
+	    enum ice_agg_type agg_type, u8 tc_bitmap);
+int
+ice_move_vsi_to_agg(struct ice_port_info *pi, u32 agg_id, u16 vsi_handle,
+		    u8 tc_bitmap);
+int
+ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		 u16 q_handle, enum ice_rl_type rl_type, u32 bw);
+int
+ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+		      u16 q_handle, enum ice_rl_type rl_type);
+int
+ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+			  enum ice_rl_type rl_type, u32 bw);
+int
+ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc,
+			       enum ice_rl_type rl_type);
+int ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes);
+int
+ice_sched_suspend_resume_elems(struct ice_hw *hw, u8 num_nodes, u32 *node_teids,
+			       bool suspend);
+struct ice_sched_node *
+ice_sched_get_agg_node(struct ice_port_info *pi, struct ice_sched_node *tc_node,
+		       u32 agg_id);
+u8 ice_sched_get_agg_layer(struct ice_hw *hw);
+u8 ice_sched_get_vsi_layer(struct ice_hw *hw);
+struct ice_sched_node *
+ice_sched_get_free_vsi_parent(struct ice_hw *hw, struct ice_sched_node *node,
+			      u16 *num_nodes);
+int
+ice_sched_add_nodes_to_layer(struct ice_port_info *pi,
+			     struct ice_sched_node *tc_node,
+			     struct ice_sched_node *parent, u8 layer,
+			     u16 num_nodes, u32 *first_node_teid,
+			     u16 *num_nodes_added);
+void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw);
+void ice_sched_replay_agg(struct ice_hw *hw);
+int
+ice_aq_move_sched_elems(struct ice_hw *hw, u16 grps_req,
+			struct ice_aqc_move_elem *buf, u16 buf_size,
+			u16 *grps_movd, struct ice_sq_cd *cd);
+int ice_replay_vsi_agg(struct ice_hw *hw, u16 vsi_handle);
+int ice_sched_replay_q_bw(struct ice_port_info *pi, struct ice_q_ctx *q_ctx);
+#endif /* _ICE_SCHED_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
new file mode 100644
index 0000000000..31314e7540
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -0,0 +1,1739 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_vf_lib_private.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+#include "ice_dcb_lib.h"
+#include "ice_flow.h"
+#include "ice_eswitch.h"
+#include "ice_virtchnl_allowlist.h"
+#include "ice_flex_pipe.h"
+#include "ice_vf_vsi_vlan_ops.h"
+#include "ice_vlan.h"
+
+/**
+ * ice_free_vf_entries - Free all VF entries from the hash table
+ * @pf: pointer to the PF structure
+ *
+ * Iterate over the VF hash table, removing and releasing all VF entries.
+ * Called during VF teardown or as cleanup during failed VF initialization.
+ */
+static void ice_free_vf_entries(struct ice_pf *pf)
+{
+	struct ice_vfs *vfs = &pf->vfs;
+	struct hlist_node *tmp;
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	/* Remove all VFs from the hash table and release their main
+	 * reference. Once all references to the VF are dropped, ice_put_vf()
+	 * will call ice_release_vf which will remove the VF memory.
+	 */
+	lockdep_assert_held(&vfs->table_lock);
+
+	hash_for_each_safe(vfs->table, bkt, tmp, vf, entry) {
+		hash_del_rcu(&vf->entry);
+		ice_put_vf(vf);
+	}
+}
+
+/**
+ * ice_free_vf_res - Free a VF's resources
+ * @vf: pointer to the VF info
+ */
+static void ice_free_vf_res(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	int i, last_vector_idx;
+
+	/* First, disable VF's configuration API to prevent OS from
+	 * accessing the VF's VSI after it's freed or invalidated.
+	 */
+	clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
+	ice_vf_fdir_exit(vf);
+	/* free VF control VSI */
+	if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+		ice_vf_ctrl_vsi_release(vf);
+
+	/* free VSI and disconnect it from the parent uplink */
+	if (vf->lan_vsi_idx != ICE_NO_VSI) {
+		ice_vf_vsi_release(vf);
+		vf->num_mac = 0;
+	}
+
+	last_vector_idx = vf->first_vector_idx + pf->vfs.num_msix_per - 1;
+
+	/* clear VF MDD event information */
+	memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events));
+	memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events));
+
+	/* Disable interrupts so that VF starts in a known state */
+	for (i = vf->first_vector_idx; i <= last_vector_idx; i++) {
+		wr32(&pf->hw, GLINT_DYN_CTL(i), GLINT_DYN_CTL_CLEARPBA_M);
+		ice_flush(&pf->hw);
+	}
+	/* reset some of the state variables keeping track of the resources */
+	clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+	clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+}
+
+/**
+ * ice_dis_vf_mappings
+ * @vf: pointer to the VF structure
+ */
+static void ice_dis_vf_mappings(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	struct device *dev;
+	int first, last, v;
+	struct ice_hw *hw;
+
+	hw = &pf->hw;
+	vsi = ice_get_vf_vsi(vf);
+	if (WARN_ON(!vsi))
+		return;
+
+	dev = ice_pf_to_dev(pf);
+	wr32(hw, VPINT_ALLOC(vf->vf_id), 0);
+	wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0);
+
+	first = vf->first_vector_idx;
+	last = first + pf->vfs.num_msix_per - 1;
+	for (v = first; v <= last; v++) {
+		u32 reg;
+
+		reg = (((1 << GLINT_VECT2FUNC_IS_PF_S) &
+			GLINT_VECT2FUNC_IS_PF_M) |
+		       ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) &
+			GLINT_VECT2FUNC_PF_NUM_M));
+		wr32(hw, GLINT_VECT2FUNC(v), reg);
+	}
+
+	if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG)
+		wr32(hw, VPLAN_TX_QBASE(vf->vf_id), 0);
+	else
+		dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n");
+
+	if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG)
+		wr32(hw, VPLAN_RX_QBASE(vf->vf_id), 0);
+	else
+		dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n");
+}
+
+/**
+ * ice_sriov_free_msix_res - Reset/free any used MSIX resources
+ * @pf: pointer to the PF structure
+ *
+ * Since no MSIX entries are taken from the pf->irq_tracker then just clear
+ * the pf->sriov_base_vector.
+ *
+ * Returns 0 on success, and -EINVAL on error.
+ */
+static int ice_sriov_free_msix_res(struct ice_pf *pf)
+{
+	if (!pf)
+		return -EINVAL;
+
+	pf->sriov_base_vector = 0;
+
+	return 0;
+}
+
+/**
+ * ice_free_vfs - Free all VFs
+ * @pf: pointer to the PF structure
+ */
+void ice_free_vfs(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_vfs *vfs = &pf->vfs;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	if (!ice_has_vfs(pf))
+		return;
+
+	while (test_and_set_bit(ICE_VF_DIS, pf->state))
+		usleep_range(1000, 2000);
+
+	/* Disable IOV before freeing resources. This lets any VF drivers
+	 * running in the host get themselves cleaned up before we yank
+	 * the carpet out from underneath their feet.
+	 */
+	if (!pci_vfs_assigned(pf->pdev))
+		pci_disable_sriov(pf->pdev);
+	else
+		dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n");
+
+	mutex_lock(&vfs->table_lock);
+
+	ice_eswitch_release(pf);
+
+	ice_for_each_vf(pf, bkt, vf) {
+		mutex_lock(&vf->cfg_lock);
+
+		ice_dis_vf_qs(vf);
+
+		if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+			/* disable VF qp mappings and set VF disable state */
+			ice_dis_vf_mappings(vf);
+			set_bit(ICE_VF_STATE_DIS, vf->vf_states);
+			ice_free_vf_res(vf);
+		}
+
+		if (!pci_vfs_assigned(pf->pdev)) {
+			u32 reg_idx, bit_idx;
+
+			reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32;
+			bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32;
+			wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
+		}
+
+		/* clear malicious info since the VF is getting released */
+		list_del(&vf->mbx_info.list_entry);
+
+		mutex_unlock(&vf->cfg_lock);
+	}
+
+	if (ice_sriov_free_msix_res(pf))
+		dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n");
+
+	vfs->num_qps_per = 0;
+	ice_free_vf_entries(pf);
+
+	mutex_unlock(&vfs->table_lock);
+
+	clear_bit(ICE_VF_DIS, pf->state);
+	clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
+}
+
+/**
+ * ice_vf_vsi_setup - Set up a VF VSI
+ * @vf: VF to setup VSI for
+ *
+ * Returns pointer to the successfully allocated VSI struct on success,
+ * otherwise returns NULL on failure.
+ */
+static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf)
+{
+	struct ice_vsi_cfg_params params = {};
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	params.type = ICE_VSI_VF;
+	params.pi = ice_vf_get_port_info(vf);
+	params.vf = vf;
+	params.flags = ICE_VSI_FLAG_INIT;
+
+	vsi = ice_vsi_setup(pf, &params);
+
+	if (!vsi) {
+		dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n");
+		ice_vf_invalidate_vsi(vf);
+		return NULL;
+	}
+
+	vf->lan_vsi_idx = vsi->idx;
+	vf->lan_vsi_num = vsi->vsi_num;
+
+	return vsi;
+}
+
+/**
+ * ice_calc_vf_first_vector_idx - Calculate MSIX vector index in the PF space
+ * @pf: pointer to PF structure
+ * @vf: pointer to VF that the first MSIX vector index is being calculated for
+ *
+ * This returns the first MSIX vector index in PF space that is used by this VF.
+ * This index is used when accessing PF relative registers such as
+ * GLINT_VECT2FUNC and GLINT_DYN_CTL.
+ * This will always be the OICR index in the AVF driver so any functionality
+ * using vf->first_vector_idx for queue configuration will have to increment by
+ * 1 to avoid meddling with the OICR index.
+ */
+static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf)
+{
+	return pf->sriov_base_vector + vf->vf_id * pf->vfs.num_msix_per;
+}
+
+/**
+ * ice_ena_vf_msix_mappings - enable VF MSIX mappings in hardware
+ * @vf: VF to enable MSIX mappings for
+ *
+ * Some of the registers need to be indexed/configured using hardware global
+ * device values and other registers need 0-based values, which represent PF
+ * based values.
+ */
+static void ice_ena_vf_msix_mappings(struct ice_vf *vf)
+{
+	int device_based_first_msix, device_based_last_msix;
+	int pf_based_first_msix, pf_based_last_msix, v;
+	struct ice_pf *pf = vf->pf;
+	int device_based_vf_id;
+	struct ice_hw *hw;
+	u32 reg;
+
+	hw = &pf->hw;
+	pf_based_first_msix = vf->first_vector_idx;
+	pf_based_last_msix = (pf_based_first_msix + pf->vfs.num_msix_per) - 1;
+
+	device_based_first_msix = pf_based_first_msix +
+		pf->hw.func_caps.common_cap.msix_vector_first_id;
+	device_based_last_msix =
+		(device_based_first_msix + pf->vfs.num_msix_per) - 1;
+	device_based_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
+
+	reg = (((device_based_first_msix << VPINT_ALLOC_FIRST_S) &
+		VPINT_ALLOC_FIRST_M) |
+	       ((device_based_last_msix << VPINT_ALLOC_LAST_S) &
+		VPINT_ALLOC_LAST_M) | VPINT_ALLOC_VALID_M);
+	wr32(hw, VPINT_ALLOC(vf->vf_id), reg);
+
+	reg = (((device_based_first_msix << VPINT_ALLOC_PCI_FIRST_S)
+		 & VPINT_ALLOC_PCI_FIRST_M) |
+	       ((device_based_last_msix << VPINT_ALLOC_PCI_LAST_S) &
+		VPINT_ALLOC_PCI_LAST_M) | VPINT_ALLOC_PCI_VALID_M);
+	wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), reg);
+
+	/* map the interrupts to its functions */
+	for (v = pf_based_first_msix; v <= pf_based_last_msix; v++) {
+		reg = (((device_based_vf_id << GLINT_VECT2FUNC_VF_NUM_S) &
+			GLINT_VECT2FUNC_VF_NUM_M) |
+		       ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) &
+			GLINT_VECT2FUNC_PF_NUM_M));
+		wr32(hw, GLINT_VECT2FUNC(v), reg);
+	}
+
+	/* Map mailbox interrupt to VF MSI-X vector 0 */
+	wr32(hw, VPINT_MBX_CTL(device_based_vf_id), VPINT_MBX_CTL_CAUSE_ENA_M);
+}
+
+/**
+ * ice_ena_vf_q_mappings - enable Rx/Tx queue mappings for a VF
+ * @vf: VF to enable the mappings for
+ * @max_txq: max Tx queues allowed on the VF's VSI
+ * @max_rxq: max Rx queues allowed on the VF's VSI
+ */
+static void ice_ena_vf_q_mappings(struct ice_vf *vf, u16 max_txq, u16 max_rxq)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+	struct ice_hw *hw = &vf->pf->hw;
+	u32 reg;
+
+	if (WARN_ON(!vsi))
+		return;
+
+	/* set regardless of mapping mode */
+	wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_id), VPLAN_TXQ_MAPENA_TX_ENA_M);
+
+	/* VF Tx queues allocation */
+	if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG) {
+		/* set the VF PF Tx queue range
+		 * VFNUMQ value should be set to (number of queues - 1). A value
+		 * of 0 means 1 queue and a value of 255 means 256 queues
+		 */
+		reg = (((vsi->txq_map[0] << VPLAN_TX_QBASE_VFFIRSTQ_S) &
+			VPLAN_TX_QBASE_VFFIRSTQ_M) |
+		       (((max_txq - 1) << VPLAN_TX_QBASE_VFNUMQ_S) &
+			VPLAN_TX_QBASE_VFNUMQ_M));
+		wr32(hw, VPLAN_TX_QBASE(vf->vf_id), reg);
+	} else {
+		dev_err(dev, "Scattered mode for VF Tx queues is not yet implemented\n");
+	}
+
+	/* set regardless of mapping mode */
+	wr32(hw, VPLAN_RXQ_MAPENA(vf->vf_id), VPLAN_RXQ_MAPENA_RX_ENA_M);
+
+	/* VF Rx queues allocation */
+	if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG) {
+		/* set the VF PF Rx queue range
+		 * VFNUMQ value should be set to (number of queues - 1). A value
+		 * of 0 means 1 queue and a value of 255 means 256 queues
+		 */
+		reg = (((vsi->rxq_map[0] << VPLAN_RX_QBASE_VFFIRSTQ_S) &
+			VPLAN_RX_QBASE_VFFIRSTQ_M) |
+		       (((max_rxq - 1) << VPLAN_RX_QBASE_VFNUMQ_S) &
+			VPLAN_RX_QBASE_VFNUMQ_M));
+		wr32(hw, VPLAN_RX_QBASE(vf->vf_id), reg);
+	} else {
+		dev_err(dev, "Scattered mode for VF Rx queues is not yet implemented\n");
+	}
+}
+
+/**
+ * ice_ena_vf_mappings - enable VF MSIX and queue mapping
+ * @vf: pointer to the VF structure
+ */
+static void ice_ena_vf_mappings(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+
+	if (WARN_ON(!vsi))
+		return;
+
+	ice_ena_vf_msix_mappings(vf);
+	ice_ena_vf_q_mappings(vf, vsi->alloc_txq, vsi->alloc_rxq);
+}
+
+/**
+ * ice_calc_vf_reg_idx - Calculate the VF's register index in the PF space
+ * @vf: VF to calculate the register index for
+ * @q_vector: a q_vector associated to the VF
+ */
+int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector)
+{
+	struct ice_pf *pf;
+
+	if (!vf || !q_vector)
+		return -EINVAL;
+
+	pf = vf->pf;
+
+	/* always add one to account for the OICR being the first MSIX */
+	return pf->sriov_base_vector + pf->vfs.num_msix_per * vf->vf_id +
+		q_vector->v_idx + 1;
+}
+
+/**
+ * ice_sriov_set_msix_res - Set any used MSIX resources
+ * @pf: pointer to PF structure
+ * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs
+ *
+ * This function allows SR-IOV resources to be taken from the end of the PF's
+ * allowed HW MSIX vectors so that the irq_tracker will not be affected. We
+ * just set the pf->sriov_base_vector and return success.
+ *
+ * If there are not enough resources available, return an error. This should
+ * always be caught by ice_set_per_vf_res().
+ *
+ * Return 0 on success, and -EINVAL when there are not enough MSIX vectors
+ * in the PF's space available for SR-IOV.
+ */
+static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed)
+{
+	u16 total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors;
+	int vectors_used = ice_get_max_used_msix_vector(pf);
+	int sriov_base_vector;
+
+	sriov_base_vector = total_vectors - num_msix_needed;
+
+	/* make sure we only grab irq_tracker entries from the list end and
+	 * that we have enough available MSIX vectors
+	 */
+	if (sriov_base_vector < vectors_used)
+		return -EINVAL;
+
+	pf->sriov_base_vector = sriov_base_vector;
+
+	return 0;
+}
+
+/**
+ * ice_set_per_vf_res - check if vectors and queues are available
+ * @pf: pointer to the PF structure
+ * @num_vfs: the number of SR-IOV VFs being configured
+ *
+ * First, determine HW interrupts from common pool. If we allocate fewer VFs, we
+ * get more vectors and can enable more queues per VF. Note that this does not
+ * grab any vectors from the SW pool already allocated. Also note, that all
+ * vector counts include one for each VF's miscellaneous interrupt vector
+ * (i.e. OICR).
+ *
+ * Minimum VFs - 2 vectors, 1 queue pair
+ * Small VFs - 5 vectors, 4 queue pairs
+ * Medium VFs - 17 vectors, 16 queue pairs
+ *
+ * Second, determine number of queue pairs per VF by starting with a pre-defined
+ * maximum each VF supports. If this is not possible, then we adjust based on
+ * queue pairs available on the device.
+ *
+ * Lastly, set queue and MSI-X VF variables tracked by the PF so it can be used
+ * by each VF during VF initialization and reset.
+ */
+static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs)
+{
+	int vectors_used = ice_get_max_used_msix_vector(pf);
+	u16 num_msix_per_vf, num_txq, num_rxq, avail_qs;
+	int msix_avail_per_vf, msix_avail_for_sriov;
+	struct device *dev = ice_pf_to_dev(pf);
+	int err;
+
+	lockdep_assert_held(&pf->vfs.table_lock);
+
+	if (!num_vfs)
+		return -EINVAL;
+
+	/* determine MSI-X resources per VF */
+	msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors -
+		vectors_used;
+	msix_avail_per_vf = msix_avail_for_sriov / num_vfs;
+	if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) {
+		num_msix_per_vf = ICE_NUM_VF_MSIX_MED;
+	} else if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_SMALL) {
+		num_msix_per_vf = ICE_NUM_VF_MSIX_SMALL;
+	} else if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MULTIQ_MIN) {
+		num_msix_per_vf = ICE_NUM_VF_MSIX_MULTIQ_MIN;
+	} else if (msix_avail_per_vf >= ICE_MIN_INTR_PER_VF) {
+		num_msix_per_vf = ICE_MIN_INTR_PER_VF;
+	} else {
+		dev_err(dev, "Only %d MSI-X interrupts available for SR-IOV. Not enough to support minimum of %d MSI-X interrupts per VF for %d VFs\n",
+			msix_avail_for_sriov, ICE_MIN_INTR_PER_VF,
+			num_vfs);
+		return -ENOSPC;
+	}
+
+	num_txq = min_t(u16, num_msix_per_vf - ICE_NONQ_VECS_VF,
+			ICE_MAX_RSS_QS_PER_VF);
+	avail_qs = ice_get_avail_txq_count(pf) / num_vfs;
+	if (!avail_qs)
+		num_txq = 0;
+	else if (num_txq > avail_qs)
+		num_txq = rounddown_pow_of_two(avail_qs);
+
+	num_rxq = min_t(u16, num_msix_per_vf - ICE_NONQ_VECS_VF,
+			ICE_MAX_RSS_QS_PER_VF);
+	avail_qs = ice_get_avail_rxq_count(pf) / num_vfs;
+	if (!avail_qs)
+		num_rxq = 0;
+	else if (num_rxq > avail_qs)
+		num_rxq = rounddown_pow_of_two(avail_qs);
+
+	if (num_txq < ICE_MIN_QS_PER_VF || num_rxq < ICE_MIN_QS_PER_VF) {
+		dev_err(dev, "Not enough queues to support minimum of %d queue pairs per VF for %d VFs\n",
+			ICE_MIN_QS_PER_VF, num_vfs);
+		return -ENOSPC;
+	}
+
+	err = ice_sriov_set_msix_res(pf, num_msix_per_vf * num_vfs);
+	if (err) {
+		dev_err(dev, "Unable to set MSI-X resources for %d VFs, err %d\n",
+			num_vfs, err);
+		return err;
+	}
+
+	/* only allow equal Tx/Rx queue count (i.e. queue pairs) */
+	pf->vfs.num_qps_per = min_t(int, num_txq, num_rxq);
+	pf->vfs.num_msix_per = num_msix_per_vf;
+	dev_info(dev, "Enabling %d VFs with %d vectors and %d queues per VF\n",
+		 num_vfs, pf->vfs.num_msix_per, pf->vfs.num_qps_per);
+
+	return 0;
+}
+
+/**
+ * ice_init_vf_vsi_res - initialize/setup VF VSI resources
+ * @vf: VF to initialize/setup the VSI for
+ *
+ * This function creates a VSI for the VF, adds a VLAN 0 filter, and sets up the
+ * VF VSI's broadcast filter and is only used during initial VF creation.
+ */
+static int ice_init_vf_vsi_res(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	int err;
+
+	vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf);
+
+	vsi = ice_vf_vsi_setup(vf);
+	if (!vsi)
+		return -ENOMEM;
+
+	err = ice_vf_init_host_cfg(vf, vsi);
+	if (err)
+		goto release_vsi;
+
+	return 0;
+
+release_vsi:
+	ice_vf_vsi_release(vf);
+	return err;
+}
+
+/**
+ * ice_start_vfs - start VFs so they are ready to be used by SR-IOV
+ * @pf: PF the VFs are associated with
+ */
+static int ice_start_vfs(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	unsigned int bkt, it_cnt;
+	struct ice_vf *vf;
+	int retval;
+
+	lockdep_assert_held(&pf->vfs.table_lock);
+
+	it_cnt = 0;
+	ice_for_each_vf(pf, bkt, vf) {
+		vf->vf_ops->clear_reset_trigger(vf);
+
+		retval = ice_init_vf_vsi_res(vf);
+		if (retval) {
+			dev_err(ice_pf_to_dev(pf), "Failed to initialize VSI resources for VF %d, error %d\n",
+				vf->vf_id, retval);
+			goto teardown;
+		}
+
+		set_bit(ICE_VF_STATE_INIT, vf->vf_states);
+		ice_ena_vf_mappings(vf);
+		wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE);
+		it_cnt++;
+	}
+
+	ice_flush(hw);
+	return 0;
+
+teardown:
+	ice_for_each_vf(pf, bkt, vf) {
+		if (it_cnt == 0)
+			break;
+
+		ice_dis_vf_mappings(vf);
+		ice_vf_vsi_release(vf);
+		it_cnt--;
+	}
+
+	return retval;
+}
+
+/**
+ * ice_sriov_free_vf - Free VF memory after all references are dropped
+ * @vf: pointer to VF to free
+ *
+ * Called by ice_put_vf through ice_release_vf once the last reference to a VF
+ * structure has been dropped.
+ */
+static void ice_sriov_free_vf(struct ice_vf *vf)
+{
+	mutex_destroy(&vf->cfg_lock);
+
+	kfree_rcu(vf, rcu);
+}
+
+/**
+ * ice_sriov_clear_reset_state - clears VF Reset status register
+ * @vf: the vf to configure
+ */
+static void ice_sriov_clear_reset_state(struct ice_vf *vf)
+{
+	struct ice_hw *hw = &vf->pf->hw;
+
+	/* Clear the reset status register so that VF immediately sees that
+	 * the device is resetting, even if hardware hasn't yet gotten around
+	 * to clearing VFGEN_RSTAT for us.
+	 */
+	wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_INPROGRESS);
+}
+
+/**
+ * ice_sriov_clear_mbx_register - clears SRIOV VF's mailbox registers
+ * @vf: the vf to configure
+ */
+static void ice_sriov_clear_mbx_register(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+
+	wr32(&pf->hw, VF_MBX_ARQLEN(vf->vf_id), 0);
+	wr32(&pf->hw, VF_MBX_ATQLEN(vf->vf_id), 0);
+}
+
+/**
+ * ice_sriov_trigger_reset_register - trigger VF reset for SRIOV VF
+ * @vf: pointer to VF structure
+ * @is_vflr: true if reset occurred due to VFLR
+ *
+ * Trigger and cleanup after a VF reset for a SR-IOV VF.
+ */
+static void ice_sriov_trigger_reset_register(struct ice_vf *vf, bool is_vflr)
+{
+	struct ice_pf *pf = vf->pf;
+	u32 reg, reg_idx, bit_idx;
+	unsigned int vf_abs_id, i;
+	struct device *dev;
+	struct ice_hw *hw;
+
+	dev = ice_pf_to_dev(pf);
+	hw = &pf->hw;
+	vf_abs_id = vf->vf_id + hw->func_caps.vf_base_id;
+
+	/* In the case of a VFLR, HW has already reset the VF and we just need
+	 * to clean up. Otherwise we must first trigger the reset using the
+	 * VFRTRIG register.
+	 */
+	if (!is_vflr) {
+		reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_id));
+		reg |= VPGEN_VFRTRIG_VFSWR_M;
+		wr32(hw, VPGEN_VFRTRIG(vf->vf_id), reg);
+	}
+
+	/* clear the VFLR bit in GLGEN_VFLRSTAT */
+	reg_idx = (vf_abs_id) / 32;
+	bit_idx = (vf_abs_id) % 32;
+	wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
+	ice_flush(hw);
+
+	wr32(hw, PF_PCI_CIAA,
+	     VF_DEVICE_STATUS | (vf_abs_id << PF_PCI_CIAA_VF_NUM_S));
+	for (i = 0; i < ICE_PCI_CIAD_WAIT_COUNT; i++) {
+		reg = rd32(hw, PF_PCI_CIAD);
+		/* no transactions pending so stop polling */
+		if ((reg & VF_TRANS_PENDING_M) == 0)
+			break;
+
+		dev_err(dev, "VF %u PCI transactions stuck\n", vf->vf_id);
+		udelay(ICE_PCI_CIAD_WAIT_DELAY_US);
+	}
+}
+
+/**
+ * ice_sriov_poll_reset_status - poll SRIOV VF reset status
+ * @vf: pointer to VF structure
+ *
+ * Returns true when reset is successful, else returns false
+ */
+static bool ice_sriov_poll_reset_status(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	unsigned int i;
+	u32 reg;
+
+	for (i = 0; i < 10; i++) {
+		/* VF reset requires driver to first reset the VF and then
+		 * poll the status register to make sure that the reset
+		 * completed successfully.
+		 */
+		reg = rd32(&pf->hw, VPGEN_VFRSTAT(vf->vf_id));
+		if (reg & VPGEN_VFRSTAT_VFRD_M)
+			return true;
+
+		/* only sleep if the reset is not done */
+		usleep_range(10, 20);
+	}
+	return false;
+}
+
+/**
+ * ice_sriov_clear_reset_trigger - enable VF to access hardware
+ * @vf: VF to enabled hardware access for
+ */
+static void ice_sriov_clear_reset_trigger(struct ice_vf *vf)
+{
+	struct ice_hw *hw = &vf->pf->hw;
+	u32 reg;
+
+	reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_id));
+	reg &= ~VPGEN_VFRTRIG_VFSWR_M;
+	wr32(hw, VPGEN_VFRTRIG(vf->vf_id), reg);
+	ice_flush(hw);
+}
+
+/**
+ * ice_sriov_create_vsi - Create a new VSI for a VF
+ * @vf: VF to create the VSI for
+ *
+ * This is called by ice_vf_recreate_vsi to create the new VSI after the old
+ * VSI has been released.
+ */
+static int ice_sriov_create_vsi(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi;
+
+	vsi = ice_vf_vsi_setup(vf);
+	if (!vsi)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * ice_sriov_post_vsi_rebuild - tasks to do after the VF's VSI have been rebuilt
+ * @vf: VF to perform tasks on
+ */
+static void ice_sriov_post_vsi_rebuild(struct ice_vf *vf)
+{
+	ice_ena_vf_mappings(vf);
+	wr32(&vf->pf->hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE);
+}
+
+static const struct ice_vf_ops ice_sriov_vf_ops = {
+	.reset_type = ICE_VF_RESET,
+	.free = ice_sriov_free_vf,
+	.clear_reset_state = ice_sriov_clear_reset_state,
+	.clear_mbx_register = ice_sriov_clear_mbx_register,
+	.trigger_reset_register = ice_sriov_trigger_reset_register,
+	.poll_reset_status = ice_sriov_poll_reset_status,
+	.clear_reset_trigger = ice_sriov_clear_reset_trigger,
+	.irq_close = NULL,
+	.create_vsi = ice_sriov_create_vsi,
+	.post_vsi_rebuild = ice_sriov_post_vsi_rebuild,
+};
+
+/**
+ * ice_create_vf_entries - Allocate and insert VF entries
+ * @pf: pointer to the PF structure
+ * @num_vfs: the number of VFs to allocate
+ *
+ * Allocate new VF entries and insert them into the hash table. Set some
+ * basic default fields for initializing the new VFs.
+ *
+ * After this function exits, the hash table will have num_vfs entries
+ * inserted.
+ *
+ * Returns 0 on success or an integer error code on failure.
+ */
+static int ice_create_vf_entries(struct ice_pf *pf, u16 num_vfs)
+{
+	struct ice_vfs *vfs = &pf->vfs;
+	struct ice_vf *vf;
+	u16 vf_id;
+	int err;
+
+	lockdep_assert_held(&vfs->table_lock);
+
+	for (vf_id = 0; vf_id < num_vfs; vf_id++) {
+		vf = kzalloc(sizeof(*vf), GFP_KERNEL);
+		if (!vf) {
+			err = -ENOMEM;
+			goto err_free_entries;
+		}
+		kref_init(&vf->refcnt);
+
+		vf->pf = pf;
+		vf->vf_id = vf_id;
+
+		/* set sriov vf ops for VFs created during SRIOV flow */
+		vf->vf_ops = &ice_sriov_vf_ops;
+
+		ice_initialize_vf_entry(vf);
+
+		vf->vf_sw_id = pf->first_sw;
+
+		hash_add_rcu(vfs->table, &vf->entry, vf_id);
+	}
+
+	return 0;
+
+err_free_entries:
+	ice_free_vf_entries(pf);
+	return err;
+}
+
+/**
+ * ice_ena_vfs - enable VFs so they are ready to be used
+ * @pf: pointer to the PF structure
+ * @num_vfs: number of VFs to enable
+ */
+static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	int ret;
+
+	/* Disable global interrupt 0 so we don't try to handle the VFLR. */
+	wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index),
+	     ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S);
+	set_bit(ICE_OICR_INTR_DIS, pf->state);
+	ice_flush(hw);
+
+	ret = pci_enable_sriov(pf->pdev, num_vfs);
+	if (ret)
+		goto err_unroll_intr;
+
+	mutex_lock(&pf->vfs.table_lock);
+
+	ret = ice_set_per_vf_res(pf, num_vfs);
+	if (ret) {
+		dev_err(dev, "Not enough resources for %d VFs, err %d. Try with fewer number of VFs\n",
+			num_vfs, ret);
+		goto err_unroll_sriov;
+	}
+
+	ret = ice_create_vf_entries(pf, num_vfs);
+	if (ret) {
+		dev_err(dev, "Failed to allocate VF entries for %d VFs\n",
+			num_vfs);
+		goto err_unroll_sriov;
+	}
+
+	ret = ice_start_vfs(pf);
+	if (ret) {
+		dev_err(dev, "Failed to start %d VFs, err %d\n", num_vfs, ret);
+		ret = -EAGAIN;
+		goto err_unroll_vf_entries;
+	}
+
+	clear_bit(ICE_VF_DIS, pf->state);
+
+	ret = ice_eswitch_configure(pf);
+	if (ret) {
+		dev_err(dev, "Failed to configure eswitch, err %d\n", ret);
+		goto err_unroll_sriov;
+	}
+
+	/* rearm global interrupts */
+	if (test_and_clear_bit(ICE_OICR_INTR_DIS, pf->state))
+		ice_irq_dynamic_ena(hw, NULL, NULL);
+
+	mutex_unlock(&pf->vfs.table_lock);
+
+	return 0;
+
+err_unroll_vf_entries:
+	ice_free_vf_entries(pf);
+err_unroll_sriov:
+	mutex_unlock(&pf->vfs.table_lock);
+	pci_disable_sriov(pf->pdev);
+err_unroll_intr:
+	/* rearm interrupts here */
+	ice_irq_dynamic_ena(hw, NULL, NULL);
+	clear_bit(ICE_OICR_INTR_DIS, pf->state);
+	return ret;
+}
+
+/**
+ * ice_pci_sriov_ena - Enable or change number of VFs
+ * @pf: pointer to the PF structure
+ * @num_vfs: number of VFs to allocate
+ *
+ * Returns 0 on success and negative on failure
+ */
+static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	int err;
+
+	if (!num_vfs) {
+		ice_free_vfs(pf);
+		return 0;
+	}
+
+	if (num_vfs > pf->vfs.num_supported) {
+		dev_err(dev, "Can't enable %d VFs, max VFs supported is %d\n",
+			num_vfs, pf->vfs.num_supported);
+		return -EOPNOTSUPP;
+	}
+
+	dev_info(dev, "Enabling %d VFs\n", num_vfs);
+	err = ice_ena_vfs(pf, num_vfs);
+	if (err) {
+		dev_err(dev, "Failed to enable SR-IOV: %d\n", err);
+		return err;
+	}
+
+	set_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
+	return 0;
+}
+
+/**
+ * ice_check_sriov_allowed - check if SR-IOV is allowed based on various checks
+ * @pf: PF to enabled SR-IOV on
+ */
+static int ice_check_sriov_allowed(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+
+	if (!test_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags)) {
+		dev_err(dev, "This device is not capable of SR-IOV\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (ice_is_safe_mode(pf)) {
+		dev_err(dev, "SR-IOV cannot be configured - Device is in Safe Mode\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (!ice_pf_state_is_nominal(pf)) {
+		dev_err(dev, "Cannot enable SR-IOV, device not ready\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_sriov_configure - Enable or change number of VFs via sysfs
+ * @pdev: pointer to a pci_dev structure
+ * @num_vfs: number of VFs to allocate or 0 to free VFs
+ *
+ * This function is called when the user updates the number of VFs in sysfs. On
+ * success return whatever num_vfs was set to by the caller. Return negative on
+ * failure.
+ */
+int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+	struct ice_pf *pf = pci_get_drvdata(pdev);
+	struct device *dev = ice_pf_to_dev(pf);
+	int err;
+
+	err = ice_check_sriov_allowed(pf);
+	if (err)
+		return err;
+
+	if (!num_vfs) {
+		if (!pci_vfs_assigned(pdev)) {
+			ice_free_vfs(pf);
+			return 0;
+		}
+
+		dev_err(dev, "can't free VFs because some are assigned to VMs.\n");
+		return -EBUSY;
+	}
+
+	err = ice_pci_sriov_ena(pf, num_vfs);
+	if (err)
+		return err;
+
+	return num_vfs;
+}
+
+/**
+ * ice_process_vflr_event - Free VF resources via IRQ calls
+ * @pf: pointer to the PF structure
+ *
+ * called from the VFLR IRQ handler to
+ * free up VF resources and state variables
+ */
+void ice_process_vflr_event(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	struct ice_vf *vf;
+	unsigned int bkt;
+	u32 reg;
+
+	if (!test_and_clear_bit(ICE_VFLR_EVENT_PENDING, pf->state) ||
+	    !ice_has_vfs(pf))
+		return;
+
+	mutex_lock(&pf->vfs.table_lock);
+	ice_for_each_vf(pf, bkt, vf) {
+		u32 reg_idx, bit_idx;
+
+		reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32;
+		bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32;
+		/* read GLGEN_VFLRSTAT register to find out the flr VFs */
+		reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx));
+		if (reg & BIT(bit_idx))
+			/* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */
+			ice_reset_vf(vf, ICE_VF_RESET_VFLR | ICE_VF_RESET_LOCK);
+	}
+	mutex_unlock(&pf->vfs.table_lock);
+}
+
+/**
+ * ice_get_vf_from_pfq - get the VF who owns the PF space queue passed in
+ * @pf: PF used to index all VFs
+ * @pfq: queue index relative to the PF's function space
+ *
+ * If no VF is found who owns the pfq then return NULL, otherwise return a
+ * pointer to the VF who owns the pfq
+ *
+ * If this function returns non-NULL, it acquires a reference count of the VF
+ * structure. The caller is responsible for calling ice_put_vf() to drop this
+ * reference.
+ */
+static struct ice_vf *ice_get_vf_from_pfq(struct ice_pf *pf, u16 pfq)
+{
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	rcu_read_lock();
+	ice_for_each_vf_rcu(pf, bkt, vf) {
+		struct ice_vsi *vsi;
+		u16 rxq_idx;
+
+		vsi = ice_get_vf_vsi(vf);
+		if (!vsi)
+			continue;
+
+		ice_for_each_rxq(vsi, rxq_idx)
+			if (vsi->rxq_map[rxq_idx] == pfq) {
+				struct ice_vf *found;
+
+				if (kref_get_unless_zero(&vf->refcnt))
+					found = vf;
+				else
+					found = NULL;
+				rcu_read_unlock();
+				return found;
+			}
+	}
+	rcu_read_unlock();
+
+	return NULL;
+}
+
+/**
+ * ice_globalq_to_pfq - convert from global queue index to PF space queue index
+ * @pf: PF used for conversion
+ * @globalq: global queue index used to convert to PF space queue index
+ */
+static u32 ice_globalq_to_pfq(struct ice_pf *pf, u32 globalq)
+{
+	return globalq - pf->hw.func_caps.common_cap.rxq_first_id;
+}
+
+/**
+ * ice_vf_lan_overflow_event - handle LAN overflow event for a VF
+ * @pf: PF that the LAN overflow event happened on
+ * @event: structure holding the event information for the LAN overflow event
+ *
+ * Determine if the LAN overflow event was caused by a VF queue. If it was not
+ * caused by a VF, do nothing. If a VF caused this LAN overflow event trigger a
+ * reset on the offending VF.
+ */
+void
+ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event)
+{
+	u32 gldcb_rtctq, queue;
+	struct ice_vf *vf;
+
+	gldcb_rtctq = le32_to_cpu(event->desc.params.lan_overflow.prtdcb_ruptq);
+	dev_dbg(ice_pf_to_dev(pf), "GLDCB_RTCTQ: 0x%08x\n", gldcb_rtctq);
+
+	/* event returns device global Rx queue number */
+	queue = (gldcb_rtctq & GLDCB_RTCTQ_RXQNUM_M) >>
+		GLDCB_RTCTQ_RXQNUM_S;
+
+	vf = ice_get_vf_from_pfq(pf, ice_globalq_to_pfq(pf, queue));
+	if (!vf)
+		return;
+
+	ice_reset_vf(vf, ICE_VF_RESET_NOTIFY | ICE_VF_RESET_LOCK);
+	ice_put_vf(vf);
+}
+
+/**
+ * ice_set_vf_spoofchk
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @ena: flag to enable or disable feature
+ *
+ * Enable or disable VF spoof checking
+ */
+int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	struct ice_vsi *vf_vsi;
+	struct device *dev;
+	struct ice_vf *vf;
+	int ret;
+
+	dev = ice_pf_to_dev(pf);
+
+	vf = ice_get_vf_by_id(pf, vf_id);
+	if (!vf)
+		return -EINVAL;
+
+	ret = ice_check_vf_ready_for_cfg(vf);
+	if (ret)
+		goto out_put_vf;
+
+	vf_vsi = ice_get_vf_vsi(vf);
+	if (!vf_vsi) {
+		netdev_err(netdev, "VSI %d for VF %d is null\n",
+			   vf->lan_vsi_idx, vf->vf_id);
+		ret = -EINVAL;
+		goto out_put_vf;
+	}
+
+	if (vf_vsi->type != ICE_VSI_VF) {
+		netdev_err(netdev, "Type %d of VSI %d for VF %d is no ICE_VSI_VF\n",
+			   vf_vsi->type, vf_vsi->vsi_num, vf->vf_id);
+		ret = -ENODEV;
+		goto out_put_vf;
+	}
+
+	if (ena == vf->spoofchk) {
+		dev_dbg(dev, "VF spoofchk already %s\n", ena ? "ON" : "OFF");
+		ret = 0;
+		goto out_put_vf;
+	}
+
+	ret = ice_vsi_apply_spoofchk(vf_vsi, ena);
+	if (ret)
+		dev_err(dev, "Failed to set spoofchk %s for VF %d VSI %d\n error %d\n",
+			ena ? "ON" : "OFF", vf->vf_id, vf_vsi->vsi_num, ret);
+	else
+		vf->spoofchk = ena;
+
+out_put_vf:
+	ice_put_vf(vf);
+	return ret;
+}
+
+/**
+ * ice_get_vf_cfg
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @ivi: VF configuration structure
+ *
+ * return VF configuration
+ */
+int
+ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_vf *vf;
+	int ret;
+
+	vf = ice_get_vf_by_id(pf, vf_id);
+	if (!vf)
+		return -EINVAL;
+
+	ret = ice_check_vf_ready_for_cfg(vf);
+	if (ret)
+		goto out_put_vf;
+
+	ivi->vf = vf_id;
+	ether_addr_copy(ivi->mac, vf->hw_lan_addr);
+
+	/* VF configuration for VLAN and applicable QoS */
+	ivi->vlan = ice_vf_get_port_vlan_id(vf);
+	ivi->qos = ice_vf_get_port_vlan_prio(vf);
+	if (ice_vf_is_port_vlan_ena(vf))
+		ivi->vlan_proto = cpu_to_be16(ice_vf_get_port_vlan_tpid(vf));
+
+	ivi->trusted = vf->trusted;
+	ivi->spoofchk = vf->spoofchk;
+	if (!vf->link_forced)
+		ivi->linkstate = IFLA_VF_LINK_STATE_AUTO;
+	else if (vf->link_up)
+		ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE;
+	else
+		ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE;
+	ivi->max_tx_rate = vf->max_tx_rate;
+	ivi->min_tx_rate = vf->min_tx_rate;
+
+out_put_vf:
+	ice_put_vf(vf);
+	return ret;
+}
+
+/**
+ * ice_set_vf_mac
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @mac: MAC address
+ *
+ * program VF MAC address
+ */
+int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_vf *vf;
+	int ret;
+
+	if (is_multicast_ether_addr(mac)) {
+		netdev_err(netdev, "%pM not a valid unicast address\n", mac);
+		return -EINVAL;
+	}
+
+	vf = ice_get_vf_by_id(pf, vf_id);
+	if (!vf)
+		return -EINVAL;
+
+	/* nothing left to do, unicast MAC already set */
+	if (ether_addr_equal(vf->dev_lan_addr, mac) &&
+	    ether_addr_equal(vf->hw_lan_addr, mac)) {
+		ret = 0;
+		goto out_put_vf;
+	}
+
+	ret = ice_check_vf_ready_for_cfg(vf);
+	if (ret)
+		goto out_put_vf;
+
+	mutex_lock(&vf->cfg_lock);
+
+	/* VF is notified of its new MAC via the PF's response to the
+	 * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset
+	 */
+	ether_addr_copy(vf->dev_lan_addr, mac);
+	ether_addr_copy(vf->hw_lan_addr, mac);
+	if (is_zero_ether_addr(mac)) {
+		/* VF will send VIRTCHNL_OP_ADD_ETH_ADDR message with its MAC */
+		vf->pf_set_mac = false;
+		netdev_info(netdev, "Removing MAC on VF %d. VF driver will be reinitialized\n",
+			    vf->vf_id);
+	} else {
+		/* PF will add MAC rule for the VF */
+		vf->pf_set_mac = true;
+		netdev_info(netdev, "Setting MAC %pM on VF %d. VF driver will be reinitialized\n",
+			    mac, vf_id);
+	}
+
+	ice_reset_vf(vf, ICE_VF_RESET_NOTIFY);
+	mutex_unlock(&vf->cfg_lock);
+
+out_put_vf:
+	ice_put_vf(vf);
+	return ret;
+}
+
+/**
+ * ice_set_vf_trust
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @trusted: Boolean value to enable/disable trusted VF
+ *
+ * Enable or disable a given VF as trusted
+ */
+int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_vf *vf;
+	int ret;
+
+	vf = ice_get_vf_by_id(pf, vf_id);
+	if (!vf)
+		return -EINVAL;
+
+	if (ice_is_eswitch_mode_switchdev(pf)) {
+		dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n");
+		return -EOPNOTSUPP;
+	}
+
+	ret = ice_check_vf_ready_for_cfg(vf);
+	if (ret)
+		goto out_put_vf;
+
+	/* Check if already trusted */
+	if (trusted == vf->trusted) {
+		ret = 0;
+		goto out_put_vf;
+	}
+
+	mutex_lock(&vf->cfg_lock);
+
+	vf->trusted = trusted;
+	ice_reset_vf(vf, ICE_VF_RESET_NOTIFY);
+	dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n",
+		 vf_id, trusted ? "" : "un");
+
+	mutex_unlock(&vf->cfg_lock);
+
+out_put_vf:
+	ice_put_vf(vf);
+	return ret;
+}
+
+/**
+ * ice_set_vf_link_state
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @link_state: required link state
+ *
+ * Set VF's link state, irrespective of physical link state status
+ */
+int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_vf *vf;
+	int ret;
+
+	vf = ice_get_vf_by_id(pf, vf_id);
+	if (!vf)
+		return -EINVAL;
+
+	ret = ice_check_vf_ready_for_cfg(vf);
+	if (ret)
+		goto out_put_vf;
+
+	switch (link_state) {
+	case IFLA_VF_LINK_STATE_AUTO:
+		vf->link_forced = false;
+		break;
+	case IFLA_VF_LINK_STATE_ENABLE:
+		vf->link_forced = true;
+		vf->link_up = true;
+		break;
+	case IFLA_VF_LINK_STATE_DISABLE:
+		vf->link_forced = true;
+		vf->link_up = false;
+		break;
+	default:
+		ret = -EINVAL;
+		goto out_put_vf;
+	}
+
+	ice_vc_notify_vf_link_state(vf);
+
+out_put_vf:
+	ice_put_vf(vf);
+	return ret;
+}
+
+/**
+ * ice_calc_all_vfs_min_tx_rate - calculate cumulative min Tx rate on all VFs
+ * @pf: PF associated with VFs
+ */
+static int ice_calc_all_vfs_min_tx_rate(struct ice_pf *pf)
+{
+	struct ice_vf *vf;
+	unsigned int bkt;
+	int rate = 0;
+
+	rcu_read_lock();
+	ice_for_each_vf_rcu(pf, bkt, vf)
+		rate += vf->min_tx_rate;
+	rcu_read_unlock();
+
+	return rate;
+}
+
+/**
+ * ice_min_tx_rate_oversubscribed - check if min Tx rate causes oversubscription
+ * @vf: VF trying to configure min_tx_rate
+ * @min_tx_rate: min Tx rate in Mbps
+ *
+ * Check if the min_tx_rate being passed in will cause oversubscription of total
+ * min_tx_rate based on the current link speed and all other VFs configured
+ * min_tx_rate
+ *
+ * Return true if the passed min_tx_rate would cause oversubscription, else
+ * return false
+ */
+static bool
+ice_min_tx_rate_oversubscribed(struct ice_vf *vf, int min_tx_rate)
+{
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+	int all_vfs_min_tx_rate;
+	int link_speed_mbps;
+
+	if (WARN_ON(!vsi))
+		return false;
+
+	link_speed_mbps = ice_get_link_speed_mbps(vsi);
+	all_vfs_min_tx_rate = ice_calc_all_vfs_min_tx_rate(vf->pf);
+
+	/* this VF's previous rate is being overwritten */
+	all_vfs_min_tx_rate -= vf->min_tx_rate;
+
+	if (all_vfs_min_tx_rate + min_tx_rate > link_speed_mbps) {
+		dev_err(ice_pf_to_dev(vf->pf), "min_tx_rate of %d Mbps on VF %u would cause oversubscription of %d Mbps based on the current link speed %d Mbps\n",
+			min_tx_rate, vf->vf_id,
+			all_vfs_min_tx_rate + min_tx_rate - link_speed_mbps,
+			link_speed_mbps);
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_set_vf_bw - set min/max VF bandwidth
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @min_tx_rate: Minimum Tx rate in Mbps
+ * @max_tx_rate: Maximum Tx rate in Mbps
+ */
+int
+ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
+	      int max_tx_rate)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_vsi *vsi;
+	struct device *dev;
+	struct ice_vf *vf;
+	int ret;
+
+	dev = ice_pf_to_dev(pf);
+
+	vf = ice_get_vf_by_id(pf, vf_id);
+	if (!vf)
+		return -EINVAL;
+
+	ret = ice_check_vf_ready_for_cfg(vf);
+	if (ret)
+		goto out_put_vf;
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		ret = -EINVAL;
+		goto out_put_vf;
+	}
+
+	if (min_tx_rate && ice_is_dcb_active(pf)) {
+		dev_err(dev, "DCB on PF is currently enabled. VF min Tx rate limiting not allowed on this PF.\n");
+		ret = -EOPNOTSUPP;
+		goto out_put_vf;
+	}
+
+	if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) {
+		ret = -EINVAL;
+		goto out_put_vf;
+	}
+
+	if (vf->min_tx_rate != (unsigned int)min_tx_rate) {
+		ret = ice_set_min_bw_limit(vsi, (u64)min_tx_rate * 1000);
+		if (ret) {
+			dev_err(dev, "Unable to set min-tx-rate for VF %d\n",
+				vf->vf_id);
+			goto out_put_vf;
+		}
+
+		vf->min_tx_rate = min_tx_rate;
+	}
+
+	if (vf->max_tx_rate != (unsigned int)max_tx_rate) {
+		ret = ice_set_max_bw_limit(vsi, (u64)max_tx_rate * 1000);
+		if (ret) {
+			dev_err(dev, "Unable to set max-tx-rate for VF %d\n",
+				vf->vf_id);
+			goto out_put_vf;
+		}
+
+		vf->max_tx_rate = max_tx_rate;
+	}
+
+out_put_vf:
+	ice_put_vf(vf);
+	return ret;
+}
+
+/**
+ * ice_get_vf_stats - populate some stats for the VF
+ * @netdev: the netdev of the PF
+ * @vf_id: the host OS identifier (0-255)
+ * @vf_stats: pointer to the OS memory to be initialized
+ */
+int ice_get_vf_stats(struct net_device *netdev, int vf_id,
+		     struct ifla_vf_stats *vf_stats)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_eth_stats *stats;
+	struct ice_vsi *vsi;
+	struct ice_vf *vf;
+	int ret;
+
+	vf = ice_get_vf_by_id(pf, vf_id);
+	if (!vf)
+		return -EINVAL;
+
+	ret = ice_check_vf_ready_for_cfg(vf);
+	if (ret)
+		goto out_put_vf;
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		ret = -EINVAL;
+		goto out_put_vf;
+	}
+
+	ice_update_eth_stats(vsi);
+	stats = &vsi->eth_stats;
+
+	memset(vf_stats, 0, sizeof(*vf_stats));
+
+	vf_stats->rx_packets = stats->rx_unicast + stats->rx_broadcast +
+		stats->rx_multicast;
+	vf_stats->tx_packets = stats->tx_unicast + stats->tx_broadcast +
+		stats->tx_multicast;
+	vf_stats->rx_bytes   = stats->rx_bytes;
+	vf_stats->tx_bytes   = stats->tx_bytes;
+	vf_stats->broadcast  = stats->rx_broadcast;
+	vf_stats->multicast  = stats->rx_multicast;
+	vf_stats->rx_dropped = stats->rx_discards;
+	vf_stats->tx_dropped = stats->tx_discards;
+
+out_put_vf:
+	ice_put_vf(vf);
+	return ret;
+}
+
+/**
+ * ice_is_supported_port_vlan_proto - make sure the vlan_proto is supported
+ * @hw: hardware structure used to check the VLAN mode
+ * @vlan_proto: VLAN TPID being checked
+ *
+ * If the device is configured in Double VLAN Mode (DVM), then both ETH_P_8021Q
+ * and ETH_P_8021AD are supported. If the device is configured in Single VLAN
+ * Mode (SVM), then only ETH_P_8021Q is supported.
+ */
+static bool
+ice_is_supported_port_vlan_proto(struct ice_hw *hw, u16 vlan_proto)
+{
+	bool is_supported = false;
+
+	switch (vlan_proto) {
+	case ETH_P_8021Q:
+		is_supported = true;
+		break;
+	case ETH_P_8021AD:
+		if (ice_is_dvm_ena(hw))
+			is_supported = true;
+		break;
+	}
+
+	return is_supported;
+}
+
+/**
+ * ice_set_vf_port_vlan
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @vlan_id: VLAN ID being set
+ * @qos: priority setting
+ * @vlan_proto: VLAN protocol
+ *
+ * program VF Port VLAN ID and/or QoS
+ */
+int
+ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
+		     __be16 vlan_proto)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	u16 local_vlan_proto = ntohs(vlan_proto);
+	struct device *dev;
+	struct ice_vf *vf;
+	int ret;
+
+	dev = ice_pf_to_dev(pf);
+
+	if (vlan_id >= VLAN_N_VID || qos > 7) {
+		dev_err(dev, "Invalid Port VLAN parameters for VF %d, ID %d, QoS %d\n",
+			vf_id, vlan_id, qos);
+		return -EINVAL;
+	}
+
+	if (!ice_is_supported_port_vlan_proto(&pf->hw, local_vlan_proto)) {
+		dev_err(dev, "VF VLAN protocol 0x%04x is not supported\n",
+			local_vlan_proto);
+		return -EPROTONOSUPPORT;
+	}
+
+	vf = ice_get_vf_by_id(pf, vf_id);
+	if (!vf)
+		return -EINVAL;
+
+	ret = ice_check_vf_ready_for_cfg(vf);
+	if (ret)
+		goto out_put_vf;
+
+	if (ice_vf_get_port_vlan_prio(vf) == qos &&
+	    ice_vf_get_port_vlan_tpid(vf) == local_vlan_proto &&
+	    ice_vf_get_port_vlan_id(vf) == vlan_id) {
+		/* duplicate request, so just return success */
+		dev_dbg(dev, "Duplicate port VLAN %u, QoS %u, TPID 0x%04x request\n",
+			vlan_id, qos, local_vlan_proto);
+		ret = 0;
+		goto out_put_vf;
+	}
+
+	mutex_lock(&vf->cfg_lock);
+
+	vf->port_vlan_info = ICE_VLAN(local_vlan_proto, vlan_id, qos);
+	if (ice_vf_is_port_vlan_ena(vf))
+		dev_info(dev, "Setting VLAN %u, QoS %u, TPID 0x%04x on VF %d\n",
+			 vlan_id, qos, local_vlan_proto, vf_id);
+	else
+		dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id);
+
+	ice_reset_vf(vf, ICE_VF_RESET_NOTIFY);
+	mutex_unlock(&vf->cfg_lock);
+
+out_put_vf:
+	ice_put_vf(vf);
+	return ret;
+}
+
+/**
+ * ice_print_vf_rx_mdd_event - print VF Rx malicious driver detect event
+ * @vf: pointer to the VF structure
+ */
+void ice_print_vf_rx_mdd_event(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(pf);
+
+	dev_info(dev, "%d Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n",
+		 vf->mdd_rx_events.count, pf->hw.pf_id, vf->vf_id,
+		 vf->dev_lan_addr,
+		 test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)
+			  ? "on" : "off");
+}
+
+/**
+ * ice_print_vfs_mdd_events - print VFs malicious driver detect event
+ * @pf: pointer to the PF structure
+ *
+ * Called from ice_handle_mdd_event to rate limit and print VFs MDD events.
+ */
+void ice_print_vfs_mdd_events(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	/* check that there are pending MDD events to print */
+	if (!test_and_clear_bit(ICE_MDD_VF_PRINT_PENDING, pf->state))
+		return;
+
+	/* VF MDD event logs are rate limited to one second intervals */
+	if (time_is_after_jiffies(pf->vfs.last_printed_mdd_jiffies + HZ * 1))
+		return;
+
+	pf->vfs.last_printed_mdd_jiffies = jiffies;
+
+	mutex_lock(&pf->vfs.table_lock);
+	ice_for_each_vf(pf, bkt, vf) {
+		/* only print Rx MDD event message if there are new events */
+		if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
+			vf->mdd_rx_events.last_printed =
+							vf->mdd_rx_events.count;
+			ice_print_vf_rx_mdd_event(vf);
+		}
+
+		/* only print Tx MDD event message if there are new events */
+		if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) {
+			vf->mdd_tx_events.last_printed =
+							vf->mdd_tx_events.count;
+
+			dev_info(dev, "%d Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM.\n",
+				 vf->mdd_tx_events.count, hw->pf_id, vf->vf_id,
+				 vf->dev_lan_addr);
+		}
+	}
+	mutex_unlock(&pf->vfs.table_lock);
+}
+
+/**
+ * ice_restore_all_vfs_msi_state - restore VF MSI state after PF FLR
+ * @pdev: pointer to a pci_dev structure
+ *
+ * Called when recovering from a PF FLR to restore interrupt capability to
+ * the VFs.
+ */
+void ice_restore_all_vfs_msi_state(struct pci_dev *pdev)
+{
+	u16 vf_id;
+	int pos;
+
+	if (!pci_num_vf(pdev))
+		return;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+	if (pos) {
+		struct pci_dev *vfdev;
+
+		pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID,
+				     &vf_id);
+		vfdev = pci_get_device(pdev->vendor, vf_id, NULL);
+		while (vfdev) {
+			if (vfdev->is_virtfn && vfdev->physfn == pdev)
+				pci_restore_msi_state(vfdev);
+			vfdev = pci_get_device(pdev->vendor, vf_id,
+					       vfdev);
+		}
+	}
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h
new file mode 100644
index 0000000000..346cb2666f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_SRIOV_H_
+#define _ICE_SRIOV_H_
+#include "ice_virtchnl_fdir.h"
+#include "ice_vf_lib.h"
+#include "ice_virtchnl.h"
+
+/* Static VF transaction/status register def */
+#define VF_DEVICE_STATUS		0xAA
+#define VF_TRANS_PENDING_M		0x20
+
+/* wait defines for polling PF_PCI_CIAD register status */
+#define ICE_PCI_CIAD_WAIT_COUNT		100
+#define ICE_PCI_CIAD_WAIT_DELAY_US	1
+
+/* VF resource constraints */
+#define ICE_MIN_QS_PER_VF		1
+#define ICE_NONQ_VECS_VF		1
+#define ICE_NUM_VF_MSIX_MED		17
+#define ICE_NUM_VF_MSIX_SMALL		5
+#define ICE_NUM_VF_MSIX_MULTIQ_MIN	3
+#define ICE_MIN_INTR_PER_VF		(ICE_MIN_QS_PER_VF + 1)
+#define ICE_MAX_VF_RESET_TRIES		40
+#define ICE_MAX_VF_RESET_SLEEP_MS	20
+
+#ifdef CONFIG_PCI_IOV
+void ice_process_vflr_event(struct ice_pf *pf);
+int ice_sriov_configure(struct pci_dev *pdev, int num_vfs);
+int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac);
+int
+ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi);
+
+void ice_free_vfs(struct ice_pf *pf);
+void ice_restore_all_vfs_msi_state(struct pci_dev *pdev);
+
+int
+ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
+		     __be16 vlan_proto);
+
+int
+ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
+	      int max_tx_rate);
+
+int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted);
+
+int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state);
+
+int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena);
+
+int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector);
+
+int
+ice_get_vf_stats(struct net_device *netdev, int vf_id,
+		 struct ifla_vf_stats *vf_stats);
+void
+ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event);
+void ice_print_vfs_mdd_events(struct ice_pf *pf);
+void ice_print_vf_rx_mdd_event(struct ice_vf *vf);
+bool
+ice_vc_validate_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto);
+#else /* CONFIG_PCI_IOV */
+static inline void ice_process_vflr_event(struct ice_pf *pf) { }
+static inline void ice_free_vfs(struct ice_pf *pf) { }
+static inline
+void ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) { }
+static inline void ice_print_vfs_mdd_events(struct ice_pf *pf) { }
+static inline void ice_print_vf_rx_mdd_event(struct ice_vf *vf) { }
+static inline void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) { }
+
+static inline int
+ice_sriov_configure(struct pci_dev __always_unused *pdev,
+		    int __always_unused num_vfs)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_mac(struct net_device __always_unused *netdev,
+	       int __always_unused vf_id, u8 __always_unused *mac)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_get_vf_cfg(struct net_device __always_unused *netdev,
+	       int __always_unused vf_id,
+	       struct ifla_vf_info __always_unused *ivi)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_trust(struct net_device __always_unused *netdev,
+		 int __always_unused vf_id, bool __always_unused trusted)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_port_vlan(struct net_device __always_unused *netdev,
+		     int __always_unused vf_id, u16 __always_unused vid,
+		     u8 __always_unused qos, __be16 __always_unused v_proto)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_spoofchk(struct net_device __always_unused *netdev,
+		    int __always_unused vf_id, bool __always_unused ena)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_link_state(struct net_device __always_unused *netdev,
+		      int __always_unused vf_id, int __always_unused link_state)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_set_vf_bw(struct net_device __always_unused *netdev,
+	      int __always_unused vf_id, int __always_unused min_tx_rate,
+	      int __always_unused max_tx_rate)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf,
+		    struct ice_q_vector __always_unused *q_vector)
+{
+	return 0;
+}
+
+static inline int
+ice_get_vf_stats(struct net_device __always_unused *netdev,
+		 int __always_unused vf_id,
+		 struct ifla_vf_stats __always_unused *vf_stats)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_PCI_IOV */
+#endif /* _ICE_SRIOV_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
new file mode 100644
index 0000000000..2f77b684ff
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -0,0 +1,6647 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_lib.h"
+#include "ice_switch.h"
+
+#define ICE_ETH_DA_OFFSET		0
+#define ICE_ETH_ETHTYPE_OFFSET		12
+#define ICE_ETH_VLAN_TCI_OFFSET		14
+#define ICE_MAX_VLAN_ID			0xFFF
+#define ICE_IPV6_ETHER_ID		0x86DD
+
+/* Dummy ethernet header needed in the ice_aqc_sw_rules_elem
+ * struct to configure any switch filter rules.
+ * {DA (6 bytes), SA(6 bytes),
+ * Ether type (2 bytes for header without VLAN tag) OR
+ * VLAN tag (4 bytes for header with VLAN tag) }
+ *
+ * Word on Hardcoded values
+ * byte 0 = 0x2: to identify it as locally administered DA MAC
+ * byte 6 = 0x2: to identify it as locally administered SA MAC
+ * byte 12 = 0x81 & byte 13 = 0x00:
+ *      In case of VLAN filter first two bytes defines ether type (0x8100)
+ *      and remaining two bytes are placeholder for programming a given VLAN ID
+ *      In case of Ether type filter it is treated as header without VLAN tag
+ *      and byte 12 and 13 is used to program a given Ether type instead
+ */
+static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0,
+							0x2, 0, 0, 0, 0, 0,
+							0x81, 0, 0, 0};
+
+enum {
+	ICE_PKT_OUTER_IPV6	= BIT(0),
+	ICE_PKT_TUN_GTPC	= BIT(1),
+	ICE_PKT_TUN_GTPU	= BIT(2),
+	ICE_PKT_TUN_NVGRE	= BIT(3),
+	ICE_PKT_TUN_UDP		= BIT(4),
+	ICE_PKT_INNER_IPV6	= BIT(5),
+	ICE_PKT_INNER_TCP	= BIT(6),
+	ICE_PKT_INNER_UDP	= BIT(7),
+	ICE_PKT_GTP_NOPAY	= BIT(8),
+	ICE_PKT_KMALLOC		= BIT(9),
+	ICE_PKT_PPPOE		= BIT(10),
+	ICE_PKT_L2TPV3		= BIT(11),
+};
+
+struct ice_dummy_pkt_offsets {
+	enum ice_protocol_type type;
+	u16 offset; /* ICE_PROTOCOL_LAST indicates end of list */
+};
+
+struct ice_dummy_pkt_profile {
+	const struct ice_dummy_pkt_offsets *offsets;
+	const u8 *pkt;
+	u32 match;
+	u16 pkt_len;
+	u16 offsets_len;
+};
+
+#define ICE_DECLARE_PKT_OFFSETS(type)					\
+	static const struct ice_dummy_pkt_offsets			\
+	ice_dummy_##type##_packet_offsets[]
+
+#define ICE_DECLARE_PKT_TEMPLATE(type)					\
+	static const u8 ice_dummy_##type##_packet[]
+
+#define ICE_PKT_PROFILE(type, m) {					\
+	.match		= (m),						\
+	.pkt		= ice_dummy_##type##_packet,			\
+	.pkt_len	= sizeof(ice_dummy_##type##_packet),		\
+	.offsets	= ice_dummy_##type##_packet_offsets,		\
+	.offsets_len	= sizeof(ice_dummy_##type##_packet_offsets),	\
+}
+
+ICE_DECLARE_PKT_OFFSETS(vlan) = {
+	{ ICE_VLAN_OFOS,        12 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(vlan) = {
+	0x81, 0x00, 0x00, 0x00, /* ICE_VLAN_OFOS 12 */
+};
+
+ICE_DECLARE_PKT_OFFSETS(qinq) = {
+	{ ICE_VLAN_EX,          12 },
+	{ ICE_VLAN_IN,          16 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(qinq) = {
+	0x91, 0x00, 0x00, 0x00, /* ICE_VLAN_EX 12 */
+	0x81, 0x00, 0x00, 0x00, /* ICE_VLAN_IN 16 */
+};
+
+ICE_DECLARE_PKT_OFFSETS(gre_tcp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_NVGRE,		34 },
+	{ ICE_MAC_IL,		42 },
+	{ ICE_ETYPE_IL,		54 },
+	{ ICE_IPV4_IL,		56 },
+	{ ICE_TCP_IL,		76 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(gre_tcp) = {
+	0x00, 0x00, 0x00, 0x00,	/* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x00,		/* ICE_ETYPE_OL 12 */
+
+	0x45, 0x00, 0x00, 0x3E,	/* ICE_IPV4_OFOS 14 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x2F, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x80, 0x00, 0x65, 0x58,	/* ICE_NVGRE 34 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00,	/* ICE_MAC_IL 42 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x00,		/* ICE_ETYPE_IL 54 */
+
+	0x45, 0x00, 0x00, 0x14,	/* ICE_IPV4_IL 56 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x06, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00,	/* ICE_TCP_IL 76 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x50, 0x02, 0x20, 0x00,
+	0x00, 0x00, 0x00, 0x00
+};
+
+ICE_DECLARE_PKT_OFFSETS(gre_udp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_NVGRE,		34 },
+	{ ICE_MAC_IL,		42 },
+	{ ICE_ETYPE_IL,		54 },
+	{ ICE_IPV4_IL,		56 },
+	{ ICE_UDP_ILOS,		76 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(gre_udp) = {
+	0x00, 0x00, 0x00, 0x00,	/* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x00,		/* ICE_ETYPE_OL 12 */
+
+	0x45, 0x00, 0x00, 0x3E,	/* ICE_IPV4_OFOS 14 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x2F, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x80, 0x00, 0x65, 0x58,	/* ICE_NVGRE 34 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00,	/* ICE_MAC_IL 42 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x00,		/* ICE_ETYPE_IL 54 */
+
+	0x45, 0x00, 0x00, 0x14,	/* ICE_IPV4_IL 56 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x11, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00,	/* ICE_UDP_ILOS 76 */
+	0x00, 0x08, 0x00, 0x00,
+};
+
+ICE_DECLARE_PKT_OFFSETS(udp_tun_tcp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_UDP_OF,		34 },
+	{ ICE_VXLAN,		42 },
+	{ ICE_GENEVE,		42 },
+	{ ICE_VXLAN_GPE,	42 },
+	{ ICE_MAC_IL,		50 },
+	{ ICE_ETYPE_IL,		62 },
+	{ ICE_IPV4_IL,		64 },
+	{ ICE_TCP_IL,		84 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(udp_tun_tcp) = {
+	0x00, 0x00, 0x00, 0x00,  /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x00,		/* ICE_ETYPE_OL 12 */
+
+	0x45, 0x00, 0x00, 0x5a, /* ICE_IPV4_OFOS 14 */
+	0x00, 0x01, 0x00, 0x00,
+	0x40, 0x11, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x12, 0xb5, /* ICE_UDP_OF 34 */
+	0x00, 0x46, 0x00, 0x00,
+
+	0x00, 0x00, 0x65, 0x58, /* ICE_VXLAN 42 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 50 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x00,		/* ICE_ETYPE_IL 62 */
+
+	0x45, 0x00, 0x00, 0x28, /* ICE_IPV4_IL 64 */
+	0x00, 0x01, 0x00, 0x00,
+	0x40, 0x06, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 84 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x50, 0x02, 0x20, 0x00,
+	0x00, 0x00, 0x00, 0x00
+};
+
+ICE_DECLARE_PKT_OFFSETS(udp_tun_udp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_UDP_OF,		34 },
+	{ ICE_VXLAN,		42 },
+	{ ICE_GENEVE,		42 },
+	{ ICE_VXLAN_GPE,	42 },
+	{ ICE_MAC_IL,		50 },
+	{ ICE_ETYPE_IL,		62 },
+	{ ICE_IPV4_IL,		64 },
+	{ ICE_UDP_ILOS,		84 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(udp_tun_udp) = {
+	0x00, 0x00, 0x00, 0x00,  /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x00,		/* ICE_ETYPE_OL 12 */
+
+	0x45, 0x00, 0x00, 0x4e, /* ICE_IPV4_OFOS 14 */
+	0x00, 0x01, 0x00, 0x00,
+	0x00, 0x11, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x12, 0xb5, /* ICE_UDP_OF 34 */
+	0x00, 0x3a, 0x00, 0x00,
+
+	0x00, 0x00, 0x65, 0x58, /* ICE_VXLAN 42 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 50 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x00,		/* ICE_ETYPE_IL 62 */
+
+	0x45, 0x00, 0x00, 0x1c, /* ICE_IPV4_IL 64 */
+	0x00, 0x01, 0x00, 0x00,
+	0x00, 0x11, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 84 */
+	0x00, 0x08, 0x00, 0x00,
+};
+
+ICE_DECLARE_PKT_OFFSETS(gre_ipv6_tcp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_NVGRE,		34 },
+	{ ICE_MAC_IL,		42 },
+	{ ICE_ETYPE_IL,		54 },
+	{ ICE_IPV6_IL,		56 },
+	{ ICE_TCP_IL,		96 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(gre_ipv6_tcp) = {
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x00,		/* ICE_ETYPE_OL 12 */
+
+	0x45, 0x00, 0x00, 0x66, /* ICE_IPV4_OFOS 14 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x2F, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x80, 0x00, 0x65, 0x58, /* ICE_NVGRE 34 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 42 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x86, 0xdd,		/* ICE_ETYPE_IL 54 */
+
+	0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_IL 56 */
+	0x00, 0x08, 0x06, 0x40,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 96 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x50, 0x02, 0x20, 0x00,
+	0x00, 0x00, 0x00, 0x00
+};
+
+ICE_DECLARE_PKT_OFFSETS(gre_ipv6_udp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_NVGRE,		34 },
+	{ ICE_MAC_IL,		42 },
+	{ ICE_ETYPE_IL,		54 },
+	{ ICE_IPV6_IL,		56 },
+	{ ICE_UDP_ILOS,		96 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(gre_ipv6_udp) = {
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x00,		/* ICE_ETYPE_OL 12 */
+
+	0x45, 0x00, 0x00, 0x5a, /* ICE_IPV4_OFOS 14 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x2F, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x80, 0x00, 0x65, 0x58, /* ICE_NVGRE 34 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 42 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x86, 0xdd,		/* ICE_ETYPE_IL 54 */
+
+	0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_IL 56 */
+	0x00, 0x08, 0x11, 0x40,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 96 */
+	0x00, 0x08, 0x00, 0x00,
+};
+
+ICE_DECLARE_PKT_OFFSETS(udp_tun_ipv6_tcp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_UDP_OF,		34 },
+	{ ICE_VXLAN,		42 },
+	{ ICE_GENEVE,		42 },
+	{ ICE_VXLAN_GPE,	42 },
+	{ ICE_MAC_IL,		50 },
+	{ ICE_ETYPE_IL,		62 },
+	{ ICE_IPV6_IL,		64 },
+	{ ICE_TCP_IL,		104 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(udp_tun_ipv6_tcp) = {
+	0x00, 0x00, 0x00, 0x00,  /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x00,		/* ICE_ETYPE_OL 12 */
+
+	0x45, 0x00, 0x00, 0x6e, /* ICE_IPV4_OFOS 14 */
+	0x00, 0x01, 0x00, 0x00,
+	0x40, 0x11, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x12, 0xb5, /* ICE_UDP_OF 34 */
+	0x00, 0x5a, 0x00, 0x00,
+
+	0x00, 0x00, 0x65, 0x58, /* ICE_VXLAN 42 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 50 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x86, 0xdd,		/* ICE_ETYPE_IL 62 */
+
+	0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_IL 64 */
+	0x00, 0x08, 0x06, 0x40,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 104 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x50, 0x02, 0x20, 0x00,
+	0x00, 0x00, 0x00, 0x00
+};
+
+ICE_DECLARE_PKT_OFFSETS(udp_tun_ipv6_udp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_UDP_OF,		34 },
+	{ ICE_VXLAN,		42 },
+	{ ICE_GENEVE,		42 },
+	{ ICE_VXLAN_GPE,	42 },
+	{ ICE_MAC_IL,		50 },
+	{ ICE_ETYPE_IL,		62 },
+	{ ICE_IPV6_IL,		64 },
+	{ ICE_UDP_ILOS,		104 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(udp_tun_ipv6_udp) = {
+	0x00, 0x00, 0x00, 0x00,  /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x00,		/* ICE_ETYPE_OL 12 */
+
+	0x45, 0x00, 0x00, 0x62, /* ICE_IPV4_OFOS 14 */
+	0x00, 0x01, 0x00, 0x00,
+	0x00, 0x11, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x12, 0xb5, /* ICE_UDP_OF 34 */
+	0x00, 0x4e, 0x00, 0x00,
+
+	0x00, 0x00, 0x65, 0x58, /* ICE_VXLAN 42 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 50 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x86, 0xdd,		/* ICE_ETYPE_IL 62 */
+
+	0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_IL 64 */
+	0x00, 0x08, 0x11, 0x40,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 104 */
+	0x00, 0x08, 0x00, 0x00,
+};
+
+/* offset info for MAC + IPv4 + UDP dummy packet */
+ICE_DECLARE_PKT_OFFSETS(udp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_UDP_ILOS,		34 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+/* Dummy packet for MAC + IPv4 + UDP */
+ICE_DECLARE_PKT_TEMPLATE(udp) = {
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x00,		/* ICE_ETYPE_OL 12 */
+
+	0x45, 0x00, 0x00, 0x1c, /* ICE_IPV4_OFOS 14 */
+	0x00, 0x01, 0x00, 0x00,
+	0x00, 0x11, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 34 */
+	0x00, 0x08, 0x00, 0x00,
+
+	0x00, 0x00,	/* 2 bytes for 4 byte alignment */
+};
+
+/* offset info for MAC + IPv4 + TCP dummy packet */
+ICE_DECLARE_PKT_OFFSETS(tcp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_TCP_IL,		34 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+/* Dummy packet for MAC + IPv4 + TCP */
+ICE_DECLARE_PKT_TEMPLATE(tcp) = {
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x00,		/* ICE_ETYPE_OL 12 */
+
+	0x45, 0x00, 0x00, 0x28, /* ICE_IPV4_OFOS 14 */
+	0x00, 0x01, 0x00, 0x00,
+	0x00, 0x06, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 34 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x50, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00,	/* 2 bytes for 4 byte alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(tcp_ipv6) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_IPV6_OFOS,	14 },
+	{ ICE_TCP_IL,		54 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(tcp_ipv6) = {
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x86, 0xDD,		/* ICE_ETYPE_OL 12 */
+
+	0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 40 */
+	0x00, 0x14, 0x06, 0x00, /* Next header is TCP */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 54 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x50, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+/* IPv6 + UDP */
+ICE_DECLARE_PKT_OFFSETS(udp_ipv6) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_IPV6_OFOS,	14 },
+	{ ICE_UDP_ILOS,		54 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+/* IPv6 + UDP dummy packet */
+ICE_DECLARE_PKT_TEMPLATE(udp_ipv6) = {
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x86, 0xDD,		/* ICE_ETYPE_OL 12 */
+
+	0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 40 */
+	0x00, 0x10, 0x11, 0x00, /* Next header UDP */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 54 */
+	0x00, 0x10, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* needed for ESP packets */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+/* Outer IPv4 + Outer UDP + GTP + Inner IPv4 + Inner TCP */
+ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv4_tcp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_UDP_OF,		34 },
+	{ ICE_GTP,		42 },
+	{ ICE_IPV4_IL,		62 },
+	{ ICE_TCP_IL,		82 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv4_tcp) = {
+	0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x08, 0x00,
+
+	0x45, 0x00, 0x00, 0x58, /* IP 14 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x11, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x08, 0x68, /* UDP 34 */
+	0x00, 0x44, 0x00, 0x00,
+
+	0x34, 0xff, 0x00, 0x34, /* ICE_GTP Header 42 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x85,
+
+	0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 54 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x45, 0x00, 0x00, 0x28, /* IP 62 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x06, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* TCP 82 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x50, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+/* Outer IPv4 + Outer UDP + GTP + Inner IPv4 + Inner UDP */
+ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv4_udp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_UDP_OF,		34 },
+	{ ICE_GTP,		42 },
+	{ ICE_IPV4_IL,		62 },
+	{ ICE_UDP_ILOS,		82 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv4_udp) = {
+	0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x08, 0x00,
+
+	0x45, 0x00, 0x00, 0x4c, /* IP 14 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x11, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x08, 0x68, /* UDP 34 */
+	0x00, 0x38, 0x00, 0x00,
+
+	0x34, 0xff, 0x00, 0x28, /* ICE_GTP Header 42 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x85,
+
+	0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 54 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x45, 0x00, 0x00, 0x1c, /* IP 62 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x11, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* UDP 82 */
+	0x00, 0x08, 0x00, 0x00,
+
+	0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+/* Outer IPv6 + Outer UDP + GTP + Inner IPv4 + Inner TCP */
+ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv6_tcp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_UDP_OF,		34 },
+	{ ICE_GTP,		42 },
+	{ ICE_IPV6_IL,		62 },
+	{ ICE_TCP_IL,		102 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv6_tcp) = {
+	0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x08, 0x00,
+
+	0x45, 0x00, 0x00, 0x6c, /* IP 14 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x11, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x08, 0x68, /* UDP 34 */
+	0x00, 0x58, 0x00, 0x00,
+
+	0x34, 0xff, 0x00, 0x48, /* ICE_GTP Header 42 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x85,
+
+	0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 54 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x60, 0x00, 0x00, 0x00, /* IPv6 62 */
+	0x00, 0x14, 0x06, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* TCP 102 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x50, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv6_udp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_UDP_OF,		34 },
+	{ ICE_GTP,		42 },
+	{ ICE_IPV6_IL,		62 },
+	{ ICE_UDP_ILOS,		102 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv6_udp) = {
+	0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x08, 0x00,
+
+	0x45, 0x00, 0x00, 0x60, /* IP 14 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x11, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x08, 0x68, /* UDP 34 */
+	0x00, 0x4c, 0x00, 0x00,
+
+	0x34, 0xff, 0x00, 0x3c, /* ICE_GTP Header 42 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x85,
+
+	0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 54 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x60, 0x00, 0x00, 0x00, /* IPv6 62 */
+	0x00, 0x08, 0x11, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* UDP 102 */
+	0x00, 0x08, 0x00, 0x00,
+
+	0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv4_tcp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_IPV6_OFOS,	14 },
+	{ ICE_UDP_OF,		54 },
+	{ ICE_GTP,		62 },
+	{ ICE_IPV4_IL,		82 },
+	{ ICE_TCP_IL,		102 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv4_tcp) = {
+	0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x86, 0xdd,
+
+	0x60, 0x00, 0x00, 0x00, /* IPv6 14 */
+	0x00, 0x44, 0x11, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x08, 0x68, /* UDP 54 */
+	0x00, 0x44, 0x00, 0x00,
+
+	0x34, 0xff, 0x00, 0x34, /* ICE_GTP Header 62 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x85,
+
+	0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 74 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x45, 0x00, 0x00, 0x28, /* IP 82 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x06, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* TCP 102 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x50, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv4_udp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_IPV6_OFOS,	14 },
+	{ ICE_UDP_OF,		54 },
+	{ ICE_GTP,		62 },
+	{ ICE_IPV4_IL,		82 },
+	{ ICE_UDP_ILOS,		102 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv4_udp) = {
+	0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x86, 0xdd,
+
+	0x60, 0x00, 0x00, 0x00, /* IPv6 14 */
+	0x00, 0x38, 0x11, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x08, 0x68, /* UDP 54 */
+	0x00, 0x38, 0x00, 0x00,
+
+	0x34, 0xff, 0x00, 0x28, /* ICE_GTP Header 62 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x85,
+
+	0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 74 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x45, 0x00, 0x00, 0x1c, /* IP 82 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x11, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* UDP 102 */
+	0x00, 0x08, 0x00, 0x00,
+
+	0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv6_tcp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_IPV6_OFOS,	14 },
+	{ ICE_UDP_OF,		54 },
+	{ ICE_GTP,		62 },
+	{ ICE_IPV6_IL,		82 },
+	{ ICE_TCP_IL,		122 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv6_tcp) = {
+	0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x86, 0xdd,
+
+	0x60, 0x00, 0x00, 0x00, /* IPv6 14 */
+	0x00, 0x58, 0x11, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x08, 0x68, /* UDP 54 */
+	0x00, 0x58, 0x00, 0x00,
+
+	0x34, 0xff, 0x00, 0x48, /* ICE_GTP Header 62 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x85,
+
+	0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 74 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x60, 0x00, 0x00, 0x00, /* IPv6 82 */
+	0x00, 0x14, 0x06, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* TCP 122 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x50, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv6_gtpu_ipv6_udp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_IPV6_OFOS,	14 },
+	{ ICE_UDP_OF,		54 },
+	{ ICE_GTP,		62 },
+	{ ICE_IPV6_IL,		82 },
+	{ ICE_UDP_ILOS,		122 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv6_gtpu_ipv6_udp) = {
+	0x00, 0x00, 0x00, 0x00, /* Ethernet 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x86, 0xdd,
+
+	0x60, 0x00, 0x00, 0x00, /* IPv6 14 */
+	0x00, 0x4c, 0x11, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x08, 0x68, /* UDP 54 */
+	0x00, 0x4c, 0x00, 0x00,
+
+	0x34, 0xff, 0x00, 0x3c, /* ICE_GTP Header 62 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x85,
+
+	0x02, 0x00, 0x00, 0x00, /* GTP_PDUSession_ExtensionHeader 74 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x60, 0x00, 0x00, 0x00, /* IPv6 82 */
+	0x00, 0x08, 0x11, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* UDP 122 */
+	0x00, 0x08, 0x00, 0x00,
+
+	0x00, 0x00, /* 2 bytes for 4 byte alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv4_gtpu_ipv4) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_UDP_OF,		34 },
+	{ ICE_GTP_NO_PAY,	42 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv4_gtpu_ipv4) = {
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x08, 0x00,
+
+	0x45, 0x00, 0x00, 0x44, /* ICE_IPV4_OFOS 14 */
+	0x00, 0x00, 0x40, 0x00,
+	0x40, 0x11, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x68, 0x08, 0x68, /* ICE_UDP_OF 34 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x34, 0xff, 0x00, 0x28, /* ICE_GTP 42 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x85,
+
+	0x02, 0x00, 0x00, 0x00, /* PDU Session extension header */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x45, 0x00, 0x00, 0x14, /* ICE_IPV4_IL 62 */
+	0x00, 0x00, 0x40, 0x00,
+	0x40, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv6_gtp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_IPV6_OFOS,	14 },
+	{ ICE_UDP_OF,		54 },
+	{ ICE_GTP_NO_PAY,	62 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv6_gtp) = {
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x86, 0xdd,
+
+	0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 14 */
+	0x00, 0x6c, 0x11, 0x00, /* Next header UDP*/
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x68, 0x08, 0x68, /* ICE_UDP_OF 54 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x30, 0x00, 0x00, 0x28, /* ICE_GTP 62 */
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00,
+};
+
+ICE_DECLARE_PKT_OFFSETS(pppoe_ipv4_tcp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_PPPOE,		14 },
+	{ ICE_IPV4_OFOS,	22 },
+	{ ICE_TCP_IL,		42 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(pppoe_ipv4_tcp) = {
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x88, 0x64,		/* ICE_ETYPE_OL 12 */
+
+	0x11, 0x00, 0x00, 0x00, /* ICE_PPPOE 14 */
+	0x00, 0x16,
+
+	0x00, 0x21,		/* PPP Link Layer 20 */
+
+	0x45, 0x00, 0x00, 0x28, /* ICE_IPV4_OFOS 22 */
+	0x00, 0x01, 0x00, 0x00,
+	0x00, 0x06, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 42 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x50, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00,		/* 2 bytes for 4 bytes alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(pppoe_ipv4_udp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_PPPOE,		14 },
+	{ ICE_IPV4_OFOS,	22 },
+	{ ICE_UDP_ILOS,		42 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(pppoe_ipv4_udp) = {
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x88, 0x64,		/* ICE_ETYPE_OL 12 */
+
+	0x11, 0x00, 0x00, 0x00, /* ICE_PPPOE 14 */
+	0x00, 0x16,
+
+	0x00, 0x21,		/* PPP Link Layer 20 */
+
+	0x45, 0x00, 0x00, 0x1c, /* ICE_IPV4_OFOS 22 */
+	0x00, 0x01, 0x00, 0x00,
+	0x00, 0x11, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 42 */
+	0x00, 0x08, 0x00, 0x00,
+
+	0x00, 0x00,		/* 2 bytes for 4 bytes alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(pppoe_ipv6_tcp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_PPPOE,		14 },
+	{ ICE_IPV6_OFOS,	22 },
+	{ ICE_TCP_IL,		62 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(pppoe_ipv6_tcp) = {
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x88, 0x64,		/* ICE_ETYPE_OL 12 */
+
+	0x11, 0x00, 0x00, 0x00, /* ICE_PPPOE 14 */
+	0x00, 0x2a,
+
+	0x00, 0x57,		/* PPP Link Layer 20 */
+
+	0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 22 */
+	0x00, 0x14, 0x06, 0x00, /* Next header is TCP */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 62 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x50, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00,		/* 2 bytes for 4 bytes alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(pppoe_ipv6_udp) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_PPPOE,		14 },
+	{ ICE_IPV6_OFOS,	22 },
+	{ ICE_UDP_ILOS,		62 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(pppoe_ipv6_udp) = {
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x88, 0x64,		/* ICE_ETYPE_OL 12 */
+
+	0x11, 0x00, 0x00, 0x00, /* ICE_PPPOE 14 */
+	0x00, 0x2a,
+
+	0x00, 0x57,		/* PPP Link Layer 20 */
+
+	0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 22 */
+	0x00, 0x08, 0x11, 0x00, /* Next header UDP*/
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 62 */
+	0x00, 0x08, 0x00, 0x00,
+
+	0x00, 0x00,		/* 2 bytes for 4 bytes alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv4_l2tpv3) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_IPV4_OFOS,	14 },
+	{ ICE_L2TPV3,		34 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv4_l2tpv3) = {
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x08, 0x00,		/* ICE_ETYPE_OL 12 */
+
+	0x45, 0x00, 0x00, 0x20, /* ICE_IPV4_IL 14 */
+	0x00, 0x00, 0x40, 0x00,
+	0x40, 0x73, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_L2TPV3 34 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,		/* 2 bytes for 4 bytes alignment */
+};
+
+ICE_DECLARE_PKT_OFFSETS(ipv6_l2tpv3) = {
+	{ ICE_MAC_OFOS,		0 },
+	{ ICE_ETYPE_OL,		12 },
+	{ ICE_IPV6_OFOS,	14 },
+	{ ICE_L2TPV3,		54 },
+	{ ICE_PROTOCOL_LAST,	0 },
+};
+
+ICE_DECLARE_PKT_TEMPLATE(ipv6_l2tpv3) = {
+	0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x86, 0xDD,		/* ICE_ETYPE_OL 12 */
+
+	0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_IL 14 */
+	0x00, 0x0c, 0x73, 0x40,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+
+	0x00, 0x00, 0x00, 0x00, /* ICE_L2TPV3 54 */
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,		/* 2 bytes for 4 bytes alignment */
+};
+
+static const struct ice_dummy_pkt_profile ice_dummy_pkt_profiles[] = {
+	ICE_PKT_PROFILE(ipv6_gtp, ICE_PKT_TUN_GTPU | ICE_PKT_OUTER_IPV6 |
+				  ICE_PKT_GTP_NOPAY),
+	ICE_PKT_PROFILE(ipv6_gtpu_ipv6_udp, ICE_PKT_TUN_GTPU |
+					    ICE_PKT_OUTER_IPV6 |
+					    ICE_PKT_INNER_IPV6 |
+					    ICE_PKT_INNER_UDP),
+	ICE_PKT_PROFILE(ipv6_gtpu_ipv6_tcp, ICE_PKT_TUN_GTPU |
+					    ICE_PKT_OUTER_IPV6 |
+					    ICE_PKT_INNER_IPV6),
+	ICE_PKT_PROFILE(ipv6_gtpu_ipv4_udp, ICE_PKT_TUN_GTPU |
+					    ICE_PKT_OUTER_IPV6 |
+					    ICE_PKT_INNER_UDP),
+	ICE_PKT_PROFILE(ipv6_gtpu_ipv4_tcp, ICE_PKT_TUN_GTPU |
+					    ICE_PKT_OUTER_IPV6),
+	ICE_PKT_PROFILE(ipv4_gtpu_ipv4, ICE_PKT_TUN_GTPU | ICE_PKT_GTP_NOPAY),
+	ICE_PKT_PROFILE(ipv4_gtpu_ipv6_udp, ICE_PKT_TUN_GTPU |
+					    ICE_PKT_INNER_IPV6 |
+					    ICE_PKT_INNER_UDP),
+	ICE_PKT_PROFILE(ipv4_gtpu_ipv6_tcp, ICE_PKT_TUN_GTPU |
+					    ICE_PKT_INNER_IPV6),
+	ICE_PKT_PROFILE(ipv4_gtpu_ipv4_udp, ICE_PKT_TUN_GTPU |
+					    ICE_PKT_INNER_UDP),
+	ICE_PKT_PROFILE(ipv4_gtpu_ipv4_tcp, ICE_PKT_TUN_GTPU),
+	ICE_PKT_PROFILE(ipv6_gtp, ICE_PKT_TUN_GTPC | ICE_PKT_OUTER_IPV6),
+	ICE_PKT_PROFILE(ipv4_gtpu_ipv4, ICE_PKT_TUN_GTPC),
+	ICE_PKT_PROFILE(pppoe_ipv6_udp, ICE_PKT_PPPOE | ICE_PKT_OUTER_IPV6 |
+					ICE_PKT_INNER_UDP),
+	ICE_PKT_PROFILE(pppoe_ipv6_tcp, ICE_PKT_PPPOE | ICE_PKT_OUTER_IPV6),
+	ICE_PKT_PROFILE(pppoe_ipv4_udp, ICE_PKT_PPPOE | ICE_PKT_INNER_UDP),
+	ICE_PKT_PROFILE(pppoe_ipv4_tcp, ICE_PKT_PPPOE),
+	ICE_PKT_PROFILE(gre_ipv6_tcp, ICE_PKT_TUN_NVGRE | ICE_PKT_INNER_IPV6 |
+				      ICE_PKT_INNER_TCP),
+	ICE_PKT_PROFILE(gre_tcp, ICE_PKT_TUN_NVGRE | ICE_PKT_INNER_TCP),
+	ICE_PKT_PROFILE(gre_ipv6_udp, ICE_PKT_TUN_NVGRE | ICE_PKT_INNER_IPV6),
+	ICE_PKT_PROFILE(gre_udp, ICE_PKT_TUN_NVGRE),
+	ICE_PKT_PROFILE(udp_tun_ipv6_tcp, ICE_PKT_TUN_UDP |
+					  ICE_PKT_INNER_IPV6 |
+					  ICE_PKT_INNER_TCP),
+	ICE_PKT_PROFILE(ipv6_l2tpv3, ICE_PKT_L2TPV3 | ICE_PKT_OUTER_IPV6),
+	ICE_PKT_PROFILE(ipv4_l2tpv3, ICE_PKT_L2TPV3),
+	ICE_PKT_PROFILE(udp_tun_tcp, ICE_PKT_TUN_UDP | ICE_PKT_INNER_TCP),
+	ICE_PKT_PROFILE(udp_tun_ipv6_udp, ICE_PKT_TUN_UDP |
+					  ICE_PKT_INNER_IPV6),
+	ICE_PKT_PROFILE(udp_tun_udp, ICE_PKT_TUN_UDP),
+	ICE_PKT_PROFILE(udp_ipv6, ICE_PKT_OUTER_IPV6 | ICE_PKT_INNER_UDP),
+	ICE_PKT_PROFILE(udp, ICE_PKT_INNER_UDP),
+	ICE_PKT_PROFILE(tcp_ipv6, ICE_PKT_OUTER_IPV6),
+	ICE_PKT_PROFILE(tcp, 0),
+};
+
+/* this is a recipe to profile association bitmap */
+static DECLARE_BITMAP(recipe_to_profile[ICE_MAX_NUM_RECIPES],
+			  ICE_MAX_NUM_PROFILES);
+
+/* this is a profile to recipe association bitmap */
+static DECLARE_BITMAP(profile_to_recipe[ICE_MAX_NUM_PROFILES],
+			  ICE_MAX_NUM_RECIPES);
+
+/**
+ * ice_init_def_sw_recp - initialize the recipe book keeping tables
+ * @hw: pointer to the HW struct
+ *
+ * Allocate memory for the entire recipe table and initialize the structures/
+ * entries corresponding to basic recipes.
+ */
+int ice_init_def_sw_recp(struct ice_hw *hw)
+{
+	struct ice_sw_recipe *recps;
+	u8 i;
+
+	recps = devm_kcalloc(ice_hw_to_dev(hw), ICE_MAX_NUM_RECIPES,
+			     sizeof(*recps), GFP_KERNEL);
+	if (!recps)
+		return -ENOMEM;
+
+	for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) {
+		recps[i].root_rid = i;
+		INIT_LIST_HEAD(&recps[i].filt_rules);
+		INIT_LIST_HEAD(&recps[i].filt_replay_rules);
+		INIT_LIST_HEAD(&recps[i].rg_list);
+		mutex_init(&recps[i].filt_rule_lock);
+	}
+
+	hw->switch_info->recp_list = recps;
+
+	return 0;
+}
+
+/**
+ * ice_aq_get_sw_cfg - get switch configuration
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the result buffer
+ * @buf_size: length of the buffer available for response
+ * @req_desc: pointer to requested descriptor
+ * @num_elems: pointer to number of elements
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get switch configuration (0x0200) to be placed in buf.
+ * This admin command returns information such as initial VSI/port number
+ * and switch ID it belongs to.
+ *
+ * NOTE: *req_desc is both an input/output parameter.
+ * The caller of this function first calls this function with *request_desc set
+ * to 0. If the response from f/w has *req_desc set to 0, all the switch
+ * configuration information has been returned; if non-zero (meaning not all
+ * the information was returned), the caller should call this function again
+ * with *req_desc set to the previous value returned by f/w to get the
+ * next block of switch configuration information.
+ *
+ * *num_elems is output only parameter. This reflects the number of elements
+ * in response buffer. The caller of this function to use *num_elems while
+ * parsing the response buffer.
+ */
+static int
+ice_aq_get_sw_cfg(struct ice_hw *hw, struct ice_aqc_get_sw_cfg_resp_elem *buf,
+		  u16 buf_size, u16 *req_desc, u16 *num_elems,
+		  struct ice_sq_cd *cd)
+{
+	struct ice_aqc_get_sw_cfg *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_sw_cfg);
+	cmd = &desc.params.get_sw_conf;
+	cmd->element = cpu_to_le16(*req_desc);
+
+	status = ice_aq_send_cmd(hw, &desc, buf, buf_size, cd);
+	if (!status) {
+		*req_desc = le16_to_cpu(cmd->element);
+		*num_elems = le16_to_cpu(cmd->num_elems);
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_add_vsi
+ * @hw: pointer to the HW struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add a VSI context to the hardware (0x0210)
+ */
+static int
+ice_aq_add_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+	       struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_update_free_vsi_resp *res;
+	struct ice_aqc_add_get_update_free_vsi *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	cmd = &desc.params.vsi_cmd;
+	res = &desc.params.add_update_free_vsi_res;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_vsi);
+
+	if (!vsi_ctx->alloc_from_pool)
+		cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num |
+					   ICE_AQ_VSI_IS_VALID);
+	cmd->vf_id = vsi_ctx->vf_num;
+
+	cmd->vsi_flags = cpu_to_le16(vsi_ctx->flags);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info,
+				 sizeof(vsi_ctx->info), cd);
+
+	if (!status) {
+		vsi_ctx->vsi_num = le16_to_cpu(res->vsi_num) & ICE_AQ_VSI_NUM_M;
+		vsi_ctx->vsis_allocd = le16_to_cpu(res->vsi_used);
+		vsi_ctx->vsis_unallocated = le16_to_cpu(res->vsi_free);
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_free_vsi
+ * @hw: pointer to the HW struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @keep_vsi_alloc: keep VSI allocation as part of this PF's resources
+ * @cd: pointer to command details structure or NULL
+ *
+ * Free VSI context info from hardware (0x0213)
+ */
+static int
+ice_aq_free_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+		bool keep_vsi_alloc, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_update_free_vsi_resp *resp;
+	struct ice_aqc_add_get_update_free_vsi *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	cmd = &desc.params.vsi_cmd;
+	resp = &desc.params.add_update_free_vsi_res;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_free_vsi);
+
+	cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID);
+	if (keep_vsi_alloc)
+		cmd->cmd_flags = cpu_to_le16(ICE_AQ_VSI_KEEP_ALLOC);
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+	if (!status) {
+		vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used);
+		vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+	}
+
+	return status;
+}
+
+/**
+ * ice_aq_update_vsi
+ * @hw: pointer to the HW struct
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Update VSI context in the hardware (0x0211)
+ */
+static int
+ice_aq_update_vsi(struct ice_hw *hw, struct ice_vsi_ctx *vsi_ctx,
+		  struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_update_free_vsi_resp *resp;
+	struct ice_aqc_add_get_update_free_vsi *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	cmd = &desc.params.vsi_cmd;
+	resp = &desc.params.add_update_free_vsi_res;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_update_vsi);
+
+	cmd->vsi_num = cpu_to_le16(vsi_ctx->vsi_num | ICE_AQ_VSI_IS_VALID);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	status = ice_aq_send_cmd(hw, &desc, &vsi_ctx->info,
+				 sizeof(vsi_ctx->info), cd);
+
+	if (!status) {
+		vsi_ctx->vsis_allocd = le16_to_cpu(resp->vsi_used);
+		vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+	}
+
+	return status;
+}
+
+/**
+ * ice_is_vsi_valid - check whether the VSI is valid or not
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * check whether the VSI is valid or not
+ */
+bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle)
+{
+	return vsi_handle < ICE_MAX_VSI && hw->vsi_ctx[vsi_handle];
+}
+
+/**
+ * ice_get_hw_vsi_num - return the HW VSI number
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * return the HW VSI number
+ * Caution: call this function only if VSI is valid (ice_is_vsi_valid)
+ */
+u16 ice_get_hw_vsi_num(struct ice_hw *hw, u16 vsi_handle)
+{
+	return hw->vsi_ctx[vsi_handle]->vsi_num;
+}
+
+/**
+ * ice_get_vsi_ctx - return the VSI context entry for a given VSI handle
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * return the VSI context entry for a given VSI handle
+ */
+struct ice_vsi_ctx *ice_get_vsi_ctx(struct ice_hw *hw, u16 vsi_handle)
+{
+	return (vsi_handle >= ICE_MAX_VSI) ? NULL : hw->vsi_ctx[vsi_handle];
+}
+
+/**
+ * ice_save_vsi_ctx - save the VSI context for a given VSI handle
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ * @vsi: VSI context pointer
+ *
+ * save the VSI context entry for a given VSI handle
+ */
+static void
+ice_save_vsi_ctx(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi)
+{
+	hw->vsi_ctx[vsi_handle] = vsi;
+}
+
+/**
+ * ice_clear_vsi_q_ctx - clear VSI queue contexts for all TCs
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ */
+static void ice_clear_vsi_q_ctx(struct ice_hw *hw, u16 vsi_handle)
+{
+	struct ice_vsi_ctx *vsi = ice_get_vsi_ctx(hw, vsi_handle);
+	u8 i;
+
+	if (!vsi)
+		return;
+	ice_for_each_traffic_class(i) {
+		devm_kfree(ice_hw_to_dev(hw), vsi->lan_q_ctx[i]);
+		vsi->lan_q_ctx[i] = NULL;
+		devm_kfree(ice_hw_to_dev(hw), vsi->rdma_q_ctx[i]);
+		vsi->rdma_q_ctx[i] = NULL;
+	}
+}
+
+/**
+ * ice_clear_vsi_ctx - clear the VSI context entry
+ * @hw: pointer to the HW struct
+ * @vsi_handle: VSI handle
+ *
+ * clear the VSI context entry
+ */
+static void ice_clear_vsi_ctx(struct ice_hw *hw, u16 vsi_handle)
+{
+	struct ice_vsi_ctx *vsi;
+
+	vsi = ice_get_vsi_ctx(hw, vsi_handle);
+	if (vsi) {
+		ice_clear_vsi_q_ctx(hw, vsi_handle);
+		devm_kfree(ice_hw_to_dev(hw), vsi);
+		hw->vsi_ctx[vsi_handle] = NULL;
+	}
+}
+
+/**
+ * ice_clear_all_vsi_ctx - clear all the VSI context entries
+ * @hw: pointer to the HW struct
+ */
+void ice_clear_all_vsi_ctx(struct ice_hw *hw)
+{
+	u16 i;
+
+	for (i = 0; i < ICE_MAX_VSI; i++)
+		ice_clear_vsi_ctx(hw, i);
+}
+
+/**
+ * ice_add_vsi - add VSI context to the hardware and VSI handle list
+ * @hw: pointer to the HW struct
+ * @vsi_handle: unique VSI handle provided by drivers
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add a VSI context to the hardware also add it into the VSI handle list.
+ * If this function gets called after reset for existing VSIs then update
+ * with the new HW VSI number in the corresponding VSI handle list entry.
+ */
+int
+ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	    struct ice_sq_cd *cd)
+{
+	struct ice_vsi_ctx *tmp_vsi_ctx;
+	int status;
+
+	if (vsi_handle >= ICE_MAX_VSI)
+		return -EINVAL;
+	status = ice_aq_add_vsi(hw, vsi_ctx, cd);
+	if (status)
+		return status;
+	tmp_vsi_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!tmp_vsi_ctx) {
+		/* Create a new VSI context */
+		tmp_vsi_ctx = devm_kzalloc(ice_hw_to_dev(hw),
+					   sizeof(*tmp_vsi_ctx), GFP_KERNEL);
+		if (!tmp_vsi_ctx) {
+			ice_aq_free_vsi(hw, vsi_ctx, false, cd);
+			return -ENOMEM;
+		}
+		*tmp_vsi_ctx = *vsi_ctx;
+		ice_save_vsi_ctx(hw, vsi_handle, tmp_vsi_ctx);
+	} else {
+		/* update with new HW VSI num */
+		tmp_vsi_ctx->vsi_num = vsi_ctx->vsi_num;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_free_vsi- free VSI context from hardware and VSI handle list
+ * @hw: pointer to the HW struct
+ * @vsi_handle: unique VSI handle
+ * @vsi_ctx: pointer to a VSI context struct
+ * @keep_vsi_alloc: keep VSI allocation as part of this PF's resources
+ * @cd: pointer to command details structure or NULL
+ *
+ * Free VSI context info from hardware as well as from VSI handle list
+ */
+int
+ice_free_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	     bool keep_vsi_alloc, struct ice_sq_cd *cd)
+{
+	int status;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+	vsi_ctx->vsi_num = ice_get_hw_vsi_num(hw, vsi_handle);
+	status = ice_aq_free_vsi(hw, vsi_ctx, keep_vsi_alloc, cd);
+	if (!status)
+		ice_clear_vsi_ctx(hw, vsi_handle);
+	return status;
+}
+
+/**
+ * ice_update_vsi
+ * @hw: pointer to the HW struct
+ * @vsi_handle: unique VSI handle
+ * @vsi_ctx: pointer to a VSI context struct
+ * @cd: pointer to command details structure or NULL
+ *
+ * Update VSI context in the hardware
+ */
+int
+ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	       struct ice_sq_cd *cd)
+{
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+	vsi_ctx->vsi_num = ice_get_hw_vsi_num(hw, vsi_handle);
+	return ice_aq_update_vsi(hw, vsi_ctx, cd);
+}
+
+/**
+ * ice_cfg_rdma_fltr - enable/disable RDMA filtering on VSI
+ * @hw: pointer to HW struct
+ * @vsi_handle: VSI SW index
+ * @enable: boolean for enable/disable
+ */
+int
+ice_cfg_rdma_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable)
+{
+	struct ice_vsi_ctx *ctx, *cached_ctx;
+	int status;
+
+	cached_ctx = ice_get_vsi_ctx(hw, vsi_handle);
+	if (!cached_ctx)
+		return -ENOENT;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->info.q_opt_rss = cached_ctx->info.q_opt_rss;
+	ctx->info.q_opt_tc = cached_ctx->info.q_opt_tc;
+	ctx->info.q_opt_flags = cached_ctx->info.q_opt_flags;
+
+	ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
+
+	if (enable)
+		ctx->info.q_opt_flags |= ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+	else
+		ctx->info.q_opt_flags &= ~ICE_AQ_VSI_Q_OPT_PE_FLTR_EN;
+
+	status = ice_update_vsi(hw, vsi_handle, ctx, NULL);
+	if (!status) {
+		cached_ctx->info.q_opt_flags = ctx->info.q_opt_flags;
+		cached_ctx->info.valid_sections |= ctx->info.valid_sections;
+	}
+
+	kfree(ctx);
+	return status;
+}
+
+/**
+ * ice_aq_alloc_free_vsi_list
+ * @hw: pointer to the HW struct
+ * @vsi_list_id: VSI list ID returned or used for lookup
+ * @lkup_type: switch rule filter lookup type
+ * @opc: switch rules population command type - pass in the command opcode
+ *
+ * allocates or free a VSI list resource
+ */
+static int
+ice_aq_alloc_free_vsi_list(struct ice_hw *hw, u16 *vsi_list_id,
+			   enum ice_sw_lkup_type lkup_type,
+			   enum ice_adminq_opc opc)
+{
+	struct ice_aqc_alloc_free_res_elem *sw_buf;
+	struct ice_aqc_res_elem *vsi_ele;
+	u16 buf_len;
+	int status;
+
+	buf_len = struct_size(sw_buf, elem, 1);
+	sw_buf = devm_kzalloc(ice_hw_to_dev(hw), buf_len, GFP_KERNEL);
+	if (!sw_buf)
+		return -ENOMEM;
+	sw_buf->num_elems = cpu_to_le16(1);
+
+	if (lkup_type == ICE_SW_LKUP_MAC ||
+	    lkup_type == ICE_SW_LKUP_MAC_VLAN ||
+	    lkup_type == ICE_SW_LKUP_ETHERTYPE ||
+	    lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
+	    lkup_type == ICE_SW_LKUP_PROMISC ||
+	    lkup_type == ICE_SW_LKUP_PROMISC_VLAN ||
+	    lkup_type == ICE_SW_LKUP_DFLT) {
+		sw_buf->res_type = cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_REP);
+	} else if (lkup_type == ICE_SW_LKUP_VLAN) {
+		if (opc == ice_aqc_opc_alloc_res)
+			sw_buf->res_type =
+				cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_PRUNE |
+					    ICE_AQC_RES_TYPE_FLAG_SHARED);
+		else
+			sw_buf->res_type =
+				cpu_to_le16(ICE_AQC_RES_TYPE_VSI_LIST_PRUNE);
+	} else {
+		status = -EINVAL;
+		goto ice_aq_alloc_free_vsi_list_exit;
+	}
+
+	if (opc == ice_aqc_opc_free_res)
+		sw_buf->elem[0].e.sw_resp = cpu_to_le16(*vsi_list_id);
+
+	status = ice_aq_alloc_free_res(hw, sw_buf, buf_len, opc);
+	if (status)
+		goto ice_aq_alloc_free_vsi_list_exit;
+
+	if (opc == ice_aqc_opc_alloc_res) {
+		vsi_ele = &sw_buf->elem[0];
+		*vsi_list_id = le16_to_cpu(vsi_ele->e.sw_resp);
+	}
+
+ice_aq_alloc_free_vsi_list_exit:
+	devm_kfree(ice_hw_to_dev(hw), sw_buf);
+	return status;
+}
+
+/**
+ * ice_aq_sw_rules - add/update/remove switch rules
+ * @hw: pointer to the HW struct
+ * @rule_list: pointer to switch rule population list
+ * @rule_list_sz: total size of the rule list in bytes
+ * @num_rules: number of switch rules in the rule_list
+ * @opc: switch rules population command type - pass in the command opcode
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add(0x02a0)/Update(0x02a1)/Remove(0x02a2) switch rules commands to firmware
+ */
+int
+ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz,
+		u8 num_rules, enum ice_adminq_opc opc, struct ice_sq_cd *cd)
+{
+	struct ice_aq_desc desc;
+	int status;
+
+	if (opc != ice_aqc_opc_add_sw_rules &&
+	    opc != ice_aqc_opc_update_sw_rules &&
+	    opc != ice_aqc_opc_remove_sw_rules)
+		return -EINVAL;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, opc);
+
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+	desc.params.sw_rules.num_rules_fltr_entry_index =
+		cpu_to_le16(num_rules);
+	status = ice_aq_send_cmd(hw, &desc, rule_list, rule_list_sz, cd);
+	if (opc != ice_aqc_opc_add_sw_rules &&
+	    hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT)
+		status = -ENOENT;
+
+	return status;
+}
+
+/**
+ * ice_aq_add_recipe - add switch recipe
+ * @hw: pointer to the HW struct
+ * @s_recipe_list: pointer to switch rule population list
+ * @num_recipes: number of switch recipes in the list
+ * @cd: pointer to command details structure or NULL
+ *
+ * Add(0x0290)
+ */
+int
+ice_aq_add_recipe(struct ice_hw *hw,
+		  struct ice_aqc_recipe_data_elem *s_recipe_list,
+		  u16 num_recipes, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_get_recipe *cmd;
+	struct ice_aq_desc desc;
+	u16 buf_size;
+
+	cmd = &desc.params.add_get_recipe;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_recipe);
+
+	cmd->num_sub_recipes = cpu_to_le16(num_recipes);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	buf_size = num_recipes * sizeof(*s_recipe_list);
+
+	return ice_aq_send_cmd(hw, &desc, s_recipe_list, buf_size, cd);
+}
+
+/**
+ * ice_aq_get_recipe - get switch recipe
+ * @hw: pointer to the HW struct
+ * @s_recipe_list: pointer to switch rule population list
+ * @num_recipes: pointer to the number of recipes (input and output)
+ * @recipe_root: root recipe number of recipe(s) to retrieve
+ * @cd: pointer to command details structure or NULL
+ *
+ * Get(0x0292)
+ *
+ * On input, *num_recipes should equal the number of entries in s_recipe_list.
+ * On output, *num_recipes will equal the number of entries returned in
+ * s_recipe_list.
+ *
+ * The caller must supply enough space in s_recipe_list to hold all possible
+ * recipes and *num_recipes must equal ICE_MAX_NUM_RECIPES.
+ */
+int
+ice_aq_get_recipe(struct ice_hw *hw,
+		  struct ice_aqc_recipe_data_elem *s_recipe_list,
+		  u16 *num_recipes, u16 recipe_root, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_add_get_recipe *cmd;
+	struct ice_aq_desc desc;
+	u16 buf_size;
+	int status;
+
+	if (*num_recipes != ICE_MAX_NUM_RECIPES)
+		return -EINVAL;
+
+	cmd = &desc.params.add_get_recipe;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_recipe);
+
+	cmd->return_index = cpu_to_le16(recipe_root);
+	cmd->num_sub_recipes = 0;
+
+	buf_size = *num_recipes * sizeof(*s_recipe_list);
+
+	status = ice_aq_send_cmd(hw, &desc, s_recipe_list, buf_size, cd);
+	*num_recipes = le16_to_cpu(cmd->num_sub_recipes);
+
+	return status;
+}
+
+/**
+ * ice_update_recipe_lkup_idx - update a default recipe based on the lkup_idx
+ * @hw: pointer to the HW struct
+ * @params: parameters used to update the default recipe
+ *
+ * This function only supports updating default recipes and it only supports
+ * updating a single recipe based on the lkup_idx at a time.
+ *
+ * This is done as a read-modify-write operation. First, get the current recipe
+ * contents based on the recipe's ID. Then modify the field vector index and
+ * mask if it's valid at the lkup_idx. Finally, use the add recipe AQ to update
+ * the pre-existing recipe with the modifications.
+ */
+int
+ice_update_recipe_lkup_idx(struct ice_hw *hw,
+			   struct ice_update_recipe_lkup_idx_params *params)
+{
+	struct ice_aqc_recipe_data_elem *rcp_list;
+	u16 num_recps = ICE_MAX_NUM_RECIPES;
+	int status;
+
+	rcp_list = kcalloc(num_recps, sizeof(*rcp_list), GFP_KERNEL);
+	if (!rcp_list)
+		return -ENOMEM;
+
+	/* read current recipe list from firmware */
+	rcp_list->recipe_indx = params->rid;
+	status = ice_aq_get_recipe(hw, rcp_list, &num_recps, params->rid, NULL);
+	if (status) {
+		ice_debug(hw, ICE_DBG_SW, "Failed to get recipe %d, status %d\n",
+			  params->rid, status);
+		goto error_out;
+	}
+
+	/* only modify existing recipe's lkup_idx and mask if valid, while
+	 * leaving all other fields the same, then update the recipe firmware
+	 */
+	rcp_list->content.lkup_indx[params->lkup_idx] = params->fv_idx;
+	if (params->mask_valid)
+		rcp_list->content.mask[params->lkup_idx] =
+			cpu_to_le16(params->mask);
+
+	if (params->ignore_valid)
+		rcp_list->content.lkup_indx[params->lkup_idx] |=
+			ICE_AQ_RECIPE_LKUP_IGNORE;
+
+	status = ice_aq_add_recipe(hw, &rcp_list[0], 1, NULL);
+	if (status)
+		ice_debug(hw, ICE_DBG_SW, "Failed to update recipe %d lkup_idx %d fv_idx %d mask %d mask_valid %s, status %d\n",
+			  params->rid, params->lkup_idx, params->fv_idx,
+			  params->mask, params->mask_valid ? "true" : "false",
+			  status);
+
+error_out:
+	kfree(rcp_list);
+	return status;
+}
+
+/**
+ * ice_aq_map_recipe_to_profile - Map recipe to packet profile
+ * @hw: pointer to the HW struct
+ * @profile_id: package profile ID to associate the recipe with
+ * @r_bitmap: Recipe bitmap filled in and need to be returned as response
+ * @cd: pointer to command details structure or NULL
+ * Recipe to profile association (0x0291)
+ */
+int
+ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+			     struct ice_sq_cd *cd)
+{
+	struct ice_aqc_recipe_to_profile *cmd;
+	struct ice_aq_desc desc;
+
+	cmd = &desc.params.recipe_to_profile;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_recipe_to_profile);
+	cmd->profile_id = cpu_to_le16(profile_id);
+	/* Set the recipe ID bit in the bitmask to let the device know which
+	 * profile we are associating the recipe to
+	 */
+	memcpy(cmd->recipe_assoc, r_bitmap, sizeof(cmd->recipe_assoc));
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
+ * ice_aq_get_recipe_to_profile - Map recipe to packet profile
+ * @hw: pointer to the HW struct
+ * @profile_id: package profile ID to associate the recipe with
+ * @r_bitmap: Recipe bitmap filled in and need to be returned as response
+ * @cd: pointer to command details structure or NULL
+ * Associate profile ID with given recipe (0x0293)
+ */
+int
+ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+			     struct ice_sq_cd *cd)
+{
+	struct ice_aqc_recipe_to_profile *cmd;
+	struct ice_aq_desc desc;
+	int status;
+
+	cmd = &desc.params.recipe_to_profile;
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_recipe_to_profile);
+	cmd->profile_id = cpu_to_le16(profile_id);
+
+	status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+	if (!status)
+		memcpy(r_bitmap, cmd->recipe_assoc, sizeof(cmd->recipe_assoc));
+
+	return status;
+}
+
+/**
+ * ice_alloc_recipe - add recipe resource
+ * @hw: pointer to the hardware structure
+ * @rid: recipe ID returned as response to AQ call
+ */
+int ice_alloc_recipe(struct ice_hw *hw, u16 *rid)
+{
+	struct ice_aqc_alloc_free_res_elem *sw_buf;
+	u16 buf_len;
+	int status;
+
+	buf_len = struct_size(sw_buf, elem, 1);
+	sw_buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!sw_buf)
+		return -ENOMEM;
+
+	sw_buf->num_elems = cpu_to_le16(1);
+	sw_buf->res_type = cpu_to_le16((ICE_AQC_RES_TYPE_RECIPE <<
+					ICE_AQC_RES_TYPE_S) |
+					ICE_AQC_RES_TYPE_FLAG_SHARED);
+	status = ice_aq_alloc_free_res(hw, sw_buf, buf_len,
+				       ice_aqc_opc_alloc_res);
+	if (!status)
+		*rid = le16_to_cpu(sw_buf->elem[0].e.sw_resp);
+	kfree(sw_buf);
+
+	return status;
+}
+
+/**
+ * ice_get_recp_to_prof_map - updates recipe to profile mapping
+ * @hw: pointer to hardware structure
+ *
+ * This function is used to populate recipe_to_profile matrix where index to
+ * this array is the recipe ID and the element is the mapping of which profiles
+ * is this recipe mapped to.
+ */
+static void ice_get_recp_to_prof_map(struct ice_hw *hw)
+{
+	DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+	u16 i;
+
+	for (i = 0; i < hw->switch_info->max_used_prof_index + 1; i++) {
+		u16 j;
+
+		bitmap_zero(profile_to_recipe[i], ICE_MAX_NUM_RECIPES);
+		bitmap_zero(r_bitmap, ICE_MAX_NUM_RECIPES);
+		if (ice_aq_get_recipe_to_profile(hw, i, (u8 *)r_bitmap, NULL))
+			continue;
+		bitmap_copy(profile_to_recipe[i], r_bitmap,
+			    ICE_MAX_NUM_RECIPES);
+		for_each_set_bit(j, r_bitmap, ICE_MAX_NUM_RECIPES)
+			set_bit(i, recipe_to_profile[j]);
+	}
+}
+
+/**
+ * ice_collect_result_idx - copy result index values
+ * @buf: buffer that contains the result index
+ * @recp: the recipe struct to copy data into
+ */
+static void
+ice_collect_result_idx(struct ice_aqc_recipe_data_elem *buf,
+		       struct ice_sw_recipe *recp)
+{
+	if (buf->content.result_indx & ICE_AQ_RECIPE_RESULT_EN)
+		set_bit(buf->content.result_indx & ~ICE_AQ_RECIPE_RESULT_EN,
+			recp->res_idxs);
+}
+
+/**
+ * ice_get_recp_frm_fw - update SW bookkeeping from FW recipe entries
+ * @hw: pointer to hardware structure
+ * @recps: struct that we need to populate
+ * @rid: recipe ID that we are populating
+ * @refresh_required: true if we should get recipe to profile mapping from FW
+ *
+ * This function is used to populate all the necessary entries into our
+ * bookkeeping so that we have a current list of all the recipes that are
+ * programmed in the firmware.
+ */
+static int
+ice_get_recp_frm_fw(struct ice_hw *hw, struct ice_sw_recipe *recps, u8 rid,
+		    bool *refresh_required)
+{
+	DECLARE_BITMAP(result_bm, ICE_MAX_FV_WORDS);
+	struct ice_aqc_recipe_data_elem *tmp;
+	u16 num_recps = ICE_MAX_NUM_RECIPES;
+	struct ice_prot_lkup_ext *lkup_exts;
+	u8 fv_word_idx = 0;
+	u16 sub_recps;
+	int status;
+
+	bitmap_zero(result_bm, ICE_MAX_FV_WORDS);
+
+	/* we need a buffer big enough to accommodate all the recipes */
+	tmp = kcalloc(ICE_MAX_NUM_RECIPES, sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	tmp[0].recipe_indx = rid;
+	status = ice_aq_get_recipe(hw, tmp, &num_recps, rid, NULL);
+	/* non-zero status meaning recipe doesn't exist */
+	if (status)
+		goto err_unroll;
+
+	/* Get recipe to profile map so that we can get the fv from lkups that
+	 * we read for a recipe from FW. Since we want to minimize the number of
+	 * times we make this FW call, just make one call and cache the copy
+	 * until a new recipe is added. This operation is only required the
+	 * first time to get the changes from FW. Then to search existing
+	 * entries we don't need to update the cache again until another recipe
+	 * gets added.
+	 */
+	if (*refresh_required) {
+		ice_get_recp_to_prof_map(hw);
+		*refresh_required = false;
+	}
+
+	/* Start populating all the entries for recps[rid] based on lkups from
+	 * firmware. Note that we are only creating the root recipe in our
+	 * database.
+	 */
+	lkup_exts = &recps[rid].lkup_exts;
+
+	for (sub_recps = 0; sub_recps < num_recps; sub_recps++) {
+		struct ice_aqc_recipe_data_elem root_bufs = tmp[sub_recps];
+		struct ice_recp_grp_entry *rg_entry;
+		u8 i, prof, idx, prot = 0;
+		bool is_root;
+		u16 off = 0;
+
+		rg_entry = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rg_entry),
+					GFP_KERNEL);
+		if (!rg_entry) {
+			status = -ENOMEM;
+			goto err_unroll;
+		}
+
+		idx = root_bufs.recipe_indx;
+		is_root = root_bufs.content.rid & ICE_AQ_RECIPE_ID_IS_ROOT;
+
+		/* Mark all result indices in this chain */
+		if (root_bufs.content.result_indx & ICE_AQ_RECIPE_RESULT_EN)
+			set_bit(root_bufs.content.result_indx & ~ICE_AQ_RECIPE_RESULT_EN,
+				result_bm);
+
+		/* get the first profile that is associated with rid */
+		prof = find_first_bit(recipe_to_profile[idx],
+				      ICE_MAX_NUM_PROFILES);
+		for (i = 0; i < ICE_NUM_WORDS_RECIPE; i++) {
+			u8 lkup_indx = root_bufs.content.lkup_indx[i + 1];
+
+			rg_entry->fv_idx[i] = lkup_indx;
+			rg_entry->fv_mask[i] =
+				le16_to_cpu(root_bufs.content.mask[i + 1]);
+
+			/* If the recipe is a chained recipe then all its
+			 * child recipe's result will have a result index.
+			 * To fill fv_words we should not use those result
+			 * index, we only need the protocol ids and offsets.
+			 * We will skip all the fv_idx which stores result
+			 * index in them. We also need to skip any fv_idx which
+			 * has ICE_AQ_RECIPE_LKUP_IGNORE or 0 since it isn't a
+			 * valid offset value.
+			 */
+			if (test_bit(rg_entry->fv_idx[i], hw->switch_info->prof_res_bm[prof]) ||
+			    rg_entry->fv_idx[i] & ICE_AQ_RECIPE_LKUP_IGNORE ||
+			    rg_entry->fv_idx[i] == 0)
+				continue;
+
+			ice_find_prot_off(hw, ICE_BLK_SW, prof,
+					  rg_entry->fv_idx[i], &prot, &off);
+			lkup_exts->fv_words[fv_word_idx].prot_id = prot;
+			lkup_exts->fv_words[fv_word_idx].off = off;
+			lkup_exts->field_mask[fv_word_idx] =
+				rg_entry->fv_mask[i];
+			fv_word_idx++;
+		}
+		/* populate rg_list with the data from the child entry of this
+		 * recipe
+		 */
+		list_add(&rg_entry->l_entry, &recps[rid].rg_list);
+
+		/* Propagate some data to the recipe database */
+		recps[idx].is_root = !!is_root;
+		recps[idx].priority = root_bufs.content.act_ctrl_fwd_priority;
+		recps[idx].need_pass_l2 = root_bufs.content.act_ctrl &
+					  ICE_AQ_RECIPE_ACT_NEED_PASS_L2;
+		recps[idx].allow_pass_l2 = root_bufs.content.act_ctrl &
+					   ICE_AQ_RECIPE_ACT_ALLOW_PASS_L2;
+		bitmap_zero(recps[idx].res_idxs, ICE_MAX_FV_WORDS);
+		if (root_bufs.content.result_indx & ICE_AQ_RECIPE_RESULT_EN) {
+			recps[idx].chain_idx = root_bufs.content.result_indx &
+				~ICE_AQ_RECIPE_RESULT_EN;
+			set_bit(recps[idx].chain_idx, recps[idx].res_idxs);
+		} else {
+			recps[idx].chain_idx = ICE_INVAL_CHAIN_IND;
+		}
+
+		if (!is_root)
+			continue;
+
+		/* Only do the following for root recipes entries */
+		memcpy(recps[idx].r_bitmap, root_bufs.recipe_bitmap,
+		       sizeof(recps[idx].r_bitmap));
+		recps[idx].root_rid = root_bufs.content.rid &
+			~ICE_AQ_RECIPE_ID_IS_ROOT;
+		recps[idx].priority = root_bufs.content.act_ctrl_fwd_priority;
+	}
+
+	/* Complete initialization of the root recipe entry */
+	lkup_exts->n_val_words = fv_word_idx;
+	recps[rid].big_recp = (num_recps > 1);
+	recps[rid].n_grp_count = (u8)num_recps;
+	recps[rid].root_buf = devm_kmemdup(ice_hw_to_dev(hw), tmp,
+					   recps[rid].n_grp_count * sizeof(*recps[rid].root_buf),
+					   GFP_KERNEL);
+	if (!recps[rid].root_buf) {
+		status = -ENOMEM;
+		goto err_unroll;
+	}
+
+	/* Copy result indexes */
+	bitmap_copy(recps[rid].res_idxs, result_bm, ICE_MAX_FV_WORDS);
+	recps[rid].recp_created = true;
+
+err_unroll:
+	kfree(tmp);
+	return status;
+}
+
+/* ice_init_port_info - Initialize port_info with switch configuration data
+ * @pi: pointer to port_info
+ * @vsi_port_num: VSI number or port number
+ * @type: Type of switch element (port or VSI)
+ * @swid: switch ID of the switch the element is attached to
+ * @pf_vf_num: PF or VF number
+ * @is_vf: true if the element is a VF, false otherwise
+ */
+static void
+ice_init_port_info(struct ice_port_info *pi, u16 vsi_port_num, u8 type,
+		   u16 swid, u16 pf_vf_num, bool is_vf)
+{
+	switch (type) {
+	case ICE_AQC_GET_SW_CONF_RESP_PHYS_PORT:
+		pi->lport = (u8)(vsi_port_num & ICE_LPORT_MASK);
+		pi->sw_id = swid;
+		pi->pf_vf_num = pf_vf_num;
+		pi->is_vf = is_vf;
+		break;
+	default:
+		ice_debug(pi->hw, ICE_DBG_SW, "incorrect VSI/port type received\n");
+		break;
+	}
+}
+
+/* ice_get_initial_sw_cfg - Get initial port and default VSI data
+ * @hw: pointer to the hardware structure
+ */
+int ice_get_initial_sw_cfg(struct ice_hw *hw)
+{
+	struct ice_aqc_get_sw_cfg_resp_elem *rbuf;
+	u16 req_desc = 0;
+	u16 num_elems;
+	int status;
+	u16 i;
+
+	rbuf = kzalloc(ICE_SW_CFG_MAX_BUF_LEN, GFP_KERNEL);
+	if (!rbuf)
+		return -ENOMEM;
+
+	/* Multiple calls to ice_aq_get_sw_cfg may be required
+	 * to get all the switch configuration information. The need
+	 * for additional calls is indicated by ice_aq_get_sw_cfg
+	 * writing a non-zero value in req_desc
+	 */
+	do {
+		struct ice_aqc_get_sw_cfg_resp_elem *ele;
+
+		status = ice_aq_get_sw_cfg(hw, rbuf, ICE_SW_CFG_MAX_BUF_LEN,
+					   &req_desc, &num_elems, NULL);
+
+		if (status)
+			break;
+
+		for (i = 0, ele = rbuf; i < num_elems; i++, ele++) {
+			u16 pf_vf_num, swid, vsi_port_num;
+			bool is_vf = false;
+			u8 res_type;
+
+			vsi_port_num = le16_to_cpu(ele->vsi_port_num) &
+				ICE_AQC_GET_SW_CONF_RESP_VSI_PORT_NUM_M;
+
+			pf_vf_num = le16_to_cpu(ele->pf_vf_num) &
+				ICE_AQC_GET_SW_CONF_RESP_FUNC_NUM_M;
+
+			swid = le16_to_cpu(ele->swid);
+
+			if (le16_to_cpu(ele->pf_vf_num) &
+			    ICE_AQC_GET_SW_CONF_RESP_IS_VF)
+				is_vf = true;
+
+			res_type = (u8)(le16_to_cpu(ele->vsi_port_num) >>
+					ICE_AQC_GET_SW_CONF_RESP_TYPE_S);
+
+			if (res_type == ICE_AQC_GET_SW_CONF_RESP_VSI) {
+				/* FW VSI is not needed. Just continue. */
+				continue;
+			}
+
+			ice_init_port_info(hw->port_info, vsi_port_num,
+					   res_type, swid, pf_vf_num, is_vf);
+		}
+	} while (req_desc && !status);
+
+	kfree(rbuf);
+	return status;
+}
+
+/**
+ * ice_fill_sw_info - Helper function to populate lb_en and lan_en
+ * @hw: pointer to the hardware structure
+ * @fi: filter info structure to fill/update
+ *
+ * This helper function populates the lb_en and lan_en elements of the provided
+ * ice_fltr_info struct using the switch's type and characteristics of the
+ * switch rule being configured.
+ */
+static void ice_fill_sw_info(struct ice_hw *hw, struct ice_fltr_info *fi)
+{
+	fi->lb_en = false;
+	fi->lan_en = false;
+	if ((fi->flag & ICE_FLTR_TX) &&
+	    (fi->fltr_act == ICE_FWD_TO_VSI ||
+	     fi->fltr_act == ICE_FWD_TO_VSI_LIST ||
+	     fi->fltr_act == ICE_FWD_TO_Q ||
+	     fi->fltr_act == ICE_FWD_TO_QGRP)) {
+		/* Setting LB for prune actions will result in replicated
+		 * packets to the internal switch that will be dropped.
+		 */
+		if (fi->lkup_type != ICE_SW_LKUP_VLAN)
+			fi->lb_en = true;
+
+		/* Set lan_en to TRUE if
+		 * 1. The switch is a VEB AND
+		 * 2
+		 * 2.1 The lookup is a directional lookup like ethertype,
+		 * promiscuous, ethertype-MAC, promiscuous-VLAN
+		 * and default-port OR
+		 * 2.2 The lookup is VLAN, OR
+		 * 2.3 The lookup is MAC with mcast or bcast addr for MAC, OR
+		 * 2.4 The lookup is MAC_VLAN with mcast or bcast addr for MAC.
+		 *
+		 * OR
+		 *
+		 * The switch is a VEPA.
+		 *
+		 * In all other cases, the LAN enable has to be set to false.
+		 */
+		if (hw->evb_veb) {
+			if (fi->lkup_type == ICE_SW_LKUP_ETHERTYPE ||
+			    fi->lkup_type == ICE_SW_LKUP_PROMISC ||
+			    fi->lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
+			    fi->lkup_type == ICE_SW_LKUP_PROMISC_VLAN ||
+			    fi->lkup_type == ICE_SW_LKUP_DFLT ||
+			    fi->lkup_type == ICE_SW_LKUP_VLAN ||
+			    (fi->lkup_type == ICE_SW_LKUP_MAC &&
+			     !is_unicast_ether_addr(fi->l_data.mac.mac_addr)) ||
+			    (fi->lkup_type == ICE_SW_LKUP_MAC_VLAN &&
+			     !is_unicast_ether_addr(fi->l_data.mac.mac_addr)))
+				fi->lan_en = true;
+		} else {
+			fi->lan_en = true;
+		}
+	}
+}
+
+/**
+ * ice_fill_eth_hdr - helper to copy dummy_eth_hdr into supplied buffer
+ * @eth_hdr: pointer to buffer to populate
+ */
+void ice_fill_eth_hdr(u8 *eth_hdr)
+{
+	memcpy(eth_hdr, dummy_eth_header, DUMMY_ETH_HDR_LEN);
+}
+
+/**
+ * ice_fill_sw_rule - Helper function to fill switch rule structure
+ * @hw: pointer to the hardware structure
+ * @f_info: entry containing packet forwarding information
+ * @s_rule: switch rule structure to be filled in based on mac_entry
+ * @opc: switch rules population command type - pass in the command opcode
+ */
+static void
+ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info,
+		 struct ice_sw_rule_lkup_rx_tx *s_rule,
+		 enum ice_adminq_opc opc)
+{
+	u16 vlan_id = ICE_MAX_VLAN_ID + 1;
+	u16 vlan_tpid = ETH_P_8021Q;
+	void *daddr = NULL;
+	u16 eth_hdr_sz;
+	u8 *eth_hdr;
+	u32 act = 0;
+	__be16 *off;
+	u8 q_rgn;
+
+	if (opc == ice_aqc_opc_remove_sw_rules) {
+		s_rule->act = 0;
+		s_rule->index = cpu_to_le16(f_info->fltr_rule_id);
+		s_rule->hdr_len = 0;
+		return;
+	}
+
+	eth_hdr_sz = sizeof(dummy_eth_header);
+	eth_hdr = s_rule->hdr_data;
+
+	/* initialize the ether header with a dummy header */
+	memcpy(eth_hdr, dummy_eth_header, eth_hdr_sz);
+	ice_fill_sw_info(hw, f_info);
+
+	switch (f_info->fltr_act) {
+	case ICE_FWD_TO_VSI:
+		act |= (f_info->fwd_id.hw_vsi_id << ICE_SINGLE_ACT_VSI_ID_S) &
+			ICE_SINGLE_ACT_VSI_ID_M;
+		if (f_info->lkup_type != ICE_SW_LKUP_VLAN)
+			act |= ICE_SINGLE_ACT_VSI_FORWARDING |
+				ICE_SINGLE_ACT_VALID_BIT;
+		break;
+	case ICE_FWD_TO_VSI_LIST:
+		act |= ICE_SINGLE_ACT_VSI_LIST;
+		act |= (f_info->fwd_id.vsi_list_id <<
+			ICE_SINGLE_ACT_VSI_LIST_ID_S) &
+			ICE_SINGLE_ACT_VSI_LIST_ID_M;
+		if (f_info->lkup_type != ICE_SW_LKUP_VLAN)
+			act |= ICE_SINGLE_ACT_VSI_FORWARDING |
+				ICE_SINGLE_ACT_VALID_BIT;
+		break;
+	case ICE_FWD_TO_Q:
+		act |= ICE_SINGLE_ACT_TO_Q;
+		act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) &
+			ICE_SINGLE_ACT_Q_INDEX_M;
+		break;
+	case ICE_DROP_PACKET:
+		act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP |
+			ICE_SINGLE_ACT_VALID_BIT;
+		break;
+	case ICE_FWD_TO_QGRP:
+		q_rgn = f_info->qgrp_size > 0 ?
+			(u8)ilog2(f_info->qgrp_size) : 0;
+		act |= ICE_SINGLE_ACT_TO_Q;
+		act |= (f_info->fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) &
+			ICE_SINGLE_ACT_Q_INDEX_M;
+		act |= (q_rgn << ICE_SINGLE_ACT_Q_REGION_S) &
+			ICE_SINGLE_ACT_Q_REGION_M;
+		break;
+	default:
+		return;
+	}
+
+	if (f_info->lb_en)
+		act |= ICE_SINGLE_ACT_LB_ENABLE;
+	if (f_info->lan_en)
+		act |= ICE_SINGLE_ACT_LAN_ENABLE;
+
+	switch (f_info->lkup_type) {
+	case ICE_SW_LKUP_MAC:
+		daddr = f_info->l_data.mac.mac_addr;
+		break;
+	case ICE_SW_LKUP_VLAN:
+		vlan_id = f_info->l_data.vlan.vlan_id;
+		if (f_info->l_data.vlan.tpid_valid)
+			vlan_tpid = f_info->l_data.vlan.tpid;
+		if (f_info->fltr_act == ICE_FWD_TO_VSI ||
+		    f_info->fltr_act == ICE_FWD_TO_VSI_LIST) {
+			act |= ICE_SINGLE_ACT_PRUNE;
+			act |= ICE_SINGLE_ACT_EGRESS | ICE_SINGLE_ACT_INGRESS;
+		}
+		break;
+	case ICE_SW_LKUP_ETHERTYPE_MAC:
+		daddr = f_info->l_data.ethertype_mac.mac_addr;
+		fallthrough;
+	case ICE_SW_LKUP_ETHERTYPE:
+		off = (__force __be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET);
+		*off = cpu_to_be16(f_info->l_data.ethertype_mac.ethertype);
+		break;
+	case ICE_SW_LKUP_MAC_VLAN:
+		daddr = f_info->l_data.mac_vlan.mac_addr;
+		vlan_id = f_info->l_data.mac_vlan.vlan_id;
+		break;
+	case ICE_SW_LKUP_PROMISC_VLAN:
+		vlan_id = f_info->l_data.mac_vlan.vlan_id;
+		fallthrough;
+	case ICE_SW_LKUP_PROMISC:
+		daddr = f_info->l_data.mac_vlan.mac_addr;
+		break;
+	default:
+		break;
+	}
+
+	s_rule->hdr.type = (f_info->flag & ICE_FLTR_RX) ?
+		cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX) :
+		cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX);
+
+	/* Recipe set depending on lookup type */
+	s_rule->recipe_id = cpu_to_le16(f_info->lkup_type);
+	s_rule->src = cpu_to_le16(f_info->src);
+	s_rule->act = cpu_to_le32(act);
+
+	if (daddr)
+		ether_addr_copy(eth_hdr + ICE_ETH_DA_OFFSET, daddr);
+
+	if (!(vlan_id > ICE_MAX_VLAN_ID)) {
+		off = (__force __be16 *)(eth_hdr + ICE_ETH_VLAN_TCI_OFFSET);
+		*off = cpu_to_be16(vlan_id);
+		off = (__force __be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET);
+		*off = cpu_to_be16(vlan_tpid);
+	}
+
+	/* Create the switch rule with the final dummy Ethernet header */
+	if (opc != ice_aqc_opc_update_sw_rules)
+		s_rule->hdr_len = cpu_to_le16(eth_hdr_sz);
+}
+
+/**
+ * ice_add_marker_act
+ * @hw: pointer to the hardware structure
+ * @m_ent: the management entry for which sw marker needs to be added
+ * @sw_marker: sw marker to tag the Rx descriptor with
+ * @l_id: large action resource ID
+ *
+ * Create a large action to hold software marker and update the switch rule
+ * entry pointed by m_ent with newly created large action
+ */
+static int
+ice_add_marker_act(struct ice_hw *hw, struct ice_fltr_mgmt_list_entry *m_ent,
+		   u16 sw_marker, u16 l_id)
+{
+	struct ice_sw_rule_lkup_rx_tx *rx_tx;
+	struct ice_sw_rule_lg_act *lg_act;
+	/* For software marker we need 3 large actions
+	 * 1. FWD action: FWD TO VSI or VSI LIST
+	 * 2. GENERIC VALUE action to hold the profile ID
+	 * 3. GENERIC VALUE action to hold the software marker ID
+	 */
+	const u16 num_lg_acts = 3;
+	u16 lg_act_size;
+	u16 rules_size;
+	int status;
+	u32 act;
+	u16 id;
+
+	if (m_ent->fltr_info.lkup_type != ICE_SW_LKUP_MAC)
+		return -EINVAL;
+
+	/* Create two back-to-back switch rules and submit them to the HW using
+	 * one memory buffer:
+	 *    1. Large Action
+	 *    2. Look up Tx Rx
+	 */
+	lg_act_size = (u16)ICE_SW_RULE_LG_ACT_SIZE(lg_act, num_lg_acts);
+	rules_size = lg_act_size + ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(rx_tx);
+	lg_act = devm_kzalloc(ice_hw_to_dev(hw), rules_size, GFP_KERNEL);
+	if (!lg_act)
+		return -ENOMEM;
+
+	rx_tx = (typeof(rx_tx))((u8 *)lg_act + lg_act_size);
+
+	/* Fill in the first switch rule i.e. large action */
+	lg_act->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LG_ACT);
+	lg_act->index = cpu_to_le16(l_id);
+	lg_act->size = cpu_to_le16(num_lg_acts);
+
+	/* First action VSI forwarding or VSI list forwarding depending on how
+	 * many VSIs
+	 */
+	id = (m_ent->vsi_count > 1) ? m_ent->fltr_info.fwd_id.vsi_list_id :
+		m_ent->fltr_info.fwd_id.hw_vsi_id;
+
+	act = ICE_LG_ACT_VSI_FORWARDING | ICE_LG_ACT_VALID_BIT;
+	act |= (id << ICE_LG_ACT_VSI_LIST_ID_S) & ICE_LG_ACT_VSI_LIST_ID_M;
+	if (m_ent->vsi_count > 1)
+		act |= ICE_LG_ACT_VSI_LIST;
+	lg_act->act[0] = cpu_to_le32(act);
+
+	/* Second action descriptor type */
+	act = ICE_LG_ACT_GENERIC;
+
+	act |= (1 << ICE_LG_ACT_GENERIC_VALUE_S) & ICE_LG_ACT_GENERIC_VALUE_M;
+	lg_act->act[1] = cpu_to_le32(act);
+
+	act = (ICE_LG_ACT_GENERIC_OFF_RX_DESC_PROF_IDX <<
+	       ICE_LG_ACT_GENERIC_OFFSET_S) & ICE_LG_ACT_GENERIC_OFFSET_M;
+
+	/* Third action Marker value */
+	act |= ICE_LG_ACT_GENERIC;
+	act |= (sw_marker << ICE_LG_ACT_GENERIC_VALUE_S) &
+		ICE_LG_ACT_GENERIC_VALUE_M;
+
+	lg_act->act[2] = cpu_to_le32(act);
+
+	/* call the fill switch rule to fill the lookup Tx Rx structure */
+	ice_fill_sw_rule(hw, &m_ent->fltr_info, rx_tx,
+			 ice_aqc_opc_update_sw_rules);
+
+	/* Update the action to point to the large action ID */
+	rx_tx->act = cpu_to_le32(ICE_SINGLE_ACT_PTR |
+				 ((l_id << ICE_SINGLE_ACT_PTR_VAL_S) &
+				  ICE_SINGLE_ACT_PTR_VAL_M));
+
+	/* Use the filter rule ID of the previously created rule with single
+	 * act. Once the update happens, hardware will treat this as large
+	 * action
+	 */
+	rx_tx->index = cpu_to_le16(m_ent->fltr_info.fltr_rule_id);
+
+	status = ice_aq_sw_rules(hw, lg_act, rules_size, 2,
+				 ice_aqc_opc_update_sw_rules, NULL);
+	if (!status) {
+		m_ent->lg_act_idx = l_id;
+		m_ent->sw_marker_id = sw_marker;
+	}
+
+	devm_kfree(ice_hw_to_dev(hw), lg_act);
+	return status;
+}
+
+/**
+ * ice_create_vsi_list_map
+ * @hw: pointer to the hardware structure
+ * @vsi_handle_arr: array of VSI handles to set in the VSI mapping
+ * @num_vsi: number of VSI handles in the array
+ * @vsi_list_id: VSI list ID generated as part of allocate resource
+ *
+ * Helper function to create a new entry of VSI list ID to VSI mapping
+ * using the given VSI list ID
+ */
+static struct ice_vsi_list_map_info *
+ice_create_vsi_list_map(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
+			u16 vsi_list_id)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_vsi_list_map_info *v_map;
+	int i;
+
+	v_map = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*v_map), GFP_KERNEL);
+	if (!v_map)
+		return NULL;
+
+	v_map->vsi_list_id = vsi_list_id;
+	v_map->ref_cnt = 1;
+	for (i = 0; i < num_vsi; i++)
+		set_bit(vsi_handle_arr[i], v_map->vsi_map);
+
+	list_add(&v_map->list_entry, &sw->vsi_list_map_head);
+	return v_map;
+}
+
+/**
+ * ice_update_vsi_list_rule
+ * @hw: pointer to the hardware structure
+ * @vsi_handle_arr: array of VSI handles to form a VSI list
+ * @num_vsi: number of VSI handles in the array
+ * @vsi_list_id: VSI list ID generated as part of allocate resource
+ * @remove: Boolean value to indicate if this is a remove action
+ * @opc: switch rules population command type - pass in the command opcode
+ * @lkup_type: lookup type of the filter
+ *
+ * Call AQ command to add a new switch rule or update existing switch rule
+ * using the given VSI list ID
+ */
+static int
+ice_update_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
+			 u16 vsi_list_id, bool remove, enum ice_adminq_opc opc,
+			 enum ice_sw_lkup_type lkup_type)
+{
+	struct ice_sw_rule_vsi_list *s_rule;
+	u16 s_rule_size;
+	u16 rule_type;
+	int status;
+	int i;
+
+	if (!num_vsi)
+		return -EINVAL;
+
+	if (lkup_type == ICE_SW_LKUP_MAC ||
+	    lkup_type == ICE_SW_LKUP_MAC_VLAN ||
+	    lkup_type == ICE_SW_LKUP_ETHERTYPE ||
+	    lkup_type == ICE_SW_LKUP_ETHERTYPE_MAC ||
+	    lkup_type == ICE_SW_LKUP_PROMISC ||
+	    lkup_type == ICE_SW_LKUP_PROMISC_VLAN ||
+	    lkup_type == ICE_SW_LKUP_DFLT)
+		rule_type = remove ? ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR :
+			ICE_AQC_SW_RULES_T_VSI_LIST_SET;
+	else if (lkup_type == ICE_SW_LKUP_VLAN)
+		rule_type = remove ? ICE_AQC_SW_RULES_T_PRUNE_LIST_CLEAR :
+			ICE_AQC_SW_RULES_T_PRUNE_LIST_SET;
+	else
+		return -EINVAL;
+
+	s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(s_rule, num_vsi);
+	s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
+	if (!s_rule)
+		return -ENOMEM;
+	for (i = 0; i < num_vsi; i++) {
+		if (!ice_is_vsi_valid(hw, vsi_handle_arr[i])) {
+			status = -EINVAL;
+			goto exit;
+		}
+		/* AQ call requires hw_vsi_id(s) */
+		s_rule->vsi[i] =
+			cpu_to_le16(ice_get_hw_vsi_num(hw, vsi_handle_arr[i]));
+	}
+
+	s_rule->hdr.type = cpu_to_le16(rule_type);
+	s_rule->number_vsi = cpu_to_le16(num_vsi);
+	s_rule->index = cpu_to_le16(vsi_list_id);
+
+	status = ice_aq_sw_rules(hw, s_rule, s_rule_size, 1, opc, NULL);
+
+exit:
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_create_vsi_list_rule - Creates and populates a VSI list rule
+ * @hw: pointer to the HW struct
+ * @vsi_handle_arr: array of VSI handles to form a VSI list
+ * @num_vsi: number of VSI handles in the array
+ * @vsi_list_id: stores the ID of the VSI list to be created
+ * @lkup_type: switch rule filter's lookup type
+ */
+static int
+ice_create_vsi_list_rule(struct ice_hw *hw, u16 *vsi_handle_arr, u16 num_vsi,
+			 u16 *vsi_list_id, enum ice_sw_lkup_type lkup_type)
+{
+	int status;
+
+	status = ice_aq_alloc_free_vsi_list(hw, vsi_list_id, lkup_type,
+					    ice_aqc_opc_alloc_res);
+	if (status)
+		return status;
+
+	/* Update the newly created VSI list to include the specified VSIs */
+	return ice_update_vsi_list_rule(hw, vsi_handle_arr, num_vsi,
+					*vsi_list_id, false,
+					ice_aqc_opc_add_sw_rules, lkup_type);
+}
+
+/**
+ * ice_create_pkt_fwd_rule
+ * @hw: pointer to the hardware structure
+ * @f_entry: entry containing packet forwarding information
+ *
+ * Create switch rule with given filter information and add an entry
+ * to the corresponding filter management list to track this switch rule
+ * and VSI mapping
+ */
+static int
+ice_create_pkt_fwd_rule(struct ice_hw *hw,
+			struct ice_fltr_list_entry *f_entry)
+{
+	struct ice_fltr_mgmt_list_entry *fm_entry;
+	struct ice_sw_rule_lkup_rx_tx *s_rule;
+	enum ice_sw_lkup_type l_type;
+	struct ice_sw_recipe *recp;
+	int status;
+
+	s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+			      ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule),
+			      GFP_KERNEL);
+	if (!s_rule)
+		return -ENOMEM;
+	fm_entry = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*fm_entry),
+				GFP_KERNEL);
+	if (!fm_entry) {
+		status = -ENOMEM;
+		goto ice_create_pkt_fwd_rule_exit;
+	}
+
+	fm_entry->fltr_info = f_entry->fltr_info;
+
+	/* Initialize all the fields for the management entry */
+	fm_entry->vsi_count = 1;
+	fm_entry->lg_act_idx = ICE_INVAL_LG_ACT_INDEX;
+	fm_entry->sw_marker_id = ICE_INVAL_SW_MARKER_ID;
+	fm_entry->counter_index = ICE_INVAL_COUNTER_ID;
+
+	ice_fill_sw_rule(hw, &fm_entry->fltr_info, s_rule,
+			 ice_aqc_opc_add_sw_rules);
+
+	status = ice_aq_sw_rules(hw, s_rule,
+				 ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule), 1,
+				 ice_aqc_opc_add_sw_rules, NULL);
+	if (status) {
+		devm_kfree(ice_hw_to_dev(hw), fm_entry);
+		goto ice_create_pkt_fwd_rule_exit;
+	}
+
+	f_entry->fltr_info.fltr_rule_id = le16_to_cpu(s_rule->index);
+	fm_entry->fltr_info.fltr_rule_id = le16_to_cpu(s_rule->index);
+
+	/* The book keeping entries will get removed when base driver
+	 * calls remove filter AQ command
+	 */
+	l_type = fm_entry->fltr_info.lkup_type;
+	recp = &hw->switch_info->recp_list[l_type];
+	list_add(&fm_entry->list_entry, &recp->filt_rules);
+
+ice_create_pkt_fwd_rule_exit:
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_update_pkt_fwd_rule
+ * @hw: pointer to the hardware structure
+ * @f_info: filter information for switch rule
+ *
+ * Call AQ command to update a previously created switch rule with a
+ * VSI list ID
+ */
+static int
+ice_update_pkt_fwd_rule(struct ice_hw *hw, struct ice_fltr_info *f_info)
+{
+	struct ice_sw_rule_lkup_rx_tx *s_rule;
+	int status;
+
+	s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+			      ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule),
+			      GFP_KERNEL);
+	if (!s_rule)
+		return -ENOMEM;
+
+	ice_fill_sw_rule(hw, f_info, s_rule, ice_aqc_opc_update_sw_rules);
+
+	s_rule->index = cpu_to_le16(f_info->fltr_rule_id);
+
+	/* Update switch rule with new rule set to forward VSI list */
+	status = ice_aq_sw_rules(hw, s_rule,
+				 ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule), 1,
+				 ice_aqc_opc_update_sw_rules, NULL);
+
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_update_sw_rule_bridge_mode
+ * @hw: pointer to the HW struct
+ *
+ * Updates unicast switch filter rules based on VEB/VEPA mode
+ */
+int ice_update_sw_rule_bridge_mode(struct ice_hw *hw)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_mgmt_list_entry *fm_entry;
+	struct list_head *rule_head;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+	int status = 0;
+
+	rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock;
+	rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules;
+
+	mutex_lock(rule_lock);
+	list_for_each_entry(fm_entry, rule_head, list_entry) {
+		struct ice_fltr_info *fi = &fm_entry->fltr_info;
+		u8 *addr = fi->l_data.mac.mac_addr;
+
+		/* Update unicast Tx rules to reflect the selected
+		 * VEB/VEPA mode
+		 */
+		if ((fi->flag & ICE_FLTR_TX) && is_unicast_ether_addr(addr) &&
+		    (fi->fltr_act == ICE_FWD_TO_VSI ||
+		     fi->fltr_act == ICE_FWD_TO_VSI_LIST ||
+		     fi->fltr_act == ICE_FWD_TO_Q ||
+		     fi->fltr_act == ICE_FWD_TO_QGRP)) {
+			status = ice_update_pkt_fwd_rule(hw, fi);
+			if (status)
+				break;
+		}
+	}
+
+	mutex_unlock(rule_lock);
+
+	return status;
+}
+
+/**
+ * ice_add_update_vsi_list
+ * @hw: pointer to the hardware structure
+ * @m_entry: pointer to current filter management list entry
+ * @cur_fltr: filter information from the book keeping entry
+ * @new_fltr: filter information with the new VSI to be added
+ *
+ * Call AQ command to add or update previously created VSI list with new VSI.
+ *
+ * Helper function to do book keeping associated with adding filter information
+ * The algorithm to do the book keeping is described below :
+ * When a VSI needs to subscribe to a given filter (MAC/VLAN/Ethtype etc.)
+ *	if only one VSI has been added till now
+ *		Allocate a new VSI list and add two VSIs
+ *		to this list using switch rule command
+ *		Update the previously created switch rule with the
+ *		newly created VSI list ID
+ *	if a VSI list was previously created
+ *		Add the new VSI to the previously created VSI list set
+ *		using the update switch rule command
+ */
+static int
+ice_add_update_vsi_list(struct ice_hw *hw,
+			struct ice_fltr_mgmt_list_entry *m_entry,
+			struct ice_fltr_info *cur_fltr,
+			struct ice_fltr_info *new_fltr)
+{
+	u16 vsi_list_id = 0;
+	int status = 0;
+
+	if ((cur_fltr->fltr_act == ICE_FWD_TO_Q ||
+	     cur_fltr->fltr_act == ICE_FWD_TO_QGRP))
+		return -EOPNOTSUPP;
+
+	if ((new_fltr->fltr_act == ICE_FWD_TO_Q ||
+	     new_fltr->fltr_act == ICE_FWD_TO_QGRP) &&
+	    (cur_fltr->fltr_act == ICE_FWD_TO_VSI ||
+	     cur_fltr->fltr_act == ICE_FWD_TO_VSI_LIST))
+		return -EOPNOTSUPP;
+
+	if (m_entry->vsi_count < 2 && !m_entry->vsi_list_info) {
+		/* Only one entry existed in the mapping and it was not already
+		 * a part of a VSI list. So, create a VSI list with the old and
+		 * new VSIs.
+		 */
+		struct ice_fltr_info tmp_fltr;
+		u16 vsi_handle_arr[2];
+
+		/* A rule already exists with the new VSI being added */
+		if (cur_fltr->fwd_id.hw_vsi_id == new_fltr->fwd_id.hw_vsi_id)
+			return -EEXIST;
+
+		vsi_handle_arr[0] = cur_fltr->vsi_handle;
+		vsi_handle_arr[1] = new_fltr->vsi_handle;
+		status = ice_create_vsi_list_rule(hw, &vsi_handle_arr[0], 2,
+						  &vsi_list_id,
+						  new_fltr->lkup_type);
+		if (status)
+			return status;
+
+		tmp_fltr = *new_fltr;
+		tmp_fltr.fltr_rule_id = cur_fltr->fltr_rule_id;
+		tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST;
+		tmp_fltr.fwd_id.vsi_list_id = vsi_list_id;
+		/* Update the previous switch rule of "MAC forward to VSI" to
+		 * "MAC fwd to VSI list"
+		 */
+		status = ice_update_pkt_fwd_rule(hw, &tmp_fltr);
+		if (status)
+			return status;
+
+		cur_fltr->fwd_id.vsi_list_id = vsi_list_id;
+		cur_fltr->fltr_act = ICE_FWD_TO_VSI_LIST;
+		m_entry->vsi_list_info =
+			ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2,
+						vsi_list_id);
+
+		if (!m_entry->vsi_list_info)
+			return -ENOMEM;
+
+		/* If this entry was large action then the large action needs
+		 * to be updated to point to FWD to VSI list
+		 */
+		if (m_entry->sw_marker_id != ICE_INVAL_SW_MARKER_ID)
+			status =
+			    ice_add_marker_act(hw, m_entry,
+					       m_entry->sw_marker_id,
+					       m_entry->lg_act_idx);
+	} else {
+		u16 vsi_handle = new_fltr->vsi_handle;
+		enum ice_adminq_opc opcode;
+
+		if (!m_entry->vsi_list_info)
+			return -EIO;
+
+		/* A rule already exists with the new VSI being added */
+		if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map))
+			return 0;
+
+		/* Update the previously created VSI list set with
+		 * the new VSI ID passed in
+		 */
+		vsi_list_id = cur_fltr->fwd_id.vsi_list_id;
+		opcode = ice_aqc_opc_update_sw_rules;
+
+		status = ice_update_vsi_list_rule(hw, &vsi_handle, 1,
+						  vsi_list_id, false, opcode,
+						  new_fltr->lkup_type);
+		/* update VSI list mapping info with new VSI ID */
+		if (!status)
+			set_bit(vsi_handle, m_entry->vsi_list_info->vsi_map);
+	}
+	if (!status)
+		m_entry->vsi_count++;
+	return status;
+}
+
+/**
+ * ice_find_rule_entry - Search a rule entry
+ * @hw: pointer to the hardware structure
+ * @recp_id: lookup type for which the specified rule needs to be searched
+ * @f_info: rule information
+ *
+ * Helper function to search for a given rule entry
+ * Returns pointer to entry storing the rule if found
+ */
+static struct ice_fltr_mgmt_list_entry *
+ice_find_rule_entry(struct ice_hw *hw, u8 recp_id, struct ice_fltr_info *f_info)
+{
+	struct ice_fltr_mgmt_list_entry *list_itr, *ret = NULL;
+	struct ice_switch_info *sw = hw->switch_info;
+	struct list_head *list_head;
+
+	list_head = &sw->recp_list[recp_id].filt_rules;
+	list_for_each_entry(list_itr, list_head, list_entry) {
+		if (!memcmp(&f_info->l_data, &list_itr->fltr_info.l_data,
+			    sizeof(f_info->l_data)) &&
+		    f_info->flag == list_itr->fltr_info.flag) {
+			ret = list_itr;
+			break;
+		}
+	}
+	return ret;
+}
+
+/**
+ * ice_find_vsi_list_entry - Search VSI list map with VSI count 1
+ * @hw: pointer to the hardware structure
+ * @recp_id: lookup type for which VSI lists needs to be searched
+ * @vsi_handle: VSI handle to be found in VSI list
+ * @vsi_list_id: VSI list ID found containing vsi_handle
+ *
+ * Helper function to search a VSI list with single entry containing given VSI
+ * handle element. This can be extended further to search VSI list with more
+ * than 1 vsi_count. Returns pointer to VSI list entry if found.
+ */
+struct ice_vsi_list_map_info *
+ice_find_vsi_list_entry(struct ice_hw *hw, u8 recp_id, u16 vsi_handle,
+			u16 *vsi_list_id)
+{
+	struct ice_vsi_list_map_info *map_info = NULL;
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_mgmt_list_entry *list_itr;
+	struct list_head *list_head;
+
+	list_head = &sw->recp_list[recp_id].filt_rules;
+	list_for_each_entry(list_itr, list_head, list_entry) {
+		if (list_itr->vsi_list_info) {
+			map_info = list_itr->vsi_list_info;
+			if (test_bit(vsi_handle, map_info->vsi_map)) {
+				*vsi_list_id = map_info->vsi_list_id;
+				return map_info;
+			}
+		}
+	}
+	return NULL;
+}
+
+/**
+ * ice_add_rule_internal - add rule for a given lookup type
+ * @hw: pointer to the hardware structure
+ * @recp_id: lookup type (recipe ID) for which rule has to be added
+ * @f_entry: structure containing MAC forwarding information
+ *
+ * Adds or updates the rule lists for a given recipe
+ */
+static int
+ice_add_rule_internal(struct ice_hw *hw, u8 recp_id,
+		      struct ice_fltr_list_entry *f_entry)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_info *new_fltr, *cur_fltr;
+	struct ice_fltr_mgmt_list_entry *m_entry;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+	int status = 0;
+
+	if (!ice_is_vsi_valid(hw, f_entry->fltr_info.vsi_handle))
+		return -EINVAL;
+	f_entry->fltr_info.fwd_id.hw_vsi_id =
+		ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle);
+
+	rule_lock = &sw->recp_list[recp_id].filt_rule_lock;
+
+	mutex_lock(rule_lock);
+	new_fltr = &f_entry->fltr_info;
+	if (new_fltr->flag & ICE_FLTR_RX)
+		new_fltr->src = hw->port_info->lport;
+	else if (new_fltr->flag & ICE_FLTR_TX)
+		new_fltr->src = f_entry->fltr_info.fwd_id.hw_vsi_id;
+
+	m_entry = ice_find_rule_entry(hw, recp_id, new_fltr);
+	if (!m_entry) {
+		mutex_unlock(rule_lock);
+		return ice_create_pkt_fwd_rule(hw, f_entry);
+	}
+
+	cur_fltr = &m_entry->fltr_info;
+	status = ice_add_update_vsi_list(hw, m_entry, cur_fltr, new_fltr);
+	mutex_unlock(rule_lock);
+
+	return status;
+}
+
+/**
+ * ice_remove_vsi_list_rule
+ * @hw: pointer to the hardware structure
+ * @vsi_list_id: VSI list ID generated as part of allocate resource
+ * @lkup_type: switch rule filter lookup type
+ *
+ * The VSI list should be emptied before this function is called to remove the
+ * VSI list.
+ */
+static int
+ice_remove_vsi_list_rule(struct ice_hw *hw, u16 vsi_list_id,
+			 enum ice_sw_lkup_type lkup_type)
+{
+	struct ice_sw_rule_vsi_list *s_rule;
+	u16 s_rule_size;
+	int status;
+
+	s_rule_size = (u16)ICE_SW_RULE_VSI_LIST_SIZE(s_rule, 0);
+	s_rule = devm_kzalloc(ice_hw_to_dev(hw), s_rule_size, GFP_KERNEL);
+	if (!s_rule)
+		return -ENOMEM;
+
+	s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_VSI_LIST_CLEAR);
+	s_rule->index = cpu_to_le16(vsi_list_id);
+
+	/* Free the vsi_list resource that we allocated. It is assumed that the
+	 * list is empty at this point.
+	 */
+	status = ice_aq_alloc_free_vsi_list(hw, &vsi_list_id, lkup_type,
+					    ice_aqc_opc_free_res);
+
+	devm_kfree(ice_hw_to_dev(hw), s_rule);
+	return status;
+}
+
+/**
+ * ice_rem_update_vsi_list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle of the VSI to remove
+ * @fm_list: filter management entry for which the VSI list management needs to
+ *           be done
+ */
+static int
+ice_rem_update_vsi_list(struct ice_hw *hw, u16 vsi_handle,
+			struct ice_fltr_mgmt_list_entry *fm_list)
+{
+	enum ice_sw_lkup_type lkup_type;
+	u16 vsi_list_id;
+	int status = 0;
+
+	if (fm_list->fltr_info.fltr_act != ICE_FWD_TO_VSI_LIST ||
+	    fm_list->vsi_count == 0)
+		return -EINVAL;
+
+	/* A rule with the VSI being removed does not exist */
+	if (!test_bit(vsi_handle, fm_list->vsi_list_info->vsi_map))
+		return -ENOENT;
+
+	lkup_type = fm_list->fltr_info.lkup_type;
+	vsi_list_id = fm_list->fltr_info.fwd_id.vsi_list_id;
+	status = ice_update_vsi_list_rule(hw, &vsi_handle, 1, vsi_list_id, true,
+					  ice_aqc_opc_update_sw_rules,
+					  lkup_type);
+	if (status)
+		return status;
+
+	fm_list->vsi_count--;
+	clear_bit(vsi_handle, fm_list->vsi_list_info->vsi_map);
+
+	if (fm_list->vsi_count == 1 && lkup_type != ICE_SW_LKUP_VLAN) {
+		struct ice_fltr_info tmp_fltr_info = fm_list->fltr_info;
+		struct ice_vsi_list_map_info *vsi_list_info =
+			fm_list->vsi_list_info;
+		u16 rem_vsi_handle;
+
+		rem_vsi_handle = find_first_bit(vsi_list_info->vsi_map,
+						ICE_MAX_VSI);
+		if (!ice_is_vsi_valid(hw, rem_vsi_handle))
+			return -EIO;
+
+		/* Make sure VSI list is empty before removing it below */
+		status = ice_update_vsi_list_rule(hw, &rem_vsi_handle, 1,
+						  vsi_list_id, true,
+						  ice_aqc_opc_update_sw_rules,
+						  lkup_type);
+		if (status)
+			return status;
+
+		tmp_fltr_info.fltr_act = ICE_FWD_TO_VSI;
+		tmp_fltr_info.fwd_id.hw_vsi_id =
+			ice_get_hw_vsi_num(hw, rem_vsi_handle);
+		tmp_fltr_info.vsi_handle = rem_vsi_handle;
+		status = ice_update_pkt_fwd_rule(hw, &tmp_fltr_info);
+		if (status) {
+			ice_debug(hw, ICE_DBG_SW, "Failed to update pkt fwd rule to FWD_TO_VSI on HW VSI %d, error %d\n",
+				  tmp_fltr_info.fwd_id.hw_vsi_id, status);
+			return status;
+		}
+
+		fm_list->fltr_info = tmp_fltr_info;
+	}
+
+	if ((fm_list->vsi_count == 1 && lkup_type != ICE_SW_LKUP_VLAN) ||
+	    (fm_list->vsi_count == 0 && lkup_type == ICE_SW_LKUP_VLAN)) {
+		struct ice_vsi_list_map_info *vsi_list_info =
+			fm_list->vsi_list_info;
+
+		/* Remove the VSI list since it is no longer used */
+		status = ice_remove_vsi_list_rule(hw, vsi_list_id, lkup_type);
+		if (status) {
+			ice_debug(hw, ICE_DBG_SW, "Failed to remove VSI list %d, error %d\n",
+				  vsi_list_id, status);
+			return status;
+		}
+
+		list_del(&vsi_list_info->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), vsi_list_info);
+		fm_list->vsi_list_info = NULL;
+	}
+
+	return status;
+}
+
+/**
+ * ice_remove_rule_internal - Remove a filter rule of a given type
+ * @hw: pointer to the hardware structure
+ * @recp_id: recipe ID for which the rule needs to removed
+ * @f_entry: rule entry containing filter information
+ */
+static int
+ice_remove_rule_internal(struct ice_hw *hw, u8 recp_id,
+			 struct ice_fltr_list_entry *f_entry)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_mgmt_list_entry *list_elem;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+	bool remove_rule = false;
+	u16 vsi_handle;
+	int status = 0;
+
+	if (!ice_is_vsi_valid(hw, f_entry->fltr_info.vsi_handle))
+		return -EINVAL;
+	f_entry->fltr_info.fwd_id.hw_vsi_id =
+		ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle);
+
+	rule_lock = &sw->recp_list[recp_id].filt_rule_lock;
+	mutex_lock(rule_lock);
+	list_elem = ice_find_rule_entry(hw, recp_id, &f_entry->fltr_info);
+	if (!list_elem) {
+		status = -ENOENT;
+		goto exit;
+	}
+
+	if (list_elem->fltr_info.fltr_act != ICE_FWD_TO_VSI_LIST) {
+		remove_rule = true;
+	} else if (!list_elem->vsi_list_info) {
+		status = -ENOENT;
+		goto exit;
+	} else if (list_elem->vsi_list_info->ref_cnt > 1) {
+		/* a ref_cnt > 1 indicates that the vsi_list is being
+		 * shared by multiple rules. Decrement the ref_cnt and
+		 * remove this rule, but do not modify the list, as it
+		 * is in-use by other rules.
+		 */
+		list_elem->vsi_list_info->ref_cnt--;
+		remove_rule = true;
+	} else {
+		/* a ref_cnt of 1 indicates the vsi_list is only used
+		 * by one rule. However, the original removal request is only
+		 * for a single VSI. Update the vsi_list first, and only
+		 * remove the rule if there are no further VSIs in this list.
+		 */
+		vsi_handle = f_entry->fltr_info.vsi_handle;
+		status = ice_rem_update_vsi_list(hw, vsi_handle, list_elem);
+		if (status)
+			goto exit;
+		/* if VSI count goes to zero after updating the VSI list */
+		if (list_elem->vsi_count == 0)
+			remove_rule = true;
+	}
+
+	if (remove_rule) {
+		/* Remove the lookup rule */
+		struct ice_sw_rule_lkup_rx_tx *s_rule;
+
+		s_rule = devm_kzalloc(ice_hw_to_dev(hw),
+				      ICE_SW_RULE_RX_TX_NO_HDR_SIZE(s_rule),
+				      GFP_KERNEL);
+		if (!s_rule) {
+			status = -ENOMEM;
+			goto exit;
+		}
+
+		ice_fill_sw_rule(hw, &list_elem->fltr_info, s_rule,
+				 ice_aqc_opc_remove_sw_rules);
+
+		status = ice_aq_sw_rules(hw, s_rule,
+					 ICE_SW_RULE_RX_TX_NO_HDR_SIZE(s_rule),
+					 1, ice_aqc_opc_remove_sw_rules, NULL);
+
+		/* Remove a book keeping from the list */
+		devm_kfree(ice_hw_to_dev(hw), s_rule);
+
+		if (status)
+			goto exit;
+
+		list_del(&list_elem->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), list_elem);
+	}
+exit:
+	mutex_unlock(rule_lock);
+	return status;
+}
+
+/**
+ * ice_vlan_fltr_exist - does this VLAN filter exist for given VSI
+ * @hw: pointer to the hardware structure
+ * @vlan_id: VLAN ID
+ * @vsi_handle: check MAC filter for this VSI
+ */
+bool ice_vlan_fltr_exist(struct ice_hw *hw, u16 vlan_id, u16 vsi_handle)
+{
+	struct ice_fltr_mgmt_list_entry *entry;
+	struct list_head *rule_head;
+	struct ice_switch_info *sw;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+	u16 hw_vsi_id;
+
+	if (vlan_id > ICE_MAX_VLAN_ID)
+		return false;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return false;
+
+	hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+	sw = hw->switch_info;
+	rule_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules;
+	if (!rule_head)
+		return false;
+
+	rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
+	mutex_lock(rule_lock);
+	list_for_each_entry(entry, rule_head, list_entry) {
+		struct ice_fltr_info *f_info = &entry->fltr_info;
+		u16 entry_vlan_id = f_info->l_data.vlan.vlan_id;
+		struct ice_vsi_list_map_info *map_info;
+
+		if (entry_vlan_id > ICE_MAX_VLAN_ID)
+			continue;
+
+		if (f_info->flag != ICE_FLTR_TX ||
+		    f_info->src_id != ICE_SRC_ID_VSI ||
+		    f_info->lkup_type != ICE_SW_LKUP_VLAN)
+			continue;
+
+		/* Only allowed filter action are FWD_TO_VSI/_VSI_LIST */
+		if (f_info->fltr_act != ICE_FWD_TO_VSI &&
+		    f_info->fltr_act != ICE_FWD_TO_VSI_LIST)
+			continue;
+
+		if (f_info->fltr_act == ICE_FWD_TO_VSI) {
+			if (hw_vsi_id != f_info->fwd_id.hw_vsi_id)
+				continue;
+		} else if (f_info->fltr_act == ICE_FWD_TO_VSI_LIST) {
+			/* If filter_action is FWD_TO_VSI_LIST, make sure
+			 * that VSI being checked is part of VSI list
+			 */
+			if (entry->vsi_count == 1 &&
+			    entry->vsi_list_info) {
+				map_info = entry->vsi_list_info;
+				if (!test_bit(vsi_handle, map_info->vsi_map))
+					continue;
+			}
+		}
+
+		if (vlan_id == entry_vlan_id) {
+			mutex_unlock(rule_lock);
+			return true;
+		}
+	}
+	mutex_unlock(rule_lock);
+
+	return false;
+}
+
+/**
+ * ice_add_mac - Add a MAC address based filter rule
+ * @hw: pointer to the hardware structure
+ * @m_list: list of MAC addresses and forwarding information
+ */
+int ice_add_mac(struct ice_hw *hw, struct list_head *m_list)
+{
+	struct ice_fltr_list_entry *m_list_itr;
+	int status = 0;
+
+	if (!m_list || !hw)
+		return -EINVAL;
+
+	list_for_each_entry(m_list_itr, m_list, list_entry) {
+		u8 *add = &m_list_itr->fltr_info.l_data.mac.mac_addr[0];
+		u16 vsi_handle;
+		u16 hw_vsi_id;
+
+		m_list_itr->fltr_info.flag = ICE_FLTR_TX;
+		vsi_handle = m_list_itr->fltr_info.vsi_handle;
+		if (!ice_is_vsi_valid(hw, vsi_handle))
+			return -EINVAL;
+		hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+		m_list_itr->fltr_info.fwd_id.hw_vsi_id = hw_vsi_id;
+		/* update the src in case it is VSI num */
+		if (m_list_itr->fltr_info.src_id != ICE_SRC_ID_VSI)
+			return -EINVAL;
+		m_list_itr->fltr_info.src = hw_vsi_id;
+		if (m_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_MAC ||
+		    is_zero_ether_addr(add))
+			return -EINVAL;
+
+		m_list_itr->status = ice_add_rule_internal(hw, ICE_SW_LKUP_MAC,
+							   m_list_itr);
+		if (m_list_itr->status)
+			return m_list_itr->status;
+	}
+
+	return status;
+}
+
+/**
+ * ice_add_vlan_internal - Add one VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @f_entry: filter entry containing one VLAN information
+ */
+static int
+ice_add_vlan_internal(struct ice_hw *hw, struct ice_fltr_list_entry *f_entry)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_mgmt_list_entry *v_list_itr;
+	struct ice_fltr_info *new_fltr, *cur_fltr;
+	enum ice_sw_lkup_type lkup_type;
+	u16 vsi_list_id = 0, vsi_handle;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+	int status = 0;
+
+	if (!ice_is_vsi_valid(hw, f_entry->fltr_info.vsi_handle))
+		return -EINVAL;
+
+	f_entry->fltr_info.fwd_id.hw_vsi_id =
+		ice_get_hw_vsi_num(hw, f_entry->fltr_info.vsi_handle);
+	new_fltr = &f_entry->fltr_info;
+
+	/* VLAN ID should only be 12 bits */
+	if (new_fltr->l_data.vlan.vlan_id > ICE_MAX_VLAN_ID)
+		return -EINVAL;
+
+	if (new_fltr->src_id != ICE_SRC_ID_VSI)
+		return -EINVAL;
+
+	new_fltr->src = new_fltr->fwd_id.hw_vsi_id;
+	lkup_type = new_fltr->lkup_type;
+	vsi_handle = new_fltr->vsi_handle;
+	rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
+	mutex_lock(rule_lock);
+	v_list_itr = ice_find_rule_entry(hw, ICE_SW_LKUP_VLAN, new_fltr);
+	if (!v_list_itr) {
+		struct ice_vsi_list_map_info *map_info = NULL;
+
+		if (new_fltr->fltr_act == ICE_FWD_TO_VSI) {
+			/* All VLAN pruning rules use a VSI list. Check if
+			 * there is already a VSI list containing VSI that we
+			 * want to add. If found, use the same vsi_list_id for
+			 * this new VLAN rule or else create a new list.
+			 */
+			map_info = ice_find_vsi_list_entry(hw, ICE_SW_LKUP_VLAN,
+							   vsi_handle,
+							   &vsi_list_id);
+			if (!map_info) {
+				status = ice_create_vsi_list_rule(hw,
+								  &vsi_handle,
+								  1,
+								  &vsi_list_id,
+								  lkup_type);
+				if (status)
+					goto exit;
+			}
+			/* Convert the action to forwarding to a VSI list. */
+			new_fltr->fltr_act = ICE_FWD_TO_VSI_LIST;
+			new_fltr->fwd_id.vsi_list_id = vsi_list_id;
+		}
+
+		status = ice_create_pkt_fwd_rule(hw, f_entry);
+		if (!status) {
+			v_list_itr = ice_find_rule_entry(hw, ICE_SW_LKUP_VLAN,
+							 new_fltr);
+			if (!v_list_itr) {
+				status = -ENOENT;
+				goto exit;
+			}
+			/* reuse VSI list for new rule and increment ref_cnt */
+			if (map_info) {
+				v_list_itr->vsi_list_info = map_info;
+				map_info->ref_cnt++;
+			} else {
+				v_list_itr->vsi_list_info =
+					ice_create_vsi_list_map(hw, &vsi_handle,
+								1, vsi_list_id);
+			}
+		}
+	} else if (v_list_itr->vsi_list_info->ref_cnt == 1) {
+		/* Update existing VSI list to add new VSI ID only if it used
+		 * by one VLAN rule.
+		 */
+		cur_fltr = &v_list_itr->fltr_info;
+		status = ice_add_update_vsi_list(hw, v_list_itr, cur_fltr,
+						 new_fltr);
+	} else {
+		/* If VLAN rule exists and VSI list being used by this rule is
+		 * referenced by more than 1 VLAN rule. Then create a new VSI
+		 * list appending previous VSI with new VSI and update existing
+		 * VLAN rule to point to new VSI list ID
+		 */
+		struct ice_fltr_info tmp_fltr;
+		u16 vsi_handle_arr[2];
+		u16 cur_handle;
+
+		/* Current implementation only supports reusing VSI list with
+		 * one VSI count. We should never hit below condition
+		 */
+		if (v_list_itr->vsi_count > 1 &&
+		    v_list_itr->vsi_list_info->ref_cnt > 1) {
+			ice_debug(hw, ICE_DBG_SW, "Invalid configuration: Optimization to reuse VSI list with more than one VSI is not being done yet\n");
+			status = -EIO;
+			goto exit;
+		}
+
+		cur_handle =
+			find_first_bit(v_list_itr->vsi_list_info->vsi_map,
+				       ICE_MAX_VSI);
+
+		/* A rule already exists with the new VSI being added */
+		if (cur_handle == vsi_handle) {
+			status = -EEXIST;
+			goto exit;
+		}
+
+		vsi_handle_arr[0] = cur_handle;
+		vsi_handle_arr[1] = vsi_handle;
+		status = ice_create_vsi_list_rule(hw, &vsi_handle_arr[0], 2,
+						  &vsi_list_id, lkup_type);
+		if (status)
+			goto exit;
+
+		tmp_fltr = v_list_itr->fltr_info;
+		tmp_fltr.fltr_rule_id = v_list_itr->fltr_info.fltr_rule_id;
+		tmp_fltr.fwd_id.vsi_list_id = vsi_list_id;
+		tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST;
+		/* Update the previous switch rule to a new VSI list which
+		 * includes current VSI that is requested
+		 */
+		status = ice_update_pkt_fwd_rule(hw, &tmp_fltr);
+		if (status)
+			goto exit;
+
+		/* before overriding VSI list map info. decrement ref_cnt of
+		 * previous VSI list
+		 */
+		v_list_itr->vsi_list_info->ref_cnt--;
+
+		/* now update to newly created list */
+		v_list_itr->fltr_info.fwd_id.vsi_list_id = vsi_list_id;
+		v_list_itr->vsi_list_info =
+			ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2,
+						vsi_list_id);
+		v_list_itr->vsi_count++;
+	}
+
+exit:
+	mutex_unlock(rule_lock);
+	return status;
+}
+
+/**
+ * ice_add_vlan - Add VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @v_list: list of VLAN entries and forwarding information
+ */
+int ice_add_vlan(struct ice_hw *hw, struct list_head *v_list)
+{
+	struct ice_fltr_list_entry *v_list_itr;
+
+	if (!v_list || !hw)
+		return -EINVAL;
+
+	list_for_each_entry(v_list_itr, v_list, list_entry) {
+		if (v_list_itr->fltr_info.lkup_type != ICE_SW_LKUP_VLAN)
+			return -EINVAL;
+		v_list_itr->fltr_info.flag = ICE_FLTR_TX;
+		v_list_itr->status = ice_add_vlan_internal(hw, v_list_itr);
+		if (v_list_itr->status)
+			return v_list_itr->status;
+	}
+	return 0;
+}
+
+/**
+ * ice_add_eth_mac - Add ethertype and MAC based filter rule
+ * @hw: pointer to the hardware structure
+ * @em_list: list of ether type MAC filter, MAC is optional
+ *
+ * This function requires the caller to populate the entries in
+ * the filter list with the necessary fields (including flags to
+ * indicate Tx or Rx rules).
+ */
+int ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list)
+{
+	struct ice_fltr_list_entry *em_list_itr;
+
+	if (!em_list || !hw)
+		return -EINVAL;
+
+	list_for_each_entry(em_list_itr, em_list, list_entry) {
+		enum ice_sw_lkup_type l_type =
+			em_list_itr->fltr_info.lkup_type;
+
+		if (l_type != ICE_SW_LKUP_ETHERTYPE_MAC &&
+		    l_type != ICE_SW_LKUP_ETHERTYPE)
+			return -EINVAL;
+
+		em_list_itr->status = ice_add_rule_internal(hw, l_type,
+							    em_list_itr);
+		if (em_list_itr->status)
+			return em_list_itr->status;
+	}
+	return 0;
+}
+
+/**
+ * ice_remove_eth_mac - Remove an ethertype (or MAC) based filter rule
+ * @hw: pointer to the hardware structure
+ * @em_list: list of ethertype or ethertype MAC entries
+ */
+int ice_remove_eth_mac(struct ice_hw *hw, struct list_head *em_list)
+{
+	struct ice_fltr_list_entry *em_list_itr, *tmp;
+
+	if (!em_list || !hw)
+		return -EINVAL;
+
+	list_for_each_entry_safe(em_list_itr, tmp, em_list, list_entry) {
+		enum ice_sw_lkup_type l_type =
+			em_list_itr->fltr_info.lkup_type;
+
+		if (l_type != ICE_SW_LKUP_ETHERTYPE_MAC &&
+		    l_type != ICE_SW_LKUP_ETHERTYPE)
+			return -EINVAL;
+
+		em_list_itr->status = ice_remove_rule_internal(hw, l_type,
+							       em_list_itr);
+		if (em_list_itr->status)
+			return em_list_itr->status;
+	}
+	return 0;
+}
+
+/**
+ * ice_rem_sw_rule_info
+ * @hw: pointer to the hardware structure
+ * @rule_head: pointer to the switch list structure that we want to delete
+ */
+static void
+ice_rem_sw_rule_info(struct ice_hw *hw, struct list_head *rule_head)
+{
+	if (!list_empty(rule_head)) {
+		struct ice_fltr_mgmt_list_entry *entry;
+		struct ice_fltr_mgmt_list_entry *tmp;
+
+		list_for_each_entry_safe(entry, tmp, rule_head, list_entry) {
+			list_del(&entry->list_entry);
+			devm_kfree(ice_hw_to_dev(hw), entry);
+		}
+	}
+}
+
+/**
+ * ice_rem_adv_rule_info
+ * @hw: pointer to the hardware structure
+ * @rule_head: pointer to the switch list structure that we want to delete
+ */
+static void
+ice_rem_adv_rule_info(struct ice_hw *hw, struct list_head *rule_head)
+{
+	struct ice_adv_fltr_mgmt_list_entry *tmp_entry;
+	struct ice_adv_fltr_mgmt_list_entry *lst_itr;
+
+	if (list_empty(rule_head))
+		return;
+
+	list_for_each_entry_safe(lst_itr, tmp_entry, rule_head, list_entry) {
+		list_del(&lst_itr->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), lst_itr->lkups);
+		devm_kfree(ice_hw_to_dev(hw), lst_itr);
+	}
+}
+
+/**
+ * ice_cfg_dflt_vsi - change state of VSI to set/clear default
+ * @pi: pointer to the port_info structure
+ * @vsi_handle: VSI handle to set as default
+ * @set: true to add the above mentioned switch rule, false to remove it
+ * @direction: ICE_FLTR_RX or ICE_FLTR_TX
+ *
+ * add filter rule to set/unset given VSI as default VSI for the switch
+ * (represented by swid)
+ */
+int
+ice_cfg_dflt_vsi(struct ice_port_info *pi, u16 vsi_handle, bool set,
+		 u8 direction)
+{
+	struct ice_fltr_list_entry f_list_entry;
+	struct ice_fltr_info f_info;
+	struct ice_hw *hw = pi->hw;
+	u16 hw_vsi_id;
+	int status;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+
+	hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+	memset(&f_info, 0, sizeof(f_info));
+
+	f_info.lkup_type = ICE_SW_LKUP_DFLT;
+	f_info.flag = direction;
+	f_info.fltr_act = ICE_FWD_TO_VSI;
+	f_info.fwd_id.hw_vsi_id = hw_vsi_id;
+	f_info.vsi_handle = vsi_handle;
+
+	if (f_info.flag & ICE_FLTR_RX) {
+		f_info.src = hw->port_info->lport;
+		f_info.src_id = ICE_SRC_ID_LPORT;
+	} else if (f_info.flag & ICE_FLTR_TX) {
+		f_info.src_id = ICE_SRC_ID_VSI;
+		f_info.src = hw_vsi_id;
+	}
+	f_list_entry.fltr_info = f_info;
+
+	if (set)
+		status = ice_add_rule_internal(hw, ICE_SW_LKUP_DFLT,
+					       &f_list_entry);
+	else
+		status = ice_remove_rule_internal(hw, ICE_SW_LKUP_DFLT,
+						  &f_list_entry);
+
+	return status;
+}
+
+/**
+ * ice_vsi_uses_fltr - Determine if given VSI uses specified filter
+ * @fm_entry: filter entry to inspect
+ * @vsi_handle: VSI handle to compare with filter info
+ */
+static bool
+ice_vsi_uses_fltr(struct ice_fltr_mgmt_list_entry *fm_entry, u16 vsi_handle)
+{
+	return ((fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI &&
+		 fm_entry->fltr_info.vsi_handle == vsi_handle) ||
+		(fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST &&
+		 fm_entry->vsi_list_info &&
+		 (test_bit(vsi_handle, fm_entry->vsi_list_info->vsi_map))));
+}
+
+/**
+ * ice_check_if_dflt_vsi - check if VSI is default VSI
+ * @pi: pointer to the port_info structure
+ * @vsi_handle: vsi handle to check for in filter list
+ * @rule_exists: indicates if there are any VSI's in the rule list
+ *
+ * checks if the VSI is in a default VSI list, and also indicates
+ * if the default VSI list is empty
+ */
+bool
+ice_check_if_dflt_vsi(struct ice_port_info *pi, u16 vsi_handle,
+		      bool *rule_exists)
+{
+	struct ice_fltr_mgmt_list_entry *fm_entry;
+	struct ice_sw_recipe *recp_list;
+	struct list_head *rule_head;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+	bool ret = false;
+
+	recp_list = &pi->hw->switch_info->recp_list[ICE_SW_LKUP_DFLT];
+	rule_lock = &recp_list->filt_rule_lock;
+	rule_head = &recp_list->filt_rules;
+
+	mutex_lock(rule_lock);
+
+	if (rule_exists && !list_empty(rule_head))
+		*rule_exists = true;
+
+	list_for_each_entry(fm_entry, rule_head, list_entry) {
+		if (ice_vsi_uses_fltr(fm_entry, vsi_handle)) {
+			ret = true;
+			break;
+		}
+	}
+
+	mutex_unlock(rule_lock);
+
+	return ret;
+}
+
+/**
+ * ice_remove_mac - remove a MAC address based filter rule
+ * @hw: pointer to the hardware structure
+ * @m_list: list of MAC addresses and forwarding information
+ *
+ * This function removes either a MAC filter rule or a specific VSI from a
+ * VSI list for a multicast MAC address.
+ *
+ * Returns -ENOENT if a given entry was not added by ice_add_mac. Caller should
+ * be aware that this call will only work if all the entries passed into m_list
+ * were added previously. It will not attempt to do a partial remove of entries
+ * that were found.
+ */
+int ice_remove_mac(struct ice_hw *hw, struct list_head *m_list)
+{
+	struct ice_fltr_list_entry *list_itr, *tmp;
+
+	if (!m_list)
+		return -EINVAL;
+
+	list_for_each_entry_safe(list_itr, tmp, m_list, list_entry) {
+		enum ice_sw_lkup_type l_type = list_itr->fltr_info.lkup_type;
+		u16 vsi_handle;
+
+		if (l_type != ICE_SW_LKUP_MAC)
+			return -EINVAL;
+
+		vsi_handle = list_itr->fltr_info.vsi_handle;
+		if (!ice_is_vsi_valid(hw, vsi_handle))
+			return -EINVAL;
+
+		list_itr->fltr_info.fwd_id.hw_vsi_id =
+					ice_get_hw_vsi_num(hw, vsi_handle);
+
+		list_itr->status = ice_remove_rule_internal(hw,
+							    ICE_SW_LKUP_MAC,
+							    list_itr);
+		if (list_itr->status)
+			return list_itr->status;
+	}
+	return 0;
+}
+
+/**
+ * ice_remove_vlan - Remove VLAN based filter rule
+ * @hw: pointer to the hardware structure
+ * @v_list: list of VLAN entries and forwarding information
+ */
+int ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list)
+{
+	struct ice_fltr_list_entry *v_list_itr, *tmp;
+
+	if (!v_list || !hw)
+		return -EINVAL;
+
+	list_for_each_entry_safe(v_list_itr, tmp, v_list, list_entry) {
+		enum ice_sw_lkup_type l_type = v_list_itr->fltr_info.lkup_type;
+
+		if (l_type != ICE_SW_LKUP_VLAN)
+			return -EINVAL;
+		v_list_itr->status = ice_remove_rule_internal(hw,
+							      ICE_SW_LKUP_VLAN,
+							      v_list_itr);
+		if (v_list_itr->status)
+			return v_list_itr->status;
+	}
+	return 0;
+}
+
+/**
+ * ice_add_entry_to_vsi_fltr_list - Add copy of fltr_list_entry to remove list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to remove filters from
+ * @vsi_list_head: pointer to the list to add entry to
+ * @fi: pointer to fltr_info of filter entry to copy & add
+ *
+ * Helper function, used when creating a list of filters to remove from
+ * a specific VSI. The entry added to vsi_list_head is a COPY of the
+ * original filter entry, with the exception of fltr_info.fltr_act and
+ * fltr_info.fwd_id fields. These are set such that later logic can
+ * extract which VSI to remove the fltr from, and pass on that information.
+ */
+static int
+ice_add_entry_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_handle,
+			       struct list_head *vsi_list_head,
+			       struct ice_fltr_info *fi)
+{
+	struct ice_fltr_list_entry *tmp;
+
+	/* this memory is freed up in the caller function
+	 * once filters for this VSI are removed
+	 */
+	tmp = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	tmp->fltr_info = *fi;
+
+	/* Overwrite these fields to indicate which VSI to remove filter from,
+	 * so find and remove logic can extract the information from the
+	 * list entries. Note that original entries will still have proper
+	 * values.
+	 */
+	tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+	tmp->fltr_info.vsi_handle = vsi_handle;
+	tmp->fltr_info.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+	list_add(&tmp->list_entry, vsi_list_head);
+
+	return 0;
+}
+
+/**
+ * ice_add_to_vsi_fltr_list - Add VSI filters to the list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to remove filters from
+ * @lkup_list_head: pointer to the list that has certain lookup type filters
+ * @vsi_list_head: pointer to the list pertaining to VSI with vsi_handle
+ *
+ * Locates all filters in lkup_list_head that are used by the given VSI,
+ * and adds COPIES of those entries to vsi_list_head (intended to be used
+ * to remove the listed filters).
+ * Note that this means all entries in vsi_list_head must be explicitly
+ * deallocated by the caller when done with list.
+ */
+static int
+ice_add_to_vsi_fltr_list(struct ice_hw *hw, u16 vsi_handle,
+			 struct list_head *lkup_list_head,
+			 struct list_head *vsi_list_head)
+{
+	struct ice_fltr_mgmt_list_entry *fm_entry;
+	int status = 0;
+
+	/* check to make sure VSI ID is valid and within boundary */
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+
+	list_for_each_entry(fm_entry, lkup_list_head, list_entry) {
+		if (!ice_vsi_uses_fltr(fm_entry, vsi_handle))
+			continue;
+
+		status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle,
+							vsi_list_head,
+							&fm_entry->fltr_info);
+		if (status)
+			return status;
+	}
+	return status;
+}
+
+/**
+ * ice_determine_promisc_mask
+ * @fi: filter info to parse
+ *
+ * Helper function to determine which ICE_PROMISC_ mask corresponds
+ * to given filter into.
+ */
+static u8 ice_determine_promisc_mask(struct ice_fltr_info *fi)
+{
+	u16 vid = fi->l_data.mac_vlan.vlan_id;
+	u8 *macaddr = fi->l_data.mac.mac_addr;
+	bool is_tx_fltr = false;
+	u8 promisc_mask = 0;
+
+	if (fi->flag == ICE_FLTR_TX)
+		is_tx_fltr = true;
+
+	if (is_broadcast_ether_addr(macaddr))
+		promisc_mask |= is_tx_fltr ?
+			ICE_PROMISC_BCAST_TX : ICE_PROMISC_BCAST_RX;
+	else if (is_multicast_ether_addr(macaddr))
+		promisc_mask |= is_tx_fltr ?
+			ICE_PROMISC_MCAST_TX : ICE_PROMISC_MCAST_RX;
+	else if (is_unicast_ether_addr(macaddr))
+		promisc_mask |= is_tx_fltr ?
+			ICE_PROMISC_UCAST_TX : ICE_PROMISC_UCAST_RX;
+	if (vid)
+		promisc_mask |= is_tx_fltr ?
+			ICE_PROMISC_VLAN_TX : ICE_PROMISC_VLAN_RX;
+
+	return promisc_mask;
+}
+
+/**
+ * ice_remove_promisc - Remove promisc based filter rules
+ * @hw: pointer to the hardware structure
+ * @recp_id: recipe ID for which the rule needs to removed
+ * @v_list: list of promisc entries
+ */
+static int
+ice_remove_promisc(struct ice_hw *hw, u8 recp_id, struct list_head *v_list)
+{
+	struct ice_fltr_list_entry *v_list_itr, *tmp;
+
+	list_for_each_entry_safe(v_list_itr, tmp, v_list, list_entry) {
+		v_list_itr->status =
+			ice_remove_rule_internal(hw, recp_id, v_list_itr);
+		if (v_list_itr->status)
+			return v_list_itr->status;
+	}
+	return 0;
+}
+
+/**
+ * ice_clear_vsi_promisc - clear specified promiscuous mode(s) for given VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to clear mode
+ * @promisc_mask: mask of promiscuous config bits to clear
+ * @vid: VLAN ID to clear VLAN promiscuous
+ */
+int
+ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+		      u16 vid)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_list_entry *fm_entry, *tmp;
+	struct list_head remove_list_head;
+	struct ice_fltr_mgmt_list_entry *itr;
+	struct list_head *rule_head;
+	struct mutex *rule_lock;	/* Lock to protect filter rule list */
+	int status = 0;
+	u8 recipe_id;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+
+	if (promisc_mask & (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX))
+		recipe_id = ICE_SW_LKUP_PROMISC_VLAN;
+	else
+		recipe_id = ICE_SW_LKUP_PROMISC;
+
+	rule_head = &sw->recp_list[recipe_id].filt_rules;
+	rule_lock = &sw->recp_list[recipe_id].filt_rule_lock;
+
+	INIT_LIST_HEAD(&remove_list_head);
+
+	mutex_lock(rule_lock);
+	list_for_each_entry(itr, rule_head, list_entry) {
+		struct ice_fltr_info *fltr_info;
+		u8 fltr_promisc_mask = 0;
+
+		if (!ice_vsi_uses_fltr(itr, vsi_handle))
+			continue;
+		fltr_info = &itr->fltr_info;
+
+		if (recipe_id == ICE_SW_LKUP_PROMISC_VLAN &&
+		    vid != fltr_info->l_data.mac_vlan.vlan_id)
+			continue;
+
+		fltr_promisc_mask |= ice_determine_promisc_mask(fltr_info);
+
+		/* Skip if filter is not completely specified by given mask */
+		if (fltr_promisc_mask & ~promisc_mask)
+			continue;
+
+		status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle,
+							&remove_list_head,
+							fltr_info);
+		if (status) {
+			mutex_unlock(rule_lock);
+			goto free_fltr_list;
+		}
+	}
+	mutex_unlock(rule_lock);
+
+	status = ice_remove_promisc(hw, recipe_id, &remove_list_head);
+
+free_fltr_list:
+	list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) {
+		list_del(&fm_entry->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), fm_entry);
+	}
+
+	return status;
+}
+
+/**
+ * ice_set_vsi_promisc - set given VSI to given promiscuous mode(s)
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to configure
+ * @promisc_mask: mask of promiscuous config bits
+ * @vid: VLAN ID to set VLAN promiscuous
+ */
+int
+ice_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, u16 vid)
+{
+	enum { UCAST_FLTR = 1, MCAST_FLTR, BCAST_FLTR };
+	struct ice_fltr_list_entry f_list_entry;
+	struct ice_fltr_info new_fltr;
+	bool is_tx_fltr;
+	int status = 0;
+	u16 hw_vsi_id;
+	int pkt_type;
+	u8 recipe_id;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return -EINVAL;
+	hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+	memset(&new_fltr, 0, sizeof(new_fltr));
+
+	if (promisc_mask & (ICE_PROMISC_VLAN_RX | ICE_PROMISC_VLAN_TX)) {
+		new_fltr.lkup_type = ICE_SW_LKUP_PROMISC_VLAN;
+		new_fltr.l_data.mac_vlan.vlan_id = vid;
+		recipe_id = ICE_SW_LKUP_PROMISC_VLAN;
+	} else {
+		new_fltr.lkup_type = ICE_SW_LKUP_PROMISC;
+		recipe_id = ICE_SW_LKUP_PROMISC;
+	}
+
+	/* Separate filters must be set for each direction/packet type
+	 * combination, so we will loop over the mask value, store the
+	 * individual type, and clear it out in the input mask as it
+	 * is found.
+	 */
+	while (promisc_mask) {
+		u8 *mac_addr;
+
+		pkt_type = 0;
+		is_tx_fltr = false;
+
+		if (promisc_mask & ICE_PROMISC_UCAST_RX) {
+			promisc_mask &= ~ICE_PROMISC_UCAST_RX;
+			pkt_type = UCAST_FLTR;
+		} else if (promisc_mask & ICE_PROMISC_UCAST_TX) {
+			promisc_mask &= ~ICE_PROMISC_UCAST_TX;
+			pkt_type = UCAST_FLTR;
+			is_tx_fltr = true;
+		} else if (promisc_mask & ICE_PROMISC_MCAST_RX) {
+			promisc_mask &= ~ICE_PROMISC_MCAST_RX;
+			pkt_type = MCAST_FLTR;
+		} else if (promisc_mask & ICE_PROMISC_MCAST_TX) {
+			promisc_mask &= ~ICE_PROMISC_MCAST_TX;
+			pkt_type = MCAST_FLTR;
+			is_tx_fltr = true;
+		} else if (promisc_mask & ICE_PROMISC_BCAST_RX) {
+			promisc_mask &= ~ICE_PROMISC_BCAST_RX;
+			pkt_type = BCAST_FLTR;
+		} else if (promisc_mask & ICE_PROMISC_BCAST_TX) {
+			promisc_mask &= ~ICE_PROMISC_BCAST_TX;
+			pkt_type = BCAST_FLTR;
+			is_tx_fltr = true;
+		}
+
+		/* Check for VLAN promiscuous flag */
+		if (promisc_mask & ICE_PROMISC_VLAN_RX) {
+			promisc_mask &= ~ICE_PROMISC_VLAN_RX;
+		} else if (promisc_mask & ICE_PROMISC_VLAN_TX) {
+			promisc_mask &= ~ICE_PROMISC_VLAN_TX;
+			is_tx_fltr = true;
+		}
+
+		/* Set filter DA based on packet type */
+		mac_addr = new_fltr.l_data.mac.mac_addr;
+		if (pkt_type == BCAST_FLTR) {
+			eth_broadcast_addr(mac_addr);
+		} else if (pkt_type == MCAST_FLTR ||
+			   pkt_type == UCAST_FLTR) {
+			/* Use the dummy ether header DA */
+			ether_addr_copy(mac_addr, dummy_eth_header);
+			if (pkt_type == MCAST_FLTR)
+				mac_addr[0] |= 0x1;	/* Set multicast bit */
+		}
+
+		/* Need to reset this to zero for all iterations */
+		new_fltr.flag = 0;
+		if (is_tx_fltr) {
+			new_fltr.flag |= ICE_FLTR_TX;
+			new_fltr.src = hw_vsi_id;
+		} else {
+			new_fltr.flag |= ICE_FLTR_RX;
+			new_fltr.src = hw->port_info->lport;
+		}
+
+		new_fltr.fltr_act = ICE_FWD_TO_VSI;
+		new_fltr.vsi_handle = vsi_handle;
+		new_fltr.fwd_id.hw_vsi_id = hw_vsi_id;
+		f_list_entry.fltr_info = new_fltr;
+
+		status = ice_add_rule_internal(hw, recipe_id, &f_list_entry);
+		if (status)
+			goto set_promisc_exit;
+	}
+
+set_promisc_exit:
+	return status;
+}
+
+/**
+ * ice_set_vlan_vsi_promisc
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to configure
+ * @promisc_mask: mask of promiscuous config bits
+ * @rm_vlan_promisc: Clear VLANs VSI promisc mode
+ *
+ * Configure VSI with all associated VLANs to given promiscuous mode(s)
+ */
+int
+ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+			 bool rm_vlan_promisc)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_list_entry *list_itr, *tmp;
+	struct list_head vsi_list_head;
+	struct list_head *vlan_head;
+	struct mutex *vlan_lock; /* Lock to protect filter rule list */
+	u16 vlan_id;
+	int status;
+
+	INIT_LIST_HEAD(&vsi_list_head);
+	vlan_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock;
+	vlan_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules;
+	mutex_lock(vlan_lock);
+	status = ice_add_to_vsi_fltr_list(hw, vsi_handle, vlan_head,
+					  &vsi_list_head);
+	mutex_unlock(vlan_lock);
+	if (status)
+		goto free_fltr_list;
+
+	list_for_each_entry(list_itr, &vsi_list_head, list_entry) {
+		/* Avoid enabling or disabling VLAN zero twice when in double
+		 * VLAN mode
+		 */
+		if (ice_is_dvm_ena(hw) &&
+		    list_itr->fltr_info.l_data.vlan.tpid == 0)
+			continue;
+
+		vlan_id = list_itr->fltr_info.l_data.vlan.vlan_id;
+		if (rm_vlan_promisc)
+			status = ice_clear_vsi_promisc(hw, vsi_handle,
+						       promisc_mask, vlan_id);
+		else
+			status = ice_set_vsi_promisc(hw, vsi_handle,
+						     promisc_mask, vlan_id);
+		if (status && status != -EEXIST)
+			break;
+	}
+
+free_fltr_list:
+	list_for_each_entry_safe(list_itr, tmp, &vsi_list_head, list_entry) {
+		list_del(&list_itr->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), list_itr);
+	}
+	return status;
+}
+
+/**
+ * ice_remove_vsi_lkup_fltr - Remove lookup type filters for a VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to remove filters from
+ * @lkup: switch rule filter lookup type
+ */
+static void
+ice_remove_vsi_lkup_fltr(struct ice_hw *hw, u16 vsi_handle,
+			 enum ice_sw_lkup_type lkup)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	struct ice_fltr_list_entry *fm_entry;
+	struct list_head remove_list_head;
+	struct list_head *rule_head;
+	struct ice_fltr_list_entry *tmp;
+	struct mutex *rule_lock;	/* Lock to protect filter rule list */
+	int status;
+
+	INIT_LIST_HEAD(&remove_list_head);
+	rule_lock = &sw->recp_list[lkup].filt_rule_lock;
+	rule_head = &sw->recp_list[lkup].filt_rules;
+	mutex_lock(rule_lock);
+	status = ice_add_to_vsi_fltr_list(hw, vsi_handle, rule_head,
+					  &remove_list_head);
+	mutex_unlock(rule_lock);
+	if (status)
+		goto free_fltr_list;
+
+	switch (lkup) {
+	case ICE_SW_LKUP_MAC:
+		ice_remove_mac(hw, &remove_list_head);
+		break;
+	case ICE_SW_LKUP_VLAN:
+		ice_remove_vlan(hw, &remove_list_head);
+		break;
+	case ICE_SW_LKUP_PROMISC:
+	case ICE_SW_LKUP_PROMISC_VLAN:
+		ice_remove_promisc(hw, lkup, &remove_list_head);
+		break;
+	case ICE_SW_LKUP_MAC_VLAN:
+	case ICE_SW_LKUP_ETHERTYPE:
+	case ICE_SW_LKUP_ETHERTYPE_MAC:
+	case ICE_SW_LKUP_DFLT:
+	case ICE_SW_LKUP_LAST:
+	default:
+		ice_debug(hw, ICE_DBG_SW, "Unsupported lookup type %d\n", lkup);
+		break;
+	}
+
+free_fltr_list:
+	list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) {
+		list_del(&fm_entry->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), fm_entry);
+	}
+}
+
+/**
+ * ice_remove_vsi_fltr - Remove all filters for a VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle to remove filters from
+ */
+void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle)
+{
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_MAC);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_MAC_VLAN);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_PROMISC);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_VLAN);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_DFLT);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_ETHERTYPE);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_ETHERTYPE_MAC);
+	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_PROMISC_VLAN);
+}
+
+/**
+ * ice_alloc_res_cntr - allocating resource counter
+ * @hw: pointer to the hardware structure
+ * @type: type of resource
+ * @alloc_shared: if set it is shared else dedicated
+ * @num_items: number of entries requested for FD resource type
+ * @counter_id: counter index returned by AQ call
+ */
+int
+ice_alloc_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+		   u16 *counter_id)
+{
+	struct ice_aqc_alloc_free_res_elem *buf;
+	u16 buf_len;
+	int status;
+
+	/* Allocate resource */
+	buf_len = struct_size(buf, elem, 1);
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->num_elems = cpu_to_le16(num_items);
+	buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) &
+				      ICE_AQC_RES_TYPE_M) | alloc_shared);
+
+	status = ice_aq_alloc_free_res(hw, buf, buf_len, ice_aqc_opc_alloc_res);
+	if (status)
+		goto exit;
+
+	*counter_id = le16_to_cpu(buf->elem[0].e.sw_resp);
+
+exit:
+	kfree(buf);
+	return status;
+}
+
+/**
+ * ice_free_res_cntr - free resource counter
+ * @hw: pointer to the hardware structure
+ * @type: type of resource
+ * @alloc_shared: if set it is shared else dedicated
+ * @num_items: number of entries to be freed for FD resource type
+ * @counter_id: counter ID resource which needs to be freed
+ */
+int
+ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+		  u16 counter_id)
+{
+	struct ice_aqc_alloc_free_res_elem *buf;
+	u16 buf_len;
+	int status;
+
+	/* Free resource */
+	buf_len = struct_size(buf, elem, 1);
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->num_elems = cpu_to_le16(num_items);
+	buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) &
+				      ICE_AQC_RES_TYPE_M) | alloc_shared);
+	buf->elem[0].e.sw_resp = cpu_to_le16(counter_id);
+
+	status = ice_aq_alloc_free_res(hw, buf, buf_len, ice_aqc_opc_free_res);
+	if (status)
+		ice_debug(hw, ICE_DBG_SW, "counter resource could not be freed\n");
+
+	kfree(buf);
+	return status;
+}
+
+#define ICE_PROTOCOL_ENTRY(id, ...) {		\
+	.prot_type	= id,			\
+	.offs		= {__VA_ARGS__},	\
+}
+
+/**
+ * ice_share_res - set a resource as shared or dedicated
+ * @hw: hw struct of original owner of resource
+ * @type: resource type
+ * @shared: is the resource being set to shared
+ * @res_id: resource id (descriptor)
+ */
+int ice_share_res(struct ice_hw *hw, u16 type, u8 shared, u16 res_id)
+{
+	struct ice_aqc_alloc_free_res_elem *buf;
+	u16 buf_len;
+	int status;
+
+	buf_len = struct_size(buf, elem, 1);
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->num_elems = cpu_to_le16(1);
+	if (shared)
+		buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) &
+					     ICE_AQC_RES_TYPE_M) |
+					    ICE_AQC_RES_TYPE_FLAG_SHARED);
+	else
+		buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) &
+					     ICE_AQC_RES_TYPE_M) &
+					    ~ICE_AQC_RES_TYPE_FLAG_SHARED);
+
+	buf->elem[0].e.sw_resp = cpu_to_le16(res_id);
+	status = ice_aq_alloc_free_res(hw, buf, buf_len,
+				       ice_aqc_opc_share_res);
+	if (status)
+		ice_debug(hw, ICE_DBG_SW, "Could not set resource type %u id %u to %s\n",
+			  type, res_id, shared ? "SHARED" : "DEDICATED");
+
+	kfree(buf);
+	return status;
+}
+
+/* This is mapping table entry that maps every word within a given protocol
+ * structure to the real byte offset as per the specification of that
+ * protocol header.
+ * for example dst address is 3 words in ethertype header and corresponding
+ * bytes are 0, 2, 3 in the actual packet header and src address is at 4, 6, 8
+ * IMPORTANT: Every structure part of "ice_prot_hdr" union should have a
+ * matching entry describing its field. This needs to be updated if new
+ * structure is added to that union.
+ */
+static const struct ice_prot_ext_tbl_entry ice_prot_ext[ICE_PROTOCOL_LAST] = {
+	ICE_PROTOCOL_ENTRY(ICE_MAC_OFOS, 0, 2, 4, 6, 8, 10, 12),
+	ICE_PROTOCOL_ENTRY(ICE_MAC_IL, 0, 2, 4, 6, 8, 10, 12),
+	ICE_PROTOCOL_ENTRY(ICE_ETYPE_OL, 0),
+	ICE_PROTOCOL_ENTRY(ICE_ETYPE_IL, 0),
+	ICE_PROTOCOL_ENTRY(ICE_VLAN_OFOS, 2, 0),
+	ICE_PROTOCOL_ENTRY(ICE_IPV4_OFOS, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18),
+	ICE_PROTOCOL_ENTRY(ICE_IPV4_IL,	0, 2, 4, 6, 8, 10, 12, 14, 16, 18),
+	ICE_PROTOCOL_ENTRY(ICE_IPV6_OFOS, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18,
+			   20, 22, 24, 26, 28, 30, 32, 34, 36, 38),
+	ICE_PROTOCOL_ENTRY(ICE_IPV6_IL, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20,
+			   22, 24, 26, 28, 30, 32, 34, 36, 38),
+	ICE_PROTOCOL_ENTRY(ICE_TCP_IL, 0, 2),
+	ICE_PROTOCOL_ENTRY(ICE_UDP_OF, 0, 2),
+	ICE_PROTOCOL_ENTRY(ICE_UDP_ILOS, 0, 2),
+	ICE_PROTOCOL_ENTRY(ICE_VXLAN, 8, 10, 12, 14),
+	ICE_PROTOCOL_ENTRY(ICE_GENEVE, 8, 10, 12, 14),
+	ICE_PROTOCOL_ENTRY(ICE_NVGRE, 0, 2, 4, 6),
+	ICE_PROTOCOL_ENTRY(ICE_GTP, 8, 10, 12, 14, 16, 18, 20, 22),
+	ICE_PROTOCOL_ENTRY(ICE_GTP_NO_PAY, 8, 10, 12, 14),
+	ICE_PROTOCOL_ENTRY(ICE_PPPOE, 0, 2, 4, 6),
+	ICE_PROTOCOL_ENTRY(ICE_L2TPV3, 0, 2, 4, 6, 8, 10),
+	ICE_PROTOCOL_ENTRY(ICE_VLAN_EX, 2, 0),
+	ICE_PROTOCOL_ENTRY(ICE_VLAN_IN, 2, 0),
+	ICE_PROTOCOL_ENTRY(ICE_HW_METADATA,
+			   ICE_SOURCE_PORT_MDID_OFFSET,
+			   ICE_PTYPE_MDID_OFFSET,
+			   ICE_PACKET_LENGTH_MDID_OFFSET,
+			   ICE_SOURCE_VSI_MDID_OFFSET,
+			   ICE_PKT_VLAN_MDID_OFFSET,
+			   ICE_PKT_TUNNEL_MDID_OFFSET,
+			   ICE_PKT_TCP_MDID_OFFSET,
+			   ICE_PKT_ERROR_MDID_OFFSET),
+};
+
+static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = {
+	{ ICE_MAC_OFOS,		ICE_MAC_OFOS_HW },
+	{ ICE_MAC_IL,		ICE_MAC_IL_HW },
+	{ ICE_ETYPE_OL,		ICE_ETYPE_OL_HW },
+	{ ICE_ETYPE_IL,		ICE_ETYPE_IL_HW },
+	{ ICE_VLAN_OFOS,	ICE_VLAN_OL_HW },
+	{ ICE_IPV4_OFOS,	ICE_IPV4_OFOS_HW },
+	{ ICE_IPV4_IL,		ICE_IPV4_IL_HW },
+	{ ICE_IPV6_OFOS,	ICE_IPV6_OFOS_HW },
+	{ ICE_IPV6_IL,		ICE_IPV6_IL_HW },
+	{ ICE_TCP_IL,		ICE_TCP_IL_HW },
+	{ ICE_UDP_OF,		ICE_UDP_OF_HW },
+	{ ICE_UDP_ILOS,		ICE_UDP_ILOS_HW },
+	{ ICE_VXLAN,		ICE_UDP_OF_HW },
+	{ ICE_GENEVE,		ICE_UDP_OF_HW },
+	{ ICE_NVGRE,		ICE_GRE_OF_HW },
+	{ ICE_GTP,		ICE_UDP_OF_HW },
+	{ ICE_GTP_NO_PAY,	ICE_UDP_ILOS_HW },
+	{ ICE_PPPOE,		ICE_PPPOE_HW },
+	{ ICE_L2TPV3,		ICE_L2TPV3_HW },
+	{ ICE_VLAN_EX,          ICE_VLAN_OF_HW },
+	{ ICE_VLAN_IN,          ICE_VLAN_OL_HW },
+	{ ICE_HW_METADATA,      ICE_META_DATA_ID_HW },
+};
+
+/**
+ * ice_find_recp - find a recipe
+ * @hw: pointer to the hardware structure
+ * @lkup_exts: extension sequence to match
+ * @rinfo: information regarding the rule e.g. priority and action info
+ *
+ * Returns index of matching recipe, or ICE_MAX_NUM_RECIPES if not found.
+ */
+static u16
+ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts,
+	      const struct ice_adv_rule_info *rinfo)
+{
+	bool refresh_required = true;
+	struct ice_sw_recipe *recp;
+	u8 i;
+
+	/* Walk through existing recipes to find a match */
+	recp = hw->switch_info->recp_list;
+	for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) {
+		/* If recipe was not created for this ID, in SW bookkeeping,
+		 * check if FW has an entry for this recipe. If the FW has an
+		 * entry update it in our SW bookkeeping and continue with the
+		 * matching.
+		 */
+		if (!recp[i].recp_created)
+			if (ice_get_recp_frm_fw(hw,
+						hw->switch_info->recp_list, i,
+						&refresh_required))
+				continue;
+
+		/* Skip inverse action recipes */
+		if (recp[i].root_buf && recp[i].root_buf->content.act_ctrl &
+		    ICE_AQ_RECIPE_ACT_INV_ACT)
+			continue;
+
+		/* if number of words we are looking for match */
+		if (lkup_exts->n_val_words == recp[i].lkup_exts.n_val_words) {
+			struct ice_fv_word *ar = recp[i].lkup_exts.fv_words;
+			struct ice_fv_word *be = lkup_exts->fv_words;
+			u16 *cr = recp[i].lkup_exts.field_mask;
+			u16 *de = lkup_exts->field_mask;
+			bool found = true;
+			u8 pe, qr;
+
+			/* ar, cr, and qr are related to the recipe words, while
+			 * be, de, and pe are related to the lookup words
+			 */
+			for (pe = 0; pe < lkup_exts->n_val_words; pe++) {
+				for (qr = 0; qr < recp[i].lkup_exts.n_val_words;
+				     qr++) {
+					if (ar[qr].off == be[pe].off &&
+					    ar[qr].prot_id == be[pe].prot_id &&
+					    cr[qr] == de[pe])
+						/* Found the "pe"th word in the
+						 * given recipe
+						 */
+						break;
+				}
+				/* After walking through all the words in the
+				 * "i"th recipe if "p"th word was not found then
+				 * this recipe is not what we are looking for.
+				 * So break out from this loop and try the next
+				 * recipe
+				 */
+				if (qr >= recp[i].lkup_exts.n_val_words) {
+					found = false;
+					break;
+				}
+			}
+			/* If for "i"th recipe the found was never set to false
+			 * then it means we found our match
+			 * Also tun type and *_pass_l2 of recipe needs to be
+			 * checked
+			 */
+			if (found && recp[i].tun_type == rinfo->tun_type &&
+			    recp[i].need_pass_l2 == rinfo->need_pass_l2 &&
+			    recp[i].allow_pass_l2 == rinfo->allow_pass_l2)
+				return i; /* Return the recipe ID */
+		}
+	}
+	return ICE_MAX_NUM_RECIPES;
+}
+
+/**
+ * ice_change_proto_id_to_dvm - change proto id in prot_id_tbl
+ *
+ * As protocol id for outer vlan is different in dvm and svm, if dvm is
+ * supported protocol array record for outer vlan has to be modified to
+ * reflect the value proper for DVM.
+ */
+void ice_change_proto_id_to_dvm(void)
+{
+	u8 i;
+
+	for (i = 0; i < ARRAY_SIZE(ice_prot_id_tbl); i++)
+		if (ice_prot_id_tbl[i].type == ICE_VLAN_OFOS &&
+		    ice_prot_id_tbl[i].protocol_id != ICE_VLAN_OF_HW)
+			ice_prot_id_tbl[i].protocol_id = ICE_VLAN_OF_HW;
+}
+
+/**
+ * ice_prot_type_to_id - get protocol ID from protocol type
+ * @type: protocol type
+ * @id: pointer to variable that will receive the ID
+ *
+ * Returns true if found, false otherwise
+ */
+static bool ice_prot_type_to_id(enum ice_protocol_type type, u8 *id)
+{
+	u8 i;
+
+	for (i = 0; i < ARRAY_SIZE(ice_prot_id_tbl); i++)
+		if (ice_prot_id_tbl[i].type == type) {
+			*id = ice_prot_id_tbl[i].protocol_id;
+			return true;
+		}
+	return false;
+}
+
+/**
+ * ice_fill_valid_words - count valid words
+ * @rule: advanced rule with lookup information
+ * @lkup_exts: byte offset extractions of the words that are valid
+ *
+ * calculate valid words in a lookup rule using mask value
+ */
+static u8
+ice_fill_valid_words(struct ice_adv_lkup_elem *rule,
+		     struct ice_prot_lkup_ext *lkup_exts)
+{
+	u8 j, word, prot_id, ret_val;
+
+	if (!ice_prot_type_to_id(rule->type, &prot_id))
+		return 0;
+
+	word = lkup_exts->n_val_words;
+
+	for (j = 0; j < sizeof(rule->m_u) / sizeof(u16); j++)
+		if (((u16 *)&rule->m_u)[j] &&
+		    rule->type < ARRAY_SIZE(ice_prot_ext)) {
+			/* No more space to accommodate */
+			if (word >= ICE_MAX_CHAIN_WORDS)
+				return 0;
+			lkup_exts->fv_words[word].off =
+				ice_prot_ext[rule->type].offs[j];
+			lkup_exts->fv_words[word].prot_id =
+				ice_prot_id_tbl[rule->type].protocol_id;
+			lkup_exts->field_mask[word] =
+				be16_to_cpu(((__force __be16 *)&rule->m_u)[j]);
+			word++;
+		}
+
+	ret_val = word - lkup_exts->n_val_words;
+	lkup_exts->n_val_words = word;
+
+	return ret_val;
+}
+
+/**
+ * ice_create_first_fit_recp_def - Create a recipe grouping
+ * @hw: pointer to the hardware structure
+ * @lkup_exts: an array of protocol header extractions
+ * @rg_list: pointer to a list that stores new recipe groups
+ * @recp_cnt: pointer to a variable that stores returned number of recipe groups
+ *
+ * Using first fit algorithm, take all the words that are still not done
+ * and start grouping them in 4-word groups. Each group makes up one
+ * recipe.
+ */
+static int
+ice_create_first_fit_recp_def(struct ice_hw *hw,
+			      struct ice_prot_lkup_ext *lkup_exts,
+			      struct list_head *rg_list,
+			      u8 *recp_cnt)
+{
+	struct ice_pref_recipe_group *grp = NULL;
+	u8 j;
+
+	*recp_cnt = 0;
+
+	/* Walk through every word in the rule to check if it is not done. If so
+	 * then this word needs to be part of a new recipe.
+	 */
+	for (j = 0; j < lkup_exts->n_val_words; j++)
+		if (!test_bit(j, lkup_exts->done)) {
+			if (!grp ||
+			    grp->n_val_pairs == ICE_NUM_WORDS_RECIPE) {
+				struct ice_recp_grp_entry *entry;
+
+				entry = devm_kzalloc(ice_hw_to_dev(hw),
+						     sizeof(*entry),
+						     GFP_KERNEL);
+				if (!entry)
+					return -ENOMEM;
+				list_add(&entry->l_entry, rg_list);
+				grp = &entry->r_group;
+				(*recp_cnt)++;
+			}
+
+			grp->pairs[grp->n_val_pairs].prot_id =
+				lkup_exts->fv_words[j].prot_id;
+			grp->pairs[grp->n_val_pairs].off =
+				lkup_exts->fv_words[j].off;
+			grp->mask[grp->n_val_pairs] = lkup_exts->field_mask[j];
+			grp->n_val_pairs++;
+		}
+
+	return 0;
+}
+
+/**
+ * ice_fill_fv_word_index - fill in the field vector indices for a recipe group
+ * @hw: pointer to the hardware structure
+ * @fv_list: field vector with the extraction sequence information
+ * @rg_list: recipe groupings with protocol-offset pairs
+ *
+ * Helper function to fill in the field vector indices for protocol-offset
+ * pairs. These indexes are then ultimately programmed into a recipe.
+ */
+static int
+ice_fill_fv_word_index(struct ice_hw *hw, struct list_head *fv_list,
+		       struct list_head *rg_list)
+{
+	struct ice_sw_fv_list_entry *fv;
+	struct ice_recp_grp_entry *rg;
+	struct ice_fv_word *fv_ext;
+
+	if (list_empty(fv_list))
+		return 0;
+
+	fv = list_first_entry(fv_list, struct ice_sw_fv_list_entry,
+			      list_entry);
+	fv_ext = fv->fv_ptr->ew;
+
+	list_for_each_entry(rg, rg_list, l_entry) {
+		u8 i;
+
+		for (i = 0; i < rg->r_group.n_val_pairs; i++) {
+			struct ice_fv_word *pr;
+			bool found = false;
+			u16 mask;
+			u8 j;
+
+			pr = &rg->r_group.pairs[i];
+			mask = rg->r_group.mask[i];
+
+			for (j = 0; j < hw->blk[ICE_BLK_SW].es.fvw; j++)
+				if (fv_ext[j].prot_id == pr->prot_id &&
+				    fv_ext[j].off == pr->off) {
+					found = true;
+
+					/* Store index of field vector */
+					rg->fv_idx[i] = j;
+					rg->fv_mask[i] = mask;
+					break;
+				}
+
+			/* Protocol/offset could not be found, caller gave an
+			 * invalid pair
+			 */
+			if (!found)
+				return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_find_free_recp_res_idx - find free result indexes for recipe
+ * @hw: pointer to hardware structure
+ * @profiles: bitmap of profiles that will be associated with the new recipe
+ * @free_idx: pointer to variable to receive the free index bitmap
+ *
+ * The algorithm used here is:
+ *	1. When creating a new recipe, create a set P which contains all
+ *	   Profiles that will be associated with our new recipe
+ *
+ *	2. For each Profile p in set P:
+ *	    a. Add all recipes associated with Profile p into set R
+ *	    b. Optional : PossibleIndexes &= profile[p].possibleIndexes
+ *		[initially PossibleIndexes should be 0xFFFFFFFFFFFFFFFF]
+ *		i. Or just assume they all have the same possible indexes:
+ *			44, 45, 46, 47
+ *			i.e., PossibleIndexes = 0x0000F00000000000
+ *
+ *	3. For each Recipe r in set R:
+ *	    a. UsedIndexes |= (bitwise or ) recipe[r].res_indexes
+ *	    b. FreeIndexes = UsedIndexes ^ PossibleIndexes
+ *
+ *	FreeIndexes will contain the bits indicating the indexes free for use,
+ *      then the code needs to update the recipe[r].used_result_idx_bits to
+ *      indicate which indexes were selected for use by this recipe.
+ */
+static u16
+ice_find_free_recp_res_idx(struct ice_hw *hw, const unsigned long *profiles,
+			   unsigned long *free_idx)
+{
+	DECLARE_BITMAP(possible_idx, ICE_MAX_FV_WORDS);
+	DECLARE_BITMAP(recipes, ICE_MAX_NUM_RECIPES);
+	DECLARE_BITMAP(used_idx, ICE_MAX_FV_WORDS);
+	u16 bit;
+
+	bitmap_zero(recipes, ICE_MAX_NUM_RECIPES);
+	bitmap_zero(used_idx, ICE_MAX_FV_WORDS);
+
+	bitmap_fill(possible_idx, ICE_MAX_FV_WORDS);
+
+	/* For each profile we are going to associate the recipe with, add the
+	 * recipes that are associated with that profile. This will give us
+	 * the set of recipes that our recipe may collide with. Also, determine
+	 * what possible result indexes are usable given this set of profiles.
+	 */
+	for_each_set_bit(bit, profiles, ICE_MAX_NUM_PROFILES) {
+		bitmap_or(recipes, recipes, profile_to_recipe[bit],
+			  ICE_MAX_NUM_RECIPES);
+		bitmap_and(possible_idx, possible_idx,
+			   hw->switch_info->prof_res_bm[bit],
+			   ICE_MAX_FV_WORDS);
+	}
+
+	/* For each recipe that our new recipe may collide with, determine
+	 * which indexes have been used.
+	 */
+	for_each_set_bit(bit, recipes, ICE_MAX_NUM_RECIPES)
+		bitmap_or(used_idx, used_idx,
+			  hw->switch_info->recp_list[bit].res_idxs,
+			  ICE_MAX_FV_WORDS);
+
+	bitmap_xor(free_idx, used_idx, possible_idx, ICE_MAX_FV_WORDS);
+
+	/* return number of free indexes */
+	return (u16)bitmap_weight(free_idx, ICE_MAX_FV_WORDS);
+}
+
+/**
+ * ice_add_sw_recipe - function to call AQ calls to create switch recipe
+ * @hw: pointer to hardware structure
+ * @rm: recipe management list entry
+ * @profiles: bitmap of profiles that will be associated.
+ */
+static int
+ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm,
+		  unsigned long *profiles)
+{
+	DECLARE_BITMAP(result_idx_bm, ICE_MAX_FV_WORDS);
+	struct ice_aqc_recipe_content *content;
+	struct ice_aqc_recipe_data_elem *tmp;
+	struct ice_aqc_recipe_data_elem *buf;
+	struct ice_recp_grp_entry *entry;
+	u16 free_res_idx;
+	u16 recipe_count;
+	u8 chain_idx;
+	u8 recps = 0;
+	int status;
+
+	/* When more than one recipe are required, another recipe is needed to
+	 * chain them together. Matching a tunnel metadata ID takes up one of
+	 * the match fields in the chaining recipe reducing the number of
+	 * chained recipes by one.
+	 */
+	 /* check number of free result indices */
+	bitmap_zero(result_idx_bm, ICE_MAX_FV_WORDS);
+	free_res_idx = ice_find_free_recp_res_idx(hw, profiles, result_idx_bm);
+
+	ice_debug(hw, ICE_DBG_SW, "Result idx slots: %d, need %d\n",
+		  free_res_idx, rm->n_grp_count);
+
+	if (rm->n_grp_count > 1) {
+		if (rm->n_grp_count > free_res_idx)
+			return -ENOSPC;
+
+		rm->n_grp_count++;
+	}
+
+	if (rm->n_grp_count > ICE_MAX_CHAIN_RECIPE)
+		return -ENOSPC;
+
+	tmp = kcalloc(ICE_MAX_NUM_RECIPES, sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	buf = devm_kcalloc(ice_hw_to_dev(hw), rm->n_grp_count, sizeof(*buf),
+			   GFP_KERNEL);
+	if (!buf) {
+		status = -ENOMEM;
+		goto err_mem;
+	}
+
+	bitmap_zero(rm->r_bitmap, ICE_MAX_NUM_RECIPES);
+	recipe_count = ICE_MAX_NUM_RECIPES;
+	status = ice_aq_get_recipe(hw, tmp, &recipe_count, ICE_SW_LKUP_MAC,
+				   NULL);
+	if (status || recipe_count == 0)
+		goto err_unroll;
+
+	/* Allocate the recipe resources, and configure them according to the
+	 * match fields from protocol headers and extracted field vectors.
+	 */
+	chain_idx = find_first_bit(result_idx_bm, ICE_MAX_FV_WORDS);
+	list_for_each_entry(entry, &rm->rg_list, l_entry) {
+		u8 i;
+
+		status = ice_alloc_recipe(hw, &entry->rid);
+		if (status)
+			goto err_unroll;
+
+		content = &buf[recps].content;
+
+		/* Clear the result index of the located recipe, as this will be
+		 * updated, if needed, later in the recipe creation process.
+		 */
+		tmp[0].content.result_indx = 0;
+
+		buf[recps] = tmp[0];
+		buf[recps].recipe_indx = (u8)entry->rid;
+		/* if the recipe is a non-root recipe RID should be programmed
+		 * as 0 for the rules to be applied correctly.
+		 */
+		content->rid = 0;
+		memset(&content->lkup_indx, 0,
+		       sizeof(content->lkup_indx));
+
+		/* All recipes use look-up index 0 to match switch ID. */
+		content->lkup_indx[0] = ICE_AQ_SW_ID_LKUP_IDX;
+		content->mask[0] = cpu_to_le16(ICE_AQ_SW_ID_LKUP_MASK);
+		/* Setup lkup_indx 1..4 to INVALID/ignore and set the mask
+		 * to be 0
+		 */
+		for (i = 1; i <= ICE_NUM_WORDS_RECIPE; i++) {
+			content->lkup_indx[i] = 0x80;
+			content->mask[i] = 0;
+		}
+
+		for (i = 0; i < entry->r_group.n_val_pairs; i++) {
+			content->lkup_indx[i + 1] = entry->fv_idx[i];
+			content->mask[i + 1] = cpu_to_le16(entry->fv_mask[i]);
+		}
+
+		if (rm->n_grp_count > 1) {
+			/* Checks to see if there really is a valid result index
+			 * that can be used.
+			 */
+			if (chain_idx >= ICE_MAX_FV_WORDS) {
+				ice_debug(hw, ICE_DBG_SW, "No chain index available\n");
+				status = -ENOSPC;
+				goto err_unroll;
+			}
+
+			entry->chain_idx = chain_idx;
+			content->result_indx =
+				ICE_AQ_RECIPE_RESULT_EN |
+				((chain_idx << ICE_AQ_RECIPE_RESULT_DATA_S) &
+				 ICE_AQ_RECIPE_RESULT_DATA_M);
+			clear_bit(chain_idx, result_idx_bm);
+			chain_idx = find_first_bit(result_idx_bm,
+						   ICE_MAX_FV_WORDS);
+		}
+
+		/* fill recipe dependencies */
+		bitmap_zero((unsigned long *)buf[recps].recipe_bitmap,
+			    ICE_MAX_NUM_RECIPES);
+		set_bit(buf[recps].recipe_indx,
+			(unsigned long *)buf[recps].recipe_bitmap);
+		content->act_ctrl_fwd_priority = rm->priority;
+
+		if (rm->need_pass_l2)
+			content->act_ctrl |= ICE_AQ_RECIPE_ACT_NEED_PASS_L2;
+
+		if (rm->allow_pass_l2)
+			content->act_ctrl |= ICE_AQ_RECIPE_ACT_ALLOW_PASS_L2;
+		recps++;
+	}
+
+	if (rm->n_grp_count == 1) {
+		rm->root_rid = buf[0].recipe_indx;
+		set_bit(buf[0].recipe_indx, rm->r_bitmap);
+		buf[0].content.rid = rm->root_rid | ICE_AQ_RECIPE_ID_IS_ROOT;
+		if (sizeof(buf[0].recipe_bitmap) >= sizeof(rm->r_bitmap)) {
+			memcpy(buf[0].recipe_bitmap, rm->r_bitmap,
+			       sizeof(buf[0].recipe_bitmap));
+		} else {
+			status = -EINVAL;
+			goto err_unroll;
+		}
+		/* Applicable only for ROOT_RECIPE, set the fwd_priority for
+		 * the recipe which is getting created if specified
+		 * by user. Usually any advanced switch filter, which results
+		 * into new extraction sequence, ended up creating a new recipe
+		 * of type ROOT and usually recipes are associated with profiles
+		 * Switch rule referreing newly created recipe, needs to have
+		 * either/or 'fwd' or 'join' priority, otherwise switch rule
+		 * evaluation will not happen correctly. In other words, if
+		 * switch rule to be evaluated on priority basis, then recipe
+		 * needs to have priority, otherwise it will be evaluated last.
+		 */
+		buf[0].content.act_ctrl_fwd_priority = rm->priority;
+	} else {
+		struct ice_recp_grp_entry *last_chain_entry;
+		u16 rid, i;
+
+		/* Allocate the last recipe that will chain the outcomes of the
+		 * other recipes together
+		 */
+		status = ice_alloc_recipe(hw, &rid);
+		if (status)
+			goto err_unroll;
+
+		content = &buf[recps].content;
+
+		buf[recps].recipe_indx = (u8)rid;
+		content->rid = (u8)rid;
+		content->rid |= ICE_AQ_RECIPE_ID_IS_ROOT;
+		/* the new entry created should also be part of rg_list to
+		 * make sure we have complete recipe
+		 */
+		last_chain_entry = devm_kzalloc(ice_hw_to_dev(hw),
+						sizeof(*last_chain_entry),
+						GFP_KERNEL);
+		if (!last_chain_entry) {
+			status = -ENOMEM;
+			goto err_unroll;
+		}
+		last_chain_entry->rid = rid;
+		memset(&content->lkup_indx, 0, sizeof(content->lkup_indx));
+		/* All recipes use look-up index 0 to match switch ID. */
+		content->lkup_indx[0] = ICE_AQ_SW_ID_LKUP_IDX;
+		content->mask[0] = cpu_to_le16(ICE_AQ_SW_ID_LKUP_MASK);
+		for (i = 1; i <= ICE_NUM_WORDS_RECIPE; i++) {
+			content->lkup_indx[i] = ICE_AQ_RECIPE_LKUP_IGNORE;
+			content->mask[i] = 0;
+		}
+
+		i = 1;
+		/* update r_bitmap with the recp that is used for chaining */
+		set_bit(rid, rm->r_bitmap);
+		/* this is the recipe that chains all the other recipes so it
+		 * should not have a chaining ID to indicate the same
+		 */
+		last_chain_entry->chain_idx = ICE_INVAL_CHAIN_IND;
+		list_for_each_entry(entry, &rm->rg_list, l_entry) {
+			last_chain_entry->fv_idx[i] = entry->chain_idx;
+			content->lkup_indx[i] = entry->chain_idx;
+			content->mask[i++] = cpu_to_le16(0xFFFF);
+			set_bit(entry->rid, rm->r_bitmap);
+		}
+		list_add(&last_chain_entry->l_entry, &rm->rg_list);
+		if (sizeof(buf[recps].recipe_bitmap) >=
+		    sizeof(rm->r_bitmap)) {
+			memcpy(buf[recps].recipe_bitmap, rm->r_bitmap,
+			       sizeof(buf[recps].recipe_bitmap));
+		} else {
+			status = -EINVAL;
+			goto err_unroll;
+		}
+		content->act_ctrl_fwd_priority = rm->priority;
+
+		recps++;
+		rm->root_rid = (u8)rid;
+	}
+	status = ice_acquire_change_lock(hw, ICE_RES_WRITE);
+	if (status)
+		goto err_unroll;
+
+	status = ice_aq_add_recipe(hw, buf, rm->n_grp_count, NULL);
+	ice_release_change_lock(hw);
+	if (status)
+		goto err_unroll;
+
+	/* Every recipe that just got created add it to the recipe
+	 * book keeping list
+	 */
+	list_for_each_entry(entry, &rm->rg_list, l_entry) {
+		struct ice_switch_info *sw = hw->switch_info;
+		bool is_root, idx_found = false;
+		struct ice_sw_recipe *recp;
+		u16 idx, buf_idx = 0;
+
+		/* find buffer index for copying some data */
+		for (idx = 0; idx < rm->n_grp_count; idx++)
+			if (buf[idx].recipe_indx == entry->rid) {
+				buf_idx = idx;
+				idx_found = true;
+			}
+
+		if (!idx_found) {
+			status = -EIO;
+			goto err_unroll;
+		}
+
+		recp = &sw->recp_list[entry->rid];
+		is_root = (rm->root_rid == entry->rid);
+		recp->is_root = is_root;
+
+		recp->root_rid = entry->rid;
+		recp->big_recp = (is_root && rm->n_grp_count > 1);
+
+		memcpy(&recp->ext_words, entry->r_group.pairs,
+		       entry->r_group.n_val_pairs * sizeof(struct ice_fv_word));
+
+		memcpy(recp->r_bitmap, buf[buf_idx].recipe_bitmap,
+		       sizeof(recp->r_bitmap));
+
+		/* Copy non-result fv index values and masks to recipe. This
+		 * call will also update the result recipe bitmask.
+		 */
+		ice_collect_result_idx(&buf[buf_idx], recp);
+
+		/* for non-root recipes, also copy to the root, this allows
+		 * easier matching of a complete chained recipe
+		 */
+		if (!is_root)
+			ice_collect_result_idx(&buf[buf_idx],
+					       &sw->recp_list[rm->root_rid]);
+
+		recp->n_ext_words = entry->r_group.n_val_pairs;
+		recp->chain_idx = entry->chain_idx;
+		recp->priority = buf[buf_idx].content.act_ctrl_fwd_priority;
+		recp->n_grp_count = rm->n_grp_count;
+		recp->tun_type = rm->tun_type;
+		recp->need_pass_l2 = rm->need_pass_l2;
+		recp->allow_pass_l2 = rm->allow_pass_l2;
+		recp->recp_created = true;
+	}
+	rm->root_buf = buf;
+	kfree(tmp);
+	return status;
+
+err_unroll:
+err_mem:
+	kfree(tmp);
+	devm_kfree(ice_hw_to_dev(hw), buf);
+	return status;
+}
+
+/**
+ * ice_create_recipe_group - creates recipe group
+ * @hw: pointer to hardware structure
+ * @rm: recipe management list entry
+ * @lkup_exts: lookup elements
+ */
+static int
+ice_create_recipe_group(struct ice_hw *hw, struct ice_sw_recipe *rm,
+			struct ice_prot_lkup_ext *lkup_exts)
+{
+	u8 recp_count = 0;
+	int status;
+
+	rm->n_grp_count = 0;
+
+	/* Create recipes for words that are marked not done by packing them
+	 * as best fit.
+	 */
+	status = ice_create_first_fit_recp_def(hw, lkup_exts,
+					       &rm->rg_list, &recp_count);
+	if (!status) {
+		rm->n_grp_count += recp_count;
+		rm->n_ext_words = lkup_exts->n_val_words;
+		memcpy(&rm->ext_words, lkup_exts->fv_words,
+		       sizeof(rm->ext_words));
+		memcpy(rm->word_masks, lkup_exts->field_mask,
+		       sizeof(rm->word_masks));
+	}
+
+	return status;
+}
+
+/* ice_get_compat_fv_bitmap - Get compatible field vector bitmap for rule
+ * @hw: pointer to hardware structure
+ * @rinfo: other information regarding the rule e.g. priority and action info
+ * @bm: pointer to memory for returning the bitmap of field vectors
+ */
+static void
+ice_get_compat_fv_bitmap(struct ice_hw *hw, struct ice_adv_rule_info *rinfo,
+			 unsigned long *bm)
+{
+	enum ice_prof_type prof_type;
+
+	bitmap_zero(bm, ICE_MAX_NUM_PROFILES);
+
+	switch (rinfo->tun_type) {
+	case ICE_NON_TUN:
+		prof_type = ICE_PROF_NON_TUN;
+		break;
+	case ICE_ALL_TUNNELS:
+		prof_type = ICE_PROF_TUN_ALL;
+		break;
+	case ICE_SW_TUN_GENEVE:
+	case ICE_SW_TUN_VXLAN:
+		prof_type = ICE_PROF_TUN_UDP;
+		break;
+	case ICE_SW_TUN_NVGRE:
+		prof_type = ICE_PROF_TUN_GRE;
+		break;
+	case ICE_SW_TUN_GTPU:
+		prof_type = ICE_PROF_TUN_GTPU;
+		break;
+	case ICE_SW_TUN_GTPC:
+		prof_type = ICE_PROF_TUN_GTPC;
+		break;
+	case ICE_SW_TUN_AND_NON_TUN:
+	default:
+		prof_type = ICE_PROF_ALL;
+		break;
+	}
+
+	ice_get_sw_fv_bitmap(hw, prof_type, bm);
+}
+
+/**
+ * ice_add_adv_recipe - Add an advanced recipe that is not part of the default
+ * @hw: pointer to hardware structure
+ * @lkups: lookup elements or match criteria for the advanced recipe, one
+ *  structure per protocol header
+ * @lkups_cnt: number of protocols
+ * @rinfo: other information regarding the rule e.g. priority and action info
+ * @rid: return the recipe ID of the recipe created
+ */
+static int
+ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
+		   u16 lkups_cnt, struct ice_adv_rule_info *rinfo, u16 *rid)
+{
+	DECLARE_BITMAP(fv_bitmap, ICE_MAX_NUM_PROFILES);
+	DECLARE_BITMAP(profiles, ICE_MAX_NUM_PROFILES);
+	struct ice_prot_lkup_ext *lkup_exts;
+	struct ice_recp_grp_entry *r_entry;
+	struct ice_sw_fv_list_entry *fvit;
+	struct ice_recp_grp_entry *r_tmp;
+	struct ice_sw_fv_list_entry *tmp;
+	struct ice_sw_recipe *rm;
+	int status = 0;
+	u8 i;
+
+	if (!lkups_cnt)
+		return -EINVAL;
+
+	lkup_exts = kzalloc(sizeof(*lkup_exts), GFP_KERNEL);
+	if (!lkup_exts)
+		return -ENOMEM;
+
+	/* Determine the number of words to be matched and if it exceeds a
+	 * recipe's restrictions
+	 */
+	for (i = 0; i < lkups_cnt; i++) {
+		u16 count;
+
+		if (lkups[i].type >= ICE_PROTOCOL_LAST) {
+			status = -EIO;
+			goto err_free_lkup_exts;
+		}
+
+		count = ice_fill_valid_words(&lkups[i], lkup_exts);
+		if (!count) {
+			status = -EIO;
+			goto err_free_lkup_exts;
+		}
+	}
+
+	rm = kzalloc(sizeof(*rm), GFP_KERNEL);
+	if (!rm) {
+		status = -ENOMEM;
+		goto err_free_lkup_exts;
+	}
+
+	/* Get field vectors that contain fields extracted from all the protocol
+	 * headers being programmed.
+	 */
+	INIT_LIST_HEAD(&rm->fv_list);
+	INIT_LIST_HEAD(&rm->rg_list);
+
+	/* Get bitmap of field vectors (profiles) that are compatible with the
+	 * rule request; only these will be searched in the subsequent call to
+	 * ice_get_sw_fv_list.
+	 */
+	ice_get_compat_fv_bitmap(hw, rinfo, fv_bitmap);
+
+	status = ice_get_sw_fv_list(hw, lkup_exts, fv_bitmap, &rm->fv_list);
+	if (status)
+		goto err_unroll;
+
+	/* Group match words into recipes using preferred recipe grouping
+	 * criteria.
+	 */
+	status = ice_create_recipe_group(hw, rm, lkup_exts);
+	if (status)
+		goto err_unroll;
+
+	/* set the recipe priority if specified */
+	rm->priority = (u8)rinfo->priority;
+
+	rm->need_pass_l2 = rinfo->need_pass_l2;
+	rm->allow_pass_l2 = rinfo->allow_pass_l2;
+
+	/* Find offsets from the field vector. Pick the first one for all the
+	 * recipes.
+	 */
+	status = ice_fill_fv_word_index(hw, &rm->fv_list, &rm->rg_list);
+	if (status)
+		goto err_unroll;
+
+	/* get bitmap of all profiles the recipe will be associated with */
+	bitmap_zero(profiles, ICE_MAX_NUM_PROFILES);
+	list_for_each_entry(fvit, &rm->fv_list, list_entry) {
+		ice_debug(hw, ICE_DBG_SW, "profile: %d\n", fvit->profile_id);
+		set_bit((u16)fvit->profile_id, profiles);
+	}
+
+	/* Look for a recipe which matches our requested fv / mask list */
+	*rid = ice_find_recp(hw, lkup_exts, rinfo);
+	if (*rid < ICE_MAX_NUM_RECIPES)
+		/* Success if found a recipe that match the existing criteria */
+		goto err_unroll;
+
+	rm->tun_type = rinfo->tun_type;
+	/* Recipe we need does not exist, add a recipe */
+	status = ice_add_sw_recipe(hw, rm, profiles);
+	if (status)
+		goto err_unroll;
+
+	/* Associate all the recipes created with all the profiles in the
+	 * common field vector.
+	 */
+	list_for_each_entry(fvit, &rm->fv_list, list_entry) {
+		DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+		u16 j;
+
+		status = ice_aq_get_recipe_to_profile(hw, fvit->profile_id,
+						      (u8 *)r_bitmap, NULL);
+		if (status)
+			goto err_unroll;
+
+		bitmap_or(r_bitmap, r_bitmap, rm->r_bitmap,
+			  ICE_MAX_NUM_RECIPES);
+		status = ice_acquire_change_lock(hw, ICE_RES_WRITE);
+		if (status)
+			goto err_unroll;
+
+		status = ice_aq_map_recipe_to_profile(hw, fvit->profile_id,
+						      (u8 *)r_bitmap,
+						      NULL);
+		ice_release_change_lock(hw);
+
+		if (status)
+			goto err_unroll;
+
+		/* Update profile to recipe bitmap array */
+		bitmap_copy(profile_to_recipe[fvit->profile_id], r_bitmap,
+			    ICE_MAX_NUM_RECIPES);
+
+		/* Update recipe to profile bitmap array */
+		for_each_set_bit(j, rm->r_bitmap, ICE_MAX_NUM_RECIPES)
+			set_bit((u16)fvit->profile_id, recipe_to_profile[j]);
+	}
+
+	*rid = rm->root_rid;
+	memcpy(&hw->switch_info->recp_list[*rid].lkup_exts, lkup_exts,
+	       sizeof(*lkup_exts));
+err_unroll:
+	list_for_each_entry_safe(r_entry, r_tmp, &rm->rg_list, l_entry) {
+		list_del(&r_entry->l_entry);
+		devm_kfree(ice_hw_to_dev(hw), r_entry);
+	}
+
+	list_for_each_entry_safe(fvit, tmp, &rm->fv_list, list_entry) {
+		list_del(&fvit->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), fvit);
+	}
+
+	devm_kfree(ice_hw_to_dev(hw), rm->root_buf);
+	kfree(rm);
+
+err_free_lkup_exts:
+	kfree(lkup_exts);
+
+	return status;
+}
+
+/**
+ * ice_dummy_packet_add_vlan - insert VLAN header to dummy pkt
+ *
+ * @dummy_pkt: dummy packet profile pattern to which VLAN tag(s) will be added
+ * @num_vlan: number of VLAN tags
+ */
+static struct ice_dummy_pkt_profile *
+ice_dummy_packet_add_vlan(const struct ice_dummy_pkt_profile *dummy_pkt,
+			  u32 num_vlan)
+{
+	struct ice_dummy_pkt_profile *profile;
+	struct ice_dummy_pkt_offsets *offsets;
+	u32 buf_len, off, etype_off, i;
+	u8 *pkt;
+
+	if (num_vlan < 1 || num_vlan > 2)
+		return ERR_PTR(-EINVAL);
+
+	off = num_vlan * VLAN_HLEN;
+
+	buf_len = array_size(num_vlan, sizeof(ice_dummy_vlan_packet_offsets)) +
+		  dummy_pkt->offsets_len;
+	offsets = kzalloc(buf_len, GFP_KERNEL);
+	if (!offsets)
+		return ERR_PTR(-ENOMEM);
+
+	offsets[0] = dummy_pkt->offsets[0];
+	if (num_vlan == 2) {
+		offsets[1] = ice_dummy_qinq_packet_offsets[0];
+		offsets[2] = ice_dummy_qinq_packet_offsets[1];
+	} else if (num_vlan == 1) {
+		offsets[1] = ice_dummy_vlan_packet_offsets[0];
+	}
+
+	for (i = 1; dummy_pkt->offsets[i].type != ICE_PROTOCOL_LAST; i++) {
+		offsets[i + num_vlan].type = dummy_pkt->offsets[i].type;
+		offsets[i + num_vlan].offset =
+			dummy_pkt->offsets[i].offset + off;
+	}
+	offsets[i + num_vlan] = dummy_pkt->offsets[i];
+
+	etype_off = dummy_pkt->offsets[1].offset;
+
+	buf_len = array_size(num_vlan, sizeof(ice_dummy_vlan_packet)) +
+		  dummy_pkt->pkt_len;
+	pkt = kzalloc(buf_len, GFP_KERNEL);
+	if (!pkt) {
+		kfree(offsets);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	memcpy(pkt, dummy_pkt->pkt, etype_off);
+	memcpy(pkt + etype_off,
+	       num_vlan == 2 ? ice_dummy_qinq_packet : ice_dummy_vlan_packet,
+	       off);
+	memcpy(pkt + etype_off + off, dummy_pkt->pkt + etype_off,
+	       dummy_pkt->pkt_len - etype_off);
+
+	profile = kzalloc(sizeof(*profile), GFP_KERNEL);
+	if (!profile) {
+		kfree(offsets);
+		kfree(pkt);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	profile->offsets = offsets;
+	profile->pkt = pkt;
+	profile->pkt_len = buf_len;
+	profile->match |= ICE_PKT_KMALLOC;
+
+	return profile;
+}
+
+/**
+ * ice_find_dummy_packet - find dummy packet
+ *
+ * @lkups: lookup elements or match criteria for the advanced recipe, one
+ *	   structure per protocol header
+ * @lkups_cnt: number of protocols
+ * @tun_type: tunnel type
+ *
+ * Returns the &ice_dummy_pkt_profile corresponding to these lookup params.
+ */
+static const struct ice_dummy_pkt_profile *
+ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt,
+		      enum ice_sw_tunnel_type tun_type)
+{
+	const struct ice_dummy_pkt_profile *ret = ice_dummy_pkt_profiles;
+	u32 match = 0, vlan_count = 0;
+	u16 i;
+
+	switch (tun_type) {
+	case ICE_SW_TUN_GTPC:
+		match |= ICE_PKT_TUN_GTPC;
+		break;
+	case ICE_SW_TUN_GTPU:
+		match |= ICE_PKT_TUN_GTPU;
+		break;
+	case ICE_SW_TUN_NVGRE:
+		match |= ICE_PKT_TUN_NVGRE;
+		break;
+	case ICE_SW_TUN_GENEVE:
+	case ICE_SW_TUN_VXLAN:
+		match |= ICE_PKT_TUN_UDP;
+		break;
+	default:
+		break;
+	}
+
+	for (i = 0; i < lkups_cnt; i++) {
+		if (lkups[i].type == ICE_UDP_ILOS)
+			match |= ICE_PKT_INNER_UDP;
+		else if (lkups[i].type == ICE_TCP_IL)
+			match |= ICE_PKT_INNER_TCP;
+		else if (lkups[i].type == ICE_IPV6_OFOS)
+			match |= ICE_PKT_OUTER_IPV6;
+		else if (lkups[i].type == ICE_VLAN_OFOS ||
+			 lkups[i].type == ICE_VLAN_EX)
+			vlan_count++;
+		else if (lkups[i].type == ICE_VLAN_IN)
+			vlan_count++;
+		else if (lkups[i].type == ICE_ETYPE_OL &&
+			 lkups[i].h_u.ethertype.ethtype_id ==
+				cpu_to_be16(ICE_IPV6_ETHER_ID) &&
+			 lkups[i].m_u.ethertype.ethtype_id ==
+				cpu_to_be16(0xFFFF))
+			match |= ICE_PKT_OUTER_IPV6;
+		else if (lkups[i].type == ICE_ETYPE_IL &&
+			 lkups[i].h_u.ethertype.ethtype_id ==
+				cpu_to_be16(ICE_IPV6_ETHER_ID) &&
+			 lkups[i].m_u.ethertype.ethtype_id ==
+				cpu_to_be16(0xFFFF))
+			match |= ICE_PKT_INNER_IPV6;
+		else if (lkups[i].type == ICE_IPV6_IL)
+			match |= ICE_PKT_INNER_IPV6;
+		else if (lkups[i].type == ICE_GTP_NO_PAY)
+			match |= ICE_PKT_GTP_NOPAY;
+		else if (lkups[i].type == ICE_PPPOE) {
+			match |= ICE_PKT_PPPOE;
+			if (lkups[i].h_u.pppoe_hdr.ppp_prot_id ==
+			    htons(PPP_IPV6))
+				match |= ICE_PKT_OUTER_IPV6;
+		} else if (lkups[i].type == ICE_L2TPV3)
+			match |= ICE_PKT_L2TPV3;
+	}
+
+	while (ret->match && (match & ret->match) != ret->match)
+		ret++;
+
+	if (vlan_count != 0)
+		ret = ice_dummy_packet_add_vlan(ret, vlan_count);
+
+	return ret;
+}
+
+/**
+ * ice_fill_adv_dummy_packet - fill a dummy packet with given match criteria
+ *
+ * @lkups: lookup elements or match criteria for the advanced recipe, one
+ *	   structure per protocol header
+ * @lkups_cnt: number of protocols
+ * @s_rule: stores rule information from the match criteria
+ * @profile: dummy packet profile (the template, its size and header offsets)
+ */
+static int
+ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt,
+			  struct ice_sw_rule_lkup_rx_tx *s_rule,
+			  const struct ice_dummy_pkt_profile *profile)
+{
+	u8 *pkt;
+	u16 i;
+
+	/* Start with a packet with a pre-defined/dummy content. Then, fill
+	 * in the header values to be looked up or matched.
+	 */
+	pkt = s_rule->hdr_data;
+
+	memcpy(pkt, profile->pkt, profile->pkt_len);
+
+	for (i = 0; i < lkups_cnt; i++) {
+		const struct ice_dummy_pkt_offsets *offsets = profile->offsets;
+		enum ice_protocol_type type;
+		u16 offset = 0, len = 0, j;
+		bool found = false;
+
+		/* find the start of this layer; it should be found since this
+		 * was already checked when search for the dummy packet
+		 */
+		type = lkups[i].type;
+		/* metadata isn't present in the packet */
+		if (type == ICE_HW_METADATA)
+			continue;
+
+		for (j = 0; offsets[j].type != ICE_PROTOCOL_LAST; j++) {
+			if (type == offsets[j].type) {
+				offset = offsets[j].offset;
+				found = true;
+				break;
+			}
+		}
+		/* this should never happen in a correct calling sequence */
+		if (!found)
+			return -EINVAL;
+
+		switch (lkups[i].type) {
+		case ICE_MAC_OFOS:
+		case ICE_MAC_IL:
+			len = sizeof(struct ice_ether_hdr);
+			break;
+		case ICE_ETYPE_OL:
+		case ICE_ETYPE_IL:
+			len = sizeof(struct ice_ethtype_hdr);
+			break;
+		case ICE_VLAN_OFOS:
+		case ICE_VLAN_EX:
+		case ICE_VLAN_IN:
+			len = sizeof(struct ice_vlan_hdr);
+			break;
+		case ICE_IPV4_OFOS:
+		case ICE_IPV4_IL:
+			len = sizeof(struct ice_ipv4_hdr);
+			break;
+		case ICE_IPV6_OFOS:
+		case ICE_IPV6_IL:
+			len = sizeof(struct ice_ipv6_hdr);
+			break;
+		case ICE_TCP_IL:
+		case ICE_UDP_OF:
+		case ICE_UDP_ILOS:
+			len = sizeof(struct ice_l4_hdr);
+			break;
+		case ICE_SCTP_IL:
+			len = sizeof(struct ice_sctp_hdr);
+			break;
+		case ICE_NVGRE:
+			len = sizeof(struct ice_nvgre_hdr);
+			break;
+		case ICE_VXLAN:
+		case ICE_GENEVE:
+			len = sizeof(struct ice_udp_tnl_hdr);
+			break;
+		case ICE_GTP_NO_PAY:
+		case ICE_GTP:
+			len = sizeof(struct ice_udp_gtp_hdr);
+			break;
+		case ICE_PPPOE:
+			len = sizeof(struct ice_pppoe_hdr);
+			break;
+		case ICE_L2TPV3:
+			len = sizeof(struct ice_l2tpv3_sess_hdr);
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		/* the length should be a word multiple */
+		if (len % ICE_BYTES_PER_WORD)
+			return -EIO;
+
+		/* We have the offset to the header start, the length, the
+		 * caller's header values and mask. Use this information to
+		 * copy the data into the dummy packet appropriately based on
+		 * the mask. Note that we need to only write the bits as
+		 * indicated by the mask to make sure we don't improperly write
+		 * over any significant packet data.
+		 */
+		for (j = 0; j < len / sizeof(u16); j++) {
+			u16 *ptr = (u16 *)(pkt + offset);
+			u16 mask = lkups[i].m_raw[j];
+
+			if (!mask)
+				continue;
+
+			ptr[j] = (ptr[j] & ~mask) | (lkups[i].h_raw[j] & mask);
+		}
+	}
+
+	s_rule->hdr_len = cpu_to_le16(profile->pkt_len);
+
+	return 0;
+}
+
+/**
+ * ice_fill_adv_packet_tun - fill dummy packet with udp tunnel port
+ * @hw: pointer to the hardware structure
+ * @tun_type: tunnel type
+ * @pkt: dummy packet to fill in
+ * @offsets: offset info for the dummy packet
+ */
+static int
+ice_fill_adv_packet_tun(struct ice_hw *hw, enum ice_sw_tunnel_type tun_type,
+			u8 *pkt, const struct ice_dummy_pkt_offsets *offsets)
+{
+	u16 open_port, i;
+
+	switch (tun_type) {
+	case ICE_SW_TUN_VXLAN:
+		if (!ice_get_open_tunnel_port(hw, &open_port, TNL_VXLAN))
+			return -EIO;
+		break;
+	case ICE_SW_TUN_GENEVE:
+		if (!ice_get_open_tunnel_port(hw, &open_port, TNL_GENEVE))
+			return -EIO;
+		break;
+	default:
+		/* Nothing needs to be done for this tunnel type */
+		return 0;
+	}
+
+	/* Find the outer UDP protocol header and insert the port number */
+	for (i = 0; offsets[i].type != ICE_PROTOCOL_LAST; i++) {
+		if (offsets[i].type == ICE_UDP_OF) {
+			struct ice_l4_hdr *hdr;
+			u16 offset;
+
+			offset = offsets[i].offset;
+			hdr = (struct ice_l4_hdr *)&pkt[offset];
+			hdr->dst_port = cpu_to_be16(open_port);
+
+			return 0;
+		}
+	}
+
+	return -EIO;
+}
+
+/**
+ * ice_fill_adv_packet_vlan - fill dummy packet with VLAN tag type
+ * @hw: pointer to hw structure
+ * @vlan_type: VLAN tag type
+ * @pkt: dummy packet to fill in
+ * @offsets: offset info for the dummy packet
+ */
+static int
+ice_fill_adv_packet_vlan(struct ice_hw *hw, u16 vlan_type, u8 *pkt,
+			 const struct ice_dummy_pkt_offsets *offsets)
+{
+	u16 i;
+
+	/* Check if there is something to do */
+	if (!vlan_type || !ice_is_dvm_ena(hw))
+		return 0;
+
+	/* Find VLAN header and insert VLAN TPID */
+	for (i = 0; offsets[i].type != ICE_PROTOCOL_LAST; i++) {
+		if (offsets[i].type == ICE_VLAN_OFOS ||
+		    offsets[i].type == ICE_VLAN_EX) {
+			struct ice_vlan_hdr *hdr;
+			u16 offset;
+
+			offset = offsets[i].offset;
+			hdr = (struct ice_vlan_hdr *)&pkt[offset];
+			hdr->type = cpu_to_be16(vlan_type);
+
+			return 0;
+		}
+	}
+
+	return -EIO;
+}
+
+static bool ice_rules_equal(const struct ice_adv_rule_info *first,
+			    const struct ice_adv_rule_info *second)
+{
+	return first->sw_act.flag == second->sw_act.flag &&
+	       first->tun_type == second->tun_type &&
+	       first->vlan_type == second->vlan_type &&
+	       first->src_vsi == second->src_vsi &&
+	       first->need_pass_l2 == second->need_pass_l2 &&
+	       first->allow_pass_l2 == second->allow_pass_l2;
+}
+
+/**
+ * ice_find_adv_rule_entry - Search a rule entry
+ * @hw: pointer to the hardware structure
+ * @lkups: lookup elements or match criteria for the advanced recipe, one
+ *	   structure per protocol header
+ * @lkups_cnt: number of protocols
+ * @recp_id: recipe ID for which we are finding the rule
+ * @rinfo: other information regarding the rule e.g. priority and action info
+ *
+ * Helper function to search for a given advance rule entry
+ * Returns pointer to entry storing the rule if found
+ */
+static struct ice_adv_fltr_mgmt_list_entry *
+ice_find_adv_rule_entry(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
+			u16 lkups_cnt, u16 recp_id,
+			struct ice_adv_rule_info *rinfo)
+{
+	struct ice_adv_fltr_mgmt_list_entry *list_itr;
+	struct ice_switch_info *sw = hw->switch_info;
+	int i;
+
+	list_for_each_entry(list_itr, &sw->recp_list[recp_id].filt_rules,
+			    list_entry) {
+		bool lkups_matched = true;
+
+		if (lkups_cnt != list_itr->lkups_cnt)
+			continue;
+		for (i = 0; i < list_itr->lkups_cnt; i++)
+			if (memcmp(&list_itr->lkups[i], &lkups[i],
+				   sizeof(*lkups))) {
+				lkups_matched = false;
+				break;
+			}
+		if (ice_rules_equal(rinfo, &list_itr->rule_info) &&
+		    lkups_matched)
+			return list_itr;
+	}
+	return NULL;
+}
+
+/**
+ * ice_adv_add_update_vsi_list
+ * @hw: pointer to the hardware structure
+ * @m_entry: pointer to current adv filter management list entry
+ * @cur_fltr: filter information from the book keeping entry
+ * @new_fltr: filter information with the new VSI to be added
+ *
+ * Call AQ command to add or update previously created VSI list with new VSI.
+ *
+ * Helper function to do book keeping associated with adding filter information
+ * The algorithm to do the booking keeping is described below :
+ * When a VSI needs to subscribe to a given advanced filter
+ *	if only one VSI has been added till now
+ *		Allocate a new VSI list and add two VSIs
+ *		to this list using switch rule command
+ *		Update the previously created switch rule with the
+ *		newly created VSI list ID
+ *	if a VSI list was previously created
+ *		Add the new VSI to the previously created VSI list set
+ *		using the update switch rule command
+ */
+static int
+ice_adv_add_update_vsi_list(struct ice_hw *hw,
+			    struct ice_adv_fltr_mgmt_list_entry *m_entry,
+			    struct ice_adv_rule_info *cur_fltr,
+			    struct ice_adv_rule_info *new_fltr)
+{
+	u16 vsi_list_id = 0;
+	int status;
+
+	if (cur_fltr->sw_act.fltr_act == ICE_FWD_TO_Q ||
+	    cur_fltr->sw_act.fltr_act == ICE_FWD_TO_QGRP ||
+	    cur_fltr->sw_act.fltr_act == ICE_DROP_PACKET)
+		return -EOPNOTSUPP;
+
+	if ((new_fltr->sw_act.fltr_act == ICE_FWD_TO_Q ||
+	     new_fltr->sw_act.fltr_act == ICE_FWD_TO_QGRP) &&
+	    (cur_fltr->sw_act.fltr_act == ICE_FWD_TO_VSI ||
+	     cur_fltr->sw_act.fltr_act == ICE_FWD_TO_VSI_LIST))
+		return -EOPNOTSUPP;
+
+	if (m_entry->vsi_count < 2 && !m_entry->vsi_list_info) {
+		 /* Only one entry existed in the mapping and it was not already
+		  * a part of a VSI list. So, create a VSI list with the old and
+		  * new VSIs.
+		  */
+		struct ice_fltr_info tmp_fltr;
+		u16 vsi_handle_arr[2];
+
+		/* A rule already exists with the new VSI being added */
+		if (cur_fltr->sw_act.fwd_id.hw_vsi_id ==
+		    new_fltr->sw_act.fwd_id.hw_vsi_id)
+			return -EEXIST;
+
+		vsi_handle_arr[0] = cur_fltr->sw_act.vsi_handle;
+		vsi_handle_arr[1] = new_fltr->sw_act.vsi_handle;
+		status = ice_create_vsi_list_rule(hw, &vsi_handle_arr[0], 2,
+						  &vsi_list_id,
+						  ICE_SW_LKUP_LAST);
+		if (status)
+			return status;
+
+		memset(&tmp_fltr, 0, sizeof(tmp_fltr));
+		tmp_fltr.flag = m_entry->rule_info.sw_act.flag;
+		tmp_fltr.fltr_rule_id = cur_fltr->fltr_rule_id;
+		tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST;
+		tmp_fltr.fwd_id.vsi_list_id = vsi_list_id;
+		tmp_fltr.lkup_type = ICE_SW_LKUP_LAST;
+
+		/* Update the previous switch rule of "forward to VSI" to
+		 * "fwd to VSI list"
+		 */
+		status = ice_update_pkt_fwd_rule(hw, &tmp_fltr);
+		if (status)
+			return status;
+
+		cur_fltr->sw_act.fwd_id.vsi_list_id = vsi_list_id;
+		cur_fltr->sw_act.fltr_act = ICE_FWD_TO_VSI_LIST;
+		m_entry->vsi_list_info =
+			ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2,
+						vsi_list_id);
+	} else {
+		u16 vsi_handle = new_fltr->sw_act.vsi_handle;
+
+		if (!m_entry->vsi_list_info)
+			return -EIO;
+
+		/* A rule already exists with the new VSI being added */
+		if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map))
+			return 0;
+
+		/* Update the previously created VSI list set with
+		 * the new VSI ID passed in
+		 */
+		vsi_list_id = cur_fltr->sw_act.fwd_id.vsi_list_id;
+
+		status = ice_update_vsi_list_rule(hw, &vsi_handle, 1,
+						  vsi_list_id, false,
+						  ice_aqc_opc_update_sw_rules,
+						  ICE_SW_LKUP_LAST);
+		/* update VSI list mapping info with new VSI ID */
+		if (!status)
+			set_bit(vsi_handle, m_entry->vsi_list_info->vsi_map);
+	}
+	if (!status)
+		m_entry->vsi_count++;
+	return status;
+}
+
+void ice_rule_add_tunnel_metadata(struct ice_adv_lkup_elem *lkup)
+{
+	lkup->type = ICE_HW_METADATA;
+	lkup->m_u.metadata.flags[ICE_PKT_FLAGS_MDID21] |=
+		cpu_to_be16(ICE_PKT_TUNNEL_MASK);
+}
+
+void ice_rule_add_direction_metadata(struct ice_adv_lkup_elem *lkup)
+{
+	lkup->type = ICE_HW_METADATA;
+	lkup->m_u.metadata.flags[ICE_PKT_FLAGS_MDID20] |=
+		cpu_to_be16(ICE_PKT_FROM_NETWORK);
+}
+
+void ice_rule_add_vlan_metadata(struct ice_adv_lkup_elem *lkup)
+{
+	lkup->type = ICE_HW_METADATA;
+	lkup->m_u.metadata.flags[ICE_PKT_FLAGS_MDID20] |=
+		cpu_to_be16(ICE_PKT_VLAN_MASK);
+}
+
+void ice_rule_add_src_vsi_metadata(struct ice_adv_lkup_elem *lkup)
+{
+	lkup->type = ICE_HW_METADATA;
+	lkup->m_u.metadata.source_vsi = cpu_to_be16(ICE_MDID_SOURCE_VSI_MASK);
+}
+
+/**
+ * ice_add_adv_rule - helper function to create an advanced switch rule
+ * @hw: pointer to the hardware structure
+ * @lkups: information on the words that needs to be looked up. All words
+ * together makes one recipe
+ * @lkups_cnt: num of entries in the lkups array
+ * @rinfo: other information related to the rule that needs to be programmed
+ * @added_entry: this will return recipe_id, rule_id and vsi_handle. should be
+ *               ignored is case of error.
+ *
+ * This function can program only 1 rule at a time. The lkups is used to
+ * describe the all the words that forms the "lookup" portion of the recipe.
+ * These words can span multiple protocols. Callers to this function need to
+ * pass in a list of protocol headers with lookup information along and mask
+ * that determines which words are valid from the given protocol header.
+ * rinfo describes other information related to this rule such as forwarding
+ * IDs, priority of this rule, etc.
+ */
+int
+ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
+		 u16 lkups_cnt, struct ice_adv_rule_info *rinfo,
+		 struct ice_rule_query_data *added_entry)
+{
+	struct ice_adv_fltr_mgmt_list_entry *m_entry, *adv_fltr = NULL;
+	struct ice_sw_rule_lkup_rx_tx *s_rule = NULL;
+	const struct ice_dummy_pkt_profile *profile;
+	u16 rid = 0, i, rule_buf_sz, vsi_handle;
+	struct list_head *rule_head;
+	struct ice_switch_info *sw;
+	u16 word_cnt;
+	u32 act = 0;
+	int status;
+	u8 q_rgn;
+
+	/* Initialize profile to result index bitmap */
+	if (!hw->switch_info->prof_res_bm_init) {
+		hw->switch_info->prof_res_bm_init = 1;
+		ice_init_prof_result_bm(hw);
+	}
+
+	if (!lkups_cnt)
+		return -EINVAL;
+
+	/* get # of words we need to match */
+	word_cnt = 0;
+	for (i = 0; i < lkups_cnt; i++) {
+		u16 j;
+
+		for (j = 0; j < ARRAY_SIZE(lkups->m_raw); j++)
+			if (lkups[i].m_raw[j])
+				word_cnt++;
+	}
+
+	if (!word_cnt)
+		return -EINVAL;
+
+	if (word_cnt > ICE_MAX_CHAIN_WORDS)
+		return -ENOSPC;
+
+	/* locate a dummy packet */
+	profile = ice_find_dummy_packet(lkups, lkups_cnt, rinfo->tun_type);
+	if (IS_ERR(profile))
+		return PTR_ERR(profile);
+
+	if (!(rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI ||
+	      rinfo->sw_act.fltr_act == ICE_FWD_TO_Q ||
+	      rinfo->sw_act.fltr_act == ICE_FWD_TO_QGRP ||
+	      rinfo->sw_act.fltr_act == ICE_DROP_PACKET ||
+	      rinfo->sw_act.fltr_act == ICE_NOP)) {
+		status = -EIO;
+		goto free_pkt_profile;
+	}
+
+	vsi_handle = rinfo->sw_act.vsi_handle;
+	if (!ice_is_vsi_valid(hw, vsi_handle)) {
+		status =  -EINVAL;
+		goto free_pkt_profile;
+	}
+
+	if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI ||
+	    rinfo->sw_act.fltr_act == ICE_NOP)
+		rinfo->sw_act.fwd_id.hw_vsi_id =
+			ice_get_hw_vsi_num(hw, vsi_handle);
+
+	if (rinfo->src_vsi)
+		rinfo->sw_act.src = ice_get_hw_vsi_num(hw, rinfo->src_vsi);
+	else
+		rinfo->sw_act.src = ice_get_hw_vsi_num(hw, vsi_handle);
+
+	status = ice_add_adv_recipe(hw, lkups, lkups_cnt, rinfo, &rid);
+	if (status)
+		goto free_pkt_profile;
+	m_entry = ice_find_adv_rule_entry(hw, lkups, lkups_cnt, rid, rinfo);
+	if (m_entry) {
+		/* we have to add VSI to VSI_LIST and increment vsi_count.
+		 * Also Update VSI list so that we can change forwarding rule
+		 * if the rule already exists, we will check if it exists with
+		 * same vsi_id, if not then add it to the VSI list if it already
+		 * exists if not then create a VSI list and add the existing VSI
+		 * ID and the new VSI ID to the list
+		 * We will add that VSI to the list
+		 */
+		status = ice_adv_add_update_vsi_list(hw, m_entry,
+						     &m_entry->rule_info,
+						     rinfo);
+		if (added_entry) {
+			added_entry->rid = rid;
+			added_entry->rule_id = m_entry->rule_info.fltr_rule_id;
+			added_entry->vsi_handle = rinfo->sw_act.vsi_handle;
+		}
+		goto free_pkt_profile;
+	}
+	rule_buf_sz = ICE_SW_RULE_RX_TX_HDR_SIZE(s_rule, profile->pkt_len);
+	s_rule = kzalloc(rule_buf_sz, GFP_KERNEL);
+	if (!s_rule) {
+		status = -ENOMEM;
+		goto free_pkt_profile;
+	}
+	if (!rinfo->flags_info.act_valid) {
+		act |= ICE_SINGLE_ACT_LAN_ENABLE;
+		act |= ICE_SINGLE_ACT_LB_ENABLE;
+	} else {
+		act |= rinfo->flags_info.act & (ICE_SINGLE_ACT_LAN_ENABLE |
+						ICE_SINGLE_ACT_LB_ENABLE);
+	}
+
+	switch (rinfo->sw_act.fltr_act) {
+	case ICE_FWD_TO_VSI:
+		act |= (rinfo->sw_act.fwd_id.hw_vsi_id <<
+			ICE_SINGLE_ACT_VSI_ID_S) & ICE_SINGLE_ACT_VSI_ID_M;
+		act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT;
+		break;
+	case ICE_FWD_TO_Q:
+		act |= ICE_SINGLE_ACT_TO_Q;
+		act |= (rinfo->sw_act.fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) &
+		       ICE_SINGLE_ACT_Q_INDEX_M;
+		break;
+	case ICE_FWD_TO_QGRP:
+		q_rgn = rinfo->sw_act.qgrp_size > 0 ?
+			(u8)ilog2(rinfo->sw_act.qgrp_size) : 0;
+		act |= ICE_SINGLE_ACT_TO_Q;
+		act |= (rinfo->sw_act.fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) &
+		       ICE_SINGLE_ACT_Q_INDEX_M;
+		act |= (q_rgn << ICE_SINGLE_ACT_Q_REGION_S) &
+		       ICE_SINGLE_ACT_Q_REGION_M;
+		break;
+	case ICE_DROP_PACKET:
+		act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP |
+		       ICE_SINGLE_ACT_VALID_BIT;
+		break;
+	case ICE_NOP:
+		act |= FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M,
+				  rinfo->sw_act.fwd_id.hw_vsi_id);
+		act &= ~ICE_SINGLE_ACT_VALID_BIT;
+		break;
+	default:
+		status = -EIO;
+		goto err_ice_add_adv_rule;
+	}
+
+	/* If there is no matching criteria for direction there
+	 * is only one difference between Rx and Tx:
+	 * - get switch id base on VSI number from source field (Tx)
+	 * - get switch id base on port number (Rx)
+	 *
+	 * If matching on direction metadata is chose rule direction is
+	 * extracted from type value set here.
+	 */
+	if (rinfo->sw_act.flag & ICE_FLTR_TX) {
+		s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX);
+		s_rule->src = cpu_to_le16(rinfo->sw_act.src);
+	} else {
+		s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX);
+		s_rule->src = cpu_to_le16(hw->port_info->lport);
+	}
+
+	s_rule->recipe_id = cpu_to_le16(rid);
+	s_rule->act = cpu_to_le32(act);
+
+	status = ice_fill_adv_dummy_packet(lkups, lkups_cnt, s_rule, profile);
+	if (status)
+		goto err_ice_add_adv_rule;
+
+	status = ice_fill_adv_packet_tun(hw, rinfo->tun_type, s_rule->hdr_data,
+					 profile->offsets);
+	if (status)
+		goto err_ice_add_adv_rule;
+
+	status = ice_fill_adv_packet_vlan(hw, rinfo->vlan_type,
+					  s_rule->hdr_data,
+					  profile->offsets);
+	if (status)
+		goto err_ice_add_adv_rule;
+
+	status = ice_aq_sw_rules(hw, (struct ice_aqc_sw_rules *)s_rule,
+				 rule_buf_sz, 1, ice_aqc_opc_add_sw_rules,
+				 NULL);
+	if (status)
+		goto err_ice_add_adv_rule;
+	adv_fltr = devm_kzalloc(ice_hw_to_dev(hw),
+				sizeof(struct ice_adv_fltr_mgmt_list_entry),
+				GFP_KERNEL);
+	if (!adv_fltr) {
+		status = -ENOMEM;
+		goto err_ice_add_adv_rule;
+	}
+
+	adv_fltr->lkups = devm_kmemdup(ice_hw_to_dev(hw), lkups,
+				       lkups_cnt * sizeof(*lkups), GFP_KERNEL);
+	if (!adv_fltr->lkups) {
+		status = -ENOMEM;
+		goto err_ice_add_adv_rule;
+	}
+
+	adv_fltr->lkups_cnt = lkups_cnt;
+	adv_fltr->rule_info = *rinfo;
+	adv_fltr->rule_info.fltr_rule_id = le16_to_cpu(s_rule->index);
+	sw = hw->switch_info;
+	sw->recp_list[rid].adv_rule = true;
+	rule_head = &sw->recp_list[rid].filt_rules;
+
+	if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI)
+		adv_fltr->vsi_count = 1;
+
+	/* Add rule entry to book keeping list */
+	list_add(&adv_fltr->list_entry, rule_head);
+	if (added_entry) {
+		added_entry->rid = rid;
+		added_entry->rule_id = adv_fltr->rule_info.fltr_rule_id;
+		added_entry->vsi_handle = rinfo->sw_act.vsi_handle;
+	}
+err_ice_add_adv_rule:
+	if (status && adv_fltr) {
+		devm_kfree(ice_hw_to_dev(hw), adv_fltr->lkups);
+		devm_kfree(ice_hw_to_dev(hw), adv_fltr);
+	}
+
+	kfree(s_rule);
+
+free_pkt_profile:
+	if (profile->match & ICE_PKT_KMALLOC) {
+		kfree(profile->offsets);
+		kfree(profile->pkt);
+		kfree(profile);
+	}
+
+	return status;
+}
+
+/**
+ * ice_replay_vsi_fltr - Replay filters for requested VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: driver VSI handle
+ * @recp_id: Recipe ID for which rules need to be replayed
+ * @list_head: list for which filters need to be replayed
+ *
+ * Replays the filter of recipe recp_id for a VSI represented via vsi_handle.
+ * It is required to pass valid VSI handle.
+ */
+static int
+ice_replay_vsi_fltr(struct ice_hw *hw, u16 vsi_handle, u8 recp_id,
+		    struct list_head *list_head)
+{
+	struct ice_fltr_mgmt_list_entry *itr;
+	int status = 0;
+	u16 hw_vsi_id;
+
+	if (list_empty(list_head))
+		return status;
+	hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
+
+	list_for_each_entry(itr, list_head, list_entry) {
+		struct ice_fltr_list_entry f_entry;
+
+		f_entry.fltr_info = itr->fltr_info;
+		if (itr->vsi_count < 2 && recp_id != ICE_SW_LKUP_VLAN &&
+		    itr->fltr_info.vsi_handle == vsi_handle) {
+			/* update the src in case it is VSI num */
+			if (f_entry.fltr_info.src_id == ICE_SRC_ID_VSI)
+				f_entry.fltr_info.src = hw_vsi_id;
+			status = ice_add_rule_internal(hw, recp_id, &f_entry);
+			if (status)
+				goto end;
+			continue;
+		}
+		if (!itr->vsi_list_info ||
+		    !test_bit(vsi_handle, itr->vsi_list_info->vsi_map))
+			continue;
+		/* Clearing it so that the logic can add it back */
+		clear_bit(vsi_handle, itr->vsi_list_info->vsi_map);
+		f_entry.fltr_info.vsi_handle = vsi_handle;
+		f_entry.fltr_info.fltr_act = ICE_FWD_TO_VSI;
+		/* update the src in case it is VSI num */
+		if (f_entry.fltr_info.src_id == ICE_SRC_ID_VSI)
+			f_entry.fltr_info.src = hw_vsi_id;
+		if (recp_id == ICE_SW_LKUP_VLAN)
+			status = ice_add_vlan_internal(hw, &f_entry);
+		else
+			status = ice_add_rule_internal(hw, recp_id, &f_entry);
+		if (status)
+			goto end;
+	}
+end:
+	return status;
+}
+
+/**
+ * ice_adv_rem_update_vsi_list
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: VSI handle of the VSI to remove
+ * @fm_list: filter management entry for which the VSI list management needs to
+ *	     be done
+ */
+static int
+ice_adv_rem_update_vsi_list(struct ice_hw *hw, u16 vsi_handle,
+			    struct ice_adv_fltr_mgmt_list_entry *fm_list)
+{
+	struct ice_vsi_list_map_info *vsi_list_info;
+	enum ice_sw_lkup_type lkup_type;
+	u16 vsi_list_id;
+	int status;
+
+	if (fm_list->rule_info.sw_act.fltr_act != ICE_FWD_TO_VSI_LIST ||
+	    fm_list->vsi_count == 0)
+		return -EINVAL;
+
+	/* A rule with the VSI being removed does not exist */
+	if (!test_bit(vsi_handle, fm_list->vsi_list_info->vsi_map))
+		return -ENOENT;
+
+	lkup_type = ICE_SW_LKUP_LAST;
+	vsi_list_id = fm_list->rule_info.sw_act.fwd_id.vsi_list_id;
+	status = ice_update_vsi_list_rule(hw, &vsi_handle, 1, vsi_list_id, true,
+					  ice_aqc_opc_update_sw_rules,
+					  lkup_type);
+	if (status)
+		return status;
+
+	fm_list->vsi_count--;
+	clear_bit(vsi_handle, fm_list->vsi_list_info->vsi_map);
+	vsi_list_info = fm_list->vsi_list_info;
+	if (fm_list->vsi_count == 1) {
+		struct ice_fltr_info tmp_fltr;
+		u16 rem_vsi_handle;
+
+		rem_vsi_handle = find_first_bit(vsi_list_info->vsi_map,
+						ICE_MAX_VSI);
+		if (!ice_is_vsi_valid(hw, rem_vsi_handle))
+			return -EIO;
+
+		/* Make sure VSI list is empty before removing it below */
+		status = ice_update_vsi_list_rule(hw, &rem_vsi_handle, 1,
+						  vsi_list_id, true,
+						  ice_aqc_opc_update_sw_rules,
+						  lkup_type);
+		if (status)
+			return status;
+
+		memset(&tmp_fltr, 0, sizeof(tmp_fltr));
+		tmp_fltr.flag = fm_list->rule_info.sw_act.flag;
+		tmp_fltr.fltr_rule_id = fm_list->rule_info.fltr_rule_id;
+		fm_list->rule_info.sw_act.fltr_act = ICE_FWD_TO_VSI;
+		tmp_fltr.fltr_act = ICE_FWD_TO_VSI;
+		tmp_fltr.fwd_id.hw_vsi_id =
+			ice_get_hw_vsi_num(hw, rem_vsi_handle);
+		fm_list->rule_info.sw_act.fwd_id.hw_vsi_id =
+			ice_get_hw_vsi_num(hw, rem_vsi_handle);
+		fm_list->rule_info.sw_act.vsi_handle = rem_vsi_handle;
+
+		/* Update the previous switch rule of "MAC forward to VSI" to
+		 * "MAC fwd to VSI list"
+		 */
+		status = ice_update_pkt_fwd_rule(hw, &tmp_fltr);
+		if (status) {
+			ice_debug(hw, ICE_DBG_SW, "Failed to update pkt fwd rule to FWD_TO_VSI on HW VSI %d, error %d\n",
+				  tmp_fltr.fwd_id.hw_vsi_id, status);
+			return status;
+		}
+		fm_list->vsi_list_info->ref_cnt--;
+
+		/* Remove the VSI list since it is no longer used */
+		status = ice_remove_vsi_list_rule(hw, vsi_list_id, lkup_type);
+		if (status) {
+			ice_debug(hw, ICE_DBG_SW, "Failed to remove VSI list %d, error %d\n",
+				  vsi_list_id, status);
+			return status;
+		}
+
+		list_del(&vsi_list_info->list_entry);
+		devm_kfree(ice_hw_to_dev(hw), vsi_list_info);
+		fm_list->vsi_list_info = NULL;
+	}
+
+	return status;
+}
+
+/**
+ * ice_rem_adv_rule - removes existing advanced switch rule
+ * @hw: pointer to the hardware structure
+ * @lkups: information on the words that needs to be looked up. All words
+ *         together makes one recipe
+ * @lkups_cnt: num of entries in the lkups array
+ * @rinfo: Its the pointer to the rule information for the rule
+ *
+ * This function can be used to remove 1 rule at a time. The lkups is
+ * used to describe all the words that forms the "lookup" portion of the
+ * rule. These words can span multiple protocols. Callers to this function
+ * need to pass in a list of protocol headers with lookup information along
+ * and mask that determines which words are valid from the given protocol
+ * header. rinfo describes other information related to this rule such as
+ * forwarding IDs, priority of this rule, etc.
+ */
+static int
+ice_rem_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
+		 u16 lkups_cnt, struct ice_adv_rule_info *rinfo)
+{
+	struct ice_adv_fltr_mgmt_list_entry *list_elem;
+	struct ice_prot_lkup_ext lkup_exts;
+	bool remove_rule = false;
+	struct mutex *rule_lock; /* Lock to protect filter rule list */
+	u16 i, rid, vsi_handle;
+	int status = 0;
+
+	memset(&lkup_exts, 0, sizeof(lkup_exts));
+	for (i = 0; i < lkups_cnt; i++) {
+		u16 count;
+
+		if (lkups[i].type >= ICE_PROTOCOL_LAST)
+			return -EIO;
+
+		count = ice_fill_valid_words(&lkups[i], &lkup_exts);
+		if (!count)
+			return -EIO;
+	}
+
+	rid = ice_find_recp(hw, &lkup_exts, rinfo);
+	/* If did not find a recipe that match the existing criteria */
+	if (rid == ICE_MAX_NUM_RECIPES)
+		return -EINVAL;
+
+	rule_lock = &hw->switch_info->recp_list[rid].filt_rule_lock;
+	list_elem = ice_find_adv_rule_entry(hw, lkups, lkups_cnt, rid, rinfo);
+	/* the rule is already removed */
+	if (!list_elem)
+		return 0;
+	mutex_lock(rule_lock);
+	if (list_elem->rule_info.sw_act.fltr_act != ICE_FWD_TO_VSI_LIST) {
+		remove_rule = true;
+	} else if (list_elem->vsi_count > 1) {
+		remove_rule = false;
+		vsi_handle = rinfo->sw_act.vsi_handle;
+		status = ice_adv_rem_update_vsi_list(hw, vsi_handle, list_elem);
+	} else {
+		vsi_handle = rinfo->sw_act.vsi_handle;
+		status = ice_adv_rem_update_vsi_list(hw, vsi_handle, list_elem);
+		if (status) {
+			mutex_unlock(rule_lock);
+			return status;
+		}
+		if (list_elem->vsi_count == 0)
+			remove_rule = true;
+	}
+	mutex_unlock(rule_lock);
+	if (remove_rule) {
+		struct ice_sw_rule_lkup_rx_tx *s_rule;
+		u16 rule_buf_sz;
+
+		rule_buf_sz = ICE_SW_RULE_RX_TX_NO_HDR_SIZE(s_rule);
+		s_rule = kzalloc(rule_buf_sz, GFP_KERNEL);
+		if (!s_rule)
+			return -ENOMEM;
+		s_rule->act = 0;
+		s_rule->index = cpu_to_le16(list_elem->rule_info.fltr_rule_id);
+		s_rule->hdr_len = 0;
+		status = ice_aq_sw_rules(hw, (struct ice_aqc_sw_rules *)s_rule,
+					 rule_buf_sz, 1,
+					 ice_aqc_opc_remove_sw_rules, NULL);
+		if (!status || status == -ENOENT) {
+			struct ice_switch_info *sw = hw->switch_info;
+
+			mutex_lock(rule_lock);
+			list_del(&list_elem->list_entry);
+			devm_kfree(ice_hw_to_dev(hw), list_elem->lkups);
+			devm_kfree(ice_hw_to_dev(hw), list_elem);
+			mutex_unlock(rule_lock);
+			if (list_empty(&sw->recp_list[rid].filt_rules))
+				sw->recp_list[rid].adv_rule = false;
+		}
+		kfree(s_rule);
+	}
+	return status;
+}
+
+/**
+ * ice_rem_adv_rule_by_id - removes existing advanced switch rule by ID
+ * @hw: pointer to the hardware structure
+ * @remove_entry: data struct which holds rule_id, VSI handle and recipe ID
+ *
+ * This function is used to remove 1 rule at a time. The removal is based on
+ * the remove_entry parameter. This function will remove rule for a given
+ * vsi_handle with a given rule_id which is passed as parameter in remove_entry
+ */
+int
+ice_rem_adv_rule_by_id(struct ice_hw *hw,
+		       struct ice_rule_query_data *remove_entry)
+{
+	struct ice_adv_fltr_mgmt_list_entry *list_itr;
+	struct list_head *list_head;
+	struct ice_adv_rule_info rinfo;
+	struct ice_switch_info *sw;
+
+	sw = hw->switch_info;
+	if (!sw->recp_list[remove_entry->rid].recp_created)
+		return -EINVAL;
+	list_head = &sw->recp_list[remove_entry->rid].filt_rules;
+	list_for_each_entry(list_itr, list_head, list_entry) {
+		if (list_itr->rule_info.fltr_rule_id ==
+		    remove_entry->rule_id) {
+			rinfo = list_itr->rule_info;
+			rinfo.sw_act.vsi_handle = remove_entry->vsi_handle;
+			return ice_rem_adv_rule(hw, list_itr->lkups,
+						list_itr->lkups_cnt, &rinfo);
+		}
+	}
+	/* either list is empty or unable to find rule */
+	return -ENOENT;
+}
+
+/**
+ * ice_replay_vsi_adv_rule - Replay advanced rule for requested VSI
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: driver VSI handle
+ * @list_head: list for which filters need to be replayed
+ *
+ * Replay the advanced rule for the given VSI.
+ */
+static int
+ice_replay_vsi_adv_rule(struct ice_hw *hw, u16 vsi_handle,
+			struct list_head *list_head)
+{
+	struct ice_rule_query_data added_entry = { 0 };
+	struct ice_adv_fltr_mgmt_list_entry *adv_fltr;
+	int status = 0;
+
+	if (list_empty(list_head))
+		return status;
+	list_for_each_entry(adv_fltr, list_head, list_entry) {
+		struct ice_adv_rule_info *rinfo = &adv_fltr->rule_info;
+		u16 lk_cnt = adv_fltr->lkups_cnt;
+
+		if (vsi_handle != rinfo->sw_act.vsi_handle)
+			continue;
+		status = ice_add_adv_rule(hw, adv_fltr->lkups, lk_cnt, rinfo,
+					  &added_entry);
+		if (status)
+			break;
+	}
+	return status;
+}
+
+/**
+ * ice_replay_vsi_all_fltr - replay all filters stored in bookkeeping lists
+ * @hw: pointer to the hardware structure
+ * @vsi_handle: driver VSI handle
+ *
+ * Replays filters for requested VSI via vsi_handle.
+ */
+int ice_replay_vsi_all_fltr(struct ice_hw *hw, u16 vsi_handle)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	int status;
+	u8 i;
+
+	for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) {
+		struct list_head *head;
+
+		head = &sw->recp_list[i].filt_replay_rules;
+		if (!sw->recp_list[i].adv_rule)
+			status = ice_replay_vsi_fltr(hw, vsi_handle, i, head);
+		else
+			status = ice_replay_vsi_adv_rule(hw, vsi_handle, head);
+		if (status)
+			return status;
+	}
+	return status;
+}
+
+/**
+ * ice_rm_all_sw_replay_rule_info - deletes filter replay rules
+ * @hw: pointer to the HW struct
+ *
+ * Deletes the filter replay rules.
+ */
+void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw)
+{
+	struct ice_switch_info *sw = hw->switch_info;
+	u8 i;
+
+	if (!sw)
+		return;
+
+	for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) {
+		if (!list_empty(&sw->recp_list[i].filt_replay_rules)) {
+			struct list_head *l_head;
+
+			l_head = &sw->recp_list[i].filt_replay_rules;
+			if (!sw->recp_list[i].adv_rule)
+				ice_rem_sw_rule_info(hw, l_head);
+			else
+				ice_rem_adv_rule_info(hw, l_head);
+		}
+	}
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
new file mode 100644
index 0000000000..db7e501b7e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -0,0 +1,433 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_SWITCH_H_
+#define _ICE_SWITCH_H_
+
+#include "ice_common.h"
+
+#define ICE_SW_CFG_MAX_BUF_LEN 2048
+#define ICE_DFLT_VSI_INVAL 0xff
+#define ICE_FLTR_RX BIT(0)
+#define ICE_FLTR_TX BIT(1)
+#define ICE_VSI_INVAL_ID 0xffff
+#define ICE_INVAL_Q_HANDLE 0xFFFF
+
+/* Switch Profile IDs for Profile related switch rules */
+#define ICE_PROFID_IPV4_GTPC_TEID			41
+#define ICE_PROFID_IPV4_GTPC_NO_TEID			42
+#define ICE_PROFID_IPV4_GTPU_TEID			43
+#define ICE_PROFID_IPV6_GTPC_TEID			44
+#define ICE_PROFID_IPV6_GTPC_NO_TEID			45
+#define ICE_PROFID_IPV6_GTPU_TEID			46
+#define ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER		70
+
+#define ICE_SW_RULE_VSI_LIST_SIZE(s, n)		struct_size((s), vsi, (n))
+#define ICE_SW_RULE_RX_TX_HDR_SIZE(s, l)	struct_size((s), hdr_data, (l))
+#define ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s)	\
+	ICE_SW_RULE_RX_TX_HDR_SIZE((s), DUMMY_ETH_HDR_LEN)
+#define ICE_SW_RULE_RX_TX_NO_HDR_SIZE(s)	\
+	ICE_SW_RULE_RX_TX_HDR_SIZE((s), 0)
+#define ICE_SW_RULE_LG_ACT_SIZE(s, n)		struct_size((s), act, (n))
+
+#define DUMMY_ETH_HDR_LEN		16
+
+/* VSI context structure for add/get/update/free operations */
+struct ice_vsi_ctx {
+	u16 vsi_num;
+	u16 vsis_allocd;
+	u16 vsis_unallocated;
+	u16 flags;
+	struct ice_aqc_vsi_props info;
+	struct ice_sched_vsi_info sched;
+	u8 alloc_from_pool;
+	u8 vf_num;
+	u16 num_lan_q_entries[ICE_MAX_TRAFFIC_CLASS];
+	struct ice_q_ctx *lan_q_ctx[ICE_MAX_TRAFFIC_CLASS];
+	u16 num_rdma_q_entries[ICE_MAX_TRAFFIC_CLASS];
+	struct ice_q_ctx *rdma_q_ctx[ICE_MAX_TRAFFIC_CLASS];
+};
+
+/* Switch recipe ID enum values are specific to hardware */
+enum ice_sw_lkup_type {
+	ICE_SW_LKUP_ETHERTYPE = 0,
+	ICE_SW_LKUP_MAC = 1,
+	ICE_SW_LKUP_MAC_VLAN = 2,
+	ICE_SW_LKUP_PROMISC = 3,
+	ICE_SW_LKUP_VLAN = 4,
+	ICE_SW_LKUP_DFLT = 5,
+	ICE_SW_LKUP_ETHERTYPE_MAC = 8,
+	ICE_SW_LKUP_PROMISC_VLAN = 9,
+	ICE_SW_LKUP_LAST
+};
+
+/* type of filter src ID */
+enum ice_src_id {
+	ICE_SRC_ID_UNKNOWN = 0,
+	ICE_SRC_ID_VSI,
+	ICE_SRC_ID_QUEUE,
+	ICE_SRC_ID_LPORT,
+};
+
+struct ice_fltr_info {
+	/* Look up information: how to look up packet */
+	enum ice_sw_lkup_type lkup_type;
+	/* Forward action: filter action to do after lookup */
+	enum ice_sw_fwd_act_type fltr_act;
+	/* rule ID returned by firmware once filter rule is created */
+	u16 fltr_rule_id;
+	u16 flag;
+
+	/* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */
+	u16 src;
+	enum ice_src_id src_id;
+
+	union {
+		struct {
+			u8 mac_addr[ETH_ALEN];
+		} mac;
+		struct {
+			u8 mac_addr[ETH_ALEN];
+			u16 vlan_id;
+		} mac_vlan;
+		struct {
+			u16 vlan_id;
+			u16 tpid;
+			u8 tpid_valid;
+		} vlan;
+		/* Set lkup_type as ICE_SW_LKUP_ETHERTYPE
+		 * if just using ethertype as filter. Set lkup_type as
+		 * ICE_SW_LKUP_ETHERTYPE_MAC if MAC also needs to be
+		 * passed in as filter.
+		 */
+		struct {
+			u16 ethertype;
+			u8 mac_addr[ETH_ALEN]; /* optional */
+		} ethertype_mac;
+	} l_data; /* Make sure to zero out the memory of l_data before using
+		   * it or only set the data associated with lookup match
+		   * rest everything should be zero
+		   */
+
+	/* Depending on filter action */
+	union {
+		/* queue ID in case of ICE_FWD_TO_Q and starting
+		 * queue ID in case of ICE_FWD_TO_QGRP.
+		 */
+		u16 q_id:11;
+		u16 hw_vsi_id:10;
+		u16 vsi_list_id:10;
+	} fwd_id;
+
+	/* Sw VSI handle */
+	u16 vsi_handle;
+
+	/* Set to num_queues if action is ICE_FWD_TO_QGRP. This field
+	 * determines the range of queues the packet needs to be forwarded to.
+	 * Note that qgrp_size must be set to a power of 2.
+	 */
+	u8 qgrp_size;
+
+	/* Rule creations populate these indicators basing on the switch type */
+	u8 lb_en;	/* Indicate if packet can be looped back */
+	u8 lan_en;	/* Indicate if packet can be forwarded to the uplink */
+};
+
+struct ice_update_recipe_lkup_idx_params {
+	u16 rid;
+	u16 fv_idx;
+	bool ignore_valid;
+	u16 mask;
+	bool mask_valid;
+	u8 lkup_idx;
+};
+
+struct ice_adv_lkup_elem {
+	enum ice_protocol_type type;
+	union {
+		union ice_prot_hdr h_u;	/* Header values */
+		/* Used to iterate over the headers */
+		u16 h_raw[sizeof(union ice_prot_hdr) / sizeof(u16)];
+	};
+	union {
+		union ice_prot_hdr m_u;	/* Mask of header values to match */
+		/* Used to iterate over header mask */
+		u16 m_raw[sizeof(union ice_prot_hdr) / sizeof(u16)];
+	};
+};
+
+struct ice_sw_act_ctrl {
+	/* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */
+	u16 src;
+	u16 flag;
+	enum ice_sw_fwd_act_type fltr_act;
+	/* Depending on filter action */
+	union {
+		/* This is a queue ID in case of ICE_FWD_TO_Q and starting
+		 * queue ID in case of ICE_FWD_TO_QGRP.
+		 */
+		u16 q_id:11;
+		u16 vsi_id:10;
+		u16 hw_vsi_id:10;
+		u16 vsi_list_id:10;
+	} fwd_id;
+	/* software VSI handle */
+	u16 vsi_handle;
+	u8 qgrp_size;
+};
+
+struct ice_rule_query_data {
+	/* Recipe ID for which the requested rule was added */
+	u16 rid;
+	/* Rule ID that was added or is supposed to be removed */
+	u16 rule_id;
+	/* vsi_handle for which Rule was added or is supposed to be removed */
+	u16 vsi_handle;
+};
+
+/* This structure allows to pass info about lb_en and lan_en
+ * flags to ice_add_adv_rule. Values in act would be used
+ * only if act_valid was set to true, otherwise default
+ * values would be used.
+ */
+struct ice_adv_rule_flags_info {
+	u32 act;
+	u8 act_valid;		/* indicate if flags in act are valid */
+};
+
+struct ice_adv_rule_info {
+	/* Store metadata values in rule info */
+	enum ice_sw_tunnel_type tun_type;
+	u16 vlan_type;
+	u16 fltr_rule_id;
+	u32 priority;
+	u16 need_pass_l2:1;
+	u16 allow_pass_l2:1;
+	u16 src_vsi;
+	struct ice_sw_act_ctrl sw_act;
+	struct ice_adv_rule_flags_info flags_info;
+};
+
+/* A collection of one or more four word recipe */
+struct ice_sw_recipe {
+	/* For a chained recipe the root recipe is what should be used for
+	 * programming rules
+	 */
+	u8 is_root;
+	u8 root_rid;
+	u8 recp_created;
+
+	/* Number of extraction words */
+	u8 n_ext_words;
+	/* Protocol ID and Offset pair (extraction word) to describe the
+	 * recipe
+	 */
+	struct ice_fv_word ext_words[ICE_MAX_CHAIN_WORDS];
+	u16 word_masks[ICE_MAX_CHAIN_WORDS];
+
+	/* if this recipe is a collection of other recipe */
+	u8 big_recp;
+
+	/* if this recipe is part of another bigger recipe then chain index
+	 * corresponding to this recipe
+	 */
+	u8 chain_idx;
+
+	/* if this recipe is a collection of other recipe then count of other
+	 * recipes and recipe IDs of those recipes
+	 */
+	u8 n_grp_count;
+
+	/* Bit map specifying the IDs associated with this group of recipe */
+	DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES);
+
+	enum ice_sw_tunnel_type tun_type;
+
+	/* List of type ice_fltr_mgmt_list_entry or adv_rule */
+	u8 adv_rule;
+	struct list_head filt_rules;
+	struct list_head filt_replay_rules;
+
+	struct mutex filt_rule_lock;	/* protect filter rule structure */
+
+	/* Profiles this recipe should be associated with */
+	struct list_head fv_list;
+
+	/* Profiles this recipe is associated with */
+	u8 num_profs, *prof_ids;
+
+	/* Bit map for possible result indexes */
+	DECLARE_BITMAP(res_idxs, ICE_MAX_FV_WORDS);
+
+	/* This allows user to specify the recipe priority.
+	 * For now, this becomes 'fwd_priority' when recipe
+	 * is created, usually recipes can have 'fwd' and 'join'
+	 * priority.
+	 */
+	u8 priority;
+
+	u8 need_pass_l2:1;
+	u8 allow_pass_l2:1;
+
+	struct list_head rg_list;
+
+	/* AQ buffer associated with this recipe */
+	struct ice_aqc_recipe_data_elem *root_buf;
+	/* This struct saves the fv_words for a given lookup */
+	struct ice_prot_lkup_ext lkup_exts;
+};
+
+/* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list ID */
+struct ice_vsi_list_map_info {
+	struct list_head list_entry;
+	DECLARE_BITMAP(vsi_map, ICE_MAX_VSI);
+	u16 vsi_list_id;
+	/* counter to track how many rules are reusing this VSI list */
+	u16 ref_cnt;
+};
+
+struct ice_fltr_list_entry {
+	struct list_head list_entry;
+	int status;
+	struct ice_fltr_info fltr_info;
+};
+
+/* This defines an entry in the list that maintains MAC or VLAN membership
+ * to HW list mapping, since multiple VSIs can subscribe to the same MAC or
+ * VLAN. As an optimization the VSI list should be created only when a
+ * second VSI becomes a subscriber to the same MAC address. VSI lists are always
+ * used for VLAN membership.
+ */
+struct ice_fltr_mgmt_list_entry {
+	/* back pointer to VSI list ID to VSI list mapping */
+	struct ice_vsi_list_map_info *vsi_list_info;
+	u16 vsi_count;
+#define ICE_INVAL_LG_ACT_INDEX 0xffff
+	u16 lg_act_idx;
+#define ICE_INVAL_SW_MARKER_ID 0xffff
+	u16 sw_marker_id;
+	struct list_head list_entry;
+	struct ice_fltr_info fltr_info;
+#define ICE_INVAL_COUNTER_ID 0xff
+	u8 counter_index;
+};
+
+struct ice_adv_fltr_mgmt_list_entry {
+	struct list_head list_entry;
+
+	struct ice_adv_lkup_elem *lkups;
+	struct ice_adv_rule_info rule_info;
+	u16 lkups_cnt;
+	struct ice_vsi_list_map_info *vsi_list_info;
+	u16 vsi_count;
+};
+
+enum ice_promisc_flags {
+	ICE_PROMISC_UCAST_RX = 0x1,
+	ICE_PROMISC_UCAST_TX = 0x2,
+	ICE_PROMISC_MCAST_RX = 0x4,
+	ICE_PROMISC_MCAST_TX = 0x8,
+	ICE_PROMISC_BCAST_RX = 0x10,
+	ICE_PROMISC_BCAST_TX = 0x20,
+	ICE_PROMISC_VLAN_RX = 0x40,
+	ICE_PROMISC_VLAN_TX = 0x80,
+};
+
+/* VSI related commands */
+int
+ice_add_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	    struct ice_sq_cd *cd);
+int
+ice_free_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	     bool keep_vsi_alloc, struct ice_sq_cd *cd);
+int
+ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
+	       struct ice_sq_cd *cd);
+bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle);
+struct ice_vsi_ctx *ice_get_vsi_ctx(struct ice_hw *hw, u16 vsi_handle);
+void ice_clear_all_vsi_ctx(struct ice_hw *hw);
+/* Switch config */
+int ice_get_initial_sw_cfg(struct ice_hw *hw);
+
+int
+ice_alloc_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+		   u16 *counter_id);
+int
+ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+		  u16 counter_id);
+int ice_share_res(struct ice_hw *hw, u16 type, u8 shared, u16 res_id);
+
+/* Switch/bridge related commands */
+void ice_rule_add_tunnel_metadata(struct ice_adv_lkup_elem *lkup);
+void ice_rule_add_direction_metadata(struct ice_adv_lkup_elem *lkup);
+void ice_rule_add_vlan_metadata(struct ice_adv_lkup_elem *lkup);
+void ice_rule_add_src_vsi_metadata(struct ice_adv_lkup_elem *lkup);
+int
+ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
+		 u16 lkups_cnt, struct ice_adv_rule_info *rinfo,
+		 struct ice_rule_query_data *added_entry);
+int ice_update_sw_rule_bridge_mode(struct ice_hw *hw);
+int ice_add_vlan(struct ice_hw *hw, struct list_head *m_list);
+int ice_remove_vlan(struct ice_hw *hw, struct list_head *v_list);
+int ice_add_mac(struct ice_hw *hw, struct list_head *m_lst);
+int ice_remove_mac(struct ice_hw *hw, struct list_head *m_lst);
+bool ice_vlan_fltr_exist(struct ice_hw *hw, u16 vlan_id, u16 vsi_handle);
+int ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list);
+int ice_remove_eth_mac(struct ice_hw *hw, struct list_head *em_list);
+int ice_cfg_rdma_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable);
+void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle);
+
+/* Promisc/defport setup for VSIs */
+int
+ice_cfg_dflt_vsi(struct ice_port_info *pi, u16 vsi_handle, bool set,
+		 u8 direction);
+bool
+ice_check_if_dflt_vsi(struct ice_port_info *pi, u16 vsi_handle,
+		      bool *rule_exists);
+
+int
+ice_set_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+		    u16 vid);
+int
+ice_clear_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+		      u16 vid);
+int
+ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask,
+			 bool rm_vlan_promisc);
+
+int
+ice_rem_adv_rule_by_id(struct ice_hw *hw,
+		       struct ice_rule_query_data *remove_entry);
+
+int ice_init_def_sw_recp(struct ice_hw *hw);
+u16 ice_get_hw_vsi_num(struct ice_hw *hw, u16 vsi_handle);
+
+int ice_replay_vsi_all_fltr(struct ice_hw *hw, u16 vsi_handle);
+void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw);
+void ice_fill_eth_hdr(u8 *eth_hdr);
+
+int
+ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz,
+		u8 num_rules, enum ice_adminq_opc opc, struct ice_sq_cd *cd);
+int
+ice_update_recipe_lkup_idx(struct ice_hw *hw,
+			   struct ice_update_recipe_lkup_idx_params *params);
+void ice_change_proto_id_to_dvm(void);
+struct ice_vsi_list_map_info *
+ice_find_vsi_list_entry(struct ice_hw *hw, u8 recp_id, u16 vsi_handle,
+			u16 *vsi_list_id);
+int ice_alloc_recipe(struct ice_hw *hw, u16 *rid);
+int ice_aq_get_recipe(struct ice_hw *hw,
+		      struct ice_aqc_recipe_data_elem *s_recipe_list,
+		      u16 *num_recipes, u16 recipe_root, struct ice_sq_cd *cd);
+int ice_aq_add_recipe(struct ice_hw *hw,
+		      struct ice_aqc_recipe_data_elem *s_recipe_list,
+		      u16 num_recipes, struct ice_sq_cd *cd);
+int
+ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+			     struct ice_sq_cd *cd);
+int
+ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap,
+			     struct ice_sq_cd *cd);
+
+#endif /* _ICE_SWITCH_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
new file mode 100644
index 0000000000..dd03cb69ad
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
@@ -0,0 +1,2054 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_tc_lib.h"
+#include "ice_fltr.h"
+#include "ice_lib.h"
+#include "ice_protocol_type.h"
+
+#define ICE_TC_METADATA_LKUP_IDX 0
+
+/**
+ * ice_tc_count_lkups - determine lookup count for switch filter
+ * @flags: TC-flower flags
+ * @headers: Pointer to TC flower filter header structure
+ * @fltr: Pointer to outer TC filter structure
+ *
+ * Determine lookup count based on TC flower input for switch filter.
+ */
+static int
+ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers,
+		   struct ice_tc_flower_fltr *fltr)
+{
+	int lkups_cnt = 1; /* 0th lookup is metadata */
+
+	/* Always add metadata as the 0th lookup. Included elements:
+	 * - Direction flag (always present)
+	 * - ICE_TC_FLWR_FIELD_VLAN_TPID (present if specified)
+	 * - Tunnel flag (present if tunnel)
+	 */
+
+	if (flags & ICE_TC_FLWR_FIELD_TENANT_ID)
+		lkups_cnt++;
+
+	if (flags & ICE_TC_FLWR_FIELD_ENC_DST_MAC)
+		lkups_cnt++;
+
+	if (flags & ICE_TC_FLWR_FIELD_ENC_OPTS)
+		lkups_cnt++;
+
+	if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 |
+		     ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 |
+		     ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 |
+		     ICE_TC_FLWR_FIELD_ENC_DEST_IPV6))
+		lkups_cnt++;
+
+	if (flags & (ICE_TC_FLWR_FIELD_ENC_IP_TOS |
+		     ICE_TC_FLWR_FIELD_ENC_IP_TTL))
+		lkups_cnt++;
+
+	if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT)
+		lkups_cnt++;
+
+	if (flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID)
+		lkups_cnt++;
+
+	/* are MAC fields specified? */
+	if (flags & (ICE_TC_FLWR_FIELD_DST_MAC | ICE_TC_FLWR_FIELD_SRC_MAC))
+		lkups_cnt++;
+
+	/* is VLAN specified? */
+	if (flags & (ICE_TC_FLWR_FIELD_VLAN | ICE_TC_FLWR_FIELD_VLAN_PRIO))
+		lkups_cnt++;
+
+	/* is CVLAN specified? */
+	if (flags & (ICE_TC_FLWR_FIELD_CVLAN | ICE_TC_FLWR_FIELD_CVLAN_PRIO))
+		lkups_cnt++;
+
+	/* are PPPoE options specified? */
+	if (flags & (ICE_TC_FLWR_FIELD_PPPOE_SESSID |
+		     ICE_TC_FLWR_FIELD_PPP_PROTO))
+		lkups_cnt++;
+
+	/* are IPv[4|6] fields specified? */
+	if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV4 | ICE_TC_FLWR_FIELD_SRC_IPV4 |
+		     ICE_TC_FLWR_FIELD_DEST_IPV6 | ICE_TC_FLWR_FIELD_SRC_IPV6))
+		lkups_cnt++;
+
+	if (flags & (ICE_TC_FLWR_FIELD_IP_TOS | ICE_TC_FLWR_FIELD_IP_TTL))
+		lkups_cnt++;
+
+	/* are L2TPv3 options specified? */
+	if (flags & ICE_TC_FLWR_FIELD_L2TPV3_SESSID)
+		lkups_cnt++;
+
+	/* is L4 (TCP/UDP/any other L4 protocol fields) specified? */
+	if (flags & (ICE_TC_FLWR_FIELD_DEST_L4_PORT |
+		     ICE_TC_FLWR_FIELD_SRC_L4_PORT))
+		lkups_cnt++;
+
+	return lkups_cnt;
+}
+
+static enum ice_protocol_type ice_proto_type_from_mac(bool inner)
+{
+	return inner ? ICE_MAC_IL : ICE_MAC_OFOS;
+}
+
+static enum ice_protocol_type ice_proto_type_from_etype(bool inner)
+{
+	return inner ? ICE_ETYPE_IL : ICE_ETYPE_OL;
+}
+
+static enum ice_protocol_type ice_proto_type_from_ipv4(bool inner)
+{
+	return inner ? ICE_IPV4_IL : ICE_IPV4_OFOS;
+}
+
+static enum ice_protocol_type ice_proto_type_from_ipv6(bool inner)
+{
+	return inner ? ICE_IPV6_IL : ICE_IPV6_OFOS;
+}
+
+static enum ice_protocol_type ice_proto_type_from_l4_port(u16 ip_proto)
+{
+	switch (ip_proto) {
+	case IPPROTO_TCP:
+		return ICE_TCP_IL;
+	case IPPROTO_UDP:
+		return ICE_UDP_ILOS;
+	}
+
+	return 0;
+}
+
+static enum ice_protocol_type
+ice_proto_type_from_tunnel(enum ice_tunnel_type type)
+{
+	switch (type) {
+	case TNL_VXLAN:
+		return ICE_VXLAN;
+	case TNL_GENEVE:
+		return ICE_GENEVE;
+	case TNL_GRETAP:
+		return ICE_NVGRE;
+	case TNL_GTPU:
+		/* NO_PAY profiles will not work with GTP-U */
+		return ICE_GTP;
+	case TNL_GTPC:
+		return ICE_GTP_NO_PAY;
+	default:
+		return 0;
+	}
+}
+
+static enum ice_sw_tunnel_type
+ice_sw_type_from_tunnel(enum ice_tunnel_type type)
+{
+	switch (type) {
+	case TNL_VXLAN:
+		return ICE_SW_TUN_VXLAN;
+	case TNL_GENEVE:
+		return ICE_SW_TUN_GENEVE;
+	case TNL_GRETAP:
+		return ICE_SW_TUN_NVGRE;
+	case TNL_GTPU:
+		return ICE_SW_TUN_GTPU;
+	case TNL_GTPC:
+		return ICE_SW_TUN_GTPC;
+	default:
+		return ICE_NON_TUN;
+	}
+}
+
+static u16 ice_check_supported_vlan_tpid(u16 vlan_tpid)
+{
+	switch (vlan_tpid) {
+	case ETH_P_8021Q:
+	case ETH_P_8021AD:
+	case ETH_P_QINQ1:
+		return vlan_tpid;
+	default:
+		return 0;
+	}
+}
+
+static int
+ice_tc_fill_tunnel_outer(u32 flags, struct ice_tc_flower_fltr *fltr,
+			 struct ice_adv_lkup_elem *list, int i)
+{
+	struct ice_tc_flower_lyr_2_4_hdrs *hdr = &fltr->outer_headers;
+
+	if (flags & ICE_TC_FLWR_FIELD_TENANT_ID) {
+		u32 tenant_id;
+
+		list[i].type = ice_proto_type_from_tunnel(fltr->tunnel_type);
+		switch (fltr->tunnel_type) {
+		case TNL_VXLAN:
+		case TNL_GENEVE:
+			tenant_id = be32_to_cpu(fltr->tenant_id) << 8;
+			list[i].h_u.tnl_hdr.vni = cpu_to_be32(tenant_id);
+			memcpy(&list[i].m_u.tnl_hdr.vni, "\xff\xff\xff\x00", 4);
+			i++;
+			break;
+		case TNL_GRETAP:
+			list[i].h_u.nvgre_hdr.tni_flow = fltr->tenant_id;
+			memcpy(&list[i].m_u.nvgre_hdr.tni_flow,
+			       "\xff\xff\xff\xff", 4);
+			i++;
+			break;
+		case TNL_GTPC:
+		case TNL_GTPU:
+			list[i].h_u.gtp_hdr.teid = fltr->tenant_id;
+			memcpy(&list[i].m_u.gtp_hdr.teid,
+			       "\xff\xff\xff\xff", 4);
+			i++;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (flags & ICE_TC_FLWR_FIELD_ENC_DST_MAC) {
+		list[i].type = ice_proto_type_from_mac(false);
+		ether_addr_copy(list[i].h_u.eth_hdr.dst_addr,
+				hdr->l2_key.dst_mac);
+		ether_addr_copy(list[i].m_u.eth_hdr.dst_addr,
+				hdr->l2_mask.dst_mac);
+		i++;
+	}
+
+	if (flags & ICE_TC_FLWR_FIELD_ENC_OPTS &&
+	    (fltr->tunnel_type == TNL_GTPU || fltr->tunnel_type == TNL_GTPC)) {
+		list[i].type = ice_proto_type_from_tunnel(fltr->tunnel_type);
+
+		if (fltr->gtp_pdu_info_masks.pdu_type) {
+			list[i].h_u.gtp_hdr.pdu_type =
+				fltr->gtp_pdu_info_keys.pdu_type << 4;
+			memcpy(&list[i].m_u.gtp_hdr.pdu_type, "\xf0", 1);
+		}
+
+		if (fltr->gtp_pdu_info_masks.qfi) {
+			list[i].h_u.gtp_hdr.qfi = fltr->gtp_pdu_info_keys.qfi;
+			memcpy(&list[i].m_u.gtp_hdr.qfi, "\x3f", 1);
+		}
+
+		i++;
+	}
+
+	if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 |
+		     ICE_TC_FLWR_FIELD_ENC_DEST_IPV4)) {
+		list[i].type = ice_proto_type_from_ipv4(false);
+
+		if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_IPV4) {
+			list[i].h_u.ipv4_hdr.src_addr = hdr->l3_key.src_ipv4;
+			list[i].m_u.ipv4_hdr.src_addr = hdr->l3_mask.src_ipv4;
+		}
+		if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_IPV4) {
+			list[i].h_u.ipv4_hdr.dst_addr = hdr->l3_key.dst_ipv4;
+			list[i].m_u.ipv4_hdr.dst_addr = hdr->l3_mask.dst_ipv4;
+		}
+		i++;
+	}
+
+	if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 |
+		     ICE_TC_FLWR_FIELD_ENC_DEST_IPV6)) {
+		list[i].type = ice_proto_type_from_ipv6(false);
+
+		if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_IPV6) {
+			memcpy(&list[i].h_u.ipv6_hdr.src_addr,
+			       &hdr->l3_key.src_ipv6_addr,
+			       sizeof(hdr->l3_key.src_ipv6_addr));
+			memcpy(&list[i].m_u.ipv6_hdr.src_addr,
+			       &hdr->l3_mask.src_ipv6_addr,
+			       sizeof(hdr->l3_mask.src_ipv6_addr));
+		}
+		if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_IPV6) {
+			memcpy(&list[i].h_u.ipv6_hdr.dst_addr,
+			       &hdr->l3_key.dst_ipv6_addr,
+			       sizeof(hdr->l3_key.dst_ipv6_addr));
+			memcpy(&list[i].m_u.ipv6_hdr.dst_addr,
+			       &hdr->l3_mask.dst_ipv6_addr,
+			       sizeof(hdr->l3_mask.dst_ipv6_addr));
+		}
+		i++;
+	}
+
+	if (fltr->inner_headers.l2_key.n_proto == htons(ETH_P_IP) &&
+	    (flags & (ICE_TC_FLWR_FIELD_ENC_IP_TOS |
+		      ICE_TC_FLWR_FIELD_ENC_IP_TTL))) {
+		list[i].type = ice_proto_type_from_ipv4(false);
+
+		if (flags & ICE_TC_FLWR_FIELD_ENC_IP_TOS) {
+			list[i].h_u.ipv4_hdr.tos = hdr->l3_key.tos;
+			list[i].m_u.ipv4_hdr.tos = hdr->l3_mask.tos;
+		}
+
+		if (flags & ICE_TC_FLWR_FIELD_ENC_IP_TTL) {
+			list[i].h_u.ipv4_hdr.time_to_live = hdr->l3_key.ttl;
+			list[i].m_u.ipv4_hdr.time_to_live = hdr->l3_mask.ttl;
+		}
+
+		i++;
+	}
+
+	if (fltr->inner_headers.l2_key.n_proto == htons(ETH_P_IPV6) &&
+	    (flags & (ICE_TC_FLWR_FIELD_ENC_IP_TOS |
+		      ICE_TC_FLWR_FIELD_ENC_IP_TTL))) {
+		struct ice_ipv6_hdr *hdr_h, *hdr_m;
+
+		hdr_h = &list[i].h_u.ipv6_hdr;
+		hdr_m = &list[i].m_u.ipv6_hdr;
+		list[i].type = ice_proto_type_from_ipv6(false);
+
+		if (flags & ICE_TC_FLWR_FIELD_ENC_IP_TOS) {
+			be32p_replace_bits(&hdr_h->be_ver_tc_flow,
+					   hdr->l3_key.tos,
+					   ICE_IPV6_HDR_TC_MASK);
+			be32p_replace_bits(&hdr_m->be_ver_tc_flow,
+					   hdr->l3_mask.tos,
+					   ICE_IPV6_HDR_TC_MASK);
+		}
+
+		if (flags & ICE_TC_FLWR_FIELD_ENC_IP_TTL) {
+			hdr_h->hop_limit = hdr->l3_key.ttl;
+			hdr_m->hop_limit = hdr->l3_mask.ttl;
+		}
+
+		i++;
+	}
+
+	if ((flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT) &&
+	    hdr->l3_key.ip_proto == IPPROTO_UDP) {
+		list[i].type = ICE_UDP_OF;
+		list[i].h_u.l4_hdr.dst_port = hdr->l4_key.dst_port;
+		list[i].m_u.l4_hdr.dst_port = hdr->l4_mask.dst_port;
+		i++;
+	}
+
+	/* always fill matching on tunneled packets in metadata */
+	ice_rule_add_tunnel_metadata(&list[ICE_TC_METADATA_LKUP_IDX]);
+
+	return i;
+}
+
+/**
+ * ice_tc_fill_rules - fill filter rules based on TC fltr
+ * @hw: pointer to HW structure
+ * @flags: tc flower field flags
+ * @tc_fltr: pointer to TC flower filter
+ * @list: list of advance rule elements
+ * @rule_info: pointer to information about rule
+ * @l4_proto: pointer to information such as L4 proto type
+ *
+ * Fill ice_adv_lkup_elem list based on TC flower flags and
+ * TC flower headers. This list should be used to add
+ * advance filter in hardware.
+ */
+static int
+ice_tc_fill_rules(struct ice_hw *hw, u32 flags,
+		  struct ice_tc_flower_fltr *tc_fltr,
+		  struct ice_adv_lkup_elem *list,
+		  struct ice_adv_rule_info *rule_info,
+		  u16 *l4_proto)
+{
+	struct ice_tc_flower_lyr_2_4_hdrs *headers = &tc_fltr->outer_headers;
+	bool inner = false;
+	u16 vlan_tpid = 0;
+	int i = 1; /* 0th lookup is metadata */
+
+	rule_info->vlan_type = vlan_tpid;
+
+	/* Always add direction metadata */
+	ice_rule_add_direction_metadata(&list[ICE_TC_METADATA_LKUP_IDX]);
+
+	rule_info->tun_type = ice_sw_type_from_tunnel(tc_fltr->tunnel_type);
+	if (tc_fltr->tunnel_type != TNL_LAST) {
+		i = ice_tc_fill_tunnel_outer(flags, tc_fltr, list, i);
+
+		headers = &tc_fltr->inner_headers;
+		inner = true;
+	}
+
+	if (flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID) {
+		list[i].type = ice_proto_type_from_etype(inner);
+		list[i].h_u.ethertype.ethtype_id = headers->l2_key.n_proto;
+		list[i].m_u.ethertype.ethtype_id = headers->l2_mask.n_proto;
+		i++;
+	}
+
+	if (flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+		     ICE_TC_FLWR_FIELD_SRC_MAC)) {
+		struct ice_tc_l2_hdr *l2_key, *l2_mask;
+
+		l2_key = &headers->l2_key;
+		l2_mask = &headers->l2_mask;
+
+		list[i].type = ice_proto_type_from_mac(inner);
+		if (flags & ICE_TC_FLWR_FIELD_DST_MAC) {
+			ether_addr_copy(list[i].h_u.eth_hdr.dst_addr,
+					l2_key->dst_mac);
+			ether_addr_copy(list[i].m_u.eth_hdr.dst_addr,
+					l2_mask->dst_mac);
+		}
+		if (flags & ICE_TC_FLWR_FIELD_SRC_MAC) {
+			ether_addr_copy(list[i].h_u.eth_hdr.src_addr,
+					l2_key->src_mac);
+			ether_addr_copy(list[i].m_u.eth_hdr.src_addr,
+					l2_mask->src_mac);
+		}
+		i++;
+	}
+
+	/* copy VLAN info */
+	if (flags & (ICE_TC_FLWR_FIELD_VLAN | ICE_TC_FLWR_FIELD_VLAN_PRIO)) {
+		if (flags & ICE_TC_FLWR_FIELD_CVLAN)
+			list[i].type = ICE_VLAN_EX;
+		else
+			list[i].type = ICE_VLAN_OFOS;
+
+		if (flags & ICE_TC_FLWR_FIELD_VLAN) {
+			list[i].h_u.vlan_hdr.vlan = headers->vlan_hdr.vlan_id;
+			list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0x0FFF);
+		}
+
+		if (flags & ICE_TC_FLWR_FIELD_VLAN_PRIO) {
+			if (flags & ICE_TC_FLWR_FIELD_VLAN) {
+				list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xEFFF);
+			} else {
+				list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xE000);
+				list[i].h_u.vlan_hdr.vlan = 0;
+			}
+			list[i].h_u.vlan_hdr.vlan |=
+				headers->vlan_hdr.vlan_prio;
+		}
+
+		i++;
+	}
+
+	if (flags & ICE_TC_FLWR_FIELD_VLAN_TPID) {
+		vlan_tpid = be16_to_cpu(headers->vlan_hdr.vlan_tpid);
+		rule_info->vlan_type =
+				ice_check_supported_vlan_tpid(vlan_tpid);
+
+		ice_rule_add_vlan_metadata(&list[ICE_TC_METADATA_LKUP_IDX]);
+	}
+
+	if (flags & (ICE_TC_FLWR_FIELD_CVLAN | ICE_TC_FLWR_FIELD_CVLAN_PRIO)) {
+		list[i].type = ICE_VLAN_IN;
+
+		if (flags & ICE_TC_FLWR_FIELD_CVLAN) {
+			list[i].h_u.vlan_hdr.vlan = headers->cvlan_hdr.vlan_id;
+			list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0x0FFF);
+		}
+
+		if (flags & ICE_TC_FLWR_FIELD_CVLAN_PRIO) {
+			if (flags & ICE_TC_FLWR_FIELD_CVLAN) {
+				list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xEFFF);
+			} else {
+				list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xE000);
+				list[i].h_u.vlan_hdr.vlan = 0;
+			}
+			list[i].h_u.vlan_hdr.vlan |=
+				headers->cvlan_hdr.vlan_prio;
+		}
+
+		i++;
+	}
+
+	if (flags & (ICE_TC_FLWR_FIELD_PPPOE_SESSID |
+		     ICE_TC_FLWR_FIELD_PPP_PROTO)) {
+		struct ice_pppoe_hdr *vals, *masks;
+
+		vals = &list[i].h_u.pppoe_hdr;
+		masks = &list[i].m_u.pppoe_hdr;
+
+		list[i].type = ICE_PPPOE;
+
+		if (flags & ICE_TC_FLWR_FIELD_PPPOE_SESSID) {
+			vals->session_id = headers->pppoe_hdr.session_id;
+			masks->session_id = cpu_to_be16(0xFFFF);
+		}
+
+		if (flags & ICE_TC_FLWR_FIELD_PPP_PROTO) {
+			vals->ppp_prot_id = headers->pppoe_hdr.ppp_proto;
+			masks->ppp_prot_id = cpu_to_be16(0xFFFF);
+		}
+
+		i++;
+	}
+
+	/* copy L3 (IPv[4|6]: src, dest) address */
+	if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV4 |
+		     ICE_TC_FLWR_FIELD_SRC_IPV4)) {
+		struct ice_tc_l3_hdr *l3_key, *l3_mask;
+
+		list[i].type = ice_proto_type_from_ipv4(inner);
+		l3_key = &headers->l3_key;
+		l3_mask = &headers->l3_mask;
+		if (flags & ICE_TC_FLWR_FIELD_DEST_IPV4) {
+			list[i].h_u.ipv4_hdr.dst_addr = l3_key->dst_ipv4;
+			list[i].m_u.ipv4_hdr.dst_addr = l3_mask->dst_ipv4;
+		}
+		if (flags & ICE_TC_FLWR_FIELD_SRC_IPV4) {
+			list[i].h_u.ipv4_hdr.src_addr = l3_key->src_ipv4;
+			list[i].m_u.ipv4_hdr.src_addr = l3_mask->src_ipv4;
+		}
+		i++;
+	} else if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV6 |
+			    ICE_TC_FLWR_FIELD_SRC_IPV6)) {
+		struct ice_ipv6_hdr *ipv6_hdr, *ipv6_mask;
+		struct ice_tc_l3_hdr *l3_key, *l3_mask;
+
+		list[i].type = ice_proto_type_from_ipv6(inner);
+		ipv6_hdr = &list[i].h_u.ipv6_hdr;
+		ipv6_mask = &list[i].m_u.ipv6_hdr;
+		l3_key = &headers->l3_key;
+		l3_mask = &headers->l3_mask;
+
+		if (flags & ICE_TC_FLWR_FIELD_DEST_IPV6) {
+			memcpy(&ipv6_hdr->dst_addr, &l3_key->dst_ipv6_addr,
+			       sizeof(l3_key->dst_ipv6_addr));
+			memcpy(&ipv6_mask->dst_addr, &l3_mask->dst_ipv6_addr,
+			       sizeof(l3_mask->dst_ipv6_addr));
+		}
+		if (flags & ICE_TC_FLWR_FIELD_SRC_IPV6) {
+			memcpy(&ipv6_hdr->src_addr, &l3_key->src_ipv6_addr,
+			       sizeof(l3_key->src_ipv6_addr));
+			memcpy(&ipv6_mask->src_addr, &l3_mask->src_ipv6_addr,
+			       sizeof(l3_mask->src_ipv6_addr));
+		}
+		i++;
+	}
+
+	if (headers->l2_key.n_proto == htons(ETH_P_IP) &&
+	    (flags & (ICE_TC_FLWR_FIELD_IP_TOS | ICE_TC_FLWR_FIELD_IP_TTL))) {
+		list[i].type = ice_proto_type_from_ipv4(inner);
+
+		if (flags & ICE_TC_FLWR_FIELD_IP_TOS) {
+			list[i].h_u.ipv4_hdr.tos = headers->l3_key.tos;
+			list[i].m_u.ipv4_hdr.tos = headers->l3_mask.tos;
+		}
+
+		if (flags & ICE_TC_FLWR_FIELD_IP_TTL) {
+			list[i].h_u.ipv4_hdr.time_to_live =
+				headers->l3_key.ttl;
+			list[i].m_u.ipv4_hdr.time_to_live =
+				headers->l3_mask.ttl;
+		}
+
+		i++;
+	}
+
+	if (headers->l2_key.n_proto == htons(ETH_P_IPV6) &&
+	    (flags & (ICE_TC_FLWR_FIELD_IP_TOS | ICE_TC_FLWR_FIELD_IP_TTL))) {
+		struct ice_ipv6_hdr *hdr_h, *hdr_m;
+
+		hdr_h = &list[i].h_u.ipv6_hdr;
+		hdr_m = &list[i].m_u.ipv6_hdr;
+		list[i].type = ice_proto_type_from_ipv6(inner);
+
+		if (flags & ICE_TC_FLWR_FIELD_IP_TOS) {
+			be32p_replace_bits(&hdr_h->be_ver_tc_flow,
+					   headers->l3_key.tos,
+					   ICE_IPV6_HDR_TC_MASK);
+			be32p_replace_bits(&hdr_m->be_ver_tc_flow,
+					   headers->l3_mask.tos,
+					   ICE_IPV6_HDR_TC_MASK);
+		}
+
+		if (flags & ICE_TC_FLWR_FIELD_IP_TTL) {
+			hdr_h->hop_limit = headers->l3_key.ttl;
+			hdr_m->hop_limit = headers->l3_mask.ttl;
+		}
+
+		i++;
+	}
+
+	if (flags & ICE_TC_FLWR_FIELD_L2TPV3_SESSID) {
+		list[i].type = ICE_L2TPV3;
+
+		list[i].h_u.l2tpv3_sess_hdr.session_id =
+			headers->l2tpv3_hdr.session_id;
+		list[i].m_u.l2tpv3_sess_hdr.session_id =
+			cpu_to_be32(0xFFFFFFFF);
+
+		i++;
+	}
+
+	/* copy L4 (src, dest) port */
+	if (flags & (ICE_TC_FLWR_FIELD_DEST_L4_PORT |
+		     ICE_TC_FLWR_FIELD_SRC_L4_PORT)) {
+		struct ice_tc_l4_hdr *l4_key, *l4_mask;
+
+		list[i].type = ice_proto_type_from_l4_port(headers->l3_key.ip_proto);
+		l4_key = &headers->l4_key;
+		l4_mask = &headers->l4_mask;
+
+		if (flags & ICE_TC_FLWR_FIELD_DEST_L4_PORT) {
+			list[i].h_u.l4_hdr.dst_port = l4_key->dst_port;
+			list[i].m_u.l4_hdr.dst_port = l4_mask->dst_port;
+		}
+		if (flags & ICE_TC_FLWR_FIELD_SRC_L4_PORT) {
+			list[i].h_u.l4_hdr.src_port = l4_key->src_port;
+			list[i].m_u.l4_hdr.src_port = l4_mask->src_port;
+		}
+		i++;
+	}
+
+	return i;
+}
+
+/**
+ * ice_tc_tun_get_type - get the tunnel type
+ * @tunnel_dev: ptr to tunnel device
+ *
+ * This function detects appropriate tunnel_type if specified device is
+ * tunnel device such as VXLAN/Geneve
+ */
+static int ice_tc_tun_get_type(struct net_device *tunnel_dev)
+{
+	if (netif_is_vxlan(tunnel_dev))
+		return TNL_VXLAN;
+	if (netif_is_geneve(tunnel_dev))
+		return TNL_GENEVE;
+	if (netif_is_gretap(tunnel_dev) ||
+	    netif_is_ip6gretap(tunnel_dev))
+		return TNL_GRETAP;
+
+	/* Assume GTP-U by default in case of GTP netdev.
+	 * GTP-C may be selected later, based on enc_dst_port.
+	 */
+	if (netif_is_gtp(tunnel_dev))
+		return TNL_GTPU;
+	return TNL_LAST;
+}
+
+bool ice_is_tunnel_supported(struct net_device *dev)
+{
+	return ice_tc_tun_get_type(dev) != TNL_LAST;
+}
+
+static bool ice_tc_is_dev_uplink(struct net_device *dev)
+{
+	return netif_is_ice(dev) || ice_is_tunnel_supported(dev);
+}
+
+static int ice_tc_setup_redirect_action(struct net_device *filter_dev,
+					struct ice_tc_flower_fltr *fltr,
+					struct net_device *target_dev)
+{
+	struct ice_repr *repr;
+
+	fltr->action.fltr_act = ICE_FWD_TO_VSI;
+
+	if (ice_is_port_repr_netdev(filter_dev) &&
+	    ice_is_port_repr_netdev(target_dev)) {
+		repr = ice_netdev_to_repr(target_dev);
+
+		fltr->dest_vsi = repr->src_vsi;
+		fltr->direction = ICE_ESWITCH_FLTR_EGRESS;
+	} else if (ice_is_port_repr_netdev(filter_dev) &&
+		   ice_tc_is_dev_uplink(target_dev)) {
+		repr = ice_netdev_to_repr(filter_dev);
+
+		fltr->dest_vsi = repr->src_vsi->back->switchdev.uplink_vsi;
+		fltr->direction = ICE_ESWITCH_FLTR_EGRESS;
+	} else if (ice_tc_is_dev_uplink(filter_dev) &&
+		   ice_is_port_repr_netdev(target_dev)) {
+		repr = ice_netdev_to_repr(target_dev);
+
+		fltr->dest_vsi = repr->src_vsi;
+		fltr->direction = ICE_ESWITCH_FLTR_INGRESS;
+	} else {
+		NL_SET_ERR_MSG_MOD(fltr->extack,
+				   "Unsupported netdevice in switchdev mode");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+ice_tc_setup_drop_action(struct net_device *filter_dev,
+			 struct ice_tc_flower_fltr *fltr)
+{
+	fltr->action.fltr_act = ICE_DROP_PACKET;
+
+	if (ice_is_port_repr_netdev(filter_dev)) {
+		fltr->direction = ICE_ESWITCH_FLTR_EGRESS;
+	} else if (ice_tc_is_dev_uplink(filter_dev)) {
+		fltr->direction = ICE_ESWITCH_FLTR_INGRESS;
+	} else {
+		NL_SET_ERR_MSG_MOD(fltr->extack,
+				   "Unsupported netdevice in switchdev mode");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ice_eswitch_tc_parse_action(struct net_device *filter_dev,
+				       struct ice_tc_flower_fltr *fltr,
+				       struct flow_action_entry *act)
+{
+	int err;
+
+	switch (act->id) {
+	case FLOW_ACTION_DROP:
+		err = ice_tc_setup_drop_action(filter_dev, fltr);
+		if (err)
+			return err;
+
+		break;
+
+	case FLOW_ACTION_REDIRECT:
+		err = ice_tc_setup_redirect_action(filter_dev, fltr, act->dev);
+		if (err)
+			return err;
+
+		break;
+
+	default:
+		NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported action in switchdev mode");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
+{
+	struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers;
+	struct ice_adv_rule_info rule_info = { 0 };
+	struct ice_rule_query_data rule_added;
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_adv_lkup_elem *list;
+	u32 flags = fltr->flags;
+	int lkups_cnt;
+	int ret;
+	int i;
+
+	if (!flags || (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT)) {
+		NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported encap field(s)");
+		return -EOPNOTSUPP;
+	}
+
+	lkups_cnt = ice_tc_count_lkups(flags, headers, fltr);
+	list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
+	if (!list)
+		return -ENOMEM;
+
+	i = ice_tc_fill_rules(hw, flags, fltr, list, &rule_info, NULL);
+	if (i != lkups_cnt) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	rule_info.sw_act.fltr_act = fltr->action.fltr_act;
+	if (fltr->action.fltr_act != ICE_DROP_PACKET)
+		rule_info.sw_act.vsi_handle = fltr->dest_vsi->idx;
+	/* For now, making priority to be highest, and it also becomes
+	 * the priority for recipe which will get created as a result of
+	 * new extraction sequence based on input set.
+	 * Priority '7' is max val for switch recipe, higher the number
+	 * results into order of switch rule evaluation.
+	 */
+	rule_info.priority = 7;
+	rule_info.flags_info.act_valid = true;
+
+	if (fltr->direction == ICE_ESWITCH_FLTR_INGRESS) {
+		/* Uplink to VF */
+		rule_info.sw_act.flag |= ICE_FLTR_RX;
+		rule_info.sw_act.src = hw->pf_id;
+		rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE;
+	} else if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS &&
+		   fltr->dest_vsi == vsi->back->switchdev.uplink_vsi) {
+		/* VF to Uplink */
+		rule_info.sw_act.flag |= ICE_FLTR_TX;
+		rule_info.sw_act.src = vsi->idx;
+		rule_info.flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE;
+	} else {
+		/* VF to VF */
+		rule_info.sw_act.flag |= ICE_FLTR_TX;
+		rule_info.sw_act.src = vsi->idx;
+		rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE;
+	}
+
+	/* specify the cookie as filter_rule_id */
+	rule_info.fltr_rule_id = fltr->cookie;
+
+	ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added);
+	if (ret == -EEXIST) {
+		NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because it already exist");
+		ret = -EINVAL;
+		goto exit;
+	} else if (ret) {
+		NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter due to error");
+		goto exit;
+	}
+
+	/* store the output params, which are needed later for removing
+	 * advanced switch filter
+	 */
+	fltr->rid = rule_added.rid;
+	fltr->rule_id = rule_added.rule_id;
+	fltr->dest_vsi_handle = rule_added.vsi_handle;
+
+exit:
+	kfree(list);
+	return ret;
+}
+
+/**
+ * ice_locate_vsi_using_queue - locate VSI using queue (forward to queue action)
+ * @vsi: Pointer to VSI
+ * @queue: Queue index
+ *
+ * Locate the VSI using specified "queue". When ADQ is not enabled,
+ * always return input VSI, otherwise locate corresponding
+ * VSI based on per channel "offset" and "qcount"
+ */
+struct ice_vsi *
+ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue)
+{
+	int num_tc, tc;
+
+	/* if ADQ is not active, passed VSI is the candidate VSI */
+	if (!ice_is_adq_active(vsi->back))
+		return vsi;
+
+	/* Locate the VSI (it could still be main PF VSI or CHNL_VSI depending
+	 * upon queue number)
+	 */
+	num_tc = vsi->mqprio_qopt.qopt.num_tc;
+
+	for (tc = 0; tc < num_tc; tc++) {
+		int qcount = vsi->mqprio_qopt.qopt.count[tc];
+		int offset = vsi->mqprio_qopt.qopt.offset[tc];
+
+		if (queue >= offset && queue < offset + qcount) {
+			/* for non-ADQ TCs, passed VSI is the candidate VSI */
+			if (tc < ICE_CHNL_START_TC)
+				return vsi;
+			else
+				return vsi->tc_map_vsi[tc];
+		}
+	}
+	return NULL;
+}
+
+static struct ice_rx_ring *
+ice_locate_rx_ring_using_queue(struct ice_vsi *vsi,
+			       struct ice_tc_flower_fltr *tc_fltr)
+{
+	u16 queue = tc_fltr->action.fwd.q.queue;
+
+	return queue < vsi->num_rxq ? vsi->rx_rings[queue] : NULL;
+}
+
+/**
+ * ice_tc_forward_action - Determine destination VSI and queue for the action
+ * @vsi: Pointer to VSI
+ * @tc_fltr: Pointer to TC flower filter structure
+ *
+ * Validates the tc forward action and determines the destination VSI and queue
+ * for the forward action.
+ */
+static struct ice_vsi *
+ice_tc_forward_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr)
+{
+	struct ice_rx_ring *ring = NULL;
+	struct ice_vsi *dest_vsi = NULL;
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	u32 tc_class;
+	int q;
+
+	dev = ice_pf_to_dev(pf);
+
+	/* Get the destination VSI and/or destination queue and validate them */
+	switch (tc_fltr->action.fltr_act) {
+	case ICE_FWD_TO_VSI:
+		tc_class = tc_fltr->action.fwd.tc.tc_class;
+		/* Select the destination VSI */
+		if (tc_class < ICE_CHNL_START_TC) {
+			NL_SET_ERR_MSG_MOD(tc_fltr->extack,
+					   "Unable to add filter because of unsupported destination");
+			return ERR_PTR(-EOPNOTSUPP);
+		}
+		/* Locate ADQ VSI depending on hw_tc number */
+		dest_vsi = vsi->tc_map_vsi[tc_class];
+		break;
+	case ICE_FWD_TO_Q:
+		/* Locate the Rx queue */
+		ring = ice_locate_rx_ring_using_queue(vsi, tc_fltr);
+		if (!ring) {
+			dev_err(dev,
+				"Unable to locate Rx queue for action fwd_to_queue: %u\n",
+				tc_fltr->action.fwd.q.queue);
+			return ERR_PTR(-EINVAL);
+		}
+		/* Determine destination VSI even though the action is
+		 * FWD_TO_QUEUE, because QUEUE is associated with VSI
+		 */
+		q = tc_fltr->action.fwd.q.queue;
+		dest_vsi = ice_locate_vsi_using_queue(vsi, q);
+		break;
+	default:
+		dev_err(dev,
+			"Unable to add filter because of unsupported action %u (supported actions: fwd to tc, fwd to queue)\n",
+			tc_fltr->action.fltr_act);
+		return ERR_PTR(-EINVAL);
+	}
+	/* Must have valid dest_vsi (it could be main VSI or ADQ VSI) */
+	if (!dest_vsi) {
+		dev_err(dev,
+			"Unable to add filter because specified destination VSI doesn't exist\n");
+		return ERR_PTR(-EINVAL);
+	}
+	return dest_vsi;
+}
+
+/**
+ * ice_add_tc_flower_adv_fltr - add appropriate filter rules
+ * @vsi: Pointer to VSI
+ * @tc_fltr: Pointer to TC flower filter structure
+ *
+ * based on filter parameters using Advance recipes supported
+ * by OS package.
+ */
+static int
+ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi,
+			   struct ice_tc_flower_fltr *tc_fltr)
+{
+	struct ice_tc_flower_lyr_2_4_hdrs *headers = &tc_fltr->outer_headers;
+	struct ice_adv_rule_info rule_info = {0};
+	struct ice_rule_query_data rule_added;
+	struct ice_adv_lkup_elem *list;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u32 flags = tc_fltr->flags;
+	struct ice_vsi *dest_vsi;
+	struct device *dev;
+	u16 lkups_cnt = 0;
+	u16 l4_proto = 0;
+	int ret = 0;
+	u16 i = 0;
+
+	dev = ice_pf_to_dev(pf);
+	if (ice_is_safe_mode(pf)) {
+		NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unable to add filter because driver is in safe mode");
+		return -EOPNOTSUPP;
+	}
+
+	if (!flags || (flags & (ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 |
+				ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 |
+				ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 |
+				ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 |
+				ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT))) {
+		NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unsupported encap field(s)");
+		return -EOPNOTSUPP;
+	}
+
+	/* validate forwarding action VSI and queue */
+	if (ice_is_forward_action(tc_fltr->action.fltr_act)) {
+		dest_vsi = ice_tc_forward_action(vsi, tc_fltr);
+		if (IS_ERR(dest_vsi))
+			return PTR_ERR(dest_vsi);
+	}
+
+	lkups_cnt = ice_tc_count_lkups(flags, headers, tc_fltr);
+	list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
+	if (!list)
+		return -ENOMEM;
+
+	i = ice_tc_fill_rules(hw, flags, tc_fltr, list, &rule_info, &l4_proto);
+	if (i != lkups_cnt) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	rule_info.sw_act.fltr_act = tc_fltr->action.fltr_act;
+	/* specify the cookie as filter_rule_id */
+	rule_info.fltr_rule_id = tc_fltr->cookie;
+
+	switch (tc_fltr->action.fltr_act) {
+	case ICE_FWD_TO_VSI:
+		rule_info.sw_act.vsi_handle = dest_vsi->idx;
+		rule_info.priority = ICE_SWITCH_FLTR_PRIO_VSI;
+		rule_info.sw_act.src = hw->pf_id;
+		dev_dbg(dev, "add switch rule for TC:%u vsi_idx:%u, lkups_cnt:%u\n",
+			tc_fltr->action.fwd.tc.tc_class,
+			rule_info.sw_act.vsi_handle, lkups_cnt);
+		break;
+	case ICE_FWD_TO_Q:
+		/* HW queue number in global space */
+		rule_info.sw_act.fwd_id.q_id = tc_fltr->action.fwd.q.hw_queue;
+		rule_info.sw_act.vsi_handle = dest_vsi->idx;
+		rule_info.priority = ICE_SWITCH_FLTR_PRIO_QUEUE;
+		rule_info.sw_act.src = hw->pf_id;
+		dev_dbg(dev, "add switch rule action to forward to queue:%u (HW queue %u), lkups_cnt:%u\n",
+			tc_fltr->action.fwd.q.queue,
+			tc_fltr->action.fwd.q.hw_queue, lkups_cnt);
+		break;
+	case ICE_DROP_PACKET:
+		rule_info.sw_act.flag |= ICE_FLTR_RX;
+		rule_info.sw_act.src = hw->pf_id;
+		rule_info.priority = ICE_SWITCH_FLTR_PRIO_VSI;
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		goto exit;
+	}
+
+	ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added);
+	if (ret == -EEXIST) {
+		NL_SET_ERR_MSG_MOD(tc_fltr->extack,
+				   "Unable to add filter because it already exist");
+		ret = -EINVAL;
+		goto exit;
+	} else if (ret) {
+		NL_SET_ERR_MSG_MOD(tc_fltr->extack,
+				   "Unable to add filter due to error");
+		goto exit;
+	}
+
+	/* store the output params, which are needed later for removing
+	 * advanced switch filter
+	 */
+	tc_fltr->rid = rule_added.rid;
+	tc_fltr->rule_id = rule_added.rule_id;
+	tc_fltr->dest_vsi_handle = rule_added.vsi_handle;
+	if (tc_fltr->action.fltr_act == ICE_FWD_TO_VSI ||
+	    tc_fltr->action.fltr_act == ICE_FWD_TO_Q) {
+		tc_fltr->dest_vsi = dest_vsi;
+		/* keep track of advanced switch filter for
+		 * destination VSI
+		 */
+		dest_vsi->num_chnl_fltr++;
+
+		/* keeps track of channel filters for PF VSI */
+		if (vsi->type == ICE_VSI_PF &&
+		    (flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+			      ICE_TC_FLWR_FIELD_ENC_DST_MAC)))
+			pf->num_dmac_chnl_fltrs++;
+	}
+	switch (tc_fltr->action.fltr_act) {
+	case ICE_FWD_TO_VSI:
+		dev_dbg(dev, "added switch rule (lkups_cnt %u, flags 0x%x), action is forward to TC %u, rid %u, rule_id %u, vsi_idx %u\n",
+			lkups_cnt, flags,
+			tc_fltr->action.fwd.tc.tc_class, rule_added.rid,
+			rule_added.rule_id, rule_added.vsi_handle);
+		break;
+	case ICE_FWD_TO_Q:
+		dev_dbg(dev, "added switch rule (lkups_cnt %u, flags 0x%x), action is forward to queue: %u (HW queue %u)     , rid %u, rule_id %u\n",
+			lkups_cnt, flags, tc_fltr->action.fwd.q.queue,
+			tc_fltr->action.fwd.q.hw_queue, rule_added.rid,
+			rule_added.rule_id);
+		break;
+	case ICE_DROP_PACKET:
+		dev_dbg(dev, "added switch rule (lkups_cnt %u, flags 0x%x), action is drop, rid %u, rule_id %u\n",
+			lkups_cnt, flags, rule_added.rid, rule_added.rule_id);
+		break;
+	default:
+		break;
+	}
+exit:
+	kfree(list);
+	return ret;
+}
+
+/**
+ * ice_tc_set_pppoe - Parse PPPoE fields from TC flower filter
+ * @match: Pointer to flow match structure
+ * @fltr: Pointer to filter structure
+ * @headers: Pointer to outer header fields
+ * @returns PPP protocol used in filter (ppp_ses or ppp_disc)
+ */
+static u16
+ice_tc_set_pppoe(struct flow_match_pppoe *match,
+		 struct ice_tc_flower_fltr *fltr,
+		 struct ice_tc_flower_lyr_2_4_hdrs *headers)
+{
+	if (match->mask->session_id) {
+		fltr->flags |= ICE_TC_FLWR_FIELD_PPPOE_SESSID;
+		headers->pppoe_hdr.session_id = match->key->session_id;
+	}
+
+	if (match->mask->ppp_proto) {
+		fltr->flags |= ICE_TC_FLWR_FIELD_PPP_PROTO;
+		headers->pppoe_hdr.ppp_proto = match->key->ppp_proto;
+	}
+
+	return be16_to_cpu(match->key->type);
+}
+
+/**
+ * ice_tc_set_ipv4 - Parse IPv4 addresses from TC flower filter
+ * @match: Pointer to flow match structure
+ * @fltr: Pointer to filter structure
+ * @headers: inner or outer header fields
+ * @is_encap: set true for tunnel IPv4 address
+ */
+static int
+ice_tc_set_ipv4(struct flow_match_ipv4_addrs *match,
+		struct ice_tc_flower_fltr *fltr,
+		struct ice_tc_flower_lyr_2_4_hdrs *headers, bool is_encap)
+{
+	if (match->key->dst) {
+		if (is_encap)
+			fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_IPV4;
+		else
+			fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV4;
+		headers->l3_key.dst_ipv4 = match->key->dst;
+		headers->l3_mask.dst_ipv4 = match->mask->dst;
+	}
+	if (match->key->src) {
+		if (is_encap)
+			fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_IPV4;
+		else
+			fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV4;
+		headers->l3_key.src_ipv4 = match->key->src;
+		headers->l3_mask.src_ipv4 = match->mask->src;
+	}
+	return 0;
+}
+
+/**
+ * ice_tc_set_ipv6 - Parse IPv6 addresses from TC flower filter
+ * @match: Pointer to flow match structure
+ * @fltr: Pointer to filter structure
+ * @headers: inner or outer header fields
+ * @is_encap: set true for tunnel IPv6 address
+ */
+static int
+ice_tc_set_ipv6(struct flow_match_ipv6_addrs *match,
+		struct ice_tc_flower_fltr *fltr,
+		struct ice_tc_flower_lyr_2_4_hdrs *headers, bool is_encap)
+{
+	struct ice_tc_l3_hdr *l3_key, *l3_mask;
+
+	/* src and dest IPV6 address should not be LOOPBACK
+	 * (0:0:0:0:0:0:0:1), which can be represented as ::1
+	 */
+	if (ipv6_addr_loopback(&match->key->dst) ||
+	    ipv6_addr_loopback(&match->key->src)) {
+		NL_SET_ERR_MSG_MOD(fltr->extack, "Bad IPv6, addr is LOOPBACK");
+		return -EINVAL;
+	}
+	/* if src/dest IPv6 address is *,* error */
+	if (ipv6_addr_any(&match->mask->dst) &&
+	    ipv6_addr_any(&match->mask->src)) {
+		NL_SET_ERR_MSG_MOD(fltr->extack, "Bad src/dest IPv6, addr is any");
+		return -EINVAL;
+	}
+	if (!ipv6_addr_any(&match->mask->dst)) {
+		if (is_encap)
+			fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_IPV6;
+		else
+			fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV6;
+	}
+	if (!ipv6_addr_any(&match->mask->src)) {
+		if (is_encap)
+			fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_IPV6;
+		else
+			fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV6;
+	}
+
+	l3_key = &headers->l3_key;
+	l3_mask = &headers->l3_mask;
+
+	if (fltr->flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 |
+			   ICE_TC_FLWR_FIELD_SRC_IPV6)) {
+		memcpy(&l3_key->src_ipv6_addr, &match->key->src.s6_addr,
+		       sizeof(match->key->src.s6_addr));
+		memcpy(&l3_mask->src_ipv6_addr, &match->mask->src.s6_addr,
+		       sizeof(match->mask->src.s6_addr));
+	}
+	if (fltr->flags & (ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 |
+			   ICE_TC_FLWR_FIELD_DEST_IPV6)) {
+		memcpy(&l3_key->dst_ipv6_addr, &match->key->dst.s6_addr,
+		       sizeof(match->key->dst.s6_addr));
+		memcpy(&l3_mask->dst_ipv6_addr, &match->mask->dst.s6_addr,
+		       sizeof(match->mask->dst.s6_addr));
+	}
+
+	return 0;
+}
+
+/**
+ * ice_tc_set_tos_ttl - Parse IP ToS/TTL from TC flower filter
+ * @match: Pointer to flow match structure
+ * @fltr: Pointer to filter structure
+ * @headers: inner or outer header fields
+ * @is_encap: set true for tunnel
+ */
+static void
+ice_tc_set_tos_ttl(struct flow_match_ip *match,
+		   struct ice_tc_flower_fltr *fltr,
+		   struct ice_tc_flower_lyr_2_4_hdrs *headers,
+		   bool is_encap)
+{
+	if (match->mask->tos) {
+		if (is_encap)
+			fltr->flags |= ICE_TC_FLWR_FIELD_ENC_IP_TOS;
+		else
+			fltr->flags |= ICE_TC_FLWR_FIELD_IP_TOS;
+
+		headers->l3_key.tos = match->key->tos;
+		headers->l3_mask.tos = match->mask->tos;
+	}
+
+	if (match->mask->ttl) {
+		if (is_encap)
+			fltr->flags |= ICE_TC_FLWR_FIELD_ENC_IP_TTL;
+		else
+			fltr->flags |= ICE_TC_FLWR_FIELD_IP_TTL;
+
+		headers->l3_key.ttl = match->key->ttl;
+		headers->l3_mask.ttl = match->mask->ttl;
+	}
+}
+
+/**
+ * ice_tc_set_port - Parse ports from TC flower filter
+ * @match: Flow match structure
+ * @fltr: Pointer to filter structure
+ * @headers: inner or outer header fields
+ * @is_encap: set true for tunnel port
+ */
+static int
+ice_tc_set_port(struct flow_match_ports match,
+		struct ice_tc_flower_fltr *fltr,
+		struct ice_tc_flower_lyr_2_4_hdrs *headers, bool is_encap)
+{
+	if (match.key->dst) {
+		if (is_encap)
+			fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT;
+		else
+			fltr->flags |= ICE_TC_FLWR_FIELD_DEST_L4_PORT;
+
+		headers->l4_key.dst_port = match.key->dst;
+		headers->l4_mask.dst_port = match.mask->dst;
+	}
+	if (match.key->src) {
+		if (is_encap)
+			fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT;
+		else
+			fltr->flags |= ICE_TC_FLWR_FIELD_SRC_L4_PORT;
+
+		headers->l4_key.src_port = match.key->src;
+		headers->l4_mask.src_port = match.mask->src;
+	}
+	return 0;
+}
+
+static struct net_device *
+ice_get_tunnel_device(struct net_device *dev, struct flow_rule *rule)
+{
+	struct flow_action_entry *act;
+	int i;
+
+	if (ice_is_tunnel_supported(dev))
+		return dev;
+
+	flow_action_for_each(i, act, &rule->action) {
+		if (act->id == FLOW_ACTION_REDIRECT &&
+		    ice_is_tunnel_supported(act->dev))
+			return act->dev;
+	}
+
+	return NULL;
+}
+
+/**
+ * ice_parse_gtp_type - Sets GTP tunnel type to GTP-U or GTP-C
+ * @match: Flow match structure
+ * @fltr: Pointer to filter structure
+ *
+ * GTP-C/GTP-U is selected based on destination port number (enc_dst_port).
+ * Before calling this funtcion, fltr->tunnel_type should be set to TNL_GTPU,
+ * therefore making GTP-U the default choice (when destination port number is
+ * not specified).
+ */
+static int
+ice_parse_gtp_type(struct flow_match_ports match,
+		   struct ice_tc_flower_fltr *fltr)
+{
+	u16 dst_port;
+
+	if (match.key->dst) {
+		dst_port = be16_to_cpu(match.key->dst);
+
+		switch (dst_port) {
+		case 2152:
+			break;
+		case 2123:
+			fltr->tunnel_type = TNL_GTPC;
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported GTP port number");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int
+ice_parse_tunnel_attr(struct net_device *dev, struct flow_rule *rule,
+		      struct ice_tc_flower_fltr *fltr)
+{
+	struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers;
+	struct flow_match_control enc_control;
+
+	fltr->tunnel_type = ice_tc_tun_get_type(dev);
+	headers->l3_key.ip_proto = IPPROTO_UDP;
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid enc_keyid;
+
+		flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+		if (!enc_keyid.mask->keyid ||
+		    enc_keyid.mask->keyid != cpu_to_be32(ICE_TC_FLOWER_MASK_32))
+			return -EINVAL;
+
+		fltr->flags |= ICE_TC_FLWR_FIELD_TENANT_ID;
+		fltr->tenant_id = enc_keyid.key->keyid;
+	}
+
+	flow_rule_match_enc_control(rule, &enc_control);
+
+	if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		struct flow_match_ipv4_addrs match;
+
+		flow_rule_match_enc_ipv4_addrs(rule, &match);
+		if (ice_tc_set_ipv4(&match, fltr, headers, true))
+			return -EINVAL;
+	} else if (enc_control.key->addr_type ==
+					FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		struct flow_match_ipv6_addrs match;
+
+		flow_rule_match_enc_ipv6_addrs(rule, &match);
+		if (ice_tc_set_ipv6(&match, fltr, headers, true))
+			return -EINVAL;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
+		struct flow_match_ip match;
+
+		flow_rule_match_enc_ip(rule, &match);
+		ice_tc_set_tos_ttl(&match, fltr, headers, true);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) &&
+	    fltr->tunnel_type != TNL_VXLAN && fltr->tunnel_type != TNL_GENEVE) {
+		struct flow_match_ports match;
+
+		flow_rule_match_enc_ports(rule, &match);
+
+		if (fltr->tunnel_type != TNL_GTPU) {
+			if (ice_tc_set_port(match, fltr, headers, true))
+				return -EINVAL;
+		} else {
+			if (ice_parse_gtp_type(match, fltr))
+				return -EINVAL;
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) {
+		struct flow_match_enc_opts match;
+
+		flow_rule_match_enc_opts(rule, &match);
+
+		memcpy(&fltr->gtp_pdu_info_keys, &match.key->data[0],
+		       sizeof(struct gtp_pdu_session_info));
+
+		memcpy(&fltr->gtp_pdu_info_masks, &match.mask->data[0],
+		       sizeof(struct gtp_pdu_session_info));
+
+		fltr->flags |= ICE_TC_FLWR_FIELD_ENC_OPTS;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_parse_cls_flower - Parse TC flower filters provided by kernel
+ * @vsi: Pointer to the VSI
+ * @filter_dev: Pointer to device on which filter is being added
+ * @f: Pointer to struct flow_cls_offload
+ * @fltr: Pointer to filter structure
+ */
+static int
+ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
+		     struct flow_cls_offload *f,
+		     struct ice_tc_flower_fltr *fltr)
+{
+	struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers;
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0;
+	struct flow_dissector *dissector;
+	struct net_device *tunnel_dev;
+
+	dissector = rule->match.dissector;
+
+	if (dissector->used_keys &
+	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_IP) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_PPPOE) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_L2TPV3))) {
+		NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported key used");
+		return -EOPNOTSUPP;
+	}
+
+	tunnel_dev = ice_get_tunnel_device(filter_dev, rule);
+	if (tunnel_dev) {
+		int err;
+
+		filter_dev = tunnel_dev;
+
+		err = ice_parse_tunnel_attr(filter_dev, rule, fltr);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(fltr->extack, "Failed to parse TC flower tunnel attributes");
+			return err;
+		}
+
+		/* header pointers should point to the inner headers, outer
+		 * header were already set by ice_parse_tunnel_attr
+		 */
+		headers = &fltr->inner_headers;
+	} else if (dissector->used_keys &
+		  (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
+		   BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
+		   BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+		   BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
+		NL_SET_ERR_MSG_MOD(fltr->extack, "Tunnel key used, but device isn't a tunnel");
+		return -EOPNOTSUPP;
+	} else {
+		fltr->tunnel_type = TNL_LAST;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+
+		n_proto_key = ntohs(match.key->n_proto);
+		n_proto_mask = ntohs(match.mask->n_proto);
+
+		if (n_proto_key == ETH_P_ALL || n_proto_key == 0 ||
+		    fltr->tunnel_type == TNL_GTPU ||
+		    fltr->tunnel_type == TNL_GTPC) {
+			n_proto_key = 0;
+			n_proto_mask = 0;
+		} else {
+			fltr->flags |= ICE_TC_FLWR_FIELD_ETH_TYPE_ID;
+		}
+
+		headers->l2_key.n_proto = cpu_to_be16(n_proto_key);
+		headers->l2_mask.n_proto = cpu_to_be16(n_proto_mask);
+		headers->l3_key.ip_proto = match.key->ip_proto;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
+
+		flow_rule_match_eth_addrs(rule, &match);
+
+		if (!is_zero_ether_addr(match.key->dst)) {
+			ether_addr_copy(headers->l2_key.dst_mac,
+					match.key->dst);
+			ether_addr_copy(headers->l2_mask.dst_mac,
+					match.mask->dst);
+			fltr->flags |= ICE_TC_FLWR_FIELD_DST_MAC;
+		}
+
+		if (!is_zero_ether_addr(match.key->src)) {
+			ether_addr_copy(headers->l2_key.src_mac,
+					match.key->src);
+			ether_addr_copy(headers->l2_mask.src_mac,
+					match.mask->src);
+			fltr->flags |= ICE_TC_FLWR_FIELD_SRC_MAC;
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
+	    is_vlan_dev(filter_dev)) {
+		struct flow_dissector_key_vlan mask;
+		struct flow_dissector_key_vlan key;
+		struct flow_match_vlan match;
+
+		if (is_vlan_dev(filter_dev)) {
+			match.key = &key;
+			match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
+			match.key->vlan_priority = 0;
+			match.mask = &mask;
+			memset(match.mask, 0xff, sizeof(*match.mask));
+			match.mask->vlan_priority = 0;
+		} else {
+			flow_rule_match_vlan(rule, &match);
+		}
+
+		if (match.mask->vlan_id) {
+			if (match.mask->vlan_id == VLAN_VID_MASK) {
+				fltr->flags |= ICE_TC_FLWR_FIELD_VLAN;
+				headers->vlan_hdr.vlan_id =
+					cpu_to_be16(match.key->vlan_id &
+						    VLAN_VID_MASK);
+			} else {
+				NL_SET_ERR_MSG_MOD(fltr->extack, "Bad VLAN mask");
+				return -EINVAL;
+			}
+		}
+
+		if (match.mask->vlan_priority) {
+			fltr->flags |= ICE_TC_FLWR_FIELD_VLAN_PRIO;
+			headers->vlan_hdr.vlan_prio =
+				be16_encode_bits(match.key->vlan_priority,
+						 VLAN_PRIO_MASK);
+		}
+
+		if (match.mask->vlan_tpid) {
+			headers->vlan_hdr.vlan_tpid = match.key->vlan_tpid;
+			fltr->flags |= ICE_TC_FLWR_FIELD_VLAN_TPID;
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
+		struct flow_match_vlan match;
+
+		if (!ice_is_dvm_ena(&vsi->back->hw)) {
+			NL_SET_ERR_MSG_MOD(fltr->extack, "Double VLAN mode is not enabled");
+			return -EINVAL;
+		}
+
+		flow_rule_match_cvlan(rule, &match);
+
+		if (match.mask->vlan_id) {
+			if (match.mask->vlan_id == VLAN_VID_MASK) {
+				fltr->flags |= ICE_TC_FLWR_FIELD_CVLAN;
+				headers->cvlan_hdr.vlan_id =
+					cpu_to_be16(match.key->vlan_id &
+						    VLAN_VID_MASK);
+			} else {
+				NL_SET_ERR_MSG_MOD(fltr->extack,
+						   "Bad CVLAN mask");
+				return -EINVAL;
+			}
+		}
+
+		if (match.mask->vlan_priority) {
+			fltr->flags |= ICE_TC_FLWR_FIELD_CVLAN_PRIO;
+			headers->cvlan_hdr.vlan_prio =
+				be16_encode_bits(match.key->vlan_priority,
+						 VLAN_PRIO_MASK);
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PPPOE)) {
+		struct flow_match_pppoe match;
+
+		flow_rule_match_pppoe(rule, &match);
+		n_proto_key = ice_tc_set_pppoe(&match, fltr, headers);
+
+		/* If ethertype equals ETH_P_PPP_SES, n_proto might be
+		 * overwritten by encapsulated protocol (ppp_proto field) or set
+		 * to 0. To correct this, flow_match_pppoe provides the type
+		 * field, which contains the actual ethertype (ETH_P_PPP_SES).
+		 */
+		headers->l2_key.n_proto = cpu_to_be16(n_proto_key);
+		headers->l2_mask.n_proto = cpu_to_be16(0xFFFF);
+		fltr->flags |= ICE_TC_FLWR_FIELD_ETH_TYPE_ID;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
+
+		flow_rule_match_control(rule, &match);
+
+		addr_type = match.key->addr_type;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		struct flow_match_ipv4_addrs match;
+
+		flow_rule_match_ipv4_addrs(rule, &match);
+		if (ice_tc_set_ipv4(&match, fltr, headers, false))
+			return -EINVAL;
+	}
+
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		struct flow_match_ipv6_addrs match;
+
+		flow_rule_match_ipv6_addrs(rule, &match);
+		if (ice_tc_set_ipv6(&match, fltr, headers, false))
+			return -EINVAL;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+		struct flow_match_ip match;
+
+		flow_rule_match_ip(rule, &match);
+		ice_tc_set_tos_ttl(&match, fltr, headers, false);
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_L2TPV3)) {
+		struct flow_match_l2tpv3 match;
+
+		flow_rule_match_l2tpv3(rule, &match);
+
+		fltr->flags |= ICE_TC_FLWR_FIELD_L2TPV3_SESSID;
+		headers->l2tpv3_hdr.session_id = match.key->session_id;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(rule, &match);
+		if (ice_tc_set_port(match, fltr, headers, false))
+			return -EINVAL;
+		switch (headers->l3_key.ip_proto) {
+		case IPPROTO_TCP:
+		case IPPROTO_UDP:
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(fltr->extack, "Only UDP and TCP transport are supported");
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+/**
+ * ice_add_switch_fltr - Add TC flower filters
+ * @vsi: Pointer to VSI
+ * @fltr: Pointer to struct ice_tc_flower_fltr
+ *
+ * Add filter in HW switch block
+ */
+static int
+ice_add_switch_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
+{
+	if (fltr->action.fltr_act == ICE_FWD_TO_QGRP)
+		return -EOPNOTSUPP;
+
+	if (ice_is_eswitch_mode_switchdev(vsi->back))
+		return ice_eswitch_add_tc_fltr(vsi, fltr);
+
+	return ice_add_tc_flower_adv_fltr(vsi, fltr);
+}
+
+/**
+ * ice_prep_adq_filter - Prepare ADQ filter with the required additional headers
+ * @vsi: Pointer to VSI
+ * @fltr: Pointer to TC flower filter structure
+ *
+ * Prepare ADQ filter with the required additional header fields
+ */
+static int
+ice_prep_adq_filter(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
+{
+	if ((fltr->flags & ICE_TC_FLWR_FIELD_TENANT_ID) &&
+	    (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+			   ICE_TC_FLWR_FIELD_SRC_MAC))) {
+		NL_SET_ERR_MSG_MOD(fltr->extack,
+				   "Unable to add filter because filter using tunnel key and inner MAC is unsupported combination");
+		return -EOPNOTSUPP;
+	}
+
+	/* For ADQ, filter must include dest MAC address, otherwise unwanted
+	 * packets with unrelated MAC address get delivered to ADQ VSIs as long
+	 * as remaining filter criteria is satisfied such as dest IP address
+	 * and dest/src L4 port. Below code handles the following cases:
+	 * 1. For non-tunnel, if user specify MAC addresses, use them.
+	 * 2. For non-tunnel, if user didn't specify MAC address, add implicit
+	 * dest MAC to be lower netdev's active unicast MAC address
+	 * 3. For tunnel,  as of now TC-filter through flower classifier doesn't
+	 * have provision for user to specify outer DMAC, hence driver to
+	 * implicitly add outer dest MAC to be lower netdev's active unicast
+	 * MAC address.
+	 */
+	if (fltr->tunnel_type != TNL_LAST &&
+	    !(fltr->flags & ICE_TC_FLWR_FIELD_ENC_DST_MAC))
+		fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DST_MAC;
+
+	if (fltr->tunnel_type == TNL_LAST &&
+	    !(fltr->flags & ICE_TC_FLWR_FIELD_DST_MAC))
+		fltr->flags |= ICE_TC_FLWR_FIELD_DST_MAC;
+
+	if (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+			   ICE_TC_FLWR_FIELD_ENC_DST_MAC)) {
+		ether_addr_copy(fltr->outer_headers.l2_key.dst_mac,
+				vsi->netdev->dev_addr);
+		eth_broadcast_addr(fltr->outer_headers.l2_mask.dst_mac);
+	}
+
+	/* Make sure VLAN is already added to main VSI, before allowing ADQ to
+	 * add a VLAN based filter such as MAC + VLAN + L4 port.
+	 */
+	if (fltr->flags & ICE_TC_FLWR_FIELD_VLAN) {
+		u16 vlan_id = be16_to_cpu(fltr->outer_headers.vlan_hdr.vlan_id);
+
+		if (!ice_vlan_fltr_exist(&vsi->back->hw, vlan_id, vsi->idx)) {
+			NL_SET_ERR_MSG_MOD(fltr->extack,
+					   "Unable to add filter because legacy VLAN filter for specified destination doesn't exist");
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+/**
+ * ice_handle_tclass_action - Support directing to a traffic class
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to TC flower offload structure
+ * @fltr: Pointer to TC flower filter structure
+ *
+ * Support directing traffic to a traffic class/queue-set
+ */
+static int
+ice_handle_tclass_action(struct ice_vsi *vsi,
+			 struct flow_cls_offload *cls_flower,
+			 struct ice_tc_flower_fltr *fltr)
+{
+	int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid);
+
+	/* user specified hw_tc (must be non-zero for ADQ TC), action is forward
+	 * to hw_tc (i.e. ADQ channel number)
+	 */
+	if (tc < ICE_CHNL_START_TC) {
+		NL_SET_ERR_MSG_MOD(fltr->extack,
+				   "Unable to add filter because of unsupported destination");
+		return -EOPNOTSUPP;
+	}
+	if (!(vsi->all_enatc & BIT(tc))) {
+		NL_SET_ERR_MSG_MOD(fltr->extack,
+				   "Unable to add filter because of non-existence destination");
+		return -EINVAL;
+	}
+	fltr->action.fltr_act = ICE_FWD_TO_VSI;
+	fltr->action.fwd.tc.tc_class = tc;
+
+	return ice_prep_adq_filter(vsi, fltr);
+}
+
+static int
+ice_tc_forward_to_queue(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr,
+			struct flow_action_entry *act)
+{
+	struct ice_vsi *ch_vsi = NULL;
+	u16 queue = act->rx_queue;
+
+	if (queue >= vsi->num_rxq) {
+		NL_SET_ERR_MSG_MOD(fltr->extack,
+				   "Unable to add filter because specified queue is invalid");
+		return -EINVAL;
+	}
+	fltr->action.fltr_act = ICE_FWD_TO_Q;
+	fltr->action.fwd.q.queue = queue;
+	/* determine corresponding HW queue */
+	fltr->action.fwd.q.hw_queue = vsi->rxq_map[queue];
+
+	/* If ADQ is configured, and the queue belongs to ADQ VSI, then prepare
+	 * ADQ switch filter
+	 */
+	ch_vsi = ice_locate_vsi_using_queue(vsi, fltr->action.fwd.q.queue);
+	if (!ch_vsi)
+		return -EINVAL;
+	fltr->dest_vsi = ch_vsi;
+	if (!ice_is_chnl_fltr(fltr))
+		return 0;
+
+	return ice_prep_adq_filter(vsi, fltr);
+}
+
+static int
+ice_tc_parse_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr,
+		    struct flow_action_entry *act)
+{
+	switch (act->id) {
+	case FLOW_ACTION_RX_QUEUE_MAPPING:
+		/* forward to queue */
+		return ice_tc_forward_to_queue(vsi, fltr, act);
+	case FLOW_ACTION_DROP:
+		fltr->action.fltr_act = ICE_DROP_PACKET;
+		return 0;
+	default:
+		NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported TC action");
+		return -EOPNOTSUPP;
+	}
+}
+
+/**
+ * ice_parse_tc_flower_actions - Parse the actions for a TC filter
+ * @filter_dev: Pointer to device on which filter is being added
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to TC flower offload structure
+ * @fltr: Pointer to TC flower filter structure
+ *
+ * Parse the actions for a TC filter
+ */
+static int ice_parse_tc_flower_actions(struct net_device *filter_dev,
+				       struct ice_vsi *vsi,
+				       struct flow_cls_offload *cls_flower,
+				       struct ice_tc_flower_fltr *fltr)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls_flower);
+	struct flow_action *flow_action = &rule->action;
+	struct flow_action_entry *act;
+	int i, err;
+
+	if (cls_flower->classid)
+		return ice_handle_tclass_action(vsi, cls_flower, fltr);
+
+	if (!flow_action_has_entries(flow_action))
+		return -EINVAL;
+
+	flow_action_for_each(i, act, flow_action) {
+		if (ice_is_eswitch_mode_switchdev(vsi->back))
+			err = ice_eswitch_tc_parse_action(filter_dev, fltr, act);
+		else
+			err = ice_tc_parse_action(vsi, fltr, act);
+		if (err)
+			return err;
+		continue;
+	}
+	return 0;
+}
+
+/**
+ * ice_del_tc_fltr - deletes a filter from HW table
+ * @vsi: Pointer to VSI
+ * @fltr: Pointer to struct ice_tc_flower_fltr
+ *
+ * This function deletes a filter from HW table and manages book-keeping
+ */
+static int ice_del_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
+{
+	struct ice_rule_query_data rule_rem;
+	struct ice_pf *pf = vsi->back;
+	int err;
+
+	rule_rem.rid = fltr->rid;
+	rule_rem.rule_id = fltr->rule_id;
+	rule_rem.vsi_handle = fltr->dest_vsi_handle;
+	err = ice_rem_adv_rule_by_id(&pf->hw, &rule_rem);
+	if (err) {
+		if (err == -ENOENT) {
+			NL_SET_ERR_MSG_MOD(fltr->extack, "Filter does not exist");
+			return -ENOENT;
+		}
+		NL_SET_ERR_MSG_MOD(fltr->extack, "Failed to delete TC flower filter");
+		return -EIO;
+	}
+
+	/* update advanced switch filter count for destination
+	 * VSI if filter destination was VSI
+	 */
+	if (fltr->dest_vsi) {
+		if (fltr->dest_vsi->type == ICE_VSI_CHNL) {
+			fltr->dest_vsi->num_chnl_fltr--;
+
+			/* keeps track of channel filters for PF VSI */
+			if (vsi->type == ICE_VSI_PF &&
+			    (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC |
+					    ICE_TC_FLWR_FIELD_ENC_DST_MAC)))
+				pf->num_dmac_chnl_fltrs--;
+		}
+	}
+	return 0;
+}
+
+/**
+ * ice_add_tc_fltr - adds a TC flower filter
+ * @netdev: Pointer to netdev
+ * @vsi: Pointer to VSI
+ * @f: Pointer to flower offload structure
+ * @__fltr: Pointer to struct ice_tc_flower_fltr
+ *
+ * This function parses TC-flower input fields, parses action,
+ * and adds a filter.
+ */
+static int
+ice_add_tc_fltr(struct net_device *netdev, struct ice_vsi *vsi,
+		struct flow_cls_offload *f,
+		struct ice_tc_flower_fltr **__fltr)
+{
+	struct ice_tc_flower_fltr *fltr;
+	int err;
+
+	/* by default, set output to be INVALID */
+	*__fltr = NULL;
+
+	fltr = kzalloc(sizeof(*fltr), GFP_KERNEL);
+	if (!fltr)
+		return -ENOMEM;
+
+	fltr->cookie = f->cookie;
+	fltr->extack = f->common.extack;
+	fltr->src_vsi = vsi;
+	INIT_HLIST_NODE(&fltr->tc_flower_node);
+
+	err = ice_parse_cls_flower(netdev, vsi, f, fltr);
+	if (err < 0)
+		goto err;
+
+	err = ice_parse_tc_flower_actions(netdev, vsi, f, fltr);
+	if (err < 0)
+		goto err;
+
+	err = ice_add_switch_fltr(vsi, fltr);
+	if (err < 0)
+		goto err;
+
+	/* return the newly created filter */
+	*__fltr = fltr;
+
+	return 0;
+err:
+	kfree(fltr);
+	return err;
+}
+
+/**
+ * ice_find_tc_flower_fltr - Find the TC flower filter in the list
+ * @pf: Pointer to PF
+ * @cookie: filter specific cookie
+ */
+static struct ice_tc_flower_fltr *
+ice_find_tc_flower_fltr(struct ice_pf *pf, unsigned long cookie)
+{
+	struct ice_tc_flower_fltr *fltr;
+
+	hlist_for_each_entry(fltr, &pf->tc_flower_fltr_list, tc_flower_node)
+		if (cookie == fltr->cookie)
+			return fltr;
+
+	return NULL;
+}
+
+/**
+ * ice_add_cls_flower - add TC flower filters
+ * @netdev: Pointer to filter device
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to flower offload structure
+ */
+int
+ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi,
+		   struct flow_cls_offload *cls_flower)
+{
+	struct netlink_ext_ack *extack = cls_flower->common.extack;
+	struct net_device *vsi_netdev = vsi->netdev;
+	struct ice_tc_flower_fltr *fltr;
+	struct ice_pf *pf = vsi->back;
+	int err;
+
+	if (ice_is_reset_in_progress(pf->state))
+		return -EBUSY;
+	if (test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
+		return -EINVAL;
+
+	if (ice_is_port_repr_netdev(netdev))
+		vsi_netdev = netdev;
+
+	if (!(vsi_netdev->features & NETIF_F_HW_TC) &&
+	    !test_bit(ICE_FLAG_CLS_FLOWER, pf->flags)) {
+		/* Based on TC indirect notifications from kernel, all ice
+		 * devices get an instance of rule from higher level device.
+		 * Avoid triggering explicit error in this case.
+		 */
+		if (netdev == vsi_netdev)
+			NL_SET_ERR_MSG_MOD(extack, "can't apply TC flower filters, turn ON hw-tc-offload and try again");
+		return -EINVAL;
+	}
+
+	/* avoid duplicate entries, if exists - return error */
+	fltr = ice_find_tc_flower_fltr(pf, cls_flower->cookie);
+	if (fltr) {
+		NL_SET_ERR_MSG_MOD(extack, "filter cookie already exists, ignoring");
+		return -EEXIST;
+	}
+
+	/* prep and add TC-flower filter in HW */
+	err = ice_add_tc_fltr(netdev, vsi, cls_flower, &fltr);
+	if (err)
+		return err;
+
+	/* add filter into an ordered list */
+	hlist_add_head(&fltr->tc_flower_node, &pf->tc_flower_fltr_list);
+	return 0;
+}
+
+/**
+ * ice_del_cls_flower - delete TC flower filters
+ * @vsi: Pointer to VSI
+ * @cls_flower: Pointer to struct flow_cls_offload
+ */
+int
+ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower)
+{
+	struct ice_tc_flower_fltr *fltr;
+	struct ice_pf *pf = vsi->back;
+	int err;
+
+	/* find filter */
+	fltr = ice_find_tc_flower_fltr(pf, cls_flower->cookie);
+	if (!fltr) {
+		if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) &&
+		    hlist_empty(&pf->tc_flower_fltr_list))
+			return 0;
+
+		NL_SET_ERR_MSG_MOD(cls_flower->common.extack, "failed to delete TC flower filter because unable to find it");
+		return -EINVAL;
+	}
+
+	fltr->extack = cls_flower->common.extack;
+	/* delete filter from HW */
+	err = ice_del_tc_fltr(vsi, fltr);
+	if (err)
+		return err;
+
+	/* delete filter from an ordered list */
+	hlist_del(&fltr->tc_flower_node);
+
+	/* free the filter node */
+	kfree(fltr);
+
+	return 0;
+}
+
+/**
+ * ice_replay_tc_fltrs - replay TC filters
+ * @pf: pointer to PF struct
+ */
+void ice_replay_tc_fltrs(struct ice_pf *pf)
+{
+	struct ice_tc_flower_fltr *fltr;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_safe(fltr, node,
+				  &pf->tc_flower_fltr_list,
+				  tc_flower_node) {
+		fltr->extack = NULL;
+		ice_add_switch_fltr(fltr->src_vsi, fltr);
+	}
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h
new file mode 100644
index 0000000000..65d387163a
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_TC_LIB_H_
+#define _ICE_TC_LIB_H_
+
+#define ICE_TC_FLWR_FIELD_DST_MAC		BIT(0)
+#define ICE_TC_FLWR_FIELD_SRC_MAC		BIT(1)
+#define ICE_TC_FLWR_FIELD_VLAN			BIT(2)
+#define ICE_TC_FLWR_FIELD_DEST_IPV4		BIT(3)
+#define ICE_TC_FLWR_FIELD_SRC_IPV4		BIT(4)
+#define ICE_TC_FLWR_FIELD_DEST_IPV6		BIT(5)
+#define ICE_TC_FLWR_FIELD_SRC_IPV6		BIT(6)
+#define ICE_TC_FLWR_FIELD_DEST_L4_PORT		BIT(7)
+#define ICE_TC_FLWR_FIELD_SRC_L4_PORT		BIT(8)
+#define ICE_TC_FLWR_FIELD_TENANT_ID		BIT(9)
+#define ICE_TC_FLWR_FIELD_ENC_DEST_IPV4		BIT(10)
+#define ICE_TC_FLWR_FIELD_ENC_SRC_IPV4		BIT(11)
+#define ICE_TC_FLWR_FIELD_ENC_DEST_IPV6		BIT(12)
+#define ICE_TC_FLWR_FIELD_ENC_SRC_IPV6		BIT(13)
+#define ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT	BIT(14)
+#define ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT	BIT(15)
+#define ICE_TC_FLWR_FIELD_ENC_DST_MAC		BIT(16)
+#define ICE_TC_FLWR_FIELD_ETH_TYPE_ID		BIT(17)
+#define ICE_TC_FLWR_FIELD_ENC_OPTS		BIT(18)
+#define ICE_TC_FLWR_FIELD_CVLAN			BIT(19)
+#define ICE_TC_FLWR_FIELD_PPPOE_SESSID		BIT(20)
+#define ICE_TC_FLWR_FIELD_PPP_PROTO		BIT(21)
+#define ICE_TC_FLWR_FIELD_IP_TOS		BIT(22)
+#define ICE_TC_FLWR_FIELD_IP_TTL		BIT(23)
+#define ICE_TC_FLWR_FIELD_ENC_IP_TOS		BIT(24)
+#define ICE_TC_FLWR_FIELD_ENC_IP_TTL		BIT(25)
+#define ICE_TC_FLWR_FIELD_L2TPV3_SESSID		BIT(26)
+#define ICE_TC_FLWR_FIELD_VLAN_PRIO		BIT(27)
+#define ICE_TC_FLWR_FIELD_CVLAN_PRIO		BIT(28)
+#define ICE_TC_FLWR_FIELD_VLAN_TPID		BIT(29)
+
+#define ICE_TC_FLOWER_MASK_32   0xFFFFFFFF
+
+#define ICE_IPV6_HDR_TC_MASK 0xFF00000
+
+struct ice_indr_block_priv {
+	struct net_device *netdev;
+	struct ice_netdev_priv *np;
+	struct list_head list;
+};
+
+struct ice_tc_flower_action {
+	/* forward action specific params */
+	union {
+		struct {
+			u32 tc_class; /* forward to hw_tc */
+			u32 rsvd;
+		} tc;
+		struct {
+			u16 queue; /* forward to queue */
+			/* To add filter in HW, absolute queue number in global
+			 * space of queues (between 0...N) is needed
+			 */
+			u16 hw_queue;
+		} q;
+	} fwd;
+	enum ice_sw_fwd_act_type fltr_act;
+};
+
+struct ice_tc_vlan_hdr {
+	__be16 vlan_id; /* Only last 12 bits valid */
+	__be16 vlan_prio; /* Only last 3 bits valid (valid values: 0..7) */
+	__be16 vlan_tpid;
+};
+
+struct ice_tc_pppoe_hdr {
+	__be16 session_id;
+	__be16 ppp_proto;
+};
+
+struct ice_tc_l2_hdr {
+	u8 dst_mac[ETH_ALEN];
+	u8 src_mac[ETH_ALEN];
+	__be16 n_proto;    /* Ethernet Protocol */
+};
+
+struct ice_tc_l3_hdr {
+	u8 ip_proto;    /* IPPROTO value */
+	union {
+		struct {
+			struct in_addr dst_ip;
+			struct in_addr src_ip;
+		} v4;
+		struct {
+			struct in6_addr dst_ip6;
+			struct in6_addr src_ip6;
+		} v6;
+	} ip;
+#define dst_ipv6	ip.v6.dst_ip6.s6_addr32
+#define dst_ipv6_addr	ip.v6.dst_ip6.s6_addr
+#define src_ipv6	ip.v6.src_ip6.s6_addr32
+#define src_ipv6_addr	ip.v6.src_ip6.s6_addr
+#define dst_ipv4	ip.v4.dst_ip.s_addr
+#define src_ipv4	ip.v4.src_ip.s_addr
+
+	u8 tos;
+	u8 ttl;
+};
+
+struct ice_tc_l2tpv3_hdr {
+	__be32 session_id;
+};
+
+struct ice_tc_l4_hdr {
+	__be16 dst_port;
+	__be16 src_port;
+};
+
+struct ice_tc_flower_lyr_2_4_hdrs {
+	/* L2 layer fields with their mask */
+	struct ice_tc_l2_hdr l2_key;
+	struct ice_tc_l2_hdr l2_mask;
+	struct ice_tc_vlan_hdr vlan_hdr;
+	struct ice_tc_vlan_hdr cvlan_hdr;
+	struct ice_tc_pppoe_hdr pppoe_hdr;
+	struct ice_tc_l2tpv3_hdr l2tpv3_hdr;
+	/* L3 (IPv4[6]) layer fields with their mask */
+	struct ice_tc_l3_hdr l3_key;
+	struct ice_tc_l3_hdr l3_mask;
+
+	/* L4 layer fields with their mask */
+	struct ice_tc_l4_hdr l4_key;
+	struct ice_tc_l4_hdr l4_mask;
+};
+
+enum ice_eswitch_fltr_direction {
+	ICE_ESWITCH_FLTR_INGRESS,
+	ICE_ESWITCH_FLTR_EGRESS,
+};
+
+struct ice_tc_flower_fltr {
+	struct hlist_node tc_flower_node;
+
+	/* cookie becomes filter_rule_id if rule is added successfully */
+	unsigned long cookie;
+
+	/* add_adv_rule returns information like recipe ID, rule_id. Store
+	 * those values since they are needed to remove advanced rule
+	 */
+	u16 rid;
+	u16 rule_id;
+	/* VSI handle of the destination VSI (it could be main PF VSI, CHNL_VSI,
+	 * VF VSI)
+	 */
+	u16 dest_vsi_handle;
+	/* ptr to destination VSI */
+	struct ice_vsi *dest_vsi;
+	/* direction of fltr for eswitch use case */
+	enum ice_eswitch_fltr_direction direction;
+
+	/* Parsed TC flower configuration params */
+	struct ice_tc_flower_lyr_2_4_hdrs outer_headers;
+	struct ice_tc_flower_lyr_2_4_hdrs inner_headers;
+	struct ice_vsi *src_vsi;
+	__be32 tenant_id;
+	struct gtp_pdu_session_info gtp_pdu_info_keys;
+	struct gtp_pdu_session_info gtp_pdu_info_masks;
+	u32 flags;
+	u8 tunnel_type;
+	struct ice_tc_flower_action	action;
+
+	/* cache ptr which is used wherever needed to communicate netlink
+	 * messages
+	 */
+	struct netlink_ext_ack *extack;
+};
+
+/**
+ * ice_is_chnl_fltr - is this a valid channel filter
+ * @f: Pointer to tc-flower filter
+ *
+ * Criteria to determine of given filter is valid channel filter
+ * or not is based on its destination.
+ * For forward to VSI action, if destination is valid hw_tc (aka tc_class)
+ * and in supported range of TCs for ADQ, then return true.
+ * For forward to queue, as long as dest_vsi is valid and it is of type
+ * VSI_CHNL (PF ADQ VSI is of type VSI_CHNL), return true.
+ * NOTE: For forward to queue, correct dest_vsi is still set in tc_fltr based
+ * on destination queue specified.
+ */
+static inline bool ice_is_chnl_fltr(struct ice_tc_flower_fltr *f)
+{
+	if (f->action.fltr_act == ICE_FWD_TO_VSI)
+		return f->action.fwd.tc.tc_class >= ICE_CHNL_START_TC &&
+		       f->action.fwd.tc.tc_class < ICE_CHNL_MAX_TC;
+	else if (f->action.fltr_act == ICE_FWD_TO_Q)
+		return f->dest_vsi && f->dest_vsi->type == ICE_VSI_CHNL;
+
+	return false;
+}
+
+/**
+ * ice_chnl_dmac_fltr_cnt - DMAC based CHNL filter count
+ * @pf: Pointer to PF
+ */
+static inline int ice_chnl_dmac_fltr_cnt(struct ice_pf *pf)
+{
+	return pf->num_dmac_chnl_fltrs;
+}
+
+struct ice_vsi *ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue);
+int
+ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi,
+		   struct flow_cls_offload *cls_flower);
+int
+ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower);
+void ice_replay_tc_fltrs(struct ice_pf *pf);
+bool ice_is_tunnel_supported(struct net_device *dev);
+
+static inline bool ice_is_forward_action(enum ice_sw_fwd_act_type fltr_act)
+{
+	switch (fltr_act) {
+	case ICE_FWD_TO_VSI:
+	case ICE_FWD_TO_Q:
+		return true;
+	default:
+		return false;
+	}
+}
+#endif /* _ICE_TC_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_trace.h b/drivers/net/ethernet/intel/ice/ice_trace.h
new file mode 100644
index 0000000000..b2f5c9fe01
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_trace.h
@@ -0,0 +1,346 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021 Intel Corporation. */
+
+/* Modeled on trace-events-sample.h */
+
+/* The trace subsystem name for ice will be "ice".
+ *
+ * This file is named ice_trace.h.
+ *
+ * Since this include file's name is different from the trace
+ * subsystem name, we'll have to define TRACE_INCLUDE_FILE at the end
+ * of this file.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ice
+
+/* See trace-events-sample.h for a detailed description of why this
+ * guard clause is different from most normal include files.
+ */
+#if !defined(_ICE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _ICE_TRACE_H_
+
+#include <linux/tracepoint.h>
+#include "ice_eswitch_br.h"
+
+/* ice_trace() macro enables shared code to refer to trace points
+ * like:
+ *
+ * trace_ice_example(args...)
+ *
+ * ... as:
+ *
+ * ice_trace(example, args...)
+ *
+ * ... to resolve to the PF version of the tracepoint without
+ * ifdefs, and to allow tracepoints to be disabled entirely at build
+ * time.
+ *
+ * Trace point should always be referred to in the driver via this
+ * macro.
+ *
+ * Similarly, ice_trace_enabled(trace_name) wraps references to
+ * trace_ice_<trace_name>_enabled() functions.
+ * @trace_name: name of tracepoint
+ */
+#define _ICE_TRACE_NAME(trace_name) (trace_##ice##_##trace_name)
+#define ICE_TRACE_NAME(trace_name) _ICE_TRACE_NAME(trace_name)
+
+#define ice_trace(trace_name, args...) ICE_TRACE_NAME(trace_name)(args)
+
+#define ice_trace_enabled(trace_name) ICE_TRACE_NAME(trace_name##_enabled)()
+
+/* This is for events common to PF. Corresponding versions will be named
+ * trace_ice_*. The ice_trace() macro above will select the right trace point
+ * name for the driver.
+ */
+
+/* Begin tracepoints */
+
+/* Global tracepoints */
+
+/* Events related to DIM, q_vectors and ring containers */
+DECLARE_EVENT_CLASS(ice_rx_dim_template,
+		    TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim),
+		    TP_ARGS(q_vector, dim),
+		    TP_STRUCT__entry(__field(struct ice_q_vector *, q_vector)
+				     __field(struct dim *, dim)
+				     __string(devname, q_vector->rx.rx_ring->netdev->name)),
+
+		    TP_fast_assign(__entry->q_vector = q_vector;
+				   __entry->dim = dim;
+				   __assign_str(devname, q_vector->rx.rx_ring->netdev->name);),
+
+		    TP_printk("netdev: %s Rx-Q: %d dim-state: %d dim-profile: %d dim-tune: %d dim-st-right: %d dim-st-left: %d dim-tired: %d",
+			      __get_str(devname),
+			      __entry->q_vector->rx.rx_ring->q_index,
+			      __entry->dim->state,
+			      __entry->dim->profile_ix,
+			      __entry->dim->tune_state,
+			      __entry->dim->steps_right,
+			      __entry->dim->steps_left,
+			      __entry->dim->tired)
+);
+
+DEFINE_EVENT(ice_rx_dim_template, ice_rx_dim_work,
+	     TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim),
+	     TP_ARGS(q_vector, dim)
+);
+
+DECLARE_EVENT_CLASS(ice_tx_dim_template,
+		    TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim),
+		    TP_ARGS(q_vector, dim),
+		    TP_STRUCT__entry(__field(struct ice_q_vector *, q_vector)
+				     __field(struct dim *, dim)
+				     __string(devname, q_vector->tx.tx_ring->netdev->name)),
+
+		    TP_fast_assign(__entry->q_vector = q_vector;
+				   __entry->dim = dim;
+				   __assign_str(devname, q_vector->tx.tx_ring->netdev->name);),
+
+		    TP_printk("netdev: %s Tx-Q: %d dim-state: %d dim-profile: %d dim-tune: %d dim-st-right: %d dim-st-left: %d dim-tired: %d",
+			      __get_str(devname),
+			      __entry->q_vector->tx.tx_ring->q_index,
+			      __entry->dim->state,
+			      __entry->dim->profile_ix,
+			      __entry->dim->tune_state,
+			      __entry->dim->steps_right,
+			      __entry->dim->steps_left,
+			      __entry->dim->tired)
+);
+
+DEFINE_EVENT(ice_tx_dim_template, ice_tx_dim_work,
+	     TP_PROTO(struct ice_q_vector *q_vector, struct dim *dim),
+	     TP_ARGS(q_vector, dim)
+);
+
+/* Events related to a vsi & ring */
+DECLARE_EVENT_CLASS(ice_tx_template,
+		    TP_PROTO(struct ice_tx_ring *ring, struct ice_tx_desc *desc,
+			     struct ice_tx_buf *buf),
+
+		    TP_ARGS(ring, desc, buf),
+		    TP_STRUCT__entry(__field(void *, ring)
+				     __field(void *, desc)
+				     __field(void *, buf)
+				     __string(devname, ring->netdev->name)),
+
+		    TP_fast_assign(__entry->ring = ring;
+				   __entry->desc = desc;
+				   __entry->buf = buf;
+				   __assign_str(devname, ring->netdev->name);),
+
+		    TP_printk("netdev: %s ring: %pK desc: %pK buf %pK", __get_str(devname),
+			      __entry->ring, __entry->desc, __entry->buf)
+);
+
+#define DEFINE_TX_TEMPLATE_OP_EVENT(name) \
+DEFINE_EVENT(ice_tx_template, name, \
+	     TP_PROTO(struct ice_tx_ring *ring, \
+		      struct ice_tx_desc *desc, \
+		      struct ice_tx_buf *buf), \
+	     TP_ARGS(ring, desc, buf))
+
+DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq);
+DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq_unmap);
+DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq_unmap_eop);
+
+DECLARE_EVENT_CLASS(ice_rx_template,
+		    TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc),
+
+		    TP_ARGS(ring, desc),
+
+		    TP_STRUCT__entry(__field(void *, ring)
+				     __field(void *, desc)
+				     __string(devname, ring->netdev->name)),
+
+		    TP_fast_assign(__entry->ring = ring;
+				   __entry->desc = desc;
+				   __assign_str(devname, ring->netdev->name);),
+
+		    TP_printk("netdev: %s ring: %pK desc: %pK", __get_str(devname),
+			      __entry->ring, __entry->desc)
+);
+DEFINE_EVENT(ice_rx_template, ice_clean_rx_irq,
+	     TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc),
+	     TP_ARGS(ring, desc)
+);
+
+DECLARE_EVENT_CLASS(ice_rx_indicate_template,
+		    TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc,
+			     struct sk_buff *skb),
+
+		    TP_ARGS(ring, desc, skb),
+
+		    TP_STRUCT__entry(__field(void *, ring)
+				     __field(void *, desc)
+				     __field(void *, skb)
+				     __string(devname, ring->netdev->name)),
+
+		    TP_fast_assign(__entry->ring = ring;
+				   __entry->desc = desc;
+				   __entry->skb = skb;
+				   __assign_str(devname, ring->netdev->name);),
+
+		    TP_printk("netdev: %s ring: %pK desc: %pK skb %pK", __get_str(devname),
+			      __entry->ring, __entry->desc, __entry->skb)
+);
+
+DEFINE_EVENT(ice_rx_indicate_template, ice_clean_rx_irq_indicate,
+	     TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc,
+		      struct sk_buff *skb),
+	     TP_ARGS(ring, desc, skb)
+);
+
+DECLARE_EVENT_CLASS(ice_xmit_template,
+		    TP_PROTO(struct ice_tx_ring *ring, struct sk_buff *skb),
+
+		    TP_ARGS(ring, skb),
+
+		    TP_STRUCT__entry(__field(void *, ring)
+				     __field(void *, skb)
+				     __string(devname, ring->netdev->name)),
+
+		    TP_fast_assign(__entry->ring = ring;
+				   __entry->skb = skb;
+				   __assign_str(devname, ring->netdev->name);),
+
+		    TP_printk("netdev: %s skb: %pK ring: %pK", __get_str(devname),
+			      __entry->skb, __entry->ring)
+);
+
+#define DEFINE_XMIT_TEMPLATE_OP_EVENT(name) \
+DEFINE_EVENT(ice_xmit_template, name, \
+	     TP_PROTO(struct ice_tx_ring *ring, struct sk_buff *skb), \
+	     TP_ARGS(ring, skb))
+
+DEFINE_XMIT_TEMPLATE_OP_EVENT(ice_xmit_frame_ring);
+DEFINE_XMIT_TEMPLATE_OP_EVENT(ice_xmit_frame_ring_drop);
+
+DECLARE_EVENT_CLASS(ice_tx_tstamp_template,
+		    TP_PROTO(struct sk_buff *skb, int idx),
+
+		    TP_ARGS(skb, idx),
+
+		    TP_STRUCT__entry(__field(void *, skb)
+				     __field(int, idx)),
+
+		    TP_fast_assign(__entry->skb = skb;
+				   __entry->idx = idx;),
+
+		    TP_printk("skb %pK idx %d",
+			      __entry->skb, __entry->idx)
+);
+#define DEFINE_TX_TSTAMP_OP_EVENT(name) \
+DEFINE_EVENT(ice_tx_tstamp_template, name, \
+	     TP_PROTO(struct sk_buff *skb, int idx), \
+	     TP_ARGS(skb, idx))
+
+DEFINE_TX_TSTAMP_OP_EVENT(ice_tx_tstamp_request);
+DEFINE_TX_TSTAMP_OP_EVENT(ice_tx_tstamp_fw_req);
+DEFINE_TX_TSTAMP_OP_EVENT(ice_tx_tstamp_fw_done);
+DEFINE_TX_TSTAMP_OP_EVENT(ice_tx_tstamp_complete);
+
+DECLARE_EVENT_CLASS(ice_esw_br_fdb_template,
+		    TP_PROTO(struct ice_esw_br_fdb_entry *fdb),
+		    TP_ARGS(fdb),
+		    TP_STRUCT__entry(__array(char, dev_name, IFNAMSIZ)
+				     __array(unsigned char, addr, ETH_ALEN)
+				     __field(u16, vid)
+				     __field(int, flags)),
+		    TP_fast_assign(strscpy(__entry->dev_name,
+					   netdev_name(fdb->dev),
+					   IFNAMSIZ);
+				   memcpy(__entry->addr, fdb->data.addr, ETH_ALEN);
+				   __entry->vid = fdb->data.vid;
+				   __entry->flags = fdb->flags;),
+		    TP_printk("net_device=%s addr=%pM vid=%u flags=%x",
+			      __entry->dev_name,
+			      __entry->addr,
+			      __entry->vid,
+			      __entry->flags)
+);
+
+DEFINE_EVENT(ice_esw_br_fdb_template,
+	     ice_eswitch_br_fdb_entry_create,
+	     TP_PROTO(struct ice_esw_br_fdb_entry *fdb),
+	     TP_ARGS(fdb)
+);
+
+DEFINE_EVENT(ice_esw_br_fdb_template,
+	     ice_eswitch_br_fdb_entry_find_and_delete,
+	     TP_PROTO(struct ice_esw_br_fdb_entry *fdb),
+	     TP_ARGS(fdb)
+);
+
+DECLARE_EVENT_CLASS(ice_esw_br_vlan_template,
+		    TP_PROTO(struct ice_esw_br_vlan *vlan),
+		    TP_ARGS(vlan),
+		    TP_STRUCT__entry(__field(u16, vid)
+				     __field(u16, flags)),
+		    TP_fast_assign(__entry->vid = vlan->vid;
+				   __entry->flags = vlan->flags;),
+		    TP_printk("vid=%u flags=%x",
+			      __entry->vid,
+			      __entry->flags)
+);
+
+DEFINE_EVENT(ice_esw_br_vlan_template,
+	     ice_eswitch_br_vlan_create,
+	     TP_PROTO(struct ice_esw_br_vlan *vlan),
+	     TP_ARGS(vlan)
+);
+
+DEFINE_EVENT(ice_esw_br_vlan_template,
+	     ice_eswitch_br_vlan_cleanup,
+	     TP_PROTO(struct ice_esw_br_vlan *vlan),
+	     TP_ARGS(vlan)
+);
+
+#define ICE_ESW_BR_PORT_NAME_L 16
+
+DECLARE_EVENT_CLASS(ice_esw_br_port_template,
+		    TP_PROTO(struct ice_esw_br_port *port),
+		    TP_ARGS(port),
+		    TP_STRUCT__entry(__field(u16, vport_num)
+				     __array(char, port_type, ICE_ESW_BR_PORT_NAME_L)),
+		    TP_fast_assign(__entry->vport_num = port->vsi_idx;
+					if (port->type == ICE_ESWITCH_BR_UPLINK_PORT)
+						strscpy(__entry->port_type,
+							"Uplink",
+							ICE_ESW_BR_PORT_NAME_L);
+					else
+						strscpy(__entry->port_type,
+							"VF Representor",
+							ICE_ESW_BR_PORT_NAME_L);),
+		    TP_printk("vport_num=%u port type=%s",
+			      __entry->vport_num,
+			      __entry->port_type)
+);
+
+DEFINE_EVENT(ice_esw_br_port_template,
+	     ice_eswitch_br_port_link,
+	     TP_PROTO(struct ice_esw_br_port *port),
+	     TP_ARGS(port)
+);
+
+DEFINE_EVENT(ice_esw_br_port_template,
+	     ice_eswitch_br_port_unlink,
+	     TP_PROTO(struct ice_esw_br_port *port),
+	     TP_ARGS(port)
+);
+
+/* End tracepoints */
+
+#endif /* _ICE_TRACE_H_ */
+/* This must be outside ifdef _ICE_TRACE_H */
+
+/* This trace include file is not located in the .../include/trace
+ * with the kernel tracepoint definitions, because we're a loadable
+ * module.
+ */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE ../../drivers/net/ethernet/intel/ice/ice_trace
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
new file mode 100644
index 0000000000..24c9140159
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -0,0 +1,2579 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* The driver transmit and receive code */
+
+#include <linux/mm.h>
+#include <linux/netdevice.h>
+#include <linux/prefetch.h>
+#include <linux/bpf_trace.h>
+#include <net/dsfield.h>
+#include <net/mpls.h>
+#include <net/xdp.h>
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
+#include "ice.h"
+#include "ice_trace.h"
+#include "ice_dcb_lib.h"
+#include "ice_xsk.h"
+#include "ice_eswitch.h"
+
+#define ICE_RX_HDR_SIZE		256
+
+#define FDIR_DESC_RXDID 0x40
+#define ICE_FDIR_CLEAN_DELAY 10
+
+/**
+ * ice_prgm_fdir_fltr - Program a Flow Director filter
+ * @vsi: VSI to send dummy packet
+ * @fdir_desc: flow director descriptor
+ * @raw_packet: allocated buffer for flow director
+ */
+int
+ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc,
+		   u8 *raw_packet)
+{
+	struct ice_tx_buf *tx_buf, *first;
+	struct ice_fltr_desc *f_desc;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_ring *tx_ring;
+	struct device *dev;
+	dma_addr_t dma;
+	u32 td_cmd;
+	u16 i;
+
+	/* VSI and Tx ring */
+	if (!vsi)
+		return -ENOENT;
+	tx_ring = vsi->tx_rings[0];
+	if (!tx_ring || !tx_ring->desc)
+		return -ENOENT;
+	dev = tx_ring->dev;
+
+	/* we are using two descriptors to add/del a filter and we can wait */
+	for (i = ICE_FDIR_CLEAN_DELAY; ICE_DESC_UNUSED(tx_ring) < 2; i--) {
+		if (!i)
+			return -EAGAIN;
+		msleep_interruptible(1);
+	}
+
+	dma = dma_map_single(dev, raw_packet, ICE_FDIR_MAX_RAW_PKT_SIZE,
+			     DMA_TO_DEVICE);
+
+	if (dma_mapping_error(dev, dma))
+		return -EINVAL;
+
+	/* grab the next descriptor */
+	i = tx_ring->next_to_use;
+	first = &tx_ring->tx_buf[i];
+	f_desc = ICE_TX_FDIRDESC(tx_ring, i);
+	memcpy(f_desc, fdir_desc, sizeof(*f_desc));
+
+	i++;
+	i = (i < tx_ring->count) ? i : 0;
+	tx_desc = ICE_TX_DESC(tx_ring, i);
+	tx_buf = &tx_ring->tx_buf[i];
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	memset(tx_buf, 0, sizeof(*tx_buf));
+	dma_unmap_len_set(tx_buf, len, ICE_FDIR_MAX_RAW_PKT_SIZE);
+	dma_unmap_addr_set(tx_buf, dma, dma);
+
+	tx_desc->buf_addr = cpu_to_le64(dma);
+	td_cmd = ICE_TXD_LAST_DESC_CMD | ICE_TX_DESC_CMD_DUMMY |
+		 ICE_TX_DESC_CMD_RE;
+
+	tx_buf->type = ICE_TX_BUF_DUMMY;
+	tx_buf->raw_buf = raw_packet;
+
+	tx_desc->cmd_type_offset_bsz =
+		ice_build_ctob(td_cmd, 0, ICE_FDIR_MAX_RAW_PKT_SIZE, 0);
+
+	/* Force memory write to complete before letting h/w know
+	 * there are new descriptors to fetch.
+	 */
+	wmb();
+
+	/* mark the data descriptor to be watched */
+	first->next_to_watch = tx_desc;
+
+	writel(tx_ring->next_to_use, tx_ring->tail);
+
+	return 0;
+}
+
+/**
+ * ice_unmap_and_free_tx_buf - Release a Tx buffer
+ * @ring: the ring that owns the buffer
+ * @tx_buf: the buffer to free
+ */
+static void
+ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf)
+{
+	if (dma_unmap_len(tx_buf, len))
+		dma_unmap_page(ring->dev,
+			       dma_unmap_addr(tx_buf, dma),
+			       dma_unmap_len(tx_buf, len),
+			       DMA_TO_DEVICE);
+
+	switch (tx_buf->type) {
+	case ICE_TX_BUF_DUMMY:
+		devm_kfree(ring->dev, tx_buf->raw_buf);
+		break;
+	case ICE_TX_BUF_SKB:
+		dev_kfree_skb_any(tx_buf->skb);
+		break;
+	case ICE_TX_BUF_XDP_TX:
+		page_frag_free(tx_buf->raw_buf);
+		break;
+	case ICE_TX_BUF_XDP_XMIT:
+		xdp_return_frame(tx_buf->xdpf);
+		break;
+	}
+
+	tx_buf->next_to_watch = NULL;
+	tx_buf->type = ICE_TX_BUF_EMPTY;
+	dma_unmap_len_set(tx_buf, len, 0);
+	/* tx_buf must be completely set up in the transmit path */
+}
+
+static struct netdev_queue *txring_txq(const struct ice_tx_ring *ring)
+{
+	return netdev_get_tx_queue(ring->netdev, ring->q_index);
+}
+
+/**
+ * ice_clean_tx_ring - Free any empty Tx buffers
+ * @tx_ring: ring to be cleaned
+ */
+void ice_clean_tx_ring(struct ice_tx_ring *tx_ring)
+{
+	u32 size;
+	u16 i;
+
+	if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_pool) {
+		ice_xsk_clean_xdp_ring(tx_ring);
+		goto tx_skip_free;
+	}
+
+	/* ring already cleared, nothing to do */
+	if (!tx_ring->tx_buf)
+		return;
+
+	/* Free all the Tx ring sk_buffs */
+	for (i = 0; i < tx_ring->count; i++)
+		ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
+
+tx_skip_free:
+	memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count);
+
+	size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc),
+		     PAGE_SIZE);
+	/* Zero out the descriptor ring */
+	memset(tx_ring->desc, 0, size);
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+	if (!tx_ring->netdev)
+		return;
+
+	/* cleanup Tx queue statistics */
+	netdev_tx_reset_queue(txring_txq(tx_ring));
+}
+
+/**
+ * ice_free_tx_ring - Free Tx resources per queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ */
+void ice_free_tx_ring(struct ice_tx_ring *tx_ring)
+{
+	u32 size;
+
+	ice_clean_tx_ring(tx_ring);
+	devm_kfree(tx_ring->dev, tx_ring->tx_buf);
+	tx_ring->tx_buf = NULL;
+
+	if (tx_ring->desc) {
+		size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc),
+			     PAGE_SIZE);
+		dmam_free_coherent(tx_ring->dev, size,
+				   tx_ring->desc, tx_ring->dma);
+		tx_ring->desc = NULL;
+	}
+}
+
+/**
+ * ice_clean_tx_irq - Reclaim resources after transmit completes
+ * @tx_ring: Tx ring to clean
+ * @napi_budget: Used to determine if we are in netpoll
+ *
+ * Returns true if there's any budget left (e.g. the clean is finished)
+ */
+static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget)
+{
+	unsigned int total_bytes = 0, total_pkts = 0;
+	unsigned int budget = ICE_DFLT_IRQ_WORK;
+	struct ice_vsi *vsi = tx_ring->vsi;
+	s16 i = tx_ring->next_to_clean;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+
+	/* get the bql data ready */
+	netdev_txq_bql_complete_prefetchw(txring_txq(tx_ring));
+
+	tx_buf = &tx_ring->tx_buf[i];
+	tx_desc = ICE_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	prefetch(&vsi->state);
+
+	do {
+		struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* follow the guidelines of other drivers */
+		prefetchw(&tx_buf->skb->users);
+
+		smp_rmb();	/* prevent any other reads prior to eop_desc */
+
+		ice_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf);
+		/* if the descriptor isn't done, no work yet to do */
+		if (!(eop_desc->cmd_type_offset_bsz &
+		      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buf->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buf->bytecount;
+		total_pkts += tx_buf->gso_segs;
+
+		/* free the skb */
+		napi_consume_skb(tx_buf->skb, napi_budget);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buf, dma),
+				 dma_unmap_len(tx_buf, len),
+				 DMA_TO_DEVICE);
+
+		/* clear tx_buf data */
+		tx_buf->type = ICE_TX_BUF_EMPTY;
+		dma_unmap_len_set(tx_buf, len, 0);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			ice_trace(clean_tx_irq_unmap, tx_ring, tx_desc, tx_buf);
+			tx_buf++;
+			tx_desc++;
+			i++;
+			if (unlikely(!i)) {
+				i -= tx_ring->count;
+				tx_buf = tx_ring->tx_buf;
+				tx_desc = ICE_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buf, len)) {
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buf, dma),
+					       dma_unmap_len(tx_buf, len),
+					       DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buf, len, 0);
+			}
+		}
+		ice_trace(clean_tx_irq_unmap_eop, tx_ring, tx_desc, tx_buf);
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buf++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buf = tx_ring->tx_buf;
+			tx_desc = ICE_TX_DESC(tx_ring, 0);
+		}
+
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+
+	ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);
+	netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts, total_bytes);
+
+#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
+	if (unlikely(total_pkts && netif_carrier_ok(tx_ring->netdev) &&
+		     (ICE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (netif_tx_queue_stopped(txring_txq(tx_ring)) &&
+		    !test_bit(ICE_VSI_DOWN, vsi->state)) {
+			netif_tx_wake_queue(txring_txq(tx_ring));
+			++tx_ring->ring_stats->tx_stats.restart_q;
+		}
+	}
+
+	return !!budget;
+}
+
+/**
+ * ice_setup_tx_ring - Allocate the Tx descriptors
+ * @tx_ring: the Tx ring to set up
+ *
+ * Return 0 on success, negative on error
+ */
+int ice_setup_tx_ring(struct ice_tx_ring *tx_ring)
+{
+	struct device *dev = tx_ring->dev;
+	u32 size;
+
+	if (!dev)
+		return -ENOMEM;
+
+	/* warn if we are about to overwrite the pointer */
+	WARN_ON(tx_ring->tx_buf);
+	tx_ring->tx_buf =
+		devm_kcalloc(dev, sizeof(*tx_ring->tx_buf), tx_ring->count,
+			     GFP_KERNEL);
+	if (!tx_ring->tx_buf)
+		return -ENOMEM;
+
+	/* round up to nearest page */
+	size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc),
+		     PAGE_SIZE);
+	tx_ring->desc = dmam_alloc_coherent(dev, size, &tx_ring->dma,
+					    GFP_KERNEL);
+	if (!tx_ring->desc) {
+		dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
+			size);
+		goto err;
+	}
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+	tx_ring->ring_stats->tx_stats.prev_pkt = -1;
+	return 0;
+
+err:
+	devm_kfree(dev, tx_ring->tx_buf);
+	tx_ring->tx_buf = NULL;
+	return -ENOMEM;
+}
+
+/**
+ * ice_clean_rx_ring - Free Rx buffers
+ * @rx_ring: ring to be cleaned
+ */
+void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
+{
+	struct xdp_buff *xdp = &rx_ring->xdp;
+	struct device *dev = rx_ring->dev;
+	u32 size;
+	u16 i;
+
+	/* ring already cleared, nothing to do */
+	if (!rx_ring->rx_buf)
+		return;
+
+	if (rx_ring->xsk_pool) {
+		ice_xsk_clean_rx_ring(rx_ring);
+		goto rx_skip_free;
+	}
+
+	if (xdp->data) {
+		xdp_return_buff(xdp);
+		xdp->data = NULL;
+	}
+
+	/* Free all the Rx ring sk_buffs */
+	for (i = 0; i < rx_ring->count; i++) {
+		struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
+
+		if (!rx_buf->page)
+			continue;
+
+		/* Invalidate cache lines that may have been written to by
+		 * device so that we avoid corrupting memory.
+		 */
+		dma_sync_single_range_for_cpu(dev, rx_buf->dma,
+					      rx_buf->page_offset,
+					      rx_ring->rx_buf_len,
+					      DMA_FROM_DEVICE);
+
+		/* free resources associated with mapping */
+		dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
+		__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
+
+		rx_buf->page = NULL;
+		rx_buf->page_offset = 0;
+	}
+
+rx_skip_free:
+	if (rx_ring->xsk_pool)
+		memset(rx_ring->xdp_buf, 0, array_size(rx_ring->count, sizeof(*rx_ring->xdp_buf)));
+	else
+		memset(rx_ring->rx_buf, 0, array_size(rx_ring->count, sizeof(*rx_ring->rx_buf)));
+
+	/* Zero out the descriptor ring */
+	size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
+		     PAGE_SIZE);
+	memset(rx_ring->desc, 0, size);
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->first_desc = 0;
+	rx_ring->next_to_use = 0;
+}
+
+/**
+ * ice_free_rx_ring - Free Rx resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ */
+void ice_free_rx_ring(struct ice_rx_ring *rx_ring)
+{
+	u32 size;
+
+	ice_clean_rx_ring(rx_ring);
+	if (rx_ring->vsi->type == ICE_VSI_PF)
+		if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+			xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+	rx_ring->xdp_prog = NULL;
+	if (rx_ring->xsk_pool) {
+		kfree(rx_ring->xdp_buf);
+		rx_ring->xdp_buf = NULL;
+	} else {
+		kfree(rx_ring->rx_buf);
+		rx_ring->rx_buf = NULL;
+	}
+
+	if (rx_ring->desc) {
+		size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
+			     PAGE_SIZE);
+		dmam_free_coherent(rx_ring->dev, size,
+				   rx_ring->desc, rx_ring->dma);
+		rx_ring->desc = NULL;
+	}
+}
+
+/**
+ * ice_setup_rx_ring - Allocate the Rx descriptors
+ * @rx_ring: the Rx ring to set up
+ *
+ * Return 0 on success, negative on error
+ */
+int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
+{
+	struct device *dev = rx_ring->dev;
+	u32 size;
+
+	if (!dev)
+		return -ENOMEM;
+
+	/* warn if we are about to overwrite the pointer */
+	WARN_ON(rx_ring->rx_buf);
+	rx_ring->rx_buf =
+		kcalloc(rx_ring->count, sizeof(*rx_ring->rx_buf), GFP_KERNEL);
+	if (!rx_ring->rx_buf)
+		return -ENOMEM;
+
+	/* round up to nearest page */
+	size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
+		     PAGE_SIZE);
+	rx_ring->desc = dmam_alloc_coherent(dev, size, &rx_ring->dma,
+					    GFP_KERNEL);
+	if (!rx_ring->desc) {
+		dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
+			size);
+		goto err;
+	}
+
+	rx_ring->next_to_use = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->first_desc = 0;
+
+	if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+		WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
+
+	return 0;
+
+err:
+	kfree(rx_ring->rx_buf);
+	rx_ring->rx_buf = NULL;
+	return -ENOMEM;
+}
+
+/**
+ * ice_rx_frame_truesize
+ * @rx_ring: ptr to Rx ring
+ * @size: size
+ *
+ * calculate the truesize with taking into the account PAGE_SIZE of
+ * underlying arch
+ */
+static unsigned int
+ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size)
+{
+	unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+	truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
+#else
+	truesize = rx_ring->rx_offset ?
+		SKB_DATA_ALIGN(rx_ring->rx_offset + size) +
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
+		SKB_DATA_ALIGN(size);
+#endif
+	return truesize;
+}
+
+/**
+ * ice_run_xdp - Executes an XDP program on initialized xdp_buff
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ * @xdp_prog: XDP program to run
+ * @xdp_ring: ring to be used for XDP_TX action
+ * @rx_buf: Rx buffer to store the XDP action
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static void
+ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+	    struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
+	    struct ice_rx_buf *rx_buf)
+{
+	unsigned int ret = ICE_XDP_PASS;
+	u32 act;
+
+	if (!xdp_prog)
+		goto exit;
+
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		if (static_branch_unlikely(&ice_xdp_locking_key))
+			spin_lock(&xdp_ring->tx_lock);
+		ret = __ice_xmit_xdp_ring(xdp, xdp_ring, false);
+		if (static_branch_unlikely(&ice_xdp_locking_key))
+			spin_unlock(&xdp_ring->tx_lock);
+		if (ret == ICE_XDP_CONSUMED)
+			goto out_failure;
+		break;
+	case XDP_REDIRECT:
+		if (xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))
+			goto out_failure;
+		ret = ICE_XDP_REDIR;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
+		fallthrough;
+	case XDP_ABORTED:
+out_failure:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		fallthrough;
+	case XDP_DROP:
+		ret = ICE_XDP_CONSUMED;
+	}
+exit:
+	ice_set_rx_bufs_act(xdp, rx_ring, ret);
+}
+
+/**
+ * ice_xmit_xdp_ring - submit frame to XDP ring for transmission
+ * @xdpf: XDP frame that will be converted to XDP buff
+ * @xdp_ring: XDP ring for transmission
+ */
+static int ice_xmit_xdp_ring(const struct xdp_frame *xdpf,
+			     struct ice_tx_ring *xdp_ring)
+{
+	struct xdp_buff xdp;
+
+	xdp.data_hard_start = (void *)xdpf;
+	xdp.data = xdpf->data;
+	xdp.data_end = xdp.data + xdpf->len;
+	xdp.frame_sz = xdpf->frame_sz;
+	xdp.flags = xdpf->flags;
+
+	return __ice_xmit_xdp_ring(&xdp, xdp_ring, true);
+}
+
+/**
+ * ice_xdp_xmit - submit packets to XDP ring for transmission
+ * @dev: netdev
+ * @n: number of XDP frames to be transmitted
+ * @frames: XDP frames to be transmitted
+ * @flags: transmit flags
+ *
+ * Returns number of frames successfully sent. Failed frames
+ * will be free'ed by XDP core.
+ * For error cases, a negative errno code is returned and no-frames
+ * are transmitted (caller must handle freeing frames).
+ */
+int
+ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+	     u32 flags)
+{
+	struct ice_netdev_priv *np = netdev_priv(dev);
+	unsigned int queue_index = smp_processor_id();
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_tx_ring *xdp_ring;
+	struct ice_tx_buf *tx_buf;
+	int nxmit = 0, i;
+
+	if (test_bit(ICE_VSI_DOWN, vsi->state))
+		return -ENETDOWN;
+
+	if (!ice_is_xdp_ena_vsi(vsi))
+		return -ENXIO;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	if (static_branch_unlikely(&ice_xdp_locking_key)) {
+		queue_index %= vsi->num_xdp_txq;
+		xdp_ring = vsi->xdp_rings[queue_index];
+		spin_lock(&xdp_ring->tx_lock);
+	} else {
+		/* Generally, should not happen */
+		if (unlikely(queue_index >= vsi->num_xdp_txq))
+			return -ENXIO;
+		xdp_ring = vsi->xdp_rings[queue_index];
+	}
+
+	tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
+	for (i = 0; i < n; i++) {
+		const struct xdp_frame *xdpf = frames[i];
+		int err;
+
+		err = ice_xmit_xdp_ring(xdpf, xdp_ring);
+		if (err != ICE_XDP_TX)
+			break;
+		nxmit++;
+	}
+
+	tx_buf->rs_idx = ice_set_rs_bit(xdp_ring);
+	if (unlikely(flags & XDP_XMIT_FLUSH))
+		ice_xdp_ring_update_tail(xdp_ring);
+
+	if (static_branch_unlikely(&ice_xdp_locking_key))
+		spin_unlock(&xdp_ring->tx_lock);
+
+	return nxmit;
+}
+
+/**
+ * ice_alloc_mapped_page - recycle or make a new page
+ * @rx_ring: ring to use
+ * @bi: rx_buf struct to modify
+ *
+ * Returns true if the page was successfully allocated or
+ * reused.
+ */
+static bool
+ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi)
+{
+	struct page *page = bi->page;
+	dma_addr_t dma;
+
+	/* since we are recycling buffers we should seldom need to alloc */
+	if (likely(page))
+		return true;
+
+	/* alloc new page for storage */
+	page = dev_alloc_pages(ice_rx_pg_order(rx_ring));
+	if (unlikely(!page)) {
+		rx_ring->ring_stats->rx_stats.alloc_page_failed++;
+		return false;
+	}
+
+	/* map page for use */
+	dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring),
+				 DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
+
+	/* if mapping failed free memory back to system since
+	 * there isn't much point in holding memory we can't use
+	 */
+	if (dma_mapping_error(rx_ring->dev, dma)) {
+		__free_pages(page, ice_rx_pg_order(rx_ring));
+		rx_ring->ring_stats->rx_stats.alloc_page_failed++;
+		return false;
+	}
+
+	bi->dma = dma;
+	bi->page = page;
+	bi->page_offset = rx_ring->rx_offset;
+	page_ref_add(page, USHRT_MAX - 1);
+	bi->pagecnt_bias = USHRT_MAX;
+
+	return true;
+}
+
+/**
+ * ice_alloc_rx_bufs - Replace used receive buffers
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ *
+ * Returns false if all allocations were successful, true if any fail. Returning
+ * true signals to the caller that we didn't replace cleaned_count buffers and
+ * there is more work to do.
+ *
+ * First, try to clean "cleaned_count" Rx buffers. Then refill the cleaned Rx
+ * buffers. Then bump tail at most one time. Grouping like this lets us avoid
+ * multiple tail writes per call.
+ */
+bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count)
+{
+	union ice_32b_rx_flex_desc *rx_desc;
+	u16 ntu = rx_ring->next_to_use;
+	struct ice_rx_buf *bi;
+
+	/* do nothing if no valid netdev defined */
+	if ((!rx_ring->netdev && rx_ring->vsi->type != ICE_VSI_CTRL) ||
+	    !cleaned_count)
+		return false;
+
+	/* get the Rx descriptor and buffer based on next_to_use */
+	rx_desc = ICE_RX_DESC(rx_ring, ntu);
+	bi = &rx_ring->rx_buf[ntu];
+
+	do {
+		/* if we fail here, we have work remaining */
+		if (!ice_alloc_mapped_page(rx_ring, bi))
+			break;
+
+		/* sync the buffer for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+						 bi->page_offset,
+						 rx_ring->rx_buf_len,
+						 DMA_FROM_DEVICE);
+
+		/* Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+		rx_desc++;
+		bi++;
+		ntu++;
+		if (unlikely(ntu == rx_ring->count)) {
+			rx_desc = ICE_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_buf;
+			ntu = 0;
+		}
+
+		/* clear the status bits for the next_to_use descriptor */
+		rx_desc->wb.status_error0 = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	if (rx_ring->next_to_use != ntu)
+		ice_release_rx_desc(rx_ring, ntu);
+
+	return !!cleaned_count;
+}
+
+/**
+ * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse
+ * @rx_buf: Rx buffer to adjust
+ * @size: Size of adjustment
+ *
+ * Update the offset within page so that Rx buf will be ready to be reused.
+ * For systems with PAGE_SIZE < 8192 this function will flip the page offset
+ * so the second half of page assigned to Rx buffer will be used, otherwise
+ * the offset is moved by "size" bytes
+ */
+static void
+ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
+{
+#if (PAGE_SIZE < 8192)
+	/* flip page offset to other buffer */
+	rx_buf->page_offset ^= size;
+#else
+	/* move offset up to the next cache line */
+	rx_buf->page_offset += size;
+#endif
+}
+
+/**
+ * ice_can_reuse_rx_page - Determine if page can be reused for another Rx
+ * @rx_buf: buffer containing the page
+ *
+ * If page is reusable, we have a green light for calling ice_reuse_rx_page,
+ * which will assign the current buffer to the buffer that next_to_alloc is
+ * pointing to; otherwise, the DMA mapping needs to be destroyed and
+ * page freed
+ */
+static bool
+ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
+{
+	unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
+	struct page *page = rx_buf->page;
+
+	/* avoid re-using remote and pfmemalloc pages */
+	if (!dev_page_is_reusable(page))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1))
+		return false;
+#else
+#define ICE_LAST_OFFSET \
+	(SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048)
+	if (rx_buf->page_offset > ICE_LAST_OFFSET)
+		return false;
+#endif /* PAGE_SIZE < 8192) */
+
+	/* If we have drained the page fragment pool we need to update
+	 * the pagecnt_bias and page count so that we fully restock the
+	 * number of references the driver holds.
+	 */
+	if (unlikely(pagecnt_bias == 1)) {
+		page_ref_add(page, USHRT_MAX - 1);
+		rx_buf->pagecnt_bias = USHRT_MAX;
+	}
+
+	return true;
+}
+
+/**
+ * ice_add_xdp_frag - Add contents of Rx buffer to xdp buf as a frag
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @xdp: xdp buff to place the data into
+ * @rx_buf: buffer containing page to add
+ * @size: packet length from rx_desc
+ *
+ * This function will add the data contained in rx_buf->page to the xdp buf.
+ * It will just attach the page as a frag.
+ */
+static int
+ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+		 struct ice_rx_buf *rx_buf, const unsigned int size)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+
+	if (!size)
+		return 0;
+
+	if (!xdp_buff_has_frags(xdp)) {
+		sinfo->nr_frags = 0;
+		sinfo->xdp_frags_size = 0;
+		xdp_buff_set_frags_flag(xdp);
+	}
+
+	if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
+		ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
+		return -ENOMEM;
+	}
+
+	__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
+				   rx_buf->page_offset, size);
+	sinfo->xdp_frags_size += size;
+	/* remember frag count before XDP prog execution; bpf_xdp_adjust_tail()
+	 * can pop off frags but driver has to handle it on its own
+	 */
+	rx_ring->nr_frags = sinfo->nr_frags;
+
+	if (page_is_pfmemalloc(rx_buf->page))
+		xdp_buff_set_frag_pfmemalloc(xdp);
+
+	return 0;
+}
+
+/**
+ * ice_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: Rx descriptor ring to store buffers on
+ * @old_buf: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ */
+static void
+ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf)
+{
+	u16 nta = rx_ring->next_to_alloc;
+	struct ice_rx_buf *new_buf;
+
+	new_buf = &rx_ring->rx_buf[nta];
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* Transfer page from old buffer to new buffer.
+	 * Move each member individually to avoid possible store
+	 * forwarding stalls and unnecessary copy of skb.
+	 */
+	new_buf->dma = old_buf->dma;
+	new_buf->page = old_buf->page;
+	new_buf->page_offset = old_buf->page_offset;
+	new_buf->pagecnt_bias = old_buf->pagecnt_bias;
+}
+
+/**
+ * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @size: size of buffer to add to skb
+ * @ntc: index of next to clean element
+ *
+ * This function will pull an Rx buffer from the ring and synchronize it
+ * for use by the CPU.
+ */
+static struct ice_rx_buf *
+ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
+	       const unsigned int ntc)
+{
+	struct ice_rx_buf *rx_buf;
+
+	rx_buf = &rx_ring->rx_buf[ntc];
+	rx_buf->pgcnt =
+#if (PAGE_SIZE < 8192)
+		page_count(rx_buf->page);
+#else
+		0;
+#endif
+	prefetchw(rx_buf->page);
+
+	if (!size)
+		return rx_buf;
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,
+				      rx_buf->page_offset, size,
+				      DMA_FROM_DEVICE);
+
+	/* We have pulled a buffer for use, so decrement pagecnt_bias */
+	rx_buf->pagecnt_bias--;
+
+	return rx_buf;
+}
+
+/**
+ * ice_build_skb - Build skb around an existing buffer
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @xdp: xdp_buff pointing to the data
+ *
+ * This function builds an skb around an existing XDP buffer, taking care
+ * to set up the skb correctly and avoid any memcpy overhead. Driver has
+ * already combined frags (if any) to skb_shared_info.
+ */
+static struct sk_buff *
+ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
+{
+	u8 metasize = xdp->data - xdp->data_meta;
+	struct skb_shared_info *sinfo = NULL;
+	unsigned int nr_frags;
+	struct sk_buff *skb;
+
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+	}
+
+	/* Prefetch first cache line of first page. If xdp->data_meta
+	 * is unused, this points exactly as xdp->data, otherwise we
+	 * likely have a consumer accessing first few bytes of meta
+	 * data, and then actual data.
+	 */
+	net_prefetch(xdp->data_meta);
+	/* build an skb around the page buffer */
+	skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* must to record Rx queue, otherwise OS features such as
+	 * symmetric queue won't work
+	 */
+	skb_record_rx_queue(skb, rx_ring->q_index);
+
+	/* update pointers within the skb to store the data */
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	__skb_put(skb, xdp->data_end - xdp->data);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	if (unlikely(xdp_buff_has_frags(xdp)))
+		xdp_update_skb_shared_info(skb, nr_frags,
+					   sinfo->xdp_frags_size,
+					   nr_frags * xdp->frame_sz,
+					   xdp_buff_is_frag_pfmemalloc(xdp));
+
+	return skb;
+}
+
+/**
+ * ice_construct_skb - Allocate skb and populate it
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @xdp: xdp_buff pointing to the data
+ *
+ * This function allocates an skb. It then populates it with the page
+ * data from the current receive descriptor, taking care to set up the
+ * skb correctly.
+ */
+static struct sk_buff *
+ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
+{
+	unsigned int size = xdp->data_end - xdp->data;
+	struct skb_shared_info *sinfo = NULL;
+	struct ice_rx_buf *rx_buf;
+	unsigned int nr_frags = 0;
+	unsigned int headlen;
+	struct sk_buff *skb;
+
+	/* prefetch first cache line of first page */
+	net_prefetch(xdp->data);
+
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+	}
+
+	/* allocate a skb to store the frags */
+	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+
+	rx_buf = &rx_ring->rx_buf[rx_ring->first_desc];
+	skb_record_rx_queue(skb, rx_ring->q_index);
+	/* Determine available headroom for copy */
+	headlen = size;
+	if (headlen > ICE_RX_HDR_SIZE)
+		headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
+							 sizeof(long)));
+
+	/* if we exhaust the linear part then add what is left as a frag */
+	size -= headlen;
+	if (size) {
+		/* besides adding here a partial frag, we are going to add
+		 * frags from xdp_buff, make sure there is enough space for
+		 * them
+		 */
+		if (unlikely(nr_frags >= MAX_SKB_FRAGS - 1)) {
+			dev_kfree_skb(skb);
+			return NULL;
+		}
+		skb_add_rx_frag(skb, 0, rx_buf->page,
+				rx_buf->page_offset + headlen, size,
+				xdp->frame_sz);
+	} else {
+		/* buffer is unused, change the act that should be taken later
+		 * on; data was copied onto skb's linear part so there's no
+		 * need for adjusting page offset and we can reuse this buffer
+		 * as-is
+		 */
+		rx_buf->act = ICE_SKB_CONSUMED;
+	}
+
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		struct skb_shared_info *skinfo = skb_shinfo(skb);
+
+		memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
+		       sizeof(skb_frag_t) * nr_frags);
+
+		xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags,
+					   sinfo->xdp_frags_size,
+					   nr_frags * xdp->frame_sz,
+					   xdp_buff_is_frag_pfmemalloc(xdp));
+	}
+
+	return skb;
+}
+
+/**
+ * ice_put_rx_buf - Clean up used buffer and either recycle or free
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @rx_buf: Rx buffer to pull data from
+ *
+ * This function will clean up the contents of the rx_buf. It will either
+ * recycle the buffer or unmap it and free the associated resources.
+ */
+static void
+ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
+{
+	if (!rx_buf)
+		return;
+
+	if (ice_can_reuse_rx_page(rx_buf)) {
+		/* hand second half of page back to the ring */
+		ice_reuse_rx_page(rx_ring, rx_buf);
+	} else {
+		/* we are not reusing the buffer so unmap it */
+		dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma,
+				     ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
+				     ICE_RX_DMA_ATTR);
+		__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
+	}
+
+	/* clear contents of buffer_info */
+	rx_buf->page = NULL;
+}
+
+/**
+ * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * @rx_ring: Rx descriptor ring to transact packets on
+ * @budget: Total limit on number of packets to process
+ *
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing. The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the system.
+ *
+ * Returns amount of work completed
+ */
+int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
+	unsigned int offset = rx_ring->rx_offset;
+	struct xdp_buff *xdp = &rx_ring->xdp;
+	u32 cached_ntc = rx_ring->first_desc;
+	struct ice_tx_ring *xdp_ring = NULL;
+	struct bpf_prog *xdp_prog = NULL;
+	u32 ntc = rx_ring->next_to_clean;
+	u32 cnt = rx_ring->count;
+	u32 xdp_xmit = 0;
+	u32 cached_ntu;
+	bool failure;
+	u32 first;
+
+	/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
+#if (PAGE_SIZE < 8192)
+	xdp->frame_sz = ice_rx_frame_truesize(rx_ring, 0);
+#endif
+
+	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+	if (xdp_prog) {
+		xdp_ring = rx_ring->xdp_ring;
+		cached_ntu = xdp_ring->next_to_use;
+	}
+
+	/* start the loop to process Rx packets bounded by 'budget' */
+	while (likely(total_rx_pkts < (unsigned int)budget)) {
+		union ice_32b_rx_flex_desc *rx_desc;
+		struct ice_rx_buf *rx_buf;
+		struct sk_buff *skb;
+		unsigned int size;
+		u16 stat_err_bits;
+		u16 vlan_tag = 0;
+		u16 rx_ptype;
+
+		/* get the Rx desc from Rx ring based on 'next_to_clean' */
+		rx_desc = ICE_RX_DESC(rx_ring, ntc);
+
+		/* status_error_len will always be zero for unused descriptors
+		 * because it's cleared in cleanup, and overlaps with hdr_addr
+		 * which is always zero because packet split isn't used, if the
+		 * hardware wrote DD then it will be non-zero
+		 */
+		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
+		if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * DD bit is set.
+		 */
+		dma_rmb();
+
+		ice_trace(clean_rx_irq, rx_ring, rx_desc);
+		if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) {
+			struct ice_vsi *ctrl_vsi = rx_ring->vsi;
+
+			if (rx_desc->wb.rxdid == FDIR_DESC_RXDID &&
+			    ctrl_vsi->vf)
+				ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc);
+			if (++ntc == cnt)
+				ntc = 0;
+			rx_ring->first_desc = ntc;
+			continue;
+		}
+
+		size = le16_to_cpu(rx_desc->wb.pkt_len) &
+			ICE_RX_FLX_DESC_PKT_LEN_M;
+
+		/* retrieve a buffer from the ring */
+		rx_buf = ice_get_rx_buf(rx_ring, size, ntc);
+
+		if (!xdp->data) {
+			void *hard_start;
+
+			hard_start = page_address(rx_buf->page) + rx_buf->page_offset -
+				     offset;
+			xdp_prepare_buff(xdp, hard_start, offset, size, !!offset);
+#if (PAGE_SIZE > 4096)
+			/* At larger PAGE_SIZE, frame_sz depend on len size */
+			xdp->frame_sz = ice_rx_frame_truesize(rx_ring, size);
+#endif
+			xdp_buff_clear_frags_flag(xdp);
+		} else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) {
+			break;
+		}
+		if (++ntc == cnt)
+			ntc = 0;
+
+		/* skip if it is NOP desc */
+		if (ice_is_non_eop(rx_ring, rx_desc))
+			continue;
+
+		ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf);
+		if (rx_buf->act == ICE_XDP_PASS)
+			goto construct_skb;
+		total_rx_bytes += xdp_get_buff_len(xdp);
+		total_rx_pkts++;
+
+		xdp->data = NULL;
+		rx_ring->first_desc = ntc;
+		rx_ring->nr_frags = 0;
+		continue;
+construct_skb:
+		if (likely(ice_ring_uses_build_skb(rx_ring)))
+			skb = ice_build_skb(rx_ring, xdp);
+		else
+			skb = ice_construct_skb(rx_ring, xdp);
+		/* exit if we failed to retrieve a buffer */
+		if (!skb) {
+			rx_ring->ring_stats->rx_stats.alloc_page_failed++;
+			rx_buf->act = ICE_XDP_CONSUMED;
+			if (unlikely(xdp_buff_has_frags(xdp)))
+				ice_set_rx_bufs_act(xdp, rx_ring,
+						    ICE_XDP_CONSUMED);
+			xdp->data = NULL;
+			rx_ring->first_desc = ntc;
+			rx_ring->nr_frags = 0;
+			break;
+		}
+		xdp->data = NULL;
+		rx_ring->first_desc = ntc;
+		rx_ring->nr_frags = 0;
+
+		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
+		if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
+					      stat_err_bits))) {
+			dev_kfree_skb_any(skb);
+			continue;
+		}
+
+		vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc);
+
+		/* pad the skb if needed, to make a valid ethernet frame */
+		if (eth_skb_pad(skb))
+			continue;
+
+		/* probably a little skewed due to removing CRC */
+		total_rx_bytes += skb->len;
+
+		/* populate checksum, VLAN, and protocol */
+		rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+			ICE_RX_FLEX_DESC_PTYPE_M;
+
+		ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+
+		ice_trace(clean_rx_irq_indicate, rx_ring, rx_desc, skb);
+		/* send completed skb up the stack */
+		ice_receive_skb(rx_ring, skb, vlan_tag);
+
+		/* update budget accounting */
+		total_rx_pkts++;
+	}
+
+	first = rx_ring->first_desc;
+	while (cached_ntc != first) {
+		struct ice_rx_buf *buf = &rx_ring->rx_buf[cached_ntc];
+
+		if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+			ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
+			xdp_xmit |= buf->act;
+		} else if (buf->act & ICE_XDP_CONSUMED) {
+			buf->pagecnt_bias++;
+		} else if (buf->act == ICE_XDP_PASS) {
+			ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
+		}
+
+		ice_put_rx_buf(rx_ring, buf);
+		if (++cached_ntc >= cnt)
+			cached_ntc = 0;
+	}
+	rx_ring->next_to_clean = ntc;
+	/* return up to cleaned_count buffers to hardware */
+	failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring));
+
+	if (xdp_xmit)
+		ice_finalize_xdp_rx(xdp_ring, xdp_xmit, cached_ntu);
+
+	if (rx_ring->ring_stats)
+		ice_update_rx_ring_stats(rx_ring, total_rx_pkts,
+					 total_rx_bytes);
+
+	/* guarantee a trip back through this routine if there was a failure */
+	return failure ? budget : (int)total_rx_pkts;
+}
+
+static void __ice_update_sample(struct ice_q_vector *q_vector,
+				struct ice_ring_container *rc,
+				struct dim_sample *sample,
+				bool is_tx)
+{
+	u64 packets = 0, bytes = 0;
+
+	if (is_tx) {
+		struct ice_tx_ring *tx_ring;
+
+		ice_for_each_tx_ring(tx_ring, *rc) {
+			struct ice_ring_stats *ring_stats;
+
+			ring_stats = tx_ring->ring_stats;
+			if (!ring_stats)
+				continue;
+			packets += ring_stats->stats.pkts;
+			bytes += ring_stats->stats.bytes;
+		}
+	} else {
+		struct ice_rx_ring *rx_ring;
+
+		ice_for_each_rx_ring(rx_ring, *rc) {
+			struct ice_ring_stats *ring_stats;
+
+			ring_stats = rx_ring->ring_stats;
+			if (!ring_stats)
+				continue;
+			packets += ring_stats->stats.pkts;
+			bytes += ring_stats->stats.bytes;
+		}
+	}
+
+	dim_update_sample(q_vector->total_events, packets, bytes, sample);
+	sample->comp_ctr = 0;
+
+	/* if dim settings get stale, like when not updated for 1
+	 * second or longer, force it to start again. This addresses the
+	 * frequent case of an idle queue being switched to by the
+	 * scheduler. The 1,000 here means 1,000 milliseconds.
+	 */
+	if (ktime_ms_delta(sample->time, rc->dim.start_sample.time) >= 1000)
+		rc->dim.state = DIM_START_MEASURE;
+}
+
+/**
+ * ice_net_dim - Update net DIM algorithm
+ * @q_vector: the vector associated with the interrupt
+ *
+ * Create a DIM sample and notify net_dim() so that it can possibly decide
+ * a new ITR value based on incoming packets, bytes, and interrupts.
+ *
+ * This function is a no-op if the ring is not configured to dynamic ITR.
+ */
+static void ice_net_dim(struct ice_q_vector *q_vector)
+{
+	struct ice_ring_container *tx = &q_vector->tx;
+	struct ice_ring_container *rx = &q_vector->rx;
+
+	if (ITR_IS_DYNAMIC(tx)) {
+		struct dim_sample dim_sample;
+
+		__ice_update_sample(q_vector, tx, &dim_sample, true);
+		net_dim(&tx->dim, dim_sample);
+	}
+
+	if (ITR_IS_DYNAMIC(rx)) {
+		struct dim_sample dim_sample;
+
+		__ice_update_sample(q_vector, rx, &dim_sample, false);
+		net_dim(&rx->dim, dim_sample);
+	}
+}
+
+/**
+ * ice_buildreg_itr - build value for writing to the GLINT_DYN_CTL register
+ * @itr_idx: interrupt throttling index
+ * @itr: interrupt throttling value in usecs
+ */
+static u32 ice_buildreg_itr(u16 itr_idx, u16 itr)
+{
+	/* The ITR value is reported in microseconds, and the register value is
+	 * recorded in 2 microsecond units. For this reason we only need to
+	 * shift by the GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this
+	 * granularity as a shift instead of division. The mask makes sure the
+	 * ITR value is never odd so we don't accidentally write into the field
+	 * prior to the ITR field.
+	 */
+	itr &= ICE_ITR_MASK;
+
+	return GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
+		(itr_idx << GLINT_DYN_CTL_ITR_INDX_S) |
+		(itr << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S));
+}
+
+/**
+ * ice_enable_interrupt - re-enable MSI-X interrupt
+ * @q_vector: the vector associated with the interrupt to enable
+ *
+ * If the VSI is down, the interrupt will not be re-enabled. Also,
+ * when enabling the interrupt always reset the wb_on_itr to false
+ * and trigger a software interrupt to clean out internal state.
+ */
+static void ice_enable_interrupt(struct ice_q_vector *q_vector)
+{
+	struct ice_vsi *vsi = q_vector->vsi;
+	bool wb_en = q_vector->wb_on_itr;
+	u32 itr_val;
+
+	if (test_bit(ICE_DOWN, vsi->state))
+		return;
+
+	/* trigger an ITR delayed software interrupt when exiting busy poll, to
+	 * make sure to catch any pending cleanups that might have been missed
+	 * due to interrupt state transition. If busy poll or poll isn't
+	 * enabled, then don't update ITR, and just enable the interrupt.
+	 */
+	if (!wb_en) {
+		itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0);
+	} else {
+		q_vector->wb_on_itr = false;
+
+		/* do two things here with a single write. Set up the third ITR
+		 * index to be used for software interrupt moderation, and then
+		 * trigger a software interrupt with a rate limit of 20K on
+		 * software interrupts, this will help avoid high interrupt
+		 * loads due to frequently polling and exiting polling.
+		 */
+		itr_val = ice_buildreg_itr(ICE_IDX_ITR2, ICE_ITR_20K);
+		itr_val |= GLINT_DYN_CTL_SWINT_TRIG_M |
+			   ICE_IDX_ITR2 << GLINT_DYN_CTL_SW_ITR_INDX_S |
+			   GLINT_DYN_CTL_SW_ITR_INDX_ENA_M;
+	}
+	wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
+}
+
+/**
+ * ice_set_wb_on_itr - set WB_ON_ITR for this q_vector
+ * @q_vector: q_vector to set WB_ON_ITR on
+ *
+ * We need to tell hardware to write-back completed descriptors even when
+ * interrupts are disabled. Descriptors will be written back on cache line
+ * boundaries without WB_ON_ITR enabled, but if we don't enable WB_ON_ITR
+ * descriptors may not be written back if they don't fill a cache line until
+ * the next interrupt.
+ *
+ * This sets the write-back frequency to whatever was set previously for the
+ * ITR indices. Also, set the INTENA_MSK bit to make sure hardware knows we
+ * aren't meddling with the INTENA_M bit.
+ */
+static void ice_set_wb_on_itr(struct ice_q_vector *q_vector)
+{
+	struct ice_vsi *vsi = q_vector->vsi;
+
+	/* already in wb_on_itr mode no need to change it */
+	if (q_vector->wb_on_itr)
+		return;
+
+	/* use previously set ITR values for all of the ITR indices by
+	 * specifying ICE_ITR_NONE, which will vary in adaptive (AIM) mode and
+	 * be static in non-adaptive mode (user configured)
+	 */
+	wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
+	     ((ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) &
+	      GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M |
+	     GLINT_DYN_CTL_WB_ON_ITR_M);
+
+	q_vector->wb_on_itr = true;
+}
+
+/**
+ * ice_napi_poll - NAPI polling Rx/Tx cleanup routine
+ * @napi: napi struct with our devices info in it
+ * @budget: amount of work driver is allowed to do this pass, in packets
+ *
+ * This function will clean all queues associated with a q_vector.
+ *
+ * Returns the amount of work done
+ */
+int ice_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct ice_q_vector *q_vector =
+				container_of(napi, struct ice_q_vector, napi);
+	struct ice_tx_ring *tx_ring;
+	struct ice_rx_ring *rx_ring;
+	bool clean_complete = true;
+	int budget_per_ring;
+	int work_done = 0;
+
+	/* Since the actual Tx work is minimal, we can give the Tx a larger
+	 * budget and be more aggressive about cleaning up the Tx descriptors.
+	 */
+	ice_for_each_tx_ring(tx_ring, q_vector->tx) {
+		bool wd;
+
+		if (tx_ring->xsk_pool)
+			wd = ice_xmit_zc(tx_ring);
+		else if (ice_ring_is_xdp(tx_ring))
+			wd = true;
+		else
+			wd = ice_clean_tx_irq(tx_ring, budget);
+
+		if (!wd)
+			clean_complete = false;
+	}
+
+	/* Handle case where we are called by netpoll with a budget of 0 */
+	if (unlikely(budget <= 0))
+		return budget;
+
+	/* normally we have 1 Rx ring per q_vector */
+	if (unlikely(q_vector->num_ring_rx > 1))
+		/* We attempt to distribute budget to each Rx queue fairly, but
+		 * don't allow the budget to go below 1 because that would exit
+		 * polling early.
+		 */
+		budget_per_ring = max_t(int, budget / q_vector->num_ring_rx, 1);
+	else
+		/* Max of 1 Rx ring in this q_vector so give it the budget */
+		budget_per_ring = budget;
+
+	ice_for_each_rx_ring(rx_ring, q_vector->rx) {
+		int cleaned;
+
+		/* A dedicated path for zero-copy allows making a single
+		 * comparison in the irq context instead of many inside the
+		 * ice_clean_rx_irq function and makes the codebase cleaner.
+		 */
+		cleaned = rx_ring->xsk_pool ?
+			  ice_clean_rx_irq_zc(rx_ring, budget_per_ring) :
+			  ice_clean_rx_irq(rx_ring, budget_per_ring);
+		work_done += cleaned;
+		/* if we clean as many as budgeted, we must not be done */
+		if (cleaned >= budget_per_ring)
+			clean_complete = false;
+	}
+
+	/* If work not completed, return budget and polling will return */
+	if (!clean_complete) {
+		/* Set the writeback on ITR so partial completions of
+		 * cache-lines will still continue even if we're polling.
+		 */
+		ice_set_wb_on_itr(q_vector);
+		return budget;
+	}
+
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (napi_complete_done(napi, work_done)) {
+		ice_net_dim(q_vector);
+		ice_enable_interrupt(q_vector);
+	} else {
+		ice_set_wb_on_itr(q_vector);
+	}
+
+	return min_t(int, work_done, budget - 1);
+}
+
+/**
+ * __ice_maybe_stop_tx - 2nd level check for Tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size: the size buffer we want to assure is available
+ *
+ * Returns -EBUSY if a stop is needed, else 0
+ */
+static int __ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size)
+{
+	netif_tx_stop_queue(txring_txq(tx_ring));
+	/* Memory barrier before checking head and tail */
+	smp_mb();
+
+	/* Check again in a case another CPU has just made room available. */
+	if (likely(ICE_DESC_UNUSED(tx_ring) < size))
+		return -EBUSY;
+
+	/* A reprieve! - use start_queue because it doesn't call schedule */
+	netif_tx_start_queue(txring_txq(tx_ring));
+	++tx_ring->ring_stats->tx_stats.restart_q;
+	return 0;
+}
+
+/**
+ * ice_maybe_stop_tx - 1st level check for Tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size:    the size buffer we want to assure is available
+ *
+ * Returns 0 if stop is not needed
+ */
+static int ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size)
+{
+	if (likely(ICE_DESC_UNUSED(tx_ring) >= size))
+		return 0;
+
+	return __ice_maybe_stop_tx(tx_ring, size);
+}
+
+/**
+ * ice_tx_map - Build the Tx descriptor
+ * @tx_ring: ring to send buffer on
+ * @first: first buffer info buffer to use
+ * @off: pointer to struct that holds offload parameters
+ *
+ * This function loops over the skb data pointed to by *first
+ * and gets a physical address for each memory location and programs
+ * it and the length into the transmit descriptor.
+ */
+static void
+ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first,
+	   struct ice_tx_offload_params *off)
+{
+	u64 td_offset, td_tag, td_cmd;
+	u16 i = tx_ring->next_to_use;
+	unsigned int data_len, size;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	struct sk_buff *skb;
+	skb_frag_t *frag;
+	dma_addr_t dma;
+	bool kick;
+
+	td_tag = off->td_l2tag1;
+	td_cmd = off->td_cmd;
+	td_offset = off->td_offset;
+	skb = first->skb;
+
+	data_len = skb->data_len;
+	size = skb_headlen(skb);
+
+	tx_desc = ICE_TX_DESC(tx_ring, i);
+
+	if (first->tx_flags & ICE_TX_FLAGS_HW_VLAN) {
+		td_cmd |= (u64)ICE_TX_DESC_CMD_IL2TAG1;
+		td_tag = first->vid;
+	}
+
+	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+	tx_buf = first;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		unsigned int max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
+
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_error;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buf, len, size);
+		dma_unmap_addr_set(tx_buf, dma, dma);
+
+		/* align size to end of page */
+		max_data += -dma & (ICE_MAX_READ_REQ_SIZE - 1);
+		tx_desc->buf_addr = cpu_to_le64(dma);
+
+		/* account for data chunks larger than the hardware
+		 * can handle
+		 */
+		while (unlikely(size > ICE_MAX_DATA_PER_TXD)) {
+			tx_desc->cmd_type_offset_bsz =
+				ice_build_ctob(td_cmd, td_offset, max_data,
+					       td_tag);
+
+			tx_desc++;
+			i++;
+
+			if (i == tx_ring->count) {
+				tx_desc = ICE_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+
+			dma += max_data;
+			size -= max_data;
+
+			max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
+			tx_desc->buf_addr = cpu_to_le64(dma);
+		}
+
+		if (likely(!data_len))
+			break;
+
+		tx_desc->cmd_type_offset_bsz = ice_build_ctob(td_cmd, td_offset,
+							      size, td_tag);
+
+		tx_desc++;
+		i++;
+
+		if (i == tx_ring->count) {
+			tx_desc = ICE_TX_DESC(tx_ring, 0);
+			i = 0;
+		}
+
+		size = skb_frag_size(frag);
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+				       DMA_TO_DEVICE);
+
+		tx_buf = &tx_ring->tx_buf[i];
+		tx_buf->type = ICE_TX_BUF_FRAG;
+	}
+
+	/* record SW timestamp if HW timestamp is not available */
+	skb_tx_timestamp(first->skb);
+
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
+
+	/* write last descriptor with RS and EOP bits */
+	td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD;
+	tx_desc->cmd_type_offset_bsz =
+			ice_build_ctob(td_cmd, td_offset, size, td_tag);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.
+	 *
+	 * We also use this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	tx_ring->next_to_use = i;
+
+	ice_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	/* notify HW of packet */
+	kick = __netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount,
+				      netdev_xmit_more());
+	if (kick)
+		/* notify HW of packet */
+		writel(i, tx_ring->tail);
+
+	return;
+
+dma_error:
+	/* clear DMA mappings for failed tx_buf map */
+	for (;;) {
+		tx_buf = &tx_ring->tx_buf[i];
+		ice_unmap_and_free_tx_buf(tx_ring, tx_buf);
+		if (tx_buf == first)
+			break;
+		if (i == 0)
+			i = tx_ring->count;
+		i--;
+	}
+
+	tx_ring->next_to_use = i;
+}
+
+/**
+ * ice_tx_csum - Enable Tx checksum offloads
+ * @first: pointer to the first descriptor
+ * @off: pointer to struct that holds offload parameters
+ *
+ * Returns 0 or error (negative) if checksum offload can't happen, 1 otherwise.
+ */
+static
+int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
+{
+	u32 l4_len = 0, l3_len = 0, l2_len = 0;
+	struct sk_buff *skb = first->skb;
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		unsigned char *hdr;
+	} l4;
+	__be16 frag_off, protocol;
+	unsigned char *exthdr;
+	u32 offset, cmd = 0;
+	u8 l4_proto = 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	protocol = vlan_get_protocol(skb);
+
+	if (eth_p_mpls(protocol)) {
+		ip.hdr = skb_inner_network_header(skb);
+		l4.hdr = skb_checksum_start(skb);
+	} else {
+		ip.hdr = skb_network_header(skb);
+		l4.hdr = skb_transport_header(skb);
+	}
+
+	/* compute outer L2 header size */
+	l2_len = ip.hdr - skb->data;
+	offset = (l2_len / 2) << ICE_TX_DESC_LEN_MACLEN_S;
+
+	/* set the tx_flags to indicate the IP protocol type. this is
+	 * required so that checksum header computation below is accurate.
+	 */
+	if (ip.v4->version == 4)
+		first->tx_flags |= ICE_TX_FLAGS_IPV4;
+	else if (ip.v6->version == 6)
+		first->tx_flags |= ICE_TX_FLAGS_IPV6;
+
+	if (skb->encapsulation) {
+		bool gso_ena = false;
+		u32 tunnel = 0;
+
+		/* define outer network header type */
+		if (first->tx_flags & ICE_TX_FLAGS_IPV4) {
+			tunnel |= (first->tx_flags & ICE_TX_FLAGS_TSO) ?
+				  ICE_TX_CTX_EIPT_IPV4 :
+				  ICE_TX_CTX_EIPT_IPV4_NO_CSUM;
+			l4_proto = ip.v4->protocol;
+		} else if (first->tx_flags & ICE_TX_FLAGS_IPV6) {
+			int ret;
+
+			tunnel |= ICE_TX_CTX_EIPT_IPV6;
+			exthdr = ip.hdr + sizeof(*ip.v6);
+			l4_proto = ip.v6->nexthdr;
+			ret = ipv6_skip_exthdr(skb, exthdr - skb->data,
+					       &l4_proto, &frag_off);
+			if (ret < 0)
+				return -1;
+		}
+
+		/* define outer transport */
+		switch (l4_proto) {
+		case IPPROTO_UDP:
+			tunnel |= ICE_TXD_CTX_UDP_TUNNELING;
+			first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
+			break;
+		case IPPROTO_GRE:
+			tunnel |= ICE_TXD_CTX_GRE_TUNNELING;
+			first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
+			break;
+		case IPPROTO_IPIP:
+		case IPPROTO_IPV6:
+			first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
+			l4.hdr = skb_inner_network_header(skb);
+			break;
+		default:
+			if (first->tx_flags & ICE_TX_FLAGS_TSO)
+				return -1;
+
+			skb_checksum_help(skb);
+			return 0;
+		}
+
+		/* compute outer L3 header size */
+		tunnel |= ((l4.hdr - ip.hdr) / 4) <<
+			  ICE_TXD_CTX_QW0_EIPLEN_S;
+
+		/* switch IP header pointer from outer to inner header */
+		ip.hdr = skb_inner_network_header(skb);
+
+		/* compute tunnel header size */
+		tunnel |= ((ip.hdr - l4.hdr) / 2) <<
+			   ICE_TXD_CTX_QW0_NATLEN_S;
+
+		gso_ena = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL;
+		/* indicate if we need to offload outer UDP header */
+		if ((first->tx_flags & ICE_TX_FLAGS_TSO) && !gso_ena &&
+		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
+			tunnel |= ICE_TXD_CTX_QW0_L4T_CS_M;
+
+		/* record tunnel offload values */
+		off->cd_tunnel_params |= tunnel;
+
+		/* set DTYP=1 to indicate that it's an Tx context descriptor
+		 * in IPsec tunnel mode with Tx offloads in Quad word 1
+		 */
+		off->cd_qw1 |= (u64)ICE_TX_DESC_DTYPE_CTX;
+
+		/* switch L4 header pointer from outer to inner */
+		l4.hdr = skb_inner_transport_header(skb);
+		l4_proto = 0;
+
+		/* reset type as we transition from outer to inner headers */
+		first->tx_flags &= ~(ICE_TX_FLAGS_IPV4 | ICE_TX_FLAGS_IPV6);
+		if (ip.v4->version == 4)
+			first->tx_flags |= ICE_TX_FLAGS_IPV4;
+		if (ip.v6->version == 6)
+			first->tx_flags |= ICE_TX_FLAGS_IPV6;
+	}
+
+	/* Enable IP checksum offloads */
+	if (first->tx_flags & ICE_TX_FLAGS_IPV4) {
+		l4_proto = ip.v4->protocol;
+		/* the stack computes the IP header already, the only time we
+		 * need the hardware to recompute it is in the case of TSO.
+		 */
+		if (first->tx_flags & ICE_TX_FLAGS_TSO)
+			cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
+		else
+			cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;
+
+	} else if (first->tx_flags & ICE_TX_FLAGS_IPV6) {
+		cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
+		exthdr = ip.hdr + sizeof(*ip.v6);
+		l4_proto = ip.v6->nexthdr;
+		if (l4.hdr != exthdr)
+			ipv6_skip_exthdr(skb, exthdr - skb->data, &l4_proto,
+					 &frag_off);
+	} else {
+		return -1;
+	}
+
+	/* compute inner L3 header size */
+	l3_len = l4.hdr - ip.hdr;
+	offset |= (l3_len / 4) << ICE_TX_DESC_LEN_IPLEN_S;
+
+	/* Enable L4 checksum offloads */
+	switch (l4_proto) {
+	case IPPROTO_TCP:
+		/* enable checksum offloads */
+		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
+		l4_len = l4.tcp->doff;
+		offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
+		break;
+	case IPPROTO_UDP:
+		/* enable UDP checksum offload */
+		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
+		l4_len = (sizeof(struct udphdr) >> 2);
+		offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
+		break;
+	case IPPROTO_SCTP:
+		/* enable SCTP checksum offload */
+		cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP;
+		l4_len = sizeof(struct sctphdr) >> 2;
+		offset |= l4_len << ICE_TX_DESC_LEN_L4_LEN_S;
+		break;
+
+	default:
+		if (first->tx_flags & ICE_TX_FLAGS_TSO)
+			return -1;
+		skb_checksum_help(skb);
+		return 0;
+	}
+
+	off->td_cmd |= cmd;
+	off->td_offset |= offset;
+	return 1;
+}
+
+/**
+ * ice_tx_prepare_vlan_flags - prepare generic Tx VLAN tagging flags for HW
+ * @tx_ring: ring to send buffer on
+ * @first: pointer to struct ice_tx_buf
+ *
+ * Checks the skb and set up correspondingly several generic transmit flags
+ * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
+ */
+static void
+ice_tx_prepare_vlan_flags(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first)
+{
+	struct sk_buff *skb = first->skb;
+
+	/* nothing left to do, software offloaded VLAN */
+	if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol))
+		return;
+
+	/* the VLAN ethertype/tpid is determined by VSI configuration and netdev
+	 * feature flags, which the driver only allows either 802.1Q or 802.1ad
+	 * VLAN offloads exclusively so we only care about the VLAN ID here
+	 */
+	if (skb_vlan_tag_present(skb)) {
+		first->vid = skb_vlan_tag_get(skb);
+		if (tx_ring->flags & ICE_TX_FLAGS_RING_VLAN_L2TAG2)
+			first->tx_flags |= ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN;
+		else
+			first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
+	}
+
+	ice_tx_prepare_vlan_flags_dcb(tx_ring, first);
+}
+
+/**
+ * ice_tso - computes mss and TSO length to prepare for TSO
+ * @first: pointer to struct ice_tx_buf
+ * @off: pointer to struct that holds offload parameters
+ *
+ * Returns 0 or error (negative) if TSO can't happen, 1 otherwise.
+ */
+static
+int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
+{
+	struct sk_buff *skb = first->skb;
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		struct udphdr *udp;
+		unsigned char *hdr;
+	} l4;
+	u64 cd_mss, cd_tso_len;
+	__be16 protocol;
+	u32 paylen;
+	u8 l4_start;
+	int err;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	err = skb_cow_head(skb, 0);
+	if (err < 0)
+		return err;
+
+	protocol = vlan_get_protocol(skb);
+
+	if (eth_p_mpls(protocol))
+		ip.hdr = skb_inner_network_header(skb);
+	else
+		ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_checksum_start(skb);
+
+	/* initialize outer IP header fields */
+	if (ip.v4->version == 4) {
+		ip.v4->tot_len = 0;
+		ip.v4->check = 0;
+	} else {
+		ip.v6->payload_len = 0;
+	}
+
+	if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
+					 SKB_GSO_GRE_CSUM |
+					 SKB_GSO_IPXIP4 |
+					 SKB_GSO_IPXIP6 |
+					 SKB_GSO_UDP_TUNNEL |
+					 SKB_GSO_UDP_TUNNEL_CSUM)) {
+		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
+		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) {
+			l4.udp->len = 0;
+
+			/* determine offset of outer transport header */
+			l4_start = (u8)(l4.hdr - skb->data);
+
+			/* remove payload length from outer checksum */
+			paylen = skb->len - l4_start;
+			csum_replace_by_diff(&l4.udp->check,
+					     (__force __wsum)htonl(paylen));
+		}
+
+		/* reset pointers to inner headers */
+		ip.hdr = skb_inner_network_header(skb);
+		l4.hdr = skb_inner_transport_header(skb);
+
+		/* initialize inner IP header fields */
+		if (ip.v4->version == 4) {
+			ip.v4->tot_len = 0;
+			ip.v4->check = 0;
+		} else {
+			ip.v6->payload_len = 0;
+		}
+	}
+
+	/* determine offset of transport header */
+	l4_start = (u8)(l4.hdr - skb->data);
+
+	/* remove payload length from checksum */
+	paylen = skb->len - l4_start;
+
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+		csum_replace_by_diff(&l4.udp->check,
+				     (__force __wsum)htonl(paylen));
+		/* compute length of UDP segmentation header */
+		off->header_len = (u8)sizeof(l4.udp) + l4_start;
+	} else {
+		csum_replace_by_diff(&l4.tcp->check,
+				     (__force __wsum)htonl(paylen));
+		/* compute length of TCP segmentation header */
+		off->header_len = (u8)((l4.tcp->doff * 4) + l4_start);
+	}
+
+	/* update gso_segs and bytecount */
+	first->gso_segs = skb_shinfo(skb)->gso_segs;
+	first->bytecount += (first->gso_segs - 1) * off->header_len;
+
+	cd_tso_len = skb->len - off->header_len;
+	cd_mss = skb_shinfo(skb)->gso_size;
+
+	/* record cdesc_qw1 with TSO parameters */
+	off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+			     (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
+			     (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
+			     (cd_mss << ICE_TXD_CTX_QW1_MSS_S));
+	first->tx_flags |= ICE_TX_FLAGS_TSO;
+	return 1;
+}
+
+/**
+ * ice_txd_use_count  - estimate the number of descriptors needed for Tx
+ * @size: transmit request size in bytes
+ *
+ * Due to hardware alignment restrictions (4K alignment), we need to
+ * assume that we can have no more than 12K of data per descriptor, even
+ * though each descriptor can take up to 16K - 1 bytes of aligned memory.
+ * Thus, we need to divide by 12K. But division is slow! Instead,
+ * we decompose the operation into shifts and one relatively cheap
+ * multiply operation.
+ *
+ * To divide by 12K, we first divide by 4K, then divide by 3:
+ *     To divide by 4K, shift right by 12 bits
+ *     To divide by 3, multiply by 85, then divide by 256
+ *     (Divide by 256 is done by shifting right by 8 bits)
+ * Finally, we add one to round up. Because 256 isn't an exact multiple of
+ * 3, we'll underestimate near each multiple of 12K. This is actually more
+ * accurate as we have 4K - 1 of wiggle room that we can fit into the last
+ * segment. For our purposes this is accurate out to 1M which is orders of
+ * magnitude greater than our largest possible GSO size.
+ *
+ * This would then be implemented as:
+ *     return (((size >> 12) * 85) >> 8) + ICE_DESCS_FOR_SKB_DATA_PTR;
+ *
+ * Since multiplication and division are commutative, we can reorder
+ * operations into:
+ *     return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR;
+ */
+static unsigned int ice_txd_use_count(unsigned int size)
+{
+	return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR;
+}
+
+/**
+ * ice_xmit_desc_count - calculate number of Tx descriptors needed
+ * @skb: send buffer
+ *
+ * Returns number of data descriptors needed for this skb.
+ */
+static unsigned int ice_xmit_desc_count(struct sk_buff *skb)
+{
+	const skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+	unsigned int count = 0, size = skb_headlen(skb);
+
+	for (;;) {
+		count += ice_txd_use_count(size);
+
+		if (!nr_frags--)
+			break;
+
+		size = skb_frag_size(frag++);
+	}
+
+	return count;
+}
+
+/**
+ * __ice_chk_linearize - Check if there are more than 8 buffers per packet
+ * @skb: send buffer
+ *
+ * Note: This HW can't DMA more than 8 buffers to build a packet on the wire
+ * and so we need to figure out the cases where we need to linearize the skb.
+ *
+ * For TSO we need to count the TSO header and segment payload separately.
+ * As such we need to check cases where we have 7 fragments or more as we
+ * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
+ * the segment payload in the first descriptor, and another 7 for the
+ * fragments.
+ */
+static bool __ice_chk_linearize(struct sk_buff *skb)
+{
+	const skb_frag_t *frag, *stale;
+	int nr_frags, sum;
+
+	/* no need to check if number of frags is less than 7 */
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	if (nr_frags < (ICE_MAX_BUF_TXD - 1))
+		return false;
+
+	/* We need to walk through the list and validate that each group
+	 * of 6 fragments totals at least gso_size.
+	 */
+	nr_frags -= ICE_MAX_BUF_TXD - 2;
+	frag = &skb_shinfo(skb)->frags[0];
+
+	/* Initialize size to the negative value of gso_size minus 1. We
+	 * use this as the worst case scenario in which the frag ahead
+	 * of us only provides one byte which is why we are limited to 6
+	 * descriptors for a single transmit as the header and previous
+	 * fragment are already consuming 2 descriptors.
+	 */
+	sum = 1 - skb_shinfo(skb)->gso_size;
+
+	/* Add size of frags 0 through 4 to create our initial sum */
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+
+	/* Walk through fragments adding latest fragment, testing it, and
+	 * then removing stale fragments from the sum.
+	 */
+	for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
+		int stale_size = skb_frag_size(stale);
+
+		sum += skb_frag_size(frag++);
+
+		/* The stale fragment may present us with a smaller
+		 * descriptor than the actual fragment size. To account
+		 * for that we need to remove all the data on the front and
+		 * figure out what the remainder would be in the last
+		 * descriptor associated with the fragment.
+		 */
+		if (stale_size > ICE_MAX_DATA_PER_TXD) {
+			int align_pad = -(skb_frag_off(stale)) &
+					(ICE_MAX_READ_REQ_SIZE - 1);
+
+			sum -= align_pad;
+			stale_size -= align_pad;
+
+			do {
+				sum -= ICE_MAX_DATA_PER_TXD_ALIGNED;
+				stale_size -= ICE_MAX_DATA_PER_TXD_ALIGNED;
+			} while (stale_size > ICE_MAX_DATA_PER_TXD);
+		}
+
+		/* if sum is negative we failed to make sufficient progress */
+		if (sum < 0)
+			return true;
+
+		if (!nr_frags--)
+			break;
+
+		sum -= stale_size;
+	}
+
+	return false;
+}
+
+/**
+ * ice_chk_linearize - Check if there are more than 8 fragments per packet
+ * @skb:      send buffer
+ * @count:    number of buffers used
+ *
+ * Note: Our HW can't scatter-gather more than 8 fragments to build
+ * a packet on the wire and so we need to figure out the cases where we
+ * need to linearize the skb.
+ */
+static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count)
+{
+	/* Both TSO and single send will work if count is less than 8 */
+	if (likely(count < ICE_MAX_BUF_TXD))
+		return false;
+
+	if (skb_is_gso(skb))
+		return __ice_chk_linearize(skb);
+
+	/* we can support up to 8 data buffers for a single send */
+	return count != ICE_MAX_BUF_TXD;
+}
+
+/**
+ * ice_tstamp - set up context descriptor for hardware timestamp
+ * @tx_ring: pointer to the Tx ring to send buffer on
+ * @skb: pointer to the SKB we're sending
+ * @first: Tx buffer
+ * @off: Tx offload parameters
+ */
+static void
+ice_tstamp(struct ice_tx_ring *tx_ring, struct sk_buff *skb,
+	   struct ice_tx_buf *first, struct ice_tx_offload_params *off)
+{
+	s8 idx;
+
+	/* only timestamp the outbound packet if the user has requested it */
+	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
+		return;
+
+	if (!tx_ring->ptp_tx)
+		return;
+
+	/* Tx timestamps cannot be sampled when doing TSO */
+	if (first->tx_flags & ICE_TX_FLAGS_TSO)
+		return;
+
+	/* Grab an open timestamp slot */
+	idx = ice_ptp_request_ts(tx_ring->tx_tstamps, skb);
+	if (idx < 0) {
+		tx_ring->vsi->back->ptp.tx_hwtstamp_skipped++;
+		return;
+	}
+
+	off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+			     (ICE_TX_CTX_DESC_TSYN << ICE_TXD_CTX_QW1_CMD_S) |
+			     ((u64)idx << ICE_TXD_CTX_QW1_TSO_LEN_S));
+	first->tx_flags |= ICE_TX_FLAGS_TSYN;
+}
+
+/**
+ * ice_xmit_frame_ring - Sends buffer on Tx ring
+ * @skb: send buffer
+ * @tx_ring: ring to send buffer on
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+static netdev_tx_t
+ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring)
+{
+	struct ice_tx_offload_params offload = { 0 };
+	struct ice_vsi *vsi = tx_ring->vsi;
+	struct ice_tx_buf *first;
+	struct ethhdr *eth;
+	unsigned int count;
+	int tso, csum;
+
+	ice_trace(xmit_frame_ring, tx_ring, skb);
+
+	if (unlikely(ipv6_hopopt_jumbo_remove(skb)))
+		goto out_drop;
+
+	count = ice_xmit_desc_count(skb);
+	if (ice_chk_linearize(skb, count)) {
+		if (__skb_linearize(skb))
+			goto out_drop;
+		count = ice_txd_use_count(skb->len);
+		tx_ring->ring_stats->tx_stats.tx_linearize++;
+	}
+
+	/* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD,
+	 *       + 1 desc for skb_head_len/ICE_MAX_DATA_PER_TXD,
+	 *       + 4 desc gap to avoid the cache line where head is,
+	 *       + 1 desc for context descriptor,
+	 * otherwise try next time
+	 */
+	if (ice_maybe_stop_tx(tx_ring, count + ICE_DESCS_PER_CACHE_LINE +
+			      ICE_DESCS_FOR_CTX_DESC)) {
+		tx_ring->ring_stats->tx_stats.tx_busy++;
+		return NETDEV_TX_BUSY;
+	}
+
+	/* prefetch for bql data which is infrequently used */
+	netdev_txq_bql_enqueue_prefetchw(txring_txq(tx_ring));
+
+	offload.tx_ring = tx_ring;
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_buf[tx_ring->next_to_use];
+	first->skb = skb;
+	first->type = ICE_TX_BUF_SKB;
+	first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
+	first->gso_segs = 1;
+	first->tx_flags = 0;
+
+	/* prepare the VLAN tagging flags for Tx */
+	ice_tx_prepare_vlan_flags(tx_ring, first);
+	if (first->tx_flags & ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN) {
+		offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+					(ICE_TX_CTX_DESC_IL2TAG2 <<
+					ICE_TXD_CTX_QW1_CMD_S));
+		offload.cd_l2tag2 = first->vid;
+	}
+
+	/* set up TSO offload */
+	tso = ice_tso(first, &offload);
+	if (tso < 0)
+		goto out_drop;
+
+	/* always set up Tx checksum offload */
+	csum = ice_tx_csum(first, &offload);
+	if (csum < 0)
+		goto out_drop;
+
+	/* allow CONTROL frames egress from main VSI if FW LLDP disabled */
+	eth = (struct ethhdr *)skb_mac_header(skb);
+	if (unlikely((skb->priority == TC_PRIO_CONTROL ||
+		      eth->h_proto == htons(ETH_P_LLDP)) &&
+		     vsi->type == ICE_VSI_PF &&
+		     vsi->port_info->qos_cfg.is_sw_lldp))
+		offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+					ICE_TX_CTX_DESC_SWTCH_UPLINK <<
+					ICE_TXD_CTX_QW1_CMD_S);
+
+	ice_tstamp(tx_ring, skb, first, &offload);
+	if (ice_is_switchdev_running(vsi->back))
+		ice_eswitch_set_target_vsi(skb, &offload);
+
+	if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) {
+		struct ice_tx_ctx_desc *cdesc;
+		u16 i = tx_ring->next_to_use;
+
+		/* grab the next descriptor */
+		cdesc = ICE_TX_CTX_DESC(tx_ring, i);
+		i++;
+		tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+		/* setup context descriptor */
+		cdesc->tunneling_params = cpu_to_le32(offload.cd_tunnel_params);
+		cdesc->l2tag2 = cpu_to_le16(offload.cd_l2tag2);
+		cdesc->rsvd = cpu_to_le16(0);
+		cdesc->qw1 = cpu_to_le64(offload.cd_qw1);
+	}
+
+	ice_tx_map(tx_ring, first, &offload);
+	return NETDEV_TX_OK;
+
+out_drop:
+	ice_trace(xmit_frame_ring_drop, tx_ring, skb);
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
+/**
+ * ice_start_xmit - Selects the correct VSI and Tx queue to send buffer
+ * @skb: send buffer
+ * @netdev: network interface device structure
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ */
+netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_tx_ring *tx_ring;
+
+	tx_ring = vsi->tx_rings[skb->queue_mapping];
+
+	/* hardware can't handle really short frames, hardware padding works
+	 * beyond this point
+	 */
+	if (skb_put_padto(skb, ICE_MIN_TX_LEN))
+		return NETDEV_TX_OK;
+
+	return ice_xmit_frame_ring(skb, tx_ring);
+}
+
+/**
+ * ice_get_dscp_up - return the UP/TC value for a SKB
+ * @dcbcfg: DCB config that contains DSCP to UP/TC mapping
+ * @skb: SKB to query for info to determine UP/TC
+ *
+ * This function is to only be called when the PF is in L3 DSCP PFC mode
+ */
+static u8 ice_get_dscp_up(struct ice_dcbx_cfg *dcbcfg, struct sk_buff *skb)
+{
+	u8 dscp = 0;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+
+	return dcbcfg->dscp_map[dscp];
+}
+
+u16
+ice_select_queue(struct net_device *netdev, struct sk_buff *skb,
+		 struct net_device *sb_dev)
+{
+	struct ice_pf *pf = ice_netdev_to_pf(netdev);
+	struct ice_dcbx_cfg *dcbcfg;
+
+	dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
+	if (dcbcfg->pfc_mode == ICE_QOS_MODE_DSCP)
+		skb->priority = ice_get_dscp_up(dcbcfg, skb);
+
+	return netdev_pick_tx(netdev, skb, sb_dev);
+}
+
+/**
+ * ice_clean_ctrl_tx_irq - interrupt handler for flow director Tx queue
+ * @tx_ring: tx_ring to clean
+ */
+void ice_clean_ctrl_tx_irq(struct ice_tx_ring *tx_ring)
+{
+	struct ice_vsi *vsi = tx_ring->vsi;
+	s16 i = tx_ring->next_to_clean;
+	int budget = ICE_DFLT_IRQ_WORK;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+
+	tx_buf = &tx_ring->tx_buf[i];
+	tx_desc = ICE_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
+
+		/* if next_to_watch is not set then there is no pending work */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		/* if the descriptor isn't done, no work to do */
+		if (!(eop_desc->cmd_type_offset_bsz &
+		      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buf->next_to_watch = NULL;
+		tx_desc->buf_addr = 0;
+		tx_desc->cmd_type_offset_bsz = 0;
+
+		/* move past filter desc */
+		tx_buf++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buf = tx_ring->tx_buf;
+			tx_desc = ICE_TX_DESC(tx_ring, 0);
+		}
+
+		/* unmap the data header */
+		if (dma_unmap_len(tx_buf, len))
+			dma_unmap_single(tx_ring->dev,
+					 dma_unmap_addr(tx_buf, dma),
+					 dma_unmap_len(tx_buf, len),
+					 DMA_TO_DEVICE);
+		if (tx_buf->type == ICE_TX_BUF_DUMMY)
+			devm_kfree(tx_ring->dev, tx_buf->raw_buf);
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buf->type = ICE_TX_BUF_EMPTY;
+		tx_buf->tx_flags = 0;
+		tx_buf->next_to_watch = NULL;
+		dma_unmap_len_set(tx_buf, len, 0);
+		tx_desc->buf_addr = 0;
+		tx_desc->cmd_type_offset_bsz = 0;
+
+		/* move past eop_desc for start of next FD desc */
+		tx_buf++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buf = tx_ring->tx_buf;
+			tx_desc = ICE_TX_DESC(tx_ring, 0);
+		}
+
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+
+	/* re-enable interrupt if needed */
+	ice_irq_dynamic_ena(&vsi->back->hw, vsi, vsi->q_vectors[0]);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
new file mode 100644
index 0000000000..407d4c3200
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -0,0 +1,484 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_TXRX_H_
+#define _ICE_TXRX_H_
+
+#include "ice_type.h"
+
+#define ICE_DFLT_IRQ_WORK	256
+#define ICE_RXBUF_3072		3072
+#define ICE_RXBUF_2048		2048
+#define ICE_RXBUF_1664		1664
+#define ICE_RXBUF_1536		1536
+#define ICE_MAX_CHAINED_RX_BUFS	5
+#define ICE_MAX_BUF_TXD		8
+#define ICE_MIN_TX_LEN		17
+#define ICE_MAX_FRAME_LEGACY_RX 8320
+
+/* The size limit for a transmit buffer in a descriptor is (16K - 1).
+ * In order to align with the read requests we will align the value to
+ * the nearest 4K which represents our maximum read request size.
+ */
+#define ICE_MAX_READ_REQ_SIZE	4096
+#define ICE_MAX_DATA_PER_TXD	(16 * 1024 - 1)
+#define ICE_MAX_DATA_PER_TXD_ALIGNED \
+	(~(ICE_MAX_READ_REQ_SIZE - 1) & ICE_MAX_DATA_PER_TXD)
+
+#define ICE_MAX_TXQ_PER_TXQG	128
+
+/* Attempt to maximize the headroom available for incoming frames. We use a 2K
+ * buffer for MTUs <= 1500 and need 1536/1534 to store the data for the frame.
+ * This leaves us with 512 bytes of room.  From that we need to deduct the
+ * space needed for the shared info and the padding needed to IP align the
+ * frame.
+ *
+ * Note: For cache line sizes 256 or larger this value is going to end
+ *	 up negative.  In these cases we should fall back to the legacy
+ *	 receive path.
+ */
+#if (PAGE_SIZE < 8192)
+#define ICE_2K_TOO_SMALL_WITH_PADDING \
+	((unsigned int)(NET_SKB_PAD + ICE_RXBUF_1536) > \
+			SKB_WITH_OVERHEAD(ICE_RXBUF_2048))
+
+/**
+ * ice_compute_pad - compute the padding
+ * @rx_buf_len: buffer length
+ *
+ * Figure out the size of half page based on given buffer length and
+ * then subtract the skb_shared_info followed by subtraction of the
+ * actual buffer length; this in turn results in the actual space that
+ * is left for padding usage
+ */
+static inline int ice_compute_pad(int rx_buf_len)
+{
+	int half_page_size;
+
+	half_page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
+	return SKB_WITH_OVERHEAD(half_page_size) - rx_buf_len;
+}
+
+/**
+ * ice_skb_pad - determine the padding that we can supply
+ *
+ * Figure out the right Rx buffer size and based on that calculate the
+ * padding
+ */
+static inline int ice_skb_pad(void)
+{
+	int rx_buf_len;
+
+	/* If a 2K buffer cannot handle a standard Ethernet frame then
+	 * optimize padding for a 3K buffer instead of a 1.5K buffer.
+	 *
+	 * For a 3K buffer we need to add enough padding to allow for
+	 * tailroom due to NET_IP_ALIGN possibly shifting us out of
+	 * cache-line alignment.
+	 */
+	if (ICE_2K_TOO_SMALL_WITH_PADDING)
+		rx_buf_len = ICE_RXBUF_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
+	else
+		rx_buf_len = ICE_RXBUF_1536;
+
+	/* if needed make room for NET_IP_ALIGN */
+	rx_buf_len -= NET_IP_ALIGN;
+
+	return ice_compute_pad(rx_buf_len);
+}
+
+#define ICE_SKB_PAD ice_skb_pad()
+#else
+#define ICE_2K_TOO_SMALL_WITH_PADDING false
+#define ICE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#endif
+
+/* We are assuming that the cache line is always 64 Bytes here for ice.
+ * In order to make sure that is a correct assumption there is a check in probe
+ * to print a warning if the read from GLPCI_CNF2 tells us that the cache line
+ * size is 128 bytes. We do it this way because we do not want to read the
+ * GLPCI_CNF2 register or a variable containing the value on every pass through
+ * the Tx path.
+ */
+#define ICE_CACHE_LINE_BYTES		64
+#define ICE_DESCS_PER_CACHE_LINE	(ICE_CACHE_LINE_BYTES / \
+					 sizeof(struct ice_tx_desc))
+#define ICE_DESCS_FOR_CTX_DESC		1
+#define ICE_DESCS_FOR_SKB_DATA_PTR	1
+/* Tx descriptors needed, worst case */
+#define DESC_NEEDED (MAX_SKB_FRAGS + ICE_DESCS_FOR_CTX_DESC + \
+		     ICE_DESCS_PER_CACHE_LINE + ICE_DESCS_FOR_SKB_DATA_PTR)
+#define ICE_DESC_UNUSED(R)	\
+	(u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+	      (R)->next_to_clean - (R)->next_to_use - 1)
+
+#define ICE_RX_DESC_UNUSED(R)	\
+	((((R)->first_desc > (R)->next_to_use) ? 0 : (R)->count) + \
+	      (R)->first_desc - (R)->next_to_use - 1)
+
+#define ICE_RING_QUARTER(R) ((R)->count >> 2)
+
+#define ICE_TX_FLAGS_TSO	BIT(0)
+#define ICE_TX_FLAGS_HW_VLAN	BIT(1)
+#define ICE_TX_FLAGS_SW_VLAN	BIT(2)
+/* Free, was ICE_TX_FLAGS_DUMMY_PKT */
+#define ICE_TX_FLAGS_TSYN	BIT(4)
+#define ICE_TX_FLAGS_IPV4	BIT(5)
+#define ICE_TX_FLAGS_IPV6	BIT(6)
+#define ICE_TX_FLAGS_TUNNEL	BIT(7)
+#define ICE_TX_FLAGS_HW_OUTER_SINGLE_VLAN	BIT(8)
+
+#define ICE_XDP_PASS		0
+#define ICE_XDP_CONSUMED	BIT(0)
+#define ICE_XDP_TX		BIT(1)
+#define ICE_XDP_REDIR		BIT(2)
+#define ICE_XDP_EXIT		BIT(3)
+#define ICE_SKB_CONSUMED	ICE_XDP_CONSUMED
+
+#define ICE_RX_DMA_ATTR \
+	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+#define ICE_ETH_PKT_HDR_PAD	(ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
+
+#define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)
+
+/**
+ * enum ice_tx_buf_type - type of &ice_tx_buf to act on Tx completion
+ * @ICE_TX_BUF_EMPTY: unused OR XSk frame, no action required
+ * @ICE_TX_BUF_DUMMY: dummy Flow Director packet, unmap and kfree()
+ * @ICE_TX_BUF_FRAG: mapped skb OR &xdp_buff frag, only unmap DMA
+ * @ICE_TX_BUF_SKB: &sk_buff, unmap and consume_skb(), update stats
+ * @ICE_TX_BUF_XDP_TX: &xdp_buff, unmap and page_frag_free(), stats
+ * @ICE_TX_BUF_XDP_XMIT: &xdp_frame, unmap and xdp_return_frame(), stats
+ * @ICE_TX_BUF_XSK_TX: &xdp_buff on XSk queue, xsk_buff_free(), stats
+ */
+enum ice_tx_buf_type {
+	ICE_TX_BUF_EMPTY	= 0U,
+	ICE_TX_BUF_DUMMY,
+	ICE_TX_BUF_FRAG,
+	ICE_TX_BUF_SKB,
+	ICE_TX_BUF_XDP_TX,
+	ICE_TX_BUF_XDP_XMIT,
+	ICE_TX_BUF_XSK_TX,
+};
+
+struct ice_tx_buf {
+	union {
+		struct ice_tx_desc *next_to_watch;
+		u32 rs_idx;
+	};
+	union {
+		void *raw_buf;		/* used for XDP_TX and FDir rules */
+		struct sk_buff *skb;	/* used for .ndo_start_xmit() */
+		struct xdp_frame *xdpf;	/* used for .ndo_xdp_xmit() */
+		struct xdp_buff *xdp;	/* used for XDP_TX ZC */
+	};
+	unsigned int bytecount;
+	union {
+		unsigned int gso_segs;
+		unsigned int nr_frags;	/* used for mbuf XDP */
+	};
+	u32 tx_flags:12;
+	u32 type:4;			/* &ice_tx_buf_type */
+	u32 vid:16;
+	DEFINE_DMA_UNMAP_LEN(len);
+	DEFINE_DMA_UNMAP_ADDR(dma);
+};
+
+struct ice_tx_offload_params {
+	u64 cd_qw1;
+	struct ice_tx_ring *tx_ring;
+	u32 td_cmd;
+	u32 td_offset;
+	u32 td_l2tag1;
+	u32 cd_tunnel_params;
+	u16 cd_l2tag2;
+	u8 header_len;
+};
+
+struct ice_rx_buf {
+	dma_addr_t dma;
+	struct page *page;
+	unsigned int page_offset;
+	unsigned int pgcnt;
+	unsigned int act;
+	unsigned int pagecnt_bias;
+};
+
+struct ice_q_stats {
+	u64 pkts;
+	u64 bytes;
+};
+
+struct ice_txq_stats {
+	u64 restart_q;
+	u64 tx_busy;
+	u64 tx_linearize;
+	int prev_pkt; /* negative if no pending Tx descriptors */
+};
+
+struct ice_rxq_stats {
+	u64 non_eop_descs;
+	u64 alloc_page_failed;
+	u64 alloc_buf_failed;
+};
+
+struct ice_ring_stats {
+	struct rcu_head rcu;	/* to avoid race on free */
+	struct ice_q_stats stats;
+	struct u64_stats_sync syncp;
+	union {
+		struct ice_txq_stats tx_stats;
+		struct ice_rxq_stats rx_stats;
+	};
+};
+
+enum ice_ring_state_t {
+	ICE_TX_XPS_INIT_DONE,
+	ICE_TX_NBITS,
+};
+
+/* this enum matches hardware bits and is meant to be used by DYN_CTLN
+ * registers and QINT registers or more generally anywhere in the manual
+ * mentioning ITR_INDX, ITR_NONE cannot be used as an index 'n' into any
+ * register but instead is a special value meaning "don't update" ITR0/1/2.
+ */
+enum ice_dyn_idx_t {
+	ICE_IDX_ITR0 = 0,
+	ICE_IDX_ITR1 = 1,
+	ICE_IDX_ITR2 = 2,
+	ICE_ITR_NONE = 3	/* ITR_NONE must not be used as an index */
+};
+
+/* Header split modes defined by DTYPE field of Rx RLAN context */
+enum ice_rx_dtype {
+	ICE_RX_DTYPE_NO_SPLIT		= 0,
+	ICE_RX_DTYPE_HEADER_SPLIT	= 1,
+	ICE_RX_DTYPE_SPLIT_ALWAYS	= 2,
+};
+
+/* indices into GLINT_ITR registers */
+#define ICE_RX_ITR	ICE_IDX_ITR0
+#define ICE_TX_ITR	ICE_IDX_ITR1
+#define ICE_ITR_8K	124
+#define ICE_ITR_20K	50
+#define ICE_ITR_MAX	8160 /* 0x1FE0 */
+#define ICE_DFLT_TX_ITR	ICE_ITR_20K
+#define ICE_DFLT_RX_ITR	ICE_ITR_20K
+enum ice_dynamic_itr {
+	ITR_STATIC = 0,
+	ITR_DYNAMIC = 1
+};
+
+#define ITR_IS_DYNAMIC(rc) ((rc)->itr_mode == ITR_DYNAMIC)
+#define ICE_ITR_GRAN_S		1	/* ITR granularity is always 2us */
+#define ICE_ITR_GRAN_US		BIT(ICE_ITR_GRAN_S)
+#define ICE_ITR_MASK		0x1FFE	/* ITR register value alignment mask */
+#define ITR_REG_ALIGN(setting)	((setting) & ICE_ITR_MASK)
+
+#define ICE_DFLT_INTRL	0
+#define ICE_MAX_INTRL	236
+
+#define ICE_IN_WB_ON_ITR_MODE	255
+/* Sets WB_ON_ITR and assumes INTENA bit is already cleared, which allows
+ * setting the MSK_M bit to tell hardware to ignore the INTENA_M bit. Also,
+ * set the write-back latency to the usecs passed in.
+ */
+#define ICE_GLINT_DYN_CTL_WB_ON_ITR(usecs, itr_idx)	\
+	((((usecs) << (GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S)) & \
+	  GLINT_DYN_CTL_INTERVAL_M) | \
+	 (((itr_idx) << GLINT_DYN_CTL_ITR_INDX_S) & \
+	  GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M | \
+	 GLINT_DYN_CTL_WB_ON_ITR_M)
+
+/* Legacy or Advanced Mode Queue */
+#define ICE_TX_ADVANCED	0
+#define ICE_TX_LEGACY	1
+
+/* descriptor ring, associated with a VSI */
+struct ice_rx_ring {
+	/* CL1 - 1st cacheline starts here */
+	struct ice_rx_ring *next;	/* pointer to next ring in q_vector */
+	void *desc;			/* Descriptor ring memory */
+	struct device *dev;		/* Used for DMA mapping */
+	struct net_device *netdev;	/* netdev ring maps to */
+	struct ice_vsi *vsi;		/* Backreference to associated VSI */
+	struct ice_q_vector *q_vector;	/* Backreference to associated vector */
+	u8 __iomem *tail;
+	u16 q_index;			/* Queue number of ring */
+
+	u16 count;			/* Number of descriptors */
+	u16 reg_idx;			/* HW register index of the ring */
+	u16 next_to_alloc;
+	/* CL2 - 2nd cacheline starts here */
+	union {
+		struct ice_rx_buf *rx_buf;
+		struct xdp_buff **xdp_buf;
+	};
+	struct xdp_buff xdp;
+	/* CL3 - 3rd cacheline starts here */
+	struct bpf_prog *xdp_prog;
+	u16 rx_offset;
+
+	/* used in interrupt processing */
+	u16 next_to_use;
+	u16 next_to_clean;
+	u16 first_desc;
+
+	/* stats structs */
+	struct ice_ring_stats *ring_stats;
+
+	struct rcu_head rcu;		/* to avoid race on free */
+	/* CL4 - 4th cacheline starts here */
+	struct ice_channel *ch;
+	struct ice_tx_ring *xdp_ring;
+	struct xsk_buff_pool *xsk_pool;
+	u32 nr_frags;
+	dma_addr_t dma;			/* physical address of ring */
+	u64 cached_phctime;
+	u16 rx_buf_len;
+	u8 dcb_tc;			/* Traffic class of ring */
+	u8 ptp_rx;
+#define ICE_RX_FLAGS_RING_BUILD_SKB	BIT(1)
+#define ICE_RX_FLAGS_CRC_STRIP_DIS	BIT(2)
+	u8 flags;
+	/* CL5 - 5th cacheline starts here */
+	struct xdp_rxq_info xdp_rxq;
+} ____cacheline_internodealigned_in_smp;
+
+struct ice_tx_ring {
+	/* CL1 - 1st cacheline starts here */
+	struct ice_tx_ring *next;	/* pointer to next ring in q_vector */
+	void *desc;			/* Descriptor ring memory */
+	struct device *dev;		/* Used for DMA mapping */
+	u8 __iomem *tail;
+	struct ice_tx_buf *tx_buf;
+	struct ice_q_vector *q_vector;	/* Backreference to associated vector */
+	struct net_device *netdev;	/* netdev ring maps to */
+	struct ice_vsi *vsi;		/* Backreference to associated VSI */
+	/* CL2 - 2nd cacheline starts here */
+	dma_addr_t dma;			/* physical address of ring */
+	struct xsk_buff_pool *xsk_pool;
+	u16 next_to_use;
+	u16 next_to_clean;
+	u16 q_handle;			/* Queue handle per TC */
+	u16 reg_idx;			/* HW register index of the ring */
+	u16 count;			/* Number of descriptors */
+	u16 q_index;			/* Queue number of ring */
+	u16 xdp_tx_active;
+	/* stats structs */
+	struct ice_ring_stats *ring_stats;
+	/* CL3 - 3rd cacheline starts here */
+	struct rcu_head rcu;		/* to avoid race on free */
+	DECLARE_BITMAP(xps_state, ICE_TX_NBITS);	/* XPS Config State */
+	struct ice_channel *ch;
+	struct ice_ptp_tx *tx_tstamps;
+	spinlock_t tx_lock;
+	u32 txq_teid;			/* Added Tx queue TEID */
+	/* CL4 - 4th cacheline starts here */
+#define ICE_TX_FLAGS_RING_XDP		BIT(0)
+#define ICE_TX_FLAGS_RING_VLAN_L2TAG1	BIT(1)
+#define ICE_TX_FLAGS_RING_VLAN_L2TAG2	BIT(2)
+	u8 flags;
+	u8 dcb_tc;			/* Traffic class of ring */
+	u8 ptp_tx;
+} ____cacheline_internodealigned_in_smp;
+
+static inline bool ice_ring_uses_build_skb(struct ice_rx_ring *ring)
+{
+	return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB);
+}
+
+static inline void ice_set_ring_build_skb_ena(struct ice_rx_ring *ring)
+{
+	ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+static inline void ice_clear_ring_build_skb_ena(struct ice_rx_ring *ring)
+{
+	ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB;
+}
+
+static inline bool ice_ring_ch_enabled(struct ice_tx_ring *ring)
+{
+	return !!ring->ch;
+}
+
+static inline bool ice_ring_is_xdp(struct ice_tx_ring *ring)
+{
+	return !!(ring->flags & ICE_TX_FLAGS_RING_XDP);
+}
+
+enum ice_container_type {
+	ICE_RX_CONTAINER,
+	ICE_TX_CONTAINER,
+};
+
+struct ice_ring_container {
+	/* head of linked-list of rings */
+	union {
+		struct ice_rx_ring *rx_ring;
+		struct ice_tx_ring *tx_ring;
+	};
+	struct dim dim;		/* data for net_dim algorithm */
+	u16 itr_idx;		/* index in the interrupt vector */
+	/* this matches the maximum number of ITR bits, but in usec
+	 * values, so it is shifted left one bit (bit zero is ignored)
+	 */
+	union {
+		struct {
+			u16 itr_setting:13;
+			u16 itr_reserved:2;
+			u16 itr_mode:1;
+		};
+		u16 itr_settings;
+	};
+	enum ice_container_type type;
+};
+
+struct ice_coalesce_stored {
+	u16 itr_tx;
+	u16 itr_rx;
+	u8 intrl;
+	u8 tx_valid;
+	u8 rx_valid;
+};
+
+/* iterator for handling rings in ring container */
+#define ice_for_each_rx_ring(pos, head) \
+	for (pos = (head).rx_ring; pos; pos = pos->next)
+
+#define ice_for_each_tx_ring(pos, head) \
+	for (pos = (head).tx_ring; pos; pos = pos->next)
+
+static inline unsigned int ice_rx_pg_order(struct ice_rx_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring->rx_buf_len > (PAGE_SIZE / 2))
+		return 1;
+#endif
+	return 0;
+}
+
+#define ice_rx_pg_size(_ring) (PAGE_SIZE << ice_rx_pg_order(_ring))
+
+union ice_32b_rx_flex_desc;
+
+bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, unsigned int cleaned_count);
+netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
+u16
+ice_select_queue(struct net_device *dev, struct sk_buff *skb,
+		 struct net_device *sb_dev);
+void ice_clean_tx_ring(struct ice_tx_ring *tx_ring);
+void ice_clean_rx_ring(struct ice_rx_ring *rx_ring);
+int ice_setup_tx_ring(struct ice_tx_ring *tx_ring);
+int ice_setup_rx_ring(struct ice_rx_ring *rx_ring);
+void ice_free_tx_ring(struct ice_tx_ring *tx_ring);
+void ice_free_rx_ring(struct ice_rx_ring *rx_ring);
+int ice_napi_poll(struct napi_struct *napi, int budget);
+int
+ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc,
+		   u8 *raw_packet);
+int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget);
+void ice_clean_ctrl_tx_irq(struct ice_tx_ring *tx_ring);
+#endif /* _ICE_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
new file mode 100644
index 0000000000..c8322fb6f2
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <linux/filter.h>
+
+#include "ice_txrx_lib.h"
+#include "ice_eswitch.h"
+#include "ice_lib.h"
+
+/**
+ * ice_release_rx_desc - Store the new tail and head values
+ * @rx_ring: ring to bump
+ * @val: new head index
+ */
+void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val)
+{
+	u16 prev_ntu = rx_ring->next_to_use & ~0x7;
+
+	rx_ring->next_to_use = val;
+
+	/* update next to alloc since we have filled the ring */
+	rx_ring->next_to_alloc = val;
+
+	/* QRX_TAIL will be updated with any tail value, but hardware ignores
+	 * the lower 3 bits. This makes it so we only bump tail on meaningful
+	 * boundaries. Also, this allows us to bump tail on intervals of 8 up to
+	 * the budget depending on the current traffic load.
+	 */
+	val &= ~0x7;
+	if (prev_ntu != val) {
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch. (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(val, rx_ring->tail);
+	}
+}
+
+/**
+ * ice_ptype_to_htype - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns appropriate hash type (such as PKT_HASH_TYPE_L2/L3/L4) to be used by
+ * skb_set_hash based on PTYPE as parsed by HW Rx pipeline and is part of
+ * Rx desc.
+ */
+static enum pkt_hash_types ice_ptype_to_htype(u16 ptype)
+{
+	struct ice_rx_ptype_decoded decoded = ice_decode_rx_desc_ptype(ptype);
+
+	if (!decoded.known)
+		return PKT_HASH_TYPE_NONE;
+	if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4)
+		return PKT_HASH_TYPE_L4;
+	if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3)
+		return PKT_HASH_TYPE_L3;
+	if (decoded.outer_ip == ICE_RX_PTYPE_OUTER_L2)
+		return PKT_HASH_TYPE_L2;
+
+	return PKT_HASH_TYPE_NONE;
+}
+
+/**
+ * ice_rx_hash - set the hash value in the skb
+ * @rx_ring: descriptor ring
+ * @rx_desc: specific descriptor
+ * @skb: pointer to current skb
+ * @rx_ptype: the ptype value from the descriptor
+ */
+static void
+ice_rx_hash(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
+	    struct sk_buff *skb, u16 rx_ptype)
+{
+	struct ice_32b_rx_flex_desc_nic *nic_mdid;
+	u32 hash;
+
+	if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
+		return;
+
+	if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
+		return;
+
+	nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
+	hash = le32_to_cpu(nic_mdid->rss_hash);
+	skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype));
+}
+
+/**
+ * ice_rx_csum - Indicate in skb if checksum is good
+ * @ring: the ring we care about
+ * @skb: skb currently being received and modified
+ * @rx_desc: the receive descriptor
+ * @ptype: the packet type decoded by hardware
+ *
+ * skb->protocol must be set before this function is called
+ */
+static void
+ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb,
+	    union ice_32b_rx_flex_desc *rx_desc, u16 ptype)
+{
+	struct ice_rx_ptype_decoded decoded;
+	u16 rx_status0, rx_status1;
+	bool ipv4, ipv6;
+
+	rx_status0 = le16_to_cpu(rx_desc->wb.status_error0);
+	rx_status1 = le16_to_cpu(rx_desc->wb.status_error1);
+
+	decoded = ice_decode_rx_desc_ptype(ptype);
+
+	/* Start with CHECKSUM_NONE and by default csum_level = 0 */
+	skb->ip_summed = CHECKSUM_NONE;
+	skb_checksum_none_assert(skb);
+
+	/* check if Rx checksum is enabled */
+	if (!(ring->netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* check if HW has decoded the packet and checksum */
+	if (!(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
+		return;
+
+	if (!(decoded.known && decoded.outer_ip))
+		return;
+
+	ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4);
+	ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
+	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
+
+	if (ipv4 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
+				   BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
+		goto checksum_fail;
+
+	if (ipv6 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
+		goto checksum_fail;
+
+	/* check for L4 errors and handle packets that were not able to be
+	 * checksummed due to arrival speed
+	 */
+	if (rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
+		goto checksum_fail;
+
+	/* check for outer UDP checksum error in tunneled packets */
+	if ((rx_status1 & BIT(ICE_RX_FLEX_DESC_STATUS1_NAT_S)) &&
+	    (rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)))
+		goto checksum_fail;
+
+	/* If there is an outer header present that might contain a checksum
+	 * we need to bump the checksum level by 1 to reflect the fact that
+	 * we are indicating we validated the inner checksum.
+	 */
+	if (decoded.tunnel_type >= ICE_RX_PTYPE_TUNNEL_IP_GRENAT)
+		skb->csum_level = 1;
+
+	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
+	switch (decoded.inner_prot) {
+	case ICE_RX_PTYPE_INNER_PROT_TCP:
+	case ICE_RX_PTYPE_INNER_PROT_UDP:
+	case ICE_RX_PTYPE_INNER_PROT_SCTP:
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		break;
+	default:
+		break;
+	}
+	return;
+
+checksum_fail:
+	ring->vsi->back->hw_csum_rx_error++;
+}
+
+/**
+ * ice_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: Rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ * @ptype: the packet type decoded by hardware
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
+ */
+void
+ice_process_skb_fields(struct ice_rx_ring *rx_ring,
+		       union ice_32b_rx_flex_desc *rx_desc,
+		       struct sk_buff *skb, u16 ptype)
+{
+	ice_rx_hash(rx_ring, rx_desc, skb, ptype);
+
+	/* modifies the skb - consumes the enet header */
+	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+
+	ice_rx_csum(rx_ring, skb, rx_desc, ptype);
+
+	if (rx_ring->ptp_rx)
+		ice_ptp_rx_hwtstamp(rx_ring, rx_desc, skb);
+}
+
+/**
+ * ice_receive_skb - Send a completed packet up the stack
+ * @rx_ring: Rx ring in play
+ * @skb: packet to send up
+ * @vlan_tag: VLAN tag for packet
+ *
+ * This function sends the completed packet (via. skb) up the stack using
+ * gro receive functions (with/without VLAN tag)
+ */
+void
+ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
+{
+	netdev_features_t features = rx_ring->netdev->features;
+	bool non_zero_vlan = !!(vlan_tag & VLAN_VID_MASK);
+
+	if ((features & NETIF_F_HW_VLAN_CTAG_RX) && non_zero_vlan)
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+	else if ((features & NETIF_F_HW_VLAN_STAG_RX) && non_zero_vlan)
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), vlan_tag);
+
+	napi_gro_receive(&rx_ring->q_vector->napi, skb);
+}
+
+/**
+ * ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
+ * @dev: device for DMA mapping
+ * @tx_buf: Tx buffer to clean
+ * @bq: XDP bulk flush struct
+ */
+static void
+ice_clean_xdp_tx_buf(struct device *dev, struct ice_tx_buf *tx_buf,
+		     struct xdp_frame_bulk *bq)
+{
+	dma_unmap_single(dev, dma_unmap_addr(tx_buf, dma),
+			 dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+	dma_unmap_len_set(tx_buf, len, 0);
+
+	switch (tx_buf->type) {
+	case ICE_TX_BUF_XDP_TX:
+		page_frag_free(tx_buf->raw_buf);
+		break;
+	case ICE_TX_BUF_XDP_XMIT:
+		xdp_return_frame_bulk(tx_buf->xdpf, bq);
+		break;
+	}
+
+	tx_buf->type = ICE_TX_BUF_EMPTY;
+}
+
+/**
+ * ice_clean_xdp_irq - Reclaim resources after transmit completes on XDP ring
+ * @xdp_ring: XDP ring to clean
+ */
+static u32 ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring)
+{
+	int total_bytes = 0, total_pkts = 0;
+	struct device *dev = xdp_ring->dev;
+	u32 ntc = xdp_ring->next_to_clean;
+	struct ice_tx_desc *tx_desc;
+	u32 cnt = xdp_ring->count;
+	struct xdp_frame_bulk bq;
+	u32 frags, xdp_tx = 0;
+	u32 ready_frames = 0;
+	u32 idx;
+	u32 ret;
+
+	idx = xdp_ring->tx_buf[ntc].rs_idx;
+	tx_desc = ICE_TX_DESC(xdp_ring, idx);
+	if (tx_desc->cmd_type_offset_bsz &
+	    cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)) {
+		if (idx >= ntc)
+			ready_frames = idx - ntc + 1;
+		else
+			ready_frames = idx + cnt - ntc + 1;
+	}
+
+	if (unlikely(!ready_frames))
+		return 0;
+	ret = ready_frames;
+
+	xdp_frame_bulk_init(&bq);
+	rcu_read_lock(); /* xdp_return_frame_bulk() */
+
+	while (ready_frames) {
+		struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
+		struct ice_tx_buf *head = tx_buf;
+
+		/* bytecount holds size of head + frags */
+		total_bytes += tx_buf->bytecount;
+		frags = tx_buf->nr_frags;
+		total_pkts++;
+		/* count head + frags */
+		ready_frames -= frags + 1;
+		xdp_tx++;
+
+		ntc++;
+		if (ntc == cnt)
+			ntc = 0;
+
+		for (int i = 0; i < frags; i++) {
+			tx_buf = &xdp_ring->tx_buf[ntc];
+
+			ice_clean_xdp_tx_buf(dev, tx_buf, &bq);
+			ntc++;
+			if (ntc == cnt)
+				ntc = 0;
+		}
+
+		ice_clean_xdp_tx_buf(dev, head, &bq);
+	}
+
+	xdp_flush_frame_bulk(&bq);
+	rcu_read_unlock();
+
+	tx_desc->cmd_type_offset_bsz = 0;
+	xdp_ring->next_to_clean = ntc;
+	xdp_ring->xdp_tx_active -= xdp_tx;
+	ice_update_tx_ring_stats(xdp_ring, total_pkts, total_bytes);
+
+	return ret;
+}
+
+/**
+ * __ice_xmit_xdp_ring - submit frame to XDP ring for transmission
+ * @xdp: XDP buffer to be placed onto Tx descriptors
+ * @xdp_ring: XDP ring for transmission
+ * @frame: whether this comes from .ndo_xdp_xmit()
+ */
+int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring,
+			bool frame)
+{
+	struct skb_shared_info *sinfo = NULL;
+	u32 size = xdp->data_end - xdp->data;
+	struct device *dev = xdp_ring->dev;
+	u32 ntu = xdp_ring->next_to_use;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_head;
+	struct ice_tx_buf *tx_buf;
+	u32 cnt = xdp_ring->count;
+	void *data = xdp->data;
+	u32 nr_frags = 0;
+	u32 free_space;
+	u32 frag = 0;
+
+	free_space = ICE_DESC_UNUSED(xdp_ring);
+	if (free_space < ICE_RING_QUARTER(xdp_ring))
+		free_space += ice_clean_xdp_irq(xdp_ring);
+
+	if (unlikely(!free_space))
+		goto busy;
+
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+		if (free_space < nr_frags + 1)
+			goto busy;
+	}
+
+	tx_desc = ICE_TX_DESC(xdp_ring, ntu);
+	tx_head = &xdp_ring->tx_buf[ntu];
+	tx_buf = tx_head;
+
+	for (;;) {
+		dma_addr_t dma;
+
+		dma = dma_map_single(dev, data, size, DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, dma))
+			goto dma_unmap;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buf, len, size);
+		dma_unmap_addr_set(tx_buf, dma, dma);
+
+		if (frame) {
+			tx_buf->type = ICE_TX_BUF_FRAG;
+		} else {
+			tx_buf->type = ICE_TX_BUF_XDP_TX;
+			tx_buf->raw_buf = data;
+		}
+
+		tx_desc->buf_addr = cpu_to_le64(dma);
+		tx_desc->cmd_type_offset_bsz = ice_build_ctob(0, 0, size, 0);
+
+		ntu++;
+		if (ntu == cnt)
+			ntu = 0;
+
+		if (frag == nr_frags)
+			break;
+
+		tx_desc = ICE_TX_DESC(xdp_ring, ntu);
+		tx_buf = &xdp_ring->tx_buf[ntu];
+
+		data = skb_frag_address(&sinfo->frags[frag]);
+		size = skb_frag_size(&sinfo->frags[frag]);
+		frag++;
+	}
+
+	/* store info about bytecount and frag count in first desc */
+	tx_head->bytecount = xdp_get_buff_len(xdp);
+	tx_head->nr_frags = nr_frags;
+
+	if (frame) {
+		tx_head->type = ICE_TX_BUF_XDP_XMIT;
+		tx_head->xdpf = xdp->data_hard_start;
+	}
+
+	/* update last descriptor from a frame with EOP */
+	tx_desc->cmd_type_offset_bsz |=
+		cpu_to_le64(ICE_TX_DESC_CMD_EOP << ICE_TXD_QW1_CMD_S);
+
+	xdp_ring->xdp_tx_active++;
+	xdp_ring->next_to_use = ntu;
+
+	return ICE_XDP_TX;
+
+dma_unmap:
+	for (;;) {
+		tx_buf = &xdp_ring->tx_buf[ntu];
+		dma_unmap_page(dev, dma_unmap_addr(tx_buf, dma),
+			       dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
+		dma_unmap_len_set(tx_buf, len, 0);
+		if (tx_buf == tx_head)
+			break;
+
+		if (!ntu)
+			ntu += cnt;
+		ntu--;
+	}
+	return ICE_XDP_CONSUMED;
+
+busy:
+	xdp_ring->ring_stats->tx_stats.tx_busy++;
+
+	return ICE_XDP_CONSUMED;
+}
+
+/**
+ * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map
+ * @xdp_ring: XDP ring
+ * @xdp_res: Result of the receive batch
+ * @first_idx: index to write from caller
+ *
+ * This function bumps XDP Tx tail and/or flush redirect map, and
+ * should be called when a batch of packets has been processed in the
+ * napi loop.
+ */
+void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res,
+			 u32 first_idx)
+{
+	struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[first_idx];
+
+	if (xdp_res & ICE_XDP_REDIR)
+		xdp_do_flush_map();
+
+	if (xdp_res & ICE_XDP_TX) {
+		if (static_branch_unlikely(&ice_xdp_locking_key))
+			spin_lock(&xdp_ring->tx_lock);
+		/* store index of descriptor with RS bit set in the first
+		 * ice_tx_buf of given NAPI batch
+		 */
+		tx_buf->rs_idx = ice_set_rs_bit(xdp_ring);
+		ice_xdp_ring_update_tail(xdp_ring);
+		if (static_branch_unlikely(&ice_xdp_locking_key))
+			spin_unlock(&xdp_ring->tx_lock);
+	}
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
new file mode 100644
index 0000000000..b0e56675f9
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_TXRX_LIB_H_
+#define _ICE_TXRX_LIB_H_
+#include "ice.h"
+
+/**
+ * ice_set_rx_bufs_act - propagate Rx buffer action to frags
+ * @xdp: XDP buffer representing frame (linear and frags part)
+ * @rx_ring: Rx ring struct
+ * act: action to store onto Rx buffers related to XDP buffer parts
+ *
+ * Set action that should be taken before putting Rx buffer from first frag
+ * to the last.
+ */
+static inline void
+ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring,
+		    const unsigned int act)
+{
+	u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
+	u32 nr_frags = rx_ring->nr_frags + 1;
+	u32 idx = rx_ring->first_desc;
+	u32 cnt = rx_ring->count;
+	struct ice_rx_buf *buf;
+
+	for (int i = 0; i < nr_frags; i++) {
+		buf = &rx_ring->rx_buf[idx];
+		buf->act = act;
+
+		if (++idx == cnt)
+			idx = 0;
+	}
+
+	/* adjust pagecnt_bias on frags freed by XDP prog */
+	if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) {
+		u32 delta = rx_ring->nr_frags - sinfo_frags;
+
+		while (delta) {
+			if (idx == 0)
+				idx = cnt - 1;
+			else
+				idx--;
+			buf = &rx_ring->rx_buf[idx];
+			buf->pagecnt_bias--;
+			delta--;
+		}
+	}
+}
+
+/**
+ * ice_test_staterr - tests bits in Rx descriptor status and error fields
+ * @status_err_n: Rx descriptor status_error0 or status_error1 bits
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static inline bool
+ice_test_staterr(__le16 status_err_n, const u16 stat_err_bits)
+{
+	return !!(status_err_n & cpu_to_le16(stat_err_bits));
+}
+
+/**
+ * ice_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ *
+ * If the buffer is an EOP buffer, this function exits returning false,
+ * otherwise return true indicating that this is in fact a non-EOP buffer.
+ */
+static inline bool
+ice_is_non_eop(const struct ice_rx_ring *rx_ring,
+	       const union ice_32b_rx_flex_desc *rx_desc)
+{
+	/* if we are the last buffer then there is nothing else to do */
+#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
+	if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF)))
+		return false;
+
+	rx_ring->ring_stats->rx_stats.non_eop_descs++;
+
+	return true;
+}
+
+static inline __le64
+ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
+{
+	return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+			   (td_cmd    << ICE_TXD_QW1_CMD_S) |
+			   (td_offset << ICE_TXD_QW1_OFFSET_S) |
+			   ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+			   (td_tag    << ICE_TXD_QW1_L2TAG1_S));
+}
+
+/**
+ * ice_get_vlan_tag_from_rx_desc - get VLAN from Rx flex descriptor
+ * @rx_desc: Rx 32b flex descriptor with RXDID=2
+ *
+ * The OS and current PF implementation only support stripping a single VLAN tag
+ * at a time, so there should only ever be 0 or 1 tags in the l2tag* fields. If
+ * one is found return the tag, else return 0 to mean no VLAN tag was found.
+ */
+static inline u16
+ice_get_vlan_tag_from_rx_desc(union ice_32b_rx_flex_desc *rx_desc)
+{
+	u16 stat_err_bits;
+
+	stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
+	if (ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
+		return le16_to_cpu(rx_desc->wb.l2tag1);
+
+	stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS1_L2TAG2P_S);
+	if (ice_test_staterr(rx_desc->wb.status_error1, stat_err_bits))
+		return le16_to_cpu(rx_desc->wb.l2tag2_2nd);
+
+	return 0;
+}
+
+/**
+ * ice_xdp_ring_update_tail - Updates the XDP Tx ring tail register
+ * @xdp_ring: XDP Tx ring
+ *
+ * This function updates the XDP Tx ring tail register.
+ */
+static inline void ice_xdp_ring_update_tail(struct ice_tx_ring *xdp_ring)
+{
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.
+	 */
+	wmb();
+	writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
+}
+
+/**
+ * ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU)
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ *
+ * returns index of descriptor that had RS bit produced on
+ */
+static inline u32 ice_set_rs_bit(const struct ice_tx_ring *xdp_ring)
+{
+	u32 rs_idx = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1;
+	struct ice_tx_desc *tx_desc;
+
+	tx_desc = ICE_TX_DESC(xdp_ring, rs_idx);
+	tx_desc->cmd_type_offset_bsz |=
+		cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
+
+	return rs_idx;
+}
+
+void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res, u32 first_idx);
+int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring);
+int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring,
+			bool frame);
+void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val);
+void
+ice_process_skb_fields(struct ice_rx_ring *rx_ring,
+		       union ice_32b_rx_flex_desc *rx_desc,
+		       struct sk_buff *skb, u16 ptype);
+void
+ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag);
+#endif /* !_ICE_TXRX_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
new file mode 100644
index 0000000000..5e353b0cbe
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -0,0 +1,1158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_TYPE_H_
+#define _ICE_TYPE_H_
+
+#define ICE_BYTES_PER_WORD	2
+#define ICE_BYTES_PER_DWORD	4
+#define ICE_CHNL_MAX_TC		16
+
+#include "ice_hw_autogen.h"
+#include "ice_devids.h"
+#include "ice_osdep.h"
+#include "ice_controlq.h"
+#include "ice_lan_tx_rx.h"
+#include "ice_flex_type.h"
+#include "ice_protocol_type.h"
+#include "ice_sbq_cmd.h"
+#include "ice_vlan_mode.h"
+
+static inline bool ice_is_tc_ena(unsigned long bitmap, u8 tc)
+{
+	return test_bit(tc, &bitmap);
+}
+
+static inline u64 round_up_64bit(u64 a, u32 b)
+{
+	return div64_long(((a) + (b) / 2), (b));
+}
+
+static inline u32 ice_round_to_num(u32 N, u32 R)
+{
+	return ((((N) % (R)) < ((R) / 2)) ? (((N) / (R)) * (R)) :
+		((((N) + (R) - 1) / (R)) * (R)));
+}
+
+/* Driver always calls main vsi_handle first */
+#define ICE_MAIN_VSI_HANDLE		0
+
+/* debug masks - set these bits in hw->debug_mask to control output */
+#define ICE_DBG_INIT		BIT_ULL(1)
+#define ICE_DBG_FW_LOG		BIT_ULL(3)
+#define ICE_DBG_LINK		BIT_ULL(4)
+#define ICE_DBG_PHY		BIT_ULL(5)
+#define ICE_DBG_QCTX		BIT_ULL(6)
+#define ICE_DBG_NVM		BIT_ULL(7)
+#define ICE_DBG_LAN		BIT_ULL(8)
+#define ICE_DBG_FLOW		BIT_ULL(9)
+#define ICE_DBG_SW		BIT_ULL(13)
+#define ICE_DBG_SCHED		BIT_ULL(14)
+#define ICE_DBG_RDMA		BIT_ULL(15)
+#define ICE_DBG_PKG		BIT_ULL(16)
+#define ICE_DBG_RES		BIT_ULL(17)
+#define ICE_DBG_PTP		BIT_ULL(19)
+#define ICE_DBG_AQ_MSG		BIT_ULL(24)
+#define ICE_DBG_AQ_DESC		BIT_ULL(25)
+#define ICE_DBG_AQ_DESC_BUF	BIT_ULL(26)
+#define ICE_DBG_AQ_CMD		BIT_ULL(27)
+#define ICE_DBG_AQ		(ICE_DBG_AQ_MSG		| \
+				 ICE_DBG_AQ_DESC	| \
+				 ICE_DBG_AQ_DESC_BUF	| \
+				 ICE_DBG_AQ_CMD)
+
+#define ICE_DBG_USER		BIT_ULL(31)
+
+enum ice_aq_res_ids {
+	ICE_NVM_RES_ID = 1,
+	ICE_SPD_RES_ID,
+	ICE_CHANGE_LOCK_RES_ID,
+	ICE_GLOBAL_CFG_LOCK_RES_ID
+};
+
+/* FW update timeout definitions are in milliseconds */
+#define ICE_NVM_TIMEOUT			180000
+#define ICE_CHANGE_LOCK_TIMEOUT		1000
+#define ICE_GLOBAL_CFG_LOCK_TIMEOUT	5000
+
+enum ice_aq_res_access_type {
+	ICE_RES_READ = 1,
+	ICE_RES_WRITE
+};
+
+struct ice_driver_ver {
+	u8 major_ver;
+	u8 minor_ver;
+	u8 build_ver;
+	u8 subbuild_ver;
+	u8 driver_string[32];
+};
+
+enum ice_fc_mode {
+	ICE_FC_NONE = 0,
+	ICE_FC_RX_PAUSE,
+	ICE_FC_TX_PAUSE,
+	ICE_FC_FULL,
+	ICE_FC_PFC,
+	ICE_FC_DFLT
+};
+
+enum ice_phy_cache_mode {
+	ICE_FC_MODE = 0,
+	ICE_SPEED_MODE,
+	ICE_FEC_MODE
+};
+
+enum ice_fec_mode {
+	ICE_FEC_NONE = 0,
+	ICE_FEC_RS,
+	ICE_FEC_BASER,
+	ICE_FEC_AUTO
+};
+
+struct ice_phy_cache_mode_data {
+	union {
+		enum ice_fec_mode curr_user_fec_req;
+		enum ice_fc_mode curr_user_fc_req;
+		u16 curr_user_speed_req;
+	} data;
+};
+
+enum ice_set_fc_aq_failures {
+	ICE_SET_FC_AQ_FAIL_NONE = 0,
+	ICE_SET_FC_AQ_FAIL_GET,
+	ICE_SET_FC_AQ_FAIL_SET,
+	ICE_SET_FC_AQ_FAIL_UPDATE
+};
+
+/* Various MAC types */
+enum ice_mac_type {
+	ICE_MAC_UNKNOWN = 0,
+	ICE_MAC_E810,
+	ICE_MAC_GENERIC,
+};
+
+/* Media Types */
+enum ice_media_type {
+	ICE_MEDIA_UNKNOWN = 0,
+	ICE_MEDIA_FIBER,
+	ICE_MEDIA_BASET,
+	ICE_MEDIA_BACKPLANE,
+	ICE_MEDIA_DA,
+};
+
+enum ice_vsi_type {
+	ICE_VSI_PF = 0,
+	ICE_VSI_VF = 1,
+	ICE_VSI_CTRL = 3,	/* equates to ICE_VSI_PF with 1 queue pair */
+	ICE_VSI_CHNL = 4,
+	ICE_VSI_LB = 6,
+	ICE_VSI_SWITCHDEV_CTRL = 7,
+};
+
+struct ice_link_status {
+	/* Refer to ice_aq_phy_type for bits definition */
+	u64 phy_type_low;
+	u64 phy_type_high;
+	u8 topo_media_conflict;
+	u16 max_frame_size;
+	u16 link_speed;
+	u16 req_speeds;
+	u8 link_cfg_err;
+	u8 lse_ena;	/* Link Status Event notification */
+	u8 link_info;
+	u8 an_info;
+	u8 ext_info;
+	u8 fec_info;
+	u8 pacing;
+	/* Refer to #define from module_type[ICE_MODULE_TYPE_TOTAL_BYTE] of
+	 * ice_aqc_get_phy_caps structure
+	 */
+	u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE];
+};
+
+/* Different reset sources for which a disable queue AQ call has to be made in
+ * order to clean the Tx scheduler as a part of the reset
+ */
+enum ice_disq_rst_src {
+	ICE_NO_RESET = 0,
+	ICE_VM_RESET,
+	ICE_VF_RESET,
+};
+
+/* PHY info such as phy_type, etc... */
+struct ice_phy_info {
+	struct ice_link_status link_info;
+	struct ice_link_status link_info_old;
+	u64 phy_type_low;
+	u64 phy_type_high;
+	enum ice_media_type media_type;
+	u8 get_link_info;
+	/* Please refer to struct ice_aqc_get_link_status_data to get
+	 * detail of enable bit in curr_user_speed_req
+	 */
+	u16 curr_user_speed_req;
+	enum ice_fec_mode curr_user_fec_req;
+	enum ice_fc_mode curr_user_fc_req;
+	struct ice_aqc_set_phy_cfg_data curr_user_phy_cfg;
+};
+
+/* protocol enumeration for filters */
+enum ice_fltr_ptype {
+	/* NONE - used for undef/error */
+	ICE_FLTR_PTYPE_NONF_NONE = 0,
+	ICE_FLTR_PTYPE_NONF_IPV4_UDP,
+	ICE_FLTR_PTYPE_NONF_IPV4_TCP,
+	ICE_FLTR_PTYPE_NONF_IPV4_SCTP,
+	ICE_FLTR_PTYPE_NONF_IPV4_OTHER,
+	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP,
+	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP,
+	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP,
+	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER,
+	ICE_FLTR_PTYPE_NONF_IPV6_GTPU_IPV6_OTHER,
+	ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3,
+	ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3,
+	ICE_FLTR_PTYPE_NONF_IPV4_ESP,
+	ICE_FLTR_PTYPE_NONF_IPV6_ESP,
+	ICE_FLTR_PTYPE_NONF_IPV4_AH,
+	ICE_FLTR_PTYPE_NONF_IPV6_AH,
+	ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP,
+	ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP,
+	ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE,
+	ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION,
+	ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE,
+	ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION,
+	ICE_FLTR_PTYPE_NON_IP_L2,
+	ICE_FLTR_PTYPE_FRAG_IPV4,
+	ICE_FLTR_PTYPE_NONF_IPV6_UDP,
+	ICE_FLTR_PTYPE_NONF_IPV6_TCP,
+	ICE_FLTR_PTYPE_NONF_IPV6_SCTP,
+	ICE_FLTR_PTYPE_NONF_IPV6_OTHER,
+	ICE_FLTR_PTYPE_MAX,
+};
+
+enum ice_fd_hw_seg {
+	ICE_FD_HW_SEG_NON_TUN = 0,
+	ICE_FD_HW_SEG_TUN,
+	ICE_FD_HW_SEG_MAX,
+};
+
+/* 1 ICE_VSI_PF + 1 ICE_VSI_CTRL + ICE_CHNL_MAX_TC */
+#define ICE_MAX_FDIR_VSI_PER_FILTER	(2 + ICE_CHNL_MAX_TC)
+
+struct ice_fd_hw_prof {
+	struct ice_flow_seg_info *fdir_seg[ICE_FD_HW_SEG_MAX];
+	int cnt;
+	u64 entry_h[ICE_MAX_FDIR_VSI_PER_FILTER][ICE_FD_HW_SEG_MAX];
+	u16 vsi_h[ICE_MAX_FDIR_VSI_PER_FILTER];
+};
+
+/* Common HW capabilities for SW use */
+struct ice_hw_common_caps {
+	u32 valid_functions;
+	/* DCB capabilities */
+	u32 active_tc_bitmap;
+	u32 maxtc;
+
+	/* Tx/Rx queues */
+	u16 num_rxq;		/* Number/Total Rx queues */
+	u16 rxq_first_id;	/* First queue ID for Rx queues */
+	u16 num_txq;		/* Number/Total Tx queues */
+	u16 txq_first_id;	/* First queue ID for Tx queues */
+
+	/* MSI-X vectors */
+	u16 num_msix_vectors;
+	u16 msix_vector_first_id;
+
+	/* Max MTU for function or device */
+	u16 max_mtu;
+
+	/* Virtualization support */
+	u8 sr_iov_1_1;			/* SR-IOV enabled */
+
+	/* RSS related capabilities */
+	u16 rss_table_size;		/* 512 for PFs and 64 for VFs */
+	u8 rss_table_entry_width;	/* RSS Entry width in bits */
+
+	u8 dcb;
+	u8 ieee_1588;
+	u8 rdma;
+	u8 roce_lag;
+	u8 sriov_lag;
+
+	bool nvm_update_pending_nvm;
+	bool nvm_update_pending_orom;
+	bool nvm_update_pending_netlist;
+#define ICE_NVM_PENDING_NVM_IMAGE		BIT(0)
+#define ICE_NVM_PENDING_OROM			BIT(1)
+#define ICE_NVM_PENDING_NETLIST			BIT(2)
+	bool nvm_unified_update;
+#define ICE_NVM_MGMT_UNIFIED_UPD_SUPPORT	BIT(3)
+	/* PCIe reset avoidance */
+	bool pcie_reset_avoidance;
+	/* Post update reset restriction */
+	bool reset_restrict_support;
+};
+
+/* IEEE 1588 TIME_SYNC specific info */
+/* Function specific definitions */
+#define ICE_TS_FUNC_ENA_M		BIT(0)
+#define ICE_TS_SRC_TMR_OWND_M		BIT(1)
+#define ICE_TS_TMR_ENA_M		BIT(2)
+#define ICE_TS_TMR_IDX_OWND_S		4
+#define ICE_TS_TMR_IDX_OWND_M		BIT(4)
+#define ICE_TS_CLK_FREQ_S		16
+#define ICE_TS_CLK_FREQ_M		ICE_M(0x7, ICE_TS_CLK_FREQ_S)
+#define ICE_TS_CLK_SRC_S		20
+#define ICE_TS_CLK_SRC_M		BIT(20)
+#define ICE_TS_TMR_IDX_ASSOC_S		24
+#define ICE_TS_TMR_IDX_ASSOC_M		BIT(24)
+
+/* TIME_REF clock rate specification */
+enum ice_time_ref_freq {
+	ICE_TIME_REF_FREQ_25_000	= 0,
+	ICE_TIME_REF_FREQ_122_880	= 1,
+	ICE_TIME_REF_FREQ_125_000	= 2,
+	ICE_TIME_REF_FREQ_153_600	= 3,
+	ICE_TIME_REF_FREQ_156_250	= 4,
+	ICE_TIME_REF_FREQ_245_760	= 5,
+
+	NUM_ICE_TIME_REF_FREQ
+};
+
+/* Clock source specification */
+enum ice_clk_src {
+	ICE_CLK_SRC_TCX0	= 0, /* Temperature compensated oscillator  */
+	ICE_CLK_SRC_TIME_REF	= 1, /* Use TIME_REF reference clock */
+
+	NUM_ICE_CLK_SRC
+};
+
+struct ice_ts_func_info {
+	/* Function specific info */
+	enum ice_time_ref_freq time_ref;
+	u8 clk_freq;
+	u8 clk_src;
+	u8 tmr_index_assoc;
+	u8 ena;
+	u8 tmr_index_owned;
+	u8 src_tmr_owned;
+	u8 tmr_ena;
+};
+
+/* Device specific definitions */
+#define ICE_TS_TMR0_OWNR_M		0x7
+#define ICE_TS_TMR0_OWND_M		BIT(3)
+#define ICE_TS_TMR1_OWNR_S		4
+#define ICE_TS_TMR1_OWNR_M		ICE_M(0x7, ICE_TS_TMR1_OWNR_S)
+#define ICE_TS_TMR1_OWND_M		BIT(7)
+#define ICE_TS_DEV_ENA_M		BIT(24)
+#define ICE_TS_TMR0_ENA_M		BIT(25)
+#define ICE_TS_TMR1_ENA_M		BIT(26)
+#define ICE_TS_LL_TX_TS_READ_M		BIT(28)
+
+struct ice_ts_dev_info {
+	/* Device specific info */
+	u32 ena_ports;
+	u32 tmr_own_map;
+	u32 tmr0_owner;
+	u32 tmr1_owner;
+	u8 tmr0_owned;
+	u8 tmr1_owned;
+	u8 ena;
+	u8 tmr0_ena;
+	u8 tmr1_ena;
+	u8 ts_ll_read;
+};
+
+/* Function specific capabilities */
+struct ice_hw_func_caps {
+	struct ice_hw_common_caps common_cap;
+	u32 num_allocd_vfs;		/* Number of allocated VFs */
+	u32 vf_base_id;			/* Logical ID of the first VF */
+	u32 guar_num_vsi;
+	u32 fd_fltr_guar;		/* Number of filters guaranteed */
+	u32 fd_fltr_best_effort;	/* Number of best effort filters */
+	struct ice_ts_func_info ts_func_info;
+};
+
+/* Device wide capabilities */
+struct ice_hw_dev_caps {
+	struct ice_hw_common_caps common_cap;
+	u32 num_vfs_exposed;		/* Total number of VFs exposed */
+	u32 num_vsi_allocd_to_host;	/* Excluding EMP VSI */
+	u32 num_flow_director_fltr;	/* Number of FD filters available */
+	struct ice_ts_dev_info ts_dev_info;
+	u32 num_funcs;
+};
+
+/* MAC info */
+struct ice_mac_info {
+	u8 lan_addr[ETH_ALEN];
+	u8 perm_addr[ETH_ALEN];
+};
+
+/* Reset types used to determine which kind of reset was requested. These
+ * defines match what the RESET_TYPE field of the GLGEN_RSTAT register.
+ * ICE_RESET_PFR does not match any RESET_TYPE field in the GLGEN_RSTAT register
+ * because its reset source is different than the other types listed.
+ */
+enum ice_reset_req {
+	ICE_RESET_POR	= 0,
+	ICE_RESET_INVAL	= 0,
+	ICE_RESET_CORER	= 1,
+	ICE_RESET_GLOBR	= 2,
+	ICE_RESET_EMPR	= 3,
+	ICE_RESET_PFR	= 4,
+};
+
+/* Bus parameters */
+struct ice_bus_info {
+	u16 device;
+	u8 func;
+};
+
+/* Flow control (FC) parameters */
+struct ice_fc_info {
+	enum ice_fc_mode current_mode;	/* FC mode in effect */
+	enum ice_fc_mode req_mode;	/* FC mode requested by caller */
+};
+
+/* Option ROM version information */
+struct ice_orom_info {
+	u8 major;			/* Major version of OROM */
+	u8 patch;			/* Patch version of OROM */
+	u16 build;			/* Build version of OROM */
+};
+
+/* NVM version information */
+struct ice_nvm_info {
+	u32 eetrack;
+	u8 major;
+	u8 minor;
+};
+
+/* netlist version information */
+struct ice_netlist_info {
+	u32 major;			/* major high/low */
+	u32 minor;			/* minor high/low */
+	u32 type;			/* type high/low */
+	u32 rev;			/* revision high/low */
+	u32 hash;			/* SHA-1 hash word */
+	u16 cust_ver;			/* customer version */
+};
+
+/* Enumeration of possible flash banks for the NVM, OROM, and Netlist modules
+ * of the flash image.
+ */
+enum ice_flash_bank {
+	ICE_INVALID_FLASH_BANK,
+	ICE_1ST_FLASH_BANK,
+	ICE_2ND_FLASH_BANK,
+};
+
+/* Enumeration of which flash bank is desired to read from, either the active
+ * bank or the inactive bank. Used to abstract 1st and 2nd bank notion from
+ * code which just wants to read the active or inactive flash bank.
+ */
+enum ice_bank_select {
+	ICE_ACTIVE_FLASH_BANK,
+	ICE_INACTIVE_FLASH_BANK,
+};
+
+/* information for accessing NVM, OROM, and Netlist flash banks */
+struct ice_bank_info {
+	u32 nvm_ptr;				/* Pointer to 1st NVM bank */
+	u32 nvm_size;				/* Size of NVM bank */
+	u32 orom_ptr;				/* Pointer to 1st OROM bank */
+	u32 orom_size;				/* Size of OROM bank */
+	u32 netlist_ptr;			/* Pointer to 1st Netlist bank */
+	u32 netlist_size;			/* Size of Netlist bank */
+	enum ice_flash_bank nvm_bank;		/* Active NVM bank */
+	enum ice_flash_bank orom_bank;		/* Active OROM bank */
+	enum ice_flash_bank netlist_bank;	/* Active Netlist bank */
+};
+
+/* Flash Chip Information */
+struct ice_flash_info {
+	struct ice_orom_info orom;	/* Option ROM version info */
+	struct ice_nvm_info nvm;	/* NVM version information */
+	struct ice_netlist_info netlist;/* Netlist version info */
+	struct ice_bank_info banks;	/* Flash Bank information */
+	u16 sr_words;			/* Shadow RAM size in words */
+	u32 flash_size;			/* Size of available flash in bytes */
+	u8 blank_nvm_mode;		/* is NVM empty (no FW present) */
+};
+
+struct ice_link_default_override_tlv {
+	u8 options;
+#define ICE_LINK_OVERRIDE_OPT_M		0x3F
+#define ICE_LINK_OVERRIDE_STRICT_MODE	BIT(0)
+#define ICE_LINK_OVERRIDE_EPCT_DIS	BIT(1)
+#define ICE_LINK_OVERRIDE_PORT_DIS	BIT(2)
+#define ICE_LINK_OVERRIDE_EN		BIT(3)
+#define ICE_LINK_OVERRIDE_AUTO_LINK_DIS	BIT(4)
+#define ICE_LINK_OVERRIDE_EEE_EN	BIT(5)
+	u8 phy_config;
+#define ICE_LINK_OVERRIDE_PHY_CFG_S	8
+#define ICE_LINK_OVERRIDE_PHY_CFG_M	(0xC3 << ICE_LINK_OVERRIDE_PHY_CFG_S)
+#define ICE_LINK_OVERRIDE_PAUSE_M	0x3
+#define ICE_LINK_OVERRIDE_LESM_EN	BIT(6)
+#define ICE_LINK_OVERRIDE_AUTO_FEC_EN	BIT(7)
+	u8 fec_options;
+#define ICE_LINK_OVERRIDE_FEC_OPT_M	0xFF
+	u8 rsvd1;
+	u64 phy_type_low;
+	u64 phy_type_high;
+};
+
+#define ICE_NVM_VER_LEN	32
+
+/* Max number of port to queue branches w.r.t topology */
+#define ICE_MAX_TRAFFIC_CLASS 8
+#define ICE_TXSCHED_MAX_BRANCHES ICE_MAX_TRAFFIC_CLASS
+
+#define ice_for_each_traffic_class(_i)	\
+	for ((_i) = 0; (_i) < ICE_MAX_TRAFFIC_CLASS; (_i)++)
+
+/* ICE_DFLT_AGG_ID means that all new VM(s)/VSI node connects
+ * to driver defined policy for default aggregator
+ */
+#define ICE_INVAL_TEID 0xFFFFFFFF
+#define ICE_DFLT_AGG_ID 0
+
+struct ice_sched_node {
+	struct ice_sched_node *parent;
+	struct ice_sched_node *sibling; /* next sibling in the same layer */
+	struct ice_sched_node **children;
+	struct ice_aqc_txsched_elem_data info;
+	char *name;
+	struct devlink_rate *rate_node;
+	u64 tx_max;
+	u64 tx_share;
+	u32 agg_id;			/* aggregator group ID */
+	u32 id;
+	u32 tx_priority;
+	u32 tx_weight;
+	u16 vsi_handle;
+	u8 in_use;			/* suspended or in use */
+	u8 tx_sched_layer;		/* Logical Layer (1-9) */
+	u8 num_children;
+	u8 tc_num;
+	u8 owner;
+#define ICE_SCHED_NODE_OWNER_LAN	0
+#define ICE_SCHED_NODE_OWNER_RDMA	2
+};
+
+/* Access Macros for Tx Sched Elements data */
+#define ICE_TXSCHED_GET_NODE_TEID(x) le32_to_cpu((x)->info.node_teid)
+
+/* The aggregator type determines if identifier is for a VSI group,
+ * aggregator group, aggregator of queues, or queue group.
+ */
+enum ice_agg_type {
+	ICE_AGG_TYPE_UNKNOWN = 0,
+	ICE_AGG_TYPE_VSI,
+	ICE_AGG_TYPE_AGG, /* aggregator */
+	ICE_AGG_TYPE_Q,
+	ICE_AGG_TYPE_QG
+};
+
+/* Rate limit types */
+enum ice_rl_type {
+	ICE_UNKNOWN_BW = 0,
+	ICE_MIN_BW,		/* for CIR profile */
+	ICE_MAX_BW,		/* for EIR profile */
+	ICE_SHARED_BW		/* for shared profile */
+};
+
+#define ICE_SCHED_MIN_BW		500		/* in Kbps */
+#define ICE_SCHED_MAX_BW		100000000	/* in Kbps */
+#define ICE_SCHED_DFLT_BW		0xFFFFFFFF	/* unlimited */
+#define ICE_SCHED_DFLT_RL_PROF_ID	0
+#define ICE_SCHED_NO_SHARED_RL_PROF_ID	0xFFFF
+#define ICE_SCHED_DFLT_BW_WT		4
+#define ICE_SCHED_INVAL_PROF_ID		0xFFFF
+#define ICE_SCHED_DFLT_BURST_SIZE	(15 * 1024)	/* in bytes (15k) */
+
+#define ICE_MAX_PORT_PER_PCI_DEV 8
+
+ /* Data structure for saving BW information */
+enum ice_bw_type {
+	ICE_BW_TYPE_PRIO,
+	ICE_BW_TYPE_CIR,
+	ICE_BW_TYPE_CIR_WT,
+	ICE_BW_TYPE_EIR,
+	ICE_BW_TYPE_EIR_WT,
+	ICE_BW_TYPE_SHARED,
+	ICE_BW_TYPE_CNT		/* This must be last */
+};
+
+struct ice_bw {
+	u32 bw;
+	u16 bw_alloc;
+};
+
+struct ice_bw_type_info {
+	DECLARE_BITMAP(bw_t_bitmap, ICE_BW_TYPE_CNT);
+	u8 generic;
+	struct ice_bw cir_bw;
+	struct ice_bw eir_bw;
+	u32 shared_bw;
+};
+
+/* VSI queue context structure for given TC */
+struct ice_q_ctx {
+	u16  q_handle;
+	u32  q_teid;
+	/* bw_t_info saves queue BW information */
+	struct ice_bw_type_info bw_t_info;
+};
+
+/* VSI type list entry to locate corresponding VSI/aggregator nodes */
+struct ice_sched_vsi_info {
+	struct ice_sched_node *vsi_node[ICE_MAX_TRAFFIC_CLASS];
+	struct ice_sched_node *ag_node[ICE_MAX_TRAFFIC_CLASS];
+	struct list_head list_entry;
+	u16 max_lanq[ICE_MAX_TRAFFIC_CLASS];
+	u16 max_rdmaq[ICE_MAX_TRAFFIC_CLASS];
+	/* bw_t_info saves VSI BW information */
+	struct ice_bw_type_info bw_t_info[ICE_MAX_TRAFFIC_CLASS];
+};
+
+/* driver defines the policy */
+struct ice_sched_tx_policy {
+	u16 max_num_vsis;
+	u8 max_num_lan_qs_per_tc[ICE_MAX_TRAFFIC_CLASS];
+	u8 rdma_ena;
+};
+
+/* CEE or IEEE 802.1Qaz ETS Configuration data */
+struct ice_dcb_ets_cfg {
+	u8 willing;
+	u8 cbs;
+	u8 maxtcs;
+	u8 prio_table[ICE_MAX_TRAFFIC_CLASS];
+	u8 tcbwtable[ICE_MAX_TRAFFIC_CLASS];
+	u8 tsatable[ICE_MAX_TRAFFIC_CLASS];
+};
+
+/* CEE or IEEE 802.1Qaz PFC Configuration data */
+struct ice_dcb_pfc_cfg {
+	u8 willing;
+	u8 mbc;
+	u8 pfccap;
+	u8 pfcena;
+};
+
+/* CEE or IEEE 802.1Qaz Application Priority data */
+struct ice_dcb_app_priority_table {
+	u16 prot_id;
+	u8 priority;
+	u8 selector;
+};
+
+#define ICE_MAX_USER_PRIORITY	8
+#define ICE_DCBX_MAX_APPS	64
+#define ICE_DSCP_NUM_VAL	64
+#define ICE_LLDPDU_SIZE		1500
+#define ICE_TLV_STATUS_OPER	0x1
+#define ICE_TLV_STATUS_SYNC	0x2
+#define ICE_TLV_STATUS_ERR	0x4
+#define ICE_APP_PROT_ID_ISCSI_860 0x035c
+#define ICE_APP_SEL_ETHTYPE	0x1
+#define ICE_APP_SEL_TCPIP	0x2
+#define ICE_CEE_APP_SEL_ETHTYPE	0x0
+#define ICE_SR_LINK_DEFAULT_OVERRIDE_PTR	0x134
+#define ICE_CEE_APP_SEL_TCPIP	0x1
+
+struct ice_dcbx_cfg {
+	u32 numapps;
+	u32 tlv_status; /* CEE mode TLV status */
+	struct ice_dcb_ets_cfg etscfg;
+	struct ice_dcb_ets_cfg etsrec;
+	struct ice_dcb_pfc_cfg pfc;
+#define ICE_QOS_MODE_VLAN	0x0
+#define ICE_QOS_MODE_DSCP	0x1
+	u8 pfc_mode;
+	struct ice_dcb_app_priority_table app[ICE_DCBX_MAX_APPS];
+	/* when DSCP mapping defined by user set its bit to 1 */
+	DECLARE_BITMAP(dscp_mapped, ICE_DSCP_NUM_VAL);
+	/* array holding DSCP -> UP/TC values for DSCP L3 QoS mode */
+	u8 dscp_map[ICE_DSCP_NUM_VAL];
+	u8 dcbx_mode;
+#define ICE_DCBX_MODE_CEE	0x1
+#define ICE_DCBX_MODE_IEEE	0x2
+	u8 app_mode;
+#define ICE_DCBX_APPS_NON_WILLING	0x1
+};
+
+struct ice_qos_cfg {
+	struct ice_dcbx_cfg local_dcbx_cfg;	/* Oper/Local Cfg */
+	struct ice_dcbx_cfg desired_dcbx_cfg;	/* CEE Desired Cfg */
+	struct ice_dcbx_cfg remote_dcbx_cfg;	/* Peer Cfg */
+	u8 dcbx_status : 3;			/* see ICE_DCBX_STATUS_DIS */
+	u8 is_sw_lldp : 1;
+};
+
+struct ice_port_info {
+	struct ice_sched_node *root;	/* Root Node per Port */
+	struct ice_hw *hw;		/* back pointer to HW instance */
+	u32 last_node_teid;		/* scheduler last node info */
+	u16 sw_id;			/* Initial switch ID belongs to port */
+	u16 pf_vf_num;
+	u8 port_state;
+#define ICE_SCHED_PORT_STATE_INIT	0x0
+#define ICE_SCHED_PORT_STATE_READY	0x1
+	u8 lport;
+#define ICE_LPORT_MASK			0xff
+	struct ice_fc_info fc;
+	struct ice_mac_info mac;
+	struct ice_phy_info phy;
+	struct mutex sched_lock;	/* protect access to TXSched tree */
+	struct ice_sched_node *
+		sib_head[ICE_MAX_TRAFFIC_CLASS][ICE_AQC_TOPO_MAX_LEVEL_NUM];
+	/* List contain profile ID(s) and other params per layer */
+	struct list_head rl_prof_list[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+	struct ice_qos_cfg qos_cfg;
+	struct xarray sched_node_ids;
+	u8 is_vf:1;
+	u8 is_custom_tx_enabled:1;
+};
+
+struct ice_switch_info {
+	struct list_head vsi_list_map_head;
+	struct ice_sw_recipe *recp_list;
+	u16 prof_res_bm_init;
+	u16 max_used_prof_index;
+
+	DECLARE_BITMAP(prof_res_bm[ICE_MAX_NUM_PROFILES], ICE_MAX_FV_WORDS);
+};
+
+/* FW logging configuration */
+struct ice_fw_log_evnt {
+	u8 cfg : 4;	/* New event enables to configure */
+	u8 cur : 4;	/* Current/active event enables */
+};
+
+struct ice_fw_log_cfg {
+	u8 cq_en : 1;    /* FW logging is enabled via the control queue */
+	u8 uart_en : 1;  /* FW logging is enabled via UART for all PFs */
+	u8 actv_evnts;   /* Cumulation of currently enabled log events */
+
+#define ICE_FW_LOG_EVNT_INFO	(ICE_AQC_FW_LOG_INFO_EN >> ICE_AQC_FW_LOG_EN_S)
+#define ICE_FW_LOG_EVNT_INIT	(ICE_AQC_FW_LOG_INIT_EN >> ICE_AQC_FW_LOG_EN_S)
+#define ICE_FW_LOG_EVNT_FLOW	(ICE_AQC_FW_LOG_FLOW_EN >> ICE_AQC_FW_LOG_EN_S)
+#define ICE_FW_LOG_EVNT_ERR	(ICE_AQC_FW_LOG_ERR_EN >> ICE_AQC_FW_LOG_EN_S)
+	struct ice_fw_log_evnt evnts[ICE_AQC_FW_LOG_ID_MAX];
+};
+
+/* Enum defining the different states of the mailbox snapshot in the
+ * PF-VF mailbox overflow detection algorithm. The snapshot can be in
+ * states:
+ * 1. ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT - generate a new static snapshot
+ * within the mailbox buffer.
+ * 2. ICE_MAL_VF_DETECT_STATE_TRAVERSE - iterate through the mailbox snaphot
+ * 3. ICE_MAL_VF_DETECT_STATE_DETECT - track the messages sent per VF via the
+ * mailbox and mark any VFs sending more messages than the threshold limit set.
+ * 4. ICE_MAL_VF_DETECT_STATE_INVALID - Invalid mailbox state set to 0xFFFFFFFF.
+ */
+enum ice_mbx_snapshot_state {
+	ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT = 0,
+	ICE_MAL_VF_DETECT_STATE_TRAVERSE,
+	ICE_MAL_VF_DETECT_STATE_DETECT,
+	ICE_MAL_VF_DETECT_STATE_INVALID = 0xFFFFFFFF,
+};
+
+/* Structure to hold information of the static snapshot and the mailbox
+ * buffer data used to generate and track the snapshot.
+ * 1. state: the state of the mailbox snapshot in the malicious VF
+ * detection state handler ice_mbx_vf_state_handler()
+ * 2. head: head of the mailbox snapshot in a circular mailbox buffer
+ * 3. tail: tail of the mailbox snapshot in a circular mailbox buffer
+ * 4. num_iterations: number of messages traversed in circular mailbox buffer
+ * 5. num_msg_proc: number of messages processed in mailbox
+ * 6. num_pending_arq: number of pending asynchronous messages
+ * 7. max_num_msgs_mbx: maximum messages in mailbox for currently
+ * serviced work item or interrupt.
+ */
+struct ice_mbx_snap_buffer_data {
+	enum ice_mbx_snapshot_state state;
+	u32 head;
+	u32 tail;
+	u32 num_iterations;
+	u16 num_msg_proc;
+	u16 num_pending_arq;
+	u16 max_num_msgs_mbx;
+};
+
+/* Structure used to track a single VF's messages on the mailbox:
+ * 1. list_entry: linked list entry node
+ * 2. msg_count: the number of asynchronous messages sent by this VF
+ * 3. malicious: whether this VF has been detected as malicious before
+ */
+struct ice_mbx_vf_info {
+	struct list_head list_entry;
+	u32 msg_count;
+	u8 malicious : 1;
+};
+
+/* Structure to hold data relevant to the captured static snapshot
+ * of the PF-VF mailbox.
+ */
+struct ice_mbx_snapshot {
+	struct ice_mbx_snap_buffer_data mbx_buf;
+	struct list_head mbx_vf;
+};
+
+/* Structure to hold data to be used for capturing or updating a
+ * static snapshot.
+ * 1. num_msg_proc: number of messages processed in mailbox
+ * 2. num_pending_arq: number of pending asynchronous messages
+ * 3. max_num_msgs_mbx: maximum messages in mailbox for currently
+ * serviced work item or interrupt.
+ * 4. async_watermark_val: An upper threshold set by caller to determine
+ * if the pending arq count is large enough to assume that there is
+ * the possibility of a mailicious VF.
+ */
+struct ice_mbx_data {
+	u16 num_msg_proc;
+	u16 num_pending_arq;
+	u16 max_num_msgs_mbx;
+	u16 async_watermark_val;
+};
+
+/* Port hardware description */
+struct ice_hw {
+	u8 __iomem *hw_addr;
+	void *back;
+	struct ice_aqc_layer_props *layer_info;
+	struct ice_port_info *port_info;
+	/* PSM clock frequency for calculating RL profile params */
+	u32 psm_clk_freq;
+	u64 debug_mask;		/* bitmap for debug mask */
+	enum ice_mac_type mac_type;
+
+	u16 fd_ctr_base;	/* FD counter base index */
+
+	/* pci info */
+	u16 device_id;
+	u16 vendor_id;
+	u16 subsystem_device_id;
+	u16 subsystem_vendor_id;
+	u8 revision_id;
+
+	u8 pf_id;		/* device profile info */
+
+	u16 max_burst_size;	/* driver sets this value */
+
+	/* Tx Scheduler values */
+	u8 num_tx_sched_layers;
+	u8 num_tx_sched_phys_layers;
+	u8 flattened_layers;
+	u8 max_cgds;
+	u8 sw_entry_point_layer;
+	u16 max_children[ICE_AQC_TOPO_MAX_LEVEL_NUM];
+	struct list_head agg_list;	/* lists all aggregator */
+
+	struct ice_vsi_ctx *vsi_ctx[ICE_MAX_VSI];
+	u8 evb_veb;		/* true for VEB, false for VEPA */
+	u8 reset_ongoing;	/* true if HW is in reset, false otherwise */
+	struct ice_bus_info bus;
+	struct ice_flash_info flash;
+	struct ice_hw_dev_caps dev_caps;	/* device capabilities */
+	struct ice_hw_func_caps func_caps;	/* function capabilities */
+
+	struct ice_switch_info *switch_info;	/* switch filter lists */
+
+	/* Control Queue info */
+	struct ice_ctl_q_info adminq;
+	struct ice_ctl_q_info sbq;
+	struct ice_ctl_q_info mailboxq;
+
+	u8 api_branch;		/* API branch version */
+	u8 api_maj_ver;		/* API major version */
+	u8 api_min_ver;		/* API minor version */
+	u8 api_patch;		/* API patch version */
+	u8 fw_branch;		/* firmware branch version */
+	u8 fw_maj_ver;		/* firmware major version */
+	u8 fw_min_ver;		/* firmware minor version */
+	u8 fw_patch;		/* firmware patch version */
+	u32 fw_build;		/* firmware build number */
+
+	struct ice_fw_log_cfg fw_log;
+
+/* Device max aggregate bandwidths corresponding to the GL_PWR_MODE_CTL
+ * register. Used for determining the ITR/INTRL granularity during
+ * initialization.
+ */
+#define ICE_MAX_AGG_BW_200G	0x0
+#define ICE_MAX_AGG_BW_100G	0X1
+#define ICE_MAX_AGG_BW_50G	0x2
+#define ICE_MAX_AGG_BW_25G	0x3
+	/* ITR granularity for different speeds */
+#define ICE_ITR_GRAN_ABOVE_25	2
+#define ICE_ITR_GRAN_MAX_25	4
+	/* ITR granularity in 1 us */
+	u8 itr_gran;
+	/* INTRL granularity for different speeds */
+#define ICE_INTRL_GRAN_ABOVE_25	4
+#define ICE_INTRL_GRAN_MAX_25	8
+	/* INTRL granularity in 1 us */
+	u8 intrl_gran;
+
+#define ICE_PHY_PER_NAC		1
+#define ICE_MAX_QUAD		2
+#define ICE_NUM_QUAD_TYPE	2
+#define ICE_PORTS_PER_QUAD	4
+#define ICE_PHY_0_LAST_QUAD	1
+#define ICE_PORTS_PER_PHY	8
+#define ICE_NUM_EXTERNAL_PORTS		ICE_PORTS_PER_PHY
+
+	/* Active package version (currently active) */
+	struct ice_pkg_ver active_pkg_ver;
+	u32 active_track_id;
+	u8 active_pkg_name[ICE_PKG_NAME_SIZE];
+	u8 active_pkg_in_nvm;
+
+	/* Driver's package ver - (from the Ice Metadata section) */
+	struct ice_pkg_ver pkg_ver;
+	u8 pkg_name[ICE_PKG_NAME_SIZE];
+
+	/* Driver's Ice segment format version and ID (from the Ice seg) */
+	struct ice_pkg_ver ice_seg_fmt_ver;
+	u8 ice_seg_id[ICE_SEG_ID_SIZE];
+
+	/* Pointer to the ice segment */
+	struct ice_seg *seg;
+
+	/* Pointer to allocated copy of pkg memory */
+	u8 *pkg_copy;
+	u32 pkg_size;
+
+	/* tunneling info */
+	struct mutex tnl_lock;
+	struct ice_tunnel_table tnl;
+
+	struct udp_tunnel_nic_shared udp_tunnel_shared;
+	struct udp_tunnel_nic_info udp_tunnel_nic;
+
+	/* dvm boost update information */
+	struct ice_dvm_table dvm_upd;
+
+	/* HW block tables */
+	struct ice_blk_info blk[ICE_BLK_COUNT];
+	struct mutex fl_profs_locks[ICE_BLK_COUNT];	/* lock fltr profiles */
+	struct list_head fl_profs[ICE_BLK_COUNT];
+
+	/* Flow Director filter info */
+	int fdir_active_fltr;
+
+	struct mutex fdir_fltr_lock;	/* protect Flow Director */
+	struct list_head fdir_list_head;
+
+	/* Book-keeping of side-band filter count per flow-type.
+	 * This is used to detect and handle input set changes for
+	 * respective flow-type.
+	 */
+	u16 fdir_fltr_cnt[ICE_FLTR_PTYPE_MAX];
+
+	struct ice_fd_hw_prof **fdir_prof;
+	DECLARE_BITMAP(fdir_perfect_fltr, ICE_FLTR_PTYPE_MAX);
+	struct mutex rss_locks;	/* protect RSS configuration */
+	struct list_head rss_list_head;
+	struct ice_mbx_snapshot mbx_snapshot;
+	DECLARE_BITMAP(hw_ptype, ICE_FLOW_PTYPE_MAX);
+	u8 dvm_ena;
+	u16 io_expander_handle;
+};
+
+/* Statistics collected by each port, VSI, VEB, and S-channel */
+struct ice_eth_stats {
+	u64 rx_bytes;			/* gorc */
+	u64 rx_unicast;			/* uprc */
+	u64 rx_multicast;		/* mprc */
+	u64 rx_broadcast;		/* bprc */
+	u64 rx_discards;		/* rdpc */
+	u64 rx_unknown_protocol;	/* rupp */
+	u64 tx_bytes;			/* gotc */
+	u64 tx_unicast;			/* uptc */
+	u64 tx_multicast;		/* mptc */
+	u64 tx_broadcast;		/* bptc */
+	u64 tx_discards;		/* tdpc */
+	u64 tx_errors;			/* tepc */
+};
+
+#define ICE_MAX_UP	8
+
+/* Statistics collected by the MAC */
+struct ice_hw_port_stats {
+	/* eth stats collected by the port */
+	struct ice_eth_stats eth;
+	/* additional port specific stats */
+	u64 tx_dropped_link_down;	/* tdold */
+	u64 crc_errors;			/* crcerrs */
+	u64 illegal_bytes;		/* illerrc */
+	u64 error_bytes;		/* errbc */
+	u64 mac_local_faults;		/* mlfc */
+	u64 mac_remote_faults;		/* mrfc */
+	u64 rx_len_errors;		/* rlec */
+	u64 link_xon_rx;		/* lxonrxc */
+	u64 link_xoff_rx;		/* lxoffrxc */
+	u64 link_xon_tx;		/* lxontxc */
+	u64 link_xoff_tx;		/* lxofftxc */
+	u64 priority_xon_rx[8];		/* pxonrxc[8] */
+	u64 priority_xoff_rx[8];	/* pxoffrxc[8] */
+	u64 priority_xon_tx[8];		/* pxontxc[8] */
+	u64 priority_xoff_tx[8];	/* pxofftxc[8] */
+	u64 priority_xon_2_xoff[8];	/* pxon2offc[8] */
+	u64 rx_size_64;			/* prc64 */
+	u64 rx_size_127;		/* prc127 */
+	u64 rx_size_255;		/* prc255 */
+	u64 rx_size_511;		/* prc511 */
+	u64 rx_size_1023;		/* prc1023 */
+	u64 rx_size_1522;		/* prc1522 */
+	u64 rx_size_big;		/* prc9522 */
+	u64 rx_undersize;		/* ruc */
+	u64 rx_fragments;		/* rfc */
+	u64 rx_oversize;		/* roc */
+	u64 rx_jabber;			/* rjc */
+	u64 tx_size_64;			/* ptc64 */
+	u64 tx_size_127;		/* ptc127 */
+	u64 tx_size_255;		/* ptc255 */
+	u64 tx_size_511;		/* ptc511 */
+	u64 tx_size_1023;		/* ptc1023 */
+	u64 tx_size_1522;		/* ptc1522 */
+	u64 tx_size_big;		/* ptc9522 */
+	/* flow director stats */
+	u32 fd_sb_status;
+	u64 fd_sb_match;
+};
+
+enum ice_sw_fwd_act_type {
+	ICE_FWD_TO_VSI = 0,
+	ICE_FWD_TO_VSI_LIST, /* Do not use this when adding filter */
+	ICE_FWD_TO_Q,
+	ICE_FWD_TO_QGRP,
+	ICE_DROP_PACKET,
+	ICE_NOP,
+	ICE_INVAL_ACT
+};
+
+struct ice_aq_get_set_rss_lut_params {
+	u8 *lut;		/* input RSS LUT for set and output RSS LUT for get */
+	enum ice_lut_size lut_size; /* size of the LUT buffer */
+	enum ice_lut_type lut_type; /* type of the LUT (i.e. VSI, PF, Global) */
+	u16 vsi_handle;		/* software VSI handle */
+	u8 global_lut_id;	/* only valid when lut_type is global */
+};
+
+/* Checksum and Shadow RAM pointers */
+#define ICE_SR_NVM_CTRL_WORD		0x00
+#define ICE_SR_BOOT_CFG_PTR		0x132
+#define ICE_SR_NVM_WOL_CFG		0x19
+#define ICE_NVM_OROM_VER_OFF		0x02
+#define ICE_SR_PBA_BLOCK_PTR		0x16
+#define ICE_SR_NVM_DEV_STARTER_VER	0x18
+#define ICE_SR_NVM_EETRACK_LO		0x2D
+#define ICE_SR_NVM_EETRACK_HI		0x2E
+#define ICE_NVM_VER_LO_SHIFT		0
+#define ICE_NVM_VER_LO_MASK		(0xff << ICE_NVM_VER_LO_SHIFT)
+#define ICE_NVM_VER_HI_SHIFT		12
+#define ICE_NVM_VER_HI_MASK		(0xf << ICE_NVM_VER_HI_SHIFT)
+#define ICE_OROM_VER_PATCH_SHIFT	0
+#define ICE_OROM_VER_PATCH_MASK		(0xff << ICE_OROM_VER_PATCH_SHIFT)
+#define ICE_OROM_VER_BUILD_SHIFT	8
+#define ICE_OROM_VER_BUILD_MASK		(0xffff << ICE_OROM_VER_BUILD_SHIFT)
+#define ICE_OROM_VER_SHIFT		24
+#define ICE_OROM_VER_MASK		(0xff << ICE_OROM_VER_SHIFT)
+#define ICE_SR_PFA_PTR			0x40
+#define ICE_SR_1ST_NVM_BANK_PTR		0x42
+#define ICE_SR_NVM_BANK_SIZE		0x43
+#define ICE_SR_1ST_OROM_BANK_PTR	0x44
+#define ICE_SR_OROM_BANK_SIZE		0x45
+#define ICE_SR_NETLIST_BANK_PTR		0x46
+#define ICE_SR_NETLIST_BANK_SIZE	0x47
+#define ICE_SR_SECTOR_SIZE_IN_WORDS	0x800
+
+/* CSS Header words */
+#define ICE_NVM_CSS_SREV_L			0x14
+#define ICE_NVM_CSS_SREV_H			0x15
+
+/* Length of CSS header section in words */
+#define ICE_CSS_HEADER_LENGTH			330
+
+/* Offset of Shadow RAM copy in the NVM bank area. */
+#define ICE_NVM_SR_COPY_WORD_OFFSET		roundup(ICE_CSS_HEADER_LENGTH, 32)
+
+/* Size in bytes of Option ROM trailer */
+#define ICE_NVM_OROM_TRAILER_LENGTH		(2 * ICE_CSS_HEADER_LENGTH)
+
+/* The Link Topology Netlist section is stored as a series of words. It is
+ * stored in the NVM as a TLV, with the first two words containing the type
+ * and length.
+ */
+#define ICE_NETLIST_LINK_TOPO_MOD_ID		0x011B
+#define ICE_NETLIST_TYPE_OFFSET			0x0000
+#define ICE_NETLIST_LEN_OFFSET			0x0001
+
+/* The Link Topology section follows the TLV header. When reading the netlist
+ * using ice_read_netlist_module, we need to account for the 2-word TLV
+ * header.
+ */
+#define ICE_NETLIST_LINK_TOPO_OFFSET(n)		((n) + 2)
+
+#define ICE_LINK_TOPO_MODULE_LEN		ICE_NETLIST_LINK_TOPO_OFFSET(0x0000)
+#define ICE_LINK_TOPO_NODE_COUNT		ICE_NETLIST_LINK_TOPO_OFFSET(0x0001)
+
+#define ICE_LINK_TOPO_NODE_COUNT_M		ICE_M(0x3FF, 0)
+
+/* The Netlist ID Block is located after all of the Link Topology nodes. */
+#define ICE_NETLIST_ID_BLK_SIZE			0x30
+#define ICE_NETLIST_ID_BLK_OFFSET(n)		ICE_NETLIST_LINK_TOPO_OFFSET(0x0004 + 2 * (n))
+
+/* netlist ID block field offsets (word offsets) */
+#define ICE_NETLIST_ID_BLK_MAJOR_VER_LOW	0x02
+#define ICE_NETLIST_ID_BLK_MAJOR_VER_HIGH	0x03
+#define ICE_NETLIST_ID_BLK_MINOR_VER_LOW	0x04
+#define ICE_NETLIST_ID_BLK_MINOR_VER_HIGH	0x05
+#define ICE_NETLIST_ID_BLK_TYPE_LOW		0x06
+#define ICE_NETLIST_ID_BLK_TYPE_HIGH		0x07
+#define ICE_NETLIST_ID_BLK_REV_LOW		0x08
+#define ICE_NETLIST_ID_BLK_REV_HIGH		0x09
+#define ICE_NETLIST_ID_BLK_SHA_HASH_WORD(n)	(0x0A + (n))
+#define ICE_NETLIST_ID_BLK_CUST_VER		0x2F
+
+/* Auxiliary field, mask, and shift definition for Shadow RAM and NVM Flash */
+#define ICE_SR_CTRL_WORD_1_S		0x06
+#define ICE_SR_CTRL_WORD_1_M		(0x03 << ICE_SR_CTRL_WORD_1_S)
+#define ICE_SR_CTRL_WORD_VALID		0x1
+#define ICE_SR_CTRL_WORD_OROM_BANK	BIT(3)
+#define ICE_SR_CTRL_WORD_NETLIST_BANK	BIT(4)
+#define ICE_SR_CTRL_WORD_NVM_BANK	BIT(5)
+
+#define ICE_SR_NVM_PTR_4KB_UNITS	BIT(15)
+
+/* Link override related */
+#define ICE_SR_PFA_LINK_OVERRIDE_WORDS		10
+#define ICE_SR_PFA_LINK_OVERRIDE_PHY_WORDS	4
+#define ICE_SR_PFA_LINK_OVERRIDE_OFFSET		2
+#define ICE_SR_PFA_LINK_OVERRIDE_FEC_OFFSET	1
+#define ICE_SR_PFA_LINK_OVERRIDE_PHY_OFFSET	2
+#define ICE_FW_API_LINK_OVERRIDE_MAJ		1
+#define ICE_FW_API_LINK_OVERRIDE_MIN		5
+#define ICE_FW_API_LINK_OVERRIDE_PATCH		2
+
+#define ICE_SR_WORDS_IN_1KB		512
+
+/* AQ API version for LLDP_FILTER_CONTROL */
+#define ICE_FW_API_LLDP_FLTR_MAJ	1
+#define ICE_FW_API_LLDP_FLTR_MIN	7
+#define ICE_FW_API_LLDP_FLTR_PATCH	1
+
+/* AQ API version for report default configuration */
+#define ICE_FW_API_REPORT_DFLT_CFG_MAJ		1
+#define ICE_FW_API_REPORT_DFLT_CFG_MIN		7
+#define ICE_FW_API_REPORT_DFLT_CFG_PATCH	3
+
+#endif /* _ICE_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
new file mode 100644
index 0000000000..d488c7156d
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
@@ -0,0 +1,1359 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022, Intel Corporation. */
+
+#include "ice_vf_lib_private.h"
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+#include "ice_virtchnl_allowlist.h"
+
+/* Public functions which may be accessed by all driver files */
+
+/**
+ * ice_get_vf_by_id - Get pointer to VF by ID
+ * @pf: the PF private structure
+ * @vf_id: the VF ID to locate
+ *
+ * Locate and return a pointer to the VF structure associated with a given ID.
+ * Returns NULL if the ID does not have a valid VF structure associated with
+ * it.
+ *
+ * This function takes a reference to the VF, which must be released by
+ * calling ice_put_vf() once the caller is finished accessing the VF structure
+ * returned.
+ */
+struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id)
+{
+	struct ice_vf *vf;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(pf->vfs.table, vf, entry, vf_id) {
+		if (vf->vf_id == vf_id) {
+			struct ice_vf *found;
+
+			if (kref_get_unless_zero(&vf->refcnt))
+				found = vf;
+			else
+				found = NULL;
+
+			rcu_read_unlock();
+			return found;
+		}
+	}
+	rcu_read_unlock();
+
+	return NULL;
+}
+
+/**
+ * ice_release_vf - Release VF associated with a refcount
+ * @ref: the kref decremented to zero
+ *
+ * Callback function for kref_put to release a VF once its reference count has
+ * hit zero.
+ */
+static void ice_release_vf(struct kref *ref)
+{
+	struct ice_vf *vf = container_of(ref, struct ice_vf, refcnt);
+
+	vf->vf_ops->free(vf);
+}
+
+/**
+ * ice_put_vf - Release a reference to a VF
+ * @vf: the VF structure to decrease reference count on
+ *
+ * Decrease the reference count for a VF, and free the entry if it is no
+ * longer in use.
+ *
+ * This must be called after ice_get_vf_by_id() once the reference to the VF
+ * structure is no longer used. Otherwise, the VF structure will never be
+ * freed.
+ */
+void ice_put_vf(struct ice_vf *vf)
+{
+	kref_put(&vf->refcnt, ice_release_vf);
+}
+
+/**
+ * ice_has_vfs - Return true if the PF has any associated VFs
+ * @pf: the PF private structure
+ *
+ * Return whether or not the PF has any allocated VFs.
+ *
+ * Note that this function only guarantees that there are no VFs at the point
+ * of calling it. It does not guarantee that no more VFs will be added.
+ */
+bool ice_has_vfs(struct ice_pf *pf)
+{
+	/* A simple check that the hash table is not empty does not require
+	 * the mutex or rcu_read_lock.
+	 */
+	return !hash_empty(pf->vfs.table);
+}
+
+/**
+ * ice_get_num_vfs - Get number of allocated VFs
+ * @pf: the PF private structure
+ *
+ * Return the total number of allocated VFs. NOTE: VF IDs are not guaranteed
+ * to be contiguous. Do not assume that a VF ID is guaranteed to be less than
+ * the output of this function.
+ */
+u16 ice_get_num_vfs(struct ice_pf *pf)
+{
+	struct ice_vf *vf;
+	unsigned int bkt;
+	u16 num_vfs = 0;
+
+	rcu_read_lock();
+	ice_for_each_vf_rcu(pf, bkt, vf)
+		num_vfs++;
+	rcu_read_unlock();
+
+	return num_vfs;
+}
+
+/**
+ * ice_get_vf_vsi - get VF's VSI based on the stored index
+ * @vf: VF used to get VSI
+ */
+struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf)
+{
+	if (vf->lan_vsi_idx == ICE_NO_VSI)
+		return NULL;
+
+	return vf->pf->vsi[vf->lan_vsi_idx];
+}
+
+/**
+ * ice_is_vf_disabled
+ * @vf: pointer to the VF info
+ *
+ * If the PF has been disabled, there is no need resetting VF until PF is
+ * active again. Similarly, if the VF has been disabled, this means something
+ * else is resetting the VF, so we shouldn't continue.
+ *
+ * Returns true if the caller should consider the VF as disabled whether
+ * because that single VF is explicitly disabled or because the PF is
+ * currently disabled.
+ */
+bool ice_is_vf_disabled(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+
+	return (test_bit(ICE_VF_DIS, pf->state) ||
+		test_bit(ICE_VF_STATE_DIS, vf->vf_states));
+}
+
+/**
+ * ice_wait_on_vf_reset - poll to make sure a given VF is ready after reset
+ * @vf: The VF being resseting
+ *
+ * The max poll time is about ~800ms, which is about the maximum time it takes
+ * for a VF to be reset and/or a VF driver to be removed.
+ */
+static void ice_wait_on_vf_reset(struct ice_vf *vf)
+{
+	int i;
+
+	for (i = 0; i < ICE_MAX_VF_RESET_TRIES; i++) {
+		if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
+			break;
+		msleep(ICE_MAX_VF_RESET_SLEEP_MS);
+	}
+}
+
+/**
+ * ice_check_vf_ready_for_cfg - check if VF is ready to be configured/queried
+ * @vf: VF to check if it's ready to be configured/queried
+ *
+ * The purpose of this function is to make sure the VF is not in reset, not
+ * disabled, and initialized so it can be configured and/or queried by a host
+ * administrator.
+ */
+int ice_check_vf_ready_for_cfg(struct ice_vf *vf)
+{
+	ice_wait_on_vf_reset(vf);
+
+	if (ice_is_vf_disabled(vf))
+		return -EINVAL;
+
+	if (ice_check_vf_init(vf))
+		return -EBUSY;
+
+	return 0;
+}
+
+/**
+ * ice_trigger_vf_reset - Reset a VF on HW
+ * @vf: pointer to the VF structure
+ * @is_vflr: true if VFLR was issued, false if not
+ * @is_pfr: true if the reset was triggered due to a previous PFR
+ *
+ * Trigger hardware to start a reset for a particular VF. Expects the caller
+ * to wait the proper amount of time to allow hardware to reset the VF before
+ * it cleans up and restores VF functionality.
+ */
+static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
+{
+	/* Inform VF that it is no longer active, as a warning */
+	clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
+
+	/* Disable VF's configuration API during reset. The flag is re-enabled
+	 * when it's safe again to access VF's VSI.
+	 */
+	clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
+
+	/* VF_MBX_ARQLEN and VF_MBX_ATQLEN are cleared by PFR, so the driver
+	 * needs to clear them in the case of VFR/VFLR. If this is done for
+	 * PFR, it can mess up VF resets because the VF driver may already
+	 * have started cleanup by the time we get here.
+	 */
+	if (!is_pfr)
+		vf->vf_ops->clear_mbx_register(vf);
+
+	vf->vf_ops->trigger_reset_register(vf, is_vflr);
+}
+
+static void ice_vf_clear_counters(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+
+	if (vsi)
+		vsi->num_vlan = 0;
+
+	vf->num_mac = 0;
+	memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events));
+	memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events));
+}
+
+/**
+ * ice_vf_pre_vsi_rebuild - tasks to be done prior to VSI rebuild
+ * @vf: VF to perform pre VSI rebuild tasks
+ *
+ * These tasks are items that don't need to be amortized since they are most
+ * likely called in a for loop with all VF(s) in the reset_all_vfs() case.
+ */
+static void ice_vf_pre_vsi_rebuild(struct ice_vf *vf)
+{
+	/* Close any IRQ mapping now */
+	if (vf->vf_ops->irq_close)
+		vf->vf_ops->irq_close(vf);
+
+	ice_vf_clear_counters(vf);
+	vf->vf_ops->clear_reset_trigger(vf);
+}
+
+/**
+ * ice_vf_recreate_vsi - Release and re-create the VF's VSI
+ * @vf: VF to recreate the VSI for
+ *
+ * This is only called when a single VF is being reset (i.e. VVF, VFLR, host
+ * VF configuration change, etc)
+ *
+ * It releases and then re-creates a new VSI.
+ */
+static int ice_vf_recreate_vsi(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	int err;
+
+	ice_vf_vsi_release(vf);
+
+	err = vf->vf_ops->create_vsi(vf);
+	if (err) {
+		dev_err(ice_pf_to_dev(pf),
+			"Failed to recreate the VF%u's VSI, error %d\n",
+			vf->vf_id, err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vf_rebuild_vsi - rebuild the VF's VSI
+ * @vf: VF to rebuild the VSI for
+ *
+ * This is only called when all VF(s) are being reset (i.e. PCIe Reset on the
+ * host, PFR, CORER, etc.).
+ *
+ * It reprograms the VSI configuration back into hardware.
+ */
+static int ice_vf_rebuild_vsi(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+	struct ice_pf *pf = vf->pf;
+
+	if (WARN_ON(!vsi))
+		return -EINVAL;
+
+	if (ice_vsi_rebuild(vsi, ICE_VSI_FLAG_INIT)) {
+		dev_err(ice_pf_to_dev(pf), "failed to rebuild VF %d VSI\n",
+			vf->vf_id);
+		return -EIO;
+	}
+	/* vsi->idx will remain the same in this case so don't update
+	 * vf->lan_vsi_idx
+	 */
+	vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
+	vf->lan_vsi_num = vsi->vsi_num;
+
+	return 0;
+}
+
+/**
+ * ice_vf_rebuild_host_vlan_cfg - add VLAN 0 filter or rebuild the Port VLAN
+ * @vf: VF to add MAC filters for
+ * @vsi: Pointer to VSI
+ *
+ * Called after a VF VSI has been re-added/rebuilt during reset. The PF driver
+ * always re-adds either a VLAN 0 or port VLAN based filter after reset.
+ */
+static int ice_vf_rebuild_host_vlan_cfg(struct ice_vf *vf, struct ice_vsi *vsi)
+{
+	struct ice_vsi_vlan_ops *vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	int err;
+
+	if (ice_vf_is_port_vlan_ena(vf)) {
+		err = vlan_ops->set_port_vlan(vsi, &vf->port_vlan_info);
+		if (err) {
+			dev_err(dev, "failed to configure port VLAN via VSI parameters for VF %u, error %d\n",
+				vf->vf_id, err);
+			return err;
+		}
+
+		err = vlan_ops->add_vlan(vsi, &vf->port_vlan_info);
+	} else {
+		err = ice_vsi_add_vlan_zero(vsi);
+	}
+
+	if (err) {
+		dev_err(dev, "failed to add VLAN %u filter for VF %u during VF rebuild, error %d\n",
+			ice_vf_is_port_vlan_ena(vf) ?
+			ice_vf_get_port_vlan_id(vf) : 0, vf->vf_id, err);
+		return err;
+	}
+
+	err = vlan_ops->ena_rx_filtering(vsi);
+	if (err)
+		dev_warn(dev, "failed to enable Rx VLAN filtering for VF %d VSI %d during VF rebuild, error %d\n",
+			 vf->vf_id, vsi->idx, err);
+
+	return 0;
+}
+
+/**
+ * ice_vf_rebuild_host_tx_rate_cfg - re-apply the Tx rate limiting configuration
+ * @vf: VF to re-apply the configuration for
+ *
+ * Called after a VF VSI has been re-added/rebuild during reset. The PF driver
+ * needs to re-apply the host configured Tx rate limiting configuration.
+ */
+static int ice_vf_rebuild_host_tx_rate_cfg(struct ice_vf *vf)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+	int err;
+
+	if (WARN_ON(!vsi))
+		return -EINVAL;
+
+	if (vf->min_tx_rate) {
+		err = ice_set_min_bw_limit(vsi, (u64)vf->min_tx_rate * 1000);
+		if (err) {
+			dev_err(dev, "failed to set min Tx rate to %d Mbps for VF %u, error %d\n",
+				vf->min_tx_rate, vf->vf_id, err);
+			return err;
+		}
+	}
+
+	if (vf->max_tx_rate) {
+		err = ice_set_max_bw_limit(vsi, (u64)vf->max_tx_rate * 1000);
+		if (err) {
+			dev_err(dev, "failed to set max Tx rate to %d Mbps for VF %u, error %d\n",
+				vf->max_tx_rate, vf->vf_id, err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vf_set_host_trust_cfg - set trust setting based on pre-reset value
+ * @vf: VF to configure trust setting for
+ */
+static void ice_vf_set_host_trust_cfg(struct ice_vf *vf)
+{
+	assign_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps, vf->trusted);
+}
+
+/**
+ * ice_vf_rebuild_host_mac_cfg - add broadcast and the VF's perm_addr/LAA
+ * @vf: VF to add MAC filters for
+ *
+ * Called after a VF VSI has been re-added/rebuilt during reset. The PF driver
+ * always re-adds a broadcast filter and the VF's perm_addr/LAA after reset.
+ */
+static int ice_vf_rebuild_host_mac_cfg(struct ice_vf *vf)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+	u8 broadcast[ETH_ALEN];
+	int status;
+
+	if (WARN_ON(!vsi))
+		return -EINVAL;
+
+	if (ice_is_eswitch_mode_switchdev(vf->pf))
+		return 0;
+
+	eth_broadcast_addr(broadcast);
+	status = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI);
+	if (status) {
+		dev_err(dev, "failed to add broadcast MAC filter for VF %u, error %d\n",
+			vf->vf_id, status);
+		return status;
+	}
+
+	vf->num_mac++;
+
+	if (is_valid_ether_addr(vf->hw_lan_addr)) {
+		status = ice_fltr_add_mac(vsi, vf->hw_lan_addr,
+					  ICE_FWD_TO_VSI);
+		if (status) {
+			dev_err(dev, "failed to add default unicast MAC filter %pM for VF %u, error %d\n",
+				&vf->hw_lan_addr[0], vf->vf_id,
+				status);
+			return status;
+		}
+		vf->num_mac++;
+
+		ether_addr_copy(vf->dev_lan_addr, vf->hw_lan_addr);
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vf_rebuild_aggregator_node_cfg - rebuild aggregator node config
+ * @vsi: Pointer to VSI
+ *
+ * This function moves VSI into corresponding scheduler aggregator node
+ * based on cached value of "aggregator node info" per VSI
+ */
+static void ice_vf_rebuild_aggregator_node_cfg(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	int status;
+
+	if (!vsi->agg_node)
+		return;
+
+	dev = ice_pf_to_dev(pf);
+	if (vsi->agg_node->num_vsis == ICE_MAX_VSIS_IN_AGG_NODE) {
+		dev_dbg(dev,
+			"agg_id %u already has reached max_num_vsis %u\n",
+			vsi->agg_node->agg_id, vsi->agg_node->num_vsis);
+		return;
+	}
+
+	status = ice_move_vsi_to_agg(pf->hw.port_info, vsi->agg_node->agg_id,
+				     vsi->idx, vsi->tc_cfg.ena_tc);
+	if (status)
+		dev_dbg(dev, "unable to move VSI idx %u into aggregator %u node",
+			vsi->idx, vsi->agg_node->agg_id);
+	else
+		vsi->agg_node->num_vsis++;
+}
+
+/**
+ * ice_vf_rebuild_host_cfg - host admin configuration is persistent across reset
+ * @vf: VF to rebuild host configuration on
+ */
+static void ice_vf_rebuild_host_cfg(struct ice_vf *vf)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+
+	if (WARN_ON(!vsi))
+		return;
+
+	ice_vf_set_host_trust_cfg(vf);
+
+	if (ice_vf_rebuild_host_mac_cfg(vf))
+		dev_err(dev, "failed to rebuild default MAC configuration for VF %d\n",
+			vf->vf_id);
+
+	if (ice_vf_rebuild_host_vlan_cfg(vf, vsi))
+		dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n",
+			vf->vf_id);
+
+	if (ice_vf_rebuild_host_tx_rate_cfg(vf))
+		dev_err(dev, "failed to rebuild Tx rate limiting configuration for VF %u\n",
+			vf->vf_id);
+
+	if (ice_vsi_apply_spoofchk(vsi, vf->spoofchk))
+		dev_err(dev, "failed to rebuild spoofchk configuration for VF %d\n",
+			vf->vf_id);
+
+	/* rebuild aggregator node config for main VF VSI */
+	ice_vf_rebuild_aggregator_node_cfg(vsi);
+}
+
+/**
+ * ice_set_vf_state_qs_dis - Set VF queues state to disabled
+ * @vf: pointer to the VF structure
+ */
+static void ice_set_vf_state_qs_dis(struct ice_vf *vf)
+{
+	/* Clear Rx/Tx enabled queues flag */
+	bitmap_zero(vf->txq_ena, ICE_MAX_RSS_QS_PER_VF);
+	bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF);
+	clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
+}
+
+/**
+ * ice_vf_set_initialized - VF is ready for VIRTCHNL communication
+ * @vf: VF to set in initialized state
+ *
+ * After this function the VF will be ready to receive/handle the
+ * VIRTCHNL_OP_GET_VF_RESOURCES message
+ */
+static void ice_vf_set_initialized(struct ice_vf *vf)
+{
+	ice_set_vf_state_qs_dis(vf);
+	clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+	clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+	clear_bit(ICE_VF_STATE_DIS, vf->vf_states);
+	set_bit(ICE_VF_STATE_INIT, vf->vf_states);
+	memset(&vf->vlan_v2_caps, 0, sizeof(vf->vlan_v2_caps));
+}
+
+/**
+ * ice_vf_post_vsi_rebuild - Reset tasks that occur after VSI rebuild
+ * @vf: the VF being reset
+ *
+ * Perform reset tasks which must occur after the VSI has been re-created or
+ * rebuilt during a VF reset.
+ */
+static void ice_vf_post_vsi_rebuild(struct ice_vf *vf)
+{
+	ice_vf_rebuild_host_cfg(vf);
+	ice_vf_set_initialized(vf);
+
+	vf->vf_ops->post_vsi_rebuild(vf);
+}
+
+/**
+ * ice_is_any_vf_in_unicast_promisc - check if any VF(s)
+ * are in unicast promiscuous mode
+ * @pf: PF structure for accessing VF(s)
+ *
+ * Return false if no VF(s) are in unicast promiscuous mode,
+ * else return true
+ */
+bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf)
+{
+	bool is_vf_promisc = false;
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	rcu_read_lock();
+	ice_for_each_vf_rcu(pf, bkt, vf) {
+		/* found a VF that has promiscuous mode configured */
+		if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states)) {
+			is_vf_promisc = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return is_vf_promisc;
+}
+
+/**
+ * ice_vf_get_promisc_masks - Calculate masks for promiscuous modes
+ * @vf: the VF pointer
+ * @vsi: the VSI to configure
+ * @ucast_m: promiscuous mask to apply to unicast
+ * @mcast_m: promiscuous mask to apply to multicast
+ *
+ * Decide which mask should be used for unicast and multicast filter,
+ * based on presence of VLANs
+ */
+void
+ice_vf_get_promisc_masks(struct ice_vf *vf, struct ice_vsi *vsi,
+			 u8 *ucast_m, u8 *mcast_m)
+{
+	if (ice_vf_is_port_vlan_ena(vf) ||
+	    ice_vsi_has_non_zero_vlans(vsi)) {
+		*mcast_m = ICE_MCAST_VLAN_PROMISC_BITS;
+		*ucast_m = ICE_UCAST_VLAN_PROMISC_BITS;
+	} else {
+		*mcast_m = ICE_MCAST_PROMISC_BITS;
+		*ucast_m = ICE_UCAST_PROMISC_BITS;
+	}
+}
+
+/**
+ * ice_vf_clear_all_promisc_modes - Clear promisc/allmulticast on VF VSI
+ * @vf: the VF pointer
+ * @vsi: the VSI to configure
+ *
+ * Clear all promiscuous/allmulticast filters for a VF
+ */
+static int
+ice_vf_clear_all_promisc_modes(struct ice_vf *vf, struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vf->pf;
+	u8 ucast_m, mcast_m;
+	int ret = 0;
+
+	ice_vf_get_promisc_masks(vf, vsi, &ucast_m, &mcast_m);
+	if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states)) {
+		if (!test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags)) {
+			if (ice_is_dflt_vsi_in_use(vsi->port_info))
+				ret = ice_clear_dflt_vsi(vsi);
+		} else {
+			ret = ice_vf_clear_vsi_promisc(vf, vsi, ucast_m);
+		}
+
+		if (ret) {
+			dev_err(ice_pf_to_dev(vf->pf), "Disabling promiscuous mode failed\n");
+		} else {
+			clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+			dev_info(ice_pf_to_dev(vf->pf), "Disabling promiscuous mode succeeded\n");
+		}
+	}
+
+	if (test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) {
+		ret = ice_vf_clear_vsi_promisc(vf, vsi, mcast_m);
+		if (ret) {
+			dev_err(ice_pf_to_dev(vf->pf), "Disabling allmulticast mode failed\n");
+		} else {
+			clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+			dev_info(ice_pf_to_dev(vf->pf), "Disabling allmulticast mode succeeded\n");
+		}
+	}
+	return ret;
+}
+
+/**
+ * ice_vf_set_vsi_promisc - Enable promiscuous mode for a VF VSI
+ * @vf: the VF to configure
+ * @vsi: the VF's VSI
+ * @promisc_m: the promiscuous mode to enable
+ */
+int
+ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	int status;
+
+	if (ice_vf_is_port_vlan_ena(vf))
+		status = ice_fltr_set_vsi_promisc(hw, vsi->idx, promisc_m,
+						  ice_vf_get_port_vlan_id(vf));
+	else if (ice_vsi_has_non_zero_vlans(vsi))
+		status = ice_fltr_set_vlan_vsi_promisc(hw, vsi, promisc_m);
+	else
+		status = ice_fltr_set_vsi_promisc(hw, vsi->idx, promisc_m, 0);
+
+	if (status && status != -EEXIST) {
+		dev_err(ice_pf_to_dev(vsi->back), "enable Tx/Rx filter promiscuous mode on VF-%u failed, error: %d\n",
+			vf->vf_id, status);
+		return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vf_clear_vsi_promisc - Disable promiscuous mode for a VF VSI
+ * @vf: the VF to configure
+ * @vsi: the VF's VSI
+ * @promisc_m: the promiscuous mode to disable
+ */
+int
+ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	int status;
+
+	if (ice_vf_is_port_vlan_ena(vf))
+		status = ice_fltr_clear_vsi_promisc(hw, vsi->idx, promisc_m,
+						    ice_vf_get_port_vlan_id(vf));
+	else if (ice_vsi_has_non_zero_vlans(vsi))
+		status = ice_fltr_clear_vlan_vsi_promisc(hw, vsi, promisc_m);
+	else
+		status = ice_fltr_clear_vsi_promisc(hw, vsi->idx, promisc_m, 0);
+
+	if (status && status != -ENOENT) {
+		dev_err(ice_pf_to_dev(vsi->back), "disable Tx/Rx filter promiscuous mode on VF-%u failed, error: %d\n",
+			vf->vf_id, status);
+		return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_reset_all_vfs - reset all allocated VFs in one go
+ * @pf: pointer to the PF structure
+ *
+ * Reset all VFs at once, in response to a PF or other device reset.
+ *
+ * First, tell the hardware to reset each VF, then do all the waiting in one
+ * chunk, and finally finish restoring each VF after the wait. This is useful
+ * during PF routines which need to reset all VFs, as otherwise it must perform
+ * these resets in a serialized fashion.
+ */
+void ice_reset_all_vfs(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	/* If we don't have any VFs, then there is nothing to reset */
+	if (!ice_has_vfs(pf))
+		return;
+
+	mutex_lock(&pf->vfs.table_lock);
+
+	/* clear all malicious info if the VFs are getting reset */
+	ice_for_each_vf(pf, bkt, vf)
+		ice_mbx_clear_malvf(&vf->mbx_info);
+
+	/* If VFs have been disabled, there is no need to reset */
+	if (test_and_set_bit(ICE_VF_DIS, pf->state)) {
+		mutex_unlock(&pf->vfs.table_lock);
+		return;
+	}
+
+	/* Begin reset on all VFs at once */
+	ice_for_each_vf(pf, bkt, vf)
+		ice_trigger_vf_reset(vf, true, true);
+
+	/* HW requires some time to make sure it can flush the FIFO for a VF
+	 * when it resets it. Now that we've triggered all of the VFs, iterate
+	 * the table again and wait for each VF to complete.
+	 */
+	ice_for_each_vf(pf, bkt, vf) {
+		if (!vf->vf_ops->poll_reset_status(vf)) {
+			/* Display a warning if at least one VF didn't manage
+			 * to reset in time, but continue on with the
+			 * operation.
+			 */
+			dev_warn(dev, "VF %u reset check timeout\n", vf->vf_id);
+			break;
+		}
+	}
+
+	/* free VF resources to begin resetting the VSI state */
+	ice_for_each_vf(pf, bkt, vf) {
+		mutex_lock(&vf->cfg_lock);
+
+		vf->driver_caps = 0;
+		ice_vc_set_default_allowlist(vf);
+
+		ice_vf_fdir_exit(vf);
+		ice_vf_fdir_init(vf);
+		/* clean VF control VSI when resetting VFs since it should be
+		 * setup only when VF creates its first FDIR rule.
+		 */
+		if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+			ice_vf_ctrl_invalidate_vsi(vf);
+
+		ice_vf_pre_vsi_rebuild(vf);
+		ice_vf_rebuild_vsi(vf);
+		ice_vf_post_vsi_rebuild(vf);
+
+		mutex_unlock(&vf->cfg_lock);
+	}
+
+	if (ice_is_eswitch_mode_switchdev(pf))
+		if (ice_eswitch_rebuild(pf))
+			dev_warn(dev, "eswitch rebuild failed\n");
+
+	ice_flush(hw);
+	clear_bit(ICE_VF_DIS, pf->state);
+
+	mutex_unlock(&pf->vfs.table_lock);
+}
+
+/**
+ * ice_notify_vf_reset - Notify VF of a reset event
+ * @vf: pointer to the VF structure
+ */
+static void ice_notify_vf_reset(struct ice_vf *vf)
+{
+	struct ice_hw *hw = &vf->pf->hw;
+	struct virtchnl_pf_event pfe;
+
+	/* Bail out if VF is in disabled state, neither initialized, nor active
+	 * state - otherwise proceed with notifications
+	 */
+	if ((!test_bit(ICE_VF_STATE_INIT, vf->vf_states) &&
+	     !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) ||
+	    test_bit(ICE_VF_STATE_DIS, vf->vf_states))
+		return;
+
+	pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
+	pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
+	ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT,
+			      VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe, sizeof(pfe),
+			      NULL);
+}
+
+/**
+ * ice_reset_vf - Reset a particular VF
+ * @vf: pointer to the VF structure
+ * @flags: flags controlling behavior of the reset
+ *
+ * Flags:
+ *   ICE_VF_RESET_VFLR - Indicates a reset is due to VFLR event
+ *   ICE_VF_RESET_NOTIFY - Send VF a notification prior to reset
+ *   ICE_VF_RESET_LOCK - Acquire VF cfg_lock before resetting
+ *
+ * Returns 0 if the VF is currently in reset, if resets are disabled, or if
+ * the VF resets successfully. Returns an error code if the VF fails to
+ * rebuild.
+ */
+int ice_reset_vf(struct ice_vf *vf, u32 flags)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_lag *lag;
+	struct ice_vsi *vsi;
+	u8 act_prt, pri_prt;
+	struct device *dev;
+	int err = 0;
+	bool rsd;
+
+	dev = ice_pf_to_dev(pf);
+	act_prt = ICE_LAG_INVALID_PORT;
+	pri_prt = pf->hw.port_info->lport;
+
+	if (flags & ICE_VF_RESET_NOTIFY)
+		ice_notify_vf_reset(vf);
+
+	if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
+		dev_dbg(dev, "Trying to reset VF %d, but all VF resets are disabled\n",
+			vf->vf_id);
+		return 0;
+	}
+
+	lag = pf->lag;
+	mutex_lock(&pf->lag_mutex);
+	if (lag && lag->bonded && lag->primary) {
+		act_prt = lag->active_port;
+		if (act_prt != pri_prt && act_prt != ICE_LAG_INVALID_PORT &&
+		    lag->upper_netdev)
+			ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt);
+		else
+			act_prt = ICE_LAG_INVALID_PORT;
+	}
+
+	if (flags & ICE_VF_RESET_LOCK)
+		mutex_lock(&vf->cfg_lock);
+	else
+		lockdep_assert_held(&vf->cfg_lock);
+
+	if (ice_is_vf_disabled(vf)) {
+		vsi = ice_get_vf_vsi(vf);
+		if (!vsi) {
+			dev_dbg(dev, "VF is already removed\n");
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
+
+		if (ice_vsi_is_rx_queue_active(vsi))
+			ice_vsi_stop_all_rx_rings(vsi);
+
+		dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",
+			vf->vf_id);
+		goto out_unlock;
+	}
+
+	/* Set VF disable bit state here, before triggering reset */
+	set_bit(ICE_VF_STATE_DIS, vf->vf_states);
+	ice_trigger_vf_reset(vf, flags & ICE_VF_RESET_VFLR, false);
+
+	vsi = ice_get_vf_vsi(vf);
+	if (WARN_ON(!vsi)) {
+		err = -EIO;
+		goto out_unlock;
+	}
+
+	ice_dis_vf_qs(vf);
+
+	/* Call Disable LAN Tx queue AQ whether or not queues are
+	 * enabled. This is needed for successful completion of VFR.
+	 */
+	ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL,
+			NULL, vf->vf_ops->reset_type, vf->vf_id, NULL);
+
+	/* poll VPGEN_VFRSTAT reg to make sure
+	 * that reset is complete
+	 */
+	rsd = vf->vf_ops->poll_reset_status(vf);
+
+	/* Display a warning if VF didn't manage to reset in time, but need to
+	 * continue on with the operation.
+	 */
+	if (!rsd)
+		dev_warn(dev, "VF reset check timeout on VF %d\n", vf->vf_id);
+
+	vf->driver_caps = 0;
+	ice_vc_set_default_allowlist(vf);
+
+	/* disable promiscuous modes in case they were enabled
+	 * ignore any error if disabling process failed
+	 */
+	ice_vf_clear_all_promisc_modes(vf, vsi);
+
+	ice_vf_fdir_exit(vf);
+	ice_vf_fdir_init(vf);
+	/* clean VF control VSI when resetting VF since it should be setup
+	 * only when VF creates its first FDIR rule.
+	 */
+	if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+		ice_vf_ctrl_vsi_release(vf);
+
+	ice_vf_pre_vsi_rebuild(vf);
+
+	if (ice_vf_recreate_vsi(vf)) {
+		dev_err(dev, "Failed to release and setup the VF%u's VSI\n",
+			vf->vf_id);
+		err = -EFAULT;
+		goto out_unlock;
+	}
+
+	ice_vf_post_vsi_rebuild(vf);
+	vsi = ice_get_vf_vsi(vf);
+	if (WARN_ON(!vsi)) {
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	ice_eswitch_update_repr(vsi);
+
+	/* if the VF has been reset allow it to come up again */
+	ice_mbx_clear_malvf(&vf->mbx_info);
+
+out_unlock:
+	if (flags & ICE_VF_RESET_LOCK)
+		mutex_unlock(&vf->cfg_lock);
+
+	if (lag && lag->bonded && lag->primary &&
+	    act_prt != ICE_LAG_INVALID_PORT)
+		ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt);
+	mutex_unlock(&pf->lag_mutex);
+
+	return err;
+}
+
+/**
+ * ice_set_vf_state_dis - Set VF state to disabled
+ * @vf: pointer to the VF structure
+ */
+void ice_set_vf_state_dis(struct ice_vf *vf)
+{
+	ice_set_vf_state_qs_dis(vf);
+	vf->vf_ops->clear_reset_state(vf);
+}
+
+/* Private functions only accessed from other virtualization files */
+
+/**
+ * ice_initialize_vf_entry - Initialize a VF entry
+ * @vf: pointer to the VF structure
+ */
+void ice_initialize_vf_entry(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vfs *vfs;
+
+	vfs = &pf->vfs;
+
+	/* assign default capabilities */
+	vf->spoofchk = true;
+	vf->num_vf_qs = vfs->num_qps_per;
+	ice_vc_set_default_allowlist(vf);
+	ice_virtchnl_set_dflt_ops(vf);
+
+	/* ctrl_vsi_idx will be set to a valid value only when iAVF
+	 * creates its first fdir rule.
+	 */
+	ice_vf_ctrl_invalidate_vsi(vf);
+	ice_vf_fdir_init(vf);
+
+	/* Initialize mailbox info for this VF */
+	ice_mbx_init_vf_info(&pf->hw, &vf->mbx_info);
+
+	mutex_init(&vf->cfg_lock);
+}
+
+/**
+ * ice_dis_vf_qs - Disable the VF queues
+ * @vf: pointer to the VF structure
+ */
+void ice_dis_vf_qs(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+
+	if (WARN_ON(!vsi))
+		return;
+
+	ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
+	ice_vsi_stop_all_rx_rings(vsi);
+	ice_set_vf_state_qs_dis(vf);
+}
+
+/**
+ * ice_err_to_virt_err - translate errors for VF return code
+ * @err: error return code
+ */
+enum virtchnl_status_code ice_err_to_virt_err(int err)
+{
+	switch (err) {
+	case 0:
+		return VIRTCHNL_STATUS_SUCCESS;
+	case -EINVAL:
+	case -ENODEV:
+		return VIRTCHNL_STATUS_ERR_PARAM;
+	case -ENOMEM:
+		return VIRTCHNL_STATUS_ERR_NO_MEMORY;
+	case -EALREADY:
+	case -EBUSY:
+	case -EIO:
+	case -ENOSPC:
+		return VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+	default:
+		return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
+	}
+}
+
+/**
+ * ice_check_vf_init - helper to check if VF init complete
+ * @vf: the pointer to the VF to check
+ */
+int ice_check_vf_init(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+
+	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
+		dev_err(ice_pf_to_dev(pf), "VF ID: %u in reset. Try again.\n",
+			vf->vf_id);
+		return -EBUSY;
+	}
+	return 0;
+}
+
+/**
+ * ice_vf_get_port_info - Get the VF's port info structure
+ * @vf: VF used to get the port info structure for
+ */
+struct ice_port_info *ice_vf_get_port_info(struct ice_vf *vf)
+{
+	return vf->pf->hw.port_info;
+}
+
+/**
+ * ice_cfg_mac_antispoof - Configure MAC antispoof checking behavior
+ * @vsi: the VSI to configure
+ * @enable: whether to enable or disable the spoof checking
+ *
+ * Configure a VSI to enable (or disable) spoof checking behavior.
+ */
+static int ice_cfg_mac_antispoof(struct ice_vsi *vsi, bool enable)
+{
+	struct ice_vsi_ctx *ctx;
+	int err;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->info.sec_flags = vsi->info.sec_flags;
+	ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+
+	if (enable)
+		ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF;
+	else
+		ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF;
+
+	err = ice_update_vsi(&vsi->back->hw, vsi->idx, ctx, NULL);
+	if (err)
+		dev_err(ice_pf_to_dev(vsi->back), "Failed to configure Tx MAC anti-spoof %s for VSI %d, error %d\n",
+			enable ? "ON" : "OFF", vsi->vsi_num, err);
+	else
+		vsi->info.sec_flags = ctx->info.sec_flags;
+
+	kfree(ctx);
+
+	return err;
+}
+
+/**
+ * ice_vsi_ena_spoofchk - enable Tx spoof checking for this VSI
+ * @vsi: VSI to enable Tx spoof checking for
+ */
+static int ice_vsi_ena_spoofchk(struct ice_vsi *vsi)
+{
+	struct ice_vsi_vlan_ops *vlan_ops;
+	int err = 0;
+
+	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+
+	/* Allow VF with VLAN 0 only to send all tagged traffic */
+	if (vsi->type != ICE_VSI_VF || ice_vsi_has_non_zero_vlans(vsi)) {
+		err = vlan_ops->ena_tx_filtering(vsi);
+		if (err)
+			return err;
+	}
+
+	return ice_cfg_mac_antispoof(vsi, true);
+}
+
+/**
+ * ice_vsi_dis_spoofchk - disable Tx spoof checking for this VSI
+ * @vsi: VSI to disable Tx spoof checking for
+ */
+static int ice_vsi_dis_spoofchk(struct ice_vsi *vsi)
+{
+	struct ice_vsi_vlan_ops *vlan_ops;
+	int err;
+
+	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+
+	err = vlan_ops->dis_tx_filtering(vsi);
+	if (err)
+		return err;
+
+	return ice_cfg_mac_antispoof(vsi, false);
+}
+
+/**
+ * ice_vsi_apply_spoofchk - Apply Tx spoof checking setting to a VSI
+ * @vsi: VSI associated to the VF
+ * @enable: whether to enable or disable the spoof checking
+ */
+int ice_vsi_apply_spoofchk(struct ice_vsi *vsi, bool enable)
+{
+	int err;
+
+	if (enable)
+		err = ice_vsi_ena_spoofchk(vsi);
+	else
+		err = ice_vsi_dis_spoofchk(vsi);
+
+	return err;
+}
+
+/**
+ * ice_is_vf_trusted
+ * @vf: pointer to the VF info
+ */
+bool ice_is_vf_trusted(struct ice_vf *vf)
+{
+	return test_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+}
+
+/**
+ * ice_vf_has_no_qs_ena - check if the VF has any Rx or Tx queues enabled
+ * @vf: the VF to check
+ *
+ * Returns true if the VF has no Rx and no Tx queues enabled and returns false
+ * otherwise
+ */
+bool ice_vf_has_no_qs_ena(struct ice_vf *vf)
+{
+	return (!bitmap_weight(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF) &&
+		!bitmap_weight(vf->txq_ena, ICE_MAX_RSS_QS_PER_VF));
+}
+
+/**
+ * ice_is_vf_link_up - check if the VF's link is up
+ * @vf: VF to check if link is up
+ */
+bool ice_is_vf_link_up(struct ice_vf *vf)
+{
+	struct ice_port_info *pi = ice_vf_get_port_info(vf);
+
+	if (ice_check_vf_init(vf))
+		return false;
+
+	if (ice_vf_has_no_qs_ena(vf))
+		return false;
+	else if (vf->link_forced)
+		return vf->link_up;
+	else
+		return pi->phy.link_info.link_info &
+			ICE_AQ_LINK_UP;
+}
+
+/**
+ * ice_vf_ctrl_invalidate_vsi - invalidate ctrl_vsi_idx to remove VSI access
+ * @vf: VF that control VSI is being invalidated on
+ */
+void ice_vf_ctrl_invalidate_vsi(struct ice_vf *vf)
+{
+	vf->ctrl_vsi_idx = ICE_NO_VSI;
+}
+
+/**
+ * ice_vf_ctrl_vsi_release - invalidate the VF's control VSI after freeing it
+ * @vf: VF that control VSI is being released on
+ */
+void ice_vf_ctrl_vsi_release(struct ice_vf *vf)
+{
+	ice_vsi_release(vf->pf->vsi[vf->ctrl_vsi_idx]);
+	ice_vf_ctrl_invalidate_vsi(vf);
+}
+
+/**
+ * ice_vf_ctrl_vsi_setup - Set up a VF control VSI
+ * @vf: VF to setup control VSI for
+ *
+ * Returns pointer to the successfully allocated VSI struct on success,
+ * otherwise returns NULL on failure.
+ */
+struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf)
+{
+	struct ice_vsi_cfg_params params = {};
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	params.type = ICE_VSI_CTRL;
+	params.pi = ice_vf_get_port_info(vf);
+	params.vf = vf;
+	params.flags = ICE_VSI_FLAG_INIT;
+
+	vsi = ice_vsi_setup(pf, &params);
+	if (!vsi) {
+		dev_err(ice_pf_to_dev(pf), "Failed to create VF control VSI\n");
+		ice_vf_ctrl_invalidate_vsi(vf);
+	}
+
+	return vsi;
+}
+
+/**
+ * ice_vf_init_host_cfg - Initialize host admin configuration
+ * @vf: VF to initialize
+ * @vsi: the VSI created at initialization
+ *
+ * Initialize the VF host configuration. Called during VF creation to setup
+ * VLAN 0, add the VF VSI broadcast filter, and setup spoof checking. It
+ * should only be called during VF creation.
+ */
+int ice_vf_init_host_cfg(struct ice_vf *vf, struct ice_vsi *vsi)
+{
+	struct ice_vsi_vlan_ops *vlan_ops;
+	struct ice_pf *pf = vf->pf;
+	u8 broadcast[ETH_ALEN];
+	struct device *dev;
+	int err;
+
+	dev = ice_pf_to_dev(pf);
+
+	err = ice_vsi_add_vlan_zero(vsi);
+	if (err) {
+		dev_warn(dev, "Failed to add VLAN 0 filter for VF %d\n",
+			 vf->vf_id);
+		return err;
+	}
+
+	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+	err = vlan_ops->ena_rx_filtering(vsi);
+	if (err) {
+		dev_warn(dev, "Failed to enable Rx VLAN filtering for VF %d\n",
+			 vf->vf_id);
+		return err;
+	}
+
+	eth_broadcast_addr(broadcast);
+	err = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI);
+	if (err) {
+		dev_err(dev, "Failed to add broadcast MAC filter for VF %d, status %d\n",
+			vf->vf_id, err);
+		return err;
+	}
+
+	vf->num_mac = 1;
+
+	err = ice_vsi_apply_spoofchk(vsi, vf->spoofchk);
+	if (err) {
+		dev_warn(dev, "Failed to initialize spoofchk setting for VF %d\n",
+			 vf->vf_id);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vf_invalidate_vsi - invalidate vsi_idx/vsi_num to remove VSI access
+ * @vf: VF to remove access to VSI for
+ */
+void ice_vf_invalidate_vsi(struct ice_vf *vf)
+{
+	vf->lan_vsi_idx = ICE_NO_VSI;
+	vf->lan_vsi_num = ICE_NO_VSI;
+}
+
+/**
+ * ice_vf_vsi_release - Release the VF VSI and invalidate indexes
+ * @vf: pointer to the VF structure
+ *
+ * Release the VF associated with this VSI and then invalidate the VSI
+ * indexes.
+ */
+void ice_vf_vsi_release(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+
+	if (WARN_ON(!vsi))
+		return;
+
+	ice_vsi_release(vsi);
+	ice_vf_invalidate_vsi(vf);
+}
+
+/**
+ * ice_get_vf_ctrl_vsi - Get first VF control VSI pointer
+ * @pf: the PF private structure
+ * @vsi: pointer to the VSI
+ *
+ * Return first found VF control VSI other than the vsi
+ * passed by parameter. This function is used to determine
+ * whether new resources have to be allocated for control VSI
+ * or they can be shared with existing one.
+ *
+ * Return found VF control VSI pointer other itself. Return
+ * NULL Otherwise.
+ *
+ */
+struct ice_vsi *ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi)
+{
+	struct ice_vsi *ctrl_vsi = NULL;
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	rcu_read_lock();
+	ice_for_each_vf_rcu(pf, bkt, vf) {
+		if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) {
+			ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx];
+			break;
+		}
+	}
+
+	rcu_read_unlock();
+	return ctrl_vsi;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
new file mode 100644
index 0000000000..48fea6fa03
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
@@ -0,0 +1,302 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+#ifndef _ICE_VF_LIB_H_
+#define _ICE_VF_LIB_H_
+
+#include <linux/types.h>
+#include <linux/hashtable.h>
+#include <linux/bitmap.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <net/devlink.h>
+#include <linux/avf/virtchnl.h>
+#include "ice_type.h"
+#include "ice_virtchnl_fdir.h"
+#include "ice_vsi_vlan_ops.h"
+
+#define ICE_MAX_SRIOV_VFS		256
+
+/* VF resource constraints */
+#define ICE_MAX_RSS_QS_PER_VF	16
+
+struct ice_pf;
+struct ice_vf;
+struct ice_virtchnl_ops;
+
+/* VF capabilities */
+enum ice_virtchnl_cap {
+	ICE_VIRTCHNL_VF_CAP_PRIVILEGE = 0,
+};
+
+/* Specific VF states */
+enum ice_vf_states {
+	ICE_VF_STATE_INIT = 0,		/* PF is initializing VF */
+	ICE_VF_STATE_ACTIVE,		/* VF resources are allocated for use */
+	ICE_VF_STATE_QS_ENA,		/* VF queue(s) enabled */
+	ICE_VF_STATE_DIS,
+	ICE_VF_STATE_MC_PROMISC,
+	ICE_VF_STATE_UC_PROMISC,
+	ICE_VF_STATES_NBITS
+};
+
+struct ice_time_mac {
+	unsigned long time_modified;
+	u8 addr[ETH_ALEN];
+};
+
+/* VF MDD events print structure */
+struct ice_mdd_vf_events {
+	u16 count;			/* total count of Rx|Tx events */
+	/* count number of the last printed event */
+	u16 last_printed;
+};
+
+/* VF operations */
+struct ice_vf_ops {
+	enum ice_disq_rst_src reset_type;
+	void (*free)(struct ice_vf *vf);
+	void (*clear_reset_state)(struct ice_vf *vf);
+	void (*clear_mbx_register)(struct ice_vf *vf);
+	void (*trigger_reset_register)(struct ice_vf *vf, bool is_vflr);
+	bool (*poll_reset_status)(struct ice_vf *vf);
+	void (*clear_reset_trigger)(struct ice_vf *vf);
+	void (*irq_close)(struct ice_vf *vf);
+	int (*create_vsi)(struct ice_vf *vf);
+	void (*post_vsi_rebuild)(struct ice_vf *vf);
+};
+
+/* Virtchnl/SR-IOV config info */
+struct ice_vfs {
+	DECLARE_HASHTABLE(table, 8);	/* table of VF entries */
+	struct mutex table_lock;	/* Lock for protecting the hash table */
+	u16 num_supported;		/* max supported VFs on this PF */
+	u16 num_qps_per;		/* number of queue pairs per VF */
+	u16 num_msix_per;		/* number of MSI-X vectors per VF */
+	unsigned long last_printed_mdd_jiffies;	/* MDD message rate limit */
+};
+
+/* VF information structure */
+struct ice_vf {
+	struct hlist_node entry;
+	struct rcu_head rcu;
+	struct kref refcnt;
+	struct ice_pf *pf;
+
+	/* Used during virtchnl message handling and NDO ops against the VF
+	 * that will trigger a VFR
+	 */
+	struct mutex cfg_lock;
+
+	u16 vf_id;			/* VF ID in the PF space */
+	u16 lan_vsi_idx;		/* index into PF struct */
+	u16 ctrl_vsi_idx;
+	struct ice_vf_fdir fdir;
+	/* first vector index of this VF in the PF space */
+	int first_vector_idx;
+	struct ice_sw *vf_sw_id;	/* switch ID the VF VSIs connect to */
+	struct virtchnl_version_info vf_ver;
+	u32 driver_caps;		/* reported by VF driver */
+	u8 dev_lan_addr[ETH_ALEN];
+	u8 hw_lan_addr[ETH_ALEN];
+	struct ice_time_mac legacy_last_added_umac;
+	DECLARE_BITMAP(txq_ena, ICE_MAX_RSS_QS_PER_VF);
+	DECLARE_BITMAP(rxq_ena, ICE_MAX_RSS_QS_PER_VF);
+	struct ice_vlan port_vlan_info;	/* Port VLAN ID, QoS, and TPID */
+	struct virtchnl_vlan_caps vlan_v2_caps;
+	struct ice_mbx_vf_info mbx_info;
+	u8 pf_set_mac:1;		/* VF MAC address set by VMM admin */
+	u8 trusted:1;
+	u8 spoofchk:1;
+	u8 link_forced:1;
+	u8 link_up:1;			/* only valid if VF link is forced */
+	/* VSI indices - actual VSI pointers are maintained in the PF structure
+	 * When assigned, these will be non-zero, because VSI 0 is always
+	 * the main LAN VSI for the PF.
+	 */
+	u16 lan_vsi_num;		/* ID as used by firmware */
+	unsigned int min_tx_rate;	/* Minimum Tx bandwidth limit in Mbps */
+	unsigned int max_tx_rate;	/* Maximum Tx bandwidth limit in Mbps */
+	DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS);	/* VF runtime states */
+
+	unsigned long vf_caps;		/* VF's adv. capabilities */
+	u8 num_req_qs;			/* num of queue pairs requested by VF */
+	u16 num_mac;
+	u16 num_vf_qs;			/* num of queue configured per VF */
+	struct ice_mdd_vf_events mdd_rx_events;
+	struct ice_mdd_vf_events mdd_tx_events;
+	DECLARE_BITMAP(opcodes_allowlist, VIRTCHNL_OP_MAX);
+
+	struct ice_repr *repr;
+	const struct ice_virtchnl_ops *virtchnl_ops;
+	const struct ice_vf_ops *vf_ops;
+
+	/* devlink port data */
+	struct devlink_port devlink_port;
+};
+
+/* Flags for controlling behavior of ice_reset_vf */
+enum ice_vf_reset_flags {
+	ICE_VF_RESET_VFLR = BIT(0), /* Indicate a VFLR reset */
+	ICE_VF_RESET_NOTIFY = BIT(1), /* Notify VF prior to reset */
+	ICE_VF_RESET_LOCK = BIT(2), /* Acquire the VF cfg_lock */
+};
+
+static inline u16 ice_vf_get_port_vlan_id(struct ice_vf *vf)
+{
+	return vf->port_vlan_info.vid;
+}
+
+static inline u8 ice_vf_get_port_vlan_prio(struct ice_vf *vf)
+{
+	return vf->port_vlan_info.prio;
+}
+
+static inline bool ice_vf_is_port_vlan_ena(struct ice_vf *vf)
+{
+	return (ice_vf_get_port_vlan_id(vf) || ice_vf_get_port_vlan_prio(vf));
+}
+
+static inline u16 ice_vf_get_port_vlan_tpid(struct ice_vf *vf)
+{
+	return vf->port_vlan_info.tpid;
+}
+
+/* VF Hash Table access functions
+ *
+ * These functions provide abstraction for interacting with the VF hash table.
+ * In general, direct access to the hash table should be avoided outside of
+ * these functions where possible.
+ *
+ * The VF entries in the hash table are protected by reference counting to
+ * track lifetime of accesses from the table. The ice_get_vf_by_id() function
+ * obtains a reference to the VF structure which must be dropped by using
+ * ice_put_vf().
+ */
+
+/**
+ * ice_for_each_vf - Iterate over each VF entry
+ * @pf: pointer to the PF private structure
+ * @bkt: bucket index used for iteration
+ * @vf: pointer to the VF entry currently being processed in the loop
+ *
+ * The bkt variable is an unsigned integer iterator used to traverse the VF
+ * entries. It is *not* guaranteed to be the VF's vf_id. Do not assume it is.
+ * Use vf->vf_id to get the id number if needed.
+ *
+ * The caller is expected to be under the table_lock mutex for the entire
+ * loop. Use this iterator if your loop is long or if it might sleep.
+ */
+#define ice_for_each_vf(pf, bkt, vf) \
+	hash_for_each((pf)->vfs.table, (bkt), (vf), entry)
+
+/**
+ * ice_for_each_vf_rcu - Iterate over each VF entry protected by RCU
+ * @pf: pointer to the PF private structure
+ * @bkt: bucket index used for iteration
+ * @vf: pointer to the VF entry currently being processed in the loop
+ *
+ * The bkt variable is an unsigned integer iterator used to traverse the VF
+ * entries. It is *not* guaranteed to be the VF's vf_id. Do not assume it is.
+ * Use vf->vf_id to get the id number if needed.
+ *
+ * The caller is expected to be under rcu_read_lock() for the entire loop.
+ * Only use this iterator if your loop is short and you can guarantee it does
+ * not sleep.
+ */
+#define ice_for_each_vf_rcu(pf, bkt, vf) \
+	hash_for_each_rcu((pf)->vfs.table, (bkt), (vf), entry)
+
+#ifdef CONFIG_PCI_IOV
+struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id);
+void ice_put_vf(struct ice_vf *vf);
+bool ice_has_vfs(struct ice_pf *pf);
+u16 ice_get_num_vfs(struct ice_pf *pf);
+struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf);
+bool ice_is_vf_disabled(struct ice_vf *vf);
+int ice_check_vf_ready_for_cfg(struct ice_vf *vf);
+void ice_set_vf_state_dis(struct ice_vf *vf);
+bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf);
+void
+ice_vf_get_promisc_masks(struct ice_vf *vf, struct ice_vsi *vsi,
+			 u8 *ucast_m, u8 *mcast_m);
+int
+ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m);
+int
+ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m);
+int ice_reset_vf(struct ice_vf *vf, u32 flags);
+void ice_reset_all_vfs(struct ice_pf *pf);
+struct ice_vsi *ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi);
+#else /* CONFIG_PCI_IOV */
+static inline struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id)
+{
+	return NULL;
+}
+
+static inline void ice_put_vf(struct ice_vf *vf)
+{
+}
+
+static inline bool ice_has_vfs(struct ice_pf *pf)
+{
+	return false;
+}
+
+static inline u16 ice_get_num_vfs(struct ice_pf *pf)
+{
+	return 0;
+}
+
+static inline struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf)
+{
+	return NULL;
+}
+
+static inline bool ice_is_vf_disabled(struct ice_vf *vf)
+{
+	return true;
+}
+
+static inline int ice_check_vf_ready_for_cfg(struct ice_vf *vf)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void ice_set_vf_state_dis(struct ice_vf *vf)
+{
+}
+
+static inline bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf)
+{
+	return false;
+}
+
+static inline int
+ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int ice_reset_vf(struct ice_vf *vf, u32 flags)
+{
+	return 0;
+}
+
+static inline void ice_reset_all_vfs(struct ice_pf *pf)
+{
+}
+
+static inline struct ice_vsi *
+ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi)
+{
+	return NULL;
+}
+#endif /* !CONFIG_PCI_IOV */
+
+#endif /* _ICE_VF_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h
new file mode 100644
index 0000000000..0c7e77c0a0
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2021, Intel Corporation. */
+
+#ifndef _ICE_VF_LIB_PRIVATE_H_
+#define _ICE_VF_LIB_PRIVATE_H_
+
+#include "ice_vf_lib.h"
+
+/* This header file is for exposing functions in ice_vf_lib.c to other files
+ * which are also conditionally compiled depending on CONFIG_PCI_IOV.
+ * Functions which may be used by other files should be exposed as part of
+ * ice_vf_lib.h
+ *
+ * Functions in this file are exposed only when CONFIG_PCI_IOV is enabled, and
+ * thus this header must not be included by .c files which may be compiled
+ * with CONFIG_PCI_IOV disabled.
+ *
+ * To avoid this, only include this header file directly within .c files that
+ * are conditionally enabled in the "ice-$(CONFIG_PCI_IOV)" block.
+ */
+
+#ifndef CONFIG_PCI_IOV
+#warning "Only include ice_vf_lib_private.h in CONFIG_PCI_IOV virtualization files"
+#endif
+
+void ice_initialize_vf_entry(struct ice_vf *vf);
+void ice_dis_vf_qs(struct ice_vf *vf);
+int ice_check_vf_init(struct ice_vf *vf);
+enum virtchnl_status_code ice_err_to_virt_err(int err);
+struct ice_port_info *ice_vf_get_port_info(struct ice_vf *vf);
+int ice_vsi_apply_spoofchk(struct ice_vsi *vsi, bool enable);
+bool ice_is_vf_trusted(struct ice_vf *vf);
+bool ice_vf_has_no_qs_ena(struct ice_vf *vf);
+bool ice_is_vf_link_up(struct ice_vf *vf);
+void ice_vf_ctrl_invalidate_vsi(struct ice_vf *vf);
+void ice_vf_ctrl_vsi_release(struct ice_vf *vf);
+struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf);
+int ice_vf_init_host_cfg(struct ice_vf *vf, struct ice_vsi *vsi);
+void ice_vf_invalidate_vsi(struct ice_vf *vf);
+void ice_vf_vsi_release(struct ice_vf *vf);
+
+#endif /* _ICE_VF_LIB_PRIVATE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c
new file mode 100644
index 0000000000..40cb4ba078
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+#include "ice_common.h"
+#include "ice_vf_mbx.h"
+
+/**
+ * ice_aq_send_msg_to_vf
+ * @hw: pointer to the hardware structure
+ * @vfid: VF ID to send msg
+ * @v_opcode: opcodes for VF-PF communication
+ * @v_retval: return error code
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @cd: pointer to command details
+ *
+ * Send message to VF driver (0x0802) using mailbox
+ * queue and asynchronously sending message via
+ * ice_sq_send_cmd() function
+ */
+int
+ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval,
+		      u8 *msg, u16 msglen, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_pf_vf_msg *cmd;
+	struct ice_aq_desc desc;
+
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_mbx_opc_send_msg_to_vf);
+
+	cmd = &desc.params.virt;
+	cmd->id = cpu_to_le32(vfid);
+
+	desc.cookie_high = cpu_to_le32(v_opcode);
+	desc.cookie_low = cpu_to_le32(v_retval);
+
+	if (msglen)
+		desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd);
+}
+
+static const u32 ice_legacy_aq_to_vc_speed[] = {
+	VIRTCHNL_LINK_SPEED_100MB,	/* BIT(0) */
+	VIRTCHNL_LINK_SPEED_100MB,
+	VIRTCHNL_LINK_SPEED_1GB,
+	VIRTCHNL_LINK_SPEED_1GB,
+	VIRTCHNL_LINK_SPEED_1GB,
+	VIRTCHNL_LINK_SPEED_10GB,
+	VIRTCHNL_LINK_SPEED_20GB,
+	VIRTCHNL_LINK_SPEED_25GB,
+	VIRTCHNL_LINK_SPEED_40GB,
+	VIRTCHNL_LINK_SPEED_40GB,
+	VIRTCHNL_LINK_SPEED_40GB,
+};
+
+/**
+ * ice_conv_link_speed_to_virtchnl
+ * @adv_link_support: determines the format of the returned link speed
+ * @link_speed: variable containing the link_speed to be converted
+ *
+ * Convert link speed supported by HW to link speed supported by virtchnl.
+ * If adv_link_support is true, then return link speed in Mbps. Else return
+ * link speed as a VIRTCHNL_LINK_SPEED_* casted to a u32. Note that the caller
+ * needs to cast back to an enum virtchnl_link_speed in the case where
+ * adv_link_support is false, but when adv_link_support is true the caller can
+ * expect the speed in Mbps.
+ */
+u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed)
+{
+	/* convert a BIT() value into an array index */
+	u32 index = fls(link_speed) - 1;
+
+	if (adv_link_support)
+		return ice_get_link_speed(index);
+	else if (index < ARRAY_SIZE(ice_legacy_aq_to_vc_speed))
+		/* Virtchnl speeds are not defined for every speed supported in
+		 * the hardware. To maintain compatibility with older AVF
+		 * drivers, while reporting the speed the new speed values are
+		 * resolved to the closest known virtchnl speeds
+		 */
+		return ice_legacy_aq_to_vc_speed[index];
+
+	return VIRTCHNL_LINK_SPEED_UNKNOWN;
+}
+
+/* The mailbox overflow detection algorithm helps to check if there
+ * is a possibility of a malicious VF transmitting too many MBX messages to the
+ * PF.
+ * 1. The mailbox snapshot structure, ice_mbx_snapshot, is initialized during
+ * driver initialization in ice_init_hw() using ice_mbx_init_snapshot().
+ * The struct ice_mbx_snapshot helps to track and traverse a static window of
+ * messages within the mailbox queue while looking for a malicious VF.
+ *
+ * 2. When the caller starts processing its mailbox queue in response to an
+ * interrupt, the structure ice_mbx_snapshot is expected to be cleared before
+ * the algorithm can be run for the first time for that interrupt. This
+ * requires calling ice_mbx_reset_snapshot() as well as calling
+ * ice_mbx_reset_vf_info() for each VF tracking structure.
+ *
+ * 3. For every message read by the caller from the MBX Queue, the caller must
+ * call the detection algorithm's entry function ice_mbx_vf_state_handler().
+ * Before every call to ice_mbx_vf_state_handler() the struct ice_mbx_data is
+ * filled as it is required to be passed to the algorithm.
+ *
+ * 4. Every time a message is read from the MBX queue, a tracking structure
+ * for the VF must be passed to the state handler. The boolean output
+ * report_malvf from ice_mbx_vf_state_handler() serves as an indicator to the
+ * caller whether it must report this VF as malicious or not.
+ *
+ * 5. When a VF is identified to be malicious, the caller can send a message
+ * to the system administrator.
+ *
+ * 6. The PF is responsible for maintaining the struct ice_mbx_vf_info
+ * structure for each VF. The PF should clear the VF tracking structure if the
+ * VF is reset. When a VF is shut down and brought back up, we will then
+ * assume that the new VF is not malicious and may report it again if we
+ * detect it again.
+ *
+ * 7. The function ice_mbx_reset_snapshot() is called to reset the information
+ * in ice_mbx_snapshot for every new mailbox interrupt handled.
+ */
+#define ICE_RQ_DATA_MASK(rq_data) ((rq_data) & PF_MBX_ARQH_ARQH_M)
+/* Using the highest value for an unsigned 16-bit value 0xFFFF to indicate that
+ * the max messages check must be ignored in the algorithm
+ */
+#define ICE_IGNORE_MAX_MSG_CNT	0xFFFF
+
+/**
+ * ice_mbx_reset_snapshot - Reset mailbox snapshot structure
+ * @snap: pointer to the mailbox snapshot
+ */
+static void ice_mbx_reset_snapshot(struct ice_mbx_snapshot *snap)
+{
+	struct ice_mbx_vf_info *vf_info;
+
+	/* Clear mbx_buf in the mailbox snaphot structure and setting the
+	 * mailbox snapshot state to a new capture.
+	 */
+	memset(&snap->mbx_buf, 0, sizeof(snap->mbx_buf));
+	snap->mbx_buf.state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
+
+	/* Reset message counts for all VFs to zero */
+	list_for_each_entry(vf_info, &snap->mbx_vf, list_entry)
+		vf_info->msg_count = 0;
+}
+
+/**
+ * ice_mbx_traverse - Pass through mailbox snapshot
+ * @hw: pointer to the HW struct
+ * @new_state: new algorithm state
+ *
+ * Traversing the mailbox static snapshot without checking
+ * for malicious VFs.
+ */
+static void
+ice_mbx_traverse(struct ice_hw *hw,
+		 enum ice_mbx_snapshot_state *new_state)
+{
+	struct ice_mbx_snap_buffer_data *snap_buf;
+	u32 num_iterations;
+
+	snap_buf = &hw->mbx_snapshot.mbx_buf;
+
+	/* As mailbox buffer is circular, applying a mask
+	 * on the incremented iteration count.
+	 */
+	num_iterations = ICE_RQ_DATA_MASK(++snap_buf->num_iterations);
+
+	/* Checking either of the below conditions to exit snapshot traversal:
+	 * Condition-1: If the number of iterations in the mailbox is equal to
+	 * the mailbox head which would indicate that we have reached the end
+	 * of the static snapshot.
+	 * Condition-2: If the maximum messages serviced in the mailbox for a
+	 * given interrupt is the highest possible value then there is no need
+	 * to check if the number of messages processed is equal to it. If not
+	 * check if the number of messages processed is greater than or equal
+	 * to the maximum number of mailbox entries serviced in current work item.
+	 */
+	if (num_iterations == snap_buf->head ||
+	    (snap_buf->max_num_msgs_mbx < ICE_IGNORE_MAX_MSG_CNT &&
+	     ++snap_buf->num_msg_proc >= snap_buf->max_num_msgs_mbx))
+		*new_state = ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT;
+}
+
+/**
+ * ice_mbx_detect_malvf - Detect malicious VF in snapshot
+ * @hw: pointer to the HW struct
+ * @vf_info: mailbox tracking structure for a VF
+ * @new_state: new algorithm state
+ * @is_malvf: boolean output to indicate if VF is malicious
+ *
+ * This function tracks the number of asynchronous messages
+ * sent per VF and marks the VF as malicious if it exceeds
+ * the permissible number of messages to send.
+ */
+static int
+ice_mbx_detect_malvf(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info,
+		     enum ice_mbx_snapshot_state *new_state,
+		     bool *is_malvf)
+{
+	/* increment the message count for this VF */
+	vf_info->msg_count++;
+
+	if (vf_info->msg_count >= ICE_ASYNC_VF_MSG_THRESHOLD)
+		*is_malvf = true;
+
+	/* continue to iterate through the mailbox snapshot */
+	ice_mbx_traverse(hw, new_state);
+
+	return 0;
+}
+
+/**
+ * ice_mbx_vf_state_handler - Handle states of the overflow algorithm
+ * @hw: pointer to the HW struct
+ * @mbx_data: pointer to structure containing mailbox data
+ * @vf_info: mailbox tracking structure for the VF in question
+ * @report_malvf: boolean output to indicate whether VF should be reported
+ *
+ * The function serves as an entry point for the malicious VF
+ * detection algorithm by handling the different states and state
+ * transitions of the algorithm:
+ * New snapshot: This state is entered when creating a new static
+ * snapshot. The data from any previous mailbox snapshot is
+ * cleared and a new capture of the mailbox head and tail is
+ * logged. This will be the new static snapshot to detect
+ * asynchronous messages sent by VFs. On capturing the snapshot
+ * and depending on whether the number of pending messages in that
+ * snapshot exceed the watermark value, the state machine enters
+ * traverse or detect states.
+ * Traverse: If pending message count is below watermark then iterate
+ * through the snapshot without any action on VF.
+ * Detect: If pending message count exceeds watermark traverse
+ * the static snapshot and look for a malicious VF.
+ */
+int
+ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data,
+			 struct ice_mbx_vf_info *vf_info, bool *report_malvf)
+{
+	struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
+	struct ice_mbx_snap_buffer_data *snap_buf;
+	struct ice_ctl_q_info *cq = &hw->mailboxq;
+	enum ice_mbx_snapshot_state new_state;
+	bool is_malvf = false;
+	int status = 0;
+
+	if (!report_malvf || !mbx_data || !vf_info)
+		return -EINVAL;
+
+	*report_malvf = false;
+
+	/* When entering the mailbox state machine assume that the VF
+	 * is not malicious until detected.
+	 */
+	 /* Checking if max messages allowed to be processed while servicing current
+	  * interrupt is not less than the defined AVF message threshold.
+	  */
+	if (mbx_data->max_num_msgs_mbx <= ICE_ASYNC_VF_MSG_THRESHOLD)
+		return -EINVAL;
+
+	/* The watermark value should not be lesser than the threshold limit
+	 * set for the number of asynchronous messages a VF can send to mailbox
+	 * nor should it be greater than the maximum number of messages in the
+	 * mailbox serviced in current interrupt.
+	 */
+	if (mbx_data->async_watermark_val < ICE_ASYNC_VF_MSG_THRESHOLD ||
+	    mbx_data->async_watermark_val > mbx_data->max_num_msgs_mbx)
+		return -EINVAL;
+
+	new_state = ICE_MAL_VF_DETECT_STATE_INVALID;
+	snap_buf = &snap->mbx_buf;
+
+	switch (snap_buf->state) {
+	case ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT:
+		/* Clear any previously held data in mailbox snapshot structure. */
+		ice_mbx_reset_snapshot(snap);
+
+		/* Collect the pending ARQ count, number of messages processed and
+		 * the maximum number of messages allowed to be processed from the
+		 * Mailbox for current interrupt.
+		 */
+		snap_buf->num_pending_arq = mbx_data->num_pending_arq;
+		snap_buf->num_msg_proc = mbx_data->num_msg_proc;
+		snap_buf->max_num_msgs_mbx = mbx_data->max_num_msgs_mbx;
+
+		/* Capture a new static snapshot of the mailbox by logging the
+		 * head and tail of snapshot and set num_iterations to the tail
+		 * value to mark the start of the iteration through the snapshot.
+		 */
+		snap_buf->head = ICE_RQ_DATA_MASK(cq->rq.next_to_clean +
+						  mbx_data->num_pending_arq);
+		snap_buf->tail = ICE_RQ_DATA_MASK(cq->rq.next_to_clean - 1);
+		snap_buf->num_iterations = snap_buf->tail;
+
+		/* Pending ARQ messages returned by ice_clean_rq_elem
+		 * is the difference between the head and tail of the
+		 * mailbox queue. Comparing this value against the watermark
+		 * helps to check if we potentially have malicious VFs.
+		 */
+		if (snap_buf->num_pending_arq >=
+		    mbx_data->async_watermark_val) {
+			new_state = ICE_MAL_VF_DETECT_STATE_DETECT;
+			status = ice_mbx_detect_malvf(hw, vf_info, &new_state, &is_malvf);
+		} else {
+			new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE;
+			ice_mbx_traverse(hw, &new_state);
+		}
+		break;
+
+	case ICE_MAL_VF_DETECT_STATE_TRAVERSE:
+		new_state = ICE_MAL_VF_DETECT_STATE_TRAVERSE;
+		ice_mbx_traverse(hw, &new_state);
+		break;
+
+	case ICE_MAL_VF_DETECT_STATE_DETECT:
+		new_state = ICE_MAL_VF_DETECT_STATE_DETECT;
+		status = ice_mbx_detect_malvf(hw, vf_info, &new_state, &is_malvf);
+		break;
+
+	default:
+		new_state = ICE_MAL_VF_DETECT_STATE_INVALID;
+		status = -EIO;
+	}
+
+	snap_buf->state = new_state;
+
+	/* Only report VFs as malicious the first time we detect it */
+	if (is_malvf && !vf_info->malicious) {
+		vf_info->malicious = 1;
+		*report_malvf = true;
+	}
+
+	return status;
+}
+
+/**
+ * ice_mbx_clear_malvf - Clear VF mailbox info
+ * @vf_info: the mailbox tracking structure for a VF
+ *
+ * In case of a VF reset, this function shall be called to clear the VF's
+ * current mailbox tracking state.
+ */
+void ice_mbx_clear_malvf(struct ice_mbx_vf_info *vf_info)
+{
+	vf_info->malicious = 0;
+	vf_info->msg_count = 0;
+}
+
+/**
+ * ice_mbx_init_vf_info - Initialize a new VF mailbox tracking info
+ * @hw: pointer to the hardware structure
+ * @vf_info: the mailbox tracking info structure for a VF
+ *
+ * Initialize a VF mailbox tracking info structure and insert it into the
+ * snapshot list.
+ *
+ * If you remove the VF, you must also delete the associated VF info structure
+ * from the linked list.
+ */
+void ice_mbx_init_vf_info(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info)
+{
+	struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
+
+	ice_mbx_clear_malvf(vf_info);
+	list_add(&vf_info->list_entry, &snap->mbx_vf);
+}
+
+/**
+ * ice_mbx_init_snapshot - Initialize mailbox snapshot data
+ * @hw: pointer to the hardware structure
+ *
+ * Clear the mailbox snapshot structure and initialize the VF mailbox list.
+ */
+void ice_mbx_init_snapshot(struct ice_hw *hw)
+{
+	struct ice_mbx_snapshot *snap = &hw->mbx_snapshot;
+
+	INIT_LIST_HEAD(&snap->mbx_vf);
+	ice_mbx_reset_snapshot(snap);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.h b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h
new file mode 100644
index 0000000000..44bc030d17
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _ICE_VF_MBX_H_
+#define _ICE_VF_MBX_H_
+
+#include "ice_type.h"
+#include "ice_controlq.h"
+
+/* Defining the mailbox message threshold as 63 asynchronous
+ * pending messages. Normal VF functionality does not require
+ * sending more than 63 asynchronous pending message.
+ */
+#define ICE_ASYNC_VF_MSG_THRESHOLD	63
+
+#ifdef CONFIG_PCI_IOV
+int
+ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval,
+		      u8 *msg, u16 msglen, struct ice_sq_cd *cd);
+
+u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed);
+int
+ice_mbx_vf_state_handler(struct ice_hw *hw, struct ice_mbx_data *mbx_data,
+			 struct ice_mbx_vf_info *vf_info, bool *report_malvf);
+void ice_mbx_clear_malvf(struct ice_mbx_vf_info *vf_info);
+void ice_mbx_init_vf_info(struct ice_hw *hw, struct ice_mbx_vf_info *vf_info);
+void ice_mbx_init_snapshot(struct ice_hw *hw);
+#else /* CONFIG_PCI_IOV */
+static inline int
+ice_aq_send_msg_to_vf(struct ice_hw __always_unused *hw,
+		      u16 __always_unused vfid, u32 __always_unused v_opcode,
+		      u32 __always_unused v_retval, u8 __always_unused *msg,
+		      u16 __always_unused msglen,
+		      struct ice_sq_cd __always_unused *cd)
+{
+	return 0;
+}
+
+static inline u32
+ice_conv_link_speed_to_virtchnl(bool __always_unused adv_link_support,
+				u16 __always_unused link_speed)
+{
+	return 0;
+}
+
+static inline void ice_mbx_init_snapshot(struct ice_hw *hw)
+{
+}
+
+#endif /* CONFIG_PCI_IOV */
+#endif /* _ICE_VF_MBX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
new file mode 100644
index 0000000000..80dc4bcdd3
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice_vsi_vlan_ops.h"
+#include "ice_vsi_vlan_lib.h"
+#include "ice_vlan_mode.h"
+#include "ice.h"
+#include "ice_vf_vsi_vlan_ops.h"
+#include "ice_sriov.h"
+
+static int
+noop_vlan_arg(struct ice_vsi __always_unused *vsi,
+	      struct ice_vlan __always_unused *vlan)
+{
+	return 0;
+}
+
+static int
+noop_vlan(struct ice_vsi __always_unused *vsi)
+{
+	return 0;
+}
+
+static void ice_port_vlan_on(struct ice_vsi *vsi)
+{
+	struct ice_vsi_vlan_ops *vlan_ops;
+	struct ice_pf *pf = vsi->back;
+
+	if (ice_is_dvm_ena(&pf->hw)) {
+		vlan_ops = &vsi->outer_vlan_ops;
+
+		/* setup outer VLAN ops */
+		vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
+		vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
+
+		/* setup inner VLAN ops */
+		vlan_ops = &vsi->inner_vlan_ops;
+		vlan_ops->add_vlan = noop_vlan_arg;
+		vlan_ops->del_vlan = noop_vlan_arg;
+		vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
+		vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
+		vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
+		vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
+	} else {
+		vlan_ops = &vsi->inner_vlan_ops;
+
+		vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan;
+		vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan;
+	}
+
+	/* all Rx traffic should be in the domain of the assigned port VLAN,
+	 * so prevent disabling Rx VLAN filtering
+	 */
+	vlan_ops->dis_rx_filtering = noop_vlan;
+
+	vlan_ops->ena_rx_filtering = ice_vsi_ena_rx_vlan_filtering;
+}
+
+static void ice_port_vlan_off(struct ice_vsi *vsi)
+{
+	struct ice_vsi_vlan_ops *vlan_ops;
+	struct ice_pf *pf = vsi->back;
+
+	/* setup inner VLAN ops */
+	vlan_ops = &vsi->inner_vlan_ops;
+
+	vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
+	vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
+	vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
+	vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
+
+	if (ice_is_dvm_ena(&pf->hw)) {
+		vlan_ops = &vsi->outer_vlan_ops;
+
+		vlan_ops->del_vlan = ice_vsi_del_vlan;
+		vlan_ops->ena_stripping = ice_vsi_ena_outer_stripping;
+		vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping;
+		vlan_ops->ena_insertion = ice_vsi_ena_outer_insertion;
+		vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion;
+	} else {
+		vlan_ops->del_vlan = ice_vsi_del_vlan;
+	}
+
+	vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
+
+	if (!test_bit(ICE_FLAG_VF_VLAN_PRUNING, pf->flags))
+		vlan_ops->ena_rx_filtering = noop_vlan;
+	else
+		vlan_ops->ena_rx_filtering =
+			ice_vsi_ena_rx_vlan_filtering;
+}
+
+/**
+ * ice_vf_vsi_enable_port_vlan - Set VSI VLAN ops to support port VLAN
+ * @vsi: VF's VSI being configured
+ *
+ * The function won't create port VLAN, it only allows to create port VLAN
+ * using VLAN ops on the VF VSI.
+ */
+void ice_vf_vsi_enable_port_vlan(struct ice_vsi *vsi)
+{
+	if (WARN_ON_ONCE(!vsi->vf))
+		return;
+
+	ice_port_vlan_on(vsi);
+}
+
+/**
+ * ice_vf_vsi_disable_port_vlan - Clear VSI support for creating port VLAN
+ * @vsi: VF's VSI being configured
+ *
+ * The function should be called after removing port VLAN on VSI
+ * (using VLAN ops)
+ */
+void ice_vf_vsi_disable_port_vlan(struct ice_vsi *vsi)
+{
+	if (WARN_ON_ONCE(!vsi->vf))
+		return;
+
+	ice_port_vlan_off(vsi);
+}
+
+/**
+ * ice_vf_vsi_init_vlan_ops - Initialize default VSI VLAN ops for VF VSI
+ * @vsi: VF's VSI being configured
+ *
+ * If Double VLAN Mode (DVM) is enabled, assume that the VF supports the new
+ * VIRTCHNL_VF_VLAN_OFFLOAD_V2 capability and set up the VLAN ops accordingly.
+ * If SVM is enabled maintain the same level of VLAN support previous to
+ * VIRTCHNL_VF_VLAN_OFFLOAD_V2.
+ */
+void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi)
+{
+	struct ice_vsi_vlan_ops *vlan_ops;
+	struct ice_pf *pf = vsi->back;
+	struct ice_vf *vf = vsi->vf;
+
+	if (WARN_ON(!vf))
+		return;
+
+	if (ice_vf_is_port_vlan_ena(vf))
+		ice_port_vlan_on(vsi);
+	else
+		ice_port_vlan_off(vsi);
+
+	vlan_ops = ice_is_dvm_ena(&pf->hw) ?
+		&vsi->outer_vlan_ops : &vsi->inner_vlan_ops;
+
+	vlan_ops->add_vlan = ice_vsi_add_vlan;
+	vlan_ops->ena_tx_filtering = ice_vsi_ena_tx_vlan_filtering;
+	vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering;
+}
+
+/**
+ * ice_vf_vsi_cfg_dvm_legacy_vlan_mode - Config VLAN mode for old VFs in DVM
+ * @vsi: VF's VSI being configured
+ *
+ * This should only be called when Double VLAN Mode (DVM) is enabled, there
+ * is not a port VLAN enabled on this VF, and the VF negotiates
+ * VIRTCHNL_VF_OFFLOAD_VLAN.
+ *
+ * This function sets up the VF VSI's inner and outer ice_vsi_vlan_ops and also
+ * initializes software only VLAN mode (i.e. allow all VLANs). Also, use no-op
+ * implementations for any functions that may be called during the lifetime of
+ * the VF so these methods do nothing and succeed.
+ */
+void ice_vf_vsi_cfg_dvm_legacy_vlan_mode(struct ice_vsi *vsi)
+{
+	struct ice_vsi_vlan_ops *vlan_ops;
+	struct ice_vf *vf = vsi->vf;
+	struct device *dev;
+
+	if (WARN_ON(!vf))
+		return;
+
+	dev = ice_pf_to_dev(vf->pf);
+
+	if (!ice_is_dvm_ena(&vsi->back->hw) || ice_vf_is_port_vlan_ena(vf))
+		return;
+
+	vlan_ops = &vsi->outer_vlan_ops;
+
+	/* Rx VLAN filtering always disabled to allow software offloaded VLANs
+	 * for VFs that only support VIRTCHNL_VF_OFFLOAD_VLAN and don't have a
+	 * port VLAN configured
+	 */
+	vlan_ops->dis_rx_filtering = ice_vsi_dis_rx_vlan_filtering;
+	/* Don't fail when attempting to enable Rx VLAN filtering */
+	vlan_ops->ena_rx_filtering = noop_vlan;
+
+	/* Tx VLAN filtering always disabled to allow software offloaded VLANs
+	 * for VFs that only support VIRTCHNL_VF_OFFLOAD_VLAN and don't have a
+	 * port VLAN configured
+	 */
+	vlan_ops->dis_tx_filtering = ice_vsi_dis_tx_vlan_filtering;
+	/* Don't fail when attempting to enable Tx VLAN filtering */
+	vlan_ops->ena_tx_filtering = noop_vlan;
+
+	if (vlan_ops->dis_rx_filtering(vsi))
+		dev_dbg(dev, "Failed to disable Rx VLAN filtering for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n");
+	if (vlan_ops->dis_tx_filtering(vsi))
+		dev_dbg(dev, "Failed to disable Tx VLAN filtering for old VF without VIRTHCNL_VF_OFFLOAD_VLAN_V2 support\n");
+
+	/* All outer VLAN offloads must be disabled */
+	vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping;
+	vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion;
+
+	if (vlan_ops->dis_stripping(vsi))
+		dev_dbg(dev, "Failed to disable outer VLAN stripping for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n");
+
+	if (vlan_ops->dis_insertion(vsi))
+		dev_dbg(dev, "Failed to disable outer VLAN insertion for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n");
+
+	/* All inner VLAN offloads must be disabled */
+	vlan_ops = &vsi->inner_vlan_ops;
+
+	vlan_ops->dis_stripping = ice_vsi_dis_outer_stripping;
+	vlan_ops->dis_insertion = ice_vsi_dis_outer_insertion;
+
+	if (vlan_ops->dis_stripping(vsi))
+		dev_dbg(dev, "Failed to disable inner VLAN stripping for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n");
+
+	if (vlan_ops->dis_insertion(vsi))
+		dev_dbg(dev, "Failed to disable inner VLAN insertion for old VF without VIRTCHNL_VF_OFFLOAD_VLAN_V2 support\n");
+}
+
+/**
+ * ice_vf_vsi_cfg_svm_legacy_vlan_mode - Config VLAN mode for old VFs in SVM
+ * @vsi: VF's VSI being configured
+ *
+ * This should only be called when Single VLAN Mode (SVM) is enabled, there is
+ * not a port VLAN enabled on this VF, and the VF negotiates
+ * VIRTCHNL_VF_OFFLOAD_VLAN.
+ *
+ * All of the normal SVM VLAN ops are identical for this case. However, by
+ * default Rx VLAN filtering should be turned off by default in this case.
+ */
+void ice_vf_vsi_cfg_svm_legacy_vlan_mode(struct ice_vsi *vsi)
+{
+	struct ice_vf *vf = vsi->vf;
+
+	if (WARN_ON(!vf))
+		return;
+
+	if (ice_is_dvm_ena(&vsi->back->hw) || ice_vf_is_port_vlan_ena(vf))
+		return;
+
+	if (vsi->inner_vlan_ops.dis_rx_filtering(vsi))
+		dev_dbg(ice_pf_to_dev(vf->pf), "Failed to disable Rx VLAN filtering for old VF with VIRTCHNL_VF_OFFLOAD_VLAN support\n");
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h
new file mode 100644
index 0000000000..df8aa09df3
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_VF_VSI_VLAN_OPS_H_
+#define _ICE_VF_VSI_VLAN_OPS_H_
+
+#include "ice_vsi_vlan_ops.h"
+
+struct ice_vsi;
+
+void ice_vf_vsi_cfg_dvm_legacy_vlan_mode(struct ice_vsi *vsi);
+void ice_vf_vsi_cfg_svm_legacy_vlan_mode(struct ice_vsi *vsi);
+
+#ifdef CONFIG_PCI_IOV
+void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi);
+void ice_vf_vsi_enable_port_vlan(struct ice_vsi *vsi);
+void ice_vf_vsi_disable_port_vlan(struct ice_vsi *vsi);
+#else
+static inline void ice_vf_vsi_init_vlan_ops(struct ice_vsi *vsi) { }
+static inline void ice_vf_vsi_enable_port_vlan(struct ice_vsi *vsi) { }
+static inline void ice_vf_vsi_disable_port_vlan(struct ice_vsi *vsi) { }
+#endif /* CONFIG_PCI_IOV */
+#endif /* _ICE_PF_VSI_VLAN_OPS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
new file mode 100644
index 0000000000..62337e6569
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -0,0 +1,4089 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022, Intel Corporation. */
+
+#include "ice_virtchnl.h"
+#include "ice_vf_lib_private.h"
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+#include "ice_virtchnl_allowlist.h"
+#include "ice_vf_vsi_vlan_ops.h"
+#include "ice_vlan.h"
+#include "ice_flex_pipe.h"
+#include "ice_dcb_lib.h"
+
+#define FIELD_SELECTOR(proto_hdr_field) \
+		BIT((proto_hdr_field) & PROTO_HDR_FIELD_MASK)
+
+struct ice_vc_hdr_match_type {
+	u32 vc_hdr;	/* virtchnl headers (VIRTCHNL_PROTO_HDR_XXX) */
+	u32 ice_hdr;	/* ice headers (ICE_FLOW_SEG_HDR_XXX) */
+};
+
+static const struct ice_vc_hdr_match_type ice_vc_hdr_list[] = {
+	{VIRTCHNL_PROTO_HDR_NONE,	ICE_FLOW_SEG_HDR_NONE},
+	{VIRTCHNL_PROTO_HDR_ETH,	ICE_FLOW_SEG_HDR_ETH},
+	{VIRTCHNL_PROTO_HDR_S_VLAN,	ICE_FLOW_SEG_HDR_VLAN},
+	{VIRTCHNL_PROTO_HDR_C_VLAN,	ICE_FLOW_SEG_HDR_VLAN},
+	{VIRTCHNL_PROTO_HDR_IPV4,	ICE_FLOW_SEG_HDR_IPV4 |
+					ICE_FLOW_SEG_HDR_IPV_OTHER},
+	{VIRTCHNL_PROTO_HDR_IPV6,	ICE_FLOW_SEG_HDR_IPV6 |
+					ICE_FLOW_SEG_HDR_IPV_OTHER},
+	{VIRTCHNL_PROTO_HDR_TCP,	ICE_FLOW_SEG_HDR_TCP},
+	{VIRTCHNL_PROTO_HDR_UDP,	ICE_FLOW_SEG_HDR_UDP},
+	{VIRTCHNL_PROTO_HDR_SCTP,	ICE_FLOW_SEG_HDR_SCTP},
+	{VIRTCHNL_PROTO_HDR_PPPOE,	ICE_FLOW_SEG_HDR_PPPOE},
+	{VIRTCHNL_PROTO_HDR_GTPU_IP,	ICE_FLOW_SEG_HDR_GTPU_IP},
+	{VIRTCHNL_PROTO_HDR_GTPU_EH,	ICE_FLOW_SEG_HDR_GTPU_EH},
+	{VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN,
+					ICE_FLOW_SEG_HDR_GTPU_DWN},
+	{VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP,
+					ICE_FLOW_SEG_HDR_GTPU_UP},
+	{VIRTCHNL_PROTO_HDR_L2TPV3,	ICE_FLOW_SEG_HDR_L2TPV3},
+	{VIRTCHNL_PROTO_HDR_ESP,	ICE_FLOW_SEG_HDR_ESP},
+	{VIRTCHNL_PROTO_HDR_AH,		ICE_FLOW_SEG_HDR_AH},
+	{VIRTCHNL_PROTO_HDR_PFCP,	ICE_FLOW_SEG_HDR_PFCP_SESSION},
+};
+
+struct ice_vc_hash_field_match_type {
+	u32 vc_hdr;		/* virtchnl headers
+				 * (VIRTCHNL_PROTO_HDR_XXX)
+				 */
+	u32 vc_hash_field;	/* virtchnl hash fields selector
+				 * FIELD_SELECTOR((VIRTCHNL_PROTO_HDR_ETH_XXX))
+				 */
+	u64 ice_hash_field;	/* ice hash fields
+				 * (BIT_ULL(ICE_FLOW_FIELD_IDX_XXX))
+				 */
+};
+
+static const struct
+ice_vc_hash_field_match_type ice_vc_hash_field_list[] = {
+	{VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_SA)},
+	{VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_DA)},
+	{VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST),
+		ICE_FLOW_HASH_ETH},
+	{VIRTCHNL_PROTO_HDR_ETH,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_TYPE)},
+	{VIRTCHNL_PROTO_HDR_S_VLAN,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_S_VLAN_ID),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_S_VLAN)},
+	{VIRTCHNL_PROTO_HDR_C_VLAN,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_C_VLAN_ID),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_C_VLAN)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST),
+		ICE_FLOW_HASH_IPV4},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) |
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) |
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+		ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST),
+		ICE_FLOW_HASH_IPV6},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) |
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA) |
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+		ICE_FLOW_HASH_IPV6 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+	{VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)},
+	{VIRTCHNL_PROTO_HDR_TCP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)},
+	{VIRTCHNL_PROTO_HDR_TCP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)},
+	{VIRTCHNL_PROTO_HDR_TCP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT),
+		ICE_FLOW_HASH_TCP_PORT},
+	{VIRTCHNL_PROTO_HDR_UDP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)},
+	{VIRTCHNL_PROTO_HDR_UDP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)},
+	{VIRTCHNL_PROTO_HDR_UDP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT),
+		ICE_FLOW_HASH_UDP_PORT},
+	{VIRTCHNL_PROTO_HDR_SCTP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)},
+	{VIRTCHNL_PROTO_HDR_SCTP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)},
+	{VIRTCHNL_PROTO_HDR_SCTP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) |
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT),
+		ICE_FLOW_HASH_SCTP_PORT},
+	{VIRTCHNL_PROTO_HDR_PPPOE,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID)},
+	{VIRTCHNL_PROTO_HDR_GTPU_IP,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_GTPU_IP_TEID),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID)},
+	{VIRTCHNL_PROTO_HDR_L2TPV3,
+		FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID)},
+	{VIRTCHNL_PROTO_HDR_ESP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ESP_SPI),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI)},
+	{VIRTCHNL_PROTO_HDR_AH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_AH_SPI),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI)},
+	{VIRTCHNL_PROTO_HDR_PFCP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PFCP_SEID),
+		BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID)},
+};
+
+/**
+ * ice_vc_vf_broadcast - Broadcast a message to all VFs on PF
+ * @pf: pointer to the PF structure
+ * @v_opcode: operation code
+ * @v_retval: return value
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ */
+static void
+ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops v_opcode,
+		    enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
+{
+	struct ice_hw *hw = &pf->hw;
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	mutex_lock(&pf->vfs.table_lock);
+	ice_for_each_vf(pf, bkt, vf) {
+		/* Not all vfs are enabled so skip the ones that are not */
+		if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) &&
+		    !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+			continue;
+
+		/* Ignore return value on purpose - a given VF may fail, but
+		 * we need to keep going and send to all of them
+		 */
+		ice_aq_send_msg_to_vf(hw, vf->vf_id, v_opcode, v_retval, msg,
+				      msglen, NULL);
+	}
+	mutex_unlock(&pf->vfs.table_lock);
+}
+
+/**
+ * ice_set_pfe_link - Set the link speed/status of the virtchnl_pf_event
+ * @vf: pointer to the VF structure
+ * @pfe: pointer to the virtchnl_pf_event to set link speed/status for
+ * @ice_link_speed: link speed specified by ICE_AQ_LINK_SPEED_*
+ * @link_up: whether or not to set the link up/down
+ */
+static void
+ice_set_pfe_link(struct ice_vf *vf, struct virtchnl_pf_event *pfe,
+		 int ice_link_speed, bool link_up)
+{
+	if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED) {
+		pfe->event_data.link_event_adv.link_status = link_up;
+		/* Speed in Mbps */
+		pfe->event_data.link_event_adv.link_speed =
+			ice_conv_link_speed_to_virtchnl(true, ice_link_speed);
+	} else {
+		pfe->event_data.link_event.link_status = link_up;
+		/* Legacy method for virtchnl link speeds */
+		pfe->event_data.link_event.link_speed =
+			(enum virtchnl_link_speed)
+			ice_conv_link_speed_to_virtchnl(false, ice_link_speed);
+	}
+}
+
+/**
+ * ice_vc_notify_vf_link_state - Inform a VF of link status
+ * @vf: pointer to the VF structure
+ *
+ * send a link status message to a single VF
+ */
+void ice_vc_notify_vf_link_state(struct ice_vf *vf)
+{
+	struct virtchnl_pf_event pfe = { 0 };
+	struct ice_hw *hw = &vf->pf->hw;
+
+	pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
+	pfe.severity = PF_EVENT_SEVERITY_INFO;
+
+	if (ice_is_vf_link_up(vf))
+		ice_set_pfe_link(vf, &pfe,
+				 hw->port_info->phy.link_info.link_speed, true);
+	else
+		ice_set_pfe_link(vf, &pfe, ICE_AQ_LINK_SPEED_UNKNOWN, false);
+
+	ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT,
+			      VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe,
+			      sizeof(pfe), NULL);
+}
+
+/**
+ * ice_vc_notify_link_state - Inform all VFs on a PF of link status
+ * @pf: pointer to the PF structure
+ */
+void ice_vc_notify_link_state(struct ice_pf *pf)
+{
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	mutex_lock(&pf->vfs.table_lock);
+	ice_for_each_vf(pf, bkt, vf)
+		ice_vc_notify_vf_link_state(vf);
+	mutex_unlock(&pf->vfs.table_lock);
+}
+
+/**
+ * ice_vc_notify_reset - Send pending reset message to all VFs
+ * @pf: pointer to the PF structure
+ *
+ * indicate a pending reset to all VFs on a given PF
+ */
+void ice_vc_notify_reset(struct ice_pf *pf)
+{
+	struct virtchnl_pf_event pfe;
+
+	if (!ice_has_vfs(pf))
+		return;
+
+	pfe.event = VIRTCHNL_EVENT_RESET_IMPENDING;
+	pfe.severity = PF_EVENT_SEVERITY_CERTAIN_DOOM;
+	ice_vc_vf_broadcast(pf, VIRTCHNL_OP_EVENT, VIRTCHNL_STATUS_SUCCESS,
+			    (u8 *)&pfe, sizeof(struct virtchnl_pf_event));
+}
+
+/**
+ * ice_vc_send_msg_to_vf - Send message to VF
+ * @vf: pointer to the VF info
+ * @v_opcode: virtual channel opcode
+ * @v_retval: virtual channel return value
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * send msg to VF
+ */
+int
+ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
+		      enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
+{
+	struct device *dev;
+	struct ice_pf *pf;
+	int aq_ret;
+
+	pf = vf->pf;
+	dev = ice_pf_to_dev(pf);
+
+	aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval,
+				       msg, msglen, NULL);
+	if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) {
+		dev_info(dev, "Unable to send the message to VF %d ret %d aq_err %s\n",
+			 vf->vf_id, aq_ret,
+			 ice_aq_str(pf->hw.mailboxq.sq_last_status));
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_get_ver_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to request the API version used by the PF
+ */
+static int ice_vc_get_ver_msg(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_version_info info = {
+		VIRTCHNL_VERSION_MAJOR, VIRTCHNL_VERSION_MINOR
+	};
+
+	vf->vf_ver = *(struct virtchnl_version_info *)msg;
+	/* VFs running the 1.0 API expect to get 1.0 back or they will cry. */
+	if (VF_IS_V10(&vf->vf_ver))
+		info.minor = VIRTCHNL_VERSION_MINOR_NO_VF_CAPS;
+
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_VERSION,
+				     VIRTCHNL_STATUS_SUCCESS, (u8 *)&info,
+				     sizeof(struct virtchnl_version_info));
+}
+
+/**
+ * ice_vc_get_max_frame_size - get max frame size allowed for VF
+ * @vf: VF used to determine max frame size
+ *
+ * Max frame size is determined based on the current port's max frame size and
+ * whether a port VLAN is configured on this VF. The VF is not aware whether
+ * it's in a port VLAN so the PF needs to account for this in max frame size
+ * checks and sending the max frame size to the VF.
+ */
+static u16 ice_vc_get_max_frame_size(struct ice_vf *vf)
+{
+	struct ice_port_info *pi = ice_vf_get_port_info(vf);
+	u16 max_frame_size;
+
+	max_frame_size = pi->phy.link_info.max_frame_size;
+
+	if (ice_vf_is_port_vlan_ena(vf))
+		max_frame_size -= VLAN_HLEN;
+
+	return max_frame_size;
+}
+
+/**
+ * ice_vc_get_vlan_caps
+ * @hw: pointer to the hw
+ * @vf: pointer to the VF info
+ * @vsi: pointer to the VSI
+ * @driver_caps: current driver caps
+ *
+ * Return 0 if there is no VLAN caps supported, or VLAN caps value
+ */
+static u32
+ice_vc_get_vlan_caps(struct ice_hw *hw, struct ice_vf *vf, struct ice_vsi *vsi,
+		     u32 driver_caps)
+{
+	if (ice_is_eswitch_mode_switchdev(vf->pf))
+		/* In switchdev setting VLAN from VF isn't supported */
+		return 0;
+
+	if (driver_caps & VIRTCHNL_VF_OFFLOAD_VLAN_V2) {
+		/* VLAN offloads based on current device configuration */
+		return VIRTCHNL_VF_OFFLOAD_VLAN_V2;
+	} else if (driver_caps & VIRTCHNL_VF_OFFLOAD_VLAN) {
+		/* allow VF to negotiate VIRTCHNL_VF_OFFLOAD explicitly for
+		 * these two conditions, which amounts to guest VLAN filtering
+		 * and offloads being based on the inner VLAN or the
+		 * inner/single VLAN respectively and don't allow VF to
+		 * negotiate VIRTCHNL_VF_OFFLOAD in any other cases
+		 */
+		if (ice_is_dvm_ena(hw) && ice_vf_is_port_vlan_ena(vf)) {
+			return VIRTCHNL_VF_OFFLOAD_VLAN;
+		} else if (!ice_is_dvm_ena(hw) &&
+			   !ice_vf_is_port_vlan_ena(vf)) {
+			/* configure backward compatible support for VFs that
+			 * only support VIRTCHNL_VF_OFFLOAD_VLAN, the PF is
+			 * configured in SVM, and no port VLAN is configured
+			 */
+			ice_vf_vsi_cfg_svm_legacy_vlan_mode(vsi);
+			return VIRTCHNL_VF_OFFLOAD_VLAN;
+		} else if (ice_is_dvm_ena(hw)) {
+			/* configure software offloaded VLAN support when DVM
+			 * is enabled, but no port VLAN is enabled
+			 */
+			ice_vf_vsi_cfg_dvm_legacy_vlan_mode(vsi);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_get_vf_res_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to request its resources
+ */
+static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vf_resource *vfres = NULL;
+	struct ice_hw *hw = &vf->pf->hw;
+	struct ice_vsi *vsi;
+	int len = 0;
+	int ret;
+
+	if (ice_check_vf_init(vf)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	len = virtchnl_struct_size(vfres, vsi_res, 0);
+
+	vfres = kzalloc(len, GFP_KERNEL);
+	if (!vfres) {
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		len = 0;
+		goto err;
+	}
+	if (VF_IS_V11(&vf->vf_ver))
+		vf->driver_caps = *(u32 *)msg;
+	else
+		vf->driver_caps = VIRTCHNL_VF_OFFLOAD_L2 |
+				  VIRTCHNL_VF_OFFLOAD_RSS_REG |
+				  VIRTCHNL_VF_OFFLOAD_VLAN;
+
+	vfres->vf_cap_flags = VIRTCHNL_VF_OFFLOAD_L2;
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	vfres->vf_cap_flags |= ice_vc_get_vlan_caps(hw, vf, vsi,
+						    vf->driver_caps);
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PF;
+	} else {
+		if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_AQ)
+			vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_AQ;
+		else
+			vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG;
+	}
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_FDIR_PF;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_POLLING)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_POLLING;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_WB_ON_ITR;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+
+	if (vf->driver_caps & VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_CAP_ADV_LINK_SPEED;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF;
+
+	if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_USO)
+		vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_USO;
+
+	vfres->num_vsis = 1;
+	/* Tx and Rx queue are equal for VF */
+	vfres->num_queue_pairs = vsi->num_txq;
+	vfres->max_vectors = vf->pf->vfs.num_msix_per;
+	vfres->rss_key_size = ICE_VSIQF_HKEY_ARRAY_SIZE;
+	vfres->rss_lut_size = ICE_LUT_VSI_SIZE;
+	vfres->max_mtu = ice_vc_get_max_frame_size(vf);
+
+	vfres->vsi_res[0].vsi_id = vf->lan_vsi_num;
+	vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV;
+	vfres->vsi_res[0].num_queue_pairs = vsi->num_txq;
+	ether_addr_copy(vfres->vsi_res[0].default_mac_addr,
+			vf->hw_lan_addr);
+
+	/* match guest capabilities */
+	vf->driver_caps = vfres->vf_cap_flags;
+
+	ice_vc_set_caps_allowlist(vf);
+	ice_vc_set_working_allowlist(vf);
+
+	set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
+
+err:
+	/* send the response back to the VF */
+	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_VF_RESOURCES, v_ret,
+				    (u8 *)vfres, len);
+
+	kfree(vfres);
+	return ret;
+}
+
+/**
+ * ice_vc_reset_vf_msg
+ * @vf: pointer to the VF info
+ *
+ * called from the VF to reset itself,
+ * unlike other virtchnl messages, PF driver
+ * doesn't send the response back to the VF
+ */
+static void ice_vc_reset_vf_msg(struct ice_vf *vf)
+{
+	if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
+		ice_reset_vf(vf, 0);
+}
+
+/**
+ * ice_vc_isvalid_vsi_id
+ * @vf: pointer to the VF info
+ * @vsi_id: VF relative VSI ID
+ *
+ * check for the valid VSI ID
+ */
+bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	vsi = ice_find_vsi(pf, vsi_id);
+
+	return (vsi && (vsi->vf == vf));
+}
+
+/**
+ * ice_vc_isvalid_q_id
+ * @vf: pointer to the VF info
+ * @vsi_id: VSI ID
+ * @qid: VSI relative queue ID
+ *
+ * check for the valid queue ID
+ */
+static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid)
+{
+	struct ice_vsi *vsi = ice_find_vsi(vf->pf, vsi_id);
+	/* allocated Tx and Rx queues should be always equal for VF VSI */
+	return (vsi && (qid < vsi->alloc_txq));
+}
+
+/**
+ * ice_vc_isvalid_ring_len
+ * @ring_len: length of ring
+ *
+ * check for the valid ring count, should be multiple of ICE_REQ_DESC_MULTIPLE
+ * or zero
+ */
+static bool ice_vc_isvalid_ring_len(u16 ring_len)
+{
+	return ring_len == 0 ||
+	       (ring_len >= ICE_MIN_NUM_DESC &&
+		ring_len <= ICE_MAX_NUM_DESC &&
+		!(ring_len % ICE_REQ_DESC_MULTIPLE));
+}
+
+/**
+ * ice_vc_validate_pattern
+ * @vf: pointer to the VF info
+ * @proto: virtchnl protocol headers
+ *
+ * validate the pattern is supported or not.
+ *
+ * Return: true on success, false on error.
+ */
+bool
+ice_vc_validate_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto)
+{
+	bool is_ipv4 = false;
+	bool is_ipv6 = false;
+	bool is_udp = false;
+	u16 ptype = -1;
+	int i = 0;
+
+	while (i < proto->count &&
+	       proto->proto_hdr[i].type != VIRTCHNL_PROTO_HDR_NONE) {
+		switch (proto->proto_hdr[i].type) {
+		case VIRTCHNL_PROTO_HDR_ETH:
+			ptype = ICE_PTYPE_MAC_PAY;
+			break;
+		case VIRTCHNL_PROTO_HDR_IPV4:
+			ptype = ICE_PTYPE_IPV4_PAY;
+			is_ipv4 = true;
+			break;
+		case VIRTCHNL_PROTO_HDR_IPV6:
+			ptype = ICE_PTYPE_IPV6_PAY;
+			is_ipv6 = true;
+			break;
+		case VIRTCHNL_PROTO_HDR_UDP:
+			if (is_ipv4)
+				ptype = ICE_PTYPE_IPV4_UDP_PAY;
+			else if (is_ipv6)
+				ptype = ICE_PTYPE_IPV6_UDP_PAY;
+			is_udp = true;
+			break;
+		case VIRTCHNL_PROTO_HDR_TCP:
+			if (is_ipv4)
+				ptype = ICE_PTYPE_IPV4_TCP_PAY;
+			else if (is_ipv6)
+				ptype = ICE_PTYPE_IPV6_TCP_PAY;
+			break;
+		case VIRTCHNL_PROTO_HDR_SCTP:
+			if (is_ipv4)
+				ptype = ICE_PTYPE_IPV4_SCTP_PAY;
+			else if (is_ipv6)
+				ptype = ICE_PTYPE_IPV6_SCTP_PAY;
+			break;
+		case VIRTCHNL_PROTO_HDR_GTPU_IP:
+		case VIRTCHNL_PROTO_HDR_GTPU_EH:
+			if (is_ipv4)
+				ptype = ICE_MAC_IPV4_GTPU;
+			else if (is_ipv6)
+				ptype = ICE_MAC_IPV6_GTPU;
+			goto out;
+		case VIRTCHNL_PROTO_HDR_L2TPV3:
+			if (is_ipv4)
+				ptype = ICE_MAC_IPV4_L2TPV3;
+			else if (is_ipv6)
+				ptype = ICE_MAC_IPV6_L2TPV3;
+			goto out;
+		case VIRTCHNL_PROTO_HDR_ESP:
+			if (is_ipv4)
+				ptype = is_udp ? ICE_MAC_IPV4_NAT_T_ESP :
+						ICE_MAC_IPV4_ESP;
+			else if (is_ipv6)
+				ptype = is_udp ? ICE_MAC_IPV6_NAT_T_ESP :
+						ICE_MAC_IPV6_ESP;
+			goto out;
+		case VIRTCHNL_PROTO_HDR_AH:
+			if (is_ipv4)
+				ptype = ICE_MAC_IPV4_AH;
+			else if (is_ipv6)
+				ptype = ICE_MAC_IPV6_AH;
+			goto out;
+		case VIRTCHNL_PROTO_HDR_PFCP:
+			if (is_ipv4)
+				ptype = ICE_MAC_IPV4_PFCP_SESSION;
+			else if (is_ipv6)
+				ptype = ICE_MAC_IPV6_PFCP_SESSION;
+			goto out;
+		default:
+			break;
+		}
+		i++;
+	}
+
+out:
+	return ice_hw_ptype_ena(&vf->pf->hw, ptype);
+}
+
+/**
+ * ice_vc_parse_rss_cfg - parses hash fields and headers from
+ * a specific virtchnl RSS cfg
+ * @hw: pointer to the hardware
+ * @rss_cfg: pointer to the virtchnl RSS cfg
+ * @addl_hdrs: pointer to the protocol header fields (ICE_FLOW_SEG_HDR_*)
+ * to configure
+ * @hash_flds: pointer to the hash bit fields (ICE_FLOW_HASH_*) to configure
+ *
+ * Return true if all the protocol header and hash fields in the RSS cfg could
+ * be parsed, else return false
+ *
+ * This function parses the virtchnl RSS cfg to be the intended
+ * hash fields and the intended header for RSS configuration
+ */
+static bool
+ice_vc_parse_rss_cfg(struct ice_hw *hw, struct virtchnl_rss_cfg *rss_cfg,
+		     u32 *addl_hdrs, u64 *hash_flds)
+{
+	const struct ice_vc_hash_field_match_type *hf_list;
+	const struct ice_vc_hdr_match_type *hdr_list;
+	int i, hf_list_len, hdr_list_len;
+
+	hf_list = ice_vc_hash_field_list;
+	hf_list_len = ARRAY_SIZE(ice_vc_hash_field_list);
+	hdr_list = ice_vc_hdr_list;
+	hdr_list_len = ARRAY_SIZE(ice_vc_hdr_list);
+
+	for (i = 0; i < rss_cfg->proto_hdrs.count; i++) {
+		struct virtchnl_proto_hdr *proto_hdr =
+					&rss_cfg->proto_hdrs.proto_hdr[i];
+		bool hdr_found = false;
+		int j;
+
+		/* Find matched ice headers according to virtchnl headers. */
+		for (j = 0; j < hdr_list_len; j++) {
+			struct ice_vc_hdr_match_type hdr_map = hdr_list[j];
+
+			if (proto_hdr->type == hdr_map.vc_hdr) {
+				*addl_hdrs |= hdr_map.ice_hdr;
+				hdr_found = true;
+			}
+		}
+
+		if (!hdr_found)
+			return false;
+
+		/* Find matched ice hash fields according to
+		 * virtchnl hash fields.
+		 */
+		for (j = 0; j < hf_list_len; j++) {
+			struct ice_vc_hash_field_match_type hf_map = hf_list[j];
+
+			if (proto_hdr->type == hf_map.vc_hdr &&
+			    proto_hdr->field_selector == hf_map.vc_hash_field) {
+				*hash_flds |= hf_map.ice_hash_field;
+				break;
+			}
+		}
+	}
+
+	return true;
+}
+
+/**
+ * ice_vf_adv_rss_offload_ena - determine if capabilities support advanced
+ * RSS offloads
+ * @caps: VF driver negotiated capabilities
+ *
+ * Return true if VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF capability is set,
+ * else return false
+ */
+static bool ice_vf_adv_rss_offload_ena(u32 caps)
+{
+	return !!(caps & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF);
+}
+
+/**
+ * ice_vc_handle_rss_cfg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the message buffer
+ * @add: add a RSS config if true, otherwise delete a RSS config
+ *
+ * This function adds/deletes a RSS config
+ */
+static int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add)
+{
+	u32 v_opcode = add ? VIRTCHNL_OP_ADD_RSS_CFG : VIRTCHNL_OP_DEL_RSS_CFG;
+	struct virtchnl_rss_cfg *rss_cfg = (struct virtchnl_rss_cfg *)msg;
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_hw *hw = &vf->pf->hw;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+		dev_dbg(dev, "VF %d attempting to configure RSS, but RSS is not supported by the PF\n",
+			vf->vf_id);
+		v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
+		goto error_param;
+	}
+
+	if (!ice_vf_adv_rss_offload_ena(vf->driver_caps)) {
+		dev_dbg(dev, "VF %d attempting to configure RSS, but Advanced RSS offload is not supported\n",
+			vf->vf_id);
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (rss_cfg->proto_hdrs.count > VIRTCHNL_MAX_NUM_PROTO_HDRS ||
+	    rss_cfg->rss_algorithm < VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC ||
+	    rss_cfg->rss_algorithm > VIRTCHNL_RSS_ALG_XOR_SYMMETRIC) {
+		dev_dbg(dev, "VF %d attempting to configure RSS, but RSS configuration is not valid\n",
+			vf->vf_id);
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_R_ASYMMETRIC) {
+		struct ice_vsi_ctx *ctx;
+		u8 lut_type, hash_type;
+		int status;
+
+		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
+		hash_type = add ? ICE_AQ_VSI_Q_OPT_RSS_XOR :
+				ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
+
+		ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+		if (!ctx) {
+			v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+			goto error_param;
+		}
+
+		ctx->info.q_opt_rss = ((lut_type <<
+					ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
+				       ICE_AQ_VSI_Q_OPT_RSS_LUT_M) |
+				       (hash_type &
+					ICE_AQ_VSI_Q_OPT_RSS_HASH_M);
+
+		/* Preserve existing queueing option setting */
+		ctx->info.q_opt_rss |= (vsi->info.q_opt_rss &
+					  ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M);
+		ctx->info.q_opt_tc = vsi->info.q_opt_tc;
+		ctx->info.q_opt_flags = vsi->info.q_opt_rss;
+
+		ctx->info.valid_sections =
+				cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
+
+		status = ice_update_vsi(hw, vsi->idx, ctx, NULL);
+		if (status) {
+			dev_err(dev, "update VSI for RSS failed, err %d aq_err %s\n",
+				status, ice_aq_str(hw->adminq.sq_last_status));
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		} else {
+			vsi->info.q_opt_rss = ctx->info.q_opt_rss;
+		}
+
+		kfree(ctx);
+	} else {
+		u32 addl_hdrs = ICE_FLOW_SEG_HDR_NONE;
+		u64 hash_flds = ICE_HASH_INVALID;
+
+		if (!ice_vc_parse_rss_cfg(hw, rss_cfg, &addl_hdrs,
+					  &hash_flds)) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		if (add) {
+			if (ice_add_rss_cfg(hw, vsi->idx, hash_flds,
+					    addl_hdrs)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				dev_err(dev, "ice_add_rss_cfg failed for vsi = %d, v_ret = %d\n",
+					vsi->vsi_num, v_ret);
+			}
+		} else {
+			int status;
+
+			status = ice_rem_rss_cfg(hw, vsi->idx, hash_flds,
+						 addl_hdrs);
+			/* We just ignore -ENOENT, because if two configurations
+			 * share the same profile remove one of them actually
+			 * removes both, since the profile is deleted.
+			 */
+			if (status && status != -ENOENT) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				dev_err(dev, "ice_rem_rss_cfg failed for VF ID:%d, error:%d\n",
+					vf->vf_id, status);
+			}
+		}
+	}
+
+error_param:
+	return ice_vc_send_msg_to_vf(vf, v_opcode, v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_config_rss_key
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Configure the VF's RSS key
+ */
+static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_rss_key *vrk =
+		(struct virtchnl_rss_key *)msg;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vrk->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (vrk->key_len != ICE_VSIQF_HKEY_ARRAY_SIZE) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (ice_set_rss_key(vsi, vrk->key))
+		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+error_param:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_vc_config_rss_lut
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Configure the VF's RSS LUT
+ */
+static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg;
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vrl->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (vrl->lut_entries != ICE_LUT_VSI_SIZE) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (ice_set_rss_lut(vsi, vrl->lut, ICE_LUT_VSI_SIZE))
+		v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+error_param:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_vc_cfg_promiscuous_mode_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure VF VSIs promiscuous mode
+ */
+static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	bool rm_promisc, alluni = false, allmulti = false;
+	struct virtchnl_promisc_info *info =
+	    (struct virtchnl_promisc_info *)msg;
+	struct ice_vsi_vlan_ops *vlan_ops;
+	int mcast_err = 0, ucast_err = 0;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	u8 mcast_m, ucast_m;
+	struct device *dev;
+	int ret = 0;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, info->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	dev = ice_pf_to_dev(pf);
+	if (!ice_is_vf_trusted(vf)) {
+		dev_err(dev, "Unprivileged VF %d is attempting to configure promiscuous mode\n",
+			vf->vf_id);
+		/* Leave v_ret alone, lie to the VF on purpose. */
+		goto error_param;
+	}
+
+	if (info->flags & FLAG_VF_UNICAST_PROMISC)
+		alluni = true;
+
+	if (info->flags & FLAG_VF_MULTICAST_PROMISC)
+		allmulti = true;
+
+	rm_promisc = !allmulti && !alluni;
+
+	vlan_ops = ice_get_compat_vsi_vlan_ops(vsi);
+	if (rm_promisc)
+		ret = vlan_ops->ena_rx_filtering(vsi);
+	else
+		ret = vlan_ops->dis_rx_filtering(vsi);
+	if (ret) {
+		dev_err(dev, "Failed to configure VLAN pruning in promiscuous mode\n");
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	ice_vf_get_promisc_masks(vf, vsi, &ucast_m, &mcast_m);
+
+	if (!test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags)) {
+		if (alluni) {
+			/* in this case we're turning on promiscuous mode */
+			ret = ice_set_dflt_vsi(vsi);
+		} else {
+			/* in this case we're turning off promiscuous mode */
+			if (ice_is_dflt_vsi_in_use(vsi->port_info))
+				ret = ice_clear_dflt_vsi(vsi);
+		}
+
+		/* in this case we're turning on/off only
+		 * allmulticast
+		 */
+		if (allmulti)
+			mcast_err = ice_vf_set_vsi_promisc(vf, vsi, mcast_m);
+		else
+			mcast_err = ice_vf_clear_vsi_promisc(vf, vsi, mcast_m);
+
+		if (ret) {
+			dev_err(dev, "Turning on/off promiscuous mode for VF %d failed, error: %d\n",
+				vf->vf_id, ret);
+			v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+			goto error_param;
+		}
+	} else {
+		if (alluni)
+			ucast_err = ice_vf_set_vsi_promisc(vf, vsi, ucast_m);
+		else
+			ucast_err = ice_vf_clear_vsi_promisc(vf, vsi, ucast_m);
+
+		if (allmulti)
+			mcast_err = ice_vf_set_vsi_promisc(vf, vsi, mcast_m);
+		else
+			mcast_err = ice_vf_clear_vsi_promisc(vf, vsi, mcast_m);
+
+		if (ucast_err || mcast_err)
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+	}
+
+	if (!mcast_err) {
+		if (allmulti &&
+		    !test_and_set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
+			dev_info(dev, "VF %u successfully set multicast promiscuous mode\n",
+				 vf->vf_id);
+		else if (!allmulti &&
+			 test_and_clear_bit(ICE_VF_STATE_MC_PROMISC,
+					    vf->vf_states))
+			dev_info(dev, "VF %u successfully unset multicast promiscuous mode\n",
+				 vf->vf_id);
+	} else {
+		dev_err(dev, "Error while modifying multicast promiscuous mode for VF %u, error: %d\n",
+			vf->vf_id, mcast_err);
+	}
+
+	if (!ucast_err) {
+		if (alluni &&
+		    !test_and_set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states))
+			dev_info(dev, "VF %u successfully set unicast promiscuous mode\n",
+				 vf->vf_id);
+		else if (!alluni &&
+			 test_and_clear_bit(ICE_VF_STATE_UC_PROMISC,
+					    vf->vf_states))
+			dev_info(dev, "VF %u successfully unset unicast promiscuous mode\n",
+				 vf->vf_id);
+	} else {
+		dev_err(dev, "Error while modifying unicast promiscuous mode for VF %u, error: %d\n",
+			vf->vf_id, ucast_err);
+	}
+
+error_param:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+				     v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_get_stats_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to get VSI stats
+ */
+static int ice_vc_get_stats_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_queue_select *vqs =
+		(struct virtchnl_queue_select *)msg;
+	struct ice_eth_stats stats = { 0 };
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	ice_update_eth_stats(vsi);
+
+	stats = vsi->eth_stats;
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_STATS, v_ret,
+				     (u8 *)&stats, sizeof(stats));
+}
+
+/**
+ * ice_vc_validate_vqs_bitmaps - validate Rx/Tx queue bitmaps from VIRTCHNL
+ * @vqs: virtchnl_queue_select structure containing bitmaps to validate
+ *
+ * Return true on successful validation, else false
+ */
+static bool ice_vc_validate_vqs_bitmaps(struct virtchnl_queue_select *vqs)
+{
+	if ((!vqs->rx_queues && !vqs->tx_queues) ||
+	    vqs->rx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF) ||
+	    vqs->tx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_vf_ena_txq_interrupt - enable Tx queue interrupt via QINT_TQCTL
+ * @vsi: VSI of the VF to configure
+ * @q_idx: VF queue index used to determine the queue in the PF's space
+ */
+static void ice_vf_ena_txq_interrupt(struct ice_vsi *vsi, u32 q_idx)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	u32 pfq = vsi->txq_map[q_idx];
+	u32 reg;
+
+	reg = rd32(hw, QINT_TQCTL(pfq));
+
+	/* MSI-X index 0 in the VF's space is always for the OICR, which means
+	 * this is most likely a poll mode VF driver, so don't enable an
+	 * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP
+	 */
+	if (!(reg & QINT_TQCTL_MSIX_INDX_M))
+		return;
+
+	wr32(hw, QINT_TQCTL(pfq), reg | QINT_TQCTL_CAUSE_ENA_M);
+}
+
+/**
+ * ice_vf_ena_rxq_interrupt - enable Tx queue interrupt via QINT_RQCTL
+ * @vsi: VSI of the VF to configure
+ * @q_idx: VF queue index used to determine the queue in the PF's space
+ */
+static void ice_vf_ena_rxq_interrupt(struct ice_vsi *vsi, u32 q_idx)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	u32 pfq = vsi->rxq_map[q_idx];
+	u32 reg;
+
+	reg = rd32(hw, QINT_RQCTL(pfq));
+
+	/* MSI-X index 0 in the VF's space is always for the OICR, which means
+	 * this is most likely a poll mode VF driver, so don't enable an
+	 * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP
+	 */
+	if (!(reg & QINT_RQCTL_MSIX_INDX_M))
+		return;
+
+	wr32(hw, QINT_RQCTL(pfq), reg | QINT_RQCTL_CAUSE_ENA_M);
+}
+
+/**
+ * ice_vc_ena_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to enable all or specific queue(s)
+ */
+static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_queue_select *vqs =
+	    (struct virtchnl_queue_select *)msg;
+	struct ice_vsi *vsi;
+	unsigned long q_map;
+	u16 vf_q_id;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_validate_vqs_bitmaps(vqs)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	/* Enable only Rx rings, Tx rings were enabled by the FW when the
+	 * Tx queue group list was configured and the context bits were
+	 * programmed using ice_vsi_cfg_txqs
+	 */
+	q_map = vqs->rx_queues;
+	for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+		if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		/* Skip queue if enabled */
+		if (test_bit(vf_q_id, vf->rxq_ena))
+			continue;
+
+		if (ice_vsi_ctrl_one_rx_ring(vsi, true, vf_q_id, true)) {
+			dev_err(ice_pf_to_dev(vsi->back), "Failed to enable Rx ring %d on VSI %d\n",
+				vf_q_id, vsi->vsi_num);
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		ice_vf_ena_rxq_interrupt(vsi, vf_q_id);
+		set_bit(vf_q_id, vf->rxq_ena);
+	}
+
+	q_map = vqs->tx_queues;
+	for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+		if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		/* Skip queue if enabled */
+		if (test_bit(vf_q_id, vf->txq_ena))
+			continue;
+
+		ice_vf_ena_txq_interrupt(vsi, vf_q_id);
+		set_bit(vf_q_id, vf->txq_ena);
+	}
+
+	/* Set flag to indicate that queues are enabled */
+	if (v_ret == VIRTCHNL_STATUS_SUCCESS)
+		set_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_vf_vsi_dis_single_txq - disable a single Tx queue
+ * @vf: VF to disable queue for
+ * @vsi: VSI for the VF
+ * @q_id: VF relative (0-based) queue ID
+ *
+ * Attempt to disable the Tx queue passed in. If the Tx queue was successfully
+ * disabled then clear q_id bit in the enabled queues bitmap and return
+ * success. Otherwise return error.
+ */
+static int
+ice_vf_vsi_dis_single_txq(struct ice_vf *vf, struct ice_vsi *vsi, u16 q_id)
+{
+	struct ice_txq_meta txq_meta = { 0 };
+	struct ice_tx_ring *ring;
+	int err;
+
+	if (!test_bit(q_id, vf->txq_ena))
+		dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n",
+			q_id, vsi->vsi_num);
+
+	ring = vsi->tx_rings[q_id];
+	if (!ring)
+		return -EINVAL;
+
+	ice_fill_txq_meta(vsi, ring, &txq_meta);
+
+	err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta);
+	if (err) {
+		dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n",
+			q_id, vsi->vsi_num);
+		return err;
+	}
+
+	/* Clear enabled queues flag */
+	clear_bit(q_id, vf->txq_ena);
+
+	return 0;
+}
+
+/**
+ * ice_vc_dis_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to disable all or specific queue(s)
+ */
+static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_queue_select *vqs =
+	    (struct virtchnl_queue_select *)msg;
+	struct ice_vsi *vsi;
+	unsigned long q_map;
+	u16 vf_q_id;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) &&
+	    !test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_validate_vqs_bitmaps(vqs)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (vqs->tx_queues) {
+		q_map = vqs->tx_queues;
+
+		for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+			if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			if (ice_vf_vsi_dis_single_txq(vf, vsi, vf_q_id)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+		}
+	}
+
+	q_map = vqs->rx_queues;
+	/* speed up Rx queue disable by batching them if possible */
+	if (q_map &&
+	    bitmap_equal(&q_map, vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF)) {
+		if (ice_vsi_stop_all_rx_rings(vsi)) {
+			dev_err(ice_pf_to_dev(vsi->back), "Failed to stop all Rx rings on VSI %d\n",
+				vsi->vsi_num);
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF);
+	} else if (q_map) {
+		for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) {
+			if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Skip queue if not enabled */
+			if (!test_bit(vf_q_id, vf->rxq_ena))
+				continue;
+
+			if (ice_vsi_ctrl_one_rx_ring(vsi, false, vf_q_id,
+						     true)) {
+				dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Rx ring %d on VSI %d\n",
+					vf_q_id, vsi->vsi_num);
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Clear enabled queues flag */
+			clear_bit(vf_q_id, vf->rxq_ena);
+		}
+	}
+
+	/* Clear enabled queues flag */
+	if (v_ret == VIRTCHNL_STATUS_SUCCESS && ice_vf_has_no_qs_ena(vf))
+		clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states);
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_cfg_interrupt
+ * @vf: pointer to the VF info
+ * @vsi: the VSI being configured
+ * @vector_id: vector ID
+ * @map: vector map for mapping vectors to queues
+ * @q_vector: structure for interrupt vector
+ * configure the IRQ to queue map
+ */
+static int
+ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id,
+		  struct virtchnl_vector_map *map,
+		  struct ice_q_vector *q_vector)
+{
+	u16 vsi_q_id, vsi_q_id_idx;
+	unsigned long qmap;
+
+	q_vector->num_ring_rx = 0;
+	q_vector->num_ring_tx = 0;
+
+	qmap = map->rxq_map;
+	for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) {
+		vsi_q_id = vsi_q_id_idx;
+
+		if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id))
+			return VIRTCHNL_STATUS_ERR_PARAM;
+
+		q_vector->num_ring_rx++;
+		q_vector->rx.itr_idx = map->rxitr_idx;
+		vsi->rx_rings[vsi_q_id]->q_vector = q_vector;
+		ice_cfg_rxq_interrupt(vsi, vsi_q_id, vector_id,
+				      q_vector->rx.itr_idx);
+	}
+
+	qmap = map->txq_map;
+	for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) {
+		vsi_q_id = vsi_q_id_idx;
+
+		if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id))
+			return VIRTCHNL_STATUS_ERR_PARAM;
+
+		q_vector->num_ring_tx++;
+		q_vector->tx.itr_idx = map->txitr_idx;
+		vsi->tx_rings[vsi_q_id]->q_vector = q_vector;
+		ice_cfg_txq_interrupt(vsi, vsi_q_id, vector_id,
+				      q_vector->tx.itr_idx);
+	}
+
+	return VIRTCHNL_STATUS_SUCCESS;
+}
+
+/**
+ * ice_vc_cfg_irq_map_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure the IRQ to queue map
+ */
+static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	u16 num_q_vectors_mapped, vsi_id, vector_id;
+	struct virtchnl_irq_map_info *irqmap_info;
+	struct virtchnl_vector_map *map;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	int i;
+
+	irqmap_info = (struct virtchnl_irq_map_info *)msg;
+	num_q_vectors_mapped = irqmap_info->num_vectors;
+
+	/* Check to make sure number of VF vectors mapped is not greater than
+	 * number of VF vectors originally allocated, and check that
+	 * there is actually at least a single VF queue vector mapped
+	 */
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
+	    pf->vfs.num_msix_per < num_q_vectors_mapped ||
+	    !num_q_vectors_mapped) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	for (i = 0; i < num_q_vectors_mapped; i++) {
+		struct ice_q_vector *q_vector;
+
+		map = &irqmap_info->vecmap[i];
+
+		vector_id = map->vector_id;
+		vsi_id = map->vsi_id;
+		/* vector_id is always 0-based for each VF, and can never be
+		 * larger than or equal to the max allowed interrupts per VF
+		 */
+		if (!(vector_id < pf->vfs.num_msix_per) ||
+		    !ice_vc_isvalid_vsi_id(vf, vsi_id) ||
+		    (!vector_id && (map->rxq_map || map->txq_map))) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		/* No need to map VF miscellaneous or rogue vector */
+		if (!vector_id)
+			continue;
+
+		/* Subtract non queue vector from vector_id passed by VF
+		 * to get actual number of VSI queue vector array index
+		 */
+		q_vector = vsi->q_vectors[vector_id - ICE_NONQ_VECS_VF];
+		if (!q_vector) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		/* lookout for the invalid queue index */
+		v_ret = (enum virtchnl_status_code)
+			ice_cfg_interrupt(vf, vsi, vector_id, map, q_vector);
+		if (v_ret)
+			goto error_param;
+	}
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_vc_cfg_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure the Rx/Tx queues
+ */
+static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_vsi_queue_config_info *qci =
+	    (struct virtchnl_vsi_queue_config_info *)msg;
+	struct virtchnl_queue_pair_info *qpi;
+	struct ice_pf *pf = vf->pf;
+	struct ice_lag *lag;
+	struct ice_vsi *vsi;
+	u8 act_prt, pri_prt;
+	int i = -1, q_idx;
+
+	lag = pf->lag;
+	mutex_lock(&pf->lag_mutex);
+	act_prt = ICE_LAG_INVALID_PORT;
+	pri_prt = pf->hw.port_info->lport;
+	if (lag && lag->bonded && lag->primary) {
+		act_prt = lag->active_port;
+		if (act_prt != pri_prt && act_prt != ICE_LAG_INVALID_PORT &&
+		    lag->upper_netdev)
+			ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt);
+		else
+			act_prt = ICE_LAG_INVALID_PORT;
+	}
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+		goto error_param;
+
+	if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id))
+		goto error_param;
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi)
+		goto error_param;
+
+	if (qci->num_queue_pairs > ICE_MAX_RSS_QS_PER_VF ||
+	    qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
+		dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n",
+			vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
+		goto error_param;
+	}
+
+	for (i = 0; i < qci->num_queue_pairs; i++) {
+		qpi = &qci->qpair[i];
+		if (qpi->txq.vsi_id != qci->vsi_id ||
+		    qpi->rxq.vsi_id != qci->vsi_id ||
+		    qpi->rxq.queue_id != qpi->txq.queue_id ||
+		    qpi->txq.headwb_enabled ||
+		    !ice_vc_isvalid_ring_len(qpi->txq.ring_len) ||
+		    !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) ||
+		    !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) {
+			goto error_param;
+		}
+
+		q_idx = qpi->rxq.queue_id;
+
+		/* make sure selected "q_idx" is in valid range of queues
+		 * for selected "vsi"
+		 */
+		if (q_idx >= vsi->alloc_txq || q_idx >= vsi->alloc_rxq) {
+			goto error_param;
+		}
+
+		/* copy Tx queue info from VF into VSI */
+		if (qpi->txq.ring_len > 0) {
+			vsi->tx_rings[i]->dma = qpi->txq.dma_ring_addr;
+			vsi->tx_rings[i]->count = qpi->txq.ring_len;
+
+			/* Disable any existing queue first */
+			if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx))
+				goto error_param;
+
+			/* Configure a queue with the requested settings */
+			if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) {
+				dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure TX queue %d\n",
+					 vf->vf_id, i);
+				goto error_param;
+			}
+		}
+
+		/* copy Rx queue info from VF into VSI */
+		if (qpi->rxq.ring_len > 0) {
+			u16 max_frame_size = ice_vc_get_max_frame_size(vf);
+			u32 rxdid;
+
+			vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr;
+			vsi->rx_rings[i]->count = qpi->rxq.ring_len;
+
+			if (qpi->rxq.databuffer_size != 0 &&
+			    (qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
+			     qpi->rxq.databuffer_size < 1024))
+				goto error_param;
+			vsi->rx_buf_len = qpi->rxq.databuffer_size;
+			vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len;
+			if (qpi->rxq.max_pkt_size > max_frame_size ||
+			    qpi->rxq.max_pkt_size < 64)
+				goto error_param;
+
+			vsi->max_frame = qpi->rxq.max_pkt_size;
+			/* add space for the port VLAN since the VF driver is
+			 * not expected to account for it in the MTU
+			 * calculation
+			 */
+			if (ice_vf_is_port_vlan_ena(vf))
+				vsi->max_frame += VLAN_HLEN;
+
+			if (ice_vsi_cfg_single_rxq(vsi, q_idx)) {
+				dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n",
+					 vf->vf_id, i);
+				goto error_param;
+			}
+
+			/* If Rx flex desc is supported, select RXDID for Rx
+			 * queues. Otherwise, use legacy 32byte descriptor
+			 * format. Legacy 16byte descriptor is not supported.
+			 * If this RXDID is selected, return error.
+			 */
+			if (vf->driver_caps &
+			    VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) {
+				rxdid = qpi->rxq.rxdid;
+				if (!(BIT(rxdid) & pf->supported_rxdids))
+					goto error_param;
+			} else {
+				rxdid = ICE_RXDID_LEGACY_1;
+			}
+
+			ice_write_qrxflxp_cntxt(&vsi->back->hw,
+						vsi->rxq_map[q_idx],
+						rxdid, 0x03, false);
+		}
+	}
+
+	if (lag && lag->bonded && lag->primary &&
+	    act_prt != ICE_LAG_INVALID_PORT)
+		ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt);
+	mutex_unlock(&pf->lag_mutex);
+
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+				     VIRTCHNL_STATUS_SUCCESS, NULL, 0);
+error_param:
+	/* disable whatever we can */
+	for (; i >= 0; i--) {
+		if (ice_vsi_ctrl_one_rx_ring(vsi, false, i, true))
+			dev_err(ice_pf_to_dev(pf), "VF-%d could not disable RX queue %d\n",
+				vf->vf_id, i);
+		if (ice_vf_vsi_dis_single_txq(vf, vsi, i))
+			dev_err(ice_pf_to_dev(pf), "VF-%d could not disable TX queue %d\n",
+				vf->vf_id, i);
+	}
+
+	if (lag && lag->bonded && lag->primary &&
+	    act_prt != ICE_LAG_INVALID_PORT)
+		ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt);
+	mutex_unlock(&pf->lag_mutex);
+
+	ice_lag_move_new_vf_nodes(vf);
+
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+				     VIRTCHNL_STATUS_ERR_PARAM, NULL, 0);
+}
+
+/**
+ * ice_can_vf_change_mac
+ * @vf: pointer to the VF info
+ *
+ * Return true if the VF is allowed to change its MAC filters, false otherwise
+ */
+static bool ice_can_vf_change_mac(struct ice_vf *vf)
+{
+	/* If the VF MAC address has been set administratively (via the
+	 * ndo_set_vf_mac command), then deny permission to the VF to
+	 * add/delete unicast MAC addresses, unless the VF is trusted
+	 */
+	if (vf->pf_set_mac && !ice_is_vf_trusted(vf))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_vc_ether_addr_type - get type of virtchnl_ether_addr
+ * @vc_ether_addr: used to extract the type
+ */
+static u8
+ice_vc_ether_addr_type(struct virtchnl_ether_addr *vc_ether_addr)
+{
+	return (vc_ether_addr->type & VIRTCHNL_ETHER_ADDR_TYPE_MASK);
+}
+
+/**
+ * ice_is_vc_addr_legacy - check if the MAC address is from an older VF
+ * @vc_ether_addr: VIRTCHNL structure that contains MAC and type
+ */
+static bool
+ice_is_vc_addr_legacy(struct virtchnl_ether_addr *vc_ether_addr)
+{
+	u8 type = ice_vc_ether_addr_type(vc_ether_addr);
+
+	return (type == VIRTCHNL_ETHER_ADDR_LEGACY);
+}
+
+/**
+ * ice_is_vc_addr_primary - check if the MAC address is the VF's primary MAC
+ * @vc_ether_addr: VIRTCHNL structure that contains MAC and type
+ *
+ * This function should only be called when the MAC address in
+ * virtchnl_ether_addr is a valid unicast MAC
+ */
+static bool
+ice_is_vc_addr_primary(struct virtchnl_ether_addr __maybe_unused *vc_ether_addr)
+{
+	u8 type = ice_vc_ether_addr_type(vc_ether_addr);
+
+	return (type == VIRTCHNL_ETHER_ADDR_PRIMARY);
+}
+
+/**
+ * ice_vfhw_mac_add - update the VF's cached hardware MAC if allowed
+ * @vf: VF to update
+ * @vc_ether_addr: structure from VIRTCHNL with MAC to add
+ */
+static void
+ice_vfhw_mac_add(struct ice_vf *vf, struct virtchnl_ether_addr *vc_ether_addr)
+{
+	u8 *mac_addr = vc_ether_addr->addr;
+
+	if (!is_valid_ether_addr(mac_addr))
+		return;
+
+	/* only allow legacy VF drivers to set the device and hardware MAC if it
+	 * is zero and allow new VF drivers to set the hardware MAC if the type
+	 * was correctly specified over VIRTCHNL
+	 */
+	if ((ice_is_vc_addr_legacy(vc_ether_addr) &&
+	     is_zero_ether_addr(vf->hw_lan_addr)) ||
+	    ice_is_vc_addr_primary(vc_ether_addr)) {
+		ether_addr_copy(vf->dev_lan_addr, mac_addr);
+		ether_addr_copy(vf->hw_lan_addr, mac_addr);
+	}
+
+	/* hardware and device MACs are already set, but its possible that the
+	 * VF driver sent the VIRTCHNL_OP_ADD_ETH_ADDR message before the
+	 * VIRTCHNL_OP_DEL_ETH_ADDR when trying to update its MAC, so save it
+	 * away for the legacy VF driver case as it will be updated in the
+	 * delete flow for this case
+	 */
+	if (ice_is_vc_addr_legacy(vc_ether_addr)) {
+		ether_addr_copy(vf->legacy_last_added_umac.addr,
+				mac_addr);
+		vf->legacy_last_added_umac.time_modified = jiffies;
+	}
+}
+
+/**
+ * ice_vc_add_mac_addr - attempt to add the MAC address passed in
+ * @vf: pointer to the VF info
+ * @vsi: pointer to the VF's VSI
+ * @vc_ether_addr: VIRTCHNL MAC address structure used to add MAC
+ */
+static int
+ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi,
+		    struct virtchnl_ether_addr *vc_ether_addr)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	u8 *mac_addr = vc_ether_addr->addr;
+	int ret;
+
+	/* device MAC already added */
+	if (ether_addr_equal(mac_addr, vf->dev_lan_addr))
+		return 0;
+
+	if (is_unicast_ether_addr(mac_addr) && !ice_can_vf_change_mac(vf)) {
+		dev_err(dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
+		return -EPERM;
+	}
+
+	ret = ice_fltr_add_mac(vsi, mac_addr, ICE_FWD_TO_VSI);
+	if (ret == -EEXIST) {
+		dev_dbg(dev, "MAC %pM already exists for VF %d\n", mac_addr,
+			vf->vf_id);
+		/* don't return since we might need to update
+		 * the primary MAC in ice_vfhw_mac_add() below
+		 */
+	} else if (ret) {
+		dev_err(dev, "Failed to add MAC %pM for VF %d\n, error %d\n",
+			mac_addr, vf->vf_id, ret);
+		return ret;
+	} else {
+		vf->num_mac++;
+	}
+
+	ice_vfhw_mac_add(vf, vc_ether_addr);
+
+	return ret;
+}
+
+/**
+ * ice_is_legacy_umac_expired - check if last added legacy unicast MAC expired
+ * @last_added_umac: structure used to check expiration
+ */
+static bool ice_is_legacy_umac_expired(struct ice_time_mac *last_added_umac)
+{
+#define ICE_LEGACY_VF_MAC_CHANGE_EXPIRE_TIME	msecs_to_jiffies(3000)
+	return time_is_before_jiffies(last_added_umac->time_modified +
+				      ICE_LEGACY_VF_MAC_CHANGE_EXPIRE_TIME);
+}
+
+/**
+ * ice_update_legacy_cached_mac - update cached hardware MAC for legacy VF
+ * @vf: VF to update
+ * @vc_ether_addr: structure from VIRTCHNL with MAC to check
+ *
+ * only update cached hardware MAC for legacy VF drivers on delete
+ * because we cannot guarantee order/type of MAC from the VF driver
+ */
+static void
+ice_update_legacy_cached_mac(struct ice_vf *vf,
+			     struct virtchnl_ether_addr *vc_ether_addr)
+{
+	if (!ice_is_vc_addr_legacy(vc_ether_addr) ||
+	    ice_is_legacy_umac_expired(&vf->legacy_last_added_umac))
+		return;
+
+	ether_addr_copy(vf->dev_lan_addr, vf->legacy_last_added_umac.addr);
+	ether_addr_copy(vf->hw_lan_addr, vf->legacy_last_added_umac.addr);
+}
+
+/**
+ * ice_vfhw_mac_del - update the VF's cached hardware MAC if allowed
+ * @vf: VF to update
+ * @vc_ether_addr: structure from VIRTCHNL with MAC to delete
+ */
+static void
+ice_vfhw_mac_del(struct ice_vf *vf, struct virtchnl_ether_addr *vc_ether_addr)
+{
+	u8 *mac_addr = vc_ether_addr->addr;
+
+	if (!is_valid_ether_addr(mac_addr) ||
+	    !ether_addr_equal(vf->dev_lan_addr, mac_addr))
+		return;
+
+	/* allow the device MAC to be repopulated in the add flow and don't
+	 * clear the hardware MAC (i.e. hw_lan_addr) here as that is meant
+	 * to be persistent on VM reboot and across driver unload/load, which
+	 * won't work if we clear the hardware MAC here
+	 */
+	eth_zero_addr(vf->dev_lan_addr);
+
+	ice_update_legacy_cached_mac(vf, vc_ether_addr);
+}
+
+/**
+ * ice_vc_del_mac_addr - attempt to delete the MAC address passed in
+ * @vf: pointer to the VF info
+ * @vsi: pointer to the VF's VSI
+ * @vc_ether_addr: VIRTCHNL MAC address structure used to delete MAC
+ */
+static int
+ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi,
+		    struct virtchnl_ether_addr *vc_ether_addr)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	u8 *mac_addr = vc_ether_addr->addr;
+	int status;
+
+	if (!ice_can_vf_change_mac(vf) &&
+	    ether_addr_equal(vf->dev_lan_addr, mac_addr))
+		return 0;
+
+	status = ice_fltr_remove_mac(vsi, mac_addr, ICE_FWD_TO_VSI);
+	if (status == -ENOENT) {
+		dev_err(dev, "MAC %pM does not exist for VF %d\n", mac_addr,
+			vf->vf_id);
+		return -ENOENT;
+	} else if (status) {
+		dev_err(dev, "Failed to delete MAC %pM for VF %d, error %d\n",
+			mac_addr, vf->vf_id, status);
+		return -EIO;
+	}
+
+	ice_vfhw_mac_del(vf, vc_ether_addr);
+
+	vf->num_mac--;
+
+	return 0;
+}
+
+/**
+ * ice_vc_handle_mac_addr_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @set: true if MAC filters are being set, false otherwise
+ *
+ * add guest MAC address filter
+ */
+static int
+ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
+{
+	int (*ice_vc_cfg_mac)
+		(struct ice_vf *vf, struct ice_vsi *vsi,
+		 struct virtchnl_ether_addr *virtchnl_ether_addr);
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_ether_addr_list *al =
+	    (struct virtchnl_ether_addr_list *)msg;
+	struct ice_pf *pf = vf->pf;
+	enum virtchnl_ops vc_op;
+	struct ice_vsi *vsi;
+	int i;
+
+	if (set) {
+		vc_op = VIRTCHNL_OP_ADD_ETH_ADDR;
+		ice_vc_cfg_mac = ice_vc_add_mac_addr;
+	} else {
+		vc_op = VIRTCHNL_OP_DEL_ETH_ADDR;
+		ice_vc_cfg_mac = ice_vc_del_mac_addr;
+	}
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
+	    !ice_vc_isvalid_vsi_id(vf, al->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto handle_mac_exit;
+	}
+
+	/* If this VF is not privileged, then we can't add more than a
+	 * limited number of addresses. Check to make sure that the
+	 * additions do not push us over the limit.
+	 */
+	if (set && !ice_is_vf_trusted(vf) &&
+	    (vf->num_mac + al->num_elements) > ICE_MAX_MACADDR_PER_VF) {
+		dev_err(ice_pf_to_dev(pf), "Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n",
+			vf->vf_id);
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto handle_mac_exit;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto handle_mac_exit;
+	}
+
+	for (i = 0; i < al->num_elements; i++) {
+		u8 *mac_addr = al->list[i].addr;
+		int result;
+
+		if (is_broadcast_ether_addr(mac_addr) ||
+		    is_zero_ether_addr(mac_addr))
+			continue;
+
+		result = ice_vc_cfg_mac(vf, vsi, &al->list[i]);
+		if (result == -EEXIST || result == -ENOENT) {
+			continue;
+		} else if (result) {
+			v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+			goto handle_mac_exit;
+		}
+	}
+
+handle_mac_exit:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, vc_op, v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_add_mac_addr_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * add guest MAC address filter
+ */
+static int ice_vc_add_mac_addr_msg(struct ice_vf *vf, u8 *msg)
+{
+	return ice_vc_handle_mac_addr_msg(vf, msg, true);
+}
+
+/**
+ * ice_vc_del_mac_addr_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * remove guest MAC address filter
+ */
+static int ice_vc_del_mac_addr_msg(struct ice_vf *vf, u8 *msg)
+{
+	return ice_vc_handle_mac_addr_msg(vf, msg, false);
+}
+
+/**
+ * ice_vc_request_qs_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * VFs get a default number of queues but can use this message to request a
+ * different number. If the request is successful, PF will reset the VF and
+ * return 0. If unsuccessful, PF will send message informing VF of number of
+ * available queue pairs via virtchnl message response to VF.
+ */
+static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vf_res_request *vfres =
+		(struct virtchnl_vf_res_request *)msg;
+	u16 req_queues = vfres->num_queue_pairs;
+	struct ice_pf *pf = vf->pf;
+	u16 max_allowed_vf_queues;
+	u16 tx_rx_queue_left;
+	struct device *dev;
+	u16 cur_queues;
+
+	dev = ice_pf_to_dev(pf);
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	cur_queues = vf->num_vf_qs;
+	tx_rx_queue_left = min_t(u16, ice_get_avail_txq_count(pf),
+				 ice_get_avail_rxq_count(pf));
+	max_allowed_vf_queues = tx_rx_queue_left + cur_queues;
+	if (!req_queues) {
+		dev_err(dev, "VF %d tried to request 0 queues. Ignoring.\n",
+			vf->vf_id);
+	} else if (req_queues > ICE_MAX_RSS_QS_PER_VF) {
+		dev_err(dev, "VF %d tried to request more than %d queues.\n",
+			vf->vf_id, ICE_MAX_RSS_QS_PER_VF);
+		vfres->num_queue_pairs = ICE_MAX_RSS_QS_PER_VF;
+	} else if (req_queues > cur_queues &&
+		   req_queues - cur_queues > tx_rx_queue_left) {
+		dev_warn(dev, "VF %d requested %u more queues, but only %u left.\n",
+			 vf->vf_id, req_queues - cur_queues, tx_rx_queue_left);
+		vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues,
+					       ICE_MAX_RSS_QS_PER_VF);
+	} else {
+		/* request is successful, then reset VF */
+		vf->num_req_qs = req_queues;
+		ice_reset_vf(vf, ICE_VF_RESET_NOTIFY);
+		dev_info(dev, "VF %d granted request of %u queues.\n",
+			 vf->vf_id, req_queues);
+		return 0;
+	}
+
+error_param:
+	/* send the response to the VF */
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES,
+				     v_ret, (u8 *)vfres, sizeof(*vfres));
+}
+
+/**
+ * ice_vf_vlan_offload_ena - determine if capabilities support VLAN offloads
+ * @caps: VF driver negotiated capabilities
+ *
+ * Return true if VIRTCHNL_VF_OFFLOAD_VLAN capability is set, else return false
+ */
+static bool ice_vf_vlan_offload_ena(u32 caps)
+{
+	return !!(caps & VIRTCHNL_VF_OFFLOAD_VLAN);
+}
+
+/**
+ * ice_is_vlan_promisc_allowed - check if VLAN promiscuous config is allowed
+ * @vf: VF used to determine if VLAN promiscuous config is allowed
+ */
+static bool ice_is_vlan_promisc_allowed(struct ice_vf *vf)
+{
+	if ((test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
+	     test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) &&
+	    test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, vf->pf->flags))
+		return true;
+
+	return false;
+}
+
+/**
+ * ice_vf_ena_vlan_promisc - Enable Tx/Rx VLAN promiscuous for the VLAN
+ * @vsi: VF's VSI used to enable VLAN promiscuous mode
+ * @vlan: VLAN used to enable VLAN promiscuous
+ *
+ * This function should only be called if VLAN promiscuous mode is allowed,
+ * which can be determined via ice_is_vlan_promisc_allowed().
+ */
+static int ice_vf_ena_vlan_promisc(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+	u8 promisc_m = ICE_PROMISC_VLAN_TX | ICE_PROMISC_VLAN_RX;
+	int status;
+
+	status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m,
+					  vlan->vid);
+	if (status && status != -EEXIST)
+		return status;
+
+	return 0;
+}
+
+/**
+ * ice_vf_dis_vlan_promisc - Disable Tx/Rx VLAN promiscuous for the VLAN
+ * @vsi: VF's VSI used to disable VLAN promiscuous mode for
+ * @vlan: VLAN used to disable VLAN promiscuous
+ *
+ * This function should only be called if VLAN promiscuous mode is allowed,
+ * which can be determined via ice_is_vlan_promisc_allowed().
+ */
+static int ice_vf_dis_vlan_promisc(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+	u8 promisc_m = ICE_PROMISC_VLAN_TX | ICE_PROMISC_VLAN_RX;
+	int status;
+
+	status = ice_fltr_clear_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m,
+					    vlan->vid);
+	if (status && status != -ENOENT)
+		return status;
+
+	return 0;
+}
+
+/**
+ * ice_vf_has_max_vlans - check if VF already has the max allowed VLAN filters
+ * @vf: VF to check against
+ * @vsi: VF's VSI
+ *
+ * If the VF is trusted then the VF is allowed to add as many VLANs as it
+ * wants to, so return false.
+ *
+ * When the VF is untrusted compare the number of non-zero VLANs + 1 to the max
+ * allowed VLANs for an untrusted VF. Return the result of this comparison.
+ */
+static bool ice_vf_has_max_vlans(struct ice_vf *vf, struct ice_vsi *vsi)
+{
+	if (ice_is_vf_trusted(vf))
+		return false;
+
+#define ICE_VF_ADDED_VLAN_ZERO_FLTRS	1
+	return ((ice_vsi_num_non_zero_vlans(vsi) +
+		ICE_VF_ADDED_VLAN_ZERO_FLTRS) >= ICE_MAX_VLAN_PER_VF);
+}
+
+/**
+ * ice_vc_process_vlan_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @add_v: Add VLAN if true, otherwise delete VLAN
+ *
+ * Process virtchnl op to add or remove programmed guest VLAN ID
+ */
+static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vlan_filter_list *vfl =
+	    (struct virtchnl_vlan_filter_list *)msg;
+	struct ice_pf *pf = vf->pf;
+	bool vlan_promisc = false;
+	struct ice_vsi *vsi;
+	struct device *dev;
+	int status = 0;
+	int i;
+
+	dev = ice_pf_to_dev(pf);
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vfl->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	for (i = 0; i < vfl->num_elements; i++) {
+		if (vfl->vlan_id[i] >= VLAN_N_VID) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			dev_err(dev, "invalid VF VLAN id %d\n",
+				vfl->vlan_id[i]);
+			goto error_param;
+		}
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (add_v && ice_vf_has_max_vlans(vf, vsi)) {
+		dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
+			 vf->vf_id);
+		/* There is no need to let VF know about being not trusted,
+		 * so we can just return success message here
+		 */
+		goto error_param;
+	}
+
+	/* in DVM a VF can add/delete inner VLAN filters when
+	 * VIRTCHNL_VF_OFFLOAD_VLAN is negotiated, so only reject in SVM
+	 */
+	if (ice_vf_is_port_vlan_ena(vf) && !ice_is_dvm_ena(&pf->hw)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	/* in DVM VLAN promiscuous is based on the outer VLAN, which would be
+	 * the port VLAN if VIRTCHNL_VF_OFFLOAD_VLAN was negotiated, so only
+	 * allow vlan_promisc = true in SVM and if no port VLAN is configured
+	 */
+	vlan_promisc = ice_is_vlan_promisc_allowed(vf) &&
+		!ice_is_dvm_ena(&pf->hw) &&
+		!ice_vf_is_port_vlan_ena(vf);
+
+	if (add_v) {
+		for (i = 0; i < vfl->num_elements; i++) {
+			u16 vid = vfl->vlan_id[i];
+			struct ice_vlan vlan;
+
+			if (ice_vf_has_max_vlans(vf, vsi)) {
+				dev_info(dev, "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
+					 vf->vf_id);
+				/* There is no need to let VF know about being
+				 * not trusted, so we can just return success
+				 * message here as well.
+				 */
+				goto error_param;
+			}
+
+			/* we add VLAN 0 by default for each VF so we can enable
+			 * Tx VLAN anti-spoof without triggering MDD events so
+			 * we don't need to add it again here
+			 */
+			if (!vid)
+				continue;
+
+			vlan = ICE_VLAN(ETH_P_8021Q, vid, 0);
+			status = vsi->inner_vlan_ops.add_vlan(vsi, &vlan);
+			if (status) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Enable VLAN filtering on first non-zero VLAN */
+			if (!vlan_promisc && vid && !ice_is_dvm_ena(&pf->hw)) {
+				if (vf->spoofchk) {
+					status = vsi->inner_vlan_ops.ena_tx_filtering(vsi);
+					if (status) {
+						v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+						dev_err(dev, "Enable VLAN anti-spoofing on VLAN ID: %d failed error-%d\n",
+							vid, status);
+						goto error_param;
+					}
+				}
+				if (vsi->inner_vlan_ops.ena_rx_filtering(vsi)) {
+					v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+					dev_err(dev, "Enable VLAN pruning on VLAN ID: %d failed error-%d\n",
+						vid, status);
+					goto error_param;
+				}
+			} else if (vlan_promisc) {
+				status = ice_vf_ena_vlan_promisc(vsi, &vlan);
+				if (status) {
+					v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+					dev_err(dev, "Enable Unicast/multicast promiscuous mode on VLAN ID:%d failed error-%d\n",
+						vid, status);
+				}
+			}
+		}
+	} else {
+		/* In case of non_trusted VF, number of VLAN elements passed
+		 * to PF for removal might be greater than number of VLANs
+		 * filter programmed for that VF - So, use actual number of
+		 * VLANS added earlier with add VLAN opcode. In order to avoid
+		 * removing VLAN that doesn't exist, which result to sending
+		 * erroneous failed message back to the VF
+		 */
+		int num_vf_vlan;
+
+		num_vf_vlan = vsi->num_vlan;
+		for (i = 0; i < vfl->num_elements && i < num_vf_vlan; i++) {
+			u16 vid = vfl->vlan_id[i];
+			struct ice_vlan vlan;
+
+			/* we add VLAN 0 by default for each VF so we can enable
+			 * Tx VLAN anti-spoof without triggering MDD events so
+			 * we don't want a VIRTCHNL request to remove it
+			 */
+			if (!vid)
+				continue;
+
+			vlan = ICE_VLAN(ETH_P_8021Q, vid, 0);
+			status = vsi->inner_vlan_ops.del_vlan(vsi, &vlan);
+			if (status) {
+				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+				goto error_param;
+			}
+
+			/* Disable VLAN filtering when only VLAN 0 is left */
+			if (!ice_vsi_has_non_zero_vlans(vsi)) {
+				vsi->inner_vlan_ops.dis_tx_filtering(vsi);
+				vsi->inner_vlan_ops.dis_rx_filtering(vsi);
+			}
+
+			if (vlan_promisc)
+				ice_vf_dis_vlan_promisc(vsi, &vlan);
+		}
+	}
+
+error_param:
+	/* send the response to the VF */
+	if (add_v)
+		return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_VLAN, v_ret,
+					     NULL, 0);
+	else
+		return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_VLAN, v_ret,
+					     NULL, 0);
+}
+
+/**
+ * ice_vc_add_vlan_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Add and program guest VLAN ID
+ */
+static int ice_vc_add_vlan_msg(struct ice_vf *vf, u8 *msg)
+{
+	return ice_vc_process_vlan_msg(vf, msg, true);
+}
+
+/**
+ * ice_vc_remove_vlan_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * remove programmed guest VLAN ID
+ */
+static int ice_vc_remove_vlan_msg(struct ice_vf *vf, u8 *msg)
+{
+	return ice_vc_process_vlan_msg(vf, msg, false);
+}
+
+/**
+ * ice_vc_ena_vlan_stripping
+ * @vf: pointer to the VF info
+ *
+ * Enable VLAN header stripping for a given VF
+ */
+static int ice_vc_ena_vlan_stripping(struct ice_vf *vf)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q))
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+
+error_param:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING,
+				     v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_dis_vlan_stripping
+ * @vf: pointer to the VF info
+ *
+ * Disable VLAN header stripping for a given VF
+ */
+static int ice_vc_dis_vlan_stripping(struct ice_vf *vf)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vf_vlan_offload_ena(vf->driver_caps)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (vsi->inner_vlan_ops.dis_stripping(vsi))
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+
+error_param:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
+				     v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_get_rss_hena - return the RSS HENA bits allowed by the hardware
+ * @vf: pointer to the VF info
+ */
+static int ice_vc_get_rss_hena(struct ice_vf *vf)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_rss_hena *vrh = NULL;
+	int len = 0, ret;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) {
+		dev_err(ice_pf_to_dev(vf->pf), "RSS not supported by PF\n");
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	len = sizeof(struct virtchnl_rss_hena);
+	vrh = kzalloc(len, GFP_KERNEL);
+	if (!vrh) {
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		len = 0;
+		goto err;
+	}
+
+	vrh->hena = ICE_DEFAULT_RSS_HENA;
+err:
+	/* send the response back to the VF */
+	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_RSS_HENA_CAPS, v_ret,
+				    (u8 *)vrh, len);
+	kfree(vrh);
+	return ret;
+}
+
+/**
+ * ice_vc_set_rss_hena - set RSS HENA bits for the VF
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ */
+static int ice_vc_set_rss_hena(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_rss_hena *vrh = (struct virtchnl_rss_hena *)msg;
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	struct device *dev;
+	int status;
+
+	dev = ice_pf_to_dev(pf);
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+		dev_err(dev, "RSS not supported by PF\n");
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	/* clear all previously programmed RSS configuration to allow VF drivers
+	 * the ability to customize the RSS configuration and/or completely
+	 * disable RSS
+	 */
+	status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx);
+	if (status && !vrh->hena) {
+		/* only report failure to clear the current RSS configuration if
+		 * that was clearly the VF's intention (i.e. vrh->hena = 0)
+		 */
+		v_ret = ice_err_to_virt_err(status);
+		goto err;
+	} else if (status) {
+		/* allow the VF to update the RSS configuration even on failure
+		 * to clear the current RSS confguration in an attempt to keep
+		 * RSS in a working state
+		 */
+		dev_warn(dev, "Failed to clear the RSS configuration for VF %u\n",
+			 vf->vf_id);
+	}
+
+	if (vrh->hena) {
+		status = ice_add_avf_rss_cfg(&pf->hw, vsi->idx, vrh->hena);
+		v_ret = ice_err_to_virt_err(status);
+	}
+
+	/* send the response to the VF */
+err:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_SET_RSS_HENA, v_ret,
+				     NULL, 0);
+}
+
+/**
+ * ice_vc_query_rxdid - query RXDID supported by DDP package
+ * @vf: pointer to VF info
+ *
+ * Called from VF to query a bitmap of supported flexible
+ * descriptor RXDIDs of a DDP package.
+ */
+static int ice_vc_query_rxdid(struct ice_vf *vf)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_supported_rxdids *rxdid = NULL;
+	struct ice_hw *hw = &vf->pf->hw;
+	struct ice_pf *pf = vf->pf;
+	int len = 0;
+	int ret, i;
+	u32 regval;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto err;
+	}
+
+	len = sizeof(struct virtchnl_supported_rxdids);
+	rxdid = kzalloc(len, GFP_KERNEL);
+	if (!rxdid) {
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		len = 0;
+		goto err;
+	}
+
+	/* RXDIDs supported by DDP package can be read from the register
+	 * to get the supported RXDID bitmap. But the legacy 32byte RXDID
+	 * is not listed in DDP package, add it in the bitmap manually.
+	 * Legacy 16byte descriptor is not supported.
+	 */
+	rxdid->supported_rxdids |= BIT(ICE_RXDID_LEGACY_1);
+
+	for (i = ICE_RXDID_FLEX_NIC; i < ICE_FLEX_DESC_RXDID_MAX_NUM; i++) {
+		regval = rd32(hw, GLFLXP_RXDID_FLAGS(i, 0));
+		if ((regval >> GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S)
+			& GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M)
+			rxdid->supported_rxdids |= BIT(i);
+	}
+
+	pf->supported_rxdids = rxdid->supported_rxdids;
+
+err:
+	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_SUPPORTED_RXDIDS,
+				    v_ret, (u8 *)rxdid, len);
+	kfree(rxdid);
+	return ret;
+}
+
+/**
+ * ice_vf_init_vlan_stripping - enable/disable VLAN stripping on initialization
+ * @vf: VF to enable/disable VLAN stripping for on initialization
+ *
+ * Set the default for VLAN stripping based on whether a port VLAN is configured
+ * and the current VLAN mode of the device.
+ */
+static int ice_vf_init_vlan_stripping(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = ice_get_vf_vsi(vf);
+
+	if (!vsi)
+		return -EINVAL;
+
+	/* don't modify stripping if port VLAN is configured in SVM since the
+	 * port VLAN is based on the inner/single VLAN in SVM
+	 */
+	if (ice_vf_is_port_vlan_ena(vf) && !ice_is_dvm_ena(&vsi->back->hw))
+		return 0;
+
+	if (ice_vf_vlan_offload_ena(vf->driver_caps))
+		return vsi->inner_vlan_ops.ena_stripping(vsi, ETH_P_8021Q);
+	else
+		return vsi->inner_vlan_ops.dis_stripping(vsi);
+}
+
+static u16 ice_vc_get_max_vlan_fltrs(struct ice_vf *vf)
+{
+	if (vf->trusted)
+		return VLAN_N_VID;
+	else
+		return ICE_MAX_VLAN_PER_VF;
+}
+
+/**
+ * ice_vf_outer_vlan_not_allowed - check if outer VLAN can be used
+ * @vf: VF that being checked for
+ *
+ * When the device is in double VLAN mode, check whether or not the outer VLAN
+ * is allowed.
+ */
+static bool ice_vf_outer_vlan_not_allowed(struct ice_vf *vf)
+{
+	if (ice_vf_is_port_vlan_ena(vf))
+		return true;
+
+	return false;
+}
+
+/**
+ * ice_vc_set_dvm_caps - set VLAN capabilities when the device is in DVM
+ * @vf: VF that capabilities are being set for
+ * @caps: VLAN capabilities to populate
+ *
+ * Determine VLAN capabilities support based on whether a port VLAN is
+ * configured. If a port VLAN is configured then the VF should use the inner
+ * filtering/offload capabilities since the port VLAN is using the outer VLAN
+ * capabilies.
+ */
+static void
+ice_vc_set_dvm_caps(struct ice_vf *vf, struct virtchnl_vlan_caps *caps)
+{
+	struct virtchnl_vlan_supported_caps *supported_caps;
+
+	if (ice_vf_outer_vlan_not_allowed(vf)) {
+		/* until support for inner VLAN filtering is added when a port
+		 * VLAN is configured, only support software offloaded inner
+		 * VLANs when a port VLAN is confgured in DVM
+		 */
+		supported_caps = &caps->filtering.filtering_support;
+		supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED;
+
+		supported_caps = &caps->offloads.stripping_support;
+		supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+					VIRTCHNL_VLAN_TOGGLE |
+					VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+		supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+		supported_caps = &caps->offloads.insertion_support;
+		supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+					VIRTCHNL_VLAN_TOGGLE |
+					VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+		supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+		caps->offloads.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100;
+		caps->offloads.ethertype_match =
+			VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION;
+	} else {
+		supported_caps = &caps->filtering.filtering_support;
+		supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED;
+		supported_caps->outer = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+					VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+					VIRTCHNL_VLAN_ETHERTYPE_9100 |
+					VIRTCHNL_VLAN_ETHERTYPE_AND;
+		caps->filtering.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+						 VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+						 VIRTCHNL_VLAN_ETHERTYPE_9100;
+
+		supported_caps = &caps->offloads.stripping_support;
+		supported_caps->inner = VIRTCHNL_VLAN_TOGGLE |
+					VIRTCHNL_VLAN_ETHERTYPE_8100 |
+					VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+		supported_caps->outer = VIRTCHNL_VLAN_TOGGLE |
+					VIRTCHNL_VLAN_ETHERTYPE_8100 |
+					VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+					VIRTCHNL_VLAN_ETHERTYPE_9100 |
+					VIRTCHNL_VLAN_ETHERTYPE_XOR |
+					VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2;
+
+		supported_caps = &caps->offloads.insertion_support;
+		supported_caps->inner = VIRTCHNL_VLAN_TOGGLE |
+					VIRTCHNL_VLAN_ETHERTYPE_8100 |
+					VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+		supported_caps->outer = VIRTCHNL_VLAN_TOGGLE |
+					VIRTCHNL_VLAN_ETHERTYPE_8100 |
+					VIRTCHNL_VLAN_ETHERTYPE_88A8 |
+					VIRTCHNL_VLAN_ETHERTYPE_9100 |
+					VIRTCHNL_VLAN_ETHERTYPE_XOR |
+					VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2;
+
+		caps->offloads.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100;
+
+		caps->offloads.ethertype_match =
+			VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION;
+	}
+
+	caps->filtering.max_filters = ice_vc_get_max_vlan_fltrs(vf);
+}
+
+/**
+ * ice_vc_set_svm_caps - set VLAN capabilities when the device is in SVM
+ * @vf: VF that capabilities are being set for
+ * @caps: VLAN capabilities to populate
+ *
+ * Determine VLAN capabilities support based on whether a port VLAN is
+ * configured. If a port VLAN is configured then the VF does not have any VLAN
+ * filtering or offload capabilities since the port VLAN is using the inner VLAN
+ * capabilities in single VLAN mode (SVM). Otherwise allow the VF to use inner
+ * VLAN fitlering and offload capabilities.
+ */
+static void
+ice_vc_set_svm_caps(struct ice_vf *vf, struct virtchnl_vlan_caps *caps)
+{
+	struct virtchnl_vlan_supported_caps *supported_caps;
+
+	if (ice_vf_is_port_vlan_ena(vf)) {
+		supported_caps = &caps->filtering.filtering_support;
+		supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED;
+		supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+		supported_caps = &caps->offloads.stripping_support;
+		supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED;
+		supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+		supported_caps = &caps->offloads.insertion_support;
+		supported_caps->inner = VIRTCHNL_VLAN_UNSUPPORTED;
+		supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+		caps->offloads.ethertype_init = VIRTCHNL_VLAN_UNSUPPORTED;
+		caps->offloads.ethertype_match = VIRTCHNL_VLAN_UNSUPPORTED;
+		caps->filtering.max_filters = 0;
+	} else {
+		supported_caps = &caps->filtering.filtering_support;
+		supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100;
+		supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+		caps->filtering.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100;
+
+		supported_caps = &caps->offloads.stripping_support;
+		supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+					VIRTCHNL_VLAN_TOGGLE |
+					VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+		supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+		supported_caps = &caps->offloads.insertion_support;
+		supported_caps->inner = VIRTCHNL_VLAN_ETHERTYPE_8100 |
+					VIRTCHNL_VLAN_TOGGLE |
+					VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1;
+		supported_caps->outer = VIRTCHNL_VLAN_UNSUPPORTED;
+
+		caps->offloads.ethertype_init = VIRTCHNL_VLAN_ETHERTYPE_8100;
+		caps->offloads.ethertype_match =
+			VIRTCHNL_ETHERTYPE_STRIPPING_MATCHES_INSERTION;
+		caps->filtering.max_filters = ice_vc_get_max_vlan_fltrs(vf);
+	}
+}
+
+/**
+ * ice_vc_get_offload_vlan_v2_caps - determine VF's VLAN capabilities
+ * @vf: VF to determine VLAN capabilities for
+ *
+ * This will only be called if the VF and PF successfully negotiated
+ * VIRTCHNL_VF_OFFLOAD_VLAN_V2.
+ *
+ * Set VLAN capabilities based on the current VLAN mode and whether a port VLAN
+ * is configured or not.
+ */
+static int ice_vc_get_offload_vlan_v2_caps(struct ice_vf *vf)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vlan_caps *caps = NULL;
+	int err, len = 0;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	caps = kzalloc(sizeof(*caps), GFP_KERNEL);
+	if (!caps) {
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		goto out;
+	}
+	len = sizeof(*caps);
+
+	if (ice_is_dvm_ena(&vf->pf->hw))
+		ice_vc_set_dvm_caps(vf, caps);
+	else
+		ice_vc_set_svm_caps(vf, caps);
+
+	/* store negotiated caps to prevent invalid VF messages */
+	memcpy(&vf->vlan_v2_caps, caps, sizeof(*caps));
+
+out:
+	err = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS,
+				    v_ret, (u8 *)caps, len);
+	kfree(caps);
+	return err;
+}
+
+/**
+ * ice_vc_validate_vlan_tpid - validate VLAN TPID
+ * @filtering_caps: negotiated/supported VLAN filtering capabilities
+ * @tpid: VLAN TPID used for validation
+ *
+ * Convert the VLAN TPID to a VIRTCHNL_VLAN_ETHERTYPE_* and then compare against
+ * the negotiated/supported filtering caps to see if the VLAN TPID is valid.
+ */
+static bool ice_vc_validate_vlan_tpid(u16 filtering_caps, u16 tpid)
+{
+	enum virtchnl_vlan_support vlan_ethertype = VIRTCHNL_VLAN_UNSUPPORTED;
+
+	switch (tpid) {
+	case ETH_P_8021Q:
+		vlan_ethertype = VIRTCHNL_VLAN_ETHERTYPE_8100;
+		break;
+	case ETH_P_8021AD:
+		vlan_ethertype = VIRTCHNL_VLAN_ETHERTYPE_88A8;
+		break;
+	case ETH_P_QINQ1:
+		vlan_ethertype = VIRTCHNL_VLAN_ETHERTYPE_9100;
+		break;
+	}
+
+	if (!(filtering_caps & vlan_ethertype))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_vc_is_valid_vlan - validate the virtchnl_vlan
+ * @vc_vlan: virtchnl_vlan to validate
+ *
+ * If the VLAN TCI and VLAN TPID are 0, then this filter is invalid, so return
+ * false. Otherwise return true.
+ */
+static bool ice_vc_is_valid_vlan(struct virtchnl_vlan *vc_vlan)
+{
+	if (!vc_vlan->tci || !vc_vlan->tpid)
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_vc_validate_vlan_filter_list - validate the filter list from the VF
+ * @vfc: negotiated/supported VLAN filtering capabilities
+ * @vfl: VLAN filter list from VF to validate
+ *
+ * Validate all of the filters in the VLAN filter list from the VF. If any of
+ * the checks fail then return false. Otherwise return true.
+ */
+static bool
+ice_vc_validate_vlan_filter_list(struct virtchnl_vlan_filtering_caps *vfc,
+				 struct virtchnl_vlan_filter_list_v2 *vfl)
+{
+	u16 i;
+
+	if (!vfl->num_elements)
+		return false;
+
+	for (i = 0; i < vfl->num_elements; i++) {
+		struct virtchnl_vlan_supported_caps *filtering_support =
+			&vfc->filtering_support;
+		struct virtchnl_vlan_filter *vlan_fltr = &vfl->filters[i];
+		struct virtchnl_vlan *outer = &vlan_fltr->outer;
+		struct virtchnl_vlan *inner = &vlan_fltr->inner;
+
+		if ((ice_vc_is_valid_vlan(outer) &&
+		     filtering_support->outer == VIRTCHNL_VLAN_UNSUPPORTED) ||
+		    (ice_vc_is_valid_vlan(inner) &&
+		     filtering_support->inner == VIRTCHNL_VLAN_UNSUPPORTED))
+			return false;
+
+		if ((outer->tci_mask &&
+		     !(filtering_support->outer & VIRTCHNL_VLAN_FILTER_MASK)) ||
+		    (inner->tci_mask &&
+		     !(filtering_support->inner & VIRTCHNL_VLAN_FILTER_MASK)))
+			return false;
+
+		if (((outer->tci & VLAN_PRIO_MASK) &&
+		     !(filtering_support->outer & VIRTCHNL_VLAN_PRIO)) ||
+		    ((inner->tci & VLAN_PRIO_MASK) &&
+		     !(filtering_support->inner & VIRTCHNL_VLAN_PRIO)))
+			return false;
+
+		if ((ice_vc_is_valid_vlan(outer) &&
+		     !ice_vc_validate_vlan_tpid(filtering_support->outer,
+						outer->tpid)) ||
+		    (ice_vc_is_valid_vlan(inner) &&
+		     !ice_vc_validate_vlan_tpid(filtering_support->inner,
+						inner->tpid)))
+			return false;
+	}
+
+	return true;
+}
+
+/**
+ * ice_vc_to_vlan - transform from struct virtchnl_vlan to struct ice_vlan
+ * @vc_vlan: struct virtchnl_vlan to transform
+ */
+static struct ice_vlan ice_vc_to_vlan(struct virtchnl_vlan *vc_vlan)
+{
+	struct ice_vlan vlan = { 0 };
+
+	vlan.prio = (vc_vlan->tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+	vlan.vid = vc_vlan->tci & VLAN_VID_MASK;
+	vlan.tpid = vc_vlan->tpid;
+
+	return vlan;
+}
+
+/**
+ * ice_vc_vlan_action - action to perform on the virthcnl_vlan
+ * @vsi: VF's VSI used to perform the action
+ * @vlan_action: function to perform the action with (i.e. add/del)
+ * @vlan: VLAN filter to perform the action with
+ */
+static int
+ice_vc_vlan_action(struct ice_vsi *vsi,
+		   int (*vlan_action)(struct ice_vsi *, struct ice_vlan *),
+		   struct ice_vlan *vlan)
+{
+	int err;
+
+	err = vlan_action(vsi, vlan);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+/**
+ * ice_vc_del_vlans - delete VLAN(s) from the virtchnl filter list
+ * @vf: VF used to delete the VLAN(s)
+ * @vsi: VF's VSI used to delete the VLAN(s)
+ * @vfl: virthchnl filter list used to delete the filters
+ */
+static int
+ice_vc_del_vlans(struct ice_vf *vf, struct ice_vsi *vsi,
+		 struct virtchnl_vlan_filter_list_v2 *vfl)
+{
+	bool vlan_promisc = ice_is_vlan_promisc_allowed(vf);
+	int err;
+	u16 i;
+
+	for (i = 0; i < vfl->num_elements; i++) {
+		struct virtchnl_vlan_filter *vlan_fltr = &vfl->filters[i];
+		struct virtchnl_vlan *vc_vlan;
+
+		vc_vlan = &vlan_fltr->outer;
+		if (ice_vc_is_valid_vlan(vc_vlan)) {
+			struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan);
+
+			err = ice_vc_vlan_action(vsi,
+						 vsi->outer_vlan_ops.del_vlan,
+						 &vlan);
+			if (err)
+				return err;
+
+			if (vlan_promisc)
+				ice_vf_dis_vlan_promisc(vsi, &vlan);
+
+			/* Disable VLAN filtering when only VLAN 0 is left */
+			if (!ice_vsi_has_non_zero_vlans(vsi) && ice_is_dvm_ena(&vsi->back->hw)) {
+				err = vsi->outer_vlan_ops.dis_tx_filtering(vsi);
+				if (err)
+					return err;
+			}
+		}
+
+		vc_vlan = &vlan_fltr->inner;
+		if (ice_vc_is_valid_vlan(vc_vlan)) {
+			struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan);
+
+			err = ice_vc_vlan_action(vsi,
+						 vsi->inner_vlan_ops.del_vlan,
+						 &vlan);
+			if (err)
+				return err;
+
+			/* no support for VLAN promiscuous on inner VLAN unless
+			 * we are in Single VLAN Mode (SVM)
+			 */
+			if (!ice_is_dvm_ena(&vsi->back->hw)) {
+				if (vlan_promisc)
+					ice_vf_dis_vlan_promisc(vsi, &vlan);
+
+				/* Disable VLAN filtering when only VLAN 0 is left */
+				if (!ice_vsi_has_non_zero_vlans(vsi)) {
+					err = vsi->inner_vlan_ops.dis_tx_filtering(vsi);
+					if (err)
+						return err;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_remove_vlan_v2_msg - virtchnl handler for VIRTCHNL_OP_DEL_VLAN_V2
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ */
+static int ice_vc_remove_vlan_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_vlan_filter_list_v2 *vfl =
+		(struct virtchnl_vlan_filter_list_v2 *)msg;
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct ice_vsi *vsi;
+
+	if (!ice_vc_validate_vlan_filter_list(&vf->vlan_v2_caps.filtering,
+					      vfl)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vfl->vport_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	if (ice_vc_del_vlans(vf, vsi, vfl))
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+
+out:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_VLAN_V2, v_ret, NULL,
+				     0);
+}
+
+/**
+ * ice_vc_add_vlans - add VLAN(s) from the virtchnl filter list
+ * @vf: VF used to add the VLAN(s)
+ * @vsi: VF's VSI used to add the VLAN(s)
+ * @vfl: virthchnl filter list used to add the filters
+ */
+static int
+ice_vc_add_vlans(struct ice_vf *vf, struct ice_vsi *vsi,
+		 struct virtchnl_vlan_filter_list_v2 *vfl)
+{
+	bool vlan_promisc = ice_is_vlan_promisc_allowed(vf);
+	int err;
+	u16 i;
+
+	for (i = 0; i < vfl->num_elements; i++) {
+		struct virtchnl_vlan_filter *vlan_fltr = &vfl->filters[i];
+		struct virtchnl_vlan *vc_vlan;
+
+		vc_vlan = &vlan_fltr->outer;
+		if (ice_vc_is_valid_vlan(vc_vlan)) {
+			struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan);
+
+			err = ice_vc_vlan_action(vsi,
+						 vsi->outer_vlan_ops.add_vlan,
+						 &vlan);
+			if (err)
+				return err;
+
+			if (vlan_promisc) {
+				err = ice_vf_ena_vlan_promisc(vsi, &vlan);
+				if (err)
+					return err;
+			}
+
+			/* Enable VLAN filtering on first non-zero VLAN */
+			if (vf->spoofchk && vlan.vid && ice_is_dvm_ena(&vsi->back->hw)) {
+				err = vsi->outer_vlan_ops.ena_tx_filtering(vsi);
+				if (err)
+					return err;
+			}
+		}
+
+		vc_vlan = &vlan_fltr->inner;
+		if (ice_vc_is_valid_vlan(vc_vlan)) {
+			struct ice_vlan vlan = ice_vc_to_vlan(vc_vlan);
+
+			err = ice_vc_vlan_action(vsi,
+						 vsi->inner_vlan_ops.add_vlan,
+						 &vlan);
+			if (err)
+				return err;
+
+			/* no support for VLAN promiscuous on inner VLAN unless
+			 * we are in Single VLAN Mode (SVM)
+			 */
+			if (!ice_is_dvm_ena(&vsi->back->hw)) {
+				if (vlan_promisc) {
+					err = ice_vf_ena_vlan_promisc(vsi, &vlan);
+					if (err)
+						return err;
+				}
+
+				/* Enable VLAN filtering on first non-zero VLAN */
+				if (vf->spoofchk && vlan.vid) {
+					err = vsi->inner_vlan_ops.ena_tx_filtering(vsi);
+					if (err)
+						return err;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_validate_add_vlan_filter_list - validate add filter list from the VF
+ * @vsi: VF VSI used to get number of existing VLAN filters
+ * @vfc: negotiated/supported VLAN filtering capabilities
+ * @vfl: VLAN filter list from VF to validate
+ *
+ * Validate all of the filters in the VLAN filter list from the VF during the
+ * VIRTCHNL_OP_ADD_VLAN_V2 opcode. If any of the checks fail then return false.
+ * Otherwise return true.
+ */
+static bool
+ice_vc_validate_add_vlan_filter_list(struct ice_vsi *vsi,
+				     struct virtchnl_vlan_filtering_caps *vfc,
+				     struct virtchnl_vlan_filter_list_v2 *vfl)
+{
+	u16 num_requested_filters = ice_vsi_num_non_zero_vlans(vsi) +
+		vfl->num_elements;
+
+	if (num_requested_filters > vfc->max_filters)
+		return false;
+
+	return ice_vc_validate_vlan_filter_list(vfc, vfl);
+}
+
+/**
+ * ice_vc_add_vlan_v2_msg - virtchnl handler for VIRTCHNL_OP_ADD_VLAN_V2
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ */
+static int ice_vc_add_vlan_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vlan_filter_list_v2 *vfl =
+		(struct virtchnl_vlan_filter_list_v2 *)msg;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, vfl->vport_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	if (!ice_vc_validate_add_vlan_filter_list(vsi,
+						  &vf->vlan_v2_caps.filtering,
+						  vfl)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	if (ice_vc_add_vlans(vf, vsi, vfl))
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+
+out:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_VLAN_V2, v_ret, NULL,
+				     0);
+}
+
+/**
+ * ice_vc_valid_vlan_setting - validate VLAN setting
+ * @negotiated_settings: negotiated VLAN settings during VF init
+ * @ethertype_setting: ethertype(s) requested for the VLAN setting
+ */
+static bool
+ice_vc_valid_vlan_setting(u32 negotiated_settings, u32 ethertype_setting)
+{
+	if (ethertype_setting && !(negotiated_settings & ethertype_setting))
+		return false;
+
+	/* only allow a single VIRTCHNL_VLAN_ETHERTYPE if
+	 * VIRTHCNL_VLAN_ETHERTYPE_AND is not negotiated/supported
+	 */
+	if (!(negotiated_settings & VIRTCHNL_VLAN_ETHERTYPE_AND) &&
+	    hweight32(ethertype_setting) > 1)
+		return false;
+
+	/* ability to modify the VLAN setting was not negotiated */
+	if (!(negotiated_settings & VIRTCHNL_VLAN_TOGGLE))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_vc_valid_vlan_setting_msg - validate the VLAN setting message
+ * @caps: negotiated VLAN settings during VF init
+ * @msg: message to validate
+ *
+ * Used to validate any VLAN virtchnl message sent as a
+ * virtchnl_vlan_setting structure. Validates the message against the
+ * negotiated/supported caps during VF driver init.
+ */
+static bool
+ice_vc_valid_vlan_setting_msg(struct virtchnl_vlan_supported_caps *caps,
+			      struct virtchnl_vlan_setting *msg)
+{
+	if ((!msg->outer_ethertype_setting &&
+	     !msg->inner_ethertype_setting) ||
+	    (!caps->outer && !caps->inner))
+		return false;
+
+	if (msg->outer_ethertype_setting &&
+	    !ice_vc_valid_vlan_setting(caps->outer,
+				       msg->outer_ethertype_setting))
+		return false;
+
+	if (msg->inner_ethertype_setting &&
+	    !ice_vc_valid_vlan_setting(caps->inner,
+				       msg->inner_ethertype_setting))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_vc_get_tpid - transform from VIRTCHNL_VLAN_ETHERTYPE_* to VLAN TPID
+ * @ethertype_setting: VIRTCHNL_VLAN_ETHERTYPE_* used to get VLAN TPID
+ * @tpid: VLAN TPID to populate
+ */
+static int ice_vc_get_tpid(u32 ethertype_setting, u16 *tpid)
+{
+	switch (ethertype_setting) {
+	case VIRTCHNL_VLAN_ETHERTYPE_8100:
+		*tpid = ETH_P_8021Q;
+		break;
+	case VIRTCHNL_VLAN_ETHERTYPE_88A8:
+		*tpid = ETH_P_8021AD;
+		break;
+	case VIRTCHNL_VLAN_ETHERTYPE_9100:
+		*tpid = ETH_P_QINQ1;
+		break;
+	default:
+		*tpid = 0;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_ena_vlan_offload - enable VLAN offload based on the ethertype_setting
+ * @vsi: VF's VSI used to enable the VLAN offload
+ * @ena_offload: function used to enable the VLAN offload
+ * @ethertype_setting: VIRTCHNL_VLAN_ETHERTYPE_* to enable offloads for
+ */
+static int
+ice_vc_ena_vlan_offload(struct ice_vsi *vsi,
+			int (*ena_offload)(struct ice_vsi *vsi, u16 tpid),
+			u32 ethertype_setting)
+{
+	u16 tpid;
+	int err;
+
+	err = ice_vc_get_tpid(ethertype_setting, &tpid);
+	if (err)
+		return err;
+
+	err = ena_offload(vsi, tpid);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+#define ICE_L2TSEL_QRX_CONTEXT_REG_IDX	3
+#define ICE_L2TSEL_BIT_OFFSET		23
+enum ice_l2tsel {
+	ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG2_2ND,
+	ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG1,
+};
+
+/**
+ * ice_vsi_update_l2tsel - update l2tsel field for all Rx rings on this VSI
+ * @vsi: VSI used to update l2tsel on
+ * @l2tsel: l2tsel setting requested
+ *
+ * Use the l2tsel setting to update all of the Rx queue context bits for l2tsel.
+ * This will modify which descriptor field the first offloaded VLAN will be
+ * stripped into.
+ */
+static void ice_vsi_update_l2tsel(struct ice_vsi *vsi, enum ice_l2tsel l2tsel)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	u32 l2tsel_bit;
+	int i;
+
+	if (l2tsel == ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG2_2ND)
+		l2tsel_bit = 0;
+	else
+		l2tsel_bit = BIT(ICE_L2TSEL_BIT_OFFSET);
+
+	for (i = 0; i < vsi->alloc_rxq; i++) {
+		u16 pfq = vsi->rxq_map[i];
+		u32 qrx_context_offset;
+		u32 regval;
+
+		qrx_context_offset =
+			QRX_CONTEXT(ICE_L2TSEL_QRX_CONTEXT_REG_IDX, pfq);
+
+		regval = rd32(hw, qrx_context_offset);
+		regval &= ~BIT(ICE_L2TSEL_BIT_OFFSET);
+		regval |= l2tsel_bit;
+		wr32(hw, qrx_context_offset, regval);
+	}
+}
+
+/**
+ * ice_vc_ena_vlan_stripping_v2_msg
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ *
+ * virthcnl handler for VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2
+ */
+static int ice_vc_ena_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vlan_supported_caps *stripping_support;
+	struct virtchnl_vlan_setting *strip_msg =
+		(struct virtchnl_vlan_setting *)msg;
+	u32 ethertype_setting;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, strip_msg->vport_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	stripping_support = &vf->vlan_v2_caps.offloads.stripping_support;
+	if (!ice_vc_valid_vlan_setting_msg(stripping_support, strip_msg)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	ethertype_setting = strip_msg->outer_ethertype_setting;
+	if (ethertype_setting) {
+		if (ice_vc_ena_vlan_offload(vsi,
+					    vsi->outer_vlan_ops.ena_stripping,
+					    ethertype_setting)) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto out;
+		} else {
+			enum ice_l2tsel l2tsel =
+				ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG2_2ND;
+
+			/* PF tells the VF that the outer VLAN tag is always
+			 * extracted to VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2 and
+			 * inner is always extracted to
+			 * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1. This is needed to
+			 * support outer stripping so the first tag always ends
+			 * up in L2TAG2_2ND and the second/inner tag, if
+			 * enabled, is extracted in L2TAG1.
+			 */
+			ice_vsi_update_l2tsel(vsi, l2tsel);
+		}
+	}
+
+	ethertype_setting = strip_msg->inner_ethertype_setting;
+	if (ethertype_setting &&
+	    ice_vc_ena_vlan_offload(vsi, vsi->inner_vlan_ops.ena_stripping,
+				    ethertype_setting)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+out:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2,
+				     v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_dis_vlan_stripping_v2_msg
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ *
+ * virthcnl handler for VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2
+ */
+static int ice_vc_dis_vlan_stripping_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vlan_supported_caps *stripping_support;
+	struct virtchnl_vlan_setting *strip_msg =
+		(struct virtchnl_vlan_setting *)msg;
+	u32 ethertype_setting;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, strip_msg->vport_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	stripping_support = &vf->vlan_v2_caps.offloads.stripping_support;
+	if (!ice_vc_valid_vlan_setting_msg(stripping_support, strip_msg)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	ethertype_setting = strip_msg->outer_ethertype_setting;
+	if (ethertype_setting) {
+		if (vsi->outer_vlan_ops.dis_stripping(vsi)) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto out;
+		} else {
+			enum ice_l2tsel l2tsel =
+				ICE_L2TSEL_EXTRACT_FIRST_TAG_L2TAG1;
+
+			/* PF tells the VF that the outer VLAN tag is always
+			 * extracted to VIRTCHNL_VLAN_TAG_LOCATION_L2TAG2_2 and
+			 * inner is always extracted to
+			 * VIRTCHNL_VLAN_TAG_LOCATION_L2TAG1. This is needed to
+			 * support inner stripping while outer stripping is
+			 * disabled so that the first and only tag is extracted
+			 * in L2TAG1.
+			 */
+			ice_vsi_update_l2tsel(vsi, l2tsel);
+		}
+	}
+
+	ethertype_setting = strip_msg->inner_ethertype_setting;
+	if (ethertype_setting && vsi->inner_vlan_ops.dis_stripping(vsi)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+out:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2,
+				     v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_ena_vlan_insertion_v2_msg
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ *
+ * virthcnl handler for VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2
+ */
+static int ice_vc_ena_vlan_insertion_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vlan_supported_caps *insertion_support;
+	struct virtchnl_vlan_setting *insertion_msg =
+		(struct virtchnl_vlan_setting *)msg;
+	u32 ethertype_setting;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, insertion_msg->vport_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	insertion_support = &vf->vlan_v2_caps.offloads.insertion_support;
+	if (!ice_vc_valid_vlan_setting_msg(insertion_support, insertion_msg)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	ethertype_setting = insertion_msg->outer_ethertype_setting;
+	if (ethertype_setting &&
+	    ice_vc_ena_vlan_offload(vsi, vsi->outer_vlan_ops.ena_insertion,
+				    ethertype_setting)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	ethertype_setting = insertion_msg->inner_ethertype_setting;
+	if (ethertype_setting &&
+	    ice_vc_ena_vlan_offload(vsi, vsi->inner_vlan_ops.ena_insertion,
+				    ethertype_setting)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+out:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2,
+				     v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_dis_vlan_insertion_v2_msg
+ * @vf: VF the message was received from
+ * @msg: message received from the VF
+ *
+ * virthcnl handler for VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2
+ */
+static int ice_vc_dis_vlan_insertion_v2_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_vlan_supported_caps *insertion_support;
+	struct virtchnl_vlan_setting *insertion_msg =
+		(struct virtchnl_vlan_setting *)msg;
+	u32 ethertype_setting;
+	struct ice_vsi *vsi;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, insertion_msg->vport_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	insertion_support = &vf->vlan_v2_caps.offloads.insertion_support;
+	if (!ice_vc_valid_vlan_setting_msg(insertion_support, insertion_msg)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	ethertype_setting = insertion_msg->outer_ethertype_setting;
+	if (ethertype_setting && vsi->outer_vlan_ops.dis_insertion(vsi)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+	ethertype_setting = insertion_msg->inner_ethertype_setting;
+	if (ethertype_setting && vsi->inner_vlan_ops.dis_insertion(vsi)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto out;
+	}
+
+out:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2,
+				     v_ret, NULL, 0);
+}
+
+static const struct ice_virtchnl_ops ice_virtchnl_dflt_ops = {
+	.get_ver_msg = ice_vc_get_ver_msg,
+	.get_vf_res_msg = ice_vc_get_vf_res_msg,
+	.reset_vf = ice_vc_reset_vf_msg,
+	.add_mac_addr_msg = ice_vc_add_mac_addr_msg,
+	.del_mac_addr_msg = ice_vc_del_mac_addr_msg,
+	.cfg_qs_msg = ice_vc_cfg_qs_msg,
+	.ena_qs_msg = ice_vc_ena_qs_msg,
+	.dis_qs_msg = ice_vc_dis_qs_msg,
+	.request_qs_msg = ice_vc_request_qs_msg,
+	.cfg_irq_map_msg = ice_vc_cfg_irq_map_msg,
+	.config_rss_key = ice_vc_config_rss_key,
+	.config_rss_lut = ice_vc_config_rss_lut,
+	.get_stats_msg = ice_vc_get_stats_msg,
+	.cfg_promiscuous_mode_msg = ice_vc_cfg_promiscuous_mode_msg,
+	.add_vlan_msg = ice_vc_add_vlan_msg,
+	.remove_vlan_msg = ice_vc_remove_vlan_msg,
+	.query_rxdid = ice_vc_query_rxdid,
+	.get_rss_hena = ice_vc_get_rss_hena,
+	.set_rss_hena_msg = ice_vc_set_rss_hena,
+	.ena_vlan_stripping = ice_vc_ena_vlan_stripping,
+	.dis_vlan_stripping = ice_vc_dis_vlan_stripping,
+	.handle_rss_cfg_msg = ice_vc_handle_rss_cfg,
+	.add_fdir_fltr_msg = ice_vc_add_fdir_fltr,
+	.del_fdir_fltr_msg = ice_vc_del_fdir_fltr,
+	.get_offload_vlan_v2_caps = ice_vc_get_offload_vlan_v2_caps,
+	.add_vlan_v2_msg = ice_vc_add_vlan_v2_msg,
+	.remove_vlan_v2_msg = ice_vc_remove_vlan_v2_msg,
+	.ena_vlan_stripping_v2_msg = ice_vc_ena_vlan_stripping_v2_msg,
+	.dis_vlan_stripping_v2_msg = ice_vc_dis_vlan_stripping_v2_msg,
+	.ena_vlan_insertion_v2_msg = ice_vc_ena_vlan_insertion_v2_msg,
+	.dis_vlan_insertion_v2_msg = ice_vc_dis_vlan_insertion_v2_msg,
+};
+
+/**
+ * ice_virtchnl_set_dflt_ops - Switch to default virtchnl ops
+ * @vf: the VF to switch ops
+ */
+void ice_virtchnl_set_dflt_ops(struct ice_vf *vf)
+{
+	vf->virtchnl_ops = &ice_virtchnl_dflt_ops;
+}
+
+/**
+ * ice_vc_repr_add_mac
+ * @vf: pointer to VF
+ * @msg: virtchannel message
+ *
+ * When port representors are created, we do not add MAC rule
+ * to firmware, we store it so that PF could report same
+ * MAC as VF.
+ */
+static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_ether_addr_list *al =
+	    (struct virtchnl_ether_addr_list *)msg;
+	struct ice_vsi *vsi;
+	struct ice_pf *pf;
+	int i;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
+	    !ice_vc_isvalid_vsi_id(vf, al->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto handle_mac_exit;
+	}
+
+	pf = vf->pf;
+
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto handle_mac_exit;
+	}
+
+	for (i = 0; i < al->num_elements; i++) {
+		u8 *mac_addr = al->list[i].addr;
+
+		if (!is_unicast_ether_addr(mac_addr) ||
+		    ether_addr_equal(mac_addr, vf->hw_lan_addr))
+			continue;
+
+		if (vf->pf_set_mac) {
+			dev_err(ice_pf_to_dev(pf), "VF attempting to override administratively set MAC address\n");
+			v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
+			goto handle_mac_exit;
+		}
+
+		ice_vfhw_mac_add(vf, &al->list[i]);
+		vf->num_mac++;
+		break;
+	}
+
+handle_mac_exit:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_ETH_ADDR,
+				     v_ret, NULL, 0);
+}
+
+/**
+ * ice_vc_repr_del_mac - response with success for deleting MAC
+ * @vf: pointer to VF
+ * @msg: virtchannel message
+ *
+ * Respond with success to not break normal VF flow.
+ * For legacy VF driver try to update cached MAC address.
+ */
+static int
+ice_vc_repr_del_mac(struct ice_vf __always_unused *vf, u8 __always_unused *msg)
+{
+	struct virtchnl_ether_addr_list *al =
+		(struct virtchnl_ether_addr_list *)msg;
+
+	ice_update_legacy_cached_mac(vf, &al->list[0]);
+
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_ETH_ADDR,
+				     VIRTCHNL_STATUS_SUCCESS, NULL, 0);
+}
+
+static int
+ice_vc_repr_cfg_promiscuous_mode(struct ice_vf *vf, u8 __always_unused *msg)
+{
+	dev_dbg(ice_pf_to_dev(vf->pf),
+		"Can't config promiscuous mode in switchdev mode for VF %d\n",
+		vf->vf_id);
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+				     VIRTCHNL_STATUS_ERR_NOT_SUPPORTED,
+				     NULL, 0);
+}
+
+static const struct ice_virtchnl_ops ice_virtchnl_repr_ops = {
+	.get_ver_msg = ice_vc_get_ver_msg,
+	.get_vf_res_msg = ice_vc_get_vf_res_msg,
+	.reset_vf = ice_vc_reset_vf_msg,
+	.add_mac_addr_msg = ice_vc_repr_add_mac,
+	.del_mac_addr_msg = ice_vc_repr_del_mac,
+	.cfg_qs_msg = ice_vc_cfg_qs_msg,
+	.ena_qs_msg = ice_vc_ena_qs_msg,
+	.dis_qs_msg = ice_vc_dis_qs_msg,
+	.request_qs_msg = ice_vc_request_qs_msg,
+	.cfg_irq_map_msg = ice_vc_cfg_irq_map_msg,
+	.config_rss_key = ice_vc_config_rss_key,
+	.config_rss_lut = ice_vc_config_rss_lut,
+	.get_stats_msg = ice_vc_get_stats_msg,
+	.cfg_promiscuous_mode_msg = ice_vc_repr_cfg_promiscuous_mode,
+	.add_vlan_msg = ice_vc_add_vlan_msg,
+	.remove_vlan_msg = ice_vc_remove_vlan_msg,
+	.query_rxdid = ice_vc_query_rxdid,
+	.get_rss_hena = ice_vc_get_rss_hena,
+	.set_rss_hena_msg = ice_vc_set_rss_hena,
+	.ena_vlan_stripping = ice_vc_ena_vlan_stripping,
+	.dis_vlan_stripping = ice_vc_dis_vlan_stripping,
+	.handle_rss_cfg_msg = ice_vc_handle_rss_cfg,
+	.add_fdir_fltr_msg = ice_vc_add_fdir_fltr,
+	.del_fdir_fltr_msg = ice_vc_del_fdir_fltr,
+	.get_offload_vlan_v2_caps = ice_vc_get_offload_vlan_v2_caps,
+	.add_vlan_v2_msg = ice_vc_add_vlan_v2_msg,
+	.remove_vlan_v2_msg = ice_vc_remove_vlan_v2_msg,
+	.ena_vlan_stripping_v2_msg = ice_vc_ena_vlan_stripping_v2_msg,
+	.dis_vlan_stripping_v2_msg = ice_vc_dis_vlan_stripping_v2_msg,
+	.ena_vlan_insertion_v2_msg = ice_vc_ena_vlan_insertion_v2_msg,
+	.dis_vlan_insertion_v2_msg = ice_vc_dis_vlan_insertion_v2_msg,
+};
+
+/**
+ * ice_virtchnl_set_repr_ops - Switch to representor virtchnl ops
+ * @vf: the VF to switch ops
+ */
+void ice_virtchnl_set_repr_ops(struct ice_vf *vf)
+{
+	vf->virtchnl_ops = &ice_virtchnl_repr_ops;
+}
+
+/**
+ * ice_is_malicious_vf - check if this vf might be overflowing mailbox
+ * @vf: the VF to check
+ * @mbxdata: data about the state of the mailbox
+ *
+ * Detect if a given VF might be malicious and attempting to overflow the PF
+ * mailbox. If so, log a warning message and ignore this event.
+ */
+static bool
+ice_is_malicious_vf(struct ice_vf *vf, struct ice_mbx_data *mbxdata)
+{
+	bool report_malvf = false;
+	struct device *dev;
+	struct ice_pf *pf;
+	int status;
+
+	pf = vf->pf;
+	dev = ice_pf_to_dev(pf);
+
+	if (test_bit(ICE_VF_STATE_DIS, vf->vf_states))
+		return vf->mbx_info.malicious;
+
+	/* check to see if we have a newly malicious VF */
+	status = ice_mbx_vf_state_handler(&pf->hw, mbxdata, &vf->mbx_info,
+					  &report_malvf);
+	if (status)
+		dev_warn_ratelimited(dev, "Unable to check status of mailbox overflow for VF %u MAC %pM, status %d\n",
+				     vf->vf_id, vf->dev_lan_addr, status);
+
+	if (report_malvf) {
+		struct ice_vsi *pf_vsi = ice_get_main_vsi(pf);
+		u8 zero_addr[ETH_ALEN] = {};
+
+		dev_warn(dev, "VF MAC %pM on PF MAC %pM is generating asynchronous messages and may be overflowing the PF message queue. Please see the Adapter User Guide for more information\n",
+			 vf->dev_lan_addr,
+			 pf_vsi ? pf_vsi->netdev->dev_addr : zero_addr);
+	}
+
+	return vf->mbx_info.malicious;
+}
+
+/**
+ * ice_vc_process_vf_msg - Process request from VF
+ * @pf: pointer to the PF structure
+ * @event: pointer to the AQ event
+ * @mbxdata: information used to detect VF attempting mailbox overflow
+ *
+ * called from the common asq/arq handler to
+ * process request from VF
+ */
+void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event,
+			   struct ice_mbx_data *mbxdata)
+{
+	u32 v_opcode = le32_to_cpu(event->desc.cookie_high);
+	s16 vf_id = le16_to_cpu(event->desc.retval);
+	const struct ice_virtchnl_ops *ops;
+	u16 msglen = event->msg_len;
+	u8 *msg = event->msg_buf;
+	struct ice_vf *vf = NULL;
+	struct device *dev;
+	int err = 0;
+
+	dev = ice_pf_to_dev(pf);
+
+	vf = ice_get_vf_by_id(pf, vf_id);
+	if (!vf) {
+		dev_err(dev, "Unable to locate VF for message from VF ID %d, opcode %d, len %d\n",
+			vf_id, v_opcode, msglen);
+		return;
+	}
+
+	mutex_lock(&vf->cfg_lock);
+
+	/* Check if the VF is trying to overflow the mailbox */
+	if (ice_is_malicious_vf(vf, mbxdata))
+		goto finish;
+
+	/* Check if VF is disabled. */
+	if (test_bit(ICE_VF_STATE_DIS, vf->vf_states)) {
+		err = -EPERM;
+		goto error_handler;
+	}
+
+	ops = vf->virtchnl_ops;
+
+	/* Perform basic checks on the msg */
+	err = virtchnl_vc_validate_vf_msg(&vf->vf_ver, v_opcode, msg, msglen);
+	if (err) {
+		if (err == VIRTCHNL_STATUS_ERR_PARAM)
+			err = -EPERM;
+		else
+			err = -EINVAL;
+	}
+
+error_handler:
+	if (err) {
+		ice_vc_send_msg_to_vf(vf, v_opcode, VIRTCHNL_STATUS_ERR_PARAM,
+				      NULL, 0);
+		dev_err(dev, "Invalid message from VF %d, opcode %d, len %d, error %d\n",
+			vf_id, v_opcode, msglen, err);
+		goto finish;
+	}
+
+	if (!ice_vc_is_opcode_allowed(vf, v_opcode)) {
+		ice_vc_send_msg_to_vf(vf, v_opcode,
+				      VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, NULL,
+				      0);
+		goto finish;
+	}
+
+	switch (v_opcode) {
+	case VIRTCHNL_OP_VERSION:
+		err = ops->get_ver_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_GET_VF_RESOURCES:
+		err = ops->get_vf_res_msg(vf, msg);
+		if (ice_vf_init_vlan_stripping(vf))
+			dev_dbg(dev, "Failed to initialize VLAN stripping for VF %d\n",
+				vf->vf_id);
+		ice_vc_notify_vf_link_state(vf);
+		break;
+	case VIRTCHNL_OP_RESET_VF:
+		ops->reset_vf(vf);
+		break;
+	case VIRTCHNL_OP_ADD_ETH_ADDR:
+		err = ops->add_mac_addr_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_DEL_ETH_ADDR:
+		err = ops->del_mac_addr_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
+		err = ops->cfg_qs_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ENABLE_QUEUES:
+		err = ops->ena_qs_msg(vf, msg);
+		ice_vc_notify_vf_link_state(vf);
+		break;
+	case VIRTCHNL_OP_DISABLE_QUEUES:
+		err = ops->dis_qs_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_REQUEST_QUEUES:
+		err = ops->request_qs_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_IRQ_MAP:
+		err = ops->cfg_irq_map_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_RSS_KEY:
+		err = ops->config_rss_key(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_RSS_LUT:
+		err = ops->config_rss_lut(vf, msg);
+		break;
+	case VIRTCHNL_OP_GET_STATS:
+		err = ops->get_stats_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
+		err = ops->cfg_promiscuous_mode_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ADD_VLAN:
+		err = ops->add_vlan_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_DEL_VLAN:
+		err = ops->remove_vlan_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS:
+		err = ops->query_rxdid(vf);
+		break;
+	case VIRTCHNL_OP_GET_RSS_HENA_CAPS:
+		err = ops->get_rss_hena(vf);
+		break;
+	case VIRTCHNL_OP_SET_RSS_HENA:
+		err = ops->set_rss_hena_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
+		err = ops->ena_vlan_stripping(vf);
+		break;
+	case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
+		err = ops->dis_vlan_stripping(vf);
+		break;
+	case VIRTCHNL_OP_ADD_FDIR_FILTER:
+		err = ops->add_fdir_fltr_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_DEL_FDIR_FILTER:
+		err = ops->del_fdir_fltr_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ADD_RSS_CFG:
+		err = ops->handle_rss_cfg_msg(vf, msg, true);
+		break;
+	case VIRTCHNL_OP_DEL_RSS_CFG:
+		err = ops->handle_rss_cfg_msg(vf, msg, false);
+		break;
+	case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS:
+		err = ops->get_offload_vlan_v2_caps(vf);
+		break;
+	case VIRTCHNL_OP_ADD_VLAN_V2:
+		err = ops->add_vlan_v2_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_DEL_VLAN_V2:
+		err = ops->remove_vlan_v2_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2:
+		err = ops->ena_vlan_stripping_v2_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2:
+		err = ops->dis_vlan_stripping_v2_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2:
+		err = ops->ena_vlan_insertion_v2_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2:
+		err = ops->dis_vlan_insertion_v2_msg(vf, msg);
+		break;
+	case VIRTCHNL_OP_UNKNOWN:
+	default:
+		dev_err(dev, "Unsupported opcode %d from VF %d\n", v_opcode,
+			vf_id);
+		err = ice_vc_send_msg_to_vf(vf, v_opcode,
+					    VIRTCHNL_STATUS_ERR_NOT_SUPPORTED,
+					    NULL, 0);
+		break;
+	}
+	if (err) {
+		/* Helper function cares less about error return values here
+		 * as it is busy with pending work.
+		 */
+		dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n",
+			 vf_id, v_opcode, err);
+	}
+
+finish:
+	mutex_unlock(&vf->cfg_lock);
+	ice_put_vf(vf);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h
new file mode 100644
index 0000000000..cd747718de
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2022, Intel Corporation. */
+
+#ifndef _ICE_VIRTCHNL_H_
+#define _ICE_VIRTCHNL_H_
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/if_ether.h>
+#include <linux/avf/virtchnl.h>
+#include "ice_vf_lib.h"
+
+/* Restrict number of MAC Addr and VLAN that non-trusted VF can programmed */
+#define ICE_MAX_VLAN_PER_VF		8
+
+/* MAC filters: 1 is reserved for the VF's default/perm_addr/LAA MAC, 1 for
+ * broadcast, and 16 for additional unicast/multicast filters
+ */
+#define ICE_MAX_MACADDR_PER_VF		18
+#define ICE_FLEX_DESC_RXDID_MAX_NUM	64
+
+struct ice_virtchnl_ops {
+	int (*get_ver_msg)(struct ice_vf *vf, u8 *msg);
+	int (*get_vf_res_msg)(struct ice_vf *vf, u8 *msg);
+	void (*reset_vf)(struct ice_vf *vf);
+	int (*add_mac_addr_msg)(struct ice_vf *vf, u8 *msg);
+	int (*del_mac_addr_msg)(struct ice_vf *vf, u8 *msg);
+	int (*cfg_qs_msg)(struct ice_vf *vf, u8 *msg);
+	int (*ena_qs_msg)(struct ice_vf *vf, u8 *msg);
+	int (*dis_qs_msg)(struct ice_vf *vf, u8 *msg);
+	int (*request_qs_msg)(struct ice_vf *vf, u8 *msg);
+	int (*cfg_irq_map_msg)(struct ice_vf *vf, u8 *msg);
+	int (*config_rss_key)(struct ice_vf *vf, u8 *msg);
+	int (*config_rss_lut)(struct ice_vf *vf, u8 *msg);
+	int (*get_stats_msg)(struct ice_vf *vf, u8 *msg);
+	int (*cfg_promiscuous_mode_msg)(struct ice_vf *vf, u8 *msg);
+	int (*add_vlan_msg)(struct ice_vf *vf, u8 *msg);
+	int (*remove_vlan_msg)(struct ice_vf *vf, u8 *msg);
+	int (*query_rxdid)(struct ice_vf *vf);
+	int (*get_rss_hena)(struct ice_vf *vf);
+	int (*set_rss_hena_msg)(struct ice_vf *vf, u8 *msg);
+	int (*ena_vlan_stripping)(struct ice_vf *vf);
+	int (*dis_vlan_stripping)(struct ice_vf *vf);
+	int (*handle_rss_cfg_msg)(struct ice_vf *vf, u8 *msg, bool add);
+	int (*add_fdir_fltr_msg)(struct ice_vf *vf, u8 *msg);
+	int (*del_fdir_fltr_msg)(struct ice_vf *vf, u8 *msg);
+	int (*get_offload_vlan_v2_caps)(struct ice_vf *vf);
+	int (*add_vlan_v2_msg)(struct ice_vf *vf, u8 *msg);
+	int (*remove_vlan_v2_msg)(struct ice_vf *vf, u8 *msg);
+	int (*ena_vlan_stripping_v2_msg)(struct ice_vf *vf, u8 *msg);
+	int (*dis_vlan_stripping_v2_msg)(struct ice_vf *vf, u8 *msg);
+	int (*ena_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg);
+	int (*dis_vlan_insertion_v2_msg)(struct ice_vf *vf, u8 *msg);
+};
+
+#ifdef CONFIG_PCI_IOV
+void ice_virtchnl_set_dflt_ops(struct ice_vf *vf);
+void ice_virtchnl_set_repr_ops(struct ice_vf *vf);
+void ice_vc_notify_vf_link_state(struct ice_vf *vf);
+void ice_vc_notify_link_state(struct ice_pf *pf);
+void ice_vc_notify_reset(struct ice_pf *pf);
+int
+ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
+		      enum virtchnl_status_code v_retval, u8 *msg, u16 msglen);
+bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id);
+void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event,
+			   struct ice_mbx_data *mbxdata);
+#else /* CONFIG_PCI_IOV */
+static inline void ice_virtchnl_set_dflt_ops(struct ice_vf *vf) { }
+static inline void ice_virtchnl_set_repr_ops(struct ice_vf *vf) { }
+static inline void ice_vc_notify_vf_link_state(struct ice_vf *vf) { }
+static inline void ice_vc_notify_link_state(struct ice_pf *pf) { }
+static inline void ice_vc_notify_reset(struct ice_pf *pf) { }
+
+static inline int
+ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
+		      enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id)
+{
+	return false;
+}
+
+static inline void
+ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event,
+		      struct ice_mbx_data *mbxdata)
+{
+}
+#endif /* !CONFIG_PCI_IOV */
+
+#endif /* _ICE_VIRTCHNL_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
new file mode 100644
index 0000000000..7d547fa616
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+#include "ice_virtchnl_allowlist.h"
+
+/* Purpose of this file is to share functionality to allowlist or denylist
+ * opcodes used in PF <-> VF communication. Group of opcodes:
+ * - default -> should be always allowed after creating VF,
+ *   default_allowlist_opcodes
+ * - opcodes needed by VF to work correctly, but not associated with caps ->
+ *   should be allowed after successful VF resources allocation,
+ *   working_allowlist_opcodes
+ * - opcodes needed by VF when caps are activated
+ *
+ * Caps that don't use new opcodes (no opcodes should be allowed):
+ * - VIRTCHNL_VF_OFFLOAD_RSS_AQ
+ * - VIRTCHNL_VF_OFFLOAD_RSS_REG
+ * - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR
+ * - VIRTCHNL_VF_OFFLOAD_CRC
+ * - VIRTCHNL_VF_OFFLOAD_RX_POLLING
+ * - VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2
+ * - VIRTCHNL_VF_OFFLOAD_ENCAP
+ * - VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM
+ * - VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM
+ * - VIRTCHNL_VF_OFFLOAD_USO
+ */
+
+/* default opcodes to communicate with VF */
+static const u32 default_allowlist_opcodes[] = {
+	VIRTCHNL_OP_GET_VF_RESOURCES, VIRTCHNL_OP_VERSION, VIRTCHNL_OP_RESET_VF,
+};
+
+/* opcodes supported after successful VIRTCHNL_OP_GET_VF_RESOURCES */
+static const u32 working_allowlist_opcodes[] = {
+	VIRTCHNL_OP_CONFIG_TX_QUEUE, VIRTCHNL_OP_CONFIG_RX_QUEUE,
+	VIRTCHNL_OP_CONFIG_VSI_QUEUES, VIRTCHNL_OP_CONFIG_IRQ_MAP,
+	VIRTCHNL_OP_ENABLE_QUEUES, VIRTCHNL_OP_DISABLE_QUEUES,
+	VIRTCHNL_OP_GET_STATS, VIRTCHNL_OP_EVENT,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_L2 */
+static const u32 l2_allowlist_opcodes[] = {
+	VIRTCHNL_OP_ADD_ETH_ADDR, VIRTCHNL_OP_DEL_ETH_ADDR,
+	VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_REQ_QUEUES */
+static const u32 req_queues_allowlist_opcodes[] = {
+	VIRTCHNL_OP_REQUEST_QUEUES,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_VLAN */
+static const u32 vlan_allowlist_opcodes[] = {
+	VIRTCHNL_OP_ADD_VLAN, VIRTCHNL_OP_DEL_VLAN,
+	VIRTCHNL_OP_ENABLE_VLAN_STRIPPING, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_VLAN_V2 */
+static const u32 vlan_v2_allowlist_opcodes[] = {
+	VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS, VIRTCHNL_OP_ADD_VLAN_V2,
+	VIRTCHNL_OP_DEL_VLAN_V2, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING_V2,
+	VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2,
+	VIRTCHNL_OP_ENABLE_VLAN_INSERTION_V2,
+	VIRTCHNL_OP_DISABLE_VLAN_INSERTION_V2,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_RSS_PF */
+static const u32 rss_pf_allowlist_opcodes[] = {
+	VIRTCHNL_OP_CONFIG_RSS_KEY, VIRTCHNL_OP_CONFIG_RSS_LUT,
+	VIRTCHNL_OP_GET_RSS_HENA_CAPS, VIRTCHNL_OP_SET_RSS_HENA,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC */
+static const u32 rx_flex_desc_allowlist_opcodes[] = {
+	VIRTCHNL_OP_GET_SUPPORTED_RXDIDS,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF */
+static const u32 adv_rss_pf_allowlist_opcodes[] = {
+	VIRTCHNL_OP_ADD_RSS_CFG, VIRTCHNL_OP_DEL_RSS_CFG,
+};
+
+/* VIRTCHNL_VF_OFFLOAD_FDIR_PF */
+static const u32 fdir_pf_allowlist_opcodes[] = {
+	VIRTCHNL_OP_ADD_FDIR_FILTER, VIRTCHNL_OP_DEL_FDIR_FILTER,
+};
+
+struct allowlist_opcode_info {
+	const u32 *opcodes;
+	size_t size;
+};
+
+#define BIT_INDEX(caps) (HWEIGHT((caps) - 1))
+#define ALLOW_ITEM(caps, list) \
+	[BIT_INDEX(caps)] = { \
+		.opcodes = list, \
+		.size = ARRAY_SIZE(list) \
+	}
+static const struct allowlist_opcode_info allowlist_opcodes[] = {
+	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_L2, l2_allowlist_opcodes),
+	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_REQ_QUEUES, req_queues_allowlist_opcodes),
+	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN, vlan_allowlist_opcodes),
+	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_RSS_PF, rss_pf_allowlist_opcodes),
+	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC, rx_flex_desc_allowlist_opcodes),
+	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF, adv_rss_pf_allowlist_opcodes),
+	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_FDIR_PF, fdir_pf_allowlist_opcodes),
+	ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN_V2, vlan_v2_allowlist_opcodes),
+};
+
+/**
+ * ice_vc_is_opcode_allowed - check if this opcode is allowed on this VF
+ * @vf: pointer to VF structure
+ * @opcode: virtchnl opcode
+ *
+ * Return true if message is allowed on this VF
+ */
+bool ice_vc_is_opcode_allowed(struct ice_vf *vf, u32 opcode)
+{
+	if (opcode >= VIRTCHNL_OP_MAX)
+		return false;
+
+	return test_bit(opcode, vf->opcodes_allowlist);
+}
+
+/**
+ * ice_vc_allowlist_opcodes - allowlist selected opcodes
+ * @vf: pointer to VF structure
+ * @opcodes: array of opocodes to allowlist
+ * @size: size of opcodes array
+ *
+ * Function should be called to allowlist opcodes on VF.
+ */
+static void
+ice_vc_allowlist_opcodes(struct ice_vf *vf, const u32 *opcodes, size_t size)
+{
+	unsigned int i;
+
+	for (i = 0; i < size; i++)
+		set_bit(opcodes[i], vf->opcodes_allowlist);
+}
+
+/**
+ * ice_vc_clear_allowlist - clear all allowlist opcodes
+ * @vf: pointer to VF structure
+ */
+static void ice_vc_clear_allowlist(struct ice_vf *vf)
+{
+	bitmap_zero(vf->opcodes_allowlist, VIRTCHNL_OP_MAX);
+}
+
+/**
+ * ice_vc_set_default_allowlist - allowlist default opcodes for VF
+ * @vf: pointer to VF structure
+ */
+void ice_vc_set_default_allowlist(struct ice_vf *vf)
+{
+	ice_vc_clear_allowlist(vf);
+	ice_vc_allowlist_opcodes(vf, default_allowlist_opcodes,
+				 ARRAY_SIZE(default_allowlist_opcodes));
+}
+
+/**
+ * ice_vc_set_working_allowlist - allowlist opcodes needed to by VF to work
+ * @vf: pointer to VF structure
+ *
+ * allowlist opcodes that aren't associated with specific caps, but
+ * are needed by VF to work.
+ */
+void ice_vc_set_working_allowlist(struct ice_vf *vf)
+{
+	ice_vc_allowlist_opcodes(vf, working_allowlist_opcodes,
+				 ARRAY_SIZE(working_allowlist_opcodes));
+}
+
+/**
+ * ice_vc_set_caps_allowlist - allowlist VF opcodes according caps
+ * @vf: pointer to VF structure
+ */
+void ice_vc_set_caps_allowlist(struct ice_vf *vf)
+{
+	unsigned long caps = vf->driver_caps;
+	unsigned int i;
+
+	for_each_set_bit(i, &caps, ARRAY_SIZE(allowlist_opcodes))
+		ice_vc_allowlist_opcodes(vf, allowlist_opcodes[i].opcodes,
+					 allowlist_opcodes[i].size);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h
new file mode 100644
index 0000000000..d3ae86ded2
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_VIRTCHNL_ALLOWLIST_H_
+#define _ICE_VIRTCHNL_ALLOWLIST_H_
+#include "ice.h"
+
+bool ice_vc_is_opcode_allowed(struct ice_vf *vf, u32 opcode);
+
+void ice_vc_set_default_allowlist(struct ice_vf *vf);
+void ice_vc_set_working_allowlist(struct ice_vf *vf);
+void ice_vc_set_caps_allowlist(struct ice_vf *vf);
+#endif /* _ICE_VIRTCHNL_ALLOWLIST_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
new file mode 100644
index 0000000000..daa6a1e894
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c
@@ -0,0 +1,2029 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_lib.h"
+#include "ice_flow.h"
+#include "ice_vf_lib_private.h"
+
+#define to_fltr_conf_from_desc(p) \
+	container_of(p, struct virtchnl_fdir_fltr_conf, input)
+
+#define ICE_FLOW_PROF_TYPE_S	0
+#define ICE_FLOW_PROF_TYPE_M	(0xFFFFFFFFULL << ICE_FLOW_PROF_TYPE_S)
+#define ICE_FLOW_PROF_VSI_S	32
+#define ICE_FLOW_PROF_VSI_M	(0xFFFFFFFFULL << ICE_FLOW_PROF_VSI_S)
+
+/* Flow profile ID format:
+ * [0:31] - flow type, flow + tun_offs
+ * [32:63] - VSI index
+ */
+#define ICE_FLOW_PROF_FD(vsi, flow, tun_offs) \
+	((u64)(((((flow) + (tun_offs)) & ICE_FLOW_PROF_TYPE_M)) | \
+	      (((u64)(vsi) << ICE_FLOW_PROF_VSI_S) & ICE_FLOW_PROF_VSI_M)))
+
+#define GTPU_TEID_OFFSET 4
+#define GTPU_EH_QFI_OFFSET 1
+#define GTPU_EH_QFI_MASK 0x3F
+#define PFCP_S_OFFSET 0
+#define PFCP_S_MASK 0x1
+#define PFCP_PORT_NR 8805
+
+#define FDIR_INSET_FLAG_ESP_S 0
+#define FDIR_INSET_FLAG_ESP_M BIT_ULL(FDIR_INSET_FLAG_ESP_S)
+#define FDIR_INSET_FLAG_ESP_UDP BIT_ULL(FDIR_INSET_FLAG_ESP_S)
+#define FDIR_INSET_FLAG_ESP_IPSEC (0ULL << FDIR_INSET_FLAG_ESP_S)
+
+enum ice_fdir_tunnel_type {
+	ICE_FDIR_TUNNEL_TYPE_NONE = 0,
+	ICE_FDIR_TUNNEL_TYPE_GTPU,
+	ICE_FDIR_TUNNEL_TYPE_GTPU_EH,
+};
+
+struct virtchnl_fdir_fltr_conf {
+	struct ice_fdir_fltr input;
+	enum ice_fdir_tunnel_type ttype;
+	u64 inset_flag;
+	u32 flow_id;
+};
+
+struct virtchnl_fdir_inset_map {
+	enum virtchnl_proto_hdr_field field;
+	enum ice_flow_field fld;
+	u64 flag;
+	u64 mask;
+};
+
+static const struct virtchnl_fdir_inset_map fdir_inset_map[] = {
+	{VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE, ICE_FLOW_FIELD_IDX_ETH_TYPE, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV4_SRC, ICE_FLOW_FIELD_IDX_IPV4_SA, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV4_DST, ICE_FLOW_FIELD_IDX_IPV4_DA, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV4_DSCP, ICE_FLOW_FIELD_IDX_IPV4_DSCP, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV4_TTL, ICE_FLOW_FIELD_IDX_IPV4_TTL, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV4_PROT, ICE_FLOW_FIELD_IDX_IPV4_PROT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV6_SRC, ICE_FLOW_FIELD_IDX_IPV6_SA, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV6_DST, ICE_FLOW_FIELD_IDX_IPV6_DA, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV6_TC, ICE_FLOW_FIELD_IDX_IPV6_DSCP, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT, ICE_FLOW_FIELD_IDX_IPV6_TTL, 0, 0},
+	{VIRTCHNL_PROTO_HDR_IPV6_PROT, ICE_FLOW_FIELD_IDX_IPV6_PROT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_UDP_SRC_PORT, ICE_FLOW_FIELD_IDX_UDP_SRC_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_UDP_DST_PORT, ICE_FLOW_FIELD_IDX_UDP_DST_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_TCP_SRC_PORT, ICE_FLOW_FIELD_IDX_TCP_SRC_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_TCP_DST_PORT, ICE_FLOW_FIELD_IDX_TCP_DST_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT, ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_SCTP_DST_PORT, ICE_FLOW_FIELD_IDX_SCTP_DST_PORT, 0, 0},
+	{VIRTCHNL_PROTO_HDR_GTPU_IP_TEID, ICE_FLOW_FIELD_IDX_GTPU_IP_TEID, 0, 0},
+	{VIRTCHNL_PROTO_HDR_GTPU_EH_QFI, ICE_FLOW_FIELD_IDX_GTPU_EH_QFI, 0, 0},
+	{VIRTCHNL_PROTO_HDR_ESP_SPI, ICE_FLOW_FIELD_IDX_ESP_SPI,
+		FDIR_INSET_FLAG_ESP_IPSEC, FDIR_INSET_FLAG_ESP_M},
+	{VIRTCHNL_PROTO_HDR_ESP_SPI, ICE_FLOW_FIELD_IDX_NAT_T_ESP_SPI,
+		FDIR_INSET_FLAG_ESP_UDP, FDIR_INSET_FLAG_ESP_M},
+	{VIRTCHNL_PROTO_HDR_AH_SPI, ICE_FLOW_FIELD_IDX_AH_SPI, 0, 0},
+	{VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID, ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID, 0, 0},
+	{VIRTCHNL_PROTO_HDR_PFCP_S_FIELD, ICE_FLOW_FIELD_IDX_UDP_DST_PORT, 0, 0},
+};
+
+/**
+ * ice_vc_fdir_param_check
+ * @vf: pointer to the VF structure
+ * @vsi_id: VF relative VSI ID
+ *
+ * Check for the valid VSI ID, PF's state and VF's state
+ *
+ * Return: 0 on success, and -EINVAL on error.
+ */
+static int
+ice_vc_fdir_param_check(struct ice_vf *vf, u16 vsi_id)
+{
+	struct ice_pf *pf = vf->pf;
+
+	if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		return -EINVAL;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+		return -EINVAL;
+
+	if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF))
+		return -EINVAL;
+
+	if (vsi_id != vf->lan_vsi_num)
+		return -EINVAL;
+
+	if (!ice_vc_isvalid_vsi_id(vf, vsi_id))
+		return -EINVAL;
+
+	if (!ice_get_vf_vsi(vf))
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * ice_vf_start_ctrl_vsi
+ * @vf: pointer to the VF structure
+ *
+ * Allocate ctrl_vsi for the first time and open the ctrl_vsi port for VF
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int ice_vf_start_ctrl_vsi(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *ctrl_vsi;
+	struct device *dev;
+	int err;
+
+	dev = ice_pf_to_dev(pf);
+	if (vf->ctrl_vsi_idx != ICE_NO_VSI)
+		return -EEXIST;
+
+	ctrl_vsi = ice_vf_ctrl_vsi_setup(vf);
+	if (!ctrl_vsi) {
+		dev_dbg(dev, "Could not setup control VSI for VF %d\n",
+			vf->vf_id);
+		return -ENOMEM;
+	}
+
+	err = ice_vsi_open_ctrl(ctrl_vsi);
+	if (err) {
+		dev_dbg(dev, "Could not open control VSI for VF %d\n",
+			vf->vf_id);
+		goto err_vsi_open;
+	}
+
+	return 0;
+
+err_vsi_open:
+	ice_vsi_release(ctrl_vsi);
+	if (vf->ctrl_vsi_idx != ICE_NO_VSI) {
+		pf->vsi[vf->ctrl_vsi_idx] = NULL;
+		vf->ctrl_vsi_idx = ICE_NO_VSI;
+	}
+	return err;
+}
+
+/**
+ * ice_vc_fdir_alloc_prof - allocate profile for this filter flow type
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_alloc_prof(struct ice_vf *vf, enum ice_fltr_ptype flow)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+
+	if (!fdir->fdir_prof) {
+		fdir->fdir_prof = devm_kcalloc(ice_pf_to_dev(vf->pf),
+					       ICE_FLTR_PTYPE_MAX,
+					       sizeof(*fdir->fdir_prof),
+					       GFP_KERNEL);
+		if (!fdir->fdir_prof)
+			return -ENOMEM;
+	}
+
+	if (!fdir->fdir_prof[flow]) {
+		fdir->fdir_prof[flow] = devm_kzalloc(ice_pf_to_dev(vf->pf),
+						     sizeof(**fdir->fdir_prof),
+						     GFP_KERNEL);
+		if (!fdir->fdir_prof[flow])
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_free_prof - free profile for this filter flow type
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ */
+static void
+ice_vc_fdir_free_prof(struct ice_vf *vf, enum ice_fltr_ptype flow)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+
+	if (!fdir->fdir_prof)
+		return;
+
+	if (!fdir->fdir_prof[flow])
+		return;
+
+	devm_kfree(ice_pf_to_dev(vf->pf), fdir->fdir_prof[flow]);
+	fdir->fdir_prof[flow] = NULL;
+}
+
+/**
+ * ice_vc_fdir_free_prof_all - free all the profile for this VF
+ * @vf: pointer to the VF structure
+ */
+static void ice_vc_fdir_free_prof_all(struct ice_vf *vf)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+	enum ice_fltr_ptype flow;
+
+	if (!fdir->fdir_prof)
+		return;
+
+	for (flow = ICE_FLTR_PTYPE_NONF_NONE; flow < ICE_FLTR_PTYPE_MAX; flow++)
+		ice_vc_fdir_free_prof(vf, flow);
+
+	devm_kfree(ice_pf_to_dev(vf->pf), fdir->fdir_prof);
+	fdir->fdir_prof = NULL;
+}
+
+/**
+ * ice_vc_fdir_parse_flow_fld
+ * @proto_hdr: virtual channel protocol filter header
+ * @conf: FDIR configuration for each filter
+ * @fld: field type array
+ * @fld_cnt: field counter
+ *
+ * Parse the virtual channel filter header and store them into field type array
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_parse_flow_fld(struct virtchnl_proto_hdr *proto_hdr,
+			   struct virtchnl_fdir_fltr_conf *conf,
+			   enum ice_flow_field *fld, int *fld_cnt)
+{
+	struct virtchnl_proto_hdr hdr;
+	u32 i;
+
+	memcpy(&hdr, proto_hdr, sizeof(hdr));
+
+	for (i = 0; (i < ARRAY_SIZE(fdir_inset_map)) &&
+	     VIRTCHNL_GET_PROTO_HDR_FIELD(&hdr); i++)
+		if (VIRTCHNL_TEST_PROTO_HDR(&hdr, fdir_inset_map[i].field)) {
+			if (fdir_inset_map[i].mask &&
+			    ((fdir_inset_map[i].mask & conf->inset_flag) !=
+			     fdir_inset_map[i].flag))
+				continue;
+
+			fld[*fld_cnt] = fdir_inset_map[i].fld;
+			*fld_cnt += 1;
+			if (*fld_cnt >= ICE_FLOW_FIELD_IDX_MAX)
+				return -EINVAL;
+			VIRTCHNL_DEL_PROTO_HDR_FIELD(&hdr,
+						     fdir_inset_map[i].field);
+		}
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_set_flow_fld
+ * @vf: pointer to the VF structure
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ * @seg: array of one or more packet segments that describe the flow
+ *
+ * Parse the virtual channel add msg buffer's field vector and store them into
+ * flow's packet segment field
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_set_flow_fld(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			 struct virtchnl_fdir_fltr_conf *conf,
+			 struct ice_flow_seg_info *seg)
+{
+	struct virtchnl_fdir_rule *rule = &fltr->rule_cfg;
+	enum ice_flow_field fld[ICE_FLOW_FIELD_IDX_MAX];
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct virtchnl_proto_hdrs *proto;
+	int fld_cnt = 0;
+	int i;
+
+	proto = &rule->proto_hdrs;
+	for (i = 0; i < proto->count; i++) {
+		struct virtchnl_proto_hdr *hdr = &proto->proto_hdr[i];
+		int ret;
+
+		ret = ice_vc_fdir_parse_flow_fld(hdr, conf, fld, &fld_cnt);
+		if (ret)
+			return ret;
+	}
+
+	if (fld_cnt == 0) {
+		dev_dbg(dev, "Empty input set for VF %d\n", vf->vf_id);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < fld_cnt; i++)
+		ice_flow_set_fld(seg, fld[i],
+				 ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_set_flow_hdr - config the flow's packet segment header
+ * @vf: pointer to the VF structure
+ * @conf: FDIR configuration for each filter
+ * @seg: array of one or more packet segments that describe the flow
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_set_flow_hdr(struct ice_vf *vf,
+			 struct virtchnl_fdir_fltr_conf *conf,
+			 struct ice_flow_seg_info *seg)
+{
+	enum ice_fltr_ptype flow = conf->input.flow_type;
+	enum ice_fdir_tunnel_type ttype = conf->ttype;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+
+	switch (flow) {
+	case ICE_FLTR_PTYPE_NON_IP_L2:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ETH_NON_IP);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_L2TPV3 |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_ESP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ESP |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_AH:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_AH |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_NAT_T_ESP |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_NODE |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_SESSION |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_TCP |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_UDP |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP:
+	case ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER:
+		if (ttype == ICE_FDIR_TUNNEL_TYPE_GTPU) {
+			ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_GTPU_IP |
+					  ICE_FLOW_SEG_HDR_IPV4 |
+					  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		} else if (ttype == ICE_FDIR_TUNNEL_TYPE_GTPU_EH) {
+			ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_GTPU_EH |
+					  ICE_FLOW_SEG_HDR_GTPU_IP |
+					  ICE_FLOW_SEG_HDR_IPV4 |
+					  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		} else {
+			dev_dbg(dev, "Invalid tunnel type 0x%x for VF %d\n",
+				flow, vf->vf_id);
+			return -EINVAL;
+		}
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_SCTP |
+				  ICE_FLOW_SEG_HDR_IPV4 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_L2TPV3 |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_ESP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_ESP |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_AH:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_AH |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_NAT_T_ESP |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_NODE |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_PFCP_SESSION |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_TCP |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_UDP |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
+		ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_SCTP |
+				  ICE_FLOW_SEG_HDR_IPV6 |
+				  ICE_FLOW_SEG_HDR_IPV_OTHER);
+		break;
+	default:
+		dev_dbg(dev, "Invalid flow type 0x%x for VF %d failed\n",
+			flow, vf->vf_id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_rem_prof - remove profile for this filter flow type
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ * @tun: 0 implies non-tunnel type filter, 1 implies tunnel type filter
+ */
+static void
+ice_vc_fdir_rem_prof(struct ice_vf *vf, enum ice_fltr_ptype flow, int tun)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+	struct ice_fd_hw_prof *vf_prof;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vf_vsi;
+	struct device *dev;
+	struct ice_hw *hw;
+	u64 prof_id;
+	int i;
+
+	dev = ice_pf_to_dev(pf);
+	hw = &pf->hw;
+	if (!fdir->fdir_prof || !fdir->fdir_prof[flow])
+		return;
+
+	vf_prof = fdir->fdir_prof[flow];
+
+	vf_vsi = ice_get_vf_vsi(vf);
+	if (!vf_vsi) {
+		dev_dbg(dev, "NULL vf %d vsi pointer\n", vf->vf_id);
+		return;
+	}
+
+	if (!fdir->prof_entry_cnt[flow][tun])
+		return;
+
+	prof_id = ICE_FLOW_PROF_FD(vf_vsi->vsi_num,
+				   flow, tun ? ICE_FLTR_PTYPE_MAX : 0);
+
+	for (i = 0; i < fdir->prof_entry_cnt[flow][tun]; i++)
+		if (vf_prof->entry_h[i][tun]) {
+			u16 vsi_num = ice_get_hw_vsi_num(hw, vf_prof->vsi_h[i]);
+
+			ice_rem_prof_id_flow(hw, ICE_BLK_FD, vsi_num, prof_id);
+			ice_flow_rem_entry(hw, ICE_BLK_FD,
+					   vf_prof->entry_h[i][tun]);
+			vf_prof->entry_h[i][tun] = 0;
+		}
+
+	ice_flow_rem_prof(hw, ICE_BLK_FD, prof_id);
+	devm_kfree(dev, vf_prof->fdir_seg[tun]);
+	vf_prof->fdir_seg[tun] = NULL;
+
+	for (i = 0; i < vf_prof->cnt; i++)
+		vf_prof->vsi_h[i] = 0;
+
+	fdir->prof_entry_cnt[flow][tun] = 0;
+}
+
+/**
+ * ice_vc_fdir_rem_prof_all - remove profile for this VF
+ * @vf: pointer to the VF structure
+ */
+static void ice_vc_fdir_rem_prof_all(struct ice_vf *vf)
+{
+	enum ice_fltr_ptype flow;
+
+	for (flow = ICE_FLTR_PTYPE_NONF_NONE;
+	     flow < ICE_FLTR_PTYPE_MAX; flow++) {
+		ice_vc_fdir_rem_prof(vf, flow, 0);
+		ice_vc_fdir_rem_prof(vf, flow, 1);
+	}
+}
+
+/**
+ * ice_vc_fdir_reset_cnt_all - reset all FDIR counters for this VF FDIR
+ * @fdir: pointer to the VF FDIR structure
+ */
+static void ice_vc_fdir_reset_cnt_all(struct ice_vf_fdir *fdir)
+{
+	enum ice_fltr_ptype flow;
+
+	for (flow = ICE_FLTR_PTYPE_NONF_NONE;
+	     flow < ICE_FLTR_PTYPE_MAX; flow++) {
+		fdir->fdir_fltr_cnt[flow][0] = 0;
+		fdir->fdir_fltr_cnt[flow][1] = 0;
+	}
+}
+
+/**
+ * ice_vc_fdir_has_prof_conflict
+ * @vf: pointer to the VF structure
+ * @conf: FDIR configuration for each filter
+ *
+ * Check if @conf has conflicting profile with existing profiles
+ *
+ * Return: true on success, and false on error.
+ */
+static bool
+ice_vc_fdir_has_prof_conflict(struct ice_vf *vf,
+			      struct virtchnl_fdir_fltr_conf *conf)
+{
+	struct ice_fdir_fltr *desc;
+
+	list_for_each_entry(desc, &vf->fdir.fdir_rule_list, fltr_node) {
+		struct virtchnl_fdir_fltr_conf *existing_conf;
+		enum ice_fltr_ptype flow_type_a, flow_type_b;
+		struct ice_fdir_fltr *a, *b;
+
+		existing_conf = to_fltr_conf_from_desc(desc);
+		a = &existing_conf->input;
+		b = &conf->input;
+		flow_type_a = a->flow_type;
+		flow_type_b = b->flow_type;
+
+		/* No need to compare two rules with different tunnel types or
+		 * with the same protocol type.
+		 */
+		if (existing_conf->ttype != conf->ttype ||
+		    flow_type_a == flow_type_b)
+			continue;
+
+		switch (flow_type_a) {
+		case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
+			if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_OTHER)
+				return true;
+			break;
+		case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
+			if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+			    flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+			    flow_type_b == ICE_FLTR_PTYPE_NONF_IPV4_SCTP)
+				return true;
+			break;
+		case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
+			if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_OTHER)
+				return true;
+			break;
+		case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
+			if (flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_UDP ||
+			    flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_TCP ||
+			    flow_type_b == ICE_FLTR_PTYPE_NONF_IPV6_SCTP)
+				return true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return false;
+}
+
+/**
+ * ice_vc_fdir_write_flow_prof
+ * @vf: pointer to the VF structure
+ * @flow: filter flow type
+ * @seg: array of one or more packet segments that describe the flow
+ * @tun: 0 implies non-tunnel type filter, 1 implies tunnel type filter
+ *
+ * Write the flow's profile config and packet segment into the hardware
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_write_flow_prof(struct ice_vf *vf, enum ice_fltr_ptype flow,
+			    struct ice_flow_seg_info *seg, int tun)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+	struct ice_vsi *vf_vsi, *ctrl_vsi;
+	struct ice_flow_seg_info *old_seg;
+	struct ice_flow_prof *prof = NULL;
+	struct ice_fd_hw_prof *vf_prof;
+	struct device *dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	u64 entry1_h = 0;
+	u64 entry2_h = 0;
+	u64 prof_id;
+	int ret;
+
+	pf = vf->pf;
+	dev = ice_pf_to_dev(pf);
+	hw = &pf->hw;
+	vf_vsi = ice_get_vf_vsi(vf);
+	if (!vf_vsi)
+		return -EINVAL;
+
+	ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx];
+	if (!ctrl_vsi)
+		return -EINVAL;
+
+	vf_prof = fdir->fdir_prof[flow];
+	old_seg = vf_prof->fdir_seg[tun];
+	if (old_seg) {
+		if (!memcmp(old_seg, seg, sizeof(*seg))) {
+			dev_dbg(dev, "Duplicated profile for VF %d!\n",
+				vf->vf_id);
+			return -EEXIST;
+		}
+
+		if (fdir->fdir_fltr_cnt[flow][tun]) {
+			ret = -EINVAL;
+			dev_dbg(dev, "Input set conflicts for VF %d\n",
+				vf->vf_id);
+			goto err_exit;
+		}
+
+		/* remove previously allocated profile */
+		ice_vc_fdir_rem_prof(vf, flow, tun);
+	}
+
+	prof_id = ICE_FLOW_PROF_FD(vf_vsi->vsi_num, flow,
+				   tun ? ICE_FLTR_PTYPE_MAX : 0);
+
+	ret = ice_flow_add_prof(hw, ICE_BLK_FD, ICE_FLOW_RX, prof_id, seg,
+				tun + 1, &prof);
+	if (ret) {
+		dev_dbg(dev, "Could not add VSI flow 0x%x for VF %d\n",
+			flow, vf->vf_id);
+		goto err_exit;
+	}
+
+	ret = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, vf_vsi->idx,
+				 vf_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+				 seg, &entry1_h);
+	if (ret) {
+		dev_dbg(dev, "Could not add flow 0x%x VSI entry for VF %d\n",
+			flow, vf->vf_id);
+		goto err_prof;
+	}
+
+	ret = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, vf_vsi->idx,
+				 ctrl_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+				 seg, &entry2_h);
+	if (ret) {
+		dev_dbg(dev,
+			"Could not add flow 0x%x Ctrl VSI entry for VF %d\n",
+			flow, vf->vf_id);
+		goto err_entry_1;
+	}
+
+	vf_prof->fdir_seg[tun] = seg;
+	vf_prof->cnt = 0;
+	fdir->prof_entry_cnt[flow][tun] = 0;
+
+	vf_prof->entry_h[vf_prof->cnt][tun] = entry1_h;
+	vf_prof->vsi_h[vf_prof->cnt] = vf_vsi->idx;
+	vf_prof->cnt++;
+	fdir->prof_entry_cnt[flow][tun]++;
+
+	vf_prof->entry_h[vf_prof->cnt][tun] = entry2_h;
+	vf_prof->vsi_h[vf_prof->cnt] = ctrl_vsi->idx;
+	vf_prof->cnt++;
+	fdir->prof_entry_cnt[flow][tun]++;
+
+	return 0;
+
+err_entry_1:
+	ice_rem_prof_id_flow(hw, ICE_BLK_FD,
+			     ice_get_hw_vsi_num(hw, vf_vsi->idx), prof_id);
+	ice_flow_rem_entry(hw, ICE_BLK_FD, entry1_h);
+err_prof:
+	ice_flow_rem_prof(hw, ICE_BLK_FD, prof_id);
+err_exit:
+	return ret;
+}
+
+/**
+ * ice_vc_fdir_config_input_set
+ * @vf: pointer to the VF structure
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ * @tun: 0 implies non-tunnel type filter, 1 implies tunnel type filter
+ *
+ * Config the input set type and value for virtual channel add msg buffer
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_config_input_set(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			     struct virtchnl_fdir_fltr_conf *conf, int tun)
+{
+	struct ice_fdir_fltr *input = &conf->input;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_flow_seg_info *seg;
+	enum ice_fltr_ptype flow;
+	int ret;
+
+	ret = ice_vc_fdir_has_prof_conflict(vf, conf);
+	if (ret) {
+		dev_dbg(dev, "Found flow profile conflict for VF %d\n",
+			vf->vf_id);
+		return ret;
+	}
+
+	flow = input->flow_type;
+	ret = ice_vc_fdir_alloc_prof(vf, flow);
+	if (ret) {
+		dev_dbg(dev, "Alloc flow prof for VF %d failed\n", vf->vf_id);
+		return ret;
+	}
+
+	seg = devm_kzalloc(dev, sizeof(*seg), GFP_KERNEL);
+	if (!seg)
+		return -ENOMEM;
+
+	ret = ice_vc_fdir_set_flow_fld(vf, fltr, conf, seg);
+	if (ret) {
+		dev_dbg(dev, "Set flow field for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	ret = ice_vc_fdir_set_flow_hdr(vf, conf, seg);
+	if (ret) {
+		dev_dbg(dev, "Set flow hdr for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	ret = ice_vc_fdir_write_flow_prof(vf, flow, seg, tun);
+	if (ret == -EEXIST) {
+		devm_kfree(dev, seg);
+	} else if (ret) {
+		dev_dbg(dev, "Write flow profile for VF %d failed\n",
+			vf->vf_id);
+		goto err_exit;
+	}
+
+	return 0;
+
+err_exit:
+	devm_kfree(dev, seg);
+	return ret;
+}
+
+/**
+ * ice_vc_fdir_parse_pattern
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ *
+ * Parse the virtual channel filter's pattern and store them into conf
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_parse_pattern(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			  struct virtchnl_fdir_fltr_conf *conf)
+{
+	struct virtchnl_proto_hdrs *proto = &fltr->rule_cfg.proto_hdrs;
+	enum virtchnl_proto_hdr_type l3 = VIRTCHNL_PROTO_HDR_NONE;
+	enum virtchnl_proto_hdr_type l4 = VIRTCHNL_PROTO_HDR_NONE;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_fdir_fltr *input = &conf->input;
+	int i;
+
+	if (proto->count > VIRTCHNL_MAX_NUM_PROTO_HDRS) {
+		dev_dbg(dev, "Invalid protocol count:0x%x for VF %d\n",
+			proto->count, vf->vf_id);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < proto->count; i++) {
+		struct virtchnl_proto_hdr *hdr = &proto->proto_hdr[i];
+		struct ip_esp_hdr *esph;
+		struct ip_auth_hdr *ah;
+		struct sctphdr *sctph;
+		struct ipv6hdr *ip6h;
+		struct udphdr *udph;
+		struct tcphdr *tcph;
+		struct ethhdr *eth;
+		struct iphdr *iph;
+		u8 s_field;
+		u8 *rawh;
+
+		switch (hdr->type) {
+		case VIRTCHNL_PROTO_HDR_ETH:
+			eth = (struct ethhdr *)hdr->buffer;
+			input->flow_type = ICE_FLTR_PTYPE_NON_IP_L2;
+
+			if (hdr->field_selector)
+				input->ext_data.ether_type = eth->h_proto;
+			break;
+		case VIRTCHNL_PROTO_HDR_IPV4:
+			iph = (struct iphdr *)hdr->buffer;
+			l3 = VIRTCHNL_PROTO_HDR_IPV4;
+			input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
+
+			if (hdr->field_selector) {
+				input->ip.v4.src_ip = iph->saddr;
+				input->ip.v4.dst_ip = iph->daddr;
+				input->ip.v4.tos = iph->tos;
+				input->ip.v4.proto = iph->protocol;
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_IPV6:
+			ip6h = (struct ipv6hdr *)hdr->buffer;
+			l3 = VIRTCHNL_PROTO_HDR_IPV6;
+			input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_OTHER;
+
+			if (hdr->field_selector) {
+				memcpy(input->ip.v6.src_ip,
+				       ip6h->saddr.in6_u.u6_addr8,
+				       sizeof(ip6h->saddr));
+				memcpy(input->ip.v6.dst_ip,
+				       ip6h->daddr.in6_u.u6_addr8,
+				       sizeof(ip6h->daddr));
+				input->ip.v6.tc = ((u8)(ip6h->priority) << 4) |
+						  (ip6h->flow_lbl[0] >> 4);
+				input->ip.v6.proto = ip6h->nexthdr;
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_TCP:
+			tcph = (struct tcphdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_TCP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_TCP;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4) {
+					input->ip.v4.src_port = tcph->source;
+					input->ip.v4.dst_port = tcph->dest;
+				} else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) {
+					input->ip.v6.src_port = tcph->source;
+					input->ip.v6.dst_port = tcph->dest;
+				}
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_UDP:
+			udph = (struct udphdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4) {
+					input->ip.v4.src_port = udph->source;
+					input->ip.v4.dst_port = udph->dest;
+				} else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) {
+					input->ip.v6.src_port = udph->source;
+					input->ip.v6.dst_port = udph->dest;
+				}
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_SCTP:
+			sctph = (struct sctphdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type =
+					ICE_FLTR_PTYPE_NONF_IPV4_SCTP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type =
+					ICE_FLTR_PTYPE_NONF_IPV6_SCTP;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4) {
+					input->ip.v4.src_port = sctph->source;
+					input->ip.v4.dst_port = sctph->dest;
+				} else if (l3 == VIRTCHNL_PROTO_HDR_IPV6) {
+					input->ip.v6.src_port = sctph->source;
+					input->ip.v6.dst_port = sctph->dest;
+				}
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_L2TPV3:
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3;
+
+			if (hdr->field_selector)
+				input->l2tpv3_data.session_id = *((__be32 *)hdr->buffer);
+			break;
+		case VIRTCHNL_PROTO_HDR_ESP:
+			esph = (struct ip_esp_hdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4 &&
+			    l4 == VIRTCHNL_PROTO_HDR_UDP)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 &&
+				 l4 == VIRTCHNL_PROTO_HDR_UDP)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV4 &&
+				 l4 == VIRTCHNL_PROTO_HDR_NONE)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_ESP;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 &&
+				 l4 == VIRTCHNL_PROTO_HDR_NONE)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_ESP;
+
+			if (l4 == VIRTCHNL_PROTO_HDR_UDP)
+				conf->inset_flag |= FDIR_INSET_FLAG_ESP_UDP;
+			else
+				conf->inset_flag |= FDIR_INSET_FLAG_ESP_IPSEC;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+					input->ip.v4.sec_parm_idx = esph->spi;
+				else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+					input->ip.v6.sec_parm_idx = esph->spi;
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_AH:
+			ah = (struct ip_auth_hdr *)hdr->buffer;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_AH;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_AH;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+					input->ip.v4.sec_parm_idx = ah->spi;
+				else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+					input->ip.v6.sec_parm_idx = ah->spi;
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_PFCP:
+			rawh = (u8 *)hdr->buffer;
+			s_field = (rawh[0] >> PFCP_S_OFFSET) & PFCP_S_MASK;
+			if (l3 == VIRTCHNL_PROTO_HDR_IPV4 && s_field == 0)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV4 && s_field == 1)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 && s_field == 0)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE;
+			else if (l3 == VIRTCHNL_PROTO_HDR_IPV6 && s_field == 1)
+				input->flow_type = ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION;
+
+			if (hdr->field_selector) {
+				if (l3 == VIRTCHNL_PROTO_HDR_IPV4)
+					input->ip.v4.dst_port = cpu_to_be16(PFCP_PORT_NR);
+				else if (l3 == VIRTCHNL_PROTO_HDR_IPV6)
+					input->ip.v6.dst_port = cpu_to_be16(PFCP_PORT_NR);
+			}
+			break;
+		case VIRTCHNL_PROTO_HDR_GTPU_IP:
+			rawh = (u8 *)hdr->buffer;
+			input->flow_type = ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER;
+
+			if (hdr->field_selector)
+				input->gtpu_data.teid = *(__be32 *)(&rawh[GTPU_TEID_OFFSET]);
+			conf->ttype = ICE_FDIR_TUNNEL_TYPE_GTPU;
+			break;
+		case VIRTCHNL_PROTO_HDR_GTPU_EH:
+			rawh = (u8 *)hdr->buffer;
+
+			if (hdr->field_selector)
+				input->gtpu_data.qfi = rawh[GTPU_EH_QFI_OFFSET] & GTPU_EH_QFI_MASK;
+			conf->ttype = ICE_FDIR_TUNNEL_TYPE_GTPU_EH;
+			break;
+		default:
+			dev_dbg(dev, "Invalid header type 0x:%x for VF %d\n",
+				hdr->type, vf->vf_id);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_parse_action
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ *
+ * Parse the virtual channel filter's action and store them into conf
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_parse_action(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			 struct virtchnl_fdir_fltr_conf *conf)
+{
+	struct virtchnl_filter_action_set *as = &fltr->rule_cfg.action_set;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_fdir_fltr *input = &conf->input;
+	u32 dest_num = 0;
+	u32 mark_num = 0;
+	int i;
+
+	if (as->count > VIRTCHNL_MAX_NUM_ACTIONS) {
+		dev_dbg(dev, "Invalid action numbers:0x%x for VF %d\n",
+			as->count, vf->vf_id);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < as->count; i++) {
+		struct virtchnl_filter_action *action = &as->actions[i];
+
+		switch (action->type) {
+		case VIRTCHNL_ACTION_PASSTHRU:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_OTHER;
+			break;
+		case VIRTCHNL_ACTION_DROP:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DROP_PKT;
+			break;
+		case VIRTCHNL_ACTION_QUEUE:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX;
+			input->q_index = action->act_conf.queue.index;
+			break;
+		case VIRTCHNL_ACTION_Q_REGION:
+			dest_num++;
+			input->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QGROUP;
+			input->q_index = action->act_conf.queue.index;
+			input->q_region = action->act_conf.queue.region;
+			break;
+		case VIRTCHNL_ACTION_MARK:
+			mark_num++;
+			input->fltr_id = action->act_conf.mark_id;
+			input->fdid_prio = ICE_FXD_FLTR_QW1_FDID_PRI_THREE;
+			break;
+		default:
+			dev_dbg(dev, "Invalid action type:0x%x for VF %d\n",
+				action->type, vf->vf_id);
+			return -EINVAL;
+		}
+	}
+
+	if (dest_num == 0 || dest_num >= 2) {
+		dev_dbg(dev, "Invalid destination action for VF %d\n",
+			vf->vf_id);
+		return -EINVAL;
+	}
+
+	if (mark_num >= 2) {
+		dev_dbg(dev, "Too many mark actions for VF %d\n", vf->vf_id);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vc_validate_fdir_fltr - validate the virtual channel filter
+ * @vf: pointer to the VF info
+ * @fltr: virtual channel add cmd buffer
+ * @conf: FDIR configuration for each filter
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_validate_fdir_fltr(struct ice_vf *vf, struct virtchnl_fdir_add *fltr,
+			  struct virtchnl_fdir_fltr_conf *conf)
+{
+	struct virtchnl_proto_hdrs *proto = &fltr->rule_cfg.proto_hdrs;
+	int ret;
+
+	if (!ice_vc_validate_pattern(vf, proto))
+		return -EINVAL;
+
+	ret = ice_vc_fdir_parse_pattern(vf, fltr, conf);
+	if (ret)
+		return ret;
+
+	return ice_vc_fdir_parse_action(vf, fltr, conf);
+}
+
+/**
+ * ice_vc_fdir_comp_rules - compare if two filter rules have the same value
+ * @conf_a: FDIR configuration for filter a
+ * @conf_b: FDIR configuration for filter b
+ *
+ * Return: 0 on success, and other on error.
+ */
+static bool
+ice_vc_fdir_comp_rules(struct virtchnl_fdir_fltr_conf *conf_a,
+		       struct virtchnl_fdir_fltr_conf *conf_b)
+{
+	struct ice_fdir_fltr *a = &conf_a->input;
+	struct ice_fdir_fltr *b = &conf_b->input;
+
+	if (conf_a->ttype != conf_b->ttype)
+		return false;
+	if (a->flow_type != b->flow_type)
+		return false;
+	if (memcmp(&a->ip, &b->ip, sizeof(a->ip)))
+		return false;
+	if (memcmp(&a->mask, &b->mask, sizeof(a->mask)))
+		return false;
+	if (memcmp(&a->gtpu_data, &b->gtpu_data, sizeof(a->gtpu_data)))
+		return false;
+	if (memcmp(&a->gtpu_mask, &b->gtpu_mask, sizeof(a->gtpu_mask)))
+		return false;
+	if (memcmp(&a->l2tpv3_data, &b->l2tpv3_data, sizeof(a->l2tpv3_data)))
+		return false;
+	if (memcmp(&a->l2tpv3_mask, &b->l2tpv3_mask, sizeof(a->l2tpv3_mask)))
+		return false;
+	if (memcmp(&a->ext_data, &b->ext_data, sizeof(a->ext_data)))
+		return false;
+	if (memcmp(&a->ext_mask, &b->ext_mask, sizeof(a->ext_mask)))
+		return false;
+
+	return true;
+}
+
+/**
+ * ice_vc_fdir_is_dup_fltr
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ *
+ * Check if there is duplicated rule with same conf value
+ *
+ * Return: 0 true success, and false on error.
+ */
+static bool
+ice_vc_fdir_is_dup_fltr(struct ice_vf *vf, struct virtchnl_fdir_fltr_conf *conf)
+{
+	struct ice_fdir_fltr *desc;
+	bool ret;
+
+	list_for_each_entry(desc, &vf->fdir.fdir_rule_list, fltr_node) {
+		struct virtchnl_fdir_fltr_conf *node =
+				to_fltr_conf_from_desc(desc);
+
+		ret = ice_vc_fdir_comp_rules(node, conf);
+		if (ret)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_vc_fdir_insert_entry
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ * @id: pointer to ID value allocated by driver
+ *
+ * Insert FDIR conf entry into list and allocate ID for this filter
+ *
+ * Return: 0 true success, and other on error.
+ */
+static int
+ice_vc_fdir_insert_entry(struct ice_vf *vf,
+			 struct virtchnl_fdir_fltr_conf *conf, u32 *id)
+{
+	struct ice_fdir_fltr *input = &conf->input;
+	int i;
+
+	/* alloc ID corresponding with conf */
+	i = idr_alloc(&vf->fdir.fdir_rule_idr, conf, 0,
+		      ICE_FDIR_MAX_FLTRS, GFP_KERNEL);
+	if (i < 0)
+		return -EINVAL;
+	*id = i;
+
+	list_add(&input->fltr_node, &vf->fdir.fdir_rule_list);
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_remove_entry - remove FDIR conf entry by ID value
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ * @id: filter rule's ID
+ */
+static void
+ice_vc_fdir_remove_entry(struct ice_vf *vf,
+			 struct virtchnl_fdir_fltr_conf *conf, u32 id)
+{
+	struct ice_fdir_fltr *input = &conf->input;
+
+	idr_remove(&vf->fdir.fdir_rule_idr, id);
+	list_del(&input->fltr_node);
+}
+
+/**
+ * ice_vc_fdir_lookup_entry - lookup FDIR conf entry by ID value
+ * @vf: pointer to the VF info
+ * @id: filter rule's ID
+ *
+ * Return: NULL on error, and other on success.
+ */
+static struct virtchnl_fdir_fltr_conf *
+ice_vc_fdir_lookup_entry(struct ice_vf *vf, u32 id)
+{
+	return idr_find(&vf->fdir.fdir_rule_idr, id);
+}
+
+/**
+ * ice_vc_fdir_flush_entry - remove all FDIR conf entry
+ * @vf: pointer to the VF info
+ */
+static void ice_vc_fdir_flush_entry(struct ice_vf *vf)
+{
+	struct virtchnl_fdir_fltr_conf *conf;
+	struct ice_fdir_fltr *desc, *temp;
+
+	list_for_each_entry_safe(desc, temp,
+				 &vf->fdir.fdir_rule_list, fltr_node) {
+		conf = to_fltr_conf_from_desc(desc);
+		list_del(&desc->fltr_node);
+		devm_kfree(ice_pf_to_dev(vf->pf), conf);
+	}
+}
+
+/**
+ * ice_vc_fdir_write_fltr - write filter rule into hardware
+ * @vf: pointer to the VF info
+ * @conf: FDIR configuration for each filter
+ * @add: true implies add rule, false implies del rules
+ * @is_tun: false implies non-tunnel type filter, true implies tunnel filter
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int ice_vc_fdir_write_fltr(struct ice_vf *vf,
+				  struct virtchnl_fdir_fltr_conf *conf,
+				  bool add, bool is_tun)
+{
+	struct ice_fdir_fltr *input = &conf->input;
+	struct ice_vsi *vsi, *ctrl_vsi;
+	struct ice_fltr_desc desc;
+	struct device *dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int ret;
+	u8 *pkt;
+
+	pf = vf->pf;
+	dev = ice_pf_to_dev(pf);
+	hw = &pf->hw;
+	vsi = ice_get_vf_vsi(vf);
+	if (!vsi) {
+		dev_dbg(dev, "Invalid vsi for VF %d\n", vf->vf_id);
+		return -EINVAL;
+	}
+
+	input->dest_vsi = vsi->idx;
+	input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW;
+
+	ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx];
+	if (!ctrl_vsi) {
+		dev_dbg(dev, "Invalid ctrl_vsi for VF %d\n", vf->vf_id);
+		return -EINVAL;
+	}
+
+	pkt = devm_kzalloc(dev, ICE_FDIR_MAX_RAW_PKT_SIZE, GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
+
+	ice_fdir_get_prgm_desc(hw, input, &desc, add);
+	ret = ice_fdir_get_gen_prgm_pkt(hw, input, pkt, false, is_tun);
+	if (ret) {
+		dev_dbg(dev, "Gen training pkt for VF %d ptype %d failed\n",
+			vf->vf_id, input->flow_type);
+		goto err_free_pkt;
+	}
+
+	ret = ice_prgm_fdir_fltr(ctrl_vsi, &desc, pkt);
+	if (ret)
+		goto err_free_pkt;
+
+	return 0;
+
+err_free_pkt:
+	devm_kfree(dev, pkt);
+	return ret;
+}
+
+/**
+ * ice_vf_fdir_timer - FDIR program waiting timer interrupt handler
+ * @t: pointer to timer_list
+ */
+static void ice_vf_fdir_timer(struct timer_list *t)
+{
+	struct ice_vf_fdir_ctx *ctx_irq = from_timer(ctx_irq, t, rx_tmr);
+	struct ice_vf_fdir_ctx *ctx_done;
+	struct ice_vf_fdir *fdir;
+	unsigned long flags;
+	struct ice_vf *vf;
+	struct ice_pf *pf;
+
+	fdir = container_of(ctx_irq, struct ice_vf_fdir, ctx_irq);
+	vf = container_of(fdir, struct ice_vf, fdir);
+	ctx_done = &fdir->ctx_done;
+	pf = vf->pf;
+	spin_lock_irqsave(&fdir->ctx_lock, flags);
+	if (!(ctx_irq->flags & ICE_VF_FDIR_CTX_VALID)) {
+		spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	ctx_irq->flags &= ~ICE_VF_FDIR_CTX_VALID;
+
+	ctx_done->flags |= ICE_VF_FDIR_CTX_VALID;
+	ctx_done->conf = ctx_irq->conf;
+	ctx_done->stat = ICE_FDIR_CTX_TIMEOUT;
+	ctx_done->v_opcode = ctx_irq->v_opcode;
+	spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+
+	set_bit(ICE_FD_VF_FLUSH_CTX, pf->state);
+	ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_vc_fdir_irq_handler - ctrl_vsi Rx queue interrupt handler
+ * @ctrl_vsi: pointer to a VF's CTRL VSI
+ * @rx_desc: pointer to FDIR Rx queue descriptor
+ */
+void
+ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi,
+			union ice_32b_rx_flex_desc *rx_desc)
+{
+	struct ice_pf *pf = ctrl_vsi->back;
+	struct ice_vf *vf = ctrl_vsi->vf;
+	struct ice_vf_fdir_ctx *ctx_done;
+	struct ice_vf_fdir_ctx *ctx_irq;
+	struct ice_vf_fdir *fdir;
+	unsigned long flags;
+	struct device *dev;
+	int ret;
+
+	if (WARN_ON(!vf))
+		return;
+
+	fdir = &vf->fdir;
+	ctx_done = &fdir->ctx_done;
+	ctx_irq = &fdir->ctx_irq;
+	dev = ice_pf_to_dev(pf);
+	spin_lock_irqsave(&fdir->ctx_lock, flags);
+	if (!(ctx_irq->flags & ICE_VF_FDIR_CTX_VALID)) {
+		spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	ctx_irq->flags &= ~ICE_VF_FDIR_CTX_VALID;
+
+	ctx_done->flags |= ICE_VF_FDIR_CTX_VALID;
+	ctx_done->conf = ctx_irq->conf;
+	ctx_done->stat = ICE_FDIR_CTX_IRQ;
+	ctx_done->v_opcode = ctx_irq->v_opcode;
+	memcpy(&ctx_done->rx_desc, rx_desc, sizeof(*rx_desc));
+	spin_unlock_irqrestore(&fdir->ctx_lock, flags);
+
+	ret = del_timer(&ctx_irq->rx_tmr);
+	if (!ret)
+		dev_err(dev, "VF %d: Unexpected inactive timer!\n", vf->vf_id);
+
+	set_bit(ICE_FD_VF_FLUSH_CTX, pf->state);
+	ice_service_task_schedule(pf);
+}
+
+/**
+ * ice_vf_fdir_dump_info - dump FDIR information for diagnosis
+ * @vf: pointer to the VF info
+ */
+static void ice_vf_fdir_dump_info(struct ice_vf *vf)
+{
+	struct ice_vsi *vf_vsi;
+	u32 fd_size, fd_cnt;
+	struct device *dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	u16 vsi_num;
+
+	pf = vf->pf;
+	hw = &pf->hw;
+	dev = ice_pf_to_dev(pf);
+	vf_vsi = ice_get_vf_vsi(vf);
+	if (!vf_vsi) {
+		dev_dbg(dev, "VF %d: invalid VSI pointer\n", vf->vf_id);
+		return;
+	}
+
+	vsi_num = ice_get_hw_vsi_num(hw, vf_vsi->idx);
+
+	fd_size = rd32(hw, VSIQF_FD_SIZE(vsi_num));
+	fd_cnt = rd32(hw, VSIQF_FD_CNT(vsi_num));
+	dev_dbg(dev, "VF %d: space allocated: guar:0x%x, be:0x%x, space consumed: guar:0x%x, be:0x%x\n",
+		vf->vf_id,
+		(fd_size & VSIQF_FD_CNT_FD_GCNT_M) >> VSIQF_FD_CNT_FD_GCNT_S,
+		(fd_size & VSIQF_FD_CNT_FD_BCNT_M) >> VSIQF_FD_CNT_FD_BCNT_S,
+		(fd_cnt & VSIQF_FD_CNT_FD_GCNT_M) >> VSIQF_FD_CNT_FD_GCNT_S,
+		(fd_cnt & VSIQF_FD_CNT_FD_BCNT_M) >> VSIQF_FD_CNT_FD_BCNT_S);
+}
+
+/**
+ * ice_vf_verify_rx_desc - verify received FDIR programming status descriptor
+ * @vf: pointer to the VF info
+ * @ctx: FDIR context info for post processing
+ * @status: virtchnl FDIR program status
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vf_verify_rx_desc(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx,
+		      enum virtchnl_fdir_prgm_status *status)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	u32 stat_err, error, prog_id;
+	int ret;
+
+	stat_err = le16_to_cpu(ctx->rx_desc.wb.status_error0);
+	if (((stat_err & ICE_FXD_FLTR_WB_QW1_DD_M) >>
+	    ICE_FXD_FLTR_WB_QW1_DD_S) != ICE_FXD_FLTR_WB_QW1_DD_YES) {
+		*status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_err(dev, "VF %d: Desc Done not set\n", vf->vf_id);
+		ret = -EINVAL;
+		goto err_exit;
+	}
+
+	prog_id = (stat_err & ICE_FXD_FLTR_WB_QW1_PROG_ID_M) >>
+		ICE_FXD_FLTR_WB_QW1_PROG_ID_S;
+	if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_ADD &&
+	    ctx->v_opcode != VIRTCHNL_OP_ADD_FDIR_FILTER) {
+		dev_err(dev, "VF %d: Desc show add, but ctx not",
+			vf->vf_id);
+		*status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID;
+		ret = -EINVAL;
+		goto err_exit;
+	}
+
+	if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_DEL &&
+	    ctx->v_opcode != VIRTCHNL_OP_DEL_FDIR_FILTER) {
+		dev_err(dev, "VF %d: Desc show del, but ctx not",
+			vf->vf_id);
+		*status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID;
+		ret = -EINVAL;
+		goto err_exit;
+	}
+
+	error = (stat_err & ICE_FXD_FLTR_WB_QW1_FAIL_M) >>
+		ICE_FXD_FLTR_WB_QW1_FAIL_S;
+	if (error == ICE_FXD_FLTR_WB_QW1_FAIL_YES) {
+		if (prog_id == ICE_FXD_FLTR_WB_QW1_PROG_ADD) {
+			dev_err(dev, "VF %d, Failed to add FDIR rule due to no space in the table",
+				vf->vf_id);
+			*status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		} else {
+			dev_err(dev, "VF %d, Failed to remove FDIR rule, attempt to remove non-existent entry",
+				vf->vf_id);
+			*status = VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST;
+		}
+		ret = -EINVAL;
+		goto err_exit;
+	}
+
+	error = (stat_err & ICE_FXD_FLTR_WB_QW1_FAIL_PROF_M) >>
+		ICE_FXD_FLTR_WB_QW1_FAIL_PROF_S;
+	if (error == ICE_FXD_FLTR_WB_QW1_FAIL_PROF_YES) {
+		dev_err(dev, "VF %d: Profile matching error", vf->vf_id);
+		*status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		ret = -EINVAL;
+		goto err_exit;
+	}
+
+	*status = VIRTCHNL_FDIR_SUCCESS;
+
+	return 0;
+
+err_exit:
+	ice_vf_fdir_dump_info(vf);
+	return ret;
+}
+
+/**
+ * ice_vc_add_fdir_fltr_post
+ * @vf: pointer to the VF structure
+ * @ctx: FDIR context info for post processing
+ * @status: virtchnl FDIR program status
+ * @success: true implies success, false implies failure
+ *
+ * Post process for flow director add command. If success, then do post process
+ * and send back success msg by virtchnl. Otherwise, do context reversion and
+ * send back failure msg by virtchnl.
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_add_fdir_fltr_post(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx,
+			  enum virtchnl_fdir_prgm_status status,
+			  bool success)
+{
+	struct virtchnl_fdir_fltr_conf *conf = ctx->conf;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	enum virtchnl_status_code v_ret;
+	struct virtchnl_fdir_add *resp;
+	int ret, len, is_tun;
+
+	v_ret = VIRTCHNL_STATUS_SUCCESS;
+	len = sizeof(*resp);
+	resp = kzalloc(len, GFP_KERNEL);
+	if (!resp) {
+		len = 0;
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		dev_dbg(dev, "VF %d: Alloc resp buf fail", vf->vf_id);
+		goto err_exit;
+	}
+
+	if (!success)
+		goto err_exit;
+
+	is_tun = 0;
+	resp->status = status;
+	resp->flow_id = conf->flow_id;
+	vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]++;
+
+	ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+				    (u8 *)resp, len);
+	kfree(resp);
+
+	dev_dbg(dev, "VF %d: flow_id:0x%X, FDIR %s success!\n",
+		vf->vf_id, conf->flow_id,
+		(ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER) ?
+		"add" : "del");
+	return ret;
+
+err_exit:
+	if (resp)
+		resp->status = status;
+	ice_vc_fdir_remove_entry(vf, conf, conf->flow_id);
+	devm_kfree(dev, conf);
+
+	ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+				    (u8 *)resp, len);
+	kfree(resp);
+	return ret;
+}
+
+/**
+ * ice_vc_del_fdir_fltr_post
+ * @vf: pointer to the VF structure
+ * @ctx: FDIR context info for post processing
+ * @status: virtchnl FDIR program status
+ * @success: true implies success, false implies failure
+ *
+ * Post process for flow director del command. If success, then do post process
+ * and send back success msg by virtchnl. Otherwise, do context reversion and
+ * send back failure msg by virtchnl.
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_del_fdir_fltr_post(struct ice_vf *vf, struct ice_vf_fdir_ctx *ctx,
+			  enum virtchnl_fdir_prgm_status status,
+			  bool success)
+{
+	struct virtchnl_fdir_fltr_conf *conf = ctx->conf;
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	enum virtchnl_status_code v_ret;
+	struct virtchnl_fdir_del *resp;
+	int ret, len, is_tun;
+
+	v_ret = VIRTCHNL_STATUS_SUCCESS;
+	len = sizeof(*resp);
+	resp = kzalloc(len, GFP_KERNEL);
+	if (!resp) {
+		len = 0;
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		dev_dbg(dev, "VF %d: Alloc resp buf fail", vf->vf_id);
+		goto err_exit;
+	}
+
+	if (!success)
+		goto err_exit;
+
+	is_tun = 0;
+	resp->status = status;
+	ice_vc_fdir_remove_entry(vf, conf, conf->flow_id);
+	vf->fdir.fdir_fltr_cnt[conf->input.flow_type][is_tun]--;
+
+	ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+				    (u8 *)resp, len);
+	kfree(resp);
+
+	dev_dbg(dev, "VF %d: flow_id:0x%X, FDIR %s success!\n",
+		vf->vf_id, conf->flow_id,
+		(ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER) ?
+		"add" : "del");
+	devm_kfree(dev, conf);
+	return ret;
+
+err_exit:
+	if (resp)
+		resp->status = status;
+	if (success)
+		devm_kfree(dev, conf);
+
+	ret = ice_vc_send_msg_to_vf(vf, ctx->v_opcode, v_ret,
+				    (u8 *)resp, len);
+	kfree(resp);
+	return ret;
+}
+
+/**
+ * ice_flush_fdir_ctx
+ * @pf: pointer to the PF structure
+ *
+ * Flush all the pending event on ctx_done list and process them.
+ */
+void ice_flush_fdir_ctx(struct ice_pf *pf)
+{
+	struct ice_vf *vf;
+	unsigned int bkt;
+
+	if (!test_and_clear_bit(ICE_FD_VF_FLUSH_CTX, pf->state))
+		return;
+
+	mutex_lock(&pf->vfs.table_lock);
+	ice_for_each_vf(pf, bkt, vf) {
+		struct device *dev = ice_pf_to_dev(pf);
+		enum virtchnl_fdir_prgm_status status;
+		struct ice_vf_fdir_ctx *ctx;
+		unsigned long flags;
+		int ret;
+
+		if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+			continue;
+
+		if (vf->ctrl_vsi_idx == ICE_NO_VSI)
+			continue;
+
+		ctx = &vf->fdir.ctx_done;
+		spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+		if (!(ctx->flags & ICE_VF_FDIR_CTX_VALID)) {
+			spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+			continue;
+		}
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+
+		WARN_ON(ctx->stat == ICE_FDIR_CTX_READY);
+		if (ctx->stat == ICE_FDIR_CTX_TIMEOUT) {
+			status = VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT;
+			dev_err(dev, "VF %d: ctrl_vsi irq timeout\n",
+				vf->vf_id);
+			goto err_exit;
+		}
+
+		ret = ice_vf_verify_rx_desc(vf, ctx, &status);
+		if (ret)
+			goto err_exit;
+
+		if (ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER)
+			ice_vc_add_fdir_fltr_post(vf, ctx, status, true);
+		else if (ctx->v_opcode == VIRTCHNL_OP_DEL_FDIR_FILTER)
+			ice_vc_del_fdir_fltr_post(vf, ctx, status, true);
+		else
+			dev_err(dev, "VF %d: Unsupported opcode\n", vf->vf_id);
+
+		spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+		ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+		continue;
+err_exit:
+		if (ctx->v_opcode == VIRTCHNL_OP_ADD_FDIR_FILTER)
+			ice_vc_add_fdir_fltr_post(vf, ctx, status, false);
+		else if (ctx->v_opcode == VIRTCHNL_OP_DEL_FDIR_FILTER)
+			ice_vc_del_fdir_fltr_post(vf, ctx, status, false);
+		else
+			dev_err(dev, "VF %d: Unsupported opcode\n", vf->vf_id);
+
+		spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+		ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+	}
+	mutex_unlock(&pf->vfs.table_lock);
+}
+
+/**
+ * ice_vc_fdir_set_irq_ctx - set FDIR context info for later IRQ handler
+ * @vf: pointer to the VF structure
+ * @conf: FDIR configuration for each filter
+ * @v_opcode: virtual channel operation code
+ *
+ * Return: 0 on success, and other on error.
+ */
+static int
+ice_vc_fdir_set_irq_ctx(struct ice_vf *vf, struct virtchnl_fdir_fltr_conf *conf,
+			enum virtchnl_ops v_opcode)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_vf_fdir_ctx *ctx;
+	unsigned long flags;
+
+	ctx = &vf->fdir.ctx_irq;
+	spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+	if ((vf->fdir.ctx_irq.flags & ICE_VF_FDIR_CTX_VALID) ||
+	    (vf->fdir.ctx_done.flags & ICE_VF_FDIR_CTX_VALID)) {
+		spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+		dev_dbg(dev, "VF %d: Last request is still in progress\n",
+			vf->vf_id);
+		return -EBUSY;
+	}
+	ctx->flags |= ICE_VF_FDIR_CTX_VALID;
+	spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+
+	ctx->conf = conf;
+	ctx->v_opcode = v_opcode;
+	ctx->stat = ICE_FDIR_CTX_READY;
+	timer_setup(&ctx->rx_tmr, ice_vf_fdir_timer, 0);
+
+	mod_timer(&ctx->rx_tmr, round_jiffies(msecs_to_jiffies(10) + jiffies));
+
+	return 0;
+}
+
+/**
+ * ice_vc_fdir_clear_irq_ctx - clear FDIR context info for IRQ handler
+ * @vf: pointer to the VF structure
+ *
+ * Return: 0 on success, and other on error.
+ */
+static void ice_vc_fdir_clear_irq_ctx(struct ice_vf *vf)
+{
+	struct ice_vf_fdir_ctx *ctx = &vf->fdir.ctx_irq;
+	unsigned long flags;
+
+	del_timer(&ctx->rx_tmr);
+	spin_lock_irqsave(&vf->fdir.ctx_lock, flags);
+	ctx->flags &= ~ICE_VF_FDIR_CTX_VALID;
+	spin_unlock_irqrestore(&vf->fdir.ctx_lock, flags);
+}
+
+/**
+ * ice_vc_add_fdir_fltr - add a FDIR filter for VF by the msg buffer
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Return: 0 on success, and other on error.
+ */
+int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_fdir_add *fltr = (struct virtchnl_fdir_add *)msg;
+	struct virtchnl_fdir_add *stat = NULL;
+	struct virtchnl_fdir_fltr_conf *conf;
+	enum virtchnl_status_code v_ret;
+	struct device *dev;
+	struct ice_pf *pf;
+	int is_tun = 0;
+	int len = 0;
+	int ret;
+
+	pf = vf->pf;
+	dev = ice_pf_to_dev(pf);
+	ret = ice_vc_fdir_param_check(vf, fltr->vsi_id);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		dev_dbg(dev, "Parameter check for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	ret = ice_vf_start_ctrl_vsi(vf);
+	if (ret && (ret != -EEXIST)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		dev_err(dev, "Init FDIR for VF %d failed, ret:%d\n",
+			vf->vf_id, ret);
+		goto err_exit;
+	}
+
+	stat = kzalloc(sizeof(*stat), GFP_KERNEL);
+	if (!stat) {
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		dev_dbg(dev, "Alloc stat for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	conf = devm_kzalloc(dev, sizeof(*conf), GFP_KERNEL);
+	if (!conf) {
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		dev_dbg(dev, "Alloc conf for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	len = sizeof(*stat);
+	ret = ice_vc_validate_fdir_fltr(vf, fltr, conf);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_INVALID;
+		dev_dbg(dev, "Invalid FDIR filter from VF %d\n", vf->vf_id);
+		goto err_free_conf;
+	}
+
+	if (fltr->validate_only) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_SUCCESS;
+		devm_kfree(dev, conf);
+		ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_FDIR_FILTER,
+					    v_ret, (u8 *)stat, len);
+		goto exit;
+	}
+
+	ret = ice_vc_fdir_config_input_set(vf, fltr, conf, is_tun);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT;
+		dev_err(dev, "VF %d: FDIR input set configure failed, ret:%d\n",
+			vf->vf_id, ret);
+		goto err_free_conf;
+	}
+
+	ret = ice_vc_fdir_is_dup_fltr(vf, conf);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_EXIST;
+		dev_dbg(dev, "VF %d: duplicated FDIR rule detected\n",
+			vf->vf_id);
+		goto err_free_conf;
+	}
+
+	ret = ice_vc_fdir_insert_entry(vf, conf, &conf->flow_id);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_dbg(dev, "VF %d: insert FDIR list failed\n", vf->vf_id);
+		goto err_free_conf;
+	}
+
+	ret = ice_vc_fdir_set_irq_ctx(vf, conf, VIRTCHNL_OP_ADD_FDIR_FILTER);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_dbg(dev, "VF %d: set FDIR context failed\n", vf->vf_id);
+		goto err_rem_entry;
+	}
+
+	ret = ice_vc_fdir_write_fltr(vf, conf, true, is_tun);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n",
+			vf->vf_id, ret);
+		goto err_clr_irq;
+	}
+
+exit:
+	kfree(stat);
+	return ret;
+
+err_clr_irq:
+	ice_vc_fdir_clear_irq_ctx(vf);
+err_rem_entry:
+	ice_vc_fdir_remove_entry(vf, conf, conf->flow_id);
+err_free_conf:
+	devm_kfree(dev, conf);
+err_exit:
+	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_FDIR_FILTER, v_ret,
+				    (u8 *)stat, len);
+	kfree(stat);
+	return ret;
+}
+
+/**
+ * ice_vc_del_fdir_fltr - delete a FDIR filter for VF by the msg buffer
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * Return: 0 on success, and other on error.
+ */
+int ice_vc_del_fdir_fltr(struct ice_vf *vf, u8 *msg)
+{
+	struct virtchnl_fdir_del *fltr = (struct virtchnl_fdir_del *)msg;
+	struct virtchnl_fdir_del *stat = NULL;
+	struct virtchnl_fdir_fltr_conf *conf;
+	enum virtchnl_status_code v_ret;
+	struct device *dev;
+	struct ice_pf *pf;
+	int is_tun = 0;
+	int len = 0;
+	int ret;
+
+	pf = vf->pf;
+	dev = ice_pf_to_dev(pf);
+	ret = ice_vc_fdir_param_check(vf, fltr->vsi_id);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		dev_dbg(dev, "Parameter check for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	stat = kzalloc(sizeof(*stat), GFP_KERNEL);
+	if (!stat) {
+		v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY;
+		dev_dbg(dev, "Alloc stat for VF %d failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	len = sizeof(*stat);
+
+	conf = ice_vc_fdir_lookup_entry(vf, fltr->flow_id);
+	if (!conf) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST;
+		dev_dbg(dev, "VF %d: FDIR invalid flow_id:0x%X\n",
+			vf->vf_id, fltr->flow_id);
+		goto err_exit;
+	}
+
+	/* Just return failure when ctrl_vsi idx is invalid */
+	if (vf->ctrl_vsi_idx == ICE_NO_VSI) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_err(dev, "Invalid FDIR ctrl_vsi for VF %d\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	ret = ice_vc_fdir_set_irq_ctx(vf, conf, VIRTCHNL_OP_DEL_FDIR_FILTER);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_dbg(dev, "VF %d: set FDIR context failed\n", vf->vf_id);
+		goto err_exit;
+	}
+
+	ret = ice_vc_fdir_write_fltr(vf, conf, false, is_tun);
+	if (ret) {
+		v_ret = VIRTCHNL_STATUS_SUCCESS;
+		stat->status = VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE;
+		dev_err(dev, "VF %d: writing FDIR rule failed, ret:%d\n",
+			vf->vf_id, ret);
+		goto err_del_tmr;
+	}
+
+	kfree(stat);
+
+	return ret;
+
+err_del_tmr:
+	ice_vc_fdir_clear_irq_ctx(vf);
+err_exit:
+	ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_FDIR_FILTER, v_ret,
+				    (u8 *)stat, len);
+	kfree(stat);
+	return ret;
+}
+
+/**
+ * ice_vf_fdir_init - init FDIR resource for VF
+ * @vf: pointer to the VF info
+ */
+void ice_vf_fdir_init(struct ice_vf *vf)
+{
+	struct ice_vf_fdir *fdir = &vf->fdir;
+
+	idr_init(&fdir->fdir_rule_idr);
+	INIT_LIST_HEAD(&fdir->fdir_rule_list);
+
+	spin_lock_init(&fdir->ctx_lock);
+	fdir->ctx_irq.flags = 0;
+	fdir->ctx_done.flags = 0;
+	ice_vc_fdir_reset_cnt_all(fdir);
+}
+
+/**
+ * ice_vf_fdir_exit - destroy FDIR resource for VF
+ * @vf: pointer to the VF info
+ */
+void ice_vf_fdir_exit(struct ice_vf *vf)
+{
+	ice_vc_fdir_flush_entry(vf);
+	idr_destroy(&vf->fdir.fdir_rule_idr);
+	ice_vc_fdir_rem_prof_all(vf);
+	ice_vc_fdir_free_prof_all(vf);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h
new file mode 100644
index 0000000000..c5bcc8d748
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2021, Intel Corporation. */
+
+#ifndef _ICE_VIRTCHNL_FDIR_H_
+#define _ICE_VIRTCHNL_FDIR_H_
+
+struct ice_vf;
+struct ice_pf;
+struct ice_vsi;
+
+enum ice_fdir_ctx_stat {
+	ICE_FDIR_CTX_READY,
+	ICE_FDIR_CTX_IRQ,
+	ICE_FDIR_CTX_TIMEOUT,
+};
+
+struct ice_vf_fdir_ctx {
+	struct timer_list rx_tmr;
+	enum virtchnl_ops v_opcode;
+	enum ice_fdir_ctx_stat stat;
+	union ice_32b_rx_flex_desc rx_desc;
+#define ICE_VF_FDIR_CTX_VALID		BIT(0)
+	u32 flags;
+
+	void *conf;
+};
+
+/* VF FDIR information structure */
+struct ice_vf_fdir {
+	u16 fdir_fltr_cnt[ICE_FLTR_PTYPE_MAX][ICE_FD_HW_SEG_MAX];
+	int prof_entry_cnt[ICE_FLTR_PTYPE_MAX][ICE_FD_HW_SEG_MAX];
+	struct ice_fd_hw_prof **fdir_prof;
+
+	struct idr fdir_rule_idr;
+	struct list_head fdir_rule_list;
+
+	spinlock_t ctx_lock; /* protects FDIR context info */
+	struct ice_vf_fdir_ctx ctx_irq;
+	struct ice_vf_fdir_ctx ctx_done;
+};
+
+#ifdef CONFIG_PCI_IOV
+int ice_vc_add_fdir_fltr(struct ice_vf *vf, u8 *msg);
+int ice_vc_del_fdir_fltr(struct ice_vf *vf, u8 *msg);
+void ice_vf_fdir_init(struct ice_vf *vf);
+void ice_vf_fdir_exit(struct ice_vf *vf);
+void
+ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi,
+			union ice_32b_rx_flex_desc *rx_desc);
+void ice_flush_fdir_ctx(struct ice_pf *pf);
+#else
+static inline void
+ice_vc_fdir_irq_handler(struct ice_vsi *ctrl_vsi, union ice_32b_rx_flex_desc *rx_desc) { }
+static inline void ice_flush_fdir_ctx(struct ice_pf *pf) { }
+#endif /* CONFIG_PCI_IOV */
+#endif /* _ICE_VIRTCHNL_FDIR_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vlan.h b/drivers/net/ethernet/intel/ice/ice_vlan.h
new file mode 100644
index 0000000000..bc4550a031
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vlan.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_VLAN_H_
+#define _ICE_VLAN_H_
+
+#include <linux/types.h>
+#include "ice_type.h"
+
+struct ice_vlan {
+	u16 tpid;
+	u16 vid;
+	u8 prio;
+};
+
+#define ICE_VLAN(tpid, vid, prio) ((struct ice_vlan){ tpid, vid, prio })
+
+#endif /* _ICE_VLAN_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vlan_mode.c b/drivers/net/ethernet/intel/ice/ice_vlan_mode.c
new file mode 100644
index 0000000000..1279c1ffe3
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vlan_mode.c
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice_common.h"
+
+/**
+ * ice_pkg_get_supported_vlan_mode - determine if DDP supports Double VLAN mode
+ * @hw: pointer to the HW struct
+ * @dvm: output variable to determine if DDP supports DVM(true) or SVM(false)
+ */
+static int
+ice_pkg_get_supported_vlan_mode(struct ice_hw *hw, bool *dvm)
+{
+	u16 meta_init_size = sizeof(struct ice_meta_init_section);
+	struct ice_meta_init_section *sect;
+	struct ice_buf_build *bld;
+	int status;
+
+	/* if anything fails, we assume there is no DVM support */
+	*dvm = false;
+
+	bld = ice_pkg_buf_alloc_single_section(hw,
+					       ICE_SID_RXPARSER_METADATA_INIT,
+					       meta_init_size, (void **)&sect);
+	if (!bld)
+		return -ENOMEM;
+
+	/* only need to read a single section */
+	sect->count = cpu_to_le16(1);
+	sect->offset = cpu_to_le16(ICE_META_VLAN_MODE_ENTRY);
+
+	status = ice_aq_upload_section(hw,
+				       (struct ice_buf_hdr *)ice_pkg_buf(bld),
+				       ICE_PKG_BUF_SIZE, NULL);
+	if (!status) {
+		DECLARE_BITMAP(entry, ICE_META_INIT_BITS);
+		u32 arr[ICE_META_INIT_DW_CNT];
+		u16 i;
+
+		/* convert to host bitmap format */
+		for (i = 0; i < ICE_META_INIT_DW_CNT; i++)
+			arr[i] = le32_to_cpu(sect->entry.bm[i]);
+
+		bitmap_from_arr32(entry, arr, (u16)ICE_META_INIT_BITS);
+
+		/* check if DVM is supported */
+		*dvm = test_bit(ICE_META_VLAN_MODE_BIT, entry);
+	}
+
+	ice_pkg_buf_free(hw, bld);
+
+	return status;
+}
+
+/**
+ * ice_aq_get_vlan_mode - get the VLAN mode of the device
+ * @hw: pointer to the HW structure
+ * @get_params: structure FW fills in based on the current VLAN mode config
+ *
+ * Get VLAN Mode Parameters (0x020D)
+ */
+static int
+ice_aq_get_vlan_mode(struct ice_hw *hw,
+		     struct ice_aqc_get_vlan_mode *get_params)
+{
+	struct ice_aq_desc desc;
+
+	if (!get_params)
+		return -EINVAL;
+
+	ice_fill_dflt_direct_cmd_desc(&desc,
+				      ice_aqc_opc_get_vlan_mode_parameters);
+
+	return ice_aq_send_cmd(hw, &desc, get_params, sizeof(*get_params),
+			       NULL);
+}
+
+/**
+ * ice_aq_is_dvm_ena - query FW to check if double VLAN mode is enabled
+ * @hw: pointer to the HW structure
+ *
+ * Returns true if the hardware/firmware is configured in double VLAN mode,
+ * else return false signaling that the hardware/firmware is configured in
+ * single VLAN mode.
+ *
+ * Also, return false if this call fails for any reason (i.e. firmware doesn't
+ * support this AQ call).
+ */
+static bool ice_aq_is_dvm_ena(struct ice_hw *hw)
+{
+	struct ice_aqc_get_vlan_mode get_params = { 0 };
+	int status;
+
+	status = ice_aq_get_vlan_mode(hw, &get_params);
+	if (status) {
+		ice_debug(hw, ICE_DBG_AQ, "Failed to get VLAN mode, status %d\n",
+			  status);
+		return false;
+	}
+
+	return (get_params.vlan_mode & ICE_AQ_VLAN_MODE_DVM_ENA);
+}
+
+/**
+ * ice_is_dvm_ena - check if double VLAN mode is enabled
+ * @hw: pointer to the HW structure
+ *
+ * The device is configured in single or double VLAN mode on initialization and
+ * this cannot be dynamically changed during runtime. Based on this there is no
+ * need to make an AQ call every time the driver needs to know the VLAN mode.
+ * Instead, use the cached VLAN mode.
+ */
+bool ice_is_dvm_ena(struct ice_hw *hw)
+{
+	return hw->dvm_ena;
+}
+
+/**
+ * ice_cache_vlan_mode - cache VLAN mode after DDP is downloaded
+ * @hw: pointer to the HW structure
+ *
+ * This is only called after downloading the DDP and after the global
+ * configuration lock has been released because all ports on a device need to
+ * cache the VLAN mode.
+ */
+static void ice_cache_vlan_mode(struct ice_hw *hw)
+{
+	hw->dvm_ena = ice_aq_is_dvm_ena(hw) ? true : false;
+}
+
+/**
+ * ice_pkg_supports_dvm - find out if DDP supports DVM
+ * @hw: pointer to the HW structure
+ */
+static bool ice_pkg_supports_dvm(struct ice_hw *hw)
+{
+	bool pkg_supports_dvm;
+	int status;
+
+	status = ice_pkg_get_supported_vlan_mode(hw, &pkg_supports_dvm);
+	if (status) {
+		ice_debug(hw, ICE_DBG_PKG, "Failed to get supported VLAN mode, status %d\n",
+			  status);
+		return false;
+	}
+
+	return pkg_supports_dvm;
+}
+
+/**
+ * ice_fw_supports_dvm - find out if FW supports DVM
+ * @hw: pointer to the HW structure
+ */
+static bool ice_fw_supports_dvm(struct ice_hw *hw)
+{
+	struct ice_aqc_get_vlan_mode get_vlan_mode = { 0 };
+	int status;
+
+	/* If firmware returns success, then it supports DVM, else it only
+	 * supports SVM
+	 */
+	status = ice_aq_get_vlan_mode(hw, &get_vlan_mode);
+	if (status) {
+		ice_debug(hw, ICE_DBG_NVM, "Failed to get VLAN mode, status %d\n",
+			  status);
+		return false;
+	}
+
+	return true;
+}
+
+/**
+ * ice_is_dvm_supported - check if Double VLAN Mode is supported
+ * @hw: pointer to the hardware structure
+ *
+ * Returns true if Double VLAN Mode (DVM) is supported and false if only Single
+ * VLAN Mode (SVM) is supported. In order for DVM to be supported the DDP and
+ * firmware must support it, otherwise only SVM is supported. This function
+ * should only be called while the global config lock is held and after the
+ * package has been successfully downloaded.
+ */
+static bool ice_is_dvm_supported(struct ice_hw *hw)
+{
+	if (!ice_pkg_supports_dvm(hw)) {
+		ice_debug(hw, ICE_DBG_PKG, "DDP doesn't support DVM\n");
+		return false;
+	}
+
+	if (!ice_fw_supports_dvm(hw)) {
+		ice_debug(hw, ICE_DBG_PKG, "FW doesn't support DVM\n");
+		return false;
+	}
+
+	return true;
+}
+
+#define ICE_EXTERNAL_VLAN_ID_FV_IDX			11
+#define ICE_SW_LKUP_VLAN_LOC_LKUP_IDX			1
+#define ICE_SW_LKUP_VLAN_PKT_FLAGS_LKUP_IDX		2
+#define ICE_SW_LKUP_PROMISC_VLAN_LOC_LKUP_IDX		2
+#define ICE_PKT_FLAGS_0_TO_15_FV_IDX			1
+static struct ice_update_recipe_lkup_idx_params ice_dvm_dflt_recipes[] = {
+	{
+		/* Update recipe ICE_SW_LKUP_VLAN to filter based on the
+		 * outer/single VLAN in DVM
+		 */
+		.rid = ICE_SW_LKUP_VLAN,
+		.fv_idx = ICE_EXTERNAL_VLAN_ID_FV_IDX,
+		.ignore_valid = true,
+		.mask = 0,
+		.mask_valid = false, /* use pre-existing mask */
+		.lkup_idx = ICE_SW_LKUP_VLAN_LOC_LKUP_IDX,
+	},
+	{
+		/* Update recipe ICE_SW_LKUP_VLAN to filter based on the VLAN
+		 * packet flags to support VLAN filtering on multiple VLAN
+		 * ethertypes (i.e. 0x8100 and 0x88a8) in DVM
+		 */
+		.rid = ICE_SW_LKUP_VLAN,
+		.fv_idx = ICE_PKT_FLAGS_0_TO_15_FV_IDX,
+		.ignore_valid = false,
+		.mask = ICE_PKT_VLAN_MASK,
+		.mask_valid = true,
+		.lkup_idx = ICE_SW_LKUP_VLAN_PKT_FLAGS_LKUP_IDX,
+	},
+	{
+		/* Update recipe ICE_SW_LKUP_PROMISC_VLAN to filter based on the
+		 * outer/single VLAN in DVM
+		 */
+		.rid = ICE_SW_LKUP_PROMISC_VLAN,
+		.fv_idx = ICE_EXTERNAL_VLAN_ID_FV_IDX,
+		.ignore_valid = true,
+		.mask = 0,
+		.mask_valid = false,  /* use pre-existing mask */
+		.lkup_idx = ICE_SW_LKUP_PROMISC_VLAN_LOC_LKUP_IDX,
+	},
+};
+
+/**
+ * ice_dvm_update_dflt_recipes - update default switch recipes in DVM
+ * @hw: hardware structure used to update the recipes
+ */
+static int ice_dvm_update_dflt_recipes(struct ice_hw *hw)
+{
+	unsigned long i;
+
+	for (i = 0; i < ARRAY_SIZE(ice_dvm_dflt_recipes); i++) {
+		struct ice_update_recipe_lkup_idx_params *params;
+		int status;
+
+		params = &ice_dvm_dflt_recipes[i];
+
+		status = ice_update_recipe_lkup_idx(hw, params);
+		if (status) {
+			ice_debug(hw, ICE_DBG_INIT, "Failed to update RID %d lkup_idx %d fv_idx %d mask_valid %s mask 0x%04x\n",
+				  params->rid, params->lkup_idx, params->fv_idx,
+				  params->mask_valid ? "true" : "false",
+				  params->mask);
+			return status;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_aq_set_vlan_mode - set the VLAN mode of the device
+ * @hw: pointer to the HW structure
+ * @set_params: requested VLAN mode configuration
+ *
+ * Set VLAN Mode Parameters (0x020C)
+ */
+static int
+ice_aq_set_vlan_mode(struct ice_hw *hw,
+		     struct ice_aqc_set_vlan_mode *set_params)
+{
+	u8 rdma_packet, mng_vlan_prot_id;
+	struct ice_aq_desc desc;
+
+	if (!set_params)
+		return -EINVAL;
+
+	if (set_params->l2tag_prio_tagging > ICE_AQ_VLAN_PRIO_TAG_MAX)
+		return -EINVAL;
+
+	rdma_packet = set_params->rdma_packet;
+	if (rdma_packet != ICE_AQ_SVM_VLAN_RDMA_PKT_FLAG_SETTING &&
+	    rdma_packet != ICE_AQ_DVM_VLAN_RDMA_PKT_FLAG_SETTING)
+		return -EINVAL;
+
+	mng_vlan_prot_id = set_params->mng_vlan_prot_id;
+	if (mng_vlan_prot_id != ICE_AQ_VLAN_MNG_PROTOCOL_ID_OUTER &&
+	    mng_vlan_prot_id != ICE_AQ_VLAN_MNG_PROTOCOL_ID_INNER)
+		return -EINVAL;
+
+	ice_fill_dflt_direct_cmd_desc(&desc,
+				      ice_aqc_opc_set_vlan_mode_parameters);
+	desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+	return ice_aq_send_cmd(hw, &desc, set_params, sizeof(*set_params),
+			       NULL);
+}
+
+/**
+ * ice_set_dvm - sets up software and hardware for double VLAN mode
+ * @hw: pointer to the hardware structure
+ */
+static int ice_set_dvm(struct ice_hw *hw)
+{
+	struct ice_aqc_set_vlan_mode params = { 0 };
+	int status;
+
+	params.l2tag_prio_tagging = ICE_AQ_VLAN_PRIO_TAG_OUTER_CTAG;
+	params.rdma_packet = ICE_AQ_DVM_VLAN_RDMA_PKT_FLAG_SETTING;
+	params.mng_vlan_prot_id = ICE_AQ_VLAN_MNG_PROTOCOL_ID_OUTER;
+
+	status = ice_aq_set_vlan_mode(hw, &params);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to set double VLAN mode parameters, status %d\n",
+			  status);
+		return status;
+	}
+
+	status = ice_dvm_update_dflt_recipes(hw);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to update default recipes for double VLAN mode, status %d\n",
+			  status);
+		return status;
+	}
+
+	status = ice_aq_set_port_params(hw->port_info, true, NULL);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to set port in double VLAN mode, status %d\n",
+			  status);
+		return status;
+	}
+
+	status = ice_set_dvm_boost_entries(hw);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to set boost TCAM entries for double VLAN mode, status %d\n",
+			  status);
+		return status;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_set_svm - set single VLAN mode
+ * @hw: pointer to the HW structure
+ */
+static int ice_set_svm(struct ice_hw *hw)
+{
+	struct ice_aqc_set_vlan_mode *set_params;
+	int status;
+
+	status = ice_aq_set_port_params(hw->port_info, false, NULL);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Failed to set port parameters for single VLAN mode\n");
+		return status;
+	}
+
+	set_params = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*set_params),
+				  GFP_KERNEL);
+	if (!set_params)
+		return -ENOMEM;
+
+	/* default configuration for SVM configurations */
+	set_params->l2tag_prio_tagging = ICE_AQ_VLAN_PRIO_TAG_INNER_CTAG;
+	set_params->rdma_packet = ICE_AQ_SVM_VLAN_RDMA_PKT_FLAG_SETTING;
+	set_params->mng_vlan_prot_id = ICE_AQ_VLAN_MNG_PROTOCOL_ID_INNER;
+
+	status = ice_aq_set_vlan_mode(hw, set_params);
+	if (status)
+		ice_debug(hw, ICE_DBG_INIT, "Failed to configure port in single VLAN mode\n");
+
+	devm_kfree(ice_hw_to_dev(hw), set_params);
+	return status;
+}
+
+/**
+ * ice_set_vlan_mode
+ * @hw: pointer to the HW structure
+ */
+int ice_set_vlan_mode(struct ice_hw *hw)
+{
+	if (!ice_is_dvm_supported(hw))
+		return 0;
+
+	if (!ice_set_dvm(hw))
+		return 0;
+
+	return ice_set_svm(hw);
+}
+
+/**
+ * ice_print_dvm_not_supported - print if DDP and/or FW doesn't support DVM
+ * @hw: pointer to the HW structure
+ *
+ * The purpose of this function is to print that  QinQ is not supported due to
+ * incompatibilty from the DDP and/or FW. This will give a hint to the user to
+ * update one and/or both components if they expect QinQ functionality.
+ */
+static void ice_print_dvm_not_supported(struct ice_hw *hw)
+{
+	bool pkg_supports_dvm = ice_pkg_supports_dvm(hw);
+	bool fw_supports_dvm = ice_fw_supports_dvm(hw);
+
+	if (!fw_supports_dvm && !pkg_supports_dvm)
+		dev_info(ice_hw_to_dev(hw), "QinQ functionality cannot be enabled on this device. Update your DDP package and NVM to versions that support QinQ.\n");
+	else if (!pkg_supports_dvm)
+		dev_info(ice_hw_to_dev(hw), "QinQ functionality cannot be enabled on this device. Update your DDP package to a version that supports QinQ.\n");
+	else if (!fw_supports_dvm)
+		dev_info(ice_hw_to_dev(hw), "QinQ functionality cannot be enabled on this device. Update your NVM to a version that supports QinQ.\n");
+}
+
+/**
+ * ice_post_pkg_dwnld_vlan_mode_cfg - configure VLAN mode after DDP download
+ * @hw: pointer to the HW structure
+ *
+ * This function is meant to configure any VLAN mode specific functionality
+ * after the global configuration lock has been released and the DDP has been
+ * downloaded.
+ *
+ * Since only one PF downloads the DDP and configures the VLAN mode there needs
+ * to be a way to configure the other PFs after the DDP has been downloaded and
+ * the global configuration lock has been released. All such code should go in
+ * this function.
+ */
+void ice_post_pkg_dwnld_vlan_mode_cfg(struct ice_hw *hw)
+{
+	ice_cache_vlan_mode(hw);
+
+	if (ice_is_dvm_ena(hw))
+		ice_change_proto_id_to_dvm();
+	else
+		ice_print_dvm_not_supported(hw);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_vlan_mode.h b/drivers/net/ethernet/intel/ice/ice_vlan_mode.h
new file mode 100644
index 0000000000..a0fb743d08
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vlan_mode.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_VLAN_MODE_H_
+#define _ICE_VLAN_MODE_H_
+
+struct ice_hw;
+
+bool ice_is_dvm_ena(struct ice_hw *hw);
+int ice_set_vlan_mode(struct ice_hw *hw);
+void ice_post_pkg_dwnld_vlan_mode_cfg(struct ice_hw *hw);
+
+#endif /* _ICE_VLAN_MODE_H */
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
new file mode 100644
index 0000000000..76266e709a
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.c
@@ -0,0 +1,785 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice_vsi_vlan_lib.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
+#include "ice.h"
+
+static void print_invalid_tpid(struct ice_vsi *vsi, u16 tpid)
+{
+	dev_err(ice_pf_to_dev(vsi->back), "%s %d specified invalid VLAN tpid 0x%04x\n",
+		ice_vsi_type_str(vsi->type), vsi->idx, tpid);
+}
+
+/**
+ * validate_vlan - check if the ice_vlan passed in is valid
+ * @vsi: VSI used for printing error message
+ * @vlan: ice_vlan structure to validate
+ *
+ * Return true if the VLAN TPID is valid or if the VLAN TPID is 0 and the VLAN
+ * VID is 0, which allows for non-zero VLAN filters with the specified VLAN TPID
+ * and untagged VLAN 0 filters to be added to the prune list respectively.
+ */
+static bool validate_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+	if (vlan->tpid != ETH_P_8021Q && vlan->tpid != ETH_P_8021AD &&
+	    vlan->tpid != ETH_P_QINQ1 && (vlan->tpid || vlan->vid)) {
+		print_invalid_tpid(vsi, vlan->tpid);
+		return false;
+	}
+
+	return true;
+}
+
+/**
+ * ice_vsi_add_vlan - default add VLAN implementation for all VSI types
+ * @vsi: VSI being configured
+ * @vlan: VLAN filter to add
+ */
+int ice_vsi_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+	int err;
+
+	if (!validate_vlan(vsi, vlan))
+		return -EINVAL;
+
+	err = ice_fltr_add_vlan(vsi, vlan);
+	if (err && err != -EEXIST) {
+		dev_err(ice_pf_to_dev(vsi->back), "Failure Adding VLAN %d on VSI %i, status %d\n",
+			vlan->vid, vsi->vsi_num, err);
+		return err;
+	}
+
+	vsi->num_vlan++;
+	return 0;
+}
+
+/**
+ * ice_vsi_del_vlan - default del VLAN implementation for all VSI types
+ * @vsi: VSI being configured
+ * @vlan: VLAN filter to delete
+ */
+int ice_vsi_del_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	int err;
+
+	if (!validate_vlan(vsi, vlan))
+		return -EINVAL;
+
+	dev = ice_pf_to_dev(pf);
+
+	err = ice_fltr_remove_vlan(vsi, vlan);
+	if (!err)
+		vsi->num_vlan--;
+	else if (err == -ENOENT || err == -EBUSY)
+		err = 0;
+	else
+		dev_err(dev, "Error removing VLAN %d on VSI %i error: %d\n",
+			vlan->vid, vsi->vsi_num, err);
+
+	return err;
+}
+
+/**
+ * ice_vsi_manage_vlan_insertion - Manage VLAN insertion for the VSI for Tx
+ * @vsi: the VSI being changed
+ */
+static int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	int err;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	/* Here we are configuring the VSI to let the driver add VLAN tags by
+	 * setting inner_vlan_flags to ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL. The actual VLAN tag
+	 * insertion happens in the Tx hot path, in ice_tx_map.
+	 */
+	ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL;
+
+	/* Preserve existing VLAN strip setting */
+	ctxt->info.inner_vlan_flags |= (vsi->info.inner_vlan_flags &
+					ICE_AQ_VSI_INNER_VLAN_EMODE_M);
+
+	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
+
+	err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (err) {
+		dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN insert failed, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+		goto out;
+	}
+
+	vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags;
+out:
+	kfree(ctxt);
+	return err;
+}
+
+/**
+ * ice_vsi_manage_vlan_stripping - Manage VLAN stripping for the VSI for Rx
+ * @vsi: the VSI being changed
+ * @ena: boolean value indicating if this is a enable or disable request
+ */
+static int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	int err;
+
+	/* do not allow modifying VLAN stripping when a port VLAN is configured
+	 * on this VSI
+	 */
+	if (vsi->info.port_based_inner_vlan)
+		return 0;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	/* Here we are configuring what the VSI should do with the VLAN tag in
+	 * the Rx packet. We can either leave the tag in the packet or put it in
+	 * the Rx descriptor.
+	 */
+	if (ena)
+		/* Strip VLAN tag from Rx packet and put it in the desc */
+		ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_EMODE_STR_BOTH;
+	else
+		/* Disable stripping. Leave tag in packet */
+		ctxt->info.inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_EMODE_NOTHING;
+
+	/* Allow all packets untagged/tagged */
+	ctxt->info.inner_vlan_flags |= ICE_AQ_VSI_INNER_VLAN_TX_MODE_ALL;
+
+	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID);
+
+	err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (err) {
+		dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN strip failed, ena = %d err %d aq_err %s\n",
+			ena, err, ice_aq_str(hw->adminq.sq_last_status));
+		goto out;
+	}
+
+	vsi->info.inner_vlan_flags = ctxt->info.inner_vlan_flags;
+out:
+	kfree(ctxt);
+	return err;
+}
+
+int ice_vsi_ena_inner_stripping(struct ice_vsi *vsi, const u16 tpid)
+{
+	if (tpid != ETH_P_8021Q) {
+		print_invalid_tpid(vsi, tpid);
+		return -EINVAL;
+	}
+
+	return ice_vsi_manage_vlan_stripping(vsi, true);
+}
+
+int ice_vsi_dis_inner_stripping(struct ice_vsi *vsi)
+{
+	return ice_vsi_manage_vlan_stripping(vsi, false);
+}
+
+int ice_vsi_ena_inner_insertion(struct ice_vsi *vsi, const u16 tpid)
+{
+	if (tpid != ETH_P_8021Q) {
+		print_invalid_tpid(vsi, tpid);
+		return -EINVAL;
+	}
+
+	return ice_vsi_manage_vlan_insertion(vsi);
+}
+
+int ice_vsi_dis_inner_insertion(struct ice_vsi *vsi)
+{
+	return ice_vsi_manage_vlan_insertion(vsi);
+}
+
+static void
+ice_save_vlan_info(struct ice_aqc_vsi_props *info,
+		   struct ice_vsi_vlan_info *vlan)
+{
+	vlan->sw_flags2 = info->sw_flags2;
+	vlan->inner_vlan_flags = info->inner_vlan_flags;
+	vlan->outer_vlan_flags = info->outer_vlan_flags;
+}
+
+static void
+ice_restore_vlan_info(struct ice_aqc_vsi_props *info,
+		      struct ice_vsi_vlan_info *vlan)
+{
+	info->sw_flags2 = vlan->sw_flags2;
+	info->inner_vlan_flags = vlan->inner_vlan_flags;
+	info->outer_vlan_flags = vlan->outer_vlan_flags;
+}
+
+/**
+ * __ice_vsi_set_inner_port_vlan - set port VLAN VSI context settings to enable a port VLAN
+ * @vsi: the VSI to update
+ * @pvid_info: VLAN ID and QoS used to set the PVID VSI context field
+ */
+static int __ice_vsi_set_inner_port_vlan(struct ice_vsi *vsi, u16 pvid_info)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_aqc_vsi_props *info;
+	struct ice_vsi_ctx *ctxt;
+	int ret;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ice_save_vlan_info(&vsi->info, &vsi->vlan_info);
+	ctxt->info = vsi->info;
+	info = &ctxt->info;
+	info->inner_vlan_flags = ICE_AQ_VSI_INNER_VLAN_TX_MODE_ACCEPTUNTAGGED |
+		ICE_AQ_VSI_INNER_VLAN_INSERT_PVID |
+		ICE_AQ_VSI_INNER_VLAN_EMODE_STR;
+	info->sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+
+	info->port_based_inner_vlan = cpu_to_le16(pvid_info);
+	info->valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
+					   ICE_AQ_VSI_PROP_SW_VALID);
+
+	ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (ret) {
+		dev_info(ice_hw_to_dev(hw), "update VSI for port VLAN failed, err %d aq_err %s\n",
+			 ret, ice_aq_str(hw->adminq.sq_last_status));
+		goto out;
+	}
+
+	vsi->info.inner_vlan_flags = info->inner_vlan_flags;
+	vsi->info.sw_flags2 = info->sw_flags2;
+	vsi->info.port_based_inner_vlan = info->port_based_inner_vlan;
+out:
+	kfree(ctxt);
+	return ret;
+}
+
+int ice_vsi_set_inner_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+	u16 port_vlan_info;
+
+	if (vlan->tpid != ETH_P_8021Q)
+		return -EINVAL;
+
+	if (vlan->prio > 7)
+		return -EINVAL;
+
+	port_vlan_info = vlan->vid | (vlan->prio << VLAN_PRIO_SHIFT);
+
+	return __ice_vsi_set_inner_port_vlan(vsi, port_vlan_info);
+}
+
+int ice_vsi_clear_inner_port_vlan(struct ice_vsi *vsi)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_aqc_vsi_props *info;
+	struct ice_vsi_ctx *ctxt;
+	int ret;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ice_restore_vlan_info(&vsi->info, &vsi->vlan_info);
+	vsi->info.port_based_inner_vlan = 0;
+	ctxt->info = vsi->info;
+	info = &ctxt->info;
+	info->valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID |
+					   ICE_AQ_VSI_PROP_SW_VALID);
+
+	ret = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (ret)
+		dev_err(ice_hw_to_dev(hw), "update VSI for port VLAN failed, err %d aq_err %s\n",
+			ret, ice_aq_str(hw->adminq.sq_last_status));
+
+	kfree(ctxt);
+	return ret;
+}
+
+/**
+ * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI
+ * @vsi: VSI to enable or disable VLAN pruning on
+ * @ena: set to true to enable VLAN pruning and false to disable it
+ *
+ * returns 0 if VSI is updated, negative otherwise
+ */
+static int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena)
+{
+	struct ice_vsi_ctx *ctxt;
+	struct ice_pf *pf;
+	int status;
+
+	if (!vsi)
+		return -EINVAL;
+
+	/* Don't enable VLAN pruning if the netdev is currently in promiscuous
+	 * mode. VLAN pruning will be enabled when the interface exits
+	 * promiscuous mode if any VLAN filters are active.
+	 */
+	if (vsi->netdev && vsi->netdev->flags & IFF_PROMISC && ena)
+		return 0;
+
+	pf = vsi->back;
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ctxt->info = vsi->info;
+
+	if (ena)
+		ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+	else
+		ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+
+	ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
+
+	status = ice_update_vsi(&pf->hw, vsi->idx, ctxt, NULL);
+	if (status) {
+		netdev_err(vsi->netdev, "%sabling VLAN pruning on VSI handle: %d, VSI HW ID: %d failed, err = %d, aq_err = %s\n",
+			   ena ? "En" : "Dis", vsi->idx, vsi->vsi_num, status,
+			   ice_aq_str(pf->hw.adminq.sq_last_status));
+		goto err_out;
+	}
+
+	vsi->info.sw_flags2 = ctxt->info.sw_flags2;
+
+	kfree(ctxt);
+	return 0;
+
+err_out:
+	kfree(ctxt);
+	return status;
+}
+
+int ice_vsi_ena_rx_vlan_filtering(struct ice_vsi *vsi)
+{
+	return ice_cfg_vlan_pruning(vsi, true);
+}
+
+int ice_vsi_dis_rx_vlan_filtering(struct ice_vsi *vsi)
+{
+	return ice_cfg_vlan_pruning(vsi, false);
+}
+
+static int ice_cfg_vlan_antispoof(struct ice_vsi *vsi, bool enable)
+{
+	struct ice_vsi_ctx *ctx;
+	int err;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->info.sec_flags = vsi->info.sec_flags;
+	ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+
+	if (enable)
+		ctx->info.sec_flags |= ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+			ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S;
+	else
+		ctx->info.sec_flags &= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+					 ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+
+	err = ice_update_vsi(&vsi->back->hw, vsi->idx, ctx, NULL);
+	if (err)
+		dev_err(ice_pf_to_dev(vsi->back), "Failed to configure Tx VLAN anti-spoof %s for VSI %d, error %d\n",
+			enable ? "ON" : "OFF", vsi->vsi_num, err);
+	else
+		vsi->info.sec_flags = ctx->info.sec_flags;
+
+	kfree(ctx);
+
+	return err;
+}
+
+int ice_vsi_ena_tx_vlan_filtering(struct ice_vsi *vsi)
+{
+	return ice_cfg_vlan_antispoof(vsi, true);
+}
+
+int ice_vsi_dis_tx_vlan_filtering(struct ice_vsi *vsi)
+{
+	return ice_cfg_vlan_antispoof(vsi, false);
+}
+
+/**
+ * tpid_to_vsi_outer_vlan_type - convert from TPID to VSI context based tag_type
+ * @tpid: tpid used to translate into VSI context based tag_type
+ * @tag_type: output variable to hold the VSI context based tag type
+ */
+static int tpid_to_vsi_outer_vlan_type(u16 tpid, u8 *tag_type)
+{
+	switch (tpid) {
+	case ETH_P_8021Q:
+		*tag_type = ICE_AQ_VSI_OUTER_TAG_VLAN_8100;
+		break;
+	case ETH_P_8021AD:
+		*tag_type = ICE_AQ_VSI_OUTER_TAG_STAG;
+		break;
+	case ETH_P_QINQ1:
+		*tag_type = ICE_AQ_VSI_OUTER_TAG_VLAN_9100;
+		break;
+	default:
+		*tag_type = 0;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vsi_ena_outer_stripping - enable outer VLAN stripping
+ * @vsi: VSI to configure
+ * @tpid: TPID to enable outer VLAN stripping for
+ *
+ * Enable outer VLAN stripping via VSI context. This function should only be
+ * used if DVM is supported. Also, this function should never be called directly
+ * as it should be part of ice_vsi_vlan_ops if it's needed.
+ *
+ * Since the VSI context only supports a single TPID for insertion and
+ * stripping, setting the TPID for stripping will affect the TPID for insertion.
+ * Callers need to be aware of this limitation.
+ *
+ * Only modify outer VLAN stripping settings and the VLAN TPID. Outer VLAN
+ * insertion settings are unmodified.
+ *
+ * This enables hardware to strip a VLAN tag with the specified TPID to be
+ * stripped from the packet and placed in the receive descriptor.
+ */
+int ice_vsi_ena_outer_stripping(struct ice_vsi *vsi, u16 tpid)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	u8 tag_type;
+	int err;
+
+	/* do not allow modifying VLAN stripping when a port VLAN is configured
+	 * on this VSI
+	 */
+	if (vsi->info.port_based_outer_vlan)
+		return 0;
+
+	if (tpid_to_vsi_outer_vlan_type(tpid, &tag_type))
+		return -EINVAL;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ctxt->info.valid_sections =
+		cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID);
+	/* clear current outer VLAN strip settings */
+	ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags &
+		~(ICE_AQ_VSI_OUTER_VLAN_EMODE_M | ICE_AQ_VSI_OUTER_TAG_TYPE_M);
+	ctxt->info.outer_vlan_flags |=
+		((ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW_BOTH <<
+		  ICE_AQ_VSI_OUTER_VLAN_EMODE_S) |
+		 ((tag_type << ICE_AQ_VSI_OUTER_TAG_TYPE_S) &
+		  ICE_AQ_VSI_OUTER_TAG_TYPE_M));
+
+	err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (err)
+		dev_err(ice_pf_to_dev(vsi->back), "update VSI for enabling outer VLAN stripping failed, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+	else
+		vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags;
+
+	kfree(ctxt);
+	return err;
+}
+
+/**
+ * ice_vsi_dis_outer_stripping - disable outer VLAN stripping
+ * @vsi: VSI to configure
+ *
+ * Disable outer VLAN stripping via VSI context. This function should only be
+ * used if DVM is supported. Also, this function should never be called directly
+ * as it should be part of ice_vsi_vlan_ops if it's needed.
+ *
+ * Only modify the outer VLAN stripping settings. The VLAN TPID and outer VLAN
+ * insertion settings are unmodified.
+ *
+ * This tells the hardware to not strip any VLAN tagged packets, thus leaving
+ * them in the packet. This enables software offloaded VLAN stripping and
+ * disables hardware offloaded VLAN stripping.
+ */
+int ice_vsi_dis_outer_stripping(struct ice_vsi *vsi)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	int err;
+
+	if (vsi->info.port_based_outer_vlan)
+		return 0;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ctxt->info.valid_sections =
+		cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID);
+	/* clear current outer VLAN strip settings */
+	ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags &
+		~ICE_AQ_VSI_OUTER_VLAN_EMODE_M;
+	ctxt->info.outer_vlan_flags |= ICE_AQ_VSI_OUTER_VLAN_EMODE_NOTHING <<
+		ICE_AQ_VSI_OUTER_VLAN_EMODE_S;
+
+	err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (err)
+		dev_err(ice_pf_to_dev(vsi->back), "update VSI for disabling outer VLAN stripping failed, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+	else
+		vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags;
+
+	kfree(ctxt);
+	return err;
+}
+
+/**
+ * ice_vsi_ena_outer_insertion - enable outer VLAN insertion
+ * @vsi: VSI to configure
+ * @tpid: TPID to enable outer VLAN insertion for
+ *
+ * Enable outer VLAN insertion via VSI context. This function should only be
+ * used if DVM is supported. Also, this function should never be called directly
+ * as it should be part of ice_vsi_vlan_ops if it's needed.
+ *
+ * Since the VSI context only supports a single TPID for insertion and
+ * stripping, setting the TPID for insertion will affect the TPID for stripping.
+ * Callers need to be aware of this limitation.
+ *
+ * Only modify outer VLAN insertion settings and the VLAN TPID. Outer VLAN
+ * stripping settings are unmodified.
+ *
+ * This allows a VLAN tag with the specified TPID to be inserted in the transmit
+ * descriptor.
+ */
+int ice_vsi_ena_outer_insertion(struct ice_vsi *vsi, u16 tpid)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	u8 tag_type;
+	int err;
+
+	if (vsi->info.port_based_outer_vlan)
+		return 0;
+
+	if (tpid_to_vsi_outer_vlan_type(tpid, &tag_type))
+		return -EINVAL;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ctxt->info.valid_sections =
+		cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID);
+	/* clear current outer VLAN insertion settings */
+	ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags &
+		~(ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT |
+		  ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC |
+		  ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M |
+		  ICE_AQ_VSI_OUTER_TAG_TYPE_M);
+	ctxt->info.outer_vlan_flags |=
+		((ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL <<
+		  ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) &
+		 ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M) |
+		((tag_type << ICE_AQ_VSI_OUTER_TAG_TYPE_S) &
+		 ICE_AQ_VSI_OUTER_TAG_TYPE_M);
+
+	err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (err)
+		dev_err(ice_pf_to_dev(vsi->back), "update VSI for enabling outer VLAN insertion failed, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+	else
+		vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags;
+
+	kfree(ctxt);
+	return err;
+}
+
+/**
+ * ice_vsi_dis_outer_insertion - disable outer VLAN insertion
+ * @vsi: VSI to configure
+ *
+ * Disable outer VLAN insertion via VSI context. This function should only be
+ * used if DVM is supported. Also, this function should never be called directly
+ * as it should be part of ice_vsi_vlan_ops if it's needed.
+ *
+ * Only modify the outer VLAN insertion settings. The VLAN TPID and outer VLAN
+ * settings are unmodified.
+ *
+ * This tells the hardware to not allow any VLAN tagged packets in the transmit
+ * descriptor. This enables software offloaded VLAN insertion and disables
+ * hardware offloaded VLAN insertion.
+ */
+int ice_vsi_dis_outer_insertion(struct ice_vsi *vsi)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	int err;
+
+	if (vsi->info.port_based_outer_vlan)
+		return 0;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ctxt->info.valid_sections =
+		cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID);
+	/* clear current outer VLAN insertion settings */
+	ctxt->info.outer_vlan_flags = vsi->info.outer_vlan_flags &
+		~(ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT |
+		  ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M);
+	ctxt->info.outer_vlan_flags |=
+		ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC |
+		((ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ALL <<
+		  ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) &
+		 ICE_AQ_VSI_OUTER_VLAN_TX_MODE_M);
+
+	err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (err)
+		dev_err(ice_pf_to_dev(vsi->back), "update VSI for disabling outer VLAN insertion failed, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+	else
+		vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags;
+
+	kfree(ctxt);
+	return err;
+}
+
+/**
+ * __ice_vsi_set_outer_port_vlan - set the outer port VLAN and related settings
+ * @vsi: VSI to configure
+ * @vlan_info: packed u16 that contains the VLAN prio and ID
+ * @tpid: TPID of the port VLAN
+ *
+ * Set the port VLAN prio, ID, and TPID.
+ *
+ * Enable VLAN pruning so the VSI doesn't receive any traffic that doesn't match
+ * a VLAN prune rule. The caller should take care to add a VLAN prune rule that
+ * matches the port VLAN ID and TPID.
+ *
+ * Tell hardware to strip outer VLAN tagged packets on receive and don't put
+ * them in the receive descriptor. VSI(s) in port VLANs should not be aware of
+ * the port VLAN ID or TPID they are assigned to.
+ *
+ * Tell hardware to prevent outer VLAN tag insertion on transmit and only allow
+ * untagged outer packets from the transmit descriptor.
+ *
+ * Also, tell the hardware to insert the port VLAN on transmit.
+ */
+static int
+__ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, u16 vlan_info, u16 tpid)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	u8 tag_type;
+	int err;
+
+	if (tpid_to_vsi_outer_vlan_type(tpid, &tag_type))
+		return -EINVAL;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ice_save_vlan_info(&vsi->info, &vsi->vlan_info);
+	ctxt->info = vsi->info;
+
+	ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
+
+	ctxt->info.port_based_outer_vlan = cpu_to_le16(vlan_info);
+	ctxt->info.outer_vlan_flags =
+		(ICE_AQ_VSI_OUTER_VLAN_EMODE_SHOW <<
+		 ICE_AQ_VSI_OUTER_VLAN_EMODE_S) |
+		((tag_type << ICE_AQ_VSI_OUTER_TAG_TYPE_S) &
+		 ICE_AQ_VSI_OUTER_TAG_TYPE_M) |
+		ICE_AQ_VSI_OUTER_VLAN_BLOCK_TX_DESC |
+		(ICE_AQ_VSI_OUTER_VLAN_TX_MODE_ACCEPTUNTAGGED <<
+		 ICE_AQ_VSI_OUTER_VLAN_TX_MODE_S) |
+		ICE_AQ_VSI_OUTER_VLAN_PORT_BASED_INSERT;
+
+	ctxt->info.valid_sections =
+		cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID |
+			    ICE_AQ_VSI_PROP_SW_VALID);
+
+	err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (err) {
+		dev_err(ice_pf_to_dev(vsi->back), "update VSI for setting outer port based VLAN failed, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+	} else {
+		vsi->info.port_based_outer_vlan = ctxt->info.port_based_outer_vlan;
+		vsi->info.outer_vlan_flags = ctxt->info.outer_vlan_flags;
+		vsi->info.sw_flags2 = ctxt->info.sw_flags2;
+	}
+
+	kfree(ctxt);
+	return err;
+}
+
+/**
+ * ice_vsi_set_outer_port_vlan - public version of __ice_vsi_set_outer_port_vlan
+ * @vsi: VSI to configure
+ * @vlan: ice_vlan structure used to set the port VLAN
+ *
+ * Set the outer port VLAN via VSI context. This function should only be
+ * used if DVM is supported. Also, this function should never be called directly
+ * as it should be part of ice_vsi_vlan_ops if it's needed.
+ *
+ * Use the ice_vlan structure passed in to set this VSI in a port VLAN.
+ */
+int ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan)
+{
+	u16 port_vlan_info;
+
+	if (vlan->prio > (VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT))
+		return -EINVAL;
+
+	port_vlan_info = vlan->vid | (vlan->prio << VLAN_PRIO_SHIFT);
+
+	return __ice_vsi_set_outer_port_vlan(vsi, port_vlan_info, vlan->tpid);
+}
+
+/**
+ * ice_vsi_clear_outer_port_vlan - clear outer port vlan
+ * @vsi: VSI to configure
+ *
+ * The function is restoring previously set vlan config (saved in
+ * vsi->vlan_info). Setting happens in port vlan configuration.
+ */
+int ice_vsi_clear_outer_port_vlan(struct ice_vsi *vsi)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	struct ice_vsi_ctx *ctxt;
+	int err;
+
+	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+	if (!ctxt)
+		return -ENOMEM;
+
+	ice_restore_vlan_info(&vsi->info, &vsi->vlan_info);
+	vsi->info.port_based_outer_vlan = 0;
+	ctxt->info = vsi->info;
+
+	ctxt->info.valid_sections =
+		cpu_to_le16(ICE_AQ_VSI_PROP_OUTER_TAG_VALID |
+			    ICE_AQ_VSI_PROP_SW_VALID);
+
+	err = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
+	if (err)
+		dev_err(ice_pf_to_dev(vsi->back), "update VSI for clearing outer port based VLAN failed, err %d aq_err %s\n",
+			err, ice_aq_str(hw->adminq.sq_last_status));
+
+	kfree(ctxt);
+	return err;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h
new file mode 100644
index 0000000000..f0d84d11bd
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_lib.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_VSI_VLAN_LIB_H_
+#define _ICE_VSI_VLAN_LIB_H_
+
+#include <linux/types.h>
+#include "ice_vlan.h"
+
+struct ice_vsi_vlan_info {
+	u8 sw_flags2;
+	u8 inner_vlan_flags;
+	u8 outer_vlan_flags;
+};
+
+struct ice_vsi;
+
+int ice_vsi_add_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+int ice_vsi_del_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+
+int ice_vsi_ena_inner_stripping(struct ice_vsi *vsi, u16 tpid);
+int ice_vsi_dis_inner_stripping(struct ice_vsi *vsi);
+int ice_vsi_ena_inner_insertion(struct ice_vsi *vsi, u16 tpid);
+int ice_vsi_dis_inner_insertion(struct ice_vsi *vsi);
+int ice_vsi_set_inner_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+int ice_vsi_clear_inner_port_vlan(struct ice_vsi *vsi);
+
+int ice_vsi_ena_rx_vlan_filtering(struct ice_vsi *vsi);
+int ice_vsi_dis_rx_vlan_filtering(struct ice_vsi *vsi);
+int ice_vsi_ena_tx_vlan_filtering(struct ice_vsi *vsi);
+int ice_vsi_dis_tx_vlan_filtering(struct ice_vsi *vsi);
+
+int ice_vsi_ena_outer_stripping(struct ice_vsi *vsi, u16 tpid);
+int ice_vsi_dis_outer_stripping(struct ice_vsi *vsi);
+int ice_vsi_ena_outer_insertion(struct ice_vsi *vsi, u16 tpid);
+int ice_vsi_dis_outer_insertion(struct ice_vsi *vsi);
+int ice_vsi_set_outer_port_vlan(struct ice_vsi *vsi, struct ice_vlan *vlan);
+int ice_vsi_clear_outer_port_vlan(struct ice_vsi *vsi);
+
+#endif /* _ICE_VSI_VLAN_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c
new file mode 100644
index 0000000000..4a6c850d83
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#include "ice_pf_vsi_vlan_ops.h"
+#include "ice_vf_vsi_vlan_ops.h"
+#include "ice_lib.h"
+#include "ice.h"
+
+static int
+op_unsupported_vlan_arg(struct ice_vsi * __always_unused vsi,
+			struct ice_vlan * __always_unused vlan)
+{
+	return -EOPNOTSUPP;
+}
+
+static int
+op_unsupported_tpid_arg(struct ice_vsi *__always_unused vsi,
+			u16 __always_unused tpid)
+{
+	return -EOPNOTSUPP;
+}
+
+static int op_unsupported(struct ice_vsi *__always_unused vsi)
+{
+	return -EOPNOTSUPP;
+}
+
+/* If any new ops are added to the VSI VLAN ops interface then an unsupported
+ * implementation should be set here.
+ */
+static struct ice_vsi_vlan_ops ops_unsupported = {
+	.add_vlan = op_unsupported_vlan_arg,
+	.del_vlan = op_unsupported_vlan_arg,
+	.ena_stripping = op_unsupported_tpid_arg,
+	.dis_stripping = op_unsupported,
+	.ena_insertion = op_unsupported_tpid_arg,
+	.dis_insertion = op_unsupported,
+	.ena_rx_filtering = op_unsupported,
+	.dis_rx_filtering = op_unsupported,
+	.ena_tx_filtering = op_unsupported,
+	.dis_tx_filtering = op_unsupported,
+	.set_port_vlan = op_unsupported_vlan_arg,
+};
+
+/**
+ * ice_vsi_init_unsupported_vlan_ops - init all VSI VLAN ops to unsupported
+ * @vsi: VSI to initialize VSI VLAN ops to unsupported for
+ *
+ * By default all inner and outer VSI VLAN ops return -EOPNOTSUPP. This was done
+ * as oppsed to leaving the ops null to prevent unexpected crashes. Instead if
+ * an unsupported VSI VLAN op is called it will just return -EOPNOTSUPP.
+ *
+ */
+static void ice_vsi_init_unsupported_vlan_ops(struct ice_vsi *vsi)
+{
+	vsi->outer_vlan_ops = ops_unsupported;
+	vsi->inner_vlan_ops = ops_unsupported;
+}
+
+/**
+ * ice_vsi_init_vlan_ops - initialize type specific VSI VLAN ops
+ * @vsi: VSI to initialize ops for
+ *
+ * If any VSI types are added and/or require different ops than the PF or VF VSI
+ * then they will have to add a case here to handle that. Also, VSI type
+ * specific files should be added in the same manner that was done for PF VSI.
+ */
+void ice_vsi_init_vlan_ops(struct ice_vsi *vsi)
+{
+	/* Initialize all VSI types to have unsupported VSI VLAN ops */
+	ice_vsi_init_unsupported_vlan_ops(vsi);
+
+	switch (vsi->type) {
+	case ICE_VSI_PF:
+	case ICE_VSI_SWITCHDEV_CTRL:
+		ice_pf_vsi_init_vlan_ops(vsi);
+		break;
+	case ICE_VSI_VF:
+		ice_vf_vsi_init_vlan_ops(vsi);
+		break;
+	default:
+		dev_dbg(ice_pf_to_dev(vsi->back), "%s does not support VLAN operations\n",
+			ice_vsi_type_str(vsi->type));
+		break;
+	}
+}
+
+/**
+ * ice_get_compat_vsi_vlan_ops - Get VSI VLAN ops based on VLAN mode
+ * @vsi: VSI used to get the VSI VLAN ops
+ *
+ * This function is meant to be used when the caller doesn't know which VLAN ops
+ * to use (i.e. inner or outer). This allows backward compatibility for VLANs
+ * since most of the Outer VSI VLAN functins are not supported when
+ * the device is configured in Single VLAN Mode (SVM).
+ */
+struct ice_vsi_vlan_ops *ice_get_compat_vsi_vlan_ops(struct ice_vsi *vsi)
+{
+	if (ice_is_dvm_ena(&vsi->back->hw))
+		return &vsi->outer_vlan_ops;
+	else
+		return &vsi->inner_vlan_ops;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h
new file mode 100644
index 0000000000..b2d2330ded
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_vsi_vlan_ops.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019-2021, Intel Corporation. */
+
+#ifndef _ICE_VSI_VLAN_OPS_H_
+#define _ICE_VSI_VLAN_OPS_H_
+
+#include "ice_type.h"
+#include "ice_vsi_vlan_lib.h"
+
+struct ice_vsi;
+
+struct ice_vsi_vlan_ops {
+	int (*add_vlan)(struct ice_vsi *vsi, struct ice_vlan *vlan);
+	int (*del_vlan)(struct ice_vsi *vsi, struct ice_vlan *vlan);
+	int (*ena_stripping)(struct ice_vsi *vsi, const u16 tpid);
+	int (*dis_stripping)(struct ice_vsi *vsi);
+	int (*ena_insertion)(struct ice_vsi *vsi, const u16 tpid);
+	int (*dis_insertion)(struct ice_vsi *vsi);
+	int (*ena_rx_filtering)(struct ice_vsi *vsi);
+	int (*dis_rx_filtering)(struct ice_vsi *vsi);
+	int (*ena_tx_filtering)(struct ice_vsi *vsi);
+	int (*dis_tx_filtering)(struct ice_vsi *vsi);
+	int (*set_port_vlan)(struct ice_vsi *vsi, struct ice_vlan *vlan);
+	int (*clear_port_vlan)(struct ice_vsi *vsi);
+};
+
+void ice_vsi_init_vlan_ops(struct ice_vsi *vsi);
+struct ice_vsi_vlan_ops *ice_get_compat_vsi_vlan_ops(struct ice_vsi *vsi);
+
+#endif /* _ICE_VSI_VLAN_OPS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
new file mode 100644
index 0000000000..307c609137
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -0,0 +1,1204 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019, Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock_drv.h>
+#include <net/xdp.h>
+#include "ice.h"
+#include "ice_base.h"
+#include "ice_type.h"
+#include "ice_xsk.h"
+#include "ice_txrx.h"
+#include "ice_txrx_lib.h"
+#include "ice_lib.h"
+
+static struct xdp_buff **ice_xdp_buf(struct ice_rx_ring *rx_ring, u32 idx)
+{
+	return &rx_ring->xdp_buf[idx];
+}
+
+/**
+ * ice_qp_reset_stats - Resets all stats for rings of given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx)
+{
+	struct ice_vsi_stats *vsi_stat;
+	struct ice_pf *pf;
+
+	pf = vsi->back;
+	if (!pf->vsi_stats)
+		return;
+
+	vsi_stat = pf->vsi_stats[vsi->idx];
+	if (!vsi_stat)
+		return;
+
+	memset(&vsi_stat->rx_ring_stats[q_idx]->rx_stats, 0,
+	       sizeof(vsi_stat->rx_ring_stats[q_idx]->rx_stats));
+	memset(&vsi_stat->tx_ring_stats[q_idx]->stats, 0,
+	       sizeof(vsi_stat->tx_ring_stats[q_idx]->stats));
+	if (ice_is_xdp_ena_vsi(vsi))
+		memset(&vsi->xdp_rings[q_idx]->ring_stats->stats, 0,
+		       sizeof(vsi->xdp_rings[q_idx]->ring_stats->stats));
+}
+
+/**
+ * ice_qp_clean_rings - Cleans all the rings of a given index
+ * @vsi: VSI that contains rings of interest
+ * @q_idx: ring index in array
+ */
+static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx)
+{
+	ice_clean_tx_ring(vsi->tx_rings[q_idx]);
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		synchronize_rcu();
+		ice_clean_tx_ring(vsi->xdp_rings[q_idx]);
+	}
+	ice_clean_rx_ring(vsi->rx_rings[q_idx]);
+}
+
+/**
+ * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector
+ * @vsi: VSI that has netdev
+ * @q_vector: q_vector that has NAPI context
+ * @enable: true for enable, false for disable
+ */
+static void
+ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector,
+		     bool enable)
+{
+	if (!vsi->netdev || !q_vector)
+		return;
+
+	if (enable)
+		napi_enable(&q_vector->napi);
+	else
+		napi_disable(&q_vector->napi);
+}
+
+/**
+ * ice_qvec_dis_irq - Mask off queue interrupt generation on given ring
+ * @vsi: the VSI that contains queue vector being un-configured
+ * @rx_ring: Rx ring that will have its IRQ disabled
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring,
+		 struct ice_q_vector *q_vector)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	u16 reg;
+	u32 val;
+
+	/* QINT_TQCTL is being cleared in ice_vsi_stop_tx_ring, so handle
+	 * here only QINT_RQCTL
+	 */
+	reg = rx_ring->reg_idx;
+	val = rd32(hw, QINT_RQCTL(reg));
+	val &= ~QINT_RQCTL_CAUSE_ENA_M;
+	wr32(hw, QINT_RQCTL(reg), val);
+
+	if (q_vector) {
+		wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0);
+		ice_flush(hw);
+		synchronize_irq(q_vector->irq.virq);
+	}
+}
+
+/**
+ * ice_qvec_cfg_msix - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void
+ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+	u16 reg_idx = q_vector->reg_idx;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	struct ice_tx_ring *tx_ring;
+	struct ice_rx_ring *rx_ring;
+
+	ice_cfg_itr(hw, q_vector);
+
+	ice_for_each_tx_ring(tx_ring, q_vector->tx)
+		ice_cfg_txq_interrupt(vsi, tx_ring->reg_idx, reg_idx,
+				      q_vector->tx.itr_idx);
+
+	ice_for_each_rx_ring(rx_ring, q_vector->rx)
+		ice_cfg_rxq_interrupt(vsi, rx_ring->reg_idx, reg_idx,
+				      q_vector->rx.itr_idx);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_qvec_ena_irq - Enable IRQ for given queue vector
+ * @vsi: the VSI that contains queue vector
+ * @q_vector: queue vector
+ */
+static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+
+	ice_irq_dynamic_ena(hw, vsi, q_vector);
+
+	ice_flush(hw);
+}
+
+/**
+ * ice_qp_dis - Disables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
+{
+	struct ice_txq_meta txq_meta = { };
+	struct ice_q_vector *q_vector;
+	struct ice_tx_ring *tx_ring;
+	struct ice_rx_ring *rx_ring;
+	int timeout = 50;
+	int err;
+
+	if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+		return -EINVAL;
+
+	tx_ring = vsi->tx_rings[q_idx];
+	rx_ring = vsi->rx_rings[q_idx];
+	q_vector = rx_ring->q_vector;
+
+	while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) {
+		timeout--;
+		if (!timeout)
+			return -EBUSY;
+		usleep_range(1000, 2000);
+	}
+	netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+
+	ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
+	err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
+	if (err)
+		return err;
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+		memset(&txq_meta, 0, sizeof(txq_meta));
+		ice_fill_txq_meta(vsi, xdp_ring, &txq_meta);
+		err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring,
+					   &txq_meta);
+		if (err)
+			return err;
+	}
+	ice_qvec_dis_irq(vsi, rx_ring, q_vector);
+
+	err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true);
+	if (err)
+		return err;
+
+	ice_qvec_toggle_napi(vsi, q_vector, false);
+	ice_qp_clean_rings(vsi, q_idx);
+	ice_qp_reset_stats(vsi, q_idx);
+
+	return 0;
+}
+
+/**
+ * ice_qp_ena - Enables a queue pair
+ * @vsi: VSI of interest
+ * @q_idx: ring index in array
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
+{
+	struct ice_aqc_add_tx_qgrp *qg_buf;
+	struct ice_q_vector *q_vector;
+	struct ice_tx_ring *tx_ring;
+	struct ice_rx_ring *rx_ring;
+	u16 size;
+	int err;
+
+	if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq)
+		return -EINVAL;
+
+	size = struct_size(qg_buf, txqs, 1);
+	qg_buf = kzalloc(size, GFP_KERNEL);
+	if (!qg_buf)
+		return -ENOMEM;
+
+	qg_buf->num_txqs = 1;
+
+	tx_ring = vsi->tx_rings[q_idx];
+	rx_ring = vsi->rx_rings[q_idx];
+	q_vector = rx_ring->q_vector;
+
+	err = ice_vsi_cfg_txq(vsi, tx_ring, qg_buf);
+	if (err)
+		goto free_buf;
+
+	if (ice_is_xdp_ena_vsi(vsi)) {
+		struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx];
+
+		memset(qg_buf, 0, size);
+		qg_buf->num_txqs = 1;
+		err = ice_vsi_cfg_txq(vsi, xdp_ring, qg_buf);
+		if (err)
+			goto free_buf;
+		ice_set_ring_xdp(xdp_ring);
+		ice_tx_xsk_pool(vsi, q_idx);
+	}
+
+	err = ice_vsi_cfg_rxq(rx_ring);
+	if (err)
+		goto free_buf;
+
+	ice_qvec_cfg_msix(vsi, q_vector);
+
+	err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true);
+	if (err)
+		goto free_buf;
+
+	clear_bit(ICE_CFG_BUSY, vsi->state);
+	ice_qvec_toggle_napi(vsi, q_vector, true);
+	ice_qvec_ena_irq(vsi, q_vector);
+
+	netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
+free_buf:
+	kfree(qg_buf);
+	return err;
+}
+
+/**
+ * ice_xsk_pool_disable - disable a buffer pool region
+ * @vsi: Current VSI
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid)
+{
+	struct xsk_buff_pool *pool = xsk_get_pool_from_qid(vsi->netdev, qid);
+
+	if (!pool)
+		return -EINVAL;
+
+	clear_bit(qid, vsi->af_xdp_zc_qps);
+	xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR);
+
+	return 0;
+}
+
+/**
+ * ice_xsk_pool_enable - enable a buffer pool region
+ * @vsi: Current VSI
+ * @pool: pointer to a requested buffer pool region
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
+{
+	int err;
+
+	if (vsi->type != ICE_VSI_PF)
+		return -EINVAL;
+
+	if (qid >= vsi->netdev->real_num_rx_queues ||
+	    qid >= vsi->netdev->real_num_tx_queues)
+		return -EINVAL;
+
+	err = xsk_pool_dma_map(pool, ice_pf_to_dev(vsi->back),
+			       ICE_RX_DMA_ATTR);
+	if (err)
+		return err;
+
+	set_bit(qid, vsi->af_xdp_zc_qps);
+
+	return 0;
+}
+
+/**
+ * ice_realloc_rx_xdp_bufs - reallocate for either XSK or normal buffer
+ * @rx_ring: Rx ring
+ * @pool_present: is pool for XSK present
+ *
+ * Try allocating memory and return ENOMEM, if failed to allocate.
+ * If allocation was successful, substitute buffer with allocated one.
+ * Returns 0 on success, negative on failure
+ */
+static int
+ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present)
+{
+	size_t elem_size = pool_present ? sizeof(*rx_ring->xdp_buf) :
+					  sizeof(*rx_ring->rx_buf);
+	void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);
+
+	if (!sw_ring)
+		return -ENOMEM;
+
+	if (pool_present) {
+		kfree(rx_ring->rx_buf);
+		rx_ring->rx_buf = NULL;
+		rx_ring->xdp_buf = sw_ring;
+	} else {
+		kfree(rx_ring->xdp_buf);
+		rx_ring->xdp_buf = NULL;
+		rx_ring->rx_buf = sw_ring;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_realloc_zc_buf - reallocate XDP ZC queue pairs
+ * @vsi: Current VSI
+ * @zc: is zero copy set
+ *
+ * Reallocate buffer for rx_rings that might be used by XSK.
+ * XDP requires more memory, than rx_buf provides.
+ * Returns 0 on success, negative on failure
+ */
+int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc)
+{
+	struct ice_rx_ring *rx_ring;
+	unsigned long q;
+
+	for_each_set_bit(q, vsi->af_xdp_zc_qps,
+			 max_t(int, vsi->alloc_txq, vsi->alloc_rxq)) {
+		rx_ring = vsi->rx_rings[q];
+		if (ice_realloc_rx_xdp_bufs(rx_ring, zc))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_xsk_pool_setup - enable/disable a buffer pool region depending on its state
+ * @vsi: Current VSI
+ * @pool: buffer pool to enable/associate to a ring, NULL to disable
+ * @qid: queue ID
+ *
+ * Returns 0 on success, negative on failure
+ */
+int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
+{
+	bool if_running, pool_present = !!pool;
+	int ret = 0, pool_failure = 0;
+
+	if (qid >= vsi->num_rxq || qid >= vsi->num_txq) {
+		netdev_err(vsi->netdev, "Please use queue id in scope of combined queues count\n");
+		pool_failure = -EINVAL;
+		goto failure;
+	}
+
+	if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
+
+	if (if_running) {
+		struct ice_rx_ring *rx_ring = vsi->rx_rings[qid];
+
+		ret = ice_qp_dis(vsi, qid);
+		if (ret) {
+			netdev_err(vsi->netdev, "ice_qp_dis error = %d\n", ret);
+			goto xsk_pool_if_up;
+		}
+
+		ret = ice_realloc_rx_xdp_bufs(rx_ring, pool_present);
+		if (ret)
+			goto xsk_pool_if_up;
+	}
+
+	pool_failure = pool_present ? ice_xsk_pool_enable(vsi, pool, qid) :
+				      ice_xsk_pool_disable(vsi, qid);
+
+xsk_pool_if_up:
+	if (if_running) {
+		ret = ice_qp_ena(vsi, qid);
+		if (!ret && pool_present)
+			napi_schedule(&vsi->rx_rings[qid]->xdp_ring->q_vector->napi);
+		else if (ret)
+			netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
+	}
+
+failure:
+	if (pool_failure) {
+		netdev_err(vsi->netdev, "Could not %sable buffer pool, error = %d\n",
+			   pool_present ? "en" : "dis", pool_failure);
+		return pool_failure;
+	}
+
+	return ret;
+}
+
+/**
+ * ice_fill_rx_descs - pick buffers from XSK buffer pool and use it
+ * @pool: XSK Buffer pool to pull the buffers from
+ * @xdp: SW ring of xdp_buff that will hold the buffers
+ * @rx_desc: Pointer to Rx descriptors that will be filled
+ * @count: The number of buffers to allocate
+ *
+ * This function allocates a number of Rx buffers from the fill ring
+ * or the internal recycle mechanism and places them on the Rx ring.
+ *
+ * Note that ring wrap should be handled by caller of this function.
+ *
+ * Returns the amount of allocated Rx descriptors
+ */
+static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
+			     union ice_32b_rx_flex_desc *rx_desc, u16 count)
+{
+	dma_addr_t dma;
+	u16 buffs;
+	int i;
+
+	buffs = xsk_buff_alloc_batch(pool, xdp, count);
+	for (i = 0; i < buffs; i++) {
+		dma = xsk_buff_xdp_get_dma(*xdp);
+		rx_desc->read.pkt_addr = cpu_to_le64(dma);
+		rx_desc->wb.status_error0 = 0;
+
+		rx_desc++;
+		xdp++;
+	}
+
+	return buffs;
+}
+
+/**
+ * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ *
+ * Place the @count of descriptors onto Rx ring. Handle the ring wrap
+ * for case where space from next_to_use up to the end of ring is less
+ * than @count. Finally do a tail bump.
+ *
+ * Returns true if all allocations were successful, false if any fail.
+ */
+static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
+{
+	u32 nb_buffs_extra = 0, nb_buffs = 0;
+	union ice_32b_rx_flex_desc *rx_desc;
+	u16 ntu = rx_ring->next_to_use;
+	u16 total_count = count;
+	struct xdp_buff **xdp;
+
+	rx_desc = ICE_RX_DESC(rx_ring, ntu);
+	xdp = ice_xdp_buf(rx_ring, ntu);
+
+	if (ntu + count >= rx_ring->count) {
+		nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp,
+						   rx_desc,
+						   rx_ring->count - ntu);
+		if (nb_buffs_extra != rx_ring->count - ntu) {
+			ntu += nb_buffs_extra;
+			goto exit;
+		}
+		rx_desc = ICE_RX_DESC(rx_ring, 0);
+		xdp = ice_xdp_buf(rx_ring, 0);
+		ntu = 0;
+		count -= nb_buffs_extra;
+		ice_release_rx_desc(rx_ring, 0);
+	}
+
+	nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count);
+
+	ntu += nb_buffs;
+	if (ntu == rx_ring->count)
+		ntu = 0;
+
+exit:
+	if (rx_ring->next_to_use != ntu)
+		ice_release_rx_desc(rx_ring, ntu);
+
+	return total_count == (nb_buffs_extra + nb_buffs);
+}
+
+/**
+ * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
+ * @rx_ring: Rx ring
+ * @count: The number of buffers to allocate
+ *
+ * Wrapper for internal allocation routine; figure out how many tail
+ * bumps should take place based on the given threshold
+ *
+ * Returns true if all calls to internal alloc routine succeeded
+ */
+bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count)
+{
+	u16 rx_thresh = ICE_RING_QUARTER(rx_ring);
+	u16 leftover, i, tail_bumps;
+
+	tail_bumps = count / rx_thresh;
+	leftover = count - (tail_bumps * rx_thresh);
+
+	for (i = 0; i < tail_bumps; i++)
+		if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh))
+			return false;
+	return __ice_alloc_rx_bufs_zc(rx_ring, leftover);
+}
+
+/**
+ * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
+ * @rx_ring: Rx ring
+ * @xdp: Pointer to XDP buffer
+ *
+ * This function allocates a new skb from a zero-copy Rx buffer.
+ *
+ * Returns the skb on success, NULL on failure.
+ */
+static struct sk_buff *
+ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
+{
+	unsigned int totalsize = xdp->data_end - xdp->data_meta;
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	struct skb_shared_info *sinfo = NULL;
+	struct sk_buff *skb;
+	u32 nr_frags = 0;
+
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+	}
+	net_prefetch(xdp->data_meta);
+
+	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+
+	memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+	       ALIGN(totalsize, sizeof(long)));
+
+	if (metasize) {
+		skb_metadata_set(skb, metasize);
+		__skb_pull(skb, metasize);
+	}
+
+	if (likely(!xdp_buff_has_frags(xdp)))
+		goto out;
+
+	for (int i = 0; i < nr_frags; i++) {
+		struct skb_shared_info *skinfo = skb_shinfo(skb);
+		skb_frag_t *frag = &sinfo->frags[i];
+		struct page *page;
+		void *addr;
+
+		page = dev_alloc_page();
+		if (!page) {
+			dev_kfree_skb(skb);
+			return NULL;
+		}
+		addr = page_to_virt(page);
+
+		memcpy(addr, skb_frag_page(frag), skb_frag_size(frag));
+
+		__skb_fill_page_desc_noacc(skinfo, skinfo->nr_frags++,
+					   addr, 0, skb_frag_size(frag));
+	}
+
+out:
+	xsk_buff_free(xdp);
+	return skb;
+}
+
+/**
+ * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ
+ * @xdp_ring: XDP Tx ring
+ */
+static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
+{
+	u16 ntc = xdp_ring->next_to_clean;
+	struct ice_tx_desc *tx_desc;
+	u16 cnt = xdp_ring->count;
+	struct ice_tx_buf *tx_buf;
+	u16 completed_frames = 0;
+	u16 xsk_frames = 0;
+	u16 last_rs;
+	int i;
+
+	last_rs = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : cnt - 1;
+	tx_desc = ICE_TX_DESC(xdp_ring, last_rs);
+	if (tx_desc->cmd_type_offset_bsz &
+	    cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)) {
+		if (last_rs >= ntc)
+			completed_frames = last_rs - ntc + 1;
+		else
+			completed_frames = last_rs + cnt - ntc + 1;
+	}
+
+	if (!completed_frames)
+		return 0;
+
+	if (likely(!xdp_ring->xdp_tx_active)) {
+		xsk_frames = completed_frames;
+		goto skip;
+	}
+
+	ntc = xdp_ring->next_to_clean;
+	for (i = 0; i < completed_frames; i++) {
+		tx_buf = &xdp_ring->tx_buf[ntc];
+
+		if (tx_buf->type == ICE_TX_BUF_XSK_TX) {
+			tx_buf->type = ICE_TX_BUF_EMPTY;
+			xsk_buff_free(tx_buf->xdp);
+			xdp_ring->xdp_tx_active--;
+		} else {
+			xsk_frames++;
+		}
+
+		ntc++;
+		if (ntc >= xdp_ring->count)
+			ntc = 0;
+	}
+skip:
+	tx_desc->cmd_type_offset_bsz = 0;
+	xdp_ring->next_to_clean += completed_frames;
+	if (xdp_ring->next_to_clean >= cnt)
+		xdp_ring->next_to_clean -= cnt;
+	if (xsk_frames)
+		xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+
+	return completed_frames;
+}
+
+/**
+ * ice_xmit_xdp_tx_zc - AF_XDP ZC handler for XDP_TX
+ * @xdp: XDP buffer to xmit
+ * @xdp_ring: XDP ring to produce descriptor onto
+ *
+ * note that this function works directly on xdp_buff, no need to convert
+ * it to xdp_frame. xdp_buff pointer is stored to ice_tx_buf so that cleaning
+ * side will be able to xsk_buff_free() it.
+ *
+ * Returns ICE_XDP_TX for successfully produced desc, ICE_XDP_CONSUMED if there
+ * was not enough space on XDP ring
+ */
+static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
+			      struct ice_tx_ring *xdp_ring)
+{
+	struct skb_shared_info *sinfo = NULL;
+	u32 size = xdp->data_end - xdp->data;
+	u32 ntu = xdp_ring->next_to_use;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	struct xdp_buff *head;
+	u32 nr_frags = 0;
+	u32 free_space;
+	u32 frag = 0;
+
+	free_space = ICE_DESC_UNUSED(xdp_ring);
+	if (free_space < ICE_RING_QUARTER(xdp_ring))
+		free_space += ice_clean_xdp_irq_zc(xdp_ring);
+
+	if (unlikely(!free_space))
+		goto busy;
+
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+		if (free_space < nr_frags + 1)
+			goto busy;
+	}
+
+	tx_desc = ICE_TX_DESC(xdp_ring, ntu);
+	tx_buf = &xdp_ring->tx_buf[ntu];
+	head = xdp;
+
+	for (;;) {
+		dma_addr_t dma;
+
+		dma = xsk_buff_xdp_get_dma(xdp);
+		xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size);
+
+		tx_buf->xdp = xdp;
+		tx_buf->type = ICE_TX_BUF_XSK_TX;
+		tx_desc->buf_addr = cpu_to_le64(dma);
+		tx_desc->cmd_type_offset_bsz = ice_build_ctob(0, 0, size, 0);
+		/* account for each xdp_buff from xsk_buff_pool */
+		xdp_ring->xdp_tx_active++;
+
+		if (++ntu == xdp_ring->count)
+			ntu = 0;
+
+		if (frag == nr_frags)
+			break;
+
+		tx_desc = ICE_TX_DESC(xdp_ring, ntu);
+		tx_buf = &xdp_ring->tx_buf[ntu];
+
+		xdp = xsk_buff_get_frag(head);
+		size = skb_frag_size(&sinfo->frags[frag]);
+		frag++;
+	}
+
+	xdp_ring->next_to_use = ntu;
+	/* update last descriptor from a frame with EOP */
+	tx_desc->cmd_type_offset_bsz |=
+		cpu_to_le64(ICE_TX_DESC_CMD_EOP << ICE_TXD_QW1_CMD_S);
+
+	return ICE_XDP_TX;
+
+busy:
+	xdp_ring->ring_stats->tx_stats.tx_busy++;
+
+	return ICE_XDP_CONSUMED;
+}
+
+/**
+ * ice_run_xdp_zc - Executes an XDP program in zero-copy path
+ * @rx_ring: Rx ring
+ * @xdp: xdp_buff used as input to the XDP program
+ * @xdp_prog: XDP program to run
+ * @xdp_ring: ring to be used for XDP_TX action
+ *
+ * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
+ */
+static int
+ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
+	       struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
+{
+	int err, result = ICE_XDP_PASS;
+	u32 act;
+
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+
+	if (likely(act == XDP_REDIRECT)) {
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		if (!err)
+			return ICE_XDP_REDIR;
+		if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
+			result = ICE_XDP_EXIT;
+		else
+			result = ICE_XDP_CONSUMED;
+		goto out_failure;
+	}
+
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		result = ice_xmit_xdp_tx_zc(xdp, xdp_ring);
+		if (result == ICE_XDP_CONSUMED)
+			goto out_failure;
+		break;
+	case XDP_DROP:
+		result = ICE_XDP_CONSUMED;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
+		fallthrough;
+	case XDP_ABORTED:
+		result = ICE_XDP_CONSUMED;
+out_failure:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		break;
+	}
+
+	return result;
+}
+
+static int
+ice_add_xsk_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *first,
+		 struct xdp_buff *xdp, const unsigned int size)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(first);
+
+	if (!size)
+		return 0;
+
+	if (!xdp_buff_has_frags(first)) {
+		sinfo->nr_frags = 0;
+		sinfo->xdp_frags_size = 0;
+		xdp_buff_set_frags_flag(first);
+	}
+
+	if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
+		xsk_buff_free(first);
+		return -ENOMEM;
+	}
+
+	__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++,
+				   virt_to_page(xdp->data_hard_start),
+				   XDP_PACKET_HEADROOM, size);
+	sinfo->xdp_frags_size += size;
+	xsk_buff_add_frag(xdp);
+
+	return 0;
+}
+
+/**
+ * ice_clean_rx_irq_zc - consumes packets from the hardware ring
+ * @rx_ring: AF_XDP Rx ring
+ * @budget: NAPI budget
+ *
+ * Returns number of processed packets on success, remaining budget on failure.
+ */
+int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+	struct xsk_buff_pool *xsk_pool = rx_ring->xsk_pool;
+	u32 ntc = rx_ring->next_to_clean;
+	u32 ntu = rx_ring->next_to_use;
+	struct xdp_buff *first = NULL;
+	struct ice_tx_ring *xdp_ring;
+	unsigned int xdp_xmit = 0;
+	struct bpf_prog *xdp_prog;
+	u32 cnt = rx_ring->count;
+	bool failure = false;
+	int entries_to_alloc;
+
+	/* ZC patch is enabled only when XDP program is set,
+	 * so here it can not be NULL
+	 */
+	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+	xdp_ring = rx_ring->xdp_ring;
+
+	if (ntc != rx_ring->first_desc)
+		first = *ice_xdp_buf(rx_ring, rx_ring->first_desc);
+
+	while (likely(total_rx_packets < (unsigned int)budget)) {
+		union ice_32b_rx_flex_desc *rx_desc;
+		unsigned int size, xdp_res = 0;
+		struct xdp_buff *xdp;
+		struct sk_buff *skb;
+		u16 stat_err_bits;
+		u16 vlan_tag = 0;
+		u16 rx_ptype;
+
+		rx_desc = ICE_RX_DESC(rx_ring, ntc);
+
+		stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
+		if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we have
+		 * verified the descriptor has been written back.
+		 */
+		dma_rmb();
+
+		if (unlikely(ntc == ntu))
+			break;
+
+		xdp = *ice_xdp_buf(rx_ring, ntc);
+
+		size = le16_to_cpu(rx_desc->wb.pkt_len) &
+				   ICE_RX_FLX_DESC_PKT_LEN_M;
+
+		xsk_buff_set_size(xdp, size);
+		xsk_buff_dma_sync_for_cpu(xdp, xsk_pool);
+
+		if (!first) {
+			first = xdp;
+		} else if (ice_add_xsk_frag(rx_ring, first, xdp, size)) {
+			break;
+		}
+
+		if (++ntc == cnt)
+			ntc = 0;
+
+		if (ice_is_non_eop(rx_ring, rx_desc))
+			continue;
+
+		xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring);
+		if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
+			xdp_xmit |= xdp_res;
+		} else if (xdp_res == ICE_XDP_EXIT) {
+			failure = true;
+			first = NULL;
+			rx_ring->first_desc = ntc;
+			break;
+		} else if (xdp_res == ICE_XDP_CONSUMED) {
+			xsk_buff_free(first);
+		} else if (xdp_res == ICE_XDP_PASS) {
+			goto construct_skb;
+		}
+
+		total_rx_bytes += xdp_get_buff_len(first);
+		total_rx_packets++;
+
+		first = NULL;
+		rx_ring->first_desc = ntc;
+		continue;
+
+construct_skb:
+		/* XDP_PASS path */
+		skb = ice_construct_skb_zc(rx_ring, first);
+		if (!skb) {
+			rx_ring->ring_stats->rx_stats.alloc_buf_failed++;
+			break;
+		}
+
+		first = NULL;
+		rx_ring->first_desc = ntc;
+
+		if (eth_skb_pad(skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		total_rx_bytes += skb->len;
+		total_rx_packets++;
+
+		vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc);
+
+		rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+				       ICE_RX_FLEX_DESC_PTYPE_M;
+
+		ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
+		ice_receive_skb(rx_ring, skb, vlan_tag);
+	}
+
+	rx_ring->next_to_clean = ntc;
+	entries_to_alloc = ICE_RX_DESC_UNUSED(rx_ring);
+	if (entries_to_alloc > ICE_RING_QUARTER(rx_ring))
+		failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc);
+
+	ice_finalize_xdp_rx(xdp_ring, xdp_xmit, 0);
+	ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
+
+	if (xsk_uses_need_wakeup(xsk_pool)) {
+		/* ntu could have changed when allocating entries above, so
+		 * use rx_ring value instead of stack based one
+		 */
+		if (failure || ntc == rx_ring->next_to_use)
+			xsk_set_rx_need_wakeup(xsk_pool);
+		else
+			xsk_clear_rx_need_wakeup(xsk_pool);
+
+		return (int)total_rx_packets;
+	}
+
+	return failure ? budget : (int)total_rx_packets;
+}
+
+/**
+ * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor
+ * @xdp_ring: XDP ring to produce the HW Tx descriptor on
+ * @desc: AF_XDP descriptor to pull the DMA address and length from
+ * @total_bytes: bytes accumulator that will be used for stats update
+ */
+static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc,
+			 unsigned int *total_bytes)
+{
+	struct ice_tx_desc *tx_desc;
+	dma_addr_t dma;
+
+	dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr);
+	xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len);
+
+	tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
+	tx_desc->buf_addr = cpu_to_le64(dma);
+	tx_desc->cmd_type_offset_bsz = ice_build_ctob(xsk_is_eop_desc(desc),
+						      0, desc->len, 0);
+
+	*total_bytes += desc->len;
+}
+
+/**
+ * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
+ * @total_bytes: bytes accumulator that will be used for stats update
+ */
+static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
+			       unsigned int *total_bytes)
+{
+	u16 ntu = xdp_ring->next_to_use;
+	struct ice_tx_desc *tx_desc;
+	u32 i;
+
+	loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
+		dma_addr_t dma;
+
+		dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr);
+		xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len);
+
+		tx_desc = ICE_TX_DESC(xdp_ring, ntu++);
+		tx_desc->buf_addr = cpu_to_le64(dma);
+		tx_desc->cmd_type_offset_bsz = ice_build_ctob(xsk_is_eop_desc(&descs[i]),
+							      0, descs[i].len, 0);
+
+		*total_bytes += descs[i].len;
+	}
+
+	xdp_ring->next_to_use = ntu;
+}
+
+/**
+ * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from
+ * @nb_pkts: count of packets to be send
+ * @total_bytes: bytes accumulator that will be used for stats update
+ */
+static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs,
+				u32 nb_pkts, unsigned int *total_bytes)
+{
+	u32 batched, leftover, i;
+
+	batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH);
+	leftover = nb_pkts & (PKTS_PER_BATCH - 1);
+	for (i = 0; i < batched; i += PKTS_PER_BATCH)
+		ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
+	for (; i < batched + leftover; i++)
+		ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);
+}
+
+/**
+ * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
+ * @xdp_ring: XDP ring to produce the HW Tx descriptors on
+ *
+ * Returns true if there is no more work that needs to be done, false otherwise
+ */
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring)
+{
+	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
+	u32 nb_pkts, nb_processed = 0;
+	unsigned int total_bytes = 0;
+	int budget;
+
+	ice_clean_xdp_irq_zc(xdp_ring);
+
+	budget = ICE_DESC_UNUSED(xdp_ring);
+	budget = min_t(u16, budget, ICE_RING_QUARTER(xdp_ring));
+
+	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
+	if (!nb_pkts)
+		return true;
+
+	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
+		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
+		ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
+		xdp_ring->next_to_use = 0;
+	}
+
+	ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
+			    &total_bytes);
+
+	ice_set_rs_bit(xdp_ring);
+	ice_xdp_ring_update_tail(xdp_ring);
+	ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes);
+
+	if (xsk_uses_need_wakeup(xdp_ring->xsk_pool))
+		xsk_set_tx_need_wakeup(xdp_ring->xsk_pool);
+
+	return nb_pkts < budget;
+}
+
+/**
+ * ice_xsk_wakeup - Implements ndo_xsk_wakeup
+ * @netdev: net_device
+ * @queue_id: queue to wake up
+ * @flags: ignored in our case, since we have Rx and Tx in the same NAPI
+ *
+ * Returns negative on error, zero otherwise.
+ */
+int
+ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
+	       u32 __always_unused flags)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_q_vector *q_vector;
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_tx_ring *ring;
+
+	if (test_bit(ICE_VSI_DOWN, vsi->state))
+		return -ENETDOWN;
+
+	if (!ice_is_xdp_ena_vsi(vsi))
+		return -EINVAL;
+
+	if (queue_id >= vsi->num_txq || queue_id >= vsi->num_rxq)
+		return -EINVAL;
+
+	ring = vsi->rx_rings[queue_id]->xdp_ring;
+
+	if (!ring->xsk_pool)
+		return -EINVAL;
+
+	/* The idea here is that if NAPI is running, mark a miss, so
+	 * it will run again. If not, trigger an interrupt and
+	 * schedule the NAPI from interrupt context. If NAPI would be
+	 * scheduled here, the interrupt affinity would not be
+	 * honored.
+	 */
+	q_vector = ring->q_vector;
+	if (!napi_if_scheduled_mark_missed(&q_vector->napi))
+		ice_trigger_sw_intr(&vsi->back->hw, q_vector);
+
+	return 0;
+}
+
+/**
+ * ice_xsk_any_rx_ring_ena - Checks if Rx rings have AF_XDP buff pool attached
+ * @vsi: VSI to be checked
+ *
+ * Returns true if any of the Rx rings has an AF_XDP buff pool attached
+ */
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi)
+{
+	int i;
+
+	ice_for_each_rxq(vsi, i) {
+		if (xsk_get_pool_from_qid(vsi->netdev, i))
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_xsk_clean_rx_ring - clean buffer pool queues connected to a given Rx ring
+ * @rx_ring: ring to be cleaned
+ */
+void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring)
+{
+	u16 ntc = rx_ring->next_to_clean;
+	u16 ntu = rx_ring->next_to_use;
+
+	while (ntc != ntu) {
+		struct xdp_buff *xdp = *ice_xdp_buf(rx_ring, ntc);
+
+		xsk_buff_free(xdp);
+		ntc++;
+		if (ntc >= rx_ring->count)
+			ntc = 0;
+	}
+}
+
+/**
+ * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its buffer pool queues
+ * @xdp_ring: XDP_Tx ring
+ */
+void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring)
+{
+	u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use;
+	u32 xsk_frames = 0;
+
+	while (ntc != ntu) {
+		struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
+
+		if (tx_buf->type == ICE_TX_BUF_XSK_TX) {
+			tx_buf->type = ICE_TX_BUF_EMPTY;
+			xsk_buff_free(tx_buf->xdp);
+		} else {
+			xsk_frames++;
+		}
+
+		ntc++;
+		if (ntc >= xdp_ring->count)
+			ntc = 0;
+	}
+
+	if (xsk_frames)
+		xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
new file mode 100644
index 0000000000..6fa181f080
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Intel Corporation. */
+
+#ifndef _ICE_XSK_H_
+#define _ICE_XSK_H_
+#include "ice_txrx.h"
+
+#define PKTS_PER_BATCH 8
+
+#ifdef __clang__
+#define loop_unrolled_for _Pragma("clang loop unroll_count(8)") for
+#elif __GNUC__ >= 8
+#define loop_unrolled_for _Pragma("GCC unroll 8") for
+#else
+#define loop_unrolled_for for
+#endif
+
+struct ice_vsi;
+
+#ifdef CONFIG_XDP_SOCKETS
+int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool,
+		       u16 qid);
+int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget);
+int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
+bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count);
+bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
+void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring);
+void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring);
+bool ice_xmit_zc(struct ice_tx_ring *xdp_ring);
+int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc);
+#else
+static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring)
+{
+	return false;
+}
+
+static inline int
+ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi,
+		   struct xsk_buff_pool __always_unused *pool,
+		   u16 __always_unused qid)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int
+ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring,
+		    int __always_unused budget)
+{
+	return 0;
+}
+
+static inline bool
+ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring,
+		     u16 __always_unused count)
+{
+	return false;
+}
+
+static inline bool ice_xsk_any_rx_ring_ena(struct ice_vsi __always_unused *vsi)
+{
+	return false;
+}
+
+static inline int
+ice_xsk_wakeup(struct net_device __always_unused *netdev,
+	       u32 __always_unused queue_id, u32 __always_unused flags)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) { }
+static inline void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring) { }
+
+static inline int
+ice_realloc_zc_buf(struct ice_vsi __always_unused *vsi,
+		   bool __always_unused zc)
+{
+	return 0;
+}
+#endif /* CONFIG_XDP_SOCKETS */
+#endif /* !_ICE_XSK_H_ */
diff --git a/drivers/net/ethernet/intel/igb/Makefile b/drivers/net/ethernet/intel/igb/Makefile
new file mode 100644
index 0000000000..394c1e0656
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 1999 - 2018 Intel Corporation.
+#
+# Makefile for the Intel(R) 82575 PCI-Express ethernet driver
+#
+
+obj-$(CONFIG_IGB) += igb.o
+
+igb-objs := igb_main.o igb_ethtool.o e1000_82575.o \
+	    e1000_mac.o e1000_nvm.o e1000_phy.o e1000_mbx.o \
+	    e1000_i210.o igb_ptp.o igb_hwmon.o
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
new file mode 100644
index 0000000000..8d6e44ee18
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -0,0 +1,2927 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+/* e1000_82575
+ * e1000_82576
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <linux/i2c.h>
+
+#include "e1000_mac.h"
+#include "e1000_82575.h"
+#include "e1000_i210.h"
+#include "igb.h"
+
+static s32  igb_get_invariants_82575(struct e1000_hw *);
+static s32  igb_acquire_phy_82575(struct e1000_hw *);
+static void igb_release_phy_82575(struct e1000_hw *);
+static s32  igb_acquire_nvm_82575(struct e1000_hw *);
+static void igb_release_nvm_82575(struct e1000_hw *);
+static s32  igb_check_for_link_82575(struct e1000_hw *);
+static s32  igb_get_cfg_done_82575(struct e1000_hw *);
+static s32  igb_init_hw_82575(struct e1000_hw *);
+static s32  igb_phy_hw_reset_sgmii_82575(struct e1000_hw *);
+static s32  igb_read_phy_reg_sgmii_82575(struct e1000_hw *, u32, u16 *);
+static s32  igb_reset_hw_82575(struct e1000_hw *);
+static s32  igb_reset_hw_82580(struct e1000_hw *);
+static s32  igb_set_d0_lplu_state_82575(struct e1000_hw *, bool);
+static s32  igb_set_d0_lplu_state_82580(struct e1000_hw *, bool);
+static s32  igb_set_d3_lplu_state_82580(struct e1000_hw *, bool);
+static s32  igb_setup_copper_link_82575(struct e1000_hw *);
+static s32  igb_setup_serdes_link_82575(struct e1000_hw *);
+static s32  igb_write_phy_reg_sgmii_82575(struct e1000_hw *, u32, u16);
+static void igb_clear_hw_cntrs_82575(struct e1000_hw *);
+static s32  igb_acquire_swfw_sync_82575(struct e1000_hw *, u16);
+static s32  igb_get_pcs_speed_and_duplex_82575(struct e1000_hw *, u16 *,
+						 u16 *);
+static s32  igb_get_phy_id_82575(struct e1000_hw *);
+static void igb_release_swfw_sync_82575(struct e1000_hw *, u16);
+static bool igb_sgmii_active_82575(struct e1000_hw *);
+static s32  igb_reset_init_script_82575(struct e1000_hw *);
+static s32  igb_read_mac_addr_82575(struct e1000_hw *);
+static s32  igb_set_pcie_completion_timeout(struct e1000_hw *hw);
+static s32  igb_reset_mdicnfg_82580(struct e1000_hw *hw);
+static s32  igb_validate_nvm_checksum_82580(struct e1000_hw *hw);
+static s32  igb_update_nvm_checksum_82580(struct e1000_hw *hw);
+static s32 igb_validate_nvm_checksum_i350(struct e1000_hw *hw);
+static s32 igb_update_nvm_checksum_i350(struct e1000_hw *hw);
+static const u16 e1000_82580_rxpbs_table[] = {
+	36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 };
+
+/* Due to a hw errata, if the host tries to  configure the VFTA register
+ * while performing queries from the BMC or DMA, then the VFTA in some
+ * cases won't be written.
+ */
+
+/**
+ *  igb_write_vfta_i350 - Write value to VLAN filter table
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset in VLAN filter table
+ *  @value: register value written to VLAN filter table
+ *
+ *  Writes value at the given offset in the register array which stores
+ *  the VLAN filter table.
+ **/
+static void igb_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value)
+{
+	struct igb_adapter *adapter = hw->back;
+	int i;
+
+	for (i = 10; i--;)
+		array_wr32(E1000_VFTA, offset, value);
+
+	wrfl();
+	adapter->shadow_vfta[offset] = value;
+}
+
+/**
+ *  igb_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO
+ *  @hw: pointer to the HW structure
+ *
+ *  Called to determine if the I2C pins are being used for I2C or as an
+ *  external MDIO interface since the two options are mutually exclusive.
+ **/
+static bool igb_sgmii_uses_mdio_82575(struct e1000_hw *hw)
+{
+	u32 reg = 0;
+	bool ext_mdio = false;
+
+	switch (hw->mac.type) {
+	case e1000_82575:
+	case e1000_82576:
+		reg = rd32(E1000_MDIC);
+		ext_mdio = !!(reg & E1000_MDIC_DEST);
+		break;
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+		reg = rd32(E1000_MDICNFG);
+		ext_mdio = !!(reg & E1000_MDICNFG_EXT_MDIO);
+		break;
+	default:
+		break;
+	}
+	return ext_mdio;
+}
+
+/**
+ *  igb_check_for_link_media_swap - Check which M88E1112 interface linked
+ *  @hw: pointer to the HW structure
+ *
+ *  Poll the M88E1112 interfaces to see which interface achieved link.
+ */
+static s32 igb_check_for_link_media_swap(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+	u8 port = 0;
+
+	/* Check the copper medium. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data);
+	if (ret_val)
+		return ret_val;
+
+	if (data & E1000_M88E1112_STATUS_LINK)
+		port = E1000_MEDIA_PORT_COPPER;
+
+	/* Check the other medium. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 1);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data);
+	if (ret_val)
+		return ret_val;
+
+
+	if (data & E1000_M88E1112_STATUS_LINK)
+		port = E1000_MEDIA_PORT_OTHER;
+
+	/* Determine if a swap needs to happen. */
+	if (port && (hw->dev_spec._82575.media_port != port)) {
+		hw->dev_spec._82575.media_port = port;
+		hw->dev_spec._82575.media_changed = true;
+	}
+
+	if (port == E1000_MEDIA_PORT_COPPER) {
+		/* reset page to 0 */
+		ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0);
+		if (ret_val)
+			return ret_val;
+		igb_check_for_link_82575(hw);
+	} else {
+		igb_check_for_link_82575(hw);
+		/* reset page to 0 */
+		ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0);
+		if (ret_val)
+			return ret_val;
+	}
+
+	return 0;
+}
+
+/**
+ *  igb_init_phy_params_82575 - Init PHY func ptrs.
+ *  @hw: pointer to the HW structure
+ **/
+static s32 igb_init_phy_params_82575(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+	u32 ctrl_ext;
+
+	if (hw->phy.media_type != e1000_media_type_copper) {
+		phy->type = e1000_phy_none;
+		goto out;
+	}
+
+	phy->autoneg_mask	= AUTONEG_ADVERTISE_SPEED_DEFAULT;
+	phy->reset_delay_us	= 100;
+
+	ctrl_ext = rd32(E1000_CTRL_EXT);
+
+	if (igb_sgmii_active_82575(hw)) {
+		phy->ops.reset = igb_phy_hw_reset_sgmii_82575;
+		ctrl_ext |= E1000_CTRL_I2C_ENA;
+	} else {
+		phy->ops.reset = igb_phy_hw_reset;
+		ctrl_ext &= ~E1000_CTRL_I2C_ENA;
+	}
+
+	wr32(E1000_CTRL_EXT, ctrl_ext);
+	igb_reset_mdicnfg_82580(hw);
+
+	if (igb_sgmii_active_82575(hw) && !igb_sgmii_uses_mdio_82575(hw)) {
+		phy->ops.read_reg = igb_read_phy_reg_sgmii_82575;
+		phy->ops.write_reg = igb_write_phy_reg_sgmii_82575;
+	} else {
+		switch (hw->mac.type) {
+		case e1000_82580:
+		case e1000_i350:
+		case e1000_i354:
+		case e1000_i210:
+		case e1000_i211:
+			phy->ops.read_reg = igb_read_phy_reg_82580;
+			phy->ops.write_reg = igb_write_phy_reg_82580;
+			break;
+		default:
+			phy->ops.read_reg = igb_read_phy_reg_igp;
+			phy->ops.write_reg = igb_write_phy_reg_igp;
+		}
+	}
+
+	/* set lan id */
+	hw->bus.func = (rd32(E1000_STATUS) & E1000_STATUS_FUNC_MASK) >>
+			E1000_STATUS_FUNC_SHIFT;
+
+	/* Set phy->phy_addr and phy->id. */
+	ret_val = igb_get_phy_id_82575(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* Verify phy id and set remaining function pointers */
+	switch (phy->id) {
+	case M88E1543_E_PHY_ID:
+	case M88E1512_E_PHY_ID:
+	case I347AT4_E_PHY_ID:
+	case M88E1112_E_PHY_ID:
+	case M88E1111_I_PHY_ID:
+		phy->type		= e1000_phy_m88;
+		phy->ops.check_polarity	= igb_check_polarity_m88;
+		phy->ops.get_phy_info	= igb_get_phy_info_m88;
+		if (phy->id != M88E1111_I_PHY_ID)
+			phy->ops.get_cable_length =
+					 igb_get_cable_length_m88_gen2;
+		else
+			phy->ops.get_cable_length = igb_get_cable_length_m88;
+		phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_m88;
+		/* Check if this PHY is configured for media swap. */
+		if (phy->id == M88E1112_E_PHY_ID) {
+			u16 data;
+
+			ret_val = phy->ops.write_reg(hw,
+						     E1000_M88E1112_PAGE_ADDR,
+						     2);
+			if (ret_val)
+				goto out;
+
+			ret_val = phy->ops.read_reg(hw,
+						    E1000_M88E1112_MAC_CTRL_1,
+						    &data);
+			if (ret_val)
+				goto out;
+
+			data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >>
+			       E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT;
+			if (data == E1000_M88E1112_AUTO_COPPER_SGMII ||
+			    data == E1000_M88E1112_AUTO_COPPER_BASEX)
+				hw->mac.ops.check_for_link =
+						igb_check_for_link_media_swap;
+		}
+		if (phy->id == M88E1512_E_PHY_ID) {
+			ret_val = igb_initialize_M88E1512_phy(hw);
+			if (ret_val)
+				goto out;
+		}
+		if (phy->id == M88E1543_E_PHY_ID) {
+			ret_val = igb_initialize_M88E1543_phy(hw);
+			if (ret_val)
+				goto out;
+		}
+		break;
+	case IGP03E1000_E_PHY_ID:
+		phy->type = e1000_phy_igp_3;
+		phy->ops.get_phy_info = igb_get_phy_info_igp;
+		phy->ops.get_cable_length = igb_get_cable_length_igp_2;
+		phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_igp;
+		phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82575;
+		phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state;
+		break;
+	case I82580_I_PHY_ID:
+	case I350_I_PHY_ID:
+		phy->type = e1000_phy_82580;
+		phy->ops.force_speed_duplex =
+					 igb_phy_force_speed_duplex_82580;
+		phy->ops.get_cable_length = igb_get_cable_length_82580;
+		phy->ops.get_phy_info = igb_get_phy_info_82580;
+		phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82580;
+		phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state_82580;
+		break;
+	case I210_I_PHY_ID:
+		phy->type		= e1000_phy_i210;
+		phy->ops.check_polarity	= igb_check_polarity_m88;
+		phy->ops.get_cfg_done	= igb_get_cfg_done_i210;
+		phy->ops.get_phy_info	= igb_get_phy_info_m88;
+		phy->ops.get_cable_length = igb_get_cable_length_m88_gen2;
+		phy->ops.set_d0_lplu_state = igb_set_d0_lplu_state_82580;
+		phy->ops.set_d3_lplu_state = igb_set_d3_lplu_state_82580;
+		phy->ops.force_speed_duplex = igb_phy_force_speed_duplex_m88;
+		break;
+	case BCM54616_E_PHY_ID:
+		phy->type = e1000_phy_bcm54616;
+		break;
+	default:
+		ret_val = -E1000_ERR_PHY;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_init_nvm_params_82575 - Init NVM func ptrs.
+ *  @hw: pointer to the HW structure
+ **/
+static s32 igb_init_nvm_params_82575(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 eecd = rd32(E1000_EECD);
+	u16 size;
+
+	size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >>
+		     E1000_EECD_SIZE_EX_SHIFT);
+
+	/* Added to a constant, "size" becomes the left-shift value
+	 * for setting word_size.
+	 */
+	size += NVM_WORD_SIZE_BASE_SHIFT;
+
+	/* Just in case size is out of range, cap it to the largest
+	 * EEPROM size supported
+	 */
+	if (size > 15)
+		size = 15;
+
+	nvm->word_size = BIT(size);
+	nvm->opcode_bits = 8;
+	nvm->delay_usec = 1;
+
+	switch (nvm->override) {
+	case e1000_nvm_override_spi_large:
+		nvm->page_size = 32;
+		nvm->address_bits = 16;
+		break;
+	case e1000_nvm_override_spi_small:
+		nvm->page_size = 8;
+		nvm->address_bits = 8;
+		break;
+	default:
+		nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8;
+		nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ?
+				    16 : 8;
+		break;
+	}
+	if (nvm->word_size == BIT(15))
+		nvm->page_size = 128;
+
+	nvm->type = e1000_nvm_eeprom_spi;
+
+	/* NVM Function Pointers */
+	nvm->ops.acquire = igb_acquire_nvm_82575;
+	nvm->ops.release = igb_release_nvm_82575;
+	nvm->ops.write = igb_write_nvm_spi;
+	nvm->ops.validate = igb_validate_nvm_checksum;
+	nvm->ops.update = igb_update_nvm_checksum;
+	if (nvm->word_size < BIT(15))
+		nvm->ops.read = igb_read_nvm_eerd;
+	else
+		nvm->ops.read = igb_read_nvm_spi;
+
+	/* override generic family function pointers for specific descendants */
+	switch (hw->mac.type) {
+	case e1000_82580:
+		nvm->ops.validate = igb_validate_nvm_checksum_82580;
+		nvm->ops.update = igb_update_nvm_checksum_82580;
+		break;
+	case e1000_i354:
+	case e1000_i350:
+		nvm->ops.validate = igb_validate_nvm_checksum_i350;
+		nvm->ops.update = igb_update_nvm_checksum_i350;
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ *  igb_init_mac_params_82575 - Init MAC func ptrs.
+ *  @hw: pointer to the HW structure
+ **/
+static s32 igb_init_mac_params_82575(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
+
+	/* Set mta register count */
+	mac->mta_reg_count = 128;
+	/* Set uta register count */
+	mac->uta_reg_count = (hw->mac.type == e1000_82575) ? 0 : 128;
+	/* Set rar entry count */
+	switch (mac->type) {
+	case e1000_82576:
+		mac->rar_entry_count = E1000_RAR_ENTRIES_82576;
+		break;
+	case e1000_82580:
+		mac->rar_entry_count = E1000_RAR_ENTRIES_82580;
+		break;
+	case e1000_i350:
+	case e1000_i354:
+		mac->rar_entry_count = E1000_RAR_ENTRIES_I350;
+		break;
+	default:
+		mac->rar_entry_count = E1000_RAR_ENTRIES_82575;
+		break;
+	}
+	/* reset */
+	if (mac->type >= e1000_82580)
+		mac->ops.reset_hw = igb_reset_hw_82580;
+	else
+		mac->ops.reset_hw = igb_reset_hw_82575;
+
+	if (mac->type >= e1000_i210) {
+		mac->ops.acquire_swfw_sync = igb_acquire_swfw_sync_i210;
+		mac->ops.release_swfw_sync = igb_release_swfw_sync_i210;
+
+	} else {
+		mac->ops.acquire_swfw_sync = igb_acquire_swfw_sync_82575;
+		mac->ops.release_swfw_sync = igb_release_swfw_sync_82575;
+	}
+
+	if ((hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i354))
+		mac->ops.write_vfta = igb_write_vfta_i350;
+	else
+		mac->ops.write_vfta = igb_write_vfta;
+
+	/* Set if part includes ASF firmware */
+	mac->asf_firmware_present = true;
+	/* Set if manageability features are enabled. */
+	mac->arc_subsystem_valid =
+		(rd32(E1000_FWSM) & E1000_FWSM_MODE_MASK)
+			? true : false;
+	/* enable EEE on i350 parts and later parts */
+	if (mac->type >= e1000_i350)
+		dev_spec->eee_disable = false;
+	else
+		dev_spec->eee_disable = true;
+	/* Allow a single clear of the SW semaphore on I210 and newer */
+	if (mac->type >= e1000_i210)
+		dev_spec->clear_semaphore_once = true;
+	/* physical interface link setup */
+	mac->ops.setup_physical_interface =
+		(hw->phy.media_type == e1000_media_type_copper)
+			? igb_setup_copper_link_82575
+			: igb_setup_serdes_link_82575;
+
+	if (mac->type == e1000_82580 || mac->type == e1000_i350) {
+		switch (hw->device_id) {
+		/* feature not supported on these id's */
+		case E1000_DEV_ID_DH89XXCC_SGMII:
+		case E1000_DEV_ID_DH89XXCC_SERDES:
+		case E1000_DEV_ID_DH89XXCC_BACKPLANE:
+		case E1000_DEV_ID_DH89XXCC_SFP:
+			break;
+		default:
+			hw->dev_spec._82575.mas_capable = true;
+			break;
+		}
+	}
+	return 0;
+}
+
+/**
+ *  igb_set_sfp_media_type_82575 - derives SFP module media type.
+ *  @hw: pointer to the HW structure
+ *
+ *  The media type is chosen based on SFP module.
+ *  compatibility flags retrieved from SFP ID EEPROM.
+ **/
+static s32 igb_set_sfp_media_type_82575(struct e1000_hw *hw)
+{
+	s32 ret_val = E1000_ERR_CONFIG;
+	u32 ctrl_ext = 0;
+	struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
+	struct e1000_sfp_flags *eth_flags = &dev_spec->eth_flags;
+	u8 tranceiver_type = 0;
+	s32 timeout = 3;
+
+	/* Turn I2C interface ON and power on sfp cage */
+	ctrl_ext = rd32(E1000_CTRL_EXT);
+	ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA;
+	wr32(E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_I2C_ENA);
+
+	wrfl();
+
+	/* Read SFP module data */
+	while (timeout) {
+		ret_val = igb_read_sfp_data_byte(hw,
+			E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_IDENTIFIER_OFFSET),
+			&tranceiver_type);
+		if (ret_val == 0)
+			break;
+		msleep(100);
+		timeout--;
+	}
+	if (ret_val != 0)
+		goto out;
+
+	ret_val = igb_read_sfp_data_byte(hw,
+			E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_ETH_FLAGS_OFFSET),
+			(u8 *)eth_flags);
+	if (ret_val != 0)
+		goto out;
+
+	/* Check if there is some SFP module plugged and powered */
+	if ((tranceiver_type == E1000_SFF_IDENTIFIER_SFP) ||
+	    (tranceiver_type == E1000_SFF_IDENTIFIER_SFF)) {
+		dev_spec->module_plugged = true;
+		if (eth_flags->e1000_base_lx || eth_flags->e1000_base_sx) {
+			hw->phy.media_type = e1000_media_type_internal_serdes;
+		} else if (eth_flags->e100_base_fx || eth_flags->e100_base_lx) {
+			dev_spec->sgmii_active = true;
+			hw->phy.media_type = e1000_media_type_internal_serdes;
+		} else if (eth_flags->e1000_base_t) {
+			dev_spec->sgmii_active = true;
+			hw->phy.media_type = e1000_media_type_copper;
+		} else {
+			hw->phy.media_type = e1000_media_type_unknown;
+			hw_dbg("PHY module has not been recognized\n");
+			goto out;
+		}
+	} else {
+		hw->phy.media_type = e1000_media_type_unknown;
+	}
+	ret_val = 0;
+out:
+	/* Restore I2C interface setting */
+	wr32(E1000_CTRL_EXT, ctrl_ext);
+	return ret_val;
+}
+
+static s32 igb_get_invariants_82575(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
+	s32 ret_val;
+	u32 ctrl_ext = 0;
+	u32 link_mode = 0;
+
+	switch (hw->device_id) {
+	case E1000_DEV_ID_82575EB_COPPER:
+	case E1000_DEV_ID_82575EB_FIBER_SERDES:
+	case E1000_DEV_ID_82575GB_QUAD_COPPER:
+		mac->type = e1000_82575;
+		break;
+	case E1000_DEV_ID_82576:
+	case E1000_DEV_ID_82576_NS:
+	case E1000_DEV_ID_82576_NS_SERDES:
+	case E1000_DEV_ID_82576_FIBER:
+	case E1000_DEV_ID_82576_SERDES:
+	case E1000_DEV_ID_82576_QUAD_COPPER:
+	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
+	case E1000_DEV_ID_82576_SERDES_QUAD:
+		mac->type = e1000_82576;
+		break;
+	case E1000_DEV_ID_82580_COPPER:
+	case E1000_DEV_ID_82580_FIBER:
+	case E1000_DEV_ID_82580_QUAD_FIBER:
+	case E1000_DEV_ID_82580_SERDES:
+	case E1000_DEV_ID_82580_SGMII:
+	case E1000_DEV_ID_82580_COPPER_DUAL:
+	case E1000_DEV_ID_DH89XXCC_SGMII:
+	case E1000_DEV_ID_DH89XXCC_SERDES:
+	case E1000_DEV_ID_DH89XXCC_BACKPLANE:
+	case E1000_DEV_ID_DH89XXCC_SFP:
+		mac->type = e1000_82580;
+		break;
+	case E1000_DEV_ID_I350_COPPER:
+	case E1000_DEV_ID_I350_FIBER:
+	case E1000_DEV_ID_I350_SERDES:
+	case E1000_DEV_ID_I350_SGMII:
+		mac->type = e1000_i350;
+		break;
+	case E1000_DEV_ID_I210_COPPER:
+	case E1000_DEV_ID_I210_FIBER:
+	case E1000_DEV_ID_I210_SERDES:
+	case E1000_DEV_ID_I210_SGMII:
+	case E1000_DEV_ID_I210_COPPER_FLASHLESS:
+	case E1000_DEV_ID_I210_SERDES_FLASHLESS:
+		mac->type = e1000_i210;
+		break;
+	case E1000_DEV_ID_I211_COPPER:
+		mac->type = e1000_i211;
+		break;
+	case E1000_DEV_ID_I354_BACKPLANE_1GBPS:
+	case E1000_DEV_ID_I354_SGMII:
+	case E1000_DEV_ID_I354_BACKPLANE_2_5GBPS:
+		mac->type = e1000_i354;
+		break;
+	default:
+		return -E1000_ERR_MAC_INIT;
+	}
+
+	/* Set media type */
+	/* The 82575 uses bits 22:23 for link mode. The mode can be changed
+	 * based on the EEPROM. We cannot rely upon device ID. There
+	 * is no distinguishable difference between fiber and internal
+	 * SerDes mode on the 82575. There can be an external PHY attached
+	 * on the SGMII interface. For this, we'll set sgmii_active to true.
+	 */
+	hw->phy.media_type = e1000_media_type_copper;
+	dev_spec->sgmii_active = false;
+	dev_spec->module_plugged = false;
+
+	ctrl_ext = rd32(E1000_CTRL_EXT);
+
+	link_mode = ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK;
+	switch (link_mode) {
+	case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX:
+		hw->phy.media_type = e1000_media_type_internal_serdes;
+		break;
+	case E1000_CTRL_EXT_LINK_MODE_SGMII:
+		/* Get phy control interface type set (MDIO vs. I2C)*/
+		if (igb_sgmii_uses_mdio_82575(hw)) {
+			hw->phy.media_type = e1000_media_type_copper;
+			dev_spec->sgmii_active = true;
+			break;
+		}
+		fallthrough; /* for I2C based SGMII */
+	case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES:
+		/* read media type from SFP EEPROM */
+		ret_val = igb_set_sfp_media_type_82575(hw);
+		if ((ret_val != 0) ||
+		    (hw->phy.media_type == e1000_media_type_unknown)) {
+			/* If media type was not identified then return media
+			 * type defined by the CTRL_EXT settings.
+			 */
+			hw->phy.media_type = e1000_media_type_internal_serdes;
+
+			if (link_mode == E1000_CTRL_EXT_LINK_MODE_SGMII) {
+				hw->phy.media_type = e1000_media_type_copper;
+				dev_spec->sgmii_active = true;
+			}
+
+			break;
+		}
+
+		/* change current link mode setting */
+		ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK;
+
+		if (dev_spec->sgmii_active)
+			ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_SGMII;
+		else
+			ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
+
+		wr32(E1000_CTRL_EXT, ctrl_ext);
+
+		break;
+	default:
+		break;
+	}
+
+	/* mac initialization and operations */
+	ret_val = igb_init_mac_params_82575(hw);
+	if (ret_val)
+		goto out;
+
+	/* NVM initialization */
+	ret_val = igb_init_nvm_params_82575(hw);
+	switch (hw->mac.type) {
+	case e1000_i210:
+	case e1000_i211:
+		ret_val = igb_init_nvm_params_i210(hw);
+		break;
+	default:
+		break;
+	}
+
+	if (ret_val)
+		goto out;
+
+	/* if part supports SR-IOV then initialize mailbox parameters */
+	switch (mac->type) {
+	case e1000_82576:
+	case e1000_i350:
+		igb_init_mbx_params_pf(hw);
+		break;
+	default:
+		break;
+	}
+
+	/* setup PHY parameters */
+	ret_val = igb_init_phy_params_82575(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_acquire_phy_82575 - Acquire rights to access PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire access rights to the correct PHY.  This is a
+ *  function pointer entry point called by the api module.
+ **/
+static s32 igb_acquire_phy_82575(struct e1000_hw *hw)
+{
+	u16 mask = E1000_SWFW_PHY0_SM;
+
+	if (hw->bus.func == E1000_FUNC_1)
+		mask = E1000_SWFW_PHY1_SM;
+	else if (hw->bus.func == E1000_FUNC_2)
+		mask = E1000_SWFW_PHY2_SM;
+	else if (hw->bus.func == E1000_FUNC_3)
+		mask = E1000_SWFW_PHY3_SM;
+
+	return hw->mac.ops.acquire_swfw_sync(hw, mask);
+}
+
+/**
+ *  igb_release_phy_82575 - Release rights to access PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  A wrapper to release access rights to the correct PHY.  This is a
+ *  function pointer entry point called by the api module.
+ **/
+static void igb_release_phy_82575(struct e1000_hw *hw)
+{
+	u16 mask = E1000_SWFW_PHY0_SM;
+
+	if (hw->bus.func == E1000_FUNC_1)
+		mask = E1000_SWFW_PHY1_SM;
+	else if (hw->bus.func == E1000_FUNC_2)
+		mask = E1000_SWFW_PHY2_SM;
+	else if (hw->bus.func == E1000_FUNC_3)
+		mask = E1000_SWFW_PHY3_SM;
+
+	hw->mac.ops.release_swfw_sync(hw, mask);
+}
+
+/**
+ *  igb_read_phy_reg_sgmii_82575 - Read PHY register using sgmii
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Reads the PHY register at offset using the serial gigabit media independent
+ *  interface and stores the retrieved information in data.
+ **/
+static s32 igb_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset,
+					  u16 *data)
+{
+	s32 ret_val = -E1000_ERR_PARAM;
+
+	if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) {
+		hw_dbg("PHY Address %u is out of range\n", offset);
+		goto out;
+	}
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	ret_val = igb_read_phy_reg_i2c(hw, offset, data);
+
+	hw->phy.ops.release(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_write_phy_reg_sgmii_82575 - Write PHY register using sgmii
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Writes the data to PHY register at the offset using the serial gigabit
+ *  media independent interface.
+ **/
+static s32 igb_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset,
+					   u16 data)
+{
+	s32 ret_val = -E1000_ERR_PARAM;
+
+
+	if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) {
+		hw_dbg("PHY Address %d is out of range\n", offset);
+		goto out;
+	}
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	ret_val = igb_write_phy_reg_i2c(hw, offset, data);
+
+	hw->phy.ops.release(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_get_phy_id_82575 - Retrieve PHY addr and id
+ *  @hw: pointer to the HW structure
+ *
+ *  Retrieves the PHY address and ID for both PHY's which do and do not use
+ *  sgmi interface.
+ **/
+static s32 igb_get_phy_id_82575(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32  ret_val = 0;
+	u16 phy_id;
+	u32 ctrl_ext;
+	u32 mdic;
+
+	/* Extra read required for some PHY's on i354 */
+	if (hw->mac.type == e1000_i354)
+		igb_get_phy_id(hw);
+
+	/* For SGMII PHYs, we try the list of possible addresses until
+	 * we find one that works.  For non-SGMII PHYs
+	 * (e.g. integrated copper PHYs), an address of 1 should
+	 * work.  The result of this function should mean phy->phy_addr
+	 * and phy->id are set correctly.
+	 */
+	if (!(igb_sgmii_active_82575(hw))) {
+		phy->addr = 1;
+		ret_val = igb_get_phy_id(hw);
+		goto out;
+	}
+
+	if (igb_sgmii_uses_mdio_82575(hw)) {
+		switch (hw->mac.type) {
+		case e1000_82575:
+		case e1000_82576:
+			mdic = rd32(E1000_MDIC);
+			mdic &= E1000_MDIC_PHY_MASK;
+			phy->addr = mdic >> E1000_MDIC_PHY_SHIFT;
+			break;
+		case e1000_82580:
+		case e1000_i350:
+		case e1000_i354:
+		case e1000_i210:
+		case e1000_i211:
+			mdic = rd32(E1000_MDICNFG);
+			mdic &= E1000_MDICNFG_PHY_MASK;
+			phy->addr = mdic >> E1000_MDICNFG_PHY_SHIFT;
+			break;
+		default:
+			ret_val = -E1000_ERR_PHY;
+			goto out;
+		}
+		ret_val = igb_get_phy_id(hw);
+		goto out;
+	}
+
+	/* Power on sgmii phy if it is disabled */
+	ctrl_ext = rd32(E1000_CTRL_EXT);
+	wr32(E1000_CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_SDP3_DATA);
+	wrfl();
+	msleep(300);
+
+	/* The address field in the I2CCMD register is 3 bits and 0 is invalid.
+	 * Therefore, we need to test 1-7
+	 */
+	for (phy->addr = 1; phy->addr < 8; phy->addr++) {
+		ret_val = igb_read_phy_reg_sgmii_82575(hw, PHY_ID1, &phy_id);
+		if (ret_val == 0) {
+			hw_dbg("Vendor ID 0x%08X read at address %u\n",
+			       phy_id, phy->addr);
+			/* At the time of this writing, The M88 part is
+			 * the only supported SGMII PHY product.
+			 */
+			if (phy_id == M88_VENDOR)
+				break;
+		} else {
+			hw_dbg("PHY address %u was unreadable\n", phy->addr);
+		}
+	}
+
+	/* A valid PHY type couldn't be found. */
+	if (phy->addr == 8) {
+		phy->addr = 0;
+		ret_val = -E1000_ERR_PHY;
+		goto out;
+	} else {
+		ret_val = igb_get_phy_id(hw);
+	}
+
+	/* restore previous sfp cage power state */
+	wr32(E1000_CTRL_EXT, ctrl_ext);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_phy_hw_reset_sgmii_82575 - Performs a PHY reset
+ *  @hw: pointer to the HW structure
+ *
+ *  Resets the PHY using the serial gigabit media independent interface.
+ **/
+static s32 igb_phy_hw_reset_sgmii_82575(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+
+	/* This isn't a true "hard" reset, but is the only reset
+	 * available to us at this time.
+	 */
+
+	hw_dbg("Soft resetting SGMII attached PHY...\n");
+
+	/* SFP documentation requires the following to configure the SPF module
+	 * to work on SGMII.  No further documentation is given.
+	 */
+	ret_val = hw->phy.ops.write_reg(hw, 0x1B, 0x8084);
+	if (ret_val)
+		goto out;
+
+	ret_val = igb_phy_sw_reset(hw);
+	if (ret_val)
+		goto out;
+
+	if (phy->id == M88E1512_E_PHY_ID)
+		ret_val = igb_initialize_M88E1512_phy(hw);
+	if (phy->id == M88E1543_E_PHY_ID)
+		ret_val = igb_initialize_M88E1543_phy(hw);
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_set_d0_lplu_state_82575 - Set Low Power Linkup D0 state
+ *  @hw: pointer to the HW structure
+ *  @active: true to enable LPLU, false to disable
+ *
+ *  Sets the LPLU D0 state according to the active flag.  When
+ *  activating LPLU this function also disables smart speed
+ *  and vice versa.  LPLU will not be activated unless the
+ *  device autonegotiation advertisement meets standards of
+ *  either 10 or 10/100 or 10/100/1000 at all duplexes.
+ *  This is a function pointer entry point only called by
+ *  PHY setup routines.
+ **/
+static s32 igb_set_d0_lplu_state_82575(struct e1000_hw *hw, bool active)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+
+	ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data);
+	if (ret_val)
+		goto out;
+
+	if (active) {
+		data |= IGP02E1000_PM_D0_LPLU;
+		ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
+						 data);
+		if (ret_val)
+			goto out;
+
+		/* When LPLU is enabled, we should disable SmartSpeed */
+		ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+						&data);
+		data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+		ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+						 data);
+		if (ret_val)
+			goto out;
+	} else {
+		data &= ~IGP02E1000_PM_D0_LPLU;
+		ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
+						 data);
+		/* LPLU and SmartSpeed are mutually exclusive.  LPLU is used
+		 * during Dx states where the power conservation is most
+		 * important.  During driver activity we should enable
+		 * SmartSpeed, so performance is maintained.
+		 */
+		if (phy->smart_speed == e1000_smart_speed_on) {
+			ret_val = phy->ops.read_reg(hw,
+					IGP01E1000_PHY_PORT_CONFIG, &data);
+			if (ret_val)
+				goto out;
+
+			data |= IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = phy->ops.write_reg(hw,
+					IGP01E1000_PHY_PORT_CONFIG, data);
+			if (ret_val)
+				goto out;
+		} else if (phy->smart_speed == e1000_smart_speed_off) {
+			ret_val = phy->ops.read_reg(hw,
+					IGP01E1000_PHY_PORT_CONFIG, &data);
+			if (ret_val)
+				goto out;
+
+			data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = phy->ops.write_reg(hw,
+					IGP01E1000_PHY_PORT_CONFIG, data);
+			if (ret_val)
+				goto out;
+		}
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_set_d0_lplu_state_82580 - Set Low Power Linkup D0 state
+ *  @hw: pointer to the HW structure
+ *  @active: true to enable LPLU, false to disable
+ *
+ *  Sets the LPLU D0 state according to the active flag.  When
+ *  activating LPLU this function also disables smart speed
+ *  and vice versa.  LPLU will not be activated unless the
+ *  device autonegotiation advertisement meets standards of
+ *  either 10 or 10/100 or 10/100/1000 at all duplexes.
+ *  This is a function pointer entry point only called by
+ *  PHY setup routines.
+ **/
+static s32 igb_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	u16 data;
+
+	data = rd32(E1000_82580_PHY_POWER_MGMT);
+
+	if (active) {
+		data |= E1000_82580_PM_D0_LPLU;
+
+		/* When LPLU is enabled, we should disable SmartSpeed */
+		data &= ~E1000_82580_PM_SPD;
+	} else {
+		data &= ~E1000_82580_PM_D0_LPLU;
+
+		/* LPLU and SmartSpeed are mutually exclusive.  LPLU is used
+		 * during Dx states where the power conservation is most
+		 * important.  During driver activity we should enable
+		 * SmartSpeed, so performance is maintained.
+		 */
+		if (phy->smart_speed == e1000_smart_speed_on)
+			data |= E1000_82580_PM_SPD;
+		else if (phy->smart_speed == e1000_smart_speed_off)
+			data &= ~E1000_82580_PM_SPD; }
+
+	wr32(E1000_82580_PHY_POWER_MGMT, data);
+	return 0;
+}
+
+/**
+ *  igb_set_d3_lplu_state_82580 - Sets low power link up state for D3
+ *  @hw: pointer to the HW structure
+ *  @active: boolean used to enable/disable lplu
+ *
+ *  Success returns 0, Failure returns 1
+ *
+ *  The low power link up (lplu) state is set to the power management level D3
+ *  and SmartSpeed is disabled when active is true, else clear lplu for D3
+ *  and enable Smartspeed.  LPLU and Smartspeed are mutually exclusive.  LPLU
+ *  is used during Dx states where the power conservation is most important.
+ *  During driver activity, SmartSpeed should be enabled so performance is
+ *  maintained.
+ **/
+static s32 igb_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	u16 data;
+
+	data = rd32(E1000_82580_PHY_POWER_MGMT);
+
+	if (!active) {
+		data &= ~E1000_82580_PM_D3_LPLU;
+		/* LPLU and SmartSpeed are mutually exclusive.  LPLU is used
+		 * during Dx states where the power conservation is most
+		 * important.  During driver activity we should enable
+		 * SmartSpeed, so performance is maintained.
+		 */
+		if (phy->smart_speed == e1000_smart_speed_on)
+			data |= E1000_82580_PM_SPD;
+		else if (phy->smart_speed == e1000_smart_speed_off)
+			data &= ~E1000_82580_PM_SPD;
+	} else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) ||
+		   (phy->autoneg_advertised == E1000_ALL_NOT_GIG) ||
+		   (phy->autoneg_advertised == E1000_ALL_10_SPEED)) {
+		data |= E1000_82580_PM_D3_LPLU;
+		/* When LPLU is enabled, we should disable SmartSpeed */
+		data &= ~E1000_82580_PM_SPD;
+	}
+
+	wr32(E1000_82580_PHY_POWER_MGMT, data);
+	return 0;
+}
+
+/**
+ *  igb_acquire_nvm_82575 - Request for access to EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire the necessary semaphores for exclusive access to the EEPROM.
+ *  Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ *  Return successful if access grant bit set, else clear the request for
+ *  EEPROM access and return -E1000_ERR_NVM (-1).
+ **/
+static s32 igb_acquire_nvm_82575(struct e1000_hw *hw)
+{
+	s32 ret_val;
+
+	ret_val = hw->mac.ops.acquire_swfw_sync(hw, E1000_SWFW_EEP_SM);
+	if (ret_val)
+		goto out;
+
+	ret_val = igb_acquire_nvm(hw);
+
+	if (ret_val)
+		hw->mac.ops.release_swfw_sync(hw, E1000_SWFW_EEP_SM);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_release_nvm_82575 - Release exclusive access to EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Stop any current commands to the EEPROM and clear the EEPROM request bit,
+ *  then release the semaphores acquired.
+ **/
+static void igb_release_nvm_82575(struct e1000_hw *hw)
+{
+	igb_release_nvm(hw);
+	hw->mac.ops.release_swfw_sync(hw, E1000_SWFW_EEP_SM);
+}
+
+/**
+ *  igb_acquire_swfw_sync_82575 - Acquire SW/FW semaphore
+ *  @hw: pointer to the HW structure
+ *  @mask: specifies which semaphore to acquire
+ *
+ *  Acquire the SW/FW semaphore to access the PHY or NVM.  The mask
+ *  will also specify which port we're acquiring the lock for.
+ **/
+static s32 igb_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+	u32 swmask = mask;
+	u32 fwmask = mask << 16;
+	s32 ret_val = 0;
+	s32 i = 0, timeout = 200;
+
+	while (i < timeout) {
+		if (igb_get_hw_semaphore(hw)) {
+			ret_val = -E1000_ERR_SWFW_SYNC;
+			goto out;
+		}
+
+		swfw_sync = rd32(E1000_SW_FW_SYNC);
+		if (!(swfw_sync & (fwmask | swmask)))
+			break;
+
+		/* Firmware currently using resource (fwmask)
+		 * or other software thread using resource (swmask)
+		 */
+		igb_put_hw_semaphore(hw);
+		mdelay(5);
+		i++;
+	}
+
+	if (i == timeout) {
+		hw_dbg("Driver can't access resource, SW_FW_SYNC timeout.\n");
+		ret_val = -E1000_ERR_SWFW_SYNC;
+		goto out;
+	}
+
+	swfw_sync |= swmask;
+	wr32(E1000_SW_FW_SYNC, swfw_sync);
+
+	igb_put_hw_semaphore(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_release_swfw_sync_82575 - Release SW/FW semaphore
+ *  @hw: pointer to the HW structure
+ *  @mask: specifies which semaphore to acquire
+ *
+ *  Release the SW/FW semaphore used to access the PHY or NVM.  The mask
+ *  will also specify which port we're releasing the lock for.
+ **/
+static void igb_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+
+	while (igb_get_hw_semaphore(hw) != 0)
+		; /* Empty */
+
+	swfw_sync = rd32(E1000_SW_FW_SYNC);
+	swfw_sync &= ~mask;
+	wr32(E1000_SW_FW_SYNC, swfw_sync);
+
+	igb_put_hw_semaphore(hw);
+}
+
+/**
+ *  igb_get_cfg_done_82575 - Read config done bit
+ *  @hw: pointer to the HW structure
+ *
+ *  Read the management control register for the config done bit for
+ *  completion status.  NOTE: silicon which is EEPROM-less will fail trying
+ *  to read the config done bit, so an error is *ONLY* logged and returns
+ *  0.  If we were to return with error, EEPROM-less silicon
+ *  would not be able to be reset or change link.
+ **/
+static s32 igb_get_cfg_done_82575(struct e1000_hw *hw)
+{
+	s32 timeout = PHY_CFG_TIMEOUT;
+	u32 mask = E1000_NVM_CFG_DONE_PORT_0;
+
+	if (hw->bus.func == 1)
+		mask = E1000_NVM_CFG_DONE_PORT_1;
+	else if (hw->bus.func == E1000_FUNC_2)
+		mask = E1000_NVM_CFG_DONE_PORT_2;
+	else if (hw->bus.func == E1000_FUNC_3)
+		mask = E1000_NVM_CFG_DONE_PORT_3;
+
+	while (timeout) {
+		if (rd32(E1000_EEMNGCTL) & mask)
+			break;
+		usleep_range(1000, 2000);
+		timeout--;
+	}
+	if (!timeout)
+		hw_dbg("MNG configuration cycle has not completed.\n");
+
+	/* If EEPROM is not marked present, init the PHY manually */
+	if (((rd32(E1000_EECD) & E1000_EECD_PRES) == 0) &&
+	    (hw->phy.type == e1000_phy_igp_3))
+		igb_phy_init_script_igp3(hw);
+
+	return 0;
+}
+
+/**
+ *  igb_get_link_up_info_82575 - Get link speed/duplex info
+ *  @hw: pointer to the HW structure
+ *  @speed: stores the current speed
+ *  @duplex: stores the current duplex
+ *
+ *  This is a wrapper function, if using the serial gigabit media independent
+ *  interface, use PCS to retrieve the link speed and duplex information.
+ *  Otherwise, use the generic function to get the link speed and duplex info.
+ **/
+static s32 igb_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed,
+					u16 *duplex)
+{
+	s32 ret_val;
+
+	if (hw->phy.media_type != e1000_media_type_copper)
+		ret_val = igb_get_pcs_speed_and_duplex_82575(hw, speed,
+							       duplex);
+	else
+		ret_val = igb_get_speed_and_duplex_copper(hw, speed,
+								    duplex);
+
+	return ret_val;
+}
+
+/**
+ *  igb_check_for_link_82575 - Check for link
+ *  @hw: pointer to the HW structure
+ *
+ *  If sgmii is enabled, then use the pcs register to determine link, otherwise
+ *  use the generic interface for determining link.
+ **/
+static s32 igb_check_for_link_82575(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 speed, duplex;
+
+	if (hw->phy.media_type != e1000_media_type_copper) {
+		ret_val = igb_get_pcs_speed_and_duplex_82575(hw, &speed,
+							     &duplex);
+		/* Use this flag to determine if link needs to be checked or
+		 * not.  If  we have link clear the flag so that we do not
+		 * continue to check for link.
+		 */
+		hw->mac.get_link_status = !hw->mac.serdes_has_link;
+
+		/* Configure Flow Control now that Auto-Neg has completed.
+		 * First, we need to restore the desired flow control
+		 * settings because we may have had to re-autoneg with a
+		 * different link partner.
+		 */
+		ret_val = igb_config_fc_after_link_up(hw);
+		if (ret_val)
+			hw_dbg("Error configuring flow control\n");
+	} else {
+		ret_val = igb_check_for_copper_link(hw);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  igb_power_up_serdes_link_82575 - Power up the serdes link after shutdown
+ *  @hw: pointer to the HW structure
+ **/
+void igb_power_up_serdes_link_82575(struct e1000_hw *hw)
+{
+	u32 reg;
+
+
+	if ((hw->phy.media_type != e1000_media_type_internal_serdes) &&
+	    !igb_sgmii_active_82575(hw))
+		return;
+
+	/* Enable PCS to turn on link */
+	reg = rd32(E1000_PCS_CFG0);
+	reg |= E1000_PCS_CFG_PCS_EN;
+	wr32(E1000_PCS_CFG0, reg);
+
+	/* Power up the laser */
+	reg = rd32(E1000_CTRL_EXT);
+	reg &= ~E1000_CTRL_EXT_SDP3_DATA;
+	wr32(E1000_CTRL_EXT, reg);
+
+	/* flush the write to verify completion */
+	wrfl();
+	usleep_range(1000, 2000);
+}
+
+/**
+ *  igb_get_pcs_speed_and_duplex_82575 - Retrieve current speed/duplex
+ *  @hw: pointer to the HW structure
+ *  @speed: stores the current speed
+ *  @duplex: stores the current duplex
+ *
+ *  Using the physical coding sub-layer (PCS), retrieve the current speed and
+ *  duplex, then store the values in the pointers provided.
+ **/
+static s32 igb_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, u16 *speed,
+						u16 *duplex)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 pcs, status;
+
+	/* Set up defaults for the return values of this function */
+	mac->serdes_has_link = false;
+	*speed = 0;
+	*duplex = 0;
+
+	/* Read the PCS Status register for link state. For non-copper mode,
+	 * the status register is not accurate. The PCS status register is
+	 * used instead.
+	 */
+	pcs = rd32(E1000_PCS_LSTAT);
+
+	/* The link up bit determines when link is up on autoneg. The sync ok
+	 * gets set once both sides sync up and agree upon link. Stable link
+	 * can be determined by checking for both link up and link sync ok
+	 */
+	if ((pcs & E1000_PCS_LSTS_LINK_OK) && (pcs & E1000_PCS_LSTS_SYNK_OK)) {
+		mac->serdes_has_link = true;
+
+		/* Detect and store PCS speed */
+		if (pcs & E1000_PCS_LSTS_SPEED_1000)
+			*speed = SPEED_1000;
+		else if (pcs & E1000_PCS_LSTS_SPEED_100)
+			*speed = SPEED_100;
+		else
+			*speed = SPEED_10;
+
+		/* Detect and store PCS duplex */
+		if (pcs & E1000_PCS_LSTS_DUPLEX_FULL)
+			*duplex = FULL_DUPLEX;
+		else
+			*duplex = HALF_DUPLEX;
+
+	/* Check if it is an I354 2.5Gb backplane connection. */
+		if (mac->type == e1000_i354) {
+			status = rd32(E1000_STATUS);
+			if ((status & E1000_STATUS_2P5_SKU) &&
+			    !(status & E1000_STATUS_2P5_SKU_OVER)) {
+				*speed = SPEED_2500;
+				*duplex = FULL_DUPLEX;
+				hw_dbg("2500 Mbs, ");
+				hw_dbg("Full Duplex\n");
+			}
+		}
+
+	}
+
+	return 0;
+}
+
+/**
+ *  igb_shutdown_serdes_link_82575 - Remove link during power down
+ *  @hw: pointer to the HW structure
+ *
+ *  In the case of fiber serdes, shut down optics and PCS on driver unload
+ *  when management pass thru is not enabled.
+ **/
+void igb_shutdown_serdes_link_82575(struct e1000_hw *hw)
+{
+	u32 reg;
+
+	if (hw->phy.media_type != e1000_media_type_internal_serdes &&
+	    igb_sgmii_active_82575(hw))
+		return;
+
+	if (!igb_enable_mng_pass_thru(hw)) {
+		/* Disable PCS to turn off link */
+		reg = rd32(E1000_PCS_CFG0);
+		reg &= ~E1000_PCS_CFG_PCS_EN;
+		wr32(E1000_PCS_CFG0, reg);
+
+		/* shutdown the laser */
+		reg = rd32(E1000_CTRL_EXT);
+		reg |= E1000_CTRL_EXT_SDP3_DATA;
+		wr32(E1000_CTRL_EXT, reg);
+
+		/* flush the write to verify completion */
+		wrfl();
+		usleep_range(1000, 2000);
+	}
+}
+
+/**
+ *  igb_reset_hw_82575 - Reset hardware
+ *  @hw: pointer to the HW structure
+ *
+ *  This resets the hardware into a known state.  This is a
+ *  function pointer entry point called by the api module.
+ **/
+static s32 igb_reset_hw_82575(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 ret_val;
+
+	/* Prevent the PCI-E bus from sticking if there is no TLP connection
+	 * on the last TLP read/write transaction when MAC is reset.
+	 */
+	ret_val = igb_disable_pcie_master(hw);
+	if (ret_val)
+		hw_dbg("PCI-E Master disable polling has failed.\n");
+
+	/* set the completion timeout for interface */
+	ret_val = igb_set_pcie_completion_timeout(hw);
+	if (ret_val)
+		hw_dbg("PCI-E Set completion timeout has failed.\n");
+
+	hw_dbg("Masking off all interrupts\n");
+	wr32(E1000_IMC, 0xffffffff);
+
+	wr32(E1000_RCTL, 0);
+	wr32(E1000_TCTL, E1000_TCTL_PSP);
+	wrfl();
+
+	usleep_range(10000, 20000);
+
+	ctrl = rd32(E1000_CTRL);
+
+	hw_dbg("Issuing a global reset to MAC\n");
+	wr32(E1000_CTRL, ctrl | E1000_CTRL_RST);
+
+	ret_val = igb_get_auto_rd_done(hw);
+	if (ret_val) {
+		/* When auto config read does not complete, do not
+		 * return with an error. This can happen in situations
+		 * where there is no eeprom and prevents getting link.
+		 */
+		hw_dbg("Auto Read Done did not complete\n");
+	}
+
+	/* If EEPROM is not present, run manual init scripts */
+	if ((rd32(E1000_EECD) & E1000_EECD_PRES) == 0)
+		igb_reset_init_script_82575(hw);
+
+	/* Clear any pending interrupt events. */
+	wr32(E1000_IMC, 0xffffffff);
+	rd32(E1000_ICR);
+
+	/* Install any alternate MAC address into RAR0 */
+	ret_val = igb_check_alt_mac_addr(hw);
+
+	return ret_val;
+}
+
+/**
+ *  igb_init_hw_82575 - Initialize hardware
+ *  @hw: pointer to the HW structure
+ *
+ *  This inits the hardware readying it for operation.
+ **/
+static s32 igb_init_hw_82575(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	s32 ret_val;
+	u16 i, rar_count = mac->rar_entry_count;
+
+	if ((hw->mac.type >= e1000_i210) &&
+	    !(igb_get_flash_presence_i210(hw))) {
+		ret_val = igb_pll_workaround_i210(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Initialize identification LED */
+	ret_val = igb_id_led_init(hw);
+	if (ret_val) {
+		hw_dbg("Error initializing identification LED\n");
+		/* This is not fatal and we should not stop init due to this */
+	}
+
+	/* Disabling VLAN filtering */
+	hw_dbg("Initializing the IEEE VLAN\n");
+	igb_clear_vfta(hw);
+
+	/* Setup the receive address */
+	igb_init_rx_addrs(hw, rar_count);
+
+	/* Zero out the Multicast HASH table */
+	hw_dbg("Zeroing the MTA\n");
+	for (i = 0; i < mac->mta_reg_count; i++)
+		array_wr32(E1000_MTA, i, 0);
+
+	/* Zero out the Unicast HASH table */
+	hw_dbg("Zeroing the UTA\n");
+	for (i = 0; i < mac->uta_reg_count; i++)
+		array_wr32(E1000_UTA, i, 0);
+
+	/* Setup link and flow control */
+	ret_val = igb_setup_link(hw);
+
+	/* Clear all of the statistics registers (clear on read).  It is
+	 * important that we do this after we have tried to establish link
+	 * because the symbol error count will increment wildly if there
+	 * is no link.
+	 */
+	igb_clear_hw_cntrs_82575(hw);
+	return ret_val;
+}
+
+/**
+ *  igb_setup_copper_link_82575 - Configure copper link settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Configures the link for auto-neg or forced speed and duplex.  Then we check
+ *  for link, once link is established calls to configure collision distance
+ *  and flow control are called.
+ **/
+static s32 igb_setup_copper_link_82575(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32  ret_val;
+	u32 phpm_reg;
+
+	ctrl = rd32(E1000_CTRL);
+	ctrl |= E1000_CTRL_SLU;
+	ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+	wr32(E1000_CTRL, ctrl);
+
+	/* Clear Go Link Disconnect bit on supported devices */
+	switch (hw->mac.type) {
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i210:
+	case e1000_i211:
+		phpm_reg = rd32(E1000_82580_PHY_POWER_MGMT);
+		phpm_reg &= ~E1000_82580_PM_GO_LINKD;
+		wr32(E1000_82580_PHY_POWER_MGMT, phpm_reg);
+		break;
+	default:
+		break;
+	}
+
+	ret_val = igb_setup_serdes_link_82575(hw);
+	if (ret_val)
+		goto out;
+
+	if (igb_sgmii_active_82575(hw) && !hw->phy.reset_disable) {
+		/* allow time for SFP cage time to power up phy */
+		msleep(300);
+
+		ret_val = hw->phy.ops.reset(hw);
+		if (ret_val) {
+			hw_dbg("Error resetting the PHY.\n");
+			goto out;
+		}
+	}
+	switch (hw->phy.type) {
+	case e1000_phy_i210:
+	case e1000_phy_m88:
+		switch (hw->phy.id) {
+		case I347AT4_E_PHY_ID:
+		case M88E1112_E_PHY_ID:
+		case M88E1543_E_PHY_ID:
+		case M88E1512_E_PHY_ID:
+		case I210_I_PHY_ID:
+			ret_val = igb_copper_link_setup_m88_gen2(hw);
+			break;
+		default:
+			ret_val = igb_copper_link_setup_m88(hw);
+			break;
+		}
+		break;
+	case e1000_phy_igp_3:
+		ret_val = igb_copper_link_setup_igp(hw);
+		break;
+	case e1000_phy_82580:
+		ret_val = igb_copper_link_setup_82580(hw);
+		break;
+	case e1000_phy_bcm54616:
+		ret_val = 0;
+		break;
+	default:
+		ret_val = -E1000_ERR_PHY;
+		break;
+	}
+
+	if (ret_val)
+		goto out;
+
+	ret_val = igb_setup_copper_link(hw);
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_setup_serdes_link_82575 - Setup link for serdes
+ *  @hw: pointer to the HW structure
+ *
+ *  Configure the physical coding sub-layer (PCS) link.  The PCS link is
+ *  used on copper connections where the serialized gigabit media independent
+ *  interface (sgmii), or serdes fiber is being used.  Configures the link
+ *  for auto-negotiation or forces speed/duplex.
+ **/
+static s32 igb_setup_serdes_link_82575(struct e1000_hw *hw)
+{
+	u32 ctrl_ext, ctrl_reg, reg, anadv_reg;
+	bool pcs_autoneg;
+	s32 ret_val = 0;
+	u16 data;
+
+	if ((hw->phy.media_type != e1000_media_type_internal_serdes) &&
+	    !igb_sgmii_active_82575(hw))
+		return ret_val;
+
+
+	/* On the 82575, SerDes loopback mode persists until it is
+	 * explicitly turned off or a power cycle is performed.  A read to
+	 * the register does not indicate its status.  Therefore, we ensure
+	 * loopback mode is disabled during initialization.
+	 */
+	wr32(E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK);
+
+	/* power on the sfp cage if present and turn on I2C */
+	ctrl_ext = rd32(E1000_CTRL_EXT);
+	ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA;
+	ctrl_ext |= E1000_CTRL_I2C_ENA;
+	wr32(E1000_CTRL_EXT, ctrl_ext);
+
+	ctrl_reg = rd32(E1000_CTRL);
+	ctrl_reg |= E1000_CTRL_SLU;
+
+	if (hw->mac.type == e1000_82575 || hw->mac.type == e1000_82576) {
+		/* set both sw defined pins */
+		ctrl_reg |= E1000_CTRL_SWDPIN0 | E1000_CTRL_SWDPIN1;
+
+		/* Set switch control to serdes energy detect */
+		reg = rd32(E1000_CONNSW);
+		reg |= E1000_CONNSW_ENRGSRC;
+		wr32(E1000_CONNSW, reg);
+	}
+
+	reg = rd32(E1000_PCS_LCTL);
+
+	/* default pcs_autoneg to the same setting as mac autoneg */
+	pcs_autoneg = hw->mac.autoneg;
+
+	switch (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK) {
+	case E1000_CTRL_EXT_LINK_MODE_SGMII:
+		/* sgmii mode lets the phy handle forcing speed/duplex */
+		pcs_autoneg = true;
+		/* autoneg time out should be disabled for SGMII mode */
+		reg &= ~(E1000_PCS_LCTL_AN_TIMEOUT);
+		break;
+	case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX:
+		/* disable PCS autoneg and support parallel detect only */
+		pcs_autoneg = false;
+		fallthrough;
+	default:
+		if (hw->mac.type == e1000_82575 ||
+		    hw->mac.type == e1000_82576) {
+			ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &data);
+			if (ret_val) {
+				hw_dbg(KERN_DEBUG "NVM Read Error\n\n");
+				return ret_val;
+			}
+
+			if (data & E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT)
+				pcs_autoneg = false;
+		}
+
+		/* non-SGMII modes only supports a speed of 1000/Full for the
+		 * link so it is best to just force the MAC and let the pcs
+		 * link either autoneg or be forced to 1000/Full
+		 */
+		ctrl_reg |= E1000_CTRL_SPD_1000 | E1000_CTRL_FRCSPD |
+				E1000_CTRL_FD | E1000_CTRL_FRCDPX;
+
+		/* set speed of 1000/Full if speed/duplex is forced */
+		reg |= E1000_PCS_LCTL_FSV_1000 | E1000_PCS_LCTL_FDV_FULL;
+		break;
+	}
+
+	wr32(E1000_CTRL, ctrl_reg);
+
+	/* New SerDes mode allows for forcing speed or autonegotiating speed
+	 * at 1gb. Autoneg should be default set by most drivers. This is the
+	 * mode that will be compatible with older link partners and switches.
+	 * However, both are supported by the hardware and some drivers/tools.
+	 */
+	reg &= ~(E1000_PCS_LCTL_AN_ENABLE | E1000_PCS_LCTL_FLV_LINK_UP |
+		E1000_PCS_LCTL_FSD | E1000_PCS_LCTL_FORCE_LINK);
+
+	if (pcs_autoneg) {
+		/* Set PCS register for autoneg */
+		reg |= E1000_PCS_LCTL_AN_ENABLE | /* Enable Autoneg */
+		       E1000_PCS_LCTL_AN_RESTART; /* Restart autoneg */
+
+		/* Disable force flow control for autoneg */
+		reg &= ~E1000_PCS_LCTL_FORCE_FCTRL;
+
+		/* Configure flow control advertisement for autoneg */
+		anadv_reg = rd32(E1000_PCS_ANADV);
+		anadv_reg &= ~(E1000_TXCW_ASM_DIR | E1000_TXCW_PAUSE);
+		switch (hw->fc.requested_mode) {
+		case e1000_fc_full:
+		case e1000_fc_rx_pause:
+			anadv_reg |= E1000_TXCW_ASM_DIR;
+			anadv_reg |= E1000_TXCW_PAUSE;
+			break;
+		case e1000_fc_tx_pause:
+			anadv_reg |= E1000_TXCW_ASM_DIR;
+			break;
+		default:
+			break;
+		}
+		wr32(E1000_PCS_ANADV, anadv_reg);
+
+		hw_dbg("Configuring Autoneg:PCS_LCTL=0x%08X\n", reg);
+	} else {
+		/* Set PCS register for forced link */
+		reg |= E1000_PCS_LCTL_FSD;        /* Force Speed */
+
+		/* Force flow control for forced link */
+		reg |= E1000_PCS_LCTL_FORCE_FCTRL;
+
+		hw_dbg("Configuring Forced Link:PCS_LCTL=0x%08X\n", reg);
+	}
+
+	wr32(E1000_PCS_LCTL, reg);
+
+	if (!pcs_autoneg && !igb_sgmii_active_82575(hw))
+		igb_force_mac_fc(hw);
+
+	return ret_val;
+}
+
+/**
+ *  igb_sgmii_active_82575 - Return sgmii state
+ *  @hw: pointer to the HW structure
+ *
+ *  82575 silicon has a serialized gigabit media independent interface (sgmii)
+ *  which can be enabled for use in the embedded applications.  Simply
+ *  return the current state of the sgmii interface.
+ **/
+static bool igb_sgmii_active_82575(struct e1000_hw *hw)
+{
+	struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
+	return dev_spec->sgmii_active;
+}
+
+/**
+ *  igb_reset_init_script_82575 - Inits HW defaults after reset
+ *  @hw: pointer to the HW structure
+ *
+ *  Inits recommended HW defaults after a reset when there is no EEPROM
+ *  detected. This is only for the 82575.
+ **/
+static s32 igb_reset_init_script_82575(struct e1000_hw *hw)
+{
+	if (hw->mac.type == e1000_82575) {
+		hw_dbg("Running reset init script for 82575\n");
+		/* SerDes configuration via SERDESCTRL */
+		igb_write_8bit_ctrl_reg(hw, E1000_SCTL, 0x00, 0x0C);
+		igb_write_8bit_ctrl_reg(hw, E1000_SCTL, 0x01, 0x78);
+		igb_write_8bit_ctrl_reg(hw, E1000_SCTL, 0x1B, 0x23);
+		igb_write_8bit_ctrl_reg(hw, E1000_SCTL, 0x23, 0x15);
+
+		/* CCM configuration via CCMCTL register */
+		igb_write_8bit_ctrl_reg(hw, E1000_CCMCTL, 0x14, 0x00);
+		igb_write_8bit_ctrl_reg(hw, E1000_CCMCTL, 0x10, 0x00);
+
+		/* PCIe lanes configuration */
+		igb_write_8bit_ctrl_reg(hw, E1000_GIOCTL, 0x00, 0xEC);
+		igb_write_8bit_ctrl_reg(hw, E1000_GIOCTL, 0x61, 0xDF);
+		igb_write_8bit_ctrl_reg(hw, E1000_GIOCTL, 0x34, 0x05);
+		igb_write_8bit_ctrl_reg(hw, E1000_GIOCTL, 0x2F, 0x81);
+
+		/* PCIe PLL Configuration */
+		igb_write_8bit_ctrl_reg(hw, E1000_SCCTL, 0x02, 0x47);
+		igb_write_8bit_ctrl_reg(hw, E1000_SCCTL, 0x14, 0x00);
+		igb_write_8bit_ctrl_reg(hw, E1000_SCCTL, 0x10, 0x00);
+	}
+
+	return 0;
+}
+
+/**
+ *  igb_read_mac_addr_82575 - Read device MAC address
+ *  @hw: pointer to the HW structure
+ **/
+static s32 igb_read_mac_addr_82575(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+
+	/* If there's an alternate MAC address place it in RAR0
+	 * so that it will override the Si installed default perm
+	 * address.
+	 */
+	ret_val = igb_check_alt_mac_addr(hw);
+	if (ret_val)
+		goto out;
+
+	ret_val = igb_read_mac_addr(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igb_power_down_phy_copper_82575 - Remove link during PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, or wake on lan is not enabled, remove the link.
+ **/
+void igb_power_down_phy_copper_82575(struct e1000_hw *hw)
+{
+	/* If the management interface is not enabled, then power down */
+	if (!(igb_enable_mng_pass_thru(hw) || igb_check_reset_block(hw)))
+		igb_power_down_phy_copper(hw);
+}
+
+/**
+ *  igb_clear_hw_cntrs_82575 - Clear device specific hardware counters
+ *  @hw: pointer to the HW structure
+ *
+ *  Clears the hardware counters by reading the counter registers.
+ **/
+static void igb_clear_hw_cntrs_82575(struct e1000_hw *hw)
+{
+	igb_clear_hw_cntrs_base(hw);
+
+	rd32(E1000_PRC64);
+	rd32(E1000_PRC127);
+	rd32(E1000_PRC255);
+	rd32(E1000_PRC511);
+	rd32(E1000_PRC1023);
+	rd32(E1000_PRC1522);
+	rd32(E1000_PTC64);
+	rd32(E1000_PTC127);
+	rd32(E1000_PTC255);
+	rd32(E1000_PTC511);
+	rd32(E1000_PTC1023);
+	rd32(E1000_PTC1522);
+
+	rd32(E1000_ALGNERRC);
+	rd32(E1000_RXERRC);
+	rd32(E1000_TNCRS);
+	rd32(E1000_CEXTERR);
+	rd32(E1000_TSCTC);
+	rd32(E1000_TSCTFC);
+
+	rd32(E1000_MGTPRC);
+	rd32(E1000_MGTPDC);
+	rd32(E1000_MGTPTC);
+
+	rd32(E1000_IAC);
+	rd32(E1000_ICRXOC);
+
+	rd32(E1000_ICRXPTC);
+	rd32(E1000_ICRXATC);
+	rd32(E1000_ICTXPTC);
+	rd32(E1000_ICTXATC);
+	rd32(E1000_ICTXQEC);
+	rd32(E1000_ICTXQMTC);
+	rd32(E1000_ICRXDMTC);
+
+	rd32(E1000_CBTMPC);
+	rd32(E1000_HTDPMC);
+	rd32(E1000_CBRMPC);
+	rd32(E1000_RPTHC);
+	rd32(E1000_HGPTC);
+	rd32(E1000_HTCBDPC);
+	rd32(E1000_HGORCL);
+	rd32(E1000_HGORCH);
+	rd32(E1000_HGOTCL);
+	rd32(E1000_HGOTCH);
+	rd32(E1000_LENERRS);
+
+	/* This register should not be read in copper configurations */
+	if (hw->phy.media_type == e1000_media_type_internal_serdes ||
+	    igb_sgmii_active_82575(hw))
+		rd32(E1000_SCVPC);
+}
+
+/**
+ *  igb_rx_fifo_flush_82575 - Clean rx fifo after RX enable
+ *  @hw: pointer to the HW structure
+ *
+ *  After rx enable if manageability is enabled then there is likely some
+ *  bad data at the start of the fifo and possibly in the DMA fifo. This
+ *  function clears the fifos and flushes any packets that came in as rx was
+ *  being enabled.
+ **/
+void igb_rx_fifo_flush_82575(struct e1000_hw *hw)
+{
+	u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled;
+	int i, ms_wait;
+
+	/* disable IPv6 options as per hardware errata */
+	rfctl = rd32(E1000_RFCTL);
+	rfctl |= E1000_RFCTL_IPV6_EX_DIS;
+	wr32(E1000_RFCTL, rfctl);
+
+	if (hw->mac.type != e1000_82575 ||
+	    !(rd32(E1000_MANC) & E1000_MANC_RCV_TCO_EN))
+		return;
+
+	/* Disable all RX queues */
+	for (i = 0; i < 4; i++) {
+		rxdctl[i] = rd32(E1000_RXDCTL(i));
+		wr32(E1000_RXDCTL(i),
+		     rxdctl[i] & ~E1000_RXDCTL_QUEUE_ENABLE);
+	}
+	/* Poll all queues to verify they have shut down */
+	for (ms_wait = 0; ms_wait < 10; ms_wait++) {
+		usleep_range(1000, 2000);
+		rx_enabled = 0;
+		for (i = 0; i < 4; i++)
+			rx_enabled |= rd32(E1000_RXDCTL(i));
+		if (!(rx_enabled & E1000_RXDCTL_QUEUE_ENABLE))
+			break;
+	}
+
+	if (ms_wait == 10)
+		hw_dbg("Queue disable timed out after 10ms\n");
+
+	/* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all
+	 * incoming packets are rejected.  Set enable and wait 2ms so that
+	 * any packet that was coming in as RCTL.EN was set is flushed
+	 */
+	wr32(E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF);
+
+	rlpml = rd32(E1000_RLPML);
+	wr32(E1000_RLPML, 0);
+
+	rctl = rd32(E1000_RCTL);
+	temp_rctl = rctl & ~(E1000_RCTL_EN | E1000_RCTL_SBP);
+	temp_rctl |= E1000_RCTL_LPE;
+
+	wr32(E1000_RCTL, temp_rctl);
+	wr32(E1000_RCTL, temp_rctl | E1000_RCTL_EN);
+	wrfl();
+	usleep_range(2000, 3000);
+
+	/* Enable RX queues that were previously enabled and restore our
+	 * previous state
+	 */
+	for (i = 0; i < 4; i++)
+		wr32(E1000_RXDCTL(i), rxdctl[i]);
+	wr32(E1000_RCTL, rctl);
+	wrfl();
+
+	wr32(E1000_RLPML, rlpml);
+	wr32(E1000_RFCTL, rfctl);
+
+	/* Flush receive errors generated by workaround */
+	rd32(E1000_ROC);
+	rd32(E1000_RNBC);
+	rd32(E1000_MPC);
+}
+
+/**
+ *  igb_set_pcie_completion_timeout - set pci-e completion timeout
+ *  @hw: pointer to the HW structure
+ *
+ *  The defaults for 82575 and 82576 should be in the range of 50us to 50ms,
+ *  however the hardware default for these parts is 500us to 1ms which is less
+ *  than the 10ms recommended by the pci-e spec.  To address this we need to
+ *  increase the value to either 10ms to 200ms for capability version 1 config,
+ *  or 16ms to 55ms for version 2.
+ **/
+static s32 igb_set_pcie_completion_timeout(struct e1000_hw *hw)
+{
+	u32 gcr = rd32(E1000_GCR);
+	s32 ret_val = 0;
+	u16 pcie_devctl2;
+
+	/* only take action if timeout value is defaulted to 0 */
+	if (gcr & E1000_GCR_CMPL_TMOUT_MASK)
+		goto out;
+
+	/* if capabilities version is type 1 we can write the
+	 * timeout of 10ms to 200ms through the GCR register
+	 */
+	if (!(gcr & E1000_GCR_CAP_VER2)) {
+		gcr |= E1000_GCR_CMPL_TMOUT_10ms;
+		goto out;
+	}
+
+	/* for version 2 capabilities we need to write the config space
+	 * directly in order to set the completion timeout value for
+	 * 16ms to 55ms
+	 */
+	ret_val = igb_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
+					&pcie_devctl2);
+	if (ret_val)
+		goto out;
+
+	pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms;
+
+	ret_val = igb_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
+					 &pcie_devctl2);
+out:
+	/* disable completion timeout resend */
+	gcr &= ~E1000_GCR_CMPL_TMOUT_RESEND;
+
+	wr32(E1000_GCR, gcr);
+	return ret_val;
+}
+
+/**
+ *  igb_vmdq_set_anti_spoofing_pf - enable or disable anti-spoofing
+ *  @hw: pointer to the hardware struct
+ *  @enable: state to enter, either enabled or disabled
+ *  @pf: Physical Function pool - do not set anti-spoofing for the PF
+ *
+ *  enables/disables L2 switch anti-spoofing functionality.
+ **/
+void igb_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf)
+{
+	u32 reg_val, reg_offset;
+
+	switch (hw->mac.type) {
+	case e1000_82576:
+		reg_offset = E1000_DTXSWC;
+		break;
+	case e1000_i350:
+	case e1000_i354:
+		reg_offset = E1000_TXSWC;
+		break;
+	default:
+		return;
+	}
+
+	reg_val = rd32(reg_offset);
+	if (enable) {
+		reg_val |= (E1000_DTXSWC_MAC_SPOOF_MASK |
+			     E1000_DTXSWC_VLAN_SPOOF_MASK);
+		/* The PF can spoof - it has to in order to
+		 * support emulation mode NICs
+		 */
+		reg_val ^= (BIT(pf) | BIT(pf + MAX_NUM_VFS));
+	} else {
+		reg_val &= ~(E1000_DTXSWC_MAC_SPOOF_MASK |
+			     E1000_DTXSWC_VLAN_SPOOF_MASK);
+	}
+	wr32(reg_offset, reg_val);
+}
+
+/**
+ *  igb_vmdq_set_loopback_pf - enable or disable vmdq loopback
+ *  @hw: pointer to the hardware struct
+ *  @enable: state to enter, either enabled or disabled
+ *
+ *  enables/disables L2 switch loopback functionality.
+ **/
+void igb_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable)
+{
+	u32 dtxswc;
+
+	switch (hw->mac.type) {
+	case e1000_82576:
+		dtxswc = rd32(E1000_DTXSWC);
+		if (enable)
+			dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+		else
+			dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+		wr32(E1000_DTXSWC, dtxswc);
+		break;
+	case e1000_i354:
+	case e1000_i350:
+		dtxswc = rd32(E1000_TXSWC);
+		if (enable)
+			dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+		else
+			dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN;
+		wr32(E1000_TXSWC, dtxswc);
+		break;
+	default:
+		/* Currently no other hardware supports loopback */
+		break;
+	}
+
+}
+
+/**
+ *  igb_vmdq_set_replication_pf - enable or disable vmdq replication
+ *  @hw: pointer to the hardware struct
+ *  @enable: state to enter, either enabled or disabled
+ *
+ *  enables/disables replication of packets across multiple pools.
+ **/
+void igb_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable)
+{
+	u32 vt_ctl = rd32(E1000_VT_CTL);
+
+	if (enable)
+		vt_ctl |= E1000_VT_CTL_VM_REPL_EN;
+	else
+		vt_ctl &= ~E1000_VT_CTL_VM_REPL_EN;
+
+	wr32(E1000_VT_CTL, vt_ctl);
+}
+
+/**
+ *  igb_read_phy_reg_82580 - Read 82580 MDI control register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Reads the MDI control register in the PHY at offset and stores the
+ *  information read to data.
+ **/
+s32 igb_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	s32 ret_val;
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	ret_val = igb_read_phy_reg_mdic(hw, offset, data);
+
+	hw->phy.ops.release(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_write_phy_reg_82580 - Write 82580 MDI control register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write to register at offset
+ *
+ *  Writes data to MDI control register in the PHY at offset.
+ **/
+s32 igb_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	s32 ret_val;
+
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	ret_val = igb_write_phy_reg_mdic(hw, offset, data);
+
+	hw->phy.ops.release(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_reset_mdicnfg_82580 - Reset MDICNFG destination and com_mdio bits
+ *  @hw: pointer to the HW structure
+ *
+ *  This resets the MDICNFG.Destination and MDICNFG.Com_MDIO bits based on
+ *  the values found in the EEPROM.  This addresses an issue in which these
+ *  bits are not restored from EEPROM after reset.
+ **/
+static s32 igb_reset_mdicnfg_82580(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u32 mdicnfg;
+	u16 nvm_data = 0;
+
+	if (hw->mac.type != e1000_82580)
+		goto out;
+	if (!igb_sgmii_active_82575(hw))
+		goto out;
+
+	ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
+				   NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
+				   &nvm_data);
+	if (ret_val) {
+		hw_dbg("NVM Read Error\n");
+		goto out;
+	}
+
+	mdicnfg = rd32(E1000_MDICNFG);
+	if (nvm_data & NVM_WORD24_EXT_MDIO)
+		mdicnfg |= E1000_MDICNFG_EXT_MDIO;
+	if (nvm_data & NVM_WORD24_COM_MDIO)
+		mdicnfg |= E1000_MDICNFG_COM_MDIO;
+	wr32(E1000_MDICNFG, mdicnfg);
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_reset_hw_82580 - Reset hardware
+ *  @hw: pointer to the HW structure
+ *
+ *  This resets function or entire device (all ports, etc.)
+ *  to a known state.
+ **/
+static s32 igb_reset_hw_82580(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	/* BH SW mailbox bit in SW_FW_SYNC */
+	u16 swmbsw_mask = E1000_SW_SYNCH_MB;
+	u32 ctrl;
+	bool global_device_reset = hw->dev_spec._82575.global_device_reset;
+
+	hw->dev_spec._82575.global_device_reset = false;
+
+	/* due to hw errata, global device reset doesn't always
+	 * work on 82580
+	 */
+	if (hw->mac.type == e1000_82580)
+		global_device_reset = false;
+
+	/* Get current control state. */
+	ctrl = rd32(E1000_CTRL);
+
+	/* Prevent the PCI-E bus from sticking if there is no TLP connection
+	 * on the last TLP read/write transaction when MAC is reset.
+	 */
+	ret_val = igb_disable_pcie_master(hw);
+	if (ret_val)
+		hw_dbg("PCI-E Master disable polling has failed.\n");
+
+	hw_dbg("Masking off all interrupts\n");
+	wr32(E1000_IMC, 0xffffffff);
+	wr32(E1000_RCTL, 0);
+	wr32(E1000_TCTL, E1000_TCTL_PSP);
+	wrfl();
+
+	usleep_range(10000, 11000);
+
+	/* Determine whether or not a global dev reset is requested */
+	if (global_device_reset &&
+		hw->mac.ops.acquire_swfw_sync(hw, swmbsw_mask))
+			global_device_reset = false;
+
+	if (global_device_reset &&
+		!(rd32(E1000_STATUS) & E1000_STAT_DEV_RST_SET))
+		ctrl |= E1000_CTRL_DEV_RST;
+	else
+		ctrl |= E1000_CTRL_RST;
+
+	wr32(E1000_CTRL, ctrl);
+	wrfl();
+
+	/* Add delay to insure DEV_RST has time to complete */
+	if (global_device_reset)
+		usleep_range(5000, 6000);
+
+	ret_val = igb_get_auto_rd_done(hw);
+	if (ret_val) {
+		/* When auto config read does not complete, do not
+		 * return with an error. This can happen in situations
+		 * where there is no eeprom and prevents getting link.
+		 */
+		hw_dbg("Auto Read Done did not complete\n");
+	}
+
+	/* clear global device reset status bit */
+	wr32(E1000_STATUS, E1000_STAT_DEV_RST_SET);
+
+	/* Clear any pending interrupt events. */
+	wr32(E1000_IMC, 0xffffffff);
+	rd32(E1000_ICR);
+
+	ret_val = igb_reset_mdicnfg_82580(hw);
+	if (ret_val)
+		hw_dbg("Could not reset MDICNFG based on EEPROM\n");
+
+	/* Install any alternate MAC address into RAR0 */
+	ret_val = igb_check_alt_mac_addr(hw);
+
+	/* Release semaphore */
+	if (global_device_reset)
+		hw->mac.ops.release_swfw_sync(hw, swmbsw_mask);
+
+	return ret_val;
+}
+
+/**
+ *  igb_rxpbs_adjust_82580 - adjust RXPBS value to reflect actual RX PBA size
+ *  @data: data received by reading RXPBS register
+ *
+ *  The 82580 uses a table based approach for packet buffer allocation sizes.
+ *  This function converts the retrieved value into the correct table value
+ *     0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7
+ *  0x0 36  72 144   1   2   4   8  16
+ *  0x8 35  70 140 rsv rsv rsv rsv rsv
+ */
+u16 igb_rxpbs_adjust_82580(u32 data)
+{
+	u16 ret_val = 0;
+
+	if (data < ARRAY_SIZE(e1000_82580_rxpbs_table))
+		ret_val = e1000_82580_rxpbs_table[data];
+
+	return ret_val;
+}
+
+/**
+ *  igb_validate_nvm_checksum_with_offset - Validate EEPROM
+ *  checksum
+ *  @hw: pointer to the HW structure
+ *  @offset: offset in words of the checksum protected region
+ *
+ *  Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ **/
+static s32 igb_validate_nvm_checksum_with_offset(struct e1000_hw *hw,
+						 u16 offset)
+{
+	s32 ret_val = 0;
+	u16 checksum = 0;
+	u16 i, nvm_data;
+
+	for (i = offset; i < ((NVM_CHECKSUM_REG + offset) + 1); i++) {
+		ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			hw_dbg("NVM Read Error\n");
+			goto out;
+		}
+		checksum += nvm_data;
+	}
+
+	if (checksum != (u16) NVM_SUM) {
+		hw_dbg("NVM Checksum Invalid\n");
+		ret_val = -E1000_ERR_NVM;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_update_nvm_checksum_with_offset - Update EEPROM
+ *  checksum
+ *  @hw: pointer to the HW structure
+ *  @offset: offset in words of the checksum protected region
+ *
+ *  Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  up to the checksum.  Then calculates the EEPROM checksum and writes the
+ *  value to the EEPROM.
+ **/
+static s32 igb_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset)
+{
+	s32 ret_val;
+	u16 checksum = 0;
+	u16 i, nvm_data;
+
+	for (i = offset; i < (NVM_CHECKSUM_REG + offset); i++) {
+		ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			hw_dbg("NVM Read Error while updating checksum.\n");
+			goto out;
+		}
+		checksum += nvm_data;
+	}
+	checksum = (u16) NVM_SUM - checksum;
+	ret_val = hw->nvm.ops.write(hw, (NVM_CHECKSUM_REG + offset), 1,
+				&checksum);
+	if (ret_val)
+		hw_dbg("NVM Write Error while updating checksum.\n");
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_validate_nvm_checksum_82580 - Validate EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Calculates the EEPROM section checksum by reading/adding each word of
+ *  the EEPROM and then verifies that the sum of the EEPROM is
+ *  equal to 0xBABA.
+ **/
+static s32 igb_validate_nvm_checksum_82580(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u16 eeprom_regions_count = 1;
+	u16 j, nvm_data;
+	u16 nvm_offset;
+
+	ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data);
+	if (ret_val) {
+		hw_dbg("NVM Read Error\n");
+		goto out;
+	}
+
+	if (nvm_data & NVM_COMPATIBILITY_BIT_MASK) {
+		/* if checksums compatibility bit is set validate checksums
+		 * for all 4 ports.
+		 */
+		eeprom_regions_count = 4;
+	}
+
+	for (j = 0; j < eeprom_regions_count; j++) {
+		nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
+		ret_val = igb_validate_nvm_checksum_with_offset(hw,
+								nvm_offset);
+		if (ret_val != 0)
+			goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_update_nvm_checksum_82580 - Update EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Updates the EEPROM section checksums for all 4 ports by reading/adding
+ *  each word of the EEPROM up to the checksum.  Then calculates the EEPROM
+ *  checksum and writes the value to the EEPROM.
+ **/
+static s32 igb_update_nvm_checksum_82580(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 j, nvm_data;
+	u16 nvm_offset;
+
+	ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data);
+	if (ret_val) {
+		hw_dbg("NVM Read Error while updating checksum compatibility bit.\n");
+		goto out;
+	}
+
+	if ((nvm_data & NVM_COMPATIBILITY_BIT_MASK) == 0) {
+		/* set compatibility bit to validate checksums appropriately */
+		nvm_data = nvm_data | NVM_COMPATIBILITY_BIT_MASK;
+		ret_val = hw->nvm.ops.write(hw, NVM_COMPATIBILITY_REG_3, 1,
+					&nvm_data);
+		if (ret_val) {
+			hw_dbg("NVM Write Error while updating checksum compatibility bit.\n");
+			goto out;
+		}
+	}
+
+	for (j = 0; j < 4; j++) {
+		nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
+		ret_val = igb_update_nvm_checksum_with_offset(hw, nvm_offset);
+		if (ret_val)
+			goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_validate_nvm_checksum_i350 - Validate EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Calculates the EEPROM section checksum by reading/adding each word of
+ *  the EEPROM and then verifies that the sum of the EEPROM is
+ *  equal to 0xBABA.
+ **/
+static s32 igb_validate_nvm_checksum_i350(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u16 j;
+	u16 nvm_offset;
+
+	for (j = 0; j < 4; j++) {
+		nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
+		ret_val = igb_validate_nvm_checksum_with_offset(hw,
+								nvm_offset);
+		if (ret_val != 0)
+			goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_update_nvm_checksum_i350 - Update EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Updates the EEPROM section checksums for all 4 ports by reading/adding
+ *  each word of the EEPROM up to the checksum.  Then calculates the EEPROM
+ *  checksum and writes the value to the EEPROM.
+ **/
+static s32 igb_update_nvm_checksum_i350(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u16 j;
+	u16 nvm_offset;
+
+	for (j = 0; j < 4; j++) {
+		nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j);
+		ret_val = igb_update_nvm_checksum_with_offset(hw, nvm_offset);
+		if (ret_val != 0)
+			goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  __igb_access_emi_reg - Read/write EMI register
+ *  @hw: pointer to the HW structure
+ *  @address: EMI address to program
+ *  @data: pointer to value to read/write from/to the EMI address
+ *  @read: boolean flag to indicate read or write
+ **/
+static s32 __igb_access_emi_reg(struct e1000_hw *hw, u16 address,
+				  u16 *data, bool read)
+{
+	s32 ret_val = 0;
+
+	ret_val = hw->phy.ops.write_reg(hw, E1000_EMIADD, address);
+	if (ret_val)
+		return ret_val;
+
+	if (read)
+		ret_val = hw->phy.ops.read_reg(hw, E1000_EMIDATA, data);
+	else
+		ret_val = hw->phy.ops.write_reg(hw, E1000_EMIDATA, *data);
+
+	return ret_val;
+}
+
+/**
+ *  igb_read_emi_reg - Read Extended Management Interface register
+ *  @hw: pointer to the HW structure
+ *  @addr: EMI address to program
+ *  @data: value to be read from the EMI address
+ **/
+s32 igb_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data)
+{
+	return __igb_access_emi_reg(hw, addr, data, true);
+}
+
+/**
+ *  igb_set_eee_i350 - Enable/disable EEE support
+ *  @hw: pointer to the HW structure
+ *  @adv1G: boolean flag enabling 1G EEE advertisement
+ *  @adv100M: boolean flag enabling 100M EEE advertisement
+ *
+ *  Enable/disable EEE based on setting in dev_spec structure.
+ *
+ **/
+s32 igb_set_eee_i350(struct e1000_hw *hw, bool adv1G, bool adv100M)
+{
+	u32 ipcnfg, eeer;
+
+	if ((hw->mac.type < e1000_i350) ||
+	    (hw->phy.media_type != e1000_media_type_copper))
+		goto out;
+	ipcnfg = rd32(E1000_IPCNFG);
+	eeer = rd32(E1000_EEER);
+
+	/* enable or disable per user setting */
+	if (!(hw->dev_spec._82575.eee_disable)) {
+		u32 eee_su = rd32(E1000_EEE_SU);
+
+		if (adv100M)
+			ipcnfg |= E1000_IPCNFG_EEE_100M_AN;
+		else
+			ipcnfg &= ~E1000_IPCNFG_EEE_100M_AN;
+
+		if (adv1G)
+			ipcnfg |= E1000_IPCNFG_EEE_1G_AN;
+		else
+			ipcnfg &= ~E1000_IPCNFG_EEE_1G_AN;
+
+		eeer |= (E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN |
+			E1000_EEER_LPI_FC);
+
+		/* This bit should not be set in normal operation. */
+		if (eee_su & E1000_EEE_SU_LPI_CLK_STP)
+			hw_dbg("LPI Clock Stop Bit should not be set!\n");
+
+	} else {
+		ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN |
+			E1000_IPCNFG_EEE_100M_AN);
+		eeer &= ~(E1000_EEER_TX_LPI_EN |
+			E1000_EEER_RX_LPI_EN |
+			E1000_EEER_LPI_FC);
+	}
+	wr32(E1000_IPCNFG, ipcnfg);
+	wr32(E1000_EEER, eeer);
+	rd32(E1000_IPCNFG);
+	rd32(E1000_EEER);
+out:
+
+	return 0;
+}
+
+/**
+ *  igb_set_eee_i354 - Enable/disable EEE support
+ *  @hw: pointer to the HW structure
+ *  @adv1G: boolean flag enabling 1G EEE advertisement
+ *  @adv100M: boolean flag enabling 100M EEE advertisement
+ *
+ *  Enable/disable EEE legacy mode based on setting in dev_spec structure.
+ *
+ **/
+s32 igb_set_eee_i354(struct e1000_hw *hw, bool adv1G, bool adv100M)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+	u16 phy_data;
+
+	if ((hw->phy.media_type != e1000_media_type_copper) ||
+	    ((phy->id != M88E1543_E_PHY_ID) &&
+	     (phy->id != M88E1512_E_PHY_ID)))
+		goto out;
+
+	if (!hw->dev_spec._82575.eee_disable) {
+		/* Switch to PHY page 18. */
+		ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 18);
+		if (ret_val)
+			goto out;
+
+		ret_val = phy->ops.read_reg(hw, E1000_M88E1543_EEE_CTRL_1,
+					    &phy_data);
+		if (ret_val)
+			goto out;
+
+		phy_data |= E1000_M88E1543_EEE_CTRL_1_MS;
+		ret_val = phy->ops.write_reg(hw, E1000_M88E1543_EEE_CTRL_1,
+					     phy_data);
+		if (ret_val)
+			goto out;
+
+		/* Return the PHY to page 0. */
+		ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0);
+		if (ret_val)
+			goto out;
+
+		/* Turn on EEE advertisement. */
+		ret_val = igb_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
+					     E1000_EEE_ADV_DEV_I354,
+					     &phy_data);
+		if (ret_val)
+			goto out;
+
+		if (adv100M)
+			phy_data |= E1000_EEE_ADV_100_SUPPORTED;
+		else
+			phy_data &= ~E1000_EEE_ADV_100_SUPPORTED;
+
+		if (adv1G)
+			phy_data |= E1000_EEE_ADV_1000_SUPPORTED;
+		else
+			phy_data &= ~E1000_EEE_ADV_1000_SUPPORTED;
+
+		ret_val = igb_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
+						E1000_EEE_ADV_DEV_I354,
+						phy_data);
+	} else {
+		/* Turn off EEE advertisement. */
+		ret_val = igb_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
+					     E1000_EEE_ADV_DEV_I354,
+					     &phy_data);
+		if (ret_val)
+			goto out;
+
+		phy_data &= ~(E1000_EEE_ADV_100_SUPPORTED |
+			      E1000_EEE_ADV_1000_SUPPORTED);
+		ret_val = igb_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354,
+					      E1000_EEE_ADV_DEV_I354,
+					      phy_data);
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_get_eee_status_i354 - Get EEE status
+ *  @hw: pointer to the HW structure
+ *  @status: EEE status
+ *
+ *  Get EEE status by guessing based on whether Tx or Rx LPI indications have
+ *  been received.
+ **/
+s32 igb_get_eee_status_i354(struct e1000_hw *hw, bool *status)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+	u16 phy_data;
+
+	/* Check if EEE is supported on this device. */
+	if ((hw->phy.media_type != e1000_media_type_copper) ||
+	    ((phy->id != M88E1543_E_PHY_ID) &&
+	     (phy->id != M88E1512_E_PHY_ID)))
+		goto out;
+
+	ret_val = igb_read_xmdio_reg(hw, E1000_PCS_STATUS_ADDR_I354,
+				     E1000_PCS_STATUS_DEV_I354,
+				     &phy_data);
+	if (ret_val)
+		goto out;
+
+	*status = phy_data & (E1000_PCS_STATUS_TX_LPI_RCVD |
+			      E1000_PCS_STATUS_RX_LPI_RCVD) ? true : false;
+
+out:
+	return ret_val;
+}
+
+#ifdef CONFIG_IGB_HWMON
+static const u8 e1000_emc_temp_data[4] = {
+	E1000_EMC_INTERNAL_DATA,
+	E1000_EMC_DIODE1_DATA,
+	E1000_EMC_DIODE2_DATA,
+	E1000_EMC_DIODE3_DATA
+};
+static const u8 e1000_emc_therm_limit[4] = {
+	E1000_EMC_INTERNAL_THERM_LIMIT,
+	E1000_EMC_DIODE1_THERM_LIMIT,
+	E1000_EMC_DIODE2_THERM_LIMIT,
+	E1000_EMC_DIODE3_THERM_LIMIT
+};
+
+/**
+ *  igb_get_thermal_sensor_data_generic - Gathers thermal sensor data
+ *  @hw: pointer to hardware structure
+ *
+ *  Updates the temperatures in mac.thermal_sensor_data
+ **/
+static s32 igb_get_thermal_sensor_data_generic(struct e1000_hw *hw)
+{
+	u16 ets_offset;
+	u16 ets_cfg;
+	u16 ets_sensor;
+	u8  num_sensors;
+	u8  sensor_index;
+	u8  sensor_location;
+	u8  i;
+	struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
+
+	if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0))
+		return E1000_NOT_IMPLEMENTED;
+
+	data->sensor[0].temp = (rd32(E1000_THMJT) & 0xFF);
+
+	/* Return the internal sensor only if ETS is unsupported */
+	hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_offset);
+	if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF))
+		return 0;
+
+	hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg);
+	if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT)
+	    != NVM_ETS_TYPE_EMC)
+		return E1000_NOT_IMPLEMENTED;
+
+	num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK);
+	if (num_sensors > E1000_MAX_SENSORS)
+		num_sensors = E1000_MAX_SENSORS;
+
+	for (i = 1; i < num_sensors; i++) {
+		hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor);
+		sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >>
+				NVM_ETS_DATA_INDEX_SHIFT);
+		sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >>
+				   NVM_ETS_DATA_LOC_SHIFT);
+
+		if (sensor_location != 0)
+			hw->phy.ops.read_i2c_byte(hw,
+					e1000_emc_temp_data[sensor_index],
+					E1000_I2C_THERMAL_SENSOR_ADDR,
+					&data->sensor[i].temp);
+	}
+	return 0;
+}
+
+/**
+ *  igb_init_thermal_sensor_thresh_generic - Sets thermal sensor thresholds
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets the thermal sensor thresholds according to the NVM map
+ *  and save off the threshold and location values into mac.thermal_sensor_data
+ **/
+static s32 igb_init_thermal_sensor_thresh_generic(struct e1000_hw *hw)
+{
+	u16 ets_offset;
+	u16 ets_cfg;
+	u16 ets_sensor;
+	u8  low_thresh_delta;
+	u8  num_sensors;
+	u8  sensor_index;
+	u8  sensor_location;
+	u8  therm_limit;
+	u8  i;
+	struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
+
+	if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0))
+		return E1000_NOT_IMPLEMENTED;
+
+	memset(data, 0, sizeof(struct e1000_thermal_sensor_data));
+
+	data->sensor[0].location = 0x1;
+	data->sensor[0].caution_thresh =
+		(rd32(E1000_THHIGHTC) & 0xFF);
+	data->sensor[0].max_op_thresh =
+		(rd32(E1000_THLOWTC) & 0xFF);
+
+	/* Return the internal sensor only if ETS is unsupported */
+	hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_offset);
+	if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF))
+		return 0;
+
+	hw->nvm.ops.read(hw, ets_offset, 1, &ets_cfg);
+	if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT)
+	    != NVM_ETS_TYPE_EMC)
+		return E1000_NOT_IMPLEMENTED;
+
+	low_thresh_delta = ((ets_cfg & NVM_ETS_LTHRES_DELTA_MASK) >>
+			    NVM_ETS_LTHRES_DELTA_SHIFT);
+	num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK);
+
+	for (i = 1; i <= num_sensors; i++) {
+		hw->nvm.ops.read(hw, (ets_offset + i), 1, &ets_sensor);
+		sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >>
+				NVM_ETS_DATA_INDEX_SHIFT);
+		sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >>
+				   NVM_ETS_DATA_LOC_SHIFT);
+		therm_limit = ets_sensor & NVM_ETS_DATA_HTHRESH_MASK;
+
+		hw->phy.ops.write_i2c_byte(hw,
+			e1000_emc_therm_limit[sensor_index],
+			E1000_I2C_THERMAL_SENSOR_ADDR,
+			therm_limit);
+
+		if ((i < E1000_MAX_SENSORS) && (sensor_location != 0)) {
+			data->sensor[i].location = sensor_location;
+			data->sensor[i].caution_thresh = therm_limit;
+			data->sensor[i].max_op_thresh = therm_limit -
+							low_thresh_delta;
+		}
+	}
+	return 0;
+}
+
+#endif
+static struct e1000_mac_operations e1000_mac_ops_82575 = {
+	.init_hw              = igb_init_hw_82575,
+	.check_for_link       = igb_check_for_link_82575,
+	.rar_set              = igb_rar_set,
+	.read_mac_addr        = igb_read_mac_addr_82575,
+	.get_speed_and_duplex = igb_get_link_up_info_82575,
+#ifdef CONFIG_IGB_HWMON
+	.get_thermal_sensor_data = igb_get_thermal_sensor_data_generic,
+	.init_thermal_sensor_thresh = igb_init_thermal_sensor_thresh_generic,
+#endif
+};
+
+static const struct e1000_phy_operations e1000_phy_ops_82575 = {
+	.acquire              = igb_acquire_phy_82575,
+	.get_cfg_done         = igb_get_cfg_done_82575,
+	.release              = igb_release_phy_82575,
+	.write_i2c_byte       = igb_write_i2c_byte,
+	.read_i2c_byte        = igb_read_i2c_byte,
+};
+
+static struct e1000_nvm_operations e1000_nvm_ops_82575 = {
+	.acquire              = igb_acquire_nvm_82575,
+	.read                 = igb_read_nvm_eerd,
+	.release              = igb_release_nvm_82575,
+	.write                = igb_write_nvm_spi,
+};
+
+const struct e1000_info e1000_82575_info = {
+	.get_invariants = igb_get_invariants_82575,
+	.mac_ops = &e1000_mac_ops_82575,
+	.phy_ops = &e1000_phy_ops_82575,
+	.nvm_ops = &e1000_nvm_ops_82575,
+};
+
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h
new file mode 100644
index 0000000000..63ec253ac7
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.h
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+#ifndef _E1000_82575_H_
+#define _E1000_82575_H_
+
+void igb_shutdown_serdes_link_82575(struct e1000_hw *hw);
+void igb_power_up_serdes_link_82575(struct e1000_hw *hw);
+void igb_power_down_phy_copper_82575(struct e1000_hw *hw);
+void igb_rx_fifo_flush_82575(struct e1000_hw *hw);
+s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr,
+		      u8 *data);
+s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, u8 dev_addr,
+		       u8 data);
+
+#define ID_LED_DEFAULT_82575_SERDES ((ID_LED_DEF1_DEF2 << 12) | \
+				     (ID_LED_DEF1_DEF2 <<  8) | \
+				     (ID_LED_DEF1_DEF2 <<  4) | \
+				     (ID_LED_OFF1_ON2))
+
+#define E1000_RAR_ENTRIES_82575        16
+#define E1000_RAR_ENTRIES_82576        24
+#define E1000_RAR_ENTRIES_82580        24
+#define E1000_RAR_ENTRIES_I350         32
+
+#define E1000_SW_SYNCH_MB              0x00000100
+#define E1000_STAT_DEV_RST_SET         0x00100000
+#define E1000_CTRL_DEV_RST             0x20000000
+
+/* SRRCTL bit definitions */
+#define E1000_SRRCTL_BSIZEPKT_SHIFT                     10 /* Shift _right_ */
+#define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT                 2  /* Shift _left_ */
+#define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF                0x02000000
+#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS          0x0A000000
+#define E1000_SRRCTL_DROP_EN                            0x80000000
+#define E1000_SRRCTL_TIMESTAMP                          0x40000000
+
+
+#define E1000_MRQC_ENABLE_RSS_MQ            0x00000002
+#define E1000_MRQC_ENABLE_VMDQ              0x00000003
+#define E1000_MRQC_RSS_FIELD_IPV4_UDP       0x00400000
+#define E1000_MRQC_ENABLE_VMDQ_RSS_MQ       0x00000005
+#define E1000_MRQC_RSS_FIELD_IPV6_UDP       0x00800000
+#define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX    0x01000000
+
+#define E1000_EICR_TX_QUEUE ( \
+	E1000_EICR_TX_QUEUE0 |    \
+	E1000_EICR_TX_QUEUE1 |    \
+	E1000_EICR_TX_QUEUE2 |    \
+	E1000_EICR_TX_QUEUE3)
+
+#define E1000_EICR_RX_QUEUE ( \
+	E1000_EICR_RX_QUEUE0 |    \
+	E1000_EICR_RX_QUEUE1 |    \
+	E1000_EICR_RX_QUEUE2 |    \
+	E1000_EICR_RX_QUEUE3)
+
+/* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */
+#define E1000_IMIREXT_SIZE_BP     0x00001000  /* Packet size bypass */
+#define E1000_IMIREXT_CTRL_BP     0x00080000  /* Bypass check of ctrl bits */
+
+/* Receive Descriptor - Advanced */
+union e1000_adv_rx_desc {
+	struct {
+		__le64 pkt_addr;             /* Packet buffer address */
+		__le64 hdr_addr;             /* Header buffer address */
+	} read;
+	struct {
+		struct {
+			struct {
+				__le16 pkt_info;   /* RSS type, Packet type */
+				__le16 hdr_info;   /* Split Head, buf len */
+			} lo_dword;
+			union {
+				__le32 rss;          /* RSS Hash */
+				struct {
+					__le16 ip_id;    /* IP id */
+					__le16 csum;     /* Packet Checksum */
+				} csum_ip;
+			} hi_dword;
+		} lower;
+		struct {
+			__le32 status_error;     /* ext status/error */
+			__le16 length;           /* Packet length */
+			__le16 vlan;             /* VLAN tag */
+		} upper;
+	} wb;  /* writeback */
+};
+
+#define E1000_RXDADV_HDRBUFLEN_MASK      0x7FE0
+#define E1000_RXDADV_HDRBUFLEN_SHIFT     5
+#define E1000_RXDADV_STAT_TS             0x10000 /* Pkt was time stamped */
+#define E1000_RXDADV_STAT_TSIP           0x08000 /* timestamp in packet */
+
+/* Transmit Descriptor - Advanced */
+union e1000_adv_tx_desc {
+	struct {
+		__le64 buffer_addr;    /* Address of descriptor's data buf */
+		__le32 cmd_type_len;
+		__le32 olinfo_status;
+	} read;
+	struct {
+		__le64 rsvd;       /* Reserved */
+		__le32 nxtseq_seed;
+		__le32 status;
+	} wb;
+};
+
+/* Adv Transmit Descriptor Config Masks */
+#define E1000_ADVTXD_MAC_TSTAMP   0x00080000 /* IEEE1588 Timestamp packet */
+#define E1000_ADVTXD_DTYP_CTXT    0x00200000 /* Advanced Context Descriptor */
+#define E1000_ADVTXD_DTYP_DATA    0x00300000 /* Advanced Data Descriptor */
+#define E1000_ADVTXD_DCMD_EOP     0x01000000 /* End of Packet */
+#define E1000_ADVTXD_DCMD_IFCS    0x02000000 /* Insert FCS (Ethernet CRC) */
+#define E1000_ADVTXD_DCMD_RS      0x08000000 /* Report Status */
+#define E1000_ADVTXD_DCMD_DEXT    0x20000000 /* Descriptor extension (1=Adv) */
+#define E1000_ADVTXD_DCMD_VLE     0x40000000 /* VLAN pkt enable */
+#define E1000_ADVTXD_DCMD_TSE     0x80000000 /* TCP Seg enable */
+#define E1000_ADVTXD_PAYLEN_SHIFT    14 /* Adv desc PAYLEN shift */
+
+/* Context descriptors */
+struct e1000_adv_tx_context_desc {
+	__le32 vlan_macip_lens;
+	__le32 seqnum_seed;
+	__le32 type_tucmd_mlhl;
+	__le32 mss_l4len_idx;
+};
+
+#define E1000_ADVTXD_MACLEN_SHIFT    9  /* Adv ctxt desc mac len shift */
+#define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000  /* L4 Packet TYPE of UDP */
+#define E1000_ADVTXD_TUCMD_IPV4    0x00000400  /* IP Packet Type: 1=IPv4 */
+#define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800  /* L4 Packet TYPE of TCP */
+#define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 packet TYPE of SCTP */
+/* IPSec Encrypt Enable for ESP */
+#define E1000_ADVTXD_L4LEN_SHIFT     8  /* Adv ctxt L4LEN shift */
+#define E1000_ADVTXD_MSS_SHIFT      16  /* Adv ctxt MSS shift */
+/* Adv ctxt IPSec SA IDX mask */
+/* Adv ctxt IPSec ESP len mask */
+
+/* Additional Transmit Descriptor Control definitions */
+#define E1000_TXDCTL_QUEUE_ENABLE  0x02000000 /* Enable specific Tx Queue */
+/* Tx Queue Arbitration Priority 0=low, 1=high */
+
+/* Additional Receive Descriptor Control definitions */
+#define E1000_RXDCTL_QUEUE_ENABLE  0x02000000 /* Enable specific Rx Queue */
+
+/* Direct Cache Access (DCA) definitions */
+#define E1000_DCA_CTRL_DCA_MODE_DISABLE 0x01 /* DCA Disable */
+#define E1000_DCA_CTRL_DCA_MODE_CB2     0x02 /* DCA Mode CB2 */
+
+#define E1000_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */
+#define E1000_DCA_RXCTRL_DESC_DCA_EN BIT(5) /* DCA Rx Desc enable */
+#define E1000_DCA_RXCTRL_HEAD_DCA_EN BIT(6) /* DCA Rx Desc header enable */
+#define E1000_DCA_RXCTRL_DATA_DCA_EN BIT(7) /* DCA Rx Desc payload enable */
+#define E1000_DCA_RXCTRL_DESC_RRO_EN BIT(9) /* DCA Rx rd Desc Relax Order */
+
+#define E1000_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */
+#define E1000_DCA_TXCTRL_DESC_DCA_EN BIT(5) /* DCA Tx Desc enable */
+#define E1000_DCA_TXCTRL_DESC_RRO_EN BIT(9) /* Tx rd Desc Relax Order */
+#define E1000_DCA_TXCTRL_TX_WB_RO_EN BIT(11) /* Tx Desc writeback RO bit */
+#define E1000_DCA_TXCTRL_DATA_RRO_EN BIT(13) /* Tx rd data Relax Order */
+
+/* Additional DCA related definitions, note change in position of CPUID */
+#define E1000_DCA_TXCTRL_CPUID_MASK_82576 0xFF000000 /* Tx CPUID Mask */
+#define E1000_DCA_RXCTRL_CPUID_MASK_82576 0xFF000000 /* Rx CPUID Mask */
+#define E1000_DCA_TXCTRL_CPUID_SHIFT 24 /* Tx CPUID now in the last byte */
+#define E1000_DCA_RXCTRL_CPUID_SHIFT 24 /* Rx CPUID now in the last byte */
+
+/* ETQF register bit definitions */
+#define E1000_ETQF_FILTER_ENABLE   BIT(26)
+#define E1000_ETQF_1588            BIT(30)
+#define E1000_ETQF_IMM_INT         BIT(29)
+#define E1000_ETQF_QUEUE_ENABLE    BIT(31)
+#define E1000_ETQF_QUEUE_SHIFT     16
+#define E1000_ETQF_QUEUE_MASK      0x00070000
+#define E1000_ETQF_ETYPE_MASK      0x0000FFFF
+
+/* FTQF register bit definitions */
+#define E1000_FTQF_VF_BP               0x00008000
+#define E1000_FTQF_1588_TIME_STAMP     0x08000000
+#define E1000_FTQF_MASK                0xF0000000
+#define E1000_FTQF_MASK_PROTO_BP       0x10000000
+#define E1000_FTQF_MASK_SOURCE_PORT_BP 0x80000000
+
+#define E1000_NVM_APME_82575          0x0400
+#define MAX_NUM_VFS                   8
+
+#define E1000_DTXSWC_MAC_SPOOF_MASK   0x000000FF /* Per VF MAC spoof control */
+#define E1000_DTXSWC_VLAN_SPOOF_MASK  0x0000FF00 /* Per VF VLAN spoof control */
+#define E1000_DTXSWC_LLE_MASK         0x00FF0000 /* Per VF Local LB enables */
+#define E1000_DTXSWC_VLAN_SPOOF_SHIFT 8
+#define E1000_DTXSWC_VMDQ_LOOPBACK_EN BIT(31)  /* global VF LB enable */
+
+/* Easy defines for setting default pool, would normally be left a zero */
+#define E1000_VT_CTL_DEFAULT_POOL_SHIFT 7
+#define E1000_VT_CTL_DEFAULT_POOL_MASK  (0x7 << E1000_VT_CTL_DEFAULT_POOL_SHIFT)
+
+/* Other useful VMD_CTL register defines */
+#define E1000_VT_CTL_IGNORE_MAC         BIT(28)
+#define E1000_VT_CTL_DISABLE_DEF_POOL   BIT(29)
+#define E1000_VT_CTL_VM_REPL_EN         BIT(30)
+
+/* Per VM Offload register setup */
+#define E1000_VMOLR_RLPML_MASK 0x00003FFF /* Long Packet Maximum Length mask */
+#define E1000_VMOLR_LPE        0x00010000 /* Accept Long packet */
+#define E1000_VMOLR_RSSE       0x00020000 /* Enable RSS */
+#define E1000_VMOLR_AUPE       0x01000000 /* Accept untagged packets */
+#define E1000_VMOLR_ROMPE      0x02000000 /* Accept overflow multicast */
+#define E1000_VMOLR_ROPE       0x04000000 /* Accept overflow unicast */
+#define E1000_VMOLR_BAM        0x08000000 /* Accept Broadcast packets */
+#define E1000_VMOLR_MPME       0x10000000 /* Multicast promiscuous mode */
+#define E1000_VMOLR_STRVLAN    0x40000000 /* Vlan stripping enable */
+#define E1000_VMOLR_STRCRC     0x80000000 /* CRC stripping enable */
+
+#define E1000_DVMOLR_HIDEVLAN  0x20000000 /* Hide vlan enable */
+#define E1000_DVMOLR_STRVLAN   0x40000000 /* Vlan stripping enable */
+#define E1000_DVMOLR_STRCRC    0x80000000 /* CRC stripping enable */
+
+#define E1000_VLVF_ARRAY_SIZE     32
+#define E1000_VLVF_VLANID_MASK    0x00000FFF
+#define E1000_VLVF_POOLSEL_SHIFT  12
+#define E1000_VLVF_POOLSEL_MASK   (0xFF << E1000_VLVF_POOLSEL_SHIFT)
+#define E1000_VLVF_LVLAN          0x00100000
+#define E1000_VLVF_VLANID_ENABLE  0x80000000
+
+#define E1000_VMVIR_VLANA_DEFAULT      0x40000000 /* Always use default VLAN */
+#define E1000_VMVIR_VLANA_NEVER        0x80000000 /* Never insert VLAN tag */
+
+#define E1000_IOVCTL 0x05BBC
+#define E1000_IOVCTL_REUSE_VFQ 0x00000001
+
+#define E1000_RPLOLR_STRVLAN   0x40000000
+#define E1000_RPLOLR_STRCRC    0x80000000
+
+#define E1000_DTXCTL_8023LL     0x0004
+#define E1000_DTXCTL_VLAN_ADDED 0x0008
+#define E1000_DTXCTL_OOS_ENABLE 0x0010
+#define E1000_DTXCTL_MDP_EN     0x0020
+#define E1000_DTXCTL_SPOOF_INT  0x0040
+
+#define E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT	BIT(14)
+
+#define ALL_QUEUES   0xFFFF
+
+/* RX packet buffer size defines */
+#define E1000_RXPBS_SIZE_MASK_82576  0x0000007F
+void igb_vmdq_set_anti_spoofing_pf(struct e1000_hw *, bool, int);
+void igb_vmdq_set_loopback_pf(struct e1000_hw *, bool);
+void igb_vmdq_set_replication_pf(struct e1000_hw *, bool);
+u16 igb_rxpbs_adjust_82580(u32 data);
+s32 igb_read_emi_reg(struct e1000_hw *, u16 addr, u16 *data);
+s32 igb_set_eee_i350(struct e1000_hw *, bool adv1G, bool adv100M);
+s32 igb_set_eee_i354(struct e1000_hw *, bool adv1G, bool adv100M);
+s32 igb_get_eee_status_i354(struct e1000_hw *hw, bool *status);
+
+#define E1000_I2C_THERMAL_SENSOR_ADDR	0xF8
+#define E1000_EMC_INTERNAL_DATA		0x00
+#define E1000_EMC_INTERNAL_THERM_LIMIT	0x20
+#define E1000_EMC_DIODE1_DATA		0x01
+#define E1000_EMC_DIODE1_THERM_LIMIT	0x19
+#define E1000_EMC_DIODE2_DATA		0x23
+#define E1000_EMC_DIODE2_THERM_LIMIT	0x1A
+#define E1000_EMC_DIODE3_DATA		0x2A
+#define E1000_EMC_DIODE3_THERM_LIMIT	0x30
+#endif
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
new file mode 100644
index 0000000000..fa02892848
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -0,0 +1,1075 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+#ifndef _E1000_DEFINES_H_
+#define _E1000_DEFINES_H_
+
+/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define REQ_TX_DESCRIPTOR_MULTIPLE  8
+#define REQ_RX_DESCRIPTOR_MULTIPLE  8
+
+/* Definitions for power management and wakeup registers */
+/* Wake Up Control */
+#define E1000_WUC_PME_EN     0x00000002 /* PME Enable */
+
+/* Wake Up Filter Control */
+#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */
+#define E1000_WUFC_MAG  0x00000002 /* Magic Packet Wakeup Enable */
+#define E1000_WUFC_EX   0x00000004 /* Directed Exact Wakeup Enable */
+#define E1000_WUFC_MC   0x00000008 /* Directed Multicast Wakeup Enable */
+#define E1000_WUFC_BC   0x00000010 /* Broadcast Wakeup Enable */
+
+/* Wake Up Status */
+#define E1000_WUS_EX	0x00000004 /* Directed Exact */
+#define E1000_WUS_ARPD	0x00000020 /* Directed ARP Request */
+#define E1000_WUS_IPV4	0x00000040 /* Directed IPv4 */
+#define E1000_WUS_IPV6	0x00000080 /* Directed IPv6 */
+#define E1000_WUS_NSD	0x00000400 /* Directed IPv6 Neighbor Solicitation */
+
+/* Packet types that are enabled for wake packet delivery */
+#define WAKE_PKT_WUS ( \
+	E1000_WUS_EX   | \
+	E1000_WUS_ARPD | \
+	E1000_WUS_IPV4 | \
+	E1000_WUS_IPV6 | \
+	E1000_WUS_NSD)
+
+/* Wake Up Packet Length */
+#define E1000_WUPL_MASK	0x00000FFF
+
+/* Wake Up Packet Memory stores the first 128 bytes of the wake up packet */
+#define E1000_WUPM_BYTES	128
+
+/* Extended Device Control */
+#define E1000_CTRL_EXT_SDP2_DATA 0x00000040 /* Value of SW Defineable Pin 2 */
+#define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* Value of SW Defineable Pin 3 */
+#define E1000_CTRL_EXT_SDP2_DIR  0x00000400 /* SDP2 Data direction */
+#define E1000_CTRL_EXT_SDP3_DIR  0x00000800 /* SDP3 Data direction */
+
+/* Physical Func Reset Done Indication */
+#define E1000_CTRL_EXT_PFRSTD	0x00004000
+#define E1000_CTRL_EXT_SDLPE	0X00040000  /* SerDes Low Power Enable */
+#define E1000_CTRL_EXT_LINK_MODE_MASK	0x00C00000
+#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES	0x00C00000
+#define E1000_CTRL_EXT_LINK_MODE_1000BASE_KX	0x00400000
+#define E1000_CTRL_EXT_LINK_MODE_SGMII	0x00800000
+#define E1000_CTRL_EXT_LINK_MODE_GMII	0x00000000
+#define E1000_CTRL_EXT_EIAME	0x01000000
+#define E1000_CTRL_EXT_IRCA		0x00000001
+/* Interrupt delay cancellation */
+/* Driver loaded bit for FW */
+#define E1000_CTRL_EXT_DRV_LOAD       0x10000000
+/* Interrupt acknowledge Auto-mask */
+/* Clear Interrupt timers after IMS clear */
+/* packet buffer parity error detection enabled */
+/* descriptor FIFO parity error detection enable */
+#define E1000_CTRL_EXT_PBA_CLR		0x80000000 /* PBA Clear */
+#define E1000_CTRL_EXT_PHYPDEN		0x00100000
+#define E1000_I2CCMD_REG_ADDR_SHIFT	16
+#define E1000_I2CCMD_PHY_ADDR_SHIFT	24
+#define E1000_I2CCMD_OPCODE_READ	0x08000000
+#define E1000_I2CCMD_OPCODE_WRITE	0x00000000
+#define E1000_I2CCMD_READY		0x20000000
+#define E1000_I2CCMD_ERROR		0x80000000
+#define E1000_I2CCMD_SFP_DATA_ADDR(a)	(0x0000 + (a))
+#define E1000_I2CCMD_SFP_DIAG_ADDR(a)	(0x0100 + (a))
+#define E1000_MAX_SGMII_PHY_REG_ADDR	255
+#define E1000_I2CCMD_PHY_TIMEOUT	200
+#define E1000_IVAR_VALID		0x80
+#define E1000_GPIE_NSICR		0x00000001
+#define E1000_GPIE_MSIX_MODE		0x00000010
+#define E1000_GPIE_EIAME		0x40000000
+#define E1000_GPIE_PBA			0x80000000
+
+/* Receive Descriptor bit definitions */
+#define E1000_RXD_STAT_DD       0x01    /* Descriptor Done */
+#define E1000_RXD_STAT_EOP      0x02    /* End of Packet */
+#define E1000_RXD_STAT_IXSM     0x04    /* Ignore checksum */
+#define E1000_RXD_STAT_VP       0x08    /* IEEE VLAN Packet */
+#define E1000_RXD_STAT_UDPCS    0x10    /* UDP xsum calculated */
+#define E1000_RXD_STAT_TCPCS    0x20    /* TCP xsum calculated */
+#define E1000_RXD_STAT_TS       0x10000 /* Pkt was time stamped */
+
+#define E1000_RXDEXT_STATERR_LB    0x00040000
+#define E1000_RXDEXT_STATERR_CE    0x01000000
+#define E1000_RXDEXT_STATERR_SE    0x02000000
+#define E1000_RXDEXT_STATERR_SEQ   0x04000000
+#define E1000_RXDEXT_STATERR_CXE   0x10000000
+#define E1000_RXDEXT_STATERR_TCPE  0x20000000
+#define E1000_RXDEXT_STATERR_IPE   0x40000000
+#define E1000_RXDEXT_STATERR_RXE   0x80000000
+
+/* Same mask, but for extended and packet split descriptors */
+#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \
+	E1000_RXDEXT_STATERR_CE  |            \
+	E1000_RXDEXT_STATERR_SE  |            \
+	E1000_RXDEXT_STATERR_SEQ |            \
+	E1000_RXDEXT_STATERR_CXE |            \
+	E1000_RXDEXT_STATERR_RXE)
+
+#define E1000_MRQC_RSS_FIELD_IPV4_TCP          0x00010000
+#define E1000_MRQC_RSS_FIELD_IPV4              0x00020000
+#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX       0x00040000
+#define E1000_MRQC_RSS_FIELD_IPV6              0x00100000
+#define E1000_MRQC_RSS_FIELD_IPV6_TCP          0x00200000
+
+
+/* Management Control */
+#define E1000_MANC_SMBUS_EN      0x00000001 /* SMBus Enabled - RO */
+#define E1000_MANC_ASF_EN        0x00000002 /* ASF Enabled - RO */
+#define E1000_MANC_EN_BMC2OS     0x10000000 /* OSBMC is Enabled or not */
+/* Enable Neighbor Discovery Filtering */
+#define E1000_MANC_RCV_TCO_EN    0x00020000 /* Receive TCO Packets Enabled */
+#define E1000_MANC_BLK_PHY_RST_ON_IDE   0x00040000 /* Block phy resets */
+/* Enable MAC address filtering */
+#define E1000_MANC_EN_MAC_ADDR_FILTER   0x00100000
+
+/* Receive Control */
+#define E1000_RCTL_EN             0x00000002    /* enable */
+#define E1000_RCTL_SBP            0x00000004    /* store bad packet */
+#define E1000_RCTL_UPE            0x00000008    /* unicast promiscuous enable */
+#define E1000_RCTL_MPE            0x00000010    /* multicast promiscuous enab */
+#define E1000_RCTL_LPE            0x00000020    /* long packet enable */
+#define E1000_RCTL_LBM_MAC        0x00000040    /* MAC loopback mode */
+#define E1000_RCTL_LBM_TCVR       0x000000C0    /* tcvr loopback mode */
+#define E1000_RCTL_RDMTS_HALF     0x00000000    /* rx desc min threshold size */
+#define E1000_RCTL_MO_SHIFT       12            /* multicast offset shift */
+#define E1000_RCTL_BAM            0x00008000    /* broadcast enable */
+#define E1000_RCTL_SZ_512         0x00020000    /* rx buffer size 512 */
+#define E1000_RCTL_SZ_256         0x00030000    /* rx buffer size 256 */
+#define E1000_RCTL_VFE            0x00040000    /* vlan filter enable */
+#define E1000_RCTL_CFIEN          0x00080000    /* canonical form enable */
+#define E1000_RCTL_DPF            0x00400000    /* Discard Pause Frames */
+#define E1000_RCTL_PMCF           0x00800000    /* pass MAC control frames */
+#define E1000_RCTL_SECRC          0x04000000    /* Strip Ethernet CRC */
+
+/* Use byte values for the following shift parameters
+ * Usage:
+ *     psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) &
+ *                  E1000_PSRCTL_BSIZE0_MASK) |
+ *                ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) &
+ *                  E1000_PSRCTL_BSIZE1_MASK) |
+ *                ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) &
+ *                  E1000_PSRCTL_BSIZE2_MASK) |
+ *                ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |;
+ *                  E1000_PSRCTL_BSIZE3_MASK))
+ * where value0 = [128..16256],  default=256
+ *       value1 = [1024..64512], default=4096
+ *       value2 = [0..64512],    default=4096
+ *       value3 = [0..64512],    default=0
+ */
+
+#define E1000_PSRCTL_BSIZE0_MASK   0x0000007F
+#define E1000_PSRCTL_BSIZE1_MASK   0x00003F00
+#define E1000_PSRCTL_BSIZE2_MASK   0x003F0000
+#define E1000_PSRCTL_BSIZE3_MASK   0x3F000000
+
+#define E1000_PSRCTL_BSIZE0_SHIFT  7            /* Shift _right_ 7 */
+#define E1000_PSRCTL_BSIZE1_SHIFT  2            /* Shift _right_ 2 */
+#define E1000_PSRCTL_BSIZE2_SHIFT  6            /* Shift _left_ 6 */
+#define E1000_PSRCTL_BSIZE3_SHIFT 14            /* Shift _left_ 14 */
+
+/* SWFW_SYNC Definitions */
+#define E1000_SWFW_EEP_SM   0x1
+#define E1000_SWFW_PHY0_SM  0x2
+#define E1000_SWFW_PHY1_SM  0x4
+#define E1000_SWFW_PHY2_SM  0x20
+#define E1000_SWFW_PHY3_SM  0x40
+
+/* FACTPS Definitions */
+/* Device Control */
+#define E1000_CTRL_FD       0x00000001  /* Full duplex.0=half; 1=full */
+#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master requests */
+#define E1000_CTRL_LRST     0x00000008  /* Link reset. 0=normal,1=reset */
+#define E1000_CTRL_ASDE     0x00000020  /* Auto-speed detect enable */
+#define E1000_CTRL_SLU      0x00000040  /* Set link up (Force Link) */
+#define E1000_CTRL_ILOS     0x00000080  /* Invert Loss-Of Signal */
+#define E1000_CTRL_SPD_SEL  0x00000300  /* Speed Select Mask */
+#define E1000_CTRL_SPD_100  0x00000100  /* Force 100Mb */
+#define E1000_CTRL_SPD_1000 0x00000200  /* Force 1Gb */
+#define E1000_CTRL_FRCSPD   0x00000800  /* Force Speed */
+#define E1000_CTRL_FRCDPX   0x00001000  /* Force Duplex */
+/* Defined polarity of Dock/Undock indication in SDP[0] */
+/* Reset both PHY ports, through PHYRST_N pin */
+/* enable link status from external LINK_0 and LINK_1 pins */
+#define E1000_CTRL_SWDPIN0  0x00040000  /* SWDPIN 0 value */
+#define E1000_CTRL_SWDPIN1  0x00080000  /* SWDPIN 1 value */
+#define E1000_CTRL_ADVD3WUC 0x00100000  /* D3 WUC */
+#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 /* PHY PM enable */
+#define E1000_CTRL_SDP0_DIR 0x00400000  /* SDP0 Data direction */
+#define E1000_CTRL_SDP1_DIR 0x00800000  /* SDP1 Data direction */
+#define E1000_CTRL_RST      0x04000000  /* Global reset */
+#define E1000_CTRL_RFCE     0x08000000  /* Receive Flow Control enable */
+#define E1000_CTRL_TFCE     0x10000000  /* Transmit flow control enable */
+#define E1000_CTRL_VME      0x40000000  /* IEEE VLAN mode enable */
+#define E1000_CTRL_PHY_RST  0x80000000  /* PHY Reset */
+/* Initiate an interrupt to manageability engine */
+#define E1000_CTRL_I2C_ENA  0x02000000  /* I2C enable */
+
+/* Bit definitions for the Management Data IO (MDIO) and Management Data
+ * Clock (MDC) pins in the Device Control Register.
+ */
+
+#define E1000_CONNSW_ENRGSRC             0x4
+#define E1000_CONNSW_PHYSD		0x400
+#define E1000_CONNSW_PHY_PDN		0x800
+#define E1000_CONNSW_SERDESD		0x200
+#define E1000_CONNSW_AUTOSENSE_CONF	0x2
+#define E1000_CONNSW_AUTOSENSE_EN	0x1
+#define E1000_PCS_CFG_PCS_EN             8
+#define E1000_PCS_LCTL_FLV_LINK_UP       1
+#define E1000_PCS_LCTL_FSV_100           2
+#define E1000_PCS_LCTL_FSV_1000          4
+#define E1000_PCS_LCTL_FDV_FULL          8
+#define E1000_PCS_LCTL_FSD               0x10
+#define E1000_PCS_LCTL_FORCE_LINK        0x20
+#define E1000_PCS_LCTL_FORCE_FCTRL       0x80
+#define E1000_PCS_LCTL_AN_ENABLE         0x10000
+#define E1000_PCS_LCTL_AN_RESTART        0x20000
+#define E1000_PCS_LCTL_AN_TIMEOUT        0x40000
+#define E1000_ENABLE_SERDES_LOOPBACK     0x0410
+
+#define E1000_PCS_LSTS_LINK_OK           1
+#define E1000_PCS_LSTS_SPEED_100         2
+#define E1000_PCS_LSTS_SPEED_1000        4
+#define E1000_PCS_LSTS_DUPLEX_FULL       8
+#define E1000_PCS_LSTS_SYNK_OK           0x10
+
+/* Device Status */
+#define E1000_STATUS_FD         0x00000001      /* Full duplex.0=half,1=full */
+#define E1000_STATUS_LU         0x00000002      /* Link up.0=no,1=link */
+#define E1000_STATUS_FUNC_MASK  0x0000000C      /* PCI Function Mask */
+#define E1000_STATUS_FUNC_SHIFT 2
+#define E1000_STATUS_FUNC_1     0x00000004      /* Function 1 */
+#define E1000_STATUS_TXOFF      0x00000010      /* transmission paused */
+#define E1000_STATUS_SPEED_100  0x00000040      /* Speed 100Mb/s */
+#define E1000_STATUS_SPEED_1000 0x00000080      /* Speed 1000Mb/s */
+/* Change in Dock/Undock state. Clear on write '0'. */
+/* Status of Master requests. */
+#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000
+/* BMC external code execution disabled */
+
+#define E1000_STATUS_2P5_SKU		0x00001000 /* Val of 2.5GBE SKU strap */
+#define E1000_STATUS_2P5_SKU_OVER	0x00002000 /* Val of 2.5GBE SKU Over */
+/* Constants used to intrepret the masked PCI-X bus speed. */
+
+#define SPEED_10    10
+#define SPEED_100   100
+#define SPEED_1000  1000
+#define SPEED_2500  2500
+#define HALF_DUPLEX 1
+#define FULL_DUPLEX 2
+
+
+#define ADVERTISE_10_HALF                 0x0001
+#define ADVERTISE_10_FULL                 0x0002
+#define ADVERTISE_100_HALF                0x0004
+#define ADVERTISE_100_FULL                0x0008
+#define ADVERTISE_1000_HALF               0x0010 /* Not used, just FYI */
+#define ADVERTISE_1000_FULL               0x0020
+
+/* 1000/H is not supported, nor spec-compliant. */
+#define E1000_ALL_SPEED_DUPLEX (ADVERTISE_10_HALF  |  ADVERTISE_10_FULL | \
+				ADVERTISE_100_HALF |  ADVERTISE_100_FULL | \
+						      ADVERTISE_1000_FULL)
+#define E1000_ALL_NOT_GIG      (ADVERTISE_10_HALF  |  ADVERTISE_10_FULL | \
+				ADVERTISE_100_HALF |  ADVERTISE_100_FULL)
+#define E1000_ALL_100_SPEED    (ADVERTISE_100_HALF |  ADVERTISE_100_FULL)
+#define E1000_ALL_10_SPEED     (ADVERTISE_10_HALF  |  ADVERTISE_10_FULL)
+#define E1000_ALL_FULL_DUPLEX  (ADVERTISE_10_FULL  |  ADVERTISE_100_FULL | \
+						      ADVERTISE_1000_FULL)
+#define E1000_ALL_HALF_DUPLEX  (ADVERTISE_10_HALF  |  ADVERTISE_100_HALF)
+
+#define AUTONEG_ADVERTISE_SPEED_DEFAULT   E1000_ALL_SPEED_DUPLEX
+
+/* LED Control */
+#define E1000_LEDCTL_LED0_MODE_SHIFT	0
+#define E1000_LEDCTL_LED0_BLINK		0x00000080
+#define E1000_LEDCTL_LED0_MODE_MASK	0x0000000F
+#define E1000_LEDCTL_LED0_IVRT		0x00000040
+
+#define E1000_LEDCTL_MODE_LED_ON        0xE
+#define E1000_LEDCTL_MODE_LED_OFF       0xF
+
+/* Transmit Descriptor bit definitions */
+#define E1000_TXD_POPTS_IXSM 0x01       /* Insert IP checksum */
+#define E1000_TXD_POPTS_TXSM 0x02       /* Insert TCP/UDP checksum */
+#define E1000_TXD_CMD_EOP    0x01000000 /* End of Packet */
+#define E1000_TXD_CMD_IFCS   0x02000000 /* Insert FCS (Ethernet CRC) */
+#define E1000_TXD_CMD_RS     0x08000000 /* Report Status */
+#define E1000_TXD_CMD_DEXT   0x20000000 /* Descriptor extension (0 = legacy) */
+#define E1000_TXD_STAT_DD    0x00000001 /* Descriptor Done */
+/* Extended desc bits for Linksec and timesync */
+
+/* Transmit Control */
+#define E1000_TCTL_EN     0x00000002    /* enable tx */
+#define E1000_TCTL_PSP    0x00000008    /* pad short packets */
+#define E1000_TCTL_CT     0x00000ff0    /* collision threshold */
+#define E1000_TCTL_COLD   0x003ff000    /* collision distance */
+#define E1000_TCTL_RTLC   0x01000000    /* Re-transmit on late collision */
+
+/* DMA Coalescing register fields */
+#define E1000_DMACR_DMACWT_MASK         0x00003FFF /* DMA Coal Watchdog Timer */
+#define E1000_DMACR_DMACTHR_MASK        0x00FF0000 /* DMA Coal Rx Threshold */
+#define E1000_DMACR_DMACTHR_SHIFT       16
+#define E1000_DMACR_DMAC_LX_MASK        0x30000000 /* Lx when no PCIe trans */
+#define E1000_DMACR_DMAC_LX_SHIFT       28
+#define E1000_DMACR_DMAC_EN             0x80000000 /* Enable DMA Coalescing */
+/* DMA Coalescing BMC-to-OS Watchdog Enable */
+#define E1000_DMACR_DC_BMC2OSW_EN	0x00008000
+
+#define E1000_DMCTXTH_DMCTTHR_MASK      0x00000FFF /* DMA Coal Tx Threshold */
+
+#define E1000_DMCTLX_TTLX_MASK          0x00000FFF /* Time to LX request */
+
+#define E1000_DMCRTRH_UTRESH_MASK       0x0007FFFF /* Rx Traffic Rate Thresh */
+#define E1000_DMCRTRH_LRPRCW            0x80000000 /* Rx pkt rate curr window */
+
+#define E1000_DMCCNT_CCOUNT_MASK        0x01FFFFFF /* DMA Coal Rx Current Cnt */
+
+#define E1000_FCRTC_RTH_COAL_MASK       0x0003FFF0 /* FC Rx Thresh High val */
+#define E1000_FCRTC_RTH_COAL_SHIFT      4
+#define E1000_PCIEMISC_LX_DECISION      0x00000080 /* Lx power decision */
+
+/* Timestamp in Rx buffer */
+#define E1000_RXPBS_CFG_TS_EN           0x80000000
+
+#define I210_RXPBSIZE_DEFAULT		0x000000A2 /* RXPBSIZE default */
+#define I210_RXPBSIZE_MASK		0x0000003F
+#define I210_RXPBSIZE_PB_30KB		0x0000001E
+#define I210_RXPBSIZE_PB_32KB		0x00000020
+#define I210_TXPBSIZE_DEFAULT		0x04000014 /* TXPBSIZE default */
+#define I210_TXPBSIZE_MASK		0xC0FFFFFF
+#define I210_TXPBSIZE_PB0_6KB		(6 << 0)
+#define I210_TXPBSIZE_PB1_6KB		(6 << 6)
+#define I210_TXPBSIZE_PB2_6KB		(6 << 12)
+#define I210_TXPBSIZE_PB3_6KB		(6 << 18)
+
+#define I210_DTXMXPKTSZ_DEFAULT		0x00000098
+
+#define I210_SR_QUEUES_NUM		2
+
+/* SerDes Control */
+#define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400
+
+/* Receive Checksum Control */
+#define E1000_RXCSUM_IPOFL     0x00000100   /* IPv4 checksum offload */
+#define E1000_RXCSUM_TUOFL     0x00000200   /* TCP / UDP checksum offload */
+#define E1000_RXCSUM_CRCOFL    0x00000800   /* CRC32 offload enable */
+#define E1000_RXCSUM_PCSD      0x00002000   /* packet checksum disabled */
+
+/* Header split receive */
+#define E1000_RFCTL_IPV6_EX_DIS         0x00010000
+#define E1000_RFCTL_LEF                 0x00040000
+
+/* Collision related configuration parameters */
+#define E1000_COLLISION_THRESHOLD       15
+#define E1000_CT_SHIFT                  4
+#define E1000_COLLISION_DISTANCE        63
+#define E1000_COLD_SHIFT                12
+
+/* Ethertype field values */
+#define ETHERNET_IEEE_VLAN_TYPE 0x8100  /* 802.3ac packet */
+
+/* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */
+#define MAX_JUMBO_FRAME_SIZE		0x2600
+#define MAX_STD_JUMBO_FRAME_SIZE	9216
+
+/* PBA constants */
+#define E1000_PBA_34K 0x0022
+#define E1000_PBA_64K 0x0040    /* 64KB */
+
+/* SW Semaphore Register */
+#define E1000_SWSM_SMBI         0x00000001 /* Driver Semaphore bit */
+#define E1000_SWSM_SWESMBI      0x00000002 /* FW Semaphore bit */
+
+/* Interrupt Cause Read */
+#define E1000_ICR_TXDW          0x00000001 /* Transmit desc written back */
+#define E1000_ICR_LSC           0x00000004 /* Link Status Change */
+#define E1000_ICR_RXSEQ         0x00000008 /* rx sequence error */
+#define E1000_ICR_RXDMT0        0x00000010 /* rx desc min. threshold (0) */
+#define E1000_ICR_RXT0          0x00000080 /* rx timer intr (ring 0) */
+#define E1000_ICR_VMMB          0x00000100 /* VM MB event */
+#define E1000_ICR_TS            0x00080000 /* Time Sync Interrupt */
+#define E1000_ICR_DRSTA         0x40000000 /* Device Reset Asserted */
+/* If this bit asserted, the driver should claim the interrupt */
+#define E1000_ICR_INT_ASSERTED  0x80000000
+/* LAN connected device generates an interrupt */
+#define E1000_ICR_DOUTSYNC      0x10000000 /* NIC DMA out of sync */
+
+/* Extended Interrupt Cause Read */
+#define E1000_EICR_RX_QUEUE0    0x00000001 /* Rx Queue 0 Interrupt */
+#define E1000_EICR_RX_QUEUE1    0x00000002 /* Rx Queue 1 Interrupt */
+#define E1000_EICR_RX_QUEUE2    0x00000004 /* Rx Queue 2 Interrupt */
+#define E1000_EICR_RX_QUEUE3    0x00000008 /* Rx Queue 3 Interrupt */
+#define E1000_EICR_TX_QUEUE0    0x00000100 /* Tx Queue 0 Interrupt */
+#define E1000_EICR_TX_QUEUE1    0x00000200 /* Tx Queue 1 Interrupt */
+#define E1000_EICR_TX_QUEUE2    0x00000400 /* Tx Queue 2 Interrupt */
+#define E1000_EICR_TX_QUEUE3    0x00000800 /* Tx Queue 3 Interrupt */
+#define E1000_EICR_OTHER        0x80000000 /* Interrupt Cause Active */
+/* TCP Timer */
+
+/* This defines the bits that are set in the Interrupt Mask
+ * Set/Read Register.  Each bit is documented below:
+ *   o RXT0   = Receiver Timer Interrupt (ring 0)
+ *   o TXDW   = Transmit Descriptor Written Back
+ *   o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0)
+ *   o RXSEQ  = Receive Sequence Error
+ *   o LSC    = Link Status Change
+ */
+#define IMS_ENABLE_MASK ( \
+	E1000_IMS_RXT0   |    \
+	E1000_IMS_TXDW   |    \
+	E1000_IMS_RXDMT0 |    \
+	E1000_IMS_RXSEQ  |    \
+	E1000_IMS_LSC    |    \
+	E1000_IMS_DOUTSYNC)
+
+/* Interrupt Mask Set */
+#define E1000_IMS_TXDW      E1000_ICR_TXDW      /* Transmit desc written back */
+#define E1000_IMS_LSC       E1000_ICR_LSC       /* Link Status Change */
+#define E1000_IMS_VMMB      E1000_ICR_VMMB      /* Mail box activity */
+#define E1000_IMS_TS        E1000_ICR_TS        /* Time Sync Interrupt */
+#define E1000_IMS_RXSEQ     E1000_ICR_RXSEQ     /* rx sequence error */
+#define E1000_IMS_RXDMT0    E1000_ICR_RXDMT0    /* rx desc min. threshold */
+#define E1000_IMS_RXT0      E1000_ICR_RXT0      /* rx timer intr */
+#define E1000_IMS_DRSTA     E1000_ICR_DRSTA     /* Device Reset Asserted */
+#define E1000_IMS_DOUTSYNC  E1000_ICR_DOUTSYNC /* NIC DMA out of sync */
+
+/* Extended Interrupt Mask Set */
+#define E1000_EIMS_OTHER        E1000_EICR_OTHER   /* Interrupt Cause Active */
+
+/* Interrupt Cause Set */
+#define E1000_ICS_LSC       E1000_ICR_LSC       /* Link Status Change */
+#define E1000_ICS_RXDMT0    E1000_ICR_RXDMT0    /* rx desc min. threshold */
+#define E1000_ICS_DRSTA     E1000_ICR_DRSTA     /* Device Reset Aserted */
+
+/* Extended Interrupt Cause Set */
+/* E1000_EITR_CNT_IGNR is only for 82576 and newer */
+#define E1000_EITR_CNT_IGNR     0x80000000 /* Don't reset counters on write */
+
+
+/* Transmit Descriptor Control */
+/* Enable the counting of descriptors still to be processed. */
+
+/* Flow Control Constants */
+#define FLOW_CONTROL_ADDRESS_LOW  0x00C28001
+#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100
+#define FLOW_CONTROL_TYPE         0x8808
+
+/* Transmit Config Word */
+#define E1000_TXCW_ASM_DIR	0x00000100 /* TXCW astm pause direction */
+#define E1000_TXCW_PAUSE	0x00000080 /* TXCW sym pause request */
+
+/* 802.1q VLAN Packet Size */
+#define VLAN_TAG_SIZE              4    /* 802.3ac tag (not DMA'd) */
+#define E1000_VLAN_FILTER_TBL_SIZE 128  /* VLAN Filter Table (4096 bits) */
+
+/* Receive Address */
+/* Number of high/low register pairs in the RAR. The RAR (Receive Address
+ * Registers) holds the directed and multicast addresses that we monitor.
+ * Technically, we have 16 spots.  However, we reserve one of these spots
+ * (RAR[15]) for our directed address used by controllers with
+ * manageability enabled, allowing us room for 15 multicast addresses.
+ */
+#define E1000_RAH_AV  0x80000000        /* Receive descriptor valid */
+#define E1000_RAH_ASEL_SRC_ADDR 0x00010000
+#define E1000_RAH_QSEL_ENABLE 0x10000000
+#define E1000_RAL_MAC_ADDR_LEN 4
+#define E1000_RAH_MAC_ADDR_LEN 2
+#define E1000_RAH_POOL_MASK 0x03FC0000
+#define E1000_RAH_POOL_1 0x00040000
+
+/* Error Codes */
+#define E1000_ERR_NVM      1
+#define E1000_ERR_PHY      2
+#define E1000_ERR_CONFIG   3
+#define E1000_ERR_PARAM    4
+#define E1000_ERR_MAC_INIT 5
+#define E1000_ERR_RESET   9
+#define E1000_ERR_MASTER_REQUESTS_PENDING 10
+#define E1000_BLK_PHY_RESET   12
+#define E1000_ERR_SWFW_SYNC 13
+#define E1000_NOT_IMPLEMENTED 14
+#define E1000_ERR_MBX      15
+#define E1000_ERR_INVALID_ARGUMENT  16
+#define E1000_ERR_NO_SPACE          17
+#define E1000_ERR_NVM_PBA_SECTION   18
+#define E1000_ERR_INVM_VALUE_NOT_FOUND	19
+#define E1000_ERR_I2C               20
+
+/* Loop limit on how long we wait for auto-negotiation to complete */
+#define COPPER_LINK_UP_LIMIT              10
+#define PHY_AUTO_NEG_LIMIT                45
+#define PHY_FORCE_LIMIT                   20
+/* Number of 100 microseconds we wait for PCI Express master disable */
+#define MASTER_DISABLE_TIMEOUT      800
+/* Number of milliseconds we wait for PHY configuration done after MAC reset */
+#define PHY_CFG_TIMEOUT             100
+/* Number of 2 milliseconds we wait for acquiring MDIO ownership. */
+/* Number of milliseconds for NVM auto read done after MAC reset. */
+#define AUTO_READ_DONE_TIMEOUT      10
+
+/* Flow Control */
+#define E1000_FCRTL_XONE 0x80000000     /* Enable XON frame transmission */
+
+#define E1000_TSYNCTXCTL_VALID    0x00000001 /* tx timestamp valid */
+#define E1000_TSYNCTXCTL_ENABLED  0x00000010 /* enable tx timestampping */
+
+#define E1000_TSYNCRXCTL_VALID      0x00000001 /* rx timestamp valid */
+#define E1000_TSYNCRXCTL_TYPE_MASK  0x0000000E /* rx type mask */
+#define E1000_TSYNCRXCTL_TYPE_L2_V2       0x00
+#define E1000_TSYNCRXCTL_TYPE_L4_V1       0x02
+#define E1000_TSYNCRXCTL_TYPE_L2_L4_V2    0x04
+#define E1000_TSYNCRXCTL_TYPE_ALL         0x08
+#define E1000_TSYNCRXCTL_TYPE_EVENT_V2    0x0A
+#define E1000_TSYNCRXCTL_ENABLED    0x00000010 /* enable rx timestampping */
+
+#define E1000_TSYNCRXCFG_PTP_V1_CTRLT_MASK   0x000000FF
+#define E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE       0x00
+#define E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE  0x01
+#define E1000_TSYNCRXCFG_PTP_V1_FOLLOWUP_MESSAGE   0x02
+#define E1000_TSYNCRXCFG_PTP_V1_DELAY_RESP_MESSAGE 0x03
+#define E1000_TSYNCRXCFG_PTP_V1_MANAGEMENT_MESSAGE 0x04
+
+#define E1000_TSYNCRXCFG_PTP_V2_MSGID_MASK               0x00000F00
+#define E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE                 0x0000
+#define E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE            0x0100
+#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_REQ_MESSAGE       0x0200
+#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_RESP_MESSAGE      0x0300
+#define E1000_TSYNCRXCFG_PTP_V2_FOLLOWUP_MESSAGE             0x0800
+#define E1000_TSYNCRXCFG_PTP_V2_DELAY_RESP_MESSAGE           0x0900
+#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_FOLLOWUP_MESSAGE  0x0A00
+#define E1000_TSYNCRXCFG_PTP_V2_ANNOUNCE_MESSAGE             0x0B00
+#define E1000_TSYNCRXCFG_PTP_V2_SIGNALLING_MESSAGE           0x0C00
+#define E1000_TSYNCRXCFG_PTP_V2_MANAGEMENT_MESSAGE           0x0D00
+
+#define E1000_TIMINCA_16NS_SHIFT 24
+
+/* Time Sync Interrupt Cause/Mask Register Bits */
+
+#define TSINTR_SYS_WRAP  BIT(0) /* SYSTIM Wrap around. */
+#define TSINTR_TXTS      BIT(1) /* Transmit Timestamp. */
+#define TSINTR_RXTS      BIT(2) /* Receive Timestamp. */
+#define TSINTR_TT0       BIT(3) /* Target Time 0 Trigger. */
+#define TSINTR_TT1       BIT(4) /* Target Time 1 Trigger. */
+#define TSINTR_AUTT0     BIT(5) /* Auxiliary Timestamp 0 Taken. */
+#define TSINTR_AUTT1     BIT(6) /* Auxiliary Timestamp 1 Taken. */
+#define TSINTR_TADJ      BIT(7) /* Time Adjust Done. */
+
+#define TSYNC_INTERRUPTS TSINTR_TXTS
+#define E1000_TSICR_TXTS TSINTR_TXTS
+
+/* TSAUXC Configuration Bits */
+#define TSAUXC_EN_TT0    BIT(0)  /* Enable target time 0. */
+#define TSAUXC_EN_TT1    BIT(1)  /* Enable target time 1. */
+#define TSAUXC_EN_CLK0   BIT(2)  /* Enable Configurable Frequency Clock 0. */
+#define TSAUXC_SAMP_AUT0 BIT(3)  /* Latch SYSTIML/H into AUXSTMPL/0. */
+#define TSAUXC_ST0       BIT(4)  /* Start Clock 0 Toggle on Target Time 0. */
+#define TSAUXC_EN_CLK1   BIT(5)  /* Enable Configurable Frequency Clock 1. */
+#define TSAUXC_SAMP_AUT1 BIT(6)  /* Latch SYSTIML/H into AUXSTMPL/1. */
+#define TSAUXC_ST1       BIT(7)  /* Start Clock 1 Toggle on Target Time 1. */
+#define TSAUXC_EN_TS0    BIT(8)  /* Enable hardware timestamp 0. */
+#define TSAUXC_AUTT0     BIT(9)  /* Auxiliary Timestamp Taken. */
+#define TSAUXC_EN_TS1    BIT(10) /* Enable hardware timestamp 0. */
+#define TSAUXC_AUTT1     BIT(11) /* Auxiliary Timestamp Taken. */
+#define TSAUXC_PLSG      BIT(17) /* Generate a pulse. */
+#define TSAUXC_DISABLE   BIT(31) /* Disable SYSTIM Count Operation. */
+
+/* SDP Configuration Bits */
+#define AUX0_SEL_SDP0    (0u << 0)  /* Assign SDP0 to auxiliary time stamp 0. */
+#define AUX0_SEL_SDP1    (1u << 0)  /* Assign SDP1 to auxiliary time stamp 0. */
+#define AUX0_SEL_SDP2    (2u << 0)  /* Assign SDP2 to auxiliary time stamp 0. */
+#define AUX0_SEL_SDP3    (3u << 0)  /* Assign SDP3 to auxiliary time stamp 0. */
+#define AUX0_TS_SDP_EN   (1u << 2)  /* Enable auxiliary time stamp trigger 0. */
+#define AUX1_SEL_SDP0    (0u << 3)  /* Assign SDP0 to auxiliary time stamp 1. */
+#define AUX1_SEL_SDP1    (1u << 3)  /* Assign SDP1 to auxiliary time stamp 1. */
+#define AUX1_SEL_SDP2    (2u << 3)  /* Assign SDP2 to auxiliary time stamp 1. */
+#define AUX1_SEL_SDP3    (3u << 3)  /* Assign SDP3 to auxiliary time stamp 1. */
+#define AUX1_TS_SDP_EN   (1u << 5)  /* Enable auxiliary time stamp trigger 1. */
+#define TS_SDP0_SEL_TT0  (0u << 6)  /* Target time 0 is output on SDP0. */
+#define TS_SDP0_SEL_TT1  (1u << 6)  /* Target time 1 is output on SDP0. */
+#define TS_SDP0_SEL_FC0  (2u << 6)  /* Freq clock  0 is output on SDP0. */
+#define TS_SDP0_SEL_FC1  (3u << 6)  /* Freq clock  1 is output on SDP0. */
+#define TS_SDP0_EN       (1u << 8)  /* SDP0 is assigned to Tsync. */
+#define TS_SDP1_SEL_TT0  (0u << 9)  /* Target time 0 is output on SDP1. */
+#define TS_SDP1_SEL_TT1  (1u << 9)  /* Target time 1 is output on SDP1. */
+#define TS_SDP1_SEL_FC0  (2u << 9)  /* Freq clock  0 is output on SDP1. */
+#define TS_SDP1_SEL_FC1  (3u << 9)  /* Freq clock  1 is output on SDP1. */
+#define TS_SDP1_EN       (1u << 11) /* SDP1 is assigned to Tsync. */
+#define TS_SDP2_SEL_TT0  (0u << 12) /* Target time 0 is output on SDP2. */
+#define TS_SDP2_SEL_TT1  (1u << 12) /* Target time 1 is output on SDP2. */
+#define TS_SDP2_SEL_FC0  (2u << 12) /* Freq clock  0 is output on SDP2. */
+#define TS_SDP2_SEL_FC1  (3u << 12) /* Freq clock  1 is output on SDP2. */
+#define TS_SDP2_EN       (1u << 14) /* SDP2 is assigned to Tsync. */
+#define TS_SDP3_SEL_TT0  (0u << 15) /* Target time 0 is output on SDP3. */
+#define TS_SDP3_SEL_TT1  (1u << 15) /* Target time 1 is output on SDP3. */
+#define TS_SDP3_SEL_FC0  (2u << 15) /* Freq clock  0 is output on SDP3. */
+#define TS_SDP3_SEL_FC1  (3u << 15) /* Freq clock  1 is output on SDP3. */
+#define TS_SDP3_EN       (1u << 17) /* SDP3 is assigned to Tsync. */
+
+#define E1000_MDICNFG_EXT_MDIO    0x80000000      /* MDI ext/int destination */
+#define E1000_MDICNFG_COM_MDIO    0x40000000      /* MDI shared w/ lan 0 */
+#define E1000_MDICNFG_PHY_MASK    0x03E00000
+#define E1000_MDICNFG_PHY_SHIFT   21
+
+#define E1000_MEDIA_PORT_COPPER			1
+#define E1000_MEDIA_PORT_OTHER			2
+#define E1000_M88E1112_AUTO_COPPER_SGMII	0x2
+#define E1000_M88E1112_AUTO_COPPER_BASEX	0x3
+#define E1000_M88E1112_STATUS_LINK		0x0004 /* Interface Link Bit */
+#define E1000_M88E1112_MAC_CTRL_1		0x10
+#define E1000_M88E1112_MAC_CTRL_1_MODE_MASK	0x0380 /* Mode Select */
+#define E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT	7
+#define E1000_M88E1112_PAGE_ADDR		0x16
+#define E1000_M88E1112_STATUS			0x01
+#define E1000_M88E1512_CFG_REG_1		0x0010
+#define E1000_M88E1512_CFG_REG_2		0x0011
+#define E1000_M88E1512_CFG_REG_3		0x0007
+#define E1000_M88E1512_MODE			0x0014
+
+/* PCI Express Control */
+#define E1000_GCR_CMPL_TMOUT_MASK       0x0000F000
+#define E1000_GCR_CMPL_TMOUT_10ms       0x00001000
+#define E1000_GCR_CMPL_TMOUT_RESEND     0x00010000
+#define E1000_GCR_CAP_VER2              0x00040000
+
+/* mPHY Address Control and Data Registers */
+#define E1000_MPHY_ADDR_CTL          0x0024 /* mPHY Address Control Register */
+#define E1000_MPHY_ADDR_CTL_OFFSET_MASK 0xFFFF0000
+#define E1000_MPHY_DATA                 0x0E10 /* mPHY Data Register */
+
+/* mPHY PCS CLK Register */
+#define E1000_MPHY_PCS_CLK_REG_OFFSET  0x0004 /* mPHY PCS CLK AFE CSR Offset */
+/* mPHY Near End Digital Loopback Override Bit */
+#define E1000_MPHY_PCS_CLK_REG_DIGINELBEN 0x10
+
+#define E1000_PCS_LCTL_FORCE_FCTRL	0x80
+#define E1000_PCS_LSTS_AN_COMPLETE	0x10000
+
+/* PHY Control Register */
+#define MII_CR_FULL_DUPLEX      0x0100  /* FDX =1, half duplex =0 */
+#define MII_CR_RESTART_AUTO_NEG 0x0200  /* Restart auto negotiation */
+#define MII_CR_POWER_DOWN       0x0800  /* Power down */
+#define MII_CR_AUTO_NEG_EN      0x1000  /* Auto Neg Enable */
+#define MII_CR_LOOPBACK         0x4000  /* 0 = normal, 1 = loopback */
+#define MII_CR_RESET            0x8000  /* 0 = normal, 1 = PHY reset */
+#define MII_CR_SPEED_1000       0x0040
+#define MII_CR_SPEED_100        0x2000
+#define MII_CR_SPEED_10         0x0000
+
+/* PHY Status Register */
+#define MII_SR_LINK_STATUS       0x0004 /* Link Status 1 = link */
+#define MII_SR_AUTONEG_COMPLETE  0x0020 /* Auto Neg Complete */
+
+/* Autoneg Advertisement Register */
+#define NWAY_AR_10T_HD_CAPS      0x0020   /* 10T   Half Duplex Capable */
+#define NWAY_AR_10T_FD_CAPS      0x0040   /* 10T   Full Duplex Capable */
+#define NWAY_AR_100TX_HD_CAPS    0x0080   /* 100TX Half Duplex Capable */
+#define NWAY_AR_100TX_FD_CAPS    0x0100   /* 100TX Full Duplex Capable */
+#define NWAY_AR_PAUSE            0x0400   /* Pause operation desired */
+#define NWAY_AR_ASM_DIR          0x0800   /* Asymmetric Pause Direction bit */
+
+/* Link Partner Ability Register (Base Page) */
+#define NWAY_LPAR_PAUSE          0x0400 /* LP Pause operation desired */
+#define NWAY_LPAR_ASM_DIR        0x0800 /* LP Asymmetric Pause Direction bit */
+
+/* Autoneg Expansion Register */
+
+/* 1000BASE-T Control Register */
+#define CR_1000T_HD_CAPS         0x0100 /* Advertise 1000T HD capability */
+#define CR_1000T_FD_CAPS         0x0200 /* Advertise 1000T FD capability  */
+#define CR_1000T_MS_VALUE        0x0800 /* 1=Configure PHY as Master */
+					/* 0=Configure PHY as Slave */
+#define CR_1000T_MS_ENABLE       0x1000 /* 1=Master/Slave manual config value */
+					/* 0=Automatic Master/Slave config */
+
+/* 1000BASE-T Status Register */
+#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */
+#define SR_1000T_LOCAL_RX_STATUS  0x2000 /* Local receiver OK */
+
+
+/* PHY 1000 MII Register/Bit Definitions */
+/* PHY Registers defined by IEEE */
+#define PHY_CONTROL      0x00 /* Control Register */
+#define PHY_STATUS       0x01 /* Status Register */
+#define PHY_ID1          0x02 /* Phy Id Reg (word 1) */
+#define PHY_ID2          0x03 /* Phy Id Reg (word 2) */
+#define PHY_AUTONEG_ADV  0x04 /* Autoneg Advertisement */
+#define PHY_LP_ABILITY   0x05 /* Link Partner Ability (Base Page) */
+#define PHY_1000T_CTRL   0x09 /* 1000Base-T Control Reg */
+#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */
+
+/* NVM Control */
+#define E1000_EECD_SK        0x00000001 /* NVM Clock */
+#define E1000_EECD_CS        0x00000002 /* NVM Chip Select */
+#define E1000_EECD_DI        0x00000004 /* NVM Data In */
+#define E1000_EECD_DO        0x00000008 /* NVM Data Out */
+#define E1000_EECD_REQ       0x00000040 /* NVM Access Request */
+#define E1000_EECD_GNT       0x00000080 /* NVM Access Grant */
+#define E1000_EECD_PRES      0x00000100 /* NVM Present */
+/* NVM Addressing bits based on type 0=small, 1=large */
+#define E1000_EECD_ADDR_BITS 0x00000400
+#define E1000_NVM_GRANT_ATTEMPTS   1000 /* NVM # attempts to gain grant */
+#define E1000_EECD_AUTO_RD          0x00000200  /* NVM Auto Read done */
+#define E1000_EECD_SIZE_EX_MASK     0x00007800  /* NVM Size */
+#define E1000_EECD_SIZE_EX_SHIFT     11
+#define E1000_EECD_FLUPD_I210		0x00800000 /* Update FLASH */
+#define E1000_EECD_FLUDONE_I210		0x04000000 /* Update FLASH done*/
+#define E1000_EECD_FLASH_DETECTED_I210	0x00080000 /* FLASH detected */
+#define E1000_FLUDONE_ATTEMPTS		20000
+#define E1000_EERD_EEWR_MAX_COUNT	512 /* buffered EEPROM words rw */
+#define E1000_I210_FIFO_SEL_RX		0x00
+#define E1000_I210_FIFO_SEL_TX_QAV(_i)	(0x02 + (_i))
+#define E1000_I210_FIFO_SEL_TX_LEGACY	E1000_I210_FIFO_SEL_TX_QAV(0)
+#define E1000_I210_FIFO_SEL_BMC2OS_TX	0x06
+#define E1000_I210_FIFO_SEL_BMC2OS_RX	0x01
+#define E1000_I210_FLASH_SECTOR_SIZE	0x1000 /* 4KB FLASH sector unit size */
+/* Secure FLASH mode requires removing MSb */
+#define E1000_I210_FW_PTR_MASK		0x7FFF
+/* Firmware code revision field word offset*/
+#define E1000_I210_FW_VER_OFFSET	328
+#define E1000_EECD_FLUPD_I210		0x00800000 /* Update FLASH */
+#define E1000_EECD_FLUDONE_I210		0x04000000 /* Update FLASH done*/
+#define E1000_FLUDONE_ATTEMPTS		20000
+#define E1000_EERD_EEWR_MAX_COUNT	512 /* buffered EEPROM words rw */
+#define E1000_I210_FIFO_SEL_RX		0x00
+#define E1000_I210_FIFO_SEL_TX_QAV(_i)	(0x02 + (_i))
+#define E1000_I210_FIFO_SEL_TX_LEGACY	E1000_I210_FIFO_SEL_TX_QAV(0)
+#define E1000_I210_FIFO_SEL_BMC2OS_TX	0x06
+#define E1000_I210_FIFO_SEL_BMC2OS_RX	0x01
+
+
+/* Offset to data in NVM read/write registers */
+#define E1000_NVM_RW_REG_DATA   16
+#define E1000_NVM_RW_REG_DONE   2    /* Offset to READ/WRITE done bit */
+#define E1000_NVM_RW_REG_START  1    /* Start operation */
+#define E1000_NVM_RW_ADDR_SHIFT 2    /* Shift to the address bits */
+#define E1000_NVM_POLL_READ     0    /* Flag for polling for read complete */
+
+/* NVM Word Offsets */
+#define NVM_COMPAT                 0x0003
+#define NVM_ID_LED_SETTINGS        0x0004 /* SERDES output amplitude */
+#define NVM_VERSION                0x0005
+#define NVM_INIT_CONTROL2_REG      0x000F
+#define NVM_INIT_CONTROL3_PORT_B   0x0014
+#define NVM_INIT_CONTROL3_PORT_A   0x0024
+#define NVM_ALT_MAC_ADDR_PTR       0x0037
+#define NVM_CHECKSUM_REG           0x003F
+#define NVM_COMPATIBILITY_REG_3    0x0003
+#define NVM_COMPATIBILITY_BIT_MASK 0x8000
+#define NVM_MAC_ADDR               0x0000
+#define NVM_SUB_DEV_ID             0x000B
+#define NVM_SUB_VEN_ID             0x000C
+#define NVM_DEV_ID                 0x000D
+#define NVM_VEN_ID                 0x000E
+#define NVM_INIT_CTRL_2            0x000F
+#define NVM_INIT_CTRL_4            0x0013
+#define NVM_LED_1_CFG              0x001C
+#define NVM_LED_0_2_CFG            0x001F
+#define NVM_ETRACK_WORD            0x0042
+#define NVM_ETRACK_HIWORD          0x0043
+#define NVM_COMB_VER_OFF           0x0083
+#define NVM_COMB_VER_PTR           0x003d
+
+/* NVM version defines */
+#define NVM_MAJOR_MASK			0xF000
+#define NVM_MINOR_MASK			0x0FF0
+#define NVM_IMAGE_ID_MASK		0x000F
+#define NVM_COMB_VER_MASK		0x00FF
+#define NVM_MAJOR_SHIFT			12
+#define NVM_MINOR_SHIFT			4
+#define NVM_COMB_VER_SHFT		8
+#define NVM_VER_INVALID			0xFFFF
+#define NVM_ETRACK_SHIFT		16
+#define NVM_ETRACK_VALID		0x8000
+#define NVM_NEW_DEC_MASK		0x0F00
+#define NVM_HEX_CONV			16
+#define NVM_HEX_TENS			10
+
+#define NVM_ETS_CFG			0x003E
+#define NVM_ETS_LTHRES_DELTA_MASK	0x07C0
+#define NVM_ETS_LTHRES_DELTA_SHIFT	6
+#define NVM_ETS_TYPE_MASK		0x0038
+#define NVM_ETS_TYPE_SHIFT		3
+#define NVM_ETS_TYPE_EMC		0x000
+#define NVM_ETS_NUM_SENSORS_MASK	0x0007
+#define NVM_ETS_DATA_LOC_MASK		0x3C00
+#define NVM_ETS_DATA_LOC_SHIFT		10
+#define NVM_ETS_DATA_INDEX_MASK		0x0300
+#define NVM_ETS_DATA_INDEX_SHIFT	8
+#define NVM_ETS_DATA_HTHRESH_MASK	0x00FF
+
+#define E1000_NVM_CFG_DONE_PORT_0  0x040000 /* MNG config cycle done */
+#define E1000_NVM_CFG_DONE_PORT_1  0x080000 /* ...for second port */
+#define E1000_NVM_CFG_DONE_PORT_2  0x100000 /* ...for third port */
+#define E1000_NVM_CFG_DONE_PORT_3  0x200000 /* ...for fourth port */
+
+#define NVM_82580_LAN_FUNC_OFFSET(a) (a ? (0x40 + (0x40 * a)) : 0)
+
+/* Mask bits for fields in Word 0x24 of the NVM */
+#define NVM_WORD24_COM_MDIO         0x0008 /* MDIO interface shared */
+#define NVM_WORD24_EXT_MDIO         0x0004 /* MDIO accesses routed external */
+
+/* Mask bits for fields in Word 0x0f of the NVM */
+#define NVM_WORD0F_PAUSE_MASK       0x3000
+#define NVM_WORD0F_ASM_DIR          0x2000
+
+/* Mask bits for fields in Word 0x1a of the NVM */
+
+/* length of string needed to store part num */
+#define E1000_PBANUM_LENGTH         11
+
+/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */
+#define NVM_SUM                    0xBABA
+
+#define NVM_PBA_OFFSET_0           8
+#define NVM_PBA_OFFSET_1           9
+#define NVM_RESERVED_WORD		0xFFFF
+#define NVM_PBA_PTR_GUARD          0xFAFA
+#define NVM_WORD_SIZE_BASE_SHIFT   6
+
+/* NVM Commands - Microwire */
+
+/* NVM Commands - SPI */
+#define NVM_MAX_RETRY_SPI          5000 /* Max wait of 5ms, for RDY signal */
+#define NVM_WRITE_OPCODE_SPI       0x02 /* NVM write opcode */
+#define NVM_READ_OPCODE_SPI        0x03 /* NVM read opcode */
+#define NVM_A8_OPCODE_SPI          0x08 /* opcode bit-3 = address bit-8 */
+#define NVM_WREN_OPCODE_SPI        0x06 /* NVM set Write Enable latch */
+#define NVM_RDSR_OPCODE_SPI        0x05 /* NVM read Status register */
+
+/* SPI NVM Status Register */
+#define NVM_STATUS_RDY_SPI         0x01
+
+/* Word definitions for ID LED Settings */
+#define ID_LED_RESERVED_0000 0x0000
+#define ID_LED_RESERVED_FFFF 0xFFFF
+#define ID_LED_DEFAULT       ((ID_LED_OFF1_ON2  << 12) | \
+			      (ID_LED_OFF1_OFF2 <<  8) | \
+			      (ID_LED_DEF1_DEF2 <<  4) | \
+			      (ID_LED_DEF1_DEF2))
+#define ID_LED_DEF1_DEF2     0x1
+#define ID_LED_DEF1_ON2      0x2
+#define ID_LED_DEF1_OFF2     0x3
+#define ID_LED_ON1_DEF2      0x4
+#define ID_LED_ON1_ON2       0x5
+#define ID_LED_ON1_OFF2      0x6
+#define ID_LED_OFF1_DEF2     0x7
+#define ID_LED_OFF1_ON2      0x8
+#define ID_LED_OFF1_OFF2     0x9
+
+#define IGP_ACTIVITY_LED_MASK   0xFFFFF0FF
+#define IGP_ACTIVITY_LED_ENABLE 0x0300
+#define IGP_LED3_MODE           0x07000000
+
+/* PCI/PCI-X/PCI-EX Config space */
+#define PCIE_DEVICE_CONTROL2         0x28
+#define PCIE_DEVICE_CONTROL2_16ms    0x0005
+
+#define PHY_REVISION_MASK      0xFFFFFFF0
+#define MAX_PHY_REG_ADDRESS    0x1F  /* 5 bit address bus (0-0x1F) */
+#define MAX_PHY_MULTI_PAGE_REG 0xF
+
+/* Bit definitions for valid PHY IDs. */
+/* I = Integrated
+ * E = External
+ */
+#define M88E1111_I_PHY_ID    0x01410CC0
+#define M88E1112_E_PHY_ID    0x01410C90
+#define I347AT4_E_PHY_ID     0x01410DC0
+#define IGP03E1000_E_PHY_ID  0x02A80390
+#define I82580_I_PHY_ID      0x015403A0
+#define I350_I_PHY_ID        0x015403B0
+#define M88_VENDOR           0x0141
+#define I210_I_PHY_ID        0x01410C00
+#define M88E1543_E_PHY_ID    0x01410EA0
+#define M88E1512_E_PHY_ID    0x01410DD0
+#define BCM54616_E_PHY_ID    0x03625D10
+
+/* M88E1000 Specific Registers */
+#define M88E1000_PHY_SPEC_CTRL     0x10  /* PHY Specific Control Register */
+#define M88E1000_PHY_SPEC_STATUS   0x11  /* PHY Specific Status Register */
+#define M88E1000_EXT_PHY_SPEC_CTRL 0x14  /* Extended PHY Specific Control */
+
+#define M88E1000_PHY_PAGE_SELECT   0x1D  /* Reg 29 for page number setting */
+#define M88E1000_PHY_GEN_CONTROL   0x1E  /* Its meaning depends on reg 29 */
+
+/* M88E1000 PHY Specific Control Register */
+#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reversal enabled */
+/* 1=CLK125 low, 0=CLK125 toggling */
+#define M88E1000_PSCR_MDI_MANUAL_MODE  0x0000  /* MDI Crossover Mode bits 6:5 */
+					       /* Manual MDI configuration */
+#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020  /* Manual MDIX configuration */
+/* 1000BASE-T: Auto crossover, 100BASE-TX/10BASE-T: MDI Mode */
+#define M88E1000_PSCR_AUTO_X_1000T     0x0040
+/* Auto crossover enabled all speeds */
+#define M88E1000_PSCR_AUTO_X_MODE      0x0060
+/* 1=Enable Extended 10BASE-T distance (Lower 10BASE-T Rx Threshold
+ * 0=Normal 10BASE-T Rx Threshold
+ */
+/* 1=5-bit interface in 100BASE-TX, 0=MII interface in 100BASE-TX */
+#define M88E1000_PSCR_ASSERT_CRS_ON_TX     0x0800 /* 1=Assert CRS on Transmit */
+
+/* M88E1000 PHY Specific Status Register */
+#define M88E1000_PSSR_REV_POLARITY       0x0002 /* 1=Polarity reversed */
+#define M88E1000_PSSR_DOWNSHIFT          0x0020 /* 1=Downshifted */
+#define M88E1000_PSSR_MDIX               0x0040 /* 1=MDIX; 0=MDI */
+/* 0 = <50M
+ * 1 = 50-80M
+ * 2 = 80-110M
+ * 3 = 110-140M
+ * 4 = >140M
+ */
+#define M88E1000_PSSR_CABLE_LENGTH       0x0380
+#define M88E1000_PSSR_SPEED              0xC000 /* Speed, bits 14:15 */
+#define M88E1000_PSSR_1000MBS            0x8000 /* 10=1000Mbs */
+
+#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7
+
+/* M88E1000 Extended PHY Specific Control Register */
+/* 1 = Lost lock detect enabled.
+ * Will assert lost lock and bring
+ * link down if idle not seen
+ * within 1ms in 1000BASE-T
+ */
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the master
+ */
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00
+#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X   0x0000
+/* Number of times we will attempt to autonegotiate before downshifting if we
+ * are the slave
+ */
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK  0x0300
+#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X    0x0100
+#define M88E1000_EPSCR_TX_CLK_25      0x0070 /* 25  MHz TX_CLK */
+
+/* Intel i347-AT4 Registers */
+
+#define I347AT4_PCDL0                  0x10 /* Pair 0 PHY Cable Diagnostics Length */
+#define I347AT4_PCDL1                  0x11 /* Pair 1 PHY Cable Diagnostics Length */
+#define I347AT4_PCDL2                  0x12 /* Pair 2 PHY Cable Diagnostics Length */
+#define I347AT4_PCDL3                  0x13 /* Pair 3 PHY Cable Diagnostics Length */
+#define I347AT4_PCDC                   0x15 /* PHY Cable Diagnostics Control */
+#define I347AT4_PAGE_SELECT            0x16
+
+/* i347-AT4 Extended PHY Specific Control Register */
+
+/*  Number of times we will attempt to autonegotiate before downshifting if we
+ *  are the master
+ */
+#define I347AT4_PSCR_DOWNSHIFT_ENABLE 0x0800
+#define I347AT4_PSCR_DOWNSHIFT_MASK   0x7000
+#define I347AT4_PSCR_DOWNSHIFT_1X     0x0000
+#define I347AT4_PSCR_DOWNSHIFT_2X     0x1000
+#define I347AT4_PSCR_DOWNSHIFT_3X     0x2000
+#define I347AT4_PSCR_DOWNSHIFT_4X     0x3000
+#define I347AT4_PSCR_DOWNSHIFT_5X     0x4000
+#define I347AT4_PSCR_DOWNSHIFT_6X     0x5000
+#define I347AT4_PSCR_DOWNSHIFT_7X     0x6000
+#define I347AT4_PSCR_DOWNSHIFT_8X     0x7000
+
+/* i347-AT4 PHY Cable Diagnostics Control */
+#define I347AT4_PCDC_CABLE_LENGTH_UNIT 0x0400 /* 0=cm 1=meters */
+
+/* Marvell 1112 only registers */
+#define M88E1112_VCT_DSP_DISTANCE       0x001A
+
+/* M88EC018 Rev 2 specific DownShift settings */
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK  0x0E00
+#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X    0x0800
+
+/* MDI Control */
+#define E1000_MDIC_DATA_MASK 0x0000FFFF
+#define E1000_MDIC_REG_MASK  0x001F0000
+#define E1000_MDIC_REG_SHIFT 16
+#define E1000_MDIC_PHY_MASK  0x03E00000
+#define E1000_MDIC_PHY_SHIFT 21
+#define E1000_MDIC_OP_WRITE  0x04000000
+#define E1000_MDIC_OP_READ   0x08000000
+#define E1000_MDIC_READY     0x10000000
+#define E1000_MDIC_INT_EN    0x20000000
+#define E1000_MDIC_ERROR     0x40000000
+#define E1000_MDIC_DEST      0x80000000
+
+/* Thermal Sensor */
+#define E1000_THSTAT_PWR_DOWN       0x00000001 /* Power Down Event */
+#define E1000_THSTAT_LINK_THROTTLE  0x00000002 /* Link Speed Throttle Event */
+
+/* Energy Efficient Ethernet */
+#define E1000_IPCNFG_EEE_1G_AN       0x00000008  /* EEE Enable 1G AN */
+#define E1000_IPCNFG_EEE_100M_AN     0x00000004  /* EEE Enable 100M AN */
+#define E1000_EEER_TX_LPI_EN         0x00010000  /* EEE Tx LPI Enable */
+#define E1000_EEER_RX_LPI_EN         0x00020000  /* EEE Rx LPI Enable */
+#define E1000_EEER_FRC_AN            0x10000000  /* Enable EEE in loopback */
+#define E1000_EEER_LPI_FC            0x00040000  /* EEE Enable on FC */
+#define E1000_EEE_SU_LPI_CLK_STP     0X00800000  /* EEE LPI Clock Stop */
+#define E1000_EEER_EEE_NEG           0x20000000  /* EEE capability nego */
+#define E1000_EEE_LP_ADV_ADDR_I350   0x040F      /* EEE LP Advertisement */
+#define E1000_EEE_LP_ADV_DEV_I210    7           /* EEE LP Adv Device */
+#define E1000_EEE_LP_ADV_ADDR_I210   61          /* EEE LP Adv Register */
+#define E1000_MMDAC_FUNC_DATA        0x4000      /* Data, no post increment */
+#define E1000_M88E1543_PAGE_ADDR	0x16       /* Page Offset Register */
+#define E1000_M88E1543_EEE_CTRL_1	0x0
+#define E1000_M88E1543_EEE_CTRL_1_MS	0x0001     /* EEE Master/Slave */
+#define E1000_M88E1543_FIBER_CTRL	0x0
+#define E1000_EEE_ADV_DEV_I354		7
+#define E1000_EEE_ADV_ADDR_I354		60
+#define E1000_EEE_ADV_100_SUPPORTED	BIT(1)   /* 100BaseTx EEE Supported */
+#define E1000_EEE_ADV_1000_SUPPORTED	BIT(2)   /* 1000BaseT EEE Supported */
+#define E1000_PCS_STATUS_DEV_I354	3
+#define E1000_PCS_STATUS_ADDR_I354	1
+#define E1000_PCS_STATUS_TX_LPI_IND	0x0200     /* Tx in LPI state */
+#define E1000_PCS_STATUS_RX_LPI_RCVD	0x0400
+#define E1000_PCS_STATUS_TX_LPI_RCVD	0x0800
+
+/* SerDes Control */
+#define E1000_GEN_CTL_READY             0x80000000
+#define E1000_GEN_CTL_ADDRESS_SHIFT     8
+#define E1000_GEN_POLL_TIMEOUT          640
+
+#define E1000_VFTA_ENTRY_SHIFT               5
+#define E1000_VFTA_ENTRY_MASK                0x7F
+#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK      0x1F
+
+/* Tx Rate-Scheduler Config fields */
+#define E1000_RTTBCNRC_RS_ENA		0x80000000
+#define E1000_RTTBCNRC_RF_DEC_MASK	0x00003FFF
+#define E1000_RTTBCNRC_RF_INT_SHIFT	14
+#define E1000_RTTBCNRC_RF_INT_MASK	\
+	(E1000_RTTBCNRC_RF_DEC_MASK << E1000_RTTBCNRC_RF_INT_SHIFT)
+
+#define E1000_VLAPQF_QUEUE_SEL(_n, q_idx) (q_idx << ((_n) * 4))
+#define E1000_VLAPQF_P_VALID(_n)	(0x1 << (3 + (_n) * 4))
+#define E1000_VLAPQF_QUEUE_MASK	0x03
+
+/* TX Qav Control fields */
+#define E1000_TQAVCTRL_XMIT_MODE	BIT(0)
+#define E1000_TQAVCTRL_DATAFETCHARB	BIT(4)
+#define E1000_TQAVCTRL_DATATRANARB	BIT(8)
+#define E1000_TQAVCTRL_DATATRANTIM	BIT(9)
+#define E1000_TQAVCTRL_SP_WAIT_SR	BIT(10)
+/* Fetch Time Delta - bits 31:16
+ *
+ * This field holds the value to be reduced from the launch time for
+ * fetch time decision. The FetchTimeDelta value is defined in 32 ns
+ * granularity.
+ *
+ * This field is 16 bits wide, and so the maximum value is:
+ *
+ * 65535 * 32 = 2097120 ~= 2.1 msec
+ *
+ * XXX: We are configuring the max value here since we couldn't come up
+ * with a reason for not doing so.
+ */
+#define E1000_TQAVCTRL_FETCHTIME_DELTA	(0xFFFF << 16)
+
+/* TX Qav Credit Control fields */
+#define E1000_TQAVCC_IDLESLOPE_MASK	0xFFFF
+#define E1000_TQAVCC_QUEUEMODE		BIT(31)
+
+/* Transmit Descriptor Control fields */
+#define E1000_TXDCTL_PRIORITY		BIT(27)
+
+#endif
diff --git a/drivers/net/ethernet/intel/igb/e1000_hw.h b/drivers/net/ethernet/intel/igb/e1000_hw.h
new file mode 100644
index 0000000000..44111f65af
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/e1000_hw.h
@@ -0,0 +1,554 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+#ifndef _E1000_IGB_HW_H_
+#define _E1000_IGB_HW_H_
+
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/netdevice.h>
+
+#include "e1000_regs.h"
+#include "e1000_defines.h"
+
+struct e1000_hw;
+
+#define E1000_DEV_ID_82576			0x10C9
+#define E1000_DEV_ID_82576_FIBER		0x10E6
+#define E1000_DEV_ID_82576_SERDES		0x10E7
+#define E1000_DEV_ID_82576_QUAD_COPPER		0x10E8
+#define E1000_DEV_ID_82576_QUAD_COPPER_ET2	0x1526
+#define E1000_DEV_ID_82576_NS			0x150A
+#define E1000_DEV_ID_82576_NS_SERDES		0x1518
+#define E1000_DEV_ID_82576_SERDES_QUAD		0x150D
+#define E1000_DEV_ID_82575EB_COPPER		0x10A7
+#define E1000_DEV_ID_82575EB_FIBER_SERDES	0x10A9
+#define E1000_DEV_ID_82575GB_QUAD_COPPER	0x10D6
+#define E1000_DEV_ID_82580_COPPER		0x150E
+#define E1000_DEV_ID_82580_FIBER		0x150F
+#define E1000_DEV_ID_82580_SERDES		0x1510
+#define E1000_DEV_ID_82580_SGMII		0x1511
+#define E1000_DEV_ID_82580_COPPER_DUAL		0x1516
+#define E1000_DEV_ID_82580_QUAD_FIBER		0x1527
+#define E1000_DEV_ID_DH89XXCC_SGMII		0x0438
+#define E1000_DEV_ID_DH89XXCC_SERDES		0x043A
+#define E1000_DEV_ID_DH89XXCC_BACKPLANE		0x043C
+#define E1000_DEV_ID_DH89XXCC_SFP		0x0440
+#define E1000_DEV_ID_I350_COPPER		0x1521
+#define E1000_DEV_ID_I350_FIBER			0x1522
+#define E1000_DEV_ID_I350_SERDES		0x1523
+#define E1000_DEV_ID_I350_SGMII			0x1524
+#define E1000_DEV_ID_I210_COPPER		0x1533
+#define E1000_DEV_ID_I210_FIBER			0x1536
+#define E1000_DEV_ID_I210_SERDES		0x1537
+#define E1000_DEV_ID_I210_SGMII			0x1538
+#define E1000_DEV_ID_I210_COPPER_FLASHLESS	0x157B
+#define E1000_DEV_ID_I210_SERDES_FLASHLESS	0x157C
+#define E1000_DEV_ID_I211_COPPER		0x1539
+#define E1000_DEV_ID_I354_BACKPLANE_1GBPS	0x1F40
+#define E1000_DEV_ID_I354_SGMII			0x1F41
+#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS	0x1F45
+
+#define E1000_REVISION_2 2
+#define E1000_REVISION_4 4
+
+#define E1000_FUNC_0     0
+#define E1000_FUNC_1     1
+#define E1000_FUNC_2     2
+#define E1000_FUNC_3     3
+
+#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0   0
+#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1   3
+#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN2   6
+#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN3   9
+
+enum e1000_mac_type {
+	e1000_undefined = 0,
+	e1000_82575,
+	e1000_82576,
+	e1000_82580,
+	e1000_i350,
+	e1000_i354,
+	e1000_i210,
+	e1000_i211,
+	e1000_num_macs  /* List is 1-based, so subtract 1 for true count. */
+};
+
+enum e1000_media_type {
+	e1000_media_type_unknown = 0,
+	e1000_media_type_copper = 1,
+	e1000_media_type_fiber = 2,
+	e1000_media_type_internal_serdes = 3,
+	e1000_num_media_types
+};
+
+enum e1000_nvm_type {
+	e1000_nvm_unknown = 0,
+	e1000_nvm_none,
+	e1000_nvm_eeprom_spi,
+	e1000_nvm_flash_hw,
+	e1000_nvm_invm,
+	e1000_nvm_flash_sw
+};
+
+enum e1000_nvm_override {
+	e1000_nvm_override_none = 0,
+	e1000_nvm_override_spi_small,
+	e1000_nvm_override_spi_large,
+};
+
+enum e1000_phy_type {
+	e1000_phy_unknown = 0,
+	e1000_phy_none,
+	e1000_phy_m88,
+	e1000_phy_igp,
+	e1000_phy_igp_2,
+	e1000_phy_gg82563,
+	e1000_phy_igp_3,
+	e1000_phy_ife,
+	e1000_phy_82580,
+	e1000_phy_i210,
+	e1000_phy_bcm54616,
+};
+
+enum e1000_bus_type {
+	e1000_bus_type_unknown = 0,
+	e1000_bus_type_pci,
+	e1000_bus_type_pcix,
+	e1000_bus_type_pci_express,
+	e1000_bus_type_reserved
+};
+
+enum e1000_bus_speed {
+	e1000_bus_speed_unknown = 0,
+	e1000_bus_speed_33,
+	e1000_bus_speed_66,
+	e1000_bus_speed_100,
+	e1000_bus_speed_120,
+	e1000_bus_speed_133,
+	e1000_bus_speed_2500,
+	e1000_bus_speed_5000,
+	e1000_bus_speed_reserved
+};
+
+enum e1000_bus_width {
+	e1000_bus_width_unknown = 0,
+	e1000_bus_width_pcie_x1,
+	e1000_bus_width_pcie_x2,
+	e1000_bus_width_pcie_x4 = 4,
+	e1000_bus_width_pcie_x8 = 8,
+	e1000_bus_width_32,
+	e1000_bus_width_64,
+	e1000_bus_width_reserved
+};
+
+enum e1000_1000t_rx_status {
+	e1000_1000t_rx_status_not_ok = 0,
+	e1000_1000t_rx_status_ok,
+	e1000_1000t_rx_status_undefined = 0xFF
+};
+
+enum e1000_rev_polarity {
+	e1000_rev_polarity_normal = 0,
+	e1000_rev_polarity_reversed,
+	e1000_rev_polarity_undefined = 0xFF
+};
+
+enum e1000_fc_mode {
+	e1000_fc_none = 0,
+	e1000_fc_rx_pause,
+	e1000_fc_tx_pause,
+	e1000_fc_full,
+	e1000_fc_default = 0xFF
+};
+
+/* Statistics counters collected by the MAC */
+struct e1000_hw_stats {
+	u64 crcerrs;
+	u64 algnerrc;
+	u64 symerrs;
+	u64 rxerrc;
+	u64 mpc;
+	u64 scc;
+	u64 ecol;
+	u64 mcc;
+	u64 latecol;
+	u64 colc;
+	u64 dc;
+	u64 tncrs;
+	u64 sec;
+	u64 cexterr;
+	u64 rlec;
+	u64 xonrxc;
+	u64 xontxc;
+	u64 xoffrxc;
+	u64 xofftxc;
+	u64 fcruc;
+	u64 prc64;
+	u64 prc127;
+	u64 prc255;
+	u64 prc511;
+	u64 prc1023;
+	u64 prc1522;
+	u64 gprc;
+	u64 bprc;
+	u64 mprc;
+	u64 gptc;
+	u64 gorc;
+	u64 gotc;
+	u64 rnbc;
+	u64 ruc;
+	u64 rfc;
+	u64 roc;
+	u64 rjc;
+	u64 mgprc;
+	u64 mgpdc;
+	u64 mgptc;
+	u64 tor;
+	u64 tot;
+	u64 tpr;
+	u64 tpt;
+	u64 ptc64;
+	u64 ptc127;
+	u64 ptc255;
+	u64 ptc511;
+	u64 ptc1023;
+	u64 ptc1522;
+	u64 mptc;
+	u64 bptc;
+	u64 tsctc;
+	u64 tsctfc;
+	u64 iac;
+	u64 icrxptc;
+	u64 icrxatc;
+	u64 ictxptc;
+	u64 ictxatc;
+	u64 ictxqec;
+	u64 ictxqmtc;
+	u64 icrxdmtc;
+	u64 icrxoc;
+	u64 cbtmpc;
+	u64 htdpmc;
+	u64 cbrdpc;
+	u64 cbrmpc;
+	u64 rpthc;
+	u64 hgptc;
+	u64 htcbdpc;
+	u64 hgorc;
+	u64 hgotc;
+	u64 lenerrs;
+	u64 scvpc;
+	u64 hrmpc;
+	u64 doosync;
+	u64 o2bgptc;
+	u64 o2bspc;
+	u64 b2ospc;
+	u64 b2ogprc;
+};
+
+struct e1000_host_mng_dhcp_cookie {
+	u32 signature;
+	u8  status;
+	u8  reserved0;
+	u16 vlan_id;
+	u32 reserved1;
+	u16 reserved2;
+	u8  reserved3;
+	u8  checksum;
+};
+
+/* Host Interface "Rev 1" */
+struct e1000_host_command_header {
+	u8 command_id;
+	u8 command_length;
+	u8 command_options;
+	u8 checksum;
+};
+
+#define E1000_HI_MAX_DATA_LENGTH     252
+struct e1000_host_command_info {
+	struct e1000_host_command_header command_header;
+	u8 command_data[E1000_HI_MAX_DATA_LENGTH];
+};
+
+/* Host Interface "Rev 2" */
+struct e1000_host_mng_command_header {
+	u8  command_id;
+	u8  checksum;
+	u16 reserved1;
+	u16 reserved2;
+	u16 command_length;
+};
+
+#define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8
+struct e1000_host_mng_command_info {
+	struct e1000_host_mng_command_header command_header;
+	u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH];
+};
+
+#include "e1000_mac.h"
+#include "e1000_phy.h"
+#include "e1000_nvm.h"
+#include "e1000_mbx.h"
+
+struct e1000_mac_operations {
+	s32 (*check_for_link)(struct e1000_hw *);
+	s32 (*reset_hw)(struct e1000_hw *);
+	s32 (*init_hw)(struct e1000_hw *);
+	bool (*check_mng_mode)(struct e1000_hw *);
+	s32 (*setup_physical_interface)(struct e1000_hw *);
+	void (*rar_set)(struct e1000_hw *, u8 *, u32);
+	s32 (*read_mac_addr)(struct e1000_hw *);
+	s32 (*get_speed_and_duplex)(struct e1000_hw *, u16 *, u16 *);
+	s32 (*acquire_swfw_sync)(struct e1000_hw *, u16);
+	void (*release_swfw_sync)(struct e1000_hw *, u16);
+#ifdef CONFIG_IGB_HWMON
+	s32 (*get_thermal_sensor_data)(struct e1000_hw *);
+	s32 (*init_thermal_sensor_thresh)(struct e1000_hw *);
+#endif
+	void (*write_vfta)(struct e1000_hw *, u32, u32);
+};
+
+struct e1000_phy_operations {
+	s32 (*acquire)(struct e1000_hw *);
+	s32 (*check_polarity)(struct e1000_hw *);
+	s32 (*check_reset_block)(struct e1000_hw *);
+	s32 (*force_speed_duplex)(struct e1000_hw *);
+	s32 (*get_cfg_done)(struct e1000_hw *hw);
+	s32 (*get_cable_length)(struct e1000_hw *);
+	s32 (*get_phy_info)(struct e1000_hw *);
+	s32 (*read_reg)(struct e1000_hw *, u32, u16 *);
+	void (*release)(struct e1000_hw *);
+	s32 (*reset)(struct e1000_hw *);
+	s32 (*set_d0_lplu_state)(struct e1000_hw *, bool);
+	s32 (*set_d3_lplu_state)(struct e1000_hw *, bool);
+	s32 (*write_reg)(struct e1000_hw *, u32, u16);
+	s32 (*read_i2c_byte)(struct e1000_hw *, u8, u8, u8 *);
+	s32 (*write_i2c_byte)(struct e1000_hw *, u8, u8, u8);
+};
+
+struct e1000_nvm_operations {
+	s32 (*acquire)(struct e1000_hw *);
+	s32 (*read)(struct e1000_hw *, u16, u16, u16 *);
+	void (*release)(struct e1000_hw *);
+	s32 (*write)(struct e1000_hw *, u16, u16, u16 *);
+	s32 (*update)(struct e1000_hw *);
+	s32 (*validate)(struct e1000_hw *);
+	s32 (*valid_led_default)(struct e1000_hw *, u16 *);
+};
+
+#define E1000_MAX_SENSORS		3
+
+struct e1000_thermal_diode_data {
+	u8 location;
+	u8 temp;
+	u8 caution_thresh;
+	u8 max_op_thresh;
+};
+
+struct e1000_thermal_sensor_data {
+	struct e1000_thermal_diode_data sensor[E1000_MAX_SENSORS];
+};
+
+struct e1000_info {
+	s32 (*get_invariants)(struct e1000_hw *);
+	struct e1000_mac_operations *mac_ops;
+	const struct e1000_phy_operations *phy_ops;
+	struct e1000_nvm_operations *nvm_ops;
+};
+
+extern const struct e1000_info e1000_82575_info;
+
+struct e1000_mac_info {
+	struct e1000_mac_operations ops;
+
+	u8 addr[6];
+	u8 perm_addr[6];
+
+	enum e1000_mac_type type;
+
+	u32 ledctl_default;
+	u32 ledctl_mode1;
+	u32 ledctl_mode2;
+	u32 mc_filter_type;
+	u32 txcw;
+
+	u16 mta_reg_count;
+	u16 uta_reg_count;
+
+	/* Maximum size of the MTA register table in all supported adapters */
+	#define MAX_MTA_REG 128
+	u32 mta_shadow[MAX_MTA_REG];
+	u16 rar_entry_count;
+
+	u8  forced_speed_duplex;
+
+	bool adaptive_ifs;
+	bool arc_subsystem_valid;
+	bool asf_firmware_present;
+	bool autoneg;
+	bool autoneg_failed;
+	bool disable_hw_init_bits;
+	bool get_link_status;
+	bool ifs_params_forced;
+	bool in_ifs_mode;
+	bool report_tx_early;
+	bool serdes_has_link;
+	bool tx_pkt_filtering;
+	struct e1000_thermal_sensor_data thermal_sensor_data;
+};
+
+struct e1000_phy_info {
+	struct e1000_phy_operations ops;
+
+	enum e1000_phy_type type;
+
+	enum e1000_1000t_rx_status local_rx;
+	enum e1000_1000t_rx_status remote_rx;
+	enum e1000_ms_type ms_type;
+	enum e1000_ms_type original_ms_type;
+	enum e1000_rev_polarity cable_polarity;
+	enum e1000_smart_speed smart_speed;
+
+	u32 addr;
+	u32 id;
+	u32 reset_delay_us; /* in usec */
+	u32 revision;
+
+	enum e1000_media_type media_type;
+
+	u16 autoneg_advertised;
+	u16 autoneg_mask;
+	u16 cable_length;
+	u16 max_cable_length;
+	u16 min_cable_length;
+	u16 pair_length[4];
+
+	u8 mdix;
+
+	bool disable_polarity_correction;
+	bool is_mdix;
+	bool polarity_correction;
+	bool reset_disable;
+	bool speed_downgraded;
+	bool autoneg_wait_to_complete;
+};
+
+struct e1000_nvm_info {
+	struct e1000_nvm_operations ops;
+	enum e1000_nvm_type type;
+	enum e1000_nvm_override override;
+
+	u32 flash_bank_size;
+	u32 flash_base_addr;
+
+	u16 word_size;
+	u16 delay_usec;
+	u16 address_bits;
+	u16 opcode_bits;
+	u16 page_size;
+};
+
+struct e1000_bus_info {
+	enum e1000_bus_type type;
+	enum e1000_bus_speed speed;
+	enum e1000_bus_width width;
+
+	u32 snoop;
+
+	u16 func;
+	u16 pci_cmd_word;
+};
+
+struct e1000_fc_info {
+	u32 high_water;     /* Flow control high-water mark */
+	u32 low_water;      /* Flow control low-water mark */
+	u16 pause_time;     /* Flow control pause timer */
+	bool send_xon;      /* Flow control send XON */
+	bool strict_ieee;   /* Strict IEEE mode */
+	enum e1000_fc_mode current_mode; /* Type of flow control */
+	enum e1000_fc_mode requested_mode;
+};
+
+struct e1000_mbx_operations {
+	s32 (*init_params)(struct e1000_hw *hw);
+	s32 (*read)(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id,
+		    bool unlock);
+	s32 (*write)(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id);
+	s32 (*read_posted)(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id);
+	s32 (*write_posted)(struct e1000_hw *hw, u32 *msg, u16 size,
+			    u16 mbx_id);
+	s32 (*check_for_msg)(struct e1000_hw *hw, u16 mbx_id);
+	s32 (*check_for_ack)(struct e1000_hw *hw, u16 mbx_id);
+	s32 (*check_for_rst)(struct e1000_hw *hw, u16 mbx_id);
+	s32 (*unlock)(struct e1000_hw *hw, u16 mbx_id);
+};
+
+struct e1000_mbx_stats {
+	u32 msgs_tx;
+	u32 msgs_rx;
+
+	u32 acks;
+	u32 reqs;
+	u32 rsts;
+};
+
+struct e1000_mbx_info {
+	struct e1000_mbx_operations ops;
+	struct e1000_mbx_stats stats;
+	u32 timeout;
+	u32 usec_delay;
+	u16 size;
+};
+
+struct e1000_dev_spec_82575 {
+	bool sgmii_active;
+	bool global_device_reset;
+	bool eee_disable;
+	bool clear_semaphore_once;
+	struct e1000_sfp_flags eth_flags;
+	bool module_plugged;
+	u8 media_port;
+	bool media_changed;
+	bool mas_capable;
+};
+
+struct e1000_hw {
+	void *back;
+
+	u8 __iomem *hw_addr;
+	u8 __iomem *flash_address;
+	unsigned long io_base;
+
+	struct e1000_mac_info  mac;
+	struct e1000_fc_info   fc;
+	struct e1000_phy_info  phy;
+	struct e1000_nvm_info  nvm;
+	struct e1000_bus_info  bus;
+	struct e1000_mbx_info mbx;
+	struct e1000_host_mng_dhcp_cookie mng_cookie;
+
+	union {
+		struct e1000_dev_spec_82575	_82575;
+	} dev_spec;
+
+	u16 device_id;
+	u16 subsystem_vendor_id;
+	u16 subsystem_device_id;
+	u16 vendor_id;
+
+	u8  revision_id;
+};
+
+struct net_device *igb_get_hw_dev(struct e1000_hw *hw);
+#define hw_dbg(format, arg...) \
+	netdev_dbg(igb_get_hw_dev(hw), format, ##arg)
+
+/* These functions must be implemented by drivers */
+s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value);
+s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value);
+
+void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value);
+void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value);
+#endif /* _E1000_IGB_HW_H_ */
diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c
new file mode 100644
index 0000000000..b9b9d35494
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.c
@@ -0,0 +1,911 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+/* e1000_i210
+ * e1000_i211
+ */
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "e1000_hw.h"
+#include "e1000_i210.h"
+
+static s32 igb_update_flash_i210(struct e1000_hw *hw);
+
+/**
+ * igb_get_hw_semaphore_i210 - Acquire hardware semaphore
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire the HW semaphore to access the PHY or NVM
+ */
+static s32 igb_get_hw_semaphore_i210(struct e1000_hw *hw)
+{
+	u32 swsm;
+	s32 timeout = hw->nvm.word_size + 1;
+	s32 i = 0;
+
+	/* Get the SW semaphore */
+	while (i < timeout) {
+		swsm = rd32(E1000_SWSM);
+		if (!(swsm & E1000_SWSM_SMBI))
+			break;
+
+		udelay(50);
+		i++;
+	}
+
+	if (i == timeout) {
+		/* In rare circumstances, the SW semaphore may already be held
+		 * unintentionally. Clear the semaphore once before giving up.
+		 */
+		if (hw->dev_spec._82575.clear_semaphore_once) {
+			hw->dev_spec._82575.clear_semaphore_once = false;
+			igb_put_hw_semaphore(hw);
+			for (i = 0; i < timeout; i++) {
+				swsm = rd32(E1000_SWSM);
+				if (!(swsm & E1000_SWSM_SMBI))
+					break;
+
+				udelay(50);
+			}
+		}
+
+		/* If we do not have the semaphore here, we have to give up. */
+		if (i == timeout) {
+			hw_dbg("Driver can't access device - SMBI bit is set.\n");
+			return -E1000_ERR_NVM;
+		}
+	}
+
+	/* Get the FW semaphore. */
+	for (i = 0; i < timeout; i++) {
+		swsm = rd32(E1000_SWSM);
+		wr32(E1000_SWSM, swsm | E1000_SWSM_SWESMBI);
+
+		/* Semaphore acquired if bit latched */
+		if (rd32(E1000_SWSM) & E1000_SWSM_SWESMBI)
+			break;
+
+		udelay(50);
+	}
+
+	if (i == timeout) {
+		/* Release semaphores */
+		igb_put_hw_semaphore(hw);
+		hw_dbg("Driver can't access the NVM\n");
+		return -E1000_ERR_NVM;
+	}
+
+	return 0;
+}
+
+/**
+ *  igb_acquire_nvm_i210 - Request for access to EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire the necessary semaphores for exclusive access to the EEPROM.
+ *  Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ *  Return successful if access grant bit set, else clear the request for
+ *  EEPROM access and return -E1000_ERR_NVM (-1).
+ **/
+static s32 igb_acquire_nvm_i210(struct e1000_hw *hw)
+{
+	return igb_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM);
+}
+
+/**
+ *  igb_release_nvm_i210 - Release exclusive access to EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Stop any current commands to the EEPROM and clear the EEPROM request bit,
+ *  then release the semaphores acquired.
+ **/
+static void igb_release_nvm_i210(struct e1000_hw *hw)
+{
+	igb_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM);
+}
+
+/**
+ *  igb_acquire_swfw_sync_i210 - Acquire SW/FW semaphore
+ *  @hw: pointer to the HW structure
+ *  @mask: specifies which semaphore to acquire
+ *
+ *  Acquire the SW/FW semaphore to access the PHY or NVM.  The mask
+ *  will also specify which port we're acquiring the lock for.
+ **/
+s32 igb_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+	u32 swmask = mask;
+	u32 fwmask = mask << 16;
+	s32 ret_val = 0;
+	s32 i = 0, timeout = 200; /* FIXME: find real value to use here */
+
+	while (i < timeout) {
+		if (igb_get_hw_semaphore_i210(hw)) {
+			ret_val = -E1000_ERR_SWFW_SYNC;
+			goto out;
+		}
+
+		swfw_sync = rd32(E1000_SW_FW_SYNC);
+		if (!(swfw_sync & (fwmask | swmask)))
+			break;
+
+		/* Firmware currently using resource (fwmask) */
+		igb_put_hw_semaphore(hw);
+		mdelay(5);
+		i++;
+	}
+
+	if (i == timeout) {
+		hw_dbg("Driver can't access resource, SW_FW_SYNC timeout.\n");
+		ret_val = -E1000_ERR_SWFW_SYNC;
+		goto out;
+	}
+
+	swfw_sync |= swmask;
+	wr32(E1000_SW_FW_SYNC, swfw_sync);
+
+	igb_put_hw_semaphore(hw);
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_release_swfw_sync_i210 - Release SW/FW semaphore
+ *  @hw: pointer to the HW structure
+ *  @mask: specifies which semaphore to acquire
+ *
+ *  Release the SW/FW semaphore used to access the PHY or NVM.  The mask
+ *  will also specify which port we're releasing the lock for.
+ **/
+void igb_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+
+	while (igb_get_hw_semaphore_i210(hw))
+		; /* Empty */
+
+	swfw_sync = rd32(E1000_SW_FW_SYNC);
+	swfw_sync &= ~mask;
+	wr32(E1000_SW_FW_SYNC, swfw_sync);
+
+	igb_put_hw_semaphore(hw);
+}
+
+/**
+ *  igb_read_nvm_srrd_i210 - Reads Shadow Ram using EERD register
+ *  @hw: pointer to the HW structure
+ *  @offset: offset of word in the Shadow Ram to read
+ *  @words: number of words to read
+ *  @data: word read from the Shadow Ram
+ *
+ *  Reads a 16 bit word from the Shadow Ram using the EERD register.
+ *  Uses necessary synchronization semaphores.
+ **/
+static s32 igb_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words,
+				  u16 *data)
+{
+	s32 status = 0;
+	u16 i, count;
+
+	/* We cannot hold synchronization semaphores for too long,
+	 * because of forceful takeover procedure. However it is more efficient
+	 * to read in bursts than synchronizing access for each word.
+	 */
+	for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) {
+		count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ?
+			E1000_EERD_EEWR_MAX_COUNT : (words - i);
+		if (!(hw->nvm.ops.acquire(hw))) {
+			status = igb_read_nvm_eerd(hw, offset, count,
+						     data + i);
+			hw->nvm.ops.release(hw);
+		} else {
+			status = E1000_ERR_SWFW_SYNC;
+		}
+
+		if (status)
+			break;
+	}
+
+	return status;
+}
+
+/**
+ *  igb_write_nvm_srwr - Write to Shadow Ram using EEWR
+ *  @hw: pointer to the HW structure
+ *  @offset: offset within the Shadow Ram to be written to
+ *  @words: number of words to write
+ *  @data: 16 bit word(s) to be written to the Shadow Ram
+ *
+ *  Writes data to Shadow Ram at offset using EEWR register.
+ *
+ *  If igb_update_nvm_checksum is not called after this function , the
+ *  Shadow Ram will most likely contain an invalid checksum.
+ **/
+static s32 igb_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words,
+				u16 *data)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 i, k, eewr = 0;
+	u32 attempts = 100000;
+	s32 ret_val = 0;
+
+	/* A check for invalid values:  offset too large, too many words,
+	 * too many words for the offset, and not enough words.
+	 */
+	if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
+	    (words == 0)) {
+		hw_dbg("nvm parameter(s) out of bounds\n");
+		ret_val = -E1000_ERR_NVM;
+		goto out;
+	}
+
+	for (i = 0; i < words; i++) {
+		eewr = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) |
+			(data[i] << E1000_NVM_RW_REG_DATA) |
+			E1000_NVM_RW_REG_START;
+
+		wr32(E1000_SRWR, eewr);
+
+		for (k = 0; k < attempts; k++) {
+			if (E1000_NVM_RW_REG_DONE &
+			    rd32(E1000_SRWR)) {
+				ret_val = 0;
+				break;
+			}
+			udelay(5);
+	}
+
+		if (ret_val) {
+			hw_dbg("Shadow RAM write EEWR timed out\n");
+			break;
+		}
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_write_nvm_srwr_i210 - Write to Shadow RAM using EEWR
+ *  @hw: pointer to the HW structure
+ *  @offset: offset within the Shadow RAM to be written to
+ *  @words: number of words to write
+ *  @data: 16 bit word(s) to be written to the Shadow RAM
+ *
+ *  Writes data to Shadow RAM at offset using EEWR register.
+ *
+ *  If e1000_update_nvm_checksum is not called after this function , the
+ *  data will not be committed to FLASH and also Shadow RAM will most likely
+ *  contain an invalid checksum.
+ *
+ *  If error code is returned, data and Shadow RAM may be inconsistent - buffer
+ *  partially written.
+ **/
+static s32 igb_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words,
+				   u16 *data)
+{
+	s32 status = 0;
+	u16 i, count;
+
+	/* We cannot hold synchronization semaphores for too long,
+	 * because of forceful takeover procedure. However it is more efficient
+	 * to write in bursts than synchronizing access for each word.
+	 */
+	for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) {
+		count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ?
+			E1000_EERD_EEWR_MAX_COUNT : (words - i);
+		if (!(hw->nvm.ops.acquire(hw))) {
+			status = igb_write_nvm_srwr(hw, offset, count,
+						      data + i);
+			hw->nvm.ops.release(hw);
+		} else {
+			status = E1000_ERR_SWFW_SYNC;
+		}
+
+		if (status)
+			break;
+	}
+
+	return status;
+}
+
+/**
+ *  igb_read_invm_word_i210 - Reads OTP
+ *  @hw: pointer to the HW structure
+ *  @address: the word address (aka eeprom offset) to read
+ *  @data: pointer to the data read
+ *
+ *  Reads 16-bit words from the OTP. Return error when the word is not
+ *  stored in OTP.
+ **/
+static s32 igb_read_invm_word_i210(struct e1000_hw *hw, u8 address, u16 *data)
+{
+	s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND;
+	u32 invm_dword;
+	u16 i;
+	u8 record_type, word_address;
+
+	for (i = 0; i < E1000_INVM_SIZE; i++) {
+		invm_dword = rd32(E1000_INVM_DATA_REG(i));
+		/* Get record type */
+		record_type = INVM_DWORD_TO_RECORD_TYPE(invm_dword);
+		if (record_type == E1000_INVM_UNINITIALIZED_STRUCTURE)
+			break;
+		if (record_type == E1000_INVM_CSR_AUTOLOAD_STRUCTURE)
+			i += E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS;
+		if (record_type == E1000_INVM_RSA_KEY_SHA256_STRUCTURE)
+			i += E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS;
+		if (record_type == E1000_INVM_WORD_AUTOLOAD_STRUCTURE) {
+			word_address = INVM_DWORD_TO_WORD_ADDRESS(invm_dword);
+			if (word_address == address) {
+				*data = INVM_DWORD_TO_WORD_DATA(invm_dword);
+				hw_dbg("Read INVM Word 0x%02x = %x\n",
+					  address, *data);
+				status = 0;
+				break;
+			}
+		}
+	}
+	if (status)
+		hw_dbg("Requested word 0x%02x not found in OTP\n", address);
+	return status;
+}
+
+/**
+ * igb_read_invm_i210 - Read invm wrapper function for I210/I211
+ *  @hw: pointer to the HW structure
+ *  @offset: offset to read from
+ *  @words: number of words to read (unused)
+ *  @data: pointer to the data read
+ *
+ *  Wrapper function to return data formerly found in the NVM.
+ **/
+static s32 igb_read_invm_i210(struct e1000_hw *hw, u16 offset,
+				u16 __always_unused words, u16 *data)
+{
+	s32 ret_val = 0;
+
+	/* Only the MAC addr is required to be present in the iNVM */
+	switch (offset) {
+	case NVM_MAC_ADDR:
+		ret_val = igb_read_invm_word_i210(hw, (u8)offset, &data[0]);
+		ret_val |= igb_read_invm_word_i210(hw, (u8)offset+1,
+						     &data[1]);
+		ret_val |= igb_read_invm_word_i210(hw, (u8)offset+2,
+						     &data[2]);
+		if (ret_val)
+			hw_dbg("MAC Addr not found in iNVM\n");
+		break;
+	case NVM_INIT_CTRL_2:
+		ret_val = igb_read_invm_word_i210(hw, (u8)offset, data);
+		if (ret_val) {
+			*data = NVM_INIT_CTRL_2_DEFAULT_I211;
+			ret_val = 0;
+		}
+		break;
+	case NVM_INIT_CTRL_4:
+		ret_val = igb_read_invm_word_i210(hw, (u8)offset, data);
+		if (ret_val) {
+			*data = NVM_INIT_CTRL_4_DEFAULT_I211;
+			ret_val = 0;
+		}
+		break;
+	case NVM_LED_1_CFG:
+		ret_val = igb_read_invm_word_i210(hw, (u8)offset, data);
+		if (ret_val) {
+			*data = NVM_LED_1_CFG_DEFAULT_I211;
+			ret_val = 0;
+		}
+		break;
+	case NVM_LED_0_2_CFG:
+		ret_val = igb_read_invm_word_i210(hw, (u8)offset, data);
+		if (ret_val) {
+			*data = NVM_LED_0_2_CFG_DEFAULT_I211;
+			ret_val = 0;
+		}
+		break;
+	case NVM_ID_LED_SETTINGS:
+		ret_val = igb_read_invm_word_i210(hw, (u8)offset, data);
+		if (ret_val) {
+			*data = ID_LED_RESERVED_FFFF;
+			ret_val = 0;
+		}
+		break;
+	case NVM_SUB_DEV_ID:
+		*data = hw->subsystem_device_id;
+		break;
+	case NVM_SUB_VEN_ID:
+		*data = hw->subsystem_vendor_id;
+		break;
+	case NVM_DEV_ID:
+		*data = hw->device_id;
+		break;
+	case NVM_VEN_ID:
+		*data = hw->vendor_id;
+		break;
+	default:
+		hw_dbg("NVM word 0x%02x is not mapped.\n", offset);
+		*data = NVM_RESERVED_WORD;
+		break;
+	}
+	return ret_val;
+}
+
+/**
+ *  igb_read_invm_version - Reads iNVM version and image type
+ *  @hw: pointer to the HW structure
+ *  @invm_ver: version structure for the version read
+ *
+ *  Reads iNVM version and image type.
+ **/
+s32 igb_read_invm_version(struct e1000_hw *hw,
+			  struct e1000_fw_version *invm_ver) {
+	u32 *record = NULL;
+	u32 *next_record = NULL;
+	u32 i = 0;
+	u32 invm_dword = 0;
+	u32 invm_blocks = E1000_INVM_SIZE - (E1000_INVM_ULT_BYTES_SIZE /
+					     E1000_INVM_RECORD_SIZE_IN_BYTES);
+	u32 buffer[E1000_INVM_SIZE];
+	s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND;
+	u16 version = 0;
+
+	/* Read iNVM memory */
+	for (i = 0; i < E1000_INVM_SIZE; i++) {
+		invm_dword = rd32(E1000_INVM_DATA_REG(i));
+		buffer[i] = invm_dword;
+	}
+
+	/* Read version number */
+	for (i = 1; i < invm_blocks; i++) {
+		record = &buffer[invm_blocks - i];
+		next_record = &buffer[invm_blocks - i + 1];
+
+		/* Check if we have first version location used */
+		if ((i == 1) && ((*record & E1000_INVM_VER_FIELD_ONE) == 0)) {
+			version = 0;
+			status = 0;
+			break;
+		}
+		/* Check if we have second version location used */
+		else if ((i == 1) &&
+			 ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) {
+			version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3;
+			status = 0;
+			break;
+		}
+		/* Check if we have odd version location
+		 * used and it is the last one used
+		 */
+		else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) &&
+			 ((*record & 0x3) == 0)) || (((*record & 0x3) != 0) &&
+			 (i != 1))) {
+			version = (*next_record & E1000_INVM_VER_FIELD_TWO)
+				  >> 13;
+			status = 0;
+			break;
+		}
+		/* Check if we have even version location
+		 * used and it is the last one used
+		 */
+		else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) &&
+			 ((*record & 0x3) == 0)) {
+			version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3;
+			status = 0;
+			break;
+		}
+	}
+
+	if (!status) {
+		invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK)
+					>> E1000_INVM_MAJOR_SHIFT;
+		invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK;
+	}
+	/* Read Image Type */
+	for (i = 1; i < invm_blocks; i++) {
+		record = &buffer[invm_blocks - i];
+		next_record = &buffer[invm_blocks - i + 1];
+
+		/* Check if we have image type in first location used */
+		if ((i == 1) && ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) {
+			invm_ver->invm_img_type = 0;
+			status = 0;
+			break;
+		}
+		/* Check if we have image type in first location used */
+		else if ((((*record & 0x3) == 0) &&
+			 ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) ||
+			 ((((*record & 0x3) != 0) && (i != 1)))) {
+			invm_ver->invm_img_type =
+				(*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23;
+			status = 0;
+			break;
+		}
+	}
+	return status;
+}
+
+/**
+ *  igb_validate_nvm_checksum_i210 - Validate EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ **/
+static s32 igb_validate_nvm_checksum_i210(struct e1000_hw *hw)
+{
+	s32 status = 0;
+	s32 (*read_op_ptr)(struct e1000_hw *, u16, u16, u16 *);
+
+	if (!(hw->nvm.ops.acquire(hw))) {
+
+		/* Replace the read function with semaphore grabbing with
+		 * the one that skips this for a while.
+		 * We have semaphore taken already here.
+		 */
+		read_op_ptr = hw->nvm.ops.read;
+		hw->nvm.ops.read = igb_read_nvm_eerd;
+
+		status = igb_validate_nvm_checksum(hw);
+
+		/* Revert original read operation. */
+		hw->nvm.ops.read = read_op_ptr;
+
+		hw->nvm.ops.release(hw);
+	} else {
+		status = E1000_ERR_SWFW_SYNC;
+	}
+
+	return status;
+}
+
+/**
+ *  igb_update_nvm_checksum_i210 - Update EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  up to the checksum.  Then calculates the EEPROM checksum and writes the
+ *  value to the EEPROM. Next commit EEPROM data onto the Flash.
+ **/
+static s32 igb_update_nvm_checksum_i210(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u16 checksum = 0;
+	u16 i, nvm_data;
+
+	/* Read the first word from the EEPROM. If this times out or fails, do
+	 * not continue or we could be in for a very long wait while every
+	 * EEPROM read fails
+	 */
+	ret_val = igb_read_nvm_eerd(hw, 0, 1, &nvm_data);
+	if (ret_val) {
+		hw_dbg("EEPROM read failed\n");
+		goto out;
+	}
+
+	if (!(hw->nvm.ops.acquire(hw))) {
+		/* Do not use hw->nvm.ops.write, hw->nvm.ops.read
+		 * because we do not want to take the synchronization
+		 * semaphores twice here.
+		 */
+
+		for (i = 0; i < NVM_CHECKSUM_REG; i++) {
+			ret_val = igb_read_nvm_eerd(hw, i, 1, &nvm_data);
+			if (ret_val) {
+				hw->nvm.ops.release(hw);
+				hw_dbg("NVM Read Error while updating checksum.\n");
+				goto out;
+			}
+			checksum += nvm_data;
+		}
+		checksum = (u16) NVM_SUM - checksum;
+		ret_val = igb_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1,
+						&checksum);
+		if (ret_val) {
+			hw->nvm.ops.release(hw);
+			hw_dbg("NVM Write Error while updating checksum.\n");
+			goto out;
+		}
+
+		hw->nvm.ops.release(hw);
+
+		ret_val = igb_update_flash_i210(hw);
+	} else {
+		ret_val = -E1000_ERR_SWFW_SYNC;
+	}
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_pool_flash_update_done_i210 - Pool FLUDONE status.
+ *  @hw: pointer to the HW structure
+ *
+ **/
+static s32 igb_pool_flash_update_done_i210(struct e1000_hw *hw)
+{
+	s32 ret_val = -E1000_ERR_NVM;
+	u32 i, reg;
+
+	for (i = 0; i < E1000_FLUDONE_ATTEMPTS; i++) {
+		reg = rd32(E1000_EECD);
+		if (reg & E1000_EECD_FLUDONE_I210) {
+			ret_val = 0;
+			break;
+		}
+		udelay(5);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  igb_get_flash_presence_i210 - Check if flash device is detected.
+ *  @hw: pointer to the HW structure
+ *
+ **/
+bool igb_get_flash_presence_i210(struct e1000_hw *hw)
+{
+	u32 eec = 0;
+	bool ret_val = false;
+
+	eec = rd32(E1000_EECD);
+	if (eec & E1000_EECD_FLASH_DETECTED_I210)
+		ret_val = true;
+
+	return ret_val;
+}
+
+/**
+ *  igb_update_flash_i210 - Commit EEPROM to the flash
+ *  @hw: pointer to the HW structure
+ *
+ **/
+static s32 igb_update_flash_i210(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u32 flup;
+
+	ret_val = igb_pool_flash_update_done_i210(hw);
+	if (ret_val == -E1000_ERR_NVM) {
+		hw_dbg("Flash update time out\n");
+		goto out;
+	}
+
+	flup = rd32(E1000_EECD) | E1000_EECD_FLUPD_I210;
+	wr32(E1000_EECD, flup);
+
+	ret_val = igb_pool_flash_update_done_i210(hw);
+	if (ret_val)
+		hw_dbg("Flash update time out\n");
+	else
+		hw_dbg("Flash update complete\n");
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_valid_led_default_i210 - Verify a valid default LED config
+ *  @hw: pointer to the HW structure
+ *  @data: pointer to the NVM (EEPROM)
+ *
+ *  Read the EEPROM for the current default LED configuration.  If the
+ *  LED configuration is not valid, set to a valid LED configuration.
+ **/
+s32 igb_valid_led_default_i210(struct e1000_hw *hw, u16 *data)
+{
+	s32 ret_val;
+
+	ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data);
+	if (ret_val) {
+		hw_dbg("NVM Read Error\n");
+		goto out;
+	}
+
+	if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) {
+		switch (hw->phy.media_type) {
+		case e1000_media_type_internal_serdes:
+			*data = ID_LED_DEFAULT_I210_SERDES;
+			break;
+		case e1000_media_type_copper:
+		default:
+			*data = ID_LED_DEFAULT_I210;
+			break;
+		}
+	}
+out:
+	return ret_val;
+}
+
+/**
+ *  __igb_access_xmdio_reg - Read/write XMDIO register
+ *  @hw: pointer to the HW structure
+ *  @address: XMDIO address to program
+ *  @dev_addr: device address to program
+ *  @data: pointer to value to read/write from/to the XMDIO address
+ *  @read: boolean flag to indicate read or write
+ **/
+static s32 __igb_access_xmdio_reg(struct e1000_hw *hw, u16 address,
+				  u8 dev_addr, u16 *data, bool read)
+{
+	s32 ret_val = 0;
+
+	ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, dev_addr);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, address);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, E1000_MMDAC_FUNC_DATA |
+							 dev_addr);
+	if (ret_val)
+		return ret_val;
+
+	if (read)
+		ret_val = hw->phy.ops.read_reg(hw, E1000_MMDAAD, data);
+	else
+		ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, *data);
+	if (ret_val)
+		return ret_val;
+
+	/* Recalibrate the device back to 0 */
+	ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, 0);
+	if (ret_val)
+		return ret_val;
+
+	return ret_val;
+}
+
+/**
+ *  igb_read_xmdio_reg - Read XMDIO register
+ *  @hw: pointer to the HW structure
+ *  @addr: XMDIO address to program
+ *  @dev_addr: device address to program
+ *  @data: value to be read from the EMI address
+ **/
+s32 igb_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data)
+{
+	return __igb_access_xmdio_reg(hw, addr, dev_addr, data, true);
+}
+
+/**
+ *  igb_write_xmdio_reg - Write XMDIO register
+ *  @hw: pointer to the HW structure
+ *  @addr: XMDIO address to program
+ *  @dev_addr: device address to program
+ *  @data: value to be written to the XMDIO address
+ **/
+s32 igb_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data)
+{
+	return __igb_access_xmdio_reg(hw, addr, dev_addr, &data, false);
+}
+
+/**
+ *  igb_init_nvm_params_i210 - Init NVM func ptrs.
+ *  @hw: pointer to the HW structure
+ **/
+s32 igb_init_nvm_params_i210(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+
+	nvm->ops.acquire = igb_acquire_nvm_i210;
+	nvm->ops.release = igb_release_nvm_i210;
+	nvm->ops.valid_led_default = igb_valid_led_default_i210;
+
+	/* NVM Function Pointers */
+	if (igb_get_flash_presence_i210(hw)) {
+		hw->nvm.type = e1000_nvm_flash_hw;
+		nvm->ops.read    = igb_read_nvm_srrd_i210;
+		nvm->ops.write   = igb_write_nvm_srwr_i210;
+		nvm->ops.validate = igb_validate_nvm_checksum_i210;
+		nvm->ops.update   = igb_update_nvm_checksum_i210;
+	} else {
+		hw->nvm.type = e1000_nvm_invm;
+		nvm->ops.read     = igb_read_invm_i210;
+		nvm->ops.write    = NULL;
+		nvm->ops.validate = NULL;
+		nvm->ops.update   = NULL;
+	}
+	return 0;
+}
+
+/**
+ * igb_pll_workaround_i210
+ * @hw: pointer to the HW structure
+ *
+ * Works around an errata in the PLL circuit where it occasionally
+ * provides the wrong clock frequency after power up.
+ **/
+s32 igb_pll_workaround_i210(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u32 wuc, mdicnfg, ctrl, ctrl_ext, reg_val;
+	u16 nvm_word, phy_word, pci_word, tmp_nvm;
+	int i;
+
+	/* Get and set needed register values */
+	wuc = rd32(E1000_WUC);
+	mdicnfg = rd32(E1000_MDICNFG);
+	reg_val = mdicnfg & ~E1000_MDICNFG_EXT_MDIO;
+	wr32(E1000_MDICNFG, reg_val);
+
+	/* Get data from NVM, or set default */
+	ret_val = igb_read_invm_word_i210(hw, E1000_INVM_AUTOLOAD,
+					  &nvm_word);
+	if (ret_val)
+		nvm_word = E1000_INVM_DEFAULT_AL;
+	tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL;
+	igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, E1000_PHY_PLL_FREQ_PAGE);
+	phy_word = E1000_PHY_PLL_UNCONF;
+	for (i = 0; i < E1000_MAX_PLL_TRIES; i++) {
+		/* check current state directly from internal PHY */
+		igb_read_phy_reg_82580(hw, E1000_PHY_PLL_FREQ_REG, &phy_word);
+		if ((phy_word & E1000_PHY_PLL_UNCONF)
+		    != E1000_PHY_PLL_UNCONF) {
+			ret_val = 0;
+			break;
+		} else {
+			ret_val = -E1000_ERR_PHY;
+		}
+		/* directly reset the internal PHY */
+		ctrl = rd32(E1000_CTRL);
+		wr32(E1000_CTRL, ctrl|E1000_CTRL_PHY_RST);
+
+		ctrl_ext = rd32(E1000_CTRL_EXT);
+		ctrl_ext |= (E1000_CTRL_EXT_PHYPDEN | E1000_CTRL_EXT_SDLPE);
+		wr32(E1000_CTRL_EXT, ctrl_ext);
+
+		wr32(E1000_WUC, 0);
+		reg_val = (E1000_INVM_AUTOLOAD << 4) | (tmp_nvm << 16);
+		wr32(E1000_EEARBC_I210, reg_val);
+
+		igb_read_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word);
+		pci_word |= E1000_PCI_PMCSR_D3;
+		igb_write_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word);
+		usleep_range(1000, 2000);
+		pci_word &= ~E1000_PCI_PMCSR_D3;
+		igb_write_pci_cfg(hw, E1000_PCI_PMCSR, &pci_word);
+		reg_val = (E1000_INVM_AUTOLOAD << 4) | (nvm_word << 16);
+		wr32(E1000_EEARBC_I210, reg_val);
+
+		/* restore WUC register */
+		wr32(E1000_WUC, wuc);
+	}
+	igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, 0);
+	/* restore MDICNFG setting */
+	wr32(E1000_MDICNFG, mdicnfg);
+	return ret_val;
+}
+
+/**
+ *  igb_get_cfg_done_i210 - Read config done bit
+ *  @hw: pointer to the HW structure
+ *
+ *  Read the management control register for the config done bit for
+ *  completion status.  NOTE: silicon which is EEPROM-less will fail trying
+ *  to read the config done bit, so an error is *ONLY* logged and returns
+ *  0.  If we were to return with error, EEPROM-less silicon
+ *  would not be able to be reset or change link.
+ **/
+s32 igb_get_cfg_done_i210(struct e1000_hw *hw)
+{
+	s32 timeout = PHY_CFG_TIMEOUT;
+	u32 mask = E1000_NVM_CFG_DONE_PORT_0;
+
+	while (timeout) {
+		if (rd32(E1000_EEMNGCTL_I210) & mask)
+			break;
+		usleep_range(1000, 2000);
+		timeout--;
+	}
+	if (!timeout)
+		hw_dbg("MNG configuration cycle has not completed.\n");
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.h b/drivers/net/ethernet/intel/igb/e1000_i210.h
new file mode 100644
index 0000000000..5c437fdc49
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+#ifndef _E1000_I210_H_
+#define _E1000_I210_H_
+
+s32 igb_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask);
+void igb_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask);
+s32 igb_valid_led_default_i210(struct e1000_hw *hw, u16 *data);
+s32 igb_read_invm_version(struct e1000_hw *hw,
+			  struct e1000_fw_version *invm_ver);
+s32 igb_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data);
+s32 igb_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data);
+s32 igb_init_nvm_params_i210(struct e1000_hw *hw);
+bool igb_get_flash_presence_i210(struct e1000_hw *hw);
+s32 igb_pll_workaround_i210(struct e1000_hw *hw);
+s32 igb_get_cfg_done_i210(struct e1000_hw *hw);
+
+#define E1000_STM_OPCODE		0xDB00
+#define E1000_EEPROM_FLASH_SIZE_WORD	0x11
+
+#define INVM_DWORD_TO_RECORD_TYPE(invm_dword) \
+	(u8)((invm_dword) & 0x7)
+#define INVM_DWORD_TO_WORD_ADDRESS(invm_dword) \
+	(u8)(((invm_dword) & 0x0000FE00) >> 9)
+#define INVM_DWORD_TO_WORD_DATA(invm_dword) \
+	(u16)(((invm_dword) & 0xFFFF0000) >> 16)
+
+enum E1000_INVM_STRUCTURE_TYPE {
+	E1000_INVM_UNINITIALIZED_STRUCTURE		= 0x00,
+	E1000_INVM_WORD_AUTOLOAD_STRUCTURE		= 0x01,
+	E1000_INVM_CSR_AUTOLOAD_STRUCTURE		= 0x02,
+	E1000_INVM_PHY_REGISTER_AUTOLOAD_STRUCTURE	= 0x03,
+	E1000_INVM_RSA_KEY_SHA256_STRUCTURE		= 0x04,
+	E1000_INVM_INVALIDATED_STRUCTURE		= 0x0F,
+};
+
+#define E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS	8
+#define E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS	1
+#define E1000_INVM_ULT_BYTES_SIZE			8
+#define E1000_INVM_RECORD_SIZE_IN_BYTES			4
+#define E1000_INVM_VER_FIELD_ONE			0x1FF8
+#define E1000_INVM_VER_FIELD_TWO			0x7FE000
+#define E1000_INVM_IMGTYPE_FIELD			0x1F800000
+
+#define E1000_INVM_MAJOR_MASK		0x3F0
+#define E1000_INVM_MINOR_MASK		0xF
+#define E1000_INVM_MAJOR_SHIFT		4
+
+#define ID_LED_DEFAULT_I210		((ID_LED_OFF1_ON2  << 8) | \
+					 (ID_LED_DEF1_DEF2 <<  4) | \
+					 (ID_LED_OFF1_OFF2))
+#define ID_LED_DEFAULT_I210_SERDES	((ID_LED_DEF1_DEF2 << 8) | \
+					 (ID_LED_DEF1_DEF2 <<  4) | \
+					 (ID_LED_OFF1_ON2))
+
+/* NVM offset defaults for i211 device */
+#define NVM_INIT_CTRL_2_DEFAULT_I211	0X7243
+#define NVM_INIT_CTRL_4_DEFAULT_I211	0x00C1
+#define NVM_LED_1_CFG_DEFAULT_I211	0x0184
+#define NVM_LED_0_2_CFG_DEFAULT_I211	0x200C
+
+/* PLL Defines */
+#define E1000_PCI_PMCSR			0x44
+#define E1000_PCI_PMCSR_D3		0x03
+#define E1000_MAX_PLL_TRIES		5
+#define E1000_PHY_PLL_UNCONF		0xFF
+#define E1000_PHY_PLL_FREQ_PAGE		0xFC
+#define E1000_PHY_PLL_FREQ_REG		0x000E
+#define E1000_INVM_DEFAULT_AL		0x202F
+#define E1000_INVM_AUTOLOAD		0x0A
+#define E1000_INVM_PLL_WO_VAL		0x0010
+
+#endif
diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c
new file mode 100644
index 0000000000..caf91c6f52
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.c
@@ -0,0 +1,1685 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+#include <linux/if_ether.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+
+#include "e1000_mac.h"
+
+#include "igb.h"
+
+static s32 igb_set_default_fc(struct e1000_hw *hw);
+static void igb_set_fc_watermarks(struct e1000_hw *hw);
+
+/**
+ *  igb_get_bus_info_pcie - Get PCIe bus information
+ *  @hw: pointer to the HW structure
+ *
+ *  Determines and stores the system bus information for a particular
+ *  network interface.  The following bus information is determined and stored:
+ *  bus speed, bus width, type (PCIe), and PCIe function.
+ **/
+s32 igb_get_bus_info_pcie(struct e1000_hw *hw)
+{
+	struct e1000_bus_info *bus = &hw->bus;
+	s32 ret_val;
+	u32 reg;
+	u16 pcie_link_status;
+
+	bus->type = e1000_bus_type_pci_express;
+
+	ret_val = igb_read_pcie_cap_reg(hw,
+					PCI_EXP_LNKSTA,
+					&pcie_link_status);
+	if (ret_val) {
+		bus->width = e1000_bus_width_unknown;
+		bus->speed = e1000_bus_speed_unknown;
+	} else {
+		switch (pcie_link_status & PCI_EXP_LNKSTA_CLS) {
+		case PCI_EXP_LNKSTA_CLS_2_5GB:
+			bus->speed = e1000_bus_speed_2500;
+			break;
+		case PCI_EXP_LNKSTA_CLS_5_0GB:
+			bus->speed = e1000_bus_speed_5000;
+			break;
+		default:
+			bus->speed = e1000_bus_speed_unknown;
+			break;
+		}
+
+		bus->width = (enum e1000_bus_width)((pcie_link_status &
+						     PCI_EXP_LNKSTA_NLW) >>
+						     PCI_EXP_LNKSTA_NLW_SHIFT);
+	}
+
+	reg = rd32(E1000_STATUS);
+	bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT;
+
+	return 0;
+}
+
+/**
+ *  igb_clear_vfta - Clear VLAN filter table
+ *  @hw: pointer to the HW structure
+ *
+ *  Clears the register array which contains the VLAN filter table by
+ *  setting all the values to 0.
+ **/
+void igb_clear_vfta(struct e1000_hw *hw)
+{
+	u32 offset;
+
+	for (offset = E1000_VLAN_FILTER_TBL_SIZE; offset--;)
+		hw->mac.ops.write_vfta(hw, offset, 0);
+}
+
+/**
+ *  igb_write_vfta - Write value to VLAN filter table
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset in VLAN filter table
+ *  @value: register value written to VLAN filter table
+ *
+ *  Writes value at the given offset in the register array which stores
+ *  the VLAN filter table.
+ **/
+void igb_write_vfta(struct e1000_hw *hw, u32 offset, u32 value)
+{
+	struct igb_adapter *adapter = hw->back;
+
+	array_wr32(E1000_VFTA, offset, value);
+	wrfl();
+
+	adapter->shadow_vfta[offset] = value;
+}
+
+/**
+ *  igb_init_rx_addrs - Initialize receive address's
+ *  @hw: pointer to the HW structure
+ *  @rar_count: receive address registers
+ *
+ *  Setups the receive address registers by setting the base receive address
+ *  register to the devices MAC address and clearing all the other receive
+ *  address registers to 0.
+ **/
+void igb_init_rx_addrs(struct e1000_hw *hw, u16 rar_count)
+{
+	u32 i;
+	u8 mac_addr[ETH_ALEN] = {0};
+
+	/* Setup the receive address */
+	hw_dbg("Programming MAC Address into RAR[0]\n");
+
+	hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
+
+	/* Zero out the other (rar_entry_count - 1) receive addresses */
+	hw_dbg("Clearing RAR[1-%u]\n", rar_count-1);
+	for (i = 1; i < rar_count; i++)
+		hw->mac.ops.rar_set(hw, mac_addr, i);
+}
+
+/**
+ *  igb_find_vlvf_slot - find the VLAN id or the first empty slot
+ *  @hw: pointer to hardware structure
+ *  @vlan: VLAN id to write to VLAN filter
+ *  @vlvf_bypass: skip VLVF if no match is found
+ *
+ *  return the VLVF index where this VLAN id should be placed
+ *
+ **/
+static s32 igb_find_vlvf_slot(struct e1000_hw *hw, u32 vlan, bool vlvf_bypass)
+{
+	s32 regindex, first_empty_slot;
+	u32 bits;
+
+	/* short cut the special case */
+	if (vlan == 0)
+		return 0;
+
+	/* if vlvf_bypass is set we don't want to use an empty slot, we
+	 * will simply bypass the VLVF if there are no entries present in the
+	 * VLVF that contain our VLAN
+	 */
+	first_empty_slot = vlvf_bypass ? -E1000_ERR_NO_SPACE : 0;
+
+	/* Search for the VLAN id in the VLVF entries. Save off the first empty
+	 * slot found along the way.
+	 *
+	 * pre-decrement loop covering (IXGBE_VLVF_ENTRIES - 1) .. 1
+	 */
+	for (regindex = E1000_VLVF_ARRAY_SIZE; --regindex > 0;) {
+		bits = rd32(E1000_VLVF(regindex)) & E1000_VLVF_VLANID_MASK;
+		if (bits == vlan)
+			return regindex;
+		if (!first_empty_slot && !bits)
+			first_empty_slot = regindex;
+	}
+
+	return first_empty_slot ? : -E1000_ERR_NO_SPACE;
+}
+
+/**
+ *  igb_vfta_set - enable or disable vlan in VLAN filter table
+ *  @hw: pointer to the HW structure
+ *  @vlan: VLAN id to add or remove
+ *  @vind: VMDq output index that maps queue to VLAN id
+ *  @vlan_on: if true add filter, if false remove
+ *  @vlvf_bypass: skip VLVF if no match is found
+ *
+ *  Sets or clears a bit in the VLAN filter table array based on VLAN id
+ *  and if we are adding or removing the filter
+ **/
+s32 igb_vfta_set(struct e1000_hw *hw, u32 vlan, u32 vind,
+		 bool vlan_on, bool vlvf_bypass)
+{
+	struct igb_adapter *adapter = hw->back;
+	u32 regidx, vfta_delta, vfta, bits;
+	s32 vlvf_index;
+
+	if ((vlan > 4095) || (vind > 7))
+		return -E1000_ERR_PARAM;
+
+	/* this is a 2 part operation - first the VFTA, then the
+	 * VLVF and VLVFB if VT Mode is set
+	 * We don't write the VFTA until we know the VLVF part succeeded.
+	 */
+
+	/* Part 1
+	 * The VFTA is a bitstring made up of 128 32-bit registers
+	 * that enable the particular VLAN id, much like the MTA:
+	 *    bits[11-5]: which register
+	 *    bits[4-0]:  which bit in the register
+	 */
+	regidx = vlan / 32;
+	vfta_delta = BIT(vlan % 32);
+	vfta = adapter->shadow_vfta[regidx];
+
+	/* vfta_delta represents the difference between the current value
+	 * of vfta and the value we want in the register.  Since the diff
+	 * is an XOR mask we can just update vfta using an XOR.
+	 */
+	vfta_delta &= vlan_on ? ~vfta : vfta;
+	vfta ^= vfta_delta;
+
+	/* Part 2
+	 * If VT Mode is set
+	 *   Either vlan_on
+	 *     make sure the VLAN is in VLVF
+	 *     set the vind bit in the matching VLVFB
+	 *   Or !vlan_on
+	 *     clear the pool bit and possibly the vind
+	 */
+	if (!adapter->vfs_allocated_count)
+		goto vfta_update;
+
+	vlvf_index = igb_find_vlvf_slot(hw, vlan, vlvf_bypass);
+	if (vlvf_index < 0) {
+		if (vlvf_bypass)
+			goto vfta_update;
+		return vlvf_index;
+	}
+
+	bits = rd32(E1000_VLVF(vlvf_index));
+
+	/* set the pool bit */
+	bits |= BIT(E1000_VLVF_POOLSEL_SHIFT + vind);
+	if (vlan_on)
+		goto vlvf_update;
+
+	/* clear the pool bit */
+	bits ^= BIT(E1000_VLVF_POOLSEL_SHIFT + vind);
+
+	if (!(bits & E1000_VLVF_POOLSEL_MASK)) {
+		/* Clear VFTA first, then disable VLVF.  Otherwise
+		 * we run the risk of stray packets leaking into
+		 * the PF via the default pool
+		 */
+		if (vfta_delta)
+			hw->mac.ops.write_vfta(hw, regidx, vfta);
+
+		/* disable VLVF and clear remaining bit from pool */
+		wr32(E1000_VLVF(vlvf_index), 0);
+
+		return 0;
+	}
+
+	/* If there are still bits set in the VLVFB registers
+	 * for the VLAN ID indicated we need to see if the
+	 * caller is requesting that we clear the VFTA entry bit.
+	 * If the caller has requested that we clear the VFTA
+	 * entry bit but there are still pools/VFs using this VLAN
+	 * ID entry then ignore the request.  We're not worried
+	 * about the case where we're turning the VFTA VLAN ID
+	 * entry bit on, only when requested to turn it off as
+	 * there may be multiple pools and/or VFs using the
+	 * VLAN ID entry.  In that case we cannot clear the
+	 * VFTA bit until all pools/VFs using that VLAN ID have also
+	 * been cleared.  This will be indicated by "bits" being
+	 * zero.
+	 */
+	vfta_delta = 0;
+
+vlvf_update:
+	/* record pool change and enable VLAN ID if not already enabled */
+	wr32(E1000_VLVF(vlvf_index), bits | vlan | E1000_VLVF_VLANID_ENABLE);
+
+vfta_update:
+	/* bit was set/cleared before we started */
+	if (vfta_delta)
+		hw->mac.ops.write_vfta(hw, regidx, vfta);
+
+	return 0;
+}
+
+/**
+ *  igb_check_alt_mac_addr - Check for alternate MAC addr
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks the nvm for an alternate MAC address.  An alternate MAC address
+ *  can be setup by pre-boot software and must be treated like a permanent
+ *  address and must override the actual permanent MAC address.  If an
+ *  alternate MAC address is found it is saved in the hw struct and
+ *  programmed into RAR0 and the function returns success, otherwise the
+ *  function returns an error.
+ **/
+s32 igb_check_alt_mac_addr(struct e1000_hw *hw)
+{
+	u32 i;
+	s32 ret_val = 0;
+	u16 offset, nvm_alt_mac_addr_offset, nvm_data;
+	u8 alt_mac_addr[ETH_ALEN];
+
+	/* Alternate MAC address is handled by the option ROM for 82580
+	 * and newer. SW support not required.
+	 */
+	if (hw->mac.type >= e1000_82580)
+		goto out;
+
+	ret_val = hw->nvm.ops.read(hw, NVM_ALT_MAC_ADDR_PTR, 1,
+				 &nvm_alt_mac_addr_offset);
+	if (ret_val) {
+		hw_dbg("NVM Read Error\n");
+		goto out;
+	}
+
+	if ((nvm_alt_mac_addr_offset == 0xFFFF) ||
+	    (nvm_alt_mac_addr_offset == 0x0000))
+		/* There is no Alternate MAC Address */
+		goto out;
+
+	if (hw->bus.func == E1000_FUNC_1)
+		nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1;
+	if (hw->bus.func == E1000_FUNC_2)
+		nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN2;
+
+	if (hw->bus.func == E1000_FUNC_3)
+		nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN3;
+	for (i = 0; i < ETH_ALEN; i += 2) {
+		offset = nvm_alt_mac_addr_offset + (i >> 1);
+		ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data);
+		if (ret_val) {
+			hw_dbg("NVM Read Error\n");
+			goto out;
+		}
+
+		alt_mac_addr[i] = (u8)(nvm_data & 0xFF);
+		alt_mac_addr[i + 1] = (u8)(nvm_data >> 8);
+	}
+
+	/* if multicast bit is set, the alternate address will not be used */
+	if (is_multicast_ether_addr(alt_mac_addr)) {
+		hw_dbg("Ignoring Alternate Mac Address with MC bit set\n");
+		goto out;
+	}
+
+	/* We have a valid alternate MAC address, and we want to treat it the
+	 * same as the normal permanent MAC address stored by the HW into the
+	 * RAR. Do this by mapping this address into RAR0.
+	 */
+	hw->mac.ops.rar_set(hw, alt_mac_addr, 0);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_rar_set - Set receive address register
+ *  @hw: pointer to the HW structure
+ *  @addr: pointer to the receive address
+ *  @index: receive address array register
+ *
+ *  Sets the receive address array register at index to the address passed
+ *  in by addr.
+ **/
+void igb_rar_set(struct e1000_hw *hw, u8 *addr, u32 index)
+{
+	u32 rar_low, rar_high;
+
+	/* HW expects these in little endian so we reverse the byte order
+	 * from network order (big endian) to little endian
+	 */
+	rar_low = ((u32) addr[0] |
+		   ((u32) addr[1] << 8) |
+		    ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
+
+	rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
+
+	/* If MAC address zero, no need to set the AV bit */
+	if (rar_low || rar_high)
+		rar_high |= E1000_RAH_AV;
+
+	/* Some bridges will combine consecutive 32-bit writes into
+	 * a single burst write, which will malfunction on some parts.
+	 * The flushes avoid this.
+	 */
+	wr32(E1000_RAL(index), rar_low);
+	wrfl();
+	wr32(E1000_RAH(index), rar_high);
+	wrfl();
+}
+
+/**
+ *  igb_mta_set - Set multicast filter table address
+ *  @hw: pointer to the HW structure
+ *  @hash_value: determines the MTA register and bit to set
+ *
+ *  The multicast table address is a register array of 32-bit registers.
+ *  The hash_value is used to determine what register the bit is in, the
+ *  current value is read, the new bit is OR'd in and the new value is
+ *  written back into the register.
+ **/
+void igb_mta_set(struct e1000_hw *hw, u32 hash_value)
+{
+	u32 hash_bit, hash_reg, mta;
+
+	/* The MTA is a register array of 32-bit registers. It is
+	 * treated like an array of (32*mta_reg_count) bits.  We want to
+	 * set bit BitArray[hash_value]. So we figure out what register
+	 * the bit is in, read it, OR in the new bit, then write
+	 * back the new value.  The (hw->mac.mta_reg_count - 1) serves as a
+	 * mask to bits 31:5 of the hash value which gives us the
+	 * register we're modifying.  The hash bit within that register
+	 * is determined by the lower 5 bits of the hash value.
+	 */
+	hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
+	hash_bit = hash_value & 0x1F;
+
+	mta = array_rd32(E1000_MTA, hash_reg);
+
+	mta |= BIT(hash_bit);
+
+	array_wr32(E1000_MTA, hash_reg, mta);
+	wrfl();
+}
+
+/**
+ *  igb_hash_mc_addr - Generate a multicast hash value
+ *  @hw: pointer to the HW structure
+ *  @mc_addr: pointer to a multicast address
+ *
+ *  Generates a multicast address hash value which is used to determine
+ *  the multicast filter table array address and new table value.  See
+ *  igb_mta_set()
+ **/
+static u32 igb_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr)
+{
+	u32 hash_value, hash_mask;
+	u8 bit_shift = 1;
+
+	/* Register count multiplied by bits per register */
+	hash_mask = (hw->mac.mta_reg_count * 32) - 1;
+
+	/* For a mc_filter_type of 0, bit_shift is the number of left-shifts
+	 * where 0xFF would still fall within the hash mask.
+	 */
+	while (hash_mask >> bit_shift != 0xFF && bit_shift < 4)
+		bit_shift++;
+
+	/* The portion of the address that is used for the hash table
+	 * is determined by the mc_filter_type setting.
+	 * The algorithm is such that there is a total of 8 bits of shifting.
+	 * The bit_shift for a mc_filter_type of 0 represents the number of
+	 * left-shifts where the MSB of mc_addr[5] would still fall within
+	 * the hash_mask.  Case 0 does this exactly.  Since there are a total
+	 * of 8 bits of shifting, then mc_addr[4] will shift right the
+	 * remaining number of bits. Thus 8 - bit_shift.  The rest of the
+	 * cases are a variation of this algorithm...essentially raising the
+	 * number of bits to shift mc_addr[5] left, while still keeping the
+	 * 8-bit shifting total.
+	 *
+	 * For example, given the following Destination MAC Address and an
+	 * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask),
+	 * we can see that the bit_shift for case 0 is 4.  These are the hash
+	 * values resulting from each mc_filter_type...
+	 * [0] [1] [2] [3] [4] [5]
+	 * 01  AA  00  12  34  56
+	 * LSB                 MSB
+	 *
+	 * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563
+	 * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6
+	 * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163
+	 * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634
+	 */
+	switch (hw->mac.mc_filter_type) {
+	default:
+	case 0:
+		break;
+	case 1:
+		bit_shift += 1;
+		break;
+	case 2:
+		bit_shift += 2;
+		break;
+	case 3:
+		bit_shift += 4;
+		break;
+	}
+
+	hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) |
+				  (((u16) mc_addr[5]) << bit_shift)));
+
+	return hash_value;
+}
+
+/**
+ * igb_i21x_hw_doublecheck - double checks potential HW issue in i21X
+ * @hw: pointer to the HW structure
+ *
+ * Checks if multicast array is wrote correctly
+ * If not then rewrites again to register
+ **/
+static void igb_i21x_hw_doublecheck(struct e1000_hw *hw)
+{
+	int failed_cnt = 3;
+	bool is_failed;
+	int i;
+
+	do {
+		is_failed = false;
+		for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) {
+			if (array_rd32(E1000_MTA, i) != hw->mac.mta_shadow[i]) {
+				is_failed = true;
+				array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]);
+				wrfl();
+			}
+		}
+		if (is_failed && --failed_cnt <= 0) {
+			hw_dbg("Failed to update MTA_REGISTER, too many retries");
+			break;
+		}
+	} while (is_failed);
+}
+
+/**
+ *  igb_update_mc_addr_list - Update Multicast addresses
+ *  @hw: pointer to the HW structure
+ *  @mc_addr_list: array of multicast addresses to program
+ *  @mc_addr_count: number of multicast addresses to program
+ *
+ *  Updates entire Multicast Table Array.
+ *  The caller must have a packed mc_addr_list of multicast addresses.
+ **/
+void igb_update_mc_addr_list(struct e1000_hw *hw,
+			     u8 *mc_addr_list, u32 mc_addr_count)
+{
+	u32 hash_value, hash_bit, hash_reg;
+	int i;
+
+	/* clear mta_shadow */
+	memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
+
+	/* update mta_shadow from mc_addr_list */
+	for (i = 0; (u32) i < mc_addr_count; i++) {
+		hash_value = igb_hash_mc_addr(hw, mc_addr_list);
+
+		hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
+		hash_bit = hash_value & 0x1F;
+
+		hw->mac.mta_shadow[hash_reg] |= BIT(hash_bit);
+		mc_addr_list += (ETH_ALEN);
+	}
+
+	/* replace the entire MTA table */
+	for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
+		array_wr32(E1000_MTA, i, hw->mac.mta_shadow[i]);
+	wrfl();
+	if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211)
+		igb_i21x_hw_doublecheck(hw);
+}
+
+/**
+ *  igb_clear_hw_cntrs_base - Clear base hardware counters
+ *  @hw: pointer to the HW structure
+ *
+ *  Clears the base hardware counters by reading the counter registers.
+ **/
+void igb_clear_hw_cntrs_base(struct e1000_hw *hw)
+{
+	rd32(E1000_CRCERRS);
+	rd32(E1000_SYMERRS);
+	rd32(E1000_MPC);
+	rd32(E1000_SCC);
+	rd32(E1000_ECOL);
+	rd32(E1000_MCC);
+	rd32(E1000_LATECOL);
+	rd32(E1000_COLC);
+	rd32(E1000_DC);
+	rd32(E1000_SEC);
+	rd32(E1000_RLEC);
+	rd32(E1000_XONRXC);
+	rd32(E1000_XONTXC);
+	rd32(E1000_XOFFRXC);
+	rd32(E1000_XOFFTXC);
+	rd32(E1000_FCRUC);
+	rd32(E1000_GPRC);
+	rd32(E1000_BPRC);
+	rd32(E1000_MPRC);
+	rd32(E1000_GPTC);
+	rd32(E1000_GORCL);
+	rd32(E1000_GORCH);
+	rd32(E1000_GOTCL);
+	rd32(E1000_GOTCH);
+	rd32(E1000_RNBC);
+	rd32(E1000_RUC);
+	rd32(E1000_RFC);
+	rd32(E1000_ROC);
+	rd32(E1000_RJC);
+	rd32(E1000_TORL);
+	rd32(E1000_TORH);
+	rd32(E1000_TOTL);
+	rd32(E1000_TOTH);
+	rd32(E1000_TPR);
+	rd32(E1000_TPT);
+	rd32(E1000_MPTC);
+	rd32(E1000_BPTC);
+}
+
+/**
+ *  igb_check_for_copper_link - Check for link (Copper)
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks to see of the link status of the hardware has changed.  If a
+ *  change in link status has been detected, then we read the PHY registers
+ *  to get the current speed/duplex if link exists.
+ **/
+s32 igb_check_for_copper_link(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	s32 ret_val;
+	bool link;
+
+	/* We only want to go out to the PHY registers to see if Auto-Neg
+	 * has completed and/or if our link status has changed.  The
+	 * get_link_status flag is set upon receiving a Link Status
+	 * Change or Rx Sequence Error interrupt.
+	 */
+	if (!mac->get_link_status) {
+		ret_val = 0;
+		goto out;
+	}
+
+	/* First we want to see if the MII Status Register reports
+	 * link.  If so, then we want to get the current speed/duplex
+	 * of the PHY.
+	 */
+	ret_val = igb_phy_has_link(hw, 1, 0, &link);
+	if (ret_val)
+		goto out;
+
+	if (!link)
+		goto out; /* No link detected */
+
+	mac->get_link_status = false;
+
+	/* Check if there was DownShift, must be checked
+	 * immediately after link-up
+	 */
+	igb_check_downshift(hw);
+
+	/* If we are forcing speed/duplex, then we simply return since
+	 * we have already determined whether we have link or not.
+	 */
+	if (!mac->autoneg) {
+		ret_val = -E1000_ERR_CONFIG;
+		goto out;
+	}
+
+	/* Auto-Neg is enabled.  Auto Speed Detection takes care
+	 * of MAC speed/duplex configuration.  So we only need to
+	 * configure Collision Distance in the MAC.
+	 */
+	igb_config_collision_dist(hw);
+
+	/* Configure Flow Control now that Auto-Neg has completed.
+	 * First, we need to restore the desired flow control
+	 * settings because we may have had to re-autoneg with a
+	 * different link partner.
+	 */
+	ret_val = igb_config_fc_after_link_up(hw);
+	if (ret_val)
+		hw_dbg("Error configuring flow control\n");
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_setup_link - Setup flow control and link settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Determines which flow control settings to use, then configures flow
+ *  control.  Calls the appropriate media-specific link configuration
+ *  function.  Assuming the adapter has a valid link partner, a valid link
+ *  should be established.  Assumes the hardware has previously been reset
+ *  and the transmitter and receiver are not enabled.
+ **/
+s32 igb_setup_link(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+
+	/* In the case of the phy reset being blocked, we already have a link.
+	 * We do not need to set it up again.
+	 */
+	if (igb_check_reset_block(hw))
+		goto out;
+
+	/* If requested flow control is set to default, set flow control
+	 * based on the EEPROM flow control settings.
+	 */
+	if (hw->fc.requested_mode == e1000_fc_default) {
+		ret_val = igb_set_default_fc(hw);
+		if (ret_val)
+			goto out;
+	}
+
+	/* We want to save off the original Flow Control configuration just
+	 * in case we get disconnected and then reconnected into a different
+	 * hub or switch with different Flow Control capabilities.
+	 */
+	hw->fc.current_mode = hw->fc.requested_mode;
+
+	hw_dbg("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode);
+
+	/* Call the necessary media_type subroutine to configure the link. */
+	ret_val = hw->mac.ops.setup_physical_interface(hw);
+	if (ret_val)
+		goto out;
+
+	/* Initialize the flow control address, type, and PAUSE timer
+	 * registers to their default values.  This is done even if flow
+	 * control is disabled, because it does not hurt anything to
+	 * initialize these registers.
+	 */
+	hw_dbg("Initializing the Flow Control address, type and timer regs\n");
+	wr32(E1000_FCT, FLOW_CONTROL_TYPE);
+	wr32(E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH);
+	wr32(E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW);
+
+	wr32(E1000_FCTTV, hw->fc.pause_time);
+
+	igb_set_fc_watermarks(hw);
+
+out:
+
+	return ret_val;
+}
+
+/**
+ *  igb_config_collision_dist - Configure collision distance
+ *  @hw: pointer to the HW structure
+ *
+ *  Configures the collision distance to the default value and is used
+ *  during link setup. Currently no func pointer exists and all
+ *  implementations are handled in the generic version of this function.
+ **/
+void igb_config_collision_dist(struct e1000_hw *hw)
+{
+	u32 tctl;
+
+	tctl = rd32(E1000_TCTL);
+
+	tctl &= ~E1000_TCTL_COLD;
+	tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT;
+
+	wr32(E1000_TCTL, tctl);
+	wrfl();
+}
+
+/**
+ *  igb_set_fc_watermarks - Set flow control high/low watermarks
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets the flow control high/low threshold (watermark) registers.  If
+ *  flow control XON frame transmission is enabled, then set XON frame
+ *  tansmission as well.
+ **/
+static void igb_set_fc_watermarks(struct e1000_hw *hw)
+{
+	u32 fcrtl = 0, fcrth = 0;
+
+	/* Set the flow control receive threshold registers.  Normally,
+	 * these registers will be set to a default threshold that may be
+	 * adjusted later by the driver's runtime code.  However, if the
+	 * ability to transmit pause frames is not enabled, then these
+	 * registers will be set to 0.
+	 */
+	if (hw->fc.current_mode & e1000_fc_tx_pause) {
+		/* We need to set up the Receive Threshold high and low water
+		 * marks as well as (optionally) enabling the transmission of
+		 * XON frames.
+		 */
+		fcrtl = hw->fc.low_water;
+		if (hw->fc.send_xon)
+			fcrtl |= E1000_FCRTL_XONE;
+
+		fcrth = hw->fc.high_water;
+	}
+	wr32(E1000_FCRTL, fcrtl);
+	wr32(E1000_FCRTH, fcrth);
+}
+
+/**
+ *  igb_set_default_fc - Set flow control default values
+ *  @hw: pointer to the HW structure
+ *
+ *  Read the EEPROM for the default values for flow control and store the
+ *  values.
+ **/
+static s32 igb_set_default_fc(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u16 lan_offset;
+	u16 nvm_data;
+
+	/* Read and store word 0x0F of the EEPROM. This word contains bits
+	 * that determine the hardware's default PAUSE (flow control) mode,
+	 * a bit that determines whether the HW defaults to enabling or
+	 * disabling auto-negotiation, and the direction of the
+	 * SW defined pins. If there is no SW over-ride of the flow
+	 * control setting, then the variable hw->fc will
+	 * be initialized based on a value in the EEPROM.
+	 */
+	if (hw->mac.type == e1000_i350)
+		lan_offset = NVM_82580_LAN_FUNC_OFFSET(hw->bus.func);
+	else
+		lan_offset = 0;
+
+	ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG + lan_offset,
+				   1, &nvm_data);
+	if (ret_val) {
+		hw_dbg("NVM Read Error\n");
+		goto out;
+	}
+
+	if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == 0)
+		hw->fc.requested_mode = e1000_fc_none;
+	else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == NVM_WORD0F_ASM_DIR)
+		hw->fc.requested_mode = e1000_fc_tx_pause;
+	else
+		hw->fc.requested_mode = e1000_fc_full;
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_force_mac_fc - Force the MAC's flow control settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Force the MAC's flow control settings.  Sets the TFCE and RFCE bits in the
+ *  device control register to reflect the adapter settings.  TFCE and RFCE
+ *  need to be explicitly set by software when a copper PHY is used because
+ *  autonegotiation is managed by the PHY rather than the MAC.  Software must
+ *  also configure these bits when link is forced on a fiber connection.
+ **/
+s32 igb_force_mac_fc(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 ret_val = 0;
+
+	ctrl = rd32(E1000_CTRL);
+
+	/* Because we didn't get link via the internal auto-negotiation
+	 * mechanism (we either forced link or we got link via PHY
+	 * auto-neg), we have to manually enable/disable transmit an
+	 * receive flow control.
+	 *
+	 * The "Case" statement below enables/disable flow control
+	 * according to the "hw->fc.current_mode" parameter.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *      0:  Flow control is completely disabled
+	 *      1:  Rx flow control is enabled (we can receive pause
+	 *          frames but not send pause frames).
+	 *      2:  Tx flow control is enabled (we can send pause frames
+	 *          but we do not receive pause frames).
+	 *      3:  Both Rx and TX flow control (symmetric) is enabled.
+	 *  other:  No other values should be possible at this point.
+	 */
+	hw_dbg("hw->fc.current_mode = %u\n", hw->fc.current_mode);
+
+	switch (hw->fc.current_mode) {
+	case e1000_fc_none:
+		ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE));
+		break;
+	case e1000_fc_rx_pause:
+		ctrl &= (~E1000_CTRL_TFCE);
+		ctrl |= E1000_CTRL_RFCE;
+		break;
+	case e1000_fc_tx_pause:
+		ctrl &= (~E1000_CTRL_RFCE);
+		ctrl |= E1000_CTRL_TFCE;
+		break;
+	case e1000_fc_full:
+		ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE);
+		break;
+	default:
+		hw_dbg("Flow control param set incorrectly\n");
+		ret_val = -E1000_ERR_CONFIG;
+		goto out;
+	}
+
+	wr32(E1000_CTRL, ctrl);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_config_fc_after_link_up - Configures flow control after link
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks the status of auto-negotiation after link up to ensure that the
+ *  speed and duplex were not forced.  If the link needed to be forced, then
+ *  flow control needs to be forced also.  If auto-negotiation is enabled
+ *  and did not fail, then we configure flow control based on our link
+ *  partner.
+ **/
+s32 igb_config_fc_after_link_up(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	s32 ret_val = 0;
+	u32 pcs_status_reg, pcs_adv_reg, pcs_lp_ability_reg, pcs_ctrl_reg;
+	u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg;
+	u16 speed, duplex;
+
+	/* Check for the case where we have fiber media and auto-neg failed
+	 * so we had to force link.  In this case, we need to force the
+	 * configuration of the MAC to match the "fc" parameter.
+	 */
+	if (mac->autoneg_failed) {
+		if (hw->phy.media_type == e1000_media_type_internal_serdes)
+			ret_val = igb_force_mac_fc(hw);
+	} else {
+		if (hw->phy.media_type == e1000_media_type_copper)
+			ret_val = igb_force_mac_fc(hw);
+	}
+
+	if (ret_val) {
+		hw_dbg("Error forcing flow control settings\n");
+		goto out;
+	}
+
+	/* Check for the case where we have copper media and auto-neg is
+	 * enabled.  In this case, we need to check and see if Auto-Neg
+	 * has completed, and if so, how the PHY and link partner has
+	 * flow control configured.
+	 */
+	if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) {
+		/* Read the MII Status Register and check to see if AutoNeg
+		 * has completed.  We read this twice because this reg has
+		 * some "sticky" (latched) bits.
+		 */
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS,
+						   &mii_status_reg);
+		if (ret_val)
+			goto out;
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS,
+						   &mii_status_reg);
+		if (ret_val)
+			goto out;
+
+		if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) {
+			hw_dbg("Copper PHY and Auto Neg has not completed.\n");
+			goto out;
+		}
+
+		/* The AutoNeg process has completed, so we now need to
+		 * read both the Auto Negotiation Advertisement
+		 * Register (Address 4) and the Auto_Negotiation Base
+		 * Page Ability Register (Address 5) to determine how
+		 * flow control was negotiated.
+		 */
+		ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV,
+					    &mii_nway_adv_reg);
+		if (ret_val)
+			goto out;
+		ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY,
+					    &mii_nway_lp_ability_reg);
+		if (ret_val)
+			goto out;
+
+		/* Two bits in the Auto Negotiation Advertisement Register
+		 * (Address 4) and two bits in the Auto Negotiation Base
+		 * Page Ability Register (Address 5) determine flow control
+		 * for both the PHY and the link partner.  The following
+		 * table, taken out of the IEEE 802.3ab/D6.0 dated March 25,
+		 * 1999, describes these PAUSE resolution bits and how flow
+		 * control is determined based upon these settings.
+		 * NOTE:  DC = Don't Care
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    0    |  DC   |   DC    | e1000_fc_none
+		 *   0   |    1    |   0   |   DC    | e1000_fc_none
+		 *   0   |    1    |   1   |    0    | e1000_fc_none
+		 *   0   |    1    |   1   |    1    | e1000_fc_tx_pause
+		 *   1   |    0    |   0   |   DC    | e1000_fc_none
+		 *   1   |   DC    |   1   |   DC    | e1000_fc_full
+		 *   1   |    1    |   0   |    0    | e1000_fc_none
+		 *   1   |    1    |   0   |    1    | e1000_fc_rx_pause
+		 *
+		 * Are both PAUSE bits set to 1?  If so, this implies
+		 * Symmetric Flow Control is enabled at both ends.  The
+		 * ASM_DIR bits are irrelevant per the spec.
+		 *
+		 * For Symmetric Flow Control:
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |   DC    |   1   |   DC    | E1000_fc_full
+		 *
+		 */
+		if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+		    (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) {
+			/* Now we need to check if the user selected RX ONLY
+			 * of pause frames.  In this case, we had to advertise
+			 * FULL flow control because we could not advertise RX
+			 * ONLY. Hence, we must now check to see if we need to
+			 * turn OFF  the TRANSMISSION of PAUSE frames.
+			 */
+			if (hw->fc.requested_mode == e1000_fc_full) {
+				hw->fc.current_mode = e1000_fc_full;
+				hw_dbg("Flow Control = FULL.\n");
+			} else {
+				hw->fc.current_mode = e1000_fc_rx_pause;
+				hw_dbg("Flow Control = RX PAUSE frames only.\n");
+			}
+		}
+		/* For receiving PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    1    |   1   |    1    | e1000_fc_tx_pause
+		 */
+		else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+			  (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+			  (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+			  (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+			hw->fc.current_mode = e1000_fc_tx_pause;
+			hw_dbg("Flow Control = TX PAUSE frames only.\n");
+		}
+		/* For transmitting PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |    1    |   0   |    1    | e1000_fc_rx_pause
+		 */
+		else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+			 (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+			 !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+			 (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+			hw->fc.current_mode = e1000_fc_rx_pause;
+			hw_dbg("Flow Control = RX PAUSE frames only.\n");
+		}
+		/* Per the IEEE spec, at this point flow control should be
+		 * disabled.  However, we want to consider that we could
+		 * be connected to a legacy switch that doesn't advertise
+		 * desired flow control, but can be forced on the link
+		 * partner.  So if we advertised no flow control, that is
+		 * what we will resolve to.  If we advertised some kind of
+		 * receive capability (Rx Pause Only or Full Flow Control)
+		 * and the link partner advertised none, we will configure
+		 * ourselves to enable Rx Flow Control only.  We can do
+		 * this safely for two reasons:  If the link partner really
+		 * didn't want flow control enabled, and we enable Rx, no
+		 * harm done since we won't be receiving any PAUSE frames
+		 * anyway.  If the intent on the link partner was to have
+		 * flow control enabled, then by us enabling RX only, we
+		 * can at least receive pause frames and process them.
+		 * This is a good idea because in most cases, since we are
+		 * predominantly a server NIC, more times than not we will
+		 * be asked to delay transmission of packets than asking
+		 * our link partner to pause transmission of frames.
+		 */
+		else if ((hw->fc.requested_mode == e1000_fc_none) ||
+			 (hw->fc.requested_mode == e1000_fc_tx_pause) ||
+			 (hw->fc.strict_ieee)) {
+			hw->fc.current_mode = e1000_fc_none;
+			hw_dbg("Flow Control = NONE.\n");
+		} else {
+			hw->fc.current_mode = e1000_fc_rx_pause;
+			hw_dbg("Flow Control = RX PAUSE frames only.\n");
+		}
+
+		/* Now we need to do one last check...  If we auto-
+		 * negotiated to HALF DUPLEX, flow control should not be
+		 * enabled per IEEE 802.3 spec.
+		 */
+		ret_val = hw->mac.ops.get_speed_and_duplex(hw, &speed, &duplex);
+		if (ret_val) {
+			hw_dbg("Error getting link speed and duplex\n");
+			goto out;
+		}
+
+		if (duplex == HALF_DUPLEX)
+			hw->fc.current_mode = e1000_fc_none;
+
+		/* Now we call a subroutine to actually force the MAC
+		 * controller to use the correct flow control settings.
+		 */
+		ret_val = igb_force_mac_fc(hw);
+		if (ret_val) {
+			hw_dbg("Error forcing flow control settings\n");
+			goto out;
+		}
+	}
+	/* Check for the case where we have SerDes media and auto-neg is
+	 * enabled.  In this case, we need to check and see if Auto-Neg
+	 * has completed, and if so, how the PHY and link partner has
+	 * flow control configured.
+	 */
+	if ((hw->phy.media_type == e1000_media_type_internal_serdes)
+		&& mac->autoneg) {
+		/* Read the PCS_LSTS and check to see if AutoNeg
+		 * has completed.
+		 */
+		pcs_status_reg = rd32(E1000_PCS_LSTAT);
+
+		if (!(pcs_status_reg & E1000_PCS_LSTS_AN_COMPLETE)) {
+			hw_dbg("PCS Auto Neg has not completed.\n");
+			return ret_val;
+		}
+
+		/* The AutoNeg process has completed, so we now need to
+		 * read both the Auto Negotiation Advertisement
+		 * Register (PCS_ANADV) and the Auto_Negotiation Base
+		 * Page Ability Register (PCS_LPAB) to determine how
+		 * flow control was negotiated.
+		 */
+		pcs_adv_reg = rd32(E1000_PCS_ANADV);
+		pcs_lp_ability_reg = rd32(E1000_PCS_LPAB);
+
+		/* Two bits in the Auto Negotiation Advertisement Register
+		 * (PCS_ANADV) and two bits in the Auto Negotiation Base
+		 * Page Ability Register (PCS_LPAB) determine flow control
+		 * for both the PHY and the link partner.  The following
+		 * table, taken out of the IEEE 802.3ab/D6.0 dated March 25,
+		 * 1999, describes these PAUSE resolution bits and how flow
+		 * control is determined based upon these settings.
+		 * NOTE:  DC = Don't Care
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    0    |  DC   |   DC    | e1000_fc_none
+		 *   0   |    1    |   0   |   DC    | e1000_fc_none
+		 *   0   |    1    |   1   |    0    | e1000_fc_none
+		 *   0   |    1    |   1   |    1    | e1000_fc_tx_pause
+		 *   1   |    0    |   0   |   DC    | e1000_fc_none
+		 *   1   |   DC    |   1   |   DC    | e1000_fc_full
+		 *   1   |    1    |   0   |    0    | e1000_fc_none
+		 *   1   |    1    |   0   |    1    | e1000_fc_rx_pause
+		 *
+		 * Are both PAUSE bits set to 1?  If so, this implies
+		 * Symmetric Flow Control is enabled at both ends.  The
+		 * ASM_DIR bits are irrelevant per the spec.
+		 *
+		 * For Symmetric Flow Control:
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |   DC    |   1   |   DC    | e1000_fc_full
+		 *
+		 */
+		if ((pcs_adv_reg & E1000_TXCW_PAUSE) &&
+		    (pcs_lp_ability_reg & E1000_TXCW_PAUSE)) {
+			/* Now we need to check if the user selected Rx ONLY
+			 * of pause frames.  In this case, we had to advertise
+			 * FULL flow control because we could not advertise Rx
+			 * ONLY. Hence, we must now check to see if we need to
+			 * turn OFF the TRANSMISSION of PAUSE frames.
+			 */
+			if (hw->fc.requested_mode == e1000_fc_full) {
+				hw->fc.current_mode = e1000_fc_full;
+				hw_dbg("Flow Control = FULL.\n");
+			} else {
+				hw->fc.current_mode = e1000_fc_rx_pause;
+				hw_dbg("Flow Control = Rx PAUSE frames only.\n");
+			}
+		}
+		/* For receiving PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    1    |   1   |    1    | e1000_fc_tx_pause
+		 */
+		else if (!(pcs_adv_reg & E1000_TXCW_PAUSE) &&
+			  (pcs_adv_reg & E1000_TXCW_ASM_DIR) &&
+			  (pcs_lp_ability_reg & E1000_TXCW_PAUSE) &&
+			  (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) {
+			hw->fc.current_mode = e1000_fc_tx_pause;
+			hw_dbg("Flow Control = Tx PAUSE frames only.\n");
+		}
+		/* For transmitting PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |    1    |   0   |    1    | e1000_fc_rx_pause
+		 */
+		else if ((pcs_adv_reg & E1000_TXCW_PAUSE) &&
+			 (pcs_adv_reg & E1000_TXCW_ASM_DIR) &&
+			 !(pcs_lp_ability_reg & E1000_TXCW_PAUSE) &&
+			 (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) {
+			hw->fc.current_mode = e1000_fc_rx_pause;
+			hw_dbg("Flow Control = Rx PAUSE frames only.\n");
+		} else {
+			/* Per the IEEE spec, at this point flow control
+			 * should be disabled.
+			 */
+			hw->fc.current_mode = e1000_fc_none;
+			hw_dbg("Flow Control = NONE.\n");
+		}
+
+		/* Now we call a subroutine to actually force the MAC
+		 * controller to use the correct flow control settings.
+		 */
+		pcs_ctrl_reg = rd32(E1000_PCS_LCTL);
+		pcs_ctrl_reg |= E1000_PCS_LCTL_FORCE_FCTRL;
+		wr32(E1000_PCS_LCTL, pcs_ctrl_reg);
+
+		ret_val = igb_force_mac_fc(hw);
+		if (ret_val) {
+			hw_dbg("Error forcing flow control settings\n");
+			return ret_val;
+		}
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_get_speed_and_duplex_copper - Retrieve current speed/duplex
+ *  @hw: pointer to the HW structure
+ *  @speed: stores the current speed
+ *  @duplex: stores the current duplex
+ *
+ *  Read the status register for the current speed/duplex and store the current
+ *  speed and duplex for copper connections.
+ **/
+s32 igb_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed,
+				      u16 *duplex)
+{
+	u32 status;
+
+	status = rd32(E1000_STATUS);
+	if (status & E1000_STATUS_SPEED_1000) {
+		*speed = SPEED_1000;
+		hw_dbg("1000 Mbs, ");
+	} else if (status & E1000_STATUS_SPEED_100) {
+		*speed = SPEED_100;
+		hw_dbg("100 Mbs, ");
+	} else {
+		*speed = SPEED_10;
+		hw_dbg("10 Mbs, ");
+	}
+
+	if (status & E1000_STATUS_FD) {
+		*duplex = FULL_DUPLEX;
+		hw_dbg("Full Duplex\n");
+	} else {
+		*duplex = HALF_DUPLEX;
+		hw_dbg("Half Duplex\n");
+	}
+
+	return 0;
+}
+
+/**
+ *  igb_get_hw_semaphore - Acquire hardware semaphore
+ *  @hw: pointer to the HW structure
+ *
+ *  Acquire the HW semaphore to access the PHY or NVM
+ **/
+s32 igb_get_hw_semaphore(struct e1000_hw *hw)
+{
+	u32 swsm;
+	s32 ret_val = 0;
+	s32 timeout = hw->nvm.word_size + 1;
+	s32 i = 0;
+
+	/* Get the SW semaphore */
+	while (i < timeout) {
+		swsm = rd32(E1000_SWSM);
+		if (!(swsm & E1000_SWSM_SMBI))
+			break;
+
+		udelay(50);
+		i++;
+	}
+
+	if (i == timeout) {
+		hw_dbg("Driver can't access device - SMBI bit is set.\n");
+		ret_val = -E1000_ERR_NVM;
+		goto out;
+	}
+
+	/* Get the FW semaphore. */
+	for (i = 0; i < timeout; i++) {
+		swsm = rd32(E1000_SWSM);
+		wr32(E1000_SWSM, swsm | E1000_SWSM_SWESMBI);
+
+		/* Semaphore acquired if bit latched */
+		if (rd32(E1000_SWSM) & E1000_SWSM_SWESMBI)
+			break;
+
+		udelay(50);
+	}
+
+	if (i == timeout) {
+		/* Release semaphores */
+		igb_put_hw_semaphore(hw);
+		hw_dbg("Driver can't access the NVM\n");
+		ret_val = -E1000_ERR_NVM;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_put_hw_semaphore - Release hardware semaphore
+ *  @hw: pointer to the HW structure
+ *
+ *  Release hardware semaphore used to access the PHY or NVM
+ **/
+void igb_put_hw_semaphore(struct e1000_hw *hw)
+{
+	u32 swsm;
+
+	swsm = rd32(E1000_SWSM);
+
+	swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI);
+
+	wr32(E1000_SWSM, swsm);
+}
+
+/**
+ *  igb_get_auto_rd_done - Check for auto read completion
+ *  @hw: pointer to the HW structure
+ *
+ *  Check EEPROM for Auto Read done bit.
+ **/
+s32 igb_get_auto_rd_done(struct e1000_hw *hw)
+{
+	s32 i = 0;
+	s32 ret_val = 0;
+
+
+	while (i < AUTO_READ_DONE_TIMEOUT) {
+		if (rd32(E1000_EECD) & E1000_EECD_AUTO_RD)
+			break;
+		usleep_range(1000, 2000);
+		i++;
+	}
+
+	if (i == AUTO_READ_DONE_TIMEOUT) {
+		hw_dbg("Auto read by HW from NVM has not completed.\n");
+		ret_val = -E1000_ERR_RESET;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_valid_led_default - Verify a valid default LED config
+ *  @hw: pointer to the HW structure
+ *  @data: pointer to the NVM (EEPROM)
+ *
+ *  Read the EEPROM for the current default LED configuration.  If the
+ *  LED configuration is not valid, set to a valid LED configuration.
+ **/
+static s32 igb_valid_led_default(struct e1000_hw *hw, u16 *data)
+{
+	s32 ret_val;
+
+	ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data);
+	if (ret_val) {
+		hw_dbg("NVM Read Error\n");
+		goto out;
+	}
+
+	if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) {
+		switch (hw->phy.media_type) {
+		case e1000_media_type_internal_serdes:
+			*data = ID_LED_DEFAULT_82575_SERDES;
+			break;
+		case e1000_media_type_copper:
+		default:
+			*data = ID_LED_DEFAULT;
+			break;
+		}
+	}
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_id_led_init -
+ *  @hw: pointer to the HW structure
+ *
+ **/
+s32 igb_id_led_init(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	s32 ret_val;
+	const u32 ledctl_mask = 0x000000FF;
+	const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON;
+	const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF;
+	u16 data, i, temp;
+	const u16 led_mask = 0x0F;
+
+	/* i210 and i211 devices have different LED mechanism */
+	if ((hw->mac.type == e1000_i210) ||
+	    (hw->mac.type == e1000_i211))
+		ret_val = igb_valid_led_default_i210(hw, &data);
+	else
+		ret_val = igb_valid_led_default(hw, &data);
+
+	if (ret_val)
+		goto out;
+
+	mac->ledctl_default = rd32(E1000_LEDCTL);
+	mac->ledctl_mode1 = mac->ledctl_default;
+	mac->ledctl_mode2 = mac->ledctl_default;
+
+	for (i = 0; i < 4; i++) {
+		temp = (data >> (i << 2)) & led_mask;
+		switch (temp) {
+		case ID_LED_ON1_DEF2:
+		case ID_LED_ON1_ON2:
+		case ID_LED_ON1_OFF2:
+			mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
+			mac->ledctl_mode1 |= ledctl_on << (i << 3);
+			break;
+		case ID_LED_OFF1_DEF2:
+		case ID_LED_OFF1_ON2:
+		case ID_LED_OFF1_OFF2:
+			mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3));
+			mac->ledctl_mode1 |= ledctl_off << (i << 3);
+			break;
+		default:
+			/* Do nothing */
+			break;
+		}
+		switch (temp) {
+		case ID_LED_DEF1_ON2:
+		case ID_LED_ON1_ON2:
+		case ID_LED_OFF1_ON2:
+			mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
+			mac->ledctl_mode2 |= ledctl_on << (i << 3);
+			break;
+		case ID_LED_DEF1_OFF2:
+		case ID_LED_ON1_OFF2:
+		case ID_LED_OFF1_OFF2:
+			mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3));
+			mac->ledctl_mode2 |= ledctl_off << (i << 3);
+			break;
+		default:
+			/* Do nothing */
+			break;
+		}
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_cleanup_led - Set LED config to default operation
+ *  @hw: pointer to the HW structure
+ *
+ *  Remove the current LED configuration and set the LED configuration
+ *  to the default value, saved from the EEPROM.
+ **/
+s32 igb_cleanup_led(struct e1000_hw *hw)
+{
+	wr32(E1000_LEDCTL, hw->mac.ledctl_default);
+	return 0;
+}
+
+/**
+ *  igb_blink_led - Blink LED
+ *  @hw: pointer to the HW structure
+ *
+ *  Blink the led's which are set to be on.
+ **/
+s32 igb_blink_led(struct e1000_hw *hw)
+{
+	u32 ledctl_blink = 0;
+	u32 i;
+
+	if (hw->phy.media_type == e1000_media_type_fiber) {
+		/* always blink LED0 for PCI-E fiber */
+		ledctl_blink = E1000_LEDCTL_LED0_BLINK |
+		     (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT);
+	} else {
+		/* Set the blink bit for each LED that's "on" (0x0E)
+		 * (or "off" if inverted) in ledctl_mode2.  The blink
+		 * logic in hardware only works when mode is set to "on"
+		 * so it must be changed accordingly when the mode is
+		 * "off" and inverted.
+		 */
+		ledctl_blink = hw->mac.ledctl_mode2;
+		for (i = 0; i < 32; i += 8) {
+			u32 mode = (hw->mac.ledctl_mode2 >> i) &
+			    E1000_LEDCTL_LED0_MODE_MASK;
+			u32 led_default = hw->mac.ledctl_default >> i;
+
+			if ((!(led_default & E1000_LEDCTL_LED0_IVRT) &&
+			     (mode == E1000_LEDCTL_MODE_LED_ON)) ||
+			    ((led_default & E1000_LEDCTL_LED0_IVRT) &&
+			     (mode == E1000_LEDCTL_MODE_LED_OFF))) {
+				ledctl_blink &=
+				    ~(E1000_LEDCTL_LED0_MODE_MASK << i);
+				ledctl_blink |= (E1000_LEDCTL_LED0_BLINK |
+						 E1000_LEDCTL_MODE_LED_ON) << i;
+			}
+		}
+	}
+
+	wr32(E1000_LEDCTL, ledctl_blink);
+
+	return 0;
+}
+
+/**
+ *  igb_led_off - Turn LED off
+ *  @hw: pointer to the HW structure
+ *
+ *  Turn LED off.
+ **/
+s32 igb_led_off(struct e1000_hw *hw)
+{
+	switch (hw->phy.media_type) {
+	case e1000_media_type_copper:
+		wr32(E1000_LEDCTL, hw->mac.ledctl_mode1);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ *  igb_disable_pcie_master - Disables PCI-express master access
+ *  @hw: pointer to the HW structure
+ *
+ *  Returns 0 (0) if successful, else returns -10
+ *  (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused
+ *  the master requests to be disabled.
+ *
+ *  Disables PCI-Express master access and verifies there are no pending
+ *  requests.
+ **/
+s32 igb_disable_pcie_master(struct e1000_hw *hw)
+{
+	u32 ctrl;
+	s32 timeout = MASTER_DISABLE_TIMEOUT;
+	s32 ret_val = 0;
+
+	if (hw->bus.type != e1000_bus_type_pci_express)
+		goto out;
+
+	ctrl = rd32(E1000_CTRL);
+	ctrl |= E1000_CTRL_GIO_MASTER_DISABLE;
+	wr32(E1000_CTRL, ctrl);
+
+	while (timeout) {
+		if (!(rd32(E1000_STATUS) &
+		      E1000_STATUS_GIO_MASTER_ENABLE))
+			break;
+		udelay(100);
+		timeout--;
+	}
+
+	if (!timeout) {
+		hw_dbg("Master requests are pending.\n");
+		ret_val = -E1000_ERR_MASTER_REQUESTS_PENDING;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_validate_mdi_setting - Verify MDI/MDIx settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Verify that when not using auto-negotitation that MDI/MDIx is correctly
+ *  set, which is forced to MDI mode only.
+ **/
+s32 igb_validate_mdi_setting(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+
+	/* All MDI settings are supported on 82580 and newer. */
+	if (hw->mac.type >= e1000_82580)
+		goto out;
+
+	if (!hw->mac.autoneg && (hw->phy.mdix == 0 || hw->phy.mdix == 3)) {
+		hw_dbg("Invalid MDI setting detected\n");
+		hw->phy.mdix = 1;
+		ret_val = -E1000_ERR_CONFIG;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_write_8bit_ctrl_reg - Write a 8bit CTRL register
+ *  @hw: pointer to the HW structure
+ *  @reg: 32bit register offset such as E1000_SCTL
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Writes an address/data control type register.  There are several of these
+ *  and they all have the format address << 8 | data and bit 31 is polled for
+ *  completion.
+ **/
+s32 igb_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg,
+			      u32 offset, u8 data)
+{
+	u32 i, regvalue = 0;
+	s32 ret_val = 0;
+
+	/* Set up the address and data */
+	regvalue = ((u32)data) | (offset << E1000_GEN_CTL_ADDRESS_SHIFT);
+	wr32(reg, regvalue);
+
+	/* Poll the ready bit to see if the MDI read completed */
+	for (i = 0; i < E1000_GEN_POLL_TIMEOUT; i++) {
+		udelay(5);
+		regvalue = rd32(reg);
+		if (regvalue & E1000_GEN_CTL_READY)
+			break;
+	}
+	if (!(regvalue & E1000_GEN_CTL_READY)) {
+		hw_dbg("Reg %08x did not indicate ready\n", reg);
+		ret_val = -E1000_ERR_PHY;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_enable_mng_pass_thru - Enable processing of ARP's
+ *  @hw: pointer to the HW structure
+ *
+ *  Verifies the hardware needs to leave interface enabled so that frames can
+ *  be directed to and from the management interface.
+ **/
+bool igb_enable_mng_pass_thru(struct e1000_hw *hw)
+{
+	u32 manc;
+	u32 fwsm, factps;
+	bool ret_val = false;
+
+	if (!hw->mac.asf_firmware_present)
+		goto out;
+
+	manc = rd32(E1000_MANC);
+
+	if (!(manc & E1000_MANC_RCV_TCO_EN))
+		goto out;
+
+	if (hw->mac.arc_subsystem_valid) {
+		fwsm = rd32(E1000_FWSM);
+		factps = rd32(E1000_FACTPS);
+
+		if (!(factps & E1000_FACTPS_MNGCG) &&
+		    ((fwsm & E1000_FWSM_MODE_MASK) ==
+		     (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT))) {
+			ret_val = true;
+			goto out;
+		}
+	} else {
+		if ((manc & E1000_MANC_SMBUS_EN) &&
+		    !(manc & E1000_MANC_ASF_EN)) {
+			ret_val = true;
+			goto out;
+		}
+	}
+
+out:
+	return ret_val;
+}
diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.h b/drivers/net/ethernet/intel/igb/e1000_mac.h
new file mode 100644
index 0000000000..6e110f28f9
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+#ifndef _E1000_MAC_H_
+#define _E1000_MAC_H_
+
+#include "e1000_hw.h"
+
+#include "e1000_phy.h"
+#include "e1000_nvm.h"
+#include "e1000_defines.h"
+#include "e1000_i210.h"
+
+/* Functions that should not be called directly from drivers but can be used
+ * by other files in this 'shared code'
+ */
+s32  igb_blink_led(struct e1000_hw *hw);
+s32  igb_check_for_copper_link(struct e1000_hw *hw);
+s32  igb_cleanup_led(struct e1000_hw *hw);
+s32  igb_config_fc_after_link_up(struct e1000_hw *hw);
+s32  igb_disable_pcie_master(struct e1000_hw *hw);
+s32  igb_force_mac_fc(struct e1000_hw *hw);
+s32  igb_get_auto_rd_done(struct e1000_hw *hw);
+s32  igb_get_bus_info_pcie(struct e1000_hw *hw);
+s32  igb_get_hw_semaphore(struct e1000_hw *hw);
+s32  igb_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed,
+				     u16 *duplex);
+s32  igb_id_led_init(struct e1000_hw *hw);
+s32  igb_led_off(struct e1000_hw *hw);
+void igb_update_mc_addr_list(struct e1000_hw *hw,
+			     u8 *mc_addr_list, u32 mc_addr_count);
+s32  igb_setup_link(struct e1000_hw *hw);
+s32  igb_validate_mdi_setting(struct e1000_hw *hw);
+s32  igb_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg,
+			     u32 offset, u8 data);
+
+void igb_clear_hw_cntrs_base(struct e1000_hw *hw);
+void igb_clear_vfta(struct e1000_hw *hw);
+void igb_write_vfta(struct e1000_hw *hw, u32 offset, u32 value);
+s32  igb_vfta_set(struct e1000_hw *hw, u32 vid, u32 vind,
+		  bool vlan_on, bool vlvf_bypass);
+void igb_config_collision_dist(struct e1000_hw *hw);
+void igb_init_rx_addrs(struct e1000_hw *hw, u16 rar_count);
+void igb_mta_set(struct e1000_hw *hw, u32 hash_value);
+void igb_put_hw_semaphore(struct e1000_hw *hw);
+void igb_rar_set(struct e1000_hw *hw, u8 *addr, u32 index);
+s32  igb_check_alt_mac_addr(struct e1000_hw *hw);
+
+bool igb_enable_mng_pass_thru(struct e1000_hw *hw);
+
+enum e1000_mng_mode {
+	e1000_mng_mode_none = 0,
+	e1000_mng_mode_asf,
+	e1000_mng_mode_pt,
+	e1000_mng_mode_ipmi,
+	e1000_mng_mode_host_if_only
+};
+
+#define E1000_FACTPS_MNGCG	0x20000000
+
+#define E1000_FWSM_MODE_MASK	0xE
+#define E1000_FWSM_MODE_SHIFT	1
+
+#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN	0x2
+
+void e1000_init_function_pointers_82575(struct e1000_hw *hw);
+
+#endif
diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.c b/drivers/net/ethernet/intel/igb/e1000_mbx.c
new file mode 100644
index 0000000000..29383112bc
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/e1000_mbx.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+#include "e1000_mbx.h"
+
+/**
+ *  igb_read_mbx - Reads a message from the mailbox
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @mbx_id: id of mailbox to read
+ *  @unlock: skip locking or not
+ *
+ *  returns SUCCESS if it successfully read message from buffer
+ **/
+s32 igb_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id,
+		 bool unlock)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = -E1000_ERR_MBX;
+
+	/* limit read to size of mailbox */
+	if (size > mbx->size)
+		size = mbx->size;
+
+	if (mbx->ops.read)
+		ret_val = mbx->ops.read(hw, msg, size, mbx_id, unlock);
+
+	return ret_val;
+}
+
+/**
+ *  igb_write_mbx - Write a message to the mailbox
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @mbx_id: id of mailbox to write
+ *
+ *  returns SUCCESS if it successfully copied message into the buffer
+ **/
+s32 igb_write_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = 0;
+
+	if (size > mbx->size)
+		ret_val = -E1000_ERR_MBX;
+
+	else if (mbx->ops.write)
+		ret_val = mbx->ops.write(hw, msg, size, mbx_id);
+
+	return ret_val;
+}
+
+/**
+ *  igb_check_for_msg - checks to see if someone sent us mail
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to check
+ *
+ *  returns SUCCESS if the Status bit was found or else ERR_MBX
+ **/
+s32 igb_check_for_msg(struct e1000_hw *hw, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = -E1000_ERR_MBX;
+
+	if (mbx->ops.check_for_msg)
+		ret_val = mbx->ops.check_for_msg(hw, mbx_id);
+
+	return ret_val;
+}
+
+/**
+ *  igb_check_for_ack - checks to see if someone sent us ACK
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to check
+ *
+ *  returns SUCCESS if the Status bit was found or else ERR_MBX
+ **/
+s32 igb_check_for_ack(struct e1000_hw *hw, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = -E1000_ERR_MBX;
+
+	if (mbx->ops.check_for_ack)
+		ret_val = mbx->ops.check_for_ack(hw, mbx_id);
+
+	return ret_val;
+}
+
+/**
+ *  igb_check_for_rst - checks to see if other side has reset
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to check
+ *
+ *  returns SUCCESS if the Status bit was found or else ERR_MBX
+ **/
+s32 igb_check_for_rst(struct e1000_hw *hw, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = -E1000_ERR_MBX;
+
+	if (mbx->ops.check_for_rst)
+		ret_val = mbx->ops.check_for_rst(hw, mbx_id);
+
+	return ret_val;
+}
+
+/**
+ *  igb_unlock_mbx - unlock the mailbox
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to check
+ *
+ *  returns SUCCESS if the mailbox was unlocked or else ERR_MBX
+ **/
+s32 igb_unlock_mbx(struct e1000_hw *hw, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = -E1000_ERR_MBX;
+
+	if (mbx->ops.unlock)
+		ret_val = mbx->ops.unlock(hw, mbx_id);
+
+	return ret_val;
+}
+
+/**
+ *  igb_poll_for_msg - Wait for message notification
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to write
+ *
+ *  returns SUCCESS if it successfully received a message notification
+ **/
+static s32 igb_poll_for_msg(struct e1000_hw *hw, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	int countdown = mbx->timeout;
+
+	if (!countdown || !mbx->ops.check_for_msg)
+		goto out;
+
+	while (countdown && mbx->ops.check_for_msg(hw, mbx_id)) {
+		countdown--;
+		if (!countdown)
+			break;
+		udelay(mbx->usec_delay);
+	}
+
+	/* if we failed, all future posted messages fail until reset */
+	if (!countdown)
+		mbx->timeout = 0;
+out:
+	return countdown ? 0 : -E1000_ERR_MBX;
+}
+
+/**
+ *  igb_poll_for_ack - Wait for message acknowledgement
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to write
+ *
+ *  returns SUCCESS if it successfully received a message acknowledgement
+ **/
+static s32 igb_poll_for_ack(struct e1000_hw *hw, u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	int countdown = mbx->timeout;
+
+	if (!countdown || !mbx->ops.check_for_ack)
+		goto out;
+
+	while (countdown && mbx->ops.check_for_ack(hw, mbx_id)) {
+		countdown--;
+		if (!countdown)
+			break;
+		udelay(mbx->usec_delay);
+	}
+
+	/* if we failed, all future posted messages fail until reset */
+	if (!countdown)
+		mbx->timeout = 0;
+out:
+	return countdown ? 0 : -E1000_ERR_MBX;
+}
+
+/**
+ *  igb_read_posted_mbx - Wait for message notification and receive message
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @mbx_id: id of mailbox to write
+ *
+ *  returns SUCCESS if it successfully received a message notification and
+ *  copied it into the receive buffer.
+ **/
+static s32 igb_read_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size,
+			       u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = -E1000_ERR_MBX;
+
+	if (!mbx->ops.read)
+		goto out;
+
+	ret_val = igb_poll_for_msg(hw, mbx_id);
+
+	if (!ret_val)
+		ret_val = mbx->ops.read(hw, msg, size, mbx_id, true);
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_write_posted_mbx - Write a message to the mailbox, wait for ack
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @mbx_id: id of mailbox to write
+ *
+ *  returns SUCCESS if it successfully copied message into the buffer and
+ *  received an ack to that message within delay * timeout period
+ **/
+static s32 igb_write_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size,
+				u16 mbx_id)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = -E1000_ERR_MBX;
+
+	/* exit if either we can't write or there isn't a defined timeout */
+	if (!mbx->ops.write || !mbx->timeout)
+		goto out;
+
+	/* send msg */
+	ret_val = mbx->ops.write(hw, msg, size, mbx_id);
+
+	/* if msg sent wait until we receive an ack */
+	if (!ret_val)
+		ret_val = igb_poll_for_ack(hw, mbx_id);
+out:
+	return ret_val;
+}
+
+static s32 igb_check_for_bit_pf(struct e1000_hw *hw, u32 mask)
+{
+	u32 mbvficr = rd32(E1000_MBVFICR);
+	s32 ret_val = -E1000_ERR_MBX;
+
+	if (mbvficr & mask) {
+		ret_val = 0;
+		wr32(E1000_MBVFICR, mask);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  igb_check_for_msg_pf - checks to see if the VF has sent mail
+ *  @hw: pointer to the HW structure
+ *  @vf_number: the VF index
+ *
+ *  returns SUCCESS if the VF has set the Status bit or else ERR_MBX
+ **/
+static s32 igb_check_for_msg_pf(struct e1000_hw *hw, u16 vf_number)
+{
+	s32 ret_val = -E1000_ERR_MBX;
+
+	if (!igb_check_for_bit_pf(hw, E1000_MBVFICR_VFREQ_VF1 << vf_number)) {
+		ret_val = 0;
+		hw->mbx.stats.reqs++;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  igb_check_for_ack_pf - checks to see if the VF has ACKed
+ *  @hw: pointer to the HW structure
+ *  @vf_number: the VF index
+ *
+ *  returns SUCCESS if the VF has set the Status bit or else ERR_MBX
+ **/
+static s32 igb_check_for_ack_pf(struct e1000_hw *hw, u16 vf_number)
+{
+	s32 ret_val = -E1000_ERR_MBX;
+
+	if (!igb_check_for_bit_pf(hw, E1000_MBVFICR_VFACK_VF1 << vf_number)) {
+		ret_val = 0;
+		hw->mbx.stats.acks++;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  igb_check_for_rst_pf - checks to see if the VF has reset
+ *  @hw: pointer to the HW structure
+ *  @vf_number: the VF index
+ *
+ *  returns SUCCESS if the VF has set the Status bit or else ERR_MBX
+ **/
+static s32 igb_check_for_rst_pf(struct e1000_hw *hw, u16 vf_number)
+{
+	u32 vflre = rd32(E1000_VFLRE);
+	s32 ret_val = -E1000_ERR_MBX;
+
+	if (vflre & BIT(vf_number)) {
+		ret_val = 0;
+		wr32(E1000_VFLRE, BIT(vf_number));
+		hw->mbx.stats.rsts++;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  igb_obtain_mbx_lock_pf - obtain mailbox lock
+ *  @hw: pointer to the HW structure
+ *  @vf_number: the VF index
+ *
+ *  return SUCCESS if we obtained the mailbox lock
+ **/
+static s32 igb_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number)
+{
+	s32 ret_val = -E1000_ERR_MBX;
+	u32 p2v_mailbox;
+	int count = 10;
+
+	do {
+		/* Take ownership of the buffer */
+		wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU);
+
+		/* reserve mailbox for vf use */
+		p2v_mailbox = rd32(E1000_P2VMAILBOX(vf_number));
+		if (p2v_mailbox & E1000_P2VMAILBOX_PFU) {
+			ret_val = 0;
+			break;
+		}
+		udelay(1000);
+	} while (count-- > 0);
+
+	return ret_val;
+}
+
+/**
+ *  igb_release_mbx_lock_pf - release mailbox lock
+ *  @hw: pointer to the HW structure
+ *  @vf_number: the VF index
+ *
+ *  return SUCCESS if we released the mailbox lock
+ **/
+static s32 igb_release_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number)
+{
+	u32 p2v_mailbox;
+
+	/* drop PF lock of mailbox, if set */
+	p2v_mailbox = rd32(E1000_P2VMAILBOX(vf_number));
+	if (p2v_mailbox & E1000_P2VMAILBOX_PFU)
+		wr32(E1000_P2VMAILBOX(vf_number),
+		     p2v_mailbox & ~E1000_P2VMAILBOX_PFU);
+
+	return 0;
+}
+
+/**
+ *  igb_write_mbx_pf - Places a message in the mailbox
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @vf_number: the VF index
+ *
+ *  returns SUCCESS if it successfully copied message into the buffer
+ **/
+static s32 igb_write_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size,
+			    u16 vf_number)
+{
+	s32 ret_val;
+	u16 i;
+
+	/* lock the mailbox to prevent pf/vf race condition */
+	ret_val = igb_obtain_mbx_lock_pf(hw, vf_number);
+	if (ret_val)
+		goto out_no_write;
+
+	/* flush msg and acks as we are overwriting the message buffer */
+	igb_check_for_msg_pf(hw, vf_number);
+	igb_check_for_ack_pf(hw, vf_number);
+
+	/* copy the caller specified message to the mailbox memory buffer */
+	for (i = 0; i < size; i++)
+		array_wr32(E1000_VMBMEM(vf_number), i, msg[i]);
+
+	/* Interrupt VF to tell it a message has been sent and release buffer*/
+	wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_STS);
+
+	/* update stats */
+	hw->mbx.stats.msgs_tx++;
+
+out_no_write:
+	return ret_val;
+
+}
+
+/**
+ *  igb_read_mbx_pf - Read a message from the mailbox
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @vf_number: the VF index
+ *  @unlock: unlock the mailbox when done?
+ *
+ *  This function copies a message from the mailbox buffer to the caller's
+ *  memory buffer.  The presumption is that the caller knows that there was
+ *  a message due to a VF request so no polling for message is needed.
+ **/
+static s32 igb_read_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size,
+			   u16 vf_number, bool unlock)
+{
+	s32 ret_val;
+	u16 i;
+
+	/* lock the mailbox to prevent pf/vf race condition */
+	ret_val = igb_obtain_mbx_lock_pf(hw, vf_number);
+	if (ret_val)
+		goto out_no_read;
+
+	/* copy the message to the mailbox memory buffer */
+	for (i = 0; i < size; i++)
+		msg[i] = array_rd32(E1000_VMBMEM(vf_number), i);
+
+	/* Acknowledge the message and release mailbox lock (or not) */
+	if (unlock)
+		wr32(E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_ACK);
+	else
+		wr32(E1000_P2VMAILBOX(vf_number),
+		     E1000_P2VMAILBOX_ACK | E1000_P2VMAILBOX_PFU);
+
+	/* update stats */
+	hw->mbx.stats.msgs_rx++;
+
+out_no_read:
+	return ret_val;
+}
+
+/**
+ *  igb_init_mbx_params_pf - set initial values for pf mailbox
+ *  @hw: pointer to the HW structure
+ *
+ *  Initializes the hw->mbx struct to correct values for pf mailbox
+ */
+s32 igb_init_mbx_params_pf(struct e1000_hw *hw)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+
+	mbx->timeout = 0;
+	mbx->usec_delay = 0;
+
+	mbx->size = E1000_VFMAILBOX_SIZE;
+
+	mbx->ops.read = igb_read_mbx_pf;
+	mbx->ops.write = igb_write_mbx_pf;
+	mbx->ops.read_posted = igb_read_posted_mbx;
+	mbx->ops.write_posted = igb_write_posted_mbx;
+	mbx->ops.check_for_msg = igb_check_for_msg_pf;
+	mbx->ops.check_for_ack = igb_check_for_ack_pf;
+	mbx->ops.check_for_rst = igb_check_for_rst_pf;
+	mbx->ops.unlock = igb_release_mbx_lock_pf;
+
+	mbx->stats.msgs_tx = 0;
+	mbx->stats.msgs_rx = 0;
+	mbx->stats.reqs = 0;
+	mbx->stats.acks = 0;
+	mbx->stats.rsts = 0;
+
+	return 0;
+}
+
diff --git a/drivers/net/ethernet/intel/igb/e1000_mbx.h b/drivers/net/ethernet/intel/igb/e1000_mbx.h
new file mode 100644
index 0000000000..178e60ec71
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/e1000_mbx.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+#ifndef _E1000_MBX_H_
+#define _E1000_MBX_H_
+
+#include "e1000_hw.h"
+
+#define E1000_P2VMAILBOX_STS	0x00000001 /* Initiate message send to VF */
+#define E1000_P2VMAILBOX_ACK	0x00000002 /* Ack message recv'd from VF */
+#define E1000_P2VMAILBOX_VFU	0x00000004 /* VF owns the mailbox buffer */
+#define E1000_P2VMAILBOX_PFU	0x00000008 /* PF owns the mailbox buffer */
+#define E1000_P2VMAILBOX_RVFU	0x00000010 /* Reset VFU - used when VF stuck */
+
+#define E1000_MBVFICR_VFREQ_MASK	0x000000FF /* bits for VF messages */
+#define E1000_MBVFICR_VFREQ_VF1		0x00000001 /* bit for VF 1 message */
+#define E1000_MBVFICR_VFACK_MASK	0x00FF0000 /* bits for VF acks */
+#define E1000_MBVFICR_VFACK_VF1		0x00010000 /* bit for VF 1 ack */
+
+#define E1000_VFMAILBOX_SIZE	16 /* 16 32 bit words - 64 bytes */
+
+/* If it's a E1000_VF_* msg then it originates in the VF and is sent to the
+ * PF.  The reverse is true if it is E1000_PF_*.
+ * Message ACK's are the value or'd with 0xF0000000
+ */
+/* Messages below or'd with this are the ACK */
+#define E1000_VT_MSGTYPE_ACK	0x80000000
+/* Messages below or'd with this are the NACK */
+#define E1000_VT_MSGTYPE_NACK	0x40000000
+/* Indicates that VF is still clear to send requests */
+#define E1000_VT_MSGTYPE_CTS	0x20000000
+#define E1000_VT_MSGINFO_SHIFT	16
+/* bits 23:16 are used for exra info for certain messages */
+#define E1000_VT_MSGINFO_MASK	(0xFF << E1000_VT_MSGINFO_SHIFT)
+
+#define E1000_VF_RESET		0x01 /* VF requests reset */
+#define E1000_VF_SET_MAC_ADDR	0x02 /* VF requests to set MAC addr */
+/* VF requests to clear all unicast MAC filters */
+#define E1000_VF_MAC_FILTER_CLR	(0x01 << E1000_VT_MSGINFO_SHIFT)
+/* VF requests to add unicast MAC filter */
+#define E1000_VF_MAC_FILTER_ADD	(0x02 << E1000_VT_MSGINFO_SHIFT)
+#define E1000_VF_SET_MULTICAST	0x03 /* VF requests to set MC addr */
+#define E1000_VF_SET_VLAN	0x04 /* VF requests to set VLAN */
+#define E1000_VF_SET_LPE	0x05 /* VF requests to set VMOLR.LPE */
+#define E1000_VF_SET_PROMISC	0x06 /*VF requests to clear VMOLR.ROPE/MPME*/
+#define E1000_VF_SET_PROMISC_MULTICAST	(0x02 << E1000_VT_MSGINFO_SHIFT)
+
+#define E1000_PF_CONTROL_MSG	0x0100 /* PF control message */
+
+s32 igb_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id,
+		 bool unlock);
+s32 igb_write_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id);
+s32 igb_check_for_msg(struct e1000_hw *hw, u16 mbx_id);
+s32 igb_check_for_ack(struct e1000_hw *hw, u16 mbx_id);
+s32 igb_check_for_rst(struct e1000_hw *hw, u16 mbx_id);
+s32 igb_unlock_mbx(struct e1000_hw *hw, u16 mbx_id);
+s32 igb_init_mbx_params_pf(struct e1000_hw *hw);
+
+#endif /* _E1000_MBX_H_ */
diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c
new file mode 100644
index 0000000000..fa136e6e93
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c
@@ -0,0 +1,782 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+#include <linux/if_ether.h>
+#include <linux/delay.h>
+
+#include "e1000_mac.h"
+#include "e1000_nvm.h"
+
+/**
+ *  igb_raise_eec_clk - Raise EEPROM clock
+ *  @hw: pointer to the HW structure
+ *  @eecd: pointer to the EEPROM
+ *
+ *  Enable/Raise the EEPROM clock bit.
+ **/
+static void igb_raise_eec_clk(struct e1000_hw *hw, u32 *eecd)
+{
+	*eecd = *eecd | E1000_EECD_SK;
+	wr32(E1000_EECD, *eecd);
+	wrfl();
+	udelay(hw->nvm.delay_usec);
+}
+
+/**
+ *  igb_lower_eec_clk - Lower EEPROM clock
+ *  @hw: pointer to the HW structure
+ *  @eecd: pointer to the EEPROM
+ *
+ *  Clear/Lower the EEPROM clock bit.
+ **/
+static void igb_lower_eec_clk(struct e1000_hw *hw, u32 *eecd)
+{
+	*eecd = *eecd & ~E1000_EECD_SK;
+	wr32(E1000_EECD, *eecd);
+	wrfl();
+	udelay(hw->nvm.delay_usec);
+}
+
+/**
+ *  igb_shift_out_eec_bits - Shift data bits our to the EEPROM
+ *  @hw: pointer to the HW structure
+ *  @data: data to send to the EEPROM
+ *  @count: number of bits to shift out
+ *
+ *  We need to shift 'count' bits out to the EEPROM.  So, the value in the
+ *  "data" parameter will be shifted out to the EEPROM one bit at a time.
+ *  In order to do this, "data" must be broken down into bits.
+ **/
+static void igb_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 eecd = rd32(E1000_EECD);
+	u32 mask;
+
+	mask = 1u << (count - 1);
+	if (nvm->type == e1000_nvm_eeprom_spi)
+		eecd |= E1000_EECD_DO;
+
+	do {
+		eecd &= ~E1000_EECD_DI;
+
+		if (data & mask)
+			eecd |= E1000_EECD_DI;
+
+		wr32(E1000_EECD, eecd);
+		wrfl();
+
+		udelay(nvm->delay_usec);
+
+		igb_raise_eec_clk(hw, &eecd);
+		igb_lower_eec_clk(hw, &eecd);
+
+		mask >>= 1;
+	} while (mask);
+
+	eecd &= ~E1000_EECD_DI;
+	wr32(E1000_EECD, eecd);
+}
+
+/**
+ *  igb_shift_in_eec_bits - Shift data bits in from the EEPROM
+ *  @hw: pointer to the HW structure
+ *  @count: number of bits to shift in
+ *
+ *  In order to read a register from the EEPROM, we need to shift 'count' bits
+ *  in from the EEPROM.  Bits are "shifted in" by raising the clock input to
+ *  the EEPROM (setting the SK bit), and then reading the value of the data out
+ *  "DO" bit.  During this "shifting in" process the data in "DI" bit should
+ *  always be clear.
+ **/
+static u16 igb_shift_in_eec_bits(struct e1000_hw *hw, u16 count)
+{
+	u32 eecd;
+	u32 i;
+	u16 data;
+
+	eecd = rd32(E1000_EECD);
+
+	eecd &= ~(E1000_EECD_DO | E1000_EECD_DI);
+	data = 0;
+
+	for (i = 0; i < count; i++) {
+		data <<= 1;
+		igb_raise_eec_clk(hw, &eecd);
+
+		eecd = rd32(E1000_EECD);
+
+		eecd &= ~E1000_EECD_DI;
+		if (eecd & E1000_EECD_DO)
+			data |= 1;
+
+		igb_lower_eec_clk(hw, &eecd);
+	}
+
+	return data;
+}
+
+/**
+ *  igb_poll_eerd_eewr_done - Poll for EEPROM read/write completion
+ *  @hw: pointer to the HW structure
+ *  @ee_reg: EEPROM flag for polling
+ *
+ *  Polls the EEPROM status bit for either read or write completion based
+ *  upon the value of 'ee_reg'.
+ **/
+static s32 igb_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg)
+{
+	u32 attempts = 100000;
+	u32 i, reg = 0;
+	s32 ret_val = -E1000_ERR_NVM;
+
+	for (i = 0; i < attempts; i++) {
+		if (ee_reg == E1000_NVM_POLL_READ)
+			reg = rd32(E1000_EERD);
+		else
+			reg = rd32(E1000_EEWR);
+
+		if (reg & E1000_NVM_RW_REG_DONE) {
+			ret_val = 0;
+			break;
+		}
+
+		udelay(5);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  igb_acquire_nvm - Generic request for access to EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ *  Return successful if access grant bit set, else clear the request for
+ *  EEPROM access and return -E1000_ERR_NVM (-1).
+ **/
+s32 igb_acquire_nvm(struct e1000_hw *hw)
+{
+	u32 eecd = rd32(E1000_EECD);
+	s32 timeout = E1000_NVM_GRANT_ATTEMPTS;
+	s32 ret_val = 0;
+
+
+	wr32(E1000_EECD, eecd | E1000_EECD_REQ);
+	eecd = rd32(E1000_EECD);
+
+	while (timeout) {
+		if (eecd & E1000_EECD_GNT)
+			break;
+		udelay(5);
+		eecd = rd32(E1000_EECD);
+		timeout--;
+	}
+
+	if (!timeout) {
+		eecd &= ~E1000_EECD_REQ;
+		wr32(E1000_EECD, eecd);
+		hw_dbg("Could not acquire NVM grant\n");
+		ret_val = -E1000_ERR_NVM;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  igb_standby_nvm - Return EEPROM to standby state
+ *  @hw: pointer to the HW structure
+ *
+ *  Return the EEPROM to a standby state.
+ **/
+static void igb_standby_nvm(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 eecd = rd32(E1000_EECD);
+
+	if (nvm->type == e1000_nvm_eeprom_spi) {
+		/* Toggle CS to flush commands */
+		eecd |= E1000_EECD_CS;
+		wr32(E1000_EECD, eecd);
+		wrfl();
+		udelay(nvm->delay_usec);
+		eecd &= ~E1000_EECD_CS;
+		wr32(E1000_EECD, eecd);
+		wrfl();
+		udelay(nvm->delay_usec);
+	}
+}
+
+/**
+ *  e1000_stop_nvm - Terminate EEPROM command
+ *  @hw: pointer to the HW structure
+ *
+ *  Terminates the current command by inverting the EEPROM's chip select pin.
+ **/
+static void e1000_stop_nvm(struct e1000_hw *hw)
+{
+	u32 eecd;
+
+	eecd = rd32(E1000_EECD);
+	if (hw->nvm.type == e1000_nvm_eeprom_spi) {
+		/* Pull CS high */
+		eecd |= E1000_EECD_CS;
+		igb_lower_eec_clk(hw, &eecd);
+	}
+}
+
+/**
+ *  igb_release_nvm - Release exclusive access to EEPROM
+ *  @hw: pointer to the HW structure
+ *
+ *  Stop any current commands to the EEPROM and clear the EEPROM request bit.
+ **/
+void igb_release_nvm(struct e1000_hw *hw)
+{
+	u32 eecd;
+
+	e1000_stop_nvm(hw);
+
+	eecd = rd32(E1000_EECD);
+	eecd &= ~E1000_EECD_REQ;
+	wr32(E1000_EECD, eecd);
+}
+
+/**
+ *  igb_ready_nvm_eeprom - Prepares EEPROM for read/write
+ *  @hw: pointer to the HW structure
+ *
+ *  Setups the EEPROM for reading and writing.
+ **/
+static s32 igb_ready_nvm_eeprom(struct e1000_hw *hw)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 eecd = rd32(E1000_EECD);
+	s32 ret_val = 0;
+	u16 timeout = 0;
+	u8 spi_stat_reg;
+
+
+	if (nvm->type == e1000_nvm_eeprom_spi) {
+		/* Clear SK and CS */
+		eecd &= ~(E1000_EECD_CS | E1000_EECD_SK);
+		wr32(E1000_EECD, eecd);
+		wrfl();
+		udelay(1);
+		timeout = NVM_MAX_RETRY_SPI;
+
+		/* Read "Status Register" repeatedly until the LSB is cleared.
+		 * The EEPROM will signal that the command has been completed
+		 * by clearing bit 0 of the internal status register.  If it's
+		 * not cleared within 'timeout', then error out.
+		 */
+		while (timeout) {
+			igb_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI,
+					       hw->nvm.opcode_bits);
+			spi_stat_reg = (u8)igb_shift_in_eec_bits(hw, 8);
+			if (!(spi_stat_reg & NVM_STATUS_RDY_SPI))
+				break;
+
+			udelay(5);
+			igb_standby_nvm(hw);
+			timeout--;
+		}
+
+		if (!timeout) {
+			hw_dbg("SPI NVM Status error\n");
+			ret_val = -E1000_ERR_NVM;
+			goto out;
+		}
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_read_nvm_spi - Read EEPROM's using SPI
+ *  @hw: pointer to the HW structure
+ *  @offset: offset of word in the EEPROM to read
+ *  @words: number of words to read
+ *  @data: word read from the EEPROM
+ *
+ *  Reads a 16 bit word from the EEPROM.
+ **/
+s32 igb_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 i = 0;
+	s32 ret_val;
+	u16 word_in;
+	u8 read_opcode = NVM_READ_OPCODE_SPI;
+
+	/* A check for invalid values:  offset too large, too many words,
+	 * and not enough words.
+	 */
+	if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
+	    (words == 0)) {
+		hw_dbg("nvm parameter(s) out of bounds\n");
+		ret_val = -E1000_ERR_NVM;
+		goto out;
+	}
+
+	ret_val = nvm->ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	ret_val = igb_ready_nvm_eeprom(hw);
+	if (ret_val)
+		goto release;
+
+	igb_standby_nvm(hw);
+
+	if ((nvm->address_bits == 8) && (offset >= 128))
+		read_opcode |= NVM_A8_OPCODE_SPI;
+
+	/* Send the READ command (opcode + addr) */
+	igb_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits);
+	igb_shift_out_eec_bits(hw, (u16)(offset*2), nvm->address_bits);
+
+	/* Read the data.  SPI NVMs increment the address with each byte
+	 * read and will roll over if reading beyond the end.  This allows
+	 * us to read the whole NVM from any offset
+	 */
+	for (i = 0; i < words; i++) {
+		word_in = igb_shift_in_eec_bits(hw, 16);
+		data[i] = (word_in >> 8) | (word_in << 8);
+	}
+
+release:
+	nvm->ops.release(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_read_nvm_eerd - Reads EEPROM using EERD register
+ *  @hw: pointer to the HW structure
+ *  @offset: offset of word in the EEPROM to read
+ *  @words: number of words to read
+ *  @data: word read from the EEPROM
+ *
+ *  Reads a 16 bit word from the EEPROM using the EERD register.
+ **/
+s32 igb_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	u32 i, eerd = 0;
+	s32 ret_val = 0;
+
+	/* A check for invalid values:  offset too large, too many words,
+	 * and not enough words.
+	 */
+	if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
+	    (words == 0)) {
+		hw_dbg("nvm parameter(s) out of bounds\n");
+		ret_val = -E1000_ERR_NVM;
+		goto out;
+	}
+
+	for (i = 0; i < words; i++) {
+		eerd = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) +
+			E1000_NVM_RW_REG_START;
+
+		wr32(E1000_EERD, eerd);
+		ret_val = igb_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ);
+		if (ret_val)
+			break;
+
+		data[i] = (rd32(E1000_EERD) >>
+			E1000_NVM_RW_REG_DATA);
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_write_nvm_spi - Write to EEPROM using SPI
+ *  @hw: pointer to the HW structure
+ *  @offset: offset within the EEPROM to be written to
+ *  @words: number of words to write
+ *  @data: 16 bit word(s) to be written to the EEPROM
+ *
+ *  Writes data to EEPROM at offset using SPI interface.
+ *
+ *  If e1000_update_nvm_checksum is not called after this function , the
+ *  EEPROM will most likley contain an invalid checksum.
+ **/
+s32 igb_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data)
+{
+	struct e1000_nvm_info *nvm = &hw->nvm;
+	s32 ret_val = -E1000_ERR_NVM;
+	u16 widx = 0;
+
+	/* A check for invalid values:  offset too large, too many words,
+	 * and not enough words.
+	 */
+	if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) ||
+	    (words == 0)) {
+		hw_dbg("nvm parameter(s) out of bounds\n");
+		return ret_val;
+	}
+
+	while (widx < words) {
+		u8 write_opcode = NVM_WRITE_OPCODE_SPI;
+
+		ret_val = nvm->ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = igb_ready_nvm_eeprom(hw);
+		if (ret_val) {
+			nvm->ops.release(hw);
+			return ret_val;
+		}
+
+		igb_standby_nvm(hw);
+
+		/* Send the WRITE ENABLE command (8 bit opcode) */
+		igb_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI,
+					 nvm->opcode_bits);
+
+		igb_standby_nvm(hw);
+
+		/* Some SPI eeproms use the 8th address bit embedded in the
+		 * opcode
+		 */
+		if ((nvm->address_bits == 8) && (offset >= 128))
+			write_opcode |= NVM_A8_OPCODE_SPI;
+
+		/* Send the Write command (8-bit opcode + addr) */
+		igb_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits);
+		igb_shift_out_eec_bits(hw, (u16)((offset + widx) * 2),
+					 nvm->address_bits);
+
+		/* Loop to allow for up to whole page write of eeprom */
+		while (widx < words) {
+			u16 word_out = data[widx];
+
+			word_out = (word_out >> 8) | (word_out << 8);
+			igb_shift_out_eec_bits(hw, word_out, 16);
+			widx++;
+
+			if ((((offset + widx) * 2) % nvm->page_size) == 0) {
+				igb_standby_nvm(hw);
+				break;
+			}
+		}
+		usleep_range(1000, 2000);
+		nvm->ops.release(hw);
+	}
+
+	return ret_val;
+}
+
+/**
+ *  igb_read_part_string - Read device part number
+ *  @hw: pointer to the HW structure
+ *  @part_num: pointer to device part number
+ *  @part_num_size: size of part number buffer
+ *
+ *  Reads the product board assembly (PBA) number from the EEPROM and stores
+ *  the value in part_num.
+ **/
+s32 igb_read_part_string(struct e1000_hw *hw, u8 *part_num, u32 part_num_size)
+{
+	s32 ret_val;
+	u16 nvm_data;
+	u16 pointer;
+	u16 offset;
+	u16 length;
+
+	if (part_num == NULL) {
+		hw_dbg("PBA string buffer was null\n");
+		ret_val = E1000_ERR_INVALID_ARGUMENT;
+		goto out;
+	}
+
+	ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data);
+	if (ret_val) {
+		hw_dbg("NVM Read Error\n");
+		goto out;
+	}
+
+	ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pointer);
+	if (ret_val) {
+		hw_dbg("NVM Read Error\n");
+		goto out;
+	}
+
+	/* if nvm_data is not ptr guard the PBA must be in legacy format which
+	 * means pointer is actually our second data word for the PBA number
+	 * and we can decode it into an ascii string
+	 */
+	if (nvm_data != NVM_PBA_PTR_GUARD) {
+		hw_dbg("NVM PBA number is not stored as string\n");
+
+		/* we will need 11 characters to store the PBA */
+		if (part_num_size < 11) {
+			hw_dbg("PBA string buffer too small\n");
+			return E1000_ERR_NO_SPACE;
+		}
+
+		/* extract hex string from data and pointer */
+		part_num[0] = (nvm_data >> 12) & 0xF;
+		part_num[1] = (nvm_data >> 8) & 0xF;
+		part_num[2] = (nvm_data >> 4) & 0xF;
+		part_num[3] = nvm_data & 0xF;
+		part_num[4] = (pointer >> 12) & 0xF;
+		part_num[5] = (pointer >> 8) & 0xF;
+		part_num[6] = '-';
+		part_num[7] = 0;
+		part_num[8] = (pointer >> 4) & 0xF;
+		part_num[9] = pointer & 0xF;
+
+		/* put a null character on the end of our string */
+		part_num[10] = '\0';
+
+		/* switch all the data but the '-' to hex char */
+		for (offset = 0; offset < 10; offset++) {
+			if (part_num[offset] < 0xA)
+				part_num[offset] += '0';
+			else if (part_num[offset] < 0x10)
+				part_num[offset] += 'A' - 0xA;
+		}
+
+		goto out;
+	}
+
+	ret_val = hw->nvm.ops.read(hw, pointer, 1, &length);
+	if (ret_val) {
+		hw_dbg("NVM Read Error\n");
+		goto out;
+	}
+
+	if (length == 0xFFFF || length == 0) {
+		hw_dbg("NVM PBA number section invalid length\n");
+		ret_val = E1000_ERR_NVM_PBA_SECTION;
+		goto out;
+	}
+	/* check if part_num buffer is big enough */
+	if (part_num_size < (((u32)length * 2) - 1)) {
+		hw_dbg("PBA string buffer too small\n");
+		ret_val = E1000_ERR_NO_SPACE;
+		goto out;
+	}
+
+	/* trim pba length from start of string */
+	pointer++;
+	length--;
+
+	for (offset = 0; offset < length; offset++) {
+		ret_val = hw->nvm.ops.read(hw, pointer + offset, 1, &nvm_data);
+		if (ret_val) {
+			hw_dbg("NVM Read Error\n");
+			goto out;
+		}
+		part_num[offset * 2] = (u8)(nvm_data >> 8);
+		part_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF);
+	}
+	part_num[offset * 2] = '\0';
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_read_mac_addr - Read device MAC address
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the device MAC address from the EEPROM and stores the value.
+ *  Since devices with two ports use the same EEPROM, we increment the
+ *  last bit in the MAC address for the second port.
+ **/
+s32 igb_read_mac_addr(struct e1000_hw *hw)
+{
+	u32 rar_high;
+	u32 rar_low;
+	u16 i;
+
+	rar_high = rd32(E1000_RAH(0));
+	rar_low = rd32(E1000_RAL(0));
+
+	for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++)
+		hw->mac.perm_addr[i] = (u8)(rar_low >> (i*8));
+
+	for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++)
+		hw->mac.perm_addr[i+4] = (u8)(rar_high >> (i*8));
+
+	for (i = 0; i < ETH_ALEN; i++)
+		hw->mac.addr[i] = hw->mac.perm_addr[i];
+
+	return 0;
+}
+
+/**
+ *  igb_validate_nvm_checksum - Validate EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ **/
+s32 igb_validate_nvm_checksum(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u16 checksum = 0;
+	u16 i, nvm_data;
+
+	for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) {
+		ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			hw_dbg("NVM Read Error\n");
+			goto out;
+		}
+		checksum += nvm_data;
+	}
+
+	if (checksum != (u16) NVM_SUM) {
+		hw_dbg("NVM Checksum Invalid\n");
+		ret_val = -E1000_ERR_NVM;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_update_nvm_checksum - Update EEPROM checksum
+ *  @hw: pointer to the HW structure
+ *
+ *  Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ *  up to the checksum.  Then calculates the EEPROM checksum and writes the
+ *  value to the EEPROM.
+ **/
+s32 igb_update_nvm_checksum(struct e1000_hw *hw)
+{
+	s32  ret_val;
+	u16 checksum = 0;
+	u16 i, nvm_data;
+
+	for (i = 0; i < NVM_CHECKSUM_REG; i++) {
+		ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			hw_dbg("NVM Read Error while updating checksum.\n");
+			goto out;
+		}
+		checksum += nvm_data;
+	}
+	checksum = (u16) NVM_SUM - checksum;
+	ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum);
+	if (ret_val)
+		hw_dbg("NVM Write Error while updating checksum.\n");
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_get_fw_version - Get firmware version information
+ *  @hw: pointer to the HW structure
+ *  @fw_vers: pointer to output structure
+ *
+ *  unsupported MAC types will return all 0 version structure
+ **/
+void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers)
+{
+	u16 eeprom_verh, eeprom_verl, etrack_test, fw_version;
+	u8 q, hval, rem, result;
+	u16 comb_verh, comb_verl, comb_offset;
+
+	memset(fw_vers, 0, sizeof(struct e1000_fw_version));
+
+	/* basic eeprom version numbers and bits used vary by part and by tool
+	 * used to create the nvm images. Check which data format we have.
+	 */
+	hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test);
+	switch (hw->mac.type) {
+	case e1000_i211:
+		igb_read_invm_version(hw, fw_vers);
+		return;
+	case e1000_82575:
+	case e1000_82576:
+	case e1000_82580:
+		/* Use this format, unless EETRACK ID exists,
+		 * then use alternate format
+		 */
+		if ((etrack_test &  NVM_MAJOR_MASK) != NVM_ETRACK_VALID) {
+			hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version);
+			fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK)
+					      >> NVM_MAJOR_SHIFT;
+			fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK)
+					      >> NVM_MINOR_SHIFT;
+			fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK);
+			goto etrack_id;
+		}
+		break;
+	case e1000_i210:
+		if (!(igb_get_flash_presence_i210(hw))) {
+			igb_read_invm_version(hw, fw_vers);
+			return;
+		}
+		fallthrough;
+	case e1000_i350:
+		/* find combo image version */
+		hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset);
+		if ((comb_offset != 0x0) &&
+		    (comb_offset != NVM_VER_INVALID)) {
+
+			hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset
+					 + 1), 1, &comb_verh);
+			hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset),
+					 1, &comb_verl);
+
+			/* get Option Rom version if it exists and is valid */
+			if ((comb_verh && comb_verl) &&
+			    ((comb_verh != NVM_VER_INVALID) &&
+			     (comb_verl != NVM_VER_INVALID))) {
+
+				fw_vers->or_valid = true;
+				fw_vers->or_major =
+					comb_verl >> NVM_COMB_VER_SHFT;
+				fw_vers->or_build =
+					(comb_verl << NVM_COMB_VER_SHFT)
+					| (comb_verh >> NVM_COMB_VER_SHFT);
+				fw_vers->or_patch =
+					comb_verh & NVM_COMB_VER_MASK;
+			}
+		}
+		break;
+	default:
+		return;
+	}
+	hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version);
+	fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK)
+			      >> NVM_MAJOR_SHIFT;
+
+	/* check for old style version format in newer images*/
+	if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) {
+		eeprom_verl = (fw_version & NVM_COMB_VER_MASK);
+	} else {
+		eeprom_verl = (fw_version & NVM_MINOR_MASK)
+				>> NVM_MINOR_SHIFT;
+	}
+	/* Convert minor value to hex before assigning to output struct
+	 * Val to be converted will not be higher than 99, per tool output
+	 */
+	q = eeprom_verl / NVM_HEX_CONV;
+	hval = q * NVM_HEX_TENS;
+	rem = eeprom_verl % NVM_HEX_CONV;
+	result = hval + rem;
+	fw_vers->eep_minor = result;
+
+etrack_id:
+	if ((etrack_test &  NVM_MAJOR_MASK) == NVM_ETRACK_VALID) {
+		hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verl);
+		hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verh);
+		fw_vers->etrack_id = (eeprom_verh << NVM_ETRACK_SHIFT)
+			| eeprom_verl;
+	}
+}
diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.h b/drivers/net/ethernet/intel/igb/e1000_nvm.h
new file mode 100644
index 0000000000..091cddf4ad
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/e1000_nvm.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+#ifndef _E1000_NVM_H_
+#define _E1000_NVM_H_
+
+s32  igb_acquire_nvm(struct e1000_hw *hw);
+void igb_release_nvm(struct e1000_hw *hw);
+s32  igb_read_mac_addr(struct e1000_hw *hw);
+s32  igb_read_part_num(struct e1000_hw *hw, u32 *part_num);
+s32  igb_read_part_string(struct e1000_hw *hw, u8 *part_num,
+			  u32 part_num_size);
+s32  igb_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
+s32  igb_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
+s32  igb_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data);
+s32  igb_validate_nvm_checksum(struct e1000_hw *hw);
+s32  igb_update_nvm_checksum(struct e1000_hw *hw);
+
+struct e1000_fw_version {
+	u32 etrack_id;
+	u16 eep_major;
+	u16 eep_minor;
+	u16 eep_build;
+
+	u8 invm_major;
+	u8 invm_minor;
+	u8 invm_img_type;
+
+	bool or_valid;
+	u16 or_major;
+	u16 or_build;
+	u16 or_patch;
+};
+void igb_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers);
+
+#endif
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c
new file mode 100644
index 0000000000..a018000f7d
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.c
@@ -0,0 +1,2631 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+#include <linux/if_ether.h>
+#include <linux/delay.h>
+
+#include "e1000_mac.h"
+#include "e1000_phy.h"
+
+static s32  igb_phy_setup_autoneg(struct e1000_hw *hw);
+static void igb_phy_force_speed_duplex_setup(struct e1000_hw *hw,
+					     u16 *phy_ctrl);
+static s32  igb_wait_autoneg(struct e1000_hw *hw);
+static s32  igb_set_master_slave_mode(struct e1000_hw *hw);
+
+/* Cable length tables */
+static const u16 e1000_m88_cable_length_table[] = {
+	0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED };
+
+static const u16 e1000_igp_2_cable_length_table[] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21,
+	0, 0, 0, 3, 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41,
+	6, 10, 14, 18, 22, 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61,
+	21, 26, 31, 35, 40, 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82,
+	40, 45, 51, 56, 61, 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104,
+	60, 66, 72, 77, 82, 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121,
+	83, 89, 95, 100, 105, 109, 113, 116, 119, 122, 124,
+	104, 109, 114, 118, 121, 124};
+
+/**
+ *  igb_check_reset_block - Check if PHY reset is blocked
+ *  @hw: pointer to the HW structure
+ *
+ *  Read the PHY management control register and check whether a PHY reset
+ *  is blocked.  If a reset is not blocked return 0, otherwise
+ *  return E1000_BLK_PHY_RESET (12).
+ **/
+s32 igb_check_reset_block(struct e1000_hw *hw)
+{
+	u32 manc;
+
+	manc = rd32(E1000_MANC);
+
+	return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? E1000_BLK_PHY_RESET : 0;
+}
+
+/**
+ *  igb_get_phy_id - Retrieve the PHY ID and revision
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the PHY registers and stores the PHY ID and possibly the PHY
+ *  revision in the hardware structure.
+ **/
+s32 igb_get_phy_id(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+	u16 phy_id;
+
+	/* ensure PHY page selection to fix misconfigured i210 */
+	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
+		phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0);
+
+	ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id);
+	if (ret_val)
+		goto out;
+
+	phy->id = (u32)(phy_id << 16);
+	udelay(20);
+	ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id);
+	if (ret_val)
+		goto out;
+
+	phy->id |= (u32)(phy_id & PHY_REVISION_MASK);
+	phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_phy_reset_dsp - Reset PHY DSP
+ *  @hw: pointer to the HW structure
+ *
+ *  Reset the digital signal processor.
+ **/
+static s32 igb_phy_reset_dsp(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+
+	if (!(hw->phy.ops.write_reg))
+		goto out;
+
+	ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xC1);
+	if (ret_val)
+		goto out;
+
+	ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_read_phy_reg_mdic - Read MDI control register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Reads the MDI control register in the PHY at offset and stores the
+ *  information read to data.
+ **/
+s32 igb_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	u32 i, mdic = 0;
+	s32 ret_val = 0;
+
+	if (offset > MAX_PHY_REG_ADDRESS) {
+		hw_dbg("PHY Address %d is out of range\n", offset);
+		ret_val = -E1000_ERR_PARAM;
+		goto out;
+	}
+
+	/* Set up Op-code, Phy Address, and register offset in the MDI
+	 * Control register.  The MAC will take care of interfacing with the
+	 * PHY to retrieve the desired data.
+	 */
+	mdic = ((offset << E1000_MDIC_REG_SHIFT) |
+		(phy->addr << E1000_MDIC_PHY_SHIFT) |
+		(E1000_MDIC_OP_READ));
+
+	wr32(E1000_MDIC, mdic);
+
+	/* Poll the ready bit to see if the MDI read completed
+	 * Increasing the time out as testing showed failures with
+	 * the lower time out
+	 */
+	for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+		udelay(50);
+		mdic = rd32(E1000_MDIC);
+		if (mdic & E1000_MDIC_READY)
+			break;
+	}
+	if (!(mdic & E1000_MDIC_READY)) {
+		hw_dbg("MDI Read did not complete\n");
+		ret_val = -E1000_ERR_PHY;
+		goto out;
+	}
+	if (mdic & E1000_MDIC_ERROR) {
+		hw_dbg("MDI Error\n");
+		ret_val = -E1000_ERR_PHY;
+		goto out;
+	}
+	*data = (u16) mdic;
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_write_phy_reg_mdic - Write MDI control register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write to register at offset
+ *
+ *  Writes data to MDI control register in the PHY at offset.
+ **/
+s32 igb_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	u32 i, mdic = 0;
+	s32 ret_val = 0;
+
+	if (offset > MAX_PHY_REG_ADDRESS) {
+		hw_dbg("PHY Address %d is out of range\n", offset);
+		ret_val = -E1000_ERR_PARAM;
+		goto out;
+	}
+
+	/* Set up Op-code, Phy Address, and register offset in the MDI
+	 * Control register.  The MAC will take care of interfacing with the
+	 * PHY to retrieve the desired data.
+	 */
+	mdic = (((u32)data) |
+		(offset << E1000_MDIC_REG_SHIFT) |
+		(phy->addr << E1000_MDIC_PHY_SHIFT) |
+		(E1000_MDIC_OP_WRITE));
+
+	wr32(E1000_MDIC, mdic);
+
+	/* Poll the ready bit to see if the MDI read completed
+	 * Increasing the time out as testing showed failures with
+	 * the lower time out
+	 */
+	for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+		udelay(50);
+		mdic = rd32(E1000_MDIC);
+		if (mdic & E1000_MDIC_READY)
+			break;
+	}
+	if (!(mdic & E1000_MDIC_READY)) {
+		hw_dbg("MDI Write did not complete\n");
+		ret_val = -E1000_ERR_PHY;
+		goto out;
+	}
+	if (mdic & E1000_MDIC_ERROR) {
+		hw_dbg("MDI Error\n");
+		ret_val = -E1000_ERR_PHY;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_read_phy_reg_i2c - Read PHY register using i2c
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Reads the PHY register at offset using the i2c interface and stores the
+ *  retrieved information in data.
+ **/
+s32 igb_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	u32 i, i2ccmd = 0;
+
+	/* Set up Op-code, Phy Address, and register address in the I2CCMD
+	 * register.  The MAC will take care of interfacing with the
+	 * PHY to retrieve the desired data.
+	 */
+	i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) |
+		  (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) |
+		  (E1000_I2CCMD_OPCODE_READ));
+
+	wr32(E1000_I2CCMD, i2ccmd);
+
+	/* Poll the ready bit to see if the I2C read completed */
+	for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) {
+		udelay(50);
+		i2ccmd = rd32(E1000_I2CCMD);
+		if (i2ccmd & E1000_I2CCMD_READY)
+			break;
+	}
+	if (!(i2ccmd & E1000_I2CCMD_READY)) {
+		hw_dbg("I2CCMD Read did not complete\n");
+		return -E1000_ERR_PHY;
+	}
+	if (i2ccmd & E1000_I2CCMD_ERROR) {
+		hw_dbg("I2CCMD Error bit set\n");
+		return -E1000_ERR_PHY;
+	}
+
+	/* Need to byte-swap the 16-bit value. */
+	*data = ((i2ccmd >> 8) & 0x00FF) | ((i2ccmd << 8) & 0xFF00);
+
+	return 0;
+}
+
+/**
+ *  igb_write_phy_reg_i2c - Write PHY register using i2c
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Writes the data to PHY register at the offset using the i2c interface.
+ **/
+s32 igb_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	u32 i, i2ccmd = 0;
+	u16 phy_data_swapped;
+
+	/* Prevent overwriting SFP I2C EEPROM which is at A0 address.*/
+	if ((hw->phy.addr == 0) || (hw->phy.addr > 7)) {
+		hw_dbg("PHY I2C Address %d is out of range.\n",
+			  hw->phy.addr);
+		return -E1000_ERR_CONFIG;
+	}
+
+	/* Swap the data bytes for the I2C interface */
+	phy_data_swapped = ((data >> 8) & 0x00FF) | ((data << 8) & 0xFF00);
+
+	/* Set up Op-code, Phy Address, and register address in the I2CCMD
+	 * register.  The MAC will take care of interfacing with the
+	 * PHY to retrieve the desired data.
+	 */
+	i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) |
+		  (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) |
+		  E1000_I2CCMD_OPCODE_WRITE |
+		  phy_data_swapped);
+
+	wr32(E1000_I2CCMD, i2ccmd);
+
+	/* Poll the ready bit to see if the I2C read completed */
+	for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) {
+		udelay(50);
+		i2ccmd = rd32(E1000_I2CCMD);
+		if (i2ccmd & E1000_I2CCMD_READY)
+			break;
+	}
+	if (!(i2ccmd & E1000_I2CCMD_READY)) {
+		hw_dbg("I2CCMD Write did not complete\n");
+		return -E1000_ERR_PHY;
+	}
+	if (i2ccmd & E1000_I2CCMD_ERROR) {
+		hw_dbg("I2CCMD Error bit set\n");
+		return -E1000_ERR_PHY;
+	}
+
+	return 0;
+}
+
+/**
+ *  igb_read_sfp_data_byte - Reads SFP module data.
+ *  @hw: pointer to the HW structure
+ *  @offset: byte location offset to be read
+ *  @data: read data buffer pointer
+ *
+ *  Reads one byte from SFP module data stored
+ *  in SFP resided EEPROM memory or SFP diagnostic area.
+ *  Function should be called with
+ *  E1000_I2CCMD_SFP_DATA_ADDR(<byte offset>) for SFP module database access
+ *  E1000_I2CCMD_SFP_DIAG_ADDR(<byte offset>) for SFP diagnostics parameters
+ *  access
+ **/
+s32 igb_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data)
+{
+	u32 i = 0;
+	u32 i2ccmd = 0;
+	u32 data_local = 0;
+
+	if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) {
+		hw_dbg("I2CCMD command address exceeds upper limit\n");
+		return -E1000_ERR_PHY;
+	}
+
+	/* Set up Op-code, EEPROM Address,in the I2CCMD
+	 * register. The MAC will take care of interfacing with the
+	 * EEPROM to retrieve the desired data.
+	 */
+	i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) |
+		  E1000_I2CCMD_OPCODE_READ);
+
+	wr32(E1000_I2CCMD, i2ccmd);
+
+	/* Poll the ready bit to see if the I2C read completed */
+	for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) {
+		udelay(50);
+		data_local = rd32(E1000_I2CCMD);
+		if (data_local & E1000_I2CCMD_READY)
+			break;
+	}
+	if (!(data_local & E1000_I2CCMD_READY)) {
+		hw_dbg("I2CCMD Read did not complete\n");
+		return -E1000_ERR_PHY;
+	}
+	if (data_local & E1000_I2CCMD_ERROR) {
+		hw_dbg("I2CCMD Error bit set\n");
+		return -E1000_ERR_PHY;
+	}
+	*data = (u8) data_local & 0xFF;
+
+	return 0;
+}
+
+/**
+ *  igb_read_phy_reg_igp - Read igp PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to be read
+ *  @data: pointer to the read data
+ *
+ *  Acquires semaphore, if necessary, then reads the PHY register at offset
+ *  and storing the retrieved information in data.  Release any acquired
+ *  semaphores before exiting.
+ **/
+s32 igb_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	s32 ret_val = 0;
+
+	if (!(hw->phy.ops.acquire))
+		goto out;
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	if (offset > MAX_PHY_MULTI_PAGE_REG) {
+		ret_val = igb_write_phy_reg_mdic(hw,
+						 IGP01E1000_PHY_PAGE_SELECT,
+						 (u16)offset);
+		if (ret_val) {
+			hw->phy.ops.release(hw);
+			goto out;
+		}
+	}
+
+	ret_val = igb_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
+					data);
+
+	hw->phy.ops.release(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_write_phy_reg_igp - Write igp PHY register
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset to write to
+ *  @data: data to write at register offset
+ *
+ *  Acquires semaphore, if necessary, then writes the data to PHY register
+ *  at the offset.  Release any acquired semaphores before exiting.
+ **/
+s32 igb_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	s32 ret_val = 0;
+
+	if (!(hw->phy.ops.acquire))
+		goto out;
+
+	ret_val = hw->phy.ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	if (offset > MAX_PHY_MULTI_PAGE_REG) {
+		ret_val = igb_write_phy_reg_mdic(hw,
+						 IGP01E1000_PHY_PAGE_SELECT,
+						 (u16)offset);
+		if (ret_val) {
+			hw->phy.ops.release(hw);
+			goto out;
+		}
+	}
+
+	ret_val = igb_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
+					 data);
+
+	hw->phy.ops.release(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_copper_link_setup_82580 - Setup 82580 PHY for copper link
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets up Carrier-sense on Transmit and downshift values.
+ **/
+s32 igb_copper_link_setup_82580(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+
+	if (phy->reset_disable) {
+		ret_val = 0;
+		goto out;
+	}
+
+	if (phy->type == e1000_phy_82580) {
+		ret_val = hw->phy.ops.reset(hw);
+		if (ret_val) {
+			hw_dbg("Error resetting the PHY.\n");
+			goto out;
+		}
+	}
+
+	/* Enable CRS on TX. This must be set for half-duplex operation. */
+	ret_val = phy->ops.read_reg(hw, I82580_CFG_REG, &phy_data);
+	if (ret_val)
+		goto out;
+
+	phy_data |= I82580_CFG_ASSERT_CRS_ON_TX;
+
+	/* Enable downshift */
+	phy_data |= I82580_CFG_ENABLE_DOWNSHIFT;
+
+	ret_val = phy->ops.write_reg(hw, I82580_CFG_REG, phy_data);
+	if (ret_val)
+		goto out;
+
+	/* Set MDI/MDIX mode */
+	ret_val = phy->ops.read_reg(hw, I82580_PHY_CTRL_2, &phy_data);
+	if (ret_val)
+		goto out;
+	phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK;
+	/* Options:
+	 *   0 - Auto (default)
+	 *   1 - MDI mode
+	 *   2 - MDI-X mode
+	 */
+	switch (hw->phy.mdix) {
+	case 1:
+		break;
+	case 2:
+		phy_data |= I82580_PHY_CTRL2_MANUAL_MDIX;
+		break;
+	case 0:
+	default:
+		phy_data |= I82580_PHY_CTRL2_AUTO_MDI_MDIX;
+		break;
+	}
+	ret_val = hw->phy.ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_copper_link_setup_m88 - Setup m88 PHY's for copper link
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets up MDI/MDI-X and polarity for m88 PHY's.  If necessary, transmit clock
+ *  and downshift values are set also.
+ **/
+s32 igb_copper_link_setup_m88(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+
+	if (phy->reset_disable) {
+		ret_val = 0;
+		goto out;
+	}
+
+	/* Enable CRS on TX. This must be set for half-duplex operation. */
+	ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		goto out;
+
+	phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+
+	/* Options:
+	 *   MDI/MDI-X = 0 (default)
+	 *   0 - Auto for all speeds
+	 *   1 - MDI mode
+	 *   2 - MDI-X mode
+	 *   3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes)
+	 */
+	phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+
+	switch (phy->mdix) {
+	case 1:
+		phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE;
+		break;
+	case 2:
+		phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE;
+		break;
+	case 3:
+		phy_data |= M88E1000_PSCR_AUTO_X_1000T;
+		break;
+	case 0:
+	default:
+		phy_data |= M88E1000_PSCR_AUTO_X_MODE;
+		break;
+	}
+
+	/* Options:
+	 *   disable_polarity_correction = 0 (default)
+	 *       Automatic Correction for Reversed Cable Polarity
+	 *   0 - Disabled
+	 *   1 - Enabled
+	 */
+	phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL;
+	if (phy->disable_polarity_correction == 1)
+		phy_data |= M88E1000_PSCR_POLARITY_REVERSAL;
+
+	ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+	if (ret_val)
+		goto out;
+
+	if (phy->revision < E1000_REVISION_4) {
+		/* Force TX_CLK in the Extended PHY Specific Control Register
+		 * to 25MHz clock.
+		 */
+		ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL,
+					    &phy_data);
+		if (ret_val)
+			goto out;
+
+		phy_data |= M88E1000_EPSCR_TX_CLK_25;
+
+		if ((phy->revision == E1000_REVISION_2) &&
+		    (phy->id == M88E1111_I_PHY_ID)) {
+			/* 82573L PHY - set the downshift counter to 5x. */
+			phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK;
+			phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X;
+		} else {
+			/* Configure Master and Slave downshift values */
+			phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK |
+				      M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK);
+			phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X |
+				     M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X);
+		}
+		ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL,
+					     phy_data);
+		if (ret_val)
+			goto out;
+	}
+
+	/* Commit the changes. */
+	ret_val = igb_phy_sw_reset(hw);
+	if (ret_val) {
+		hw_dbg("Error committing the PHY changes\n");
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_copper_link_setup_m88_gen2 - Setup m88 PHY's for copper link
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets up MDI/MDI-X and polarity for i347-AT4, m88e1322 and m88e1112 PHY's.
+ *  Also enables and sets the downshift parameters.
+ **/
+s32 igb_copper_link_setup_m88_gen2(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+
+	if (phy->reset_disable)
+		return 0;
+
+	/* Enable CRS on Tx. This must be set for half-duplex operation. */
+	ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Options:
+	 *   MDI/MDI-X = 0 (default)
+	 *   0 - Auto for all speeds
+	 *   1 - MDI mode
+	 *   2 - MDI-X mode
+	 *   3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes)
+	 */
+	phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+
+	switch (phy->mdix) {
+	case 1:
+		phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE;
+		break;
+	case 2:
+		phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE;
+		break;
+	case 3:
+		/* M88E1112 does not support this mode) */
+		if (phy->id != M88E1112_E_PHY_ID) {
+			phy_data |= M88E1000_PSCR_AUTO_X_1000T;
+			break;
+		}
+		fallthrough;
+	case 0:
+	default:
+		phy_data |= M88E1000_PSCR_AUTO_X_MODE;
+		break;
+	}
+
+	/* Options:
+	 *   disable_polarity_correction = 0 (default)
+	 *       Automatic Correction for Reversed Cable Polarity
+	 *   0 - Disabled
+	 *   1 - Enabled
+	 */
+	phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL;
+	if (phy->disable_polarity_correction == 1)
+		phy_data |= M88E1000_PSCR_POLARITY_REVERSAL;
+
+	/* Enable downshift and setting it to X6 */
+	if (phy->id == M88E1543_E_PHY_ID) {
+		phy_data &= ~I347AT4_PSCR_DOWNSHIFT_ENABLE;
+		ret_val =
+		    phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+		if (ret_val)
+			return ret_val;
+
+		ret_val = igb_phy_sw_reset(hw);
+		if (ret_val) {
+			hw_dbg("Error committing the PHY changes\n");
+			return ret_val;
+		}
+	}
+
+	phy_data &= ~I347AT4_PSCR_DOWNSHIFT_MASK;
+	phy_data |= I347AT4_PSCR_DOWNSHIFT_6X;
+	phy_data |= I347AT4_PSCR_DOWNSHIFT_ENABLE;
+
+	ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* Commit the changes. */
+	ret_val = igb_phy_sw_reset(hw);
+	if (ret_val) {
+		hw_dbg("Error committing the PHY changes\n");
+		return ret_val;
+	}
+	ret_val = igb_set_master_slave_mode(hw);
+	if (ret_val)
+		return ret_val;
+
+	return 0;
+}
+
+/**
+ *  igb_copper_link_setup_igp - Setup igp PHY's for copper link
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets up LPLU, MDI/MDI-X, polarity, Smartspeed and Master/Slave config for
+ *  igp PHY's.
+ **/
+s32 igb_copper_link_setup_igp(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+
+	if (phy->reset_disable) {
+		ret_val = 0;
+		goto out;
+	}
+
+	ret_val = phy->ops.reset(hw);
+	if (ret_val) {
+		hw_dbg("Error resetting the PHY.\n");
+		goto out;
+	}
+
+	/* Wait 100ms for MAC to configure PHY from NVM settings, to avoid
+	 * timeout issues when LFS is enabled.
+	 */
+	msleep(100);
+
+	/* The NVM settings will configure LPLU in D3 for
+	 * non-IGP1 PHYs.
+	 */
+	if (phy->type == e1000_phy_igp) {
+		/* disable lplu d3 during driver init */
+		if (phy->ops.set_d3_lplu_state)
+			ret_val = phy->ops.set_d3_lplu_state(hw, false);
+		if (ret_val) {
+			hw_dbg("Error Disabling LPLU D3\n");
+			goto out;
+		}
+	}
+
+	/* disable lplu d0 during driver init */
+	ret_val = phy->ops.set_d0_lplu_state(hw, false);
+	if (ret_val) {
+		hw_dbg("Error Disabling LPLU D0\n");
+		goto out;
+	}
+	/* Configure mdi-mdix settings */
+	ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &data);
+	if (ret_val)
+		goto out;
+
+	data &= ~IGP01E1000_PSCR_AUTO_MDIX;
+
+	switch (phy->mdix) {
+	case 1:
+		data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX;
+		break;
+	case 2:
+		data |= IGP01E1000_PSCR_FORCE_MDI_MDIX;
+		break;
+	case 0:
+	default:
+		data |= IGP01E1000_PSCR_AUTO_MDIX;
+		break;
+	}
+	ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, data);
+	if (ret_val)
+		goto out;
+
+	/* set auto-master slave resolution settings */
+	if (hw->mac.autoneg) {
+		/* when autonegotiation advertisement is only 1000Mbps then we
+		 * should disable SmartSpeed and enable Auto MasterSlave
+		 * resolution as hardware default.
+		 */
+		if (phy->autoneg_advertised == ADVERTISE_1000_FULL) {
+			/* Disable SmartSpeed */
+			ret_val = phy->ops.read_reg(hw,
+						    IGP01E1000_PHY_PORT_CONFIG,
+						    &data);
+			if (ret_val)
+				goto out;
+
+			data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = phy->ops.write_reg(hw,
+						     IGP01E1000_PHY_PORT_CONFIG,
+						     data);
+			if (ret_val)
+				goto out;
+
+			/* Set auto Master/Slave resolution process */
+			ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, &data);
+			if (ret_val)
+				goto out;
+
+			data &= ~CR_1000T_MS_ENABLE;
+			ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, data);
+			if (ret_val)
+				goto out;
+		}
+
+		ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, &data);
+		if (ret_val)
+			goto out;
+
+		/* load defaults for future use */
+		phy->original_ms_type = (data & CR_1000T_MS_ENABLE) ?
+			((data & CR_1000T_MS_VALUE) ?
+			e1000_ms_force_master :
+			e1000_ms_force_slave) :
+			e1000_ms_auto;
+
+		switch (phy->ms_type) {
+		case e1000_ms_force_master:
+			data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE);
+			break;
+		case e1000_ms_force_slave:
+			data |= CR_1000T_MS_ENABLE;
+			data &= ~(CR_1000T_MS_VALUE);
+			break;
+		case e1000_ms_auto:
+			data &= ~CR_1000T_MS_ENABLE;
+			break;
+		default:
+			break;
+		}
+		ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, data);
+		if (ret_val)
+			goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_copper_link_autoneg - Setup/Enable autoneg for copper link
+ *  @hw: pointer to the HW structure
+ *
+ *  Performs initial bounds checking on autoneg advertisement parameter, then
+ *  configure to advertise the full capability.  Setup the PHY to autoneg
+ *  and restart the negotiation process between the link partner.  If
+ *  autoneg_wait_to_complete, then wait for autoneg to complete before exiting.
+ **/
+static s32 igb_copper_link_autoneg(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_ctrl;
+
+	/* Perform some bounds checking on the autoneg advertisement
+	 * parameter.
+	 */
+	phy->autoneg_advertised &= phy->autoneg_mask;
+
+	/* If autoneg_advertised is zero, we assume it was not defaulted
+	 * by the calling code so we set to advertise full capability.
+	 */
+	if (phy->autoneg_advertised == 0)
+		phy->autoneg_advertised = phy->autoneg_mask;
+
+	hw_dbg("Reconfiguring auto-neg advertisement params\n");
+	ret_val = igb_phy_setup_autoneg(hw);
+	if (ret_val) {
+		hw_dbg("Error Setting up Auto-Negotiation\n");
+		goto out;
+	}
+	hw_dbg("Restarting Auto-Neg\n");
+
+	/* Restart auto-negotiation by setting the Auto Neg Enable bit and
+	 * the Auto Neg Restart bit in the PHY control register.
+	 */
+	ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl);
+	if (ret_val)
+		goto out;
+
+	phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
+	ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl);
+	if (ret_val)
+		goto out;
+
+	/* Does the user want to wait for Auto-Neg to complete here, or
+	 * check at a later time (for example, callback routine).
+	 */
+	if (phy->autoneg_wait_to_complete) {
+		ret_val = igb_wait_autoneg(hw);
+		if (ret_val) {
+			hw_dbg("Error while waiting for autoneg to complete\n");
+			goto out;
+		}
+	}
+
+	hw->mac.get_link_status = true;
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_phy_setup_autoneg - Configure PHY for auto-negotiation
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the MII auto-neg advertisement register and/or the 1000T control
+ *  register and if the PHY is already setup for auto-negotiation, then
+ *  return successful.  Otherwise, setup advertisement and flow control to
+ *  the appropriate values for the wanted auto-negotiation.
+ **/
+static s32 igb_phy_setup_autoneg(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 mii_autoneg_adv_reg;
+	u16 mii_1000t_ctrl_reg = 0;
+
+	phy->autoneg_advertised &= phy->autoneg_mask;
+
+	/* Read the MII Auto-Neg Advertisement Register (Address 4). */
+	ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg);
+	if (ret_val)
+		goto out;
+
+	if (phy->autoneg_mask & ADVERTISE_1000_FULL) {
+		/* Read the MII 1000Base-T Control Register (Address 9). */
+		ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL,
+					    &mii_1000t_ctrl_reg);
+		if (ret_val)
+			goto out;
+	}
+
+	/* Need to parse both autoneg_advertised and fc and set up
+	 * the appropriate PHY registers.  First we will parse for
+	 * autoneg_advertised software override.  Since we can advertise
+	 * a plethora of combinations, we need to check each bit
+	 * individually.
+	 */
+
+	/* First we clear all the 10/100 mb speed bits in the Auto-Neg
+	 * Advertisement Register (Address 4) and the 1000 mb speed bits in
+	 * the  1000Base-T Control Register (Address 9).
+	 */
+	mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS |
+				 NWAY_AR_100TX_HD_CAPS |
+				 NWAY_AR_10T_FD_CAPS   |
+				 NWAY_AR_10T_HD_CAPS);
+	mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS);
+
+	hw_dbg("autoneg_advertised %x\n", phy->autoneg_advertised);
+
+	/* Do we want to advertise 10 Mb Half Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_10_HALF) {
+		hw_dbg("Advertise 10mb Half duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS;
+	}
+
+	/* Do we want to advertise 10 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_10_FULL) {
+		hw_dbg("Advertise 10mb Full duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS;
+	}
+
+	/* Do we want to advertise 100 Mb Half Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_100_HALF) {
+		hw_dbg("Advertise 100mb Half duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS;
+	}
+
+	/* Do we want to advertise 100 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_100_FULL) {
+		hw_dbg("Advertise 100mb Full duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS;
+	}
+
+	/* We do not allow the Phy to advertise 1000 Mb Half Duplex */
+	if (phy->autoneg_advertised & ADVERTISE_1000_HALF)
+		hw_dbg("Advertise 1000mb Half duplex request denied!\n");
+
+	/* Do we want to advertise 1000 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_1000_FULL) {
+		hw_dbg("Advertise 1000mb Full duplex\n");
+		mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS;
+	}
+
+	/* Check for a software override of the flow control settings, and
+	 * setup the PHY advertisement registers accordingly.  If
+	 * auto-negotiation is enabled, then software will have to set the
+	 * "PAUSE" bits to the correct value in the Auto-Negotiation
+	 * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto-
+	 * negotiation.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *      0:  Flow control is completely disabled
+	 *      1:  Rx flow control is enabled (we can receive pause frames
+	 *          but not send pause frames).
+	 *      2:  Tx flow control is enabled (we can send pause frames
+	 *          but we do not support receiving pause frames).
+	 *      3:  Both Rx and TX flow control (symmetric) are enabled.
+	 *  other:  No software override.  The flow control configuration
+	 *          in the EEPROM is used.
+	 */
+	switch (hw->fc.current_mode) {
+	case e1000_fc_none:
+		/* Flow control (RX & TX) is completely disabled by a
+		 * software over-ride.
+		 */
+		mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+		break;
+	case e1000_fc_rx_pause:
+		/* RX Flow control is enabled, and TX Flow control is
+		 * disabled, by a software over-ride.
+		 *
+		 * Since there really isn't a way to advertise that we are
+		 * capable of RX Pause ONLY, we will advertise that we
+		 * support both symmetric and asymmetric RX PAUSE.  Later
+		 * (in e1000_config_fc_after_link_up) we will disable the
+		 * hw's ability to send PAUSE frames.
+		 */
+		mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+		break;
+	case e1000_fc_tx_pause:
+		/* TX Flow control is enabled, and RX Flow control is
+		 * disabled, by a software over-ride.
+		 */
+		mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR;
+		mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE;
+		break;
+	case e1000_fc_full:
+		/* Flow control (both RX and TX) is enabled by a software
+		 * over-ride.
+		 */
+		mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+		break;
+	default:
+		hw_dbg("Flow control param set incorrectly\n");
+		ret_val = -E1000_ERR_CONFIG;
+		goto out;
+	}
+
+	ret_val = phy->ops.write_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg);
+	if (ret_val)
+		goto out;
+
+	hw_dbg("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg);
+
+	if (phy->autoneg_mask & ADVERTISE_1000_FULL) {
+		ret_val = phy->ops.write_reg(hw,
+					     PHY_1000T_CTRL,
+					     mii_1000t_ctrl_reg);
+		if (ret_val)
+			goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_setup_copper_link - Configure copper link settings
+ *  @hw: pointer to the HW structure
+ *
+ *  Calls the appropriate function to configure the link for auto-neg or forced
+ *  speed and duplex.  Then we check for link, once link is established calls
+ *  to configure collision distance and flow control are called.  If link is
+ *  not established, we return -E1000_ERR_PHY (-2).
+ **/
+s32 igb_setup_copper_link(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	bool link;
+
+	if (hw->mac.autoneg) {
+		/* Setup autoneg and flow control advertisement and perform
+		 * autonegotiation.
+		 */
+		ret_val = igb_copper_link_autoneg(hw);
+		if (ret_val)
+			goto out;
+	} else {
+		/* PHY will be set to 10H, 10F, 100H or 100F
+		 * depending on user settings.
+		 */
+		hw_dbg("Forcing Speed and Duplex\n");
+		ret_val = hw->phy.ops.force_speed_duplex(hw);
+		if (ret_val) {
+			hw_dbg("Error Forcing Speed and Duplex\n");
+			goto out;
+		}
+	}
+
+	/* Check link status. Wait up to 100 microseconds for link to become
+	 * valid.
+	 */
+	ret_val = igb_phy_has_link(hw, COPPER_LINK_UP_LIMIT, 10, &link);
+	if (ret_val)
+		goto out;
+
+	if (link) {
+		hw_dbg("Valid link established!!!\n");
+		igb_config_collision_dist(hw);
+		ret_val = igb_config_fc_after_link_up(hw);
+	} else {
+		hw_dbg("Unable to establish link!!!\n");
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_phy_force_speed_duplex_igp - Force speed/duplex for igp PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Calls the PHY setup function to force speed and duplex.  Clears the
+ *  auto-crossover to force MDI manually.  Waits for link and returns
+ *  successful if link up is successful, else -E1000_ERR_PHY (-2).
+ **/
+s32 igb_phy_force_speed_duplex_igp(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+	bool link;
+
+	ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data);
+	if (ret_val)
+		goto out;
+
+	igb_phy_force_speed_duplex_setup(hw, &phy_data);
+
+	ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data);
+	if (ret_val)
+		goto out;
+
+	/* Clear Auto-Crossover to force MDI manually.  IGP requires MDI
+	 * forced whenever speed and duplex are forced.
+	 */
+	ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data);
+	if (ret_val)
+		goto out;
+
+	phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX;
+	phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX;
+
+	ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data);
+	if (ret_val)
+		goto out;
+
+	hw_dbg("IGP PSCR: %X\n", phy_data);
+
+	udelay(1);
+
+	if (phy->autoneg_wait_to_complete) {
+		hw_dbg("Waiting for forced speed/duplex link on IGP phy.\n");
+
+		ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 10000, &link);
+		if (ret_val)
+			goto out;
+
+		if (!link)
+			hw_dbg("Link taking longer than expected.\n");
+
+		/* Try once more */
+		ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 10000, &link);
+		if (ret_val)
+			goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_phy_force_speed_duplex_m88 - Force speed/duplex for m88 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Calls the PHY setup function to force speed and duplex.  Clears the
+ *  auto-crossover to force MDI manually.  Resets the PHY to commit the
+ *  changes.  If time expires while waiting for link up, we reset the DSP.
+ *  After reset, TX_CLK and CRS on TX must be set.  Return successful upon
+ *  successful completion, else return corresponding error code.
+ **/
+s32 igb_phy_force_speed_duplex_m88(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+	bool link;
+
+	/* I210 and I211 devices support Auto-Crossover in forced operation. */
+	if (phy->type != e1000_phy_i210) {
+		/* Clear Auto-Crossover to force MDI manually.  M88E1000
+		 * requires MDI forced whenever speed and duplex are forced.
+		 */
+		ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL,
+					    &phy_data);
+		if (ret_val)
+			goto out;
+
+		phy_data &= ~M88E1000_PSCR_AUTO_X_MODE;
+		ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL,
+					     phy_data);
+		if (ret_val)
+			goto out;
+
+		hw_dbg("M88E1000 PSCR: %X\n", phy_data);
+	}
+
+	ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data);
+	if (ret_val)
+		goto out;
+
+	igb_phy_force_speed_duplex_setup(hw, &phy_data);
+
+	ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data);
+	if (ret_val)
+		goto out;
+
+	/* Reset the phy to commit changes. */
+	ret_val = igb_phy_sw_reset(hw);
+	if (ret_val)
+		goto out;
+
+	if (phy->autoneg_wait_to_complete) {
+		hw_dbg("Waiting for forced speed/duplex link on M88 phy.\n");
+
+		ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 100000, &link);
+		if (ret_val)
+			goto out;
+
+		if (!link) {
+			bool reset_dsp = true;
+
+			switch (hw->phy.id) {
+			case I347AT4_E_PHY_ID:
+			case M88E1112_E_PHY_ID:
+			case M88E1543_E_PHY_ID:
+			case M88E1512_E_PHY_ID:
+			case I210_I_PHY_ID:
+				reset_dsp = false;
+				break;
+			default:
+				if (hw->phy.type != e1000_phy_m88)
+					reset_dsp = false;
+				break;
+			}
+			if (!reset_dsp) {
+				hw_dbg("Link taking longer than expected.\n");
+			} else {
+				/* We didn't get link.
+				 * Reset the DSP and cross our fingers.
+				 */
+				ret_val = phy->ops.write_reg(hw,
+						M88E1000_PHY_PAGE_SELECT,
+						0x001d);
+				if (ret_val)
+					goto out;
+				ret_val = igb_phy_reset_dsp(hw);
+				if (ret_val)
+					goto out;
+			}
+		}
+
+		/* Try once more */
+		ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT,
+					   100000, &link);
+		if (ret_val)
+			goto out;
+	}
+
+	if (hw->phy.type != e1000_phy_m88 ||
+	    hw->phy.id == I347AT4_E_PHY_ID ||
+	    hw->phy.id == M88E1112_E_PHY_ID ||
+	    hw->phy.id == M88E1543_E_PHY_ID ||
+	    hw->phy.id == M88E1512_E_PHY_ID ||
+	    hw->phy.id == I210_I_PHY_ID)
+		goto out;
+
+	ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		goto out;
+
+	/* Resetting the phy means we need to re-force TX_CLK in the
+	 * Extended PHY Specific Control Register to 25MHz clock from
+	 * the reset value of 2.5MHz.
+	 */
+	phy_data |= M88E1000_EPSCR_TX_CLK_25;
+	ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data);
+	if (ret_val)
+		goto out;
+
+	/* In addition, we must re-enable CRS on Tx for both half and full
+	 * duplex.
+	 */
+	ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		goto out;
+
+	phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX;
+	ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex
+ *  @hw: pointer to the HW structure
+ *  @phy_ctrl: pointer to current value of PHY_CONTROL
+ *
+ *  Forces speed and duplex on the PHY by doing the following: disable flow
+ *  control, force speed/duplex on the MAC, disable auto speed detection,
+ *  disable auto-negotiation, configure duplex, configure speed, configure
+ *  the collision distance, write configuration to CTRL register.  The
+ *  caller must write to the PHY_CONTROL register for these settings to
+ *  take affect.
+ **/
+static void igb_phy_force_speed_duplex_setup(struct e1000_hw *hw,
+					     u16 *phy_ctrl)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+	u32 ctrl;
+
+	/* Turn off flow control when forcing speed/duplex */
+	hw->fc.current_mode = e1000_fc_none;
+
+	/* Force speed/duplex on the mac */
+	ctrl = rd32(E1000_CTRL);
+	ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX);
+	ctrl &= ~E1000_CTRL_SPD_SEL;
+
+	/* Disable Auto Speed Detection */
+	ctrl &= ~E1000_CTRL_ASDE;
+
+	/* Disable autoneg on the phy */
+	*phy_ctrl &= ~MII_CR_AUTO_NEG_EN;
+
+	/* Forcing Full or Half Duplex? */
+	if (mac->forced_speed_duplex & E1000_ALL_HALF_DUPLEX) {
+		ctrl &= ~E1000_CTRL_FD;
+		*phy_ctrl &= ~MII_CR_FULL_DUPLEX;
+		hw_dbg("Half Duplex\n");
+	} else {
+		ctrl |= E1000_CTRL_FD;
+		*phy_ctrl |= MII_CR_FULL_DUPLEX;
+		hw_dbg("Full Duplex\n");
+	}
+
+	/* Forcing 10mb or 100mb? */
+	if (mac->forced_speed_duplex & E1000_ALL_100_SPEED) {
+		ctrl |= E1000_CTRL_SPD_100;
+		*phy_ctrl |= MII_CR_SPEED_100;
+		*phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_10);
+		hw_dbg("Forcing 100mb\n");
+	} else {
+		ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100);
+		*phy_ctrl |= MII_CR_SPEED_10;
+		*phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100);
+		hw_dbg("Forcing 10mb\n");
+	}
+
+	igb_config_collision_dist(hw);
+
+	wr32(E1000_CTRL, ctrl);
+}
+
+/**
+ *  igb_set_d3_lplu_state - Sets low power link up state for D3
+ *  @hw: pointer to the HW structure
+ *  @active: boolean used to enable/disable lplu
+ *
+ *  Success returns 0, Failure returns 1
+ *
+ *  The low power link up (lplu) state is set to the power management level D3
+ *  and SmartSpeed is disabled when active is true, else clear lplu for D3
+ *  and enable Smartspeed.  LPLU and Smartspeed are mutually exclusive.  LPLU
+ *  is used during Dx states where the power conservation is most important.
+ *  During driver activity, SmartSpeed should be enabled so performance is
+ *  maintained.
+ **/
+s32 igb_set_d3_lplu_state(struct e1000_hw *hw, bool active)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+	u16 data;
+
+	if (!(hw->phy.ops.read_reg))
+		goto out;
+
+	ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data);
+	if (ret_val)
+		goto out;
+
+	if (!active) {
+		data &= ~IGP02E1000_PM_D3_LPLU;
+		ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
+					     data);
+		if (ret_val)
+			goto out;
+		/* LPLU and SmartSpeed are mutually exclusive.  LPLU is used
+		 * during Dx states where the power conservation is most
+		 * important.  During driver activity we should enable
+		 * SmartSpeed, so performance is maintained.
+		 */
+		if (phy->smart_speed == e1000_smart_speed_on) {
+			ret_val = phy->ops.read_reg(hw,
+						    IGP01E1000_PHY_PORT_CONFIG,
+						    &data);
+			if (ret_val)
+				goto out;
+
+			data |= IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = phy->ops.write_reg(hw,
+						     IGP01E1000_PHY_PORT_CONFIG,
+						     data);
+			if (ret_val)
+				goto out;
+		} else if (phy->smart_speed == e1000_smart_speed_off) {
+			ret_val = phy->ops.read_reg(hw,
+						     IGP01E1000_PHY_PORT_CONFIG,
+						     &data);
+			if (ret_val)
+				goto out;
+
+			data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+			ret_val = phy->ops.write_reg(hw,
+						     IGP01E1000_PHY_PORT_CONFIG,
+						     data);
+			if (ret_val)
+				goto out;
+		}
+	} else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) ||
+		   (phy->autoneg_advertised == E1000_ALL_NOT_GIG) ||
+		   (phy->autoneg_advertised == E1000_ALL_10_SPEED)) {
+		data |= IGP02E1000_PM_D3_LPLU;
+		ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT,
+					      data);
+		if (ret_val)
+			goto out;
+
+		/* When LPLU is enabled, we should disable SmartSpeed */
+		ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+					    &data);
+		if (ret_val)
+			goto out;
+
+		data &= ~IGP01E1000_PSCFR_SMART_SPEED;
+		ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG,
+					     data);
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_check_downshift - Checks whether a downshift in speed occurred
+ *  @hw: pointer to the HW structure
+ *
+ *  Success returns 0, Failure returns 1
+ *
+ *  A downshift is detected by querying the PHY link health.
+ **/
+s32 igb_check_downshift(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, offset, mask;
+
+	switch (phy->type) {
+	case e1000_phy_i210:
+	case e1000_phy_m88:
+	case e1000_phy_gg82563:
+		offset	= M88E1000_PHY_SPEC_STATUS;
+		mask	= M88E1000_PSSR_DOWNSHIFT;
+		break;
+	case e1000_phy_igp_2:
+	case e1000_phy_igp:
+	case e1000_phy_igp_3:
+		offset	= IGP01E1000_PHY_LINK_HEALTH;
+		mask	= IGP01E1000_PLHR_SS_DOWNGRADE;
+		break;
+	default:
+		/* speed downshift not supported */
+		phy->speed_downgraded = false;
+		ret_val = 0;
+		goto out;
+	}
+
+	ret_val = phy->ops.read_reg(hw, offset, &phy_data);
+
+	if (!ret_val)
+		phy->speed_downgraded = (phy_data & mask) ? true : false;
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_check_polarity_m88 - Checks the polarity.
+ *  @hw: pointer to the HW structure
+ *
+ *  Success returns 0, Failure returns -E1000_ERR_PHY (-2)
+ *
+ *  Polarity is determined based on the PHY specific status register.
+ **/
+s32 igb_check_polarity_m88(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+
+	ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &data);
+
+	if (!ret_val)
+		phy->cable_polarity = (data & M88E1000_PSSR_REV_POLARITY)
+				      ? e1000_rev_polarity_reversed
+				      : e1000_rev_polarity_normal;
+
+	return ret_val;
+}
+
+/**
+ *  igb_check_polarity_igp - Checks the polarity.
+ *  @hw: pointer to the HW structure
+ *
+ *  Success returns 0, Failure returns -E1000_ERR_PHY (-2)
+ *
+ *  Polarity is determined based on the PHY port status register, and the
+ *  current speed (since there is no polarity at 100Mbps).
+ **/
+static s32 igb_check_polarity_igp(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data, offset, mask;
+
+	/* Polarity is determined based on the speed of
+	 * our connection.
+	 */
+	ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data);
+	if (ret_val)
+		goto out;
+
+	if ((data & IGP01E1000_PSSR_SPEED_MASK) ==
+	    IGP01E1000_PSSR_SPEED_1000MBPS) {
+		offset	= IGP01E1000_PHY_PCS_INIT_REG;
+		mask	= IGP01E1000_PHY_POLARITY_MASK;
+	} else {
+		/* This really only applies to 10Mbps since
+		 * there is no polarity for 100Mbps (always 0).
+		 */
+		offset	= IGP01E1000_PHY_PORT_STATUS;
+		mask	= IGP01E1000_PSSR_POLARITY_REVERSED;
+	}
+
+	ret_val = phy->ops.read_reg(hw, offset, &data);
+
+	if (!ret_val)
+		phy->cable_polarity = (data & mask)
+				      ? e1000_rev_polarity_reversed
+				      : e1000_rev_polarity_normal;
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_wait_autoneg - Wait for auto-neg completion
+ *  @hw: pointer to the HW structure
+ *
+ *  Waits for auto-negotiation to complete or for the auto-negotiation time
+ *  limit to expire, which ever happens first.
+ **/
+static s32 igb_wait_autoneg(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u16 i, phy_status;
+
+	/* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */
+	for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) {
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		if (phy_status & MII_SR_AUTONEG_COMPLETE)
+			break;
+		msleep(100);
+	}
+
+	/* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation
+	 * has completed.
+	 */
+	return ret_val;
+}
+
+/**
+ *  igb_phy_has_link - Polls PHY for link
+ *  @hw: pointer to the HW structure
+ *  @iterations: number of times to poll for link
+ *  @usec_interval: delay between polling attempts
+ *  @success: pointer to whether polling was successful or not
+ *
+ *  Polls the PHY status register for link, 'iterations' number of times.
+ **/
+s32 igb_phy_has_link(struct e1000_hw *hw, u32 iterations,
+		     u32 usec_interval, bool *success)
+{
+	s32 ret_val = 0;
+	u16 i, phy_status;
+
+	for (i = 0; i < iterations; i++) {
+		/* Some PHYs require the PHY_STATUS register to be read
+		 * twice due to the link bit being sticky.  No harm doing
+		 * it across the board.
+		 */
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val && usec_interval > 0) {
+			/* If the first read fails, another entity may have
+			 * ownership of the resources, wait and try again to
+			 * see if they have relinquished the resources yet.
+			 */
+			if (usec_interval >= 1000)
+				mdelay(usec_interval/1000);
+			else
+				udelay(usec_interval);
+		}
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		if (phy_status & MII_SR_LINK_STATUS)
+			break;
+		if (usec_interval >= 1000)
+			mdelay(usec_interval/1000);
+		else
+			udelay(usec_interval);
+	}
+
+	*success = (i < iterations) ? true : false;
+
+	return ret_val;
+}
+
+/**
+ *  igb_get_cable_length_m88 - Determine cable length for m88 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Reads the PHY specific status register to retrieve the cable length
+ *  information.  The cable length is determined by averaging the minimum and
+ *  maximum values to get the "average" cable length.  The m88 PHY has four
+ *  possible cable length values, which are:
+ *	Register Value		Cable Length
+ *	0			< 50 meters
+ *	1			50 - 80 meters
+ *	2			80 - 110 meters
+ *	3			110 - 140 meters
+ *	4			> 140 meters
+ **/
+s32 igb_get_cable_length_m88(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, index;
+
+	ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
+	if (ret_val)
+		goto out;
+
+	index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+		M88E1000_PSSR_CABLE_LENGTH_SHIFT;
+	if (index >= ARRAY_SIZE(e1000_m88_cable_length_table) - 1) {
+		ret_val = -E1000_ERR_PHY;
+		goto out;
+	}
+
+	phy->min_cable_length = e1000_m88_cable_length_table[index];
+	phy->max_cable_length = e1000_m88_cable_length_table[index + 1];
+
+	phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2;
+
+out:
+	return ret_val;
+}
+
+s32 igb_get_cable_length_m88_gen2(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, phy_data2, index, default_page, is_cm;
+	int len_tot = 0;
+	u16 len_min;
+	u16 len_max;
+
+	switch (hw->phy.id) {
+	case M88E1543_E_PHY_ID:
+	case M88E1512_E_PHY_ID:
+	case I347AT4_E_PHY_ID:
+	case I210_I_PHY_ID:
+		/* Remember the original page select and set it to 7 */
+		ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT,
+					    &default_page);
+		if (ret_val)
+			goto out;
+
+		ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x07);
+		if (ret_val)
+			goto out;
+
+		/* Check if the unit of cable length is meters or cm */
+		ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, &phy_data2);
+		if (ret_val)
+			goto out;
+
+		is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT);
+
+		/* Get cable length from Pair 0 length Regs */
+		ret_val = phy->ops.read_reg(hw, I347AT4_PCDL0, &phy_data);
+		if (ret_val)
+			goto out;
+
+		phy->pair_length[0] = phy_data / (is_cm ? 100 : 1);
+		len_tot = phy->pair_length[0];
+		len_min = phy->pair_length[0];
+		len_max = phy->pair_length[0];
+
+		/* Get cable length from Pair 1 length Regs */
+		ret_val = phy->ops.read_reg(hw, I347AT4_PCDL1, &phy_data);
+		if (ret_val)
+			goto out;
+
+		phy->pair_length[1] = phy_data / (is_cm ? 100 : 1);
+		len_tot += phy->pair_length[1];
+		len_min = min(len_min, phy->pair_length[1]);
+		len_max = max(len_max, phy->pair_length[1]);
+
+		/* Get cable length from Pair 2 length Regs */
+		ret_val = phy->ops.read_reg(hw, I347AT4_PCDL2, &phy_data);
+		if (ret_val)
+			goto out;
+
+		phy->pair_length[2] = phy_data / (is_cm ? 100 : 1);
+		len_tot += phy->pair_length[2];
+		len_min = min(len_min, phy->pair_length[2]);
+		len_max = max(len_max, phy->pair_length[2]);
+
+		/* Get cable length from Pair 3 length Regs */
+		ret_val = phy->ops.read_reg(hw, I347AT4_PCDL3, &phy_data);
+		if (ret_val)
+			goto out;
+
+		phy->pair_length[3] = phy_data / (is_cm ? 100 : 1);
+		len_tot += phy->pair_length[3];
+		len_min = min(len_min, phy->pair_length[3]);
+		len_max = max(len_max, phy->pair_length[3]);
+
+		/* Populate the phy structure with cable length in meters */
+		phy->min_cable_length = len_min;
+		phy->max_cable_length = len_max;
+		phy->cable_length = len_tot / 4;
+
+		/* Reset the page selec to its original value */
+		ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT,
+					     default_page);
+		if (ret_val)
+			goto out;
+		break;
+	case M88E1112_E_PHY_ID:
+		/* Remember the original page select and set it to 5 */
+		ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT,
+					    &default_page);
+		if (ret_val)
+			goto out;
+
+		ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x05);
+		if (ret_val)
+			goto out;
+
+		ret_val = phy->ops.read_reg(hw, M88E1112_VCT_DSP_DISTANCE,
+					    &phy_data);
+		if (ret_val)
+			goto out;
+
+		index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >>
+			M88E1000_PSSR_CABLE_LENGTH_SHIFT;
+		if (index >= ARRAY_SIZE(e1000_m88_cable_length_table) - 1) {
+			ret_val = -E1000_ERR_PHY;
+			goto out;
+		}
+
+		phy->min_cable_length = e1000_m88_cable_length_table[index];
+		phy->max_cable_length = e1000_m88_cable_length_table[index + 1];
+
+		phy->cable_length = (phy->min_cable_length +
+				     phy->max_cable_length) / 2;
+
+		/* Reset the page select to its original value */
+		ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT,
+					     default_page);
+		if (ret_val)
+			goto out;
+
+		break;
+	default:
+		ret_val = -E1000_ERR_PHY;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_get_cable_length_igp_2 - Determine cable length for igp2 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  The automatic gain control (agc) normalizes the amplitude of the
+ *  received signal, adjusting for the attenuation produced by the
+ *  cable.  By reading the AGC registers, which represent the
+ *  combination of coarse and fine gain value, the value can be put
+ *  into a lookup table to obtain the approximate cable length
+ *  for each channel.
+ **/
+s32 igb_get_cable_length_igp_2(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+	u16 phy_data, i, agc_value = 0;
+	u16 cur_agc_index, max_agc_index = 0;
+	u16 min_agc_index = ARRAY_SIZE(e1000_igp_2_cable_length_table) - 1;
+	static const u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = {
+		IGP02E1000_PHY_AGC_A,
+		IGP02E1000_PHY_AGC_B,
+		IGP02E1000_PHY_AGC_C,
+		IGP02E1000_PHY_AGC_D
+	};
+
+	/* Read the AGC registers for all channels */
+	for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) {
+		ret_val = phy->ops.read_reg(hw, agc_reg_array[i], &phy_data);
+		if (ret_val)
+			goto out;
+
+		/* Getting bits 15:9, which represent the combination of
+		 * coarse and fine gain values.  The result is a number
+		 * that can be put into the lookup table to obtain the
+		 * approximate cable length.
+		 */
+		cur_agc_index = (phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) &
+				IGP02E1000_AGC_LENGTH_MASK;
+
+		/* Array index bound check. */
+		if ((cur_agc_index >= ARRAY_SIZE(e1000_igp_2_cable_length_table)) ||
+		    (cur_agc_index == 0)) {
+			ret_val = -E1000_ERR_PHY;
+			goto out;
+		}
+
+		/* Remove min & max AGC values from calculation. */
+		if (e1000_igp_2_cable_length_table[min_agc_index] >
+		    e1000_igp_2_cable_length_table[cur_agc_index])
+			min_agc_index = cur_agc_index;
+		if (e1000_igp_2_cable_length_table[max_agc_index] <
+		    e1000_igp_2_cable_length_table[cur_agc_index])
+			max_agc_index = cur_agc_index;
+
+		agc_value += e1000_igp_2_cable_length_table[cur_agc_index];
+	}
+
+	agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] +
+		      e1000_igp_2_cable_length_table[max_agc_index]);
+	agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2);
+
+	/* Calculate cable length with the error range of +/- 10 meters. */
+	phy->min_cable_length = ((agc_value - IGP02E1000_AGC_RANGE) > 0) ?
+				 (agc_value - IGP02E1000_AGC_RANGE) : 0;
+	phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE;
+
+	phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2;
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_get_phy_info_m88 - Retrieve PHY information
+ *  @hw: pointer to the HW structure
+ *
+ *  Valid for only copper links.  Read the PHY status register (sticky read)
+ *  to verify that link is up.  Read the PHY special control register to
+ *  determine the polarity and 10base-T extended distance.  Read the PHY
+ *  special status register to determine MDI/MDIx and current speed.  If
+ *  speed is 1000, then determine cable length, local and remote receiver.
+ **/
+s32 igb_get_phy_info_m88(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32  ret_val;
+	u16 phy_data;
+	bool link;
+
+	if (phy->media_type != e1000_media_type_copper) {
+		hw_dbg("Phy info is only valid for copper media\n");
+		ret_val = -E1000_ERR_CONFIG;
+		goto out;
+	}
+
+	ret_val = igb_phy_has_link(hw, 1, 0, &link);
+	if (ret_val)
+		goto out;
+
+	if (!link) {
+		hw_dbg("Phy info is only valid if link is up\n");
+		ret_val = -E1000_ERR_CONFIG;
+		goto out;
+	}
+
+	ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data);
+	if (ret_val)
+		goto out;
+
+	phy->polarity_correction = (phy_data & M88E1000_PSCR_POLARITY_REVERSAL)
+				   ? true : false;
+
+	ret_val = igb_check_polarity_m88(hw);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data);
+	if (ret_val)
+		goto out;
+
+	phy->is_mdix = (phy_data & M88E1000_PSSR_MDIX) ? true : false;
+
+	if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) {
+		ret_val = phy->ops.get_cable_length(hw);
+		if (ret_val)
+			goto out;
+
+		ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &phy_data);
+		if (ret_val)
+			goto out;
+
+		phy->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS)
+				? e1000_1000t_rx_status_ok
+				: e1000_1000t_rx_status_not_ok;
+
+		phy->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS)
+				 ? e1000_1000t_rx_status_ok
+				 : e1000_1000t_rx_status_not_ok;
+	} else {
+		/* Set values to "undefined" */
+		phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
+		phy->local_rx = e1000_1000t_rx_status_undefined;
+		phy->remote_rx = e1000_1000t_rx_status_undefined;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_get_phy_info_igp - Retrieve igp PHY information
+ *  @hw: pointer to the HW structure
+ *
+ *  Read PHY status to determine if link is up.  If link is up, then
+ *  set/determine 10base-T extended distance and polarity correction.  Read
+ *  PHY port status to determine MDI/MDIx and speed.  Based on the speed,
+ *  determine on the cable length, local and remote receiver.
+ **/
+s32 igb_get_phy_info_igp(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+	bool link;
+
+	ret_val = igb_phy_has_link(hw, 1, 0, &link);
+	if (ret_val)
+		goto out;
+
+	if (!link) {
+		hw_dbg("Phy info is only valid if link is up\n");
+		ret_val = -E1000_ERR_CONFIG;
+		goto out;
+	}
+
+	phy->polarity_correction = true;
+
+	ret_val = igb_check_polarity_igp(hw);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data);
+	if (ret_val)
+		goto out;
+
+	phy->is_mdix = (data & IGP01E1000_PSSR_MDIX) ? true : false;
+
+	if ((data & IGP01E1000_PSSR_SPEED_MASK) ==
+	    IGP01E1000_PSSR_SPEED_1000MBPS) {
+		ret_val = phy->ops.get_cable_length(hw);
+		if (ret_val)
+			goto out;
+
+		ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data);
+		if (ret_val)
+			goto out;
+
+		phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS)
+				? e1000_1000t_rx_status_ok
+				: e1000_1000t_rx_status_not_ok;
+
+		phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS)
+				 ? e1000_1000t_rx_status_ok
+				 : e1000_1000t_rx_status_not_ok;
+	} else {
+		phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
+		phy->local_rx = e1000_1000t_rx_status_undefined;
+		phy->remote_rx = e1000_1000t_rx_status_undefined;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_phy_sw_reset - PHY software reset
+ *  @hw: pointer to the HW structure
+ *
+ *  Does a software reset of the PHY by reading the PHY control register and
+ *  setting/write the control register reset bit to the PHY.
+ **/
+s32 igb_phy_sw_reset(struct e1000_hw *hw)
+{
+	s32 ret_val = 0;
+	u16 phy_ctrl;
+
+	if (!(hw->phy.ops.read_reg))
+		goto out;
+
+	ret_val = hw->phy.ops.read_reg(hw, PHY_CONTROL, &phy_ctrl);
+	if (ret_val)
+		goto out;
+
+	phy_ctrl |= MII_CR_RESET;
+	ret_val = hw->phy.ops.write_reg(hw, PHY_CONTROL, phy_ctrl);
+	if (ret_val)
+		goto out;
+
+	udelay(1);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_phy_hw_reset - PHY hardware reset
+ *  @hw: pointer to the HW structure
+ *
+ *  Verify the reset block is not blocking us from resetting.  Acquire
+ *  semaphore (if necessary) and read/set/write the device control reset
+ *  bit in the PHY.  Wait the appropriate delay time for the device to
+ *  reset and release the semaphore (if necessary).
+ **/
+s32 igb_phy_hw_reset(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32  ret_val;
+	u32 ctrl;
+
+	ret_val = igb_check_reset_block(hw);
+	if (ret_val) {
+		ret_val = 0;
+		goto out;
+	}
+
+	ret_val = phy->ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	ctrl = rd32(E1000_CTRL);
+	wr32(E1000_CTRL, ctrl | E1000_CTRL_PHY_RST);
+	wrfl();
+
+	udelay(phy->reset_delay_us);
+
+	wr32(E1000_CTRL, ctrl);
+	wrfl();
+
+	udelay(150);
+
+	phy->ops.release(hw);
+
+	ret_val = phy->ops.get_cfg_done(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_phy_init_script_igp3 - Inits the IGP3 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Initializes a Intel Gigabit PHY3 when an EEPROM is not present.
+ **/
+s32 igb_phy_init_script_igp3(struct e1000_hw *hw)
+{
+	hw_dbg("Running IGP 3 PHY init script\n");
+
+	/* PHY init IGP 3 */
+	/* Enable rise/fall, 10-mode work in class-A */
+	hw->phy.ops.write_reg(hw, 0x2F5B, 0x9018);
+	/* Remove all caps from Replica path filter */
+	hw->phy.ops.write_reg(hw, 0x2F52, 0x0000);
+	/* Bias trimming for ADC, AFE and Driver (Default) */
+	hw->phy.ops.write_reg(hw, 0x2FB1, 0x8B24);
+	/* Increase Hybrid poly bias */
+	hw->phy.ops.write_reg(hw, 0x2FB2, 0xF8F0);
+	/* Add 4% to TX amplitude in Giga mode */
+	hw->phy.ops.write_reg(hw, 0x2010, 0x10B0);
+	/* Disable trimming (TTT) */
+	hw->phy.ops.write_reg(hw, 0x2011, 0x0000);
+	/* Poly DC correction to 94.6% + 2% for all channels */
+	hw->phy.ops.write_reg(hw, 0x20DD, 0x249A);
+	/* ABS DC correction to 95.9% */
+	hw->phy.ops.write_reg(hw, 0x20DE, 0x00D3);
+	/* BG temp curve trim */
+	hw->phy.ops.write_reg(hw, 0x28B4, 0x04CE);
+	/* Increasing ADC OPAMP stage 1 currents to max */
+	hw->phy.ops.write_reg(hw, 0x2F70, 0x29E4);
+	/* Force 1000 ( required for enabling PHY regs configuration) */
+	hw->phy.ops.write_reg(hw, 0x0000, 0x0140);
+	/* Set upd_freq to 6 */
+	hw->phy.ops.write_reg(hw, 0x1F30, 0x1606);
+	/* Disable NPDFE */
+	hw->phy.ops.write_reg(hw, 0x1F31, 0xB814);
+	/* Disable adaptive fixed FFE (Default) */
+	hw->phy.ops.write_reg(hw, 0x1F35, 0x002A);
+	/* Enable FFE hysteresis */
+	hw->phy.ops.write_reg(hw, 0x1F3E, 0x0067);
+	/* Fixed FFE for short cable lengths */
+	hw->phy.ops.write_reg(hw, 0x1F54, 0x0065);
+	/* Fixed FFE for medium cable lengths */
+	hw->phy.ops.write_reg(hw, 0x1F55, 0x002A);
+	/* Fixed FFE for long cable lengths */
+	hw->phy.ops.write_reg(hw, 0x1F56, 0x002A);
+	/* Enable Adaptive Clip Threshold */
+	hw->phy.ops.write_reg(hw, 0x1F72, 0x3FB0);
+	/* AHT reset limit to 1 */
+	hw->phy.ops.write_reg(hw, 0x1F76, 0xC0FF);
+	/* Set AHT master delay to 127 msec */
+	hw->phy.ops.write_reg(hw, 0x1F77, 0x1DEC);
+	/* Set scan bits for AHT */
+	hw->phy.ops.write_reg(hw, 0x1F78, 0xF9EF);
+	/* Set AHT Preset bits */
+	hw->phy.ops.write_reg(hw, 0x1F79, 0x0210);
+	/* Change integ_factor of channel A to 3 */
+	hw->phy.ops.write_reg(hw, 0x1895, 0x0003);
+	/* Change prop_factor of channels BCD to 8 */
+	hw->phy.ops.write_reg(hw, 0x1796, 0x0008);
+	/* Change cg_icount + enable integbp for channels BCD */
+	hw->phy.ops.write_reg(hw, 0x1798, 0xD008);
+	/* Change cg_icount + enable integbp + change prop_factor_master
+	 * to 8 for channel A
+	 */
+	hw->phy.ops.write_reg(hw, 0x1898, 0xD918);
+	/* Disable AHT in Slave mode on channel A */
+	hw->phy.ops.write_reg(hw, 0x187A, 0x0800);
+	/* Enable LPLU and disable AN to 1000 in non-D0a states,
+	 * Enable SPD+B2B
+	 */
+	hw->phy.ops.write_reg(hw, 0x0019, 0x008D);
+	/* Enable restart AN on an1000_dis change */
+	hw->phy.ops.write_reg(hw, 0x001B, 0x2080);
+	/* Enable wh_fifo read clock in 10/100 modes */
+	hw->phy.ops.write_reg(hw, 0x0014, 0x0045);
+	/* Restart AN, Speed selection is 1000 */
+	hw->phy.ops.write_reg(hw, 0x0000, 0x1340);
+
+	return 0;
+}
+
+/**
+ *  igb_initialize_M88E1512_phy - Initialize M88E1512 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Initialize Marvel 1512 to work correctly with Avoton.
+ **/
+s32 igb_initialize_M88E1512_phy(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+
+	/* Switch to PHY page 0xFF. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FF);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x214B);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2144);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x0C28);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2146);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xB233);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x214D);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xCC0C);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2159);
+	if (ret_val)
+		goto out;
+
+	/* Switch to PHY page 0xFB. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FB);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_3, 0x000D);
+	if (ret_val)
+		goto out;
+
+	/* Switch to PHY page 0x12. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x12);
+	if (ret_val)
+		goto out;
+
+	/* Change mode to SGMII-to-Copper */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_MODE, 0x8001);
+	if (ret_val)
+		goto out;
+
+	/* Return the PHY to page 0. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0);
+	if (ret_val)
+		goto out;
+
+	ret_val = igb_phy_sw_reset(hw);
+	if (ret_val) {
+		hw_dbg("Error committing the PHY changes\n");
+		return ret_val;
+	}
+
+	/* msec_delay(1000); */
+	usleep_range(1000, 2000);
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_initialize_M88E1543_phy - Initialize M88E1512 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Initialize Marvell 1543 to work correctly with Avoton.
+ **/
+s32 igb_initialize_M88E1543_phy(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+
+	/* Switch to PHY page 0xFF. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FF);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x214B);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2144);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0x0C28);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2146);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xB233);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x214D);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_2, 0xDC0C);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_1, 0x2159);
+	if (ret_val)
+		goto out;
+
+	/* Switch to PHY page 0xFB. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x00FB);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_CFG_REG_3, 0x0C0D);
+	if (ret_val)
+		goto out;
+
+	/* Switch to PHY page 0x12. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x12);
+	if (ret_val)
+		goto out;
+
+	/* Change mode to SGMII-to-Copper */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1512_MODE, 0x8001);
+	if (ret_val)
+		goto out;
+
+	/* Switch to PHY page 1. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0x1);
+	if (ret_val)
+		goto out;
+
+	/* Change mode to 1000BASE-X/SGMII and autoneg enable */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_FIBER_CTRL, 0x9140);
+	if (ret_val)
+		goto out;
+
+	/* Return the PHY to page 0. */
+	ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0);
+	if (ret_val)
+		goto out;
+
+	ret_val = igb_phy_sw_reset(hw);
+	if (ret_val) {
+		hw_dbg("Error committing the PHY changes\n");
+		return ret_val;
+	}
+
+	/* msec_delay(1000); */
+	usleep_range(1000, 2000);
+out:
+	return ret_val;
+}
+
+/**
+ * igb_power_up_phy_copper - Restore copper link in case of PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, restore the link to previous settings.
+ **/
+void igb_power_up_phy_copper(struct e1000_hw *hw)
+{
+	u16 mii_reg = 0;
+
+	/* The PHY will retain its settings across a power down/up cycle */
+	hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+	mii_reg &= ~MII_CR_POWER_DOWN;
+	hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
+}
+
+/**
+ * igb_power_down_phy_copper - Power down copper PHY
+ * @hw: pointer to the HW structure
+ *
+ * Power down PHY to save power when interface is down and wake on lan
+ * is not enabled.
+ **/
+void igb_power_down_phy_copper(struct e1000_hw *hw)
+{
+	u16 mii_reg = 0;
+
+	/* The PHY will retain its settings across a power down/up cycle */
+	hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+	mii_reg |= MII_CR_POWER_DOWN;
+	hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
+	usleep_range(1000, 2000);
+}
+
+/**
+ *  igb_check_polarity_82580 - Checks the polarity.
+ *  @hw: pointer to the HW structure
+ *
+ *  Success returns 0, Failure returns -E1000_ERR_PHY (-2)
+ *
+ *  Polarity is determined based on the PHY specific status register.
+ **/
+static s32 igb_check_polarity_82580(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+
+
+	ret_val = phy->ops.read_reg(hw, I82580_PHY_STATUS_2, &data);
+
+	if (!ret_val)
+		phy->cable_polarity = (data & I82580_PHY_STATUS2_REV_POLARITY)
+				      ? e1000_rev_polarity_reversed
+				      : e1000_rev_polarity_normal;
+
+	return ret_val;
+}
+
+/**
+ *  igb_phy_force_speed_duplex_82580 - Force speed/duplex for I82580 PHY
+ *  @hw: pointer to the HW structure
+ *
+ *  Calls the PHY setup function to force speed and duplex.  Clears the
+ *  auto-crossover to force MDI manually.  Waits for link and returns
+ *  successful if link up is successful, else -E1000_ERR_PHY (-2).
+ **/
+s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data;
+	bool link;
+
+	ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data);
+	if (ret_val)
+		goto out;
+
+	igb_phy_force_speed_duplex_setup(hw, &phy_data);
+
+	ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data);
+	if (ret_val)
+		goto out;
+
+	/* Clear Auto-Crossover to force MDI manually.  82580 requires MDI
+	 * forced whenever speed and duplex are forced.
+	 */
+	ret_val = phy->ops.read_reg(hw, I82580_PHY_CTRL_2, &phy_data);
+	if (ret_val)
+		goto out;
+
+	phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK;
+
+	ret_val = phy->ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data);
+	if (ret_val)
+		goto out;
+
+	hw_dbg("I82580_PHY_CTRL_2: %X\n", phy_data);
+
+	udelay(1);
+
+	if (phy->autoneg_wait_to_complete) {
+		hw_dbg("Waiting for forced speed/duplex link on 82580 phy\n");
+
+		ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 100000, &link);
+		if (ret_val)
+			goto out;
+
+		if (!link)
+			hw_dbg("Link taking longer than expected.\n");
+
+		/* Try once more */
+		ret_val = igb_phy_has_link(hw, PHY_FORCE_LIMIT, 100000, &link);
+		if (ret_val)
+			goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_get_phy_info_82580 - Retrieve I82580 PHY information
+ *  @hw: pointer to the HW structure
+ *
+ *  Read PHY status to determine if link is up.  If link is up, then
+ *  set/determine 10base-T extended distance and polarity correction.  Read
+ *  PHY port status to determine MDI/MDIx and speed.  Based on the speed,
+ *  determine on the cable length, local and remote receiver.
+ **/
+s32 igb_get_phy_info_82580(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 data;
+	bool link;
+
+	ret_val = igb_phy_has_link(hw, 1, 0, &link);
+	if (ret_val)
+		goto out;
+
+	if (!link) {
+		hw_dbg("Phy info is only valid if link is up\n");
+		ret_val = -E1000_ERR_CONFIG;
+		goto out;
+	}
+
+	phy->polarity_correction = true;
+
+	ret_val = igb_check_polarity_82580(hw);
+	if (ret_val)
+		goto out;
+
+	ret_val = phy->ops.read_reg(hw, I82580_PHY_STATUS_2, &data);
+	if (ret_val)
+		goto out;
+
+	phy->is_mdix = (data & I82580_PHY_STATUS2_MDIX) ? true : false;
+
+	if ((data & I82580_PHY_STATUS2_SPEED_MASK) ==
+	    I82580_PHY_STATUS2_SPEED_1000MBPS) {
+		ret_val = hw->phy.ops.get_cable_length(hw);
+		if (ret_val)
+			goto out;
+
+		ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data);
+		if (ret_val)
+			goto out;
+
+		phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS)
+				? e1000_1000t_rx_status_ok
+				: e1000_1000t_rx_status_not_ok;
+
+		phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS)
+				 ? e1000_1000t_rx_status_ok
+				 : e1000_1000t_rx_status_not_ok;
+	} else {
+		phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED;
+		phy->local_rx = e1000_1000t_rx_status_undefined;
+		phy->remote_rx = e1000_1000t_rx_status_undefined;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_get_cable_length_82580 - Determine cable length for 82580 PHY
+ *  @hw: pointer to the HW structure
+ *
+ * Reads the diagnostic status register and verifies result is valid before
+ * placing it in the phy_cable_length field.
+ **/
+s32 igb_get_cable_length_82580(struct e1000_hw *hw)
+{
+	struct e1000_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 phy_data, length;
+
+	ret_val = phy->ops.read_reg(hw, I82580_PHY_DIAG_STATUS, &phy_data);
+	if (ret_val)
+		goto out;
+
+	length = (phy_data & I82580_DSTATUS_CABLE_LENGTH) >>
+		 I82580_DSTATUS_CABLE_LENGTH_SHIFT;
+
+	if (length == E1000_CABLE_LENGTH_UNDEFINED)
+		ret_val = -E1000_ERR_PHY;
+
+	phy->cable_length = length;
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igb_set_master_slave_mode - Setup PHY for Master/slave mode
+ *  @hw: pointer to the HW structure
+ *
+ *  Sets up Master/slave mode
+ **/
+static s32 igb_set_master_slave_mode(struct e1000_hw *hw)
+{
+	s32 ret_val;
+	u16 phy_data;
+
+	/* Resolve Master/Slave mode */
+	ret_val = hw->phy.ops.read_reg(hw, PHY_1000T_CTRL, &phy_data);
+	if (ret_val)
+		return ret_val;
+
+	/* load defaults for future use */
+	hw->phy.original_ms_type = (phy_data & CR_1000T_MS_ENABLE) ?
+				   ((phy_data & CR_1000T_MS_VALUE) ?
+				    e1000_ms_force_master :
+				    e1000_ms_force_slave) : e1000_ms_auto;
+
+	switch (hw->phy.ms_type) {
+	case e1000_ms_force_master:
+		phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE);
+		break;
+	case e1000_ms_force_slave:
+		phy_data |= CR_1000T_MS_ENABLE;
+		phy_data &= ~(CR_1000T_MS_VALUE);
+		break;
+	case e1000_ms_auto:
+		phy_data &= ~CR_1000T_MS_ENABLE;
+		fallthrough;
+	default:
+		break;
+	}
+
+	return hw->phy.ops.write_reg(hw, PHY_1000T_CTRL, phy_data);
+}
diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h
new file mode 100644
index 0000000000..5894e4b1d0
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/e1000_phy.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+#ifndef _E1000_PHY_H_
+#define _E1000_PHY_H_
+
+enum e1000_ms_type {
+	e1000_ms_hw_default = 0,
+	e1000_ms_force_master,
+	e1000_ms_force_slave,
+	e1000_ms_auto
+};
+
+enum e1000_smart_speed {
+	e1000_smart_speed_default = 0,
+	e1000_smart_speed_on,
+	e1000_smart_speed_off
+};
+
+s32  igb_check_downshift(struct e1000_hw *hw);
+s32  igb_check_reset_block(struct e1000_hw *hw);
+s32  igb_copper_link_setup_igp(struct e1000_hw *hw);
+s32  igb_copper_link_setup_m88(struct e1000_hw *hw);
+s32  igb_copper_link_setup_m88_gen2(struct e1000_hw *hw);
+s32  igb_phy_force_speed_duplex_igp(struct e1000_hw *hw);
+s32  igb_phy_force_speed_duplex_m88(struct e1000_hw *hw);
+s32  igb_get_cable_length_m88(struct e1000_hw *hw);
+s32  igb_get_cable_length_m88_gen2(struct e1000_hw *hw);
+s32  igb_get_cable_length_igp_2(struct e1000_hw *hw);
+s32  igb_get_phy_id(struct e1000_hw *hw);
+s32  igb_get_phy_info_igp(struct e1000_hw *hw);
+s32  igb_get_phy_info_m88(struct e1000_hw *hw);
+s32  igb_phy_sw_reset(struct e1000_hw *hw);
+s32  igb_phy_hw_reset(struct e1000_hw *hw);
+s32  igb_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data);
+s32  igb_set_d3_lplu_state(struct e1000_hw *hw, bool active);
+s32  igb_setup_copper_link(struct e1000_hw *hw);
+s32  igb_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data);
+s32  igb_phy_has_link(struct e1000_hw *hw, u32 iterations,
+				u32 usec_interval, bool *success);
+void igb_power_up_phy_copper(struct e1000_hw *hw);
+void igb_power_down_phy_copper(struct e1000_hw *hw);
+s32  igb_phy_init_script_igp3(struct e1000_hw *hw);
+s32  igb_initialize_M88E1512_phy(struct e1000_hw *hw);
+s32  igb_initialize_M88E1543_phy(struct e1000_hw *hw);
+s32  igb_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data);
+s32  igb_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data);
+s32  igb_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data);
+s32  igb_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data);
+s32  igb_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data);
+s32  igb_copper_link_setup_82580(struct e1000_hw *hw);
+s32  igb_get_phy_info_82580(struct e1000_hw *hw);
+s32  igb_phy_force_speed_duplex_82580(struct e1000_hw *hw);
+s32  igb_get_cable_length_82580(struct e1000_hw *hw);
+s32  igb_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data);
+s32  igb_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data);
+s32  igb_check_polarity_m88(struct e1000_hw *hw);
+
+/* IGP01E1000 Specific Registers */
+#define IGP01E1000_PHY_PORT_CONFIG        0x10 /* Port Config */
+#define IGP01E1000_PHY_PORT_STATUS        0x11 /* Status */
+#define IGP01E1000_PHY_PORT_CTRL          0x12 /* Control */
+#define IGP01E1000_PHY_LINK_HEALTH        0x13 /* PHY Link Health */
+#define IGP02E1000_PHY_POWER_MGMT         0x19 /* Power Management */
+#define IGP01E1000_PHY_PAGE_SELECT        0x1F /* Page Select */
+#define IGP01E1000_PHY_PCS_INIT_REG       0x00B4
+#define IGP01E1000_PHY_POLARITY_MASK      0x0078
+#define IGP01E1000_PSCR_AUTO_MDIX         0x1000
+#define IGP01E1000_PSCR_FORCE_MDI_MDIX    0x2000 /* 0=MDI, 1=MDIX */
+#define IGP01E1000_PSCFR_SMART_SPEED      0x0080
+
+#define I82580_ADDR_REG                   16
+#define I82580_CFG_REG                    22
+#define I82580_CFG_ASSERT_CRS_ON_TX       BIT(15)
+#define I82580_CFG_ENABLE_DOWNSHIFT       (3u << 10) /* auto downshift 100/10 */
+#define I82580_CTRL_REG                   23
+#define I82580_CTRL_DOWNSHIFT_MASK        (7u << 10)
+
+/* 82580 specific PHY registers */
+#define I82580_PHY_CTRL_2            18
+#define I82580_PHY_LBK_CTRL          19
+#define I82580_PHY_STATUS_2          26
+#define I82580_PHY_DIAG_STATUS       31
+
+/* I82580 PHY Status 2 */
+#define I82580_PHY_STATUS2_REV_POLARITY   0x0400
+#define I82580_PHY_STATUS2_MDIX           0x0800
+#define I82580_PHY_STATUS2_SPEED_MASK     0x0300
+#define I82580_PHY_STATUS2_SPEED_1000MBPS 0x0200
+#define I82580_PHY_STATUS2_SPEED_100MBPS  0x0100
+
+/* I82580 PHY Control 2 */
+#define I82580_PHY_CTRL2_MANUAL_MDIX      0x0200
+#define I82580_PHY_CTRL2_AUTO_MDI_MDIX    0x0400
+#define I82580_PHY_CTRL2_MDIX_CFG_MASK    0x0600
+
+/* I82580 PHY Diagnostics Status */
+#define I82580_DSTATUS_CABLE_LENGTH       0x03FC
+#define I82580_DSTATUS_CABLE_LENGTH_SHIFT 2
+
+/* 82580 PHY Power Management */
+#define E1000_82580_PHY_POWER_MGMT	0xE14
+#define E1000_82580_PM_SPD		0x0001 /* Smart Power Down */
+#define E1000_82580_PM_D0_LPLU		0x0002 /* For D0a states */
+#define E1000_82580_PM_D3_LPLU		0x0004 /* For all other states */
+#define E1000_82580_PM_GO_LINKD		0x0020 /* Go Link Disconnect */
+
+/* Enable flexible speed on link-up */
+#define IGP02E1000_PM_D0_LPLU             0x0002 /* For D0a states */
+#define IGP02E1000_PM_D3_LPLU             0x0004 /* For all other states */
+#define IGP01E1000_PLHR_SS_DOWNGRADE      0x8000
+#define IGP01E1000_PSSR_POLARITY_REVERSED 0x0002
+#define IGP01E1000_PSSR_MDIX              0x0800
+#define IGP01E1000_PSSR_SPEED_MASK        0xC000
+#define IGP01E1000_PSSR_SPEED_1000MBPS    0xC000
+#define IGP02E1000_PHY_CHANNEL_NUM        4
+#define IGP02E1000_PHY_AGC_A              0x11B1
+#define IGP02E1000_PHY_AGC_B              0x12B1
+#define IGP02E1000_PHY_AGC_C              0x14B1
+#define IGP02E1000_PHY_AGC_D              0x18B1
+#define IGP02E1000_AGC_LENGTH_SHIFT       9   /* Course - 15:13, Fine - 12:9 */
+#define IGP02E1000_AGC_LENGTH_MASK        0x7F
+#define IGP02E1000_AGC_RANGE              15
+
+#define E1000_CABLE_LENGTH_UNDEFINED      0xFF
+
+/* SFP modules ID memory locations */
+#define E1000_SFF_IDENTIFIER_OFFSET	0x00
+#define E1000_SFF_IDENTIFIER_SFF	0x02
+#define E1000_SFF_IDENTIFIER_SFP	0x03
+
+#define E1000_SFF_ETH_FLAGS_OFFSET	0x06
+/* Flags for SFP modules compatible with ETH up to 1Gb */
+struct e1000_sfp_flags {
+	u8 e1000_base_sx:1;
+	u8 e1000_base_lx:1;
+	u8 e1000_base_cx:1;
+	u8 e1000_base_t:1;
+	u8 e100_base_lx:1;
+	u8 e100_base_fx:1;
+	u8 e10_base_bx10:1;
+	u8 e10_base_px:1;
+};
+
+#endif
diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h
new file mode 100644
index 0000000000..eb9f6da920
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/e1000_regs.h
@@ -0,0 +1,418 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+#ifndef _E1000_REGS_H_
+#define _E1000_REGS_H_
+
+#define E1000_CTRL     0x00000  /* Device Control - RW */
+#define E1000_STATUS   0x00008  /* Device Status - RO */
+#define E1000_EECD     0x00010  /* EEPROM/Flash Control - RW */
+#define E1000_EERD     0x00014  /* EEPROM Read - RW */
+#define E1000_CTRL_EXT 0x00018  /* Extended Device Control - RW */
+#define E1000_MDIC     0x00020  /* MDI Control - RW */
+#define E1000_MDICNFG  0x00E04  /* MDI Config - RW */
+#define E1000_SCTL     0x00024  /* SerDes Control - RW */
+#define E1000_FCAL     0x00028  /* Flow Control Address Low - RW */
+#define E1000_FCAH     0x0002C  /* Flow Control Address High -RW */
+#define E1000_FCT      0x00030  /* Flow Control Type - RW */
+#define E1000_CONNSW   0x00034  /* Copper/Fiber switch control - RW */
+#define E1000_VET      0x00038  /* VLAN Ether Type - RW */
+#define E1000_TSSDP    0x0003C  /* Time Sync SDP Configuration Register - RW */
+#define E1000_ICR      0x000C0  /* Interrupt Cause Read - R/clr */
+#define E1000_ITR      0x000C4  /* Interrupt Throttling Rate - RW */
+#define E1000_ICS      0x000C8  /* Interrupt Cause Set - WO */
+#define E1000_IMS      0x000D0  /* Interrupt Mask Set - RW */
+#define E1000_IMC      0x000D8  /* Interrupt Mask Clear - WO */
+#define E1000_IAM      0x000E0  /* Interrupt Acknowledge Auto Mask */
+#define E1000_RCTL     0x00100  /* RX Control - RW */
+#define E1000_FCTTV    0x00170  /* Flow Control Transmit Timer Value - RW */
+#define E1000_TXCW     0x00178  /* TX Configuration Word - RW */
+#define E1000_EICR     0x01580  /* Ext. Interrupt Cause Read - R/clr */
+#define E1000_EITR(_n) (0x01680 + (0x4 * (_n)))
+#define E1000_EICS     0x01520  /* Ext. Interrupt Cause Set - W0 */
+#define E1000_EIMS     0x01524  /* Ext. Interrupt Mask Set/Read - RW */
+#define E1000_EIMC     0x01528  /* Ext. Interrupt Mask Clear - WO */
+#define E1000_EIAC     0x0152C  /* Ext. Interrupt Auto Clear - RW */
+#define E1000_EIAM     0x01530  /* Ext. Interrupt Ack Auto Clear Mask - RW */
+#define E1000_GPIE     0x01514  /* General Purpose Interrupt Enable - RW */
+#define E1000_IVAR0    0x01700  /* Interrupt Vector Allocation (array) - RW */
+#define E1000_IVAR_MISC 0x01740 /* IVAR for "other" causes - RW */
+#define E1000_TCTL     0x00400  /* TX Control - RW */
+#define E1000_TCTL_EXT 0x00404  /* Extended TX Control - RW */
+#define E1000_TIPG     0x00410  /* TX Inter-packet gap -RW */
+#define E1000_AIT      0x00458  /* Adaptive Interframe Spacing Throttle - RW */
+#define E1000_LEDCTL   0x00E00  /* LED Control - RW */
+#define E1000_LEDMUX   0x08130  /* LED MUX Control */
+#define E1000_PBA      0x01000  /* Packet Buffer Allocation - RW */
+#define E1000_PBS      0x01008  /* Packet Buffer Size */
+#define E1000_EEMNGCTL 0x01010  /* MNG EEprom Control */
+#define E1000_EEMNGCTL_I210 0x12030  /* MNG EEprom Control */
+#define E1000_EEARBC_I210 0x12024  /* EEPROM Auto Read Bus Control */
+#define E1000_EEWR     0x0102C  /* EEPROM Write Register - RW */
+#define E1000_I2CCMD   0x01028  /* SFPI2C Command Register - RW */
+#define E1000_FRTIMER  0x01048  /* Free Running Timer - RW */
+#define E1000_TCPTIMER 0x0104C  /* TCP Timer - RW */
+#define E1000_FCRTL    0x02160  /* Flow Control Receive Threshold Low - RW */
+#define E1000_FCRTH    0x02168  /* Flow Control Receive Threshold High - RW */
+#define E1000_FCRTV    0x02460  /* Flow Control Refresh Timer Value - RW */
+#define E1000_I2CPARAMS        0x0102C /* SFPI2C Parameters Register - RW */
+#define E1000_I2CBB_EN      0x00000100  /* I2C - Bit Bang Enable */
+#define E1000_I2C_CLK_OUT   0x00000200  /* I2C- Clock */
+#define E1000_I2C_DATA_OUT  0x00000400  /* I2C- Data Out */
+#define E1000_I2C_DATA_OE_N 0x00000800  /* I2C- Data Output Enable */
+#define E1000_I2C_DATA_IN   0x00001000  /* I2C- Data In */
+#define E1000_I2C_CLK_OE_N  0x00002000  /* I2C- Clock Output Enable */
+#define E1000_I2C_CLK_IN    0x00004000  /* I2C- Clock In */
+#define E1000_MPHY_ADDR_CTRL	0x0024 /* GbE MPHY Address Control */
+#define E1000_MPHY_DATA		0x0E10 /* GBE MPHY Data */
+#define E1000_MPHY_STAT		0x0E0C /* GBE MPHY Statistics */
+
+/* IEEE 1588 TIMESYNCH */
+#define E1000_TSYNCRXCTL 0x0B620 /* Rx Time Sync Control register - RW */
+#define E1000_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */
+#define E1000_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */
+#define E1000_RXSTMPL    0x0B624 /* Rx timestamp Low - RO */
+#define E1000_RXSTMPH    0x0B628 /* Rx timestamp High - RO */
+#define E1000_RXSATRL    0x0B62C /* Rx timestamp attribute low - RO */
+#define E1000_RXSATRH    0x0B630 /* Rx timestamp attribute high - RO */
+#define E1000_TXSTMPL    0x0B618 /* Tx timestamp value Low - RO */
+#define E1000_TXSTMPH    0x0B61C /* Tx timestamp value High - RO */
+#define E1000_SYSTIML    0x0B600 /* System time register Low - RO */
+#define E1000_SYSTIMH    0x0B604 /* System time register High - RO */
+#define E1000_TIMINCA    0x0B608 /* Increment attributes register - RW */
+#define E1000_TSAUXC     0x0B640 /* Timesync Auxiliary Control register */
+#define E1000_TRGTTIML0  0x0B644 /* Target Time Register 0 Low  - RW */
+#define E1000_TRGTTIMH0  0x0B648 /* Target Time Register 0 High - RW */
+#define E1000_TRGTTIML1  0x0B64C /* Target Time Register 1 Low  - RW */
+#define E1000_TRGTTIMH1  0x0B650 /* Target Time Register 1 High - RW */
+#define E1000_FREQOUT0   0x0B654 /* Frequency Out 0 Control Register - RW */
+#define E1000_FREQOUT1   0x0B658 /* Frequency Out 1 Control Register - RW */
+#define E1000_AUXSTMPL0  0x0B65C /* Auxiliary Time Stamp 0 Register Low  - RO */
+#define E1000_AUXSTMPH0  0x0B660 /* Auxiliary Time Stamp 0 Register High - RO */
+#define E1000_AUXSTMPL1  0x0B664 /* Auxiliary Time Stamp 1 Register Low  - RO */
+#define E1000_AUXSTMPH1  0x0B668 /* Auxiliary Time Stamp 1 Register High - RO */
+#define E1000_SYSTIMR    0x0B6F8 /* System time register Residue */
+#define E1000_TSICR      0x0B66C /* Interrupt Cause Register */
+#define E1000_TSIM       0x0B674 /* Interrupt Mask Register */
+
+/* Filtering Registers */
+#define E1000_SAQF(_n) (0x5980 + 4 * (_n))
+#define E1000_DAQF(_n) (0x59A0 + 4 * (_n))
+#define E1000_SPQF(_n) (0x59C0 + 4 * (_n))
+#define E1000_FTQF(_n) (0x59E0 + 4 * (_n))
+#define E1000_SAQF0 E1000_SAQF(0)
+#define E1000_DAQF0 E1000_DAQF(0)
+#define E1000_SPQF0 E1000_SPQF(0)
+#define E1000_FTQF0 E1000_FTQF(0)
+#define E1000_SYNQF(_n) (0x055FC + (4 * (_n))) /* SYN Packet Queue Fltr */
+#define E1000_ETQF(_n)  (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */
+
+#define E1000_RQDPC(_n) (0x0C030 + ((_n) * 0x40))
+
+/* DMA Coalescing registers */
+#define E1000_DMACR	0x02508 /* Control Register */
+#define E1000_DMCTXTH	0x03550 /* Transmit Threshold */
+#define E1000_DMCTLX	0x02514 /* Time to Lx Request */
+#define E1000_DMCRTRH	0x05DD0 /* Receive Packet Rate Threshold */
+#define E1000_DMCCNT	0x05DD4 /* Current Rx Count */
+#define E1000_FCRTC	0x02170 /* Flow Control Rx high watermark */
+
+/* TX Rate Limit Registers */
+#define E1000_RTTDQSEL	0x3604 /* Tx Desc Plane Queue Select - WO */
+#define E1000_RTTBCNRM	0x3690 /* Tx BCN Rate-scheduler MMW */
+#define E1000_RTTBCNRC	0x36B0 /* Tx BCN Rate-Scheduler Config - WO */
+
+/* Split and Replication RX Control - RW */
+#define E1000_RXPBS	0x02404 /* Rx Packet Buffer Size - RW */
+
+/* Thermal sensor configuration and status registers */
+#define E1000_THMJT	0x08100 /* Junction Temperature */
+#define E1000_THLOWTC	0x08104 /* Low Threshold Control */
+#define E1000_THMIDTC	0x08108 /* Mid Threshold Control */
+#define E1000_THHIGHTC	0x0810C /* High Threshold Control */
+#define E1000_THSTAT	0x08110 /* Thermal Sensor Status */
+
+/* Convenience macros
+ *
+ * Note: "_n" is the queue number of the register to be written to.
+ *
+ * Example usage:
+ * E1000_RDBAL_REG(current_rx_queue)
+ */
+#define E1000_RDBAL(_n)   ((_n) < 4 ? (0x02800 + ((_n) * 0x100)) \
+				    : (0x0C000 + ((_n) * 0x40)))
+#define E1000_RDBAH(_n)   ((_n) < 4 ? (0x02804 + ((_n) * 0x100)) \
+				    : (0x0C004 + ((_n) * 0x40)))
+#define E1000_RDLEN(_n)   ((_n) < 4 ? (0x02808 + ((_n) * 0x100)) \
+				    : (0x0C008 + ((_n) * 0x40)))
+#define E1000_SRRCTL(_n)  ((_n) < 4 ? (0x0280C + ((_n) * 0x100)) \
+				    : (0x0C00C + ((_n) * 0x40)))
+#define E1000_RDH(_n)     ((_n) < 4 ? (0x02810 + ((_n) * 0x100)) \
+				    : (0x0C010 + ((_n) * 0x40)))
+#define E1000_RDT(_n)     ((_n) < 4 ? (0x02818 + ((_n) * 0x100)) \
+				    : (0x0C018 + ((_n) * 0x40)))
+#define E1000_RXDCTL(_n)  ((_n) < 4 ? (0x02828 + ((_n) * 0x100)) \
+				    : (0x0C028 + ((_n) * 0x40)))
+#define E1000_TDBAL(_n)   ((_n) < 4 ? (0x03800 + ((_n) * 0x100)) \
+				    : (0x0E000 + ((_n) * 0x40)))
+#define E1000_TDBAH(_n)   ((_n) < 4 ? (0x03804 + ((_n) * 0x100)) \
+				    : (0x0E004 + ((_n) * 0x40)))
+#define E1000_TDLEN(_n)   ((_n) < 4 ? (0x03808 + ((_n) * 0x100)) \
+				    : (0x0E008 + ((_n) * 0x40)))
+#define E1000_TDH(_n)     ((_n) < 4 ? (0x03810 + ((_n) * 0x100)) \
+				    : (0x0E010 + ((_n) * 0x40)))
+#define E1000_TDT(_n)     ((_n) < 4 ? (0x03818 + ((_n) * 0x100)) \
+				    : (0x0E018 + ((_n) * 0x40)))
+#define E1000_TXDCTL(_n)  ((_n) < 4 ? (0x03828 + ((_n) * 0x100)) \
+				    : (0x0E028 + ((_n) * 0x40)))
+#define E1000_RXCTL(_n)	  ((_n) < 4 ? (0x02814 + ((_n) * 0x100)) : \
+				      (0x0C014 + ((_n) * 0x40)))
+#define E1000_DCA_RXCTRL(_n)	E1000_RXCTL(_n)
+#define E1000_TXCTL(_n)   ((_n) < 4 ? (0x03814 + ((_n) * 0x100)) : \
+				      (0x0E014 + ((_n) * 0x40)))
+#define E1000_DCA_TXCTRL(_n) E1000_TXCTL(_n)
+#define E1000_TDWBAL(_n)  ((_n) < 4 ? (0x03838 + ((_n) * 0x100)) \
+				    : (0x0E038 + ((_n) * 0x40)))
+#define E1000_TDWBAH(_n)  ((_n) < 4 ? (0x0383C + ((_n) * 0x100)) \
+				    : (0x0E03C + ((_n) * 0x40)))
+
+#define E1000_RXPBS	0x02404  /* Rx Packet Buffer Size - RW */
+#define E1000_TXPBS	0x03404  /* Tx Packet Buffer Size - RW */
+
+#define E1000_TDFH     0x03410  /* TX Data FIFO Head - RW */
+#define E1000_TDFT     0x03418  /* TX Data FIFO Tail - RW */
+#define E1000_TDFHS    0x03420  /* TX Data FIFO Head Saved - RW */
+#define E1000_TDFPC    0x03430  /* TX Data FIFO Packet Count - RW */
+#define E1000_DTXCTL   0x03590  /* DMA TX Control - RW */
+#define E1000_CRCERRS  0x04000  /* CRC Error Count - R/clr */
+#define E1000_ALGNERRC 0x04004  /* Alignment Error Count - R/clr */
+#define E1000_SYMERRS  0x04008  /* Symbol Error Count - R/clr */
+#define E1000_RXERRC   0x0400C  /* Receive Error Count - R/clr */
+#define E1000_MPC      0x04010  /* Missed Packet Count - R/clr */
+#define E1000_SCC      0x04014  /* Single Collision Count - R/clr */
+#define E1000_ECOL     0x04018  /* Excessive Collision Count - R/clr */
+#define E1000_MCC      0x0401C  /* Multiple Collision Count - R/clr */
+#define E1000_LATECOL  0x04020  /* Late Collision Count - R/clr */
+#define E1000_COLC     0x04028  /* Collision Count - R/clr */
+#define E1000_DC       0x04030  /* Defer Count - R/clr */
+#define E1000_TNCRS    0x04034  /* TX-No CRS - R/clr */
+#define E1000_SEC      0x04038  /* Sequence Error Count - R/clr */
+#define E1000_CEXTERR  0x0403C  /* Carrier Extension Error Count - R/clr */
+#define E1000_RLEC     0x04040  /* Receive Length Error Count - R/clr */
+#define E1000_XONRXC   0x04048  /* XON RX Count - R/clr */
+#define E1000_XONTXC   0x0404C  /* XON TX Count - R/clr */
+#define E1000_XOFFRXC  0x04050  /* XOFF RX Count - R/clr */
+#define E1000_XOFFTXC  0x04054  /* XOFF TX Count - R/clr */
+#define E1000_FCRUC    0x04058  /* Flow Control RX Unsupported Count- R/clr */
+#define E1000_PRC64    0x0405C  /* Packets RX (64 bytes) - R/clr */
+#define E1000_PRC127   0x04060  /* Packets RX (65-127 bytes) - R/clr */
+#define E1000_PRC255   0x04064  /* Packets RX (128-255 bytes) - R/clr */
+#define E1000_PRC511   0x04068  /* Packets RX (255-511 bytes) - R/clr */
+#define E1000_PRC1023  0x0406C  /* Packets RX (512-1023 bytes) - R/clr */
+#define E1000_PRC1522  0x04070  /* Packets RX (1024-1522 bytes) - R/clr */
+#define E1000_GPRC     0x04074  /* Good Packets RX Count - R/clr */
+#define E1000_BPRC     0x04078  /* Broadcast Packets RX Count - R/clr */
+#define E1000_MPRC     0x0407C  /* Multicast Packets RX Count - R/clr */
+#define E1000_GPTC     0x04080  /* Good Packets TX Count - R/clr */
+#define E1000_GORCL    0x04088  /* Good Octets RX Count Low - R/clr */
+#define E1000_GORCH    0x0408C  /* Good Octets RX Count High - R/clr */
+#define E1000_GOTCL    0x04090  /* Good Octets TX Count Low - R/clr */
+#define E1000_GOTCH    0x04094  /* Good Octets TX Count High - R/clr */
+#define E1000_RNBC     0x040A0  /* RX No Buffers Count - R/clr */
+#define E1000_RUC      0x040A4  /* RX Undersize Count - R/clr */
+#define E1000_RFC      0x040A8  /* RX Fragment Count - R/clr */
+#define E1000_ROC      0x040AC  /* RX Oversize Count - R/clr */
+#define E1000_RJC      0x040B0  /* RX Jabber Count - R/clr */
+#define E1000_MGTPRC   0x040B4  /* Management Packets RX Count - R/clr */
+#define E1000_MGTPDC   0x040B8  /* Management Packets Dropped Count - R/clr */
+#define E1000_MGTPTC   0x040BC  /* Management Packets TX Count - R/clr */
+#define E1000_TORL     0x040C0  /* Total Octets RX Low - R/clr */
+#define E1000_TORH     0x040C4  /* Total Octets RX High - R/clr */
+#define E1000_TOTL     0x040C8  /* Total Octets TX Low - R/clr */
+#define E1000_TOTH     0x040CC  /* Total Octets TX High - R/clr */
+#define E1000_TPR      0x040D0  /* Total Packets RX - R/clr */
+#define E1000_TPT      0x040D4  /* Total Packets TX - R/clr */
+#define E1000_PTC64    0x040D8  /* Packets TX (64 bytes) - R/clr */
+#define E1000_PTC127   0x040DC  /* Packets TX (65-127 bytes) - R/clr */
+#define E1000_PTC255   0x040E0  /* Packets TX (128-255 bytes) - R/clr */
+#define E1000_PTC511   0x040E4  /* Packets TX (256-511 bytes) - R/clr */
+#define E1000_PTC1023  0x040E8  /* Packets TX (512-1023 bytes) - R/clr */
+#define E1000_PTC1522  0x040EC  /* Packets TX (1024-1522 Bytes) - R/clr */
+#define E1000_MPTC     0x040F0  /* Multicast Packets TX Count - R/clr */
+#define E1000_BPTC     0x040F4  /* Broadcast Packets TX Count - R/clr */
+#define E1000_TSCTC    0x040F8  /* TCP Segmentation Context TX - R/clr */
+#define E1000_TSCTFC   0x040FC  /* TCP Segmentation Context TX Fail - R/clr */
+#define E1000_IAC      0x04100  /* Interrupt Assertion Count */
+/* Interrupt Cause Rx Packet Timer Expire Count */
+#define E1000_ICRXPTC  0x04104
+/* Interrupt Cause Rx Absolute Timer Expire Count */
+#define E1000_ICRXATC  0x04108
+/* Interrupt Cause Tx Packet Timer Expire Count */
+#define E1000_ICTXPTC  0x0410C
+/* Interrupt Cause Tx Absolute Timer Expire Count */
+#define E1000_ICTXATC  0x04110
+/* Interrupt Cause Tx Queue Empty Count */
+#define E1000_ICTXQEC  0x04118
+/* Interrupt Cause Tx Queue Minimum Threshold Count */
+#define E1000_ICTXQMTC 0x0411C
+/* Interrupt Cause Rx Descriptor Minimum Threshold Count */
+#define E1000_ICRXDMTC 0x04120
+#define E1000_ICRXOC   0x04124  /* Interrupt Cause Receiver Overrun Count */
+#define E1000_PCS_CFG0    0x04200  /* PCS Configuration 0 - RW */
+#define E1000_PCS_LCTL    0x04208  /* PCS Link Control - RW */
+#define E1000_PCS_LSTAT   0x0420C  /* PCS Link Status - RO */
+#define E1000_CBTMPC      0x0402C  /* Circuit Breaker TX Packet Count */
+#define E1000_HTDPMC      0x0403C  /* Host Transmit Discarded Packets */
+#define E1000_CBRMPC      0x040FC  /* Circuit Breaker RX Packet Count */
+#define E1000_RPTHC       0x04104  /* Rx Packets To Host */
+#define E1000_HGPTC       0x04118  /* Host Good Packets TX Count */
+#define E1000_HTCBDPC     0x04124  /* Host TX Circuit Breaker Dropped Count */
+#define E1000_HGORCL      0x04128  /* Host Good Octets Received Count Low */
+#define E1000_HGORCH      0x0412C  /* Host Good Octets Received Count High */
+#define E1000_HGOTCL      0x04130  /* Host Good Octets Transmit Count Low */
+#define E1000_HGOTCH      0x04134  /* Host Good Octets Transmit Count High */
+#define E1000_LENERRS     0x04138  /* Length Errors Count */
+#define E1000_SCVPC       0x04228  /* SerDes/SGMII Code Violation Pkt Count */
+#define E1000_PCS_ANADV   0x04218  /* AN advertisement - RW */
+#define E1000_PCS_LPAB    0x0421C  /* Link Partner Ability - RW */
+#define E1000_PCS_NPTX    0x04220  /* AN Next Page Transmit - RW */
+#define E1000_PCS_LPABNP  0x04224  /* Link Partner Ability Next Page - RW */
+#define E1000_RXCSUM   0x05000  /* RX Checksum Control - RW */
+#define E1000_RLPML    0x05004  /* RX Long Packet Max Length */
+#define E1000_RFCTL    0x05008  /* Receive Filter Control*/
+#define E1000_MTA      0x05200  /* Multicast Table Array - RW Array */
+#define E1000_RA       0x05400  /* Receive Address - RW Array */
+#define E1000_RA2      0x054E0  /* 2nd half of Rx address array - RW Array */
+#define E1000_PSRTYPE(_i)       (0x05480 + ((_i) * 4))
+#define E1000_RAL(_i)  (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \
+					(0x054E0 + ((_i - 16) * 8)))
+#define E1000_RAH(_i)  (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
+					(0x054E4 + ((_i - 16) * 8)))
+#define E1000_VLAPQF	0x055B0  /* VLAN Priority Queue Filter VLAPQF */
+#define E1000_IP4AT_REG(_i)     (0x05840 + ((_i) * 8))
+#define E1000_IP6AT_REG(_i)     (0x05880 + ((_i) * 4))
+#define E1000_WUPM_REG(_i)      (0x05A00 + ((_i) * 4))
+#define E1000_FFMT_REG(_i)      (0x09000 + ((_i) * 8))
+#define E1000_FFVT_REG(_i)      (0x09800 + ((_i) * 8))
+#define E1000_FFLT_REG(_i)      (0x05F00 + ((_i) * 8))
+#define E1000_VFTA     0x05600  /* VLAN Filter Table Array - RW Array */
+#define E1000_VT_CTL   0x0581C  /* VMDq Control - RW */
+#define E1000_WUC      0x05800  /* Wakeup Control - RW */
+#define E1000_WUFC     0x05808  /* Wakeup Filter Control - RW */
+#define E1000_WUS      0x05810  /* Wakeup Status - R/W1C */
+#define E1000_MANC     0x05820  /* Management Control - RW */
+#define E1000_IPAV     0x05838  /* IP Address Valid - RW */
+#define E1000_WUPL     0x05900  /* Wakeup Packet Length - RW */
+
+#define E1000_SW_FW_SYNC  0x05B5C /* Software-Firmware Synchronization - RW */
+#define E1000_CCMCTL      0x05B48 /* CCM Control Register */
+#define E1000_GIOCTL      0x05B44 /* GIO Analog Control Register */
+#define E1000_SCCTL       0x05B4C /* PCIc PLL Configuration Register */
+#define E1000_GCR         0x05B00 /* PCI-Ex Control */
+#define E1000_FACTPS    0x05B30 /* Function Active and Power State to MNG */
+#define E1000_SWSM      0x05B50 /* SW Semaphore */
+#define E1000_FWSM      0x05B54 /* FW Semaphore */
+#define E1000_DCA_CTRL  0x05B74 /* DCA Control - RW */
+
+/* RSS registers */
+#define E1000_MRQC      0x05818 /* Multiple Receive Control - RW */
+#define E1000_IMIR(_i)      (0x05A80 + ((_i) * 4))  /* Immediate Interrupt */
+#define E1000_IMIREXT(_i)   (0x05AA0 + ((_i) * 4))  /* Immediate Interrupt Ext*/
+#define E1000_IMIRVP    0x05AC0 /* Immediate Interrupt RX VLAN Priority - RW */
+/* MSI-X Allocation Register (_i) - RW */
+#define E1000_MSIXBM(_i)    (0x01600 + ((_i) * 4))
+/* Redirection Table - RW Array */
+#define E1000_RETA(_i)  (0x05C00 + ((_i) * 4))
+#define E1000_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* RSS Random Key - RW Array */
+
+/* VT Registers */
+#define E1000_MBVFICR   0x00C80 /* Mailbox VF Cause - RWC */
+#define E1000_MBVFIMR   0x00C84 /* Mailbox VF int Mask - RW */
+#define E1000_VFLRE     0x00C88 /* VF Register Events - RWC */
+#define E1000_VFRE      0x00C8C /* VF Receive Enables */
+#define E1000_VFTE      0x00C90 /* VF Transmit Enables */
+#define E1000_QDE       0x02408 /* Queue Drop Enable - RW */
+#define E1000_DTXSWC    0x03500 /* DMA Tx Switch Control - RW */
+#define E1000_WVBR      0x03554 /* VM Wrong Behavior - RWS */
+#define E1000_RPLOLR    0x05AF0 /* Replication Offload - RW */
+#define E1000_UTA       0x0A000 /* Unicast Table Array - RW */
+#define E1000_IOVTCL    0x05BBC /* IOV Control Register */
+#define E1000_TXSWC     0x05ACC /* Tx Switch Control */
+#define E1000_LVMMC	0x03548 /* Last VM Misbehavior cause */
+/* These act per VF so an array friendly macro is used */
+#define E1000_P2VMAILBOX(_n)   (0x00C00 + (4 * (_n)))
+#define E1000_VMBMEM(_n)       (0x00800 + (64 * (_n)))
+#define E1000_VMOLR(_n)        (0x05AD0 + (4 * (_n)))
+#define E1000_DVMOLR(_n)       (0x0C038 + (64 * (_n)))
+#define E1000_VLVF(_n)         (0x05D00 + (4 * (_n))) /* VLAN VM Filter */
+#define E1000_VMVIR(_n)        (0x03700 + (4 * (_n)))
+
+struct e1000_hw;
+
+u32 igb_rd32(struct e1000_hw *hw, u32 reg);
+
+/* write operations, indexed using DWORDS */
+#define wr32(reg, val) \
+do { \
+	u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
+	if (!E1000_REMOVED(hw_addr)) \
+		writel((val), &hw_addr[(reg)]); \
+} while (0)
+
+#define rd32(reg) (igb_rd32(hw, reg))
+
+#define wrfl() ((void)rd32(E1000_STATUS))
+
+#define array_wr32(reg, offset, value) \
+	wr32((reg) + ((offset) << 2), (value))
+
+#define array_rd32(reg, offset) (igb_rd32(hw, reg + ((offset) << 2)))
+
+/* DMA Coalescing registers */
+#define E1000_PCIEMISC	0x05BB8 /* PCIE misc config register */
+
+/* Energy Efficient Ethernet "EEE" register */
+#define E1000_IPCNFG	0x0E38 /* Internal PHY Configuration */
+#define E1000_EEER	0x0E30 /* Energy Efficient Ethernet */
+#define E1000_EEE_SU	0X0E34 /* EEE Setup */
+#define E1000_EMIADD	0x10   /* Extended Memory Indirect Address */
+#define E1000_EMIDATA	0x11   /* Extended Memory Indirect Data */
+#define E1000_MMDAC	13     /* MMD Access Control */
+#define E1000_MMDAAD	14     /* MMD Access Address/Data */
+
+/* Thermal Sensor Register */
+#define E1000_THSTAT	0x08110 /* Thermal Sensor Status */
+
+/* OS2BMC Registers */
+#define E1000_B2OSPC	0x08FE0 /* BMC2OS packets sent by BMC */
+#define E1000_B2OGPRC	0x04158 /* BMC2OS packets received by host */
+#define E1000_O2BGPTC	0x08FE4 /* OS2BMC packets received by BMC */
+#define E1000_O2BSPC	0x0415C /* OS2BMC packets transmitted by host */
+
+#define E1000_SRWR		0x12018  /* Shadow Ram Write Register - RW */
+#define E1000_I210_FLMNGCTL	0x12038
+#define E1000_I210_FLMNGDATA	0x1203C
+#define E1000_I210_FLMNGCNT	0x12040
+
+#define E1000_I210_FLSWCTL	0x12048
+#define E1000_I210_FLSWDATA	0x1204C
+#define E1000_I210_FLSWCNT	0x12050
+
+#define E1000_I210_FLA		0x1201C
+
+#define E1000_I210_DTXMXPKTSZ	0x355C
+
+#define E1000_I210_TXDCTL(_n)	(0x0E028 + ((_n) * 0x40))
+
+#define E1000_I210_TQAVCTRL	0x3570
+#define E1000_I210_TQAVCC(_n)	(0x3004 + ((_n) * 0x40))
+#define E1000_I210_TQAVHC(_n)	(0x300C + ((_n) * 0x40))
+
+#define E1000_I210_RR2DCDELAY	0x5BF4
+
+#define E1000_INVM_DATA_REG(_n)	(0x12120 + 4*(_n))
+#define E1000_INVM_SIZE		64 /* Number of INVM Data Registers */
+
+#define E1000_REMOVED(h) unlikely(!(h))
+
+#endif
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
new file mode 100644
index 0000000000..a2b759531c
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -0,0 +1,810 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+/* Linux PRO/1000 Ethernet Driver main header file */
+
+#ifndef _IGB_H_
+#define _IGB_H_
+
+#include "e1000_mac.h"
+#include "e1000_82575.h"
+
+#include <linux/timecounter.h>
+#include <linux/net_tstamp.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/bitops.h>
+#include <linux/if_vlan.h>
+#include <linux/i2c.h>
+#include <linux/i2c-algo-bit.h>
+#include <linux/pci.h>
+#include <linux/mdio.h>
+
+#include <net/xdp.h>
+
+struct igb_adapter;
+
+#define E1000_PCS_CFG_IGN_SD	1
+
+/* Interrupt defines */
+#define IGB_START_ITR		648 /* ~6000 ints/sec */
+#define IGB_4K_ITR		980
+#define IGB_20K_ITR		196
+#define IGB_70K_ITR		56
+
+/* TX/RX descriptor defines */
+#define IGB_DEFAULT_TXD		256
+#define IGB_DEFAULT_TX_WORK	128
+#define IGB_MIN_TXD		64
+#define IGB_MAX_TXD		4096
+
+#define IGB_DEFAULT_RXD		256
+#define IGB_MIN_RXD		64
+#define IGB_MAX_RXD		4096
+
+#define IGB_DEFAULT_ITR		3 /* dynamic */
+#define IGB_MAX_ITR_USECS	10000
+#define IGB_MIN_ITR_USECS	10
+#define NON_Q_VECTORS		1
+#define MAX_Q_VECTORS		8
+#define MAX_MSIX_ENTRIES	10
+
+/* Transmit and receive queues */
+#define IGB_MAX_RX_QUEUES	8
+#define IGB_MAX_RX_QUEUES_82575	4
+#define IGB_MAX_RX_QUEUES_I211	2
+#define IGB_MAX_TX_QUEUES	8
+#define IGB_MAX_VF_MC_ENTRIES	30
+#define IGB_MAX_VF_FUNCTIONS	8
+#define IGB_MAX_VFTA_ENTRIES	128
+#define IGB_82576_VF_DEV_ID	0x10CA
+#define IGB_I350_VF_DEV_ID	0x1520
+
+/* NVM version defines */
+#define IGB_MAJOR_MASK		0xF000
+#define IGB_MINOR_MASK		0x0FF0
+#define IGB_BUILD_MASK		0x000F
+#define IGB_COMB_VER_MASK	0x00FF
+#define IGB_MAJOR_SHIFT		12
+#define IGB_MINOR_SHIFT		4
+#define IGB_COMB_VER_SHFT	8
+#define IGB_NVM_VER_INVALID	0xFFFF
+#define IGB_ETRACK_SHIFT	16
+#define NVM_ETRACK_WORD		0x0042
+#define NVM_COMB_VER_OFF	0x0083
+#define NVM_COMB_VER_PTR	0x003d
+
+/* Transmit and receive latency (for PTP timestamps) */
+#define IGB_I210_TX_LATENCY_10		9542
+#define IGB_I210_TX_LATENCY_100		1024
+#define IGB_I210_TX_LATENCY_1000	178
+#define IGB_I210_RX_LATENCY_10		20662
+#define IGB_I210_RX_LATENCY_100		2213
+#define IGB_I210_RX_LATENCY_1000	448
+
+/* XDP */
+#define IGB_XDP_PASS		0
+#define IGB_XDP_CONSUMED	BIT(0)
+#define IGB_XDP_TX		BIT(1)
+#define IGB_XDP_REDIR		BIT(2)
+
+struct vf_data_storage {
+	unsigned char vf_mac_addresses[ETH_ALEN];
+	u16 vf_mc_hashes[IGB_MAX_VF_MC_ENTRIES];
+	u16 num_vf_mc_hashes;
+	u32 flags;
+	unsigned long last_nack;
+	u16 pf_vlan; /* When set, guest VLAN config not allowed. */
+	u16 pf_qos;
+	u16 tx_rate;
+	bool spoofchk_enabled;
+	bool trusted;
+};
+
+/* Number of unicast MAC filters reserved for the PF in the RAR registers */
+#define IGB_PF_MAC_FILTERS_RESERVED	3
+
+struct vf_mac_filter {
+	struct list_head l;
+	int vf;
+	bool free;
+	u8 vf_mac[ETH_ALEN];
+};
+
+#define IGB_VF_FLAG_CTS            0x00000001 /* VF is clear to send data */
+#define IGB_VF_FLAG_UNI_PROMISC    0x00000002 /* VF has unicast promisc */
+#define IGB_VF_FLAG_MULTI_PROMISC  0x00000004 /* VF has multicast promisc */
+#define IGB_VF_FLAG_PF_SET_MAC     0x00000008 /* PF has set MAC address */
+
+/* RX descriptor control thresholds.
+ * PTHRESH - MAC will consider prefetch if it has fewer than this number of
+ *           descriptors available in its onboard memory.
+ *           Setting this to 0 disables RX descriptor prefetch.
+ * HTHRESH - MAC will only prefetch if there are at least this many descriptors
+ *           available in host memory.
+ *           If PTHRESH is 0, this should also be 0.
+ * WTHRESH - RX descriptor writeback threshold - MAC will delay writing back
+ *           descriptors until either it has this many to write back, or the
+ *           ITR timer expires.
+ */
+#define IGB_RX_PTHRESH	((hw->mac.type == e1000_i354) ? 12 : 8)
+#define IGB_RX_HTHRESH	8
+#define IGB_TX_PTHRESH	((hw->mac.type == e1000_i354) ? 20 : 8)
+#define IGB_TX_HTHRESH	1
+#define IGB_RX_WTHRESH	((hw->mac.type == e1000_82576 && \
+			  (adapter->flags & IGB_FLAG_HAS_MSIX)) ? 1 : 4)
+#define IGB_TX_WTHRESH	((hw->mac.type == e1000_82576 && \
+			  (adapter->flags & IGB_FLAG_HAS_MSIX)) ? 1 : 16)
+
+/* this is the size past which hardware will drop packets when setting LPE=0 */
+#define MAXIMUM_ETHERNET_VLAN_SIZE 1522
+
+#define IGB_ETH_PKT_HDR_PAD	(ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
+
+/* Supported Rx Buffer Sizes */
+#define IGB_RXBUFFER_256	256
+#define IGB_RXBUFFER_1536	1536
+#define IGB_RXBUFFER_2048	2048
+#define IGB_RXBUFFER_3072	3072
+#define IGB_RX_HDR_LEN		IGB_RXBUFFER_256
+#define IGB_TS_HDR_LEN		16
+
+/* Attempt to maximize the headroom available for incoming frames.  We
+ * use a 2K buffer for receives and need 1536/1534 to store the data for
+ * the frame.  This leaves us with 512 bytes of room.  From that we need
+ * to deduct the space needed for the shared info and the padding needed
+ * to IP align the frame.
+ *
+ * Note: For cache line sizes 256 or larger this value is going to end
+ *	 up negative.  In these cases we should fall back to the 3K
+ *	 buffers.
+ */
+#if (PAGE_SIZE < 8192)
+#define IGB_MAX_FRAME_BUILD_SKB (IGB_RXBUFFER_1536 - NET_IP_ALIGN)
+#define IGB_2K_TOO_SMALL_WITH_PADDING \
+((NET_SKB_PAD + IGB_TS_HDR_LEN + IGB_RXBUFFER_1536) > SKB_WITH_OVERHEAD(IGB_RXBUFFER_2048))
+
+static inline int igb_compute_pad(int rx_buf_len)
+{
+	int page_size, pad_size;
+
+	page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
+	pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
+
+	return pad_size;
+}
+
+static inline int igb_skb_pad(void)
+{
+	int rx_buf_len;
+
+	/* If a 2K buffer cannot handle a standard Ethernet frame then
+	 * optimize padding for a 3K buffer instead of a 1.5K buffer.
+	 *
+	 * For a 3K buffer we need to add enough padding to allow for
+	 * tailroom due to NET_IP_ALIGN possibly shifting us out of
+	 * cache-line alignment.
+	 */
+	if (IGB_2K_TOO_SMALL_WITH_PADDING)
+		rx_buf_len = IGB_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
+	else
+		rx_buf_len = IGB_RXBUFFER_1536;
+
+	/* if needed make room for NET_IP_ALIGN */
+	rx_buf_len -= NET_IP_ALIGN;
+
+	return igb_compute_pad(rx_buf_len);
+}
+
+#define IGB_SKB_PAD	igb_skb_pad()
+#else
+#define IGB_SKB_PAD	(NET_SKB_PAD + NET_IP_ALIGN)
+#endif
+
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define IGB_RX_BUFFER_WRITE	16 /* Must be power of 2 */
+
+#define IGB_RX_DMA_ATTR \
+	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+#define AUTO_ALL_MODES		0
+#define IGB_EEPROM_APME		0x0400
+
+#ifndef IGB_MASTER_SLAVE
+/* Switch to override PHY master/slave setting */
+#define IGB_MASTER_SLAVE	e1000_ms_hw_default
+#endif
+
+#define IGB_MNG_VLAN_NONE	-1
+
+enum igb_tx_flags {
+	/* cmd_type flags */
+	IGB_TX_FLAGS_VLAN	= 0x01,
+	IGB_TX_FLAGS_TSO	= 0x02,
+	IGB_TX_FLAGS_TSTAMP	= 0x04,
+
+	/* olinfo flags */
+	IGB_TX_FLAGS_IPV4	= 0x10,
+	IGB_TX_FLAGS_CSUM	= 0x20,
+};
+
+/* VLAN info */
+#define IGB_TX_FLAGS_VLAN_MASK	0xffff0000
+#define IGB_TX_FLAGS_VLAN_SHIFT	16
+
+/* The largest size we can write to the descriptor is 65535.  In order to
+ * maintain a power of two alignment we have to limit ourselves to 32K.
+ */
+#define IGB_MAX_TXD_PWR	15
+#define IGB_MAX_DATA_PER_TXD	(1u << IGB_MAX_TXD_PWR)
+
+/* Tx Descriptors needed, worst case */
+#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGB_MAX_DATA_PER_TXD)
+#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
+
+/* EEPROM byte offsets */
+#define IGB_SFF_8472_SWAP		0x5C
+#define IGB_SFF_8472_COMP		0x5E
+
+/* Bitmasks */
+#define IGB_SFF_ADDRESSING_MODE		0x4
+#define IGB_SFF_8472_UNSUP		0x00
+
+/* TX resources are shared between XDP and netstack
+ * and we need to tag the buffer type to distinguish them
+ */
+enum igb_tx_buf_type {
+	IGB_TYPE_SKB = 0,
+	IGB_TYPE_XDP,
+};
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer
+ */
+struct igb_tx_buffer {
+	union e1000_adv_tx_desc *next_to_watch;
+	unsigned long time_stamp;
+	enum igb_tx_buf_type type;
+	union {
+		struct sk_buff *skb;
+		struct xdp_frame *xdpf;
+	};
+	unsigned int bytecount;
+	u16 gso_segs;
+	__be16 protocol;
+
+	DEFINE_DMA_UNMAP_ADDR(dma);
+	DEFINE_DMA_UNMAP_LEN(len);
+	u32 tx_flags;
+};
+
+struct igb_rx_buffer {
+	dma_addr_t dma;
+	struct page *page;
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+	__u32 page_offset;
+#else
+	__u16 page_offset;
+#endif
+	__u16 pagecnt_bias;
+};
+
+struct igb_tx_queue_stats {
+	u64 packets;
+	u64 bytes;
+	u64 restart_queue;
+	u64 restart_queue2;
+};
+
+struct igb_rx_queue_stats {
+	u64 packets;
+	u64 bytes;
+	u64 drops;
+	u64 csum_err;
+	u64 alloc_failed;
+};
+
+struct igb_ring_container {
+	struct igb_ring *ring;		/* pointer to linked list of rings */
+	unsigned int total_bytes;	/* total bytes processed this int */
+	unsigned int total_packets;	/* total packets processed this int */
+	u16 work_limit;			/* total work allowed per interrupt */
+	u8 count;			/* total number of rings in vector */
+	u8 itr;				/* current ITR setting for ring */
+};
+
+struct igb_ring {
+	struct igb_q_vector *q_vector;	/* backlink to q_vector */
+	struct net_device *netdev;	/* back pointer to net_device */
+	struct bpf_prog *xdp_prog;
+	struct device *dev;		/* device pointer for dma mapping */
+	union {				/* array of buffer info structs */
+		struct igb_tx_buffer *tx_buffer_info;
+		struct igb_rx_buffer *rx_buffer_info;
+	};
+	void *desc;			/* descriptor ring memory */
+	unsigned long flags;		/* ring specific flags */
+	void __iomem *tail;		/* pointer to ring tail register */
+	dma_addr_t dma;			/* phys address of the ring */
+	unsigned int  size;		/* length of desc. ring in bytes */
+
+	u16 count;			/* number of desc. in the ring */
+	u8 queue_index;			/* logical index of the ring*/
+	u8 reg_idx;			/* physical index of the ring */
+	bool launchtime_enable;		/* true if LaunchTime is enabled */
+	bool cbs_enable;		/* indicates if CBS is enabled */
+	s32 idleslope;			/* idleSlope in kbps */
+	s32 sendslope;			/* sendSlope in kbps */
+	s32 hicredit;			/* hiCredit in bytes */
+	s32 locredit;			/* loCredit in bytes */
+
+	/* everything past this point are written often */
+	u16 next_to_clean;
+	u16 next_to_use;
+	u16 next_to_alloc;
+
+	union {
+		/* TX */
+		struct {
+			struct igb_tx_queue_stats tx_stats;
+			struct u64_stats_sync tx_syncp;
+			struct u64_stats_sync tx_syncp2;
+		};
+		/* RX */
+		struct {
+			struct sk_buff *skb;
+			struct igb_rx_queue_stats rx_stats;
+			struct u64_stats_sync rx_syncp;
+		};
+	};
+	struct xdp_rxq_info xdp_rxq;
+} ____cacheline_internodealigned_in_smp;
+
+struct igb_q_vector {
+	struct igb_adapter *adapter;	/* backlink */
+	int cpu;			/* CPU for DCA */
+	u32 eims_value;			/* EIMS mask value */
+
+	u16 itr_val;
+	u8 set_itr;
+	void __iomem *itr_register;
+
+	struct igb_ring_container rx, tx;
+
+	struct napi_struct napi;
+	struct rcu_head rcu;	/* to avoid race with update stats on free */
+	char name[IFNAMSIZ + 9];
+
+	/* for dynamic allocation of rings associated with this q_vector */
+	struct igb_ring ring[] ____cacheline_internodealigned_in_smp;
+};
+
+enum e1000_ring_flags_t {
+	IGB_RING_FLAG_RX_3K_BUFFER,
+	IGB_RING_FLAG_RX_BUILD_SKB_ENABLED,
+	IGB_RING_FLAG_RX_SCTP_CSUM,
+	IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
+	IGB_RING_FLAG_TX_CTX_IDX,
+	IGB_RING_FLAG_TX_DETECT_HANG
+};
+
+#define ring_uses_large_buffer(ring) \
+	test_bit(IGB_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+#define set_ring_uses_large_buffer(ring) \
+	set_bit(IGB_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+#define clear_ring_uses_large_buffer(ring) \
+	clear_bit(IGB_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+
+#define ring_uses_build_skb(ring) \
+	test_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
+#define set_ring_build_skb_enabled(ring) \
+	set_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
+#define clear_ring_build_skb_enabled(ring) \
+	clear_bit(IGB_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
+
+static inline unsigned int igb_rx_bufsz(struct igb_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring_uses_large_buffer(ring))
+		return IGB_RXBUFFER_3072;
+
+	if (ring_uses_build_skb(ring))
+		return IGB_MAX_FRAME_BUILD_SKB;
+#endif
+	return IGB_RXBUFFER_2048;
+}
+
+static inline unsigned int igb_rx_pg_order(struct igb_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring_uses_large_buffer(ring))
+		return 1;
+#endif
+	return 0;
+}
+
+#define igb_rx_pg_size(_ring) (PAGE_SIZE << igb_rx_pg_order(_ring))
+
+#define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS)
+
+#define IGB_RX_DESC(R, i)	\
+	(&(((union e1000_adv_rx_desc *)((R)->desc))[i]))
+#define IGB_TX_DESC(R, i)	\
+	(&(((union e1000_adv_tx_desc *)((R)->desc))[i]))
+#define IGB_TX_CTXTDESC(R, i)	\
+	(&(((struct e1000_adv_tx_context_desc *)((R)->desc))[i]))
+
+/* igb_test_staterr - tests bits within Rx descriptor status and error fields */
+static inline __le32 igb_test_staterr(union e1000_adv_rx_desc *rx_desc,
+				      const u32 stat_err_bits)
+{
+	return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
+}
+
+/* igb_desc_unused - calculate if we have unused descriptors */
+static inline int igb_desc_unused(struct igb_ring *ring)
+{
+	if (ring->next_to_clean > ring->next_to_use)
+		return ring->next_to_clean - ring->next_to_use - 1;
+
+	return ring->count + ring->next_to_clean - ring->next_to_use - 1;
+}
+
+#ifdef CONFIG_IGB_HWMON
+
+#define IGB_HWMON_TYPE_LOC	0
+#define IGB_HWMON_TYPE_TEMP	1
+#define IGB_HWMON_TYPE_CAUTION	2
+#define IGB_HWMON_TYPE_MAX	3
+
+struct hwmon_attr {
+	struct device_attribute dev_attr;
+	struct e1000_hw *hw;
+	struct e1000_thermal_diode_data *sensor;
+	char name[12];
+	};
+
+struct hwmon_buff {
+	struct attribute_group group;
+	const struct attribute_group *groups[2];
+	struct attribute *attrs[E1000_MAX_SENSORS * 4 + 1];
+	struct hwmon_attr hwmon_list[E1000_MAX_SENSORS * 4];
+	unsigned int n_hwmon;
+	};
+#endif
+
+/* The number of L2 ether-type filter registers, Index 3 is reserved
+ * for PTP 1588 timestamp
+ */
+#define MAX_ETYPE_FILTER	(4 - 1)
+/* ETQF filter list: one static filter per filter consumer. This is
+ * to avoid filter collisions later. Add new filters here!!
+ *
+ * Current filters:		Filter 3
+ */
+#define IGB_ETQF_FILTER_1588	3
+
+#define IGB_N_EXTTS	2
+#define IGB_N_PEROUT	2
+#define IGB_N_SDP	4
+#define IGB_RETA_SIZE	128
+
+enum igb_filter_match_flags {
+	IGB_FILTER_FLAG_ETHER_TYPE = 0x1,
+	IGB_FILTER_FLAG_VLAN_TCI   = 0x2,
+	IGB_FILTER_FLAG_SRC_MAC_ADDR   = 0x4,
+	IGB_FILTER_FLAG_DST_MAC_ADDR   = 0x8,
+};
+
+#define IGB_MAX_RXNFC_FILTERS 16
+
+/* RX network flow classification data structure */
+struct igb_nfc_input {
+	/* Byte layout in order, all values with MSB first:
+	 * match_flags - 1 byte
+	 * etype - 2 bytes
+	 * vlan_tci - 2 bytes
+	 */
+	u8 match_flags;
+	__be16 etype;
+	__be16 vlan_tci;
+	u8 src_addr[ETH_ALEN];
+	u8 dst_addr[ETH_ALEN];
+};
+
+struct igb_nfc_filter {
+	struct hlist_node nfc_node;
+	struct igb_nfc_input filter;
+	unsigned long cookie;
+	u16 etype_reg_index;
+	u16 sw_idx;
+	u16 action;
+};
+
+struct igb_mac_addr {
+	u8 addr[ETH_ALEN];
+	u8 queue;
+	u8 state; /* bitmask */
+};
+
+#define IGB_MAC_STATE_DEFAULT	0x1
+#define IGB_MAC_STATE_IN_USE	0x2
+#define IGB_MAC_STATE_SRC_ADDR	0x4
+#define IGB_MAC_STATE_QUEUE_STEERING 0x8
+
+/* board specific private data structure */
+struct igb_adapter {
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+
+	struct net_device *netdev;
+	struct bpf_prog *xdp_prog;
+
+	unsigned long state;
+	unsigned int flags;
+
+	unsigned int num_q_vectors;
+	struct msix_entry msix_entries[MAX_MSIX_ENTRIES];
+
+	/* Interrupt Throttle Rate */
+	u32 rx_itr_setting;
+	u32 tx_itr_setting;
+	u16 tx_itr;
+	u16 rx_itr;
+
+	/* TX */
+	u16 tx_work_limit;
+	u32 tx_timeout_count;
+	int num_tx_queues;
+	struct igb_ring *tx_ring[16];
+
+	/* RX */
+	int num_rx_queues;
+	struct igb_ring *rx_ring[16];
+
+	u32 max_frame_size;
+	u32 min_frame_size;
+
+	struct timer_list watchdog_timer;
+	struct timer_list phy_info_timer;
+
+	u16 mng_vlan_id;
+	u32 bd_number;
+	u32 wol;
+	u32 en_mng_pt;
+	u16 link_speed;
+	u16 link_duplex;
+
+	u8 __iomem *io_addr; /* Mainly for iounmap use */
+
+	struct work_struct reset_task;
+	struct work_struct watchdog_task;
+	bool fc_autoneg;
+	u8  tx_timeout_factor;
+	struct timer_list blink_timer;
+	unsigned long led_status;
+
+	/* OS defined structs */
+	struct pci_dev *pdev;
+
+	spinlock_t stats64_lock;
+	struct rtnl_link_stats64 stats64;
+
+	/* structs defined in e1000_hw.h */
+	struct e1000_hw hw;
+	struct e1000_hw_stats stats;
+	struct e1000_phy_info phy_info;
+
+	u32 test_icr;
+	struct igb_ring test_tx_ring;
+	struct igb_ring test_rx_ring;
+
+	int msg_enable;
+
+	struct igb_q_vector *q_vector[MAX_Q_VECTORS];
+	u32 eims_enable_mask;
+	u32 eims_other;
+
+	/* to not mess up cache alignment, always add to the bottom */
+	u16 tx_ring_count;
+	u16 rx_ring_count;
+	unsigned int vfs_allocated_count;
+	struct vf_data_storage *vf_data;
+	int vf_rate_link_speed;
+	u32 rss_queues;
+	u32 wvbr;
+	u32 *shadow_vfta;
+
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info ptp_caps;
+	struct delayed_work ptp_overflow_work;
+	struct work_struct ptp_tx_work;
+	struct sk_buff *ptp_tx_skb;
+	struct hwtstamp_config tstamp_config;
+	unsigned long ptp_tx_start;
+	unsigned long last_rx_ptp_check;
+	unsigned long last_rx_timestamp;
+	unsigned int ptp_flags;
+	spinlock_t tmreg_lock;
+	struct cyclecounter cc;
+	struct timecounter tc;
+	u32 tx_hwtstamp_timeouts;
+	u32 tx_hwtstamp_skipped;
+	u32 rx_hwtstamp_cleared;
+	bool pps_sys_wrap_on;
+
+	struct ptp_pin_desc sdp_config[IGB_N_SDP];
+	struct {
+		struct timespec64 start;
+		struct timespec64 period;
+	} perout[IGB_N_PEROUT];
+
+	char fw_version[32];
+#ifdef CONFIG_IGB_HWMON
+	struct hwmon_buff *igb_hwmon_buff;
+	bool ets;
+#endif
+	struct i2c_algo_bit_data i2c_algo;
+	struct i2c_adapter i2c_adap;
+	struct i2c_client *i2c_client;
+	u32 rss_indir_tbl_init;
+	u8 rss_indir_tbl[IGB_RETA_SIZE];
+
+	unsigned long link_check_timeout;
+	int copper_tries;
+	struct e1000_info ei;
+	u16 eee_advert;
+
+	/* RX network flow classification support */
+	struct hlist_head nfc_filter_list;
+	struct hlist_head cls_flower_list;
+	unsigned int nfc_filter_count;
+	/* lock for RX network flow classification filter */
+	spinlock_t nfc_lock;
+	bool etype_bitmap[MAX_ETYPE_FILTER];
+
+	struct igb_mac_addr *mac_table;
+	struct vf_mac_filter vf_macs;
+	struct vf_mac_filter *vf_mac_list;
+	/* lock for VF resources */
+	spinlock_t vfs_lock;
+};
+
+/* flags controlling PTP/1588 function */
+#define IGB_PTP_ENABLED		BIT(0)
+#define IGB_PTP_OVERFLOW_CHECK	BIT(1)
+
+#define IGB_FLAG_HAS_MSI		BIT(0)
+#define IGB_FLAG_DCA_ENABLED		BIT(1)
+#define IGB_FLAG_QUAD_PORT_A		BIT(2)
+#define IGB_FLAG_QUEUE_PAIRS		BIT(3)
+#define IGB_FLAG_DMAC			BIT(4)
+#define IGB_FLAG_RSS_FIELD_IPV4_UDP	BIT(6)
+#define IGB_FLAG_RSS_FIELD_IPV6_UDP	BIT(7)
+#define IGB_FLAG_WOL_SUPPORTED		BIT(8)
+#define IGB_FLAG_NEED_LINK_UPDATE	BIT(9)
+#define IGB_FLAG_MEDIA_RESET		BIT(10)
+#define IGB_FLAG_MAS_CAPABLE		BIT(11)
+#define IGB_FLAG_MAS_ENABLE		BIT(12)
+#define IGB_FLAG_HAS_MSIX		BIT(13)
+#define IGB_FLAG_EEE			BIT(14)
+#define IGB_FLAG_VLAN_PROMISC		BIT(15)
+#define IGB_FLAG_RX_LEGACY		BIT(16)
+#define IGB_FLAG_FQTSS			BIT(17)
+
+/* Media Auto Sense */
+#define IGB_MAS_ENABLE_0		0X0001
+#define IGB_MAS_ENABLE_1		0X0002
+#define IGB_MAS_ENABLE_2		0X0004
+#define IGB_MAS_ENABLE_3		0X0008
+
+/* DMA Coalescing defines */
+#define IGB_MIN_TXPBSIZE	20408
+#define IGB_TX_BUF_4096		4096
+#define IGB_DMCTLX_DCFLUSH_DIS	0x80000000  /* Disable DMA Coal Flush */
+
+#define IGB_82576_TSYNC_SHIFT	19
+enum e1000_state_t {
+	__IGB_TESTING,
+	__IGB_RESETTING,
+	__IGB_DOWN,
+	__IGB_PTP_TX_IN_PROGRESS,
+};
+
+enum igb_boards {
+	board_82575,
+};
+
+extern char igb_driver_name[];
+
+int igb_xmit_xdp_ring(struct igb_adapter *adapter,
+		      struct igb_ring *ring,
+		      struct xdp_frame *xdpf);
+int igb_open(struct net_device *netdev);
+int igb_close(struct net_device *netdev);
+int igb_up(struct igb_adapter *);
+void igb_down(struct igb_adapter *);
+void igb_reinit_locked(struct igb_adapter *);
+void igb_reset(struct igb_adapter *);
+int igb_reinit_queues(struct igb_adapter *);
+void igb_write_rss_indir_tbl(struct igb_adapter *);
+int igb_set_spd_dplx(struct igb_adapter *, u32, u8);
+int igb_setup_tx_resources(struct igb_ring *);
+int igb_setup_rx_resources(struct igb_ring *);
+void igb_free_tx_resources(struct igb_ring *);
+void igb_free_rx_resources(struct igb_ring *);
+void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *);
+void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *);
+void igb_setup_tctl(struct igb_adapter *);
+void igb_setup_rctl(struct igb_adapter *);
+void igb_setup_srrctl(struct igb_adapter *, struct igb_ring *);
+netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *);
+void igb_alloc_rx_buffers(struct igb_ring *, u16);
+void igb_update_stats(struct igb_adapter *);
+bool igb_has_link(struct igb_adapter *adapter);
+void igb_set_ethtool_ops(struct net_device *);
+void igb_power_up_link(struct igb_adapter *);
+void igb_set_fw_version(struct igb_adapter *);
+void igb_ptp_init(struct igb_adapter *adapter);
+void igb_ptp_stop(struct igb_adapter *adapter);
+void igb_ptp_reset(struct igb_adapter *adapter);
+void igb_ptp_suspend(struct igb_adapter *adapter);
+void igb_ptp_rx_hang(struct igb_adapter *adapter);
+void igb_ptp_tx_hang(struct igb_adapter *adapter);
+void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb);
+int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
+			ktime_t *timestamp);
+int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
+int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
+void igb_set_flag_queue_pairs(struct igb_adapter *, const u32);
+unsigned int igb_get_max_rss_queues(struct igb_adapter *);
+#ifdef CONFIG_IGB_HWMON
+void igb_sysfs_exit(struct igb_adapter *adapter);
+int igb_sysfs_init(struct igb_adapter *adapter);
+#endif
+static inline s32 igb_reset_phy(struct e1000_hw *hw)
+{
+	if (hw->phy.ops.reset)
+		return hw->phy.ops.reset(hw);
+
+	return 0;
+}
+
+static inline s32 igb_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data)
+{
+	if (hw->phy.ops.read_reg)
+		return hw->phy.ops.read_reg(hw, offset, data);
+
+	return 0;
+}
+
+static inline s32 igb_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data)
+{
+	if (hw->phy.ops.write_reg)
+		return hw->phy.ops.write_reg(hw, offset, data);
+
+	return 0;
+}
+
+static inline s32 igb_get_phy_info(struct e1000_hw *hw)
+{
+	if (hw->phy.ops.get_phy_info)
+		return hw->phy.ops.get_phy_info(hw);
+
+	return 0;
+}
+
+static inline struct netdev_queue *txring_txq(const struct igb_ring *tx_ring)
+{
+	return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
+}
+
+int igb_add_filter(struct igb_adapter *adapter,
+		   struct igb_nfc_filter *input);
+int igb_erase_filter(struct igb_adapter *adapter,
+		     struct igb_nfc_filter *input);
+
+int igb_add_mac_steering_filter(struct igb_adapter *adapter,
+				const u8 *addr, u8 queue, u8 flags);
+int igb_del_mac_steering_filter(struct igb_adapter *adapter,
+				const u8 *addr, u8 queue, u8 flags);
+
+#endif /* _IGB_H_ */
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
new file mode 100644
index 0000000000..4ee849985e
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -0,0 +1,3511 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+/* ethtool support for igb */
+
+#include <linux/vmalloc.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/ethtool.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+#include <linux/highmem.h>
+#include <linux/mdio.h>
+
+#include "igb.h"
+
+struct igb_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+#define IGB_STAT(_name, _stat) { \
+	.stat_string = _name, \
+	.sizeof_stat = sizeof_field(struct igb_adapter, _stat), \
+	.stat_offset = offsetof(struct igb_adapter, _stat) \
+}
+static const struct igb_stats igb_gstrings_stats[] = {
+	IGB_STAT("rx_packets", stats.gprc),
+	IGB_STAT("tx_packets", stats.gptc),
+	IGB_STAT("rx_bytes", stats.gorc),
+	IGB_STAT("tx_bytes", stats.gotc),
+	IGB_STAT("rx_broadcast", stats.bprc),
+	IGB_STAT("tx_broadcast", stats.bptc),
+	IGB_STAT("rx_multicast", stats.mprc),
+	IGB_STAT("tx_multicast", stats.mptc),
+	IGB_STAT("multicast", stats.mprc),
+	IGB_STAT("collisions", stats.colc),
+	IGB_STAT("rx_crc_errors", stats.crcerrs),
+	IGB_STAT("rx_no_buffer_count", stats.rnbc),
+	IGB_STAT("rx_missed_errors", stats.mpc),
+	IGB_STAT("tx_aborted_errors", stats.ecol),
+	IGB_STAT("tx_carrier_errors", stats.tncrs),
+	IGB_STAT("tx_window_errors", stats.latecol),
+	IGB_STAT("tx_abort_late_coll", stats.latecol),
+	IGB_STAT("tx_deferred_ok", stats.dc),
+	IGB_STAT("tx_single_coll_ok", stats.scc),
+	IGB_STAT("tx_multi_coll_ok", stats.mcc),
+	IGB_STAT("tx_timeout_count", tx_timeout_count),
+	IGB_STAT("rx_long_length_errors", stats.roc),
+	IGB_STAT("rx_short_length_errors", stats.ruc),
+	IGB_STAT("rx_align_errors", stats.algnerrc),
+	IGB_STAT("tx_tcp_seg_good", stats.tsctc),
+	IGB_STAT("tx_tcp_seg_failed", stats.tsctfc),
+	IGB_STAT("rx_flow_control_xon", stats.xonrxc),
+	IGB_STAT("rx_flow_control_xoff", stats.xoffrxc),
+	IGB_STAT("tx_flow_control_xon", stats.xontxc),
+	IGB_STAT("tx_flow_control_xoff", stats.xofftxc),
+	IGB_STAT("rx_long_byte_count", stats.gorc),
+	IGB_STAT("tx_dma_out_of_sync", stats.doosync),
+	IGB_STAT("tx_smbus", stats.mgptc),
+	IGB_STAT("rx_smbus", stats.mgprc),
+	IGB_STAT("dropped_smbus", stats.mgpdc),
+	IGB_STAT("os2bmc_rx_by_bmc", stats.o2bgptc),
+	IGB_STAT("os2bmc_tx_by_bmc", stats.b2ospc),
+	IGB_STAT("os2bmc_tx_by_host", stats.o2bspc),
+	IGB_STAT("os2bmc_rx_by_host", stats.b2ogprc),
+	IGB_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts),
+	IGB_STAT("tx_hwtstamp_skipped", tx_hwtstamp_skipped),
+	IGB_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared),
+};
+
+#define IGB_NETDEV_STAT(_net_stat) { \
+	.stat_string = __stringify(_net_stat), \
+	.sizeof_stat = sizeof_field(struct rtnl_link_stats64, _net_stat), \
+	.stat_offset = offsetof(struct rtnl_link_stats64, _net_stat) \
+}
+static const struct igb_stats igb_gstrings_net_stats[] = {
+	IGB_NETDEV_STAT(rx_errors),
+	IGB_NETDEV_STAT(tx_errors),
+	IGB_NETDEV_STAT(tx_dropped),
+	IGB_NETDEV_STAT(rx_length_errors),
+	IGB_NETDEV_STAT(rx_over_errors),
+	IGB_NETDEV_STAT(rx_frame_errors),
+	IGB_NETDEV_STAT(rx_fifo_errors),
+	IGB_NETDEV_STAT(tx_fifo_errors),
+	IGB_NETDEV_STAT(tx_heartbeat_errors)
+};
+
+#define IGB_GLOBAL_STATS_LEN	\
+	(sizeof(igb_gstrings_stats) / sizeof(struct igb_stats))
+#define IGB_NETDEV_STATS_LEN	\
+	(sizeof(igb_gstrings_net_stats) / sizeof(struct igb_stats))
+#define IGB_RX_QUEUE_STATS_LEN \
+	(sizeof(struct igb_rx_queue_stats) / sizeof(u64))
+
+#define IGB_TX_QUEUE_STATS_LEN 3 /* packets, bytes, restart_queue */
+
+#define IGB_QUEUE_STATS_LEN \
+	((((struct igb_adapter *)netdev_priv(netdev))->num_rx_queues * \
+	  IGB_RX_QUEUE_STATS_LEN) + \
+	 (((struct igb_adapter *)netdev_priv(netdev))->num_tx_queues * \
+	  IGB_TX_QUEUE_STATS_LEN))
+#define IGB_STATS_LEN \
+	(IGB_GLOBAL_STATS_LEN + IGB_NETDEV_STATS_LEN + IGB_QUEUE_STATS_LEN)
+
+enum igb_diagnostics_results {
+	TEST_REG = 0,
+	TEST_EEP,
+	TEST_IRQ,
+	TEST_LOOP,
+	TEST_LINK
+};
+
+static const char igb_gstrings_test[][ETH_GSTRING_LEN] = {
+	[TEST_REG]  = "Register test  (offline)",
+	[TEST_EEP]  = "Eeprom test    (offline)",
+	[TEST_IRQ]  = "Interrupt test (offline)",
+	[TEST_LOOP] = "Loopback test  (offline)",
+	[TEST_LINK] = "Link test   (on/offline)"
+};
+#define IGB_TEST_LEN (sizeof(igb_gstrings_test) / ETH_GSTRING_LEN)
+
+static const char igb_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IGB_PRIV_FLAGS_LEGACY_RX	BIT(0)
+	"legacy-rx",
+};
+
+#define IGB_PRIV_FLAGS_STR_LEN ARRAY_SIZE(igb_priv_flags_strings)
+
+static int igb_get_link_ksettings(struct net_device *netdev,
+				  struct ethtool_link_ksettings *cmd)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575;
+	struct e1000_sfp_flags *eth_flags = &dev_spec->eth_flags;
+	u32 status;
+	u32 speed;
+	u32 supported, advertising;
+
+	status = pm_runtime_suspended(&adapter->pdev->dev) ?
+		 0 : rd32(E1000_STATUS);
+	if (hw->phy.media_type == e1000_media_type_copper) {
+
+		supported = (SUPPORTED_10baseT_Half |
+			     SUPPORTED_10baseT_Full |
+			     SUPPORTED_100baseT_Half |
+			     SUPPORTED_100baseT_Full |
+			     SUPPORTED_1000baseT_Full|
+			     SUPPORTED_Autoneg |
+			     SUPPORTED_TP |
+			     SUPPORTED_Pause);
+		advertising = ADVERTISED_TP;
+
+		if (hw->mac.autoneg == 1) {
+			advertising |= ADVERTISED_Autoneg;
+			/* the e1000 autoneg seems to match ethtool nicely */
+			advertising |= hw->phy.autoneg_advertised;
+		}
+
+		cmd->base.port = PORT_TP;
+		cmd->base.phy_address = hw->phy.addr;
+	} else {
+		supported = (SUPPORTED_FIBRE |
+			     SUPPORTED_1000baseKX_Full |
+			     SUPPORTED_Autoneg |
+			     SUPPORTED_Pause);
+		advertising = (ADVERTISED_FIBRE |
+			       ADVERTISED_1000baseKX_Full);
+		if (hw->mac.type == e1000_i354) {
+			if ((hw->device_id ==
+			     E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) &&
+			    !(status & E1000_STATUS_2P5_SKU_OVER)) {
+				supported |= SUPPORTED_2500baseX_Full;
+				supported &= ~SUPPORTED_1000baseKX_Full;
+				advertising |= ADVERTISED_2500baseX_Full;
+				advertising &= ~ADVERTISED_1000baseKX_Full;
+			}
+		}
+		if (eth_flags->e100_base_fx || eth_flags->e100_base_lx) {
+			supported |= SUPPORTED_100baseT_Full;
+			advertising |= ADVERTISED_100baseT_Full;
+		}
+		if (hw->mac.autoneg == 1)
+			advertising |= ADVERTISED_Autoneg;
+
+		cmd->base.port = PORT_FIBRE;
+	}
+	if (hw->mac.autoneg != 1)
+		advertising &= ~(ADVERTISED_Pause |
+				 ADVERTISED_Asym_Pause);
+
+	switch (hw->fc.requested_mode) {
+	case e1000_fc_full:
+		advertising |= ADVERTISED_Pause;
+		break;
+	case e1000_fc_rx_pause:
+		advertising |= (ADVERTISED_Pause |
+				ADVERTISED_Asym_Pause);
+		break;
+	case e1000_fc_tx_pause:
+		advertising |=  ADVERTISED_Asym_Pause;
+		break;
+	default:
+		advertising &= ~(ADVERTISED_Pause |
+				 ADVERTISED_Asym_Pause);
+	}
+	if (status & E1000_STATUS_LU) {
+		if ((status & E1000_STATUS_2P5_SKU) &&
+		    !(status & E1000_STATUS_2P5_SKU_OVER)) {
+			speed = SPEED_2500;
+		} else if (status & E1000_STATUS_SPEED_1000) {
+			speed = SPEED_1000;
+		} else if (status & E1000_STATUS_SPEED_100) {
+			speed = SPEED_100;
+		} else {
+			speed = SPEED_10;
+		}
+		if ((status & E1000_STATUS_FD) ||
+		    hw->phy.media_type != e1000_media_type_copper)
+			cmd->base.duplex = DUPLEX_FULL;
+		else
+			cmd->base.duplex = DUPLEX_HALF;
+	} else {
+		speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
+	}
+	cmd->base.speed = speed;
+	if ((hw->phy.media_type == e1000_media_type_fiber) ||
+	    hw->mac.autoneg)
+		cmd->base.autoneg = AUTONEG_ENABLE;
+	else
+		cmd->base.autoneg = AUTONEG_DISABLE;
+
+	/* MDI-X => 2; MDI =>1; Invalid =>0 */
+	if (hw->phy.media_type == e1000_media_type_copper)
+		cmd->base.eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X :
+						      ETH_TP_MDI;
+	else
+		cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
+
+	if (hw->phy.mdix == AUTO_ALL_MODES)
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+	else
+		cmd->base.eth_tp_mdix_ctrl = hw->phy.mdix;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
+
+	return 0;
+}
+
+static int igb_set_link_ksettings(struct net_device *netdev,
+				  const struct ethtool_link_ksettings *cmd)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 advertising;
+
+	/* When SoL/IDER sessions are active, autoneg/speed/duplex
+	 * cannot be changed
+	 */
+	if (igb_check_reset_block(hw)) {
+		dev_err(&adapter->pdev->dev,
+			"Cannot change link characteristics when SoL/IDER is active.\n");
+		return -EINVAL;
+	}
+
+	/* MDI setting is only allowed when autoneg enabled because
+	 * some hardware doesn't allow MDI setting when speed or
+	 * duplex is forced.
+	 */
+	if (cmd->base.eth_tp_mdix_ctrl) {
+		if (hw->phy.media_type != e1000_media_type_copper)
+			return -EOPNOTSUPP;
+
+		if ((cmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) &&
+		    (cmd->base.autoneg != AUTONEG_ENABLE)) {
+			dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
+			return -EINVAL;
+		}
+	}
+
+	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
+
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
+		hw->mac.autoneg = 1;
+		if (hw->phy.media_type == e1000_media_type_fiber) {
+			hw->phy.autoneg_advertised = advertising |
+						     ADVERTISED_FIBRE |
+						     ADVERTISED_Autoneg;
+			switch (adapter->link_speed) {
+			case SPEED_2500:
+				hw->phy.autoneg_advertised =
+					ADVERTISED_2500baseX_Full;
+				break;
+			case SPEED_1000:
+				hw->phy.autoneg_advertised =
+					ADVERTISED_1000baseT_Full;
+				break;
+			case SPEED_100:
+				hw->phy.autoneg_advertised =
+					ADVERTISED_100baseT_Full;
+				break;
+			default:
+				break;
+			}
+		} else {
+			hw->phy.autoneg_advertised = advertising |
+						     ADVERTISED_TP |
+						     ADVERTISED_Autoneg;
+		}
+		advertising = hw->phy.autoneg_advertised;
+		if (adapter->fc_autoneg)
+			hw->fc.requested_mode = e1000_fc_default;
+	} else {
+		u32 speed = cmd->base.speed;
+		/* calling this overrides forced MDI setting */
+		if (igb_set_spd_dplx(adapter, speed, cmd->base.duplex)) {
+			clear_bit(__IGB_RESETTING, &adapter->state);
+			return -EINVAL;
+		}
+	}
+
+	/* MDI-X => 2; MDI => 1; Auto => 3 */
+	if (cmd->base.eth_tp_mdix_ctrl) {
+		/* fix up the value for auto (3 => 0) as zero is mapped
+		 * internally to auto
+		 */
+		if (cmd->base.eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+			hw->phy.mdix = AUTO_ALL_MODES;
+		else
+			hw->phy.mdix = cmd->base.eth_tp_mdix_ctrl;
+	}
+
+	/* reset the link */
+	if (netif_running(adapter->netdev)) {
+		igb_down(adapter);
+		igb_up(adapter);
+	} else
+		igb_reset(adapter);
+
+	clear_bit(__IGB_RESETTING, &adapter->state);
+	return 0;
+}
+
+static u32 igb_get_link(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_mac_info *mac = &adapter->hw.mac;
+
+	/* If the link is not reported up to netdev, interrupts are disabled,
+	 * and so the physical link state may have changed since we last
+	 * looked. Set get_link_status to make sure that the true link
+	 * state is interrogated, rather than pulling a cached and possibly
+	 * stale link state from the driver.
+	 */
+	if (!netif_carrier_ok(netdev))
+		mac->get_link_status = 1;
+
+	return igb_has_link(adapter);
+}
+
+static void igb_get_pauseparam(struct net_device *netdev,
+			       struct ethtool_pauseparam *pause)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	pause->autoneg =
+		(adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+	if (hw->fc.current_mode == e1000_fc_rx_pause)
+		pause->rx_pause = 1;
+	else if (hw->fc.current_mode == e1000_fc_tx_pause)
+		pause->tx_pause = 1;
+	else if (hw->fc.current_mode == e1000_fc_full) {
+		pause->rx_pause = 1;
+		pause->tx_pause = 1;
+	}
+}
+
+static int igb_set_pauseparam(struct net_device *netdev,
+			      struct ethtool_pauseparam *pause)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	int retval = 0;
+	int i;
+
+	/* 100basefx does not support setting link flow control */
+	if (hw->dev_spec._82575.eth_flags.e100_base_fx)
+		return -EINVAL;
+
+	adapter->fc_autoneg = pause->autoneg;
+
+	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (adapter->fc_autoneg == AUTONEG_ENABLE) {
+		hw->fc.requested_mode = e1000_fc_default;
+		if (netif_running(adapter->netdev)) {
+			igb_down(adapter);
+			igb_up(adapter);
+		} else {
+			igb_reset(adapter);
+		}
+	} else {
+		if (pause->rx_pause && pause->tx_pause)
+			hw->fc.requested_mode = e1000_fc_full;
+		else if (pause->rx_pause && !pause->tx_pause)
+			hw->fc.requested_mode = e1000_fc_rx_pause;
+		else if (!pause->rx_pause && pause->tx_pause)
+			hw->fc.requested_mode = e1000_fc_tx_pause;
+		else if (!pause->rx_pause && !pause->tx_pause)
+			hw->fc.requested_mode = e1000_fc_none;
+
+		hw->fc.current_mode = hw->fc.requested_mode;
+
+		retval = ((hw->phy.media_type == e1000_media_type_copper) ?
+			  igb_force_mac_fc(hw) : igb_setup_link(hw));
+
+		/* Make sure SRRCTL considers new fc settings for each ring */
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			struct igb_ring *ring = adapter->rx_ring[i];
+
+			igb_setup_srrctl(adapter, ring);
+		}
+	}
+
+	clear_bit(__IGB_RESETTING, &adapter->state);
+	return retval;
+}
+
+static u32 igb_get_msglevel(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	return adapter->msg_enable;
+}
+
+static void igb_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	adapter->msg_enable = data;
+}
+
+static int igb_get_regs_len(struct net_device *netdev)
+{
+#define IGB_REGS_LEN 740
+	return IGB_REGS_LEN * sizeof(u32);
+}
+
+static void igb_get_regs(struct net_device *netdev,
+			 struct ethtool_regs *regs, void *p)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 *regs_buff = p;
+	u8 i;
+
+	memset(p, 0, IGB_REGS_LEN * sizeof(u32));
+
+	regs->version = (1u << 24) | (hw->revision_id << 16) | hw->device_id;
+
+	/* General Registers */
+	regs_buff[0] = rd32(E1000_CTRL);
+	regs_buff[1] = rd32(E1000_STATUS);
+	regs_buff[2] = rd32(E1000_CTRL_EXT);
+	regs_buff[3] = rd32(E1000_MDIC);
+	regs_buff[4] = rd32(E1000_SCTL);
+	regs_buff[5] = rd32(E1000_CONNSW);
+	regs_buff[6] = rd32(E1000_VET);
+	regs_buff[7] = rd32(E1000_LEDCTL);
+	regs_buff[8] = rd32(E1000_PBA);
+	regs_buff[9] = rd32(E1000_PBS);
+	regs_buff[10] = rd32(E1000_FRTIMER);
+	regs_buff[11] = rd32(E1000_TCPTIMER);
+
+	/* NVM Register */
+	regs_buff[12] = rd32(E1000_EECD);
+
+	/* Interrupt */
+	/* Reading EICS for EICR because they read the
+	 * same but EICS does not clear on read
+	 */
+	regs_buff[13] = rd32(E1000_EICS);
+	regs_buff[14] = rd32(E1000_EICS);
+	regs_buff[15] = rd32(E1000_EIMS);
+	regs_buff[16] = rd32(E1000_EIMC);
+	regs_buff[17] = rd32(E1000_EIAC);
+	regs_buff[18] = rd32(E1000_EIAM);
+	/* Reading ICS for ICR because they read the
+	 * same but ICS does not clear on read
+	 */
+	regs_buff[19] = rd32(E1000_ICS);
+	regs_buff[20] = rd32(E1000_ICS);
+	regs_buff[21] = rd32(E1000_IMS);
+	regs_buff[22] = rd32(E1000_IMC);
+	regs_buff[23] = rd32(E1000_IAC);
+	regs_buff[24] = rd32(E1000_IAM);
+	regs_buff[25] = rd32(E1000_IMIRVP);
+
+	/* Flow Control */
+	regs_buff[26] = rd32(E1000_FCAL);
+	regs_buff[27] = rd32(E1000_FCAH);
+	regs_buff[28] = rd32(E1000_FCTTV);
+	regs_buff[29] = rd32(E1000_FCRTL);
+	regs_buff[30] = rd32(E1000_FCRTH);
+	regs_buff[31] = rd32(E1000_FCRTV);
+
+	/* Receive */
+	regs_buff[32] = rd32(E1000_RCTL);
+	regs_buff[33] = rd32(E1000_RXCSUM);
+	regs_buff[34] = rd32(E1000_RLPML);
+	regs_buff[35] = rd32(E1000_RFCTL);
+	regs_buff[36] = rd32(E1000_MRQC);
+	regs_buff[37] = rd32(E1000_VT_CTL);
+
+	/* Transmit */
+	regs_buff[38] = rd32(E1000_TCTL);
+	regs_buff[39] = rd32(E1000_TCTL_EXT);
+	regs_buff[40] = rd32(E1000_TIPG);
+	regs_buff[41] = rd32(E1000_DTXCTL);
+
+	/* Wake Up */
+	regs_buff[42] = rd32(E1000_WUC);
+	regs_buff[43] = rd32(E1000_WUFC);
+	regs_buff[44] = rd32(E1000_WUS);
+	regs_buff[45] = rd32(E1000_IPAV);
+	regs_buff[46] = rd32(E1000_WUPL);
+
+	/* MAC */
+	regs_buff[47] = rd32(E1000_PCS_CFG0);
+	regs_buff[48] = rd32(E1000_PCS_LCTL);
+	regs_buff[49] = rd32(E1000_PCS_LSTAT);
+	regs_buff[50] = rd32(E1000_PCS_ANADV);
+	regs_buff[51] = rd32(E1000_PCS_LPAB);
+	regs_buff[52] = rd32(E1000_PCS_NPTX);
+	regs_buff[53] = rd32(E1000_PCS_LPABNP);
+
+	/* Statistics */
+	regs_buff[54] = adapter->stats.crcerrs;
+	regs_buff[55] = adapter->stats.algnerrc;
+	regs_buff[56] = adapter->stats.symerrs;
+	regs_buff[57] = adapter->stats.rxerrc;
+	regs_buff[58] = adapter->stats.mpc;
+	regs_buff[59] = adapter->stats.scc;
+	regs_buff[60] = adapter->stats.ecol;
+	regs_buff[61] = adapter->stats.mcc;
+	regs_buff[62] = adapter->stats.latecol;
+	regs_buff[63] = adapter->stats.colc;
+	regs_buff[64] = adapter->stats.dc;
+	regs_buff[65] = adapter->stats.tncrs;
+	regs_buff[66] = adapter->stats.sec;
+	regs_buff[67] = adapter->stats.htdpmc;
+	regs_buff[68] = adapter->stats.rlec;
+	regs_buff[69] = adapter->stats.xonrxc;
+	regs_buff[70] = adapter->stats.xontxc;
+	regs_buff[71] = adapter->stats.xoffrxc;
+	regs_buff[72] = adapter->stats.xofftxc;
+	regs_buff[73] = adapter->stats.fcruc;
+	regs_buff[74] = adapter->stats.prc64;
+	regs_buff[75] = adapter->stats.prc127;
+	regs_buff[76] = adapter->stats.prc255;
+	regs_buff[77] = adapter->stats.prc511;
+	regs_buff[78] = adapter->stats.prc1023;
+	regs_buff[79] = adapter->stats.prc1522;
+	regs_buff[80] = adapter->stats.gprc;
+	regs_buff[81] = adapter->stats.bprc;
+	regs_buff[82] = adapter->stats.mprc;
+	regs_buff[83] = adapter->stats.gptc;
+	regs_buff[84] = adapter->stats.gorc;
+	regs_buff[86] = adapter->stats.gotc;
+	regs_buff[88] = adapter->stats.rnbc;
+	regs_buff[89] = adapter->stats.ruc;
+	regs_buff[90] = adapter->stats.rfc;
+	regs_buff[91] = adapter->stats.roc;
+	regs_buff[92] = adapter->stats.rjc;
+	regs_buff[93] = adapter->stats.mgprc;
+	regs_buff[94] = adapter->stats.mgpdc;
+	regs_buff[95] = adapter->stats.mgptc;
+	regs_buff[96] = adapter->stats.tor;
+	regs_buff[98] = adapter->stats.tot;
+	regs_buff[100] = adapter->stats.tpr;
+	regs_buff[101] = adapter->stats.tpt;
+	regs_buff[102] = adapter->stats.ptc64;
+	regs_buff[103] = adapter->stats.ptc127;
+	regs_buff[104] = adapter->stats.ptc255;
+	regs_buff[105] = adapter->stats.ptc511;
+	regs_buff[106] = adapter->stats.ptc1023;
+	regs_buff[107] = adapter->stats.ptc1522;
+	regs_buff[108] = adapter->stats.mptc;
+	regs_buff[109] = adapter->stats.bptc;
+	regs_buff[110] = adapter->stats.tsctc;
+	regs_buff[111] = adapter->stats.iac;
+	regs_buff[112] = adapter->stats.rpthc;
+	regs_buff[113] = adapter->stats.hgptc;
+	regs_buff[114] = adapter->stats.hgorc;
+	regs_buff[116] = adapter->stats.hgotc;
+	regs_buff[118] = adapter->stats.lenerrs;
+	regs_buff[119] = adapter->stats.scvpc;
+	regs_buff[120] = adapter->stats.hrmpc;
+
+	for (i = 0; i < 4; i++)
+		regs_buff[121 + i] = rd32(E1000_SRRCTL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[125 + i] = rd32(E1000_PSRTYPE(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[129 + i] = rd32(E1000_RDBAL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[133 + i] = rd32(E1000_RDBAH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[137 + i] = rd32(E1000_RDLEN(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[141 + i] = rd32(E1000_RDH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[145 + i] = rd32(E1000_RDT(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[149 + i] = rd32(E1000_RXDCTL(i));
+
+	for (i = 0; i < 10; i++)
+		regs_buff[153 + i] = rd32(E1000_EITR(i));
+	for (i = 0; i < 8; i++)
+		regs_buff[163 + i] = rd32(E1000_IMIR(i));
+	for (i = 0; i < 8; i++)
+		regs_buff[171 + i] = rd32(E1000_IMIREXT(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[179 + i] = rd32(E1000_RAL(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[195 + i] = rd32(E1000_RAH(i));
+
+	for (i = 0; i < 4; i++)
+		regs_buff[211 + i] = rd32(E1000_TDBAL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[215 + i] = rd32(E1000_TDBAH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[219 + i] = rd32(E1000_TDLEN(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[223 + i] = rd32(E1000_TDH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[227 + i] = rd32(E1000_TDT(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[231 + i] = rd32(E1000_TXDCTL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[235 + i] = rd32(E1000_TDWBAL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[239 + i] = rd32(E1000_TDWBAH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[243 + i] = rd32(E1000_DCA_TXCTRL(i));
+
+	for (i = 0; i < 4; i++)
+		regs_buff[247 + i] = rd32(E1000_IP4AT_REG(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[251 + i] = rd32(E1000_IP6AT_REG(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[255 + i] = rd32(E1000_WUPM_REG(i));
+	for (i = 0; i < 128; i++)
+		regs_buff[287 + i] = rd32(E1000_FFMT_REG(i));
+	for (i = 0; i < 128; i++)
+		regs_buff[415 + i] = rd32(E1000_FFVT_REG(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[543 + i] = rd32(E1000_FFLT_REG(i));
+
+	regs_buff[547] = rd32(E1000_TDFH);
+	regs_buff[548] = rd32(E1000_TDFT);
+	regs_buff[549] = rd32(E1000_TDFHS);
+	regs_buff[550] = rd32(E1000_TDFPC);
+
+	if (hw->mac.type > e1000_82580) {
+		regs_buff[551] = adapter->stats.o2bgptc;
+		regs_buff[552] = adapter->stats.b2ospc;
+		regs_buff[553] = adapter->stats.o2bspc;
+		regs_buff[554] = adapter->stats.b2ogprc;
+	}
+
+	if (hw->mac.type == e1000_82576) {
+		for (i = 0; i < 12; i++)
+			regs_buff[555 + i] = rd32(E1000_SRRCTL(i + 4));
+		for (i = 0; i < 4; i++)
+			regs_buff[567 + i] = rd32(E1000_PSRTYPE(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[571 + i] = rd32(E1000_RDBAL(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[583 + i] = rd32(E1000_RDBAH(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[595 + i] = rd32(E1000_RDLEN(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[607 + i] = rd32(E1000_RDH(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[619 + i] = rd32(E1000_RDT(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[631 + i] = rd32(E1000_RXDCTL(i + 4));
+
+		for (i = 0; i < 12; i++)
+			regs_buff[643 + i] = rd32(E1000_TDBAL(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[655 + i] = rd32(E1000_TDBAH(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[667 + i] = rd32(E1000_TDLEN(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[679 + i] = rd32(E1000_TDH(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[691 + i] = rd32(E1000_TDT(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[703 + i] = rd32(E1000_TXDCTL(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[715 + i] = rd32(E1000_TDWBAL(i + 4));
+		for (i = 0; i < 12; i++)
+			regs_buff[727 + i] = rd32(E1000_TDWBAH(i + 4));
+	}
+
+	if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211)
+		regs_buff[739] = rd32(E1000_I210_RR2DCDELAY);
+}
+
+static int igb_get_eeprom_len(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	return adapter->hw.nvm.word_size * 2;
+}
+
+static int igb_get_eeprom(struct net_device *netdev,
+			  struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u16 *eeprom_buff;
+	int first_word, last_word;
+	int ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EINVAL;
+
+	eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+
+	eeprom_buff = kmalloc_array(last_word - first_word + 1, sizeof(u16),
+				    GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	if (hw->nvm.type == e1000_nvm_eeprom_spi)
+		ret_val = hw->nvm.ops.read(hw, first_word,
+					   last_word - first_word + 1,
+					   eeprom_buff);
+	else {
+		for (i = 0; i < last_word - first_word + 1; i++) {
+			ret_val = hw->nvm.ops.read(hw, first_word + i, 1,
+						   &eeprom_buff[i]);
+			if (ret_val)
+				break;
+		}
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1),
+			eeprom->len);
+	kfree(eeprom_buff);
+
+	return ret_val;
+}
+
+static int igb_set_eeprom(struct net_device *netdev,
+			  struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u16 *eeprom_buff;
+	void *ptr;
+	int max_len, first_word, last_word, ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EOPNOTSUPP;
+
+	if ((hw->mac.type >= e1000_i210) &&
+	    !igb_get_flash_presence_i210(hw)) {
+		return -EOPNOTSUPP;
+	}
+
+	if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+		return -EFAULT;
+
+	max_len = hw->nvm.word_size * 2;
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+	eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	ptr = (void *)eeprom_buff;
+
+	if (eeprom->offset & 1) {
+		/* need read/modify/write of first changed EEPROM word
+		 * only the second byte of the word is being modified
+		 */
+		ret_val = hw->nvm.ops.read(hw, first_word, 1,
+					    &eeprom_buff[0]);
+		ptr++;
+	}
+	if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) {
+		/* need read/modify/write of last changed EEPROM word
+		 * only the first byte of the word is being modified
+		 */
+		ret_val = hw->nvm.ops.read(hw, last_word, 1,
+				   &eeprom_buff[last_word - first_word]);
+		if (ret_val)
+			goto out;
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(ptr, bytes, eeprom->len);
+
+	for (i = 0; i < last_word - first_word + 1; i++)
+		cpu_to_le16s(&eeprom_buff[i]);
+
+	ret_val = hw->nvm.ops.write(hw, first_word,
+				    last_word - first_word + 1, eeprom_buff);
+
+	/* Update the checksum if nvm write succeeded */
+	if (ret_val == 0)
+		hw->nvm.ops.update(hw);
+
+	igb_set_fw_version(adapter);
+out:
+	kfree(eeprom_buff);
+	return ret_val;
+}
+
+static void igb_get_drvinfo(struct net_device *netdev,
+			    struct ethtool_drvinfo *drvinfo)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	strscpy(drvinfo->driver,  igb_driver_name, sizeof(drvinfo->driver));
+
+	/* EEPROM image version # is reported as firmware version # for
+	 * 82575 controllers
+	 */
+	strscpy(drvinfo->fw_version, adapter->fw_version,
+		sizeof(drvinfo->fw_version));
+	strscpy(drvinfo->bus_info, pci_name(adapter->pdev),
+		sizeof(drvinfo->bus_info));
+
+	drvinfo->n_priv_flags = IGB_PRIV_FLAGS_STR_LEN;
+}
+
+static void igb_get_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ring,
+			      struct kernel_ethtool_ringparam *kernel_ring,
+			      struct netlink_ext_ack *extack)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	ring->rx_max_pending = IGB_MAX_RXD;
+	ring->tx_max_pending = IGB_MAX_TXD;
+	ring->rx_pending = adapter->rx_ring_count;
+	ring->tx_pending = adapter->tx_ring_count;
+}
+
+static int igb_set_ringparam(struct net_device *netdev,
+			     struct ethtool_ringparam *ring,
+			     struct kernel_ethtool_ringparam *kernel_ring,
+			     struct netlink_ext_ack *extack)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct igb_ring *temp_ring;
+	int i, err = 0;
+	u16 new_rx_count, new_tx_count;
+
+	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+		return -EINVAL;
+
+	new_rx_count = min_t(u32, ring->rx_pending, IGB_MAX_RXD);
+	new_rx_count = max_t(u16, new_rx_count, IGB_MIN_RXD);
+	new_rx_count = ALIGN(new_rx_count, REQ_RX_DESCRIPTOR_MULTIPLE);
+
+	new_tx_count = min_t(u32, ring->tx_pending, IGB_MAX_TXD);
+	new_tx_count = max_t(u16, new_tx_count, IGB_MIN_TXD);
+	new_tx_count = ALIGN(new_tx_count, REQ_TX_DESCRIPTOR_MULTIPLE);
+
+	if ((new_tx_count == adapter->tx_ring_count) &&
+	    (new_rx_count == adapter->rx_ring_count)) {
+		/* nothing to do */
+		return 0;
+	}
+
+	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (!netif_running(adapter->netdev)) {
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			adapter->tx_ring[i]->count = new_tx_count;
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			adapter->rx_ring[i]->count = new_rx_count;
+		adapter->tx_ring_count = new_tx_count;
+		adapter->rx_ring_count = new_rx_count;
+		goto clear_reset;
+	}
+
+	if (adapter->num_tx_queues > adapter->num_rx_queues)
+		temp_ring = vmalloc(array_size(sizeof(struct igb_ring),
+					       adapter->num_tx_queues));
+	else
+		temp_ring = vmalloc(array_size(sizeof(struct igb_ring),
+					       adapter->num_rx_queues));
+
+	if (!temp_ring) {
+		err = -ENOMEM;
+		goto clear_reset;
+	}
+
+	igb_down(adapter);
+
+	/* We can't just free everything and then setup again,
+	 * because the ISRs in MSI-X mode get passed pointers
+	 * to the Tx and Rx ring structs.
+	 */
+	if (new_tx_count != adapter->tx_ring_count) {
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			memcpy(&temp_ring[i], adapter->tx_ring[i],
+			       sizeof(struct igb_ring));
+
+			temp_ring[i].count = new_tx_count;
+			err = igb_setup_tx_resources(&temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					igb_free_tx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+		}
+
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			igb_free_tx_resources(adapter->tx_ring[i]);
+
+			memcpy(adapter->tx_ring[i], &temp_ring[i],
+			       sizeof(struct igb_ring));
+		}
+
+		adapter->tx_ring_count = new_tx_count;
+	}
+
+	if (new_rx_count != adapter->rx_ring_count) {
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			memcpy(&temp_ring[i], adapter->rx_ring[i],
+			       sizeof(struct igb_ring));
+
+			temp_ring[i].count = new_rx_count;
+			err = igb_setup_rx_resources(&temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					igb_free_rx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+
+		}
+
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			igb_free_rx_resources(adapter->rx_ring[i]);
+
+			memcpy(adapter->rx_ring[i], &temp_ring[i],
+			       sizeof(struct igb_ring));
+		}
+
+		adapter->rx_ring_count = new_rx_count;
+	}
+err_setup:
+	igb_up(adapter);
+	vfree(temp_ring);
+clear_reset:
+	clear_bit(__IGB_RESETTING, &adapter->state);
+	return err;
+}
+
+/* ethtool register test data */
+struct igb_reg_test {
+	u16 reg;
+	u16 reg_offset;
+	u16 array_len;
+	u16 test_type;
+	u32 mask;
+	u32 write;
+};
+
+/* In the hardware, registers are laid out either singly, in arrays
+ * spaced 0x100 bytes apart, or in contiguous tables.  We assume
+ * most tests take place on arrays or single registers (handled
+ * as a single-element array) and special-case the tables.
+ * Table tests are always pattern tests.
+ *
+ * We also make provision for some required setup steps by specifying
+ * registers to be written without any read-back testing.
+ */
+
+#define PATTERN_TEST	1
+#define SET_READ_TEST	2
+#define WRITE_NO_TEST	3
+#define TABLE32_TEST	4
+#define TABLE64_TEST_LO	5
+#define TABLE64_TEST_HI	6
+
+/* i210 reg test */
+static struct igb_reg_test reg_test_i210[] = {
+	{ E1000_FCAL,	   0x100, 1,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_FCAH,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_FCT,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_RDBAL(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(0),  0x100, 4,  PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	/* RDH is read-only for i210, only test RDT. */
+	{ E1000_RDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_FCRTH,	   0x100, 1,  PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
+	{ E1000_FCTTV,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_TIPG,	   0x100, 1,  PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
+	{ E1000_TDBAL(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(0),  0x100, 4,  PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	{ E1000_TDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
+	{ E1000_TCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_LO,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_HI,
+						0x900FFFFF, 0xFFFFFFFF },
+	{ E1000_MTA,	   0, 128, TABLE32_TEST,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ 0, 0, 0, 0, 0 }
+};
+
+/* i350 reg test */
+static struct igb_reg_test reg_test_i350[] = {
+	{ E1000_FCAL,	   0x100, 1,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_FCAH,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_FCT,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_VET,	   0x100, 1,  PATTERN_TEST, 0xFFFF0000, 0xFFFF0000 },
+	{ E1000_RDBAL(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(0),  0x100, 4,  PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	{ E1000_RDBAL(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(4),  0x40,  4,  PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	/* RDH is read-only for i350, only test RDT. */
+	{ E1000_RDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RDT(4),	   0x40,  4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_FCRTH,	   0x100, 1,  PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
+	{ E1000_FCTTV,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_TIPG,	   0x100, 1,  PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
+	{ E1000_TDBAL(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(0),  0x100, 4,  PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	{ E1000_TDBAL(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(4),  0x40,  4,  PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	{ E1000_TDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_TDT(4),	   0x40,  4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
+	{ E1000_TCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_LO,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_HI,
+						0xC3FFFFFF, 0xFFFFFFFF },
+	{ E1000_RA2,	   0, 16, TABLE64_TEST_LO,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA2,	   0, 16, TABLE64_TEST_HI,
+						0xC3FFFFFF, 0xFFFFFFFF },
+	{ E1000_MTA,	   0, 128, TABLE32_TEST,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ 0, 0, 0, 0 }
+};
+
+/* 82580 reg test */
+static struct igb_reg_test reg_test_82580[] = {
+	{ E1000_FCAL,	   0x100, 1,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_FCAH,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_FCT,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_VET,	   0x100, 1,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDBAL(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(0),  0x100, 4,  PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	{ E1000_RDBAL(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(4),  0x40,  4,  PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	/* RDH is read-only for 82580, only test RDT. */
+	{ E1000_RDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RDT(4),	   0x40,  4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_FCRTH,	   0x100, 1,  PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
+	{ E1000_FCTTV,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_TIPG,	   0x100, 1,  PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
+	{ E1000_TDBAL(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(0),  0x100, 4,  PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	{ E1000_TDBAL(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(4),  0x40,  4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(4),  0x40,  4,  PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	{ E1000_TDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_TDT(4),	   0x40,  4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
+	{ E1000_TCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_LO,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_HI,
+						0x83FFFFFF, 0xFFFFFFFF },
+	{ E1000_RA2,	   0, 8, TABLE64_TEST_LO,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA2,	   0, 8, TABLE64_TEST_HI,
+						0x83FFFFFF, 0xFFFFFFFF },
+	{ E1000_MTA,	   0, 128, TABLE32_TEST,
+						0xFFFFFFFF, 0xFFFFFFFF },
+	{ 0, 0, 0, 0 }
+};
+
+/* 82576 reg test */
+static struct igb_reg_test reg_test_82576[] = {
+	{ E1000_FCAL,	   0x100, 1,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_FCAH,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_FCT,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_VET,	   0x100, 1,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDBAL(0),  0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(0),  0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(0),  0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	{ E1000_RDBAL(4),  0x40, 12, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(4),  0x40, 12, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(4),  0x40, 12, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	/* Enable all RX queues before testing. */
+	{ E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0,
+	  E1000_RXDCTL_QUEUE_ENABLE },
+	{ E1000_RXDCTL(4), 0x40, 12, WRITE_NO_TEST, 0,
+	  E1000_RXDCTL_QUEUE_ENABLE },
+	/* RDH is read-only for 82576, only test RDT. */
+	{ E1000_RDT(0),	   0x100, 4,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RDT(4),	   0x40, 12,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RXDCTL(0), 0x100, 4,  WRITE_NO_TEST, 0, 0 },
+	{ E1000_RXDCTL(4), 0x40, 12,  WRITE_NO_TEST, 0, 0 },
+	{ E1000_FCRTH,	   0x100, 1,  PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
+	{ E1000_FCTTV,	   0x100, 1,  PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_TIPG,	   0x100, 1,  PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
+	{ E1000_TDBAL(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(0),  0x100, 4,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(0),  0x100, 4,  PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	{ E1000_TDBAL(4),  0x40, 12,  PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(4),  0x40, 12,  PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(4),  0x40, 12,  PATTERN_TEST, 0x000FFFF0, 0x000FFFFF },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB },
+	{ E1000_RCTL,	   0x100, 1,  SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF },
+	{ E1000_TCTL,	   0x100, 1,  SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA,	   0, 16, TABLE64_TEST_HI, 0x83FFFFFF, 0xFFFFFFFF },
+	{ E1000_RA2,	   0, 8, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA2,	   0, 8, TABLE64_TEST_HI, 0x83FFFFFF, 0xFFFFFFFF },
+	{ E1000_MTA,	   0, 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ 0, 0, 0, 0 }
+};
+
+/* 82575 register test */
+static struct igb_reg_test reg_test_82575[] = {
+	{ E1000_FCAL,      0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_FCAH,      0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_FCT,       0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF },
+	{ E1000_VET,       0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDBAL(0),  0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_RDBAH(0),  0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RDLEN(0),  0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	/* Enable all four RX queues before testing. */
+	{ E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0,
+	  E1000_RXDCTL_QUEUE_ENABLE },
+	/* RDH is read-only for 82575, only test RDT. */
+	{ E1000_RDT(0),    0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, 0 },
+	{ E1000_FCRTH,     0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 },
+	{ E1000_FCTTV,     0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ E1000_TIPG,      0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF },
+	{ E1000_TDBAL(0),  0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ E1000_TDBAH(0),  0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_TDLEN(0),  0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	{ E1000_RCTL,      0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_RCTL,      0x100, 1, SET_READ_TEST, 0x04CFB3FE, 0x003FFFFB },
+	{ E1000_RCTL,      0x100, 1, SET_READ_TEST, 0x04CFB3FE, 0xFFFFFFFF },
+	{ E1000_TCTL,      0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 },
+	{ E1000_TXCW,      0x100, 1, PATTERN_TEST, 0xC000FFFF, 0x0000FFFF },
+	{ E1000_RA,        0, 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ E1000_RA,        0, 16, TABLE64_TEST_HI, 0x800FFFFF, 0xFFFFFFFF },
+	{ E1000_MTA,       0, 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ 0, 0, 0, 0 }
+};
+
+static bool reg_pattern_test(struct igb_adapter *adapter, u64 *data,
+			     int reg, u32 mask, u32 write)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 pat, val;
+	static const u32 _test[] = {
+		0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF};
+	for (pat = 0; pat < ARRAY_SIZE(_test); pat++) {
+		wr32(reg, (_test[pat] & write));
+		val = rd32(reg) & mask;
+		if (val != (_test[pat] & write & mask)) {
+			dev_err(&adapter->pdev->dev,
+				"pattern test reg %04X failed: got 0x%08X expected 0x%08X\n",
+				reg, val, (_test[pat] & write & mask));
+			*data = reg;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static bool reg_set_and_check(struct igb_adapter *adapter, u64 *data,
+			      int reg, u32 mask, u32 write)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 val;
+
+	wr32(reg, write & mask);
+	val = rd32(reg);
+	if ((write & mask) != (val & mask)) {
+		dev_err(&adapter->pdev->dev,
+			"set/check reg %04X test failed: got 0x%08X expected 0x%08X\n",
+			reg, (val & mask), (write & mask));
+		*data = reg;
+		return true;
+	}
+
+	return false;
+}
+
+#define REG_PATTERN_TEST(reg, mask, write) \
+	do { \
+		if (reg_pattern_test(adapter, data, reg, mask, write)) \
+			return 1; \
+	} while (0)
+
+#define REG_SET_AND_CHECK(reg, mask, write) \
+	do { \
+		if (reg_set_and_check(adapter, data, reg, mask, write)) \
+			return 1; \
+	} while (0)
+
+static int igb_reg_test(struct igb_adapter *adapter, u64 *data)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct igb_reg_test *test;
+	u32 value, before, after;
+	u32 i, toggle;
+
+	switch (adapter->hw.mac.type) {
+	case e1000_i350:
+	case e1000_i354:
+		test = reg_test_i350;
+		toggle = 0x7FEFF3FF;
+		break;
+	case e1000_i210:
+	case e1000_i211:
+		test = reg_test_i210;
+		toggle = 0x7FEFF3FF;
+		break;
+	case e1000_82580:
+		test = reg_test_82580;
+		toggle = 0x7FEFF3FF;
+		break;
+	case e1000_82576:
+		test = reg_test_82576;
+		toggle = 0x7FFFF3FF;
+		break;
+	default:
+		test = reg_test_82575;
+		toggle = 0x7FFFF3FF;
+		break;
+	}
+
+	/* Because the status register is such a special case,
+	 * we handle it separately from the rest of the register
+	 * tests.  Some bits are read-only, some toggle, and some
+	 * are writable on newer MACs.
+	 */
+	before = rd32(E1000_STATUS);
+	value = (rd32(E1000_STATUS) & toggle);
+	wr32(E1000_STATUS, toggle);
+	after = rd32(E1000_STATUS) & toggle;
+	if (value != after) {
+		dev_err(&adapter->pdev->dev,
+			"failed STATUS register test got: 0x%08X expected: 0x%08X\n",
+			after, value);
+		*data = 1;
+		return 1;
+	}
+	/* restore previous status */
+	wr32(E1000_STATUS, before);
+
+	/* Perform the remainder of the register test, looping through
+	 * the test table until we either fail or reach the null entry.
+	 */
+	while (test->reg) {
+		for (i = 0; i < test->array_len; i++) {
+			switch (test->test_type) {
+			case PATTERN_TEST:
+				REG_PATTERN_TEST(test->reg +
+						(i * test->reg_offset),
+						test->mask,
+						test->write);
+				break;
+			case SET_READ_TEST:
+				REG_SET_AND_CHECK(test->reg +
+						(i * test->reg_offset),
+						test->mask,
+						test->write);
+				break;
+			case WRITE_NO_TEST:
+				writel(test->write,
+				    (adapter->hw.hw_addr + test->reg)
+					+ (i * test->reg_offset));
+				break;
+			case TABLE32_TEST:
+				REG_PATTERN_TEST(test->reg + (i * 4),
+						test->mask,
+						test->write);
+				break;
+			case TABLE64_TEST_LO:
+				REG_PATTERN_TEST(test->reg + (i * 8),
+						test->mask,
+						test->write);
+				break;
+			case TABLE64_TEST_HI:
+				REG_PATTERN_TEST((test->reg + 4) + (i * 8),
+						test->mask,
+						test->write);
+				break;
+			}
+		}
+		test++;
+	}
+
+	*data = 0;
+	return 0;
+}
+
+static int igb_eeprom_test(struct igb_adapter *adapter, u64 *data)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	*data = 0;
+
+	/* Validate eeprom on all parts but flashless */
+	switch (hw->mac.type) {
+	case e1000_i210:
+	case e1000_i211:
+		if (igb_get_flash_presence_i210(hw)) {
+			if (adapter->hw.nvm.ops.validate(&adapter->hw) < 0)
+				*data = 2;
+		}
+		break;
+	default:
+		if (adapter->hw.nvm.ops.validate(&adapter->hw) < 0)
+			*data = 2;
+		break;
+	}
+
+	return *data;
+}
+
+static irqreturn_t igb_test_intr(int irq, void *data)
+{
+	struct igb_adapter *adapter = (struct igb_adapter *) data;
+	struct e1000_hw *hw = &adapter->hw;
+
+	adapter->test_icr |= rd32(E1000_ICR);
+
+	return IRQ_HANDLED;
+}
+
+static int igb_intr_test(struct igb_adapter *adapter, u64 *data)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	u32 mask, ics_mask, i = 0, shared_int = true;
+	u32 irq = adapter->pdev->irq;
+
+	*data = 0;
+
+	/* Hook up test interrupt handler just for this test */
+	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
+		if (request_irq(adapter->msix_entries[0].vector,
+				igb_test_intr, 0, netdev->name, adapter)) {
+			*data = 1;
+			return -1;
+		}
+		wr32(E1000_IVAR_MISC, E1000_IVAR_VALID << 8);
+		wr32(E1000_EIMS, BIT(0));
+	} else if (adapter->flags & IGB_FLAG_HAS_MSI) {
+		shared_int = false;
+		if (request_irq(irq,
+				igb_test_intr, 0, netdev->name, adapter)) {
+			*data = 1;
+			return -1;
+		}
+	} else if (!request_irq(irq, igb_test_intr, IRQF_PROBE_SHARED,
+				netdev->name, adapter)) {
+		shared_int = false;
+	} else if (request_irq(irq, igb_test_intr, IRQF_SHARED,
+		 netdev->name, adapter)) {
+		*data = 1;
+		return -1;
+	}
+	dev_info(&adapter->pdev->dev, "testing %s interrupt\n",
+		(shared_int ? "shared" : "unshared"));
+
+	/* Disable all the interrupts */
+	wr32(E1000_IMC, ~0);
+	wrfl();
+	usleep_range(10000, 11000);
+
+	/* Define all writable bits for ICS */
+	switch (hw->mac.type) {
+	case e1000_82575:
+		ics_mask = 0x37F47EDD;
+		break;
+	case e1000_82576:
+		ics_mask = 0x77D4FBFD;
+		break;
+	case e1000_82580:
+		ics_mask = 0x77DCFED5;
+		break;
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+		ics_mask = 0x77DCFED5;
+		break;
+	default:
+		ics_mask = 0x7FFFFFFF;
+		break;
+	}
+
+	/* Test each interrupt */
+	for (; i < 31; i++) {
+		/* Interrupt to test */
+		mask = BIT(i);
+
+		if (!(mask & ics_mask))
+			continue;
+
+		if (!shared_int) {
+			/* Disable the interrupt to be reported in
+			 * the cause register and then force the same
+			 * interrupt and see if one gets posted.  If
+			 * an interrupt was posted to the bus, the
+			 * test failed.
+			 */
+			adapter->test_icr = 0;
+
+			/* Flush any pending interrupts */
+			wr32(E1000_ICR, ~0);
+
+			wr32(E1000_IMC, mask);
+			wr32(E1000_ICS, mask);
+			wrfl();
+			usleep_range(10000, 11000);
+
+			if (adapter->test_icr & mask) {
+				*data = 3;
+				break;
+			}
+		}
+
+		/* Enable the interrupt to be reported in
+		 * the cause register and then force the same
+		 * interrupt and see if one gets posted.  If
+		 * an interrupt was not posted to the bus, the
+		 * test failed.
+		 */
+		adapter->test_icr = 0;
+
+		/* Flush any pending interrupts */
+		wr32(E1000_ICR, ~0);
+
+		wr32(E1000_IMS, mask);
+		wr32(E1000_ICS, mask);
+		wrfl();
+		usleep_range(10000, 11000);
+
+		if (!(adapter->test_icr & mask)) {
+			*data = 4;
+			break;
+		}
+
+		if (!shared_int) {
+			/* Disable the other interrupts to be reported in
+			 * the cause register and then force the other
+			 * interrupts and see if any get posted.  If
+			 * an interrupt was posted to the bus, the
+			 * test failed.
+			 */
+			adapter->test_icr = 0;
+
+			/* Flush any pending interrupts */
+			wr32(E1000_ICR, ~0);
+
+			wr32(E1000_IMC, ~mask);
+			wr32(E1000_ICS, ~mask);
+			wrfl();
+			usleep_range(10000, 11000);
+
+			if (adapter->test_icr & mask) {
+				*data = 5;
+				break;
+			}
+		}
+	}
+
+	/* Disable all the interrupts */
+	wr32(E1000_IMC, ~0);
+	wrfl();
+	usleep_range(10000, 11000);
+
+	/* Unhook test interrupt handler */
+	if (adapter->flags & IGB_FLAG_HAS_MSIX)
+		free_irq(adapter->msix_entries[0].vector, adapter);
+	else
+		free_irq(irq, adapter);
+
+	return *data;
+}
+
+static void igb_free_desc_rings(struct igb_adapter *adapter)
+{
+	igb_free_tx_resources(&adapter->test_tx_ring);
+	igb_free_rx_resources(&adapter->test_rx_ring);
+}
+
+static int igb_setup_desc_rings(struct igb_adapter *adapter)
+{
+	struct igb_ring *tx_ring = &adapter->test_tx_ring;
+	struct igb_ring *rx_ring = &adapter->test_rx_ring;
+	struct e1000_hw *hw = &adapter->hw;
+	int ret_val;
+
+	/* Setup Tx descriptor ring and Tx buffers */
+	tx_ring->count = IGB_DEFAULT_TXD;
+	tx_ring->dev = &adapter->pdev->dev;
+	tx_ring->netdev = adapter->netdev;
+	tx_ring->reg_idx = adapter->vfs_allocated_count;
+
+	if (igb_setup_tx_resources(tx_ring)) {
+		ret_val = 1;
+		goto err_nomem;
+	}
+
+	igb_setup_tctl(adapter);
+	igb_configure_tx_ring(adapter, tx_ring);
+
+	/* Setup Rx descriptor ring and Rx buffers */
+	rx_ring->count = IGB_DEFAULT_RXD;
+	rx_ring->dev = &adapter->pdev->dev;
+	rx_ring->netdev = adapter->netdev;
+	rx_ring->reg_idx = adapter->vfs_allocated_count;
+
+	if (igb_setup_rx_resources(rx_ring)) {
+		ret_val = 3;
+		goto err_nomem;
+	}
+
+	/* set the default queue to queue 0 of PF */
+	wr32(E1000_MRQC, adapter->vfs_allocated_count << 3);
+
+	/* enable receive ring */
+	igb_setup_rctl(adapter);
+	igb_configure_rx_ring(adapter, rx_ring);
+
+	igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));
+
+	return 0;
+
+err_nomem:
+	igb_free_desc_rings(adapter);
+	return ret_val;
+}
+
+static void igb_phy_disable_receiver(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* Write out to PHY registers 29 and 30 to disable the Receiver. */
+	igb_write_phy_reg(hw, 29, 0x001F);
+	igb_write_phy_reg(hw, 30, 0x8FFC);
+	igb_write_phy_reg(hw, 29, 0x001A);
+	igb_write_phy_reg(hw, 30, 0x8FF0);
+}
+
+static int igb_integrated_phy_loopback(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl_reg = 0;
+
+	hw->mac.autoneg = false;
+
+	if (hw->phy.type == e1000_phy_m88) {
+		if (hw->phy.id != I210_I_PHY_ID) {
+			/* Auto-MDI/MDIX Off */
+			igb_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808);
+			/* reset to update Auto-MDI/MDIX */
+			igb_write_phy_reg(hw, PHY_CONTROL, 0x9140);
+			/* autoneg off */
+			igb_write_phy_reg(hw, PHY_CONTROL, 0x8140);
+		} else {
+			/* force 1000, set loopback  */
+			igb_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0);
+			igb_write_phy_reg(hw, PHY_CONTROL, 0x4140);
+		}
+	} else if (hw->phy.type == e1000_phy_82580) {
+		/* enable MII loopback */
+		igb_write_phy_reg(hw, I82580_PHY_LBK_CTRL, 0x8041);
+	}
+
+	/* add small delay to avoid loopback test failure */
+	msleep(50);
+
+	/* force 1000, set loopback */
+	igb_write_phy_reg(hw, PHY_CONTROL, 0x4140);
+
+	/* Now set up the MAC to the same speed/duplex as the PHY. */
+	ctrl_reg = rd32(E1000_CTRL);
+	ctrl_reg &= ~E1000_CTRL_SPD_SEL; /* Clear the speed sel bits */
+	ctrl_reg |= (E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */
+		     E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */
+		     E1000_CTRL_SPD_1000 |/* Force Speed to 1000 */
+		     E1000_CTRL_FD |	 /* Force Duplex to FULL */
+		     E1000_CTRL_SLU);	 /* Set link up enable bit */
+
+	if (hw->phy.type == e1000_phy_m88)
+		ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */
+
+	wr32(E1000_CTRL, ctrl_reg);
+
+	/* Disable the receiver on the PHY so when a cable is plugged in, the
+	 * PHY does not begin to autoneg when a cable is reconnected to the NIC.
+	 */
+	if (hw->phy.type == e1000_phy_m88)
+		igb_phy_disable_receiver(adapter);
+
+	msleep(500);
+	return 0;
+}
+
+static int igb_set_phy_loopback(struct igb_adapter *adapter)
+{
+	return igb_integrated_phy_loopback(adapter);
+}
+
+static int igb_setup_loopback_test(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 reg;
+
+	reg = rd32(E1000_CTRL_EXT);
+
+	/* use CTRL_EXT to identify link type as SGMII can appear as copper */
+	if (reg & E1000_CTRL_EXT_LINK_MODE_MASK) {
+		if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) ||
+		(hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) ||
+		(hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) ||
+		(hw->device_id == E1000_DEV_ID_DH89XXCC_SFP) ||
+		(hw->device_id == E1000_DEV_ID_I354_SGMII) ||
+		(hw->device_id == E1000_DEV_ID_I354_BACKPLANE_2_5GBPS)) {
+			/* Enable DH89xxCC MPHY for near end loopback */
+			reg = rd32(E1000_MPHY_ADDR_CTL);
+			reg = (reg & E1000_MPHY_ADDR_CTL_OFFSET_MASK) |
+			E1000_MPHY_PCS_CLK_REG_OFFSET;
+			wr32(E1000_MPHY_ADDR_CTL, reg);
+
+			reg = rd32(E1000_MPHY_DATA);
+			reg |= E1000_MPHY_PCS_CLK_REG_DIGINELBEN;
+			wr32(E1000_MPHY_DATA, reg);
+		}
+
+		reg = rd32(E1000_RCTL);
+		reg |= E1000_RCTL_LBM_TCVR;
+		wr32(E1000_RCTL, reg);
+
+		wr32(E1000_SCTL, E1000_ENABLE_SERDES_LOOPBACK);
+
+		reg = rd32(E1000_CTRL);
+		reg &= ~(E1000_CTRL_RFCE |
+			 E1000_CTRL_TFCE |
+			 E1000_CTRL_LRST);
+		reg |= E1000_CTRL_SLU |
+		       E1000_CTRL_FD;
+		wr32(E1000_CTRL, reg);
+
+		/* Unset switch control to serdes energy detect */
+		reg = rd32(E1000_CONNSW);
+		reg &= ~E1000_CONNSW_ENRGSRC;
+		wr32(E1000_CONNSW, reg);
+
+		/* Unset sigdetect for SERDES loopback on
+		 * 82580 and newer devices.
+		 */
+		if (hw->mac.type >= e1000_82580) {
+			reg = rd32(E1000_PCS_CFG0);
+			reg |= E1000_PCS_CFG_IGN_SD;
+			wr32(E1000_PCS_CFG0, reg);
+		}
+
+		/* Set PCS register for forced speed */
+		reg = rd32(E1000_PCS_LCTL);
+		reg &= ~E1000_PCS_LCTL_AN_ENABLE;     /* Disable Autoneg*/
+		reg |= E1000_PCS_LCTL_FLV_LINK_UP |   /* Force link up */
+		       E1000_PCS_LCTL_FSV_1000 |      /* Force 1000    */
+		       E1000_PCS_LCTL_FDV_FULL |      /* SerDes Full duplex */
+		       E1000_PCS_LCTL_FSD |           /* Force Speed */
+		       E1000_PCS_LCTL_FORCE_LINK;     /* Force Link */
+		wr32(E1000_PCS_LCTL, reg);
+
+		return 0;
+	}
+
+	return igb_set_phy_loopback(adapter);
+}
+
+static void igb_loopback_cleanup(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl;
+	u16 phy_reg;
+
+	if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) ||
+	(hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) ||
+	(hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) ||
+	(hw->device_id == E1000_DEV_ID_DH89XXCC_SFP) ||
+	(hw->device_id == E1000_DEV_ID_I354_SGMII)) {
+		u32 reg;
+
+		/* Disable near end loopback on DH89xxCC */
+		reg = rd32(E1000_MPHY_ADDR_CTL);
+		reg = (reg & E1000_MPHY_ADDR_CTL_OFFSET_MASK) |
+		E1000_MPHY_PCS_CLK_REG_OFFSET;
+		wr32(E1000_MPHY_ADDR_CTL, reg);
+
+		reg = rd32(E1000_MPHY_DATA);
+		reg &= ~E1000_MPHY_PCS_CLK_REG_DIGINELBEN;
+		wr32(E1000_MPHY_DATA, reg);
+	}
+
+	rctl = rd32(E1000_RCTL);
+	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
+	wr32(E1000_RCTL, rctl);
+
+	hw->mac.autoneg = true;
+	igb_read_phy_reg(hw, PHY_CONTROL, &phy_reg);
+	if (phy_reg & MII_CR_LOOPBACK) {
+		phy_reg &= ~MII_CR_LOOPBACK;
+		igb_write_phy_reg(hw, PHY_CONTROL, phy_reg);
+		igb_phy_sw_reset(hw);
+	}
+}
+
+static void igb_create_lbtest_frame(struct sk_buff *skb,
+				    unsigned int frame_size)
+{
+	memset(skb->data, 0xFF, frame_size);
+	frame_size /= 2;
+	memset(&skb->data[frame_size], 0xAA, frame_size - 1);
+	skb->data[frame_size + 10] = 0xBE;
+	skb->data[frame_size + 12] = 0xAF;
+}
+
+static int igb_check_lbtest_frame(struct igb_rx_buffer *rx_buffer,
+				  unsigned int frame_size)
+{
+	unsigned char *data;
+	bool match = true;
+
+	frame_size >>= 1;
+
+	data = kmap_local_page(rx_buffer->page);
+
+	if (data[3] != 0xFF ||
+	    data[frame_size + 10] != 0xBE ||
+	    data[frame_size + 12] != 0xAF)
+		match = false;
+
+	kunmap_local(data);
+
+	return match;
+}
+
+static int igb_clean_test_rings(struct igb_ring *rx_ring,
+				struct igb_ring *tx_ring,
+				unsigned int size)
+{
+	union e1000_adv_rx_desc *rx_desc;
+	struct igb_rx_buffer *rx_buffer_info;
+	struct igb_tx_buffer *tx_buffer_info;
+	u16 rx_ntc, tx_ntc, count = 0;
+
+	/* initialize next to clean and descriptor values */
+	rx_ntc = rx_ring->next_to_clean;
+	tx_ntc = tx_ring->next_to_clean;
+	rx_desc = IGB_RX_DESC(rx_ring, rx_ntc);
+
+	while (rx_desc->wb.upper.length) {
+		/* check Rx buffer */
+		rx_buffer_info = &rx_ring->rx_buffer_info[rx_ntc];
+
+		/* sync Rx buffer for CPU read */
+		dma_sync_single_for_cpu(rx_ring->dev,
+					rx_buffer_info->dma,
+					size,
+					DMA_FROM_DEVICE);
+
+		/* verify contents of skb */
+		if (igb_check_lbtest_frame(rx_buffer_info, size))
+			count++;
+
+		/* sync Rx buffer for device write */
+		dma_sync_single_for_device(rx_ring->dev,
+					   rx_buffer_info->dma,
+					   size,
+					   DMA_FROM_DEVICE);
+
+		/* unmap buffer on Tx side */
+		tx_buffer_info = &tx_ring->tx_buffer_info[tx_ntc];
+
+		/* Free all the Tx ring sk_buffs */
+		dev_kfree_skb_any(tx_buffer_info->skb);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer_info, dma),
+				 dma_unmap_len(tx_buffer_info, len),
+				 DMA_TO_DEVICE);
+		dma_unmap_len_set(tx_buffer_info, len, 0);
+
+		/* increment Rx/Tx next to clean counters */
+		rx_ntc++;
+		if (rx_ntc == rx_ring->count)
+			rx_ntc = 0;
+		tx_ntc++;
+		if (tx_ntc == tx_ring->count)
+			tx_ntc = 0;
+
+		/* fetch next descriptor */
+		rx_desc = IGB_RX_DESC(rx_ring, rx_ntc);
+	}
+
+	netdev_tx_reset_queue(txring_txq(tx_ring));
+
+	/* re-map buffers to ring, store next to clean values */
+	igb_alloc_rx_buffers(rx_ring, count);
+	rx_ring->next_to_clean = rx_ntc;
+	tx_ring->next_to_clean = tx_ntc;
+
+	return count;
+}
+
+static int igb_run_loopback_test(struct igb_adapter *adapter)
+{
+	struct igb_ring *tx_ring = &adapter->test_tx_ring;
+	struct igb_ring *rx_ring = &adapter->test_rx_ring;
+	u16 i, j, lc, good_cnt;
+	int ret_val = 0;
+	unsigned int size = IGB_RX_HDR_LEN;
+	netdev_tx_t tx_ret_val;
+	struct sk_buff *skb;
+
+	/* allocate test skb */
+	skb = alloc_skb(size, GFP_KERNEL);
+	if (!skb)
+		return 11;
+
+	/* place data into test skb */
+	igb_create_lbtest_frame(skb, size);
+	skb_put(skb, size);
+
+	/* Calculate the loop count based on the largest descriptor ring
+	 * The idea is to wrap the largest ring a number of times using 64
+	 * send/receive pairs during each loop
+	 */
+
+	if (rx_ring->count <= tx_ring->count)
+		lc = ((tx_ring->count / 64) * 2) + 1;
+	else
+		lc = ((rx_ring->count / 64) * 2) + 1;
+
+	for (j = 0; j <= lc; j++) { /* loop count loop */
+		/* reset count of good packets */
+		good_cnt = 0;
+
+		/* place 64 packets on the transmit queue*/
+		for (i = 0; i < 64; i++) {
+			skb_get(skb);
+			tx_ret_val = igb_xmit_frame_ring(skb, tx_ring);
+			if (tx_ret_val == NETDEV_TX_OK)
+				good_cnt++;
+		}
+
+		if (good_cnt != 64) {
+			ret_val = 12;
+			break;
+		}
+
+		/* allow 200 milliseconds for packets to go from Tx to Rx */
+		msleep(200);
+
+		good_cnt = igb_clean_test_rings(rx_ring, tx_ring, size);
+		if (good_cnt != 64) {
+			ret_val = 13;
+			break;
+		}
+	} /* end loop count loop */
+
+	/* free the original skb */
+	kfree_skb(skb);
+
+	return ret_val;
+}
+
+static int igb_loopback_test(struct igb_adapter *adapter, u64 *data)
+{
+	/* PHY loopback cannot be performed if SoL/IDER
+	 * sessions are active
+	 */
+	if (igb_check_reset_block(&adapter->hw)) {
+		dev_err(&adapter->pdev->dev,
+			"Cannot do PHY loopback test when SoL/IDER is active.\n");
+		*data = 0;
+		goto out;
+	}
+
+	if (adapter->hw.mac.type == e1000_i354) {
+		dev_info(&adapter->pdev->dev,
+			"Loopback test not supported on i354.\n");
+		*data = 0;
+		goto out;
+	}
+	*data = igb_setup_desc_rings(adapter);
+	if (*data)
+		goto out;
+	*data = igb_setup_loopback_test(adapter);
+	if (*data)
+		goto err_loopback;
+	*data = igb_run_loopback_test(adapter);
+	igb_loopback_cleanup(adapter);
+
+err_loopback:
+	igb_free_desc_rings(adapter);
+out:
+	return *data;
+}
+
+static int igb_link_test(struct igb_adapter *adapter, u64 *data)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	*data = 0;
+	if (hw->phy.media_type == e1000_media_type_internal_serdes) {
+		int i = 0;
+
+		hw->mac.serdes_has_link = false;
+
+		/* On some blade server designs, link establishment
+		 * could take as long as 2-3 minutes
+		 */
+		do {
+			hw->mac.ops.check_for_link(&adapter->hw);
+			if (hw->mac.serdes_has_link)
+				return *data;
+			msleep(20);
+		} while (i++ < 3750);
+
+		*data = 1;
+	} else {
+		hw->mac.ops.check_for_link(&adapter->hw);
+		if (hw->mac.autoneg)
+			msleep(5000);
+
+		if (!(rd32(E1000_STATUS) & E1000_STATUS_LU))
+			*data = 1;
+	}
+	return *data;
+}
+
+static void igb_diag_test(struct net_device *netdev,
+			  struct ethtool_test *eth_test, u64 *data)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	u16 autoneg_advertised;
+	u8 forced_speed_duplex, autoneg;
+	bool if_running = netif_running(netdev);
+
+	set_bit(__IGB_TESTING, &adapter->state);
+
+	/* can't do offline tests on media switching devices */
+	if (adapter->hw.dev_spec._82575.mas_capable)
+		eth_test->flags &= ~ETH_TEST_FL_OFFLINE;
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		/* Offline tests */
+
+		/* save speed, duplex, autoneg settings */
+		autoneg_advertised = adapter->hw.phy.autoneg_advertised;
+		forced_speed_duplex = adapter->hw.mac.forced_speed_duplex;
+		autoneg = adapter->hw.mac.autoneg;
+
+		dev_info(&adapter->pdev->dev, "offline testing starting\n");
+
+		/* power up link for link test */
+		igb_power_up_link(adapter);
+
+		/* Link test performed before hardware reset so autoneg doesn't
+		 * interfere with test result
+		 */
+		if (igb_link_test(adapter, &data[TEST_LINK]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		if (if_running)
+			/* indicate we're in test mode */
+			igb_close(netdev);
+		else
+			igb_reset(adapter);
+
+		if (igb_reg_test(adapter, &data[TEST_REG]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		igb_reset(adapter);
+		if (igb_eeprom_test(adapter, &data[TEST_EEP]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		igb_reset(adapter);
+		if (igb_intr_test(adapter, &data[TEST_IRQ]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		igb_reset(adapter);
+		/* power up link for loopback test */
+		igb_power_up_link(adapter);
+		if (igb_loopback_test(adapter, &data[TEST_LOOP]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* restore speed, duplex, autoneg settings */
+		adapter->hw.phy.autoneg_advertised = autoneg_advertised;
+		adapter->hw.mac.forced_speed_duplex = forced_speed_duplex;
+		adapter->hw.mac.autoneg = autoneg;
+
+		/* force this routine to wait until autoneg complete/timeout */
+		adapter->hw.phy.autoneg_wait_to_complete = true;
+		igb_reset(adapter);
+		adapter->hw.phy.autoneg_wait_to_complete = false;
+
+		clear_bit(__IGB_TESTING, &adapter->state);
+		if (if_running)
+			igb_open(netdev);
+	} else {
+		dev_info(&adapter->pdev->dev, "online testing starting\n");
+
+		/* PHY is powered down when interface is down */
+		if (if_running && igb_link_test(adapter, &data[TEST_LINK]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+		else
+			data[TEST_LINK] = 0;
+
+		/* Online tests aren't run; pass by default */
+		data[TEST_REG] = 0;
+		data[TEST_EEP] = 0;
+		data[TEST_IRQ] = 0;
+		data[TEST_LOOP] = 0;
+
+		clear_bit(__IGB_TESTING, &adapter->state);
+	}
+	msleep_interruptible(4 * 1000);
+}
+
+static void igb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	wol->wolopts = 0;
+
+	if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED))
+		return;
+
+	wol->supported = WAKE_UCAST | WAKE_MCAST |
+			 WAKE_BCAST | WAKE_MAGIC |
+			 WAKE_PHY;
+
+	/* apply any specific unsupported masks here */
+	switch (adapter->hw.device_id) {
+	default:
+		break;
+	}
+
+	if (adapter->wol & E1000_WUFC_EX)
+		wol->wolopts |= WAKE_UCAST;
+	if (adapter->wol & E1000_WUFC_MC)
+		wol->wolopts |= WAKE_MCAST;
+	if (adapter->wol & E1000_WUFC_BC)
+		wol->wolopts |= WAKE_BCAST;
+	if (adapter->wol & E1000_WUFC_MAG)
+		wol->wolopts |= WAKE_MAGIC;
+	if (adapter->wol & E1000_WUFC_LNKC)
+		wol->wolopts |= WAKE_PHY;
+}
+
+static int igb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if (wol->wolopts & (WAKE_ARP | WAKE_MAGICSECURE | WAKE_FILTER))
+		return -EOPNOTSUPP;
+
+	if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED))
+		return wol->wolopts ? -EOPNOTSUPP : 0;
+
+	/* these settings will always override what we currently have */
+	adapter->wol = 0;
+
+	if (wol->wolopts & WAKE_UCAST)
+		adapter->wol |= E1000_WUFC_EX;
+	if (wol->wolopts & WAKE_MCAST)
+		adapter->wol |= E1000_WUFC_MC;
+	if (wol->wolopts & WAKE_BCAST)
+		adapter->wol |= E1000_WUFC_BC;
+	if (wol->wolopts & WAKE_MAGIC)
+		adapter->wol |= E1000_WUFC_MAG;
+	if (wol->wolopts & WAKE_PHY)
+		adapter->wol |= E1000_WUFC_LNKC;
+	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+
+	return 0;
+}
+
+/* bit defines for adapter->led_status */
+#define IGB_LED_ON		0
+
+static int igb_set_phys_id(struct net_device *netdev,
+			   enum ethtool_phys_id_state state)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		igb_blink_led(hw);
+		return 2;
+	case ETHTOOL_ID_ON:
+		igb_blink_led(hw);
+		break;
+	case ETHTOOL_ID_OFF:
+		igb_led_off(hw);
+		break;
+	case ETHTOOL_ID_INACTIVE:
+		igb_led_off(hw);
+		clear_bit(IGB_LED_ON, &adapter->led_status);
+		igb_cleanup_led(hw);
+		break;
+	}
+
+	return 0;
+}
+
+static int igb_set_coalesce(struct net_device *netdev,
+			    struct ethtool_coalesce *ec,
+			    struct kernel_ethtool_coalesce *kernel_coal,
+			    struct netlink_ext_ack *extack)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	if ((ec->rx_coalesce_usecs > IGB_MAX_ITR_USECS) ||
+	    ((ec->rx_coalesce_usecs > 3) &&
+	     (ec->rx_coalesce_usecs < IGB_MIN_ITR_USECS)) ||
+	    (ec->rx_coalesce_usecs == 2))
+		return -EINVAL;
+
+	if ((ec->tx_coalesce_usecs > IGB_MAX_ITR_USECS) ||
+	    ((ec->tx_coalesce_usecs > 3) &&
+	     (ec->tx_coalesce_usecs < IGB_MIN_ITR_USECS)) ||
+	    (ec->tx_coalesce_usecs == 2))
+		return -EINVAL;
+
+	if ((adapter->flags & IGB_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs)
+		return -EINVAL;
+
+	/* If ITR is disabled, disable DMAC */
+	if (ec->rx_coalesce_usecs == 0) {
+		if (adapter->flags & IGB_FLAG_DMAC)
+			adapter->flags &= ~IGB_FLAG_DMAC;
+	}
+
+	/* convert to rate of irq's per second */
+	if (ec->rx_coalesce_usecs && ec->rx_coalesce_usecs <= 3)
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs;
+	else
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2;
+
+	/* convert to rate of irq's per second */
+	if (adapter->flags & IGB_FLAG_QUEUE_PAIRS)
+		adapter->tx_itr_setting = adapter->rx_itr_setting;
+	else if (ec->tx_coalesce_usecs && ec->tx_coalesce_usecs <= 3)
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs;
+	else
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2;
+
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		struct igb_q_vector *q_vector = adapter->q_vector[i];
+		q_vector->tx.work_limit = adapter->tx_work_limit;
+		if (q_vector->rx.ring)
+			q_vector->itr_val = adapter->rx_itr_setting;
+		else
+			q_vector->itr_val = adapter->tx_itr_setting;
+		if (q_vector->itr_val && q_vector->itr_val <= 3)
+			q_vector->itr_val = IGB_START_ITR;
+		q_vector->set_itr = 1;
+	}
+
+	return 0;
+}
+
+static int igb_get_coalesce(struct net_device *netdev,
+			    struct ethtool_coalesce *ec,
+			    struct kernel_ethtool_coalesce *kernel_coal,
+			    struct netlink_ext_ack *extack)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if (adapter->rx_itr_setting <= 3)
+		ec->rx_coalesce_usecs = adapter->rx_itr_setting;
+	else
+		ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
+
+	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) {
+		if (adapter->tx_itr_setting <= 3)
+			ec->tx_coalesce_usecs = adapter->tx_itr_setting;
+		else
+			ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2;
+	}
+
+	return 0;
+}
+
+static int igb_nway_reset(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	if (netif_running(netdev))
+		igb_reinit_locked(adapter);
+	return 0;
+}
+
+static int igb_get_sset_count(struct net_device *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return IGB_STATS_LEN;
+	case ETH_SS_TEST:
+		return IGB_TEST_LEN;
+	case ETH_SS_PRIV_FLAGS:
+		return IGB_PRIV_FLAGS_STR_LEN;
+	default:
+		return -ENOTSUPP;
+	}
+}
+
+static void igb_get_ethtool_stats(struct net_device *netdev,
+				  struct ethtool_stats *stats, u64 *data)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct rtnl_link_stats64 *net_stats = &adapter->stats64;
+	unsigned int start;
+	struct igb_ring *ring;
+	int i, j;
+	char *p;
+
+	spin_lock(&adapter->stats64_lock);
+	igb_update_stats(adapter);
+
+	for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) {
+		p = (char *)adapter + igb_gstrings_stats[i].stat_offset;
+		data[i] = (igb_gstrings_stats[i].sizeof_stat ==
+			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+	for (j = 0; j < IGB_NETDEV_STATS_LEN; j++, i++) {
+		p = (char *)net_stats + igb_gstrings_net_stats[j].stat_offset;
+		data[i] = (igb_gstrings_net_stats[j].sizeof_stat ==
+			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+	for (j = 0; j < adapter->num_tx_queues; j++) {
+		u64	restart2;
+
+		ring = adapter->tx_ring[j];
+		do {
+			start = u64_stats_fetch_begin(&ring->tx_syncp);
+			data[i]   = ring->tx_stats.packets;
+			data[i+1] = ring->tx_stats.bytes;
+			data[i+2] = ring->tx_stats.restart_queue;
+		} while (u64_stats_fetch_retry(&ring->tx_syncp, start));
+		do {
+			start = u64_stats_fetch_begin(&ring->tx_syncp2);
+			restart2  = ring->tx_stats.restart_queue2;
+		} while (u64_stats_fetch_retry(&ring->tx_syncp2, start));
+		data[i+2] += restart2;
+
+		i += IGB_TX_QUEUE_STATS_LEN;
+	}
+	for (j = 0; j < adapter->num_rx_queues; j++) {
+		ring = adapter->rx_ring[j];
+		do {
+			start = u64_stats_fetch_begin(&ring->rx_syncp);
+			data[i]   = ring->rx_stats.packets;
+			data[i+1] = ring->rx_stats.bytes;
+			data[i+2] = ring->rx_stats.drops;
+			data[i+3] = ring->rx_stats.csum_err;
+			data[i+4] = ring->rx_stats.alloc_failed;
+		} while (u64_stats_fetch_retry(&ring->rx_syncp, start));
+		i += IGB_RX_QUEUE_STATS_LEN;
+	}
+	spin_unlock(&adapter->stats64_lock);
+}
+
+static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	u8 *p = data;
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_TEST:
+		memcpy(data, igb_gstrings_test, sizeof(igb_gstrings_test));
+		break;
+	case ETH_SS_STATS:
+		for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					igb_gstrings_stats[i].stat_string);
+		for (i = 0; i < IGB_NETDEV_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					igb_gstrings_net_stats[i].stat_string);
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
+			ethtool_sprintf(&p, "tx_queue_%u_restart", i);
+		}
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			ethtool_sprintf(&p, "rx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
+			ethtool_sprintf(&p, "rx_queue_%u_drops", i);
+			ethtool_sprintf(&p, "rx_queue_%u_csum_err", i);
+			ethtool_sprintf(&p, "rx_queue_%u_alloc_failed", i);
+		}
+		/* BUG_ON(p - data != IGB_STATS_LEN * ETH_GSTRING_LEN); */
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		memcpy(data, igb_priv_flags_strings,
+		       IGB_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+		break;
+	}
+}
+
+static int igb_get_ts_info(struct net_device *dev,
+			   struct ethtool_ts_info *info)
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+
+	if (adapter->ptp_clock)
+		info->phc_index = ptp_clock_index(adapter->ptp_clock);
+	else
+		info->phc_index = -1;
+
+	switch (adapter->hw.mac.type) {
+	case e1000_82575:
+		info->so_timestamping =
+			SOF_TIMESTAMPING_TX_SOFTWARE |
+			SOF_TIMESTAMPING_RX_SOFTWARE |
+			SOF_TIMESTAMPING_SOFTWARE;
+		return 0;
+	case e1000_82576:
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+		info->so_timestamping =
+			SOF_TIMESTAMPING_TX_SOFTWARE |
+			SOF_TIMESTAMPING_RX_SOFTWARE |
+			SOF_TIMESTAMPING_SOFTWARE |
+			SOF_TIMESTAMPING_TX_HARDWARE |
+			SOF_TIMESTAMPING_RX_HARDWARE |
+			SOF_TIMESTAMPING_RAW_HARDWARE;
+
+		info->tx_types =
+			BIT(HWTSTAMP_TX_OFF) |
+			BIT(HWTSTAMP_TX_ON);
+
+		info->rx_filters = BIT(HWTSTAMP_FILTER_NONE);
+
+		/* 82576 does not support timestamping all packets. */
+		if (adapter->hw.mac.type >= e1000_82580)
+			info->rx_filters |= BIT(HWTSTAMP_FILTER_ALL);
+		else
+			info->rx_filters |=
+				BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
+				BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
+				BIT(HWTSTAMP_FILTER_PTP_V2_EVENT);
+
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+#define ETHER_TYPE_FULL_MASK ((__force __be16)~0)
+static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter,
+				     struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp = &cmd->fs;
+	struct igb_nfc_filter *rule = NULL;
+
+	/* report total rule count */
+	cmd->data = IGB_MAX_RXNFC_FILTERS;
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+		if (fsp->location <= rule->sw_idx)
+			break;
+	}
+
+	if (!rule || fsp->location != rule->sw_idx)
+		return -EINVAL;
+
+	if (rule->filter.match_flags) {
+		fsp->flow_type = ETHER_FLOW;
+		fsp->ring_cookie = rule->action;
+		if (rule->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) {
+			fsp->h_u.ether_spec.h_proto = rule->filter.etype;
+			fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK;
+		}
+		if (rule->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI) {
+			fsp->flow_type |= FLOW_EXT;
+			fsp->h_ext.vlan_tci = rule->filter.vlan_tci;
+			fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK);
+		}
+		if (rule->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR) {
+			ether_addr_copy(fsp->h_u.ether_spec.h_dest,
+					rule->filter.dst_addr);
+			/* As we only support matching by the full
+			 * mask, return the mask to userspace
+			 */
+			eth_broadcast_addr(fsp->m_u.ether_spec.h_dest);
+		}
+		if (rule->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR) {
+			ether_addr_copy(fsp->h_u.ether_spec.h_source,
+					rule->filter.src_addr);
+			/* As we only support matching by the full
+			 * mask, return the mask to userspace
+			 */
+			eth_broadcast_addr(fsp->m_u.ether_spec.h_source);
+		}
+
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int igb_get_ethtool_nfc_all(struct igb_adapter *adapter,
+				   struct ethtool_rxnfc *cmd,
+				   u32 *rule_locs)
+{
+	struct igb_nfc_filter *rule;
+	int cnt = 0;
+
+	/* report total rule count */
+	cmd->data = IGB_MAX_RXNFC_FILTERS;
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+		if (cnt == cmd->rule_cnt)
+			return -EMSGSIZE;
+		rule_locs[cnt] = rule->sw_idx;
+		cnt++;
+	}
+
+	cmd->rule_cnt = cnt;
+
+	return 0;
+}
+
+static int igb_get_rss_hash_opts(struct igb_adapter *adapter,
+				 struct ethtool_rxnfc *cmd)
+{
+	cmd->data = 0;
+
+	/* Report default options for RSS on igb */
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		fallthrough;
+	case UDP_V4_FLOW:
+		if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		fallthrough;
+	case SCTP_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case IPV4_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	case TCP_V6_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		fallthrough;
+	case UDP_V6_FLOW:
+		if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		fallthrough;
+	case SCTP_V6_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case IPV6_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int igb_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+			 u32 *rule_locs)
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = adapter->num_rx_queues;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = adapter->nfc_filter_count;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = igb_get_ethtool_nfc_entry(adapter, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		ret = igb_get_ethtool_nfc_all(adapter, cmd, rule_locs);
+		break;
+	case ETHTOOL_GRXFH:
+		ret = igb_get_rss_hash_opts(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+#define UDP_RSS_FLAGS (IGB_FLAG_RSS_FIELD_IPV4_UDP | \
+		       IGB_FLAG_RSS_FIELD_IPV6_UDP)
+static int igb_set_rss_hash_opt(struct igb_adapter *adapter,
+				struct ethtool_rxnfc *nfc)
+{
+	u32 flags = adapter->flags;
+
+	/* RSS does not support anything other than hashing
+	 * to queues on src and dst IPs and ports
+	 */
+	if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    !(nfc->data & RXH_L4_B_0_1) ||
+		    !(nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	case UDP_V4_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			flags &= ~IGB_FLAG_RSS_FIELD_IPV4_UDP;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			flags |= IGB_FLAG_RSS_FIELD_IPV4_UDP;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case UDP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			flags &= ~IGB_FLAG_RSS_FIELD_IPV6_UDP;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			flags |= IGB_FLAG_RSS_FIELD_IPV6_UDP;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case SCTP_V4_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    (nfc->data & RXH_L4_B_0_1) ||
+		    (nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* if we changed something we need to update flags */
+	if (flags != adapter->flags) {
+		struct e1000_hw *hw = &adapter->hw;
+		u32 mrqc = rd32(E1000_MRQC);
+
+		if ((flags & UDP_RSS_FLAGS) &&
+		    !(adapter->flags & UDP_RSS_FLAGS))
+			dev_err(&adapter->pdev->dev,
+				"enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n");
+
+		adapter->flags = flags;
+
+		/* Perform hash on these packet types */
+		mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
+			E1000_MRQC_RSS_FIELD_IPV4_TCP |
+			E1000_MRQC_RSS_FIELD_IPV6 |
+			E1000_MRQC_RSS_FIELD_IPV6_TCP;
+
+		mrqc &= ~(E1000_MRQC_RSS_FIELD_IPV4_UDP |
+			  E1000_MRQC_RSS_FIELD_IPV6_UDP);
+
+		if (flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
+			mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
+
+		if (flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
+			mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
+
+		wr32(E1000_MRQC, mrqc);
+	}
+
+	return 0;
+}
+
+static int igb_rxnfc_write_etype_filter(struct igb_adapter *adapter,
+					struct igb_nfc_filter *input)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u8 i;
+	u32 etqf;
+	u16 etype;
+
+	/* find an empty etype filter register */
+	for (i = 0; i < MAX_ETYPE_FILTER; ++i) {
+		if (!adapter->etype_bitmap[i])
+			break;
+	}
+	if (i == MAX_ETYPE_FILTER) {
+		dev_err(&adapter->pdev->dev, "ethtool -N: etype filters are all used.\n");
+		return -EINVAL;
+	}
+
+	adapter->etype_bitmap[i] = true;
+
+	etqf = rd32(E1000_ETQF(i));
+	etype = ntohs(input->filter.etype & ETHER_TYPE_FULL_MASK);
+
+	etqf |= E1000_ETQF_FILTER_ENABLE;
+	etqf &= ~E1000_ETQF_ETYPE_MASK;
+	etqf |= (etype & E1000_ETQF_ETYPE_MASK);
+
+	etqf &= ~E1000_ETQF_QUEUE_MASK;
+	etqf |= ((input->action << E1000_ETQF_QUEUE_SHIFT)
+		& E1000_ETQF_QUEUE_MASK);
+	etqf |= E1000_ETQF_QUEUE_ENABLE;
+
+	wr32(E1000_ETQF(i), etqf);
+
+	input->etype_reg_index = i;
+
+	return 0;
+}
+
+static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter,
+					    struct igb_nfc_filter *input)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u8 vlan_priority;
+	u16 queue_index;
+	u32 vlapqf;
+
+	vlapqf = rd32(E1000_VLAPQF);
+	vlan_priority = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK)
+				>> VLAN_PRIO_SHIFT;
+	queue_index = (vlapqf >> (vlan_priority * 4)) & E1000_VLAPQF_QUEUE_MASK;
+
+	/* check whether this vlan prio is already set */
+	if ((vlapqf & E1000_VLAPQF_P_VALID(vlan_priority)) &&
+	    (queue_index != input->action)) {
+		dev_err(&adapter->pdev->dev, "ethtool rxnfc set vlan prio filter failed.\n");
+		return -EEXIST;
+	}
+
+	vlapqf |= E1000_VLAPQF_P_VALID(vlan_priority);
+	vlapqf |= E1000_VLAPQF_QUEUE_SEL(vlan_priority, input->action);
+
+	wr32(E1000_VLAPQF, vlapqf);
+
+	return 0;
+}
+
+int igb_add_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int err = -EINVAL;
+
+	if (hw->mac.type == e1000_i210 &&
+	    !(input->filter.match_flags & ~IGB_FILTER_FLAG_SRC_MAC_ADDR)) {
+		dev_err(&adapter->pdev->dev,
+			"i210 doesn't support flow classification rules specifying only source addresses.\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) {
+		err = igb_rxnfc_write_etype_filter(adapter, input);
+		if (err)
+			return err;
+	}
+
+	if (input->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR) {
+		err = igb_add_mac_steering_filter(adapter,
+						  input->filter.dst_addr,
+						  input->action, 0);
+		err = min_t(int, err, 0);
+		if (err)
+			return err;
+	}
+
+	if (input->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR) {
+		err = igb_add_mac_steering_filter(adapter,
+						  input->filter.src_addr,
+						  input->action,
+						  IGB_MAC_STATE_SRC_ADDR);
+		err = min_t(int, err, 0);
+		if (err)
+			return err;
+	}
+
+	if (input->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI)
+		err = igb_rxnfc_write_vlan_prio_filter(adapter, input);
+
+	return err;
+}
+
+static void igb_clear_etype_filter_regs(struct igb_adapter *adapter,
+					u16 reg_index)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 etqf = rd32(E1000_ETQF(reg_index));
+
+	etqf &= ~E1000_ETQF_QUEUE_ENABLE;
+	etqf &= ~E1000_ETQF_QUEUE_MASK;
+	etqf &= ~E1000_ETQF_FILTER_ENABLE;
+
+	wr32(E1000_ETQF(reg_index), etqf);
+
+	adapter->etype_bitmap[reg_index] = false;
+}
+
+static void igb_clear_vlan_prio_filter(struct igb_adapter *adapter,
+				       u16 vlan_tci)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u8 vlan_priority;
+	u32 vlapqf;
+
+	vlan_priority = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+
+	vlapqf = rd32(E1000_VLAPQF);
+	vlapqf &= ~E1000_VLAPQF_P_VALID(vlan_priority);
+	vlapqf &= ~E1000_VLAPQF_QUEUE_SEL(vlan_priority,
+						E1000_VLAPQF_QUEUE_MASK);
+
+	wr32(E1000_VLAPQF, vlapqf);
+}
+
+int igb_erase_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input)
+{
+	if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE)
+		igb_clear_etype_filter_regs(adapter,
+					    input->etype_reg_index);
+
+	if (input->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI)
+		igb_clear_vlan_prio_filter(adapter,
+					   ntohs(input->filter.vlan_tci));
+
+	if (input->filter.match_flags & IGB_FILTER_FLAG_SRC_MAC_ADDR)
+		igb_del_mac_steering_filter(adapter, input->filter.src_addr,
+					    input->action,
+					    IGB_MAC_STATE_SRC_ADDR);
+
+	if (input->filter.match_flags & IGB_FILTER_FLAG_DST_MAC_ADDR)
+		igb_del_mac_steering_filter(adapter, input->filter.dst_addr,
+					    input->action, 0);
+
+	return 0;
+}
+
+static int igb_update_ethtool_nfc_entry(struct igb_adapter *adapter,
+					struct igb_nfc_filter *input,
+					u16 sw_idx)
+{
+	struct igb_nfc_filter *rule, *parent;
+	int err = -EINVAL;
+
+	parent = NULL;
+	rule = NULL;
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+		/* hash found, or no matching entry */
+		if (rule->sw_idx >= sw_idx)
+			break;
+		parent = rule;
+	}
+
+	/* if there is an old rule occupying our place remove it */
+	if (rule && (rule->sw_idx == sw_idx)) {
+		if (!input)
+			err = igb_erase_filter(adapter, rule);
+
+		hlist_del(&rule->nfc_node);
+		kfree(rule);
+		adapter->nfc_filter_count--;
+	}
+
+	/* If no input this was a delete, err should be 0 if a rule was
+	 * successfully found and removed from the list else -EINVAL
+	 */
+	if (!input)
+		return err;
+
+	/* initialize node */
+	INIT_HLIST_NODE(&input->nfc_node);
+
+	/* add filter to the list */
+	if (parent)
+		hlist_add_behind(&input->nfc_node, &parent->nfc_node);
+	else
+		hlist_add_head(&input->nfc_node, &adapter->nfc_filter_list);
+
+	/* update counts */
+	adapter->nfc_filter_count++;
+
+	return 0;
+}
+
+static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter,
+				     struct ethtool_rxnfc *cmd)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct igb_nfc_filter *input, *rule;
+	int err = 0;
+
+	if (!(netdev->hw_features & NETIF_F_NTUPLE))
+		return -EOPNOTSUPP;
+
+	/* Don't allow programming if the action is a queue greater than
+	 * the number of online Rx queues.
+	 */
+	if ((fsp->ring_cookie == RX_CLS_FLOW_DISC) ||
+	    (fsp->ring_cookie >= adapter->num_rx_queues)) {
+		dev_err(&adapter->pdev->dev, "ethtool -N: The specified action is invalid\n");
+		return -EINVAL;
+	}
+
+	/* Don't allow indexes to exist outside of available space */
+	if (fsp->location >= IGB_MAX_RXNFC_FILTERS) {
+		dev_err(&adapter->pdev->dev, "Location out of range\n");
+		return -EINVAL;
+	}
+
+	if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW)
+		return -EINVAL;
+
+	input = kzalloc(sizeof(*input), GFP_KERNEL);
+	if (!input)
+		return -ENOMEM;
+
+	if (fsp->m_u.ether_spec.h_proto == ETHER_TYPE_FULL_MASK) {
+		input->filter.etype = fsp->h_u.ether_spec.h_proto;
+		input->filter.match_flags = IGB_FILTER_FLAG_ETHER_TYPE;
+	}
+
+	/* Only support matching addresses by the full mask */
+	if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_source)) {
+		input->filter.match_flags |= IGB_FILTER_FLAG_SRC_MAC_ADDR;
+		ether_addr_copy(input->filter.src_addr,
+				fsp->h_u.ether_spec.h_source);
+	}
+
+	/* Only support matching addresses by the full mask */
+	if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_dest)) {
+		input->filter.match_flags |= IGB_FILTER_FLAG_DST_MAC_ADDR;
+		ether_addr_copy(input->filter.dst_addr,
+				fsp->h_u.ether_spec.h_dest);
+	}
+
+	if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) {
+		if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) {
+			err = -EINVAL;
+			goto err_out;
+		}
+		input->filter.vlan_tci = fsp->h_ext.vlan_tci;
+		input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI;
+	}
+
+	input->action = fsp->ring_cookie;
+	input->sw_idx = fsp->location;
+
+	spin_lock(&adapter->nfc_lock);
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+		if (!memcmp(&input->filter, &rule->filter,
+			    sizeof(input->filter))) {
+			err = -EEXIST;
+			dev_err(&adapter->pdev->dev,
+				"ethtool: this filter is already set\n");
+			goto err_out_w_lock;
+		}
+	}
+
+	err = igb_add_filter(adapter, input);
+	if (err)
+		goto err_out_w_lock;
+
+	err = igb_update_ethtool_nfc_entry(adapter, input, input->sw_idx);
+	if (err)
+		goto err_out_input_filter;
+
+	spin_unlock(&adapter->nfc_lock);
+	return 0;
+
+err_out_input_filter:
+	igb_erase_filter(adapter, input);
+err_out_w_lock:
+	spin_unlock(&adapter->nfc_lock);
+err_out:
+	kfree(input);
+	return err;
+}
+
+static int igb_del_ethtool_nfc_entry(struct igb_adapter *adapter,
+				     struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	int err;
+
+	spin_lock(&adapter->nfc_lock);
+	err = igb_update_ethtool_nfc_entry(adapter, NULL, fsp->location);
+	spin_unlock(&adapter->nfc_lock);
+
+	return err;
+}
+
+static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXFH:
+		ret = igb_set_rss_hash_opt(adapter, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLINS:
+		ret = igb_add_ethtool_nfc_entry(adapter, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		ret = igb_del_ethtool_nfc_entry(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ret_val;
+	u16 phy_data;
+
+	if ((hw->mac.type < e1000_i350) ||
+	    (hw->phy.media_type != e1000_media_type_copper))
+		return -EOPNOTSUPP;
+
+	edata->supported = (SUPPORTED_1000baseT_Full |
+			    SUPPORTED_100baseT_Full);
+	if (!hw->dev_spec._82575.eee_disable)
+		edata->advertised =
+			mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert);
+
+	/* The IPCNFG and EEER registers are not supported on I354. */
+	if (hw->mac.type == e1000_i354) {
+		igb_get_eee_status_i354(hw, (bool *)&edata->eee_active);
+	} else {
+		u32 eeer;
+
+		eeer = rd32(E1000_EEER);
+
+		/* EEE status on negotiated link */
+		if (eeer & E1000_EEER_EEE_NEG)
+			edata->eee_active = true;
+
+		if (eeer & E1000_EEER_TX_LPI_EN)
+			edata->tx_lpi_enabled = true;
+	}
+
+	/* EEE Link Partner Advertised */
+	switch (hw->mac.type) {
+	case e1000_i350:
+		ret_val = igb_read_emi_reg(hw, E1000_EEE_LP_ADV_ADDR_I350,
+					   &phy_data);
+		if (ret_val)
+			return -ENODATA;
+
+		edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data);
+		break;
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+		ret_val = igb_read_xmdio_reg(hw, E1000_EEE_LP_ADV_ADDR_I210,
+					     E1000_EEE_LP_ADV_DEV_I210,
+					     &phy_data);
+		if (ret_val)
+			return -ENODATA;
+
+		edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data);
+
+		break;
+	default:
+		break;
+	}
+
+	edata->eee_enabled = !hw->dev_spec._82575.eee_disable;
+
+	if ((hw->mac.type == e1000_i354) &&
+	    (edata->eee_enabled))
+		edata->tx_lpi_enabled = true;
+
+	/* Report correct negotiated EEE status for devices that
+	 * wrongly report EEE at half-duplex
+	 */
+	if (adapter->link_duplex == HALF_DUPLEX) {
+		edata->eee_enabled = false;
+		edata->eee_active = false;
+		edata->tx_lpi_enabled = false;
+		edata->advertised &= ~edata->advertised;
+	}
+
+	return 0;
+}
+
+static int igb_set_eee(struct net_device *netdev,
+		       struct ethtool_eee *edata)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct ethtool_eee eee_curr;
+	bool adv1g_eee = true, adv100m_eee = true;
+	s32 ret_val;
+
+	if ((hw->mac.type < e1000_i350) ||
+	    (hw->phy.media_type != e1000_media_type_copper))
+		return -EOPNOTSUPP;
+
+	memset(&eee_curr, 0, sizeof(struct ethtool_eee));
+
+	ret_val = igb_get_eee(netdev, &eee_curr);
+	if (ret_val)
+		return ret_val;
+
+	if (eee_curr.eee_enabled) {
+		if (eee_curr.tx_lpi_enabled != edata->tx_lpi_enabled) {
+			dev_err(&adapter->pdev->dev,
+				"Setting EEE tx-lpi is not supported\n");
+			return -EINVAL;
+		}
+
+		/* Tx LPI timer is not implemented currently */
+		if (edata->tx_lpi_timer) {
+			dev_err(&adapter->pdev->dev,
+				"Setting EEE Tx LPI timer is not supported\n");
+			return -EINVAL;
+		}
+
+		if (!edata->advertised || (edata->advertised &
+		    ~(ADVERTISE_100_FULL | ADVERTISE_1000_FULL))) {
+			dev_err(&adapter->pdev->dev,
+				"EEE Advertisement supports only 100Tx and/or 100T full duplex\n");
+			return -EINVAL;
+		}
+		adv100m_eee = !!(edata->advertised & ADVERTISE_100_FULL);
+		adv1g_eee = !!(edata->advertised & ADVERTISE_1000_FULL);
+
+	} else if (!edata->eee_enabled) {
+		dev_err(&adapter->pdev->dev,
+			"Setting EEE options are not supported with EEE disabled\n");
+		return -EINVAL;
+	}
+
+	adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised);
+	if (hw->dev_spec._82575.eee_disable != !edata->eee_enabled) {
+		hw->dev_spec._82575.eee_disable = !edata->eee_enabled;
+		adapter->flags |= IGB_FLAG_EEE;
+
+		/* reset link */
+		if (netif_running(netdev))
+			igb_reinit_locked(adapter);
+		else
+			igb_reset(adapter);
+	}
+
+	if (hw->mac.type == e1000_i354)
+		ret_val = igb_set_eee_i354(hw, adv1g_eee, adv100m_eee);
+	else
+		ret_val = igb_set_eee_i350(hw, adv1g_eee, adv100m_eee);
+
+	if (ret_val) {
+		dev_err(&adapter->pdev->dev,
+			"Problem setting EEE advertisement options\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int igb_get_module_info(struct net_device *netdev,
+			       struct ethtool_modinfo *modinfo)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 status = 0;
+	u16 sff8472_rev, addr_mode;
+	bool page_swap = false;
+
+	if ((hw->phy.media_type == e1000_media_type_copper) ||
+	    (hw->phy.media_type == e1000_media_type_unknown))
+		return -EOPNOTSUPP;
+
+	/* Check whether we support SFF-8472 or not */
+	status = igb_read_phy_reg_i2c(hw, IGB_SFF_8472_COMP, &sff8472_rev);
+	if (status)
+		return -EIO;
+
+	/* addressing mode is not supported */
+	status = igb_read_phy_reg_i2c(hw, IGB_SFF_8472_SWAP, &addr_mode);
+	if (status)
+		return -EIO;
+
+	/* addressing mode is not supported */
+	if ((addr_mode & 0xFF) & IGB_SFF_ADDRESSING_MODE) {
+		hw_dbg("Address change required to access page 0xA2, but not supported. Please report the module type to the driver maintainers.\n");
+		page_swap = true;
+	}
+
+	if ((sff8472_rev & 0xFF) == IGB_SFF_8472_UNSUP || page_swap) {
+		/* We have an SFP, but it does not support SFF-8472 */
+		modinfo->type = ETH_MODULE_SFF_8079;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+	} else {
+		/* We have an SFP which supports a revision of SFF-8472 */
+		modinfo->type = ETH_MODULE_SFF_8472;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+	}
+
+	return 0;
+}
+
+static int igb_get_module_eeprom(struct net_device *netdev,
+				 struct ethtool_eeprom *ee, u8 *data)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 status = 0;
+	u16 *dataword;
+	u16 first_word, last_word;
+	int i = 0;
+
+	if (ee->len == 0)
+		return -EINVAL;
+
+	first_word = ee->offset >> 1;
+	last_word = (ee->offset + ee->len - 1) >> 1;
+
+	dataword = kmalloc_array(last_word - first_word + 1, sizeof(u16),
+				 GFP_KERNEL);
+	if (!dataword)
+		return -ENOMEM;
+
+	/* Read EEPROM block, SFF-8079/SFF-8472, word at a time */
+	for (i = 0; i < last_word - first_word + 1; i++) {
+		status = igb_read_phy_reg_i2c(hw, (first_word + i) * 2,
+					      &dataword[i]);
+		if (status) {
+			/* Error occurred while reading module */
+			kfree(dataword);
+			return -EIO;
+		}
+
+		be16_to_cpus(&dataword[i]);
+	}
+
+	memcpy(data, (u8 *)dataword + (ee->offset & 1), ee->len);
+	kfree(dataword);
+
+	return 0;
+}
+
+static int igb_ethtool_begin(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	pm_runtime_get_sync(&adapter->pdev->dev);
+	return 0;
+}
+
+static void igb_ethtool_complete(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	pm_runtime_put(&adapter->pdev->dev);
+}
+
+static u32 igb_get_rxfh_indir_size(struct net_device *netdev)
+{
+	return IGB_RETA_SIZE;
+}
+
+static int igb_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+			u8 *hfunc)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+	if (!indir)
+		return 0;
+	for (i = 0; i < IGB_RETA_SIZE; i++)
+		indir[i] = adapter->rss_indir_tbl[i];
+
+	return 0;
+}
+
+void igb_write_rss_indir_tbl(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 reg = E1000_RETA(0);
+	u32 shift = 0;
+	int i = 0;
+
+	switch (hw->mac.type) {
+	case e1000_82575:
+		shift = 6;
+		break;
+	case e1000_82576:
+		/* 82576 supports 2 RSS queues for SR-IOV */
+		if (adapter->vfs_allocated_count)
+			shift = 3;
+		break;
+	default:
+		break;
+	}
+
+	while (i < IGB_RETA_SIZE) {
+		u32 val = 0;
+		int j;
+
+		for (j = 3; j >= 0; j--) {
+			val <<= 8;
+			val |= adapter->rss_indir_tbl[i + j];
+		}
+
+		wr32(reg, val << shift);
+		reg += 4;
+		i += 4;
+	}
+}
+
+static int igb_set_rxfh(struct net_device *netdev, const u32 *indir,
+			const u8 *key, const u8 hfunc)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	int i;
+	u32 num_queues;
+
+	/* We do not allow change in unsupported parameters */
+	if (key ||
+	    (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP))
+		return -EOPNOTSUPP;
+	if (!indir)
+		return 0;
+
+	num_queues = adapter->rss_queues;
+
+	switch (hw->mac.type) {
+	case e1000_82576:
+		/* 82576 supports 2 RSS queues for SR-IOV */
+		if (adapter->vfs_allocated_count)
+			num_queues = 2;
+		break;
+	default:
+		break;
+	}
+
+	/* Verify user input. */
+	for (i = 0; i < IGB_RETA_SIZE; i++)
+		if (indir[i] >= num_queues)
+			return -EINVAL;
+
+
+	for (i = 0; i < IGB_RETA_SIZE; i++)
+		adapter->rss_indir_tbl[i] = indir[i];
+
+	igb_write_rss_indir_tbl(adapter);
+
+	return 0;
+}
+
+static unsigned int igb_max_channels(struct igb_adapter *adapter)
+{
+	return igb_get_max_rss_queues(adapter);
+}
+
+static void igb_get_channels(struct net_device *netdev,
+			     struct ethtool_channels *ch)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	/* Report maximum channels */
+	ch->max_combined = igb_max_channels(adapter);
+
+	/* Report info for other vector */
+	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
+		ch->max_other = NON_Q_VECTORS;
+		ch->other_count = NON_Q_VECTORS;
+	}
+
+	ch->combined_count = adapter->rss_queues;
+}
+
+static int igb_set_channels(struct net_device *netdev,
+			    struct ethtool_channels *ch)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	unsigned int count = ch->combined_count;
+	unsigned int max_combined = 0;
+
+	/* Verify they are not requesting separate vectors */
+	if (!count || ch->rx_count || ch->tx_count)
+		return -EINVAL;
+
+	/* Verify other_count is valid and has not been changed */
+	if (ch->other_count != NON_Q_VECTORS)
+		return -EINVAL;
+
+	/* Verify the number of channels doesn't exceed hw limits */
+	max_combined = igb_max_channels(adapter);
+	if (count > max_combined)
+		return -EINVAL;
+
+	if (count != adapter->rss_queues) {
+		adapter->rss_queues = count;
+		igb_set_flag_queue_pairs(adapter, max_combined);
+
+		/* Hardware has to reinitialize queues and interrupts to
+		 * match the new configuration.
+		 */
+		return igb_reinit_queues(adapter);
+	}
+
+	return 0;
+}
+
+static u32 igb_get_priv_flags(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	u32 priv_flags = 0;
+
+	if (adapter->flags & IGB_FLAG_RX_LEGACY)
+		priv_flags |= IGB_PRIV_FLAGS_LEGACY_RX;
+
+	return priv_flags;
+}
+
+static int igb_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	unsigned int flags = adapter->flags;
+
+	flags &= ~IGB_FLAG_RX_LEGACY;
+	if (priv_flags & IGB_PRIV_FLAGS_LEGACY_RX)
+		flags |= IGB_FLAG_RX_LEGACY;
+
+	if (flags != adapter->flags) {
+		adapter->flags = flags;
+
+		/* reset interface to repopulate queues */
+		if (netif_running(netdev))
+			igb_reinit_locked(adapter);
+	}
+
+	return 0;
+}
+
+static const struct ethtool_ops igb_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_USECS,
+	.get_drvinfo		= igb_get_drvinfo,
+	.get_regs_len		= igb_get_regs_len,
+	.get_regs		= igb_get_regs,
+	.get_wol		= igb_get_wol,
+	.set_wol		= igb_set_wol,
+	.get_msglevel		= igb_get_msglevel,
+	.set_msglevel		= igb_set_msglevel,
+	.nway_reset		= igb_nway_reset,
+	.get_link		= igb_get_link,
+	.get_eeprom_len		= igb_get_eeprom_len,
+	.get_eeprom		= igb_get_eeprom,
+	.set_eeprom		= igb_set_eeprom,
+	.get_ringparam		= igb_get_ringparam,
+	.set_ringparam		= igb_set_ringparam,
+	.get_pauseparam		= igb_get_pauseparam,
+	.set_pauseparam		= igb_set_pauseparam,
+	.self_test		= igb_diag_test,
+	.get_strings		= igb_get_strings,
+	.set_phys_id		= igb_set_phys_id,
+	.get_sset_count		= igb_get_sset_count,
+	.get_ethtool_stats	= igb_get_ethtool_stats,
+	.get_coalesce		= igb_get_coalesce,
+	.set_coalesce		= igb_set_coalesce,
+	.get_ts_info		= igb_get_ts_info,
+	.get_rxnfc		= igb_get_rxnfc,
+	.set_rxnfc		= igb_set_rxnfc,
+	.get_eee		= igb_get_eee,
+	.set_eee		= igb_set_eee,
+	.get_module_info	= igb_get_module_info,
+	.get_module_eeprom	= igb_get_module_eeprom,
+	.get_rxfh_indir_size	= igb_get_rxfh_indir_size,
+	.get_rxfh		= igb_get_rxfh,
+	.set_rxfh		= igb_set_rxfh,
+	.get_channels		= igb_get_channels,
+	.set_channels		= igb_set_channels,
+	.get_priv_flags		= igb_get_priv_flags,
+	.set_priv_flags		= igb_set_priv_flags,
+	.begin			= igb_ethtool_begin,
+	.complete		= igb_ethtool_complete,
+	.get_link_ksettings	= igb_get_link_ksettings,
+	.set_link_ksettings	= igb_set_link_ksettings,
+};
+
+void igb_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &igb_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/igb/igb_hwmon.c b/drivers/net/ethernet/intel/igb/igb_hwmon.c
new file mode 100644
index 0000000000..21a29a0ca7
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/igb_hwmon.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+#include "igb.h"
+#include "e1000_82575.h"
+#include "e1000_hw.h"
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sysfs.h>
+#include <linux/kobject.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/hwmon.h>
+#include <linux/pci.h>
+
+#ifdef CONFIG_IGB_HWMON
+static struct i2c_board_info i350_sensor_info = {
+	I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)),
+};
+
+/* hwmon callback functions */
+static ssize_t igb_hwmon_show_location(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
+						   dev_attr);
+	return sprintf(buf, "loc%u\n",
+		       igb_attr->sensor->location);
+}
+
+static ssize_t igb_hwmon_show_temp(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
+						   dev_attr);
+	unsigned int value;
+
+	/* reset the temp field */
+	igb_attr->hw->mac.ops.get_thermal_sensor_data(igb_attr->hw);
+
+	value = igb_attr->sensor->temp;
+
+	/* display millidegree */
+	value *= 1000;
+
+	return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t igb_hwmon_show_cautionthresh(struct device *dev,
+					    struct device_attribute *attr,
+					    char *buf)
+{
+	struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
+						   dev_attr);
+	unsigned int value = igb_attr->sensor->caution_thresh;
+
+	/* display millidegree */
+	value *= 1000;
+
+	return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t igb_hwmon_show_maxopthresh(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr,
+						   dev_attr);
+	unsigned int value = igb_attr->sensor->max_op_thresh;
+
+	/* display millidegree */
+	value *= 1000;
+
+	return sprintf(buf, "%u\n", value);
+}
+
+/* igb_add_hwmon_attr - Create hwmon attr table for a hwmon sysfs file.
+ * @ adapter: pointer to the adapter structure
+ * @ offset: offset in the eeprom sensor data table
+ * @ type: type of sensor data to display
+ *
+ * For each file we want in hwmon's sysfs interface we need a device_attribute
+ * This is included in our hwmon_attr struct that contains the references to
+ * the data structures we need to get the data to display.
+ */
+static int igb_add_hwmon_attr(struct igb_adapter *adapter,
+			      unsigned int offset, int type)
+{
+	int rc;
+	unsigned int n_attr;
+	struct hwmon_attr *igb_attr;
+
+	n_attr = adapter->igb_hwmon_buff->n_hwmon;
+	igb_attr = &adapter->igb_hwmon_buff->hwmon_list[n_attr];
+
+	switch (type) {
+	case IGB_HWMON_TYPE_LOC:
+		igb_attr->dev_attr.show = igb_hwmon_show_location;
+		snprintf(igb_attr->name, sizeof(igb_attr->name),
+			 "temp%u_label", offset + 1);
+		break;
+	case IGB_HWMON_TYPE_TEMP:
+		igb_attr->dev_attr.show = igb_hwmon_show_temp;
+		snprintf(igb_attr->name, sizeof(igb_attr->name),
+			 "temp%u_input", offset + 1);
+		break;
+	case IGB_HWMON_TYPE_CAUTION:
+		igb_attr->dev_attr.show = igb_hwmon_show_cautionthresh;
+		snprintf(igb_attr->name, sizeof(igb_attr->name),
+			 "temp%u_max", offset + 1);
+		break;
+	case IGB_HWMON_TYPE_MAX:
+		igb_attr->dev_attr.show = igb_hwmon_show_maxopthresh;
+		snprintf(igb_attr->name, sizeof(igb_attr->name),
+			 "temp%u_crit", offset + 1);
+		break;
+	default:
+		rc = -EPERM;
+		return rc;
+	}
+
+	/* These always the same regardless of type */
+	igb_attr->sensor =
+		&adapter->hw.mac.thermal_sensor_data.sensor[offset];
+	igb_attr->hw = &adapter->hw;
+	igb_attr->dev_attr.store = NULL;
+	igb_attr->dev_attr.attr.mode = 0444;
+	igb_attr->dev_attr.attr.name = igb_attr->name;
+	sysfs_attr_init(&igb_attr->dev_attr.attr);
+
+	adapter->igb_hwmon_buff->attrs[n_attr] = &igb_attr->dev_attr.attr;
+
+	++adapter->igb_hwmon_buff->n_hwmon;
+
+	return 0;
+}
+
+static void igb_sysfs_del_adapter(struct igb_adapter *adapter)
+{
+}
+
+/* called from igb_main.c */
+void igb_sysfs_exit(struct igb_adapter *adapter)
+{
+	igb_sysfs_del_adapter(adapter);
+}
+
+/* called from igb_main.c */
+int igb_sysfs_init(struct igb_adapter *adapter)
+{
+	struct hwmon_buff *igb_hwmon;
+	struct i2c_client *client;
+	struct device *hwmon_dev;
+	unsigned int i;
+	int rc = 0;
+
+	/* If this method isn't defined we don't support thermals */
+	if (adapter->hw.mac.ops.init_thermal_sensor_thresh == NULL)
+		goto exit;
+
+	/* Don't create thermal hwmon interface if no sensors present */
+	rc = (adapter->hw.mac.ops.init_thermal_sensor_thresh(&adapter->hw));
+	if (rc)
+		goto exit;
+
+	igb_hwmon = devm_kzalloc(&adapter->pdev->dev, sizeof(*igb_hwmon),
+				 GFP_KERNEL);
+	if (!igb_hwmon) {
+		rc = -ENOMEM;
+		goto exit;
+	}
+	adapter->igb_hwmon_buff = igb_hwmon;
+
+	for (i = 0; i < E1000_MAX_SENSORS; i++) {
+
+		/* Only create hwmon sysfs entries for sensors that have
+		 * meaningful data.
+		 */
+		if (adapter->hw.mac.thermal_sensor_data.sensor[i].location == 0)
+			continue;
+
+		/* Bail if any hwmon attr struct fails to initialize */
+		rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_CAUTION);
+		if (rc)
+			goto exit;
+		rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_LOC);
+		if (rc)
+			goto exit;
+		rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_TEMP);
+		if (rc)
+			goto exit;
+		rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_MAX);
+		if (rc)
+			goto exit;
+	}
+
+	/* init i2c_client */
+	client = i2c_new_client_device(&adapter->i2c_adap, &i350_sensor_info);
+	if (IS_ERR(client)) {
+		dev_info(&adapter->pdev->dev,
+			 "Failed to create new i2c device.\n");
+		rc = PTR_ERR(client);
+		goto exit;
+	}
+	adapter->i2c_client = client;
+
+	igb_hwmon->groups[0] = &igb_hwmon->group;
+	igb_hwmon->group.attrs = igb_hwmon->attrs;
+
+	hwmon_dev = devm_hwmon_device_register_with_groups(&adapter->pdev->dev,
+							   client->name,
+							   igb_hwmon,
+							   igb_hwmon->groups);
+	if (IS_ERR(hwmon_dev)) {
+		rc = PTR_ERR(hwmon_dev);
+		goto err;
+	}
+
+	goto exit;
+
+err:
+	igb_sysfs_del_adapter(adapter);
+exit:
+	return rc;
+}
+#endif
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
new file mode 100644
index 0000000000..76b34cee1d
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -0,0 +1,10176 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2007 - 2018 Intel Corporation. */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/netdevice.h>
+#include <linux/ipv6.h>
+#include <linux/slab.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
+#include <linux/net_tstamp.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/if.h>
+#include <linux/if_vlan.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/sctp.h>
+#include <linux/if_ether.h>
+#include <linux/prefetch.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <linux/pm_runtime.h>
+#include <linux/etherdevice.h>
+#ifdef CONFIG_IGB_DCA
+#include <linux/dca.h>
+#endif
+#include <linux/i2c.h>
+#include "igb.h"
+
+enum queue_mode {
+	QUEUE_MODE_STRICT_PRIORITY,
+	QUEUE_MODE_STREAM_RESERVATION,
+};
+
+enum tx_queue_prio {
+	TX_QUEUE_PRIO_HIGH,
+	TX_QUEUE_PRIO_LOW,
+};
+
+char igb_driver_name[] = "igb";
+static const char igb_driver_string[] =
+				"Intel(R) Gigabit Ethernet Network Driver";
+static const char igb_copyright[] =
+				"Copyright (c) 2007-2014 Intel Corporation.";
+
+static const struct e1000_info *igb_info_tbl[] = {
+	[board_82575] = &e1000_82575_info,
+};
+
+static const struct pci_device_id igb_pci_tbl[] = {
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
+	/* required last entry */
+	{0, }
+};
+
+MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
+
+static int igb_setup_all_tx_resources(struct igb_adapter *);
+static int igb_setup_all_rx_resources(struct igb_adapter *);
+static void igb_free_all_tx_resources(struct igb_adapter *);
+static void igb_free_all_rx_resources(struct igb_adapter *);
+static void igb_setup_mrqc(struct igb_adapter *);
+static int igb_probe(struct pci_dev *, const struct pci_device_id *);
+static void igb_remove(struct pci_dev *pdev);
+static void igb_init_queue_configuration(struct igb_adapter *adapter);
+static int igb_sw_init(struct igb_adapter *);
+int igb_open(struct net_device *);
+int igb_close(struct net_device *);
+static void igb_configure(struct igb_adapter *);
+static void igb_configure_tx(struct igb_adapter *);
+static void igb_configure_rx(struct igb_adapter *);
+static void igb_clean_all_tx_rings(struct igb_adapter *);
+static void igb_clean_all_rx_rings(struct igb_adapter *);
+static void igb_clean_tx_ring(struct igb_ring *);
+static void igb_clean_rx_ring(struct igb_ring *);
+static void igb_set_rx_mode(struct net_device *);
+static void igb_update_phy_info(struct timer_list *);
+static void igb_watchdog(struct timer_list *);
+static void igb_watchdog_task(struct work_struct *);
+static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
+static void igb_get_stats64(struct net_device *dev,
+			    struct rtnl_link_stats64 *stats);
+static int igb_change_mtu(struct net_device *, int);
+static int igb_set_mac(struct net_device *, void *);
+static void igb_set_uta(struct igb_adapter *adapter, bool set);
+static irqreturn_t igb_intr(int irq, void *);
+static irqreturn_t igb_intr_msi(int irq, void *);
+static irqreturn_t igb_msix_other(int irq, void *);
+static irqreturn_t igb_msix_ring(int irq, void *);
+#ifdef CONFIG_IGB_DCA
+static void igb_update_dca(struct igb_q_vector *);
+static void igb_setup_dca(struct igb_adapter *);
+#endif /* CONFIG_IGB_DCA */
+static int igb_poll(struct napi_struct *, int);
+static bool igb_clean_tx_irq(struct igb_q_vector *, int);
+static int igb_clean_rx_irq(struct igb_q_vector *, int);
+static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
+static void igb_tx_timeout(struct net_device *, unsigned int txqueue);
+static void igb_reset_task(struct work_struct *);
+static void igb_vlan_mode(struct net_device *netdev,
+			  netdev_features_t features);
+static int igb_vlan_rx_add_vid(struct net_device *, __be16, u16);
+static int igb_vlan_rx_kill_vid(struct net_device *, __be16, u16);
+static void igb_restore_vlan(struct igb_adapter *);
+static void igb_rar_set_index(struct igb_adapter *, u32);
+static void igb_ping_all_vfs(struct igb_adapter *);
+static void igb_msg_task(struct igb_adapter *);
+static void igb_vmm_control(struct igb_adapter *);
+static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
+static void igb_flush_mac_table(struct igb_adapter *);
+static int igb_available_rars(struct igb_adapter *, u8);
+static void igb_set_default_mac_filter(struct igb_adapter *);
+static int igb_uc_sync(struct net_device *, const unsigned char *);
+static int igb_uc_unsync(struct net_device *, const unsigned char *);
+static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
+static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
+static int igb_ndo_set_vf_vlan(struct net_device *netdev,
+			       int vf, u16 vlan, u8 qos, __be16 vlan_proto);
+static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
+static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
+				   bool setting);
+static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf,
+				bool setting);
+static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
+				 struct ifla_vf_info *ivi);
+static void igb_check_vf_rate_limit(struct igb_adapter *);
+static void igb_nfc_filter_exit(struct igb_adapter *adapter);
+static void igb_nfc_filter_restore(struct igb_adapter *adapter);
+
+#ifdef CONFIG_PCI_IOV
+static int igb_vf_configure(struct igb_adapter *adapter, int vf);
+static int igb_disable_sriov(struct pci_dev *dev, bool reinit);
+#endif
+
+static int igb_suspend(struct device *);
+static int igb_resume(struct device *);
+static int igb_runtime_suspend(struct device *dev);
+static int igb_runtime_resume(struct device *dev);
+static int igb_runtime_idle(struct device *dev);
+#ifdef CONFIG_PM
+static const struct dev_pm_ops igb_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
+	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
+			igb_runtime_idle)
+};
+#endif
+static void igb_shutdown(struct pci_dev *);
+static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs);
+#ifdef CONFIG_IGB_DCA
+static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
+static struct notifier_block dca_notifier = {
+	.notifier_call	= igb_notify_dca,
+	.next		= NULL,
+	.priority	= 0
+};
+#endif
+#ifdef CONFIG_PCI_IOV
+static unsigned int max_vfs;
+module_param(max_vfs, uint, 0);
+MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate per physical function");
+#endif /* CONFIG_PCI_IOV */
+
+static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
+		     pci_channel_state_t);
+static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
+static void igb_io_resume(struct pci_dev *);
+
+static const struct pci_error_handlers igb_err_handler = {
+	.error_detected = igb_io_error_detected,
+	.slot_reset = igb_io_slot_reset,
+	.resume = igb_io_resume,
+};
+
+static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
+
+static struct pci_driver igb_driver = {
+	.name     = igb_driver_name,
+	.id_table = igb_pci_tbl,
+	.probe    = igb_probe,
+	.remove   = igb_remove,
+#ifdef CONFIG_PM
+	.driver.pm = &igb_pm_ops,
+#endif
+	.shutdown = igb_shutdown,
+	.sriov_configure = igb_pci_sriov_configure,
+	.err_handler = &igb_err_handler
+};
+
+MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
+MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
+MODULE_LICENSE("GPL v2");
+
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+struct igb_reg_info {
+	u32 ofs;
+	char *name;
+};
+
+static const struct igb_reg_info igb_reg_info_tbl[] = {
+
+	/* General Registers */
+	{E1000_CTRL, "CTRL"},
+	{E1000_STATUS, "STATUS"},
+	{E1000_CTRL_EXT, "CTRL_EXT"},
+
+	/* Interrupt Registers */
+	{E1000_ICR, "ICR"},
+
+	/* RX Registers */
+	{E1000_RCTL, "RCTL"},
+	{E1000_RDLEN(0), "RDLEN"},
+	{E1000_RDH(0), "RDH"},
+	{E1000_RDT(0), "RDT"},
+	{E1000_RXDCTL(0), "RXDCTL"},
+	{E1000_RDBAL(0), "RDBAL"},
+	{E1000_RDBAH(0), "RDBAH"},
+
+	/* TX Registers */
+	{E1000_TCTL, "TCTL"},
+	{E1000_TDBAL(0), "TDBAL"},
+	{E1000_TDBAH(0), "TDBAH"},
+	{E1000_TDLEN(0), "TDLEN"},
+	{E1000_TDH(0), "TDH"},
+	{E1000_TDT(0), "TDT"},
+	{E1000_TXDCTL(0), "TXDCTL"},
+	{E1000_TDFH, "TDFH"},
+	{E1000_TDFT, "TDFT"},
+	{E1000_TDFHS, "TDFHS"},
+	{E1000_TDFPC, "TDFPC"},
+
+	/* List Terminator */
+	{}
+};
+
+/* igb_regdump - register printout routine */
+static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
+{
+	int n = 0;
+	char rname[16];
+	u32 regs[8];
+
+	switch (reginfo->ofs) {
+	case E1000_RDLEN(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(E1000_RDLEN(n));
+		break;
+	case E1000_RDH(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(E1000_RDH(n));
+		break;
+	case E1000_RDT(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(E1000_RDT(n));
+		break;
+	case E1000_RXDCTL(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(E1000_RXDCTL(n));
+		break;
+	case E1000_RDBAL(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(E1000_RDBAL(n));
+		break;
+	case E1000_RDBAH(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(E1000_RDBAH(n));
+		break;
+	case E1000_TDBAL(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(E1000_TDBAL(n));
+		break;
+	case E1000_TDBAH(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(E1000_TDBAH(n));
+		break;
+	case E1000_TDLEN(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(E1000_TDLEN(n));
+		break;
+	case E1000_TDH(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(E1000_TDH(n));
+		break;
+	case E1000_TDT(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(E1000_TDT(n));
+		break;
+	case E1000_TXDCTL(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(E1000_TXDCTL(n));
+		break;
+	default:
+		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
+		return;
+	}
+
+	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
+	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
+		regs[2], regs[3]);
+}
+
+/* igb_dump - Print registers, Tx-rings and Rx-rings */
+static void igb_dump(struct igb_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	struct igb_reg_info *reginfo;
+	struct igb_ring *tx_ring;
+	union e1000_adv_tx_desc *tx_desc;
+	struct my_u0 { __le64 a; __le64 b; } *u0;
+	struct igb_ring *rx_ring;
+	union e1000_adv_rx_desc *rx_desc;
+	u32 staterr;
+	u16 i, n;
+
+	if (!netif_msg_hw(adapter))
+		return;
+
+	/* Print netdevice Info */
+	if (netdev) {
+		dev_info(&adapter->pdev->dev, "Net device Info\n");
+		pr_info("Device Name     state            trans_start\n");
+		pr_info("%-15s %016lX %016lX\n", netdev->name,
+			netdev->state, dev_trans_start(netdev));
+	}
+
+	/* Print Registers */
+	dev_info(&adapter->pdev->dev, "Register Dump\n");
+	pr_info(" Register Name   Value\n");
+	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
+	     reginfo->name; reginfo++) {
+		igb_regdump(hw, reginfo);
+	}
+
+	/* Print TX Ring Summary */
+	if (!netdev || !netif_running(netdev))
+		goto exit;
+
+	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
+	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
+	for (n = 0; n < adapter->num_tx_queues; n++) {
+		struct igb_tx_buffer *buffer_info;
+		tx_ring = adapter->tx_ring[n];
+		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
+		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
+			n, tx_ring->next_to_use, tx_ring->next_to_clean,
+			(u64)dma_unmap_addr(buffer_info, dma),
+			dma_unmap_len(buffer_info, len),
+			buffer_info->next_to_watch,
+			(u64)buffer_info->time_stamp);
+	}
+
+	/* Print TX Rings */
+	if (!netif_msg_tx_done(adapter))
+		goto rx_ring_summary;
+
+	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
+
+	/* Transmit Descriptor Formats
+	 *
+	 * Advanced Transmit Descriptor
+	 *   +--------------------------------------------------------------+
+	 * 0 |         Buffer Address [63:0]                                |
+	 *   +--------------------------------------------------------------+
+	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
+	 *   +--------------------------------------------------------------+
+	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
+	 */
+
+	for (n = 0; n < adapter->num_tx_queues; n++) {
+		tx_ring = adapter->tx_ring[n];
+		pr_info("------------------------------------\n");
+		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
+		pr_info("------------------------------------\n");
+		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] [bi->dma       ] leng  ntw timestamp        bi->skb\n");
+
+		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
+			const char *next_desc;
+			struct igb_tx_buffer *buffer_info;
+			tx_desc = IGB_TX_DESC(tx_ring, i);
+			buffer_info = &tx_ring->tx_buffer_info[i];
+			u0 = (struct my_u0 *)tx_desc;
+			if (i == tx_ring->next_to_use &&
+			    i == tx_ring->next_to_clean)
+				next_desc = " NTC/U";
+			else if (i == tx_ring->next_to_use)
+				next_desc = " NTU";
+			else if (i == tx_ring->next_to_clean)
+				next_desc = " NTC";
+			else
+				next_desc = "";
+
+			pr_info("T [0x%03X]    %016llX %016llX %016llX %04X  %p %016llX %p%s\n",
+				i, le64_to_cpu(u0->a),
+				le64_to_cpu(u0->b),
+				(u64)dma_unmap_addr(buffer_info, dma),
+				dma_unmap_len(buffer_info, len),
+				buffer_info->next_to_watch,
+				(u64)buffer_info->time_stamp,
+				buffer_info->skb, next_desc);
+
+			if (netif_msg_pktdata(adapter) && buffer_info->skb)
+				print_hex_dump(KERN_INFO, "",
+					DUMP_PREFIX_ADDRESS,
+					16, 1, buffer_info->skb->data,
+					dma_unmap_len(buffer_info, len),
+					true);
+		}
+	}
+
+	/* Print RX Rings Summary */
+rx_ring_summary:
+	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
+	pr_info("Queue [NTU] [NTC]\n");
+	for (n = 0; n < adapter->num_rx_queues; n++) {
+		rx_ring = adapter->rx_ring[n];
+		pr_info(" %5d %5X %5X\n",
+			n, rx_ring->next_to_use, rx_ring->next_to_clean);
+	}
+
+	/* Print RX Rings */
+	if (!netif_msg_rx_status(adapter))
+		goto exit;
+
+	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
+
+	/* Advanced Receive Descriptor (Read) Format
+	 *    63                                           1        0
+	 *    +-----------------------------------------------------+
+	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
+	 *    +----------------------------------------------+------+
+	 *  8 |       Header Buffer Address [63:1]           |  DD  |
+	 *    +-----------------------------------------------------+
+	 *
+	 *
+	 * Advanced Receive Descriptor (Write-Back) Format
+	 *
+	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
+	 *   +------------------------------------------------------+
+	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
+	 *   | Checksum   Ident  |   |           |    | Type | Type |
+	 *   +------------------------------------------------------+
+	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
+	 *   +------------------------------------------------------+
+	 *   63       48 47    32 31            20 19               0
+	 */
+
+	for (n = 0; n < adapter->num_rx_queues; n++) {
+		rx_ring = adapter->rx_ring[n];
+		pr_info("------------------------------------\n");
+		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
+		pr_info("------------------------------------\n");
+		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] [bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
+		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] ---------------- [bi->skb] <-- Adv Rx Write-Back format\n");
+
+		for (i = 0; i < rx_ring->count; i++) {
+			const char *next_desc;
+			struct igb_rx_buffer *buffer_info;
+			buffer_info = &rx_ring->rx_buffer_info[i];
+			rx_desc = IGB_RX_DESC(rx_ring, i);
+			u0 = (struct my_u0 *)rx_desc;
+			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+
+			if (i == rx_ring->next_to_use)
+				next_desc = " NTU";
+			else if (i == rx_ring->next_to_clean)
+				next_desc = " NTC";
+			else
+				next_desc = "";
+
+			if (staterr & E1000_RXD_STAT_DD) {
+				/* Descriptor Done */
+				pr_info("%s[0x%03X]     %016llX %016llX ---------------- %s\n",
+					"RWB", i,
+					le64_to_cpu(u0->a),
+					le64_to_cpu(u0->b),
+					next_desc);
+			} else {
+				pr_info("%s[0x%03X]     %016llX %016llX %016llX %s\n",
+					"R  ", i,
+					le64_to_cpu(u0->a),
+					le64_to_cpu(u0->b),
+					(u64)buffer_info->dma,
+					next_desc);
+
+				if (netif_msg_pktdata(adapter) &&
+				    buffer_info->dma && buffer_info->page) {
+					print_hex_dump(KERN_INFO, "",
+					  DUMP_PREFIX_ADDRESS,
+					  16, 1,
+					  page_address(buffer_info->page) +
+						      buffer_info->page_offset,
+					  igb_rx_bufsz(rx_ring), true);
+				}
+			}
+		}
+	}
+
+exit:
+	return;
+}
+
+/**
+ *  igb_get_i2c_data - Reads the I2C SDA data bit
+ *  @data: opaque pointer to adapter struct
+ *
+ *  Returns the I2C data bit value
+ **/
+static int igb_get_i2c_data(void *data)
+{
+	struct igb_adapter *adapter = (struct igb_adapter *)data;
+	struct e1000_hw *hw = &adapter->hw;
+	s32 i2cctl = rd32(E1000_I2CPARAMS);
+
+	return !!(i2cctl & E1000_I2C_DATA_IN);
+}
+
+/**
+ *  igb_set_i2c_data - Sets the I2C data bit
+ *  @data: pointer to hardware structure
+ *  @state: I2C data value (0 or 1) to set
+ *
+ *  Sets the I2C data bit
+ **/
+static void igb_set_i2c_data(void *data, int state)
+{
+	struct igb_adapter *adapter = (struct igb_adapter *)data;
+	struct e1000_hw *hw = &adapter->hw;
+	s32 i2cctl = rd32(E1000_I2CPARAMS);
+
+	if (state) {
+		i2cctl |= E1000_I2C_DATA_OUT | E1000_I2C_DATA_OE_N;
+	} else {
+		i2cctl &= ~E1000_I2C_DATA_OE_N;
+		i2cctl &= ~E1000_I2C_DATA_OUT;
+	}
+
+	wr32(E1000_I2CPARAMS, i2cctl);
+	wrfl();
+}
+
+/**
+ *  igb_set_i2c_clk - Sets the I2C SCL clock
+ *  @data: pointer to hardware structure
+ *  @state: state to set clock
+ *
+ *  Sets the I2C clock line to state
+ **/
+static void igb_set_i2c_clk(void *data, int state)
+{
+	struct igb_adapter *adapter = (struct igb_adapter *)data;
+	struct e1000_hw *hw = &adapter->hw;
+	s32 i2cctl = rd32(E1000_I2CPARAMS);
+
+	if (state) {
+		i2cctl |= E1000_I2C_CLK_OUT | E1000_I2C_CLK_OE_N;
+	} else {
+		i2cctl &= ~E1000_I2C_CLK_OUT;
+		i2cctl &= ~E1000_I2C_CLK_OE_N;
+	}
+	wr32(E1000_I2CPARAMS, i2cctl);
+	wrfl();
+}
+
+/**
+ *  igb_get_i2c_clk - Gets the I2C SCL clock state
+ *  @data: pointer to hardware structure
+ *
+ *  Gets the I2C clock state
+ **/
+static int igb_get_i2c_clk(void *data)
+{
+	struct igb_adapter *adapter = (struct igb_adapter *)data;
+	struct e1000_hw *hw = &adapter->hw;
+	s32 i2cctl = rd32(E1000_I2CPARAMS);
+
+	return !!(i2cctl & E1000_I2C_CLK_IN);
+}
+
+static const struct i2c_algo_bit_data igb_i2c_algo = {
+	.setsda		= igb_set_i2c_data,
+	.setscl		= igb_set_i2c_clk,
+	.getsda		= igb_get_i2c_data,
+	.getscl		= igb_get_i2c_clk,
+	.udelay		= 5,
+	.timeout	= 20,
+};
+
+/**
+ *  igb_get_hw_dev - return device
+ *  @hw: pointer to hardware structure
+ *
+ *  used by hardware layer to print debugging information
+ **/
+struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
+{
+	struct igb_adapter *adapter = hw->back;
+	return adapter->netdev;
+}
+
+/**
+ *  igb_init_module - Driver Registration Routine
+ *
+ *  igb_init_module is the first routine called when the driver is
+ *  loaded. All it does is register with the PCI subsystem.
+ **/
+static int __init igb_init_module(void)
+{
+	int ret;
+
+	pr_info("%s\n", igb_driver_string);
+	pr_info("%s\n", igb_copyright);
+
+#ifdef CONFIG_IGB_DCA
+	dca_register_notify(&dca_notifier);
+#endif
+	ret = pci_register_driver(&igb_driver);
+	return ret;
+}
+
+module_init(igb_init_module);
+
+/**
+ *  igb_exit_module - Driver Exit Cleanup Routine
+ *
+ *  igb_exit_module is called just before the driver is removed
+ *  from memory.
+ **/
+static void __exit igb_exit_module(void)
+{
+#ifdef CONFIG_IGB_DCA
+	dca_unregister_notify(&dca_notifier);
+#endif
+	pci_unregister_driver(&igb_driver);
+}
+
+module_exit(igb_exit_module);
+
+#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
+/**
+ *  igb_cache_ring_register - Descriptor ring to register mapping
+ *  @adapter: board private structure to initialize
+ *
+ *  Once we know the feature-set enabled for the device, we'll cache
+ *  the register offset the descriptor ring is assigned to.
+ **/
+static void igb_cache_ring_register(struct igb_adapter *adapter)
+{
+	int i = 0, j = 0;
+	u32 rbase_offset = adapter->vfs_allocated_count;
+
+	switch (adapter->hw.mac.type) {
+	case e1000_82576:
+		/* The queues are allocated for virtualization such that VF 0
+		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
+		 * In order to avoid collision we start at the first free queue
+		 * and continue consuming queues in the same sequence
+		 */
+		if (adapter->vfs_allocated_count) {
+			for (; i < adapter->rss_queues; i++)
+				adapter->rx_ring[i]->reg_idx = rbase_offset +
+							       Q_IDX_82576(i);
+		}
+		fallthrough;
+	case e1000_82575:
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+	default:
+		for (; i < adapter->num_rx_queues; i++)
+			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
+		for (; j < adapter->num_tx_queues; j++)
+			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
+		break;
+	}
+}
+
+u32 igb_rd32(struct e1000_hw *hw, u32 reg)
+{
+	struct igb_adapter *igb = container_of(hw, struct igb_adapter, hw);
+	u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
+	u32 value = 0;
+
+	if (E1000_REMOVED(hw_addr))
+		return ~value;
+
+	value = readl(&hw_addr[reg]);
+
+	/* reads should not return all F's */
+	if (!(~value) && (!reg || !(~readl(hw_addr)))) {
+		struct net_device *netdev = igb->netdev;
+		hw->hw_addr = NULL;
+		netdev_err(netdev, "PCIe link lost\n");
+		WARN(pci_device_is_present(igb->pdev),
+		     "igb: Failed to read reg 0x%x!\n", reg);
+	}
+
+	return value;
+}
+
+/**
+ *  igb_write_ivar - configure ivar for given MSI-X vector
+ *  @hw: pointer to the HW structure
+ *  @msix_vector: vector number we are allocating to a given ring
+ *  @index: row index of IVAR register to write within IVAR table
+ *  @offset: column offset of in IVAR, should be multiple of 8
+ *
+ *  This function is intended to handle the writing of the IVAR register
+ *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
+ *  each containing an cause allocation for an Rx and Tx ring, and a
+ *  variable number of rows depending on the number of queues supported.
+ **/
+static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
+			   int index, int offset)
+{
+	u32 ivar = array_rd32(E1000_IVAR0, index);
+
+	/* clear any bits that are currently set */
+	ivar &= ~((u32)0xFF << offset);
+
+	/* write vector and valid bit */
+	ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
+
+	array_wr32(E1000_IVAR0, index, ivar);
+}
+
+#define IGB_N0_QUEUE -1
+static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	struct e1000_hw *hw = &adapter->hw;
+	int rx_queue = IGB_N0_QUEUE;
+	int tx_queue = IGB_N0_QUEUE;
+	u32 msixbm = 0;
+
+	if (q_vector->rx.ring)
+		rx_queue = q_vector->rx.ring->reg_idx;
+	if (q_vector->tx.ring)
+		tx_queue = q_vector->tx.ring->reg_idx;
+
+	switch (hw->mac.type) {
+	case e1000_82575:
+		/* The 82575 assigns vectors using a bitmask, which matches the
+		 * bitmask for the EICR/EIMS/EIMC registers.  To assign one
+		 * or more queues to a vector, we write the appropriate bits
+		 * into the MSIXBM register for that vector.
+		 */
+		if (rx_queue > IGB_N0_QUEUE)
+			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
+		if (tx_queue > IGB_N0_QUEUE)
+			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
+		if (!(adapter->flags & IGB_FLAG_HAS_MSIX) && msix_vector == 0)
+			msixbm |= E1000_EIMS_OTHER;
+		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
+		q_vector->eims_value = msixbm;
+		break;
+	case e1000_82576:
+		/* 82576 uses a table that essentially consists of 2 columns
+		 * with 8 rows.  The ordering is column-major so we use the
+		 * lower 3 bits as the row index, and the 4th bit as the
+		 * column offset.
+		 */
+		if (rx_queue > IGB_N0_QUEUE)
+			igb_write_ivar(hw, msix_vector,
+				       rx_queue & 0x7,
+				       (rx_queue & 0x8) << 1);
+		if (tx_queue > IGB_N0_QUEUE)
+			igb_write_ivar(hw, msix_vector,
+				       tx_queue & 0x7,
+				       ((tx_queue & 0x8) << 1) + 8);
+		q_vector->eims_value = BIT(msix_vector);
+		break;
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+		/* On 82580 and newer adapters the scheme is similar to 82576
+		 * however instead of ordering column-major we have things
+		 * ordered row-major.  So we traverse the table by using
+		 * bit 0 as the column offset, and the remaining bits as the
+		 * row index.
+		 */
+		if (rx_queue > IGB_N0_QUEUE)
+			igb_write_ivar(hw, msix_vector,
+				       rx_queue >> 1,
+				       (rx_queue & 0x1) << 4);
+		if (tx_queue > IGB_N0_QUEUE)
+			igb_write_ivar(hw, msix_vector,
+				       tx_queue >> 1,
+				       ((tx_queue & 0x1) << 4) + 8);
+		q_vector->eims_value = BIT(msix_vector);
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	/* add q_vector eims value to global eims_enable_mask */
+	adapter->eims_enable_mask |= q_vector->eims_value;
+
+	/* configure q_vector to set itr on first interrupt */
+	q_vector->set_itr = 1;
+}
+
+/**
+ *  igb_configure_msix - Configure MSI-X hardware
+ *  @adapter: board private structure to initialize
+ *
+ *  igb_configure_msix sets up the hardware to properly
+ *  generate MSI-X interrupts.
+ **/
+static void igb_configure_msix(struct igb_adapter *adapter)
+{
+	u32 tmp;
+	int i, vector = 0;
+	struct e1000_hw *hw = &adapter->hw;
+
+	adapter->eims_enable_mask = 0;
+
+	/* set vector for other causes, i.e. link changes */
+	switch (hw->mac.type) {
+	case e1000_82575:
+		tmp = rd32(E1000_CTRL_EXT);
+		/* enable MSI-X PBA support*/
+		tmp |= E1000_CTRL_EXT_PBA_CLR;
+
+		/* Auto-Mask interrupts upon ICR read. */
+		tmp |= E1000_CTRL_EXT_EIAME;
+		tmp |= E1000_CTRL_EXT_IRCA;
+
+		wr32(E1000_CTRL_EXT, tmp);
+
+		/* enable msix_other interrupt */
+		array_wr32(E1000_MSIXBM(0), vector++, E1000_EIMS_OTHER);
+		adapter->eims_other = E1000_EIMS_OTHER;
+
+		break;
+
+	case e1000_82576:
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+		/* Turn on MSI-X capability first, or our settings
+		 * won't stick.  And it will take days to debug.
+		 */
+		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
+		     E1000_GPIE_PBA | E1000_GPIE_EIAME |
+		     E1000_GPIE_NSICR);
+
+		/* enable msix_other interrupt */
+		adapter->eims_other = BIT(vector);
+		tmp = (vector++ | E1000_IVAR_VALID) << 8;
+
+		wr32(E1000_IVAR_MISC, tmp);
+		break;
+	default:
+		/* do nothing, since nothing else supports MSI-X */
+		break;
+	} /* switch (hw->mac.type) */
+
+	adapter->eims_enable_mask |= adapter->eims_other;
+
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		igb_assign_vector(adapter->q_vector[i], vector++);
+
+	wrfl();
+}
+
+/**
+ *  igb_request_msix - Initialize MSI-X interrupts
+ *  @adapter: board private structure to initialize
+ *
+ *  igb_request_msix allocates MSI-X vectors and requests interrupts from the
+ *  kernel.
+ **/
+static int igb_request_msix(struct igb_adapter *adapter)
+{
+	unsigned int num_q_vectors = adapter->num_q_vectors;
+	struct net_device *netdev = adapter->netdev;
+	int i, err = 0, vector = 0, free_vector = 0;
+
+	err = request_irq(adapter->msix_entries[vector].vector,
+			  igb_msix_other, 0, netdev->name, adapter);
+	if (err)
+		goto err_out;
+
+	if (num_q_vectors > MAX_Q_VECTORS) {
+		num_q_vectors = MAX_Q_VECTORS;
+		dev_warn(&adapter->pdev->dev,
+			 "The number of queue vectors (%d) is higher than max allowed (%d)\n",
+			 adapter->num_q_vectors, MAX_Q_VECTORS);
+	}
+	for (i = 0; i < num_q_vectors; i++) {
+		struct igb_q_vector *q_vector = adapter->q_vector[i];
+
+		vector++;
+
+		q_vector->itr_register = adapter->io_addr + E1000_EITR(vector);
+
+		if (q_vector->rx.ring && q_vector->tx.ring)
+			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
+				q_vector->rx.ring->queue_index);
+		else if (q_vector->tx.ring)
+			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
+				q_vector->tx.ring->queue_index);
+		else if (q_vector->rx.ring)
+			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
+				q_vector->rx.ring->queue_index);
+		else
+			sprintf(q_vector->name, "%s-unused", netdev->name);
+
+		err = request_irq(adapter->msix_entries[vector].vector,
+				  igb_msix_ring, 0, q_vector->name,
+				  q_vector);
+		if (err)
+			goto err_free;
+	}
+
+	igb_configure_msix(adapter);
+	return 0;
+
+err_free:
+	/* free already assigned IRQs */
+	free_irq(adapter->msix_entries[free_vector++].vector, adapter);
+
+	vector--;
+	for (i = 0; i < vector; i++) {
+		free_irq(adapter->msix_entries[free_vector++].vector,
+			 adapter->q_vector[i]);
+	}
+err_out:
+	return err;
+}
+
+/**
+ *  igb_free_q_vector - Free memory allocated for specific interrupt vector
+ *  @adapter: board private structure to initialize
+ *  @v_idx: Index of vector to be freed
+ *
+ *  This function frees the memory allocated to the q_vector.
+ **/
+static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx)
+{
+	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
+
+	adapter->q_vector[v_idx] = NULL;
+
+	/* igb_get_stats64() might access the rings on this vector,
+	 * we must wait a grace period before freeing it.
+	 */
+	if (q_vector)
+		kfree_rcu(q_vector, rcu);
+}
+
+/**
+ *  igb_reset_q_vector - Reset config for interrupt vector
+ *  @adapter: board private structure to initialize
+ *  @v_idx: Index of vector to be reset
+ *
+ *  If NAPI is enabled it will delete any references to the
+ *  NAPI struct. This is preparation for igb_free_q_vector.
+ **/
+static void igb_reset_q_vector(struct igb_adapter *adapter, int v_idx)
+{
+	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
+
+	/* Coming from igb_set_interrupt_capability, the vectors are not yet
+	 * allocated. So, q_vector is NULL so we should stop here.
+	 */
+	if (!q_vector)
+		return;
+
+	if (q_vector->tx.ring)
+		adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
+
+	if (q_vector->rx.ring)
+		adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
+
+	netif_napi_del(&q_vector->napi);
+
+}
+
+static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
+{
+	int v_idx = adapter->num_q_vectors;
+
+	if (adapter->flags & IGB_FLAG_HAS_MSIX)
+		pci_disable_msix(adapter->pdev);
+	else if (adapter->flags & IGB_FLAG_HAS_MSI)
+		pci_disable_msi(adapter->pdev);
+
+	while (v_idx--)
+		igb_reset_q_vector(adapter, v_idx);
+}
+
+/**
+ *  igb_free_q_vectors - Free memory allocated for interrupt vectors
+ *  @adapter: board private structure to initialize
+ *
+ *  This function frees the memory allocated to the q_vectors.  In addition if
+ *  NAPI is enabled it will delete any references to the NAPI struct prior
+ *  to freeing the q_vector.
+ **/
+static void igb_free_q_vectors(struct igb_adapter *adapter)
+{
+	int v_idx = adapter->num_q_vectors;
+
+	adapter->num_tx_queues = 0;
+	adapter->num_rx_queues = 0;
+	adapter->num_q_vectors = 0;
+
+	while (v_idx--) {
+		igb_reset_q_vector(adapter, v_idx);
+		igb_free_q_vector(adapter, v_idx);
+	}
+}
+
+/**
+ *  igb_clear_interrupt_scheme - reset the device to a state of no interrupts
+ *  @adapter: board private structure to initialize
+ *
+ *  This function resets the device so that it has 0 Rx queues, Tx queues, and
+ *  MSI-X interrupts allocated.
+ */
+static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
+{
+	igb_free_q_vectors(adapter);
+	igb_reset_interrupt_capability(adapter);
+}
+
+/**
+ *  igb_set_interrupt_capability - set MSI or MSI-X if supported
+ *  @adapter: board private structure to initialize
+ *  @msix: boolean value of MSIX capability
+ *
+ *  Attempt to configure interrupts using the best available
+ *  capabilities of the hardware and kernel.
+ **/
+static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix)
+{
+	int err;
+	int numvecs, i;
+
+	if (!msix)
+		goto msi_only;
+	adapter->flags |= IGB_FLAG_HAS_MSIX;
+
+	/* Number of supported queues. */
+	adapter->num_rx_queues = adapter->rss_queues;
+	if (adapter->vfs_allocated_count)
+		adapter->num_tx_queues = 1;
+	else
+		adapter->num_tx_queues = adapter->rss_queues;
+
+	/* start with one vector for every Rx queue */
+	numvecs = adapter->num_rx_queues;
+
+	/* if Tx handler is separate add 1 for every Tx queue */
+	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
+		numvecs += adapter->num_tx_queues;
+
+	/* store the number of vectors reserved for queues */
+	adapter->num_q_vectors = numvecs;
+
+	/* add 1 vector for link status interrupts */
+	numvecs++;
+	for (i = 0; i < numvecs; i++)
+		adapter->msix_entries[i].entry = i;
+
+	err = pci_enable_msix_range(adapter->pdev,
+				    adapter->msix_entries,
+				    numvecs,
+				    numvecs);
+	if (err > 0)
+		return;
+
+	igb_reset_interrupt_capability(adapter);
+
+	/* If we can't do MSI-X, try MSI */
+msi_only:
+	adapter->flags &= ~IGB_FLAG_HAS_MSIX;
+#ifdef CONFIG_PCI_IOV
+	/* disable SR-IOV for non MSI-X configurations */
+	if (adapter->vf_data) {
+		struct e1000_hw *hw = &adapter->hw;
+		/* disable iov and allow time for transactions to clear */
+		pci_disable_sriov(adapter->pdev);
+		msleep(500);
+
+		kfree(adapter->vf_mac_list);
+		adapter->vf_mac_list = NULL;
+		kfree(adapter->vf_data);
+		adapter->vf_data = NULL;
+		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
+		wrfl();
+		msleep(100);
+		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
+	}
+#endif
+	adapter->vfs_allocated_count = 0;
+	adapter->rss_queues = 1;
+	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
+	adapter->num_rx_queues = 1;
+	adapter->num_tx_queues = 1;
+	adapter->num_q_vectors = 1;
+	if (!pci_enable_msi(adapter->pdev))
+		adapter->flags |= IGB_FLAG_HAS_MSI;
+}
+
+static void igb_add_ring(struct igb_ring *ring,
+			 struct igb_ring_container *head)
+{
+	head->ring = ring;
+	head->count++;
+}
+
+/**
+ *  igb_alloc_q_vector - Allocate memory for a single interrupt vector
+ *  @adapter: board private structure to initialize
+ *  @v_count: q_vectors allocated on adapter, used for ring interleaving
+ *  @v_idx: index of vector in adapter struct
+ *  @txr_count: total number of Tx rings to allocate
+ *  @txr_idx: index of first Tx ring to allocate
+ *  @rxr_count: total number of Rx rings to allocate
+ *  @rxr_idx: index of first Rx ring to allocate
+ *
+ *  We allocate one q_vector.  If allocation fails we return -ENOMEM.
+ **/
+static int igb_alloc_q_vector(struct igb_adapter *adapter,
+			      int v_count, int v_idx,
+			      int txr_count, int txr_idx,
+			      int rxr_count, int rxr_idx)
+{
+	struct igb_q_vector *q_vector;
+	struct igb_ring *ring;
+	int ring_count;
+	size_t size;
+
+	/* igb only supports 1 Tx and/or 1 Rx queue per vector */
+	if (txr_count > 1 || rxr_count > 1)
+		return -ENOMEM;
+
+	ring_count = txr_count + rxr_count;
+	size = kmalloc_size_roundup(struct_size(q_vector, ring, ring_count));
+
+	/* allocate q_vector and rings */
+	q_vector = adapter->q_vector[v_idx];
+	if (!q_vector) {
+		q_vector = kzalloc(size, GFP_KERNEL);
+	} else if (size > ksize(q_vector)) {
+		struct igb_q_vector *new_q_vector;
+
+		new_q_vector = kzalloc(size, GFP_KERNEL);
+		if (new_q_vector)
+			kfree_rcu(q_vector, rcu);
+		q_vector = new_q_vector;
+	} else {
+		memset(q_vector, 0, size);
+	}
+	if (!q_vector)
+		return -ENOMEM;
+
+	/* initialize NAPI */
+	netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll);
+
+	/* tie q_vector and adapter together */
+	adapter->q_vector[v_idx] = q_vector;
+	q_vector->adapter = adapter;
+
+	/* initialize work limits */
+	q_vector->tx.work_limit = adapter->tx_work_limit;
+
+	/* initialize ITR configuration */
+	q_vector->itr_register = adapter->io_addr + E1000_EITR(0);
+	q_vector->itr_val = IGB_START_ITR;
+
+	/* initialize pointer to rings */
+	ring = q_vector->ring;
+
+	/* intialize ITR */
+	if (rxr_count) {
+		/* rx or rx/tx vector */
+		if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
+			q_vector->itr_val = adapter->rx_itr_setting;
+	} else {
+		/* tx only vector */
+		if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
+			q_vector->itr_val = adapter->tx_itr_setting;
+	}
+
+	if (txr_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Tx values */
+		igb_add_ring(ring, &q_vector->tx);
+
+		/* For 82575, context index must be unique per ring. */
+		if (adapter->hw.mac.type == e1000_82575)
+			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
+
+		/* apply Tx specific ring traits */
+		ring->count = adapter->tx_ring_count;
+		ring->queue_index = txr_idx;
+
+		ring->cbs_enable = false;
+		ring->idleslope = 0;
+		ring->sendslope = 0;
+		ring->hicredit = 0;
+		ring->locredit = 0;
+
+		u64_stats_init(&ring->tx_syncp);
+		u64_stats_init(&ring->tx_syncp2);
+
+		/* assign ring to adapter */
+		adapter->tx_ring[txr_idx] = ring;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	if (rxr_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Rx values */
+		igb_add_ring(ring, &q_vector->rx);
+
+		/* set flag indicating ring supports SCTP checksum offload */
+		if (adapter->hw.mac.type >= e1000_82576)
+			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
+
+		/* On i350, i354, i210, and i211, loopback VLAN packets
+		 * have the tag byte-swapped.
+		 */
+		if (adapter->hw.mac.type >= e1000_i350)
+			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
+
+		/* apply Rx specific ring traits */
+		ring->count = adapter->rx_ring_count;
+		ring->queue_index = rxr_idx;
+
+		u64_stats_init(&ring->rx_syncp);
+
+		/* assign ring to adapter */
+		adapter->rx_ring[rxr_idx] = ring;
+	}
+
+	return 0;
+}
+
+
+/**
+ *  igb_alloc_q_vectors - Allocate memory for interrupt vectors
+ *  @adapter: board private structure to initialize
+ *
+ *  We allocate one q_vector per queue interrupt.  If allocation fails we
+ *  return -ENOMEM.
+ **/
+static int igb_alloc_q_vectors(struct igb_adapter *adapter)
+{
+	int q_vectors = adapter->num_q_vectors;
+	int rxr_remaining = adapter->num_rx_queues;
+	int txr_remaining = adapter->num_tx_queues;
+	int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+	int err;
+
+	if (q_vectors >= (rxr_remaining + txr_remaining)) {
+		for (; rxr_remaining; v_idx++) {
+			err = igb_alloc_q_vector(adapter, q_vectors, v_idx,
+						 0, 0, 1, rxr_idx);
+
+			if (err)
+				goto err_out;
+
+			/* update counts and index */
+			rxr_remaining--;
+			rxr_idx++;
+		}
+	}
+
+	for (; v_idx < q_vectors; v_idx++) {
+		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
+		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
+
+		err = igb_alloc_q_vector(adapter, q_vectors, v_idx,
+					 tqpv, txr_idx, rqpv, rxr_idx);
+
+		if (err)
+			goto err_out;
+
+		/* update counts and index */
+		rxr_remaining -= rqpv;
+		txr_remaining -= tqpv;
+		rxr_idx++;
+		txr_idx++;
+	}
+
+	return 0;
+
+err_out:
+	adapter->num_tx_queues = 0;
+	adapter->num_rx_queues = 0;
+	adapter->num_q_vectors = 0;
+
+	while (v_idx--)
+		igb_free_q_vector(adapter, v_idx);
+
+	return -ENOMEM;
+}
+
+/**
+ *  igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
+ *  @adapter: board private structure to initialize
+ *  @msix: boolean value of MSIX capability
+ *
+ *  This function initializes the interrupts and allocates all of the queues.
+ **/
+static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	int err;
+
+	igb_set_interrupt_capability(adapter, msix);
+
+	err = igb_alloc_q_vectors(adapter);
+	if (err) {
+		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
+		goto err_alloc_q_vectors;
+	}
+
+	igb_cache_ring_register(adapter);
+
+	return 0;
+
+err_alloc_q_vectors:
+	igb_reset_interrupt_capability(adapter);
+	return err;
+}
+
+/**
+ *  igb_request_irq - initialize interrupts
+ *  @adapter: board private structure to initialize
+ *
+ *  Attempts to configure interrupts using the best available
+ *  capabilities of the hardware and kernel.
+ **/
+static int igb_request_irq(struct igb_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	int err = 0;
+
+	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
+		err = igb_request_msix(adapter);
+		if (!err)
+			goto request_done;
+		/* fall back to MSI */
+		igb_free_all_tx_resources(adapter);
+		igb_free_all_rx_resources(adapter);
+
+		igb_clear_interrupt_scheme(adapter);
+		err = igb_init_interrupt_scheme(adapter, false);
+		if (err)
+			goto request_done;
+
+		igb_setup_all_tx_resources(adapter);
+		igb_setup_all_rx_resources(adapter);
+		igb_configure(adapter);
+	}
+
+	igb_assign_vector(adapter->q_vector[0], 0);
+
+	if (adapter->flags & IGB_FLAG_HAS_MSI) {
+		err = request_irq(pdev->irq, igb_intr_msi, 0,
+				  netdev->name, adapter);
+		if (!err)
+			goto request_done;
+
+		/* fall back to legacy interrupts */
+		igb_reset_interrupt_capability(adapter);
+		adapter->flags &= ~IGB_FLAG_HAS_MSI;
+	}
+
+	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
+			  netdev->name, adapter);
+
+	if (err)
+		dev_err(&pdev->dev, "Error %d getting interrupt\n",
+			err);
+
+request_done:
+	return err;
+}
+
+static void igb_free_irq(struct igb_adapter *adapter)
+{
+	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
+		int vector = 0, i;
+
+		free_irq(adapter->msix_entries[vector++].vector, adapter);
+
+		for (i = 0; i < adapter->num_q_vectors; i++)
+			free_irq(adapter->msix_entries[vector++].vector,
+				 adapter->q_vector[i]);
+	} else {
+		free_irq(adapter->pdev->irq, adapter);
+	}
+}
+
+/**
+ *  igb_irq_disable - Mask off interrupt generation on the NIC
+ *  @adapter: board private structure
+ **/
+static void igb_irq_disable(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* we need to be careful when disabling interrupts.  The VFs are also
+	 * mapped into these registers and so clearing the bits can cause
+	 * issues on the VF drivers so we only need to clear what we set
+	 */
+	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
+		u32 regval = rd32(E1000_EIAM);
+
+		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
+		wr32(E1000_EIMC, adapter->eims_enable_mask);
+		regval = rd32(E1000_EIAC);
+		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
+	}
+
+	wr32(E1000_IAM, 0);
+	wr32(E1000_IMC, ~0);
+	wrfl();
+	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
+		int i;
+
+		for (i = 0; i < adapter->num_q_vectors; i++)
+			synchronize_irq(adapter->msix_entries[i].vector);
+	} else {
+		synchronize_irq(adapter->pdev->irq);
+	}
+}
+
+/**
+ *  igb_irq_enable - Enable default interrupt generation settings
+ *  @adapter: board private structure
+ **/
+static void igb_irq_enable(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
+		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
+		u32 regval = rd32(E1000_EIAC);
+
+		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
+		regval = rd32(E1000_EIAM);
+		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
+		wr32(E1000_EIMS, adapter->eims_enable_mask);
+		if (adapter->vfs_allocated_count) {
+			wr32(E1000_MBVFIMR, 0xFF);
+			ims |= E1000_IMS_VMMB;
+		}
+		wr32(E1000_IMS, ims);
+	} else {
+		wr32(E1000_IMS, IMS_ENABLE_MASK |
+				E1000_IMS_DRSTA);
+		wr32(E1000_IAM, IMS_ENABLE_MASK |
+				E1000_IMS_DRSTA);
+	}
+}
+
+static void igb_update_mng_vlan(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u16 pf_id = adapter->vfs_allocated_count;
+	u16 vid = adapter->hw.mng_cookie.vlan_id;
+	u16 old_vid = adapter->mng_vlan_id;
+
+	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
+		/* add VID to filter table */
+		igb_vfta_set(hw, vid, pf_id, true, true);
+		adapter->mng_vlan_id = vid;
+	} else {
+		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
+	}
+
+	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
+	    (vid != old_vid) &&
+	    !test_bit(old_vid, adapter->active_vlans)) {
+		/* remove VID from filter table */
+		igb_vfta_set(hw, vid, pf_id, false, true);
+	}
+}
+
+/**
+ *  igb_release_hw_control - release control of the h/w to f/w
+ *  @adapter: address of board private structure
+ *
+ *  igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
+ *  For ASF and Pass Through versions of f/w this means that the
+ *  driver is no longer loaded.
+ **/
+static void igb_release_hw_control(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl_ext;
+
+	/* Let firmware take over control of h/w */
+	ctrl_ext = rd32(E1000_CTRL_EXT);
+	wr32(E1000_CTRL_EXT,
+			ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
+}
+
+/**
+ *  igb_get_hw_control - get control of the h/w from f/w
+ *  @adapter: address of board private structure
+ *
+ *  igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
+ *  For ASF and Pass Through versions of f/w this means that
+ *  the driver is loaded.
+ **/
+static void igb_get_hw_control(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl_ext;
+
+	/* Let firmware know the driver has taken over */
+	ctrl_ext = rd32(E1000_CTRL_EXT);
+	wr32(E1000_CTRL_EXT,
+			ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
+}
+
+static void enable_fqtss(struct igb_adapter *adapter, bool enable)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+
+	WARN_ON(hw->mac.type != e1000_i210);
+
+	if (enable)
+		adapter->flags |= IGB_FLAG_FQTSS;
+	else
+		adapter->flags &= ~IGB_FLAG_FQTSS;
+
+	if (netif_running(netdev))
+		schedule_work(&adapter->reset_task);
+}
+
+static bool is_fqtss_enabled(struct igb_adapter *adapter)
+{
+	return (adapter->flags & IGB_FLAG_FQTSS) ? true : false;
+}
+
+static void set_tx_desc_fetch_prio(struct e1000_hw *hw, int queue,
+				   enum tx_queue_prio prio)
+{
+	u32 val;
+
+	WARN_ON(hw->mac.type != e1000_i210);
+	WARN_ON(queue < 0 || queue > 4);
+
+	val = rd32(E1000_I210_TXDCTL(queue));
+
+	if (prio == TX_QUEUE_PRIO_HIGH)
+		val |= E1000_TXDCTL_PRIORITY;
+	else
+		val &= ~E1000_TXDCTL_PRIORITY;
+
+	wr32(E1000_I210_TXDCTL(queue), val);
+}
+
+static void set_queue_mode(struct e1000_hw *hw, int queue, enum queue_mode mode)
+{
+	u32 val;
+
+	WARN_ON(hw->mac.type != e1000_i210);
+	WARN_ON(queue < 0 || queue > 1);
+
+	val = rd32(E1000_I210_TQAVCC(queue));
+
+	if (mode == QUEUE_MODE_STREAM_RESERVATION)
+		val |= E1000_TQAVCC_QUEUEMODE;
+	else
+		val &= ~E1000_TQAVCC_QUEUEMODE;
+
+	wr32(E1000_I210_TQAVCC(queue), val);
+}
+
+static bool is_any_cbs_enabled(struct igb_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		if (adapter->tx_ring[i]->cbs_enable)
+			return true;
+	}
+
+	return false;
+}
+
+static bool is_any_txtime_enabled(struct igb_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		if (adapter->tx_ring[i]->launchtime_enable)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ *  igb_config_tx_modes - Configure "Qav Tx mode" features on igb
+ *  @adapter: pointer to adapter struct
+ *  @queue: queue number
+ *
+ *  Configure CBS and Launchtime for a given hardware queue.
+ *  Parameters are retrieved from the correct Tx ring, so
+ *  igb_save_cbs_params() and igb_save_txtime_params() should be used
+ *  for setting those correctly prior to this function being called.
+ **/
+static void igb_config_tx_modes(struct igb_adapter *adapter, int queue)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	struct igb_ring *ring;
+	u32 tqavcc, tqavctrl;
+	u16 value;
+
+	WARN_ON(hw->mac.type != e1000_i210);
+	WARN_ON(queue < 0 || queue > 1);
+	ring = adapter->tx_ring[queue];
+
+	/* If any of the Qav features is enabled, configure queues as SR and
+	 * with HIGH PRIO. If none is, then configure them with LOW PRIO and
+	 * as SP.
+	 */
+	if (ring->cbs_enable || ring->launchtime_enable) {
+		set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_HIGH);
+		set_queue_mode(hw, queue, QUEUE_MODE_STREAM_RESERVATION);
+	} else {
+		set_tx_desc_fetch_prio(hw, queue, TX_QUEUE_PRIO_LOW);
+		set_queue_mode(hw, queue, QUEUE_MODE_STRICT_PRIORITY);
+	}
+
+	/* If CBS is enabled, set DataTranARB and config its parameters. */
+	if (ring->cbs_enable || queue == 0) {
+		/* i210 does not allow the queue 0 to be in the Strict
+		 * Priority mode while the Qav mode is enabled, so,
+		 * instead of disabling strict priority mode, we give
+		 * queue 0 the maximum of credits possible.
+		 *
+		 * See section 8.12.19 of the i210 datasheet, "Note:
+		 * Queue0 QueueMode must be set to 1b when
+		 * TransmitMode is set to Qav."
+		 */
+		if (queue == 0 && !ring->cbs_enable) {
+			/* max "linkspeed" idleslope in kbps */
+			ring->idleslope = 1000000;
+			ring->hicredit = ETH_FRAME_LEN;
+		}
+
+		/* Always set data transfer arbitration to credit-based
+		 * shaper algorithm on TQAVCTRL if CBS is enabled for any of
+		 * the queues.
+		 */
+		tqavctrl = rd32(E1000_I210_TQAVCTRL);
+		tqavctrl |= E1000_TQAVCTRL_DATATRANARB;
+		wr32(E1000_I210_TQAVCTRL, tqavctrl);
+
+		/* According to i210 datasheet section 7.2.7.7, we should set
+		 * the 'idleSlope' field from TQAVCC register following the
+		 * equation:
+		 *
+		 * For 100 Mbps link speed:
+		 *
+		 *     value = BW * 0x7735 * 0.2                          (E1)
+		 *
+		 * For 1000Mbps link speed:
+		 *
+		 *     value = BW * 0x7735 * 2                            (E2)
+		 *
+		 * E1 and E2 can be merged into one equation as shown below.
+		 * Note that 'link-speed' is in Mbps.
+		 *
+		 *     value = BW * 0x7735 * 2 * link-speed
+		 *                           --------------               (E3)
+		 *                                1000
+		 *
+		 * 'BW' is the percentage bandwidth out of full link speed
+		 * which can be found with the following equation. Note that
+		 * idleSlope here is the parameter from this function which
+		 * is in kbps.
+		 *
+		 *     BW =     idleSlope
+		 *          -----------------                             (E4)
+		 *          link-speed * 1000
+		 *
+		 * That said, we can come up with a generic equation to
+		 * calculate the value we should set it TQAVCC register by
+		 * replacing 'BW' in E3 by E4. The resulting equation is:
+		 *
+		 * value =     idleSlope     * 0x7735 * 2 * link-speed
+		 *         -----------------            --------------    (E5)
+		 *         link-speed * 1000                 1000
+		 *
+		 * 'link-speed' is present in both sides of the fraction so
+		 * it is canceled out. The final equation is the following:
+		 *
+		 *     value = idleSlope * 61034
+		 *             -----------------                          (E6)
+		 *                  1000000
+		 *
+		 * NOTE: For i210, given the above, we can see that idleslope
+		 *       is represented in 16.38431 kbps units by the value at
+		 *       the TQAVCC register (1Gbps / 61034), which reduces
+		 *       the granularity for idleslope increments.
+		 *       For instance, if you want to configure a 2576kbps
+		 *       idleslope, the value to be written on the register
+		 *       would have to be 157.23. If rounded down, you end
+		 *       up with less bandwidth available than originally
+		 *       required (~2572 kbps). If rounded up, you end up
+		 *       with a higher bandwidth (~2589 kbps). Below the
+		 *       approach we take is to always round up the
+		 *       calculated value, so the resulting bandwidth might
+		 *       be slightly higher for some configurations.
+		 */
+		value = DIV_ROUND_UP_ULL(ring->idleslope * 61034ULL, 1000000);
+
+		tqavcc = rd32(E1000_I210_TQAVCC(queue));
+		tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK;
+		tqavcc |= value;
+		wr32(E1000_I210_TQAVCC(queue), tqavcc);
+
+		wr32(E1000_I210_TQAVHC(queue),
+		     0x80000000 + ring->hicredit * 0x7735);
+	} else {
+
+		/* Set idleSlope to zero. */
+		tqavcc = rd32(E1000_I210_TQAVCC(queue));
+		tqavcc &= ~E1000_TQAVCC_IDLESLOPE_MASK;
+		wr32(E1000_I210_TQAVCC(queue), tqavcc);
+
+		/* Set hiCredit to zero. */
+		wr32(E1000_I210_TQAVHC(queue), 0);
+
+		/* If CBS is not enabled for any queues anymore, then return to
+		 * the default state of Data Transmission Arbitration on
+		 * TQAVCTRL.
+		 */
+		if (!is_any_cbs_enabled(adapter)) {
+			tqavctrl = rd32(E1000_I210_TQAVCTRL);
+			tqavctrl &= ~E1000_TQAVCTRL_DATATRANARB;
+			wr32(E1000_I210_TQAVCTRL, tqavctrl);
+		}
+	}
+
+	/* If LaunchTime is enabled, set DataTranTIM. */
+	if (ring->launchtime_enable) {
+		/* Always set DataTranTIM on TQAVCTRL if LaunchTime is enabled
+		 * for any of the SR queues, and configure fetchtime delta.
+		 * XXX NOTE:
+		 *     - LaunchTime will be enabled for all SR queues.
+		 *     - A fixed offset can be added relative to the launch
+		 *       time of all packets if configured at reg LAUNCH_OS0.
+		 *       We are keeping it as 0 for now (default value).
+		 */
+		tqavctrl = rd32(E1000_I210_TQAVCTRL);
+		tqavctrl |= E1000_TQAVCTRL_DATATRANTIM |
+		       E1000_TQAVCTRL_FETCHTIME_DELTA;
+		wr32(E1000_I210_TQAVCTRL, tqavctrl);
+	} else {
+		/* If Launchtime is not enabled for any SR queues anymore,
+		 * then clear DataTranTIM on TQAVCTRL and clear fetchtime delta,
+		 * effectively disabling Launchtime.
+		 */
+		if (!is_any_txtime_enabled(adapter)) {
+			tqavctrl = rd32(E1000_I210_TQAVCTRL);
+			tqavctrl &= ~E1000_TQAVCTRL_DATATRANTIM;
+			tqavctrl &= ~E1000_TQAVCTRL_FETCHTIME_DELTA;
+			wr32(E1000_I210_TQAVCTRL, tqavctrl);
+		}
+	}
+
+	/* XXX: In i210 controller the sendSlope and loCredit parameters from
+	 * CBS are not configurable by software so we don't do any 'controller
+	 * configuration' in respect to these parameters.
+	 */
+
+	netdev_dbg(netdev, "Qav Tx mode: cbs %s, launchtime %s, queue %d idleslope %d sendslope %d hiCredit %d locredit %d\n",
+		   ring->cbs_enable ? "enabled" : "disabled",
+		   ring->launchtime_enable ? "enabled" : "disabled",
+		   queue,
+		   ring->idleslope, ring->sendslope,
+		   ring->hicredit, ring->locredit);
+}
+
+static int igb_save_txtime_params(struct igb_adapter *adapter, int queue,
+				  bool enable)
+{
+	struct igb_ring *ring;
+
+	if (queue < 0 || queue > adapter->num_tx_queues)
+		return -EINVAL;
+
+	ring = adapter->tx_ring[queue];
+	ring->launchtime_enable = enable;
+
+	return 0;
+}
+
+static int igb_save_cbs_params(struct igb_adapter *adapter, int queue,
+			       bool enable, int idleslope, int sendslope,
+			       int hicredit, int locredit)
+{
+	struct igb_ring *ring;
+
+	if (queue < 0 || queue > adapter->num_tx_queues)
+		return -EINVAL;
+
+	ring = adapter->tx_ring[queue];
+
+	ring->cbs_enable = enable;
+	ring->idleslope = idleslope;
+	ring->sendslope = sendslope;
+	ring->hicredit = hicredit;
+	ring->locredit = locredit;
+
+	return 0;
+}
+
+/**
+ *  igb_setup_tx_mode - Switch to/from Qav Tx mode when applicable
+ *  @adapter: pointer to adapter struct
+ *
+ *  Configure TQAVCTRL register switching the controller's Tx mode
+ *  if FQTSS mode is enabled or disabled. Additionally, will issue
+ *  a call to igb_config_tx_modes() per queue so any previously saved
+ *  Tx parameters are applied.
+ **/
+static void igb_setup_tx_mode(struct igb_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 val;
+
+	/* Only i210 controller supports changing the transmission mode. */
+	if (hw->mac.type != e1000_i210)
+		return;
+
+	if (is_fqtss_enabled(adapter)) {
+		int i, max_queue;
+
+		/* Configure TQAVCTRL register: set transmit mode to 'Qav',
+		 * set data fetch arbitration to 'round robin', set SP_WAIT_SR
+		 * so SP queues wait for SR ones.
+		 */
+		val = rd32(E1000_I210_TQAVCTRL);
+		val |= E1000_TQAVCTRL_XMIT_MODE | E1000_TQAVCTRL_SP_WAIT_SR;
+		val &= ~E1000_TQAVCTRL_DATAFETCHARB;
+		wr32(E1000_I210_TQAVCTRL, val);
+
+		/* Configure Tx and Rx packet buffers sizes as described in
+		 * i210 datasheet section 7.2.7.7.
+		 */
+		val = rd32(E1000_TXPBS);
+		val &= ~I210_TXPBSIZE_MASK;
+		val |= I210_TXPBSIZE_PB0_6KB | I210_TXPBSIZE_PB1_6KB |
+			I210_TXPBSIZE_PB2_6KB | I210_TXPBSIZE_PB3_6KB;
+		wr32(E1000_TXPBS, val);
+
+		val = rd32(E1000_RXPBS);
+		val &= ~I210_RXPBSIZE_MASK;
+		val |= I210_RXPBSIZE_PB_30KB;
+		wr32(E1000_RXPBS, val);
+
+		/* Section 8.12.9 states that MAX_TPKT_SIZE from DTXMXPKTSZ
+		 * register should not exceed the buffer size programmed in
+		 * TXPBS. The smallest buffer size programmed in TXPBS is 4kB
+		 * so according to the datasheet we should set MAX_TPKT_SIZE to
+		 * 4kB / 64.
+		 *
+		 * However, when we do so, no frame from queue 2 and 3 are
+		 * transmitted.  It seems the MAX_TPKT_SIZE should not be great
+		 * or _equal_ to the buffer size programmed in TXPBS. For this
+		 * reason, we set MAX_ TPKT_SIZE to (4kB - 1) / 64.
+		 */
+		val = (4096 - 1) / 64;
+		wr32(E1000_I210_DTXMXPKTSZ, val);
+
+		/* Since FQTSS mode is enabled, apply any CBS configuration
+		 * previously set. If no previous CBS configuration has been
+		 * done, then the initial configuration is applied, which means
+		 * CBS is disabled.
+		 */
+		max_queue = (adapter->num_tx_queues < I210_SR_QUEUES_NUM) ?
+			    adapter->num_tx_queues : I210_SR_QUEUES_NUM;
+
+		for (i = 0; i < max_queue; i++) {
+			igb_config_tx_modes(adapter, i);
+		}
+	} else {
+		wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT);
+		wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
+		wr32(E1000_I210_DTXMXPKTSZ, I210_DTXMXPKTSZ_DEFAULT);
+
+		val = rd32(E1000_I210_TQAVCTRL);
+		/* According to Section 8.12.21, the other flags we've set when
+		 * enabling FQTSS are not relevant when disabling FQTSS so we
+		 * don't set they here.
+		 */
+		val &= ~E1000_TQAVCTRL_XMIT_MODE;
+		wr32(E1000_I210_TQAVCTRL, val);
+	}
+
+	netdev_dbg(netdev, "FQTSS %s\n", (is_fqtss_enabled(adapter)) ?
+		   "enabled" : "disabled");
+}
+
+/**
+ *  igb_configure - configure the hardware for RX and TX
+ *  @adapter: private board structure
+ **/
+static void igb_configure(struct igb_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int i;
+
+	igb_get_hw_control(adapter);
+	igb_set_rx_mode(netdev);
+	igb_setup_tx_mode(adapter);
+
+	igb_restore_vlan(adapter);
+
+	igb_setup_tctl(adapter);
+	igb_setup_mrqc(adapter);
+	igb_setup_rctl(adapter);
+
+	igb_nfc_filter_restore(adapter);
+	igb_configure_tx(adapter);
+	igb_configure_rx(adapter);
+
+	igb_rx_fifo_flush_82575(&adapter->hw);
+
+	/* call igb_desc_unused which always leaves
+	 * at least 1 descriptor unused to make sure
+	 * next_to_use != next_to_clean
+	 */
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct igb_ring *ring = adapter->rx_ring[i];
+		igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
+	}
+}
+
+/**
+ *  igb_power_up_link - Power up the phy/serdes link
+ *  @adapter: address of board private structure
+ **/
+void igb_power_up_link(struct igb_adapter *adapter)
+{
+	igb_reset_phy(&adapter->hw);
+
+	if (adapter->hw.phy.media_type == e1000_media_type_copper)
+		igb_power_up_phy_copper(&adapter->hw);
+	else
+		igb_power_up_serdes_link_82575(&adapter->hw);
+
+	igb_setup_link(&adapter->hw);
+}
+
+/**
+ *  igb_power_down_link - Power down the phy/serdes link
+ *  @adapter: address of board private structure
+ */
+static void igb_power_down_link(struct igb_adapter *adapter)
+{
+	if (adapter->hw.phy.media_type == e1000_media_type_copper)
+		igb_power_down_phy_copper_82575(&adapter->hw);
+	else
+		igb_shutdown_serdes_link_82575(&adapter->hw);
+}
+
+/**
+ * igb_check_swap_media -  Detect and switch function for Media Auto Sense
+ * @adapter: address of the board private structure
+ **/
+static void igb_check_swap_media(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl_ext, connsw;
+	bool swap_now = false;
+
+	ctrl_ext = rd32(E1000_CTRL_EXT);
+	connsw = rd32(E1000_CONNSW);
+
+	/* need to live swap if current media is copper and we have fiber/serdes
+	 * to go to.
+	 */
+
+	if ((hw->phy.media_type == e1000_media_type_copper) &&
+	    (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) {
+		swap_now = true;
+	} else if ((hw->phy.media_type != e1000_media_type_copper) &&
+		   !(connsw & E1000_CONNSW_SERDESD)) {
+		/* copper signal takes time to appear */
+		if (adapter->copper_tries < 4) {
+			adapter->copper_tries++;
+			connsw |= E1000_CONNSW_AUTOSENSE_CONF;
+			wr32(E1000_CONNSW, connsw);
+			return;
+		} else {
+			adapter->copper_tries = 0;
+			if ((connsw & E1000_CONNSW_PHYSD) &&
+			    (!(connsw & E1000_CONNSW_PHY_PDN))) {
+				swap_now = true;
+				connsw &= ~E1000_CONNSW_AUTOSENSE_CONF;
+				wr32(E1000_CONNSW, connsw);
+			}
+		}
+	}
+
+	if (!swap_now)
+		return;
+
+	switch (hw->phy.media_type) {
+	case e1000_media_type_copper:
+		netdev_info(adapter->netdev,
+			"MAS: changing media to fiber/serdes\n");
+		ctrl_ext |=
+			E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
+		adapter->flags |= IGB_FLAG_MEDIA_RESET;
+		adapter->copper_tries = 0;
+		break;
+	case e1000_media_type_internal_serdes:
+	case e1000_media_type_fiber:
+		netdev_info(adapter->netdev,
+			"MAS: changing media to copper\n");
+		ctrl_ext &=
+			~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
+		adapter->flags |= IGB_FLAG_MEDIA_RESET;
+		break;
+	default:
+		/* shouldn't get here during regular operation */
+		netdev_err(adapter->netdev,
+			"AMS: Invalid media type found, returning\n");
+		break;
+	}
+	wr32(E1000_CTRL_EXT, ctrl_ext);
+}
+
+/**
+ *  igb_up - Open the interface and prepare it to handle traffic
+ *  @adapter: board private structure
+ **/
+int igb_up(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int i;
+
+	/* hardware has been reset, we need to reload some things */
+	igb_configure(adapter);
+
+	clear_bit(__IGB_DOWN, &adapter->state);
+
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		napi_enable(&(adapter->q_vector[i]->napi));
+
+	if (adapter->flags & IGB_FLAG_HAS_MSIX)
+		igb_configure_msix(adapter);
+	else
+		igb_assign_vector(adapter->q_vector[0], 0);
+
+	/* Clear any pending interrupts. */
+	rd32(E1000_TSICR);
+	rd32(E1000_ICR);
+	igb_irq_enable(adapter);
+
+	/* notify VFs that reset has been completed */
+	if (adapter->vfs_allocated_count) {
+		u32 reg_data = rd32(E1000_CTRL_EXT);
+
+		reg_data |= E1000_CTRL_EXT_PFRSTD;
+		wr32(E1000_CTRL_EXT, reg_data);
+	}
+
+	netif_tx_start_all_queues(adapter->netdev);
+
+	/* start the watchdog. */
+	hw->mac.get_link_status = 1;
+	schedule_work(&adapter->watchdog_task);
+
+	if ((adapter->flags & IGB_FLAG_EEE) &&
+	    (!hw->dev_spec._82575.eee_disable))
+		adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T;
+
+	return 0;
+}
+
+void igb_down(struct igb_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 tctl, rctl;
+	int i;
+
+	/* signal that we're down so the interrupt handler does not
+	 * reschedule our watchdog timer
+	 */
+	set_bit(__IGB_DOWN, &adapter->state);
+
+	/* disable receives in the hardware */
+	rctl = rd32(E1000_RCTL);
+	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
+	/* flush and sleep below */
+
+	igb_nfc_filter_exit(adapter);
+
+	netif_carrier_off(netdev);
+	netif_tx_stop_all_queues(netdev);
+
+	/* disable transmits in the hardware */
+	tctl = rd32(E1000_TCTL);
+	tctl &= ~E1000_TCTL_EN;
+	wr32(E1000_TCTL, tctl);
+	/* flush both disables and wait for them to finish */
+	wrfl();
+	usleep_range(10000, 11000);
+
+	igb_irq_disable(adapter);
+
+	adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
+
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		if (adapter->q_vector[i]) {
+			napi_synchronize(&adapter->q_vector[i]->napi);
+			napi_disable(&adapter->q_vector[i]->napi);
+		}
+	}
+
+	del_timer_sync(&adapter->watchdog_timer);
+	del_timer_sync(&adapter->phy_info_timer);
+
+	/* record the stats before reset*/
+	spin_lock(&adapter->stats64_lock);
+	igb_update_stats(adapter);
+	spin_unlock(&adapter->stats64_lock);
+
+	adapter->link_speed = 0;
+	adapter->link_duplex = 0;
+
+	if (!pci_channel_offline(adapter->pdev))
+		igb_reset(adapter);
+
+	/* clear VLAN promisc flag so VFTA will be updated if necessary */
+	adapter->flags &= ~IGB_FLAG_VLAN_PROMISC;
+
+	igb_clean_all_tx_rings(adapter);
+	igb_clean_all_rx_rings(adapter);
+#ifdef CONFIG_IGB_DCA
+
+	/* since we reset the hardware DCA settings were cleared */
+	igb_setup_dca(adapter);
+#endif
+}
+
+void igb_reinit_locked(struct igb_adapter *adapter)
+{
+	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+	igb_down(adapter);
+	igb_up(adapter);
+	clear_bit(__IGB_RESETTING, &adapter->state);
+}
+
+/** igb_enable_mas - Media Autosense re-enable after swap
+ *
+ * @adapter: adapter struct
+ **/
+static void igb_enable_mas(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 connsw = rd32(E1000_CONNSW);
+
+	/* configure for SerDes media detect */
+	if ((hw->phy.media_type == e1000_media_type_copper) &&
+	    (!(connsw & E1000_CONNSW_SERDESD))) {
+		connsw |= E1000_CONNSW_ENRGSRC;
+		connsw |= E1000_CONNSW_AUTOSENSE_EN;
+		wr32(E1000_CONNSW, connsw);
+		wrfl();
+	}
+}
+
+#ifdef CONFIG_IGB_HWMON
+/**
+ *  igb_set_i2c_bb - Init I2C interface
+ *  @hw: pointer to hardware structure
+ **/
+static void igb_set_i2c_bb(struct e1000_hw *hw)
+{
+	u32 ctrl_ext;
+	s32 i2cctl;
+
+	ctrl_ext = rd32(E1000_CTRL_EXT);
+	ctrl_ext |= E1000_CTRL_I2C_ENA;
+	wr32(E1000_CTRL_EXT, ctrl_ext);
+	wrfl();
+
+	i2cctl = rd32(E1000_I2CPARAMS);
+	i2cctl |= E1000_I2CBB_EN
+		| E1000_I2C_CLK_OE_N
+		| E1000_I2C_DATA_OE_N;
+	wr32(E1000_I2CPARAMS, i2cctl);
+	wrfl();
+}
+#endif
+
+void igb_reset(struct igb_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_mac_info *mac = &hw->mac;
+	struct e1000_fc_info *fc = &hw->fc;
+	u32 pba, hwm;
+
+	/* Repartition Pba for greater than 9k mtu
+	 * To take effect CTRL.RST is required.
+	 */
+	switch (mac->type) {
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_82580:
+		pba = rd32(E1000_RXPBS);
+		pba = igb_rxpbs_adjust_82580(pba);
+		break;
+	case e1000_82576:
+		pba = rd32(E1000_RXPBS);
+		pba &= E1000_RXPBS_SIZE_MASK_82576;
+		break;
+	case e1000_82575:
+	case e1000_i210:
+	case e1000_i211:
+	default:
+		pba = E1000_PBA_34K;
+		break;
+	}
+
+	if (mac->type == e1000_82575) {
+		u32 min_rx_space, min_tx_space, needed_tx_space;
+
+		/* write Rx PBA so that hardware can report correct Tx PBA */
+		wr32(E1000_PBA, pba);
+
+		/* To maintain wire speed transmits, the Tx FIFO should be
+		 * large enough to accommodate two full transmit packets,
+		 * rounded up to the next 1KB and expressed in KB.  Likewise,
+		 * the Rx FIFO should be large enough to accommodate at least
+		 * one full receive packet and is similarly rounded up and
+		 * expressed in KB.
+		 */
+		min_rx_space = DIV_ROUND_UP(MAX_JUMBO_FRAME_SIZE, 1024);
+
+		/* The Tx FIFO also stores 16 bytes of information about the Tx
+		 * but don't include Ethernet FCS because hardware appends it.
+		 * We only need to round down to the nearest 512 byte block
+		 * count since the value we care about is 2 frames, not 1.
+		 */
+		min_tx_space = adapter->max_frame_size;
+		min_tx_space += sizeof(union e1000_adv_tx_desc) - ETH_FCS_LEN;
+		min_tx_space = DIV_ROUND_UP(min_tx_space, 512);
+
+		/* upper 16 bits has Tx packet buffer allocation size in KB */
+		needed_tx_space = min_tx_space - (rd32(E1000_PBA) >> 16);
+
+		/* If current Tx allocation is less than the min Tx FIFO size,
+		 * and the min Tx FIFO size is less than the current Rx FIFO
+		 * allocation, take space away from current Rx allocation.
+		 */
+		if (needed_tx_space < pba) {
+			pba -= needed_tx_space;
+
+			/* if short on Rx space, Rx wins and must trump Tx
+			 * adjustment
+			 */
+			if (pba < min_rx_space)
+				pba = min_rx_space;
+		}
+
+		/* adjust PBA for jumbo frames */
+		wr32(E1000_PBA, pba);
+	}
+
+	/* flow control settings
+	 * The high water mark must be low enough to fit one full frame
+	 * after transmitting the pause frame.  As such we must have enough
+	 * space to allow for us to complete our current transmit and then
+	 * receive the frame that is in progress from the link partner.
+	 * Set it to:
+	 * - the full Rx FIFO size minus one full Tx plus one full Rx frame
+	 */
+	hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE);
+
+	fc->high_water = hwm & 0xFFFFFFF0;	/* 16-byte granularity */
+	fc->low_water = fc->high_water - 16;
+	fc->pause_time = 0xFFFF;
+	fc->send_xon = 1;
+	fc->current_mode = fc->requested_mode;
+
+	/* disable receive for all VFs and wait one second */
+	if (adapter->vfs_allocated_count) {
+		int i;
+
+		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
+			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
+
+		/* ping all the active vfs to let them know we are going down */
+		igb_ping_all_vfs(adapter);
+
+		/* disable transmits and receives */
+		wr32(E1000_VFRE, 0);
+		wr32(E1000_VFTE, 0);
+	}
+
+	/* Allow time for pending master requests to run */
+	hw->mac.ops.reset_hw(hw);
+	wr32(E1000_WUC, 0);
+
+	if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
+		/* need to resetup here after media swap */
+		adapter->ei.get_invariants(hw);
+		adapter->flags &= ~IGB_FLAG_MEDIA_RESET;
+	}
+	if ((mac->type == e1000_82575 || mac->type == e1000_i350) &&
+	    (adapter->flags & IGB_FLAG_MAS_ENABLE)) {
+		igb_enable_mas(adapter);
+	}
+	if (hw->mac.ops.init_hw(hw))
+		dev_err(&pdev->dev, "Hardware Error\n");
+
+	/* RAR registers were cleared during init_hw, clear mac table */
+	igb_flush_mac_table(adapter);
+	__dev_uc_unsync(adapter->netdev, NULL);
+
+	/* Recover default RAR entry */
+	igb_set_default_mac_filter(adapter);
+
+	/* Flow control settings reset on hardware reset, so guarantee flow
+	 * control is off when forcing speed.
+	 */
+	if (!hw->mac.autoneg)
+		igb_force_mac_fc(hw);
+
+	igb_init_dmac(adapter, pba);
+#ifdef CONFIG_IGB_HWMON
+	/* Re-initialize the thermal sensor on i350 devices. */
+	if (!test_bit(__IGB_DOWN, &adapter->state)) {
+		if (mac->type == e1000_i350 && hw->bus.func == 0) {
+			/* If present, re-initialize the external thermal sensor
+			 * interface.
+			 */
+			if (adapter->ets)
+				igb_set_i2c_bb(hw);
+			mac->ops.init_thermal_sensor_thresh(hw);
+		}
+	}
+#endif
+	/* Re-establish EEE setting */
+	if (hw->phy.media_type == e1000_media_type_copper) {
+		switch (mac->type) {
+		case e1000_i350:
+		case e1000_i210:
+		case e1000_i211:
+			igb_set_eee_i350(hw, true, true);
+			break;
+		case e1000_i354:
+			igb_set_eee_i354(hw, true, true);
+			break;
+		default:
+			break;
+		}
+	}
+	if (!netif_running(adapter->netdev))
+		igb_power_down_link(adapter);
+
+	igb_update_mng_vlan(adapter);
+
+	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
+	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
+
+	/* Re-enable PTP, where applicable. */
+	if (adapter->ptp_flags & IGB_PTP_ENABLED)
+		igb_ptp_reset(adapter);
+
+	igb_get_phy_info(hw);
+}
+
+static netdev_features_t igb_fix_features(struct net_device *netdev,
+	netdev_features_t features)
+{
+	/* Since there is no support for separate Rx/Tx vlan accel
+	 * enable/disable make sure Tx flag is always in same state as Rx.
+	 */
+	if (features & NETIF_F_HW_VLAN_CTAG_RX)
+		features |= NETIF_F_HW_VLAN_CTAG_TX;
+	else
+		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
+
+	return features;
+}
+
+static int igb_set_features(struct net_device *netdev,
+	netdev_features_t features)
+{
+	netdev_features_t changed = netdev->features ^ features;
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
+		igb_vlan_mode(netdev, features);
+
+	if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
+		return 0;
+
+	if (!(features & NETIF_F_NTUPLE)) {
+		struct hlist_node *node2;
+		struct igb_nfc_filter *rule;
+
+		spin_lock(&adapter->nfc_lock);
+		hlist_for_each_entry_safe(rule, node2,
+					  &adapter->nfc_filter_list, nfc_node) {
+			igb_erase_filter(adapter, rule);
+			hlist_del(&rule->nfc_node);
+			kfree(rule);
+		}
+		spin_unlock(&adapter->nfc_lock);
+		adapter->nfc_filter_count = 0;
+	}
+
+	netdev->features = features;
+
+	if (netif_running(netdev))
+		igb_reinit_locked(adapter);
+	else
+		igb_reset(adapter);
+
+	return 1;
+}
+
+static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+			   struct net_device *dev,
+			   const unsigned char *addr, u16 vid,
+			   u16 flags,
+			   struct netlink_ext_ack *extack)
+{
+	/* guarantee we can provide a unique filter for the unicast address */
+	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
+		struct igb_adapter *adapter = netdev_priv(dev);
+		int vfn = adapter->vfs_allocated_count;
+
+		if (netdev_uc_count(dev) >= igb_available_rars(adapter, vfn))
+			return -ENOMEM;
+	}
+
+	return ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, flags);
+}
+
+#define IGB_MAX_MAC_HDR_LEN	127
+#define IGB_MAX_NETWORK_HDR_LEN	511
+
+static netdev_features_t
+igb_features_check(struct sk_buff *skb, struct net_device *dev,
+		   netdev_features_t features)
+{
+	unsigned int network_hdr_len, mac_hdr_len;
+
+	/* Make certain the headers can be described by a context descriptor */
+	mac_hdr_len = skb_network_header(skb) - skb->data;
+	if (unlikely(mac_hdr_len > IGB_MAX_MAC_HDR_LEN))
+		return features & ~(NETIF_F_HW_CSUM |
+				    NETIF_F_SCTP_CRC |
+				    NETIF_F_GSO_UDP_L4 |
+				    NETIF_F_HW_VLAN_CTAG_TX |
+				    NETIF_F_TSO |
+				    NETIF_F_TSO6);
+
+	network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
+	if (unlikely(network_hdr_len >  IGB_MAX_NETWORK_HDR_LEN))
+		return features & ~(NETIF_F_HW_CSUM |
+				    NETIF_F_SCTP_CRC |
+				    NETIF_F_GSO_UDP_L4 |
+				    NETIF_F_TSO |
+				    NETIF_F_TSO6);
+
+	/* We can only support IPV4 TSO in tunnels if we can mangle the
+	 * inner IP ID field, so strip TSO if MANGLEID is not supported.
+	 */
+	if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
+		features &= ~NETIF_F_TSO;
+
+	return features;
+}
+
+static void igb_offload_apply(struct igb_adapter *adapter, s32 queue)
+{
+	if (!is_fqtss_enabled(adapter)) {
+		enable_fqtss(adapter, true);
+		return;
+	}
+
+	igb_config_tx_modes(adapter, queue);
+
+	if (!is_any_cbs_enabled(adapter) && !is_any_txtime_enabled(adapter))
+		enable_fqtss(adapter, false);
+}
+
+static int igb_offload_cbs(struct igb_adapter *adapter,
+			   struct tc_cbs_qopt_offload *qopt)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int err;
+
+	/* CBS offloading is only supported by i210 controller. */
+	if (hw->mac.type != e1000_i210)
+		return -EOPNOTSUPP;
+
+	/* CBS offloading is only supported by queue 0 and queue 1. */
+	if (qopt->queue < 0 || qopt->queue > 1)
+		return -EINVAL;
+
+	err = igb_save_cbs_params(adapter, qopt->queue, qopt->enable,
+				  qopt->idleslope, qopt->sendslope,
+				  qopt->hicredit, qopt->locredit);
+	if (err)
+		return err;
+
+	igb_offload_apply(adapter, qopt->queue);
+
+	return 0;
+}
+
+#define ETHER_TYPE_FULL_MASK ((__force __be16)~0)
+#define VLAN_PRIO_FULL_MASK (0x07)
+
+static int igb_parse_cls_flower(struct igb_adapter *adapter,
+				struct flow_cls_offload *f,
+				int traffic_class,
+				struct igb_nfc_filter *input)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct flow_dissector *dissector = rule->match.dissector;
+	struct netlink_ext_ack *extack = f->common.extack;
+
+	if (dissector->used_keys &
+	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_VLAN))) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Unsupported key used, only BASIC, CONTROL, ETH_ADDRS and VLAN are supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
+
+		flow_rule_match_eth_addrs(rule, &match);
+		if (!is_zero_ether_addr(match.mask->dst)) {
+			if (!is_broadcast_ether_addr(match.mask->dst)) {
+				NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for destination MAC address");
+				return -EINVAL;
+			}
+
+			input->filter.match_flags |=
+				IGB_FILTER_FLAG_DST_MAC_ADDR;
+			ether_addr_copy(input->filter.dst_addr, match.key->dst);
+		}
+
+		if (!is_zero_ether_addr(match.mask->src)) {
+			if (!is_broadcast_ether_addr(match.mask->src)) {
+				NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for source MAC address");
+				return -EINVAL;
+			}
+
+			input->filter.match_flags |=
+				IGB_FILTER_FLAG_SRC_MAC_ADDR;
+			ether_addr_copy(input->filter.src_addr, match.key->src);
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+		if (match.mask->n_proto) {
+			if (match.mask->n_proto != ETHER_TYPE_FULL_MASK) {
+				NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for EtherType filter");
+				return -EINVAL;
+			}
+
+			input->filter.match_flags |= IGB_FILTER_FLAG_ETHER_TYPE;
+			input->filter.etype = match.key->n_proto;
+		}
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
+
+		flow_rule_match_vlan(rule, &match);
+		if (match.mask->vlan_priority) {
+			if (match.mask->vlan_priority != VLAN_PRIO_FULL_MASK) {
+				NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for VLAN priority");
+				return -EINVAL;
+			}
+
+			input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI;
+			input->filter.vlan_tci =
+				(__force __be16)match.key->vlan_priority;
+		}
+	}
+
+	input->action = traffic_class;
+	input->cookie = f->cookie;
+
+	return 0;
+}
+
+static int igb_configure_clsflower(struct igb_adapter *adapter,
+				   struct flow_cls_offload *cls_flower)
+{
+	struct netlink_ext_ack *extack = cls_flower->common.extack;
+	struct igb_nfc_filter *filter, *f;
+	int err, tc;
+
+	tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid);
+	if (tc < 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid traffic class");
+		return -EINVAL;
+	}
+
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
+		return -ENOMEM;
+
+	err = igb_parse_cls_flower(adapter, cls_flower, tc, filter);
+	if (err < 0)
+		goto err_parse;
+
+	spin_lock(&adapter->nfc_lock);
+
+	hlist_for_each_entry(f, &adapter->nfc_filter_list, nfc_node) {
+		if (!memcmp(&f->filter, &filter->filter, sizeof(f->filter))) {
+			err = -EEXIST;
+			NL_SET_ERR_MSG_MOD(extack,
+					   "This filter is already set in ethtool");
+			goto err_locked;
+		}
+	}
+
+	hlist_for_each_entry(f, &adapter->cls_flower_list, nfc_node) {
+		if (!memcmp(&f->filter, &filter->filter, sizeof(f->filter))) {
+			err = -EEXIST;
+			NL_SET_ERR_MSG_MOD(extack,
+					   "This filter is already set in cls_flower");
+			goto err_locked;
+		}
+	}
+
+	err = igb_add_filter(adapter, filter);
+	if (err < 0) {
+		NL_SET_ERR_MSG_MOD(extack, "Could not add filter to the adapter");
+		goto err_locked;
+	}
+
+	hlist_add_head(&filter->nfc_node, &adapter->cls_flower_list);
+
+	spin_unlock(&adapter->nfc_lock);
+
+	return 0;
+
+err_locked:
+	spin_unlock(&adapter->nfc_lock);
+
+err_parse:
+	kfree(filter);
+
+	return err;
+}
+
+static int igb_delete_clsflower(struct igb_adapter *adapter,
+				struct flow_cls_offload *cls_flower)
+{
+	struct igb_nfc_filter *filter;
+	int err;
+
+	spin_lock(&adapter->nfc_lock);
+
+	hlist_for_each_entry(filter, &adapter->cls_flower_list, nfc_node)
+		if (filter->cookie == cls_flower->cookie)
+			break;
+
+	if (!filter) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	err = igb_erase_filter(adapter, filter);
+	if (err < 0)
+		goto out;
+
+	hlist_del(&filter->nfc_node);
+	kfree(filter);
+
+out:
+	spin_unlock(&adapter->nfc_lock);
+
+	return err;
+}
+
+static int igb_setup_tc_cls_flower(struct igb_adapter *adapter,
+				   struct flow_cls_offload *cls_flower)
+{
+	switch (cls_flower->command) {
+	case FLOW_CLS_REPLACE:
+		return igb_configure_clsflower(adapter, cls_flower);
+	case FLOW_CLS_DESTROY:
+		return igb_delete_clsflower(adapter, cls_flower);
+	case FLOW_CLS_STATS:
+		return -EOPNOTSUPP;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int igb_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+				 void *cb_priv)
+{
+	struct igb_adapter *adapter = cb_priv;
+
+	if (!tc_cls_can_offload_and_chain0(adapter->netdev, type_data))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return igb_setup_tc_cls_flower(adapter, type_data);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int igb_offload_txtime(struct igb_adapter *adapter,
+			      struct tc_etf_qopt_offload *qopt)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int err;
+
+	/* Launchtime offloading is only supported by i210 controller. */
+	if (hw->mac.type != e1000_i210)
+		return -EOPNOTSUPP;
+
+	/* Launchtime offloading is only supported by queues 0 and 1. */
+	if (qopt->queue < 0 || qopt->queue > 1)
+		return -EINVAL;
+
+	err = igb_save_txtime_params(adapter, qopt->queue, qopt->enable);
+	if (err)
+		return err;
+
+	igb_offload_apply(adapter, qopt->queue);
+
+	return 0;
+}
+
+static int igb_tc_query_caps(struct igb_adapter *adapter,
+			     struct tc_query_caps_base *base)
+{
+	switch (base->type) {
+	case TC_SETUP_QDISC_TAPRIO: {
+		struct tc_taprio_caps *caps = base->caps;
+
+		caps->broken_mqprio = true;
+
+		return 0;
+	}
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static LIST_HEAD(igb_block_cb_list);
+
+static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			void *type_data)
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+
+	switch (type) {
+	case TC_QUERY_CAPS:
+		return igb_tc_query_caps(adapter, type_data);
+	case TC_SETUP_QDISC_CBS:
+		return igb_offload_cbs(adapter, type_data);
+	case TC_SETUP_BLOCK:
+		return flow_block_cb_setup_simple(type_data,
+						  &igb_block_cb_list,
+						  igb_setup_tc_block_cb,
+						  adapter, adapter, true);
+
+	case TC_SETUP_QDISC_ETF:
+		return igb_offload_txtime(adapter, type_data);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf)
+{
+	int i, frame_size = dev->mtu + IGB_ETH_PKT_HDR_PAD;
+	struct igb_adapter *adapter = netdev_priv(dev);
+	struct bpf_prog *prog = bpf->prog, *old_prog;
+	bool running = netif_running(dev);
+	bool need_reset;
+
+	/* verify igb ring attributes are sufficient for XDP */
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct igb_ring *ring = adapter->rx_ring[i];
+
+		if (frame_size > igb_rx_bufsz(ring)) {
+			NL_SET_ERR_MSG_MOD(bpf->extack,
+					   "The RX buffer size is too small for the frame size");
+			netdev_warn(dev, "XDP RX buffer size %d is too small for the frame size %d\n",
+				    igb_rx_bufsz(ring), frame_size);
+			return -EINVAL;
+		}
+	}
+
+	old_prog = xchg(&adapter->xdp_prog, prog);
+	need_reset = (!!prog != !!old_prog);
+
+	/* device is up and bpf is added/removed, must setup the RX queues */
+	if (need_reset && running) {
+		igb_close(dev);
+	} else {
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			(void)xchg(&adapter->rx_ring[i]->xdp_prog,
+			    adapter->xdp_prog);
+	}
+
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	/* bpf is just replaced, RXQ and MTU are already setup */
+	if (!need_reset) {
+		return 0;
+	} else {
+		if (prog)
+			xdp_features_set_redirect_target(dev, true);
+		else
+			xdp_features_clear_redirect_target(dev);
+	}
+
+	if (running)
+		igb_open(dev);
+
+	return 0;
+}
+
+static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return igb_xdp_setup(dev, xdp);
+	default:
+		return -EINVAL;
+	}
+}
+
+static void igb_xdp_ring_update_tail(struct igb_ring *ring)
+{
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.
+	 */
+	wmb();
+	writel(ring->next_to_use, ring->tail);
+}
+
+static struct igb_ring *igb_xdp_tx_queue_mapping(struct igb_adapter *adapter)
+{
+	unsigned int r_idx = smp_processor_id();
+
+	if (r_idx >= adapter->num_tx_queues)
+		r_idx = r_idx % adapter->num_tx_queues;
+
+	return adapter->tx_ring[r_idx];
+}
+
+static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp)
+{
+	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+	int cpu = smp_processor_id();
+	struct igb_ring *tx_ring;
+	struct netdev_queue *nq;
+	u32 ret;
+
+	if (unlikely(!xdpf))
+		return IGB_XDP_CONSUMED;
+
+	/* During program transitions its possible adapter->xdp_prog is assigned
+	 * but ring has not been configured yet. In this case simply abort xmit.
+	 */
+	tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL;
+	if (unlikely(!tx_ring))
+		return IGB_XDP_CONSUMED;
+
+	nq = txring_txq(tx_ring);
+	__netif_tx_lock(nq, cpu);
+	/* Avoid transmit queue timeout since we share it with the slow path */
+	txq_trans_cond_update(nq);
+	ret = igb_xmit_xdp_ring(adapter, tx_ring, xdpf);
+	__netif_tx_unlock(nq);
+
+	return ret;
+}
+
+static int igb_xdp_xmit(struct net_device *dev, int n,
+			struct xdp_frame **frames, u32 flags)
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+	int cpu = smp_processor_id();
+	struct igb_ring *tx_ring;
+	struct netdev_queue *nq;
+	int nxmit = 0;
+	int i;
+
+	if (unlikely(test_bit(__IGB_DOWN, &adapter->state)))
+		return -ENETDOWN;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	/* During program transitions its possible adapter->xdp_prog is assigned
+	 * but ring has not been configured yet. In this case simply abort xmit.
+	 */
+	tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL;
+	if (unlikely(!tx_ring))
+		return -ENXIO;
+
+	nq = txring_txq(tx_ring);
+	__netif_tx_lock(nq, cpu);
+
+	/* Avoid transmit queue timeout since we share it with the slow path */
+	txq_trans_cond_update(nq);
+
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		int err;
+
+		err = igb_xmit_xdp_ring(adapter, tx_ring, xdpf);
+		if (err != IGB_XDP_TX)
+			break;
+		nxmit++;
+	}
+
+	__netif_tx_unlock(nq);
+
+	if (unlikely(flags & XDP_XMIT_FLUSH))
+		igb_xdp_ring_update_tail(tx_ring);
+
+	return nxmit;
+}
+
+static const struct net_device_ops igb_netdev_ops = {
+	.ndo_open		= igb_open,
+	.ndo_stop		= igb_close,
+	.ndo_start_xmit		= igb_xmit_frame,
+	.ndo_get_stats64	= igb_get_stats64,
+	.ndo_set_rx_mode	= igb_set_rx_mode,
+	.ndo_set_mac_address	= igb_set_mac,
+	.ndo_change_mtu		= igb_change_mtu,
+	.ndo_eth_ioctl		= igb_ioctl,
+	.ndo_tx_timeout		= igb_tx_timeout,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
+	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
+	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
+	.ndo_set_vf_rate	= igb_ndo_set_vf_bw,
+	.ndo_set_vf_spoofchk	= igb_ndo_set_vf_spoofchk,
+	.ndo_set_vf_trust	= igb_ndo_set_vf_trust,
+	.ndo_get_vf_config	= igb_ndo_get_vf_config,
+	.ndo_fix_features	= igb_fix_features,
+	.ndo_set_features	= igb_set_features,
+	.ndo_fdb_add		= igb_ndo_fdb_add,
+	.ndo_features_check	= igb_features_check,
+	.ndo_setup_tc		= igb_setup_tc,
+	.ndo_bpf		= igb_xdp,
+	.ndo_xdp_xmit		= igb_xdp_xmit,
+};
+
+/**
+ * igb_set_fw_version - Configure version string for ethtool
+ * @adapter: adapter struct
+ **/
+void igb_set_fw_version(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_fw_version fw;
+
+	igb_get_fw_version(hw, &fw);
+
+	switch (hw->mac.type) {
+	case e1000_i210:
+	case e1000_i211:
+		if (!(igb_get_flash_presence_i210(hw))) {
+			snprintf(adapter->fw_version,
+				 sizeof(adapter->fw_version),
+				 "%2d.%2d-%d",
+				 fw.invm_major, fw.invm_minor,
+				 fw.invm_img_type);
+			break;
+		}
+		fallthrough;
+	default:
+		/* if option is rom valid, display its version too */
+		if (fw.or_valid) {
+			snprintf(adapter->fw_version,
+				 sizeof(adapter->fw_version),
+				 "%d.%d, 0x%08x, %d.%d.%d",
+				 fw.eep_major, fw.eep_minor, fw.etrack_id,
+				 fw.or_major, fw.or_build, fw.or_patch);
+		/* no option rom */
+		} else if (fw.etrack_id != 0X0000) {
+			snprintf(adapter->fw_version,
+			    sizeof(adapter->fw_version),
+			    "%d.%d, 0x%08x",
+			    fw.eep_major, fw.eep_minor, fw.etrack_id);
+		} else {
+		snprintf(adapter->fw_version,
+		    sizeof(adapter->fw_version),
+		    "%d.%d.%d",
+		    fw.eep_major, fw.eep_minor, fw.eep_build);
+		}
+		break;
+	}
+}
+
+/**
+ * igb_init_mas - init Media Autosense feature if enabled in the NVM
+ *
+ * @adapter: adapter struct
+ **/
+static void igb_init_mas(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u16 eeprom_data;
+
+	hw->nvm.ops.read(hw, NVM_COMPAT, 1, &eeprom_data);
+	switch (hw->bus.func) {
+	case E1000_FUNC_0:
+		if (eeprom_data & IGB_MAS_ENABLE_0) {
+			adapter->flags |= IGB_FLAG_MAS_ENABLE;
+			netdev_info(adapter->netdev,
+				"MAS: Enabling Media Autosense for port %d\n",
+				hw->bus.func);
+		}
+		break;
+	case E1000_FUNC_1:
+		if (eeprom_data & IGB_MAS_ENABLE_1) {
+			adapter->flags |= IGB_FLAG_MAS_ENABLE;
+			netdev_info(adapter->netdev,
+				"MAS: Enabling Media Autosense for port %d\n",
+				hw->bus.func);
+		}
+		break;
+	case E1000_FUNC_2:
+		if (eeprom_data & IGB_MAS_ENABLE_2) {
+			adapter->flags |= IGB_FLAG_MAS_ENABLE;
+			netdev_info(adapter->netdev,
+				"MAS: Enabling Media Autosense for port %d\n",
+				hw->bus.func);
+		}
+		break;
+	case E1000_FUNC_3:
+		if (eeprom_data & IGB_MAS_ENABLE_3) {
+			adapter->flags |= IGB_FLAG_MAS_ENABLE;
+			netdev_info(adapter->netdev,
+				"MAS: Enabling Media Autosense for port %d\n",
+				hw->bus.func);
+		}
+		break;
+	default:
+		/* Shouldn't get here */
+		netdev_err(adapter->netdev,
+			"MAS: Invalid port configuration, returning\n");
+		break;
+	}
+}
+
+/**
+ *  igb_init_i2c - Init I2C interface
+ *  @adapter: pointer to adapter structure
+ **/
+static s32 igb_init_i2c(struct igb_adapter *adapter)
+{
+	s32 status = 0;
+
+	/* I2C interface supported on i350 devices */
+	if (adapter->hw.mac.type != e1000_i350)
+		return 0;
+
+	/* Initialize the i2c bus which is controlled by the registers.
+	 * This bus will use the i2c_algo_bit structure that implements
+	 * the protocol through toggling of the 4 bits in the register.
+	 */
+	adapter->i2c_adap.owner = THIS_MODULE;
+	adapter->i2c_algo = igb_i2c_algo;
+	adapter->i2c_algo.data = adapter;
+	adapter->i2c_adap.algo_data = &adapter->i2c_algo;
+	adapter->i2c_adap.dev.parent = &adapter->pdev->dev;
+	strscpy(adapter->i2c_adap.name, "igb BB",
+		sizeof(adapter->i2c_adap.name));
+	status = i2c_bit_add_bus(&adapter->i2c_adap);
+	return status;
+}
+
+/**
+ *  igb_probe - Device Initialization Routine
+ *  @pdev: PCI device information struct
+ *  @ent: entry in igb_pci_tbl
+ *
+ *  Returns 0 on success, negative on failure
+ *
+ *  igb_probe initializes an adapter identified by a pci_dev structure.
+ *  The OS initialization, configuring of the adapter private structure,
+ *  and a hardware reset occur.
+ **/
+static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct net_device *netdev;
+	struct igb_adapter *adapter;
+	struct e1000_hw *hw;
+	u16 eeprom_data = 0;
+	s32 ret_val;
+	static int global_quad_port_a; /* global quad port a indication */
+	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
+	u8 part_str[E1000_PBANUM_LENGTH];
+	int err;
+
+	/* Catch broken hardware that put the wrong VF device ID in
+	 * the PCIe SR-IOV capability.
+	 */
+	if (pdev->is_virtfn) {
+		WARN(1, KERN_ERR "%s (%x:%x) should not be a VF!\n",
+			pci_name(pdev), pdev->vendor, pdev->device);
+		return -EINVAL;
+	}
+
+	err = pci_enable_device_mem(pdev);
+	if (err)
+		return err;
+
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_err(&pdev->dev,
+			"No usable DMA configuration, aborting\n");
+		goto err_dma;
+	}
+
+	err = pci_request_mem_regions(pdev, igb_driver_name);
+	if (err)
+		goto err_pci_reg;
+
+	pci_set_master(pdev);
+	pci_save_state(pdev);
+
+	err = -ENOMEM;
+	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
+				   IGB_MAX_TX_QUEUES);
+	if (!netdev)
+		goto err_alloc_etherdev;
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	pci_set_drvdata(pdev, netdev);
+	adapter = netdev_priv(netdev);
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+	hw = &adapter->hw;
+	hw->back = adapter;
+	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+
+	err = -EIO;
+	adapter->io_addr = pci_iomap(pdev, 0, 0);
+	if (!adapter->io_addr)
+		goto err_ioremap;
+	/* hw->hw_addr can be altered, we'll use adapter->io_addr for unmap */
+	hw->hw_addr = adapter->io_addr;
+
+	netdev->netdev_ops = &igb_netdev_ops;
+	igb_set_ethtool_ops(netdev);
+	netdev->watchdog_timeo = 5 * HZ;
+
+	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
+
+	netdev->mem_start = pci_resource_start(pdev, 0);
+	netdev->mem_end = pci_resource_end(pdev, 0);
+
+	/* PCI config space info */
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	hw->revision_id = pdev->revision;
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+
+	/* Copy the default MAC, PHY and NVM function pointers */
+	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
+	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
+	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
+	/* Initialize skew-specific constants */
+	err = ei->get_invariants(hw);
+	if (err)
+		goto err_sw_init;
+
+	/* setup the private structure */
+	err = igb_sw_init(adapter);
+	if (err)
+		goto err_sw_init;
+
+	igb_get_bus_info_pcie(hw);
+
+	hw->phy.autoneg_wait_to_complete = false;
+
+	/* Copper options */
+	if (hw->phy.media_type == e1000_media_type_copper) {
+		hw->phy.mdix = AUTO_ALL_MODES;
+		hw->phy.disable_polarity_correction = false;
+		hw->phy.ms_type = e1000_ms_hw_default;
+	}
+
+	if (igb_check_reset_block(hw))
+		dev_info(&pdev->dev,
+			"PHY reset is blocked due to SOL/IDER session.\n");
+
+	/* features is initialized to 0 in allocation, it might have bits
+	 * set by igb_sw_init so we should use an or instead of an
+	 * assignment.
+	 */
+	netdev->features |= NETIF_F_SG |
+			    NETIF_F_TSO |
+			    NETIF_F_TSO6 |
+			    NETIF_F_RXHASH |
+			    NETIF_F_RXCSUM |
+			    NETIF_F_HW_CSUM;
+
+	if (hw->mac.type >= e1000_82576)
+		netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_GSO_UDP_L4;
+
+	if (hw->mac.type >= e1000_i350)
+		netdev->features |= NETIF_F_HW_TC;
+
+#define IGB_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
+				  NETIF_F_GSO_GRE_CSUM | \
+				  NETIF_F_GSO_IPXIP4 | \
+				  NETIF_F_GSO_IPXIP6 | \
+				  NETIF_F_GSO_UDP_TUNNEL | \
+				  NETIF_F_GSO_UDP_TUNNEL_CSUM)
+
+	netdev->gso_partial_features = IGB_GSO_PARTIAL_FEATURES;
+	netdev->features |= NETIF_F_GSO_PARTIAL | IGB_GSO_PARTIAL_FEATURES;
+
+	/* copy netdev features into list of user selectable features */
+	netdev->hw_features |= netdev->features |
+			       NETIF_F_HW_VLAN_CTAG_RX |
+			       NETIF_F_HW_VLAN_CTAG_TX |
+			       NETIF_F_RXALL;
+
+	if (hw->mac.type >= e1000_i350)
+		netdev->hw_features |= NETIF_F_NTUPLE;
+
+	netdev->features |= NETIF_F_HIGHDMA;
+
+	netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
+	netdev->mpls_features |= NETIF_F_HW_CSUM;
+	netdev->hw_enc_features |= netdev->vlan_features;
+
+	/* set this bit last since it cannot be part of vlan_features */
+	netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
+			    NETIF_F_HW_VLAN_CTAG_RX |
+			    NETIF_F_HW_VLAN_CTAG_TX;
+
+	netdev->priv_flags |= IFF_SUPP_NOFCS;
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;
+
+	/* MTU range: 68 - 9216 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
+
+	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
+
+	/* before reading the NVM, reset the controller to put the device in a
+	 * known good starting state
+	 */
+	hw->mac.ops.reset_hw(hw);
+
+	/* make sure the NVM is good , i211/i210 parts can have special NVM
+	 * that doesn't contain a checksum
+	 */
+	switch (hw->mac.type) {
+	case e1000_i210:
+	case e1000_i211:
+		if (igb_get_flash_presence_i210(hw)) {
+			if (hw->nvm.ops.validate(hw) < 0) {
+				dev_err(&pdev->dev,
+					"The NVM Checksum Is Not Valid\n");
+				err = -EIO;
+				goto err_eeprom;
+			}
+		}
+		break;
+	default:
+		if (hw->nvm.ops.validate(hw) < 0) {
+			dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
+			err = -EIO;
+			goto err_eeprom;
+		}
+		break;
+	}
+
+	if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) {
+		/* copy the MAC address out of the NVM */
+		if (hw->mac.ops.read_mac_addr(hw))
+			dev_err(&pdev->dev, "NVM Read Error\n");
+	}
+
+	eth_hw_addr_set(netdev, hw->mac.addr);
+
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		dev_err(&pdev->dev, "Invalid MAC Address\n");
+		err = -EIO;
+		goto err_eeprom;
+	}
+
+	igb_set_default_mac_filter(adapter);
+
+	/* get firmware version for ethtool -i */
+	igb_set_fw_version(adapter);
+
+	/* configure RXPBSIZE and TXPBSIZE */
+	if (hw->mac.type == e1000_i210) {
+		wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT);
+		wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
+	}
+
+	timer_setup(&adapter->watchdog_timer, igb_watchdog, 0);
+	timer_setup(&adapter->phy_info_timer, igb_update_phy_info, 0);
+
+	INIT_WORK(&adapter->reset_task, igb_reset_task);
+	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
+
+	/* Initialize link properties that are user-changeable */
+	adapter->fc_autoneg = true;
+	hw->mac.autoneg = true;
+	hw->phy.autoneg_advertised = 0x2f;
+
+	hw->fc.requested_mode = e1000_fc_default;
+	hw->fc.current_mode = e1000_fc_default;
+
+	igb_validate_mdi_setting(hw);
+
+	/* By default, support wake on port A */
+	if (hw->bus.func == 0)
+		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+
+	/* Check the NVM for wake support on non-port A ports */
+	if (hw->mac.type >= e1000_82580)
+		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
+				 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
+				 &eeprom_data);
+	else if (hw->bus.func == 1)
+		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
+
+	if (eeprom_data & IGB_EEPROM_APME)
+		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+
+	/* now that we have the eeprom settings, apply the special cases where
+	 * the eeprom may be wrong or the board simply won't support wake on
+	 * lan on a particular port
+	 */
+	switch (pdev->device) {
+	case E1000_DEV_ID_82575GB_QUAD_COPPER:
+		adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+		break;
+	case E1000_DEV_ID_82575EB_FIBER_SERDES:
+	case E1000_DEV_ID_82576_FIBER:
+	case E1000_DEV_ID_82576_SERDES:
+		/* Wake events only supported on port A for dual fiber
+		 * regardless of eeprom setting
+		 */
+		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
+			adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+		break;
+	case E1000_DEV_ID_82576_QUAD_COPPER:
+	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
+		/* if quad port adapter, disable WoL on all but port A */
+		if (global_quad_port_a != 0)
+			adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+		else
+			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
+		/* Reset for multiple quad port adapters */
+		if (++global_quad_port_a == 4)
+			global_quad_port_a = 0;
+		break;
+	default:
+		/* If the device can't wake, don't set software support */
+		if (!device_can_wakeup(&adapter->pdev->dev))
+			adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
+	}
+
+	/* initialize the wol settings based on the eeprom settings */
+	if (adapter->flags & IGB_FLAG_WOL_SUPPORTED)
+		adapter->wol |= E1000_WUFC_MAG;
+
+	/* Some vendors want WoL disabled by default, but still supported */
+	if ((hw->mac.type == e1000_i350) &&
+	    (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) {
+		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+		adapter->wol = 0;
+	}
+
+	/* Some vendors want the ability to Use the EEPROM setting as
+	 * enable/disable only, and not for capability
+	 */
+	if (((hw->mac.type == e1000_i350) ||
+	     (hw->mac.type == e1000_i354)) &&
+	    (pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)) {
+		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+		adapter->wol = 0;
+	}
+	if (hw->mac.type == e1000_i350) {
+		if (((pdev->subsystem_device == 0x5001) ||
+		     (pdev->subsystem_device == 0x5002)) &&
+				(hw->bus.func == 0)) {
+			adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+			adapter->wol = 0;
+		}
+		if (pdev->subsystem_device == 0x1F52)
+			adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
+	}
+
+	device_set_wakeup_enable(&adapter->pdev->dev,
+				 adapter->flags & IGB_FLAG_WOL_SUPPORTED);
+
+	/* reset the hardware with the new settings */
+	igb_reset(adapter);
+
+	/* Init the I2C interface */
+	err = igb_init_i2c(adapter);
+	if (err) {
+		dev_err(&pdev->dev, "failed to init i2c interface\n");
+		goto err_eeprom;
+	}
+
+	/* let the f/w know that the h/w is now under the control of the
+	 * driver.
+	 */
+	igb_get_hw_control(adapter);
+
+	strcpy(netdev->name, "eth%d");
+	err = register_netdev(netdev);
+	if (err)
+		goto err_register;
+
+	/* carrier off reporting is important to ethtool even BEFORE open */
+	netif_carrier_off(netdev);
+
+#ifdef CONFIG_IGB_DCA
+	if (dca_add_requester(&pdev->dev) == 0) {
+		adapter->flags |= IGB_FLAG_DCA_ENABLED;
+		dev_info(&pdev->dev, "DCA enabled\n");
+		igb_setup_dca(adapter);
+	}
+
+#endif
+#ifdef CONFIG_IGB_HWMON
+	/* Initialize the thermal sensor on i350 devices. */
+	if (hw->mac.type == e1000_i350 && hw->bus.func == 0) {
+		u16 ets_word;
+
+		/* Read the NVM to determine if this i350 device supports an
+		 * external thermal sensor.
+		 */
+		hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_word);
+		if (ets_word != 0x0000 && ets_word != 0xFFFF)
+			adapter->ets = true;
+		else
+			adapter->ets = false;
+		/* Only enable I2C bit banging if an external thermal
+		 * sensor is supported.
+		 */
+		if (adapter->ets)
+			igb_set_i2c_bb(hw);
+		hw->mac.ops.init_thermal_sensor_thresh(hw);
+		if (igb_sysfs_init(adapter))
+			dev_err(&pdev->dev,
+				"failed to allocate sysfs resources\n");
+	} else {
+		adapter->ets = false;
+	}
+#endif
+	/* Check if Media Autosense is enabled */
+	adapter->ei = *ei;
+	if (hw->dev_spec._82575.mas_capable)
+		igb_init_mas(adapter);
+
+	/* do hw tstamp init after resetting */
+	igb_ptp_init(adapter);
+
+	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
+	/* print bus type/speed/width info, not applicable to i354 */
+	if (hw->mac.type != e1000_i354) {
+		dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
+			 netdev->name,
+			 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
+			  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
+			   "unknown"),
+			 ((hw->bus.width == e1000_bus_width_pcie_x4) ?
+			  "Width x4" :
+			  (hw->bus.width == e1000_bus_width_pcie_x2) ?
+			  "Width x2" :
+			  (hw->bus.width == e1000_bus_width_pcie_x1) ?
+			  "Width x1" : "unknown"), netdev->dev_addr);
+	}
+
+	if ((hw->mac.type == e1000_82576 &&
+	     rd32(E1000_EECD) & E1000_EECD_PRES) ||
+	    (hw->mac.type >= e1000_i210 ||
+	     igb_get_flash_presence_i210(hw))) {
+		ret_val = igb_read_part_string(hw, part_str,
+					       E1000_PBANUM_LENGTH);
+	} else {
+		ret_val = -E1000_ERR_INVM_VALUE_NOT_FOUND;
+	}
+
+	if (ret_val)
+		strcpy(part_str, "Unknown");
+	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
+	dev_info(&pdev->dev,
+		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
+		(adapter->flags & IGB_FLAG_HAS_MSIX) ? "MSI-X" :
+		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
+		adapter->num_rx_queues, adapter->num_tx_queues);
+	if (hw->phy.media_type == e1000_media_type_copper) {
+		switch (hw->mac.type) {
+		case e1000_i350:
+		case e1000_i210:
+		case e1000_i211:
+			/* Enable EEE for internal copper PHY devices */
+			err = igb_set_eee_i350(hw, true, true);
+			if ((!err) &&
+			    (!hw->dev_spec._82575.eee_disable)) {
+				adapter->eee_advert =
+					MDIO_EEE_100TX | MDIO_EEE_1000T;
+				adapter->flags |= IGB_FLAG_EEE;
+			}
+			break;
+		case e1000_i354:
+			if ((rd32(E1000_CTRL_EXT) &
+			    E1000_CTRL_EXT_LINK_MODE_SGMII)) {
+				err = igb_set_eee_i354(hw, true, true);
+				if ((!err) &&
+					(!hw->dev_spec._82575.eee_disable)) {
+					adapter->eee_advert =
+					   MDIO_EEE_100TX | MDIO_EEE_1000T;
+					adapter->flags |= IGB_FLAG_EEE;
+				}
+			}
+			break;
+		default:
+			break;
+		}
+	}
+
+	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
+
+	pm_runtime_put_noidle(&pdev->dev);
+	return 0;
+
+err_register:
+	igb_release_hw_control(adapter);
+	memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap));
+err_eeprom:
+	if (!igb_check_reset_block(hw))
+		igb_reset_phy(hw);
+
+	if (hw->flash_address)
+		iounmap(hw->flash_address);
+err_sw_init:
+	kfree(adapter->mac_table);
+	kfree(adapter->shadow_vfta);
+	igb_clear_interrupt_scheme(adapter);
+#ifdef CONFIG_PCI_IOV
+	igb_disable_sriov(pdev, false);
+#endif
+	pci_iounmap(pdev, adapter->io_addr);
+err_ioremap:
+	free_netdev(netdev);
+err_alloc_etherdev:
+	pci_release_mem_regions(pdev);
+err_pci_reg:
+err_dma:
+	pci_disable_device(pdev);
+	return err;
+}
+
+#ifdef CONFIG_PCI_IOV
+static int igb_sriov_reinit(struct pci_dev *dev)
+{
+	struct net_device *netdev = pci_get_drvdata(dev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct pci_dev *pdev = adapter->pdev;
+
+	rtnl_lock();
+
+	if (netif_running(netdev))
+		igb_close(netdev);
+	else
+		igb_reset(adapter);
+
+	igb_clear_interrupt_scheme(adapter);
+
+	igb_init_queue_configuration(adapter);
+
+	if (igb_init_interrupt_scheme(adapter, true)) {
+		rtnl_unlock();
+		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
+		return -ENOMEM;
+	}
+
+	if (netif_running(netdev))
+		igb_open(netdev);
+
+	rtnl_unlock();
+
+	return 0;
+}
+
+static int igb_disable_sriov(struct pci_dev *pdev, bool reinit)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned long flags;
+
+	/* reclaim resources allocated to VFs */
+	if (adapter->vf_data) {
+		/* disable iov and allow time for transactions to clear */
+		if (pci_vfs_assigned(pdev)) {
+			dev_warn(&pdev->dev,
+				 "Cannot deallocate SR-IOV virtual functions while they are assigned - VFs will not be deallocated\n");
+			return -EPERM;
+		} else {
+			pci_disable_sriov(pdev);
+			msleep(500);
+		}
+		spin_lock_irqsave(&adapter->vfs_lock, flags);
+		kfree(adapter->vf_mac_list);
+		adapter->vf_mac_list = NULL;
+		kfree(adapter->vf_data);
+		adapter->vf_data = NULL;
+		adapter->vfs_allocated_count = 0;
+		spin_unlock_irqrestore(&adapter->vfs_lock, flags);
+		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
+		wrfl();
+		msleep(100);
+		dev_info(&pdev->dev, "IOV Disabled\n");
+
+		/* Re-enable DMA Coalescing flag since IOV is turned off */
+		adapter->flags |= IGB_FLAG_DMAC;
+	}
+
+	return reinit ? igb_sriov_reinit(pdev) : 0;
+}
+
+static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs, bool reinit)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	int old_vfs = pci_num_vf(pdev);
+	struct vf_mac_filter *mac_list;
+	int err = 0;
+	int num_vf_mac_filters, i;
+
+	if (!(adapter->flags & IGB_FLAG_HAS_MSIX) || num_vfs > 7) {
+		err = -EPERM;
+		goto out;
+	}
+	if (!num_vfs)
+		goto out;
+
+	if (old_vfs) {
+		dev_info(&pdev->dev, "%d pre-allocated VFs found - override max_vfs setting of %d\n",
+			 old_vfs, max_vfs);
+		adapter->vfs_allocated_count = old_vfs;
+	} else
+		adapter->vfs_allocated_count = num_vfs;
+
+	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
+				sizeof(struct vf_data_storage), GFP_KERNEL);
+
+	/* if allocation failed then we do not support SR-IOV */
+	if (!adapter->vf_data) {
+		adapter->vfs_allocated_count = 0;
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* Due to the limited number of RAR entries calculate potential
+	 * number of MAC filters available for the VFs. Reserve entries
+	 * for PF default MAC, PF MAC filters and at least one RAR entry
+	 * for each VF for VF MAC.
+	 */
+	num_vf_mac_filters = adapter->hw.mac.rar_entry_count -
+			     (1 + IGB_PF_MAC_FILTERS_RESERVED +
+			      adapter->vfs_allocated_count);
+
+	adapter->vf_mac_list = kcalloc(num_vf_mac_filters,
+				       sizeof(struct vf_mac_filter),
+				       GFP_KERNEL);
+
+	mac_list = adapter->vf_mac_list;
+	INIT_LIST_HEAD(&adapter->vf_macs.l);
+
+	if (adapter->vf_mac_list) {
+		/* Initialize list of VF MAC filters */
+		for (i = 0; i < num_vf_mac_filters; i++) {
+			mac_list->vf = -1;
+			mac_list->free = true;
+			list_add(&mac_list->l, &adapter->vf_macs.l);
+			mac_list++;
+		}
+	} else {
+		/* If we could not allocate memory for the VF MAC filters
+		 * we can continue without this feature but warn user.
+		 */
+		dev_err(&pdev->dev,
+			"Unable to allocate memory for VF MAC filter list\n");
+	}
+
+	dev_info(&pdev->dev, "%d VFs allocated\n",
+		 adapter->vfs_allocated_count);
+	for (i = 0; i < adapter->vfs_allocated_count; i++)
+		igb_vf_configure(adapter, i);
+
+	/* DMA Coalescing is not supported in IOV mode. */
+	adapter->flags &= ~IGB_FLAG_DMAC;
+
+	if (reinit) {
+		err = igb_sriov_reinit(pdev);
+		if (err)
+			goto err_out;
+	}
+
+	/* only call pci_enable_sriov() if no VFs are allocated already */
+	if (!old_vfs) {
+		err = pci_enable_sriov(pdev, adapter->vfs_allocated_count);
+		if (err)
+			goto err_out;
+	}
+
+	goto out;
+
+err_out:
+	kfree(adapter->vf_mac_list);
+	adapter->vf_mac_list = NULL;
+	kfree(adapter->vf_data);
+	adapter->vf_data = NULL;
+	adapter->vfs_allocated_count = 0;
+out:
+	return err;
+}
+
+#endif
+/**
+ *  igb_remove_i2c - Cleanup  I2C interface
+ *  @adapter: pointer to adapter structure
+ **/
+static void igb_remove_i2c(struct igb_adapter *adapter)
+{
+	/* free the adapter bus structure */
+	i2c_del_adapter(&adapter->i2c_adap);
+}
+
+/**
+ *  igb_remove - Device Removal Routine
+ *  @pdev: PCI device information struct
+ *
+ *  igb_remove is called by the PCI subsystem to alert the driver
+ *  that it should release a PCI device.  The could be caused by a
+ *  Hot-Plug event, or because the driver is going to be removed from
+ *  memory.
+ **/
+static void igb_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	pm_runtime_get_noresume(&pdev->dev);
+#ifdef CONFIG_IGB_HWMON
+	igb_sysfs_exit(adapter);
+#endif
+	igb_remove_i2c(adapter);
+	igb_ptp_stop(adapter);
+	/* The watchdog timer may be rescheduled, so explicitly
+	 * disable watchdog from being rescheduled.
+	 */
+	set_bit(__IGB_DOWN, &adapter->state);
+	del_timer_sync(&adapter->watchdog_timer);
+	del_timer_sync(&adapter->phy_info_timer);
+
+	cancel_work_sync(&adapter->reset_task);
+	cancel_work_sync(&adapter->watchdog_task);
+
+#ifdef CONFIG_IGB_DCA
+	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
+		dev_info(&pdev->dev, "DCA disabled\n");
+		dca_remove_requester(&pdev->dev);
+		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
+		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
+	}
+#endif
+
+	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
+	 * would have already happened in close and is redundant.
+	 */
+	igb_release_hw_control(adapter);
+
+#ifdef CONFIG_PCI_IOV
+	igb_disable_sriov(pdev, false);
+#endif
+
+	unregister_netdev(netdev);
+
+	igb_clear_interrupt_scheme(adapter);
+
+	pci_iounmap(pdev, adapter->io_addr);
+	if (hw->flash_address)
+		iounmap(hw->flash_address);
+	pci_release_mem_regions(pdev);
+
+	kfree(adapter->mac_table);
+	kfree(adapter->shadow_vfta);
+	free_netdev(netdev);
+
+	pci_disable_device(pdev);
+}
+
+/**
+ *  igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
+ *  @adapter: board private structure to initialize
+ *
+ *  This function initializes the vf specific data storage and then attempts to
+ *  allocate the VFs.  The reason for ordering it this way is because it is much
+ *  mor expensive time wise to disable SR-IOV than it is to allocate and free
+ *  the memory for the VFs.
+ **/
+static void igb_probe_vfs(struct igb_adapter *adapter)
+{
+#ifdef CONFIG_PCI_IOV
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* Virtualization features not supported on i210 and 82580 family. */
+	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211) ||
+	    (hw->mac.type == e1000_82580))
+		return;
+
+	/* Of the below we really only want the effect of getting
+	 * IGB_FLAG_HAS_MSIX set (if available), without which
+	 * igb_enable_sriov() has no effect.
+	 */
+	igb_set_interrupt_capability(adapter, true);
+	igb_reset_interrupt_capability(adapter);
+
+	pci_sriov_set_totalvfs(pdev, 7);
+	igb_enable_sriov(pdev, max_vfs, false);
+
+#endif /* CONFIG_PCI_IOV */
+}
+
+unsigned int igb_get_max_rss_queues(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned int max_rss_queues;
+
+	/* Determine the maximum number of RSS queues supported. */
+	switch (hw->mac.type) {
+	case e1000_i211:
+		max_rss_queues = IGB_MAX_RX_QUEUES_I211;
+		break;
+	case e1000_82575:
+	case e1000_i210:
+		max_rss_queues = IGB_MAX_RX_QUEUES_82575;
+		break;
+	case e1000_i350:
+		/* I350 cannot do RSS and SR-IOV at the same time */
+		if (!!adapter->vfs_allocated_count) {
+			max_rss_queues = 1;
+			break;
+		}
+		fallthrough;
+	case e1000_82576:
+		if (!!adapter->vfs_allocated_count) {
+			max_rss_queues = 2;
+			break;
+		}
+		fallthrough;
+	case e1000_82580:
+	case e1000_i354:
+	default:
+		max_rss_queues = IGB_MAX_RX_QUEUES;
+		break;
+	}
+
+	return max_rss_queues;
+}
+
+static void igb_init_queue_configuration(struct igb_adapter *adapter)
+{
+	u32 max_rss_queues;
+
+	max_rss_queues = igb_get_max_rss_queues(adapter);
+	adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
+
+	igb_set_flag_queue_pairs(adapter, max_rss_queues);
+}
+
+void igb_set_flag_queue_pairs(struct igb_adapter *adapter,
+			      const u32 max_rss_queues)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* Determine if we need to pair queues. */
+	switch (hw->mac.type) {
+	case e1000_82575:
+	case e1000_i211:
+		/* Device supports enough interrupts without queue pairing. */
+		break;
+	case e1000_82576:
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_i210:
+	default:
+		/* If rss_queues > half of max_rss_queues, pair the queues in
+		 * order to conserve interrupts due to limited supply.
+		 */
+		if (adapter->rss_queues > (max_rss_queues / 2))
+			adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
+		else
+			adapter->flags &= ~IGB_FLAG_QUEUE_PAIRS;
+		break;
+	}
+}
+
+/**
+ *  igb_sw_init - Initialize general software structures (struct igb_adapter)
+ *  @adapter: board private structure to initialize
+ *
+ *  igb_sw_init initializes the Adapter private data structure.
+ *  Fields are initialized based on PCI device information and
+ *  OS network device settings (MTU size).
+ **/
+static int igb_sw_init(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+
+	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
+
+	/* set default ring sizes */
+	adapter->tx_ring_count = IGB_DEFAULT_TXD;
+	adapter->rx_ring_count = IGB_DEFAULT_RXD;
+
+	/* set default ITR values */
+	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
+	adapter->tx_itr_setting = IGB_DEFAULT_ITR;
+
+	/* set default work limits */
+	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
+
+	adapter->max_frame_size = netdev->mtu + IGB_ETH_PKT_HDR_PAD;
+	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
+
+	spin_lock_init(&adapter->nfc_lock);
+	spin_lock_init(&adapter->stats64_lock);
+
+	/* init spinlock to avoid concurrency of VF resources */
+	spin_lock_init(&adapter->vfs_lock);
+#ifdef CONFIG_PCI_IOV
+	switch (hw->mac.type) {
+	case e1000_82576:
+	case e1000_i350:
+		if (max_vfs > 7) {
+			dev_warn(&pdev->dev,
+				 "Maximum of 7 VFs per PF, using max\n");
+			max_vfs = adapter->vfs_allocated_count = 7;
+		} else
+			adapter->vfs_allocated_count = max_vfs;
+		if (adapter->vfs_allocated_count)
+			dev_warn(&pdev->dev,
+				 "Enabling SR-IOV VFs using the module parameter is deprecated - please use the pci sysfs interface.\n");
+		break;
+	default:
+		break;
+	}
+#endif /* CONFIG_PCI_IOV */
+
+	/* Assume MSI-X interrupts, will be checked during IRQ allocation */
+	adapter->flags |= IGB_FLAG_HAS_MSIX;
+
+	adapter->mac_table = kcalloc(hw->mac.rar_entry_count,
+				     sizeof(struct igb_mac_addr),
+				     GFP_KERNEL);
+	if (!adapter->mac_table)
+		return -ENOMEM;
+
+	igb_probe_vfs(adapter);
+
+	igb_init_queue_configuration(adapter);
+
+	/* Setup and initialize a copy of the hw vlan table array */
+	adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
+				       GFP_KERNEL);
+	if (!adapter->shadow_vfta)
+		return -ENOMEM;
+
+	/* This call may decrease the number of queues */
+	if (igb_init_interrupt_scheme(adapter, true)) {
+		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
+		return -ENOMEM;
+	}
+
+	/* Explicitly disable IRQ since the NIC can be in any state. */
+	igb_irq_disable(adapter);
+
+	if (hw->mac.type >= e1000_i350)
+		adapter->flags &= ~IGB_FLAG_DMAC;
+
+	set_bit(__IGB_DOWN, &adapter->state);
+	return 0;
+}
+
+/**
+ *  __igb_open - Called when a network interface is made active
+ *  @netdev: network interface device structure
+ *  @resuming: indicates whether we are in a resume call
+ *
+ *  Returns 0 on success, negative value on failure
+ *
+ *  The open entry point is called when a network interface is made
+ *  active by the system (IFF_UP).  At this point all resources needed
+ *  for transmit and receive operations are allocated, the interrupt
+ *  handler is registered with the OS, the watchdog timer is started,
+ *  and the stack is notified that the interface is ready.
+ **/
+static int __igb_open(struct net_device *netdev, bool resuming)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct pci_dev *pdev = adapter->pdev;
+	int err;
+	int i;
+
+	/* disallow open during test */
+	if (test_bit(__IGB_TESTING, &adapter->state)) {
+		WARN_ON(resuming);
+		return -EBUSY;
+	}
+
+	if (!resuming)
+		pm_runtime_get_sync(&pdev->dev);
+
+	netif_carrier_off(netdev);
+
+	/* allocate transmit descriptors */
+	err = igb_setup_all_tx_resources(adapter);
+	if (err)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	err = igb_setup_all_rx_resources(adapter);
+	if (err)
+		goto err_setup_rx;
+
+	igb_power_up_link(adapter);
+
+	/* before we allocate an interrupt, we must be ready to handle it.
+	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
+	 * as soon as we call pci_request_irq, so we have to setup our
+	 * clean_rx handler before we do so.
+	 */
+	igb_configure(adapter);
+
+	err = igb_request_irq(adapter);
+	if (err)
+		goto err_req_irq;
+
+	/* Notify the stack of the actual queue counts. */
+	err = netif_set_real_num_tx_queues(adapter->netdev,
+					   adapter->num_tx_queues);
+	if (err)
+		goto err_set_queues;
+
+	err = netif_set_real_num_rx_queues(adapter->netdev,
+					   adapter->num_rx_queues);
+	if (err)
+		goto err_set_queues;
+
+	/* From here on the code is the same as igb_up() */
+	clear_bit(__IGB_DOWN, &adapter->state);
+
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		napi_enable(&(adapter->q_vector[i]->napi));
+
+	/* Clear any pending interrupts. */
+	rd32(E1000_TSICR);
+	rd32(E1000_ICR);
+
+	igb_irq_enable(adapter);
+
+	/* notify VFs that reset has been completed */
+	if (adapter->vfs_allocated_count) {
+		u32 reg_data = rd32(E1000_CTRL_EXT);
+
+		reg_data |= E1000_CTRL_EXT_PFRSTD;
+		wr32(E1000_CTRL_EXT, reg_data);
+	}
+
+	netif_tx_start_all_queues(netdev);
+
+	if (!resuming)
+		pm_runtime_put(&pdev->dev);
+
+	/* start the watchdog. */
+	hw->mac.get_link_status = 1;
+	schedule_work(&adapter->watchdog_task);
+
+	return 0;
+
+err_set_queues:
+	igb_free_irq(adapter);
+err_req_irq:
+	igb_release_hw_control(adapter);
+	igb_power_down_link(adapter);
+	igb_free_all_rx_resources(adapter);
+err_setup_rx:
+	igb_free_all_tx_resources(adapter);
+err_setup_tx:
+	igb_reset(adapter);
+	if (!resuming)
+		pm_runtime_put(&pdev->dev);
+
+	return err;
+}
+
+int igb_open(struct net_device *netdev)
+{
+	return __igb_open(netdev, false);
+}
+
+/**
+ *  __igb_close - Disables a network interface
+ *  @netdev: network interface device structure
+ *  @suspending: indicates we are in a suspend call
+ *
+ *  Returns 0, this is not allowed to fail
+ *
+ *  The close entry point is called when an interface is de-activated
+ *  by the OS.  The hardware is still under the driver's control, but
+ *  needs to be disabled.  A global MAC reset is issued to stop the
+ *  hardware, and all transmit and receive resources are freed.
+ **/
+static int __igb_close(struct net_device *netdev, bool suspending)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct pci_dev *pdev = adapter->pdev;
+
+	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
+
+	if (!suspending)
+		pm_runtime_get_sync(&pdev->dev);
+
+	igb_down(adapter);
+	igb_free_irq(adapter);
+
+	igb_free_all_tx_resources(adapter);
+	igb_free_all_rx_resources(adapter);
+
+	if (!suspending)
+		pm_runtime_put_sync(&pdev->dev);
+	return 0;
+}
+
+int igb_close(struct net_device *netdev)
+{
+	if (netif_device_present(netdev) || netdev->dismantle)
+		return __igb_close(netdev, false);
+	return 0;
+}
+
+/**
+ *  igb_setup_tx_resources - allocate Tx resources (Descriptors)
+ *  @tx_ring: tx descriptor ring (for a specific queue) to setup
+ *
+ *  Return 0 on success, negative on failure
+ **/
+int igb_setup_tx_resources(struct igb_ring *tx_ring)
+{
+	struct device *dev = tx_ring->dev;
+	int size;
+
+	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
+
+	tx_ring->tx_buffer_info = vmalloc(size);
+	if (!tx_ring->tx_buffer_info)
+		goto err;
+
+	/* round up to nearest 4K */
+	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
+	tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+					   &tx_ring->dma, GFP_KERNEL);
+	if (!tx_ring->desc)
+		goto err;
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+	return 0;
+
+err:
+	vfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+	dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ *  igb_setup_all_tx_resources - wrapper to allocate Tx resources
+ *				 (Descriptors) for all queues
+ *  @adapter: board private structure
+ *
+ *  Return 0 on success, negative on failure
+ **/
+static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		err = igb_setup_tx_resources(adapter->tx_ring[i]);
+		if (err) {
+			dev_err(&pdev->dev,
+				"Allocation for Tx Queue %u failed\n", i);
+			for (i--; i >= 0; i--)
+				igb_free_tx_resources(adapter->tx_ring[i]);
+			break;
+		}
+	}
+
+	return err;
+}
+
+/**
+ *  igb_setup_tctl - configure the transmit control registers
+ *  @adapter: Board private structure
+ **/
+void igb_setup_tctl(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 tctl;
+
+	/* disable queue 0 which is enabled by default on 82575 and 82576 */
+	wr32(E1000_TXDCTL(0), 0);
+
+	/* Program the Transmit Control Register */
+	tctl = rd32(E1000_TCTL);
+	tctl &= ~E1000_TCTL_CT;
+	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
+		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
+
+	igb_config_collision_dist(hw);
+
+	/* Enable transmits */
+	tctl |= E1000_TCTL_EN;
+
+	wr32(E1000_TCTL, tctl);
+}
+
+/**
+ *  igb_configure_tx_ring - Configure transmit ring after Reset
+ *  @adapter: board private structure
+ *  @ring: tx ring to configure
+ *
+ *  Configure a transmit ring after a reset.
+ **/
+void igb_configure_tx_ring(struct igb_adapter *adapter,
+			   struct igb_ring *ring)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 txdctl = 0;
+	u64 tdba = ring->dma;
+	int reg_idx = ring->reg_idx;
+
+	wr32(E1000_TDLEN(reg_idx),
+	     ring->count * sizeof(union e1000_adv_tx_desc));
+	wr32(E1000_TDBAL(reg_idx),
+	     tdba & 0x00000000ffffffffULL);
+	wr32(E1000_TDBAH(reg_idx), tdba >> 32);
+
+	ring->tail = adapter->io_addr + E1000_TDT(reg_idx);
+	wr32(E1000_TDH(reg_idx), 0);
+	writel(0, ring->tail);
+
+	txdctl |= IGB_TX_PTHRESH;
+	txdctl |= IGB_TX_HTHRESH << 8;
+	txdctl |= IGB_TX_WTHRESH << 16;
+
+	/* reinitialize tx_buffer_info */
+	memset(ring->tx_buffer_info, 0,
+	       sizeof(struct igb_tx_buffer) * ring->count);
+
+	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
+	wr32(E1000_TXDCTL(reg_idx), txdctl);
+}
+
+/**
+ *  igb_configure_tx - Configure transmit Unit after Reset
+ *  @adapter: board private structure
+ *
+ *  Configure the Tx unit of the MAC after a reset.
+ **/
+static void igb_configure_tx(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int i;
+
+	/* disable the queues */
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		wr32(E1000_TXDCTL(adapter->tx_ring[i]->reg_idx), 0);
+
+	wrfl();
+	usleep_range(10000, 20000);
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
+}
+
+/**
+ *  igb_setup_rx_resources - allocate Rx resources (Descriptors)
+ *  @rx_ring: Rx descriptor ring (for a specific queue) to setup
+ *
+ *  Returns 0 on success, negative on failure
+ **/
+int igb_setup_rx_resources(struct igb_ring *rx_ring)
+{
+	struct igb_adapter *adapter = netdev_priv(rx_ring->netdev);
+	struct device *dev = rx_ring->dev;
+	int size, res;
+
+	/* XDP RX-queue info */
+	if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+	res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
+			       rx_ring->queue_index, 0);
+	if (res < 0) {
+		dev_err(dev, "Failed to register xdp_rxq index %u\n",
+			rx_ring->queue_index);
+		return res;
+	}
+
+	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
+
+	rx_ring->rx_buffer_info = vmalloc(size);
+	if (!rx_ring->rx_buffer_info)
+		goto err;
+
+	/* Round up to nearest 4K */
+	rx_ring->size = rx_ring->count * sizeof(union e1000_adv_rx_desc);
+	rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+					   &rx_ring->dma, GFP_KERNEL);
+	if (!rx_ring->desc)
+		goto err;
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+
+	rx_ring->xdp_prog = adapter->xdp_prog;
+
+	return 0;
+
+err:
+	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+	dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ *  igb_setup_all_rx_resources - wrapper to allocate Rx resources
+ *				 (Descriptors) for all queues
+ *  @adapter: board private structure
+ *
+ *  Return 0 on success, negative on failure
+ **/
+static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		err = igb_setup_rx_resources(adapter->rx_ring[i]);
+		if (err) {
+			dev_err(&pdev->dev,
+				"Allocation for Rx Queue %u failed\n", i);
+			for (i--; i >= 0; i--)
+				igb_free_rx_resources(adapter->rx_ring[i]);
+			break;
+		}
+	}
+
+	return err;
+}
+
+/**
+ *  igb_setup_mrqc - configure the multiple receive queue control registers
+ *  @adapter: Board private structure
+ **/
+static void igb_setup_mrqc(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 mrqc, rxcsum;
+	u32 j, num_rx_queues;
+	u32 rss_key[10];
+
+	netdev_rss_key_fill(rss_key, sizeof(rss_key));
+	for (j = 0; j < 10; j++)
+		wr32(E1000_RSSRK(j), rss_key[j]);
+
+	num_rx_queues = adapter->rss_queues;
+
+	switch (hw->mac.type) {
+	case e1000_82576:
+		/* 82576 supports 2 RSS queues for SR-IOV */
+		if (adapter->vfs_allocated_count)
+			num_rx_queues = 2;
+		break;
+	default:
+		break;
+	}
+
+	if (adapter->rss_indir_tbl_init != num_rx_queues) {
+		for (j = 0; j < IGB_RETA_SIZE; j++)
+			adapter->rss_indir_tbl[j] =
+			(j * num_rx_queues) / IGB_RETA_SIZE;
+		adapter->rss_indir_tbl_init = num_rx_queues;
+	}
+	igb_write_rss_indir_tbl(adapter);
+
+	/* Disable raw packet checksumming so that RSS hash is placed in
+	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
+	 * offloads as they are enabled by default
+	 */
+	rxcsum = rd32(E1000_RXCSUM);
+	rxcsum |= E1000_RXCSUM_PCSD;
+
+	if (adapter->hw.mac.type >= e1000_82576)
+		/* Enable Receive Checksum Offload for SCTP */
+		rxcsum |= E1000_RXCSUM_CRCOFL;
+
+	/* Don't need to set TUOFL or IPOFL, they default to 1 */
+	wr32(E1000_RXCSUM, rxcsum);
+
+	/* Generate RSS hash based on packet types, TCP/UDP
+	 * port numbers and/or IPv4/v6 src and dst addresses
+	 */
+	mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
+	       E1000_MRQC_RSS_FIELD_IPV4_TCP |
+	       E1000_MRQC_RSS_FIELD_IPV6 |
+	       E1000_MRQC_RSS_FIELD_IPV6_TCP |
+	       E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
+
+	if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
+		mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
+	if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
+		mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
+
+	/* If VMDq is enabled then we set the appropriate mode for that, else
+	 * we default to RSS so that an RSS hash is calculated per packet even
+	 * if we are only using one queue
+	 */
+	if (adapter->vfs_allocated_count) {
+		if (hw->mac.type > e1000_82575) {
+			/* Set the default pool for the PF's first queue */
+			u32 vtctl = rd32(E1000_VT_CTL);
+
+			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
+				   E1000_VT_CTL_DISABLE_DEF_POOL);
+			vtctl |= adapter->vfs_allocated_count <<
+				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
+			wr32(E1000_VT_CTL, vtctl);
+		}
+		if (adapter->rss_queues > 1)
+			mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_MQ;
+		else
+			mrqc |= E1000_MRQC_ENABLE_VMDQ;
+	} else {
+		mrqc |= E1000_MRQC_ENABLE_RSS_MQ;
+	}
+	igb_vmm_control(adapter);
+
+	wr32(E1000_MRQC, mrqc);
+}
+
+/**
+ *  igb_setup_rctl - configure the receive control registers
+ *  @adapter: Board private structure
+ **/
+void igb_setup_rctl(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rctl;
+
+	rctl = rd32(E1000_RCTL);
+
+	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
+	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
+
+	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
+		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
+
+	/* enable stripping of CRC. It's unlikely this will break BMC
+	 * redirection as it did with e1000. Newer features require
+	 * that the HW strips the CRC.
+	 */
+	rctl |= E1000_RCTL_SECRC;
+
+	/* disable store bad packets and clear size bits. */
+	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
+
+	/* enable LPE to allow for reception of jumbo frames */
+	rctl |= E1000_RCTL_LPE;
+
+	/* disable queue 0 to prevent tail write w/o re-config */
+	wr32(E1000_RXDCTL(0), 0);
+
+	/* Attention!!!  For SR-IOV PF driver operations you must enable
+	 * queue drop for all VF and PF queues to prevent head of line blocking
+	 * if an un-trusted VF does not provide descriptors to hardware.
+	 */
+	if (adapter->vfs_allocated_count) {
+		/* set all queue drop enable bits */
+		wr32(E1000_QDE, ALL_QUEUES);
+	}
+
+	/* This is useful for sniffing bad packets. */
+	if (adapter->netdev->features & NETIF_F_RXALL) {
+		/* UPE and MPE will be handled by normal PROMISC logic
+		 * in e1000e_set_rx_mode
+		 */
+		rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
+			 E1000_RCTL_BAM | /* RX All Bcast Pkts */
+			 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
+
+		rctl &= ~(E1000_RCTL_DPF | /* Allow filtered pause */
+			  E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
+		/* Do not mess with E1000_CTRL_VME, it affects transmit as well,
+		 * and that breaks VLANs.
+		 */
+	}
+
+	wr32(E1000_RCTL, rctl);
+}
+
+static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
+				   int vfn)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 vmolr;
+
+	if (size > MAX_JUMBO_FRAME_SIZE)
+		size = MAX_JUMBO_FRAME_SIZE;
+
+	vmolr = rd32(E1000_VMOLR(vfn));
+	vmolr &= ~E1000_VMOLR_RLPML_MASK;
+	vmolr |= size | E1000_VMOLR_LPE;
+	wr32(E1000_VMOLR(vfn), vmolr);
+
+	return 0;
+}
+
+static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter,
+					 int vfn, bool enable)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 val, reg;
+
+	if (hw->mac.type < e1000_82576)
+		return;
+
+	if (hw->mac.type == e1000_i350)
+		reg = E1000_DVMOLR(vfn);
+	else
+		reg = E1000_VMOLR(vfn);
+
+	val = rd32(reg);
+	if (enable)
+		val |= E1000_VMOLR_STRVLAN;
+	else
+		val &= ~(E1000_VMOLR_STRVLAN);
+	wr32(reg, val);
+}
+
+static inline void igb_set_vmolr(struct igb_adapter *adapter,
+				 int vfn, bool aupe)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 vmolr;
+
+	/* This register exists only on 82576 and newer so if we are older then
+	 * we should exit and do nothing
+	 */
+	if (hw->mac.type < e1000_82576)
+		return;
+
+	vmolr = rd32(E1000_VMOLR(vfn));
+	if (aupe)
+		vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
+	else
+		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
+
+	/* clear all bits that might not be set */
+	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
+
+	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
+		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
+	/* for VMDq only allow the VFs and pool 0 to accept broadcast and
+	 * multicast packets
+	 */
+	if (vfn <= adapter->vfs_allocated_count)
+		vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
+
+	wr32(E1000_VMOLR(vfn), vmolr);
+}
+
+/**
+ *  igb_setup_srrctl - configure the split and replication receive control
+ *                     registers
+ *  @adapter: Board private structure
+ *  @ring: receive ring to be configured
+ **/
+void igb_setup_srrctl(struct igb_adapter *adapter, struct igb_ring *ring)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int reg_idx = ring->reg_idx;
+	u32 srrctl = 0;
+
+	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+	if (ring_uses_large_buffer(ring))
+		srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+	else
+		srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+	srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
+	if (hw->mac.type >= e1000_82580)
+		srrctl |= E1000_SRRCTL_TIMESTAMP;
+	/* Only set Drop Enable if VFs allocated, or we are supporting multiple
+	 * queues and rx flow control is disabled
+	 */
+	if (adapter->vfs_allocated_count ||
+	    (!(hw->fc.current_mode & e1000_fc_rx_pause) &&
+	     adapter->num_rx_queues > 1))
+		srrctl |= E1000_SRRCTL_DROP_EN;
+
+	wr32(E1000_SRRCTL(reg_idx), srrctl);
+}
+
+/**
+ *  igb_configure_rx_ring - Configure a receive ring after Reset
+ *  @adapter: board private structure
+ *  @ring: receive ring to be configured
+ *
+ *  Configure the Rx unit of the MAC after a reset.
+ **/
+void igb_configure_rx_ring(struct igb_adapter *adapter,
+			   struct igb_ring *ring)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	union e1000_adv_rx_desc *rx_desc;
+	u64 rdba = ring->dma;
+	int reg_idx = ring->reg_idx;
+	u32 rxdctl = 0;
+
+	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+	WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+					   MEM_TYPE_PAGE_SHARED, NULL));
+
+	/* disable the queue */
+	wr32(E1000_RXDCTL(reg_idx), 0);
+
+	/* Set DMA base address registers */
+	wr32(E1000_RDBAL(reg_idx),
+	     rdba & 0x00000000ffffffffULL);
+	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
+	wr32(E1000_RDLEN(reg_idx),
+	     ring->count * sizeof(union e1000_adv_rx_desc));
+
+	/* initialize head and tail */
+	ring->tail = adapter->io_addr + E1000_RDT(reg_idx);
+	wr32(E1000_RDH(reg_idx), 0);
+	writel(0, ring->tail);
+
+	/* set descriptor configuration */
+	igb_setup_srrctl(adapter, ring);
+
+	/* set filtering for VMDQ pools */
+	igb_set_vmolr(adapter, reg_idx & 0x7, true);
+
+	rxdctl |= IGB_RX_PTHRESH;
+	rxdctl |= IGB_RX_HTHRESH << 8;
+	rxdctl |= IGB_RX_WTHRESH << 16;
+
+	/* initialize rx_buffer_info */
+	memset(ring->rx_buffer_info, 0,
+	       sizeof(struct igb_rx_buffer) * ring->count);
+
+	/* initialize Rx descriptor 0 */
+	rx_desc = IGB_RX_DESC(ring, 0);
+	rx_desc->wb.upper.length = 0;
+
+	/* enable receive descriptor fetching */
+	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
+	wr32(E1000_RXDCTL(reg_idx), rxdctl);
+}
+
+static void igb_set_rx_buffer_len(struct igb_adapter *adapter,
+				  struct igb_ring *rx_ring)
+{
+#if (PAGE_SIZE < 8192)
+	struct e1000_hw *hw = &adapter->hw;
+#endif
+
+	/* set build_skb and buffer size flags */
+	clear_ring_build_skb_enabled(rx_ring);
+	clear_ring_uses_large_buffer(rx_ring);
+
+	if (adapter->flags & IGB_FLAG_RX_LEGACY)
+		return;
+
+	set_ring_build_skb_enabled(rx_ring);
+
+#if (PAGE_SIZE < 8192)
+	if (adapter->max_frame_size > IGB_MAX_FRAME_BUILD_SKB ||
+	    rd32(E1000_RCTL) & E1000_RCTL_SBP)
+		set_ring_uses_large_buffer(rx_ring);
+#endif
+}
+
+/**
+ *  igb_configure_rx - Configure receive Unit after Reset
+ *  @adapter: board private structure
+ *
+ *  Configure the Rx unit of the MAC after a reset.
+ **/
+static void igb_configure_rx(struct igb_adapter *adapter)
+{
+	int i;
+
+	/* set the correct pool for the PF default MAC address in entry 0 */
+	igb_set_default_mac_filter(adapter);
+
+	/* Setup the HW Rx Head and Tail Descriptor Pointers and
+	 * the Base and Length of the Rx Descriptor Ring
+	 */
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct igb_ring *rx_ring = adapter->rx_ring[i];
+
+		igb_set_rx_buffer_len(adapter, rx_ring);
+		igb_configure_rx_ring(adapter, rx_ring);
+	}
+}
+
+/**
+ *  igb_free_tx_resources - Free Tx Resources per Queue
+ *  @tx_ring: Tx descriptor ring for a specific queue
+ *
+ *  Free all transmit software resources
+ **/
+void igb_free_tx_resources(struct igb_ring *tx_ring)
+{
+	igb_clean_tx_ring(tx_ring);
+
+	vfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+
+	/* if not set, then don't free */
+	if (!tx_ring->desc)
+		return;
+
+	dma_free_coherent(tx_ring->dev, tx_ring->size,
+			  tx_ring->desc, tx_ring->dma);
+
+	tx_ring->desc = NULL;
+}
+
+/**
+ *  igb_free_all_tx_resources - Free Tx Resources for All Queues
+ *  @adapter: board private structure
+ *
+ *  Free all transmit software resources
+ **/
+static void igb_free_all_tx_resources(struct igb_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		if (adapter->tx_ring[i])
+			igb_free_tx_resources(adapter->tx_ring[i]);
+}
+
+/**
+ *  igb_clean_tx_ring - Free Tx Buffers
+ *  @tx_ring: ring to be cleaned
+ **/
+static void igb_clean_tx_ring(struct igb_ring *tx_ring)
+{
+	u16 i = tx_ring->next_to_clean;
+	struct igb_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
+
+	while (i != tx_ring->next_to_use) {
+		union e1000_adv_tx_desc *eop_desc, *tx_desc;
+
+		/* Free all the Tx ring sk_buffs or xdp frames */
+		if (tx_buffer->type == IGB_TYPE_SKB)
+			dev_kfree_skb_any(tx_buffer->skb);
+		else
+			xdp_return_frame(tx_buffer->xdpf);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+
+		/* check for eop_desc to determine the end of the packet */
+		eop_desc = tx_buffer->next_to_watch;
+		tx_desc = IGB_TX_DESC(tx_ring, i);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(i == tx_ring->count)) {
+				i = 0;
+				tx_buffer = tx_ring->tx_buffer_info;
+				tx_desc = IGB_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len))
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buffer, dma),
+					       dma_unmap_len(tx_buffer, len),
+					       DMA_TO_DEVICE);
+		}
+
+		tx_buffer->next_to_watch = NULL;
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		i++;
+		if (unlikely(i == tx_ring->count)) {
+			i = 0;
+			tx_buffer = tx_ring->tx_buffer_info;
+		}
+	}
+
+	/* reset BQL for queue */
+	netdev_tx_reset_queue(txring_txq(tx_ring));
+
+	/* reset next_to_use and next_to_clean */
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+}
+
+/**
+ *  igb_clean_all_tx_rings - Free Tx Buffers for all queues
+ *  @adapter: board private structure
+ **/
+static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		if (adapter->tx_ring[i])
+			igb_clean_tx_ring(adapter->tx_ring[i]);
+}
+
+/**
+ *  igb_free_rx_resources - Free Rx Resources
+ *  @rx_ring: ring to clean the resources from
+ *
+ *  Free all receive software resources
+ **/
+void igb_free_rx_resources(struct igb_ring *rx_ring)
+{
+	igb_clean_rx_ring(rx_ring);
+
+	rx_ring->xdp_prog = NULL;
+	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+
+	/* if not set, then don't free */
+	if (!rx_ring->desc)
+		return;
+
+	dma_free_coherent(rx_ring->dev, rx_ring->size,
+			  rx_ring->desc, rx_ring->dma);
+
+	rx_ring->desc = NULL;
+}
+
+/**
+ *  igb_free_all_rx_resources - Free Rx Resources for All Queues
+ *  @adapter: board private structure
+ *
+ *  Free all receive software resources
+ **/
+static void igb_free_all_rx_resources(struct igb_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		if (adapter->rx_ring[i])
+			igb_free_rx_resources(adapter->rx_ring[i]);
+}
+
+/**
+ *  igb_clean_rx_ring - Free Rx Buffers per Queue
+ *  @rx_ring: ring to free buffers from
+ **/
+static void igb_clean_rx_ring(struct igb_ring *rx_ring)
+{
+	u16 i = rx_ring->next_to_clean;
+
+	dev_kfree_skb(rx_ring->skb);
+	rx_ring->skb = NULL;
+
+	/* Free all the Rx ring sk_buffs */
+	while (i != rx_ring->next_to_alloc) {
+		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
+
+		/* Invalidate cache lines that may have been written to by
+		 * device so that we avoid corrupting memory.
+		 */
+		dma_sync_single_range_for_cpu(rx_ring->dev,
+					      buffer_info->dma,
+					      buffer_info->page_offset,
+					      igb_rx_bufsz(rx_ring),
+					      DMA_FROM_DEVICE);
+
+		/* free resources associated with mapping */
+		dma_unmap_page_attrs(rx_ring->dev,
+				     buffer_info->dma,
+				     igb_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE,
+				     IGB_RX_DMA_ATTR);
+		__page_frag_cache_drain(buffer_info->page,
+					buffer_info->pagecnt_bias);
+
+		i++;
+		if (i == rx_ring->count)
+			i = 0;
+	}
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+}
+
+/**
+ *  igb_clean_all_rx_rings - Free Rx Buffers for all queues
+ *  @adapter: board private structure
+ **/
+static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		if (adapter->rx_ring[i])
+			igb_clean_rx_ring(adapter->rx_ring[i]);
+}
+
+/**
+ *  igb_set_mac - Change the Ethernet Address of the NIC
+ *  @netdev: network interface device structure
+ *  @p: pointer to an address structure
+ *
+ *  Returns 0 on success, negative on failure
+ **/
+static int igb_set_mac(struct net_device *netdev, void *p)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	eth_hw_addr_set(netdev, addr->sa_data);
+	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
+
+	/* set the correct pool for the new PF MAC address in entry 0 */
+	igb_set_default_mac_filter(adapter);
+
+	return 0;
+}
+
+/**
+ *  igb_write_mc_addr_list - write multicast addresses to MTA
+ *  @netdev: network interface device structure
+ *
+ *  Writes multicast address list to the MTA hash table.
+ *  Returns: -ENOMEM on failure
+ *           0 on no addresses written
+ *           X on writing X addresses to MTA
+ **/
+static int igb_write_mc_addr_list(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct netdev_hw_addr *ha;
+	u8  *mta_list;
+	int i;
+
+	if (netdev_mc_empty(netdev)) {
+		/* nothing to program, so clear mc list */
+		igb_update_mc_addr_list(hw, NULL, 0);
+		igb_restore_vf_multicasts(adapter);
+		return 0;
+	}
+
+	mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC);
+	if (!mta_list)
+		return -ENOMEM;
+
+	/* The shared function expects a packed array of only addresses. */
+	i = 0;
+	netdev_for_each_mc_addr(ha, netdev)
+		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
+
+	igb_update_mc_addr_list(hw, mta_list, i);
+	kfree(mta_list);
+
+	return netdev_mc_count(netdev);
+}
+
+static int igb_vlan_promisc_enable(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 i, pf_id;
+
+	switch (hw->mac.type) {
+	case e1000_i210:
+	case e1000_i211:
+	case e1000_i350:
+		/* VLAN filtering needed for VLAN prio filter */
+		if (adapter->netdev->features & NETIF_F_NTUPLE)
+			break;
+		fallthrough;
+	case e1000_82576:
+	case e1000_82580:
+	case e1000_i354:
+		/* VLAN filtering needed for pool filtering */
+		if (adapter->vfs_allocated_count)
+			break;
+		fallthrough;
+	default:
+		return 1;
+	}
+
+	/* We are already in VLAN promisc, nothing to do */
+	if (adapter->flags & IGB_FLAG_VLAN_PROMISC)
+		return 0;
+
+	if (!adapter->vfs_allocated_count)
+		goto set_vfta;
+
+	/* Add PF to all active pools */
+	pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT;
+
+	for (i = E1000_VLVF_ARRAY_SIZE; --i;) {
+		u32 vlvf = rd32(E1000_VLVF(i));
+
+		vlvf |= BIT(pf_id);
+		wr32(E1000_VLVF(i), vlvf);
+	}
+
+set_vfta:
+	/* Set all bits in the VLAN filter table array */
+	for (i = E1000_VLAN_FILTER_TBL_SIZE; i--;)
+		hw->mac.ops.write_vfta(hw, i, ~0U);
+
+	/* Set flag so we don't redo unnecessary work */
+	adapter->flags |= IGB_FLAG_VLAN_PROMISC;
+
+	return 0;
+}
+
+#define VFTA_BLOCK_SIZE 8
+static void igb_scrub_vfta(struct igb_adapter *adapter, u32 vfta_offset)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 vfta[VFTA_BLOCK_SIZE] = { 0 };
+	u32 vid_start = vfta_offset * 32;
+	u32 vid_end = vid_start + (VFTA_BLOCK_SIZE * 32);
+	u32 i, vid, word, bits, pf_id;
+
+	/* guarantee that we don't scrub out management VLAN */
+	vid = adapter->mng_vlan_id;
+	if (vid >= vid_start && vid < vid_end)
+		vfta[(vid - vid_start) / 32] |= BIT(vid % 32);
+
+	if (!adapter->vfs_allocated_count)
+		goto set_vfta;
+
+	pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT;
+
+	for (i = E1000_VLVF_ARRAY_SIZE; --i;) {
+		u32 vlvf = rd32(E1000_VLVF(i));
+
+		/* pull VLAN ID from VLVF */
+		vid = vlvf & VLAN_VID_MASK;
+
+		/* only concern ourselves with a certain range */
+		if (vid < vid_start || vid >= vid_end)
+			continue;
+
+		if (vlvf & E1000_VLVF_VLANID_ENABLE) {
+			/* record VLAN ID in VFTA */
+			vfta[(vid - vid_start) / 32] |= BIT(vid % 32);
+
+			/* if PF is part of this then continue */
+			if (test_bit(vid, adapter->active_vlans))
+				continue;
+		}
+
+		/* remove PF from the pool */
+		bits = ~BIT(pf_id);
+		bits &= rd32(E1000_VLVF(i));
+		wr32(E1000_VLVF(i), bits);
+	}
+
+set_vfta:
+	/* extract values from active_vlans and write back to VFTA */
+	for (i = VFTA_BLOCK_SIZE; i--;) {
+		vid = (vfta_offset + i) * 32;
+		word = vid / BITS_PER_LONG;
+		bits = vid % BITS_PER_LONG;
+
+		vfta[i] |= adapter->active_vlans[word] >> bits;
+
+		hw->mac.ops.write_vfta(hw, vfta_offset + i, vfta[i]);
+	}
+}
+
+static void igb_vlan_promisc_disable(struct igb_adapter *adapter)
+{
+	u32 i;
+
+	/* We are not in VLAN promisc, nothing to do */
+	if (!(adapter->flags & IGB_FLAG_VLAN_PROMISC))
+		return;
+
+	/* Set flag so we don't redo unnecessary work */
+	adapter->flags &= ~IGB_FLAG_VLAN_PROMISC;
+
+	for (i = 0; i < E1000_VLAN_FILTER_TBL_SIZE; i += VFTA_BLOCK_SIZE)
+		igb_scrub_vfta(adapter, i);
+}
+
+/**
+ *  igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
+ *  @netdev: network interface device structure
+ *
+ *  The set_rx_mode entry point is called whenever the unicast or multicast
+ *  address lists or the network interface flags are updated.  This routine is
+ *  responsible for configuring the hardware for proper unicast, multicast,
+ *  promiscuous mode, and all-multi behavior.
+ **/
+static void igb_set_rx_mode(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned int vfn = adapter->vfs_allocated_count;
+	u32 rctl = 0, vmolr = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
+	int count;
+
+	/* Check for Promiscuous and All Multicast modes */
+	if (netdev->flags & IFF_PROMISC) {
+		rctl |= E1000_RCTL_UPE | E1000_RCTL_MPE;
+		vmolr |= E1000_VMOLR_MPME;
+
+		/* enable use of UTA filter to force packets to default pool */
+		if (hw->mac.type == e1000_82576)
+			vmolr |= E1000_VMOLR_ROPE;
+	} else {
+		if (netdev->flags & IFF_ALLMULTI) {
+			rctl |= E1000_RCTL_MPE;
+			vmolr |= E1000_VMOLR_MPME;
+		} else {
+			/* Write addresses to the MTA, if the attempt fails
+			 * then we should just turn on promiscuous mode so
+			 * that we can at least receive multicast traffic
+			 */
+			count = igb_write_mc_addr_list(netdev);
+			if (count < 0) {
+				rctl |= E1000_RCTL_MPE;
+				vmolr |= E1000_VMOLR_MPME;
+			} else if (count) {
+				vmolr |= E1000_VMOLR_ROMPE;
+			}
+		}
+	}
+
+	/* Write addresses to available RAR registers, if there is not
+	 * sufficient space to store all the addresses then enable
+	 * unicast promiscuous mode
+	 */
+	if (__dev_uc_sync(netdev, igb_uc_sync, igb_uc_unsync)) {
+		rctl |= E1000_RCTL_UPE;
+		vmolr |= E1000_VMOLR_ROPE;
+	}
+
+	/* enable VLAN filtering by default */
+	rctl |= E1000_RCTL_VFE;
+
+	/* disable VLAN filtering for modes that require it */
+	if ((netdev->flags & IFF_PROMISC) ||
+	    (netdev->features & NETIF_F_RXALL)) {
+		/* if we fail to set all rules then just clear VFE */
+		if (igb_vlan_promisc_enable(adapter))
+			rctl &= ~E1000_RCTL_VFE;
+	} else {
+		igb_vlan_promisc_disable(adapter);
+	}
+
+	/* update state of unicast, multicast, and VLAN filtering modes */
+	rctl |= rd32(E1000_RCTL) & ~(E1000_RCTL_UPE | E1000_RCTL_MPE |
+				     E1000_RCTL_VFE);
+	wr32(E1000_RCTL, rctl);
+
+#if (PAGE_SIZE < 8192)
+	if (!adapter->vfs_allocated_count) {
+		if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB)
+			rlpml = IGB_MAX_FRAME_BUILD_SKB;
+	}
+#endif
+	wr32(E1000_RLPML, rlpml);
+
+	/* In order to support SR-IOV and eventually VMDq it is necessary to set
+	 * the VMOLR to enable the appropriate modes.  Without this workaround
+	 * we will have issues with VLAN tag stripping not being done for frames
+	 * that are only arriving because we are the default pool
+	 */
+	if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350))
+		return;
+
+	/* set UTA to appropriate mode */
+	igb_set_uta(adapter, !!(vmolr & E1000_VMOLR_ROPE));
+
+	vmolr |= rd32(E1000_VMOLR(vfn)) &
+		 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
+
+	/* enable Rx jumbo frames, restrict as needed to support build_skb */
+	vmolr &= ~E1000_VMOLR_RLPML_MASK;
+#if (PAGE_SIZE < 8192)
+	if (adapter->max_frame_size <= IGB_MAX_FRAME_BUILD_SKB)
+		vmolr |= IGB_MAX_FRAME_BUILD_SKB;
+	else
+#endif
+		vmolr |= MAX_JUMBO_FRAME_SIZE;
+	vmolr |= E1000_VMOLR_LPE;
+
+	wr32(E1000_VMOLR(vfn), vmolr);
+
+	igb_restore_vf_multicasts(adapter);
+}
+
+static void igb_check_wvbr(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 wvbr = 0;
+
+	switch (hw->mac.type) {
+	case e1000_82576:
+	case e1000_i350:
+		wvbr = rd32(E1000_WVBR);
+		if (!wvbr)
+			return;
+		break;
+	default:
+		break;
+	}
+
+	adapter->wvbr |= wvbr;
+}
+
+#define IGB_STAGGERED_QUEUE_OFFSET 8
+
+static void igb_spoof_check(struct igb_adapter *adapter)
+{
+	int j;
+
+	if (!adapter->wvbr)
+		return;
+
+	for (j = 0; j < adapter->vfs_allocated_count; j++) {
+		if (adapter->wvbr & BIT(j) ||
+		    adapter->wvbr & BIT(j + IGB_STAGGERED_QUEUE_OFFSET)) {
+			dev_warn(&adapter->pdev->dev,
+				"Spoof event(s) detected on VF %d\n", j);
+			adapter->wvbr &=
+				~(BIT(j) |
+				  BIT(j + IGB_STAGGERED_QUEUE_OFFSET));
+		}
+	}
+}
+
+/* Need to wait a few seconds after link up to get diagnostic information from
+ * the phy
+ */
+static void igb_update_phy_info(struct timer_list *t)
+{
+	struct igb_adapter *adapter = from_timer(adapter, t, phy_info_timer);
+	igb_get_phy_info(&adapter->hw);
+}
+
+/**
+ *  igb_has_link - check shared code for link and determine up/down
+ *  @adapter: pointer to driver private info
+ **/
+bool igb_has_link(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	bool link_active = false;
+
+	/* get_link_status is set on LSC (link status) interrupt or
+	 * rx sequence error interrupt.  get_link_status will stay
+	 * false until the e1000_check_for_link establishes link
+	 * for copper adapters ONLY
+	 */
+	switch (hw->phy.media_type) {
+	case e1000_media_type_copper:
+		if (!hw->mac.get_link_status)
+			return true;
+		fallthrough;
+	case e1000_media_type_internal_serdes:
+		hw->mac.ops.check_for_link(hw);
+		link_active = !hw->mac.get_link_status;
+		break;
+	default:
+	case e1000_media_type_unknown:
+		break;
+	}
+
+	if (((hw->mac.type == e1000_i210) ||
+	     (hw->mac.type == e1000_i211)) &&
+	     (hw->phy.id == I210_I_PHY_ID)) {
+		if (!netif_carrier_ok(adapter->netdev)) {
+			adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
+		} else if (!(adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)) {
+			adapter->flags |= IGB_FLAG_NEED_LINK_UPDATE;
+			adapter->link_check_timeout = jiffies;
+		}
+	}
+
+	return link_active;
+}
+
+static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
+{
+	bool ret = false;
+	u32 ctrl_ext, thstat;
+
+	/* check for thermal sensor event on i350 copper only */
+	if (hw->mac.type == e1000_i350) {
+		thstat = rd32(E1000_THSTAT);
+		ctrl_ext = rd32(E1000_CTRL_EXT);
+
+		if ((hw->phy.media_type == e1000_media_type_copper) &&
+		    !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII))
+			ret = !!(thstat & event);
+	}
+
+	return ret;
+}
+
+/**
+ *  igb_check_lvmmc - check for malformed packets received
+ *  and indicated in LVMMC register
+ *  @adapter: pointer to adapter
+ **/
+static void igb_check_lvmmc(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 lvmmc;
+
+	lvmmc = rd32(E1000_LVMMC);
+	if (lvmmc) {
+		if (unlikely(net_ratelimit())) {
+			netdev_warn(adapter->netdev,
+				    "malformed Tx packet detected and dropped, LVMMC:0x%08x\n",
+				    lvmmc);
+		}
+	}
+}
+
+/**
+ *  igb_watchdog - Timer Call-back
+ *  @t: pointer to timer_list containing our private info pointer
+ **/
+static void igb_watchdog(struct timer_list *t)
+{
+	struct igb_adapter *adapter = from_timer(adapter, t, watchdog_timer);
+	/* Do the rest outside of interrupt context */
+	schedule_work(&adapter->watchdog_task);
+}
+
+static void igb_watchdog_task(struct work_struct *work)
+{
+	struct igb_adapter *adapter = container_of(work,
+						   struct igb_adapter,
+						   watchdog_task);
+	struct e1000_hw *hw = &adapter->hw;
+	struct e1000_phy_info *phy = &hw->phy;
+	struct net_device *netdev = adapter->netdev;
+	u32 link;
+	int i;
+	u32 connsw;
+	u16 phy_data, retry_count = 20;
+
+	link = igb_has_link(adapter);
+
+	if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) {
+		if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
+			adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
+		else
+			link = false;
+	}
+
+	/* Force link down if we have fiber to swap to */
+	if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
+		if (hw->phy.media_type == e1000_media_type_copper) {
+			connsw = rd32(E1000_CONNSW);
+			if (!(connsw & E1000_CONNSW_AUTOSENSE_EN))
+				link = 0;
+		}
+	}
+	if (link) {
+		/* Perform a reset if the media type changed. */
+		if (hw->dev_spec._82575.media_changed) {
+			hw->dev_spec._82575.media_changed = false;
+			adapter->flags |= IGB_FLAG_MEDIA_RESET;
+			igb_reset(adapter);
+		}
+		/* Cancel scheduled suspend requests. */
+		pm_runtime_resume(netdev->dev.parent);
+
+		if (!netif_carrier_ok(netdev)) {
+			u32 ctrl;
+
+			hw->mac.ops.get_speed_and_duplex(hw,
+							 &adapter->link_speed,
+							 &adapter->link_duplex);
+
+			ctrl = rd32(E1000_CTRL);
+			/* Links status message must follow this format */
+			netdev_info(netdev,
+			       "igb: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
+			       netdev->name,
+			       adapter->link_speed,
+			       adapter->link_duplex == FULL_DUPLEX ?
+			       "Full" : "Half",
+			       (ctrl & E1000_CTRL_TFCE) &&
+			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
+			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
+			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
+
+			/* disable EEE if enabled */
+			if ((adapter->flags & IGB_FLAG_EEE) &&
+				(adapter->link_duplex == HALF_DUPLEX)) {
+				dev_info(&adapter->pdev->dev,
+				"EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex.\n");
+				adapter->hw.dev_spec._82575.eee_disable = true;
+				adapter->flags &= ~IGB_FLAG_EEE;
+			}
+
+			/* check if SmartSpeed worked */
+			igb_check_downshift(hw);
+			if (phy->speed_downgraded)
+				netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
+
+			/* check for thermal sensor event */
+			if (igb_thermal_sensor_event(hw,
+			    E1000_THSTAT_LINK_THROTTLE))
+				netdev_info(netdev, "The network adapter link speed was downshifted because it overheated\n");
+
+			/* adjust timeout factor according to speed/duplex */
+			adapter->tx_timeout_factor = 1;
+			switch (adapter->link_speed) {
+			case SPEED_10:
+				adapter->tx_timeout_factor = 14;
+				break;
+			case SPEED_100:
+				/* maybe add some timeout factor ? */
+				break;
+			}
+
+			if (adapter->link_speed != SPEED_1000 ||
+			    !hw->phy.ops.read_reg)
+				goto no_wait;
+
+			/* wait for Remote receiver status OK */
+retry_read_status:
+			if (!igb_read_phy_reg(hw, PHY_1000T_STATUS,
+					      &phy_data)) {
+				if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
+				    retry_count) {
+					msleep(100);
+					retry_count--;
+					goto retry_read_status;
+				} else if (!retry_count) {
+					dev_err(&adapter->pdev->dev, "exceed max 2 second\n");
+				}
+			} else {
+				dev_err(&adapter->pdev->dev, "read 1000Base-T Status Reg\n");
+			}
+no_wait:
+			netif_carrier_on(netdev);
+
+			igb_ping_all_vfs(adapter);
+			igb_check_vf_rate_limit(adapter);
+
+			/* link state has changed, schedule phy info update */
+			if (!test_bit(__IGB_DOWN, &adapter->state))
+				mod_timer(&adapter->phy_info_timer,
+					  round_jiffies(jiffies + 2 * HZ));
+		}
+	} else {
+		if (netif_carrier_ok(netdev)) {
+			adapter->link_speed = 0;
+			adapter->link_duplex = 0;
+
+			/* check for thermal sensor event */
+			if (igb_thermal_sensor_event(hw,
+			    E1000_THSTAT_PWR_DOWN)) {
+				netdev_err(netdev, "The network adapter was stopped because it overheated\n");
+			}
+
+			/* Links status message must follow this format */
+			netdev_info(netdev, "igb: %s NIC Link is Down\n",
+			       netdev->name);
+			netif_carrier_off(netdev);
+
+			igb_ping_all_vfs(adapter);
+
+			/* link state has changed, schedule phy info update */
+			if (!test_bit(__IGB_DOWN, &adapter->state))
+				mod_timer(&adapter->phy_info_timer,
+					  round_jiffies(jiffies + 2 * HZ));
+
+			/* link is down, time to check for alternate media */
+			if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
+				igb_check_swap_media(adapter);
+				if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
+					schedule_work(&adapter->reset_task);
+					/* return immediately */
+					return;
+				}
+			}
+			pm_schedule_suspend(netdev->dev.parent,
+					    MSEC_PER_SEC * 5);
+
+		/* also check for alternate media here */
+		} else if (!netif_carrier_ok(netdev) &&
+			   (adapter->flags & IGB_FLAG_MAS_ENABLE)) {
+			igb_check_swap_media(adapter);
+			if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
+				schedule_work(&adapter->reset_task);
+				/* return immediately */
+				return;
+			}
+		}
+	}
+
+	spin_lock(&adapter->stats64_lock);
+	igb_update_stats(adapter);
+	spin_unlock(&adapter->stats64_lock);
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igb_ring *tx_ring = adapter->tx_ring[i];
+		if (!netif_carrier_ok(netdev)) {
+			/* We've lost link, so the controller stops DMA,
+			 * but we've got queued Tx work that's never going
+			 * to get done, so reset controller to flush Tx.
+			 * (Do the reset outside of interrupt context).
+			 */
+			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
+				adapter->tx_timeout_count++;
+				schedule_work(&adapter->reset_task);
+				/* return immediately since reset is imminent */
+				return;
+			}
+		}
+
+		/* Force detection of hung controller every watchdog period */
+		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
+	}
+
+	/* Cause software interrupt to ensure Rx ring is cleaned */
+	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
+		u32 eics = 0;
+
+		for (i = 0; i < adapter->num_q_vectors; i++)
+			eics |= adapter->q_vector[i]->eims_value;
+		wr32(E1000_EICS, eics);
+	} else {
+		wr32(E1000_ICS, E1000_ICS_RXDMT0);
+	}
+
+	igb_spoof_check(adapter);
+	igb_ptp_rx_hang(adapter);
+	igb_ptp_tx_hang(adapter);
+
+	/* Check LVMMC register on i350/i354 only */
+	if ((adapter->hw.mac.type == e1000_i350) ||
+	    (adapter->hw.mac.type == e1000_i354))
+		igb_check_lvmmc(adapter);
+
+	/* Reset the timer */
+	if (!test_bit(__IGB_DOWN, &adapter->state)) {
+		if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)
+			mod_timer(&adapter->watchdog_timer,
+				  round_jiffies(jiffies +  HZ));
+		else
+			mod_timer(&adapter->watchdog_timer,
+				  round_jiffies(jiffies + 2 * HZ));
+	}
+}
+
+enum latency_range {
+	lowest_latency = 0,
+	low_latency = 1,
+	bulk_latency = 2,
+	latency_invalid = 255
+};
+
+/**
+ *  igb_update_ring_itr - update the dynamic ITR value based on packet size
+ *  @q_vector: pointer to q_vector
+ *
+ *  Stores a new ITR value based on strictly on packet size.  This
+ *  algorithm is less sophisticated than that used in igb_update_itr,
+ *  due to the difficulty of synchronizing statistics across multiple
+ *  receive rings.  The divisors and thresholds used by this function
+ *  were determined based on theoretical maximum wire speed and testing
+ *  data, in order to minimize response time while increasing bulk
+ *  throughput.
+ *  This functionality is controlled by ethtool's coalescing settings.
+ *  NOTE:  This function is called only when operating in a multiqueue
+ *         receive environment.
+ **/
+static void igb_update_ring_itr(struct igb_q_vector *q_vector)
+{
+	int new_val = q_vector->itr_val;
+	int avg_wire_size = 0;
+	struct igb_adapter *adapter = q_vector->adapter;
+	unsigned int packets;
+
+	/* For non-gigabit speeds, just fix the interrupt rate at 4000
+	 * ints/sec - ITR timer value of 120 ticks.
+	 */
+	if (adapter->link_speed != SPEED_1000) {
+		new_val = IGB_4K_ITR;
+		goto set_itr_val;
+	}
+
+	packets = q_vector->rx.total_packets;
+	if (packets)
+		avg_wire_size = q_vector->rx.total_bytes / packets;
+
+	packets = q_vector->tx.total_packets;
+	if (packets)
+		avg_wire_size = max_t(u32, avg_wire_size,
+				      q_vector->tx.total_bytes / packets);
+
+	/* if avg_wire_size isn't set no work was done */
+	if (!avg_wire_size)
+		goto clear_counts;
+
+	/* Add 24 bytes to size to account for CRC, preamble, and gap */
+	avg_wire_size += 24;
+
+	/* Don't starve jumbo frames */
+	avg_wire_size = min(avg_wire_size, 3000);
+
+	/* Give a little boost to mid-size frames */
+	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
+		new_val = avg_wire_size / 3;
+	else
+		new_val = avg_wire_size / 2;
+
+	/* conservative mode (itr 3) eliminates the lowest_latency setting */
+	if (new_val < IGB_20K_ITR &&
+	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
+		new_val = IGB_20K_ITR;
+
+set_itr_val:
+	if (new_val != q_vector->itr_val) {
+		q_vector->itr_val = new_val;
+		q_vector->set_itr = 1;
+	}
+clear_counts:
+	q_vector->rx.total_bytes = 0;
+	q_vector->rx.total_packets = 0;
+	q_vector->tx.total_bytes = 0;
+	q_vector->tx.total_packets = 0;
+}
+
+/**
+ *  igb_update_itr - update the dynamic ITR value based on statistics
+ *  @q_vector: pointer to q_vector
+ *  @ring_container: ring info to update the itr for
+ *
+ *  Stores a new ITR value based on packets and byte
+ *  counts during the last interrupt.  The advantage of per interrupt
+ *  computation is faster updates and more accurate ITR for the current
+ *  traffic pattern.  Constants in this function were computed
+ *  based on theoretical maximum wire speed and thresholds were set based
+ *  on testing data as well as attempting to minimize response time
+ *  while increasing bulk throughput.
+ *  This functionality is controlled by ethtool's coalescing settings.
+ *  NOTE:  These calculations are only valid when operating in a single-
+ *         queue environment.
+ **/
+static void igb_update_itr(struct igb_q_vector *q_vector,
+			   struct igb_ring_container *ring_container)
+{
+	unsigned int packets = ring_container->total_packets;
+	unsigned int bytes = ring_container->total_bytes;
+	u8 itrval = ring_container->itr;
+
+	/* no packets, exit with status unchanged */
+	if (packets == 0)
+		return;
+
+	switch (itrval) {
+	case lowest_latency:
+		/* handle TSO and jumbo frames */
+		if (bytes/packets > 8000)
+			itrval = bulk_latency;
+		else if ((packets < 5) && (bytes > 512))
+			itrval = low_latency;
+		break;
+	case low_latency:  /* 50 usec aka 20000 ints/s */
+		if (bytes > 10000) {
+			/* this if handles the TSO accounting */
+			if (bytes/packets > 8000)
+				itrval = bulk_latency;
+			else if ((packets < 10) || ((bytes/packets) > 1200))
+				itrval = bulk_latency;
+			else if ((packets > 35))
+				itrval = lowest_latency;
+		} else if (bytes/packets > 2000) {
+			itrval = bulk_latency;
+		} else if (packets <= 2 && bytes < 512) {
+			itrval = lowest_latency;
+		}
+		break;
+	case bulk_latency: /* 250 usec aka 4000 ints/s */
+		if (bytes > 25000) {
+			if (packets > 35)
+				itrval = low_latency;
+		} else if (bytes < 1500) {
+			itrval = low_latency;
+		}
+		break;
+	}
+
+	/* clear work counters since we have the values we need */
+	ring_container->total_bytes = 0;
+	ring_container->total_packets = 0;
+
+	/* write updated itr to ring container */
+	ring_container->itr = itrval;
+}
+
+static void igb_set_itr(struct igb_q_vector *q_vector)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	u32 new_itr = q_vector->itr_val;
+	u8 current_itr = 0;
+
+	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
+	if (adapter->link_speed != SPEED_1000) {
+		current_itr = 0;
+		new_itr = IGB_4K_ITR;
+		goto set_itr_now;
+	}
+
+	igb_update_itr(q_vector, &q_vector->tx);
+	igb_update_itr(q_vector, &q_vector->rx);
+
+	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
+
+	/* conservative mode (itr 3) eliminates the lowest_latency setting */
+	if (current_itr == lowest_latency &&
+	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
+		current_itr = low_latency;
+
+	switch (current_itr) {
+	/* counts and packets in update_itr are dependent on these numbers */
+	case lowest_latency:
+		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
+		break;
+	case low_latency:
+		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
+		break;
+	case bulk_latency:
+		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
+		break;
+	default:
+		break;
+	}
+
+set_itr_now:
+	if (new_itr != q_vector->itr_val) {
+		/* this attempts to bias the interrupt rate towards Bulk
+		 * by adding intermediate steps when interrupt rate is
+		 * increasing
+		 */
+		new_itr = new_itr > q_vector->itr_val ?
+			  max((new_itr * q_vector->itr_val) /
+			  (new_itr + (q_vector->itr_val >> 2)),
+			  new_itr) : new_itr;
+		/* Don't write the value here; it resets the adapter's
+		 * internal timer, and causes us to delay far longer than
+		 * we should between interrupts.  Instead, we write the ITR
+		 * value at the beginning of the next interrupt so the timing
+		 * ends up being correct.
+		 */
+		q_vector->itr_val = new_itr;
+		q_vector->set_itr = 1;
+	}
+}
+
+static void igb_tx_ctxtdesc(struct igb_ring *tx_ring,
+			    struct igb_tx_buffer *first,
+			    u32 vlan_macip_lens, u32 type_tucmd,
+			    u32 mss_l4len_idx)
+{
+	struct e1000_adv_tx_context_desc *context_desc;
+	u16 i = tx_ring->next_to_use;
+	struct timespec64 ts;
+
+	context_desc = IGB_TX_CTXTDESC(tx_ring, i);
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	/* set bits to identify this as an advanced context descriptor */
+	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
+
+	/* For 82575, context index must be unique per ring. */
+	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
+		mss_l4len_idx |= tx_ring->reg_idx << 4;
+
+	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
+	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
+	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
+
+	/* We assume there is always a valid tx time available. Invalid times
+	 * should have been handled by the upper layers.
+	 */
+	if (tx_ring->launchtime_enable) {
+		ts = ktime_to_timespec64(first->skb->tstamp);
+		skb_txtime_consumed(first->skb);
+		context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32);
+	} else {
+		context_desc->seqnum_seed = 0;
+	}
+}
+
+static int igb_tso(struct igb_ring *tx_ring,
+		   struct igb_tx_buffer *first,
+		   u8 *hdr_len)
+{
+	u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
+	struct sk_buff *skb = first->skb;
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		struct udphdr *udp;
+		unsigned char *hdr;
+	} l4;
+	u32 paylen, l4_offset;
+	int err;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	err = skb_cow_head(skb, 0);
+	if (err < 0)
+		return err;
+
+	ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_checksum_start(skb);
+
+	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
+	type_tucmd = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ?
+		      E1000_ADVTXD_TUCMD_L4T_UDP : E1000_ADVTXD_TUCMD_L4T_TCP;
+
+	/* initialize outer IP header fields */
+	if (ip.v4->version == 4) {
+		unsigned char *csum_start = skb_checksum_start(skb);
+		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
+
+		/* IP header will have to cancel out any data that
+		 * is not a part of the outer IP header
+		 */
+		ip.v4->check = csum_fold(csum_partial(trans_start,
+						      csum_start - trans_start,
+						      0));
+		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
+
+		ip.v4->tot_len = 0;
+		first->tx_flags |= IGB_TX_FLAGS_TSO |
+				   IGB_TX_FLAGS_CSUM |
+				   IGB_TX_FLAGS_IPV4;
+	} else {
+		ip.v6->payload_len = 0;
+		first->tx_flags |= IGB_TX_FLAGS_TSO |
+				   IGB_TX_FLAGS_CSUM;
+	}
+
+	/* determine offset of inner transport header */
+	l4_offset = l4.hdr - skb->data;
+
+	/* remove payload length from inner checksum */
+	paylen = skb->len - l4_offset;
+	if (type_tucmd & E1000_ADVTXD_TUCMD_L4T_TCP) {
+		/* compute length of segmentation header */
+		*hdr_len = (l4.tcp->doff * 4) + l4_offset;
+		csum_replace_by_diff(&l4.tcp->check,
+			(__force __wsum)htonl(paylen));
+	} else {
+		/* compute length of segmentation header */
+		*hdr_len = sizeof(*l4.udp) + l4_offset;
+		csum_replace_by_diff(&l4.udp->check,
+				     (__force __wsum)htonl(paylen));
+	}
+
+	/* update gso size and bytecount with header size */
+	first->gso_segs = skb_shinfo(skb)->gso_segs;
+	first->bytecount += (first->gso_segs - 1) * *hdr_len;
+
+	/* MSS L4LEN IDX */
+	mss_l4len_idx = (*hdr_len - l4_offset) << E1000_ADVTXD_L4LEN_SHIFT;
+	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
+
+	/* VLAN MACLEN IPLEN */
+	vlan_macip_lens = l4.hdr - ip.hdr;
+	vlan_macip_lens |= (ip.hdr - skb->data) << E1000_ADVTXD_MACLEN_SHIFT;
+	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
+
+	igb_tx_ctxtdesc(tx_ring, first, vlan_macip_lens,
+			type_tucmd, mss_l4len_idx);
+
+	return 1;
+}
+
+static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
+{
+	struct sk_buff *skb = first->skb;
+	u32 vlan_macip_lens = 0;
+	u32 type_tucmd = 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+csum_failed:
+		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN) &&
+		    !tx_ring->launchtime_enable)
+			return;
+		goto no_csum;
+	}
+
+	switch (skb->csum_offset) {
+	case offsetof(struct tcphdr, check):
+		type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
+		fallthrough;
+	case offsetof(struct udphdr, check):
+		break;
+	case offsetof(struct sctphdr, checksum):
+		/* validate that this is actually an SCTP request */
+		if (skb_csum_is_sctp(skb)) {
+			type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP;
+			break;
+		}
+		fallthrough;
+	default:
+		skb_checksum_help(skb);
+		goto csum_failed;
+	}
+
+	/* update TX checksum flag */
+	first->tx_flags |= IGB_TX_FLAGS_CSUM;
+	vlan_macip_lens = skb_checksum_start_offset(skb) -
+			  skb_network_offset(skb);
+no_csum:
+	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
+	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
+
+	igb_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0);
+}
+
+#define IGB_SET_FLAG(_input, _flag, _result) \
+	((_flag <= _result) ? \
+	 ((u32)(_input & _flag) * (_result / _flag)) : \
+	 ((u32)(_input & _flag) / (_flag / _result)))
+
+static u32 igb_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
+{
+	/* set type for advanced descriptor with frame checksum insertion */
+	u32 cmd_type = E1000_ADVTXD_DTYP_DATA |
+		       E1000_ADVTXD_DCMD_DEXT |
+		       E1000_ADVTXD_DCMD_IFCS;
+
+	/* set HW vlan bit if vlan is present */
+	cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_VLAN,
+				 (E1000_ADVTXD_DCMD_VLE));
+
+	/* set segmentation bits for TSO */
+	cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSO,
+				 (E1000_ADVTXD_DCMD_TSE));
+
+	/* set timestamp bit if present */
+	cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSTAMP,
+				 (E1000_ADVTXD_MAC_TSTAMP));
+
+	/* insert frame checksum */
+	cmd_type ^= IGB_SET_FLAG(skb->no_fcs, 1, E1000_ADVTXD_DCMD_IFCS);
+
+	return cmd_type;
+}
+
+static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
+				 union e1000_adv_tx_desc *tx_desc,
+				 u32 tx_flags, unsigned int paylen)
+{
+	u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
+
+	/* 82575 requires a unique index per ring */
+	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
+		olinfo_status |= tx_ring->reg_idx << 4;
+
+	/* insert L4 checksum */
+	olinfo_status |= IGB_SET_FLAG(tx_flags,
+				      IGB_TX_FLAGS_CSUM,
+				      (E1000_TXD_POPTS_TXSM << 8));
+
+	/* insert IPv4 checksum */
+	olinfo_status |= IGB_SET_FLAG(tx_flags,
+				      IGB_TX_FLAGS_IPV4,
+				      (E1000_TXD_POPTS_IXSM << 8));
+
+	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+}
+
+static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
+{
+	struct net_device *netdev = tx_ring->netdev;
+
+	netif_stop_subqueue(netdev, tx_ring->queue_index);
+
+	/* Herbert's original patch had:
+	 *  smp_mb__after_netif_stop_queue();
+	 * but since that doesn't exist yet, just open code it.
+	 */
+	smp_mb();
+
+	/* We need to check again in a case another CPU has just
+	 * made room available.
+	 */
+	if (igb_desc_unused(tx_ring) < size)
+		return -EBUSY;
+
+	/* A reprieve! */
+	netif_wake_subqueue(netdev, tx_ring->queue_index);
+
+	u64_stats_update_begin(&tx_ring->tx_syncp2);
+	tx_ring->tx_stats.restart_queue2++;
+	u64_stats_update_end(&tx_ring->tx_syncp2);
+
+	return 0;
+}
+
+static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
+{
+	if (igb_desc_unused(tx_ring) >= size)
+		return 0;
+	return __igb_maybe_stop_tx(tx_ring, size);
+}
+
+static int igb_tx_map(struct igb_ring *tx_ring,
+		      struct igb_tx_buffer *first,
+		      const u8 hdr_len)
+{
+	struct sk_buff *skb = first->skb;
+	struct igb_tx_buffer *tx_buffer;
+	union e1000_adv_tx_desc *tx_desc;
+	skb_frag_t *frag;
+	dma_addr_t dma;
+	unsigned int data_len, size;
+	u32 tx_flags = first->tx_flags;
+	u32 cmd_type = igb_tx_cmd_type(skb, tx_flags);
+	u16 i = tx_ring->next_to_use;
+
+	tx_desc = IGB_TX_DESC(tx_ring, i);
+
+	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
+
+	size = skb_headlen(skb);
+	data_len = skb->data_len;
+
+	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+	tx_buffer = first;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_error;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buffer, len, size);
+		dma_unmap_addr_set(tx_buffer, dma, dma);
+
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
+			tx_desc->read.cmd_type_len =
+				cpu_to_le32(cmd_type ^ IGB_MAX_DATA_PER_TXD);
+
+			i++;
+			tx_desc++;
+			if (i == tx_ring->count) {
+				tx_desc = IGB_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+			tx_desc->read.olinfo_status = 0;
+
+			dma += IGB_MAX_DATA_PER_TXD;
+			size -= IGB_MAX_DATA_PER_TXD;
+
+			tx_desc->read.buffer_addr = cpu_to_le64(dma);
+		}
+
+		if (likely(!data_len))
+			break;
+
+		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
+
+		i++;
+		tx_desc++;
+		if (i == tx_ring->count) {
+			tx_desc = IGB_TX_DESC(tx_ring, 0);
+			i = 0;
+		}
+		tx_desc->read.olinfo_status = 0;
+
+		size = skb_frag_size(frag);
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
+				       size, DMA_TO_DEVICE);
+
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+	}
+
+	/* write last descriptor with RS and EOP bits */
+	cmd_type |= size | IGB_TXD_DCMD;
+	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+
+	/* set the timestamp */
+	first->time_stamp = jiffies;
+
+	skb_tx_timestamp(skb);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.  (Only applicable for weak-ordered
+	 * memory model archs, such as IA-64).
+	 *
+	 * We also need this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	dma_wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
+
+	tx_ring->next_to_use = i;
+
+	/* Make sure there is space in the ring for the next send. */
+	igb_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
+		writel(i, tx_ring->tail);
+	}
+	return 0;
+
+dma_error:
+	dev_err(tx_ring->dev, "TX DMA map failed\n");
+	tx_buffer = &tx_ring->tx_buffer_info[i];
+
+	/* clear dma mappings for failed tx_buffer_info map */
+	while (tx_buffer != first) {
+		if (dma_unmap_len(tx_buffer, len))
+			dma_unmap_page(tx_ring->dev,
+				       dma_unmap_addr(tx_buffer, dma),
+				       dma_unmap_len(tx_buffer, len),
+				       DMA_TO_DEVICE);
+		dma_unmap_len_set(tx_buffer, len, 0);
+
+		if (i-- == 0)
+			i += tx_ring->count;
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+	}
+
+	if (dma_unmap_len(tx_buffer, len))
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+	dma_unmap_len_set(tx_buffer, len, 0);
+
+	dev_kfree_skb_any(tx_buffer->skb);
+	tx_buffer->skb = NULL;
+
+	tx_ring->next_to_use = i;
+
+	return -1;
+}
+
+int igb_xmit_xdp_ring(struct igb_adapter *adapter,
+		      struct igb_ring *tx_ring,
+		      struct xdp_frame *xdpf)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
+	u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? sinfo->nr_frags : 0;
+	u16 count, i, index = tx_ring->next_to_use;
+	struct igb_tx_buffer *tx_head = &tx_ring->tx_buffer_info[index];
+	struct igb_tx_buffer *tx_buffer = tx_head;
+	union e1000_adv_tx_desc *tx_desc = IGB_TX_DESC(tx_ring, index);
+	u32 len = xdpf->len, cmd_type, olinfo_status;
+	void *data = xdpf->data;
+
+	count = TXD_USE_COUNT(len);
+	for (i = 0; i < nr_frags; i++)
+		count += TXD_USE_COUNT(skb_frag_size(&sinfo->frags[i]));
+
+	if (igb_maybe_stop_tx(tx_ring, count + 3))
+		return IGB_XDP_CONSUMED;
+
+	i = 0;
+	/* record the location of the first descriptor for this packet */
+	tx_head->bytecount = xdp_get_frame_len(xdpf);
+	tx_head->type = IGB_TYPE_XDP;
+	tx_head->gso_segs = 1;
+	tx_head->xdpf = xdpf;
+
+	olinfo_status = tx_head->bytecount << E1000_ADVTXD_PAYLEN_SHIFT;
+	/* 82575 requires a unique index per ring */
+	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
+		olinfo_status |= tx_ring->reg_idx << 4;
+	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+
+	for (;;) {
+		dma_addr_t dma;
+
+		dma = dma_map_single(tx_ring->dev, data, len, DMA_TO_DEVICE);
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto unmap;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buffer, len, len);
+		dma_unmap_addr_set(tx_buffer, dma, dma);
+
+		/* put descriptor type bits */
+		cmd_type = E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_DEXT |
+			   E1000_ADVTXD_DCMD_IFCS | len;
+
+		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+		tx_buffer->protocol = 0;
+
+		if (++index == tx_ring->count)
+			index = 0;
+
+		if (i == nr_frags)
+			break;
+
+		tx_buffer = &tx_ring->tx_buffer_info[index];
+		tx_desc = IGB_TX_DESC(tx_ring, index);
+		tx_desc->read.olinfo_status = 0;
+
+		data = skb_frag_address(&sinfo->frags[i]);
+		len = skb_frag_size(&sinfo->frags[i]);
+		i++;
+	}
+	tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_TXD_DCMD);
+
+	netdev_tx_sent_queue(txring_txq(tx_ring), tx_head->bytecount);
+	/* set the timestamp */
+	tx_head->time_stamp = jiffies;
+
+	/* Avoid any potential race with xdp_xmit and cleanup */
+	smp_wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	tx_head->next_to_watch = tx_desc;
+	tx_ring->next_to_use = index;
+
+	/* Make sure there is space in the ring for the next send. */
+	igb_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more())
+		writel(index, tx_ring->tail);
+
+	return IGB_XDP_TX;
+
+unmap:
+	for (;;) {
+		tx_buffer = &tx_ring->tx_buffer_info[index];
+		if (dma_unmap_len(tx_buffer, len))
+			dma_unmap_page(tx_ring->dev,
+				       dma_unmap_addr(tx_buffer, dma),
+				       dma_unmap_len(tx_buffer, len),
+				       DMA_TO_DEVICE);
+		dma_unmap_len_set(tx_buffer, len, 0);
+		if (tx_buffer == tx_head)
+			break;
+
+		if (!index)
+			index += tx_ring->count;
+		index--;
+	}
+
+	return IGB_XDP_CONSUMED;
+}
+
+netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
+				struct igb_ring *tx_ring)
+{
+	struct igb_tx_buffer *first;
+	int tso;
+	u32 tx_flags = 0;
+	unsigned short f;
+	u16 count = TXD_USE_COUNT(skb_headlen(skb));
+	__be16 protocol = vlan_get_protocol(skb);
+	u8 hdr_len = 0;
+
+	/* need: 1 descriptor per page * PAGE_SIZE/IGB_MAX_DATA_PER_TXD,
+	 *       + 1 desc for skb_headlen/IGB_MAX_DATA_PER_TXD,
+	 *       + 2 desc gap to keep tail from touching head,
+	 *       + 1 desc for context descriptor,
+	 * otherwise try next time
+	 */
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+		count += TXD_USE_COUNT(skb_frag_size(
+						&skb_shinfo(skb)->frags[f]));
+
+	if (igb_maybe_stop_tx(tx_ring, count + 3)) {
+		/* this is a hard error */
+		return NETDEV_TX_BUSY;
+	}
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+	first->type = IGB_TYPE_SKB;
+	first->skb = skb;
+	first->bytecount = skb->len;
+	first->gso_segs = 1;
+
+	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+		struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
+
+		if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
+		    !test_and_set_bit_lock(__IGB_PTP_TX_IN_PROGRESS,
+					   &adapter->state)) {
+			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+			tx_flags |= IGB_TX_FLAGS_TSTAMP;
+
+			adapter->ptp_tx_skb = skb_get(skb);
+			adapter->ptp_tx_start = jiffies;
+			if (adapter->hw.mac.type == e1000_82576)
+				schedule_work(&adapter->ptp_tx_work);
+		} else {
+			adapter->tx_hwtstamp_skipped++;
+		}
+	}
+
+	if (skb_vlan_tag_present(skb)) {
+		tx_flags |= IGB_TX_FLAGS_VLAN;
+		tx_flags |= (skb_vlan_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
+	}
+
+	/* record initial flags and protocol */
+	first->tx_flags = tx_flags;
+	first->protocol = protocol;
+
+	tso = igb_tso(tx_ring, first, &hdr_len);
+	if (tso < 0)
+		goto out_drop;
+	else if (!tso)
+		igb_tx_csum(tx_ring, first);
+
+	if (igb_tx_map(tx_ring, first, hdr_len))
+		goto cleanup_tx_tstamp;
+
+	return NETDEV_TX_OK;
+
+out_drop:
+	dev_kfree_skb_any(first->skb);
+	first->skb = NULL;
+cleanup_tx_tstamp:
+	if (unlikely(tx_flags & IGB_TX_FLAGS_TSTAMP)) {
+		struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
+
+		dev_kfree_skb_any(adapter->ptp_tx_skb);
+		adapter->ptp_tx_skb = NULL;
+		if (adapter->hw.mac.type == e1000_82576)
+			cancel_work_sync(&adapter->ptp_tx_work);
+		clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state);
+	}
+
+	return NETDEV_TX_OK;
+}
+
+static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
+						    struct sk_buff *skb)
+{
+	unsigned int r_idx = skb->queue_mapping;
+
+	if (r_idx >= adapter->num_tx_queues)
+		r_idx = r_idx % adapter->num_tx_queues;
+
+	return adapter->tx_ring[r_idx];
+}
+
+static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
+				  struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	/* The minimum packet size with TCTL.PSP set is 17 so pad the skb
+	 * in order to meet this minimum size requirement.
+	 */
+	if (skb_put_padto(skb, 17))
+		return NETDEV_TX_OK;
+
+	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
+}
+
+/**
+ *  igb_tx_timeout - Respond to a Tx Hang
+ *  @netdev: network interface device structure
+ *  @txqueue: number of the Tx queue that hung (unused)
+ **/
+static void igb_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* Do the reset outside of interrupt context */
+	adapter->tx_timeout_count++;
+
+	if (hw->mac.type >= e1000_82580)
+		hw->dev_spec._82575.global_device_reset = true;
+
+	schedule_work(&adapter->reset_task);
+	wr32(E1000_EICS,
+	     (adapter->eims_enable_mask & ~adapter->eims_other));
+}
+
+static void igb_reset_task(struct work_struct *work)
+{
+	struct igb_adapter *adapter;
+	adapter = container_of(work, struct igb_adapter, reset_task);
+
+	rtnl_lock();
+	/* If we're already down or resetting, just bail */
+	if (test_bit(__IGB_DOWN, &adapter->state) ||
+	    test_bit(__IGB_RESETTING, &adapter->state)) {
+		rtnl_unlock();
+		return;
+	}
+
+	igb_dump(adapter);
+	netdev_err(adapter->netdev, "Reset adapter\n");
+	igb_reinit_locked(adapter);
+	rtnl_unlock();
+}
+
+/**
+ *  igb_get_stats64 - Get System Network Statistics
+ *  @netdev: network interface device structure
+ *  @stats: rtnl_link_stats64 pointer
+ **/
+static void igb_get_stats64(struct net_device *netdev,
+			    struct rtnl_link_stats64 *stats)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	spin_lock(&adapter->stats64_lock);
+	igb_update_stats(adapter);
+	memcpy(stats, &adapter->stats64, sizeof(*stats));
+	spin_unlock(&adapter->stats64_lock);
+}
+
+/**
+ *  igb_change_mtu - Change the Maximum Transfer Unit
+ *  @netdev: network interface device structure
+ *  @new_mtu: new value for maximum frame size
+ *
+ *  Returns 0 on success, negative on failure
+ **/
+static int igb_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	int max_frame = new_mtu + IGB_ETH_PKT_HDR_PAD;
+
+	if (adapter->xdp_prog) {
+		int i;
+
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			struct igb_ring *ring = adapter->rx_ring[i];
+
+			if (max_frame > igb_rx_bufsz(ring)) {
+				netdev_warn(adapter->netdev,
+					    "Requested MTU size is not supported with XDP. Max frame size is %d\n",
+					    max_frame);
+				return -EINVAL;
+			}
+		}
+	}
+
+	/* adjust max frame to be at least the size of a standard frame */
+	if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
+		max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
+
+	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	/* igb_down has a dependency on max_frame_size */
+	adapter->max_frame_size = max_frame;
+
+	if (netif_running(netdev))
+		igb_down(adapter);
+
+	netdev_dbg(netdev, "changing MTU from %d to %d\n",
+		   netdev->mtu, new_mtu);
+	netdev->mtu = new_mtu;
+
+	if (netif_running(netdev))
+		igb_up(adapter);
+	else
+		igb_reset(adapter);
+
+	clear_bit(__IGB_RESETTING, &adapter->state);
+
+	return 0;
+}
+
+/**
+ *  igb_update_stats - Update the board statistics counters
+ *  @adapter: board private structure
+ **/
+void igb_update_stats(struct igb_adapter *adapter)
+{
+	struct rtnl_link_stats64 *net_stats = &adapter->stats64;
+	struct e1000_hw *hw = &adapter->hw;
+	struct pci_dev *pdev = adapter->pdev;
+	u32 reg, mpc;
+	int i;
+	u64 bytes, packets;
+	unsigned int start;
+	u64 _bytes, _packets;
+
+	/* Prevent stats update while adapter is being reset, or if the pci
+	 * connection is down.
+	 */
+	if (adapter->link_speed == 0)
+		return;
+	if (pci_channel_offline(pdev))
+		return;
+
+	bytes = 0;
+	packets = 0;
+
+	rcu_read_lock();
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct igb_ring *ring = adapter->rx_ring[i];
+		u32 rqdpc = rd32(E1000_RQDPC(i));
+		if (hw->mac.type >= e1000_i210)
+			wr32(E1000_RQDPC(i), 0);
+
+		if (rqdpc) {
+			ring->rx_stats.drops += rqdpc;
+			net_stats->rx_fifo_errors += rqdpc;
+		}
+
+		do {
+			start = u64_stats_fetch_begin(&ring->rx_syncp);
+			_bytes = ring->rx_stats.bytes;
+			_packets = ring->rx_stats.packets;
+		} while (u64_stats_fetch_retry(&ring->rx_syncp, start));
+		bytes += _bytes;
+		packets += _packets;
+	}
+
+	net_stats->rx_bytes = bytes;
+	net_stats->rx_packets = packets;
+
+	bytes = 0;
+	packets = 0;
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igb_ring *ring = adapter->tx_ring[i];
+		do {
+			start = u64_stats_fetch_begin(&ring->tx_syncp);
+			_bytes = ring->tx_stats.bytes;
+			_packets = ring->tx_stats.packets;
+		} while (u64_stats_fetch_retry(&ring->tx_syncp, start));
+		bytes += _bytes;
+		packets += _packets;
+	}
+	net_stats->tx_bytes = bytes;
+	net_stats->tx_packets = packets;
+	rcu_read_unlock();
+
+	/* read stats registers */
+	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
+	adapter->stats.gprc += rd32(E1000_GPRC);
+	adapter->stats.gorc += rd32(E1000_GORCL);
+	rd32(E1000_GORCH); /* clear GORCL */
+	adapter->stats.bprc += rd32(E1000_BPRC);
+	adapter->stats.mprc += rd32(E1000_MPRC);
+	adapter->stats.roc += rd32(E1000_ROC);
+
+	adapter->stats.prc64 += rd32(E1000_PRC64);
+	adapter->stats.prc127 += rd32(E1000_PRC127);
+	adapter->stats.prc255 += rd32(E1000_PRC255);
+	adapter->stats.prc511 += rd32(E1000_PRC511);
+	adapter->stats.prc1023 += rd32(E1000_PRC1023);
+	adapter->stats.prc1522 += rd32(E1000_PRC1522);
+	adapter->stats.symerrs += rd32(E1000_SYMERRS);
+	adapter->stats.sec += rd32(E1000_SEC);
+
+	mpc = rd32(E1000_MPC);
+	adapter->stats.mpc += mpc;
+	net_stats->rx_fifo_errors += mpc;
+	adapter->stats.scc += rd32(E1000_SCC);
+	adapter->stats.ecol += rd32(E1000_ECOL);
+	adapter->stats.mcc += rd32(E1000_MCC);
+	adapter->stats.latecol += rd32(E1000_LATECOL);
+	adapter->stats.dc += rd32(E1000_DC);
+	adapter->stats.rlec += rd32(E1000_RLEC);
+	adapter->stats.xonrxc += rd32(E1000_XONRXC);
+	adapter->stats.xontxc += rd32(E1000_XONTXC);
+	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
+	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
+	adapter->stats.fcruc += rd32(E1000_FCRUC);
+	adapter->stats.gptc += rd32(E1000_GPTC);
+	adapter->stats.gotc += rd32(E1000_GOTCL);
+	rd32(E1000_GOTCH); /* clear GOTCL */
+	adapter->stats.rnbc += rd32(E1000_RNBC);
+	adapter->stats.ruc += rd32(E1000_RUC);
+	adapter->stats.rfc += rd32(E1000_RFC);
+	adapter->stats.rjc += rd32(E1000_RJC);
+	adapter->stats.tor += rd32(E1000_TORH);
+	adapter->stats.tot += rd32(E1000_TOTH);
+	adapter->stats.tpr += rd32(E1000_TPR);
+
+	adapter->stats.ptc64 += rd32(E1000_PTC64);
+	adapter->stats.ptc127 += rd32(E1000_PTC127);
+	adapter->stats.ptc255 += rd32(E1000_PTC255);
+	adapter->stats.ptc511 += rd32(E1000_PTC511);
+	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
+	adapter->stats.ptc1522 += rd32(E1000_PTC1522);
+
+	adapter->stats.mptc += rd32(E1000_MPTC);
+	adapter->stats.bptc += rd32(E1000_BPTC);
+
+	adapter->stats.tpt += rd32(E1000_TPT);
+	adapter->stats.colc += rd32(E1000_COLC);
+
+	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
+	/* read internal phy specific stats */
+	reg = rd32(E1000_CTRL_EXT);
+	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
+		adapter->stats.rxerrc += rd32(E1000_RXERRC);
+
+		/* this stat has invalid values on i210/i211 */
+		if ((hw->mac.type != e1000_i210) &&
+		    (hw->mac.type != e1000_i211))
+			adapter->stats.tncrs += rd32(E1000_TNCRS);
+	}
+
+	adapter->stats.tsctc += rd32(E1000_TSCTC);
+	adapter->stats.tsctfc += rd32(E1000_TSCTFC);
+
+	adapter->stats.iac += rd32(E1000_IAC);
+	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
+	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
+	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
+	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
+	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
+	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
+	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
+	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
+
+	/* Fill out the OS statistics structure */
+	net_stats->multicast = adapter->stats.mprc;
+	net_stats->collisions = adapter->stats.colc;
+
+	/* Rx Errors */
+
+	/* RLEC on some newer hardware can be incorrect so build
+	 * our own version based on RUC and ROC
+	 */
+	net_stats->rx_errors = adapter->stats.rxerrc +
+		adapter->stats.crcerrs + adapter->stats.algnerrc +
+		adapter->stats.ruc + adapter->stats.roc +
+		adapter->stats.cexterr;
+	net_stats->rx_length_errors = adapter->stats.ruc +
+				      adapter->stats.roc;
+	net_stats->rx_crc_errors = adapter->stats.crcerrs;
+	net_stats->rx_frame_errors = adapter->stats.algnerrc;
+	net_stats->rx_missed_errors = adapter->stats.mpc;
+
+	/* Tx Errors */
+	net_stats->tx_errors = adapter->stats.ecol +
+			       adapter->stats.latecol;
+	net_stats->tx_aborted_errors = adapter->stats.ecol;
+	net_stats->tx_window_errors = adapter->stats.latecol;
+	net_stats->tx_carrier_errors = adapter->stats.tncrs;
+
+	/* Tx Dropped needs to be maintained elsewhere */
+
+	/* Management Stats */
+	adapter->stats.mgptc += rd32(E1000_MGTPTC);
+	adapter->stats.mgprc += rd32(E1000_MGTPRC);
+	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
+
+	/* OS2BMC Stats */
+	reg = rd32(E1000_MANC);
+	if (reg & E1000_MANC_EN_BMC2OS) {
+		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
+		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
+		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
+		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
+	}
+}
+
+static void igb_perout(struct igb_adapter *adapter, int tsintr_tt)
+{
+	int pin = ptp_find_pin(adapter->ptp_clock, PTP_PF_PEROUT, tsintr_tt);
+	struct e1000_hw *hw = &adapter->hw;
+	struct timespec64 ts;
+	u32 tsauxc;
+
+	if (pin < 0 || pin >= IGB_N_SDP)
+		return;
+
+	spin_lock(&adapter->tmreg_lock);
+
+	if (hw->mac.type == e1000_82580 ||
+	    hw->mac.type == e1000_i354 ||
+	    hw->mac.type == e1000_i350) {
+		s64 ns = timespec64_to_ns(&adapter->perout[tsintr_tt].period);
+		u32 systiml, systimh, level_mask, level, rem;
+		u64 systim, now;
+
+		/* read systim registers in sequence */
+		rd32(E1000_SYSTIMR);
+		systiml = rd32(E1000_SYSTIML);
+		systimh = rd32(E1000_SYSTIMH);
+		systim = (((u64)(systimh & 0xFF)) << 32) | ((u64)systiml);
+		now = timecounter_cyc2time(&adapter->tc, systim);
+
+		if (pin < 2) {
+			level_mask = (tsintr_tt == 1) ? 0x80000 : 0x40000;
+			level = (rd32(E1000_CTRL) & level_mask) ? 1 : 0;
+		} else {
+			level_mask = (tsintr_tt == 1) ? 0x80 : 0x40;
+			level = (rd32(E1000_CTRL_EXT) & level_mask) ? 1 : 0;
+		}
+
+		div_u64_rem(now, ns, &rem);
+		systim = systim + (ns - rem);
+
+		/* synchronize pin level with rising/falling edges */
+		div_u64_rem(now, ns << 1, &rem);
+		if (rem < ns) {
+			/* first half of period */
+			if (level == 0) {
+				/* output is already low, skip this period */
+				systim += ns;
+				pr_notice("igb: periodic output on %s missed falling edge\n",
+					  adapter->sdp_config[pin].name);
+			}
+		} else {
+			/* second half of period */
+			if (level == 1) {
+				/* output is already high, skip this period */
+				systim += ns;
+				pr_notice("igb: periodic output on %s missed rising edge\n",
+					  adapter->sdp_config[pin].name);
+			}
+		}
+
+		/* for this chip family tv_sec is the upper part of the binary value,
+		 * so not seconds
+		 */
+		ts.tv_nsec = (u32)systim;
+		ts.tv_sec  = ((u32)(systim >> 32)) & 0xFF;
+	} else {
+		ts = timespec64_add(adapter->perout[tsintr_tt].start,
+				    adapter->perout[tsintr_tt].period);
+	}
+
+	/* u32 conversion of tv_sec is safe until y2106 */
+	wr32((tsintr_tt == 1) ? E1000_TRGTTIML1 : E1000_TRGTTIML0, ts.tv_nsec);
+	wr32((tsintr_tt == 1) ? E1000_TRGTTIMH1 : E1000_TRGTTIMH0, (u32)ts.tv_sec);
+	tsauxc = rd32(E1000_TSAUXC);
+	tsauxc |= TSAUXC_EN_TT0;
+	wr32(E1000_TSAUXC, tsauxc);
+	adapter->perout[tsintr_tt].start = ts;
+
+	spin_unlock(&adapter->tmreg_lock);
+}
+
+static void igb_extts(struct igb_adapter *adapter, int tsintr_tt)
+{
+	int pin = ptp_find_pin(adapter->ptp_clock, PTP_PF_EXTTS, tsintr_tt);
+	int auxstmpl = (tsintr_tt == 1) ? E1000_AUXSTMPL1 : E1000_AUXSTMPL0;
+	int auxstmph = (tsintr_tt == 1) ? E1000_AUXSTMPH1 : E1000_AUXSTMPH0;
+	struct e1000_hw *hw = &adapter->hw;
+	struct ptp_clock_event event;
+	struct timespec64 ts;
+	unsigned long flags;
+
+	if (pin < 0 || pin >= IGB_N_SDP)
+		return;
+
+	if (hw->mac.type == e1000_82580 ||
+	    hw->mac.type == e1000_i354 ||
+	    hw->mac.type == e1000_i350) {
+		u64 ns = rd32(auxstmpl);
+
+		ns += ((u64)(rd32(auxstmph) & 0xFF)) << 32;
+		spin_lock_irqsave(&adapter->tmreg_lock, flags);
+		ns = timecounter_cyc2time(&adapter->tc, ns);
+		spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+		ts = ns_to_timespec64(ns);
+	} else {
+		ts.tv_nsec = rd32(auxstmpl);
+		ts.tv_sec  = rd32(auxstmph);
+	}
+
+	event.type = PTP_CLOCK_EXTTS;
+	event.index = tsintr_tt;
+	event.timestamp = ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+	ptp_clock_event(adapter->ptp_clock, &event);
+}
+
+static void igb_tsync_interrupt(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ack = 0, tsicr = rd32(E1000_TSICR);
+	struct ptp_clock_event event;
+
+	if (tsicr & TSINTR_SYS_WRAP) {
+		event.type = PTP_CLOCK_PPS;
+		if (adapter->ptp_caps.pps)
+			ptp_clock_event(adapter->ptp_clock, &event);
+		ack |= TSINTR_SYS_WRAP;
+	}
+
+	if (tsicr & E1000_TSICR_TXTS) {
+		/* retrieve hardware timestamp */
+		schedule_work(&adapter->ptp_tx_work);
+		ack |= E1000_TSICR_TXTS;
+	}
+
+	if (tsicr & TSINTR_TT0) {
+		igb_perout(adapter, 0);
+		ack |= TSINTR_TT0;
+	}
+
+	if (tsicr & TSINTR_TT1) {
+		igb_perout(adapter, 1);
+		ack |= TSINTR_TT1;
+	}
+
+	if (tsicr & TSINTR_AUTT0) {
+		igb_extts(adapter, 0);
+		ack |= TSINTR_AUTT0;
+	}
+
+	if (tsicr & TSINTR_AUTT1) {
+		igb_extts(adapter, 1);
+		ack |= TSINTR_AUTT1;
+	}
+
+	/* acknowledge the interrupts */
+	wr32(E1000_TSICR, ack);
+}
+
+static irqreturn_t igb_msix_other(int irq, void *data)
+{
+	struct igb_adapter *adapter = data;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 icr = rd32(E1000_ICR);
+	/* reading ICR causes bit 31 of EICR to be cleared */
+
+	if (icr & E1000_ICR_DRSTA)
+		schedule_work(&adapter->reset_task);
+
+	if (icr & E1000_ICR_DOUTSYNC) {
+		/* HW is reporting DMA is out of sync */
+		adapter->stats.doosync++;
+		/* The DMA Out of Sync is also indication of a spoof event
+		 * in IOV mode. Check the Wrong VM Behavior register to
+		 * see if it is really a spoof event.
+		 */
+		igb_check_wvbr(adapter);
+	}
+
+	/* Check for a mailbox event */
+	if (icr & E1000_ICR_VMMB)
+		igb_msg_task(adapter);
+
+	if (icr & E1000_ICR_LSC) {
+		hw->mac.get_link_status = 1;
+		/* guard against interrupt when we're going down */
+		if (!test_bit(__IGB_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
+
+	if (icr & E1000_ICR_TS)
+		igb_tsync_interrupt(adapter);
+
+	wr32(E1000_EIMS, adapter->eims_other);
+
+	return IRQ_HANDLED;
+}
+
+static void igb_write_itr(struct igb_q_vector *q_vector)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	u32 itr_val = q_vector->itr_val & 0x7FFC;
+
+	if (!q_vector->set_itr)
+		return;
+
+	if (!itr_val)
+		itr_val = 0x4;
+
+	if (adapter->hw.mac.type == e1000_82575)
+		itr_val |= itr_val << 16;
+	else
+		itr_val |= E1000_EITR_CNT_IGNR;
+
+	writel(itr_val, q_vector->itr_register);
+	q_vector->set_itr = 0;
+}
+
+static irqreturn_t igb_msix_ring(int irq, void *data)
+{
+	struct igb_q_vector *q_vector = data;
+
+	/* Write the ITR value calculated from the previous interrupt. */
+	igb_write_itr(q_vector);
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_IGB_DCA
+static void igb_update_tx_dca(struct igb_adapter *adapter,
+			      struct igb_ring *tx_ring,
+			      int cpu)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 txctrl = dca3_get_tag(tx_ring->dev, cpu);
+
+	if (hw->mac.type != e1000_82575)
+		txctrl <<= E1000_DCA_TXCTRL_CPUID_SHIFT;
+
+	/* We can enable relaxed ordering for reads, but not writes when
+	 * DCA is enabled.  This is due to a known issue in some chipsets
+	 * which will cause the DCA tag to be cleared.
+	 */
+	txctrl |= E1000_DCA_TXCTRL_DESC_RRO_EN |
+		  E1000_DCA_TXCTRL_DATA_RRO_EN |
+		  E1000_DCA_TXCTRL_DESC_DCA_EN;
+
+	wr32(E1000_DCA_TXCTRL(tx_ring->reg_idx), txctrl);
+}
+
+static void igb_update_rx_dca(struct igb_adapter *adapter,
+			      struct igb_ring *rx_ring,
+			      int cpu)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rxctrl = dca3_get_tag(&adapter->pdev->dev, cpu);
+
+	if (hw->mac.type != e1000_82575)
+		rxctrl <<= E1000_DCA_RXCTRL_CPUID_SHIFT;
+
+	/* We can enable relaxed ordering for reads, but not writes when
+	 * DCA is enabled.  This is due to a known issue in some chipsets
+	 * which will cause the DCA tag to be cleared.
+	 */
+	rxctrl |= E1000_DCA_RXCTRL_DESC_RRO_EN |
+		  E1000_DCA_RXCTRL_DESC_DCA_EN;
+
+	wr32(E1000_DCA_RXCTRL(rx_ring->reg_idx), rxctrl);
+}
+
+static void igb_update_dca(struct igb_q_vector *q_vector)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	int cpu = get_cpu();
+
+	if (q_vector->cpu == cpu)
+		goto out_no_update;
+
+	if (q_vector->tx.ring)
+		igb_update_tx_dca(adapter, q_vector->tx.ring, cpu);
+
+	if (q_vector->rx.ring)
+		igb_update_rx_dca(adapter, q_vector->rx.ring, cpu);
+
+	q_vector->cpu = cpu;
+out_no_update:
+	put_cpu();
+}
+
+static void igb_setup_dca(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int i;
+
+	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
+		return;
+
+	/* Always use CB2 mode, difference is masked in the CB driver. */
+	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
+
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		adapter->q_vector[i]->cpu = -1;
+		igb_update_dca(adapter->q_vector[i]);
+	}
+}
+
+static int __igb_notify_dca(struct device *dev, void *data)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned long event = *(unsigned long *)data;
+
+	switch (event) {
+	case DCA_PROVIDER_ADD:
+		/* if already enabled, don't do it again */
+		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
+			break;
+		if (dca_add_requester(dev) == 0) {
+			adapter->flags |= IGB_FLAG_DCA_ENABLED;
+			dev_info(&pdev->dev, "DCA enabled\n");
+			igb_setup_dca(adapter);
+			break;
+		}
+		fallthrough; /* since DCA is disabled. */
+	case DCA_PROVIDER_REMOVE:
+		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
+			/* without this a class_device is left
+			 * hanging around in the sysfs model
+			 */
+			dca_remove_requester(dev);
+			dev_info(&pdev->dev, "DCA disabled\n");
+			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
+			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
+			  void *p)
+{
+	int ret_val;
+
+	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
+					 __igb_notify_dca);
+
+	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
+}
+#endif /* CONFIG_IGB_DCA */
+
+#ifdef CONFIG_PCI_IOV
+static int igb_vf_configure(struct igb_adapter *adapter, int vf)
+{
+	unsigned char mac_addr[ETH_ALEN];
+
+	eth_zero_addr(mac_addr);
+	igb_set_vf_mac(adapter, vf, mac_addr);
+
+	/* By default spoof check is enabled for all VFs */
+	adapter->vf_data[vf].spoofchk_enabled = true;
+
+	/* By default VFs are not trusted */
+	adapter->vf_data[vf].trusted = false;
+
+	return 0;
+}
+
+#endif
+static void igb_ping_all_vfs(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ping;
+	int i;
+
+	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
+		ping = E1000_PF_CONTROL_MSG;
+		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
+			ping |= E1000_VT_MSGTYPE_CTS;
+		igb_write_mbx(hw, &ping, 1, i);
+	}
+}
+
+static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 vmolr = rd32(E1000_VMOLR(vf));
+	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+
+	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
+			    IGB_VF_FLAG_MULTI_PROMISC);
+	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
+
+	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
+		vmolr |= E1000_VMOLR_MPME;
+		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
+		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
+	} else {
+		/* if we have hashes and we are clearing a multicast promisc
+		 * flag we need to write the hashes to the MTA as this step
+		 * was previously skipped
+		 */
+		if (vf_data->num_vf_mc_hashes > 30) {
+			vmolr |= E1000_VMOLR_MPME;
+		} else if (vf_data->num_vf_mc_hashes) {
+			int j;
+
+			vmolr |= E1000_VMOLR_ROMPE;
+			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
+				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
+		}
+	}
+
+	wr32(E1000_VMOLR(vf), vmolr);
+
+	/* there are flags left unprocessed, likely not supported */
+	if (*msgbuf & E1000_VT_MSGINFO_MASK)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int igb_set_vf_multicasts(struct igb_adapter *adapter,
+				  u32 *msgbuf, u32 vf)
+{
+	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
+	u16 *hash_list = (u16 *)&msgbuf[1];
+	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+	int i;
+
+	/* salt away the number of multicast addresses assigned
+	 * to this VF for later use to restore when the PF multi cast
+	 * list changes
+	 */
+	vf_data->num_vf_mc_hashes = n;
+
+	/* only up to 30 hash values supported */
+	if (n > 30)
+		n = 30;
+
+	/* store the hashes for later use */
+	for (i = 0; i < n; i++)
+		vf_data->vf_mc_hashes[i] = hash_list[i];
+
+	/* Flush and reset the mta with the new values */
+	igb_set_rx_mode(adapter->netdev);
+
+	return 0;
+}
+
+static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct vf_data_storage *vf_data;
+	int i, j;
+
+	for (i = 0; i < adapter->vfs_allocated_count; i++) {
+		u32 vmolr = rd32(E1000_VMOLR(i));
+
+		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
+
+		vf_data = &adapter->vf_data[i];
+
+		if ((vf_data->num_vf_mc_hashes > 30) ||
+		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
+			vmolr |= E1000_VMOLR_MPME;
+		} else if (vf_data->num_vf_mc_hashes) {
+			vmolr |= E1000_VMOLR_ROMPE;
+			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
+				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
+		}
+		wr32(E1000_VMOLR(i), vmolr);
+	}
+}
+
+static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 pool_mask, vlvf_mask, i;
+
+	/* create mask for VF and other pools */
+	pool_mask = E1000_VLVF_POOLSEL_MASK;
+	vlvf_mask = BIT(E1000_VLVF_POOLSEL_SHIFT + vf);
+
+	/* drop PF from pool bits */
+	pool_mask &= ~BIT(E1000_VLVF_POOLSEL_SHIFT +
+			     adapter->vfs_allocated_count);
+
+	/* Find the vlan filter for this id */
+	for (i = E1000_VLVF_ARRAY_SIZE; i--;) {
+		u32 vlvf = rd32(E1000_VLVF(i));
+		u32 vfta_mask, vid, vfta;
+
+		/* remove the vf from the pool */
+		if (!(vlvf & vlvf_mask))
+			continue;
+
+		/* clear out bit from VLVF */
+		vlvf ^= vlvf_mask;
+
+		/* if other pools are present, just remove ourselves */
+		if (vlvf & pool_mask)
+			goto update_vlvfb;
+
+		/* if PF is present, leave VFTA */
+		if (vlvf & E1000_VLVF_POOLSEL_MASK)
+			goto update_vlvf;
+
+		vid = vlvf & E1000_VLVF_VLANID_MASK;
+		vfta_mask = BIT(vid % 32);
+
+		/* clear bit from VFTA */
+		vfta = adapter->shadow_vfta[vid / 32];
+		if (vfta & vfta_mask)
+			hw->mac.ops.write_vfta(hw, vid / 32, vfta ^ vfta_mask);
+update_vlvf:
+		/* clear pool selection enable */
+		if (adapter->flags & IGB_FLAG_VLAN_PROMISC)
+			vlvf &= E1000_VLVF_POOLSEL_MASK;
+		else
+			vlvf = 0;
+update_vlvfb:
+		/* clear pool bits */
+		wr32(E1000_VLVF(i), vlvf);
+	}
+}
+
+static int igb_find_vlvf_entry(struct e1000_hw *hw, u32 vlan)
+{
+	u32 vlvf;
+	int idx;
+
+	/* short cut the special case */
+	if (vlan == 0)
+		return 0;
+
+	/* Search for the VLAN id in the VLVF entries */
+	for (idx = E1000_VLVF_ARRAY_SIZE; --idx;) {
+		vlvf = rd32(E1000_VLVF(idx));
+		if ((vlvf & VLAN_VID_MASK) == vlan)
+			break;
+	}
+
+	return idx;
+}
+
+static void igb_update_pf_vlvf(struct igb_adapter *adapter, u32 vid)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 bits, pf_id;
+	int idx;
+
+	idx = igb_find_vlvf_entry(hw, vid);
+	if (!idx)
+		return;
+
+	/* See if any other pools are set for this VLAN filter
+	 * entry other than the PF.
+	 */
+	pf_id = adapter->vfs_allocated_count + E1000_VLVF_POOLSEL_SHIFT;
+	bits = ~BIT(pf_id) & E1000_VLVF_POOLSEL_MASK;
+	bits &= rd32(E1000_VLVF(idx));
+
+	/* Disable the filter so this falls into the default pool. */
+	if (!bits) {
+		if (adapter->flags & IGB_FLAG_VLAN_PROMISC)
+			wr32(E1000_VLVF(idx), BIT(pf_id));
+		else
+			wr32(E1000_VLVF(idx), 0);
+	}
+}
+
+static s32 igb_set_vf_vlan(struct igb_adapter *adapter, u32 vid,
+			   bool add, u32 vf)
+{
+	int pf_id = adapter->vfs_allocated_count;
+	struct e1000_hw *hw = &adapter->hw;
+	int err;
+
+	/* If VLAN overlaps with one the PF is currently monitoring make
+	 * sure that we are able to allocate a VLVF entry.  This may be
+	 * redundant but it guarantees PF will maintain visibility to
+	 * the VLAN.
+	 */
+	if (add && test_bit(vid, adapter->active_vlans)) {
+		err = igb_vfta_set(hw, vid, pf_id, true, false);
+		if (err)
+			return err;
+	}
+
+	err = igb_vfta_set(hw, vid, vf, add, false);
+
+	if (add && !err)
+		return err;
+
+	/* If we failed to add the VF VLAN or we are removing the VF VLAN
+	 * we may need to drop the PF pool bit in order to allow us to free
+	 * up the VLVF resources.
+	 */
+	if (test_bit(vid, adapter->active_vlans) ||
+	    (adapter->flags & IGB_FLAG_VLAN_PROMISC))
+		igb_update_pf_vlvf(adapter, vid);
+
+	return err;
+}
+
+static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (vid)
+		wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
+	else
+		wr32(E1000_VMVIR(vf), 0);
+}
+
+static int igb_enable_port_vlan(struct igb_adapter *adapter, int vf,
+				u16 vlan, u8 qos)
+{
+	int err;
+
+	err = igb_set_vf_vlan(adapter, vlan, true, vf);
+	if (err)
+		return err;
+
+	igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
+	igb_set_vmolr(adapter, vf, !vlan);
+
+	/* revoke access to previous VLAN */
+	if (vlan != adapter->vf_data[vf].pf_vlan)
+		igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan,
+				false, vf);
+
+	adapter->vf_data[vf].pf_vlan = vlan;
+	adapter->vf_data[vf].pf_qos = qos;
+	igb_set_vf_vlan_strip(adapter, vf, true);
+	dev_info(&adapter->pdev->dev,
+		 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
+	if (test_bit(__IGB_DOWN, &adapter->state)) {
+		dev_warn(&adapter->pdev->dev,
+			 "The VF VLAN has been set, but the PF device is not up.\n");
+		dev_warn(&adapter->pdev->dev,
+			 "Bring the PF device up before attempting to use the VF device.\n");
+	}
+
+	return err;
+}
+
+static int igb_disable_port_vlan(struct igb_adapter *adapter, int vf)
+{
+	/* Restore tagless access via VLAN 0 */
+	igb_set_vf_vlan(adapter, 0, true, vf);
+
+	igb_set_vmvir(adapter, 0, vf);
+	igb_set_vmolr(adapter, vf, true);
+
+	/* Remove any PF assigned VLAN */
+	if (adapter->vf_data[vf].pf_vlan)
+		igb_set_vf_vlan(adapter, adapter->vf_data[vf].pf_vlan,
+				false, vf);
+
+	adapter->vf_data[vf].pf_vlan = 0;
+	adapter->vf_data[vf].pf_qos = 0;
+	igb_set_vf_vlan_strip(adapter, vf, false);
+
+	return 0;
+}
+
+static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf,
+			       u16 vlan, u8 qos, __be16 vlan_proto)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
+		return -EINVAL;
+
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
+	return (vlan || qos) ? igb_enable_port_vlan(adapter, vf, vlan, qos) :
+			       igb_disable_port_vlan(adapter, vf);
+}
+
+static int igb_set_vf_vlan_msg(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
+{
+	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
+	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
+	int ret;
+
+	if (adapter->vf_data[vf].pf_vlan)
+		return -1;
+
+	/* VLAN 0 is a special case, don't allow it to be removed */
+	if (!vid && !add)
+		return 0;
+
+	ret = igb_set_vf_vlan(adapter, vid, !!add, vf);
+	if (!ret)
+		igb_set_vf_vlan_strip(adapter, vf, !!vid);
+	return ret;
+}
+
+static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
+{
+	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+
+	/* clear flags - except flag that indicates PF has set the MAC */
+	vf_data->flags &= IGB_VF_FLAG_PF_SET_MAC;
+	vf_data->last_nack = jiffies;
+
+	/* reset vlans for device */
+	igb_clear_vf_vfta(adapter, vf);
+	igb_set_vf_vlan(adapter, vf_data->pf_vlan, true, vf);
+	igb_set_vmvir(adapter, vf_data->pf_vlan |
+			       (vf_data->pf_qos << VLAN_PRIO_SHIFT), vf);
+	igb_set_vmolr(adapter, vf, !vf_data->pf_vlan);
+	igb_set_vf_vlan_strip(adapter, vf, !!(vf_data->pf_vlan));
+
+	/* reset multicast table array for vf */
+	adapter->vf_data[vf].num_vf_mc_hashes = 0;
+
+	/* Flush and reset the mta with the new values */
+	igb_set_rx_mode(adapter->netdev);
+}
+
+static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
+{
+	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
+
+	/* clear mac address as we were hotplug removed/added */
+	if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
+		eth_zero_addr(vf_mac);
+
+	/* process remaining reset events */
+	igb_vf_reset(adapter, vf);
+}
+
+static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
+	u32 reg, msgbuf[3] = {};
+	u8 *addr = (u8 *)(&msgbuf[1]);
+
+	/* process all the same items cleared in a function level reset */
+	igb_vf_reset(adapter, vf);
+
+	/* set vf mac address */
+	igb_set_vf_mac(adapter, vf, vf_mac);
+
+	/* enable transmit and receive for vf */
+	reg = rd32(E1000_VFTE);
+	wr32(E1000_VFTE, reg | BIT(vf));
+	reg = rd32(E1000_VFRE);
+	wr32(E1000_VFRE, reg | BIT(vf));
+
+	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
+
+	/* reply to reset with ack and vf mac address */
+	if (!is_zero_ether_addr(vf_mac)) {
+		msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
+		memcpy(addr, vf_mac, ETH_ALEN);
+	} else {
+		msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_NACK;
+	}
+	igb_write_mbx(hw, msgbuf, 3, vf);
+}
+
+static void igb_flush_mac_table(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 0; i < hw->mac.rar_entry_count; i++) {
+		adapter->mac_table[i].state &= ~IGB_MAC_STATE_IN_USE;
+		eth_zero_addr(adapter->mac_table[i].addr);
+		adapter->mac_table[i].queue = 0;
+		igb_rar_set_index(adapter, i);
+	}
+}
+
+static int igb_available_rars(struct igb_adapter *adapter, u8 queue)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	/* do not count rar entries reserved for VFs MAC addresses */
+	int rar_entries = hw->mac.rar_entry_count -
+			  adapter->vfs_allocated_count;
+	int i, count = 0;
+
+	for (i = 0; i < rar_entries; i++) {
+		/* do not count default entries */
+		if (adapter->mac_table[i].state & IGB_MAC_STATE_DEFAULT)
+			continue;
+
+		/* do not count "in use" entries for different queues */
+		if ((adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE) &&
+		    (adapter->mac_table[i].queue != queue))
+			continue;
+
+		count++;
+	}
+
+	return count;
+}
+
+/* Set default MAC address for the PF in the first RAR entry */
+static void igb_set_default_mac_filter(struct igb_adapter *adapter)
+{
+	struct igb_mac_addr *mac_table = &adapter->mac_table[0];
+
+	ether_addr_copy(mac_table->addr, adapter->hw.mac.addr);
+	mac_table->queue = adapter->vfs_allocated_count;
+	mac_table->state = IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE;
+
+	igb_rar_set_index(adapter, 0);
+}
+
+/* If the filter to be added and an already existing filter express
+ * the same address and address type, it should be possible to only
+ * override the other configurations, for example the queue to steer
+ * traffic.
+ */
+static bool igb_mac_entry_can_be_used(const struct igb_mac_addr *entry,
+				      const u8 *addr, const u8 flags)
+{
+	if (!(entry->state & IGB_MAC_STATE_IN_USE))
+		return true;
+
+	if ((entry->state & IGB_MAC_STATE_SRC_ADDR) !=
+	    (flags & IGB_MAC_STATE_SRC_ADDR))
+		return false;
+
+	if (!ether_addr_equal(addr, entry->addr))
+		return false;
+
+	return true;
+}
+
+/* Add a MAC filter for 'addr' directing matching traffic to 'queue',
+ * 'flags' is used to indicate what kind of match is made, match is by
+ * default for the destination address, if matching by source address
+ * is desired the flag IGB_MAC_STATE_SRC_ADDR can be used.
+ */
+static int igb_add_mac_filter_flags(struct igb_adapter *adapter,
+				    const u8 *addr, const u8 queue,
+				    const u8 flags)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int rar_entries = hw->mac.rar_entry_count -
+			  adapter->vfs_allocated_count;
+	int i;
+
+	if (is_zero_ether_addr(addr))
+		return -EINVAL;
+
+	/* Search for the first empty entry in the MAC table.
+	 * Do not touch entries at the end of the table reserved for the VF MAC
+	 * addresses.
+	 */
+	for (i = 0; i < rar_entries; i++) {
+		if (!igb_mac_entry_can_be_used(&adapter->mac_table[i],
+					       addr, flags))
+			continue;
+
+		ether_addr_copy(adapter->mac_table[i].addr, addr);
+		adapter->mac_table[i].queue = queue;
+		adapter->mac_table[i].state |= IGB_MAC_STATE_IN_USE | flags;
+
+		igb_rar_set_index(adapter, i);
+		return i;
+	}
+
+	return -ENOSPC;
+}
+
+static int igb_add_mac_filter(struct igb_adapter *adapter, const u8 *addr,
+			      const u8 queue)
+{
+	return igb_add_mac_filter_flags(adapter, addr, queue, 0);
+}
+
+/* Remove a MAC filter for 'addr' directing matching traffic to
+ * 'queue', 'flags' is used to indicate what kind of match need to be
+ * removed, match is by default for the destination address, if
+ * matching by source address is to be removed the flag
+ * IGB_MAC_STATE_SRC_ADDR can be used.
+ */
+static int igb_del_mac_filter_flags(struct igb_adapter *adapter,
+				    const u8 *addr, const u8 queue,
+				    const u8 flags)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	int rar_entries = hw->mac.rar_entry_count -
+			  adapter->vfs_allocated_count;
+	int i;
+
+	if (is_zero_ether_addr(addr))
+		return -EINVAL;
+
+	/* Search for matching entry in the MAC table based on given address
+	 * and queue. Do not touch entries at the end of the table reserved
+	 * for the VF MAC addresses.
+	 */
+	for (i = 0; i < rar_entries; i++) {
+		if (!(adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE))
+			continue;
+		if ((adapter->mac_table[i].state & flags) != flags)
+			continue;
+		if (adapter->mac_table[i].queue != queue)
+			continue;
+		if (!ether_addr_equal(adapter->mac_table[i].addr, addr))
+			continue;
+
+		/* When a filter for the default address is "deleted",
+		 * we return it to its initial configuration
+		 */
+		if (adapter->mac_table[i].state & IGB_MAC_STATE_DEFAULT) {
+			adapter->mac_table[i].state =
+				IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE;
+			adapter->mac_table[i].queue =
+				adapter->vfs_allocated_count;
+		} else {
+			adapter->mac_table[i].state = 0;
+			adapter->mac_table[i].queue = 0;
+			eth_zero_addr(adapter->mac_table[i].addr);
+		}
+
+		igb_rar_set_index(adapter, i);
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+static int igb_del_mac_filter(struct igb_adapter *adapter, const u8 *addr,
+			      const u8 queue)
+{
+	return igb_del_mac_filter_flags(adapter, addr, queue, 0);
+}
+
+int igb_add_mac_steering_filter(struct igb_adapter *adapter,
+				const u8 *addr, u8 queue, u8 flags)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* In theory, this should be supported on 82575 as well, but
+	 * that part wasn't easily accessible during development.
+	 */
+	if (hw->mac.type != e1000_i210)
+		return -EOPNOTSUPP;
+
+	return igb_add_mac_filter_flags(adapter, addr, queue,
+					IGB_MAC_STATE_QUEUE_STEERING | flags);
+}
+
+int igb_del_mac_steering_filter(struct igb_adapter *adapter,
+				const u8 *addr, u8 queue, u8 flags)
+{
+	return igb_del_mac_filter_flags(adapter, addr, queue,
+					IGB_MAC_STATE_QUEUE_STEERING | flags);
+}
+
+static int igb_uc_sync(struct net_device *netdev, const unsigned char *addr)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	int ret;
+
+	ret = igb_add_mac_filter(adapter, addr, adapter->vfs_allocated_count);
+
+	return min_t(int, ret, 0);
+}
+
+static int igb_uc_unsync(struct net_device *netdev, const unsigned char *addr)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	igb_del_mac_filter(adapter, addr, adapter->vfs_allocated_count);
+
+	return 0;
+}
+
+static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf,
+				 const u32 info, const u8 *addr)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+	struct list_head *pos;
+	struct vf_mac_filter *entry = NULL;
+	int ret = 0;
+
+	if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) &&
+	    !vf_data->trusted) {
+		dev_warn(&pdev->dev,
+			 "VF %d requested MAC filter but is administratively denied\n",
+			  vf);
+		return -EINVAL;
+	}
+	if (!is_valid_ether_addr(addr)) {
+		dev_warn(&pdev->dev,
+			 "VF %d attempted to set invalid MAC filter\n",
+			  vf);
+		return -EINVAL;
+	}
+
+	switch (info) {
+	case E1000_VF_MAC_FILTER_CLR:
+		/* remove all unicast MAC filters related to the current VF */
+		list_for_each(pos, &adapter->vf_macs.l) {
+			entry = list_entry(pos, struct vf_mac_filter, l);
+			if (entry->vf == vf) {
+				entry->vf = -1;
+				entry->free = true;
+				igb_del_mac_filter(adapter, entry->vf_mac, vf);
+			}
+		}
+		break;
+	case E1000_VF_MAC_FILTER_ADD:
+		/* try to find empty slot in the list */
+		list_for_each(pos, &adapter->vf_macs.l) {
+			entry = list_entry(pos, struct vf_mac_filter, l);
+			if (entry->free)
+				break;
+		}
+
+		if (entry && entry->free) {
+			entry->free = false;
+			entry->vf = vf;
+			ether_addr_copy(entry->vf_mac, addr);
+
+			ret = igb_add_mac_filter(adapter, addr, vf);
+			ret = min_t(int, ret, 0);
+		} else {
+			ret = -ENOSPC;
+		}
+
+		if (ret == -ENOSPC)
+			dev_warn(&pdev->dev,
+				 "VF %d has requested MAC filter but there is no space for it\n",
+				 vf);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+	u32 info = msg[0] & E1000_VT_MSGINFO_MASK;
+
+	/* The VF MAC Address is stored in a packed array of bytes
+	 * starting at the second 32 bit word of the msg array
+	 */
+	unsigned char *addr = (unsigned char *)&msg[1];
+	int ret = 0;
+
+	if (!info) {
+		if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) &&
+		    !vf_data->trusted) {
+			dev_warn(&pdev->dev,
+				 "VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n",
+				 vf);
+			return -EINVAL;
+		}
+
+		if (!is_valid_ether_addr(addr)) {
+			dev_warn(&pdev->dev,
+				 "VF %d attempted to set invalid MAC\n",
+				 vf);
+			return -EINVAL;
+		}
+
+		ret = igb_set_vf_mac(adapter, vf, addr);
+	} else {
+		ret = igb_set_vf_mac_filter(adapter, vf, info, addr);
+	}
+
+	return ret;
+}
+
+static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+	u32 msg = E1000_VT_MSGTYPE_NACK;
+
+	/* if device isn't clear to send it shouldn't be reading either */
+	if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
+	    time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
+		igb_write_mbx(hw, &msg, 1, vf);
+		vf_data->last_nack = jiffies;
+	}
+}
+
+static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	u32 msgbuf[E1000_VFMAILBOX_SIZE];
+	struct e1000_hw *hw = &adapter->hw;
+	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
+	s32 retval;
+
+	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf, false);
+
+	if (retval) {
+		/* if receive failed revoke VF CTS stats and restart init */
+		dev_err(&pdev->dev, "Error receiving message from VF\n");
+		vf_data->flags &= ~IGB_VF_FLAG_CTS;
+		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
+			goto unlock;
+		goto out;
+	}
+
+	/* this is a message we already processed, do nothing */
+	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
+		goto unlock;
+
+	/* until the vf completes a reset it should not be
+	 * allowed to start any configuration.
+	 */
+	if (msgbuf[0] == E1000_VF_RESET) {
+		/* unlocks mailbox */
+		igb_vf_reset_msg(adapter, vf);
+		return;
+	}
+
+	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
+		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
+			goto unlock;
+		retval = -1;
+		goto out;
+	}
+
+	switch ((msgbuf[0] & 0xFFFF)) {
+	case E1000_VF_SET_MAC_ADDR:
+		retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
+		break;
+	case E1000_VF_SET_PROMISC:
+		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
+		break;
+	case E1000_VF_SET_MULTICAST:
+		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
+		break;
+	case E1000_VF_SET_LPE:
+		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
+		break;
+	case E1000_VF_SET_VLAN:
+		retval = -1;
+		if (vf_data->pf_vlan)
+			dev_warn(&pdev->dev,
+				 "VF %d attempted to override administratively set VLAN tag\nReload the VF driver to resume operations\n",
+				 vf);
+		else
+			retval = igb_set_vf_vlan_msg(adapter, msgbuf, vf);
+		break;
+	default:
+		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
+		retval = -1;
+		break;
+	}
+
+	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
+out:
+	/* notify the VF of the results of what it sent us */
+	if (retval)
+		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
+	else
+		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
+
+	/* unlocks mailbox */
+	igb_write_mbx(hw, msgbuf, 1, vf);
+	return;
+
+unlock:
+	igb_unlock_mbx(hw, vf);
+}
+
+static void igb_msg_task(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned long flags;
+	u32 vf;
+
+	spin_lock_irqsave(&adapter->vfs_lock, flags);
+	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
+		/* process any reset requests */
+		if (!igb_check_for_rst(hw, vf))
+			igb_vf_reset_event(adapter, vf);
+
+		/* process any messages pending */
+		if (!igb_check_for_msg(hw, vf))
+			igb_rcv_msg_from_vf(adapter, vf);
+
+		/* process any acks */
+		if (!igb_check_for_ack(hw, vf))
+			igb_rcv_ack_from_vf(adapter, vf);
+	}
+	spin_unlock_irqrestore(&adapter->vfs_lock, flags);
+}
+
+/**
+ *  igb_set_uta - Set unicast filter table address
+ *  @adapter: board private structure
+ *  @set: boolean indicating if we are setting or clearing bits
+ *
+ *  The unicast table address is a register array of 32-bit registers.
+ *  The table is meant to be used in a way similar to how the MTA is used
+ *  however due to certain limitations in the hardware it is necessary to
+ *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
+ *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
+ **/
+static void igb_set_uta(struct igb_adapter *adapter, bool set)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 uta = set ? ~0 : 0;
+	int i;
+
+	/* we only need to do this if VMDq is enabled */
+	if (!adapter->vfs_allocated_count)
+		return;
+
+	for (i = hw->mac.uta_reg_count; i--;)
+		array_wr32(E1000_UTA, i, uta);
+}
+
+/**
+ *  igb_intr_msi - Interrupt Handler
+ *  @irq: interrupt number
+ *  @data: pointer to a network interface device structure
+ **/
+static irqreturn_t igb_intr_msi(int irq, void *data)
+{
+	struct igb_adapter *adapter = data;
+	struct igb_q_vector *q_vector = adapter->q_vector[0];
+	struct e1000_hw *hw = &adapter->hw;
+	/* read ICR disables interrupts using IAM */
+	u32 icr = rd32(E1000_ICR);
+
+	igb_write_itr(q_vector);
+
+	if (icr & E1000_ICR_DRSTA)
+		schedule_work(&adapter->reset_task);
+
+	if (icr & E1000_ICR_DOUTSYNC) {
+		/* HW is reporting DMA is out of sync */
+		adapter->stats.doosync++;
+	}
+
+	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
+		hw->mac.get_link_status = 1;
+		if (!test_bit(__IGB_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
+
+	if (icr & E1000_ICR_TS)
+		igb_tsync_interrupt(adapter);
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ *  igb_intr - Legacy Interrupt Handler
+ *  @irq: interrupt number
+ *  @data: pointer to a network interface device structure
+ **/
+static irqreturn_t igb_intr(int irq, void *data)
+{
+	struct igb_adapter *adapter = data;
+	struct igb_q_vector *q_vector = adapter->q_vector[0];
+	struct e1000_hw *hw = &adapter->hw;
+	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
+	 * need for the IMC write
+	 */
+	u32 icr = rd32(E1000_ICR);
+
+	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
+	 * not set, then the adapter didn't send an interrupt
+	 */
+	if (!(icr & E1000_ICR_INT_ASSERTED))
+		return IRQ_NONE;
+
+	igb_write_itr(q_vector);
+
+	if (icr & E1000_ICR_DRSTA)
+		schedule_work(&adapter->reset_task);
+
+	if (icr & E1000_ICR_DOUTSYNC) {
+		/* HW is reporting DMA is out of sync */
+		adapter->stats.doosync++;
+	}
+
+	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
+		hw->mac.get_link_status = 1;
+		/* guard against interrupt when we're going down */
+		if (!test_bit(__IGB_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
+
+	if (icr & E1000_ICR_TS)
+		igb_tsync_interrupt(adapter);
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	struct e1000_hw *hw = &adapter->hw;
+
+	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
+	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
+		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
+			igb_set_itr(q_vector);
+		else
+			igb_update_ring_itr(q_vector);
+	}
+
+	if (!test_bit(__IGB_DOWN, &adapter->state)) {
+		if (adapter->flags & IGB_FLAG_HAS_MSIX)
+			wr32(E1000_EIMS, q_vector->eims_value);
+		else
+			igb_irq_enable(adapter);
+	}
+}
+
+/**
+ *  igb_poll - NAPI Rx polling callback
+ *  @napi: napi polling structure
+ *  @budget: count of how many packets we should handle
+ **/
+static int igb_poll(struct napi_struct *napi, int budget)
+{
+	struct igb_q_vector *q_vector = container_of(napi,
+						     struct igb_q_vector,
+						     napi);
+	bool clean_complete = true;
+	int work_done = 0;
+
+#ifdef CONFIG_IGB_DCA
+	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
+		igb_update_dca(q_vector);
+#endif
+	if (q_vector->tx.ring)
+		clean_complete = igb_clean_tx_irq(q_vector, budget);
+
+	if (q_vector->rx.ring) {
+		int cleaned = igb_clean_rx_irq(q_vector, budget);
+
+		work_done += cleaned;
+		if (cleaned >= budget)
+			clean_complete = false;
+	}
+
+	/* If all work not completed, return budget and keep polling */
+	if (!clean_complete)
+		return budget;
+
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done)))
+		igb_ring_irq_enable(q_vector);
+
+	return work_done;
+}
+
+/**
+ *  igb_clean_tx_irq - Reclaim resources after transmit completes
+ *  @q_vector: pointer to q_vector containing needed info
+ *  @napi_budget: Used to determine if we are in netpoll
+ *
+ *  returns true if ring is completely cleaned
+ **/
+static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	struct igb_ring *tx_ring = q_vector->tx.ring;
+	struct igb_tx_buffer *tx_buffer;
+	union e1000_adv_tx_desc *tx_desc;
+	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int budget = q_vector->tx.work_limit;
+	unsigned int i = tx_ring->next_to_clean;
+
+	if (test_bit(__IGB_DOWN, &adapter->state))
+		return true;
+
+	tx_buffer = &tx_ring->tx_buffer_info[i];
+	tx_desc = IGB_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buffer->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buffer->bytecount;
+		total_packets += tx_buffer->gso_segs;
+
+		/* free the skb */
+		if (tx_buffer->type == IGB_TYPE_SKB)
+			napi_consume_skb(tx_buffer->skb, napi_budget);
+		else
+			xdp_return_frame(tx_buffer->xdpf);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+
+		/* clear tx_buffer data */
+		dma_unmap_len_set(tx_buffer, len, 0);
+
+		/* clear last DMA location and unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(!i)) {
+				i -= tx_ring->count;
+				tx_buffer = tx_ring->tx_buffer_info;
+				tx_desc = IGB_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len)) {
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buffer, dma),
+					       dma_unmap_len(tx_buffer, len),
+					       DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buffer, len, 0);
+			}
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buffer = tx_ring->tx_buffer_info;
+			tx_desc = IGB_TX_DESC(tx_ring, 0);
+		}
+
+		/* issue prefetch for next Tx descriptor */
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	netdev_tx_completed_queue(txring_txq(tx_ring),
+				  total_packets, total_bytes);
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+	u64_stats_update_begin(&tx_ring->tx_syncp);
+	tx_ring->tx_stats.bytes += total_bytes;
+	tx_ring->tx_stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->tx_syncp);
+	q_vector->tx.total_bytes += total_bytes;
+	q_vector->tx.total_packets += total_packets;
+
+	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
+		struct e1000_hw *hw = &adapter->hw;
+
+		/* Detect a transmit hang in hardware, this serializes the
+		 * check with the clearing of time_stamp and movement of i
+		 */
+		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
+		if (tx_buffer->next_to_watch &&
+		    time_after(jiffies, tx_buffer->time_stamp +
+			       (adapter->tx_timeout_factor * HZ)) &&
+		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
+
+			/* detected Tx unit hang */
+			dev_err(tx_ring->dev,
+				"Detected Tx Unit Hang\n"
+				"  Tx Queue             <%d>\n"
+				"  TDH                  <%x>\n"
+				"  TDT                  <%x>\n"
+				"  next_to_use          <%x>\n"
+				"  next_to_clean        <%x>\n"
+				"buffer_info[next_to_clean]\n"
+				"  time_stamp           <%lx>\n"
+				"  next_to_watch        <%p>\n"
+				"  jiffies              <%lx>\n"
+				"  desc.status          <%x>\n",
+				tx_ring->queue_index,
+				rd32(E1000_TDH(tx_ring->reg_idx)),
+				readl(tx_ring->tail),
+				tx_ring->next_to_use,
+				tx_ring->next_to_clean,
+				tx_buffer->time_stamp,
+				tx_buffer->next_to_watch,
+				jiffies,
+				tx_buffer->next_to_watch->wb.status);
+			netif_stop_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+
+			/* we are about to reset, no point in enabling stuff */
+			return true;
+		}
+	}
+
+#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+	if (unlikely(total_packets &&
+	    netif_carrier_ok(tx_ring->netdev) &&
+	    igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (__netif_subqueue_stopped(tx_ring->netdev,
+					     tx_ring->queue_index) &&
+		    !(test_bit(__IGB_DOWN, &adapter->state))) {
+			netif_wake_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+
+			u64_stats_update_begin(&tx_ring->tx_syncp);
+			tx_ring->tx_stats.restart_queue++;
+			u64_stats_update_end(&tx_ring->tx_syncp);
+		}
+	}
+
+	return !!budget;
+}
+
+/**
+ *  igb_reuse_rx_page - page flip buffer and store it back on the ring
+ *  @rx_ring: rx descriptor ring to store buffers on
+ *  @old_buff: donor buffer to have page reused
+ *
+ *  Synchronizes page for reuse by the adapter
+ **/
+static void igb_reuse_rx_page(struct igb_ring *rx_ring,
+			      struct igb_rx_buffer *old_buff)
+{
+	struct igb_rx_buffer *new_buff;
+	u16 nta = rx_ring->next_to_alloc;
+
+	new_buff = &rx_ring->rx_buffer_info[nta];
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* Transfer page from old buffer to new buffer.
+	 * Move each member individually to avoid possible store
+	 * forwarding stalls.
+	 */
+	new_buff->dma		= old_buff->dma;
+	new_buff->page		= old_buff->page;
+	new_buff->page_offset	= old_buff->page_offset;
+	new_buff->pagecnt_bias	= old_buff->pagecnt_bias;
+}
+
+static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
+				  int rx_buf_pgcnt)
+{
+	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+	struct page *page = rx_buffer->page;
+
+	/* avoid re-using remote and pfmemalloc pages */
+	if (!dev_page_is_reusable(page))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1))
+		return false;
+#else
+#define IGB_LAST_OFFSET \
+	(SKB_WITH_OVERHEAD(PAGE_SIZE) - IGB_RXBUFFER_2048)
+
+	if (rx_buffer->page_offset > IGB_LAST_OFFSET)
+		return false;
+#endif
+
+	/* If we have drained the page fragment pool we need to update
+	 * the pagecnt_bias and page count so that we fully restock the
+	 * number of references the driver holds.
+	 */
+	if (unlikely(pagecnt_bias == 1)) {
+		page_ref_add(page, USHRT_MAX - 1);
+		rx_buffer->pagecnt_bias = USHRT_MAX;
+	}
+
+	return true;
+}
+
+/**
+ *  igb_add_rx_frag - Add contents of Rx buffer to sk_buff
+ *  @rx_ring: rx descriptor ring to transact packets on
+ *  @rx_buffer: buffer containing page to add
+ *  @skb: sk_buff to place the data into
+ *  @size: size of buffer to be added
+ *
+ *  This function will add the data contained in rx_buffer->page to the skb.
+ **/
+static void igb_add_rx_frag(struct igb_ring *rx_ring,
+			    struct igb_rx_buffer *rx_buffer,
+			    struct sk_buff *skb,
+			    unsigned int size)
+{
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+				SKB_DATA_ALIGN(IGB_SKB_PAD + size) :
+				SKB_DATA_ALIGN(size);
+#endif
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+			rx_buffer->page_offset, size, truesize);
+#if (PAGE_SIZE < 8192)
+	rx_buffer->page_offset ^= truesize;
+#else
+	rx_buffer->page_offset += truesize;
+#endif
+}
+
+static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
+					 struct igb_rx_buffer *rx_buffer,
+					 struct xdp_buff *xdp,
+					 ktime_t timestamp)
+{
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end -
+					       xdp->data_hard_start);
+#endif
+	unsigned int size = xdp->data_end - xdp->data;
+	unsigned int headlen;
+	struct sk_buff *skb;
+
+	/* prefetch first cache line of first page */
+	net_prefetch(xdp->data);
+
+	/* allocate a skb to store the frags */
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN);
+	if (unlikely(!skb))
+		return NULL;
+
+	if (timestamp)
+		skb_hwtstamps(skb)->hwtstamp = timestamp;
+
+	/* Determine available headroom for copy */
+	headlen = size;
+	if (headlen > IGB_RX_HDR_LEN)
+		headlen = eth_get_headlen(skb->dev, xdp->data, IGB_RX_HDR_LEN);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen, sizeof(long)));
+
+	/* update all of the pointers */
+	size -= headlen;
+	if (size) {
+		skb_add_rx_frag(skb, 0, rx_buffer->page,
+				(xdp->data + headlen) - page_address(rx_buffer->page),
+				size, truesize);
+#if (PAGE_SIZE < 8192)
+		rx_buffer->page_offset ^= truesize;
+#else
+		rx_buffer->page_offset += truesize;
+#endif
+	} else {
+		rx_buffer->pagecnt_bias++;
+	}
+
+	return skb;
+}
+
+static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
+				     struct igb_rx_buffer *rx_buffer,
+				     struct xdp_buff *xdp,
+				     ktime_t timestamp)
+{
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+				SKB_DATA_ALIGN(xdp->data_end -
+					       xdp->data_hard_start);
+#endif
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	struct sk_buff *skb;
+
+	/* prefetch first cache line of first page */
+	net_prefetch(xdp->data_meta);
+
+	/* build an skb around the page buffer */
+	skb = napi_build_skb(xdp->data_hard_start, truesize);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* update pointers within the skb to store the data */
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	__skb_put(skb, xdp->data_end - xdp->data);
+
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	if (timestamp)
+		skb_hwtstamps(skb)->hwtstamp = timestamp;
+
+	/* update buffer offset */
+#if (PAGE_SIZE < 8192)
+	rx_buffer->page_offset ^= truesize;
+#else
+	rx_buffer->page_offset += truesize;
+#endif
+
+	return skb;
+}
+
+static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
+				   struct igb_ring *rx_ring,
+				   struct xdp_buff *xdp)
+{
+	int err, result = IGB_XDP_PASS;
+	struct bpf_prog *xdp_prog;
+	u32 act;
+
+	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
+	if (!xdp_prog)
+		goto xdp_out;
+
+	prefetchw(xdp->data_hard_start); /* xdp_frame write */
+
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		result = igb_xdp_xmit_back(adapter, xdp);
+		if (result == IGB_XDP_CONSUMED)
+			goto out_failure;
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
+		if (err)
+			goto out_failure;
+		result = IGB_XDP_REDIR;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(adapter->netdev, xdp_prog, act);
+		fallthrough;
+	case XDP_ABORTED:
+out_failure:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		fallthrough;
+	case XDP_DROP:
+		result = IGB_XDP_CONSUMED;
+		break;
+	}
+xdp_out:
+	return ERR_PTR(-result);
+}
+
+static unsigned int igb_rx_frame_truesize(struct igb_ring *rx_ring,
+					  unsigned int size)
+{
+	unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+	truesize = igb_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
+#else
+	truesize = ring_uses_build_skb(rx_ring) ?
+		SKB_DATA_ALIGN(IGB_SKB_PAD + size) +
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
+		SKB_DATA_ALIGN(size);
+#endif
+	return truesize;
+}
+
+static void igb_rx_buffer_flip(struct igb_ring *rx_ring,
+			       struct igb_rx_buffer *rx_buffer,
+			       unsigned int size)
+{
+	unsigned int truesize = igb_rx_frame_truesize(rx_ring, size);
+#if (PAGE_SIZE < 8192)
+	rx_buffer->page_offset ^= truesize;
+#else
+	rx_buffer->page_offset += truesize;
+#endif
+}
+
+static inline void igb_rx_checksum(struct igb_ring *ring,
+				   union e1000_adv_rx_desc *rx_desc,
+				   struct sk_buff *skb)
+{
+	skb_checksum_none_assert(skb);
+
+	/* Ignore Checksum bit is set */
+	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
+		return;
+
+	/* Rx checksum disabled via ethtool */
+	if (!(ring->netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* TCP/UDP checksum error bit is set */
+	if (igb_test_staterr(rx_desc,
+			     E1000_RXDEXT_STATERR_TCPE |
+			     E1000_RXDEXT_STATERR_IPE)) {
+		/* work around errata with sctp packets where the TCPE aka
+		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
+		 * packets, (aka let the stack check the crc32c)
+		 */
+		if (!((skb->len == 60) &&
+		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
+			u64_stats_update_begin(&ring->rx_syncp);
+			ring->rx_stats.csum_err++;
+			u64_stats_update_end(&ring->rx_syncp);
+		}
+		/* let the stack verify checksum errors */
+		return;
+	}
+	/* It must be a TCP or UDP packet with a valid checksum */
+	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
+				      E1000_RXD_STAT_UDPCS))
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	dev_dbg(ring->dev, "cksum success: bits %08X\n",
+		le32_to_cpu(rx_desc->wb.upper.status_error));
+}
+
+static inline void igb_rx_hash(struct igb_ring *ring,
+			       union e1000_adv_rx_desc *rx_desc,
+			       struct sk_buff *skb)
+{
+	if (ring->netdev->features & NETIF_F_RXHASH)
+		skb_set_hash(skb,
+			     le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
+			     PKT_HASH_TYPE_L3);
+}
+
+/**
+ *  igb_is_non_eop - process handling of non-EOP buffers
+ *  @rx_ring: Rx ring being processed
+ *  @rx_desc: Rx descriptor for current buffer
+ *
+ *  This function updates next to clean.  If the buffer is an EOP buffer
+ *  this function exits returning false, otherwise it will place the
+ *  sk_buff in the next buffer to be chained and return true indicating
+ *  that this is in fact a non-EOP buffer.
+ **/
+static bool igb_is_non_eop(struct igb_ring *rx_ring,
+			   union e1000_adv_rx_desc *rx_desc)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
+	prefetch(IGB_RX_DESC(rx_ring, ntc));
+
+	if (likely(igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)))
+		return false;
+
+	return true;
+}
+
+/**
+ *  igb_cleanup_headers - Correct corrupted or empty headers
+ *  @rx_ring: rx descriptor ring packet is being transacted on
+ *  @rx_desc: pointer to the EOP Rx descriptor
+ *  @skb: pointer to current skb being fixed
+ *
+ *  Address the case where we are pulling data in on pages only
+ *  and as such no data is present in the skb header.
+ *
+ *  In addition if skb is not at least 60 bytes we need to pad it so that
+ *  it is large enough to qualify as a valid Ethernet frame.
+ *
+ *  Returns true if an error was encountered and skb was freed.
+ **/
+static bool igb_cleanup_headers(struct igb_ring *rx_ring,
+				union e1000_adv_rx_desc *rx_desc,
+				struct sk_buff *skb)
+{
+	/* XDP packets use error pointer so abort at this point */
+	if (IS_ERR(skb))
+		return true;
+
+	if (unlikely((igb_test_staterr(rx_desc,
+				       E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) {
+		struct net_device *netdev = rx_ring->netdev;
+		if (!(netdev->features & NETIF_F_RXALL)) {
+			dev_kfree_skb_any(skb);
+			return true;
+		}
+	}
+
+	/* if eth_skb_pad returns an error the skb was freed */
+	if (eth_skb_pad(skb))
+		return true;
+
+	return false;
+}
+
+/**
+ *  igb_process_skb_fields - Populate skb header fields from Rx descriptor
+ *  @rx_ring: rx descriptor ring packet is being transacted on
+ *  @rx_desc: pointer to the EOP Rx descriptor
+ *  @skb: pointer to current skb being populated
+ *
+ *  This function checks the ring, descriptor, and packet information in
+ *  order to populate the hash, checksum, VLAN, timestamp, protocol, and
+ *  other fields within the skb.
+ **/
+static void igb_process_skb_fields(struct igb_ring *rx_ring,
+				   union e1000_adv_rx_desc *rx_desc,
+				   struct sk_buff *skb)
+{
+	struct net_device *dev = rx_ring->netdev;
+
+	igb_rx_hash(rx_ring, rx_desc, skb);
+
+	igb_rx_checksum(rx_ring, rx_desc, skb);
+
+	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TS) &&
+	    !igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))
+		igb_ptp_rx_rgtstamp(rx_ring->q_vector, skb);
+
+	if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+	    igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
+		u16 vid;
+
+		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
+		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
+			vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan);
+		else
+			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
+
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+	}
+
+	skb_record_rx_queue(skb, rx_ring->queue_index);
+
+	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+}
+
+static unsigned int igb_rx_offset(struct igb_ring *rx_ring)
+{
+	return ring_uses_build_skb(rx_ring) ? IGB_SKB_PAD : 0;
+}
+
+static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring,
+					       const unsigned int size, int *rx_buf_pgcnt)
+{
+	struct igb_rx_buffer *rx_buffer;
+
+	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+	*rx_buf_pgcnt =
+#if (PAGE_SIZE < 8192)
+		page_count(rx_buffer->page);
+#else
+		0;
+#endif
+	prefetchw(rx_buffer->page);
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      rx_buffer->dma,
+				      rx_buffer->page_offset,
+				      size,
+				      DMA_FROM_DEVICE);
+
+	rx_buffer->pagecnt_bias--;
+
+	return rx_buffer;
+}
+
+static void igb_put_rx_buffer(struct igb_ring *rx_ring,
+			      struct igb_rx_buffer *rx_buffer, int rx_buf_pgcnt)
+{
+	if (igb_can_reuse_rx_page(rx_buffer, rx_buf_pgcnt)) {
+		/* hand second half of page back to the ring */
+		igb_reuse_rx_page(rx_ring, rx_buffer);
+	} else {
+		/* We are not reusing the buffer so unmap it and free
+		 * any references we are holding to it
+		 */
+		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+				     igb_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
+				     IGB_RX_DMA_ATTR);
+		__page_frag_cache_drain(rx_buffer->page,
+					rx_buffer->pagecnt_bias);
+	}
+
+	/* clear contents of rx_buffer */
+	rx_buffer->page = NULL;
+}
+
+static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	struct igb_ring *rx_ring = q_vector->rx.ring;
+	struct sk_buff *skb = rx_ring->skb;
+	unsigned int total_bytes = 0, total_packets = 0;
+	u16 cleaned_count = igb_desc_unused(rx_ring);
+	unsigned int xdp_xmit = 0;
+	struct xdp_buff xdp;
+	u32 frame_sz = 0;
+	int rx_buf_pgcnt;
+
+	/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
+#if (PAGE_SIZE < 8192)
+	frame_sz = igb_rx_frame_truesize(rx_ring, 0);
+#endif
+	xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
+
+	while (likely(total_packets < budget)) {
+		union e1000_adv_rx_desc *rx_desc;
+		struct igb_rx_buffer *rx_buffer;
+		ktime_t timestamp = 0;
+		int pkt_offset = 0;
+		unsigned int size;
+		void *pktbuf;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
+			igb_alloc_rx_buffers(rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean);
+		size = le16_to_cpu(rx_desc->wb.upper.length);
+		if (!size)
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * descriptor has been written back
+		 */
+		dma_rmb();
+
+		rx_buffer = igb_get_rx_buffer(rx_ring, size, &rx_buf_pgcnt);
+		pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
+
+		/* pull rx packet timestamp if available and valid */
+		if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+			int ts_hdr_len;
+
+			ts_hdr_len = igb_ptp_rx_pktstamp(rx_ring->q_vector,
+							 pktbuf, &timestamp);
+
+			pkt_offset += ts_hdr_len;
+			size -= ts_hdr_len;
+		}
+
+		/* retrieve a buffer from the ring */
+		if (!skb) {
+			unsigned char *hard_start = pktbuf - igb_rx_offset(rx_ring);
+			unsigned int offset = pkt_offset + igb_rx_offset(rx_ring);
+
+			xdp_prepare_buff(&xdp, hard_start, offset, size, true);
+			xdp_buff_clear_frags_flag(&xdp);
+#if (PAGE_SIZE > 4096)
+			/* At larger PAGE_SIZE, frame_sz depend on len size */
+			xdp.frame_sz = igb_rx_frame_truesize(rx_ring, size);
+#endif
+			skb = igb_run_xdp(adapter, rx_ring, &xdp);
+		}
+
+		if (IS_ERR(skb)) {
+			unsigned int xdp_res = -PTR_ERR(skb);
+
+			if (xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR)) {
+				xdp_xmit |= xdp_res;
+				igb_rx_buffer_flip(rx_ring, rx_buffer, size);
+			} else {
+				rx_buffer->pagecnt_bias++;
+			}
+			total_packets++;
+			total_bytes += size;
+		} else if (skb)
+			igb_add_rx_frag(rx_ring, rx_buffer, skb, size);
+		else if (ring_uses_build_skb(rx_ring))
+			skb = igb_build_skb(rx_ring, rx_buffer, &xdp,
+					    timestamp);
+		else
+			skb = igb_construct_skb(rx_ring, rx_buffer,
+						&xdp, timestamp);
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb) {
+			rx_ring->rx_stats.alloc_failed++;
+			rx_buffer->pagecnt_bias++;
+			break;
+		}
+
+		igb_put_rx_buffer(rx_ring, rx_buffer, rx_buf_pgcnt);
+		cleaned_count++;
+
+		/* fetch next buffer in frame if non-eop */
+		if (igb_is_non_eop(rx_ring, rx_desc))
+			continue;
+
+		/* verify the packet layout is correct */
+		if (igb_cleanup_headers(rx_ring, rx_desc, skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		/* probably a little skewed due to removing CRC */
+		total_bytes += skb->len;
+
+		/* populate checksum, timestamp, VLAN, and protocol */
+		igb_process_skb_fields(rx_ring, rx_desc, skb);
+
+		napi_gro_receive(&q_vector->napi, skb);
+
+		/* reset skb pointer */
+		skb = NULL;
+
+		/* update budget accounting */
+		total_packets++;
+	}
+
+	/* place incomplete frames back on ring for completion */
+	rx_ring->skb = skb;
+
+	if (xdp_xmit & IGB_XDP_REDIR)
+		xdp_do_flush();
+
+	if (xdp_xmit & IGB_XDP_TX) {
+		struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
+
+		igb_xdp_ring_update_tail(tx_ring);
+	}
+
+	u64_stats_update_begin(&rx_ring->rx_syncp);
+	rx_ring->rx_stats.packets += total_packets;
+	rx_ring->rx_stats.bytes += total_bytes;
+	u64_stats_update_end(&rx_ring->rx_syncp);
+	q_vector->rx.total_packets += total_packets;
+	q_vector->rx.total_bytes += total_bytes;
+
+	if (cleaned_count)
+		igb_alloc_rx_buffers(rx_ring, cleaned_count);
+
+	return total_packets;
+}
+
+static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
+				  struct igb_rx_buffer *bi)
+{
+	struct page *page = bi->page;
+	dma_addr_t dma;
+
+	/* since we are recycling buffers we should seldom need to alloc */
+	if (likely(page))
+		return true;
+
+	/* alloc new page for storage */
+	page = dev_alloc_pages(igb_rx_pg_order(rx_ring));
+	if (unlikely(!page)) {
+		rx_ring->rx_stats.alloc_failed++;
+		return false;
+	}
+
+	/* map page for use */
+	dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+				 igb_rx_pg_size(rx_ring),
+				 DMA_FROM_DEVICE,
+				 IGB_RX_DMA_ATTR);
+
+	/* if mapping failed free memory back to system since
+	 * there isn't much point in holding memory we can't use
+	 */
+	if (dma_mapping_error(rx_ring->dev, dma)) {
+		__free_pages(page, igb_rx_pg_order(rx_ring));
+
+		rx_ring->rx_stats.alloc_failed++;
+		return false;
+	}
+
+	bi->dma = dma;
+	bi->page = page;
+	bi->page_offset = igb_rx_offset(rx_ring);
+	page_ref_add(page, USHRT_MAX - 1);
+	bi->pagecnt_bias = USHRT_MAX;
+
+	return true;
+}
+
+/**
+ *  igb_alloc_rx_buffers - Replace used receive buffers
+ *  @rx_ring: rx descriptor ring to allocate new receive buffers
+ *  @cleaned_count: count of buffers to allocate
+ **/
+void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
+{
+	union e1000_adv_rx_desc *rx_desc;
+	struct igb_rx_buffer *bi;
+	u16 i = rx_ring->next_to_use;
+	u16 bufsz;
+
+	/* nothing to do */
+	if (!cleaned_count)
+		return;
+
+	rx_desc = IGB_RX_DESC(rx_ring, i);
+	bi = &rx_ring->rx_buffer_info[i];
+	i -= rx_ring->count;
+
+	bufsz = igb_rx_bufsz(rx_ring);
+
+	do {
+		if (!igb_alloc_mapped_page(rx_ring, bi))
+			break;
+
+		/* sync the buffer for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+						 bi->page_offset, bufsz,
+						 DMA_FROM_DEVICE);
+
+		/* Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+		rx_desc++;
+		bi++;
+		i++;
+		if (unlikely(!i)) {
+			rx_desc = IGB_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_buffer_info;
+			i -= rx_ring->count;
+		}
+
+		/* clear the length for the next_to_use descriptor */
+		rx_desc->wb.upper.length = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	i += rx_ring->count;
+
+	if (rx_ring->next_to_use != i) {
+		/* record the next descriptor to use */
+		rx_ring->next_to_use = i;
+
+		/* update next to alloc since we have filled the ring */
+		rx_ring->next_to_alloc = i;
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		dma_wmb();
+		writel(i, rx_ring->tail);
+	}
+}
+
+/**
+ * igb_mii_ioctl -
+ * @netdev: pointer to netdev struct
+ * @ifr: interface structure
+ * @cmd: ioctl command to execute
+ **/
+static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct mii_ioctl_data *data = if_mii(ifr);
+
+	if (adapter->hw.phy.media_type != e1000_media_type_copper)
+		return -EOPNOTSUPP;
+
+	switch (cmd) {
+	case SIOCGMIIPHY:
+		data->phy_id = adapter->hw.phy.addr;
+		break;
+	case SIOCGMIIREG:
+		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
+				     &data->val_out))
+			return -EIO;
+		break;
+	case SIOCSMIIREG:
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+/**
+ * igb_ioctl -
+ * @netdev: pointer to netdev struct
+ * @ifr: interface structure
+ * @cmd: ioctl command to execute
+ **/
+static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	switch (cmd) {
+	case SIOCGMIIPHY:
+	case SIOCGMIIREG:
+	case SIOCSMIIREG:
+		return igb_mii_ioctl(netdev, ifr, cmd);
+	case SIOCGHWTSTAMP:
+		return igb_ptp_get_ts_config(netdev, ifr);
+	case SIOCSHWTSTAMP:
+		return igb_ptp_set_ts_config(netdev, ifr);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value)
+{
+	struct igb_adapter *adapter = hw->back;
+
+	pci_read_config_word(adapter->pdev, reg, value);
+}
+
+void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value)
+{
+	struct igb_adapter *adapter = hw->back;
+
+	pci_write_config_word(adapter->pdev, reg, *value);
+}
+
+s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
+{
+	struct igb_adapter *adapter = hw->back;
+
+	if (pcie_capability_read_word(adapter->pdev, reg, value))
+		return -E1000_ERR_CONFIG;
+
+	return 0;
+}
+
+s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
+{
+	struct igb_adapter *adapter = hw->back;
+
+	if (pcie_capability_write_word(adapter->pdev, reg, *value))
+		return -E1000_ERR_CONFIG;
+
+	return 0;
+}
+
+static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl, rctl;
+	bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
+
+	if (enable) {
+		/* enable VLAN tag insert/strip */
+		ctrl = rd32(E1000_CTRL);
+		ctrl |= E1000_CTRL_VME;
+		wr32(E1000_CTRL, ctrl);
+
+		/* Disable CFI check */
+		rctl = rd32(E1000_RCTL);
+		rctl &= ~E1000_RCTL_CFIEN;
+		wr32(E1000_RCTL, rctl);
+	} else {
+		/* disable VLAN tag insert/strip */
+		ctrl = rd32(E1000_CTRL);
+		ctrl &= ~E1000_CTRL_VME;
+		wr32(E1000_CTRL, ctrl);
+	}
+
+	igb_set_vf_vlan_strip(adapter, adapter->vfs_allocated_count, enable);
+}
+
+static int igb_vlan_rx_add_vid(struct net_device *netdev,
+			       __be16 proto, u16 vid)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	int pf_id = adapter->vfs_allocated_count;
+
+	/* add the filter since PF can receive vlans w/o entry in vlvf */
+	if (!vid || !(adapter->flags & IGB_FLAG_VLAN_PROMISC))
+		igb_vfta_set(hw, vid, pf_id, true, !!vid);
+
+	set_bit(vid, adapter->active_vlans);
+
+	return 0;
+}
+
+static int igb_vlan_rx_kill_vid(struct net_device *netdev,
+				__be16 proto, u16 vid)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	int pf_id = adapter->vfs_allocated_count;
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* remove VID from filter table */
+	if (vid && !(adapter->flags & IGB_FLAG_VLAN_PROMISC))
+		igb_vfta_set(hw, vid, pf_id, false, true);
+
+	clear_bit(vid, adapter->active_vlans);
+
+	return 0;
+}
+
+static void igb_restore_vlan(struct igb_adapter *adapter)
+{
+	u16 vid = 1;
+
+	igb_vlan_mode(adapter->netdev, adapter->netdev->features);
+	igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0);
+
+	for_each_set_bit_from(vid, adapter->active_vlans, VLAN_N_VID)
+		igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid);
+}
+
+int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct e1000_mac_info *mac = &adapter->hw.mac;
+
+	mac->autoneg = 0;
+
+	/* Make sure dplx is at most 1 bit and lsb of speed is not set
+	 * for the switch() below to work
+	 */
+	if ((spd & 1) || (dplx & ~1))
+		goto err_inval;
+
+	/* Fiber NIC's only allow 1000 gbps Full duplex
+	 * and 100Mbps Full duplex for 100baseFx sfp
+	 */
+	if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
+		switch (spd + dplx) {
+		case SPEED_10 + DUPLEX_HALF:
+		case SPEED_10 + DUPLEX_FULL:
+		case SPEED_100 + DUPLEX_HALF:
+			goto err_inval;
+		default:
+			break;
+		}
+	}
+
+	switch (spd + dplx) {
+	case SPEED_10 + DUPLEX_HALF:
+		mac->forced_speed_duplex = ADVERTISE_10_HALF;
+		break;
+	case SPEED_10 + DUPLEX_FULL:
+		mac->forced_speed_duplex = ADVERTISE_10_FULL;
+		break;
+	case SPEED_100 + DUPLEX_HALF:
+		mac->forced_speed_duplex = ADVERTISE_100_HALF;
+		break;
+	case SPEED_100 + DUPLEX_FULL:
+		mac->forced_speed_duplex = ADVERTISE_100_FULL;
+		break;
+	case SPEED_1000 + DUPLEX_FULL:
+		mac->autoneg = 1;
+		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
+		break;
+	case SPEED_1000 + DUPLEX_HALF: /* not supported */
+	default:
+		goto err_inval;
+	}
+
+	/* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
+	adapter->hw.phy.mdix = AUTO_ALL_MODES;
+
+	return 0;
+
+err_inval:
+	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
+	return -EINVAL;
+}
+
+static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
+			  bool runtime)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ctrl, rctl, status;
+	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
+	bool wake;
+
+	rtnl_lock();
+	netif_device_detach(netdev);
+
+	if (netif_running(netdev))
+		__igb_close(netdev, true);
+
+	igb_ptp_suspend(adapter);
+
+	igb_clear_interrupt_scheme(adapter);
+	rtnl_unlock();
+
+	status = rd32(E1000_STATUS);
+	if (status & E1000_STATUS_LU)
+		wufc &= ~E1000_WUFC_LNKC;
+
+	if (wufc) {
+		igb_setup_rctl(adapter);
+		igb_set_rx_mode(netdev);
+
+		/* turn on all-multi mode if wake on multicast is enabled */
+		if (wufc & E1000_WUFC_MC) {
+			rctl = rd32(E1000_RCTL);
+			rctl |= E1000_RCTL_MPE;
+			wr32(E1000_RCTL, rctl);
+		}
+
+		ctrl = rd32(E1000_CTRL);
+		ctrl |= E1000_CTRL_ADVD3WUC;
+		wr32(E1000_CTRL, ctrl);
+
+		/* Allow time for pending master requests to run */
+		igb_disable_pcie_master(hw);
+
+		wr32(E1000_WUC, E1000_WUC_PME_EN);
+		wr32(E1000_WUFC, wufc);
+	} else {
+		wr32(E1000_WUC, 0);
+		wr32(E1000_WUFC, 0);
+	}
+
+	wake = wufc || adapter->en_mng_pt;
+	if (!wake)
+		igb_power_down_link(adapter);
+	else
+		igb_power_up_link(adapter);
+
+	if (enable_wake)
+		*enable_wake = wake;
+
+	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
+	 * would have already happened in close and is redundant.
+	 */
+	igb_release_hw_control(adapter);
+
+	pci_disable_device(pdev);
+
+	return 0;
+}
+
+static void igb_deliver_wake_packet(struct net_device *netdev)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct sk_buff *skb;
+	u32 wupl;
+
+	wupl = rd32(E1000_WUPL) & E1000_WUPL_MASK;
+
+	/* WUPM stores only the first 128 bytes of the wake packet.
+	 * Read the packet only if we have the whole thing.
+	 */
+	if ((wupl == 0) || (wupl > E1000_WUPM_BYTES))
+		return;
+
+	skb = netdev_alloc_skb_ip_align(netdev, E1000_WUPM_BYTES);
+	if (!skb)
+		return;
+
+	skb_put(skb, wupl);
+
+	/* Ensure reads are 32-bit aligned */
+	wupl = roundup(wupl, 4);
+
+	memcpy_fromio(skb->data, hw->hw_addr + E1000_WUPM_REG(0), wupl);
+
+	skb->protocol = eth_type_trans(skb, netdev);
+	netif_rx(skb);
+}
+
+static int __maybe_unused igb_suspend(struct device *dev)
+{
+	return __igb_shutdown(to_pci_dev(dev), NULL, 0);
+}
+
+static int __maybe_unused __igb_resume(struct device *dev, bool rpm)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 err, val;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	pci_save_state(pdev);
+
+	if (!pci_device_is_present(pdev))
+		return -ENODEV;
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		dev_err(&pdev->dev,
+			"igb: Cannot enable PCI device from suspend\n");
+		return err;
+	}
+	pci_set_master(pdev);
+
+	pci_enable_wake(pdev, PCI_D3hot, 0);
+	pci_enable_wake(pdev, PCI_D3cold, 0);
+
+	if (igb_init_interrupt_scheme(adapter, true)) {
+		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
+		return -ENOMEM;
+	}
+
+	igb_reset(adapter);
+
+	/* let the f/w know that the h/w is now under the control of the
+	 * driver.
+	 */
+	igb_get_hw_control(adapter);
+
+	val = rd32(E1000_WUS);
+	if (val & WAKE_PKT_WUS)
+		igb_deliver_wake_packet(netdev);
+
+	wr32(E1000_WUS, ~0);
+
+	if (!rpm)
+		rtnl_lock();
+	if (!err && netif_running(netdev))
+		err = __igb_open(netdev, true);
+
+	if (!err)
+		netif_device_attach(netdev);
+	if (!rpm)
+		rtnl_unlock();
+
+	return err;
+}
+
+static int __maybe_unused igb_resume(struct device *dev)
+{
+	return __igb_resume(dev, false);
+}
+
+static int __maybe_unused igb_runtime_idle(struct device *dev)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if (!igb_has_link(adapter))
+		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
+
+	return -EBUSY;
+}
+
+static int __maybe_unused igb_runtime_suspend(struct device *dev)
+{
+	return __igb_shutdown(to_pci_dev(dev), NULL, 1);
+}
+
+static int __maybe_unused igb_runtime_resume(struct device *dev)
+{
+	return __igb_resume(dev, true);
+}
+
+static void igb_shutdown(struct pci_dev *pdev)
+{
+	bool wake;
+
+	__igb_shutdown(pdev, &wake, 0);
+
+	if (system_state == SYSTEM_POWER_OFF) {
+		pci_wake_from_d3(pdev, wake);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+}
+
+static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
+{
+#ifdef CONFIG_PCI_IOV
+	int err;
+
+	if (num_vfs == 0) {
+		return igb_disable_sriov(dev, true);
+	} else {
+		err = igb_enable_sriov(dev, num_vfs, true);
+		return err ? err : num_vfs;
+	}
+#endif
+	return 0;
+}
+
+/**
+ *  igb_io_error_detected - called when PCI error is detected
+ *  @pdev: Pointer to PCI device
+ *  @state: The current pci connection state
+ *
+ *  This function is called after a PCI bus error affecting
+ *  this device has been detected.
+ **/
+static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
+					      pci_channel_state_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if (state == pci_channel_io_normal) {
+		dev_warn(&pdev->dev, "Non-correctable non-fatal error reported.\n");
+		return PCI_ERS_RESULT_CAN_RECOVER;
+	}
+
+	netif_device_detach(netdev);
+
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	if (netif_running(netdev))
+		igb_down(adapter);
+	pci_disable_device(pdev);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ *  igb_io_slot_reset - called after the pci bus has been reset.
+ *  @pdev: Pointer to PCI device
+ *
+ *  Restart the card from scratch, as if from a cold-boot. Implementation
+ *  resembles the first-half of the __igb_resume routine.
+ **/
+static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	pci_ers_result_t result;
+
+	if (pci_enable_device_mem(pdev)) {
+		dev_err(&pdev->dev,
+			"Cannot re-enable PCI device after reset.\n");
+		result = PCI_ERS_RESULT_DISCONNECT;
+	} else {
+		pci_set_master(pdev);
+		pci_restore_state(pdev);
+		pci_save_state(pdev);
+
+		pci_enable_wake(pdev, PCI_D3hot, 0);
+		pci_enable_wake(pdev, PCI_D3cold, 0);
+
+		/* In case of PCI error, adapter lose its HW address
+		 * so we should re-assign it here.
+		 */
+		hw->hw_addr = adapter->io_addr;
+
+		igb_reset(adapter);
+		wr32(E1000_WUS, ~0);
+		result = PCI_ERS_RESULT_RECOVERED;
+	}
+
+	return result;
+}
+
+/**
+ *  igb_io_resume - called when traffic can start flowing again.
+ *  @pdev: Pointer to PCI device
+ *
+ *  This callback is called when the error recovery driver tells us that
+ *  its OK to resume normal operation. Implementation resembles the
+ *  second-half of the __igb_resume routine.
+ */
+static void igb_io_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if (netif_running(netdev)) {
+		if (igb_up(adapter)) {
+			dev_err(&pdev->dev, "igb_up failed after reset\n");
+			return;
+		}
+	}
+
+	netif_device_attach(netdev);
+
+	/* let the f/w know that the h/w is now under the control of the
+	 * driver.
+	 */
+	igb_get_hw_control(adapter);
+}
+
+/**
+ *  igb_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table
+ *  @adapter: Pointer to adapter structure
+ *  @index: Index of the RAR entry which need to be synced with MAC table
+ **/
+static void igb_rar_set_index(struct igb_adapter *adapter, u32 index)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rar_low, rar_high;
+	u8 *addr = adapter->mac_table[index].addr;
+
+	/* HW expects these to be in network order when they are plugged
+	 * into the registers which are little endian.  In order to guarantee
+	 * that ordering we need to do an leXX_to_cpup here in order to be
+	 * ready for the byteswap that occurs with writel
+	 */
+	rar_low = le32_to_cpup((__le32 *)(addr));
+	rar_high = le16_to_cpup((__le16 *)(addr + 4));
+
+	/* Indicate to hardware the Address is Valid. */
+	if (adapter->mac_table[index].state & IGB_MAC_STATE_IN_USE) {
+		if (is_valid_ether_addr(addr))
+			rar_high |= E1000_RAH_AV;
+
+		if (adapter->mac_table[index].state & IGB_MAC_STATE_SRC_ADDR)
+			rar_high |= E1000_RAH_ASEL_SRC_ADDR;
+
+		switch (hw->mac.type) {
+		case e1000_82575:
+		case e1000_i210:
+			if (adapter->mac_table[index].state &
+			    IGB_MAC_STATE_QUEUE_STEERING)
+				rar_high |= E1000_RAH_QSEL_ENABLE;
+
+			rar_high |= E1000_RAH_POOL_1 *
+				    adapter->mac_table[index].queue;
+			break;
+		default:
+			rar_high |= E1000_RAH_POOL_1 <<
+				    adapter->mac_table[index].queue;
+			break;
+		}
+	}
+
+	wr32(E1000_RAL(index), rar_low);
+	wrfl();
+	wr32(E1000_RAH(index), rar_high);
+	wrfl();
+}
+
+static int igb_set_vf_mac(struct igb_adapter *adapter,
+			  int vf, unsigned char *mac_addr)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	/* VF MAC addresses start at end of receive addresses and moves
+	 * towards the first, as a result a collision should not be possible
+	 */
+	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
+	unsigned char *vf_mac_addr = adapter->vf_data[vf].vf_mac_addresses;
+
+	ether_addr_copy(vf_mac_addr, mac_addr);
+	ether_addr_copy(adapter->mac_table[rar_entry].addr, mac_addr);
+	adapter->mac_table[rar_entry].queue = vf;
+	adapter->mac_table[rar_entry].state |= IGB_MAC_STATE_IN_USE;
+	igb_rar_set_index(adapter, rar_entry);
+
+	return 0;
+}
+
+static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if (vf >= adapter->vfs_allocated_count)
+		return -EINVAL;
+
+	/* Setting the VF MAC to 0 reverts the IGB_VF_FLAG_PF_SET_MAC
+	 * flag and allows to overwrite the MAC via VF netdev.  This
+	 * is necessary to allow libvirt a way to restore the original
+	 * MAC after unbinding vfio-pci and reloading igbvf after shutting
+	 * down a VM.
+	 */
+	if (is_zero_ether_addr(mac)) {
+		adapter->vf_data[vf].flags &= ~IGB_VF_FLAG_PF_SET_MAC;
+		dev_info(&adapter->pdev->dev,
+			 "remove administratively set MAC on VF %d\n",
+			 vf);
+	} else if (is_valid_ether_addr(mac)) {
+		adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
+		dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n",
+			 mac, vf);
+		dev_info(&adapter->pdev->dev,
+			 "Reload the VF driver to make this change effective.");
+		/* Generate additional warning if PF is down */
+		if (test_bit(__IGB_DOWN, &adapter->state)) {
+			dev_warn(&adapter->pdev->dev,
+				 "The VF MAC address has been set, but the PF device is not up.\n");
+			dev_warn(&adapter->pdev->dev,
+				 "Bring the PF device up before attempting to use the VF device.\n");
+		}
+	} else {
+		return -EINVAL;
+	}
+	return igb_set_vf_mac(adapter, vf, mac);
+}
+
+static int igb_link_mbps(int internal_link_speed)
+{
+	switch (internal_link_speed) {
+	case SPEED_100:
+		return 100;
+	case SPEED_1000:
+		return 1000;
+	default:
+		return 0;
+	}
+}
+
+static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
+				  int link_speed)
+{
+	int rf_dec, rf_int;
+	u32 bcnrc_val;
+
+	if (tx_rate != 0) {
+		/* Calculate the rate factor values to set */
+		rf_int = link_speed / tx_rate;
+		rf_dec = (link_speed - (rf_int * tx_rate));
+		rf_dec = (rf_dec * BIT(E1000_RTTBCNRC_RF_INT_SHIFT)) /
+			 tx_rate;
+
+		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
+		bcnrc_val |= ((rf_int << E1000_RTTBCNRC_RF_INT_SHIFT) &
+			      E1000_RTTBCNRC_RF_INT_MASK);
+		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
+	} else {
+		bcnrc_val = 0;
+	}
+
+	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
+	/* Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
+	 * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
+	 */
+	wr32(E1000_RTTBCNRM, 0x14);
+	wr32(E1000_RTTBCNRC, bcnrc_val);
+}
+
+static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
+{
+	int actual_link_speed, i;
+	bool reset_rate = false;
+
+	/* VF TX rate limit was not set or not supported */
+	if ((adapter->vf_rate_link_speed == 0) ||
+	    (adapter->hw.mac.type != e1000_82576))
+		return;
+
+	actual_link_speed = igb_link_mbps(adapter->link_speed);
+	if (actual_link_speed != adapter->vf_rate_link_speed) {
+		reset_rate = true;
+		adapter->vf_rate_link_speed = 0;
+		dev_info(&adapter->pdev->dev,
+			 "Link speed has been changed. VF Transmit rate is disabled\n");
+	}
+
+	for (i = 0; i < adapter->vfs_allocated_count; i++) {
+		if (reset_rate)
+			adapter->vf_data[i].tx_rate = 0;
+
+		igb_set_vf_rate_limit(&adapter->hw, i,
+				      adapter->vf_data[i].tx_rate,
+				      actual_link_speed);
+	}
+}
+
+static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf,
+			     int min_tx_rate, int max_tx_rate)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	int actual_link_speed;
+
+	if (hw->mac.type != e1000_82576)
+		return -EOPNOTSUPP;
+
+	if (min_tx_rate)
+		return -EINVAL;
+
+	actual_link_speed = igb_link_mbps(adapter->link_speed);
+	if ((vf >= adapter->vfs_allocated_count) ||
+	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
+	    (max_tx_rate < 0) ||
+	    (max_tx_rate > actual_link_speed))
+		return -EINVAL;
+
+	adapter->vf_rate_link_speed = actual_link_speed;
+	adapter->vf_data[vf].tx_rate = (u16)max_tx_rate;
+	igb_set_vf_rate_limit(hw, vf, max_tx_rate, actual_link_speed);
+
+	return 0;
+}
+
+static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
+				   bool setting)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 reg_val, reg_offset;
+
+	if (!adapter->vfs_allocated_count)
+		return -EOPNOTSUPP;
+
+	if (vf >= adapter->vfs_allocated_count)
+		return -EINVAL;
+
+	reg_offset = (hw->mac.type == e1000_82576) ? E1000_DTXSWC : E1000_TXSWC;
+	reg_val = rd32(reg_offset);
+	if (setting)
+		reg_val |= (BIT(vf) |
+			    BIT(vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT));
+	else
+		reg_val &= ~(BIT(vf) |
+			     BIT(vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT));
+	wr32(reg_offset, reg_val);
+
+	adapter->vf_data[vf].spoofchk_enabled = setting;
+	return 0;
+}
+
+static int igb_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+
+	if (vf >= adapter->vfs_allocated_count)
+		return -EINVAL;
+	if (adapter->vf_data[vf].trusted == setting)
+		return 0;
+
+	adapter->vf_data[vf].trusted = setting;
+
+	dev_info(&adapter->pdev->dev, "VF %u is %strusted\n",
+		 vf, setting ? "" : "not ");
+	return 0;
+}
+
+static int igb_ndo_get_vf_config(struct net_device *netdev,
+				 int vf, struct ifla_vf_info *ivi)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	if (vf >= adapter->vfs_allocated_count)
+		return -EINVAL;
+	ivi->vf = vf;
+	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
+	ivi->max_tx_rate = adapter->vf_data[vf].tx_rate;
+	ivi->min_tx_rate = 0;
+	ivi->vlan = adapter->vf_data[vf].pf_vlan;
+	ivi->qos = adapter->vf_data[vf].pf_qos;
+	ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled;
+	ivi->trusted = adapter->vf_data[vf].trusted;
+	return 0;
+}
+
+static void igb_vmm_control(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 reg;
+
+	switch (hw->mac.type) {
+	case e1000_82575:
+	case e1000_i210:
+	case e1000_i211:
+	case e1000_i354:
+	default:
+		/* replication is not supported for 82575 */
+		return;
+	case e1000_82576:
+		/* notify HW that the MAC is adding vlan tags */
+		reg = rd32(E1000_DTXCTL);
+		reg |= E1000_DTXCTL_VLAN_ADDED;
+		wr32(E1000_DTXCTL, reg);
+		fallthrough;
+	case e1000_82580:
+		/* enable replication vlan tag stripping */
+		reg = rd32(E1000_RPLOLR);
+		reg |= E1000_RPLOLR_STRVLAN;
+		wr32(E1000_RPLOLR, reg);
+		fallthrough;
+	case e1000_i350:
+		/* none of the above registers are supported by i350 */
+		break;
+	}
+
+	if (adapter->vfs_allocated_count) {
+		igb_vmdq_set_loopback_pf(hw, true);
+		igb_vmdq_set_replication_pf(hw, true);
+		igb_vmdq_set_anti_spoofing_pf(hw, true,
+					      adapter->vfs_allocated_count);
+	} else {
+		igb_vmdq_set_loopback_pf(hw, false);
+		igb_vmdq_set_replication_pf(hw, false);
+	}
+}
+
+static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 dmac_thr;
+	u16 hwm;
+	u32 reg;
+
+	if (hw->mac.type > e1000_82580) {
+		if (adapter->flags & IGB_FLAG_DMAC) {
+			/* force threshold to 0. */
+			wr32(E1000_DMCTXTH, 0);
+
+			/* DMA Coalescing high water mark needs to be greater
+			 * than the Rx threshold. Set hwm to PBA - max frame
+			 * size in 16B units, capping it at PBA - 6KB.
+			 */
+			hwm = 64 * (pba - 6);
+			reg = rd32(E1000_FCRTC);
+			reg &= ~E1000_FCRTC_RTH_COAL_MASK;
+			reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
+				& E1000_FCRTC_RTH_COAL_MASK);
+			wr32(E1000_FCRTC, reg);
+
+			/* Set the DMA Coalescing Rx threshold to PBA - 2 * max
+			 * frame size, capping it at PBA - 10KB.
+			 */
+			dmac_thr = pba - 10;
+			reg = rd32(E1000_DMACR);
+			reg &= ~E1000_DMACR_DMACTHR_MASK;
+			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
+				& E1000_DMACR_DMACTHR_MASK);
+
+			/* transition to L0x or L1 if available..*/
+			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
+
+			/* watchdog timer= +-1000 usec in 32usec intervals */
+			reg |= (1000 >> 5);
+
+			/* Disable BMC-to-OS Watchdog Enable */
+			if (hw->mac.type != e1000_i354)
+				reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
+			wr32(E1000_DMACR, reg);
+
+			/* no lower threshold to disable
+			 * coalescing(smart fifb)-UTRESH=0
+			 */
+			wr32(E1000_DMCRTRH, 0);
+
+			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
+
+			wr32(E1000_DMCTLX, reg);
+
+			/* free space in tx packet buffer to wake from
+			 * DMA coal
+			 */
+			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
+			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
+		}
+
+		if (hw->mac.type >= e1000_i210 ||
+		    (adapter->flags & IGB_FLAG_DMAC)) {
+			reg = rd32(E1000_PCIEMISC);
+			reg |= E1000_PCIEMISC_LX_DECISION;
+			wr32(E1000_PCIEMISC, reg);
+		} /* endif adapter->dmac is not disabled */
+	} else if (hw->mac.type == e1000_82580) {
+		u32 reg = rd32(E1000_PCIEMISC);
+
+		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
+		wr32(E1000_DMACR, 0);
+	}
+}
+
+/**
+ *  igb_read_i2c_byte - Reads 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to read
+ *  @dev_addr: device address
+ *  @data: value read
+ *
+ *  Performs byte read operation over I2C interface at
+ *  a specified device address.
+ **/
+s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
+		      u8 dev_addr, u8 *data)
+{
+	struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
+	struct i2c_client *this_client = adapter->i2c_client;
+	s32 status;
+	u16 swfw_mask = 0;
+
+	if (!this_client)
+		return E1000_ERR_I2C;
+
+	swfw_mask = E1000_SWFW_PHY0_SM;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
+		return E1000_ERR_SWFW_SYNC;
+
+	status = i2c_smbus_read_byte_data(this_client, byte_offset);
+	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+
+	if (status < 0)
+		return E1000_ERR_I2C;
+	else {
+		*data = status;
+		return 0;
+	}
+}
+
+/**
+ *  igb_write_i2c_byte - Writes 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to write
+ *  @dev_addr: device address
+ *  @data: value to write
+ *
+ *  Performs byte write operation over I2C interface at
+ *  a specified device address.
+ **/
+s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
+		       u8 dev_addr, u8 data)
+{
+	struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
+	struct i2c_client *this_client = adapter->i2c_client;
+	s32 status;
+	u16 swfw_mask = E1000_SWFW_PHY0_SM;
+
+	if (!this_client)
+		return E1000_ERR_I2C;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
+		return E1000_ERR_SWFW_SYNC;
+	status = i2c_smbus_write_byte_data(this_client, byte_offset, data);
+	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+
+	if (status)
+		return E1000_ERR_I2C;
+	else
+		return 0;
+
+}
+
+int igb_reinit_queues(struct igb_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	int err = 0;
+
+	if (netif_running(netdev))
+		igb_close(netdev);
+
+	igb_reset_interrupt_capability(adapter);
+
+	if (igb_init_interrupt_scheme(adapter, true)) {
+		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
+		return -ENOMEM;
+	}
+
+	if (netif_running(netdev))
+		err = igb_open(netdev);
+
+	return err;
+}
+
+static void igb_nfc_filter_exit(struct igb_adapter *adapter)
+{
+	struct igb_nfc_filter *rule;
+
+	spin_lock(&adapter->nfc_lock);
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
+		igb_erase_filter(adapter, rule);
+
+	hlist_for_each_entry(rule, &adapter->cls_flower_list, nfc_node)
+		igb_erase_filter(adapter, rule);
+
+	spin_unlock(&adapter->nfc_lock);
+}
+
+static void igb_nfc_filter_restore(struct igb_adapter *adapter)
+{
+	struct igb_nfc_filter *rule;
+
+	spin_lock(&adapter->nfc_lock);
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
+		igb_add_filter(adapter, rule);
+
+	spin_unlock(&adapter->nfc_lock);
+}
+/* igb_main.c */
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
new file mode 100644
index 0000000000..319c544b9f
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -0,0 +1,1525 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2011 Richard Cochran <richardcochran@gmail.com> */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/ptp_classify.h>
+
+#include "igb.h"
+
+#define INCVALUE_MASK		0x7fffffff
+#define ISGN			0x80000000
+
+/* The 82580 timesync updates the system timer every 8ns by 8ns,
+ * and this update value cannot be reprogrammed.
+ *
+ * Neither the 82576 nor the 82580 offer registers wide enough to hold
+ * nanoseconds time values for very long. For the 82580, SYSTIM always
+ * counts nanoseconds, but the upper 24 bits are not available. The
+ * frequency is adjusted by changing the 32 bit fractional nanoseconds
+ * register, TIMINCA.
+ *
+ * For the 82576, the SYSTIM register time unit is affect by the
+ * choice of the 24 bit TININCA:IV (incvalue) field. Five bits of this
+ * field are needed to provide the nominal 16 nanosecond period,
+ * leaving 19 bits for fractional nanoseconds.
+ *
+ * We scale the NIC clock cycle by a large factor so that relatively
+ * small clock corrections can be added or subtracted at each clock
+ * tick. The drawbacks of a large factor are a) that the clock
+ * register overflows more quickly (not such a big deal) and b) that
+ * the increment per tick has to fit into 24 bits.  As a result we
+ * need to use a shift of 19 so we can fit a value of 16 into the
+ * TIMINCA register.
+ *
+ *
+ *             SYSTIMH            SYSTIML
+ *        +--------------+   +---+---+------+
+ *  82576 |      32      |   | 8 | 5 |  19  |
+ *        +--------------+   +---+---+------+
+ *         \________ 45 bits _______/  fract
+ *
+ *        +----------+---+   +--------------+
+ *  82580 |    24    | 8 |   |      32      |
+ *        +----------+---+   +--------------+
+ *          reserved  \______ 40 bits _____/
+ *
+ *
+ * The 45 bit 82576 SYSTIM overflows every
+ *   2^45 * 10^-9 / 3600 = 9.77 hours.
+ *
+ * The 40 bit 82580 SYSTIM overflows every
+ *   2^40 * 10^-9 /  60  = 18.3 minutes.
+ *
+ * SYSTIM is converted to real time using a timecounter. As
+ * timecounter_cyc2time() allows old timestamps, the timecounter needs
+ * to be updated at least once per half of the SYSTIM interval.
+ * Scheduling of delayed work is not very accurate, and also the NIC
+ * clock can be adjusted to run up to 6% faster and the system clock
+ * up to 10% slower, so we aim for 6 minutes to be sure the actual
+ * interval in the NIC time is shorter than 9.16 minutes.
+ */
+
+#define IGB_SYSTIM_OVERFLOW_PERIOD	(HZ * 60 * 6)
+#define IGB_PTP_TX_TIMEOUT		(HZ * 15)
+#define INCPERIOD_82576			BIT(E1000_TIMINCA_16NS_SHIFT)
+#define INCVALUE_82576_MASK		GENMASK(E1000_TIMINCA_16NS_SHIFT - 1, 0)
+#define INCVALUE_82576			(16u << IGB_82576_TSYNC_SHIFT)
+#define IGB_NBITS_82580			40
+#define IGB_82580_BASE_PERIOD		0x800000000
+
+static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter);
+static void igb_ptp_sdp_init(struct igb_adapter *adapter);
+
+/* SYSTIM read access for the 82576 */
+static u64 igb_ptp_read_82576(const struct cyclecounter *cc)
+{
+	struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
+	struct e1000_hw *hw = &igb->hw;
+	u64 val;
+	u32 lo, hi;
+
+	lo = rd32(E1000_SYSTIML);
+	hi = rd32(E1000_SYSTIMH);
+
+	val = ((u64) hi) << 32;
+	val |= lo;
+
+	return val;
+}
+
+/* SYSTIM read access for the 82580 */
+static u64 igb_ptp_read_82580(const struct cyclecounter *cc)
+{
+	struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc);
+	struct e1000_hw *hw = &igb->hw;
+	u32 lo, hi;
+	u64 val;
+
+	/* The timestamp latches on lowest register read. For the 82580
+	 * the lowest register is SYSTIMR instead of SYSTIML.  However we only
+	 * need to provide nanosecond resolution, so we just ignore it.
+	 */
+	rd32(E1000_SYSTIMR);
+	lo = rd32(E1000_SYSTIML);
+	hi = rd32(E1000_SYSTIMH);
+
+	val = ((u64) hi) << 32;
+	val |= lo;
+
+	return val;
+}
+
+/* SYSTIM read access for I210/I211 */
+static void igb_ptp_read_i210(struct igb_adapter *adapter,
+			      struct timespec64 *ts)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 sec, nsec;
+
+	/* The timestamp latches on lowest register read. For I210/I211, the
+	 * lowest register is SYSTIMR. Since we only need to provide nanosecond
+	 * resolution, we can ignore it.
+	 */
+	rd32(E1000_SYSTIMR);
+	nsec = rd32(E1000_SYSTIML);
+	sec = rd32(E1000_SYSTIMH);
+
+	ts->tv_sec = sec;
+	ts->tv_nsec = nsec;
+}
+
+static void igb_ptp_write_i210(struct igb_adapter *adapter,
+			       const struct timespec64 *ts)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* Writing the SYSTIMR register is not necessary as it only provides
+	 * sub-nanosecond resolution.
+	 */
+	wr32(E1000_SYSTIML, ts->tv_nsec);
+	wr32(E1000_SYSTIMH, (u32)ts->tv_sec);
+}
+
+/**
+ * igb_ptp_systim_to_hwtstamp - convert system time value to hw timestamp
+ * @adapter: board private structure
+ * @hwtstamps: timestamp structure to update
+ * @systim: unsigned 64bit system time value.
+ *
+ * We need to convert the system time value stored in the RX/TXSTMP registers
+ * into a hwtstamp which can be used by the upper level timestamping functions.
+ *
+ * The 'tmreg_lock' spinlock is used to protect the consistency of the
+ * system time value. This is needed because reading the 64 bit time
+ * value involves reading two (or three) 32 bit registers. The first
+ * read latches the value. Ditto for writing.
+ *
+ * In addition, here have extended the system time with an overflow
+ * counter in software.
+ **/
+static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter,
+				       struct skb_shared_hwtstamps *hwtstamps,
+				       u64 systim)
+{
+	unsigned long flags;
+	u64 ns;
+
+	memset(hwtstamps, 0, sizeof(*hwtstamps));
+
+	switch (adapter->hw.mac.type) {
+	case e1000_82576:
+	case e1000_82580:
+	case e1000_i354:
+	case e1000_i350:
+		spin_lock_irqsave(&adapter->tmreg_lock, flags);
+		ns = timecounter_cyc2time(&adapter->tc, systim);
+		spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+		hwtstamps->hwtstamp = ns_to_ktime(ns);
+		break;
+	case e1000_i210:
+	case e1000_i211:
+		/* Upper 32 bits contain s, lower 32 bits contain ns. */
+		hwtstamps->hwtstamp = ktime_set(systim >> 32,
+						systim & 0xFFFFFFFF);
+		break;
+	default:
+		break;
+	}
+}
+
+/* PTP clock operations */
+static int igb_ptp_adjfine_82576(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	struct e1000_hw *hw = &igb->hw;
+	u64 incvalue;
+
+	incvalue = adjust_by_scaled_ppm(INCVALUE_82576, scaled_ppm);
+
+	wr32(E1000_TIMINCA, INCPERIOD_82576 | (incvalue & INCVALUE_82576_MASK));
+
+	return 0;
+}
+
+static int igb_ptp_adjfine_82580(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	struct e1000_hw *hw = &igb->hw;
+	bool neg_adj;
+	u64 rate;
+	u32 inca;
+
+	neg_adj = diff_by_scaled_ppm(IGB_82580_BASE_PERIOD, scaled_ppm, &rate);
+
+	inca = rate & INCVALUE_MASK;
+	if (neg_adj)
+		inca |= ISGN;
+
+	wr32(E1000_TIMINCA, inca);
+
+	return 0;
+}
+
+static int igb_ptp_adjtime_82576(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	unsigned long flags;
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+	timecounter_adjtime(&igb->tc, delta);
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	return 0;
+}
+
+static int igb_ptp_adjtime_i210(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	unsigned long flags;
+	struct timespec64 now, then = ns_to_timespec64(delta);
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	igb_ptp_read_i210(igb, &now);
+	now = timespec64_add(now, then);
+	igb_ptp_write_i210(igb, (const struct timespec64 *)&now);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	return 0;
+}
+
+static int igb_ptp_gettimex_82576(struct ptp_clock_info *ptp,
+				  struct timespec64 *ts,
+				  struct ptp_system_timestamp *sts)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	struct e1000_hw *hw = &igb->hw;
+	unsigned long flags;
+	u32 lo, hi;
+	u64 ns;
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	ptp_read_system_prets(sts);
+	lo = rd32(E1000_SYSTIML);
+	ptp_read_system_postts(sts);
+	hi = rd32(E1000_SYSTIMH);
+
+	ns = timecounter_cyc2time(&igb->tc, ((u64)hi << 32) | lo);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	*ts = ns_to_timespec64(ns);
+
+	return 0;
+}
+
+static int igb_ptp_gettimex_82580(struct ptp_clock_info *ptp,
+				  struct timespec64 *ts,
+				  struct ptp_system_timestamp *sts)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	struct e1000_hw *hw = &igb->hw;
+	unsigned long flags;
+	u32 lo, hi;
+	u64 ns;
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	ptp_read_system_prets(sts);
+	rd32(E1000_SYSTIMR);
+	ptp_read_system_postts(sts);
+	lo = rd32(E1000_SYSTIML);
+	hi = rd32(E1000_SYSTIMH);
+
+	ns = timecounter_cyc2time(&igb->tc, ((u64)hi << 32) | lo);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	*ts = ns_to_timespec64(ns);
+
+	return 0;
+}
+
+static int igb_ptp_gettimex_i210(struct ptp_clock_info *ptp,
+				 struct timespec64 *ts,
+				 struct ptp_system_timestamp *sts)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	struct e1000_hw *hw = &igb->hw;
+	unsigned long flags;
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	ptp_read_system_prets(sts);
+	rd32(E1000_SYSTIMR);
+	ptp_read_system_postts(sts);
+	ts->tv_nsec = rd32(E1000_SYSTIML);
+	ts->tv_sec = rd32(E1000_SYSTIMH);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	return 0;
+}
+
+static int igb_ptp_settime_82576(struct ptp_clock_info *ptp,
+				 const struct timespec64 *ts)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	unsigned long flags;
+	u64 ns;
+
+	ns = timespec64_to_ns(ts);
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	timecounter_init(&igb->tc, &igb->cc, ns);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	return 0;
+}
+
+static int igb_ptp_settime_i210(struct ptp_clock_info *ptp,
+				const struct timespec64 *ts)
+{
+	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
+					       ptp_caps);
+	unsigned long flags;
+
+	spin_lock_irqsave(&igb->tmreg_lock, flags);
+
+	igb_ptp_write_i210(igb, ts);
+
+	spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+
+	return 0;
+}
+
+static void igb_pin_direction(int pin, int input, u32 *ctrl, u32 *ctrl_ext)
+{
+	u32 *ptr = pin < 2 ? ctrl : ctrl_ext;
+	static const u32 mask[IGB_N_SDP] = {
+		E1000_CTRL_SDP0_DIR,
+		E1000_CTRL_SDP1_DIR,
+		E1000_CTRL_EXT_SDP2_DIR,
+		E1000_CTRL_EXT_SDP3_DIR,
+	};
+
+	if (input)
+		*ptr &= ~mask[pin];
+	else
+		*ptr |= mask[pin];
+}
+
+static void igb_pin_extts(struct igb_adapter *igb, int chan, int pin)
+{
+	static const u32 aux0_sel_sdp[IGB_N_SDP] = {
+		AUX0_SEL_SDP0, AUX0_SEL_SDP1, AUX0_SEL_SDP2, AUX0_SEL_SDP3,
+	};
+	static const u32 aux1_sel_sdp[IGB_N_SDP] = {
+		AUX1_SEL_SDP0, AUX1_SEL_SDP1, AUX1_SEL_SDP2, AUX1_SEL_SDP3,
+	};
+	static const u32 ts_sdp_en[IGB_N_SDP] = {
+		TS_SDP0_EN, TS_SDP1_EN, TS_SDP2_EN, TS_SDP3_EN,
+	};
+	struct e1000_hw *hw = &igb->hw;
+	u32 ctrl, ctrl_ext, tssdp = 0;
+
+	ctrl = rd32(E1000_CTRL);
+	ctrl_ext = rd32(E1000_CTRL_EXT);
+	tssdp = rd32(E1000_TSSDP);
+
+	igb_pin_direction(pin, 1, &ctrl, &ctrl_ext);
+
+	/* Make sure this pin is not enabled as an output. */
+	tssdp &= ~ts_sdp_en[pin];
+
+	if (chan == 1) {
+		tssdp &= ~AUX1_SEL_SDP3;
+		tssdp |= aux1_sel_sdp[pin] | AUX1_TS_SDP_EN;
+	} else {
+		tssdp &= ~AUX0_SEL_SDP3;
+		tssdp |= aux0_sel_sdp[pin] | AUX0_TS_SDP_EN;
+	}
+
+	wr32(E1000_TSSDP, tssdp);
+	wr32(E1000_CTRL, ctrl);
+	wr32(E1000_CTRL_EXT, ctrl_ext);
+}
+
+static void igb_pin_perout(struct igb_adapter *igb, int chan, int pin, int freq)
+{
+	static const u32 aux0_sel_sdp[IGB_N_SDP] = {
+		AUX0_SEL_SDP0, AUX0_SEL_SDP1, AUX0_SEL_SDP2, AUX0_SEL_SDP3,
+	};
+	static const u32 aux1_sel_sdp[IGB_N_SDP] = {
+		AUX1_SEL_SDP0, AUX1_SEL_SDP1, AUX1_SEL_SDP2, AUX1_SEL_SDP3,
+	};
+	static const u32 ts_sdp_en[IGB_N_SDP] = {
+		TS_SDP0_EN, TS_SDP1_EN, TS_SDP2_EN, TS_SDP3_EN,
+	};
+	static const u32 ts_sdp_sel_tt0[IGB_N_SDP] = {
+		TS_SDP0_SEL_TT0, TS_SDP1_SEL_TT0,
+		TS_SDP2_SEL_TT0, TS_SDP3_SEL_TT0,
+	};
+	static const u32 ts_sdp_sel_tt1[IGB_N_SDP] = {
+		TS_SDP0_SEL_TT1, TS_SDP1_SEL_TT1,
+		TS_SDP2_SEL_TT1, TS_SDP3_SEL_TT1,
+	};
+	static const u32 ts_sdp_sel_fc0[IGB_N_SDP] = {
+		TS_SDP0_SEL_FC0, TS_SDP1_SEL_FC0,
+		TS_SDP2_SEL_FC0, TS_SDP3_SEL_FC0,
+	};
+	static const u32 ts_sdp_sel_fc1[IGB_N_SDP] = {
+		TS_SDP0_SEL_FC1, TS_SDP1_SEL_FC1,
+		TS_SDP2_SEL_FC1, TS_SDP3_SEL_FC1,
+	};
+	static const u32 ts_sdp_sel_clr[IGB_N_SDP] = {
+		TS_SDP0_SEL_FC1, TS_SDP1_SEL_FC1,
+		TS_SDP2_SEL_FC1, TS_SDP3_SEL_FC1,
+	};
+	struct e1000_hw *hw = &igb->hw;
+	u32 ctrl, ctrl_ext, tssdp = 0;
+
+	ctrl = rd32(E1000_CTRL);
+	ctrl_ext = rd32(E1000_CTRL_EXT);
+	tssdp = rd32(E1000_TSSDP);
+
+	igb_pin_direction(pin, 0, &ctrl, &ctrl_ext);
+
+	/* Make sure this pin is not enabled as an input. */
+	if ((tssdp & AUX0_SEL_SDP3) == aux0_sel_sdp[pin])
+		tssdp &= ~AUX0_TS_SDP_EN;
+
+	if ((tssdp & AUX1_SEL_SDP3) == aux1_sel_sdp[pin])
+		tssdp &= ~AUX1_TS_SDP_EN;
+
+	tssdp &= ~ts_sdp_sel_clr[pin];
+	if (freq) {
+		if (chan == 1)
+			tssdp |= ts_sdp_sel_fc1[pin];
+		else
+			tssdp |= ts_sdp_sel_fc0[pin];
+	} else {
+		if (chan == 1)
+			tssdp |= ts_sdp_sel_tt1[pin];
+		else
+			tssdp |= ts_sdp_sel_tt0[pin];
+	}
+	tssdp |= ts_sdp_en[pin];
+
+	wr32(E1000_TSSDP, tssdp);
+	wr32(E1000_CTRL, ctrl);
+	wr32(E1000_CTRL_EXT, ctrl_ext);
+}
+
+static int igb_ptp_feature_enable_82580(struct ptp_clock_info *ptp,
+					struct ptp_clock_request *rq, int on)
+{
+	struct igb_adapter *igb =
+		container_of(ptp, struct igb_adapter, ptp_caps);
+	u32 tsauxc, tsim, tsauxc_mask, tsim_mask, trgttiml, trgttimh, systiml,
+		systimh, level_mask, level, rem;
+	struct e1000_hw *hw = &igb->hw;
+	struct timespec64 ts, start;
+	unsigned long flags;
+	u64 systim, now;
+	int pin = -1;
+	s64 ns;
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_EXTTS:
+		/* Reject requests with unsupported flags */
+		if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+					PTP_RISING_EDGE |
+					PTP_FALLING_EDGE |
+					PTP_STRICT_FLAGS))
+			return -EOPNOTSUPP;
+
+		if (on) {
+			pin = ptp_find_pin(igb->ptp_clock, PTP_PF_EXTTS,
+					   rq->extts.index);
+			if (pin < 0)
+				return -EBUSY;
+		}
+		if (rq->extts.index == 1) {
+			tsauxc_mask = TSAUXC_EN_TS1;
+			tsim_mask = TSINTR_AUTT1;
+		} else {
+			tsauxc_mask = TSAUXC_EN_TS0;
+			tsim_mask = TSINTR_AUTT0;
+		}
+		spin_lock_irqsave(&igb->tmreg_lock, flags);
+		tsauxc = rd32(E1000_TSAUXC);
+		tsim = rd32(E1000_TSIM);
+		if (on) {
+			igb_pin_extts(igb, rq->extts.index, pin);
+			tsauxc |= tsauxc_mask;
+			tsim |= tsim_mask;
+		} else {
+			tsauxc &= ~tsauxc_mask;
+			tsim &= ~tsim_mask;
+		}
+		wr32(E1000_TSAUXC, tsauxc);
+		wr32(E1000_TSIM, tsim);
+		spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+		return 0;
+
+	case PTP_CLK_REQ_PEROUT:
+		/* Reject requests with unsupported flags */
+		if (rq->perout.flags)
+			return -EOPNOTSUPP;
+
+		if (on) {
+			pin = ptp_find_pin(igb->ptp_clock, PTP_PF_PEROUT,
+					   rq->perout.index);
+			if (pin < 0)
+				return -EBUSY;
+		}
+		ts.tv_sec = rq->perout.period.sec;
+		ts.tv_nsec = rq->perout.period.nsec;
+		ns = timespec64_to_ns(&ts);
+		ns = ns >> 1;
+		if (on && ns < 8LL)
+			return -EINVAL;
+		ts = ns_to_timespec64(ns);
+		if (rq->perout.index == 1) {
+			tsauxc_mask = TSAUXC_EN_TT1;
+			tsim_mask = TSINTR_TT1;
+			trgttiml = E1000_TRGTTIML1;
+			trgttimh = E1000_TRGTTIMH1;
+		} else {
+			tsauxc_mask = TSAUXC_EN_TT0;
+			tsim_mask = TSINTR_TT0;
+			trgttiml = E1000_TRGTTIML0;
+			trgttimh = E1000_TRGTTIMH0;
+		}
+		spin_lock_irqsave(&igb->tmreg_lock, flags);
+		tsauxc = rd32(E1000_TSAUXC);
+		tsim = rd32(E1000_TSIM);
+		if (rq->perout.index == 1) {
+			tsauxc &= ~(TSAUXC_EN_TT1 | TSAUXC_EN_CLK1 | TSAUXC_ST1);
+			tsim &= ~TSINTR_TT1;
+		} else {
+			tsauxc &= ~(TSAUXC_EN_TT0 | TSAUXC_EN_CLK0 | TSAUXC_ST0);
+			tsim &= ~TSINTR_TT0;
+		}
+		if (on) {
+			int i = rq->perout.index;
+
+			/* read systim registers in sequence */
+			rd32(E1000_SYSTIMR);
+			systiml = rd32(E1000_SYSTIML);
+			systimh = rd32(E1000_SYSTIMH);
+			systim = (((u64)(systimh & 0xFF)) << 32) | ((u64)systiml);
+			now = timecounter_cyc2time(&igb->tc, systim);
+
+			if (pin < 2) {
+				level_mask = (i == 1) ? 0x80000 : 0x40000;
+				level = (rd32(E1000_CTRL) & level_mask) ? 1 : 0;
+			} else {
+				level_mask = (i == 1) ? 0x80 : 0x40;
+				level = (rd32(E1000_CTRL_EXT) & level_mask) ? 1 : 0;
+			}
+
+			div_u64_rem(now, ns, &rem);
+			systim = systim + (ns - rem);
+
+			/* synchronize pin level with rising/falling edges */
+			div_u64_rem(now, ns << 1, &rem);
+			if (rem < ns) {
+				/* first half of period */
+				if (level == 0) {
+					/* output is already low, skip this period */
+					systim += ns;
+				}
+			} else {
+				/* second half of period */
+				if (level == 1) {
+					/* output is already high, skip this period */
+					systim += ns;
+				}
+			}
+
+			start = ns_to_timespec64(systim + (ns - rem));
+			igb_pin_perout(igb, i, pin, 0);
+			igb->perout[i].start.tv_sec = start.tv_sec;
+			igb->perout[i].start.tv_nsec = start.tv_nsec;
+			igb->perout[i].period.tv_sec = ts.tv_sec;
+			igb->perout[i].period.tv_nsec = ts.tv_nsec;
+
+			wr32(trgttiml, (u32)systim);
+			wr32(trgttimh, ((u32)(systim >> 32)) & 0xFF);
+			tsauxc |= tsauxc_mask;
+			tsim |= tsim_mask;
+		}
+		wr32(E1000_TSAUXC, tsauxc);
+		wr32(E1000_TSIM, tsim);
+		spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+		return 0;
+
+	case PTP_CLK_REQ_PPS:
+		return -EOPNOTSUPP;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp,
+				       struct ptp_clock_request *rq, int on)
+{
+	struct igb_adapter *igb =
+		container_of(ptp, struct igb_adapter, ptp_caps);
+	struct e1000_hw *hw = &igb->hw;
+	u32 tsauxc, tsim, tsauxc_mask, tsim_mask, trgttiml, trgttimh, freqout;
+	unsigned long flags;
+	struct timespec64 ts;
+	int use_freq = 0, pin = -1;
+	s64 ns;
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_EXTTS:
+		/* Reject requests with unsupported flags */
+		if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+					PTP_RISING_EDGE |
+					PTP_FALLING_EDGE |
+					PTP_STRICT_FLAGS))
+			return -EOPNOTSUPP;
+
+		/* Reject requests failing to enable both edges. */
+		if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+		    (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+		    (rq->extts.flags & PTP_EXTTS_EDGES) != PTP_EXTTS_EDGES)
+			return -EOPNOTSUPP;
+
+		if (on) {
+			pin = ptp_find_pin(igb->ptp_clock, PTP_PF_EXTTS,
+					   rq->extts.index);
+			if (pin < 0)
+				return -EBUSY;
+		}
+		if (rq->extts.index == 1) {
+			tsauxc_mask = TSAUXC_EN_TS1;
+			tsim_mask = TSINTR_AUTT1;
+		} else {
+			tsauxc_mask = TSAUXC_EN_TS0;
+			tsim_mask = TSINTR_AUTT0;
+		}
+		spin_lock_irqsave(&igb->tmreg_lock, flags);
+		tsauxc = rd32(E1000_TSAUXC);
+		tsim = rd32(E1000_TSIM);
+		if (on) {
+			igb_pin_extts(igb, rq->extts.index, pin);
+			tsauxc |= tsauxc_mask;
+			tsim |= tsim_mask;
+		} else {
+			tsauxc &= ~tsauxc_mask;
+			tsim &= ~tsim_mask;
+		}
+		wr32(E1000_TSAUXC, tsauxc);
+		wr32(E1000_TSIM, tsim);
+		spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+		return 0;
+
+	case PTP_CLK_REQ_PEROUT:
+		/* Reject requests with unsupported flags */
+		if (rq->perout.flags)
+			return -EOPNOTSUPP;
+
+		if (on) {
+			pin = ptp_find_pin(igb->ptp_clock, PTP_PF_PEROUT,
+					   rq->perout.index);
+			if (pin < 0)
+				return -EBUSY;
+		}
+		ts.tv_sec = rq->perout.period.sec;
+		ts.tv_nsec = rq->perout.period.nsec;
+		ns = timespec64_to_ns(&ts);
+		ns = ns >> 1;
+		if (on && ((ns <= 70000000LL) || (ns == 125000000LL) ||
+			   (ns == 250000000LL) || (ns == 500000000LL))) {
+			if (ns < 8LL)
+				return -EINVAL;
+			use_freq = 1;
+		}
+		ts = ns_to_timespec64(ns);
+		if (rq->perout.index == 1) {
+			if (use_freq) {
+				tsauxc_mask = TSAUXC_EN_CLK1 | TSAUXC_ST1;
+				tsim_mask = 0;
+			} else {
+				tsauxc_mask = TSAUXC_EN_TT1;
+				tsim_mask = TSINTR_TT1;
+			}
+			trgttiml = E1000_TRGTTIML1;
+			trgttimh = E1000_TRGTTIMH1;
+			freqout = E1000_FREQOUT1;
+		} else {
+			if (use_freq) {
+				tsauxc_mask = TSAUXC_EN_CLK0 | TSAUXC_ST0;
+				tsim_mask = 0;
+			} else {
+				tsauxc_mask = TSAUXC_EN_TT0;
+				tsim_mask = TSINTR_TT0;
+			}
+			trgttiml = E1000_TRGTTIML0;
+			trgttimh = E1000_TRGTTIMH0;
+			freqout = E1000_FREQOUT0;
+		}
+		spin_lock_irqsave(&igb->tmreg_lock, flags);
+		tsauxc = rd32(E1000_TSAUXC);
+		tsim = rd32(E1000_TSIM);
+		if (rq->perout.index == 1) {
+			tsauxc &= ~(TSAUXC_EN_TT1 | TSAUXC_EN_CLK1 | TSAUXC_ST1);
+			tsim &= ~TSINTR_TT1;
+		} else {
+			tsauxc &= ~(TSAUXC_EN_TT0 | TSAUXC_EN_CLK0 | TSAUXC_ST0);
+			tsim &= ~TSINTR_TT0;
+		}
+		if (on) {
+			int i = rq->perout.index;
+			igb_pin_perout(igb, i, pin, use_freq);
+			igb->perout[i].start.tv_sec = rq->perout.start.sec;
+			igb->perout[i].start.tv_nsec = rq->perout.start.nsec;
+			igb->perout[i].period.tv_sec = ts.tv_sec;
+			igb->perout[i].period.tv_nsec = ts.tv_nsec;
+			wr32(trgttimh, rq->perout.start.sec);
+			wr32(trgttiml, rq->perout.start.nsec);
+			if (use_freq)
+				wr32(freqout, ns);
+			tsauxc |= tsauxc_mask;
+			tsim |= tsim_mask;
+		}
+		wr32(E1000_TSAUXC, tsauxc);
+		wr32(E1000_TSIM, tsim);
+		spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+		return 0;
+
+	case PTP_CLK_REQ_PPS:
+		spin_lock_irqsave(&igb->tmreg_lock, flags);
+		tsim = rd32(E1000_TSIM);
+		if (on)
+			tsim |= TSINTR_SYS_WRAP;
+		else
+			tsim &= ~TSINTR_SYS_WRAP;
+		igb->pps_sys_wrap_on = !!on;
+		wr32(E1000_TSIM, tsim);
+		spin_unlock_irqrestore(&igb->tmreg_lock, flags);
+		return 0;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int igb_ptp_feature_enable(struct ptp_clock_info *ptp,
+				  struct ptp_clock_request *rq, int on)
+{
+	return -EOPNOTSUPP;
+}
+
+static int igb_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin,
+			      enum ptp_pin_function func, unsigned int chan)
+{
+	switch (func) {
+	case PTP_PF_NONE:
+	case PTP_PF_EXTTS:
+	case PTP_PF_PEROUT:
+		break;
+	case PTP_PF_PHYSYNC:
+		return -1;
+	}
+	return 0;
+}
+
+/**
+ * igb_ptp_tx_work
+ * @work: pointer to work struct
+ *
+ * This work function polls the TSYNCTXCTL valid bit to determine when a
+ * timestamp has been taken for the current stored skb.
+ **/
+static void igb_ptp_tx_work(struct work_struct *work)
+{
+	struct igb_adapter *adapter = container_of(work, struct igb_adapter,
+						   ptp_tx_work);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 tsynctxctl;
+
+	if (!adapter->ptp_tx_skb)
+		return;
+
+	if (time_is_before_jiffies(adapter->ptp_tx_start +
+				   IGB_PTP_TX_TIMEOUT)) {
+		dev_kfree_skb_any(adapter->ptp_tx_skb);
+		adapter->ptp_tx_skb = NULL;
+		clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state);
+		adapter->tx_hwtstamp_timeouts++;
+		/* Clear the tx valid bit in TSYNCTXCTL register to enable
+		 * interrupt
+		 */
+		rd32(E1000_TXSTMPH);
+		dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n");
+		return;
+	}
+
+	tsynctxctl = rd32(E1000_TSYNCTXCTL);
+	if (tsynctxctl & E1000_TSYNCTXCTL_VALID)
+		igb_ptp_tx_hwtstamp(adapter);
+	else
+		/* reschedule to check later */
+		schedule_work(&adapter->ptp_tx_work);
+}
+
+static void igb_ptp_overflow_check(struct work_struct *work)
+{
+	struct igb_adapter *igb =
+		container_of(work, struct igb_adapter, ptp_overflow_work.work);
+	struct timespec64 ts;
+	u64 ns;
+
+	/* Update the timecounter */
+	ns = timecounter_read(&igb->tc);
+
+	ts = ns_to_timespec64(ns);
+	pr_debug("igb overflow check at %lld.%09lu\n",
+		 (long long) ts.tv_sec, ts.tv_nsec);
+
+	schedule_delayed_work(&igb->ptp_overflow_work,
+			      IGB_SYSTIM_OVERFLOW_PERIOD);
+}
+
+/**
+ * igb_ptp_rx_hang - detect error case when Rx timestamp registers latched
+ * @adapter: private network adapter structure
+ *
+ * This watchdog task is scheduled to detect error case where hardware has
+ * dropped an Rx packet that was timestamped when the ring is full. The
+ * particular error is rare but leaves the device in a state unable to timestamp
+ * any future packets.
+ **/
+void igb_ptp_rx_hang(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 tsyncrxctl = rd32(E1000_TSYNCRXCTL);
+	unsigned long rx_event;
+
+	/* Other hardware uses per-packet timestamps */
+	if (hw->mac.type != e1000_82576)
+		return;
+
+	/* If we don't have a valid timestamp in the registers, just update the
+	 * timeout counter and exit
+	 */
+	if (!(tsyncrxctl & E1000_TSYNCRXCTL_VALID)) {
+		adapter->last_rx_ptp_check = jiffies;
+		return;
+	}
+
+	/* Determine the most recent watchdog or rx_timestamp event */
+	rx_event = adapter->last_rx_ptp_check;
+	if (time_after(adapter->last_rx_timestamp, rx_event))
+		rx_event = adapter->last_rx_timestamp;
+
+	/* Only need to read the high RXSTMP register to clear the lock */
+	if (time_is_before_jiffies(rx_event + 5 * HZ)) {
+		rd32(E1000_RXSTMPH);
+		adapter->last_rx_ptp_check = jiffies;
+		adapter->rx_hwtstamp_cleared++;
+		dev_warn(&adapter->pdev->dev, "clearing Rx timestamp hang\n");
+	}
+}
+
+/**
+ * igb_ptp_tx_hang - detect error case where Tx timestamp never finishes
+ * @adapter: private network adapter structure
+ */
+void igb_ptp_tx_hang(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	bool timeout = time_is_before_jiffies(adapter->ptp_tx_start +
+					      IGB_PTP_TX_TIMEOUT);
+
+	if (!adapter->ptp_tx_skb)
+		return;
+
+	if (!test_bit(__IGB_PTP_TX_IN_PROGRESS, &adapter->state))
+		return;
+
+	/* If we haven't received a timestamp within the timeout, it is
+	 * reasonable to assume that it will never occur, so we can unlock the
+	 * timestamp bit when this occurs.
+	 */
+	if (timeout) {
+		cancel_work_sync(&adapter->ptp_tx_work);
+		dev_kfree_skb_any(adapter->ptp_tx_skb);
+		adapter->ptp_tx_skb = NULL;
+		clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state);
+		adapter->tx_hwtstamp_timeouts++;
+		/* Clear the tx valid bit in TSYNCTXCTL register to enable
+		 * interrupt
+		 */
+		rd32(E1000_TXSTMPH);
+		dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n");
+	}
+}
+
+/**
+ * igb_ptp_tx_hwtstamp - utility function which checks for TX time stamp
+ * @adapter: Board private structure.
+ *
+ * If we were asked to do hardware stamping and such a time stamp is
+ * available, then it must have been for this skb here because we only
+ * allow only one such packet into the queue.
+ **/
+static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
+{
+	struct sk_buff *skb = adapter->ptp_tx_skb;
+	struct e1000_hw *hw = &adapter->hw;
+	struct skb_shared_hwtstamps shhwtstamps;
+	u64 regval;
+	int adjust = 0;
+
+	regval = rd32(E1000_TXSTMPL);
+	regval |= (u64)rd32(E1000_TXSTMPH) << 32;
+
+	igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
+	/* adjust timestamp for the TX latency based on link speed */
+	if (adapter->hw.mac.type == e1000_i210) {
+		switch (adapter->link_speed) {
+		case SPEED_10:
+			adjust = IGB_I210_TX_LATENCY_10;
+			break;
+		case SPEED_100:
+			adjust = IGB_I210_TX_LATENCY_100;
+			break;
+		case SPEED_1000:
+			adjust = IGB_I210_TX_LATENCY_1000;
+			break;
+		}
+	}
+
+	shhwtstamps.hwtstamp =
+		ktime_add_ns(shhwtstamps.hwtstamp, adjust);
+
+	/* Clear the lock early before calling skb_tstamp_tx so that
+	 * applications are not woken up before the lock bit is clear. We use
+	 * a copy of the skb pointer to ensure other threads can't change it
+	 * while we're notifying the stack.
+	 */
+	adapter->ptp_tx_skb = NULL;
+	clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state);
+
+	/* Notify the stack and free the skb after we've unlocked */
+	skb_tstamp_tx(skb, &shhwtstamps);
+	dev_kfree_skb_any(skb);
+}
+
+/**
+ * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp
+ * @q_vector: Pointer to interrupt specific structure
+ * @va: Pointer to address containing Rx buffer
+ * @timestamp: Pointer where timestamp will be stored
+ *
+ * This function is meant to retrieve a timestamp from the first buffer of an
+ * incoming frame.  The value is stored in little endian format starting on
+ * byte 8
+ *
+ * Returns: The timestamp header length or 0 if not available
+ **/
+int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
+			ktime_t *timestamp)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	struct skb_shared_hwtstamps ts;
+	__le64 *regval = (__le64 *)va;
+	int adjust = 0;
+
+	if (!(adapter->ptp_flags & IGB_PTP_ENABLED))
+		return 0;
+
+	/* The timestamp is recorded in little endian format.
+	 * DWORD: 0        1        2        3
+	 * Field: Reserved Reserved SYSTIML  SYSTIMH
+	 */
+
+	/* check reserved dwords are zero, be/le doesn't matter for zero */
+	if (regval[0])
+		return 0;
+
+	igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1]));
+
+	/* adjust timestamp for the RX latency based on link speed */
+	if (adapter->hw.mac.type == e1000_i210) {
+		switch (adapter->link_speed) {
+		case SPEED_10:
+			adjust = IGB_I210_RX_LATENCY_10;
+			break;
+		case SPEED_100:
+			adjust = IGB_I210_RX_LATENCY_100;
+			break;
+		case SPEED_1000:
+			adjust = IGB_I210_RX_LATENCY_1000;
+			break;
+		}
+	}
+
+	*timestamp = ktime_sub_ns(ts.hwtstamp, adjust);
+
+	return IGB_TS_HDR_LEN;
+}
+
+/**
+ * igb_ptp_rx_rgtstamp - retrieve Rx timestamp stored in register
+ * @q_vector: Pointer to interrupt specific structure
+ * @skb: Buffer containing timestamp and packet
+ *
+ * This function is meant to retrieve a timestamp from the internal registers
+ * of the adapter and store it in the skb.
+ **/
+void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
+{
+	struct igb_adapter *adapter = q_vector->adapter;
+	struct e1000_hw *hw = &adapter->hw;
+	int adjust = 0;
+	u64 regval;
+
+	if (!(adapter->ptp_flags & IGB_PTP_ENABLED))
+		return;
+
+	/* If this bit is set, then the RX registers contain the time stamp. No
+	 * other packet will be time stamped until we read these registers, so
+	 * read the registers to make them available again. Because only one
+	 * packet can be time stamped at a time, we know that the register
+	 * values must belong to this one here and therefore we don't need to
+	 * compare any of the additional attributes stored for it.
+	 *
+	 * If nothing went wrong, then it should have a shared tx_flags that we
+	 * can turn into a skb_shared_hwtstamps.
+	 */
+	if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
+		return;
+
+	regval = rd32(E1000_RXSTMPL);
+	regval |= (u64)rd32(E1000_RXSTMPH) << 32;
+
+	igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
+
+	/* adjust timestamp for the RX latency based on link speed */
+	if (adapter->hw.mac.type == e1000_i210) {
+		switch (adapter->link_speed) {
+		case SPEED_10:
+			adjust = IGB_I210_RX_LATENCY_10;
+			break;
+		case SPEED_100:
+			adjust = IGB_I210_RX_LATENCY_100;
+			break;
+		case SPEED_1000:
+			adjust = IGB_I210_RX_LATENCY_1000;
+			break;
+		}
+	}
+	skb_hwtstamps(skb)->hwtstamp =
+		ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
+
+	/* Update the last_rx_timestamp timer in order to enable watchdog check
+	 * for error case of latched timestamp on a dropped packet.
+	 */
+	adapter->last_rx_timestamp = jiffies;
+}
+
+/**
+ * igb_ptp_get_ts_config - get hardware time stamping config
+ * @netdev: netdev struct
+ * @ifr: interface struct
+ *
+ * Get the hwtstamp_config settings to return to the user. Rather than attempt
+ * to deconstruct the settings from the registers, just return a shadow copy
+ * of the last known settings.
+ **/
+int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct hwtstamp_config *config = &adapter->tstamp_config;
+
+	return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
+		-EFAULT : 0;
+}
+
+/**
+ * igb_ptp_set_timestamp_mode - setup hardware for timestamping
+ * @adapter: networking device structure
+ * @config: hwtstamp configuration
+ *
+ * Outgoing time stamping can be enabled and disabled. Play nice and
+ * disable it when requested, although it shouldn't case any overhead
+ * when no packet needs it. At most one packet in the queue may be
+ * marked for time stamping, otherwise it would be impossible to tell
+ * for sure to which packet the hardware time stamp belongs.
+ *
+ * Incoming time stamping has to be configured via the hardware
+ * filters. Not all combinations are supported, in particular event
+ * type has to be specified. Matching the kind of event packet is
+ * not supported, with the exception of "all V2 events regardless of
+ * level 2 or 4".
+ */
+static int igb_ptp_set_timestamp_mode(struct igb_adapter *adapter,
+				      struct hwtstamp_config *config)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
+	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
+	u32 tsync_rx_cfg = 0;
+	bool is_l4 = false;
+	bool is_l2 = false;
+	u32 regval;
+
+	switch (config->tx_type) {
+	case HWTSTAMP_TX_OFF:
+		tsync_tx_ctl = 0;
+		break;
+	case HWTSTAMP_TX_ON:
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config->rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		tsync_rx_ctl = 0;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
+		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
+		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
+		config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		is_l2 = true;
+		is_l4 = true;
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_NTP_ALL:
+	case HWTSTAMP_FILTER_ALL:
+		/* 82576 cannot timestamp all packets, which it needs to do to
+		 * support both V1 Sync and Delay_Req messages
+		 */
+		if (hw->mac.type != e1000_82576) {
+			tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
+			config->rx_filter = HWTSTAMP_FILTER_ALL;
+			break;
+		}
+		fallthrough;
+	default:
+		config->rx_filter = HWTSTAMP_FILTER_NONE;
+		return -ERANGE;
+	}
+
+	if (hw->mac.type == e1000_82575) {
+		if (tsync_rx_ctl | tsync_tx_ctl)
+			return -EINVAL;
+		return 0;
+	}
+
+	/* Per-packet timestamping only works if all packets are
+	 * timestamped, so enable timestamping in all packets as
+	 * long as one Rx filter was configured.
+	 */
+	if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
+		tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
+		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
+		config->rx_filter = HWTSTAMP_FILTER_ALL;
+		is_l2 = true;
+		is_l4 = true;
+
+		if ((hw->mac.type == e1000_i210) ||
+		    (hw->mac.type == e1000_i211)) {
+			regval = rd32(E1000_RXPBS);
+			regval |= E1000_RXPBS_CFG_TS_EN;
+			wr32(E1000_RXPBS, regval);
+		}
+	}
+
+	/* enable/disable TX */
+	regval = rd32(E1000_TSYNCTXCTL);
+	regval &= ~E1000_TSYNCTXCTL_ENABLED;
+	regval |= tsync_tx_ctl;
+	wr32(E1000_TSYNCTXCTL, regval);
+
+	/* enable/disable RX */
+	regval = rd32(E1000_TSYNCRXCTL);
+	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
+	regval |= tsync_rx_ctl;
+	wr32(E1000_TSYNCRXCTL, regval);
+
+	/* define which PTP packets are time stamped */
+	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
+
+	/* define ethertype filter for timestamped packets */
+	if (is_l2)
+		wr32(E1000_ETQF(IGB_ETQF_FILTER_1588),
+		     (E1000_ETQF_FILTER_ENABLE | /* enable filter */
+		      E1000_ETQF_1588 | /* enable timestamping */
+		      ETH_P_1588));     /* 1588 eth protocol type */
+	else
+		wr32(E1000_ETQF(IGB_ETQF_FILTER_1588), 0);
+
+	/* L4 Queue Filter[3]: filter by destination port and protocol */
+	if (is_l4) {
+		u32 ftqf = (IPPROTO_UDP /* UDP */
+			| E1000_FTQF_VF_BP /* VF not compared */
+			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
+			| E1000_FTQF_MASK); /* mask all inputs */
+		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
+
+		wr32(E1000_IMIR(3), (__force unsigned int)htons(PTP_EV_PORT));
+		wr32(E1000_IMIREXT(3),
+		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
+		if (hw->mac.type == e1000_82576) {
+			/* enable source port check */
+			wr32(E1000_SPQF(3), (__force unsigned int)htons(PTP_EV_PORT));
+			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
+		}
+		wr32(E1000_FTQF(3), ftqf);
+	} else {
+		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
+	}
+	wrfl();
+
+	/* clear TX/RX time stamp registers, just to be sure */
+	regval = rd32(E1000_TXSTMPL);
+	regval = rd32(E1000_TXSTMPH);
+	regval = rd32(E1000_RXSTMPL);
+	regval = rd32(E1000_RXSTMPH);
+
+	return 0;
+}
+
+/**
+ * igb_ptp_set_ts_config - set hardware time stamping config
+ * @netdev: netdev struct
+ * @ifr: interface struct
+ *
+ **/
+int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr)
+{
+	struct igb_adapter *adapter = netdev_priv(netdev);
+	struct hwtstamp_config config;
+	int err;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	err = igb_ptp_set_timestamp_mode(adapter, &config);
+	if (err)
+		return err;
+
+	/* save these settings for future reference */
+	memcpy(&adapter->tstamp_config, &config,
+	       sizeof(adapter->tstamp_config));
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+		-EFAULT : 0;
+}
+
+/**
+ * igb_ptp_init - Initialize PTP functionality
+ * @adapter: Board private structure
+ *
+ * This function is called at device probe to initialize the PTP
+ * functionality.
+ */
+void igb_ptp_init(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+
+	switch (hw->mac.type) {
+	case e1000_82576:
+		snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
+		adapter->ptp_caps.owner = THIS_MODULE;
+		adapter->ptp_caps.max_adj = 999999881;
+		adapter->ptp_caps.n_ext_ts = 0;
+		adapter->ptp_caps.pps = 0;
+		adapter->ptp_caps.adjfine = igb_ptp_adjfine_82576;
+		adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
+		adapter->ptp_caps.gettimex64 = igb_ptp_gettimex_82576;
+		adapter->ptp_caps.settime64 = igb_ptp_settime_82576;
+		adapter->ptp_caps.enable = igb_ptp_feature_enable;
+		adapter->cc.read = igb_ptp_read_82576;
+		adapter->cc.mask = CYCLECOUNTER_MASK(64);
+		adapter->cc.mult = 1;
+		adapter->cc.shift = IGB_82576_TSYNC_SHIFT;
+		adapter->ptp_flags |= IGB_PTP_OVERFLOW_CHECK;
+		break;
+	case e1000_82580:
+	case e1000_i354:
+	case e1000_i350:
+		igb_ptp_sdp_init(adapter);
+		snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
+		adapter->ptp_caps.owner = THIS_MODULE;
+		adapter->ptp_caps.max_adj = 62499999;
+		adapter->ptp_caps.n_ext_ts = IGB_N_EXTTS;
+		adapter->ptp_caps.n_per_out = IGB_N_PEROUT;
+		adapter->ptp_caps.n_pins = IGB_N_SDP;
+		adapter->ptp_caps.pps = 0;
+		adapter->ptp_caps.pin_config = adapter->sdp_config;
+		adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580;
+		adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
+		adapter->ptp_caps.gettimex64 = igb_ptp_gettimex_82580;
+		adapter->ptp_caps.settime64 = igb_ptp_settime_82576;
+		adapter->ptp_caps.enable = igb_ptp_feature_enable_82580;
+		adapter->ptp_caps.verify = igb_ptp_verify_pin;
+		adapter->cc.read = igb_ptp_read_82580;
+		adapter->cc.mask = CYCLECOUNTER_MASK(IGB_NBITS_82580);
+		adapter->cc.mult = 1;
+		adapter->cc.shift = 0;
+		adapter->ptp_flags |= IGB_PTP_OVERFLOW_CHECK;
+		break;
+	case e1000_i210:
+	case e1000_i211:
+		igb_ptp_sdp_init(adapter);
+		snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
+		adapter->ptp_caps.owner = THIS_MODULE;
+		adapter->ptp_caps.max_adj = 62499999;
+		adapter->ptp_caps.n_ext_ts = IGB_N_EXTTS;
+		adapter->ptp_caps.n_per_out = IGB_N_PEROUT;
+		adapter->ptp_caps.n_pins = IGB_N_SDP;
+		adapter->ptp_caps.pps = 1;
+		adapter->ptp_caps.pin_config = adapter->sdp_config;
+		adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580;
+		adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210;
+		adapter->ptp_caps.gettimex64 = igb_ptp_gettimex_i210;
+		adapter->ptp_caps.settime64 = igb_ptp_settime_i210;
+		adapter->ptp_caps.enable = igb_ptp_feature_enable_i210;
+		adapter->ptp_caps.verify = igb_ptp_verify_pin;
+		break;
+	default:
+		adapter->ptp_clock = NULL;
+		return;
+	}
+
+	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
+						&adapter->pdev->dev);
+	if (IS_ERR(adapter->ptp_clock)) {
+		adapter->ptp_clock = NULL;
+		dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n");
+	} else if (adapter->ptp_clock) {
+		dev_info(&adapter->pdev->dev, "added PHC on %s\n",
+			 adapter->netdev->name);
+		adapter->ptp_flags |= IGB_PTP_ENABLED;
+
+		spin_lock_init(&adapter->tmreg_lock);
+		INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
+
+		if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
+			INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
+					  igb_ptp_overflow_check);
+
+		adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+		adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
+
+		igb_ptp_reset(adapter);
+	}
+}
+
+/**
+ * igb_ptp_sdp_init - utility function which inits the SDP config structs
+ * @adapter: Board private structure.
+ **/
+void igb_ptp_sdp_init(struct igb_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < IGB_N_SDP; i++) {
+		struct ptp_pin_desc *ppd = &adapter->sdp_config[i];
+
+		snprintf(ppd->name, sizeof(ppd->name), "SDP%d", i);
+		ppd->index = i;
+		ppd->func = PTP_PF_NONE;
+	}
+}
+
+/**
+ * igb_ptp_suspend - Disable PTP work items and prepare for suspend
+ * @adapter: Board private structure
+ *
+ * This function stops the overflow check work and PTP Tx timestamp work, and
+ * will prepare the device for OS suspend.
+ */
+void igb_ptp_suspend(struct igb_adapter *adapter)
+{
+	if (!(adapter->ptp_flags & IGB_PTP_ENABLED))
+		return;
+
+	if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
+		cancel_delayed_work_sync(&adapter->ptp_overflow_work);
+
+	cancel_work_sync(&adapter->ptp_tx_work);
+	if (adapter->ptp_tx_skb) {
+		dev_kfree_skb_any(adapter->ptp_tx_skb);
+		adapter->ptp_tx_skb = NULL;
+		clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state);
+	}
+}
+
+/**
+ * igb_ptp_stop - Disable PTP device and stop the overflow check.
+ * @adapter: Board private structure.
+ *
+ * This function stops the PTP support and cancels the delayed work.
+ **/
+void igb_ptp_stop(struct igb_adapter *adapter)
+{
+	igb_ptp_suspend(adapter);
+
+	if (adapter->ptp_clock) {
+		ptp_clock_unregister(adapter->ptp_clock);
+		dev_info(&adapter->pdev->dev, "removed PHC on %s\n",
+			 adapter->netdev->name);
+		adapter->ptp_flags &= ~IGB_PTP_ENABLED;
+	}
+}
+
+/**
+ * igb_ptp_reset - Re-enable the adapter for PTP following a reset.
+ * @adapter: Board private structure.
+ *
+ * This function handles the reset work required to re-enable the PTP device.
+ **/
+void igb_ptp_reset(struct igb_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	unsigned long flags;
+
+	/* reset the tstamp_config */
+	igb_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config);
+
+	spin_lock_irqsave(&adapter->tmreg_lock, flags);
+
+	switch (adapter->hw.mac.type) {
+	case e1000_82576:
+		/* Dial the nominal frequency. */
+		wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576);
+		break;
+	case e1000_82580:
+	case e1000_i354:
+	case e1000_i350:
+	case e1000_i210:
+	case e1000_i211:
+		wr32(E1000_TSAUXC, 0x0);
+		wr32(E1000_TSSDP, 0x0);
+		wr32(E1000_TSIM,
+		     TSYNC_INTERRUPTS |
+		     (adapter->pps_sys_wrap_on ? TSINTR_SYS_WRAP : 0));
+		wr32(E1000_IMS, E1000_IMS_TS);
+		break;
+	default:
+		/* No work to do. */
+		goto out;
+	}
+
+	/* Re-initialize the timer. */
+	if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) {
+		struct timespec64 ts = ktime_to_timespec64(ktime_get_real());
+
+		igb_ptp_write_i210(adapter, &ts);
+	} else {
+		timecounter_init(&adapter->tc, &adapter->cc,
+				 ktime_to_ns(ktime_get_real()));
+	}
+out:
+	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+	wrfl();
+
+	if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
+		schedule_delayed_work(&adapter->ptp_overflow_work,
+				      IGB_SYSTIM_OVERFLOW_PERIOD);
+}
diff --git a/drivers/net/ethernet/intel/igbvf/Makefile b/drivers/net/ethernet/intel/igbvf/Makefile
new file mode 100644
index 0000000000..afd3e36eae
--- /dev/null
+++ b/drivers/net/ethernet/intel/igbvf/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2009 - 2018 Intel Corporation.
+#
+# Makefile for the Intel(R) 82576 VF ethernet driver
+#
+
+obj-$(CONFIG_IGBVF) += igbvf.o
+
+igbvf-objs := vf.o \
+              mbx.o \
+              ethtool.o \
+              netdev.o
+
diff --git a/drivers/net/ethernet/intel/igbvf/defines.h b/drivers/net/ethernet/intel/igbvf/defines.h
new file mode 100644
index 0000000000..4437f83241
--- /dev/null
+++ b/drivers/net/ethernet/intel/igbvf/defines.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _E1000_DEFINES_H_
+#define _E1000_DEFINES_H_
+
+/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define REQ_TX_DESCRIPTOR_MULTIPLE	8
+#define REQ_RX_DESCRIPTOR_MULTIPLE	8
+
+/* IVAR valid bit */
+#define E1000_IVAR_VALID	0x80
+
+/* Receive Descriptor bit definitions */
+#define E1000_RXD_STAT_DD	0x01    /* Descriptor Done */
+#define E1000_RXD_STAT_EOP	0x02    /* End of Packet */
+#define E1000_RXD_STAT_IXSM	0x04    /* Ignore checksum */
+#define E1000_RXD_STAT_VP	0x08    /* IEEE VLAN Packet */
+#define E1000_RXD_STAT_UDPCS	0x10    /* UDP xsum calculated */
+#define E1000_RXD_STAT_TCPCS	0x20    /* TCP xsum calculated */
+#define E1000_RXD_STAT_IPCS	0x40    /* IP xsum calculated */
+#define E1000_RXD_ERR_SE	0x02    /* Symbol Error */
+#define E1000_RXD_SPC_VLAN_MASK	0x0FFF  /* VLAN ID is in lower 12 bits */
+
+#define E1000_RXDEXT_STATERR_LB	0x00040000
+#define E1000_RXDEXT_STATERR_CE	0x01000000
+#define E1000_RXDEXT_STATERR_SE	0x02000000
+#define E1000_RXDEXT_STATERR_SEQ	0x04000000
+#define E1000_RXDEXT_STATERR_CXE	0x10000000
+#define E1000_RXDEXT_STATERR_TCPE	0x20000000
+#define E1000_RXDEXT_STATERR_IPE	0x40000000
+#define E1000_RXDEXT_STATERR_RXE	0x80000000
+
+/* Same mask, but for extended and packet split descriptors */
+#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \
+	E1000_RXDEXT_STATERR_CE  | \
+	E1000_RXDEXT_STATERR_SE  | \
+	E1000_RXDEXT_STATERR_SEQ | \
+	E1000_RXDEXT_STATERR_CXE | \
+	E1000_RXDEXT_STATERR_RXE)
+
+/* Device Control */
+#define E1000_CTRL_RST		0x04000000  /* Global reset */
+
+/* Device Status */
+#define E1000_STATUS_FD		0x00000001      /* Full duplex.0=half,1=full */
+#define E1000_STATUS_LU		0x00000002      /* Link up.0=no,1=link */
+#define E1000_STATUS_TXOFF	0x00000010      /* transmission paused */
+#define E1000_STATUS_SPEED_10	0x00000000      /* Speed 10Mb/s */
+#define E1000_STATUS_SPEED_100	0x00000040      /* Speed 100Mb/s */
+#define E1000_STATUS_SPEED_1000	0x00000080      /* Speed 1000Mb/s */
+
+#define SPEED_10	10
+#define SPEED_100	100
+#define SPEED_1000	1000
+#define HALF_DUPLEX	1
+#define FULL_DUPLEX	2
+
+/* Transmit Descriptor bit definitions */
+#define E1000_TXD_POPTS_IXSM	0x01       /* Insert IP checksum */
+#define E1000_TXD_POPTS_TXSM	0x02       /* Insert TCP/UDP checksum */
+#define E1000_TXD_CMD_DEXT	0x20000000 /* Desc extension (0 = legacy) */
+#define E1000_TXD_STAT_DD	0x00000001 /* Desc Done */
+
+#define MAX_JUMBO_FRAME_SIZE		0x3F00
+#define MAX_STD_JUMBO_FRAME_SIZE	9216
+
+/* 802.1q VLAN Packet Size */
+#define VLAN_TAG_SIZE		4    /* 802.3ac tag (not DMA'd) */
+
+/* Error Codes */
+#define E1000_SUCCESS		0
+#define E1000_ERR_CONFIG	3
+#define E1000_ERR_MAC_INIT	5
+#define E1000_ERR_MBX		15
+
+/* SRRCTL bit definitions */
+#define E1000_SRRCTL_BSIZEPKT_SHIFT		10 /* Shift _right_ */
+#define E1000_SRRCTL_BSIZEHDRSIZE_MASK		0x00000F00
+#define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT		2  /* Shift _left_ */
+#define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF	0x02000000
+#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS	0x0A000000
+#define E1000_SRRCTL_DESCTYPE_MASK		0x0E000000
+#define E1000_SRRCTL_DROP_EN			0x80000000
+
+#define E1000_SRRCTL_BSIZEPKT_MASK	0x0000007F
+#define E1000_SRRCTL_BSIZEHDR_MASK	0x00003F00
+
+/* Additional Descriptor Control definitions */
+#define E1000_TXDCTL_QUEUE_ENABLE	0x02000000 /* Enable specific Tx Que */
+#define E1000_RXDCTL_QUEUE_ENABLE	0x02000000 /* Enable specific Rx Que */
+
+/* Direct Cache Access (DCA) definitions */
+#define E1000_DCA_TXCTRL_TX_WB_RO_EN	BIT(11) /* Tx Desc writeback RO bit */
+
+#define E1000_VF_INIT_TIMEOUT	200 /* Number of retries to clear RSTI */
+
+#endif /* _E1000_DEFINES_H_ */
diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c
new file mode 100644
index 0000000000..83b97989a6
--- /dev/null
+++ b/drivers/net/ethernet/intel/igbvf/ethtool.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
+
+/* ethtool support for igbvf */
+
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+#include <linux/delay.h>
+
+#include "igbvf.h"
+#include <linux/if_vlan.h>
+
+struct igbvf_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+	int base_stat_offset;
+};
+
+#define IGBVF_STAT(current, base) \
+		sizeof(((struct igbvf_adapter *)0)->current), \
+		offsetof(struct igbvf_adapter, current), \
+		offsetof(struct igbvf_adapter, base)
+
+static const struct igbvf_stats igbvf_gstrings_stats[] = {
+	{ "rx_packets", IGBVF_STAT(stats.gprc, stats.base_gprc) },
+	{ "tx_packets", IGBVF_STAT(stats.gptc, stats.base_gptc) },
+	{ "rx_bytes", IGBVF_STAT(stats.gorc, stats.base_gorc) },
+	{ "tx_bytes", IGBVF_STAT(stats.gotc, stats.base_gotc) },
+	{ "multicast", IGBVF_STAT(stats.mprc, stats.base_mprc) },
+	{ "lbrx_bytes", IGBVF_STAT(stats.gorlbc, stats.base_gorlbc) },
+	{ "lbrx_packets", IGBVF_STAT(stats.gprlbc, stats.base_gprlbc) },
+	{ "tx_restart_queue", IGBVF_STAT(restart_queue, zero_base) },
+	{ "rx_long_byte_count", IGBVF_STAT(stats.gorc, stats.base_gorc) },
+	{ "rx_csum_offload_good", IGBVF_STAT(hw_csum_good, zero_base) },
+	{ "rx_csum_offload_errors", IGBVF_STAT(hw_csum_err, zero_base) },
+	{ "rx_header_split", IGBVF_STAT(rx_hdr_split, zero_base) },
+	{ "alloc_rx_buff_failed", IGBVF_STAT(alloc_rx_buff_failed, zero_base) },
+};
+
+#define IGBVF_GLOBAL_STATS_LEN ARRAY_SIZE(igbvf_gstrings_stats)
+
+static const char igbvf_gstrings_test[][ETH_GSTRING_LEN] = {
+	"Link test   (on/offline)"
+};
+
+#define IGBVF_TEST_LEN ARRAY_SIZE(igbvf_gstrings_test)
+
+static int igbvf_get_link_ksettings(struct net_device *netdev,
+				    struct ethtool_link_ksettings *cmd)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 status;
+
+	ethtool_link_ksettings_zero_link_mode(cmd, supported);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full);
+	ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full);
+
+	cmd->base.port = -1;
+
+	status = er32(STATUS);
+	if (status & E1000_STATUS_LU) {
+		if (status & E1000_STATUS_SPEED_1000)
+			cmd->base.speed = SPEED_1000;
+		else if (status & E1000_STATUS_SPEED_100)
+			cmd->base.speed = SPEED_100;
+		else
+			cmd->base.speed = SPEED_10;
+
+		if (status & E1000_STATUS_FD)
+			cmd->base.duplex = DUPLEX_FULL;
+		else
+			cmd->base.duplex = DUPLEX_HALF;
+	} else {
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
+	}
+
+	cmd->base.autoneg = AUTONEG_DISABLE;
+
+	return 0;
+}
+
+static int igbvf_set_link_ksettings(struct net_device *netdev,
+				    const struct ethtool_link_ksettings *cmd)
+{
+	return -EOPNOTSUPP;
+}
+
+static void igbvf_get_pauseparam(struct net_device *netdev,
+				 struct ethtool_pauseparam *pause)
+{
+}
+
+static int igbvf_set_pauseparam(struct net_device *netdev,
+				struct ethtool_pauseparam *pause)
+{
+	return -EOPNOTSUPP;
+}
+
+static u32 igbvf_get_msglevel(struct net_device *netdev)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->msg_enable;
+}
+
+static void igbvf_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	adapter->msg_enable = data;
+}
+
+static int igbvf_get_regs_len(struct net_device *netdev)
+{
+#define IGBVF_REGS_LEN 8
+	return IGBVF_REGS_LEN * sizeof(u32);
+}
+
+static void igbvf_get_regs(struct net_device *netdev,
+			   struct ethtool_regs *regs, void *p)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	u32 *regs_buff = p;
+
+	memset(p, 0, IGBVF_REGS_LEN * sizeof(u32));
+
+	regs->version = (1u << 24) |
+			(adapter->pdev->revision << 16) |
+			adapter->pdev->device;
+
+	regs_buff[0] = er32(CTRL);
+	regs_buff[1] = er32(STATUS);
+
+	regs_buff[2] = er32(RDLEN(0));
+	regs_buff[3] = er32(RDH(0));
+	regs_buff[4] = er32(RDT(0));
+
+	regs_buff[5] = er32(TDLEN(0));
+	regs_buff[6] = er32(TDH(0));
+	regs_buff[7] = er32(TDT(0));
+}
+
+static int igbvf_get_eeprom_len(struct net_device *netdev)
+{
+	return 0;
+}
+
+static int igbvf_get_eeprom(struct net_device *netdev,
+			    struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	return -EOPNOTSUPP;
+}
+
+static int igbvf_set_eeprom(struct net_device *netdev,
+			    struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	return -EOPNOTSUPP;
+}
+
+static void igbvf_get_drvinfo(struct net_device *netdev,
+			      struct ethtool_drvinfo *drvinfo)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	strscpy(drvinfo->driver,  igbvf_driver_name, sizeof(drvinfo->driver));
+	strscpy(drvinfo->bus_info, pci_name(adapter->pdev),
+		sizeof(drvinfo->bus_info));
+}
+
+static void igbvf_get_ringparam(struct net_device *netdev,
+				struct ethtool_ringparam *ring,
+				struct kernel_ethtool_ringparam *kernel_ring,
+				struct netlink_ext_ack *extack)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	struct igbvf_ring *tx_ring = adapter->tx_ring;
+	struct igbvf_ring *rx_ring = adapter->rx_ring;
+
+	ring->rx_max_pending = IGBVF_MAX_RXD;
+	ring->tx_max_pending = IGBVF_MAX_TXD;
+	ring->rx_pending = rx_ring->count;
+	ring->tx_pending = tx_ring->count;
+}
+
+static int igbvf_set_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ring,
+			       struct kernel_ethtool_ringparam *kernel_ring,
+			       struct netlink_ext_ack *extack)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	struct igbvf_ring *temp_ring;
+	int err = 0;
+	u32 new_rx_count, new_tx_count;
+
+	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+		return -EINVAL;
+
+	new_rx_count = max_t(u32, ring->rx_pending, IGBVF_MIN_RXD);
+	new_rx_count = min_t(u32, new_rx_count, IGBVF_MAX_RXD);
+	new_rx_count = ALIGN(new_rx_count, REQ_RX_DESCRIPTOR_MULTIPLE);
+
+	new_tx_count = max_t(u32, ring->tx_pending, IGBVF_MIN_TXD);
+	new_tx_count = min_t(u32, new_tx_count, IGBVF_MAX_TXD);
+	new_tx_count = ALIGN(new_tx_count, REQ_TX_DESCRIPTOR_MULTIPLE);
+
+	if ((new_tx_count == adapter->tx_ring->count) &&
+	    (new_rx_count == adapter->rx_ring->count)) {
+		/* nothing to do */
+		return 0;
+	}
+
+	while (test_and_set_bit(__IGBVF_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (!netif_running(adapter->netdev)) {
+		adapter->tx_ring->count = new_tx_count;
+		adapter->rx_ring->count = new_rx_count;
+		goto clear_reset;
+	}
+
+	temp_ring = vmalloc(sizeof(struct igbvf_ring));
+	if (!temp_ring) {
+		err = -ENOMEM;
+		goto clear_reset;
+	}
+
+	igbvf_down(adapter);
+
+	/* We can't just free everything and then setup again,
+	 * because the ISRs in MSI-X mode get passed pointers
+	 * to the Tx and Rx ring structs.
+	 */
+	if (new_tx_count != adapter->tx_ring->count) {
+		memcpy(temp_ring, adapter->tx_ring, sizeof(struct igbvf_ring));
+
+		temp_ring->count = new_tx_count;
+		err = igbvf_setup_tx_resources(adapter, temp_ring);
+		if (err)
+			goto err_setup;
+
+		igbvf_free_tx_resources(adapter->tx_ring);
+
+		memcpy(adapter->tx_ring, temp_ring, sizeof(struct igbvf_ring));
+	}
+
+	if (new_rx_count != adapter->rx_ring->count) {
+		memcpy(temp_ring, adapter->rx_ring, sizeof(struct igbvf_ring));
+
+		temp_ring->count = new_rx_count;
+		err = igbvf_setup_rx_resources(adapter, temp_ring);
+		if (err)
+			goto err_setup;
+
+		igbvf_free_rx_resources(adapter->rx_ring);
+
+		memcpy(adapter->rx_ring, temp_ring, sizeof(struct igbvf_ring));
+	}
+err_setup:
+	igbvf_up(adapter);
+	vfree(temp_ring);
+clear_reset:
+	clear_bit(__IGBVF_RESETTING, &adapter->state);
+	return err;
+}
+
+static int igbvf_link_test(struct igbvf_adapter *adapter, u64 *data)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	*data = 0;
+
+	spin_lock_bh(&hw->mbx_lock);
+
+	hw->mac.ops.check_for_link(hw);
+
+	spin_unlock_bh(&hw->mbx_lock);
+
+	if (!(er32(STATUS) & E1000_STATUS_LU))
+		*data = 1;
+
+	return *data;
+}
+
+static void igbvf_diag_test(struct net_device *netdev,
+			    struct ethtool_test *eth_test, u64 *data)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	set_bit(__IGBVF_TESTING, &adapter->state);
+
+	/* Link test performed before hardware reset so autoneg doesn't
+	 * interfere with test result
+	 */
+	if (igbvf_link_test(adapter, &data[0]))
+		eth_test->flags |= ETH_TEST_FL_FAILED;
+
+	clear_bit(__IGBVF_TESTING, &adapter->state);
+	msleep_interruptible(4 * 1000);
+}
+
+static void igbvf_get_wol(struct net_device *netdev,
+			  struct ethtool_wolinfo *wol)
+{
+	wol->supported = 0;
+	wol->wolopts = 0;
+}
+
+static int igbvf_set_wol(struct net_device *netdev,
+			 struct ethtool_wolinfo *wol)
+{
+	return -EOPNOTSUPP;
+}
+
+static int igbvf_get_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *ec,
+			      struct kernel_ethtool_coalesce *kernel_coal,
+			      struct netlink_ext_ack *extack)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	if (adapter->requested_itr <= 3)
+		ec->rx_coalesce_usecs = adapter->requested_itr;
+	else
+		ec->rx_coalesce_usecs = adapter->current_itr >> 2;
+
+	return 0;
+}
+
+static int igbvf_set_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *ec,
+			      struct kernel_ethtool_coalesce *kernel_coal,
+			      struct netlink_ext_ack *extack)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	if ((ec->rx_coalesce_usecs >= IGBVF_MIN_ITR_USECS) &&
+	    (ec->rx_coalesce_usecs <= IGBVF_MAX_ITR_USECS)) {
+		adapter->current_itr = ec->rx_coalesce_usecs << 2;
+		adapter->requested_itr = 1000000000 /
+					(adapter->current_itr * 256);
+	} else if ((ec->rx_coalesce_usecs == 3) ||
+		   (ec->rx_coalesce_usecs == 2)) {
+		adapter->current_itr = IGBVF_START_ITR;
+		adapter->requested_itr = ec->rx_coalesce_usecs;
+	} else if (ec->rx_coalesce_usecs == 0) {
+		/* The user's desire is to turn off interrupt throttling
+		 * altogether, but due to HW limitations, we can't do that.
+		 * Instead we set a very small value in EITR, which would
+		 * allow ~967k interrupts per second, but allow the adapter's
+		 * internal clocking to still function properly.
+		 */
+		adapter->current_itr = 4;
+		adapter->requested_itr = 1000000000 /
+					(adapter->current_itr * 256);
+	} else {
+		return -EINVAL;
+	}
+
+	writel(adapter->current_itr,
+	       hw->hw_addr + adapter->rx_ring->itr_register);
+
+	return 0;
+}
+
+static int igbvf_nway_reset(struct net_device *netdev)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	if (netif_running(netdev))
+		igbvf_reinit_locked(adapter);
+	return 0;
+}
+
+static void igbvf_get_ethtool_stats(struct net_device *netdev,
+				    struct ethtool_stats *stats,
+				    u64 *data)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	igbvf_update_stats(adapter);
+	for (i = 0; i < IGBVF_GLOBAL_STATS_LEN; i++) {
+		char *p = (char *)adapter +
+			  igbvf_gstrings_stats[i].stat_offset;
+		char *b = (char *)adapter +
+			  igbvf_gstrings_stats[i].base_stat_offset;
+		data[i] = ((igbvf_gstrings_stats[i].sizeof_stat ==
+			    sizeof(u64)) ? (*(u64 *)p - *(u64 *)b) :
+			    (*(u32 *)p - *(u32 *)b));
+	}
+}
+
+static int igbvf_get_sset_count(struct net_device *dev, int stringset)
+{
+	switch (stringset) {
+	case ETH_SS_TEST:
+		return IGBVF_TEST_LEN;
+	case ETH_SS_STATS:
+		return IGBVF_GLOBAL_STATS_LEN;
+	default:
+		return -EINVAL;
+	}
+}
+
+static void igbvf_get_strings(struct net_device *netdev, u32 stringset,
+			      u8 *data)
+{
+	u8 *p = data;
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_TEST:
+		memcpy(data, *igbvf_gstrings_test, sizeof(igbvf_gstrings_test));
+		break;
+	case ETH_SS_STATS:
+		for (i = 0; i < IGBVF_GLOBAL_STATS_LEN; i++) {
+			memcpy(p, igbvf_gstrings_stats[i].stat_string,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+		break;
+	}
+}
+
+static const struct ethtool_ops igbvf_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS,
+	.get_drvinfo		= igbvf_get_drvinfo,
+	.get_regs_len		= igbvf_get_regs_len,
+	.get_regs		= igbvf_get_regs,
+	.get_wol		= igbvf_get_wol,
+	.set_wol		= igbvf_set_wol,
+	.get_msglevel		= igbvf_get_msglevel,
+	.set_msglevel		= igbvf_set_msglevel,
+	.nway_reset		= igbvf_nway_reset,
+	.get_link		= ethtool_op_get_link,
+	.get_eeprom_len		= igbvf_get_eeprom_len,
+	.get_eeprom		= igbvf_get_eeprom,
+	.set_eeprom		= igbvf_set_eeprom,
+	.get_ringparam		= igbvf_get_ringparam,
+	.set_ringparam		= igbvf_set_ringparam,
+	.get_pauseparam		= igbvf_get_pauseparam,
+	.set_pauseparam		= igbvf_set_pauseparam,
+	.self_test		= igbvf_diag_test,
+	.get_sset_count		= igbvf_get_sset_count,
+	.get_strings		= igbvf_get_strings,
+	.get_ethtool_stats	= igbvf_get_ethtool_stats,
+	.get_coalesce		= igbvf_get_coalesce,
+	.set_coalesce		= igbvf_set_coalesce,
+	.get_link_ksettings	= igbvf_get_link_ksettings,
+	.set_link_ksettings	= igbvf_set_link_ksettings,
+};
+
+void igbvf_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &igbvf_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/igbvf/igbvf.h b/drivers/net/ethernet/intel/igbvf/igbvf.h
new file mode 100644
index 0000000000..7b83678ba8
--- /dev/null
+++ b/drivers/net/ethernet/intel/igbvf/igbvf.h
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
+
+/* Linux PRO/1000 Ethernet Driver main header file */
+
+#ifndef _IGBVF_H_
+#define _IGBVF_H_
+
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/io.h>
+#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+
+#include "vf.h"
+
+/* Forward declarations */
+struct igbvf_info;
+struct igbvf_adapter;
+
+/* Interrupt defines */
+#define IGBVF_START_ITR		488 /* ~8000 ints/sec */
+#define IGBVF_4K_ITR		980
+#define IGBVF_20K_ITR		196
+#define IGBVF_70K_ITR		56
+
+enum latency_range {
+	lowest_latency = 0,
+	low_latency = 1,
+	bulk_latency = 2,
+	latency_invalid = 255
+};
+
+/* Interrupt modes, as used by the IntMode parameter */
+#define IGBVF_INT_MODE_LEGACY	0
+#define IGBVF_INT_MODE_MSI	1
+#define IGBVF_INT_MODE_MSIX	2
+
+/* Tx/Rx descriptor defines */
+#define IGBVF_DEFAULT_TXD	256
+#define IGBVF_MAX_TXD		4096
+#define IGBVF_MIN_TXD		64
+
+#define IGBVF_DEFAULT_RXD	256
+#define IGBVF_MAX_RXD		4096
+#define IGBVF_MIN_RXD		64
+
+#define IGBVF_MIN_ITR_USECS	10 /* 100000 irq/sec */
+#define IGBVF_MAX_ITR_USECS	10000 /* 100    irq/sec */
+
+/* RX descriptor control thresholds.
+ * PTHRESH - MAC will consider prefetch if it has fewer than this number of
+ *	   descriptors available in its onboard memory.
+ *	   Setting this to 0 disables RX descriptor prefetch.
+ * HTHRESH - MAC will only prefetch if there are at least this many descriptors
+ *	   available in host memory.
+ *	   If PTHRESH is 0, this should also be 0.
+ * WTHRESH - RX descriptor writeback threshold - MAC will delay writing back
+ *	   descriptors until either it has this many to write back, or the
+ *	   ITR timer expires.
+ */
+#define IGBVF_RX_PTHRESH	16
+#define IGBVF_RX_HTHRESH	8
+#define IGBVF_RX_WTHRESH	1
+
+/* this is the size past which hardware will drop packets when setting LPE=0 */
+#define MAXIMUM_ETHERNET_VLAN_SIZE	1522
+
+#define IGBVF_FC_PAUSE_TIME	0x0680 /* 858 usec */
+
+/* How many Tx Descriptors do we need to call netif_wake_queue ? */
+#define IGBVF_TX_QUEUE_WAKE	32
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define IGBVF_RX_BUFFER_WRITE	16 /* Must be power of 2 */
+
+#define AUTO_ALL_MODES		0
+#define IGBVF_EEPROM_APME	0x0400
+
+#define IGBVF_MNG_VLAN_NONE	(-1)
+
+#define IGBVF_MAX_MAC_FILTERS	3
+
+/* Number of packet split data buffers (not including the header buffer) */
+#define PS_PAGE_BUFFERS		(MAX_PS_BUFFERS - 1)
+
+enum igbvf_boards {
+	board_vf,
+	board_i350_vf,
+};
+
+struct igbvf_queue_stats {
+	u64 packets;
+	u64 bytes;
+};
+
+/* wrappers around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer
+ */
+struct igbvf_buffer {
+	dma_addr_t dma;
+	struct sk_buff *skb;
+	union {
+		/* Tx */
+		struct {
+			unsigned long time_stamp;
+			union e1000_adv_tx_desc *next_to_watch;
+			u16 length;
+			u16 mapped_as_page;
+		};
+		/* Rx */
+		struct {
+			struct page *page;
+			u64 page_dma;
+			unsigned int page_offset;
+		};
+	};
+};
+
+union igbvf_desc {
+	union e1000_adv_rx_desc rx_desc;
+	union e1000_adv_tx_desc tx_desc;
+	struct e1000_adv_tx_context_desc tx_context_desc;
+};
+
+struct igbvf_ring {
+	struct igbvf_adapter *adapter;  /* backlink */
+	union igbvf_desc *desc;	/* pointer to ring memory  */
+	dma_addr_t dma;		/* phys address of ring    */
+	unsigned int size;	/* length of ring in bytes */
+	unsigned int count;	/* number of desc. in ring */
+
+	u16 next_to_use;
+	u16 next_to_clean;
+
+	u16 head;
+	u16 tail;
+
+	/* array of buffer information structs */
+	struct igbvf_buffer *buffer_info;
+	struct napi_struct napi;
+
+	char name[IFNAMSIZ + 5];
+	u32 eims_value;
+	u32 itr_val;
+	enum latency_range itr_range;
+	u16 itr_register;
+	int set_itr;
+
+	struct sk_buff *rx_skb_top;
+
+	struct igbvf_queue_stats stats;
+};
+
+/* board specific private data structure */
+struct igbvf_adapter {
+	struct timer_list watchdog_timer;
+	struct timer_list blink_timer;
+
+	struct work_struct reset_task;
+	struct work_struct watchdog_task;
+
+	const struct igbvf_info *ei;
+
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+	u32 bd_number;
+	u32 rx_buffer_len;
+	u32 polling_interval;
+	u16 mng_vlan_id;
+	u16 link_speed;
+	u16 link_duplex;
+
+	spinlock_t tx_queue_lock; /* prevent concurrent tail updates */
+
+	/* track device up/down/testing state */
+	unsigned long state;
+
+	/* Interrupt Throttle Rate */
+	u32 requested_itr; /* ints/sec or adaptive */
+	u32 current_itr; /* Actual ITR register value, not ints/sec */
+
+	/* Tx */
+	struct igbvf_ring *tx_ring /* One per active queue */
+	____cacheline_aligned_in_smp;
+
+	unsigned int restart_queue;
+	u32 txd_cmd;
+
+	u32 tx_int_delay;
+	u32 tx_abs_int_delay;
+
+	unsigned int total_tx_bytes;
+	unsigned int total_tx_packets;
+	unsigned int total_rx_bytes;
+	unsigned int total_rx_packets;
+
+	/* Tx stats */
+	u32 tx_timeout_count;
+	u32 tx_fifo_head;
+	u32 tx_head_addr;
+	u32 tx_fifo_size;
+	u32 tx_dma_failed;
+
+	/* Rx */
+	struct igbvf_ring *rx_ring;
+
+	u32 rx_int_delay;
+	u32 rx_abs_int_delay;
+
+	/* Rx stats */
+	u64 hw_csum_err;
+	u64 hw_csum_good;
+	u64 rx_hdr_split;
+	u32 alloc_rx_buff_failed;
+	u32 rx_dma_failed;
+
+	unsigned int rx_ps_hdr_size;
+	u32 max_frame_size;
+	u32 min_frame_size;
+
+	/* OS defined structs */
+	struct net_device *netdev;
+	struct pci_dev *pdev;
+	spinlock_t stats_lock; /* prevent concurrent stats updates */
+
+	/* structs defined in e1000_hw.h */
+	struct e1000_hw hw;
+
+	/* The VF counters don't clear on read so we have to get a base
+	 * count on driver start up and always subtract that base on
+	 * the first update, thus the flag..
+	 */
+	struct e1000_vf_stats stats;
+	u64 zero_base;
+
+	struct igbvf_ring test_tx_ring;
+	struct igbvf_ring test_rx_ring;
+	u32 test_icr;
+
+	u32 msg_enable;
+	struct msix_entry *msix_entries;
+	int int_mode;
+	u32 eims_enable_mask;
+	u32 eims_other;
+	u32 int_counter0;
+	u32 int_counter1;
+
+	u32 eeprom_wol;
+	u32 wol;
+	u32 pba;
+
+	bool fc_autoneg;
+
+	unsigned long led_status;
+
+	unsigned int flags;
+	unsigned long last_reset;
+};
+
+struct igbvf_info {
+	enum e1000_mac_type	mac;
+	unsigned int		flags;
+	u32			pba;
+	void			(*init_ops)(struct e1000_hw *);
+	s32			(*get_variants)(struct igbvf_adapter *);
+};
+
+/* hardware capability, feature, and workaround flags */
+#define IGBVF_FLAG_RX_CSUM_DISABLED	BIT(0)
+#define IGBVF_FLAG_RX_LB_VLAN_BSWAP	BIT(1)
+#define IGBVF_RX_DESC_ADV(R, i)     \
+	(&((((R).desc))[i].rx_desc))
+#define IGBVF_TX_DESC_ADV(R, i)     \
+	(&((((R).desc))[i].tx_desc))
+#define IGBVF_TX_CTXTDESC_ADV(R, i) \
+	(&((((R).desc))[i].tx_context_desc))
+
+enum igbvf_state_t {
+	__IGBVF_TESTING,
+	__IGBVF_RESETTING,
+	__IGBVF_DOWN
+};
+
+extern char igbvf_driver_name[];
+
+void igbvf_check_options(struct igbvf_adapter *);
+void igbvf_set_ethtool_ops(struct net_device *);
+
+int igbvf_up(struct igbvf_adapter *);
+void igbvf_down(struct igbvf_adapter *);
+void igbvf_reinit_locked(struct igbvf_adapter *);
+int igbvf_setup_rx_resources(struct igbvf_adapter *, struct igbvf_ring *);
+int igbvf_setup_tx_resources(struct igbvf_adapter *, struct igbvf_ring *);
+void igbvf_free_rx_resources(struct igbvf_ring *);
+void igbvf_free_tx_resources(struct igbvf_ring *);
+void igbvf_update_stats(struct igbvf_adapter *);
+
+extern unsigned int copybreak;
+
+#endif /* _IGBVF_H_ */
diff --git a/drivers/net/ethernet/intel/igbvf/mbx.c b/drivers/net/ethernet/intel/igbvf/mbx.c
new file mode 100644
index 0000000000..a3cd7ac48d
--- /dev/null
+++ b/drivers/net/ethernet/intel/igbvf/mbx.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
+
+#include "mbx.h"
+
+/**
+ *  e1000_poll_for_msg - Wait for message notification
+ *  @hw: pointer to the HW structure
+ *
+ *  returns SUCCESS if it successfully received a message notification
+ **/
+static s32 e1000_poll_for_msg(struct e1000_hw *hw)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	int countdown = mbx->timeout;
+
+	if (!mbx->ops.check_for_msg)
+		goto out;
+
+	while (countdown && mbx->ops.check_for_msg(hw)) {
+		countdown--;
+		udelay(mbx->usec_delay);
+	}
+
+	/* if we failed, all future posted messages fail until reset */
+	if (!countdown)
+		mbx->timeout = 0;
+out:
+	return countdown ? E1000_SUCCESS : -E1000_ERR_MBX;
+}
+
+/**
+ *  e1000_poll_for_ack - Wait for message acknowledgment
+ *  @hw: pointer to the HW structure
+ *
+ *  returns SUCCESS if it successfully received a message acknowledgment
+ **/
+static s32 e1000_poll_for_ack(struct e1000_hw *hw)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	int countdown = mbx->timeout;
+
+	if (!mbx->ops.check_for_ack)
+		goto out;
+
+	while (countdown && mbx->ops.check_for_ack(hw)) {
+		countdown--;
+		udelay(mbx->usec_delay);
+	}
+
+	/* if we failed, all future posted messages fail until reset */
+	if (!countdown)
+		mbx->timeout = 0;
+out:
+	return countdown ? E1000_SUCCESS : -E1000_ERR_MBX;
+}
+
+/**
+ *  e1000_read_posted_mbx - Wait for message notification and receive message
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *
+ *  returns SUCCESS if it successfully received a message notification and
+ *  copied it into the receive buffer.
+ **/
+static s32 e1000_read_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = -E1000_ERR_MBX;
+
+	if (!mbx->ops.read)
+		goto out;
+
+	ret_val = e1000_poll_for_msg(hw);
+
+	/* if ack received read message, otherwise we timed out */
+	if (!ret_val)
+		ret_val = mbx->ops.read(hw, msg, size);
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_write_posted_mbx - Write a message to the mailbox, wait for ack
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *
+ *  returns SUCCESS if it successfully copied message into the buffer and
+ *  received an ack to that message within delay * timeout period
+ **/
+static s32 e1000_write_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = -E1000_ERR_MBX;
+
+	/* exit if we either can't write or there isn't a defined timeout */
+	if (!mbx->ops.write || !mbx->timeout)
+		goto out;
+
+	/* send msg*/
+	ret_val = mbx->ops.write(hw, msg, size);
+
+	/* if msg sent wait until we receive an ack */
+	if (!ret_val)
+		ret_val = e1000_poll_for_ack(hw);
+out:
+	return ret_val;
+}
+
+/**
+ *  e1000_read_v2p_mailbox - read v2p mailbox
+ *  @hw: pointer to the HW structure
+ *
+ *  This function is used to read the v2p mailbox without losing the read to
+ *  clear status bits.
+ **/
+static u32 e1000_read_v2p_mailbox(struct e1000_hw *hw)
+{
+	u32 v2p_mailbox = er32(V2PMAILBOX(0));
+
+	v2p_mailbox |= hw->dev_spec.vf.v2p_mailbox;
+	hw->dev_spec.vf.v2p_mailbox |= v2p_mailbox & E1000_V2PMAILBOX_R2C_BITS;
+
+	return v2p_mailbox;
+}
+
+/**
+ *  e1000_check_for_bit_vf - Determine if a status bit was set
+ *  @hw: pointer to the HW structure
+ *  @mask: bitmask for bits to be tested and cleared
+ *
+ *  This function is used to check for the read to clear bits within
+ *  the V2P mailbox.
+ **/
+static s32 e1000_check_for_bit_vf(struct e1000_hw *hw, u32 mask)
+{
+	u32 v2p_mailbox = e1000_read_v2p_mailbox(hw);
+	s32 ret_val = -E1000_ERR_MBX;
+
+	if (v2p_mailbox & mask)
+		ret_val = E1000_SUCCESS;
+
+	hw->dev_spec.vf.v2p_mailbox &= ~mask;
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_for_msg_vf - checks to see if the PF has sent mail
+ *  @hw: pointer to the HW structure
+ *
+ *  returns SUCCESS if the PF has set the Status bit or else ERR_MBX
+ **/
+static s32 e1000_check_for_msg_vf(struct e1000_hw *hw)
+{
+	s32 ret_val = -E1000_ERR_MBX;
+
+	if (!e1000_check_for_bit_vf(hw, E1000_V2PMAILBOX_PFSTS)) {
+		ret_val = E1000_SUCCESS;
+		hw->mbx.stats.reqs++;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_for_ack_vf - checks to see if the PF has ACK'd
+ *  @hw: pointer to the HW structure
+ *
+ *  returns SUCCESS if the PF has set the ACK bit or else ERR_MBX
+ **/
+static s32 e1000_check_for_ack_vf(struct e1000_hw *hw)
+{
+	s32 ret_val = -E1000_ERR_MBX;
+
+	if (!e1000_check_for_bit_vf(hw, E1000_V2PMAILBOX_PFACK)) {
+		ret_val = E1000_SUCCESS;
+		hw->mbx.stats.acks++;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_for_rst_vf - checks to see if the PF has reset
+ *  @hw: pointer to the HW structure
+ *
+ *  returns true if the PF has set the reset done bit or else false
+ **/
+static s32 e1000_check_for_rst_vf(struct e1000_hw *hw)
+{
+	s32 ret_val = -E1000_ERR_MBX;
+
+	if (!e1000_check_for_bit_vf(hw, (E1000_V2PMAILBOX_RSTD |
+					 E1000_V2PMAILBOX_RSTI))) {
+		ret_val = E1000_SUCCESS;
+		hw->mbx.stats.rsts++;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_obtain_mbx_lock_vf - obtain mailbox lock
+ *  @hw: pointer to the HW structure
+ *
+ *  return SUCCESS if we obtained the mailbox lock
+ **/
+static s32 e1000_obtain_mbx_lock_vf(struct e1000_hw *hw)
+{
+	s32 ret_val = -E1000_ERR_MBX;
+	int count = 10;
+
+	do {
+		/* Take ownership of the buffer */
+		ew32(V2PMAILBOX(0), E1000_V2PMAILBOX_VFU);
+
+		/* reserve mailbox for VF use */
+		if (e1000_read_v2p_mailbox(hw) & E1000_V2PMAILBOX_VFU) {
+			ret_val = 0;
+			break;
+		}
+		udelay(1000);
+	} while (count-- > 0);
+
+	return ret_val;
+}
+
+/**
+ *  e1000_write_mbx_vf - Write a message to the mailbox
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *
+ *  returns SUCCESS if it successfully copied message into the buffer
+ **/
+static s32 e1000_write_mbx_vf(struct e1000_hw *hw, u32 *msg, u16 size)
+{
+	s32 err;
+	u16 i;
+
+	lockdep_assert_held(&hw->mbx_lock);
+
+	/* lock the mailbox to prevent pf/vf race condition */
+	err = e1000_obtain_mbx_lock_vf(hw);
+	if (err)
+		goto out_no_write;
+
+	/* flush any ack or msg as we are going to overwrite mailbox */
+	e1000_check_for_ack_vf(hw);
+	e1000_check_for_msg_vf(hw);
+
+	/* copy the caller specified message to the mailbox memory buffer */
+	for (i = 0; i < size; i++)
+		array_ew32(VMBMEM(0), i, msg[i]);
+
+	/* update stats */
+	hw->mbx.stats.msgs_tx++;
+
+	/* Drop VFU and interrupt the PF to tell it a message has been sent */
+	ew32(V2PMAILBOX(0), E1000_V2PMAILBOX_REQ);
+
+out_no_write:
+	return err;
+}
+
+/**
+ *  e1000_read_mbx_vf - Reads a message from the inbox intended for VF
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *
+ *  returns SUCCESS if it successfully read message from buffer
+ **/
+static s32 e1000_read_mbx_vf(struct e1000_hw *hw, u32 *msg, u16 size)
+{
+	s32 err;
+	u16 i;
+
+	lockdep_assert_held(&hw->mbx_lock);
+
+	/* lock the mailbox to prevent pf/vf race condition */
+	err = e1000_obtain_mbx_lock_vf(hw);
+	if (err)
+		goto out_no_read;
+
+	/* copy the message from the mailbox memory buffer */
+	for (i = 0; i < size; i++)
+		msg[i] = array_er32(VMBMEM(0), i);
+
+	/* Acknowledge receipt and release mailbox, then we're done */
+	ew32(V2PMAILBOX(0), E1000_V2PMAILBOX_ACK);
+
+	/* update stats */
+	hw->mbx.stats.msgs_rx++;
+
+out_no_read:
+	return err;
+}
+
+/**
+ *  e1000_init_mbx_params_vf - set initial values for VF mailbox
+ *  @hw: pointer to the HW structure
+ *
+ *  Initializes the hw->mbx struct to correct values for VF mailbox
+ */
+s32 e1000_init_mbx_params_vf(struct e1000_hw *hw)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+
+	/* start mailbox as timed out and let the reset_hw call set the timeout
+	 * value to being communications
+	 */
+	mbx->timeout = 0;
+	mbx->usec_delay = E1000_VF_MBX_INIT_DELAY;
+
+	mbx->size = E1000_VFMAILBOX_SIZE;
+
+	mbx->ops.read = e1000_read_mbx_vf;
+	mbx->ops.write = e1000_write_mbx_vf;
+	mbx->ops.read_posted = e1000_read_posted_mbx;
+	mbx->ops.write_posted = e1000_write_posted_mbx;
+	mbx->ops.check_for_msg = e1000_check_for_msg_vf;
+	mbx->ops.check_for_ack = e1000_check_for_ack_vf;
+	mbx->ops.check_for_rst = e1000_check_for_rst_vf;
+
+	mbx->stats.msgs_tx = 0;
+	mbx->stats.msgs_rx = 0;
+	mbx->stats.reqs = 0;
+	mbx->stats.acks = 0;
+	mbx->stats.rsts = 0;
+
+	return E1000_SUCCESS;
+}
diff --git a/drivers/net/ethernet/intel/igbvf/mbx.h b/drivers/net/ethernet/intel/igbvf/mbx.h
new file mode 100644
index 0000000000..e5b31818d5
--- /dev/null
+++ b/drivers/net/ethernet/intel/igbvf/mbx.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _E1000_MBX_H_
+#define _E1000_MBX_H_
+
+#include "vf.h"
+
+#define E1000_V2PMAILBOX_REQ	0x00000001 /* Request for PF Ready bit */
+#define E1000_V2PMAILBOX_ACK	0x00000002 /* Ack PF message received */
+#define E1000_V2PMAILBOX_VFU	0x00000004 /* VF owns the mailbox buffer */
+#define E1000_V2PMAILBOX_PFU	0x00000008 /* PF owns the mailbox buffer */
+#define E1000_V2PMAILBOX_PFSTS	0x00000010 /* PF wrote a message in the MB */
+#define E1000_V2PMAILBOX_PFACK	0x00000020 /* PF ack the previous VF msg */
+#define E1000_V2PMAILBOX_RSTI	0x00000040 /* PF has reset indication */
+#define E1000_V2PMAILBOX_RSTD	0x00000080 /* PF has indicated reset done */
+#define E1000_V2PMAILBOX_R2C_BITS 0x000000B0 /* All read to clear bits */
+
+#define E1000_VFMAILBOX_SIZE	16 /* 16 32 bit words - 64 bytes */
+
+/* If it's a E1000_VF_* msg then it originates in the VF and is sent to the
+ * PF.  The reverse is true if it is E1000_PF_*.
+ * Message ACK's are the value or'd with 0xF0000000
+ */
+/* Messages below or'd with this are the ACK */
+#define E1000_VT_MSGTYPE_ACK	0x80000000
+/* Messages below or'd with this are the NACK */
+#define E1000_VT_MSGTYPE_NACK	0x40000000
+/* Indicates that VF is still clear to send requests */
+#define E1000_VT_MSGTYPE_CTS	0x20000000
+
+/* We have a total wait time of 1s for vf mailbox posted messages */
+#define E1000_VF_MBX_INIT_TIMEOUT	2000 /* retry count for mbx timeout */
+#define E1000_VF_MBX_INIT_DELAY		500  /* usec delay between retries */
+
+#define E1000_VT_MSGINFO_SHIFT	16
+/* bits 23:16 are used for exra info for certain messages */
+#define E1000_VT_MSGINFO_MASK	(0xFF << E1000_VT_MSGINFO_SHIFT)
+
+#define E1000_VF_RESET		0x01 /* VF requests reset */
+#define E1000_VF_SET_MAC_ADDR	0x02 /* VF requests PF to set MAC addr */
+/* VF requests PF to clear all unicast MAC filters */
+#define E1000_VF_MAC_FILTER_CLR (0x01 << E1000_VT_MSGINFO_SHIFT)
+/* VF requests PF to add unicast MAC filter */
+#define E1000_VF_MAC_FILTER_ADD (0x02 << E1000_VT_MSGINFO_SHIFT)
+#define E1000_VF_SET_MULTICAST	0x03 /* VF requests PF to set MC addr */
+#define E1000_VF_SET_VLAN	0x04 /* VF requests PF to set VLAN */
+#define E1000_VF_SET_LPE	0x05 /* VF requests PF to set VMOLR.LPE */
+
+#define E1000_PF_CONTROL_MSG	0x0100 /* PF control message */
+
+void e1000_init_mbx_ops_generic(struct e1000_hw *hw);
+s32 e1000_init_mbx_params_vf(struct e1000_hw *);
+
+#endif /* _E1000_MBX_H_ */
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
new file mode 100644
index 0000000000..7ff2752dd7
--- /dev/null
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -0,0 +1,3009 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/tcp.h>
+#include <linux/ipv6.h>
+#include <linux/slab.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/prefetch.h>
+#include <linux/sctp.h>
+
+#include "igbvf.h"
+
+char igbvf_driver_name[] = "igbvf";
+static const char igbvf_driver_string[] =
+		  "Intel(R) Gigabit Virtual Function Network Driver";
+static const char igbvf_copyright[] =
+		  "Copyright (c) 2009 - 2012 Intel Corporation.";
+
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+static int igbvf_poll(struct napi_struct *napi, int budget);
+static void igbvf_reset(struct igbvf_adapter *);
+static void igbvf_set_interrupt_capability(struct igbvf_adapter *);
+static void igbvf_reset_interrupt_capability(struct igbvf_adapter *);
+
+static struct igbvf_info igbvf_vf_info = {
+	.mac		= e1000_vfadapt,
+	.flags		= 0,
+	.pba		= 10,
+	.init_ops	= e1000_init_function_pointers_vf,
+};
+
+static struct igbvf_info igbvf_i350_vf_info = {
+	.mac		= e1000_vfadapt_i350,
+	.flags		= 0,
+	.pba		= 10,
+	.init_ops	= e1000_init_function_pointers_vf,
+};
+
+static const struct igbvf_info *igbvf_info_tbl[] = {
+	[board_vf]	= &igbvf_vf_info,
+	[board_i350_vf]	= &igbvf_i350_vf_info,
+};
+
+/**
+ * igbvf_desc_unused - calculate if we have unused descriptors
+ * @ring: address of receive ring structure
+ **/
+static int igbvf_desc_unused(struct igbvf_ring *ring)
+{
+	if (ring->next_to_clean > ring->next_to_use)
+		return ring->next_to_clean - ring->next_to_use - 1;
+
+	return ring->count + ring->next_to_clean - ring->next_to_use - 1;
+}
+
+/**
+ * igbvf_receive_skb - helper function to handle Rx indications
+ * @adapter: board private structure
+ * @netdev: pointer to netdev struct
+ * @skb: skb to indicate to stack
+ * @status: descriptor status field as written by hardware
+ * @vlan: descriptor vlan field as written by hardware (no le/be conversion)
+ * @skb: pointer to sk_buff to be indicated to stack
+ **/
+static void igbvf_receive_skb(struct igbvf_adapter *adapter,
+			      struct net_device *netdev,
+			      struct sk_buff *skb,
+			      u32 status, __le16 vlan)
+{
+	u16 vid;
+
+	if (status & E1000_RXD_STAT_VP) {
+		if ((adapter->flags & IGBVF_FLAG_RX_LB_VLAN_BSWAP) &&
+		    (status & E1000_RXDEXT_STATERR_LB))
+			vid = be16_to_cpu((__force __be16)vlan) & E1000_RXD_SPC_VLAN_MASK;
+		else
+			vid = le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK;
+		if (test_bit(vid, adapter->active_vlans))
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+	}
+
+	napi_gro_receive(&adapter->rx_ring->napi, skb);
+}
+
+static inline void igbvf_rx_checksum_adv(struct igbvf_adapter *adapter,
+					 u32 status_err, struct sk_buff *skb)
+{
+	skb_checksum_none_assert(skb);
+
+	/* Ignore Checksum bit is set or checksum is disabled through ethtool */
+	if ((status_err & E1000_RXD_STAT_IXSM) ||
+	    (adapter->flags & IGBVF_FLAG_RX_CSUM_DISABLED))
+		return;
+
+	/* TCP/UDP checksum error bit is set */
+	if (status_err &
+	    (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
+		/* let the stack verify checksum errors */
+		adapter->hw_csum_err++;
+		return;
+	}
+
+	/* It must be a TCP or UDP packet with a valid checksum */
+	if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	adapter->hw_csum_good++;
+}
+
+/**
+ * igbvf_alloc_rx_buffers - Replace used receive buffers; packet split
+ * @rx_ring: address of ring structure to repopulate
+ * @cleaned_count: number of buffers to repopulate
+ **/
+static void igbvf_alloc_rx_buffers(struct igbvf_ring *rx_ring,
+				   int cleaned_count)
+{
+	struct igbvf_adapter *adapter = rx_ring->adapter;
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	union e1000_adv_rx_desc *rx_desc;
+	struct igbvf_buffer *buffer_info;
+	struct sk_buff *skb;
+	unsigned int i;
+	int bufsz;
+
+	i = rx_ring->next_to_use;
+	buffer_info = &rx_ring->buffer_info[i];
+
+	if (adapter->rx_ps_hdr_size)
+		bufsz = adapter->rx_ps_hdr_size;
+	else
+		bufsz = adapter->rx_buffer_len;
+
+	while (cleaned_count--) {
+		rx_desc = IGBVF_RX_DESC_ADV(*rx_ring, i);
+
+		if (adapter->rx_ps_hdr_size && !buffer_info->page_dma) {
+			if (!buffer_info->page) {
+				buffer_info->page = alloc_page(GFP_ATOMIC);
+				if (!buffer_info->page) {
+					adapter->alloc_rx_buff_failed++;
+					goto no_buffers;
+				}
+				buffer_info->page_offset = 0;
+			} else {
+				buffer_info->page_offset ^= PAGE_SIZE / 2;
+			}
+			buffer_info->page_dma =
+				dma_map_page(&pdev->dev, buffer_info->page,
+					     buffer_info->page_offset,
+					     PAGE_SIZE / 2,
+					     DMA_FROM_DEVICE);
+			if (dma_mapping_error(&pdev->dev,
+					      buffer_info->page_dma)) {
+				__free_page(buffer_info->page);
+				buffer_info->page = NULL;
+				dev_err(&pdev->dev, "RX DMA map failed\n");
+				break;
+			}
+		}
+
+		if (!buffer_info->skb) {
+			skb = netdev_alloc_skb_ip_align(netdev, bufsz);
+			if (!skb) {
+				adapter->alloc_rx_buff_failed++;
+				goto no_buffers;
+			}
+
+			buffer_info->skb = skb;
+			buffer_info->dma = dma_map_single(&pdev->dev, skb->data,
+							  bufsz,
+							  DMA_FROM_DEVICE);
+			if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
+				dev_kfree_skb(buffer_info->skb);
+				buffer_info->skb = NULL;
+				dev_err(&pdev->dev, "RX DMA map failed\n");
+				goto no_buffers;
+			}
+		}
+		/* Refresh the desc even if buffer_addrs didn't change because
+		 * each write-back erases this info.
+		 */
+		if (adapter->rx_ps_hdr_size) {
+			rx_desc->read.pkt_addr =
+			     cpu_to_le64(buffer_info->page_dma);
+			rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
+		} else {
+			rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
+			rx_desc->read.hdr_addr = 0;
+		}
+
+		i++;
+		if (i == rx_ring->count)
+			i = 0;
+		buffer_info = &rx_ring->buffer_info[i];
+	}
+
+no_buffers:
+	if (rx_ring->next_to_use != i) {
+		rx_ring->next_to_use = i;
+		if (i == 0)
+			i = (rx_ring->count - 1);
+		else
+			i--;
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		*/
+		wmb();
+		writel(i, adapter->hw.hw_addr + rx_ring->tail);
+	}
+}
+
+/**
+ * igbvf_clean_rx_irq - Send received data up the network stack; legacy
+ * @adapter: board private structure
+ * @work_done: output parameter used to indicate completed work
+ * @work_to_do: input parameter setting limit of work
+ *
+ * the return value indicates whether actual cleaning was done, there
+ * is no guarantee that everything was cleaned
+ **/
+static bool igbvf_clean_rx_irq(struct igbvf_adapter *adapter,
+			       int *work_done, int work_to_do)
+{
+	struct igbvf_ring *rx_ring = adapter->rx_ring;
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	union e1000_adv_rx_desc *rx_desc, *next_rxd;
+	struct igbvf_buffer *buffer_info, *next_buffer;
+	struct sk_buff *skb;
+	bool cleaned = false;
+	int cleaned_count = 0;
+	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int i;
+	u32 length, hlen, staterr;
+
+	i = rx_ring->next_to_clean;
+	rx_desc = IGBVF_RX_DESC_ADV(*rx_ring, i);
+	staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+
+	while (staterr & E1000_RXD_STAT_DD) {
+		if (*work_done >= work_to_do)
+			break;
+		(*work_done)++;
+		rmb(); /* read descriptor and rx_buffer_info after status DD */
+
+		buffer_info = &rx_ring->buffer_info[i];
+
+		/* HW will not DMA in data larger than the given buffer, even
+		 * if it parses the (NFS, of course) header to be larger.  In
+		 * that case, it fills the header buffer and spills the rest
+		 * into the page.
+		 */
+		hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info)
+		       & E1000_RXDADV_HDRBUFLEN_MASK) >>
+		       E1000_RXDADV_HDRBUFLEN_SHIFT;
+		if (hlen > adapter->rx_ps_hdr_size)
+			hlen = adapter->rx_ps_hdr_size;
+
+		length = le16_to_cpu(rx_desc->wb.upper.length);
+		cleaned = true;
+		cleaned_count++;
+
+		skb = buffer_info->skb;
+		prefetch(skb->data - NET_IP_ALIGN);
+		buffer_info->skb = NULL;
+		if (!adapter->rx_ps_hdr_size) {
+			dma_unmap_single(&pdev->dev, buffer_info->dma,
+					 adapter->rx_buffer_len,
+					 DMA_FROM_DEVICE);
+			buffer_info->dma = 0;
+			skb_put(skb, length);
+			goto send_up;
+		}
+
+		if (!skb_shinfo(skb)->nr_frags) {
+			dma_unmap_single(&pdev->dev, buffer_info->dma,
+					 adapter->rx_ps_hdr_size,
+					 DMA_FROM_DEVICE);
+			buffer_info->dma = 0;
+			skb_put(skb, hlen);
+		}
+
+		if (length) {
+			dma_unmap_page(&pdev->dev, buffer_info->page_dma,
+				       PAGE_SIZE / 2,
+				       DMA_FROM_DEVICE);
+			buffer_info->page_dma = 0;
+
+			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
+					   buffer_info->page,
+					   buffer_info->page_offset,
+					   length);
+
+			if ((adapter->rx_buffer_len > (PAGE_SIZE / 2)) ||
+			    (page_count(buffer_info->page) != 1))
+				buffer_info->page = NULL;
+			else
+				get_page(buffer_info->page);
+
+			skb->len += length;
+			skb->data_len += length;
+			skb->truesize += PAGE_SIZE / 2;
+		}
+send_up:
+		i++;
+		if (i == rx_ring->count)
+			i = 0;
+		next_rxd = IGBVF_RX_DESC_ADV(*rx_ring, i);
+		prefetch(next_rxd);
+		next_buffer = &rx_ring->buffer_info[i];
+
+		if (!(staterr & E1000_RXD_STAT_EOP)) {
+			buffer_info->skb = next_buffer->skb;
+			buffer_info->dma = next_buffer->dma;
+			next_buffer->skb = skb;
+			next_buffer->dma = 0;
+			goto next_desc;
+		}
+
+		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
+			dev_kfree_skb_irq(skb);
+			goto next_desc;
+		}
+
+		total_bytes += skb->len;
+		total_packets++;
+
+		igbvf_rx_checksum_adv(adapter, staterr, skb);
+
+		skb->protocol = eth_type_trans(skb, netdev);
+
+		igbvf_receive_skb(adapter, netdev, skb, staterr,
+				  rx_desc->wb.upper.vlan);
+
+next_desc:
+		rx_desc->wb.upper.status_error = 0;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= IGBVF_RX_BUFFER_WRITE) {
+			igbvf_alloc_rx_buffers(rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		/* use prefetched values */
+		rx_desc = next_rxd;
+		buffer_info = next_buffer;
+
+		staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+	}
+
+	rx_ring->next_to_clean = i;
+	cleaned_count = igbvf_desc_unused(rx_ring);
+
+	if (cleaned_count)
+		igbvf_alloc_rx_buffers(rx_ring, cleaned_count);
+
+	adapter->total_rx_packets += total_packets;
+	adapter->total_rx_bytes += total_bytes;
+	netdev->stats.rx_bytes += total_bytes;
+	netdev->stats.rx_packets += total_packets;
+	return cleaned;
+}
+
+static void igbvf_put_txbuf(struct igbvf_adapter *adapter,
+			    struct igbvf_buffer *buffer_info)
+{
+	if (buffer_info->dma) {
+		if (buffer_info->mapped_as_page)
+			dma_unmap_page(&adapter->pdev->dev,
+				       buffer_info->dma,
+				       buffer_info->length,
+				       DMA_TO_DEVICE);
+		else
+			dma_unmap_single(&adapter->pdev->dev,
+					 buffer_info->dma,
+					 buffer_info->length,
+					 DMA_TO_DEVICE);
+		buffer_info->dma = 0;
+	}
+	if (buffer_info->skb) {
+		dev_kfree_skb_any(buffer_info->skb);
+		buffer_info->skb = NULL;
+	}
+	buffer_info->time_stamp = 0;
+}
+
+/**
+ * igbvf_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @adapter: board private structure
+ * @tx_ring: ring being initialized
+ *
+ * Return 0 on success, negative on failure
+ **/
+int igbvf_setup_tx_resources(struct igbvf_adapter *adapter,
+			     struct igbvf_ring *tx_ring)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	int size;
+
+	size = sizeof(struct igbvf_buffer) * tx_ring->count;
+	tx_ring->buffer_info = vzalloc(size);
+	if (!tx_ring->buffer_info)
+		goto err;
+
+	/* round up to nearest 4K */
+	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
+	tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+	tx_ring->desc = dma_alloc_coherent(&pdev->dev, tx_ring->size,
+					   &tx_ring->dma, GFP_KERNEL);
+	if (!tx_ring->desc)
+		goto err;
+
+	tx_ring->adapter = adapter;
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+	return 0;
+err:
+	vfree(tx_ring->buffer_info);
+	dev_err(&adapter->pdev->dev,
+		"Unable to allocate memory for the transmit descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * igbvf_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @adapter: board private structure
+ * @rx_ring: ring being initialized
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int igbvf_setup_rx_resources(struct igbvf_adapter *adapter,
+			     struct igbvf_ring *rx_ring)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	int size, desc_len;
+
+	size = sizeof(struct igbvf_buffer) * rx_ring->count;
+	rx_ring->buffer_info = vzalloc(size);
+	if (!rx_ring->buffer_info)
+		goto err;
+
+	desc_len = sizeof(union e1000_adv_rx_desc);
+
+	/* Round up to nearest 4K */
+	rx_ring->size = rx_ring->count * desc_len;
+	rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+	rx_ring->desc = dma_alloc_coherent(&pdev->dev, rx_ring->size,
+					   &rx_ring->dma, GFP_KERNEL);
+	if (!rx_ring->desc)
+		goto err;
+
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+
+	rx_ring->adapter = adapter;
+
+	return 0;
+
+err:
+	vfree(rx_ring->buffer_info);
+	rx_ring->buffer_info = NULL;
+	dev_err(&adapter->pdev->dev,
+		"Unable to allocate memory for the receive descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * igbvf_clean_tx_ring - Free Tx Buffers
+ * @tx_ring: ring to be cleaned
+ **/
+static void igbvf_clean_tx_ring(struct igbvf_ring *tx_ring)
+{
+	struct igbvf_adapter *adapter = tx_ring->adapter;
+	struct igbvf_buffer *buffer_info;
+	unsigned long size;
+	unsigned int i;
+
+	if (!tx_ring->buffer_info)
+		return;
+
+	/* Free all the Tx ring sk_buffs */
+	for (i = 0; i < tx_ring->count; i++) {
+		buffer_info = &tx_ring->buffer_info[i];
+		igbvf_put_txbuf(adapter, buffer_info);
+	}
+
+	size = sizeof(struct igbvf_buffer) * tx_ring->count;
+	memset(tx_ring->buffer_info, 0, size);
+
+	/* Zero out the descriptor ring */
+	memset(tx_ring->desc, 0, tx_ring->size);
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+	writel(0, adapter->hw.hw_addr + tx_ring->head);
+	writel(0, adapter->hw.hw_addr + tx_ring->tail);
+}
+
+/**
+ * igbvf_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: ring to free resources from
+ *
+ * Free all transmit software resources
+ **/
+void igbvf_free_tx_resources(struct igbvf_ring *tx_ring)
+{
+	struct pci_dev *pdev = tx_ring->adapter->pdev;
+
+	igbvf_clean_tx_ring(tx_ring);
+
+	vfree(tx_ring->buffer_info);
+	tx_ring->buffer_info = NULL;
+
+	dma_free_coherent(&pdev->dev, tx_ring->size, tx_ring->desc,
+			  tx_ring->dma);
+
+	tx_ring->desc = NULL;
+}
+
+/**
+ * igbvf_clean_rx_ring - Free Rx Buffers per Queue
+ * @rx_ring: ring structure pointer to free buffers from
+ **/
+static void igbvf_clean_rx_ring(struct igbvf_ring *rx_ring)
+{
+	struct igbvf_adapter *adapter = rx_ring->adapter;
+	struct igbvf_buffer *buffer_info;
+	struct pci_dev *pdev = adapter->pdev;
+	unsigned long size;
+	unsigned int i;
+
+	if (!rx_ring->buffer_info)
+		return;
+
+	/* Free all the Rx ring sk_buffs */
+	for (i = 0; i < rx_ring->count; i++) {
+		buffer_info = &rx_ring->buffer_info[i];
+		if (buffer_info->dma) {
+			if (adapter->rx_ps_hdr_size) {
+				dma_unmap_single(&pdev->dev, buffer_info->dma,
+						 adapter->rx_ps_hdr_size,
+						 DMA_FROM_DEVICE);
+			} else {
+				dma_unmap_single(&pdev->dev, buffer_info->dma,
+						 adapter->rx_buffer_len,
+						 DMA_FROM_DEVICE);
+			}
+			buffer_info->dma = 0;
+		}
+
+		if (buffer_info->skb) {
+			dev_kfree_skb(buffer_info->skb);
+			buffer_info->skb = NULL;
+		}
+
+		if (buffer_info->page) {
+			if (buffer_info->page_dma)
+				dma_unmap_page(&pdev->dev,
+					       buffer_info->page_dma,
+					       PAGE_SIZE / 2,
+					       DMA_FROM_DEVICE);
+			put_page(buffer_info->page);
+			buffer_info->page = NULL;
+			buffer_info->page_dma = 0;
+			buffer_info->page_offset = 0;
+		}
+	}
+
+	size = sizeof(struct igbvf_buffer) * rx_ring->count;
+	memset(rx_ring->buffer_info, 0, size);
+
+	/* Zero out the descriptor ring */
+	memset(rx_ring->desc, 0, rx_ring->size);
+
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+
+	writel(0, adapter->hw.hw_addr + rx_ring->head);
+	writel(0, adapter->hw.hw_addr + rx_ring->tail);
+}
+
+/**
+ * igbvf_free_rx_resources - Free Rx Resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+
+void igbvf_free_rx_resources(struct igbvf_ring *rx_ring)
+{
+	struct pci_dev *pdev = rx_ring->adapter->pdev;
+
+	igbvf_clean_rx_ring(rx_ring);
+
+	vfree(rx_ring->buffer_info);
+	rx_ring->buffer_info = NULL;
+
+	dma_free_coherent(&pdev->dev, rx_ring->size, rx_ring->desc,
+			  rx_ring->dma);
+	rx_ring->desc = NULL;
+}
+
+/**
+ * igbvf_update_itr - update the dynamic ITR value based on statistics
+ * @adapter: pointer to adapter
+ * @itr_setting: current adapter->itr
+ * @packets: the number of packets during this measurement interval
+ * @bytes: the number of bytes during this measurement interval
+ *
+ * Stores a new ITR value based on packets and byte counts during the last
+ * interrupt.  The advantage of per interrupt computation is faster updates
+ * and more accurate ITR for the current traffic pattern.  Constants in this
+ * function were computed based on theoretical maximum wire speed and thresholds
+ * were set based on testing data as well as attempting to minimize response
+ * time while increasing bulk throughput.
+ **/
+static enum latency_range igbvf_update_itr(struct igbvf_adapter *adapter,
+					   enum latency_range itr_setting,
+					   int packets, int bytes)
+{
+	enum latency_range retval = itr_setting;
+
+	if (packets == 0)
+		goto update_itr_done;
+
+	switch (itr_setting) {
+	case lowest_latency:
+		/* handle TSO and jumbo frames */
+		if (bytes/packets > 8000)
+			retval = bulk_latency;
+		else if ((packets < 5) && (bytes > 512))
+			retval = low_latency;
+		break;
+	case low_latency:  /* 50 usec aka 20000 ints/s */
+		if (bytes > 10000) {
+			/* this if handles the TSO accounting */
+			if (bytes/packets > 8000)
+				retval = bulk_latency;
+			else if ((packets < 10) || ((bytes/packets) > 1200))
+				retval = bulk_latency;
+			else if ((packets > 35))
+				retval = lowest_latency;
+		} else if (bytes/packets > 2000) {
+			retval = bulk_latency;
+		} else if (packets <= 2 && bytes < 512) {
+			retval = lowest_latency;
+		}
+		break;
+	case bulk_latency: /* 250 usec aka 4000 ints/s */
+		if (bytes > 25000) {
+			if (packets > 35)
+				retval = low_latency;
+		} else if (bytes < 6000) {
+			retval = low_latency;
+		}
+		break;
+	default:
+		break;
+	}
+
+update_itr_done:
+	return retval;
+}
+
+static int igbvf_range_to_itr(enum latency_range current_range)
+{
+	int new_itr;
+
+	switch (current_range) {
+	/* counts and packets in update_itr are dependent on these numbers */
+	case lowest_latency:
+		new_itr = IGBVF_70K_ITR;
+		break;
+	case low_latency:
+		new_itr = IGBVF_20K_ITR;
+		break;
+	case bulk_latency:
+		new_itr = IGBVF_4K_ITR;
+		break;
+	default:
+		new_itr = IGBVF_START_ITR;
+		break;
+	}
+	return new_itr;
+}
+
+static void igbvf_set_itr(struct igbvf_adapter *adapter)
+{
+	u32 new_itr;
+
+	adapter->tx_ring->itr_range =
+			igbvf_update_itr(adapter,
+					 adapter->tx_ring->itr_val,
+					 adapter->total_tx_packets,
+					 adapter->total_tx_bytes);
+
+	/* conservative mode (itr 3) eliminates the lowest_latency setting */
+	if (adapter->requested_itr == 3 &&
+	    adapter->tx_ring->itr_range == lowest_latency)
+		adapter->tx_ring->itr_range = low_latency;
+
+	new_itr = igbvf_range_to_itr(adapter->tx_ring->itr_range);
+
+	if (new_itr != adapter->tx_ring->itr_val) {
+		u32 current_itr = adapter->tx_ring->itr_val;
+		/* this attempts to bias the interrupt rate towards Bulk
+		 * by adding intermediate steps when interrupt rate is
+		 * increasing
+		 */
+		new_itr = new_itr > current_itr ?
+			  min(current_itr + (new_itr >> 2), new_itr) :
+			  new_itr;
+		adapter->tx_ring->itr_val = new_itr;
+
+		adapter->tx_ring->set_itr = 1;
+	}
+
+	adapter->rx_ring->itr_range =
+			igbvf_update_itr(adapter, adapter->rx_ring->itr_val,
+					 adapter->total_rx_packets,
+					 adapter->total_rx_bytes);
+	if (adapter->requested_itr == 3 &&
+	    adapter->rx_ring->itr_range == lowest_latency)
+		adapter->rx_ring->itr_range = low_latency;
+
+	new_itr = igbvf_range_to_itr(adapter->rx_ring->itr_range);
+
+	if (new_itr != adapter->rx_ring->itr_val) {
+		u32 current_itr = adapter->rx_ring->itr_val;
+
+		new_itr = new_itr > current_itr ?
+			  min(current_itr + (new_itr >> 2), new_itr) :
+			  new_itr;
+		adapter->rx_ring->itr_val = new_itr;
+
+		adapter->rx_ring->set_itr = 1;
+	}
+}
+
+/**
+ * igbvf_clean_tx_irq - Reclaim resources after transmit completes
+ * @tx_ring: ring structure to clean descriptors from
+ *
+ * returns true if ring is completely cleaned
+ **/
+static bool igbvf_clean_tx_irq(struct igbvf_ring *tx_ring)
+{
+	struct igbvf_adapter *adapter = tx_ring->adapter;
+	struct net_device *netdev = adapter->netdev;
+	struct igbvf_buffer *buffer_info;
+	struct sk_buff *skb;
+	union e1000_adv_tx_desc *tx_desc, *eop_desc;
+	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int i, count = 0;
+	bool cleaned = false;
+
+	i = tx_ring->next_to_clean;
+	buffer_info = &tx_ring->buffer_info[i];
+	eop_desc = buffer_info->next_to_watch;
+
+	do {
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		buffer_info->next_to_watch = NULL;
+
+		for (cleaned = false; !cleaned; count++) {
+			tx_desc = IGBVF_TX_DESC_ADV(*tx_ring, i);
+			cleaned = (tx_desc == eop_desc);
+			skb = buffer_info->skb;
+
+			if (skb) {
+				unsigned int segs, bytecount;
+
+				/* gso_segs is currently only valid for tcp */
+				segs = skb_shinfo(skb)->gso_segs ?: 1;
+				/* multiply data chunks by size of headers */
+				bytecount = ((segs - 1) * skb_headlen(skb)) +
+					    skb->len;
+				total_packets += segs;
+				total_bytes += bytecount;
+			}
+
+			igbvf_put_txbuf(adapter, buffer_info);
+			tx_desc->wb.status = 0;
+
+			i++;
+			if (i == tx_ring->count)
+				i = 0;
+
+			buffer_info = &tx_ring->buffer_info[i];
+		}
+
+		eop_desc = buffer_info->next_to_watch;
+	} while (count < tx_ring->count);
+
+	tx_ring->next_to_clean = i;
+
+	if (unlikely(count && netif_carrier_ok(netdev) &&
+	    igbvf_desc_unused(tx_ring) >= IGBVF_TX_QUEUE_WAKE)) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (netif_queue_stopped(netdev) &&
+		    !(test_bit(__IGBVF_DOWN, &adapter->state))) {
+			netif_wake_queue(netdev);
+			++adapter->restart_queue;
+		}
+	}
+
+	netdev->stats.tx_bytes += total_bytes;
+	netdev->stats.tx_packets += total_packets;
+	return count < tx_ring->count;
+}
+
+static irqreturn_t igbvf_msix_other(int irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	adapter->int_counter1++;
+
+	hw->mac.get_link_status = 1;
+	if (!test_bit(__IGBVF_DOWN, &adapter->state))
+		mod_timer(&adapter->watchdog_timer, jiffies + 1);
+
+	ew32(EIMS, adapter->eims_other);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t igbvf_intr_msix_tx(int irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct igbvf_ring *tx_ring = adapter->tx_ring;
+
+	if (tx_ring->set_itr) {
+		writel(tx_ring->itr_val,
+		       adapter->hw.hw_addr + tx_ring->itr_register);
+		adapter->tx_ring->set_itr = 0;
+	}
+
+	adapter->total_tx_bytes = 0;
+	adapter->total_tx_packets = 0;
+
+	/* auto mask will automatically re-enable the interrupt when we write
+	 * EICS
+	 */
+	if (!igbvf_clean_tx_irq(tx_ring))
+		/* Ring was not completely cleaned, so fire another interrupt */
+		ew32(EICS, tx_ring->eims_value);
+	else
+		ew32(EIMS, tx_ring->eims_value);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t igbvf_intr_msix_rx(int irq, void *data)
+{
+	struct net_device *netdev = data;
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	adapter->int_counter0++;
+
+	/* Write the ITR value calculated at the end of the
+	 * previous interrupt.
+	 */
+	if (adapter->rx_ring->set_itr) {
+		writel(adapter->rx_ring->itr_val,
+		       adapter->hw.hw_addr + adapter->rx_ring->itr_register);
+		adapter->rx_ring->set_itr = 0;
+	}
+
+	if (napi_schedule_prep(&adapter->rx_ring->napi)) {
+		adapter->total_rx_bytes = 0;
+		adapter->total_rx_packets = 0;
+		__napi_schedule(&adapter->rx_ring->napi);
+	}
+
+	return IRQ_HANDLED;
+}
+
+#define IGBVF_NO_QUEUE -1
+
+static void igbvf_assign_vector(struct igbvf_adapter *adapter, int rx_queue,
+				int tx_queue, int msix_vector)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 ivar, index;
+
+	/* 82576 uses a table-based method for assigning vectors.
+	 * Each queue has a single entry in the table to which we write
+	 * a vector number along with a "valid" bit.  Sadly, the layout
+	 * of the table is somewhat counterintuitive.
+	 */
+	if (rx_queue > IGBVF_NO_QUEUE) {
+		index = (rx_queue >> 1);
+		ivar = array_er32(IVAR0, index);
+		if (rx_queue & 0x1) {
+			/* vector goes into third byte of register */
+			ivar = ivar & 0xFF00FFFF;
+			ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
+		} else {
+			/* vector goes into low byte of register */
+			ivar = ivar & 0xFFFFFF00;
+			ivar |= msix_vector | E1000_IVAR_VALID;
+		}
+		adapter->rx_ring[rx_queue].eims_value = BIT(msix_vector);
+		array_ew32(IVAR0, index, ivar);
+	}
+	if (tx_queue > IGBVF_NO_QUEUE) {
+		index = (tx_queue >> 1);
+		ivar = array_er32(IVAR0, index);
+		if (tx_queue & 0x1) {
+			/* vector goes into high byte of register */
+			ivar = ivar & 0x00FFFFFF;
+			ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
+		} else {
+			/* vector goes into second byte of register */
+			ivar = ivar & 0xFFFF00FF;
+			ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
+		}
+		adapter->tx_ring[tx_queue].eims_value = BIT(msix_vector);
+		array_ew32(IVAR0, index, ivar);
+	}
+}
+
+/**
+ * igbvf_configure_msix - Configure MSI-X hardware
+ * @adapter: board private structure
+ *
+ * igbvf_configure_msix sets up the hardware to properly
+ * generate MSI-X interrupts.
+ **/
+static void igbvf_configure_msix(struct igbvf_adapter *adapter)
+{
+	u32 tmp;
+	struct e1000_hw *hw = &adapter->hw;
+	struct igbvf_ring *tx_ring = adapter->tx_ring;
+	struct igbvf_ring *rx_ring = adapter->rx_ring;
+	int vector = 0;
+
+	adapter->eims_enable_mask = 0;
+
+	igbvf_assign_vector(adapter, IGBVF_NO_QUEUE, 0, vector++);
+	adapter->eims_enable_mask |= tx_ring->eims_value;
+	writel(tx_ring->itr_val, hw->hw_addr + tx_ring->itr_register);
+	igbvf_assign_vector(adapter, 0, IGBVF_NO_QUEUE, vector++);
+	adapter->eims_enable_mask |= rx_ring->eims_value;
+	writel(rx_ring->itr_val, hw->hw_addr + rx_ring->itr_register);
+
+	/* set vector for other causes, i.e. link changes */
+
+	tmp = (vector++ | E1000_IVAR_VALID);
+
+	ew32(IVAR_MISC, tmp);
+
+	adapter->eims_enable_mask = GENMASK(vector - 1, 0);
+	adapter->eims_other = BIT(vector - 1);
+	e1e_flush();
+}
+
+static void igbvf_reset_interrupt_capability(struct igbvf_adapter *adapter)
+{
+	if (adapter->msix_entries) {
+		pci_disable_msix(adapter->pdev);
+		kfree(adapter->msix_entries);
+		adapter->msix_entries = NULL;
+	}
+}
+
+/**
+ * igbvf_set_interrupt_capability - set MSI or MSI-X if supported
+ * @adapter: board private structure
+ *
+ * Attempt to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static void igbvf_set_interrupt_capability(struct igbvf_adapter *adapter)
+{
+	int err = -ENOMEM;
+	int i;
+
+	/* we allocate 3 vectors, 1 for Tx, 1 for Rx, one for PF messages */
+	adapter->msix_entries = kcalloc(3, sizeof(struct msix_entry),
+					GFP_KERNEL);
+	if (adapter->msix_entries) {
+		for (i = 0; i < 3; i++)
+			adapter->msix_entries[i].entry = i;
+
+		err = pci_enable_msix_range(adapter->pdev,
+					    adapter->msix_entries, 3, 3);
+	}
+
+	if (err < 0) {
+		/* MSI-X failed */
+		dev_err(&adapter->pdev->dev,
+			"Failed to initialize MSI-X interrupts.\n");
+		igbvf_reset_interrupt_capability(adapter);
+	}
+}
+
+/**
+ * igbvf_request_msix - Initialize MSI-X interrupts
+ * @adapter: board private structure
+ *
+ * igbvf_request_msix allocates MSI-X vectors and requests interrupts from the
+ * kernel.
+ **/
+static int igbvf_request_msix(struct igbvf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int err = 0, vector = 0;
+
+	if (strlen(netdev->name) < (IFNAMSIZ - 5)) {
+		sprintf(adapter->tx_ring->name, "%s-tx-0", netdev->name);
+		sprintf(adapter->rx_ring->name, "%s-rx-0", netdev->name);
+	} else {
+		memcpy(adapter->tx_ring->name, netdev->name, IFNAMSIZ);
+		memcpy(adapter->rx_ring->name, netdev->name, IFNAMSIZ);
+	}
+
+	err = request_irq(adapter->msix_entries[vector].vector,
+			  igbvf_intr_msix_tx, 0, adapter->tx_ring->name,
+			  netdev);
+	if (err)
+		goto out;
+
+	adapter->tx_ring->itr_register = E1000_EITR(vector);
+	adapter->tx_ring->itr_val = adapter->current_itr;
+	vector++;
+
+	err = request_irq(adapter->msix_entries[vector].vector,
+			  igbvf_intr_msix_rx, 0, adapter->rx_ring->name,
+			  netdev);
+	if (err)
+		goto free_irq_tx;
+
+	adapter->rx_ring->itr_register = E1000_EITR(vector);
+	adapter->rx_ring->itr_val = adapter->current_itr;
+	vector++;
+
+	err = request_irq(adapter->msix_entries[vector].vector,
+			  igbvf_msix_other, 0, netdev->name, netdev);
+	if (err)
+		goto free_irq_rx;
+
+	igbvf_configure_msix(adapter);
+	return 0;
+free_irq_rx:
+	free_irq(adapter->msix_entries[--vector].vector, netdev);
+free_irq_tx:
+	free_irq(adapter->msix_entries[--vector].vector, netdev);
+out:
+	return err;
+}
+
+/**
+ * igbvf_alloc_queues - Allocate memory for all rings
+ * @adapter: board private structure to initialize
+ **/
+static int igbvf_alloc_queues(struct igbvf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	adapter->tx_ring = kzalloc(sizeof(struct igbvf_ring), GFP_KERNEL);
+	if (!adapter->tx_ring)
+		return -ENOMEM;
+
+	adapter->rx_ring = kzalloc(sizeof(struct igbvf_ring), GFP_KERNEL);
+	if (!adapter->rx_ring) {
+		kfree(adapter->tx_ring);
+		return -ENOMEM;
+	}
+
+	netif_napi_add(netdev, &adapter->rx_ring->napi, igbvf_poll);
+
+	return 0;
+}
+
+/**
+ * igbvf_request_irq - initialize interrupts
+ * @adapter: board private structure
+ *
+ * Attempts to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static int igbvf_request_irq(struct igbvf_adapter *adapter)
+{
+	int err = -1;
+
+	/* igbvf supports msi-x only */
+	if (adapter->msix_entries)
+		err = igbvf_request_msix(adapter);
+
+	if (!err)
+		return err;
+
+	dev_err(&adapter->pdev->dev,
+		"Unable to allocate interrupt, Error: %d\n", err);
+
+	return err;
+}
+
+static void igbvf_free_irq(struct igbvf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int vector;
+
+	if (adapter->msix_entries) {
+		for (vector = 0; vector < 3; vector++)
+			free_irq(adapter->msix_entries[vector].vector, netdev);
+	}
+}
+
+/**
+ * igbvf_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ **/
+static void igbvf_irq_disable(struct igbvf_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	ew32(EIMC, ~0);
+
+	if (adapter->msix_entries)
+		ew32(EIAC, 0);
+}
+
+/**
+ * igbvf_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ **/
+static void igbvf_irq_enable(struct igbvf_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	ew32(EIAC, adapter->eims_enable_mask);
+	ew32(EIAM, adapter->eims_enable_mask);
+	ew32(EIMS, adapter->eims_enable_mask);
+}
+
+/**
+ * igbvf_poll - NAPI Rx polling callback
+ * @napi: struct associated with this polling callback
+ * @budget: amount of packets driver is allowed to process this poll
+ **/
+static int igbvf_poll(struct napi_struct *napi, int budget)
+{
+	struct igbvf_ring *rx_ring = container_of(napi, struct igbvf_ring, napi);
+	struct igbvf_adapter *adapter = rx_ring->adapter;
+	struct e1000_hw *hw = &adapter->hw;
+	int work_done = 0;
+
+	igbvf_clean_rx_irq(adapter, &work_done, budget);
+
+	if (work_done == budget)
+		return budget;
+
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done))) {
+		if (adapter->requested_itr & 3)
+			igbvf_set_itr(adapter);
+
+		if (!test_bit(__IGBVF_DOWN, &adapter->state))
+			ew32(EIMS, adapter->rx_ring->eims_value);
+	}
+
+	return work_done;
+}
+
+/**
+ * igbvf_set_rlpml - set receive large packet maximum length
+ * @adapter: board private structure
+ *
+ * Configure the maximum size of packets that will be received
+ */
+static void igbvf_set_rlpml(struct igbvf_adapter *adapter)
+{
+	int max_frame_size;
+	struct e1000_hw *hw = &adapter->hw;
+
+	max_frame_size = adapter->max_frame_size + VLAN_TAG_SIZE;
+
+	spin_lock_bh(&hw->mbx_lock);
+
+	e1000_rlpml_set_vf(hw, max_frame_size);
+
+	spin_unlock_bh(&hw->mbx_lock);
+}
+
+static int igbvf_vlan_rx_add_vid(struct net_device *netdev,
+				 __be16 proto, u16 vid)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	spin_lock_bh(&hw->mbx_lock);
+
+	if (hw->mac.ops.set_vfta(hw, vid, true)) {
+		dev_warn(&adapter->pdev->dev, "Vlan id %d\n is not added", vid);
+		spin_unlock_bh(&hw->mbx_lock);
+		return -EINVAL;
+	}
+
+	spin_unlock_bh(&hw->mbx_lock);
+
+	set_bit(vid, adapter->active_vlans);
+	return 0;
+}
+
+static int igbvf_vlan_rx_kill_vid(struct net_device *netdev,
+				  __be16 proto, u16 vid)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	spin_lock_bh(&hw->mbx_lock);
+
+	if (hw->mac.ops.set_vfta(hw, vid, false)) {
+		dev_err(&adapter->pdev->dev,
+			"Failed to remove vlan id %d\n", vid);
+		spin_unlock_bh(&hw->mbx_lock);
+		return -EINVAL;
+	}
+
+	spin_unlock_bh(&hw->mbx_lock);
+
+	clear_bit(vid, adapter->active_vlans);
+	return 0;
+}
+
+static void igbvf_restore_vlan(struct igbvf_adapter *adapter)
+{
+	u16 vid;
+
+	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
+		igbvf_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid);
+}
+
+/**
+ * igbvf_configure_tx - Configure Transmit Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+static void igbvf_configure_tx(struct igbvf_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct igbvf_ring *tx_ring = adapter->tx_ring;
+	u64 tdba;
+	u32 txdctl, dca_txctrl;
+
+	/* disable transmits */
+	txdctl = er32(TXDCTL(0));
+	ew32(TXDCTL(0), txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
+	e1e_flush();
+	msleep(10);
+
+	/* Setup the HW Tx Head and Tail descriptor pointers */
+	ew32(TDLEN(0), tx_ring->count * sizeof(union e1000_adv_tx_desc));
+	tdba = tx_ring->dma;
+	ew32(TDBAL(0), (tdba & DMA_BIT_MASK(32)));
+	ew32(TDBAH(0), (tdba >> 32));
+	ew32(TDH(0), 0);
+	ew32(TDT(0), 0);
+	tx_ring->head = E1000_TDH(0);
+	tx_ring->tail = E1000_TDT(0);
+
+	/* Turn off Relaxed Ordering on head write-backs.  The writebacks
+	 * MUST be delivered in order or it will completely screw up
+	 * our bookkeeping.
+	 */
+	dca_txctrl = er32(DCA_TXCTRL(0));
+	dca_txctrl &= ~E1000_DCA_TXCTRL_TX_WB_RO_EN;
+	ew32(DCA_TXCTRL(0), dca_txctrl);
+
+	/* enable transmits */
+	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
+	ew32(TXDCTL(0), txdctl);
+
+	/* Setup Transmit Descriptor Settings for eop descriptor */
+	adapter->txd_cmd = E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_IFCS;
+
+	/* enable Report Status bit */
+	adapter->txd_cmd |= E1000_ADVTXD_DCMD_RS;
+}
+
+/**
+ * igbvf_setup_srrctl - configure the receive control registers
+ * @adapter: Board private structure
+ **/
+static void igbvf_setup_srrctl(struct igbvf_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 srrctl = 0;
+
+	srrctl &= ~(E1000_SRRCTL_DESCTYPE_MASK |
+		    E1000_SRRCTL_BSIZEHDR_MASK |
+		    E1000_SRRCTL_BSIZEPKT_MASK);
+
+	/* Enable queue drop to avoid head of line blocking */
+	srrctl |= E1000_SRRCTL_DROP_EN;
+
+	/* Setup buffer sizes */
+	srrctl |= ALIGN(adapter->rx_buffer_len, 1024) >>
+		  E1000_SRRCTL_BSIZEPKT_SHIFT;
+
+	if (adapter->rx_buffer_len < 2048) {
+		adapter->rx_ps_hdr_size = 0;
+		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
+	} else {
+		adapter->rx_ps_hdr_size = 128;
+		srrctl |= adapter->rx_ps_hdr_size <<
+			  E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+	}
+
+	ew32(SRRCTL(0), srrctl);
+}
+
+/**
+ * igbvf_configure_rx - Configure Receive Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+static void igbvf_configure_rx(struct igbvf_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct igbvf_ring *rx_ring = adapter->rx_ring;
+	u64 rdba;
+	u32 rxdctl;
+
+	/* disable receives */
+	rxdctl = er32(RXDCTL(0));
+	ew32(RXDCTL(0), rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
+	e1e_flush();
+	msleep(10);
+
+	/* Setup the HW Rx Head and Tail Descriptor Pointers and
+	 * the Base and Length of the Rx Descriptor Ring
+	 */
+	rdba = rx_ring->dma;
+	ew32(RDBAL(0), (rdba & DMA_BIT_MASK(32)));
+	ew32(RDBAH(0), (rdba >> 32));
+	ew32(RDLEN(0), rx_ring->count * sizeof(union e1000_adv_rx_desc));
+	rx_ring->head = E1000_RDH(0);
+	rx_ring->tail = E1000_RDT(0);
+	ew32(RDH(0), 0);
+	ew32(RDT(0), 0);
+
+	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
+	rxdctl &= 0xFFF00000;
+	rxdctl |= IGBVF_RX_PTHRESH;
+	rxdctl |= IGBVF_RX_HTHRESH << 8;
+	rxdctl |= IGBVF_RX_WTHRESH << 16;
+
+	igbvf_set_rlpml(adapter);
+
+	/* enable receives */
+	ew32(RXDCTL(0), rxdctl);
+}
+
+/**
+ * igbvf_set_multi - Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The set_multi entry point is called whenever the multicast address
+ * list or the network interface flags are updated.  This routine is
+ * responsible for configuring the hardware for proper multicast,
+ * promiscuous mode, and all-multi behavior.
+ **/
+static void igbvf_set_multi(struct net_device *netdev)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct netdev_hw_addr *ha;
+	u8  *mta_list = NULL;
+	int i;
+
+	if (!netdev_mc_empty(netdev)) {
+		mta_list = kmalloc_array(netdev_mc_count(netdev), ETH_ALEN,
+					 GFP_ATOMIC);
+		if (!mta_list)
+			return;
+	}
+
+	/* prepare a packed array of only addresses. */
+	i = 0;
+	netdev_for_each_mc_addr(ha, netdev)
+		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
+
+	spin_lock_bh(&hw->mbx_lock);
+
+	hw->mac.ops.update_mc_addr_list(hw, mta_list, i, 0, 0);
+
+	spin_unlock_bh(&hw->mbx_lock);
+	kfree(mta_list);
+}
+
+/**
+ * igbvf_set_uni - Configure unicast MAC filters
+ * @netdev: network interface device structure
+ *
+ * This routine is responsible for configuring the hardware for proper
+ * unicast filters.
+ **/
+static int igbvf_set_uni(struct net_device *netdev)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	if (netdev_uc_count(netdev) > IGBVF_MAX_MAC_FILTERS) {
+		pr_err("Too many unicast filters - No Space\n");
+		return -ENOSPC;
+	}
+
+	spin_lock_bh(&hw->mbx_lock);
+
+	/* Clear all unicast MAC filters */
+	hw->mac.ops.set_uc_addr(hw, E1000_VF_MAC_FILTER_CLR, NULL);
+
+	spin_unlock_bh(&hw->mbx_lock);
+
+	if (!netdev_uc_empty(netdev)) {
+		struct netdev_hw_addr *ha;
+
+		/* Add MAC filters one by one */
+		netdev_for_each_uc_addr(ha, netdev) {
+			spin_lock_bh(&hw->mbx_lock);
+
+			hw->mac.ops.set_uc_addr(hw, E1000_VF_MAC_FILTER_ADD,
+						ha->addr);
+
+			spin_unlock_bh(&hw->mbx_lock);
+			udelay(200);
+		}
+	}
+
+	return 0;
+}
+
+static void igbvf_set_rx_mode(struct net_device *netdev)
+{
+	igbvf_set_multi(netdev);
+	igbvf_set_uni(netdev);
+}
+
+/**
+ * igbvf_configure - configure the hardware for Rx and Tx
+ * @adapter: private board structure
+ **/
+static void igbvf_configure(struct igbvf_adapter *adapter)
+{
+	igbvf_set_rx_mode(adapter->netdev);
+
+	igbvf_restore_vlan(adapter);
+
+	igbvf_configure_tx(adapter);
+	igbvf_setup_srrctl(adapter);
+	igbvf_configure_rx(adapter);
+	igbvf_alloc_rx_buffers(adapter->rx_ring,
+			       igbvf_desc_unused(adapter->rx_ring));
+}
+
+/* igbvf_reset - bring the hardware into a known good state
+ * @adapter: private board structure
+ *
+ * This function boots the hardware and enables some settings that
+ * require a configuration cycle of the hardware - those cannot be
+ * set/changed during runtime. After reset the device needs to be
+ * properly configured for Rx, Tx etc.
+ */
+static void igbvf_reset(struct igbvf_adapter *adapter)
+{
+	struct e1000_mac_info *mac = &adapter->hw.mac;
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+
+	spin_lock_bh(&hw->mbx_lock);
+
+	/* Allow time for pending master requests to run */
+	if (mac->ops.reset_hw(hw))
+		dev_info(&adapter->pdev->dev, "PF still resetting\n");
+
+	mac->ops.init_hw(hw);
+
+	spin_unlock_bh(&hw->mbx_lock);
+
+	if (is_valid_ether_addr(adapter->hw.mac.addr)) {
+		eth_hw_addr_set(netdev, adapter->hw.mac.addr);
+		memcpy(netdev->perm_addr, adapter->hw.mac.addr,
+		       netdev->addr_len);
+	}
+
+	adapter->last_reset = jiffies;
+}
+
+int igbvf_up(struct igbvf_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* hardware has been reset, we need to reload some things */
+	igbvf_configure(adapter);
+
+	clear_bit(__IGBVF_DOWN, &adapter->state);
+
+	napi_enable(&adapter->rx_ring->napi);
+	if (adapter->msix_entries)
+		igbvf_configure_msix(adapter);
+
+	/* Clear any pending interrupts. */
+	er32(EICR);
+	igbvf_irq_enable(adapter);
+
+	/* start the watchdog */
+	hw->mac.get_link_status = 1;
+	mod_timer(&adapter->watchdog_timer, jiffies + 1);
+
+	return 0;
+}
+
+void igbvf_down(struct igbvf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 rxdctl, txdctl;
+
+	/* signal that we're down so the interrupt handler does not
+	 * reschedule our watchdog timer
+	 */
+	set_bit(__IGBVF_DOWN, &adapter->state);
+
+	/* disable receives in the hardware */
+	rxdctl = er32(RXDCTL(0));
+	ew32(RXDCTL(0), rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
+
+	netif_carrier_off(netdev);
+	netif_stop_queue(netdev);
+
+	/* disable transmits in the hardware */
+	txdctl = er32(TXDCTL(0));
+	ew32(TXDCTL(0), txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
+
+	/* flush both disables and wait for them to finish */
+	e1e_flush();
+	msleep(10);
+
+	napi_disable(&adapter->rx_ring->napi);
+
+	igbvf_irq_disable(adapter);
+
+	del_timer_sync(&adapter->watchdog_timer);
+
+	/* record the stats before reset*/
+	igbvf_update_stats(adapter);
+
+	adapter->link_speed = 0;
+	adapter->link_duplex = 0;
+
+	igbvf_reset(adapter);
+	igbvf_clean_tx_ring(adapter->tx_ring);
+	igbvf_clean_rx_ring(adapter->rx_ring);
+}
+
+void igbvf_reinit_locked(struct igbvf_adapter *adapter)
+{
+	might_sleep();
+	while (test_and_set_bit(__IGBVF_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+	igbvf_down(adapter);
+	igbvf_up(adapter);
+	clear_bit(__IGBVF_RESETTING, &adapter->state);
+}
+
+/**
+ * igbvf_sw_init - Initialize general software structures (struct igbvf_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * igbvf_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ **/
+static int igbvf_sw_init(struct igbvf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	s32 rc;
+
+	adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN;
+	adapter->rx_ps_hdr_size = 0;
+	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
+
+	adapter->tx_int_delay = 8;
+	adapter->tx_abs_int_delay = 32;
+	adapter->rx_int_delay = 0;
+	adapter->rx_abs_int_delay = 8;
+	adapter->requested_itr = 3;
+	adapter->current_itr = IGBVF_START_ITR;
+
+	/* Set various function pointers */
+	adapter->ei->init_ops(&adapter->hw);
+
+	rc = adapter->hw.mac.ops.init_params(&adapter->hw);
+	if (rc)
+		return rc;
+
+	rc = adapter->hw.mbx.ops.init_params(&adapter->hw);
+	if (rc)
+		return rc;
+
+	igbvf_set_interrupt_capability(adapter);
+
+	if (igbvf_alloc_queues(adapter))
+		return -ENOMEM;
+
+	spin_lock_init(&adapter->tx_queue_lock);
+
+	/* Explicitly disable IRQ since the NIC can be in any state. */
+	igbvf_irq_disable(adapter);
+
+	spin_lock_init(&adapter->stats_lock);
+	spin_lock_init(&adapter->hw.mbx_lock);
+
+	set_bit(__IGBVF_DOWN, &adapter->state);
+	return 0;
+}
+
+static void igbvf_initialize_last_counter_stats(struct igbvf_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+
+	adapter->stats.last_gprc = er32(VFGPRC);
+	adapter->stats.last_gorc = er32(VFGORC);
+	adapter->stats.last_gptc = er32(VFGPTC);
+	adapter->stats.last_gotc = er32(VFGOTC);
+	adapter->stats.last_mprc = er32(VFMPRC);
+	adapter->stats.last_gotlbc = er32(VFGOTLBC);
+	adapter->stats.last_gptlbc = er32(VFGPTLBC);
+	adapter->stats.last_gorlbc = er32(VFGORLBC);
+	adapter->stats.last_gprlbc = er32(VFGPRLBC);
+
+	adapter->stats.base_gprc = er32(VFGPRC);
+	adapter->stats.base_gorc = er32(VFGORC);
+	adapter->stats.base_gptc = er32(VFGPTC);
+	adapter->stats.base_gotc = er32(VFGOTC);
+	adapter->stats.base_mprc = er32(VFMPRC);
+	adapter->stats.base_gotlbc = er32(VFGOTLBC);
+	adapter->stats.base_gptlbc = er32(VFGPTLBC);
+	adapter->stats.base_gorlbc = er32(VFGORLBC);
+	adapter->stats.base_gprlbc = er32(VFGPRLBC);
+}
+
+/**
+ * igbvf_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ **/
+static int igbvf_open(struct net_device *netdev)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	int err;
+
+	/* disallow open during test */
+	if (test_bit(__IGBVF_TESTING, &adapter->state))
+		return -EBUSY;
+
+	/* allocate transmit descriptors */
+	err = igbvf_setup_tx_resources(adapter, adapter->tx_ring);
+	if (err)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	err = igbvf_setup_rx_resources(adapter, adapter->rx_ring);
+	if (err)
+		goto err_setup_rx;
+
+	/* before we allocate an interrupt, we must be ready to handle it.
+	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
+	 * as soon as we call pci_request_irq, so we have to setup our
+	 * clean_rx handler before we do so.
+	 */
+	igbvf_configure(adapter);
+
+	err = igbvf_request_irq(adapter);
+	if (err)
+		goto err_req_irq;
+
+	/* From here on the code is the same as igbvf_up() */
+	clear_bit(__IGBVF_DOWN, &adapter->state);
+
+	napi_enable(&adapter->rx_ring->napi);
+
+	/* clear any pending interrupts */
+	er32(EICR);
+
+	igbvf_irq_enable(adapter);
+
+	/* start the watchdog */
+	hw->mac.get_link_status = 1;
+	mod_timer(&adapter->watchdog_timer, jiffies + 1);
+
+	return 0;
+
+err_req_irq:
+	igbvf_free_rx_resources(adapter->rx_ring);
+err_setup_rx:
+	igbvf_free_tx_resources(adapter->tx_ring);
+err_setup_tx:
+	igbvf_reset(adapter);
+
+	return err;
+}
+
+/**
+ * igbvf_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the drivers control, but
+ * needs to be disabled.  A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ **/
+static int igbvf_close(struct net_device *netdev)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	WARN_ON(test_bit(__IGBVF_RESETTING, &adapter->state));
+	igbvf_down(adapter);
+
+	igbvf_free_irq(adapter);
+
+	igbvf_free_tx_resources(adapter->tx_ring);
+	igbvf_free_rx_resources(adapter->rx_ring);
+
+	return 0;
+}
+
+/**
+ * igbvf_set_mac - Change the Ethernet Address of the NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int igbvf_set_mac(struct net_device *netdev, void *p)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
+
+	spin_lock_bh(&hw->mbx_lock);
+
+	hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
+
+	spin_unlock_bh(&hw->mbx_lock);
+
+	if (!ether_addr_equal(addr->sa_data, hw->mac.addr))
+		return -EADDRNOTAVAIL;
+
+	eth_hw_addr_set(netdev, addr->sa_data);
+
+	return 0;
+}
+
+#define UPDATE_VF_COUNTER(reg, name) \
+{ \
+	u32 current_counter = er32(reg); \
+	if (current_counter < adapter->stats.last_##name) \
+		adapter->stats.name += 0x100000000LL; \
+	adapter->stats.last_##name = current_counter; \
+	adapter->stats.name &= 0xFFFFFFFF00000000LL; \
+	adapter->stats.name |= current_counter; \
+}
+
+/**
+ * igbvf_update_stats - Update the board statistics counters
+ * @adapter: board private structure
+**/
+void igbvf_update_stats(struct igbvf_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct pci_dev *pdev = adapter->pdev;
+
+	/* Prevent stats update while adapter is being reset, link is down
+	 * or if the pci connection is down.
+	 */
+	if (adapter->link_speed == 0)
+		return;
+
+	if (test_bit(__IGBVF_RESETTING, &adapter->state))
+		return;
+
+	if (pci_channel_offline(pdev))
+		return;
+
+	UPDATE_VF_COUNTER(VFGPRC, gprc);
+	UPDATE_VF_COUNTER(VFGORC, gorc);
+	UPDATE_VF_COUNTER(VFGPTC, gptc);
+	UPDATE_VF_COUNTER(VFGOTC, gotc);
+	UPDATE_VF_COUNTER(VFMPRC, mprc);
+	UPDATE_VF_COUNTER(VFGOTLBC, gotlbc);
+	UPDATE_VF_COUNTER(VFGPTLBC, gptlbc);
+	UPDATE_VF_COUNTER(VFGORLBC, gorlbc);
+	UPDATE_VF_COUNTER(VFGPRLBC, gprlbc);
+
+	/* Fill out the OS statistics structure */
+	adapter->netdev->stats.multicast = adapter->stats.mprc;
+}
+
+static void igbvf_print_link_info(struct igbvf_adapter *adapter)
+{
+	dev_info(&adapter->pdev->dev, "Link is Up %d Mbps %s Duplex\n",
+		 adapter->link_speed,
+		 adapter->link_duplex == FULL_DUPLEX ? "Full" : "Half");
+}
+
+static bool igbvf_has_link(struct igbvf_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	s32 ret_val = E1000_SUCCESS;
+	bool link_active;
+
+	/* If interface is down, stay link down */
+	if (test_bit(__IGBVF_DOWN, &adapter->state))
+		return false;
+
+	spin_lock_bh(&hw->mbx_lock);
+
+	ret_val = hw->mac.ops.check_for_link(hw);
+
+	spin_unlock_bh(&hw->mbx_lock);
+
+	link_active = !hw->mac.get_link_status;
+
+	/* if check for link returns error we will need to reset */
+	if (ret_val && time_after(jiffies, adapter->last_reset + (10 * HZ)))
+		schedule_work(&adapter->reset_task);
+
+	return link_active;
+}
+
+/**
+ * igbvf_watchdog - Timer Call-back
+ * @t: timer list pointer containing private struct
+ **/
+static void igbvf_watchdog(struct timer_list *t)
+{
+	struct igbvf_adapter *adapter = from_timer(adapter, t, watchdog_timer);
+
+	/* Do the rest outside of interrupt context */
+	schedule_work(&adapter->watchdog_task);
+}
+
+static void igbvf_watchdog_task(struct work_struct *work)
+{
+	struct igbvf_adapter *adapter = container_of(work,
+						     struct igbvf_adapter,
+						     watchdog_task);
+	struct net_device *netdev = adapter->netdev;
+	struct e1000_mac_info *mac = &adapter->hw.mac;
+	struct igbvf_ring *tx_ring = adapter->tx_ring;
+	struct e1000_hw *hw = &adapter->hw;
+	u32 link;
+	int tx_pending = 0;
+
+	link = igbvf_has_link(adapter);
+
+	if (link) {
+		if (!netif_carrier_ok(netdev)) {
+			mac->ops.get_link_up_info(&adapter->hw,
+						  &adapter->link_speed,
+						  &adapter->link_duplex);
+			igbvf_print_link_info(adapter);
+
+			netif_carrier_on(netdev);
+			netif_wake_queue(netdev);
+		}
+	} else {
+		if (netif_carrier_ok(netdev)) {
+			adapter->link_speed = 0;
+			adapter->link_duplex = 0;
+			dev_info(&adapter->pdev->dev, "Link is Down\n");
+			netif_carrier_off(netdev);
+			netif_stop_queue(netdev);
+		}
+	}
+
+	if (netif_carrier_ok(netdev)) {
+		igbvf_update_stats(adapter);
+	} else {
+		tx_pending = (igbvf_desc_unused(tx_ring) + 1 <
+			      tx_ring->count);
+		if (tx_pending) {
+			/* We've lost link, so the controller stops DMA,
+			 * but we've got queued Tx work that's never going
+			 * to get done, so reset controller to flush Tx.
+			 * (Do the reset outside of interrupt context).
+			 */
+			adapter->tx_timeout_count++;
+			schedule_work(&adapter->reset_task);
+		}
+	}
+
+	/* Cause software interrupt to ensure Rx ring is cleaned */
+	ew32(EICS, adapter->rx_ring->eims_value);
+
+	/* Reset the timer */
+	if (!test_bit(__IGBVF_DOWN, &adapter->state))
+		mod_timer(&adapter->watchdog_timer,
+			  round_jiffies(jiffies + (2 * HZ)));
+}
+
+#define IGBVF_TX_FLAGS_CSUM		0x00000001
+#define IGBVF_TX_FLAGS_VLAN		0x00000002
+#define IGBVF_TX_FLAGS_TSO		0x00000004
+#define IGBVF_TX_FLAGS_IPV4		0x00000008
+#define IGBVF_TX_FLAGS_VLAN_MASK	0xffff0000
+#define IGBVF_TX_FLAGS_VLAN_SHIFT	16
+
+static void igbvf_tx_ctxtdesc(struct igbvf_ring *tx_ring, u32 vlan_macip_lens,
+			      u32 type_tucmd, u32 mss_l4len_idx)
+{
+	struct e1000_adv_tx_context_desc *context_desc;
+	struct igbvf_buffer *buffer_info;
+	u16 i = tx_ring->next_to_use;
+
+	context_desc = IGBVF_TX_CTXTDESC_ADV(*tx_ring, i);
+	buffer_info = &tx_ring->buffer_info[i];
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	/* set bits to identify this as an advanced context descriptor */
+	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
+
+	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
+	context_desc->seqnum_seed	= 0;
+	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
+	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
+
+	buffer_info->time_stamp = jiffies;
+	buffer_info->dma = 0;
+}
+
+static int igbvf_tso(struct igbvf_ring *tx_ring,
+		     struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
+{
+	u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		unsigned char *hdr;
+	} l4;
+	u32 paylen, l4_offset;
+	int err;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	err = skb_cow_head(skb, 0);
+	if (err < 0)
+		return err;
+
+	ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_checksum_start(skb);
+
+	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
+	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
+
+	/* initialize outer IP header fields */
+	if (ip.v4->version == 4) {
+		unsigned char *csum_start = skb_checksum_start(skb);
+		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
+
+		/* IP header will have to cancel out any data that
+		 * is not a part of the outer IP header
+		 */
+		ip.v4->check = csum_fold(csum_partial(trans_start,
+						      csum_start - trans_start,
+						      0));
+		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
+
+		ip.v4->tot_len = 0;
+	} else {
+		ip.v6->payload_len = 0;
+	}
+
+	/* determine offset of inner transport header */
+	l4_offset = l4.hdr - skb->data;
+
+	/* compute length of segmentation header */
+	*hdr_len = (l4.tcp->doff * 4) + l4_offset;
+
+	/* remove payload length from inner checksum */
+	paylen = skb->len - l4_offset;
+	csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
+
+	/* MSS L4LEN IDX */
+	mss_l4len_idx = (*hdr_len - l4_offset) << E1000_ADVTXD_L4LEN_SHIFT;
+	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
+
+	/* VLAN MACLEN IPLEN */
+	vlan_macip_lens = l4.hdr - ip.hdr;
+	vlan_macip_lens |= (ip.hdr - skb->data) << E1000_ADVTXD_MACLEN_SHIFT;
+	vlan_macip_lens |= tx_flags & IGBVF_TX_FLAGS_VLAN_MASK;
+
+	igbvf_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
+
+	return 1;
+}
+
+static bool igbvf_tx_csum(struct igbvf_ring *tx_ring, struct sk_buff *skb,
+			  u32 tx_flags, __be16 protocol)
+{
+	u32 vlan_macip_lens = 0;
+	u32 type_tucmd = 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+csum_failed:
+		if (!(tx_flags & IGBVF_TX_FLAGS_VLAN))
+			return false;
+		goto no_csum;
+	}
+
+	switch (skb->csum_offset) {
+	case offsetof(struct tcphdr, check):
+		type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
+		fallthrough;
+	case offsetof(struct udphdr, check):
+		break;
+	case offsetof(struct sctphdr, checksum):
+		/* validate that this is actually an SCTP request */
+		if (skb_csum_is_sctp(skb)) {
+			type_tucmd = E1000_ADVTXD_TUCMD_L4T_SCTP;
+			break;
+		}
+		fallthrough;
+	default:
+		skb_checksum_help(skb);
+		goto csum_failed;
+	}
+
+	vlan_macip_lens = skb_checksum_start_offset(skb) -
+			  skb_network_offset(skb);
+no_csum:
+	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
+	vlan_macip_lens |= tx_flags & IGBVF_TX_FLAGS_VLAN_MASK;
+
+	igbvf_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, 0);
+	return true;
+}
+
+static int igbvf_maybe_stop_tx(struct net_device *netdev, int size)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	/* there is enough descriptors then we don't need to worry  */
+	if (igbvf_desc_unused(adapter->tx_ring) >= size)
+		return 0;
+
+	netif_stop_queue(netdev);
+
+	/* Herbert's original patch had:
+	 *  smp_mb__after_netif_stop_queue();
+	 * but since that doesn't exist yet, just open code it.
+	 */
+	smp_mb();
+
+	/* We need to check again just in case room has been made available */
+	if (igbvf_desc_unused(adapter->tx_ring) < size)
+		return -EBUSY;
+
+	netif_wake_queue(netdev);
+
+	++adapter->restart_queue;
+	return 0;
+}
+
+#define IGBVF_MAX_TXD_PWR	16
+#define IGBVF_MAX_DATA_PER_TXD	(1u << IGBVF_MAX_TXD_PWR)
+
+static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter,
+				   struct igbvf_ring *tx_ring,
+				   struct sk_buff *skb)
+{
+	struct igbvf_buffer *buffer_info;
+	struct pci_dev *pdev = adapter->pdev;
+	unsigned int len = skb_headlen(skb);
+	unsigned int count = 0, i;
+	unsigned int f;
+
+	i = tx_ring->next_to_use;
+
+	buffer_info = &tx_ring->buffer_info[i];
+	BUG_ON(len >= IGBVF_MAX_DATA_PER_TXD);
+	buffer_info->length = len;
+	/* set time_stamp *before* dma to help avoid a possible race */
+	buffer_info->time_stamp = jiffies;
+	buffer_info->mapped_as_page = false;
+	buffer_info->dma = dma_map_single(&pdev->dev, skb->data, len,
+					  DMA_TO_DEVICE);
+	if (dma_mapping_error(&pdev->dev, buffer_info->dma))
+		goto dma_error;
+
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
+		const skb_frag_t *frag;
+
+		count++;
+		i++;
+		if (i == tx_ring->count)
+			i = 0;
+
+		frag = &skb_shinfo(skb)->frags[f];
+		len = skb_frag_size(frag);
+
+		buffer_info = &tx_ring->buffer_info[i];
+		BUG_ON(len >= IGBVF_MAX_DATA_PER_TXD);
+		buffer_info->length = len;
+		buffer_info->time_stamp = jiffies;
+		buffer_info->mapped_as_page = true;
+		buffer_info->dma = skb_frag_dma_map(&pdev->dev, frag, 0, len,
+						    DMA_TO_DEVICE);
+		if (dma_mapping_error(&pdev->dev, buffer_info->dma))
+			goto dma_error;
+	}
+
+	tx_ring->buffer_info[i].skb = skb;
+
+	return ++count;
+
+dma_error:
+	dev_err(&pdev->dev, "TX DMA map failed\n");
+
+	/* clear timestamp and dma mappings for failed buffer_info mapping */
+	buffer_info->dma = 0;
+	buffer_info->time_stamp = 0;
+	buffer_info->length = 0;
+	buffer_info->mapped_as_page = false;
+	if (count)
+		count--;
+
+	/* clear timestamp and dma mappings for remaining portion of packet */
+	while (count--) {
+		if (i == 0)
+			i += tx_ring->count;
+		i--;
+		buffer_info = &tx_ring->buffer_info[i];
+		igbvf_put_txbuf(adapter, buffer_info);
+	}
+
+	return 0;
+}
+
+static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter,
+				      struct igbvf_ring *tx_ring,
+				      int tx_flags, int count,
+				      unsigned int first, u32 paylen,
+				      u8 hdr_len)
+{
+	union e1000_adv_tx_desc *tx_desc = NULL;
+	struct igbvf_buffer *buffer_info;
+	u32 olinfo_status = 0, cmd_type_len;
+	unsigned int i;
+
+	cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
+			E1000_ADVTXD_DCMD_DEXT);
+
+	if (tx_flags & IGBVF_TX_FLAGS_VLAN)
+		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
+
+	if (tx_flags & IGBVF_TX_FLAGS_TSO) {
+		cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
+
+		/* insert tcp checksum */
+		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
+
+		/* insert ip checksum */
+		if (tx_flags & IGBVF_TX_FLAGS_IPV4)
+			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
+
+	} else if (tx_flags & IGBVF_TX_FLAGS_CSUM) {
+		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
+	}
+
+	olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
+
+	i = tx_ring->next_to_use;
+	while (count--) {
+		buffer_info = &tx_ring->buffer_info[i];
+		tx_desc = IGBVF_TX_DESC_ADV(*tx_ring, i);
+		tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
+		tx_desc->read.cmd_type_len =
+			 cpu_to_le32(cmd_type_len | buffer_info->length);
+		tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+		i++;
+		if (i == tx_ring->count)
+			i = 0;
+	}
+
+	tx_desc->read.cmd_type_len |= cpu_to_le32(adapter->txd_cmd);
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch.  (Only
+	 * applicable for weak-ordered memory model archs,
+	 * such as IA-64).
+	 */
+	wmb();
+
+	tx_ring->buffer_info[first].next_to_watch = tx_desc;
+	tx_ring->next_to_use = i;
+	writel(i, adapter->hw.hw_addr + tx_ring->tail);
+}
+
+static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb,
+					     struct net_device *netdev,
+					     struct igbvf_ring *tx_ring)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	unsigned int first, tx_flags = 0;
+	u8 hdr_len = 0;
+	int count = 0;
+	int tso = 0;
+	__be16 protocol = vlan_get_protocol(skb);
+
+	if (test_bit(__IGBVF_DOWN, &adapter->state)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	if (skb->len <= 0) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* need: count + 4 desc gap to keep tail from touching
+	 *       + 2 desc gap to keep tail from touching head,
+	 *       + 1 desc for skb->data,
+	 *       + 1 desc for context descriptor,
+	 * head, otherwise try next time
+	 */
+	if (igbvf_maybe_stop_tx(netdev, skb_shinfo(skb)->nr_frags + 4)) {
+		/* this is a hard error */
+		return NETDEV_TX_BUSY;
+	}
+
+	if (skb_vlan_tag_present(skb)) {
+		tx_flags |= IGBVF_TX_FLAGS_VLAN;
+		tx_flags |= (skb_vlan_tag_get(skb) <<
+			     IGBVF_TX_FLAGS_VLAN_SHIFT);
+	}
+
+	if (protocol == htons(ETH_P_IP))
+		tx_flags |= IGBVF_TX_FLAGS_IPV4;
+
+	first = tx_ring->next_to_use;
+
+	tso = igbvf_tso(tx_ring, skb, tx_flags, &hdr_len);
+	if (unlikely(tso < 0)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	if (tso)
+		tx_flags |= IGBVF_TX_FLAGS_TSO;
+	else if (igbvf_tx_csum(tx_ring, skb, tx_flags, protocol) &&
+		 (skb->ip_summed == CHECKSUM_PARTIAL))
+		tx_flags |= IGBVF_TX_FLAGS_CSUM;
+
+	/* count reflects descriptors mapped, if 0 then mapping error
+	 * has occurred and we need to rewind the descriptor queue
+	 */
+	count = igbvf_tx_map_adv(adapter, tx_ring, skb);
+
+	if (count) {
+		igbvf_tx_queue_adv(adapter, tx_ring, tx_flags, count,
+				   first, skb->len, hdr_len);
+		/* Make sure there is space in the ring for the next send. */
+		igbvf_maybe_stop_tx(netdev, MAX_SKB_FRAGS + 4);
+	} else {
+		dev_kfree_skb_any(skb);
+		tx_ring->buffer_info[first].time_stamp = 0;
+		tx_ring->next_to_use = first;
+	}
+
+	return NETDEV_TX_OK;
+}
+
+static netdev_tx_t igbvf_xmit_frame(struct sk_buff *skb,
+				    struct net_device *netdev)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	struct igbvf_ring *tx_ring;
+
+	if (test_bit(__IGBVF_DOWN, &adapter->state)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	tx_ring = &adapter->tx_ring[0];
+
+	return igbvf_xmit_frame_ring_adv(skb, netdev, tx_ring);
+}
+
+/**
+ * igbvf_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ * @txqueue: queue timing out (unused)
+ **/
+static void igbvf_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	/* Do the reset outside of interrupt context */
+	adapter->tx_timeout_count++;
+	schedule_work(&adapter->reset_task);
+}
+
+static void igbvf_reset_task(struct work_struct *work)
+{
+	struct igbvf_adapter *adapter;
+
+	adapter = container_of(work, struct igbvf_adapter, reset_task);
+
+	igbvf_reinit_locked(adapter);
+}
+
+/**
+ * igbvf_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int igbvf_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
+
+	while (test_and_set_bit(__IGBVF_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+	/* igbvf_down has a dependency on max_frame_size */
+	adapter->max_frame_size = max_frame;
+	if (netif_running(netdev))
+		igbvf_down(adapter);
+
+	/* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
+	 * means we reserve 2 more, this pushes us to allocate from the next
+	 * larger slab size.
+	 * i.e. RXBUFFER_2048 --> size-4096 slab
+	 * However with the new *_jumbo_rx* routines, jumbo receives will use
+	 * fragmented skbs
+	 */
+
+	if (max_frame <= 1024)
+		adapter->rx_buffer_len = 1024;
+	else if (max_frame <= 2048)
+		adapter->rx_buffer_len = 2048;
+	else
+#if (PAGE_SIZE / 2) > 16384
+		adapter->rx_buffer_len = 16384;
+#else
+		adapter->rx_buffer_len = PAGE_SIZE / 2;
+#endif
+
+	/* adjust allocation if LPE protects us, and we aren't using SBP */
+	if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN) ||
+	    (max_frame == ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN))
+		adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN +
+					 ETH_FCS_LEN;
+
+	netdev_dbg(netdev, "changing MTU from %d to %d\n",
+		   netdev->mtu, new_mtu);
+	netdev->mtu = new_mtu;
+
+	if (netif_running(netdev))
+		igbvf_up(adapter);
+	else
+		igbvf_reset(adapter);
+
+	clear_bit(__IGBVF_RESETTING, &adapter->state);
+
+	return 0;
+}
+
+static int igbvf_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	switch (cmd) {
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int igbvf_suspend(struct device *dev_d)
+{
+	struct net_device *netdev = dev_get_drvdata(dev_d);
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	netif_device_detach(netdev);
+
+	if (netif_running(netdev)) {
+		WARN_ON(test_bit(__IGBVF_RESETTING, &adapter->state));
+		igbvf_down(adapter);
+		igbvf_free_irq(adapter);
+	}
+
+	return 0;
+}
+
+static int __maybe_unused igbvf_resume(struct device *dev_d)
+{
+	struct pci_dev *pdev = to_pci_dev(dev_d);
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	u32 err;
+
+	pci_set_master(pdev);
+
+	if (netif_running(netdev)) {
+		err = igbvf_request_irq(adapter);
+		if (err)
+			return err;
+	}
+
+	igbvf_reset(adapter);
+
+	if (netif_running(netdev))
+		igbvf_up(adapter);
+
+	netif_device_attach(netdev);
+
+	return 0;
+}
+
+static void igbvf_shutdown(struct pci_dev *pdev)
+{
+	igbvf_suspend(&pdev->dev);
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/* Polling 'interrupt' - used by things like netconsole to send skbs
+ * without having to re-enable interrupts. It's not called while
+ * the interrupt routine is executing.
+ */
+static void igbvf_netpoll(struct net_device *netdev)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	disable_irq(adapter->pdev->irq);
+
+	igbvf_clean_tx_irq(adapter->tx_ring);
+
+	enable_irq(adapter->pdev->irq);
+}
+#endif
+
+/**
+ * igbvf_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t igbvf_io_error_detected(struct pci_dev *pdev,
+						pci_channel_state_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	netif_device_detach(netdev);
+
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	if (netif_running(netdev))
+		igbvf_down(adapter);
+	pci_disable_device(pdev);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * igbvf_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot. Implementation
+ * resembles the first-half of the igbvf_resume routine.
+ */
+static pci_ers_result_t igbvf_io_slot_reset(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	if (pci_enable_device_mem(pdev)) {
+		dev_err(&pdev->dev,
+			"Cannot re-enable PCI device after reset.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+	pci_set_master(pdev);
+
+	igbvf_reset(adapter);
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * igbvf_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells us that
+ * its OK to resume normal operation. Implementation resembles the
+ * second-half of the igbvf_resume routine.
+ */
+static void igbvf_io_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	if (netif_running(netdev)) {
+		if (igbvf_up(adapter)) {
+			dev_err(&pdev->dev,
+				"can't bring device back up after reset\n");
+			return;
+		}
+	}
+
+	netif_device_attach(netdev);
+}
+
+/**
+ * igbvf_io_prepare - prepare device driver for PCI reset
+ * @pdev: PCI device information struct
+ */
+static void igbvf_io_prepare(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	while (test_and_set_bit(__IGBVF_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+	igbvf_down(adapter);
+}
+
+/**
+ * igbvf_io_reset_done - PCI reset done, device driver reset can begin
+ * @pdev: PCI device information struct
+ */
+static void igbvf_io_reset_done(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	igbvf_up(adapter);
+	clear_bit(__IGBVF_RESETTING, &adapter->state);
+}
+
+static void igbvf_print_device_info(struct igbvf_adapter *adapter)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+
+	if (hw->mac.type == e1000_vfadapt_i350)
+		dev_info(&pdev->dev, "Intel(R) I350 Virtual Function\n");
+	else
+		dev_info(&pdev->dev, "Intel(R) 82576 Virtual Function\n");
+	dev_info(&pdev->dev, "Address: %pM\n", netdev->dev_addr);
+}
+
+static int igbvf_set_features(struct net_device *netdev,
+			      netdev_features_t features)
+{
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+	if (features & NETIF_F_RXCSUM)
+		adapter->flags &= ~IGBVF_FLAG_RX_CSUM_DISABLED;
+	else
+		adapter->flags |= IGBVF_FLAG_RX_CSUM_DISABLED;
+
+	return 0;
+}
+
+#define IGBVF_MAX_MAC_HDR_LEN		127
+#define IGBVF_MAX_NETWORK_HDR_LEN	511
+
+static netdev_features_t
+igbvf_features_check(struct sk_buff *skb, struct net_device *dev,
+		     netdev_features_t features)
+{
+	unsigned int network_hdr_len, mac_hdr_len;
+
+	/* Make certain the headers can be described by a context descriptor */
+	mac_hdr_len = skb_network_header(skb) - skb->data;
+	if (unlikely(mac_hdr_len > IGBVF_MAX_MAC_HDR_LEN))
+		return features & ~(NETIF_F_HW_CSUM |
+				    NETIF_F_SCTP_CRC |
+				    NETIF_F_HW_VLAN_CTAG_TX |
+				    NETIF_F_TSO |
+				    NETIF_F_TSO6);
+
+	network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
+	if (unlikely(network_hdr_len >  IGBVF_MAX_NETWORK_HDR_LEN))
+		return features & ~(NETIF_F_HW_CSUM |
+				    NETIF_F_SCTP_CRC |
+				    NETIF_F_TSO |
+				    NETIF_F_TSO6);
+
+	/* We can only support IPV4 TSO in tunnels if we can mangle the
+	 * inner IP ID field, so strip TSO if MANGLEID is not supported.
+	 */
+	if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
+		features &= ~NETIF_F_TSO;
+
+	return features;
+}
+
+static const struct net_device_ops igbvf_netdev_ops = {
+	.ndo_open		= igbvf_open,
+	.ndo_stop		= igbvf_close,
+	.ndo_start_xmit		= igbvf_xmit_frame,
+	.ndo_set_rx_mode	= igbvf_set_rx_mode,
+	.ndo_set_mac_address	= igbvf_set_mac,
+	.ndo_change_mtu		= igbvf_change_mtu,
+	.ndo_eth_ioctl		= igbvf_ioctl,
+	.ndo_tx_timeout		= igbvf_tx_timeout,
+	.ndo_vlan_rx_add_vid	= igbvf_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= igbvf_vlan_rx_kill_vid,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= igbvf_netpoll,
+#endif
+	.ndo_set_features	= igbvf_set_features,
+	.ndo_features_check	= igbvf_features_check,
+};
+
+/**
+ * igbvf_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in igbvf_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * igbvf_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct net_device *netdev;
+	struct igbvf_adapter *adapter;
+	struct e1000_hw *hw;
+	const struct igbvf_info *ei = igbvf_info_tbl[ent->driver_data];
+	static int cards_found;
+	int err;
+
+	err = pci_enable_device_mem(pdev);
+	if (err)
+		return err;
+
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_err(&pdev->dev,
+			"No usable DMA configuration, aborting\n");
+		goto err_dma;
+	}
+
+	err = pci_request_regions(pdev, igbvf_driver_name);
+	if (err)
+		goto err_pci_reg;
+
+	pci_set_master(pdev);
+
+	err = -ENOMEM;
+	netdev = alloc_etherdev(sizeof(struct igbvf_adapter));
+	if (!netdev)
+		goto err_alloc_etherdev;
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	pci_set_drvdata(pdev, netdev);
+	adapter = netdev_priv(netdev);
+	hw = &adapter->hw;
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+	adapter->ei = ei;
+	adapter->pba = ei->pba;
+	adapter->flags = ei->flags;
+	adapter->hw.back = adapter;
+	adapter->hw.mac.type = ei->mac;
+	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+
+	/* PCI config space info */
+
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+	hw->revision_id = pdev->revision;
+
+	err = -EIO;
+	adapter->hw.hw_addr = ioremap(pci_resource_start(pdev, 0),
+				      pci_resource_len(pdev, 0));
+
+	if (!adapter->hw.hw_addr)
+		goto err_ioremap;
+
+	if (ei->get_variants) {
+		err = ei->get_variants(adapter);
+		if (err)
+			goto err_get_variants;
+	}
+
+	/* setup adapter struct */
+	err = igbvf_sw_init(adapter);
+	if (err)
+		goto err_sw_init;
+
+	/* construct the net_device struct */
+	netdev->netdev_ops = &igbvf_netdev_ops;
+
+	igbvf_set_ethtool_ops(netdev);
+	netdev->watchdog_timeo = 5 * HZ;
+	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
+
+	adapter->bd_number = cards_found++;
+
+	netdev->hw_features = NETIF_F_SG |
+			      NETIF_F_TSO |
+			      NETIF_F_TSO6 |
+			      NETIF_F_RXCSUM |
+			      NETIF_F_HW_CSUM |
+			      NETIF_F_SCTP_CRC;
+
+#define IGBVF_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
+				    NETIF_F_GSO_GRE_CSUM | \
+				    NETIF_F_GSO_IPXIP4 | \
+				    NETIF_F_GSO_IPXIP6 | \
+				    NETIF_F_GSO_UDP_TUNNEL | \
+				    NETIF_F_GSO_UDP_TUNNEL_CSUM)
+
+	netdev->gso_partial_features = IGBVF_GSO_PARTIAL_FEATURES;
+	netdev->hw_features |= NETIF_F_GSO_PARTIAL |
+			       IGBVF_GSO_PARTIAL_FEATURES;
+
+	netdev->features = netdev->hw_features | NETIF_F_HIGHDMA;
+
+	netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
+	netdev->mpls_features |= NETIF_F_HW_CSUM;
+	netdev->hw_enc_features |= netdev->vlan_features;
+
+	/* set this bit last since it cannot be part of vlan_features */
+	netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
+			    NETIF_F_HW_VLAN_CTAG_RX |
+			    NETIF_F_HW_VLAN_CTAG_TX;
+
+	/* MTU range: 68 - 9216 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
+
+	spin_lock_bh(&hw->mbx_lock);
+
+	/*reset the controller to put the device in a known good state */
+	err = hw->mac.ops.reset_hw(hw);
+	if (err) {
+		dev_info(&pdev->dev,
+			 "PF still in reset state. Is the PF interface up?\n");
+	} else {
+		err = hw->mac.ops.read_mac_addr(hw);
+		if (err)
+			dev_info(&pdev->dev, "Error reading MAC address.\n");
+		else if (is_zero_ether_addr(adapter->hw.mac.addr))
+			dev_info(&pdev->dev,
+				 "MAC address not assigned by administrator.\n");
+		eth_hw_addr_set(netdev, adapter->hw.mac.addr);
+	}
+
+	spin_unlock_bh(&hw->mbx_lock);
+
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		dev_info(&pdev->dev, "Assigning random MAC address.\n");
+		eth_hw_addr_random(netdev);
+		memcpy(adapter->hw.mac.addr, netdev->dev_addr,
+		       netdev->addr_len);
+	}
+
+	timer_setup(&adapter->watchdog_timer, igbvf_watchdog, 0);
+
+	INIT_WORK(&adapter->reset_task, igbvf_reset_task);
+	INIT_WORK(&adapter->watchdog_task, igbvf_watchdog_task);
+
+	/* ring size defaults */
+	adapter->rx_ring->count = 1024;
+	adapter->tx_ring->count = 1024;
+
+	/* reset the hardware with the new settings */
+	igbvf_reset(adapter);
+
+	/* set hardware-specific flags */
+	if (adapter->hw.mac.type == e1000_vfadapt_i350)
+		adapter->flags |= IGBVF_FLAG_RX_LB_VLAN_BSWAP;
+
+	strcpy(netdev->name, "eth%d");
+	err = register_netdev(netdev);
+	if (err)
+		goto err_hw_init;
+
+	/* tell the stack to leave us alone until igbvf_open() is called */
+	netif_carrier_off(netdev);
+	netif_stop_queue(netdev);
+
+	igbvf_print_device_info(adapter);
+
+	igbvf_initialize_last_counter_stats(adapter);
+
+	return 0;
+
+err_hw_init:
+	netif_napi_del(&adapter->rx_ring->napi);
+	kfree(adapter->tx_ring);
+	kfree(adapter->rx_ring);
+err_sw_init:
+	igbvf_reset_interrupt_capability(adapter);
+err_get_variants:
+	iounmap(adapter->hw.hw_addr);
+err_ioremap:
+	free_netdev(netdev);
+err_alloc_etherdev:
+	pci_release_regions(pdev);
+err_pci_reg:
+err_dma:
+	pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * igbvf_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * igbvf_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.  The could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void igbvf_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igbvf_adapter *adapter = netdev_priv(netdev);
+	struct e1000_hw *hw = &adapter->hw;
+
+	/* The watchdog timer may be rescheduled, so explicitly
+	 * disable it from being rescheduled.
+	 */
+	set_bit(__IGBVF_DOWN, &adapter->state);
+	del_timer_sync(&adapter->watchdog_timer);
+
+	cancel_work_sync(&adapter->reset_task);
+	cancel_work_sync(&adapter->watchdog_task);
+
+	unregister_netdev(netdev);
+
+	igbvf_reset_interrupt_capability(adapter);
+
+	/* it is important to delete the NAPI struct prior to freeing the
+	 * Rx ring so that you do not end up with null pointer refs
+	 */
+	netif_napi_del(&adapter->rx_ring->napi);
+	kfree(adapter->tx_ring);
+	kfree(adapter->rx_ring);
+
+	iounmap(hw->hw_addr);
+	if (hw->flash_address)
+		iounmap(hw->flash_address);
+	pci_release_regions(pdev);
+
+	free_netdev(netdev);
+
+	pci_disable_device(pdev);
+}
+
+/* PCI Error Recovery (ERS) */
+static const struct pci_error_handlers igbvf_err_handler = {
+	.error_detected = igbvf_io_error_detected,
+	.slot_reset = igbvf_io_slot_reset,
+	.resume = igbvf_io_resume,
+	.reset_prepare = igbvf_io_prepare,
+	.reset_done = igbvf_io_reset_done,
+};
+
+static const struct pci_device_id igbvf_pci_tbl[] = {
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_VF), board_vf },
+	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_VF), board_i350_vf },
+	{ } /* terminate list */
+};
+MODULE_DEVICE_TABLE(pci, igbvf_pci_tbl);
+
+static SIMPLE_DEV_PM_OPS(igbvf_pm_ops, igbvf_suspend, igbvf_resume);
+
+/* PCI Device API Driver */
+static struct pci_driver igbvf_driver = {
+	.name		= igbvf_driver_name,
+	.id_table	= igbvf_pci_tbl,
+	.probe		= igbvf_probe,
+	.remove		= igbvf_remove,
+	.driver.pm	= &igbvf_pm_ops,
+	.shutdown	= igbvf_shutdown,
+	.err_handler	= &igbvf_err_handler
+};
+
+/**
+ * igbvf_init_module - Driver Registration Routine
+ *
+ * igbvf_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ **/
+static int __init igbvf_init_module(void)
+{
+	int ret;
+
+	pr_info("%s\n", igbvf_driver_string);
+	pr_info("%s\n", igbvf_copyright);
+
+	ret = pci_register_driver(&igbvf_driver);
+
+	return ret;
+}
+module_init(igbvf_init_module);
+
+/**
+ * igbvf_exit_module - Driver Exit Cleanup Routine
+ *
+ * igbvf_exit_module is called just before the driver is removed
+ * from memory.
+ **/
+static void __exit igbvf_exit_module(void)
+{
+	pci_unregister_driver(&igbvf_driver);
+}
+module_exit(igbvf_exit_module);
+
+MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
+MODULE_DESCRIPTION("Intel(R) Gigabit Virtual Function Network Driver");
+MODULE_LICENSE("GPL v2");
+
+/* netdev.c */
diff --git a/drivers/net/ethernet/intel/igbvf/regs.h b/drivers/net/ethernet/intel/igbvf/regs.h
new file mode 100644
index 0000000000..625a309a33
--- /dev/null
+++ b/drivers/net/ethernet/intel/igbvf/regs.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
+
+#ifndef _E1000_REGS_H_
+#define _E1000_REGS_H_
+
+#define E1000_CTRL	0x00000 /* Device Control - RW */
+#define E1000_STATUS	0x00008 /* Device Status - RO */
+#define E1000_ITR	0x000C4 /* Interrupt Throttling Rate - RW */
+#define E1000_EICR	0x01580 /* Ext. Interrupt Cause Read - R/clr */
+#define E1000_EITR(_n)	(0x01680 + (0x4 * (_n)))
+#define E1000_EICS	0x01520 /* Ext. Interrupt Cause Set - W0 */
+#define E1000_EIMS	0x01524 /* Ext. Interrupt Mask Set/Read - RW */
+#define E1000_EIMC	0x01528 /* Ext. Interrupt Mask Clear - WO */
+#define E1000_EIAC	0x0152C /* Ext. Interrupt Auto Clear - RW */
+#define E1000_EIAM	0x01530 /* Ext. Interrupt Ack Auto Clear Mask - RW */
+#define E1000_IVAR0	0x01700 /* Interrupt Vector Allocation (array) - RW */
+#define E1000_IVAR_MISC	0x01740 /* IVAR for "other" causes - RW */
+
+/* Convenience macros
+ *
+ * Note: "_n" is the queue number of the register to be written to.
+ *
+ * Example usage:
+ * E1000_RDBAL_REG(current_rx_queue)
+ */
+#define E1000_RDBAL(_n)	((_n) < 4 ? (0x02800 + ((_n) * 0x100)) : \
+			 (0x0C000 + ((_n) * 0x40)))
+#define E1000_RDBAH(_n)	((_n) < 4 ? (0x02804 + ((_n) * 0x100)) : \
+			 (0x0C004 + ((_n) * 0x40)))
+#define E1000_RDLEN(_n)	((_n) < 4 ? (0x02808 + ((_n) * 0x100)) : \
+			 (0x0C008 + ((_n) * 0x40)))
+#define E1000_SRRCTL(_n)	((_n) < 4 ? (0x0280C + ((_n) * 0x100)) : \
+				 (0x0C00C + ((_n) * 0x40)))
+#define E1000_RDH(_n)	((_n) < 4 ? (0x02810 + ((_n) * 0x100)) : \
+			 (0x0C010 + ((_n) * 0x40)))
+#define E1000_RDT(_n)	((_n) < 4 ? (0x02818 + ((_n) * 0x100)) : \
+			 (0x0C018 + ((_n) * 0x40)))
+#define E1000_RXDCTL(_n)	((_n) < 4 ? (0x02828 + ((_n) * 0x100)) : \
+				 (0x0C028 + ((_n) * 0x40)))
+#define E1000_TDBAL(_n)	((_n) < 4 ? (0x03800 + ((_n) * 0x100)) : \
+			 (0x0E000 + ((_n) * 0x40)))
+#define E1000_TDBAH(_n)	((_n) < 4 ? (0x03804 + ((_n) * 0x100)) : \
+			 (0x0E004 + ((_n) * 0x40)))
+#define E1000_TDLEN(_n)	((_n) < 4 ? (0x03808 + ((_n) * 0x100)) : \
+			 (0x0E008 + ((_n) * 0x40)))
+#define E1000_TDH(_n)	((_n) < 4 ? (0x03810 + ((_n) * 0x100)) : \
+			 (0x0E010 + ((_n) * 0x40)))
+#define E1000_TDT(_n)	((_n) < 4 ? (0x03818 + ((_n) * 0x100)) : \
+			 (0x0E018 + ((_n) * 0x40)))
+#define E1000_TXDCTL(_n)	((_n) < 4 ? (0x03828 + ((_n) * 0x100)) : \
+				 (0x0E028 + ((_n) * 0x40)))
+#define E1000_DCA_TXCTRL(_n)	(0x03814 + (_n << 8))
+#define E1000_DCA_RXCTRL(_n)	(0x02814 + (_n << 8))
+#define E1000_RAL(_i)	(((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \
+			 (0x054E0 + ((_i - 16) * 8)))
+#define E1000_RAH(_i)	(((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
+			 (0x054E4 + ((_i - 16) * 8)))
+
+/* Statistics registers */
+#define E1000_VFGPRC	0x00F10
+#define E1000_VFGORC	0x00F18
+#define E1000_VFMPRC	0x00F3C
+#define E1000_VFGPTC	0x00F14
+#define E1000_VFGOTC	0x00F34
+#define E1000_VFGOTLBC	0x00F50
+#define E1000_VFGPTLBC	0x00F44
+#define E1000_VFGORLBC	0x00F48
+#define E1000_VFGPRLBC	0x00F40
+
+/* These act per VF so an array friendly macro is used */
+#define E1000_V2PMAILBOX(_n)	(0x00C40 + (4 * (_n)))
+#define E1000_VMBMEM(_n)	(0x00800 + (64 * (_n)))
+
+/* Define macros for handling registers */
+#define er32(reg)	readl(hw->hw_addr + E1000_##reg)
+#define ew32(reg, val)	writel((val), hw->hw_addr +  E1000_##reg)
+#define array_er32(reg, offset) \
+	readl(hw->hw_addr + E1000_##reg + (offset << 2))
+#define array_ew32(reg, offset, val) \
+	writel((val), hw->hw_addr +  E1000_##reg + (offset << 2))
+#define e1e_flush()	er32(STATUS)
+
+#endif
diff --git a/drivers/net/ethernet/intel/igbvf/vf.c b/drivers/net/ethernet/intel/igbvf/vf.c
new file mode 100644
index 0000000000..a47a2e3e54
--- /dev/null
+++ b/drivers/net/ethernet/intel/igbvf/vf.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
+
+#include <linux/etherdevice.h>
+
+#include "vf.h"
+
+static s32 e1000_check_for_link_vf(struct e1000_hw *hw);
+static s32 e1000_get_link_up_info_vf(struct e1000_hw *hw, u16 *speed,
+				     u16 *duplex);
+static s32 e1000_init_hw_vf(struct e1000_hw *hw);
+static s32 e1000_reset_hw_vf(struct e1000_hw *hw);
+
+static void e1000_update_mc_addr_list_vf(struct e1000_hw *hw, u8 *,
+					 u32, u32, u32);
+static void e1000_rar_set_vf(struct e1000_hw *, u8 *, u32);
+static s32 e1000_read_mac_addr_vf(struct e1000_hw *);
+static s32 e1000_set_uc_addr_vf(struct e1000_hw *hw, u32 subcmd, u8 *addr);
+static s32 e1000_set_vfta_vf(struct e1000_hw *, u16, bool);
+
+/**
+ *  e1000_init_mac_params_vf - Inits MAC params
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_init_mac_params_vf(struct e1000_hw *hw)
+{
+	struct e1000_mac_info *mac = &hw->mac;
+
+	/* VF's have no MTA Registers - PF feature only */
+	mac->mta_reg_count = 128;
+	/* VF's have no access to RAR entries  */
+	mac->rar_entry_count = 1;
+
+	/* Function pointers */
+	/* reset */
+	mac->ops.reset_hw = e1000_reset_hw_vf;
+	/* hw initialization */
+	mac->ops.init_hw = e1000_init_hw_vf;
+	/* check for link */
+	mac->ops.check_for_link = e1000_check_for_link_vf;
+	/* link info */
+	mac->ops.get_link_up_info = e1000_get_link_up_info_vf;
+	/* multicast address update */
+	mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_vf;
+	/* set mac address */
+	mac->ops.rar_set = e1000_rar_set_vf;
+	/* read mac address */
+	mac->ops.read_mac_addr = e1000_read_mac_addr_vf;
+	/* set mac filter */
+	mac->ops.set_uc_addr = e1000_set_uc_addr_vf;
+	/* set vlan filter table array */
+	mac->ops.set_vfta = e1000_set_vfta_vf;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_init_function_pointers_vf - Inits function pointers
+ *  @hw: pointer to the HW structure
+ **/
+void e1000_init_function_pointers_vf(struct e1000_hw *hw)
+{
+	hw->mac.ops.init_params = e1000_init_mac_params_vf;
+	hw->mbx.ops.init_params = e1000_init_mbx_params_vf;
+}
+
+/**
+ *  e1000_get_link_up_info_vf - Gets link info.
+ *  @hw: pointer to the HW structure
+ *  @speed: pointer to 16 bit value to store link speed.
+ *  @duplex: pointer to 16 bit value to store duplex.
+ *
+ *  Since we cannot read the PHY and get accurate link info, we must rely upon
+ *  the status register's data which is often stale and inaccurate.
+ **/
+static s32 e1000_get_link_up_info_vf(struct e1000_hw *hw, u16 *speed,
+				     u16 *duplex)
+{
+	s32 status;
+
+	status = er32(STATUS);
+	if (status & E1000_STATUS_SPEED_1000)
+		*speed = SPEED_1000;
+	else if (status & E1000_STATUS_SPEED_100)
+		*speed = SPEED_100;
+	else
+		*speed = SPEED_10;
+
+	if (status & E1000_STATUS_FD)
+		*duplex = FULL_DUPLEX;
+	else
+		*duplex = HALF_DUPLEX;
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_reset_hw_vf - Resets the HW
+ *  @hw: pointer to the HW structure
+ *
+ *  VF's provide a function level reset. This is done using bit 26 of ctrl_reg.
+ *  This is all the reset we can perform on a VF.
+ **/
+static s32 e1000_reset_hw_vf(struct e1000_hw *hw)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	u32 timeout = E1000_VF_INIT_TIMEOUT;
+	u32 ret_val = -E1000_ERR_MAC_INIT;
+	u32 msgbuf[3];
+	u8 *addr = (u8 *)(&msgbuf[1]);
+	u32 ctrl;
+
+	/* assert VF queue/interrupt reset */
+	ctrl = er32(CTRL);
+	ew32(CTRL, ctrl | E1000_CTRL_RST);
+
+	/* we cannot initialize while the RSTI / RSTD bits are asserted */
+	while (!mbx->ops.check_for_rst(hw) && timeout) {
+		timeout--;
+		udelay(5);
+	}
+
+	if (timeout) {
+		/* mailbox timeout can now become active */
+		mbx->timeout = E1000_VF_MBX_INIT_TIMEOUT;
+
+		/* notify PF of VF reset completion */
+		msgbuf[0] = E1000_VF_RESET;
+		mbx->ops.write_posted(hw, msgbuf, 1);
+
+		mdelay(10);
+
+		/* set our "perm_addr" based on info provided by PF */
+		ret_val = mbx->ops.read_posted(hw, msgbuf, 3);
+		if (!ret_val) {
+			switch (msgbuf[0]) {
+			case E1000_VF_RESET | E1000_VT_MSGTYPE_ACK:
+				memcpy(hw->mac.perm_addr, addr, ETH_ALEN);
+				break;
+			case E1000_VF_RESET | E1000_VT_MSGTYPE_NACK:
+				eth_zero_addr(hw->mac.perm_addr);
+				break;
+			default:
+				ret_val = -E1000_ERR_MAC_INIT;
+			}
+		}
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_init_hw_vf - Inits the HW
+ *  @hw: pointer to the HW structure
+ *
+ *  Not much to do here except clear the PF Reset indication if there is one.
+ **/
+static s32 e1000_init_hw_vf(struct e1000_hw *hw)
+{
+	/* attempt to set and restore our mac address */
+	e1000_rar_set_vf(hw, hw->mac.addr, 0);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_hash_mc_addr_vf - Generate a multicast hash value
+ *  @hw: pointer to the HW structure
+ *  @mc_addr: pointer to a multicast address
+ *
+ *  Generates a multicast address hash value which is used to determine
+ *  the multicast filter table array address and new table value.  See
+ *  e1000_mta_set_generic()
+ **/
+static u32 e1000_hash_mc_addr_vf(struct e1000_hw *hw, u8 *mc_addr)
+{
+	u32 hash_value, hash_mask;
+	u8 bit_shift = 0;
+
+	/* Register count multiplied by bits per register */
+	hash_mask = (hw->mac.mta_reg_count * 32) - 1;
+
+	/* The bit_shift is the number of left-shifts
+	 * where 0xFF would still fall within the hash mask.
+	 */
+	while (hash_mask >> bit_shift != 0xFF)
+		bit_shift++;
+
+	hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) |
+				  (((u16)mc_addr[5]) << bit_shift)));
+
+	return hash_value;
+}
+
+/**
+ *  e1000_update_mc_addr_list_vf - Update Multicast addresses
+ *  @hw: pointer to the HW structure
+ *  @mc_addr_list: array of multicast addresses to program
+ *  @mc_addr_count: number of multicast addresses to program
+ *  @rar_used_count: the first RAR register free to program
+ *  @rar_count: total number of supported Receive Address Registers
+ *
+ *  Updates the Receive Address Registers and Multicast Table Array.
+ *  The caller must have a packed mc_addr_list of multicast addresses.
+ *  The parameter rar_count will usually be hw->mac.rar_entry_count
+ *  unless there are workarounds that change this.
+ **/
+static void e1000_update_mc_addr_list_vf(struct e1000_hw *hw,
+					 u8 *mc_addr_list, u32 mc_addr_count,
+					 u32 rar_used_count, u32 rar_count)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	u32 msgbuf[E1000_VFMAILBOX_SIZE];
+	u16 *hash_list = (u16 *)&msgbuf[1];
+	u32 hash_value;
+	u32 cnt, i;
+	s32 ret_val;
+
+	/* Each entry in the list uses 1 16 bit word.  We have 30
+	 * 16 bit words available in our HW msg buffer (minus 1 for the
+	 * msg type).  That's 30 hash values if we pack 'em right.  If
+	 * there are more than 30 MC addresses to add then punt the
+	 * extras for now and then add code to handle more than 30 later.
+	 * It would be unusual for a server to request that many multi-cast
+	 * addresses except for in large enterprise network environments.
+	 */
+
+	cnt = (mc_addr_count > 30) ? 30 : mc_addr_count;
+	msgbuf[0] = E1000_VF_SET_MULTICAST;
+	msgbuf[0] |= cnt << E1000_VT_MSGINFO_SHIFT;
+
+	for (i = 0; i < cnt; i++) {
+		hash_value = e1000_hash_mc_addr_vf(hw, mc_addr_list);
+		hash_list[i] = hash_value & 0x0FFFF;
+		mc_addr_list += ETH_ALEN;
+	}
+
+	ret_val = mbx->ops.write_posted(hw, msgbuf, E1000_VFMAILBOX_SIZE);
+	if (!ret_val)
+		mbx->ops.read_posted(hw, msgbuf, 1);
+}
+
+/**
+ *  e1000_set_vfta_vf - Set/Unset vlan filter table address
+ *  @hw: pointer to the HW structure
+ *  @vid: determines the vfta register and bit to set/unset
+ *  @set: if true then set bit, else clear bit
+ **/
+static s32 e1000_set_vfta_vf(struct e1000_hw *hw, u16 vid, bool set)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	u32 msgbuf[2];
+	s32 err;
+
+	msgbuf[0] = E1000_VF_SET_VLAN;
+	msgbuf[1] = vid;
+	/* Setting the 8 bit field MSG INFO to true indicates "add" */
+	if (set)
+		msgbuf[0] |= BIT(E1000_VT_MSGINFO_SHIFT);
+
+	mbx->ops.write_posted(hw, msgbuf, 2);
+
+	err = mbx->ops.read_posted(hw, msgbuf, 2);
+
+	msgbuf[0] &= ~E1000_VT_MSGTYPE_CTS;
+
+	/* if nacked the vlan was rejected */
+	if (!err && (msgbuf[0] == (E1000_VF_SET_VLAN | E1000_VT_MSGTYPE_NACK)))
+		err = -E1000_ERR_MAC_INIT;
+
+	return err;
+}
+
+/**
+ *  e1000_rlpml_set_vf - Set the maximum receive packet length
+ *  @hw: pointer to the HW structure
+ *  @max_size: value to assign to max frame size
+ **/
+void e1000_rlpml_set_vf(struct e1000_hw *hw, u16 max_size)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	u32 msgbuf[2];
+	s32 ret_val;
+
+	msgbuf[0] = E1000_VF_SET_LPE;
+	msgbuf[1] = max_size;
+
+	ret_val = mbx->ops.write_posted(hw, msgbuf, 2);
+	if (!ret_val)
+		mbx->ops.read_posted(hw, msgbuf, 1);
+}
+
+/**
+ *  e1000_rar_set_vf - set device MAC address
+ *  @hw: pointer to the HW structure
+ *  @addr: pointer to the receive address
+ *  @index: receive address array register
+ **/
+static void e1000_rar_set_vf(struct e1000_hw *hw, u8 *addr, u32 index)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	u32 msgbuf[3];
+	u8 *msg_addr = (u8 *)(&msgbuf[1]);
+	s32 ret_val;
+
+	memset(msgbuf, 0, 12);
+	msgbuf[0] = E1000_VF_SET_MAC_ADDR;
+	memcpy(msg_addr, addr, ETH_ALEN);
+	ret_val = mbx->ops.write_posted(hw, msgbuf, 3);
+
+	if (!ret_val)
+		ret_val = mbx->ops.read_posted(hw, msgbuf, 3);
+
+	msgbuf[0] &= ~E1000_VT_MSGTYPE_CTS;
+
+	/* if nacked the address was rejected, use "perm_addr" */
+	if (!ret_val &&
+	    (msgbuf[0] == (E1000_VF_SET_MAC_ADDR | E1000_VT_MSGTYPE_NACK)))
+		e1000_read_mac_addr_vf(hw);
+}
+
+/**
+ *  e1000_read_mac_addr_vf - Read device MAC address
+ *  @hw: pointer to the HW structure
+ **/
+static s32 e1000_read_mac_addr_vf(struct e1000_hw *hw)
+{
+	memcpy(hw->mac.addr, hw->mac.perm_addr, ETH_ALEN);
+
+	return E1000_SUCCESS;
+}
+
+/**
+ *  e1000_set_uc_addr_vf - Set or clear unicast filters
+ *  @hw: pointer to the HW structure
+ *  @sub_cmd: add or clear filters
+ *  @addr: pointer to the filter MAC address
+ **/
+static s32 e1000_set_uc_addr_vf(struct e1000_hw *hw, u32 sub_cmd, u8 *addr)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	u32 msgbuf[3], msgbuf_chk;
+	u8 *msg_addr = (u8 *)(&msgbuf[1]);
+	s32 ret_val;
+
+	memset(msgbuf, 0, sizeof(msgbuf));
+	msgbuf[0] |= sub_cmd;
+	msgbuf[0] |= E1000_VF_SET_MAC_ADDR;
+	msgbuf_chk = msgbuf[0];
+
+	if (addr)
+		memcpy(msg_addr, addr, ETH_ALEN);
+
+	ret_val = mbx->ops.write_posted(hw, msgbuf, 3);
+
+	if (!ret_val)
+		ret_val = mbx->ops.read_posted(hw, msgbuf, 3);
+
+	msgbuf[0] &= ~E1000_VT_MSGTYPE_CTS;
+
+	if (!ret_val) {
+		msgbuf[0] &= ~E1000_VT_MSGTYPE_CTS;
+
+		if (msgbuf[0] == (msgbuf_chk | E1000_VT_MSGTYPE_NACK))
+			return -ENOSPC;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  e1000_check_for_link_vf - Check for link for a virtual interface
+ *  @hw: pointer to the HW structure
+ *
+ *  Checks to see if the underlying PF is still talking to the VF and
+ *  if it is then it reports the link state to the hardware, otherwise
+ *  it reports link down and returns an error.
+ **/
+static s32 e1000_check_for_link_vf(struct e1000_hw *hw)
+{
+	struct e1000_mbx_info *mbx = &hw->mbx;
+	struct e1000_mac_info *mac = &hw->mac;
+	s32 ret_val = E1000_SUCCESS;
+	u32 in_msg = 0;
+
+	/* We only want to run this if there has been a rst asserted.
+	 * in this case that could mean a link change, device reset,
+	 * or a virtual function reset
+	 */
+
+	/* If we were hit with a reset or timeout drop the link */
+	if (!mbx->ops.check_for_rst(hw) || !mbx->timeout)
+		mac->get_link_status = true;
+
+	if (!mac->get_link_status)
+		goto out;
+
+	/* if link status is down no point in checking to see if PF is up */
+	if (!(er32(STATUS) & E1000_STATUS_LU))
+		goto out;
+
+	/* if the read failed it could just be a mailbox collision, best wait
+	 * until we are called again and don't report an error
+	 */
+	if (mbx->ops.read(hw, &in_msg, 1))
+		goto out;
+
+	/* if incoming message isn't clear to send we are waiting on response */
+	if (!(in_msg & E1000_VT_MSGTYPE_CTS)) {
+		/* msg is not CTS and is NACK we must have lost CTS status */
+		if (in_msg & E1000_VT_MSGTYPE_NACK)
+			ret_val = -E1000_ERR_MAC_INIT;
+		goto out;
+	}
+
+	/* the PF is talking, if we timed out in the past we reinit */
+	if (!mbx->timeout) {
+		ret_val = -E1000_ERR_MAC_INIT;
+		goto out;
+	}
+
+	/* if we passed all the tests above then the link is up and we no
+	 * longer need to check for link
+	 */
+	mac->get_link_status = false;
+
+out:
+	return ret_val;
+}
+
diff --git a/drivers/net/ethernet/intel/igbvf/vf.h b/drivers/net/ethernet/intel/igbvf/vf.h
new file mode 100644
index 0000000000..ba9bb3132d
--- /dev/null
+++ b/drivers/net/ethernet/intel/igbvf/vf.h
@@ -0,0 +1,243 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2009 - 2018 Intel Corporation. */
+
+#ifndef _E1000_VF_H_
+#define _E1000_VF_H_
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+
+#include "regs.h"
+#include "defines.h"
+
+struct e1000_hw;
+
+#define E1000_DEV_ID_82576_VF		0x10CA
+#define E1000_DEV_ID_I350_VF		0x1520
+#define E1000_REVISION_0	0
+#define E1000_REVISION_1	1
+#define E1000_REVISION_2	2
+#define E1000_REVISION_3	3
+#define E1000_REVISION_4	4
+
+#define E1000_FUNC_0	0
+#define E1000_FUNC_1	1
+
+/* Receive Address Register Count
+ * Number of high/low register pairs in the RAR.  The RAR (Receive Address
+ * Registers) holds the directed and multicast addresses that we monitor.
+ * These entries are also used for MAC-based filtering.
+ */
+#define E1000_RAR_ENTRIES_VF	1
+
+/* Receive Descriptor - Advanced */
+union e1000_adv_rx_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+	} read;
+	struct {
+		struct {
+			union {
+				__le32 data;
+				struct {
+					__le16 pkt_info; /* RSS/Packet type */
+					/* Split Header, hdr buffer length */
+					__le16 hdr_info;
+				} hs_rss;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				struct {
+					__le16 ip_id; /* IP id */
+					__le16 csum;  /* Packet Checksum */
+				} csum_ip;
+			} hi_dword;
+		} lower;
+		struct {
+			__le32 status_error; /* ext status/error */
+			__le16 length; /* Packet length */
+			__le16 vlan; /* VLAN tag */
+		} upper;
+	} wb;  /* writeback */
+};
+
+#define E1000_RXDADV_HDRBUFLEN_MASK	0x7FE0
+#define E1000_RXDADV_HDRBUFLEN_SHIFT	5
+
+/* Transmit Descriptor - Advanced */
+union e1000_adv_tx_desc {
+	struct {
+		__le64 buffer_addr; /* Address of descriptor's data buf */
+		__le32 cmd_type_len;
+		__le32 olinfo_status;
+	} read;
+	struct {
+		__le64 rsvd; /* Reserved */
+		__le32 nxtseq_seed;
+		__le32 status;
+	} wb;
+};
+
+/* Adv Transmit Descriptor Config Masks */
+#define E1000_ADVTXD_DTYP_CTXT	0x00200000 /* Advanced Context Descriptor */
+#define E1000_ADVTXD_DTYP_DATA	0x00300000 /* Advanced Data Descriptor */
+#define E1000_ADVTXD_DCMD_EOP	0x01000000 /* End of Packet */
+#define E1000_ADVTXD_DCMD_IFCS	0x02000000 /* Insert FCS (Ethernet CRC) */
+#define E1000_ADVTXD_DCMD_RS	0x08000000 /* Report Status */
+#define E1000_ADVTXD_DCMD_DEXT	0x20000000 /* Descriptor extension (1=Adv) */
+#define E1000_ADVTXD_DCMD_VLE	0x40000000 /* VLAN pkt enable */
+#define E1000_ADVTXD_DCMD_TSE	0x80000000 /* TCP Seg enable */
+#define E1000_ADVTXD_PAYLEN_SHIFT	14 /* Adv desc PAYLEN shift */
+
+/* Context descriptors */
+struct e1000_adv_tx_context_desc {
+	__le32 vlan_macip_lens;
+	__le32 seqnum_seed;
+	__le32 type_tucmd_mlhl;
+	__le32 mss_l4len_idx;
+};
+
+#define E1000_ADVTXD_MACLEN_SHIFT	9  /* Adv ctxt desc mac len shift */
+#define E1000_ADVTXD_TUCMD_IPV4		0x00000400 /* IP Packet Type: 1=IPv4 */
+#define E1000_ADVTXD_TUCMD_L4T_TCP	0x00000800 /* L4 Packet TYPE of TCP */
+#define E1000_ADVTXD_TUCMD_L4T_SCTP	0x00001000 /* L4 packet TYPE of SCTP */
+#define E1000_ADVTXD_L4LEN_SHIFT	8  /* Adv ctxt L4LEN shift */
+#define E1000_ADVTXD_MSS_SHIFT		16 /* Adv ctxt MSS shift */
+
+enum e1000_mac_type {
+	e1000_undefined = 0,
+	e1000_vfadapt,
+	e1000_vfadapt_i350,
+	e1000_num_macs  /* List is 1-based, so subtract 1 for true count. */
+};
+
+struct e1000_vf_stats {
+	u64 base_gprc;
+	u64 base_gptc;
+	u64 base_gorc;
+	u64 base_gotc;
+	u64 base_mprc;
+	u64 base_gotlbc;
+	u64 base_gptlbc;
+	u64 base_gorlbc;
+	u64 base_gprlbc;
+
+	u32 last_gprc;
+	u32 last_gptc;
+	u32 last_gorc;
+	u32 last_gotc;
+	u32 last_mprc;
+	u32 last_gotlbc;
+	u32 last_gptlbc;
+	u32 last_gorlbc;
+	u32 last_gprlbc;
+
+	u64 gprc;
+	u64 gptc;
+	u64 gorc;
+	u64 gotc;
+	u64 mprc;
+	u64 gotlbc;
+	u64 gptlbc;
+	u64 gorlbc;
+	u64 gprlbc;
+};
+
+#include "mbx.h"
+
+struct e1000_mac_operations {
+	/* Function pointers for the MAC. */
+	s32  (*init_params)(struct e1000_hw *);
+	s32  (*check_for_link)(struct e1000_hw *);
+	void (*clear_vfta)(struct e1000_hw *);
+	s32  (*get_bus_info)(struct e1000_hw *);
+	s32  (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *);
+	void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32, u32, u32);
+	s32  (*set_uc_addr)(struct e1000_hw *, u32, u8 *);
+	s32  (*reset_hw)(struct e1000_hw *);
+	s32  (*init_hw)(struct e1000_hw *);
+	s32  (*setup_link)(struct e1000_hw *);
+	void (*write_vfta)(struct e1000_hw *, u32, u32);
+	void (*mta_set)(struct e1000_hw *, u32);
+	void (*rar_set)(struct e1000_hw *, u8*, u32);
+	s32  (*read_mac_addr)(struct e1000_hw *);
+	s32  (*set_vfta)(struct e1000_hw *, u16, bool);
+};
+
+struct e1000_mac_info {
+	struct e1000_mac_operations ops;
+	u8 addr[6];
+	u8 perm_addr[6];
+
+	enum e1000_mac_type type;
+
+	u16 mta_reg_count;
+	u16 rar_entry_count;
+
+	bool get_link_status;
+};
+
+struct e1000_mbx_operations {
+	s32 (*init_params)(struct e1000_hw *hw);
+	s32 (*read)(struct e1000_hw *, u32 *, u16);
+	s32 (*write)(struct e1000_hw *, u32 *, u16);
+	s32 (*read_posted)(struct e1000_hw *, u32 *, u16);
+	s32 (*write_posted)(struct e1000_hw *, u32 *, u16);
+	s32 (*check_for_msg)(struct e1000_hw *);
+	s32 (*check_for_ack)(struct e1000_hw *);
+	s32 (*check_for_rst)(struct e1000_hw *);
+};
+
+struct e1000_mbx_stats {
+	u32 msgs_tx;
+	u32 msgs_rx;
+
+	u32 acks;
+	u32 reqs;
+	u32 rsts;
+};
+
+struct e1000_mbx_info {
+	struct e1000_mbx_operations ops;
+	struct e1000_mbx_stats stats;
+	u32 timeout;
+	u32 usec_delay;
+	u16 size;
+};
+
+struct e1000_dev_spec_vf {
+	u32 vf_number;
+	u32 v2p_mailbox;
+};
+
+struct e1000_hw {
+	void *back;
+
+	u8 __iomem *hw_addr;
+	u8 __iomem *flash_address;
+	unsigned long io_base;
+
+	struct e1000_mac_info  mac;
+	struct e1000_mbx_info mbx;
+	spinlock_t mbx_lock;		/* serializes mailbox ops */
+
+	union {
+		struct e1000_dev_spec_vf vf;
+	} dev_spec;
+
+	u16 device_id;
+	u16 subsystem_vendor_id;
+	u16 subsystem_device_id;
+	u16 vendor_id;
+
+	u8  revision_id;
+};
+
+/* These functions must be implemented by drivers */
+void e1000_rlpml_set_vf(struct e1000_hw *, u16);
+void e1000_init_function_pointers_vf(struct e1000_hw *hw);
+
+#endif /* _E1000_VF_H_ */
diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile
new file mode 100644
index 0000000000..95d1e8c490
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c)  2018 Intel Corporation
+
+#
+# Intel(R) I225-LM/I225-V 2.5G Ethernet Controller
+#
+
+obj-$(CONFIG_IGC) += igc.o
+
+igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o \
+igc_diag.o igc_ethtool.o igc_ptp.o igc_dump.o igc_tsn.o igc_xdp.o
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
new file mode 100644
index 0000000000..85cc163965
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -0,0 +1,715 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_H_
+#define _IGC_H_
+
+#include <linux/kobject.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/ethtool.h>
+#include <linux/sctp.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/timecounter.h>
+#include <linux/net_tstamp.h>
+#include <linux/bitfield.h>
+#include <linux/hrtimer.h>
+#include <net/xdp.h>
+
+#include "igc_hw.h"
+
+void igc_ethtool_set_ops(struct net_device *);
+
+/* Transmit and receive queues */
+#define IGC_MAX_RX_QUEUES		4
+#define IGC_MAX_TX_QUEUES		4
+
+#define MAX_Q_VECTORS			8
+#define MAX_STD_JUMBO_FRAME_SIZE	9216
+
+#define MAX_ETYPE_FILTER		8
+#define IGC_RETA_SIZE			128
+
+/* SDP support */
+#define IGC_N_EXTTS	2
+#define IGC_N_PEROUT	2
+#define IGC_N_SDP	4
+
+#define MAX_FLEX_FILTER			32
+
+#define IGC_MAX_TX_TSTAMP_REGS		4
+
+enum igc_mac_filter_type {
+	IGC_MAC_FILTER_TYPE_DST = 0,
+	IGC_MAC_FILTER_TYPE_SRC
+};
+
+struct igc_tx_queue_stats {
+	u64 packets;
+	u64 bytes;
+	u64 restart_queue;
+	u64 restart_queue2;
+};
+
+struct igc_rx_queue_stats {
+	u64 packets;
+	u64 bytes;
+	u64 drops;
+	u64 csum_err;
+	u64 alloc_failed;
+};
+
+struct igc_rx_packet_stats {
+	u64 ipv4_packets;      /* IPv4 headers processed */
+	u64 ipv4e_packets;     /* IPv4E headers with extensions processed */
+	u64 ipv6_packets;      /* IPv6 headers processed */
+	u64 ipv6e_packets;     /* IPv6E headers with extensions processed */
+	u64 tcp_packets;       /* TCP headers processed */
+	u64 udp_packets;       /* UDP headers processed */
+	u64 sctp_packets;      /* SCTP headers processed */
+	u64 nfs_packets;       /* NFS headers processe */
+	u64 other_packets;
+};
+
+struct igc_tx_timestamp_request {
+	struct sk_buff *skb;   /* reference to the packet being timestamped */
+	unsigned long start;   /* when the tstamp request started (jiffies) */
+	u32 mask;              /* _TSYNCTXCTL_TXTT_{X} bit for this request */
+	u32 regl;              /* which TXSTMPL_{X} register should be used */
+	u32 regh;              /* which TXSTMPH_{X} register should be used */
+	u32 flags;             /* flags that should be added to the tx_buffer */
+};
+
+struct igc_ring_container {
+	struct igc_ring *ring;          /* pointer to linked list of rings */
+	unsigned int total_bytes;       /* total bytes processed this int */
+	unsigned int total_packets;     /* total packets processed this int */
+	u16 work_limit;                 /* total work allowed per interrupt */
+	u8 count;                       /* total number of rings in vector */
+	u8 itr;                         /* current ITR setting for ring */
+};
+
+struct igc_ring {
+	struct igc_q_vector *q_vector;  /* backlink to q_vector */
+	struct net_device *netdev;      /* back pointer to net_device */
+	struct device *dev;             /* device for dma mapping */
+	union {                         /* array of buffer info structs */
+		struct igc_tx_buffer *tx_buffer_info;
+		struct igc_rx_buffer *rx_buffer_info;
+	};
+	void *desc;                     /* descriptor ring memory */
+	unsigned long flags;            /* ring specific flags */
+	void __iomem *tail;             /* pointer to ring tail register */
+	dma_addr_t dma;                 /* phys address of the ring */
+	unsigned int size;              /* length of desc. ring in bytes */
+
+	u16 count;                      /* number of desc. in the ring */
+	u8 queue_index;                 /* logical index of the ring*/
+	u8 reg_idx;                     /* physical index of the ring */
+	bool launchtime_enable;         /* true if LaunchTime is enabled */
+	ktime_t last_tx_cycle;          /* end of the cycle with a launchtime transmission */
+	ktime_t last_ff_cycle;          /* Last cycle with an active first flag */
+
+	u32 start_time;
+	u32 end_time;
+	u32 max_sdu;
+	bool oper_gate_closed;		/* Operating gate. True if the TX Queue is closed */
+	bool admin_gate_closed;		/* Future gate. True if the TX Queue will be closed */
+
+	/* CBS parameters */
+	bool cbs_enable;                /* indicates if CBS is enabled */
+	s32 idleslope;                  /* idleSlope in kbps */
+	s32 sendslope;                  /* sendSlope in kbps */
+	s32 hicredit;                   /* hiCredit in bytes */
+	s32 locredit;                   /* loCredit in bytes */
+
+	/* everything past this point are written often */
+	u16 next_to_clean;
+	u16 next_to_use;
+	u16 next_to_alloc;
+
+	union {
+		/* TX */
+		struct {
+			struct igc_tx_queue_stats tx_stats;
+			struct u64_stats_sync tx_syncp;
+			struct u64_stats_sync tx_syncp2;
+		};
+		/* RX */
+		struct {
+			struct igc_rx_queue_stats rx_stats;
+			struct igc_rx_packet_stats pkt_stats;
+			struct u64_stats_sync rx_syncp;
+			struct sk_buff *skb;
+		};
+	};
+
+	struct xdp_rxq_info xdp_rxq;
+	struct xsk_buff_pool *xsk_pool;
+} ____cacheline_internodealigned_in_smp;
+
+/* Board specific private data structure */
+struct igc_adapter {
+	struct net_device *netdev;
+
+	struct ethtool_eee eee;
+	u16 eee_advert;
+
+	unsigned long state;
+	unsigned int flags;
+	unsigned int num_q_vectors;
+
+	struct msix_entry *msix_entries;
+
+	/* TX */
+	u16 tx_work_limit;
+	u32 tx_timeout_count;
+	int num_tx_queues;
+	struct igc_ring *tx_ring[IGC_MAX_TX_QUEUES];
+
+	/* RX */
+	int num_rx_queues;
+	struct igc_ring *rx_ring[IGC_MAX_RX_QUEUES];
+
+	struct timer_list watchdog_timer;
+	struct timer_list dma_err_timer;
+	struct timer_list phy_info_timer;
+	struct hrtimer hrtimer;
+
+	u32 wol;
+	u32 en_mng_pt;
+	u16 link_speed;
+	u16 link_duplex;
+
+	u8 port_num;
+
+	u8 __iomem *io_addr;
+	/* Interrupt Throttle Rate */
+	u32 rx_itr_setting;
+	u32 tx_itr_setting;
+
+	struct work_struct reset_task;
+	struct work_struct watchdog_task;
+	struct work_struct dma_err_task;
+	bool fc_autoneg;
+
+	u8 tx_timeout_factor;
+
+	int msg_enable;
+	u32 max_frame_size;
+	u32 min_frame_size;
+
+	int tc_setup_type;
+	ktime_t base_time;
+	ktime_t cycle_time;
+	bool taprio_offload_enable;
+	u32 qbv_config_change_errors;
+	bool qbv_transition;
+	unsigned int qbv_count;
+	/* Access to oper_gate_closed, admin_gate_closed and qbv_transition
+	 * are protected by the qbv_tx_lock.
+	 */
+	spinlock_t qbv_tx_lock;
+
+	/* OS defined structs */
+	struct pci_dev *pdev;
+	/* lock for statistics */
+	spinlock_t stats64_lock;
+	struct rtnl_link_stats64 stats64;
+
+	/* structs defined in igc_hw.h */
+	struct igc_hw hw;
+	struct igc_hw_stats stats;
+
+	struct igc_q_vector *q_vector[MAX_Q_VECTORS];
+	u32 eims_enable_mask;
+	u32 eims_other;
+
+	u16 tx_ring_count;
+	u16 rx_ring_count;
+
+	u32 tx_hwtstamp_timeouts;
+	u32 tx_hwtstamp_skipped;
+	u32 rx_hwtstamp_cleared;
+
+	u32 rss_queues;
+	u32 rss_indir_tbl_init;
+
+	/* Any access to elements in nfc_rule_list is protected by the
+	 * nfc_rule_lock.
+	 */
+	struct mutex nfc_rule_lock;
+	struct list_head nfc_rule_list;
+	unsigned int nfc_rule_count;
+
+	u8 rss_indir_tbl[IGC_RETA_SIZE];
+
+	unsigned long link_check_timeout;
+	struct igc_info ei;
+
+	u32 test_icr;
+
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info ptp_caps;
+	/* Access to ptp_tx_skb and ptp_tx_start are protected by the
+	 * ptp_tx_lock.
+	 */
+	spinlock_t ptp_tx_lock;
+	struct igc_tx_timestamp_request tx_tstamp[IGC_MAX_TX_TSTAMP_REGS];
+	struct hwtstamp_config tstamp_config;
+	unsigned int ptp_flags;
+	/* System time value lock */
+	spinlock_t tmreg_lock;
+	struct cyclecounter cc;
+	struct timecounter tc;
+	struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */
+	ktime_t ptp_reset_start; /* Reset time in clock mono */
+	struct system_time_snapshot snapshot;
+
+	char fw_version[32];
+
+	struct bpf_prog *xdp_prog;
+
+	bool pps_sys_wrap_on;
+
+	struct ptp_pin_desc sdp_config[IGC_N_SDP];
+	struct {
+		struct timespec64 start;
+		struct timespec64 period;
+	} perout[IGC_N_PEROUT];
+};
+
+void igc_up(struct igc_adapter *adapter);
+void igc_down(struct igc_adapter *adapter);
+int igc_open(struct net_device *netdev);
+int igc_close(struct net_device *netdev);
+int igc_setup_tx_resources(struct igc_ring *ring);
+int igc_setup_rx_resources(struct igc_ring *ring);
+void igc_free_tx_resources(struct igc_ring *ring);
+void igc_free_rx_resources(struct igc_ring *ring);
+unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter);
+void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
+			      const u32 max_rss_queues);
+int igc_reinit_queues(struct igc_adapter *adapter);
+void igc_write_rss_indir_tbl(struct igc_adapter *adapter);
+bool igc_has_link(struct igc_adapter *adapter);
+void igc_reset(struct igc_adapter *adapter);
+void igc_update_stats(struct igc_adapter *adapter);
+void igc_disable_rx_ring(struct igc_ring *ring);
+void igc_enable_rx_ring(struct igc_ring *ring);
+void igc_disable_tx_ring(struct igc_ring *ring);
+void igc_enable_tx_ring(struct igc_ring *ring);
+int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
+
+/* igc_dump declarations */
+void igc_rings_dump(struct igc_adapter *adapter);
+void igc_regs_dump(struct igc_adapter *adapter);
+
+extern char igc_driver_name[];
+
+#define IGC_REGS_LEN			740
+
+/* flags controlling PTP/1588 function */
+#define IGC_PTP_ENABLED		BIT(0)
+
+/* Flags definitions */
+#define IGC_FLAG_HAS_MSI		BIT(0)
+#define IGC_FLAG_QUEUE_PAIRS		BIT(3)
+#define IGC_FLAG_DMAC			BIT(4)
+#define IGC_FLAG_PTP			BIT(8)
+#define IGC_FLAG_WOL_SUPPORTED		BIT(8)
+#define IGC_FLAG_NEED_LINK_UPDATE	BIT(9)
+#define IGC_FLAG_HAS_MSIX		BIT(13)
+#define IGC_FLAG_EEE			BIT(14)
+#define IGC_FLAG_VLAN_PROMISC		BIT(15)
+#define IGC_FLAG_RX_LEGACY		BIT(16)
+#define IGC_FLAG_TSN_QBV_ENABLED	BIT(17)
+#define IGC_FLAG_TSN_QAV_ENABLED	BIT(18)
+
+#define IGC_FLAG_TSN_ANY_ENABLED \
+	(IGC_FLAG_TSN_QBV_ENABLED | IGC_FLAG_TSN_QAV_ENABLED)
+
+#define IGC_FLAG_RSS_FIELD_IPV4_UDP	BIT(6)
+#define IGC_FLAG_RSS_FIELD_IPV6_UDP	BIT(7)
+
+#define IGC_MRQC_ENABLE_RSS_MQ		0x00000002
+#define IGC_MRQC_RSS_FIELD_IPV4_UDP	0x00400000
+#define IGC_MRQC_RSS_FIELD_IPV6_UDP	0x00800000
+
+/* RX-desc Write-Back format RSS Type's */
+enum igc_rss_type_num {
+	IGC_RSS_TYPE_NO_HASH		= 0,
+	IGC_RSS_TYPE_HASH_TCP_IPV4	= 1,
+	IGC_RSS_TYPE_HASH_IPV4		= 2,
+	IGC_RSS_TYPE_HASH_TCP_IPV6	= 3,
+	IGC_RSS_TYPE_HASH_IPV6_EX	= 4,
+	IGC_RSS_TYPE_HASH_IPV6		= 5,
+	IGC_RSS_TYPE_HASH_TCP_IPV6_EX	= 6,
+	IGC_RSS_TYPE_HASH_UDP_IPV4	= 7,
+	IGC_RSS_TYPE_HASH_UDP_IPV6	= 8,
+	IGC_RSS_TYPE_HASH_UDP_IPV6_EX	= 9,
+	IGC_RSS_TYPE_MAX		= 10,
+};
+#define IGC_RSS_TYPE_MAX_TABLE		16
+#define IGC_RSS_TYPE_MASK		GENMASK(3,0) /* 4-bits (3:0) = mask 0x0F */
+
+/* igc_rss_type - Rx descriptor RSS type field */
+static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
+{
+	/* RSS Type 4-bits (3:0) number: 0-9 (above 9 is reserved)
+	 * Accessing the same bits via u16 (wb.lower.lo_dword.hs_rss.pkt_info)
+	 * is slightly slower than via u32 (wb.lower.lo_dword.data)
+	 */
+	return le32_get_bits(rx_desc->wb.lower.lo_dword.data, IGC_RSS_TYPE_MASK);
+}
+
+/* Interrupt defines */
+#define IGC_START_ITR			648 /* ~6000 ints/sec */
+#define IGC_4K_ITR			980
+#define IGC_20K_ITR			196
+#define IGC_70K_ITR			56
+
+#define IGC_DEFAULT_ITR		3 /* dynamic */
+#define IGC_MAX_ITR_USECS	10000
+#define IGC_MIN_ITR_USECS	10
+#define NON_Q_VECTORS		1
+#define MAX_MSIX_ENTRIES	10
+
+/* TX/RX descriptor defines */
+#define IGC_DEFAULT_TXD		256
+#define IGC_DEFAULT_TX_WORK	128
+#define IGC_MIN_TXD		64
+#define IGC_MAX_TXD		4096
+
+#define IGC_DEFAULT_RXD		256
+#define IGC_MIN_RXD		64
+#define IGC_MAX_RXD		4096
+
+/* Supported Rx Buffer Sizes */
+#define IGC_RXBUFFER_256		256
+#define IGC_RXBUFFER_2048		2048
+#define IGC_RXBUFFER_3072		3072
+
+#define AUTO_ALL_MODES		0
+#define IGC_RX_HDR_LEN			IGC_RXBUFFER_256
+
+/* Transmit and receive latency (for PTP timestamps) */
+#define IGC_I225_TX_LATENCY_10		240
+#define IGC_I225_TX_LATENCY_100		58
+#define IGC_I225_TX_LATENCY_1000	80
+#define IGC_I225_TX_LATENCY_2500	1325
+#define IGC_I225_RX_LATENCY_10		6450
+#define IGC_I225_RX_LATENCY_100		185
+#define IGC_I225_RX_LATENCY_1000	300
+#define IGC_I225_RX_LATENCY_2500	1485
+
+/* RX and TX descriptor control thresholds.
+ * PTHRESH - MAC will consider prefetch if it has fewer than this number of
+ *           descriptors available in its onboard memory.
+ *           Setting this to 0 disables RX descriptor prefetch.
+ * HTHRESH - MAC will only prefetch if there are at least this many descriptors
+ *           available in host memory.
+ *           If PTHRESH is 0, this should also be 0.
+ * WTHRESH - RX descriptor writeback threshold - MAC will delay writing back
+ *           descriptors until either it has this many to write back, or the
+ *           ITR timer expires.
+ */
+#define IGC_RX_PTHRESH			8
+#define IGC_RX_HTHRESH			8
+#define IGC_TX_PTHRESH			8
+#define IGC_TX_HTHRESH			1
+#define IGC_RX_WTHRESH			4
+#define IGC_TX_WTHRESH			16
+
+#define IGC_RX_DMA_ATTR \
+	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+#define IGC_TS_HDR_LEN			16
+
+#define IGC_SKB_PAD			(NET_SKB_PAD + NET_IP_ALIGN)
+
+#if (PAGE_SIZE < 8192)
+#define IGC_MAX_FRAME_BUILD_SKB \
+	(SKB_WITH_OVERHEAD(IGC_RXBUFFER_2048) - IGC_SKB_PAD - IGC_TS_HDR_LEN)
+#else
+#define IGC_MAX_FRAME_BUILD_SKB (IGC_RXBUFFER_2048 - IGC_TS_HDR_LEN)
+#endif
+
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define IGC_RX_BUFFER_WRITE	16 /* Must be power of 2 */
+
+/* VLAN info */
+#define IGC_TX_FLAGS_VLAN_MASK	0xffff0000
+#define IGC_TX_FLAGS_VLAN_SHIFT	16
+
+/* igc_test_staterr - tests bits within Rx descriptor status and error fields */
+static inline __le32 igc_test_staterr(union igc_adv_rx_desc *rx_desc,
+				      const u32 stat_err_bits)
+{
+	return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
+}
+
+enum igc_state_t {
+	__IGC_TESTING,
+	__IGC_RESETTING,
+	__IGC_DOWN,
+};
+
+enum igc_tx_flags {
+	/* cmd_type flags */
+	IGC_TX_FLAGS_VLAN	= 0x01,
+	IGC_TX_FLAGS_TSO	= 0x02,
+	IGC_TX_FLAGS_TSTAMP	= 0x04,
+
+	/* olinfo flags */
+	IGC_TX_FLAGS_IPV4	= 0x10,
+	IGC_TX_FLAGS_CSUM	= 0x20,
+
+	IGC_TX_FLAGS_TSTAMP_1	= 0x100,
+	IGC_TX_FLAGS_TSTAMP_2	= 0x200,
+	IGC_TX_FLAGS_TSTAMP_3	= 0x400,
+};
+
+enum igc_boards {
+	board_base,
+};
+
+/* The largest size we can write to the descriptor is 65535.  In order to
+ * maintain a power of two alignment we have to limit ourselves to 32K.
+ */
+#define IGC_MAX_TXD_PWR		15
+#define IGC_MAX_DATA_PER_TXD	BIT(IGC_MAX_TXD_PWR)
+
+/* Tx Descriptors needed, worst case */
+#define TXD_USE_COUNT(S)	DIV_ROUND_UP((S), IGC_MAX_DATA_PER_TXD)
+#define DESC_NEEDED	(MAX_SKB_FRAGS + 4)
+
+enum igc_tx_buffer_type {
+	IGC_TX_BUFFER_TYPE_SKB,
+	IGC_TX_BUFFER_TYPE_XDP,
+	IGC_TX_BUFFER_TYPE_XSK,
+};
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer
+ */
+struct igc_tx_buffer {
+	union igc_adv_tx_desc *next_to_watch;
+	unsigned long time_stamp;
+	enum igc_tx_buffer_type type;
+	union {
+		struct sk_buff *skb;
+		struct xdp_frame *xdpf;
+	};
+	unsigned int bytecount;
+	u16 gso_segs;
+	__be16 protocol;
+
+	DEFINE_DMA_UNMAP_ADDR(dma);
+	DEFINE_DMA_UNMAP_LEN(len);
+	u32 tx_flags;
+};
+
+struct igc_rx_buffer {
+	union {
+		struct {
+			dma_addr_t dma;
+			struct page *page;
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+			__u32 page_offset;
+#else
+			__u16 page_offset;
+#endif
+			__u16 pagecnt_bias;
+		};
+		struct xdp_buff *xdp;
+	};
+};
+
+/* context wrapper around xdp_buff to provide access to descriptor metadata */
+struct igc_xdp_buff {
+	struct xdp_buff xdp;
+	union igc_adv_rx_desc *rx_desc;
+	ktime_t rx_ts; /* data indication bit IGC_RXDADV_STAT_TSIP */
+};
+
+struct igc_q_vector {
+	struct igc_adapter *adapter;    /* backlink */
+	void __iomem *itr_register;
+	u32 eims_value;                 /* EIMS mask value */
+
+	u16 itr_val;
+	u8 set_itr;
+
+	struct igc_ring_container rx, tx;
+
+	struct napi_struct napi;
+
+	struct rcu_head rcu;    /* to avoid race with update stats on free */
+	char name[IFNAMSIZ + 9];
+	struct net_device poll_dev;
+
+	/* for dynamic allocation of rings associated with this q_vector */
+	struct igc_ring ring[] ____cacheline_internodealigned_in_smp;
+};
+
+enum igc_filter_match_flags {
+	IGC_FILTER_FLAG_ETHER_TYPE =	BIT(0),
+	IGC_FILTER_FLAG_VLAN_TCI   =	BIT(1),
+	IGC_FILTER_FLAG_SRC_MAC_ADDR =	BIT(2),
+	IGC_FILTER_FLAG_DST_MAC_ADDR =	BIT(3),
+	IGC_FILTER_FLAG_USER_DATA =	BIT(4),
+	IGC_FILTER_FLAG_VLAN_ETYPE =	BIT(5),
+};
+
+struct igc_nfc_filter {
+	u8 match_flags;
+	u16 etype;
+	__be16 vlan_etype;
+	u16 vlan_tci;
+	u16 vlan_tci_mask;
+	u8 src_addr[ETH_ALEN];
+	u8 dst_addr[ETH_ALEN];
+	u8 user_data[8];
+	u8 user_mask[8];
+	u8 flex_index;
+	u8 rx_queue;
+	u8 prio;
+	u8 immediate_irq;
+	u8 drop;
+};
+
+struct igc_nfc_rule {
+	struct list_head list;
+	struct igc_nfc_filter filter;
+	u32 location;
+	u16 action;
+	bool flex;
+};
+
+/* IGC supports a total of 32 NFC rules: 16 MAC address based, 8 VLAN priority
+ * based, 8 ethertype based and 32 Flex filter based rules.
+ */
+#define IGC_MAX_RXNFC_RULES		64
+
+struct igc_flex_filter {
+	u8 index;
+	u8 data[128];
+	u8 mask[16];
+	u8 length;
+	u8 rx_queue;
+	u8 prio;
+	u8 immediate_irq;
+	u8 drop;
+};
+
+/* igc_desc_unused - calculate if we have unused descriptors */
+static inline u16 igc_desc_unused(const struct igc_ring *ring)
+{
+	u16 ntc = ring->next_to_clean;
+	u16 ntu = ring->next_to_use;
+
+	return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
+}
+
+static inline s32 igc_get_phy_info(struct igc_hw *hw)
+{
+	if (hw->phy.ops.get_phy_info)
+		return hw->phy.ops.get_phy_info(hw);
+
+	return 0;
+}
+
+static inline s32 igc_reset_phy(struct igc_hw *hw)
+{
+	if (hw->phy.ops.reset)
+		return hw->phy.ops.reset(hw);
+
+	return 0;
+}
+
+static inline struct netdev_queue *txring_txq(const struct igc_ring *tx_ring)
+{
+	return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
+}
+
+enum igc_ring_flags_t {
+	IGC_RING_FLAG_RX_3K_BUFFER,
+	IGC_RING_FLAG_RX_BUILD_SKB_ENABLED,
+	IGC_RING_FLAG_RX_SCTP_CSUM,
+	IGC_RING_FLAG_RX_LB_VLAN_BSWAP,
+	IGC_RING_FLAG_TX_CTX_IDX,
+	IGC_RING_FLAG_TX_DETECT_HANG,
+	IGC_RING_FLAG_AF_XDP_ZC,
+	IGC_RING_FLAG_TX_HWTSTAMP,
+};
+
+#define ring_uses_large_buffer(ring) \
+	test_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+#define set_ring_uses_large_buffer(ring) \
+	set_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+#define clear_ring_uses_large_buffer(ring) \
+	clear_bit(IGC_RING_FLAG_RX_3K_BUFFER, &(ring)->flags)
+
+#define ring_uses_build_skb(ring) \
+	test_bit(IGC_RING_FLAG_RX_BUILD_SKB_ENABLED, &(ring)->flags)
+
+static inline unsigned int igc_rx_bufsz(struct igc_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring_uses_large_buffer(ring))
+		return IGC_RXBUFFER_3072;
+
+	if (ring_uses_build_skb(ring))
+		return IGC_MAX_FRAME_BUILD_SKB + IGC_TS_HDR_LEN;
+#endif
+	return IGC_RXBUFFER_2048;
+}
+
+static inline unsigned int igc_rx_pg_order(struct igc_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring_uses_large_buffer(ring))
+		return 1;
+#endif
+	return 0;
+}
+
+static inline s32 igc_read_phy_reg(struct igc_hw *hw, u32 offset, u16 *data)
+{
+	if (hw->phy.ops.read_reg)
+		return hw->phy.ops.read_reg(hw, offset, data);
+
+	return -EOPNOTSUPP;
+}
+
+void igc_reinit_locked(struct igc_adapter *);
+struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter,
+				      u32 location);
+int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule);
+void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule);
+
+void igc_ptp_init(struct igc_adapter *adapter);
+void igc_ptp_reset(struct igc_adapter *adapter);
+void igc_ptp_suspend(struct igc_adapter *adapter);
+void igc_ptp_stop(struct igc_adapter *adapter);
+ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf);
+int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
+int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
+void igc_ptp_tx_hang(struct igc_adapter *adapter);
+void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts);
+void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter);
+
+#define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring))
+
+#define IGC_TXD_DCMD	(IGC_ADVTXD_DCMD_EOP | IGC_ADVTXD_DCMD_RS)
+
+#define IGC_RX_DESC(R, i)       \
+	(&(((union igc_adv_rx_desc *)((R)->desc))[i]))
+#define IGC_TX_DESC(R, i)       \
+	(&(((union igc_adv_tx_desc *)((R)->desc))[i]))
+#define IGC_TX_CTXTDESC(R, i)   \
+	(&(((struct igc_adv_tx_context_desc *)((R)->desc))[i]))
+
+#endif /* _IGC_H_ */
diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c
new file mode 100644
index 0000000000..a1d815af50
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_base.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2018 Intel Corporation */
+
+#include <linux/delay.h>
+
+#include "igc_hw.h"
+#include "igc_i225.h"
+#include "igc_mac.h"
+#include "igc_base.h"
+#include "igc.h"
+
+/**
+ * igc_reset_hw_base - Reset hardware
+ * @hw: pointer to the HW structure
+ *
+ * This resets the hardware into a known state.  This is a
+ * function pointer entry point called by the api module.
+ */
+static s32 igc_reset_hw_base(struct igc_hw *hw)
+{
+	s32 ret_val;
+	u32 ctrl;
+
+	/* Prevent the PCI-E bus from sticking if there is no TLP connection
+	 * on the last TLP read/write transaction when MAC is reset.
+	 */
+	ret_val = igc_disable_pcie_master(hw);
+	if (ret_val)
+		hw_dbg("PCI-E Master disable polling has failed\n");
+
+	hw_dbg("Masking off all interrupts\n");
+	wr32(IGC_IMC, 0xffffffff);
+
+	wr32(IGC_RCTL, 0);
+	wr32(IGC_TCTL, IGC_TCTL_PSP);
+	wrfl();
+
+	usleep_range(10000, 20000);
+
+	ctrl = rd32(IGC_CTRL);
+
+	hw_dbg("Issuing a global reset to MAC\n");
+	wr32(IGC_CTRL, ctrl | IGC_CTRL_RST);
+
+	ret_val = igc_get_auto_rd_done(hw);
+	if (ret_val) {
+		/* When auto config read does not complete, do not
+		 * return with an error. This can happen in situations
+		 * where there is no eeprom and prevents getting link.
+		 */
+		hw_dbg("Auto Read Done did not complete\n");
+	}
+
+	/* Clear any pending interrupt events. */
+	wr32(IGC_IMC, 0xffffffff);
+	rd32(IGC_ICR);
+
+	return ret_val;
+}
+
+/**
+ * igc_init_nvm_params_base - Init NVM func ptrs.
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_init_nvm_params_base(struct igc_hw *hw)
+{
+	struct igc_nvm_info *nvm = &hw->nvm;
+	u32 eecd = rd32(IGC_EECD);
+	u16 size;
+
+	size = (u16)((eecd & IGC_EECD_SIZE_EX_MASK) >>
+		     IGC_EECD_SIZE_EX_SHIFT);
+
+	/* Added to a constant, "size" becomes the left-shift value
+	 * for setting word_size.
+	 */
+	size += NVM_WORD_SIZE_BASE_SHIFT;
+
+	/* Just in case size is out of range, cap it to the largest
+	 * EEPROM size supported
+	 */
+	if (size > 15)
+		size = 15;
+
+	nvm->type = igc_nvm_eeprom_spi;
+	nvm->word_size = BIT(size);
+	nvm->opcode_bits = 8;
+	nvm->delay_usec = 1;
+
+	nvm->page_size = eecd & IGC_EECD_ADDR_BITS ? 32 : 8;
+	nvm->address_bits = eecd & IGC_EECD_ADDR_BITS ?
+			    16 : 8;
+
+	if (nvm->word_size == BIT(15))
+		nvm->page_size = 128;
+
+	return 0;
+}
+
+/**
+ * igc_setup_copper_link_base - Configure copper link settings
+ * @hw: pointer to the HW structure
+ *
+ * Configures the link for auto-neg or forced speed and duplex.  Then we check
+ * for link, once link is established calls to configure collision distance
+ * and flow control are called.
+ */
+static s32 igc_setup_copper_link_base(struct igc_hw *hw)
+{
+	s32  ret_val = 0;
+	u32 ctrl;
+
+	ctrl = rd32(IGC_CTRL);
+	ctrl |= IGC_CTRL_SLU;
+	ctrl &= ~(IGC_CTRL_FRCSPD | IGC_CTRL_FRCDPX);
+	wr32(IGC_CTRL, ctrl);
+
+	ret_val = igc_setup_copper_link(hw);
+
+	return ret_val;
+}
+
+/**
+ * igc_init_mac_params_base - Init MAC func ptrs.
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_init_mac_params_base(struct igc_hw *hw)
+{
+	struct igc_dev_spec_base *dev_spec = &hw->dev_spec._base;
+	struct igc_mac_info *mac = &hw->mac;
+
+	/* Set mta register count */
+	mac->mta_reg_count = 128;
+	mac->rar_entry_count = IGC_RAR_ENTRIES;
+
+	/* reset */
+	mac->ops.reset_hw = igc_reset_hw_base;
+
+	mac->ops.acquire_swfw_sync = igc_acquire_swfw_sync_i225;
+	mac->ops.release_swfw_sync = igc_release_swfw_sync_i225;
+
+	/* Allow a single clear of the SW semaphore on I225 */
+	if (mac->type == igc_i225)
+		dev_spec->clear_semaphore_once = true;
+
+	/* physical interface link setup */
+	mac->ops.setup_physical_interface = igc_setup_copper_link_base;
+
+	return 0;
+}
+
+/**
+ * igc_init_phy_params_base - Init PHY func ptrs.
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_init_phy_params_base(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+
+	phy->autoneg_mask	= AUTONEG_ADVERTISE_SPEED_DEFAULT_2500;
+	phy->reset_delay_us	= 100;
+
+	/* set lan id */
+	hw->bus.func = (rd32(IGC_STATUS) & IGC_STATUS_FUNC_MASK) >>
+			IGC_STATUS_FUNC_SHIFT;
+
+	/* Make sure the PHY is in a good state. Several people have reported
+	 * firmware leaving the PHY's page select register set to something
+	 * other than the default of zero, which causes the PHY ID read to
+	 * access something other than the intended register.
+	 */
+	ret_val = hw->phy.ops.reset(hw);
+	if (ret_val) {
+		hw_dbg("Error resetting the PHY\n");
+		goto out;
+	}
+
+	ret_val = igc_get_phy_id(hw);
+	if (ret_val)
+		return ret_val;
+
+	igc_check_for_copper_link(hw);
+
+out:
+	return ret_val;
+}
+
+static s32 igc_get_invariants_base(struct igc_hw *hw)
+{
+	struct igc_mac_info *mac = &hw->mac;
+	s32 ret_val = 0;
+
+	switch (hw->device_id) {
+	case IGC_DEV_ID_I225_LM:
+	case IGC_DEV_ID_I225_V:
+	case IGC_DEV_ID_I225_I:
+	case IGC_DEV_ID_I220_V:
+	case IGC_DEV_ID_I225_K:
+	case IGC_DEV_ID_I225_K2:
+	case IGC_DEV_ID_I226_K:
+	case IGC_DEV_ID_I225_LMVP:
+	case IGC_DEV_ID_I226_LMVP:
+	case IGC_DEV_ID_I225_IT:
+	case IGC_DEV_ID_I226_LM:
+	case IGC_DEV_ID_I226_V:
+	case IGC_DEV_ID_I226_IT:
+	case IGC_DEV_ID_I221_V:
+	case IGC_DEV_ID_I226_BLANK_NVM:
+	case IGC_DEV_ID_I225_BLANK_NVM:
+		mac->type = igc_i225;
+		break;
+	default:
+		return -IGC_ERR_MAC_INIT;
+	}
+
+	hw->phy.media_type = igc_media_type_copper;
+
+	/* mac initialization and operations */
+	ret_val = igc_init_mac_params_base(hw);
+	if (ret_val)
+		goto out;
+
+	/* NVM initialization */
+	ret_val = igc_init_nvm_params_base(hw);
+	switch (hw->mac.type) {
+	case igc_i225:
+		ret_val = igc_init_nvm_params_i225(hw);
+		break;
+	default:
+		break;
+	}
+
+	/* setup PHY parameters */
+	ret_val = igc_init_phy_params_base(hw);
+	if (ret_val)
+		goto out;
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_acquire_phy_base - Acquire rights to access PHY
+ * @hw: pointer to the HW structure
+ *
+ * Acquire access rights to the correct PHY.  This is a
+ * function pointer entry point called by the api module.
+ */
+static s32 igc_acquire_phy_base(struct igc_hw *hw)
+{
+	u16 mask = IGC_SWFW_PHY0_SM;
+
+	return hw->mac.ops.acquire_swfw_sync(hw, mask);
+}
+
+/**
+ * igc_release_phy_base - Release rights to access PHY
+ * @hw: pointer to the HW structure
+ *
+ * A wrapper to release access rights to the correct PHY.  This is a
+ * function pointer entry point called by the api module.
+ */
+static void igc_release_phy_base(struct igc_hw *hw)
+{
+	u16 mask = IGC_SWFW_PHY0_SM;
+
+	hw->mac.ops.release_swfw_sync(hw, mask);
+}
+
+/**
+ * igc_init_hw_base - Initialize hardware
+ * @hw: pointer to the HW structure
+ *
+ * This inits the hardware readying it for operation.
+ */
+static s32 igc_init_hw_base(struct igc_hw *hw)
+{
+	struct igc_mac_info *mac = &hw->mac;
+	u16 i, rar_count = mac->rar_entry_count;
+	s32 ret_val = 0;
+
+	/* Setup the receive address */
+	igc_init_rx_addrs(hw, rar_count);
+
+	/* Zero out the Multicast HASH table */
+	hw_dbg("Zeroing the MTA\n");
+	for (i = 0; i < mac->mta_reg_count; i++)
+		array_wr32(IGC_MTA, i, 0);
+
+	/* Zero out the Unicast HASH table */
+	hw_dbg("Zeroing the UTA\n");
+	for (i = 0; i < mac->uta_reg_count; i++)
+		array_wr32(IGC_UTA, i, 0);
+
+	/* Setup link and flow control */
+	ret_val = igc_setup_link(hw);
+
+	/* Clear all of the statistics registers (clear on read).  It is
+	 * important that we do this after we have tried to establish link
+	 * because the symbol error count will increment wildly if there
+	 * is no link.
+	 */
+	igc_clear_hw_cntrs_base(hw);
+
+	return ret_val;
+}
+
+/**
+ * igc_power_down_phy_copper_base - Remove link during PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, or wake on lan is not enabled, remove the link.
+ */
+void igc_power_down_phy_copper_base(struct igc_hw *hw)
+{
+	/* If the management interface is not enabled, then power down */
+	if (!(igc_enable_mng_pass_thru(hw) || igc_check_reset_block(hw)))
+		igc_power_down_phy_copper(hw);
+}
+
+/**
+ * igc_rx_fifo_flush_base - Clean rx fifo after Rx enable
+ * @hw: pointer to the HW structure
+ *
+ * After Rx enable, if manageability is enabled then there is likely some
+ * bad data at the start of the fifo and possibly in the DMA fifo.  This
+ * function clears the fifos and flushes any packets that came in as rx was
+ * being enabled.
+ */
+void igc_rx_fifo_flush_base(struct igc_hw *hw)
+{
+	u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled;
+	int i, ms_wait;
+
+	/* disable IPv6 options as per hardware errata */
+	rfctl = rd32(IGC_RFCTL);
+	rfctl |= IGC_RFCTL_IPV6_EX_DIS;
+	wr32(IGC_RFCTL, rfctl);
+
+	if (!(rd32(IGC_MANC) & IGC_MANC_RCV_TCO_EN))
+		return;
+
+	/* Disable all Rx queues */
+	for (i = 0; i < 4; i++) {
+		rxdctl[i] = rd32(IGC_RXDCTL(i));
+		wr32(IGC_RXDCTL(i),
+		     rxdctl[i] & ~IGC_RXDCTL_QUEUE_ENABLE);
+	}
+	/* Poll all queues to verify they have shut down */
+	for (ms_wait = 0; ms_wait < 10; ms_wait++) {
+		usleep_range(1000, 2000);
+		rx_enabled = 0;
+		for (i = 0; i < 4; i++)
+			rx_enabled |= rd32(IGC_RXDCTL(i));
+		if (!(rx_enabled & IGC_RXDCTL_QUEUE_ENABLE))
+			break;
+	}
+
+	if (ms_wait == 10)
+		hw_dbg("Queue disable timed out after 10ms\n");
+
+	/* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all
+	 * incoming packets are rejected.  Set enable and wait 2ms so that
+	 * any packet that was coming in as RCTL.EN was set is flushed
+	 */
+	wr32(IGC_RFCTL, rfctl & ~IGC_RFCTL_LEF);
+
+	rlpml = rd32(IGC_RLPML);
+	wr32(IGC_RLPML, 0);
+
+	rctl = rd32(IGC_RCTL);
+	temp_rctl = rctl & ~(IGC_RCTL_EN | IGC_RCTL_SBP);
+	temp_rctl |= IGC_RCTL_LPE;
+
+	wr32(IGC_RCTL, temp_rctl);
+	wr32(IGC_RCTL, temp_rctl | IGC_RCTL_EN);
+	wrfl();
+	usleep_range(2000, 3000);
+
+	/* Enable Rx queues that were previously enabled and restore our
+	 * previous state
+	 */
+	for (i = 0; i < 4; i++)
+		wr32(IGC_RXDCTL(i), rxdctl[i]);
+	wr32(IGC_RCTL, rctl);
+	wrfl();
+
+	wr32(IGC_RLPML, rlpml);
+	wr32(IGC_RFCTL, rfctl);
+
+	/* Flush receive errors generated by workaround */
+	rd32(IGC_ROC);
+	rd32(IGC_RNBC);
+	rd32(IGC_MPC);
+}
+
+bool igc_is_device_id_i225(struct igc_hw *hw)
+{
+	switch (hw->device_id) {
+	case IGC_DEV_ID_I225_LM:
+	case IGC_DEV_ID_I225_V:
+	case IGC_DEV_ID_I225_I:
+	case IGC_DEV_ID_I225_K:
+	case IGC_DEV_ID_I225_K2:
+	case IGC_DEV_ID_I225_LMVP:
+	case IGC_DEV_ID_I225_IT:
+		return true;
+	default:
+		return false;
+	}
+}
+
+bool igc_is_device_id_i226(struct igc_hw *hw)
+{
+	switch (hw->device_id) {
+	case IGC_DEV_ID_I226_LM:
+	case IGC_DEV_ID_I226_V:
+	case IGC_DEV_ID_I226_K:
+	case IGC_DEV_ID_I226_IT:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static struct igc_mac_operations igc_mac_ops_base = {
+	.init_hw		= igc_init_hw_base,
+	.check_for_link		= igc_check_for_copper_link,
+	.rar_set		= igc_rar_set,
+	.read_mac_addr		= igc_read_mac_addr,
+	.get_speed_and_duplex	= igc_get_speed_and_duplex_copper,
+};
+
+static const struct igc_phy_operations igc_phy_ops_base = {
+	.acquire		= igc_acquire_phy_base,
+	.release		= igc_release_phy_base,
+	.reset			= igc_phy_hw_reset,
+	.read_reg		= igc_read_phy_reg_gpy,
+	.write_reg		= igc_write_phy_reg_gpy,
+};
+
+const struct igc_info igc_base_info = {
+	.get_invariants		= igc_get_invariants_base,
+	.mac_ops		= &igc_mac_ops_base,
+	.phy_ops		= &igc_phy_ops_base,
+};
diff --git a/drivers/net/ethernet/intel/igc/igc_base.h b/drivers/net/ethernet/intel/igc/igc_base.h
new file mode 100644
index 0000000000..f7d6491d4c
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_base.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_BASE_H_
+#define _IGC_BASE_H_
+
+/* forward declaration */
+void igc_rx_fifo_flush_base(struct igc_hw *hw);
+void igc_power_down_phy_copper_base(struct igc_hw *hw);
+bool igc_is_device_id_i225(struct igc_hw *hw);
+bool igc_is_device_id_i226(struct igc_hw *hw);
+
+/* Transmit Descriptor - Advanced */
+union igc_adv_tx_desc {
+	struct {
+		__le64 buffer_addr;    /* Address of descriptor's data buf */
+		__le32 cmd_type_len;
+		__le32 olinfo_status;
+	} read;
+	struct {
+		__le64 rsvd;       /* Reserved */
+		__le32 nxtseq_seed;
+		__le32 status;
+	} wb;
+};
+
+/* Context descriptors */
+struct igc_adv_tx_context_desc {
+	__le32 vlan_macip_lens;
+	__le32 launch_time;
+	__le32 type_tucmd_mlhl;
+	__le32 mss_l4len_idx;
+};
+
+/* Adv Transmit Descriptor Config Masks */
+#define IGC_ADVTXD_MAC_TSTAMP	0x00080000 /* IEEE1588 Timestamp packet */
+#define IGC_ADVTXD_TSTAMP_REG_1	0x00010000 /* Select register 1 for timestamp */
+#define IGC_ADVTXD_TSTAMP_REG_2	0x00020000 /* Select register 2 for timestamp */
+#define IGC_ADVTXD_TSTAMP_REG_3	0x00030000 /* Select register 3 for timestamp */
+#define IGC_ADVTXD_DTYP_CTXT	0x00200000 /* Advanced Context Descriptor */
+#define IGC_ADVTXD_DTYP_DATA	0x00300000 /* Advanced Data Descriptor */
+#define IGC_ADVTXD_DCMD_EOP	0x01000000 /* End of Packet */
+#define IGC_ADVTXD_DCMD_IFCS	0x02000000 /* Insert FCS (Ethernet CRC) */
+#define IGC_ADVTXD_DCMD_RS	0x08000000 /* Report Status */
+#define IGC_ADVTXD_DCMD_DEXT	0x20000000 /* Descriptor extension (1=Adv) */
+#define IGC_ADVTXD_DCMD_VLE	0x40000000 /* VLAN pkt enable */
+#define IGC_ADVTXD_DCMD_TSE	0x80000000 /* TCP Seg enable */
+#define IGC_ADVTXD_PAYLEN_SHIFT	14 /* Adv desc PAYLEN shift */
+
+#define IGC_RAR_ENTRIES		16
+
+/* Receive Descriptor - Advanced */
+union igc_adv_rx_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+	} read;
+	struct {
+		struct {
+			union {
+				__le32 data;
+				struct {
+					__le16 pkt_info; /*RSS type, Pkt type*/
+					/* Split Header, header buffer len */
+					__le16 hdr_info;
+				} hs_rss;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				struct {
+					__le16 ip_id; /* IP id */
+					__le16 csum; /* Packet Checksum */
+				} csum_ip;
+			} hi_dword;
+		} lower;
+		struct {
+			__le32 status_error; /* ext status/error */
+			__le16 length; /* Packet length */
+			__le16 vlan; /* VLAN tag */
+		} upper;
+	} wb;  /* writeback */
+};
+
+/* Additional Transmit Descriptor Control definitions */
+#define IGC_TXDCTL_QUEUE_ENABLE	0x02000000 /* Ena specific Tx Queue */
+#define IGC_TXDCTL_SWFLUSH	0x04000000 /* Transmit Software Flush */
+
+/* Additional Receive Descriptor Control definitions */
+#define IGC_RXDCTL_QUEUE_ENABLE	0x02000000 /* Ena specific Rx Queue */
+#define IGC_RXDCTL_SWFLUSH		0x04000000 /* Receive Software Flush */
+
+/* SRRCTL bit definitions */
+#define IGC_SRRCTL_BSIZEPKT_MASK	GENMASK(6, 0)
+#define IGC_SRRCTL_BSIZEPKT(x)		FIELD_PREP(IGC_SRRCTL_BSIZEPKT_MASK, \
+					(x) / 1024) /* in 1 KB resolution */
+#define IGC_SRRCTL_BSIZEHDR_MASK	GENMASK(13, 8)
+#define IGC_SRRCTL_BSIZEHDR(x)		FIELD_PREP(IGC_SRRCTL_BSIZEHDR_MASK, \
+					(x) / 64) /* in 64 bytes resolution */
+#define IGC_SRRCTL_DESCTYPE_MASK	GENMASK(27, 25)
+#define IGC_SRRCTL_DESCTYPE_ADV_ONEBUF	FIELD_PREP(IGC_SRRCTL_DESCTYPE_MASK, 1)
+
+#endif /* _IGC_BASE_H */
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
new file mode 100644
index 0000000000..b3037016f3
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -0,0 +1,687 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_DEFINES_H_
+#define _IGC_DEFINES_H_
+
+/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define REQ_TX_DESCRIPTOR_MULTIPLE	8
+#define REQ_RX_DESCRIPTOR_MULTIPLE	8
+
+#define IGC_CTRL_EXT_SDP2_DIR	0x00000400 /* SDP2 Data direction */
+#define IGC_CTRL_EXT_SDP3_DIR	0x00000800 /* SDP3 Data direction */
+#define IGC_CTRL_EXT_DRV_LOAD	0x10000000 /* Drv loaded bit for FW */
+
+/* Definitions for power management and wakeup registers */
+/* Wake Up Control */
+#define IGC_WUC_PME_EN	0x00000002 /* PME Enable */
+
+/* Wake Up Filter Control */
+#define IGC_WUFC_LNKC		0x00000001 /* Link Status Change Wakeup Enable */
+#define IGC_WUFC_MAG		0x00000002 /* Magic Packet Wakeup Enable */
+#define IGC_WUFC_EX		0x00000004 /* Directed Exact Wakeup Enable */
+#define IGC_WUFC_MC		0x00000008 /* Directed Multicast Wakeup Enable */
+#define IGC_WUFC_BC		0x00000010 /* Broadcast Wakeup Enable */
+#define IGC_WUFC_FLEX_HQ	BIT(14)	   /* Flex Filters Host Queuing */
+#define IGC_WUFC_FLX0		BIT(16)	   /* Flexible Filter 0 Enable */
+#define IGC_WUFC_FLX1		BIT(17)	   /* Flexible Filter 1 Enable */
+#define IGC_WUFC_FLX2		BIT(18)	   /* Flexible Filter 2 Enable */
+#define IGC_WUFC_FLX3		BIT(19)	   /* Flexible Filter 3 Enable */
+#define IGC_WUFC_FLX4		BIT(20)	   /* Flexible Filter 4 Enable */
+#define IGC_WUFC_FLX5		BIT(21)	   /* Flexible Filter 5 Enable */
+#define IGC_WUFC_FLX6		BIT(22)	   /* Flexible Filter 6 Enable */
+#define IGC_WUFC_FLX7		BIT(23)	   /* Flexible Filter 7 Enable */
+
+#define IGC_WUFC_FILTER_MASK GENMASK(23, 14)
+
+#define IGC_CTRL_ADVD3WUC	0x00100000  /* D3 WUC */
+
+/* Wake Up Status */
+#define IGC_WUS_EX	0x00000004 /* Directed Exact */
+#define IGC_WUS_ARPD	0x00000020 /* Directed ARP Request */
+#define IGC_WUS_IPV4	0x00000040 /* Directed IPv4 */
+#define IGC_WUS_IPV6	0x00000080 /* Directed IPv6 */
+#define IGC_WUS_NSD	0x00000400 /* Directed IPv6 Neighbor Solicitation */
+
+/* Packet types that are enabled for wake packet delivery */
+#define WAKE_PKT_WUS ( \
+	IGC_WUS_EX   | \
+	IGC_WUS_ARPD | \
+	IGC_WUS_IPV4 | \
+	IGC_WUS_IPV6 | \
+	IGC_WUS_NSD)
+
+/* Wake Up Packet Length */
+#define IGC_WUPL_MASK	0x00000FFF
+
+/* Wake Up Packet Memory stores the first 128 bytes of the wake up packet */
+#define IGC_WUPM_BYTES	128
+
+/* Wakeup Filter Control Extended */
+#define IGC_WUFC_EXT_FLX8	BIT(8)	/* Flexible Filter 8 Enable */
+#define IGC_WUFC_EXT_FLX9	BIT(9)	/* Flexible Filter 9 Enable */
+#define IGC_WUFC_EXT_FLX10	BIT(10)	/* Flexible Filter 10 Enable */
+#define IGC_WUFC_EXT_FLX11	BIT(11)	/* Flexible Filter 11 Enable */
+#define IGC_WUFC_EXT_FLX12	BIT(12)	/* Flexible Filter 12 Enable */
+#define IGC_WUFC_EXT_FLX13	BIT(13)	/* Flexible Filter 13 Enable */
+#define IGC_WUFC_EXT_FLX14	BIT(14)	/* Flexible Filter 14 Enable */
+#define IGC_WUFC_EXT_FLX15	BIT(15)	/* Flexible Filter 15 Enable */
+#define IGC_WUFC_EXT_FLX16	BIT(16)	/* Flexible Filter 16 Enable */
+#define IGC_WUFC_EXT_FLX17	BIT(17)	/* Flexible Filter 17 Enable */
+#define IGC_WUFC_EXT_FLX18	BIT(18)	/* Flexible Filter 18 Enable */
+#define IGC_WUFC_EXT_FLX19	BIT(19)	/* Flexible Filter 19 Enable */
+#define IGC_WUFC_EXT_FLX20	BIT(20)	/* Flexible Filter 20 Enable */
+#define IGC_WUFC_EXT_FLX21	BIT(21)	/* Flexible Filter 21 Enable */
+#define IGC_WUFC_EXT_FLX22	BIT(22)	/* Flexible Filter 22 Enable */
+#define IGC_WUFC_EXT_FLX23	BIT(23)	/* Flexible Filter 23 Enable */
+#define IGC_WUFC_EXT_FLX24	BIT(24)	/* Flexible Filter 24 Enable */
+#define IGC_WUFC_EXT_FLX25	BIT(25)	/* Flexible Filter 25 Enable */
+#define IGC_WUFC_EXT_FLX26	BIT(26)	/* Flexible Filter 26 Enable */
+#define IGC_WUFC_EXT_FLX27	BIT(27)	/* Flexible Filter 27 Enable */
+#define IGC_WUFC_EXT_FLX28	BIT(28)	/* Flexible Filter 28 Enable */
+#define IGC_WUFC_EXT_FLX29	BIT(29)	/* Flexible Filter 29 Enable */
+#define IGC_WUFC_EXT_FLX30	BIT(30)	/* Flexible Filter 30 Enable */
+#define IGC_WUFC_EXT_FLX31	BIT(31)	/* Flexible Filter 31 Enable */
+
+#define IGC_WUFC_EXT_FILTER_MASK GENMASK(31, 8)
+
+/* Loop limit on how long we wait for auto-negotiation to complete */
+#define COPPER_LINK_UP_LIMIT		10
+#define PHY_AUTO_NEG_LIMIT		45
+
+/* Number of 100 microseconds we wait for PCI Express master disable */
+#define MASTER_DISABLE_TIMEOUT		800
+/*Blocks new Master requests */
+#define IGC_CTRL_GIO_MASTER_DISABLE	0x00000004
+/* Status of Master requests. */
+#define IGC_STATUS_GIO_MASTER_ENABLE	0x00080000
+
+/* Receive Address
+ * Number of high/low register pairs in the RAR. The RAR (Receive Address
+ * Registers) holds the directed and multicast addresses that we monitor.
+ * Technically, we have 16 spots.  However, we reserve one of these spots
+ * (RAR[15]) for our directed address used by controllers with
+ * manageability enabled, allowing us room for 15 multicast addresses.
+ */
+#define IGC_RAH_RAH_MASK	0x0000FFFF
+#define IGC_RAH_ASEL_MASK	0x00030000
+#define IGC_RAH_ASEL_SRC_ADDR	BIT(16)
+#define IGC_RAH_QSEL_MASK	0x000C0000
+#define IGC_RAH_QSEL_SHIFT	18
+#define IGC_RAH_QSEL_ENABLE	BIT(28)
+#define IGC_RAH_AV		0x80000000 /* Receive descriptor valid */
+
+#define IGC_RAL_MAC_ADDR_LEN	4
+#define IGC_RAH_MAC_ADDR_LEN	2
+
+/* Error Codes */
+#define IGC_SUCCESS			0
+#define IGC_ERR_NVM			1
+#define IGC_ERR_PHY			2
+#define IGC_ERR_CONFIG			3
+#define IGC_ERR_PARAM			4
+#define IGC_ERR_MAC_INIT		5
+#define IGC_ERR_RESET			9
+#define IGC_ERR_MASTER_REQUESTS_PENDING	10
+#define IGC_ERR_BLK_PHY_RESET		12
+#define IGC_ERR_SWFW_SYNC		13
+
+/* Device Control */
+#define IGC_CTRL_RST		0x04000000  /* Global reset */
+
+#define IGC_CTRL_PHY_RST	0x80000000  /* PHY Reset */
+#define IGC_CTRL_SLU		0x00000040  /* Set link up (Force Link) */
+#define IGC_CTRL_FRCSPD		0x00000800  /* Force Speed */
+#define IGC_CTRL_FRCDPX		0x00001000  /* Force Duplex */
+#define IGC_CTRL_VME		0x40000000  /* IEEE VLAN mode enable */
+
+#define IGC_CTRL_RFCE		0x08000000  /* Receive Flow Control enable */
+#define IGC_CTRL_TFCE		0x10000000  /* Transmit flow control enable */
+
+#define IGC_CTRL_SDP0_DIR	0x00400000  /* SDP0 Data direction */
+#define IGC_CTRL_SDP1_DIR	0x00800000  /* SDP1 Data direction */
+
+/* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */
+#define MAX_JUMBO_FRAME_SIZE	0x2600
+
+/* PBA constants */
+#define IGC_PBA_34K		0x0022
+
+/* SW Semaphore Register */
+#define IGC_SWSM_SMBI		0x00000001 /* Driver Semaphore bit */
+#define IGC_SWSM_SWESMBI	0x00000002 /* FW Semaphore bit */
+
+/* SWFW_SYNC Definitions */
+#define IGC_SWFW_EEP_SM		0x1
+#define IGC_SWFW_PHY0_SM	0x2
+
+/* Autoneg Advertisement Register */
+#define NWAY_AR_10T_HD_CAPS	0x0020   /* 10T   Half Duplex Capable */
+#define NWAY_AR_10T_FD_CAPS	0x0040   /* 10T   Full Duplex Capable */
+#define NWAY_AR_100TX_HD_CAPS	0x0080   /* 100TX Half Duplex Capable */
+#define NWAY_AR_100TX_FD_CAPS	0x0100   /* 100TX Full Duplex Capable */
+#define NWAY_AR_PAUSE		0x0400   /* Pause operation desired */
+#define NWAY_AR_ASM_DIR		0x0800   /* Asymmetric Pause Direction bit */
+
+/* Link Partner Ability Register (Base Page) */
+#define NWAY_LPAR_PAUSE		0x0400 /* LP Pause operation desired */
+#define NWAY_LPAR_ASM_DIR	0x0800 /* LP Asymmetric Pause Direction bit */
+
+/* 1000BASE-T Control Register */
+#define CR_1000T_HD_CAPS	0x0100 /* Advertise 1000T HD capability */
+#define CR_1000T_FD_CAPS	0x0200 /* Advertise 1000T FD capability  */
+
+/* 1000BASE-T Status Register */
+#define SR_1000T_REMOTE_RX_STATUS	0x1000 /* Remote receiver OK */
+
+/* PHY GPY 211 registers */
+#define STANDARD_AN_REG_MASK	0x0007 /* MMD */
+#define ANEG_MULTIGBT_AN_CTRL	0x0020 /* MULTI GBT AN Control Register */
+#define MMD_DEVADDR_SHIFT	16     /* Shift MMD to higher bits */
+#define CR_2500T_FD_CAPS	0x0080 /* Advertise 2500T FD capability */
+
+/* NVM Control */
+/* Number of milliseconds for NVM auto read done after MAC reset. */
+#define AUTO_READ_DONE_TIMEOUT		10
+#define IGC_EECD_AUTO_RD		0x00000200  /* NVM Auto Read done */
+#define IGC_EECD_REQ		0x00000040 /* NVM Access Request */
+#define IGC_EECD_GNT		0x00000080 /* NVM Access Grant */
+/* NVM Addressing bits based on type 0=small, 1=large */
+#define IGC_EECD_ADDR_BITS		0x00000400
+#define IGC_NVM_GRANT_ATTEMPTS		1000 /* NVM # attempts to gain grant */
+#define IGC_EECD_SIZE_EX_MASK		0x00007800  /* NVM Size */
+#define IGC_EECD_SIZE_EX_SHIFT		11
+#define IGC_EECD_FLUPD_I225		0x00800000 /* Update FLASH */
+#define IGC_EECD_FLUDONE_I225		0x04000000 /* Update FLASH done*/
+#define IGC_EECD_FLASH_DETECTED_I225	0x00080000 /* FLASH detected */
+#define IGC_FLUDONE_ATTEMPTS		20000
+#define IGC_EERD_EEWR_MAX_COUNT		512 /* buffered EEPROM words rw */
+
+/* Offset to data in NVM read/write registers */
+#define IGC_NVM_RW_REG_DATA	16
+#define IGC_NVM_RW_REG_DONE	2    /* Offset to READ/WRITE done bit */
+#define IGC_NVM_RW_REG_START	1    /* Start operation */
+#define IGC_NVM_RW_ADDR_SHIFT	2    /* Shift to the address bits */
+#define IGC_NVM_POLL_READ	0    /* Flag for polling for read complete */
+#define IGC_NVM_DEV_STARTER	5    /* Dev_starter Version */
+
+/* NVM Word Offsets */
+#define NVM_CHECKSUM_REG		0x003F
+
+/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */
+#define NVM_SUM				0xBABA
+#define NVM_WORD_SIZE_BASE_SHIFT	6
+
+/* Collision related configuration parameters */
+#define IGC_COLLISION_THRESHOLD		15
+#define IGC_CT_SHIFT			4
+#define IGC_COLLISION_DISTANCE		63
+#define IGC_COLD_SHIFT			12
+
+/* Device Status */
+#define IGC_STATUS_FD		0x00000001      /* Full duplex.0=half,1=full */
+#define IGC_STATUS_LU		0x00000002      /* Link up.0=no,1=link */
+#define IGC_STATUS_FUNC_MASK	0x0000000C      /* PCI Function Mask */
+#define IGC_STATUS_FUNC_SHIFT	2
+#define IGC_STATUS_TXOFF	0x00000010      /* transmission paused */
+#define IGC_STATUS_SPEED_100	0x00000040      /* Speed 100Mb/s */
+#define IGC_STATUS_SPEED_1000	0x00000080      /* Speed 1000Mb/s */
+#define IGC_STATUS_SPEED_2500	0x00400000	/* Speed 2.5Gb/s */
+
+#define SPEED_10		10
+#define SPEED_100		100
+#define SPEED_1000		1000
+#define SPEED_2500		2500
+#define HALF_DUPLEX		1
+#define FULL_DUPLEX		2
+
+/* 1Gbps and 2.5Gbps half duplex is not supported, nor spec-compliant. */
+#define ADVERTISE_10_HALF		0x0001
+#define ADVERTISE_10_FULL		0x0002
+#define ADVERTISE_100_HALF		0x0004
+#define ADVERTISE_100_FULL		0x0008
+#define ADVERTISE_1000_HALF		0x0010 /* Not used, just FYI */
+#define ADVERTISE_1000_FULL		0x0020
+#define ADVERTISE_2500_HALF		0x0040 /* Not used, just FYI */
+#define ADVERTISE_2500_FULL		0x0080
+
+#define IGC_ALL_SPEED_DUPLEX_2500 ( \
+	ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \
+	ADVERTISE_100_FULL | ADVERTISE_1000_FULL | ADVERTISE_2500_FULL)
+
+#define AUTONEG_ADVERTISE_SPEED_DEFAULT_2500	IGC_ALL_SPEED_DUPLEX_2500
+
+/* Interrupt Cause Read */
+#define IGC_ICR_TXDW		BIT(0)	/* Transmit desc written back */
+#define IGC_ICR_TXQE		BIT(1)	/* Transmit Queue empty */
+#define IGC_ICR_LSC		BIT(2)	/* Link Status Change */
+#define IGC_ICR_RXSEQ		BIT(3)	/* Rx sequence error */
+#define IGC_ICR_RXDMT0		BIT(4)	/* Rx desc min. threshold (0) */
+#define IGC_ICR_RXO		BIT(6)	/* Rx overrun */
+#define IGC_ICR_RXT0		BIT(7)	/* Rx timer intr (ring 0) */
+#define IGC_ICR_TS		BIT(19)	/* Time Sync Interrupt */
+#define IGC_ICR_DRSTA		BIT(30)	/* Device Reset Asserted */
+
+/* If this bit asserted, the driver should claim the interrupt */
+#define IGC_ICR_INT_ASSERTED	BIT(31)
+
+#define IGC_ICS_RXT0		IGC_ICR_RXT0 /* Rx timer intr */
+
+#define IMS_ENABLE_MASK ( \
+	IGC_IMS_RXT0   |    \
+	IGC_IMS_TXDW   |    \
+	IGC_IMS_RXDMT0 |    \
+	IGC_IMS_RXSEQ  |    \
+	IGC_IMS_LSC)
+
+/* Interrupt Mask Set */
+#define IGC_IMS_TXDW		IGC_ICR_TXDW	/* Tx desc written back */
+#define IGC_IMS_RXSEQ		IGC_ICR_RXSEQ	/* Rx sequence error */
+#define IGC_IMS_LSC		IGC_ICR_LSC	/* Link Status Change */
+#define IGC_IMS_DOUTSYNC	IGC_ICR_DOUTSYNC /* NIC DMA out of sync */
+#define IGC_IMS_DRSTA		IGC_ICR_DRSTA	/* Device Reset Asserted */
+#define IGC_IMS_RXT0		IGC_ICR_RXT0	/* Rx timer intr */
+#define IGC_IMS_RXDMT0		IGC_ICR_RXDMT0	/* Rx desc min. threshold */
+#define IGC_IMS_TS		IGC_ICR_TS	/* Time Sync Interrupt */
+
+#define IGC_QVECTOR_MASK	0x7FFC		/* Q-vector mask */
+#define IGC_ITR_VAL_MASK	0x04		/* ITR value mask */
+
+/* Interrupt Cause Set */
+#define IGC_ICS_LSC		IGC_ICR_LSC       /* Link Status Change */
+#define IGC_ICS_RXDMT0		IGC_ICR_RXDMT0    /* rx desc min. threshold */
+
+#define IGC_ICR_DOUTSYNC	0x10000000 /* NIC DMA out of sync */
+#define IGC_EITR_CNT_IGNR	0x80000000 /* Don't reset counters on write */
+#define IGC_IVAR_VALID		0x80
+#define IGC_GPIE_NSICR		0x00000001
+#define IGC_GPIE_MSIX_MODE	0x00000010
+#define IGC_GPIE_EIAME		0x40000000
+#define IGC_GPIE_PBA		0x80000000
+
+/* Receive Descriptor bit definitions */
+#define IGC_RXD_STAT_DD		0x01    /* Descriptor Done */
+
+/* Transmit Descriptor bit definitions */
+#define IGC_TXD_DTYP_D		0x00100000 /* Data Descriptor */
+#define IGC_TXD_DTYP_C		0x00000000 /* Context Descriptor */
+#define IGC_TXD_POPTS_IXSM	0x01       /* Insert IP checksum */
+#define IGC_TXD_POPTS_TXSM	0x02       /* Insert TCP/UDP checksum */
+#define IGC_TXD_CMD_EOP		0x01000000 /* End of Packet */
+#define IGC_TXD_CMD_IC		0x04000000 /* Insert Checksum */
+#define IGC_TXD_CMD_DEXT	0x20000000 /* Desc extension (0 = legacy) */
+#define IGC_TXD_CMD_VLE		0x40000000 /* Add VLAN tag */
+#define IGC_TXD_STAT_DD		0x00000001 /* Descriptor Done */
+#define IGC_TXD_CMD_TCP		0x01000000 /* TCP packet */
+#define IGC_TXD_CMD_IP		0x02000000 /* IP packet */
+#define IGC_TXD_CMD_TSE		0x04000000 /* TCP Seg enable */
+#define IGC_TXD_EXTCMD_TSTAMP	0x00000010 /* IEEE1588 Timestamp packet */
+
+/* IPSec Encrypt Enable */
+#define IGC_ADVTXD_L4LEN_SHIFT	8  /* Adv ctxt L4LEN shift */
+#define IGC_ADVTXD_MSS_SHIFT	16 /* Adv ctxt MSS shift */
+
+#define IGC_ADVTXD_TSN_CNTX_FIRST	0x00000080
+
+/* Transmit Control */
+#define IGC_TCTL_EN		0x00000002 /* enable Tx */
+#define IGC_TCTL_PSP		0x00000008 /* pad short packets */
+#define IGC_TCTL_CT		0x00000ff0 /* collision threshold */
+#define IGC_TCTL_COLD		0x003ff000 /* collision distance */
+#define IGC_TCTL_RTLC		0x01000000 /* Re-transmit on late collision */
+
+/* Flow Control Constants */
+#define FLOW_CONTROL_ADDRESS_LOW	0x00C28001
+#define FLOW_CONTROL_ADDRESS_HIGH	0x00000100
+#define FLOW_CONTROL_TYPE		0x8808
+/* Enable XON frame transmission */
+#define IGC_FCRTL_XONE			0x80000000
+
+/* Management Control */
+#define IGC_MANC_RCV_TCO_EN	0x00020000 /* Receive TCO Packets Enabled */
+#define IGC_MANC_BLK_PHY_RST_ON_IDE	0x00040000 /* Block phy resets */
+
+/* Receive Control */
+#define IGC_RCTL_RST		0x00000001 /* Software reset */
+#define IGC_RCTL_EN		0x00000002 /* enable */
+#define IGC_RCTL_SBP		0x00000004 /* store bad packet */
+#define IGC_RCTL_UPE		0x00000008 /* unicast promisc enable */
+#define IGC_RCTL_MPE		0x00000010 /* multicast promisc enable */
+#define IGC_RCTL_LPE		0x00000020 /* long packet enable */
+#define IGC_RCTL_LBM_MAC	0x00000040 /* MAC loopback mode */
+#define IGC_RCTL_LBM_TCVR	0x000000C0 /* tcvr loopback mode */
+
+#define IGC_RCTL_RDMTS_HALF	0x00000000 /* Rx desc min thresh size */
+#define IGC_RCTL_BAM		0x00008000 /* broadcast enable */
+
+/* Split Replication Receive Control */
+#define IGC_SRRCTL_TIMESTAMP		0x40000000
+#define IGC_SRRCTL_TIMER1SEL(timer)	(((timer) & 0x3) << 14)
+#define IGC_SRRCTL_TIMER0SEL(timer)	(((timer) & 0x3) << 17)
+
+/* Receive Descriptor bit definitions */
+#define IGC_RXD_STAT_EOP	0x02	/* End of Packet */
+#define IGC_RXD_STAT_IXSM	0x04	/* Ignore checksum */
+#define IGC_RXD_STAT_UDPCS	0x10	/* UDP xsum calculated */
+#define IGC_RXD_STAT_TCPCS	0x20	/* TCP xsum calculated */
+#define IGC_RXD_STAT_VP		0x08	/* IEEE VLAN Packet */
+
+#define IGC_RXDEXT_STATERR_LB	0x00040000
+
+/* Advanced Receive Descriptor bit definitions */
+#define IGC_RXDADV_STAT_TSIP	0x08000 /* timestamp in packet */
+
+#define IGC_RXDEXT_STATERR_L4E		0x20000000
+#define IGC_RXDEXT_STATERR_IPE		0x40000000
+#define IGC_RXDEXT_STATERR_RXE		0x80000000
+
+#define IGC_MRQC_RSS_FIELD_IPV4_TCP	0x00010000
+#define IGC_MRQC_RSS_FIELD_IPV4		0x00020000
+#define IGC_MRQC_RSS_FIELD_IPV6_TCP_EX	0x00040000
+#define IGC_MRQC_RSS_FIELD_IPV6		0x00100000
+#define IGC_MRQC_RSS_FIELD_IPV6_TCP	0x00200000
+
+/* Header split receive */
+#define IGC_RFCTL_IPV6_EX_DIS	0x00010000
+#define IGC_RFCTL_LEF		0x00040000
+
+#define IGC_RCTL_SZ_256		0x00030000 /* Rx buffer size 256 */
+
+#define IGC_RCTL_MO_SHIFT	12 /* multicast offset shift */
+#define IGC_RCTL_CFIEN		0x00080000 /* canonical form enable */
+#define IGC_RCTL_DPF		0x00400000 /* discard pause frames */
+#define IGC_RCTL_PMCF		0x00800000 /* pass MAC control frames */
+#define IGC_RCTL_SECRC		0x04000000 /* Strip Ethernet CRC */
+
+#define I225_RXPBSIZE_DEFAULT	0x000000A2 /* RXPBSIZE default */
+#define I225_TXPBSIZE_DEFAULT	0x04000014 /* TXPBSIZE default */
+#define IGC_RXPBS_CFG_TS_EN	0x80000000 /* Timestamp in Rx buffer */
+
+#define IGC_TXPBSIZE_TSN	0x04145145 /* 5k bytes buffer for each queue */
+
+#define IGC_DTXMXPKTSZ_TSN	0x19 /* 1600 bytes of max TX DMA packet size */
+#define IGC_DTXMXPKTSZ_DEFAULT	0x98 /* 9728-byte Jumbo frames */
+
+/* Transmit Scheduling Latency */
+/* Latency between transmission scheduling (LaunchTime) and the time
+ * the packet is transmitted to the network in nanosecond.
+ */
+#define IGC_TXOFFSET_SPEED_10	0x000034BC
+#define IGC_TXOFFSET_SPEED_100	0x00000578
+#define IGC_TXOFFSET_SPEED_1000	0x0000012C
+#define IGC_TXOFFSET_SPEED_2500	0x00000578
+
+/* Time Sync Interrupt Causes */
+#define IGC_TSICR_SYS_WRAP	BIT(0) /* SYSTIM Wrap around. */
+#define IGC_TSICR_TXTS		BIT(1) /* Transmit Timestamp. */
+#define IGC_TSICR_TT0		BIT(3) /* Target Time 0 Trigger. */
+#define IGC_TSICR_TT1		BIT(4) /* Target Time 1 Trigger. */
+#define IGC_TSICR_AUTT0		BIT(5) /* Auxiliary Timestamp 0 Taken. */
+#define IGC_TSICR_AUTT1		BIT(6) /* Auxiliary Timestamp 1 Taken. */
+
+#define IGC_TSICR_INTERRUPTS	IGC_TSICR_TXTS
+
+#define IGC_FTQF_VF_BP		0x00008000
+#define IGC_FTQF_1588_TIME_STAMP	0x08000000
+#define IGC_FTQF_MASK			0xF0000000
+#define IGC_FTQF_MASK_PROTO_BP	0x10000000
+
+/* Time Sync Receive Control bit definitions */
+#define IGC_TSYNCRXCTL_TYPE_MASK	0x0000000E  /* Rx type mask */
+#define IGC_TSYNCRXCTL_TYPE_L2_V2	0x00
+#define IGC_TSYNCRXCTL_TYPE_L4_V1	0x02
+#define IGC_TSYNCRXCTL_TYPE_L2_L4_V2	0x04
+#define IGC_TSYNCRXCTL_TYPE_ALL		0x08
+#define IGC_TSYNCRXCTL_TYPE_EVENT_V2	0x0A
+#define IGC_TSYNCRXCTL_ENABLED		0x00000010  /* enable Rx timestamping */
+#define IGC_TSYNCRXCTL_SYSCFI		0x00000020  /* Sys clock frequency */
+#define IGC_TSYNCRXCTL_RXSYNSIG		0x00000400  /* Sample RX tstamp in PHY sop */
+
+/* Time Sync Receive Configuration */
+#define IGC_TSYNCRXCFG_PTP_V1_CTRLT_MASK	0x000000FF
+#define IGC_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE	0x00
+#define IGC_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE	0x01
+
+/* Immediate Interrupt Receive */
+#define IGC_IMIR_CLEAR_MASK	0xF001FFFF /* IMIR Reg Clear Mask */
+#define IGC_IMIR_PORT_BYPASS	0x20000 /* IMIR Port Bypass Bit */
+#define IGC_IMIR_PRIORITY_SHIFT	29 /* IMIR Priority Shift */
+#define IGC_IMIREXT_CLEAR_MASK	0x7FFFF /* IMIREXT Reg Clear Mask */
+
+/* Immediate Interrupt Receive Extended */
+#define IGC_IMIREXT_CTRL_BP	0x00080000  /* Bypass check of ctrl bits */
+#define IGC_IMIREXT_SIZE_BP	0x00001000  /* Packet size bypass */
+
+/* Time Sync Transmit Control bit definitions */
+#define IGC_TSYNCTXCTL_TXTT_0			0x00000001  /* Tx timestamp reg 0 valid */
+#define IGC_TSYNCTXCTL_TXTT_1			0x00000002  /* Tx timestamp reg 1 valid */
+#define IGC_TSYNCTXCTL_TXTT_2			0x00000004  /* Tx timestamp reg 2 valid */
+#define IGC_TSYNCTXCTL_TXTT_3			0x00000008  /* Tx timestamp reg 3 valid */
+#define IGC_TSYNCTXCTL_ENABLED			0x00000010  /* enable Tx timestamping */
+#define IGC_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK	0x0000F000  /* max delay */
+#define IGC_TSYNCTXCTL_SYNC_COMP_ERR		0x20000000  /* sync err */
+#define IGC_TSYNCTXCTL_SYNC_COMP		0x40000000  /* sync complete */
+#define IGC_TSYNCTXCTL_START_SYNC		0x80000000  /* initiate sync */
+#define IGC_TSYNCTXCTL_TXSYNSIG			0x00000020  /* Sample TX tstamp in PHY sop */
+
+#define IGC_TSYNCTXCTL_TXTT_ANY ( \
+		IGC_TSYNCTXCTL_TXTT_0 | IGC_TSYNCTXCTL_TXTT_1 | \
+		IGC_TSYNCTXCTL_TXTT_2 | IGC_TSYNCTXCTL_TXTT_3)
+
+/* Timer selection bits */
+#define IGC_AUX_IO_TIMER_SEL_SYSTIM0	(0u << 30) /* Select SYSTIM0 for auxiliary time stamp */
+#define IGC_AUX_IO_TIMER_SEL_SYSTIM1	(1u << 30) /* Select SYSTIM1 for auxiliary time stamp */
+#define IGC_AUX_IO_TIMER_SEL_SYSTIM2	(2u << 30) /* Select SYSTIM2 for auxiliary time stamp */
+#define IGC_AUX_IO_TIMER_SEL_SYSTIM3	(3u << 30) /* Select SYSTIM3 for auxiliary time stamp */
+#define IGC_TT_IO_TIMER_SEL_SYSTIM0	(0u << 30) /* Select SYSTIM0 for target time stamp */
+#define IGC_TT_IO_TIMER_SEL_SYSTIM1	(1u << 30) /* Select SYSTIM1 for target time stamp */
+#define IGC_TT_IO_TIMER_SEL_SYSTIM2	(2u << 30) /* Select SYSTIM2 for target time stamp */
+#define IGC_TT_IO_TIMER_SEL_SYSTIM3	(3u << 30) /* Select SYSTIM3 for target time stamp */
+
+/* TSAUXC Configuration Bits */
+#define IGC_TSAUXC_EN_TT0	BIT(0)  /* Enable target time 0. */
+#define IGC_TSAUXC_EN_TT1	BIT(1)  /* Enable target time 1. */
+#define IGC_TSAUXC_EN_CLK0	BIT(2)  /* Enable Configurable Frequency Clock 0. */
+#define IGC_TSAUXC_ST0		BIT(4)  /* Start Clock 0 Toggle on Target Time 0. */
+#define IGC_TSAUXC_EN_CLK1	BIT(5)  /* Enable Configurable Frequency Clock 1. */
+#define IGC_TSAUXC_ST1		BIT(7)  /* Start Clock 1 Toggle on Target Time 1. */
+#define IGC_TSAUXC_EN_TS0	BIT(8)  /* Enable hardware timestamp 0. */
+#define IGC_TSAUXC_AUTT0	BIT(9)  /* Auxiliary Timestamp Taken. */
+#define IGC_TSAUXC_EN_TS1	BIT(10) /* Enable hardware timestamp 0. */
+#define IGC_TSAUXC_AUTT1	BIT(11) /* Auxiliary Timestamp Taken. */
+#define IGC_TSAUXC_PLSG		BIT(17) /* Generate a pulse. */
+#define IGC_TSAUXC_DISABLE1	BIT(27) /* Disable SYSTIM0 Count Operation. */
+#define IGC_TSAUXC_DISABLE2	BIT(28) /* Disable SYSTIM1 Count Operation. */
+#define IGC_TSAUXC_DISABLE3	BIT(29) /* Disable SYSTIM2 Count Operation. */
+#define IGC_TSAUXC_DIS_TS_CLEAR	BIT(30) /* Disable EN_TT0/1 auto clear. */
+#define IGC_TSAUXC_DISABLE0	BIT(31) /* Disable SYSTIM0 Count Operation. */
+
+/* SDP Configuration Bits */
+#define IGC_AUX0_SEL_SDP0	(0u << 0)  /* Assign SDP0 to auxiliary time stamp 0. */
+#define IGC_AUX0_SEL_SDP1	(1u << 0)  /* Assign SDP1 to auxiliary time stamp 0. */
+#define IGC_AUX0_SEL_SDP2	(2u << 0)  /* Assign SDP2 to auxiliary time stamp 0. */
+#define IGC_AUX0_SEL_SDP3	(3u << 0)  /* Assign SDP3 to auxiliary time stamp 0. */
+#define IGC_AUX0_TS_SDP_EN	(1u << 2)  /* Enable auxiliary time stamp trigger 0. */
+#define IGC_AUX1_SEL_SDP0	(0u << 3)  /* Assign SDP0 to auxiliary time stamp 1. */
+#define IGC_AUX1_SEL_SDP1	(1u << 3)  /* Assign SDP1 to auxiliary time stamp 1. */
+#define IGC_AUX1_SEL_SDP2	(2u << 3)  /* Assign SDP2 to auxiliary time stamp 1. */
+#define IGC_AUX1_SEL_SDP3	(3u << 3)  /* Assign SDP3 to auxiliary time stamp 1. */
+#define IGC_AUX1_TS_SDP_EN	(1u << 5)  /* Enable auxiliary time stamp trigger 1. */
+#define IGC_TS_SDP0_SEL_TT0	(0u << 6)  /* Target time 0 is output on SDP0. */
+#define IGC_TS_SDP0_SEL_TT1	(1u << 6)  /* Target time 1 is output on SDP0. */
+#define IGC_TS_SDP0_SEL_FC0	(2u << 6)  /* Freq clock  0 is output on SDP0. */
+#define IGC_TS_SDP0_SEL_FC1	(3u << 6)  /* Freq clock  1 is output on SDP0. */
+#define IGC_TS_SDP0_EN		(1u << 8)  /* SDP0 is assigned to Tsync. */
+#define IGC_TS_SDP1_SEL_TT0	(0u << 9)  /* Target time 0 is output on SDP1. */
+#define IGC_TS_SDP1_SEL_TT1	(1u << 9)  /* Target time 1 is output on SDP1. */
+#define IGC_TS_SDP1_SEL_FC0	(2u << 9)  /* Freq clock  0 is output on SDP1. */
+#define IGC_TS_SDP1_SEL_FC1	(3u << 9)  /* Freq clock  1 is output on SDP1. */
+#define IGC_TS_SDP1_EN		(1u << 11) /* SDP1 is assigned to Tsync. */
+#define IGC_TS_SDP2_SEL_TT0	(0u << 12) /* Target time 0 is output on SDP2. */
+#define IGC_TS_SDP2_SEL_TT1	(1u << 12) /* Target time 1 is output on SDP2. */
+#define IGC_TS_SDP2_SEL_FC0	(2u << 12) /* Freq clock  0 is output on SDP2. */
+#define IGC_TS_SDP2_SEL_FC1	(3u << 12) /* Freq clock  1 is output on SDP2. */
+#define IGC_TS_SDP2_EN		(1u << 14) /* SDP2 is assigned to Tsync. */
+#define IGC_TS_SDP3_SEL_TT0	(0u << 15) /* Target time 0 is output on SDP3. */
+#define IGC_TS_SDP3_SEL_TT1	(1u << 15) /* Target time 1 is output on SDP3. */
+#define IGC_TS_SDP3_SEL_FC0	(2u << 15) /* Freq clock  0 is output on SDP3. */
+#define IGC_TS_SDP3_SEL_FC1	(3u << 15) /* Freq clock  1 is output on SDP3. */
+#define IGC_TS_SDP3_EN		(1u << 17) /* SDP3 is assigned to Tsync. */
+
+/* Transmit Scheduling */
+#define IGC_TQAVCTRL_TRANSMIT_MODE_TSN	0x00000001
+#define IGC_TQAVCTRL_ENHANCED_QAV	0x00000008
+#define IGC_TQAVCTRL_FUTSCDDIS		0x00000080
+
+#define IGC_TXQCTL_QUEUE_MODE_LAUNCHT	0x00000001
+#define IGC_TXQCTL_STRICT_CYCLE		0x00000002
+#define IGC_TXQCTL_STRICT_END		0x00000004
+#define IGC_TXQCTL_QAV_SEL_MASK		0x000000C0
+#define IGC_TXQCTL_QAV_SEL_CBS0		0x00000080
+#define IGC_TXQCTL_QAV_SEL_CBS1		0x000000C0
+
+#define IGC_TQAVCC_IDLESLOPE_MASK	0xFFFF
+#define IGC_TQAVCC_KEEP_CREDITS		BIT(30)
+
+#define IGC_MAX_SR_QUEUES		2
+
+/* Receive Checksum Control */
+#define IGC_RXCSUM_CRCOFL	0x00000800   /* CRC32 offload enable */
+#define IGC_RXCSUM_PCSD		0x00002000   /* packet checksum disabled */
+
+/* PCIe PTM Control */
+#define IGC_PTM_CTRL_START_NOW	BIT(29) /* Start PTM Now */
+#define IGC_PTM_CTRL_EN		BIT(30) /* Enable PTM */
+#define IGC_PTM_CTRL_TRIG	BIT(31) /* PTM Cycle trigger */
+#define IGC_PTM_CTRL_SHRT_CYC(usec)	(((usec) & 0x3f) << 2)
+#define IGC_PTM_CTRL_PTM_TO(usec)	(((usec) & 0xff) << 8)
+
+#define IGC_PTM_SHORT_CYC_DEFAULT	1   /* Default short cycle interval */
+#define IGC_PTM_CYC_TIME_DEFAULT	5   /* Default PTM cycle time */
+#define IGC_PTM_TIMEOUT_DEFAULT		255 /* Default timeout for PTM errors */
+
+/* PCIe Digital Delay */
+#define IGC_PCIE_DIG_DELAY_DEFAULT	0x01440000
+
+/* PCIe PHY Delay */
+#define IGC_PCIE_PHY_DELAY_DEFAULT	0x40900000
+
+#define IGC_TIMADJ_ADJUST_METH		0x40000000
+
+/* PCIe PTM Status */
+#define IGC_PTM_STAT_VALID		BIT(0) /* PTM Status */
+#define IGC_PTM_STAT_RET_ERR		BIT(1) /* Root port timeout */
+#define IGC_PTM_STAT_BAD_PTM_RES	BIT(2) /* PTM Response msg instead of PTM Response Data */
+#define IGC_PTM_STAT_T4M1_OVFL		BIT(3) /* T4 minus T1 overflow */
+#define IGC_PTM_STAT_ADJUST_1ST		BIT(4) /* 1588 timer adjusted during 1st PTM cycle */
+#define IGC_PTM_STAT_ADJUST_CYC		BIT(5) /* 1588 timer adjusted during non-1st PTM cycle */
+
+/* PCIe PTM Cycle Control */
+#define IGC_PTM_CYCLE_CTRL_CYC_TIME(msec)	((msec) & 0x3ff) /* PTM Cycle Time (msec) */
+#define IGC_PTM_CYCLE_CTRL_AUTO_CYC_EN		BIT(31) /* PTM Cycle Control */
+
+/* GPY211 - I225 defines */
+#define GPY_MMD_MASK		0xFFFF0000
+#define GPY_MMD_SHIFT		16
+#define GPY_REG_MASK		0x0000FFFF
+
+#define IGC_MMDAC_FUNC_DATA	0x4000 /* Data, no post increment */
+
+/* MAC definitions */
+#define IGC_FACTPS_MNGCG	0x20000000
+#define IGC_FWSM_MODE_MASK	0xE
+#define IGC_FWSM_MODE_SHIFT	1
+
+/* Management Control */
+#define IGC_MANC_SMBUS_EN	0x00000001 /* SMBus Enabled - RO */
+#define IGC_MANC_ASF_EN		0x00000002 /* ASF Enabled - RO */
+
+/* PHY */
+#define PHY_REVISION_MASK	0xFFFFFFF0
+#define MAX_PHY_REG_ADDRESS	0x1F  /* 5 bit address bus (0-0x1F) */
+#define IGC_GEN_POLL_TIMEOUT	1920
+
+/* PHY Control Register */
+#define MII_CR_RESTART_AUTO_NEG	0x0200  /* Restart auto negotiation */
+#define MII_CR_POWER_DOWN	0x0800  /* Power down */
+#define MII_CR_AUTO_NEG_EN	0x1000  /* Auto Neg Enable */
+
+/* PHY Status Register */
+#define MII_SR_LINK_STATUS	0x0004 /* Link Status 1 = link */
+#define MII_SR_AUTONEG_COMPLETE	0x0020 /* Auto Neg Complete */
+#define IGC_PHY_RST_COMP	0x0100 /* Internal PHY reset completion */
+
+/* PHY 1000 MII Register/Bit Definitions */
+/* PHY Registers defined by IEEE */
+#define PHY_CONTROL		0x00 /* Control Register */
+#define PHY_STATUS		0x01 /* Status Register */
+#define PHY_ID1			0x02 /* Phy Id Reg (word 1) */
+#define PHY_ID2			0x03 /* Phy Id Reg (word 2) */
+#define PHY_AUTONEG_ADV		0x04 /* Autoneg Advertisement */
+#define PHY_LP_ABILITY		0x05 /* Link Partner Ability (Base Page) */
+#define PHY_1000T_CTRL		0x09 /* 1000Base-T Control Reg */
+#define PHY_1000T_STATUS	0x0A /* 1000Base-T Status Reg */
+
+/* MDI Control */
+#define IGC_MDIC_DATA_MASK	0x0000FFFF
+#define IGC_MDIC_REG_MASK	0x001F0000
+#define IGC_MDIC_REG_SHIFT	16
+#define IGC_MDIC_PHY_MASK	0x03E00000
+#define IGC_MDIC_PHY_SHIFT	21
+#define IGC_MDIC_OP_WRITE	0x04000000
+#define IGC_MDIC_OP_READ	0x08000000
+#define IGC_MDIC_READY		0x10000000
+#define IGC_MDIC_ERROR		0x40000000
+
+#define IGC_N0_QUEUE		-1
+
+#define IGC_MAX_MAC_HDR_LEN	127
+#define IGC_MAX_NETWORK_HDR_LEN	511
+
+#define IGC_VLANPQF_QSEL(_n, q_idx) ((q_idx) << ((_n) * 4))
+#define IGC_VLANPQF_VALID(_n)	(0x1 << (3 + (_n) * 4))
+#define IGC_VLANPQF_QUEUE_MASK	0x03
+
+#define IGC_ADVTXD_MACLEN_SHIFT		9  /* Adv ctxt desc mac len shift */
+#define IGC_ADVTXD_TUCMD_IPV4		0x00000400  /* IP Packet Type:1=IPv4 */
+#define IGC_ADVTXD_TUCMD_L4T_TCP	0x00000800  /* L4 Packet Type of TCP */
+#define IGC_ADVTXD_TUCMD_L4T_SCTP	0x00001000 /* L4 packet TYPE of SCTP */
+
+/* Maximum size of the MTA register table in all supported adapters */
+#define MAX_MTA_REG			128
+
+/* EEE defines */
+#define IGC_IPCNFG_EEE_2_5G_AN		0x00000010 /* IPCNFG EEE Ena 2.5G AN */
+#define IGC_IPCNFG_EEE_1G_AN		0x00000008 /* IPCNFG EEE Ena 1G AN */
+#define IGC_IPCNFG_EEE_100M_AN		0x00000004 /* IPCNFG EEE Ena 100M AN */
+#define IGC_EEER_EEE_NEG		0x20000000 /* EEE capability nego */
+#define IGC_EEER_TX_LPI_EN		0x00010000 /* EEER Tx LPI Enable */
+#define IGC_EEER_RX_LPI_EN		0x00020000 /* EEER Rx LPI Enable */
+#define IGC_EEER_LPI_FC			0x00040000 /* EEER Ena on Flow Cntrl */
+#define IGC_EEE_SU_LPI_CLK_STP		0x00800000 /* EEE LPI Clock Stop */
+
+/* LTR defines */
+#define IGC_LTRC_EEEMS_EN		0x00000020 /* Enable EEE LTR max send */
+#define IGC_RXPBS_SIZE_I225_MASK	0x0000003F /* Rx packet buffer size */
+#define IGC_TW_SYSTEM_1000_MASK		0x000000FF
+/* Minimum time for 100BASE-T where no data will be transmit following move out
+ * of EEE LPI Tx state
+ */
+#define IGC_TW_SYSTEM_100_MASK		0x0000FF00
+#define IGC_TW_SYSTEM_100_SHIFT		8
+/* Reg val to set scale to 1024 nsec */
+#define IGC_LTRMINV_SCALE_1024		2
+/* Reg val to set scale to 32768 nsec */
+#define IGC_LTRMINV_SCALE_32768		3
+/* Reg val to set scale to 1024 nsec */
+#define IGC_LTRMAXV_SCALE_1024		2
+/* Reg val to set scale to 32768 nsec */
+#define IGC_LTRMAXV_SCALE_32768		3
+#define IGC_LTRMINV_LTRV_MASK		0x000003FF /* LTR minimum value */
+#define IGC_LTRMAXV_LTRV_MASK		0x000003FF /* LTR maximum value */
+#define IGC_LTRMINV_LSNP_REQ		0x00008000 /* LTR Snoop Requirement */
+#define IGC_LTRMINV_SCALE_SHIFT		10
+#define IGC_LTRMAXV_LSNP_REQ		0x00008000 /* LTR Snoop Requirement */
+#define IGC_LTRMAXV_SCALE_SHIFT		10
+
+#endif /* _IGC_DEFINES_H_ */
diff --git a/drivers/net/ethernet/intel/igc/igc_diag.c b/drivers/net/ethernet/intel/igc/igc_diag.c
new file mode 100644
index 0000000000..cc621970c0
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_diag.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2020 Intel Corporation */
+
+#include "igc.h"
+#include "igc_diag.h"
+
+static struct igc_reg_test reg_test[] = {
+	{ IGC_FCAL,	1,	PATTERN_TEST,	0xFFFFFFFF,	0xFFFFFFFF },
+	{ IGC_FCAH,	1,	PATTERN_TEST,	0x0000FFFF,	0xFFFFFFFF },
+	{ IGC_FCT,	1,	PATTERN_TEST,	0x0000FFFF,	0xFFFFFFFF },
+	{ IGC_RDBAH(0), 4,	PATTERN_TEST,	0xFFFFFFFF,	0xFFFFFFFF },
+	{ IGC_RDBAL(0),	4,	PATTERN_TEST,	0xFFFFFF80,	0xFFFFFF80 },
+	{ IGC_RDLEN(0),	4,	PATTERN_TEST,	0x000FFF80,	0x000FFFFF },
+	{ IGC_RDT(0),	4,	PATTERN_TEST,	0x0000FFFF,	0x0000FFFF },
+	{ IGC_FCRTH,	1,	PATTERN_TEST,	0x0003FFF0,	0x0003FFF0 },
+	{ IGC_FCTTV,	1,	PATTERN_TEST,	0x0000FFFF,	0x0000FFFF },
+	{ IGC_TIPG,	1,	PATTERN_TEST,	0x3FFFFFFF,	0x3FFFFFFF },
+	{ IGC_TDBAH(0),	4,	PATTERN_TEST,	0xFFFFFFFF,	0xFFFFFFFF },
+	{ IGC_TDBAL(0),	4,	PATTERN_TEST,	0xFFFFFF80,	0xFFFFFF80 },
+	{ IGC_TDLEN(0),	4,	PATTERN_TEST,	0x000FFF80,	0x000FFFFF },
+	{ IGC_TDT(0),	4,	PATTERN_TEST,	0x0000FFFF,	0x0000FFFF },
+	{ IGC_RCTL,	1,	SET_READ_TEST,	0xFFFFFFFF,	0x00000000 },
+	{ IGC_RCTL,	1,	SET_READ_TEST,	0x04CFB2FE,	0x003FFFFB },
+	{ IGC_RCTL,	1,	SET_READ_TEST,	0x04CFB2FE,	0xFFFFFFFF },
+	{ IGC_TCTL,	1,	SET_READ_TEST,	0xFFFFFFFF,	0x00000000 },
+	{ IGC_RA,	16,	TABLE64_TEST_LO,
+						0xFFFFFFFF,	0xFFFFFFFF },
+	{ IGC_RA,	16,	TABLE64_TEST_HI,
+						0x900FFFFF,	0xFFFFFFFF },
+	{ IGC_MTA,	128,	TABLE32_TEST,
+						0xFFFFFFFF,	0xFFFFFFFF },
+	{ 0, 0, 0, 0}
+};
+
+static bool reg_pattern_test(struct igc_adapter *adapter, u64 *data, int reg,
+			     u32 mask, u32 write)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 pat, val, before;
+	static const u32 test_pattern[] = {
+		0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF
+	};
+
+	for (pat = 0; pat < ARRAY_SIZE(test_pattern); pat++) {
+		before = rd32(reg);
+		wr32(reg, test_pattern[pat] & write);
+		val = rd32(reg);
+		if (val != (test_pattern[pat] & write & mask)) {
+			netdev_err(adapter->netdev,
+				   "pattern test reg %04X failed: got 0x%08X expected 0x%08X",
+				   reg, val, test_pattern[pat] & write & mask);
+			*data = reg;
+			wr32(reg, before);
+			return false;
+		}
+		wr32(reg, before);
+	}
+	return true;
+}
+
+static bool reg_set_and_check(struct igc_adapter *adapter, u64 *data, int reg,
+			      u32 mask, u32 write)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 val, before;
+
+	before = rd32(reg);
+	wr32(reg, write & mask);
+	val = rd32(reg);
+	if ((write & mask) != (val & mask)) {
+		netdev_err(adapter->netdev,
+			   "set/check reg %04X test failed: got 0x%08X expected 0x%08X",
+			   reg, (val & mask), (write & mask));
+		*data = reg;
+		wr32(reg, before);
+		return false;
+	}
+	wr32(reg, before);
+	return true;
+}
+
+bool igc_reg_test(struct igc_adapter *adapter, u64 *data)
+{
+	struct igc_reg_test *test = reg_test;
+	struct igc_hw *hw = &adapter->hw;
+	u32 value, before, after;
+	u32 i, toggle, b = false;
+
+	/* Because the status register is such a special case,
+	 * we handle it separately from the rest of the register
+	 * tests.  Some bits are read-only, some toggle, and some
+	 * are writeable.
+	 */
+	toggle = 0x6800D3;
+	before = rd32(IGC_STATUS);
+	value = before & toggle;
+	wr32(IGC_STATUS, toggle);
+	after = rd32(IGC_STATUS) & toggle;
+	if (value != after) {
+		netdev_err(adapter->netdev,
+			   "failed STATUS register test got: 0x%08X expected: 0x%08X",
+			   after, value);
+		*data = 1;
+		return false;
+	}
+	/* restore previous status */
+	wr32(IGC_STATUS, before);
+
+	/* Perform the remainder of the register test, looping through
+	 * the test table until we either fail or reach the null entry.
+	 */
+	while (test->reg) {
+		for (i = 0; i < test->array_len; i++) {
+			switch (test->test_type) {
+			case PATTERN_TEST:
+				b = reg_pattern_test(adapter, data,
+						     test->reg + (i * 0x40),
+						     test->mask,
+						     test->write);
+				break;
+			case SET_READ_TEST:
+				b = reg_set_and_check(adapter, data,
+						      test->reg + (i * 0x40),
+						      test->mask,
+						      test->write);
+				break;
+			case TABLE64_TEST_LO:
+				b = reg_pattern_test(adapter, data,
+						     test->reg + (i * 8),
+						     test->mask,
+						     test->write);
+				break;
+			case TABLE64_TEST_HI:
+				b = reg_pattern_test(adapter, data,
+						     test->reg + 4 + (i * 8),
+						     test->mask,
+						     test->write);
+				break;
+			case TABLE32_TEST:
+				b = reg_pattern_test(adapter, data,
+						     test->reg + (i * 4),
+						     test->mask,
+						     test->write);
+				break;
+			}
+			if (!b)
+				return false;
+		}
+		test++;
+	}
+	*data = 0;
+	return true;
+}
+
+bool igc_eeprom_test(struct igc_adapter *adapter, u64 *data)
+{
+	struct igc_hw *hw = &adapter->hw;
+
+	*data = 0;
+
+	if (hw->nvm.ops.validate(hw) != IGC_SUCCESS) {
+		*data = 1;
+		return false;
+	}
+
+	return true;
+}
+
+bool igc_link_test(struct igc_adapter *adapter, u64 *data)
+{
+	bool link_up;
+
+	*data = 0;
+
+	/* add delay to give enough time for autonegotioation to finish */
+	if (adapter->hw.mac.autoneg)
+		ssleep(5);
+
+	link_up = igc_has_link(adapter);
+	if (!link_up) {
+		*data = 1;
+		return false;
+	}
+
+	return true;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_diag.h b/drivers/net/ethernet/intel/igc/igc_diag.h
new file mode 100644
index 0000000000..600658e33b
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_diag.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2020 Intel Corporation */
+
+bool igc_reg_test(struct igc_adapter *adapter, u64 *data);
+bool igc_eeprom_test(struct igc_adapter *adapter, u64 *data);
+bool igc_link_test(struct igc_adapter *adapter, u64 *data);
+
+struct igc_reg_test {
+	u16 reg;
+	u8 array_len;
+	u8 test_type;
+	u32 mask;
+	u32 write;
+};
+
+/* In the hardware, registers are laid out either singly, in arrays
+ * spaced 0x40 bytes apart, or in contiguous tables.  We assume
+ * most tests take place on arrays or single registers (handled
+ * as a single-element array) and special-case the tables.
+ * Table tests are always pattern tests.
+ *
+ * We also make provision for some required setup steps by specifying
+ * registers to be written without any read-back testing.
+ */
+
+#define PATTERN_TEST	1
+#define SET_READ_TEST	2
+#define TABLE32_TEST	3
+#define TABLE64_TEST_LO	4
+#define TABLE64_TEST_HI	5
diff --git a/drivers/net/ethernet/intel/igc/igc_dump.c b/drivers/net/ethernet/intel/igc/igc_dump.c
new file mode 100644
index 0000000000..c09c95cc5f
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_dump.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2018 Intel Corporation */
+
+#include "igc.h"
+
+struct igc_reg_info {
+	u32 ofs;
+	char *name;
+};
+
+static const struct igc_reg_info igc_reg_info_tbl[] = {
+	/* General Registers */
+	{IGC_CTRL, "CTRL"},
+	{IGC_STATUS, "STATUS"},
+	{IGC_CTRL_EXT, "CTRL_EXT"},
+	{IGC_MDIC, "MDIC"},
+
+	/* Interrupt Registers */
+	{IGC_ICR, "ICR"},
+
+	/* RX Registers */
+	{IGC_RCTL, "RCTL"},
+	{IGC_RDLEN(0), "RDLEN"},
+	{IGC_RDH(0), "RDH"},
+	{IGC_RDT(0), "RDT"},
+	{IGC_RXDCTL(0), "RXDCTL"},
+	{IGC_RDBAL(0), "RDBAL"},
+	{IGC_RDBAH(0), "RDBAH"},
+
+	/* TX Registers */
+	{IGC_TCTL, "TCTL"},
+	{IGC_TDBAL(0), "TDBAL"},
+	{IGC_TDBAH(0), "TDBAH"},
+	{IGC_TDLEN(0), "TDLEN"},
+	{IGC_TDH(0), "TDH"},
+	{IGC_TDT(0), "TDT"},
+	{IGC_TXDCTL(0), "TXDCTL"},
+
+	/* List Terminator */
+	{}
+};
+
+/* igc_regdump - register printout routine */
+static void igc_regdump(struct igc_hw *hw, struct igc_reg_info *reginfo)
+{
+	struct net_device *dev = igc_get_hw_dev(hw);
+	int n = 0;
+	char rname[16];
+	u32 regs[8];
+
+	switch (reginfo->ofs) {
+	case IGC_RDLEN(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(IGC_RDLEN(n));
+		break;
+	case IGC_RDH(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(IGC_RDH(n));
+		break;
+	case IGC_RDT(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(IGC_RDT(n));
+		break;
+	case IGC_RXDCTL(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(IGC_RXDCTL(n));
+		break;
+	case IGC_RDBAL(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(IGC_RDBAL(n));
+		break;
+	case IGC_RDBAH(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(IGC_RDBAH(n));
+		break;
+	case IGC_TDBAL(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(IGC_TDBAL(n));
+		break;
+	case IGC_TDBAH(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(IGC_TDBAH(n));
+		break;
+	case IGC_TDLEN(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(IGC_TDLEN(n));
+		break;
+	case IGC_TDH(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(IGC_TDH(n));
+		break;
+	case IGC_TDT(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(IGC_TDT(n));
+		break;
+	case IGC_TXDCTL(0):
+		for (n = 0; n < 4; n++)
+			regs[n] = rd32(IGC_TXDCTL(n));
+		break;
+	default:
+		netdev_info(dev, "%-15s %08x\n", reginfo->name,
+			    rd32(reginfo->ofs));
+		return;
+	}
+
+	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
+	netdev_info(dev, "%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
+		    regs[2], regs[3]);
+}
+
+/* igc_rings_dump - Tx-rings and Rx-rings */
+void igc_rings_dump(struct igc_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct my_u0 { __le64 a; __le64 b; } *u0;
+	union igc_adv_tx_desc *tx_desc;
+	union igc_adv_rx_desc *rx_desc;
+	struct igc_ring *tx_ring;
+	struct igc_ring *rx_ring;
+	u32 staterr;
+	u16 i, n;
+
+	if (!netif_msg_hw(adapter))
+		return;
+
+	netdev_info(netdev, "Device info: state %016lX trans_start %016lX\n",
+		    netdev->state, dev_trans_start(netdev));
+
+	/* Print TX Ring Summary */
+	if (!netif_running(netdev))
+		goto exit;
+
+	netdev_info(netdev, "TX Rings Summary\n");
+	netdev_info(netdev, "Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
+	for (n = 0; n < adapter->num_tx_queues; n++) {
+		struct igc_tx_buffer *buffer_info;
+
+		tx_ring = adapter->tx_ring[n];
+		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
+
+		netdev_info(netdev, "%5d %5X %5X %016llX %04X %p %016llX\n",
+			    n, tx_ring->next_to_use, tx_ring->next_to_clean,
+			    (u64)dma_unmap_addr(buffer_info, dma),
+			    dma_unmap_len(buffer_info, len),
+			    buffer_info->next_to_watch,
+			    (u64)buffer_info->time_stamp);
+	}
+
+	/* Print TX Rings */
+	if (!netif_msg_tx_done(adapter))
+		goto rx_ring_summary;
+
+	netdev_info(netdev, "TX Rings Dump\n");
+
+	/* Transmit Descriptor Formats
+	 *
+	 * Advanced Transmit Descriptor
+	 *   +--------------------------------------------------------------+
+	 * 0 |         Buffer Address [63:0]                                |
+	 *   +--------------------------------------------------------------+
+	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
+	 *   +--------------------------------------------------------------+
+	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
+	 */
+
+	for (n = 0; n < adapter->num_tx_queues; n++) {
+		tx_ring = adapter->tx_ring[n];
+		netdev_info(netdev, "------------------------------------\n");
+		netdev_info(netdev, "TX QUEUE INDEX = %d\n",
+			    tx_ring->queue_index);
+		netdev_info(netdev, "------------------------------------\n");
+		netdev_info(netdev, "T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] [bi->dma       ] leng  ntw timestamp        bi->skb\n");
+
+		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
+			const char *next_desc;
+			struct igc_tx_buffer *buffer_info;
+
+			tx_desc = IGC_TX_DESC(tx_ring, i);
+			buffer_info = &tx_ring->tx_buffer_info[i];
+			u0 = (struct my_u0 *)tx_desc;
+			if (i == tx_ring->next_to_use &&
+			    i == tx_ring->next_to_clean)
+				next_desc = " NTC/U";
+			else if (i == tx_ring->next_to_use)
+				next_desc = " NTU";
+			else if (i == tx_ring->next_to_clean)
+				next_desc = " NTC";
+			else
+				next_desc = "";
+
+			netdev_info(netdev, "T [0x%03X]    %016llX %016llX %016llX %04X  %p %016llX %p%s\n",
+				    i, le64_to_cpu(u0->a),
+				    le64_to_cpu(u0->b),
+				    (u64)dma_unmap_addr(buffer_info, dma),
+				    dma_unmap_len(buffer_info, len),
+				    buffer_info->next_to_watch,
+				    (u64)buffer_info->time_stamp,
+				    buffer_info->skb, next_desc);
+
+			if (netif_msg_pktdata(adapter) && buffer_info->skb)
+				print_hex_dump(KERN_INFO, "",
+					       DUMP_PREFIX_ADDRESS,
+					       16, 1, buffer_info->skb->data,
+					       dma_unmap_len(buffer_info, len),
+					       true);
+		}
+	}
+
+	/* Print RX Rings Summary */
+rx_ring_summary:
+	netdev_info(netdev, "RX Rings Summary\n");
+	netdev_info(netdev, "Queue [NTU] [NTC]\n");
+	for (n = 0; n < adapter->num_rx_queues; n++) {
+		rx_ring = adapter->rx_ring[n];
+		netdev_info(netdev, "%5d %5X %5X\n", n, rx_ring->next_to_use,
+			    rx_ring->next_to_clean);
+	}
+
+	/* Print RX Rings */
+	if (!netif_msg_rx_status(adapter))
+		goto exit;
+
+	netdev_info(netdev, "RX Rings Dump\n");
+
+	/* Advanced Receive Descriptor (Read) Format
+	 *    63                                           1        0
+	 *    +-----------------------------------------------------+
+	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
+	 *    +----------------------------------------------+------+
+	 *  8 |       Header Buffer Address [63:1]           |  DD  |
+	 *    +-----------------------------------------------------+
+	 *
+	 *
+	 * Advanced Receive Descriptor (Write-Back) Format
+	 *
+	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
+	 *   +------------------------------------------------------+
+	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
+	 *   | Checksum   Ident  |   |           |    | Type | Type |
+	 *   +------------------------------------------------------+
+	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
+	 *   +------------------------------------------------------+
+	 *   63       48 47    32 31            20 19               0
+	 */
+
+	for (n = 0; n < adapter->num_rx_queues; n++) {
+		rx_ring = adapter->rx_ring[n];
+		netdev_info(netdev, "------------------------------------\n");
+		netdev_info(netdev, "RX QUEUE INDEX = %d\n",
+			    rx_ring->queue_index);
+		netdev_info(netdev, "------------------------------------\n");
+		netdev_info(netdev, "R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] [bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
+		netdev_info(netdev, "RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] ---------------- [bi->skb] <-- Adv Rx Write-Back format\n");
+
+		for (i = 0; i < rx_ring->count; i++) {
+			const char *next_desc;
+			struct igc_rx_buffer *buffer_info;
+
+			buffer_info = &rx_ring->rx_buffer_info[i];
+			rx_desc = IGC_RX_DESC(rx_ring, i);
+			u0 = (struct my_u0 *)rx_desc;
+			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+
+			if (i == rx_ring->next_to_use)
+				next_desc = " NTU";
+			else if (i == rx_ring->next_to_clean)
+				next_desc = " NTC";
+			else
+				next_desc = "";
+
+			if (staterr & IGC_RXD_STAT_DD) {
+				/* Descriptor Done */
+				netdev_info(netdev, "%s[0x%03X]     %016llX %016llX ---------------- %s\n",
+					    "RWB", i,
+					    le64_to_cpu(u0->a),
+					    le64_to_cpu(u0->b),
+					    next_desc);
+			} else {
+				netdev_info(netdev, "%s[0x%03X]     %016llX %016llX %016llX %s\n",
+					    "R  ", i,
+					    le64_to_cpu(u0->a),
+					    le64_to_cpu(u0->b),
+					    (u64)buffer_info->dma,
+					    next_desc);
+
+				if (netif_msg_pktdata(adapter) &&
+				    buffer_info->dma && buffer_info->page) {
+					print_hex_dump(KERN_INFO, "",
+						       DUMP_PREFIX_ADDRESS,
+						       16, 1,
+						       page_address
+						       (buffer_info->page) +
+						       buffer_info->page_offset,
+						       igc_rx_bufsz(rx_ring),
+						       true);
+				}
+			}
+		}
+	}
+
+exit:
+	return;
+}
+
+/* igc_regs_dump - registers dump */
+void igc_regs_dump(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	struct igc_reg_info *reginfo;
+
+	/* Print Registers */
+	netdev_info(adapter->netdev, "Register Dump\n");
+	netdev_info(adapter->netdev, "Register Name   Value\n");
+	for (reginfo = (struct igc_reg_info *)igc_reg_info_tbl;
+	     reginfo->name; reginfo++) {
+		igc_regdump(hw, reginfo);
+	}
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
new file mode 100644
index 0000000000..f7284fa432
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -0,0 +1,2041 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2018 Intel Corporation */
+
+/* ethtool support for igc */
+#include <linux/if_vlan.h>
+#include <linux/pm_runtime.h>
+#include <linux/mdio.h>
+
+#include "igc.h"
+#include "igc_diag.h"
+
+/* forward declaration */
+struct igc_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+#define IGC_STAT(_name, _stat) { \
+	.stat_string = _name, \
+	.sizeof_stat = sizeof_field(struct igc_adapter, _stat), \
+	.stat_offset = offsetof(struct igc_adapter, _stat) \
+}
+
+static const struct igc_stats igc_gstrings_stats[] = {
+	IGC_STAT("rx_packets", stats.gprc),
+	IGC_STAT("tx_packets", stats.gptc),
+	IGC_STAT("rx_bytes", stats.gorc),
+	IGC_STAT("tx_bytes", stats.gotc),
+	IGC_STAT("rx_broadcast", stats.bprc),
+	IGC_STAT("tx_broadcast", stats.bptc),
+	IGC_STAT("rx_multicast", stats.mprc),
+	IGC_STAT("tx_multicast", stats.mptc),
+	IGC_STAT("multicast", stats.mprc),
+	IGC_STAT("collisions", stats.colc),
+	IGC_STAT("rx_crc_errors", stats.crcerrs),
+	IGC_STAT("rx_no_buffer_count", stats.rnbc),
+	IGC_STAT("rx_missed_errors", stats.mpc),
+	IGC_STAT("tx_aborted_errors", stats.ecol),
+	IGC_STAT("tx_carrier_errors", stats.tncrs),
+	IGC_STAT("tx_window_errors", stats.latecol),
+	IGC_STAT("tx_abort_late_coll", stats.latecol),
+	IGC_STAT("tx_deferred_ok", stats.dc),
+	IGC_STAT("tx_single_coll_ok", stats.scc),
+	IGC_STAT("tx_multi_coll_ok", stats.mcc),
+	IGC_STAT("tx_timeout_count", tx_timeout_count),
+	IGC_STAT("rx_long_length_errors", stats.roc),
+	IGC_STAT("rx_short_length_errors", stats.ruc),
+	IGC_STAT("rx_align_errors", stats.algnerrc),
+	IGC_STAT("tx_tcp_seg_good", stats.tsctc),
+	IGC_STAT("tx_tcp_seg_failed", stats.tsctfc),
+	IGC_STAT("rx_flow_control_xon", stats.xonrxc),
+	IGC_STAT("rx_flow_control_xoff", stats.xoffrxc),
+	IGC_STAT("tx_flow_control_xon", stats.xontxc),
+	IGC_STAT("tx_flow_control_xoff", stats.xofftxc),
+	IGC_STAT("rx_long_byte_count", stats.gorc),
+	IGC_STAT("tx_dma_out_of_sync", stats.doosync),
+	IGC_STAT("tx_smbus", stats.mgptc),
+	IGC_STAT("rx_smbus", stats.mgprc),
+	IGC_STAT("dropped_smbus", stats.mgpdc),
+	IGC_STAT("os2bmc_rx_by_bmc", stats.o2bgptc),
+	IGC_STAT("os2bmc_tx_by_bmc", stats.b2ospc),
+	IGC_STAT("os2bmc_tx_by_host", stats.o2bspc),
+	IGC_STAT("os2bmc_rx_by_host", stats.b2ogprc),
+	IGC_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts),
+	IGC_STAT("tx_hwtstamp_skipped", tx_hwtstamp_skipped),
+	IGC_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared),
+	IGC_STAT("tx_lpi_counter", stats.tlpic),
+	IGC_STAT("rx_lpi_counter", stats.rlpic),
+	IGC_STAT("qbv_config_change_errors", qbv_config_change_errors),
+};
+
+#define IGC_NETDEV_STAT(_net_stat) { \
+	.stat_string = __stringify(_net_stat), \
+	.sizeof_stat = sizeof_field(struct rtnl_link_stats64, _net_stat), \
+	.stat_offset = offsetof(struct rtnl_link_stats64, _net_stat) \
+}
+
+static const struct igc_stats igc_gstrings_net_stats[] = {
+	IGC_NETDEV_STAT(rx_errors),
+	IGC_NETDEV_STAT(tx_errors),
+	IGC_NETDEV_STAT(tx_dropped),
+	IGC_NETDEV_STAT(rx_length_errors),
+	IGC_NETDEV_STAT(rx_over_errors),
+	IGC_NETDEV_STAT(rx_frame_errors),
+	IGC_NETDEV_STAT(rx_fifo_errors),
+	IGC_NETDEV_STAT(tx_fifo_errors),
+	IGC_NETDEV_STAT(tx_heartbeat_errors)
+};
+
+enum igc_diagnostics_results {
+	TEST_REG = 0,
+	TEST_EEP,
+	TEST_IRQ,
+	TEST_LOOP,
+	TEST_LINK
+};
+
+static const char igc_gstrings_test[][ETH_GSTRING_LEN] = {
+	[TEST_REG]  = "Register test  (offline)",
+	[TEST_EEP]  = "Eeprom test    (offline)",
+	[TEST_IRQ]  = "Interrupt test (offline)",
+	[TEST_LOOP] = "Loopback test  (offline)",
+	[TEST_LINK] = "Link test   (on/offline)"
+};
+
+#define IGC_TEST_LEN (sizeof(igc_gstrings_test) / ETH_GSTRING_LEN)
+
+#define IGC_GLOBAL_STATS_LEN	\
+	(sizeof(igc_gstrings_stats) / sizeof(struct igc_stats))
+#define IGC_NETDEV_STATS_LEN	\
+	(sizeof(igc_gstrings_net_stats) / sizeof(struct igc_stats))
+#define IGC_RX_QUEUE_STATS_LEN \
+	(sizeof(struct igc_rx_queue_stats) / sizeof(u64))
+#define IGC_TX_QUEUE_STATS_LEN 3 /* packets, bytes, restart_queue */
+#define IGC_QUEUE_STATS_LEN \
+	((((struct igc_adapter *)netdev_priv(netdev))->num_rx_queues * \
+	  IGC_RX_QUEUE_STATS_LEN) + \
+	 (((struct igc_adapter *)netdev_priv(netdev))->num_tx_queues * \
+	  IGC_TX_QUEUE_STATS_LEN))
+#define IGC_STATS_LEN \
+	(IGC_GLOBAL_STATS_LEN + IGC_NETDEV_STATS_LEN + IGC_QUEUE_STATS_LEN)
+
+static const char igc_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IGC_PRIV_FLAGS_LEGACY_RX	BIT(0)
+	"legacy-rx",
+};
+
+#define IGC_PRIV_FLAGS_STR_LEN ARRAY_SIZE(igc_priv_flags_strings)
+
+static void igc_ethtool_get_drvinfo(struct net_device *netdev,
+				    struct ethtool_drvinfo *drvinfo)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	u16 nvm_version = 0;
+	u16 gphy_version;
+
+	strscpy(drvinfo->driver, igc_driver_name, sizeof(drvinfo->driver));
+
+	/* NVM image version is reported as firmware version for i225 device */
+	hw->nvm.ops.read(hw, IGC_NVM_DEV_STARTER, 1, &nvm_version);
+
+	/* gPHY firmware version is reported as PHY FW version */
+	gphy_version = igc_read_phy_fw_version(hw);
+
+	scnprintf(adapter->fw_version,
+		  sizeof(adapter->fw_version),
+		  "%x:%x",
+		  nvm_version,
+		  gphy_version);
+
+	strscpy(drvinfo->fw_version, adapter->fw_version,
+		sizeof(drvinfo->fw_version));
+
+	strscpy(drvinfo->bus_info, pci_name(adapter->pdev),
+		sizeof(drvinfo->bus_info));
+
+	drvinfo->n_priv_flags = IGC_PRIV_FLAGS_STR_LEN;
+}
+
+static int igc_ethtool_get_regs_len(struct net_device *netdev)
+{
+	return IGC_REGS_LEN * sizeof(u32);
+}
+
+static void igc_ethtool_get_regs(struct net_device *netdev,
+				 struct ethtool_regs *regs, void *p)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	u32 *regs_buff = p;
+	u8 i;
+
+	memset(p, 0, IGC_REGS_LEN * sizeof(u32));
+
+	regs->version = (2u << 24) | (hw->revision_id << 16) | hw->device_id;
+
+	/* General Registers */
+	regs_buff[0] = rd32(IGC_CTRL);
+	regs_buff[1] = rd32(IGC_STATUS);
+	regs_buff[2] = rd32(IGC_CTRL_EXT);
+	regs_buff[3] = rd32(IGC_MDIC);
+	regs_buff[4] = rd32(IGC_CONNSW);
+
+	/* NVM Register */
+	regs_buff[5] = rd32(IGC_EECD);
+
+	/* Interrupt */
+	/* Reading EICS for EICR because they read the
+	 * same but EICS does not clear on read
+	 */
+	regs_buff[6] = rd32(IGC_EICS);
+	regs_buff[7] = rd32(IGC_EICS);
+	regs_buff[8] = rd32(IGC_EIMS);
+	regs_buff[9] = rd32(IGC_EIMC);
+	regs_buff[10] = rd32(IGC_EIAC);
+	regs_buff[11] = rd32(IGC_EIAM);
+	/* Reading ICS for ICR because they read the
+	 * same but ICS does not clear on read
+	 */
+	regs_buff[12] = rd32(IGC_ICS);
+	regs_buff[13] = rd32(IGC_ICS);
+	regs_buff[14] = rd32(IGC_IMS);
+	regs_buff[15] = rd32(IGC_IMC);
+	regs_buff[16] = rd32(IGC_IAC);
+	regs_buff[17] = rd32(IGC_IAM);
+
+	/* Flow Control */
+	regs_buff[18] = rd32(IGC_FCAL);
+	regs_buff[19] = rd32(IGC_FCAH);
+	regs_buff[20] = rd32(IGC_FCTTV);
+	regs_buff[21] = rd32(IGC_FCRTL);
+	regs_buff[22] = rd32(IGC_FCRTH);
+	regs_buff[23] = rd32(IGC_FCRTV);
+
+	/* Receive */
+	regs_buff[24] = rd32(IGC_RCTL);
+	regs_buff[25] = rd32(IGC_RXCSUM);
+	regs_buff[26] = rd32(IGC_RLPML);
+	regs_buff[27] = rd32(IGC_RFCTL);
+
+	/* Transmit */
+	regs_buff[28] = rd32(IGC_TCTL);
+	regs_buff[29] = rd32(IGC_TIPG);
+
+	/* Wake Up */
+
+	/* MAC */
+
+	/* Statistics */
+	regs_buff[30] = adapter->stats.crcerrs;
+	regs_buff[31] = adapter->stats.algnerrc;
+	regs_buff[32] = adapter->stats.symerrs;
+	regs_buff[33] = adapter->stats.rxerrc;
+	regs_buff[34] = adapter->stats.mpc;
+	regs_buff[35] = adapter->stats.scc;
+	regs_buff[36] = adapter->stats.ecol;
+	regs_buff[37] = adapter->stats.mcc;
+	regs_buff[38] = adapter->stats.latecol;
+	regs_buff[39] = adapter->stats.colc;
+	regs_buff[40] = adapter->stats.dc;
+	regs_buff[41] = adapter->stats.tncrs;
+	regs_buff[42] = adapter->stats.sec;
+	regs_buff[43] = adapter->stats.htdpmc;
+	regs_buff[44] = adapter->stats.rlec;
+	regs_buff[45] = adapter->stats.xonrxc;
+	regs_buff[46] = adapter->stats.xontxc;
+	regs_buff[47] = adapter->stats.xoffrxc;
+	regs_buff[48] = adapter->stats.xofftxc;
+	regs_buff[49] = adapter->stats.fcruc;
+	regs_buff[50] = adapter->stats.prc64;
+	regs_buff[51] = adapter->stats.prc127;
+	regs_buff[52] = adapter->stats.prc255;
+	regs_buff[53] = adapter->stats.prc511;
+	regs_buff[54] = adapter->stats.prc1023;
+	regs_buff[55] = adapter->stats.prc1522;
+	regs_buff[56] = adapter->stats.gprc;
+	regs_buff[57] = adapter->stats.bprc;
+	regs_buff[58] = adapter->stats.mprc;
+	regs_buff[59] = adapter->stats.gptc;
+	regs_buff[60] = adapter->stats.gorc;
+	regs_buff[61] = adapter->stats.gotc;
+	regs_buff[62] = adapter->stats.rnbc;
+	regs_buff[63] = adapter->stats.ruc;
+	regs_buff[64] = adapter->stats.rfc;
+	regs_buff[65] = adapter->stats.roc;
+	regs_buff[66] = adapter->stats.rjc;
+	regs_buff[67] = adapter->stats.mgprc;
+	regs_buff[68] = adapter->stats.mgpdc;
+	regs_buff[69] = adapter->stats.mgptc;
+	regs_buff[70] = adapter->stats.tor;
+	regs_buff[71] = adapter->stats.tot;
+	regs_buff[72] = adapter->stats.tpr;
+	regs_buff[73] = adapter->stats.tpt;
+	regs_buff[74] = adapter->stats.ptc64;
+	regs_buff[75] = adapter->stats.ptc127;
+	regs_buff[76] = adapter->stats.ptc255;
+	regs_buff[77] = adapter->stats.ptc511;
+	regs_buff[78] = adapter->stats.ptc1023;
+	regs_buff[79] = adapter->stats.ptc1522;
+	regs_buff[80] = adapter->stats.mptc;
+	regs_buff[81] = adapter->stats.bptc;
+	regs_buff[82] = adapter->stats.tsctc;
+	regs_buff[83] = adapter->stats.iac;
+	regs_buff[84] = adapter->stats.rpthc;
+	regs_buff[85] = adapter->stats.hgptc;
+	regs_buff[86] = adapter->stats.hgorc;
+	regs_buff[87] = adapter->stats.hgotc;
+	regs_buff[88] = adapter->stats.lenerrs;
+	regs_buff[89] = adapter->stats.scvpc;
+	regs_buff[90] = adapter->stats.hrmpc;
+
+	for (i = 0; i < 4; i++)
+		regs_buff[91 + i] = rd32(IGC_SRRCTL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[95 + i] = rd32(IGC_PSRTYPE(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[99 + i] = rd32(IGC_RDBAL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[103 + i] = rd32(IGC_RDBAH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[107 + i] = rd32(IGC_RDLEN(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[111 + i] = rd32(IGC_RDH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[115 + i] = rd32(IGC_RDT(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[119 + i] = rd32(IGC_RXDCTL(i));
+
+	for (i = 0; i < 10; i++)
+		regs_buff[123 + i] = rd32(IGC_EITR(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[139 + i] = rd32(IGC_RAL(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[145 + i] = rd32(IGC_RAH(i));
+
+	for (i = 0; i < 4; i++)
+		regs_buff[149 + i] = rd32(IGC_TDBAL(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[152 + i] = rd32(IGC_TDBAH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[156 + i] = rd32(IGC_TDLEN(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[160 + i] = rd32(IGC_TDH(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[164 + i] = rd32(IGC_TDT(i));
+	for (i = 0; i < 4; i++)
+		regs_buff[168 + i] = rd32(IGC_TXDCTL(i));
+
+	/* XXX: Due to a bug few lines above, RAL and RAH registers are
+	 * overwritten. To preserve the ABI, we write these registers again in
+	 * regs_buff.
+	 */
+	for (i = 0; i < 16; i++)
+		regs_buff[172 + i] = rd32(IGC_RAL(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[188 + i] = rd32(IGC_RAH(i));
+
+	regs_buff[204] = rd32(IGC_VLANPQF);
+
+	for (i = 0; i < 8; i++)
+		regs_buff[205 + i] = rd32(IGC_ETQF(i));
+
+	regs_buff[213] = adapter->stats.tlpic;
+	regs_buff[214] = adapter->stats.rlpic;
+}
+
+static void igc_ethtool_get_wol(struct net_device *netdev,
+				struct ethtool_wolinfo *wol)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	wol->wolopts = 0;
+
+	if (!(adapter->flags & IGC_FLAG_WOL_SUPPORTED))
+		return;
+
+	wol->supported = WAKE_UCAST | WAKE_MCAST |
+			 WAKE_BCAST | WAKE_MAGIC |
+			 WAKE_PHY;
+
+	/* apply any specific unsupported masks here */
+	switch (adapter->hw.device_id) {
+	default:
+		break;
+	}
+
+	if (adapter->wol & IGC_WUFC_EX)
+		wol->wolopts |= WAKE_UCAST;
+	if (adapter->wol & IGC_WUFC_MC)
+		wol->wolopts |= WAKE_MCAST;
+	if (adapter->wol & IGC_WUFC_BC)
+		wol->wolopts |= WAKE_BCAST;
+	if (adapter->wol & IGC_WUFC_MAG)
+		wol->wolopts |= WAKE_MAGIC;
+	if (adapter->wol & IGC_WUFC_LNKC)
+		wol->wolopts |= WAKE_PHY;
+}
+
+static int igc_ethtool_set_wol(struct net_device *netdev,
+			       struct ethtool_wolinfo *wol)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	if (wol->wolopts & (WAKE_ARP | WAKE_MAGICSECURE | WAKE_FILTER))
+		return -EOPNOTSUPP;
+
+	if (!(adapter->flags & IGC_FLAG_WOL_SUPPORTED))
+		return wol->wolopts ? -EOPNOTSUPP : 0;
+
+	/* these settings will always override what we currently have */
+	adapter->wol = 0;
+
+	if (wol->wolopts & WAKE_UCAST)
+		adapter->wol |= IGC_WUFC_EX;
+	if (wol->wolopts & WAKE_MCAST)
+		adapter->wol |= IGC_WUFC_MC;
+	if (wol->wolopts & WAKE_BCAST)
+		adapter->wol |= IGC_WUFC_BC;
+	if (wol->wolopts & WAKE_MAGIC)
+		adapter->wol |= IGC_WUFC_MAG;
+	if (wol->wolopts & WAKE_PHY)
+		adapter->wol |= IGC_WUFC_LNKC;
+	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+
+	return 0;
+}
+
+static u32 igc_ethtool_get_msglevel(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->msg_enable;
+}
+
+static void igc_ethtool_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	adapter->msg_enable = data;
+}
+
+static int igc_ethtool_nway_reset(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	if (netif_running(netdev))
+		igc_reinit_locked(adapter);
+	return 0;
+}
+
+static u32 igc_ethtool_get_link(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_mac_info *mac = &adapter->hw.mac;
+
+	/* If the link is not reported up to netdev, interrupts are disabled,
+	 * and so the physical link state may have changed since we last
+	 * looked. Set get_link_status to make sure that the true link
+	 * state is interrogated, rather than pulling a cached and possibly
+	 * stale link state from the driver.
+	 */
+	if (!netif_carrier_ok(netdev))
+		mac->get_link_status = 1;
+
+	return igc_has_link(adapter);
+}
+
+static int igc_ethtool_get_eeprom_len(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->hw.nvm.word_size * 2;
+}
+
+static int igc_ethtool_get_eeprom(struct net_device *netdev,
+				  struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	int first_word, last_word;
+	u16 *eeprom_buff;
+	int ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EINVAL;
+
+	eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+
+	eeprom_buff = kmalloc_array(last_word - first_word + 1, sizeof(u16),
+				    GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	if (hw->nvm.type == igc_nvm_eeprom_spi) {
+		ret_val = hw->nvm.ops.read(hw, first_word,
+					   last_word - first_word + 1,
+					   eeprom_buff);
+	} else {
+		for (i = 0; i < last_word - first_word + 1; i++) {
+			ret_val = hw->nvm.ops.read(hw, first_word + i, 1,
+						   &eeprom_buff[i]);
+			if (ret_val)
+				break;
+		}
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1),
+	       eeprom->len);
+	kfree(eeprom_buff);
+
+	return ret_val;
+}
+
+static int igc_ethtool_set_eeprom(struct net_device *netdev,
+				  struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	int max_len, first_word, last_word, ret_val = 0;
+	u16 *eeprom_buff;
+	void *ptr;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EOPNOTSUPP;
+
+	if (hw->mac.type >= igc_i225 &&
+	    !igc_get_flash_presence_i225(hw)) {
+		return -EOPNOTSUPP;
+	}
+
+	if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+		return -EFAULT;
+
+	max_len = hw->nvm.word_size * 2;
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+	eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	ptr = (void *)eeprom_buff;
+
+	if (eeprom->offset & 1) {
+		/* need read/modify/write of first changed EEPROM word
+		 * only the second byte of the word is being modified
+		 */
+		ret_val = hw->nvm.ops.read(hw, first_word, 1,
+					    &eeprom_buff[0]);
+		ptr++;
+	}
+	if (((eeprom->offset + eeprom->len) & 1) && ret_val == 0) {
+		/* need read/modify/write of last changed EEPROM word
+		 * only the first byte of the word is being modified
+		 */
+		ret_val = hw->nvm.ops.read(hw, last_word, 1,
+				   &eeprom_buff[last_word - first_word]);
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(ptr, bytes, eeprom->len);
+
+	for (i = 0; i < last_word - first_word + 1; i++)
+		cpu_to_le16s(&eeprom_buff[i]);
+
+	ret_val = hw->nvm.ops.write(hw, first_word,
+				    last_word - first_word + 1, eeprom_buff);
+
+	/* Update the checksum if nvm write succeeded */
+	if (ret_val == 0)
+		hw->nvm.ops.update(hw);
+
+	kfree(eeprom_buff);
+	return ret_val;
+}
+
+static void
+igc_ethtool_get_ringparam(struct net_device *netdev,
+			  struct ethtool_ringparam *ring,
+			  struct kernel_ethtool_ringparam *kernel_ering,
+			  struct netlink_ext_ack *extack)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	ring->rx_max_pending = IGC_MAX_RXD;
+	ring->tx_max_pending = IGC_MAX_TXD;
+	ring->rx_pending = adapter->rx_ring_count;
+	ring->tx_pending = adapter->tx_ring_count;
+}
+
+static int
+igc_ethtool_set_ringparam(struct net_device *netdev,
+			  struct ethtool_ringparam *ring,
+			  struct kernel_ethtool_ringparam *kernel_ering,
+			  struct netlink_ext_ack *extack)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_ring *temp_ring;
+	u16 new_rx_count, new_tx_count;
+	int i, err = 0;
+
+	if (ring->rx_mini_pending || ring->rx_jumbo_pending)
+		return -EINVAL;
+
+	new_rx_count = min_t(u32, ring->rx_pending, IGC_MAX_RXD);
+	new_rx_count = max_t(u16, new_rx_count, IGC_MIN_RXD);
+	new_rx_count = ALIGN(new_rx_count, REQ_RX_DESCRIPTOR_MULTIPLE);
+
+	new_tx_count = min_t(u32, ring->tx_pending, IGC_MAX_TXD);
+	new_tx_count = max_t(u16, new_tx_count, IGC_MIN_TXD);
+	new_tx_count = ALIGN(new_tx_count, REQ_TX_DESCRIPTOR_MULTIPLE);
+
+	if (new_tx_count == adapter->tx_ring_count &&
+	    new_rx_count == adapter->rx_ring_count) {
+		/* nothing to do */
+		return 0;
+	}
+
+	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (!netif_running(adapter->netdev)) {
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			adapter->tx_ring[i]->count = new_tx_count;
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			adapter->rx_ring[i]->count = new_rx_count;
+		adapter->tx_ring_count = new_tx_count;
+		adapter->rx_ring_count = new_rx_count;
+		goto clear_reset;
+	}
+
+	if (adapter->num_tx_queues > adapter->num_rx_queues)
+		temp_ring = vmalloc(array_size(sizeof(struct igc_ring),
+					       adapter->num_tx_queues));
+	else
+		temp_ring = vmalloc(array_size(sizeof(struct igc_ring),
+					       adapter->num_rx_queues));
+
+	if (!temp_ring) {
+		err = -ENOMEM;
+		goto clear_reset;
+	}
+
+	igc_down(adapter);
+
+	/* We can't just free everything and then setup again,
+	 * because the ISRs in MSI-X mode get passed pointers
+	 * to the Tx and Rx ring structs.
+	 */
+	if (new_tx_count != adapter->tx_ring_count) {
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			memcpy(&temp_ring[i], adapter->tx_ring[i],
+			       sizeof(struct igc_ring));
+
+			temp_ring[i].count = new_tx_count;
+			err = igc_setup_tx_resources(&temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					igc_free_tx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+		}
+
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			igc_free_tx_resources(adapter->tx_ring[i]);
+
+			memcpy(adapter->tx_ring[i], &temp_ring[i],
+			       sizeof(struct igc_ring));
+		}
+
+		adapter->tx_ring_count = new_tx_count;
+	}
+
+	if (new_rx_count != adapter->rx_ring_count) {
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			memcpy(&temp_ring[i], adapter->rx_ring[i],
+			       sizeof(struct igc_ring));
+
+			temp_ring[i].count = new_rx_count;
+			err = igc_setup_rx_resources(&temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					igc_free_rx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+		}
+
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			igc_free_rx_resources(adapter->rx_ring[i]);
+
+			memcpy(adapter->rx_ring[i], &temp_ring[i],
+			       sizeof(struct igc_ring));
+		}
+
+		adapter->rx_ring_count = new_rx_count;
+	}
+err_setup:
+	igc_up(adapter);
+	vfree(temp_ring);
+clear_reset:
+	clear_bit(__IGC_RESETTING, &adapter->state);
+	return err;
+}
+
+static void igc_ethtool_get_pauseparam(struct net_device *netdev,
+				       struct ethtool_pauseparam *pause)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+
+	pause->autoneg =
+		(adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+	if (hw->fc.current_mode == igc_fc_rx_pause) {
+		pause->rx_pause = 1;
+	} else if (hw->fc.current_mode == igc_fc_tx_pause) {
+		pause->tx_pause = 1;
+	} else if (hw->fc.current_mode == igc_fc_full) {
+		pause->rx_pause = 1;
+		pause->tx_pause = 1;
+	}
+}
+
+static int igc_ethtool_set_pauseparam(struct net_device *netdev,
+				      struct ethtool_pauseparam *pause)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	int retval = 0;
+
+	adapter->fc_autoneg = pause->autoneg;
+
+	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (adapter->fc_autoneg == AUTONEG_ENABLE) {
+		hw->fc.requested_mode = igc_fc_default;
+		if (netif_running(adapter->netdev)) {
+			igc_down(adapter);
+			igc_up(adapter);
+		} else {
+			igc_reset(adapter);
+		}
+	} else {
+		if (pause->rx_pause && pause->tx_pause)
+			hw->fc.requested_mode = igc_fc_full;
+		else if (pause->rx_pause && !pause->tx_pause)
+			hw->fc.requested_mode = igc_fc_rx_pause;
+		else if (!pause->rx_pause && pause->tx_pause)
+			hw->fc.requested_mode = igc_fc_tx_pause;
+		else if (!pause->rx_pause && !pause->tx_pause)
+			hw->fc.requested_mode = igc_fc_none;
+
+		hw->fc.current_mode = hw->fc.requested_mode;
+
+		retval = ((hw->phy.media_type == igc_media_type_copper) ?
+			  igc_force_mac_fc(hw) : igc_setup_link(hw));
+	}
+
+	clear_bit(__IGC_RESETTING, &adapter->state);
+	return retval;
+}
+
+static void igc_ethtool_get_strings(struct net_device *netdev, u32 stringset,
+				    u8 *data)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	u8 *p = data;
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_TEST:
+		memcpy(data, *igc_gstrings_test,
+		       IGC_TEST_LEN * ETH_GSTRING_LEN);
+		break;
+	case ETH_SS_STATS:
+		for (i = 0; i < IGC_GLOBAL_STATS_LEN; i++)
+			ethtool_sprintf(&p, igc_gstrings_stats[i].stat_string);
+		for (i = 0; i < IGC_NETDEV_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					igc_gstrings_net_stats[i].stat_string);
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
+			ethtool_sprintf(&p, "tx_queue_%u_restart", i);
+		}
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			ethtool_sprintf(&p, "rx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
+			ethtool_sprintf(&p, "rx_queue_%u_drops", i);
+			ethtool_sprintf(&p, "rx_queue_%u_csum_err", i);
+			ethtool_sprintf(&p, "rx_queue_%u_alloc_failed", i);
+		}
+		/* BUG_ON(p - data != IGC_STATS_LEN * ETH_GSTRING_LEN); */
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		memcpy(data, igc_priv_flags_strings,
+		       IGC_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+		break;
+	}
+}
+
+static int igc_ethtool_get_sset_count(struct net_device *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return IGC_STATS_LEN;
+	case ETH_SS_TEST:
+		return IGC_TEST_LEN;
+	case ETH_SS_PRIV_FLAGS:
+		return IGC_PRIV_FLAGS_STR_LEN;
+	default:
+		return -ENOTSUPP;
+	}
+}
+
+static void igc_ethtool_get_stats(struct net_device *netdev,
+				  struct ethtool_stats *stats, u64 *data)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct rtnl_link_stats64 *net_stats = &adapter->stats64;
+	unsigned int start;
+	struct igc_ring *ring;
+	int i, j;
+	char *p;
+
+	spin_lock(&adapter->stats64_lock);
+	igc_update_stats(adapter);
+
+	for (i = 0; i < IGC_GLOBAL_STATS_LEN; i++) {
+		p = (char *)adapter + igc_gstrings_stats[i].stat_offset;
+		data[i] = (igc_gstrings_stats[i].sizeof_stat ==
+			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+	for (j = 0; j < IGC_NETDEV_STATS_LEN; j++, i++) {
+		p = (char *)net_stats + igc_gstrings_net_stats[j].stat_offset;
+		data[i] = (igc_gstrings_net_stats[j].sizeof_stat ==
+			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+	for (j = 0; j < adapter->num_tx_queues; j++) {
+		u64	restart2;
+
+		ring = adapter->tx_ring[j];
+		do {
+			start = u64_stats_fetch_begin(&ring->tx_syncp);
+			data[i]   = ring->tx_stats.packets;
+			data[i + 1] = ring->tx_stats.bytes;
+			data[i + 2] = ring->tx_stats.restart_queue;
+		} while (u64_stats_fetch_retry(&ring->tx_syncp, start));
+		do {
+			start = u64_stats_fetch_begin(&ring->tx_syncp2);
+			restart2  = ring->tx_stats.restart_queue2;
+		} while (u64_stats_fetch_retry(&ring->tx_syncp2, start));
+		data[i + 2] += restart2;
+
+		i += IGC_TX_QUEUE_STATS_LEN;
+	}
+	for (j = 0; j < adapter->num_rx_queues; j++) {
+		ring = adapter->rx_ring[j];
+		do {
+			start = u64_stats_fetch_begin(&ring->rx_syncp);
+			data[i]   = ring->rx_stats.packets;
+			data[i + 1] = ring->rx_stats.bytes;
+			data[i + 2] = ring->rx_stats.drops;
+			data[i + 3] = ring->rx_stats.csum_err;
+			data[i + 4] = ring->rx_stats.alloc_failed;
+		} while (u64_stats_fetch_retry(&ring->rx_syncp, start));
+		i += IGC_RX_QUEUE_STATS_LEN;
+	}
+	spin_unlock(&adapter->stats64_lock);
+}
+
+static int igc_ethtool_get_previous_rx_coalesce(struct igc_adapter *adapter)
+{
+	return (adapter->rx_itr_setting <= 3) ?
+		adapter->rx_itr_setting : adapter->rx_itr_setting >> 2;
+}
+
+static int igc_ethtool_get_previous_tx_coalesce(struct igc_adapter *adapter)
+{
+	return (adapter->tx_itr_setting <= 3) ?
+		adapter->tx_itr_setting : adapter->tx_itr_setting >> 2;
+}
+
+static int igc_ethtool_get_coalesce(struct net_device *netdev,
+				    struct ethtool_coalesce *ec,
+				    struct kernel_ethtool_coalesce *kernel_coal,
+				    struct netlink_ext_ack *extack)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	ec->rx_coalesce_usecs = igc_ethtool_get_previous_rx_coalesce(adapter);
+	ec->tx_coalesce_usecs = igc_ethtool_get_previous_tx_coalesce(adapter);
+
+	return 0;
+}
+
+static int igc_ethtool_set_coalesce(struct net_device *netdev,
+				    struct ethtool_coalesce *ec,
+				    struct kernel_ethtool_coalesce *kernel_coal,
+				    struct netlink_ext_ack *extack)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	if (ec->rx_coalesce_usecs > IGC_MAX_ITR_USECS ||
+	    (ec->rx_coalesce_usecs > 3 &&
+	     ec->rx_coalesce_usecs < IGC_MIN_ITR_USECS) ||
+	    ec->rx_coalesce_usecs == 2)
+		return -EINVAL;
+
+	if (ec->tx_coalesce_usecs > IGC_MAX_ITR_USECS ||
+	    (ec->tx_coalesce_usecs > 3 &&
+	     ec->tx_coalesce_usecs < IGC_MIN_ITR_USECS) ||
+	    ec->tx_coalesce_usecs == 2)
+		return -EINVAL;
+
+	if ((adapter->flags & IGC_FLAG_QUEUE_PAIRS) &&
+	    ec->tx_coalesce_usecs != igc_ethtool_get_previous_tx_coalesce(adapter)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Queue Pair mode enabled, both Rx and Tx coalescing controlled by rx-usecs");
+		return -EINVAL;
+	}
+
+	/* If ITR is disabled, disable DMAC */
+	if (ec->rx_coalesce_usecs == 0) {
+		if (adapter->flags & IGC_FLAG_DMAC)
+			adapter->flags &= ~IGC_FLAG_DMAC;
+	}
+
+	/* convert to rate of irq's per second */
+	if (ec->rx_coalesce_usecs && ec->rx_coalesce_usecs <= 3)
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs;
+	else
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2;
+
+	/* convert to rate of irq's per second */
+	if (adapter->flags & IGC_FLAG_QUEUE_PAIRS)
+		adapter->tx_itr_setting = adapter->rx_itr_setting;
+	else if (ec->tx_coalesce_usecs && ec->tx_coalesce_usecs <= 3)
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs;
+	else
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2;
+
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		struct igc_q_vector *q_vector = adapter->q_vector[i];
+
+		q_vector->tx.work_limit = adapter->tx_work_limit;
+		if (q_vector->rx.ring)
+			q_vector->itr_val = adapter->rx_itr_setting;
+		else
+			q_vector->itr_val = adapter->tx_itr_setting;
+		if (q_vector->itr_val && q_vector->itr_val <= 3)
+			q_vector->itr_val = IGC_START_ITR;
+		q_vector->set_itr = 1;
+	}
+
+	return 0;
+}
+
+#define ETHER_TYPE_FULL_MASK ((__force __be16)~0)
+#define VLAN_TCI_FULL_MASK ((__force __be16)~0)
+static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
+				    struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp = &cmd->fs;
+	struct igc_nfc_rule *rule = NULL;
+
+	cmd->data = IGC_MAX_RXNFC_RULES;
+
+	mutex_lock(&adapter->nfc_rule_lock);
+
+	rule = igc_get_nfc_rule(adapter, fsp->location);
+	if (!rule)
+		goto out;
+
+	fsp->flow_type = ETHER_FLOW;
+	fsp->ring_cookie = rule->action;
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
+		fsp->h_u.ether_spec.h_proto = htons(rule->filter.etype);
+		fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK;
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) {
+		fsp->flow_type |= FLOW_EXT;
+		fsp->h_ext.vlan_etype = rule->filter.vlan_etype;
+		fsp->m_ext.vlan_etype = ETHER_TYPE_FULL_MASK;
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
+		fsp->flow_type |= FLOW_EXT;
+		fsp->h_ext.vlan_tci = htons(rule->filter.vlan_tci);
+		fsp->m_ext.vlan_tci = htons(rule->filter.vlan_tci_mask);
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
+		ether_addr_copy(fsp->h_u.ether_spec.h_dest,
+				rule->filter.dst_addr);
+		eth_broadcast_addr(fsp->m_u.ether_spec.h_dest);
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
+		ether_addr_copy(fsp->h_u.ether_spec.h_source,
+				rule->filter.src_addr);
+		eth_broadcast_addr(fsp->m_u.ether_spec.h_source);
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) {
+		fsp->flow_type |= FLOW_EXT;
+		memcpy(fsp->h_ext.data, rule->filter.user_data, sizeof(fsp->h_ext.data));
+		memcpy(fsp->m_ext.data, rule->filter.user_mask, sizeof(fsp->m_ext.data));
+	}
+
+	mutex_unlock(&adapter->nfc_rule_lock);
+	return 0;
+
+out:
+	mutex_unlock(&adapter->nfc_rule_lock);
+	return -EINVAL;
+}
+
+static int igc_ethtool_get_nfc_rules(struct igc_adapter *adapter,
+				     struct ethtool_rxnfc *cmd,
+				     u32 *rule_locs)
+{
+	struct igc_nfc_rule *rule;
+	int cnt = 0;
+
+	cmd->data = IGC_MAX_RXNFC_RULES;
+
+	mutex_lock(&adapter->nfc_rule_lock);
+
+	list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
+		if (cnt == cmd->rule_cnt) {
+			mutex_unlock(&adapter->nfc_rule_lock);
+			return -EMSGSIZE;
+		}
+		rule_locs[cnt] = rule->location;
+		cnt++;
+	}
+
+	mutex_unlock(&adapter->nfc_rule_lock);
+
+	cmd->rule_cnt = cnt;
+
+	return 0;
+}
+
+static int igc_ethtool_get_rss_hash_opts(struct igc_adapter *adapter,
+					 struct ethtool_rxnfc *cmd)
+{
+	cmd->data = 0;
+
+	/* Report default options for RSS on igc */
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		fallthrough;
+	case UDP_V4_FLOW:
+		if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		fallthrough;
+	case SCTP_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case IPV4_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	case TCP_V6_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		fallthrough;
+	case UDP_V6_FLOW:
+		if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		fallthrough;
+	case SCTP_V6_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case IPV6_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int igc_ethtool_get_rxnfc(struct net_device *dev,
+				 struct ethtool_rxnfc *cmd, u32 *rule_locs)
+{
+	struct igc_adapter *adapter = netdev_priv(dev);
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = adapter->num_rx_queues;
+		return 0;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = adapter->nfc_rule_count;
+		return 0;
+	case ETHTOOL_GRXCLSRULE:
+		return igc_ethtool_get_nfc_rule(adapter, cmd);
+	case ETHTOOL_GRXCLSRLALL:
+		return igc_ethtool_get_nfc_rules(adapter, cmd, rule_locs);
+	case ETHTOOL_GRXFH:
+		return igc_ethtool_get_rss_hash_opts(adapter, cmd);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+#define UDP_RSS_FLAGS (IGC_FLAG_RSS_FIELD_IPV4_UDP | \
+		       IGC_FLAG_RSS_FIELD_IPV6_UDP)
+static int igc_ethtool_set_rss_hash_opt(struct igc_adapter *adapter,
+					struct ethtool_rxnfc *nfc)
+{
+	u32 flags = adapter->flags;
+
+	/* RSS does not support anything other than hashing
+	 * to queues on src and dst IPs and ports
+	 */
+	if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    !(nfc->data & RXH_L4_B_0_1) ||
+		    !(nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	case UDP_V4_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			flags &= ~IGC_FLAG_RSS_FIELD_IPV4_UDP;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			flags |= IGC_FLAG_RSS_FIELD_IPV4_UDP;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case UDP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			flags &= ~IGC_FLAG_RSS_FIELD_IPV6_UDP;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			flags |= IGC_FLAG_RSS_FIELD_IPV6_UDP;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case SCTP_V4_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    (nfc->data & RXH_L4_B_0_1) ||
+		    (nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* if we changed something we need to update flags */
+	if (flags != adapter->flags) {
+		struct igc_hw *hw = &adapter->hw;
+		u32 mrqc = rd32(IGC_MRQC);
+
+		if ((flags & UDP_RSS_FLAGS) &&
+		    !(adapter->flags & UDP_RSS_FLAGS))
+			netdev_err(adapter->netdev,
+				   "Enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n");
+
+		adapter->flags = flags;
+
+		/* Perform hash on these packet types */
+		mrqc |= IGC_MRQC_RSS_FIELD_IPV4 |
+			IGC_MRQC_RSS_FIELD_IPV4_TCP |
+			IGC_MRQC_RSS_FIELD_IPV6 |
+			IGC_MRQC_RSS_FIELD_IPV6_TCP;
+
+		mrqc &= ~(IGC_MRQC_RSS_FIELD_IPV4_UDP |
+			  IGC_MRQC_RSS_FIELD_IPV6_UDP);
+
+		if (flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
+			mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
+
+		if (flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
+			mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
+
+		wr32(IGC_MRQC, mrqc);
+	}
+
+	return 0;
+}
+
+static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule,
+				      const struct ethtool_rx_flow_spec *fsp)
+{
+	INIT_LIST_HEAD(&rule->list);
+
+	rule->action = fsp->ring_cookie;
+	rule->location = fsp->location;
+
+	if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) {
+		rule->filter.vlan_tci = ntohs(fsp->h_ext.vlan_tci);
+		rule->filter.vlan_tci_mask = ntohs(fsp->m_ext.vlan_tci);
+		rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_TCI;
+	}
+
+	if (fsp->m_u.ether_spec.h_proto == ETHER_TYPE_FULL_MASK) {
+		rule->filter.etype = ntohs(fsp->h_u.ether_spec.h_proto);
+		rule->filter.match_flags = IGC_FILTER_FLAG_ETHER_TYPE;
+	}
+
+	/* Both source and destination address filters only support the full
+	 * mask.
+	 */
+	if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_source)) {
+		rule->filter.match_flags |= IGC_FILTER_FLAG_SRC_MAC_ADDR;
+		ether_addr_copy(rule->filter.src_addr,
+				fsp->h_u.ether_spec.h_source);
+	}
+
+	if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_dest)) {
+		rule->filter.match_flags |= IGC_FILTER_FLAG_DST_MAC_ADDR;
+		ether_addr_copy(rule->filter.dst_addr,
+				fsp->h_u.ether_spec.h_dest);
+	}
+
+	/* VLAN etype matching */
+	if ((fsp->flow_type & FLOW_EXT) && fsp->h_ext.vlan_etype) {
+		rule->filter.vlan_etype = fsp->h_ext.vlan_etype;
+		rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_ETYPE;
+	}
+
+	/* Check for user defined data */
+	if ((fsp->flow_type & FLOW_EXT) &&
+	    (fsp->h_ext.data[0] || fsp->h_ext.data[1])) {
+		rule->filter.match_flags |= IGC_FILTER_FLAG_USER_DATA;
+		memcpy(rule->filter.user_data, fsp->h_ext.data, sizeof(fsp->h_ext.data));
+		memcpy(rule->filter.user_mask, fsp->m_ext.data, sizeof(fsp->m_ext.data));
+	}
+
+	/* The i225/i226 has various different filters. Flex filters provide a
+	 * way to match up to the first 128 bytes of a packet. Use them for:
+	 *   a) For specific user data
+	 *   b) For VLAN EtherType
+	 *   c) For full TCI match
+	 *   d) Or in case multiple filter criteria are set
+	 *
+	 * Otherwise, use the simple MAC, VLAN PRIO or EtherType filters.
+	 */
+	if ((rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA) ||
+	    (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE) ||
+	    ((rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) &&
+	     rule->filter.vlan_tci_mask == ntohs(VLAN_TCI_FULL_MASK)) ||
+	    (rule->filter.match_flags & (rule->filter.match_flags - 1)))
+		rule->flex = true;
+	else
+		rule->flex = false;
+}
+
+/**
+ * igc_ethtool_check_nfc_rule() - Check if NFC rule is valid
+ * @adapter: Pointer to adapter
+ * @rule: Rule under evaluation
+ *
+ * The driver doesn't support rules with multiple matches so if more than
+ * one bit in filter flags is set, @rule is considered invalid.
+ *
+ * Also, if there is already another rule with the same filter in a different
+ * location, @rule is considered invalid.
+ *
+ * Context: Expects adapter->nfc_rule_lock to be held by caller.
+ *
+ * Return: 0 in case of success, negative errno code otherwise.
+ */
+static int igc_ethtool_check_nfc_rule(struct igc_adapter *adapter,
+				      struct igc_nfc_rule *rule)
+{
+	struct net_device *dev = adapter->netdev;
+	u8 flags = rule->filter.match_flags;
+	struct igc_nfc_rule *tmp;
+
+	if (!flags) {
+		netdev_dbg(dev, "Rule with no match\n");
+		return -EINVAL;
+	}
+
+	list_for_each_entry(tmp, &adapter->nfc_rule_list, list) {
+		if (!memcmp(&rule->filter, &tmp->filter,
+			    sizeof(rule->filter)) &&
+		    tmp->location != rule->location) {
+			netdev_dbg(dev, "Rule already exists\n");
+			return -EEXIST;
+		}
+	}
+
+	return 0;
+}
+
+static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
+				    struct ethtool_rxnfc *cmd)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct igc_nfc_rule *rule, *old_rule;
+	int err;
+
+	if (!(netdev->hw_features & NETIF_F_NTUPLE)) {
+		netdev_dbg(netdev, "N-tuple filters disabled\n");
+		return -EOPNOTSUPP;
+	}
+
+	if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW) {
+		netdev_dbg(netdev, "Only ethernet flow type is supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (fsp->ring_cookie >= adapter->num_rx_queues) {
+		netdev_dbg(netdev, "Invalid action\n");
+		return -EINVAL;
+	}
+
+	/* There are two ways to match the VLAN TCI:
+	 *  1. Match on PCP field and use vlan prio filter for it
+	 *  2. Match on complete TCI field and use flex filter for it
+	 */
+	if ((fsp->flow_type & FLOW_EXT) &&
+	    fsp->m_ext.vlan_tci &&
+	    fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK) &&
+	    fsp->m_ext.vlan_tci != VLAN_TCI_FULL_MASK) {
+		netdev_dbg(netdev, "VLAN mask not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* VLAN EtherType can only be matched by full mask. */
+	if ((fsp->flow_type & FLOW_EXT) &&
+	    fsp->m_ext.vlan_etype &&
+	    fsp->m_ext.vlan_etype != ETHER_TYPE_FULL_MASK) {
+		netdev_dbg(netdev, "VLAN EtherType mask not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (fsp->location >= IGC_MAX_RXNFC_RULES) {
+		netdev_dbg(netdev, "Invalid location\n");
+		return -EINVAL;
+	}
+
+	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+	if (!rule)
+		return -ENOMEM;
+
+	igc_ethtool_init_nfc_rule(rule, fsp);
+
+	mutex_lock(&adapter->nfc_rule_lock);
+
+	err = igc_ethtool_check_nfc_rule(adapter, rule);
+	if (err)
+		goto err;
+
+	old_rule = igc_get_nfc_rule(adapter, fsp->location);
+	if (old_rule)
+		igc_del_nfc_rule(adapter, old_rule);
+
+	err = igc_add_nfc_rule(adapter, rule);
+	if (err)
+		goto err;
+
+	mutex_unlock(&adapter->nfc_rule_lock);
+	return 0;
+
+err:
+	mutex_unlock(&adapter->nfc_rule_lock);
+	kfree(rule);
+	return err;
+}
+
+static int igc_ethtool_del_nfc_rule(struct igc_adapter *adapter,
+				    struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct igc_nfc_rule *rule;
+
+	mutex_lock(&adapter->nfc_rule_lock);
+
+	rule = igc_get_nfc_rule(adapter, fsp->location);
+	if (!rule) {
+		mutex_unlock(&adapter->nfc_rule_lock);
+		return -EINVAL;
+	}
+
+	igc_del_nfc_rule(adapter, rule);
+
+	mutex_unlock(&adapter->nfc_rule_lock);
+	return 0;
+}
+
+static int igc_ethtool_set_rxnfc(struct net_device *dev,
+				 struct ethtool_rxnfc *cmd)
+{
+	struct igc_adapter *adapter = netdev_priv(dev);
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXFH:
+		return igc_ethtool_set_rss_hash_opt(adapter, cmd);
+	case ETHTOOL_SRXCLSRLINS:
+		return igc_ethtool_add_nfc_rule(adapter, cmd);
+	case ETHTOOL_SRXCLSRLDEL:
+		return igc_ethtool_del_nfc_rule(adapter, cmd);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+void igc_write_rss_indir_tbl(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 reg = IGC_RETA(0);
+	u32 shift = 0;
+	int i = 0;
+
+	while (i < IGC_RETA_SIZE) {
+		u32 val = 0;
+		int j;
+
+		for (j = 3; j >= 0; j--) {
+			val <<= 8;
+			val |= adapter->rss_indir_tbl[i + j];
+		}
+
+		wr32(reg, val << shift);
+		reg += 4;
+		i += 4;
+	}
+}
+
+static u32 igc_ethtool_get_rxfh_indir_size(struct net_device *netdev)
+{
+	return IGC_RETA_SIZE;
+}
+
+static int igc_ethtool_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+				u8 *hfunc)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+	if (!indir)
+		return 0;
+	for (i = 0; i < IGC_RETA_SIZE; i++)
+		indir[i] = adapter->rss_indir_tbl[i];
+
+	return 0;
+}
+
+static int igc_ethtool_set_rxfh(struct net_device *netdev, const u32 *indir,
+				const u8 *key, const u8 hfunc)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	u32 num_queues;
+	int i;
+
+	/* We do not allow change in unsupported parameters */
+	if (key ||
+	    (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP))
+		return -EOPNOTSUPP;
+	if (!indir)
+		return 0;
+
+	num_queues = adapter->rss_queues;
+
+	/* Verify user input. */
+	for (i = 0; i < IGC_RETA_SIZE; i++)
+		if (indir[i] >= num_queues)
+			return -EINVAL;
+
+	for (i = 0; i < IGC_RETA_SIZE; i++)
+		adapter->rss_indir_tbl[i] = indir[i];
+
+	igc_write_rss_indir_tbl(adapter);
+
+	return 0;
+}
+
+static void igc_ethtool_get_channels(struct net_device *netdev,
+				     struct ethtool_channels *ch)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	/* Report maximum channels */
+	ch->max_combined = igc_get_max_rss_queues(adapter);
+
+	/* Report info for other vector */
+	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
+		ch->max_other = NON_Q_VECTORS;
+		ch->other_count = NON_Q_VECTORS;
+	}
+
+	ch->combined_count = adapter->rss_queues;
+}
+
+static int igc_ethtool_set_channels(struct net_device *netdev,
+				    struct ethtool_channels *ch)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	unsigned int count = ch->combined_count;
+	unsigned int max_combined = 0;
+
+	/* Verify they are not requesting separate vectors */
+	if (!count || ch->rx_count || ch->tx_count)
+		return -EINVAL;
+
+	/* Verify other_count is valid and has not been changed */
+	if (ch->other_count != NON_Q_VECTORS)
+		return -EINVAL;
+
+	/* Verify the number of channels doesn't exceed hw limits */
+	max_combined = igc_get_max_rss_queues(adapter);
+	if (count > max_combined)
+		return -EINVAL;
+
+	if (count != adapter->rss_queues) {
+		adapter->rss_queues = count;
+		igc_set_flag_queue_pairs(adapter, max_combined);
+
+		/* Hardware has to reinitialize queues and interrupts to
+		 * match the new configuration.
+		 */
+		return igc_reinit_queues(adapter);
+	}
+
+	return 0;
+}
+
+static int igc_ethtool_get_ts_info(struct net_device *dev,
+				   struct ethtool_ts_info *info)
+{
+	struct igc_adapter *adapter = netdev_priv(dev);
+
+	if (adapter->ptp_clock)
+		info->phc_index = ptp_clock_index(adapter->ptp_clock);
+	else
+		info->phc_index = -1;
+
+	switch (adapter->hw.mac.type) {
+	case igc_i225:
+		info->so_timestamping =
+			SOF_TIMESTAMPING_TX_SOFTWARE |
+			SOF_TIMESTAMPING_RX_SOFTWARE |
+			SOF_TIMESTAMPING_SOFTWARE |
+			SOF_TIMESTAMPING_TX_HARDWARE |
+			SOF_TIMESTAMPING_RX_HARDWARE |
+			SOF_TIMESTAMPING_RAW_HARDWARE;
+
+		info->tx_types =
+			BIT(HWTSTAMP_TX_OFF) |
+			BIT(HWTSTAMP_TX_ON);
+
+		info->rx_filters = BIT(HWTSTAMP_FILTER_NONE);
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_ALL);
+
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static u32 igc_ethtool_get_priv_flags(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	u32 priv_flags = 0;
+
+	if (adapter->flags & IGC_FLAG_RX_LEGACY)
+		priv_flags |= IGC_PRIV_FLAGS_LEGACY_RX;
+
+	return priv_flags;
+}
+
+static int igc_ethtool_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	unsigned int flags = adapter->flags;
+
+	flags &= ~IGC_FLAG_RX_LEGACY;
+	if (priv_flags & IGC_PRIV_FLAGS_LEGACY_RX)
+		flags |= IGC_FLAG_RX_LEGACY;
+
+	if (flags != adapter->flags) {
+		adapter->flags = flags;
+
+		/* reset interface to repopulate queues */
+		if (netif_running(netdev))
+			igc_reinit_locked(adapter);
+	}
+
+	return 0;
+}
+
+static int igc_ethtool_get_eee(struct net_device *netdev,
+			       struct ethtool_eee *edata)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	u32 eeer;
+
+	if (hw->dev_spec._base.eee_enable)
+		edata->advertised =
+			mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert);
+
+	*edata = adapter->eee;
+	edata->supported = SUPPORTED_Autoneg;
+
+	eeer = rd32(IGC_EEER);
+
+	/* EEE status on negotiated link */
+	if (eeer & IGC_EEER_EEE_NEG)
+		edata->eee_active = true;
+
+	if (eeer & IGC_EEER_TX_LPI_EN)
+		edata->tx_lpi_enabled = true;
+
+	edata->eee_enabled = hw->dev_spec._base.eee_enable;
+
+	edata->advertised = SUPPORTED_Autoneg;
+	edata->lp_advertised = SUPPORTED_Autoneg;
+
+	/* Report correct negotiated EEE status for devices that
+	 * wrongly report EEE at half-duplex
+	 */
+	if (adapter->link_duplex == HALF_DUPLEX) {
+		edata->eee_enabled = false;
+		edata->eee_active = false;
+		edata->tx_lpi_enabled = false;
+		edata->advertised &= ~edata->advertised;
+	}
+
+	return 0;
+}
+
+static int igc_ethtool_set_eee(struct net_device *netdev,
+			       struct ethtool_eee *edata)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	struct ethtool_eee eee_curr;
+	s32 ret_val;
+
+	memset(&eee_curr, 0, sizeof(struct ethtool_eee));
+
+	ret_val = igc_ethtool_get_eee(netdev, &eee_curr);
+	if (ret_val) {
+		netdev_err(netdev,
+			   "Problem setting EEE advertisement options\n");
+		return -EINVAL;
+	}
+
+	if (eee_curr.eee_enabled) {
+		if (eee_curr.tx_lpi_enabled != edata->tx_lpi_enabled) {
+			netdev_err(netdev,
+				   "Setting EEE tx-lpi is not supported\n");
+			return -EINVAL;
+		}
+
+		/* Tx LPI timer is not implemented currently */
+		if (edata->tx_lpi_timer) {
+			netdev_err(netdev,
+				   "Setting EEE Tx LPI timer is not supported\n");
+			return -EINVAL;
+		}
+	} else if (!edata->eee_enabled) {
+		netdev_err(netdev,
+			   "Setting EEE options are not supported with EEE disabled\n");
+		return -EINVAL;
+	}
+
+	adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised);
+	if (hw->dev_spec._base.eee_enable != edata->eee_enabled) {
+		hw->dev_spec._base.eee_enable = edata->eee_enabled;
+		adapter->flags |= IGC_FLAG_EEE;
+
+		/* reset link */
+		if (netif_running(netdev))
+			igc_reinit_locked(adapter);
+		else
+			igc_reset(adapter);
+	}
+
+	return 0;
+}
+
+static int igc_ethtool_begin(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	pm_runtime_get_sync(&adapter->pdev->dev);
+	return 0;
+}
+
+static void igc_ethtool_complete(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	pm_runtime_put(&adapter->pdev->dev);
+}
+
+static int igc_ethtool_get_link_ksettings(struct net_device *netdev,
+					  struct ethtool_link_ksettings *cmd)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	u32 status;
+	u32 speed;
+
+	ethtool_link_ksettings_zero_link_mode(cmd, supported);
+	ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+
+	/* supported link modes */
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 10baseT_Half);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 10baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 100baseT_Half);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 100baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 1000baseT_Full);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 2500baseT_Full);
+
+	/* twisted pair */
+	cmd->base.port = PORT_TP;
+	cmd->base.phy_address = hw->phy.addr;
+	ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
+
+	/* advertising link modes */
+	if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF)
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Half);
+	if (hw->phy.autoneg_advertised & ADVERTISE_10_FULL)
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, 10baseT_Full);
+	if (hw->phy.autoneg_advertised & ADVERTISE_100_HALF)
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Half);
+	if (hw->phy.autoneg_advertised & ADVERTISE_100_FULL)
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, 100baseT_Full);
+	if (hw->phy.autoneg_advertised & ADVERTISE_1000_FULL)
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, 1000baseT_Full);
+	if (hw->phy.autoneg_advertised & ADVERTISE_2500_FULL)
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, 2500baseT_Full);
+
+	/* set autoneg settings */
+	if (hw->mac.autoneg == 1) {
+		ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Autoneg);
+	}
+
+	/* Set pause flow control settings */
+	ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+
+	switch (hw->fc.requested_mode) {
+	case igc_fc_full:
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
+		break;
+	case igc_fc_rx_pause:
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Asym_Pause);
+		break;
+	case igc_fc_tx_pause:
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Asym_Pause);
+		break;
+	default:
+		break;
+	}
+
+	status = pm_runtime_suspended(&adapter->pdev->dev) ?
+		 0 : rd32(IGC_STATUS);
+
+	if (status & IGC_STATUS_LU) {
+		if (status & IGC_STATUS_SPEED_1000) {
+			/* For I225, STATUS will indicate 1G speed in both
+			 * 1 Gbps and 2.5 Gbps link modes.
+			 * An additional bit is used
+			 * to differentiate between 1 Gbps and 2.5 Gbps.
+			 */
+			if (hw->mac.type == igc_i225 &&
+			    (status & IGC_STATUS_SPEED_2500)) {
+				speed = SPEED_2500;
+			} else {
+				speed = SPEED_1000;
+			}
+		} else if (status & IGC_STATUS_SPEED_100) {
+			speed = SPEED_100;
+		} else {
+			speed = SPEED_10;
+		}
+		if ((status & IGC_STATUS_FD) ||
+		    hw->phy.media_type != igc_media_type_copper)
+			cmd->base.duplex = DUPLEX_FULL;
+		else
+			cmd->base.duplex = DUPLEX_HALF;
+	} else {
+		speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
+	}
+	cmd->base.speed = speed;
+	if (hw->mac.autoneg)
+		cmd->base.autoneg = AUTONEG_ENABLE;
+	else
+		cmd->base.autoneg = AUTONEG_DISABLE;
+
+	/* MDI-X => 2; MDI =>1; Invalid =>0 */
+	if (hw->phy.media_type == igc_media_type_copper)
+		cmd->base.eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X :
+						      ETH_TP_MDI;
+	else
+		cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
+
+	if (hw->phy.mdix == AUTO_ALL_MODES)
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+	else
+		cmd->base.eth_tp_mdix_ctrl = hw->phy.mdix;
+
+	return 0;
+}
+
+static int
+igc_ethtool_set_link_ksettings(struct net_device *netdev,
+			       const struct ethtool_link_ksettings *cmd)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct net_device *dev = adapter->netdev;
+	struct igc_hw *hw = &adapter->hw;
+	u16 advertised = 0;
+
+	/* When adapter in resetting mode, autoneg/speed/duplex
+	 * cannot be changed
+	 */
+	if (igc_check_reset_block(hw)) {
+		netdev_err(dev, "Cannot change link characteristics when reset is active\n");
+		return -EINVAL;
+	}
+
+	/* MDI setting is only allowed when autoneg enabled because
+	 * some hardware doesn't allow MDI setting when speed or
+	 * duplex is forced.
+	 */
+	if (cmd->base.eth_tp_mdix_ctrl) {
+		if (cmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO &&
+		    cmd->base.autoneg != AUTONEG_ENABLE) {
+			netdev_err(dev, "Forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
+			return -EINVAL;
+		}
+	}
+
+	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+						  2500baseT_Full))
+		advertised |= ADVERTISE_2500_FULL;
+
+	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+						  1000baseT_Full))
+		advertised |= ADVERTISE_1000_FULL;
+
+	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+						  100baseT_Full))
+		advertised |= ADVERTISE_100_FULL;
+
+	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+						  100baseT_Half))
+		advertised |= ADVERTISE_100_HALF;
+
+	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+						  10baseT_Full))
+		advertised |= ADVERTISE_10_FULL;
+
+	if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+						  10baseT_Half))
+		advertised |= ADVERTISE_10_HALF;
+
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
+		hw->mac.autoneg = 1;
+		hw->phy.autoneg_advertised = advertised;
+		if (adapter->fc_autoneg)
+			hw->fc.requested_mode = igc_fc_default;
+	} else {
+		netdev_info(dev, "Force mode currently not supported\n");
+	}
+
+	/* MDI-X => 2; MDI => 1; Auto => 3 */
+	if (cmd->base.eth_tp_mdix_ctrl) {
+		/* fix up the value for auto (3 => 0) as zero is mapped
+		 * internally to auto
+		 */
+		if (cmd->base.eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO)
+			hw->phy.mdix = AUTO_ALL_MODES;
+		else
+			hw->phy.mdix = cmd->base.eth_tp_mdix_ctrl;
+	}
+
+	/* reset the link */
+	if (netif_running(adapter->netdev)) {
+		igc_down(adapter);
+		igc_up(adapter);
+	} else {
+		igc_reset(adapter);
+	}
+
+	clear_bit(__IGC_RESETTING, &adapter->state);
+
+	return 0;
+}
+
+static void igc_ethtool_diag_test(struct net_device *netdev,
+				  struct ethtool_test *eth_test, u64 *data)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	bool if_running = netif_running(netdev);
+
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		netdev_info(adapter->netdev, "Offline testing starting");
+		set_bit(__IGC_TESTING, &adapter->state);
+
+		/* Link test performed before hardware reset so autoneg doesn't
+		 * interfere with test result
+		 */
+		if (!igc_link_test(adapter, &data[TEST_LINK]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		if (if_running)
+			igc_close(netdev);
+		else
+			igc_reset(adapter);
+
+		netdev_info(adapter->netdev, "Register testing starting");
+		if (!igc_reg_test(adapter, &data[TEST_REG]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		igc_reset(adapter);
+
+		netdev_info(adapter->netdev, "EEPROM testing starting");
+		if (!igc_eeprom_test(adapter, &data[TEST_EEP]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		igc_reset(adapter);
+
+		/* loopback and interrupt tests
+		 * will be implemented in the future
+		 */
+		data[TEST_LOOP] = 0;
+		data[TEST_IRQ] = 0;
+
+		clear_bit(__IGC_TESTING, &adapter->state);
+		if (if_running)
+			igc_open(netdev);
+	} else {
+		netdev_info(adapter->netdev, "Online testing starting");
+
+		/* register, eeprom, intr and loopback tests not run online */
+		data[TEST_REG] = 0;
+		data[TEST_EEP] = 0;
+		data[TEST_IRQ] = 0;
+		data[TEST_LOOP] = 0;
+
+		if (!igc_link_test(adapter, &data[TEST_LINK]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+	}
+
+	msleep_interruptible(4 * 1000);
+}
+
+static const struct ethtool_ops igc_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_USECS,
+	.get_drvinfo		= igc_ethtool_get_drvinfo,
+	.get_regs_len		= igc_ethtool_get_regs_len,
+	.get_regs		= igc_ethtool_get_regs,
+	.get_wol		= igc_ethtool_get_wol,
+	.set_wol		= igc_ethtool_set_wol,
+	.get_msglevel		= igc_ethtool_get_msglevel,
+	.set_msglevel		= igc_ethtool_set_msglevel,
+	.nway_reset		= igc_ethtool_nway_reset,
+	.get_link		= igc_ethtool_get_link,
+	.get_eeprom_len		= igc_ethtool_get_eeprom_len,
+	.get_eeprom		= igc_ethtool_get_eeprom,
+	.set_eeprom		= igc_ethtool_set_eeprom,
+	.get_ringparam		= igc_ethtool_get_ringparam,
+	.set_ringparam		= igc_ethtool_set_ringparam,
+	.get_pauseparam		= igc_ethtool_get_pauseparam,
+	.set_pauseparam		= igc_ethtool_set_pauseparam,
+	.get_strings		= igc_ethtool_get_strings,
+	.get_sset_count		= igc_ethtool_get_sset_count,
+	.get_ethtool_stats	= igc_ethtool_get_stats,
+	.get_coalesce		= igc_ethtool_get_coalesce,
+	.set_coalesce		= igc_ethtool_set_coalesce,
+	.get_rxnfc		= igc_ethtool_get_rxnfc,
+	.set_rxnfc		= igc_ethtool_set_rxnfc,
+	.get_rxfh_indir_size	= igc_ethtool_get_rxfh_indir_size,
+	.get_rxfh		= igc_ethtool_get_rxfh,
+	.set_rxfh		= igc_ethtool_set_rxfh,
+	.get_ts_info		= igc_ethtool_get_ts_info,
+	.get_channels		= igc_ethtool_get_channels,
+	.set_channels		= igc_ethtool_set_channels,
+	.get_priv_flags		= igc_ethtool_get_priv_flags,
+	.set_priv_flags		= igc_ethtool_set_priv_flags,
+	.get_eee		= igc_ethtool_get_eee,
+	.set_eee		= igc_ethtool_set_eee,
+	.begin			= igc_ethtool_begin,
+	.complete		= igc_ethtool_complete,
+	.get_link_ksettings	= igc_ethtool_get_link_ksettings,
+	.set_link_ksettings	= igc_ethtool_set_link_ksettings,
+	.self_test		= igc_ethtool_diag_test,
+};
+
+void igc_ethtool_set_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &igc_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h
new file mode 100644
index 0000000000..e1c572e0d4
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_hw.h
@@ -0,0 +1,288 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_HW_H_
+#define _IGC_HW_H_
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+
+#include "igc_regs.h"
+#include "igc_defines.h"
+#include "igc_mac.h"
+#include "igc_phy.h"
+#include "igc_nvm.h"
+#include "igc_i225.h"
+#include "igc_base.h"
+
+#define IGC_DEV_ID_I225_LM			0x15F2
+#define IGC_DEV_ID_I225_V			0x15F3
+#define IGC_DEV_ID_I225_I			0x15F8
+#define IGC_DEV_ID_I220_V			0x15F7
+#define IGC_DEV_ID_I225_K			0x3100
+#define IGC_DEV_ID_I225_K2			0x3101
+#define IGC_DEV_ID_I226_K			0x3102
+#define IGC_DEV_ID_I225_LMVP			0x5502
+#define IGC_DEV_ID_I226_LMVP			0x5503
+#define IGC_DEV_ID_I225_IT			0x0D9F
+#define IGC_DEV_ID_I226_LM			0x125B
+#define IGC_DEV_ID_I226_V			0x125C
+#define IGC_DEV_ID_I226_IT			0x125D
+#define IGC_DEV_ID_I221_V			0x125E
+#define IGC_DEV_ID_I226_BLANK_NVM		0x125F
+#define IGC_DEV_ID_I225_BLANK_NVM		0x15FD
+
+/* Function pointers for the MAC. */
+struct igc_mac_operations {
+	s32 (*check_for_link)(struct igc_hw *hw);
+	s32 (*reset_hw)(struct igc_hw *hw);
+	s32 (*init_hw)(struct igc_hw *hw);
+	s32 (*setup_physical_interface)(struct igc_hw *hw);
+	void (*rar_set)(struct igc_hw *hw, u8 *address, u32 index);
+	s32 (*read_mac_addr)(struct igc_hw *hw);
+	s32 (*get_speed_and_duplex)(struct igc_hw *hw, u16 *speed,
+				    u16 *duplex);
+	s32 (*acquire_swfw_sync)(struct igc_hw *hw, u16 mask);
+	void (*release_swfw_sync)(struct igc_hw *hw, u16 mask);
+};
+
+enum igc_mac_type {
+	igc_undefined = 0,
+	igc_i225,
+	igc_num_macs  /* List is 1-based, so subtract 1 for true count. */
+};
+
+enum igc_media_type {
+	igc_media_type_unknown = 0,
+	igc_media_type_copper = 1,
+	igc_num_media_types
+};
+
+enum igc_nvm_type {
+	igc_nvm_unknown = 0,
+	igc_nvm_eeprom_spi,
+};
+
+struct igc_info {
+	s32 (*get_invariants)(struct igc_hw *hw);
+	struct igc_mac_operations *mac_ops;
+	const struct igc_phy_operations *phy_ops;
+	struct igc_nvm_operations *nvm_ops;
+};
+
+extern const struct igc_info igc_base_info;
+
+struct igc_mac_info {
+	struct igc_mac_operations ops;
+
+	u8 addr[ETH_ALEN];
+	u8 perm_addr[ETH_ALEN];
+
+	enum igc_mac_type type;
+
+	u32 mc_filter_type;
+
+	u16 mta_reg_count;
+	u16 uta_reg_count;
+
+	u32 mta_shadow[MAX_MTA_REG];
+	u16 rar_entry_count;
+
+	bool asf_firmware_present;
+	bool arc_subsystem_valid;
+
+	bool autoneg;
+	bool autoneg_failed;
+	bool get_link_status;
+};
+
+struct igc_nvm_operations {
+	s32 (*acquire)(struct igc_hw *hw);
+	s32 (*read)(struct igc_hw *hw, u16 offset, u16 i, u16 *data);
+	void (*release)(struct igc_hw *hw);
+	s32 (*write)(struct igc_hw *hw, u16 offset, u16 i, u16 *data);
+	s32 (*update)(struct igc_hw *hw);
+	s32 (*validate)(struct igc_hw *hw);
+};
+
+struct igc_phy_operations {
+	s32 (*acquire)(struct igc_hw *hw);
+	s32 (*check_reset_block)(struct igc_hw *hw);
+	s32 (*force_speed_duplex)(struct igc_hw *hw);
+	s32 (*get_phy_info)(struct igc_hw *hw);
+	s32 (*read_reg)(struct igc_hw *hw, u32 address, u16 *data);
+	void (*release)(struct igc_hw *hw);
+	s32 (*reset)(struct igc_hw *hw);
+	s32 (*write_reg)(struct igc_hw *hw, u32 address, u16 data);
+};
+
+struct igc_nvm_info {
+	struct igc_nvm_operations ops;
+	enum igc_nvm_type type;
+
+	u16 word_size;
+	u16 delay_usec;
+	u16 address_bits;
+	u16 opcode_bits;
+	u16 page_size;
+};
+
+struct igc_phy_info {
+	struct igc_phy_operations ops;
+
+	u32 addr;
+	u32 id;
+	u32 reset_delay_us; /* in usec */
+	u32 revision;
+
+	enum igc_media_type media_type;
+
+	u16 autoneg_advertised;
+	u16 autoneg_mask;
+
+	u8 mdix;
+
+	bool is_mdix;
+	bool speed_downgraded;
+	bool autoneg_wait_to_complete;
+};
+
+struct igc_bus_info {
+	u16 func;
+	u16 pci_cmd_word;
+};
+
+enum igc_fc_mode {
+	igc_fc_none = 0,
+	igc_fc_rx_pause,
+	igc_fc_tx_pause,
+	igc_fc_full,
+	igc_fc_default = 0xFF
+};
+
+struct igc_fc_info {
+	u32 high_water;     /* Flow control high-water mark */
+	u32 low_water;      /* Flow control low-water mark */
+	u16 pause_time;     /* Flow control pause timer */
+	bool send_xon;      /* Flow control send XON */
+	bool strict_ieee;   /* Strict IEEE mode */
+	enum igc_fc_mode current_mode; /* Type of flow control */
+	enum igc_fc_mode requested_mode;
+};
+
+struct igc_dev_spec_base {
+	bool clear_semaphore_once;
+	bool eee_enable;
+};
+
+struct igc_hw {
+	void *back;
+
+	u8 __iomem *hw_addr;
+	unsigned long io_base;
+
+	struct igc_mac_info  mac;
+	struct igc_fc_info   fc;
+	struct igc_nvm_info  nvm;
+	struct igc_phy_info  phy;
+
+	struct igc_bus_info bus;
+
+	union {
+		struct igc_dev_spec_base	_base;
+	} dev_spec;
+
+	u16 device_id;
+	u16 subsystem_vendor_id;
+	u16 subsystem_device_id;
+	u16 vendor_id;
+
+	u8 revision_id;
+};
+
+/* Statistics counters collected by the MAC */
+struct igc_hw_stats {
+	u64 crcerrs;
+	u64 algnerrc;
+	u64 symerrs;
+	u64 rxerrc;
+	u64 mpc;
+	u64 scc;
+	u64 ecol;
+	u64 mcc;
+	u64 latecol;
+	u64 colc;
+	u64 dc;
+	u64 tncrs;
+	u64 sec;
+	u64 cexterr;
+	u64 rlec;
+	u64 xonrxc;
+	u64 xontxc;
+	u64 xoffrxc;
+	u64 xofftxc;
+	u64 fcruc;
+	u64 prc64;
+	u64 prc127;
+	u64 prc255;
+	u64 prc511;
+	u64 prc1023;
+	u64 prc1522;
+	u64 tlpic;
+	u64 rlpic;
+	u64 gprc;
+	u64 bprc;
+	u64 mprc;
+	u64 gptc;
+	u64 gorc;
+	u64 gotc;
+	u64 rnbc;
+	u64 ruc;
+	u64 rfc;
+	u64 roc;
+	u64 rjc;
+	u64 mgprc;
+	u64 mgpdc;
+	u64 mgptc;
+	u64 tor;
+	u64 tot;
+	u64 tpr;
+	u64 tpt;
+	u64 ptc64;
+	u64 ptc127;
+	u64 ptc255;
+	u64 ptc511;
+	u64 ptc1023;
+	u64 ptc1522;
+	u64 mptc;
+	u64 bptc;
+	u64 tsctc;
+	u64 tsctfc;
+	u64 iac;
+	u64 htdpmc;
+	u64 rpthc;
+	u64 hgptc;
+	u64 hgorc;
+	u64 hgotc;
+	u64 lenerrs;
+	u64 scvpc;
+	u64 hrmpc;
+	u64 doosync;
+	u64 o2bgptc;
+	u64 o2bspc;
+	u64 b2ospc;
+	u64 b2ogprc;
+	u64 txdrop;
+};
+
+struct net_device *igc_get_hw_dev(struct igc_hw *hw);
+#define hw_dbg(format, arg...) \
+	netdev_dbg(igc_get_hw_dev(hw), format, ##arg)
+
+s32  igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value);
+s32  igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value);
+void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value);
+void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value);
+
+#endif /* _IGC_HW_H_ */
diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c
new file mode 100644
index 0000000000..17546a035a
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_i225.c
@@ -0,0 +1,641 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2018 Intel Corporation */
+
+#include <linux/delay.h>
+
+#include "igc_hw.h"
+
+/**
+ * igc_acquire_nvm_i225 - Acquire exclusive access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the necessary semaphores for exclusive access to the EEPROM.
+ * Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ * Return successful if access grant bit set, else clear the request for
+ * EEPROM access and return -IGC_ERR_NVM (-1).
+ */
+static s32 igc_acquire_nvm_i225(struct igc_hw *hw)
+{
+	return igc_acquire_swfw_sync_i225(hw, IGC_SWFW_EEP_SM);
+}
+
+/**
+ * igc_release_nvm_i225 - Release exclusive access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Stop any current commands to the EEPROM and clear the EEPROM request bit,
+ * then release the semaphores acquired.
+ */
+static void igc_release_nvm_i225(struct igc_hw *hw)
+{
+	igc_release_swfw_sync_i225(hw, IGC_SWFW_EEP_SM);
+}
+
+/**
+ * igc_get_hw_semaphore_i225 - Acquire hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Acquire the HW semaphore to access the PHY or NVM
+ */
+static s32 igc_get_hw_semaphore_i225(struct igc_hw *hw)
+{
+	s32 timeout = hw->nvm.word_size + 1;
+	s32 i = 0;
+	u32 swsm;
+
+	/* Get the SW semaphore */
+	while (i < timeout) {
+		swsm = rd32(IGC_SWSM);
+		if (!(swsm & IGC_SWSM_SMBI))
+			break;
+
+		usleep_range(500, 600);
+		i++;
+	}
+
+	if (i == timeout) {
+		/* In rare circumstances, the SW semaphore may already be held
+		 * unintentionally. Clear the semaphore once before giving up.
+		 */
+		if (hw->dev_spec._base.clear_semaphore_once) {
+			hw->dev_spec._base.clear_semaphore_once = false;
+			igc_put_hw_semaphore(hw);
+			for (i = 0; i < timeout; i++) {
+				swsm = rd32(IGC_SWSM);
+				if (!(swsm & IGC_SWSM_SMBI))
+					break;
+
+				usleep_range(500, 600);
+			}
+		}
+
+		/* If we do not have the semaphore here, we have to give up. */
+		if (i == timeout) {
+			hw_dbg("Driver can't access device - SMBI bit is set.\n");
+			return -IGC_ERR_NVM;
+		}
+	}
+
+	/* Get the FW semaphore. */
+	for (i = 0; i < timeout; i++) {
+		swsm = rd32(IGC_SWSM);
+		wr32(IGC_SWSM, swsm | IGC_SWSM_SWESMBI);
+
+		/* Semaphore acquired if bit latched */
+		if (rd32(IGC_SWSM) & IGC_SWSM_SWESMBI)
+			break;
+
+		usleep_range(500, 600);
+	}
+
+	if (i == timeout) {
+		/* Release semaphores */
+		igc_put_hw_semaphore(hw);
+		hw_dbg("Driver can't access the NVM\n");
+		return -IGC_ERR_NVM;
+	}
+
+	return 0;
+}
+
+/**
+ * igc_acquire_swfw_sync_i225 - Acquire SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Acquire the SW/FW semaphore to access the PHY or NVM.  The mask
+ * will also specify which port we're acquiring the lock for.
+ */
+s32 igc_acquire_swfw_sync_i225(struct igc_hw *hw, u16 mask)
+{
+	s32 i = 0, timeout = 200;
+	u32 fwmask = mask << 16;
+	u32 swmask = mask;
+	s32 ret_val = 0;
+	u32 swfw_sync;
+
+	while (i < timeout) {
+		if (igc_get_hw_semaphore_i225(hw)) {
+			ret_val = -IGC_ERR_SWFW_SYNC;
+			goto out;
+		}
+
+		swfw_sync = rd32(IGC_SW_FW_SYNC);
+		if (!(swfw_sync & (fwmask | swmask)))
+			break;
+
+		/* Firmware currently using resource (fwmask) */
+		igc_put_hw_semaphore(hw);
+		mdelay(5);
+		i++;
+	}
+
+	if (i == timeout) {
+		hw_dbg("Driver can't access resource, SW_FW_SYNC timeout.\n");
+		ret_val = -IGC_ERR_SWFW_SYNC;
+		goto out;
+	}
+
+	swfw_sync |= swmask;
+	wr32(IGC_SW_FW_SYNC, swfw_sync);
+
+	igc_put_hw_semaphore(hw);
+out:
+	return ret_val;
+}
+
+/**
+ * igc_release_swfw_sync_i225 - Release SW/FW semaphore
+ * @hw: pointer to the HW structure
+ * @mask: specifies which semaphore to acquire
+ *
+ * Release the SW/FW semaphore used to access the PHY or NVM.  The mask
+ * will also specify which port we're releasing the lock for.
+ */
+void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask)
+{
+	u32 swfw_sync;
+
+	/* Releasing the resource requires first getting the HW semaphore.
+	 * If we fail to get the semaphore, there is nothing we can do,
+	 * except log an error and quit. We are not allowed to hang here
+	 * indefinitely, as it may cause denial of service or system crash.
+	 */
+	if (igc_get_hw_semaphore_i225(hw)) {
+		hw_dbg("Failed to release SW_FW_SYNC.\n");
+		return;
+	}
+
+	swfw_sync = rd32(IGC_SW_FW_SYNC);
+	swfw_sync &= ~mask;
+	wr32(IGC_SW_FW_SYNC, swfw_sync);
+
+	igc_put_hw_semaphore(hw);
+}
+
+/**
+ * igc_read_nvm_srrd_i225 - Reads Shadow Ram using EERD register
+ * @hw: pointer to the HW structure
+ * @offset: offset of word in the Shadow Ram to read
+ * @words: number of words to read
+ * @data: word read from the Shadow Ram
+ *
+ * Reads a 16 bit word from the Shadow Ram using the EERD register.
+ * Uses necessary synchronization semaphores.
+ */
+static s32 igc_read_nvm_srrd_i225(struct igc_hw *hw, u16 offset, u16 words,
+				  u16 *data)
+{
+	s32 status = 0;
+	u16 i, count;
+
+	/* We cannot hold synchronization semaphores for too long,
+	 * because of forceful takeover procedure. However it is more efficient
+	 * to read in bursts than synchronizing access for each word.
+	 */
+	for (i = 0; i < words; i += IGC_EERD_EEWR_MAX_COUNT) {
+		count = (words - i) / IGC_EERD_EEWR_MAX_COUNT > 0 ?
+			IGC_EERD_EEWR_MAX_COUNT : (words - i);
+
+		status = hw->nvm.ops.acquire(hw);
+		if (status)
+			break;
+
+		status = igc_read_nvm_eerd(hw, offset, count, data + i);
+		hw->nvm.ops.release(hw);
+		if (status)
+			break;
+	}
+
+	return status;
+}
+
+/**
+ * igc_write_nvm_srwr - Write to Shadow Ram using EEWR
+ * @hw: pointer to the HW structure
+ * @offset: offset within the Shadow Ram to be written to
+ * @words: number of words to write
+ * @data: 16 bit word(s) to be written to the Shadow Ram
+ *
+ * Writes data to Shadow Ram at offset using EEWR register.
+ *
+ * If igc_update_nvm_checksum is not called after this function , the
+ * Shadow Ram will most likely contain an invalid checksum.
+ */
+static s32 igc_write_nvm_srwr(struct igc_hw *hw, u16 offset, u16 words,
+			      u16 *data)
+{
+	struct igc_nvm_info *nvm = &hw->nvm;
+	s32 ret_val = -IGC_ERR_NVM;
+	u32 attempts = 100000;
+	u32 i, k, eewr = 0;
+
+	/* A check for invalid values:  offset too large, too many words,
+	 * too many words for the offset, and not enough words.
+	 */
+	if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) ||
+	    words == 0) {
+		hw_dbg("nvm parameter(s) out of bounds\n");
+		return ret_val;
+	}
+
+	for (i = 0; i < words; i++) {
+		ret_val = -IGC_ERR_NVM;
+		eewr = ((offset + i) << IGC_NVM_RW_ADDR_SHIFT) |
+			(data[i] << IGC_NVM_RW_REG_DATA) |
+			IGC_NVM_RW_REG_START;
+
+		wr32(IGC_SRWR, eewr);
+
+		for (k = 0; k < attempts; k++) {
+			if (IGC_NVM_RW_REG_DONE &
+			    rd32(IGC_SRWR)) {
+				ret_val = 0;
+				break;
+			}
+			udelay(5);
+		}
+
+		if (ret_val) {
+			hw_dbg("Shadow RAM write EEWR timed out\n");
+			break;
+		}
+	}
+
+	return ret_val;
+}
+
+/**
+ * igc_write_nvm_srwr_i225 - Write to Shadow RAM using EEWR
+ * @hw: pointer to the HW structure
+ * @offset: offset within the Shadow RAM to be written to
+ * @words: number of words to write
+ * @data: 16 bit word(s) to be written to the Shadow RAM
+ *
+ * Writes data to Shadow RAM at offset using EEWR register.
+ *
+ * If igc_update_nvm_checksum is not called after this function , the
+ * data will not be committed to FLASH and also Shadow RAM will most likely
+ * contain an invalid checksum.
+ *
+ * If error code is returned, data and Shadow RAM may be inconsistent - buffer
+ * partially written.
+ */
+static s32 igc_write_nvm_srwr_i225(struct igc_hw *hw, u16 offset, u16 words,
+				   u16 *data)
+{
+	s32 status = 0;
+	u16 i, count;
+
+	/* We cannot hold synchronization semaphores for too long,
+	 * because of forceful takeover procedure. However it is more efficient
+	 * to write in bursts than synchronizing access for each word.
+	 */
+	for (i = 0; i < words; i += IGC_EERD_EEWR_MAX_COUNT) {
+		count = (words - i) / IGC_EERD_EEWR_MAX_COUNT > 0 ?
+			IGC_EERD_EEWR_MAX_COUNT : (words - i);
+
+		status = hw->nvm.ops.acquire(hw);
+		if (status)
+			break;
+
+		status = igc_write_nvm_srwr(hw, offset, count, data + i);
+		hw->nvm.ops.release(hw);
+		if (status)
+			break;
+	}
+
+	return status;
+}
+
+/**
+ * igc_validate_nvm_checksum_i225 - Validate EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ * and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ */
+static s32 igc_validate_nvm_checksum_i225(struct igc_hw *hw)
+{
+	s32 (*read_op_ptr)(struct igc_hw *hw, u16 offset, u16 count,
+			   u16 *data);
+	s32 status = 0;
+
+	status = hw->nvm.ops.acquire(hw);
+	if (status)
+		goto out;
+
+	/* Replace the read function with semaphore grabbing with
+	 * the one that skips this for a while.
+	 * We have semaphore taken already here.
+	 */
+	read_op_ptr = hw->nvm.ops.read;
+	hw->nvm.ops.read = igc_read_nvm_eerd;
+
+	status = igc_validate_nvm_checksum(hw);
+
+	/* Revert original read operation. */
+	hw->nvm.ops.read = read_op_ptr;
+
+	hw->nvm.ops.release(hw);
+
+out:
+	return status;
+}
+
+/**
+ * igc_pool_flash_update_done_i225 - Pool FLUDONE status
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_pool_flash_update_done_i225(struct igc_hw *hw)
+{
+	s32 ret_val = -IGC_ERR_NVM;
+	u32 i, reg;
+
+	for (i = 0; i < IGC_FLUDONE_ATTEMPTS; i++) {
+		reg = rd32(IGC_EECD);
+		if (reg & IGC_EECD_FLUDONE_I225) {
+			ret_val = 0;
+			break;
+		}
+		udelay(5);
+	}
+
+	return ret_val;
+}
+
+/**
+ * igc_update_flash_i225 - Commit EEPROM to the flash
+ * @hw: pointer to the HW structure
+ */
+static s32 igc_update_flash_i225(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+	u32 flup;
+
+	ret_val = igc_pool_flash_update_done_i225(hw);
+	if (ret_val == -IGC_ERR_NVM) {
+		hw_dbg("Flash update time out\n");
+		goto out;
+	}
+
+	flup = rd32(IGC_EECD) | IGC_EECD_FLUPD_I225;
+	wr32(IGC_EECD, flup);
+
+	ret_val = igc_pool_flash_update_done_i225(hw);
+	if (ret_val)
+		hw_dbg("Flash update time out\n");
+	else
+		hw_dbg("Flash update complete\n");
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_update_nvm_checksum_i225 - Update EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ * up to the checksum.  Then calculates the EEPROM checksum and writes the
+ * value to the EEPROM. Next commit EEPROM data onto the Flash.
+ */
+static s32 igc_update_nvm_checksum_i225(struct igc_hw *hw)
+{
+	u16 checksum = 0;
+	s32 ret_val = 0;
+	u16 i, nvm_data;
+
+	/* Read the first word from the EEPROM. If this times out or fails, do
+	 * not continue or we could be in for a very long wait while every
+	 * EEPROM read fails
+	 */
+	ret_val = igc_read_nvm_eerd(hw, 0, 1, &nvm_data);
+	if (ret_val) {
+		hw_dbg("EEPROM read failed\n");
+		goto out;
+	}
+
+	ret_val = hw->nvm.ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	/* Do not use hw->nvm.ops.write, hw->nvm.ops.read
+	 * because we do not want to take the synchronization
+	 * semaphores twice here.
+	 */
+
+	for (i = 0; i < NVM_CHECKSUM_REG; i++) {
+		ret_val = igc_read_nvm_eerd(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			hw->nvm.ops.release(hw);
+			hw_dbg("NVM Read Error while updating checksum.\n");
+			goto out;
+		}
+		checksum += nvm_data;
+	}
+	checksum = (u16)NVM_SUM - checksum;
+	ret_val = igc_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1,
+				     &checksum);
+	if (ret_val) {
+		hw->nvm.ops.release(hw);
+		hw_dbg("NVM Write Error while updating checksum.\n");
+		goto out;
+	}
+
+	hw->nvm.ops.release(hw);
+
+	ret_val = igc_update_flash_i225(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_get_flash_presence_i225 - Check if flash device is detected
+ * @hw: pointer to the HW structure
+ */
+bool igc_get_flash_presence_i225(struct igc_hw *hw)
+{
+	bool ret_val = false;
+	u32 eec = 0;
+
+	eec = rd32(IGC_EECD);
+	if (eec & IGC_EECD_FLASH_DETECTED_I225)
+		ret_val = true;
+
+	return ret_val;
+}
+
+/**
+ * igc_init_nvm_params_i225 - Init NVM func ptrs.
+ * @hw: pointer to the HW structure
+ */
+s32 igc_init_nvm_params_i225(struct igc_hw *hw)
+{
+	struct igc_nvm_info *nvm = &hw->nvm;
+
+	nvm->ops.acquire = igc_acquire_nvm_i225;
+	nvm->ops.release = igc_release_nvm_i225;
+
+	/* NVM Function Pointers */
+	if (igc_get_flash_presence_i225(hw)) {
+		nvm->ops.read = igc_read_nvm_srrd_i225;
+		nvm->ops.write = igc_write_nvm_srwr_i225;
+		nvm->ops.validate = igc_validate_nvm_checksum_i225;
+		nvm->ops.update = igc_update_nvm_checksum_i225;
+	} else {
+		nvm->ops.read = igc_read_nvm_eerd;
+		nvm->ops.write = NULL;
+		nvm->ops.validate = NULL;
+		nvm->ops.update = NULL;
+	}
+	return 0;
+}
+
+/**
+ *  igc_set_eee_i225 - Enable/disable EEE support
+ *  @hw: pointer to the HW structure
+ *  @adv2p5G: boolean flag enabling 2.5G EEE advertisement
+ *  @adv1G: boolean flag enabling 1G EEE advertisement
+ *  @adv100M: boolean flag enabling 100M EEE advertisement
+ *
+ *  Enable/disable EEE based on setting in dev_spec structure.
+ **/
+s32 igc_set_eee_i225(struct igc_hw *hw, bool adv2p5G, bool adv1G,
+		     bool adv100M)
+{
+	u32 ipcnfg, eeer;
+
+	ipcnfg = rd32(IGC_IPCNFG);
+	eeer = rd32(IGC_EEER);
+
+	/* enable or disable per user setting */
+	if (hw->dev_spec._base.eee_enable) {
+		u32 eee_su = rd32(IGC_EEE_SU);
+
+		if (adv100M)
+			ipcnfg |= IGC_IPCNFG_EEE_100M_AN;
+		else
+			ipcnfg &= ~IGC_IPCNFG_EEE_100M_AN;
+
+		if (adv1G)
+			ipcnfg |= IGC_IPCNFG_EEE_1G_AN;
+		else
+			ipcnfg &= ~IGC_IPCNFG_EEE_1G_AN;
+
+		if (adv2p5G)
+			ipcnfg |= IGC_IPCNFG_EEE_2_5G_AN;
+		else
+			ipcnfg &= ~IGC_IPCNFG_EEE_2_5G_AN;
+
+		eeer |= (IGC_EEER_TX_LPI_EN | IGC_EEER_RX_LPI_EN |
+			 IGC_EEER_LPI_FC);
+
+		/* This bit should not be set in normal operation. */
+		if (eee_su & IGC_EEE_SU_LPI_CLK_STP)
+			hw_dbg("LPI Clock Stop Bit should not be set!\n");
+	} else {
+		ipcnfg &= ~(IGC_IPCNFG_EEE_2_5G_AN | IGC_IPCNFG_EEE_1G_AN |
+			    IGC_IPCNFG_EEE_100M_AN);
+		eeer &= ~(IGC_EEER_TX_LPI_EN | IGC_EEER_RX_LPI_EN |
+			  IGC_EEER_LPI_FC);
+	}
+	wr32(IGC_IPCNFG, ipcnfg);
+	wr32(IGC_EEER, eeer);
+	rd32(IGC_IPCNFG);
+	rd32(IGC_EEER);
+
+	return IGC_SUCCESS;
+}
+
+/* igc_set_ltr_i225 - Set Latency Tolerance Reporting thresholds
+ * @hw: pointer to the HW structure
+ * @link: bool indicating link status
+ *
+ * Set the LTR thresholds based on the link speed (Mbps), EEE, and DMAC
+ * settings, otherwise specify that there is no LTR requirement.
+ */
+s32 igc_set_ltr_i225(struct igc_hw *hw, bool link)
+{
+	u32 tw_system, ltrc, ltrv, ltr_min, ltr_max, scale_min, scale_max;
+	u16 speed, duplex;
+	s32 size;
+
+	/* If we do not have link, LTR thresholds are zero. */
+	if (link) {
+		hw->mac.ops.get_speed_and_duplex(hw, &speed, &duplex);
+
+		/* Check if using copper interface with EEE enabled or if the
+		 * link speed is 10 Mbps.
+		 */
+		if (hw->dev_spec._base.eee_enable &&
+		    speed != SPEED_10) {
+			/* EEE enabled, so send LTRMAX threshold. */
+			ltrc = rd32(IGC_LTRC) |
+			       IGC_LTRC_EEEMS_EN;
+			wr32(IGC_LTRC, ltrc);
+
+			/* Calculate tw_system (nsec). */
+			if (speed == SPEED_100) {
+				tw_system = ((rd32(IGC_EEE_SU) &
+					     IGC_TW_SYSTEM_100_MASK) >>
+					     IGC_TW_SYSTEM_100_SHIFT) * 500;
+			} else {
+				tw_system = (rd32(IGC_EEE_SU) &
+					     IGC_TW_SYSTEM_1000_MASK) * 500;
+			}
+		} else {
+			tw_system = 0;
+		}
+
+		/* Get the Rx packet buffer size. */
+		size = rd32(IGC_RXPBS) &
+		       IGC_RXPBS_SIZE_I225_MASK;
+
+		/* Convert size to bytes, subtract the MTU, and then
+		 * convert the size to bits.
+		 */
+		size *= 1024;
+		size *= 8;
+
+		if (size < 0) {
+			hw_dbg("Invalid effective Rx buffer size %d\n",
+			       size);
+			return -IGC_ERR_CONFIG;
+		}
+
+		/* Calculate the thresholds. Since speed is in Mbps, simplify
+		 * the calculation by multiplying size/speed by 1000 for result
+		 * to be in nsec before dividing by the scale in nsec. Set the
+		 * scale such that the LTR threshold fits in the register.
+		 */
+		ltr_min = (1000 * size) / speed;
+		ltr_max = ltr_min + tw_system;
+		scale_min = (ltr_min / 1024) < 1024 ? IGC_LTRMINV_SCALE_1024 :
+			    IGC_LTRMINV_SCALE_32768;
+		scale_max = (ltr_max / 1024) < 1024 ? IGC_LTRMAXV_SCALE_1024 :
+			    IGC_LTRMAXV_SCALE_32768;
+		ltr_min /= scale_min == IGC_LTRMINV_SCALE_1024 ? 1024 : 32768;
+		ltr_min -= 1;
+		ltr_max /= scale_max == IGC_LTRMAXV_SCALE_1024 ? 1024 : 32768;
+		ltr_max -= 1;
+
+		/* Only write the LTR thresholds if they differ from before. */
+		ltrv = rd32(IGC_LTRMINV);
+		if (ltr_min != (ltrv & IGC_LTRMINV_LTRV_MASK)) {
+			ltrv = IGC_LTRMINV_LSNP_REQ | ltr_min |
+			       (scale_min << IGC_LTRMINV_SCALE_SHIFT);
+			wr32(IGC_LTRMINV, ltrv);
+		}
+
+		ltrv = rd32(IGC_LTRMAXV);
+		if (ltr_max != (ltrv & IGC_LTRMAXV_LTRV_MASK)) {
+			ltrv = IGC_LTRMAXV_LSNP_REQ | ltr_max |
+			       (scale_max << IGC_LTRMAXV_SCALE_SHIFT);
+			wr32(IGC_LTRMAXV, ltrv);
+		}
+	}
+
+	return IGC_SUCCESS;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_i225.h b/drivers/net/ethernet/intel/igc/igc_i225.h
new file mode 100644
index 0000000000..dae47e4f16
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_i225.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_I225_H_
+#define _IGC_I225_H_
+
+s32 igc_acquire_swfw_sync_i225(struct igc_hw *hw, u16 mask);
+void igc_release_swfw_sync_i225(struct igc_hw *hw, u16 mask);
+
+s32 igc_init_nvm_params_i225(struct igc_hw *hw);
+bool igc_get_flash_presence_i225(struct igc_hw *hw);
+s32 igc_set_eee_i225(struct igc_hw *hw, bool adv2p5G, bool adv1G,
+		     bool adv100M);
+s32 igc_set_ltr_i225(struct igc_hw *hw, bool link);
+
+#endif
diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c
new file mode 100644
index 0000000000..a5c4b19d71
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_mac.c
@@ -0,0 +1,881 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2018 Intel Corporation */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "igc_mac.h"
+#include "igc_hw.h"
+
+/**
+ * igc_disable_pcie_master - Disables PCI-express master access
+ * @hw: pointer to the HW structure
+ *
+ * Returns 0 (0) if successful, else returns -10
+ * (-IGC_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused
+ * the master requests to be disabled.
+ *
+ * Disables PCI-Express master access and verifies there are no pending
+ * requests.
+ */
+s32 igc_disable_pcie_master(struct igc_hw *hw)
+{
+	s32 timeout = MASTER_DISABLE_TIMEOUT;
+	s32 ret_val = 0;
+	u32 ctrl;
+
+	ctrl = rd32(IGC_CTRL);
+	ctrl |= IGC_CTRL_GIO_MASTER_DISABLE;
+	wr32(IGC_CTRL, ctrl);
+
+	while (timeout) {
+		if (!(rd32(IGC_STATUS) &
+		    IGC_STATUS_GIO_MASTER_ENABLE))
+			break;
+		usleep_range(2000, 3000);
+		timeout--;
+	}
+
+	if (!timeout) {
+		hw_dbg("Master requests are pending.\n");
+		ret_val = -IGC_ERR_MASTER_REQUESTS_PENDING;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_init_rx_addrs - Initialize receive addresses
+ * @hw: pointer to the HW structure
+ * @rar_count: receive address registers
+ *
+ * Setup the receive address registers by setting the base receive address
+ * register to the devices MAC address and clearing all the other receive
+ * address registers to 0.
+ */
+void igc_init_rx_addrs(struct igc_hw *hw, u16 rar_count)
+{
+	u8 mac_addr[ETH_ALEN] = {0};
+	u32 i;
+
+	/* Setup the receive address */
+	hw_dbg("Programming MAC Address into RAR[0]\n");
+
+	hw->mac.ops.rar_set(hw, hw->mac.addr, 0);
+
+	/* Zero out the other (rar_entry_count - 1) receive addresses */
+	hw_dbg("Clearing RAR[1-%u]\n", rar_count - 1);
+	for (i = 1; i < rar_count; i++)
+		hw->mac.ops.rar_set(hw, mac_addr, i);
+}
+
+/**
+ * igc_set_fc_watermarks - Set flow control high/low watermarks
+ * @hw: pointer to the HW structure
+ *
+ * Sets the flow control high/low threshold (watermark) registers.  If
+ * flow control XON frame transmission is enabled, then set XON frame
+ * transmission as well.
+ */
+static s32 igc_set_fc_watermarks(struct igc_hw *hw)
+{
+	u32 fcrtl = 0, fcrth = 0;
+
+	/* Set the flow control receive threshold registers.  Normally,
+	 * these registers will be set to a default threshold that may be
+	 * adjusted later by the driver's runtime code.  However, if the
+	 * ability to transmit pause frames is not enabled, then these
+	 * registers will be set to 0.
+	 */
+	if (hw->fc.current_mode & igc_fc_tx_pause) {
+		/* We need to set up the Receive Threshold high and low water
+		 * marks as well as (optionally) enabling the transmission of
+		 * XON frames.
+		 */
+		fcrtl = hw->fc.low_water;
+		if (hw->fc.send_xon)
+			fcrtl |= IGC_FCRTL_XONE;
+
+		fcrth = hw->fc.high_water;
+	}
+	wr32(IGC_FCRTL, fcrtl);
+	wr32(IGC_FCRTH, fcrth);
+
+	return 0;
+}
+
+/**
+ * igc_setup_link - Setup flow control and link settings
+ * @hw: pointer to the HW structure
+ *
+ * Determines which flow control settings to use, then configures flow
+ * control.  Calls the appropriate media-specific link configuration
+ * function.  Assuming the adapter has a valid link partner, a valid link
+ * should be established.  Assumes the hardware has previously been reset
+ * and the transmitter and receiver are not enabled.
+ */
+s32 igc_setup_link(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+
+	/* In the case of the phy reset being blocked, we already have a link.
+	 * We do not need to set it up again.
+	 */
+	if (igc_check_reset_block(hw))
+		goto out;
+
+	/* If requested flow control is set to default, set flow control
+	 * to the both 'rx' and 'tx' pause frames.
+	 */
+	if (hw->fc.requested_mode == igc_fc_default)
+		hw->fc.requested_mode = igc_fc_full;
+
+	/* We want to save off the original Flow Control configuration just
+	 * in case we get disconnected and then reconnected into a different
+	 * hub or switch with different Flow Control capabilities.
+	 */
+	hw->fc.current_mode = hw->fc.requested_mode;
+
+	hw_dbg("After fix-ups FlowControl is now = %x\n", hw->fc.current_mode);
+
+	/* Call the necessary media_type subroutine to configure the link. */
+	ret_val = hw->mac.ops.setup_physical_interface(hw);
+	if (ret_val)
+		goto out;
+
+	/* Initialize the flow control address, type, and PAUSE timer
+	 * registers to their default values.  This is done even if flow
+	 * control is disabled, because it does not hurt anything to
+	 * initialize these registers.
+	 */
+	hw_dbg("Initializing the Flow Control address, type and timer regs\n");
+	wr32(IGC_FCT, FLOW_CONTROL_TYPE);
+	wr32(IGC_FCAH, FLOW_CONTROL_ADDRESS_HIGH);
+	wr32(IGC_FCAL, FLOW_CONTROL_ADDRESS_LOW);
+
+	wr32(IGC_FCTTV, hw->fc.pause_time);
+
+	ret_val = igc_set_fc_watermarks(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_force_mac_fc - Force the MAC's flow control settings
+ * @hw: pointer to the HW structure
+ *
+ * Force the MAC's flow control settings.  Sets the TFCE and RFCE bits in the
+ * device control register to reflect the adapter settings.  TFCE and RFCE
+ * need to be explicitly set by software when a copper PHY is used because
+ * autonegotiation is managed by the PHY rather than the MAC.  Software must
+ * also configure these bits when link is forced on a fiber connection.
+ */
+s32 igc_force_mac_fc(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+	u32 ctrl;
+
+	ctrl = rd32(IGC_CTRL);
+
+	/* Because we didn't get link via the internal auto-negotiation
+	 * mechanism (we either forced link or we got link via PHY
+	 * auto-neg), we have to manually enable/disable transmit an
+	 * receive flow control.
+	 *
+	 * The "Case" statement below enables/disable flow control
+	 * according to the "hw->fc.current_mode" parameter.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *      0:  Flow control is completely disabled
+	 *      1:  Rx flow control is enabled (we can receive pause
+	 *          frames but not send pause frames).
+	 *      2:  Tx flow control is enabled (we can send pause frames
+	 *          but we do not receive pause frames).
+	 *      3:  Both Rx and TX flow control (symmetric) is enabled.
+	 *  other:  No other values should be possible at this point.
+	 */
+	hw_dbg("hw->fc.current_mode = %u\n", hw->fc.current_mode);
+
+	switch (hw->fc.current_mode) {
+	case igc_fc_none:
+		ctrl &= (~(IGC_CTRL_TFCE | IGC_CTRL_RFCE));
+		break;
+	case igc_fc_rx_pause:
+		ctrl &= (~IGC_CTRL_TFCE);
+		ctrl |= IGC_CTRL_RFCE;
+		break;
+	case igc_fc_tx_pause:
+		ctrl &= (~IGC_CTRL_RFCE);
+		ctrl |= IGC_CTRL_TFCE;
+		break;
+	case igc_fc_full:
+		ctrl |= (IGC_CTRL_TFCE | IGC_CTRL_RFCE);
+		break;
+	default:
+		hw_dbg("Flow control param set incorrectly\n");
+		ret_val = -IGC_ERR_CONFIG;
+		goto out;
+	}
+
+	wr32(IGC_CTRL, ctrl);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_clear_hw_cntrs_base - Clear base hardware counters
+ * @hw: pointer to the HW structure
+ *
+ * Clears the base hardware counters by reading the counter registers.
+ */
+void igc_clear_hw_cntrs_base(struct igc_hw *hw)
+{
+	rd32(IGC_CRCERRS);
+	rd32(IGC_MPC);
+	rd32(IGC_SCC);
+	rd32(IGC_ECOL);
+	rd32(IGC_MCC);
+	rd32(IGC_LATECOL);
+	rd32(IGC_COLC);
+	rd32(IGC_RERC);
+	rd32(IGC_DC);
+	rd32(IGC_RLEC);
+	rd32(IGC_XONRXC);
+	rd32(IGC_XONTXC);
+	rd32(IGC_XOFFRXC);
+	rd32(IGC_XOFFTXC);
+	rd32(IGC_FCRUC);
+	rd32(IGC_GPRC);
+	rd32(IGC_BPRC);
+	rd32(IGC_MPRC);
+	rd32(IGC_GPTC);
+	rd32(IGC_GORCL);
+	rd32(IGC_GORCH);
+	rd32(IGC_GOTCL);
+	rd32(IGC_GOTCH);
+	rd32(IGC_RNBC);
+	rd32(IGC_RUC);
+	rd32(IGC_RFC);
+	rd32(IGC_ROC);
+	rd32(IGC_RJC);
+	rd32(IGC_TORL);
+	rd32(IGC_TORH);
+	rd32(IGC_TOTL);
+	rd32(IGC_TOTH);
+	rd32(IGC_TPR);
+	rd32(IGC_TPT);
+	rd32(IGC_MPTC);
+	rd32(IGC_BPTC);
+
+	rd32(IGC_PRC64);
+	rd32(IGC_PRC127);
+	rd32(IGC_PRC255);
+	rd32(IGC_PRC511);
+	rd32(IGC_PRC1023);
+	rd32(IGC_PRC1522);
+	rd32(IGC_PTC64);
+	rd32(IGC_PTC127);
+	rd32(IGC_PTC255);
+	rd32(IGC_PTC511);
+	rd32(IGC_PTC1023);
+	rd32(IGC_PTC1522);
+
+	rd32(IGC_ALGNERRC);
+	rd32(IGC_RXERRC);
+	rd32(IGC_TNCRS);
+	rd32(IGC_HTDPMC);
+	rd32(IGC_TSCTC);
+
+	rd32(IGC_MGTPRC);
+	rd32(IGC_MGTPDC);
+	rd32(IGC_MGTPTC);
+
+	rd32(IGC_IAC);
+
+	rd32(IGC_RPTHC);
+	rd32(IGC_TLPIC);
+	rd32(IGC_RLPIC);
+	rd32(IGC_HGPTC);
+	rd32(IGC_RXDMTC);
+	rd32(IGC_HGORCL);
+	rd32(IGC_HGORCH);
+	rd32(IGC_HGOTCL);
+	rd32(IGC_HGOTCH);
+	rd32(IGC_LENERRS);
+}
+
+/**
+ * igc_rar_set - Set receive address register
+ * @hw: pointer to the HW structure
+ * @addr: pointer to the receive address
+ * @index: receive address array register
+ *
+ * Sets the receive address array register at index to the address passed
+ * in by addr.
+ */
+void igc_rar_set(struct igc_hw *hw, u8 *addr, u32 index)
+{
+	u32 rar_low, rar_high;
+
+	/* HW expects these in little endian so we reverse the byte order
+	 * from network order (big endian) to little endian
+	 */
+	rar_low = ((u32)addr[0] |
+		   ((u32)addr[1] << 8) |
+		   ((u32)addr[2] << 16) | ((u32)addr[3] << 24));
+
+	rar_high = ((u32)addr[4] | ((u32)addr[5] << 8));
+
+	/* If MAC address zero, no need to set the AV bit */
+	if (rar_low || rar_high)
+		rar_high |= IGC_RAH_AV;
+
+	/* Some bridges will combine consecutive 32-bit writes into
+	 * a single burst write, which will malfunction on some parts.
+	 * The flushes avoid this.
+	 */
+	wr32(IGC_RAL(index), rar_low);
+	wrfl();
+	wr32(IGC_RAH(index), rar_high);
+	wrfl();
+}
+
+/**
+ * igc_check_for_copper_link - Check for link (Copper)
+ * @hw: pointer to the HW structure
+ *
+ * Checks to see of the link status of the hardware has changed.  If a
+ * change in link status has been detected, then we read the PHY registers
+ * to get the current speed/duplex if link exists.
+ */
+s32 igc_check_for_copper_link(struct igc_hw *hw)
+{
+	struct igc_mac_info *mac = &hw->mac;
+	bool link = false;
+	s32 ret_val;
+
+	/* We only want to go out to the PHY registers to see if Auto-Neg
+	 * has completed and/or if our link status has changed.  The
+	 * get_link_status flag is set upon receiving a Link Status
+	 * Change or Rx Sequence Error interrupt.
+	 */
+	if (!mac->get_link_status) {
+		ret_val = 0;
+		goto out;
+	}
+
+	/* First we want to see if the MII Status Register reports
+	 * link.  If so, then we want to get the current speed/duplex
+	 * of the PHY.
+	 */
+	ret_val = igc_phy_has_link(hw, 1, 0, &link);
+	if (ret_val)
+		goto out;
+
+	if (!link)
+		goto out; /* No link detected */
+
+	mac->get_link_status = false;
+
+	/* Check if there was DownShift, must be checked
+	 * immediately after link-up
+	 */
+	igc_check_downshift(hw);
+
+	/* If we are forcing speed/duplex, then we simply return since
+	 * we have already determined whether we have link or not.
+	 */
+	if (!mac->autoneg) {
+		ret_val = -IGC_ERR_CONFIG;
+		goto out;
+	}
+
+	/* Auto-Neg is enabled.  Auto Speed Detection takes care
+	 * of MAC speed/duplex configuration.  So we only need to
+	 * configure Collision Distance in the MAC.
+	 */
+	igc_config_collision_dist(hw);
+
+	/* Configure Flow Control now that Auto-Neg has completed.
+	 * First, we need to restore the desired flow control
+	 * settings because we may have had to re-autoneg with a
+	 * different link partner.
+	 */
+	ret_val = igc_config_fc_after_link_up(hw);
+	if (ret_val)
+		hw_dbg("Error configuring flow control\n");
+
+out:
+	/* Now that we are aware of our link settings, we can set the LTR
+	 * thresholds.
+	 */
+	ret_val = igc_set_ltr_i225(hw, link);
+
+	return ret_val;
+}
+
+/**
+ * igc_config_collision_dist - Configure collision distance
+ * @hw: pointer to the HW structure
+ *
+ * Configures the collision distance to the default value and is used
+ * during link setup. Currently no func pointer exists and all
+ * implementations are handled in the generic version of this function.
+ */
+void igc_config_collision_dist(struct igc_hw *hw)
+{
+	u32 tctl;
+
+	tctl = rd32(IGC_TCTL);
+
+	tctl &= ~IGC_TCTL_COLD;
+	tctl |= IGC_COLLISION_DISTANCE << IGC_COLD_SHIFT;
+
+	wr32(IGC_TCTL, tctl);
+	wrfl();
+}
+
+/**
+ * igc_config_fc_after_link_up - Configures flow control after link
+ * @hw: pointer to the HW structure
+ *
+ * Checks the status of auto-negotiation after link up to ensure that the
+ * speed and duplex were not forced.  If the link needed to be forced, then
+ * flow control needs to be forced also.  If auto-negotiation is enabled
+ * and did not fail, then we configure flow control based on our link
+ * partner.
+ */
+s32 igc_config_fc_after_link_up(struct igc_hw *hw)
+{
+	u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg;
+	struct igc_mac_info *mac = &hw->mac;
+	u16 speed, duplex;
+	s32 ret_val = 0;
+
+	/* Check for the case where we have fiber media and auto-neg failed
+	 * so we had to force link.  In this case, we need to force the
+	 * configuration of the MAC to match the "fc" parameter.
+	 */
+	if (mac->autoneg_failed)
+		ret_val = igc_force_mac_fc(hw);
+
+	if (ret_val) {
+		hw_dbg("Error forcing flow control settings\n");
+		goto out;
+	}
+
+	/* Check for the case where we have copper media and auto-neg is
+	 * enabled.  In this case, we need to check and see if Auto-Neg
+	 * has completed, and if so, how the PHY and link partner has
+	 * flow control configured.
+	 */
+	if (mac->autoneg) {
+		/* Read the MII Status Register and check to see if AutoNeg
+		 * has completed.  We read this twice because this reg has
+		 * some "sticky" (latched) bits.
+		 */
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS,
+					       &mii_status_reg);
+		if (ret_val)
+			goto out;
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS,
+					       &mii_status_reg);
+		if (ret_val)
+			goto out;
+
+		if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) {
+			hw_dbg("Copper PHY and Auto Neg has not completed.\n");
+			goto out;
+		}
+
+		/* The AutoNeg process has completed, so we now need to
+		 * read both the Auto Negotiation Advertisement
+		 * Register (Address 4) and the Auto_Negotiation Base
+		 * Page Ability Register (Address 5) to determine how
+		 * flow control was negotiated.
+		 */
+		ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV,
+					       &mii_nway_adv_reg);
+		if (ret_val)
+			goto out;
+		ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY,
+					       &mii_nway_lp_ability_reg);
+		if (ret_val)
+			goto out;
+		/* Two bits in the Auto Negotiation Advertisement Register
+		 * (Address 4) and two bits in the Auto Negotiation Base
+		 * Page Ability Register (Address 5) determine flow control
+		 * for both the PHY and the link partner.  The following
+		 * table, taken out of the IEEE 802.3ab/D6.0 dated March 25,
+		 * 1999, describes these PAUSE resolution bits and how flow
+		 * control is determined based upon these settings.
+		 * NOTE:  DC = Don't Care
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    0    |  DC   |   DC    | igc_fc_none
+		 *   0   |    1    |   0   |   DC    | igc_fc_none
+		 *   0   |    1    |   1   |    0    | igc_fc_none
+		 *   0   |    1    |   1   |    1    | igc_fc_tx_pause
+		 *   1   |    0    |   0   |   DC    | igc_fc_none
+		 *   1   |   DC    |   1   |   DC    | igc_fc_full
+		 *   1   |    1    |   0   |    0    | igc_fc_none
+		 *   1   |    1    |   0   |    1    | igc_fc_rx_pause
+		 *
+		 * Are both PAUSE bits set to 1?  If so, this implies
+		 * Symmetric Flow Control is enabled at both ends.  The
+		 * ASM_DIR bits are irrelevant per the spec.
+		 *
+		 * For Symmetric Flow Control:
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |   DC    |   1   |   DC    | IGC_fc_full
+		 *
+		 */
+		if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+		    (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) {
+			/* Now we need to check if the user selected RX ONLY
+			 * of pause frames.  In this case, we had to advertise
+			 * FULL flow control because we could not advertise RX
+			 * ONLY. Hence, we must now check to see if we need to
+			 * turn OFF  the TRANSMISSION of PAUSE frames.
+			 */
+			if (hw->fc.requested_mode == igc_fc_full) {
+				hw->fc.current_mode = igc_fc_full;
+				hw_dbg("Flow Control = FULL.\n");
+			} else {
+				hw->fc.current_mode = igc_fc_rx_pause;
+				hw_dbg("Flow Control = RX PAUSE frames only.\n");
+			}
+		}
+
+		/* For receiving PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   0   |    1    |   1   |    1    | igc_fc_tx_pause
+		 */
+		else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+			 (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+			 (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+			 (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+			hw->fc.current_mode = igc_fc_tx_pause;
+			hw_dbg("Flow Control = TX PAUSE frames only.\n");
+		}
+		/* For transmitting PAUSE frames ONLY.
+		 *
+		 *   LOCAL DEVICE  |   LINK PARTNER
+		 * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result
+		 *-------|---------|-------|---------|--------------------
+		 *   1   |    1    |   0   |    1    | igc_fc_rx_pause
+		 */
+		else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) &&
+			 (mii_nway_adv_reg & NWAY_AR_ASM_DIR) &&
+			 !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) &&
+			 (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) {
+			hw->fc.current_mode = igc_fc_rx_pause;
+			hw_dbg("Flow Control = RX PAUSE frames only.\n");
+		}
+		/* Per the IEEE spec, at this point flow control should be
+		 * disabled.  However, we want to consider that we could
+		 * be connected to a legacy switch that doesn't advertise
+		 * desired flow control, but can be forced on the link
+		 * partner.  So if we advertised no flow control, that is
+		 * what we will resolve to.  If we advertised some kind of
+		 * receive capability (Rx Pause Only or Full Flow Control)
+		 * and the link partner advertised none, we will configure
+		 * ourselves to enable Rx Flow Control only.  We can do
+		 * this safely for two reasons:  If the link partner really
+		 * didn't want flow control enabled, and we enable Rx, no
+		 * harm done since we won't be receiving any PAUSE frames
+		 * anyway.  If the intent on the link partner was to have
+		 * flow control enabled, then by us enabling RX only, we
+		 * can at least receive pause frames and process them.
+		 * This is a good idea because in most cases, since we are
+		 * predominantly a server NIC, more times than not we will
+		 * be asked to delay transmission of packets than asking
+		 * our link partner to pause transmission of frames.
+		 */
+		else if ((hw->fc.requested_mode == igc_fc_none) ||
+			 (hw->fc.requested_mode == igc_fc_tx_pause) ||
+			 (hw->fc.strict_ieee)) {
+			hw->fc.current_mode = igc_fc_none;
+			hw_dbg("Flow Control = NONE.\n");
+		} else {
+			hw->fc.current_mode = igc_fc_rx_pause;
+			hw_dbg("Flow Control = RX PAUSE frames only.\n");
+		}
+
+		/* Now we need to do one last check...  If we auto-
+		 * negotiated to HALF DUPLEX, flow control should not be
+		 * enabled per IEEE 802.3 spec.
+		 */
+		ret_val = hw->mac.ops.get_speed_and_duplex(hw, &speed, &duplex);
+		if (ret_val) {
+			hw_dbg("Error getting link speed and duplex\n");
+			goto out;
+		}
+
+		if (duplex == HALF_DUPLEX)
+			hw->fc.current_mode = igc_fc_none;
+
+		/* Now we call a subroutine to actually force the MAC
+		 * controller to use the correct flow control settings.
+		 */
+		ret_val = igc_force_mac_fc(hw);
+		if (ret_val) {
+			hw_dbg("Error forcing flow control settings\n");
+			goto out;
+		}
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_get_auto_rd_done - Check for auto read completion
+ * @hw: pointer to the HW structure
+ *
+ * Check EEPROM for Auto Read done bit.
+ */
+s32 igc_get_auto_rd_done(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+	s32 i = 0;
+
+	while (i < AUTO_READ_DONE_TIMEOUT) {
+		if (rd32(IGC_EECD) & IGC_EECD_AUTO_RD)
+			break;
+		usleep_range(1000, 2000);
+		i++;
+	}
+
+	if (i == AUTO_READ_DONE_TIMEOUT) {
+		hw_dbg("Auto read by HW from NVM has not completed.\n");
+		ret_val = -IGC_ERR_RESET;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_get_speed_and_duplex_copper - Retrieve current speed/duplex
+ * @hw: pointer to the HW structure
+ * @speed: stores the current speed
+ * @duplex: stores the current duplex
+ *
+ * Read the status register for the current speed/duplex and store the current
+ * speed and duplex for copper connections.
+ */
+s32 igc_get_speed_and_duplex_copper(struct igc_hw *hw, u16 *speed,
+				    u16 *duplex)
+{
+	u32 status;
+
+	status = rd32(IGC_STATUS);
+	if (status & IGC_STATUS_SPEED_1000) {
+		/* For I225, STATUS will indicate 1G speed in both 1 Gbps
+		 * and 2.5 Gbps link modes. An additional bit is used
+		 * to differentiate between 1 Gbps and 2.5 Gbps.
+		 */
+		if (hw->mac.type == igc_i225 &&
+		    (status & IGC_STATUS_SPEED_2500)) {
+			*speed = SPEED_2500;
+			hw_dbg("2500 Mbs, ");
+		} else {
+			*speed = SPEED_1000;
+			hw_dbg("1000 Mbs, ");
+		}
+	} else if (status & IGC_STATUS_SPEED_100) {
+		*speed = SPEED_100;
+		hw_dbg("100 Mbs, ");
+	} else {
+		*speed = SPEED_10;
+		hw_dbg("10 Mbs, ");
+	}
+
+	if (status & IGC_STATUS_FD) {
+		*duplex = FULL_DUPLEX;
+		hw_dbg("Full Duplex\n");
+	} else {
+		*duplex = HALF_DUPLEX;
+		hw_dbg("Half Duplex\n");
+	}
+
+	return 0;
+}
+
+/**
+ * igc_put_hw_semaphore - Release hardware semaphore
+ * @hw: pointer to the HW structure
+ *
+ * Release hardware semaphore used to access the PHY or NVM
+ */
+void igc_put_hw_semaphore(struct igc_hw *hw)
+{
+	u32 swsm;
+
+	swsm = rd32(IGC_SWSM);
+
+	swsm &= ~(IGC_SWSM_SMBI | IGC_SWSM_SWESMBI);
+
+	wr32(IGC_SWSM, swsm);
+}
+
+/**
+ * igc_enable_mng_pass_thru - Enable processing of ARP's
+ * @hw: pointer to the HW structure
+ *
+ * Verifies the hardware needs to leave interface enabled so that frames can
+ * be directed to and from the management interface.
+ */
+bool igc_enable_mng_pass_thru(struct igc_hw *hw)
+{
+	bool ret_val = false;
+	u32 fwsm, factps;
+	u32 manc;
+
+	if (!hw->mac.asf_firmware_present)
+		goto out;
+
+	manc = rd32(IGC_MANC);
+
+	if (!(manc & IGC_MANC_RCV_TCO_EN))
+		goto out;
+
+	if (hw->mac.arc_subsystem_valid) {
+		fwsm = rd32(IGC_FWSM);
+		factps = rd32(IGC_FACTPS);
+
+		if (!(factps & IGC_FACTPS_MNGCG) &&
+		    ((fwsm & IGC_FWSM_MODE_MASK) ==
+		    (igc_mng_mode_pt << IGC_FWSM_MODE_SHIFT))) {
+			ret_val = true;
+			goto out;
+		}
+	} else {
+		if ((manc & IGC_MANC_SMBUS_EN) &&
+		    !(manc & IGC_MANC_ASF_EN)) {
+			ret_val = true;
+			goto out;
+		}
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ *  igc_hash_mc_addr - Generate a multicast hash value
+ *  @hw: pointer to the HW structure
+ *  @mc_addr: pointer to a multicast address
+ *
+ *  Generates a multicast address hash value which is used to determine
+ *  the multicast filter table array address and new table value.  See
+ *  igc_mta_set()
+ **/
+static u32 igc_hash_mc_addr(struct igc_hw *hw, u8 *mc_addr)
+{
+	u32 hash_value, hash_mask;
+	u8 bit_shift = 0;
+
+	/* Register count multiplied by bits per register */
+	hash_mask = (hw->mac.mta_reg_count * 32) - 1;
+
+	/* For a mc_filter_type of 0, bit_shift is the number of left-shifts
+	 * where 0xFF would still fall within the hash mask.
+	 */
+	while (hash_mask >> bit_shift != 0xFF)
+		bit_shift++;
+
+	/* The portion of the address that is used for the hash table
+	 * is determined by the mc_filter_type setting.
+	 * The algorithm is such that there is a total of 8 bits of shifting.
+	 * The bit_shift for a mc_filter_type of 0 represents the number of
+	 * left-shifts where the MSB of mc_addr[5] would still fall within
+	 * the hash_mask.  Case 0 does this exactly.  Since there are a total
+	 * of 8 bits of shifting, then mc_addr[4] will shift right the
+	 * remaining number of bits. Thus 8 - bit_shift.  The rest of the
+	 * cases are a variation of this algorithm...essentially raising the
+	 * number of bits to shift mc_addr[5] left, while still keeping the
+	 * 8-bit shifting total.
+	 *
+	 * For example, given the following Destination MAC Address and an
+	 * MTA register count of 128 (thus a 4096-bit vector and 0xFFF mask),
+	 * we can see that the bit_shift for case 0 is 4.  These are the hash
+	 * values resulting from each mc_filter_type...
+	 * [0] [1] [2] [3] [4] [5]
+	 * 01  AA  00  12  34  56
+	 * LSB                 MSB
+	 *
+	 * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563
+	 * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6
+	 * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163
+	 * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634
+	 */
+	switch (hw->mac.mc_filter_type) {
+	default:
+	case 0:
+		break;
+	case 1:
+		bit_shift += 1;
+		break;
+	case 2:
+		bit_shift += 2;
+		break;
+	case 3:
+		bit_shift += 4;
+		break;
+	}
+
+	hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) |
+				  (((u16)mc_addr[5]) << bit_shift)));
+
+	return hash_value;
+}
+
+/**
+ *  igc_update_mc_addr_list - Update Multicast addresses
+ *  @hw: pointer to the HW structure
+ *  @mc_addr_list: array of multicast addresses to program
+ *  @mc_addr_count: number of multicast addresses to program
+ *
+ *  Updates entire Multicast Table Array.
+ *  The caller must have a packed mc_addr_list of multicast addresses.
+ **/
+void igc_update_mc_addr_list(struct igc_hw *hw,
+			     u8 *mc_addr_list, u32 mc_addr_count)
+{
+	u32 hash_value, hash_bit, hash_reg;
+	int i;
+
+	/* clear mta_shadow */
+	memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
+
+	/* update mta_shadow from mc_addr_list */
+	for (i = 0; (u32)i < mc_addr_count; i++) {
+		hash_value = igc_hash_mc_addr(hw, mc_addr_list);
+
+		hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1);
+		hash_bit = hash_value & 0x1F;
+
+		hw->mac.mta_shadow[hash_reg] |= BIT(hash_bit);
+		mc_addr_list += ETH_ALEN;
+	}
+
+	/* replace the entire MTA table */
+	for (i = hw->mac.mta_reg_count - 1; i >= 0; i--)
+		array_wr32(IGC_MTA, i, hw->mac.mta_shadow[i]);
+	wrfl();
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_mac.h b/drivers/net/ethernet/intel/igc/igc_mac.h
new file mode 100644
index 0000000000..b5963f86de
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_mac.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_MAC_H_
+#define _IGC_MAC_H_
+
+#include "igc_hw.h"
+#include "igc_phy.h"
+#include "igc_defines.h"
+
+/* forward declaration */
+s32 igc_disable_pcie_master(struct igc_hw *hw);
+s32 igc_check_for_copper_link(struct igc_hw *hw);
+s32 igc_config_fc_after_link_up(struct igc_hw *hw);
+s32 igc_force_mac_fc(struct igc_hw *hw);
+void igc_init_rx_addrs(struct igc_hw *hw, u16 rar_count);
+s32 igc_setup_link(struct igc_hw *hw);
+void igc_clear_hw_cntrs_base(struct igc_hw *hw);
+s32 igc_get_auto_rd_done(struct igc_hw *hw);
+void igc_put_hw_semaphore(struct igc_hw *hw);
+void igc_rar_set(struct igc_hw *hw, u8 *addr, u32 index);
+void igc_config_collision_dist(struct igc_hw *hw);
+
+s32 igc_get_speed_and_duplex_copper(struct igc_hw *hw, u16 *speed,
+				    u16 *duplex);
+
+bool igc_enable_mng_pass_thru(struct igc_hw *hw);
+void igc_update_mc_addr_list(struct igc_hw *hw,
+			     u8 *mc_addr_list, u32 mc_addr_count);
+
+enum igc_mng_mode {
+	igc_mng_mode_none = 0,
+	igc_mng_mode_asf,
+	igc_mng_mode_pt,
+	igc_mng_mode_ipmi,
+	igc_mng_mode_host_if_only
+};
+
+#endif
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
new file mode 100644
index 0000000000..98de34d0ce
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -0,0 +1,7453 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2018 Intel Corporation */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/if_vlan.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/ip.h>
+#include <linux/pm_runtime.h>
+#include <net/pkt_sched.h>
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock_drv.h>
+#include <linux/pci.h>
+
+#include <net/ipv6.h>
+
+#include "igc.h"
+#include "igc_hw.h"
+#include "igc_tsn.h"
+#include "igc_xdp.h"
+
+#define DRV_SUMMARY	"Intel(R) 2.5G Ethernet Linux Driver"
+
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
+
+#define IGC_XDP_PASS		0
+#define IGC_XDP_CONSUMED	BIT(0)
+#define IGC_XDP_TX		BIT(1)
+#define IGC_XDP_REDIRECT	BIT(2)
+
+static int debug = -1;
+
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION(DRV_SUMMARY);
+MODULE_LICENSE("GPL v2");
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+char igc_driver_name[] = "igc";
+static const char igc_driver_string[] = DRV_SUMMARY;
+static const char igc_copyright[] =
+	"Copyright(c) 2018 Intel Corporation.";
+
+static const struct igc_info *igc_info_tbl[] = {
+	[board_base] = &igc_base_info,
+};
+
+static const struct pci_device_id igc_pci_tbl[] = {
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LMVP), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base },
+	/* required last entry */
+	{0, }
+};
+
+MODULE_DEVICE_TABLE(pci, igc_pci_tbl);
+
+enum latency_range {
+	lowest_latency = 0,
+	low_latency = 1,
+	bulk_latency = 2,
+	latency_invalid = 255
+};
+
+void igc_reset(struct igc_adapter *adapter)
+{
+	struct net_device *dev = adapter->netdev;
+	struct igc_hw *hw = &adapter->hw;
+	struct igc_fc_info *fc = &hw->fc;
+	u32 pba, hwm;
+
+	/* Repartition PBA for greater than 9k MTU if required */
+	pba = IGC_PBA_34K;
+
+	/* flow control settings
+	 * The high water mark must be low enough to fit one full frame
+	 * after transmitting the pause frame.  As such we must have enough
+	 * space to allow for us to complete our current transmit and then
+	 * receive the frame that is in progress from the link partner.
+	 * Set it to:
+	 * - the full Rx FIFO size minus one full Tx plus one full Rx frame
+	 */
+	hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE);
+
+	fc->high_water = hwm & 0xFFFFFFF0;	/* 16-byte granularity */
+	fc->low_water = fc->high_water - 16;
+	fc->pause_time = 0xFFFF;
+	fc->send_xon = 1;
+	fc->current_mode = fc->requested_mode;
+
+	hw->mac.ops.reset_hw(hw);
+
+	if (hw->mac.ops.init_hw(hw))
+		netdev_err(dev, "Error on hardware initialization\n");
+
+	/* Re-establish EEE setting */
+	igc_set_eee_i225(hw, true, true, true);
+
+	if (!netif_running(adapter->netdev))
+		igc_power_down_phy_copper_base(&adapter->hw);
+
+	/* Enable HW to recognize an 802.1Q VLAN Ethernet packet */
+	wr32(IGC_VET, ETH_P_8021Q);
+
+	/* Re-enable PTP, where applicable. */
+	igc_ptp_reset(adapter);
+
+	/* Re-enable TSN offloading, where applicable. */
+	igc_tsn_reset(adapter);
+
+	igc_get_phy_info(hw);
+}
+
+/**
+ * igc_power_up_link - Power up the phy link
+ * @adapter: address of board private structure
+ */
+static void igc_power_up_link(struct igc_adapter *adapter)
+{
+	igc_reset_phy(&adapter->hw);
+
+	igc_power_up_phy_copper(&adapter->hw);
+
+	igc_setup_link(&adapter->hw);
+}
+
+/**
+ * igc_release_hw_control - release control of the h/w to f/w
+ * @adapter: address of board private structure
+ *
+ * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
+ * For ASF and Pass Through versions of f/w this means that the
+ * driver is no longer loaded.
+ */
+static void igc_release_hw_control(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 ctrl_ext;
+
+	if (!pci_device_is_present(adapter->pdev))
+		return;
+
+	/* Let firmware take over control of h/w */
+	ctrl_ext = rd32(IGC_CTRL_EXT);
+	wr32(IGC_CTRL_EXT,
+	     ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
+}
+
+/**
+ * igc_get_hw_control - get control of the h/w from f/w
+ * @adapter: address of board private structure
+ *
+ * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
+ * For ASF and Pass Through versions of f/w this means that
+ * the driver is loaded.
+ */
+static void igc_get_hw_control(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 ctrl_ext;
+
+	/* Let firmware know the driver has taken over */
+	ctrl_ext = rd32(IGC_CTRL_EXT);
+	wr32(IGC_CTRL_EXT,
+	     ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
+}
+
+static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf)
+{
+	dma_unmap_single(dev, dma_unmap_addr(buf, dma),
+			 dma_unmap_len(buf, len), DMA_TO_DEVICE);
+
+	dma_unmap_len_set(buf, len, 0);
+}
+
+/**
+ * igc_clean_tx_ring - Free Tx Buffers
+ * @tx_ring: ring to be cleaned
+ */
+static void igc_clean_tx_ring(struct igc_ring *tx_ring)
+{
+	u16 i = tx_ring->next_to_clean;
+	struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
+	u32 xsk_frames = 0;
+
+	while (i != tx_ring->next_to_use) {
+		union igc_adv_tx_desc *eop_desc, *tx_desc;
+
+		switch (tx_buffer->type) {
+		case IGC_TX_BUFFER_TYPE_XSK:
+			xsk_frames++;
+			break;
+		case IGC_TX_BUFFER_TYPE_XDP:
+			xdp_return_frame(tx_buffer->xdpf);
+			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
+			break;
+		case IGC_TX_BUFFER_TYPE_SKB:
+			dev_kfree_skb_any(tx_buffer->skb);
+			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
+			break;
+		default:
+			netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
+			break;
+		}
+
+		/* check for eop_desc to determine the end of the packet */
+		eop_desc = tx_buffer->next_to_watch;
+		tx_desc = IGC_TX_DESC(tx_ring, i);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(i == tx_ring->count)) {
+				i = 0;
+				tx_buffer = tx_ring->tx_buffer_info;
+				tx_desc = IGC_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len))
+				igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
+		}
+
+		tx_buffer->next_to_watch = NULL;
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		i++;
+		if (unlikely(i == tx_ring->count)) {
+			i = 0;
+			tx_buffer = tx_ring->tx_buffer_info;
+		}
+	}
+
+	if (tx_ring->xsk_pool && xsk_frames)
+		xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
+
+	/* reset BQL for queue */
+	netdev_tx_reset_queue(txring_txq(tx_ring));
+
+	/* Zero out the buffer ring */
+	memset(tx_ring->tx_buffer_info, 0,
+	       sizeof(*tx_ring->tx_buffer_info) * tx_ring->count);
+
+	/* Zero out the descriptor ring */
+	memset(tx_ring->desc, 0, tx_ring->size);
+
+	/* reset next_to_use and next_to_clean */
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+}
+
+/**
+ * igc_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ */
+void igc_free_tx_resources(struct igc_ring *tx_ring)
+{
+	igc_disable_tx_ring(tx_ring);
+
+	vfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+
+	/* if not set, then don't free */
+	if (!tx_ring->desc)
+		return;
+
+	dma_free_coherent(tx_ring->dev, tx_ring->size,
+			  tx_ring->desc, tx_ring->dma);
+
+	tx_ring->desc = NULL;
+}
+
+/**
+ * igc_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ */
+static void igc_free_all_tx_resources(struct igc_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		igc_free_tx_resources(adapter->tx_ring[i]);
+}
+
+/**
+ * igc_clean_all_tx_rings - Free Tx Buffers for all queues
+ * @adapter: board private structure
+ */
+static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		if (adapter->tx_ring[i])
+			igc_clean_tx_ring(adapter->tx_ring[i]);
+}
+
+static void igc_disable_tx_ring_hw(struct igc_ring *ring)
+{
+	struct igc_hw *hw = &ring->q_vector->adapter->hw;
+	u8 idx = ring->reg_idx;
+	u32 txdctl;
+
+	txdctl = rd32(IGC_TXDCTL(idx));
+	txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
+	txdctl |= IGC_TXDCTL_SWFLUSH;
+	wr32(IGC_TXDCTL(idx), txdctl);
+}
+
+/**
+ * igc_disable_all_tx_rings_hw - Disable all transmit queue operation
+ * @adapter: board private structure
+ */
+static void igc_disable_all_tx_rings_hw(struct igc_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+		igc_disable_tx_ring_hw(tx_ring);
+	}
+}
+
+/**
+ * igc_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @tx_ring: tx descriptor ring (for a specific queue) to setup
+ *
+ * Return 0 on success, negative on failure
+ */
+int igc_setup_tx_resources(struct igc_ring *tx_ring)
+{
+	struct net_device *ndev = tx_ring->netdev;
+	struct device *dev = tx_ring->dev;
+	int size = 0;
+
+	size = sizeof(struct igc_tx_buffer) * tx_ring->count;
+	tx_ring->tx_buffer_info = vzalloc(size);
+	if (!tx_ring->tx_buffer_info)
+		goto err;
+
+	/* round up to nearest 4K */
+	tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc);
+	tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+					   &tx_ring->dma, GFP_KERNEL);
+
+	if (!tx_ring->desc)
+		goto err;
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+	return 0;
+
+err:
+	vfree(tx_ring->tx_buffer_info);
+	netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ */
+static int igc_setup_all_tx_resources(struct igc_adapter *adapter)
+{
+	struct net_device *dev = adapter->netdev;
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		err = igc_setup_tx_resources(adapter->tx_ring[i]);
+		if (err) {
+			netdev_err(dev, "Error on Tx queue %u setup\n", i);
+			for (i--; i >= 0; i--)
+				igc_free_tx_resources(adapter->tx_ring[i]);
+			break;
+		}
+	}
+
+	return err;
+}
+
+static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring)
+{
+	u16 i = rx_ring->next_to_clean;
+
+	dev_kfree_skb(rx_ring->skb);
+	rx_ring->skb = NULL;
+
+	/* Free all the Rx ring sk_buffs */
+	while (i != rx_ring->next_to_alloc) {
+		struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
+
+		/* Invalidate cache lines that may have been written to by
+		 * device so that we avoid corrupting memory.
+		 */
+		dma_sync_single_range_for_cpu(rx_ring->dev,
+					      buffer_info->dma,
+					      buffer_info->page_offset,
+					      igc_rx_bufsz(rx_ring),
+					      DMA_FROM_DEVICE);
+
+		/* free resources associated with mapping */
+		dma_unmap_page_attrs(rx_ring->dev,
+				     buffer_info->dma,
+				     igc_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE,
+				     IGC_RX_DMA_ATTR);
+		__page_frag_cache_drain(buffer_info->page,
+					buffer_info->pagecnt_bias);
+
+		i++;
+		if (i == rx_ring->count)
+			i = 0;
+	}
+}
+
+static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring)
+{
+	struct igc_rx_buffer *bi;
+	u16 i;
+
+	for (i = 0; i < ring->count; i++) {
+		bi = &ring->rx_buffer_info[i];
+		if (!bi->xdp)
+			continue;
+
+		xsk_buff_free(bi->xdp);
+		bi->xdp = NULL;
+	}
+}
+
+/**
+ * igc_clean_rx_ring - Free Rx Buffers per Queue
+ * @ring: ring to free buffers from
+ */
+static void igc_clean_rx_ring(struct igc_ring *ring)
+{
+	if (ring->xsk_pool)
+		igc_clean_rx_ring_xsk_pool(ring);
+	else
+		igc_clean_rx_ring_page_shared(ring);
+
+	clear_ring_uses_large_buffer(ring);
+
+	ring->next_to_alloc = 0;
+	ring->next_to_clean = 0;
+	ring->next_to_use = 0;
+}
+
+/**
+ * igc_clean_all_rx_rings - Free Rx Buffers for all queues
+ * @adapter: board private structure
+ */
+static void igc_clean_all_rx_rings(struct igc_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		if (adapter->rx_ring[i])
+			igc_clean_rx_ring(adapter->rx_ring[i]);
+}
+
+/**
+ * igc_free_rx_resources - Free Rx Resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ */
+void igc_free_rx_resources(struct igc_ring *rx_ring)
+{
+	igc_clean_rx_ring(rx_ring);
+
+	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+
+	/* if not set, then don't free */
+	if (!rx_ring->desc)
+		return;
+
+	dma_free_coherent(rx_ring->dev, rx_ring->size,
+			  rx_ring->desc, rx_ring->dma);
+
+	rx_ring->desc = NULL;
+}
+
+/**
+ * igc_free_all_rx_resources - Free Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ */
+static void igc_free_all_rx_resources(struct igc_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		igc_free_rx_resources(adapter->rx_ring[i]);
+}
+
+/**
+ * igc_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @rx_ring:    rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ */
+int igc_setup_rx_resources(struct igc_ring *rx_ring)
+{
+	struct net_device *ndev = rx_ring->netdev;
+	struct device *dev = rx_ring->dev;
+	u8 index = rx_ring->queue_index;
+	int size, desc_len, res;
+
+	/* XDP RX-queue info */
+	if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
+		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+	res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index,
+			       rx_ring->q_vector->napi.napi_id);
+	if (res < 0) {
+		netdev_err(ndev, "Failed to register xdp_rxq index %u\n",
+			   index);
+		return res;
+	}
+
+	size = sizeof(struct igc_rx_buffer) * rx_ring->count;
+	rx_ring->rx_buffer_info = vzalloc(size);
+	if (!rx_ring->rx_buffer_info)
+		goto err;
+
+	desc_len = sizeof(union igc_adv_rx_desc);
+
+	/* Round up to nearest 4K */
+	rx_ring->size = rx_ring->count * desc_len;
+	rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+					   &rx_ring->dma, GFP_KERNEL);
+
+	if (!rx_ring->desc)
+		goto err;
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+
+	return 0;
+
+err:
+	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+	netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * igc_setup_all_rx_resources - wrapper to allocate Rx resources
+ *                                (Descriptors) for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ */
+static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
+{
+	struct net_device *dev = adapter->netdev;
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		err = igc_setup_rx_resources(adapter->rx_ring[i]);
+		if (err) {
+			netdev_err(dev, "Error on Rx queue %u setup\n", i);
+			for (i--; i >= 0; i--)
+				igc_free_rx_resources(adapter->rx_ring[i]);
+			break;
+		}
+	}
+
+	return err;
+}
+
+static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter,
+					      struct igc_ring *ring)
+{
+	if (!igc_xdp_is_enabled(adapter) ||
+	    !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags))
+		return NULL;
+
+	return xsk_get_pool_from_qid(ring->netdev, ring->queue_index);
+}
+
+/**
+ * igc_configure_rx_ring - Configure a receive ring after Reset
+ * @adapter: board private structure
+ * @ring: receive ring to be configured
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ */
+static void igc_configure_rx_ring(struct igc_adapter *adapter,
+				  struct igc_ring *ring)
+{
+	struct igc_hw *hw = &adapter->hw;
+	union igc_adv_rx_desc *rx_desc;
+	int reg_idx = ring->reg_idx;
+	u32 srrctl = 0, rxdctl = 0;
+	u64 rdba = ring->dma;
+	u32 buf_size;
+
+	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+	ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
+	if (ring->xsk_pool) {
+		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+						   MEM_TYPE_XSK_BUFF_POOL,
+						   NULL));
+		xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
+	} else {
+		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+						   MEM_TYPE_PAGE_SHARED,
+						   NULL));
+	}
+
+	if (igc_xdp_is_enabled(adapter))
+		set_ring_uses_large_buffer(ring);
+
+	/* disable the queue */
+	wr32(IGC_RXDCTL(reg_idx), 0);
+
+	/* Set DMA base address registers */
+	wr32(IGC_RDBAL(reg_idx),
+	     rdba & 0x00000000ffffffffULL);
+	wr32(IGC_RDBAH(reg_idx), rdba >> 32);
+	wr32(IGC_RDLEN(reg_idx),
+	     ring->count * sizeof(union igc_adv_rx_desc));
+
+	/* initialize head and tail */
+	ring->tail = adapter->io_addr + IGC_RDT(reg_idx);
+	wr32(IGC_RDH(reg_idx), 0);
+	writel(0, ring->tail);
+
+	/* reset next-to- use/clean to place SW in sync with hardware */
+	ring->next_to_clean = 0;
+	ring->next_to_use = 0;
+
+	if (ring->xsk_pool)
+		buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
+	else if (ring_uses_large_buffer(ring))
+		buf_size = IGC_RXBUFFER_3072;
+	else
+		buf_size = IGC_RXBUFFER_2048;
+
+	srrctl = rd32(IGC_SRRCTL(reg_idx));
+	srrctl &= ~(IGC_SRRCTL_BSIZEPKT_MASK | IGC_SRRCTL_BSIZEHDR_MASK |
+		    IGC_SRRCTL_DESCTYPE_MASK);
+	srrctl |= IGC_SRRCTL_BSIZEHDR(IGC_RX_HDR_LEN);
+	srrctl |= IGC_SRRCTL_BSIZEPKT(buf_size);
+	srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
+
+	wr32(IGC_SRRCTL(reg_idx), srrctl);
+
+	rxdctl |= IGC_RX_PTHRESH;
+	rxdctl |= IGC_RX_HTHRESH << 8;
+	rxdctl |= IGC_RX_WTHRESH << 16;
+
+	/* initialize rx_buffer_info */
+	memset(ring->rx_buffer_info, 0,
+	       sizeof(struct igc_rx_buffer) * ring->count);
+
+	/* initialize Rx descriptor 0 */
+	rx_desc = IGC_RX_DESC(ring, 0);
+	rx_desc->wb.upper.length = 0;
+
+	/* enable receive descriptor fetching */
+	rxdctl |= IGC_RXDCTL_QUEUE_ENABLE;
+
+	wr32(IGC_RXDCTL(reg_idx), rxdctl);
+}
+
+/**
+ * igc_configure_rx - Configure receive Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ */
+static void igc_configure_rx(struct igc_adapter *adapter)
+{
+	int i;
+
+	/* Setup the HW Rx Head and Tail Descriptor Pointers and
+	 * the Base and Length of the Rx Descriptor Ring
+	 */
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		igc_configure_rx_ring(adapter, adapter->rx_ring[i]);
+}
+
+/**
+ * igc_configure_tx_ring - Configure transmit ring after Reset
+ * @adapter: board private structure
+ * @ring: tx ring to configure
+ *
+ * Configure a transmit ring after a reset.
+ */
+static void igc_configure_tx_ring(struct igc_adapter *adapter,
+				  struct igc_ring *ring)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int reg_idx = ring->reg_idx;
+	u64 tdba = ring->dma;
+	u32 txdctl = 0;
+
+	ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
+
+	/* disable the queue */
+	wr32(IGC_TXDCTL(reg_idx), 0);
+	wrfl();
+
+	wr32(IGC_TDLEN(reg_idx),
+	     ring->count * sizeof(union igc_adv_tx_desc));
+	wr32(IGC_TDBAL(reg_idx),
+	     tdba & 0x00000000ffffffffULL);
+	wr32(IGC_TDBAH(reg_idx), tdba >> 32);
+
+	ring->tail = adapter->io_addr + IGC_TDT(reg_idx);
+	wr32(IGC_TDH(reg_idx), 0);
+	writel(0, ring->tail);
+
+	txdctl |= IGC_TX_PTHRESH;
+	txdctl |= IGC_TX_HTHRESH << 8;
+	txdctl |= IGC_TX_WTHRESH << 16;
+
+	txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
+	wr32(IGC_TXDCTL(reg_idx), txdctl);
+}
+
+/**
+ * igc_configure_tx - Configure transmit Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ */
+static void igc_configure_tx(struct igc_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		igc_configure_tx_ring(adapter, adapter->tx_ring[i]);
+}
+
+/**
+ * igc_setup_mrqc - configure the multiple receive queue control registers
+ * @adapter: Board private structure
+ */
+static void igc_setup_mrqc(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 j, num_rx_queues;
+	u32 mrqc, rxcsum;
+	u32 rss_key[10];
+
+	netdev_rss_key_fill(rss_key, sizeof(rss_key));
+	for (j = 0; j < 10; j++)
+		wr32(IGC_RSSRK(j), rss_key[j]);
+
+	num_rx_queues = adapter->rss_queues;
+
+	if (adapter->rss_indir_tbl_init != num_rx_queues) {
+		for (j = 0; j < IGC_RETA_SIZE; j++)
+			adapter->rss_indir_tbl[j] =
+			(j * num_rx_queues) / IGC_RETA_SIZE;
+		adapter->rss_indir_tbl_init = num_rx_queues;
+	}
+	igc_write_rss_indir_tbl(adapter);
+
+	/* Disable raw packet checksumming so that RSS hash is placed in
+	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
+	 * offloads as they are enabled by default
+	 */
+	rxcsum = rd32(IGC_RXCSUM);
+	rxcsum |= IGC_RXCSUM_PCSD;
+
+	/* Enable Receive Checksum Offload for SCTP */
+	rxcsum |= IGC_RXCSUM_CRCOFL;
+
+	/* Don't need to set TUOFL or IPOFL, they default to 1 */
+	wr32(IGC_RXCSUM, rxcsum);
+
+	/* Generate RSS hash based on packet types, TCP/UDP
+	 * port numbers and/or IPv4/v6 src and dst addresses
+	 */
+	mrqc = IGC_MRQC_RSS_FIELD_IPV4 |
+	       IGC_MRQC_RSS_FIELD_IPV4_TCP |
+	       IGC_MRQC_RSS_FIELD_IPV6 |
+	       IGC_MRQC_RSS_FIELD_IPV6_TCP |
+	       IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
+
+	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
+		mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
+	if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
+		mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
+
+	mrqc |= IGC_MRQC_ENABLE_RSS_MQ;
+
+	wr32(IGC_MRQC, mrqc);
+}
+
+/**
+ * igc_setup_rctl - configure the receive control registers
+ * @adapter: Board private structure
+ */
+static void igc_setup_rctl(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 rctl;
+
+	rctl = rd32(IGC_RCTL);
+
+	rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
+	rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC);
+
+	rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF |
+		(hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
+
+	/* enable stripping of CRC. Newer features require
+	 * that the HW strips the CRC.
+	 */
+	rctl |= IGC_RCTL_SECRC;
+
+	/* disable store bad packets and clear size bits. */
+	rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256);
+
+	/* enable LPE to allow for reception of jumbo frames */
+	rctl |= IGC_RCTL_LPE;
+
+	/* disable queue 0 to prevent tail write w/o re-config */
+	wr32(IGC_RXDCTL(0), 0);
+
+	/* This is useful for sniffing bad packets. */
+	if (adapter->netdev->features & NETIF_F_RXALL) {
+		/* UPE and MPE will be handled by normal PROMISC logic
+		 * in set_rx_mode
+		 */
+		rctl |= (IGC_RCTL_SBP | /* Receive bad packets */
+			 IGC_RCTL_BAM | /* RX All Bcast Pkts */
+			 IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
+
+		rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */
+			  IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */
+	}
+
+	wr32(IGC_RCTL, rctl);
+}
+
+/**
+ * igc_setup_tctl - configure the transmit control registers
+ * @adapter: Board private structure
+ */
+static void igc_setup_tctl(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 tctl;
+
+	/* disable queue 0 which icould be enabled by default */
+	wr32(IGC_TXDCTL(0), 0);
+
+	/* Program the Transmit Control Register */
+	tctl = rd32(IGC_TCTL);
+	tctl &= ~IGC_TCTL_CT;
+	tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC |
+		(IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);
+
+	/* Enable transmits */
+	tctl |= IGC_TCTL_EN;
+
+	wr32(IGC_TCTL, tctl);
+}
+
+/**
+ * igc_set_mac_filter_hw() - Set MAC address filter in hardware
+ * @adapter: Pointer to adapter where the filter should be set
+ * @index: Filter index
+ * @type: MAC address filter type (source or destination)
+ * @addr: MAC address
+ * @queue: If non-negative, queue assignment feature is enabled and frames
+ *         matching the filter are enqueued onto 'queue'. Otherwise, queue
+ *         assignment is disabled.
+ */
+static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index,
+				  enum igc_mac_filter_type type,
+				  const u8 *addr, int queue)
+{
+	struct net_device *dev = adapter->netdev;
+	struct igc_hw *hw = &adapter->hw;
+	u32 ral, rah;
+
+	if (WARN_ON(index >= hw->mac.rar_entry_count))
+		return;
+
+	ral = le32_to_cpup((__le32 *)(addr));
+	rah = le16_to_cpup((__le16 *)(addr + 4));
+
+	if (type == IGC_MAC_FILTER_TYPE_SRC) {
+		rah &= ~IGC_RAH_ASEL_MASK;
+		rah |= IGC_RAH_ASEL_SRC_ADDR;
+	}
+
+	if (queue >= 0) {
+		rah &= ~IGC_RAH_QSEL_MASK;
+		rah |= (queue << IGC_RAH_QSEL_SHIFT);
+		rah |= IGC_RAH_QSEL_ENABLE;
+	}
+
+	rah |= IGC_RAH_AV;
+
+	wr32(IGC_RAL(index), ral);
+	wr32(IGC_RAH(index), rah);
+
+	netdev_dbg(dev, "MAC address filter set in HW: index %d", index);
+}
+
+/**
+ * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware
+ * @adapter: Pointer to adapter where the filter should be cleared
+ * @index: Filter index
+ */
+static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index)
+{
+	struct net_device *dev = adapter->netdev;
+	struct igc_hw *hw = &adapter->hw;
+
+	if (WARN_ON(index >= hw->mac.rar_entry_count))
+		return;
+
+	wr32(IGC_RAL(index), 0);
+	wr32(IGC_RAH(index), 0);
+
+	netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index);
+}
+
+/* Set default MAC address for the PF in the first RAR entry */
+static void igc_set_default_mac_filter(struct igc_adapter *adapter)
+{
+	struct net_device *dev = adapter->netdev;
+	u8 *addr = adapter->hw.mac.addr;
+
+	netdev_dbg(dev, "Set default MAC address filter: address %pM", addr);
+
+	igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1);
+}
+
+/**
+ * igc_set_mac - Change the Ethernet Address of the NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int igc_set_mac(struct net_device *netdev, void *p)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	eth_hw_addr_set(netdev, addr->sa_data);
+	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
+
+	/* set the correct pool for the new PF MAC address in entry 0 */
+	igc_set_default_mac_filter(adapter);
+
+	return 0;
+}
+
+/**
+ *  igc_write_mc_addr_list - write multicast addresses to MTA
+ *  @netdev: network interface device structure
+ *
+ *  Writes multicast address list to the MTA hash table.
+ *  Returns: -ENOMEM on failure
+ *           0 on no addresses written
+ *           X on writing X addresses to MTA
+ **/
+static int igc_write_mc_addr_list(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	struct netdev_hw_addr *ha;
+	u8  *mta_list;
+	int i;
+
+	if (netdev_mc_empty(netdev)) {
+		/* nothing to program, so clear mc list */
+		igc_update_mc_addr_list(hw, NULL, 0);
+		return 0;
+	}
+
+	mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC);
+	if (!mta_list)
+		return -ENOMEM;
+
+	/* The shared function expects a packed array of only addresses. */
+	i = 0;
+	netdev_for_each_mc_addr(ha, netdev)
+		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
+
+	igc_update_mc_addr_list(hw, mta_list, i);
+	kfree(mta_list);
+
+	return netdev_mc_count(netdev);
+}
+
+static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime,
+				bool *first_flag, bool *insert_empty)
+{
+	struct igc_adapter *adapter = netdev_priv(ring->netdev);
+	ktime_t cycle_time = adapter->cycle_time;
+	ktime_t base_time = adapter->base_time;
+	ktime_t now = ktime_get_clocktai();
+	ktime_t baset_est, end_of_cycle;
+	s32 launchtime;
+	s64 n;
+
+	n = div64_s64(ktime_sub_ns(now, base_time), cycle_time);
+
+	baset_est = ktime_add_ns(base_time, cycle_time * (n));
+	end_of_cycle = ktime_add_ns(baset_est, cycle_time);
+
+	if (ktime_compare(txtime, end_of_cycle) >= 0) {
+		if (baset_est != ring->last_ff_cycle) {
+			*first_flag = true;
+			ring->last_ff_cycle = baset_est;
+
+			if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0)
+				*insert_empty = true;
+		}
+	}
+
+	/* Introducing a window at end of cycle on which packets
+	 * potentially not honor launchtime. Window of 5us chosen
+	 * considering software update the tail pointer and packets
+	 * are dma'ed to packet buffer.
+	 */
+	if ((ktime_sub_ns(end_of_cycle, now) < 5 * NSEC_PER_USEC))
+		netdev_warn(ring->netdev, "Packet with txtime=%llu may not be honoured\n",
+			    txtime);
+
+	ring->last_tx_cycle = end_of_cycle;
+
+	launchtime = ktime_sub_ns(txtime, baset_est);
+	if (launchtime > 0)
+		div_s64_rem(launchtime, cycle_time, &launchtime);
+	else
+		launchtime = 0;
+
+	return cpu_to_le32(launchtime);
+}
+
+static int igc_init_empty_frame(struct igc_ring *ring,
+				struct igc_tx_buffer *buffer,
+				struct sk_buff *skb)
+{
+	unsigned int size;
+	dma_addr_t dma;
+
+	size = skb_headlen(skb);
+
+	dma = dma_map_single(ring->dev, skb->data, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(ring->dev, dma)) {
+		netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
+		return -ENOMEM;
+	}
+
+	buffer->skb = skb;
+	buffer->protocol = 0;
+	buffer->bytecount = skb->len;
+	buffer->gso_segs = 1;
+	buffer->time_stamp = jiffies;
+	dma_unmap_len_set(buffer, len, skb->len);
+	dma_unmap_addr_set(buffer, dma, dma);
+
+	return 0;
+}
+
+static int igc_init_tx_empty_descriptor(struct igc_ring *ring,
+					struct sk_buff *skb,
+					struct igc_tx_buffer *first)
+{
+	union igc_adv_tx_desc *desc;
+	u32 cmd_type, olinfo_status;
+	int err;
+
+	if (!igc_desc_unused(ring))
+		return -EBUSY;
+
+	err = igc_init_empty_frame(ring, first, skb);
+	if (err)
+		return err;
+
+	cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
+		   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
+		   first->bytecount;
+	olinfo_status = first->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
+
+	desc = IGC_TX_DESC(ring, ring->next_to_use);
+	desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+	desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+	desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(first, dma));
+
+	netdev_tx_sent_queue(txring_txq(ring), skb->len);
+
+	first->next_to_watch = desc;
+
+	ring->next_to_use++;
+	if (ring->next_to_use == ring->count)
+		ring->next_to_use = 0;
+
+	return 0;
+}
+
+#define IGC_EMPTY_FRAME_SIZE 60
+
+static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
+			    __le32 launch_time, bool first_flag,
+			    u32 vlan_macip_lens, u32 type_tucmd,
+			    u32 mss_l4len_idx)
+{
+	struct igc_adv_tx_context_desc *context_desc;
+	u16 i = tx_ring->next_to_use;
+
+	context_desc = IGC_TX_CTXTDESC(tx_ring, i);
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	/* set bits to identify this as an advanced context descriptor */
+	type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
+
+	/* For i225, context index must be unique per ring. */
+	if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
+		mss_l4len_idx |= tx_ring->reg_idx << 4;
+
+	if (first_flag)
+		mss_l4len_idx |= IGC_ADVTXD_TSN_CNTX_FIRST;
+
+	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
+	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
+	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
+	context_desc->launch_time	= launch_time;
+}
+
+static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first,
+			__le32 launch_time, bool first_flag)
+{
+	struct sk_buff *skb = first->skb;
+	u32 vlan_macip_lens = 0;
+	u32 type_tucmd = 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+csum_failed:
+		if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) &&
+		    !tx_ring->launchtime_enable)
+			return;
+		goto no_csum;
+	}
+
+	switch (skb->csum_offset) {
+	case offsetof(struct tcphdr, check):
+		type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
+		fallthrough;
+	case offsetof(struct udphdr, check):
+		break;
+	case offsetof(struct sctphdr, checksum):
+		/* validate that this is actually an SCTP request */
+		if (skb_csum_is_sctp(skb)) {
+			type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
+			break;
+		}
+		fallthrough;
+	default:
+		skb_checksum_help(skb);
+		goto csum_failed;
+	}
+
+	/* update TX checksum flag */
+	first->tx_flags |= IGC_TX_FLAGS_CSUM;
+	vlan_macip_lens = skb_checksum_start_offset(skb) -
+			  skb_network_offset(skb);
+no_csum:
+	vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
+	vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
+
+	igc_tx_ctxtdesc(tx_ring, launch_time, first_flag,
+			vlan_macip_lens, type_tucmd, 0);
+}
+
+static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
+{
+	struct net_device *netdev = tx_ring->netdev;
+
+	netif_stop_subqueue(netdev, tx_ring->queue_index);
+
+	/* memory barriier comment */
+	smp_mb();
+
+	/* We need to check again in a case another CPU has just
+	 * made room available.
+	 */
+	if (igc_desc_unused(tx_ring) < size)
+		return -EBUSY;
+
+	/* A reprieve! */
+	netif_wake_subqueue(netdev, tx_ring->queue_index);
+
+	u64_stats_update_begin(&tx_ring->tx_syncp2);
+	tx_ring->tx_stats.restart_queue2++;
+	u64_stats_update_end(&tx_ring->tx_syncp2);
+
+	return 0;
+}
+
+static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
+{
+	if (igc_desc_unused(tx_ring) >= size)
+		return 0;
+	return __igc_maybe_stop_tx(tx_ring, size);
+}
+
+#define IGC_SET_FLAG(_input, _flag, _result) \
+	(((_flag) <= (_result)) ?				\
+	 ((u32)((_input) & (_flag)) * ((_result) / (_flag))) :	\
+	 ((u32)((_input) & (_flag)) / ((_flag) / (_result))))
+
+static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
+{
+	/* set type for advanced descriptor with frame checksum insertion */
+	u32 cmd_type = IGC_ADVTXD_DTYP_DATA |
+		       IGC_ADVTXD_DCMD_DEXT |
+		       IGC_ADVTXD_DCMD_IFCS;
+
+	/* set HW vlan bit if vlan is present */
+	cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN,
+				 IGC_ADVTXD_DCMD_VLE);
+
+	/* set segmentation bits for TSO */
+	cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO,
+				 (IGC_ADVTXD_DCMD_TSE));
+
+	/* set timestamp bit if present, will select the register set
+	 * based on the _TSTAMP(_X) bit.
+	 */
+	cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP,
+				 (IGC_ADVTXD_MAC_TSTAMP));
+
+	cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_1,
+				 (IGC_ADVTXD_TSTAMP_REG_1));
+
+	cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_2,
+				 (IGC_ADVTXD_TSTAMP_REG_2));
+
+	cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP_3,
+				 (IGC_ADVTXD_TSTAMP_REG_3));
+
+	/* insert frame checksum */
+	cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS);
+
+	return cmd_type;
+}
+
+static void igc_tx_olinfo_status(struct igc_ring *tx_ring,
+				 union igc_adv_tx_desc *tx_desc,
+				 u32 tx_flags, unsigned int paylen)
+{
+	u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT;
+
+	/* insert L4 checksum */
+	olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) *
+			  ((IGC_TXD_POPTS_TXSM << 8) /
+			  IGC_TX_FLAGS_CSUM);
+
+	/* insert IPv4 checksum */
+	olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) *
+			  (((IGC_TXD_POPTS_IXSM << 8)) /
+			  IGC_TX_FLAGS_IPV4);
+
+	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+}
+
+static int igc_tx_map(struct igc_ring *tx_ring,
+		      struct igc_tx_buffer *first,
+		      const u8 hdr_len)
+{
+	struct sk_buff *skb = first->skb;
+	struct igc_tx_buffer *tx_buffer;
+	union igc_adv_tx_desc *tx_desc;
+	u32 tx_flags = first->tx_flags;
+	skb_frag_t *frag;
+	u16 i = tx_ring->next_to_use;
+	unsigned int data_len, size;
+	dma_addr_t dma;
+	u32 cmd_type;
+
+	cmd_type = igc_tx_cmd_type(skb, tx_flags);
+	tx_desc = IGC_TX_DESC(tx_ring, i);
+
+	igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
+
+	size = skb_headlen(skb);
+	data_len = skb->data_len;
+
+	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+	tx_buffer = first;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_error;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buffer, len, size);
+		dma_unmap_addr_set(tx_buffer, dma, dma);
+
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+		while (unlikely(size > IGC_MAX_DATA_PER_TXD)) {
+			tx_desc->read.cmd_type_len =
+				cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD);
+
+			i++;
+			tx_desc++;
+			if (i == tx_ring->count) {
+				tx_desc = IGC_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+			tx_desc->read.olinfo_status = 0;
+
+			dma += IGC_MAX_DATA_PER_TXD;
+			size -= IGC_MAX_DATA_PER_TXD;
+
+			tx_desc->read.buffer_addr = cpu_to_le64(dma);
+		}
+
+		if (likely(!data_len))
+			break;
+
+		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
+
+		i++;
+		tx_desc++;
+		if (i == tx_ring->count) {
+			tx_desc = IGC_TX_DESC(tx_ring, 0);
+			i = 0;
+		}
+		tx_desc->read.olinfo_status = 0;
+
+		size = skb_frag_size(frag);
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
+				       size, DMA_TO_DEVICE);
+
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+	}
+
+	/* write last descriptor with RS and EOP bits */
+	cmd_type |= size | IGC_TXD_DCMD;
+	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+
+	/* set the timestamp */
+	first->time_stamp = jiffies;
+
+	skb_tx_timestamp(skb);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.  (Only applicable for weak-ordered
+	 * memory model archs, such as IA-64).
+	 *
+	 * We also need this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
+
+	tx_ring->next_to_use = i;
+
+	/* Make sure there is space in the ring for the next send. */
+	igc_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
+		writel(i, tx_ring->tail);
+	}
+
+	return 0;
+dma_error:
+	netdev_err(tx_ring->netdev, "TX DMA map failed\n");
+	tx_buffer = &tx_ring->tx_buffer_info[i];
+
+	/* clear dma mappings for failed tx_buffer_info map */
+	while (tx_buffer != first) {
+		if (dma_unmap_len(tx_buffer, len))
+			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
+
+		if (i-- == 0)
+			i += tx_ring->count;
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+	}
+
+	if (dma_unmap_len(tx_buffer, len))
+		igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
+
+	dev_kfree_skb_any(tx_buffer->skb);
+	tx_buffer->skb = NULL;
+
+	tx_ring->next_to_use = i;
+
+	return -1;
+}
+
+static int igc_tso(struct igc_ring *tx_ring,
+		   struct igc_tx_buffer *first,
+		   __le32 launch_time, bool first_flag,
+		   u8 *hdr_len)
+{
+	u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
+	struct sk_buff *skb = first->skb;
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		struct udphdr *udp;
+		unsigned char *hdr;
+	} l4;
+	u32 paylen, l4_offset;
+	int err;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	err = skb_cow_head(skb, 0);
+	if (err < 0)
+		return err;
+
+	ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_checksum_start(skb);
+
+	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
+	type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
+
+	/* initialize outer IP header fields */
+	if (ip.v4->version == 4) {
+		unsigned char *csum_start = skb_checksum_start(skb);
+		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
+
+		/* IP header will have to cancel out any data that
+		 * is not a part of the outer IP header
+		 */
+		ip.v4->check = csum_fold(csum_partial(trans_start,
+						      csum_start - trans_start,
+						      0));
+		type_tucmd |= IGC_ADVTXD_TUCMD_IPV4;
+
+		ip.v4->tot_len = 0;
+		first->tx_flags |= IGC_TX_FLAGS_TSO |
+				   IGC_TX_FLAGS_CSUM |
+				   IGC_TX_FLAGS_IPV4;
+	} else {
+		ip.v6->payload_len = 0;
+		first->tx_flags |= IGC_TX_FLAGS_TSO |
+				   IGC_TX_FLAGS_CSUM;
+	}
+
+	/* determine offset of inner transport header */
+	l4_offset = l4.hdr - skb->data;
+
+	/* remove payload length from inner checksum */
+	paylen = skb->len - l4_offset;
+	if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) {
+		/* compute length of segmentation header */
+		*hdr_len = (l4.tcp->doff * 4) + l4_offset;
+		csum_replace_by_diff(&l4.tcp->check,
+				     (__force __wsum)htonl(paylen));
+	} else {
+		/* compute length of segmentation header */
+		*hdr_len = sizeof(*l4.udp) + l4_offset;
+		csum_replace_by_diff(&l4.udp->check,
+				     (__force __wsum)htonl(paylen));
+	}
+
+	/* update gso size and bytecount with header size */
+	first->gso_segs = skb_shinfo(skb)->gso_segs;
+	first->bytecount += (first->gso_segs - 1) * *hdr_len;
+
+	/* MSS L4LEN IDX */
+	mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT;
+	mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT;
+
+	/* VLAN MACLEN IPLEN */
+	vlan_macip_lens = l4.hdr - ip.hdr;
+	vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT;
+	vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
+
+	igc_tx_ctxtdesc(tx_ring, launch_time, first_flag,
+			vlan_macip_lens, type_tucmd, mss_l4len_idx);
+
+	return 1;
+}
+
+static bool igc_request_tx_tstamp(struct igc_adapter *adapter, struct sk_buff *skb, u32 *flags)
+{
+	int i;
+
+	for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) {
+		struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i];
+
+		if (tstamp->skb)
+			continue;
+
+		tstamp->skb = skb_get(skb);
+		tstamp->start = jiffies;
+		*flags = tstamp->flags;
+
+		return true;
+	}
+
+	return false;
+}
+
+static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
+				       struct igc_ring *tx_ring)
+{
+	struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
+	bool first_flag = false, insert_empty = false;
+	u16 count = TXD_USE_COUNT(skb_headlen(skb));
+	__be16 protocol = vlan_get_protocol(skb);
+	struct igc_tx_buffer *first;
+	__le32 launch_time = 0;
+	u32 tx_flags = 0;
+	unsigned short f;
+	ktime_t txtime;
+	u8 hdr_len = 0;
+	int tso = 0;
+
+	/* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD,
+	 *	+ 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD,
+	 *	+ 2 desc gap to keep tail from touching head,
+	 *	+ 1 desc for context descriptor,
+	 * otherwise try next time
+	 */
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+		count += TXD_USE_COUNT(skb_frag_size(
+						&skb_shinfo(skb)->frags[f]));
+
+	if (igc_maybe_stop_tx(tx_ring, count + 5)) {
+		/* this is a hard error */
+		return NETDEV_TX_BUSY;
+	}
+
+	if (!tx_ring->launchtime_enable)
+		goto done;
+
+	txtime = skb->tstamp;
+	skb->tstamp = ktime_set(0, 0);
+	launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag, &insert_empty);
+
+	if (insert_empty) {
+		struct igc_tx_buffer *empty_info;
+		struct sk_buff *empty;
+		void *data;
+
+		empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+		empty = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC);
+		if (!empty)
+			goto done;
+
+		data = skb_put(empty, IGC_EMPTY_FRAME_SIZE);
+		memset(data, 0, IGC_EMPTY_FRAME_SIZE);
+
+		igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0);
+
+		if (igc_init_tx_empty_descriptor(tx_ring,
+						 empty,
+						 empty_info) < 0)
+			dev_kfree_skb_any(empty);
+	}
+
+done:
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+	first->type = IGC_TX_BUFFER_TYPE_SKB;
+	first->skb = skb;
+	first->bytecount = skb->len;
+	first->gso_segs = 1;
+
+	if (adapter->qbv_transition || tx_ring->oper_gate_closed)
+		goto out_drop;
+
+	if (tx_ring->max_sdu > 0 && first->bytecount > tx_ring->max_sdu) {
+		adapter->stats.txdrop++;
+		goto out_drop;
+	}
+
+	if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) &&
+		     skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+		/* FIXME: add support for retrieving timestamps from
+		 * the other timer registers before skipping the
+		 * timestamping request.
+		 */
+		unsigned long flags;
+		u32 tstamp_flags;
+
+		spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
+		if (igc_request_tx_tstamp(adapter, skb, &tstamp_flags)) {
+			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+			tx_flags |= IGC_TX_FLAGS_TSTAMP | tstamp_flags;
+		} else {
+			adapter->tx_hwtstamp_skipped++;
+		}
+
+		spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
+	}
+
+	if (skb_vlan_tag_present(skb)) {
+		tx_flags |= IGC_TX_FLAGS_VLAN;
+		tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT);
+	}
+
+	/* record initial flags and protocol */
+	first->tx_flags = tx_flags;
+	first->protocol = protocol;
+
+	tso = igc_tso(tx_ring, first, launch_time, first_flag, &hdr_len);
+	if (tso < 0)
+		goto out_drop;
+	else if (!tso)
+		igc_tx_csum(tx_ring, first, launch_time, first_flag);
+
+	igc_tx_map(tx_ring, first, hdr_len);
+
+	return NETDEV_TX_OK;
+
+out_drop:
+	dev_kfree_skb_any(first->skb);
+	first->skb = NULL;
+
+	return NETDEV_TX_OK;
+}
+
+static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter,
+						    struct sk_buff *skb)
+{
+	unsigned int r_idx = skb->queue_mapping;
+
+	if (r_idx >= adapter->num_tx_queues)
+		r_idx = r_idx % adapter->num_tx_queues;
+
+	return adapter->tx_ring[r_idx];
+}
+
+static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
+				  struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	/* The minimum packet size with TCTL.PSP set is 17 so pad the skb
+	 * in order to meet this minimum size requirement.
+	 */
+	if (skb->len < 17) {
+		if (skb_padto(skb, 17))
+			return NETDEV_TX_OK;
+		skb->len = 17;
+	}
+
+	return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
+}
+
+static void igc_rx_checksum(struct igc_ring *ring,
+			    union igc_adv_rx_desc *rx_desc,
+			    struct sk_buff *skb)
+{
+	skb_checksum_none_assert(skb);
+
+	/* Ignore Checksum bit is set */
+	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM))
+		return;
+
+	/* Rx checksum disabled via ethtool */
+	if (!(ring->netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* TCP/UDP checksum error bit is set */
+	if (igc_test_staterr(rx_desc,
+			     IGC_RXDEXT_STATERR_L4E |
+			     IGC_RXDEXT_STATERR_IPE)) {
+		/* work around errata with sctp packets where the TCPE aka
+		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
+		 * packets (aka let the stack check the crc32c)
+		 */
+		if (!(skb->len == 60 &&
+		      test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
+			u64_stats_update_begin(&ring->rx_syncp);
+			ring->rx_stats.csum_err++;
+			u64_stats_update_end(&ring->rx_syncp);
+		}
+		/* let the stack verify checksum errors */
+		return;
+	}
+	/* It must be a TCP or UDP packet with a valid checksum */
+	if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS |
+				      IGC_RXD_STAT_UDPCS))
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	netdev_dbg(ring->netdev, "cksum success: bits %08X\n",
+		   le32_to_cpu(rx_desc->wb.upper.status_error));
+}
+
+/* Mapping HW RSS Type to enum pkt_hash_types */
+static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = {
+	[IGC_RSS_TYPE_NO_HASH]		= PKT_HASH_TYPE_L2,
+	[IGC_RSS_TYPE_HASH_TCP_IPV4]	= PKT_HASH_TYPE_L4,
+	[IGC_RSS_TYPE_HASH_IPV4]	= PKT_HASH_TYPE_L3,
+	[IGC_RSS_TYPE_HASH_TCP_IPV6]	= PKT_HASH_TYPE_L4,
+	[IGC_RSS_TYPE_HASH_IPV6_EX]	= PKT_HASH_TYPE_L3,
+	[IGC_RSS_TYPE_HASH_IPV6]	= PKT_HASH_TYPE_L3,
+	[IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4,
+	[IGC_RSS_TYPE_HASH_UDP_IPV4]	= PKT_HASH_TYPE_L4,
+	[IGC_RSS_TYPE_HASH_UDP_IPV6]	= PKT_HASH_TYPE_L4,
+	[IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4,
+	[10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW  */
+	[11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask   */
+	[12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisons       */
+	[13] = PKT_HASH_TYPE_NONE,
+	[14] = PKT_HASH_TYPE_NONE,
+	[15] = PKT_HASH_TYPE_NONE,
+};
+
+static inline void igc_rx_hash(struct igc_ring *ring,
+			       union igc_adv_rx_desc *rx_desc,
+			       struct sk_buff *skb)
+{
+	if (ring->netdev->features & NETIF_F_RXHASH) {
+		u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
+		u32 rss_type = igc_rss_type(rx_desc);
+
+		skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]);
+	}
+}
+
+static void igc_rx_vlan(struct igc_ring *rx_ring,
+			union igc_adv_rx_desc *rx_desc,
+			struct sk_buff *skb)
+{
+	struct net_device *dev = rx_ring->netdev;
+	u16 vid;
+
+	if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+	    igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) {
+		if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) &&
+		    test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
+			vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan);
+		else
+			vid = le16_to_cpu(rx_desc->wb.upper.vlan);
+
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+	}
+}
+
+/**
+ * igc_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ *
+ * This function checks the ring, descriptor, and packet information in order
+ * to populate the hash, checksum, VLAN, protocol, and other fields within the
+ * skb.
+ */
+static void igc_process_skb_fields(struct igc_ring *rx_ring,
+				   union igc_adv_rx_desc *rx_desc,
+				   struct sk_buff *skb)
+{
+	igc_rx_hash(rx_ring, rx_desc, skb);
+
+	igc_rx_checksum(rx_ring, rx_desc, skb);
+
+	igc_rx_vlan(rx_ring, rx_desc, skb);
+
+	skb_record_rx_queue(skb, rx_ring->queue_index);
+
+	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+}
+
+static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features)
+{
+	bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	u32 ctrl;
+
+	ctrl = rd32(IGC_CTRL);
+
+	if (enable) {
+		/* enable VLAN tag insert/strip */
+		ctrl |= IGC_CTRL_VME;
+	} else {
+		/* disable VLAN tag insert/strip */
+		ctrl &= ~IGC_CTRL_VME;
+	}
+	wr32(IGC_CTRL, ctrl);
+}
+
+static void igc_restore_vlan(struct igc_adapter *adapter)
+{
+	igc_vlan_mode(adapter->netdev, adapter->netdev->features);
+}
+
+static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
+					       const unsigned int size,
+					       int *rx_buffer_pgcnt)
+{
+	struct igc_rx_buffer *rx_buffer;
+
+	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+	*rx_buffer_pgcnt =
+#if (PAGE_SIZE < 8192)
+		page_count(rx_buffer->page);
+#else
+		0;
+#endif
+	prefetchw(rx_buffer->page);
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      rx_buffer->dma,
+				      rx_buffer->page_offset,
+				      size,
+				      DMA_FROM_DEVICE);
+
+	rx_buffer->pagecnt_bias--;
+
+	return rx_buffer;
+}
+
+static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer,
+			       unsigned int truesize)
+{
+#if (PAGE_SIZE < 8192)
+	buffer->page_offset ^= truesize;
+#else
+	buffer->page_offset += truesize;
+#endif
+}
+
+static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring,
+					      unsigned int size)
+{
+	unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+	truesize = igc_rx_pg_size(ring) / 2;
+#else
+	truesize = ring_uses_build_skb(ring) ?
+		   SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+		   SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
+		   SKB_DATA_ALIGN(size);
+#endif
+	return truesize;
+}
+
+/**
+ * igc_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @skb: sk_buff to place the data into
+ * @size: size of buffer to be added
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ */
+static void igc_add_rx_frag(struct igc_ring *rx_ring,
+			    struct igc_rx_buffer *rx_buffer,
+			    struct sk_buff *skb,
+			    unsigned int size)
+{
+	unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+	truesize = igc_rx_pg_size(rx_ring) / 2;
+#else
+	truesize = ring_uses_build_skb(rx_ring) ?
+		   SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
+		   SKB_DATA_ALIGN(size);
+#endif
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+			rx_buffer->page_offset, size, truesize);
+
+	igc_rx_buffer_flip(rx_buffer, truesize);
+}
+
+static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
+				     struct igc_rx_buffer *rx_buffer,
+				     struct xdp_buff *xdp)
+{
+	unsigned int size = xdp->data_end - xdp->data;
+	unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	struct sk_buff *skb;
+
+	/* prefetch first cache line of first page */
+	net_prefetch(xdp->data_meta);
+
+	/* build an skb around the page buffer */
+	skb = napi_build_skb(xdp->data_hard_start, truesize);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* update pointers within the skb to store the data */
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	__skb_put(skb, size);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	igc_rx_buffer_flip(rx_buffer, truesize);
+	return skb;
+}
+
+static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
+					 struct igc_rx_buffer *rx_buffer,
+					 struct xdp_buff *xdp,
+					 ktime_t timestamp)
+{
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	unsigned int size = xdp->data_end - xdp->data;
+	unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
+	void *va = xdp->data;
+	unsigned int headlen;
+	struct sk_buff *skb;
+
+	/* prefetch first cache line of first page */
+	net_prefetch(xdp->data_meta);
+
+	/* allocate a skb to store the frags */
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi,
+			     IGC_RX_HDR_LEN + metasize);
+	if (unlikely(!skb))
+		return NULL;
+
+	if (timestamp)
+		skb_hwtstamps(skb)->hwtstamp = timestamp;
+
+	/* Determine available headroom for copy */
+	headlen = size;
+	if (headlen > IGC_RX_HDR_LEN)
+		headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta,
+	       ALIGN(headlen + metasize, sizeof(long)));
+
+	if (metasize) {
+		skb_metadata_set(skb, metasize);
+		__skb_pull(skb, metasize);
+	}
+
+	/* update all of the pointers */
+	size -= headlen;
+	if (size) {
+		skb_add_rx_frag(skb, 0, rx_buffer->page,
+				(va + headlen) - page_address(rx_buffer->page),
+				size, truesize);
+		igc_rx_buffer_flip(rx_buffer, truesize);
+	} else {
+		rx_buffer->pagecnt_bias++;
+	}
+
+	return skb;
+}
+
+/**
+ * igc_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ */
+static void igc_reuse_rx_page(struct igc_ring *rx_ring,
+			      struct igc_rx_buffer *old_buff)
+{
+	u16 nta = rx_ring->next_to_alloc;
+	struct igc_rx_buffer *new_buff;
+
+	new_buff = &rx_ring->rx_buffer_info[nta];
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* Transfer page from old buffer to new buffer.
+	 * Move each member individually to avoid possible store
+	 * forwarding stalls.
+	 */
+	new_buff->dma		= old_buff->dma;
+	new_buff->page		= old_buff->page;
+	new_buff->page_offset	= old_buff->page_offset;
+	new_buff->pagecnt_bias	= old_buff->pagecnt_bias;
+}
+
+static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer,
+				  int rx_buffer_pgcnt)
+{
+	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+	struct page *page = rx_buffer->page;
+
+	/* avoid re-using remote and pfmemalloc pages */
+	if (!dev_page_is_reusable(page))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
+		return false;
+#else
+#define IGC_LAST_OFFSET \
+	(SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048)
+
+	if (rx_buffer->page_offset > IGC_LAST_OFFSET)
+		return false;
+#endif
+
+	/* If we have drained the page fragment pool we need to update
+	 * the pagecnt_bias and page count so that we fully restock the
+	 * number of references the driver holds.
+	 */
+	if (unlikely(pagecnt_bias == 1)) {
+		page_ref_add(page, USHRT_MAX - 1);
+		rx_buffer->pagecnt_bias = USHRT_MAX;
+	}
+
+	return true;
+}
+
+/**
+ * igc_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ *
+ * This function updates next to clean.  If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ */
+static bool igc_is_non_eop(struct igc_ring *rx_ring,
+			   union igc_adv_rx_desc *rx_desc)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
+	prefetch(IGC_RX_DESC(rx_ring, ntc));
+
+	if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP)))
+		return false;
+
+	return true;
+}
+
+/**
+ * igc_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ */
+static bool igc_cleanup_headers(struct igc_ring *rx_ring,
+				union igc_adv_rx_desc *rx_desc,
+				struct sk_buff *skb)
+{
+	/* XDP packets use error pointer so abort at this point */
+	if (IS_ERR(skb))
+		return true;
+
+	if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) {
+		struct net_device *netdev = rx_ring->netdev;
+
+		if (!(netdev->features & NETIF_F_RXALL)) {
+			dev_kfree_skb_any(skb);
+			return true;
+		}
+	}
+
+	/* if eth_skb_pad returns an error the skb was freed */
+	if (eth_skb_pad(skb))
+		return true;
+
+	return false;
+}
+
+static void igc_put_rx_buffer(struct igc_ring *rx_ring,
+			      struct igc_rx_buffer *rx_buffer,
+			      int rx_buffer_pgcnt)
+{
+	if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
+		/* hand second half of page back to the ring */
+		igc_reuse_rx_page(rx_ring, rx_buffer);
+	} else {
+		/* We are not reusing the buffer so unmap it and free
+		 * any references we are holding to it
+		 */
+		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+				     igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
+				     IGC_RX_DMA_ATTR);
+		__page_frag_cache_drain(rx_buffer->page,
+					rx_buffer->pagecnt_bias);
+	}
+
+	/* clear contents of rx_buffer */
+	rx_buffer->page = NULL;
+}
+
+static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
+{
+	struct igc_adapter *adapter = rx_ring->q_vector->adapter;
+
+	if (ring_uses_build_skb(rx_ring))
+		return IGC_SKB_PAD;
+	if (igc_xdp_is_enabled(adapter))
+		return XDP_PACKET_HEADROOM;
+
+	return 0;
+}
+
+static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
+				  struct igc_rx_buffer *bi)
+{
+	struct page *page = bi->page;
+	dma_addr_t dma;
+
+	/* since we are recycling buffers we should seldom need to alloc */
+	if (likely(page))
+		return true;
+
+	/* alloc new page for storage */
+	page = dev_alloc_pages(igc_rx_pg_order(rx_ring));
+	if (unlikely(!page)) {
+		rx_ring->rx_stats.alloc_failed++;
+		return false;
+	}
+
+	/* map page for use */
+	dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+				 igc_rx_pg_size(rx_ring),
+				 DMA_FROM_DEVICE,
+				 IGC_RX_DMA_ATTR);
+
+	/* if mapping failed free memory back to system since
+	 * there isn't much point in holding memory we can't use
+	 */
+	if (dma_mapping_error(rx_ring->dev, dma)) {
+		__free_page(page);
+
+		rx_ring->rx_stats.alloc_failed++;
+		return false;
+	}
+
+	bi->dma = dma;
+	bi->page = page;
+	bi->page_offset = igc_rx_offset(rx_ring);
+	page_ref_add(page, USHRT_MAX - 1);
+	bi->pagecnt_bias = USHRT_MAX;
+
+	return true;
+}
+
+/**
+ * igc_alloc_rx_buffers - Replace used receive buffers; packet split
+ * @rx_ring: rx descriptor ring
+ * @cleaned_count: number of buffers to clean
+ */
+static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
+{
+	union igc_adv_rx_desc *rx_desc;
+	u16 i = rx_ring->next_to_use;
+	struct igc_rx_buffer *bi;
+	u16 bufsz;
+
+	/* nothing to do */
+	if (!cleaned_count)
+		return;
+
+	rx_desc = IGC_RX_DESC(rx_ring, i);
+	bi = &rx_ring->rx_buffer_info[i];
+	i -= rx_ring->count;
+
+	bufsz = igc_rx_bufsz(rx_ring);
+
+	do {
+		if (!igc_alloc_mapped_page(rx_ring, bi))
+			break;
+
+		/* sync the buffer for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+						 bi->page_offset, bufsz,
+						 DMA_FROM_DEVICE);
+
+		/* Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+		rx_desc++;
+		bi++;
+		i++;
+		if (unlikely(!i)) {
+			rx_desc = IGC_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_buffer_info;
+			i -= rx_ring->count;
+		}
+
+		/* clear the length for the next_to_use descriptor */
+		rx_desc->wb.upper.length = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	i += rx_ring->count;
+
+	if (rx_ring->next_to_use != i) {
+		/* record the next descriptor to use */
+		rx_ring->next_to_use = i;
+
+		/* update next to alloc since we have filled the ring */
+		rx_ring->next_to_alloc = i;
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(i, rx_ring->tail);
+	}
+}
+
+static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count)
+{
+	union igc_adv_rx_desc *desc;
+	u16 i = ring->next_to_use;
+	struct igc_rx_buffer *bi;
+	dma_addr_t dma;
+	bool ok = true;
+
+	if (!count)
+		return ok;
+
+	XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff);
+
+	desc = IGC_RX_DESC(ring, i);
+	bi = &ring->rx_buffer_info[i];
+	i -= ring->count;
+
+	do {
+		bi->xdp = xsk_buff_alloc(ring->xsk_pool);
+		if (!bi->xdp) {
+			ok = false;
+			break;
+		}
+
+		dma = xsk_buff_xdp_get_dma(bi->xdp);
+		desc->read.pkt_addr = cpu_to_le64(dma);
+
+		desc++;
+		bi++;
+		i++;
+		if (unlikely(!i)) {
+			desc = IGC_RX_DESC(ring, 0);
+			bi = ring->rx_buffer_info;
+			i -= ring->count;
+		}
+
+		/* Clear the length for the next_to_use descriptor. */
+		desc->wb.upper.length = 0;
+
+		count--;
+	} while (count);
+
+	i += ring->count;
+
+	if (ring->next_to_use != i) {
+		ring->next_to_use = i;
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(i, ring->tail);
+	}
+
+	return ok;
+}
+
+/* This function requires __netif_tx_lock is held by the caller. */
+static int igc_xdp_init_tx_descriptor(struct igc_ring *ring,
+				      struct xdp_frame *xdpf)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
+	u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? sinfo->nr_frags : 0;
+	u16 count, index = ring->next_to_use;
+	struct igc_tx_buffer *head = &ring->tx_buffer_info[index];
+	struct igc_tx_buffer *buffer = head;
+	union igc_adv_tx_desc *desc = IGC_TX_DESC(ring, index);
+	u32 olinfo_status, len = xdpf->len, cmd_type;
+	void *data = xdpf->data;
+	u16 i;
+
+	count = TXD_USE_COUNT(len);
+	for (i = 0; i < nr_frags; i++)
+		count += TXD_USE_COUNT(skb_frag_size(&sinfo->frags[i]));
+
+	if (igc_maybe_stop_tx(ring, count + 3)) {
+		/* this is a hard error */
+		return -EBUSY;
+	}
+
+	i = 0;
+	head->bytecount = xdp_get_frame_len(xdpf);
+	head->type = IGC_TX_BUFFER_TYPE_XDP;
+	head->gso_segs = 1;
+	head->xdpf = xdpf;
+
+	olinfo_status = head->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
+	desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+
+	for (;;) {
+		dma_addr_t dma;
+
+		dma = dma_map_single(ring->dev, data, len, DMA_TO_DEVICE);
+		if (dma_mapping_error(ring->dev, dma)) {
+			netdev_err_once(ring->netdev,
+					"Failed to map DMA for TX\n");
+			goto unmap;
+		}
+
+		dma_unmap_len_set(buffer, len, len);
+		dma_unmap_addr_set(buffer, dma, dma);
+
+		cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
+			   IGC_ADVTXD_DCMD_IFCS | len;
+
+		desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+		desc->read.buffer_addr = cpu_to_le64(dma);
+
+		buffer->protocol = 0;
+
+		if (++index == ring->count)
+			index = 0;
+
+		if (i == nr_frags)
+			break;
+
+		buffer = &ring->tx_buffer_info[index];
+		desc = IGC_TX_DESC(ring, index);
+		desc->read.olinfo_status = 0;
+
+		data = skb_frag_address(&sinfo->frags[i]);
+		len = skb_frag_size(&sinfo->frags[i]);
+		i++;
+	}
+	desc->read.cmd_type_len |= cpu_to_le32(IGC_TXD_DCMD);
+
+	netdev_tx_sent_queue(txring_txq(ring), head->bytecount);
+	/* set the timestamp */
+	head->time_stamp = jiffies;
+	/* set next_to_watch value indicating a packet is present */
+	head->next_to_watch = desc;
+	ring->next_to_use = index;
+
+	return 0;
+
+unmap:
+	for (;;) {
+		buffer = &ring->tx_buffer_info[index];
+		if (dma_unmap_len(buffer, len))
+			dma_unmap_page(ring->dev,
+				       dma_unmap_addr(buffer, dma),
+				       dma_unmap_len(buffer, len),
+				       DMA_TO_DEVICE);
+		dma_unmap_len_set(buffer, len, 0);
+		if (buffer == head)
+			break;
+
+		if (!index)
+			index += ring->count;
+		index--;
+	}
+
+	return -ENOMEM;
+}
+
+static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter,
+					    int cpu)
+{
+	int index = cpu;
+
+	if (unlikely(index < 0))
+		index = 0;
+
+	while (index >= adapter->num_tx_queues)
+		index -= adapter->num_tx_queues;
+
+	return adapter->tx_ring[index];
+}
+
+static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
+{
+	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+	int cpu = smp_processor_id();
+	struct netdev_queue *nq;
+	struct igc_ring *ring;
+	int res;
+
+	if (unlikely(!xdpf))
+		return -EFAULT;
+
+	ring = igc_xdp_get_tx_ring(adapter, cpu);
+	nq = txring_txq(ring);
+
+	__netif_tx_lock(nq, cpu);
+	/* Avoid transmit queue timeout since we share it with the slow path */
+	txq_trans_cond_update(nq);
+	res = igc_xdp_init_tx_descriptor(ring, xdpf);
+	__netif_tx_unlock(nq);
+	return res;
+}
+
+/* This function assumes rcu_read_lock() is held by the caller. */
+static int __igc_xdp_run_prog(struct igc_adapter *adapter,
+			      struct bpf_prog *prog,
+			      struct xdp_buff *xdp)
+{
+	u32 act = bpf_prog_run_xdp(prog, xdp);
+
+	switch (act) {
+	case XDP_PASS:
+		return IGC_XDP_PASS;
+	case XDP_TX:
+		if (igc_xdp_xmit_back(adapter, xdp) < 0)
+			goto out_failure;
+		return IGC_XDP_TX;
+	case XDP_REDIRECT:
+		if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0)
+			goto out_failure;
+		return IGC_XDP_REDIRECT;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(adapter->netdev, prog, act);
+		fallthrough;
+	case XDP_ABORTED:
+out_failure:
+		trace_xdp_exception(adapter->netdev, prog, act);
+		fallthrough;
+	case XDP_DROP:
+		return IGC_XDP_CONSUMED;
+	}
+}
+
+static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
+					struct xdp_buff *xdp)
+{
+	struct bpf_prog *prog;
+	int res;
+
+	prog = READ_ONCE(adapter->xdp_prog);
+	if (!prog) {
+		res = IGC_XDP_PASS;
+		goto out;
+	}
+
+	res = __igc_xdp_run_prog(adapter, prog, xdp);
+
+out:
+	return ERR_PTR(-res);
+}
+
+/* This function assumes __netif_tx_lock is held by the caller. */
+static void igc_flush_tx_descriptors(struct igc_ring *ring)
+{
+	/* Once tail pointer is updated, hardware can fetch the descriptors
+	 * any time so we issue a write membar here to ensure all memory
+	 * writes are complete before the tail pointer is updated.
+	 */
+	wmb();
+	writel(ring->next_to_use, ring->tail);
+}
+
+static void igc_finalize_xdp(struct igc_adapter *adapter, int status)
+{
+	int cpu = smp_processor_id();
+	struct netdev_queue *nq;
+	struct igc_ring *ring;
+
+	if (status & IGC_XDP_TX) {
+		ring = igc_xdp_get_tx_ring(adapter, cpu);
+		nq = txring_txq(ring);
+
+		__netif_tx_lock(nq, cpu);
+		igc_flush_tx_descriptors(ring);
+		__netif_tx_unlock(nq);
+	}
+
+	if (status & IGC_XDP_REDIRECT)
+		xdp_do_flush();
+}
+
+static void igc_update_rx_stats(struct igc_q_vector *q_vector,
+				unsigned int packets, unsigned int bytes)
+{
+	struct igc_ring *ring = q_vector->rx.ring;
+
+	u64_stats_update_begin(&ring->rx_syncp);
+	ring->rx_stats.packets += packets;
+	ring->rx_stats.bytes += bytes;
+	u64_stats_update_end(&ring->rx_syncp);
+
+	q_vector->rx.total_packets += packets;
+	q_vector->rx.total_bytes += bytes;
+}
+
+static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
+{
+	unsigned int total_bytes = 0, total_packets = 0;
+	struct igc_adapter *adapter = q_vector->adapter;
+	struct igc_ring *rx_ring = q_vector->rx.ring;
+	struct sk_buff *skb = rx_ring->skb;
+	u16 cleaned_count = igc_desc_unused(rx_ring);
+	int xdp_status = 0, rx_buffer_pgcnt;
+
+	while (likely(total_packets < budget)) {
+		union igc_adv_rx_desc *rx_desc;
+		struct igc_rx_buffer *rx_buffer;
+		unsigned int size, truesize;
+		struct igc_xdp_buff ctx;
+		ktime_t timestamp = 0;
+		int pkt_offset = 0;
+		void *pktbuf;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= IGC_RX_BUFFER_WRITE) {
+			igc_alloc_rx_buffers(rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean);
+		size = le16_to_cpu(rx_desc->wb.upper.length);
+		if (!size)
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * descriptor has been written back
+		 */
+		dma_rmb();
+
+		rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
+		truesize = igc_get_rx_frame_truesize(rx_ring, size);
+
+		pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
+
+		if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
+			timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
+							pktbuf);
+			ctx.rx_ts = timestamp;
+			pkt_offset = IGC_TS_HDR_LEN;
+			size -= IGC_TS_HDR_LEN;
+		}
+
+		if (!skb) {
+			xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq);
+			xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring),
+					 igc_rx_offset(rx_ring) + pkt_offset,
+					 size, true);
+			xdp_buff_clear_frags_flag(&ctx.xdp);
+			ctx.rx_desc = rx_desc;
+
+			skb = igc_xdp_run_prog(adapter, &ctx.xdp);
+		}
+
+		if (IS_ERR(skb)) {
+			unsigned int xdp_res = -PTR_ERR(skb);
+
+			switch (xdp_res) {
+			case IGC_XDP_CONSUMED:
+				rx_buffer->pagecnt_bias++;
+				break;
+			case IGC_XDP_TX:
+			case IGC_XDP_REDIRECT:
+				igc_rx_buffer_flip(rx_buffer, truesize);
+				xdp_status |= xdp_res;
+				break;
+			}
+
+			total_packets++;
+			total_bytes += size;
+		} else if (skb)
+			igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
+		else if (ring_uses_build_skb(rx_ring))
+			skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp);
+		else
+			skb = igc_construct_skb(rx_ring, rx_buffer, &ctx.xdp,
+						timestamp);
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb) {
+			rx_ring->rx_stats.alloc_failed++;
+			rx_buffer->pagecnt_bias++;
+			break;
+		}
+
+		igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
+		cleaned_count++;
+
+		/* fetch next buffer in frame if non-eop */
+		if (igc_is_non_eop(rx_ring, rx_desc))
+			continue;
+
+		/* verify the packet layout is correct */
+		if (igc_cleanup_headers(rx_ring, rx_desc, skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		/* probably a little skewed due to removing CRC */
+		total_bytes += skb->len;
+
+		/* populate checksum, VLAN, and protocol */
+		igc_process_skb_fields(rx_ring, rx_desc, skb);
+
+		napi_gro_receive(&q_vector->napi, skb);
+
+		/* reset skb pointer */
+		skb = NULL;
+
+		/* update budget accounting */
+		total_packets++;
+	}
+
+	if (xdp_status)
+		igc_finalize_xdp(adapter, xdp_status);
+
+	/* place incomplete frames back on ring for completion */
+	rx_ring->skb = skb;
+
+	igc_update_rx_stats(q_vector, total_packets, total_bytes);
+
+	if (cleaned_count)
+		igc_alloc_rx_buffers(rx_ring, cleaned_count);
+
+	return total_packets;
+}
+
+static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
+					    struct xdp_buff *xdp)
+{
+	unsigned int totalsize = xdp->data_end - xdp->data_meta;
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	struct sk_buff *skb;
+
+	net_prefetch(xdp->data_meta);
+
+	skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+
+	memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+	       ALIGN(totalsize, sizeof(long)));
+
+	if (metasize) {
+		skb_metadata_set(skb, metasize);
+		__skb_pull(skb, metasize);
+	}
+
+	return skb;
+}
+
+static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
+				union igc_adv_rx_desc *desc,
+				struct xdp_buff *xdp,
+				ktime_t timestamp)
+{
+	struct igc_ring *ring = q_vector->rx.ring;
+	struct sk_buff *skb;
+
+	skb = igc_construct_skb_zc(ring, xdp);
+	if (!skb) {
+		ring->rx_stats.alloc_failed++;
+		return;
+	}
+
+	if (timestamp)
+		skb_hwtstamps(skb)->hwtstamp = timestamp;
+
+	if (igc_cleanup_headers(ring, desc, skb))
+		return;
+
+	igc_process_skb_fields(ring, desc, skb);
+	napi_gro_receive(&q_vector->napi, skb);
+}
+
+static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp)
+{
+	/* xdp_buff pointer used by ZC code path is alloc as xdp_buff_xsk. The
+	 * igc_xdp_buff shares its layout with xdp_buff_xsk and private
+	 * igc_xdp_buff fields fall into xdp_buff_xsk->cb
+	 */
+       return (struct igc_xdp_buff *)xdp;
+}
+
+static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
+{
+	struct igc_adapter *adapter = q_vector->adapter;
+	struct igc_ring *ring = q_vector->rx.ring;
+	u16 cleaned_count = igc_desc_unused(ring);
+	int total_bytes = 0, total_packets = 0;
+	u16 ntc = ring->next_to_clean;
+	struct bpf_prog *prog;
+	bool failure = false;
+	int xdp_status = 0;
+
+	rcu_read_lock();
+
+	prog = READ_ONCE(adapter->xdp_prog);
+
+	while (likely(total_packets < budget)) {
+		union igc_adv_rx_desc *desc;
+		struct igc_rx_buffer *bi;
+		struct igc_xdp_buff *ctx;
+		ktime_t timestamp = 0;
+		unsigned int size;
+		int res;
+
+		desc = IGC_RX_DESC(ring, ntc);
+		size = le16_to_cpu(desc->wb.upper.length);
+		if (!size)
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * descriptor has been written back
+		 */
+		dma_rmb();
+
+		bi = &ring->rx_buffer_info[ntc];
+
+		ctx = xsk_buff_to_igc_ctx(bi->xdp);
+		ctx->rx_desc = desc;
+
+		if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
+			timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
+							bi->xdp->data);
+			ctx->rx_ts = timestamp;
+
+			bi->xdp->data += IGC_TS_HDR_LEN;
+
+			/* HW timestamp has been copied into local variable. Metadata
+			 * length when XDP program is called should be 0.
+			 */
+			bi->xdp->data_meta += IGC_TS_HDR_LEN;
+			size -= IGC_TS_HDR_LEN;
+		}
+
+		bi->xdp->data_end = bi->xdp->data + size;
+		xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool);
+
+		res = __igc_xdp_run_prog(adapter, prog, bi->xdp);
+		switch (res) {
+		case IGC_XDP_PASS:
+			igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp);
+			fallthrough;
+		case IGC_XDP_CONSUMED:
+			xsk_buff_free(bi->xdp);
+			break;
+		case IGC_XDP_TX:
+		case IGC_XDP_REDIRECT:
+			xdp_status |= res;
+			break;
+		}
+
+		bi->xdp = NULL;
+		total_bytes += size;
+		total_packets++;
+		cleaned_count++;
+		ntc++;
+		if (ntc == ring->count)
+			ntc = 0;
+	}
+
+	ring->next_to_clean = ntc;
+	rcu_read_unlock();
+
+	if (cleaned_count >= IGC_RX_BUFFER_WRITE)
+		failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count);
+
+	if (xdp_status)
+		igc_finalize_xdp(adapter, xdp_status);
+
+	igc_update_rx_stats(q_vector, total_packets, total_bytes);
+
+	if (xsk_uses_need_wakeup(ring->xsk_pool)) {
+		if (failure || ring->next_to_clean == ring->next_to_use)
+			xsk_set_rx_need_wakeup(ring->xsk_pool);
+		else
+			xsk_clear_rx_need_wakeup(ring->xsk_pool);
+		return total_packets;
+	}
+
+	return failure ? budget : total_packets;
+}
+
+static void igc_update_tx_stats(struct igc_q_vector *q_vector,
+				unsigned int packets, unsigned int bytes)
+{
+	struct igc_ring *ring = q_vector->tx.ring;
+
+	u64_stats_update_begin(&ring->tx_syncp);
+	ring->tx_stats.bytes += bytes;
+	ring->tx_stats.packets += packets;
+	u64_stats_update_end(&ring->tx_syncp);
+
+	q_vector->tx.total_bytes += bytes;
+	q_vector->tx.total_packets += packets;
+}
+
+static void igc_xdp_xmit_zc(struct igc_ring *ring)
+{
+	struct xsk_buff_pool *pool = ring->xsk_pool;
+	struct netdev_queue *nq = txring_txq(ring);
+	union igc_adv_tx_desc *tx_desc = NULL;
+	int cpu = smp_processor_id();
+	struct xdp_desc xdp_desc;
+	u16 budget, ntu;
+
+	if (!netif_carrier_ok(ring->netdev))
+		return;
+
+	__netif_tx_lock(nq, cpu);
+
+	/* Avoid transmit queue timeout since we share it with the slow path */
+	txq_trans_cond_update(nq);
+
+	ntu = ring->next_to_use;
+	budget = igc_desc_unused(ring);
+
+	while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
+		u32 cmd_type, olinfo_status;
+		struct igc_tx_buffer *bi;
+		dma_addr_t dma;
+
+		cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
+			   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
+			   xdp_desc.len;
+		olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT;
+
+		dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr);
+		xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len);
+
+		tx_desc = IGC_TX_DESC(ring, ntu);
+		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+		tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+		bi = &ring->tx_buffer_info[ntu];
+		bi->type = IGC_TX_BUFFER_TYPE_XSK;
+		bi->protocol = 0;
+		bi->bytecount = xdp_desc.len;
+		bi->gso_segs = 1;
+		bi->time_stamp = jiffies;
+		bi->next_to_watch = tx_desc;
+
+		netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len);
+
+		ntu++;
+		if (ntu == ring->count)
+			ntu = 0;
+	}
+
+	ring->next_to_use = ntu;
+	if (tx_desc) {
+		igc_flush_tx_descriptors(ring);
+		xsk_tx_release(pool);
+	}
+
+	__netif_tx_unlock(nq);
+}
+
+/**
+ * igc_clean_tx_irq - Reclaim resources after transmit completes
+ * @q_vector: pointer to q_vector containing needed info
+ * @napi_budget: Used to determine if we are in netpoll
+ *
+ * returns true if ring is completely cleaned
+ */
+static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
+{
+	struct igc_adapter *adapter = q_vector->adapter;
+	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int budget = q_vector->tx.work_limit;
+	struct igc_ring *tx_ring = q_vector->tx.ring;
+	unsigned int i = tx_ring->next_to_clean;
+	struct igc_tx_buffer *tx_buffer;
+	union igc_adv_tx_desc *tx_desc;
+	u32 xsk_frames = 0;
+
+	if (test_bit(__IGC_DOWN, &adapter->state))
+		return true;
+
+	tx_buffer = &tx_ring->tx_buffer_info[i];
+	tx_desc = IGC_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buffer->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buffer->bytecount;
+		total_packets += tx_buffer->gso_segs;
+
+		switch (tx_buffer->type) {
+		case IGC_TX_BUFFER_TYPE_XSK:
+			xsk_frames++;
+			break;
+		case IGC_TX_BUFFER_TYPE_XDP:
+			xdp_return_frame(tx_buffer->xdpf);
+			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
+			break;
+		case IGC_TX_BUFFER_TYPE_SKB:
+			napi_consume_skb(tx_buffer->skb, napi_budget);
+			igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
+			break;
+		default:
+			netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
+			break;
+		}
+
+		/* clear last DMA location and unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(!i)) {
+				i -= tx_ring->count;
+				tx_buffer = tx_ring->tx_buffer_info;
+				tx_desc = IGC_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len))
+				igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buffer = tx_ring->tx_buffer_info;
+			tx_desc = IGC_TX_DESC(tx_ring, 0);
+		}
+
+		/* issue prefetch for next Tx descriptor */
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	netdev_tx_completed_queue(txring_txq(tx_ring),
+				  total_packets, total_bytes);
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+
+	igc_update_tx_stats(q_vector, total_packets, total_bytes);
+
+	if (tx_ring->xsk_pool) {
+		if (xsk_frames)
+			xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
+		if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
+			xsk_set_tx_need_wakeup(tx_ring->xsk_pool);
+		igc_xdp_xmit_zc(tx_ring);
+	}
+
+	if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
+		struct igc_hw *hw = &adapter->hw;
+
+		/* Detect a transmit hang in hardware, this serializes the
+		 * check with the clearing of time_stamp and movement of i
+		 */
+		clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
+		if (tx_buffer->next_to_watch &&
+		    time_after(jiffies, tx_buffer->time_stamp +
+		    (adapter->tx_timeout_factor * HZ)) &&
+		    !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) &&
+		    (rd32(IGC_TDH(tx_ring->reg_idx)) != readl(tx_ring->tail)) &&
+		    !tx_ring->oper_gate_closed) {
+			/* detected Tx unit hang */
+			netdev_err(tx_ring->netdev,
+				   "Detected Tx Unit Hang\n"
+				   "  Tx Queue             <%d>\n"
+				   "  TDH                  <%x>\n"
+				   "  TDT                  <%x>\n"
+				   "  next_to_use          <%x>\n"
+				   "  next_to_clean        <%x>\n"
+				   "buffer_info[next_to_clean]\n"
+				   "  time_stamp           <%lx>\n"
+				   "  next_to_watch        <%p>\n"
+				   "  jiffies              <%lx>\n"
+				   "  desc.status          <%x>\n",
+				   tx_ring->queue_index,
+				   rd32(IGC_TDH(tx_ring->reg_idx)),
+				   readl(tx_ring->tail),
+				   tx_ring->next_to_use,
+				   tx_ring->next_to_clean,
+				   tx_buffer->time_stamp,
+				   tx_buffer->next_to_watch,
+				   jiffies,
+				   tx_buffer->next_to_watch->wb.status);
+			netif_stop_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+
+			/* we are about to reset, no point in enabling stuff */
+			return true;
+		}
+	}
+
+#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+	if (unlikely(total_packets &&
+		     netif_carrier_ok(tx_ring->netdev) &&
+		     igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+		if (__netif_subqueue_stopped(tx_ring->netdev,
+					     tx_ring->queue_index) &&
+		    !(test_bit(__IGC_DOWN, &adapter->state))) {
+			netif_wake_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+
+			u64_stats_update_begin(&tx_ring->tx_syncp);
+			tx_ring->tx_stats.restart_queue++;
+			u64_stats_update_end(&tx_ring->tx_syncp);
+		}
+	}
+
+	return !!budget;
+}
+
+static int igc_find_mac_filter(struct igc_adapter *adapter,
+			       enum igc_mac_filter_type type, const u8 *addr)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int max_entries = hw->mac.rar_entry_count;
+	u32 ral, rah;
+	int i;
+
+	for (i = 0; i < max_entries; i++) {
+		ral = rd32(IGC_RAL(i));
+		rah = rd32(IGC_RAH(i));
+
+		if (!(rah & IGC_RAH_AV))
+			continue;
+		if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type)
+			continue;
+		if ((rah & IGC_RAH_RAH_MASK) !=
+		    le16_to_cpup((__le16 *)(addr + 4)))
+			continue;
+		if (ral != le32_to_cpup((__le32 *)(addr)))
+			continue;
+
+		return i;
+	}
+
+	return -1;
+}
+
+static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int max_entries = hw->mac.rar_entry_count;
+	u32 rah;
+	int i;
+
+	for (i = 0; i < max_entries; i++) {
+		rah = rd32(IGC_RAH(i));
+
+		if (!(rah & IGC_RAH_AV))
+			return i;
+	}
+
+	return -1;
+}
+
+/**
+ * igc_add_mac_filter() - Add MAC address filter
+ * @adapter: Pointer to adapter where the filter should be added
+ * @type: MAC address filter type (source or destination)
+ * @addr: MAC address
+ * @queue: If non-negative, queue assignment feature is enabled and frames
+ *         matching the filter are enqueued onto 'queue'. Otherwise, queue
+ *         assignment is disabled.
+ *
+ * Return: 0 in case of success, negative errno code otherwise.
+ */
+static int igc_add_mac_filter(struct igc_adapter *adapter,
+			      enum igc_mac_filter_type type, const u8 *addr,
+			      int queue)
+{
+	struct net_device *dev = adapter->netdev;
+	int index;
+
+	index = igc_find_mac_filter(adapter, type, addr);
+	if (index >= 0)
+		goto update_filter;
+
+	index = igc_get_avail_mac_filter_slot(adapter);
+	if (index < 0)
+		return -ENOSPC;
+
+	netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n",
+		   index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
+		   addr, queue);
+
+update_filter:
+	igc_set_mac_filter_hw(adapter, index, type, addr, queue);
+	return 0;
+}
+
+/**
+ * igc_del_mac_filter() - Delete MAC address filter
+ * @adapter: Pointer to adapter where the filter should be deleted from
+ * @type: MAC address filter type (source or destination)
+ * @addr: MAC address
+ */
+static void igc_del_mac_filter(struct igc_adapter *adapter,
+			       enum igc_mac_filter_type type, const u8 *addr)
+{
+	struct net_device *dev = adapter->netdev;
+	int index;
+
+	index = igc_find_mac_filter(adapter, type, addr);
+	if (index < 0)
+		return;
+
+	if (index == 0) {
+		/* If this is the default filter, we don't actually delete it.
+		 * We just reset to its default value i.e. disable queue
+		 * assignment.
+		 */
+		netdev_dbg(dev, "Disable default MAC filter queue assignment");
+
+		igc_set_mac_filter_hw(adapter, 0, type, addr, -1);
+	} else {
+		netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n",
+			   index,
+			   type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
+			   addr);
+
+		igc_clear_mac_filter_hw(adapter, index);
+	}
+}
+
+/**
+ * igc_add_vlan_prio_filter() - Add VLAN priority filter
+ * @adapter: Pointer to adapter where the filter should be added
+ * @prio: VLAN priority value
+ * @queue: Queue number which matching frames are assigned to
+ *
+ * Return: 0 in case of success, negative errno code otherwise.
+ */
+static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio,
+				    int queue)
+{
+	struct net_device *dev = adapter->netdev;
+	struct igc_hw *hw = &adapter->hw;
+	u32 vlanpqf;
+
+	vlanpqf = rd32(IGC_VLANPQF);
+
+	if (vlanpqf & IGC_VLANPQF_VALID(prio)) {
+		netdev_dbg(dev, "VLAN priority filter already in use\n");
+		return -EEXIST;
+	}
+
+	vlanpqf |= IGC_VLANPQF_QSEL(prio, queue);
+	vlanpqf |= IGC_VLANPQF_VALID(prio);
+
+	wr32(IGC_VLANPQF, vlanpqf);
+
+	netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n",
+		   prio, queue);
+	return 0;
+}
+
+/**
+ * igc_del_vlan_prio_filter() - Delete VLAN priority filter
+ * @adapter: Pointer to adapter where the filter should be deleted from
+ * @prio: VLAN priority value
+ */
+static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 vlanpqf;
+
+	vlanpqf = rd32(IGC_VLANPQF);
+
+	vlanpqf &= ~IGC_VLANPQF_VALID(prio);
+	vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK);
+
+	wr32(IGC_VLANPQF, vlanpqf);
+
+	netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n",
+		   prio);
+}
+
+static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 0; i < MAX_ETYPE_FILTER; i++) {
+		u32 etqf = rd32(IGC_ETQF(i));
+
+		if (!(etqf & IGC_ETQF_FILTER_ENABLE))
+			return i;
+	}
+
+	return -1;
+}
+
+/**
+ * igc_add_etype_filter() - Add ethertype filter
+ * @adapter: Pointer to adapter where the filter should be added
+ * @etype: Ethertype value
+ * @queue: If non-negative, queue assignment feature is enabled and frames
+ *         matching the filter are enqueued onto 'queue'. Otherwise, queue
+ *         assignment is disabled.
+ *
+ * Return: 0 in case of success, negative errno code otherwise.
+ */
+static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype,
+				int queue)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int index;
+	u32 etqf;
+
+	index = igc_get_avail_etype_filter_slot(adapter);
+	if (index < 0)
+		return -ENOSPC;
+
+	etqf = rd32(IGC_ETQF(index));
+
+	etqf &= ~IGC_ETQF_ETYPE_MASK;
+	etqf |= etype;
+
+	if (queue >= 0) {
+		etqf &= ~IGC_ETQF_QUEUE_MASK;
+		etqf |= (queue << IGC_ETQF_QUEUE_SHIFT);
+		etqf |= IGC_ETQF_QUEUE_ENABLE;
+	}
+
+	etqf |= IGC_ETQF_FILTER_ENABLE;
+
+	wr32(IGC_ETQF(index), etqf);
+
+	netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n",
+		   etype, queue);
+	return 0;
+}
+
+static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 0; i < MAX_ETYPE_FILTER; i++) {
+		u32 etqf = rd32(IGC_ETQF(i));
+
+		if ((etqf & IGC_ETQF_ETYPE_MASK) == etype)
+			return i;
+	}
+
+	return -1;
+}
+
+/**
+ * igc_del_etype_filter() - Delete ethertype filter
+ * @adapter: Pointer to adapter where the filter should be deleted from
+ * @etype: Ethertype value
+ */
+static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int index;
+
+	index = igc_find_etype_filter(adapter, etype);
+	if (index < 0)
+		return;
+
+	wr32(IGC_ETQF(index), 0);
+
+	netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n",
+		   etype);
+}
+
+static int igc_flex_filter_select(struct igc_adapter *adapter,
+				  struct igc_flex_filter *input,
+				  u32 *fhft)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u8 fhft_index;
+	u32 fhftsl;
+
+	if (input->index >= MAX_FLEX_FILTER) {
+		dev_err(&adapter->pdev->dev, "Wrong Flex Filter index selected!\n");
+		return -EINVAL;
+	}
+
+	/* Indirect table select register */
+	fhftsl = rd32(IGC_FHFTSL);
+	fhftsl &= ~IGC_FHFTSL_FTSL_MASK;
+	switch (input->index) {
+	case 0 ... 7:
+		fhftsl |= 0x00;
+		break;
+	case 8 ... 15:
+		fhftsl |= 0x01;
+		break;
+	case 16 ... 23:
+		fhftsl |= 0x02;
+		break;
+	case 24 ... 31:
+		fhftsl |= 0x03;
+		break;
+	}
+	wr32(IGC_FHFTSL, fhftsl);
+
+	/* Normalize index down to host table register */
+	fhft_index = input->index % 8;
+
+	*fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) :
+		IGC_FHFT_EXT(fhft_index - 4);
+
+	return 0;
+}
+
+static int igc_write_flex_filter_ll(struct igc_adapter *adapter,
+				    struct igc_flex_filter *input)
+{
+	struct device *dev = &adapter->pdev->dev;
+	struct igc_hw *hw = &adapter->hw;
+	u8 *data = input->data;
+	u8 *mask = input->mask;
+	u32 queuing;
+	u32 fhft;
+	u32 wufc;
+	int ret;
+	int i;
+
+	/* Length has to be aligned to 8. Otherwise the filter will fail. Bail
+	 * out early to avoid surprises later.
+	 */
+	if (input->length % 8 != 0) {
+		dev_err(dev, "The length of a flex filter has to be 8 byte aligned!\n");
+		return -EINVAL;
+	}
+
+	/* Select corresponding flex filter register and get base for host table. */
+	ret = igc_flex_filter_select(adapter, input, &fhft);
+	if (ret)
+		return ret;
+
+	/* When adding a filter globally disable flex filter feature. That is
+	 * recommended within the datasheet.
+	 */
+	wufc = rd32(IGC_WUFC);
+	wufc &= ~IGC_WUFC_FLEX_HQ;
+	wr32(IGC_WUFC, wufc);
+
+	/* Configure filter */
+	queuing = input->length & IGC_FHFT_LENGTH_MASK;
+	queuing |= (input->rx_queue << IGC_FHFT_QUEUE_SHIFT) & IGC_FHFT_QUEUE_MASK;
+	queuing |= (input->prio << IGC_FHFT_PRIO_SHIFT) & IGC_FHFT_PRIO_MASK;
+
+	if (input->immediate_irq)
+		queuing |= IGC_FHFT_IMM_INT;
+
+	if (input->drop)
+		queuing |= IGC_FHFT_DROP;
+
+	wr32(fhft + 0xFC, queuing);
+
+	/* Write data (128 byte) and mask (128 bit) */
+	for (i = 0; i < 16; ++i) {
+		const size_t data_idx = i * 8;
+		const size_t row_idx = i * 16;
+		u32 dw0 =
+			(data[data_idx + 0] << 0) |
+			(data[data_idx + 1] << 8) |
+			(data[data_idx + 2] << 16) |
+			(data[data_idx + 3] << 24);
+		u32 dw1 =
+			(data[data_idx + 4] << 0) |
+			(data[data_idx + 5] << 8) |
+			(data[data_idx + 6] << 16) |
+			(data[data_idx + 7] << 24);
+		u32 tmp;
+
+		/* Write row: dw0, dw1 and mask */
+		wr32(fhft + row_idx, dw0);
+		wr32(fhft + row_idx + 4, dw1);
+
+		/* mask is only valid for MASK(7, 0) */
+		tmp = rd32(fhft + row_idx + 8);
+		tmp &= ~GENMASK(7, 0);
+		tmp |= mask[i];
+		wr32(fhft + row_idx + 8, tmp);
+	}
+
+	/* Enable filter. */
+	wufc |= IGC_WUFC_FLEX_HQ;
+	if (input->index > 8) {
+		/* Filter 0-7 are enabled via WUFC. The other 24 filters are not. */
+		u32 wufc_ext = rd32(IGC_WUFC_EXT);
+
+		wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8));
+
+		wr32(IGC_WUFC_EXT, wufc_ext);
+	} else {
+		wufc |= (IGC_WUFC_FLX0 << input->index);
+	}
+	wr32(IGC_WUFC, wufc);
+
+	dev_dbg(&adapter->pdev->dev, "Added flex filter %u to HW.\n",
+		input->index);
+
+	return 0;
+}
+
+static void igc_flex_filter_add_field(struct igc_flex_filter *flex,
+				      const void *src, unsigned int offset,
+				      size_t len, const void *mask)
+{
+	int i;
+
+	/* data */
+	memcpy(&flex->data[offset], src, len);
+
+	/* mask */
+	for (i = 0; i < len; ++i) {
+		const unsigned int idx = i + offset;
+		const u8 *ptr = mask;
+
+		if (mask) {
+			if (ptr[i] & 0xff)
+				flex->mask[idx / 8] |= BIT(idx % 8);
+
+			continue;
+		}
+
+		flex->mask[idx / 8] |= BIT(idx % 8);
+	}
+}
+
+static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 wufc, wufc_ext;
+	int i;
+
+	wufc = rd32(IGC_WUFC);
+	wufc_ext = rd32(IGC_WUFC_EXT);
+
+	for (i = 0; i < MAX_FLEX_FILTER; i++) {
+		if (i < 8) {
+			if (!(wufc & (IGC_WUFC_FLX0 << i)))
+				return i;
+		} else {
+			if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8))))
+				return i;
+		}
+	}
+
+	return -ENOSPC;
+}
+
+static bool igc_flex_filter_in_use(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 wufc, wufc_ext;
+
+	wufc = rd32(IGC_WUFC);
+	wufc_ext = rd32(IGC_WUFC_EXT);
+
+	if (wufc & IGC_WUFC_FILTER_MASK)
+		return true;
+
+	if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK)
+		return true;
+
+	return false;
+}
+
+static int igc_add_flex_filter(struct igc_adapter *adapter,
+			       struct igc_nfc_rule *rule)
+{
+	struct igc_flex_filter flex = { };
+	struct igc_nfc_filter *filter = &rule->filter;
+	unsigned int eth_offset, user_offset;
+	int ret, index;
+	bool vlan;
+
+	index = igc_find_avail_flex_filter_slot(adapter);
+	if (index < 0)
+		return -ENOSPC;
+
+	/* Construct the flex filter:
+	 *  -> dest_mac [6]
+	 *  -> src_mac [6]
+	 *  -> tpid [2]
+	 *  -> vlan tci [2]
+	 *  -> ether type [2]
+	 *  -> user data [8]
+	 *  -> = 26 bytes => 32 length
+	 */
+	flex.index    = index;
+	flex.length   = 32;
+	flex.rx_queue = rule->action;
+
+	vlan = rule->filter.vlan_tci || rule->filter.vlan_etype;
+	eth_offset = vlan ? 16 : 12;
+	user_offset = vlan ? 18 : 14;
+
+	/* Add destination MAC  */
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
+		igc_flex_filter_add_field(&flex, &filter->dst_addr, 0,
+					  ETH_ALEN, NULL);
+
+	/* Add source MAC */
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
+		igc_flex_filter_add_field(&flex, &filter->src_addr, 6,
+					  ETH_ALEN, NULL);
+
+	/* Add VLAN etype */
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE)
+		igc_flex_filter_add_field(&flex, &filter->vlan_etype, 12,
+					  sizeof(filter->vlan_etype),
+					  NULL);
+
+	/* Add VLAN TCI */
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI)
+		igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14,
+					  sizeof(filter->vlan_tci), NULL);
+
+	/* Add Ether type */
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
+		__be16 etype = cpu_to_be16(filter->etype);
+
+		igc_flex_filter_add_field(&flex, &etype, eth_offset,
+					  sizeof(etype), NULL);
+	}
+
+	/* Add user data */
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA)
+		igc_flex_filter_add_field(&flex, &filter->user_data,
+					  user_offset,
+					  sizeof(filter->user_data),
+					  filter->user_mask);
+
+	/* Add it down to the hardware and enable it. */
+	ret = igc_write_flex_filter_ll(adapter, &flex);
+	if (ret)
+		return ret;
+
+	filter->flex_index = index;
+
+	return 0;
+}
+
+static void igc_del_flex_filter(struct igc_adapter *adapter,
+				u16 reg_index)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 wufc;
+
+	/* Just disable the filter. The filter table itself is kept
+	 * intact. Another flex_filter_add() should override the "old" data
+	 * then.
+	 */
+	if (reg_index > 8) {
+		u32 wufc_ext = rd32(IGC_WUFC_EXT);
+
+		wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8));
+		wr32(IGC_WUFC_EXT, wufc_ext);
+	} else {
+		wufc = rd32(IGC_WUFC);
+
+		wufc &= ~(IGC_WUFC_FLX0 << reg_index);
+		wr32(IGC_WUFC, wufc);
+	}
+
+	if (igc_flex_filter_in_use(adapter))
+		return;
+
+	/* No filters are in use, we may disable flex filters */
+	wufc = rd32(IGC_WUFC);
+	wufc &= ~IGC_WUFC_FLEX_HQ;
+	wr32(IGC_WUFC, wufc);
+}
+
+static int igc_enable_nfc_rule(struct igc_adapter *adapter,
+			       struct igc_nfc_rule *rule)
+{
+	int err;
+
+	if (rule->flex) {
+		return igc_add_flex_filter(adapter, rule);
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
+		err = igc_add_etype_filter(adapter, rule->filter.etype,
+					   rule->action);
+		if (err)
+			return err;
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
+		err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
+					 rule->filter.src_addr, rule->action);
+		if (err)
+			return err;
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
+		err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
+					 rule->filter.dst_addr, rule->action);
+		if (err)
+			return err;
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
+		int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
+			   VLAN_PRIO_SHIFT;
+
+		err = igc_add_vlan_prio_filter(adapter, prio, rule->action);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static void igc_disable_nfc_rule(struct igc_adapter *adapter,
+				 const struct igc_nfc_rule *rule)
+{
+	if (rule->flex) {
+		igc_del_flex_filter(adapter, rule->filter.flex_index);
+		return;
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
+		igc_del_etype_filter(adapter, rule->filter.etype);
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
+		int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
+			   VLAN_PRIO_SHIFT;
+
+		igc_del_vlan_prio_filter(adapter, prio);
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
+		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
+				   rule->filter.src_addr);
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
+		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
+				   rule->filter.dst_addr);
+}
+
+/**
+ * igc_get_nfc_rule() - Get NFC rule
+ * @adapter: Pointer to adapter
+ * @location: Rule location
+ *
+ * Context: Expects adapter->nfc_rule_lock to be held by caller.
+ *
+ * Return: Pointer to NFC rule at @location. If not found, NULL.
+ */
+struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter,
+				      u32 location)
+{
+	struct igc_nfc_rule *rule;
+
+	list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
+		if (rule->location == location)
+			return rule;
+		if (rule->location > location)
+			break;
+	}
+
+	return NULL;
+}
+
+/**
+ * igc_del_nfc_rule() - Delete NFC rule
+ * @adapter: Pointer to adapter
+ * @rule: Pointer to rule to be deleted
+ *
+ * Disable NFC rule in hardware and delete it from adapter.
+ *
+ * Context: Expects adapter->nfc_rule_lock to be held by caller.
+ */
+void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
+{
+	igc_disable_nfc_rule(adapter, rule);
+
+	list_del(&rule->list);
+	adapter->nfc_rule_count--;
+
+	kfree(rule);
+}
+
+static void igc_flush_nfc_rules(struct igc_adapter *adapter)
+{
+	struct igc_nfc_rule *rule, *tmp;
+
+	mutex_lock(&adapter->nfc_rule_lock);
+
+	list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list)
+		igc_del_nfc_rule(adapter, rule);
+
+	mutex_unlock(&adapter->nfc_rule_lock);
+}
+
+/**
+ * igc_add_nfc_rule() - Add NFC rule
+ * @adapter: Pointer to adapter
+ * @rule: Pointer to rule to be added
+ *
+ * Enable NFC rule in hardware and add it to adapter.
+ *
+ * Context: Expects adapter->nfc_rule_lock to be held by caller.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
+{
+	struct igc_nfc_rule *pred, *cur;
+	int err;
+
+	err = igc_enable_nfc_rule(adapter, rule);
+	if (err)
+		return err;
+
+	pred = NULL;
+	list_for_each_entry(cur, &adapter->nfc_rule_list, list) {
+		if (cur->location >= rule->location)
+			break;
+		pred = cur;
+	}
+
+	list_add(&rule->list, pred ? &pred->list : &adapter->nfc_rule_list);
+	adapter->nfc_rule_count++;
+	return 0;
+}
+
+static void igc_restore_nfc_rules(struct igc_adapter *adapter)
+{
+	struct igc_nfc_rule *rule;
+
+	mutex_lock(&adapter->nfc_rule_lock);
+
+	list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list)
+		igc_enable_nfc_rule(adapter, rule);
+
+	mutex_unlock(&adapter->nfc_rule_lock);
+}
+
+static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1);
+}
+
+static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr);
+	return 0;
+}
+
+/**
+ * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The set_rx_mode entry point is called whenever the unicast or multicast
+ * address lists or the network interface flags are updated.  This routine is
+ * responsible for configuring the hardware for proper unicast, multicast,
+ * promiscuous mode, and all-multi behavior.
+ */
+static void igc_set_rx_mode(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
+	int count;
+
+	/* Check for Promiscuous and All Multicast modes */
+	if (netdev->flags & IFF_PROMISC) {
+		rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE;
+	} else {
+		if (netdev->flags & IFF_ALLMULTI) {
+			rctl |= IGC_RCTL_MPE;
+		} else {
+			/* Write addresses to the MTA, if the attempt fails
+			 * then we should just turn on promiscuous mode so
+			 * that we can at least receive multicast traffic
+			 */
+			count = igc_write_mc_addr_list(netdev);
+			if (count < 0)
+				rctl |= IGC_RCTL_MPE;
+		}
+	}
+
+	/* Write addresses to available RAR registers, if there is not
+	 * sufficient space to store all the addresses then enable
+	 * unicast promiscuous mode
+	 */
+	if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync))
+		rctl |= IGC_RCTL_UPE;
+
+	/* update state of unicast and multicast */
+	rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE);
+	wr32(IGC_RCTL, rctl);
+
+#if (PAGE_SIZE < 8192)
+	if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB)
+		rlpml = IGC_MAX_FRAME_BUILD_SKB;
+#endif
+	wr32(IGC_RLPML, rlpml);
+}
+
+/**
+ * igc_configure - configure the hardware for RX and TX
+ * @adapter: private board structure
+ */
+static void igc_configure(struct igc_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int i = 0;
+
+	igc_get_hw_control(adapter);
+	igc_set_rx_mode(netdev);
+
+	igc_restore_vlan(adapter);
+
+	igc_setup_tctl(adapter);
+	igc_setup_mrqc(adapter);
+	igc_setup_rctl(adapter);
+
+	igc_set_default_mac_filter(adapter);
+	igc_restore_nfc_rules(adapter);
+
+	igc_configure_tx(adapter);
+	igc_configure_rx(adapter);
+
+	igc_rx_fifo_flush_base(&adapter->hw);
+
+	/* call igc_desc_unused which always leaves
+	 * at least 1 descriptor unused to make sure
+	 * next_to_use != next_to_clean
+	 */
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct igc_ring *ring = adapter->rx_ring[i];
+
+		if (ring->xsk_pool)
+			igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
+		else
+			igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
+	}
+}
+
+/**
+ * igc_write_ivar - configure ivar for given MSI-X vector
+ * @hw: pointer to the HW structure
+ * @msix_vector: vector number we are allocating to a given ring
+ * @index: row index of IVAR register to write within IVAR table
+ * @offset: column offset of in IVAR, should be multiple of 8
+ *
+ * The IVAR table consists of 2 columns,
+ * each containing an cause allocation for an Rx and Tx ring, and a
+ * variable number of rows depending on the number of queues supported.
+ */
+static void igc_write_ivar(struct igc_hw *hw, int msix_vector,
+			   int index, int offset)
+{
+	u32 ivar = array_rd32(IGC_IVAR0, index);
+
+	/* clear any bits that are currently set */
+	ivar &= ~((u32)0xFF << offset);
+
+	/* write vector and valid bit */
+	ivar |= (msix_vector | IGC_IVAR_VALID) << offset;
+
+	array_wr32(IGC_IVAR0, index, ivar);
+}
+
+static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector)
+{
+	struct igc_adapter *adapter = q_vector->adapter;
+	struct igc_hw *hw = &adapter->hw;
+	int rx_queue = IGC_N0_QUEUE;
+	int tx_queue = IGC_N0_QUEUE;
+
+	if (q_vector->rx.ring)
+		rx_queue = q_vector->rx.ring->reg_idx;
+	if (q_vector->tx.ring)
+		tx_queue = q_vector->tx.ring->reg_idx;
+
+	switch (hw->mac.type) {
+	case igc_i225:
+		if (rx_queue > IGC_N0_QUEUE)
+			igc_write_ivar(hw, msix_vector,
+				       rx_queue >> 1,
+				       (rx_queue & 0x1) << 4);
+		if (tx_queue > IGC_N0_QUEUE)
+			igc_write_ivar(hw, msix_vector,
+				       tx_queue >> 1,
+				       ((tx_queue & 0x1) << 4) + 8);
+		q_vector->eims_value = BIT(msix_vector);
+		break;
+	default:
+		WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n");
+		break;
+	}
+
+	/* add q_vector eims value to global eims_enable_mask */
+	adapter->eims_enable_mask |= q_vector->eims_value;
+
+	/* configure q_vector to set itr on first interrupt */
+	q_vector->set_itr = 1;
+}
+
+/**
+ * igc_configure_msix - Configure MSI-X hardware
+ * @adapter: Pointer to adapter structure
+ *
+ * igc_configure_msix sets up the hardware to properly
+ * generate MSI-X interrupts.
+ */
+static void igc_configure_msix(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int i, vector = 0;
+	u32 tmp;
+
+	adapter->eims_enable_mask = 0;
+
+	/* set vector for other causes, i.e. link changes */
+	switch (hw->mac.type) {
+	case igc_i225:
+		/* Turn on MSI-X capability first, or our settings
+		 * won't stick.  And it will take days to debug.
+		 */
+		wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE |
+		     IGC_GPIE_PBA | IGC_GPIE_EIAME |
+		     IGC_GPIE_NSICR);
+
+		/* enable msix_other interrupt */
+		adapter->eims_other = BIT(vector);
+		tmp = (vector++ | IGC_IVAR_VALID) << 8;
+
+		wr32(IGC_IVAR_MISC, tmp);
+		break;
+	default:
+		/* do nothing, since nothing else supports MSI-X */
+		break;
+	} /* switch (hw->mac.type) */
+
+	adapter->eims_enable_mask |= adapter->eims_other;
+
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		igc_assign_vector(adapter->q_vector[i], vector++);
+
+	wrfl();
+}
+
+/**
+ * igc_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ */
+static void igc_irq_enable(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+
+	if (adapter->msix_entries) {
+		u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA;
+		u32 regval = rd32(IGC_EIAC);
+
+		wr32(IGC_EIAC, regval | adapter->eims_enable_mask);
+		regval = rd32(IGC_EIAM);
+		wr32(IGC_EIAM, regval | adapter->eims_enable_mask);
+		wr32(IGC_EIMS, adapter->eims_enable_mask);
+		wr32(IGC_IMS, ims);
+	} else {
+		wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
+		wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
+	}
+}
+
+/**
+ * igc_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ */
+static void igc_irq_disable(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+
+	if (adapter->msix_entries) {
+		u32 regval = rd32(IGC_EIAM);
+
+		wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask);
+		wr32(IGC_EIMC, adapter->eims_enable_mask);
+		regval = rd32(IGC_EIAC);
+		wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask);
+	}
+
+	wr32(IGC_IAM, 0);
+	wr32(IGC_IMC, ~0);
+	wrfl();
+
+	if (adapter->msix_entries) {
+		int vector = 0, i;
+
+		synchronize_irq(adapter->msix_entries[vector++].vector);
+
+		for (i = 0; i < adapter->num_q_vectors; i++)
+			synchronize_irq(adapter->msix_entries[vector++].vector);
+	} else {
+		synchronize_irq(adapter->pdev->irq);
+	}
+}
+
+void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
+			      const u32 max_rss_queues)
+{
+	/* Determine if we need to pair queues. */
+	/* If rss_queues > half of max_rss_queues, pair the queues in
+	 * order to conserve interrupts due to limited supply.
+	 */
+	if (adapter->rss_queues > (max_rss_queues / 2))
+		adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
+	else
+		adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
+}
+
+unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
+{
+	return IGC_MAX_RX_QUEUES;
+}
+
+static void igc_init_queue_configuration(struct igc_adapter *adapter)
+{
+	u32 max_rss_queues;
+
+	max_rss_queues = igc_get_max_rss_queues(adapter);
+	adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
+
+	igc_set_flag_queue_pairs(adapter, max_rss_queues);
+}
+
+/**
+ * igc_reset_q_vector - Reset config for interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: Index of vector to be reset
+ *
+ * If NAPI is enabled it will delete any references to the
+ * NAPI struct. This is preparation for igc_free_q_vector.
+ */
+static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx)
+{
+	struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
+
+	/* if we're coming from igc_set_interrupt_capability, the vectors are
+	 * not yet allocated
+	 */
+	if (!q_vector)
+		return;
+
+	if (q_vector->tx.ring)
+		adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
+
+	if (q_vector->rx.ring)
+		adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
+
+	netif_napi_del(&q_vector->napi);
+}
+
+/**
+ * igc_free_q_vector - Free memory allocated for specific interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: Index of vector to be freed
+ *
+ * This function frees the memory allocated to the q_vector.
+ */
+static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx)
+{
+	struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
+
+	adapter->q_vector[v_idx] = NULL;
+
+	/* igc_get_stats64() might access the rings on this vector,
+	 * we must wait a grace period before freeing it.
+	 */
+	if (q_vector)
+		kfree_rcu(q_vector, rcu);
+}
+
+/**
+ * igc_free_q_vectors - Free memory allocated for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * This function frees the memory allocated to the q_vectors.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ */
+static void igc_free_q_vectors(struct igc_adapter *adapter)
+{
+	int v_idx = adapter->num_q_vectors;
+
+	adapter->num_tx_queues = 0;
+	adapter->num_rx_queues = 0;
+	adapter->num_q_vectors = 0;
+
+	while (v_idx--) {
+		igc_reset_q_vector(adapter, v_idx);
+		igc_free_q_vector(adapter, v_idx);
+	}
+}
+
+/**
+ * igc_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: pointer to q_vector
+ * @ring_container: ring info to update the itr for
+ *
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt.  The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern.  Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
+ * while increasing bulk throughput.
+ * NOTE: These calculations are only valid when operating in a single-
+ * queue environment.
+ */
+static void igc_update_itr(struct igc_q_vector *q_vector,
+			   struct igc_ring_container *ring_container)
+{
+	unsigned int packets = ring_container->total_packets;
+	unsigned int bytes = ring_container->total_bytes;
+	u8 itrval = ring_container->itr;
+
+	/* no packets, exit with status unchanged */
+	if (packets == 0)
+		return;
+
+	switch (itrval) {
+	case lowest_latency:
+		/* handle TSO and jumbo frames */
+		if (bytes / packets > 8000)
+			itrval = bulk_latency;
+		else if ((packets < 5) && (bytes > 512))
+			itrval = low_latency;
+		break;
+	case low_latency:  /* 50 usec aka 20000 ints/s */
+		if (bytes > 10000) {
+			/* this if handles the TSO accounting */
+			if (bytes / packets > 8000)
+				itrval = bulk_latency;
+			else if ((packets < 10) || ((bytes / packets) > 1200))
+				itrval = bulk_latency;
+			else if ((packets > 35))
+				itrval = lowest_latency;
+		} else if (bytes / packets > 2000) {
+			itrval = bulk_latency;
+		} else if (packets <= 2 && bytes < 512) {
+			itrval = lowest_latency;
+		}
+		break;
+	case bulk_latency: /* 250 usec aka 4000 ints/s */
+		if (bytes > 25000) {
+			if (packets > 35)
+				itrval = low_latency;
+		} else if (bytes < 1500) {
+			itrval = low_latency;
+		}
+		break;
+	}
+
+	/* clear work counters since we have the values we need */
+	ring_container->total_bytes = 0;
+	ring_container->total_packets = 0;
+
+	/* write updated itr to ring container */
+	ring_container->itr = itrval;
+}
+
+static void igc_set_itr(struct igc_q_vector *q_vector)
+{
+	struct igc_adapter *adapter = q_vector->adapter;
+	u32 new_itr = q_vector->itr_val;
+	u8 current_itr = 0;
+
+	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
+	switch (adapter->link_speed) {
+	case SPEED_10:
+	case SPEED_100:
+		current_itr = 0;
+		new_itr = IGC_4K_ITR;
+		goto set_itr_now;
+	default:
+		break;
+	}
+
+	igc_update_itr(q_vector, &q_vector->tx);
+	igc_update_itr(q_vector, &q_vector->rx);
+
+	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
+
+	/* conservative mode (itr 3) eliminates the lowest_latency setting */
+	if (current_itr == lowest_latency &&
+	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+	    (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
+		current_itr = low_latency;
+
+	switch (current_itr) {
+	/* counts and packets in update_itr are dependent on these numbers */
+	case lowest_latency:
+		new_itr = IGC_70K_ITR; /* 70,000 ints/sec */
+		break;
+	case low_latency:
+		new_itr = IGC_20K_ITR; /* 20,000 ints/sec */
+		break;
+	case bulk_latency:
+		new_itr = IGC_4K_ITR;  /* 4,000 ints/sec */
+		break;
+	default:
+		break;
+	}
+
+set_itr_now:
+	if (new_itr != q_vector->itr_val) {
+		/* this attempts to bias the interrupt rate towards Bulk
+		 * by adding intermediate steps when interrupt rate is
+		 * increasing
+		 */
+		new_itr = new_itr > q_vector->itr_val ?
+			  max((new_itr * q_vector->itr_val) /
+			  (new_itr + (q_vector->itr_val >> 2)),
+			  new_itr) : new_itr;
+		/* Don't write the value here; it resets the adapter's
+		 * internal timer, and causes us to delay far longer than
+		 * we should between interrupts.  Instead, we write the ITR
+		 * value at the beginning of the next interrupt so the timing
+		 * ends up being correct.
+		 */
+		q_vector->itr_val = new_itr;
+		q_vector->set_itr = 1;
+	}
+}
+
+static void igc_reset_interrupt_capability(struct igc_adapter *adapter)
+{
+	int v_idx = adapter->num_q_vectors;
+
+	if (adapter->msix_entries) {
+		pci_disable_msix(adapter->pdev);
+		kfree(adapter->msix_entries);
+		adapter->msix_entries = NULL;
+	} else if (adapter->flags & IGC_FLAG_HAS_MSI) {
+		pci_disable_msi(adapter->pdev);
+	}
+
+	while (v_idx--)
+		igc_reset_q_vector(adapter, v_idx);
+}
+
+/**
+ * igc_set_interrupt_capability - set MSI or MSI-X if supported
+ * @adapter: Pointer to adapter structure
+ * @msix: boolean value for MSI-X capability
+ *
+ * Attempt to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ */
+static void igc_set_interrupt_capability(struct igc_adapter *adapter,
+					 bool msix)
+{
+	int numvecs, i;
+	int err;
+
+	if (!msix)
+		goto msi_only;
+	adapter->flags |= IGC_FLAG_HAS_MSIX;
+
+	/* Number of supported queues. */
+	adapter->num_rx_queues = adapter->rss_queues;
+
+	adapter->num_tx_queues = adapter->rss_queues;
+
+	/* start with one vector for every Rx queue */
+	numvecs = adapter->num_rx_queues;
+
+	/* if Tx handler is separate add 1 for every Tx queue */
+	if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS))
+		numvecs += adapter->num_tx_queues;
+
+	/* store the number of vectors reserved for queues */
+	adapter->num_q_vectors = numvecs;
+
+	/* add 1 vector for link status interrupts */
+	numvecs++;
+
+	adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
+					GFP_KERNEL);
+
+	if (!adapter->msix_entries)
+		return;
+
+	/* populate entry values */
+	for (i = 0; i < numvecs; i++)
+		adapter->msix_entries[i].entry = i;
+
+	err = pci_enable_msix_range(adapter->pdev,
+				    adapter->msix_entries,
+				    numvecs,
+				    numvecs);
+	if (err > 0)
+		return;
+
+	kfree(adapter->msix_entries);
+	adapter->msix_entries = NULL;
+
+	igc_reset_interrupt_capability(adapter);
+
+msi_only:
+	adapter->flags &= ~IGC_FLAG_HAS_MSIX;
+
+	adapter->rss_queues = 1;
+	adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
+	adapter->num_rx_queues = 1;
+	adapter->num_tx_queues = 1;
+	adapter->num_q_vectors = 1;
+	if (!pci_enable_msi(adapter->pdev))
+		adapter->flags |= IGC_FLAG_HAS_MSI;
+}
+
+/**
+ * igc_update_ring_itr - update the dynamic ITR value based on packet size
+ * @q_vector: pointer to q_vector
+ *
+ * Stores a new ITR value based on strictly on packet size.  This
+ * algorithm is less sophisticated than that used in igc_update_itr,
+ * due to the difficulty of synchronizing statistics across multiple
+ * receive rings.  The divisors and thresholds used by this function
+ * were determined based on theoretical maximum wire speed and testing
+ * data, in order to minimize response time while increasing bulk
+ * throughput.
+ * NOTE: This function is called only when operating in a multiqueue
+ * receive environment.
+ */
+static void igc_update_ring_itr(struct igc_q_vector *q_vector)
+{
+	struct igc_adapter *adapter = q_vector->adapter;
+	int new_val = q_vector->itr_val;
+	int avg_wire_size = 0;
+	unsigned int packets;
+
+	/* For non-gigabit speeds, just fix the interrupt rate at 4000
+	 * ints/sec - ITR timer value of 120 ticks.
+	 */
+	switch (adapter->link_speed) {
+	case SPEED_10:
+	case SPEED_100:
+		new_val = IGC_4K_ITR;
+		goto set_itr_val;
+	default:
+		break;
+	}
+
+	packets = q_vector->rx.total_packets;
+	if (packets)
+		avg_wire_size = q_vector->rx.total_bytes / packets;
+
+	packets = q_vector->tx.total_packets;
+	if (packets)
+		avg_wire_size = max_t(u32, avg_wire_size,
+				      q_vector->tx.total_bytes / packets);
+
+	/* if avg_wire_size isn't set no work was done */
+	if (!avg_wire_size)
+		goto clear_counts;
+
+	/* Add 24 bytes to size to account for CRC, preamble, and gap */
+	avg_wire_size += 24;
+
+	/* Don't starve jumbo frames */
+	avg_wire_size = min(avg_wire_size, 3000);
+
+	/* Give a little boost to mid-size frames */
+	if (avg_wire_size > 300 && avg_wire_size < 1200)
+		new_val = avg_wire_size / 3;
+	else
+		new_val = avg_wire_size / 2;
+
+	/* conservative mode (itr 3) eliminates the lowest_latency setting */
+	if (new_val < IGC_20K_ITR &&
+	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+	    (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
+		new_val = IGC_20K_ITR;
+
+set_itr_val:
+	if (new_val != q_vector->itr_val) {
+		q_vector->itr_val = new_val;
+		q_vector->set_itr = 1;
+	}
+clear_counts:
+	q_vector->rx.total_bytes = 0;
+	q_vector->rx.total_packets = 0;
+	q_vector->tx.total_bytes = 0;
+	q_vector->tx.total_packets = 0;
+}
+
+static void igc_ring_irq_enable(struct igc_q_vector *q_vector)
+{
+	struct igc_adapter *adapter = q_vector->adapter;
+	struct igc_hw *hw = &adapter->hw;
+
+	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
+	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
+		if (adapter->num_q_vectors == 1)
+			igc_set_itr(q_vector);
+		else
+			igc_update_ring_itr(q_vector);
+	}
+
+	if (!test_bit(__IGC_DOWN, &adapter->state)) {
+		if (adapter->msix_entries)
+			wr32(IGC_EIMS, q_vector->eims_value);
+		else
+			igc_irq_enable(adapter);
+	}
+}
+
+static void igc_add_ring(struct igc_ring *ring,
+			 struct igc_ring_container *head)
+{
+	head->ring = ring;
+	head->count++;
+}
+
+/**
+ * igc_cache_ring_register - Descriptor ring to register mapping
+ * @adapter: board private structure to initialize
+ *
+ * Once we know the feature-set enabled for the device, we'll cache
+ * the register offset the descriptor ring is assigned to.
+ */
+static void igc_cache_ring_register(struct igc_adapter *adapter)
+{
+	int i = 0, j = 0;
+
+	switch (adapter->hw.mac.type) {
+	case igc_i225:
+	default:
+		for (; i < adapter->num_rx_queues; i++)
+			adapter->rx_ring[i]->reg_idx = i;
+		for (; j < adapter->num_tx_queues; j++)
+			adapter->tx_ring[j]->reg_idx = j;
+		break;
+	}
+}
+
+/**
+ * igc_poll - NAPI Rx polling callback
+ * @napi: napi polling structure
+ * @budget: count of how many packets we should handle
+ */
+static int igc_poll(struct napi_struct *napi, int budget)
+{
+	struct igc_q_vector *q_vector = container_of(napi,
+						     struct igc_q_vector,
+						     napi);
+	struct igc_ring *rx_ring = q_vector->rx.ring;
+	bool clean_complete = true;
+	int work_done = 0;
+
+	if (q_vector->tx.ring)
+		clean_complete = igc_clean_tx_irq(q_vector, budget);
+
+	if (rx_ring) {
+		int cleaned = rx_ring->xsk_pool ?
+			      igc_clean_rx_irq_zc(q_vector, budget) :
+			      igc_clean_rx_irq(q_vector, budget);
+
+		work_done += cleaned;
+		if (cleaned >= budget)
+			clean_complete = false;
+	}
+
+	/* If all work not completed, return budget and keep polling */
+	if (!clean_complete)
+		return budget;
+
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done)))
+		igc_ring_irq_enable(q_vector);
+
+	return min(work_done, budget - 1);
+}
+
+/**
+ * igc_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_count: q_vectors allocated on adapter, used for ring interleaving
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: total number of Tx rings to allocate
+ * @txr_idx: index of first Tx ring to allocate
+ * @rxr_count: total number of Rx rings to allocate
+ * @rxr_idx: index of first Rx ring to allocate
+ *
+ * We allocate one q_vector.  If allocation fails we return -ENOMEM.
+ */
+static int igc_alloc_q_vector(struct igc_adapter *adapter,
+			      unsigned int v_count, unsigned int v_idx,
+			      unsigned int txr_count, unsigned int txr_idx,
+			      unsigned int rxr_count, unsigned int rxr_idx)
+{
+	struct igc_q_vector *q_vector;
+	struct igc_ring *ring;
+	int ring_count;
+
+	/* igc only supports 1 Tx and/or 1 Rx queue per vector */
+	if (txr_count > 1 || rxr_count > 1)
+		return -ENOMEM;
+
+	ring_count = txr_count + rxr_count;
+
+	/* allocate q_vector and rings */
+	q_vector = adapter->q_vector[v_idx];
+	if (!q_vector)
+		q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
+				   GFP_KERNEL);
+	else
+		memset(q_vector, 0, struct_size(q_vector, ring, ring_count));
+	if (!q_vector)
+		return -ENOMEM;
+
+	/* initialize NAPI */
+	netif_napi_add(adapter->netdev, &q_vector->napi, igc_poll);
+
+	/* tie q_vector and adapter together */
+	adapter->q_vector[v_idx] = q_vector;
+	q_vector->adapter = adapter;
+
+	/* initialize work limits */
+	q_vector->tx.work_limit = adapter->tx_work_limit;
+
+	/* initialize ITR configuration */
+	q_vector->itr_register = adapter->io_addr + IGC_EITR(0);
+	q_vector->itr_val = IGC_START_ITR;
+
+	/* initialize pointer to rings */
+	ring = q_vector->ring;
+
+	/* initialize ITR */
+	if (rxr_count) {
+		/* rx or rx/tx vector */
+		if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
+			q_vector->itr_val = adapter->rx_itr_setting;
+	} else {
+		/* tx only vector */
+		if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
+			q_vector->itr_val = adapter->tx_itr_setting;
+	}
+
+	if (txr_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Tx values */
+		igc_add_ring(ring, &q_vector->tx);
+
+		/* apply Tx specific ring traits */
+		ring->count = adapter->tx_ring_count;
+		ring->queue_index = txr_idx;
+
+		/* assign ring to adapter */
+		adapter->tx_ring[txr_idx] = ring;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	if (rxr_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Rx values */
+		igc_add_ring(ring, &q_vector->rx);
+
+		/* apply Rx specific ring traits */
+		ring->count = adapter->rx_ring_count;
+		ring->queue_index = rxr_idx;
+
+		/* assign ring to adapter */
+		adapter->rx_ring[rxr_idx] = ring;
+	}
+
+	return 0;
+}
+
+/**
+ * igc_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one q_vector per queue interrupt.  If allocation fails we
+ * return -ENOMEM.
+ */
+static int igc_alloc_q_vectors(struct igc_adapter *adapter)
+{
+	int rxr_remaining = adapter->num_rx_queues;
+	int txr_remaining = adapter->num_tx_queues;
+	int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+	int q_vectors = adapter->num_q_vectors;
+	int err;
+
+	if (q_vectors >= (rxr_remaining + txr_remaining)) {
+		for (; rxr_remaining; v_idx++) {
+			err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
+						 0, 0, 1, rxr_idx);
+
+			if (err)
+				goto err_out;
+
+			/* update counts and index */
+			rxr_remaining--;
+			rxr_idx++;
+		}
+	}
+
+	for (; v_idx < q_vectors; v_idx++) {
+		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
+		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
+
+		err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
+					 tqpv, txr_idx, rqpv, rxr_idx);
+
+		if (err)
+			goto err_out;
+
+		/* update counts and index */
+		rxr_remaining -= rqpv;
+		txr_remaining -= tqpv;
+		rxr_idx++;
+		txr_idx++;
+	}
+
+	return 0;
+
+err_out:
+	adapter->num_tx_queues = 0;
+	adapter->num_rx_queues = 0;
+	adapter->num_q_vectors = 0;
+
+	while (v_idx--)
+		igc_free_q_vector(adapter, v_idx);
+
+	return -ENOMEM;
+}
+
+/**
+ * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
+ * @adapter: Pointer to adapter structure
+ * @msix: boolean for MSI-X capability
+ *
+ * This function initializes the interrupts and allocates all of the queues.
+ */
+static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix)
+{
+	struct net_device *dev = adapter->netdev;
+	int err = 0;
+
+	igc_set_interrupt_capability(adapter, msix);
+
+	err = igc_alloc_q_vectors(adapter);
+	if (err) {
+		netdev_err(dev, "Unable to allocate memory for vectors\n");
+		goto err_alloc_q_vectors;
+	}
+
+	igc_cache_ring_register(adapter);
+
+	return 0;
+
+err_alloc_q_vectors:
+	igc_reset_interrupt_capability(adapter);
+	return err;
+}
+
+/**
+ * igc_sw_init - Initialize general software structures (struct igc_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * igc_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ */
+static int igc_sw_init(struct igc_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	struct igc_hw *hw = &adapter->hw;
+
+	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
+
+	/* set default ring sizes */
+	adapter->tx_ring_count = IGC_DEFAULT_TXD;
+	adapter->rx_ring_count = IGC_DEFAULT_RXD;
+
+	/* set default ITR values */
+	adapter->rx_itr_setting = IGC_DEFAULT_ITR;
+	adapter->tx_itr_setting = IGC_DEFAULT_ITR;
+
+	/* set default work limits */
+	adapter->tx_work_limit = IGC_DEFAULT_TX_WORK;
+
+	/* adjust max frame to be at least the size of a standard frame */
+	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
+				VLAN_HLEN;
+	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
+
+	mutex_init(&adapter->nfc_rule_lock);
+	INIT_LIST_HEAD(&adapter->nfc_rule_list);
+	adapter->nfc_rule_count = 0;
+
+	spin_lock_init(&adapter->stats64_lock);
+	spin_lock_init(&adapter->qbv_tx_lock);
+	/* Assume MSI-X interrupts, will be checked during IRQ allocation */
+	adapter->flags |= IGC_FLAG_HAS_MSIX;
+
+	igc_init_queue_configuration(adapter);
+
+	/* This call may decrease the number of queues */
+	if (igc_init_interrupt_scheme(adapter, true)) {
+		netdev_err(netdev, "Unable to allocate memory for queues\n");
+		return -ENOMEM;
+	}
+
+	/* Explicitly disable IRQ since the NIC can be in any state. */
+	igc_irq_disable(adapter);
+
+	set_bit(__IGC_DOWN, &adapter->state);
+
+	return 0;
+}
+
+/**
+ * igc_up - Open the interface and prepare it to handle traffic
+ * @adapter: board private structure
+ */
+void igc_up(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int i = 0;
+
+	/* hardware has been reset, we need to reload some things */
+	igc_configure(adapter);
+
+	clear_bit(__IGC_DOWN, &adapter->state);
+
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		napi_enable(&adapter->q_vector[i]->napi);
+
+	if (adapter->msix_entries)
+		igc_configure_msix(adapter);
+	else
+		igc_assign_vector(adapter->q_vector[0], 0);
+
+	/* Clear any pending interrupts. */
+	rd32(IGC_ICR);
+	igc_irq_enable(adapter);
+
+	netif_tx_start_all_queues(adapter->netdev);
+
+	/* start the watchdog. */
+	hw->mac.get_link_status = true;
+	schedule_work(&adapter->watchdog_task);
+}
+
+/**
+ * igc_update_stats - Update the board statistics counters
+ * @adapter: board private structure
+ */
+void igc_update_stats(struct igc_adapter *adapter)
+{
+	struct rtnl_link_stats64 *net_stats = &adapter->stats64;
+	struct pci_dev *pdev = adapter->pdev;
+	struct igc_hw *hw = &adapter->hw;
+	u64 _bytes, _packets;
+	u64 bytes, packets;
+	unsigned int start;
+	u32 mpc;
+	int i;
+
+	/* Prevent stats update while adapter is being reset, or if the pci
+	 * connection is down.
+	 */
+	if (adapter->link_speed == 0)
+		return;
+	if (pci_channel_offline(pdev))
+		return;
+
+	packets = 0;
+	bytes = 0;
+
+	rcu_read_lock();
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct igc_ring *ring = adapter->rx_ring[i];
+		u32 rqdpc = rd32(IGC_RQDPC(i));
+
+		if (hw->mac.type >= igc_i225)
+			wr32(IGC_RQDPC(i), 0);
+
+		if (rqdpc) {
+			ring->rx_stats.drops += rqdpc;
+			net_stats->rx_fifo_errors += rqdpc;
+		}
+
+		do {
+			start = u64_stats_fetch_begin(&ring->rx_syncp);
+			_bytes = ring->rx_stats.bytes;
+			_packets = ring->rx_stats.packets;
+		} while (u64_stats_fetch_retry(&ring->rx_syncp, start));
+		bytes += _bytes;
+		packets += _packets;
+	}
+
+	net_stats->rx_bytes = bytes;
+	net_stats->rx_packets = packets;
+
+	packets = 0;
+	bytes = 0;
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *ring = adapter->tx_ring[i];
+
+		do {
+			start = u64_stats_fetch_begin(&ring->tx_syncp);
+			_bytes = ring->tx_stats.bytes;
+			_packets = ring->tx_stats.packets;
+		} while (u64_stats_fetch_retry(&ring->tx_syncp, start));
+		bytes += _bytes;
+		packets += _packets;
+	}
+	net_stats->tx_bytes = bytes;
+	net_stats->tx_packets = packets;
+	rcu_read_unlock();
+
+	/* read stats registers */
+	adapter->stats.crcerrs += rd32(IGC_CRCERRS);
+	adapter->stats.gprc += rd32(IGC_GPRC);
+	adapter->stats.gorc += rd32(IGC_GORCL);
+	rd32(IGC_GORCH); /* clear GORCL */
+	adapter->stats.bprc += rd32(IGC_BPRC);
+	adapter->stats.mprc += rd32(IGC_MPRC);
+	adapter->stats.roc += rd32(IGC_ROC);
+
+	adapter->stats.prc64 += rd32(IGC_PRC64);
+	adapter->stats.prc127 += rd32(IGC_PRC127);
+	adapter->stats.prc255 += rd32(IGC_PRC255);
+	adapter->stats.prc511 += rd32(IGC_PRC511);
+	adapter->stats.prc1023 += rd32(IGC_PRC1023);
+	adapter->stats.prc1522 += rd32(IGC_PRC1522);
+	adapter->stats.tlpic += rd32(IGC_TLPIC);
+	adapter->stats.rlpic += rd32(IGC_RLPIC);
+	adapter->stats.hgptc += rd32(IGC_HGPTC);
+
+	mpc = rd32(IGC_MPC);
+	adapter->stats.mpc += mpc;
+	net_stats->rx_fifo_errors += mpc;
+	adapter->stats.scc += rd32(IGC_SCC);
+	adapter->stats.ecol += rd32(IGC_ECOL);
+	adapter->stats.mcc += rd32(IGC_MCC);
+	adapter->stats.latecol += rd32(IGC_LATECOL);
+	adapter->stats.dc += rd32(IGC_DC);
+	adapter->stats.rlec += rd32(IGC_RLEC);
+	adapter->stats.xonrxc += rd32(IGC_XONRXC);
+	adapter->stats.xontxc += rd32(IGC_XONTXC);
+	adapter->stats.xoffrxc += rd32(IGC_XOFFRXC);
+	adapter->stats.xofftxc += rd32(IGC_XOFFTXC);
+	adapter->stats.fcruc += rd32(IGC_FCRUC);
+	adapter->stats.gptc += rd32(IGC_GPTC);
+	adapter->stats.gotc += rd32(IGC_GOTCL);
+	rd32(IGC_GOTCH); /* clear GOTCL */
+	adapter->stats.rnbc += rd32(IGC_RNBC);
+	adapter->stats.ruc += rd32(IGC_RUC);
+	adapter->stats.rfc += rd32(IGC_RFC);
+	adapter->stats.rjc += rd32(IGC_RJC);
+	adapter->stats.tor += rd32(IGC_TORH);
+	adapter->stats.tot += rd32(IGC_TOTH);
+	adapter->stats.tpr += rd32(IGC_TPR);
+
+	adapter->stats.ptc64 += rd32(IGC_PTC64);
+	adapter->stats.ptc127 += rd32(IGC_PTC127);
+	adapter->stats.ptc255 += rd32(IGC_PTC255);
+	adapter->stats.ptc511 += rd32(IGC_PTC511);
+	adapter->stats.ptc1023 += rd32(IGC_PTC1023);
+	adapter->stats.ptc1522 += rd32(IGC_PTC1522);
+
+	adapter->stats.mptc += rd32(IGC_MPTC);
+	adapter->stats.bptc += rd32(IGC_BPTC);
+
+	adapter->stats.tpt += rd32(IGC_TPT);
+	adapter->stats.colc += rd32(IGC_COLC);
+	adapter->stats.colc += rd32(IGC_RERC);
+
+	adapter->stats.algnerrc += rd32(IGC_ALGNERRC);
+
+	adapter->stats.tsctc += rd32(IGC_TSCTC);
+
+	adapter->stats.iac += rd32(IGC_IAC);
+
+	/* Fill out the OS statistics structure */
+	net_stats->multicast = adapter->stats.mprc;
+	net_stats->collisions = adapter->stats.colc;
+
+	/* Rx Errors */
+
+	/* RLEC on some newer hardware can be incorrect so build
+	 * our own version based on RUC and ROC
+	 */
+	net_stats->rx_errors = adapter->stats.rxerrc +
+		adapter->stats.crcerrs + adapter->stats.algnerrc +
+		adapter->stats.ruc + adapter->stats.roc +
+		adapter->stats.cexterr;
+	net_stats->rx_length_errors = adapter->stats.ruc +
+				      adapter->stats.roc;
+	net_stats->rx_crc_errors = adapter->stats.crcerrs;
+	net_stats->rx_frame_errors = adapter->stats.algnerrc;
+	net_stats->rx_missed_errors = adapter->stats.mpc;
+
+	/* Tx Errors */
+	net_stats->tx_errors = adapter->stats.ecol +
+			       adapter->stats.latecol;
+	net_stats->tx_aborted_errors = adapter->stats.ecol;
+	net_stats->tx_window_errors = adapter->stats.latecol;
+	net_stats->tx_carrier_errors = adapter->stats.tncrs;
+
+	/* Tx Dropped */
+	net_stats->tx_dropped = adapter->stats.txdrop;
+
+	/* Management Stats */
+	adapter->stats.mgptc += rd32(IGC_MGTPTC);
+	adapter->stats.mgprc += rd32(IGC_MGTPRC);
+	adapter->stats.mgpdc += rd32(IGC_MGTPDC);
+}
+
+/**
+ * igc_down - Close the interface
+ * @adapter: board private structure
+ */
+void igc_down(struct igc_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct igc_hw *hw = &adapter->hw;
+	u32 tctl, rctl;
+	int i = 0;
+
+	set_bit(__IGC_DOWN, &adapter->state);
+
+	igc_ptp_suspend(adapter);
+
+	if (pci_device_is_present(adapter->pdev)) {
+		/* disable receives in the hardware */
+		rctl = rd32(IGC_RCTL);
+		wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
+		/* flush and sleep below */
+	}
+	/* set trans_start so we don't get spurious watchdogs during reset */
+	netif_trans_update(netdev);
+
+	netif_carrier_off(netdev);
+	netif_tx_stop_all_queues(netdev);
+
+	if (pci_device_is_present(adapter->pdev)) {
+		/* disable transmits in the hardware */
+		tctl = rd32(IGC_TCTL);
+		tctl &= ~IGC_TCTL_EN;
+		wr32(IGC_TCTL, tctl);
+		/* flush both disables and wait for them to finish */
+		wrfl();
+		usleep_range(10000, 20000);
+
+		igc_irq_disable(adapter);
+	}
+
+	adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
+
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		if (adapter->q_vector[i]) {
+			napi_synchronize(&adapter->q_vector[i]->napi);
+			napi_disable(&adapter->q_vector[i]->napi);
+		}
+	}
+
+	del_timer_sync(&adapter->watchdog_timer);
+	del_timer_sync(&adapter->phy_info_timer);
+
+	/* record the stats before reset*/
+	spin_lock(&adapter->stats64_lock);
+	igc_update_stats(adapter);
+	spin_unlock(&adapter->stats64_lock);
+
+	adapter->link_speed = 0;
+	adapter->link_duplex = 0;
+
+	if (!pci_channel_offline(adapter->pdev))
+		igc_reset(adapter);
+
+	/* clear VLAN promisc flag so VFTA will be updated if necessary */
+	adapter->flags &= ~IGC_FLAG_VLAN_PROMISC;
+
+	igc_disable_all_tx_rings_hw(adapter);
+	igc_clean_all_tx_rings(adapter);
+	igc_clean_all_rx_rings(adapter);
+}
+
+void igc_reinit_locked(struct igc_adapter *adapter)
+{
+	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+	igc_down(adapter);
+	igc_up(adapter);
+	clear_bit(__IGC_RESETTING, &adapter->state);
+}
+
+static void igc_reset_task(struct work_struct *work)
+{
+	struct igc_adapter *adapter;
+
+	adapter = container_of(work, struct igc_adapter, reset_task);
+
+	rtnl_lock();
+	/* If we're already down or resetting, just bail */
+	if (test_bit(__IGC_DOWN, &adapter->state) ||
+	    test_bit(__IGC_RESETTING, &adapter->state)) {
+		rtnl_unlock();
+		return;
+	}
+
+	igc_rings_dump(adapter);
+	igc_regs_dump(adapter);
+	netdev_err(adapter->netdev, "Reset adapter\n");
+	igc_reinit_locked(adapter);
+	rtnl_unlock();
+}
+
+/**
+ * igc_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int igc_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) {
+		netdev_dbg(netdev, "Jumbo frames not supported with XDP");
+		return -EINVAL;
+	}
+
+	/* adjust max frame to be at least the size of a standard frame */
+	if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
+		max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
+
+	while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	/* igc_down has a dependency on max_frame_size */
+	adapter->max_frame_size = max_frame;
+
+	if (netif_running(netdev))
+		igc_down(adapter);
+
+	netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu);
+	netdev->mtu = new_mtu;
+
+	if (netif_running(netdev))
+		igc_up(adapter);
+	else
+		igc_reset(adapter);
+
+	clear_bit(__IGC_RESETTING, &adapter->state);
+
+	return 0;
+}
+
+/**
+ * igc_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ * @txqueue: queue number that timed out
+ **/
+static void igc_tx_timeout(struct net_device *netdev,
+			   unsigned int __always_unused txqueue)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+
+	/* Do the reset outside of interrupt context */
+	adapter->tx_timeout_count++;
+	schedule_work(&adapter->reset_task);
+	wr32(IGC_EICS,
+	     (adapter->eims_enable_mask & ~adapter->eims_other));
+}
+
+/**
+ * igc_get_stats64 - Get System Network Statistics
+ * @netdev: network interface device structure
+ * @stats: rtnl_link_stats64 pointer
+ *
+ * Returns the address of the device statistics structure.
+ * The statistics are updated here and also from the timer callback.
+ */
+static void igc_get_stats64(struct net_device *netdev,
+			    struct rtnl_link_stats64 *stats)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	spin_lock(&adapter->stats64_lock);
+	if (!test_bit(__IGC_RESETTING, &adapter->state))
+		igc_update_stats(adapter);
+	memcpy(stats, &adapter->stats64, sizeof(*stats));
+	spin_unlock(&adapter->stats64_lock);
+}
+
+static netdev_features_t igc_fix_features(struct net_device *netdev,
+					  netdev_features_t features)
+{
+	/* Since there is no support for separate Rx/Tx vlan accel
+	 * enable/disable make sure Tx flag is always in same state as Rx.
+	 */
+	if (features & NETIF_F_HW_VLAN_CTAG_RX)
+		features |= NETIF_F_HW_VLAN_CTAG_TX;
+	else
+		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
+
+	return features;
+}
+
+static int igc_set_features(struct net_device *netdev,
+			    netdev_features_t features)
+{
+	netdev_features_t changed = netdev->features ^ features;
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
+		igc_vlan_mode(netdev, features);
+
+	/* Add VLAN support */
+	if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
+		return 0;
+
+	if (!(features & NETIF_F_NTUPLE))
+		igc_flush_nfc_rules(adapter);
+
+	netdev->features = features;
+
+	if (netif_running(netdev))
+		igc_reinit_locked(adapter);
+	else
+		igc_reset(adapter);
+
+	return 1;
+}
+
+static netdev_features_t
+igc_features_check(struct sk_buff *skb, struct net_device *dev,
+		   netdev_features_t features)
+{
+	unsigned int network_hdr_len, mac_hdr_len;
+
+	/* Make certain the headers can be described by a context descriptor */
+	mac_hdr_len = skb_network_header(skb) - skb->data;
+	if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN))
+		return features & ~(NETIF_F_HW_CSUM |
+				    NETIF_F_SCTP_CRC |
+				    NETIF_F_HW_VLAN_CTAG_TX |
+				    NETIF_F_TSO |
+				    NETIF_F_TSO6);
+
+	network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
+	if (unlikely(network_hdr_len >  IGC_MAX_NETWORK_HDR_LEN))
+		return features & ~(NETIF_F_HW_CSUM |
+				    NETIF_F_SCTP_CRC |
+				    NETIF_F_TSO |
+				    NETIF_F_TSO6);
+
+	/* We can only support IPv4 TSO in tunnels if we can mangle the
+	 * inner IP ID field, so strip TSO if MANGLEID is not supported.
+	 */
+	if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
+		features &= ~NETIF_F_TSO;
+
+	return features;
+}
+
+static void igc_tsync_interrupt(struct igc_adapter *adapter)
+{
+	u32 ack, tsauxc, sec, nsec, tsicr;
+	struct igc_hw *hw = &adapter->hw;
+	struct ptp_clock_event event;
+	struct timespec64 ts;
+
+	tsicr = rd32(IGC_TSICR);
+	ack = 0;
+
+	if (tsicr & IGC_TSICR_SYS_WRAP) {
+		event.type = PTP_CLOCK_PPS;
+		if (adapter->ptp_caps.pps)
+			ptp_clock_event(adapter->ptp_clock, &event);
+		ack |= IGC_TSICR_SYS_WRAP;
+	}
+
+	if (tsicr & IGC_TSICR_TXTS) {
+		/* retrieve hardware timestamp */
+		igc_ptp_tx_tstamp_event(adapter);
+		ack |= IGC_TSICR_TXTS;
+	}
+
+	if (tsicr & IGC_TSICR_TT0) {
+		spin_lock(&adapter->tmreg_lock);
+		ts = timespec64_add(adapter->perout[0].start,
+				    adapter->perout[0].period);
+		wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
+		wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec);
+		tsauxc = rd32(IGC_TSAUXC);
+		tsauxc |= IGC_TSAUXC_EN_TT0;
+		wr32(IGC_TSAUXC, tsauxc);
+		adapter->perout[0].start = ts;
+		spin_unlock(&adapter->tmreg_lock);
+		ack |= IGC_TSICR_TT0;
+	}
+
+	if (tsicr & IGC_TSICR_TT1) {
+		spin_lock(&adapter->tmreg_lock);
+		ts = timespec64_add(adapter->perout[1].start,
+				    adapter->perout[1].period);
+		wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
+		wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec);
+		tsauxc = rd32(IGC_TSAUXC);
+		tsauxc |= IGC_TSAUXC_EN_TT1;
+		wr32(IGC_TSAUXC, tsauxc);
+		adapter->perout[1].start = ts;
+		spin_unlock(&adapter->tmreg_lock);
+		ack |= IGC_TSICR_TT1;
+	}
+
+	if (tsicr & IGC_TSICR_AUTT0) {
+		nsec = rd32(IGC_AUXSTMPL0);
+		sec  = rd32(IGC_AUXSTMPH0);
+		event.type = PTP_CLOCK_EXTTS;
+		event.index = 0;
+		event.timestamp = sec * NSEC_PER_SEC + nsec;
+		ptp_clock_event(adapter->ptp_clock, &event);
+		ack |= IGC_TSICR_AUTT0;
+	}
+
+	if (tsicr & IGC_TSICR_AUTT1) {
+		nsec = rd32(IGC_AUXSTMPL1);
+		sec  = rd32(IGC_AUXSTMPH1);
+		event.type = PTP_CLOCK_EXTTS;
+		event.index = 1;
+		event.timestamp = sec * NSEC_PER_SEC + nsec;
+		ptp_clock_event(adapter->ptp_clock, &event);
+		ack |= IGC_TSICR_AUTT1;
+	}
+
+	/* acknowledge the interrupts */
+	wr32(IGC_TSICR, ack);
+}
+
+/**
+ * igc_msix_other - msix other interrupt handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t igc_msix_other(int irq, void *data)
+{
+	struct igc_adapter *adapter = data;
+	struct igc_hw *hw = &adapter->hw;
+	u32 icr = rd32(IGC_ICR);
+
+	/* reading ICR causes bit 31 of EICR to be cleared */
+	if (icr & IGC_ICR_DRSTA)
+		schedule_work(&adapter->reset_task);
+
+	if (icr & IGC_ICR_DOUTSYNC) {
+		/* HW is reporting DMA is out of sync */
+		adapter->stats.doosync++;
+	}
+
+	if (icr & IGC_ICR_LSC) {
+		hw->mac.get_link_status = true;
+		/* guard against interrupt when we're going down */
+		if (!test_bit(__IGC_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
+
+	if (icr & IGC_ICR_TS)
+		igc_tsync_interrupt(adapter);
+
+	wr32(IGC_EIMS, adapter->eims_other);
+
+	return IRQ_HANDLED;
+}
+
+static void igc_write_itr(struct igc_q_vector *q_vector)
+{
+	u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK;
+
+	if (!q_vector->set_itr)
+		return;
+
+	if (!itr_val)
+		itr_val = IGC_ITR_VAL_MASK;
+
+	itr_val |= IGC_EITR_CNT_IGNR;
+
+	writel(itr_val, q_vector->itr_register);
+	q_vector->set_itr = 0;
+}
+
+static irqreturn_t igc_msix_ring(int irq, void *data)
+{
+	struct igc_q_vector *q_vector = data;
+
+	/* Write the ITR value calculated from the previous interrupt. */
+	igc_write_itr(q_vector);
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * igc_request_msix - Initialize MSI-X interrupts
+ * @adapter: Pointer to adapter structure
+ *
+ * igc_request_msix allocates MSI-X vectors and requests interrupts from the
+ * kernel.
+ */
+static int igc_request_msix(struct igc_adapter *adapter)
+{
+	unsigned int num_q_vectors = adapter->num_q_vectors;
+	int i = 0, err = 0, vector = 0, free_vector = 0;
+	struct net_device *netdev = adapter->netdev;
+
+	err = request_irq(adapter->msix_entries[vector].vector,
+			  &igc_msix_other, 0, netdev->name, adapter);
+	if (err)
+		goto err_out;
+
+	if (num_q_vectors > MAX_Q_VECTORS) {
+		num_q_vectors = MAX_Q_VECTORS;
+		dev_warn(&adapter->pdev->dev,
+			 "The number of queue vectors (%d) is higher than max allowed (%d)\n",
+			 adapter->num_q_vectors, MAX_Q_VECTORS);
+	}
+	for (i = 0; i < num_q_vectors; i++) {
+		struct igc_q_vector *q_vector = adapter->q_vector[i];
+
+		vector++;
+
+		q_vector->itr_register = adapter->io_addr + IGC_EITR(vector);
+
+		if (q_vector->rx.ring && q_vector->tx.ring)
+			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
+				q_vector->rx.ring->queue_index);
+		else if (q_vector->tx.ring)
+			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
+				q_vector->tx.ring->queue_index);
+		else if (q_vector->rx.ring)
+			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
+				q_vector->rx.ring->queue_index);
+		else
+			sprintf(q_vector->name, "%s-unused", netdev->name);
+
+		err = request_irq(adapter->msix_entries[vector].vector,
+				  igc_msix_ring, 0, q_vector->name,
+				  q_vector);
+		if (err)
+			goto err_free;
+	}
+
+	igc_configure_msix(adapter);
+	return 0;
+
+err_free:
+	/* free already assigned IRQs */
+	free_irq(adapter->msix_entries[free_vector++].vector, adapter);
+
+	vector--;
+	for (i = 0; i < vector; i++) {
+		free_irq(adapter->msix_entries[free_vector++].vector,
+			 adapter->q_vector[i]);
+	}
+err_out:
+	return err;
+}
+
+/**
+ * igc_clear_interrupt_scheme - reset the device to a state of no interrupts
+ * @adapter: Pointer to adapter structure
+ *
+ * This function resets the device so that it has 0 rx queues, tx queues, and
+ * MSI-X interrupts allocated.
+ */
+static void igc_clear_interrupt_scheme(struct igc_adapter *adapter)
+{
+	igc_free_q_vectors(adapter);
+	igc_reset_interrupt_capability(adapter);
+}
+
+/* Need to wait a few seconds after link up to get diagnostic information from
+ * the phy
+ */
+static void igc_update_phy_info(struct timer_list *t)
+{
+	struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer);
+
+	igc_get_phy_info(&adapter->hw);
+}
+
+/**
+ * igc_has_link - check shared code for link and determine up/down
+ * @adapter: pointer to driver private info
+ */
+bool igc_has_link(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	bool link_active = false;
+
+	/* get_link_status is set on LSC (link status) interrupt or
+	 * rx sequence error interrupt.  get_link_status will stay
+	 * false until the igc_check_for_link establishes link
+	 * for copper adapters ONLY
+	 */
+	if (!hw->mac.get_link_status)
+		return true;
+	hw->mac.ops.check_for_link(hw);
+	link_active = !hw->mac.get_link_status;
+
+	if (hw->mac.type == igc_i225) {
+		if (!netif_carrier_ok(adapter->netdev)) {
+			adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
+		} else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) {
+			adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE;
+			adapter->link_check_timeout = jiffies;
+		}
+	}
+
+	return link_active;
+}
+
+/**
+ * igc_watchdog - Timer Call-back
+ * @t: timer for the watchdog
+ */
+static void igc_watchdog(struct timer_list *t)
+{
+	struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer);
+	/* Do the rest outside of interrupt context */
+	schedule_work(&adapter->watchdog_task);
+}
+
+static void igc_watchdog_task(struct work_struct *work)
+{
+	struct igc_adapter *adapter = container_of(work,
+						   struct igc_adapter,
+						   watchdog_task);
+	struct net_device *netdev = adapter->netdev;
+	struct igc_hw *hw = &adapter->hw;
+	struct igc_phy_info *phy = &hw->phy;
+	u16 phy_data, retry_count = 20;
+	u32 link;
+	int i;
+
+	link = igc_has_link(adapter);
+
+	if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) {
+		if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
+			adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
+		else
+			link = false;
+	}
+
+	if (link) {
+		/* Cancel scheduled suspend requests. */
+		pm_runtime_resume(netdev->dev.parent);
+
+		if (!netif_carrier_ok(netdev)) {
+			u32 ctrl;
+
+			hw->mac.ops.get_speed_and_duplex(hw,
+							 &adapter->link_speed,
+							 &adapter->link_duplex);
+
+			ctrl = rd32(IGC_CTRL);
+			/* Link status message must follow this format */
+			netdev_info(netdev,
+				    "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
+				    adapter->link_speed,
+				    adapter->link_duplex == FULL_DUPLEX ?
+				    "Full" : "Half",
+				    (ctrl & IGC_CTRL_TFCE) &&
+				    (ctrl & IGC_CTRL_RFCE) ? "RX/TX" :
+				    (ctrl & IGC_CTRL_RFCE) ?  "RX" :
+				    (ctrl & IGC_CTRL_TFCE) ?  "TX" : "None");
+
+			/* disable EEE if enabled */
+			if ((adapter->flags & IGC_FLAG_EEE) &&
+			    adapter->link_duplex == HALF_DUPLEX) {
+				netdev_info(netdev,
+					    "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex\n");
+				adapter->hw.dev_spec._base.eee_enable = false;
+				adapter->flags &= ~IGC_FLAG_EEE;
+			}
+
+			/* check if SmartSpeed worked */
+			igc_check_downshift(hw);
+			if (phy->speed_downgraded)
+				netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
+
+			/* adjust timeout factor according to speed/duplex */
+			adapter->tx_timeout_factor = 1;
+			switch (adapter->link_speed) {
+			case SPEED_10:
+				adapter->tx_timeout_factor = 14;
+				break;
+			case SPEED_100:
+			case SPEED_1000:
+			case SPEED_2500:
+				adapter->tx_timeout_factor = 1;
+				break;
+			}
+
+			/* Once the launch time has been set on the wire, there
+			 * is a delay before the link speed can be determined
+			 * based on link-up activity. Write into the register
+			 * as soon as we know the correct link speed.
+			 */
+			igc_tsn_adjust_txtime_offset(adapter);
+
+			if (adapter->link_speed != SPEED_1000)
+				goto no_wait;
+
+			/* wait for Remote receiver status OK */
+retry_read_status:
+			if (!igc_read_phy_reg(hw, PHY_1000T_STATUS,
+					      &phy_data)) {
+				if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
+				    retry_count) {
+					msleep(100);
+					retry_count--;
+					goto retry_read_status;
+				} else if (!retry_count) {
+					netdev_err(netdev, "exceed max 2 second\n");
+				}
+			} else {
+				netdev_err(netdev, "read 1000Base-T Status Reg\n");
+			}
+no_wait:
+			netif_carrier_on(netdev);
+
+			/* link state has changed, schedule phy info update */
+			if (!test_bit(__IGC_DOWN, &adapter->state))
+				mod_timer(&adapter->phy_info_timer,
+					  round_jiffies(jiffies + 2 * HZ));
+		}
+	} else {
+		if (netif_carrier_ok(netdev)) {
+			adapter->link_speed = 0;
+			adapter->link_duplex = 0;
+
+			/* Links status message must follow this format */
+			netdev_info(netdev, "NIC Link is Down\n");
+			netif_carrier_off(netdev);
+
+			/* link state has changed, schedule phy info update */
+			if (!test_bit(__IGC_DOWN, &adapter->state))
+				mod_timer(&adapter->phy_info_timer,
+					  round_jiffies(jiffies + 2 * HZ));
+
+			pm_schedule_suspend(netdev->dev.parent,
+					    MSEC_PER_SEC * 5);
+		}
+	}
+
+	spin_lock(&adapter->stats64_lock);
+	igc_update_stats(adapter);
+	spin_unlock(&adapter->stats64_lock);
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+		if (!netif_carrier_ok(netdev)) {
+			/* We've lost link, so the controller stops DMA,
+			 * but we've got queued Tx work that's never going
+			 * to get done, so reset controller to flush Tx.
+			 * (Do the reset outside of interrupt context).
+			 */
+			if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) {
+				adapter->tx_timeout_count++;
+				schedule_work(&adapter->reset_task);
+				/* return immediately since reset is imminent */
+				return;
+			}
+		}
+
+		/* Force detection of hung controller every watchdog period */
+		set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
+	}
+
+	/* Cause software interrupt to ensure Rx ring is cleaned */
+	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
+		u32 eics = 0;
+
+		for (i = 0; i < adapter->num_q_vectors; i++)
+			eics |= adapter->q_vector[i]->eims_value;
+		wr32(IGC_EICS, eics);
+	} else {
+		wr32(IGC_ICS, IGC_ICS_RXDMT0);
+	}
+
+	igc_ptp_tx_hang(adapter);
+
+	/* Reset the timer */
+	if (!test_bit(__IGC_DOWN, &adapter->state)) {
+		if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)
+			mod_timer(&adapter->watchdog_timer,
+				  round_jiffies(jiffies +  HZ));
+		else
+			mod_timer(&adapter->watchdog_timer,
+				  round_jiffies(jiffies + 2 * HZ));
+	}
+}
+
+/**
+ * igc_intr_msi - Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ */
+static irqreturn_t igc_intr_msi(int irq, void *data)
+{
+	struct igc_adapter *adapter = data;
+	struct igc_q_vector *q_vector = adapter->q_vector[0];
+	struct igc_hw *hw = &adapter->hw;
+	/* read ICR disables interrupts using IAM */
+	u32 icr = rd32(IGC_ICR);
+
+	igc_write_itr(q_vector);
+
+	if (icr & IGC_ICR_DRSTA)
+		schedule_work(&adapter->reset_task);
+
+	if (icr & IGC_ICR_DOUTSYNC) {
+		/* HW is reporting DMA is out of sync */
+		adapter->stats.doosync++;
+	}
+
+	if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
+		hw->mac.get_link_status = true;
+		if (!test_bit(__IGC_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
+
+	if (icr & IGC_ICR_TS)
+		igc_tsync_interrupt(adapter);
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * igc_intr - Legacy Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ */
+static irqreturn_t igc_intr(int irq, void *data)
+{
+	struct igc_adapter *adapter = data;
+	struct igc_q_vector *q_vector = adapter->q_vector[0];
+	struct igc_hw *hw = &adapter->hw;
+	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
+	 * need for the IMC write
+	 */
+	u32 icr = rd32(IGC_ICR);
+
+	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
+	 * not set, then the adapter didn't send an interrupt
+	 */
+	if (!(icr & IGC_ICR_INT_ASSERTED))
+		return IRQ_NONE;
+
+	igc_write_itr(q_vector);
+
+	if (icr & IGC_ICR_DRSTA)
+		schedule_work(&adapter->reset_task);
+
+	if (icr & IGC_ICR_DOUTSYNC) {
+		/* HW is reporting DMA is out of sync */
+		adapter->stats.doosync++;
+	}
+
+	if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
+		hw->mac.get_link_status = true;
+		/* guard against interrupt when we're going down */
+		if (!test_bit(__IGC_DOWN, &adapter->state))
+			mod_timer(&adapter->watchdog_timer, jiffies + 1);
+	}
+
+	if (icr & IGC_ICR_TS)
+		igc_tsync_interrupt(adapter);
+
+	napi_schedule(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+static void igc_free_irq(struct igc_adapter *adapter)
+{
+	if (adapter->msix_entries) {
+		int vector = 0, i;
+
+		free_irq(adapter->msix_entries[vector++].vector, adapter);
+
+		for (i = 0; i < adapter->num_q_vectors; i++)
+			free_irq(adapter->msix_entries[vector++].vector,
+				 adapter->q_vector[i]);
+	} else {
+		free_irq(adapter->pdev->irq, adapter);
+	}
+}
+
+/**
+ * igc_request_irq - initialize interrupts
+ * @adapter: Pointer to adapter structure
+ *
+ * Attempts to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ */
+static int igc_request_irq(struct igc_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	int err = 0;
+
+	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
+		err = igc_request_msix(adapter);
+		if (!err)
+			goto request_done;
+		/* fall back to MSI */
+		igc_free_all_tx_resources(adapter);
+		igc_free_all_rx_resources(adapter);
+
+		igc_clear_interrupt_scheme(adapter);
+		err = igc_init_interrupt_scheme(adapter, false);
+		if (err)
+			goto request_done;
+		igc_setup_all_tx_resources(adapter);
+		igc_setup_all_rx_resources(adapter);
+		igc_configure(adapter);
+	}
+
+	igc_assign_vector(adapter->q_vector[0], 0);
+
+	if (adapter->flags & IGC_FLAG_HAS_MSI) {
+		err = request_irq(pdev->irq, &igc_intr_msi, 0,
+				  netdev->name, adapter);
+		if (!err)
+			goto request_done;
+
+		/* fall back to legacy interrupts */
+		igc_reset_interrupt_capability(adapter);
+		adapter->flags &= ~IGC_FLAG_HAS_MSI;
+	}
+
+	err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED,
+			  netdev->name, adapter);
+
+	if (err)
+		netdev_err(netdev, "Error %d getting interrupt\n", err);
+
+request_done:
+	return err;
+}
+
+/**
+ * __igc_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ * @resuming: boolean indicating if the device is resuming
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ */
+static int __igc_open(struct net_device *netdev, bool resuming)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct pci_dev *pdev = adapter->pdev;
+	struct igc_hw *hw = &adapter->hw;
+	int err = 0;
+	int i = 0;
+
+	/* disallow open during test */
+
+	if (test_bit(__IGC_TESTING, &adapter->state)) {
+		WARN_ON(resuming);
+		return -EBUSY;
+	}
+
+	if (!resuming)
+		pm_runtime_get_sync(&pdev->dev);
+
+	netif_carrier_off(netdev);
+
+	/* allocate transmit descriptors */
+	err = igc_setup_all_tx_resources(adapter);
+	if (err)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	err = igc_setup_all_rx_resources(adapter);
+	if (err)
+		goto err_setup_rx;
+
+	igc_power_up_link(adapter);
+
+	igc_configure(adapter);
+
+	err = igc_request_irq(adapter);
+	if (err)
+		goto err_req_irq;
+
+	/* Notify the stack of the actual queue counts. */
+	err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
+	if (err)
+		goto err_set_queues;
+
+	err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+	if (err)
+		goto err_set_queues;
+
+	clear_bit(__IGC_DOWN, &adapter->state);
+
+	for (i = 0; i < adapter->num_q_vectors; i++)
+		napi_enable(&adapter->q_vector[i]->napi);
+
+	/* Clear any pending interrupts. */
+	rd32(IGC_ICR);
+	igc_irq_enable(adapter);
+
+	if (!resuming)
+		pm_runtime_put(&pdev->dev);
+
+	netif_tx_start_all_queues(netdev);
+
+	/* start the watchdog. */
+	hw->mac.get_link_status = true;
+	schedule_work(&adapter->watchdog_task);
+
+	return IGC_SUCCESS;
+
+err_set_queues:
+	igc_free_irq(adapter);
+err_req_irq:
+	igc_release_hw_control(adapter);
+	igc_power_down_phy_copper_base(&adapter->hw);
+	igc_free_all_rx_resources(adapter);
+err_setup_rx:
+	igc_free_all_tx_resources(adapter);
+err_setup_tx:
+	igc_reset(adapter);
+	if (!resuming)
+		pm_runtime_put(&pdev->dev);
+
+	return err;
+}
+
+int igc_open(struct net_device *netdev)
+{
+	return __igc_open(netdev, false);
+}
+
+/**
+ * __igc_close - Disables a network interface
+ * @netdev: network interface device structure
+ * @suspending: boolean indicating the device is suspending
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the driver's control, but
+ * needs to be disabled.  A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ */
+static int __igc_close(struct net_device *netdev, bool suspending)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct pci_dev *pdev = adapter->pdev;
+
+	WARN_ON(test_bit(__IGC_RESETTING, &adapter->state));
+
+	if (!suspending)
+		pm_runtime_get_sync(&pdev->dev);
+
+	igc_down(adapter);
+
+	igc_release_hw_control(adapter);
+
+	igc_free_irq(adapter);
+
+	igc_free_all_tx_resources(adapter);
+	igc_free_all_rx_resources(adapter);
+
+	if (!suspending)
+		pm_runtime_put_sync(&pdev->dev);
+
+	return 0;
+}
+
+int igc_close(struct net_device *netdev)
+{
+	if (netif_device_present(netdev) || netdev->dismantle)
+		return __igc_close(netdev, false);
+	return 0;
+}
+
+/**
+ * igc_ioctl - Access the hwtstamp interface
+ * @netdev: network interface device structure
+ * @ifr: interface request data
+ * @cmd: ioctl command
+ **/
+static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	switch (cmd) {
+	case SIOCGHWTSTAMP:
+		return igc_ptp_get_ts_config(netdev, ifr);
+	case SIOCSHWTSTAMP:
+		return igc_ptp_set_ts_config(netdev, ifr);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue,
+				      bool enable)
+{
+	struct igc_ring *ring;
+
+	if (queue < 0 || queue >= adapter->num_tx_queues)
+		return -EINVAL;
+
+	ring = adapter->tx_ring[queue];
+	ring->launchtime_enable = enable;
+
+	return 0;
+}
+
+static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now)
+{
+	struct timespec64 b;
+
+	b = ktime_to_timespec64(base_time);
+
+	return timespec64_compare(now, &b) > 0;
+}
+
+static bool validate_schedule(struct igc_adapter *adapter,
+			      const struct tc_taprio_qopt_offload *qopt)
+{
+	int queue_uses[IGC_MAX_TX_QUEUES] = { };
+	struct igc_hw *hw = &adapter->hw;
+	struct timespec64 now;
+	size_t n;
+
+	if (qopt->cycle_time_extension)
+		return false;
+
+	igc_ptp_read(adapter, &now);
+
+	/* If we program the controller's BASET registers with a time
+	 * in the future, it will hold all the packets until that
+	 * time, causing a lot of TX Hangs, so to avoid that, we
+	 * reject schedules that would start in the future.
+	 * Note: Limitation above is no longer in i226.
+	 */
+	if (!is_base_time_past(qopt->base_time, &now) &&
+	    igc_is_device_id_i225(hw))
+		return false;
+
+	for (n = 0; n < qopt->num_entries; n++) {
+		const struct tc_taprio_sched_entry *e, *prev;
+		int i;
+
+		prev = n ? &qopt->entries[n - 1] : NULL;
+		e = &qopt->entries[n];
+
+		/* i225 only supports "global" frame preemption
+		 * settings.
+		 */
+		if (e->command != TC_TAPRIO_CMD_SET_GATES)
+			return false;
+
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			if (e->gate_mask & BIT(i)) {
+				queue_uses[i]++;
+
+				/* There are limitations: A single queue cannot
+				 * be opened and closed multiple times per cycle
+				 * unless the gate stays open. Check for it.
+				 */
+				if (queue_uses[i] > 1 &&
+				    !(prev->gate_mask & BIT(i)))
+					return false;
+			}
+	}
+
+	return true;
+}
+
+static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
+				     struct tc_etf_qopt_offload *qopt)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int err;
+
+	if (hw->mac.type != igc_i225)
+		return -EOPNOTSUPP;
+
+	err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable);
+	if (err)
+		return err;
+
+	return igc_tsn_offload_apply(adapter);
+}
+
+static int igc_qbv_clear_schedule(struct igc_adapter *adapter)
+{
+	unsigned long flags;
+	int i;
+
+	adapter->base_time = 0;
+	adapter->cycle_time = NSEC_PER_SEC;
+	adapter->taprio_offload_enable = false;
+	adapter->qbv_config_change_errors = 0;
+	adapter->qbv_count = 0;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *ring = adapter->tx_ring[i];
+
+		ring->start_time = 0;
+		ring->end_time = NSEC_PER_SEC;
+		ring->max_sdu = 0;
+	}
+
+	spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
+	adapter->qbv_transition = false;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *ring = adapter->tx_ring[i];
+
+		ring->oper_gate_closed = false;
+		ring->admin_gate_closed = false;
+	}
+
+	spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
+	return 0;
+}
+
+static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+{
+	igc_qbv_clear_schedule(adapter);
+
+	return 0;
+}
+
+static void igc_taprio_stats(struct net_device *dev,
+			     struct tc_taprio_qopt_stats *stats)
+{
+	/* When Strict_End is enabled, the tx_overruns counter
+	 * will always be zero.
+	 */
+	stats->tx_overruns = 0;
+}
+
+static void igc_taprio_queue_stats(struct net_device *dev,
+				   struct tc_taprio_qopt_queue_stats *queue_stats)
+{
+	struct tc_taprio_qopt_stats *stats = &queue_stats->stats;
+
+	/* When Strict_End is enabled, the tx_overruns counter
+	 * will always be zero.
+	 */
+	stats->tx_overruns = 0;
+}
+
+static int igc_save_qbv_schedule(struct igc_adapter *adapter,
+				 struct tc_taprio_qopt_offload *qopt)
+{
+	bool queue_configured[IGC_MAX_TX_QUEUES] = { };
+	struct igc_hw *hw = &adapter->hw;
+	u32 start_time = 0, end_time = 0;
+	struct timespec64 now;
+	unsigned long flags;
+	size_t n;
+	int i;
+
+	switch (qopt->cmd) {
+	case TAPRIO_CMD_REPLACE:
+		break;
+	case TAPRIO_CMD_DESTROY:
+		return igc_tsn_clear_schedule(adapter);
+	case TAPRIO_CMD_STATS:
+		igc_taprio_stats(adapter->netdev, &qopt->stats);
+		return 0;
+	case TAPRIO_CMD_QUEUE_STATS:
+		igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (qopt->base_time < 0)
+		return -ERANGE;
+
+	if (igc_is_device_id_i225(hw) && adapter->taprio_offload_enable)
+		return -EALREADY;
+
+	if (!validate_schedule(adapter, qopt))
+		return -EINVAL;
+
+	adapter->cycle_time = qopt->cycle_time;
+	adapter->base_time = qopt->base_time;
+	adapter->taprio_offload_enable = true;
+
+	igc_ptp_read(adapter, &now);
+
+	for (n = 0; n < qopt->num_entries; n++) {
+		struct tc_taprio_sched_entry *e = &qopt->entries[n];
+
+		end_time += e->interval;
+
+		/* If any of the conditions below are true, we need to manually
+		 * control the end time of the cycle.
+		 * 1. Qbv users can specify a cycle time that is not equal
+		 * to the total GCL intervals. Hence, recalculation is
+		 * necessary here to exclude the time interval that
+		 * exceeds the cycle time.
+		 * 2. According to IEEE Std. 802.1Q-2018 section 8.6.9.2,
+		 * once the end of the list is reached, it will switch
+		 * to the END_OF_CYCLE state and leave the gates in the
+		 * same state until the next cycle is started.
+		 */
+		if (end_time > adapter->cycle_time ||
+		    n + 1 == qopt->num_entries)
+			end_time = adapter->cycle_time;
+
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			struct igc_ring *ring = adapter->tx_ring[i];
+
+			if (!(e->gate_mask & BIT(i)))
+				continue;
+
+			/* Check whether a queue stays open for more than one
+			 * entry. If so, keep the start and advance the end
+			 * time.
+			 */
+			if (!queue_configured[i])
+				ring->start_time = start_time;
+			ring->end_time = end_time;
+
+			if (ring->start_time >= adapter->cycle_time)
+				queue_configured[i] = false;
+			else
+				queue_configured[i] = true;
+		}
+
+		start_time += e->interval;
+	}
+
+	spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
+	/* Check whether a queue gets configured.
+	 * If not, set the start and end time to be end time.
+	 */
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *ring = adapter->tx_ring[i];
+
+		if (!is_base_time_past(qopt->base_time, &now)) {
+			ring->admin_gate_closed = false;
+		} else {
+			ring->oper_gate_closed = false;
+			ring->admin_gate_closed = false;
+		}
+
+		if (!queue_configured[i]) {
+			if (!is_base_time_past(qopt->base_time, &now))
+				ring->admin_gate_closed = true;
+			else
+				ring->oper_gate_closed = true;
+
+			ring->start_time = end_time;
+			ring->end_time = end_time;
+		}
+	}
+
+	spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *ring = adapter->tx_ring[i];
+		struct net_device *dev = adapter->netdev;
+
+		if (qopt->max_sdu[i])
+			ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len - ETH_TLEN;
+		else
+			ring->max_sdu = 0;
+	}
+
+	return 0;
+}
+
+static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter,
+					 struct tc_taprio_qopt_offload *qopt)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int err;
+
+	if (hw->mac.type != igc_i225)
+		return -EOPNOTSUPP;
+
+	err = igc_save_qbv_schedule(adapter, qopt);
+	if (err)
+		return err;
+
+	return igc_tsn_offload_apply(adapter);
+}
+
+static int igc_save_cbs_params(struct igc_adapter *adapter, int queue,
+			       bool enable, int idleslope, int sendslope,
+			       int hicredit, int locredit)
+{
+	bool cbs_status[IGC_MAX_SR_QUEUES] = { false };
+	struct net_device *netdev = adapter->netdev;
+	struct igc_ring *ring;
+	int i;
+
+	/* i225 has two sets of credit-based shaper logic.
+	 * Supporting it only on the top two priority queues
+	 */
+	if (queue < 0 || queue > 1)
+		return -EINVAL;
+
+	ring = adapter->tx_ring[queue];
+
+	for (i = 0; i < IGC_MAX_SR_QUEUES; i++)
+		if (adapter->tx_ring[i])
+			cbs_status[i] = adapter->tx_ring[i]->cbs_enable;
+
+	/* CBS should be enabled on the highest priority queue first in order
+	 * for the CBS algorithm to operate as intended.
+	 */
+	if (enable) {
+		if (queue == 1 && !cbs_status[0]) {
+			netdev_err(netdev,
+				   "Enabling CBS on queue1 before queue0\n");
+			return -EINVAL;
+		}
+	} else {
+		if (queue == 0 && cbs_status[1]) {
+			netdev_err(netdev,
+				   "Disabling CBS on queue0 before queue1\n");
+			return -EINVAL;
+		}
+	}
+
+	ring->cbs_enable = enable;
+	ring->idleslope = idleslope;
+	ring->sendslope = sendslope;
+	ring->hicredit = hicredit;
+	ring->locredit = locredit;
+
+	return 0;
+}
+
+static int igc_tsn_enable_cbs(struct igc_adapter *adapter,
+			      struct tc_cbs_qopt_offload *qopt)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int err;
+
+	if (hw->mac.type != igc_i225)
+		return -EOPNOTSUPP;
+
+	if (qopt->queue < 0 || qopt->queue > 1)
+		return -EINVAL;
+
+	err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable,
+				  qopt->idleslope, qopt->sendslope,
+				  qopt->hicredit, qopt->locredit);
+	if (err)
+		return err;
+
+	return igc_tsn_offload_apply(adapter);
+}
+
+static int igc_tc_query_caps(struct igc_adapter *adapter,
+			     struct tc_query_caps_base *base)
+{
+	struct igc_hw *hw = &adapter->hw;
+
+	switch (base->type) {
+	case TC_SETUP_QDISC_TAPRIO: {
+		struct tc_taprio_caps *caps = base->caps;
+
+		caps->broken_mqprio = true;
+
+		if (hw->mac.type == igc_i225) {
+			caps->supports_queue_max_sdu = true;
+			caps->gate_mask_per_txq = true;
+		}
+
+		return 0;
+	}
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			void *type_data)
+{
+	struct igc_adapter *adapter = netdev_priv(dev);
+
+	adapter->tc_setup_type = type;
+
+	switch (type) {
+	case TC_QUERY_CAPS:
+		return igc_tc_query_caps(adapter, type_data);
+	case TC_SETUP_QDISC_TAPRIO:
+		return igc_tsn_enable_qbv_scheduling(adapter, type_data);
+
+	case TC_SETUP_QDISC_ETF:
+		return igc_tsn_enable_launchtime(adapter, type_data);
+
+	case TC_SETUP_QDISC_CBS:
+		return igc_tsn_enable_cbs(adapter, type_data);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
+{
+	struct igc_adapter *adapter = netdev_priv(dev);
+
+	switch (bpf->command) {
+	case XDP_SETUP_PROG:
+		return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack);
+	case XDP_SETUP_XSK_POOL:
+		return igc_xdp_setup_pool(adapter, bpf->xsk.pool,
+					  bpf->xsk.queue_id);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int igc_xdp_xmit(struct net_device *dev, int num_frames,
+			struct xdp_frame **frames, u32 flags)
+{
+	struct igc_adapter *adapter = netdev_priv(dev);
+	int cpu = smp_processor_id();
+	struct netdev_queue *nq;
+	struct igc_ring *ring;
+	int i, drops;
+
+	if (unlikely(!netif_carrier_ok(dev)))
+		return -ENETDOWN;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	ring = igc_xdp_get_tx_ring(adapter, cpu);
+	nq = txring_txq(ring);
+
+	__netif_tx_lock(nq, cpu);
+
+	/* Avoid transmit queue timeout since we share it with the slow path */
+	txq_trans_cond_update(nq);
+
+	drops = 0;
+	for (i = 0; i < num_frames; i++) {
+		int err;
+		struct xdp_frame *xdpf = frames[i];
+
+		err = igc_xdp_init_tx_descriptor(ring, xdpf);
+		if (err) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		}
+	}
+
+	if (flags & XDP_XMIT_FLUSH)
+		igc_flush_tx_descriptors(ring);
+
+	__netif_tx_unlock(nq);
+
+	return num_frames - drops;
+}
+
+static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter,
+					struct igc_q_vector *q_vector)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 eics = 0;
+
+	eics |= q_vector->eims_value;
+	wr32(IGC_EICS, eics);
+}
+
+int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
+{
+	struct igc_adapter *adapter = netdev_priv(dev);
+	struct igc_q_vector *q_vector;
+	struct igc_ring *ring;
+
+	if (test_bit(__IGC_DOWN, &adapter->state))
+		return -ENETDOWN;
+
+	if (!igc_xdp_is_enabled(adapter))
+		return -ENXIO;
+
+	if (queue_id >= adapter->num_rx_queues)
+		return -EINVAL;
+
+	ring = adapter->rx_ring[queue_id];
+
+	if (!ring->xsk_pool)
+		return -ENXIO;
+
+	q_vector = adapter->q_vector[queue_id];
+	if (!napi_if_scheduled_mark_missed(&q_vector->napi))
+		igc_trigger_rxtxq_interrupt(adapter, q_vector);
+
+	return 0;
+}
+
+static const struct net_device_ops igc_netdev_ops = {
+	.ndo_open		= igc_open,
+	.ndo_stop		= igc_close,
+	.ndo_start_xmit		= igc_xmit_frame,
+	.ndo_set_rx_mode	= igc_set_rx_mode,
+	.ndo_set_mac_address	= igc_set_mac,
+	.ndo_change_mtu		= igc_change_mtu,
+	.ndo_tx_timeout		= igc_tx_timeout,
+	.ndo_get_stats64	= igc_get_stats64,
+	.ndo_fix_features	= igc_fix_features,
+	.ndo_set_features	= igc_set_features,
+	.ndo_features_check	= igc_features_check,
+	.ndo_eth_ioctl		= igc_ioctl,
+	.ndo_setup_tc		= igc_setup_tc,
+	.ndo_bpf		= igc_bpf,
+	.ndo_xdp_xmit		= igc_xdp_xmit,
+	.ndo_xsk_wakeup		= igc_xsk_wakeup,
+};
+
+/* PCIe configuration access */
+void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
+{
+	struct igc_adapter *adapter = hw->back;
+
+	pci_read_config_word(adapter->pdev, reg, value);
+}
+
+void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
+{
+	struct igc_adapter *adapter = hw->back;
+
+	pci_write_config_word(adapter->pdev, reg, *value);
+}
+
+s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
+{
+	struct igc_adapter *adapter = hw->back;
+
+	if (!pci_is_pcie(adapter->pdev))
+		return -IGC_ERR_CONFIG;
+
+	pcie_capability_read_word(adapter->pdev, reg, value);
+
+	return IGC_SUCCESS;
+}
+
+s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
+{
+	struct igc_adapter *adapter = hw->back;
+
+	if (!pci_is_pcie(adapter->pdev))
+		return -IGC_ERR_CONFIG;
+
+	pcie_capability_write_word(adapter->pdev, reg, *value);
+
+	return IGC_SUCCESS;
+}
+
+u32 igc_rd32(struct igc_hw *hw, u32 reg)
+{
+	struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw);
+	u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
+	u32 value = 0;
+
+	if (IGC_REMOVED(hw_addr))
+		return ~value;
+
+	value = readl(&hw_addr[reg]);
+
+	/* reads should not return all F's */
+	if (!(~value) && (!reg || !(~readl(hw_addr)))) {
+		struct net_device *netdev = igc->netdev;
+
+		hw->hw_addr = NULL;
+		netif_device_detach(netdev);
+		netdev_err(netdev, "PCIe link lost, device now detached\n");
+		WARN(pci_device_is_present(igc->pdev),
+		     "igc: Failed to read reg 0x%x!\n", reg);
+	}
+
+	return value;
+}
+
+/* Mapping HW RSS Type to enum xdp_rss_hash_type */
+static enum xdp_rss_hash_type igc_xdp_rss_type[IGC_RSS_TYPE_MAX_TABLE] = {
+	[IGC_RSS_TYPE_NO_HASH]		= XDP_RSS_TYPE_L2,
+	[IGC_RSS_TYPE_HASH_TCP_IPV4]	= XDP_RSS_TYPE_L4_IPV4_TCP,
+	[IGC_RSS_TYPE_HASH_IPV4]	= XDP_RSS_TYPE_L3_IPV4,
+	[IGC_RSS_TYPE_HASH_TCP_IPV6]	= XDP_RSS_TYPE_L4_IPV6_TCP,
+	[IGC_RSS_TYPE_HASH_IPV6_EX]	= XDP_RSS_TYPE_L3_IPV6_EX,
+	[IGC_RSS_TYPE_HASH_IPV6]	= XDP_RSS_TYPE_L3_IPV6,
+	[IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
+	[IGC_RSS_TYPE_HASH_UDP_IPV4]	= XDP_RSS_TYPE_L4_IPV4_UDP,
+	[IGC_RSS_TYPE_HASH_UDP_IPV6]	= XDP_RSS_TYPE_L4_IPV6_UDP,
+	[IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX,
+	[10] = XDP_RSS_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW  */
+	[11] = XDP_RSS_TYPE_NONE, /* keep array sized for SW bit-mask   */
+	[12] = XDP_RSS_TYPE_NONE, /* to handle future HW revisons       */
+	[13] = XDP_RSS_TYPE_NONE,
+	[14] = XDP_RSS_TYPE_NONE,
+	[15] = XDP_RSS_TYPE_NONE,
+};
+
+static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
+			   enum xdp_rss_hash_type *rss_type)
+{
+	const struct igc_xdp_buff *ctx = (void *)_ctx;
+
+	if (!(ctx->xdp.rxq->dev->features & NETIF_F_RXHASH))
+		return -ENODATA;
+
+	*hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss);
+	*rss_type = igc_xdp_rss_type[igc_rss_type(ctx->rx_desc)];
+
+	return 0;
+}
+
+static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp)
+{
+	const struct igc_xdp_buff *ctx = (void *)_ctx;
+
+	if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) {
+		*timestamp = ctx->rx_ts;
+
+		return 0;
+	}
+
+	return -ENODATA;
+}
+
+static const struct xdp_metadata_ops igc_xdp_metadata_ops = {
+	.xmo_rx_hash			= igc_xdp_rx_hash,
+	.xmo_rx_timestamp		= igc_xdp_rx_timestamp,
+};
+
+static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer)
+{
+	struct igc_adapter *adapter = container_of(timer, struct igc_adapter,
+						   hrtimer);
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
+	adapter->qbv_transition = true;
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+		if (tx_ring->admin_gate_closed) {
+			tx_ring->admin_gate_closed = false;
+			tx_ring->oper_gate_closed = true;
+		} else {
+			tx_ring->oper_gate_closed = false;
+		}
+	}
+	adapter->qbv_transition = false;
+
+	spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * igc_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in igc_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * igc_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring the adapter private structure,
+ * and a hardware reset occur.
+ */
+static int igc_probe(struct pci_dev *pdev,
+		     const struct pci_device_id *ent)
+{
+	struct igc_adapter *adapter;
+	struct net_device *netdev;
+	struct igc_hw *hw;
+	const struct igc_info *ei = igc_info_tbl[ent->driver_data];
+	int err;
+
+	err = pci_enable_device_mem(pdev);
+	if (err)
+		return err;
+
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_err(&pdev->dev,
+			"No usable DMA configuration, aborting\n");
+		goto err_dma;
+	}
+
+	err = pci_request_mem_regions(pdev, igc_driver_name);
+	if (err)
+		goto err_pci_reg;
+
+	err = pci_enable_ptm(pdev, NULL);
+	if (err < 0)
+		dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n");
+
+	pci_set_master(pdev);
+
+	err = -ENOMEM;
+	netdev = alloc_etherdev_mq(sizeof(struct igc_adapter),
+				   IGC_MAX_TX_QUEUES);
+
+	if (!netdev)
+		goto err_alloc_etherdev;
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	pci_set_drvdata(pdev, netdev);
+	adapter = netdev_priv(netdev);
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+	hw = &adapter->hw;
+	hw->back = adapter;
+	adapter->port_num = hw->bus.func;
+	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+
+	err = pci_save_state(pdev);
+	if (err)
+		goto err_ioremap;
+
+	err = -EIO;
+	adapter->io_addr = ioremap(pci_resource_start(pdev, 0),
+				   pci_resource_len(pdev, 0));
+	if (!adapter->io_addr)
+		goto err_ioremap;
+
+	/* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */
+	hw->hw_addr = adapter->io_addr;
+
+	netdev->netdev_ops = &igc_netdev_ops;
+	netdev->xdp_metadata_ops = &igc_xdp_metadata_ops;
+	igc_ethtool_set_ops(netdev);
+	netdev->watchdog_timeo = 5 * HZ;
+
+	netdev->mem_start = pci_resource_start(pdev, 0);
+	netdev->mem_end = pci_resource_end(pdev, 0);
+
+	/* PCI config space info */
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	hw->revision_id = pdev->revision;
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+
+	/* Copy the default MAC and PHY function pointers */
+	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
+	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
+
+	/* Initialize skew-specific constants */
+	err = ei->get_invariants(hw);
+	if (err)
+		goto err_sw_init;
+
+	/* Add supported features to the features list*/
+	netdev->features |= NETIF_F_SG;
+	netdev->features |= NETIF_F_TSO;
+	netdev->features |= NETIF_F_TSO6;
+	netdev->features |= NETIF_F_TSO_ECN;
+	netdev->features |= NETIF_F_RXHASH;
+	netdev->features |= NETIF_F_RXCSUM;
+	netdev->features |= NETIF_F_HW_CSUM;
+	netdev->features |= NETIF_F_SCTP_CRC;
+	netdev->features |= NETIF_F_HW_TC;
+
+#define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
+				  NETIF_F_GSO_GRE_CSUM | \
+				  NETIF_F_GSO_IPXIP4 | \
+				  NETIF_F_GSO_IPXIP6 | \
+				  NETIF_F_GSO_UDP_TUNNEL | \
+				  NETIF_F_GSO_UDP_TUNNEL_CSUM)
+
+	netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES;
+	netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES;
+
+	/* setup the private structure */
+	err = igc_sw_init(adapter);
+	if (err)
+		goto err_sw_init;
+
+	/* copy netdev features into list of user selectable features */
+	netdev->hw_features |= NETIF_F_NTUPLE;
+	netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
+	netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
+	netdev->hw_features |= netdev->features;
+
+	netdev->features |= NETIF_F_HIGHDMA;
+
+	netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
+	netdev->mpls_features |= NETIF_F_HW_CSUM;
+	netdev->hw_enc_features |= netdev->vlan_features;
+
+	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+			       NETDEV_XDP_ACT_XSK_ZEROCOPY;
+
+	/* MTU range: 68 - 9216 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
+
+	/* before reading the NVM, reset the controller to put the device in a
+	 * known good starting state
+	 */
+	hw->mac.ops.reset_hw(hw);
+
+	if (igc_get_flash_presence_i225(hw)) {
+		if (hw->nvm.ops.validate(hw) < 0) {
+			dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
+			err = -EIO;
+			goto err_eeprom;
+		}
+	}
+
+	if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) {
+		/* copy the MAC address out of the NVM */
+		if (hw->mac.ops.read_mac_addr(hw))
+			dev_err(&pdev->dev, "NVM Read Error\n");
+	}
+
+	eth_hw_addr_set(netdev, hw->mac.addr);
+
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		dev_err(&pdev->dev, "Invalid MAC Address\n");
+		err = -EIO;
+		goto err_eeprom;
+	}
+
+	/* configure RXPBSIZE and TXPBSIZE */
+	wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT);
+	wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
+
+	timer_setup(&adapter->watchdog_timer, igc_watchdog, 0);
+	timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0);
+
+	INIT_WORK(&adapter->reset_task, igc_reset_task);
+	INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
+
+	hrtimer_init(&adapter->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	adapter->hrtimer.function = &igc_qbv_scheduling_timer;
+
+	/* Initialize link properties that are user-changeable */
+	adapter->fc_autoneg = true;
+	hw->mac.autoneg = true;
+	hw->phy.autoneg_advertised = 0xaf;
+
+	hw->fc.requested_mode = igc_fc_default;
+	hw->fc.current_mode = igc_fc_default;
+
+	/* By default, support wake on port A */
+	adapter->flags |= IGC_FLAG_WOL_SUPPORTED;
+
+	/* initialize the wol settings based on the eeprom settings */
+	if (adapter->flags & IGC_FLAG_WOL_SUPPORTED)
+		adapter->wol |= IGC_WUFC_MAG;
+
+	device_set_wakeup_enable(&adapter->pdev->dev,
+				 adapter->flags & IGC_FLAG_WOL_SUPPORTED);
+
+	igc_ptp_init(adapter);
+
+	igc_tsn_clear_schedule(adapter);
+
+	/* reset the hardware with the new settings */
+	igc_reset(adapter);
+
+	/* let the f/w know that the h/w is now under the control of the
+	 * driver.
+	 */
+	igc_get_hw_control(adapter);
+
+	strncpy(netdev->name, "eth%d", IFNAMSIZ);
+	err = register_netdev(netdev);
+	if (err)
+		goto err_register;
+
+	 /* carrier off reporting is important to ethtool even BEFORE open */
+	netif_carrier_off(netdev);
+
+	/* Check if Media Autosense is enabled */
+	adapter->ei = *ei;
+
+	/* print pcie link status and MAC address */
+	pcie_print_link_status(pdev);
+	netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr);
+
+	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
+	/* Disable EEE for internal PHY devices */
+	hw->dev_spec._base.eee_enable = false;
+	adapter->flags &= ~IGC_FLAG_EEE;
+	igc_set_eee_i225(hw, false, false, false);
+
+	pm_runtime_put_noidle(&pdev->dev);
+
+	return 0;
+
+err_register:
+	igc_release_hw_control(adapter);
+err_eeprom:
+	if (!igc_check_reset_block(hw))
+		igc_reset_phy(hw);
+err_sw_init:
+	igc_clear_interrupt_scheme(adapter);
+	iounmap(adapter->io_addr);
+err_ioremap:
+	free_netdev(netdev);
+err_alloc_etherdev:
+	pci_release_mem_regions(pdev);
+err_pci_reg:
+err_dma:
+	pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * igc_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * igc_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.  This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ */
+static void igc_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	pm_runtime_get_noresume(&pdev->dev);
+
+	igc_flush_nfc_rules(adapter);
+
+	igc_ptp_stop(adapter);
+
+	pci_disable_ptm(pdev);
+	pci_clear_master(pdev);
+
+	set_bit(__IGC_DOWN, &adapter->state);
+
+	del_timer_sync(&adapter->watchdog_timer);
+	del_timer_sync(&adapter->phy_info_timer);
+
+	cancel_work_sync(&adapter->reset_task);
+	cancel_work_sync(&adapter->watchdog_task);
+	hrtimer_cancel(&adapter->hrtimer);
+
+	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
+	 * would have already happened in close and is redundant.
+	 */
+	igc_release_hw_control(adapter);
+	unregister_netdev(netdev);
+
+	igc_clear_interrupt_scheme(adapter);
+	pci_iounmap(pdev, adapter->io_addr);
+	pci_release_mem_regions(pdev);
+
+	free_netdev(netdev);
+
+	pci_disable_device(pdev);
+}
+
+static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake,
+			  bool runtime)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	u32 wufc = runtime ? IGC_WUFC_LNKC : adapter->wol;
+	struct igc_hw *hw = &adapter->hw;
+	u32 ctrl, rctl, status;
+	bool wake;
+
+	rtnl_lock();
+	netif_device_detach(netdev);
+
+	if (netif_running(netdev))
+		__igc_close(netdev, true);
+
+	igc_ptp_suspend(adapter);
+
+	igc_clear_interrupt_scheme(adapter);
+	rtnl_unlock();
+
+	status = rd32(IGC_STATUS);
+	if (status & IGC_STATUS_LU)
+		wufc &= ~IGC_WUFC_LNKC;
+
+	if (wufc) {
+		igc_setup_rctl(adapter);
+		igc_set_rx_mode(netdev);
+
+		/* turn on all-multi mode if wake on multicast is enabled */
+		if (wufc & IGC_WUFC_MC) {
+			rctl = rd32(IGC_RCTL);
+			rctl |= IGC_RCTL_MPE;
+			wr32(IGC_RCTL, rctl);
+		}
+
+		ctrl = rd32(IGC_CTRL);
+		ctrl |= IGC_CTRL_ADVD3WUC;
+		wr32(IGC_CTRL, ctrl);
+
+		/* Allow time for pending master requests to run */
+		igc_disable_pcie_master(hw);
+
+		wr32(IGC_WUC, IGC_WUC_PME_EN);
+		wr32(IGC_WUFC, wufc);
+	} else {
+		wr32(IGC_WUC, 0);
+		wr32(IGC_WUFC, 0);
+	}
+
+	wake = wufc || adapter->en_mng_pt;
+	if (!wake)
+		igc_power_down_phy_copper_base(&adapter->hw);
+	else
+		igc_power_up_link(adapter);
+
+	if (enable_wake)
+		*enable_wake = wake;
+
+	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
+	 * would have already happened in close and is redundant.
+	 */
+	igc_release_hw_control(adapter);
+
+	pci_disable_device(pdev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int __maybe_unused igc_runtime_suspend(struct device *dev)
+{
+	return __igc_shutdown(to_pci_dev(dev), NULL, 1);
+}
+
+static void igc_deliver_wake_packet(struct net_device *netdev)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	struct sk_buff *skb;
+	u32 wupl;
+
+	wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK;
+
+	/* WUPM stores only the first 128 bytes of the wake packet.
+	 * Read the packet only if we have the whole thing.
+	 */
+	if (wupl == 0 || wupl > IGC_WUPM_BYTES)
+		return;
+
+	skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES);
+	if (!skb)
+		return;
+
+	skb_put(skb, wupl);
+
+	/* Ensure reads are 32-bit aligned */
+	wupl = roundup(wupl, 4);
+
+	memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl);
+
+	skb->protocol = eth_type_trans(skb, netdev);
+	netif_rx(skb);
+}
+
+static int __maybe_unused igc_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	u32 err, val;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	pci_save_state(pdev);
+
+	if (!pci_device_is_present(pdev))
+		return -ENODEV;
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		netdev_err(netdev, "Cannot enable PCI device from suspend\n");
+		return err;
+	}
+	pci_set_master(pdev);
+
+	pci_enable_wake(pdev, PCI_D3hot, 0);
+	pci_enable_wake(pdev, PCI_D3cold, 0);
+
+	if (igc_init_interrupt_scheme(adapter, true)) {
+		netdev_err(netdev, "Unable to allocate memory for queues\n");
+		return -ENOMEM;
+	}
+
+	igc_reset(adapter);
+
+	/* let the f/w know that the h/w is now under the control of the
+	 * driver.
+	 */
+	igc_get_hw_control(adapter);
+
+	val = rd32(IGC_WUS);
+	if (val & WAKE_PKT_WUS)
+		igc_deliver_wake_packet(netdev);
+
+	wr32(IGC_WUS, ~0);
+
+	rtnl_lock();
+	if (!err && netif_running(netdev))
+		err = __igc_open(netdev, true);
+
+	if (!err)
+		netif_device_attach(netdev);
+	rtnl_unlock();
+
+	return err;
+}
+
+static int __maybe_unused igc_runtime_resume(struct device *dev)
+{
+	return igc_resume(dev);
+}
+
+static int __maybe_unused igc_suspend(struct device *dev)
+{
+	return __igc_shutdown(to_pci_dev(dev), NULL, 0);
+}
+
+static int __maybe_unused igc_runtime_idle(struct device *dev)
+{
+	struct net_device *netdev = dev_get_drvdata(dev);
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	if (!igc_has_link(adapter))
+		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
+
+	return -EBUSY;
+}
+#endif /* CONFIG_PM */
+
+static void igc_shutdown(struct pci_dev *pdev)
+{
+	bool wake;
+
+	__igc_shutdown(pdev, &wake, 0);
+
+	if (system_state == SYSTEM_POWER_OFF) {
+		pci_wake_from_d3(pdev, wake);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+}
+
+/**
+ *  igc_io_error_detected - called when PCI error is detected
+ *  @pdev: Pointer to PCI device
+ *  @state: The current PCI connection state
+ *
+ *  This function is called after a PCI bus error affecting
+ *  this device has been detected.
+ **/
+static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
+					      pci_channel_state_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	netif_device_detach(netdev);
+
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	if (netif_running(netdev))
+		igc_down(adapter);
+	pci_disable_device(pdev);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ *  igc_io_slot_reset - called after the PCI bus has been reset.
+ *  @pdev: Pointer to PCI device
+ *
+ *  Restart the card from scratch, as if from a cold-boot. Implementation
+ *  resembles the first-half of the igc_resume routine.
+ **/
+static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct igc_hw *hw = &adapter->hw;
+	pci_ers_result_t result;
+
+	if (pci_enable_device_mem(pdev)) {
+		netdev_err(netdev, "Could not re-enable PCI device after reset\n");
+		result = PCI_ERS_RESULT_DISCONNECT;
+	} else {
+		pci_set_master(pdev);
+		pci_restore_state(pdev);
+		pci_save_state(pdev);
+
+		pci_enable_wake(pdev, PCI_D3hot, 0);
+		pci_enable_wake(pdev, PCI_D3cold, 0);
+
+		/* In case of PCI error, adapter loses its HW address
+		 * so we should re-assign it here.
+		 */
+		hw->hw_addr = adapter->io_addr;
+
+		igc_reset(adapter);
+		wr32(IGC_WUS, ~0);
+		result = PCI_ERS_RESULT_RECOVERED;
+	}
+
+	return result;
+}
+
+/**
+ *  igc_io_resume - called when traffic can start to flow again.
+ *  @pdev: Pointer to PCI device
+ *
+ *  This callback is called when the error recovery driver tells us that
+ *  its OK to resume normal operation. Implementation resembles the
+ *  second-half of the igc_resume routine.
+ */
+static void igc_io_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct igc_adapter *adapter = netdev_priv(netdev);
+
+	rtnl_lock();
+	if (netif_running(netdev)) {
+		if (igc_open(netdev)) {
+			netdev_err(netdev, "igc_open failed after reset\n");
+			return;
+		}
+	}
+
+	netif_device_attach(netdev);
+
+	/* let the f/w know that the h/w is now under the control of the
+	 * driver.
+	 */
+	igc_get_hw_control(adapter);
+	rtnl_unlock();
+}
+
+static const struct pci_error_handlers igc_err_handler = {
+	.error_detected = igc_io_error_detected,
+	.slot_reset = igc_io_slot_reset,
+	.resume = igc_io_resume,
+};
+
+#ifdef CONFIG_PM
+static const struct dev_pm_ops igc_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
+	SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume,
+			   igc_runtime_idle)
+};
+#endif
+
+static struct pci_driver igc_driver = {
+	.name     = igc_driver_name,
+	.id_table = igc_pci_tbl,
+	.probe    = igc_probe,
+	.remove   = igc_remove,
+#ifdef CONFIG_PM
+	.driver.pm = &igc_pm_ops,
+#endif
+	.shutdown = igc_shutdown,
+	.err_handler = &igc_err_handler,
+};
+
+/**
+ * igc_reinit_queues - return error
+ * @adapter: pointer to adapter structure
+ */
+int igc_reinit_queues(struct igc_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int err = 0;
+
+	if (netif_running(netdev))
+		igc_close(netdev);
+
+	igc_reset_interrupt_capability(adapter);
+
+	if (igc_init_interrupt_scheme(adapter, true)) {
+		netdev_err(netdev, "Unable to allocate memory for queues\n");
+		return -ENOMEM;
+	}
+
+	if (netif_running(netdev))
+		err = igc_open(netdev);
+
+	return err;
+}
+
+/**
+ * igc_get_hw_dev - return device
+ * @hw: pointer to hardware structure
+ *
+ * used by hardware layer to print debugging information
+ */
+struct net_device *igc_get_hw_dev(struct igc_hw *hw)
+{
+	struct igc_adapter *adapter = hw->back;
+
+	return adapter->netdev;
+}
+
+static void igc_disable_rx_ring_hw(struct igc_ring *ring)
+{
+	struct igc_hw *hw = &ring->q_vector->adapter->hw;
+	u8 idx = ring->reg_idx;
+	u32 rxdctl;
+
+	rxdctl = rd32(IGC_RXDCTL(idx));
+	rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE;
+	rxdctl |= IGC_RXDCTL_SWFLUSH;
+	wr32(IGC_RXDCTL(idx), rxdctl);
+}
+
+void igc_disable_rx_ring(struct igc_ring *ring)
+{
+	igc_disable_rx_ring_hw(ring);
+	igc_clean_rx_ring(ring);
+}
+
+void igc_enable_rx_ring(struct igc_ring *ring)
+{
+	struct igc_adapter *adapter = ring->q_vector->adapter;
+
+	igc_configure_rx_ring(adapter, ring);
+
+	if (ring->xsk_pool)
+		igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
+	else
+		igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
+}
+
+void igc_disable_tx_ring(struct igc_ring *ring)
+{
+	igc_disable_tx_ring_hw(ring);
+	igc_clean_tx_ring(ring);
+}
+
+void igc_enable_tx_ring(struct igc_ring *ring)
+{
+	struct igc_adapter *adapter = ring->q_vector->adapter;
+
+	igc_configure_tx_ring(adapter, ring);
+}
+
+/**
+ * igc_init_module - Driver Registration Routine
+ *
+ * igc_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ */
+static int __init igc_init_module(void)
+{
+	int ret;
+
+	pr_info("%s\n", igc_driver_string);
+	pr_info("%s\n", igc_copyright);
+
+	ret = pci_register_driver(&igc_driver);
+	return ret;
+}
+
+module_init(igc_init_module);
+
+/**
+ * igc_exit_module - Driver Exit Cleanup Routine
+ *
+ * igc_exit_module is called just before the driver is removed
+ * from memory.
+ */
+static void __exit igc_exit_module(void)
+{
+	pci_unregister_driver(&igc_driver);
+}
+
+module_exit(igc_exit_module);
+/* igc_main.c */
diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.c b/drivers/net/ethernet/intel/igc/igc_nvm.c
new file mode 100644
index 0000000000..58f81aba01
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_nvm.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2018 Intel Corporation */
+
+#include "igc_mac.h"
+#include "igc_nvm.h"
+
+/**
+ * igc_poll_eerd_eewr_done - Poll for EEPROM read/write completion
+ * @hw: pointer to the HW structure
+ * @ee_reg: EEPROM flag for polling
+ *
+ * Polls the EEPROM status bit for either read or write completion based
+ * upon the value of 'ee_reg'.
+ */
+static s32 igc_poll_eerd_eewr_done(struct igc_hw *hw, int ee_reg)
+{
+	s32 ret_val = -IGC_ERR_NVM;
+	u32 attempts = 100000;
+	u32 i, reg = 0;
+
+	for (i = 0; i < attempts; i++) {
+		if (ee_reg == IGC_NVM_POLL_READ)
+			reg = rd32(IGC_EERD);
+		else
+			reg = rd32(IGC_EEWR);
+
+		if (reg & IGC_NVM_RW_REG_DONE) {
+			ret_val = 0;
+			break;
+		}
+
+		udelay(5);
+	}
+
+	return ret_val;
+}
+
+/**
+ * igc_acquire_nvm - Generic request for access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Set the EEPROM access request bit and wait for EEPROM access grant bit.
+ * Return successful if access grant bit set, else clear the request for
+ * EEPROM access and return -IGC_ERR_NVM (-1).
+ */
+s32 igc_acquire_nvm(struct igc_hw *hw)
+{
+	s32 timeout = IGC_NVM_GRANT_ATTEMPTS;
+	u32 eecd = rd32(IGC_EECD);
+	s32 ret_val = 0;
+
+	wr32(IGC_EECD, eecd | IGC_EECD_REQ);
+	eecd = rd32(IGC_EECD);
+
+	while (timeout) {
+		if (eecd & IGC_EECD_GNT)
+			break;
+		udelay(5);
+		eecd = rd32(IGC_EECD);
+		timeout--;
+	}
+
+	if (!timeout) {
+		eecd &= ~IGC_EECD_REQ;
+		wr32(IGC_EECD, eecd);
+		hw_dbg("Could not acquire NVM grant\n");
+		ret_val = -IGC_ERR_NVM;
+	}
+
+	return ret_val;
+}
+
+/**
+ * igc_release_nvm - Release exclusive access to EEPROM
+ * @hw: pointer to the HW structure
+ *
+ * Stop any current commands to the EEPROM and clear the EEPROM request bit.
+ */
+void igc_release_nvm(struct igc_hw *hw)
+{
+	u32 eecd;
+
+	eecd = rd32(IGC_EECD);
+	eecd &= ~IGC_EECD_REQ;
+	wr32(IGC_EECD, eecd);
+}
+
+/**
+ * igc_read_nvm_eerd - Reads EEPROM using EERD register
+ * @hw: pointer to the HW structure
+ * @offset: offset of word in the EEPROM to read
+ * @words: number of words to read
+ * @data: word read from the EEPROM
+ *
+ * Reads a 16 bit word from the EEPROM using the EERD register.
+ */
+s32 igc_read_nvm_eerd(struct igc_hw *hw, u16 offset, u16 words, u16 *data)
+{
+	struct igc_nvm_info *nvm = &hw->nvm;
+	u32 i, eerd = 0;
+	s32 ret_val = 0;
+
+	/* A check for invalid values:  offset too large, too many words,
+	 * and not enough words.
+	 */
+	if (offset >= nvm->word_size || (words > (nvm->word_size - offset)) ||
+	    words == 0) {
+		hw_dbg("nvm parameter(s) out of bounds\n");
+		ret_val = -IGC_ERR_NVM;
+		goto out;
+	}
+
+	for (i = 0; i < words; i++) {
+		eerd = ((offset + i) << IGC_NVM_RW_ADDR_SHIFT) +
+			IGC_NVM_RW_REG_START;
+
+		wr32(IGC_EERD, eerd);
+		ret_val = igc_poll_eerd_eewr_done(hw, IGC_NVM_POLL_READ);
+		if (ret_val)
+			break;
+
+		data[i] = (rd32(IGC_EERD) >> IGC_NVM_RW_REG_DATA);
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_read_mac_addr - Read device MAC address
+ * @hw: pointer to the HW structure
+ */
+s32 igc_read_mac_addr(struct igc_hw *hw)
+{
+	u32 rar_high;
+	u32 rar_low;
+	u16 i;
+
+	rar_high = rd32(IGC_RAH(0));
+	rar_low = rd32(IGC_RAL(0));
+
+	for (i = 0; i < IGC_RAL_MAC_ADDR_LEN; i++)
+		hw->mac.perm_addr[i] = (u8)(rar_low >> (i * 8));
+
+	for (i = 0; i < IGC_RAH_MAC_ADDR_LEN; i++)
+		hw->mac.perm_addr[i + 4] = (u8)(rar_high >> (i * 8));
+
+	for (i = 0; i < ETH_ALEN; i++)
+		hw->mac.addr[i] = hw->mac.perm_addr[i];
+
+	return 0;
+}
+
+/**
+ * igc_validate_nvm_checksum - Validate EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Calculates the EEPROM checksum by reading/adding each word of the EEPROM
+ * and then verifies that the sum of the EEPROM is equal to 0xBABA.
+ */
+s32 igc_validate_nvm_checksum(struct igc_hw *hw)
+{
+	u16 checksum = 0;
+	u16 i, nvm_data;
+	s32 ret_val = 0;
+
+	for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) {
+		ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			hw_dbg("NVM Read Error\n");
+			goto out;
+		}
+		checksum += nvm_data;
+	}
+
+	if (checksum != (u16)NVM_SUM) {
+		hw_dbg("NVM Checksum Invalid\n");
+		ret_val = -IGC_ERR_NVM;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_update_nvm_checksum - Update EEPROM checksum
+ * @hw: pointer to the HW structure
+ *
+ * Updates the EEPROM checksum by reading/adding each word of the EEPROM
+ * up to the checksum.  Then calculates the EEPROM checksum and writes the
+ * value to the EEPROM.
+ */
+s32 igc_update_nvm_checksum(struct igc_hw *hw)
+{
+	u16 checksum = 0;
+	u16 i, nvm_data;
+	s32  ret_val;
+
+	for (i = 0; i < NVM_CHECKSUM_REG; i++) {
+		ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data);
+		if (ret_val) {
+			hw_dbg("NVM Read Error while updating checksum.\n");
+			goto out;
+		}
+		checksum += nvm_data;
+	}
+	checksum = (u16)NVM_SUM - checksum;
+	ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum);
+	if (ret_val)
+		hw_dbg("NVM Write Error while updating checksum.\n");
+
+out:
+	return ret_val;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.h b/drivers/net/ethernet/intel/igc/igc_nvm.h
new file mode 100644
index 0000000000..f9fc2e9cfb
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_nvm.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_NVM_H_
+#define _IGC_NVM_H_
+
+s32 igc_acquire_nvm(struct igc_hw *hw);
+void igc_release_nvm(struct igc_hw *hw);
+s32 igc_read_mac_addr(struct igc_hw *hw);
+s32 igc_read_nvm_eerd(struct igc_hw *hw, u16 offset, u16 words, u16 *data);
+s32 igc_validate_nvm_checksum(struct igc_hw *hw);
+s32 igc_update_nvm_checksum(struct igc_hw *hw);
+
+#endif
diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c
new file mode 100644
index 0000000000..53b77c969c
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_phy.c
@@ -0,0 +1,795 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2018 Intel Corporation */
+
+#include "igc_phy.h"
+
+/**
+ * igc_check_reset_block - Check if PHY reset is blocked
+ * @hw: pointer to the HW structure
+ *
+ * Read the PHY management control register and check whether a PHY reset
+ * is blocked.  If a reset is not blocked return 0, otherwise
+ * return IGC_ERR_BLK_PHY_RESET (12).
+ */
+s32 igc_check_reset_block(struct igc_hw *hw)
+{
+	u32 manc;
+
+	manc = rd32(IGC_MANC);
+
+	return (manc & IGC_MANC_BLK_PHY_RST_ON_IDE) ?
+		IGC_ERR_BLK_PHY_RESET : 0;
+}
+
+/**
+ * igc_get_phy_id - Retrieve the PHY ID and revision
+ * @hw: pointer to the HW structure
+ *
+ * Reads the PHY registers and stores the PHY ID and possibly the PHY
+ * revision in the hardware structure.
+ */
+s32 igc_get_phy_id(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	s32 ret_val = 0;
+	u16 phy_id;
+
+	ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id);
+	if (ret_val)
+		goto out;
+
+	phy->id = (u32)(phy_id << 16);
+	usleep_range(200, 500);
+	ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id);
+	if (ret_val)
+		goto out;
+
+	phy->id |= (u32)(phy_id & PHY_REVISION_MASK);
+	phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_phy_has_link - Polls PHY for link
+ * @hw: pointer to the HW structure
+ * @iterations: number of times to poll for link
+ * @usec_interval: delay between polling attempts
+ * @success: pointer to whether polling was successful or not
+ *
+ * Polls the PHY status register for link, 'iterations' number of times.
+ */
+s32 igc_phy_has_link(struct igc_hw *hw, u32 iterations,
+		     u32 usec_interval, bool *success)
+{
+	u16 i, phy_status;
+	s32 ret_val = 0;
+
+	for (i = 0; i < iterations; i++) {
+		/* Some PHYs require the PHY_STATUS register to be read
+		 * twice due to the link bit being sticky.  No harm doing
+		 * it across the board.
+		 */
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val && usec_interval > 0) {
+			/* If the first read fails, another entity may have
+			 * ownership of the resources, wait and try again to
+			 * see if they have relinquished the resources yet.
+			 */
+			if (usec_interval >= 1000)
+				mdelay(usec_interval / 1000);
+			else
+				udelay(usec_interval);
+		}
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		if (phy_status & MII_SR_LINK_STATUS)
+			break;
+		if (usec_interval >= 1000)
+			mdelay(usec_interval / 1000);
+		else
+			udelay(usec_interval);
+	}
+
+	*success = (i < iterations) ? true : false;
+
+	return ret_val;
+}
+
+/**
+ * igc_power_up_phy_copper - Restore copper link in case of PHY power down
+ * @hw: pointer to the HW structure
+ *
+ * In the case of a PHY power down to save power, or to turn off link during a
+ * driver unload, restore the link to previous settings.
+ */
+void igc_power_up_phy_copper(struct igc_hw *hw)
+{
+	u16 mii_reg = 0;
+
+	/* The PHY will retain its settings across a power down/up cycle */
+	hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+	mii_reg &= ~MII_CR_POWER_DOWN;
+	hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);
+}
+
+/**
+ * igc_power_down_phy_copper - Power down copper PHY
+ * @hw: pointer to the HW structure
+ *
+ * Power down PHY to save power when interface is down and wake on lan
+ * is not enabled.
+ */
+void igc_power_down_phy_copper(struct igc_hw *hw)
+{
+	u16 mii_reg = 0;
+
+	/* The PHY will retain its settings across a power down/up cycle */
+	hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg);
+	mii_reg |= MII_CR_POWER_DOWN;
+
+	/* Temporary workaround - should be removed when PHY will implement
+	 * IEEE registers as properly
+	 */
+	/* hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg);*/
+	usleep_range(1000, 2000);
+}
+
+/**
+ * igc_check_downshift - Checks whether a downshift in speed occurred
+ * @hw: pointer to the HW structure
+ *
+ * A downshift is detected by querying the PHY link health.
+ */
+void igc_check_downshift(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+
+	/* speed downshift not supported */
+	phy->speed_downgraded = false;
+}
+
+/**
+ * igc_phy_hw_reset - PHY hardware reset
+ * @hw: pointer to the HW structure
+ *
+ * Verify the reset block is not blocking us from resetting.  Acquire
+ * semaphore (if necessary) and read/set/write the device control reset
+ * bit in the PHY.  Wait the appropriate delay time for the device to
+ * reset and release the semaphore (if necessary).
+ */
+s32 igc_phy_hw_reset(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	u32 phpm = 0, timeout = 10000;
+	s32  ret_val;
+	u32 ctrl;
+
+	ret_val = igc_check_reset_block(hw);
+	if (ret_val) {
+		ret_val = 0;
+		goto out;
+	}
+
+	ret_val = phy->ops.acquire(hw);
+	if (ret_val)
+		goto out;
+
+	phpm = rd32(IGC_I225_PHPM);
+
+	ctrl = rd32(IGC_CTRL);
+	wr32(IGC_CTRL, ctrl | IGC_CTRL_PHY_RST);
+	wrfl();
+
+	udelay(phy->reset_delay_us);
+
+	wr32(IGC_CTRL, ctrl);
+	wrfl();
+
+	/* SW should guarantee 100us for the completion of the PHY reset */
+	usleep_range(100, 150);
+	do {
+		phpm = rd32(IGC_I225_PHPM);
+		timeout--;
+		udelay(1);
+	} while (!(phpm & IGC_PHY_RST_COMP) && timeout);
+
+	if (!timeout)
+		hw_dbg("Timeout is expired after a phy reset\n");
+
+	usleep_range(100, 150);
+
+	phy->ops.release(hw);
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_phy_setup_autoneg - Configure PHY for auto-negotiation
+ * @hw: pointer to the HW structure
+ *
+ * Reads the MII auto-neg advertisement register and/or the 1000T control
+ * register and if the PHY is already setup for auto-negotiation, then
+ * return successful.  Otherwise, setup advertisement and flow control to
+ * the appropriate values for the wanted auto-negotiation.
+ */
+static s32 igc_phy_setup_autoneg(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	u16 aneg_multigbt_an_ctrl = 0;
+	u16 mii_1000t_ctrl_reg = 0;
+	u16 mii_autoneg_adv_reg;
+	s32 ret_val;
+
+	phy->autoneg_advertised &= phy->autoneg_mask;
+
+	/* Read the MII Auto-Neg Advertisement Register (Address 4). */
+	ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg);
+	if (ret_val)
+		return ret_val;
+
+	if (phy->autoneg_mask & ADVERTISE_1000_FULL) {
+		/* Read the MII 1000Base-T Control Register (Address 9). */
+		ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL,
+					    &mii_1000t_ctrl_reg);
+		if (ret_val)
+			return ret_val;
+	}
+
+	if (phy->autoneg_mask & ADVERTISE_2500_FULL) {
+		/* Read the MULTI GBT AN Control Register - reg 7.32 */
+		ret_val = phy->ops.read_reg(hw, (STANDARD_AN_REG_MASK <<
+					    MMD_DEVADDR_SHIFT) |
+					    ANEG_MULTIGBT_AN_CTRL,
+					    &aneg_multigbt_an_ctrl);
+
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Need to parse both autoneg_advertised and fc and set up
+	 * the appropriate PHY registers.  First we will parse for
+	 * autoneg_advertised software override.  Since we can advertise
+	 * a plethora of combinations, we need to check each bit
+	 * individually.
+	 */
+
+	/* First we clear all the 10/100 mb speed bits in the Auto-Neg
+	 * Advertisement Register (Address 4) and the 1000 mb speed bits in
+	 * the  1000Base-T Control Register (Address 9).
+	 */
+	mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS |
+				 NWAY_AR_100TX_HD_CAPS |
+				 NWAY_AR_10T_FD_CAPS   |
+				 NWAY_AR_10T_HD_CAPS);
+	mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS);
+
+	hw_dbg("autoneg_advertised %x\n", phy->autoneg_advertised);
+
+	/* Do we want to advertise 10 Mb Half Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_10_HALF) {
+		hw_dbg("Advertise 10mb Half duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS;
+	}
+
+	/* Do we want to advertise 10 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_10_FULL) {
+		hw_dbg("Advertise 10mb Full duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS;
+	}
+
+	/* Do we want to advertise 100 Mb Half Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_100_HALF) {
+		hw_dbg("Advertise 100mb Half duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS;
+	}
+
+	/* Do we want to advertise 100 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_100_FULL) {
+		hw_dbg("Advertise 100mb Full duplex\n");
+		mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS;
+	}
+
+	/* We do not allow the Phy to advertise 1000 Mb Half Duplex */
+	if (phy->autoneg_advertised & ADVERTISE_1000_HALF)
+		hw_dbg("Advertise 1000mb Half duplex request denied!\n");
+
+	/* Do we want to advertise 1000 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_1000_FULL) {
+		hw_dbg("Advertise 1000mb Full duplex\n");
+		mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS;
+	}
+
+	/* We do not allow the Phy to advertise 2500 Mb Half Duplex */
+	if (phy->autoneg_advertised & ADVERTISE_2500_HALF)
+		hw_dbg("Advertise 2500mb Half duplex request denied!\n");
+
+	/* Do we want to advertise 2500 Mb Full Duplex? */
+	if (phy->autoneg_advertised & ADVERTISE_2500_FULL) {
+		hw_dbg("Advertise 2500mb Full duplex\n");
+		aneg_multigbt_an_ctrl |= CR_2500T_FD_CAPS;
+	} else {
+		aneg_multigbt_an_ctrl &= ~CR_2500T_FD_CAPS;
+	}
+
+	/* Check for a software override of the flow control settings, and
+	 * setup the PHY advertisement registers accordingly.  If
+	 * auto-negotiation is enabled, then software will have to set the
+	 * "PAUSE" bits to the correct value in the Auto-Negotiation
+	 * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto-
+	 * negotiation.
+	 *
+	 * The possible values of the "fc" parameter are:
+	 *      0:  Flow control is completely disabled
+	 *      1:  Rx flow control is enabled (we can receive pause frames
+	 *          but not send pause frames).
+	 *      2:  Tx flow control is enabled (we can send pause frames
+	 *          but we do not support receiving pause frames).
+	 *      3:  Both Rx and Tx flow control (symmetric) are enabled.
+	 *  other:  No software override.  The flow control configuration
+	 *          in the EEPROM is used.
+	 */
+	switch (hw->fc.current_mode) {
+	case igc_fc_none:
+		/* Flow control (Rx & Tx) is completely disabled by a
+		 * software over-ride.
+		 */
+		mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+		break;
+	case igc_fc_rx_pause:
+		/* Rx Flow control is enabled, and Tx Flow control is
+		 * disabled, by a software over-ride.
+		 *
+		 * Since there really isn't a way to advertise that we are
+		 * capable of Rx Pause ONLY, we will advertise that we
+		 * support both symmetric and asymmetric Rx PAUSE.  Later
+		 * (in igc_config_fc_after_link_up) we will disable the
+		 * hw's ability to send PAUSE frames.
+		 */
+		mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+		break;
+	case igc_fc_tx_pause:
+		/* Tx Flow control is enabled, and Rx Flow control is
+		 * disabled, by a software over-ride.
+		 */
+		mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR;
+		mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE;
+		break;
+	case igc_fc_full:
+		/* Flow control (both Rx and Tx) is enabled by a software
+		 * over-ride.
+		 */
+		mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE);
+		break;
+	default:
+		hw_dbg("Flow control param set incorrectly\n");
+		return -IGC_ERR_CONFIG;
+	}
+
+	ret_val = phy->ops.write_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg);
+	if (ret_val)
+		return ret_val;
+
+	hw_dbg("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg);
+
+	if (phy->autoneg_mask & ADVERTISE_1000_FULL)
+		ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL,
+					     mii_1000t_ctrl_reg);
+
+	if (phy->autoneg_mask & ADVERTISE_2500_FULL)
+		ret_val = phy->ops.write_reg(hw,
+					     (STANDARD_AN_REG_MASK <<
+					     MMD_DEVADDR_SHIFT) |
+					     ANEG_MULTIGBT_AN_CTRL,
+					     aneg_multigbt_an_ctrl);
+
+	return ret_val;
+}
+
+/**
+ * igc_wait_autoneg - Wait for auto-neg completion
+ * @hw: pointer to the HW structure
+ *
+ * Waits for auto-negotiation to complete or for the auto-negotiation time
+ * limit to expire, which ever happens first.
+ */
+static s32 igc_wait_autoneg(struct igc_hw *hw)
+{
+	u16 i, phy_status;
+	s32 ret_val = 0;
+
+	/* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */
+	for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) {
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status);
+		if (ret_val)
+			break;
+		if (phy_status & MII_SR_AUTONEG_COMPLETE)
+			break;
+		msleep(100);
+	}
+
+	/* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation
+	 * has completed.
+	 */
+	return ret_val;
+}
+
+/**
+ * igc_copper_link_autoneg - Setup/Enable autoneg for copper link
+ * @hw: pointer to the HW structure
+ *
+ * Performs initial bounds checking on autoneg advertisement parameter, then
+ * configure to advertise the full capability.  Setup the PHY to autoneg
+ * and restart the negotiation process between the link partner.  If
+ * autoneg_wait_to_complete, then wait for autoneg to complete before exiting.
+ */
+static s32 igc_copper_link_autoneg(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	u16 phy_ctrl;
+	s32 ret_val;
+
+	/* Perform some bounds checking on the autoneg advertisement
+	 * parameter.
+	 */
+	phy->autoneg_advertised &= phy->autoneg_mask;
+
+	/* If autoneg_advertised is zero, we assume it was not defaulted
+	 * by the calling code so we set to advertise full capability.
+	 */
+	if (phy->autoneg_advertised == 0)
+		phy->autoneg_advertised = phy->autoneg_mask;
+
+	hw_dbg("Reconfiguring auto-neg advertisement params\n");
+	ret_val = igc_phy_setup_autoneg(hw);
+	if (ret_val) {
+		hw_dbg("Error Setting up Auto-Negotiation\n");
+		goto out;
+	}
+	hw_dbg("Restarting Auto-Neg\n");
+
+	/* Restart auto-negotiation by setting the Auto Neg Enable bit and
+	 * the Auto Neg Restart bit in the PHY control register.
+	 */
+	ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl);
+	if (ret_val)
+		goto out;
+
+	phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG);
+	ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl);
+	if (ret_val)
+		goto out;
+
+	/* Does the user want to wait for Auto-Neg to complete here, or
+	 * check at a later time (for example, callback routine).
+	 */
+	if (phy->autoneg_wait_to_complete) {
+		ret_val = igc_wait_autoneg(hw);
+		if (ret_val) {
+			hw_dbg("Error while waiting for autoneg to complete\n");
+			goto out;
+		}
+	}
+
+	hw->mac.get_link_status = true;
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_setup_copper_link - Configure copper link settings
+ * @hw: pointer to the HW structure
+ *
+ * Calls the appropriate function to configure the link for auto-neg or forced
+ * speed and duplex.  Then we check for link, once link is established calls
+ * to configure collision distance and flow control are called.  If link is
+ * not established, we return -IGC_ERR_PHY (-2).
+ */
+s32 igc_setup_copper_link(struct igc_hw *hw)
+{
+	s32 ret_val = 0;
+	bool link;
+
+	if (hw->mac.autoneg) {
+		/* Setup autoneg and flow control advertisement and perform
+		 * autonegotiation.
+		 */
+		ret_val = igc_copper_link_autoneg(hw);
+		if (ret_val)
+			goto out;
+	} else {
+		/* PHY will be set to 10H, 10F, 100H or 100F
+		 * depending on user settings.
+		 */
+		hw_dbg("Forcing Speed and Duplex\n");
+		ret_val = hw->phy.ops.force_speed_duplex(hw);
+		if (ret_val) {
+			hw_dbg("Error Forcing Speed and Duplex\n");
+			goto out;
+		}
+	}
+
+	/* Check link status. Wait up to 100 microseconds for link to become
+	 * valid.
+	 */
+	ret_val = igc_phy_has_link(hw, COPPER_LINK_UP_LIMIT, 10, &link);
+	if (ret_val)
+		goto out;
+
+	if (link) {
+		hw_dbg("Valid link established!!!\n");
+		igc_config_collision_dist(hw);
+		ret_val = igc_config_fc_after_link_up(hw);
+	} else {
+		hw_dbg("Unable to establish link!!!\n");
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_read_phy_reg_mdic - Read MDI control register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to be read
+ * @data: pointer to the read data
+ *
+ * Reads the MDI control register in the PHY at offset and stores the
+ * information read to data.
+ */
+static s32 igc_read_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 *data)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	u32 i, mdic = 0;
+	s32 ret_val = 0;
+
+	if (offset > MAX_PHY_REG_ADDRESS) {
+		hw_dbg("PHY Address %d is out of range\n", offset);
+		ret_val = -IGC_ERR_PARAM;
+		goto out;
+	}
+
+	/* Set up Op-code, Phy Address, and register offset in the MDI
+	 * Control register.  The MAC will take care of interfacing with the
+	 * PHY to retrieve the desired data.
+	 */
+	mdic = ((offset << IGC_MDIC_REG_SHIFT) |
+		(phy->addr << IGC_MDIC_PHY_SHIFT) |
+		(IGC_MDIC_OP_READ));
+
+	wr32(IGC_MDIC, mdic);
+
+	/* Poll the ready bit to see if the MDI read completed
+	 * Increasing the time out as testing showed failures with
+	 * the lower time out
+	 */
+	for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) {
+		udelay(50);
+		mdic = rd32(IGC_MDIC);
+		if (mdic & IGC_MDIC_READY)
+			break;
+	}
+	if (!(mdic & IGC_MDIC_READY)) {
+		hw_dbg("MDI Read did not complete\n");
+		ret_val = -IGC_ERR_PHY;
+		goto out;
+	}
+	if (mdic & IGC_MDIC_ERROR) {
+		hw_dbg("MDI Error\n");
+		ret_val = -IGC_ERR_PHY;
+		goto out;
+	}
+	*data = (u16)mdic;
+
+out:
+	return ret_val;
+}
+
+/**
+ * igc_write_phy_reg_mdic - Write MDI control register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write to register at offset
+ *
+ * Writes data to MDI control register in the PHY at offset.
+ */
+static s32 igc_write_phy_reg_mdic(struct igc_hw *hw, u32 offset, u16 data)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	u32 i, mdic = 0;
+	s32 ret_val = 0;
+
+	if (offset > MAX_PHY_REG_ADDRESS) {
+		hw_dbg("PHY Address %d is out of range\n", offset);
+		ret_val = -IGC_ERR_PARAM;
+		goto out;
+	}
+
+	/* Set up Op-code, Phy Address, and register offset in the MDI
+	 * Control register.  The MAC will take care of interfacing with the
+	 * PHY to write the desired data.
+	 */
+	mdic = (((u32)data) |
+		(offset << IGC_MDIC_REG_SHIFT) |
+		(phy->addr << IGC_MDIC_PHY_SHIFT) |
+		(IGC_MDIC_OP_WRITE));
+
+	wr32(IGC_MDIC, mdic);
+
+	/* Poll the ready bit to see if the MDI read completed
+	 * Increasing the time out as testing showed failures with
+	 * the lower time out
+	 */
+	for (i = 0; i < IGC_GEN_POLL_TIMEOUT; i++) {
+		udelay(50);
+		mdic = rd32(IGC_MDIC);
+		if (mdic & IGC_MDIC_READY)
+			break;
+	}
+	if (!(mdic & IGC_MDIC_READY)) {
+		hw_dbg("MDI Write did not complete\n");
+		ret_val = -IGC_ERR_PHY;
+		goto out;
+	}
+	if (mdic & IGC_MDIC_ERROR) {
+		hw_dbg("MDI Error\n");
+		ret_val = -IGC_ERR_PHY;
+		goto out;
+	}
+
+out:
+	return ret_val;
+}
+
+/**
+ * __igc_access_xmdio_reg - Read/write XMDIO register
+ * @hw: pointer to the HW structure
+ * @address: XMDIO address to program
+ * @dev_addr: device address to program
+ * @data: pointer to value to read/write from/to the XMDIO address
+ * @read: boolean flag to indicate read or write
+ */
+static s32 __igc_access_xmdio_reg(struct igc_hw *hw, u16 address,
+				  u8 dev_addr, u16 *data, bool read)
+{
+	s32 ret_val;
+
+	ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAC, dev_addr);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAAD, address);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAC, IGC_MMDAC_FUNC_DATA |
+					dev_addr);
+	if (ret_val)
+		return ret_val;
+
+	if (read)
+		ret_val = hw->phy.ops.read_reg(hw, IGC_MMDAAD, data);
+	else
+		ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAAD, *data);
+	if (ret_val)
+		return ret_val;
+
+	/* Recalibrate the device back to 0 */
+	ret_val = hw->phy.ops.write_reg(hw, IGC_MMDAC, 0);
+	if (ret_val)
+		return ret_val;
+
+	return ret_val;
+}
+
+/**
+ * igc_read_xmdio_reg - Read XMDIO register
+ * @hw: pointer to the HW structure
+ * @addr: XMDIO address to program
+ * @dev_addr: device address to program
+ * @data: value to be read from the EMI address
+ */
+static s32 igc_read_xmdio_reg(struct igc_hw *hw, u16 addr,
+			      u8 dev_addr, u16 *data)
+{
+	return __igc_access_xmdio_reg(hw, addr, dev_addr, data, true);
+}
+
+/**
+ * igc_write_xmdio_reg - Write XMDIO register
+ * @hw: pointer to the HW structure
+ * @addr: XMDIO address to program
+ * @dev_addr: device address to program
+ * @data: value to be written to the XMDIO address
+ */
+static s32 igc_write_xmdio_reg(struct igc_hw *hw, u16 addr,
+			       u8 dev_addr, u16 data)
+{
+	return __igc_access_xmdio_reg(hw, addr, dev_addr, &data, false);
+}
+
+/**
+ * igc_write_phy_reg_gpy - Write GPY PHY register
+ * @hw: pointer to the HW structure
+ * @offset: register offset to write to
+ * @data: data to write at register offset
+ *
+ * Acquires semaphore, if necessary, then writes the data to PHY register
+ * at the offset. Release any acquired semaphores before exiting.
+ */
+s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data)
+{
+	u8 dev_addr = (offset & GPY_MMD_MASK) >> GPY_MMD_SHIFT;
+	s32 ret_val;
+
+	offset = offset & GPY_REG_MASK;
+
+	if (!dev_addr) {
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+		ret_val = igc_write_phy_reg_mdic(hw, offset, data);
+		hw->phy.ops.release(hw);
+	} else {
+		ret_val = igc_write_xmdio_reg(hw, (u16)offset, dev_addr,
+					      data);
+	}
+
+	return ret_val;
+}
+
+/**
+ * igc_read_phy_reg_gpy - Read GPY PHY register
+ * @hw: pointer to the HW structure
+ * @offset: lower half is register offset to read to
+ * upper half is MMD to use.
+ * @data: data to read at register offset
+ *
+ * Acquires semaphore, if necessary, then reads the data in the PHY register
+ * at the offset. Release any acquired semaphores before exiting.
+ */
+s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data)
+{
+	u8 dev_addr = (offset & GPY_MMD_MASK) >> GPY_MMD_SHIFT;
+	s32 ret_val;
+
+	offset = offset & GPY_REG_MASK;
+
+	if (!dev_addr) {
+		ret_val = hw->phy.ops.acquire(hw);
+		if (ret_val)
+			return ret_val;
+		ret_val = igc_read_phy_reg_mdic(hw, offset, data);
+		hw->phy.ops.release(hw);
+	} else {
+		ret_val = igc_read_xmdio_reg(hw, (u16)offset, dev_addr,
+					     data);
+	}
+
+	return ret_val;
+}
+
+/**
+ * igc_read_phy_fw_version - Read gPHY firmware version
+ * @hw: pointer to the HW structure
+ */
+u16 igc_read_phy_fw_version(struct igc_hw *hw)
+{
+	struct igc_phy_info *phy = &hw->phy;
+	u16 gphy_version = 0;
+	u16 ret_val;
+
+	/* NVM image version is reported as firmware version for i225 device */
+	ret_val = phy->ops.read_reg(hw, IGC_GPHY_VERSION, &gphy_version);
+	if (ret_val)
+		hw_dbg("igc_phy: read wrong gphy version\n");
+
+	return gphy_version;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_phy.h b/drivers/net/ethernet/intel/igc/igc_phy.h
new file mode 100644
index 0000000000..832a7e359f
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_phy.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_PHY_H_
+#define _IGC_PHY_H_
+
+#include "igc_mac.h"
+
+s32 igc_check_reset_block(struct igc_hw *hw);
+s32 igc_phy_hw_reset(struct igc_hw *hw);
+s32 igc_get_phy_id(struct igc_hw *hw);
+s32 igc_phy_has_link(struct igc_hw *hw, u32 iterations,
+		     u32 usec_interval, bool *success);
+void igc_check_downshift(struct igc_hw *hw);
+s32 igc_setup_copper_link(struct igc_hw *hw);
+void igc_power_up_phy_copper(struct igc_hw *hw);
+void igc_power_down_phy_copper(struct igc_hw *hw);
+s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data);
+s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data);
+u16 igc_read_phy_fw_version(struct igc_hw *hw);
+
+#endif
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
new file mode 100644
index 0000000000..928f387922
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -0,0 +1,1265 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2019 Intel Corporation */
+
+#include "igc.h"
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/ptp_classify.h>
+#include <linux/clocksource.h>
+#include <linux/ktime.h>
+#include <linux/delay.h>
+#include <linux/iopoll.h>
+
+#define INCVALUE_MASK		0x7fffffff
+#define ISGN			0x80000000
+
+#define IGC_PTP_TX_TIMEOUT		(HZ * 15)
+
+#define IGC_PTM_STAT_SLEEP		2
+#define IGC_PTM_STAT_TIMEOUT		100
+
+/* SYSTIM read access for I225 */
+void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 sec, nsec;
+
+	/* The timestamp is latched when SYSTIML is read. */
+	nsec = rd32(IGC_SYSTIML);
+	sec = rd32(IGC_SYSTIMH);
+
+	ts->tv_sec = sec;
+	ts->tv_nsec = nsec;
+}
+
+static void igc_ptp_write_i225(struct igc_adapter *adapter,
+			       const struct timespec64 *ts)
+{
+	struct igc_hw *hw = &adapter->hw;
+
+	wr32(IGC_SYSTIML, ts->tv_nsec);
+	wr32(IGC_SYSTIMH, ts->tv_sec);
+}
+
+static int igc_ptp_adjfine_i225(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct igc_adapter *igc = container_of(ptp, struct igc_adapter,
+					       ptp_caps);
+	struct igc_hw *hw = &igc->hw;
+	int neg_adj = 0;
+	u64 rate;
+	u32 inca;
+
+	if (scaled_ppm < 0) {
+		neg_adj = 1;
+		scaled_ppm = -scaled_ppm;
+	}
+	rate = scaled_ppm;
+	rate <<= 14;
+	rate = div_u64(rate, 78125);
+
+	inca = rate & INCVALUE_MASK;
+	if (neg_adj)
+		inca |= ISGN;
+
+	wr32(IGC_TIMINCA, inca);
+
+	return 0;
+}
+
+static int igc_ptp_adjtime_i225(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct igc_adapter *igc = container_of(ptp, struct igc_adapter,
+					       ptp_caps);
+	struct timespec64 now, then = ns_to_timespec64(delta);
+	unsigned long flags;
+
+	spin_lock_irqsave(&igc->tmreg_lock, flags);
+
+	igc_ptp_read(igc, &now);
+	now = timespec64_add(now, then);
+	igc_ptp_write_i225(igc, (const struct timespec64 *)&now);
+
+	spin_unlock_irqrestore(&igc->tmreg_lock, flags);
+
+	return 0;
+}
+
+static int igc_ptp_gettimex64_i225(struct ptp_clock_info *ptp,
+				   struct timespec64 *ts,
+				   struct ptp_system_timestamp *sts)
+{
+	struct igc_adapter *igc = container_of(ptp, struct igc_adapter,
+					       ptp_caps);
+	struct igc_hw *hw = &igc->hw;
+	unsigned long flags;
+
+	spin_lock_irqsave(&igc->tmreg_lock, flags);
+
+	ptp_read_system_prets(sts);
+	ts->tv_nsec = rd32(IGC_SYSTIML);
+	ts->tv_sec = rd32(IGC_SYSTIMH);
+	ptp_read_system_postts(sts);
+
+	spin_unlock_irqrestore(&igc->tmreg_lock, flags);
+
+	return 0;
+}
+
+static int igc_ptp_settime_i225(struct ptp_clock_info *ptp,
+				const struct timespec64 *ts)
+{
+	struct igc_adapter *igc = container_of(ptp, struct igc_adapter,
+					       ptp_caps);
+	unsigned long flags;
+
+	spin_lock_irqsave(&igc->tmreg_lock, flags);
+
+	igc_ptp_write_i225(igc, ts);
+
+	spin_unlock_irqrestore(&igc->tmreg_lock, flags);
+
+	return 0;
+}
+
+static void igc_pin_direction(int pin, int input, u32 *ctrl, u32 *ctrl_ext)
+{
+	u32 *ptr = pin < 2 ? ctrl : ctrl_ext;
+	static const u32 mask[IGC_N_SDP] = {
+		IGC_CTRL_SDP0_DIR,
+		IGC_CTRL_SDP1_DIR,
+		IGC_CTRL_EXT_SDP2_DIR,
+		IGC_CTRL_EXT_SDP3_DIR,
+	};
+
+	if (input)
+		*ptr &= ~mask[pin];
+	else
+		*ptr |= mask[pin];
+}
+
+static void igc_pin_perout(struct igc_adapter *igc, int chan, int pin, int freq)
+{
+	static const u32 igc_aux0_sel_sdp[IGC_N_SDP] = {
+		IGC_AUX0_SEL_SDP0, IGC_AUX0_SEL_SDP1, IGC_AUX0_SEL_SDP2, IGC_AUX0_SEL_SDP3,
+	};
+	static const u32 igc_aux1_sel_sdp[IGC_N_SDP] = {
+		IGC_AUX1_SEL_SDP0, IGC_AUX1_SEL_SDP1, IGC_AUX1_SEL_SDP2, IGC_AUX1_SEL_SDP3,
+	};
+	static const u32 igc_ts_sdp_en[IGC_N_SDP] = {
+		IGC_TS_SDP0_EN, IGC_TS_SDP1_EN, IGC_TS_SDP2_EN, IGC_TS_SDP3_EN,
+	};
+	static const u32 igc_ts_sdp_sel_tt0[IGC_N_SDP] = {
+		IGC_TS_SDP0_SEL_TT0, IGC_TS_SDP1_SEL_TT0,
+		IGC_TS_SDP2_SEL_TT0, IGC_TS_SDP3_SEL_TT0,
+	};
+	static const u32 igc_ts_sdp_sel_tt1[IGC_N_SDP] = {
+		IGC_TS_SDP0_SEL_TT1, IGC_TS_SDP1_SEL_TT1,
+		IGC_TS_SDP2_SEL_TT1, IGC_TS_SDP3_SEL_TT1,
+	};
+	static const u32 igc_ts_sdp_sel_fc0[IGC_N_SDP] = {
+		IGC_TS_SDP0_SEL_FC0, IGC_TS_SDP1_SEL_FC0,
+		IGC_TS_SDP2_SEL_FC0, IGC_TS_SDP3_SEL_FC0,
+	};
+	static const u32 igc_ts_sdp_sel_fc1[IGC_N_SDP] = {
+		IGC_TS_SDP0_SEL_FC1, IGC_TS_SDP1_SEL_FC1,
+		IGC_TS_SDP2_SEL_FC1, IGC_TS_SDP3_SEL_FC1,
+	};
+	static const u32 igc_ts_sdp_sel_clr[IGC_N_SDP] = {
+		IGC_TS_SDP0_SEL_FC1, IGC_TS_SDP1_SEL_FC1,
+		IGC_TS_SDP2_SEL_FC1, IGC_TS_SDP3_SEL_FC1,
+	};
+	struct igc_hw *hw = &igc->hw;
+	u32 ctrl, ctrl_ext, tssdp = 0;
+
+	ctrl = rd32(IGC_CTRL);
+	ctrl_ext = rd32(IGC_CTRL_EXT);
+	tssdp = rd32(IGC_TSSDP);
+
+	igc_pin_direction(pin, 0, &ctrl, &ctrl_ext);
+
+	/* Make sure this pin is not enabled as an input. */
+	if ((tssdp & IGC_AUX0_SEL_SDP3) == igc_aux0_sel_sdp[pin])
+		tssdp &= ~IGC_AUX0_TS_SDP_EN;
+
+	if ((tssdp & IGC_AUX1_SEL_SDP3) == igc_aux1_sel_sdp[pin])
+		tssdp &= ~IGC_AUX1_TS_SDP_EN;
+
+	tssdp &= ~igc_ts_sdp_sel_clr[pin];
+	if (freq) {
+		if (chan == 1)
+			tssdp |= igc_ts_sdp_sel_fc1[pin];
+		else
+			tssdp |= igc_ts_sdp_sel_fc0[pin];
+	} else {
+		if (chan == 1)
+			tssdp |= igc_ts_sdp_sel_tt1[pin];
+		else
+			tssdp |= igc_ts_sdp_sel_tt0[pin];
+	}
+	tssdp |= igc_ts_sdp_en[pin];
+
+	wr32(IGC_TSSDP, tssdp);
+	wr32(IGC_CTRL, ctrl);
+	wr32(IGC_CTRL_EXT, ctrl_ext);
+}
+
+static void igc_pin_extts(struct igc_adapter *igc, int chan, int pin)
+{
+	static const u32 igc_aux0_sel_sdp[IGC_N_SDP] = {
+		IGC_AUX0_SEL_SDP0, IGC_AUX0_SEL_SDP1, IGC_AUX0_SEL_SDP2, IGC_AUX0_SEL_SDP3,
+	};
+	static const u32 igc_aux1_sel_sdp[IGC_N_SDP] = {
+		IGC_AUX1_SEL_SDP0, IGC_AUX1_SEL_SDP1, IGC_AUX1_SEL_SDP2, IGC_AUX1_SEL_SDP3,
+	};
+	static const u32 igc_ts_sdp_en[IGC_N_SDP] = {
+		IGC_TS_SDP0_EN, IGC_TS_SDP1_EN, IGC_TS_SDP2_EN, IGC_TS_SDP3_EN,
+	};
+	struct igc_hw *hw = &igc->hw;
+	u32 ctrl, ctrl_ext, tssdp = 0;
+
+	ctrl = rd32(IGC_CTRL);
+	ctrl_ext = rd32(IGC_CTRL_EXT);
+	tssdp = rd32(IGC_TSSDP);
+
+	igc_pin_direction(pin, 1, &ctrl, &ctrl_ext);
+
+	/* Make sure this pin is not enabled as an output. */
+	tssdp &= ~igc_ts_sdp_en[pin];
+
+	if (chan == 1) {
+		tssdp &= ~IGC_AUX1_SEL_SDP3;
+		tssdp |= igc_aux1_sel_sdp[pin] | IGC_AUX1_TS_SDP_EN;
+	} else {
+		tssdp &= ~IGC_AUX0_SEL_SDP3;
+		tssdp |= igc_aux0_sel_sdp[pin] | IGC_AUX0_TS_SDP_EN;
+	}
+
+	wr32(IGC_TSSDP, tssdp);
+	wr32(IGC_CTRL, ctrl);
+	wr32(IGC_CTRL_EXT, ctrl_ext);
+}
+
+static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
+				       struct ptp_clock_request *rq, int on)
+{
+	struct igc_adapter *igc =
+		container_of(ptp, struct igc_adapter, ptp_caps);
+	struct igc_hw *hw = &igc->hw;
+	unsigned long flags;
+	struct timespec64 ts;
+	int use_freq = 0, pin = -1;
+	u32 tsim, tsauxc, tsauxc_mask, tsim_mask, trgttiml, trgttimh, freqout;
+	s64 ns;
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_EXTTS:
+		/* Reject requests with unsupported flags */
+		if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+					PTP_RISING_EDGE |
+					PTP_FALLING_EDGE |
+					PTP_STRICT_FLAGS))
+			return -EOPNOTSUPP;
+
+		/* Reject requests failing to enable both edges. */
+		if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+		    (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+		    (rq->extts.flags & PTP_EXTTS_EDGES) != PTP_EXTTS_EDGES)
+			return -EOPNOTSUPP;
+
+		if (on) {
+			pin = ptp_find_pin(igc->ptp_clock, PTP_PF_EXTTS,
+					   rq->extts.index);
+			if (pin < 0)
+				return -EBUSY;
+		}
+		if (rq->extts.index == 1) {
+			tsauxc_mask = IGC_TSAUXC_EN_TS1;
+			tsim_mask = IGC_TSICR_AUTT1;
+		} else {
+			tsauxc_mask = IGC_TSAUXC_EN_TS0;
+			tsim_mask = IGC_TSICR_AUTT0;
+		}
+		spin_lock_irqsave(&igc->tmreg_lock, flags);
+		tsauxc = rd32(IGC_TSAUXC);
+		tsim = rd32(IGC_TSIM);
+		if (on) {
+			igc_pin_extts(igc, rq->extts.index, pin);
+			tsauxc |= tsauxc_mask;
+			tsim |= tsim_mask;
+		} else {
+			tsauxc &= ~tsauxc_mask;
+			tsim &= ~tsim_mask;
+		}
+		wr32(IGC_TSAUXC, tsauxc);
+		wr32(IGC_TSIM, tsim);
+		spin_unlock_irqrestore(&igc->tmreg_lock, flags);
+		return 0;
+
+	case PTP_CLK_REQ_PEROUT:
+		/* Reject requests with unsupported flags */
+		if (rq->perout.flags)
+			return -EOPNOTSUPP;
+
+		if (on) {
+			pin = ptp_find_pin(igc->ptp_clock, PTP_PF_PEROUT,
+					   rq->perout.index);
+			if (pin < 0)
+				return -EBUSY;
+		}
+		ts.tv_sec = rq->perout.period.sec;
+		ts.tv_nsec = rq->perout.period.nsec;
+		ns = timespec64_to_ns(&ts);
+		ns = ns >> 1;
+		if (on && (ns <= 70000000LL || ns == 125000000LL ||
+			   ns == 250000000LL || ns == 500000000LL)) {
+			if (ns < 8LL)
+				return -EINVAL;
+			use_freq = 1;
+		}
+		ts = ns_to_timespec64(ns);
+		if (rq->perout.index == 1) {
+			if (use_freq) {
+				tsauxc_mask = IGC_TSAUXC_EN_CLK1 | IGC_TSAUXC_ST1;
+				tsim_mask = 0;
+			} else {
+				tsauxc_mask = IGC_TSAUXC_EN_TT1;
+				tsim_mask = IGC_TSICR_TT1;
+			}
+			trgttiml = IGC_TRGTTIML1;
+			trgttimh = IGC_TRGTTIMH1;
+			freqout = IGC_FREQOUT1;
+		} else {
+			if (use_freq) {
+				tsauxc_mask = IGC_TSAUXC_EN_CLK0 | IGC_TSAUXC_ST0;
+				tsim_mask = 0;
+			} else {
+				tsauxc_mask = IGC_TSAUXC_EN_TT0;
+				tsim_mask = IGC_TSICR_TT0;
+			}
+			trgttiml = IGC_TRGTTIML0;
+			trgttimh = IGC_TRGTTIMH0;
+			freqout = IGC_FREQOUT0;
+		}
+		spin_lock_irqsave(&igc->tmreg_lock, flags);
+		tsauxc = rd32(IGC_TSAUXC);
+		tsim = rd32(IGC_TSIM);
+		if (rq->perout.index == 1) {
+			tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1 |
+				    IGC_TSAUXC_ST1);
+			tsim &= ~IGC_TSICR_TT1;
+		} else {
+			tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0 |
+				    IGC_TSAUXC_ST0);
+			tsim &= ~IGC_TSICR_TT0;
+		}
+		if (on) {
+			struct timespec64 safe_start;
+			int i = rq->perout.index;
+
+			igc_pin_perout(igc, i, pin, use_freq);
+			igc_ptp_read(igc, &safe_start);
+
+			/* PPS output start time is triggered by Target time(TT)
+			 * register. Programming any past time value into TT
+			 * register will cause PPS to never start. Need to make
+			 * sure we program the TT register a time ahead in
+			 * future. There isn't a stringent need to fire PPS out
+			 * right away. Adding +2 seconds should take care of
+			 * corner cases. Let's say if the SYSTIML is close to
+			 * wrap up and the timer keeps ticking as we program the
+			 * register, adding +2seconds is safe bet.
+			 */
+			safe_start.tv_sec += 2;
+
+			if (rq->perout.start.sec < safe_start.tv_sec)
+				igc->perout[i].start.tv_sec = safe_start.tv_sec;
+			else
+				igc->perout[i].start.tv_sec = rq->perout.start.sec;
+			igc->perout[i].start.tv_nsec = rq->perout.start.nsec;
+			igc->perout[i].period.tv_sec = ts.tv_sec;
+			igc->perout[i].period.tv_nsec = ts.tv_nsec;
+			wr32(trgttimh, (u32)igc->perout[i].start.tv_sec);
+			/* For now, always select timer 0 as source. */
+			wr32(trgttiml, (u32)(igc->perout[i].start.tv_nsec |
+					     IGC_TT_IO_TIMER_SEL_SYSTIM0));
+			if (use_freq)
+				wr32(freqout, ns);
+			tsauxc |= tsauxc_mask;
+			tsim |= tsim_mask;
+		}
+		wr32(IGC_TSAUXC, tsauxc);
+		wr32(IGC_TSIM, tsim);
+		spin_unlock_irqrestore(&igc->tmreg_lock, flags);
+		return 0;
+
+	case PTP_CLK_REQ_PPS:
+		spin_lock_irqsave(&igc->tmreg_lock, flags);
+		tsim = rd32(IGC_TSIM);
+		if (on)
+			tsim |= IGC_TSICR_SYS_WRAP;
+		else
+			tsim &= ~IGC_TSICR_SYS_WRAP;
+		igc->pps_sys_wrap_on = on;
+		wr32(IGC_TSIM, tsim);
+		spin_unlock_irqrestore(&igc->tmreg_lock, flags);
+		return 0;
+
+	default:
+		break;
+	}
+
+	return -EOPNOTSUPP;
+}
+
+static int igc_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin,
+			      enum ptp_pin_function func, unsigned int chan)
+{
+	switch (func) {
+	case PTP_PF_NONE:
+	case PTP_PF_EXTTS:
+	case PTP_PF_PEROUT:
+		break;
+	case PTP_PF_PHYSYNC:
+		return -1;
+	}
+	return 0;
+}
+
+/**
+ * igc_ptp_systim_to_hwtstamp - convert system time value to HW timestamp
+ * @adapter: board private structure
+ * @hwtstamps: timestamp structure to update
+ * @systim: unsigned 64bit system time value
+ *
+ * We need to convert the system time value stored in the RX/TXSTMP registers
+ * into a hwtstamp which can be used by the upper level timestamping functions.
+ *
+ * Returns 0 on success.
+ **/
+static int igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter,
+				      struct skb_shared_hwtstamps *hwtstamps,
+				      u64 systim)
+{
+	switch (adapter->hw.mac.type) {
+	case igc_i225:
+		memset(hwtstamps, 0, sizeof(*hwtstamps));
+		/* Upper 32 bits contain s, lower 32 bits contain ns. */
+		hwtstamps->hwtstamp = ktime_set(systim >> 32,
+						systim & 0xFFFFFFFF);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * igc_ptp_rx_pktstamp - Retrieve timestamp from Rx packet buffer
+ * @adapter: Pointer to adapter the packet buffer belongs to
+ * @buf: Pointer to packet buffer
+ *
+ * This function retrieves the timestamp saved in the beginning of packet
+ * buffer. While two timestamps are available, one in timer0 reference and the
+ * other in timer1 reference, this function considers only the timestamp in
+ * timer0 reference.
+ *
+ * Returns timestamp value.
+ */
+ktime_t igc_ptp_rx_pktstamp(struct igc_adapter *adapter, __le32 *buf)
+{
+	ktime_t timestamp;
+	u32 secs, nsecs;
+	int adjust;
+
+	/* Timestamps are saved in little endian at the beginning of the packet
+	 * buffer following the layout:
+	 *
+	 * DWORD: | 0              | 1              | 2              | 3              |
+	 * Field: | Timer1 SYSTIML | Timer1 SYSTIMH | Timer0 SYSTIML | Timer0 SYSTIMH |
+	 *
+	 * SYSTIML holds the nanoseconds part while SYSTIMH holds the seconds
+	 * part of the timestamp.
+	 */
+	nsecs = le32_to_cpu(buf[2]);
+	secs = le32_to_cpu(buf[3]);
+
+	timestamp = ktime_set(secs, nsecs);
+
+	/* Adjust timestamp for the RX latency based on link speed */
+	switch (adapter->link_speed) {
+	case SPEED_10:
+		adjust = IGC_I225_RX_LATENCY_10;
+		break;
+	case SPEED_100:
+		adjust = IGC_I225_RX_LATENCY_100;
+		break;
+	case SPEED_1000:
+		adjust = IGC_I225_RX_LATENCY_1000;
+		break;
+	case SPEED_2500:
+		adjust = IGC_I225_RX_LATENCY_2500;
+		break;
+	default:
+		adjust = 0;
+		netdev_warn_once(adapter->netdev, "Imprecise timestamp\n");
+		break;
+	}
+
+	return ktime_sub_ns(timestamp, adjust);
+}
+
+static void igc_ptp_disable_rx_timestamp(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 val;
+	int i;
+
+	wr32(IGC_TSYNCRXCTL, 0);
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		val = rd32(IGC_SRRCTL(i));
+		val &= ~IGC_SRRCTL_TIMESTAMP;
+		wr32(IGC_SRRCTL(i), val);
+	}
+
+	val = rd32(IGC_RXPBS);
+	val &= ~IGC_RXPBS_CFG_TS_EN;
+	wr32(IGC_RXPBS, val);
+}
+
+static void igc_ptp_enable_rx_timestamp(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 val;
+	int i;
+
+	val = rd32(IGC_RXPBS);
+	val |= IGC_RXPBS_CFG_TS_EN;
+	wr32(IGC_RXPBS, val);
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		val = rd32(IGC_SRRCTL(i));
+		/* FIXME: For now, only support retrieving RX timestamps from
+		 * timer 0.
+		 */
+		val |= IGC_SRRCTL_TIMER1SEL(0) | IGC_SRRCTL_TIMER0SEL(0) |
+		       IGC_SRRCTL_TIMESTAMP;
+		wr32(IGC_SRRCTL(i), val);
+	}
+
+	val = IGC_TSYNCRXCTL_ENABLED | IGC_TSYNCRXCTL_TYPE_ALL |
+	      IGC_TSYNCRXCTL_RXSYNSIG;
+	wr32(IGC_TSYNCRXCTL, val);
+}
+
+static void igc_ptp_clear_tx_tstamp(struct igc_adapter *adapter)
+{
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
+
+	for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) {
+		struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i];
+
+		dev_kfree_skb_any(tstamp->skb);
+		tstamp->skb = NULL;
+	}
+
+	spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
+}
+
+static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int i;
+
+	/* Clear the flags first to avoid new packets to be enqueued
+	 * for TX timestamping.
+	 */
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+		clear_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags);
+	}
+
+	/* Now we can clean the pending TX timestamp requests. */
+	igc_ptp_clear_tx_tstamp(adapter);
+
+	wr32(IGC_TSYNCTXCTL, 0);
+}
+
+static void igc_ptp_enable_tx_timestamp(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int i;
+
+	wr32(IGC_TSYNCTXCTL, IGC_TSYNCTXCTL_ENABLED | IGC_TSYNCTXCTL_TXSYNSIG);
+
+	/* Read TXSTMP registers to discard any timestamp previously stored. */
+	rd32(IGC_TXSTMPL);
+	rd32(IGC_TXSTMPH);
+
+	/* The hardware is ready to accept TX timestamp requests,
+	 * notify the transmit path.
+	 */
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+		set_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags);
+	}
+
+}
+
+/**
+ * igc_ptp_set_timestamp_mode - setup hardware for timestamping
+ * @adapter: networking device structure
+ * @config: hwtstamp configuration
+ *
+ * Return: 0 in case of success, negative errno code otherwise.
+ */
+static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter,
+				      struct hwtstamp_config *config)
+{
+	switch (config->tx_type) {
+	case HWTSTAMP_TX_OFF:
+		igc_ptp_disable_tx_timestamp(adapter);
+		break;
+	case HWTSTAMP_TX_ON:
+		igc_ptp_enable_tx_timestamp(adapter);
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config->rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		igc_ptp_disable_rx_timestamp(adapter);
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_NTP_ALL:
+	case HWTSTAMP_FILTER_ALL:
+		igc_ptp_enable_rx_timestamp(adapter);
+		config->rx_filter = HWTSTAMP_FILTER_ALL;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
+/* Requires adapter->ptp_tx_lock held by caller. */
+static void igc_ptp_tx_timeout(struct igc_adapter *adapter,
+			       struct igc_tx_timestamp_request *tstamp)
+{
+	dev_kfree_skb_any(tstamp->skb);
+	tstamp->skb = NULL;
+	adapter->tx_hwtstamp_timeouts++;
+
+	netdev_warn(adapter->netdev, "Tx timestamp timeout\n");
+}
+
+void igc_ptp_tx_hang(struct igc_adapter *adapter)
+{
+	struct igc_tx_timestamp_request *tstamp;
+	struct igc_hw *hw = &adapter->hw;
+	unsigned long flags;
+	bool found = false;
+	int i;
+
+	spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
+
+	for (i = 0; i < IGC_MAX_TX_TSTAMP_REGS; i++) {
+		tstamp = &adapter->tx_tstamp[i];
+
+		if (!tstamp->skb)
+			continue;
+
+		if (time_is_after_jiffies(tstamp->start + IGC_PTP_TX_TIMEOUT))
+			continue;
+
+		igc_ptp_tx_timeout(adapter, tstamp);
+		found = true;
+	}
+
+	if (found) {
+		/* Reading the high register of the first set of timestamp registers
+		 * clears all the equivalent bits in the TSYNCTXCTL register.
+		 */
+		rd32(IGC_TXSTMPH_0);
+	}
+
+	spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
+}
+
+static void igc_ptp_tx_reg_to_stamp(struct igc_adapter *adapter,
+				    struct igc_tx_timestamp_request *tstamp, u64 regval)
+{
+	struct skb_shared_hwtstamps shhwtstamps;
+	struct sk_buff *skb;
+	int adjust = 0;
+
+	skb = tstamp->skb;
+	if (!skb)
+		return;
+
+	if (igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval))
+		return;
+
+	switch (adapter->link_speed) {
+	case SPEED_10:
+		adjust = IGC_I225_TX_LATENCY_10;
+		break;
+	case SPEED_100:
+		adjust = IGC_I225_TX_LATENCY_100;
+		break;
+	case SPEED_1000:
+		adjust = IGC_I225_TX_LATENCY_1000;
+		break;
+	case SPEED_2500:
+		adjust = IGC_I225_TX_LATENCY_2500;
+		break;
+	}
+
+	shhwtstamps.hwtstamp =
+		ktime_add_ns(shhwtstamps.hwtstamp, adjust);
+
+	tstamp->skb = NULL;
+
+	skb_tstamp_tx(skb, &shhwtstamps);
+	dev_kfree_skb_any(skb);
+}
+
+/**
+ * igc_ptp_tx_hwtstamp - utility function which checks for TX time stamp
+ * @adapter: Board private structure
+ *
+ * Check against the ready mask for which of the timestamp register
+ * sets are ready to be retrieved, then retrieve that and notify the
+ * rest of the stack.
+ *
+ * Context: Expects adapter->ptp_tx_lock to be held by caller.
+ */
+static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u64 regval;
+	u32 mask;
+	int i;
+
+	mask = rd32(IGC_TSYNCTXCTL) & IGC_TSYNCTXCTL_TXTT_ANY;
+	if (mask & IGC_TSYNCTXCTL_TXTT_0) {
+		regval = rd32(IGC_TXSTMPL);
+		regval |= (u64)rd32(IGC_TXSTMPH) << 32;
+	} else {
+		/* There's a bug in the hardware that could cause
+		 * missing interrupts for TX timestamping. The issue
+		 * is that for new interrupts to be triggered, the
+		 * IGC_TXSTMPH_0 register must be read.
+		 *
+		 * To avoid discarding a valid timestamp that just
+		 * happened at the "wrong" time, we need to confirm
+		 * that there was no timestamp captured, we do that by
+		 * assuming that no two timestamps in sequence have
+		 * the same nanosecond value.
+		 *
+		 * So, we read the "low" register, read the "high"
+		 * register (to latch a new timestamp) and read the
+		 * "low" register again, if "old" and "new" versions
+		 * of the "low" register are different, a valid
+		 * timestamp was captured, we can read the "high"
+		 * register again.
+		 */
+		u32 txstmpl_old, txstmpl_new;
+
+		txstmpl_old = rd32(IGC_TXSTMPL);
+		rd32(IGC_TXSTMPH);
+		txstmpl_new = rd32(IGC_TXSTMPL);
+
+		if (txstmpl_old == txstmpl_new)
+			goto done;
+
+		regval = txstmpl_new;
+		regval |= (u64)rd32(IGC_TXSTMPH) << 32;
+	}
+
+	igc_ptp_tx_reg_to_stamp(adapter, &adapter->tx_tstamp[0], regval);
+
+done:
+	/* Now that the problematic first register was handled, we can
+	 * use retrieve the timestamps from the other registers
+	 * (starting from '1') with less complications.
+	 */
+	for (i = 1; i < IGC_MAX_TX_TSTAMP_REGS; i++) {
+		struct igc_tx_timestamp_request *tstamp = &adapter->tx_tstamp[i];
+
+		if (!(tstamp->mask & mask))
+			continue;
+
+		regval = rd32(tstamp->regl);
+		regval |= (u64)rd32(tstamp->regh) << 32;
+
+		igc_ptp_tx_reg_to_stamp(adapter, tstamp, regval);
+	}
+}
+
+/**
+ * igc_ptp_tx_tstamp_event
+ * @adapter: board private structure
+ *
+ * Called when a TX timestamp interrupt happens to retrieve the
+ * timestamp and send it up to the socket.
+ */
+void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
+
+	igc_ptp_tx_hwtstamp(adapter);
+
+	spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
+}
+
+/**
+ * igc_ptp_set_ts_config - set hardware time stamping config
+ * @netdev: network interface device structure
+ * @ifr: interface request data
+ *
+ **/
+int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct hwtstamp_config config;
+	int err;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	err = igc_ptp_set_timestamp_mode(adapter, &config);
+	if (err)
+		return err;
+
+	/* save these settings for future reference */
+	memcpy(&adapter->tstamp_config, &config,
+	       sizeof(adapter->tstamp_config));
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+		-EFAULT : 0;
+}
+
+/**
+ * igc_ptp_get_ts_config - get hardware time stamping config
+ * @netdev: network interface device structure
+ * @ifr: interface request data
+ *
+ * Get the hwtstamp_config settings to return to the user. Rather than attempt
+ * to deconstruct the settings from the registers, just return a shadow copy
+ * of the last known settings.
+ **/
+int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct hwtstamp_config *config = &adapter->tstamp_config;
+
+	return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
+		-EFAULT : 0;
+}
+
+/* The two conditions below must be met for cross timestamping via
+ * PCIe PTM:
+ *
+ * 1. We have an way to convert the timestamps in the PTM messages
+ *    to something related to the system clocks (right now, only
+ *    X86 systems with support for the Always Running Timer allow that);
+ *
+ * 2. We have PTM enabled in the path from the device to the PCIe root port.
+ */
+static bool igc_is_crosststamp_supported(struct igc_adapter *adapter)
+{
+	if (!IS_ENABLED(CONFIG_X86_TSC))
+		return false;
+
+	/* FIXME: it was noticed that enabling support for PCIe PTM in
+	 * some i225-V models could cause lockups when bringing the
+	 * interface up/down. There should be no downsides to
+	 * disabling crosstimestamping support for i225-V, as it
+	 * doesn't have any PTP support. That way we gain some time
+	 * while root causing the issue.
+	 */
+	if (adapter->pdev->device == IGC_DEV_ID_I225_V)
+		return false;
+
+	return pcie_ptm_enabled(adapter->pdev);
+}
+
+static struct system_counterval_t igc_device_tstamp_to_system(u64 tstamp)
+{
+#if IS_ENABLED(CONFIG_X86_TSC) && !defined(CONFIG_UML)
+	return convert_art_ns_to_tsc(tstamp);
+#else
+	return (struct system_counterval_t) { };
+#endif
+}
+
+static void igc_ptm_log_error(struct igc_adapter *adapter, u32 ptm_stat)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	switch (ptm_stat) {
+	case IGC_PTM_STAT_RET_ERR:
+		netdev_err(netdev, "PTM Error: Root port timeout\n");
+		break;
+	case IGC_PTM_STAT_BAD_PTM_RES:
+		netdev_err(netdev, "PTM Error: Bad response, PTM Response Data expected\n");
+		break;
+	case IGC_PTM_STAT_T4M1_OVFL:
+		netdev_err(netdev, "PTM Error: T4 minus T1 overflow\n");
+		break;
+	case IGC_PTM_STAT_ADJUST_1ST:
+		netdev_err(netdev, "PTM Error: 1588 timer adjusted during first PTM cycle\n");
+		break;
+	case IGC_PTM_STAT_ADJUST_CYC:
+		netdev_err(netdev, "PTM Error: 1588 timer adjusted during non-first PTM cycle\n");
+		break;
+	default:
+		netdev_err(netdev, "PTM Error: Unknown error (%#x)\n", ptm_stat);
+		break;
+	}
+}
+
+static int igc_phc_get_syncdevicetime(ktime_t *device,
+				      struct system_counterval_t *system,
+				      void *ctx)
+{
+	u32 stat, t2_curr_h, t2_curr_l, ctrl;
+	struct igc_adapter *adapter = ctx;
+	struct igc_hw *hw = &adapter->hw;
+	int err, count = 100;
+	ktime_t t1, t2_curr;
+
+	/* Get a snapshot of system clocks to use as historic value. */
+	ktime_get_snapshot(&adapter->snapshot);
+
+	do {
+		/* Doing this in a loop because in the event of a
+		 * badly timed (ha!) system clock adjustment, we may
+		 * get PTM errors from the PCI root, but these errors
+		 * are transitory. Repeating the process returns valid
+		 * data eventually.
+		 */
+
+		/* To "manually" start the PTM cycle we need to clear and
+		 * then set again the TRIG bit.
+		 */
+		ctrl = rd32(IGC_PTM_CTRL);
+		ctrl &= ~IGC_PTM_CTRL_TRIG;
+		wr32(IGC_PTM_CTRL, ctrl);
+		ctrl |= IGC_PTM_CTRL_TRIG;
+		wr32(IGC_PTM_CTRL, ctrl);
+
+		/* The cycle only starts "for real" when software notifies
+		 * that it has read the registers, this is done by setting
+		 * VALID bit.
+		 */
+		wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID);
+
+		err = readx_poll_timeout(rd32, IGC_PTM_STAT, stat,
+					 stat, IGC_PTM_STAT_SLEEP,
+					 IGC_PTM_STAT_TIMEOUT);
+		if (err < 0) {
+			netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n");
+			return err;
+		}
+
+		if ((stat & IGC_PTM_STAT_VALID) == IGC_PTM_STAT_VALID)
+			break;
+
+		if (stat & ~IGC_PTM_STAT_VALID) {
+			/* An error occurred, log it. */
+			igc_ptm_log_error(adapter, stat);
+			/* The STAT register is write-1-to-clear (W1C),
+			 * so write the previous error status to clear it.
+			 */
+			wr32(IGC_PTM_STAT, stat);
+			continue;
+		}
+	} while (--count);
+
+	if (!count) {
+		netdev_err(adapter->netdev, "Exceeded number of tries for PTM cycle\n");
+		return -ETIMEDOUT;
+	}
+
+	t1 = ktime_set(rd32(IGC_PTM_T1_TIM0_H), rd32(IGC_PTM_T1_TIM0_L));
+
+	t2_curr_l = rd32(IGC_PTM_CURR_T2_L);
+	t2_curr_h = rd32(IGC_PTM_CURR_T2_H);
+
+	/* FIXME: When the register that tells the endianness of the
+	 * PTM registers are implemented, check them here and add the
+	 * appropriate conversion.
+	 */
+	t2_curr_h = swab32(t2_curr_h);
+
+	t2_curr = ((s64)t2_curr_h << 32 | t2_curr_l);
+
+	*device = t1;
+	*system = igc_device_tstamp_to_system(t2_curr);
+
+	return 0;
+}
+
+static int igc_ptp_getcrosststamp(struct ptp_clock_info *ptp,
+				  struct system_device_crosststamp *cts)
+{
+	struct igc_adapter *adapter = container_of(ptp, struct igc_adapter,
+						   ptp_caps);
+
+	return get_device_system_crosststamp(igc_phc_get_syncdevicetime,
+					     adapter, &adapter->snapshot, cts);
+}
+
+/**
+ * igc_ptp_init - Initialize PTP functionality
+ * @adapter: Board private structure
+ *
+ * This function is called at device probe to initialize the PTP
+ * functionality.
+ */
+void igc_ptp_init(struct igc_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct igc_tx_timestamp_request *tstamp;
+	struct igc_hw *hw = &adapter->hw;
+	int i;
+
+	tstamp = &adapter->tx_tstamp[0];
+	tstamp->mask = IGC_TSYNCTXCTL_TXTT_0;
+	tstamp->regl = IGC_TXSTMPL_0;
+	tstamp->regh = IGC_TXSTMPH_0;
+	tstamp->flags = 0;
+
+	tstamp = &adapter->tx_tstamp[1];
+	tstamp->mask = IGC_TSYNCTXCTL_TXTT_1;
+	tstamp->regl = IGC_TXSTMPL_1;
+	tstamp->regh = IGC_TXSTMPH_1;
+	tstamp->flags = IGC_TX_FLAGS_TSTAMP_1;
+
+	tstamp = &adapter->tx_tstamp[2];
+	tstamp->mask = IGC_TSYNCTXCTL_TXTT_2;
+	tstamp->regl = IGC_TXSTMPL_2;
+	tstamp->regh = IGC_TXSTMPH_2;
+	tstamp->flags = IGC_TX_FLAGS_TSTAMP_2;
+
+	tstamp = &adapter->tx_tstamp[3];
+	tstamp->mask = IGC_TSYNCTXCTL_TXTT_3;
+	tstamp->regl = IGC_TXSTMPL_3;
+	tstamp->regh = IGC_TXSTMPH_3;
+	tstamp->flags = IGC_TX_FLAGS_TSTAMP_3;
+
+	switch (hw->mac.type) {
+	case igc_i225:
+		for (i = 0; i < IGC_N_SDP; i++) {
+			struct ptp_pin_desc *ppd = &adapter->sdp_config[i];
+
+			snprintf(ppd->name, sizeof(ppd->name), "SDP%d", i);
+			ppd->index = i;
+			ppd->func = PTP_PF_NONE;
+		}
+		snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
+		adapter->ptp_caps.owner = THIS_MODULE;
+		adapter->ptp_caps.max_adj = 62499999;
+		adapter->ptp_caps.adjfine = igc_ptp_adjfine_i225;
+		adapter->ptp_caps.adjtime = igc_ptp_adjtime_i225;
+		adapter->ptp_caps.gettimex64 = igc_ptp_gettimex64_i225;
+		adapter->ptp_caps.settime64 = igc_ptp_settime_i225;
+		adapter->ptp_caps.enable = igc_ptp_feature_enable_i225;
+		adapter->ptp_caps.pps = 1;
+		adapter->ptp_caps.pin_config = adapter->sdp_config;
+		adapter->ptp_caps.n_ext_ts = IGC_N_EXTTS;
+		adapter->ptp_caps.n_per_out = IGC_N_PEROUT;
+		adapter->ptp_caps.n_pins = IGC_N_SDP;
+		adapter->ptp_caps.verify = igc_ptp_verify_pin;
+
+		if (!igc_is_crosststamp_supported(adapter))
+			break;
+
+		adapter->ptp_caps.getcrosststamp = igc_ptp_getcrosststamp;
+		break;
+	default:
+		adapter->ptp_clock = NULL;
+		return;
+	}
+
+	spin_lock_init(&adapter->ptp_tx_lock);
+	spin_lock_init(&adapter->tmreg_lock);
+
+	adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+	adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
+
+	adapter->prev_ptp_time = ktime_to_timespec64(ktime_get_real());
+	adapter->ptp_reset_start = ktime_get();
+
+	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
+						&adapter->pdev->dev);
+	if (IS_ERR(adapter->ptp_clock)) {
+		adapter->ptp_clock = NULL;
+		netdev_err(netdev, "ptp_clock_register failed\n");
+	} else if (adapter->ptp_clock) {
+		netdev_info(netdev, "PHC added\n");
+		adapter->ptp_flags |= IGC_PTP_ENABLED;
+	}
+}
+
+static void igc_ptp_time_save(struct igc_adapter *adapter)
+{
+	igc_ptp_read(adapter, &adapter->prev_ptp_time);
+	adapter->ptp_reset_start = ktime_get();
+}
+
+static void igc_ptp_time_restore(struct igc_adapter *adapter)
+{
+	struct timespec64 ts = adapter->prev_ptp_time;
+	ktime_t delta;
+
+	delta = ktime_sub(ktime_get(), adapter->ptp_reset_start);
+
+	timespec64_add_ns(&ts, ktime_to_ns(delta));
+
+	igc_ptp_write_i225(adapter, &ts);
+}
+
+static void igc_ptm_stop(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 ctrl;
+
+	ctrl = rd32(IGC_PTM_CTRL);
+	ctrl &= ~IGC_PTM_CTRL_EN;
+
+	wr32(IGC_PTM_CTRL, ctrl);
+}
+
+/**
+ * igc_ptp_suspend - Disable PTP work items and prepare for suspend
+ * @adapter: Board private structure
+ *
+ * This function stops the overflow check work and PTP Tx timestamp work, and
+ * will prepare the device for OS suspend.
+ */
+void igc_ptp_suspend(struct igc_adapter *adapter)
+{
+	if (!(adapter->ptp_flags & IGC_PTP_ENABLED))
+		return;
+
+	igc_ptp_clear_tx_tstamp(adapter);
+
+	if (pci_device_is_present(adapter->pdev)) {
+		igc_ptp_time_save(adapter);
+		igc_ptm_stop(adapter);
+	}
+}
+
+/**
+ * igc_ptp_stop - Disable PTP device and stop the overflow check.
+ * @adapter: Board private structure.
+ *
+ * This function stops the PTP support and cancels the delayed work.
+ **/
+void igc_ptp_stop(struct igc_adapter *adapter)
+{
+	igc_ptp_suspend(adapter);
+
+	if (adapter->ptp_clock) {
+		ptp_clock_unregister(adapter->ptp_clock);
+		netdev_info(adapter->netdev, "PHC removed\n");
+		adapter->ptp_flags &= ~IGC_PTP_ENABLED;
+	}
+}
+
+/**
+ * igc_ptp_reset - Re-enable the adapter for PTP following a reset.
+ * @adapter: Board private structure.
+ *
+ * This function handles the reset work required to re-enable the PTP device.
+ **/
+void igc_ptp_reset(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 cycle_ctrl, ctrl;
+	unsigned long flags;
+	u32 timadj;
+
+	/* reset the tstamp_config */
+	igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config);
+
+	spin_lock_irqsave(&adapter->tmreg_lock, flags);
+
+	switch (adapter->hw.mac.type) {
+	case igc_i225:
+		timadj = rd32(IGC_TIMADJ);
+		timadj |= IGC_TIMADJ_ADJUST_METH;
+		wr32(IGC_TIMADJ, timadj);
+
+		wr32(IGC_TSAUXC, 0x0);
+		wr32(IGC_TSSDP, 0x0);
+		wr32(IGC_TSIM,
+		     IGC_TSICR_INTERRUPTS |
+		     (adapter->pps_sys_wrap_on ? IGC_TSICR_SYS_WRAP : 0));
+		wr32(IGC_IMS, IGC_IMS_TS);
+
+		if (!igc_is_crosststamp_supported(adapter))
+			break;
+
+		wr32(IGC_PCIE_DIG_DELAY, IGC_PCIE_DIG_DELAY_DEFAULT);
+		wr32(IGC_PCIE_PHY_DELAY, IGC_PCIE_PHY_DELAY_DEFAULT);
+
+		cycle_ctrl = IGC_PTM_CYCLE_CTRL_CYC_TIME(IGC_PTM_CYC_TIME_DEFAULT);
+
+		wr32(IGC_PTM_CYCLE_CTRL, cycle_ctrl);
+
+		ctrl = IGC_PTM_CTRL_EN |
+			IGC_PTM_CTRL_START_NOW |
+			IGC_PTM_CTRL_SHRT_CYC(IGC_PTM_SHORT_CYC_DEFAULT) |
+			IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT) |
+			IGC_PTM_CTRL_TRIG;
+
+		wr32(IGC_PTM_CTRL, ctrl);
+
+		/* Force the first cycle to run. */
+		wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID);
+
+		break;
+	default:
+		/* No work to do. */
+		goto out;
+	}
+
+	/* Re-initialize the timer. */
+	if (hw->mac.type == igc_i225) {
+		igc_ptp_time_restore(adapter);
+	} else {
+		timecounter_init(&adapter->tc, &adapter->cc,
+				 ktime_to_ns(ktime_get_real()));
+	}
+out:
+	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+	wrfl();
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
new file mode 100644
index 0000000000..20e17f5fbc
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -0,0 +1,333 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2018 Intel Corporation */
+
+#ifndef _IGC_REGS_H_
+#define _IGC_REGS_H_
+
+/* General Register Descriptions */
+#define IGC_CTRL		0x00000  /* Device Control - RW */
+#define IGC_STATUS		0x00008  /* Device Status - RO */
+#define IGC_EECD		0x00010  /* EEPROM/Flash Control - RW */
+#define IGC_CTRL_EXT		0x00018  /* Extended Device Control - RW */
+#define IGC_MDIC		0x00020  /* MDI Control - RW */
+#define IGC_CONNSW		0x00034  /* Copper/Fiber switch control - RW */
+#define IGC_VET			0x00038  /* VLAN Ether Type - RW */
+#define IGC_I225_PHPM		0x00E14  /* I225 PHY Power Management */
+#define IGC_GPHY_VERSION	0x0001E  /* I225 gPHY Firmware Version */
+
+/* Internal Packet Buffer Size Registers */
+#define IGC_RXPBS		0x02404  /* Rx Packet Buffer Size - RW */
+#define IGC_TXPBS		0x03404  /* Tx Packet Buffer Size - RW */
+
+/* NVM  Register Descriptions */
+#define IGC_EERD		0x12014  /* EEprom mode read - RW */
+#define IGC_EEWR		0x12018  /* EEprom mode write - RW */
+
+/* Flow Control Register Descriptions */
+#define IGC_FCAL		0x00028  /* FC Address Low - RW */
+#define IGC_FCAH		0x0002C  /* FC Address High - RW */
+#define IGC_FCT			0x00030  /* FC Type - RW */
+#define IGC_FCTTV		0x00170  /* FC Transmit Timer - RW */
+#define IGC_FCRTL		0x02160  /* FC Receive Threshold Low - RW */
+#define IGC_FCRTH		0x02168  /* FC Receive Threshold High - RW */
+#define IGC_FCRTV		0x02460  /* FC Refresh Timer Value - RW */
+
+/* Semaphore registers */
+#define IGC_SW_FW_SYNC		0x05B5C  /* SW-FW Synchronization - RW */
+#define IGC_SWSM		0x05B50  /* SW Semaphore */
+#define IGC_FWSM		0x05B54  /* FW Semaphore */
+
+/* Function Active and Power State to MNG */
+#define IGC_FACTPS		0x05B30
+
+/* Interrupt Register Description */
+#define IGC_EICR		0x01580  /* Ext. Interrupt Cause read - W0 */
+#define IGC_EICS		0x01520  /* Ext. Interrupt Cause Set - W0 */
+#define IGC_EIMS		0x01524  /* Ext. Interrupt Mask Set/Read - RW */
+#define IGC_EIMC		0x01528  /* Ext. Interrupt Mask Clear - WO */
+#define IGC_EIAC		0x0152C  /* Ext. Interrupt Auto Clear - RW */
+#define IGC_EIAM		0x01530  /* Ext. Interrupt Auto Mask - RW */
+#define IGC_ICR			0x01500  /* Intr Cause Read - RC/W1C */
+#define IGC_ICS			0x01504  /* Intr Cause Set - WO */
+#define IGC_IMS			0x01508  /* Intr Mask Set/Read - RW */
+#define IGC_IMC			0x0150C  /* Intr Mask Clear - WO */
+#define IGC_IAM			0x01510  /* Intr Ack Auto Mask- RW */
+/* Intr Throttle - RW */
+#define IGC_EITR(_n)		(0x01680 + (0x4 * (_n)))
+/* Interrupt Vector Allocation - RW */
+#define IGC_IVAR0		0x01700
+#define IGC_IVAR_MISC		0x01740  /* IVAR for "other" causes - RW */
+#define IGC_GPIE		0x01514  /* General Purpose Intr Enable - RW */
+
+/* RSS registers */
+#define IGC_MRQC		0x05818 /* Multiple Receive Control - RW */
+
+/* Filtering Registers */
+#define IGC_ETQF(_n)		(0x05CB0 + (4 * (_n))) /* EType Queue Fltr */
+#define IGC_FHFT(_n)		(0x09000 + (256 * (_n))) /* Flexible Host Filter */
+#define IGC_FHFT_EXT(_n)	(0x09A00 + (256 * (_n))) /* Flexible Host Filter Extended */
+#define IGC_FHFTSL		0x05804 /* Flex Filter indirect table select */
+
+/* ETQF register bit definitions */
+#define IGC_ETQF_FILTER_ENABLE	BIT(26)
+#define IGC_ETQF_QUEUE_ENABLE	BIT(31)
+#define IGC_ETQF_QUEUE_SHIFT	16
+#define IGC_ETQF_QUEUE_MASK	0x00070000
+#define IGC_ETQF_ETYPE_MASK	0x0000FFFF
+
+/* FHFT register bit definitions */
+#define IGC_FHFT_LENGTH_MASK	GENMASK(7, 0)
+#define IGC_FHFT_QUEUE_SHIFT	8
+#define IGC_FHFT_QUEUE_MASK	GENMASK(10, 8)
+#define IGC_FHFT_PRIO_SHIFT	16
+#define IGC_FHFT_PRIO_MASK	GENMASK(18, 16)
+#define IGC_FHFT_IMM_INT	BIT(24)
+#define IGC_FHFT_DROP		BIT(25)
+
+/* FHFTSL register bit definitions */
+#define IGC_FHFTSL_FTSL_SHIFT	0
+#define IGC_FHFTSL_FTSL_MASK	GENMASK(1, 0)
+
+/* Redirection Table - RW Array */
+#define IGC_RETA(_i)		(0x05C00 + ((_i) * 4))
+/* RSS Random Key - RW Array */
+#define IGC_RSSRK(_i)		(0x05C80 + ((_i) * 4))
+
+/* Receive Register Descriptions */
+#define IGC_RCTL		0x00100  /* Rx Control - RW */
+#define IGC_SRRCTL(_n)		(0x0C00C + ((_n) * 0x40))
+#define IGC_PSRTYPE(_i)		(0x05480 + ((_i) * 4))
+#define IGC_RDBAL(_n)		(0x0C000 + ((_n) * 0x40))
+#define IGC_RDBAH(_n)		(0x0C004 + ((_n) * 0x40))
+#define IGC_RDLEN(_n)		(0x0C008 + ((_n) * 0x40))
+#define IGC_RDH(_n)		(0x0C010 + ((_n) * 0x40))
+#define IGC_RDT(_n)		(0x0C018 + ((_n) * 0x40))
+#define IGC_RXDCTL(_n)		(0x0C028 + ((_n) * 0x40))
+#define IGC_RQDPC(_n)		(0x0C030 + ((_n) * 0x40))
+#define IGC_RXCSUM		0x05000  /* Rx Checksum Control - RW */
+#define IGC_RLPML		0x05004  /* Rx Long Packet Max Length */
+#define IGC_RFCTL		0x05008  /* Receive Filter Control*/
+#define IGC_MTA			0x05200  /* Multicast Table Array - RW Array */
+#define IGC_RA			0x05400  /* Receive Address - RW Array */
+#define IGC_UTA			0x0A000  /* Unicast Table Array - RW */
+#define IGC_RAL(_n)		(0x05400 + ((_n) * 0x08))
+#define IGC_RAH(_n)		(0x05404 + ((_n) * 0x08))
+#define IGC_VLANPQF		0x055B0  /* VLAN Priority Queue Filter - RW */
+
+/* Transmit Register Descriptions */
+#define IGC_TCTL		0x00400  /* Tx Control - RW */
+#define IGC_TIPG		0x00410  /* Tx Inter-packet gap - RW */
+#define IGC_TDBAL(_n)		(0x0E000 + ((_n) * 0x40))
+#define IGC_TDBAH(_n)		(0x0E004 + ((_n) * 0x40))
+#define IGC_TDLEN(_n)		(0x0E008 + ((_n) * 0x40))
+#define IGC_TDH(_n)		(0x0E010 + ((_n) * 0x40))
+#define IGC_TDT(_n)		(0x0E018 + ((_n) * 0x40))
+#define IGC_TXDCTL(_n)		(0x0E028 + ((_n) * 0x40))
+
+/* MMD Register Descriptions */
+#define IGC_MMDAC		13 /* MMD Access Control */
+#define IGC_MMDAAD		14 /* MMD Access Address/Data */
+
+/* Statistics Register Descriptions */
+#define IGC_CRCERRS	0x04000  /* CRC Error Count - R/clr */
+#define IGC_ALGNERRC	0x04004  /* Alignment Error Count - R/clr */
+#define IGC_RXERRC	0x0400C  /* Receive Error Count - R/clr */
+#define IGC_MPC		0x04010  /* Missed Packet Count - R/clr */
+#define IGC_SCC		0x04014  /* Single Collision Count - R/clr */
+#define IGC_ECOL	0x04018  /* Excessive Collision Count - R/clr */
+#define IGC_MCC		0x0401C  /* Multiple Collision Count - R/clr */
+#define IGC_LATECOL	0x04020  /* Late Collision Count - R/clr */
+#define IGC_COLC	0x04028  /* Collision Count - R/clr */
+#define IGC_RERC	0x0402C  /* Receive Error Count - R/clr */
+#define IGC_DC		0x04030  /* Defer Count - R/clr */
+#define IGC_TNCRS	0x04034  /* Tx-No CRS - R/clr */
+#define IGC_HTDPMC	0x0403C  /* Host Transmit Discarded by MAC - R/clr */
+#define IGC_RLEC	0x04040  /* Receive Length Error Count - R/clr */
+#define IGC_XONRXC	0x04048  /* XON Rx Count - R/clr */
+#define IGC_XONTXC	0x0404C  /* XON Tx Count - R/clr */
+#define IGC_XOFFRXC	0x04050  /* XOFF Rx Count - R/clr */
+#define IGC_XOFFTXC	0x04054  /* XOFF Tx Count - R/clr */
+#define IGC_FCRUC	0x04058  /* Flow Control Rx Unsupported Count- R/clr */
+#define IGC_PRC64	0x0405C  /* Packets Rx (64 bytes) - R/clr */
+#define IGC_PRC127	0x04060  /* Packets Rx (65-127 bytes) - R/clr */
+#define IGC_PRC255	0x04064  /* Packets Rx (128-255 bytes) - R/clr */
+#define IGC_PRC511	0x04068  /* Packets Rx (255-511 bytes) - R/clr */
+#define IGC_PRC1023	0x0406C  /* Packets Rx (512-1023 bytes) - R/clr */
+#define IGC_PRC1522	0x04070  /* Packets Rx (1024-1522 bytes) - R/clr */
+#define IGC_GPRC	0x04074  /* Good Packets Rx Count - R/clr */
+#define IGC_BPRC	0x04078  /* Broadcast Packets Rx Count - R/clr */
+#define IGC_MPRC	0x0407C  /* Multicast Packets Rx Count - R/clr */
+#define IGC_GPTC	0x04080  /* Good Packets Tx Count - R/clr */
+#define IGC_GORCL	0x04088  /* Good Octets Rx Count Low - R/clr */
+#define IGC_GORCH	0x0408C  /* Good Octets Rx Count High - R/clr */
+#define IGC_GOTCL	0x04090  /* Good Octets Tx Count Low - R/clr */
+#define IGC_GOTCH	0x04094  /* Good Octets Tx Count High - R/clr */
+#define IGC_RNBC	0x040A0  /* Rx No Buffers Count - R/clr */
+#define IGC_RUC		0x040A4  /* Rx Undersize Count - R/clr */
+#define IGC_RFC		0x040A8  /* Rx Fragment Count - R/clr */
+#define IGC_ROC		0x040AC  /* Rx Oversize Count - R/clr */
+#define IGC_RJC		0x040B0  /* Rx Jabber Count - R/clr */
+#define IGC_MGTPRC	0x040B4  /* Management Packets Rx Count - R/clr */
+#define IGC_MGTPDC	0x040B8  /* Management Packets Dropped Count - R/clr */
+#define IGC_MGTPTC	0x040BC  /* Management Packets Tx Count - R/clr */
+#define IGC_TORL	0x040C0  /* Total Octets Rx Low - R/clr */
+#define IGC_TORH	0x040C4  /* Total Octets Rx High - R/clr */
+#define IGC_TOTL	0x040C8  /* Total Octets Tx Low - R/clr */
+#define IGC_TOTH	0x040CC  /* Total Octets Tx High - R/clr */
+#define IGC_TPR		0x040D0  /* Total Packets Rx - R/clr */
+#define IGC_TPT		0x040D4  /* Total Packets Tx - R/clr */
+#define IGC_PTC64	0x040D8  /* Packets Tx (64 bytes) - R/clr */
+#define IGC_PTC127	0x040DC  /* Packets Tx (65-127 bytes) - R/clr */
+#define IGC_PTC255	0x040E0  /* Packets Tx (128-255 bytes) - R/clr */
+#define IGC_PTC511	0x040E4  /* Packets Tx (256-511 bytes) - R/clr */
+#define IGC_PTC1023	0x040E8  /* Packets Tx (512-1023 bytes) - R/clr */
+#define IGC_PTC1522	0x040EC  /* Packets Tx (1024-1522 Bytes) - R/clr */
+#define IGC_MPTC	0x040F0  /* Multicast Packets Tx Count - R/clr */
+#define IGC_BPTC	0x040F4  /* Broadcast Packets Tx Count - R/clr */
+#define IGC_TSCTC	0x040F8  /* TCP Segmentation Context Tx - R/clr */
+#define IGC_IAC		0x04100  /* Interrupt Assertion Count */
+#define IGC_RPTHC	0x04104  /* Rx Packets To Host */
+#define IGC_TLPIC	0x04148  /* EEE Tx LPI Count */
+#define IGC_RLPIC	0x0414C  /* EEE Rx LPI Count */
+#define IGC_HGPTC	0x04118  /* Host Good Packets Tx Count */
+#define IGC_RXDMTC	0x04120  /* Rx Descriptor Minimum Threshold Count */
+#define IGC_HGORCL	0x04128  /* Host Good Octets Received Count Low */
+#define IGC_HGORCH	0x0412C  /* Host Good Octets Received Count High */
+#define IGC_HGOTCL	0x04130  /* Host Good Octets Transmit Count Low */
+#define IGC_HGOTCH	0x04134  /* Host Good Octets Transmit Count High */
+#define IGC_LENERRS	0x04138  /* Length Errors Count */
+
+/* Time sync registers */
+#define IGC_TSICR	0x0B66C  /* Time Sync Interrupt Cause */
+#define IGC_TSIM	0x0B674  /* Time Sync Interrupt Mask Register */
+#define IGC_TSAUXC	0x0B640  /* Timesync Auxiliary Control register */
+#define IGC_TSYNCRXCTL	0x0B620  /* Rx Time Sync Control register - RW */
+#define IGC_TSYNCTXCTL	0x0B614  /* Tx Time Sync Control register - RW */
+#define IGC_TSYNCRXCFG	0x05F50  /* Time Sync Rx Configuration - RW */
+#define IGC_TSSDP	0x0003C  /* Time Sync SDP Configuration Register - RW */
+#define IGC_TRGTTIML0	0x0B644 /* Target Time Register 0 Low  - RW */
+#define IGC_TRGTTIMH0	0x0B648 /* Target Time Register 0 High - RW */
+#define IGC_TRGTTIML1	0x0B64C /* Target Time Register 1 Low  - RW */
+#define IGC_TRGTTIMH1	0x0B650 /* Target Time Register 1 High - RW */
+#define IGC_FREQOUT0	0x0B654 /* Frequency Out 0 Control Register - RW */
+#define IGC_FREQOUT1	0x0B658 /* Frequency Out 1 Control Register - RW */
+#define IGC_AUXSTMPL0	0x0B65C /* Auxiliary Time Stamp 0 Register Low  - RO */
+#define IGC_AUXSTMPH0	0x0B660 /* Auxiliary Time Stamp 0 Register High - RO */
+#define IGC_AUXSTMPL1	0x0B664 /* Auxiliary Time Stamp 1 Register Low  - RO */
+#define IGC_AUXSTMPH1	0x0B668 /* Auxiliary Time Stamp 1 Register High - RO */
+
+#define IGC_IMIR(_i)	(0x05A80 + ((_i) * 4))  /* Immediate Interrupt */
+#define IGC_IMIREXT(_i)	(0x05AA0 + ((_i) * 4))  /* Immediate INTR Ext*/
+
+#define IGC_FTQF(_n)	(0x059E0 + (4 * (_n)))  /* 5-tuple Queue Fltr */
+
+/* Transmit Scheduling Registers */
+#define IGC_TQAVCTRL		0x3570
+#define IGC_TXQCTL(_n)		(0x3344 + 0x4 * (_n))
+#define IGC_GTXOFFSET		0x3310
+#define IGC_BASET_L		0x3314
+#define IGC_BASET_H		0x3318
+#define IGC_QBVCYCLET		0x331C
+#define IGC_QBVCYCLET_S		0x3320
+
+#define IGC_STQT(_n)		(0x3324 + 0x4 * (_n))
+#define IGC_ENDQT(_n)		(0x3334 + 0x4 * (_n))
+#define IGC_DTXMXPKTSZ		0x355C
+
+#define IGC_TQAVCC(_n)		(0x3004 + ((_n) * 0x40))
+#define IGC_TQAVHC(_n)		(0x300C + ((_n) * 0x40))
+
+/* System Time Registers */
+#define IGC_SYSTIML	0x0B600  /* System time register Low - RO */
+#define IGC_SYSTIMH	0x0B604  /* System time register High - RO */
+#define IGC_SYSTIMR	0x0B6F8  /* System time register Residue */
+#define IGC_TIMINCA	0x0B608  /* Increment attributes register - RW */
+
+/* TX Timestamp Low */
+#define IGC_TXSTMPL_0		0x0B618
+#define IGC_TXSTMPL_1		0x0B698
+#define IGC_TXSTMPL_2		0x0B6B8
+#define IGC_TXSTMPL_3		0x0B6D8
+
+/* TX Timestamp High */
+#define IGC_TXSTMPH_0		0x0B61C
+#define IGC_TXSTMPH_1		0x0B69C
+#define IGC_TXSTMPH_2		0x0B6BC
+#define IGC_TXSTMPH_3		0x0B6DC
+
+#define IGC_TXSTMPL	0x0B618  /* Tx timestamp value Low - RO */
+#define IGC_TXSTMPH	0x0B61C  /* Tx timestamp value High - RO */
+
+#define IGC_TIMADJ	0x0B60C  /* Time Adjustment Offset Register */
+
+/* PCIe Registers */
+#define IGC_PTM_CTRL		0x12540  /* PTM Control */
+#define IGC_PTM_STAT		0x12544  /* PTM Status */
+#define IGC_PTM_CYCLE_CTRL	0x1254C  /* PTM Cycle Control */
+
+/* PTM Time registers */
+#define IGC_PTM_T1_TIM0_L	0x12558  /* T1 on Timer 0 Low */
+#define IGC_PTM_T1_TIM0_H	0x1255C  /* T1 on Timer 0 High */
+
+#define IGC_PTM_CURR_T2_L	0x1258C  /* Current T2 Low */
+#define IGC_PTM_CURR_T2_H	0x12590  /* Current T2 High */
+#define IGC_PTM_PREV_T2_L	0x12584  /* Previous T2 Low */
+#define IGC_PTM_PREV_T2_H	0x12588  /* Previous T2 High */
+#define IGC_PTM_PREV_T4M1	0x12578  /* T4 Minus T1 on previous PTM Cycle */
+#define IGC_PTM_CURR_T4M1	0x1257C  /* T4 Minus T1 on this PTM Cycle */
+#define IGC_PTM_PREV_T3M2	0x12580  /* T3 Minus T2 on previous PTM Cycle */
+#define IGC_PTM_TDELAY		0x12594  /* PTM PCIe Link Delay */
+
+#define IGC_PCIE_DIG_DELAY	0x12550  /* PCIe Digital Delay */
+#define IGC_PCIE_PHY_DELAY	0x12554  /* PCIe PHY Delay */
+
+/* Management registers */
+#define IGC_MANC	0x05820  /* Management Control - RW */
+
+/* Shadow Ram Write Register - RW */
+#define IGC_SRWR	0x12018
+
+/* Wake Up registers */
+#define IGC_WUC		0x05800  /* Wakeup Control - RW */
+#define IGC_WUFC	0x05808  /* Wakeup Filter Control - RW */
+#define IGC_WUS		0x05810  /* Wakeup Status - R/W1C */
+#define IGC_WUPL	0x05900  /* Wakeup Packet Length - RW */
+#define IGC_WUFC_EXT	0x0580C  /* Wakeup Filter Control Register Extended - RW */
+
+/* Wake Up packet memory */
+#define IGC_WUPM_REG(_i)	(0x05A00 + ((_i) * 4))
+
+/* Energy Efficient Ethernet "EEE" registers */
+#define IGC_EEER	0x0E30 /* Energy Efficient Ethernet "EEE"*/
+#define IGC_IPCNFG	0x0E38 /* Internal PHY Configuration */
+#define IGC_EEE_SU	0x0E34 /* EEE Setup */
+
+/* LTR registers */
+#define IGC_LTRC	0x01A0 /* Latency Tolerance Reporting Control */
+#define IGC_LTRMINV	0x5BB0 /* LTR Minimum Value */
+#define IGC_LTRMAXV	0x5BB4 /* LTR Maximum Value */
+
+/* forward declaration */
+struct igc_hw;
+u32 igc_rd32(struct igc_hw *hw, u32 reg);
+
+/* write operations, indexed using DWORDS */
+#define wr32(reg, val) \
+do { \
+	u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
+	if (!IGC_REMOVED(hw_addr)) \
+		writel((val), &hw_addr[(reg)]); \
+} while (0)
+
+#define rd32(reg) (igc_rd32(hw, reg))
+
+#define wrfl() ((void)rd32(IGC_STATUS))
+
+#define array_wr32(reg, offset, value) \
+	wr32((reg) + ((offset) << 2), (value))
+
+#define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2)))
+
+#define IGC_REMOVED(h) unlikely(!(h))
+
+#endif
diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
new file mode 100644
index 0000000000..22cefb1eee
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2019 Intel Corporation */
+
+#include "igc.h"
+#include "igc_hw.h"
+#include "igc_tsn.h"
+
+static bool is_any_launchtime(struct igc_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *ring = adapter->tx_ring[i];
+
+		if (ring->launchtime_enable)
+			return true;
+	}
+
+	return false;
+}
+
+static bool is_cbs_enabled(struct igc_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *ring = adapter->tx_ring[i];
+
+		if (ring->cbs_enable)
+			return true;
+	}
+
+	return false;
+}
+
+static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter)
+{
+	unsigned int new_flags = adapter->flags & ~IGC_FLAG_TSN_ANY_ENABLED;
+
+	if (adapter->taprio_offload_enable)
+		new_flags |= IGC_FLAG_TSN_QBV_ENABLED;
+
+	if (is_any_launchtime(adapter))
+		new_flags |= IGC_FLAG_TSN_QBV_ENABLED;
+
+	if (is_cbs_enabled(adapter))
+		new_flags |= IGC_FLAG_TSN_QAV_ENABLED;
+
+	return new_flags;
+}
+
+void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u16 txoffset;
+
+	if (!is_any_launchtime(adapter))
+		return;
+
+	switch (adapter->link_speed) {
+	case SPEED_10:
+		txoffset = IGC_TXOFFSET_SPEED_10;
+		break;
+	case SPEED_100:
+		txoffset = IGC_TXOFFSET_SPEED_100;
+		break;
+	case SPEED_1000:
+		txoffset = IGC_TXOFFSET_SPEED_1000;
+		break;
+	case SPEED_2500:
+		txoffset = IGC_TXOFFSET_SPEED_2500;
+		break;
+	default:
+		txoffset = 0;
+		break;
+	}
+
+	wr32(IGC_GTXOFFSET, txoffset);
+}
+
+/* Returns the TSN specific registers to their default values after
+ * the adapter is reset.
+ */
+static int igc_tsn_disable_offload(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 tqavctrl;
+	int i;
+
+	wr32(IGC_GTXOFFSET, 0);
+	wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
+	wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT);
+
+	tqavctrl = rd32(IGC_TQAVCTRL);
+	tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN |
+		      IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_FUTSCDDIS);
+
+	wr32(IGC_TQAVCTRL, tqavctrl);
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		wr32(IGC_TXQCTL(i), 0);
+		wr32(IGC_STQT(i), 0);
+		wr32(IGC_ENDQT(i), NSEC_PER_SEC);
+	}
+
+	wr32(IGC_QBVCYCLET_S, 0);
+	wr32(IGC_QBVCYCLET, NSEC_PER_SEC);
+
+	adapter->flags &= ~IGC_FLAG_TSN_QBV_ENABLED;
+
+	return 0;
+}
+
+static int igc_tsn_enable_offload(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 tqavctrl, baset_l, baset_h;
+	u32 sec, nsec, cycle;
+	ktime_t base_time, systim;
+	int i;
+
+	wr32(IGC_TSAUXC, 0);
+	wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN);
+	wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN);
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *ring = adapter->tx_ring[i];
+		u32 txqctl = 0;
+		u16 cbs_value;
+		u32 tqavcc;
+
+		wr32(IGC_STQT(i), ring->start_time);
+		wr32(IGC_ENDQT(i), ring->end_time);
+
+		if (adapter->taprio_offload_enable) {
+			/* If taprio_offload_enable is set we are in "taprio"
+			 * mode and we need to be strict about the
+			 * cycles: only transmit a packet if it can be
+			 * completed during that cycle.
+			 *
+			 * If taprio_offload_enable is NOT true when
+			 * enabling TSN offload, the cycle should have
+			 * no external effects, but is only used internally
+			 * to adapt the base time register after a second
+			 * has passed.
+			 *
+			 * Enabling strict mode in this case would
+			 * unnecessarily prevent the transmission of
+			 * certain packets (i.e. at the boundary of a
+			 * second) and thus interfere with the launchtime
+			 * feature that promises transmission at a
+			 * certain point in time.
+			 */
+			txqctl |= IGC_TXQCTL_STRICT_CYCLE |
+				IGC_TXQCTL_STRICT_END;
+		}
+
+		if (ring->launchtime_enable)
+			txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT;
+
+		/* Skip configuring CBS for Q2 and Q3 */
+		if (i > 1)
+			goto skip_cbs;
+
+		if (ring->cbs_enable) {
+			if (i == 0)
+				txqctl |= IGC_TXQCTL_QAV_SEL_CBS0;
+			else
+				txqctl |= IGC_TXQCTL_QAV_SEL_CBS1;
+
+			/* According to i225 datasheet section 7.5.2.7, we
+			 * should set the 'idleSlope' field from TQAVCC
+			 * register following the equation:
+			 *
+			 * value = link-speed   0x7736 * BW * 0.2
+			 *         ---------- *  -----------------         (E1)
+			 *          100Mbps            2.5
+			 *
+			 * Note that 'link-speed' is in Mbps.
+			 *
+			 * 'BW' is the percentage bandwidth out of full
+			 * link speed which can be found with the
+			 * following equation. Note that idleSlope here
+			 * is the parameter from this function
+			 * which is in kbps.
+			 *
+			 *     BW =     idleSlope
+			 *          -----------------                      (E2)
+			 *          link-speed * 1000
+			 *
+			 * That said, we can come up with a generic
+			 * equation to calculate the value we should set
+			 * it TQAVCC register by replacing 'BW' in E1 by E2.
+			 * The resulting equation is:
+			 *
+			 * value = link-speed * 0x7736 * idleSlope * 0.2
+			 *         -------------------------------------   (E3)
+			 *             100 * 2.5 * link-speed * 1000
+			 *
+			 * 'link-speed' is present in both sides of the
+			 * fraction so it is canceled out. The final
+			 * equation is the following:
+			 *
+			 *     value = idleSlope * 61036
+			 *             -----------------                   (E4)
+			 *                  2500000
+			 *
+			 * NOTE: For i225, given the above, we can see
+			 *       that idleslope is represented in
+			 *       40.959433 kbps units by the value at
+			 *       the TQAVCC register (2.5Gbps / 61036),
+			 *       which reduces the granularity for
+			 *       idleslope increments.
+			 *
+			 * In i225 controller, the sendSlope and loCredit
+			 * parameters from CBS are not configurable
+			 * by software so we don't do any
+			 * 'controller configuration' in respect to
+			 * these parameters.
+			 */
+			cbs_value = DIV_ROUND_UP_ULL(ring->idleslope
+						     * 61036ULL, 2500000);
+
+			tqavcc = rd32(IGC_TQAVCC(i));
+			tqavcc &= ~IGC_TQAVCC_IDLESLOPE_MASK;
+			tqavcc |= cbs_value | IGC_TQAVCC_KEEP_CREDITS;
+			wr32(IGC_TQAVCC(i), tqavcc);
+
+			wr32(IGC_TQAVHC(i),
+			     0x80000000 + ring->hicredit * 0x7736);
+		} else {
+			/* Disable any CBS for the queue */
+			txqctl &= ~(IGC_TXQCTL_QAV_SEL_MASK);
+
+			/* Set idleSlope to zero. */
+			tqavcc = rd32(IGC_TQAVCC(i));
+			tqavcc &= ~(IGC_TQAVCC_IDLESLOPE_MASK |
+				    IGC_TQAVCC_KEEP_CREDITS);
+			wr32(IGC_TQAVCC(i), tqavcc);
+
+			/* Set hiCredit to zero. */
+			wr32(IGC_TQAVHC(i), 0);
+		}
+skip_cbs:
+		wr32(IGC_TXQCTL(i), txqctl);
+	}
+
+	tqavctrl = rd32(IGC_TQAVCTRL) & ~IGC_TQAVCTRL_FUTSCDDIS;
+
+	tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV;
+
+	adapter->qbv_count++;
+
+	cycle = adapter->cycle_time;
+	base_time = adapter->base_time;
+
+	nsec = rd32(IGC_SYSTIML);
+	sec = rd32(IGC_SYSTIMH);
+
+	systim = ktime_set(sec, nsec);
+	if (ktime_compare(systim, base_time) > 0) {
+		s64 n = div64_s64(ktime_sub_ns(systim, base_time), cycle);
+
+		base_time = ktime_add_ns(base_time, (n + 1) * cycle);
+
+		/* Increase the counter if scheduling into the past while
+		 * Gate Control List (GCL) is running.
+		 */
+		if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) &&
+		    (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) &&
+		    (adapter->qbv_count > 1))
+			adapter->qbv_config_change_errors++;
+	} else {
+		if (igc_is_device_id_i226(hw)) {
+			ktime_t adjust_time, expires_time;
+
+		       /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit
+			* has to be configured before the cycle time and base time.
+			* Tx won't hang if a GCL is already running,
+			* so in this case we don't need to set FutScdDis.
+			*/
+			if (!(rd32(IGC_BASET_H) || rd32(IGC_BASET_L)))
+				tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS;
+
+			nsec = rd32(IGC_SYSTIML);
+			sec = rd32(IGC_SYSTIMH);
+			systim = ktime_set(sec, nsec);
+
+			adjust_time = adapter->base_time;
+			expires_time = ktime_sub_ns(adjust_time, systim);
+			hrtimer_start(&adapter->hrtimer, expires_time, HRTIMER_MODE_REL);
+		}
+	}
+
+	wr32(IGC_TQAVCTRL, tqavctrl);
+
+	wr32(IGC_QBVCYCLET_S, cycle);
+	wr32(IGC_QBVCYCLET, cycle);
+
+	baset_h = div_s64_rem(base_time, NSEC_PER_SEC, &baset_l);
+	wr32(IGC_BASET_H, baset_h);
+
+	/* In i226, Future base time is only supported when FutScdDis bit
+	 * is enabled and only active for re-configuration.
+	 * In this case, initialize the base time with zero to create
+	 * "re-configuration" scenario then only set the desired base time.
+	 */
+	if (tqavctrl & IGC_TQAVCTRL_FUTSCDDIS)
+		wr32(IGC_BASET_L, 0);
+	wr32(IGC_BASET_L, baset_l);
+
+	return 0;
+}
+
+int igc_tsn_reset(struct igc_adapter *adapter)
+{
+	unsigned int new_flags;
+	int err = 0;
+
+	new_flags = igc_tsn_new_flags(adapter);
+
+	if (!(new_flags & IGC_FLAG_TSN_ANY_ENABLED))
+		return igc_tsn_disable_offload(adapter);
+
+	err = igc_tsn_enable_offload(adapter);
+	if (err < 0)
+		return err;
+
+	adapter->flags = new_flags;
+
+	return err;
+}
+
+int igc_tsn_offload_apply(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+
+	/* Per I225/6 HW Design Section 7.5.2.1, transmit mode
+	 * cannot be changed dynamically. Require reset the adapter.
+	 */
+	if (netif_running(adapter->netdev) &&
+	    (igc_is_device_id_i225(hw) || !adapter->qbv_count)) {
+		schedule_work(&adapter->reset_task);
+		return 0;
+	}
+
+	igc_tsn_reset(adapter);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h
new file mode 100644
index 0000000000..b53e6af560
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2020 Intel Corporation */
+
+#ifndef _IGC_TSN_H_
+#define _IGC_TSN_H_
+
+int igc_tsn_offload_apply(struct igc_adapter *adapter);
+int igc_tsn_reset(struct igc_adapter *adapter);
+void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter);
+
+#endif /* _IGC_BASE_H */
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c
new file mode 100644
index 0000000000..e27af72aad
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Intel Corporation. */
+
+#include <linux/if_vlan.h>
+#include <net/xdp_sock_drv.h>
+
+#include "igc.h"
+#include "igc_xdp.h"
+
+int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
+		     struct netlink_ext_ack *extack)
+{
+	struct net_device *dev = adapter->netdev;
+	bool if_running = netif_running(dev);
+	struct bpf_prog *old_prog;
+
+	if (dev->mtu > ETH_DATA_LEN) {
+		/* For now, the driver doesn't support XDP functionality with
+		 * jumbo frames so we return error.
+		 */
+		NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (if_running)
+		igc_close(dev);
+
+	old_prog = xchg(&adapter->xdp_prog, prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	if (prog)
+		xdp_features_set_redirect_target(dev, true);
+	else
+		xdp_features_clear_redirect_target(dev);
+
+	if (if_running)
+		igc_open(dev);
+
+	return 0;
+}
+
+static int igc_xdp_enable_pool(struct igc_adapter *adapter,
+			       struct xsk_buff_pool *pool, u16 queue_id)
+{
+	struct net_device *ndev = adapter->netdev;
+	struct device *dev = &adapter->pdev->dev;
+	struct igc_ring *rx_ring, *tx_ring;
+	struct napi_struct *napi;
+	bool needs_reset;
+	u32 frame_size;
+	int err;
+
+	if (queue_id >= adapter->num_rx_queues ||
+	    queue_id >= adapter->num_tx_queues)
+		return -EINVAL;
+
+	frame_size = xsk_pool_get_rx_frame_size(pool);
+	if (frame_size < ETH_FRAME_LEN + VLAN_HLEN * 2) {
+		/* When XDP is enabled, the driver doesn't support frames that
+		 * span over multiple buffers. To avoid that, we check if xsk
+		 * frame size is big enough to fit the max ethernet frame size
+		 * + vlan double tagging.
+		 */
+		return -EOPNOTSUPP;
+	}
+
+	err = xsk_pool_dma_map(pool, dev, IGC_RX_DMA_ATTR);
+	if (err) {
+		netdev_err(ndev, "Failed to map xsk pool\n");
+		return err;
+	}
+
+	needs_reset = netif_running(adapter->netdev) && igc_xdp_is_enabled(adapter);
+
+	rx_ring = adapter->rx_ring[queue_id];
+	tx_ring = adapter->tx_ring[queue_id];
+	/* Rx and Tx rings share the same napi context. */
+	napi = &rx_ring->q_vector->napi;
+
+	if (needs_reset) {
+		igc_disable_rx_ring(rx_ring);
+		igc_disable_tx_ring(tx_ring);
+		napi_disable(napi);
+	}
+
+	set_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
+	set_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
+
+	if (needs_reset) {
+		napi_enable(napi);
+		igc_enable_rx_ring(rx_ring);
+		igc_enable_tx_ring(tx_ring);
+
+		err = igc_xsk_wakeup(ndev, queue_id, XDP_WAKEUP_RX);
+		if (err) {
+			xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id)
+{
+	struct igc_ring *rx_ring, *tx_ring;
+	struct xsk_buff_pool *pool;
+	struct napi_struct *napi;
+	bool needs_reset;
+
+	if (queue_id >= adapter->num_rx_queues ||
+	    queue_id >= adapter->num_tx_queues)
+		return -EINVAL;
+
+	pool = xsk_get_pool_from_qid(adapter->netdev, queue_id);
+	if (!pool)
+		return -EINVAL;
+
+	needs_reset = netif_running(adapter->netdev) && igc_xdp_is_enabled(adapter);
+
+	rx_ring = adapter->rx_ring[queue_id];
+	tx_ring = adapter->tx_ring[queue_id];
+	/* Rx and Tx rings share the same napi context. */
+	napi = &rx_ring->q_vector->napi;
+
+	if (needs_reset) {
+		igc_disable_rx_ring(rx_ring);
+		igc_disable_tx_ring(tx_ring);
+		napi_disable(napi);
+	}
+
+	xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR);
+	clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
+	clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
+
+	if (needs_reset) {
+		napi_enable(napi);
+		igc_enable_rx_ring(rx_ring);
+		igc_enable_tx_ring(tx_ring);
+	}
+
+	return 0;
+}
+
+int igc_xdp_setup_pool(struct igc_adapter *adapter, struct xsk_buff_pool *pool,
+		       u16 queue_id)
+{
+	return pool ? igc_xdp_enable_pool(adapter, pool, queue_id) :
+		      igc_xdp_disable_pool(adapter, queue_id);
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.h b/drivers/net/ethernet/intel/igc/igc_xdp.h
new file mode 100644
index 0000000000..a74e5487d1
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020, Intel Corporation. */
+
+#ifndef _IGC_XDP_H_
+#define _IGC_XDP_H_
+
+int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
+		     struct netlink_ext_ack *extack);
+int igc_xdp_setup_pool(struct igc_adapter *adapter, struct xsk_buff_pool *pool,
+		       u16 queue_id);
+
+static inline bool igc_xdp_is_enabled(struct igc_adapter *adapter)
+{
+	return !!adapter->xdp_prog;
+}
+
+#endif /* _IGC_XDP_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile
new file mode 100644
index 0000000000..4fb0d9e3f2
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 1999 - 2018 Intel Corporation.
+#
+# Makefile for the Intel(R) 10GbE PCI Express ethernet driver
+#
+
+obj-$(CONFIG_IXGBE) += ixgbe.o
+
+ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \
+              ixgbe_82599.o ixgbe_82598.o ixgbe_phy.o ixgbe_sriov.o \
+              ixgbe_mbx.o ixgbe_x540.o ixgbe_x550.o ixgbe_lib.o ixgbe_ptp.o \
+              ixgbe_xsk.o
+
+ixgbe-$(CONFIG_IXGBE_DCB) +=  ixgbe_dcb.o ixgbe_dcb_82598.o \
+                              ixgbe_dcb_82599.o ixgbe_dcb_nl.o
+
+ixgbe-$(CONFIG_IXGBE_HWMON) += ixgbe_sysfs.o
+ixgbe-$(CONFIG_DEBUG_FS) += ixgbe_debugfs.o
+ixgbe-$(CONFIG_FCOE:m=y) += ixgbe_fcoe.o
+ixgbe-$(CONFIG_IXGBE_IPSEC) += ixgbe_ipsec.o
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
new file mode 100644
index 0000000000..b6f0376e42
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -0,0 +1,1099 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _IXGBE_H_
+#define _IXGBE_H_
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/cpumask.h>
+#include <linux/if_vlan.h>
+#include <linux/jiffies.h>
+#include <linux/phy.h>
+
+#include <linux/timecounter.h>
+#include <linux/net_tstamp.h>
+#include <linux/ptp_clock_kernel.h>
+
+#include "ixgbe_type.h"
+#include "ixgbe_common.h"
+#include "ixgbe_dcb.h"
+#if IS_ENABLED(CONFIG_FCOE)
+#define IXGBE_FCOE
+#include "ixgbe_fcoe.h"
+#endif /* IS_ENABLED(CONFIG_FCOE) */
+#ifdef CONFIG_IXGBE_DCA
+#include <linux/dca.h>
+#endif
+#include "ixgbe_ipsec.h"
+
+#include <net/xdp.h>
+
+/* common prefix used by pr_<> macros */
+#undef pr_fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+/* TX/RX descriptor defines */
+#define IXGBE_DEFAULT_TXD		    512
+#define IXGBE_DEFAULT_TX_WORK		    256
+#define IXGBE_MAX_TXD_82598		   4096
+#define IXGBE_MAX_TXD_82599		   8192
+#define IXGBE_MAX_TXD_X540		   8192
+#define IXGBE_MAX_TXD_X550		  32768
+#define IXGBE_MIN_TXD			     64
+
+#if (PAGE_SIZE < 8192)
+#define IXGBE_DEFAULT_RXD		    512
+#else
+#define IXGBE_DEFAULT_RXD		    128
+#endif
+#define IXGBE_MAX_RXD_82598		   4096
+#define IXGBE_MAX_RXD_82599		   8192
+#define IXGBE_MAX_RXD_X540		   8192
+#define IXGBE_MAX_RXD_X550		  32768
+#define IXGBE_MIN_RXD			     64
+
+/* flow control */
+#define IXGBE_MIN_FCRTL			   0x40
+#define IXGBE_MAX_FCRTL			0x7FF80
+#define IXGBE_MIN_FCRTH			  0x600
+#define IXGBE_MAX_FCRTH			0x7FFF0
+#define IXGBE_DEFAULT_FCPAUSE		 0xFFFF
+#define IXGBE_MIN_FCPAUSE		      0
+#define IXGBE_MAX_FCPAUSE		 0xFFFF
+
+/* Supported Rx Buffer Sizes */
+#define IXGBE_RXBUFFER_256    256  /* Used for skb receive header */
+#define IXGBE_RXBUFFER_1536  1536
+#define IXGBE_RXBUFFER_2K    2048
+#define IXGBE_RXBUFFER_3K    3072
+#define IXGBE_RXBUFFER_4K    4096
+#define IXGBE_MAX_RXBUFFER  16384  /* largest size for a single descriptor */
+
+#define IXGBE_PKT_HDR_PAD   (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2))
+
+/* Attempt to maximize the headroom available for incoming frames.  We
+ * use a 2K buffer for receives and need 1536/1534 to store the data for
+ * the frame.  This leaves us with 512 bytes of room.  From that we need
+ * to deduct the space needed for the shared info and the padding needed
+ * to IP align the frame.
+ *
+ * Note: For cache line sizes 256 or larger this value is going to end
+ *	 up negative.  In these cases we should fall back to the 3K
+ *	 buffers.
+ */
+#if (PAGE_SIZE < 8192)
+#define IXGBE_MAX_2K_FRAME_BUILD_SKB (IXGBE_RXBUFFER_1536 - NET_IP_ALIGN)
+#define IXGBE_2K_TOO_SMALL_WITH_PADDING \
+((NET_SKB_PAD + IXGBE_RXBUFFER_1536) > SKB_WITH_OVERHEAD(IXGBE_RXBUFFER_2K))
+
+static inline int ixgbe_compute_pad(int rx_buf_len)
+{
+	int page_size, pad_size;
+
+	page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
+	pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
+
+	return pad_size;
+}
+
+static inline int ixgbe_skb_pad(void)
+{
+	int rx_buf_len;
+
+	/* If a 2K buffer cannot handle a standard Ethernet frame then
+	 * optimize padding for a 3K buffer instead of a 1.5K buffer.
+	 *
+	 * For a 3K buffer we need to add enough padding to allow for
+	 * tailroom due to NET_IP_ALIGN possibly shifting us out of
+	 * cache-line alignment.
+	 */
+	if (IXGBE_2K_TOO_SMALL_WITH_PADDING)
+		rx_buf_len = IXGBE_RXBUFFER_3K + SKB_DATA_ALIGN(NET_IP_ALIGN);
+	else
+		rx_buf_len = IXGBE_RXBUFFER_1536;
+
+	/* if needed make room for NET_IP_ALIGN */
+	rx_buf_len -= NET_IP_ALIGN;
+
+	return ixgbe_compute_pad(rx_buf_len);
+}
+
+#define IXGBE_SKB_PAD	ixgbe_skb_pad()
+#else
+#define IXGBE_SKB_PAD	(NET_SKB_PAD + NET_IP_ALIGN)
+#endif
+
+/*
+ * NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we
+ * reserve 64 more, and skb_shared_info adds an additional 320 bytes more,
+ * this adds up to 448 bytes of extra data.
+ *
+ * Since netdev_alloc_skb now allocates a page fragment we can use a value
+ * of 256 and the resultant skb will have a truesize of 960 or less.
+ */
+#define IXGBE_RX_HDR_SIZE IXGBE_RXBUFFER_256
+
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define IXGBE_RX_BUFFER_WRITE	16	/* Must be power of 2 */
+
+#define IXGBE_RX_DMA_ATTR \
+	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+enum ixgbe_tx_flags {
+	/* cmd_type flags */
+	IXGBE_TX_FLAGS_HW_VLAN	= 0x01,
+	IXGBE_TX_FLAGS_TSO	= 0x02,
+	IXGBE_TX_FLAGS_TSTAMP	= 0x04,
+
+	/* olinfo flags */
+	IXGBE_TX_FLAGS_CC	= 0x08,
+	IXGBE_TX_FLAGS_IPV4	= 0x10,
+	IXGBE_TX_FLAGS_CSUM	= 0x20,
+	IXGBE_TX_FLAGS_IPSEC	= 0x40,
+
+	/* software defined flags */
+	IXGBE_TX_FLAGS_SW_VLAN	= 0x80,
+	IXGBE_TX_FLAGS_FCOE	= 0x100,
+};
+
+/* VLAN info */
+#define IXGBE_TX_FLAGS_VLAN_MASK	0xffff0000
+#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK	0xe0000000
+#define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT  29
+#define IXGBE_TX_FLAGS_VLAN_SHIFT	16
+
+#define IXGBE_MAX_VF_MC_ENTRIES         30
+#define IXGBE_MAX_VF_FUNCTIONS          64
+#define IXGBE_MAX_VFTA_ENTRIES          128
+#define MAX_EMULATION_MAC_ADDRS         16
+#define IXGBE_MAX_PF_MACVLANS           15
+#define VMDQ_P(p)   ((p) + adapter->ring_feature[RING_F_VMDQ].offset)
+#define IXGBE_82599_VF_DEVICE_ID        0x10ED
+#define IXGBE_X540_VF_DEVICE_ID         0x1515
+
+#define UPDATE_VF_COUNTER_32bit(reg, last_counter, counter)	\
+	{							\
+		u32 current_counter = IXGBE_READ_REG(hw, reg);	\
+		if (current_counter < last_counter)		\
+			counter += 0x100000000LL;		\
+		last_counter = current_counter;			\
+		counter &= 0xFFFFFFFF00000000LL;		\
+		counter |= current_counter;			\
+	}
+
+#define UPDATE_VF_COUNTER_36bit(reg_lsb, reg_msb, last_counter, counter) \
+	{								 \
+		u64 current_counter_lsb = IXGBE_READ_REG(hw, reg_lsb);	 \
+		u64 current_counter_msb = IXGBE_READ_REG(hw, reg_msb);	 \
+		u64 current_counter = (current_counter_msb << 32) |	 \
+			current_counter_lsb;				 \
+		if (current_counter < last_counter)			 \
+			counter += 0x1000000000LL;			 \
+		last_counter = current_counter;				 \
+		counter &= 0xFFFFFFF000000000LL;			 \
+		counter |= current_counter;				 \
+	}
+
+struct vf_stats {
+	u64 gprc;
+	u64 gorc;
+	u64 gptc;
+	u64 gotc;
+	u64 mprc;
+};
+
+struct vf_data_storage {
+	struct pci_dev *vfdev;
+	unsigned char vf_mac_addresses[ETH_ALEN];
+	u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES];
+	u16 num_vf_mc_hashes;
+	bool clear_to_send;
+	struct vf_stats vfstats;
+	struct vf_stats last_vfstats;
+	struct vf_stats saved_rst_vfstats;
+	bool pf_set_mac;
+	u16 pf_vlan; /* When set, guest VLAN config not allowed. */
+	u16 pf_qos;
+	u16 tx_rate;
+	int link_enable;
+	int link_state;
+	u8 spoofchk_enabled;
+	bool rss_query_enabled;
+	u8 trusted;
+	int xcast_mode;
+	unsigned int vf_api;
+	u8 primary_abort_count;
+};
+
+enum ixgbevf_xcast_modes {
+	IXGBEVF_XCAST_MODE_NONE = 0,
+	IXGBEVF_XCAST_MODE_MULTI,
+	IXGBEVF_XCAST_MODE_ALLMULTI,
+	IXGBEVF_XCAST_MODE_PROMISC,
+};
+
+struct vf_macvlans {
+	struct list_head l;
+	int vf;
+	bool free;
+	bool is_macvlan;
+	u8 vf_macvlan[ETH_ALEN];
+};
+
+#define IXGBE_MAX_TXD_PWR	14
+#define IXGBE_MAX_DATA_PER_TXD	(1u << IXGBE_MAX_TXD_PWR)
+
+/* Tx Descriptors needed, worst case */
+#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IXGBE_MAX_DATA_PER_TXD)
+#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer */
+struct ixgbe_tx_buffer {
+	union ixgbe_adv_tx_desc *next_to_watch;
+	unsigned long time_stamp;
+	union {
+		struct sk_buff *skb;
+		struct xdp_frame *xdpf;
+	};
+	unsigned int bytecount;
+	unsigned short gso_segs;
+	__be16 protocol;
+	DEFINE_DMA_UNMAP_ADDR(dma);
+	DEFINE_DMA_UNMAP_LEN(len);
+	u32 tx_flags;
+};
+
+struct ixgbe_rx_buffer {
+	union {
+		struct {
+			struct sk_buff *skb;
+			dma_addr_t dma;
+			struct page *page;
+			__u32 page_offset;
+			__u16 pagecnt_bias;
+		};
+		struct {
+			bool discard;
+			struct xdp_buff *xdp;
+		};
+	};
+};
+
+struct ixgbe_queue_stats {
+	u64 packets;
+	u64 bytes;
+};
+
+struct ixgbe_tx_queue_stats {
+	u64 restart_queue;
+	u64 tx_busy;
+	u64 tx_done_old;
+};
+
+struct ixgbe_rx_queue_stats {
+	u64 rsc_count;
+	u64 rsc_flush;
+	u64 non_eop_descs;
+	u64 alloc_rx_page;
+	u64 alloc_rx_page_failed;
+	u64 alloc_rx_buff_failed;
+	u64 csum_err;
+};
+
+#define IXGBE_TS_HDR_LEN 8
+
+enum ixgbe_ring_state_t {
+	__IXGBE_RX_3K_BUFFER,
+	__IXGBE_RX_BUILD_SKB_ENABLED,
+	__IXGBE_RX_RSC_ENABLED,
+	__IXGBE_RX_CSUM_UDP_ZERO_ERR,
+	__IXGBE_RX_FCOE,
+	__IXGBE_TX_FDIR_INIT_DONE,
+	__IXGBE_TX_XPS_INIT_DONE,
+	__IXGBE_TX_DETECT_HANG,
+	__IXGBE_HANG_CHECK_ARMED,
+	__IXGBE_TX_XDP_RING,
+	__IXGBE_TX_DISABLED,
+};
+
+#define ring_uses_build_skb(ring) \
+	test_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &(ring)->state)
+
+struct ixgbe_fwd_adapter {
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+	struct net_device *netdev;
+	unsigned int tx_base_queue;
+	unsigned int rx_base_queue;
+	int pool;
+};
+
+#define check_for_tx_hang(ring) \
+	test_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state)
+#define set_check_for_tx_hang(ring) \
+	set_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state)
+#define clear_check_for_tx_hang(ring) \
+	clear_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state)
+#define ring_is_rsc_enabled(ring) \
+	test_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
+#define set_ring_rsc_enabled(ring) \
+	set_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
+#define clear_ring_rsc_enabled(ring) \
+	clear_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
+#define ring_is_xdp(ring) \
+	test_bit(__IXGBE_TX_XDP_RING, &(ring)->state)
+#define set_ring_xdp(ring) \
+	set_bit(__IXGBE_TX_XDP_RING, &(ring)->state)
+#define clear_ring_xdp(ring) \
+	clear_bit(__IXGBE_TX_XDP_RING, &(ring)->state)
+struct ixgbe_ring {
+	struct ixgbe_ring *next;	/* pointer to next ring in q_vector */
+	struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */
+	struct net_device *netdev;	/* netdev ring belongs to */
+	struct bpf_prog *xdp_prog;
+	struct device *dev;		/* device for DMA mapping */
+	void *desc;			/* descriptor ring memory */
+	union {
+		struct ixgbe_tx_buffer *tx_buffer_info;
+		struct ixgbe_rx_buffer *rx_buffer_info;
+	};
+	unsigned long state;
+	u8 __iomem *tail;
+	dma_addr_t dma;			/* phys. address of descriptor ring */
+	unsigned int size;		/* length in bytes */
+
+	u16 count;			/* amount of descriptors */
+
+	u8 queue_index; /* needed for multiqueue queue management */
+	u8 reg_idx;			/* holds the special value that gets
+					 * the hardware register offset
+					 * associated with this ring, which is
+					 * different for DCB and RSS modes
+					 */
+	u16 next_to_use;
+	u16 next_to_clean;
+
+	unsigned long last_rx_timestamp;
+
+	union {
+		u16 next_to_alloc;
+		struct {
+			u8 atr_sample_rate;
+			u8 atr_count;
+		};
+	};
+
+	u8 dcb_tc;
+	struct ixgbe_queue_stats stats;
+	struct u64_stats_sync syncp;
+	union {
+		struct ixgbe_tx_queue_stats tx_stats;
+		struct ixgbe_rx_queue_stats rx_stats;
+	};
+	u16 rx_offset;
+	struct xdp_rxq_info xdp_rxq;
+	spinlock_t tx_lock;	/* used in XDP mode */
+	struct xsk_buff_pool *xsk_pool;
+	u16 ring_idx;		/* {rx,tx,xdp}_ring back reference idx */
+	u16 rx_buf_len;
+} ____cacheline_internodealigned_in_smp;
+
+enum ixgbe_ring_f_enum {
+	RING_F_NONE = 0,
+	RING_F_VMDQ,  /* SR-IOV uses the same ring feature */
+	RING_F_RSS,
+	RING_F_FDIR,
+#ifdef IXGBE_FCOE
+	RING_F_FCOE,
+#endif /* IXGBE_FCOE */
+
+	RING_F_ARRAY_SIZE      /* must be last in enum set */
+};
+
+#define IXGBE_MAX_RSS_INDICES		16
+#define IXGBE_MAX_RSS_INDICES_X550	63
+#define IXGBE_MAX_VMDQ_INDICES		64
+#define IXGBE_MAX_FDIR_INDICES		63	/* based on q_vector limit */
+#define IXGBE_MAX_FCOE_INDICES		8
+#define MAX_RX_QUEUES			(IXGBE_MAX_FDIR_INDICES + 1)
+#define MAX_TX_QUEUES			(IXGBE_MAX_FDIR_INDICES + 1)
+#define IXGBE_MAX_XDP_QS		(IXGBE_MAX_FDIR_INDICES + 1)
+#define IXGBE_MAX_L2A_QUEUES		4
+#define IXGBE_BAD_L2A_QUEUE		3
+#define IXGBE_MAX_MACVLANS		63
+
+DECLARE_STATIC_KEY_FALSE(ixgbe_xdp_locking_key);
+
+struct ixgbe_ring_feature {
+	u16 limit;	/* upper limit on feature indices */
+	u16 indices;	/* current value of indices */
+	u16 mask;	/* Mask used for feature to ring mapping */
+	u16 offset;	/* offset to start of feature */
+} ____cacheline_internodealigned_in_smp;
+
+#define IXGBE_82599_VMDQ_8Q_MASK 0x78
+#define IXGBE_82599_VMDQ_4Q_MASK 0x7C
+#define IXGBE_82599_VMDQ_2Q_MASK 0x7E
+
+/*
+ * FCoE requires that all Rx buffers be over 2200 bytes in length.  Since
+ * this is twice the size of a half page we need to double the page order
+ * for FCoE enabled Rx queues.
+ */
+static inline unsigned int ixgbe_rx_bufsz(struct ixgbe_ring *ring)
+{
+	if (test_bit(__IXGBE_RX_3K_BUFFER, &ring->state))
+		return IXGBE_RXBUFFER_3K;
+#if (PAGE_SIZE < 8192)
+	if (ring_uses_build_skb(ring))
+		return IXGBE_MAX_2K_FRAME_BUILD_SKB;
+#endif
+	return IXGBE_RXBUFFER_2K;
+}
+
+static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (test_bit(__IXGBE_RX_3K_BUFFER, &ring->state))
+		return 1;
+#endif
+	return 0;
+}
+#define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring))
+
+#define IXGBE_ITR_ADAPTIVE_MIN_INC	2
+#define IXGBE_ITR_ADAPTIVE_MIN_USECS	10
+#define IXGBE_ITR_ADAPTIVE_MAX_USECS	126
+#define IXGBE_ITR_ADAPTIVE_LATENCY	0x80
+#define IXGBE_ITR_ADAPTIVE_BULK		0x00
+
+struct ixgbe_ring_container {
+	struct ixgbe_ring *ring;	/* pointer to linked list of rings */
+	unsigned long next_update;	/* jiffies value of last update */
+	unsigned int total_bytes;	/* total bytes processed this int */
+	unsigned int total_packets;	/* total packets processed this int */
+	u16 work_limit;			/* total work allowed per interrupt */
+	u8 count;			/* total number of rings in vector */
+	u8 itr;				/* current ITR setting for ring */
+};
+
+/* iterator for handling rings in ring container */
+#define ixgbe_for_each_ring(pos, head) \
+	for (pos = (head).ring; pos != NULL; pos = pos->next)
+
+#define MAX_RX_PACKET_BUFFERS ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) \
+			      ? 8 : 1)
+#define MAX_TX_PACKET_BUFFERS MAX_RX_PACKET_BUFFERS
+
+/* MAX_Q_VECTORS of these are allocated,
+ * but we only use one per queue-specific vector.
+ */
+struct ixgbe_q_vector {
+	struct ixgbe_adapter *adapter;
+#ifdef CONFIG_IXGBE_DCA
+	int cpu;	    /* CPU for DCA */
+#endif
+	u16 v_idx;		/* index of q_vector within array, also used for
+				 * finding the bit in EICR and friends that
+				 * represents the vector for this ring */
+	u16 itr;		/* Interrupt throttle rate written to EITR */
+	struct ixgbe_ring_container rx, tx;
+
+	struct napi_struct napi;
+	cpumask_t affinity_mask;
+	int numa_node;
+	struct rcu_head rcu;	/* to avoid race with update stats on free */
+	char name[IFNAMSIZ + 9];
+
+	/* for dynamic allocation of rings associated with this q_vector */
+	struct ixgbe_ring ring[] ____cacheline_internodealigned_in_smp;
+};
+
+#ifdef CONFIG_IXGBE_HWMON
+
+#define IXGBE_HWMON_TYPE_LOC		0
+#define IXGBE_HWMON_TYPE_TEMP		1
+#define IXGBE_HWMON_TYPE_CAUTION	2
+#define IXGBE_HWMON_TYPE_MAX		3
+
+struct hwmon_attr {
+	struct device_attribute dev_attr;
+	struct ixgbe_hw *hw;
+	struct ixgbe_thermal_diode_data *sensor;
+	char name[12];
+};
+
+struct hwmon_buff {
+	struct attribute_group group;
+	const struct attribute_group *groups[2];
+	struct attribute *attrs[IXGBE_MAX_SENSORS * 4 + 1];
+	struct hwmon_attr hwmon_list[IXGBE_MAX_SENSORS * 4];
+	unsigned int n_hwmon;
+};
+#endif /* CONFIG_IXGBE_HWMON */
+
+/*
+ * microsecond values for various ITR rates shifted by 2 to fit itr register
+ * with the first 3 bits reserved 0
+ */
+#define IXGBE_MIN_RSC_ITR	24
+#define IXGBE_100K_ITR		40
+#define IXGBE_20K_ITR		200
+#define IXGBE_12K_ITR		336
+
+/* ixgbe_test_staterr - tests bits in Rx descriptor status and error fields */
+static inline __le32 ixgbe_test_staterr(union ixgbe_adv_rx_desc *rx_desc,
+					const u32 stat_err_bits)
+{
+	return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
+}
+
+static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring)
+{
+	u16 ntc = ring->next_to_clean;
+	u16 ntu = ring->next_to_use;
+
+	return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
+}
+
+#define IXGBE_RX_DESC(R, i)	    \
+	(&(((union ixgbe_adv_rx_desc *)((R)->desc))[i]))
+#define IXGBE_TX_DESC(R, i)	    \
+	(&(((union ixgbe_adv_tx_desc *)((R)->desc))[i]))
+#define IXGBE_TX_CTXTDESC(R, i)	    \
+	(&(((struct ixgbe_adv_tx_context_desc *)((R)->desc))[i]))
+
+#define IXGBE_MAX_JUMBO_FRAME_SIZE	9728 /* Maximum Supported Size 9.5KB */
+#ifdef IXGBE_FCOE
+/* Use 3K as the baby jumbo frame size for FCoE */
+#define IXGBE_FCOE_JUMBO_FRAME_SIZE       3072
+#endif /* IXGBE_FCOE */
+
+#define OTHER_VECTOR 1
+#define NON_Q_VECTORS (OTHER_VECTOR)
+
+#define MAX_MSIX_VECTORS_82599 64
+#define MAX_Q_VECTORS_82599 64
+#define MAX_MSIX_VECTORS_82598 18
+#define MAX_Q_VECTORS_82598 16
+
+struct ixgbe_mac_addr {
+	u8 addr[ETH_ALEN];
+	u16 pool;
+	u16 state; /* bitmask */
+};
+
+#define IXGBE_MAC_STATE_DEFAULT		0x1
+#define IXGBE_MAC_STATE_MODIFIED	0x2
+#define IXGBE_MAC_STATE_IN_USE		0x4
+
+#define MAX_Q_VECTORS MAX_Q_VECTORS_82599
+#define MAX_MSIX_COUNT MAX_MSIX_VECTORS_82599
+
+#define MIN_MSIX_Q_VECTORS 1
+#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NON_Q_VECTORS)
+
+/* default to trying for four seconds */
+#define IXGBE_TRY_LINK_TIMEOUT (4 * HZ)
+#define IXGBE_SFP_POLL_JIFFIES (2 * HZ)	/* SFP poll every 2 seconds */
+
+#define IXGBE_PRIMARY_ABORT_LIMIT	5
+
+/* board specific private data structure */
+struct ixgbe_adapter {
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+	/* OS defined structs */
+	struct net_device *netdev;
+	struct bpf_prog *xdp_prog;
+	struct pci_dev *pdev;
+	struct mii_bus *mii_bus;
+
+	unsigned long state;
+
+	/* Some features need tri-state capability,
+	 * thus the additional *_CAPABLE flags.
+	 */
+	u32 flags;
+#define IXGBE_FLAG_MSI_ENABLED			BIT(1)
+#define IXGBE_FLAG_MSIX_ENABLED			BIT(3)
+#define IXGBE_FLAG_RX_1BUF_CAPABLE		BIT(4)
+#define IXGBE_FLAG_RX_PS_CAPABLE		BIT(5)
+#define IXGBE_FLAG_RX_PS_ENABLED		BIT(6)
+#define IXGBE_FLAG_DCA_ENABLED			BIT(8)
+#define IXGBE_FLAG_DCA_CAPABLE			BIT(9)
+#define IXGBE_FLAG_IMIR_ENABLED			BIT(10)
+#define IXGBE_FLAG_MQ_CAPABLE			BIT(11)
+#define IXGBE_FLAG_DCB_ENABLED			BIT(12)
+#define IXGBE_FLAG_VMDQ_CAPABLE			BIT(13)
+#define IXGBE_FLAG_VMDQ_ENABLED			BIT(14)
+#define IXGBE_FLAG_FAN_FAIL_CAPABLE		BIT(15)
+#define IXGBE_FLAG_NEED_LINK_UPDATE		BIT(16)
+#define IXGBE_FLAG_NEED_LINK_CONFIG		BIT(17)
+#define IXGBE_FLAG_FDIR_HASH_CAPABLE		BIT(18)
+#define IXGBE_FLAG_FDIR_PERFECT_CAPABLE		BIT(19)
+#define IXGBE_FLAG_FCOE_CAPABLE			BIT(20)
+#define IXGBE_FLAG_FCOE_ENABLED			BIT(21)
+#define IXGBE_FLAG_SRIOV_CAPABLE		BIT(22)
+#define IXGBE_FLAG_SRIOV_ENABLED		BIT(23)
+#define IXGBE_FLAG_RX_HWTSTAMP_ENABLED		BIT(25)
+#define IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER	BIT(26)
+#define IXGBE_FLAG_DCB_CAPABLE			BIT(27)
+
+	u32 flags2;
+#define IXGBE_FLAG2_RSC_CAPABLE			BIT(0)
+#define IXGBE_FLAG2_RSC_ENABLED			BIT(1)
+#define IXGBE_FLAG2_TEMP_SENSOR_CAPABLE		BIT(2)
+#define IXGBE_FLAG2_TEMP_SENSOR_EVENT		BIT(3)
+#define IXGBE_FLAG2_SEARCH_FOR_SFP		BIT(4)
+#define IXGBE_FLAG2_SFP_NEEDS_RESET		BIT(5)
+#define IXGBE_FLAG2_FDIR_REQUIRES_REINIT	BIT(7)
+#define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP		BIT(8)
+#define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP		BIT(9)
+#define IXGBE_FLAG2_PTP_PPS_ENABLED		BIT(10)
+#define IXGBE_FLAG2_PHY_INTERRUPT		BIT(11)
+#define IXGBE_FLAG2_VLAN_PROMISC		BIT(13)
+#define IXGBE_FLAG2_EEE_CAPABLE			BIT(14)
+#define IXGBE_FLAG2_EEE_ENABLED			BIT(15)
+#define IXGBE_FLAG2_RX_LEGACY			BIT(16)
+#define IXGBE_FLAG2_IPSEC_ENABLED		BIT(17)
+#define IXGBE_FLAG2_VF_IPSEC_ENABLED		BIT(18)
+#define IXGBE_FLAG2_AUTO_DISABLE_VF		BIT(19)
+
+	/* Tx fast path data */
+	int num_tx_queues;
+	u16 tx_itr_setting;
+	u16 tx_work_limit;
+	u64 tx_ipsec;
+
+	/* Rx fast path data */
+	int num_rx_queues;
+	u16 rx_itr_setting;
+	u64 rx_ipsec;
+
+	/* Port number used to identify VXLAN traffic */
+	__be16 vxlan_port;
+	__be16 geneve_port;
+
+	/* XDP */
+	int num_xdp_queues;
+	struct ixgbe_ring *xdp_ring[IXGBE_MAX_XDP_QS];
+	unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled rings */
+
+	/* TX */
+	struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp;
+
+	u64 restart_queue;
+	u64 lsc_int;
+	u32 tx_timeout_count;
+
+	/* RX */
+	struct ixgbe_ring *rx_ring[MAX_RX_QUEUES];
+	int num_rx_pools;		/* == num_rx_queues in 82598 */
+	int num_rx_queues_per_pool;	/* 1 if 82598, can be many if 82599 */
+	u64 hw_csum_rx_error;
+	u64 hw_rx_no_dma_resources;
+	u64 rsc_total_count;
+	u64 rsc_total_flush;
+	u64 non_eop_descs;
+	u32 alloc_rx_page;
+	u32 alloc_rx_page_failed;
+	u32 alloc_rx_buff_failed;
+
+	struct ixgbe_q_vector *q_vector[MAX_Q_VECTORS];
+
+	/* DCB parameters */
+	struct ieee_pfc *ixgbe_ieee_pfc;
+	struct ieee_ets *ixgbe_ieee_ets;
+	struct ixgbe_dcb_config dcb_cfg;
+	struct ixgbe_dcb_config temp_dcb_cfg;
+	u8 hw_tcs;
+	u8 dcb_set_bitmap;
+	u8 dcbx_cap;
+	enum ixgbe_fc_mode last_lfc_mode;
+
+	int num_q_vectors;	/* current number of q_vectors for device */
+	int max_q_vectors;	/* true count of q_vectors for device */
+	struct ixgbe_ring_feature ring_feature[RING_F_ARRAY_SIZE];
+	struct msix_entry *msix_entries;
+
+	u32 test_icr;
+	struct ixgbe_ring test_tx_ring;
+	struct ixgbe_ring test_rx_ring;
+
+	/* structs defined in ixgbe_hw.h */
+	struct ixgbe_hw hw;
+	u16 msg_enable;
+	struct ixgbe_hw_stats stats;
+
+	u64 tx_busy;
+	unsigned int tx_ring_count;
+	unsigned int xdp_ring_count;
+	unsigned int rx_ring_count;
+
+	u32 link_speed;
+	bool link_up;
+	unsigned long sfp_poll_time;
+	unsigned long link_check_timeout;
+
+	struct timer_list service_timer;
+	struct work_struct service_task;
+
+	struct hlist_head fdir_filter_list;
+	unsigned long fdir_overflow; /* number of times ATR was backed off */
+	union ixgbe_atr_input fdir_mask;
+	int fdir_filter_count;
+	u32 fdir_pballoc;
+	u32 atr_sample_rate;
+	spinlock_t fdir_perfect_lock;
+
+#ifdef IXGBE_FCOE
+	struct ixgbe_fcoe fcoe;
+#endif /* IXGBE_FCOE */
+	u8 __iomem *io_addr; /* Mainly for iounmap use */
+	u32 wol;
+
+	u16 bridge_mode;
+
+	char eeprom_id[NVM_VER_SIZE];
+	u16 eeprom_cap;
+
+	u32 interrupt_event;
+	u32 led_reg;
+
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info ptp_caps;
+	struct work_struct ptp_tx_work;
+	struct sk_buff *ptp_tx_skb;
+	struct hwtstamp_config tstamp_config;
+	unsigned long ptp_tx_start;
+	unsigned long last_overflow_check;
+	unsigned long last_rx_ptp_check;
+	unsigned long last_rx_timestamp;
+	spinlock_t tmreg_lock;
+	struct cyclecounter hw_cc;
+	struct timecounter hw_tc;
+	u32 base_incval;
+	u32 tx_hwtstamp_timeouts;
+	u32 tx_hwtstamp_skipped;
+	u32 rx_hwtstamp_cleared;
+	void (*ptp_setup_sdp)(struct ixgbe_adapter *);
+
+	/* SR-IOV */
+	DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS);
+	unsigned int num_vfs;
+	struct vf_data_storage *vfinfo;
+	int vf_rate_link_speed;
+	struct vf_macvlans vf_mvs;
+	struct vf_macvlans *mv_list;
+
+	u32 timer_event_accumulator;
+	u32 vferr_refcount;
+	struct ixgbe_mac_addr *mac_table;
+	struct kobject *info_kobj;
+#ifdef CONFIG_IXGBE_HWMON
+	struct hwmon_buff *ixgbe_hwmon_buff;
+#endif /* CONFIG_IXGBE_HWMON */
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *ixgbe_dbg_adapter;
+#endif /*CONFIG_DEBUG_FS*/
+
+	u8 default_up;
+	/* Bitmask indicating in use pools */
+	DECLARE_BITMAP(fwd_bitmask, IXGBE_MAX_MACVLANS + 1);
+
+#define IXGBE_MAX_LINK_HANDLE 10
+	struct ixgbe_jump_table *jump_tables[IXGBE_MAX_LINK_HANDLE];
+	unsigned long tables;
+
+/* maximum number of RETA entries among all devices supported by ixgbe
+ * driver: currently it's x550 device in non-SRIOV mode
+ */
+#define IXGBE_MAX_RETA_ENTRIES 512
+	u8 rss_indir_tbl[IXGBE_MAX_RETA_ENTRIES];
+
+#define IXGBE_RSS_KEY_SIZE     40  /* size of RSS Hash Key in bytes */
+	u32 *rss_key;
+
+#ifdef CONFIG_IXGBE_IPSEC
+	struct ixgbe_ipsec *ipsec;
+#endif /* CONFIG_IXGBE_IPSEC */
+	spinlock_t vfs_lock;
+};
+
+static inline int ixgbe_determine_xdp_q_idx(int cpu)
+{
+	if (static_key_enabled(&ixgbe_xdp_locking_key))
+		return cpu % IXGBE_MAX_XDP_QS;
+	else
+		return cpu;
+}
+
+static inline
+struct ixgbe_ring *ixgbe_determine_xdp_ring(struct ixgbe_adapter *adapter)
+{
+	int index = ixgbe_determine_xdp_q_idx(smp_processor_id());
+
+	return adapter->xdp_ring[index];
+}
+
+static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter)
+{
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82598EB:
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+		return IXGBE_MAX_RSS_INDICES;
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		return IXGBE_MAX_RSS_INDICES_X550;
+	default:
+		return 0;
+	}
+}
+
+struct ixgbe_fdir_filter {
+	struct hlist_node fdir_node;
+	union ixgbe_atr_input filter;
+	u16 sw_idx;
+	u64 action;
+};
+
+enum ixgbe_state_t {
+	__IXGBE_TESTING,
+	__IXGBE_RESETTING,
+	__IXGBE_DOWN,
+	__IXGBE_DISABLED,
+	__IXGBE_REMOVING,
+	__IXGBE_SERVICE_SCHED,
+	__IXGBE_SERVICE_INITED,
+	__IXGBE_IN_SFP_INIT,
+	__IXGBE_PTP_RUNNING,
+	__IXGBE_PTP_TX_IN_PROGRESS,
+	__IXGBE_RESET_REQUESTED,
+};
+
+struct ixgbe_cb {
+	union {				/* Union defining head/tail partner */
+		struct sk_buff *head;
+		struct sk_buff *tail;
+	};
+	dma_addr_t dma;
+	u16 append_cnt;
+	bool page_released;
+};
+#define IXGBE_CB(skb) ((struct ixgbe_cb *)(skb)->cb)
+
+enum ixgbe_boards {
+	board_82598,
+	board_82599,
+	board_X540,
+	board_X550,
+	board_X550EM_x,
+	board_x550em_x_fw,
+	board_x550em_a,
+	board_x550em_a_fw,
+};
+
+extern const struct ixgbe_info ixgbe_82598_info;
+extern const struct ixgbe_info ixgbe_82599_info;
+extern const struct ixgbe_info ixgbe_X540_info;
+extern const struct ixgbe_info ixgbe_X550_info;
+extern const struct ixgbe_info ixgbe_X550EM_x_info;
+extern const struct ixgbe_info ixgbe_x550em_x_fw_info;
+extern const struct ixgbe_info ixgbe_x550em_a_info;
+extern const struct ixgbe_info ixgbe_x550em_a_fw_info;
+#ifdef CONFIG_IXGBE_DCB
+extern const struct dcbnl_rtnl_ops ixgbe_dcbnl_ops;
+#endif
+
+extern char ixgbe_driver_name[];
+#ifdef IXGBE_FCOE
+extern char ixgbe_default_device_descr[];
+#endif /* IXGBE_FCOE */
+
+int ixgbe_open(struct net_device *netdev);
+int ixgbe_close(struct net_device *netdev);
+void ixgbe_up(struct ixgbe_adapter *adapter);
+void ixgbe_down(struct ixgbe_adapter *adapter);
+void ixgbe_reinit_locked(struct ixgbe_adapter *adapter);
+void ixgbe_reset(struct ixgbe_adapter *adapter);
+void ixgbe_set_ethtool_ops(struct net_device *netdev);
+int ixgbe_setup_rx_resources(struct ixgbe_adapter *, struct ixgbe_ring *);
+int ixgbe_setup_tx_resources(struct ixgbe_ring *);
+void ixgbe_free_rx_resources(struct ixgbe_ring *);
+void ixgbe_free_tx_resources(struct ixgbe_ring *);
+void ixgbe_configure_rx_ring(struct ixgbe_adapter *, struct ixgbe_ring *);
+void ixgbe_configure_tx_ring(struct ixgbe_adapter *, struct ixgbe_ring *);
+void ixgbe_disable_rx(struct ixgbe_adapter *adapter);
+void ixgbe_disable_tx(struct ixgbe_adapter *adapter);
+void ixgbe_update_stats(struct ixgbe_adapter *adapter);
+int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter);
+bool ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id,
+			 u16 subdevice_id);
+#ifdef CONFIG_PCI_IOV
+void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter);
+#endif
+int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter,
+			 const u8 *addr, u16 queue);
+int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter,
+			 const u8 *addr, u16 queue);
+void ixgbe_update_pf_promisc_vlvf(struct ixgbe_adapter *adapter, u32 vid);
+void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter);
+netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *, struct ixgbe_adapter *,
+				  struct ixgbe_ring *);
+void ixgbe_alloc_rx_buffers(struct ixgbe_ring *, u16);
+void ixgbe_write_eitr(struct ixgbe_q_vector *);
+int ixgbe_poll(struct napi_struct *napi, int budget);
+int ethtool_ioctl(struct ifreq *ifr);
+s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw);
+s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl);
+s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl);
+s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_hash_dword input,
+					  union ixgbe_atr_hash_dword common,
+					  u8 queue);
+s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
+				    union ixgbe_atr_input *input_mask);
+s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_input *input,
+					  u16 soft_id, u8 queue);
+s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_input *input,
+					  u16 soft_id);
+void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
+					  union ixgbe_atr_input *mask);
+int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+				    struct ixgbe_fdir_filter *input,
+				    u16 sw_idx);
+void ixgbe_set_rx_mode(struct net_device *netdev);
+#ifdef CONFIG_IXGBE_DCB
+void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter);
+#endif
+int ixgbe_setup_tc(struct net_device *dev, u8 tc);
+void ixgbe_tx_ctxtdesc(struct ixgbe_ring *, u32, u32, u32, u32);
+void ixgbe_do_reset(struct net_device *netdev);
+#ifdef CONFIG_IXGBE_HWMON
+void ixgbe_sysfs_exit(struct ixgbe_adapter *adapter);
+int ixgbe_sysfs_init(struct ixgbe_adapter *adapter);
+#endif /* CONFIG_IXGBE_HWMON */
+#ifdef IXGBE_FCOE
+void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter);
+int ixgbe_fso(struct ixgbe_ring *tx_ring, struct ixgbe_tx_buffer *first,
+	      u8 *hdr_len);
+int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter,
+		   union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb);
+int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid,
+		       struct scatterlist *sgl, unsigned int sgc);
+int ixgbe_fcoe_ddp_target(struct net_device *netdev, u16 xid,
+			  struct scatterlist *sgl, unsigned int sgc);
+int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid);
+int ixgbe_setup_fcoe_ddp_resources(struct ixgbe_adapter *adapter);
+void ixgbe_free_fcoe_ddp_resources(struct ixgbe_adapter *adapter);
+int ixgbe_fcoe_enable(struct net_device *netdev);
+int ixgbe_fcoe_disable(struct net_device *netdev);
+int ixgbe_fcoe_get_wwn(struct net_device *netdev, u64 *wwn, int type);
+int ixgbe_fcoe_get_hbainfo(struct net_device *netdev,
+			   struct netdev_fcoe_hbainfo *info);
+u8 ixgbe_fcoe_get_tc(struct ixgbe_adapter *adapter);
+#endif /* IXGBE_FCOE */
+#ifdef CONFIG_DEBUG_FS
+void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter);
+void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter);
+void ixgbe_dbg_init(void);
+void ixgbe_dbg_exit(void);
+#else
+static inline void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter) {}
+static inline void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter) {}
+static inline void ixgbe_dbg_init(void) {}
+static inline void ixgbe_dbg_exit(void) {}
+#endif /* CONFIG_DEBUG_FS */
+static inline struct netdev_queue *txring_txq(const struct ixgbe_ring *ring)
+{
+	return netdev_get_tx_queue(ring->netdev, ring->queue_index);
+}
+
+void ixgbe_ptp_init(struct ixgbe_adapter *adapter);
+void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter);
+void ixgbe_ptp_stop(struct ixgbe_adapter *adapter);
+void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter);
+void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter);
+void ixgbe_ptp_tx_hang(struct ixgbe_adapter *adapter);
+void ixgbe_ptp_rx_pktstamp(struct ixgbe_q_vector *, struct sk_buff *);
+void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *, struct sk_buff *skb);
+static inline void ixgbe_ptp_rx_hwtstamp(struct ixgbe_ring *rx_ring,
+					 union ixgbe_adv_rx_desc *rx_desc,
+					 struct sk_buff *skb)
+{
+	if (unlikely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_TSIP))) {
+		ixgbe_ptp_rx_pktstamp(rx_ring->q_vector, skb);
+		return;
+	}
+
+	if (unlikely(!ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS)))
+		return;
+
+	ixgbe_ptp_rx_rgtstamp(rx_ring->q_vector, skb);
+
+	/* Update the last_rx_timestamp timer in order to enable watchdog check
+	 * for error case of latched timestamp on a dropped packet.
+	 */
+	rx_ring->last_rx_timestamp = jiffies;
+}
+
+int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr);
+int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr);
+void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter);
+void ixgbe_ptp_reset(struct ixgbe_adapter *adapter);
+void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter);
+#ifdef CONFIG_PCI_IOV
+void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter);
+#endif
+
+netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
+				  struct ixgbe_adapter *adapter,
+				  struct ixgbe_ring *tx_ring);
+u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter);
+void ixgbe_store_key(struct ixgbe_adapter *adapter);
+void ixgbe_store_reta(struct ixgbe_adapter *adapter);
+s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
+		       u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm);
+#ifdef CONFIG_IXGBE_IPSEC
+void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter);
+void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter);
+void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter);
+void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
+		    union ixgbe_adv_rx_desc *rx_desc,
+		    struct sk_buff *skb);
+int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, struct ixgbe_tx_buffer *first,
+		   struct ixgbe_ipsec_tx_data *itd);
+void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf);
+int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *mbuf, u32 vf);
+int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter, u32 *mbuf, u32 vf);
+#else
+static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) { }
+static inline void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter) { }
+static inline void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) { }
+static inline void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
+				  union ixgbe_adv_rx_desc *rx_desc,
+				  struct sk_buff *skb) { }
+static inline int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
+				 struct ixgbe_tx_buffer *first,
+				 struct ixgbe_ipsec_tx_data *itd) { return 0; }
+static inline void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter,
+					u32 vf) { }
+static inline int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter,
+					u32 *mbuf, u32 vf) { return -EACCES; }
+static inline int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter,
+					u32 *mbuf, u32 vf) { return -EACCES; }
+#endif /* CONFIG_IXGBE_IPSEC */
+
+static inline bool ixgbe_enabled_xdp_adapter(struct ixgbe_adapter *adapter)
+{
+	return !!adapter->xdp_prog;
+}
+
+#endif /* _IXGBE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
new file mode 100644
index 0000000000..100388968e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
@@ -0,0 +1,1196 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+
+#include "ixgbe.h"
+#include "ixgbe_phy.h"
+
+#define IXGBE_82598_MAX_TX_QUEUES 32
+#define IXGBE_82598_MAX_RX_QUEUES 64
+#define IXGBE_82598_RAR_ENTRIES   16
+#define IXGBE_82598_MC_TBL_SIZE  128
+#define IXGBE_82598_VFT_TBL_SIZE 128
+#define IXGBE_82598_RX_PB_SIZE	 512
+
+static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw,
+					 ixgbe_link_speed speed,
+					 bool autoneg_wait_to_complete);
+static s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset,
+				       u8 *eeprom_data);
+
+/**
+ *  ixgbe_set_pcie_completion_timeout - set pci-e completion timeout
+ *  @hw: pointer to the HW structure
+ *
+ *  The defaults for 82598 should be in the range of 50us to 50ms,
+ *  however the hardware default for these parts is 500us to 1ms which is less
+ *  than the 10ms recommended by the pci-e spec.  To address this we need to
+ *  increase the value to either 10ms to 250ms for capability version 1 config,
+ *  or 16ms to 55ms for version 2.
+ **/
+static void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw)
+{
+	u32 gcr = IXGBE_READ_REG(hw, IXGBE_GCR);
+	u16 pcie_devctl2;
+
+	if (ixgbe_removed(hw->hw_addr))
+		return;
+
+	/* only take action if timeout value is defaulted to 0 */
+	if (gcr & IXGBE_GCR_CMPL_TMOUT_MASK)
+		goto out;
+
+	/*
+	 * if capababilities version is type 1 we can write the
+	 * timeout of 10ms to 250ms through the GCR register
+	 */
+	if (!(gcr & IXGBE_GCR_CAP_VER2)) {
+		gcr |= IXGBE_GCR_CMPL_TMOUT_10ms;
+		goto out;
+	}
+
+	/*
+	 * for version 2 capabilities we need to write the config space
+	 * directly in order to set the completion timeout value for
+	 * 16ms to 55ms
+	 */
+	pcie_devctl2 = ixgbe_read_pci_cfg_word(hw, IXGBE_PCI_DEVICE_CONTROL2);
+	pcie_devctl2 |= IXGBE_PCI_DEVICE_CONTROL2_16ms;
+	ixgbe_write_pci_cfg_word(hw, IXGBE_PCI_DEVICE_CONTROL2, pcie_devctl2);
+out:
+	/* disable completion timeout resend */
+	gcr &= ~IXGBE_GCR_CMPL_TMOUT_RESEND;
+	IXGBE_WRITE_REG(hw, IXGBE_GCR, gcr);
+}
+
+static s32 ixgbe_get_invariants_82598(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+
+	/* Call PHY identify routine to get the phy type */
+	ixgbe_identify_phy_generic(hw);
+
+	mac->mcft_size = IXGBE_82598_MC_TBL_SIZE;
+	mac->vft_size = IXGBE_82598_VFT_TBL_SIZE;
+	mac->num_rar_entries = IXGBE_82598_RAR_ENTRIES;
+	mac->rx_pb_size = IXGBE_82598_RX_PB_SIZE;
+	mac->max_rx_queues = IXGBE_82598_MAX_RX_QUEUES;
+	mac->max_tx_queues = IXGBE_82598_MAX_TX_QUEUES;
+	mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_init_phy_ops_82598 - PHY/SFP specific init
+ *  @hw: pointer to hardware structure
+ *
+ *  Initialize any function pointers that were not able to be
+ *  set during get_invariants because the PHY/SFP type was
+ *  not known.  Perform the SFP init if necessary.
+ *
+ **/
+static s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	struct ixgbe_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u16 list_offset, data_offset;
+
+	/* Identify the PHY */
+	phy->ops.identify(hw);
+
+	/* Overwrite the link function pointers if copper PHY */
+	if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) {
+		mac->ops.setup_link = &ixgbe_setup_copper_link_82598;
+		mac->ops.get_link_capabilities =
+			&ixgbe_get_copper_link_capabilities_generic;
+	}
+
+	switch (hw->phy.type) {
+	case ixgbe_phy_tn:
+		phy->ops.setup_link = &ixgbe_setup_phy_link_tnx;
+		phy->ops.check_link = &ixgbe_check_phy_link_tnx;
+		break;
+	case ixgbe_phy_nl:
+		phy->ops.reset = &ixgbe_reset_phy_nl;
+
+		/* Call SFP+ identify routine to get the SFP+ module type */
+		ret_val = phy->ops.identify_sfp(hw);
+		if (ret_val)
+			return ret_val;
+		if (hw->phy.sfp_type == ixgbe_sfp_type_unknown)
+			return IXGBE_ERR_SFP_NOT_SUPPORTED;
+
+		/* Check to see if SFP+ module is supported */
+		ret_val = ixgbe_get_sfp_init_sequence_offsets(hw,
+							    &list_offset,
+							    &data_offset);
+		if (ret_val)
+			return IXGBE_ERR_SFP_NOT_SUPPORTED;
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_start_hw_82598 - Prepare hardware for Tx/Rx
+ *  @hw: pointer to hardware structure
+ *
+ *  Starts the hardware using the generic start_hw function.
+ *  Disables relaxed ordering for archs other than SPARC
+ *  Then set pcie completion timeout
+ *
+ **/
+static s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw)
+{
+	s32 ret_val;
+
+	ret_val = ixgbe_start_hw_generic(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* set the completion timeout for interface */
+	ixgbe_set_pcie_completion_timeout(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_link_capabilities_82598 - Determines link capabilities
+ *  @hw: pointer to hardware structure
+ *  @speed: pointer to link speed
+ *  @autoneg: boolean auto-negotiation value
+ *
+ *  Determines the link capabilities by reading the AUTOC register.
+ **/
+static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw,
+					     ixgbe_link_speed *speed,
+					     bool *autoneg)
+{
+	u32 autoc = 0;
+
+	/*
+	 * Determine link capabilities based on the stored value of AUTOC,
+	 * which represents EEPROM defaults.  If AUTOC value has not been
+	 * stored, use the current register value.
+	 */
+	if (hw->mac.orig_link_settings_stored)
+		autoc = hw->mac.orig_autoc;
+	else
+		autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+
+	switch (autoc & IXGBE_AUTOC_LMS_MASK) {
+	case IXGBE_AUTOC_LMS_1G_LINK_NO_AN:
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+		*autoneg = false;
+		break;
+
+	case IXGBE_AUTOC_LMS_10G_LINK_NO_AN:
+		*speed = IXGBE_LINK_SPEED_10GB_FULL;
+		*autoneg = false;
+		break;
+
+	case IXGBE_AUTOC_LMS_1G_AN:
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+		*autoneg = true;
+		break;
+
+	case IXGBE_AUTOC_LMS_KX4_AN:
+	case IXGBE_AUTOC_LMS_KX4_AN_1G_AN:
+		*speed = IXGBE_LINK_SPEED_UNKNOWN;
+		if (autoc & IXGBE_AUTOC_KX4_SUPP)
+			*speed |= IXGBE_LINK_SPEED_10GB_FULL;
+		if (autoc & IXGBE_AUTOC_KX_SUPP)
+			*speed |= IXGBE_LINK_SPEED_1GB_FULL;
+		*autoneg = true;
+		break;
+
+	default:
+		return IXGBE_ERR_LINK_SETUP;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_media_type_82598 - Determines media type
+ *  @hw: pointer to hardware structure
+ *
+ *  Returns the media type (fiber, copper, backplane)
+ **/
+static enum ixgbe_media_type ixgbe_get_media_type_82598(struct ixgbe_hw *hw)
+{
+	/* Detect if there is a copper PHY attached. */
+	switch (hw->phy.type) {
+	case ixgbe_phy_cu_unknown:
+	case ixgbe_phy_tn:
+		return ixgbe_media_type_copper;
+
+	default:
+		break;
+	}
+
+	/* Media type for I82598 is based on device ID */
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_82598:
+	case IXGBE_DEV_ID_82598_BX:
+		/* Default device ID is mezzanine card KX/KX4 */
+		return ixgbe_media_type_backplane;
+
+	case IXGBE_DEV_ID_82598AF_DUAL_PORT:
+	case IXGBE_DEV_ID_82598AF_SINGLE_PORT:
+	case IXGBE_DEV_ID_82598_DA_DUAL_PORT:
+	case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM:
+	case IXGBE_DEV_ID_82598EB_XF_LR:
+	case IXGBE_DEV_ID_82598EB_SFP_LOM:
+		return ixgbe_media_type_fiber;
+
+	case IXGBE_DEV_ID_82598EB_CX4:
+	case IXGBE_DEV_ID_82598_CX4_DUAL_PORT:
+		return ixgbe_media_type_cx4;
+
+	case IXGBE_DEV_ID_82598AT:
+	case IXGBE_DEV_ID_82598AT2:
+		return ixgbe_media_type_copper;
+
+	default:
+		return ixgbe_media_type_unknown;
+	}
+}
+
+/**
+ *  ixgbe_fc_enable_82598 - Enable flow control
+ *  @hw: pointer to hardware structure
+ *
+ *  Enable flow control according to the current settings.
+ **/
+static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw)
+{
+	u32 fctrl_reg;
+	u32 rmcs_reg;
+	u32 reg;
+	u32 fcrtl, fcrth;
+	u32 link_speed = 0;
+	int i;
+	bool link_up;
+
+	/* Validate the water mark configuration */
+	if (!hw->fc.pause_time)
+		return IXGBE_ERR_INVALID_LINK_SETTINGS;
+
+	/* Low water mark of zero causes XOFF floods */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
+		    hw->fc.high_water[i]) {
+			if (!hw->fc.low_water[i] ||
+			    hw->fc.low_water[i] >= hw->fc.high_water[i]) {
+				hw_dbg(hw, "Invalid water mark configuration\n");
+				return IXGBE_ERR_INVALID_LINK_SETTINGS;
+			}
+		}
+	}
+
+	/*
+	 * On 82598 having Rx FC on causes resets while doing 1G
+	 * so if it's on turn it off once we know link_speed. For
+	 * more details see 82598 Specification update.
+	 */
+	hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
+	if (link_up && link_speed == IXGBE_LINK_SPEED_1GB_FULL) {
+		switch (hw->fc.requested_mode) {
+		case ixgbe_fc_full:
+			hw->fc.requested_mode = ixgbe_fc_tx_pause;
+			break;
+		case ixgbe_fc_rx_pause:
+			hw->fc.requested_mode = ixgbe_fc_none;
+			break;
+		default:
+			/* no change */
+			break;
+		}
+	}
+
+	/* Negotiate the fc mode to use */
+	hw->mac.ops.fc_autoneg(hw);
+
+	/* Disable any previous flow control settings */
+	fctrl_reg = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+	fctrl_reg &= ~(IXGBE_FCTRL_RFCE | IXGBE_FCTRL_RPFCE);
+
+	rmcs_reg = IXGBE_READ_REG(hw, IXGBE_RMCS);
+	rmcs_reg &= ~(IXGBE_RMCS_TFCE_PRIORITY | IXGBE_RMCS_TFCE_802_3X);
+
+	/*
+	 * The possible values of fc.current_mode are:
+	 * 0: Flow control is completely disabled
+	 * 1: Rx flow control is enabled (we can receive pause frames,
+	 *    but not send pause frames).
+	 * 2: Tx flow control is enabled (we can send pause frames but
+	 *     we do not support receiving pause frames).
+	 * 3: Both Rx and Tx flow control (symmetric) are enabled.
+	 * other: Invalid.
+	 */
+	switch (hw->fc.current_mode) {
+	case ixgbe_fc_none:
+		/*
+		 * Flow control is disabled by software override or autoneg.
+		 * The code below will actually disable it in the HW.
+		 */
+		break;
+	case ixgbe_fc_rx_pause:
+		/*
+		 * Rx Flow control is enabled and Tx Flow control is
+		 * disabled by software override. Since there really
+		 * isn't a way to advertise that we are capable of RX
+		 * Pause ONLY, we will advertise that we support both
+		 * symmetric and asymmetric Rx PAUSE.  Later, we will
+		 * disable the adapter's ability to send PAUSE frames.
+		 */
+		fctrl_reg |= IXGBE_FCTRL_RFCE;
+		break;
+	case ixgbe_fc_tx_pause:
+		/*
+		 * Tx Flow control is enabled, and Rx Flow control is
+		 * disabled by software override.
+		 */
+		rmcs_reg |= IXGBE_RMCS_TFCE_802_3X;
+		break;
+	case ixgbe_fc_full:
+		/* Flow control (both Rx and Tx) is enabled by SW override. */
+		fctrl_reg |= IXGBE_FCTRL_RFCE;
+		rmcs_reg |= IXGBE_RMCS_TFCE_802_3X;
+		break;
+	default:
+		hw_dbg(hw, "Flow control param set incorrectly\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	/* Set 802.3x based flow control settings. */
+	fctrl_reg |= IXGBE_FCTRL_DPF;
+	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl_reg);
+	IXGBE_WRITE_REG(hw, IXGBE_RMCS, rmcs_reg);
+
+	/* Set up and enable Rx high/low water mark thresholds, enable XON. */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
+		    hw->fc.high_water[i]) {
+			fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE;
+			fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN;
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), fcrtl);
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), fcrth);
+		} else {
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), 0);
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), 0);
+		}
+
+	}
+
+	/* Configure pause time (2 TCs per register) */
+	reg = hw->fc.pause_time * 0x00010001;
+	for (i = 0; i < (MAX_TRAFFIC_CLASS / 2); i++)
+		IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg);
+
+	/* Configure flow control refresh threshold value */
+	IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_start_mac_link_82598 - Configures MAC link settings
+ *  @hw: pointer to hardware structure
+ *  @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ *  Configures link settings based on values in the ixgbe_hw struct.
+ *  Restarts the link.  Performs autonegotiation if needed.
+ **/
+static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw,
+				      bool autoneg_wait_to_complete)
+{
+	u32 autoc_reg;
+	u32 links_reg;
+	u32 i;
+	s32 status = 0;
+
+	/* Restart link */
+	autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	autoc_reg |= IXGBE_AUTOC_AN_RESTART;
+	IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
+
+	/* Only poll for autoneg to complete if specified to do so */
+	if (autoneg_wait_to_complete) {
+		if ((autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
+		     IXGBE_AUTOC_LMS_KX4_AN ||
+		    (autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
+		     IXGBE_AUTOC_LMS_KX4_AN_1G_AN) {
+			links_reg = 0; /* Just in case Autoneg time = 0 */
+			for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) {
+				links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+				if (links_reg & IXGBE_LINKS_KX_AN_COMP)
+					break;
+				msleep(100);
+			}
+			if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) {
+				status = IXGBE_ERR_AUTONEG_NOT_COMPLETE;
+				hw_dbg(hw, "Autonegotiation did not complete.\n");
+			}
+		}
+	}
+
+	/* Add delay to filter out noises during initial link setup */
+	msleep(50);
+
+	return status;
+}
+
+/**
+ *  ixgbe_validate_link_ready - Function looks for phy link
+ *  @hw: pointer to hardware structure
+ *
+ *  Function indicates success when phy link is available. If phy is not ready
+ *  within 5 seconds of MAC indicating link, the function returns error.
+ **/
+static s32 ixgbe_validate_link_ready(struct ixgbe_hw *hw)
+{
+	u32 timeout;
+	u16 an_reg;
+
+	if (hw->device_id != IXGBE_DEV_ID_82598AT2)
+		return 0;
+
+	for (timeout = 0;
+	     timeout < IXGBE_VALIDATE_LINK_READY_TIMEOUT; timeout++) {
+		hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN, &an_reg);
+
+		if ((an_reg & MDIO_AN_STAT1_COMPLETE) &&
+		    (an_reg & MDIO_STAT1_LSTATUS))
+			break;
+
+		msleep(100);
+	}
+
+	if (timeout == IXGBE_VALIDATE_LINK_READY_TIMEOUT) {
+		hw_dbg(hw, "Link was indicated but link is down\n");
+		return IXGBE_ERR_LINK_SETUP;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_check_mac_link_82598 - Get link/speed status
+ *  @hw: pointer to hardware structure
+ *  @speed: pointer to link speed
+ *  @link_up: true is link is up, false otherwise
+ *  @link_up_wait_to_complete: bool used to wait for link up or not
+ *
+ *  Reads the links register to determine if link is up and the current speed
+ **/
+static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw,
+				      ixgbe_link_speed *speed, bool *link_up,
+				      bool link_up_wait_to_complete)
+{
+	u32 links_reg;
+	u32 i;
+	u16 link_reg, adapt_comp_reg;
+
+	/*
+	 * SERDES PHY requires us to read link status from register 0xC79F.
+	 * Bit 0 set indicates link is up/ready; clear indicates link down.
+	 * 0xC00C is read to check that the XAUI lanes are active.  Bit 0
+	 * clear indicates active; set indicates inactive.
+	 */
+	if (hw->phy.type == ixgbe_phy_nl) {
+		hw->phy.ops.read_reg(hw, 0xC79F, MDIO_MMD_PMAPMD, &link_reg);
+		hw->phy.ops.read_reg(hw, 0xC79F, MDIO_MMD_PMAPMD, &link_reg);
+		hw->phy.ops.read_reg(hw, 0xC00C, MDIO_MMD_PMAPMD,
+				     &adapt_comp_reg);
+		if (link_up_wait_to_complete) {
+			for (i = 0; i < IXGBE_LINK_UP_TIME; i++) {
+				if ((link_reg & 1) &&
+				    ((adapt_comp_reg & 1) == 0)) {
+					*link_up = true;
+					break;
+				} else {
+					*link_up = false;
+				}
+				msleep(100);
+				hw->phy.ops.read_reg(hw, 0xC79F,
+						     MDIO_MMD_PMAPMD,
+						     &link_reg);
+				hw->phy.ops.read_reg(hw, 0xC00C,
+						     MDIO_MMD_PMAPMD,
+						     &adapt_comp_reg);
+			}
+		} else {
+			if ((link_reg & 1) && ((adapt_comp_reg & 1) == 0))
+				*link_up = true;
+			else
+				*link_up = false;
+		}
+
+		if (!*link_up)
+			return 0;
+	}
+
+	links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+	if (link_up_wait_to_complete) {
+		for (i = 0; i < IXGBE_LINK_UP_TIME; i++) {
+			if (links_reg & IXGBE_LINKS_UP) {
+				*link_up = true;
+				break;
+			} else {
+				*link_up = false;
+			}
+			msleep(100);
+			links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+		}
+	} else {
+		if (links_reg & IXGBE_LINKS_UP)
+			*link_up = true;
+		else
+			*link_up = false;
+	}
+
+	if (links_reg & IXGBE_LINKS_SPEED)
+		*speed = IXGBE_LINK_SPEED_10GB_FULL;
+	else
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+
+	if ((hw->device_id == IXGBE_DEV_ID_82598AT2) && *link_up &&
+	    (ixgbe_validate_link_ready(hw) != 0))
+		*link_up = false;
+
+	return 0;
+}
+
+/**
+ *  ixgbe_setup_mac_link_82598 - Set MAC link speed
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ *  Set the link speed in the AUTOC register and restarts link.
+ **/
+static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw,
+				      ixgbe_link_speed speed,
+				      bool autoneg_wait_to_complete)
+{
+	bool		 autoneg	   = false;
+	ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN;
+	u32              curr_autoc        = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	u32              autoc             = curr_autoc;
+	u32              link_mode         = autoc & IXGBE_AUTOC_LMS_MASK;
+
+	/* Check to see if speed passed in is supported. */
+	ixgbe_get_link_capabilities_82598(hw, &link_capabilities, &autoneg);
+	speed &= link_capabilities;
+
+	if (speed == IXGBE_LINK_SPEED_UNKNOWN)
+		return IXGBE_ERR_LINK_SETUP;
+
+	/* Set KX4/KX support according to speed requested */
+	else if (link_mode == IXGBE_AUTOC_LMS_KX4_AN ||
+		 link_mode == IXGBE_AUTOC_LMS_KX4_AN_1G_AN) {
+		autoc &= ~IXGBE_AUTOC_KX4_KX_SUPP_MASK;
+		if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+			autoc |= IXGBE_AUTOC_KX4_SUPP;
+		if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+			autoc |= IXGBE_AUTOC_KX_SUPP;
+		if (autoc != curr_autoc)
+			IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc);
+	}
+
+	/* Setup and restart the link based on the new values in
+	 * ixgbe_hw This will write the AUTOC register based on the new
+	 * stored values
+	 */
+	return ixgbe_start_mac_link_82598(hw, autoneg_wait_to_complete);
+}
+
+
+/**
+ *  ixgbe_setup_copper_link_82598 - Set the PHY autoneg advertised field
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg_wait_to_complete: true if waiting is needed to complete
+ *
+ *  Sets the link speed in the AUTOC register in the MAC and restarts link.
+ **/
+static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw,
+					       ixgbe_link_speed speed,
+					       bool autoneg_wait_to_complete)
+{
+	s32 status;
+
+	/* Setup the PHY according to input speed */
+	status = hw->phy.ops.setup_link_speed(hw, speed,
+					      autoneg_wait_to_complete);
+	/* Set up MAC */
+	ixgbe_start_mac_link_82598(hw, autoneg_wait_to_complete);
+
+	return status;
+}
+
+/**
+ *  ixgbe_reset_hw_82598 - Performs hardware reset
+ *  @hw: pointer to hardware structure
+ *
+ *  Resets the hardware by resetting the transmit and receive units, masks and
+ *  clears all interrupts, performing a PHY reset, and performing a link (MAC)
+ *  reset.
+ **/
+static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw)
+{
+	s32 status;
+	s32 phy_status = 0;
+	u32 ctrl;
+	u32 gheccr;
+	u32 i;
+	u32 autoc;
+	u8  analog_val;
+
+	/* Call adapter stop to disable tx/rx and clear interrupts */
+	status = hw->mac.ops.stop_adapter(hw);
+	if (status)
+		return status;
+
+	/*
+	 * Power up the Atlas Tx lanes if they are currently powered down.
+	 * Atlas Tx lanes are powered down for MAC loopback tests, but
+	 * they are not automatically restored on reset.
+	 */
+	hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, &analog_val);
+	if (analog_val & IXGBE_ATLAS_PDN_TX_REG_EN) {
+		/* Enable Tx Atlas so packets can be transmitted again */
+		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK,
+					     &analog_val);
+		analog_val &= ~IXGBE_ATLAS_PDN_TX_REG_EN;
+		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK,
+					      analog_val);
+
+		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_10G,
+					     &analog_val);
+		analog_val &= ~IXGBE_ATLAS_PDN_TX_10G_QL_ALL;
+		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_10G,
+					      analog_val);
+
+		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_1G,
+					     &analog_val);
+		analog_val &= ~IXGBE_ATLAS_PDN_TX_1G_QL_ALL;
+		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_1G,
+					      analog_val);
+
+		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_AN,
+					     &analog_val);
+		analog_val &= ~IXGBE_ATLAS_PDN_TX_AN_QL_ALL;
+		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_AN,
+					      analog_val);
+	}
+
+	/* Reset PHY */
+	if (hw->phy.reset_disable == false) {
+		/* PHY ops must be identified and initialized prior to reset */
+
+		/* Init PHY and function pointers, perform SFP setup */
+		phy_status = hw->phy.ops.init(hw);
+		if (phy_status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+			return phy_status;
+		if (phy_status == IXGBE_ERR_SFP_NOT_PRESENT)
+			goto mac_reset_top;
+
+		hw->phy.ops.reset(hw);
+	}
+
+mac_reset_top:
+	/*
+	 * Issue global reset to the MAC.  This needs to be a SW reset.
+	 * If link reset is used, it might reset the MAC when mng is using it
+	 */
+	ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL) | IXGBE_CTRL_RST;
+	IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
+	IXGBE_WRITE_FLUSH(hw);
+	usleep_range(1000, 1200);
+
+	/* Poll for reset bit to self-clear indicating reset is complete */
+	for (i = 0; i < 10; i++) {
+		ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
+		if (!(ctrl & IXGBE_CTRL_RST))
+			break;
+		udelay(1);
+	}
+	if (ctrl & IXGBE_CTRL_RST) {
+		status = IXGBE_ERR_RESET_FAILED;
+		hw_dbg(hw, "Reset polling failed to complete.\n");
+	}
+
+	msleep(50);
+
+	/*
+	 * Double resets are required for recovery from certain error
+	 * conditions.  Between resets, it is necessary to stall to allow time
+	 * for any pending HW events to complete.
+	 */
+	if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) {
+		hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
+		goto mac_reset_top;
+	}
+
+	gheccr = IXGBE_READ_REG(hw, IXGBE_GHECCR);
+	gheccr &= ~(BIT(21) | BIT(18) | BIT(9) | BIT(6));
+	IXGBE_WRITE_REG(hw, IXGBE_GHECCR, gheccr);
+
+	/*
+	 * Store the original AUTOC value if it has not been
+	 * stored off yet.  Otherwise restore the stored original
+	 * AUTOC value since the reset operation sets back to deaults.
+	 */
+	autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	if (hw->mac.orig_link_settings_stored == false) {
+		hw->mac.orig_autoc = autoc;
+		hw->mac.orig_link_settings_stored = true;
+	} else if (autoc != hw->mac.orig_autoc) {
+		IXGBE_WRITE_REG(hw, IXGBE_AUTOC, hw->mac.orig_autoc);
+	}
+
+	/* Store the permanent mac address */
+	hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
+
+	/*
+	 * Store MAC address from RAR0, clear receive address registers, and
+	 * clear the multicast table
+	 */
+	hw->mac.ops.init_rx_addrs(hw);
+
+	if (phy_status)
+		status = phy_status;
+
+	return status;
+}
+
+/**
+ *  ixgbe_set_vmdq_82598 - Associate a VMDq set index with a rx address
+ *  @hw: pointer to hardware struct
+ *  @rar: receive address register index to associate with a VMDq index
+ *  @vmdq: VMDq set index
+ **/
+static s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+	u32 rar_high;
+	u32 rar_entries = hw->mac.num_rar_entries;
+
+	/* Make sure we are using a valid rar index range */
+	if (rar >= rar_entries) {
+		hw_dbg(hw, "RAR index %d is out of range.\n", rar);
+		return IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar));
+	rar_high &= ~IXGBE_RAH_VIND_MASK;
+	rar_high |= ((vmdq << IXGBE_RAH_VIND_SHIFT) & IXGBE_RAH_VIND_MASK);
+	IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high);
+	return 0;
+}
+
+/**
+ *  ixgbe_clear_vmdq_82598 - Disassociate a VMDq set index from an rx address
+ *  @hw: pointer to hardware struct
+ *  @rar: receive address register index to associate with a VMDq index
+ *  @vmdq: VMDq clear index (not used in 82598, but elsewhere)
+ **/
+static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+	u32 rar_high;
+	u32 rar_entries = hw->mac.num_rar_entries;
+
+
+	/* Make sure we are using a valid rar index range */
+	if (rar >= rar_entries) {
+		hw_dbg(hw, "RAR index %d is out of range.\n", rar);
+		return IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar));
+	if (rar_high & IXGBE_RAH_VIND_MASK) {
+		rar_high &= ~IXGBE_RAH_VIND_MASK;
+		IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_set_vfta_82598 - Set VLAN filter table
+ *  @hw: pointer to hardware structure
+ *  @vlan: VLAN id to write to VLAN filter
+ *  @vind: VMDq output index that maps queue to VLAN id in VFTA
+ *  @vlan_on: boolean flag to turn on/off VLAN in VFTA
+ *  @vlvf_bypass: boolean flag - unused
+ *
+ *  Turn on/off specified VLAN in the VLAN filter table.
+ **/
+static s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+				bool vlan_on, bool vlvf_bypass)
+{
+	u32 regindex;
+	u32 bitindex;
+	u32 bits;
+	u32 vftabyte;
+
+	if (vlan > 4095)
+		return IXGBE_ERR_PARAM;
+
+	/* Determine 32-bit word position in array */
+	regindex = (vlan >> 5) & 0x7F;   /* upper seven bits */
+
+	/* Determine the location of the (VMD) queue index */
+	vftabyte =  ((vlan >> 3) & 0x03); /* bits (4:3) indicating byte array */
+	bitindex = (vlan & 0x7) << 2;    /* lower 3 bits indicate nibble */
+
+	/* Set the nibble for VMD queue index */
+	bits = IXGBE_READ_REG(hw, IXGBE_VFTAVIND(vftabyte, regindex));
+	bits &= (~(0x0F << bitindex));
+	bits |= (vind << bitindex);
+	IXGBE_WRITE_REG(hw, IXGBE_VFTAVIND(vftabyte, regindex), bits);
+
+	/* Determine the location of the bit for this VLAN id */
+	bitindex = vlan & 0x1F;   /* lower five bits */
+
+	bits = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex));
+	if (vlan_on)
+		/* Turn on this VLAN id */
+		bits |= BIT(bitindex);
+	else
+		/* Turn off this VLAN id */
+		bits &= ~BIT(bitindex);
+	IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), bits);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_clear_vfta_82598 - Clear VLAN filter table
+ *  @hw: pointer to hardware structure
+ *
+ *  Clears the VLAN filter table, and the VMDq index associated with the filter
+ **/
+static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw)
+{
+	u32 offset;
+	u32 vlanbyte;
+
+	for (offset = 0; offset < hw->mac.vft_size; offset++)
+		IXGBE_WRITE_REG(hw, IXGBE_VFTA(offset), 0);
+
+	for (vlanbyte = 0; vlanbyte < 4; vlanbyte++)
+		for (offset = 0; offset < hw->mac.vft_size; offset++)
+			IXGBE_WRITE_REG(hw, IXGBE_VFTAVIND(vlanbyte, offset),
+					0);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_analog_reg8_82598 - Reads 8 bit Atlas analog register
+ *  @hw: pointer to hardware structure
+ *  @reg: analog register to read
+ *  @val: read value
+ *
+ *  Performs read operation to Atlas analog register specified.
+ **/
+static s32 ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val)
+{
+	u32  atlas_ctl;
+
+	IXGBE_WRITE_REG(hw, IXGBE_ATLASCTL,
+			IXGBE_ATLASCTL_WRITE_CMD | (reg << 8));
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(10);
+	atlas_ctl = IXGBE_READ_REG(hw, IXGBE_ATLASCTL);
+	*val = (u8)atlas_ctl;
+
+	return 0;
+}
+
+/**
+ *  ixgbe_write_analog_reg8_82598 - Writes 8 bit Atlas analog register
+ *  @hw: pointer to hardware structure
+ *  @reg: atlas register to write
+ *  @val: value to write
+ *
+ *  Performs write operation to Atlas analog register specified.
+ **/
+static s32 ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val)
+{
+	u32  atlas_ctl;
+
+	atlas_ctl = (reg << 8) | val;
+	IXGBE_WRITE_REG(hw, IXGBE_ATLASCTL, atlas_ctl);
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(10);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_i2c_phy_82598 - Reads 8 bit word over I2C interface.
+ *  @hw: pointer to hardware structure
+ *  @dev_addr: address to read from
+ *  @byte_offset: byte offset to read from dev_addr
+ *  @eeprom_data: value read
+ *
+ *  Performs 8 byte read operation to SFP module's data over I2C interface.
+ **/
+static s32 ixgbe_read_i2c_phy_82598(struct ixgbe_hw *hw, u8 dev_addr,
+				    u8 byte_offset, u8 *eeprom_data)
+{
+	s32 status = 0;
+	u16 sfp_addr = 0;
+	u16 sfp_data = 0;
+	u16 sfp_stat = 0;
+	u16 gssr;
+	u32 i;
+
+	if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)
+		gssr = IXGBE_GSSR_PHY1_SM;
+	else
+		gssr = IXGBE_GSSR_PHY0_SM;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0)
+		return IXGBE_ERR_SWFW_SYNC;
+
+	if (hw->phy.type == ixgbe_phy_nl) {
+		/*
+		 * phy SDA/SCL registers are at addresses 0xC30A to
+		 * 0xC30D.  These registers are used to talk to the SFP+
+		 * module's EEPROM through the SDA/SCL (I2C) interface.
+		 */
+		sfp_addr = (dev_addr << 8) + byte_offset;
+		sfp_addr = (sfp_addr | IXGBE_I2C_EEPROM_READ_MASK);
+		hw->phy.ops.write_reg_mdi(hw,
+					  IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR,
+					  MDIO_MMD_PMAPMD,
+					  sfp_addr);
+
+		/* Poll status */
+		for (i = 0; i < 100; i++) {
+			hw->phy.ops.read_reg_mdi(hw,
+						IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT,
+						MDIO_MMD_PMAPMD,
+						&sfp_stat);
+			sfp_stat = sfp_stat & IXGBE_I2C_EEPROM_STATUS_MASK;
+			if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS)
+				break;
+			usleep_range(10000, 20000);
+		}
+
+		if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_PASS) {
+			hw_dbg(hw, "EEPROM read did not pass.\n");
+			status = IXGBE_ERR_SFP_NOT_PRESENT;
+			goto out;
+		}
+
+		/* Read data */
+		hw->phy.ops.read_reg_mdi(hw, IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA,
+					MDIO_MMD_PMAPMD, &sfp_data);
+
+		*eeprom_data = (u8)(sfp_data >> 8);
+	} else {
+		status = IXGBE_ERR_PHY;
+	}
+
+out:
+	hw->mac.ops.release_swfw_sync(hw, gssr);
+	return status;
+}
+
+/**
+ *  ixgbe_read_i2c_eeprom_82598 - Reads 8 bit word over I2C interface.
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: EEPROM byte offset to read
+ *  @eeprom_data: value read
+ *
+ *  Performs 8 byte read operation to SFP module's EEPROM over I2C interface.
+ **/
+static s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset,
+				       u8 *eeprom_data)
+{
+	return ixgbe_read_i2c_phy_82598(hw, IXGBE_I2C_EEPROM_DEV_ADDR,
+					byte_offset, eeprom_data);
+}
+
+/**
+ *  ixgbe_read_i2c_sff8472_82598 - Reads 8 bit word over I2C interface.
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset at address 0xA2
+ *  @sff8472_data: value read
+ *
+ *  Performs 8 byte read operation to SFP module's SFF-8472 data over I2C
+ **/
+static s32 ixgbe_read_i2c_sff8472_82598(struct ixgbe_hw *hw, u8 byte_offset,
+				       u8 *sff8472_data)
+{
+	return ixgbe_read_i2c_phy_82598(hw, IXGBE_I2C_EEPROM_DEV_ADDR2,
+					byte_offset, sff8472_data);
+}
+
+/**
+ *  ixgbe_set_lan_id_multi_port_pcie_82598 - Set LAN id for PCIe multiple
+ *  port devices.
+ *  @hw: pointer to the HW structure
+ *
+ *  Calls common function and corrects issue with some single port devices
+ *  that enable LAN1 but not LAN0.
+ **/
+static void ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw)
+{
+	struct ixgbe_bus_info *bus = &hw->bus;
+	u16 pci_gen = 0;
+	u16 pci_ctrl2 = 0;
+
+	ixgbe_set_lan_id_multi_port_pcie(hw);
+
+	/* check if LAN0 is disabled */
+	hw->eeprom.ops.read(hw, IXGBE_PCIE_GENERAL_PTR, &pci_gen);
+	if ((pci_gen != 0) && (pci_gen != 0xFFFF)) {
+
+		hw->eeprom.ops.read(hw, pci_gen + IXGBE_PCIE_CTRL2, &pci_ctrl2);
+
+		/* if LAN0 is completely disabled force function to 0 */
+		if ((pci_ctrl2 & IXGBE_PCIE_CTRL2_LAN_DISABLE) &&
+		    !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DISABLE_SELECT) &&
+		    !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DUMMY_ENABLE)) {
+
+			bus->func = 0;
+		}
+	}
+}
+
+/**
+ * ixgbe_set_rxpba_82598 - Initialize RX packet buffer
+ * @hw: pointer to hardware structure
+ * @num_pb: number of packet buffers to allocate
+ * @headroom: reserve n KB of headroom
+ * @strategy: packet buffer allocation strategy
+ **/
+static void ixgbe_set_rxpba_82598(struct ixgbe_hw *hw, int num_pb,
+				  u32 headroom, int strategy)
+{
+	u32 rxpktsize = IXGBE_RXPBSIZE_64KB;
+	u8  i = 0;
+
+	if (!num_pb)
+		return;
+
+	/* Setup Rx packet buffer sizes */
+	switch (strategy) {
+	case PBA_STRATEGY_WEIGHTED:
+		/* Setup the first four at 80KB */
+		rxpktsize = IXGBE_RXPBSIZE_80KB;
+		for (; i < 4; i++)
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
+		/* Setup the last four at 48KB...don't re-init i */
+		rxpktsize = IXGBE_RXPBSIZE_48KB;
+		fallthrough;
+	case PBA_STRATEGY_EQUAL:
+	default:
+		/* Divide the remaining Rx packet buffer evenly among the TCs */
+		for (; i < IXGBE_MAX_PACKET_BUFFERS; i++)
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
+		break;
+	}
+
+	/* Setup Tx packet buffer sizes */
+	for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), IXGBE_TXPBSIZE_40KB);
+}
+
+static const struct ixgbe_mac_operations mac_ops_82598 = {
+	.init_hw		= &ixgbe_init_hw_generic,
+	.reset_hw		= &ixgbe_reset_hw_82598,
+	.start_hw		= &ixgbe_start_hw_82598,
+	.clear_hw_cntrs		= &ixgbe_clear_hw_cntrs_generic,
+	.get_media_type		= &ixgbe_get_media_type_82598,
+	.enable_rx_dma          = &ixgbe_enable_rx_dma_generic,
+	.get_mac_addr		= &ixgbe_get_mac_addr_generic,
+	.stop_adapter		= &ixgbe_stop_adapter_generic,
+	.get_bus_info           = &ixgbe_get_bus_info_generic,
+	.set_lan_id             = &ixgbe_set_lan_id_multi_port_pcie_82598,
+	.read_analog_reg8	= &ixgbe_read_analog_reg8_82598,
+	.write_analog_reg8	= &ixgbe_write_analog_reg8_82598,
+	.setup_link		= &ixgbe_setup_mac_link_82598,
+	.set_rxpba		= &ixgbe_set_rxpba_82598,
+	.check_link		= &ixgbe_check_mac_link_82598,
+	.get_link_capabilities	= &ixgbe_get_link_capabilities_82598,
+	.led_on			= &ixgbe_led_on_generic,
+	.led_off		= &ixgbe_led_off_generic,
+	.init_led_link_act	= ixgbe_init_led_link_act_generic,
+	.blink_led_start	= &ixgbe_blink_led_start_generic,
+	.blink_led_stop		= &ixgbe_blink_led_stop_generic,
+	.set_rar		= &ixgbe_set_rar_generic,
+	.clear_rar		= &ixgbe_clear_rar_generic,
+	.set_vmdq		= &ixgbe_set_vmdq_82598,
+	.clear_vmdq		= &ixgbe_clear_vmdq_82598,
+	.init_rx_addrs		= &ixgbe_init_rx_addrs_generic,
+	.update_mc_addr_list	= &ixgbe_update_mc_addr_list_generic,
+	.enable_mc		= &ixgbe_enable_mc_generic,
+	.disable_mc		= &ixgbe_disable_mc_generic,
+	.clear_vfta		= &ixgbe_clear_vfta_82598,
+	.set_vfta		= &ixgbe_set_vfta_82598,
+	.fc_enable		= &ixgbe_fc_enable_82598,
+	.setup_fc		= ixgbe_setup_fc_generic,
+	.fc_autoneg		= ixgbe_fc_autoneg,
+	.set_fw_drv_ver         = NULL,
+	.acquire_swfw_sync      = &ixgbe_acquire_swfw_sync,
+	.release_swfw_sync      = &ixgbe_release_swfw_sync,
+	.init_swfw_sync		= NULL,
+	.get_thermal_sensor_data = NULL,
+	.init_thermal_sensor_thresh = NULL,
+	.prot_autoc_read	= &prot_autoc_read_generic,
+	.prot_autoc_write	= &prot_autoc_write_generic,
+	.enable_rx		= &ixgbe_enable_rx_generic,
+	.disable_rx		= &ixgbe_disable_rx_generic,
+};
+
+static const struct ixgbe_eeprom_operations eeprom_ops_82598 = {
+	.init_params		= &ixgbe_init_eeprom_params_generic,
+	.read			= &ixgbe_read_eerd_generic,
+	.write			= &ixgbe_write_eeprom_generic,
+	.write_buffer		= &ixgbe_write_eeprom_buffer_bit_bang_generic,
+	.read_buffer		= &ixgbe_read_eerd_buffer_generic,
+	.calc_checksum          = &ixgbe_calc_eeprom_checksum_generic,
+	.validate_checksum	= &ixgbe_validate_eeprom_checksum_generic,
+	.update_checksum	= &ixgbe_update_eeprom_checksum_generic,
+};
+
+static const struct ixgbe_phy_operations phy_ops_82598 = {
+	.identify		= &ixgbe_identify_phy_generic,
+	.identify_sfp		= &ixgbe_identify_module_generic,
+	.init			= &ixgbe_init_phy_ops_82598,
+	.reset			= &ixgbe_reset_phy_generic,
+	.read_reg		= &ixgbe_read_phy_reg_generic,
+	.write_reg		= &ixgbe_write_phy_reg_generic,
+	.read_reg_mdi		= &ixgbe_read_phy_reg_mdi,
+	.write_reg_mdi		= &ixgbe_write_phy_reg_mdi,
+	.setup_link		= &ixgbe_setup_phy_link_generic,
+	.setup_link_speed	= &ixgbe_setup_phy_link_speed_generic,
+	.read_i2c_sff8472	= &ixgbe_read_i2c_sff8472_82598,
+	.read_i2c_eeprom	= &ixgbe_read_i2c_eeprom_82598,
+	.check_overtemp		= &ixgbe_tn_check_overtemp,
+};
+
+const struct ixgbe_info ixgbe_82598_info = {
+	.mac			= ixgbe_mac_82598EB,
+	.get_invariants		= &ixgbe_get_invariants_82598,
+	.mac_ops		= &mac_ops_82598,
+	.eeprom_ops		= &eeprom_ops_82598,
+	.phy_ops		= &phy_ops_82598,
+	.mvals			= ixgbe_mvals_8259X,
+};
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
new file mode 100644
index 0000000000..58ea959a44
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -0,0 +1,2257 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+
+#include "ixgbe.h"
+#include "ixgbe_phy.h"
+#include "ixgbe_mbx.h"
+
+#define IXGBE_82599_MAX_TX_QUEUES 128
+#define IXGBE_82599_MAX_RX_QUEUES 128
+#define IXGBE_82599_RAR_ENTRIES   128
+#define IXGBE_82599_MC_TBL_SIZE   128
+#define IXGBE_82599_VFT_TBL_SIZE  128
+#define IXGBE_82599_RX_PB_SIZE	  512
+
+static void ixgbe_disable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw);
+static void ixgbe_enable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw);
+static void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw);
+static void
+ixgbe_set_hard_rate_select_speed(struct ixgbe_hw *, ixgbe_link_speed);
+static s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw,
+					   ixgbe_link_speed speed,
+					   bool autoneg_wait_to_complete);
+static void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);
+static s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw,
+				      bool autoneg_wait_to_complete);
+static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw,
+			       ixgbe_link_speed speed,
+			       bool autoneg_wait_to_complete);
+static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw,
+					 ixgbe_link_speed speed,
+					 bool autoneg_wait_to_complete);
+static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw);
+static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+				     u8 dev_addr, u8 *data);
+static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+				      u8 dev_addr, u8 data);
+static s32 ixgbe_reset_pipeline_82599(struct ixgbe_hw *hw);
+static bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw);
+
+bool ixgbe_mng_enabled(struct ixgbe_hw *hw)
+{
+	u32 fwsm, manc, factps;
+
+	fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw));
+	if ((fwsm & IXGBE_FWSM_MODE_MASK) != IXGBE_FWSM_FW_MODE_PT)
+		return false;
+
+	manc = IXGBE_READ_REG(hw, IXGBE_MANC);
+	if (!(manc & IXGBE_MANC_RCV_TCO_EN))
+		return false;
+
+	factps = IXGBE_READ_REG(hw, IXGBE_FACTPS(hw));
+	if (factps & IXGBE_FACTPS_MNGCG)
+		return false;
+
+	return true;
+}
+
+static void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+
+	/* enable the laser control functions for SFP+ fiber
+	 * and MNG not enabled
+	 */
+	if ((mac->ops.get_media_type(hw) == ixgbe_media_type_fiber) &&
+	    !ixgbe_mng_enabled(hw)) {
+		mac->ops.disable_tx_laser =
+				       &ixgbe_disable_tx_laser_multispeed_fiber;
+		mac->ops.enable_tx_laser =
+					&ixgbe_enable_tx_laser_multispeed_fiber;
+		mac->ops.flap_tx_laser = &ixgbe_flap_tx_laser_multispeed_fiber;
+	} else {
+		mac->ops.disable_tx_laser = NULL;
+		mac->ops.enable_tx_laser = NULL;
+		mac->ops.flap_tx_laser = NULL;
+	}
+
+	if (hw->phy.multispeed_fiber) {
+		/* Set up dual speed SFP+ support */
+		mac->ops.setup_link = &ixgbe_setup_mac_link_multispeed_fiber;
+		mac->ops.setup_mac_link = ixgbe_setup_mac_link_82599;
+		mac->ops.set_rate_select_speed =
+					       ixgbe_set_hard_rate_select_speed;
+	} else {
+		if ((mac->ops.get_media_type(hw) ==
+		     ixgbe_media_type_backplane) &&
+		    (hw->phy.smart_speed == ixgbe_smart_speed_auto ||
+		     hw->phy.smart_speed == ixgbe_smart_speed_on) &&
+		     !ixgbe_verify_lesm_fw_enabled_82599(hw))
+			mac->ops.setup_link = &ixgbe_setup_mac_link_smartspeed;
+		else
+			mac->ops.setup_link = &ixgbe_setup_mac_link_82599;
+	}
+}
+
+static s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw)
+{
+	s32 ret_val;
+	u16 list_offset, data_offset, data_value;
+
+	if (hw->phy.sfp_type != ixgbe_sfp_type_unknown) {
+		ixgbe_init_mac_link_ops_82599(hw);
+
+		hw->phy.ops.reset = NULL;
+
+		ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset,
+							      &data_offset);
+		if (ret_val)
+			return ret_val;
+
+		/* PHY config will finish before releasing the semaphore */
+		ret_val = hw->mac.ops.acquire_swfw_sync(hw,
+							IXGBE_GSSR_MAC_CSR_SM);
+		if (ret_val)
+			return IXGBE_ERR_SWFW_SYNC;
+
+		if (hw->eeprom.ops.read(hw, ++data_offset, &data_value))
+			goto setup_sfp_err;
+		while (data_value != 0xffff) {
+			IXGBE_WRITE_REG(hw, IXGBE_CORECTL, data_value);
+			IXGBE_WRITE_FLUSH(hw);
+			if (hw->eeprom.ops.read(hw, ++data_offset, &data_value))
+				goto setup_sfp_err;
+		}
+
+		/* Release the semaphore */
+		hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
+		/*
+		 * Delay obtaining semaphore again to allow FW access,
+		 * semaphore_delay is in ms usleep_range needs us.
+		 */
+		usleep_range(hw->eeprom.semaphore_delay * 1000,
+			     hw->eeprom.semaphore_delay * 2000);
+
+		/* Restart DSP and set SFI mode */
+		ret_val = hw->mac.ops.prot_autoc_write(hw,
+			hw->mac.orig_autoc | IXGBE_AUTOC_LMS_10G_SERIAL,
+			false);
+
+		if (ret_val) {
+			hw_dbg(hw, " sfp module setup not complete\n");
+			return IXGBE_ERR_SFP_SETUP_NOT_COMPLETE;
+		}
+	}
+
+	return 0;
+
+setup_sfp_err:
+	/* Release the semaphore */
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
+	/* Delay obtaining semaphore again to allow FW access,
+	 * semaphore_delay is in ms usleep_range needs us.
+	 */
+	usleep_range(hw->eeprom.semaphore_delay * 1000,
+		     hw->eeprom.semaphore_delay * 2000);
+	hw_err(hw, "eeprom read at offset %d failed\n", data_offset);
+	return IXGBE_ERR_SFP_SETUP_NOT_COMPLETE;
+}
+
+/**
+ *  prot_autoc_read_82599 - Hides MAC differences needed for AUTOC read
+ *  @hw: pointer to hardware structure
+ *  @locked: Return the if we locked for this read.
+ *  @reg_val: Value we read from AUTOC
+ *
+ *  For this part (82599) we need to wrap read-modify-writes with a possible
+ *  FW/SW lock.  It is assumed this lock will be freed with the next
+ *  prot_autoc_write_82599().  Note, that locked can only be true in cases
+ *  where this function doesn't return an error.
+ **/
+static s32 prot_autoc_read_82599(struct ixgbe_hw *hw, bool *locked,
+				 u32 *reg_val)
+{
+	s32 ret_val;
+
+	*locked = false;
+	/* If LESM is on then we need to hold the SW/FW semaphore. */
+	if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
+		ret_val = hw->mac.ops.acquire_swfw_sync(hw,
+					IXGBE_GSSR_MAC_CSR_SM);
+		if (ret_val)
+			return IXGBE_ERR_SWFW_SYNC;
+
+		*locked = true;
+	}
+
+	*reg_val = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	return 0;
+}
+
+/**
+ * prot_autoc_write_82599 - Hides MAC differences needed for AUTOC write
+ * @hw: pointer to hardware structure
+ * @autoc: value to write to AUTOC
+ * @locked: bool to indicate whether the SW/FW lock was already taken by
+ *	     previous proc_autoc_read_82599.
+ *
+ * This part (82599) may need to hold a the SW/FW lock around all writes to
+ * AUTOC. Likewise after a write we need to do a pipeline reset.
+ **/
+static s32 prot_autoc_write_82599(struct ixgbe_hw *hw, u32 autoc, bool locked)
+{
+	s32 ret_val = 0;
+
+	/* Blocked by MNG FW so bail */
+	if (ixgbe_check_reset_blocked(hw))
+		goto out;
+
+	/* We only need to get the lock if:
+	 *  - We didn't do it already (in the read part of a read-modify-write)
+	 *  - LESM is enabled.
+	 */
+	if (!locked && ixgbe_verify_lesm_fw_enabled_82599(hw)) {
+		ret_val = hw->mac.ops.acquire_swfw_sync(hw,
+					IXGBE_GSSR_MAC_CSR_SM);
+		if (ret_val)
+			return IXGBE_ERR_SWFW_SYNC;
+
+		locked = true;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc);
+	ret_val = ixgbe_reset_pipeline_82599(hw);
+
+out:
+	/* Free the SW/FW semaphore as we either grabbed it here or
+	 * already had it when this function was called.
+	 */
+	if (locked)
+		hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
+
+	return ret_val;
+}
+
+static s32 ixgbe_get_invariants_82599(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+
+	ixgbe_init_mac_link_ops_82599(hw);
+
+	mac->mcft_size = IXGBE_82599_MC_TBL_SIZE;
+	mac->vft_size = IXGBE_82599_VFT_TBL_SIZE;
+	mac->num_rar_entries = IXGBE_82599_RAR_ENTRIES;
+	mac->rx_pb_size = IXGBE_82599_RX_PB_SIZE;
+	mac->max_rx_queues = IXGBE_82599_MAX_RX_QUEUES;
+	mac->max_tx_queues = IXGBE_82599_MAX_TX_QUEUES;
+	mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_init_phy_ops_82599 - PHY/SFP specific init
+ *  @hw: pointer to hardware structure
+ *
+ *  Initialize any function pointers that were not able to be
+ *  set during get_invariants because the PHY/SFP type was
+ *  not known.  Perform the SFP init if necessary.
+ *
+ **/
+static s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	struct ixgbe_phy_info *phy = &hw->phy;
+	s32 ret_val;
+	u32 esdp;
+
+	if (hw->device_id == IXGBE_DEV_ID_82599_QSFP_SF_QP) {
+		/* Store flag indicating I2C bus access control unit. */
+		hw->phy.qsfp_shared_i2c_bus = true;
+
+		/* Initialize access to QSFP+ I2C bus */
+		esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+		esdp |= IXGBE_ESDP_SDP0_DIR;
+		esdp &= ~IXGBE_ESDP_SDP1_DIR;
+		esdp &= ~IXGBE_ESDP_SDP0;
+		esdp &= ~IXGBE_ESDP_SDP0_NATIVE;
+		esdp &= ~IXGBE_ESDP_SDP1_NATIVE;
+		IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+		IXGBE_WRITE_FLUSH(hw);
+
+		phy->ops.read_i2c_byte = &ixgbe_read_i2c_byte_82599;
+		phy->ops.write_i2c_byte = &ixgbe_write_i2c_byte_82599;
+	}
+
+	/* Identify the PHY or SFP module */
+	ret_val = phy->ops.identify(hw);
+
+	/* Setup function pointers based on detected SFP module and speeds */
+	ixgbe_init_mac_link_ops_82599(hw);
+
+	/* If copper media, overwrite with copper function pointers */
+	if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) {
+		mac->ops.setup_link = &ixgbe_setup_copper_link_82599;
+		mac->ops.get_link_capabilities =
+			&ixgbe_get_copper_link_capabilities_generic;
+	}
+
+	/* Set necessary function pointers based on phy type */
+	switch (hw->phy.type) {
+	case ixgbe_phy_tn:
+		phy->ops.check_link = &ixgbe_check_phy_link_tnx;
+		phy->ops.setup_link = &ixgbe_setup_phy_link_tnx;
+		break;
+	default:
+		break;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  ixgbe_get_link_capabilities_82599 - Determines link capabilities
+ *  @hw: pointer to hardware structure
+ *  @speed: pointer to link speed
+ *  @autoneg: true when autoneg or autotry is enabled
+ *
+ *  Determines the link capabilities by reading the AUTOC register.
+ **/
+static s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw,
+					     ixgbe_link_speed *speed,
+					     bool *autoneg)
+{
+	u32 autoc = 0;
+
+	/* Determine 1G link capabilities off of SFP+ type */
+	if (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
+	    hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
+	    hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
+	    hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
+	    hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
+	    hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) {
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+		*autoneg = true;
+		return 0;
+	}
+
+	/*
+	 * Determine link capabilities based on the stored value of AUTOC,
+	 * which represents EEPROM defaults.  If AUTOC value has not been
+	 * stored, use the current register value.
+	 */
+	if (hw->mac.orig_link_settings_stored)
+		autoc = hw->mac.orig_autoc;
+	else
+		autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+
+	switch (autoc & IXGBE_AUTOC_LMS_MASK) {
+	case IXGBE_AUTOC_LMS_1G_LINK_NO_AN:
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+		*autoneg = false;
+		break;
+
+	case IXGBE_AUTOC_LMS_10G_LINK_NO_AN:
+		*speed = IXGBE_LINK_SPEED_10GB_FULL;
+		*autoneg = false;
+		break;
+
+	case IXGBE_AUTOC_LMS_1G_AN:
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+		*autoneg = true;
+		break;
+
+	case IXGBE_AUTOC_LMS_10G_SERIAL:
+		*speed = IXGBE_LINK_SPEED_10GB_FULL;
+		*autoneg = false;
+		break;
+
+	case IXGBE_AUTOC_LMS_KX4_KX_KR:
+	case IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN:
+		*speed = IXGBE_LINK_SPEED_UNKNOWN;
+		if (autoc & IXGBE_AUTOC_KR_SUPP)
+			*speed |= IXGBE_LINK_SPEED_10GB_FULL;
+		if (autoc & IXGBE_AUTOC_KX4_SUPP)
+			*speed |= IXGBE_LINK_SPEED_10GB_FULL;
+		if (autoc & IXGBE_AUTOC_KX_SUPP)
+			*speed |= IXGBE_LINK_SPEED_1GB_FULL;
+		*autoneg = true;
+		break;
+
+	case IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII:
+		*speed = IXGBE_LINK_SPEED_100_FULL;
+		if (autoc & IXGBE_AUTOC_KR_SUPP)
+			*speed |= IXGBE_LINK_SPEED_10GB_FULL;
+		if (autoc & IXGBE_AUTOC_KX4_SUPP)
+			*speed |= IXGBE_LINK_SPEED_10GB_FULL;
+		if (autoc & IXGBE_AUTOC_KX_SUPP)
+			*speed |= IXGBE_LINK_SPEED_1GB_FULL;
+		*autoneg = true;
+		break;
+
+	case IXGBE_AUTOC_LMS_SGMII_1G_100M:
+		*speed = IXGBE_LINK_SPEED_1GB_FULL | IXGBE_LINK_SPEED_100_FULL;
+		*autoneg = false;
+		break;
+
+	default:
+		return IXGBE_ERR_LINK_SETUP;
+	}
+
+	if (hw->phy.multispeed_fiber) {
+		*speed |= IXGBE_LINK_SPEED_10GB_FULL |
+			  IXGBE_LINK_SPEED_1GB_FULL;
+
+		/* QSFP must not enable auto-negotiation */
+		if (hw->phy.media_type == ixgbe_media_type_fiber_qsfp)
+			*autoneg = false;
+		else
+			*autoneg = true;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_media_type_82599 - Get media type
+ *  @hw: pointer to hardware structure
+ *
+ *  Returns the media type (fiber, copper, backplane)
+ **/
+static enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw)
+{
+	/* Detect if there is a copper PHY attached. */
+	switch (hw->phy.type) {
+	case ixgbe_phy_cu_unknown:
+	case ixgbe_phy_tn:
+		return ixgbe_media_type_copper;
+
+	default:
+		break;
+	}
+
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_82599_KX4:
+	case IXGBE_DEV_ID_82599_KX4_MEZZ:
+	case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
+	case IXGBE_DEV_ID_82599_KR:
+	case IXGBE_DEV_ID_82599_BACKPLANE_FCOE:
+	case IXGBE_DEV_ID_82599_XAUI_LOM:
+		/* Default device ID is mezzanine card KX/KX4 */
+		return ixgbe_media_type_backplane;
+
+	case IXGBE_DEV_ID_82599_SFP:
+	case IXGBE_DEV_ID_82599_SFP_FCOE:
+	case IXGBE_DEV_ID_82599_SFP_EM:
+	case IXGBE_DEV_ID_82599_SFP_SF2:
+	case IXGBE_DEV_ID_82599_SFP_SF_QP:
+	case IXGBE_DEV_ID_82599EN_SFP:
+		return ixgbe_media_type_fiber;
+
+	case IXGBE_DEV_ID_82599_CX4:
+		return ixgbe_media_type_cx4;
+
+	case IXGBE_DEV_ID_82599_T3_LOM:
+		return ixgbe_media_type_copper;
+
+	case IXGBE_DEV_ID_82599_LS:
+		return ixgbe_media_type_fiber_lco;
+
+	case IXGBE_DEV_ID_82599_QSFP_SF_QP:
+		return ixgbe_media_type_fiber_qsfp;
+
+	default:
+		return ixgbe_media_type_unknown;
+	}
+}
+
+/**
+ * ixgbe_stop_mac_link_on_d3_82599 - Disables link on D3
+ * @hw: pointer to hardware structure
+ *
+ * Disables link, should be called during D3 power down sequence.
+ *
+ **/
+static void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw)
+{
+	u32 autoc2_reg;
+	u16 ee_ctrl_2 = 0;
+
+	hw->eeprom.ops.read(hw, IXGBE_EEPROM_CTRL_2, &ee_ctrl_2);
+
+	if (!ixgbe_mng_present(hw) && !hw->wol_enabled &&
+	    ee_ctrl_2 & IXGBE_EEPROM_CCD_BIT) {
+		autoc2_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+		autoc2_reg |= IXGBE_AUTOC2_LINK_DISABLE_ON_D3_MASK;
+		IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2_reg);
+	}
+}
+
+/**
+ *  ixgbe_start_mac_link_82599 - Setup MAC link settings
+ *  @hw: pointer to hardware structure
+ *  @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ *  Configures link settings based on values in the ixgbe_hw struct.
+ *  Restarts the link.  Performs autonegotiation if needed.
+ **/
+static s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw,
+			       bool autoneg_wait_to_complete)
+{
+	u32 autoc_reg;
+	u32 links_reg;
+	u32 i;
+	s32 status = 0;
+	bool got_lock = false;
+
+	if (ixgbe_verify_lesm_fw_enabled_82599(hw)) {
+		status = hw->mac.ops.acquire_swfw_sync(hw,
+						IXGBE_GSSR_MAC_CSR_SM);
+		if (status)
+			return status;
+
+		got_lock = true;
+	}
+
+	/* Restart link */
+	ixgbe_reset_pipeline_82599(hw);
+
+	if (got_lock)
+		hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM);
+
+	/* Only poll for autoneg to complete if specified to do so */
+	if (autoneg_wait_to_complete) {
+		autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+		if ((autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
+		     IXGBE_AUTOC_LMS_KX4_KX_KR ||
+		    (autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
+		     IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN ||
+		    (autoc_reg & IXGBE_AUTOC_LMS_MASK) ==
+		     IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) {
+			links_reg = 0; /* Just in case Autoneg time = 0 */
+			for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) {
+				links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+				if (links_reg & IXGBE_LINKS_KX_AN_COMP)
+					break;
+				msleep(100);
+			}
+			if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) {
+				status = IXGBE_ERR_AUTONEG_NOT_COMPLETE;
+				hw_dbg(hw, "Autoneg did not complete.\n");
+			}
+		}
+	}
+
+	/* Add delay to filter out noises during initial link setup */
+	msleep(50);
+
+	return status;
+}
+
+/**
+ *  ixgbe_disable_tx_laser_multispeed_fiber - Disable Tx laser
+ *  @hw: pointer to hardware structure
+ *
+ *  The base drivers may require better control over SFP+ module
+ *  PHY states.  This includes selectively shutting down the Tx
+ *  laser on the PHY, effectively halting physical link.
+ **/
+static void ixgbe_disable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw)
+{
+	u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP);
+
+	/* Blocked by MNG FW so bail */
+	if (ixgbe_check_reset_blocked(hw))
+		return;
+
+	/* Disable tx laser; allow 100us to go dark per spec */
+	esdp_reg |= IXGBE_ESDP_SDP3;
+	IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(100);
+}
+
+/**
+ *  ixgbe_enable_tx_laser_multispeed_fiber - Enable Tx laser
+ *  @hw: pointer to hardware structure
+ *
+ *  The base drivers may require better control over SFP+ module
+ *  PHY states.  This includes selectively turning on the Tx
+ *  laser on the PHY, effectively starting physical link.
+ **/
+static void ixgbe_enable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw)
+{
+	u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP);
+
+	/* Enable tx laser; allow 100ms to light up */
+	esdp_reg &= ~IXGBE_ESDP_SDP3;
+	IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
+	IXGBE_WRITE_FLUSH(hw);
+	msleep(100);
+}
+
+/**
+ *  ixgbe_flap_tx_laser_multispeed_fiber - Flap Tx laser
+ *  @hw: pointer to hardware structure
+ *
+ *  When the driver changes the link speeds that it can support,
+ *  it sets autotry_restart to true to indicate that we need to
+ *  initiate a new autotry session with the link partner.  To do
+ *  so, we set the speed then disable and re-enable the tx laser, to
+ *  alert the link partner that it also needs to restart autotry on its
+ *  end.  This is consistent with true clause 37 autoneg, which also
+ *  involves a loss of signal.
+ **/
+static void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw)
+{
+	/* Blocked by MNG FW so bail */
+	if (ixgbe_check_reset_blocked(hw))
+		return;
+
+	if (hw->mac.autotry_restart) {
+		ixgbe_disable_tx_laser_multispeed_fiber(hw);
+		ixgbe_enable_tx_laser_multispeed_fiber(hw);
+		hw->mac.autotry_restart = false;
+	}
+}
+
+/**
+ * ixgbe_set_hard_rate_select_speed - Set module link speed
+ * @hw: pointer to hardware structure
+ * @speed: link speed to set
+ *
+ * Set module link speed via RS0/RS1 rate select pins.
+ */
+static void
+ixgbe_set_hard_rate_select_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed)
+{
+	u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP);
+
+	switch (speed) {
+	case IXGBE_LINK_SPEED_10GB_FULL:
+		esdp_reg |= (IXGBE_ESDP_SDP5_DIR | IXGBE_ESDP_SDP5);
+		break;
+	case IXGBE_LINK_SPEED_1GB_FULL:
+		esdp_reg &= ~IXGBE_ESDP_SDP5;
+		esdp_reg |= IXGBE_ESDP_SDP5_DIR;
+		break;
+	default:
+		hw_dbg(hw, "Invalid fixed module speed\n");
+		return;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg);
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ *  ixgbe_setup_mac_link_smartspeed - Set MAC link speed using SmartSpeed
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ *  Implements the Intel SmartSpeed algorithm.
+ **/
+static s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw,
+				     ixgbe_link_speed speed,
+				     bool autoneg_wait_to_complete)
+{
+	s32 status = 0;
+	ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN;
+	s32 i, j;
+	bool link_up = false;
+	u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+
+	 /* Set autoneg_advertised value based on input link speed */
+	hw->phy.autoneg_advertised = 0;
+
+	if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL;
+
+	if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL;
+
+	if (speed & IXGBE_LINK_SPEED_100_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL;
+
+	/*
+	 * Implement Intel SmartSpeed algorithm.  SmartSpeed will reduce the
+	 * autoneg advertisement if link is unable to be established at the
+	 * highest negotiated rate.  This can sometimes happen due to integrity
+	 * issues with the physical media connection.
+	 */
+
+	/* First, try to get link with full advertisement */
+	hw->phy.smart_speed_active = false;
+	for (j = 0; j < IXGBE_SMARTSPEED_MAX_RETRIES; j++) {
+		status = ixgbe_setup_mac_link_82599(hw, speed,
+						    autoneg_wait_to_complete);
+		if (status != 0)
+			goto out;
+
+		/*
+		 * Wait for the controller to acquire link.  Per IEEE 802.3ap,
+		 * Section 73.10.2, we may have to wait up to 500ms if KR is
+		 * attempted, or 200ms if KX/KX4/BX/BX4 is attempted, per
+		 * Table 9 in the AN MAS.
+		 */
+		for (i = 0; i < 5; i++) {
+			mdelay(100);
+
+			/* If we have link, just jump out */
+			status = hw->mac.ops.check_link(hw, &link_speed,
+							&link_up, false);
+			if (status != 0)
+				goto out;
+
+			if (link_up)
+				goto out;
+		}
+	}
+
+	/*
+	 * We didn't get link.  If we advertised KR plus one of KX4/KX
+	 * (or BX4/BX), then disable KR and try again.
+	 */
+	if (((autoc_reg & IXGBE_AUTOC_KR_SUPP) == 0) ||
+	    ((autoc_reg & IXGBE_AUTOC_KX4_KX_SUPP_MASK) == 0))
+		goto out;
+
+	/* Turn SmartSpeed on to disable KR support */
+	hw->phy.smart_speed_active = true;
+	status = ixgbe_setup_mac_link_82599(hw, speed,
+					    autoneg_wait_to_complete);
+	if (status != 0)
+		goto out;
+
+	/*
+	 * Wait for the controller to acquire link.  600ms will allow for
+	 * the AN link_fail_inhibit_timer as well for multiple cycles of
+	 * parallel detect, both 10g and 1g. This allows for the maximum
+	 * connect attempts as defined in the AN MAS table 73-7.
+	 */
+	for (i = 0; i < 6; i++) {
+		mdelay(100);
+
+		/* If we have link, just jump out */
+		status = hw->mac.ops.check_link(hw, &link_speed,
+						&link_up, false);
+		if (status != 0)
+			goto out;
+
+		if (link_up)
+			goto out;
+	}
+
+	/* We didn't get link.  Turn SmartSpeed back off. */
+	hw->phy.smart_speed_active = false;
+	status = ixgbe_setup_mac_link_82599(hw, speed,
+					    autoneg_wait_to_complete);
+
+out:
+	if (link_up && (link_speed == IXGBE_LINK_SPEED_1GB_FULL))
+		hw_dbg(hw, "Smartspeed has downgraded the link speed from the maximum advertised\n");
+	return status;
+}
+
+/**
+ *  ixgbe_setup_mac_link_82599 - Set MAC link speed
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ *  Set the link speed in the AUTOC register and restarts link.
+ **/
+static s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw,
+				      ixgbe_link_speed speed,
+				      bool autoneg_wait_to_complete)
+{
+	bool autoneg = false;
+	s32 status;
+	u32 pma_pmd_1g, link_mode, links_reg, i;
+	u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+	u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK;
+	ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN;
+
+	/* holds the value of AUTOC register at this current point in time */
+	u32 current_autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	/* holds the cached value of AUTOC register */
+	u32 orig_autoc = 0;
+	/* temporary variable used for comparison purposes */
+	u32 autoc = current_autoc;
+
+	/* Check to see if speed passed in is supported. */
+	status = hw->mac.ops.get_link_capabilities(hw, &link_capabilities,
+						   &autoneg);
+	if (status)
+		return status;
+
+	speed &= link_capabilities;
+
+	if (speed == IXGBE_LINK_SPEED_UNKNOWN)
+		return IXGBE_ERR_LINK_SETUP;
+
+	/* Use stored value (EEPROM defaults) of AUTOC to find KR/KX4 support*/
+	if (hw->mac.orig_link_settings_stored)
+		orig_autoc = hw->mac.orig_autoc;
+	else
+		orig_autoc = autoc;
+
+	link_mode = autoc & IXGBE_AUTOC_LMS_MASK;
+	pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK;
+
+	if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR ||
+	    link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN ||
+	    link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) {
+		/* Set KX4/KX/KR support according to speed requested */
+		autoc &= ~(IXGBE_AUTOC_KX4_KX_SUPP_MASK | IXGBE_AUTOC_KR_SUPP);
+		if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
+			if (orig_autoc & IXGBE_AUTOC_KX4_SUPP)
+				autoc |= IXGBE_AUTOC_KX4_SUPP;
+			if ((orig_autoc & IXGBE_AUTOC_KR_SUPP) &&
+			    (hw->phy.smart_speed_active == false))
+				autoc |= IXGBE_AUTOC_KR_SUPP;
+		}
+		if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+			autoc |= IXGBE_AUTOC_KX_SUPP;
+	} else if ((pma_pmd_1g == IXGBE_AUTOC_1G_SFI) &&
+		   (link_mode == IXGBE_AUTOC_LMS_1G_LINK_NO_AN ||
+		    link_mode == IXGBE_AUTOC_LMS_1G_AN)) {
+		/* Switch from 1G SFI to 10G SFI if requested */
+		if ((speed == IXGBE_LINK_SPEED_10GB_FULL) &&
+		    (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI)) {
+			autoc &= ~IXGBE_AUTOC_LMS_MASK;
+			autoc |= IXGBE_AUTOC_LMS_10G_SERIAL;
+		}
+	} else if ((pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI) &&
+		   (link_mode == IXGBE_AUTOC_LMS_10G_SERIAL)) {
+		/* Switch from 10G SFI to 1G SFI if requested */
+		if ((speed == IXGBE_LINK_SPEED_1GB_FULL) &&
+		    (pma_pmd_1g == IXGBE_AUTOC_1G_SFI)) {
+			autoc &= ~IXGBE_AUTOC_LMS_MASK;
+			if (autoneg)
+				autoc |= IXGBE_AUTOC_LMS_1G_AN;
+			else
+				autoc |= IXGBE_AUTOC_LMS_1G_LINK_NO_AN;
+		}
+	}
+
+	if (autoc != current_autoc) {
+		/* Restart link */
+		status = hw->mac.ops.prot_autoc_write(hw, autoc, false);
+		if (status)
+			return status;
+
+		/* Only poll for autoneg to complete if specified to do so */
+		if (autoneg_wait_to_complete) {
+			if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR ||
+			    link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN ||
+			    link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) {
+				links_reg = 0; /*Just in case Autoneg time=0*/
+				for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) {
+					links_reg =
+					       IXGBE_READ_REG(hw, IXGBE_LINKS);
+					if (links_reg & IXGBE_LINKS_KX_AN_COMP)
+						break;
+					msleep(100);
+				}
+				if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) {
+					status =
+						IXGBE_ERR_AUTONEG_NOT_COMPLETE;
+					hw_dbg(hw, "Autoneg did not complete.\n");
+				}
+			}
+		}
+
+		/* Add delay to filter out noises during initial link setup */
+		msleep(50);
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_setup_copper_link_82599 - Set the PHY autoneg advertised field
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg_wait_to_complete: true if waiting is needed to complete
+ *
+ *  Restarts link on PHY and MAC based on settings passed in.
+ **/
+static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw,
+					 ixgbe_link_speed speed,
+					 bool autoneg_wait_to_complete)
+{
+	s32 status;
+
+	/* Setup the PHY according to input speed */
+	status = hw->phy.ops.setup_link_speed(hw, speed,
+					      autoneg_wait_to_complete);
+	/* Set up MAC */
+	ixgbe_start_mac_link_82599(hw, autoneg_wait_to_complete);
+
+	return status;
+}
+
+/**
+ *  ixgbe_reset_hw_82599 - Perform hardware reset
+ *  @hw: pointer to hardware structure
+ *
+ *  Resets the hardware by resetting the transmit and receive units, masks
+ *  and clears all interrupts, perform a PHY reset, and perform a link (MAC)
+ *  reset.
+ **/
+static s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw)
+{
+	ixgbe_link_speed link_speed;
+	s32 status;
+	u32 ctrl, i, autoc, autoc2;
+	u32 curr_lms;
+	bool link_up = false;
+
+	/* Call adapter stop to disable tx/rx and clear interrupts */
+	status = hw->mac.ops.stop_adapter(hw);
+	if (status)
+		return status;
+
+	/* flush pending Tx transactions */
+	ixgbe_clear_tx_pending(hw);
+
+	/* PHY ops must be identified and initialized prior to reset */
+
+	/* Identify PHY and related function pointers */
+	status = hw->phy.ops.init(hw);
+
+	if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+		return status;
+
+	/* Setup SFP module if there is one present. */
+	if (hw->phy.sfp_setup_needed) {
+		status = hw->mac.ops.setup_sfp(hw);
+		hw->phy.sfp_setup_needed = false;
+	}
+
+	if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+		return status;
+
+	/* Reset PHY */
+	if (hw->phy.reset_disable == false && hw->phy.ops.reset != NULL)
+		hw->phy.ops.reset(hw);
+
+	/* remember AUTOC from before we reset */
+	curr_lms = IXGBE_READ_REG(hw, IXGBE_AUTOC) & IXGBE_AUTOC_LMS_MASK;
+
+mac_reset_top:
+	/*
+	 * Issue global reset to the MAC. Needs to be SW reset if link is up.
+	 * If link reset is used when link is up, it might reset the PHY when
+	 * mng is using it.  If link is down or the flag to force full link
+	 * reset is set, then perform link reset.
+	 */
+	ctrl = IXGBE_CTRL_LNK_RST;
+	if (!hw->force_full_reset) {
+		hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
+		if (link_up)
+			ctrl = IXGBE_CTRL_RST;
+	}
+
+	ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
+	IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
+	IXGBE_WRITE_FLUSH(hw);
+	usleep_range(1000, 1200);
+
+	/* Poll for reset bit to self-clear indicating reset is complete */
+	for (i = 0; i < 10; i++) {
+		ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
+		if (!(ctrl & IXGBE_CTRL_RST_MASK))
+			break;
+		udelay(1);
+	}
+
+	if (ctrl & IXGBE_CTRL_RST_MASK) {
+		status = IXGBE_ERR_RESET_FAILED;
+		hw_dbg(hw, "Reset polling failed to complete.\n");
+	}
+
+	msleep(50);
+
+	/*
+	 * Double resets are required for recovery from certain error
+	 * conditions.  Between resets, it is necessary to stall to allow time
+	 * for any pending HW events to complete.
+	 */
+	if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) {
+		hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
+		goto mac_reset_top;
+	}
+
+	/*
+	 * Store the original AUTOC/AUTOC2 values if they have not been
+	 * stored off yet.  Otherwise restore the stored original
+	 * values since the reset operation sets back to defaults.
+	 */
+	autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+
+	/* Enable link if disabled in NVM */
+	if (autoc2 & IXGBE_AUTOC2_LINK_DISABLE_MASK) {
+		autoc2 &= ~IXGBE_AUTOC2_LINK_DISABLE_MASK;
+		IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2);
+		IXGBE_WRITE_FLUSH(hw);
+	}
+
+	if (hw->mac.orig_link_settings_stored == false) {
+		hw->mac.orig_autoc = autoc;
+		hw->mac.orig_autoc2 = autoc2;
+		hw->mac.orig_link_settings_stored = true;
+	} else {
+
+		/* If MNG FW is running on a multi-speed device that
+		 * doesn't autoneg with out driver support we need to
+		 * leave LMS in the state it was before we MAC reset.
+		 * Likewise if we support WoL we don't want change the
+		 * LMS state either.
+		 */
+		if ((hw->phy.multispeed_fiber && ixgbe_mng_enabled(hw)) ||
+		    hw->wol_enabled)
+			hw->mac.orig_autoc =
+				(hw->mac.orig_autoc & ~IXGBE_AUTOC_LMS_MASK) |
+				curr_lms;
+
+		if (autoc != hw->mac.orig_autoc) {
+			status = hw->mac.ops.prot_autoc_write(hw,
+							hw->mac.orig_autoc,
+							false);
+			if (status)
+				return status;
+		}
+
+		if ((autoc2 & IXGBE_AUTOC2_UPPER_MASK) !=
+		    (hw->mac.orig_autoc2 & IXGBE_AUTOC2_UPPER_MASK)) {
+			autoc2 &= ~IXGBE_AUTOC2_UPPER_MASK;
+			autoc2 |= (hw->mac.orig_autoc2 &
+				   IXGBE_AUTOC2_UPPER_MASK);
+			IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2);
+		}
+	}
+
+	/* Store the permanent mac address */
+	hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
+
+	/*
+	 * Store MAC address from RAR0, clear receive address registers, and
+	 * clear the multicast table.  Also reset num_rar_entries to 128,
+	 * since we modify this value when programming the SAN MAC address.
+	 */
+	hw->mac.num_rar_entries = IXGBE_82599_RAR_ENTRIES;
+	hw->mac.ops.init_rx_addrs(hw);
+
+	/* Store the permanent SAN mac address */
+	hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);
+
+	/* Add the SAN MAC address to the RAR only if it's a valid address */
+	if (is_valid_ether_addr(hw->mac.san_addr)) {
+		/* Save the SAN MAC RAR index */
+		hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
+
+		hw->mac.ops.set_rar(hw, hw->mac.san_mac_rar_index,
+				    hw->mac.san_addr, 0, IXGBE_RAH_AV);
+
+		/* clear VMDq pool/queue selection for this RAR */
+		hw->mac.ops.clear_vmdq(hw, hw->mac.san_mac_rar_index,
+				       IXGBE_CLEAR_VMDQ_ALL);
+
+		/* Reserve the last RAR for the SAN MAC address */
+		hw->mac.num_rar_entries--;
+	}
+
+	/* Store the alternative WWNN/WWPN prefix */
+	hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix,
+				       &hw->mac.wwpn_prefix);
+
+	return status;
+}
+
+/**
+ * ixgbe_fdir_check_cmd_complete - poll to check whether FDIRCMD is complete
+ * @hw: pointer to hardware structure
+ * @fdircmd: current value of FDIRCMD register
+ */
+static s32 ixgbe_fdir_check_cmd_complete(struct ixgbe_hw *hw, u32 *fdircmd)
+{
+	int i;
+
+	for (i = 0; i < IXGBE_FDIRCMD_CMD_POLL; i++) {
+		*fdircmd = IXGBE_READ_REG(hw, IXGBE_FDIRCMD);
+		if (!(*fdircmd & IXGBE_FDIRCMD_CMD_MASK))
+			return 0;
+		udelay(10);
+	}
+
+	return IXGBE_ERR_FDIR_CMD_INCOMPLETE;
+}
+
+/**
+ *  ixgbe_reinit_fdir_tables_82599 - Reinitialize Flow Director tables.
+ *  @hw: pointer to hardware structure
+ **/
+s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw)
+{
+	int i;
+	u32 fdirctrl = IXGBE_READ_REG(hw, IXGBE_FDIRCTRL);
+	u32 fdircmd;
+	s32 err;
+
+	fdirctrl &= ~IXGBE_FDIRCTRL_INIT_DONE;
+
+	/*
+	 * Before starting reinitialization process,
+	 * FDIRCMD.CMD must be zero.
+	 */
+	err = ixgbe_fdir_check_cmd_complete(hw, &fdircmd);
+	if (err) {
+		hw_dbg(hw, "Flow Director previous command did not complete, aborting table re-initialization.\n");
+		return err;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRFREE, 0);
+	IXGBE_WRITE_FLUSH(hw);
+	/*
+	 * 82599 adapters flow director init flow cannot be restarted,
+	 * Workaround 82599 silicon errata by performing the following steps
+	 * before re-writing the FDIRCTRL control register with the same value.
+	 * - write 1 to bit 8 of FDIRCMD register &
+	 * - write 0 to bit 8 of FDIRCMD register
+	 */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
+			(IXGBE_READ_REG(hw, IXGBE_FDIRCMD) |
+			 IXGBE_FDIRCMD_CLEARHT));
+	IXGBE_WRITE_FLUSH(hw);
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
+			(IXGBE_READ_REG(hw, IXGBE_FDIRCMD) &
+			 ~IXGBE_FDIRCMD_CLEARHT));
+	IXGBE_WRITE_FLUSH(hw);
+	/*
+	 * Clear FDIR Hash register to clear any leftover hashes
+	 * waiting to be programmed.
+	 */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, 0x00);
+	IXGBE_WRITE_FLUSH(hw);
+
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl);
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Poll init-done after we write FDIRCTRL register */
+	for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) {
+		if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) &
+				   IXGBE_FDIRCTRL_INIT_DONE)
+			break;
+		usleep_range(1000, 2000);
+	}
+	if (i >= IXGBE_FDIR_INIT_DONE_POLL) {
+		hw_dbg(hw, "Flow Director Signature poll time exceeded!\n");
+		return IXGBE_ERR_FDIR_REINIT_FAILED;
+	}
+
+	/* Clear FDIR statistics registers (read to clear) */
+	IXGBE_READ_REG(hw, IXGBE_FDIRUSTAT);
+	IXGBE_READ_REG(hw, IXGBE_FDIRFSTAT);
+	IXGBE_READ_REG(hw, IXGBE_FDIRMATCH);
+	IXGBE_READ_REG(hw, IXGBE_FDIRMISS);
+	IXGBE_READ_REG(hw, IXGBE_FDIRLEN);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_fdir_enable_82599 - Initialize Flow Director control registers
+ *  @hw: pointer to hardware structure
+ *  @fdirctrl: value to write to flow director control register
+ **/
+static void ixgbe_fdir_enable_82599(struct ixgbe_hw *hw, u32 fdirctrl)
+{
+	int i;
+
+	/* Prime the keys for hashing */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY);
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY);
+
+	/*
+	 * Poll init-done after we write the register.  Estimated times:
+	 *      10G: PBALLOC = 11b, timing is 60us
+	 *       1G: PBALLOC = 11b, timing is 600us
+	 *     100M: PBALLOC = 11b, timing is 6ms
+	 *
+	 *     Multiple these timings by 4 if under full Rx load
+	 *
+	 * So we'll poll for IXGBE_FDIR_INIT_DONE_POLL times, sleeping for
+	 * 1 msec per poll time.  If we're at line rate and drop to 100M, then
+	 * this might not finish in our poll time, but we can live with that
+	 * for now.
+	 */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl);
+	IXGBE_WRITE_FLUSH(hw);
+	for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) {
+		if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) &
+				   IXGBE_FDIRCTRL_INIT_DONE)
+			break;
+		usleep_range(1000, 2000);
+	}
+
+	if (i >= IXGBE_FDIR_INIT_DONE_POLL)
+		hw_dbg(hw, "Flow Director poll time exceeded!\n");
+}
+
+/**
+ *  ixgbe_init_fdir_signature_82599 - Initialize Flow Director signature filters
+ *  @hw: pointer to hardware structure
+ *  @fdirctrl: value to write to flow director control register, initially
+ *             contains just the value of the Rx packet buffer allocation
+ **/
+s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl)
+{
+	/*
+	 * Continue setup of fdirctrl register bits:
+	 *  Move the flexible bytes to use the ethertype - shift 6 words
+	 *  Set the maximum length per hash bucket to 0xA filters
+	 *  Send interrupt when 64 filters are left
+	 */
+	fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) |
+		    (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) |
+		    (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT);
+
+	/* write hashes and fdirctrl register, poll for completion */
+	ixgbe_fdir_enable_82599(hw, fdirctrl);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_init_fdir_perfect_82599 - Initialize Flow Director perfect filters
+ *  @hw: pointer to hardware structure
+ *  @fdirctrl: value to write to flow director control register, initially
+ *             contains just the value of the Rx packet buffer allocation
+ **/
+s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl)
+{
+	/*
+	 * Continue setup of fdirctrl register bits:
+	 *  Turn perfect match filtering on
+	 *  Initialize the drop queue
+	 *  Move the flexible bytes to use the ethertype - shift 6 words
+	 *  Set the maximum length per hash bucket to 0xA filters
+	 *  Send interrupt when 64 (0x4 * 16) filters are left
+	 */
+	fdirctrl |= IXGBE_FDIRCTRL_PERFECT_MATCH |
+		    (IXGBE_FDIR_DROP_QUEUE << IXGBE_FDIRCTRL_DROP_Q_SHIFT) |
+		    (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) |
+		    (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) |
+		    (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT);
+
+	/* write hashes and fdirctrl register, poll for completion */
+	ixgbe_fdir_enable_82599(hw, fdirctrl);
+
+	return 0;
+}
+
+/*
+ * These defines allow us to quickly generate all of the necessary instructions
+ * in the function below by simply calling out IXGBE_COMPUTE_SIG_HASH_ITERATION
+ * for values 0 through 15
+ */
+#define IXGBE_ATR_COMMON_HASH_KEY \
+		(IXGBE_ATR_BUCKET_HASH_KEY & IXGBE_ATR_SIGNATURE_HASH_KEY)
+#define IXGBE_COMPUTE_SIG_HASH_ITERATION(_n) \
+do { \
+	u32 n = (_n); \
+	if (IXGBE_ATR_COMMON_HASH_KEY & BIT(n)) \
+		common_hash ^= lo_hash_dword >> n; \
+	else if (IXGBE_ATR_BUCKET_HASH_KEY & BIT(n)) \
+		bucket_hash ^= lo_hash_dword >> n; \
+	else if (IXGBE_ATR_SIGNATURE_HASH_KEY & BIT(n)) \
+		sig_hash ^= lo_hash_dword << (16 - n); \
+	if (IXGBE_ATR_COMMON_HASH_KEY & BIT(n + 16)) \
+		common_hash ^= hi_hash_dword >> n; \
+	else if (IXGBE_ATR_BUCKET_HASH_KEY & BIT(n + 16)) \
+		bucket_hash ^= hi_hash_dword >> n; \
+	else if (IXGBE_ATR_SIGNATURE_HASH_KEY & BIT(n + 16)) \
+		sig_hash ^= hi_hash_dword << (16 - n); \
+} while (0)
+
+/**
+ *  ixgbe_atr_compute_sig_hash_82599 - Compute the signature hash
+ *  @input: input bitstream to compute the hash on
+ *  @common: compressed common input dword
+ *
+ *  This function is almost identical to the function above but contains
+ *  several optimizations such as unwinding all of the loops, letting the
+ *  compiler work out all of the conditional ifs since the keys are static
+ *  defines, and computing two keys at once since the hashed dword stream
+ *  will be the same for both keys.
+ **/
+static u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input,
+					    union ixgbe_atr_hash_dword common)
+{
+	u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
+	u32 sig_hash = 0, bucket_hash = 0, common_hash = 0;
+
+	/* record the flow_vm_vlan bits as they are a key part to the hash */
+	flow_vm_vlan = ntohl(input.dword);
+
+	/* generate common hash dword */
+	hi_hash_dword = ntohl(common.dword);
+
+	/* low dword is word swapped version of common */
+	lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
+
+	/* apply flow ID/VM pool/VLAN ID bits to hash words */
+	hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
+
+	/* Process bits 0 and 16 */
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(0);
+
+	/*
+	 * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to
+	 * delay this because bit 0 of the stream should not be processed
+	 * so we do not add the vlan until after bit 0 was processed
+	 */
+	lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
+
+	/* Process remaining 30 bit of the key */
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(1);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(2);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(3);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(4);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(5);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(6);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(7);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(8);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(9);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(10);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(11);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(12);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(13);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(14);
+	IXGBE_COMPUTE_SIG_HASH_ITERATION(15);
+
+	/* combine common_hash result with signature and bucket hashes */
+	bucket_hash ^= common_hash;
+	bucket_hash &= IXGBE_ATR_HASH_MASK;
+
+	sig_hash ^= common_hash << 16;
+	sig_hash &= IXGBE_ATR_HASH_MASK << 16;
+
+	/* return completed signature hash */
+	return sig_hash ^ bucket_hash;
+}
+
+/**
+ *  ixgbe_fdir_add_signature_filter_82599 - Adds a signature hash filter
+ *  @hw: pointer to hardware structure
+ *  @input: unique input dword
+ *  @common: compressed common input dword
+ *  @queue: queue index to direct traffic to
+ *
+ * Note that the tunnel bit in input must not be set when the hardware
+ * tunneling support does not exist.
+ **/
+s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_hash_dword input,
+					  union ixgbe_atr_hash_dword common,
+					  u8 queue)
+{
+	u64 fdirhashcmd;
+	u8 flow_type;
+	bool tunnel;
+	u32 fdircmd;
+
+	/*
+	 * Get the flow_type in order to program FDIRCMD properly
+	 * lowest 2 bits are FDIRCMD.L4TYPE, third lowest bit is FDIRCMD.IPV6
+	 */
+	tunnel = !!(input.formatted.flow_type & IXGBE_ATR_L4TYPE_TUNNEL_MASK);
+	flow_type = input.formatted.flow_type &
+		    (IXGBE_ATR_L4TYPE_TUNNEL_MASK - 1);
+	switch (flow_type) {
+	case IXGBE_ATR_FLOW_TYPE_TCPV4:
+	case IXGBE_ATR_FLOW_TYPE_UDPV4:
+	case IXGBE_ATR_FLOW_TYPE_SCTPV4:
+	case IXGBE_ATR_FLOW_TYPE_TCPV6:
+	case IXGBE_ATR_FLOW_TYPE_UDPV6:
+	case IXGBE_ATR_FLOW_TYPE_SCTPV6:
+		break;
+	default:
+		hw_dbg(hw, " Error on flow type input\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	/* configure FDIRCMD register */
+	fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
+		  IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
+	fdircmd |= (u32)flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT;
+	fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
+	if (tunnel)
+		fdircmd |= IXGBE_FDIRCMD_TUNNEL_FILTER;
+
+	/*
+	 * The lower 32-bits of fdirhashcmd is for FDIRHASH, the upper 32-bits
+	 * is for FDIRCMD.  Then do a 64-bit register write from FDIRHASH.
+	 */
+	fdirhashcmd = (u64)fdircmd << 32;
+	fdirhashcmd |= ixgbe_atr_compute_sig_hash_82599(input, common);
+	IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd);
+
+	hw_dbg(hw, "Tx Queue=%x hash=%x\n", queue, (u32)fdirhashcmd);
+
+	return 0;
+}
+
+#define IXGBE_COMPUTE_BKT_HASH_ITERATION(_n) \
+do { \
+	u32 n = (_n); \
+	if (IXGBE_ATR_BUCKET_HASH_KEY & BIT(n)) \
+		bucket_hash ^= lo_hash_dword >> n; \
+	if (IXGBE_ATR_BUCKET_HASH_KEY & BIT(n + 16)) \
+		bucket_hash ^= hi_hash_dword >> n; \
+} while (0)
+
+/**
+ *  ixgbe_atr_compute_perfect_hash_82599 - Compute the perfect filter hash
+ *  @input: input bitstream to compute the hash on
+ *  @input_mask: mask for the input bitstream
+ *
+ *  This function serves two main purposes.  First it applies the input_mask
+ *  to the atr_input resulting in a cleaned up atr_input data stream.
+ *  Secondly it computes the hash and stores it in the bkt_hash field at
+ *  the end of the input byte stream.  This way it will be available for
+ *  future use without needing to recompute the hash.
+ **/
+void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
+					  union ixgbe_atr_input *input_mask)
+{
+
+	u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan;
+	u32 bucket_hash = 0;
+	__be32 hi_dword = 0;
+	int i;
+
+	/* Apply masks to input data */
+	for (i = 0; i <= 10; i++)
+		input->dword_stream[i] &= input_mask->dword_stream[i];
+
+	/* record the flow_vm_vlan bits as they are a key part to the hash */
+	flow_vm_vlan = ntohl(input->dword_stream[0]);
+
+	/* generate common hash dword */
+	for (i = 1; i <= 10; i++)
+		hi_dword ^= input->dword_stream[i];
+	hi_hash_dword = ntohl(hi_dword);
+
+	/* low dword is word swapped version of common */
+	lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16);
+
+	/* apply flow ID/VM pool/VLAN ID bits to hash words */
+	hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16);
+
+	/* Process bits 0 and 16 */
+	IXGBE_COMPUTE_BKT_HASH_ITERATION(0);
+
+	/*
+	 * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to
+	 * delay this because bit 0 of the stream should not be processed
+	 * so we do not add the vlan until after bit 0 was processed
+	 */
+	lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16);
+
+	/* Process remaining 30 bit of the key */
+	for (i = 1; i <= 15; i++)
+		IXGBE_COMPUTE_BKT_HASH_ITERATION(i);
+
+	/*
+	 * Limit hash to 13 bits since max bucket count is 8K.
+	 * Store result at the end of the input stream.
+	 */
+	input->formatted.bkt_hash = (__force __be16)(bucket_hash & 0x1FFF);
+}
+
+/**
+ *  ixgbe_get_fdirtcpm_82599 - generate a tcp port from atr_input_masks
+ *  @input_mask: mask to be bit swapped
+ *
+ *  The source and destination port masks for flow director are bit swapped
+ *  in that bit 15 effects bit 0, 14 effects 1, 13, 2 etc.  In order to
+ *  generate a correctly swapped value we need to bit swap the mask and that
+ *  is what is accomplished by this function.
+ **/
+static u32 ixgbe_get_fdirtcpm_82599(union ixgbe_atr_input *input_mask)
+{
+	u32 mask = ntohs(input_mask->formatted.dst_port);
+
+	mask <<= IXGBE_FDIRTCPM_DPORTM_SHIFT;
+	mask |= ntohs(input_mask->formatted.src_port);
+	mask = ((mask & 0x55555555) << 1) | ((mask & 0xAAAAAAAA) >> 1);
+	mask = ((mask & 0x33333333) << 2) | ((mask & 0xCCCCCCCC) >> 2);
+	mask = ((mask & 0x0F0F0F0F) << 4) | ((mask & 0xF0F0F0F0) >> 4);
+	return ((mask & 0x00FF00FF) << 8) | ((mask & 0xFF00FF00) >> 8);
+}
+
+/*
+ * These two macros are meant to address the fact that we have registers
+ * that are either all or in part big-endian.  As a result on big-endian
+ * systems we will end up byte swapping the value to little-endian before
+ * it is byte swapped again and written to the hardware in the original
+ * big-endian format.
+ */
+#define IXGBE_STORE_AS_BE32(_value) \
+	(((u32)(_value) >> 24) | (((u32)(_value) & 0x00FF0000) >> 8) | \
+	 (((u32)(_value) & 0x0000FF00) << 8) | ((u32)(_value) << 24))
+
+#define IXGBE_WRITE_REG_BE32(a, reg, value) \
+	IXGBE_WRITE_REG((a), (reg), IXGBE_STORE_AS_BE32(ntohl(value)))
+
+#define IXGBE_STORE_AS_BE16(_value) __swab16(ntohs((_value)))
+
+s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
+				    union ixgbe_atr_input *input_mask)
+{
+	/* mask IPv6 since it is currently not supported */
+	u32 fdirm = IXGBE_FDIRM_DIPv6;
+	u32 fdirtcpm;
+
+	/*
+	 * Program the relevant mask registers.  If src/dst_port or src/dst_addr
+	 * are zero, then assume a full mask for that field.  Also assume that
+	 * a VLAN of 0 is unspecified, so mask that out as well.  L4type
+	 * cannot be masked out in this implementation.
+	 *
+	 * This also assumes IPv4 only.  IPv6 masking isn't supported at this
+	 * point in time.
+	 */
+
+	/* verify bucket hash is cleared on hash generation */
+	if (input_mask->formatted.bkt_hash)
+		hw_dbg(hw, " bucket hash should always be 0 in mask\n");
+
+	/* Program FDIRM and verify partial masks */
+	switch (input_mask->formatted.vm_pool & 0x7F) {
+	case 0x0:
+		fdirm |= IXGBE_FDIRM_POOL;
+		break;
+	case 0x7F:
+		break;
+	default:
+		hw_dbg(hw, " Error on vm pool mask\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	switch (input_mask->formatted.flow_type & IXGBE_ATR_L4TYPE_MASK) {
+	case 0x0:
+		fdirm |= IXGBE_FDIRM_L4P;
+		if (input_mask->formatted.dst_port ||
+		    input_mask->formatted.src_port) {
+			hw_dbg(hw, " Error on src/dst port mask\n");
+			return IXGBE_ERR_CONFIG;
+		}
+		break;
+	case IXGBE_ATR_L4TYPE_MASK:
+		break;
+	default:
+		hw_dbg(hw, " Error on flow type mask\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	switch (ntohs(input_mask->formatted.vlan_id) & 0xEFFF) {
+	case 0x0000:
+		/* mask VLAN ID */
+		fdirm |= IXGBE_FDIRM_VLANID;
+		fallthrough;
+	case 0x0FFF:
+		/* mask VLAN priority */
+		fdirm |= IXGBE_FDIRM_VLANP;
+		break;
+	case 0xE000:
+		/* mask VLAN ID only */
+		fdirm |= IXGBE_FDIRM_VLANID;
+		fallthrough;
+	case 0xEFFF:
+		/* no VLAN fields masked */
+		break;
+	default:
+		hw_dbg(hw, " Error on VLAN mask\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	switch ((__force u16)input_mask->formatted.flex_bytes & 0xFFFF) {
+	case 0x0000:
+		/* Mask Flex Bytes */
+		fdirm |= IXGBE_FDIRM_FLEX;
+		fallthrough;
+	case 0xFFFF:
+		break;
+	default:
+		hw_dbg(hw, " Error on flexible byte mask\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	/* Now mask VM pool and destination IPv6 - bits 5 and 2 */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm);
+
+	/* store the TCP/UDP port masks, bit reversed from port layout */
+	fdirtcpm = ixgbe_get_fdirtcpm_82599(input_mask);
+
+	/* write both the same so that UDP and TCP use the same mask */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, ~fdirtcpm);
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, ~fdirtcpm);
+
+	/* also use it for SCTP */
+	switch (hw->mac.type) {
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		IXGBE_WRITE_REG(hw, IXGBE_FDIRSCTPM, ~fdirtcpm);
+		break;
+	default:
+		break;
+	}
+
+	/* store source and destination IP masks (big-enian) */
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIP4M,
+			     ~input_mask->formatted.src_ip[0]);
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRDIP4M,
+			     ~input_mask->formatted.dst_ip[0]);
+
+	return 0;
+}
+
+s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_input *input,
+					  u16 soft_id, u8 queue)
+{
+	u32 fdirport, fdirvlan, fdirhash, fdircmd;
+	s32 err;
+
+	/* currently IPv6 is not supported, must be programmed with 0 */
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(0),
+			     input->formatted.src_ip[0]);
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(1),
+			     input->formatted.src_ip[1]);
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(2),
+			     input->formatted.src_ip[2]);
+
+	/* record the source address (big-endian) */
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPSA, input->formatted.src_ip[0]);
+
+	/* record the first 32 bits of the destination address (big-endian) */
+	IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPDA, input->formatted.dst_ip[0]);
+
+	/* record source and destination port (little-endian)*/
+	fdirport = be16_to_cpu(input->formatted.dst_port);
+	fdirport <<= IXGBE_FDIRPORT_DESTINATION_SHIFT;
+	fdirport |= be16_to_cpu(input->formatted.src_port);
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRPORT, fdirport);
+
+	/* record vlan (little-endian) and flex_bytes(big-endian) */
+	fdirvlan = IXGBE_STORE_AS_BE16(input->formatted.flex_bytes);
+	fdirvlan <<= IXGBE_FDIRVLAN_FLEX_SHIFT;
+	fdirvlan |= ntohs(input->formatted.vlan_id);
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, fdirvlan);
+
+	/* configure FDIRHASH register */
+	fdirhash = (__force u32)input->formatted.bkt_hash;
+	fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
+
+	/*
+	 * flush all previous writes to make certain registers are
+	 * programmed prior to issuing the command
+	 */
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* configure FDIRCMD register */
+	fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE |
+		  IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN;
+	if (queue == IXGBE_FDIR_DROP_QUEUE)
+		fdircmd |= IXGBE_FDIRCMD_DROP;
+	fdircmd |= input->formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT;
+	fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT;
+	fdircmd |= (u32)input->formatted.vm_pool << IXGBE_FDIRCMD_VT_POOL_SHIFT;
+
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, fdircmd);
+	err = ixgbe_fdir_check_cmd_complete(hw, &fdircmd);
+	if (err) {
+		hw_dbg(hw, "Flow Director command did not complete!\n");
+		return err;
+	}
+
+	return 0;
+}
+
+s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
+					  union ixgbe_atr_input *input,
+					  u16 soft_id)
+{
+	u32 fdirhash;
+	u32 fdircmd;
+	s32 err;
+
+	/* configure FDIRHASH register */
+	fdirhash = (__force u32)input->formatted.bkt_hash;
+	fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT;
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
+
+	/* flush hash to HW */
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Query if filter is present */
+	IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, IXGBE_FDIRCMD_CMD_QUERY_REM_FILT);
+
+	err = ixgbe_fdir_check_cmd_complete(hw, &fdircmd);
+	if (err) {
+		hw_dbg(hw, "Flow Director command did not complete!\n");
+		return err;
+	}
+
+	/* if filter exists in hardware then remove it */
+	if (fdircmd & IXGBE_FDIRCMD_FILTER_VALID) {
+		IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash);
+		IXGBE_WRITE_FLUSH(hw);
+		IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD,
+				IXGBE_FDIRCMD_CMD_REMOVE_FLOW);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_analog_reg8_82599 - Reads 8 bit Omer analog register
+ *  @hw: pointer to hardware structure
+ *  @reg: analog register to read
+ *  @val: read value
+ *
+ *  Performs read operation to Omer analog register specified.
+ **/
+static s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val)
+{
+	u32  core_ctl;
+
+	IXGBE_WRITE_REG(hw, IXGBE_CORECTL, IXGBE_CORECTL_WRITE_CMD |
+			(reg << 8));
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(10);
+	core_ctl = IXGBE_READ_REG(hw, IXGBE_CORECTL);
+	*val = (u8)core_ctl;
+
+	return 0;
+}
+
+/**
+ *  ixgbe_write_analog_reg8_82599 - Writes 8 bit Omer analog register
+ *  @hw: pointer to hardware structure
+ *  @reg: atlas register to write
+ *  @val: value to write
+ *
+ *  Performs write operation to Omer analog register specified.
+ **/
+static s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val)
+{
+	u32  core_ctl;
+
+	core_ctl = (reg << 8) | val;
+	IXGBE_WRITE_REG(hw, IXGBE_CORECTL, core_ctl);
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(10);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_start_hw_82599 - Prepare hardware for Tx/Rx
+ *  @hw: pointer to hardware structure
+ *
+ *  Starts the hardware using the generic start_hw function
+ *  and the generation start_hw function.
+ *  Then performs revision-specific operations, if any.
+ **/
+static s32 ixgbe_start_hw_82599(struct ixgbe_hw *hw)
+{
+	s32 ret_val = 0;
+
+	ret_val = ixgbe_start_hw_generic(hw);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = ixgbe_start_hw_gen2(hw);
+	if (ret_val)
+		return ret_val;
+
+	/* We need to run link autotry after the driver loads */
+	hw->mac.autotry_restart = true;
+
+	return ixgbe_verify_fw_version_82599(hw);
+}
+
+/**
+ *  ixgbe_identify_phy_82599 - Get physical layer module
+ *  @hw: pointer to hardware structure
+ *
+ *  Determines the physical layer module found on the current adapter.
+ *  If PHY already detected, maintains current PHY type in hw struct,
+ *  otherwise executes the PHY detection routine.
+ **/
+static s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw)
+{
+	s32 status;
+
+	/* Detect PHY if not unknown - returns success if already detected. */
+	status = ixgbe_identify_phy_generic(hw);
+	if (status) {
+		/* 82599 10GBASE-T requires an external PHY */
+		if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_copper)
+			return status;
+		status = ixgbe_identify_module_generic(hw);
+	}
+
+	/* Set PHY type none if no PHY detected */
+	if (hw->phy.type == ixgbe_phy_unknown) {
+		hw->phy.type = ixgbe_phy_none;
+		status = 0;
+	}
+
+	/* Return error if SFP module has been detected but is not supported */
+	if (hw->phy.type == ixgbe_phy_sfp_unsupported)
+		return IXGBE_ERR_SFP_NOT_SUPPORTED;
+
+	return status;
+}
+
+/**
+ *  ixgbe_enable_rx_dma_82599 - Enable the Rx DMA unit on 82599
+ *  @hw: pointer to hardware structure
+ *  @regval: register value to write to RXCTRL
+ *
+ *  Enables the Rx DMA unit for 82599
+ **/
+static s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval)
+{
+	/*
+	 * Workaround for 82599 silicon errata when enabling the Rx datapath.
+	 * If traffic is incoming before we enable the Rx unit, it could hang
+	 * the Rx DMA unit.  Therefore, make sure the security engine is
+	 * completely disabled prior to enabling the Rx unit.
+	 */
+	hw->mac.ops.disable_rx_buff(hw);
+
+	if (regval & IXGBE_RXCTRL_RXEN)
+		hw->mac.ops.enable_rx(hw);
+	else
+		hw->mac.ops.disable_rx(hw);
+
+	hw->mac.ops.enable_rx_buff(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_verify_fw_version_82599 - verify fw version for 82599
+ *  @hw: pointer to hardware structure
+ *
+ *  Verifies that installed the firmware version is 0.6 or higher
+ *  for SFI devices. All 82599 SFI devices should have version 0.6 or higher.
+ *
+ *  Returns IXGBE_ERR_EEPROM_VERSION if the FW is not present or
+ *  if the FW version is not supported.
+ **/
+static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw)
+{
+	s32 status = IXGBE_ERR_EEPROM_VERSION;
+	u16 fw_offset, fw_ptp_cfg_offset;
+	u16 offset;
+	u16 fw_version = 0;
+
+	/* firmware check is only necessary for SFI devices */
+	if (hw->phy.media_type != ixgbe_media_type_fiber)
+		return 0;
+
+	/* get the offset to the Firmware Module block */
+	offset = IXGBE_FW_PTR;
+	if (hw->eeprom.ops.read(hw, offset, &fw_offset))
+		goto fw_version_err;
+
+	if (fw_offset == 0 || fw_offset == 0xFFFF)
+		return IXGBE_ERR_EEPROM_VERSION;
+
+	/* get the offset to the Pass Through Patch Configuration block */
+	offset = fw_offset + IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR;
+	if (hw->eeprom.ops.read(hw, offset, &fw_ptp_cfg_offset))
+		goto fw_version_err;
+
+	if (fw_ptp_cfg_offset == 0 || fw_ptp_cfg_offset == 0xFFFF)
+		return IXGBE_ERR_EEPROM_VERSION;
+
+	/* get the firmware version */
+	offset = fw_ptp_cfg_offset + IXGBE_FW_PATCH_VERSION_4;
+	if (hw->eeprom.ops.read(hw, offset, &fw_version))
+		goto fw_version_err;
+
+	if (fw_version > 0x5)
+		status = 0;
+
+	return status;
+
+fw_version_err:
+	hw_err(hw, "eeprom read at offset %d failed\n", offset);
+	return IXGBE_ERR_EEPROM_VERSION;
+}
+
+/**
+ *  ixgbe_verify_lesm_fw_enabled_82599 - Checks LESM FW module state.
+ *  @hw: pointer to hardware structure
+ *
+ *  Returns true if the LESM FW module is present and enabled. Otherwise
+ *  returns false. Smart Speed must be disabled if LESM FW module is enabled.
+ **/
+static bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw)
+{
+	u16 fw_offset, fw_lesm_param_offset, fw_lesm_state;
+	s32 status;
+
+	/* get the offset to the Firmware Module block */
+	status = hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset);
+
+	if (status || fw_offset == 0 || fw_offset == 0xFFFF)
+		return false;
+
+	/* get the offset to the LESM Parameters block */
+	status = hw->eeprom.ops.read(hw, (fw_offset +
+				     IXGBE_FW_LESM_PARAMETERS_PTR),
+				     &fw_lesm_param_offset);
+
+	if (status ||
+	    fw_lesm_param_offset == 0 || fw_lesm_param_offset == 0xFFFF)
+		return false;
+
+	/* get the lesm state word */
+	status = hw->eeprom.ops.read(hw, (fw_lesm_param_offset +
+				     IXGBE_FW_LESM_STATE_1),
+				     &fw_lesm_state);
+
+	if (!status && (fw_lesm_state & IXGBE_FW_LESM_STATE_ENABLED))
+		return true;
+
+	return false;
+}
+
+/**
+ *  ixgbe_read_eeprom_buffer_82599 - Read EEPROM word(s) using
+ *  fastest available method
+ *
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in EEPROM to read
+ *  @words: number of words
+ *  @data: word(s) read from the EEPROM
+ *
+ *  Retrieves 16 bit word(s) read from EEPROM
+ **/
+static s32 ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset,
+					  u16 words, u16 *data)
+{
+	struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+
+	/* If EEPROM is detected and can be addressed using 14 bits,
+	 * use EERD otherwise use bit bang
+	 */
+	if (eeprom->type == ixgbe_eeprom_spi &&
+	    offset + (words - 1) <= IXGBE_EERD_MAX_ADDR)
+		return ixgbe_read_eerd_buffer_generic(hw, offset, words, data);
+
+	return ixgbe_read_eeprom_buffer_bit_bang_generic(hw, offset, words,
+							 data);
+}
+
+/**
+ *  ixgbe_read_eeprom_82599 - Read EEPROM word using
+ *  fastest available method
+ *
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to read
+ *  @data: word read from the EEPROM
+ *
+ *  Reads a 16 bit word from the EEPROM
+ **/
+static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw,
+				   u16 offset, u16 *data)
+{
+	struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+
+	/*
+	 * If EEPROM is detected and can be addressed using 14 bits,
+	 * use EERD otherwise use bit bang
+	 */
+	if (eeprom->type == ixgbe_eeprom_spi && offset <= IXGBE_EERD_MAX_ADDR)
+		return ixgbe_read_eerd_generic(hw, offset, data);
+
+	return ixgbe_read_eeprom_bit_bang_generic(hw, offset, data);
+}
+
+/**
+ * ixgbe_reset_pipeline_82599 - perform pipeline reset
+ *
+ * @hw: pointer to hardware structure
+ *
+ * Reset pipeline by asserting Restart_AN together with LMS change to ensure
+ * full pipeline reset.  Note - We must hold the SW/FW semaphore before writing
+ * to AUTOC, so this function assumes the semaphore is held.
+ **/
+static s32 ixgbe_reset_pipeline_82599(struct ixgbe_hw *hw)
+{
+	s32 ret_val;
+	u32 anlp1_reg = 0;
+	u32 i, autoc_reg, autoc2_reg;
+
+	/* Enable link if disabled in NVM */
+	autoc2_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+	if (autoc2_reg & IXGBE_AUTOC2_LINK_DISABLE_MASK) {
+		autoc2_reg &= ~IXGBE_AUTOC2_LINK_DISABLE_MASK;
+		IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2_reg);
+		IXGBE_WRITE_FLUSH(hw);
+	}
+
+	autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	autoc_reg |= IXGBE_AUTOC_AN_RESTART;
+
+	/* Write AUTOC register with toggled LMS[2] bit and Restart_AN */
+	IXGBE_WRITE_REG(hw, IXGBE_AUTOC,
+			autoc_reg ^ (0x4 << IXGBE_AUTOC_LMS_SHIFT));
+
+	/* Wait for AN to leave state 0 */
+	for (i = 0; i < 10; i++) {
+		usleep_range(4000, 8000);
+		anlp1_reg = IXGBE_READ_REG(hw, IXGBE_ANLP1);
+		if (anlp1_reg & IXGBE_ANLP1_AN_STATE_MASK)
+			break;
+	}
+
+	if (!(anlp1_reg & IXGBE_ANLP1_AN_STATE_MASK)) {
+		hw_dbg(hw, "auto negotiation not completed\n");
+		ret_val = IXGBE_ERR_RESET_FAILED;
+		goto reset_pipeline_out;
+	}
+
+	ret_val = 0;
+
+reset_pipeline_out:
+	/* Write AUTOC register with original LMS field and Restart_AN */
+	IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return ret_val;
+}
+
+/**
+ *  ixgbe_read_i2c_byte_82599 - Reads 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to read
+ *  @dev_addr: address to read from
+ *  @data: value read
+ *
+ *  Performs byte read operation to SFP module's EEPROM over I2C interface at
+ *  a specified device address.
+ **/
+static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+				     u8 dev_addr, u8 *data)
+{
+	u32 esdp;
+	s32 status;
+	s32 timeout = 200;
+
+	if (hw->phy.qsfp_shared_i2c_bus == true) {
+		/* Acquire I2C bus ownership. */
+		esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+		esdp |= IXGBE_ESDP_SDP0;
+		IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+		IXGBE_WRITE_FLUSH(hw);
+
+		while (timeout) {
+			esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+			if (esdp & IXGBE_ESDP_SDP1)
+				break;
+
+			usleep_range(5000, 10000);
+			timeout--;
+		}
+
+		if (!timeout) {
+			hw_dbg(hw, "Driver can't access resource, acquiring I2C bus timeout.\n");
+			status = IXGBE_ERR_I2C;
+			goto release_i2c_access;
+		}
+	}
+
+	status = ixgbe_read_i2c_byte_generic(hw, byte_offset, dev_addr, data);
+
+release_i2c_access:
+	if (hw->phy.qsfp_shared_i2c_bus == true) {
+		/* Release I2C bus ownership. */
+		esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+		esdp &= ~IXGBE_ESDP_SDP0;
+		IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+		IXGBE_WRITE_FLUSH(hw);
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_write_i2c_byte_82599 - Writes 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to write
+ *  @dev_addr: address to write to
+ *  @data: value to write
+ *
+ *  Performs byte write operation to SFP module's EEPROM over I2C interface at
+ *  a specified device address.
+ **/
+static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset,
+				      u8 dev_addr, u8 data)
+{
+	u32 esdp;
+	s32 status;
+	s32 timeout = 200;
+
+	if (hw->phy.qsfp_shared_i2c_bus == true) {
+		/* Acquire I2C bus ownership. */
+		esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+		esdp |= IXGBE_ESDP_SDP0;
+		IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+		IXGBE_WRITE_FLUSH(hw);
+
+		while (timeout) {
+			esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+			if (esdp & IXGBE_ESDP_SDP1)
+				break;
+
+			usleep_range(5000, 10000);
+			timeout--;
+		}
+
+		if (!timeout) {
+			hw_dbg(hw, "Driver can't access resource, acquiring I2C bus timeout.\n");
+			status = IXGBE_ERR_I2C;
+			goto release_i2c_access;
+		}
+	}
+
+	status = ixgbe_write_i2c_byte_generic(hw, byte_offset, dev_addr, data);
+
+release_i2c_access:
+	if (hw->phy.qsfp_shared_i2c_bus == true) {
+		/* Release I2C bus ownership. */
+		esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+		esdp &= ~IXGBE_ESDP_SDP0;
+		IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+		IXGBE_WRITE_FLUSH(hw);
+	}
+
+	return status;
+}
+
+static const struct ixgbe_mac_operations mac_ops_82599 = {
+	.init_hw                = &ixgbe_init_hw_generic,
+	.reset_hw               = &ixgbe_reset_hw_82599,
+	.start_hw               = &ixgbe_start_hw_82599,
+	.clear_hw_cntrs         = &ixgbe_clear_hw_cntrs_generic,
+	.get_media_type         = &ixgbe_get_media_type_82599,
+	.enable_rx_dma          = &ixgbe_enable_rx_dma_82599,
+	.disable_rx_buff	= &ixgbe_disable_rx_buff_generic,
+	.enable_rx_buff		= &ixgbe_enable_rx_buff_generic,
+	.get_mac_addr           = &ixgbe_get_mac_addr_generic,
+	.get_san_mac_addr       = &ixgbe_get_san_mac_addr_generic,
+	.get_device_caps        = &ixgbe_get_device_caps_generic,
+	.get_wwn_prefix         = &ixgbe_get_wwn_prefix_generic,
+	.stop_adapter           = &ixgbe_stop_adapter_generic,
+	.get_bus_info           = &ixgbe_get_bus_info_generic,
+	.set_lan_id             = &ixgbe_set_lan_id_multi_port_pcie,
+	.read_analog_reg8       = &ixgbe_read_analog_reg8_82599,
+	.write_analog_reg8      = &ixgbe_write_analog_reg8_82599,
+	.stop_link_on_d3	= &ixgbe_stop_mac_link_on_d3_82599,
+	.setup_link             = &ixgbe_setup_mac_link_82599,
+	.set_rxpba		= &ixgbe_set_rxpba_generic,
+	.check_link             = &ixgbe_check_mac_link_generic,
+	.get_link_capabilities  = &ixgbe_get_link_capabilities_82599,
+	.led_on                 = &ixgbe_led_on_generic,
+	.led_off                = &ixgbe_led_off_generic,
+	.init_led_link_act	= ixgbe_init_led_link_act_generic,
+	.blink_led_start        = &ixgbe_blink_led_start_generic,
+	.blink_led_stop         = &ixgbe_blink_led_stop_generic,
+	.set_rar                = &ixgbe_set_rar_generic,
+	.clear_rar              = &ixgbe_clear_rar_generic,
+	.set_vmdq               = &ixgbe_set_vmdq_generic,
+	.set_vmdq_san_mac	= &ixgbe_set_vmdq_san_mac_generic,
+	.clear_vmdq             = &ixgbe_clear_vmdq_generic,
+	.init_rx_addrs          = &ixgbe_init_rx_addrs_generic,
+	.update_mc_addr_list    = &ixgbe_update_mc_addr_list_generic,
+	.enable_mc              = &ixgbe_enable_mc_generic,
+	.disable_mc             = &ixgbe_disable_mc_generic,
+	.clear_vfta             = &ixgbe_clear_vfta_generic,
+	.set_vfta               = &ixgbe_set_vfta_generic,
+	.fc_enable              = &ixgbe_fc_enable_generic,
+	.setup_fc		= ixgbe_setup_fc_generic,
+	.fc_autoneg		= ixgbe_fc_autoneg,
+	.set_fw_drv_ver         = &ixgbe_set_fw_drv_ver_generic,
+	.init_uta_tables        = &ixgbe_init_uta_tables_generic,
+	.setup_sfp              = &ixgbe_setup_sfp_modules_82599,
+	.set_mac_anti_spoofing  = &ixgbe_set_mac_anti_spoofing,
+	.set_vlan_anti_spoofing = &ixgbe_set_vlan_anti_spoofing,
+	.acquire_swfw_sync      = &ixgbe_acquire_swfw_sync,
+	.release_swfw_sync      = &ixgbe_release_swfw_sync,
+	.init_swfw_sync		= NULL,
+	.get_thermal_sensor_data = &ixgbe_get_thermal_sensor_data_generic,
+	.init_thermal_sensor_thresh = &ixgbe_init_thermal_sensor_thresh_generic,
+	.prot_autoc_read	= &prot_autoc_read_82599,
+	.prot_autoc_write	= &prot_autoc_write_82599,
+	.enable_rx		= &ixgbe_enable_rx_generic,
+	.disable_rx		= &ixgbe_disable_rx_generic,
+};
+
+static const struct ixgbe_eeprom_operations eeprom_ops_82599 = {
+	.init_params		= &ixgbe_init_eeprom_params_generic,
+	.read			= &ixgbe_read_eeprom_82599,
+	.read_buffer		= &ixgbe_read_eeprom_buffer_82599,
+	.write			= &ixgbe_write_eeprom_generic,
+	.write_buffer		= &ixgbe_write_eeprom_buffer_bit_bang_generic,
+	.calc_checksum		= &ixgbe_calc_eeprom_checksum_generic,
+	.validate_checksum	= &ixgbe_validate_eeprom_checksum_generic,
+	.update_checksum	= &ixgbe_update_eeprom_checksum_generic,
+};
+
+static const struct ixgbe_phy_operations phy_ops_82599 = {
+	.identify		= &ixgbe_identify_phy_82599,
+	.identify_sfp		= &ixgbe_identify_module_generic,
+	.init			= &ixgbe_init_phy_ops_82599,
+	.reset			= &ixgbe_reset_phy_generic,
+	.read_reg		= &ixgbe_read_phy_reg_generic,
+	.write_reg		= &ixgbe_write_phy_reg_generic,
+	.setup_link		= &ixgbe_setup_phy_link_generic,
+	.setup_link_speed	= &ixgbe_setup_phy_link_speed_generic,
+	.read_i2c_byte		= &ixgbe_read_i2c_byte_generic,
+	.write_i2c_byte		= &ixgbe_write_i2c_byte_generic,
+	.read_i2c_sff8472	= &ixgbe_read_i2c_sff8472_generic,
+	.read_i2c_eeprom	= &ixgbe_read_i2c_eeprom_generic,
+	.write_i2c_eeprom	= &ixgbe_write_i2c_eeprom_generic,
+	.check_overtemp		= &ixgbe_tn_check_overtemp,
+};
+
+const struct ixgbe_info ixgbe_82599_info = {
+	.mac                    = ixgbe_mac_82599EB,
+	.get_invariants         = &ixgbe_get_invariants_82599,
+	.mac_ops                = &mac_ops_82599,
+	.eeprom_ops             = &eeprom_ops_82599,
+	.phy_ops                = &phy_ops_82599,
+	.mbx_ops                = &mbx_ops_generic,
+	.mvals			= ixgbe_mvals_8259X,
+};
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
new file mode 100644
index 0000000000..878dd8dff5
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -0,0 +1,4409 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/netdevice.h>
+
+#include "ixgbe.h"
+#include "ixgbe_common.h"
+#include "ixgbe_phy.h"
+
+static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw);
+static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw);
+static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw);
+static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw);
+static void ixgbe_standby_eeprom(struct ixgbe_hw *hw);
+static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data,
+					u16 count);
+static u16 ixgbe_shift_in_eeprom_bits(struct ixgbe_hw *hw, u16 count);
+static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec);
+static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec);
+static void ixgbe_release_eeprom(struct ixgbe_hw *hw);
+
+static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr);
+static s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg);
+static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+					     u16 words, u16 *data);
+static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+					     u16 words, u16 *data);
+static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw,
+						 u16 offset);
+static s32 ixgbe_disable_pcie_primary(struct ixgbe_hw *hw);
+
+/* Base table for registers values that change by MAC */
+const u32 ixgbe_mvals_8259X[IXGBE_MVALS_IDX_LIMIT] = {
+	IXGBE_MVALS_INIT(8259X)
+};
+
+/**
+ *  ixgbe_device_supports_autoneg_fc - Check if phy supports autoneg flow
+ *  control
+ *  @hw: pointer to hardware structure
+ *
+ *  There are several phys that do not support autoneg flow control. This
+ *  function check the device id to see if the associated phy supports
+ *  autoneg flow control.
+ **/
+bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
+{
+	bool supported = false;
+	ixgbe_link_speed speed;
+	bool link_up;
+
+	switch (hw->phy.media_type) {
+	case ixgbe_media_type_fiber:
+		/* flow control autoneg black list */
+		switch (hw->device_id) {
+		case IXGBE_DEV_ID_X550EM_A_SFP:
+		case IXGBE_DEV_ID_X550EM_A_SFP_N:
+			supported = false;
+			break;
+		default:
+			hw->mac.ops.check_link(hw, &speed, &link_up, false);
+			/* if link is down, assume supported */
+			if (link_up)
+				supported = speed == IXGBE_LINK_SPEED_1GB_FULL;
+			else
+				supported = true;
+		}
+
+		break;
+	case ixgbe_media_type_backplane:
+		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_XFI)
+			supported = false;
+		else
+			supported = true;
+		break;
+	case ixgbe_media_type_copper:
+		/* only some copper devices support flow control autoneg */
+		switch (hw->device_id) {
+		case IXGBE_DEV_ID_82599_T3_LOM:
+		case IXGBE_DEV_ID_X540T:
+		case IXGBE_DEV_ID_X540T1:
+		case IXGBE_DEV_ID_X550T:
+		case IXGBE_DEV_ID_X550T1:
+		case IXGBE_DEV_ID_X550EM_X_10G_T:
+		case IXGBE_DEV_ID_X550EM_A_10G_T:
+		case IXGBE_DEV_ID_X550EM_A_1G_T:
+		case IXGBE_DEV_ID_X550EM_A_1G_T_L:
+			supported = true;
+			break;
+		default:
+			break;
+		}
+		break;
+	default:
+		break;
+	}
+
+	if (!supported)
+		hw_dbg(hw, "Device %x does not support flow control autoneg\n",
+		       hw->device_id);
+
+	return supported;
+}
+
+/**
+ *  ixgbe_setup_fc_generic - Set up flow control
+ *  @hw: pointer to hardware structure
+ *
+ *  Called at init time to set up flow control.
+ **/
+s32 ixgbe_setup_fc_generic(struct ixgbe_hw *hw)
+{
+	s32 ret_val = 0;
+	u32 reg = 0, reg_bp = 0;
+	u16 reg_cu = 0;
+	bool locked = false;
+
+	/*
+	 * Validate the requested mode.  Strict IEEE mode does not allow
+	 * ixgbe_fc_rx_pause because it will cause us to fail at UNH.
+	 */
+	if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
+		hw_dbg(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n");
+		return IXGBE_ERR_INVALID_LINK_SETTINGS;
+	}
+
+	/*
+	 * 10gig parts do not have a word in the EEPROM to determine the
+	 * default flow control setting, so we explicitly set it to full.
+	 */
+	if (hw->fc.requested_mode == ixgbe_fc_default)
+		hw->fc.requested_mode = ixgbe_fc_full;
+
+	/*
+	 * Set up the 1G and 10G flow control advertisement registers so the
+	 * HW will be able to do fc autoneg once the cable is plugged in.  If
+	 * we link at 10G, the 1G advertisement is harmless and vice versa.
+	 */
+	switch (hw->phy.media_type) {
+	case ixgbe_media_type_backplane:
+		/* some MAC's need RMW protection on AUTOC */
+		ret_val = hw->mac.ops.prot_autoc_read(hw, &locked, &reg_bp);
+		if (ret_val)
+			return ret_val;
+
+		fallthrough; /* only backplane uses autoc */
+	case ixgbe_media_type_fiber:
+		reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA);
+
+		break;
+	case ixgbe_media_type_copper:
+		hw->phy.ops.read_reg(hw, MDIO_AN_ADVERTISE,
+					MDIO_MMD_AN, &reg_cu);
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * The possible values of fc.requested_mode are:
+	 * 0: Flow control is completely disabled
+	 * 1: Rx flow control is enabled (we can receive pause frames,
+	 *    but not send pause frames).
+	 * 2: Tx flow control is enabled (we can send pause frames but
+	 *    we do not support receiving pause frames).
+	 * 3: Both Rx and Tx flow control (symmetric) are enabled.
+	 * other: Invalid.
+	 */
+	switch (hw->fc.requested_mode) {
+	case ixgbe_fc_none:
+		/* Flow control completely disabled by software override. */
+		reg &= ~(IXGBE_PCS1GANA_SYM_PAUSE | IXGBE_PCS1GANA_ASM_PAUSE);
+		if (hw->phy.media_type == ixgbe_media_type_backplane)
+			reg_bp &= ~(IXGBE_AUTOC_SYM_PAUSE |
+				    IXGBE_AUTOC_ASM_PAUSE);
+		else if (hw->phy.media_type == ixgbe_media_type_copper)
+			reg_cu &= ~(IXGBE_TAF_SYM_PAUSE | IXGBE_TAF_ASM_PAUSE);
+		break;
+	case ixgbe_fc_tx_pause:
+		/*
+		 * Tx Flow control is enabled, and Rx Flow control is
+		 * disabled by software override.
+		 */
+		reg |= IXGBE_PCS1GANA_ASM_PAUSE;
+		reg &= ~IXGBE_PCS1GANA_SYM_PAUSE;
+		if (hw->phy.media_type == ixgbe_media_type_backplane) {
+			reg_bp |= IXGBE_AUTOC_ASM_PAUSE;
+			reg_bp &= ~IXGBE_AUTOC_SYM_PAUSE;
+		} else if (hw->phy.media_type == ixgbe_media_type_copper) {
+			reg_cu |= IXGBE_TAF_ASM_PAUSE;
+			reg_cu &= ~IXGBE_TAF_SYM_PAUSE;
+		}
+		break;
+	case ixgbe_fc_rx_pause:
+		/*
+		 * Rx Flow control is enabled and Tx Flow control is
+		 * disabled by software override. Since there really
+		 * isn't a way to advertise that we are capable of RX
+		 * Pause ONLY, we will advertise that we support both
+		 * symmetric and asymmetric Rx PAUSE, as such we fall
+		 * through to the fc_full statement.  Later, we will
+		 * disable the adapter's ability to send PAUSE frames.
+		 */
+	case ixgbe_fc_full:
+		/* Flow control (both Rx and Tx) is enabled by SW override. */
+		reg |= IXGBE_PCS1GANA_SYM_PAUSE | IXGBE_PCS1GANA_ASM_PAUSE;
+		if (hw->phy.media_type == ixgbe_media_type_backplane)
+			reg_bp |= IXGBE_AUTOC_SYM_PAUSE |
+				  IXGBE_AUTOC_ASM_PAUSE;
+		else if (hw->phy.media_type == ixgbe_media_type_copper)
+			reg_cu |= IXGBE_TAF_SYM_PAUSE | IXGBE_TAF_ASM_PAUSE;
+		break;
+	default:
+		hw_dbg(hw, "Flow control param set incorrectly\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	if (hw->mac.type != ixgbe_mac_X540) {
+		/*
+		 * Enable auto-negotiation between the MAC & PHY;
+		 * the MAC will advertise clause 37 flow control.
+		 */
+		IXGBE_WRITE_REG(hw, IXGBE_PCS1GANA, reg);
+		reg = IXGBE_READ_REG(hw, IXGBE_PCS1GLCTL);
+
+		/* Disable AN timeout */
+		if (hw->fc.strict_ieee)
+			reg &= ~IXGBE_PCS1GLCTL_AN_1G_TIMEOUT_EN;
+
+		IXGBE_WRITE_REG(hw, IXGBE_PCS1GLCTL, reg);
+		hw_dbg(hw, "Set up FC; PCS1GLCTL = 0x%08X\n", reg);
+	}
+
+	/*
+	 * AUTOC restart handles negotiation of 1G and 10G on backplane
+	 * and copper. There is no need to set the PCS1GCTL register.
+	 *
+	 */
+	if (hw->phy.media_type == ixgbe_media_type_backplane) {
+		/* Need the SW/FW semaphore around AUTOC writes if 82599 and
+		 * LESM is on, likewise reset_pipeline requries the lock as
+		 * it also writes AUTOC.
+		 */
+		ret_val = hw->mac.ops.prot_autoc_write(hw, reg_bp, locked);
+		if (ret_val)
+			return ret_val;
+
+	} else if ((hw->phy.media_type == ixgbe_media_type_copper) &&
+		   ixgbe_device_supports_autoneg_fc(hw)) {
+		hw->phy.ops.write_reg(hw, MDIO_AN_ADVERTISE,
+				      MDIO_MMD_AN, reg_cu);
+	}
+
+	hw_dbg(hw, "Set up FC; IXGBE_AUTOC = 0x%08X\n", reg);
+	return ret_val;
+}
+
+/**
+ *  ixgbe_start_hw_generic - Prepare hardware for Tx/Rx
+ *  @hw: pointer to hardware structure
+ *
+ *  Starts the hardware by filling the bus info structure and media type, clears
+ *  all on chip counters, initializes receive address registers, multicast
+ *  table, VLAN filter table, calls routine to set up link and flow control
+ *  settings, and leaves transmit and receive units disabled and uninitialized
+ **/
+s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw)
+{
+	s32 ret_val;
+	u32 ctrl_ext;
+	u16 device_caps;
+
+	/* Set the media type */
+	hw->phy.media_type = hw->mac.ops.get_media_type(hw);
+
+	/* Identify the PHY */
+	hw->phy.ops.identify(hw);
+
+	/* Clear the VLAN filter table */
+	hw->mac.ops.clear_vfta(hw);
+
+	/* Clear statistics registers */
+	hw->mac.ops.clear_hw_cntrs(hw);
+
+	/* Set No Snoop Disable */
+	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
+	ctrl_ext |= IXGBE_CTRL_EXT_NS_DIS;
+	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Setup flow control if method for doing so */
+	if (hw->mac.ops.setup_fc) {
+		ret_val = hw->mac.ops.setup_fc(hw);
+		if (ret_val)
+			return ret_val;
+	}
+
+	/* Cashe bit indicating need for crosstalk fix */
+	switch (hw->mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		hw->mac.ops.get_device_caps(hw, &device_caps);
+		if (device_caps & IXGBE_DEVICE_CAPS_NO_CROSSTALK_WR)
+			hw->need_crosstalk_fix = false;
+		else
+			hw->need_crosstalk_fix = true;
+		break;
+	default:
+		hw->need_crosstalk_fix = false;
+		break;
+	}
+
+	/* Clear adapter stopped flag */
+	hw->adapter_stopped = false;
+
+	return 0;
+}
+
+/**
+ *  ixgbe_start_hw_gen2 - Init sequence for common device family
+ *  @hw: pointer to hw structure
+ *
+ * Performs the init sequence common to the second generation
+ * of 10 GbE devices.
+ * Devices in the second generation:
+ *     82599
+ *     X540
+ **/
+s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
+{
+	u32 i;
+
+	/* Clear the rate limiters */
+	for (i = 0; i < hw->mac.max_tx_queues; i++) {
+		IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, i);
+		IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, 0);
+	}
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_init_hw_generic - Generic hardware initialization
+ *  @hw: pointer to hardware structure
+ *
+ *  Initialize the hardware by resetting the hardware, filling the bus info
+ *  structure and media type, clears all on chip counters, initializes receive
+ *  address registers, multicast table, VLAN filter table, calls routine to set
+ *  up link and flow control settings, and leaves transmit and receive units
+ *  disabled and uninitialized
+ **/
+s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw)
+{
+	s32 status;
+
+	/* Reset the hardware */
+	status = hw->mac.ops.reset_hw(hw);
+
+	if (status == 0) {
+		/* Start the HW */
+		status = hw->mac.ops.start_hw(hw);
+	}
+
+	/* Initialize the LED link active for LED blink support */
+	if (hw->mac.ops.init_led_link_act)
+		hw->mac.ops.init_led_link_act(hw);
+
+	return status;
+}
+
+/**
+ *  ixgbe_clear_hw_cntrs_generic - Generic clear hardware counters
+ *  @hw: pointer to hardware structure
+ *
+ *  Clears all hardware statistics counters by reading them from the hardware
+ *  Statistics counters are clear on read.
+ **/
+s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw)
+{
+	u16 i = 0;
+
+	IXGBE_READ_REG(hw, IXGBE_CRCERRS);
+	IXGBE_READ_REG(hw, IXGBE_ILLERRC);
+	IXGBE_READ_REG(hw, IXGBE_ERRBC);
+	IXGBE_READ_REG(hw, IXGBE_MSPDC);
+	for (i = 0; i < 8; i++)
+		IXGBE_READ_REG(hw, IXGBE_MPC(i));
+
+	IXGBE_READ_REG(hw, IXGBE_MLFC);
+	IXGBE_READ_REG(hw, IXGBE_MRFC);
+	IXGBE_READ_REG(hw, IXGBE_RLEC);
+	IXGBE_READ_REG(hw, IXGBE_LXONTXC);
+	IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
+	if (hw->mac.type >= ixgbe_mac_82599EB) {
+		IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
+		IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
+	} else {
+		IXGBE_READ_REG(hw, IXGBE_LXONRXC);
+		IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
+	}
+
+	for (i = 0; i < 8; i++) {
+		IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
+		IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
+		if (hw->mac.type >= ixgbe_mac_82599EB) {
+			IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
+			IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i));
+		} else {
+			IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
+			IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
+		}
+	}
+	if (hw->mac.type >= ixgbe_mac_82599EB)
+		for (i = 0; i < 8; i++)
+			IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
+	IXGBE_READ_REG(hw, IXGBE_PRC64);
+	IXGBE_READ_REG(hw, IXGBE_PRC127);
+	IXGBE_READ_REG(hw, IXGBE_PRC255);
+	IXGBE_READ_REG(hw, IXGBE_PRC511);
+	IXGBE_READ_REG(hw, IXGBE_PRC1023);
+	IXGBE_READ_REG(hw, IXGBE_PRC1522);
+	IXGBE_READ_REG(hw, IXGBE_GPRC);
+	IXGBE_READ_REG(hw, IXGBE_BPRC);
+	IXGBE_READ_REG(hw, IXGBE_MPRC);
+	IXGBE_READ_REG(hw, IXGBE_GPTC);
+	IXGBE_READ_REG(hw, IXGBE_GORCL);
+	IXGBE_READ_REG(hw, IXGBE_GORCH);
+	IXGBE_READ_REG(hw, IXGBE_GOTCL);
+	IXGBE_READ_REG(hw, IXGBE_GOTCH);
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		for (i = 0; i < 8; i++)
+			IXGBE_READ_REG(hw, IXGBE_RNBC(i));
+	IXGBE_READ_REG(hw, IXGBE_RUC);
+	IXGBE_READ_REG(hw, IXGBE_RFC);
+	IXGBE_READ_REG(hw, IXGBE_ROC);
+	IXGBE_READ_REG(hw, IXGBE_RJC);
+	IXGBE_READ_REG(hw, IXGBE_MNGPRC);
+	IXGBE_READ_REG(hw, IXGBE_MNGPDC);
+	IXGBE_READ_REG(hw, IXGBE_MNGPTC);
+	IXGBE_READ_REG(hw, IXGBE_TORL);
+	IXGBE_READ_REG(hw, IXGBE_TORH);
+	IXGBE_READ_REG(hw, IXGBE_TPR);
+	IXGBE_READ_REG(hw, IXGBE_TPT);
+	IXGBE_READ_REG(hw, IXGBE_PTC64);
+	IXGBE_READ_REG(hw, IXGBE_PTC127);
+	IXGBE_READ_REG(hw, IXGBE_PTC255);
+	IXGBE_READ_REG(hw, IXGBE_PTC511);
+	IXGBE_READ_REG(hw, IXGBE_PTC1023);
+	IXGBE_READ_REG(hw, IXGBE_PTC1522);
+	IXGBE_READ_REG(hw, IXGBE_MPTC);
+	IXGBE_READ_REG(hw, IXGBE_BPTC);
+	for (i = 0; i < 16; i++) {
+		IXGBE_READ_REG(hw, IXGBE_QPRC(i));
+		IXGBE_READ_REG(hw, IXGBE_QPTC(i));
+		if (hw->mac.type >= ixgbe_mac_82599EB) {
+			IXGBE_READ_REG(hw, IXGBE_QBRC_L(i));
+			IXGBE_READ_REG(hw, IXGBE_QBRC_H(i));
+			IXGBE_READ_REG(hw, IXGBE_QBTC_L(i));
+			IXGBE_READ_REG(hw, IXGBE_QBTC_H(i));
+			IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
+		} else {
+			IXGBE_READ_REG(hw, IXGBE_QBRC(i));
+			IXGBE_READ_REG(hw, IXGBE_QBTC(i));
+		}
+	}
+
+	if (hw->mac.type == ixgbe_mac_X550 || hw->mac.type == ixgbe_mac_X540) {
+		if (hw->phy.id == 0)
+			hw->phy.ops.identify(hw);
+		hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECL, MDIO_MMD_PCS, &i);
+		hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECH, MDIO_MMD_PCS, &i);
+		hw->phy.ops.read_reg(hw, IXGBE_LDPCECL, MDIO_MMD_PCS, &i);
+		hw->phy.ops.read_reg(hw, IXGBE_LDPCECH, MDIO_MMD_PCS, &i);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_pba_string_generic - Reads part number string from EEPROM
+ *  @hw: pointer to hardware structure
+ *  @pba_num: stores the part number string from the EEPROM
+ *  @pba_num_size: part number string buffer length
+ *
+ *  Reads the part number string from the EEPROM.
+ **/
+s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num,
+				  u32 pba_num_size)
+{
+	s32 ret_val;
+	u16 data;
+	u16 pba_ptr;
+	u16 offset;
+	u16 length;
+
+	if (pba_num == NULL) {
+		hw_dbg(hw, "PBA string buffer was null\n");
+		return IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM0_PTR, &data);
+	if (ret_val) {
+		hw_dbg(hw, "NVM Read Error\n");
+		return ret_val;
+	}
+
+	ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM1_PTR, &pba_ptr);
+	if (ret_val) {
+		hw_dbg(hw, "NVM Read Error\n");
+		return ret_val;
+	}
+
+	/*
+	 * if data is not ptr guard the PBA must be in legacy format which
+	 * means pba_ptr is actually our second data word for the PBA number
+	 * and we can decode it into an ascii string
+	 */
+	if (data != IXGBE_PBANUM_PTR_GUARD) {
+		hw_dbg(hw, "NVM PBA number is not stored as string\n");
+
+		/* we will need 11 characters to store the PBA */
+		if (pba_num_size < 11) {
+			hw_dbg(hw, "PBA string buffer too small\n");
+			return IXGBE_ERR_NO_SPACE;
+		}
+
+		/* extract hex string from data and pba_ptr */
+		pba_num[0] = (data >> 12) & 0xF;
+		pba_num[1] = (data >> 8) & 0xF;
+		pba_num[2] = (data >> 4) & 0xF;
+		pba_num[3] = data & 0xF;
+		pba_num[4] = (pba_ptr >> 12) & 0xF;
+		pba_num[5] = (pba_ptr >> 8) & 0xF;
+		pba_num[6] = '-';
+		pba_num[7] = 0;
+		pba_num[8] = (pba_ptr >> 4) & 0xF;
+		pba_num[9] = pba_ptr & 0xF;
+
+		/* put a null character on the end of our string */
+		pba_num[10] = '\0';
+
+		/* switch all the data but the '-' to hex char */
+		for (offset = 0; offset < 10; offset++) {
+			if (pba_num[offset] < 0xA)
+				pba_num[offset] += '0';
+			else if (pba_num[offset] < 0x10)
+				pba_num[offset] += 'A' - 0xA;
+		}
+
+		return 0;
+	}
+
+	ret_val = hw->eeprom.ops.read(hw, pba_ptr, &length);
+	if (ret_val) {
+		hw_dbg(hw, "NVM Read Error\n");
+		return ret_val;
+	}
+
+	if (length == 0xFFFF || length == 0) {
+		hw_dbg(hw, "NVM PBA number section invalid length\n");
+		return IXGBE_ERR_PBA_SECTION;
+	}
+
+	/* check if pba_num buffer is big enough */
+	if (pba_num_size  < (((u32)length * 2) - 1)) {
+		hw_dbg(hw, "PBA string buffer too small\n");
+		return IXGBE_ERR_NO_SPACE;
+	}
+
+	/* trim pba length from start of string */
+	pba_ptr++;
+	length--;
+
+	for (offset = 0; offset < length; offset++) {
+		ret_val = hw->eeprom.ops.read(hw, pba_ptr + offset, &data);
+		if (ret_val) {
+			hw_dbg(hw, "NVM Read Error\n");
+			return ret_val;
+		}
+		pba_num[offset * 2] = (u8)(data >> 8);
+		pba_num[(offset * 2) + 1] = (u8)(data & 0xFF);
+	}
+	pba_num[offset * 2] = '\0';
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_mac_addr_generic - Generic get MAC address
+ *  @hw: pointer to hardware structure
+ *  @mac_addr: Adapter MAC address
+ *
+ *  Reads the adapter's MAC address from first Receive Address Register (RAR0)
+ *  A reset of the adapter must be performed prior to calling this function
+ *  in order for the MAC address to have been loaded from the EEPROM into RAR0
+ **/
+s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr)
+{
+	u32 rar_high;
+	u32 rar_low;
+	u16 i;
+
+	rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(0));
+	rar_low = IXGBE_READ_REG(hw, IXGBE_RAL(0));
+
+	for (i = 0; i < 4; i++)
+		mac_addr[i] = (u8)(rar_low >> (i*8));
+
+	for (i = 0; i < 2; i++)
+		mac_addr[i+4] = (u8)(rar_high >> (i*8));
+
+	return 0;
+}
+
+enum ixgbe_bus_width ixgbe_convert_bus_width(u16 link_status)
+{
+	switch (link_status & IXGBE_PCI_LINK_WIDTH) {
+	case IXGBE_PCI_LINK_WIDTH_1:
+		return ixgbe_bus_width_pcie_x1;
+	case IXGBE_PCI_LINK_WIDTH_2:
+		return ixgbe_bus_width_pcie_x2;
+	case IXGBE_PCI_LINK_WIDTH_4:
+		return ixgbe_bus_width_pcie_x4;
+	case IXGBE_PCI_LINK_WIDTH_8:
+		return ixgbe_bus_width_pcie_x8;
+	default:
+		return ixgbe_bus_width_unknown;
+	}
+}
+
+enum ixgbe_bus_speed ixgbe_convert_bus_speed(u16 link_status)
+{
+	switch (link_status & IXGBE_PCI_LINK_SPEED) {
+	case IXGBE_PCI_LINK_SPEED_2500:
+		return ixgbe_bus_speed_2500;
+	case IXGBE_PCI_LINK_SPEED_5000:
+		return ixgbe_bus_speed_5000;
+	case IXGBE_PCI_LINK_SPEED_8000:
+		return ixgbe_bus_speed_8000;
+	default:
+		return ixgbe_bus_speed_unknown;
+	}
+}
+
+/**
+ *  ixgbe_get_bus_info_generic - Generic set PCI bus info
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets the PCI bus info (speed, width, type) within the ixgbe_hw structure
+ **/
+s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw)
+{
+	u16 link_status;
+
+	hw->bus.type = ixgbe_bus_type_pci_express;
+
+	/* Get the negotiated link width and speed from PCI config space */
+	link_status = ixgbe_read_pci_cfg_word(hw, IXGBE_PCI_LINK_STATUS);
+
+	hw->bus.width = ixgbe_convert_bus_width(link_status);
+	hw->bus.speed = ixgbe_convert_bus_speed(link_status);
+
+	hw->mac.ops.set_lan_id(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices
+ *  @hw: pointer to the HW structure
+ *
+ *  Determines the LAN function id by reading memory-mapped registers
+ *  and swaps the port value if requested.
+ **/
+void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw)
+{
+	struct ixgbe_bus_info *bus = &hw->bus;
+	u16 ee_ctrl_4;
+	u32 reg;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_STATUS);
+	bus->func = (reg & IXGBE_STATUS_LAN_ID) >> IXGBE_STATUS_LAN_ID_SHIFT;
+	bus->lan_id = bus->func;
+
+	/* check for a port swap */
+	reg = IXGBE_READ_REG(hw, IXGBE_FACTPS(hw));
+	if (reg & IXGBE_FACTPS_LFS)
+		bus->func ^= 0x1;
+
+	/* Get MAC instance from EEPROM for configuring CS4227 */
+	if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP) {
+		hw->eeprom.ops.read(hw, IXGBE_EEPROM_CTRL_4, &ee_ctrl_4);
+		bus->instance_id = (ee_ctrl_4 & IXGBE_EE_CTRL_4_INST_ID) >>
+				   IXGBE_EE_CTRL_4_INST_ID_SHIFT;
+	}
+}
+
+/**
+ *  ixgbe_stop_adapter_generic - Generic stop Tx/Rx units
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets the adapter_stopped flag within ixgbe_hw struct. Clears interrupts,
+ *  disables transmit and receive units. The adapter_stopped flag is used by
+ *  the shared code and drivers to determine if the adapter is in a stopped
+ *  state and should not touch the hardware.
+ **/
+s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw)
+{
+	u32 reg_val;
+	u16 i;
+
+	/*
+	 * Set the adapter_stopped flag so other driver functions stop touching
+	 * the hardware
+	 */
+	hw->adapter_stopped = true;
+
+	/* Disable the receive unit */
+	hw->mac.ops.disable_rx(hw);
+
+	/* Clear interrupt mask to stop interrupts from being generated */
+	IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_IRQ_CLEAR_MASK);
+
+	/* Clear any pending interrupts, flush previous writes */
+	IXGBE_READ_REG(hw, IXGBE_EICR);
+
+	/* Disable the transmit unit.  Each queue must be disabled. */
+	for (i = 0; i < hw->mac.max_tx_queues; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), IXGBE_TXDCTL_SWFLSH);
+
+	/* Disable the receive unit by stopping each queue */
+	for (i = 0; i < hw->mac.max_rx_queues; i++) {
+		reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
+		reg_val &= ~IXGBE_RXDCTL_ENABLE;
+		reg_val |= IXGBE_RXDCTL_SWFLSH;
+		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), reg_val);
+	}
+
+	/* flush all queues disables */
+	IXGBE_WRITE_FLUSH(hw);
+	usleep_range(1000, 2000);
+
+	/*
+	 * Prevent the PCI-E bus from hanging by disabling PCI-E primary
+	 * access and verify no pending requests
+	 */
+	return ixgbe_disable_pcie_primary(hw);
+}
+
+/**
+ *  ixgbe_init_led_link_act_generic - Store the LED index link/activity.
+ *  @hw: pointer to hardware structure
+ *
+ *  Store the index for the link active LED. This will be used to support
+ *  blinking the LED.
+ **/
+s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	u32 led_reg, led_mode;
+	u16 i;
+
+	led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+
+	/* Get LED link active from the LEDCTL register */
+	for (i = 0; i < 4; i++) {
+		led_mode = led_reg >> IXGBE_LED_MODE_SHIFT(i);
+
+		if ((led_mode & IXGBE_LED_MODE_MASK_BASE) ==
+		    IXGBE_LED_LINK_ACTIVE) {
+			mac->led_link_act = i;
+			return 0;
+		}
+	}
+
+	/* If LEDCTL register does not have the LED link active set, then use
+	 * known MAC defaults.
+	 */
+	switch (hw->mac.type) {
+	case ixgbe_mac_x550em_a:
+		mac->led_link_act = 0;
+		break;
+	case ixgbe_mac_X550EM_x:
+		mac->led_link_act = 1;
+		break;
+	default:
+		mac->led_link_act = 2;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_led_on_generic - Turns on the software controllable LEDs.
+ *  @hw: pointer to hardware structure
+ *  @index: led number to turn on
+ **/
+s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index)
+{
+	u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+
+	if (index > 3)
+		return IXGBE_ERR_PARAM;
+
+	/* To turn on the LED, set mode to ON. */
+	led_reg &= ~IXGBE_LED_MODE_MASK(index);
+	led_reg |= IXGBE_LED_ON << IXGBE_LED_MODE_SHIFT(index);
+	IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_led_off_generic - Turns off the software controllable LEDs.
+ *  @hw: pointer to hardware structure
+ *  @index: led number to turn off
+ **/
+s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index)
+{
+	u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+
+	if (index > 3)
+		return IXGBE_ERR_PARAM;
+
+	/* To turn off the LED, set mode to OFF. */
+	led_reg &= ~IXGBE_LED_MODE_MASK(index);
+	led_reg |= IXGBE_LED_OFF << IXGBE_LED_MODE_SHIFT(index);
+	IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_init_eeprom_params_generic - Initialize EEPROM params
+ *  @hw: pointer to hardware structure
+ *
+ *  Initializes the EEPROM parameters ixgbe_eeprom_info within the
+ *  ixgbe_hw struct in order to set up EEPROM access.
+ **/
+s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw)
+{
+	struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+	u32 eec;
+	u16 eeprom_size;
+
+	if (eeprom->type == ixgbe_eeprom_uninitialized) {
+		eeprom->type = ixgbe_eeprom_none;
+		/* Set default semaphore delay to 10ms which is a well
+		 * tested value */
+		eeprom->semaphore_delay = 10;
+		/* Clear EEPROM page size, it will be initialized as needed */
+		eeprom->word_page_size = 0;
+
+		/*
+		 * Check for EEPROM present first.
+		 * If not present leave as none
+		 */
+		eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+		if (eec & IXGBE_EEC_PRES) {
+			eeprom->type = ixgbe_eeprom_spi;
+
+			/*
+			 * SPI EEPROM is assumed here.  This code would need to
+			 * change if a future EEPROM is not SPI.
+			 */
+			eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
+					    IXGBE_EEC_SIZE_SHIFT);
+			eeprom->word_size = BIT(eeprom_size +
+						 IXGBE_EEPROM_WORD_SIZE_SHIFT);
+		}
+
+		if (eec & IXGBE_EEC_ADDR_SIZE)
+			eeprom->address_bits = 16;
+		else
+			eeprom->address_bits = 8;
+		hw_dbg(hw, "Eeprom params: type = %d, size = %d, address bits: %d\n",
+		       eeprom->type, eeprom->word_size, eeprom->address_bits);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_write_eeprom_buffer_bit_bang_generic - Write EEPROM using bit-bang
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to write
+ *  @words: number of words
+ *  @data: 16 bit word(s) to write to EEPROM
+ *
+ *  Reads 16 bit word(s) from EEPROM through bit-bang method
+ **/
+s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+					       u16 words, u16 *data)
+{
+	s32 status;
+	u16 i, count;
+
+	hw->eeprom.ops.init_params(hw);
+
+	if (words == 0)
+		return IXGBE_ERR_INVALID_ARGUMENT;
+
+	if (offset + words > hw->eeprom.word_size)
+		return IXGBE_ERR_EEPROM;
+
+	/*
+	 * The EEPROM page size cannot be queried from the chip. We do lazy
+	 * initialization. It is worth to do that when we write large buffer.
+	 */
+	if ((hw->eeprom.word_page_size == 0) &&
+	    (words > IXGBE_EEPROM_PAGE_SIZE_MAX))
+		ixgbe_detect_eeprom_page_size_generic(hw, offset);
+
+	/*
+	 * We cannot hold synchronization semaphores for too long
+	 * to avoid other entity starvation. However it is more efficient
+	 * to read in bursts than synchronizing access for each word.
+	 */
+	for (i = 0; i < words; i += IXGBE_EEPROM_RD_BUFFER_MAX_COUNT) {
+		count = (words - i) / IXGBE_EEPROM_RD_BUFFER_MAX_COUNT > 0 ?
+			 IXGBE_EEPROM_RD_BUFFER_MAX_COUNT : (words - i);
+		status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset + i,
+							    count, &data[i]);
+
+		if (status != 0)
+			break;
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_write_eeprom_buffer_bit_bang - Writes 16 bit word(s) to EEPROM
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be written to
+ *  @words: number of word(s)
+ *  @data: 16 bit word(s) to be written to the EEPROM
+ *
+ *  If ixgbe_eeprom_update_checksum is not called after this function, the
+ *  EEPROM will most likely contain an invalid checksum.
+ **/
+static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+					      u16 words, u16 *data)
+{
+	s32 status;
+	u16 word;
+	u16 page_size;
+	u16 i;
+	u8 write_opcode = IXGBE_EEPROM_WRITE_OPCODE_SPI;
+
+	/* Prepare the EEPROM for writing  */
+	status = ixgbe_acquire_eeprom(hw);
+	if (status)
+		return status;
+
+	if (ixgbe_ready_eeprom(hw) != 0) {
+		ixgbe_release_eeprom(hw);
+		return IXGBE_ERR_EEPROM;
+	}
+
+	for (i = 0; i < words; i++) {
+		ixgbe_standby_eeprom(hw);
+
+		/* Send the WRITE ENABLE command (8 bit opcode) */
+		ixgbe_shift_out_eeprom_bits(hw,
+					    IXGBE_EEPROM_WREN_OPCODE_SPI,
+					    IXGBE_EEPROM_OPCODE_BITS);
+
+		ixgbe_standby_eeprom(hw);
+
+		/* Some SPI eeproms use the 8th address bit embedded
+		 * in the opcode
+		 */
+		if ((hw->eeprom.address_bits == 8) &&
+		    ((offset + i) >= 128))
+			write_opcode |= IXGBE_EEPROM_A8_OPCODE_SPI;
+
+		/* Send the Write command (8-bit opcode + addr) */
+		ixgbe_shift_out_eeprom_bits(hw, write_opcode,
+					    IXGBE_EEPROM_OPCODE_BITS);
+		ixgbe_shift_out_eeprom_bits(hw, (u16)((offset + i) * 2),
+					    hw->eeprom.address_bits);
+
+		page_size = hw->eeprom.word_page_size;
+
+		/* Send the data in burst via SPI */
+		do {
+			word = data[i];
+			word = (word >> 8) | (word << 8);
+			ixgbe_shift_out_eeprom_bits(hw, word, 16);
+
+			if (page_size == 0)
+				break;
+
+			/* do not wrap around page */
+			if (((offset + i) & (page_size - 1)) ==
+			    (page_size - 1))
+				break;
+		} while (++i < words);
+
+		ixgbe_standby_eeprom(hw);
+		usleep_range(10000, 20000);
+	}
+	/* Done with writing - release the EEPROM */
+	ixgbe_release_eeprom(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_write_eeprom_generic - Writes 16 bit value to EEPROM
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be written to
+ *  @data: 16 bit word to be written to the EEPROM
+ *
+ *  If ixgbe_eeprom_update_checksum is not called after this function, the
+ *  EEPROM will most likely contain an invalid checksum.
+ **/
+s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data)
+{
+	hw->eeprom.ops.init_params(hw);
+
+	if (offset >= hw->eeprom.word_size)
+		return IXGBE_ERR_EEPROM;
+
+	return ixgbe_write_eeprom_buffer_bit_bang(hw, offset, 1, &data);
+}
+
+/**
+ *  ixgbe_read_eeprom_buffer_bit_bang_generic - Read EEPROM using bit-bang
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be read
+ *  @words: number of word(s)
+ *  @data: read 16 bit words(s) from EEPROM
+ *
+ *  Reads 16 bit word(s) from EEPROM through bit-bang method
+ **/
+s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+					      u16 words, u16 *data)
+{
+	s32 status;
+	u16 i, count;
+
+	hw->eeprom.ops.init_params(hw);
+
+	if (words == 0)
+		return IXGBE_ERR_INVALID_ARGUMENT;
+
+	if (offset + words > hw->eeprom.word_size)
+		return IXGBE_ERR_EEPROM;
+
+	/*
+	 * We cannot hold synchronization semaphores for too long
+	 * to avoid other entity starvation. However it is more efficient
+	 * to read in bursts than synchronizing access for each word.
+	 */
+	for (i = 0; i < words; i += IXGBE_EEPROM_RD_BUFFER_MAX_COUNT) {
+		count = (words - i) / IXGBE_EEPROM_RD_BUFFER_MAX_COUNT > 0 ?
+			 IXGBE_EEPROM_RD_BUFFER_MAX_COUNT : (words - i);
+
+		status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset + i,
+							   count, &data[i]);
+
+		if (status)
+			return status;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_eeprom_buffer_bit_bang - Read EEPROM using bit-bang
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be read
+ *  @words: number of word(s)
+ *  @data: read 16 bit word(s) from EEPROM
+ *
+ *  Reads 16 bit word(s) from EEPROM through bit-bang method
+ **/
+static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset,
+					     u16 words, u16 *data)
+{
+	s32 status;
+	u16 word_in;
+	u8 read_opcode = IXGBE_EEPROM_READ_OPCODE_SPI;
+	u16 i;
+
+	/* Prepare the EEPROM for reading  */
+	status = ixgbe_acquire_eeprom(hw);
+	if (status)
+		return status;
+
+	if (ixgbe_ready_eeprom(hw) != 0) {
+		ixgbe_release_eeprom(hw);
+		return IXGBE_ERR_EEPROM;
+	}
+
+	for (i = 0; i < words; i++) {
+		ixgbe_standby_eeprom(hw);
+		/* Some SPI eeproms use the 8th address bit embedded
+		 * in the opcode
+		 */
+		if ((hw->eeprom.address_bits == 8) &&
+		    ((offset + i) >= 128))
+			read_opcode |= IXGBE_EEPROM_A8_OPCODE_SPI;
+
+		/* Send the READ command (opcode + addr) */
+		ixgbe_shift_out_eeprom_bits(hw, read_opcode,
+					    IXGBE_EEPROM_OPCODE_BITS);
+		ixgbe_shift_out_eeprom_bits(hw, (u16)((offset + i) * 2),
+					    hw->eeprom.address_bits);
+
+		/* Read the data. */
+		word_in = ixgbe_shift_in_eeprom_bits(hw, 16);
+		data[i] = (word_in >> 8) | (word_in << 8);
+	}
+
+	/* End this read operation */
+	ixgbe_release_eeprom(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_eeprom_bit_bang_generic - Read EEPROM word using bit-bang
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be read
+ *  @data: read 16 bit value from EEPROM
+ *
+ *  Reads 16 bit value from EEPROM through bit-bang method
+ **/
+s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+				       u16 *data)
+{
+	hw->eeprom.ops.init_params(hw);
+
+	if (offset >= hw->eeprom.word_size)
+		return IXGBE_ERR_EEPROM;
+
+	return ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data);
+}
+
+/**
+ *  ixgbe_read_eerd_buffer_generic - Read EEPROM word(s) using EERD
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of word in the EEPROM to read
+ *  @words: number of word(s)
+ *  @data: 16 bit word(s) from the EEPROM
+ *
+ *  Reads a 16 bit word(s) from the EEPROM using the EERD register.
+ **/
+s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+				   u16 words, u16 *data)
+{
+	u32 eerd;
+	s32 status;
+	u32 i;
+
+	hw->eeprom.ops.init_params(hw);
+
+	if (words == 0)
+		return IXGBE_ERR_INVALID_ARGUMENT;
+
+	if (offset >= hw->eeprom.word_size)
+		return IXGBE_ERR_EEPROM;
+
+	for (i = 0; i < words; i++) {
+		eerd = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) |
+		       IXGBE_EEPROM_RW_REG_START;
+
+		IXGBE_WRITE_REG(hw, IXGBE_EERD, eerd);
+		status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_READ);
+
+		if (status == 0) {
+			data[i] = (IXGBE_READ_REG(hw, IXGBE_EERD) >>
+				   IXGBE_EEPROM_RW_REG_DATA);
+		} else {
+			hw_dbg(hw, "Eeprom read timed out\n");
+			return status;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_detect_eeprom_page_size_generic - Detect EEPROM page size
+ *  @hw: pointer to hardware structure
+ *  @offset: offset within the EEPROM to be used as a scratch pad
+ *
+ *  Discover EEPROM page size by writing marching data at given offset.
+ *  This function is called only when we are writing a new large buffer
+ *  at given offset so the data would be overwritten anyway.
+ **/
+static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw,
+						 u16 offset)
+{
+	u16 data[IXGBE_EEPROM_PAGE_SIZE_MAX];
+	s32 status;
+	u16 i;
+
+	for (i = 0; i < IXGBE_EEPROM_PAGE_SIZE_MAX; i++)
+		data[i] = i;
+
+	hw->eeprom.word_page_size = IXGBE_EEPROM_PAGE_SIZE_MAX;
+	status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset,
+					     IXGBE_EEPROM_PAGE_SIZE_MAX, data);
+	hw->eeprom.word_page_size = 0;
+	if (status)
+		return status;
+
+	status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data);
+	if (status)
+		return status;
+
+	/*
+	 * When writing in burst more than the actual page size
+	 * EEPROM address wraps around current page.
+	 */
+	hw->eeprom.word_page_size = IXGBE_EEPROM_PAGE_SIZE_MAX - data[0];
+
+	hw_dbg(hw, "Detected EEPROM page size = %d words.\n",
+	       hw->eeprom.word_page_size);
+	return 0;
+}
+
+/**
+ *  ixgbe_read_eerd_generic - Read EEPROM word using EERD
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to read
+ *  @data: word read from the EEPROM
+ *
+ *  Reads a 16 bit word from the EEPROM using the EERD register.
+ **/
+s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data)
+{
+	return ixgbe_read_eerd_buffer_generic(hw, offset, 1, data);
+}
+
+/**
+ *  ixgbe_write_eewr_buffer_generic - Write EEPROM word(s) using EEWR
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to write
+ *  @words: number of words
+ *  @data: word(s) write to the EEPROM
+ *
+ *  Write a 16 bit word(s) to the EEPROM using the EEWR register.
+ **/
+s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+				    u16 words, u16 *data)
+{
+	u32 eewr;
+	s32 status;
+	u16 i;
+
+	hw->eeprom.ops.init_params(hw);
+
+	if (words == 0)
+		return IXGBE_ERR_INVALID_ARGUMENT;
+
+	if (offset >= hw->eeprom.word_size)
+		return IXGBE_ERR_EEPROM;
+
+	for (i = 0; i < words; i++) {
+		eewr = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) |
+		       (data[i] << IXGBE_EEPROM_RW_REG_DATA) |
+		       IXGBE_EEPROM_RW_REG_START;
+
+		status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_WRITE);
+		if (status) {
+			hw_dbg(hw, "Eeprom write EEWR timed out\n");
+			return status;
+		}
+
+		IXGBE_WRITE_REG(hw, IXGBE_EEWR, eewr);
+
+		status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_WRITE);
+		if (status) {
+			hw_dbg(hw, "Eeprom write EEWR timed out\n");
+			return status;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_write_eewr_generic - Write EEPROM word using EEWR
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to write
+ *  @data: word write to the EEPROM
+ *
+ *  Write a 16 bit word to the EEPROM using the EEWR register.
+ **/
+s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data)
+{
+	return ixgbe_write_eewr_buffer_generic(hw, offset, 1, &data);
+}
+
+/**
+ *  ixgbe_poll_eerd_eewr_done - Poll EERD read or EEWR write status
+ *  @hw: pointer to hardware structure
+ *  @ee_reg: EEPROM flag for polling
+ *
+ *  Polls the status bit (bit 1) of the EERD or EEWR to determine when the
+ *  read or write is done respectively.
+ **/
+static s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg)
+{
+	u32 i;
+	u32 reg;
+
+	for (i = 0; i < IXGBE_EERD_EEWR_ATTEMPTS; i++) {
+		if (ee_reg == IXGBE_NVM_POLL_READ)
+			reg = IXGBE_READ_REG(hw, IXGBE_EERD);
+		else
+			reg = IXGBE_READ_REG(hw, IXGBE_EEWR);
+
+		if (reg & IXGBE_EEPROM_RW_REG_DONE) {
+			return 0;
+		}
+		udelay(5);
+	}
+	return IXGBE_ERR_EEPROM;
+}
+
+/**
+ *  ixgbe_acquire_eeprom - Acquire EEPROM using bit-bang
+ *  @hw: pointer to hardware structure
+ *
+ *  Prepares EEPROM for access using bit-bang method. This function should
+ *  be called before issuing a command to the EEPROM.
+ **/
+static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw)
+{
+	u32 eec;
+	u32 i;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) != 0)
+		return IXGBE_ERR_SWFW_SYNC;
+
+	eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+
+	/* Request EEPROM Access */
+	eec |= IXGBE_EEC_REQ;
+	IXGBE_WRITE_REG(hw, IXGBE_EEC(hw), eec);
+
+	for (i = 0; i < IXGBE_EEPROM_GRANT_ATTEMPTS; i++) {
+		eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+		if (eec & IXGBE_EEC_GNT)
+			break;
+		udelay(5);
+	}
+
+	/* Release if grant not acquired */
+	if (!(eec & IXGBE_EEC_GNT)) {
+		eec &= ~IXGBE_EEC_REQ;
+		IXGBE_WRITE_REG(hw, IXGBE_EEC(hw), eec);
+		hw_dbg(hw, "Could not acquire EEPROM grant\n");
+
+		hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+		return IXGBE_ERR_EEPROM;
+	}
+
+	/* Setup EEPROM for Read/Write */
+	/* Clear CS and SK */
+	eec &= ~(IXGBE_EEC_CS | IXGBE_EEC_SK);
+	IXGBE_WRITE_REG(hw, IXGBE_EEC(hw), eec);
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(1);
+	return 0;
+}
+
+/**
+ *  ixgbe_get_eeprom_semaphore - Get hardware semaphore
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets the hardware semaphores so EEPROM access can occur for bit-bang method
+ **/
+static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw)
+{
+	u32 timeout = 2000;
+	u32 i;
+	u32 swsm;
+
+	/* Get SMBI software semaphore between device drivers first */
+	for (i = 0; i < timeout; i++) {
+		/*
+		 * If the SMBI bit is 0 when we read it, then the bit will be
+		 * set and we have the semaphore
+		 */
+		swsm = IXGBE_READ_REG(hw, IXGBE_SWSM(hw));
+		if (!(swsm & IXGBE_SWSM_SMBI))
+			break;
+		usleep_range(50, 100);
+	}
+
+	if (i == timeout) {
+		hw_dbg(hw, "Driver can't access the Eeprom - SMBI Semaphore not granted.\n");
+		/* this release is particularly important because our attempts
+		 * above to get the semaphore may have succeeded, and if there
+		 * was a timeout, we should unconditionally clear the semaphore
+		 * bits to free the driver to make progress
+		 */
+		ixgbe_release_eeprom_semaphore(hw);
+
+		usleep_range(50, 100);
+		/* one last try
+		 * If the SMBI bit is 0 when we read it, then the bit will be
+		 * set and we have the semaphore
+		 */
+		swsm = IXGBE_READ_REG(hw, IXGBE_SWSM(hw));
+		if (swsm & IXGBE_SWSM_SMBI) {
+			hw_dbg(hw, "Software semaphore SMBI between device drivers not granted.\n");
+			return IXGBE_ERR_EEPROM;
+		}
+	}
+
+	/* Now get the semaphore between SW/FW through the SWESMBI bit */
+	for (i = 0; i < timeout; i++) {
+		swsm = IXGBE_READ_REG(hw, IXGBE_SWSM(hw));
+
+		/* Set the SW EEPROM semaphore bit to request access */
+		swsm |= IXGBE_SWSM_SWESMBI;
+		IXGBE_WRITE_REG(hw, IXGBE_SWSM(hw), swsm);
+
+		/* If we set the bit successfully then we got the
+		 * semaphore.
+		 */
+		swsm = IXGBE_READ_REG(hw, IXGBE_SWSM(hw));
+		if (swsm & IXGBE_SWSM_SWESMBI)
+			break;
+
+		usleep_range(50, 100);
+	}
+
+	/* Release semaphores and return error if SW EEPROM semaphore
+	 * was not granted because we don't have access to the EEPROM
+	 */
+	if (i >= timeout) {
+		hw_dbg(hw, "SWESMBI Software EEPROM semaphore not granted.\n");
+		ixgbe_release_eeprom_semaphore(hw);
+		return IXGBE_ERR_EEPROM;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_release_eeprom_semaphore - Release hardware semaphore
+ *  @hw: pointer to hardware structure
+ *
+ *  This function clears hardware semaphore bits.
+ **/
+static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw)
+{
+	u32 swsm;
+
+	swsm = IXGBE_READ_REG(hw, IXGBE_SWSM(hw));
+
+	/* Release both semaphores by writing 0 to the bits SWESMBI and SMBI */
+	swsm &= ~(IXGBE_SWSM_SWESMBI | IXGBE_SWSM_SMBI);
+	IXGBE_WRITE_REG(hw, IXGBE_SWSM(hw), swsm);
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ *  ixgbe_ready_eeprom - Polls for EEPROM ready
+ *  @hw: pointer to hardware structure
+ **/
+static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw)
+{
+	u16 i;
+	u8 spi_stat_reg;
+
+	/*
+	 * Read "Status Register" repeatedly until the LSB is cleared.  The
+	 * EEPROM will signal that the command has been completed by clearing
+	 * bit 0 of the internal status register.  If it's not cleared within
+	 * 5 milliseconds, then error out.
+	 */
+	for (i = 0; i < IXGBE_EEPROM_MAX_RETRY_SPI; i += 5) {
+		ixgbe_shift_out_eeprom_bits(hw, IXGBE_EEPROM_RDSR_OPCODE_SPI,
+					    IXGBE_EEPROM_OPCODE_BITS);
+		spi_stat_reg = (u8)ixgbe_shift_in_eeprom_bits(hw, 8);
+		if (!(spi_stat_reg & IXGBE_EEPROM_STATUS_RDY_SPI))
+			break;
+
+		udelay(5);
+		ixgbe_standby_eeprom(hw);
+	}
+
+	/*
+	 * On some parts, SPI write time could vary from 0-20mSec on 3.3V
+	 * devices (and only 0-5mSec on 5V devices)
+	 */
+	if (i >= IXGBE_EEPROM_MAX_RETRY_SPI) {
+		hw_dbg(hw, "SPI EEPROM Status error\n");
+		return IXGBE_ERR_EEPROM;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_standby_eeprom - Returns EEPROM to a "standby" state
+ *  @hw: pointer to hardware structure
+ **/
+static void ixgbe_standby_eeprom(struct ixgbe_hw *hw)
+{
+	u32 eec;
+
+	eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+
+	/* Toggle CS to flush commands */
+	eec |= IXGBE_EEC_CS;
+	IXGBE_WRITE_REG(hw, IXGBE_EEC(hw), eec);
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(1);
+	eec &= ~IXGBE_EEC_CS;
+	IXGBE_WRITE_REG(hw, IXGBE_EEC(hw), eec);
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(1);
+}
+
+/**
+ *  ixgbe_shift_out_eeprom_bits - Shift data bits out to the EEPROM.
+ *  @hw: pointer to hardware structure
+ *  @data: data to send to the EEPROM
+ *  @count: number of bits to shift out
+ **/
+static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data,
+					u16 count)
+{
+	u32 eec;
+	u32 mask;
+	u32 i;
+
+	eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+
+	/*
+	 * Mask is used to shift "count" bits of "data" out to the EEPROM
+	 * one bit at a time.  Determine the starting bit based on count
+	 */
+	mask = BIT(count - 1);
+
+	for (i = 0; i < count; i++) {
+		/*
+		 * A "1" is shifted out to the EEPROM by setting bit "DI" to a
+		 * "1", and then raising and then lowering the clock (the SK
+		 * bit controls the clock input to the EEPROM).  A "0" is
+		 * shifted out to the EEPROM by setting "DI" to "0" and then
+		 * raising and then lowering the clock.
+		 */
+		if (data & mask)
+			eec |= IXGBE_EEC_DI;
+		else
+			eec &= ~IXGBE_EEC_DI;
+
+		IXGBE_WRITE_REG(hw, IXGBE_EEC(hw), eec);
+		IXGBE_WRITE_FLUSH(hw);
+
+		udelay(1);
+
+		ixgbe_raise_eeprom_clk(hw, &eec);
+		ixgbe_lower_eeprom_clk(hw, &eec);
+
+		/*
+		 * Shift mask to signify next bit of data to shift in to the
+		 * EEPROM
+		 */
+		mask = mask >> 1;
+	}
+
+	/* We leave the "DI" bit set to "0" when we leave this routine. */
+	eec &= ~IXGBE_EEC_DI;
+	IXGBE_WRITE_REG(hw, IXGBE_EEC(hw), eec);
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ *  ixgbe_shift_in_eeprom_bits - Shift data bits in from the EEPROM
+ *  @hw: pointer to hardware structure
+ *  @count: number of bits to shift
+ **/
+static u16 ixgbe_shift_in_eeprom_bits(struct ixgbe_hw *hw, u16 count)
+{
+	u32 eec;
+	u32 i;
+	u16 data = 0;
+
+	/*
+	 * In order to read a register from the EEPROM, we need to shift
+	 * 'count' bits in from the EEPROM. Bits are "shifted in" by raising
+	 * the clock input to the EEPROM (setting the SK bit), and then reading
+	 * the value of the "DO" bit.  During this "shifting in" process the
+	 * "DI" bit should always be clear.
+	 */
+	eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+
+	eec &= ~(IXGBE_EEC_DO | IXGBE_EEC_DI);
+
+	for (i = 0; i < count; i++) {
+		data = data << 1;
+		ixgbe_raise_eeprom_clk(hw, &eec);
+
+		eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+
+		eec &= ~(IXGBE_EEC_DI);
+		if (eec & IXGBE_EEC_DO)
+			data |= 1;
+
+		ixgbe_lower_eeprom_clk(hw, &eec);
+	}
+
+	return data;
+}
+
+/**
+ *  ixgbe_raise_eeprom_clk - Raises the EEPROM's clock input.
+ *  @hw: pointer to hardware structure
+ *  @eec: EEC register's current value
+ **/
+static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec)
+{
+	/*
+	 * Raise the clock input to the EEPROM
+	 * (setting the SK bit), then delay
+	 */
+	*eec = *eec | IXGBE_EEC_SK;
+	IXGBE_WRITE_REG(hw, IXGBE_EEC(hw), *eec);
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(1);
+}
+
+/**
+ *  ixgbe_lower_eeprom_clk - Lowers the EEPROM's clock input.
+ *  @hw: pointer to hardware structure
+ *  @eec: EEC's current value
+ **/
+static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec)
+{
+	/*
+	 * Lower the clock input to the EEPROM (clearing the SK bit), then
+	 * delay
+	 */
+	*eec = *eec & ~IXGBE_EEC_SK;
+	IXGBE_WRITE_REG(hw, IXGBE_EEC(hw), *eec);
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(1);
+}
+
+/**
+ *  ixgbe_release_eeprom - Release EEPROM, release semaphores
+ *  @hw: pointer to hardware structure
+ **/
+static void ixgbe_release_eeprom(struct ixgbe_hw *hw)
+{
+	u32 eec;
+
+	eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+
+	eec |= IXGBE_EEC_CS;  /* Pull CS high */
+	eec &= ~IXGBE_EEC_SK; /* Lower SCK */
+
+	IXGBE_WRITE_REG(hw, IXGBE_EEC(hw), eec);
+	IXGBE_WRITE_FLUSH(hw);
+
+	udelay(1);
+
+	/* Stop requesting EEPROM access */
+	eec &= ~IXGBE_EEC_REQ;
+	IXGBE_WRITE_REG(hw, IXGBE_EEC(hw), eec);
+
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+
+	/*
+	 * Delay before attempt to obtain semaphore again to allow FW
+	 * access. semaphore_delay is in ms we need us for usleep_range
+	 */
+	usleep_range(hw->eeprom.semaphore_delay * 1000,
+		     hw->eeprom.semaphore_delay * 2000);
+}
+
+/**
+ *  ixgbe_calc_eeprom_checksum_generic - Calculates and returns the checksum
+ *  @hw: pointer to hardware structure
+ **/
+s32 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw)
+{
+	u16 i;
+	u16 j;
+	u16 checksum = 0;
+	u16 length = 0;
+	u16 pointer = 0;
+	u16 word = 0;
+
+	/* Include 0x0-0x3F in the checksum */
+	for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) {
+		if (hw->eeprom.ops.read(hw, i, &word)) {
+			hw_dbg(hw, "EEPROM read failed\n");
+			break;
+		}
+		checksum += word;
+	}
+
+	/* Include all data from pointers except for the fw pointer */
+	for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) {
+		if (hw->eeprom.ops.read(hw, i, &pointer)) {
+			hw_dbg(hw, "EEPROM read failed\n");
+			return IXGBE_ERR_EEPROM;
+		}
+
+		/* If the pointer seems invalid */
+		if (pointer == 0xFFFF || pointer == 0)
+			continue;
+
+		if (hw->eeprom.ops.read(hw, pointer, &length)) {
+			hw_dbg(hw, "EEPROM read failed\n");
+			return IXGBE_ERR_EEPROM;
+		}
+
+		if (length == 0xFFFF || length == 0)
+			continue;
+
+		for (j = pointer + 1; j <= pointer + length; j++) {
+			if (hw->eeprom.ops.read(hw, j, &word)) {
+				hw_dbg(hw, "EEPROM read failed\n");
+				return IXGBE_ERR_EEPROM;
+			}
+			checksum += word;
+		}
+	}
+
+	checksum = (u16)IXGBE_EEPROM_SUM - checksum;
+
+	return (s32)checksum;
+}
+
+/**
+ *  ixgbe_validate_eeprom_checksum_generic - Validate EEPROM checksum
+ *  @hw: pointer to hardware structure
+ *  @checksum_val: calculated checksum
+ *
+ *  Performs checksum calculation and validates the EEPROM checksum.  If the
+ *  caller does not need checksum_val, the value can be NULL.
+ **/
+s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
+					   u16 *checksum_val)
+{
+	s32 status;
+	u16 checksum;
+	u16 read_checksum = 0;
+
+	/*
+	 * Read the first word from the EEPROM. If this times out or fails, do
+	 * not continue or we could be in for a very long wait while every
+	 * EEPROM read fails
+	 */
+	status = hw->eeprom.ops.read(hw, 0, &checksum);
+	if (status) {
+		hw_dbg(hw, "EEPROM read failed\n");
+		return status;
+	}
+
+	status = hw->eeprom.ops.calc_checksum(hw);
+	if (status < 0)
+		return status;
+
+	checksum = (u16)(status & 0xffff);
+
+	status = hw->eeprom.ops.read(hw, IXGBE_EEPROM_CHECKSUM, &read_checksum);
+	if (status) {
+		hw_dbg(hw, "EEPROM read failed\n");
+		return status;
+	}
+
+	/* Verify read checksum from EEPROM is the same as
+	 * calculated checksum
+	 */
+	if (read_checksum != checksum)
+		status = IXGBE_ERR_EEPROM_CHECKSUM;
+
+	/* If the user cares, return the calculated checksum */
+	if (checksum_val)
+		*checksum_val = checksum;
+
+	return status;
+}
+
+/**
+ *  ixgbe_update_eeprom_checksum_generic - Updates the EEPROM checksum
+ *  @hw: pointer to hardware structure
+ **/
+s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw)
+{
+	s32 status;
+	u16 checksum;
+
+	/*
+	 * Read the first word from the EEPROM. If this times out or fails, do
+	 * not continue or we could be in for a very long wait while every
+	 * EEPROM read fails
+	 */
+	status = hw->eeprom.ops.read(hw, 0, &checksum);
+	if (status) {
+		hw_dbg(hw, "EEPROM read failed\n");
+		return status;
+	}
+
+	status = hw->eeprom.ops.calc_checksum(hw);
+	if (status < 0)
+		return status;
+
+	checksum = (u16)(status & 0xffff);
+
+	status = hw->eeprom.ops.write(hw, IXGBE_EEPROM_CHECKSUM, checksum);
+
+	return status;
+}
+
+/**
+ *  ixgbe_set_rar_generic - Set Rx address register
+ *  @hw: pointer to hardware structure
+ *  @index: Receive address register to write
+ *  @addr: Address to put into receive address register
+ *  @vmdq: VMDq "set" or "pool" index
+ *  @enable_addr: set flag that address is active
+ *
+ *  Puts an ethernet address into a receive address register.
+ **/
+s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
+			  u32 enable_addr)
+{
+	u32 rar_low, rar_high;
+	u32 rar_entries = hw->mac.num_rar_entries;
+
+	/* Make sure we are using a valid rar index range */
+	if (index >= rar_entries) {
+		hw_dbg(hw, "RAR index %d is out of range.\n", index);
+		return IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	/* setup VMDq pool selection before this RAR gets enabled */
+	hw->mac.ops.set_vmdq(hw, index, vmdq);
+
+	/*
+	 * HW expects these in little endian so we reverse the byte
+	 * order from network order (big endian) to little endian
+	 */
+	rar_low = ((u32)addr[0] |
+		   ((u32)addr[1] << 8) |
+		   ((u32)addr[2] << 16) |
+		   ((u32)addr[3] << 24));
+	/*
+	 * Some parts put the VMDq setting in the extra RAH bits,
+	 * so save everything except the lower 16 bits that hold part
+	 * of the address and the address valid bit.
+	 */
+	rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index));
+	rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV);
+	rar_high |= ((u32)addr[4] | ((u32)addr[5] << 8));
+
+	if (enable_addr != 0)
+		rar_high |= IXGBE_RAH_AV;
+
+	/* Record lower 32 bits of MAC address and then make
+	 * sure that write is flushed to hardware before writing
+	 * the upper 16 bits and setting the valid bit.
+	 */
+	IXGBE_WRITE_REG(hw, IXGBE_RAL(index), rar_low);
+	IXGBE_WRITE_FLUSH(hw);
+	IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_clear_rar_generic - Remove Rx address register
+ *  @hw: pointer to hardware structure
+ *  @index: Receive address register to write
+ *
+ *  Clears an ethernet address from a receive address register.
+ **/
+s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index)
+{
+	u32 rar_high;
+	u32 rar_entries = hw->mac.num_rar_entries;
+
+	/* Make sure we are using a valid rar index range */
+	if (index >= rar_entries) {
+		hw_dbg(hw, "RAR index %d is out of range.\n", index);
+		return IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	/*
+	 * Some parts put the VMDq setting in the extra RAH bits,
+	 * so save everything except the lower 16 bits that hold part
+	 * of the address and the address valid bit.
+	 */
+	rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index));
+	rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV);
+
+	/* Clear the address valid bit and upper 16 bits of the address
+	 * before clearing the lower bits. This way we aren't updating
+	 * a live filter.
+	 */
+	IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high);
+	IXGBE_WRITE_FLUSH(hw);
+	IXGBE_WRITE_REG(hw, IXGBE_RAL(index), 0);
+
+	/* clear VMDq pool/queue selection for this RAR */
+	hw->mac.ops.clear_vmdq(hw, index, IXGBE_CLEAR_VMDQ_ALL);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_init_rx_addrs_generic - Initializes receive address filters.
+ *  @hw: pointer to hardware structure
+ *
+ *  Places the MAC address in receive address register 0 and clears the rest
+ *  of the receive address registers. Clears the multicast table. Assumes
+ *  the receiver is in reset when the routine is called.
+ **/
+s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw)
+{
+	u32 i;
+	u32 rar_entries = hw->mac.num_rar_entries;
+
+	/*
+	 * If the current mac address is valid, assume it is a software override
+	 * to the permanent address.
+	 * Otherwise, use the permanent address from the eeprom.
+	 */
+	if (!is_valid_ether_addr(hw->mac.addr)) {
+		/* Get the MAC address from the RAR0 for later reference */
+		hw->mac.ops.get_mac_addr(hw, hw->mac.addr);
+
+		hw_dbg(hw, " Keeping Current RAR0 Addr =%pM\n", hw->mac.addr);
+	} else {
+		/* Setup the receive address. */
+		hw_dbg(hw, "Overriding MAC Address in RAR[0]\n");
+		hw_dbg(hw, " New MAC Addr =%pM\n", hw->mac.addr);
+
+		hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV);
+	}
+
+	/*  clear VMDq pool/queue selection for RAR 0 */
+	hw->mac.ops.clear_vmdq(hw, 0, IXGBE_CLEAR_VMDQ_ALL);
+
+	hw->addr_ctrl.overflow_promisc = 0;
+
+	hw->addr_ctrl.rar_used_count = 1;
+
+	/* Zero out the other receive addresses. */
+	hw_dbg(hw, "Clearing RAR[1-%d]\n", rar_entries - 1);
+	for (i = 1; i < rar_entries; i++) {
+		IXGBE_WRITE_REG(hw, IXGBE_RAL(i), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_RAH(i), 0);
+	}
+
+	/* Clear the MTA */
+	hw->addr_ctrl.mta_in_use = 0;
+	IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, hw->mac.mc_filter_type);
+
+	hw_dbg(hw, " Clearing MTA\n");
+	for (i = 0; i < hw->mac.mcft_size; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_MTA(i), 0);
+
+	if (hw->mac.ops.init_uta_tables)
+		hw->mac.ops.init_uta_tables(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_mta_vector - Determines bit-vector in multicast table to set
+ *  @hw: pointer to hardware structure
+ *  @mc_addr: the multicast address
+ *
+ *  Extracts the 12 bits, from a multicast address, to determine which
+ *  bit-vector to set in the multicast table. The hardware uses 12 bits, from
+ *  incoming rx multicast addresses, to determine the bit-vector to check in
+ *  the MTA. Which of the 4 combination, of 12-bits, the hardware uses is set
+ *  by the MO field of the MCSTCTRL. The MO field is set during initialization
+ *  to mc_filter_type.
+ **/
+static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr)
+{
+	u32 vector = 0;
+
+	switch (hw->mac.mc_filter_type) {
+	case 0:   /* use bits [47:36] of the address */
+		vector = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4));
+		break;
+	case 1:   /* use bits [46:35] of the address */
+		vector = ((mc_addr[4] >> 3) | (((u16)mc_addr[5]) << 5));
+		break;
+	case 2:   /* use bits [45:34] of the address */
+		vector = ((mc_addr[4] >> 2) | (((u16)mc_addr[5]) << 6));
+		break;
+	case 3:   /* use bits [43:32] of the address */
+		vector = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8));
+		break;
+	default:  /* Invalid mc_filter_type */
+		hw_dbg(hw, "MC filter type param set incorrectly\n");
+		break;
+	}
+
+	/* vector can only be 12-bits or boundary will be exceeded */
+	vector &= 0xFFF;
+	return vector;
+}
+
+/**
+ *  ixgbe_set_mta - Set bit-vector in multicast table
+ *  @hw: pointer to hardware structure
+ *  @mc_addr: Multicast address
+ *
+ *  Sets the bit-vector in the multicast table.
+ **/
+static void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr)
+{
+	u32 vector;
+	u32 vector_bit;
+	u32 vector_reg;
+
+	hw->addr_ctrl.mta_in_use++;
+
+	vector = ixgbe_mta_vector(hw, mc_addr);
+	hw_dbg(hw, " bit-vector = 0x%03X\n", vector);
+
+	/*
+	 * The MTA is a register array of 128 32-bit registers. It is treated
+	 * like an array of 4096 bits.  We want to set bit
+	 * BitArray[vector_value]. So we figure out what register the bit is
+	 * in, read it, OR in the new bit, then write back the new value.  The
+	 * register is determined by the upper 7 bits of the vector value and
+	 * the bit within that register are determined by the lower 5 bits of
+	 * the value.
+	 */
+	vector_reg = (vector >> 5) & 0x7F;
+	vector_bit = vector & 0x1F;
+	hw->mac.mta_shadow[vector_reg] |= BIT(vector_bit);
+}
+
+/**
+ *  ixgbe_update_mc_addr_list_generic - Updates MAC list of multicast addresses
+ *  @hw: pointer to hardware structure
+ *  @netdev: pointer to net device structure
+ *
+ *  The given list replaces any existing list. Clears the MC addrs from receive
+ *  address registers and the multicast table. Uses unused receive address
+ *  registers for the first multicast addresses, and hashes the rest into the
+ *  multicast table.
+ **/
+s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw,
+				      struct net_device *netdev)
+{
+	struct netdev_hw_addr *ha;
+	u32 i;
+
+	/*
+	 * Set the new number of MC addresses that we are being requested to
+	 * use.
+	 */
+	hw->addr_ctrl.num_mc_addrs = netdev_mc_count(netdev);
+	hw->addr_ctrl.mta_in_use = 0;
+
+	/* Clear mta_shadow */
+	hw_dbg(hw, " Clearing MTA\n");
+	memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow));
+
+	/* Update mta shadow */
+	netdev_for_each_mc_addr(ha, netdev) {
+		hw_dbg(hw, " Adding the multicast addresses:\n");
+		ixgbe_set_mta(hw, ha->addr);
+	}
+
+	/* Enable mta */
+	for (i = 0; i < hw->mac.mcft_size; i++)
+		IXGBE_WRITE_REG_ARRAY(hw, IXGBE_MTA(0), i,
+				      hw->mac.mta_shadow[i]);
+
+	if (hw->addr_ctrl.mta_in_use > 0)
+		IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL,
+				IXGBE_MCSTCTRL_MFE | hw->mac.mc_filter_type);
+
+	hw_dbg(hw, "ixgbe_update_mc_addr_list_generic Complete\n");
+	return 0;
+}
+
+/**
+ *  ixgbe_enable_mc_generic - Enable multicast address in RAR
+ *  @hw: pointer to hardware structure
+ *
+ *  Enables multicast address in RAR and the use of the multicast hash table.
+ **/
+s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw)
+{
+	struct ixgbe_addr_filter_info *a = &hw->addr_ctrl;
+
+	if (a->mta_in_use > 0)
+		IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, IXGBE_MCSTCTRL_MFE |
+				hw->mac.mc_filter_type);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_disable_mc_generic - Disable multicast address in RAR
+ *  @hw: pointer to hardware structure
+ *
+ *  Disables multicast address in RAR and the use of the multicast hash table.
+ **/
+s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw)
+{
+	struct ixgbe_addr_filter_info *a = &hw->addr_ctrl;
+
+	if (a->mta_in_use > 0)
+		IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, hw->mac.mc_filter_type);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_fc_enable_generic - Enable flow control
+ *  @hw: pointer to hardware structure
+ *
+ *  Enable flow control according to the current settings.
+ **/
+s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw)
+{
+	u32 mflcn_reg, fccfg_reg;
+	u32 reg;
+	u32 fcrtl, fcrth;
+	int i;
+
+	/* Validate the water mark configuration. */
+	if (!hw->fc.pause_time)
+		return IXGBE_ERR_INVALID_LINK_SETTINGS;
+
+	/* Low water mark of zero causes XOFF floods */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
+		    hw->fc.high_water[i]) {
+			if (!hw->fc.low_water[i] ||
+			    hw->fc.low_water[i] >= hw->fc.high_water[i]) {
+				hw_dbg(hw, "Invalid water mark configuration\n");
+				return IXGBE_ERR_INVALID_LINK_SETTINGS;
+			}
+		}
+	}
+
+	/* Negotiate the fc mode to use */
+	hw->mac.ops.fc_autoneg(hw);
+
+	/* Disable any previous flow control settings */
+	mflcn_reg = IXGBE_READ_REG(hw, IXGBE_MFLCN);
+	mflcn_reg &= ~(IXGBE_MFLCN_RPFCE_MASK | IXGBE_MFLCN_RFCE);
+
+	fccfg_reg = IXGBE_READ_REG(hw, IXGBE_FCCFG);
+	fccfg_reg &= ~(IXGBE_FCCFG_TFCE_802_3X | IXGBE_FCCFG_TFCE_PRIORITY);
+
+	/*
+	 * The possible values of fc.current_mode are:
+	 * 0: Flow control is completely disabled
+	 * 1: Rx flow control is enabled (we can receive pause frames,
+	 *    but not send pause frames).
+	 * 2: Tx flow control is enabled (we can send pause frames but
+	 *    we do not support receiving pause frames).
+	 * 3: Both Rx and Tx flow control (symmetric) are enabled.
+	 * other: Invalid.
+	 */
+	switch (hw->fc.current_mode) {
+	case ixgbe_fc_none:
+		/*
+		 * Flow control is disabled by software override or autoneg.
+		 * The code below will actually disable it in the HW.
+		 */
+		break;
+	case ixgbe_fc_rx_pause:
+		/*
+		 * Rx Flow control is enabled and Tx Flow control is
+		 * disabled by software override. Since there really
+		 * isn't a way to advertise that we are capable of RX
+		 * Pause ONLY, we will advertise that we support both
+		 * symmetric and asymmetric Rx PAUSE.  Later, we will
+		 * disable the adapter's ability to send PAUSE frames.
+		 */
+		mflcn_reg |= IXGBE_MFLCN_RFCE;
+		break;
+	case ixgbe_fc_tx_pause:
+		/*
+		 * Tx Flow control is enabled, and Rx Flow control is
+		 * disabled by software override.
+		 */
+		fccfg_reg |= IXGBE_FCCFG_TFCE_802_3X;
+		break;
+	case ixgbe_fc_full:
+		/* Flow control (both Rx and Tx) is enabled by SW override. */
+		mflcn_reg |= IXGBE_MFLCN_RFCE;
+		fccfg_reg |= IXGBE_FCCFG_TFCE_802_3X;
+		break;
+	default:
+		hw_dbg(hw, "Flow control param set incorrectly\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	/* Set 802.3x based flow control settings. */
+	mflcn_reg |= IXGBE_MFLCN_DPF;
+	IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn_reg);
+	IXGBE_WRITE_REG(hw, IXGBE_FCCFG, fccfg_reg);
+
+	/* Set up and enable Rx high/low water mark thresholds, enable XON. */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		if ((hw->fc.current_mode & ixgbe_fc_tx_pause) &&
+		    hw->fc.high_water[i]) {
+			fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE;
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), fcrtl);
+			fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN;
+		} else {
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), 0);
+			/*
+			 * In order to prevent Tx hangs when the internal Tx
+			 * switch is enabled we must set the high water mark
+			 * to the Rx packet buffer size - 24KB.  This allows
+			 * the Tx switch to function even under heavy Rx
+			 * workloads.
+			 */
+			fcrth = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)) - 24576;
+		}
+
+		IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(i), fcrth);
+	}
+
+	/* Configure pause time (2 TCs per register) */
+	reg = hw->fc.pause_time * 0x00010001U;
+	for (i = 0; i < (MAX_TRAFFIC_CLASS / 2); i++)
+		IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg);
+
+	IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_negotiate_fc - Negotiate flow control
+ *  @hw: pointer to hardware structure
+ *  @adv_reg: flow control advertised settings
+ *  @lp_reg: link partner's flow control settings
+ *  @adv_sym: symmetric pause bit in advertisement
+ *  @adv_asm: asymmetric pause bit in advertisement
+ *  @lp_sym: symmetric pause bit in link partner advertisement
+ *  @lp_asm: asymmetric pause bit in link partner advertisement
+ *
+ *  Find the intersection between advertised settings and link partner's
+ *  advertised settings
+ **/
+s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
+		       u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm)
+{
+	if ((!(adv_reg)) ||  (!(lp_reg)))
+		return IXGBE_ERR_FC_NOT_NEGOTIATED;
+
+	if ((adv_reg & adv_sym) && (lp_reg & lp_sym)) {
+		/*
+		 * Now we need to check if the user selected Rx ONLY
+		 * of pause frames.  In this case, we had to advertise
+		 * FULL flow control because we could not advertise RX
+		 * ONLY. Hence, we must now check to see if we need to
+		 * turn OFF the TRANSMISSION of PAUSE frames.
+		 */
+		if (hw->fc.requested_mode == ixgbe_fc_full) {
+			hw->fc.current_mode = ixgbe_fc_full;
+			hw_dbg(hw, "Flow Control = FULL.\n");
+		} else {
+			hw->fc.current_mode = ixgbe_fc_rx_pause;
+			hw_dbg(hw, "Flow Control=RX PAUSE frames only\n");
+		}
+	} else if (!(adv_reg & adv_sym) && (adv_reg & adv_asm) &&
+		   (lp_reg & lp_sym) && (lp_reg & lp_asm)) {
+		hw->fc.current_mode = ixgbe_fc_tx_pause;
+		hw_dbg(hw, "Flow Control = TX PAUSE frames only.\n");
+	} else if ((adv_reg & adv_sym) && (adv_reg & adv_asm) &&
+		   !(lp_reg & lp_sym) && (lp_reg & lp_asm)) {
+		hw->fc.current_mode = ixgbe_fc_rx_pause;
+		hw_dbg(hw, "Flow Control = RX PAUSE frames only.\n");
+	} else {
+		hw->fc.current_mode = ixgbe_fc_none;
+		hw_dbg(hw, "Flow Control = NONE.\n");
+	}
+	return 0;
+}
+
+/**
+ *  ixgbe_fc_autoneg_fiber - Enable flow control on 1 gig fiber
+ *  @hw: pointer to hardware structure
+ *
+ *  Enable flow control according on 1 gig fiber.
+ **/
+static s32 ixgbe_fc_autoneg_fiber(struct ixgbe_hw *hw)
+{
+	u32 pcs_anadv_reg, pcs_lpab_reg, linkstat;
+	s32 ret_val;
+
+	/*
+	 * On multispeed fiber at 1g, bail out if
+	 * - link is up but AN did not complete, or if
+	 * - link is up and AN completed but timed out
+	 */
+
+	linkstat = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA);
+	if ((!!(linkstat & IXGBE_PCS1GLSTA_AN_COMPLETE) == 0) ||
+	    (!!(linkstat & IXGBE_PCS1GLSTA_AN_TIMED_OUT) == 1))
+		return IXGBE_ERR_FC_NOT_NEGOTIATED;
+
+	pcs_anadv_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA);
+	pcs_lpab_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP);
+
+	ret_val =  ixgbe_negotiate_fc(hw, pcs_anadv_reg,
+			       pcs_lpab_reg, IXGBE_PCS1GANA_SYM_PAUSE,
+			       IXGBE_PCS1GANA_ASM_PAUSE,
+			       IXGBE_PCS1GANA_SYM_PAUSE,
+			       IXGBE_PCS1GANA_ASM_PAUSE);
+
+	return ret_val;
+}
+
+/**
+ *  ixgbe_fc_autoneg_backplane - Enable flow control IEEE clause 37
+ *  @hw: pointer to hardware structure
+ *
+ *  Enable flow control according to IEEE clause 37.
+ **/
+static s32 ixgbe_fc_autoneg_backplane(struct ixgbe_hw *hw)
+{
+	u32 links2, anlp1_reg, autoc_reg, links;
+	s32 ret_val;
+
+	/*
+	 * On backplane, bail out if
+	 * - backplane autoneg was not completed, or if
+	 * - we are 82599 and link partner is not AN enabled
+	 */
+	links = IXGBE_READ_REG(hw, IXGBE_LINKS);
+	if ((links & IXGBE_LINKS_KX_AN_COMP) == 0)
+		return IXGBE_ERR_FC_NOT_NEGOTIATED;
+
+	if (hw->mac.type == ixgbe_mac_82599EB) {
+		links2 = IXGBE_READ_REG(hw, IXGBE_LINKS2);
+		if ((links2 & IXGBE_LINKS2_AN_SUPPORTED) == 0)
+			return IXGBE_ERR_FC_NOT_NEGOTIATED;
+	}
+	/*
+	 * Read the 10g AN autoc and LP ability registers and resolve
+	 * local flow control settings accordingly
+	 */
+	autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	anlp1_reg = IXGBE_READ_REG(hw, IXGBE_ANLP1);
+
+	ret_val = ixgbe_negotiate_fc(hw, autoc_reg,
+		anlp1_reg, IXGBE_AUTOC_SYM_PAUSE, IXGBE_AUTOC_ASM_PAUSE,
+		IXGBE_ANLP1_SYM_PAUSE, IXGBE_ANLP1_ASM_PAUSE);
+
+	return ret_val;
+}
+
+/**
+ *  ixgbe_fc_autoneg_copper - Enable flow control IEEE clause 37
+ *  @hw: pointer to hardware structure
+ *
+ *  Enable flow control according to IEEE clause 37.
+ **/
+static s32 ixgbe_fc_autoneg_copper(struct ixgbe_hw *hw)
+{
+	u16 technology_ability_reg = 0;
+	u16 lp_technology_ability_reg = 0;
+
+	hw->phy.ops.read_reg(hw, MDIO_AN_ADVERTISE,
+			     MDIO_MMD_AN,
+			     &technology_ability_reg);
+	hw->phy.ops.read_reg(hw, MDIO_AN_LPA,
+			     MDIO_MMD_AN,
+			     &lp_technology_ability_reg);
+
+	return ixgbe_negotiate_fc(hw, (u32)technology_ability_reg,
+				  (u32)lp_technology_ability_reg,
+				  IXGBE_TAF_SYM_PAUSE, IXGBE_TAF_ASM_PAUSE,
+				  IXGBE_TAF_SYM_PAUSE, IXGBE_TAF_ASM_PAUSE);
+}
+
+/**
+ *  ixgbe_fc_autoneg - Configure flow control
+ *  @hw: pointer to hardware structure
+ *
+ *  Compares our advertised flow control capabilities to those advertised by
+ *  our link partner, and determines the proper flow control mode to use.
+ **/
+void ixgbe_fc_autoneg(struct ixgbe_hw *hw)
+{
+	s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED;
+	ixgbe_link_speed speed;
+	bool link_up;
+
+	/*
+	 * AN should have completed when the cable was plugged in.
+	 * Look for reasons to bail out.  Bail out if:
+	 * - FC autoneg is disabled, or if
+	 * - link is not up.
+	 *
+	 * Since we're being called from an LSC, link is already known to be up.
+	 * So use link_up_wait_to_complete=false.
+	 */
+	if (hw->fc.disable_fc_autoneg)
+		goto out;
+
+	hw->mac.ops.check_link(hw, &speed, &link_up, false);
+	if (!link_up)
+		goto out;
+
+	switch (hw->phy.media_type) {
+	/* Autoneg flow control on fiber adapters */
+	case ixgbe_media_type_fiber:
+		if (speed == IXGBE_LINK_SPEED_1GB_FULL)
+			ret_val = ixgbe_fc_autoneg_fiber(hw);
+		break;
+
+	/* Autoneg flow control on backplane adapters */
+	case ixgbe_media_type_backplane:
+		ret_val = ixgbe_fc_autoneg_backplane(hw);
+		break;
+
+	/* Autoneg flow control on copper adapters */
+	case ixgbe_media_type_copper:
+		if (ixgbe_device_supports_autoneg_fc(hw))
+			ret_val = ixgbe_fc_autoneg_copper(hw);
+		break;
+
+	default:
+		break;
+	}
+
+out:
+	if (ret_val == 0) {
+		hw->fc.fc_was_autonegged = true;
+	} else {
+		hw->fc.fc_was_autonegged = false;
+		hw->fc.current_mode = hw->fc.requested_mode;
+	}
+}
+
+/**
+ * ixgbe_pcie_timeout_poll - Return number of times to poll for completion
+ * @hw: pointer to hardware structure
+ *
+ * System-wide timeout range is encoded in PCIe Device Control2 register.
+ *
+ *  Add 10% to specified maximum and return the number of times to poll for
+ *  completion timeout, in units of 100 microsec.  Never return less than
+ *  800 = 80 millisec.
+ **/
+static u32 ixgbe_pcie_timeout_poll(struct ixgbe_hw *hw)
+{
+	s16 devctl2;
+	u32 pollcnt;
+
+	devctl2 = ixgbe_read_pci_cfg_word(hw, IXGBE_PCI_DEVICE_CONTROL2);
+	devctl2 &= IXGBE_PCIDEVCTRL2_TIMEO_MASK;
+
+	switch (devctl2) {
+	case IXGBE_PCIDEVCTRL2_65_130ms:
+		 pollcnt = 1300;         /* 130 millisec */
+		break;
+	case IXGBE_PCIDEVCTRL2_260_520ms:
+		pollcnt = 5200;         /* 520 millisec */
+		break;
+	case IXGBE_PCIDEVCTRL2_1_2s:
+		pollcnt = 20000;        /* 2 sec */
+		break;
+	case IXGBE_PCIDEVCTRL2_4_8s:
+		pollcnt = 80000;        /* 8 sec */
+		break;
+	case IXGBE_PCIDEVCTRL2_17_34s:
+		pollcnt = 34000;        /* 34 sec */
+		break;
+	case IXGBE_PCIDEVCTRL2_50_100us:        /* 100 microsecs */
+	case IXGBE_PCIDEVCTRL2_1_2ms:           /* 2 millisecs */
+	case IXGBE_PCIDEVCTRL2_16_32ms:         /* 32 millisec */
+	case IXGBE_PCIDEVCTRL2_16_32ms_def:     /* 32 millisec default */
+	default:
+		pollcnt = 800;          /* 80 millisec minimum */
+		break;
+	}
+
+	/* add 10% to spec maximum */
+	return (pollcnt * 11) / 10;
+}
+
+/**
+ *  ixgbe_disable_pcie_primary - Disable PCI-express primary access
+ *  @hw: pointer to hardware structure
+ *
+ *  Disables PCI-Express primary access and verifies there are no pending
+ *  requests. IXGBE_ERR_PRIMARY_REQUESTS_PENDING is returned if primary disable
+ *  bit hasn't caused the primary requests to be disabled, else 0
+ *  is returned signifying primary requests disabled.
+ **/
+static s32 ixgbe_disable_pcie_primary(struct ixgbe_hw *hw)
+{
+	u32 i, poll;
+	u16 value;
+
+	/* Always set this bit to ensure any future transactions are blocked */
+	IXGBE_WRITE_REG(hw, IXGBE_CTRL, IXGBE_CTRL_GIO_DIS);
+
+	/* Poll for bit to read as set */
+	for (i = 0; i < IXGBE_PCI_PRIMARY_DISABLE_TIMEOUT; i++) {
+		if (IXGBE_READ_REG(hw, IXGBE_CTRL) & IXGBE_CTRL_GIO_DIS)
+			break;
+		usleep_range(100, 120);
+	}
+	if (i >= IXGBE_PCI_PRIMARY_DISABLE_TIMEOUT) {
+		hw_dbg(hw, "GIO disable did not set - requesting resets\n");
+		goto gio_disable_fail;
+	}
+
+	/* Exit if primary requests are blocked */
+	if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO) ||
+	    ixgbe_removed(hw->hw_addr))
+		return 0;
+
+	/* Poll for primary request bit to clear */
+	for (i = 0; i < IXGBE_PCI_PRIMARY_DISABLE_TIMEOUT; i++) {
+		udelay(100);
+		if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO))
+			return 0;
+	}
+
+	/*
+	 * Two consecutive resets are required via CTRL.RST per datasheet
+	 * 5.2.5.3.2 Primary Disable.  We set a flag to inform the reset routine
+	 * of this need.  The first reset prevents new primary requests from
+	 * being issued by our device.  We then must wait 1usec or more for any
+	 * remaining completions from the PCIe bus to trickle in, and then reset
+	 * again to clear out any effects they may have had on our device.
+	 */
+	hw_dbg(hw, "GIO Primary Disable bit didn't clear - requesting resets\n");
+gio_disable_fail:
+	hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
+
+	if (hw->mac.type >= ixgbe_mac_X550)
+		return 0;
+
+	/*
+	 * Before proceeding, make sure that the PCIe block does not have
+	 * transactions pending.
+	 */
+	poll = ixgbe_pcie_timeout_poll(hw);
+	for (i = 0; i < poll; i++) {
+		udelay(100);
+		value = ixgbe_read_pci_cfg_word(hw, IXGBE_PCI_DEVICE_STATUS);
+		if (ixgbe_removed(hw->hw_addr))
+			return 0;
+		if (!(value & IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING))
+			return 0;
+	}
+
+	hw_dbg(hw, "PCIe transaction pending bit also did not clear.\n");
+	return IXGBE_ERR_PRIMARY_REQUESTS_PENDING;
+}
+
+/**
+ *  ixgbe_acquire_swfw_sync - Acquire SWFW semaphore
+ *  @hw: pointer to hardware structure
+ *  @mask: Mask to specify which semaphore to acquire
+ *
+ *  Acquires the SWFW semaphore through the GSSR register for the specified
+ *  function (CSR, PHY0, PHY1, EEPROM, Flash)
+ **/
+s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u32 mask)
+{
+	u32 gssr = 0;
+	u32 swmask = mask;
+	u32 fwmask = mask << 5;
+	u32 timeout = 200;
+	u32 i;
+
+	for (i = 0; i < timeout; i++) {
+		/*
+		 * SW NVM semaphore bit is used for access to all
+		 * SW_FW_SYNC bits (not just NVM)
+		 */
+		if (ixgbe_get_eeprom_semaphore(hw))
+			return IXGBE_ERR_SWFW_SYNC;
+
+		gssr = IXGBE_READ_REG(hw, IXGBE_GSSR);
+		if (!(gssr & (fwmask | swmask))) {
+			gssr |= swmask;
+			IXGBE_WRITE_REG(hw, IXGBE_GSSR, gssr);
+			ixgbe_release_eeprom_semaphore(hw);
+			return 0;
+		} else {
+			/* Resource is currently in use by FW or SW */
+			ixgbe_release_eeprom_semaphore(hw);
+			usleep_range(5000, 10000);
+		}
+	}
+
+	/* If time expired clear the bits holding the lock and retry */
+	if (gssr & (fwmask | swmask))
+		ixgbe_release_swfw_sync(hw, gssr & (fwmask | swmask));
+
+	usleep_range(5000, 10000);
+	return IXGBE_ERR_SWFW_SYNC;
+}
+
+/**
+ *  ixgbe_release_swfw_sync - Release SWFW semaphore
+ *  @hw: pointer to hardware structure
+ *  @mask: Mask to specify which semaphore to release
+ *
+ *  Releases the SWFW semaphore through the GSSR register for the specified
+ *  function (CSR, PHY0, PHY1, EEPROM, Flash)
+ **/
+void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u32 mask)
+{
+	u32 gssr;
+	u32 swmask = mask;
+
+	ixgbe_get_eeprom_semaphore(hw);
+
+	gssr = IXGBE_READ_REG(hw, IXGBE_GSSR);
+	gssr &= ~swmask;
+	IXGBE_WRITE_REG(hw, IXGBE_GSSR, gssr);
+
+	ixgbe_release_eeprom_semaphore(hw);
+}
+
+/**
+ * prot_autoc_read_generic - Hides MAC differences needed for AUTOC read
+ * @hw: pointer to hardware structure
+ * @reg_val: Value we read from AUTOC
+ * @locked: bool to indicate whether the SW/FW lock should be taken.  Never
+ *	    true in this the generic case.
+ *
+ * The default case requires no protection so just to the register read.
+ **/
+s32 prot_autoc_read_generic(struct ixgbe_hw *hw, bool *locked, u32 *reg_val)
+{
+	*locked = false;
+	*reg_val = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	return 0;
+}
+
+/**
+ * prot_autoc_write_generic - Hides MAC differences needed for AUTOC write
+ * @hw: pointer to hardware structure
+ * @reg_val: value to write to AUTOC
+ * @locked: bool to indicate whether the SW/FW lock was already taken by
+ *	    previous read.
+ **/
+s32 prot_autoc_write_generic(struct ixgbe_hw *hw, u32 reg_val, bool locked)
+{
+	IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_val);
+	return 0;
+}
+
+/**
+ *  ixgbe_disable_rx_buff_generic - Stops the receive data path
+ *  @hw: pointer to hardware structure
+ *
+ *  Stops the receive data path and waits for the HW to internally
+ *  empty the Rx security block.
+ **/
+s32 ixgbe_disable_rx_buff_generic(struct ixgbe_hw *hw)
+{
+#define IXGBE_MAX_SECRX_POLL 40
+	int i;
+	int secrxreg;
+
+	secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+	secrxreg |= IXGBE_SECRXCTRL_RX_DIS;
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, secrxreg);
+	for (i = 0; i < IXGBE_MAX_SECRX_POLL; i++) {
+		secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT);
+		if (secrxreg & IXGBE_SECRXSTAT_SECRX_RDY)
+			break;
+		else
+			/* Use interrupt-safe sleep just in case */
+			udelay(1000);
+	}
+
+	/* For informational purposes only */
+	if (i >= IXGBE_MAX_SECRX_POLL)
+		hw_dbg(hw, "Rx unit being enabled before security path fully disabled. Continuing with init.\n");
+
+	return 0;
+
+}
+
+/**
+ *  ixgbe_enable_rx_buff_generic - Enables the receive data path
+ *  @hw: pointer to hardware structure
+ *
+ *  Enables the receive data path
+ **/
+s32 ixgbe_enable_rx_buff_generic(struct ixgbe_hw *hw)
+{
+	u32 secrxreg;
+
+	secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+	secrxreg &= ~IXGBE_SECRXCTRL_RX_DIS;
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, secrxreg);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_enable_rx_dma_generic - Enable the Rx DMA unit
+ *  @hw: pointer to hardware structure
+ *  @regval: register value to write to RXCTRL
+ *
+ *  Enables the Rx DMA unit
+ **/
+s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval)
+{
+	if (regval & IXGBE_RXCTRL_RXEN)
+		hw->mac.ops.enable_rx(hw);
+	else
+		hw->mac.ops.disable_rx(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_blink_led_start_generic - Blink LED based on index.
+ *  @hw: pointer to hardware structure
+ *  @index: led number to blink
+ **/
+s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index)
+{
+	ixgbe_link_speed speed = 0;
+	bool link_up = false;
+	u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+	bool locked = false;
+	s32 ret_val;
+
+	if (index > 3)
+		return IXGBE_ERR_PARAM;
+
+	/*
+	 * Link must be up to auto-blink the LEDs;
+	 * Force it if link is down.
+	 */
+	hw->mac.ops.check_link(hw, &speed, &link_up, false);
+
+	if (!link_up) {
+		ret_val = hw->mac.ops.prot_autoc_read(hw, &locked, &autoc_reg);
+		if (ret_val)
+			return ret_val;
+
+		autoc_reg |= IXGBE_AUTOC_AN_RESTART;
+		autoc_reg |= IXGBE_AUTOC_FLU;
+
+		ret_val = hw->mac.ops.prot_autoc_write(hw, autoc_reg, locked);
+		if (ret_val)
+			return ret_val;
+
+		IXGBE_WRITE_FLUSH(hw);
+
+		usleep_range(10000, 20000);
+	}
+
+	led_reg &= ~IXGBE_LED_MODE_MASK(index);
+	led_reg |= IXGBE_LED_BLINK(index);
+	IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_blink_led_stop_generic - Stop blinking LED based on index.
+ *  @hw: pointer to hardware structure
+ *  @index: led number to stop blinking
+ **/
+s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index)
+{
+	u32 autoc_reg = 0;
+	u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+	bool locked = false;
+	s32 ret_val;
+
+	if (index > 3)
+		return IXGBE_ERR_PARAM;
+
+	ret_val = hw->mac.ops.prot_autoc_read(hw, &locked, &autoc_reg);
+	if (ret_val)
+		return ret_val;
+
+	autoc_reg &= ~IXGBE_AUTOC_FLU;
+	autoc_reg |= IXGBE_AUTOC_AN_RESTART;
+
+	ret_val = hw->mac.ops.prot_autoc_write(hw, autoc_reg, locked);
+	if (ret_val)
+		return ret_val;
+
+	led_reg &= ~IXGBE_LED_MODE_MASK(index);
+	led_reg &= ~IXGBE_LED_BLINK(index);
+	led_reg |= IXGBE_LED_LINK_ACTIVE << IXGBE_LED_MODE_SHIFT(index);
+	IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_san_mac_addr_offset - Get SAN MAC address offset from the EEPROM
+ *  @hw: pointer to hardware structure
+ *  @san_mac_offset: SAN MAC address offset
+ *
+ *  This function will read the EEPROM location for the SAN MAC address
+ *  pointer, and returns the value at that location.  This is used in both
+ *  get and set mac_addr routines.
+ **/
+static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw,
+					u16 *san_mac_offset)
+{
+	s32 ret_val;
+
+	/*
+	 * First read the EEPROM pointer to see if the MAC addresses are
+	 * available.
+	 */
+	ret_val = hw->eeprom.ops.read(hw, IXGBE_SAN_MAC_ADDR_PTR,
+				      san_mac_offset);
+	if (ret_val)
+		hw_err(hw, "eeprom read at offset %d failed\n",
+		       IXGBE_SAN_MAC_ADDR_PTR);
+
+	return ret_val;
+}
+
+/**
+ *  ixgbe_get_san_mac_addr_generic - SAN MAC address retrieval from the EEPROM
+ *  @hw: pointer to hardware structure
+ *  @san_mac_addr: SAN MAC address
+ *
+ *  Reads the SAN MAC address from the EEPROM, if it's available.  This is
+ *  per-port, so set_lan_id() must be called before reading the addresses.
+ *  set_lan_id() is called by identify_sfp(), but this cannot be relied
+ *  upon for non-SFP connections, so we must call it here.
+ **/
+s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr)
+{
+	u16 san_mac_data, san_mac_offset;
+	u8 i;
+	s32 ret_val;
+
+	/*
+	 * First read the EEPROM pointer to see if the MAC addresses are
+	 * available.  If they're not, no point in calling set_lan_id() here.
+	 */
+	ret_val = ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset);
+	if (ret_val || san_mac_offset == 0 || san_mac_offset == 0xFFFF)
+
+		goto san_mac_addr_clr;
+
+	/* make sure we know which port we need to program */
+	hw->mac.ops.set_lan_id(hw);
+	/* apply the port offset to the address offset */
+	(hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) :
+			 (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET);
+	for (i = 0; i < 3; i++) {
+		ret_val = hw->eeprom.ops.read(hw, san_mac_offset,
+					      &san_mac_data);
+		if (ret_val) {
+			hw_err(hw, "eeprom read at offset %d failed\n",
+			       san_mac_offset);
+			goto san_mac_addr_clr;
+		}
+		san_mac_addr[i * 2] = (u8)(san_mac_data);
+		san_mac_addr[i * 2 + 1] = (u8)(san_mac_data >> 8);
+		san_mac_offset++;
+	}
+	return 0;
+
+san_mac_addr_clr:
+	/* No addresses available in this EEPROM.  It's not necessarily an
+	 * error though, so just wipe the local address and return.
+	 */
+	for (i = 0; i < 6; i++)
+		san_mac_addr[i] = 0xFF;
+	return ret_val;
+}
+
+/**
+ *  ixgbe_get_pcie_msix_count_generic - Gets MSI-X vector count
+ *  @hw: pointer to hardware structure
+ *
+ *  Read PCIe configuration space, and get the MSI-X vector count from
+ *  the capabilities table.
+ **/
+u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw)
+{
+	u16 msix_count;
+	u16 max_msix_count;
+	u16 pcie_offset;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		pcie_offset = IXGBE_PCIE_MSIX_82598_CAPS;
+		max_msix_count = IXGBE_MAX_MSIX_VECTORS_82598;
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		pcie_offset = IXGBE_PCIE_MSIX_82599_CAPS;
+		max_msix_count = IXGBE_MAX_MSIX_VECTORS_82599;
+		break;
+	default:
+		return 1;
+	}
+
+	msix_count = ixgbe_read_pci_cfg_word(hw, pcie_offset);
+	if (ixgbe_removed(hw->hw_addr))
+		msix_count = 0;
+	msix_count &= IXGBE_PCIE_MSIX_TBL_SZ_MASK;
+
+	/* MSI-X count is zero-based in HW */
+	msix_count++;
+
+	if (msix_count > max_msix_count)
+		msix_count = max_msix_count;
+
+	return msix_count;
+}
+
+/**
+ *  ixgbe_clear_vmdq_generic - Disassociate a VMDq pool index from a rx address
+ *  @hw: pointer to hardware struct
+ *  @rar: receive address register index to disassociate
+ *  @vmdq: VMDq pool index to remove from the rar
+ **/
+s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+	u32 mpsar_lo, mpsar_hi;
+	u32 rar_entries = hw->mac.num_rar_entries;
+
+	/* Make sure we are using a valid rar index range */
+	if (rar >= rar_entries) {
+		hw_dbg(hw, "RAR index %d is out of range.\n", rar);
+		return IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	mpsar_lo = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar));
+	mpsar_hi = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar));
+
+	if (ixgbe_removed(hw->hw_addr))
+		return 0;
+
+	if (!mpsar_lo && !mpsar_hi)
+		return 0;
+
+	if (vmdq == IXGBE_CLEAR_VMDQ_ALL) {
+		if (mpsar_lo) {
+			IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), 0);
+			mpsar_lo = 0;
+		}
+		if (mpsar_hi) {
+			IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), 0);
+			mpsar_hi = 0;
+		}
+	} else if (vmdq < 32) {
+		mpsar_lo &= ~BIT(vmdq);
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar_lo);
+	} else {
+		mpsar_hi &= ~BIT(vmdq - 32);
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar_hi);
+	}
+
+	/* was that the last pool using this rar? */
+	if (mpsar_lo == 0 && mpsar_hi == 0 &&
+	    rar != 0 && rar != hw->mac.san_mac_rar_index)
+		hw->mac.ops.clear_rar(hw, rar);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_set_vmdq_generic - Associate a VMDq pool index with a rx address
+ *  @hw: pointer to hardware struct
+ *  @rar: receive address register index to associate with a VMDq index
+ *  @vmdq: VMDq pool index
+ **/
+s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq)
+{
+	u32 mpsar;
+	u32 rar_entries = hw->mac.num_rar_entries;
+
+	/* Make sure we are using a valid rar index range */
+	if (rar >= rar_entries) {
+		hw_dbg(hw, "RAR index %d is out of range.\n", rar);
+		return IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	if (vmdq < 32) {
+		mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar));
+		mpsar |= BIT(vmdq);
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar);
+	} else {
+		mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar));
+		mpsar |= BIT(vmdq - 32);
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar);
+	}
+	return 0;
+}
+
+/**
+ *  ixgbe_set_vmdq_san_mac_generic - Associate VMDq pool index with a rx address
+ *  @hw: pointer to hardware struct
+ *  @vmdq: VMDq pool index
+ *
+ *  This function should only be involved in the IOV mode.
+ *  In IOV mode, Default pool is next pool after the number of
+ *  VFs advertized and not 0.
+ *  MPSAR table needs to be updated for SAN_MAC RAR [hw->mac.san_mac_rar_index]
+ **/
+s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq)
+{
+	u32 rar = hw->mac.san_mac_rar_index;
+
+	if (vmdq < 32) {
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), BIT(vmdq));
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), 0);
+	} else {
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), BIT(vmdq - 32));
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_init_uta_tables_generic - Initialize the Unicast Table Array
+ *  @hw: pointer to hardware structure
+ **/
+s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw)
+{
+	int i;
+
+	for (i = 0; i < 128; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_UTA(i), 0);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_find_vlvf_slot - find the vlanid or the first empty slot
+ *  @hw: pointer to hardware structure
+ *  @vlan: VLAN id to write to VLAN filter
+ *  @vlvf_bypass: true to find vlanid only, false returns first empty slot if
+ *		  vlanid not found
+ *
+ *  return the VLVF index where this VLAN id should be placed
+ *
+ **/
+static s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan, bool vlvf_bypass)
+{
+	s32 regindex, first_empty_slot;
+	u32 bits;
+
+	/* short cut the special case */
+	if (vlan == 0)
+		return 0;
+
+	/* if vlvf_bypass is set we don't want to use an empty slot, we
+	 * will simply bypass the VLVF if there are no entries present in the
+	 * VLVF that contain our VLAN
+	 */
+	first_empty_slot = vlvf_bypass ? IXGBE_ERR_NO_SPACE : 0;
+
+	/* add VLAN enable bit for comparison */
+	vlan |= IXGBE_VLVF_VIEN;
+
+	/* Search for the vlan id in the VLVF entries. Save off the first empty
+	 * slot found along the way.
+	 *
+	 * pre-decrement loop covering (IXGBE_VLVF_ENTRIES - 1) .. 1
+	 */
+	for (regindex = IXGBE_VLVF_ENTRIES; --regindex;) {
+		bits = IXGBE_READ_REG(hw, IXGBE_VLVF(regindex));
+		if (bits == vlan)
+			return regindex;
+		if (!first_empty_slot && !bits)
+			first_empty_slot = regindex;
+	}
+
+	/* If we are here then we didn't find the VLAN.  Return first empty
+	 * slot we found during our search, else error.
+	 */
+	if (!first_empty_slot)
+		hw_dbg(hw, "No space in VLVF.\n");
+
+	return first_empty_slot ? : IXGBE_ERR_NO_SPACE;
+}
+
+/**
+ *  ixgbe_set_vfta_generic - Set VLAN filter table
+ *  @hw: pointer to hardware structure
+ *  @vlan: VLAN id to write to VLAN filter
+ *  @vind: VMDq output index that maps queue to VLAN id in VFVFB
+ *  @vlan_on: boolean flag to turn on/off VLAN in VFVF
+ *  @vlvf_bypass: boolean flag indicating updating default pool is okay
+ *
+ *  Turn on/off specified VLAN in the VLAN filter table.
+ **/
+s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+			   bool vlan_on, bool vlvf_bypass)
+{
+	u32 regidx, vfta_delta, vfta, bits;
+	s32 vlvf_index;
+
+	if ((vlan > 4095) || (vind > 63))
+		return IXGBE_ERR_PARAM;
+
+	/*
+	 * this is a 2 part operation - first the VFTA, then the
+	 * VLVF and VLVFB if VT Mode is set
+	 * We don't write the VFTA until we know the VLVF part succeeded.
+	 */
+
+	/* Part 1
+	 * The VFTA is a bitstring made up of 128 32-bit registers
+	 * that enable the particular VLAN id, much like the MTA:
+	 *    bits[11-5]: which register
+	 *    bits[4-0]:  which bit in the register
+	 */
+	regidx = vlan / 32;
+	vfta_delta = BIT(vlan % 32);
+	vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regidx));
+
+	/* vfta_delta represents the difference between the current value
+	 * of vfta and the value we want in the register.  Since the diff
+	 * is an XOR mask we can just update vfta using an XOR.
+	 */
+	vfta_delta &= vlan_on ? ~vfta : vfta;
+	vfta ^= vfta_delta;
+
+	/* Part 2
+	 * If VT Mode is set
+	 *   Either vlan_on
+	 *     make sure the vlan is in VLVF
+	 *     set the vind bit in the matching VLVFB
+	 *   Or !vlan_on
+	 *     clear the pool bit and possibly the vind
+	 */
+	if (!(IXGBE_READ_REG(hw, IXGBE_VT_CTL) & IXGBE_VT_CTL_VT_ENABLE))
+		goto vfta_update;
+
+	vlvf_index = ixgbe_find_vlvf_slot(hw, vlan, vlvf_bypass);
+	if (vlvf_index < 0) {
+		if (vlvf_bypass)
+			goto vfta_update;
+		return vlvf_index;
+	}
+
+	bits = IXGBE_READ_REG(hw, IXGBE_VLVFB(vlvf_index * 2 + vind / 32));
+
+	/* set the pool bit */
+	bits |= BIT(vind % 32);
+	if (vlan_on)
+		goto vlvf_update;
+
+	/* clear the pool bit */
+	bits ^= BIT(vind % 32);
+
+	if (!bits &&
+	    !IXGBE_READ_REG(hw, IXGBE_VLVFB(vlvf_index * 2 + 1 - vind / 32))) {
+		/* Clear VFTA first, then disable VLVF.  Otherwise
+		 * we run the risk of stray packets leaking into
+		 * the PF via the default pool
+		 */
+		if (vfta_delta)
+			IXGBE_WRITE_REG(hw, IXGBE_VFTA(regidx), vfta);
+
+		/* disable VLVF and clear remaining bit from pool */
+		IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_VLVFB(vlvf_index * 2 + vind / 32), 0);
+
+		return 0;
+	}
+
+	/* If there are still bits set in the VLVFB registers
+	 * for the VLAN ID indicated we need to see if the
+	 * caller is requesting that we clear the VFTA entry bit.
+	 * If the caller has requested that we clear the VFTA
+	 * entry bit but there are still pools/VFs using this VLAN
+	 * ID entry then ignore the request.  We're not worried
+	 * about the case where we're turning the VFTA VLAN ID
+	 * entry bit on, only when requested to turn it off as
+	 * there may be multiple pools and/or VFs using the
+	 * VLAN ID entry.  In that case we cannot clear the
+	 * VFTA bit until all pools/VFs using that VLAN ID have also
+	 * been cleared.  This will be indicated by "bits" being
+	 * zero.
+	 */
+	vfta_delta = 0;
+
+vlvf_update:
+	/* record pool change and enable VLAN ID if not already enabled */
+	IXGBE_WRITE_REG(hw, IXGBE_VLVFB(vlvf_index * 2 + vind / 32), bits);
+	IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), IXGBE_VLVF_VIEN | vlan);
+
+vfta_update:
+	/* Update VFTA now that we are ready for traffic */
+	if (vfta_delta)
+		IXGBE_WRITE_REG(hw, IXGBE_VFTA(regidx), vfta);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_clear_vfta_generic - Clear VLAN filter table
+ *  @hw: pointer to hardware structure
+ *
+ *  Clears the VLAN filter table, and the VMDq index associated with the filter
+ **/
+s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw)
+{
+	u32 offset;
+
+	for (offset = 0; offset < hw->mac.vft_size; offset++)
+		IXGBE_WRITE_REG(hw, IXGBE_VFTA(offset), 0);
+
+	for (offset = 0; offset < IXGBE_VLVF_ENTRIES; offset++) {
+		IXGBE_WRITE_REG(hw, IXGBE_VLVF(offset), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset * 2), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset * 2 + 1), 0);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_need_crosstalk_fix - Determine if we need to do cross talk fix
+ *  @hw: pointer to hardware structure
+ *
+ *  Contains the logic to identify if we need to verify link for the
+ *  crosstalk fix
+ **/
+static bool ixgbe_need_crosstalk_fix(struct ixgbe_hw *hw)
+{
+	/* Does FW say we need the fix */
+	if (!hw->need_crosstalk_fix)
+		return false;
+
+	/* Only consider SFP+ PHYs i.e. media type fiber */
+	switch (hw->mac.ops.get_media_type(hw)) {
+	case ixgbe_media_type_fiber:
+	case ixgbe_media_type_fiber_qsfp:
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
+/**
+ *  ixgbe_check_mac_link_generic - Determine link and speed status
+ *  @hw: pointer to hardware structure
+ *  @speed: pointer to link speed
+ *  @link_up: true when link is up
+ *  @link_up_wait_to_complete: bool used to wait for link up or not
+ *
+ *  Reads the links register to determine if link is up and the current speed
+ **/
+s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+				 bool *link_up, bool link_up_wait_to_complete)
+{
+	bool crosstalk_fix_active = ixgbe_need_crosstalk_fix(hw);
+	u32 links_reg, links_orig;
+	u32 i;
+
+	/* If Crosstalk fix enabled do the sanity check of making sure
+	 * the SFP+ cage is full.
+	 */
+	if (crosstalk_fix_active) {
+		u32 sfp_cage_full;
+
+		switch (hw->mac.type) {
+		case ixgbe_mac_82599EB:
+			sfp_cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) &
+					IXGBE_ESDP_SDP2;
+			break;
+		case ixgbe_mac_X550EM_x:
+		case ixgbe_mac_x550em_a:
+			sfp_cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) &
+					IXGBE_ESDP_SDP0;
+			break;
+		default:
+			/* sanity check - No SFP+ devices here */
+			sfp_cage_full = false;
+			break;
+		}
+
+		if (!sfp_cage_full) {
+			*link_up = false;
+			*speed = IXGBE_LINK_SPEED_UNKNOWN;
+			return 0;
+		}
+	}
+
+	/* clear the old state */
+	links_orig = IXGBE_READ_REG(hw, IXGBE_LINKS);
+
+	links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+
+	if (links_orig != links_reg) {
+		hw_dbg(hw, "LINKS changed from %08X to %08X\n",
+		       links_orig, links_reg);
+	}
+
+	if (link_up_wait_to_complete) {
+		for (i = 0; i < IXGBE_LINK_UP_TIME; i++) {
+			if (links_reg & IXGBE_LINKS_UP) {
+				*link_up = true;
+				break;
+			} else {
+				*link_up = false;
+			}
+			msleep(100);
+			links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+		}
+	} else {
+		if (links_reg & IXGBE_LINKS_UP) {
+			if (crosstalk_fix_active) {
+				/* Check the link state again after a delay
+				 * to filter out spurious link up
+				 * notifications.
+				 */
+				mdelay(5);
+				links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS);
+				if (!(links_reg & IXGBE_LINKS_UP)) {
+					*link_up = false;
+					*speed = IXGBE_LINK_SPEED_UNKNOWN;
+					return 0;
+				}
+			}
+			*link_up = true;
+		} else {
+			*link_up = false;
+		}
+	}
+
+	switch (links_reg & IXGBE_LINKS_SPEED_82599) {
+	case IXGBE_LINKS_SPEED_10G_82599:
+		if ((hw->mac.type >= ixgbe_mac_X550) &&
+		    (links_reg & IXGBE_LINKS_SPEED_NON_STD))
+			*speed = IXGBE_LINK_SPEED_2_5GB_FULL;
+		else
+			*speed = IXGBE_LINK_SPEED_10GB_FULL;
+		break;
+	case IXGBE_LINKS_SPEED_1G_82599:
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+		break;
+	case IXGBE_LINKS_SPEED_100_82599:
+		if ((hw->mac.type >= ixgbe_mac_X550) &&
+		    (links_reg & IXGBE_LINKS_SPEED_NON_STD))
+			*speed = IXGBE_LINK_SPEED_5GB_FULL;
+		else
+			*speed = IXGBE_LINK_SPEED_100_FULL;
+		break;
+	case IXGBE_LINKS_SPEED_10_X550EM_A:
+		*speed = IXGBE_LINK_SPEED_UNKNOWN;
+		if (hw->device_id == IXGBE_DEV_ID_X550EM_A_1G_T ||
+		    hw->device_id == IXGBE_DEV_ID_X550EM_A_1G_T_L) {
+			*speed = IXGBE_LINK_SPEED_10_FULL;
+		}
+		break;
+	default:
+		*speed = IXGBE_LINK_SPEED_UNKNOWN;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_wwn_prefix_generic - Get alternative WWNN/WWPN prefix from
+ *  the EEPROM
+ *  @hw: pointer to hardware structure
+ *  @wwnn_prefix: the alternative WWNN prefix
+ *  @wwpn_prefix: the alternative WWPN prefix
+ *
+ *  This function will read the EEPROM from the alternative SAN MAC address
+ *  block to check the support for the alternative WWNN/WWPN prefix support.
+ **/
+s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix,
+					u16 *wwpn_prefix)
+{
+	u16 offset, caps;
+	u16 alt_san_mac_blk_offset;
+
+	/* clear output first */
+	*wwnn_prefix = 0xFFFF;
+	*wwpn_prefix = 0xFFFF;
+
+	/* check if alternative SAN MAC is supported */
+	offset = IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR;
+	if (hw->eeprom.ops.read(hw, offset, &alt_san_mac_blk_offset))
+		goto wwn_prefix_err;
+
+	if ((alt_san_mac_blk_offset == 0) ||
+	    (alt_san_mac_blk_offset == 0xFFFF))
+		return 0;
+
+	/* check capability in alternative san mac address block */
+	offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET;
+	if (hw->eeprom.ops.read(hw, offset, &caps))
+		goto wwn_prefix_err;
+	if (!(caps & IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN))
+		return 0;
+
+	/* get the corresponding prefix for WWNN/WWPN */
+	offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET;
+	if (hw->eeprom.ops.read(hw, offset, wwnn_prefix))
+		hw_err(hw, "eeprom read at offset %d failed\n", offset);
+
+	offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET;
+	if (hw->eeprom.ops.read(hw, offset, wwpn_prefix))
+		goto wwn_prefix_err;
+
+	return 0;
+
+wwn_prefix_err:
+	hw_err(hw, "eeprom read at offset %d failed\n", offset);
+	return 0;
+}
+
+/**
+ *  ixgbe_set_mac_anti_spoofing - Enable/Disable MAC anti-spoofing
+ *  @hw: pointer to hardware structure
+ *  @enable: enable or disable switch for MAC anti-spoofing
+ *  @vf: Virtual Function pool - VF Pool to set for MAC anti-spoofing
+ *
+ **/
+void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf)
+{
+	int vf_target_reg = vf >> 3;
+	int vf_target_shift = vf % 8;
+	u32 pfvfspoof;
+
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return;
+
+	pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg));
+	if (enable)
+		pfvfspoof |= BIT(vf_target_shift);
+	else
+		pfvfspoof &= ~BIT(vf_target_shift);
+	IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), pfvfspoof);
+}
+
+/**
+ *  ixgbe_set_vlan_anti_spoofing - Enable/Disable VLAN anti-spoofing
+ *  @hw: pointer to hardware structure
+ *  @enable: enable or disable switch for VLAN anti-spoofing
+ *  @vf: Virtual Function pool - VF Pool to set for VLAN anti-spoofing
+ *
+ **/
+void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf)
+{
+	int vf_target_reg = vf >> 3;
+	int vf_target_shift = vf % 8 + IXGBE_SPOOF_VLANAS_SHIFT;
+	u32 pfvfspoof;
+
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return;
+
+	pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg));
+	if (enable)
+		pfvfspoof |= BIT(vf_target_shift);
+	else
+		pfvfspoof &= ~BIT(vf_target_shift);
+	IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), pfvfspoof);
+}
+
+/**
+ *  ixgbe_get_device_caps_generic - Get additional device capabilities
+ *  @hw: pointer to hardware structure
+ *  @device_caps: the EEPROM word with the extra device capabilities
+ *
+ *  This function will read the EEPROM location for the device capabilities,
+ *  and return the word through device_caps.
+ **/
+s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps)
+{
+	hw->eeprom.ops.read(hw, IXGBE_DEVICE_CAPS, device_caps);
+
+	return 0;
+}
+
+/**
+ * ixgbe_set_rxpba_generic - Initialize RX packet buffer
+ * @hw: pointer to hardware structure
+ * @num_pb: number of packet buffers to allocate
+ * @headroom: reserve n KB of headroom
+ * @strategy: packet buffer allocation strategy
+ **/
+void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw,
+			     int num_pb,
+			     u32 headroom,
+			     int strategy)
+{
+	u32 pbsize = hw->mac.rx_pb_size;
+	int i = 0;
+	u32 rxpktsize, txpktsize, txpbthresh;
+
+	/* Reserve headroom */
+	pbsize -= headroom;
+
+	if (!num_pb)
+		num_pb = 1;
+
+	/* Divide remaining packet buffer space amongst the number
+	 * of packet buffers requested using supplied strategy.
+	 */
+	switch (strategy) {
+	case (PBA_STRATEGY_WEIGHTED):
+		/* pba_80_48 strategy weight first half of packet buffer with
+		 * 5/8 of the packet buffer space.
+		 */
+		rxpktsize = ((pbsize * 5 * 2) / (num_pb * 8));
+		pbsize -= rxpktsize * (num_pb / 2);
+		rxpktsize <<= IXGBE_RXPBSIZE_SHIFT;
+		for (; i < (num_pb / 2); i++)
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
+		fallthrough; /* configure remaining packet buffers */
+	case (PBA_STRATEGY_EQUAL):
+		/* Divide the remaining Rx packet buffer evenly among the TCs */
+		rxpktsize = (pbsize / (num_pb - i)) << IXGBE_RXPBSIZE_SHIFT;
+		for (; i < num_pb; i++)
+			IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize);
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * Setup Tx packet buffer and threshold equally for all TCs
+	 * TXPBTHRESH register is set in K so divide by 1024 and subtract
+	 * 10 since the largest packet we support is just over 9K.
+	 */
+	txpktsize = IXGBE_TXPBSIZE_MAX / num_pb;
+	txpbthresh = (txpktsize / 1024) - IXGBE_TXPKT_SIZE_MAX;
+	for (i = 0; i < num_pb; i++) {
+		IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize);
+		IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh);
+	}
+
+	/* Clear unused TCs, if any, to zero buffer size*/
+	for (; i < IXGBE_MAX_PB; i++) {
+		IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0);
+	}
+}
+
+/**
+ *  ixgbe_calculate_checksum - Calculate checksum for buffer
+ *  @buffer: pointer to EEPROM
+ *  @length: size of EEPROM to calculate a checksum for
+ *
+ *  Calculates the checksum for some buffer on a specified length.  The
+ *  checksum calculated is returned.
+ **/
+u8 ixgbe_calculate_checksum(u8 *buffer, u32 length)
+{
+	u32 i;
+	u8 sum = 0;
+
+	if (!buffer)
+		return 0;
+
+	for (i = 0; i < length; i++)
+		sum += buffer[i];
+
+	return (u8) (0 - sum);
+}
+
+/**
+ *  ixgbe_hic_unlocked - Issue command to manageability block unlocked
+ *  @hw: pointer to the HW structure
+ *  @buffer: command to write and where the return status will be placed
+ *  @length: length of buffer, must be multiple of 4 bytes
+ *  @timeout: time in ms to wait for command completion
+ *
+ *  Communicates with the manageability block. On success return 0
+ *  else returns semaphore error when encountering an error acquiring
+ *  semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails.
+ *
+ *  This function assumes that the IXGBE_GSSR_SW_MNG_SM semaphore is held
+ *  by the caller.
+ **/
+s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length,
+		       u32 timeout)
+{
+	u32 hicr, i, fwsts;
+	u16 dword_len;
+
+	if (!length || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) {
+		hw_dbg(hw, "Buffer length failure buffersize-%d.\n", length);
+		return IXGBE_ERR_HOST_INTERFACE_COMMAND;
+	}
+
+	/* Set bit 9 of FWSTS clearing FW reset indication */
+	fwsts = IXGBE_READ_REG(hw, IXGBE_FWSTS);
+	IXGBE_WRITE_REG(hw, IXGBE_FWSTS, fwsts | IXGBE_FWSTS_FWRI);
+
+	/* Check that the host interface is enabled. */
+	hicr = IXGBE_READ_REG(hw, IXGBE_HICR);
+	if (!(hicr & IXGBE_HICR_EN)) {
+		hw_dbg(hw, "IXGBE_HOST_EN bit disabled.\n");
+		return IXGBE_ERR_HOST_INTERFACE_COMMAND;
+	}
+
+	/* Calculate length in DWORDs. We must be DWORD aligned */
+	if (length % sizeof(u32)) {
+		hw_dbg(hw, "Buffer length failure, not aligned to dword");
+		return IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	dword_len = length >> 2;
+
+	/* The device driver writes the relevant command block
+	 * into the ram area.
+	 */
+	for (i = 0; i < dword_len; i++)
+		IXGBE_WRITE_REG_ARRAY(hw, IXGBE_FLEX_MNG,
+				      i, (__force u32)cpu_to_le32(buffer[i]));
+
+	/* Setting this bit tells the ARC that a new command is pending. */
+	IXGBE_WRITE_REG(hw, IXGBE_HICR, hicr | IXGBE_HICR_C);
+
+	for (i = 0; i < timeout; i++) {
+		hicr = IXGBE_READ_REG(hw, IXGBE_HICR);
+		if (!(hicr & IXGBE_HICR_C))
+			break;
+		usleep_range(1000, 2000);
+	}
+
+	/* Check command successful completion. */
+	if ((timeout && i == timeout) ||
+	    !(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV))
+		return IXGBE_ERR_HOST_INTERFACE_COMMAND;
+
+	return 0;
+}
+
+/**
+ *  ixgbe_host_interface_command - Issue command to manageability block
+ *  @hw: pointer to the HW structure
+ *  @buffer: contains the command to write and where the return status will
+ *           be placed
+ *  @length: length of buffer, must be multiple of 4 bytes
+ *  @timeout: time in ms to wait for command completion
+ *  @return_data: read and return data from the buffer (true) or not (false)
+ *  Needed because FW structures are big endian and decoding of
+ *  these fields can be 8 bit or 16 bit based on command. Decoding
+ *  is not easily understood without making a table of commands.
+ *  So we will leave this up to the caller to read back the data
+ *  in these cases.
+ *
+ *  Communicates with the manageability block.  On success return 0
+ *  else return IXGBE_ERR_HOST_INTERFACE_COMMAND.
+ **/
+s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *buffer,
+				 u32 length, u32 timeout,
+				 bool return_data)
+{
+	u32 hdr_size = sizeof(struct ixgbe_hic_hdr);
+	struct ixgbe_hic_hdr *hdr = buffer;
+	u32 *u32arr = buffer;
+	u16 buf_len, dword_len;
+	s32 status;
+	u32 bi;
+
+	if (!length || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) {
+		hw_dbg(hw, "Buffer length failure buffersize-%d.\n", length);
+		return IXGBE_ERR_HOST_INTERFACE_COMMAND;
+	}
+	/* Take management host interface semaphore */
+	status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM);
+	if (status)
+		return status;
+
+	status = ixgbe_hic_unlocked(hw, buffer, length, timeout);
+	if (status)
+		goto rel_out;
+
+	if (!return_data)
+		goto rel_out;
+
+	/* Calculate length in DWORDs */
+	dword_len = hdr_size >> 2;
+
+	/* first pull in the header so we know the buffer length */
+	for (bi = 0; bi < dword_len; bi++) {
+		u32arr[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
+		le32_to_cpus(&u32arr[bi]);
+	}
+
+	/* If there is any thing in data position pull it in */
+	buf_len = hdr->buf_len;
+	if (!buf_len)
+		goto rel_out;
+
+	if (length < round_up(buf_len, 4) + hdr_size) {
+		hw_dbg(hw, "Buffer not large enough for reply message.\n");
+		status = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+		goto rel_out;
+	}
+
+	/* Calculate length in DWORDs, add 3 for odd lengths */
+	dword_len = (buf_len + 3) >> 2;
+
+	/* Pull in the rest of the buffer (bi is where we left off) */
+	for (; bi <= dword_len; bi++) {
+		u32arr[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi);
+		le32_to_cpus(&u32arr[bi]);
+	}
+
+rel_out:
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM);
+
+	return status;
+}
+
+/**
+ *  ixgbe_set_fw_drv_ver_generic - Sends driver version to firmware
+ *  @hw: pointer to the HW structure
+ *  @maj: driver version major number
+ *  @min: driver version minor number
+ *  @build: driver version build number
+ *  @sub: driver version sub build number
+ *  @len: length of driver_ver string
+ *  @driver_ver: driver string
+ *
+ *  Sends driver version number to firmware through the manageability
+ *  block.  On success return 0
+ *  else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring
+ *  semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails.
+ **/
+s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
+				 u8 build, u8 sub, __always_unused u16 len,
+				 __always_unused const char *driver_ver)
+{
+	struct ixgbe_hic_drv_info fw_cmd;
+	int i;
+	s32 ret_val;
+
+	fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO;
+	fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN;
+	fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
+	fw_cmd.port_num = hw->bus.func;
+	fw_cmd.ver_maj = maj;
+	fw_cmd.ver_min = min;
+	fw_cmd.ver_build = build;
+	fw_cmd.ver_sub = sub;
+	fw_cmd.hdr.checksum = 0;
+	fw_cmd.pad = 0;
+	fw_cmd.pad2 = 0;
+	fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
+				(FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
+
+	for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
+		ret_val = ixgbe_host_interface_command(hw, &fw_cmd,
+						       sizeof(fw_cmd),
+						       IXGBE_HI_COMMAND_TIMEOUT,
+						       true);
+		if (ret_val != 0)
+			continue;
+
+		if (fw_cmd.hdr.cmd_or_resp.ret_status ==
+		    FW_CEM_RESP_STATUS_SUCCESS)
+			ret_val = 0;
+		else
+			ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND;
+
+		break;
+	}
+
+	return ret_val;
+}
+
+/**
+ * ixgbe_clear_tx_pending - Clear pending TX work from the PCIe fifo
+ * @hw: pointer to the hardware structure
+ *
+ * The 82599 and x540 MACs can experience issues if TX work is still pending
+ * when a reset occurs.  This function prevents this by flushing the PCIe
+ * buffers on the system.
+ **/
+void ixgbe_clear_tx_pending(struct ixgbe_hw *hw)
+{
+	u32 gcr_ext, hlreg0, i, poll;
+	u16 value;
+
+	/*
+	 * If double reset is not requested then all transactions should
+	 * already be clear and as such there is no work to do
+	 */
+	if (!(hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED))
+		return;
+
+	/*
+	 * Set loopback enable to prevent any transmits from being sent
+	 * should the link come up.  This assumes that the RXCTRL.RXEN bit
+	 * has already been cleared.
+	 */
+	hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0 | IXGBE_HLREG0_LPBK);
+
+	/* wait for a last completion before clearing buffers */
+	IXGBE_WRITE_FLUSH(hw);
+	usleep_range(3000, 6000);
+
+	/* Before proceeding, make sure that the PCIe block does not have
+	 * transactions pending.
+	 */
+	poll = ixgbe_pcie_timeout_poll(hw);
+	for (i = 0; i < poll; i++) {
+		usleep_range(100, 200);
+		value = ixgbe_read_pci_cfg_word(hw, IXGBE_PCI_DEVICE_STATUS);
+		if (ixgbe_removed(hw->hw_addr))
+			break;
+		if (!(value & IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING))
+			break;
+	}
+
+	/* initiate cleaning flow for buffers in the PCIe transaction layer */
+	gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT);
+	IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT,
+			gcr_ext | IXGBE_GCR_EXT_BUFFERS_CLEAR);
+
+	/* Flush all writes and allow 20usec for all transactions to clear */
+	IXGBE_WRITE_FLUSH(hw);
+	udelay(20);
+
+	/* restore previous register values */
+	IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext);
+	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
+}
+
+static const u8 ixgbe_emc_temp_data[4] = {
+	IXGBE_EMC_INTERNAL_DATA,
+	IXGBE_EMC_DIODE1_DATA,
+	IXGBE_EMC_DIODE2_DATA,
+	IXGBE_EMC_DIODE3_DATA
+};
+static const u8 ixgbe_emc_therm_limit[4] = {
+	IXGBE_EMC_INTERNAL_THERM_LIMIT,
+	IXGBE_EMC_DIODE1_THERM_LIMIT,
+	IXGBE_EMC_DIODE2_THERM_LIMIT,
+	IXGBE_EMC_DIODE3_THERM_LIMIT
+};
+
+/**
+ *  ixgbe_get_ets_data - Extracts the ETS bit data
+ *  @hw: pointer to hardware structure
+ *  @ets_cfg: extected ETS data
+ *  @ets_offset: offset of ETS data
+ *
+ *  Returns error code.
+ **/
+static s32 ixgbe_get_ets_data(struct ixgbe_hw *hw, u16 *ets_cfg,
+			      u16 *ets_offset)
+{
+	s32 status;
+
+	status = hw->eeprom.ops.read(hw, IXGBE_ETS_CFG, ets_offset);
+	if (status)
+		return status;
+
+	if ((*ets_offset == 0x0000) || (*ets_offset == 0xFFFF))
+		return IXGBE_NOT_IMPLEMENTED;
+
+	status = hw->eeprom.ops.read(hw, *ets_offset, ets_cfg);
+	if (status)
+		return status;
+
+	if ((*ets_cfg & IXGBE_ETS_TYPE_MASK) != IXGBE_ETS_TYPE_EMC_SHIFTED)
+		return IXGBE_NOT_IMPLEMENTED;
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_thermal_sensor_data_generic - Gathers thermal sensor data
+ *  @hw: pointer to hardware structure
+ *
+ *  Returns the thermal sensor data structure
+ **/
+s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw)
+{
+	s32 status;
+	u16 ets_offset;
+	u16 ets_cfg;
+	u16 ets_sensor;
+	u8  num_sensors;
+	u8  i;
+	struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
+
+	/* Only support thermal sensors attached to physical port 0 */
+	if ((IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1))
+		return IXGBE_NOT_IMPLEMENTED;
+
+	status = ixgbe_get_ets_data(hw, &ets_cfg, &ets_offset);
+	if (status)
+		return status;
+
+	num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK);
+	if (num_sensors > IXGBE_MAX_SENSORS)
+		num_sensors = IXGBE_MAX_SENSORS;
+
+	for (i = 0; i < num_sensors; i++) {
+		u8  sensor_index;
+		u8  sensor_location;
+
+		status = hw->eeprom.ops.read(hw, (ets_offset + 1 + i),
+					     &ets_sensor);
+		if (status)
+			return status;
+
+		sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >>
+				IXGBE_ETS_DATA_INDEX_SHIFT);
+		sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >>
+				   IXGBE_ETS_DATA_LOC_SHIFT);
+
+		if (sensor_location != 0) {
+			status = hw->phy.ops.read_i2c_byte(hw,
+					ixgbe_emc_temp_data[sensor_index],
+					IXGBE_I2C_THERMAL_SENSOR_ADDR,
+					&data->sensor[i].temp);
+			if (status)
+				return status;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_init_thermal_sensor_thresh_generic - Inits thermal sensor thresholds
+ * @hw: pointer to hardware structure
+ *
+ * Inits the thermal sensor thresholds according to the NVM map
+ * and save off the threshold and location values into mac.thermal_sensor_data
+ **/
+s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw)
+{
+	s32 status;
+	u16 ets_offset;
+	u16 ets_cfg;
+	u16 ets_sensor;
+	u8  low_thresh_delta;
+	u8  num_sensors;
+	u8  therm_limit;
+	u8  i;
+	struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data;
+
+	memset(data, 0, sizeof(struct ixgbe_thermal_sensor_data));
+
+	/* Only support thermal sensors attached to physical port 0 */
+	if ((IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1))
+		return IXGBE_NOT_IMPLEMENTED;
+
+	status = ixgbe_get_ets_data(hw, &ets_cfg, &ets_offset);
+	if (status)
+		return status;
+
+	low_thresh_delta = ((ets_cfg & IXGBE_ETS_LTHRES_DELTA_MASK) >>
+			     IXGBE_ETS_LTHRES_DELTA_SHIFT);
+	num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK);
+	if (num_sensors > IXGBE_MAX_SENSORS)
+		num_sensors = IXGBE_MAX_SENSORS;
+
+	for (i = 0; i < num_sensors; i++) {
+		u8  sensor_index;
+		u8  sensor_location;
+
+		if (hw->eeprom.ops.read(hw, ets_offset + 1 + i, &ets_sensor)) {
+			hw_err(hw, "eeprom read at offset %d failed\n",
+			       ets_offset + 1 + i);
+			continue;
+		}
+		sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >>
+				IXGBE_ETS_DATA_INDEX_SHIFT);
+		sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >>
+				   IXGBE_ETS_DATA_LOC_SHIFT);
+		therm_limit = ets_sensor & IXGBE_ETS_DATA_HTHRESH_MASK;
+
+		hw->phy.ops.write_i2c_byte(hw,
+			ixgbe_emc_therm_limit[sensor_index],
+			IXGBE_I2C_THERMAL_SENSOR_ADDR, therm_limit);
+
+		if (sensor_location == 0)
+			continue;
+
+		data->sensor[i].location = sensor_location;
+		data->sensor[i].caution_thresh = therm_limit;
+		data->sensor[i].max_op_thresh = therm_limit - low_thresh_delta;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_orom_version - Return option ROM from EEPROM
+ *
+ *  @hw: pointer to hardware structure
+ *  @nvm_ver: pointer to output structure
+ *
+ *  if valid option ROM version, nvm_ver->or_valid set to true
+ *  else nvm_ver->or_valid is false.
+ **/
+void ixgbe_get_orom_version(struct ixgbe_hw *hw,
+			    struct ixgbe_nvm_version *nvm_ver)
+{
+	u16 offset, eeprom_cfg_blkh, eeprom_cfg_blkl;
+
+	nvm_ver->or_valid = false;
+	/* Option Rom may or may not be present.  Start with pointer */
+	hw->eeprom.ops.read(hw, NVM_OROM_OFFSET, &offset);
+
+	/* make sure offset is valid */
+	if (offset == 0x0 || offset == NVM_INVALID_PTR)
+		return;
+
+	hw->eeprom.ops.read(hw, offset + NVM_OROM_BLK_HI, &eeprom_cfg_blkh);
+	hw->eeprom.ops.read(hw, offset + NVM_OROM_BLK_LOW, &eeprom_cfg_blkl);
+
+	/* option rom exists and is valid */
+	if ((eeprom_cfg_blkl | eeprom_cfg_blkh) == 0x0 ||
+	    eeprom_cfg_blkl == NVM_VER_INVALID ||
+	    eeprom_cfg_blkh == NVM_VER_INVALID)
+		return;
+
+	nvm_ver->or_valid = true;
+	nvm_ver->or_major = eeprom_cfg_blkl >> NVM_OROM_SHIFT;
+	nvm_ver->or_build = (eeprom_cfg_blkl << NVM_OROM_SHIFT) |
+			    (eeprom_cfg_blkh >> NVM_OROM_SHIFT);
+	nvm_ver->or_patch = eeprom_cfg_blkh & NVM_OROM_PATCH_MASK;
+}
+
+/**
+ *  ixgbe_get_oem_prod_version - Etrack ID from EEPROM
+ *  @hw: pointer to hardware structure
+ *  @nvm_ver: pointer to output structure
+ *
+ *  if valid OEM product version, nvm_ver->oem_valid set to true
+ *  else nvm_ver->oem_valid is false.
+ **/
+void ixgbe_get_oem_prod_version(struct ixgbe_hw *hw,
+				struct ixgbe_nvm_version *nvm_ver)
+{
+	u16 rel_num, prod_ver, mod_len, cap, offset;
+
+	nvm_ver->oem_valid = false;
+	hw->eeprom.ops.read(hw, NVM_OEM_PROD_VER_PTR, &offset);
+
+	/* Return is offset to OEM Product Version block is invalid */
+	if (offset == 0x0 || offset == NVM_INVALID_PTR)
+		return;
+
+	/* Read product version block */
+	hw->eeprom.ops.read(hw, offset, &mod_len);
+	hw->eeprom.ops.read(hw, offset + NVM_OEM_PROD_VER_CAP_OFF, &cap);
+
+	/* Return if OEM product version block is invalid */
+	if (mod_len != NVM_OEM_PROD_VER_MOD_LEN ||
+	    (cap & NVM_OEM_PROD_VER_CAP_MASK) != 0x0)
+		return;
+
+	hw->eeprom.ops.read(hw, offset + NVM_OEM_PROD_VER_OFF_L, &prod_ver);
+	hw->eeprom.ops.read(hw, offset + NVM_OEM_PROD_VER_OFF_H, &rel_num);
+
+	/* Return if version is invalid */
+	if ((rel_num | prod_ver) == 0x0 ||
+	    rel_num == NVM_VER_INVALID || prod_ver == NVM_VER_INVALID)
+		return;
+
+	nvm_ver->oem_major = prod_ver >> NVM_VER_SHIFT;
+	nvm_ver->oem_minor = prod_ver & NVM_VER_MASK;
+	nvm_ver->oem_release = rel_num;
+	nvm_ver->oem_valid = true;
+}
+
+/**
+ *  ixgbe_get_etk_id - Return Etrack ID from EEPROM
+ *
+ *  @hw: pointer to hardware structure
+ *  @nvm_ver: pointer to output structure
+ *
+ *  word read errors will return 0xFFFF
+ **/
+void ixgbe_get_etk_id(struct ixgbe_hw *hw,
+		      struct ixgbe_nvm_version *nvm_ver)
+{
+	u16 etk_id_l, etk_id_h;
+
+	if (hw->eeprom.ops.read(hw, NVM_ETK_OFF_LOW, &etk_id_l))
+		etk_id_l = NVM_VER_INVALID;
+	if (hw->eeprom.ops.read(hw, NVM_ETK_OFF_HI, &etk_id_h))
+		etk_id_h = NVM_VER_INVALID;
+
+	/* The word order for the version format is determined by high order
+	 * word bit 15.
+	 */
+	if ((etk_id_h & NVM_ETK_VALID) == 0) {
+		nvm_ver->etk_id = etk_id_h;
+		nvm_ver->etk_id |= (etk_id_l << NVM_ETK_SHIFT);
+	} else {
+		nvm_ver->etk_id = etk_id_l;
+		nvm_ver->etk_id |= (etk_id_h << NVM_ETK_SHIFT);
+	}
+}
+
+void ixgbe_disable_rx_generic(struct ixgbe_hw *hw)
+{
+	u32 rxctrl;
+
+	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+	if (rxctrl & IXGBE_RXCTRL_RXEN) {
+		if (hw->mac.type != ixgbe_mac_82598EB) {
+			u32 pfdtxgswc;
+
+			pfdtxgswc = IXGBE_READ_REG(hw, IXGBE_PFDTXGSWC);
+			if (pfdtxgswc & IXGBE_PFDTXGSWC_VT_LBEN) {
+				pfdtxgswc &= ~IXGBE_PFDTXGSWC_VT_LBEN;
+				IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, pfdtxgswc);
+				hw->mac.set_lben = true;
+			} else {
+				hw->mac.set_lben = false;
+			}
+		}
+		rxctrl &= ~IXGBE_RXCTRL_RXEN;
+		IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl);
+	}
+}
+
+void ixgbe_enable_rx_generic(struct ixgbe_hw *hw)
+{
+	u32 rxctrl;
+
+	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, (rxctrl | IXGBE_RXCTRL_RXEN));
+
+	if (hw->mac.type != ixgbe_mac_82598EB) {
+		if (hw->mac.set_lben) {
+			u32 pfdtxgswc;
+
+			pfdtxgswc = IXGBE_READ_REG(hw, IXGBE_PFDTXGSWC);
+			pfdtxgswc |= IXGBE_PFDTXGSWC_VT_LBEN;
+			IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, pfdtxgswc);
+			hw->mac.set_lben = false;
+		}
+	}
+}
+
+/** ixgbe_mng_present - returns true when management capability is present
+ * @hw: pointer to hardware structure
+ **/
+bool ixgbe_mng_present(struct ixgbe_hw *hw)
+{
+	u32 fwsm;
+
+	if (hw->mac.type < ixgbe_mac_82599EB)
+		return false;
+
+	fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw));
+
+	return !!(fwsm & IXGBE_FWSM_FW_MODE_PT);
+}
+
+/**
+ *  ixgbe_setup_mac_link_multispeed_fiber - Set MAC link speed
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg_wait_to_complete: true when waiting for completion is needed
+ *
+ *  Set the link speed in the MAC and/or PHY register and restarts link.
+ */
+s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
+					  ixgbe_link_speed speed,
+					  bool autoneg_wait_to_complete)
+{
+	ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN;
+	ixgbe_link_speed highest_link_speed = IXGBE_LINK_SPEED_UNKNOWN;
+	s32 status = 0;
+	u32 speedcnt = 0;
+	u32 i = 0;
+	bool autoneg, link_up = false;
+
+	/* Mask off requested but non-supported speeds */
+	status = hw->mac.ops.get_link_capabilities(hw, &link_speed, &autoneg);
+	if (status)
+		return status;
+
+	speed &= link_speed;
+
+	/* Try each speed one by one, highest priority first.  We do this in
+	 * software because 10Gb fiber doesn't support speed autonegotiation.
+	 */
+	if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
+		speedcnt++;
+		highest_link_speed = IXGBE_LINK_SPEED_10GB_FULL;
+
+		/* Set the module link speed */
+		switch (hw->phy.media_type) {
+		case ixgbe_media_type_fiber:
+			hw->mac.ops.set_rate_select_speed(hw,
+						    IXGBE_LINK_SPEED_10GB_FULL);
+			break;
+		case ixgbe_media_type_fiber_qsfp:
+			/* QSFP module automatically detects MAC link speed */
+			break;
+		default:
+			hw_dbg(hw, "Unexpected media type\n");
+			break;
+		}
+
+		/* Allow module to change analog characteristics (1G->10G) */
+		msleep(40);
+
+		status = hw->mac.ops.setup_mac_link(hw,
+						    IXGBE_LINK_SPEED_10GB_FULL,
+						    autoneg_wait_to_complete);
+		if (status)
+			return status;
+
+		/* Flap the Tx laser if it has not already been done */
+		if (hw->mac.ops.flap_tx_laser)
+			hw->mac.ops.flap_tx_laser(hw);
+
+		/* Wait for the controller to acquire link.  Per IEEE 802.3ap,
+		 * Section 73.10.2, we may have to wait up to 500ms if KR is
+		 * attempted.  82599 uses the same timing for 10g SFI.
+		 */
+		for (i = 0; i < 5; i++) {
+			/* Wait for the link partner to also set speed */
+			msleep(100);
+
+			/* If we have link, just jump out */
+			status = hw->mac.ops.check_link(hw, &link_speed,
+							&link_up, false);
+			if (status)
+				return status;
+
+			if (link_up)
+				goto out;
+		}
+	}
+
+	if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
+		speedcnt++;
+		if (highest_link_speed == IXGBE_LINK_SPEED_UNKNOWN)
+			highest_link_speed = IXGBE_LINK_SPEED_1GB_FULL;
+
+		/* Set the module link speed */
+		switch (hw->phy.media_type) {
+		case ixgbe_media_type_fiber:
+			hw->mac.ops.set_rate_select_speed(hw,
+						     IXGBE_LINK_SPEED_1GB_FULL);
+			break;
+		case ixgbe_media_type_fiber_qsfp:
+			/* QSFP module automatically detects link speed */
+			break;
+		default:
+			hw_dbg(hw, "Unexpected media type\n");
+			break;
+		}
+
+		/* Allow module to change analog characteristics (10G->1G) */
+		msleep(40);
+
+		status = hw->mac.ops.setup_mac_link(hw,
+						    IXGBE_LINK_SPEED_1GB_FULL,
+						    autoneg_wait_to_complete);
+		if (status)
+			return status;
+
+		/* Flap the Tx laser if it has not already been done */
+		if (hw->mac.ops.flap_tx_laser)
+			hw->mac.ops.flap_tx_laser(hw);
+
+		/* Wait for the link partner to also set speed */
+		msleep(100);
+
+		/* If we have link, just jump out */
+		status = hw->mac.ops.check_link(hw, &link_speed, &link_up,
+						false);
+		if (status)
+			return status;
+
+		if (link_up)
+			goto out;
+	}
+
+	/* We didn't get link.  Configure back to the highest speed we tried,
+	 * (if there was more than one).  We call ourselves back with just the
+	 * single highest speed that the user requested.
+	 */
+	if (speedcnt > 1)
+		status = ixgbe_setup_mac_link_multispeed_fiber(hw,
+						      highest_link_speed,
+						      autoneg_wait_to_complete);
+
+out:
+	/* Set autoneg_advertised value based on input link speed */
+	hw->phy.autoneg_advertised = 0;
+
+	if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL;
+
+	if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL;
+
+	return status;
+}
+
+/**
+ *  ixgbe_set_soft_rate_select_speed - Set module link speed
+ *  @hw: pointer to hardware structure
+ *  @speed: link speed to set
+ *
+ *  Set module link speed via the soft rate select.
+ */
+void ixgbe_set_soft_rate_select_speed(struct ixgbe_hw *hw,
+				      ixgbe_link_speed speed)
+{
+	s32 status;
+	u8 rs, eeprom_data;
+
+	switch (speed) {
+	case IXGBE_LINK_SPEED_10GB_FULL:
+		/* one bit mask same as setting on */
+		rs = IXGBE_SFF_SOFT_RS_SELECT_10G;
+		break;
+	case IXGBE_LINK_SPEED_1GB_FULL:
+		rs = IXGBE_SFF_SOFT_RS_SELECT_1G;
+		break;
+	default:
+		hw_dbg(hw, "Invalid fixed module speed\n");
+		return;
+	}
+
+	/* Set RS0 */
+	status = hw->phy.ops.read_i2c_byte(hw, IXGBE_SFF_SFF_8472_OSCB,
+					   IXGBE_I2C_EEPROM_DEV_ADDR2,
+					   &eeprom_data);
+	if (status) {
+		hw_dbg(hw, "Failed to read Rx Rate Select RS0\n");
+		return;
+	}
+
+	eeprom_data = (eeprom_data & ~IXGBE_SFF_SOFT_RS_SELECT_MASK) | rs;
+
+	status = hw->phy.ops.write_i2c_byte(hw, IXGBE_SFF_SFF_8472_OSCB,
+					    IXGBE_I2C_EEPROM_DEV_ADDR2,
+					    eeprom_data);
+	if (status) {
+		hw_dbg(hw, "Failed to write Rx Rate Select RS0\n");
+		return;
+	}
+
+	/* Set RS1 */
+	status = hw->phy.ops.read_i2c_byte(hw, IXGBE_SFF_SFF_8472_ESCB,
+					   IXGBE_I2C_EEPROM_DEV_ADDR2,
+					   &eeprom_data);
+	if (status) {
+		hw_dbg(hw, "Failed to read Rx Rate Select RS1\n");
+		return;
+	}
+
+	eeprom_data = (eeprom_data & ~IXGBE_SFF_SOFT_RS_SELECT_MASK) | rs;
+
+	status = hw->phy.ops.write_i2c_byte(hw, IXGBE_SFF_SFF_8472_ESCB,
+					    IXGBE_I2C_EEPROM_DEV_ADDR2,
+					    eeprom_data);
+	if (status) {
+		hw_dbg(hw, "Failed to write Rx Rate Select RS1\n");
+		return;
+	}
+}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
new file mode 100644
index 0000000000..34761e691d
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _IXGBE_COMMON_H_
+#define _IXGBE_COMMON_H_
+
+#include "ixgbe_type.h"
+#include "ixgbe.h"
+
+u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw);
+s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw);
+s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw);
+s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw);
+s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw);
+s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num,
+				  u32 pba_num_size);
+s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr);
+enum ixgbe_bus_width ixgbe_convert_bus_width(u16 link_status);
+enum ixgbe_bus_speed ixgbe_convert_bus_speed(u16 link_status);
+s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw);
+void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw);
+s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw);
+
+s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw);
+
+s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw);
+s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
+s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+					       u16 words, u16 *data);
+s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data);
+s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+				   u16 words, u16 *data);
+s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
+s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset,
+				    u16 words, u16 *data);
+s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+				       u16 *data);
+s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset,
+					      u16 words, u16 *data);
+s32 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw);
+s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw,
+					   u16 *checksum_val);
+s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw);
+
+s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq,
+			  u32 enable_addr);
+s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw);
+s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw,
+				      struct net_device *netdev);
+s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw);
+s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw);
+s32 ixgbe_disable_rx_buff_generic(struct ixgbe_hw *hw);
+s32 ixgbe_enable_rx_buff_generic(struct ixgbe_hw *hw);
+s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval);
+s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw);
+s32 ixgbe_setup_fc_generic(struct ixgbe_hw *);
+bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw);
+void ixgbe_fc_autoneg(struct ixgbe_hw *hw);
+
+s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u32 mask);
+void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u32 mask);
+s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr);
+s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq);
+s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq);
+s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw);
+s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan,
+			   u32 vind, bool vlan_on, bool vlvf_bypass);
+s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw);
+s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw,
+				 ixgbe_link_speed *speed,
+				 bool *link_up, bool link_up_wait_to_complete);
+s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix,
+				 u16 *wwpn_prefix);
+
+s32 prot_autoc_read_generic(struct ixgbe_hw *hw, bool *, u32 *reg_val);
+s32 prot_autoc_write_generic(struct ixgbe_hw *hw, u32 reg_val, bool locked);
+
+s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index);
+void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf);
+void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf);
+s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps);
+s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
+				 u8 build, u8 ver, u16 len, const char *str);
+u8 ixgbe_calculate_checksum(u8 *buffer, u32 length);
+s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, void *, u32 length,
+				 u32 timeout, bool return_data);
+s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 len, u32 timeout);
+s32 ixgbe_fw_phy_activity(struct ixgbe_hw *hw, u16 activity,
+			  u32 (*data)[FW_PHY_ACT_DATA_COUNT]);
+void ixgbe_clear_tx_pending(struct ixgbe_hw *hw);
+bool ixgbe_mng_present(struct ixgbe_hw *hw);
+bool ixgbe_mng_enabled(struct ixgbe_hw *hw);
+
+void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb,
+			     u32 headroom, int strategy);
+
+extern const u32 ixgbe_mvals_8259X[IXGBE_MVALS_IDX_LIMIT];
+
+#define IXGBE_I2C_THERMAL_SENSOR_ADDR	0xF8
+#define IXGBE_EMC_INTERNAL_DATA		0x00
+#define IXGBE_EMC_INTERNAL_THERM_LIMIT	0x20
+#define IXGBE_EMC_DIODE1_DATA		0x01
+#define IXGBE_EMC_DIODE1_THERM_LIMIT	0x19
+#define IXGBE_EMC_DIODE2_DATA		0x23
+#define IXGBE_EMC_DIODE2_THERM_LIMIT	0x1A
+#define IXGBE_EMC_DIODE3_DATA		0x2A
+#define IXGBE_EMC_DIODE3_THERM_LIMIT	0x30
+
+s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw);
+s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw);
+void ixgbe_get_etk_id(struct ixgbe_hw *hw,
+		      struct ixgbe_nvm_version *nvm_ver);
+void ixgbe_get_oem_prod_version(struct ixgbe_hw *hw,
+				struct ixgbe_nvm_version *nvm_ver);
+void ixgbe_get_orom_version(struct ixgbe_hw *hw,
+			    struct ixgbe_nvm_version *nvm_ver);
+void ixgbe_disable_rx_generic(struct ixgbe_hw *hw);
+void ixgbe_enable_rx_generic(struct ixgbe_hw *hw);
+s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
+					  ixgbe_link_speed speed,
+					  bool autoneg_wait_to_complete);
+void ixgbe_set_soft_rate_select_speed(struct ixgbe_hw *hw,
+				      ixgbe_link_speed speed);
+
+#define IXGBE_FAILED_READ_RETRIES 5
+#define IXGBE_FAILED_READ_REG 0xffffffffU
+#define IXGBE_FAILED_READ_CFG_DWORD 0xffffffffU
+#define IXGBE_FAILED_READ_CFG_WORD 0xffffU
+
+u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg);
+void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value);
+
+static inline bool ixgbe_removed(void __iomem *addr)
+{
+	return unlikely(!addr);
+}
+
+static inline void ixgbe_write_reg(struct ixgbe_hw *hw, u32 reg, u32 value)
+{
+	u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr);
+
+	if (ixgbe_removed(reg_addr))
+		return;
+	writel(value, reg_addr + reg);
+}
+#define IXGBE_WRITE_REG(a, reg, value) ixgbe_write_reg((a), (reg), (value))
+
+#ifndef writeq
+#define writeq writeq
+static inline void writeq(u64 val, void __iomem *addr)
+{
+	writel((u32)val, addr);
+	writel((u32)(val >> 32), addr + 4);
+}
+#endif
+
+static inline void ixgbe_write_reg64(struct ixgbe_hw *hw, u32 reg, u64 value)
+{
+	u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr);
+
+	if (ixgbe_removed(reg_addr))
+		return;
+	writeq(value, reg_addr + reg);
+}
+#define IXGBE_WRITE_REG64(a, reg, value) ixgbe_write_reg64((a), (reg), (value))
+
+u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg);
+#define IXGBE_READ_REG(a, reg) ixgbe_read_reg((a), (reg))
+
+#define IXGBE_WRITE_REG_ARRAY(a, reg, offset, value) \
+		ixgbe_write_reg((a), (reg) + ((offset) << 2), (value))
+
+#define IXGBE_READ_REG_ARRAY(a, reg, offset) \
+		ixgbe_read_reg((a), (reg) + ((offset) << 2))
+
+#define IXGBE_WRITE_FLUSH(a) ixgbe_read_reg((a), IXGBE_STATUS)
+
+#define ixgbe_hw_to_netdev(hw) (((struct ixgbe_adapter *)(hw)->back)->netdev)
+
+#define hw_dbg(hw, format, arg...) \
+	netdev_dbg(ixgbe_hw_to_netdev(hw), format, ## arg)
+#define hw_err(hw, format, arg...) \
+	netdev_err(ixgbe_hw_to_netdev(hw), format, ## arg)
+#define e_dev_info(format, arg...) \
+	dev_info(&adapter->pdev->dev, format, ## arg)
+#define e_dev_warn(format, arg...) \
+	dev_warn(&adapter->pdev->dev, format, ## arg)
+#define e_dev_err(format, arg...) \
+	dev_err(&adapter->pdev->dev, format, ## arg)
+#define e_dev_notice(format, arg...) \
+	dev_notice(&adapter->pdev->dev, format, ## arg)
+#define e_info(msglvl, format, arg...) \
+	netif_info(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_err(msglvl, format, arg...) \
+	netif_err(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_warn(msglvl, format, arg...) \
+	netif_warn(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_crit(msglvl, format, arg...) \
+	netif_crit(adapter, msglvl, adapter->netdev, format, ## arg)
+#endif /* IXGBE_COMMON */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c
new file mode 100644
index 0000000000..d26cea5b43
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "ixgbe.h"
+#include "ixgbe_type.h"
+#include "ixgbe_dcb.h"
+#include "ixgbe_dcb_82598.h"
+#include "ixgbe_dcb_82599.h"
+
+/**
+ * ixgbe_ieee_credits - This calculates the ieee traffic class
+ * credits from the configured bandwidth percentages. Credits
+ * are the smallest unit programmable into the underlying
+ * hardware. The IEEE 802.1Qaz specification do not use bandwidth
+ * groups so this is much simplified from the CEE case.
+ * @bw: bandwidth index by traffic class
+ * @refill: refill credits index by traffic class
+ * @max: max credits by traffic class
+ * @max_frame: maximum frame size
+ */
+static s32 ixgbe_ieee_credits(__u8 *bw, __u16 *refill,
+			      __u16 *max, int max_frame)
+{
+	int min_percent = 100;
+	int min_credit, multiplier;
+	int i;
+
+	min_credit = ((max_frame / 2) + DCB_CREDIT_QUANTUM - 1) /
+			DCB_CREDIT_QUANTUM;
+
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		if (bw[i] < min_percent && bw[i])
+			min_percent = bw[i];
+	}
+
+	multiplier = (min_credit / min_percent) + 1;
+
+	/* Find out the hw credits for each TC */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		int val = min(bw[i] * multiplier, MAX_CREDIT_REFILL);
+
+		if (val < min_credit)
+			val = min_credit;
+		refill[i] = val;
+
+		max[i] = bw[i] ? (bw[i] * MAX_CREDIT)/100 : min_credit;
+	}
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_calculate_tc_credits - Calculates traffic class credits
+ * @hw: pointer to hardware structure
+ * @dcb_config: Struct containing DCB settings
+ * @max_frame: Maximum frame size
+ * @direction: Configuring either Tx or Rx
+ *
+ * This function calculates the credits allocated to each traffic class.
+ * It should be called only after the rules are checked by
+ * ixgbe_dcb_check_config().
+ */
+s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_hw *hw,
+				   struct ixgbe_dcb_config *dcb_config,
+				   int max_frame, u8 direction)
+{
+	struct tc_bw_alloc *p;
+	int min_credit;
+	int min_multiplier;
+	int min_percent = 100;
+	/* Initialization values default for Tx settings */
+	u32 credit_refill       = 0;
+	u32 credit_max          = 0;
+	u16 link_percentage     = 0;
+	u8  bw_percent          = 0;
+	u8  i;
+
+	if (!dcb_config)
+		return DCB_ERR_CONFIG;
+
+	min_credit = ((max_frame / 2) + DCB_CREDIT_QUANTUM - 1) /
+			DCB_CREDIT_QUANTUM;
+
+	/* Find smallest link percentage */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		p = &dcb_config->tc_config[i].path[direction];
+		bw_percent = dcb_config->bw_percentage[direction][p->bwg_id];
+		link_percentage = p->bwg_percent;
+
+		link_percentage = (link_percentage * bw_percent) / 100;
+
+		if (link_percentage && link_percentage < min_percent)
+			min_percent = link_percentage;
+	}
+
+	/*
+	 * The ratio between traffic classes will control the bandwidth
+	 * percentages seen on the wire. To calculate this ratio we use
+	 * a multiplier. It is required that the refill credits must be
+	 * larger than the max frame size so here we find the smallest
+	 * multiplier that will allow all bandwidth percentages to be
+	 * greater than the max frame size.
+	 */
+	min_multiplier = (min_credit / min_percent) + 1;
+
+	/* Find out the link percentage for each TC first */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		p = &dcb_config->tc_config[i].path[direction];
+		bw_percent = dcb_config->bw_percentage[direction][p->bwg_id];
+
+		link_percentage = p->bwg_percent;
+		/* Must be careful of integer division for very small nums */
+		link_percentage = (link_percentage * bw_percent) / 100;
+		if (p->bwg_percent > 0 && link_percentage == 0)
+			link_percentage = 1;
+
+		/* Save link_percentage for reference */
+		p->link_percent = (u8)link_percentage;
+
+		/* Calculate credit refill ratio using multiplier */
+		credit_refill = min(link_percentage * min_multiplier,
+				    MAX_CREDIT_REFILL);
+
+		/* Refill at least minimum credit */
+		if (credit_refill < min_credit)
+			credit_refill = min_credit;
+
+		p->data_credits_refill = (u16)credit_refill;
+
+		/* Calculate maximum credit for the TC */
+		credit_max = (link_percentage * MAX_CREDIT) / 100;
+
+		/*
+		 * Adjustment based on rule checking, if the percentage
+		 * of a TC is too small, the maximum credit may not be
+		 * enough to send out a jumbo frame in data plane arbitration.
+		 */
+		if (credit_max < min_credit)
+			credit_max = min_credit;
+
+		if (direction == DCB_TX_CONFIG) {
+			/*
+			 * Adjustment based on rule checking, if the
+			 * percentage of a TC is too small, the maximum
+			 * credit may not be enough to send out a TSO
+			 * packet in descriptor plane arbitration.
+			 */
+			if ((hw->mac.type == ixgbe_mac_82598EB) &&
+			    credit_max &&
+			    (credit_max < MINIMUM_CREDIT_FOR_TSO))
+				credit_max = MINIMUM_CREDIT_FOR_TSO;
+
+			dcb_config->tc_config[i].desc_credits_max =
+				(u16)credit_max;
+		}
+
+		p->data_credits_max = (u16)credit_max;
+	}
+
+	return 0;
+}
+
+void ixgbe_dcb_unpack_pfc(struct ixgbe_dcb_config *cfg, u8 *pfc_en)
+{
+	struct tc_configuration *tc_config = &cfg->tc_config[0];
+	int tc;
+
+	for (*pfc_en = 0, tc = 0; tc < MAX_TRAFFIC_CLASS; tc++) {
+		if (tc_config[tc].dcb_pfc != pfc_disabled)
+			*pfc_en |= BIT(tc);
+	}
+}
+
+void ixgbe_dcb_unpack_refill(struct ixgbe_dcb_config *cfg, int direction,
+			     u16 *refill)
+{
+	struct tc_configuration *tc_config = &cfg->tc_config[0];
+	int tc;
+
+	for (tc = 0; tc < MAX_TRAFFIC_CLASS; tc++)
+		refill[tc] = tc_config[tc].path[direction].data_credits_refill;
+}
+
+void ixgbe_dcb_unpack_max(struct ixgbe_dcb_config *cfg, u16 *max)
+{
+	struct tc_configuration *tc_config = &cfg->tc_config[0];
+	int tc;
+
+	for (tc = 0; tc < MAX_TRAFFIC_CLASS; tc++)
+		max[tc] = tc_config[tc].desc_credits_max;
+}
+
+void ixgbe_dcb_unpack_bwgid(struct ixgbe_dcb_config *cfg, int direction,
+			    u8 *bwgid)
+{
+	struct tc_configuration *tc_config = &cfg->tc_config[0];
+	int tc;
+
+	for (tc = 0; tc < MAX_TRAFFIC_CLASS; tc++)
+		bwgid[tc] = tc_config[tc].path[direction].bwg_id;
+}
+
+void ixgbe_dcb_unpack_prio(struct ixgbe_dcb_config *cfg, int direction,
+			    u8 *ptype)
+{
+	struct tc_configuration *tc_config = &cfg->tc_config[0];
+	int tc;
+
+	for (tc = 0; tc < MAX_TRAFFIC_CLASS; tc++)
+		ptype[tc] = tc_config[tc].path[direction].prio_type;
+}
+
+u8 ixgbe_dcb_get_tc_from_up(struct ixgbe_dcb_config *cfg, int direction, u8 up)
+{
+	struct tc_configuration *tc_config = &cfg->tc_config[0];
+	u8 prio_mask = BIT(up);
+	u8 tc = cfg->num_tcs.pg_tcs;
+
+	/* If tc is 0 then DCB is likely not enabled or supported */
+	if (!tc)
+		return 0;
+
+	/*
+	 * Test from maximum TC to 1 and report the first match we find.  If
+	 * we find no match we can assume that the TC is 0 since the TC must
+	 * be set for all user priorities
+	 */
+	for (tc--; tc; tc--) {
+		if (prio_mask & tc_config[tc].path[direction].up_to_tc_bitmap)
+			break;
+	}
+
+	return tc;
+}
+
+void ixgbe_dcb_unpack_map(struct ixgbe_dcb_config *cfg, int direction, u8 *map)
+{
+	u8 up;
+
+	for (up = 0; up < MAX_USER_PRIORITY; up++)
+		map[up] = ixgbe_dcb_get_tc_from_up(cfg, direction, up);
+}
+
+/**
+ * ixgbe_dcb_hw_config - Config and enable DCB
+ * @hw: pointer to hardware structure
+ * @dcb_config: pointer to ixgbe_dcb_config structure
+ *
+ * Configure dcb settings and enable dcb mode.
+ */
+s32 ixgbe_dcb_hw_config(struct ixgbe_hw *hw,
+			struct ixgbe_dcb_config *dcb_config)
+{
+	u8 pfc_en;
+	u8 ptype[MAX_TRAFFIC_CLASS];
+	u8 bwgid[MAX_TRAFFIC_CLASS];
+	u8 prio_tc[MAX_TRAFFIC_CLASS];
+	u16 refill[MAX_TRAFFIC_CLASS];
+	u16 max[MAX_TRAFFIC_CLASS];
+
+	/* Unpack CEE standard containers */
+	ixgbe_dcb_unpack_pfc(dcb_config, &pfc_en);
+	ixgbe_dcb_unpack_refill(dcb_config, DCB_TX_CONFIG, refill);
+	ixgbe_dcb_unpack_max(dcb_config, max);
+	ixgbe_dcb_unpack_bwgid(dcb_config, DCB_TX_CONFIG, bwgid);
+	ixgbe_dcb_unpack_prio(dcb_config, DCB_TX_CONFIG, ptype);
+	ixgbe_dcb_unpack_map(dcb_config, DCB_TX_CONFIG, prio_tc);
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		return ixgbe_dcb_hw_config_82598(hw, pfc_en, refill, max,
+						 bwgid, ptype);
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		return ixgbe_dcb_hw_config_82599(hw, pfc_en, refill, max,
+						 bwgid, ptype, prio_tc);
+	default:
+		break;
+	}
+	return 0;
+}
+
+/* Helper routines to abstract HW specifics from DCB netlink ops */
+s32 ixgbe_dcb_hw_pfc_config(struct ixgbe_hw *hw, u8 pfc_en, u8 *prio_tc)
+{
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		return ixgbe_dcb_config_pfc_82598(hw, pfc_en);
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		return ixgbe_dcb_config_pfc_82599(hw, pfc_en, prio_tc);
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
+s32 ixgbe_dcb_hw_ets(struct ixgbe_hw *hw, struct ieee_ets *ets, int max_frame)
+{
+	__u16 refill[IEEE_8021QAZ_MAX_TCS], max[IEEE_8021QAZ_MAX_TCS];
+	__u8 prio_type[IEEE_8021QAZ_MAX_TCS];
+	int i;
+
+	/* naively give each TC a bwg to map onto CEE hardware */
+	__u8 bwg_id[IEEE_8021QAZ_MAX_TCS] = {0, 1, 2, 3, 4, 5, 6, 7};
+
+	/* Map TSA onto CEE prio type */
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		switch (ets->tc_tsa[i]) {
+		case IEEE_8021QAZ_TSA_STRICT:
+			prio_type[i] = 2;
+			break;
+		case IEEE_8021QAZ_TSA_ETS:
+			prio_type[i] = 0;
+			break;
+		default:
+			/* Hardware only supports priority strict or
+			 * ETS transmission selection algorithms if
+			 * we receive some other value from dcbnl
+			 * throw an error
+			 */
+			return -EINVAL;
+		}
+	}
+
+	ixgbe_ieee_credits(ets->tc_tx_bw, refill, max, max_frame);
+	return ixgbe_dcb_hw_ets_config(hw, refill, max,
+				       bwg_id, prio_type, ets->prio_tc);
+}
+
+s32 ixgbe_dcb_hw_ets_config(struct ixgbe_hw *hw,
+			    u16 *refill, u16 *max, u8 *bwg_id,
+			    u8 *prio_type, u8 *prio_tc)
+{
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max,
+							prio_type);
+		ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max,
+							     bwg_id, prio_type);
+		ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max,
+							     bwg_id, prio_type);
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max,
+						  bwg_id, prio_type, prio_tc);
+		ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max,
+						       bwg_id, prio_type);
+		ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max, bwg_id,
+						       prio_type, prio_tc);
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static void ixgbe_dcb_read_rtrup2tc_82599(struct ixgbe_hw *hw, u8 *map)
+{
+	u32 reg, i;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_RTRUP2TC);
+	for (i = 0; i < MAX_USER_PRIORITY; i++)
+		map[i] = IXGBE_RTRUP2TC_UP_MASK &
+			(reg >> (i * IXGBE_RTRUP2TC_UP_SHIFT));
+}
+
+void ixgbe_dcb_read_rtrup2tc(struct ixgbe_hw *hw, u8 *map)
+{
+	switch (hw->mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		ixgbe_dcb_read_rtrup2tc_82599(hw, map);
+		break;
+	default:
+		break;
+	}
+}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
new file mode 100644
index 0000000000..60cd5863bf
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _DCB_CONFIG_H_
+#define _DCB_CONFIG_H_
+
+#include <linux/dcbnl.h>
+#include "ixgbe_type.h"
+
+/* DCB data structures */
+
+#define IXGBE_MAX_PACKET_BUFFERS 8
+#define MAX_USER_PRIORITY        8
+#define MAX_BW_GROUP             8
+#define BW_PERCENT               100
+
+#define DCB_TX_CONFIG            0
+#define DCB_RX_CONFIG            1
+
+/* DCB error Codes */
+#define DCB_SUCCESS              0
+#define DCB_ERR_CONFIG           -1
+#define DCB_ERR_PARAM            -2
+
+/* Transmit and receive Errors */
+/* Error in bandwidth group allocation */
+#define DCB_ERR_BW_GROUP        -3
+/* Error in traffic class bandwidth allocation */
+#define DCB_ERR_TC_BW           -4
+/* Traffic class has both link strict and group strict enabled */
+#define DCB_ERR_LS_GS           -5
+/* Link strict traffic class has non zero bandwidth */
+#define DCB_ERR_LS_BW_NONZERO   -6
+/* Link strict bandwidth group has non zero bandwidth */
+#define DCB_ERR_LS_BWG_NONZERO  -7
+/*  Traffic class has zero bandwidth */
+#define DCB_ERR_TC_BW_ZERO      -8
+
+#define DCB_NOT_IMPLEMENTED      0x7FFFFFFF
+
+struct dcb_pfc_tc_debug {
+	u8  tc;
+	u8  pause_status;
+	u64 pause_quanta;
+};
+
+enum strict_prio_type {
+	prio_none = 0,
+	prio_group,
+	prio_link
+};
+
+/* DCB capability definitions */
+#define IXGBE_DCB_PG_SUPPORT        0x00000001
+#define IXGBE_DCB_PFC_SUPPORT       0x00000002
+#define IXGBE_DCB_BCN_SUPPORT       0x00000004
+#define IXGBE_DCB_UP2TC_SUPPORT     0x00000008
+#define IXGBE_DCB_GSP_SUPPORT       0x00000010
+
+#define IXGBE_DCB_8_TC_SUPPORT      0x80
+
+struct dcb_support {
+	/* DCB capabilities */
+	u32 capabilities;
+
+	/* Each bit represents a number of TCs configurable in the hw.
+	 * If 8 traffic classes can be configured, the value is 0x80.
+	 */
+	u8  traffic_classes;
+	u8  pfc_traffic_classes;
+};
+
+/* Traffic class bandwidth allocation per direction */
+struct tc_bw_alloc {
+	u8 bwg_id;		  /* Bandwidth Group (BWG) ID */
+	u8 bwg_percent;		  /* % of BWG's bandwidth */
+	u8 link_percent;	  /* % of link bandwidth */
+	u8 up_to_tc_bitmap;	  /* User Priority to Traffic Class mapping */
+	u16 data_credits_refill;  /* Credit refill amount in 64B granularity */
+	u16 data_credits_max;	  /* Max credits for a configured packet buffer
+				   * in 64B granularity.*/
+	enum strict_prio_type prio_type; /* Link or Group Strict Priority */
+};
+
+enum dcb_pfc_type {
+	pfc_disabled = 0,
+	pfc_enabled_full,
+	pfc_enabled_tx,
+	pfc_enabled_rx
+};
+
+/* Traffic class configuration */
+struct tc_configuration {
+	struct tc_bw_alloc path[2]; /* One each for Tx/Rx */
+	enum dcb_pfc_type  dcb_pfc; /* Class based flow control setting */
+
+	u16 desc_credits_max; /* For Tx Descriptor arbitration */
+	u8 tc; /* Traffic class (TC) */
+};
+
+struct dcb_num_tcs {
+	u8 pg_tcs;
+	u8 pfc_tcs;
+};
+
+struct ixgbe_dcb_config {
+	struct dcb_support support;
+	struct dcb_num_tcs num_tcs;
+	struct tc_configuration tc_config[MAX_TRAFFIC_CLASS];
+	u8     bw_percentage[2][MAX_BW_GROUP]; /* One each for Tx/Rx */
+	bool   pfc_mode_enable;
+
+	u32  dcb_cfg_version; /* Not used...OS-specific? */
+	u32  link_speed; /* For bandwidth allocation validation purpose */
+};
+
+/* DCB driver APIs */
+void ixgbe_dcb_unpack_pfc(struct ixgbe_dcb_config *cfg, u8 *pfc_en);
+void ixgbe_dcb_unpack_refill(struct ixgbe_dcb_config *, int, u16 *);
+void ixgbe_dcb_unpack_max(struct ixgbe_dcb_config *, u16 *);
+void ixgbe_dcb_unpack_bwgid(struct ixgbe_dcb_config *, int, u8 *);
+void ixgbe_dcb_unpack_prio(struct ixgbe_dcb_config *, int, u8 *);
+void ixgbe_dcb_unpack_map(struct ixgbe_dcb_config *, int, u8 *);
+u8 ixgbe_dcb_get_tc_from_up(struct ixgbe_dcb_config *, int, u8);
+
+/* DCB credits calculation */
+s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_hw *,
+				   struct ixgbe_dcb_config *, int, u8);
+
+/* DCB hw initialization */
+s32 ixgbe_dcb_hw_ets(struct ixgbe_hw *hw, struct ieee_ets *ets, int max);
+s32 ixgbe_dcb_hw_ets_config(struct ixgbe_hw *hw, u16 *refill, u16 *max,
+			    u8 *bwg_id, u8 *prio_type, u8 *tc_prio);
+s32 ixgbe_dcb_hw_pfc_config(struct ixgbe_hw *hw, u8 pfc_en, u8 *tc_prio);
+s32 ixgbe_dcb_hw_config(struct ixgbe_hw *, struct ixgbe_dcb_config *);
+
+void ixgbe_dcb_read_rtrup2tc(struct ixgbe_hw *hw, u8 *map);
+
+/* DCB definitions for credit calculation */
+#define DCB_CREDIT_QUANTUM	64   /* DCB Quantum */
+#define MAX_CREDIT_REFILL       511  /* 0x1FF * 64B = 32704B */
+#define DCB_MAX_TSO_SIZE        (32*1024) /* MAX TSO packet size supported in DCB mode */
+#define MINIMUM_CREDIT_FOR_TSO  (DCB_MAX_TSO_SIZE/64 + 1) /* 513 for 32KB TSO packet */
+#define MAX_CREDIT              4095 /* Maximum credit supported: 256KB * 1204 / 64B */
+
+#endif /* _DCB_CONFIG_H */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
new file mode 100644
index 0000000000..379ae747cd
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "ixgbe.h"
+#include "ixgbe_type.h"
+#include "ixgbe_dcb.h"
+#include "ixgbe_dcb_82598.h"
+
+/**
+ * ixgbe_dcb_config_rx_arbiter_82598 - Config Rx data arbiter
+ * @hw: pointer to hardware structure
+ * @refill: refill credits index by traffic class
+ * @max: max credits index by traffic class
+ * @prio_type: priority type indexed by traffic class
+ *
+ * Configure Rx Data Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *hw,
+					u16 *refill,
+					u16 *max,
+					u8 *prio_type)
+{
+	u32    reg           = 0;
+	u32    credit_refill = 0;
+	u32    credit_max    = 0;
+	u8     i             = 0;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_RUPPBMR) | IXGBE_RUPPBMR_MQA;
+	IXGBE_WRITE_REG(hw, IXGBE_RUPPBMR, reg);
+
+	reg = IXGBE_READ_REG(hw, IXGBE_RMCS);
+	/* Enable Arbiter */
+	reg &= ~IXGBE_RMCS_ARBDIS;
+	/* Enable Receive Recycle within the BWG */
+	reg |= IXGBE_RMCS_RRM;
+	/* Enable Deficit Fixed Priority arbitration*/
+	reg |= IXGBE_RMCS_DFP;
+
+	IXGBE_WRITE_REG(hw, IXGBE_RMCS, reg);
+
+	/* Configure traffic class credits and priority */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		credit_refill = refill[i];
+		credit_max    = max[i];
+
+		reg = credit_refill | (credit_max << IXGBE_RT2CR_MCL_SHIFT);
+
+		if (prio_type[i] == prio_link)
+			reg |= IXGBE_RT2CR_LSP;
+
+		IXGBE_WRITE_REG(hw, IXGBE_RT2CR(i), reg);
+	}
+
+	reg = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+	reg |= IXGBE_RDRXCTL_RDMTS_1_2;
+	reg |= IXGBE_RDRXCTL_MPBEN;
+	reg |= IXGBE_RDRXCTL_MCEN;
+	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg);
+
+	reg = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+	/* Make sure there is enough descriptors before arbitration */
+	reg &= ~IXGBE_RXCTRL_DMBYPS;
+	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, reg);
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_tx_desc_arbiter_82598 - Config Tx Desc. arbiter
+ * @hw: pointer to hardware structure
+ * @refill: refill credits index by traffic class
+ * @max: max credits index by traffic class
+ * @bwg_id: bandwidth grouping indexed by traffic class
+ * @prio_type: priority type indexed by traffic class
+ *
+ * Configure Tx Descriptor Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *hw,
+						u16 *refill,
+						u16 *max,
+						u8 *bwg_id,
+						u8 *prio_type)
+{
+	u32    reg, max_credits;
+	u8     i;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_DPMCS);
+
+	/* Enable arbiter */
+	reg &= ~IXGBE_DPMCS_ARBDIS;
+	reg |= IXGBE_DPMCS_TSOEF;
+
+	/* Configure Max TSO packet size 34KB including payload and headers */
+	reg |= (0x4 << IXGBE_DPMCS_MTSOS_SHIFT);
+
+	IXGBE_WRITE_REG(hw, IXGBE_DPMCS, reg);
+
+	/* Configure traffic class credits and priority */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		max_credits = max[i];
+		reg = max_credits << IXGBE_TDTQ2TCCR_MCL_SHIFT;
+		reg |= refill[i];
+		reg |= (u32)(bwg_id[i]) << IXGBE_TDTQ2TCCR_BWG_SHIFT;
+
+		if (prio_type[i] == prio_group)
+			reg |= IXGBE_TDTQ2TCCR_GSP;
+
+		if (prio_type[i] == prio_link)
+			reg |= IXGBE_TDTQ2TCCR_LSP;
+
+		IXGBE_WRITE_REG(hw, IXGBE_TDTQ2TCCR(i), reg);
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_tx_data_arbiter_82598 - Config Tx data arbiter
+ * @hw: pointer to hardware structure
+ * @refill: refill credits index by traffic class
+ * @max: max credits index by traffic class
+ * @bwg_id: bandwidth grouping indexed by traffic class
+ * @prio_type: priority type indexed by traffic class
+ *
+ * Configure Tx Data Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw,
+						u16 *refill,
+						u16 *max,
+						u8 *bwg_id,
+						u8 *prio_type)
+{
+	u32 reg;
+	u8 i;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_PDPMCS);
+	/* Enable Data Plane Arbiter */
+	reg &= ~IXGBE_PDPMCS_ARBDIS;
+	/* Enable DFP and Transmit Recycle Mode */
+	reg |= (IXGBE_PDPMCS_TPPAC | IXGBE_PDPMCS_TRM);
+
+	IXGBE_WRITE_REG(hw, IXGBE_PDPMCS, reg);
+
+	/* Configure traffic class credits and priority */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		reg = refill[i];
+		reg |= (u32)(max[i]) << IXGBE_TDPT2TCCR_MCL_SHIFT;
+		reg |= (u32)(bwg_id[i]) << IXGBE_TDPT2TCCR_BWG_SHIFT;
+
+		if (prio_type[i] == prio_group)
+			reg |= IXGBE_TDPT2TCCR_GSP;
+
+		if (prio_type[i] == prio_link)
+			reg |= IXGBE_TDPT2TCCR_LSP;
+
+		IXGBE_WRITE_REG(hw, IXGBE_TDPT2TCCR(i), reg);
+	}
+
+	/* Enable Tx packet buffer division */
+	reg = IXGBE_READ_REG(hw, IXGBE_DTXCTL);
+	reg |= IXGBE_DTXCTL_ENDBUBD;
+	IXGBE_WRITE_REG(hw, IXGBE_DTXCTL, reg);
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_pfc_82598 - Config priority flow control
+ * @hw: pointer to hardware structure
+ * @pfc_en: enabled pfc bitmask
+ *
+ * Configure Priority Flow Control for each traffic class.
+ */
+s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en)
+{
+	u32 fcrtl, reg;
+	u8  i;
+
+	/* Enable Transmit Priority Flow Control */
+	reg = IXGBE_READ_REG(hw, IXGBE_RMCS);
+	reg &= ~IXGBE_RMCS_TFCE_802_3X;
+	reg |= IXGBE_RMCS_TFCE_PRIORITY;
+	IXGBE_WRITE_REG(hw, IXGBE_RMCS, reg);
+
+	/* Enable Receive Priority Flow Control */
+	reg = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+	reg &= ~(IXGBE_FCTRL_RPFCE | IXGBE_FCTRL_RFCE);
+
+	if (pfc_en)
+		reg |= IXGBE_FCTRL_RPFCE;
+
+	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg);
+
+	/* Configure PFC Tx thresholds per TC */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		if (!(pfc_en & BIT(i))) {
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), 0);
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), 0);
+			continue;
+		}
+
+		fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE;
+		reg = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN;
+		IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), fcrtl);
+		IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), reg);
+	}
+
+	/* Configure pause time */
+	reg = hw->fc.pause_time * 0x00010001;
+	for (i = 0; i < (MAX_TRAFFIC_CLASS / 2); i++)
+		IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg);
+
+	/* Configure flow control refresh threshold value */
+	IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2);
+
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_tc_stats_82598 - Configure traffic class statistics
+ * @hw: pointer to hardware structure
+ *
+ * Configure queue statistics registers, all queues belonging to same traffic
+ * class uses a single set of queue statistics counters.
+ */
+static s32 ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *hw)
+{
+	u32 reg = 0;
+	u8  i   = 0;
+	u8  j   = 0;
+
+	/* Receive Queues stats setting -  8 queues per statistics reg */
+	for (i = 0, j = 0; i < 15 && j < 8; i = i + 2, j++) {
+		reg = IXGBE_READ_REG(hw, IXGBE_RQSMR(i));
+		reg |= ((0x1010101) * j);
+		IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i), reg);
+		reg = IXGBE_READ_REG(hw, IXGBE_RQSMR(i + 1));
+		reg |= ((0x1010101) * j);
+		IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i + 1), reg);
+	}
+	/* Transmit Queues stats setting -  4 queues per statistics reg */
+	for (i = 0; i < 8; i++) {
+		reg = IXGBE_READ_REG(hw, IXGBE_TQSMR(i));
+		reg |= ((0x1010101) * i);
+		IXGBE_WRITE_REG(hw, IXGBE_TQSMR(i), reg);
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_hw_config_82598 - Config and enable DCB
+ * @hw: pointer to hardware structure
+ * @pfc_en: enabled pfc bitmask
+ * @refill: refill credits index by traffic class
+ * @max: max credits index by traffic class
+ * @bwg_id: bandwidth grouping indexed by traffic class
+ * @prio_type: priority type indexed by traffic class
+ *
+ * Configure dcb settings and enable dcb mode.
+ */
+s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *hw, u8 pfc_en, u16 *refill,
+			      u16 *max, u8 *bwg_id, u8 *prio_type)
+{
+	ixgbe_dcb_config_rx_arbiter_82598(hw, refill, max, prio_type);
+	ixgbe_dcb_config_tx_desc_arbiter_82598(hw, refill, max,
+					       bwg_id, prio_type);
+	ixgbe_dcb_config_tx_data_arbiter_82598(hw, refill, max,
+					       bwg_id, prio_type);
+	ixgbe_dcb_config_pfc_82598(hw, pfc_en);
+	ixgbe_dcb_config_tc_stats_82598(hw);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
new file mode 100644
index 0000000000..fdca41abb4
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _DCB_82598_CONFIG_H_
+#define _DCB_82598_CONFIG_H_
+
+/* DCB register definitions */
+
+#define IXGBE_DPMCS_MTSOS_SHIFT 16
+#define IXGBE_DPMCS_TDPAC       0x00000001 /* 0 Round Robin, 1 DFP - Deficit Fixed Priority */
+#define IXGBE_DPMCS_TRM         0x00000010 /* Transmit Recycle Mode */
+#define IXGBE_DPMCS_ARBDIS      0x00000040 /* DCB arbiter disable */
+#define IXGBE_DPMCS_TSOEF       0x00080000 /* TSO Expand Factor: 0=x4, 1=x2 */
+
+#define IXGBE_RUPPBMR_MQA       0x80000000 /* Enable UP to queue mapping */
+
+#define IXGBE_RT2CR_MCL_SHIFT   12 /* Offset to Max Credit Limit setting */
+#define IXGBE_RT2CR_LSP         0x80000000 /* LSP enable bit */
+
+#define IXGBE_RDRXCTL_MPBEN     0x00000010 /* DMA config for multiple packet buffers enable */
+#define IXGBE_RDRXCTL_MCEN      0x00000040 /* DMA config for multiple cores (RSS) enable */
+
+#define IXGBE_TDTQ2TCCR_MCL_SHIFT   12
+#define IXGBE_TDTQ2TCCR_BWG_SHIFT   9
+#define IXGBE_TDTQ2TCCR_GSP     0x40000000
+#define IXGBE_TDTQ2TCCR_LSP     0x80000000
+
+#define IXGBE_TDPT2TCCR_MCL_SHIFT   12
+#define IXGBE_TDPT2TCCR_BWG_SHIFT   9
+#define IXGBE_TDPT2TCCR_GSP     0x40000000
+#define IXGBE_TDPT2TCCR_LSP     0x80000000
+
+#define IXGBE_PDPMCS_TPPAC      0x00000020 /* 0 Round Robin, 1 for DFP - Deficit Fixed Priority */
+#define IXGBE_PDPMCS_ARBDIS     0x00000040 /* Arbiter disable */
+#define IXGBE_PDPMCS_TRM        0x00000100 /* Transmit Recycle Mode enable */
+
+#define IXGBE_DTXCTL_ENDBUBD    0x00000004 /* Enable DBU buffer division */
+
+#define IXGBE_TXPBSIZE_40KB     0x0000A000 /* 40KB Packet Buffer */
+#define IXGBE_RXPBSIZE_48KB     0x0000C000 /* 48KB Packet Buffer */
+#define IXGBE_RXPBSIZE_64KB     0x00010000 /* 64KB Packet Buffer */
+#define IXGBE_RXPBSIZE_80KB     0x00014000 /* 80KB Packet Buffer */
+
+#define IXGBE_RDRXCTL_RDMTS_1_2 0x00000000
+
+/* DCB hardware-specific driver APIs */
+
+/* DCB PFC functions */
+s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *, u8 pfc_en);
+
+/* DCB hw initialization */
+s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *hw,
+					u16 *refill,
+					u16 *max,
+					u8 *prio_type);
+
+s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *hw,
+						u16 *refill,
+						u16 *max,
+						u8 *bwg_id,
+						u8 *prio_type);
+
+s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw,
+						u16 *refill,
+						u16 *max,
+						u8 *bwg_id,
+						u8 *prio_type);
+
+s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *hw, u8 pfc_en, u16 *refill,
+			      u16 *max, u8 *bwg_id, u8 *prio_type);
+
+#endif /* _DCB_82598_CONFIG_H */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c
new file mode 100644
index 0000000000..7948849840
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "ixgbe.h"
+#include "ixgbe_type.h"
+#include "ixgbe_dcb.h"
+#include "ixgbe_dcb_82599.h"
+
+/**
+ * ixgbe_dcb_config_rx_arbiter_82599 - Config Rx Data arbiter
+ * @hw: pointer to hardware structure
+ * @refill: refill credits index by traffic class
+ * @max: max credits index by traffic class
+ * @bwg_id: bandwidth grouping indexed by traffic class
+ * @prio_type: priority type indexed by traffic class
+ * @prio_tc: priority to tc assignments indexed by priority
+ *
+ * Configure Rx Packet Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_rx_arbiter_82599(struct ixgbe_hw *hw,
+				      u16 *refill,
+				      u16 *max,
+				      u8 *bwg_id,
+				      u8 *prio_type,
+				      u8 *prio_tc)
+{
+	u32    reg           = 0;
+	u32    credit_refill = 0;
+	u32    credit_max    = 0;
+	u8     i             = 0;
+
+	/*
+	 * Disable the arbiter before changing parameters
+	 * (always enable recycle mode; WSP)
+	 */
+	reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC | IXGBE_RTRPCS_ARBDIS;
+	IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
+
+	/* Map all traffic classes to their UP */
+	reg = 0;
+	for (i = 0; i < MAX_USER_PRIORITY; i++)
+		reg |= (prio_tc[i] << (i * IXGBE_RTRUP2TC_UP_SHIFT));
+	IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, reg);
+
+	/* Configure traffic class credits and priority */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		credit_refill = refill[i];
+		credit_max    = max[i];
+		reg = credit_refill | (credit_max << IXGBE_RTRPT4C_MCL_SHIFT);
+
+		reg |= (u32)(bwg_id[i]) << IXGBE_RTRPT4C_BWG_SHIFT;
+
+		if (prio_type[i] == prio_link)
+			reg |= IXGBE_RTRPT4C_LSP;
+
+		IXGBE_WRITE_REG(hw, IXGBE_RTRPT4C(i), reg);
+	}
+
+	/*
+	 * Configure Rx packet plane (recycle mode; WSP) and
+	 * enable arbiter
+	 */
+	reg = IXGBE_RTRPCS_RRM | IXGBE_RTRPCS_RAC;
+	IXGBE_WRITE_REG(hw, IXGBE_RTRPCS, reg);
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_tx_desc_arbiter_82599 - Config Tx Desc. arbiter
+ * @hw: pointer to hardware structure
+ * @refill: refill credits index by traffic class
+ * @max: max credits index by traffic class
+ * @bwg_id: bandwidth grouping indexed by traffic class
+ * @prio_type: priority type indexed by traffic class
+ *
+ * Configure Tx Descriptor Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_tx_desc_arbiter_82599(struct ixgbe_hw *hw,
+					   u16 *refill,
+					   u16 *max,
+					   u8 *bwg_id,
+					   u8 *prio_type)
+{
+	u32    reg, max_credits;
+	u8     i;
+
+	/* Clear the per-Tx queue credits; we use per-TC instead */
+	for (i = 0; i < 128; i++) {
+		IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, i);
+		IXGBE_WRITE_REG(hw, IXGBE_RTTDT1C, 0);
+	}
+
+	/* Configure traffic class credits and priority */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		max_credits = max[i];
+		reg = max_credits << IXGBE_RTTDT2C_MCL_SHIFT;
+		reg |= refill[i];
+		reg |= (u32)(bwg_id[i]) << IXGBE_RTTDT2C_BWG_SHIFT;
+
+		if (prio_type[i] == prio_group)
+			reg |= IXGBE_RTTDT2C_GSP;
+
+		if (prio_type[i] == prio_link)
+			reg |= IXGBE_RTTDT2C_LSP;
+
+		IXGBE_WRITE_REG(hw, IXGBE_RTTDT2C(i), reg);
+	}
+
+	/*
+	 * Configure Tx descriptor plane (recycle mode; WSP) and
+	 * enable arbiter
+	 */
+	reg = IXGBE_RTTDCS_TDPAC | IXGBE_RTTDCS_TDRM;
+	IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, reg);
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_tx_data_arbiter_82599 - Config Tx Data arbiter
+ * @hw: pointer to hardware structure
+ * @refill: refill credits index by traffic class
+ * @max: max credits index by traffic class
+ * @bwg_id: bandwidth grouping indexed by traffic class
+ * @prio_type: priority type indexed by traffic class
+ * @prio_tc: priority to tc assignments indexed by priority
+ *
+ * Configure Tx Packet Arbiter and credits for each traffic class.
+ */
+s32 ixgbe_dcb_config_tx_data_arbiter_82599(struct ixgbe_hw *hw,
+					   u16 *refill,
+					   u16 *max,
+					   u8 *bwg_id,
+					   u8 *prio_type,
+					   u8 *prio_tc)
+{
+	u32 reg;
+	u8 i;
+
+	/*
+	 * Disable the arbiter before changing parameters
+	 * (always enable recycle mode; SP; arb delay)
+	 */
+	reg = IXGBE_RTTPCS_TPPAC | IXGBE_RTTPCS_TPRM |
+	      (IXGBE_RTTPCS_ARBD_DCB << IXGBE_RTTPCS_ARBD_SHIFT) |
+	      IXGBE_RTTPCS_ARBDIS;
+	IXGBE_WRITE_REG(hw, IXGBE_RTTPCS, reg);
+
+	/* Map all traffic classes to their UP */
+	reg = 0;
+	for (i = 0; i < MAX_USER_PRIORITY; i++)
+		reg |= (prio_tc[i] << (i * IXGBE_RTTUP2TC_UP_SHIFT));
+	IXGBE_WRITE_REG(hw, IXGBE_RTTUP2TC, reg);
+
+	/* Configure traffic class credits and priority */
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		reg = refill[i];
+		reg |= (u32)(max[i]) << IXGBE_RTTPT2C_MCL_SHIFT;
+		reg |= (u32)(bwg_id[i]) << IXGBE_RTTPT2C_BWG_SHIFT;
+
+		if (prio_type[i] == prio_group)
+			reg |= IXGBE_RTTPT2C_GSP;
+
+		if (prio_type[i] == prio_link)
+			reg |= IXGBE_RTTPT2C_LSP;
+
+		IXGBE_WRITE_REG(hw, IXGBE_RTTPT2C(i), reg);
+	}
+
+	/*
+	 * Configure Tx packet plane (recycle mode; SP; arb delay) and
+	 * enable arbiter
+	 */
+	reg = IXGBE_RTTPCS_TPPAC | IXGBE_RTTPCS_TPRM |
+	      (IXGBE_RTTPCS_ARBD_DCB << IXGBE_RTTPCS_ARBD_SHIFT);
+	IXGBE_WRITE_REG(hw, IXGBE_RTTPCS, reg);
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_pfc_82599 - Configure priority flow control
+ * @hw: pointer to hardware structure
+ * @pfc_en: enabled pfc bitmask
+ * @prio_tc: priority to tc assignments indexed by priority
+ *
+ * Configure Priority Flow Control (PFC) for each traffic class.
+ */
+s32 ixgbe_dcb_config_pfc_82599(struct ixgbe_hw *hw, u8 pfc_en, u8 *prio_tc)
+{
+	u32 i, j, fcrtl, reg;
+	u8 max_tc = 0;
+
+	/* Enable Transmit Priority Flow Control */
+	IXGBE_WRITE_REG(hw, IXGBE_FCCFG, IXGBE_FCCFG_TFCE_PRIORITY);
+
+	/* Enable Receive Priority Flow Control */
+	reg = IXGBE_READ_REG(hw, IXGBE_MFLCN);
+	reg |= IXGBE_MFLCN_DPF;
+
+	/*
+	 * X540 & X550 supports per TC Rx priority flow control.
+	 * So clear all TCs and only enable those that should be
+	 * enabled.
+	 */
+	reg &= ~(IXGBE_MFLCN_RPFCE_MASK | IXGBE_MFLCN_RFCE);
+
+	if (hw->mac.type >= ixgbe_mac_X540)
+		reg |= pfc_en << IXGBE_MFLCN_RPFCE_SHIFT;
+
+	if (pfc_en)
+		reg |= IXGBE_MFLCN_RPFCE;
+
+	IXGBE_WRITE_REG(hw, IXGBE_MFLCN, reg);
+
+	for (i = 0; i < MAX_USER_PRIORITY; i++) {
+		if (prio_tc[i] > max_tc)
+			max_tc = prio_tc[i];
+	}
+
+
+	/* Configure PFC Tx thresholds per TC */
+	for (i = 0; i <= max_tc; i++) {
+		int enabled = 0;
+
+		for (j = 0; j < MAX_USER_PRIORITY; j++) {
+			if ((prio_tc[j] == i) && (pfc_en & BIT(j))) {
+				enabled = 1;
+				break;
+			}
+		}
+
+		if (enabled) {
+			reg = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN;
+			fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE;
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), fcrtl);
+		} else {
+			/* In order to prevent Tx hangs when the internal Tx
+			 * switch is enabled we must set the high water mark
+			 * to the Rx packet buffer size - 24KB.  This allows
+			 * the Tx switch to function even under heavy Rx
+			 * workloads.
+			 */
+			reg = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)) - 24576;
+			IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), 0);
+		}
+
+		IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(i), reg);
+	}
+
+	for (; i < MAX_TRAFFIC_CLASS; i++) {
+		IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(i), 0);
+	}
+
+	/* Configure pause time (2 TCs per register) */
+	reg = hw->fc.pause_time * 0x00010001;
+	for (i = 0; i < (MAX_TRAFFIC_CLASS / 2); i++)
+		IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg);
+
+	/* Configure flow control refresh threshold value */
+	IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2);
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_config_tc_stats_82599 - Config traffic class statistics
+ * @hw: pointer to hardware structure
+ *
+ * Configure queue statistics registers, all queues belonging to same traffic
+ * class uses a single set of queue statistics counters.
+ */
+static s32 ixgbe_dcb_config_tc_stats_82599(struct ixgbe_hw *hw)
+{
+	u32 reg = 0;
+	u8  i   = 0;
+
+	/*
+	 * Receive Queues stats setting
+	 * 32 RQSMR registers, each configuring 4 queues.
+	 * Set all 16 queues of each TC to the same stat
+	 * with TC 'n' going to stat 'n'.
+	 */
+	for (i = 0; i < 32; i++) {
+		reg = 0x01010101 * (i / 4);
+		IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i), reg);
+	}
+	/*
+	 * Transmit Queues stats setting
+	 * 32 TQSM registers, each controlling 4 queues.
+	 * Set all queues of each TC to the same stat
+	 * with TC 'n' going to stat 'n'.
+	 * Tx queues are allocated non-uniformly to TCs:
+	 * 32, 32, 16, 16, 8, 8, 8, 8.
+	 */
+	for (i = 0; i < 32; i++) {
+		if (i < 8)
+			reg = 0x00000000;
+		else if (i < 16)
+			reg = 0x01010101;
+		else if (i < 20)
+			reg = 0x02020202;
+		else if (i < 24)
+			reg = 0x03030303;
+		else if (i < 26)
+			reg = 0x04040404;
+		else if (i < 28)
+			reg = 0x05050505;
+		else if (i < 30)
+			reg = 0x06060606;
+		else
+			reg = 0x07070707;
+		IXGBE_WRITE_REG(hw, IXGBE_TQSM(i), reg);
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_dcb_hw_config_82599 - Configure and enable DCB
+ * @hw: pointer to hardware structure
+ * @pfc_en: enabled pfc bitmask
+ * @refill: refill credits index by traffic class
+ * @max: max credits index by traffic class
+ * @bwg_id: bandwidth grouping indexed by traffic class
+ * @prio_type: priority type indexed by traffic class
+ * @prio_tc: priority to tc assignments indexed by priority
+ *
+ * Configure dcb settings and enable dcb mode.
+ */
+s32 ixgbe_dcb_hw_config_82599(struct ixgbe_hw *hw, u8 pfc_en, u16 *refill,
+			      u16 *max, u8 *bwg_id, u8 *prio_type, u8 *prio_tc)
+{
+	ixgbe_dcb_config_rx_arbiter_82599(hw, refill, max, bwg_id,
+					  prio_type, prio_tc);
+	ixgbe_dcb_config_tx_desc_arbiter_82599(hw, refill, max,
+					       bwg_id, prio_type);
+	ixgbe_dcb_config_tx_data_arbiter_82599(hw, refill, max,
+					       bwg_id, prio_type, prio_tc);
+	ixgbe_dcb_config_pfc_82599(hw, pfc_en, prio_tc);
+	ixgbe_dcb_config_tc_stats_82599(hw);
+
+	return 0;
+}
+
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
new file mode 100644
index 0000000000..c6f084883c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _DCB_82599_CONFIG_H_
+#define _DCB_82599_CONFIG_H_
+
+/* DCB register definitions */
+#define IXGBE_RTTDCS_TDPAC      0x00000001 /* 0 Round Robin,
+					    * 1 WSP - Weighted Strict Priority
+					    */
+#define IXGBE_RTTDCS_VMPAC      0x00000002 /* 0 Round Robin,
+					    * 1 WRR - Weighted Round Robin
+					    */
+#define IXGBE_RTTDCS_TDRM       0x00000010 /* Transmit Recycle Mode */
+#define IXGBE_RTTDCS_ARBDIS     0x00000040 /* DCB arbiter disable */
+#define IXGBE_RTTDCS_BDPM       0x00400000 /* Bypass Data Pipe - must clear! */
+#define IXGBE_RTTDCS_BPBFSM     0x00800000 /* Bypass PB Free Space - must
+					     * clear!
+					     */
+#define IXGBE_RTTDCS_SPEED_CHG  0x80000000 /* Link speed change */
+
+/* Receive UP2TC mapping */
+#define IXGBE_RTRUP2TC_UP_SHIFT 3
+#define IXGBE_RTRUP2TC_UP_MASK	7
+/* Transmit UP2TC mapping */
+#define IXGBE_RTTUP2TC_UP_SHIFT 3
+
+#define IXGBE_RTRPT4C_MCL_SHIFT 12 /* Offset to Max Credit Limit setting */
+#define IXGBE_RTRPT4C_BWG_SHIFT 9  /* Offset to BWG index */
+#define IXGBE_RTRPT4C_GSP       0x40000000 /* GSP enable bit */
+#define IXGBE_RTRPT4C_LSP       0x80000000 /* LSP enable bit */
+
+#define IXGBE_RDRXCTL_MPBEN     0x00000010 /* DMA config for multiple packet
+					    * buffers enable
+					    */
+#define IXGBE_RDRXCTL_MCEN      0x00000040 /* DMA config for multiple cores
+					    * (RSS) enable
+					    */
+
+/* RTRPCS Bit Masks */
+#define IXGBE_RTRPCS_RRM        0x00000002 /* Receive Recycle Mode enable */
+/* Receive Arbitration Control: 0 Round Robin, 1 DFP */
+#define IXGBE_RTRPCS_RAC        0x00000004
+#define IXGBE_RTRPCS_ARBDIS     0x00000040 /* Arbitration disable bit */
+
+/* RTTDT2C Bit Masks */
+#define IXGBE_RTTDT2C_MCL_SHIFT 12
+#define IXGBE_RTTDT2C_BWG_SHIFT 9
+#define IXGBE_RTTDT2C_GSP       0x40000000
+#define IXGBE_RTTDT2C_LSP       0x80000000
+
+#define IXGBE_RTTPT2C_MCL_SHIFT 12
+#define IXGBE_RTTPT2C_BWG_SHIFT 9
+#define IXGBE_RTTPT2C_GSP       0x40000000
+#define IXGBE_RTTPT2C_LSP       0x80000000
+
+/* RTTPCS Bit Masks */
+#define IXGBE_RTTPCS_TPPAC      0x00000020 /* 0 Round Robin,
+					    * 1 SP - Strict Priority
+					    */
+#define IXGBE_RTTPCS_ARBDIS     0x00000040 /* Arbiter disable */
+#define IXGBE_RTTPCS_TPRM       0x00000100 /* Transmit Recycle Mode enable */
+#define IXGBE_RTTPCS_ARBD_SHIFT 22
+#define IXGBE_RTTPCS_ARBD_DCB   0x4        /* Arbitration delay in DCB mode */
+
+/* SECTXMINIFG DCB */
+#define IXGBE_SECTX_DCB		0x00001F00 /* DCB TX Buffer IFG */
+
+
+/* DCB hardware-specific driver APIs */
+
+/* DCB PFC functions */
+s32 ixgbe_dcb_config_pfc_82599(struct ixgbe_hw *hw, u8 pfc_en, u8 *prio_tc);
+
+/* DCB hw initialization */
+s32 ixgbe_dcb_config_rx_arbiter_82599(struct ixgbe_hw *hw,
+					u16 *refill,
+					u16 *max,
+					u8 *bwg_id,
+					u8 *prio_type,
+					u8 *prio_tc);
+
+s32 ixgbe_dcb_config_tx_desc_arbiter_82599(struct ixgbe_hw *hw,
+						u16 *refill,
+						u16 *max,
+						u8 *bwg_id,
+						u8 *prio_type);
+
+s32 ixgbe_dcb_config_tx_data_arbiter_82599(struct ixgbe_hw *hw,
+						u16 *refill,
+						u16 *max,
+						u8 *bwg_id,
+						u8 *prio_type,
+						u8 *prio_tc);
+
+s32 ixgbe_dcb_hw_config_82599(struct ixgbe_hw *hw, u8 pfc_en, u16 *refill,
+			      u16 *max, u8 *bwg_id, u8 *prio_type,
+			      u8 *prio_tc);
+
+#endif /* _DCB_82599_CONFIG_H */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
new file mode 100644
index 0000000000..e85f7d2e88
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
@@ -0,0 +1,782 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "ixgbe.h"
+#include <linux/dcbnl.h>
+#include "ixgbe_dcb_82598.h"
+#include "ixgbe_dcb_82599.h"
+#include "ixgbe_sriov.h"
+
+/* Callbacks for DCB netlink in the kernel */
+#define BIT_PFC		0x02
+#define BIT_PG_RX	0x04
+#define BIT_PG_TX	0x08
+#define BIT_APP_UPCHG	0x10
+
+/* Responses for the DCB_C_SET_ALL command */
+#define DCB_HW_CHG_RST  0  /* DCB configuration changed with reset */
+#define DCB_NO_HW_CHG   1  /* DCB configuration did not change */
+#define DCB_HW_CHG      2  /* DCB configuration changed, no reset */
+
+static int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max)
+{
+	struct ixgbe_dcb_config *scfg = &adapter->temp_dcb_cfg;
+	struct ixgbe_dcb_config *dcfg = &adapter->dcb_cfg;
+	struct tc_configuration *src = NULL;
+	struct tc_configuration *dst = NULL;
+	int i, j;
+	int tx = DCB_TX_CONFIG;
+	int rx = DCB_RX_CONFIG;
+	int changes = 0;
+#ifdef IXGBE_FCOE
+	struct dcb_app app = {
+			      .selector = DCB_APP_IDTYPE_ETHTYPE,
+			      .protocol = ETH_P_FCOE,
+			     };
+	u8 up = dcb_getapp(adapter->netdev, &app);
+
+	if (up && !(up & BIT(adapter->fcoe.up)))
+		changes |= BIT_APP_UPCHG;
+#endif
+
+	for (i = DCB_PG_ATTR_TC_0; i < tc_max + DCB_PG_ATTR_TC_0; i++) {
+		src = &scfg->tc_config[i - DCB_PG_ATTR_TC_0];
+		dst = &dcfg->tc_config[i - DCB_PG_ATTR_TC_0];
+
+		if (dst->path[tx].prio_type != src->path[tx].prio_type) {
+			dst->path[tx].prio_type = src->path[tx].prio_type;
+			changes |= BIT_PG_TX;
+		}
+
+		if (dst->path[tx].bwg_id != src->path[tx].bwg_id) {
+			dst->path[tx].bwg_id = src->path[tx].bwg_id;
+			changes |= BIT_PG_TX;
+		}
+
+		if (dst->path[tx].bwg_percent != src->path[tx].bwg_percent) {
+			dst->path[tx].bwg_percent = src->path[tx].bwg_percent;
+			changes |= BIT_PG_TX;
+		}
+
+		if (dst->path[tx].up_to_tc_bitmap !=
+				src->path[tx].up_to_tc_bitmap) {
+			dst->path[tx].up_to_tc_bitmap =
+				src->path[tx].up_to_tc_bitmap;
+			changes |= (BIT_PG_TX | BIT_PFC | BIT_APP_UPCHG);
+		}
+
+		if (dst->path[rx].prio_type != src->path[rx].prio_type) {
+			dst->path[rx].prio_type = src->path[rx].prio_type;
+			changes |= BIT_PG_RX;
+		}
+
+		if (dst->path[rx].bwg_id != src->path[rx].bwg_id) {
+			dst->path[rx].bwg_id = src->path[rx].bwg_id;
+			changes |= BIT_PG_RX;
+		}
+
+		if (dst->path[rx].bwg_percent != src->path[rx].bwg_percent) {
+			dst->path[rx].bwg_percent = src->path[rx].bwg_percent;
+			changes |= BIT_PG_RX;
+		}
+
+		if (dst->path[rx].up_to_tc_bitmap !=
+				src->path[rx].up_to_tc_bitmap) {
+			dst->path[rx].up_to_tc_bitmap =
+				src->path[rx].up_to_tc_bitmap;
+			changes |= (BIT_PG_RX | BIT_PFC | BIT_APP_UPCHG);
+		}
+	}
+
+	for (i = DCB_PG_ATTR_BW_ID_0; i < DCB_PG_ATTR_BW_ID_MAX; i++) {
+		j = i - DCB_PG_ATTR_BW_ID_0;
+		if (dcfg->bw_percentage[tx][j] != scfg->bw_percentage[tx][j]) {
+			dcfg->bw_percentage[tx][j] = scfg->bw_percentage[tx][j];
+			changes |= BIT_PG_TX;
+		}
+		if (dcfg->bw_percentage[rx][j] != scfg->bw_percentage[rx][j]) {
+			dcfg->bw_percentage[rx][j] = scfg->bw_percentage[rx][j];
+			changes |= BIT_PG_RX;
+		}
+	}
+
+	for (i = DCB_PFC_UP_ATTR_0; i < DCB_PFC_UP_ATTR_MAX; i++) {
+		j = i - DCB_PFC_UP_ATTR_0;
+		if (dcfg->tc_config[j].dcb_pfc != scfg->tc_config[j].dcb_pfc) {
+			dcfg->tc_config[j].dcb_pfc = scfg->tc_config[j].dcb_pfc;
+			changes |= BIT_PFC;
+		}
+	}
+
+	if (dcfg->pfc_mode_enable != scfg->pfc_mode_enable) {
+		dcfg->pfc_mode_enable = scfg->pfc_mode_enable;
+		changes |= BIT_PFC;
+	}
+
+	return changes;
+}
+
+static u8 ixgbe_dcbnl_get_state(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	return !!(adapter->flags & IXGBE_FLAG_DCB_ENABLED);
+}
+
+static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	/* Fail command if not in CEE mode */
+	if (!(adapter->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return 1;
+
+	/* verify there is something to do, if not then exit */
+	if (!state == !(adapter->flags & IXGBE_FLAG_DCB_ENABLED))
+		return 0;
+
+	return !!ixgbe_setup_tc(netdev,
+				state ? adapter->dcb_cfg.num_tcs.pg_tcs : 0);
+}
+
+static void ixgbe_dcbnl_get_perm_hw_addr(struct net_device *netdev,
+					 u8 *perm_addr)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	int i, j;
+
+	memset(perm_addr, 0xff, MAX_ADDR_LEN);
+
+	for (i = 0; i < netdev->addr_len; i++)
+		perm_addr[i] = adapter->hw.mac.perm_addr[i];
+
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+		for (j = 0; j < netdev->addr_len; j++, i++)
+			perm_addr[i] = adapter->hw.mac.san_addr[j];
+		break;
+	default:
+		break;
+	}
+}
+
+static void ixgbe_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
+					 u8 prio, u8 bwg_id, u8 bw_pct,
+					 u8 up_map)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (prio != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[0].prio_type = prio;
+	if (bwg_id != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_id = bwg_id;
+	if (bw_pct != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_percent =
+			bw_pct;
+	if (up_map != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap =
+			up_map;
+}
+
+static void ixgbe_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id,
+					  u8 bw_pct)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	adapter->temp_dcb_cfg.bw_percentage[0][bwg_id] = bw_pct;
+}
+
+static void ixgbe_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev, int tc,
+					 u8 prio, u8 bwg_id, u8 bw_pct,
+					 u8 up_map)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (prio != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[1].prio_type = prio;
+	if (bwg_id != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_id = bwg_id;
+	if (bw_pct != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_percent =
+			bw_pct;
+	if (up_map != DCB_ATTR_VALUE_UNDEFINED)
+		adapter->temp_dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap =
+			up_map;
+}
+
+static void ixgbe_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id,
+					  u8 bw_pct)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	adapter->temp_dcb_cfg.bw_percentage[1][bwg_id] = bw_pct;
+}
+
+static void ixgbe_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int tc,
+					 u8 *prio, u8 *bwg_id, u8 *bw_pct,
+					 u8 *up_map)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	*prio = adapter->dcb_cfg.tc_config[tc].path[0].prio_type;
+	*bwg_id = adapter->dcb_cfg.tc_config[tc].path[0].bwg_id;
+	*bw_pct = adapter->dcb_cfg.tc_config[tc].path[0].bwg_percent;
+	*up_map = adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap;
+}
+
+static void ixgbe_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id,
+					  u8 *bw_pct)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	*bw_pct = adapter->dcb_cfg.bw_percentage[0][bwg_id];
+}
+
+static void ixgbe_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int tc,
+					 u8 *prio, u8 *bwg_id, u8 *bw_pct,
+					 u8 *up_map)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	*prio = adapter->dcb_cfg.tc_config[tc].path[1].prio_type;
+	*bwg_id = adapter->dcb_cfg.tc_config[tc].path[1].bwg_id;
+	*bw_pct = adapter->dcb_cfg.tc_config[tc].path[1].bwg_percent;
+	*up_map = adapter->dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap;
+}
+
+static void ixgbe_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id,
+					  u8 *bw_pct)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	*bw_pct = adapter->dcb_cfg.bw_percentage[1][bwg_id];
+}
+
+static void ixgbe_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority,
+				    u8 setting)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc = setting;
+	if (adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc !=
+	    adapter->dcb_cfg.tc_config[priority].dcb_pfc)
+		adapter->temp_dcb_cfg.pfc_mode_enable = true;
+}
+
+static void ixgbe_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority,
+				    u8 *setting)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	*setting = adapter->dcb_cfg.tc_config[priority].dcb_pfc;
+}
+
+static void ixgbe_dcbnl_devreset(struct net_device *dev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+
+	while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (netif_running(dev))
+		dev->netdev_ops->ndo_stop(dev);
+
+	ixgbe_clear_interrupt_scheme(adapter);
+	ixgbe_init_interrupt_scheme(adapter);
+
+	if (netif_running(dev))
+		dev->netdev_ops->ndo_open(dev);
+
+	clear_bit(__IXGBE_RESETTING, &adapter->state);
+}
+
+static u8 ixgbe_dcbnl_set_all(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_dcb_config *dcb_cfg = &adapter->dcb_cfg;
+	struct ixgbe_hw *hw = &adapter->hw;
+	int ret = DCB_NO_HW_CHG;
+	int i;
+
+	/* Fail command if not in CEE mode */
+	if (!(adapter->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return DCB_NO_HW_CHG;
+
+	adapter->dcb_set_bitmap |= ixgbe_copy_dcb_cfg(adapter,
+						      MAX_TRAFFIC_CLASS);
+	if (!adapter->dcb_set_bitmap)
+		return DCB_NO_HW_CHG;
+
+	if (adapter->dcb_set_bitmap & (BIT_PG_TX|BIT_PG_RX)) {
+		u16 refill[MAX_TRAFFIC_CLASS], max[MAX_TRAFFIC_CLASS];
+		u8 bwg_id[MAX_TRAFFIC_CLASS], prio_type[MAX_TRAFFIC_CLASS];
+		/* Priority to TC mapping in CEE case default to 1:1 */
+		u8 prio_tc[MAX_USER_PRIORITY];
+		int max_frame = adapter->netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+#ifdef IXGBE_FCOE
+		if (adapter->netdev->features & NETIF_F_FCOE_MTU)
+			max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE);
+#endif
+
+		ixgbe_dcb_calculate_tc_credits(hw, dcb_cfg, max_frame,
+					       DCB_TX_CONFIG);
+		ixgbe_dcb_calculate_tc_credits(hw, dcb_cfg, max_frame,
+					       DCB_RX_CONFIG);
+
+		ixgbe_dcb_unpack_refill(dcb_cfg, DCB_TX_CONFIG, refill);
+		ixgbe_dcb_unpack_max(dcb_cfg, max);
+		ixgbe_dcb_unpack_bwgid(dcb_cfg, DCB_TX_CONFIG, bwg_id);
+		ixgbe_dcb_unpack_prio(dcb_cfg, DCB_TX_CONFIG, prio_type);
+		ixgbe_dcb_unpack_map(dcb_cfg, DCB_TX_CONFIG, prio_tc);
+
+		ixgbe_dcb_hw_ets_config(hw, refill, max, bwg_id,
+					prio_type, prio_tc);
+
+		for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+			netdev_set_prio_tc_map(netdev, i, prio_tc[i]);
+
+		ret = DCB_HW_CHG_RST;
+	}
+
+	if (adapter->dcb_set_bitmap & BIT_PFC) {
+		if (dcb_cfg->pfc_mode_enable) {
+			u8 pfc_en;
+			u8 prio_tc[MAX_USER_PRIORITY];
+
+			ixgbe_dcb_unpack_map(dcb_cfg, DCB_TX_CONFIG, prio_tc);
+			ixgbe_dcb_unpack_pfc(dcb_cfg, &pfc_en);
+			ixgbe_dcb_hw_pfc_config(hw, pfc_en, prio_tc);
+		} else {
+			hw->mac.ops.fc_enable(hw);
+		}
+
+		ixgbe_set_rx_drop_en(adapter);
+
+		ret = DCB_HW_CHG;
+	}
+
+#ifdef IXGBE_FCOE
+	/* Reprogram FCoE hardware offloads when the traffic class
+	 * FCoE is using changes. This happens if the APP info
+	 * changes or the up2tc mapping is updated.
+	 */
+	if (adapter->dcb_set_bitmap & BIT_APP_UPCHG) {
+		struct dcb_app app = {
+				      .selector = DCB_APP_IDTYPE_ETHTYPE,
+				      .protocol = ETH_P_FCOE,
+				     };
+		u8 up = dcb_getapp(netdev, &app);
+
+		adapter->fcoe.up = ffs(up) - 1;
+		ixgbe_dcbnl_devreset(netdev);
+		ret = DCB_HW_CHG_RST;
+	}
+#endif
+
+	adapter->dcb_set_bitmap = 0x00;
+	return ret;
+}
+
+static u8 ixgbe_dcbnl_getcap(struct net_device *netdev, int capid, u8 *cap)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	switch (capid) {
+	case DCB_CAP_ATTR_PG:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_PFC:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_UP2TC:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_PG_TCS:
+		*cap = 0x80;
+		break;
+	case DCB_CAP_ATTR_PFC_TCS:
+		*cap = 0x80;
+		break;
+	case DCB_CAP_ATTR_GSP:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_BCN:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_DCBX:
+		*cap = adapter->dcbx_cap;
+		break;
+	default:
+		*cap = false;
+		break;
+	}
+
+	return 0;
+}
+
+static int ixgbe_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
+		switch (tcid) {
+		case DCB_NUMTCS_ATTR_PG:
+			*num = adapter->dcb_cfg.num_tcs.pg_tcs;
+			break;
+		case DCB_NUMTCS_ATTR_PFC:
+			*num = adapter->dcb_cfg.num_tcs.pfc_tcs;
+			break;
+		default:
+			return -EINVAL;
+		}
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ixgbe_dcbnl_setnumtcs(struct net_device *netdev, int tcid, u8 num)
+{
+	return -EINVAL;
+}
+
+static u8 ixgbe_dcbnl_getpfcstate(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->dcb_cfg.pfc_mode_enable;
+}
+
+static void ixgbe_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	adapter->temp_dcb_cfg.pfc_mode_enable = state;
+}
+
+/**
+ * ixgbe_dcbnl_getapp - retrieve the DCBX application user priority
+ * @netdev : the corresponding netdev
+ * @idtype : identifies the id as ether type or TCP/UDP port number
+ * @id: id is either ether type or TCP/UDP port number
+ *
+ * Returns : on success, returns a non-zero 802.1p user priority bitmap
+ * otherwise returns -EINVAL as the invalid user priority bitmap to indicate an
+ * error.
+ */
+static int ixgbe_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct dcb_app app = {
+				.selector = idtype,
+				.protocol = id,
+			     };
+
+	if (!(adapter->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return -EINVAL;
+
+	return dcb_getapp(netdev, &app);
+}
+
+static int ixgbe_dcbnl_ieee_getets(struct net_device *dev,
+				   struct ieee_ets *ets)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ieee_ets *my_ets = adapter->ixgbe_ieee_ets;
+
+	ets->ets_cap = adapter->dcb_cfg.num_tcs.pg_tcs;
+
+	/* No IEEE PFC settings available */
+	if (!my_ets)
+		return 0;
+
+	ets->cbs = my_ets->cbs;
+	memcpy(ets->tc_tx_bw, my_ets->tc_tx_bw, sizeof(ets->tc_tx_bw));
+	memcpy(ets->tc_rx_bw, my_ets->tc_rx_bw, sizeof(ets->tc_rx_bw));
+	memcpy(ets->tc_tsa, my_ets->tc_tsa, sizeof(ets->tc_tsa));
+	memcpy(ets->prio_tc, my_ets->prio_tc, sizeof(ets->prio_tc));
+	return 0;
+}
+
+static int ixgbe_dcbnl_ieee_setets(struct net_device *dev,
+				   struct ieee_ets *ets)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	int max_frame = dev->mtu + ETH_HLEN + ETH_FCS_LEN;
+	int i, err;
+	__u8 max_tc = 0;
+	__u8 map_chg = 0;
+
+	if (!(adapter->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	if (!adapter->ixgbe_ieee_ets) {
+		adapter->ixgbe_ieee_ets = kmalloc(sizeof(struct ieee_ets),
+						  GFP_KERNEL);
+		if (!adapter->ixgbe_ieee_ets)
+			return -ENOMEM;
+
+		/* initialize UP2TC mappings to invalid value */
+		for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+			adapter->ixgbe_ieee_ets->prio_tc[i] =
+				IEEE_8021QAZ_MAX_TCS;
+		/* if possible update UP2TC mappings from HW */
+		ixgbe_dcb_read_rtrup2tc(&adapter->hw,
+					adapter->ixgbe_ieee_ets->prio_tc);
+	}
+
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		if (ets->prio_tc[i] > max_tc)
+			max_tc = ets->prio_tc[i];
+		if (ets->prio_tc[i] != adapter->ixgbe_ieee_ets->prio_tc[i])
+			map_chg = 1;
+	}
+
+	memcpy(adapter->ixgbe_ieee_ets, ets, sizeof(*adapter->ixgbe_ieee_ets));
+
+	if (max_tc)
+		max_tc++;
+
+	if (max_tc > adapter->dcb_cfg.num_tcs.pg_tcs)
+		return -EINVAL;
+
+	if (max_tc != adapter->hw_tcs) {
+		err = ixgbe_setup_tc(dev, max_tc);
+		if (err)
+			return err;
+	} else if (map_chg) {
+		ixgbe_dcbnl_devreset(dev);
+	}
+
+	return ixgbe_dcb_hw_ets(&adapter->hw, ets, max_frame);
+}
+
+static int ixgbe_dcbnl_ieee_getpfc(struct net_device *dev,
+				   struct ieee_pfc *pfc)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ieee_pfc *my_pfc = adapter->ixgbe_ieee_pfc;
+	int i;
+
+	pfc->pfc_cap = adapter->dcb_cfg.num_tcs.pfc_tcs;
+
+	/* No IEEE PFC settings available */
+	if (!my_pfc)
+		return 0;
+
+	pfc->pfc_en = my_pfc->pfc_en;
+	pfc->mbc = my_pfc->mbc;
+	pfc->delay = my_pfc->delay;
+
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		pfc->requests[i] = adapter->stats.pxoffrxc[i];
+		pfc->indications[i] = adapter->stats.pxofftxc[i];
+	}
+
+	return 0;
+}
+
+static int ixgbe_dcbnl_ieee_setpfc(struct net_device *dev,
+				   struct ieee_pfc *pfc)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u8 *prio_tc;
+	int err;
+
+	if (!(adapter->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	if (!adapter->ixgbe_ieee_pfc) {
+		adapter->ixgbe_ieee_pfc = kmalloc(sizeof(struct ieee_pfc),
+						  GFP_KERNEL);
+		if (!adapter->ixgbe_ieee_pfc)
+			return -ENOMEM;
+	}
+
+	prio_tc = adapter->ixgbe_ieee_ets->prio_tc;
+	memcpy(adapter->ixgbe_ieee_pfc, pfc, sizeof(*adapter->ixgbe_ieee_pfc));
+
+	/* Enable link flow control parameters if PFC is disabled */
+	if (pfc->pfc_en)
+		err = ixgbe_dcb_hw_pfc_config(hw, pfc->pfc_en, prio_tc);
+	else
+		err = hw->mac.ops.fc_enable(hw);
+
+	ixgbe_set_rx_drop_en(adapter);
+
+	return err;
+}
+
+static int ixgbe_dcbnl_ieee_setapp(struct net_device *dev,
+				   struct dcb_app *app)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	int err;
+
+	if (!(adapter->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	err = dcb_ieee_setapp(dev, app);
+	if (err)
+		return err;
+
+#ifdef IXGBE_FCOE
+	if (app->selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE &&
+	    app->protocol == ETH_P_FCOE) {
+		u8 app_mask = dcb_ieee_getapp_mask(dev, app);
+
+		if (app_mask & BIT(adapter->fcoe.up))
+			return 0;
+
+		adapter->fcoe.up = app->priority;
+		ixgbe_dcbnl_devreset(dev);
+	}
+#endif
+
+	/* VF devices should use default UP when available */
+	if (app->selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE &&
+	    app->protocol == 0) {
+		int vf;
+
+		adapter->default_up = app->priority;
+
+		for (vf = 0; vf < adapter->num_vfs; vf++) {
+			struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
+
+			if (!vfinfo->pf_qos)
+				ixgbe_set_vmvir(adapter, vfinfo->pf_vlan,
+						app->priority, vf);
+		}
+	}
+
+	return 0;
+}
+
+static int ixgbe_dcbnl_ieee_delapp(struct net_device *dev,
+				   struct dcb_app *app)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	int err;
+
+	if (!(adapter->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
+		return -EINVAL;
+
+	err = dcb_ieee_delapp(dev, app);
+
+#ifdef IXGBE_FCOE
+	if (!err && app->selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE &&
+	    app->protocol == ETH_P_FCOE) {
+		u8 app_mask = dcb_ieee_getapp_mask(dev, app);
+
+		if (app_mask & BIT(adapter->fcoe.up))
+			return 0;
+
+		adapter->fcoe.up = app_mask ?
+				   ffs(app_mask) - 1 : IXGBE_FCOE_DEFTC;
+		ixgbe_dcbnl_devreset(dev);
+	}
+#endif
+	/* IF default priority is being removed clear VF default UP */
+	if (app->selector == IEEE_8021QAZ_APP_SEL_ETHERTYPE &&
+	    app->protocol == 0 && adapter->default_up == app->priority) {
+		int vf;
+		long unsigned int app_mask = dcb_ieee_getapp_mask(dev, app);
+		int qos = app_mask ? find_first_bit(&app_mask, 8) : 0;
+
+		adapter->default_up = qos;
+
+		for (vf = 0; vf < adapter->num_vfs; vf++) {
+			struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
+
+			if (!vfinfo->pf_qos)
+				ixgbe_set_vmvir(adapter, vfinfo->pf_vlan,
+						qos, vf);
+		}
+	}
+
+	return err;
+}
+
+static u8 ixgbe_dcbnl_getdcbx(struct net_device *dev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	return adapter->dcbx_cap;
+}
+
+static u8 ixgbe_dcbnl_setdcbx(struct net_device *dev, u8 mode)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ieee_ets ets = {0};
+	struct ieee_pfc pfc = {0};
+	int err = 0;
+
+	/* no support for LLD_MANAGED modes or CEE+IEEE */
+	if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
+	    ((mode & DCB_CAP_DCBX_VER_IEEE) && (mode & DCB_CAP_DCBX_VER_CEE)) ||
+	    !(mode & DCB_CAP_DCBX_HOST))
+		return 1;
+
+	if (mode == adapter->dcbx_cap)
+		return 0;
+
+	adapter->dcbx_cap = mode;
+
+	/* ETS and PFC defaults */
+	ets.ets_cap = 8;
+	pfc.pfc_cap = 8;
+
+	if (mode & DCB_CAP_DCBX_VER_IEEE) {
+		ixgbe_dcbnl_ieee_setets(dev, &ets);
+		ixgbe_dcbnl_ieee_setpfc(dev, &pfc);
+	} else if (mode & DCB_CAP_DCBX_VER_CEE) {
+		u8 mask = BIT_PFC | BIT_PG_TX | BIT_PG_RX | BIT_APP_UPCHG;
+
+		adapter->dcb_set_bitmap |= mask;
+		ixgbe_dcbnl_set_all(dev);
+	} else {
+		/* Drop into single TC mode strict priority as this
+		 * indicates CEE and IEEE versions are disabled
+		 */
+		ixgbe_dcbnl_ieee_setets(dev, &ets);
+		ixgbe_dcbnl_ieee_setpfc(dev, &pfc);
+		err = ixgbe_setup_tc(dev, 0);
+	}
+
+	return err ? 1 : 0;
+}
+
+const struct dcbnl_rtnl_ops ixgbe_dcbnl_ops = {
+	.ieee_getets	= ixgbe_dcbnl_ieee_getets,
+	.ieee_setets	= ixgbe_dcbnl_ieee_setets,
+	.ieee_getpfc	= ixgbe_dcbnl_ieee_getpfc,
+	.ieee_setpfc	= ixgbe_dcbnl_ieee_setpfc,
+	.ieee_setapp	= ixgbe_dcbnl_ieee_setapp,
+	.ieee_delapp	= ixgbe_dcbnl_ieee_delapp,
+	.getstate	= ixgbe_dcbnl_get_state,
+	.setstate	= ixgbe_dcbnl_set_state,
+	.getpermhwaddr	= ixgbe_dcbnl_get_perm_hw_addr,
+	.setpgtccfgtx	= ixgbe_dcbnl_set_pg_tc_cfg_tx,
+	.setpgbwgcfgtx	= ixgbe_dcbnl_set_pg_bwg_cfg_tx,
+	.setpgtccfgrx	= ixgbe_dcbnl_set_pg_tc_cfg_rx,
+	.setpgbwgcfgrx	= ixgbe_dcbnl_set_pg_bwg_cfg_rx,
+	.getpgtccfgtx	= ixgbe_dcbnl_get_pg_tc_cfg_tx,
+	.getpgbwgcfgtx	= ixgbe_dcbnl_get_pg_bwg_cfg_tx,
+	.getpgtccfgrx	= ixgbe_dcbnl_get_pg_tc_cfg_rx,
+	.getpgbwgcfgrx	= ixgbe_dcbnl_get_pg_bwg_cfg_rx,
+	.setpfccfg	= ixgbe_dcbnl_set_pfc_cfg,
+	.getpfccfg	= ixgbe_dcbnl_get_pfc_cfg,
+	.setall		= ixgbe_dcbnl_set_all,
+	.getcap		= ixgbe_dcbnl_getcap,
+	.getnumtcs	= ixgbe_dcbnl_getnumtcs,
+	.setnumtcs	= ixgbe_dcbnl_setnumtcs,
+	.getpfcstate	= ixgbe_dcbnl_getpfcstate,
+	.setpfcstate	= ixgbe_dcbnl_setpfcstate,
+	.getapp		= ixgbe_dcbnl_getapp,
+	.getdcbx	= ixgbe_dcbnl_getdcbx,
+	.setdcbx	= ixgbe_dcbnl_setdcbx,
+};
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
new file mode 100644
index 0000000000..5b1cf49df3
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include <linux/debugfs.h>
+#include <linux/module.h>
+
+#include "ixgbe.h"
+
+static struct dentry *ixgbe_dbg_root;
+
+static char ixgbe_dbg_reg_ops_buf[256] = "";
+
+static ssize_t ixgbe_dbg_common_ops_read(struct file *filp, char __user *buffer,
+					 size_t count, loff_t *ppos,
+					 char *dbg_buf)
+{
+	struct ixgbe_adapter *adapter = filp->private_data;
+	char *buf;
+	int len;
+
+	/* don't allow partial reads */
+	if (*ppos != 0)
+		return 0;
+
+	buf = kasprintf(GFP_KERNEL, "%s: %s\n",
+			adapter->netdev->name, dbg_buf);
+	if (!buf)
+		return -ENOMEM;
+
+	if (count < strlen(buf)) {
+		kfree(buf);
+		return -ENOSPC;
+	}
+
+	len = simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+
+	kfree(buf);
+	return len;
+}
+
+/**
+ * ixgbe_dbg_reg_ops_read - read for reg_ops datum
+ * @filp: the opened file
+ * @buffer: where to write the data for the user to read
+ * @count: the size of the user's buffer
+ * @ppos: file position offset
+ **/
+static ssize_t ixgbe_dbg_reg_ops_read(struct file *filp, char __user *buffer,
+				      size_t count, loff_t *ppos)
+{
+	return ixgbe_dbg_common_ops_read(filp, buffer, count, ppos,
+					 ixgbe_dbg_reg_ops_buf);
+}
+
+/**
+ * ixgbe_dbg_reg_ops_write - write into reg_ops datum
+ * @filp: the opened file
+ * @buffer: where to find the user's data
+ * @count: the length of the user's data
+ * @ppos: file position offset
+ **/
+static ssize_t ixgbe_dbg_reg_ops_write(struct file *filp,
+				     const char __user *buffer,
+				     size_t count, loff_t *ppos)
+{
+	struct ixgbe_adapter *adapter = filp->private_data;
+	int len;
+
+	/* don't allow partial writes */
+	if (*ppos != 0)
+		return 0;
+	if (count >= sizeof(ixgbe_dbg_reg_ops_buf))
+		return -ENOSPC;
+
+	len = simple_write_to_buffer(ixgbe_dbg_reg_ops_buf,
+				     sizeof(ixgbe_dbg_reg_ops_buf)-1,
+				     ppos,
+				     buffer,
+				     count);
+	if (len < 0)
+		return len;
+
+	ixgbe_dbg_reg_ops_buf[len] = '\0';
+
+	if (strncmp(ixgbe_dbg_reg_ops_buf, "write", 5) == 0) {
+		u32 reg, value;
+		int cnt;
+		cnt = sscanf(&ixgbe_dbg_reg_ops_buf[5], "%x %x", &reg, &value);
+		if (cnt == 2) {
+			IXGBE_WRITE_REG(&adapter->hw, reg, value);
+			value = IXGBE_READ_REG(&adapter->hw, reg);
+			e_dev_info("write: 0x%08x = 0x%08x\n", reg, value);
+		} else {
+			e_dev_info("write <reg> <value>\n");
+		}
+	} else if (strncmp(ixgbe_dbg_reg_ops_buf, "read", 4) == 0) {
+		u32 reg, value;
+		int cnt;
+		cnt = sscanf(&ixgbe_dbg_reg_ops_buf[4], "%x", &reg);
+		if (cnt == 1) {
+			value = IXGBE_READ_REG(&adapter->hw, reg);
+			e_dev_info("read 0x%08x = 0x%08x\n", reg, value);
+		} else {
+			e_dev_info("read <reg>\n");
+		}
+	} else {
+		e_dev_info("Unknown command %s\n", ixgbe_dbg_reg_ops_buf);
+		e_dev_info("Available commands:\n");
+		e_dev_info("   read <reg>\n");
+		e_dev_info("   write <reg> <value>\n");
+	}
+	return count;
+}
+
+static const struct file_operations ixgbe_dbg_reg_ops_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read =  ixgbe_dbg_reg_ops_read,
+	.write = ixgbe_dbg_reg_ops_write,
+};
+
+static char ixgbe_dbg_netdev_ops_buf[256] = "";
+
+/**
+ * ixgbe_dbg_netdev_ops_read - read for netdev_ops datum
+ * @filp: the opened file
+ * @buffer: where to write the data for the user to read
+ * @count: the size of the user's buffer
+ * @ppos: file position offset
+ **/
+static ssize_t ixgbe_dbg_netdev_ops_read(struct file *filp, char __user *buffer,
+					 size_t count, loff_t *ppos)
+{
+	return ixgbe_dbg_common_ops_read(filp, buffer, count, ppos,
+					 ixgbe_dbg_netdev_ops_buf);
+}
+
+/**
+ * ixgbe_dbg_netdev_ops_write - write into netdev_ops datum
+ * @filp: the opened file
+ * @buffer: where to find the user's data
+ * @count: the length of the user's data
+ * @ppos: file position offset
+ **/
+static ssize_t ixgbe_dbg_netdev_ops_write(struct file *filp,
+					  const char __user *buffer,
+					  size_t count, loff_t *ppos)
+{
+	struct ixgbe_adapter *adapter = filp->private_data;
+	int len;
+
+	/* don't allow partial writes */
+	if (*ppos != 0)
+		return 0;
+	if (count >= sizeof(ixgbe_dbg_netdev_ops_buf))
+		return -ENOSPC;
+
+	len = simple_write_to_buffer(ixgbe_dbg_netdev_ops_buf,
+				     sizeof(ixgbe_dbg_netdev_ops_buf)-1,
+				     ppos,
+				     buffer,
+				     count);
+	if (len < 0)
+		return len;
+
+	ixgbe_dbg_netdev_ops_buf[len] = '\0';
+
+	if (strncmp(ixgbe_dbg_netdev_ops_buf, "tx_timeout", 10) == 0) {
+		/* TX Queue number below is wrong, but ixgbe does not use it */
+		adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev,
+							    UINT_MAX);
+		e_dev_info("tx_timeout called\n");
+	} else {
+		e_dev_info("Unknown command: %s\n", ixgbe_dbg_netdev_ops_buf);
+		e_dev_info("Available commands:\n");
+		e_dev_info("    tx_timeout\n");
+	}
+	return count;
+}
+
+static const struct file_operations ixgbe_dbg_netdev_ops_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = ixgbe_dbg_netdev_ops_read,
+	.write = ixgbe_dbg_netdev_ops_write,
+};
+
+/**
+ * ixgbe_dbg_adapter_init - setup the debugfs directory for the adapter
+ * @adapter: the adapter that is starting up
+ **/
+void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter)
+{
+	const char *name = pci_name(adapter->pdev);
+
+	adapter->ixgbe_dbg_adapter = debugfs_create_dir(name, ixgbe_dbg_root);
+	debugfs_create_file("reg_ops", 0600, adapter->ixgbe_dbg_adapter,
+			    adapter, &ixgbe_dbg_reg_ops_fops);
+	debugfs_create_file("netdev_ops", 0600, adapter->ixgbe_dbg_adapter,
+			    adapter, &ixgbe_dbg_netdev_ops_fops);
+}
+
+/**
+ * ixgbe_dbg_adapter_exit - clear out the adapter's debugfs entries
+ * @adapter: the adapter that is exiting
+ **/
+void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter)
+{
+	debugfs_remove_recursive(adapter->ixgbe_dbg_adapter);
+	adapter->ixgbe_dbg_adapter = NULL;
+}
+
+/**
+ * ixgbe_dbg_init - start up debugfs for the driver
+ **/
+void ixgbe_dbg_init(void)
+{
+	ixgbe_dbg_root = debugfs_create_dir(ixgbe_driver_name, NULL);
+}
+
+/**
+ * ixgbe_dbg_exit - clean out the driver's debugfs entries
+ **/
+void ixgbe_dbg_exit(void)
+{
+	debugfs_remove_recursive(ixgbe_dbg_root);
+}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
new file mode 100644
index 0000000000..0bbad4a5cc
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -0,0 +1,3637 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+/* ethtool support for ixgbe */
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/uaccess.h>
+
+#include "ixgbe.h"
+#include "ixgbe_phy.h"
+
+
+enum {NETDEV_STATS, IXGBE_STATS};
+
+struct ixgbe_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int type;
+	int sizeof_stat;
+	int stat_offset;
+};
+
+#define IXGBE_STAT(m)		IXGBE_STATS, \
+				sizeof(((struct ixgbe_adapter *)0)->m), \
+				offsetof(struct ixgbe_adapter, m)
+#define IXGBE_NETDEV_STAT(m)	NETDEV_STATS, \
+				sizeof(((struct rtnl_link_stats64 *)0)->m), \
+				offsetof(struct rtnl_link_stats64, m)
+
+static const struct ixgbe_stats ixgbe_gstrings_stats[] = {
+	{"rx_packets", IXGBE_NETDEV_STAT(rx_packets)},
+	{"tx_packets", IXGBE_NETDEV_STAT(tx_packets)},
+	{"rx_bytes", IXGBE_NETDEV_STAT(rx_bytes)},
+	{"tx_bytes", IXGBE_NETDEV_STAT(tx_bytes)},
+	{"rx_pkts_nic", IXGBE_STAT(stats.gprc)},
+	{"tx_pkts_nic", IXGBE_STAT(stats.gptc)},
+	{"rx_bytes_nic", IXGBE_STAT(stats.gorc)},
+	{"tx_bytes_nic", IXGBE_STAT(stats.gotc)},
+	{"lsc_int", IXGBE_STAT(lsc_int)},
+	{"tx_busy", IXGBE_STAT(tx_busy)},
+	{"non_eop_descs", IXGBE_STAT(non_eop_descs)},
+	{"rx_errors", IXGBE_NETDEV_STAT(rx_errors)},
+	{"tx_errors", IXGBE_NETDEV_STAT(tx_errors)},
+	{"rx_dropped", IXGBE_NETDEV_STAT(rx_dropped)},
+	{"tx_dropped", IXGBE_NETDEV_STAT(tx_dropped)},
+	{"multicast", IXGBE_NETDEV_STAT(multicast)},
+	{"broadcast", IXGBE_STAT(stats.bprc)},
+	{"rx_no_buffer_count", IXGBE_STAT(stats.rnbc[0]) },
+	{"collisions", IXGBE_NETDEV_STAT(collisions)},
+	{"rx_over_errors", IXGBE_NETDEV_STAT(rx_over_errors)},
+	{"rx_crc_errors", IXGBE_NETDEV_STAT(rx_crc_errors)},
+	{"rx_frame_errors", IXGBE_NETDEV_STAT(rx_frame_errors)},
+	{"hw_rsc_aggregated", IXGBE_STAT(rsc_total_count)},
+	{"hw_rsc_flushed", IXGBE_STAT(rsc_total_flush)},
+	{"fdir_match", IXGBE_STAT(stats.fdirmatch)},
+	{"fdir_miss", IXGBE_STAT(stats.fdirmiss)},
+	{"fdir_overflow", IXGBE_STAT(fdir_overflow)},
+	{"rx_fifo_errors", IXGBE_NETDEV_STAT(rx_fifo_errors)},
+	{"rx_missed_errors", IXGBE_NETDEV_STAT(rx_missed_errors)},
+	{"tx_aborted_errors", IXGBE_NETDEV_STAT(tx_aborted_errors)},
+	{"tx_carrier_errors", IXGBE_NETDEV_STAT(tx_carrier_errors)},
+	{"tx_fifo_errors", IXGBE_NETDEV_STAT(tx_fifo_errors)},
+	{"tx_heartbeat_errors", IXGBE_NETDEV_STAT(tx_heartbeat_errors)},
+	{"tx_timeout_count", IXGBE_STAT(tx_timeout_count)},
+	{"tx_restart_queue", IXGBE_STAT(restart_queue)},
+	{"rx_length_errors", IXGBE_STAT(stats.rlec)},
+	{"rx_long_length_errors", IXGBE_STAT(stats.roc)},
+	{"rx_short_length_errors", IXGBE_STAT(stats.ruc)},
+	{"tx_flow_control_xon", IXGBE_STAT(stats.lxontxc)},
+	{"rx_flow_control_xon", IXGBE_STAT(stats.lxonrxc)},
+	{"tx_flow_control_xoff", IXGBE_STAT(stats.lxofftxc)},
+	{"rx_flow_control_xoff", IXGBE_STAT(stats.lxoffrxc)},
+	{"rx_csum_offload_errors", IXGBE_STAT(hw_csum_rx_error)},
+	{"alloc_rx_page", IXGBE_STAT(alloc_rx_page)},
+	{"alloc_rx_page_failed", IXGBE_STAT(alloc_rx_page_failed)},
+	{"alloc_rx_buff_failed", IXGBE_STAT(alloc_rx_buff_failed)},
+	{"rx_no_dma_resources", IXGBE_STAT(hw_rx_no_dma_resources)},
+	{"os2bmc_rx_by_bmc", IXGBE_STAT(stats.o2bgptc)},
+	{"os2bmc_tx_by_bmc", IXGBE_STAT(stats.b2ospc)},
+	{"os2bmc_tx_by_host", IXGBE_STAT(stats.o2bspc)},
+	{"os2bmc_rx_by_host", IXGBE_STAT(stats.b2ogprc)},
+	{"tx_hwtstamp_timeouts", IXGBE_STAT(tx_hwtstamp_timeouts)},
+	{"tx_hwtstamp_skipped", IXGBE_STAT(tx_hwtstamp_skipped)},
+	{"rx_hwtstamp_cleared", IXGBE_STAT(rx_hwtstamp_cleared)},
+	{"tx_ipsec", IXGBE_STAT(tx_ipsec)},
+	{"rx_ipsec", IXGBE_STAT(rx_ipsec)},
+#ifdef IXGBE_FCOE
+	{"fcoe_bad_fccrc", IXGBE_STAT(stats.fccrc)},
+	{"rx_fcoe_dropped", IXGBE_STAT(stats.fcoerpdc)},
+	{"rx_fcoe_packets", IXGBE_STAT(stats.fcoeprc)},
+	{"rx_fcoe_dwords", IXGBE_STAT(stats.fcoedwrc)},
+	{"fcoe_noddp", IXGBE_STAT(stats.fcoe_noddp)},
+	{"fcoe_noddp_ext_buff", IXGBE_STAT(stats.fcoe_noddp_ext_buff)},
+	{"tx_fcoe_packets", IXGBE_STAT(stats.fcoeptc)},
+	{"tx_fcoe_dwords", IXGBE_STAT(stats.fcoedwtc)},
+#endif /* IXGBE_FCOE */
+};
+
+/* ixgbe allocates num_tx_queues and num_rx_queues symmetrically so
+ * we set the num_rx_queues to evaluate to num_tx_queues. This is
+ * used because we do not have a good way to get the max number of
+ * rx queues with CONFIG_RPS disabled.
+ */
+#define IXGBE_NUM_RX_QUEUES netdev->num_tx_queues
+
+#define IXGBE_QUEUE_STATS_LEN ( \
+	(netdev->num_tx_queues + IXGBE_NUM_RX_QUEUES) * \
+	(sizeof(struct ixgbe_queue_stats) / sizeof(u64)))
+#define IXGBE_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_stats)
+#define IXGBE_PB_STATS_LEN ( \
+			(sizeof(((struct ixgbe_adapter *)0)->stats.pxonrxc) + \
+			 sizeof(((struct ixgbe_adapter *)0)->stats.pxontxc) + \
+			 sizeof(((struct ixgbe_adapter *)0)->stats.pxoffrxc) + \
+			 sizeof(((struct ixgbe_adapter *)0)->stats.pxofftxc)) \
+			/ sizeof(u64))
+#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + \
+			 IXGBE_PB_STATS_LEN + \
+			 IXGBE_QUEUE_STATS_LEN)
+
+static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
+	"Register test  (offline)", "Eeprom test    (offline)",
+	"Interrupt test (offline)", "Loopback test  (offline)",
+	"Link test   (on/offline)"
+};
+#define IXGBE_TEST_LEN sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN
+
+static const char ixgbe_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IXGBE_PRIV_FLAGS_LEGACY_RX	BIT(0)
+	"legacy-rx",
+#define IXGBE_PRIV_FLAGS_VF_IPSEC_EN	BIT(1)
+	"vf-ipsec",
+#define IXGBE_PRIV_FLAGS_AUTO_DISABLE_VF	BIT(2)
+	"mdd-disable-vf",
+};
+
+#define IXGBE_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbe_priv_flags_strings)
+
+#define ixgbe_isbackplane(type) ((type) == ixgbe_media_type_backplane)
+
+static void ixgbe_set_supported_10gtypes(struct ixgbe_hw *hw,
+					 struct ethtool_link_ksettings *cmd)
+{
+	if (!ixgbe_isbackplane(hw->phy.media_type)) {
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     10000baseT_Full);
+		return;
+	}
+
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_82598:
+	case IXGBE_DEV_ID_82599_KX4:
+	case IXGBE_DEV_ID_82599_KX4_MEZZ:
+	case IXGBE_DEV_ID_X550EM_X_KX4:
+		ethtool_link_ksettings_add_link_mode
+			(cmd, supported, 10000baseKX4_Full);
+		break;
+	case IXGBE_DEV_ID_82598_BX:
+	case IXGBE_DEV_ID_82599_KR:
+	case IXGBE_DEV_ID_X550EM_X_KR:
+	case IXGBE_DEV_ID_X550EM_X_XFI:
+		ethtool_link_ksettings_add_link_mode
+			(cmd, supported, 10000baseKR_Full);
+		break;
+	default:
+		ethtool_link_ksettings_add_link_mode
+			(cmd, supported, 10000baseKX4_Full);
+		ethtool_link_ksettings_add_link_mode
+			(cmd, supported, 10000baseKR_Full);
+		break;
+	}
+}
+
+static void ixgbe_set_advertising_10gtypes(struct ixgbe_hw *hw,
+					   struct ethtool_link_ksettings *cmd)
+{
+	if (!ixgbe_isbackplane(hw->phy.media_type)) {
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     10000baseT_Full);
+		return;
+	}
+
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_82598:
+	case IXGBE_DEV_ID_82599_KX4:
+	case IXGBE_DEV_ID_82599_KX4_MEZZ:
+	case IXGBE_DEV_ID_X550EM_X_KX4:
+		ethtool_link_ksettings_add_link_mode
+			(cmd, advertising, 10000baseKX4_Full);
+		break;
+	case IXGBE_DEV_ID_82598_BX:
+	case IXGBE_DEV_ID_82599_KR:
+	case IXGBE_DEV_ID_X550EM_X_KR:
+	case IXGBE_DEV_ID_X550EM_X_XFI:
+		ethtool_link_ksettings_add_link_mode
+			(cmd, advertising, 10000baseKR_Full);
+		break;
+	default:
+		ethtool_link_ksettings_add_link_mode
+			(cmd, advertising, 10000baseKX4_Full);
+		ethtool_link_ksettings_add_link_mode
+			(cmd, advertising, 10000baseKR_Full);
+		break;
+	}
+}
+
+static int ixgbe_get_link_ksettings(struct net_device *netdev,
+				    struct ethtool_link_ksettings *cmd)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	ixgbe_link_speed supported_link;
+	bool autoneg = false;
+
+	ethtool_link_ksettings_zero_link_mode(cmd, supported);
+	ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+
+	hw->mac.ops.get_link_capabilities(hw, &supported_link, &autoneg);
+
+	/* set the supported link speeds */
+	if (supported_link & IXGBE_LINK_SPEED_10GB_FULL) {
+		ixgbe_set_supported_10gtypes(hw, cmd);
+		ixgbe_set_advertising_10gtypes(hw, cmd);
+	}
+	if (supported_link & IXGBE_LINK_SPEED_5GB_FULL)
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     5000baseT_Full);
+
+	if (supported_link & IXGBE_LINK_SPEED_2_5GB_FULL)
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     2500baseT_Full);
+
+	if (supported_link & IXGBE_LINK_SPEED_1GB_FULL) {
+		if (ixgbe_isbackplane(hw->phy.media_type)) {
+			ethtool_link_ksettings_add_link_mode(cmd, supported,
+							     1000baseKX_Full);
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     1000baseKX_Full);
+		} else {
+			ethtool_link_ksettings_add_link_mode(cmd, supported,
+							     1000baseT_Full);
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     1000baseT_Full);
+		}
+	}
+	if (supported_link & IXGBE_LINK_SPEED_100_FULL) {
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     100baseT_Full);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     100baseT_Full);
+	}
+	if (supported_link & IXGBE_LINK_SPEED_10_FULL) {
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     10baseT_Full);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     10baseT_Full);
+	}
+
+	/* set the advertised speeds */
+	if (hw->phy.autoneg_advertised) {
+		ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10_FULL)
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     10baseT_Full);
+		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL)
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     100baseT_Full);
+		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL)
+			ixgbe_set_advertising_10gtypes(hw, cmd);
+		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) {
+			if (ethtool_link_ksettings_test_link_mode
+				(cmd, supported, 1000baseKX_Full))
+				ethtool_link_ksettings_add_link_mode
+					(cmd, advertising, 1000baseKX_Full);
+			else
+				ethtool_link_ksettings_add_link_mode
+					(cmd, advertising, 1000baseT_Full);
+		}
+		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_5GB_FULL)
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     5000baseT_Full);
+		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_2_5GB_FULL)
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     2500baseT_Full);
+	} else {
+		if (hw->phy.multispeed_fiber && !autoneg) {
+			if (supported_link & IXGBE_LINK_SPEED_10GB_FULL)
+				ethtool_link_ksettings_add_link_mode
+					(cmd, advertising, 10000baseT_Full);
+		}
+	}
+
+	if (autoneg) {
+		ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg);
+		cmd->base.autoneg = AUTONEG_ENABLE;
+	} else
+		cmd->base.autoneg = AUTONEG_DISABLE;
+
+	/* Determine the remaining settings based on the PHY type. */
+	switch (adapter->hw.phy.type) {
+	case ixgbe_phy_tn:
+	case ixgbe_phy_aq:
+	case ixgbe_phy_x550em_ext_t:
+	case ixgbe_phy_fw:
+	case ixgbe_phy_cu_unknown:
+		ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
+		cmd->base.port = PORT_TP;
+		break;
+	case ixgbe_phy_qt:
+		ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
+		cmd->base.port = PORT_FIBRE;
+		break;
+	case ixgbe_phy_nl:
+	case ixgbe_phy_sfp_passive_tyco:
+	case ixgbe_phy_sfp_passive_unknown:
+	case ixgbe_phy_sfp_ftl:
+	case ixgbe_phy_sfp_avago:
+	case ixgbe_phy_sfp_intel:
+	case ixgbe_phy_sfp_unknown:
+	case ixgbe_phy_qsfp_passive_unknown:
+	case ixgbe_phy_qsfp_active_unknown:
+	case ixgbe_phy_qsfp_intel:
+	case ixgbe_phy_qsfp_unknown:
+		/* SFP+ devices, further checking needed */
+		switch (adapter->hw.phy.sfp_type) {
+		case ixgbe_sfp_type_da_cu:
+		case ixgbe_sfp_type_da_cu_core0:
+		case ixgbe_sfp_type_da_cu_core1:
+			ethtool_link_ksettings_add_link_mode(cmd, supported,
+							     FIBRE);
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     FIBRE);
+			cmd->base.port = PORT_DA;
+			break;
+		case ixgbe_sfp_type_sr:
+		case ixgbe_sfp_type_lr:
+		case ixgbe_sfp_type_srlr_core0:
+		case ixgbe_sfp_type_srlr_core1:
+		case ixgbe_sfp_type_1g_sx_core0:
+		case ixgbe_sfp_type_1g_sx_core1:
+		case ixgbe_sfp_type_1g_lx_core0:
+		case ixgbe_sfp_type_1g_lx_core1:
+			ethtool_link_ksettings_add_link_mode(cmd, supported,
+							     FIBRE);
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     FIBRE);
+			cmd->base.port = PORT_FIBRE;
+			break;
+		case ixgbe_sfp_type_not_present:
+			ethtool_link_ksettings_add_link_mode(cmd, supported,
+							     FIBRE);
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     FIBRE);
+			cmd->base.port = PORT_NONE;
+			break;
+		case ixgbe_sfp_type_1g_cu_core0:
+		case ixgbe_sfp_type_1g_cu_core1:
+			ethtool_link_ksettings_add_link_mode(cmd, supported,
+							     TP);
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     TP);
+			cmd->base.port = PORT_TP;
+			break;
+		case ixgbe_sfp_type_unknown:
+		default:
+			ethtool_link_ksettings_add_link_mode(cmd, supported,
+							     FIBRE);
+			ethtool_link_ksettings_add_link_mode(cmd, advertising,
+							     FIBRE);
+			cmd->base.port = PORT_OTHER;
+			break;
+		}
+		break;
+	case ixgbe_phy_xaui:
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     FIBRE);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     FIBRE);
+		cmd->base.port = PORT_NONE;
+		break;
+	case ixgbe_phy_unknown:
+	case ixgbe_phy_generic:
+	case ixgbe_phy_sfp_unsupported:
+	default:
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     FIBRE);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     FIBRE);
+		cmd->base.port = PORT_OTHER;
+		break;
+	}
+
+	/* Indicate pause support */
+	ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+
+	switch (hw->fc.requested_mode) {
+	case ixgbe_fc_full:
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
+		break;
+	case ixgbe_fc_rx_pause:
+		ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Asym_Pause);
+		break;
+	case ixgbe_fc_tx_pause:
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     Asym_Pause);
+		break;
+	default:
+		ethtool_link_ksettings_del_link_mode(cmd, advertising, Pause);
+		ethtool_link_ksettings_del_link_mode(cmd, advertising,
+						     Asym_Pause);
+	}
+
+	if (netif_carrier_ok(netdev)) {
+		switch (adapter->link_speed) {
+		case IXGBE_LINK_SPEED_10GB_FULL:
+			cmd->base.speed = SPEED_10000;
+			break;
+		case IXGBE_LINK_SPEED_5GB_FULL:
+			cmd->base.speed = SPEED_5000;
+			break;
+		case IXGBE_LINK_SPEED_2_5GB_FULL:
+			cmd->base.speed = SPEED_2500;
+			break;
+		case IXGBE_LINK_SPEED_1GB_FULL:
+			cmd->base.speed = SPEED_1000;
+			break;
+		case IXGBE_LINK_SPEED_100_FULL:
+			cmd->base.speed = SPEED_100;
+			break;
+		case IXGBE_LINK_SPEED_10_FULL:
+			cmd->base.speed = SPEED_10;
+			break;
+		default:
+			break;
+		}
+		cmd->base.duplex = DUPLEX_FULL;
+	} else {
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
+	}
+
+	return 0;
+}
+
+static int ixgbe_set_link_ksettings(struct net_device *netdev,
+				    const struct ethtool_link_ksettings *cmd)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 advertised, old;
+	s32 err = 0;
+
+	if ((hw->phy.media_type == ixgbe_media_type_copper) ||
+	    (hw->phy.multispeed_fiber)) {
+		/*
+		 * this function does not support duplex forcing, but can
+		 * limit the advertising of the adapter to the specified speed
+		 */
+		if (!linkmode_subset(cmd->link_modes.advertising,
+				     cmd->link_modes.supported))
+			return -EINVAL;
+
+		/* only allow one speed at a time if no autoneg */
+		if (!cmd->base.autoneg && hw->phy.multispeed_fiber) {
+			if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+								  10000baseT_Full) &&
+			    ethtool_link_ksettings_test_link_mode(cmd, advertising,
+								  1000baseT_Full))
+				return -EINVAL;
+		}
+
+		old = hw->phy.autoneg_advertised;
+		advertised = 0;
+		if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+							  10000baseT_Full))
+			advertised |= IXGBE_LINK_SPEED_10GB_FULL;
+		if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+							  5000baseT_Full))
+			advertised |= IXGBE_LINK_SPEED_5GB_FULL;
+		if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+							  2500baseT_Full))
+			advertised |= IXGBE_LINK_SPEED_2_5GB_FULL;
+		if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+							  1000baseT_Full))
+			advertised |= IXGBE_LINK_SPEED_1GB_FULL;
+
+		if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+							  100baseT_Full))
+			advertised |= IXGBE_LINK_SPEED_100_FULL;
+
+		if (ethtool_link_ksettings_test_link_mode(cmd, advertising,
+							  10baseT_Full))
+			advertised |= IXGBE_LINK_SPEED_10_FULL;
+
+		if (old == advertised)
+			return err;
+		/* this sets the link speed and restarts auto-neg */
+		while (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state))
+			usleep_range(1000, 2000);
+
+		hw->mac.autotry_restart = true;
+		err = hw->mac.ops.setup_link(hw, advertised, true);
+		if (err) {
+			e_info(probe, "setup link failed with code %d\n", err);
+			hw->mac.ops.setup_link(hw, old, true);
+		}
+		clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
+	} else {
+		/* in this case we currently only support 10Gb/FULL */
+		u32 speed = cmd->base.speed;
+
+		if ((cmd->base.autoneg == AUTONEG_ENABLE) ||
+		    (!ethtool_link_ksettings_test_link_mode(cmd, advertising,
+							    10000baseT_Full)) ||
+		    (speed + cmd->base.duplex != SPEED_10000 + DUPLEX_FULL))
+			return -EINVAL;
+	}
+
+	return err;
+}
+
+static void ixgbe_get_pause_stats(struct net_device *netdev,
+				  struct ethtool_pause_stats *stats)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw_stats *hwstats = &adapter->stats;
+
+	stats->tx_pause_frames = hwstats->lxontxc + hwstats->lxofftxc;
+	stats->rx_pause_frames = hwstats->lxonrxc + hwstats->lxoffrxc;
+}
+
+static void ixgbe_get_pauseparam(struct net_device *netdev,
+				 struct ethtool_pauseparam *pause)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	if (ixgbe_device_supports_autoneg_fc(hw) &&
+	    !hw->fc.disable_fc_autoneg)
+		pause->autoneg = 1;
+	else
+		pause->autoneg = 0;
+
+	if (hw->fc.current_mode == ixgbe_fc_rx_pause) {
+		pause->rx_pause = 1;
+	} else if (hw->fc.current_mode == ixgbe_fc_tx_pause) {
+		pause->tx_pause = 1;
+	} else if (hw->fc.current_mode == ixgbe_fc_full) {
+		pause->rx_pause = 1;
+		pause->tx_pause = 1;
+	}
+}
+
+static int ixgbe_set_pauseparam(struct net_device *netdev,
+				struct ethtool_pauseparam *pause)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_fc_info fc = hw->fc;
+
+	/* 82598 does no support link flow control with DCB enabled */
+	if ((hw->mac.type == ixgbe_mac_82598EB) &&
+	    (adapter->flags & IXGBE_FLAG_DCB_ENABLED))
+		return -EINVAL;
+
+	/* some devices do not support autoneg of link flow control */
+	if ((pause->autoneg == AUTONEG_ENABLE) &&
+	    !ixgbe_device_supports_autoneg_fc(hw))
+		return -EINVAL;
+
+	fc.disable_fc_autoneg = (pause->autoneg != AUTONEG_ENABLE);
+
+	if ((pause->rx_pause && pause->tx_pause) || pause->autoneg)
+		fc.requested_mode = ixgbe_fc_full;
+	else if (pause->rx_pause && !pause->tx_pause)
+		fc.requested_mode = ixgbe_fc_rx_pause;
+	else if (!pause->rx_pause && pause->tx_pause)
+		fc.requested_mode = ixgbe_fc_tx_pause;
+	else
+		fc.requested_mode = ixgbe_fc_none;
+
+	/* if the thing changed then we'll update and use new autoneg */
+	if (memcmp(&fc, &hw->fc, sizeof(struct ixgbe_fc_info))) {
+		hw->fc = fc;
+		if (netif_running(netdev))
+			ixgbe_reinit_locked(adapter);
+		else
+			ixgbe_reset(adapter);
+	}
+
+	return 0;
+}
+
+static u32 ixgbe_get_msglevel(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	return adapter->msg_enable;
+}
+
+static void ixgbe_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	adapter->msg_enable = data;
+}
+
+static int ixgbe_get_regs_len(struct net_device *netdev)
+{
+#define IXGBE_REGS_LEN  1145
+	return IXGBE_REGS_LEN * sizeof(u32);
+}
+
+#define IXGBE_GET_STAT(_A_, _R_) _A_->stats._R_
+
+static void ixgbe_get_regs(struct net_device *netdev,
+			   struct ethtool_regs *regs, void *p)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 *regs_buff = p;
+	u8 i;
+
+	memset(p, 0, IXGBE_REGS_LEN * sizeof(u32));
+
+	regs->version = hw->mac.type << 24 | hw->revision_id << 16 |
+			hw->device_id;
+
+	/* General Registers */
+	regs_buff[0] = IXGBE_READ_REG(hw, IXGBE_CTRL);
+	regs_buff[1] = IXGBE_READ_REG(hw, IXGBE_STATUS);
+	regs_buff[2] = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
+	regs_buff[3] = IXGBE_READ_REG(hw, IXGBE_ESDP);
+	regs_buff[4] = IXGBE_READ_REG(hw, IXGBE_EODSDP);
+	regs_buff[5] = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+	regs_buff[6] = IXGBE_READ_REG(hw, IXGBE_FRTIMER);
+	regs_buff[7] = IXGBE_READ_REG(hw, IXGBE_TCPTIMER);
+
+	/* NVM Register */
+	regs_buff[8] = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+	regs_buff[9] = IXGBE_READ_REG(hw, IXGBE_EERD);
+	regs_buff[10] = IXGBE_READ_REG(hw, IXGBE_FLA(hw));
+	regs_buff[11] = IXGBE_READ_REG(hw, IXGBE_EEMNGCTL);
+	regs_buff[12] = IXGBE_READ_REG(hw, IXGBE_EEMNGDATA);
+	regs_buff[13] = IXGBE_READ_REG(hw, IXGBE_FLMNGCTL);
+	regs_buff[14] = IXGBE_READ_REG(hw, IXGBE_FLMNGDATA);
+	regs_buff[15] = IXGBE_READ_REG(hw, IXGBE_FLMNGCNT);
+	regs_buff[16] = IXGBE_READ_REG(hw, IXGBE_FLOP);
+	regs_buff[17] = IXGBE_READ_REG(hw, IXGBE_GRC(hw));
+
+	/* Interrupt */
+	/* don't read EICR because it can clear interrupt causes, instead
+	 * read EICS which is a shadow but doesn't clear EICR */
+	regs_buff[18] = IXGBE_READ_REG(hw, IXGBE_EICS);
+	regs_buff[19] = IXGBE_READ_REG(hw, IXGBE_EICS);
+	regs_buff[20] = IXGBE_READ_REG(hw, IXGBE_EIMS);
+	regs_buff[21] = IXGBE_READ_REG(hw, IXGBE_EIMC);
+	regs_buff[22] = IXGBE_READ_REG(hw, IXGBE_EIAC);
+	regs_buff[23] = IXGBE_READ_REG(hw, IXGBE_EIAM);
+	regs_buff[24] = IXGBE_READ_REG(hw, IXGBE_EITR(0));
+	regs_buff[25] = IXGBE_READ_REG(hw, IXGBE_IVAR(0));
+	regs_buff[26] = IXGBE_READ_REG(hw, IXGBE_MSIXT);
+	regs_buff[27] = IXGBE_READ_REG(hw, IXGBE_MSIXPBA);
+	regs_buff[28] = IXGBE_READ_REG(hw, IXGBE_PBACL(0));
+	regs_buff[29] = IXGBE_READ_REG(hw, IXGBE_GPIE);
+
+	/* Flow Control */
+	regs_buff[30] = IXGBE_READ_REG(hw, IXGBE_PFCTOP);
+	for (i = 0; i < 4; i++)
+		regs_buff[31 + i] = IXGBE_READ_REG(hw, IXGBE_FCTTV(i));
+	for (i = 0; i < 8; i++) {
+		switch (hw->mac.type) {
+		case ixgbe_mac_82598EB:
+			regs_buff[35 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTL(i));
+			regs_buff[43 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTH(i));
+			break;
+		case ixgbe_mac_82599EB:
+		case ixgbe_mac_X540:
+		case ixgbe_mac_X550:
+		case ixgbe_mac_X550EM_x:
+		case ixgbe_mac_x550em_a:
+			regs_buff[35 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTL_82599(i));
+			regs_buff[43 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTH_82599(i));
+			break;
+		default:
+			break;
+		}
+	}
+	regs_buff[51] = IXGBE_READ_REG(hw, IXGBE_FCRTV);
+	regs_buff[52] = IXGBE_READ_REG(hw, IXGBE_TFCS);
+
+	/* Receive DMA */
+	for (i = 0; i < 64; i++)
+		regs_buff[53 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAL(i));
+	for (i = 0; i < 64; i++)
+		regs_buff[117 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAH(i));
+	for (i = 0; i < 64; i++)
+		regs_buff[181 + i] = IXGBE_READ_REG(hw, IXGBE_RDLEN(i));
+	for (i = 0; i < 64; i++)
+		regs_buff[245 + i] = IXGBE_READ_REG(hw, IXGBE_RDH(i));
+	for (i = 0; i < 64; i++)
+		regs_buff[309 + i] = IXGBE_READ_REG(hw, IXGBE_RDT(i));
+	for (i = 0; i < 64; i++)
+		regs_buff[373 + i] = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[437 + i] = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[453 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
+	regs_buff[469] = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+	for (i = 0; i < 8; i++)
+		regs_buff[470 + i] = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i));
+	regs_buff[478] = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+	regs_buff[479] = IXGBE_READ_REG(hw, IXGBE_DROPEN);
+
+	/* Receive */
+	regs_buff[480] = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
+	regs_buff[481] = IXGBE_READ_REG(hw, IXGBE_RFCTL);
+	for (i = 0; i < 16; i++)
+		regs_buff[482 + i] = IXGBE_READ_REG(hw, IXGBE_RAL(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[498 + i] = IXGBE_READ_REG(hw, IXGBE_RAH(i));
+	regs_buff[514] = IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0));
+	regs_buff[515] = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+	regs_buff[516] = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+	regs_buff[517] = IXGBE_READ_REG(hw, IXGBE_MCSTCTRL);
+	regs_buff[518] = IXGBE_READ_REG(hw, IXGBE_MRQC);
+	regs_buff[519] = IXGBE_READ_REG(hw, IXGBE_VMD_CTL);
+	for (i = 0; i < 8; i++)
+		regs_buff[520 + i] = IXGBE_READ_REG(hw, IXGBE_IMIR(i));
+	for (i = 0; i < 8; i++)
+		regs_buff[528 + i] = IXGBE_READ_REG(hw, IXGBE_IMIREXT(i));
+	regs_buff[536] = IXGBE_READ_REG(hw, IXGBE_IMIRVP);
+
+	/* Transmit */
+	for (i = 0; i < 32; i++)
+		regs_buff[537 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAL(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[569 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAH(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[601 + i] = IXGBE_READ_REG(hw, IXGBE_TDLEN(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[633 + i] = IXGBE_READ_REG(hw, IXGBE_TDH(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[665 + i] = IXGBE_READ_REG(hw, IXGBE_TDT(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[697 + i] = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[729 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAL(i));
+	for (i = 0; i < 32; i++)
+		regs_buff[761 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAH(i));
+	regs_buff[793] = IXGBE_READ_REG(hw, IXGBE_DTXCTL);
+	for (i = 0; i < 16; i++)
+		regs_buff[794 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
+	regs_buff[810] = IXGBE_READ_REG(hw, IXGBE_TIPG);
+	for (i = 0; i < 8; i++)
+		regs_buff[811 + i] = IXGBE_READ_REG(hw, IXGBE_TXPBSIZE(i));
+	regs_buff[819] = IXGBE_READ_REG(hw, IXGBE_MNGTXMAP);
+
+	/* Wake Up */
+	regs_buff[820] = IXGBE_READ_REG(hw, IXGBE_WUC);
+	regs_buff[821] = IXGBE_READ_REG(hw, IXGBE_WUFC);
+	regs_buff[822] = IXGBE_READ_REG(hw, IXGBE_WUS);
+	regs_buff[823] = IXGBE_READ_REG(hw, IXGBE_IPAV);
+	regs_buff[824] = IXGBE_READ_REG(hw, IXGBE_IP4AT);
+	regs_buff[825] = IXGBE_READ_REG(hw, IXGBE_IP6AT);
+	regs_buff[826] = IXGBE_READ_REG(hw, IXGBE_WUPL);
+	regs_buff[827] = IXGBE_READ_REG(hw, IXGBE_WUPM);
+	regs_buff[828] = IXGBE_READ_REG(hw, IXGBE_FHFT(0));
+
+	/* DCB */
+	regs_buff[829] = IXGBE_READ_REG(hw, IXGBE_RMCS);   /* same as FCCFG  */
+	regs_buff[831] = IXGBE_READ_REG(hw, IXGBE_PDPMCS); /* same as RTTPCS */
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		regs_buff[830] = IXGBE_READ_REG(hw, IXGBE_DPMCS);
+		regs_buff[832] = IXGBE_READ_REG(hw, IXGBE_RUPPBMR);
+		for (i = 0; i < 8; i++)
+			regs_buff[833 + i] =
+				IXGBE_READ_REG(hw, IXGBE_RT2CR(i));
+		for (i = 0; i < 8; i++)
+			regs_buff[841 + i] =
+				IXGBE_READ_REG(hw, IXGBE_RT2SR(i));
+		for (i = 0; i < 8; i++)
+			regs_buff[849 + i] =
+				IXGBE_READ_REG(hw, IXGBE_TDTQ2TCCR(i));
+		for (i = 0; i < 8; i++)
+			regs_buff[857 + i] =
+				IXGBE_READ_REG(hw, IXGBE_TDTQ2TCSR(i));
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		regs_buff[830] = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
+		regs_buff[832] = IXGBE_READ_REG(hw, IXGBE_RTRPCS);
+		for (i = 0; i < 8; i++)
+			regs_buff[833 + i] =
+				IXGBE_READ_REG(hw, IXGBE_RTRPT4C(i));
+		for (i = 0; i < 8; i++)
+			regs_buff[841 + i] =
+				IXGBE_READ_REG(hw, IXGBE_RTRPT4S(i));
+		for (i = 0; i < 8; i++)
+			regs_buff[849 + i] =
+				IXGBE_READ_REG(hw, IXGBE_RTTDT2C(i));
+		for (i = 0; i < 8; i++)
+			regs_buff[857 + i] =
+				IXGBE_READ_REG(hw, IXGBE_RTTDT2S(i));
+		break;
+	default:
+		break;
+	}
+
+	for (i = 0; i < 8; i++)
+		regs_buff[865 + i] =
+		IXGBE_READ_REG(hw, IXGBE_TDPT2TCCR(i)); /* same as RTTPT2C */
+	for (i = 0; i < 8; i++)
+		regs_buff[873 + i] =
+		IXGBE_READ_REG(hw, IXGBE_TDPT2TCSR(i)); /* same as RTTPT2S */
+
+	/* Statistics */
+	regs_buff[881] = IXGBE_GET_STAT(adapter, crcerrs);
+	regs_buff[882] = IXGBE_GET_STAT(adapter, illerrc);
+	regs_buff[883] = IXGBE_GET_STAT(adapter, errbc);
+	regs_buff[884] = IXGBE_GET_STAT(adapter, mspdc);
+	for (i = 0; i < 8; i++)
+		regs_buff[885 + i] = IXGBE_GET_STAT(adapter, mpc[i]);
+	regs_buff[893] = IXGBE_GET_STAT(adapter, mlfc);
+	regs_buff[894] = IXGBE_GET_STAT(adapter, mrfc);
+	regs_buff[895] = IXGBE_GET_STAT(adapter, rlec);
+	regs_buff[896] = IXGBE_GET_STAT(adapter, lxontxc);
+	regs_buff[897] = IXGBE_GET_STAT(adapter, lxonrxc);
+	regs_buff[898] = IXGBE_GET_STAT(adapter, lxofftxc);
+	regs_buff[899] = IXGBE_GET_STAT(adapter, lxoffrxc);
+	for (i = 0; i < 8; i++)
+		regs_buff[900 + i] = IXGBE_GET_STAT(adapter, pxontxc[i]);
+	for (i = 0; i < 8; i++)
+		regs_buff[908 + i] = IXGBE_GET_STAT(adapter, pxonrxc[i]);
+	for (i = 0; i < 8; i++)
+		regs_buff[916 + i] = IXGBE_GET_STAT(adapter, pxofftxc[i]);
+	for (i = 0; i < 8; i++)
+		regs_buff[924 + i] = IXGBE_GET_STAT(adapter, pxoffrxc[i]);
+	regs_buff[932] = IXGBE_GET_STAT(adapter, prc64);
+	regs_buff[933] = IXGBE_GET_STAT(adapter, prc127);
+	regs_buff[934] = IXGBE_GET_STAT(adapter, prc255);
+	regs_buff[935] = IXGBE_GET_STAT(adapter, prc511);
+	regs_buff[936] = IXGBE_GET_STAT(adapter, prc1023);
+	regs_buff[937] = IXGBE_GET_STAT(adapter, prc1522);
+	regs_buff[938] = IXGBE_GET_STAT(adapter, gprc);
+	regs_buff[939] = IXGBE_GET_STAT(adapter, bprc);
+	regs_buff[940] = IXGBE_GET_STAT(adapter, mprc);
+	regs_buff[941] = IXGBE_GET_STAT(adapter, gptc);
+	regs_buff[942] = (u32)IXGBE_GET_STAT(adapter, gorc);
+	regs_buff[943] = (u32)(IXGBE_GET_STAT(adapter, gorc) >> 32);
+	regs_buff[944] = (u32)IXGBE_GET_STAT(adapter, gotc);
+	regs_buff[945] = (u32)(IXGBE_GET_STAT(adapter, gotc) >> 32);
+	for (i = 0; i < 8; i++)
+		regs_buff[946 + i] = IXGBE_GET_STAT(adapter, rnbc[i]);
+	regs_buff[954] = IXGBE_GET_STAT(adapter, ruc);
+	regs_buff[955] = IXGBE_GET_STAT(adapter, rfc);
+	regs_buff[956] = IXGBE_GET_STAT(adapter, roc);
+	regs_buff[957] = IXGBE_GET_STAT(adapter, rjc);
+	regs_buff[958] = IXGBE_GET_STAT(adapter, mngprc);
+	regs_buff[959] = IXGBE_GET_STAT(adapter, mngpdc);
+	regs_buff[960] = IXGBE_GET_STAT(adapter, mngptc);
+	regs_buff[961] = (u32)IXGBE_GET_STAT(adapter, tor);
+	regs_buff[962] = (u32)(IXGBE_GET_STAT(adapter, tor) >> 32);
+	regs_buff[963] = IXGBE_GET_STAT(adapter, tpr);
+	regs_buff[964] = IXGBE_GET_STAT(adapter, tpt);
+	regs_buff[965] = IXGBE_GET_STAT(adapter, ptc64);
+	regs_buff[966] = IXGBE_GET_STAT(adapter, ptc127);
+	regs_buff[967] = IXGBE_GET_STAT(adapter, ptc255);
+	regs_buff[968] = IXGBE_GET_STAT(adapter, ptc511);
+	regs_buff[969] = IXGBE_GET_STAT(adapter, ptc1023);
+	regs_buff[970] = IXGBE_GET_STAT(adapter, ptc1522);
+	regs_buff[971] = IXGBE_GET_STAT(adapter, mptc);
+	regs_buff[972] = IXGBE_GET_STAT(adapter, bptc);
+	regs_buff[973] = IXGBE_GET_STAT(adapter, xec);
+	for (i = 0; i < 16; i++)
+		regs_buff[974 + i] = IXGBE_GET_STAT(adapter, qprc[i]);
+	for (i = 0; i < 16; i++)
+		regs_buff[990 + i] = IXGBE_GET_STAT(adapter, qptc[i]);
+	for (i = 0; i < 16; i++)
+		regs_buff[1006 + i] = IXGBE_GET_STAT(adapter, qbrc[i]);
+	for (i = 0; i < 16; i++)
+		regs_buff[1022 + i] = IXGBE_GET_STAT(adapter, qbtc[i]);
+
+	/* MAC */
+	regs_buff[1038] = IXGBE_READ_REG(hw, IXGBE_PCS1GCFIG);
+	regs_buff[1039] = IXGBE_READ_REG(hw, IXGBE_PCS1GLCTL);
+	regs_buff[1040] = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA);
+	regs_buff[1041] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG0);
+	regs_buff[1042] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG1);
+	regs_buff[1043] = IXGBE_READ_REG(hw, IXGBE_PCS1GANA);
+	regs_buff[1044] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP);
+	regs_buff[1045] = IXGBE_READ_REG(hw, IXGBE_PCS1GANNP);
+	regs_buff[1046] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLPNP);
+	regs_buff[1047] = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+	regs_buff[1048] = IXGBE_READ_REG(hw, IXGBE_HLREG1);
+	regs_buff[1049] = IXGBE_READ_REG(hw, IXGBE_PAP);
+	regs_buff[1050] = IXGBE_READ_REG(hw, IXGBE_MACA);
+	regs_buff[1051] = IXGBE_READ_REG(hw, IXGBE_APAE);
+	regs_buff[1052] = IXGBE_READ_REG(hw, IXGBE_ARD);
+	regs_buff[1053] = IXGBE_READ_REG(hw, IXGBE_AIS);
+	regs_buff[1054] = IXGBE_READ_REG(hw, IXGBE_MSCA);
+	regs_buff[1055] = IXGBE_READ_REG(hw, IXGBE_MSRWD);
+	regs_buff[1056] = IXGBE_READ_REG(hw, IXGBE_MLADD);
+	regs_buff[1057] = IXGBE_READ_REG(hw, IXGBE_MHADD);
+	regs_buff[1058] = IXGBE_READ_REG(hw, IXGBE_TREG);
+	regs_buff[1059] = IXGBE_READ_REG(hw, IXGBE_PCSS1);
+	regs_buff[1060] = IXGBE_READ_REG(hw, IXGBE_PCSS2);
+	regs_buff[1061] = IXGBE_READ_REG(hw, IXGBE_XPCSS);
+	regs_buff[1062] = IXGBE_READ_REG(hw, IXGBE_SERDESC);
+	regs_buff[1063] = IXGBE_READ_REG(hw, IXGBE_MACS);
+	regs_buff[1064] = IXGBE_READ_REG(hw, IXGBE_AUTOC);
+	regs_buff[1065] = IXGBE_READ_REG(hw, IXGBE_LINKS);
+	regs_buff[1066] = IXGBE_READ_REG(hw, IXGBE_AUTOC2);
+	regs_buff[1067] = IXGBE_READ_REG(hw, IXGBE_AUTOC3);
+	regs_buff[1068] = IXGBE_READ_REG(hw, IXGBE_ANLP1);
+	regs_buff[1069] = IXGBE_READ_REG(hw, IXGBE_ANLP2);
+	regs_buff[1070] = IXGBE_READ_REG(hw, IXGBE_ATLASCTL);
+
+	/* Diagnostic */
+	regs_buff[1071] = IXGBE_READ_REG(hw, IXGBE_RDSTATCTL);
+	for (i = 0; i < 8; i++)
+		regs_buff[1072 + i] = IXGBE_READ_REG(hw, IXGBE_RDSTAT(i));
+	regs_buff[1080] = IXGBE_READ_REG(hw, IXGBE_RDHMPN);
+	for (i = 0; i < 4; i++)
+		regs_buff[1081 + i] = IXGBE_READ_REG(hw, IXGBE_RIC_DW(i));
+	regs_buff[1085] = IXGBE_READ_REG(hw, IXGBE_RDPROBE);
+	regs_buff[1086] = IXGBE_READ_REG(hw, IXGBE_TDSTATCTL);
+	for (i = 0; i < 8; i++)
+		regs_buff[1087 + i] = IXGBE_READ_REG(hw, IXGBE_TDSTAT(i));
+	regs_buff[1095] = IXGBE_READ_REG(hw, IXGBE_TDHMPN);
+	for (i = 0; i < 4; i++)
+		regs_buff[1096 + i] = IXGBE_READ_REG(hw, IXGBE_TIC_DW(i));
+	regs_buff[1100] = IXGBE_READ_REG(hw, IXGBE_TDPROBE);
+	regs_buff[1101] = IXGBE_READ_REG(hw, IXGBE_TXBUFCTRL);
+	for (i = 0; i < 4; i++)
+		regs_buff[1102 + i] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA(i));
+	regs_buff[1106] = IXGBE_READ_REG(hw, IXGBE_RXBUFCTRL);
+	for (i = 0; i < 4; i++)
+		regs_buff[1107 + i] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA(i));
+	for (i = 0; i < 8; i++)
+		regs_buff[1111 + i] = IXGBE_READ_REG(hw, IXGBE_PCIE_DIAG(i));
+	regs_buff[1119] = IXGBE_READ_REG(hw, IXGBE_RFVAL);
+	regs_buff[1120] = IXGBE_READ_REG(hw, IXGBE_MDFTC1);
+	regs_buff[1121] = IXGBE_READ_REG(hw, IXGBE_MDFTC2);
+	regs_buff[1122] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO1);
+	regs_buff[1123] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO2);
+	regs_buff[1124] = IXGBE_READ_REG(hw, IXGBE_MDFTS);
+	regs_buff[1125] = IXGBE_READ_REG(hw, IXGBE_PCIEECCCTL);
+	regs_buff[1126] = IXGBE_READ_REG(hw, IXGBE_PBTXECC);
+	regs_buff[1127] = IXGBE_READ_REG(hw, IXGBE_PBRXECC);
+
+	/* 82599 X540 specific registers  */
+	regs_buff[1128] = IXGBE_READ_REG(hw, IXGBE_MFLCN);
+
+	/* 82599 X540 specific DCB registers  */
+	regs_buff[1129] = IXGBE_READ_REG(hw, IXGBE_RTRUP2TC);
+	regs_buff[1130] = IXGBE_READ_REG(hw, IXGBE_RTTUP2TC);
+	for (i = 0; i < 4; i++)
+		regs_buff[1131 + i] = IXGBE_READ_REG(hw, IXGBE_TXLLQ(i));
+	regs_buff[1135] = IXGBE_READ_REG(hw, IXGBE_RTTBCNRM);
+					/* same as RTTQCNRM */
+	regs_buff[1136] = IXGBE_READ_REG(hw, IXGBE_RTTBCNRD);
+					/* same as RTTQCNRR */
+
+	/* X540 specific DCB registers  */
+	regs_buff[1137] = IXGBE_READ_REG(hw, IXGBE_RTTQCNCR);
+	regs_buff[1138] = IXGBE_READ_REG(hw, IXGBE_RTTQCNTG);
+
+	/* Security config registers */
+	regs_buff[1139] = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+	regs_buff[1140] = IXGBE_READ_REG(hw, IXGBE_SECTXSTAT);
+	regs_buff[1141] = IXGBE_READ_REG(hw, IXGBE_SECTXBUFFAF);
+	regs_buff[1142] = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+	regs_buff[1143] = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+	regs_buff[1144] = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT);
+}
+
+static int ixgbe_get_eeprom_len(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	return adapter->hw.eeprom.word_size * 2;
+}
+
+static int ixgbe_get_eeprom(struct net_device *netdev,
+			    struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u16 *eeprom_buff;
+	int first_word, last_word, eeprom_len;
+	int ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EINVAL;
+
+	eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+	eeprom_len = last_word - first_word + 1;
+
+	eeprom_buff = kmalloc_array(eeprom_len, sizeof(u16), GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	ret_val = hw->eeprom.ops.read_buffer(hw, first_word, eeprom_len,
+					     eeprom_buff);
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < eeprom_len; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), eeprom->len);
+	kfree(eeprom_buff);
+
+	return ret_val;
+}
+
+static int ixgbe_set_eeprom(struct net_device *netdev,
+			    struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u16 *eeprom_buff;
+	void *ptr;
+	int max_len, first_word, last_word, ret_val = 0;
+	u16 i;
+
+	if (eeprom->len == 0)
+		return -EINVAL;
+
+	if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
+		return -EINVAL;
+
+	max_len = hw->eeprom.word_size * 2;
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+	eeprom_buff = kmalloc(max_len, GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	ptr = eeprom_buff;
+
+	if (eeprom->offset & 1) {
+		/*
+		 * need read/modify/write of first changed EEPROM word
+		 * only the second byte of the word is being modified
+		 */
+		ret_val = hw->eeprom.ops.read(hw, first_word, &eeprom_buff[0]);
+		if (ret_val)
+			goto err;
+
+		ptr++;
+	}
+	if ((eeprom->offset + eeprom->len) & 1) {
+		/*
+		 * need read/modify/write of last changed EEPROM word
+		 * only the first byte of the word is being modified
+		 */
+		ret_val = hw->eeprom.ops.read(hw, last_word,
+					  &eeprom_buff[last_word - first_word]);
+		if (ret_val)
+			goto err;
+	}
+
+	/* Device's eeprom is always little-endian, word addressable */
+	for (i = 0; i < last_word - first_word + 1; i++)
+		le16_to_cpus(&eeprom_buff[i]);
+
+	memcpy(ptr, bytes, eeprom->len);
+
+	for (i = 0; i < last_word - first_word + 1; i++)
+		cpu_to_le16s(&eeprom_buff[i]);
+
+	ret_val = hw->eeprom.ops.write_buffer(hw, first_word,
+					      last_word - first_word + 1,
+					      eeprom_buff);
+
+	/* Update the checksum */
+	if (ret_val == 0)
+		hw->eeprom.ops.update_checksum(hw);
+
+err:
+	kfree(eeprom_buff);
+	return ret_val;
+}
+
+static void ixgbe_get_drvinfo(struct net_device *netdev,
+			      struct ethtool_drvinfo *drvinfo)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	strscpy(drvinfo->driver, ixgbe_driver_name, sizeof(drvinfo->driver));
+
+	strscpy(drvinfo->fw_version, adapter->eeprom_id,
+		sizeof(drvinfo->fw_version));
+
+	strscpy(drvinfo->bus_info, pci_name(adapter->pdev),
+		sizeof(drvinfo->bus_info));
+
+	drvinfo->n_priv_flags = IXGBE_PRIV_FLAGS_STR_LEN;
+}
+
+static u32 ixgbe_get_max_rxd(struct ixgbe_adapter *adapter)
+{
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82598EB:
+		return IXGBE_MAX_RXD_82598;
+	case ixgbe_mac_82599EB:
+		return IXGBE_MAX_RXD_82599;
+	case ixgbe_mac_X540:
+		return IXGBE_MAX_RXD_X540;
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		return IXGBE_MAX_RXD_X550;
+	default:
+		return IXGBE_MAX_RXD_82598;
+	}
+}
+
+static u32 ixgbe_get_max_txd(struct ixgbe_adapter *adapter)
+{
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82598EB:
+		return IXGBE_MAX_TXD_82598;
+	case ixgbe_mac_82599EB:
+		return IXGBE_MAX_TXD_82599;
+	case ixgbe_mac_X540:
+		return IXGBE_MAX_TXD_X540;
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		return IXGBE_MAX_TXD_X550;
+	default:
+		return IXGBE_MAX_TXD_82598;
+	}
+}
+
+static void ixgbe_get_ringparam(struct net_device *netdev,
+				struct ethtool_ringparam *ring,
+				struct kernel_ethtool_ringparam *kernel_ring,
+				struct netlink_ext_ack *extack)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_ring *tx_ring = adapter->tx_ring[0];
+	struct ixgbe_ring *rx_ring = adapter->rx_ring[0];
+
+	ring->rx_max_pending = ixgbe_get_max_rxd(adapter);
+	ring->tx_max_pending = ixgbe_get_max_txd(adapter);
+	ring->rx_pending = rx_ring->count;
+	ring->tx_pending = tx_ring->count;
+}
+
+static int ixgbe_set_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ring,
+			       struct kernel_ethtool_ringparam *kernel_ring,
+			       struct netlink_ext_ack *extack)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_ring *temp_ring;
+	int i, j, err = 0;
+	u32 new_rx_count, new_tx_count;
+
+	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+		return -EINVAL;
+
+	new_tx_count = clamp_t(u32, ring->tx_pending,
+			       IXGBE_MIN_TXD, ixgbe_get_max_txd(adapter));
+	new_tx_count = ALIGN(new_tx_count, IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE);
+
+	new_rx_count = clamp_t(u32, ring->rx_pending,
+			       IXGBE_MIN_RXD, ixgbe_get_max_rxd(adapter));
+	new_rx_count = ALIGN(new_rx_count, IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE);
+
+	if ((new_tx_count == adapter->tx_ring_count) &&
+	    (new_rx_count == adapter->rx_ring_count)) {
+		/* nothing to do */
+		return 0;
+	}
+
+	while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (!netif_running(adapter->netdev)) {
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			adapter->tx_ring[i]->count = new_tx_count;
+		for (i = 0; i < adapter->num_xdp_queues; i++)
+			adapter->xdp_ring[i]->count = new_tx_count;
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			adapter->rx_ring[i]->count = new_rx_count;
+		adapter->tx_ring_count = new_tx_count;
+		adapter->xdp_ring_count = new_tx_count;
+		adapter->rx_ring_count = new_rx_count;
+		goto clear_reset;
+	}
+
+	/* allocate temporary buffer to store rings in */
+	i = max_t(int, adapter->num_tx_queues + adapter->num_xdp_queues,
+		  adapter->num_rx_queues);
+	temp_ring = vmalloc(array_size(i, sizeof(struct ixgbe_ring)));
+
+	if (!temp_ring) {
+		err = -ENOMEM;
+		goto clear_reset;
+	}
+
+	ixgbe_down(adapter);
+
+	/*
+	 * Setup new Tx resources and free the old Tx resources in that order.
+	 * We can then assign the new resources to the rings via a memcpy.
+	 * The advantage to this approach is that we are guaranteed to still
+	 * have resources even in the case of an allocation failure.
+	 */
+	if (new_tx_count != adapter->tx_ring_count) {
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			memcpy(&temp_ring[i], adapter->tx_ring[i],
+			       sizeof(struct ixgbe_ring));
+
+			temp_ring[i].count = new_tx_count;
+			err = ixgbe_setup_tx_resources(&temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					ixgbe_free_tx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+		}
+
+		for (j = 0; j < adapter->num_xdp_queues; j++, i++) {
+			memcpy(&temp_ring[i], adapter->xdp_ring[j],
+			       sizeof(struct ixgbe_ring));
+
+			temp_ring[i].count = new_tx_count;
+			err = ixgbe_setup_tx_resources(&temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					ixgbe_free_tx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+		}
+
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			ixgbe_free_tx_resources(adapter->tx_ring[i]);
+
+			memcpy(adapter->tx_ring[i], &temp_ring[i],
+			       sizeof(struct ixgbe_ring));
+		}
+		for (j = 0; j < adapter->num_xdp_queues; j++, i++) {
+			ixgbe_free_tx_resources(adapter->xdp_ring[j]);
+
+			memcpy(adapter->xdp_ring[j], &temp_ring[i],
+			       sizeof(struct ixgbe_ring));
+		}
+
+		adapter->tx_ring_count = new_tx_count;
+	}
+
+	/* Repeat the process for the Rx rings if needed */
+	if (new_rx_count != adapter->rx_ring_count) {
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			memcpy(&temp_ring[i], adapter->rx_ring[i],
+			       sizeof(struct ixgbe_ring));
+
+			/* Clear copied XDP RX-queue info */
+			memset(&temp_ring[i].xdp_rxq, 0,
+			       sizeof(temp_ring[i].xdp_rxq));
+
+			temp_ring[i].count = new_rx_count;
+			err = ixgbe_setup_rx_resources(adapter, &temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					ixgbe_free_rx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+
+		}
+
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			ixgbe_free_rx_resources(adapter->rx_ring[i]);
+
+			memcpy(adapter->rx_ring[i], &temp_ring[i],
+			       sizeof(struct ixgbe_ring));
+		}
+
+		adapter->rx_ring_count = new_rx_count;
+	}
+
+err_setup:
+	ixgbe_up(adapter);
+	vfree(temp_ring);
+clear_reset:
+	clear_bit(__IXGBE_RESETTING, &adapter->state);
+	return err;
+}
+
+static int ixgbe_get_sset_count(struct net_device *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_TEST:
+		return IXGBE_TEST_LEN;
+	case ETH_SS_STATS:
+		return IXGBE_STATS_LEN;
+	case ETH_SS_PRIV_FLAGS:
+		return IXGBE_PRIV_FLAGS_STR_LEN;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void ixgbe_get_ethtool_stats(struct net_device *netdev,
+				    struct ethtool_stats *stats, u64 *data)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct rtnl_link_stats64 temp;
+	const struct rtnl_link_stats64 *net_stats;
+	unsigned int start;
+	struct ixgbe_ring *ring;
+	int i, j;
+	char *p = NULL;
+
+	ixgbe_update_stats(adapter);
+	net_stats = dev_get_stats(netdev, &temp);
+	for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++) {
+		switch (ixgbe_gstrings_stats[i].type) {
+		case NETDEV_STATS:
+			p = (char *) net_stats +
+					ixgbe_gstrings_stats[i].stat_offset;
+			break;
+		case IXGBE_STATS:
+			p = (char *) adapter +
+					ixgbe_gstrings_stats[i].stat_offset;
+			break;
+		default:
+			data[i] = 0;
+			continue;
+		}
+
+		data[i] = (ixgbe_gstrings_stats[i].sizeof_stat ==
+			   sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+	for (j = 0; j < netdev->num_tx_queues; j++) {
+		ring = adapter->tx_ring[j];
+		if (!ring) {
+			data[i] = 0;
+			data[i+1] = 0;
+			i += 2;
+			continue;
+		}
+
+		do {
+			start = u64_stats_fetch_begin(&ring->syncp);
+			data[i]   = ring->stats.packets;
+			data[i+1] = ring->stats.bytes;
+		} while (u64_stats_fetch_retry(&ring->syncp, start));
+		i += 2;
+	}
+	for (j = 0; j < IXGBE_NUM_RX_QUEUES; j++) {
+		ring = adapter->rx_ring[j];
+		if (!ring) {
+			data[i] = 0;
+			data[i+1] = 0;
+			i += 2;
+			continue;
+		}
+
+		do {
+			start = u64_stats_fetch_begin(&ring->syncp);
+			data[i]   = ring->stats.packets;
+			data[i+1] = ring->stats.bytes;
+		} while (u64_stats_fetch_retry(&ring->syncp, start));
+		i += 2;
+	}
+
+	for (j = 0; j < IXGBE_MAX_PACKET_BUFFERS; j++) {
+		data[i++] = adapter->stats.pxontxc[j];
+		data[i++] = adapter->stats.pxofftxc[j];
+	}
+	for (j = 0; j < IXGBE_MAX_PACKET_BUFFERS; j++) {
+		data[i++] = adapter->stats.pxonrxc[j];
+		data[i++] = adapter->stats.pxoffrxc[j];
+	}
+}
+
+static void ixgbe_get_strings(struct net_device *netdev, u32 stringset,
+			      u8 *data)
+{
+	unsigned int i;
+	u8 *p = data;
+
+	switch (stringset) {
+	case ETH_SS_TEST:
+		for (i = 0; i < IXGBE_TEST_LEN; i++)
+			ethtool_sprintf(&p, ixgbe_gstrings_test[i]);
+		break;
+	case ETH_SS_STATS:
+		for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++)
+			ethtool_sprintf(&p,
+					ixgbe_gstrings_stats[i].stat_string);
+		for (i = 0; i < netdev->num_tx_queues; i++) {
+			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
+		}
+		for (i = 0; i < IXGBE_NUM_RX_QUEUES; i++) {
+			ethtool_sprintf(&p, "rx_queue_%u_packets", i);
+			ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
+		}
+		for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) {
+			ethtool_sprintf(&p, "tx_pb_%u_pxon", i);
+			ethtool_sprintf(&p, "tx_pb_%u_pxoff", i);
+		}
+		for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) {
+			ethtool_sprintf(&p, "rx_pb_%u_pxon", i);
+			ethtool_sprintf(&p, "rx_pb_%u_pxoff", i);
+		}
+		/* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		memcpy(data, ixgbe_priv_flags_strings,
+		       IXGBE_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+	}
+}
+
+static int ixgbe_link_test(struct ixgbe_adapter *adapter, u64 *data)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	bool link_up;
+	u32 link_speed = 0;
+
+	if (ixgbe_removed(hw->hw_addr)) {
+		*data = 1;
+		return 1;
+	}
+	*data = 0;
+
+	hw->mac.ops.check_link(hw, &link_speed, &link_up, true);
+	if (link_up)
+		return *data;
+	else
+		*data = 1;
+	return *data;
+}
+
+/* ethtool register test data */
+struct ixgbe_reg_test {
+	u16 reg;
+	u8  array_len;
+	u8  test_type;
+	u32 mask;
+	u32 write;
+};
+
+/* In the hardware, registers are laid out either singly, in arrays
+ * spaced 0x40 bytes apart, or in contiguous tables.  We assume
+ * most tests take place on arrays or single registers (handled
+ * as a single-element array) and special-case the tables.
+ * Table tests are always pattern tests.
+ *
+ * We also make provision for some required setup steps by specifying
+ * registers to be written without any read-back testing.
+ */
+
+#define PATTERN_TEST	1
+#define SET_READ_TEST	2
+#define WRITE_NO_TEST	3
+#define TABLE32_TEST	4
+#define TABLE64_TEST_LO	5
+#define TABLE64_TEST_HI	6
+
+/* default 82599 register test */
+static const struct ixgbe_reg_test reg_test_82599[] = {
+	{ IXGBE_FCRTL_82599(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+	{ IXGBE_FCRTH_82599(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+	{ IXGBE_PFCTOP, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_VLNCTRL, 1, PATTERN_TEST, 0x00000000, 0x00000000 },
+	{ IXGBE_RDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFF80 },
+	{ IXGBE_RDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_RDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	{ IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, IXGBE_RXDCTL_ENABLE },
+	{ IXGBE_RDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, 0 },
+	{ IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+	{ IXGBE_FCTTV(0), 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_TDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ IXGBE_TDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_TDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFF80 },
+	{ IXGBE_RXCTRL, 1, SET_READ_TEST, 0x00000001, 0x00000001 },
+	{ IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x8001FFFF, 0x800CFFFF },
+	{ IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ .reg = 0 }
+};
+
+/* default 82598 register test */
+static const struct ixgbe_reg_test reg_test_82598[] = {
+	{ IXGBE_FCRTL(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+	{ IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+	{ IXGBE_PFCTOP, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_VLNCTRL, 1, PATTERN_TEST, 0x00000000, 0x00000000 },
+	{ IXGBE_RDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ IXGBE_RDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_RDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	/* Enable all four RX queues before testing. */
+	{ IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, IXGBE_RXDCTL_ENABLE },
+	/* RDH is read-only for 82598, only test RDT. */
+	{ IXGBE_RDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, 0 },
+	{ IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 },
+	{ IXGBE_FCTTV(0), 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_TIPG, 1, PATTERN_TEST, 0x000000FF, 0x000000FF },
+	{ IXGBE_TDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ IXGBE_TDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_TDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	{ IXGBE_RXCTRL, 1, SET_READ_TEST, 0x00000003, 0x00000003 },
+	{ IXGBE_DTXCTL, 1, SET_READ_TEST, 0x00000005, 0x00000005 },
+	{ IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x800CFFFF, 0x800CFFFF },
+	{ IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ .reg = 0 }
+};
+
+static bool reg_pattern_test(struct ixgbe_adapter *adapter, u64 *data, int reg,
+			     u32 mask, u32 write)
+{
+	u32 pat, val, before;
+	static const u32 test_pattern[] = {
+		0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF};
+
+	if (ixgbe_removed(adapter->hw.hw_addr)) {
+		*data = 1;
+		return true;
+	}
+	for (pat = 0; pat < ARRAY_SIZE(test_pattern); pat++) {
+		before = ixgbe_read_reg(&adapter->hw, reg);
+		ixgbe_write_reg(&adapter->hw, reg, test_pattern[pat] & write);
+		val = ixgbe_read_reg(&adapter->hw, reg);
+		if (val != (test_pattern[pat] & write & mask)) {
+			e_err(drv, "pattern test reg %04X failed: got 0x%08X expected 0x%08X\n",
+			      reg, val, (test_pattern[pat] & write & mask));
+			*data = reg;
+			ixgbe_write_reg(&adapter->hw, reg, before);
+			return true;
+		}
+		ixgbe_write_reg(&adapter->hw, reg, before);
+	}
+	return false;
+}
+
+static bool reg_set_and_check(struct ixgbe_adapter *adapter, u64 *data, int reg,
+			      u32 mask, u32 write)
+{
+	u32 val, before;
+
+	if (ixgbe_removed(adapter->hw.hw_addr)) {
+		*data = 1;
+		return true;
+	}
+	before = ixgbe_read_reg(&adapter->hw, reg);
+	ixgbe_write_reg(&adapter->hw, reg, write & mask);
+	val = ixgbe_read_reg(&adapter->hw, reg);
+	if ((write & mask) != (val & mask)) {
+		e_err(drv, "set/check reg %04X test failed: got 0x%08X expected 0x%08X\n",
+		      reg, (val & mask), (write & mask));
+		*data = reg;
+		ixgbe_write_reg(&adapter->hw, reg, before);
+		return true;
+	}
+	ixgbe_write_reg(&adapter->hw, reg, before);
+	return false;
+}
+
+static int ixgbe_reg_test(struct ixgbe_adapter *adapter, u64 *data)
+{
+	const struct ixgbe_reg_test *test;
+	u32 value, before, after;
+	u32 i, toggle;
+
+	if (ixgbe_removed(adapter->hw.hw_addr)) {
+		e_err(drv, "Adapter removed - register test blocked\n");
+		*data = 1;
+		return 1;
+	}
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82598EB:
+		toggle = 0x7FFFF3FF;
+		test = reg_test_82598;
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		toggle = 0x7FFFF30F;
+		test = reg_test_82599;
+		break;
+	default:
+		*data = 1;
+		return 1;
+	}
+
+	/*
+	 * Because the status register is such a special case,
+	 * we handle it separately from the rest of the register
+	 * tests.  Some bits are read-only, some toggle, and some
+	 * are writeable on newer MACs.
+	 */
+	before = ixgbe_read_reg(&adapter->hw, IXGBE_STATUS);
+	value = (ixgbe_read_reg(&adapter->hw, IXGBE_STATUS) & toggle);
+	ixgbe_write_reg(&adapter->hw, IXGBE_STATUS, toggle);
+	after = ixgbe_read_reg(&adapter->hw, IXGBE_STATUS) & toggle;
+	if (value != after) {
+		e_err(drv, "failed STATUS register test got: 0x%08X expected: 0x%08X\n",
+		      after, value);
+		*data = 1;
+		return 1;
+	}
+	/* restore previous status */
+	ixgbe_write_reg(&adapter->hw, IXGBE_STATUS, before);
+
+	/*
+	 * Perform the remainder of the register test, looping through
+	 * the test table until we either fail or reach the null entry.
+	 */
+	while (test->reg) {
+		for (i = 0; i < test->array_len; i++) {
+			bool b = false;
+
+			switch (test->test_type) {
+			case PATTERN_TEST:
+				b = reg_pattern_test(adapter, data,
+						     test->reg + (i * 0x40),
+						     test->mask,
+						     test->write);
+				break;
+			case SET_READ_TEST:
+				b = reg_set_and_check(adapter, data,
+						      test->reg + (i * 0x40),
+						      test->mask,
+						      test->write);
+				break;
+			case WRITE_NO_TEST:
+				ixgbe_write_reg(&adapter->hw,
+						test->reg + (i * 0x40),
+						test->write);
+				break;
+			case TABLE32_TEST:
+				b = reg_pattern_test(adapter, data,
+						     test->reg + (i * 4),
+						     test->mask,
+						     test->write);
+				break;
+			case TABLE64_TEST_LO:
+				b = reg_pattern_test(adapter, data,
+						     test->reg + (i * 8),
+						     test->mask,
+						     test->write);
+				break;
+			case TABLE64_TEST_HI:
+				b = reg_pattern_test(adapter, data,
+						     (test->reg + 4) + (i * 8),
+						     test->mask,
+						     test->write);
+				break;
+			}
+			if (b)
+				return 1;
+		}
+		test++;
+	}
+
+	*data = 0;
+	return 0;
+}
+
+static int ixgbe_eeprom_test(struct ixgbe_adapter *adapter, u64 *data)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	if (hw->eeprom.ops.validate_checksum(hw, NULL))
+		*data = 1;
+	else
+		*data = 0;
+	return *data;
+}
+
+static irqreturn_t ixgbe_test_intr(int irq, void *data)
+{
+	struct net_device *netdev = (struct net_device *) data;
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	adapter->test_icr |= IXGBE_READ_REG(&adapter->hw, IXGBE_EICR);
+
+	return IRQ_HANDLED;
+}
+
+static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data)
+{
+	struct net_device *netdev = adapter->netdev;
+	u32 mask, i = 0, shared_int = true;
+	u32 irq = adapter->pdev->irq;
+
+	*data = 0;
+
+	/* Hook up test interrupt handler just for this test */
+	if (adapter->msix_entries) {
+		/* NOTE: we don't test MSI-X interrupts here, yet */
+		return 0;
+	} else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) {
+		shared_int = false;
+		if (request_irq(irq, ixgbe_test_intr, 0, netdev->name,
+				netdev)) {
+			*data = 1;
+			return -1;
+		}
+	} else if (!request_irq(irq, ixgbe_test_intr, IRQF_PROBE_SHARED,
+				netdev->name, netdev)) {
+		shared_int = false;
+	} else if (request_irq(irq, ixgbe_test_intr, IRQF_SHARED,
+			       netdev->name, netdev)) {
+		*data = 1;
+		return -1;
+	}
+	e_info(hw, "testing %s interrupt\n", shared_int ?
+	       "shared" : "unshared");
+
+	/* Disable all the interrupts */
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF);
+	IXGBE_WRITE_FLUSH(&adapter->hw);
+	usleep_range(10000, 20000);
+
+	/* Test each interrupt */
+	for (; i < 10; i++) {
+		/* Interrupt to test */
+		mask = BIT(i);
+
+		if (!shared_int) {
+			/*
+			 * Disable the interrupts to be reported in
+			 * the cause register and then force the same
+			 * interrupt and see if one gets posted.  If
+			 * an interrupt was posted to the bus, the
+			 * test failed.
+			 */
+			adapter->test_icr = 0;
+			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC,
+					~mask & 0x00007FFF);
+			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS,
+					~mask & 0x00007FFF);
+			IXGBE_WRITE_FLUSH(&adapter->hw);
+			usleep_range(10000, 20000);
+
+			if (adapter->test_icr & mask) {
+				*data = 3;
+				break;
+			}
+		}
+
+		/*
+		 * Enable the interrupt to be reported in the cause
+		 * register and then force the same interrupt and see
+		 * if one gets posted.  If an interrupt was not posted
+		 * to the bus, the test failed.
+		 */
+		adapter->test_icr = 0;
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask);
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
+		IXGBE_WRITE_FLUSH(&adapter->hw);
+		usleep_range(10000, 20000);
+
+		if (!(adapter->test_icr & mask)) {
+			*data = 4;
+			break;
+		}
+
+		if (!shared_int) {
+			/*
+			 * Disable the other interrupts to be reported in
+			 * the cause register and then force the other
+			 * interrupts and see if any get posted.  If
+			 * an interrupt was posted to the bus, the
+			 * test failed.
+			 */
+			adapter->test_icr = 0;
+			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC,
+					~mask & 0x00007FFF);
+			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS,
+					~mask & 0x00007FFF);
+			IXGBE_WRITE_FLUSH(&adapter->hw);
+			usleep_range(10000, 20000);
+
+			if (adapter->test_icr) {
+				*data = 5;
+				break;
+			}
+		}
+	}
+
+	/* Disable all the interrupts */
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF);
+	IXGBE_WRITE_FLUSH(&adapter->hw);
+	usleep_range(10000, 20000);
+
+	/* Unhook test interrupt handler */
+	free_irq(irq, netdev);
+
+	return *data;
+}
+
+static void ixgbe_free_desc_rings(struct ixgbe_adapter *adapter)
+{
+	/* Shut down the DMA engines now so they can be reinitialized later,
+	 * since the test rings and normally used rings should overlap on
+	 * queue 0 we can just use the standard disable Rx/Tx calls and they
+	 * will take care of disabling the test rings for us.
+	 */
+
+	/* first Rx */
+	ixgbe_disable_rx(adapter);
+
+	/* now Tx */
+	ixgbe_disable_tx(adapter);
+
+	ixgbe_reset(adapter);
+
+	ixgbe_free_tx_resources(&adapter->test_tx_ring);
+	ixgbe_free_rx_resources(&adapter->test_rx_ring);
+}
+
+static int ixgbe_setup_desc_rings(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_ring *tx_ring = &adapter->test_tx_ring;
+	struct ixgbe_ring *rx_ring = &adapter->test_rx_ring;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 rctl, reg_data;
+	int ret_val;
+	int err;
+
+	/* Setup Tx descriptor ring and Tx buffers */
+	tx_ring->count = IXGBE_DEFAULT_TXD;
+	tx_ring->queue_index = 0;
+	tx_ring->dev = &adapter->pdev->dev;
+	tx_ring->netdev = adapter->netdev;
+	tx_ring->reg_idx = adapter->tx_ring[0]->reg_idx;
+
+	err = ixgbe_setup_tx_resources(tx_ring);
+	if (err)
+		return 1;
+
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		reg_data = IXGBE_READ_REG(&adapter->hw, IXGBE_DMATXCTL);
+		reg_data |= IXGBE_DMATXCTL_TE;
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_DMATXCTL, reg_data);
+		break;
+	default:
+		break;
+	}
+
+	ixgbe_configure_tx_ring(adapter, tx_ring);
+
+	/* Setup Rx Descriptor ring and Rx buffers */
+	rx_ring->count = IXGBE_DEFAULT_RXD;
+	rx_ring->queue_index = 0;
+	rx_ring->dev = &adapter->pdev->dev;
+	rx_ring->netdev = adapter->netdev;
+	rx_ring->reg_idx = adapter->rx_ring[0]->reg_idx;
+
+	err = ixgbe_setup_rx_resources(adapter, rx_ring);
+	if (err) {
+		ret_val = 4;
+		goto err_nomem;
+	}
+
+	hw->mac.ops.disable_rx(hw);
+
+	ixgbe_configure_rx_ring(adapter, rx_ring);
+
+	rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_RXCTRL);
+	rctl |= IXGBE_RXCTRL_DMBYPS;
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_RXCTRL, rctl);
+
+	hw->mac.ops.enable_rx(hw);
+
+	return 0;
+
+err_nomem:
+	ixgbe_free_desc_rings(adapter);
+	return ret_val;
+}
+
+static int ixgbe_setup_loopback_test(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 reg_data;
+
+
+	/* Setup MAC loopback */
+	reg_data = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+	reg_data |= IXGBE_HLREG0_LPBK;
+	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg_data);
+
+	reg_data = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+	reg_data |= IXGBE_FCTRL_BAM | IXGBE_FCTRL_SBP | IXGBE_FCTRL_MPE;
+	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg_data);
+
+	/* X540 and X550 needs to set the MACC.FLU bit to force link up */
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		reg_data = IXGBE_READ_REG(hw, IXGBE_MACC);
+		reg_data |= IXGBE_MACC_FLU;
+		IXGBE_WRITE_REG(hw, IXGBE_MACC, reg_data);
+		break;
+	default:
+		if (hw->mac.orig_autoc) {
+			reg_data = hw->mac.orig_autoc | IXGBE_AUTOC_FLU;
+			IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_data);
+		} else {
+			return 10;
+		}
+	}
+	IXGBE_WRITE_FLUSH(hw);
+	usleep_range(10000, 20000);
+
+	/* Disable Atlas Tx lanes; re-enabled in reset path */
+	if (hw->mac.type == ixgbe_mac_82598EB) {
+		u8 atlas;
+
+		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, &atlas);
+		atlas |= IXGBE_ATLAS_PDN_TX_REG_EN;
+		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, atlas);
+
+		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, &atlas);
+		atlas |= IXGBE_ATLAS_PDN_TX_10G_QL_ALL;
+		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, atlas);
+
+		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, &atlas);
+		atlas |= IXGBE_ATLAS_PDN_TX_1G_QL_ALL;
+		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, atlas);
+
+		hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, &atlas);
+		atlas |= IXGBE_ATLAS_PDN_TX_AN_QL_ALL;
+		hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, atlas);
+	}
+
+	return 0;
+}
+
+static void ixgbe_loopback_cleanup(struct ixgbe_adapter *adapter)
+{
+	u32 reg_data;
+
+	reg_data = IXGBE_READ_REG(&adapter->hw, IXGBE_HLREG0);
+	reg_data &= ~IXGBE_HLREG0_LPBK;
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_HLREG0, reg_data);
+}
+
+static void ixgbe_create_lbtest_frame(struct sk_buff *skb,
+				      unsigned int frame_size)
+{
+	memset(skb->data, 0xFF, frame_size);
+	frame_size >>= 1;
+	memset(&skb->data[frame_size], 0xAA, frame_size / 2 - 1);
+	skb->data[frame_size + 10] = 0xBE;
+	skb->data[frame_size + 12] = 0xAF;
+}
+
+static bool ixgbe_check_lbtest_frame(struct ixgbe_rx_buffer *rx_buffer,
+				     unsigned int frame_size)
+{
+	unsigned char *data;
+
+	frame_size >>= 1;
+
+	data = page_address(rx_buffer->page) + rx_buffer->page_offset;
+
+	return data[3] == 0xFF && data[frame_size + 10] == 0xBE &&
+		data[frame_size + 12] == 0xAF;
+}
+
+static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring,
+				  struct ixgbe_ring *tx_ring,
+				  unsigned int size)
+{
+	union ixgbe_adv_rx_desc *rx_desc;
+	u16 rx_ntc, tx_ntc, count = 0;
+
+	/* initialize next to clean and descriptor values */
+	rx_ntc = rx_ring->next_to_clean;
+	tx_ntc = tx_ring->next_to_clean;
+	rx_desc = IXGBE_RX_DESC(rx_ring, rx_ntc);
+
+	while (tx_ntc != tx_ring->next_to_use) {
+		union ixgbe_adv_tx_desc *tx_desc;
+		struct ixgbe_tx_buffer *tx_buffer;
+
+		tx_desc = IXGBE_TX_DESC(tx_ring, tx_ntc);
+
+		/* if DD is not set transmit has not completed */
+		if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
+			return count;
+
+		/* unmap buffer on Tx side */
+		tx_buffer = &tx_ring->tx_buffer_info[tx_ntc];
+
+		/* Free all the Tx ring sk_buffs */
+		dev_kfree_skb_any(tx_buffer->skb);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+		dma_unmap_len_set(tx_buffer, len, 0);
+
+		/* increment Tx next to clean counter */
+		tx_ntc++;
+		if (tx_ntc == tx_ring->count)
+			tx_ntc = 0;
+	}
+
+	while (rx_desc->wb.upper.length) {
+		struct ixgbe_rx_buffer *rx_buffer;
+
+		/* check Rx buffer */
+		rx_buffer = &rx_ring->rx_buffer_info[rx_ntc];
+
+		/* sync Rx buffer for CPU read */
+		dma_sync_single_for_cpu(rx_ring->dev,
+					rx_buffer->dma,
+					ixgbe_rx_bufsz(rx_ring),
+					DMA_FROM_DEVICE);
+
+		/* verify contents of skb */
+		if (ixgbe_check_lbtest_frame(rx_buffer, size))
+			count++;
+		else
+			break;
+
+		/* sync Rx buffer for device write */
+		dma_sync_single_for_device(rx_ring->dev,
+					   rx_buffer->dma,
+					   ixgbe_rx_bufsz(rx_ring),
+					   DMA_FROM_DEVICE);
+
+		/* increment Rx next to clean counter */
+		rx_ntc++;
+		if (rx_ntc == rx_ring->count)
+			rx_ntc = 0;
+
+		/* fetch next descriptor */
+		rx_desc = IXGBE_RX_DESC(rx_ring, rx_ntc);
+	}
+
+	netdev_tx_reset_queue(txring_txq(tx_ring));
+
+	/* re-map buffers to ring, store next to clean values */
+	ixgbe_alloc_rx_buffers(rx_ring, count);
+	rx_ring->next_to_clean = rx_ntc;
+	tx_ring->next_to_clean = tx_ntc;
+
+	return count;
+}
+
+static int ixgbe_run_loopback_test(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_ring *tx_ring = &adapter->test_tx_ring;
+	struct ixgbe_ring *rx_ring = &adapter->test_rx_ring;
+	int i, j, lc, good_cnt, ret_val = 0;
+	unsigned int size = 1024;
+	netdev_tx_t tx_ret_val;
+	struct sk_buff *skb;
+	u32 flags_orig = adapter->flags;
+
+	/* DCB can modify the frames on Tx */
+	adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
+
+	/* allocate test skb */
+	skb = alloc_skb(size, GFP_KERNEL);
+	if (!skb)
+		return 11;
+
+	/* place data into test skb */
+	ixgbe_create_lbtest_frame(skb, size);
+	skb_put(skb, size);
+
+	/*
+	 * Calculate the loop count based on the largest descriptor ring
+	 * The idea is to wrap the largest ring a number of times using 64
+	 * send/receive pairs during each loop
+	 */
+
+	if (rx_ring->count <= tx_ring->count)
+		lc = ((tx_ring->count / 64) * 2) + 1;
+	else
+		lc = ((rx_ring->count / 64) * 2) + 1;
+
+	for (j = 0; j <= lc; j++) {
+		/* reset count of good packets */
+		good_cnt = 0;
+
+		/* place 64 packets on the transmit queue*/
+		for (i = 0; i < 64; i++) {
+			skb_get(skb);
+			tx_ret_val = ixgbe_xmit_frame_ring(skb,
+							   adapter,
+							   tx_ring);
+			if (tx_ret_val == NETDEV_TX_OK)
+				good_cnt++;
+		}
+
+		if (good_cnt != 64) {
+			ret_val = 12;
+			break;
+		}
+
+		/* allow 200 milliseconds for packets to go from Tx to Rx */
+		msleep(200);
+
+		good_cnt = ixgbe_clean_test_rings(rx_ring, tx_ring, size);
+		if (good_cnt != 64) {
+			ret_val = 13;
+			break;
+		}
+	}
+
+	/* free the original skb */
+	kfree_skb(skb);
+	adapter->flags = flags_orig;
+
+	return ret_val;
+}
+
+static int ixgbe_loopback_test(struct ixgbe_adapter *adapter, u64 *data)
+{
+	*data = ixgbe_setup_desc_rings(adapter);
+	if (*data)
+		goto out;
+	*data = ixgbe_setup_loopback_test(adapter);
+	if (*data)
+		goto err_loopback;
+	*data = ixgbe_run_loopback_test(adapter);
+	ixgbe_loopback_cleanup(adapter);
+
+err_loopback:
+	ixgbe_free_desc_rings(adapter);
+out:
+	return *data;
+}
+
+static void ixgbe_diag_test(struct net_device *netdev,
+			    struct ethtool_test *eth_test, u64 *data)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	bool if_running = netif_running(netdev);
+
+	if (ixgbe_removed(adapter->hw.hw_addr)) {
+		e_err(hw, "Adapter removed - test blocked\n");
+		data[0] = 1;
+		data[1] = 1;
+		data[2] = 1;
+		data[3] = 1;
+		data[4] = 1;
+		eth_test->flags |= ETH_TEST_FL_FAILED;
+		return;
+	}
+	set_bit(__IXGBE_TESTING, &adapter->state);
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		struct ixgbe_hw *hw = &adapter->hw;
+
+		if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+			int i;
+			for (i = 0; i < adapter->num_vfs; i++) {
+				if (adapter->vfinfo[i].clear_to_send) {
+					netdev_warn(netdev, "offline diagnostic is not supported when VFs are present\n");
+					data[0] = 1;
+					data[1] = 1;
+					data[2] = 1;
+					data[3] = 1;
+					data[4] = 1;
+					eth_test->flags |= ETH_TEST_FL_FAILED;
+					clear_bit(__IXGBE_TESTING,
+						  &adapter->state);
+					return;
+				}
+			}
+		}
+
+		/* Offline tests */
+		e_info(hw, "offline testing starting\n");
+
+		/* Link test performed before hardware reset so autoneg doesn't
+		 * interfere with test result
+		 */
+		if (ixgbe_link_test(adapter, &data[4]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		if (if_running)
+			/* indicate we're in test mode */
+			ixgbe_close(netdev);
+		else
+			ixgbe_reset(adapter);
+
+		e_info(hw, "register testing starting\n");
+		if (ixgbe_reg_test(adapter, &data[0]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		ixgbe_reset(adapter);
+		e_info(hw, "eeprom testing starting\n");
+		if (ixgbe_eeprom_test(adapter, &data[1]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		ixgbe_reset(adapter);
+		e_info(hw, "interrupt testing starting\n");
+		if (ixgbe_intr_test(adapter, &data[2]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* If SRIOV or VMDq is enabled then skip MAC
+		 * loopback diagnostic. */
+		if (adapter->flags & (IXGBE_FLAG_SRIOV_ENABLED |
+				      IXGBE_FLAG_VMDQ_ENABLED)) {
+			e_info(hw, "Skip MAC loopback diagnostic in VT mode\n");
+			data[3] = 0;
+			goto skip_loopback;
+		}
+
+		ixgbe_reset(adapter);
+		e_info(hw, "loopback testing starting\n");
+		if (ixgbe_loopback_test(adapter, &data[3]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+skip_loopback:
+		ixgbe_reset(adapter);
+
+		/* clear testing bit and return adapter to previous state */
+		clear_bit(__IXGBE_TESTING, &adapter->state);
+		if (if_running)
+			ixgbe_open(netdev);
+		else if (hw->mac.ops.disable_tx_laser)
+			hw->mac.ops.disable_tx_laser(hw);
+	} else {
+		e_info(hw, "online testing starting\n");
+
+		/* Online tests */
+		if (ixgbe_link_test(adapter, &data[4]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* Offline tests aren't run; pass by default */
+		data[0] = 0;
+		data[1] = 0;
+		data[2] = 0;
+		data[3] = 0;
+
+		clear_bit(__IXGBE_TESTING, &adapter->state);
+	}
+}
+
+static int ixgbe_wol_exclusion(struct ixgbe_adapter *adapter,
+			       struct ethtool_wolinfo *wol)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int retval = 0;
+
+	/* WOL not supported for all devices */
+	if (!ixgbe_wol_supported(adapter, hw->device_id,
+				 hw->subsystem_device_id)) {
+		retval = 1;
+		wol->supported = 0;
+	}
+
+	return retval;
+}
+
+static void ixgbe_get_wol(struct net_device *netdev,
+			  struct ethtool_wolinfo *wol)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	wol->supported = WAKE_UCAST | WAKE_MCAST |
+			 WAKE_BCAST | WAKE_MAGIC;
+	wol->wolopts = 0;
+
+	if (ixgbe_wol_exclusion(adapter, wol) ||
+	    !device_can_wakeup(&adapter->pdev->dev))
+		return;
+
+	if (adapter->wol & IXGBE_WUFC_EX)
+		wol->wolopts |= WAKE_UCAST;
+	if (adapter->wol & IXGBE_WUFC_MC)
+		wol->wolopts |= WAKE_MCAST;
+	if (adapter->wol & IXGBE_WUFC_BC)
+		wol->wolopts |= WAKE_BCAST;
+	if (adapter->wol & IXGBE_WUFC_MAG)
+		wol->wolopts |= WAKE_MAGIC;
+}
+
+static int ixgbe_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (wol->wolopts & (WAKE_PHY | WAKE_ARP | WAKE_MAGICSECURE |
+			    WAKE_FILTER))
+		return -EOPNOTSUPP;
+
+	if (ixgbe_wol_exclusion(adapter, wol))
+		return wol->wolopts ? -EOPNOTSUPP : 0;
+
+	adapter->wol = 0;
+
+	if (wol->wolopts & WAKE_UCAST)
+		adapter->wol |= IXGBE_WUFC_EX;
+	if (wol->wolopts & WAKE_MCAST)
+		adapter->wol |= IXGBE_WUFC_MC;
+	if (wol->wolopts & WAKE_BCAST)
+		adapter->wol |= IXGBE_WUFC_BC;
+	if (wol->wolopts & WAKE_MAGIC)
+		adapter->wol |= IXGBE_WUFC_MAG;
+
+	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+
+	return 0;
+}
+
+static int ixgbe_nway_reset(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (netif_running(netdev))
+		ixgbe_reinit_locked(adapter);
+
+	return 0;
+}
+
+static int ixgbe_set_phys_id(struct net_device *netdev,
+			     enum ethtool_phys_id_state state)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	if (!hw->mac.ops.led_on || !hw->mac.ops.led_off)
+		return -EOPNOTSUPP;
+
+	switch (state) {
+	case ETHTOOL_ID_ACTIVE:
+		adapter->led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+		return 2;
+
+	case ETHTOOL_ID_ON:
+		hw->mac.ops.led_on(hw, hw->mac.led_link_act);
+		break;
+
+	case ETHTOOL_ID_OFF:
+		hw->mac.ops.led_off(hw, hw->mac.led_link_act);
+		break;
+
+	case ETHTOOL_ID_INACTIVE:
+		/* Restore LED settings */
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_LEDCTL, adapter->led_reg);
+		break;
+	}
+
+	return 0;
+}
+
+static int ixgbe_get_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *ec,
+			      struct kernel_ethtool_coalesce *kernel_coal,
+			      struct netlink_ext_ack *extack)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	/* only valid if in constant ITR mode */
+	if (adapter->rx_itr_setting <= 1)
+		ec->rx_coalesce_usecs = adapter->rx_itr_setting;
+	else
+		ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
+
+	/* if in mixed tx/rx queues per vector mode, report only rx settings */
+	if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count)
+		return 0;
+
+	/* only valid if in constant ITR mode */
+	if (adapter->tx_itr_setting <= 1)
+		ec->tx_coalesce_usecs = adapter->tx_itr_setting;
+	else
+		ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2;
+
+	return 0;
+}
+
+/*
+ * this function must be called before setting the new value of
+ * rx_itr_setting
+ */
+static bool ixgbe_update_rsc(struct ixgbe_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	/* nothing to do if LRO or RSC are not enabled */
+	if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) ||
+	    !(netdev->features & NETIF_F_LRO))
+		return false;
+
+	/* check the feature flag value and enable RSC if necessary */
+	if (adapter->rx_itr_setting == 1 ||
+	    adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) {
+		if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) {
+			adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED;
+			e_info(probe, "rx-usecs value high enough to re-enable RSC\n");
+			return true;
+		}
+	/* if interrupt rate is too high then disable RSC */
+	} else if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
+		adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
+		e_info(probe, "rx-usecs set too low, disabling RSC\n");
+		return true;
+	}
+	return false;
+}
+
+static int ixgbe_set_coalesce(struct net_device *netdev,
+			      struct ethtool_coalesce *ec,
+			      struct kernel_ethtool_coalesce *kernel_coal,
+			      struct netlink_ext_ack *extack)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_q_vector *q_vector;
+	int i;
+	u16 tx_itr_param, rx_itr_param, tx_itr_prev;
+	bool need_reset = false;
+
+	if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count) {
+		/* reject Tx specific changes in case of mixed RxTx vectors */
+		if (ec->tx_coalesce_usecs)
+			return -EINVAL;
+		tx_itr_prev = adapter->rx_itr_setting;
+	} else {
+		tx_itr_prev = adapter->tx_itr_setting;
+	}
+
+	if ((ec->rx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)) ||
+	    (ec->tx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)))
+		return -EINVAL;
+
+	if (ec->rx_coalesce_usecs > 1)
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2;
+	else
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs;
+
+	if (adapter->rx_itr_setting == 1)
+		rx_itr_param = IXGBE_20K_ITR;
+	else
+		rx_itr_param = adapter->rx_itr_setting;
+
+	if (ec->tx_coalesce_usecs > 1)
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2;
+	else
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs;
+
+	if (adapter->tx_itr_setting == 1)
+		tx_itr_param = IXGBE_12K_ITR;
+	else
+		tx_itr_param = adapter->tx_itr_setting;
+
+	/* mixed Rx/Tx */
+	if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count)
+		adapter->tx_itr_setting = adapter->rx_itr_setting;
+
+	/* detect ITR changes that require update of TXDCTL.WTHRESH */
+	if ((adapter->tx_itr_setting != 1) &&
+	    (adapter->tx_itr_setting < IXGBE_100K_ITR)) {
+		if ((tx_itr_prev == 1) ||
+		    (tx_itr_prev >= IXGBE_100K_ITR))
+			need_reset = true;
+	} else {
+		if ((tx_itr_prev != 1) &&
+		    (tx_itr_prev < IXGBE_100K_ITR))
+			need_reset = true;
+	}
+
+	/* check the old value and enable RSC if necessary */
+	need_reset |= ixgbe_update_rsc(adapter);
+
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		q_vector = adapter->q_vector[i];
+		if (q_vector->tx.count && !q_vector->rx.count)
+			/* tx only */
+			q_vector->itr = tx_itr_param;
+		else
+			/* rx only or mixed */
+			q_vector->itr = rx_itr_param;
+		ixgbe_write_eitr(q_vector);
+	}
+
+	/*
+	 * do reset here at the end to make sure EITR==0 case is handled
+	 * correctly w.r.t stopping tx, and changing TXDCTL.WTHRESH settings
+	 * also locks in RSC enable/disable which requires reset
+	 */
+	if (need_reset)
+		ixgbe_do_reset(netdev);
+
+	return 0;
+}
+
+static int ixgbe_get_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+					struct ethtool_rxnfc *cmd)
+{
+	union ixgbe_atr_input *mask = &adapter->fdir_mask;
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct hlist_node *node2;
+	struct ixgbe_fdir_filter *rule = NULL;
+
+	/* report total rule count */
+	cmd->data = (1024 << adapter->fdir_pballoc) - 2;
+
+	hlist_for_each_entry_safe(rule, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		if (fsp->location <= rule->sw_idx)
+			break;
+	}
+
+	if (!rule || fsp->location != rule->sw_idx)
+		return -EINVAL;
+
+	/* fill out the flow spec entry */
+
+	/* set flow type field */
+	switch (rule->filter.formatted.flow_type) {
+	case IXGBE_ATR_FLOW_TYPE_TCPV4:
+		fsp->flow_type = TCP_V4_FLOW;
+		break;
+	case IXGBE_ATR_FLOW_TYPE_UDPV4:
+		fsp->flow_type = UDP_V4_FLOW;
+		break;
+	case IXGBE_ATR_FLOW_TYPE_SCTPV4:
+		fsp->flow_type = SCTP_V4_FLOW;
+		break;
+	case IXGBE_ATR_FLOW_TYPE_IPV4:
+		fsp->flow_type = IP_USER_FLOW;
+		fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+		fsp->h_u.usr_ip4_spec.proto = 0;
+		fsp->m_u.usr_ip4_spec.proto = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	fsp->h_u.tcp_ip4_spec.psrc = rule->filter.formatted.src_port;
+	fsp->m_u.tcp_ip4_spec.psrc = mask->formatted.src_port;
+	fsp->h_u.tcp_ip4_spec.pdst = rule->filter.formatted.dst_port;
+	fsp->m_u.tcp_ip4_spec.pdst = mask->formatted.dst_port;
+	fsp->h_u.tcp_ip4_spec.ip4src = rule->filter.formatted.src_ip[0];
+	fsp->m_u.tcp_ip4_spec.ip4src = mask->formatted.src_ip[0];
+	fsp->h_u.tcp_ip4_spec.ip4dst = rule->filter.formatted.dst_ip[0];
+	fsp->m_u.tcp_ip4_spec.ip4dst = mask->formatted.dst_ip[0];
+	fsp->h_ext.vlan_tci = rule->filter.formatted.vlan_id;
+	fsp->m_ext.vlan_tci = mask->formatted.vlan_id;
+	fsp->h_ext.vlan_etype = rule->filter.formatted.flex_bytes;
+	fsp->m_ext.vlan_etype = mask->formatted.flex_bytes;
+	fsp->h_ext.data[1] = htonl(rule->filter.formatted.vm_pool);
+	fsp->m_ext.data[1] = htonl(mask->formatted.vm_pool);
+	fsp->flow_type |= FLOW_EXT;
+
+	/* record action */
+	if (rule->action == IXGBE_FDIR_DROP_QUEUE)
+		fsp->ring_cookie = RX_CLS_FLOW_DISC;
+	else
+		fsp->ring_cookie = rule->action;
+
+	return 0;
+}
+
+static int ixgbe_get_ethtool_fdir_all(struct ixgbe_adapter *adapter,
+				      struct ethtool_rxnfc *cmd,
+				      u32 *rule_locs)
+{
+	struct hlist_node *node2;
+	struct ixgbe_fdir_filter *rule;
+	int cnt = 0;
+
+	/* report total rule count */
+	cmd->data = (1024 << adapter->fdir_pballoc) - 2;
+
+	hlist_for_each_entry_safe(rule, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		if (cnt == cmd->rule_cnt)
+			return -EMSGSIZE;
+		rule_locs[cnt] = rule->sw_idx;
+		cnt++;
+	}
+
+	cmd->rule_cnt = cnt;
+
+	return 0;
+}
+
+static int ixgbe_get_rss_hash_opts(struct ixgbe_adapter *adapter,
+				   struct ethtool_rxnfc *cmd)
+{
+	cmd->data = 0;
+
+	/* Report default options for RSS on ixgbe */
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		fallthrough;
+	case UDP_V4_FLOW:
+		if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		fallthrough;
+	case SCTP_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case IPV4_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	case TCP_V6_FLOW:
+		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		fallthrough;
+	case UDP_V6_FLOW:
+		if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+			cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		fallthrough;
+	case SCTP_V6_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case IPV6_FLOW:
+		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ixgbe_rss_indir_tbl_max(struct ixgbe_adapter *adapter)
+{
+	if (adapter->hw.mac.type < ixgbe_mac_X550)
+		return 16;
+	else
+		return 64;
+}
+
+static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+			   u32 *rule_locs)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = min_t(int, adapter->num_rx_queues,
+				  ixgbe_rss_indir_tbl_max(adapter));
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = adapter->fdir_filter_count;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = ixgbe_get_ethtool_fdir_entry(adapter, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		ret = ixgbe_get_ethtool_fdir_all(adapter, cmd, rule_locs);
+		break;
+	case ETHTOOL_GRXFH:
+		ret = ixgbe_get_rss_hash_opts(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+				    struct ixgbe_fdir_filter *input,
+				    u16 sw_idx)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct hlist_node *node2;
+	struct ixgbe_fdir_filter *rule, *parent;
+	int err = -EINVAL;
+
+	parent = NULL;
+	rule = NULL;
+
+	hlist_for_each_entry_safe(rule, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		/* hash found, or no matching entry */
+		if (rule->sw_idx >= sw_idx)
+			break;
+		parent = rule;
+	}
+
+	/* if there is an old rule occupying our place remove it */
+	if (rule && (rule->sw_idx == sw_idx)) {
+		if (!input || (rule->filter.formatted.bkt_hash !=
+			       input->filter.formatted.bkt_hash)) {
+			err = ixgbe_fdir_erase_perfect_filter_82599(hw,
+								&rule->filter,
+								sw_idx);
+		}
+
+		hlist_del(&rule->fdir_node);
+		kfree(rule);
+		adapter->fdir_filter_count--;
+	}
+
+	/*
+	 * If no input this was a delete, err should be 0 if a rule was
+	 * successfully found and removed from the list else -EINVAL
+	 */
+	if (!input)
+		return err;
+
+	/* initialize node and set software index */
+	INIT_HLIST_NODE(&input->fdir_node);
+
+	/* add filter to the list */
+	if (parent)
+		hlist_add_behind(&input->fdir_node, &parent->fdir_node);
+	else
+		hlist_add_head(&input->fdir_node,
+			       &adapter->fdir_filter_list);
+
+	/* update counts */
+	adapter->fdir_filter_count++;
+
+	return 0;
+}
+
+static int ixgbe_flowspec_to_flow_type(struct ethtool_rx_flow_spec *fsp,
+				       u8 *flow_type)
+{
+	switch (fsp->flow_type & ~FLOW_EXT) {
+	case TCP_V4_FLOW:
+		*flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
+		break;
+	case UDP_V4_FLOW:
+		*flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4;
+		break;
+	case SCTP_V4_FLOW:
+		*flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4;
+		break;
+	case IP_USER_FLOW:
+		switch (fsp->h_u.usr_ip4_spec.proto) {
+		case IPPROTO_TCP:
+			*flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
+			break;
+		case IPPROTO_UDP:
+			*flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4;
+			break;
+		case IPPROTO_SCTP:
+			*flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4;
+			break;
+		case 0:
+			if (!fsp->m_u.usr_ip4_spec.proto) {
+				*flow_type = IXGBE_ATR_FLOW_TYPE_IPV4;
+				break;
+			}
+			fallthrough;
+		default:
+			return 0;
+		}
+		break;
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
+static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+					struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_fdir_filter *input;
+	union ixgbe_atr_input mask;
+	u8 queue;
+	int err;
+
+	if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
+		return -EOPNOTSUPP;
+
+	/* ring_cookie is a masked into a set of queues and ixgbe pools or
+	 * we use the drop index.
+	 */
+	if (fsp->ring_cookie == RX_CLS_FLOW_DISC) {
+		queue = IXGBE_FDIR_DROP_QUEUE;
+	} else {
+		u32 ring = ethtool_get_flow_spec_ring(fsp->ring_cookie);
+		u8 vf = ethtool_get_flow_spec_ring_vf(fsp->ring_cookie);
+
+		if (!vf && (ring >= adapter->num_rx_queues))
+			return -EINVAL;
+		else if (vf &&
+			 ((vf > adapter->num_vfs) ||
+			   ring >= adapter->num_rx_queues_per_pool))
+			return -EINVAL;
+
+		/* Map the ring onto the absolute queue index */
+		if (!vf)
+			queue = adapter->rx_ring[ring]->reg_idx;
+		else
+			queue = ((vf - 1) *
+				adapter->num_rx_queues_per_pool) + ring;
+	}
+
+	/* Don't allow indexes to exist outside of available space */
+	if (fsp->location >= ((1024 << adapter->fdir_pballoc) - 2)) {
+		e_err(drv, "Location out of range\n");
+		return -EINVAL;
+	}
+
+	input = kzalloc(sizeof(*input), GFP_ATOMIC);
+	if (!input)
+		return -ENOMEM;
+
+	memset(&mask, 0, sizeof(union ixgbe_atr_input));
+
+	/* set SW index */
+	input->sw_idx = fsp->location;
+
+	/* record flow type */
+	if (!ixgbe_flowspec_to_flow_type(fsp,
+					 &input->filter.formatted.flow_type)) {
+		e_err(drv, "Unrecognized flow type\n");
+		goto err_out;
+	}
+
+	mask.formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK |
+				   IXGBE_ATR_L4TYPE_MASK;
+
+	if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4)
+		mask.formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK;
+
+	/* Copy input into formatted structures */
+	input->filter.formatted.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src;
+	mask.formatted.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src;
+	input->filter.formatted.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
+	mask.formatted.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst;
+	input->filter.formatted.src_port = fsp->h_u.tcp_ip4_spec.psrc;
+	mask.formatted.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+	input->filter.formatted.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+	mask.formatted.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+
+	if (fsp->flow_type & FLOW_EXT) {
+		input->filter.formatted.vm_pool =
+				(unsigned char)ntohl(fsp->h_ext.data[1]);
+		mask.formatted.vm_pool =
+				(unsigned char)ntohl(fsp->m_ext.data[1]);
+		input->filter.formatted.vlan_id = fsp->h_ext.vlan_tci;
+		mask.formatted.vlan_id = fsp->m_ext.vlan_tci;
+		input->filter.formatted.flex_bytes =
+						fsp->h_ext.vlan_etype;
+		mask.formatted.flex_bytes = fsp->m_ext.vlan_etype;
+	}
+
+	/* determine if we need to drop or route the packet */
+	if (fsp->ring_cookie == RX_CLS_FLOW_DISC)
+		input->action = IXGBE_FDIR_DROP_QUEUE;
+	else
+		input->action = fsp->ring_cookie;
+
+	spin_lock(&adapter->fdir_perfect_lock);
+
+	if (hlist_empty(&adapter->fdir_filter_list)) {
+		/* save mask and program input mask into HW */
+		memcpy(&adapter->fdir_mask, &mask, sizeof(mask));
+		err = ixgbe_fdir_set_input_mask_82599(hw, &mask);
+		if (err) {
+			e_err(drv, "Error writing mask\n");
+			goto err_out_w_lock;
+		}
+	} else if (memcmp(&adapter->fdir_mask, &mask, sizeof(mask))) {
+		e_err(drv, "Only one mask supported per port\n");
+		goto err_out_w_lock;
+	}
+
+	/* apply mask and compute/store hash */
+	ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask);
+
+	/* program filters to filter memory */
+	err = ixgbe_fdir_write_perfect_filter_82599(hw,
+				&input->filter, input->sw_idx, queue);
+	if (err)
+		goto err_out_w_lock;
+
+	ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx);
+
+	spin_unlock(&adapter->fdir_perfect_lock);
+
+	return err;
+err_out_w_lock:
+	spin_unlock(&adapter->fdir_perfect_lock);
+err_out:
+	kfree(input);
+	return -EINVAL;
+}
+
+static int ixgbe_del_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
+					struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	int err;
+
+	spin_lock(&adapter->fdir_perfect_lock);
+	err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, fsp->location);
+	spin_unlock(&adapter->fdir_perfect_lock);
+
+	return err;
+}
+
+#define UDP_RSS_FLAGS (IXGBE_FLAG2_RSS_FIELD_IPV4_UDP | \
+		       IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+static int ixgbe_set_rss_hash_opt(struct ixgbe_adapter *adapter,
+				  struct ethtool_rxnfc *nfc)
+{
+	u32 flags2 = adapter->flags2;
+
+	/*
+	 * RSS does not support anything other than hashing
+	 * to queues on src and dst IPs and ports
+	 */
+	if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    !(nfc->data & RXH_L4_B_0_1) ||
+		    !(nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	case UDP_V4_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV4_UDP;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV4_UDP;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case UDP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV6_UDP;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV6_UDP;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case SCTP_V4_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    (nfc->data & RXH_L4_B_0_1) ||
+		    (nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* if we changed something we need to update flags */
+	if (flags2 != adapter->flags2) {
+		struct ixgbe_hw *hw = &adapter->hw;
+		u32 mrqc;
+		unsigned int pf_pool = adapter->num_vfs;
+
+		if ((hw->mac.type >= ixgbe_mac_X550) &&
+		    (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
+			mrqc = IXGBE_READ_REG(hw, IXGBE_PFVFMRQC(pf_pool));
+		else
+			mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+
+		if ((flags2 & UDP_RSS_FLAGS) &&
+		    !(adapter->flags2 & UDP_RSS_FLAGS))
+			e_warn(drv, "enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n");
+
+		adapter->flags2 = flags2;
+
+		/* Perform hash on these packet types */
+		mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4
+		      | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
+		      | IXGBE_MRQC_RSS_FIELD_IPV6
+		      | IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
+
+		mrqc &= ~(IXGBE_MRQC_RSS_FIELD_IPV4_UDP |
+			  IXGBE_MRQC_RSS_FIELD_IPV6_UDP);
+
+		if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP)
+			mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
+
+		if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+			mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
+
+		if ((hw->mac.type >= ixgbe_mac_X550) &&
+		    (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
+			IXGBE_WRITE_REG(hw, IXGBE_PFVFMRQC(pf_pool), mrqc);
+		else
+			IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+	}
+
+	return 0;
+}
+
+static int ixgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		ret = ixgbe_add_ethtool_fdir_entry(adapter, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		ret = ixgbe_del_ethtool_fdir_entry(adapter, cmd);
+		break;
+	case ETHTOOL_SRXFH:
+		ret = ixgbe_set_rss_hash_opt(adapter, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static u32 ixgbe_get_rxfh_key_size(struct net_device *netdev)
+{
+	return IXGBE_RSS_KEY_SIZE;
+}
+
+static u32 ixgbe_rss_indir_size(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	return ixgbe_rss_indir_tbl_entries(adapter);
+}
+
+static void ixgbe_get_reta(struct ixgbe_adapter *adapter, u32 *indir)
+{
+	int i, reta_size = ixgbe_rss_indir_tbl_entries(adapter);
+	u16 rss_m = adapter->ring_feature[RING_F_RSS].mask;
+
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+		rss_m = adapter->ring_feature[RING_F_RSS].indices - 1;
+
+	for (i = 0; i < reta_size; i++)
+		indir[i] = adapter->rss_indir_tbl[i] & rss_m;
+}
+
+static int ixgbe_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+			  u8 *hfunc)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	if (indir)
+		ixgbe_get_reta(adapter, indir);
+
+	if (key)
+		memcpy(key, adapter->rss_key, ixgbe_get_rxfh_key_size(netdev));
+
+	return 0;
+}
+
+static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir,
+			  const u8 *key, const u8 hfunc)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	int i;
+	u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter);
+
+	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+		return -EOPNOTSUPP;
+
+	/* Fill out the redirection table */
+	if (indir) {
+		int max_queues = min_t(int, adapter->num_rx_queues,
+				       ixgbe_rss_indir_tbl_max(adapter));
+
+		/*Allow at least 2 queues w/ SR-IOV.*/
+		if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) &&
+		    (max_queues < 2))
+			max_queues = 2;
+
+		/* Verify user input. */
+		for (i = 0; i < reta_entries; i++)
+			if (indir[i] >= max_queues)
+				return -EINVAL;
+
+		for (i = 0; i < reta_entries; i++)
+			adapter->rss_indir_tbl[i] = indir[i];
+
+		ixgbe_store_reta(adapter);
+	}
+
+	/* Fill out the rss hash key */
+	if (key) {
+		memcpy(adapter->rss_key, key, ixgbe_get_rxfh_key_size(netdev));
+		ixgbe_store_key(adapter);
+	}
+
+	return 0;
+}
+
+static int ixgbe_get_ts_info(struct net_device *dev,
+			     struct ethtool_ts_info *info)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+
+	/* we always support timestamping disabled */
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE);
+
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_ALL);
+		break;
+	case ixgbe_mac_X540:
+	case ixgbe_mac_82599EB:
+		info->rx_filters |=
+			BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
+			BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
+			BIT(HWTSTAMP_FILTER_PTP_V2_EVENT);
+		break;
+	default:
+		return ethtool_op_get_ts_info(dev, info);
+	}
+
+	info->so_timestamping =
+		SOF_TIMESTAMPING_TX_SOFTWARE |
+		SOF_TIMESTAMPING_RX_SOFTWARE |
+		SOF_TIMESTAMPING_SOFTWARE |
+		SOF_TIMESTAMPING_TX_HARDWARE |
+		SOF_TIMESTAMPING_RX_HARDWARE |
+		SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	if (adapter->ptp_clock)
+		info->phc_index = ptp_clock_index(adapter->ptp_clock);
+	else
+		info->phc_index = -1;
+
+	info->tx_types =
+		BIT(HWTSTAMP_TX_OFF) |
+		BIT(HWTSTAMP_TX_ON);
+
+	return 0;
+}
+
+static unsigned int ixgbe_max_channels(struct ixgbe_adapter *adapter)
+{
+	unsigned int max_combined;
+	u8 tcs = adapter->hw_tcs;
+
+	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) {
+		/* We only support one q_vector without MSI-X */
+		max_combined = 1;
+	} else if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+		/* Limit value based on the queue mask */
+		max_combined = adapter->ring_feature[RING_F_RSS].mask + 1;
+	} else if (tcs > 1) {
+		/* For DCB report channels per traffic class */
+		if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
+			/* 8 TC w/ 4 queues per TC */
+			max_combined = 4;
+		} else if (tcs > 4) {
+			/* 8 TC w/ 8 queues per TC */
+			max_combined = 8;
+		} else {
+			/* 4 TC w/ 16 queues per TC */
+			max_combined = 16;
+		}
+	} else if (adapter->atr_sample_rate) {
+		/* support up to 64 queues with ATR */
+		max_combined = IXGBE_MAX_FDIR_INDICES;
+	} else {
+		/* support up to 16 queues with RSS */
+		max_combined = ixgbe_max_rss_indices(adapter);
+	}
+
+	return min_t(int, max_combined, num_online_cpus());
+}
+
+static void ixgbe_get_channels(struct net_device *dev,
+			       struct ethtool_channels *ch)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+
+	/* report maximum channels */
+	ch->max_combined = ixgbe_max_channels(adapter);
+
+	/* report info for other vector */
+	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
+		ch->max_other = NON_Q_VECTORS;
+		ch->other_count = NON_Q_VECTORS;
+	}
+
+	/* record RSS queues */
+	ch->combined_count = adapter->ring_feature[RING_F_RSS].indices;
+
+	/* nothing else to report if RSS is disabled */
+	if (ch->combined_count == 1)
+		return;
+
+	/* we do not support ATR queueing if SR-IOV is enabled */
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+		return;
+
+	/* same thing goes for being DCB enabled */
+	if (adapter->hw_tcs > 1)
+		return;
+
+	/* if ATR is disabled we can exit */
+	if (!adapter->atr_sample_rate)
+		return;
+
+	/* report flow director queues as maximum channels */
+	ch->combined_count = adapter->ring_feature[RING_F_FDIR].indices;
+}
+
+static int ixgbe_set_channels(struct net_device *dev,
+			      struct ethtool_channels *ch)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	unsigned int count = ch->combined_count;
+	u8 max_rss_indices = ixgbe_max_rss_indices(adapter);
+
+	/* verify they are not requesting separate vectors */
+	if (!count || ch->rx_count || ch->tx_count)
+		return -EINVAL;
+
+	/* verify other_count has not changed */
+	if (ch->other_count != NON_Q_VECTORS)
+		return -EINVAL;
+
+	/* verify the number of channels does not exceed hardware limits */
+	if (count > ixgbe_max_channels(adapter))
+		return -EINVAL;
+
+	/* update feature limits from largest to smallest supported values */
+	adapter->ring_feature[RING_F_FDIR].limit = count;
+
+	/* cap RSS limit */
+	if (count > max_rss_indices)
+		count = max_rss_indices;
+	adapter->ring_feature[RING_F_RSS].limit = count;
+
+#ifdef IXGBE_FCOE
+	/* cap FCoE limit at 8 */
+	if (count > IXGBE_FCRETA_SIZE)
+		count = IXGBE_FCRETA_SIZE;
+	adapter->ring_feature[RING_F_FCOE].limit = count;
+
+#endif
+	/* use setup TC to update any traffic class queue mapping */
+	return ixgbe_setup_tc(dev, adapter->hw_tcs);
+}
+
+static int ixgbe_get_module_info(struct net_device *dev,
+				       struct ethtool_modinfo *modinfo)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	s32 status;
+	u8 sff8472_rev, addr_mode;
+	bool page_swap = false;
+
+	if (hw->phy.type == ixgbe_phy_fw)
+		return -ENXIO;
+
+	/* Check whether we support SFF-8472 or not */
+	status = hw->phy.ops.read_i2c_eeprom(hw,
+					     IXGBE_SFF_SFF_8472_COMP,
+					     &sff8472_rev);
+	if (status)
+		return -EIO;
+
+	/* addressing mode is not supported */
+	status = hw->phy.ops.read_i2c_eeprom(hw,
+					     IXGBE_SFF_SFF_8472_SWAP,
+					     &addr_mode);
+	if (status)
+		return -EIO;
+
+	if (addr_mode & IXGBE_SFF_ADDRESSING_MODE) {
+		e_err(drv, "Address change required to access page 0xA2, but not supported. Please report the module type to the driver maintainers.\n");
+		page_swap = true;
+	}
+
+	if (sff8472_rev == IXGBE_SFF_SFF_8472_UNSUP || page_swap ||
+	    !(addr_mode & IXGBE_SFF_DDM_IMPLEMENTED)) {
+		/* We have a SFP, but it does not support SFF-8472 */
+		modinfo->type = ETH_MODULE_SFF_8079;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+	} else {
+		/* We have a SFP which supports a revision of SFF-8472. */
+		modinfo->type = ETH_MODULE_SFF_8472;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+	}
+
+	return 0;
+}
+
+static int ixgbe_get_module_eeprom(struct net_device *dev,
+					 struct ethtool_eeprom *ee,
+					 u8 *data)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	s32 status = IXGBE_ERR_PHY_ADDR_INVALID;
+	u8 databyte = 0xFF;
+	int i = 0;
+
+	if (ee->len == 0)
+		return -EINVAL;
+
+	if (hw->phy.type == ixgbe_phy_fw)
+		return -ENXIO;
+
+	for (i = ee->offset; i < ee->offset + ee->len; i++) {
+		/* I2C reads can take long time */
+		if (test_bit(__IXGBE_IN_SFP_INIT, &adapter->state))
+			return -EBUSY;
+
+		if (i < ETH_MODULE_SFF_8079_LEN)
+			status = hw->phy.ops.read_i2c_eeprom(hw, i, &databyte);
+		else
+			status = hw->phy.ops.read_i2c_sff8472(hw, i, &databyte);
+
+		if (status)
+			return -EIO;
+
+		data[i - ee->offset] = databyte;
+	}
+
+	return 0;
+}
+
+static const struct {
+	ixgbe_link_speed mac_speed;
+	u32 supported;
+} ixgbe_ls_map[] = {
+	{ IXGBE_LINK_SPEED_10_FULL, SUPPORTED_10baseT_Full },
+	{ IXGBE_LINK_SPEED_100_FULL, SUPPORTED_100baseT_Full },
+	{ IXGBE_LINK_SPEED_1GB_FULL, SUPPORTED_1000baseT_Full },
+	{ IXGBE_LINK_SPEED_2_5GB_FULL, SUPPORTED_2500baseX_Full },
+	{ IXGBE_LINK_SPEED_10GB_FULL, SUPPORTED_10000baseT_Full },
+};
+
+static const struct {
+	u32 lp_advertised;
+	u32 mac_speed;
+} ixgbe_lp_map[] = {
+	{ FW_PHY_ACT_UD_2_100M_TX_EEE, SUPPORTED_100baseT_Full },
+	{ FW_PHY_ACT_UD_2_1G_T_EEE, SUPPORTED_1000baseT_Full },
+	{ FW_PHY_ACT_UD_2_10G_T_EEE, SUPPORTED_10000baseT_Full },
+	{ FW_PHY_ACT_UD_2_1G_KX_EEE, SUPPORTED_1000baseKX_Full },
+	{ FW_PHY_ACT_UD_2_10G_KX4_EEE, SUPPORTED_10000baseKX4_Full },
+	{ FW_PHY_ACT_UD_2_10G_KR_EEE, SUPPORTED_10000baseKR_Full},
+};
+
+static int
+ixgbe_get_eee_fw(struct ixgbe_adapter *adapter, struct ethtool_eee *edata)
+{
+	u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 };
+	struct ixgbe_hw *hw = &adapter->hw;
+	s32 rc;
+	u16 i;
+
+	rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_UD_2, &info);
+	if (rc)
+		return rc;
+
+	edata->lp_advertised = 0;
+	for (i = 0; i < ARRAY_SIZE(ixgbe_lp_map); ++i) {
+		if (info[0] & ixgbe_lp_map[i].lp_advertised)
+			edata->lp_advertised |= ixgbe_lp_map[i].mac_speed;
+	}
+
+	edata->supported = 0;
+	for (i = 0; i < ARRAY_SIZE(ixgbe_ls_map); ++i) {
+		if (hw->phy.eee_speeds_supported & ixgbe_ls_map[i].mac_speed)
+			edata->supported |= ixgbe_ls_map[i].supported;
+	}
+
+	edata->advertised = 0;
+	for (i = 0; i < ARRAY_SIZE(ixgbe_ls_map); ++i) {
+		if (hw->phy.eee_speeds_advertised & ixgbe_ls_map[i].mac_speed)
+			edata->advertised |= ixgbe_ls_map[i].supported;
+	}
+
+	edata->eee_enabled = !!edata->advertised;
+	edata->tx_lpi_enabled = edata->eee_enabled;
+	if (edata->advertised & edata->lp_advertised)
+		edata->eee_active = true;
+
+	return 0;
+}
+
+static int ixgbe_get_eee(struct net_device *netdev, struct ethtool_eee *edata)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	if (!(adapter->flags2 & IXGBE_FLAG2_EEE_CAPABLE))
+		return -EOPNOTSUPP;
+
+	if (hw->phy.eee_speeds_supported && hw->phy.type == ixgbe_phy_fw)
+		return ixgbe_get_eee_fw(adapter, edata);
+
+	return -EOPNOTSUPP;
+}
+
+static int ixgbe_set_eee(struct net_device *netdev, struct ethtool_eee *edata)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ethtool_eee eee_data;
+	s32 ret_val;
+
+	if (!(adapter->flags2 & IXGBE_FLAG2_EEE_CAPABLE))
+		return -EOPNOTSUPP;
+
+	memset(&eee_data, 0, sizeof(struct ethtool_eee));
+
+	ret_val = ixgbe_get_eee(netdev, &eee_data);
+	if (ret_val)
+		return ret_val;
+
+	if (eee_data.eee_enabled && !edata->eee_enabled) {
+		if (eee_data.tx_lpi_enabled != edata->tx_lpi_enabled) {
+			e_err(drv, "Setting EEE tx-lpi is not supported\n");
+			return -EINVAL;
+		}
+
+		if (eee_data.tx_lpi_timer != edata->tx_lpi_timer) {
+			e_err(drv,
+			      "Setting EEE Tx LPI timer is not supported\n");
+			return -EINVAL;
+		}
+
+		if (eee_data.advertised != edata->advertised) {
+			e_err(drv,
+			      "Setting EEE advertised speeds is not supported\n");
+			return -EINVAL;
+		}
+	}
+
+	if (eee_data.eee_enabled != edata->eee_enabled) {
+		if (edata->eee_enabled) {
+			adapter->flags2 |= IXGBE_FLAG2_EEE_ENABLED;
+			hw->phy.eee_speeds_advertised =
+						   hw->phy.eee_speeds_supported;
+		} else {
+			adapter->flags2 &= ~IXGBE_FLAG2_EEE_ENABLED;
+			hw->phy.eee_speeds_advertised = 0;
+		}
+
+		/* reset link */
+		if (netif_running(netdev))
+			ixgbe_reinit_locked(adapter);
+		else
+			ixgbe_reset(adapter);
+	}
+
+	return 0;
+}
+
+static u32 ixgbe_get_priv_flags(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	u32 priv_flags = 0;
+
+	if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY)
+		priv_flags |= IXGBE_PRIV_FLAGS_LEGACY_RX;
+
+	if (adapter->flags2 & IXGBE_FLAG2_VF_IPSEC_ENABLED)
+		priv_flags |= IXGBE_PRIV_FLAGS_VF_IPSEC_EN;
+
+	if (adapter->flags2 & IXGBE_FLAG2_AUTO_DISABLE_VF)
+		priv_flags |= IXGBE_PRIV_FLAGS_AUTO_DISABLE_VF;
+
+	return priv_flags;
+}
+
+static int ixgbe_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	unsigned int flags2 = adapter->flags2;
+	unsigned int i;
+
+	flags2 &= ~IXGBE_FLAG2_RX_LEGACY;
+	if (priv_flags & IXGBE_PRIV_FLAGS_LEGACY_RX)
+		flags2 |= IXGBE_FLAG2_RX_LEGACY;
+
+	flags2 &= ~IXGBE_FLAG2_VF_IPSEC_ENABLED;
+	if (priv_flags & IXGBE_PRIV_FLAGS_VF_IPSEC_EN)
+		flags2 |= IXGBE_FLAG2_VF_IPSEC_ENABLED;
+
+	flags2 &= ~IXGBE_FLAG2_AUTO_DISABLE_VF;
+	if (priv_flags & IXGBE_PRIV_FLAGS_AUTO_DISABLE_VF) {
+		if (adapter->hw.mac.type == ixgbe_mac_82599EB) {
+			/* Reset primary abort counter */
+			for (i = 0; i < adapter->num_vfs; i++)
+				adapter->vfinfo[i].primary_abort_count = 0;
+
+			flags2 |= IXGBE_FLAG2_AUTO_DISABLE_VF;
+		} else {
+			e_info(probe,
+			       "Cannot set private flags: Operation not supported\n");
+			return -EOPNOTSUPP;
+		}
+	}
+
+	if (flags2 != adapter->flags2) {
+		adapter->flags2 = flags2;
+
+		/* reset interface to repopulate queues */
+		if (netif_running(netdev))
+			ixgbe_reinit_locked(adapter);
+	}
+
+	return 0;
+}
+
+static const struct ethtool_ops ixgbe_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_USECS,
+	.get_drvinfo            = ixgbe_get_drvinfo,
+	.get_regs_len           = ixgbe_get_regs_len,
+	.get_regs               = ixgbe_get_regs,
+	.get_wol                = ixgbe_get_wol,
+	.set_wol                = ixgbe_set_wol,
+	.nway_reset             = ixgbe_nway_reset,
+	.get_link               = ethtool_op_get_link,
+	.get_eeprom_len         = ixgbe_get_eeprom_len,
+	.get_eeprom             = ixgbe_get_eeprom,
+	.set_eeprom             = ixgbe_set_eeprom,
+	.get_ringparam          = ixgbe_get_ringparam,
+	.set_ringparam          = ixgbe_set_ringparam,
+	.get_pause_stats	= ixgbe_get_pause_stats,
+	.get_pauseparam         = ixgbe_get_pauseparam,
+	.set_pauseparam         = ixgbe_set_pauseparam,
+	.get_msglevel           = ixgbe_get_msglevel,
+	.set_msglevel           = ixgbe_set_msglevel,
+	.self_test              = ixgbe_diag_test,
+	.get_strings            = ixgbe_get_strings,
+	.set_phys_id            = ixgbe_set_phys_id,
+	.get_sset_count         = ixgbe_get_sset_count,
+	.get_ethtool_stats      = ixgbe_get_ethtool_stats,
+	.get_coalesce           = ixgbe_get_coalesce,
+	.set_coalesce           = ixgbe_set_coalesce,
+	.get_rxnfc		= ixgbe_get_rxnfc,
+	.set_rxnfc		= ixgbe_set_rxnfc,
+	.get_rxfh_indir_size	= ixgbe_rss_indir_size,
+	.get_rxfh_key_size	= ixgbe_get_rxfh_key_size,
+	.get_rxfh		= ixgbe_get_rxfh,
+	.set_rxfh		= ixgbe_set_rxfh,
+	.get_eee		= ixgbe_get_eee,
+	.set_eee		= ixgbe_set_eee,
+	.get_channels		= ixgbe_get_channels,
+	.set_channels		= ixgbe_set_channels,
+	.get_priv_flags		= ixgbe_get_priv_flags,
+	.set_priv_flags		= ixgbe_set_priv_flags,
+	.get_ts_info		= ixgbe_get_ts_info,
+	.get_module_info	= ixgbe_get_module_info,
+	.get_module_eeprom	= ixgbe_get_module_eeprom,
+	.get_link_ksettings     = ixgbe_get_link_ksettings,
+	.set_link_ksettings     = ixgbe_set_link_ksettings,
+};
+
+void ixgbe_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &ixgbe_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
new file mode 100644
index 0000000000..7311bd545a
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
@@ -0,0 +1,1047 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "ixgbe.h"
+#include <linux/if_ether.h>
+#include <linux/gfp.h>
+#include <linux/if_vlan.h>
+#include <generated/utsrelease.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/fc/fc_fs.h>
+#include <scsi/fc/fc_fcoe.h>
+#include <scsi/libfc.h>
+#include <scsi/libfcoe.h>
+
+/**
+ * ixgbe_fcoe_clear_ddp - clear the given ddp context
+ * @ddp: ptr to the ixgbe_fcoe_ddp
+ *
+ * Returns : none
+ *
+ */
+static inline void ixgbe_fcoe_clear_ddp(struct ixgbe_fcoe_ddp *ddp)
+{
+	ddp->len = 0;
+	ddp->err = 1;
+	ddp->udl = NULL;
+	ddp->udp = 0UL;
+	ddp->sgl = NULL;
+	ddp->sgc = 0;
+}
+
+/**
+ * ixgbe_fcoe_ddp_put - free the ddp context for a given xid
+ * @netdev: the corresponding net_device
+ * @xid: the xid that corresponding ddp will be freed
+ *
+ * This is the implementation of net_device_ops.ndo_fcoe_ddp_done
+ * and it is expected to be called by ULD, i.e., FCP layer of libfc
+ * to release the corresponding ddp context when the I/O is done.
+ *
+ * Returns : data length already ddp-ed in bytes
+ */
+int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid)
+{
+	int len;
+	struct ixgbe_fcoe *fcoe;
+	struct ixgbe_adapter *adapter;
+	struct ixgbe_fcoe_ddp *ddp;
+	struct ixgbe_hw *hw;
+	u32 fcbuff;
+
+	if (!netdev)
+		return 0;
+
+	if (xid >= netdev->fcoe_ddp_xid)
+		return 0;
+
+	adapter = netdev_priv(netdev);
+	fcoe = &adapter->fcoe;
+	ddp = &fcoe->ddp[xid];
+	if (!ddp->udl)
+		return 0;
+
+	hw = &adapter->hw;
+	len = ddp->len;
+	/* if no error then skip ddp context invalidation */
+	if (!ddp->err)
+		goto skip_ddpinv;
+
+	if (hw->mac.type == ixgbe_mac_X550) {
+		/* X550 does not require DDP FCoE lock */
+
+		IXGBE_WRITE_REG(hw, IXGBE_FCDFC(0, xid), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_FCDFC(3, xid),
+				(xid | IXGBE_FCFLTRW_WE));
+
+		/* program FCBUFF */
+		IXGBE_WRITE_REG(hw, IXGBE_FCDDC(2, xid), 0);
+
+		/* program FCDMARW */
+		IXGBE_WRITE_REG(hw, IXGBE_FCDDC(3, xid),
+				(xid | IXGBE_FCDMARW_WE));
+
+		/* read FCBUFF to check context invalidated */
+		IXGBE_WRITE_REG(hw, IXGBE_FCDDC(3, xid),
+				(xid | IXGBE_FCDMARW_RE));
+		fcbuff = IXGBE_READ_REG(hw, IXGBE_FCDDC(2, xid));
+	} else {
+		/* other hardware requires DDP FCoE lock */
+		spin_lock_bh(&fcoe->lock);
+		IXGBE_WRITE_REG(hw, IXGBE_FCFLT, 0);
+		IXGBE_WRITE_REG(hw, IXGBE_FCFLTRW,
+				(xid | IXGBE_FCFLTRW_WE));
+		IXGBE_WRITE_REG(hw, IXGBE_FCBUFF, 0);
+		IXGBE_WRITE_REG(hw, IXGBE_FCDMARW,
+				(xid | IXGBE_FCDMARW_WE));
+
+		/* guaranteed to be invalidated after 100us */
+		IXGBE_WRITE_REG(hw, IXGBE_FCDMARW,
+				(xid | IXGBE_FCDMARW_RE));
+		fcbuff = IXGBE_READ_REG(hw, IXGBE_FCBUFF);
+		spin_unlock_bh(&fcoe->lock);
+		}
+
+	if (fcbuff & IXGBE_FCBUFF_VALID)
+		usleep_range(100, 150);
+
+skip_ddpinv:
+	if (ddp->sgl)
+		dma_unmap_sg(&adapter->pdev->dev, ddp->sgl, ddp->sgc,
+			     DMA_FROM_DEVICE);
+	if (ddp->pool) {
+		dma_pool_free(ddp->pool, ddp->udl, ddp->udp);
+		ddp->pool = NULL;
+	}
+
+	ixgbe_fcoe_clear_ddp(ddp);
+
+	return len;
+}
+
+/**
+ * ixgbe_fcoe_ddp_setup - called to set up ddp context
+ * @netdev: the corresponding net_device
+ * @xid: the exchange id requesting ddp
+ * @sgl: the scatter-gather list for this request
+ * @sgc: the number of scatter-gather items
+ * @target_mode: 1 to setup target mode, 0 to setup initiator mode
+ *
+ * Returns : 1 for success and 0 for no ddp
+ */
+static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
+				struct scatterlist *sgl, unsigned int sgc,
+				int target_mode)
+{
+	struct ixgbe_adapter *adapter;
+	struct ixgbe_hw *hw;
+	struct ixgbe_fcoe *fcoe;
+	struct ixgbe_fcoe_ddp *ddp;
+	struct ixgbe_fcoe_ddp_pool *ddp_pool;
+	struct scatterlist *sg;
+	unsigned int i, j, dmacount;
+	unsigned int len;
+	static const unsigned int bufflen = IXGBE_FCBUFF_MIN;
+	unsigned int firstoff = 0;
+	unsigned int lastsize;
+	unsigned int thisoff = 0;
+	unsigned int thislen = 0;
+	u32 fcbuff, fcdmarw, fcfltrw, fcrxctl;
+	dma_addr_t addr = 0;
+
+	if (!netdev || !sgl)
+		return 0;
+
+	adapter = netdev_priv(netdev);
+	if (xid >= netdev->fcoe_ddp_xid) {
+		e_warn(drv, "xid=0x%x out-of-range\n", xid);
+		return 0;
+	}
+
+	/* no DDP if we are already down or resetting */
+	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
+	    test_bit(__IXGBE_RESETTING, &adapter->state))
+		return 0;
+
+	fcoe = &adapter->fcoe;
+	ddp = &fcoe->ddp[xid];
+	if (ddp->sgl) {
+		e_err(drv, "xid 0x%x w/ non-null sgl=%p nents=%d\n",
+		      xid, ddp->sgl, ddp->sgc);
+		return 0;
+	}
+	ixgbe_fcoe_clear_ddp(ddp);
+
+
+	if (!fcoe->ddp_pool) {
+		e_warn(drv, "No ddp_pool resources allocated\n");
+		return 0;
+	}
+
+	ddp_pool = per_cpu_ptr(fcoe->ddp_pool, get_cpu());
+	if (!ddp_pool->pool) {
+		e_warn(drv, "xid=0x%x no ddp pool for fcoe\n", xid);
+		goto out_noddp;
+	}
+
+	/* setup dma from scsi command sgl */
+	dmacount = dma_map_sg(&adapter->pdev->dev, sgl, sgc, DMA_FROM_DEVICE);
+	if (dmacount == 0) {
+		e_err(drv, "xid 0x%x DMA map error\n", xid);
+		goto out_noddp;
+	}
+
+	/* alloc the udl from per cpu ddp pool */
+	ddp->udl = dma_pool_alloc(ddp_pool->pool, GFP_ATOMIC, &ddp->udp);
+	if (!ddp->udl) {
+		e_err(drv, "failed allocated ddp context\n");
+		goto out_noddp_unmap;
+	}
+	ddp->pool = ddp_pool->pool;
+	ddp->sgl = sgl;
+	ddp->sgc = sgc;
+
+	j = 0;
+	for_each_sg(sgl, sg, dmacount, i) {
+		addr = sg_dma_address(sg);
+		len = sg_dma_len(sg);
+		while (len) {
+			/* max number of buffers allowed in one DDP context */
+			if (j >= IXGBE_BUFFCNT_MAX) {
+				ddp_pool->noddp++;
+				goto out_noddp_free;
+			}
+
+			/* get the offset of length of current buffer */
+			thisoff = addr & ((dma_addr_t)bufflen - 1);
+			thislen = min((bufflen - thisoff), len);
+			/*
+			 * all but the 1st buffer (j == 0)
+			 * must be aligned on bufflen
+			 */
+			if ((j != 0) && (thisoff))
+				goto out_noddp_free;
+			/*
+			 * all but the last buffer
+			 * ((i == (dmacount - 1)) && (thislen == len))
+			 * must end at bufflen
+			 */
+			if (((i != (dmacount - 1)) || (thislen != len))
+			    && ((thislen + thisoff) != bufflen))
+				goto out_noddp_free;
+
+			ddp->udl[j] = (u64)(addr - thisoff);
+			/* only the first buffer may have none-zero offset */
+			if (j == 0)
+				firstoff = thisoff;
+			len -= thislen;
+			addr += thislen;
+			j++;
+		}
+	}
+	/* only the last buffer may have non-full bufflen */
+	lastsize = thisoff + thislen;
+
+	/*
+	 * lastsize can not be buffer len.
+	 * If it is then adding another buffer with lastsize = 1.
+	 */
+	if (lastsize == bufflen) {
+		if (j >= IXGBE_BUFFCNT_MAX) {
+			ddp_pool->noddp_ext_buff++;
+			goto out_noddp_free;
+		}
+
+		ddp->udl[j] = (u64)(fcoe->extra_ddp_buffer_dma);
+		j++;
+		lastsize = 1;
+	}
+	put_cpu();
+
+	fcbuff = (IXGBE_FCBUFF_4KB << IXGBE_FCBUFF_BUFFSIZE_SHIFT);
+	fcbuff |= ((j & 0xff) << IXGBE_FCBUFF_BUFFCNT_SHIFT);
+	fcbuff |= (firstoff << IXGBE_FCBUFF_OFFSET_SHIFT);
+	/* Set WRCONTX bit to allow DDP for target */
+	if (target_mode)
+		fcbuff |= (IXGBE_FCBUFF_WRCONTX);
+	fcbuff |= (IXGBE_FCBUFF_VALID);
+
+	fcdmarw = xid;
+	fcdmarw |= IXGBE_FCDMARW_WE;
+	fcdmarw |= (lastsize << IXGBE_FCDMARW_LASTSIZE_SHIFT);
+
+	fcfltrw = xid;
+	fcfltrw |= IXGBE_FCFLTRW_WE;
+
+	/* program DMA context */
+	hw = &adapter->hw;
+
+	/* turn on last frame indication for target mode as FCP_RSPtarget is
+	 * supposed to send FCP_RSP when it is done. */
+	if (target_mode && !test_bit(__IXGBE_FCOE_TARGET, &fcoe->mode)) {
+		set_bit(__IXGBE_FCOE_TARGET, &fcoe->mode);
+		fcrxctl = IXGBE_READ_REG(hw, IXGBE_FCRXCTRL);
+		fcrxctl |= IXGBE_FCRXCTRL_LASTSEQH;
+		IXGBE_WRITE_REG(hw, IXGBE_FCRXCTRL, fcrxctl);
+	}
+
+	if (hw->mac.type == ixgbe_mac_X550) {
+		/* X550 does not require DDP lock */
+
+		IXGBE_WRITE_REG(hw, IXGBE_FCDDC(0, xid),
+				ddp->udp & DMA_BIT_MASK(32));
+		IXGBE_WRITE_REG(hw, IXGBE_FCDDC(1, xid), (u64)ddp->udp >> 32);
+		IXGBE_WRITE_REG(hw, IXGBE_FCDDC(2, xid), fcbuff);
+		IXGBE_WRITE_REG(hw, IXGBE_FCDDC(3, xid), fcdmarw);
+		/* program filter context */
+		IXGBE_WRITE_REG(hw, IXGBE_FCDFC(0, xid), IXGBE_FCFLT_VALID);
+		IXGBE_WRITE_REG(hw, IXGBE_FCDFC(1, xid), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_FCDFC(3, xid), fcfltrw);
+	} else {
+		/* DDP lock for indirect DDP context access */
+		spin_lock_bh(&fcoe->lock);
+
+		IXGBE_WRITE_REG(hw, IXGBE_FCPTRL, ddp->udp & DMA_BIT_MASK(32));
+		IXGBE_WRITE_REG(hw, IXGBE_FCPTRH, (u64)ddp->udp >> 32);
+		IXGBE_WRITE_REG(hw, IXGBE_FCBUFF, fcbuff);
+		IXGBE_WRITE_REG(hw, IXGBE_FCDMARW, fcdmarw);
+		/* program filter context */
+		IXGBE_WRITE_REG(hw, IXGBE_FCPARAM, 0);
+		IXGBE_WRITE_REG(hw, IXGBE_FCFLT, IXGBE_FCFLT_VALID);
+		IXGBE_WRITE_REG(hw, IXGBE_FCFLTRW, fcfltrw);
+
+		spin_unlock_bh(&fcoe->lock);
+	}
+
+	return 1;
+
+out_noddp_free:
+	dma_pool_free(ddp->pool, ddp->udl, ddp->udp);
+	ixgbe_fcoe_clear_ddp(ddp);
+
+out_noddp_unmap:
+	dma_unmap_sg(&adapter->pdev->dev, sgl, sgc, DMA_FROM_DEVICE);
+out_noddp:
+	put_cpu();
+	return 0;
+}
+
+/**
+ * ixgbe_fcoe_ddp_get - called to set up ddp context in initiator mode
+ * @netdev: the corresponding net_device
+ * @xid: the exchange id requesting ddp
+ * @sgl: the scatter-gather list for this request
+ * @sgc: the number of scatter-gather items
+ *
+ * This is the implementation of net_device_ops.ndo_fcoe_ddp_setup
+ * and is expected to be called from ULD, e.g., FCP layer of libfc
+ * to set up ddp for the corresponding xid of the given sglist for
+ * the corresponding I/O.
+ *
+ * Returns : 1 for success and 0 for no ddp
+ */
+int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid,
+		       struct scatterlist *sgl, unsigned int sgc)
+{
+	return ixgbe_fcoe_ddp_setup(netdev, xid, sgl, sgc, 0);
+}
+
+/**
+ * ixgbe_fcoe_ddp_target - called to set up ddp context in target mode
+ * @netdev: the corresponding net_device
+ * @xid: the exchange id requesting ddp
+ * @sgl: the scatter-gather list for this request
+ * @sgc: the number of scatter-gather items
+ *
+ * This is the implementation of net_device_ops.ndo_fcoe_ddp_target
+ * and is expected to be called from ULD, e.g., FCP layer of libfc
+ * to set up ddp for the corresponding xid of the given sglist for
+ * the corresponding I/O. The DDP in target mode is a write I/O request
+ * from the initiator.
+ *
+ * Returns : 1 for success and 0 for no ddp
+ */
+int ixgbe_fcoe_ddp_target(struct net_device *netdev, u16 xid,
+			    struct scatterlist *sgl, unsigned int sgc)
+{
+	return ixgbe_fcoe_ddp_setup(netdev, xid, sgl, sgc, 1);
+}
+
+/**
+ * ixgbe_fcoe_ddp - check ddp status and mark it done
+ * @adapter: ixgbe adapter
+ * @rx_desc: advanced rx descriptor
+ * @skb: the skb holding the received data
+ *
+ * This checks ddp status.
+ *
+ * Returns : < 0 indicates an error or not a FCiE ddp, 0 indicates
+ * not passing the skb to ULD, > 0 indicates is the length of data
+ * being ddped.
+ */
+int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter,
+		   union ixgbe_adv_rx_desc *rx_desc,
+		   struct sk_buff *skb)
+{
+	int rc = -EINVAL;
+	struct ixgbe_fcoe *fcoe;
+	struct ixgbe_fcoe_ddp *ddp;
+	struct fc_frame_header *fh;
+	struct fcoe_crc_eof *crc;
+	__le32 fcerr = ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_FCERR);
+	__le32 ddp_err;
+	int ddp_max;
+	u32 fctl;
+	u16 xid;
+
+	if (fcerr == cpu_to_le32(IXGBE_FCERR_BADCRC))
+		skb->ip_summed = CHECKSUM_NONE;
+	else
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (eth_hdr(skb)->h_proto == htons(ETH_P_8021Q))
+		fh = (struct fc_frame_header *)(skb->data +
+			sizeof(struct vlan_hdr) + sizeof(struct fcoe_hdr));
+	else
+		fh = (struct fc_frame_header *)(skb->data +
+			sizeof(struct fcoe_hdr));
+
+	fctl = ntoh24(fh->fh_f_ctl);
+	if (fctl & FC_FC_EX_CTX)
+		xid =  be16_to_cpu(fh->fh_ox_id);
+	else
+		xid =  be16_to_cpu(fh->fh_rx_id);
+
+	ddp_max = IXGBE_FCOE_DDP_MAX;
+	/* X550 has different DDP Max limit */
+	if (adapter->hw.mac.type == ixgbe_mac_X550)
+		ddp_max = IXGBE_FCOE_DDP_MAX_X550;
+	if (xid >= ddp_max)
+		return -EINVAL;
+
+	fcoe = &adapter->fcoe;
+	ddp = &fcoe->ddp[xid];
+	if (!ddp->udl)
+		return -EINVAL;
+
+	ddp_err = ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_FCEOFE |
+					      IXGBE_RXDADV_ERR_FCERR);
+	if (ddp_err)
+		return -EINVAL;
+
+	switch (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_FCSTAT)) {
+	/* return 0 to bypass going to ULD for DDPed data */
+	case cpu_to_le32(IXGBE_RXDADV_STAT_FCSTAT_DDP):
+		/* update length of DDPed data */
+		ddp->len = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
+		rc = 0;
+		break;
+	/* unmap the sg list when FCPRSP is received */
+	case cpu_to_le32(IXGBE_RXDADV_STAT_FCSTAT_FCPRSP):
+		dma_unmap_sg(&adapter->pdev->dev, ddp->sgl,
+			     ddp->sgc, DMA_FROM_DEVICE);
+		ddp->err = (__force u32)ddp_err;
+		ddp->sgl = NULL;
+		ddp->sgc = 0;
+		fallthrough;
+	/* if DDP length is present pass it through to ULD */
+	case cpu_to_le32(IXGBE_RXDADV_STAT_FCSTAT_NODDP):
+		/* update length of DDPed data */
+		ddp->len = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
+		if (ddp->len)
+			rc = ddp->len;
+		break;
+	/* no match will return as an error */
+	case cpu_to_le32(IXGBE_RXDADV_STAT_FCSTAT_NOMTCH):
+	default:
+		break;
+	}
+
+	/* In target mode, check the last data frame of the sequence.
+	 * For DDP in target mode, data is already DDPed but the header
+	 * indication of the last data frame ould allow is to tell if we
+	 * got all the data and the ULP can send FCP_RSP back, as this is
+	 * not a full fcoe frame, we fill the trailer here so it won't be
+	 * dropped by the ULP stack.
+	 */
+	if ((fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA) &&
+	    (fctl & FC_FC_END_SEQ)) {
+		skb_linearize(skb);
+		crc = skb_put(skb, sizeof(*crc));
+		crc->fcoe_eof = FC_EOF_T;
+	}
+
+	return rc;
+}
+
+/**
+ * ixgbe_fso - ixgbe FCoE Sequence Offload (FSO)
+ * @tx_ring: tx desc ring
+ * @first: first tx_buffer structure containing skb, tx_flags, and protocol
+ * @hdr_len: hdr_len to be returned
+ *
+ * This sets up large send offload for FCoE
+ *
+ * Returns : 0 indicates success, < 0 for error
+ */
+int ixgbe_fso(struct ixgbe_ring *tx_ring,
+	      struct ixgbe_tx_buffer *first,
+	      u8 *hdr_len)
+{
+	struct sk_buff *skb = first->skb;
+	struct fc_frame_header *fh;
+	u32 vlan_macip_lens;
+	u32 fcoe_sof_eof = 0;
+	u32 mss_l4len_idx;
+	u32 type_tucmd = IXGBE_ADVTXT_TUCMD_FCOE;
+	u8 sof, eof;
+
+	if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_type != SKB_GSO_FCOE)) {
+		dev_err(tx_ring->dev, "Wrong gso type %d:expecting SKB_GSO_FCOE\n",
+			skb_shinfo(skb)->gso_type);
+		return -EINVAL;
+	}
+
+	/* resets the header to point fcoe/fc */
+	skb_set_network_header(skb, skb->mac_len);
+	skb_set_transport_header(skb, skb->mac_len +
+				 sizeof(struct fcoe_hdr));
+
+	/* sets up SOF and ORIS */
+	sof = ((struct fcoe_hdr *)skb_network_header(skb))->fcoe_sof;
+	switch (sof) {
+	case FC_SOF_I2:
+		fcoe_sof_eof = IXGBE_ADVTXD_FCOEF_ORIS;
+		break;
+	case FC_SOF_I3:
+		fcoe_sof_eof = IXGBE_ADVTXD_FCOEF_SOF |
+			       IXGBE_ADVTXD_FCOEF_ORIS;
+		break;
+	case FC_SOF_N2:
+		break;
+	case FC_SOF_N3:
+		fcoe_sof_eof = IXGBE_ADVTXD_FCOEF_SOF;
+		break;
+	default:
+		dev_warn(tx_ring->dev, "unknown sof = 0x%x\n", sof);
+		return -EINVAL;
+	}
+
+	/* the first byte of the last dword is EOF */
+	skb_copy_bits(skb, skb->len - 4, &eof, 1);
+	/* sets up EOF and ORIE */
+	switch (eof) {
+	case FC_EOF_N:
+		fcoe_sof_eof |= IXGBE_ADVTXD_FCOEF_EOF_N;
+		break;
+	case FC_EOF_T:
+		/* lso needs ORIE */
+		if (skb_is_gso(skb))
+			fcoe_sof_eof |= IXGBE_ADVTXD_FCOEF_EOF_N |
+					IXGBE_ADVTXD_FCOEF_ORIE;
+		else
+			fcoe_sof_eof |= IXGBE_ADVTXD_FCOEF_EOF_T;
+		break;
+	case FC_EOF_NI:
+		fcoe_sof_eof |= IXGBE_ADVTXD_FCOEF_EOF_NI;
+		break;
+	case FC_EOF_A:
+		fcoe_sof_eof |= IXGBE_ADVTXD_FCOEF_EOF_A;
+		break;
+	default:
+		dev_warn(tx_ring->dev, "unknown eof = 0x%x\n", eof);
+		return -EINVAL;
+	}
+
+	/* sets up PARINC indicating data offset */
+	fh = (struct fc_frame_header *)skb_transport_header(skb);
+	if (fh->fh_f_ctl[2] & FC_FC_REL_OFF)
+		fcoe_sof_eof |= IXGBE_ADVTXD_FCOEF_PARINC;
+
+	/* include trailer in headlen as it is replicated per frame */
+	*hdr_len = sizeof(struct fcoe_crc_eof);
+
+	/* hdr_len includes fc_hdr if FCoE LSO is enabled */
+	if (skb_is_gso(skb)) {
+		*hdr_len += skb_transport_offset(skb) +
+			    sizeof(struct fc_frame_header);
+		/* update gso_segs and bytecount */
+		first->gso_segs = DIV_ROUND_UP(skb->len - *hdr_len,
+					       skb_shinfo(skb)->gso_size);
+		first->bytecount += (first->gso_segs - 1) * *hdr_len;
+		first->tx_flags |= IXGBE_TX_FLAGS_TSO;
+		/* Hardware expects L4T to be RSV for FCoE TSO */
+		type_tucmd |= IXGBE_ADVTXD_TUCMD_L4T_RSV;
+	}
+
+	/* set flag indicating FCOE to ixgbe_tx_map call */
+	first->tx_flags |= IXGBE_TX_FLAGS_FCOE | IXGBE_TX_FLAGS_CC;
+
+	/* mss_l4len_id: use 0 for FSO as TSO, no need for L4LEN */
+	mss_l4len_idx = skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT;
+
+	/* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */
+	vlan_macip_lens = skb_transport_offset(skb) +
+			  sizeof(struct fc_frame_header);
+	vlan_macip_lens |= (skb_transport_offset(skb) - 4)
+			   << IXGBE_ADVTXD_MACLEN_SHIFT;
+	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
+
+	/* write context desc */
+	ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fcoe_sof_eof,
+			  type_tucmd, mss_l4len_idx);
+
+	return 0;
+}
+
+static void ixgbe_fcoe_dma_pool_free(struct ixgbe_fcoe *fcoe, unsigned int cpu)
+{
+	struct ixgbe_fcoe_ddp_pool *ddp_pool;
+
+	ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
+	dma_pool_destroy(ddp_pool->pool);
+	ddp_pool->pool = NULL;
+}
+
+static int ixgbe_fcoe_dma_pool_alloc(struct ixgbe_fcoe *fcoe,
+				     struct device *dev,
+				     unsigned int cpu)
+{
+	struct ixgbe_fcoe_ddp_pool *ddp_pool;
+	struct dma_pool *pool;
+	char pool_name[32];
+
+	snprintf(pool_name, 32, "ixgbe_fcoe_ddp_%u", cpu);
+
+	pool = dma_pool_create(pool_name, dev, IXGBE_FCPTR_MAX,
+			       IXGBE_FCPTR_ALIGN, PAGE_SIZE);
+	if (!pool)
+		return -ENOMEM;
+
+	ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
+	ddp_pool->pool = pool;
+	ddp_pool->noddp = 0;
+	ddp_pool->noddp_ext_buff = 0;
+
+	return 0;
+}
+
+/**
+ * ixgbe_configure_fcoe - configures registers for fcoe at start
+ * @adapter: ptr to ixgbe adapter
+ *
+ * This sets up FCoE related registers
+ *
+ * Returns : none
+ */
+void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE];
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i, fcoe_q, fcoe_i, fcoe_q_h = 0;
+	int fcreta_size;
+	u32 etqf;
+
+	/* Minimal functionality for FCoE requires at least CRC offloads */
+	if (!(adapter->netdev->features & NETIF_F_FCOE_CRC))
+		return;
+
+	/* Enable L2 EtherType filter for FCoE, needed for FCoE CRC and DDP */
+	etqf = ETH_P_FCOE | IXGBE_ETQF_FCOE | IXGBE_ETQF_FILTER_EN;
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+		etqf |= IXGBE_ETQF_POOL_ENABLE;
+		etqf |= VMDQ_P(0) << IXGBE_ETQF_POOL_SHIFT;
+	}
+	IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FCOE), etqf);
+	IXGBE_WRITE_REG(hw, IXGBE_ETQS(IXGBE_ETQF_FILTER_FCOE), 0);
+
+	/* leave registers un-configured if FCoE is disabled */
+	if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
+		return;
+
+	/* Use one or more Rx queues for FCoE by redirection table */
+	fcreta_size = IXGBE_FCRETA_SIZE;
+	if (adapter->hw.mac.type == ixgbe_mac_X550)
+		fcreta_size = IXGBE_FCRETA_SIZE_X550;
+
+	for (i = 0; i < fcreta_size; i++) {
+		if (adapter->hw.mac.type == ixgbe_mac_X550) {
+			int fcoe_i_h = fcoe->offset + ((i + fcreta_size) %
+							fcoe->indices);
+			fcoe_q_h = adapter->rx_ring[fcoe_i_h]->reg_idx;
+			fcoe_q_h = (fcoe_q_h << IXGBE_FCRETA_ENTRY_HIGH_SHIFT) &
+				   IXGBE_FCRETA_ENTRY_HIGH_MASK;
+		}
+
+		fcoe_i = fcoe->offset + (i % fcoe->indices);
+		fcoe_i &= IXGBE_FCRETA_ENTRY_MASK;
+		fcoe_q = adapter->rx_ring[fcoe_i]->reg_idx;
+		fcoe_q |= fcoe_q_h;
+		IXGBE_WRITE_REG(hw, IXGBE_FCRETA(i), fcoe_q);
+	}
+	IXGBE_WRITE_REG(hw, IXGBE_FCRECTL, IXGBE_FCRECTL_ENA);
+
+	/* Enable L2 EtherType filter for FIP */
+	etqf = ETH_P_FIP | IXGBE_ETQF_FILTER_EN;
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+		etqf |= IXGBE_ETQF_POOL_ENABLE;
+		etqf |= VMDQ_P(0) << IXGBE_ETQF_POOL_SHIFT;
+	}
+	IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FIP), etqf);
+
+	/* Send FIP frames to the first FCoE queue */
+	fcoe_q = adapter->rx_ring[fcoe->offset]->reg_idx;
+	IXGBE_WRITE_REG(hw, IXGBE_ETQS(IXGBE_ETQF_FILTER_FIP),
+			IXGBE_ETQS_QUEUE_EN |
+			(fcoe_q << IXGBE_ETQS_RX_QUEUE_SHIFT));
+
+	/* Configure FCoE Rx control */
+	IXGBE_WRITE_REG(hw, IXGBE_FCRXCTRL,
+			IXGBE_FCRXCTRL_FCCRCBO |
+			(FC_FCOE_VER << IXGBE_FCRXCTRL_FCOEVER_SHIFT));
+}
+
+/**
+ * ixgbe_free_fcoe_ddp_resources - release all fcoe ddp context resources
+ * @adapter : ixgbe adapter
+ *
+ * Cleans up outstanding ddp context resources
+ *
+ * Returns : none
+ */
+void ixgbe_free_fcoe_ddp_resources(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+	int cpu, i, ddp_max;
+
+	/* do nothing if no DDP pools were allocated */
+	if (!fcoe->ddp_pool)
+		return;
+
+	ddp_max = IXGBE_FCOE_DDP_MAX;
+	/* X550 has different DDP Max limit */
+	if (adapter->hw.mac.type == ixgbe_mac_X550)
+		ddp_max = IXGBE_FCOE_DDP_MAX_X550;
+
+	for (i = 0; i < ddp_max; i++)
+		ixgbe_fcoe_ddp_put(adapter->netdev, i);
+
+	for_each_possible_cpu(cpu)
+		ixgbe_fcoe_dma_pool_free(fcoe, cpu);
+
+	dma_unmap_single(&adapter->pdev->dev,
+			 fcoe->extra_ddp_buffer_dma,
+			 IXGBE_FCBUFF_MIN,
+			 DMA_FROM_DEVICE);
+	kfree(fcoe->extra_ddp_buffer);
+
+	fcoe->extra_ddp_buffer = NULL;
+	fcoe->extra_ddp_buffer_dma = 0;
+}
+
+/**
+ * ixgbe_setup_fcoe_ddp_resources - setup all fcoe ddp context resources
+ * @adapter: ixgbe adapter
+ *
+ * Sets up ddp context resouces
+ *
+ * Returns : 0 indicates success or -EINVAL on failure
+ */
+int ixgbe_setup_fcoe_ddp_resources(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+	struct device *dev = &adapter->pdev->dev;
+	void *buffer;
+	dma_addr_t dma;
+	unsigned int cpu;
+
+	/* do nothing if no DDP pools were allocated */
+	if (!fcoe->ddp_pool)
+		return 0;
+
+	/* Extra buffer to be shared by all DDPs for HW work around */
+	buffer = kmalloc(IXGBE_FCBUFF_MIN, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
+	dma = dma_map_single(dev, buffer, IXGBE_FCBUFF_MIN, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, dma)) {
+		e_err(drv, "failed to map extra DDP buffer\n");
+		kfree(buffer);
+		return -ENOMEM;
+	}
+
+	fcoe->extra_ddp_buffer = buffer;
+	fcoe->extra_ddp_buffer_dma = dma;
+
+	/* allocate pci pool for each cpu */
+	for_each_possible_cpu(cpu) {
+		int err = ixgbe_fcoe_dma_pool_alloc(fcoe, dev, cpu);
+		if (!err)
+			continue;
+
+		e_err(drv, "failed to alloc DDP pool on cpu:%d\n", cpu);
+		ixgbe_free_fcoe_ddp_resources(adapter);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int ixgbe_fcoe_ddp_enable(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+
+	if (!(adapter->flags & IXGBE_FLAG_FCOE_CAPABLE))
+		return -EINVAL;
+
+	fcoe->ddp_pool = alloc_percpu(struct ixgbe_fcoe_ddp_pool);
+
+	if (!fcoe->ddp_pool) {
+		e_err(drv, "failed to allocate percpu DDP resources\n");
+		return -ENOMEM;
+	}
+
+	adapter->netdev->fcoe_ddp_xid = IXGBE_FCOE_DDP_MAX - 1;
+	/* X550 has different DDP Max limit */
+	if (adapter->hw.mac.type == ixgbe_mac_X550)
+		adapter->netdev->fcoe_ddp_xid = IXGBE_FCOE_DDP_MAX_X550 - 1;
+
+	return 0;
+}
+
+static void ixgbe_fcoe_ddp_disable(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+
+	adapter->netdev->fcoe_ddp_xid = 0;
+
+	if (!fcoe->ddp_pool)
+		return;
+
+	free_percpu(fcoe->ddp_pool);
+	fcoe->ddp_pool = NULL;
+}
+
+/**
+ * ixgbe_fcoe_enable - turn on FCoE offload feature
+ * @netdev: the corresponding netdev
+ *
+ * Turns on FCoE offload feature in 82599.
+ *
+ * Returns : 0 indicates success or -EINVAL on failure
+ */
+int ixgbe_fcoe_enable(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+
+	atomic_inc(&fcoe->refcnt);
+
+	if (!(adapter->flags & IXGBE_FLAG_FCOE_CAPABLE))
+		return -EINVAL;
+
+	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)
+		return -EINVAL;
+
+	e_info(drv, "Enabling FCoE offload features.\n");
+
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+		e_warn(probe, "Enabling FCoE on PF will disable legacy VFs\n");
+
+	if (netif_running(netdev))
+		netdev->netdev_ops->ndo_stop(netdev);
+
+	/* Allocate per CPU memory to track DDP pools */
+	ixgbe_fcoe_ddp_enable(adapter);
+
+	/* enable FCoE and notify stack */
+	adapter->flags |= IXGBE_FLAG_FCOE_ENABLED;
+	netdev->features |= NETIF_F_FCOE_MTU;
+	netdev_features_change(netdev);
+
+	/* release existing queues and reallocate them */
+	ixgbe_clear_interrupt_scheme(adapter);
+	ixgbe_init_interrupt_scheme(adapter);
+
+	if (netif_running(netdev))
+		netdev->netdev_ops->ndo_open(netdev);
+
+	return 0;
+}
+
+/**
+ * ixgbe_fcoe_disable - turn off FCoE offload feature
+ * @netdev: the corresponding netdev
+ *
+ * Turns off FCoE offload feature in 82599.
+ *
+ * Returns : 0 indicates success or -EINVAL on failure
+ */
+int ixgbe_fcoe_disable(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (!atomic_dec_and_test(&adapter->fcoe.refcnt))
+		return -EINVAL;
+
+	if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
+		return -EINVAL;
+
+	e_info(drv, "Disabling FCoE offload features.\n");
+	if (netif_running(netdev))
+		netdev->netdev_ops->ndo_stop(netdev);
+
+	/* Free per CPU memory to track DDP pools */
+	ixgbe_fcoe_ddp_disable(adapter);
+
+	/* disable FCoE and notify stack */
+	adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
+	netdev->features &= ~NETIF_F_FCOE_MTU;
+
+	netdev_features_change(netdev);
+
+	/* release existing queues and reallocate them */
+	ixgbe_clear_interrupt_scheme(adapter);
+	ixgbe_init_interrupt_scheme(adapter);
+
+	if (netif_running(netdev))
+		netdev->netdev_ops->ndo_open(netdev);
+
+	return 0;
+}
+
+/**
+ * ixgbe_fcoe_get_wwn - get world wide name for the node or the port
+ * @netdev : ixgbe adapter
+ * @wwn : the world wide name
+ * @type: the type of world wide name
+ *
+ * Returns the node or port world wide name if both the prefix and the san
+ * mac address are valid, then the wwn is formed based on the NAA-2 for
+ * IEEE Extended name identifier (ref. to T10 FC-LS Spec., Sec. 15.3).
+ *
+ * Returns : 0 on success
+ */
+int ixgbe_fcoe_get_wwn(struct net_device *netdev, u64 *wwn, int type)
+{
+	u16 prefix = 0xffff;
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_mac_info *mac = &adapter->hw.mac;
+
+	switch (type) {
+	case NETDEV_FCOE_WWNN:
+		prefix = mac->wwnn_prefix;
+		break;
+	case NETDEV_FCOE_WWPN:
+		prefix = mac->wwpn_prefix;
+		break;
+	default:
+		break;
+	}
+
+	if ((prefix != 0xffff) &&
+	    is_valid_ether_addr(mac->san_addr)) {
+		*wwn = ((u64) prefix << 48) |
+		       ((u64) mac->san_addr[0] << 40) |
+		       ((u64) mac->san_addr[1] << 32) |
+		       ((u64) mac->san_addr[2] << 24) |
+		       ((u64) mac->san_addr[3] << 16) |
+		       ((u64) mac->san_addr[4] << 8)  |
+		       ((u64) mac->san_addr[5]);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+/**
+ * ixgbe_fcoe_get_hbainfo - get FCoE HBA information
+ * @netdev : ixgbe adapter
+ * @info : HBA information
+ *
+ * Returns ixgbe HBA information
+ *
+ * Returns : 0 on success
+ */
+int ixgbe_fcoe_get_hbainfo(struct net_device *netdev,
+			   struct netdev_fcoe_hbainfo *info)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u64 dsn;
+
+	if (!info)
+		return -EINVAL;
+
+	/* Don't return information on unsupported devices */
+	if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
+		return -EINVAL;
+
+	/* Manufacturer */
+	snprintf(info->manufacturer, sizeof(info->manufacturer),
+		 "Intel Corporation");
+
+	/* Serial Number */
+
+	/* Get the PCI-e Device Serial Number Capability */
+	dsn = pci_get_dsn(adapter->pdev);
+	if (dsn)
+		snprintf(info->serial_number, sizeof(info->serial_number),
+			 "%016llX", dsn);
+	else
+		snprintf(info->serial_number, sizeof(info->serial_number),
+			 "Unknown");
+
+	/* Hardware Version */
+	snprintf(info->hardware_version,
+		 sizeof(info->hardware_version),
+		 "Rev %d", hw->revision_id);
+	/* Driver Name/Version */
+	snprintf(info->driver_version,
+		 sizeof(info->driver_version),
+		 "%s v%s",
+		 ixgbe_driver_name,
+		 UTS_RELEASE);
+	/* Firmware Version */
+	strscpy(info->firmware_version, adapter->eeprom_id,
+		sizeof(info->firmware_version));
+
+	/* Model */
+	if (hw->mac.type == ixgbe_mac_82599EB) {
+		snprintf(info->model,
+			 sizeof(info->model),
+			 "Intel 82599");
+	} else if (hw->mac.type == ixgbe_mac_X550) {
+		snprintf(info->model,
+			 sizeof(info->model),
+			 "Intel X550");
+	} else {
+		snprintf(info->model,
+			 sizeof(info->model),
+			 "Intel X540");
+	}
+
+	/* Model Description */
+	snprintf(info->model_description,
+		 sizeof(info->model_description),
+		 "%s",
+		 ixgbe_default_device_descr);
+
+	return 0;
+}
+
+/**
+ * ixgbe_fcoe_get_tc - get the current TC that fcoe is mapped to
+ * @adapter: pointer to the device adapter structure
+ *
+ * Return : TC that FCoE is mapped to
+ */
+u8 ixgbe_fcoe_get_tc(struct ixgbe_adapter *adapter)
+{
+#ifdef CONFIG_IXGBE_DCB
+	return netdev_get_prio_tc_map(adapter->netdev, adapter->fcoe.up);
+#else
+	return 0;
+#endif
+}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
new file mode 100644
index 0000000000..724f538232
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _IXGBE_FCOE_H
+#define _IXGBE_FCOE_H
+
+#include <scsi/fc/fc_fs.h>
+#include <scsi/fc/fc_fcoe.h>
+
+/* shift bits within STAT fo FCSTAT */
+#define IXGBE_RXDADV_FCSTAT_SHIFT	4
+
+/* ddp user buffer */
+#define IXGBE_BUFFCNT_MAX	256	/* 8 bits bufcnt */
+#define IXGBE_FCPTR_ALIGN	16
+#define IXGBE_FCPTR_MAX	(IXGBE_BUFFCNT_MAX * sizeof(dma_addr_t))
+#define IXGBE_FCBUFF_4KB	0x0
+#define IXGBE_FCBUFF_8KB	0x1
+#define IXGBE_FCBUFF_16KB	0x2
+#define IXGBE_FCBUFF_64KB	0x3
+#define IXGBE_FCBUFF_MAX	65536	/* 64KB max */
+#define IXGBE_FCBUFF_MIN	4096	/* 4KB min */
+#define IXGBE_FCOE_DDP_MAX	512	/* 9 bits xid */
+#define IXGBE_FCOE_DDP_MAX_X550	2048	/* 11 bits xid */
+
+/* Default traffic class to use for FCoE */
+#define IXGBE_FCOE_DEFTC	3
+
+/* fcerr */
+#define IXGBE_FCERR_BADCRC       0x00100000
+
+/* FCoE DDP for target mode */
+#define __IXGBE_FCOE_TARGET	1
+
+struct ixgbe_fcoe_ddp {
+	int len;
+	u32 err;
+	unsigned int sgc;
+	struct scatterlist *sgl;
+	dma_addr_t udp;
+	u64 *udl;
+	struct dma_pool *pool;
+};
+
+/* per cpu variables */
+struct ixgbe_fcoe_ddp_pool {
+	struct dma_pool *pool;
+	u64 noddp;
+	u64 noddp_ext_buff;
+};
+
+struct ixgbe_fcoe {
+	struct ixgbe_fcoe_ddp_pool __percpu *ddp_pool;
+	atomic_t refcnt;
+	spinlock_t lock;
+	struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX_X550];
+	void *extra_ddp_buffer;
+	dma_addr_t extra_ddp_buffer_dma;
+	unsigned long mode;
+	u8 up;
+};
+
+#endif /* _IXGBE_FCOE_H */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
new file mode 100644
index 0000000000..13a6fca310
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -0,0 +1,1307 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved. */
+
+#include "ixgbe.h"
+#include <net/xfrm.h>
+#include <crypto/aead.h>
+#include <linux/if_bridge.h>
+
+#define IXGBE_IPSEC_KEY_BITS  160
+static const char aes_gcm_name[] = "rfc4106(gcm(aes))";
+
+static void ixgbe_ipsec_del_sa(struct xfrm_state *xs);
+
+/**
+ * ixgbe_ipsec_set_tx_sa - set the Tx SA registers
+ * @hw: hw specific details
+ * @idx: register index to write
+ * @key: key byte array
+ * @salt: salt bytes
+ **/
+static void ixgbe_ipsec_set_tx_sa(struct ixgbe_hw *hw, u16 idx,
+				  u32 key[], u32 salt)
+{
+	u32 reg;
+	int i;
+
+	for (i = 0; i < 4; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(i),
+				(__force u32)cpu_to_be32(key[3 - i]));
+	IXGBE_WRITE_REG(hw, IXGBE_IPSTXSALT, (__force u32)cpu_to_be32(salt));
+	IXGBE_WRITE_FLUSH(hw);
+
+	reg = IXGBE_READ_REG(hw, IXGBE_IPSTXIDX);
+	reg &= IXGBE_RXTXIDX_IPS_EN;
+	reg |= idx << IXGBE_RXTXIDX_IDX_SHIFT | IXGBE_RXTXIDX_WRITE;
+	IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, reg);
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_set_rx_item - set an Rx table item
+ * @hw: hw specific details
+ * @idx: register index to write
+ * @tbl: table selector
+ *
+ * Trigger the device to store into a particular Rx table the
+ * data that has already been loaded into the input register
+ **/
+static void ixgbe_ipsec_set_rx_item(struct ixgbe_hw *hw, u16 idx,
+				    enum ixgbe_ipsec_tbl_sel tbl)
+{
+	u32 reg;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_IPSRXIDX);
+	reg &= IXGBE_RXTXIDX_IPS_EN;
+	reg |= tbl << IXGBE_RXIDX_TBL_SHIFT |
+	       idx << IXGBE_RXTXIDX_IDX_SHIFT |
+	       IXGBE_RXTXIDX_WRITE;
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, reg);
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_set_rx_sa - set up the register bits to save SA info
+ * @hw: hw specific details
+ * @idx: register index to write
+ * @spi: security parameter index
+ * @key: key byte array
+ * @salt: salt bytes
+ * @mode: rx decrypt control bits
+ * @ip_idx: index into IP table for related IP address
+ **/
+static void ixgbe_ipsec_set_rx_sa(struct ixgbe_hw *hw, u16 idx, __be32 spi,
+				  u32 key[], u32 salt, u32 mode, u32 ip_idx)
+{
+	int i;
+
+	/* store the SPI (in bigendian) and IPidx */
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXSPI,
+			(__force u32)cpu_to_le32((__force u32)spi));
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPIDX, ip_idx);
+	IXGBE_WRITE_FLUSH(hw);
+
+	ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_spi_tbl);
+
+	/* store the key, salt, and mode */
+	for (i = 0; i < 4; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(i),
+				(__force u32)cpu_to_be32(key[3 - i]));
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXSALT, (__force u32)cpu_to_be32(salt));
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXMOD, mode);
+	IXGBE_WRITE_FLUSH(hw);
+
+	ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_key_tbl);
+}
+
+/**
+ * ixgbe_ipsec_set_rx_ip - set up the register bits to save SA IP addr info
+ * @hw: hw specific details
+ * @idx: register index to write
+ * @addr: IP address byte array
+ **/
+static void ixgbe_ipsec_set_rx_ip(struct ixgbe_hw *hw, u16 idx, __be32 addr[])
+{
+	int i;
+
+	/* store the ip address */
+	for (i = 0; i < 4; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(i),
+				(__force u32)cpu_to_le32((__force u32)addr[i]));
+	IXGBE_WRITE_FLUSH(hw);
+
+	ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_ip_tbl);
+}
+
+/**
+ * ixgbe_ipsec_clear_hw_tables - because some tables don't get cleared on reset
+ * @adapter: board private structure
+ **/
+static void ixgbe_ipsec_clear_hw_tables(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 buf[4] = {0, 0, 0, 0};
+	u16 idx;
+
+	/* disable Rx and Tx SA lookup */
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, 0);
+	IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, 0);
+
+	/* scrub the tables - split the loops for the max of the IP table */
+	for (idx = 0; idx < IXGBE_IPSEC_MAX_RX_IP_COUNT; idx++) {
+		ixgbe_ipsec_set_tx_sa(hw, idx, buf, 0);
+		ixgbe_ipsec_set_rx_sa(hw, idx, 0, buf, 0, 0, 0);
+		ixgbe_ipsec_set_rx_ip(hw, idx, (__be32 *)buf);
+	}
+	for (; idx < IXGBE_IPSEC_MAX_SA_COUNT; idx++) {
+		ixgbe_ipsec_set_tx_sa(hw, idx, buf, 0);
+		ixgbe_ipsec_set_rx_sa(hw, idx, 0, buf, 0, 0, 0);
+	}
+}
+
+/**
+ * ixgbe_ipsec_stop_data
+ * @adapter: board private structure
+ **/
+static void ixgbe_ipsec_stop_data(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	bool link = adapter->link_up;
+	u32 t_rdy, r_rdy;
+	u32 limit;
+	u32 reg;
+
+	/* halt data paths */
+	reg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+	reg |= IXGBE_SECTXCTRL_TX_DIS;
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, reg);
+
+	reg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+	reg |= IXGBE_SECRXCTRL_RX_DIS;
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, reg);
+
+	/* If both Tx and Rx are ready there are no packets
+	 * that we need to flush so the loopback configuration
+	 * below is not necessary.
+	 */
+	t_rdy = IXGBE_READ_REG(hw, IXGBE_SECTXSTAT) &
+		IXGBE_SECTXSTAT_SECTX_RDY;
+	r_rdy = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT) &
+		IXGBE_SECRXSTAT_SECRX_RDY;
+	if (t_rdy && r_rdy)
+		return;
+
+	/* If the tx fifo doesn't have link, but still has data,
+	 * we can't clear the tx sec block.  Set the MAC loopback
+	 * before block clear
+	 */
+	if (!link) {
+		reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+		reg |= IXGBE_MACC_FLU;
+		IXGBE_WRITE_REG(hw, IXGBE_MACC, reg);
+
+		reg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+		reg |= IXGBE_HLREG0_LPBK;
+		IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg);
+
+		IXGBE_WRITE_FLUSH(hw);
+		mdelay(3);
+	}
+
+	/* wait for the paths to empty */
+	limit = 20;
+	do {
+		mdelay(10);
+		t_rdy = IXGBE_READ_REG(hw, IXGBE_SECTXSTAT) &
+			IXGBE_SECTXSTAT_SECTX_RDY;
+		r_rdy = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT) &
+			IXGBE_SECRXSTAT_SECRX_RDY;
+	} while (!(t_rdy && r_rdy) && limit--);
+
+	/* undo loopback if we played with it earlier */
+	if (!link) {
+		reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+		reg &= ~IXGBE_MACC_FLU;
+		IXGBE_WRITE_REG(hw, IXGBE_MACC, reg);
+
+		reg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+		reg &= ~IXGBE_HLREG0_LPBK;
+		IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg);
+
+		IXGBE_WRITE_FLUSH(hw);
+	}
+}
+
+/**
+ * ixgbe_ipsec_stop_engine
+ * @adapter: board private structure
+ **/
+static void ixgbe_ipsec_stop_engine(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 reg;
+
+	ixgbe_ipsec_stop_data(adapter);
+
+	/* disable Rx and Tx SA lookup */
+	IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, 0);
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, 0);
+
+	/* disable the Rx and Tx engines and full packet store-n-forward */
+	reg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+	reg |= IXGBE_SECTXCTRL_SECTX_DIS;
+	reg &= ~IXGBE_SECTXCTRL_STORE_FORWARD;
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, reg);
+
+	reg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+	reg |= IXGBE_SECRXCTRL_SECRX_DIS;
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, reg);
+
+	/* restore the "tx security buffer almost full threshold" to 0x250 */
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXBUFFAF, 0x250);
+
+	/* Set minimum IFG between packets back to the default 0x1 */
+	reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+	reg = (reg & 0xfffffff0) | 0x1;
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
+
+	/* final set for normal (no ipsec offload) processing */
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, IXGBE_SECTXCTRL_SECTX_DIS);
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, IXGBE_SECRXCTRL_SECRX_DIS);
+
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_start_engine
+ * @adapter: board private structure
+ *
+ * NOTE: this increases power consumption whether being used or not
+ **/
+static void ixgbe_ipsec_start_engine(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 reg;
+
+	ixgbe_ipsec_stop_data(adapter);
+
+	/* Set minimum IFG between packets to 3 */
+	reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+	reg = (reg & 0xfffffff0) | 0x3;
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
+
+	/* Set "tx security buffer almost full threshold" to 0x15 so that the
+	 * almost full indication is generated only after buffer contains at
+	 * least an entire jumbo packet.
+	 */
+	reg = IXGBE_READ_REG(hw, IXGBE_SECTXBUFFAF);
+	reg = (reg & 0xfffffc00) | 0x15;
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXBUFFAF, reg);
+
+	/* restart the data paths by clearing the DISABLE bits */
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, 0);
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, IXGBE_SECTXCTRL_STORE_FORWARD);
+
+	/* enable Rx and Tx SA lookup */
+	IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, IXGBE_RXTXIDX_IPS_EN);
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, IXGBE_RXTXIDX_IPS_EN);
+
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_restore - restore the ipsec HW settings after a reset
+ * @adapter: board private structure
+ *
+ * Reload the HW tables from the SW tables after they've been bashed
+ * by a chip reset.
+ *
+ * Any VF entries are removed from the SW and HW tables since either
+ * (a) the VF also gets reset on PF reset and will ask again for the
+ * offloads, or (b) the VF has been removed by a change in the num_vfs.
+ **/
+void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	if (!(adapter->flags2 & IXGBE_FLAG2_IPSEC_ENABLED))
+		return;
+
+	/* clean up and restart the engine */
+	ixgbe_ipsec_stop_engine(adapter);
+	ixgbe_ipsec_clear_hw_tables(adapter);
+	ixgbe_ipsec_start_engine(adapter);
+
+	/* reload the Rx and Tx keys */
+	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+		struct rx_sa *r = &ipsec->rx_tbl[i];
+		struct tx_sa *t = &ipsec->tx_tbl[i];
+
+		if (r->used) {
+			if (r->mode & IXGBE_RXTXMOD_VF)
+				ixgbe_ipsec_del_sa(r->xs);
+			else
+				ixgbe_ipsec_set_rx_sa(hw, i, r->xs->id.spi,
+						      r->key, r->salt,
+						      r->mode, r->iptbl_ind);
+		}
+
+		if (t->used) {
+			if (t->mode & IXGBE_RXTXMOD_VF)
+				ixgbe_ipsec_del_sa(t->xs);
+			else
+				ixgbe_ipsec_set_tx_sa(hw, i, t->key, t->salt);
+		}
+	}
+
+	/* reload the IP addrs */
+	for (i = 0; i < IXGBE_IPSEC_MAX_RX_IP_COUNT; i++) {
+		struct rx_ip_sa *ipsa = &ipsec->ip_tbl[i];
+
+		if (ipsa->used)
+			ixgbe_ipsec_set_rx_ip(hw, i, ipsa->ipaddr);
+	}
+}
+
+/**
+ * ixgbe_ipsec_find_empty_idx - find the first unused security parameter index
+ * @ipsec: pointer to ipsec struct
+ * @rxtable: true if we need to look in the Rx table
+ *
+ * Returns the first unused index in either the Rx or Tx SA table
+ **/
+static int ixgbe_ipsec_find_empty_idx(struct ixgbe_ipsec *ipsec, bool rxtable)
+{
+	u32 i;
+
+	if (rxtable) {
+		if (ipsec->num_rx_sa == IXGBE_IPSEC_MAX_SA_COUNT)
+			return -ENOSPC;
+
+		/* search rx sa table */
+		for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+			if (!ipsec->rx_tbl[i].used)
+				return i;
+		}
+	} else {
+		if (ipsec->num_tx_sa == IXGBE_IPSEC_MAX_SA_COUNT)
+			return -ENOSPC;
+
+		/* search tx sa table */
+		for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+			if (!ipsec->tx_tbl[i].used)
+				return i;
+		}
+	}
+
+	return -ENOSPC;
+}
+
+/**
+ * ixgbe_ipsec_find_rx_state - find the state that matches
+ * @ipsec: pointer to ipsec struct
+ * @daddr: inbound address to match
+ * @proto: protocol to match
+ * @spi: SPI to match
+ * @ip4: true if using an ipv4 address
+ *
+ * Returns a pointer to the matching SA state information
+ **/
+static struct xfrm_state *ixgbe_ipsec_find_rx_state(struct ixgbe_ipsec *ipsec,
+						    __be32 *daddr, u8 proto,
+						    __be32 spi, bool ip4)
+{
+	struct rx_sa *rsa;
+	struct xfrm_state *ret = NULL;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(ipsec->rx_sa_list, rsa, hlist,
+				   (__force u32)spi) {
+		if (rsa->mode & IXGBE_RXTXMOD_VF)
+			continue;
+		if (spi == rsa->xs->id.spi &&
+		    ((ip4 && *daddr == rsa->xs->id.daddr.a4) ||
+		      (!ip4 && !memcmp(daddr, &rsa->xs->id.daddr.a6,
+				       sizeof(rsa->xs->id.daddr.a6)))) &&
+		    proto == rsa->xs->id.proto) {
+			ret = rsa->xs;
+			xfrm_state_hold(ret);
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
+ * ixgbe_ipsec_parse_proto_keys - find the key and salt based on the protocol
+ * @xs: pointer to xfrm_state struct
+ * @mykey: pointer to key array to populate
+ * @mysalt: pointer to salt value to populate
+ *
+ * This copies the protocol keys and salt to our own data tables.  The
+ * 82599 family only supports the one algorithm.
+ **/
+static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs,
+					u32 *mykey, u32 *mysalt)
+{
+	struct net_device *dev = xs->xso.real_dev;
+	unsigned char *key_data;
+	char *alg_name = NULL;
+	int key_len;
+
+	if (!xs->aead) {
+		netdev_err(dev, "Unsupported IPsec algorithm\n");
+		return -EINVAL;
+	}
+
+	if (xs->aead->alg_icv_len != IXGBE_IPSEC_AUTH_BITS) {
+		netdev_err(dev, "IPsec offload requires %d bit authentication\n",
+			   IXGBE_IPSEC_AUTH_BITS);
+		return -EINVAL;
+	}
+
+	key_data = &xs->aead->alg_key[0];
+	key_len = xs->aead->alg_key_len;
+	alg_name = xs->aead->alg_name;
+
+	if (strcmp(alg_name, aes_gcm_name)) {
+		netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n",
+			   aes_gcm_name);
+		return -EINVAL;
+	}
+
+	/* The key bytes come down in a bigendian array of bytes, so
+	 * we don't need to do any byteswapping.
+	 * 160 accounts for 16 byte key and 4 byte salt
+	 */
+	if (key_len == IXGBE_IPSEC_KEY_BITS) {
+		*mysalt = ((u32 *)key_data)[4];
+	} else if (key_len != (IXGBE_IPSEC_KEY_BITS - (sizeof(*mysalt) * 8))) {
+		netdev_err(dev, "IPsec hw offload only supports keys up to 128 bits with a 32 bit salt\n");
+		return -EINVAL;
+	} else {
+		netdev_info(dev, "IPsec hw offload parameters missing 32 bit salt value\n");
+		*mysalt = 0;
+	}
+	memcpy(mykey, key_data, 16);
+
+	return 0;
+}
+
+/**
+ * ixgbe_ipsec_check_mgmt_ip - make sure there is no clash with mgmt IP filters
+ * @xs: pointer to transformer state struct
+ **/
+static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs)
+{
+	struct net_device *dev = xs->xso.real_dev;
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 mfval, manc, reg;
+	int num_filters = 4;
+	bool manc_ipv4;
+	u32 bmcipval;
+	int i, j;
+
+#define MANC_EN_IPV4_FILTER      BIT(24)
+#define MFVAL_IPV4_FILTER_SHIFT  16
+#define MFVAL_IPV6_FILTER_SHIFT  24
+#define MIPAF_ARR(_m, _n)        (IXGBE_MIPAF + ((_m) * 0x10) + ((_n) * 4))
+
+#define IXGBE_BMCIP(_n)          (0x5050 + ((_n) * 4))
+#define IXGBE_BMCIPVAL           0x5060
+#define BMCIP_V4                 0x2
+#define BMCIP_V6                 0x3
+#define BMCIP_MASK               0x3
+
+	manc = IXGBE_READ_REG(hw, IXGBE_MANC);
+	manc_ipv4 = !!(manc & MANC_EN_IPV4_FILTER);
+	mfval = IXGBE_READ_REG(hw, IXGBE_MFVAL);
+	bmcipval = IXGBE_READ_REG(hw, IXGBE_BMCIPVAL);
+
+	if (xs->props.family == AF_INET) {
+		/* are there any IPv4 filters to check? */
+		if (manc_ipv4) {
+			/* the 4 ipv4 filters are all in MIPAF(3, i) */
+			for (i = 0; i < num_filters; i++) {
+				if (!(mfval & BIT(MFVAL_IPV4_FILTER_SHIFT + i)))
+					continue;
+
+				reg = IXGBE_READ_REG(hw, MIPAF_ARR(3, i));
+				if (reg == (__force u32)xs->id.daddr.a4)
+					return 1;
+			}
+		}
+
+		if ((bmcipval & BMCIP_MASK) == BMCIP_V4) {
+			reg = IXGBE_READ_REG(hw, IXGBE_BMCIP(3));
+			if (reg == (__force u32)xs->id.daddr.a4)
+				return 1;
+		}
+
+	} else {
+		/* if there are ipv4 filters, they are in the last ipv6 slot */
+		if (manc_ipv4)
+			num_filters = 3;
+
+		for (i = 0; i < num_filters; i++) {
+			if (!(mfval & BIT(MFVAL_IPV6_FILTER_SHIFT + i)))
+				continue;
+
+			for (j = 0; j < 4; j++) {
+				reg = IXGBE_READ_REG(hw, MIPAF_ARR(i, j));
+				if (reg != (__force u32)xs->id.daddr.a6[j])
+					break;
+			}
+			if (j == 4)   /* did we match all 4 words? */
+				return 1;
+		}
+
+		if ((bmcipval & BMCIP_MASK) == BMCIP_V6) {
+			for (j = 0; j < 4; j++) {
+				reg = IXGBE_READ_REG(hw, IXGBE_BMCIP(j));
+				if (reg != (__force u32)xs->id.daddr.a6[j])
+					break;
+			}
+			if (j == 4)   /* did we match all 4 words? */
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_ipsec_add_sa - program device with a security association
+ * @xs: pointer to transformer state struct
+ * @extack: extack point to fill failure reason
+ **/
+static int ixgbe_ipsec_add_sa(struct xfrm_state *xs,
+			      struct netlink_ext_ack *extack)
+{
+	struct net_device *dev = xs->xso.real_dev;
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct ixgbe_hw *hw = &adapter->hw;
+	int checked, match, first;
+	u16 sa_idx;
+	int ret;
+	int i;
+
+	if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) {
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol for ipsec offload");
+		return -EINVAL;
+	}
+
+	if (xs->props.mode != XFRM_MODE_TRANSPORT) {
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported mode for ipsec offload");
+		return -EINVAL;
+	}
+
+	if (ixgbe_ipsec_check_mgmt_ip(xs)) {
+		NL_SET_ERR_MSG_MOD(extack, "IPsec IP addr clash with mgmt filters");
+		return -EINVAL;
+	}
+
+	if (xs->xso.type != XFRM_DEV_OFFLOAD_CRYPTO) {
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported ipsec offload type");
+		return -EINVAL;
+	}
+
+	if (xs->xso.dir == XFRM_DEV_OFFLOAD_IN) {
+		struct rx_sa rsa;
+
+		if (xs->calg) {
+			NL_SET_ERR_MSG_MOD(extack, "Compression offload not supported");
+			return -EINVAL;
+		}
+
+		/* find the first unused index */
+		ret = ixgbe_ipsec_find_empty_idx(ipsec, true);
+		if (ret < 0) {
+			NL_SET_ERR_MSG_MOD(extack, "No space for SA in Rx table!");
+			return ret;
+		}
+		sa_idx = (u16)ret;
+
+		memset(&rsa, 0, sizeof(rsa));
+		rsa.used = true;
+		rsa.xs = xs;
+
+		if (rsa.xs->id.proto & IPPROTO_ESP)
+			rsa.decrypt = xs->ealg || xs->aead;
+
+		/* get the key and salt */
+		ret = ixgbe_ipsec_parse_proto_keys(xs, rsa.key, &rsa.salt);
+		if (ret) {
+			NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for Rx SA table");
+			return ret;
+		}
+
+		/* get ip for rx sa table */
+		if (xs->props.family == AF_INET6)
+			memcpy(rsa.ipaddr, &xs->id.daddr.a6, 16);
+		else
+			memcpy(&rsa.ipaddr[3], &xs->id.daddr.a4, 4);
+
+		/* The HW does not have a 1:1 mapping from keys to IP addrs, so
+		 * check for a matching IP addr entry in the table.  If the addr
+		 * already exists, use it; else find an unused slot and add the
+		 * addr.  If one does not exist and there are no unused table
+		 * entries, fail the request.
+		 */
+
+		/* Find an existing match or first not used, and stop looking
+		 * after we've checked all we know we have.
+		 */
+		checked = 0;
+		match = -1;
+		first = -1;
+		for (i = 0;
+		     i < IXGBE_IPSEC_MAX_RX_IP_COUNT &&
+		     (checked < ipsec->num_rx_sa || first < 0);
+		     i++) {
+			if (ipsec->ip_tbl[i].used) {
+				if (!memcmp(ipsec->ip_tbl[i].ipaddr,
+					    rsa.ipaddr, sizeof(rsa.ipaddr))) {
+					match = i;
+					break;
+				}
+				checked++;
+			} else if (first < 0) {
+				first = i;  /* track the first empty seen */
+			}
+		}
+
+		if (ipsec->num_rx_sa == 0)
+			first = 0;
+
+		if (match >= 0) {
+			/* addrs are the same, we should use this one */
+			rsa.iptbl_ind = match;
+			ipsec->ip_tbl[match].ref_cnt++;
+
+		} else if (first >= 0) {
+			/* no matches, but here's an empty slot */
+			rsa.iptbl_ind = first;
+
+			memcpy(ipsec->ip_tbl[first].ipaddr,
+			       rsa.ipaddr, sizeof(rsa.ipaddr));
+			ipsec->ip_tbl[first].ref_cnt = 1;
+			ipsec->ip_tbl[first].used = true;
+
+			ixgbe_ipsec_set_rx_ip(hw, rsa.iptbl_ind, rsa.ipaddr);
+
+		} else {
+			/* no match and no empty slot */
+			NL_SET_ERR_MSG_MOD(extack, "No space for SA in Rx IP SA table");
+			memset(&rsa, 0, sizeof(rsa));
+			return -ENOSPC;
+		}
+
+		rsa.mode = IXGBE_RXMOD_VALID;
+		if (rsa.xs->id.proto & IPPROTO_ESP)
+			rsa.mode |= IXGBE_RXMOD_PROTO_ESP;
+		if (rsa.decrypt)
+			rsa.mode |= IXGBE_RXMOD_DECRYPT;
+		if (rsa.xs->props.family == AF_INET6)
+			rsa.mode |= IXGBE_RXMOD_IPV6;
+
+		/* the preparations worked, so save the info */
+		memcpy(&ipsec->rx_tbl[sa_idx], &rsa, sizeof(rsa));
+
+		ixgbe_ipsec_set_rx_sa(hw, sa_idx, rsa.xs->id.spi, rsa.key,
+				      rsa.salt, rsa.mode, rsa.iptbl_ind);
+		xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_RX_INDEX;
+
+		ipsec->num_rx_sa++;
+
+		/* hash the new entry for faster search in Rx path */
+		hash_add_rcu(ipsec->rx_sa_list, &ipsec->rx_tbl[sa_idx].hlist,
+			     (__force u32)rsa.xs->id.spi);
+	} else {
+		struct tx_sa tsa;
+
+		if (adapter->num_vfs &&
+		    adapter->bridge_mode != BRIDGE_MODE_VEPA)
+			return -EOPNOTSUPP;
+
+		/* find the first unused index */
+		ret = ixgbe_ipsec_find_empty_idx(ipsec, false);
+		if (ret < 0) {
+			NL_SET_ERR_MSG_MOD(extack, "No space for SA in Tx table");
+			return ret;
+		}
+		sa_idx = (u16)ret;
+
+		memset(&tsa, 0, sizeof(tsa));
+		tsa.used = true;
+		tsa.xs = xs;
+
+		if (xs->id.proto & IPPROTO_ESP)
+			tsa.encrypt = xs->ealg || xs->aead;
+
+		ret = ixgbe_ipsec_parse_proto_keys(xs, tsa.key, &tsa.salt);
+		if (ret) {
+			NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for Tx SA table");
+			memset(&tsa, 0, sizeof(tsa));
+			return ret;
+		}
+
+		/* the preparations worked, so save the info */
+		memcpy(&ipsec->tx_tbl[sa_idx], &tsa, sizeof(tsa));
+
+		ixgbe_ipsec_set_tx_sa(hw, sa_idx, tsa.key, tsa.salt);
+
+		xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_TX_INDEX;
+
+		ipsec->num_tx_sa++;
+	}
+
+	/* enable the engine if not already warmed up */
+	if (!(adapter->flags2 & IXGBE_FLAG2_IPSEC_ENABLED)) {
+		ixgbe_ipsec_start_engine(adapter);
+		adapter->flags2 |= IXGBE_FLAG2_IPSEC_ENABLED;
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_ipsec_del_sa - clear out this specific SA
+ * @xs: pointer to transformer state struct
+ **/
+static void ixgbe_ipsec_del_sa(struct xfrm_state *xs)
+{
+	struct net_device *dev = xs->xso.real_dev;
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 zerobuf[4] = {0, 0, 0, 0};
+	u16 sa_idx;
+
+	if (xs->xso.dir == XFRM_DEV_OFFLOAD_IN) {
+		struct rx_sa *rsa;
+		u8 ipi;
+
+		sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_RX_INDEX;
+		rsa = &ipsec->rx_tbl[sa_idx];
+
+		if (!rsa->used) {
+			netdev_err(dev, "Invalid Rx SA selected sa_idx=%d offload_handle=%lu\n",
+				   sa_idx, xs->xso.offload_handle);
+			return;
+		}
+
+		ixgbe_ipsec_set_rx_sa(hw, sa_idx, 0, zerobuf, 0, 0, 0);
+		hash_del_rcu(&rsa->hlist);
+
+		/* if the IP table entry is referenced by only this SA,
+		 * i.e. ref_cnt is only 1, clear the IP table entry as well
+		 */
+		ipi = rsa->iptbl_ind;
+		if (ipsec->ip_tbl[ipi].ref_cnt > 0) {
+			ipsec->ip_tbl[ipi].ref_cnt--;
+
+			if (!ipsec->ip_tbl[ipi].ref_cnt) {
+				memset(&ipsec->ip_tbl[ipi], 0,
+				       sizeof(struct rx_ip_sa));
+				ixgbe_ipsec_set_rx_ip(hw, ipi,
+						      (__force __be32 *)zerobuf);
+			}
+		}
+
+		memset(rsa, 0, sizeof(struct rx_sa));
+		ipsec->num_rx_sa--;
+	} else {
+		sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
+
+		if (!ipsec->tx_tbl[sa_idx].used) {
+			netdev_err(dev, "Invalid Tx SA selected sa_idx=%d offload_handle=%lu\n",
+				   sa_idx, xs->xso.offload_handle);
+			return;
+		}
+
+		ixgbe_ipsec_set_tx_sa(hw, sa_idx, zerobuf, 0);
+		memset(&ipsec->tx_tbl[sa_idx], 0, sizeof(struct tx_sa));
+		ipsec->num_tx_sa--;
+	}
+
+	/* if there are no SAs left, stop the engine to save energy */
+	if (ipsec->num_rx_sa == 0 && ipsec->num_tx_sa == 0) {
+		adapter->flags2 &= ~IXGBE_FLAG2_IPSEC_ENABLED;
+		ixgbe_ipsec_stop_engine(adapter);
+	}
+}
+
+/**
+ * ixgbe_ipsec_offload_ok - can this packet use the xfrm hw offload
+ * @skb: current data packet
+ * @xs: pointer to transformer state struct
+ **/
+static bool ixgbe_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
+{
+	if (xs->props.family == AF_INET) {
+		/* Offload with IPv4 options is not supported yet */
+		if (ip_hdr(skb)->ihl != 5)
+			return false;
+	} else {
+		/* Offload with IPv6 extension headers is not support yet */
+		if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
+			return false;
+	}
+
+	return true;
+}
+
+static const struct xfrmdev_ops ixgbe_xfrmdev_ops = {
+	.xdo_dev_state_add = ixgbe_ipsec_add_sa,
+	.xdo_dev_state_delete = ixgbe_ipsec_del_sa,
+	.xdo_dev_offload_ok = ixgbe_ipsec_offload_ok,
+};
+
+/**
+ * ixgbe_ipsec_vf_clear - clear the tables of data for a VF
+ * @adapter: board private structure
+ * @vf: VF id to be removed
+ **/
+void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf)
+{
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	int i;
+
+	if (!ipsec)
+		return;
+
+	/* search rx sa table */
+	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT && ipsec->num_rx_sa; i++) {
+		if (!ipsec->rx_tbl[i].used)
+			continue;
+		if (ipsec->rx_tbl[i].mode & IXGBE_RXTXMOD_VF &&
+		    ipsec->rx_tbl[i].vf == vf)
+			ixgbe_ipsec_del_sa(ipsec->rx_tbl[i].xs);
+	}
+
+	/* search tx sa table */
+	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT && ipsec->num_tx_sa; i++) {
+		if (!ipsec->tx_tbl[i].used)
+			continue;
+		if (ipsec->tx_tbl[i].mode & IXGBE_RXTXMOD_VF &&
+		    ipsec->tx_tbl[i].vf == vf)
+			ixgbe_ipsec_del_sa(ipsec->tx_tbl[i].xs);
+	}
+}
+
+/**
+ * ixgbe_ipsec_vf_add_sa - translate VF request to SA add
+ * @adapter: board private structure
+ * @msgbuf: The message buffer
+ * @vf: the VF index
+ *
+ * Make up a new xs and algorithm info from the data sent by the VF.
+ * We only need to sketch in just enough to set up the HW offload.
+ * Put the resulting offload_handle into the return message to the VF.
+ *
+ * Returns 0 or error value
+ **/
+int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
+{
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_algo_desc *algo;
+	struct sa_mbx_msg *sam;
+	struct xfrm_state *xs;
+	size_t aead_len;
+	u16 sa_idx;
+	u32 pfsa;
+	int err;
+
+	sam = (struct sa_mbx_msg *)(&msgbuf[1]);
+	if (!adapter->vfinfo[vf].trusted ||
+	    !(adapter->flags2 & IXGBE_FLAG2_VF_IPSEC_ENABLED)) {
+		e_warn(drv, "VF %d attempted to add an IPsec SA\n", vf);
+		err = -EACCES;
+		goto err_out;
+	}
+
+	/* Tx IPsec offload doesn't seem to work on this
+	 * device, so block these requests for now.
+	 */
+	if (sam->dir != XFRM_DEV_OFFLOAD_IN) {
+		err = -EOPNOTSUPP;
+		goto err_out;
+	}
+
+	xs = kzalloc(sizeof(*xs), GFP_KERNEL);
+	if (unlikely(!xs)) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	xs->xso.dir = sam->dir;
+	xs->id.spi = sam->spi;
+	xs->id.proto = sam->proto;
+	xs->props.family = sam->family;
+	if (xs->props.family == AF_INET6)
+		memcpy(&xs->id.daddr.a6, sam->addr, sizeof(xs->id.daddr.a6));
+	else
+		memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4));
+	xs->xso.dev = adapter->netdev;
+
+	algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1);
+	if (unlikely(!algo)) {
+		err = -ENOENT;
+		goto err_xs;
+	}
+
+	aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8;
+	xs->aead = kzalloc(aead_len, GFP_KERNEL);
+	if (unlikely(!xs->aead)) {
+		err = -ENOMEM;
+		goto err_xs;
+	}
+
+	xs->props.ealgo = algo->desc.sadb_alg_id;
+	xs->geniv = algo->uinfo.aead.geniv;
+	xs->aead->alg_icv_len = IXGBE_IPSEC_AUTH_BITS;
+	xs->aead->alg_key_len = IXGBE_IPSEC_KEY_BITS;
+	memcpy(xs->aead->alg_key, sam->key, sizeof(sam->key));
+	memcpy(xs->aead->alg_name, aes_gcm_name, sizeof(aes_gcm_name));
+
+	/* set up the HW offload */
+	err = ixgbe_ipsec_add_sa(xs, NULL);
+	if (err)
+		goto err_aead;
+
+	pfsa = xs->xso.offload_handle;
+	if (pfsa < IXGBE_IPSEC_BASE_TX_INDEX) {
+		sa_idx = pfsa - IXGBE_IPSEC_BASE_RX_INDEX;
+		ipsec->rx_tbl[sa_idx].vf = vf;
+		ipsec->rx_tbl[sa_idx].mode |= IXGBE_RXTXMOD_VF;
+	} else {
+		sa_idx = pfsa - IXGBE_IPSEC_BASE_TX_INDEX;
+		ipsec->tx_tbl[sa_idx].vf = vf;
+		ipsec->tx_tbl[sa_idx].mode |= IXGBE_RXTXMOD_VF;
+	}
+
+	msgbuf[1] = xs->xso.offload_handle;
+
+	return 0;
+
+err_aead:
+	kfree_sensitive(xs->aead);
+err_xs:
+	kfree_sensitive(xs);
+err_out:
+	msgbuf[1] = err;
+	return err;
+}
+
+/**
+ * ixgbe_ipsec_vf_del_sa - translate VF request to SA delete
+ * @adapter: board private structure
+ * @msgbuf: The message buffer
+ * @vf: the VF index
+ *
+ * Given the offload_handle sent by the VF, look for the related SA table
+ * entry and use its xs field to call for a delete of the SA.
+ *
+ * Note: We silently ignore requests to delete entries that are already
+ *       set to unused because when a VF is set to "DOWN", the PF first
+ *       gets a reset and clears all the VF's entries; then the VF's
+ *       XFRM stack sends individual deletes for each entry, which the
+ *       reset already removed.  In the future it might be good to try to
+ *       optimize this so not so many unnecessary delete messages are sent.
+ *
+ * Returns 0 or error value
+ **/
+int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
+{
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_state *xs;
+	u32 pfsa = msgbuf[1];
+	u16 sa_idx;
+
+	if (!adapter->vfinfo[vf].trusted) {
+		e_err(drv, "vf %d attempted to delete an SA\n", vf);
+		return -EPERM;
+	}
+
+	if (pfsa < IXGBE_IPSEC_BASE_TX_INDEX) {
+		struct rx_sa *rsa;
+
+		sa_idx = pfsa - IXGBE_IPSEC_BASE_RX_INDEX;
+		if (sa_idx >= IXGBE_IPSEC_MAX_SA_COUNT) {
+			e_err(drv, "vf %d SA index %d out of range\n",
+			      vf, sa_idx);
+			return -EINVAL;
+		}
+
+		rsa = &ipsec->rx_tbl[sa_idx];
+
+		if (!rsa->used)
+			return 0;
+
+		if (!(rsa->mode & IXGBE_RXTXMOD_VF) ||
+		    rsa->vf != vf) {
+			e_err(drv, "vf %d bad Rx SA index %d\n", vf, sa_idx);
+			return -ENOENT;
+		}
+
+		xs = ipsec->rx_tbl[sa_idx].xs;
+	} else {
+		struct tx_sa *tsa;
+
+		sa_idx = pfsa - IXGBE_IPSEC_BASE_TX_INDEX;
+		if (sa_idx >= IXGBE_IPSEC_MAX_SA_COUNT) {
+			e_err(drv, "vf %d SA index %d out of range\n",
+			      vf, sa_idx);
+			return -EINVAL;
+		}
+
+		tsa = &ipsec->tx_tbl[sa_idx];
+
+		if (!tsa->used)
+			return 0;
+
+		if (!(tsa->mode & IXGBE_RXTXMOD_VF) ||
+		    tsa->vf != vf) {
+			e_err(drv, "vf %d bad Tx SA index %d\n", vf, sa_idx);
+			return -ENOENT;
+		}
+
+		xs = ipsec->tx_tbl[sa_idx].xs;
+	}
+
+	ixgbe_ipsec_del_sa(xs);
+
+	/* remove the xs that was made-up in the add request */
+	kfree_sensitive(xs);
+
+	return 0;
+}
+
+/**
+ * ixgbe_ipsec_tx - setup Tx flags for ipsec offload
+ * @tx_ring: outgoing context
+ * @first: current data packet
+ * @itd: ipsec Tx data for later use in building context descriptor
+ **/
+int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
+		   struct ixgbe_tx_buffer *first,
+		   struct ixgbe_ipsec_tx_data *itd)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev);
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_state *xs;
+	struct sec_path *sp;
+	struct tx_sa *tsa;
+
+	sp = skb_sec_path(first->skb);
+	if (unlikely(!sp->len)) {
+		netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n",
+			   __func__, sp->len);
+		return 0;
+	}
+
+	xs = xfrm_input_state(first->skb);
+	if (unlikely(!xs)) {
+		netdev_err(tx_ring->netdev, "%s: no xfrm_input_state() xs = %p\n",
+			   __func__, xs);
+		return 0;
+	}
+
+	itd->sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
+	if (unlikely(itd->sa_idx >= IXGBE_IPSEC_MAX_SA_COUNT)) {
+		netdev_err(tx_ring->netdev, "%s: bad sa_idx=%d handle=%lu\n",
+			   __func__, itd->sa_idx, xs->xso.offload_handle);
+		return 0;
+	}
+
+	tsa = &ipsec->tx_tbl[itd->sa_idx];
+	if (unlikely(!tsa->used)) {
+		netdev_err(tx_ring->netdev, "%s: unused sa_idx=%d\n",
+			   __func__, itd->sa_idx);
+		return 0;
+	}
+
+	first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CC;
+
+	if (xs->id.proto == IPPROTO_ESP) {
+
+		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
+			      IXGBE_ADVTXD_TUCMD_L4T_TCP;
+		if (first->protocol == htons(ETH_P_IP))
+			itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4;
+
+		/* The actual trailer length is authlen (16 bytes) plus
+		 * 2 bytes for the proto and the padlen values, plus
+		 * padlen bytes of padding.  This ends up not the same
+		 * as the static value found in xs->props.trailer_len (21).
+		 *
+		 * ... but if we're doing GSO, don't bother as the stack
+		 * doesn't add a trailer for those.
+		 */
+		if (!skb_is_gso(first->skb)) {
+			/* The "correct" way to get the auth length would be
+			 * to use
+			 *    authlen = crypto_aead_authsize(xs->data);
+			 * but since we know we only have one size to worry
+			 * about * we can let the compiler use the constant
+			 * and save us a few CPU cycles.
+			 */
+			const int authlen = IXGBE_IPSEC_AUTH_BITS / 8;
+			struct sk_buff *skb = first->skb;
+			u8 padlen;
+			int ret;
+
+			ret = skb_copy_bits(skb, skb->len - (authlen + 2),
+					    &padlen, 1);
+			if (unlikely(ret))
+				return 0;
+			itd->trailer_len = authlen + 2 + padlen;
+		}
+	}
+	if (tsa->encrypt)
+		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;
+
+	return 1;
+}
+
+/**
+ * ixgbe_ipsec_rx - decode ipsec bits from Rx descriptor
+ * @rx_ring: receiving ring
+ * @rx_desc: receive data descriptor
+ * @skb: current data packet
+ *
+ * Determine if there was an ipsec encapsulation noticed, and if so set up
+ * the resulting status for later in the receive stack.
+ **/
+void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
+		    union ixgbe_adv_rx_desc *rx_desc,
+		    struct sk_buff *skb)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(rx_ring->netdev);
+	__le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
+	__le16 ipsec_pkt_types = cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH |
+					     IXGBE_RXDADV_PKTTYPE_IPSEC_ESP);
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_offload *xo = NULL;
+	struct xfrm_state *xs = NULL;
+	struct ipv6hdr *ip6 = NULL;
+	struct iphdr *ip4 = NULL;
+	struct sec_path *sp;
+	void *daddr;
+	__be32 spi;
+	u8 *c_hdr;
+	u8 proto;
+
+	/* Find the ip and crypto headers in the data.
+	 * We can assume no vlan header in the way, b/c the
+	 * hw won't recognize the IPsec packet and anyway the
+	 * currently vlan device doesn't support xfrm offload.
+	 */
+	if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPV4)) {
+		ip4 = (struct iphdr *)(skb->data + ETH_HLEN);
+		daddr = &ip4->daddr;
+		c_hdr = (u8 *)ip4 + ip4->ihl * 4;
+	} else if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPV6)) {
+		ip6 = (struct ipv6hdr *)(skb->data + ETH_HLEN);
+		daddr = &ip6->daddr;
+		c_hdr = (u8 *)ip6 + sizeof(struct ipv6hdr);
+	} else {
+		return;
+	}
+
+	switch (pkt_info & ipsec_pkt_types) {
+	case cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH):
+		spi = ((struct ip_auth_hdr *)c_hdr)->spi;
+		proto = IPPROTO_AH;
+		break;
+	case cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_ESP):
+		spi = ((struct ip_esp_hdr *)c_hdr)->spi;
+		proto = IPPROTO_ESP;
+		break;
+	default:
+		return;
+	}
+
+	xs = ixgbe_ipsec_find_rx_state(ipsec, daddr, proto, spi, !!ip4);
+	if (unlikely(!xs))
+		return;
+
+	sp = secpath_set(skb);
+	if (unlikely(!sp))
+		return;
+
+	sp->xvec[sp->len++] = xs;
+	sp->olen++;
+	xo = xfrm_offload(skb);
+	xo->flags = CRYPTO_DONE;
+	xo->status = CRYPTO_SUCCESS;
+
+	adapter->rx_ipsec++;
+}
+
+/**
+ * ixgbe_init_ipsec_offload - initialize security registers for IPSec operation
+ * @adapter: board private structure
+ **/
+void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_ipsec *ipsec;
+	u32 t_dis, r_dis;
+	size_t size;
+
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return;
+
+	/* If there is no support for either Tx or Rx offload
+	 * we should not be advertising support for IPsec.
+	 */
+	t_dis = IXGBE_READ_REG(hw, IXGBE_SECTXSTAT) &
+		IXGBE_SECTXSTAT_SECTX_OFF_DIS;
+	r_dis = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT) &
+		IXGBE_SECRXSTAT_SECRX_OFF_DIS;
+	if (t_dis || r_dis)
+		return;
+
+	ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
+	if (!ipsec)
+		goto err1;
+	hash_init(ipsec->rx_sa_list);
+
+	size = sizeof(struct rx_sa) * IXGBE_IPSEC_MAX_SA_COUNT;
+	ipsec->rx_tbl = kzalloc(size, GFP_KERNEL);
+	if (!ipsec->rx_tbl)
+		goto err2;
+
+	size = sizeof(struct tx_sa) * IXGBE_IPSEC_MAX_SA_COUNT;
+	ipsec->tx_tbl = kzalloc(size, GFP_KERNEL);
+	if (!ipsec->tx_tbl)
+		goto err2;
+
+	size = sizeof(struct rx_ip_sa) * IXGBE_IPSEC_MAX_RX_IP_COUNT;
+	ipsec->ip_tbl = kzalloc(size, GFP_KERNEL);
+	if (!ipsec->ip_tbl)
+		goto err2;
+
+	ipsec->num_rx_sa = 0;
+	ipsec->num_tx_sa = 0;
+
+	adapter->ipsec = ipsec;
+	ixgbe_ipsec_stop_engine(adapter);
+	ixgbe_ipsec_clear_hw_tables(adapter);
+
+	adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops;
+
+	return;
+
+err2:
+	kfree(ipsec->ip_tbl);
+	kfree(ipsec->rx_tbl);
+	kfree(ipsec->tx_tbl);
+	kfree(ipsec);
+err1:
+	netdev_err(adapter->netdev, "Unable to allocate memory for SA tables");
+}
+
+/**
+ * ixgbe_stop_ipsec_offload - tear down the ipsec offload
+ * @adapter: board private structure
+ **/
+void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+
+	adapter->ipsec = NULL;
+	if (ipsec) {
+		kfree(ipsec->ip_tbl);
+		kfree(ipsec->rx_tbl);
+		kfree(ipsec->tx_tbl);
+		kfree(ipsec);
+	}
+}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
new file mode 100644
index 0000000000..809ab51a78
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved. */
+
+#ifndef _IXGBE_IPSEC_H_
+#define _IXGBE_IPSEC_H_
+
+#define IXGBE_IPSEC_MAX_SA_COUNT	1024
+#define IXGBE_IPSEC_MAX_RX_IP_COUNT	128
+#define IXGBE_IPSEC_BASE_RX_INDEX	0
+#define IXGBE_IPSEC_BASE_TX_INDEX	IXGBE_IPSEC_MAX_SA_COUNT
+#define IXGBE_IPSEC_AUTH_BITS		128
+
+#define IXGBE_RXTXIDX_IPS_EN		0x00000001
+#define IXGBE_RXIDX_TBL_SHIFT		1
+enum ixgbe_ipsec_tbl_sel {
+	ips_rx_ip_tbl	=	0x01,
+	ips_rx_spi_tbl	=	0x02,
+	ips_rx_key_tbl	=	0x03,
+};
+
+#define IXGBE_RXTXIDX_IDX_SHIFT		3
+#define IXGBE_RXTXIDX_READ		0x40000000
+#define IXGBE_RXTXIDX_WRITE		0x80000000
+
+#define IXGBE_RXMOD_VALID		0x00000001
+#define IXGBE_RXMOD_PROTO_ESP		0x00000004
+#define IXGBE_RXMOD_DECRYPT		0x00000008
+#define IXGBE_RXMOD_IPV6		0x00000010
+#define IXGBE_RXTXMOD_VF		0x00000020
+
+struct rx_sa {
+	struct hlist_node hlist;
+	struct xfrm_state *xs;
+	__be32 ipaddr[4];
+	u32 key[4];
+	u32 salt;
+	u32 mode;
+	u8  iptbl_ind;
+	bool used;
+	bool decrypt;
+	u32 vf;
+};
+
+struct rx_ip_sa {
+	__be32 ipaddr[4];
+	u32 ref_cnt;
+	bool used;
+};
+
+struct tx_sa {
+	struct xfrm_state *xs;
+	u32 key[4];
+	u32 salt;
+	u32 mode;
+	bool encrypt;
+	bool used;
+	u32 vf;
+};
+
+struct ixgbe_ipsec_tx_data {
+	u32 flags;
+	u16 trailer_len;
+	u16 sa_idx;
+};
+
+struct ixgbe_ipsec {
+	u16 num_rx_sa;
+	u16 num_tx_sa;
+	struct rx_ip_sa *ip_tbl;
+	struct rx_sa *rx_tbl;
+	struct tx_sa *tx_tbl;
+	DECLARE_HASHTABLE(rx_sa_list, 10);
+};
+
+struct sa_mbx_msg {
+	__be32 spi;
+	u8 dir;
+	u8 proto;
+	u16 family;
+	__be32 addr[4];
+	u32 key[5];
+};
+#endif /* _IXGBE_IPSEC_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
new file mode 100644
index 0000000000..0ee943db3d
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -0,0 +1,1302 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "ixgbe.h"
+#include "ixgbe_sriov.h"
+
+#ifdef CONFIG_IXGBE_DCB
+/**
+ * ixgbe_cache_ring_dcb_sriov - Descriptor ring to register mapping for SR-IOV
+ * @adapter: board private structure to initialize
+ *
+ * Cache the descriptor ring offsets for SR-IOV to the assigned rings.  It
+ * will also try to cache the proper offsets if RSS/FCoE are enabled along
+ * with VMDq.
+ *
+ **/
+static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter)
+{
+#ifdef IXGBE_FCOE
+	struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE];
+#endif /* IXGBE_FCOE */
+	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
+	int i;
+	u16 reg_idx, pool;
+	u8 tcs = adapter->hw_tcs;
+
+	/* verify we have DCB queueing enabled before proceeding */
+	if (tcs <= 1)
+		return false;
+
+	/* verify we have VMDq enabled before proceeding */
+	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
+		return false;
+
+	/* start at VMDq register offset for SR-IOV enabled setups */
+	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
+	for (i = 0, pool = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
+		/* If we are greater than indices move to next pool */
+		if ((reg_idx & ~vmdq->mask) >= tcs) {
+			pool++;
+			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
+		}
+		adapter->rx_ring[i]->reg_idx = reg_idx;
+		adapter->rx_ring[i]->netdev = pool ? NULL : adapter->netdev;
+	}
+
+	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
+	for (i = 0; i < adapter->num_tx_queues; i++, reg_idx++) {
+		/* If we are greater than indices move to next pool */
+		if ((reg_idx & ~vmdq->mask) >= tcs)
+			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
+		adapter->tx_ring[i]->reg_idx = reg_idx;
+	}
+
+#ifdef IXGBE_FCOE
+	/* nothing to do if FCoE is disabled */
+	if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
+		return true;
+
+	/* The work is already done if the FCoE ring is shared */
+	if (fcoe->offset < tcs)
+		return true;
+
+	/* The FCoE rings exist separately, we need to move their reg_idx */
+	if (fcoe->indices) {
+		u16 queues_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
+		u8 fcoe_tc = ixgbe_fcoe_get_tc(adapter);
+
+		reg_idx = (vmdq->offset + vmdq->indices) * queues_per_pool;
+		for (i = fcoe->offset; i < adapter->num_rx_queues; i++) {
+			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask) + fcoe_tc;
+			adapter->rx_ring[i]->reg_idx = reg_idx;
+			adapter->rx_ring[i]->netdev = adapter->netdev;
+			reg_idx++;
+		}
+
+		reg_idx = (vmdq->offset + vmdq->indices) * queues_per_pool;
+		for (i = fcoe->offset; i < adapter->num_tx_queues; i++) {
+			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask) + fcoe_tc;
+			adapter->tx_ring[i]->reg_idx = reg_idx;
+			reg_idx++;
+		}
+	}
+
+#endif /* IXGBE_FCOE */
+	return true;
+}
+
+/* ixgbe_get_first_reg_idx - Return first register index associated with ring */
+static void ixgbe_get_first_reg_idx(struct ixgbe_adapter *adapter, u8 tc,
+				    unsigned int *tx, unsigned int *rx)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u8 num_tcs = adapter->hw_tcs;
+
+	*tx = 0;
+	*rx = 0;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		/* TxQs/TC: 4	RxQs/TC: 8 */
+		*tx = tc << 2; /* 0, 4,  8, 12, 16, 20, 24, 28 */
+		*rx = tc << 3; /* 0, 8, 16, 24, 32, 40, 48, 56 */
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		if (num_tcs > 4) {
+			/*
+			 * TCs    : TC0/1 TC2/3 TC4-7
+			 * TxQs/TC:    32    16     8
+			 * RxQs/TC:    16    16    16
+			 */
+			*rx = tc << 4;
+			if (tc < 3)
+				*tx = tc << 5;		/*   0,  32,  64 */
+			else if (tc < 5)
+				*tx = (tc + 2) << 4;	/*  80,  96 */
+			else
+				*tx = (tc + 8) << 3;	/* 104, 112, 120 */
+		} else {
+			/*
+			 * TCs    : TC0 TC1 TC2/3
+			 * TxQs/TC:  64  32    16
+			 * RxQs/TC:  32  32    32
+			 */
+			*rx = tc << 5;
+			if (tc < 2)
+				*tx = tc << 6;		/*  0,  64 */
+			else
+				*tx = (tc + 4) << 4;	/* 96, 112 */
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * ixgbe_cache_ring_dcb - Descriptor ring to register mapping for DCB
+ * @adapter: board private structure to initialize
+ *
+ * Cache the descriptor ring offsets for DCB to the assigned rings.
+ *
+ **/
+static bool ixgbe_cache_ring_dcb(struct ixgbe_adapter *adapter)
+{
+	u8 num_tcs = adapter->hw_tcs;
+	unsigned int tx_idx, rx_idx;
+	int tc, offset, rss_i, i;
+
+	/* verify we have DCB queueing enabled before proceeding */
+	if (num_tcs <= 1)
+		return false;
+
+	rss_i = adapter->ring_feature[RING_F_RSS].indices;
+
+	for (tc = 0, offset = 0; tc < num_tcs; tc++, offset += rss_i) {
+		ixgbe_get_first_reg_idx(adapter, tc, &tx_idx, &rx_idx);
+		for (i = 0; i < rss_i; i++, tx_idx++, rx_idx++) {
+			adapter->tx_ring[offset + i]->reg_idx = tx_idx;
+			adapter->rx_ring[offset + i]->reg_idx = rx_idx;
+			adapter->rx_ring[offset + i]->netdev = adapter->netdev;
+			adapter->tx_ring[offset + i]->dcb_tc = tc;
+			adapter->rx_ring[offset + i]->dcb_tc = tc;
+		}
+	}
+
+	return true;
+}
+
+#endif
+/**
+ * ixgbe_cache_ring_sriov - Descriptor ring to register mapping for sriov
+ * @adapter: board private structure to initialize
+ *
+ * SR-IOV doesn't use any descriptor rings but changes the default if
+ * no other mapping is used.
+ *
+ */
+static bool ixgbe_cache_ring_sriov(struct ixgbe_adapter *adapter)
+{
+#ifdef IXGBE_FCOE
+	struct ixgbe_ring_feature *fcoe = &adapter->ring_feature[RING_F_FCOE];
+#endif /* IXGBE_FCOE */
+	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
+	struct ixgbe_ring_feature *rss = &adapter->ring_feature[RING_F_RSS];
+	u16 reg_idx, pool;
+	int i;
+
+	/* only proceed if VMDq is enabled */
+	if (!(adapter->flags & IXGBE_FLAG_VMDQ_ENABLED))
+		return false;
+
+	/* start at VMDq register offset for SR-IOV enabled setups */
+	pool = 0;
+	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
+	for (i = 0; i < adapter->num_rx_queues; i++, reg_idx++) {
+#ifdef IXGBE_FCOE
+		/* Allow first FCoE queue to be mapped as RSS */
+		if (fcoe->offset && (i > fcoe->offset))
+			break;
+#endif
+		/* If we are greater than indices move to next pool */
+		if ((reg_idx & ~vmdq->mask) >= rss->indices) {
+			pool++;
+			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
+		}
+		adapter->rx_ring[i]->reg_idx = reg_idx;
+		adapter->rx_ring[i]->netdev = pool ? NULL : adapter->netdev;
+	}
+
+#ifdef IXGBE_FCOE
+	/* FCoE uses a linear block of queues so just assigning 1:1 */
+	for (; i < adapter->num_rx_queues; i++, reg_idx++) {
+		adapter->rx_ring[i]->reg_idx = reg_idx;
+		adapter->rx_ring[i]->netdev = adapter->netdev;
+	}
+
+#endif
+	reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
+	for (i = 0; i < adapter->num_tx_queues; i++, reg_idx++) {
+#ifdef IXGBE_FCOE
+		/* Allow first FCoE queue to be mapped as RSS */
+		if (fcoe->offset && (i > fcoe->offset))
+			break;
+#endif
+		/* If we are greater than indices move to next pool */
+		if ((reg_idx & rss->mask) >= rss->indices)
+			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
+		adapter->tx_ring[i]->reg_idx = reg_idx;
+	}
+
+#ifdef IXGBE_FCOE
+	/* FCoE uses a linear block of queues so just assigning 1:1 */
+	for (; i < adapter->num_tx_queues; i++, reg_idx++)
+		adapter->tx_ring[i]->reg_idx = reg_idx;
+
+#endif
+
+	return true;
+}
+
+/**
+ * ixgbe_cache_ring_rss - Descriptor ring to register mapping for RSS
+ * @adapter: board private structure to initialize
+ *
+ * Cache the descriptor ring offsets for RSS to the assigned rings.
+ *
+ **/
+static bool ixgbe_cache_ring_rss(struct ixgbe_adapter *adapter)
+{
+	int i, reg_idx;
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		adapter->rx_ring[i]->reg_idx = i;
+		adapter->rx_ring[i]->netdev = adapter->netdev;
+	}
+	for (i = 0, reg_idx = 0; i < adapter->num_tx_queues; i++, reg_idx++)
+		adapter->tx_ring[i]->reg_idx = reg_idx;
+	for (i = 0; i < adapter->num_xdp_queues; i++, reg_idx++)
+		adapter->xdp_ring[i]->reg_idx = reg_idx;
+
+	return true;
+}
+
+/**
+ * ixgbe_cache_ring_register - Descriptor ring to register mapping
+ * @adapter: board private structure to initialize
+ *
+ * Once we know the feature-set enabled for the device, we'll cache
+ * the register offset the descriptor ring is assigned to.
+ *
+ * Note, the order the various feature calls is important.  It must start with
+ * the "most" features enabled at the same time, then trickle down to the
+ * least amount of features turned on at once.
+ **/
+static void ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
+{
+	/* start with default case */
+	adapter->rx_ring[0]->reg_idx = 0;
+	adapter->tx_ring[0]->reg_idx = 0;
+
+#ifdef CONFIG_IXGBE_DCB
+	if (ixgbe_cache_ring_dcb_sriov(adapter))
+		return;
+
+	if (ixgbe_cache_ring_dcb(adapter))
+		return;
+
+#endif
+	if (ixgbe_cache_ring_sriov(adapter))
+		return;
+
+	ixgbe_cache_ring_rss(adapter);
+}
+
+static int ixgbe_xdp_queues(struct ixgbe_adapter *adapter)
+{
+	int queues;
+
+	queues = min_t(int, IXGBE_MAX_XDP_QS, nr_cpu_ids);
+	return adapter->xdp_prog ? queues : 0;
+}
+
+#define IXGBE_RSS_64Q_MASK	0x3F
+#define IXGBE_RSS_16Q_MASK	0xF
+#define IXGBE_RSS_8Q_MASK	0x7
+#define IXGBE_RSS_4Q_MASK	0x3
+#define IXGBE_RSS_2Q_MASK	0x1
+#define IXGBE_RSS_DISABLED_MASK	0x0
+
+#ifdef CONFIG_IXGBE_DCB
+/**
+ * ixgbe_set_dcb_sriov_queues: Allocate queues for SR-IOV devices w/ DCB
+ * @adapter: board private structure to initialize
+ *
+ * When SR-IOV (Single Root IO Virtualiztion) is enabled, allocate queues
+ * and VM pools where appropriate.  Also assign queues based on DCB
+ * priorities and map accordingly..
+ *
+ **/
+static bool ixgbe_set_dcb_sriov_queues(struct ixgbe_adapter *adapter)
+{
+	int i;
+	u16 vmdq_i = adapter->ring_feature[RING_F_VMDQ].limit;
+	u16 vmdq_m = 0;
+#ifdef IXGBE_FCOE
+	u16 fcoe_i = 0;
+#endif
+	u8 tcs = adapter->hw_tcs;
+
+	/* verify we have DCB queueing enabled before proceeding */
+	if (tcs <= 1)
+		return false;
+
+	/* verify we have VMDq enabled before proceeding */
+	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
+		return false;
+
+	/* limit VMDq instances on the PF by number of Tx queues */
+	vmdq_i = min_t(u16, vmdq_i, MAX_TX_QUEUES / tcs);
+
+	/* Add starting offset to total pool count */
+	vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset;
+
+	/* 16 pools w/ 8 TC per pool */
+	if (tcs > 4) {
+		vmdq_i = min_t(u16, vmdq_i, 16);
+		vmdq_m = IXGBE_82599_VMDQ_8Q_MASK;
+	/* 32 pools w/ 4 TC per pool */
+	} else {
+		vmdq_i = min_t(u16, vmdq_i, 32);
+		vmdq_m = IXGBE_82599_VMDQ_4Q_MASK;
+	}
+
+#ifdef IXGBE_FCOE
+	/* queues in the remaining pools are available for FCoE */
+	fcoe_i = (128 / __ALIGN_MASK(1, ~vmdq_m)) - vmdq_i;
+
+#endif
+	/* remove the starting offset from the pool count */
+	vmdq_i -= adapter->ring_feature[RING_F_VMDQ].offset;
+
+	/* save features for later use */
+	adapter->ring_feature[RING_F_VMDQ].indices = vmdq_i;
+	adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;
+
+	/*
+	 * We do not support DCB, VMDq, and RSS all simultaneously
+	 * so we will disable RSS since it is the lowest priority
+	 */
+	adapter->ring_feature[RING_F_RSS].indices = 1;
+	adapter->ring_feature[RING_F_RSS].mask = IXGBE_RSS_DISABLED_MASK;
+
+	/* disable ATR as it is not supported when VMDq is enabled */
+	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
+
+	adapter->num_rx_pools = vmdq_i;
+	adapter->num_rx_queues_per_pool = tcs;
+
+	adapter->num_tx_queues = vmdq_i * tcs;
+	adapter->num_xdp_queues = 0;
+	adapter->num_rx_queues = vmdq_i * tcs;
+
+#ifdef IXGBE_FCOE
+	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
+		struct ixgbe_ring_feature *fcoe;
+
+		fcoe = &adapter->ring_feature[RING_F_FCOE];
+
+		/* limit ourselves based on feature limits */
+		fcoe_i = min_t(u16, fcoe_i, fcoe->limit);
+
+		if (fcoe_i) {
+			/* alloc queues for FCoE separately */
+			fcoe->indices = fcoe_i;
+			fcoe->offset = vmdq_i * tcs;
+
+			/* add queues to adapter */
+			adapter->num_tx_queues += fcoe_i;
+			adapter->num_rx_queues += fcoe_i;
+		} else if (tcs > 1) {
+			/* use queue belonging to FcoE TC */
+			fcoe->indices = 1;
+			fcoe->offset = ixgbe_fcoe_get_tc(adapter);
+		} else {
+			adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
+
+			fcoe->indices = 0;
+			fcoe->offset = 0;
+		}
+	}
+
+#endif /* IXGBE_FCOE */
+	/* configure TC to queue mapping */
+	for (i = 0; i < tcs; i++)
+		netdev_set_tc_queue(adapter->netdev, i, 1, i);
+
+	return true;
+}
+
+static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter)
+{
+	struct net_device *dev = adapter->netdev;
+	struct ixgbe_ring_feature *f;
+	int rss_i, rss_m, i;
+	int tcs;
+
+	/* Map queue offset and counts onto allocated tx queues */
+	tcs = adapter->hw_tcs;
+
+	/* verify we have DCB queueing enabled before proceeding */
+	if (tcs <= 1)
+		return false;
+
+	/* determine the upper limit for our current DCB mode */
+	rss_i = dev->num_tx_queues / tcs;
+	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
+		/* 8 TC w/ 4 queues per TC */
+		rss_i = min_t(u16, rss_i, 4);
+		rss_m = IXGBE_RSS_4Q_MASK;
+	} else if (tcs > 4) {
+		/* 8 TC w/ 8 queues per TC */
+		rss_i = min_t(u16, rss_i, 8);
+		rss_m = IXGBE_RSS_8Q_MASK;
+	} else {
+		/* 4 TC w/ 16 queues per TC */
+		rss_i = min_t(u16, rss_i, 16);
+		rss_m = IXGBE_RSS_16Q_MASK;
+	}
+
+	/* set RSS mask and indices */
+	f = &adapter->ring_feature[RING_F_RSS];
+	rss_i = min_t(int, rss_i, f->limit);
+	f->indices = rss_i;
+	f->mask = rss_m;
+
+	/* disable ATR as it is not supported when multiple TCs are enabled */
+	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
+
+#ifdef IXGBE_FCOE
+	/* FCoE enabled queues require special configuration indexed
+	 * by feature specific indices and offset. Here we map FCoE
+	 * indices onto the DCB queue pairs allowing FCoE to own
+	 * configuration later.
+	 */
+	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
+		u8 tc = ixgbe_fcoe_get_tc(adapter);
+
+		f = &adapter->ring_feature[RING_F_FCOE];
+		f->indices = min_t(u16, rss_i, f->limit);
+		f->offset = rss_i * tc;
+	}
+
+#endif /* IXGBE_FCOE */
+	for (i = 0; i < tcs; i++)
+		netdev_set_tc_queue(dev, i, rss_i, rss_i * i);
+
+	adapter->num_tx_queues = rss_i * tcs;
+	adapter->num_xdp_queues = 0;
+	adapter->num_rx_queues = rss_i * tcs;
+
+	return true;
+}
+
+#endif
+/**
+ * ixgbe_set_sriov_queues - Allocate queues for SR-IOV devices
+ * @adapter: board private structure to initialize
+ *
+ * When SR-IOV (Single Root IO Virtualiztion) is enabled, allocate queues
+ * and VM pools where appropriate.  If RSS is available, then also try and
+ * enable RSS and map accordingly.
+ *
+ **/
+static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter)
+{
+	u16 vmdq_i = adapter->ring_feature[RING_F_VMDQ].limit;
+	u16 vmdq_m = 0;
+	u16 rss_i = adapter->ring_feature[RING_F_RSS].limit;
+	u16 rss_m = IXGBE_RSS_DISABLED_MASK;
+#ifdef IXGBE_FCOE
+	u16 fcoe_i = 0;
+#endif
+
+	/* only proceed if SR-IOV is enabled */
+	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
+		return false;
+
+	/* limit l2fwd RSS based on total Tx queue limit */
+	rss_i = min_t(u16, rss_i, MAX_TX_QUEUES / vmdq_i);
+
+	/* Add starting offset to total pool count */
+	vmdq_i += adapter->ring_feature[RING_F_VMDQ].offset;
+
+	/* double check we are limited to maximum pools */
+	vmdq_i = min_t(u16, IXGBE_MAX_VMDQ_INDICES, vmdq_i);
+
+	/* 64 pool mode with 2 queues per pool */
+	if (vmdq_i > 32) {
+		vmdq_m = IXGBE_82599_VMDQ_2Q_MASK;
+		rss_m = IXGBE_RSS_2Q_MASK;
+		rss_i = min_t(u16, rss_i, 2);
+	/* 32 pool mode with up to 4 queues per pool */
+	} else {
+		vmdq_m = IXGBE_82599_VMDQ_4Q_MASK;
+		rss_m = IXGBE_RSS_4Q_MASK;
+		/* We can support 4, 2, or 1 queues */
+		rss_i = (rss_i > 3) ? 4 : (rss_i > 1) ? 2 : 1;
+	}
+
+#ifdef IXGBE_FCOE
+	/* queues in the remaining pools are available for FCoE */
+	fcoe_i = 128 - (vmdq_i * __ALIGN_MASK(1, ~vmdq_m));
+
+#endif
+	/* remove the starting offset from the pool count */
+	vmdq_i -= adapter->ring_feature[RING_F_VMDQ].offset;
+
+	/* save features for later use */
+	adapter->ring_feature[RING_F_VMDQ].indices = vmdq_i;
+	adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m;
+
+	/* limit RSS based on user input and save for later use */
+	adapter->ring_feature[RING_F_RSS].indices = rss_i;
+	adapter->ring_feature[RING_F_RSS].mask = rss_m;
+
+	adapter->num_rx_pools = vmdq_i;
+	adapter->num_rx_queues_per_pool = rss_i;
+
+	adapter->num_rx_queues = vmdq_i * rss_i;
+	adapter->num_tx_queues = vmdq_i * rss_i;
+	adapter->num_xdp_queues = 0;
+
+	/* disable ATR as it is not supported when VMDq is enabled */
+	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
+
+#ifdef IXGBE_FCOE
+	/*
+	 * FCoE can use rings from adjacent buffers to allow RSS
+	 * like behavior.  To account for this we need to add the
+	 * FCoE indices to the total ring count.
+	 */
+	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
+		struct ixgbe_ring_feature *fcoe;
+
+		fcoe = &adapter->ring_feature[RING_F_FCOE];
+
+		/* limit ourselves based on feature limits */
+		fcoe_i = min_t(u16, fcoe_i, fcoe->limit);
+
+		if (vmdq_i > 1 && fcoe_i) {
+			/* alloc queues for FCoE separately */
+			fcoe->indices = fcoe_i;
+			fcoe->offset = vmdq_i * rss_i;
+		} else {
+			/* merge FCoE queues with RSS queues */
+			fcoe_i = min_t(u16, fcoe_i + rss_i, num_online_cpus());
+
+			/* limit indices to rss_i if MSI-X is disabled */
+			if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
+				fcoe_i = rss_i;
+
+			/* attempt to reserve some queues for just FCoE */
+			fcoe->indices = min_t(u16, fcoe_i, fcoe->limit);
+			fcoe->offset = fcoe_i - fcoe->indices;
+
+			fcoe_i -= rss_i;
+		}
+
+		/* add queues to adapter */
+		adapter->num_tx_queues += fcoe_i;
+		adapter->num_rx_queues += fcoe_i;
+	}
+
+#endif
+	/* To support macvlan offload we have to use num_tc to
+	 * restrict the queues that can be used by the device.
+	 * By doing this we can avoid reporting a false number of
+	 * queues.
+	 */
+	if (vmdq_i > 1)
+		netdev_set_num_tc(adapter->netdev, 1);
+
+	/* populate TC0 for use by pool 0 */
+	netdev_set_tc_queue(adapter->netdev, 0,
+			    adapter->num_rx_queues_per_pool, 0);
+
+	return true;
+}
+
+/**
+ * ixgbe_set_rss_queues - Allocate queues for RSS
+ * @adapter: board private structure to initialize
+ *
+ * This is our "base" multiqueue mode.  RSS (Receive Side Scaling) will try
+ * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
+ *
+ **/
+static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_ring_feature *f;
+	u16 rss_i;
+
+	/* set mask for 16 queue limit of RSS */
+	f = &adapter->ring_feature[RING_F_RSS];
+	rss_i = f->limit;
+
+	f->indices = rss_i;
+
+	if (hw->mac.type < ixgbe_mac_X550)
+		f->mask = IXGBE_RSS_16Q_MASK;
+	else
+		f->mask = IXGBE_RSS_64Q_MASK;
+
+	/* disable ATR by default, it will be configured below */
+	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
+
+	/*
+	 * Use Flow Director in addition to RSS to ensure the best
+	 * distribution of flows across cores, even when an FDIR flow
+	 * isn't matched.
+	 */
+	if (rss_i > 1 && adapter->atr_sample_rate) {
+		f = &adapter->ring_feature[RING_F_FDIR];
+
+		rss_i = f->indices = f->limit;
+
+		if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
+			adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
+	}
+
+#ifdef IXGBE_FCOE
+	/*
+	 * FCoE can exist on the same rings as standard network traffic
+	 * however it is preferred to avoid that if possible.  In order
+	 * to get the best performance we allocate as many FCoE queues
+	 * as we can and we place them at the end of the ring array to
+	 * avoid sharing queues with standard RSS on systems with 24 or
+	 * more CPUs.
+	 */
+	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) {
+		struct net_device *dev = adapter->netdev;
+		u16 fcoe_i;
+
+		f = &adapter->ring_feature[RING_F_FCOE];
+
+		/* merge FCoE queues with RSS queues */
+		fcoe_i = min_t(u16, f->limit + rss_i, num_online_cpus());
+		fcoe_i = min_t(u16, fcoe_i, dev->num_tx_queues);
+
+		/* limit indices to rss_i if MSI-X is disabled */
+		if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
+			fcoe_i = rss_i;
+
+		/* attempt to reserve some queues for just FCoE */
+		f->indices = min_t(u16, fcoe_i, f->limit);
+		f->offset = fcoe_i - f->indices;
+		rss_i = max_t(u16, fcoe_i, rss_i);
+	}
+
+#endif /* IXGBE_FCOE */
+	adapter->num_rx_queues = rss_i;
+	adapter->num_tx_queues = rss_i;
+	adapter->num_xdp_queues = ixgbe_xdp_queues(adapter);
+
+	return true;
+}
+
+/**
+ * ixgbe_set_num_queues - Allocate queues for device, feature dependent
+ * @adapter: board private structure to initialize
+ *
+ * This is the top level queue allocation routine.  The order here is very
+ * important, starting with the "most" number of features turned on at once,
+ * and ending with the smallest set of features.  This way large combinations
+ * can be allocated if they're turned on, and smaller combinations are the
+ * fallthrough conditions.
+ *
+ **/
+static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
+{
+	/* Start with base case */
+	adapter->num_rx_queues = 1;
+	adapter->num_tx_queues = 1;
+	adapter->num_xdp_queues = 0;
+	adapter->num_rx_pools = 1;
+	adapter->num_rx_queues_per_pool = 1;
+
+#ifdef CONFIG_IXGBE_DCB
+	if (ixgbe_set_dcb_sriov_queues(adapter))
+		return;
+
+	if (ixgbe_set_dcb_queues(adapter))
+		return;
+
+#endif
+	if (ixgbe_set_sriov_queues(adapter))
+		return;
+
+	ixgbe_set_rss_queues(adapter);
+}
+
+/**
+ * ixgbe_acquire_msix_vectors - acquire MSI-X vectors
+ * @adapter: board private structure
+ *
+ * Attempts to acquire a suitable range of MSI-X vector interrupts. Will
+ * return a negative error code if unable to acquire MSI-X vectors for any
+ * reason.
+ */
+static int ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i, vectors, vector_threshold;
+
+	/* We start by asking for one vector per queue pair with XDP queues
+	 * being stacked with TX queues.
+	 */
+	vectors = max(adapter->num_rx_queues, adapter->num_tx_queues);
+	vectors = max(vectors, adapter->num_xdp_queues);
+
+	/* It is easy to be greedy for MSI-X vectors. However, it really
+	 * doesn't do much good if we have a lot more vectors than CPUs. We'll
+	 * be somewhat conservative and only ask for (roughly) the same number
+	 * of vectors as there are CPUs.
+	 */
+	vectors = min_t(int, vectors, num_online_cpus());
+
+	/* Some vectors are necessary for non-queue interrupts */
+	vectors += NON_Q_VECTORS;
+
+	/* Hardware can only support a maximum of hw.mac->max_msix_vectors.
+	 * With features such as RSS and VMDq, we can easily surpass the
+	 * number of Rx and Tx descriptor queues supported by our device.
+	 * Thus, we cap the maximum in the rare cases where the CPU count also
+	 * exceeds our vector limit
+	 */
+	vectors = min_t(int, vectors, hw->mac.max_msix_vectors);
+
+	/* We want a minimum of two MSI-X vectors for (1) a TxQ[0] + RxQ[0]
+	 * handler, and (2) an Other (Link Status Change, etc.) handler.
+	 */
+	vector_threshold = MIN_MSIX_COUNT;
+
+	adapter->msix_entries = kcalloc(vectors,
+					sizeof(struct msix_entry),
+					GFP_KERNEL);
+	if (!adapter->msix_entries)
+		return -ENOMEM;
+
+	for (i = 0; i < vectors; i++)
+		adapter->msix_entries[i].entry = i;
+
+	vectors = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
+					vector_threshold, vectors);
+
+	if (vectors < 0) {
+		/* A negative count of allocated vectors indicates an error in
+		 * acquiring within the specified range of MSI-X vectors
+		 */
+		e_dev_warn("Failed to allocate MSI-X interrupts. Err: %d\n",
+			   vectors);
+
+		adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
+		kfree(adapter->msix_entries);
+		adapter->msix_entries = NULL;
+
+		return vectors;
+	}
+
+	/* we successfully allocated some number of vectors within our
+	 * requested range.
+	 */
+	adapter->flags |= IXGBE_FLAG_MSIX_ENABLED;
+
+	/* Adjust for only the vectors we'll use, which is minimum
+	 * of max_q_vectors, or the number of vectors we were allocated.
+	 */
+	vectors -= NON_Q_VECTORS;
+	adapter->num_q_vectors = min_t(int, vectors, adapter->max_q_vectors);
+
+	return 0;
+}
+
+static void ixgbe_add_ring(struct ixgbe_ring *ring,
+			   struct ixgbe_ring_container *head)
+{
+	ring->next = head->ring;
+	head->ring = ring;
+	head->count++;
+	head->next_update = jiffies + 1;
+}
+
+/**
+ * ixgbe_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_count: q_vectors allocated on adapter, used for ring interleaving
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: total number of Tx rings to allocate
+ * @txr_idx: index of first Tx ring to allocate
+ * @xdp_count: total number of XDP rings to allocate
+ * @xdp_idx: index of first XDP ring to allocate
+ * @rxr_count: total number of Rx rings to allocate
+ * @rxr_idx: index of first Rx ring to allocate
+ *
+ * We allocate one q_vector.  If allocation fails we return -ENOMEM.
+ **/
+static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
+				int v_count, int v_idx,
+				int txr_count, int txr_idx,
+				int xdp_count, int xdp_idx,
+				int rxr_count, int rxr_idx)
+{
+	int node = dev_to_node(&adapter->pdev->dev);
+	struct ixgbe_q_vector *q_vector;
+	struct ixgbe_ring *ring;
+	int cpu = -1;
+	int ring_count;
+	u8 tcs = adapter->hw_tcs;
+
+	ring_count = txr_count + rxr_count + xdp_count;
+
+	/* customize cpu for Flow Director mapping */
+	if ((tcs <= 1) && !(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) {
+		u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
+		if (rss_i > 1 && adapter->atr_sample_rate) {
+			cpu = cpumask_local_spread(v_idx, node);
+			node = cpu_to_node(cpu);
+		}
+	}
+
+	/* allocate q_vector and rings */
+	q_vector = kzalloc_node(struct_size(q_vector, ring, ring_count),
+				GFP_KERNEL, node);
+	if (!q_vector)
+		q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
+				   GFP_KERNEL);
+	if (!q_vector)
+		return -ENOMEM;
+
+	/* setup affinity mask and node */
+	if (cpu != -1)
+		cpumask_set_cpu(cpu, &q_vector->affinity_mask);
+	q_vector->numa_node = node;
+
+#ifdef CONFIG_IXGBE_DCA
+	/* initialize CPU for DCA */
+	q_vector->cpu = -1;
+
+#endif
+	/* initialize NAPI */
+	netif_napi_add(adapter->netdev, &q_vector->napi, ixgbe_poll);
+
+	/* tie q_vector and adapter together */
+	adapter->q_vector[v_idx] = q_vector;
+	q_vector->adapter = adapter;
+	q_vector->v_idx = v_idx;
+
+	/* initialize work limits */
+	q_vector->tx.work_limit = adapter->tx_work_limit;
+
+	/* Initialize setting for adaptive ITR */
+	q_vector->tx.itr = IXGBE_ITR_ADAPTIVE_MAX_USECS |
+			   IXGBE_ITR_ADAPTIVE_LATENCY;
+	q_vector->rx.itr = IXGBE_ITR_ADAPTIVE_MAX_USECS |
+			   IXGBE_ITR_ADAPTIVE_LATENCY;
+
+	/* intialize ITR */
+	if (txr_count && !rxr_count) {
+		/* tx only vector */
+		if (adapter->tx_itr_setting == 1)
+			q_vector->itr = IXGBE_12K_ITR;
+		else
+			q_vector->itr = adapter->tx_itr_setting;
+	} else {
+		/* rx or rx/tx vector */
+		if (adapter->rx_itr_setting == 1)
+			q_vector->itr = IXGBE_20K_ITR;
+		else
+			q_vector->itr = adapter->rx_itr_setting;
+	}
+
+	/* initialize pointer to rings */
+	ring = q_vector->ring;
+
+	while (txr_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Tx values */
+		ixgbe_add_ring(ring, &q_vector->tx);
+
+		/* apply Tx specific ring traits */
+		ring->count = adapter->tx_ring_count;
+		ring->queue_index = txr_idx;
+
+		/* assign ring to adapter */
+		WRITE_ONCE(adapter->tx_ring[txr_idx], ring);
+
+		/* update count and index */
+		txr_count--;
+		txr_idx += v_count;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	while (xdp_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Tx values */
+		ixgbe_add_ring(ring, &q_vector->tx);
+
+		/* apply Tx specific ring traits */
+		ring->count = adapter->tx_ring_count;
+		ring->queue_index = xdp_idx;
+		set_ring_xdp(ring);
+		spin_lock_init(&ring->tx_lock);
+
+		/* assign ring to adapter */
+		WRITE_ONCE(adapter->xdp_ring[xdp_idx], ring);
+
+		/* update count and index */
+		xdp_count--;
+		xdp_idx++;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	while (rxr_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Rx values */
+		ixgbe_add_ring(ring, &q_vector->rx);
+
+		/*
+		 * 82599 errata, UDP frames with a 0 checksum
+		 * can be marked as checksum errors.
+		 */
+		if (adapter->hw.mac.type == ixgbe_mac_82599EB)
+			set_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state);
+
+#ifdef IXGBE_FCOE
+		if (adapter->netdev->features & NETIF_F_FCOE_MTU) {
+			struct ixgbe_ring_feature *f;
+			f = &adapter->ring_feature[RING_F_FCOE];
+			if ((rxr_idx >= f->offset) &&
+			    (rxr_idx < f->offset + f->indices))
+				set_bit(__IXGBE_RX_FCOE, &ring->state);
+		}
+
+#endif /* IXGBE_FCOE */
+		/* apply Rx specific ring traits */
+		ring->count = adapter->rx_ring_count;
+		ring->queue_index = rxr_idx;
+
+		/* assign ring to adapter */
+		WRITE_ONCE(adapter->rx_ring[rxr_idx], ring);
+
+		/* update count and index */
+		rxr_count--;
+		rxr_idx += v_count;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_free_q_vector - Free memory allocated for specific interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: Index of vector to be freed
+ *
+ * This function frees the memory allocated to the q_vector.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx)
+{
+	struct ixgbe_q_vector *q_vector = adapter->q_vector[v_idx];
+	struct ixgbe_ring *ring;
+
+	ixgbe_for_each_ring(ring, q_vector->tx) {
+		if (ring_is_xdp(ring))
+			WRITE_ONCE(adapter->xdp_ring[ring->queue_index], NULL);
+		else
+			WRITE_ONCE(adapter->tx_ring[ring->queue_index], NULL);
+	}
+
+	ixgbe_for_each_ring(ring, q_vector->rx)
+		WRITE_ONCE(adapter->rx_ring[ring->queue_index], NULL);
+
+	adapter->q_vector[v_idx] = NULL;
+	__netif_napi_del(&q_vector->napi);
+
+	/*
+	 * after a call to __netif_napi_del() napi may still be used and
+	 * ixgbe_get_stats64() might access the rings on this vector,
+	 * we must wait a grace period before freeing it.
+	 */
+	kfree_rcu(q_vector, rcu);
+}
+
+/**
+ * ixgbe_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one q_vector per queue interrupt.  If allocation fails we
+ * return -ENOMEM.
+ **/
+static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
+{
+	int q_vectors = adapter->num_q_vectors;
+	int rxr_remaining = adapter->num_rx_queues;
+	int txr_remaining = adapter->num_tx_queues;
+	int xdp_remaining = adapter->num_xdp_queues;
+	int rxr_idx = 0, txr_idx = 0, xdp_idx = 0, v_idx = 0;
+	int err, i;
+
+	/* only one q_vector if MSI-X is disabled. */
+	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
+		q_vectors = 1;
+
+	if (q_vectors >= (rxr_remaining + txr_remaining + xdp_remaining)) {
+		for (; rxr_remaining; v_idx++) {
+			err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
+						   0, 0, 0, 0, 1, rxr_idx);
+
+			if (err)
+				goto err_out;
+
+			/* update counts and index */
+			rxr_remaining--;
+			rxr_idx++;
+		}
+	}
+
+	for (; v_idx < q_vectors; v_idx++) {
+		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
+		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
+		int xqpv = DIV_ROUND_UP(xdp_remaining, q_vectors - v_idx);
+
+		err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
+					   tqpv, txr_idx,
+					   xqpv, xdp_idx,
+					   rqpv, rxr_idx);
+
+		if (err)
+			goto err_out;
+
+		/* update counts and index */
+		rxr_remaining -= rqpv;
+		txr_remaining -= tqpv;
+		xdp_remaining -= xqpv;
+		rxr_idx++;
+		txr_idx++;
+		xdp_idx += xqpv;
+	}
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		if (adapter->rx_ring[i])
+			adapter->rx_ring[i]->ring_idx = i;
+	}
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		if (adapter->tx_ring[i])
+			adapter->tx_ring[i]->ring_idx = i;
+	}
+
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		if (adapter->xdp_ring[i])
+			adapter->xdp_ring[i]->ring_idx = i;
+	}
+
+	return 0;
+
+err_out:
+	adapter->num_tx_queues = 0;
+	adapter->num_xdp_queues = 0;
+	adapter->num_rx_queues = 0;
+	adapter->num_q_vectors = 0;
+
+	while (v_idx--)
+		ixgbe_free_q_vector(adapter, v_idx);
+
+	return -ENOMEM;
+}
+
+/**
+ * ixgbe_free_q_vectors - Free memory allocated for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * This function frees the memory allocated to the q_vectors.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void ixgbe_free_q_vectors(struct ixgbe_adapter *adapter)
+{
+	int v_idx = adapter->num_q_vectors;
+
+	adapter->num_tx_queues = 0;
+	adapter->num_xdp_queues = 0;
+	adapter->num_rx_queues = 0;
+	adapter->num_q_vectors = 0;
+
+	while (v_idx--)
+		ixgbe_free_q_vector(adapter, v_idx);
+}
+
+static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter)
+{
+	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
+		adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
+		pci_disable_msix(adapter->pdev);
+		kfree(adapter->msix_entries);
+		adapter->msix_entries = NULL;
+	} else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) {
+		adapter->flags &= ~IXGBE_FLAG_MSI_ENABLED;
+		pci_disable_msi(adapter->pdev);
+	}
+}
+
+/**
+ * ixgbe_set_interrupt_capability - set MSI-X or MSI if supported
+ * @adapter: board private structure to initialize
+ *
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
+ **/
+static void ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter)
+{
+	int err;
+
+	/* We will try to get MSI-X interrupts first */
+	if (!ixgbe_acquire_msix_vectors(adapter))
+		return;
+
+	/* At this point, we do not have MSI-X capabilities. We need to
+	 * reconfigure or disable various features which require MSI-X
+	 * capability.
+	 */
+
+	/* Disable DCB unless we only have a single traffic class */
+	if (adapter->hw_tcs > 1) {
+		e_dev_warn("Number of DCB TCs exceeds number of available queues. Disabling DCB support.\n");
+		netdev_reset_tc(adapter->netdev);
+
+		if (adapter->hw.mac.type == ixgbe_mac_82598EB)
+			adapter->hw.fc.requested_mode = adapter->last_lfc_mode;
+
+		adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
+		adapter->temp_dcb_cfg.pfc_mode_enable = false;
+		adapter->dcb_cfg.pfc_mode_enable = false;
+	}
+
+	adapter->hw_tcs = 0;
+	adapter->dcb_cfg.num_tcs.pg_tcs = 1;
+	adapter->dcb_cfg.num_tcs.pfc_tcs = 1;
+
+	/* Disable SR-IOV support */
+	e_dev_warn("Disabling SR-IOV support\n");
+	ixgbe_disable_sriov(adapter);
+
+	/* Disable RSS */
+	e_dev_warn("Disabling RSS support\n");
+	adapter->ring_feature[RING_F_RSS].limit = 1;
+
+	/* recalculate number of queues now that many features have been
+	 * changed or disabled.
+	 */
+	ixgbe_set_num_queues(adapter);
+	adapter->num_q_vectors = 1;
+
+	err = pci_enable_msi(adapter->pdev);
+	if (err)
+		e_dev_warn("Failed to allocate MSI interrupt, falling back to legacy. Error: %d\n",
+			   err);
+	else
+		adapter->flags |= IXGBE_FLAG_MSI_ENABLED;
+}
+
+/**
+ * ixgbe_init_interrupt_scheme - Determine proper interrupt scheme
+ * @adapter: board private structure to initialize
+ *
+ * We determine which interrupt scheme to use based on...
+ * - Kernel support (MSI, MSI-X)
+ *   - which can be user-defined (via MODULE_PARAM)
+ * - Hardware queue count (num_*_queues)
+ *   - defined by miscellaneous hardware support/features (RSS, etc.)
+ **/
+int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
+{
+	int err;
+
+	/* Number of supported queues */
+	ixgbe_set_num_queues(adapter);
+
+	/* Set interrupt mode */
+	ixgbe_set_interrupt_capability(adapter);
+
+	err = ixgbe_alloc_q_vectors(adapter);
+	if (err) {
+		e_dev_err("Unable to allocate memory for queue vectors\n");
+		goto err_alloc_q_vectors;
+	}
+
+	ixgbe_cache_ring_register(adapter);
+
+	e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u XDP Queue count = %u\n",
+		   (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled",
+		   adapter->num_rx_queues, adapter->num_tx_queues,
+		   adapter->num_xdp_queues);
+
+	set_bit(__IXGBE_DOWN, &adapter->state);
+
+	return 0;
+
+err_alloc_q_vectors:
+	ixgbe_reset_interrupt_capability(adapter);
+	return err;
+}
+
+/**
+ * ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings
+ * @adapter: board private structure to clear interrupt scheme on
+ *
+ * We go through and clear interrupt specific resources and reset the structure
+ * to pre-load conditions
+ **/
+void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter)
+{
+	adapter->num_tx_queues = 0;
+	adapter->num_xdp_queues = 0;
+	adapter->num_rx_queues = 0;
+
+	ixgbe_free_q_vectors(adapter);
+	ixgbe_reset_interrupt_capability(adapter);
+}
+
+void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens,
+		       u32 fceof_saidx, u32 type_tucmd, u32 mss_l4len_idx)
+{
+	struct ixgbe_adv_tx_context_desc *context_desc;
+	u16 i = tx_ring->next_to_use;
+
+	context_desc = IXGBE_TX_CTXTDESC(tx_ring, i);
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	/* set bits to identify this as an advanced context descriptor */
+	type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
+
+	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
+	context_desc->fceof_saidx	= cpu_to_le32(fceof_saidx);
+	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
+	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
+}
+
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
new file mode 100644
index 0000000000..dd03b017df
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -0,0 +1,11608 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/interrupt.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/sctp.h>
+#include <linux/pkt_sched.h>
+#include <linux/ipv6.h>
+#include <linux/slab.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/if.h>
+#include <linux/if_vlan.h>
+#include <linux/if_macvlan.h>
+#include <linux/if_bridge.h>
+#include <linux/prefetch.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <linux/atomic.h>
+#include <linux/numa.h>
+#include <generated/utsrelease.h>
+#include <scsi/fc/fc_fcoe.h>
+#include <net/udp_tunnel.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/vxlan.h>
+#include <net/mpls.h>
+#include <net/netdev_queues.h>
+#include <net/xdp_sock_drv.h>
+#include <net/xfrm.h>
+
+#include "ixgbe.h"
+#include "ixgbe_common.h"
+#include "ixgbe_dcb_82599.h"
+#include "ixgbe_phy.h"
+#include "ixgbe_sriov.h"
+#include "ixgbe_model.h"
+#include "ixgbe_txrx_common.h"
+
+char ixgbe_driver_name[] = "ixgbe";
+static const char ixgbe_driver_string[] =
+			      "Intel(R) 10 Gigabit PCI Express Network Driver";
+#ifdef IXGBE_FCOE
+char ixgbe_default_device_descr[] =
+			      "Intel(R) 10 Gigabit Network Connection";
+#else
+static char ixgbe_default_device_descr[] =
+			      "Intel(R) 10 Gigabit Network Connection";
+#endif
+static const char ixgbe_copyright[] =
+				"Copyright (c) 1999-2016 Intel Corporation.";
+
+static const char ixgbe_overheat_msg[] = "Network adapter has been stopped because it has over heated. Restart the computer. If the problem persists, power off the system and replace the adapter";
+
+static const struct ixgbe_info *ixgbe_info_tbl[] = {
+	[board_82598]		= &ixgbe_82598_info,
+	[board_82599]		= &ixgbe_82599_info,
+	[board_X540]		= &ixgbe_X540_info,
+	[board_X550]		= &ixgbe_X550_info,
+	[board_X550EM_x]	= &ixgbe_X550EM_x_info,
+	[board_x550em_x_fw]	= &ixgbe_x550em_x_fw_info,
+	[board_x550em_a]	= &ixgbe_x550em_a_info,
+	[board_x550em_a_fw]	= &ixgbe_x550em_a_fw_info,
+};
+
+/* ixgbe_pci_tbl - PCI Device ID Table
+ *
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ *   Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id ixgbe_pci_tbl[] = {
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598), board_82598 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT), board_82598 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_SINGLE_PORT), board_82598 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT), board_82598 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT2), board_82598 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_CX4), board_82598 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT), board_82598 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT), board_82598 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM), board_82598 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_XF_LR), board_82598 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM), board_82598 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_BX), board_82598 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_XAUI_LOM), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KR), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_EM), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_CX4), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_FCOE), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_T3_LOM), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_COMBO_BACKPLANE), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T), board_X540 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_SF2), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_LS), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599EN_SFP), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP), board_82599 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T1), board_X540 },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550T), board_X550},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550T1), board_X550},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_KX4), board_X550EM_x},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_XFI), board_X550EM_x},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_KR), board_X550EM_x},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_10G_T), board_X550EM_x},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_SFP), board_X550EM_x},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_1G_T), board_x550em_x_fw},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_KR), board_x550em_a },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_KR_L), board_x550em_a },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N), board_x550em_a },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SGMII), board_x550em_a },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SGMII_L), board_x550em_a },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_10G_T), board_x550em_a},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP), board_x550em_a },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_1G_T), board_x550em_a_fw },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L), board_x550em_a_fw },
+	/* required last entry */
+	{0, }
+};
+MODULE_DEVICE_TABLE(pci, ixgbe_pci_tbl);
+
+#ifdef CONFIG_IXGBE_DCA
+static int ixgbe_notify_dca(struct notifier_block *, unsigned long event,
+			    void *p);
+static struct notifier_block dca_notifier = {
+	.notifier_call = ixgbe_notify_dca,
+	.next          = NULL,
+	.priority      = 0
+};
+#endif
+
+#ifdef CONFIG_PCI_IOV
+static unsigned int max_vfs;
+module_param(max_vfs, uint, 0);
+MODULE_PARM_DESC(max_vfs,
+		 "Maximum number of virtual functions to allocate per physical function - default is zero and maximum value is 63. (Deprecated)");
+#endif /* CONFIG_PCI_IOV */
+
+static bool allow_unsupported_sfp;
+module_param(allow_unsupported_sfp, bool, 0);
+MODULE_PARM_DESC(allow_unsupported_sfp,
+		 "Allow unsupported and untested SFP+ modules on 82599-based adapters");
+
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver");
+MODULE_LICENSE("GPL v2");
+
+DEFINE_STATIC_KEY_FALSE(ixgbe_xdp_locking_key);
+EXPORT_SYMBOL(ixgbe_xdp_locking_key);
+
+static struct workqueue_struct *ixgbe_wq;
+
+static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev);
+static void ixgbe_watchdog_link_is_down(struct ixgbe_adapter *);
+
+static const struct net_device_ops ixgbe_netdev_ops;
+
+static bool netif_is_ixgbe(struct net_device *dev)
+{
+	return dev && (dev->netdev_ops == &ixgbe_netdev_ops);
+}
+
+static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter,
+					  u32 reg, u16 *value)
+{
+	struct pci_dev *parent_dev;
+	struct pci_bus *parent_bus;
+
+	parent_bus = adapter->pdev->bus->parent;
+	if (!parent_bus)
+		return -1;
+
+	parent_dev = parent_bus->self;
+	if (!parent_dev)
+		return -1;
+
+	if (!pci_is_pcie(parent_dev))
+		return -1;
+
+	pcie_capability_read_word(parent_dev, reg, value);
+	if (*value == IXGBE_FAILED_READ_CFG_WORD &&
+	    ixgbe_check_cfg_remove(&adapter->hw, parent_dev))
+		return -1;
+	return 0;
+}
+
+static s32 ixgbe_get_parent_bus_info(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u16 link_status = 0;
+	int err;
+
+	hw->bus.type = ixgbe_bus_type_pci_express;
+
+	/* Get the negotiated link width and speed from PCI config space of the
+	 * parent, as this device is behind a switch
+	 */
+	err = ixgbe_read_pci_cfg_word_parent(adapter, 18, &link_status);
+
+	/* assume caller will handle error case */
+	if (err)
+		return err;
+
+	hw->bus.width = ixgbe_convert_bus_width(link_status);
+	hw->bus.speed = ixgbe_convert_bus_speed(link_status);
+
+	return 0;
+}
+
+/**
+ * ixgbe_pcie_from_parent - Determine whether PCIe info should come from parent
+ * @hw: hw specific details
+ *
+ * This function is used by probe to determine whether a device's PCI-Express
+ * bandwidth details should be gathered from the parent bus instead of from the
+ * device. Used to ensure that various locations all have the correct device ID
+ * checks.
+ */
+static inline bool ixgbe_pcie_from_parent(struct ixgbe_hw *hw)
+{
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_82599_SFP_SF_QP:
+	case IXGBE_DEV_ID_82599_QSFP_SF_QP:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static void ixgbe_check_minimum_link(struct ixgbe_adapter *adapter,
+				     int expected_gts)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct pci_dev *pdev;
+
+	/* Some devices are not connected over PCIe and thus do not negotiate
+	 * speed. These devices do not have valid bus info, and thus any report
+	 * we generate may not be correct.
+	 */
+	if (hw->bus.type == ixgbe_bus_type_internal)
+		return;
+
+	/* determine whether to use the parent device */
+	if (ixgbe_pcie_from_parent(&adapter->hw))
+		pdev = adapter->pdev->bus->parent->self;
+	else
+		pdev = adapter->pdev;
+
+	pcie_print_link_status(pdev);
+}
+
+static void ixgbe_service_event_schedule(struct ixgbe_adapter *adapter)
+{
+	if (!test_bit(__IXGBE_DOWN, &adapter->state) &&
+	    !test_bit(__IXGBE_REMOVING, &adapter->state) &&
+	    !test_and_set_bit(__IXGBE_SERVICE_SCHED, &adapter->state))
+		queue_work(ixgbe_wq, &adapter->service_task);
+}
+
+static void ixgbe_remove_adapter(struct ixgbe_hw *hw)
+{
+	struct ixgbe_adapter *adapter = hw->back;
+
+	if (!hw->hw_addr)
+		return;
+	hw->hw_addr = NULL;
+	e_dev_err("Adapter removed\n");
+	if (test_bit(__IXGBE_SERVICE_INITED, &adapter->state))
+		ixgbe_service_event_schedule(adapter);
+}
+
+static u32 ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg)
+{
+	u8 __iomem *reg_addr;
+	u32 value;
+	int i;
+
+	reg_addr = READ_ONCE(hw->hw_addr);
+	if (ixgbe_removed(reg_addr))
+		return IXGBE_FAILED_READ_REG;
+
+	/* Register read of 0xFFFFFFF can indicate the adapter has been removed,
+	 * so perform several status register reads to determine if the adapter
+	 * has been removed.
+	 */
+	for (i = 0; i < IXGBE_FAILED_READ_RETRIES; i++) {
+		value = readl(reg_addr + IXGBE_STATUS);
+		if (value != IXGBE_FAILED_READ_REG)
+			break;
+		mdelay(3);
+	}
+
+	if (value == IXGBE_FAILED_READ_REG)
+		ixgbe_remove_adapter(hw);
+	else
+		value = readl(reg_addr + reg);
+	return value;
+}
+
+/**
+ * ixgbe_read_reg - Read from device register
+ * @hw: hw specific details
+ * @reg: offset of register to read
+ *
+ * Returns : value read or IXGBE_FAILED_READ_REG if removed
+ *
+ * This function is used to read device registers. It checks for device
+ * removal by confirming any read that returns all ones by checking the
+ * status register value for all ones. This function avoids reading from
+ * the hardware if a removal was previously detected in which case it
+ * returns IXGBE_FAILED_READ_REG (all ones).
+ */
+u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
+{
+	u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr);
+	u32 value;
+
+	if (ixgbe_removed(reg_addr))
+		return IXGBE_FAILED_READ_REG;
+	if (unlikely(hw->phy.nw_mng_if_sel &
+		     IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE)) {
+		struct ixgbe_adapter *adapter;
+		int i;
+
+		for (i = 0; i < 200; ++i) {
+			value = readl(reg_addr + IXGBE_MAC_SGMII_BUSY);
+			if (likely(!value))
+				goto writes_completed;
+			if (value == IXGBE_FAILED_READ_REG) {
+				ixgbe_remove_adapter(hw);
+				return IXGBE_FAILED_READ_REG;
+			}
+			udelay(5);
+		}
+
+		adapter = hw->back;
+		e_warn(hw, "register writes incomplete %08x\n", value);
+	}
+
+writes_completed:
+	value = readl(reg_addr + reg);
+	if (unlikely(value == IXGBE_FAILED_READ_REG))
+		value = ixgbe_check_remove(hw, reg);
+	return value;
+}
+
+static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev)
+{
+	u16 value;
+
+	pci_read_config_word(pdev, PCI_VENDOR_ID, &value);
+	if (value == IXGBE_FAILED_READ_CFG_WORD) {
+		ixgbe_remove_adapter(hw);
+		return true;
+	}
+	return false;
+}
+
+u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg)
+{
+	struct ixgbe_adapter *adapter = hw->back;
+	u16 value;
+
+	if (ixgbe_removed(hw->hw_addr))
+		return IXGBE_FAILED_READ_CFG_WORD;
+	pci_read_config_word(adapter->pdev, reg, &value);
+	if (value == IXGBE_FAILED_READ_CFG_WORD &&
+	    ixgbe_check_cfg_remove(hw, adapter->pdev))
+		return IXGBE_FAILED_READ_CFG_WORD;
+	return value;
+}
+
+#ifdef CONFIG_PCI_IOV
+static u32 ixgbe_read_pci_cfg_dword(struct ixgbe_hw *hw, u32 reg)
+{
+	struct ixgbe_adapter *adapter = hw->back;
+	u32 value;
+
+	if (ixgbe_removed(hw->hw_addr))
+		return IXGBE_FAILED_READ_CFG_DWORD;
+	pci_read_config_dword(adapter->pdev, reg, &value);
+	if (value == IXGBE_FAILED_READ_CFG_DWORD &&
+	    ixgbe_check_cfg_remove(hw, adapter->pdev))
+		return IXGBE_FAILED_READ_CFG_DWORD;
+	return value;
+}
+#endif /* CONFIG_PCI_IOV */
+
+void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value)
+{
+	struct ixgbe_adapter *adapter = hw->back;
+
+	if (ixgbe_removed(hw->hw_addr))
+		return;
+	pci_write_config_word(adapter->pdev, reg, value);
+}
+
+static void ixgbe_service_event_complete(struct ixgbe_adapter *adapter)
+{
+	BUG_ON(!test_bit(__IXGBE_SERVICE_SCHED, &adapter->state));
+
+	/* flush memory to make sure state is correct before next watchdog */
+	smp_mb__before_atomic();
+	clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state);
+}
+
+struct ixgbe_reg_info {
+	u32 ofs;
+	char *name;
+};
+
+static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = {
+
+	/* General Registers */
+	{IXGBE_CTRL, "CTRL"},
+	{IXGBE_STATUS, "STATUS"},
+	{IXGBE_CTRL_EXT, "CTRL_EXT"},
+
+	/* Interrupt Registers */
+	{IXGBE_EICR, "EICR"},
+
+	/* RX Registers */
+	{IXGBE_SRRCTL(0), "SRRCTL"},
+	{IXGBE_DCA_RXCTRL(0), "DRXCTL"},
+	{IXGBE_RDLEN(0), "RDLEN"},
+	{IXGBE_RDH(0), "RDH"},
+	{IXGBE_RDT(0), "RDT"},
+	{IXGBE_RXDCTL(0), "RXDCTL"},
+	{IXGBE_RDBAL(0), "RDBAL"},
+	{IXGBE_RDBAH(0), "RDBAH"},
+
+	/* TX Registers */
+	{IXGBE_TDBAL(0), "TDBAL"},
+	{IXGBE_TDBAH(0), "TDBAH"},
+	{IXGBE_TDLEN(0), "TDLEN"},
+	{IXGBE_TDH(0), "TDH"},
+	{IXGBE_TDT(0), "TDT"},
+	{IXGBE_TXDCTL(0), "TXDCTL"},
+
+	/* List Terminator */
+	{ .name = NULL }
+};
+
+
+/*
+ * ixgbe_regdump - register printout routine
+ */
+static void ixgbe_regdump(struct ixgbe_hw *hw, struct ixgbe_reg_info *reginfo)
+{
+	int i;
+	char rname[16];
+	u32 regs[64];
+
+	switch (reginfo->ofs) {
+	case IXGBE_SRRCTL(0):
+		for (i = 0; i < 64; i++)
+			regs[i] = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
+		break;
+	case IXGBE_DCA_RXCTRL(0):
+		for (i = 0; i < 64; i++)
+			regs[i] = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
+		break;
+	case IXGBE_RDLEN(0):
+		for (i = 0; i < 64; i++)
+			regs[i] = IXGBE_READ_REG(hw, IXGBE_RDLEN(i));
+		break;
+	case IXGBE_RDH(0):
+		for (i = 0; i < 64; i++)
+			regs[i] = IXGBE_READ_REG(hw, IXGBE_RDH(i));
+		break;
+	case IXGBE_RDT(0):
+		for (i = 0; i < 64; i++)
+			regs[i] = IXGBE_READ_REG(hw, IXGBE_RDT(i));
+		break;
+	case IXGBE_RXDCTL(0):
+		for (i = 0; i < 64; i++)
+			regs[i] = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
+		break;
+	case IXGBE_RDBAL(0):
+		for (i = 0; i < 64; i++)
+			regs[i] = IXGBE_READ_REG(hw, IXGBE_RDBAL(i));
+		break;
+	case IXGBE_RDBAH(0):
+		for (i = 0; i < 64; i++)
+			regs[i] = IXGBE_READ_REG(hw, IXGBE_RDBAH(i));
+		break;
+	case IXGBE_TDBAL(0):
+		for (i = 0; i < 64; i++)
+			regs[i] = IXGBE_READ_REG(hw, IXGBE_TDBAL(i));
+		break;
+	case IXGBE_TDBAH(0):
+		for (i = 0; i < 64; i++)
+			regs[i] = IXGBE_READ_REG(hw, IXGBE_TDBAH(i));
+		break;
+	case IXGBE_TDLEN(0):
+		for (i = 0; i < 64; i++)
+			regs[i] = IXGBE_READ_REG(hw, IXGBE_TDLEN(i));
+		break;
+	case IXGBE_TDH(0):
+		for (i = 0; i < 64; i++)
+			regs[i] = IXGBE_READ_REG(hw, IXGBE_TDH(i));
+		break;
+	case IXGBE_TDT(0):
+		for (i = 0; i < 64; i++)
+			regs[i] = IXGBE_READ_REG(hw, IXGBE_TDT(i));
+		break;
+	case IXGBE_TXDCTL(0):
+		for (i = 0; i < 64; i++)
+			regs[i] = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
+		break;
+	default:
+		pr_info("%-15s %08x\n",
+			reginfo->name, IXGBE_READ_REG(hw, reginfo->ofs));
+		return;
+	}
+
+	i = 0;
+	while (i < 64) {
+		int j;
+		char buf[9 * 8 + 1];
+		char *p = buf;
+
+		snprintf(rname, 16, "%s[%d-%d]", reginfo->name, i, i + 7);
+		for (j = 0; j < 8; j++)
+			p += sprintf(p, " %08x", regs[i++]);
+		pr_err("%-15s%s\n", rname, buf);
+	}
+
+}
+
+static void ixgbe_print_buffer(struct ixgbe_ring *ring, int n)
+{
+	struct ixgbe_tx_buffer *tx_buffer;
+
+	tx_buffer = &ring->tx_buffer_info[ring->next_to_clean];
+	pr_info(" %5d %5X %5X %016llX %08X %p %016llX\n",
+		n, ring->next_to_use, ring->next_to_clean,
+		(u64)dma_unmap_addr(tx_buffer, dma),
+		dma_unmap_len(tx_buffer, len),
+		tx_buffer->next_to_watch,
+		(u64)tx_buffer->time_stamp);
+}
+
+/*
+ * ixgbe_dump - Print registers, tx-rings and rx-rings
+ */
+static void ixgbe_dump(struct ixgbe_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_reg_info *reginfo;
+	int n = 0;
+	struct ixgbe_ring *ring;
+	struct ixgbe_tx_buffer *tx_buffer;
+	union ixgbe_adv_tx_desc *tx_desc;
+	struct my_u0 { u64 a; u64 b; } *u0;
+	struct ixgbe_ring *rx_ring;
+	union ixgbe_adv_rx_desc *rx_desc;
+	struct ixgbe_rx_buffer *rx_buffer_info;
+	int i = 0;
+
+	if (!netif_msg_hw(adapter))
+		return;
+
+	/* Print netdevice Info */
+	if (netdev) {
+		dev_info(&adapter->pdev->dev, "Net device Info\n");
+		pr_info("Device Name     state            "
+			"trans_start\n");
+		pr_info("%-15s %016lX %016lX\n",
+			netdev->name,
+			netdev->state,
+			dev_trans_start(netdev));
+	}
+
+	/* Print Registers */
+	dev_info(&adapter->pdev->dev, "Register Dump\n");
+	pr_info(" Register Name   Value\n");
+	for (reginfo = (struct ixgbe_reg_info *)ixgbe_reg_info_tbl;
+	     reginfo->name; reginfo++) {
+		ixgbe_regdump(hw, reginfo);
+	}
+
+	/* Print TX Ring Summary */
+	if (!netdev || !netif_running(netdev))
+		return;
+
+	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
+	pr_info(" %s     %s              %s        %s\n",
+		"Queue [NTU] [NTC] [bi(ntc)->dma  ]",
+		"leng", "ntw", "timestamp");
+	for (n = 0; n < adapter->num_tx_queues; n++) {
+		ring = adapter->tx_ring[n];
+		ixgbe_print_buffer(ring, n);
+	}
+
+	for (n = 0; n < adapter->num_xdp_queues; n++) {
+		ring = adapter->xdp_ring[n];
+		ixgbe_print_buffer(ring, n);
+	}
+
+	/* Print TX Rings */
+	if (!netif_msg_tx_done(adapter))
+		goto rx_ring_summary;
+
+	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
+
+	/* Transmit Descriptor Formats
+	 *
+	 * 82598 Advanced Transmit Descriptor
+	 *   +--------------------------------------------------------------+
+	 * 0 |         Buffer Address [63:0]                                |
+	 *   +--------------------------------------------------------------+
+	 * 8 |  PAYLEN  | POPTS  | IDX | STA | DCMD  |DTYP |  RSV |  DTALEN |
+	 *   +--------------------------------------------------------------+
+	 *   63       46 45    40 39 36 35 32 31   24 23 20 19              0
+	 *
+	 * 82598 Advanced Transmit Descriptor (Write-Back Format)
+	 *   +--------------------------------------------------------------+
+	 * 0 |                          RSV [63:0]                          |
+	 *   +--------------------------------------------------------------+
+	 * 8 |            RSV           |  STA  |          NXTSEQ           |
+	 *   +--------------------------------------------------------------+
+	 *   63                       36 35   32 31                         0
+	 *
+	 * 82599+ Advanced Transmit Descriptor
+	 *   +--------------------------------------------------------------+
+	 * 0 |         Buffer Address [63:0]                                |
+	 *   +--------------------------------------------------------------+
+	 * 8 |PAYLEN  |POPTS|CC|IDX  |STA  |DCMD  |DTYP |MAC  |RSV  |DTALEN |
+	 *   +--------------------------------------------------------------+
+	 *   63     46 45 40 39 38 36 35 32 31  24 23 20 19 18 17 16 15     0
+	 *
+	 * 82599+ Advanced Transmit Descriptor (Write-Back Format)
+	 *   +--------------------------------------------------------------+
+	 * 0 |                          RSV [63:0]                          |
+	 *   +--------------------------------------------------------------+
+	 * 8 |            RSV           |  STA  |           RSV             |
+	 *   +--------------------------------------------------------------+
+	 *   63                       36 35   32 31                         0
+	 */
+
+	for (n = 0; n < adapter->num_tx_queues; n++) {
+		ring = adapter->tx_ring[n];
+		pr_info("------------------------------------\n");
+		pr_info("TX QUEUE INDEX = %d\n", ring->queue_index);
+		pr_info("------------------------------------\n");
+		pr_info("%s%s    %s              %s        %s          %s\n",
+			"T [desc]     [address 63:0  ] ",
+			"[PlPOIdStDDt Ln] [bi->dma       ] ",
+			"leng", "ntw", "timestamp", "bi->skb");
+
+		for (i = 0; ring->desc && (i < ring->count); i++) {
+			tx_desc = IXGBE_TX_DESC(ring, i);
+			tx_buffer = &ring->tx_buffer_info[i];
+			u0 = (struct my_u0 *)tx_desc;
+			if (dma_unmap_len(tx_buffer, len) > 0) {
+				const char *ring_desc;
+
+				if (i == ring->next_to_use &&
+				    i == ring->next_to_clean)
+					ring_desc = " NTC/U";
+				else if (i == ring->next_to_use)
+					ring_desc = " NTU";
+				else if (i == ring->next_to_clean)
+					ring_desc = " NTC";
+				else
+					ring_desc = "";
+				pr_info("T [0x%03X]    %016llX %016llX %016llX %08X %p %016llX %p%s",
+					i,
+					le64_to_cpu((__force __le64)u0->a),
+					le64_to_cpu((__force __le64)u0->b),
+					(u64)dma_unmap_addr(tx_buffer, dma),
+					dma_unmap_len(tx_buffer, len),
+					tx_buffer->next_to_watch,
+					(u64)tx_buffer->time_stamp,
+					tx_buffer->skb,
+					ring_desc);
+
+				if (netif_msg_pktdata(adapter) &&
+				    tx_buffer->skb)
+					print_hex_dump(KERN_INFO, "",
+						DUMP_PREFIX_ADDRESS, 16, 1,
+						tx_buffer->skb->data,
+						dma_unmap_len(tx_buffer, len),
+						true);
+			}
+		}
+	}
+
+	/* Print RX Rings Summary */
+rx_ring_summary:
+	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
+	pr_info("Queue [NTU] [NTC]\n");
+	for (n = 0; n < adapter->num_rx_queues; n++) {
+		rx_ring = adapter->rx_ring[n];
+		pr_info("%5d %5X %5X\n",
+			n, rx_ring->next_to_use, rx_ring->next_to_clean);
+	}
+
+	/* Print RX Rings */
+	if (!netif_msg_rx_status(adapter))
+		return;
+
+	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
+
+	/* Receive Descriptor Formats
+	 *
+	 * 82598 Advanced Receive Descriptor (Read) Format
+	 *    63                                           1        0
+	 *    +-----------------------------------------------------+
+	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
+	 *    +----------------------------------------------+------+
+	 *  8 |       Header Buffer Address [63:1]           |  DD  |
+	 *    +-----------------------------------------------------+
+	 *
+	 *
+	 * 82598 Advanced Receive Descriptor (Write-Back) Format
+	 *
+	 *   63       48 47    32 31  30      21 20 16 15   4 3     0
+	 *   +------------------------------------------------------+
+	 * 0 |       RSS Hash /  |SPH| HDR_LEN  | RSV |Packet|  RSS |
+	 *   | Packet   | IP     |   |          |     | Type | Type |
+	 *   | Checksum | Ident  |   |          |     |      |      |
+	 *   +------------------------------------------------------+
+	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
+	 *   +------------------------------------------------------+
+	 *   63       48 47    32 31            20 19               0
+	 *
+	 * 82599+ Advanced Receive Descriptor (Read) Format
+	 *    63                                           1        0
+	 *    +-----------------------------------------------------+
+	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
+	 *    +----------------------------------------------+------+
+	 *  8 |       Header Buffer Address [63:1]           |  DD  |
+	 *    +-----------------------------------------------------+
+	 *
+	 *
+	 * 82599+ Advanced Receive Descriptor (Write-Back) Format
+	 *
+	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
+	 *   +------------------------------------------------------+
+	 * 0 |RSS / Frag Checksum|SPH| HDR_LEN  |RSC- |Packet|  RSS |
+	 *   |/ RTT / PCoE_PARAM |   |          | CNT | Type | Type |
+	 *   |/ Flow Dir Flt ID  |   |          |     |      |      |
+	 *   +------------------------------------------------------+
+	 * 8 | VLAN Tag | Length |Extended Error| Xtnd Status/NEXTP |
+	 *   +------------------------------------------------------+
+	 *   63       48 47    32 31          20 19                 0
+	 */
+
+	for (n = 0; n < adapter->num_rx_queues; n++) {
+		rx_ring = adapter->rx_ring[n];
+		pr_info("------------------------------------\n");
+		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
+		pr_info("------------------------------------\n");
+		pr_info("%s%s%s\n",
+			"R  [desc]      [ PktBuf     A0] ",
+			"[  HeadBuf   DD] [bi->dma       ] [bi->skb       ] ",
+			"<-- Adv Rx Read format");
+		pr_info("%s%s%s\n",
+			"RWB[desc]      [PcsmIpSHl PtRs] ",
+			"[vl er S cks ln] ---------------- [bi->skb       ] ",
+			"<-- Adv Rx Write-Back format");
+
+		for (i = 0; i < rx_ring->count; i++) {
+			const char *ring_desc;
+
+			if (i == rx_ring->next_to_use)
+				ring_desc = " NTU";
+			else if (i == rx_ring->next_to_clean)
+				ring_desc = " NTC";
+			else
+				ring_desc = "";
+
+			rx_buffer_info = &rx_ring->rx_buffer_info[i];
+			rx_desc = IXGBE_RX_DESC(rx_ring, i);
+			u0 = (struct my_u0 *)rx_desc;
+			if (rx_desc->wb.upper.length) {
+				/* Descriptor Done */
+				pr_info("RWB[0x%03X]     %016llX %016llX ---------------- %p%s\n",
+					i,
+					le64_to_cpu((__force __le64)u0->a),
+					le64_to_cpu((__force __le64)u0->b),
+					rx_buffer_info->skb,
+					ring_desc);
+			} else {
+				pr_info("R  [0x%03X]     %016llX %016llX %016llX %p%s\n",
+					i,
+					le64_to_cpu((__force __le64)u0->a),
+					le64_to_cpu((__force __le64)u0->b),
+					(u64)rx_buffer_info->dma,
+					rx_buffer_info->skb,
+					ring_desc);
+
+				if (netif_msg_pktdata(adapter) &&
+				    rx_buffer_info->dma) {
+					print_hex_dump(KERN_INFO, "",
+					   DUMP_PREFIX_ADDRESS, 16, 1,
+					   page_address(rx_buffer_info->page) +
+						    rx_buffer_info->page_offset,
+					   ixgbe_rx_bufsz(rx_ring), true);
+				}
+			}
+		}
+	}
+}
+
+static void ixgbe_release_hw_control(struct ixgbe_adapter *adapter)
+{
+	u32 ctrl_ext;
+
+	/* Let firmware take over control of h/w */
+	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT,
+			ctrl_ext & ~IXGBE_CTRL_EXT_DRV_LOAD);
+}
+
+static void ixgbe_get_hw_control(struct ixgbe_adapter *adapter)
+{
+	u32 ctrl_ext;
+
+	/* Let firmware know the driver has taken over */
+	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT,
+			ctrl_ext | IXGBE_CTRL_EXT_DRV_LOAD);
+}
+
+/**
+ * ixgbe_set_ivar - set the IVAR registers, mapping interrupt causes to vectors
+ * @adapter: pointer to adapter struct
+ * @direction: 0 for Rx, 1 for Tx, -1 for other causes
+ * @queue: queue to map the corresponding interrupt to
+ * @msix_vector: the vector to map to the corresponding queue
+ *
+ */
+static void ixgbe_set_ivar(struct ixgbe_adapter *adapter, s8 direction,
+			   u8 queue, u8 msix_vector)
+{
+	u32 ivar, index;
+	struct ixgbe_hw *hw = &adapter->hw;
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		msix_vector |= IXGBE_IVAR_ALLOC_VAL;
+		if (direction == -1)
+			direction = 0;
+		index = (((direction * 64) + queue) >> 2) & 0x1F;
+		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
+		ivar &= ~(0xFF << (8 * (queue & 0x3)));
+		ivar |= (msix_vector << (8 * (queue & 0x3)));
+		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		if (direction == -1) {
+			/* other causes */
+			msix_vector |= IXGBE_IVAR_ALLOC_VAL;
+			index = ((queue & 1) * 8);
+			ivar = IXGBE_READ_REG(&adapter->hw, IXGBE_IVAR_MISC);
+			ivar &= ~(0xFF << index);
+			ivar |= (msix_vector << index);
+			IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR_MISC, ivar);
+			break;
+		} else {
+			/* tx or rx causes */
+			msix_vector |= IXGBE_IVAR_ALLOC_VAL;
+			index = ((16 * (queue & 1)) + (8 * direction));
+			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(queue >> 1));
+			ivar &= ~(0xFF << index);
+			ivar |= (msix_vector << index);
+			IXGBE_WRITE_REG(hw, IXGBE_IVAR(queue >> 1), ivar);
+			break;
+		}
+	default:
+		break;
+	}
+}
+
+void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter,
+			    u64 qmask)
+{
+	u32 mask;
+
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82598EB:
+		mask = (IXGBE_EIMS_RTX_QUEUE & qmask);
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		mask = (qmask & 0xFFFFFFFF);
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
+		mask = (qmask >> 32);
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
+		break;
+	default:
+		break;
+	}
+}
+
+static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_hw_stats *hwstats = &adapter->stats;
+	int i;
+	u32 data;
+
+	if ((hw->fc.current_mode != ixgbe_fc_full) &&
+	    (hw->fc.current_mode != ixgbe_fc_rx_pause))
+		return;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
+		break;
+	default:
+		data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
+	}
+	hwstats->lxoffrxc += data;
+
+	/* refill credits (no tx hang) if we received xoff */
+	if (!data)
+		return;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		clear_bit(__IXGBE_HANG_CHECK_ARMED,
+			  &adapter->tx_ring[i]->state);
+
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		clear_bit(__IXGBE_HANG_CHECK_ARMED,
+			  &adapter->xdp_ring[i]->state);
+}
+
+static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_hw_stats *hwstats = &adapter->stats;
+	u32 xoff[8] = {0};
+	u8 tc;
+	int i;
+	bool pfc_en = adapter->dcb_cfg.pfc_mode_enable;
+
+	if (adapter->ixgbe_ieee_pfc)
+		pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
+
+	if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED) || !pfc_en) {
+		ixgbe_update_xoff_rx_lfc(adapter);
+		return;
+	}
+
+	/* update stats for each tc, only valid with PFC enabled */
+	for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) {
+		u32 pxoffrxc;
+
+		switch (hw->mac.type) {
+		case ixgbe_mac_82598EB:
+			pxoffrxc = IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
+			break;
+		default:
+			pxoffrxc = IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i));
+		}
+		hwstats->pxoffrxc[i] += pxoffrxc;
+		/* Get the TC for given UP */
+		tc = netdev_get_prio_tc_map(adapter->netdev, i);
+		xoff[tc] += pxoffrxc;
+	}
+
+	/* disarm tx queues that have received xoff frames */
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct ixgbe_ring *tx_ring = adapter->tx_ring[i];
+
+		tc = tx_ring->dcb_tc;
+		if (xoff[tc])
+			clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state);
+	}
+
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		struct ixgbe_ring *xdp_ring = adapter->xdp_ring[i];
+
+		tc = xdp_ring->dcb_tc;
+		if (xoff[tc])
+			clear_bit(__IXGBE_HANG_CHECK_ARMED, &xdp_ring->state);
+	}
+}
+
+static u64 ixgbe_get_tx_completed(struct ixgbe_ring *ring)
+{
+	return ring->stats.packets;
+}
+
+static u64 ixgbe_get_tx_pending(struct ixgbe_ring *ring)
+{
+	unsigned int head, tail;
+
+	head = ring->next_to_clean;
+	tail = ring->next_to_use;
+
+	return ((head <= tail) ? tail : tail + ring->count) - head;
+}
+
+static inline bool ixgbe_check_tx_hang(struct ixgbe_ring *tx_ring)
+{
+	u32 tx_done = ixgbe_get_tx_completed(tx_ring);
+	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
+	u32 tx_pending = ixgbe_get_tx_pending(tx_ring);
+
+	clear_check_for_tx_hang(tx_ring);
+
+	/*
+	 * Check for a hung queue, but be thorough. This verifies
+	 * that a transmit has been completed since the previous
+	 * check AND there is at least one packet pending. The
+	 * ARMED bit is set to indicate a potential hang. The
+	 * bit is cleared if a pause frame is received to remove
+	 * false hang detection due to PFC or 802.3x frames. By
+	 * requiring this to fail twice we avoid races with
+	 * pfc clearing the ARMED bit and conditions where we
+	 * run the check_tx_hang logic with a transmit completion
+	 * pending but without time to complete it yet.
+	 */
+	if (tx_done_old == tx_done && tx_pending)
+		/* make sure it is true for two checks in a row */
+		return test_and_set_bit(__IXGBE_HANG_CHECK_ARMED,
+					&tx_ring->state);
+	/* update completed stats and continue */
+	tx_ring->tx_stats.tx_done_old = tx_done;
+	/* reset the countdown */
+	clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state);
+
+	return false;
+}
+
+/**
+ * ixgbe_tx_timeout_reset - initiate reset due to Tx timeout
+ * @adapter: driver private struct
+ **/
+static void ixgbe_tx_timeout_reset(struct ixgbe_adapter *adapter)
+{
+
+	/* Do the reset outside of interrupt context */
+	if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
+		set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
+		e_warn(drv, "initiating reset due to tx timeout\n");
+		ixgbe_service_event_schedule(adapter);
+	}
+}
+
+/**
+ * ixgbe_tx_maxrate - callback to set the maximum per-queue bitrate
+ * @netdev: network interface device structure
+ * @queue_index: Tx queue to set
+ * @maxrate: desired maximum transmit bitrate
+ **/
+static int ixgbe_tx_maxrate(struct net_device *netdev,
+			    int queue_index, u32 maxrate)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 bcnrc_val = ixgbe_link_mbps(adapter);
+
+	if (!maxrate)
+		return 0;
+
+	/* Calculate the rate factor values to set */
+	bcnrc_val <<= IXGBE_RTTBCNRC_RF_INT_SHIFT;
+	bcnrc_val /= maxrate;
+
+	/* clear everything but the rate factor */
+	bcnrc_val &= IXGBE_RTTBCNRC_RF_INT_MASK |
+	IXGBE_RTTBCNRC_RF_DEC_MASK;
+
+	/* enable the rate scheduler */
+	bcnrc_val |= IXGBE_RTTBCNRC_RS_ENA;
+
+	IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, queue_index);
+	IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, bcnrc_val);
+
+	return 0;
+}
+
+/**
+ * ixgbe_clean_tx_irq - Reclaim resources after transmit completes
+ * @q_vector: structure containing interrupt and ring information
+ * @tx_ring: tx ring to clean
+ * @napi_budget: Used to determine if we are in netpoll
+ **/
+static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
+			       struct ixgbe_ring *tx_ring, int napi_budget)
+{
+	struct ixgbe_adapter *adapter = q_vector->adapter;
+	struct ixgbe_tx_buffer *tx_buffer;
+	union ixgbe_adv_tx_desc *tx_desc;
+	unsigned int total_bytes = 0, total_packets = 0, total_ipsec = 0;
+	unsigned int budget = q_vector->tx.work_limit;
+	unsigned int i = tx_ring->next_to_clean;
+	struct netdev_queue *txq;
+
+	if (test_bit(__IXGBE_DOWN, &adapter->state))
+		return true;
+
+	tx_buffer = &tx_ring->tx_buffer_info[i];
+	tx_desc = IXGBE_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		union ixgbe_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buffer->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buffer->bytecount;
+		total_packets += tx_buffer->gso_segs;
+		if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_IPSEC)
+			total_ipsec++;
+
+		/* free the skb */
+		if (ring_is_xdp(tx_ring))
+			xdp_return_frame(tx_buffer->xdpf);
+		else
+			napi_consume_skb(tx_buffer->skb, napi_budget);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+
+		/* clear tx_buffer data */
+		dma_unmap_len_set(tx_buffer, len, 0);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(!i)) {
+				i -= tx_ring->count;
+				tx_buffer = tx_ring->tx_buffer_info;
+				tx_desc = IXGBE_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len)) {
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buffer, dma),
+					       dma_unmap_len(tx_buffer, len),
+					       DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buffer, len, 0);
+			}
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buffer = tx_ring->tx_buffer_info;
+			tx_desc = IXGBE_TX_DESC(tx_ring, 0);
+		}
+
+		/* issue prefetch for next Tx descriptor */
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->syncp);
+	q_vector->tx.total_bytes += total_bytes;
+	q_vector->tx.total_packets += total_packets;
+	adapter->tx_ipsec += total_ipsec;
+
+	if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) {
+		/* schedule immediate reset if we believe we hung */
+		struct ixgbe_hw *hw = &adapter->hw;
+		e_err(drv, "Detected Tx Unit Hang %s\n"
+			"  Tx Queue             <%d>\n"
+			"  TDH, TDT             <%x>, <%x>\n"
+			"  next_to_use          <%x>\n"
+			"  next_to_clean        <%x>\n"
+			"tx_buffer_info[next_to_clean]\n"
+			"  time_stamp           <%lx>\n"
+			"  jiffies              <%lx>\n",
+			ring_is_xdp(tx_ring) ? "(XDP)" : "",
+			tx_ring->queue_index,
+			IXGBE_READ_REG(hw, IXGBE_TDH(tx_ring->reg_idx)),
+			IXGBE_READ_REG(hw, IXGBE_TDT(tx_ring->reg_idx)),
+			tx_ring->next_to_use, i,
+			tx_ring->tx_buffer_info[i].time_stamp, jiffies);
+
+		if (!ring_is_xdp(tx_ring))
+			netif_stop_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+
+		e_info(probe,
+		       "tx hang %d detected on queue %d, resetting adapter\n",
+			adapter->tx_timeout_count + 1, tx_ring->queue_index);
+
+		/* schedule immediate reset if we believe we hung */
+		ixgbe_tx_timeout_reset(adapter);
+
+		/* the adapter is about to reset, no point in enabling stuff */
+		return true;
+	}
+
+	if (ring_is_xdp(tx_ring))
+		return !!budget;
+
+#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+	txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
+	if (!__netif_txq_completed_wake(txq, total_packets, total_bytes,
+					ixgbe_desc_unused(tx_ring),
+					TX_WAKE_THRESHOLD,
+					!netif_carrier_ok(tx_ring->netdev) ||
+					test_bit(__IXGBE_DOWN, &adapter->state)))
+		++tx_ring->tx_stats.restart_queue;
+
+	return !!budget;
+}
+
+#ifdef CONFIG_IXGBE_DCA
+static void ixgbe_update_tx_dca(struct ixgbe_adapter *adapter,
+				struct ixgbe_ring *tx_ring,
+				int cpu)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 txctrl = 0;
+	u16 reg_offset;
+
+	if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
+		txctrl = dca3_get_tag(tx_ring->dev, cpu);
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		reg_offset = IXGBE_DCA_TXCTRL(tx_ring->reg_idx);
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+		reg_offset = IXGBE_DCA_TXCTRL_82599(tx_ring->reg_idx);
+		txctrl <<= IXGBE_DCA_TXCTRL_CPUID_SHIFT_82599;
+		break;
+	default:
+		/* for unknown hardware do not write register */
+		return;
+	}
+
+	/*
+	 * We can enable relaxed ordering for reads, but not writes when
+	 * DCA is enabled.  This is due to a known issue in some chipsets
+	 * which will cause the DCA tag to be cleared.
+	 */
+	txctrl |= IXGBE_DCA_TXCTRL_DESC_RRO_EN |
+		  IXGBE_DCA_TXCTRL_DATA_RRO_EN |
+		  IXGBE_DCA_TXCTRL_DESC_DCA_EN;
+
+	IXGBE_WRITE_REG(hw, reg_offset, txctrl);
+}
+
+static void ixgbe_update_rx_dca(struct ixgbe_adapter *adapter,
+				struct ixgbe_ring *rx_ring,
+				int cpu)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 rxctrl = 0;
+	u8 reg_idx = rx_ring->reg_idx;
+
+	if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
+		rxctrl = dca3_get_tag(rx_ring->dev, cpu);
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+		rxctrl <<= IXGBE_DCA_RXCTRL_CPUID_SHIFT_82599;
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * We can enable relaxed ordering for reads, but not writes when
+	 * DCA is enabled.  This is due to a known issue in some chipsets
+	 * which will cause the DCA tag to be cleared.
+	 */
+	rxctrl |= IXGBE_DCA_RXCTRL_DESC_RRO_EN |
+		  IXGBE_DCA_RXCTRL_DATA_DCA_EN |
+		  IXGBE_DCA_RXCTRL_DESC_DCA_EN;
+
+	IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(reg_idx), rxctrl);
+}
+
+static void ixgbe_update_dca(struct ixgbe_q_vector *q_vector)
+{
+	struct ixgbe_adapter *adapter = q_vector->adapter;
+	struct ixgbe_ring *ring;
+	int cpu = get_cpu();
+
+	if (q_vector->cpu == cpu)
+		goto out_no_update;
+
+	ixgbe_for_each_ring(ring, q_vector->tx)
+		ixgbe_update_tx_dca(adapter, ring, cpu);
+
+	ixgbe_for_each_ring(ring, q_vector->rx)
+		ixgbe_update_rx_dca(adapter, ring, cpu);
+
+	q_vector->cpu = cpu;
+out_no_update:
+	put_cpu();
+}
+
+static void ixgbe_setup_dca(struct ixgbe_adapter *adapter)
+{
+	int i;
+
+	/* always use CB2 mode, difference is masked in the CB driver */
+	if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
+				IXGBE_DCA_CTRL_DCA_MODE_CB2);
+	else
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
+				IXGBE_DCA_CTRL_DCA_DISABLE);
+
+	for (i = 0; i < adapter->num_q_vectors; i++) {
+		adapter->q_vector[i]->cpu = -1;
+		ixgbe_update_dca(adapter->q_vector[i]);
+	}
+}
+
+static int __ixgbe_notify_dca(struct device *dev, void *data)
+{
+	struct ixgbe_adapter *adapter = dev_get_drvdata(dev);
+	unsigned long event = *(unsigned long *)data;
+
+	if (!(adapter->flags & IXGBE_FLAG_DCA_CAPABLE))
+		return 0;
+
+	switch (event) {
+	case DCA_PROVIDER_ADD:
+		/* if we're already enabled, don't do it again */
+		if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
+			break;
+		if (dca_add_requester(dev) == 0) {
+			adapter->flags |= IXGBE_FLAG_DCA_ENABLED;
+			IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
+					IXGBE_DCA_CTRL_DCA_MODE_CB2);
+			break;
+		}
+		fallthrough; /* DCA is disabled. */
+	case DCA_PROVIDER_REMOVE:
+		if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) {
+			dca_remove_requester(dev);
+			adapter->flags &= ~IXGBE_FLAG_DCA_ENABLED;
+			IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
+					IXGBE_DCA_CTRL_DCA_DISABLE);
+		}
+		break;
+	}
+
+	return 0;
+}
+
+#endif /* CONFIG_IXGBE_DCA */
+
+#define IXGBE_RSS_L4_TYPES_MASK \
+	((1ul << IXGBE_RXDADV_RSSTYPE_IPV4_TCP) | \
+	 (1ul << IXGBE_RXDADV_RSSTYPE_IPV4_UDP) | \
+	 (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_TCP) | \
+	 (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_UDP))
+
+static inline void ixgbe_rx_hash(struct ixgbe_ring *ring,
+				 union ixgbe_adv_rx_desc *rx_desc,
+				 struct sk_buff *skb)
+{
+	u16 rss_type;
+
+	if (!(ring->netdev->features & NETIF_F_RXHASH))
+		return;
+
+	rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) &
+		   IXGBE_RXDADV_RSSTYPE_MASK;
+
+	if (!rss_type)
+		return;
+
+	skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
+		     (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ?
+		     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
+}
+
+#ifdef IXGBE_FCOE
+/**
+ * ixgbe_rx_is_fcoe - check the rx desc for incoming pkt type
+ * @ring: structure containing ring specific data
+ * @rx_desc: advanced rx descriptor
+ *
+ * Returns : true if it is FCoE pkt
+ */
+static inline bool ixgbe_rx_is_fcoe(struct ixgbe_ring *ring,
+				    union ixgbe_adv_rx_desc *rx_desc)
+{
+	__le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
+
+	return test_bit(__IXGBE_RX_FCOE, &ring->state) &&
+	       ((pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_ETQF_MASK)) ==
+		(cpu_to_le16(IXGBE_ETQF_FILTER_FCOE <<
+			     IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT)));
+}
+
+#endif /* IXGBE_FCOE */
+/**
+ * ixgbe_rx_checksum - indicate in skb if hw indicated a good cksum
+ * @ring: structure containing ring specific data
+ * @rx_desc: current Rx descriptor being processed
+ * @skb: skb currently being received and modified
+ **/
+static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring,
+				     union ixgbe_adv_rx_desc *rx_desc,
+				     struct sk_buff *skb)
+{
+	__le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
+	bool encap_pkt = false;
+
+	skb_checksum_none_assert(skb);
+
+	/* Rx csum disabled */
+	if (!(ring->netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* check for VXLAN and Geneve packets */
+	if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_VXLAN)) {
+		encap_pkt = true;
+		skb->encapsulation = 1;
+	}
+
+	/* if IP and error */
+	if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_IPCS) &&
+	    ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_IPE)) {
+		ring->rx_stats.csum_err++;
+		return;
+	}
+
+	if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_L4CS))
+		return;
+
+	if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) {
+		/*
+		 * 82599 errata, UDP frames with a 0 checksum can be marked as
+		 * checksum errors.
+		 */
+		if ((pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_UDP)) &&
+		    test_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state))
+			return;
+
+		ring->rx_stats.csum_err++;
+		return;
+	}
+
+	/* It must be a TCP or UDP packet with a valid checksum */
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+	if (encap_pkt) {
+		if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_OUTERIPCS))
+			return;
+
+		if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_OUTERIPER)) {
+			skb->ip_summed = CHECKSUM_NONE;
+			return;
+		}
+		/* If we checked the outer header let the stack know */
+		skb->csum_level = 1;
+	}
+}
+
+static unsigned int ixgbe_rx_offset(struct ixgbe_ring *rx_ring)
+{
+	return ring_uses_build_skb(rx_ring) ? IXGBE_SKB_PAD : 0;
+}
+
+static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring,
+				    struct ixgbe_rx_buffer *bi)
+{
+	struct page *page = bi->page;
+	dma_addr_t dma;
+
+	/* since we are recycling buffers we should seldom need to alloc */
+	if (likely(page))
+		return true;
+
+	/* alloc new page for storage */
+	page = dev_alloc_pages(ixgbe_rx_pg_order(rx_ring));
+	if (unlikely(!page)) {
+		rx_ring->rx_stats.alloc_rx_page_failed++;
+		return false;
+	}
+
+	/* map page for use */
+	dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+				 ixgbe_rx_pg_size(rx_ring),
+				 DMA_FROM_DEVICE,
+				 IXGBE_RX_DMA_ATTR);
+
+	/*
+	 * if mapping failed free memory back to system since
+	 * there isn't much point in holding memory we can't use
+	 */
+	if (dma_mapping_error(rx_ring->dev, dma)) {
+		__free_pages(page, ixgbe_rx_pg_order(rx_ring));
+
+		rx_ring->rx_stats.alloc_rx_page_failed++;
+		return false;
+	}
+
+	bi->dma = dma;
+	bi->page = page;
+	bi->page_offset = rx_ring->rx_offset;
+	page_ref_add(page, USHRT_MAX - 1);
+	bi->pagecnt_bias = USHRT_MAX;
+	rx_ring->rx_stats.alloc_rx_page++;
+
+	return true;
+}
+
+/**
+ * ixgbe_alloc_rx_buffers - Replace used receive buffers
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ **/
+void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count)
+{
+	union ixgbe_adv_rx_desc *rx_desc;
+	struct ixgbe_rx_buffer *bi;
+	u16 i = rx_ring->next_to_use;
+	u16 bufsz;
+
+	/* nothing to do */
+	if (!cleaned_count)
+		return;
+
+	rx_desc = IXGBE_RX_DESC(rx_ring, i);
+	bi = &rx_ring->rx_buffer_info[i];
+	i -= rx_ring->count;
+
+	bufsz = ixgbe_rx_bufsz(rx_ring);
+
+	do {
+		if (!ixgbe_alloc_mapped_page(rx_ring, bi))
+			break;
+
+		/* sync the buffer for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+						 bi->page_offset, bufsz,
+						 DMA_FROM_DEVICE);
+
+		/*
+		 * Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+		rx_desc++;
+		bi++;
+		i++;
+		if (unlikely(!i)) {
+			rx_desc = IXGBE_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_buffer_info;
+			i -= rx_ring->count;
+		}
+
+		/* clear the length for the next_to_use descriptor */
+		rx_desc->wb.upper.length = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	i += rx_ring->count;
+
+	if (rx_ring->next_to_use != i) {
+		rx_ring->next_to_use = i;
+
+		/* update next to alloc since we have filled the ring */
+		rx_ring->next_to_alloc = i;
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(i, rx_ring->tail);
+	}
+}
+
+static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring,
+				   struct sk_buff *skb)
+{
+	u16 hdr_len = skb_headlen(skb);
+
+	/* set gso_size to avoid messing up TCP MSS */
+	skb_shinfo(skb)->gso_size = DIV_ROUND_UP((skb->len - hdr_len),
+						 IXGBE_CB(skb)->append_cnt);
+	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+}
+
+static void ixgbe_update_rsc_stats(struct ixgbe_ring *rx_ring,
+				   struct sk_buff *skb)
+{
+	/* if append_cnt is 0 then frame is not RSC */
+	if (!IXGBE_CB(skb)->append_cnt)
+		return;
+
+	rx_ring->rx_stats.rsc_count += IXGBE_CB(skb)->append_cnt;
+	rx_ring->rx_stats.rsc_flush++;
+
+	ixgbe_set_rsc_gso_size(rx_ring, skb);
+
+	/* gso_size is computed using append_cnt so always clear it last */
+	IXGBE_CB(skb)->append_cnt = 0;
+}
+
+/**
+ * ixgbe_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, timestamp, protocol, and
+ * other fields within the skb.
+ **/
+void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
+			      union ixgbe_adv_rx_desc *rx_desc,
+			      struct sk_buff *skb)
+{
+	struct net_device *dev = rx_ring->netdev;
+	u32 flags = rx_ring->q_vector->adapter->flags;
+
+	ixgbe_update_rsc_stats(rx_ring, skb);
+
+	ixgbe_rx_hash(rx_ring, rx_desc, skb);
+
+	ixgbe_rx_checksum(rx_ring, rx_desc, skb);
+
+	if (unlikely(flags & IXGBE_FLAG_RX_HWTSTAMP_ENABLED))
+		ixgbe_ptp_rx_hwtstamp(rx_ring, rx_desc, skb);
+
+	if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+	    ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) {
+		u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+	}
+
+	if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP))
+		ixgbe_ipsec_rx(rx_ring, rx_desc, skb);
+
+	/* record Rx queue, or update MACVLAN statistics */
+	if (netif_is_ixgbe(dev))
+		skb_record_rx_queue(skb, rx_ring->queue_index);
+	else
+		macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true,
+				 false);
+
+	skb->protocol = eth_type_trans(skb, dev);
+}
+
+void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
+		  struct sk_buff *skb)
+{
+	napi_gro_receive(&q_vector->napi, skb);
+}
+
+/**
+ * ixgbe_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ * @skb: Current socket buffer containing buffer in progress
+ *
+ * This function updates next to clean.  If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ **/
+static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring,
+			     union ixgbe_adv_rx_desc *rx_desc,
+			     struct sk_buff *skb)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
+	prefetch(IXGBE_RX_DESC(rx_ring, ntc));
+
+	/* update RSC append count if present */
+	if (ring_is_rsc_enabled(rx_ring)) {
+		__le32 rsc_enabled = rx_desc->wb.lower.lo_dword.data &
+				     cpu_to_le32(IXGBE_RXDADV_RSCCNT_MASK);
+
+		if (unlikely(rsc_enabled)) {
+			u32 rsc_cnt = le32_to_cpu(rsc_enabled);
+
+			rsc_cnt >>= IXGBE_RXDADV_RSCCNT_SHIFT;
+			IXGBE_CB(skb)->append_cnt += rsc_cnt - 1;
+
+			/* update ntc based on RSC value */
+			ntc = le32_to_cpu(rx_desc->wb.upper.status_error);
+			ntc &= IXGBE_RXDADV_NEXTP_MASK;
+			ntc >>= IXGBE_RXDADV_NEXTP_SHIFT;
+		}
+	}
+
+	/* if we are the last buffer then there is nothing else to do */
+	if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)))
+		return false;
+
+	/* place skb in next buffer to be received */
+	rx_ring->rx_buffer_info[ntc].skb = skb;
+	rx_ring->rx_stats.non_eop_descs++;
+
+	return true;
+}
+
+/**
+ * ixgbe_pull_tail - ixgbe specific version of skb_pull_tail
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being adjusted
+ *
+ * This function is an ixgbe specific version of __pskb_pull_tail.  The
+ * main difference between this version and the original function is that
+ * this function can make several assumptions about the state of things
+ * that allow for significant optimizations versus the standard function.
+ * As a result we can do things like drop a frag and maintain an accurate
+ * truesize for the skb.
+ */
+static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
+			    struct sk_buff *skb)
+{
+	skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+	unsigned char *va;
+	unsigned int pull_len;
+
+	/*
+	 * it is valid to use page_address instead of kmap since we are
+	 * working with pages allocated out of the lomem pool per
+	 * alloc_page(GFP_ATOMIC)
+	 */
+	va = skb_frag_address(frag);
+
+	/*
+	 * we need the header to contain the greater of either ETH_HLEN or
+	 * 60 bytes if the skb->len is less than 60 for skb_pad.
+	 */
+	pull_len = eth_get_headlen(skb->dev, va, IXGBE_RX_HDR_SIZE);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+	/* update all of the pointers */
+	skb_frag_size_sub(frag, pull_len);
+	skb_frag_off_add(frag, pull_len);
+	skb->data_len -= pull_len;
+	skb->tail += pull_len;
+}
+
+/**
+ * ixgbe_dma_sync_frag - perform DMA sync for first frag of SKB
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being updated
+ *
+ * This function provides a basic DMA sync up for the first fragment of an
+ * skb.  The reason for doing this is that the first fragment cannot be
+ * unmapped until we have reached the end of packet descriptor for a buffer
+ * chain.
+ */
+static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
+				struct sk_buff *skb)
+{
+	if (ring_uses_build_skb(rx_ring)) {
+		unsigned long mask = (unsigned long)ixgbe_rx_pg_size(rx_ring) - 1;
+		unsigned long offset = (unsigned long)(skb->data) & mask;
+
+		dma_sync_single_range_for_cpu(rx_ring->dev,
+					      IXGBE_CB(skb)->dma,
+					      offset,
+					      skb_headlen(skb),
+					      DMA_FROM_DEVICE);
+	} else {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+
+		dma_sync_single_range_for_cpu(rx_ring->dev,
+					      IXGBE_CB(skb)->dma,
+					      skb_frag_off(frag),
+					      skb_frag_size(frag),
+					      DMA_FROM_DEVICE);
+	}
+
+	/* If the page was released, just unmap it. */
+	if (unlikely(IXGBE_CB(skb)->page_released)) {
+		dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma,
+				     ixgbe_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE,
+				     IXGBE_RX_DMA_ATTR);
+	}
+}
+
+/**
+ * ixgbe_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Check if the skb is valid in the XDP case it will be an error pointer.
+ * Return true in this case to abort processing and advance to next
+ * descriptor.
+ *
+ * Check for corrupted packet headers caused by senders on the local L2
+ * embedded NIC switch not setting up their Tx Descriptors right.  These
+ * should be very rare.
+ *
+ * Also address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
+			   union ixgbe_adv_rx_desc *rx_desc,
+			   struct sk_buff *skb)
+{
+	struct net_device *netdev = rx_ring->netdev;
+
+	/* XDP packets use error pointer so abort at this point */
+	if (IS_ERR(skb))
+		return true;
+
+	/* Verify netdev is present, and that packet does not have any
+	 * errors that would be unacceptable to the netdev.
+	 */
+	if (!netdev ||
+	    (unlikely(ixgbe_test_staterr(rx_desc,
+					 IXGBE_RXDADV_ERR_FRAME_ERR_MASK) &&
+	     !(netdev->features & NETIF_F_RXALL)))) {
+		dev_kfree_skb_any(skb);
+		return true;
+	}
+
+	/* place header in linear portion of buffer */
+	if (!skb_headlen(skb))
+		ixgbe_pull_tail(rx_ring, skb);
+
+#ifdef IXGBE_FCOE
+	/* do not attempt to pad FCoE Frames as this will disrupt DDP */
+	if (ixgbe_rx_is_fcoe(rx_ring, rx_desc))
+		return false;
+
+#endif
+	/* if eth_skb_pad returns an error the skb was freed */
+	if (eth_skb_pad(skb))
+		return true;
+
+	return false;
+}
+
+/**
+ * ixgbe_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ **/
+static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring,
+				struct ixgbe_rx_buffer *old_buff)
+{
+	struct ixgbe_rx_buffer *new_buff;
+	u16 nta = rx_ring->next_to_alloc;
+
+	new_buff = &rx_ring->rx_buffer_info[nta];
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* Transfer page from old buffer to new buffer.
+	 * Move each member individually to avoid possible store
+	 * forwarding stalls and unnecessary copy of skb.
+	 */
+	new_buff->dma		= old_buff->dma;
+	new_buff->page		= old_buff->page;
+	new_buff->page_offset	= old_buff->page_offset;
+	new_buff->pagecnt_bias	= old_buff->pagecnt_bias;
+}
+
+static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer,
+				    int rx_buffer_pgcnt)
+{
+	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+	struct page *page = rx_buffer->page;
+
+	/* avoid re-using remote and pfmemalloc pages */
+	if (!dev_page_is_reusable(page))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
+		return false;
+#else
+	/* The last offset is a bit aggressive in that we assume the
+	 * worst case of FCoE being enabled and using a 3K buffer.
+	 * However this should have minimal impact as the 1K extra is
+	 * still less than one buffer in size.
+	 */
+#define IXGBE_LAST_OFFSET \
+	(SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBE_RXBUFFER_3K)
+	if (rx_buffer->page_offset > IXGBE_LAST_OFFSET)
+		return false;
+#endif
+
+	/* If we have drained the page fragment pool we need to update
+	 * the pagecnt_bias and page count so that we fully restock the
+	 * number of references the driver holds.
+	 */
+	if (unlikely(pagecnt_bias == 1)) {
+		page_ref_add(page, USHRT_MAX - 1);
+		rx_buffer->pagecnt_bias = USHRT_MAX;
+	}
+
+	return true;
+}
+
+/**
+ * ixgbe_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @skb: sk_buff to place the data into
+ * @size: size of data in rx_buffer
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the adapter.
+ **/
+static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring,
+			      struct ixgbe_rx_buffer *rx_buffer,
+			      struct sk_buff *skb,
+			      unsigned int size)
+{
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = rx_ring->rx_offset ?
+				SKB_DATA_ALIGN(rx_ring->rx_offset + size) :
+				SKB_DATA_ALIGN(size);
+#endif
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+			rx_buffer->page_offset, size, truesize);
+#if (PAGE_SIZE < 8192)
+	rx_buffer->page_offset ^= truesize;
+#else
+	rx_buffer->page_offset += truesize;
+#endif
+}
+
+static struct ixgbe_rx_buffer *ixgbe_get_rx_buffer(struct ixgbe_ring *rx_ring,
+						   union ixgbe_adv_rx_desc *rx_desc,
+						   struct sk_buff **skb,
+						   const unsigned int size,
+						   int *rx_buffer_pgcnt)
+{
+	struct ixgbe_rx_buffer *rx_buffer;
+
+	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+	*rx_buffer_pgcnt =
+#if (PAGE_SIZE < 8192)
+		page_count(rx_buffer->page);
+#else
+		0;
+#endif
+	prefetchw(rx_buffer->page);
+	*skb = rx_buffer->skb;
+
+	/* Delay unmapping of the first packet. It carries the header
+	 * information, HW may still access the header after the writeback.
+	 * Only unmap it when EOP is reached
+	 */
+	if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) {
+		if (!*skb)
+			goto skip_sync;
+	} else {
+		if (*skb)
+			ixgbe_dma_sync_frag(rx_ring, *skb);
+	}
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      rx_buffer->dma,
+				      rx_buffer->page_offset,
+				      size,
+				      DMA_FROM_DEVICE);
+skip_sync:
+	rx_buffer->pagecnt_bias--;
+
+	return rx_buffer;
+}
+
+static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
+				struct ixgbe_rx_buffer *rx_buffer,
+				struct sk_buff *skb,
+				int rx_buffer_pgcnt)
+{
+	if (ixgbe_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
+		/* hand second half of page back to the ring */
+		ixgbe_reuse_rx_page(rx_ring, rx_buffer);
+	} else {
+		if (!IS_ERR(skb) && IXGBE_CB(skb)->dma == rx_buffer->dma) {
+			/* the page has been released from the ring */
+			IXGBE_CB(skb)->page_released = true;
+		} else {
+			/* we are not reusing the buffer so unmap it */
+			dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+					     ixgbe_rx_pg_size(rx_ring),
+					     DMA_FROM_DEVICE,
+					     IXGBE_RX_DMA_ATTR);
+		}
+		__page_frag_cache_drain(rx_buffer->page,
+					rx_buffer->pagecnt_bias);
+	}
+
+	/* clear contents of rx_buffer */
+	rx_buffer->page = NULL;
+	rx_buffer->skb = NULL;
+}
+
+static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
+					   struct ixgbe_rx_buffer *rx_buffer,
+					   struct xdp_buff *xdp,
+					   union ixgbe_adv_rx_desc *rx_desc)
+{
+	unsigned int size = xdp->data_end - xdp->data;
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end -
+					       xdp->data_hard_start);
+#endif
+	struct sk_buff *skb;
+
+	/* prefetch first cache line of first page */
+	net_prefetch(xdp->data);
+
+	/* Note, we get here by enabling legacy-rx via:
+	 *
+	 *    ethtool --set-priv-flags <dev> legacy-rx on
+	 *
+	 * In this mode, we currently get 0 extra XDP headroom as
+	 * opposed to having legacy-rx off, where we process XDP
+	 * packets going to stack via ixgbe_build_skb(). The latter
+	 * provides us currently with 192 bytes of headroom.
+	 *
+	 * For ixgbe_construct_skb() mode it means that the
+	 * xdp->data_meta will always point to xdp->data, since
+	 * the helper cannot expand the head. Should this ever
+	 * change in future for legacy-rx mode on, then lets also
+	 * add xdp->data_meta handling here.
+	 */
+
+	/* allocate a skb to store the frags */
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBE_RX_HDR_SIZE);
+	if (unlikely(!skb))
+		return NULL;
+
+	if (size > IXGBE_RX_HDR_SIZE) {
+		if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
+			IXGBE_CB(skb)->dma = rx_buffer->dma;
+
+		skb_add_rx_frag(skb, 0, rx_buffer->page,
+				xdp->data - page_address(rx_buffer->page),
+				size, truesize);
+#if (PAGE_SIZE < 8192)
+		rx_buffer->page_offset ^= truesize;
+#else
+		rx_buffer->page_offset += truesize;
+#endif
+	} else {
+		memcpy(__skb_put(skb, size),
+		       xdp->data, ALIGN(size, sizeof(long)));
+		rx_buffer->pagecnt_bias++;
+	}
+
+	return skb;
+}
+
+static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
+				       struct ixgbe_rx_buffer *rx_buffer,
+				       struct xdp_buff *xdp,
+				       union ixgbe_adv_rx_desc *rx_desc)
+{
+	unsigned int metasize = xdp->data - xdp->data_meta;
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+				SKB_DATA_ALIGN(xdp->data_end -
+					       xdp->data_hard_start);
+#endif
+	struct sk_buff *skb;
+
+	/* Prefetch first cache line of first page. If xdp->data_meta
+	 * is unused, this points extactly as xdp->data, otherwise we
+	 * likely have a consumer accessing first few bytes of meta
+	 * data, and then actual data.
+	 */
+	net_prefetch(xdp->data_meta);
+
+	/* build an skb to around the page buffer */
+	skb = napi_build_skb(xdp->data_hard_start, truesize);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* update pointers within the skb to store the data */
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	__skb_put(skb, xdp->data_end - xdp->data);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	/* record DMA address if this is the start of a chain of buffers */
+	if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
+		IXGBE_CB(skb)->dma = rx_buffer->dma;
+
+	/* update buffer offset */
+#if (PAGE_SIZE < 8192)
+	rx_buffer->page_offset ^= truesize;
+#else
+	rx_buffer->page_offset += truesize;
+#endif
+
+	return skb;
+}
+
+static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
+				     struct ixgbe_ring *rx_ring,
+				     struct xdp_buff *xdp)
+{
+	int err, result = IXGBE_XDP_PASS;
+	struct bpf_prog *xdp_prog;
+	struct ixgbe_ring *ring;
+	struct xdp_frame *xdpf;
+	u32 act;
+
+	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
+	if (!xdp_prog)
+		goto xdp_out;
+
+	prefetchw(xdp->data_hard_start); /* xdp_frame write */
+
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		xdpf = xdp_convert_buff_to_frame(xdp);
+		if (unlikely(!xdpf))
+			goto out_failure;
+		ring = ixgbe_determine_xdp_ring(adapter);
+		if (static_branch_unlikely(&ixgbe_xdp_locking_key))
+			spin_lock(&ring->tx_lock);
+		result = ixgbe_xmit_xdp_ring(ring, xdpf);
+		if (static_branch_unlikely(&ixgbe_xdp_locking_key))
+			spin_unlock(&ring->tx_lock);
+		if (result == IXGBE_XDP_CONSUMED)
+			goto out_failure;
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
+		if (err)
+			goto out_failure;
+		result = IXGBE_XDP_REDIR;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
+		fallthrough;
+	case XDP_ABORTED:
+out_failure:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		fallthrough; /* handle aborts by dropping packet */
+	case XDP_DROP:
+		result = IXGBE_XDP_CONSUMED;
+		break;
+	}
+xdp_out:
+	return ERR_PTR(-result);
+}
+
+static unsigned int ixgbe_rx_frame_truesize(struct ixgbe_ring *rx_ring,
+					    unsigned int size)
+{
+	unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+	truesize = ixgbe_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
+#else
+	truesize = rx_ring->rx_offset ?
+		SKB_DATA_ALIGN(rx_ring->rx_offset + size) +
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
+		SKB_DATA_ALIGN(size);
+#endif
+	return truesize;
+}
+
+static void ixgbe_rx_buffer_flip(struct ixgbe_ring *rx_ring,
+				 struct ixgbe_rx_buffer *rx_buffer,
+				 unsigned int size)
+{
+	unsigned int truesize = ixgbe_rx_frame_truesize(rx_ring, size);
+#if (PAGE_SIZE < 8192)
+	rx_buffer->page_offset ^= truesize;
+#else
+	rx_buffer->page_offset += truesize;
+#endif
+}
+
+/**
+ * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * @q_vector: structure containing interrupt and ring information
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @budget: Total limit on number of packets to process
+ *
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing.  The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the syste.
+ *
+ * Returns amount of work completed
+ **/
+static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
+			       struct ixgbe_ring *rx_ring,
+			       const int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0;
+	struct ixgbe_adapter *adapter = q_vector->adapter;
+#ifdef IXGBE_FCOE
+	int ddp_bytes;
+	unsigned int mss = 0;
+#endif /* IXGBE_FCOE */
+	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
+	unsigned int offset = rx_ring->rx_offset;
+	unsigned int xdp_xmit = 0;
+	struct xdp_buff xdp;
+
+	/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
+#if (PAGE_SIZE < 8192)
+	frame_sz = ixgbe_rx_frame_truesize(rx_ring, 0);
+#endif
+	xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
+
+	while (likely(total_rx_packets < budget)) {
+		union ixgbe_adv_rx_desc *rx_desc;
+		struct ixgbe_rx_buffer *rx_buffer;
+		struct sk_buff *skb;
+		int rx_buffer_pgcnt;
+		unsigned int size;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
+			ixgbe_alloc_rx_buffers(rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+		size = le16_to_cpu(rx_desc->wb.upper.length);
+		if (!size)
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * descriptor has been written back
+		 */
+		dma_rmb();
+
+		rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size, &rx_buffer_pgcnt);
+
+		/* retrieve a buffer from the ring */
+		if (!skb) {
+			unsigned char *hard_start;
+
+			hard_start = page_address(rx_buffer->page) +
+				     rx_buffer->page_offset - offset;
+			xdp_prepare_buff(&xdp, hard_start, offset, size, true);
+			xdp_buff_clear_frags_flag(&xdp);
+#if (PAGE_SIZE > 4096)
+			/* At larger PAGE_SIZE, frame_sz depend on len size */
+			xdp.frame_sz = ixgbe_rx_frame_truesize(rx_ring, size);
+#endif
+			skb = ixgbe_run_xdp(adapter, rx_ring, &xdp);
+		}
+
+		if (IS_ERR(skb)) {
+			unsigned int xdp_res = -PTR_ERR(skb);
+
+			if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
+				xdp_xmit |= xdp_res;
+				ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size);
+			} else {
+				rx_buffer->pagecnt_bias++;
+			}
+			total_rx_packets++;
+			total_rx_bytes += size;
+		} else if (skb) {
+			ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size);
+		} else if (ring_uses_build_skb(rx_ring)) {
+			skb = ixgbe_build_skb(rx_ring, rx_buffer,
+					      &xdp, rx_desc);
+		} else {
+			skb = ixgbe_construct_skb(rx_ring, rx_buffer,
+						  &xdp, rx_desc);
+		}
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb) {
+			rx_ring->rx_stats.alloc_rx_buff_failed++;
+			rx_buffer->pagecnt_bias++;
+			break;
+		}
+
+		ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb, rx_buffer_pgcnt);
+		cleaned_count++;
+
+		/* place incomplete frames back on ring for completion */
+		if (ixgbe_is_non_eop(rx_ring, rx_desc, skb))
+			continue;
+
+		/* verify the packet layout is correct */
+		if (ixgbe_cleanup_headers(rx_ring, rx_desc, skb))
+			continue;
+
+		/* probably a little skewed due to removing CRC */
+		total_rx_bytes += skb->len;
+
+		/* populate checksum, timestamp, VLAN, and protocol */
+		ixgbe_process_skb_fields(rx_ring, rx_desc, skb);
+
+#ifdef IXGBE_FCOE
+		/* if ddp, not passing to ULD unless for FCP_RSP or error */
+		if (ixgbe_rx_is_fcoe(rx_ring, rx_desc)) {
+			ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb);
+			/* include DDPed FCoE data */
+			if (ddp_bytes > 0) {
+				if (!mss) {
+					mss = rx_ring->netdev->mtu -
+						sizeof(struct fcoe_hdr) -
+						sizeof(struct fc_frame_header) -
+						sizeof(struct fcoe_crc_eof);
+					if (mss > 512)
+						mss &= ~511;
+				}
+				total_rx_bytes += ddp_bytes;
+				total_rx_packets += DIV_ROUND_UP(ddp_bytes,
+								 mss);
+			}
+			if (!ddp_bytes) {
+				dev_kfree_skb_any(skb);
+				continue;
+			}
+		}
+
+#endif /* IXGBE_FCOE */
+		ixgbe_rx_skb(q_vector, skb);
+
+		/* update budget accounting */
+		total_rx_packets++;
+	}
+
+	if (xdp_xmit & IXGBE_XDP_REDIR)
+		xdp_do_flush_map();
+
+	if (xdp_xmit & IXGBE_XDP_TX) {
+		struct ixgbe_ring *ring = ixgbe_determine_xdp_ring(adapter);
+
+		ixgbe_xdp_ring_update_tail_locked(ring);
+	}
+
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->stats.packets += total_rx_packets;
+	rx_ring->stats.bytes += total_rx_bytes;
+	u64_stats_update_end(&rx_ring->syncp);
+	q_vector->rx.total_packets += total_rx_packets;
+	q_vector->rx.total_bytes += total_rx_bytes;
+
+	return total_rx_packets;
+}
+
+/**
+ * ixgbe_configure_msix - Configure MSI-X hardware
+ * @adapter: board private structure
+ *
+ * ixgbe_configure_msix sets up the hardware to properly generate MSI-X
+ * interrupts.
+ **/
+static void ixgbe_configure_msix(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_q_vector *q_vector;
+	int v_idx;
+	u32 mask;
+
+	/* Populate MSIX to EITR Select */
+	if (adapter->num_vfs > 32) {
+		u32 eitrsel = BIT(adapter->num_vfs - 32) - 1;
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITRSEL, eitrsel);
+	}
+
+	/*
+	 * Populate the IVAR table and set the ITR values to the
+	 * corresponding register.
+	 */
+	for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
+		struct ixgbe_ring *ring;
+		q_vector = adapter->q_vector[v_idx];
+
+		ixgbe_for_each_ring(ring, q_vector->rx)
+			ixgbe_set_ivar(adapter, 0, ring->reg_idx, v_idx);
+
+		ixgbe_for_each_ring(ring, q_vector->tx)
+			ixgbe_set_ivar(adapter, 1, ring->reg_idx, v_idx);
+
+		ixgbe_write_eitr(q_vector);
+	}
+
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82598EB:
+		ixgbe_set_ivar(adapter, -1, IXGBE_IVAR_OTHER_CAUSES_INDEX,
+			       v_idx);
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		ixgbe_set_ivar(adapter, -1, 1, v_idx);
+		break;
+	default:
+		break;
+	}
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(v_idx), 1950);
+
+	/* set up to autoclear timer, and the vectors */
+	mask = IXGBE_EIMS_ENABLE_MASK;
+	mask &= ~(IXGBE_EIMS_OTHER |
+		  IXGBE_EIMS_MAILBOX |
+		  IXGBE_EIMS_LSC);
+
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, mask);
+}
+
+/**
+ * ixgbe_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
+ * @ring_container: structure containing ring performance data
+ *
+ *      Stores a new ITR value based on packets and byte
+ *      counts during the last interrupt.  The advantage of per interrupt
+ *      computation is faster updates and more accurate ITR for the current
+ *      traffic pattern.  Constants in this function were computed
+ *      based on theoretical maximum wire speed and thresholds were set based
+ *      on testing data as well as attempting to minimize response time
+ *      while increasing bulk throughput.
+ **/
+static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector,
+			     struct ixgbe_ring_container *ring_container)
+{
+	unsigned int itr = IXGBE_ITR_ADAPTIVE_MIN_USECS |
+			   IXGBE_ITR_ADAPTIVE_LATENCY;
+	unsigned int avg_wire_size, packets, bytes;
+	unsigned long next_update = jiffies;
+
+	/* If we don't have any rings just leave ourselves set for maximum
+	 * possible latency so we take ourselves out of the equation.
+	 */
+	if (!ring_container->ring)
+		return;
+
+	/* If we didn't update within up to 1 - 2 jiffies we can assume
+	 * that either packets are coming in so slow there hasn't been
+	 * any work, or that there is so much work that NAPI is dealing
+	 * with interrupt moderation and we don't need to do anything.
+	 */
+	if (time_after(next_update, ring_container->next_update))
+		goto clear_counts;
+
+	packets = ring_container->total_packets;
+
+	/* We have no packets to actually measure against. This means
+	 * either one of the other queues on this vector is active or
+	 * we are a Tx queue doing TSO with too high of an interrupt rate.
+	 *
+	 * When this occurs just tick up our delay by the minimum value
+	 * and hope that this extra delay will prevent us from being called
+	 * without any work on our queue.
+	 */
+	if (!packets) {
+		itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
+		if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
+			itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
+		itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY;
+		goto clear_counts;
+	}
+
+	bytes = ring_container->total_bytes;
+
+	/* If packets are less than 4 or bytes are less than 9000 assume
+	 * insufficient data to use bulk rate limiting approach. We are
+	 * likely latency driven.
+	 */
+	if (packets < 4 && bytes < 9000) {
+		itr = IXGBE_ITR_ADAPTIVE_LATENCY;
+		goto adjust_by_size;
+	}
+
+	/* Between 4 and 48 we can assume that our current interrupt delay
+	 * is only slightly too low. As such we should increase it by a small
+	 * fixed amount.
+	 */
+	if (packets < 48) {
+		itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
+		if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
+			itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
+		goto clear_counts;
+	}
+
+	/* Between 48 and 96 is our "goldilocks" zone where we are working
+	 * out "just right". Just report that our current ITR is good for us.
+	 */
+	if (packets < 96) {
+		itr = q_vector->itr >> 2;
+		goto clear_counts;
+	}
+
+	/* If packet count is 96 or greater we are likely looking at a slight
+	 * overrun of the delay we want. Try halving our delay to see if that
+	 * will cut the number of packets in half per interrupt.
+	 */
+	if (packets < 256) {
+		itr = q_vector->itr >> 3;
+		if (itr < IXGBE_ITR_ADAPTIVE_MIN_USECS)
+			itr = IXGBE_ITR_ADAPTIVE_MIN_USECS;
+		goto clear_counts;
+	}
+
+	/* The paths below assume we are dealing with a bulk ITR since number
+	 * of packets is 256 or greater. We are just going to have to compute
+	 * a value and try to bring the count under control, though for smaller
+	 * packet sizes there isn't much we can do as NAPI polling will likely
+	 * be kicking in sooner rather than later.
+	 */
+	itr = IXGBE_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+	/* If packet counts are 256 or greater we can assume we have a gross
+	 * overestimation of what the rate should be. Instead of trying to fine
+	 * tune it just use the formula below to try and dial in an exact value
+	 * give the current packet size of the frame.
+	 */
+	avg_wire_size = bytes / packets;
+
+	/* The following is a crude approximation of:
+	 *  wmem_default / (size + overhead) = desired_pkts_per_int
+	 *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+	 *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+	 *
+	 * Assuming wmem_default is 212992 and overhead is 640 bytes per
+	 * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+	 * formula down to
+	 *
+	 *  (170 * (size + 24)) / (size + 640) = ITR
+	 *
+	 * We first do some math on the packet size and then finally bitshift
+	 * by 8 after rounding up. We also have to account for PCIe link speed
+	 * difference as ITR scales based on this.
+	 */
+	if (avg_wire_size <= 60) {
+		/* Start at 50k ints/sec */
+		avg_wire_size = 5120;
+	} else if (avg_wire_size <= 316) {
+		/* 50K ints/sec to 16K ints/sec */
+		avg_wire_size *= 40;
+		avg_wire_size += 2720;
+	} else if (avg_wire_size <= 1084) {
+		/* 16K ints/sec to 9.2K ints/sec */
+		avg_wire_size *= 15;
+		avg_wire_size += 11452;
+	} else if (avg_wire_size < 1968) {
+		/* 9.2K ints/sec to 8K ints/sec */
+		avg_wire_size *= 5;
+		avg_wire_size += 22420;
+	} else {
+		/* plateau at a limit of 8K ints/sec */
+		avg_wire_size = 32256;
+	}
+
+	/* If we are in low latency mode half our delay which doubles the rate
+	 * to somewhere between 100K to 16K ints/sec
+	 */
+	if (itr & IXGBE_ITR_ADAPTIVE_LATENCY)
+		avg_wire_size >>= 1;
+
+	/* Resultant value is 256 times larger than it needs to be. This
+	 * gives us room to adjust the value as needed to either increase
+	 * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+	 *
+	 * Use addition as we have already recorded the new latency flag
+	 * for the ITR value.
+	 */
+	switch (q_vector->adapter->link_speed) {
+	case IXGBE_LINK_SPEED_10GB_FULL:
+	case IXGBE_LINK_SPEED_100_FULL:
+	default:
+		itr += DIV_ROUND_UP(avg_wire_size,
+				    IXGBE_ITR_ADAPTIVE_MIN_INC * 256) *
+		       IXGBE_ITR_ADAPTIVE_MIN_INC;
+		break;
+	case IXGBE_LINK_SPEED_2_5GB_FULL:
+	case IXGBE_LINK_SPEED_1GB_FULL:
+	case IXGBE_LINK_SPEED_10_FULL:
+		if (avg_wire_size > 8064)
+			avg_wire_size = 8064;
+		itr += DIV_ROUND_UP(avg_wire_size,
+				    IXGBE_ITR_ADAPTIVE_MIN_INC * 64) *
+		       IXGBE_ITR_ADAPTIVE_MIN_INC;
+		break;
+	}
+
+clear_counts:
+	/* write back value */
+	ring_container->itr = itr;
+
+	/* next update should occur within next jiffy */
+	ring_container->next_update = next_update + 1;
+
+	ring_container->total_bytes = 0;
+	ring_container->total_packets = 0;
+}
+
+/**
+ * ixgbe_write_eitr - write EITR register in hardware specific way
+ * @q_vector: structure containing interrupt and ring information
+ *
+ * This function is made to be called by ethtool and by the driver
+ * when it needs to update EITR registers at runtime.  Hardware
+ * specific quirks/differences are taken care of here.
+ */
+void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector)
+{
+	struct ixgbe_adapter *adapter = q_vector->adapter;
+	struct ixgbe_hw *hw = &adapter->hw;
+	int v_idx = q_vector->v_idx;
+	u32 itr_reg = q_vector->itr & IXGBE_MAX_EITR;
+
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82598EB:
+		/* must write high and low 16 bits to reset counter */
+		itr_reg |= (itr_reg << 16);
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		/*
+		 * set the WDIS bit to not clear the timer bits and cause an
+		 * immediate assertion of the interrupt
+		 */
+		itr_reg |= IXGBE_EITR_CNT_WDIS;
+		break;
+	default:
+		break;
+	}
+	IXGBE_WRITE_REG(hw, IXGBE_EITR(v_idx), itr_reg);
+}
+
+static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector)
+{
+	u32 new_itr;
+
+	ixgbe_update_itr(q_vector, &q_vector->tx);
+	ixgbe_update_itr(q_vector, &q_vector->rx);
+
+	/* use the smallest value of new ITR delay calculations */
+	new_itr = min(q_vector->rx.itr, q_vector->tx.itr);
+
+	/* Clear latency flag if set, shift into correct position */
+	new_itr &= ~IXGBE_ITR_ADAPTIVE_LATENCY;
+	new_itr <<= 2;
+
+	if (new_itr != q_vector->itr) {
+		/* save the algorithm value here */
+		q_vector->itr = new_itr;
+
+		ixgbe_write_eitr(q_vector);
+	}
+}
+
+/**
+ * ixgbe_check_overtemp_subtask - check for over temperature
+ * @adapter: pointer to adapter
+ **/
+static void ixgbe_check_overtemp_subtask(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 eicr = adapter->interrupt_event;
+	s32 rc;
+
+	if (test_bit(__IXGBE_DOWN, &adapter->state))
+		return;
+
+	if (!(adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_EVENT))
+		return;
+
+	adapter->flags2 &= ~IXGBE_FLAG2_TEMP_SENSOR_EVENT;
+
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_82599_T3_LOM:
+		/*
+		 * Since the warning interrupt is for both ports
+		 * we don't have to check if:
+		 *  - This interrupt wasn't for our port.
+		 *  - We may have missed the interrupt so always have to
+		 *    check if we  got a LSC
+		 */
+		if (!(eicr & IXGBE_EICR_GPI_SDP0_8259X) &&
+		    !(eicr & IXGBE_EICR_LSC))
+			return;
+
+		if (!(eicr & IXGBE_EICR_LSC) && hw->mac.ops.check_link) {
+			u32 speed;
+			bool link_up = false;
+
+			hw->mac.ops.check_link(hw, &speed, &link_up, false);
+
+			if (link_up)
+				return;
+		}
+
+		/* Check if this is not due to overtemp */
+		if (hw->phy.ops.check_overtemp(hw) != IXGBE_ERR_OVERTEMP)
+			return;
+
+		break;
+	case IXGBE_DEV_ID_X550EM_A_1G_T:
+	case IXGBE_DEV_ID_X550EM_A_1G_T_L:
+		rc = hw->phy.ops.check_overtemp(hw);
+		if (rc != IXGBE_ERR_OVERTEMP)
+			return;
+		break;
+	default:
+		if (adapter->hw.mac.type >= ixgbe_mac_X540)
+			return;
+		if (!(eicr & IXGBE_EICR_GPI_SDP0(hw)))
+			return;
+		break;
+	}
+	e_crit(drv, "%s\n", ixgbe_overheat_msg);
+
+	adapter->interrupt_event = 0;
+}
+
+static void ixgbe_check_fan_failure(struct ixgbe_adapter *adapter, u32 eicr)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	if ((adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) &&
+	    (eicr & IXGBE_EICR_GPI_SDP1(hw))) {
+		e_crit(probe, "Fan has stopped, replace the adapter\n");
+		/* write to clear the interrupt */
+		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1(hw));
+	}
+}
+
+static void ixgbe_check_overtemp_event(struct ixgbe_adapter *adapter, u32 eicr)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	if (!(adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE))
+		return;
+
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82599EB:
+		/*
+		 * Need to check link state so complete overtemp check
+		 * on service task
+		 */
+		if (((eicr & IXGBE_EICR_GPI_SDP0(hw)) ||
+		     (eicr & IXGBE_EICR_LSC)) &&
+		    (!test_bit(__IXGBE_DOWN, &adapter->state))) {
+			adapter->interrupt_event = eicr;
+			adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_EVENT;
+			ixgbe_service_event_schedule(adapter);
+			return;
+		}
+		return;
+	case ixgbe_mac_x550em_a:
+		if (eicr & IXGBE_EICR_GPI_SDP0_X550EM_a) {
+			adapter->interrupt_event = eicr;
+			adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_EVENT;
+			ixgbe_service_event_schedule(adapter);
+			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC,
+					IXGBE_EICR_GPI_SDP0_X550EM_a);
+			IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICR,
+					IXGBE_EICR_GPI_SDP0_X550EM_a);
+		}
+		return;
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X540:
+		if (!(eicr & IXGBE_EICR_TS))
+			return;
+		break;
+	default:
+		return;
+	}
+
+	e_crit(drv, "%s\n", ixgbe_overheat_msg);
+}
+
+static inline bool ixgbe_is_sfp(struct ixgbe_hw *hw)
+{
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		if (hw->phy.type == ixgbe_phy_nl)
+			return true;
+		return false;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		switch (hw->mac.ops.get_media_type(hw)) {
+		case ixgbe_media_type_fiber:
+		case ixgbe_media_type_fiber_qsfp:
+			return true;
+		default:
+			return false;
+		}
+	default:
+		return false;
+	}
+}
+
+static void ixgbe_check_sfp_event(struct ixgbe_adapter *adapter, u32 eicr)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 eicr_mask = IXGBE_EICR_GPI_SDP2(hw);
+
+	if (!ixgbe_is_sfp(hw))
+		return;
+
+	/* Later MAC's use different SDP */
+	if (hw->mac.type >= ixgbe_mac_X540)
+		eicr_mask = IXGBE_EICR_GPI_SDP0_X540;
+
+	if (eicr & eicr_mask) {
+		/* Clear the interrupt */
+		IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr_mask);
+		if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
+			adapter->flags2 |= IXGBE_FLAG2_SFP_NEEDS_RESET;
+			adapter->sfp_poll_time = 0;
+			ixgbe_service_event_schedule(adapter);
+		}
+	}
+
+	if (adapter->hw.mac.type == ixgbe_mac_82599EB &&
+	    (eicr & IXGBE_EICR_GPI_SDP1(hw))) {
+		/* Clear the interrupt */
+		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1(hw));
+		if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
+			adapter->flags |= IXGBE_FLAG_NEED_LINK_CONFIG;
+			ixgbe_service_event_schedule(adapter);
+		}
+	}
+}
+
+static void ixgbe_check_lsc(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	adapter->lsc_int++;
+	adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
+	adapter->link_check_timeout = jiffies;
+	if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
+		IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC);
+		IXGBE_WRITE_FLUSH(hw);
+		ixgbe_service_event_schedule(adapter);
+	}
+}
+
+static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter,
+					   u64 qmask)
+{
+	u32 mask;
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		mask = (IXGBE_EIMS_RTX_QUEUE & qmask);
+		IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		mask = (qmask & 0xFFFFFFFF);
+		if (mask)
+			IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
+		mask = (qmask >> 32);
+		if (mask)
+			IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
+		break;
+	default:
+		break;
+	}
+	/* skip the flush */
+}
+
+/**
+ * ixgbe_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ * @queues: enable irqs for queues
+ * @flush: flush register write
+ **/
+static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter, bool queues,
+				    bool flush)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
+
+	/* don't reenable LSC while waiting for link */
+	if (adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE)
+		mask &= ~IXGBE_EIMS_LSC;
+
+	if (adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE)
+		switch (adapter->hw.mac.type) {
+		case ixgbe_mac_82599EB:
+			mask |= IXGBE_EIMS_GPI_SDP0(hw);
+			break;
+		case ixgbe_mac_X540:
+		case ixgbe_mac_X550:
+		case ixgbe_mac_X550EM_x:
+		case ixgbe_mac_x550em_a:
+			mask |= IXGBE_EIMS_TS;
+			break;
+		default:
+			break;
+		}
+	if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE)
+		mask |= IXGBE_EIMS_GPI_SDP1(hw);
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82599EB:
+		mask |= IXGBE_EIMS_GPI_SDP1(hw);
+		mask |= IXGBE_EIMS_GPI_SDP2(hw);
+		fallthrough;
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		if (adapter->hw.device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
+		    adapter->hw.device_id == IXGBE_DEV_ID_X550EM_A_SFP ||
+		    adapter->hw.device_id == IXGBE_DEV_ID_X550EM_A_SFP_N)
+			mask |= IXGBE_EIMS_GPI_SDP0(&adapter->hw);
+		if (adapter->hw.phy.type == ixgbe_phy_x550em_ext_t)
+			mask |= IXGBE_EICR_GPI_SDP0_X540;
+		mask |= IXGBE_EIMS_ECC;
+		mask |= IXGBE_EIMS_MAILBOX;
+		break;
+	default:
+		break;
+	}
+
+	if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) &&
+	    !(adapter->flags2 & IXGBE_FLAG2_FDIR_REQUIRES_REINIT))
+		mask |= IXGBE_EIMS_FLOW_DIR;
+
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask);
+	if (queues)
+		ixgbe_irq_enable_queues(adapter, ~0);
+	if (flush)
+		IXGBE_WRITE_FLUSH(&adapter->hw);
+}
+
+static irqreturn_t ixgbe_msix_other(int irq, void *data)
+{
+	struct ixgbe_adapter *adapter = data;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 eicr;
+
+	/*
+	 * Workaround for Silicon errata.  Use clear-by-write instead
+	 * of clear-by-read.  Reading with EICS will return the
+	 * interrupt causes without clearing, which later be done
+	 * with the write to EICR.
+	 */
+	eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
+
+	/* The lower 16bits of the EICR register are for the queue interrupts
+	 * which should be masked here in order to not accidentally clear them if
+	 * the bits are high when ixgbe_msix_other is called. There is a race
+	 * condition otherwise which results in possible performance loss
+	 * especially if the ixgbe_msix_other interrupt is triggering
+	 * consistently (as it would when PPS is turned on for the X540 device)
+	 */
+	eicr &= 0xFFFF0000;
+
+	IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr);
+
+	if (eicr & IXGBE_EICR_LSC)
+		ixgbe_check_lsc(adapter);
+
+	if (eicr & IXGBE_EICR_MAILBOX)
+		ixgbe_msg_task(adapter);
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		if (hw->phy.type == ixgbe_phy_x550em_ext_t &&
+		    (eicr & IXGBE_EICR_GPI_SDP0_X540)) {
+			adapter->flags2 |= IXGBE_FLAG2_PHY_INTERRUPT;
+			ixgbe_service_event_schedule(adapter);
+			IXGBE_WRITE_REG(hw, IXGBE_EICR,
+					IXGBE_EICR_GPI_SDP0_X540);
+		}
+		if (eicr & IXGBE_EICR_ECC) {
+			e_info(link, "Received ECC Err, initiating reset\n");
+			set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
+			ixgbe_service_event_schedule(adapter);
+			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
+		}
+		/* Handle Flow Director Full threshold interrupt */
+		if (eicr & IXGBE_EICR_FLOW_DIR) {
+			int reinit_count = 0;
+			int i;
+			for (i = 0; i < adapter->num_tx_queues; i++) {
+				struct ixgbe_ring *ring = adapter->tx_ring[i];
+				if (test_and_clear_bit(__IXGBE_TX_FDIR_INIT_DONE,
+						       &ring->state))
+					reinit_count++;
+			}
+			if (reinit_count) {
+				/* no more flow director interrupts until after init */
+				IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_FLOW_DIR);
+				adapter->flags2 |= IXGBE_FLAG2_FDIR_REQUIRES_REINIT;
+				ixgbe_service_event_schedule(adapter);
+			}
+		}
+		ixgbe_check_sfp_event(adapter, eicr);
+		ixgbe_check_overtemp_event(adapter, eicr);
+		break;
+	default:
+		break;
+	}
+
+	ixgbe_check_fan_failure(adapter, eicr);
+
+	if (unlikely(eicr & IXGBE_EICR_TIMESYNC))
+		ixgbe_ptp_check_pps_event(adapter);
+
+	/* re-enable the original interrupt state, no lsc, no queues */
+	if (!test_bit(__IXGBE_DOWN, &adapter->state))
+		ixgbe_irq_enable(adapter, false, false);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ixgbe_msix_clean_rings(int irq, void *data)
+{
+	struct ixgbe_q_vector *q_vector = data;
+
+	/* EIAM disabled interrupts (on this vector) for us */
+
+	if (q_vector->rx.ring || q_vector->tx.ring)
+		napi_schedule_irqoff(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ixgbe_poll - NAPI Rx polling callback
+ * @napi: structure for representing this polling device
+ * @budget: how many packets driver is allowed to clean
+ *
+ * This function is used for legacy and MSI, NAPI mode
+ **/
+int ixgbe_poll(struct napi_struct *napi, int budget)
+{
+	struct ixgbe_q_vector *q_vector =
+				container_of(napi, struct ixgbe_q_vector, napi);
+	struct ixgbe_adapter *adapter = q_vector->adapter;
+	struct ixgbe_ring *ring;
+	int per_ring_budget, work_done = 0;
+	bool clean_complete = true;
+
+#ifdef CONFIG_IXGBE_DCA
+	if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
+		ixgbe_update_dca(q_vector);
+#endif
+
+	ixgbe_for_each_ring(ring, q_vector->tx) {
+		bool wd = ring->xsk_pool ?
+			  ixgbe_clean_xdp_tx_irq(q_vector, ring, budget) :
+			  ixgbe_clean_tx_irq(q_vector, ring, budget);
+
+		if (!wd)
+			clean_complete = false;
+	}
+
+	/* Exit if we are called by netpoll */
+	if (budget <= 0)
+		return budget;
+
+	/* attempt to distribute budget to each queue fairly, but don't allow
+	 * the budget to go below 1 because we'll exit polling */
+	if (q_vector->rx.count > 1)
+		per_ring_budget = max(budget/q_vector->rx.count, 1);
+	else
+		per_ring_budget = budget;
+
+	ixgbe_for_each_ring(ring, q_vector->rx) {
+		int cleaned = ring->xsk_pool ?
+			      ixgbe_clean_rx_irq_zc(q_vector, ring,
+						    per_ring_budget) :
+			      ixgbe_clean_rx_irq(q_vector, ring,
+						 per_ring_budget);
+
+		work_done += cleaned;
+		if (cleaned >= per_ring_budget)
+			clean_complete = false;
+	}
+
+	/* If all work not completed, return budget and keep polling */
+	if (!clean_complete)
+		return budget;
+
+	/* all work done, exit the polling mode */
+	if (likely(napi_complete_done(napi, work_done))) {
+		if (adapter->rx_itr_setting & 1)
+			ixgbe_set_itr(q_vector);
+		if (!test_bit(__IXGBE_DOWN, &adapter->state))
+			ixgbe_irq_enable_queues(adapter,
+						BIT_ULL(q_vector->v_idx));
+	}
+
+	return min(work_done, budget - 1);
+}
+
+/**
+ * ixgbe_request_msix_irqs - Initialize MSI-X interrupts
+ * @adapter: board private structure
+ *
+ * ixgbe_request_msix_irqs allocates MSI-X vectors and requests
+ * interrupts from the kernel.
+ **/
+static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	unsigned int ri = 0, ti = 0;
+	int vector, err;
+
+	for (vector = 0; vector < adapter->num_q_vectors; vector++) {
+		struct ixgbe_q_vector *q_vector = adapter->q_vector[vector];
+		struct msix_entry *entry = &adapter->msix_entries[vector];
+
+		if (q_vector->tx.ring && q_vector->rx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "%s-TxRx-%u", netdev->name, ri++);
+			ti++;
+		} else if (q_vector->rx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "%s-rx-%u", netdev->name, ri++);
+		} else if (q_vector->tx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "%s-tx-%u", netdev->name, ti++);
+		} else {
+			/* skip this unused q_vector */
+			continue;
+		}
+		err = request_irq(entry->vector, &ixgbe_msix_clean_rings, 0,
+				  q_vector->name, q_vector);
+		if (err) {
+			e_err(probe, "request_irq failed for MSIX interrupt "
+			      "Error: %d\n", err);
+			goto free_queue_irqs;
+		}
+		/* If Flow Director is enabled, set interrupt affinity */
+		if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
+			/* assign the mask for this irq */
+			irq_update_affinity_hint(entry->vector,
+						 &q_vector->affinity_mask);
+		}
+	}
+
+	err = request_irq(adapter->msix_entries[vector].vector,
+			  ixgbe_msix_other, 0, netdev->name, adapter);
+	if (err) {
+		e_err(probe, "request_irq for msix_other failed: %d\n", err);
+		goto free_queue_irqs;
+	}
+
+	return 0;
+
+free_queue_irqs:
+	while (vector) {
+		vector--;
+		irq_update_affinity_hint(adapter->msix_entries[vector].vector,
+					 NULL);
+		free_irq(adapter->msix_entries[vector].vector,
+			 adapter->q_vector[vector]);
+	}
+	adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED;
+	pci_disable_msix(adapter->pdev);
+	kfree(adapter->msix_entries);
+	adapter->msix_entries = NULL;
+	return err;
+}
+
+/**
+ * ixgbe_intr - legacy mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ **/
+static irqreturn_t ixgbe_intr(int irq, void *data)
+{
+	struct ixgbe_adapter *adapter = data;
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_q_vector *q_vector = adapter->q_vector[0];
+	u32 eicr;
+
+	/*
+	 * Workaround for silicon errata #26 on 82598.  Mask the interrupt
+	 * before the read of EICR.
+	 */
+	IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_IRQ_CLEAR_MASK);
+
+	/* for NAPI, using EIAM to auto-mask tx/rx interrupt bits on read
+	 * therefore no explicit interrupt disable is necessary */
+	eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
+	if (!eicr) {
+		/*
+		 * shared interrupt alert!
+		 * make sure interrupts are enabled because the read will
+		 * have disabled interrupts due to EIAM
+		 * finish the workaround of silicon errata on 82598.  Unmask
+		 * the interrupt that we masked before the EICR read.
+		 */
+		if (!test_bit(__IXGBE_DOWN, &adapter->state))
+			ixgbe_irq_enable(adapter, true, true);
+		return IRQ_NONE;	/* Not our interrupt */
+	}
+
+	if (eicr & IXGBE_EICR_LSC)
+		ixgbe_check_lsc(adapter);
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82599EB:
+		ixgbe_check_sfp_event(adapter, eicr);
+		fallthrough;
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		if (eicr & IXGBE_EICR_ECC) {
+			e_info(link, "Received ECC Err, initiating reset\n");
+			set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
+			ixgbe_service_event_schedule(adapter);
+			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
+		}
+		ixgbe_check_overtemp_event(adapter, eicr);
+		break;
+	default:
+		break;
+	}
+
+	ixgbe_check_fan_failure(adapter, eicr);
+	if (unlikely(eicr & IXGBE_EICR_TIMESYNC))
+		ixgbe_ptp_check_pps_event(adapter);
+
+	/* would disable interrupts here but EIAM disabled it */
+	napi_schedule_irqoff(&q_vector->napi);
+
+	/*
+	 * re-enable link(maybe) and non-queue interrupts, no flush.
+	 * ixgbe_poll will re-enable the queue interrupts
+	 */
+	if (!test_bit(__IXGBE_DOWN, &adapter->state))
+		ixgbe_irq_enable(adapter, false, false);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ixgbe_request_irq - initialize interrupts
+ * @adapter: board private structure
+ *
+ * Attempts to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static int ixgbe_request_irq(struct ixgbe_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int err;
+
+	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
+		err = ixgbe_request_msix_irqs(adapter);
+	else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED)
+		err = request_irq(adapter->pdev->irq, ixgbe_intr, 0,
+				  netdev->name, adapter);
+	else
+		err = request_irq(adapter->pdev->irq, ixgbe_intr, IRQF_SHARED,
+				  netdev->name, adapter);
+
+	if (err)
+		e_err(probe, "request_irq failed, Error %d\n", err);
+
+	return err;
+}
+
+static void ixgbe_free_irq(struct ixgbe_adapter *adapter)
+{
+	int vector;
+
+	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) {
+		free_irq(adapter->pdev->irq, adapter);
+		return;
+	}
+
+	if (!adapter->msix_entries)
+		return;
+
+	for (vector = 0; vector < adapter->num_q_vectors; vector++) {
+		struct ixgbe_q_vector *q_vector = adapter->q_vector[vector];
+		struct msix_entry *entry = &adapter->msix_entries[vector];
+
+		/* free only the irqs that were actually requested */
+		if (!q_vector->rx.ring && !q_vector->tx.ring)
+			continue;
+
+		/* clear the affinity_mask in the IRQ descriptor */
+		irq_update_affinity_hint(entry->vector, NULL);
+
+		free_irq(entry->vector, q_vector);
+	}
+
+	free_irq(adapter->msix_entries[vector].vector, adapter);
+}
+
+/**
+ * ixgbe_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ **/
+static inline void ixgbe_irq_disable(struct ixgbe_adapter *adapter)
+{
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82598EB:
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
+		break;
+	default:
+		break;
+	}
+	IXGBE_WRITE_FLUSH(&adapter->hw);
+	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
+		int vector;
+
+		for (vector = 0; vector < adapter->num_q_vectors; vector++)
+			synchronize_irq(adapter->msix_entries[vector].vector);
+
+		synchronize_irq(adapter->msix_entries[vector++].vector);
+	} else {
+		synchronize_irq(adapter->pdev->irq);
+	}
+}
+
+/**
+ * ixgbe_configure_msi_and_legacy - Initialize PIN (INTA...) and MSI interrupts
+ * @adapter: board private structure
+ *
+ **/
+static void ixgbe_configure_msi_and_legacy(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_q_vector *q_vector = adapter->q_vector[0];
+
+	ixgbe_write_eitr(q_vector);
+
+	ixgbe_set_ivar(adapter, 0, 0, 0);
+	ixgbe_set_ivar(adapter, 1, 0, 0);
+
+	e_info(hw, "Legacy interrupt IVAR setup done\n");
+}
+
+/**
+ * ixgbe_configure_tx_ring - Configure 8259x Tx ring after Reset
+ * @adapter: board private structure
+ * @ring: structure containing ring specific data
+ *
+ * Configure the Tx descriptor ring after a reset.
+ **/
+void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter,
+			     struct ixgbe_ring *ring)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u64 tdba = ring->dma;
+	int wait_loop = 10;
+	u32 txdctl = IXGBE_TXDCTL_ENABLE;
+	u8 reg_idx = ring->reg_idx;
+
+	ring->xsk_pool = NULL;
+	if (ring_is_xdp(ring))
+		ring->xsk_pool = ixgbe_xsk_pool(adapter, ring);
+
+	/* disable queue to avoid issues while updating state */
+	IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), 0);
+	IXGBE_WRITE_FLUSH(hw);
+
+	IXGBE_WRITE_REG(hw, IXGBE_TDBAL(reg_idx),
+			(tdba & DMA_BIT_MASK(32)));
+	IXGBE_WRITE_REG(hw, IXGBE_TDBAH(reg_idx), (tdba >> 32));
+	IXGBE_WRITE_REG(hw, IXGBE_TDLEN(reg_idx),
+			ring->count * sizeof(union ixgbe_adv_tx_desc));
+	IXGBE_WRITE_REG(hw, IXGBE_TDH(reg_idx), 0);
+	IXGBE_WRITE_REG(hw, IXGBE_TDT(reg_idx), 0);
+	ring->tail = adapter->io_addr + IXGBE_TDT(reg_idx);
+
+	/*
+	 * set WTHRESH to encourage burst writeback, it should not be set
+	 * higher than 1 when:
+	 * - ITR is 0 as it could cause false TX hangs
+	 * - ITR is set to > 100k int/sec and BQL is enabled
+	 *
+	 * In order to avoid issues WTHRESH + PTHRESH should always be equal
+	 * to or less than the number of on chip descriptors, which is
+	 * currently 40.
+	 */
+	if (!ring->q_vector || (ring->q_vector->itr < IXGBE_100K_ITR))
+		txdctl |= 1u << 16;	/* WTHRESH = 1 */
+	else
+		txdctl |= 8u << 16;	/* WTHRESH = 8 */
+
+	/*
+	 * Setting PTHRESH to 32 both improves performance
+	 * and avoids a TX hang with DFP enabled
+	 */
+	txdctl |= (1u << 8) |	/* HTHRESH = 1 */
+		   32;		/* PTHRESH = 32 */
+
+	/* reinitialize flowdirector state */
+	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
+		ring->atr_sample_rate = adapter->atr_sample_rate;
+		ring->atr_count = 0;
+		set_bit(__IXGBE_TX_FDIR_INIT_DONE, &ring->state);
+	} else {
+		ring->atr_sample_rate = 0;
+	}
+
+	/* initialize XPS */
+	if (!test_and_set_bit(__IXGBE_TX_XPS_INIT_DONE, &ring->state)) {
+		struct ixgbe_q_vector *q_vector = ring->q_vector;
+
+		if (q_vector)
+			netif_set_xps_queue(ring->netdev,
+					    &q_vector->affinity_mask,
+					    ring->queue_index);
+	}
+
+	clear_bit(__IXGBE_HANG_CHECK_ARMED, &ring->state);
+
+	/* reinitialize tx_buffer_info */
+	memset(ring->tx_buffer_info, 0,
+	       sizeof(struct ixgbe_tx_buffer) * ring->count);
+
+	/* enable queue */
+	IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), txdctl);
+
+	/* TXDCTL.EN will return 0 on 82598 if link is down, so skip it */
+	if (hw->mac.type == ixgbe_mac_82598EB &&
+	    !(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
+		return;
+
+	/* poll to verify queue is enabled */
+	do {
+		usleep_range(1000, 2000);
+		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx));
+	} while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
+	if (!wait_loop)
+		hw_dbg(hw, "Could not enable Tx Queue %d\n", reg_idx);
+}
+
+static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 rttdcs, mtqc;
+	u8 tcs = adapter->hw_tcs;
+
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return;
+
+	/* disable the arbiter while setting MTQC */
+	rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
+	rttdcs |= IXGBE_RTTDCS_ARBDIS;
+	IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
+
+	/* set transmit pool layout */
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+		mtqc = IXGBE_MTQC_VT_ENA;
+		if (tcs > 4)
+			mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
+		else if (tcs > 1)
+			mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
+		else if (adapter->ring_feature[RING_F_VMDQ].mask ==
+			 IXGBE_82599_VMDQ_4Q_MASK)
+			mtqc |= IXGBE_MTQC_32VF;
+		else
+			mtqc |= IXGBE_MTQC_64VF;
+	} else {
+		if (tcs > 4) {
+			mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
+		} else if (tcs > 1) {
+			mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
+		} else {
+			u8 max_txq = adapter->num_tx_queues +
+				adapter->num_xdp_queues;
+			if (max_txq > 63)
+				mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
+			else
+				mtqc = IXGBE_MTQC_64Q_1PB;
+		}
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc);
+
+	/* Enable Security TX Buffer IFG for multiple pb */
+	if (tcs) {
+		u32 sectx = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+		sectx |= IXGBE_SECTX_DCB;
+		IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, sectx);
+	}
+
+	/* re-enable the arbiter */
+	rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
+	IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
+}
+
+/**
+ * ixgbe_configure_tx - Configure 8259x Transmit Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+static void ixgbe_configure_tx(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 dmatxctl;
+	u32 i;
+
+	ixgbe_setup_mtqc(adapter);
+
+	if (hw->mac.type != ixgbe_mac_82598EB) {
+		/* DMATXCTL.EN must be before Tx queues are enabled */
+		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
+		dmatxctl |= IXGBE_DMATXCTL_TE;
+		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
+	}
+
+	/* Setup the HW Tx Head and Tail descriptor pointers */
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		ixgbe_configure_tx_ring(adapter, adapter->tx_ring[i]);
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		ixgbe_configure_tx_ring(adapter, adapter->xdp_ring[i]);
+}
+
+static void ixgbe_enable_rx_drop(struct ixgbe_adapter *adapter,
+				 struct ixgbe_ring *ring)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u8 reg_idx = ring->reg_idx;
+	u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(reg_idx));
+
+	srrctl |= IXGBE_SRRCTL_DROP_EN;
+
+	IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(reg_idx), srrctl);
+}
+
+static void ixgbe_disable_rx_drop(struct ixgbe_adapter *adapter,
+				  struct ixgbe_ring *ring)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u8 reg_idx = ring->reg_idx;
+	u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(reg_idx));
+
+	srrctl &= ~IXGBE_SRRCTL_DROP_EN;
+
+	IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(reg_idx), srrctl);
+}
+
+#ifdef CONFIG_IXGBE_DCB
+void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter)
+#else
+static void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter)
+#endif
+{
+	int i;
+	bool pfc_en = adapter->dcb_cfg.pfc_mode_enable;
+
+	if (adapter->ixgbe_ieee_pfc)
+		pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
+
+	/*
+	 * We should set the drop enable bit if:
+	 *  SR-IOV is enabled
+	 *   or
+	 *  Number of Rx queues > 1 and flow control is disabled
+	 *
+	 *  This allows us to avoid head of line blocking for security
+	 *  and performance reasons.
+	 */
+	if (adapter->num_vfs || (adapter->num_rx_queues > 1 &&
+	    !(adapter->hw.fc.current_mode & ixgbe_fc_tx_pause) && !pfc_en)) {
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			ixgbe_enable_rx_drop(adapter, adapter->rx_ring[i]);
+	} else {
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			ixgbe_disable_rx_drop(adapter, adapter->rx_ring[i]);
+	}
+}
+
+#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
+
+static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter,
+				   struct ixgbe_ring *rx_ring)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 srrctl;
+	u8 reg_idx = rx_ring->reg_idx;
+
+	if (hw->mac.type == ixgbe_mac_82598EB) {
+		u16 mask = adapter->ring_feature[RING_F_RSS].mask;
+
+		/*
+		 * if VMDq is not active we must program one srrctl register
+		 * per RSS queue since we have enabled RDRXCTL.MVMEN
+		 */
+		reg_idx &= mask;
+	}
+
+	/* configure header buffer length, needed for RSC */
+	srrctl = IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
+
+	/* configure the packet buffer length */
+	if (rx_ring->xsk_pool) {
+		u32 xsk_buf_len = xsk_pool_get_rx_frame_size(rx_ring->xsk_pool);
+
+		/* If the MAC support setting RXDCTL.RLPML, the
+		 * SRRCTL[n].BSIZEPKT is set to PAGE_SIZE and
+		 * RXDCTL.RLPML is set to the actual UMEM buffer
+		 * size. If not, then we are stuck with a 1k buffer
+		 * size resolution. In this case frames larger than
+		 * the UMEM buffer size viewed in a 1k resolution will
+		 * be dropped.
+		 */
+		if (hw->mac.type != ixgbe_mac_82599EB)
+			srrctl |= PAGE_SIZE >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+		else
+			srrctl |= xsk_buf_len >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+	} else if (test_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state)) {
+		srrctl |= IXGBE_RXBUFFER_3K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+	} else {
+		srrctl |= IXGBE_RXBUFFER_2K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+	}
+
+	/* configure descriptor type */
+	srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
+
+	IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(reg_idx), srrctl);
+}
+
+/**
+ * ixgbe_rss_indir_tbl_entries - Return RSS indirection table entries
+ * @adapter: device handle
+ *
+ *  - 82598/82599/X540:     128
+ *  - X550(non-SRIOV mode): 512
+ *  - X550(SRIOV mode):     64
+ */
+u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter)
+{
+	if (adapter->hw.mac.type < ixgbe_mac_X550)
+		return 128;
+	else if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+		return 64;
+	else
+		return 512;
+}
+
+/**
+ * ixgbe_store_key - Write the RSS key to HW
+ * @adapter: device handle
+ *
+ * Write the RSS key stored in adapter.rss_key to HW.
+ */
+void ixgbe_store_key(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 0; i < 10; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), adapter->rss_key[i]);
+}
+
+/**
+ * ixgbe_init_rss_key - Initialize adapter RSS key
+ * @adapter: device handle
+ *
+ * Allocates and initializes the RSS key if it is not allocated.
+ **/
+static inline int ixgbe_init_rss_key(struct ixgbe_adapter *adapter)
+{
+	u32 *rss_key;
+
+	if (!adapter->rss_key) {
+		rss_key = kzalloc(IXGBE_RSS_KEY_SIZE, GFP_KERNEL);
+		if (unlikely(!rss_key))
+			return -ENOMEM;
+
+		netdev_rss_key_fill(rss_key, IXGBE_RSS_KEY_SIZE);
+		adapter->rss_key = rss_key;
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_store_reta - Write the RETA table to HW
+ * @adapter: device handle
+ *
+ * Write the RSS redirection table stored in adapter.rss_indir_tbl[] to HW.
+ */
+void ixgbe_store_reta(struct ixgbe_adapter *adapter)
+{
+	u32 i, reta_entries = ixgbe_rss_indir_tbl_entries(adapter);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 reta = 0;
+	u32 indices_multi;
+	u8 *indir_tbl = adapter->rss_indir_tbl;
+
+	/* Fill out the redirection table as follows:
+	 *  - 82598:      8 bit wide entries containing pair of 4 bit RSS
+	 *    indices.
+	 *  - 82599/X540: 8 bit wide entries containing 4 bit RSS index
+	 *  - X550:       8 bit wide entries containing 6 bit RSS index
+	 */
+	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
+		indices_multi = 0x11;
+	else
+		indices_multi = 0x1;
+
+	/* Write redirection table to HW */
+	for (i = 0; i < reta_entries; i++) {
+		reta |= indices_multi * indir_tbl[i] << (i & 0x3) * 8;
+		if ((i & 3) == 3) {
+			if (i < 128)
+				IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
+			else
+				IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32),
+						reta);
+			reta = 0;
+		}
+	}
+}
+
+/**
+ * ixgbe_store_vfreta - Write the RETA table to HW (x550 devices in SRIOV mode)
+ * @adapter: device handle
+ *
+ * Write the RSS redirection table stored in adapter.rss_indir_tbl[] to HW.
+ */
+static void ixgbe_store_vfreta(struct ixgbe_adapter *adapter)
+{
+	u32 i, reta_entries = ixgbe_rss_indir_tbl_entries(adapter);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vfreta = 0;
+
+	/* Write redirection table to HW */
+	for (i = 0; i < reta_entries; i++) {
+		u16 pool = adapter->num_rx_pools;
+
+		vfreta |= (u32)adapter->rss_indir_tbl[i] << (i & 0x3) * 8;
+		if ((i & 3) != 3)
+			continue;
+
+		while (pool--)
+			IXGBE_WRITE_REG(hw,
+					IXGBE_PFVFRETA(i >> 2, VMDQ_P(pool)),
+					vfreta);
+		vfreta = 0;
+	}
+}
+
+static void ixgbe_setup_reta(struct ixgbe_adapter *adapter)
+{
+	u32 i, j;
+	u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter);
+	u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
+
+	/* Program table for at least 4 queues w/ SR-IOV so that VFs can
+	 * make full use of any rings they may have.  We will use the
+	 * PSRTYPE register to control how many rings we use within the PF.
+	 */
+	if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 4))
+		rss_i = 4;
+
+	/* Fill out hash function seeds */
+	ixgbe_store_key(adapter);
+
+	/* Fill out redirection table */
+	memset(adapter->rss_indir_tbl, 0, sizeof(adapter->rss_indir_tbl));
+
+	for (i = 0, j = 0; i < reta_entries; i++, j++) {
+		if (j == rss_i)
+			j = 0;
+
+		adapter->rss_indir_tbl[i] = j;
+	}
+
+	ixgbe_store_reta(adapter);
+}
+
+static void ixgbe_setup_vfreta(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
+	int i, j;
+
+	/* Fill out hash function seeds */
+	for (i = 0; i < 10; i++) {
+		u16 pool = adapter->num_rx_pools;
+
+		while (pool--)
+			IXGBE_WRITE_REG(hw,
+					IXGBE_PFVFRSSRK(i, VMDQ_P(pool)),
+					*(adapter->rss_key + i));
+	}
+
+	/* Fill out the redirection table */
+	for (i = 0, j = 0; i < 64; i++, j++) {
+		if (j == rss_i)
+			j = 0;
+
+		adapter->rss_indir_tbl[i] = j;
+	}
+
+	ixgbe_store_vfreta(adapter);
+}
+
+static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 mrqc = 0, rss_field = 0, vfmrqc = 0;
+	u32 rxcsum;
+
+	/* Disable indicating checksum in descriptor, enables RSS hash */
+	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
+	rxcsum |= IXGBE_RXCSUM_PCSD;
+	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
+
+	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
+		if (adapter->ring_feature[RING_F_RSS].mask)
+			mrqc = IXGBE_MRQC_RSSEN;
+	} else {
+		u8 tcs = adapter->hw_tcs;
+
+		if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+			if (tcs > 4)
+				mrqc = IXGBE_MRQC_VMDQRT8TCEN;	/* 8 TCs */
+			else if (tcs > 1)
+				mrqc = IXGBE_MRQC_VMDQRT4TCEN;	/* 4 TCs */
+			else if (adapter->ring_feature[RING_F_VMDQ].mask ==
+				 IXGBE_82599_VMDQ_4Q_MASK)
+				mrqc = IXGBE_MRQC_VMDQRSS32EN;
+			else
+				mrqc = IXGBE_MRQC_VMDQRSS64EN;
+
+			/* Enable L3/L4 for Tx Switched packets only for X550,
+			 * older devices do not support this feature
+			 */
+			if (hw->mac.type >= ixgbe_mac_X550)
+				mrqc |= IXGBE_MRQC_L3L4TXSWEN;
+		} else {
+			if (tcs > 4)
+				mrqc = IXGBE_MRQC_RTRSS8TCEN;
+			else if (tcs > 1)
+				mrqc = IXGBE_MRQC_RTRSS4TCEN;
+			else
+				mrqc = IXGBE_MRQC_RSSEN;
+		}
+	}
+
+	/* Perform hash on these packet types */
+	rss_field |= IXGBE_MRQC_RSS_FIELD_IPV4 |
+		     IXGBE_MRQC_RSS_FIELD_IPV4_TCP |
+		     IXGBE_MRQC_RSS_FIELD_IPV6 |
+		     IXGBE_MRQC_RSS_FIELD_IPV6_TCP;
+
+	if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP)
+		rss_field |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP;
+	if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP)
+		rss_field |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP;
+
+	if ((hw->mac.type >= ixgbe_mac_X550) &&
+	    (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) {
+		u16 pool = adapter->num_rx_pools;
+
+		/* Enable VF RSS mode */
+		mrqc |= IXGBE_MRQC_MULTIPLE_RSS;
+		IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+
+		/* Setup RSS through the VF registers */
+		ixgbe_setup_vfreta(adapter);
+		vfmrqc = IXGBE_MRQC_RSSEN;
+		vfmrqc |= rss_field;
+
+		while (pool--)
+			IXGBE_WRITE_REG(hw,
+					IXGBE_PFVFMRQC(VMDQ_P(pool)),
+					vfmrqc);
+	} else {
+		ixgbe_setup_reta(adapter);
+		mrqc |= rss_field;
+		IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+	}
+}
+
+/**
+ * ixgbe_configure_rscctl - enable RSC for the indicated ring
+ * @adapter: address of board private structure
+ * @ring: structure containing ring specific data
+ **/
+static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter,
+				   struct ixgbe_ring *ring)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 rscctrl;
+	u8 reg_idx = ring->reg_idx;
+
+	if (!ring_is_rsc_enabled(ring))
+		return;
+
+	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(reg_idx));
+	rscctrl |= IXGBE_RSCCTL_RSCEN;
+	/*
+	 * we must limit the number of descriptors so that the
+	 * total size of max desc * buf_len is not greater
+	 * than 65536
+	 */
+	rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
+	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(reg_idx), rscctrl);
+}
+
+#define IXGBE_MAX_RX_DESC_POLL 10
+static void ixgbe_rx_desc_queue_enable(struct ixgbe_adapter *adapter,
+				       struct ixgbe_ring *ring)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int wait_loop = IXGBE_MAX_RX_DESC_POLL;
+	u32 rxdctl;
+	u8 reg_idx = ring->reg_idx;
+
+	if (ixgbe_removed(hw->hw_addr))
+		return;
+	/* RXDCTL.EN will return 0 on 82598 if link is down, so skip it */
+	if (hw->mac.type == ixgbe_mac_82598EB &&
+	    !(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
+		return;
+
+	do {
+		usleep_range(1000, 2000);
+		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+	} while (--wait_loop && !(rxdctl & IXGBE_RXDCTL_ENABLE));
+
+	if (!wait_loop) {
+		e_err(drv, "RXDCTL.ENABLE on Rx queue %d not set within "
+		      "the polling period\n", reg_idx);
+	}
+}
+
+void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
+			     struct ixgbe_ring *ring)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	union ixgbe_adv_rx_desc *rx_desc;
+	u64 rdba = ring->dma;
+	u32 rxdctl;
+	u8 reg_idx = ring->reg_idx;
+
+	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+	ring->xsk_pool = ixgbe_xsk_pool(adapter, ring);
+	if (ring->xsk_pool) {
+		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+						   MEM_TYPE_XSK_BUFF_POOL,
+						   NULL));
+		xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
+	} else {
+		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+						   MEM_TYPE_PAGE_SHARED, NULL));
+	}
+
+	/* disable queue to avoid use of these values while updating state */
+	rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+	rxdctl &= ~IXGBE_RXDCTL_ENABLE;
+
+	/* write value back with RXDCTL.ENABLE bit cleared */
+	IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
+	IXGBE_WRITE_FLUSH(hw);
+
+	IXGBE_WRITE_REG(hw, IXGBE_RDBAL(reg_idx), (rdba & DMA_BIT_MASK(32)));
+	IXGBE_WRITE_REG(hw, IXGBE_RDBAH(reg_idx), (rdba >> 32));
+	IXGBE_WRITE_REG(hw, IXGBE_RDLEN(reg_idx),
+			ring->count * sizeof(union ixgbe_adv_rx_desc));
+	/* Force flushing of IXGBE_RDLEN to prevent MDD */
+	IXGBE_WRITE_FLUSH(hw);
+
+	IXGBE_WRITE_REG(hw, IXGBE_RDH(reg_idx), 0);
+	IXGBE_WRITE_REG(hw, IXGBE_RDT(reg_idx), 0);
+	ring->tail = adapter->io_addr + IXGBE_RDT(reg_idx);
+
+	ixgbe_configure_srrctl(adapter, ring);
+	ixgbe_configure_rscctl(adapter, ring);
+
+	if (hw->mac.type == ixgbe_mac_82598EB) {
+		/*
+		 * enable cache line friendly hardware writes:
+		 * PTHRESH=32 descriptors (half the internal cache),
+		 * this also removes ugly rx_no_buffer_count increment
+		 * HTHRESH=4 descriptors (to minimize latency on fetch)
+		 * WTHRESH=8 burst writeback up to two cache lines
+		 */
+		rxdctl &= ~0x3FFFFF;
+		rxdctl |=  0x080420;
+#if (PAGE_SIZE < 8192)
+	/* RXDCTL.RLPML does not work on 82599 */
+	} else if (hw->mac.type != ixgbe_mac_82599EB) {
+		rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
+			    IXGBE_RXDCTL_RLPML_EN);
+
+		/* Limit the maximum frame size so we don't overrun the skb.
+		 * This can happen in SRIOV mode when the MTU of the VF is
+		 * higher than the MTU of the PF.
+		 */
+		if (ring_uses_build_skb(ring) &&
+		    !test_bit(__IXGBE_RX_3K_BUFFER, &ring->state))
+			rxdctl |= IXGBE_MAX_2K_FRAME_BUILD_SKB |
+				  IXGBE_RXDCTL_RLPML_EN;
+#endif
+	}
+
+	ring->rx_offset = ixgbe_rx_offset(ring);
+
+	if (ring->xsk_pool && hw->mac.type != ixgbe_mac_82599EB) {
+		u32 xsk_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool);
+
+		rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
+			    IXGBE_RXDCTL_RLPML_EN);
+		rxdctl |= xsk_buf_len | IXGBE_RXDCTL_RLPML_EN;
+
+		ring->rx_buf_len = xsk_buf_len;
+	}
+
+	/* initialize rx_buffer_info */
+	memset(ring->rx_buffer_info, 0,
+	       sizeof(struct ixgbe_rx_buffer) * ring->count);
+
+	/* initialize Rx descriptor 0 */
+	rx_desc = IXGBE_RX_DESC(ring, 0);
+	rx_desc->wb.upper.length = 0;
+
+	/* enable receive descriptor ring */
+	rxdctl |= IXGBE_RXDCTL_ENABLE;
+	IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
+
+	ixgbe_rx_desc_queue_enable(adapter, ring);
+	if (ring->xsk_pool)
+		ixgbe_alloc_rx_buffers_zc(ring, ixgbe_desc_unused(ring));
+	else
+		ixgbe_alloc_rx_buffers(ring, ixgbe_desc_unused(ring));
+}
+
+static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int rss_i = adapter->ring_feature[RING_F_RSS].indices;
+	u16 pool = adapter->num_rx_pools;
+
+	/* PSRTYPE must be initialized in non 82598 adapters */
+	u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
+		      IXGBE_PSRTYPE_UDPHDR |
+		      IXGBE_PSRTYPE_IPV4HDR |
+		      IXGBE_PSRTYPE_L2HDR |
+		      IXGBE_PSRTYPE_IPV6HDR;
+
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return;
+
+	if (rss_i > 3)
+		psrtype |= 2u << 29;
+	else if (rss_i > 1)
+		psrtype |= 1u << 29;
+
+	while (pool--)
+		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(VMDQ_P(pool)), psrtype);
+}
+
+static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u16 pool = adapter->num_rx_pools;
+	u32 reg_offset, vf_shift, vmolr;
+	u32 gcr_ext, vmdctl;
+	int i;
+
+	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
+		return;
+
+	vmdctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL);
+	vmdctl |= IXGBE_VMD_CTL_VMDQ_EN;
+	vmdctl &= ~IXGBE_VT_CTL_POOL_MASK;
+	vmdctl |= VMDQ_P(0) << IXGBE_VT_CTL_POOL_SHIFT;
+	vmdctl |= IXGBE_VT_CTL_REPLEN;
+	IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl);
+
+	/* accept untagged packets until a vlan tag is
+	 * specifically set for the VMDQ queue/pool
+	 */
+	vmolr = IXGBE_VMOLR_AUPE;
+	while (pool--)
+		IXGBE_WRITE_REG(hw, IXGBE_VMOLR(VMDQ_P(pool)), vmolr);
+
+	vf_shift = VMDQ_P(0) % 32;
+	reg_offset = (VMDQ_P(0) >= 32) ? 1 : 0;
+
+	/* Enable only the PF's pool for Tx/Rx */
+	IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), GENMASK(31, vf_shift));
+	IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset ^ 1), reg_offset - 1);
+	IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), GENMASK(31, vf_shift));
+	IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset ^ 1), reg_offset - 1);
+	if (adapter->bridge_mode == BRIDGE_MODE_VEB)
+		IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
+
+	/* Map PF MAC address in RAR Entry 0 to first pool following VFs */
+	hw->mac.ops.set_vmdq(hw, 0, VMDQ_P(0));
+
+	/* clear VLAN promisc flag so VFTA will be updated if necessary */
+	adapter->flags2 &= ~IXGBE_FLAG2_VLAN_PROMISC;
+
+	/*
+	 * Set up VF register offsets for selected VT Mode,
+	 * i.e. 32 or 64 VFs for SR-IOV
+	 */
+	switch (adapter->ring_feature[RING_F_VMDQ].mask) {
+	case IXGBE_82599_VMDQ_8Q_MASK:
+		gcr_ext = IXGBE_GCR_EXT_VT_MODE_16;
+		break;
+	case IXGBE_82599_VMDQ_4Q_MASK:
+		gcr_ext = IXGBE_GCR_EXT_VT_MODE_32;
+		break;
+	default:
+		gcr_ext = IXGBE_GCR_EXT_VT_MODE_64;
+		break;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext);
+
+	for (i = 0; i < adapter->num_vfs; i++) {
+		/* configure spoof checking */
+		ixgbe_ndo_set_vf_spoofchk(adapter->netdev, i,
+					  adapter->vfinfo[i].spoofchk_enabled);
+
+		/* Enable/Disable RSS query feature  */
+		ixgbe_ndo_set_vf_rss_query_en(adapter->netdev, i,
+					  adapter->vfinfo[i].rss_query_enabled);
+	}
+}
+
+static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+	struct ixgbe_ring *rx_ring;
+	int i;
+	u32 mhadd, hlreg0;
+
+#ifdef IXGBE_FCOE
+	/* adjust max frame to be able to do baby jumbo for FCoE */
+	if ((adapter->flags & IXGBE_FLAG_FCOE_ENABLED) &&
+	    (max_frame < IXGBE_FCOE_JUMBO_FRAME_SIZE))
+		max_frame = IXGBE_FCOE_JUMBO_FRAME_SIZE;
+
+#endif /* IXGBE_FCOE */
+
+	/* adjust max frame to be at least the size of a standard frame */
+	if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
+		max_frame = (ETH_FRAME_LEN + ETH_FCS_LEN);
+
+	mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
+	if (max_frame != (mhadd >> IXGBE_MHADD_MFS_SHIFT)) {
+		mhadd &= ~IXGBE_MHADD_MFS_MASK;
+		mhadd |= max_frame << IXGBE_MHADD_MFS_SHIFT;
+
+		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
+	}
+
+	hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+	/* set jumbo enable since MHADD.MFS is keeping size locked at max_frame */
+	hlreg0 |= IXGBE_HLREG0_JUMBOEN;
+	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
+
+	/*
+	 * Setup the HW Rx Head and Tail Descriptor Pointers and
+	 * the Base and Length of the Rx Descriptor Ring
+	 */
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		rx_ring = adapter->rx_ring[i];
+
+		clear_ring_rsc_enabled(rx_ring);
+		clear_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state);
+		clear_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state);
+
+		if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
+			set_ring_rsc_enabled(rx_ring);
+
+		if (test_bit(__IXGBE_RX_FCOE, &rx_ring->state))
+			set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state);
+
+		if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY)
+			continue;
+
+		set_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state);
+
+#if (PAGE_SIZE < 8192)
+		if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
+			set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state);
+
+		if (IXGBE_2K_TOO_SMALL_WITH_PADDING ||
+		    (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN)))
+			set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state);
+#endif
+	}
+}
+
+static void ixgbe_setup_rdrxctl(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		/*
+		 * For VMDq support of different descriptor types or
+		 * buffer sizes through the use of multiple SRRCTL
+		 * registers, RDRXCTL.MVMEN must be set to 1
+		 *
+		 * also, the manual doesn't mention it clearly but DCA hints
+		 * will only use queue 0's tags unless this bit is set.  Side
+		 * effects of setting this bit are only that SRRCTL must be
+		 * fully programmed [0..15]
+		 */
+		rdrxctl |= IXGBE_RDRXCTL_MVMEN;
+		break;
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		if (adapter->num_vfs)
+			rdrxctl |= IXGBE_RDRXCTL_PSP;
+		fallthrough;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+		/* Disable RSC for ACK packets */
+		IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
+		   (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
+		rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
+		/* hardware requires some bits to be set by default */
+		rdrxctl |= (IXGBE_RDRXCTL_RSCACKC | IXGBE_RDRXCTL_FCOE_WRFIX);
+		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
+		break;
+	default:
+		/* We should do nothing since we don't know this hardware */
+		return;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
+}
+
+/**
+ * ixgbe_configure_rx - Configure 8259x Receive Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+	u32 rxctrl, rfctl;
+
+	/* disable receives while setting up the descriptors */
+	hw->mac.ops.disable_rx(hw);
+
+	ixgbe_setup_psrtype(adapter);
+	ixgbe_setup_rdrxctl(adapter);
+
+	/* RSC Setup */
+	rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL);
+	rfctl &= ~IXGBE_RFCTL_RSC_DIS;
+	if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED))
+		rfctl |= IXGBE_RFCTL_RSC_DIS;
+
+	/* disable NFS filtering */
+	rfctl |= (IXGBE_RFCTL_NFSW_DIS | IXGBE_RFCTL_NFSR_DIS);
+	IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
+
+	/* Program registers for the distribution of queues */
+	ixgbe_setup_mrqc(adapter);
+
+	/* set_rx_buffer_len must be called before ring initialization */
+	ixgbe_set_rx_buffer_len(adapter);
+
+	/*
+	 * Setup the HW Rx Head and Tail Descriptor Pointers and
+	 * the Base and Length of the Rx Descriptor Ring
+	 */
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		ixgbe_configure_rx_ring(adapter, adapter->rx_ring[i]);
+
+	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+	/* disable drop enable for 82598 parts */
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		rxctrl |= IXGBE_RXCTRL_DMBYPS;
+
+	/* enable all receives */
+	rxctrl |= IXGBE_RXCTRL_RXEN;
+	hw->mac.ops.enable_rx_dma(hw, rxctrl);
+}
+
+static int ixgbe_vlan_rx_add_vid(struct net_device *netdev,
+				 __be16 proto, u16 vid)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	/* add VID to filter table */
+	if (!vid || !(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
+		hw->mac.ops.set_vfta(&adapter->hw, vid, VMDQ_P(0), true, !!vid);
+
+	set_bit(vid, adapter->active_vlans);
+
+	return 0;
+}
+
+static int ixgbe_find_vlvf_entry(struct ixgbe_hw *hw, u32 vlan)
+{
+	u32 vlvf;
+	int idx;
+
+	/* short cut the special case */
+	if (vlan == 0)
+		return 0;
+
+	/* Search for the vlan id in the VLVF entries */
+	for (idx = IXGBE_VLVF_ENTRIES; --idx;) {
+		vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(idx));
+		if ((vlvf & VLAN_VID_MASK) == vlan)
+			break;
+	}
+
+	return idx;
+}
+
+void ixgbe_update_pf_promisc_vlvf(struct ixgbe_adapter *adapter, u32 vid)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 bits, word;
+	int idx;
+
+	idx = ixgbe_find_vlvf_entry(hw, vid);
+	if (!idx)
+		return;
+
+	/* See if any other pools are set for this VLAN filter
+	 * entry other than the PF.
+	 */
+	word = idx * 2 + (VMDQ_P(0) / 32);
+	bits = ~BIT(VMDQ_P(0) % 32);
+	bits &= IXGBE_READ_REG(hw, IXGBE_VLVFB(word));
+
+	/* Disable the filter so this falls into the default pool. */
+	if (!bits && !IXGBE_READ_REG(hw, IXGBE_VLVFB(word ^ 1))) {
+		if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
+			IXGBE_WRITE_REG(hw, IXGBE_VLVFB(word), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_VLVF(idx), 0);
+	}
+}
+
+static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev,
+				  __be16 proto, u16 vid)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	/* remove VID from filter table */
+	if (vid && !(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
+		hw->mac.ops.set_vfta(hw, vid, VMDQ_P(0), false, true);
+
+	clear_bit(vid, adapter->active_vlans);
+
+	return 0;
+}
+
+/**
+ * ixgbe_vlan_strip_disable - helper to disable hw vlan stripping
+ * @adapter: driver data
+ */
+static void ixgbe_vlan_strip_disable(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vlnctrl;
+	int i, j;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+		vlnctrl &= ~IXGBE_VLNCTRL_VME;
+		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			struct ixgbe_ring *ring = adapter->rx_ring[i];
+
+			if (!netif_is_ixgbe(ring->netdev))
+				continue;
+
+			j = ring->reg_idx;
+			vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(j));
+			vlnctrl &= ~IXGBE_RXDCTL_VME;
+			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(j), vlnctrl);
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * ixgbe_vlan_strip_enable - helper to enable hw vlan stripping
+ * @adapter: driver data
+ */
+static void ixgbe_vlan_strip_enable(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vlnctrl;
+	int i, j;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+		vlnctrl |= IXGBE_VLNCTRL_VME;
+		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			struct ixgbe_ring *ring = adapter->rx_ring[i];
+
+			if (!netif_is_ixgbe(ring->netdev))
+				continue;
+
+			j = ring->reg_idx;
+			vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(j));
+			vlnctrl |= IXGBE_RXDCTL_VME;
+			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(j), vlnctrl);
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vlnctrl, i;
+
+	vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+
+	if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) {
+	/* For VMDq and SR-IOV we must leave VLAN filtering enabled */
+		vlnctrl |= IXGBE_VLNCTRL_VFE;
+		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+	} else {
+		vlnctrl &= ~IXGBE_VLNCTRL_VFE;
+		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+		return;
+	}
+
+	/* Nothing to do for 82598 */
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return;
+
+	/* We are already in VLAN promisc, nothing to do */
+	if (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC)
+		return;
+
+	/* Set flag so we don't redo unnecessary work */
+	adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC;
+
+	/* Add PF to all active pools */
+	for (i = IXGBE_VLVF_ENTRIES; --i;) {
+		u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32);
+		u32 vlvfb = IXGBE_READ_REG(hw, reg_offset);
+
+		vlvfb |= BIT(VMDQ_P(0) % 32);
+		IXGBE_WRITE_REG(hw, reg_offset, vlvfb);
+	}
+
+	/* Set all bits in the VLAN filter table array */
+	for (i = hw->mac.vft_size; i--;)
+		IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), ~0U);
+}
+
+#define VFTA_BLOCK_SIZE 8
+static void ixgbe_scrub_vfta(struct ixgbe_adapter *adapter, u32 vfta_offset)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vfta[VFTA_BLOCK_SIZE] = { 0 };
+	u32 vid_start = vfta_offset * 32;
+	u32 vid_end = vid_start + (VFTA_BLOCK_SIZE * 32);
+	u32 i, vid, word, bits;
+
+	for (i = IXGBE_VLVF_ENTRIES; --i;) {
+		u32 vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(i));
+
+		/* pull VLAN ID from VLVF */
+		vid = vlvf & VLAN_VID_MASK;
+
+		/* only concern outselves with a certain range */
+		if (vid < vid_start || vid >= vid_end)
+			continue;
+
+		if (vlvf) {
+			/* record VLAN ID in VFTA */
+			vfta[(vid - vid_start) / 32] |= BIT(vid % 32);
+
+			/* if PF is part of this then continue */
+			if (test_bit(vid, adapter->active_vlans))
+				continue;
+		}
+
+		/* remove PF from the pool */
+		word = i * 2 + VMDQ_P(0) / 32;
+		bits = ~BIT(VMDQ_P(0) % 32);
+		bits &= IXGBE_READ_REG(hw, IXGBE_VLVFB(word));
+		IXGBE_WRITE_REG(hw, IXGBE_VLVFB(word), bits);
+	}
+
+	/* extract values from active_vlans and write back to VFTA */
+	for (i = VFTA_BLOCK_SIZE; i--;) {
+		vid = (vfta_offset + i) * 32;
+		word = vid / BITS_PER_LONG;
+		bits = vid % BITS_PER_LONG;
+
+		vfta[i] |= adapter->active_vlans[word] >> bits;
+
+		IXGBE_WRITE_REG(hw, IXGBE_VFTA(vfta_offset + i), vfta[i]);
+	}
+}
+
+static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vlnctrl, i;
+
+	/* Set VLAN filtering to enabled */
+	vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+	vlnctrl |= IXGBE_VLNCTRL_VFE;
+	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+
+	if (!(adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) ||
+	    hw->mac.type == ixgbe_mac_82598EB)
+		return;
+
+	/* We are not in VLAN promisc, nothing to do */
+	if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
+		return;
+
+	/* Set flag so we don't redo unnecessary work */
+	adapter->flags2 &= ~IXGBE_FLAG2_VLAN_PROMISC;
+
+	for (i = 0; i < hw->mac.vft_size; i += VFTA_BLOCK_SIZE)
+		ixgbe_scrub_vfta(adapter, i);
+}
+
+static void ixgbe_restore_vlan(struct ixgbe_adapter *adapter)
+{
+	u16 vid = 1;
+
+	ixgbe_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0);
+
+	for_each_set_bit_from(vid, adapter->active_vlans, VLAN_N_VID)
+		ixgbe_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid);
+}
+
+/**
+ * ixgbe_write_mc_addr_list - write multicast addresses to MTA
+ * @netdev: network interface device structure
+ *
+ * Writes multicast address list to the MTA hash table.
+ * Returns: -ENOMEM on failure
+ *                0 on no addresses written
+ *                X on writing X addresses to MTA
+ **/
+static int ixgbe_write_mc_addr_list(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	if (!netif_running(netdev))
+		return 0;
+
+	if (hw->mac.ops.update_mc_addr_list)
+		hw->mac.ops.update_mc_addr_list(hw, netdev);
+	else
+		return -ENOMEM;
+
+#ifdef CONFIG_PCI_IOV
+	ixgbe_restore_vf_multicasts(adapter);
+#endif
+
+	return netdev_mc_count(netdev);
+}
+
+#ifdef CONFIG_PCI_IOV
+void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+		mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED;
+
+		if (mac_table->state & IXGBE_MAC_STATE_IN_USE)
+			hw->mac.ops.set_rar(hw, i,
+					    mac_table->addr,
+					    mac_table->pool,
+					    IXGBE_RAH_AV);
+		else
+			hw->mac.ops.clear_rar(hw, i);
+	}
+}
+
+#endif
+static void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+		if (!(mac_table->state & IXGBE_MAC_STATE_MODIFIED))
+			continue;
+
+		mac_table->state &= ~IXGBE_MAC_STATE_MODIFIED;
+
+		if (mac_table->state & IXGBE_MAC_STATE_IN_USE)
+			hw->mac.ops.set_rar(hw, i,
+					    mac_table->addr,
+					    mac_table->pool,
+					    IXGBE_RAH_AV);
+		else
+			hw->mac.ops.clear_rar(hw, i);
+	}
+}
+
+static void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+		mac_table->state |= IXGBE_MAC_STATE_MODIFIED;
+		mac_table->state &= ~IXGBE_MAC_STATE_IN_USE;
+	}
+
+	ixgbe_sync_mac_table(adapter);
+}
+
+static int ixgbe_available_rars(struct ixgbe_adapter *adapter, u16 pool)
+{
+	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i, count = 0;
+
+	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+		/* do not count default RAR as available */
+		if (mac_table->state & IXGBE_MAC_STATE_DEFAULT)
+			continue;
+
+		/* only count unused and addresses that belong to us */
+		if (mac_table->state & IXGBE_MAC_STATE_IN_USE) {
+			if (mac_table->pool != pool)
+				continue;
+		}
+
+		count++;
+	}
+
+	return count;
+}
+
+/* this function destroys the first RAR entry */
+static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	memcpy(&mac_table->addr, hw->mac.addr, ETH_ALEN);
+	mac_table->pool = VMDQ_P(0);
+
+	mac_table->state = IXGBE_MAC_STATE_DEFAULT | IXGBE_MAC_STATE_IN_USE;
+
+	hw->mac.ops.set_rar(hw, 0, mac_table->addr, mac_table->pool,
+			    IXGBE_RAH_AV);
+}
+
+int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter,
+			 const u8 *addr, u16 pool)
+{
+	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	if (is_zero_ether_addr(addr))
+		return -EINVAL;
+
+	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+		if (mac_table->state & IXGBE_MAC_STATE_IN_USE)
+			continue;
+
+		ether_addr_copy(mac_table->addr, addr);
+		mac_table->pool = pool;
+
+		mac_table->state |= IXGBE_MAC_STATE_MODIFIED |
+				    IXGBE_MAC_STATE_IN_USE;
+
+		ixgbe_sync_mac_table(adapter);
+
+		return i;
+	}
+
+	return -ENOMEM;
+}
+
+int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter,
+			 const u8 *addr, u16 pool)
+{
+	struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0];
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	if (is_zero_ether_addr(addr))
+		return -EINVAL;
+
+	/* search table for addr, if found clear IN_USE flag and sync */
+	for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) {
+		/* we can only delete an entry if it is in use */
+		if (!(mac_table->state & IXGBE_MAC_STATE_IN_USE))
+			continue;
+		/* we only care about entries that belong to the given pool */
+		if (mac_table->pool != pool)
+			continue;
+		/* we only care about a specific MAC address */
+		if (!ether_addr_equal(addr, mac_table->addr))
+			continue;
+
+		mac_table->state |= IXGBE_MAC_STATE_MODIFIED;
+		mac_table->state &= ~IXGBE_MAC_STATE_IN_USE;
+
+		ixgbe_sync_mac_table(adapter);
+
+		return 0;
+	}
+
+	return -ENOMEM;
+}
+
+static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	int ret;
+
+	ret = ixgbe_add_mac_filter(adapter, addr, VMDQ_P(0));
+
+	return min_t(int, ret, 0);
+}
+
+static int ixgbe_uc_unsync(struct net_device *netdev, const unsigned char *addr)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	ixgbe_del_mac_filter(adapter, addr, VMDQ_P(0));
+
+	return 0;
+}
+
+/**
+ * ixgbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The set_rx_method entry point is called whenever the unicast/multicast
+ * address list or the network interface flags are updated.  This routine is
+ * responsible for configuring the hardware for proper unicast, multicast and
+ * promiscuous mode.
+ **/
+void ixgbe_set_rx_mode(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE;
+	netdev_features_t features = netdev->features;
+	int count;
+
+	/* Check for Promiscuous and All Multicast modes */
+	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+
+	/* set all bits that we expect to always be set */
+	fctrl &= ~IXGBE_FCTRL_SBP; /* disable store-bad-packets */
+	fctrl |= IXGBE_FCTRL_BAM;
+	fctrl |= IXGBE_FCTRL_DPF; /* discard pause frames when FC enabled */
+	fctrl |= IXGBE_FCTRL_PMCF;
+
+	/* clear the bits we are changing the status of */
+	fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
+	if (netdev->flags & IFF_PROMISC) {
+		hw->addr_ctrl.user_set_promisc = true;
+		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
+		vmolr |= IXGBE_VMOLR_MPE;
+		features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+	} else {
+		if (netdev->flags & IFF_ALLMULTI) {
+			fctrl |= IXGBE_FCTRL_MPE;
+			vmolr |= IXGBE_VMOLR_MPE;
+		}
+		hw->addr_ctrl.user_set_promisc = false;
+	}
+
+	/*
+	 * Write addresses to available RAR registers, if there is not
+	 * sufficient space to store all the addresses then enable
+	 * unicast promiscuous mode
+	 */
+	if (__dev_uc_sync(netdev, ixgbe_uc_sync, ixgbe_uc_unsync)) {
+		fctrl |= IXGBE_FCTRL_UPE;
+		vmolr |= IXGBE_VMOLR_ROPE;
+	}
+
+	/* Write addresses to the MTA, if the attempt fails
+	 * then we should just turn on promiscuous mode so
+	 * that we can at least receive multicast traffic
+	 */
+	count = ixgbe_write_mc_addr_list(netdev);
+	if (count < 0) {
+		fctrl |= IXGBE_FCTRL_MPE;
+		vmolr |= IXGBE_VMOLR_MPE;
+	} else if (count) {
+		vmolr |= IXGBE_VMOLR_ROMPE;
+	}
+
+	if (hw->mac.type != ixgbe_mac_82598EB) {
+		vmolr |= IXGBE_READ_REG(hw, IXGBE_VMOLR(VMDQ_P(0))) &
+			 ~(IXGBE_VMOLR_MPE | IXGBE_VMOLR_ROMPE |
+			   IXGBE_VMOLR_ROPE);
+		IXGBE_WRITE_REG(hw, IXGBE_VMOLR(VMDQ_P(0)), vmolr);
+	}
+
+	/* This is useful for sniffing bad packets. */
+	if (features & NETIF_F_RXALL) {
+		/* UPE and MPE will be handled by normal PROMISC logic
+		 * in e1000e_set_rx_mode */
+		fctrl |= (IXGBE_FCTRL_SBP | /* Receive bad packets */
+			  IXGBE_FCTRL_BAM | /* RX All Bcast Pkts */
+			  IXGBE_FCTRL_PMCF); /* RX All MAC Ctrl Pkts */
+
+		fctrl &= ~(IXGBE_FCTRL_DPF);
+		/* NOTE:  VLAN filtering is disabled by setting PROMISC */
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
+
+	if (features & NETIF_F_HW_VLAN_CTAG_RX)
+		ixgbe_vlan_strip_enable(adapter);
+	else
+		ixgbe_vlan_strip_disable(adapter);
+
+	if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
+		ixgbe_vlan_promisc_disable(adapter);
+	else
+		ixgbe_vlan_promisc_enable(adapter);
+}
+
+static void ixgbe_napi_enable_all(struct ixgbe_adapter *adapter)
+{
+	int q_idx;
+
+	for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++)
+		napi_enable(&adapter->q_vector[q_idx]->napi);
+}
+
+static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter)
+{
+	int q_idx;
+
+	for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++)
+		napi_disable(&adapter->q_vector[q_idx]->napi);
+}
+
+static int ixgbe_udp_tunnel_sync(struct net_device *dev, unsigned int table)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct udp_tunnel_info ti;
+
+	udp_tunnel_nic_get_port(dev, table, 0, &ti);
+	if (ti.type == UDP_TUNNEL_TYPE_VXLAN)
+		adapter->vxlan_port = ti.port;
+	else
+		adapter->geneve_port = ti.port;
+
+	IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL,
+			ntohs(adapter->vxlan_port) |
+			ntohs(adapter->geneve_port) <<
+				IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT);
+	return 0;
+}
+
+static const struct udp_tunnel_nic_info ixgbe_udp_tunnels_x550 = {
+	.sync_table	= ixgbe_udp_tunnel_sync,
+	.flags		= UDP_TUNNEL_NIC_INFO_IPV4_ONLY,
+	.tables		= {
+		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN,  },
+	},
+};
+
+static const struct udp_tunnel_nic_info ixgbe_udp_tunnels_x550em_a = {
+	.sync_table	= ixgbe_udp_tunnel_sync,
+	.flags		= UDP_TUNNEL_NIC_INFO_IPV4_ONLY,
+	.tables		= {
+		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN,  },
+		{ .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_GENEVE, },
+	},
+};
+
+#ifdef CONFIG_IXGBE_DCB
+/**
+ * ixgbe_configure_dcb - Configure DCB hardware
+ * @adapter: ixgbe adapter struct
+ *
+ * This is called by the driver on open to configure the DCB hardware.
+ * This is also called by the gennetlink interface when reconfiguring
+ * the DCB state.
+ */
+static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int max_frame = adapter->netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+	if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED)) {
+		if (hw->mac.type == ixgbe_mac_82598EB)
+			netif_set_tso_max_size(adapter->netdev, 65536);
+		return;
+	}
+
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		netif_set_tso_max_size(adapter->netdev, 32768);
+
+#ifdef IXGBE_FCOE
+	if (adapter->netdev->features & NETIF_F_FCOE_MTU)
+		max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE);
+#endif
+
+	/* reconfigure the hardware */
+	if (adapter->dcbx_cap & DCB_CAP_DCBX_VER_CEE) {
+		ixgbe_dcb_calculate_tc_credits(hw, &adapter->dcb_cfg, max_frame,
+						DCB_TX_CONFIG);
+		ixgbe_dcb_calculate_tc_credits(hw, &adapter->dcb_cfg, max_frame,
+						DCB_RX_CONFIG);
+		ixgbe_dcb_hw_config(hw, &adapter->dcb_cfg);
+	} else if (adapter->ixgbe_ieee_ets && adapter->ixgbe_ieee_pfc) {
+		ixgbe_dcb_hw_ets(&adapter->hw,
+				 adapter->ixgbe_ieee_ets,
+				 max_frame);
+		ixgbe_dcb_hw_pfc_config(&adapter->hw,
+					adapter->ixgbe_ieee_pfc->pfc_en,
+					adapter->ixgbe_ieee_ets->prio_tc);
+	}
+
+	/* Enable RSS Hash per TC */
+	if (hw->mac.type != ixgbe_mac_82598EB) {
+		u32 msb = 0;
+		u16 rss_i = adapter->ring_feature[RING_F_RSS].indices - 1;
+
+		while (rss_i) {
+			msb++;
+			rss_i >>= 1;
+		}
+
+		/* write msb to all 8 TCs in one write */
+		IXGBE_WRITE_REG(hw, IXGBE_RQTC, msb * 0x11111111);
+	}
+}
+#endif
+
+/* Additional bittime to account for IXGBE framing */
+#define IXGBE_ETH_FRAMING 20
+
+/**
+ * ixgbe_hpbthresh - calculate high water mark for flow control
+ *
+ * @adapter: board private structure to calculate for
+ * @pb: packet buffer to calculate
+ */
+static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct net_device *dev = adapter->netdev;
+	int link, tc, kb, marker;
+	u32 dv_id, rx_pba;
+
+	/* Calculate max LAN frame size */
+	tc = link = dev->mtu + ETH_HLEN + ETH_FCS_LEN + IXGBE_ETH_FRAMING;
+
+#ifdef IXGBE_FCOE
+	/* FCoE traffic class uses FCOE jumbo frames */
+	if ((dev->features & NETIF_F_FCOE_MTU) &&
+	    (tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) &&
+	    (pb == ixgbe_fcoe_get_tc(adapter)))
+		tc = IXGBE_FCOE_JUMBO_FRAME_SIZE;
+#endif
+
+	/* Calculate delay value for device */
+	switch (hw->mac.type) {
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		dv_id = IXGBE_DV_X540(link, tc);
+		break;
+	default:
+		dv_id = IXGBE_DV(link, tc);
+		break;
+	}
+
+	/* Loopback switch introduces additional latency */
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+		dv_id += IXGBE_B2BT(tc);
+
+	/* Delay value is calculated in bit times convert to KB */
+	kb = IXGBE_BT2KB(dv_id);
+	rx_pba = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(pb)) >> 10;
+
+	marker = rx_pba - kb;
+
+	/* It is possible that the packet buffer is not large enough
+	 * to provide required headroom. In this case throw an error
+	 * to user and a do the best we can.
+	 */
+	if (marker < 0) {
+		e_warn(drv, "Packet Buffer(%i) can not provide enough"
+			    "headroom to support flow control."
+			    "Decrease MTU or number of traffic classes\n", pb);
+		marker = tc + 1;
+	}
+
+	return marker;
+}
+
+/**
+ * ixgbe_lpbthresh - calculate low water mark for flow control
+ *
+ * @adapter: board private structure to calculate for
+ * @pb: packet buffer to calculate
+ */
+static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct net_device *dev = adapter->netdev;
+	int tc;
+	u32 dv_id;
+
+	/* Calculate max LAN frame size */
+	tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+#ifdef IXGBE_FCOE
+	/* FCoE traffic class uses FCOE jumbo frames */
+	if ((dev->features & NETIF_F_FCOE_MTU) &&
+	    (tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) &&
+	    (pb == netdev_get_prio_tc_map(dev, adapter->fcoe.up)))
+		tc = IXGBE_FCOE_JUMBO_FRAME_SIZE;
+#endif
+
+	/* Calculate delay value for device */
+	switch (hw->mac.type) {
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		dv_id = IXGBE_LOW_DV_X540(tc);
+		break;
+	default:
+		dv_id = IXGBE_LOW_DV(tc);
+		break;
+	}
+
+	/* Delay value is calculated in bit times convert to KB */
+	return IXGBE_BT2KB(dv_id);
+}
+
+/*
+ * ixgbe_pbthresh_setup - calculate and setup high low water marks
+ */
+static void ixgbe_pbthresh_setup(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int num_tc = adapter->hw_tcs;
+	int i;
+
+	if (!num_tc)
+		num_tc = 1;
+
+	for (i = 0; i < num_tc; i++) {
+		hw->fc.high_water[i] = ixgbe_hpbthresh(adapter, i);
+		hw->fc.low_water[i] = ixgbe_lpbthresh(adapter, i);
+
+		/* Low water marks must not be larger than high water marks */
+		if (hw->fc.low_water[i] > hw->fc.high_water[i])
+			hw->fc.low_water[i] = 0;
+	}
+
+	for (; i < MAX_TRAFFIC_CLASS; i++)
+		hw->fc.high_water[i] = 0;
+}
+
+static void ixgbe_configure_pb(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int hdrm;
+	u8 tc = adapter->hw_tcs;
+
+	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE ||
+	    adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)
+		hdrm = 32 << adapter->fdir_pballoc;
+	else
+		hdrm = 0;
+
+	hw->mac.ops.set_rxpba(hw, tc, hdrm, PBA_STRATEGY_EQUAL);
+	ixgbe_pbthresh_setup(adapter);
+}
+
+static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct hlist_node *node2;
+	struct ixgbe_fdir_filter *filter;
+	u8 queue;
+
+	spin_lock(&adapter->fdir_perfect_lock);
+
+	if (!hlist_empty(&adapter->fdir_filter_list))
+		ixgbe_fdir_set_input_mask_82599(hw, &adapter->fdir_mask);
+
+	hlist_for_each_entry_safe(filter, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		if (filter->action == IXGBE_FDIR_DROP_QUEUE) {
+			queue = IXGBE_FDIR_DROP_QUEUE;
+		} else {
+			u32 ring = ethtool_get_flow_spec_ring(filter->action);
+			u8 vf = ethtool_get_flow_spec_ring_vf(filter->action);
+
+			if (!vf && (ring >= adapter->num_rx_queues)) {
+				e_err(drv, "FDIR restore failed without VF, ring: %u\n",
+				      ring);
+				continue;
+			} else if (vf &&
+				   ((vf > adapter->num_vfs) ||
+				     ring >= adapter->num_rx_queues_per_pool)) {
+				e_err(drv, "FDIR restore failed with VF, vf: %hhu, ring: %u\n",
+				      vf, ring);
+				continue;
+			}
+
+			/* Map the ring onto the absolute queue index */
+			if (!vf)
+				queue = adapter->rx_ring[ring]->reg_idx;
+			else
+				queue = ((vf - 1) *
+					adapter->num_rx_queues_per_pool) + ring;
+		}
+
+		ixgbe_fdir_write_perfect_filter_82599(hw,
+				&filter->filter, filter->sw_idx, queue);
+	}
+
+	spin_unlock(&adapter->fdir_perfect_lock);
+}
+
+/**
+ * ixgbe_clean_rx_ring - Free Rx Buffers per Queue
+ * @rx_ring: ring to free buffers from
+ **/
+static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
+{
+	u16 i = rx_ring->next_to_clean;
+	struct ixgbe_rx_buffer *rx_buffer = &rx_ring->rx_buffer_info[i];
+
+	if (rx_ring->xsk_pool) {
+		ixgbe_xsk_clean_rx_ring(rx_ring);
+		goto skip_free;
+	}
+
+	/* Free all the Rx ring sk_buffs */
+	while (i != rx_ring->next_to_alloc) {
+		if (rx_buffer->skb) {
+			struct sk_buff *skb = rx_buffer->skb;
+			if (IXGBE_CB(skb)->page_released)
+				dma_unmap_page_attrs(rx_ring->dev,
+						     IXGBE_CB(skb)->dma,
+						     ixgbe_rx_pg_size(rx_ring),
+						     DMA_FROM_DEVICE,
+						     IXGBE_RX_DMA_ATTR);
+			dev_kfree_skb(skb);
+		}
+
+		/* Invalidate cache lines that may have been written to by
+		 * device so that we avoid corrupting memory.
+		 */
+		dma_sync_single_range_for_cpu(rx_ring->dev,
+					      rx_buffer->dma,
+					      rx_buffer->page_offset,
+					      ixgbe_rx_bufsz(rx_ring),
+					      DMA_FROM_DEVICE);
+
+		/* free resources associated with mapping */
+		dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+				     ixgbe_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE,
+				     IXGBE_RX_DMA_ATTR);
+		__page_frag_cache_drain(rx_buffer->page,
+					rx_buffer->pagecnt_bias);
+
+		i++;
+		rx_buffer++;
+		if (i == rx_ring->count) {
+			i = 0;
+			rx_buffer = rx_ring->rx_buffer_info;
+		}
+	}
+
+skip_free:
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+}
+
+static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter,
+			     struct ixgbe_fwd_adapter *accel)
+{
+	u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
+	int num_tc = netdev_get_num_tc(adapter->netdev);
+	struct net_device *vdev = accel->netdev;
+	int i, baseq, err;
+
+	baseq = accel->pool * adapter->num_rx_queues_per_pool;
+	netdev_dbg(vdev, "pool %i:%i queues %i:%i\n",
+		   accel->pool, adapter->num_rx_pools,
+		   baseq, baseq + adapter->num_rx_queues_per_pool);
+
+	accel->rx_base_queue = baseq;
+	accel->tx_base_queue = baseq;
+
+	/* record configuration for macvlan interface in vdev */
+	for (i = 0; i < num_tc; i++)
+		netdev_bind_sb_channel_queue(adapter->netdev, vdev,
+					     i, rss_i, baseq + (rss_i * i));
+
+	for (i = 0; i < adapter->num_rx_queues_per_pool; i++)
+		adapter->rx_ring[baseq + i]->netdev = vdev;
+
+	/* Guarantee all rings are updated before we update the
+	 * MAC address filter.
+	 */
+	wmb();
+
+	/* ixgbe_add_mac_filter will return an index if it succeeds, so we
+	 * need to only treat it as an error value if it is negative.
+	 */
+	err = ixgbe_add_mac_filter(adapter, vdev->dev_addr,
+				   VMDQ_P(accel->pool));
+	if (err >= 0)
+		return 0;
+
+	/* if we cannot add the MAC rule then disable the offload */
+	macvlan_release_l2fw_offload(vdev);
+
+	for (i = 0; i < adapter->num_rx_queues_per_pool; i++)
+		adapter->rx_ring[baseq + i]->netdev = NULL;
+
+	netdev_err(vdev, "L2FW offload disabled due to L2 filter error\n");
+
+	/* unbind the queues and drop the subordinate channel config */
+	netdev_unbind_sb_channel(adapter->netdev, vdev);
+	netdev_set_sb_channel(vdev, 0);
+
+	clear_bit(accel->pool, adapter->fwd_bitmask);
+	kfree(accel);
+
+	return err;
+}
+
+static int ixgbe_macvlan_up(struct net_device *vdev,
+			    struct netdev_nested_priv *priv)
+{
+	struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)priv->data;
+	struct ixgbe_fwd_adapter *accel;
+
+	if (!netif_is_macvlan(vdev))
+		return 0;
+
+	accel = macvlan_accel_priv(vdev);
+	if (!accel)
+		return 0;
+
+	ixgbe_fwd_ring_up(adapter, accel);
+
+	return 0;
+}
+
+static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter)
+{
+	struct netdev_nested_priv priv = {
+		.data = (void *)adapter,
+	};
+
+	netdev_walk_all_upper_dev_rcu(adapter->netdev,
+				      ixgbe_macvlan_up, &priv);
+}
+
+static void ixgbe_configure(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	ixgbe_configure_pb(adapter);
+#ifdef CONFIG_IXGBE_DCB
+	ixgbe_configure_dcb(adapter);
+#endif
+	/*
+	 * We must restore virtualization before VLANs or else
+	 * the VLVF registers will not be populated
+	 */
+	ixgbe_configure_virtualization(adapter);
+
+	ixgbe_set_rx_mode(adapter->netdev);
+	ixgbe_restore_vlan(adapter);
+	ixgbe_ipsec_restore(adapter);
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+		hw->mac.ops.disable_rx_buff(hw);
+		break;
+	default:
+		break;
+	}
+
+	if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) {
+		ixgbe_init_fdir_signature_82599(&adapter->hw,
+						adapter->fdir_pballoc);
+	} else if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) {
+		ixgbe_init_fdir_perfect_82599(&adapter->hw,
+					      adapter->fdir_pballoc);
+		ixgbe_fdir_filter_restore(adapter);
+	}
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+		hw->mac.ops.enable_rx_buff(hw);
+		break;
+	default:
+		break;
+	}
+
+#ifdef CONFIG_IXGBE_DCA
+	/* configure DCA */
+	if (adapter->flags & IXGBE_FLAG_DCA_CAPABLE)
+		ixgbe_setup_dca(adapter);
+#endif /* CONFIG_IXGBE_DCA */
+
+#ifdef IXGBE_FCOE
+	/* configure FCoE L2 filters, redirection table, and Rx control */
+	ixgbe_configure_fcoe(adapter);
+
+#endif /* IXGBE_FCOE */
+	ixgbe_configure_tx(adapter);
+	ixgbe_configure_rx(adapter);
+	ixgbe_configure_dfwd(adapter);
+}
+
+/**
+ * ixgbe_sfp_link_config - set up SFP+ link
+ * @adapter: pointer to private adapter struct
+ **/
+static void ixgbe_sfp_link_config(struct ixgbe_adapter *adapter)
+{
+	/*
+	 * We are assuming the worst case scenario here, and that
+	 * is that an SFP was inserted/removed after the reset
+	 * but before SFP detection was enabled.  As such the best
+	 * solution is to just start searching as soon as we start
+	 */
+	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
+		adapter->flags2 |= IXGBE_FLAG2_SEARCH_FOR_SFP;
+
+	adapter->flags2 |= IXGBE_FLAG2_SFP_NEEDS_RESET;
+	adapter->sfp_poll_time = 0;
+}
+
+/**
+ * ixgbe_non_sfp_link_config - set up non-SFP+ link
+ * @hw: pointer to private hardware struct
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw)
+{
+	u32 speed;
+	bool autoneg, link_up = false;
+	int ret = IXGBE_ERR_LINK_SETUP;
+
+	if (hw->mac.ops.check_link)
+		ret = hw->mac.ops.check_link(hw, &speed, &link_up, false);
+
+	if (ret)
+		return ret;
+
+	speed = hw->phy.autoneg_advertised;
+	if (!speed && hw->mac.ops.get_link_capabilities) {
+		ret = hw->mac.ops.get_link_capabilities(hw, &speed,
+							&autoneg);
+		/* remove NBASE-T speeds from default autonegotiation
+		 * to accommodate broken network switches in the field
+		 * which cannot cope with advertised NBASE-T speeds
+		 */
+		speed &= ~(IXGBE_LINK_SPEED_5GB_FULL |
+			   IXGBE_LINK_SPEED_2_5GB_FULL);
+	}
+
+	if (ret)
+		return ret;
+
+	if (hw->mac.ops.setup_link)
+		ret = hw->mac.ops.setup_link(hw, speed, link_up);
+
+	return ret;
+}
+
+/**
+ * ixgbe_clear_vf_stats_counters - Clear out VF stats after reset
+ * @adapter: board private structure
+ *
+ * On a reset we need to clear out the VF stats or accounting gets
+ * messed up because they're not clear on read.
+ **/
+static void ixgbe_clear_vf_stats_counters(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 0; i < adapter->num_vfs; i++) {
+		adapter->vfinfo[i].last_vfstats.gprc =
+			IXGBE_READ_REG(hw, IXGBE_PVFGPRC(i));
+		adapter->vfinfo[i].saved_rst_vfstats.gprc +=
+			adapter->vfinfo[i].vfstats.gprc;
+		adapter->vfinfo[i].vfstats.gprc = 0;
+		adapter->vfinfo[i].last_vfstats.gptc =
+			IXGBE_READ_REG(hw, IXGBE_PVFGPTC(i));
+		adapter->vfinfo[i].saved_rst_vfstats.gptc +=
+			adapter->vfinfo[i].vfstats.gptc;
+		adapter->vfinfo[i].vfstats.gptc = 0;
+		adapter->vfinfo[i].last_vfstats.gorc =
+			IXGBE_READ_REG(hw, IXGBE_PVFGORC_LSB(i));
+		adapter->vfinfo[i].saved_rst_vfstats.gorc +=
+			adapter->vfinfo[i].vfstats.gorc;
+		adapter->vfinfo[i].vfstats.gorc = 0;
+		adapter->vfinfo[i].last_vfstats.gotc =
+			IXGBE_READ_REG(hw, IXGBE_PVFGOTC_LSB(i));
+		adapter->vfinfo[i].saved_rst_vfstats.gotc +=
+			adapter->vfinfo[i].vfstats.gotc;
+		adapter->vfinfo[i].vfstats.gotc = 0;
+		adapter->vfinfo[i].last_vfstats.mprc =
+			IXGBE_READ_REG(hw, IXGBE_PVFMPRC(i));
+		adapter->vfinfo[i].saved_rst_vfstats.mprc +=
+			adapter->vfinfo[i].vfstats.mprc;
+		adapter->vfinfo[i].vfstats.mprc = 0;
+	}
+}
+
+static void ixgbe_setup_gpie(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 gpie = 0;
+
+	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
+		gpie = IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_PBA_SUPPORT |
+		       IXGBE_GPIE_OCD;
+		gpie |= IXGBE_GPIE_EIAME;
+		/*
+		 * use EIAM to auto-mask when MSI-X interrupt is asserted
+		 * this saves a register write for every interrupt
+		 */
+		switch (hw->mac.type) {
+		case ixgbe_mac_82598EB:
+			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
+			break;
+		case ixgbe_mac_82599EB:
+		case ixgbe_mac_X540:
+		case ixgbe_mac_X550:
+		case ixgbe_mac_X550EM_x:
+		case ixgbe_mac_x550em_a:
+		default:
+			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
+			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
+			break;
+		}
+	} else {
+		/* legacy interrupts, use EIAM to auto-mask when reading EICR,
+		 * specifically only auto mask tx and rx interrupts */
+		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
+	}
+
+	/* XXX: to interrupt immediately for EICS writes, enable this */
+	/* gpie |= IXGBE_GPIE_EIMEN; */
+
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+		gpie &= ~IXGBE_GPIE_VTMODE_MASK;
+
+		switch (adapter->ring_feature[RING_F_VMDQ].mask) {
+		case IXGBE_82599_VMDQ_8Q_MASK:
+			gpie |= IXGBE_GPIE_VTMODE_16;
+			break;
+		case IXGBE_82599_VMDQ_4Q_MASK:
+			gpie |= IXGBE_GPIE_VTMODE_32;
+			break;
+		default:
+			gpie |= IXGBE_GPIE_VTMODE_64;
+			break;
+		}
+	}
+
+	/* Enable Thermal over heat sensor interrupt */
+	if (adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE) {
+		switch (adapter->hw.mac.type) {
+		case ixgbe_mac_82599EB:
+			gpie |= IXGBE_SDP0_GPIEN_8259X;
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Enable fan failure interrupt */
+	if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE)
+		gpie |= IXGBE_SDP1_GPIEN(hw);
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82599EB:
+		gpie |= IXGBE_SDP1_GPIEN_8259X | IXGBE_SDP2_GPIEN_8259X;
+		break;
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		gpie |= IXGBE_SDP0_GPIEN_X540;
+		break;
+	default:
+		break;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
+}
+
+static void ixgbe_up_complete(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int err;
+	u32 ctrl_ext;
+
+	ixgbe_get_hw_control(adapter);
+	ixgbe_setup_gpie(adapter);
+
+	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
+		ixgbe_configure_msix(adapter);
+	else
+		ixgbe_configure_msi_and_legacy(adapter);
+
+	/* enable the optics for 82599 SFP+ fiber */
+	if (hw->mac.ops.enable_tx_laser)
+		hw->mac.ops.enable_tx_laser(hw);
+
+	if (hw->phy.ops.set_phy_power)
+		hw->phy.ops.set_phy_power(hw, true);
+
+	smp_mb__before_atomic();
+	clear_bit(__IXGBE_DOWN, &adapter->state);
+	ixgbe_napi_enable_all(adapter);
+
+	if (ixgbe_is_sfp(hw)) {
+		ixgbe_sfp_link_config(adapter);
+	} else {
+		err = ixgbe_non_sfp_link_config(hw);
+		if (err)
+			e_err(probe, "link_config FAILED %d\n", err);
+	}
+
+	/* clear any pending interrupts, may auto mask */
+	IXGBE_READ_REG(hw, IXGBE_EICR);
+	ixgbe_irq_enable(adapter, true, true);
+
+	/*
+	 * If this adapter has a fan, check to see if we had a failure
+	 * before we enabled the interrupt.
+	 */
+	if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) {
+		u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+		if (esdp & IXGBE_ESDP_SDP1)
+			e_crit(drv, "Fan has stopped, replace the adapter\n");
+	}
+
+	/* bring the link up in the watchdog, this could race with our first
+	 * link up interrupt but shouldn't be a problem */
+	adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
+	adapter->link_check_timeout = jiffies;
+	mod_timer(&adapter->service_timer, jiffies);
+
+	ixgbe_clear_vf_stats_counters(adapter);
+	/* Set PF Reset Done bit so PF/VF Mail Ops can work */
+	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
+	ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD;
+	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
+
+	/* update setting rx tx for all active vfs */
+	ixgbe_set_all_vfs(adapter);
+}
+
+void ixgbe_reinit_locked(struct ixgbe_adapter *adapter)
+{
+	/* put off any impending NetWatchDogTimeout */
+	netif_trans_update(adapter->netdev);
+
+	while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+	if (adapter->hw.phy.type == ixgbe_phy_fw)
+		ixgbe_watchdog_link_is_down(adapter);
+	ixgbe_down(adapter);
+	/*
+	 * If SR-IOV enabled then wait a bit before bringing the adapter
+	 * back up to give the VFs time to respond to the reset.  The
+	 * two second wait is based upon the watchdog timer cycle in
+	 * the VF driver.
+	 */
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+		msleep(2000);
+	ixgbe_up(adapter);
+	clear_bit(__IXGBE_RESETTING, &adapter->state);
+}
+
+void ixgbe_up(struct ixgbe_adapter *adapter)
+{
+	/* hardware has been reset, we need to reload some things */
+	ixgbe_configure(adapter);
+
+	ixgbe_up_complete(adapter);
+}
+
+static unsigned long ixgbe_get_completion_timeout(struct ixgbe_adapter *adapter)
+{
+	u16 devctl2;
+
+	pcie_capability_read_word(adapter->pdev, PCI_EXP_DEVCTL2, &devctl2);
+
+	switch (devctl2 & IXGBE_PCIDEVCTRL2_TIMEO_MASK) {
+	case IXGBE_PCIDEVCTRL2_17_34s:
+	case IXGBE_PCIDEVCTRL2_4_8s:
+		/* For now we cap the upper limit on delay to 2 seconds
+		 * as we end up going up to 34 seconds of delay in worst
+		 * case timeout value.
+		 */
+	case IXGBE_PCIDEVCTRL2_1_2s:
+		return 2000000ul;	/* 2.0 s */
+	case IXGBE_PCIDEVCTRL2_260_520ms:
+		return 520000ul;	/* 520 ms */
+	case IXGBE_PCIDEVCTRL2_65_130ms:
+		return 130000ul;	/* 130 ms */
+	case IXGBE_PCIDEVCTRL2_16_32ms:
+		return 32000ul;		/* 32 ms */
+	case IXGBE_PCIDEVCTRL2_1_2ms:
+		return 2000ul;		/* 2 ms */
+	case IXGBE_PCIDEVCTRL2_50_100us:
+		return 100ul;		/* 100 us */
+	case IXGBE_PCIDEVCTRL2_16_32ms_def:
+		return 32000ul;		/* 32 ms */
+	default:
+		break;
+	}
+
+	/* We shouldn't need to hit this path, but just in case default as
+	 * though completion timeout is not supported and support 32ms.
+	 */
+	return 32000ul;
+}
+
+void ixgbe_disable_rx(struct ixgbe_adapter *adapter)
+{
+	unsigned long wait_delay, delay_interval;
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i, wait_loop;
+	u32 rxdctl;
+
+	/* disable receives */
+	hw->mac.ops.disable_rx(hw);
+
+	if (ixgbe_removed(hw->hw_addr))
+		return;
+
+	/* disable all enabled Rx queues */
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct ixgbe_ring *ring = adapter->rx_ring[i];
+		u8 reg_idx = ring->reg_idx;
+
+		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+		rxdctl &= ~IXGBE_RXDCTL_ENABLE;
+		rxdctl |= IXGBE_RXDCTL_SWFLSH;
+
+		/* write value back with RXDCTL.ENABLE bit cleared */
+		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
+	}
+
+	/* RXDCTL.EN may not change on 82598 if link is down, so skip it */
+	if (hw->mac.type == ixgbe_mac_82598EB &&
+	    !(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
+		return;
+
+	/* Determine our minimum delay interval. We will increase this value
+	 * with each subsequent test. This way if the device returns quickly
+	 * we should spend as little time as possible waiting, however as
+	 * the time increases we will wait for larger periods of time.
+	 *
+	 * The trick here is that we increase the interval using the
+	 * following pattern: 1x 3x 5x 7x 9x 11x 13x 15x 17x 19x. The result
+	 * of that wait is that it totals up to 100x whatever interval we
+	 * choose. Since our minimum wait is 100us we can just divide the
+	 * total timeout by 100 to get our minimum delay interval.
+	 */
+	delay_interval = ixgbe_get_completion_timeout(adapter) / 100;
+
+	wait_loop = IXGBE_MAX_RX_DESC_POLL;
+	wait_delay = delay_interval;
+
+	while (wait_loop--) {
+		usleep_range(wait_delay, wait_delay + 10);
+		wait_delay += delay_interval * 2;
+		rxdctl = 0;
+
+		/* OR together the reading of all the active RXDCTL registers,
+		 * and then test the result. We need the disable to complete
+		 * before we start freeing the memory and invalidating the
+		 * DMA mappings.
+		 */
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			struct ixgbe_ring *ring = adapter->rx_ring[i];
+			u8 reg_idx = ring->reg_idx;
+
+			rxdctl |= IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+		}
+
+		if (!(rxdctl & IXGBE_RXDCTL_ENABLE))
+			return;
+	}
+
+	e_err(drv,
+	      "RXDCTL.ENABLE for one or more queues not cleared within the polling period\n");
+}
+
+void ixgbe_disable_tx(struct ixgbe_adapter *adapter)
+{
+	unsigned long wait_delay, delay_interval;
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i, wait_loop;
+	u32 txdctl;
+
+	if (ixgbe_removed(hw->hw_addr))
+		return;
+
+	/* disable all enabled Tx queues */
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct ixgbe_ring *ring = adapter->tx_ring[i];
+		u8 reg_idx = ring->reg_idx;
+
+		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
+	}
+
+	/* disable all enabled XDP Tx queues */
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		struct ixgbe_ring *ring = adapter->xdp_ring[i];
+		u8 reg_idx = ring->reg_idx;
+
+		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
+	}
+
+	/* If the link is not up there shouldn't be much in the way of
+	 * pending transactions. Those that are left will be flushed out
+	 * when the reset logic goes through the flush sequence to clean out
+	 * the pending Tx transactions.
+	 */
+	if (!(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
+		goto dma_engine_disable;
+
+	/* Determine our minimum delay interval. We will increase this value
+	 * with each subsequent test. This way if the device returns quickly
+	 * we should spend as little time as possible waiting, however as
+	 * the time increases we will wait for larger periods of time.
+	 *
+	 * The trick here is that we increase the interval using the
+	 * following pattern: 1x 3x 5x 7x 9x 11x 13x 15x 17x 19x. The result
+	 * of that wait is that it totals up to 100x whatever interval we
+	 * choose. Since our minimum wait is 100us we can just divide the
+	 * total timeout by 100 to get our minimum delay interval.
+	 */
+	delay_interval = ixgbe_get_completion_timeout(adapter) / 100;
+
+	wait_loop = IXGBE_MAX_RX_DESC_POLL;
+	wait_delay = delay_interval;
+
+	while (wait_loop--) {
+		usleep_range(wait_delay, wait_delay + 10);
+		wait_delay += delay_interval * 2;
+		txdctl = 0;
+
+		/* OR together the reading of all the active TXDCTL registers,
+		 * and then test the result. We need the disable to complete
+		 * before we start freeing the memory and invalidating the
+		 * DMA mappings.
+		 */
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			struct ixgbe_ring *ring = adapter->tx_ring[i];
+			u8 reg_idx = ring->reg_idx;
+
+			txdctl |= IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx));
+		}
+		for (i = 0; i < adapter->num_xdp_queues; i++) {
+			struct ixgbe_ring *ring = adapter->xdp_ring[i];
+			u8 reg_idx = ring->reg_idx;
+
+			txdctl |= IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx));
+		}
+
+		if (!(txdctl & IXGBE_TXDCTL_ENABLE))
+			goto dma_engine_disable;
+	}
+
+	e_err(drv,
+	      "TXDCTL.ENABLE for one or more queues not cleared within the polling period\n");
+
+dma_engine_disable:
+	/* Disable the Tx DMA engine on 82599 and later MAC */
+	switch (hw->mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL,
+				(IXGBE_READ_REG(hw, IXGBE_DMATXCTL) &
+				 ~IXGBE_DMATXCTL_TE));
+		fallthrough;
+	default:
+		break;
+	}
+}
+
+void ixgbe_reset(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	int err;
+
+	if (ixgbe_removed(hw->hw_addr))
+		return;
+	/* lock SFP init bit to prevent race conditions with the watchdog */
+	while (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state))
+		usleep_range(1000, 2000);
+
+	/* clear all SFP and link config related flags while holding SFP_INIT */
+	adapter->flags2 &= ~(IXGBE_FLAG2_SEARCH_FOR_SFP |
+			     IXGBE_FLAG2_SFP_NEEDS_RESET);
+	adapter->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
+
+	err = hw->mac.ops.init_hw(hw);
+	switch (err) {
+	case 0:
+	case IXGBE_ERR_SFP_NOT_PRESENT:
+	case IXGBE_ERR_SFP_NOT_SUPPORTED:
+		break;
+	case IXGBE_ERR_PRIMARY_REQUESTS_PENDING:
+		e_dev_err("primary disable timed out\n");
+		break;
+	case IXGBE_ERR_EEPROM_VERSION:
+		/* We are running on a pre-production device, log a warning */
+		e_dev_warn("This device is a pre-production adapter/LOM. "
+			   "Please be aware there may be issues associated with "
+			   "your hardware.  If you are experiencing problems "
+			   "please contact your Intel or hardware "
+			   "representative who provided you with this "
+			   "hardware.\n");
+		break;
+	default:
+		e_dev_err("Hardware Error: %d\n", err);
+	}
+
+	clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
+
+	/* flush entries out of MAC table */
+	ixgbe_flush_sw_mac_table(adapter);
+	__dev_uc_unsync(netdev, NULL);
+
+	/* do not flush user set addresses */
+	ixgbe_mac_set_default_filter(adapter);
+
+	/* update SAN MAC vmdq pool selection */
+	if (hw->mac.san_mac_rar_index)
+		hw->mac.ops.set_vmdq_san_mac(hw, VMDQ_P(0));
+
+	if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state))
+		ixgbe_ptp_reset(adapter);
+
+	if (hw->phy.ops.set_phy_power) {
+		if (!netif_running(adapter->netdev) && !adapter->wol)
+			hw->phy.ops.set_phy_power(hw, false);
+		else
+			hw->phy.ops.set_phy_power(hw, true);
+	}
+}
+
+/**
+ * ixgbe_clean_tx_ring - Free Tx Buffers
+ * @tx_ring: ring to be cleaned
+ **/
+static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
+{
+	u16 i = tx_ring->next_to_clean;
+	struct ixgbe_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
+
+	if (tx_ring->xsk_pool) {
+		ixgbe_xsk_clean_tx_ring(tx_ring);
+		goto out;
+	}
+
+	while (i != tx_ring->next_to_use) {
+		union ixgbe_adv_tx_desc *eop_desc, *tx_desc;
+
+		/* Free all the Tx ring sk_buffs */
+		if (ring_is_xdp(tx_ring))
+			xdp_return_frame(tx_buffer->xdpf);
+		else
+			dev_kfree_skb_any(tx_buffer->skb);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+
+		/* check for eop_desc to determine the end of the packet */
+		eop_desc = tx_buffer->next_to_watch;
+		tx_desc = IXGBE_TX_DESC(tx_ring, i);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(i == tx_ring->count)) {
+				i = 0;
+				tx_buffer = tx_ring->tx_buffer_info;
+				tx_desc = IXGBE_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len))
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buffer, dma),
+					       dma_unmap_len(tx_buffer, len),
+					       DMA_TO_DEVICE);
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		i++;
+		if (unlikely(i == tx_ring->count)) {
+			i = 0;
+			tx_buffer = tx_ring->tx_buffer_info;
+		}
+	}
+
+	/* reset BQL for queue */
+	if (!ring_is_xdp(tx_ring))
+		netdev_tx_reset_queue(txring_txq(tx_ring));
+
+out:
+	/* reset next_to_use and next_to_clean */
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+}
+
+/**
+ * ixgbe_clean_all_rx_rings - Free Rx Buffers for all queues
+ * @adapter: board private structure
+ **/
+static void ixgbe_clean_all_rx_rings(struct ixgbe_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		ixgbe_clean_rx_ring(adapter->rx_ring[i]);
+}
+
+/**
+ * ixgbe_clean_all_tx_rings - Free Tx Buffers for all queues
+ * @adapter: board private structure
+ **/
+static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		ixgbe_clean_tx_ring(adapter->tx_ring[i]);
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		ixgbe_clean_tx_ring(adapter->xdp_ring[i]);
+}
+
+static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter)
+{
+	struct hlist_node *node2;
+	struct ixgbe_fdir_filter *filter;
+
+	spin_lock(&adapter->fdir_perfect_lock);
+
+	hlist_for_each_entry_safe(filter, node2,
+				  &adapter->fdir_filter_list, fdir_node) {
+		hlist_del(&filter->fdir_node);
+		kfree(filter);
+	}
+	adapter->fdir_filter_count = 0;
+
+	spin_unlock(&adapter->fdir_perfect_lock);
+}
+
+void ixgbe_down(struct ixgbe_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	/* signal that we are down to the interrupt handler */
+	if (test_and_set_bit(__IXGBE_DOWN, &adapter->state))
+		return; /* do nothing if already down */
+
+	/* Shut off incoming Tx traffic */
+	netif_tx_stop_all_queues(netdev);
+
+	/* call carrier off first to avoid false dev_watchdog timeouts */
+	netif_carrier_off(netdev);
+	netif_tx_disable(netdev);
+
+	/* Disable Rx */
+	ixgbe_disable_rx(adapter);
+
+	/* synchronize_rcu() needed for pending XDP buffers to drain */
+	if (adapter->xdp_ring[0])
+		synchronize_rcu();
+
+	ixgbe_irq_disable(adapter);
+
+	ixgbe_napi_disable_all(adapter);
+
+	clear_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
+	adapter->flags2 &= ~IXGBE_FLAG2_FDIR_REQUIRES_REINIT;
+	adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
+
+	del_timer_sync(&adapter->service_timer);
+
+	if (adapter->num_vfs) {
+		/* Clear EITR Select mapping */
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITRSEL, 0);
+
+		/* Mark all the VFs as inactive */
+		for (i = 0 ; i < adapter->num_vfs; i++)
+			adapter->vfinfo[i].clear_to_send = false;
+
+		/* update setting rx tx for all active vfs */
+		ixgbe_set_all_vfs(adapter);
+	}
+
+	/* disable transmits in the hardware now that interrupts are off */
+	ixgbe_disable_tx(adapter);
+
+	if (!pci_channel_offline(adapter->pdev))
+		ixgbe_reset(adapter);
+
+	/* power down the optics for 82599 SFP+ fiber */
+	if (hw->mac.ops.disable_tx_laser)
+		hw->mac.ops.disable_tx_laser(hw);
+
+	ixgbe_clean_all_tx_rings(adapter);
+	ixgbe_clean_all_rx_rings(adapter);
+}
+
+/**
+ * ixgbe_set_eee_capable - helper function to determine EEE support on X550
+ * @adapter: board private structure
+ */
+static void ixgbe_set_eee_capable(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_X550EM_A_1G_T:
+	case IXGBE_DEV_ID_X550EM_A_1G_T_L:
+		if (!hw->phy.eee_speeds_supported)
+			break;
+		adapter->flags2 |= IXGBE_FLAG2_EEE_CAPABLE;
+		if (!hw->phy.eee_speeds_advertised)
+			break;
+		adapter->flags2 |= IXGBE_FLAG2_EEE_ENABLED;
+		break;
+	default:
+		adapter->flags2 &= ~IXGBE_FLAG2_EEE_CAPABLE;
+		adapter->flags2 &= ~IXGBE_FLAG2_EEE_ENABLED;
+		break;
+	}
+}
+
+/**
+ * ixgbe_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ * @txqueue: queue number that timed out
+ **/
+static void ixgbe_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	/* Do the reset outside of interrupt context */
+	ixgbe_tx_timeout_reset(adapter);
+}
+
+#ifdef CONFIG_IXGBE_DCB
+static void ixgbe_init_dcb(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct tc_configuration *tc;
+	int j;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+	case ixgbe_mac_82599EB:
+		adapter->dcb_cfg.num_tcs.pg_tcs = MAX_TRAFFIC_CLASS;
+		adapter->dcb_cfg.num_tcs.pfc_tcs = MAX_TRAFFIC_CLASS;
+		break;
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+		adapter->dcb_cfg.num_tcs.pg_tcs = X540_TRAFFIC_CLASS;
+		adapter->dcb_cfg.num_tcs.pfc_tcs = X540_TRAFFIC_CLASS;
+		break;
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+	default:
+		adapter->dcb_cfg.num_tcs.pg_tcs = DEF_TRAFFIC_CLASS;
+		adapter->dcb_cfg.num_tcs.pfc_tcs = DEF_TRAFFIC_CLASS;
+		break;
+	}
+
+	/* Configure DCB traffic classes */
+	for (j = 0; j < MAX_TRAFFIC_CLASS; j++) {
+		tc = &adapter->dcb_cfg.tc_config[j];
+		tc->path[DCB_TX_CONFIG].bwg_id = 0;
+		tc->path[DCB_TX_CONFIG].bwg_percent = 12 + (j & 1);
+		tc->path[DCB_RX_CONFIG].bwg_id = 0;
+		tc->path[DCB_RX_CONFIG].bwg_percent = 12 + (j & 1);
+		tc->dcb_pfc = pfc_disabled;
+	}
+
+	/* Initialize default user to priority mapping, UPx->TC0 */
+	tc = &adapter->dcb_cfg.tc_config[0];
+	tc->path[DCB_TX_CONFIG].up_to_tc_bitmap = 0xFF;
+	tc->path[DCB_RX_CONFIG].up_to_tc_bitmap = 0xFF;
+
+	adapter->dcb_cfg.bw_percentage[DCB_TX_CONFIG][0] = 100;
+	adapter->dcb_cfg.bw_percentage[DCB_RX_CONFIG][0] = 100;
+	adapter->dcb_cfg.pfc_mode_enable = false;
+	adapter->dcb_set_bitmap = 0x00;
+	if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE)
+		adapter->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_CEE;
+	memcpy(&adapter->temp_dcb_cfg, &adapter->dcb_cfg,
+	       sizeof(adapter->temp_dcb_cfg));
+}
+#endif
+
+/**
+ * ixgbe_sw_init - Initialize general software structures (struct ixgbe_adapter)
+ * @adapter: board private structure to initialize
+ * @ii: pointer to ixgbe_info for device
+ *
+ * ixgbe_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ **/
+static int ixgbe_sw_init(struct ixgbe_adapter *adapter,
+			 const struct ixgbe_info *ii)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct pci_dev *pdev = adapter->pdev;
+	unsigned int rss, fdir;
+	u32 fwsm;
+	int i;
+
+	/* PCI config space info */
+
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	hw->revision_id = pdev->revision;
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+
+	/* get_invariants needs the device IDs */
+	ii->get_invariants(hw);
+
+	/* Set common capability flags and settings */
+	rss = min_t(int, ixgbe_max_rss_indices(adapter), num_online_cpus());
+	adapter->ring_feature[RING_F_RSS].limit = rss;
+	adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE;
+	adapter->max_q_vectors = MAX_Q_VECTORS_82599;
+	adapter->atr_sample_rate = 20;
+	fdir = min_t(int, IXGBE_MAX_FDIR_INDICES, num_online_cpus());
+	adapter->ring_feature[RING_F_FDIR].limit = fdir;
+	adapter->fdir_pballoc = IXGBE_FDIR_PBALLOC_64K;
+	adapter->ring_feature[RING_F_VMDQ].limit = 1;
+#ifdef CONFIG_IXGBE_DCA
+	adapter->flags |= IXGBE_FLAG_DCA_CAPABLE;
+#endif
+#ifdef CONFIG_IXGBE_DCB
+	adapter->flags |= IXGBE_FLAG_DCB_CAPABLE;
+	adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
+#endif
+#ifdef IXGBE_FCOE
+	adapter->flags |= IXGBE_FLAG_FCOE_CAPABLE;
+	adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
+#ifdef CONFIG_IXGBE_DCB
+	/* Default traffic class to use for FCoE */
+	adapter->fcoe.up = IXGBE_FCOE_DEFTC;
+#endif /* CONFIG_IXGBE_DCB */
+#endif /* IXGBE_FCOE */
+
+	/* initialize static ixgbe jump table entries */
+	adapter->jump_tables[0] = kzalloc(sizeof(*adapter->jump_tables[0]),
+					  GFP_KERNEL);
+	if (!adapter->jump_tables[0])
+		return -ENOMEM;
+	adapter->jump_tables[0]->mat = ixgbe_ipv4_fields;
+
+	for (i = 1; i < IXGBE_MAX_LINK_HANDLE; i++)
+		adapter->jump_tables[i] = NULL;
+
+	adapter->mac_table = kcalloc(hw->mac.num_rar_entries,
+				     sizeof(struct ixgbe_mac_addr),
+				     GFP_KERNEL);
+	if (!adapter->mac_table)
+		return -ENOMEM;
+
+	if (ixgbe_init_rss_key(adapter))
+		return -ENOMEM;
+
+	adapter->af_xdp_zc_qps = bitmap_zalloc(IXGBE_MAX_XDP_QS, GFP_KERNEL);
+	if (!adapter->af_xdp_zc_qps)
+		return -ENOMEM;
+
+	/* Set MAC specific capability flags and exceptions */
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		adapter->flags2 &= ~IXGBE_FLAG2_RSC_CAPABLE;
+
+		if (hw->device_id == IXGBE_DEV_ID_82598AT)
+			adapter->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE;
+
+		adapter->max_q_vectors = MAX_Q_VECTORS_82598;
+		adapter->ring_feature[RING_F_FDIR].limit = 0;
+		adapter->atr_sample_rate = 0;
+		adapter->fdir_pballoc = 0;
+#ifdef IXGBE_FCOE
+		adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE;
+		adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED;
+#ifdef CONFIG_IXGBE_DCB
+		adapter->fcoe.up = 0;
+#endif /* IXGBE_DCB */
+#endif /* IXGBE_FCOE */
+		break;
+	case ixgbe_mac_82599EB:
+		if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM)
+			adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
+		break;
+	case ixgbe_mac_X540:
+		fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw));
+		if (fwsm & IXGBE_FWSM_TS_ENABLED)
+			adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
+		break;
+	case ixgbe_mac_x550em_a:
+		switch (hw->device_id) {
+		case IXGBE_DEV_ID_X550EM_A_1G_T:
+		case IXGBE_DEV_ID_X550EM_A_1G_T_L:
+			adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
+			break;
+		default:
+			break;
+		}
+		fallthrough;
+	case ixgbe_mac_X550EM_x:
+#ifdef CONFIG_IXGBE_DCB
+		adapter->flags &= ~IXGBE_FLAG_DCB_CAPABLE;
+#endif
+#ifdef IXGBE_FCOE
+		adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE;
+#ifdef CONFIG_IXGBE_DCB
+		adapter->fcoe.up = 0;
+#endif /* IXGBE_DCB */
+#endif /* IXGBE_FCOE */
+		fallthrough;
+	case ixgbe_mac_X550:
+		if (hw->mac.type == ixgbe_mac_X550)
+			adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
+#ifdef CONFIG_IXGBE_DCA
+		adapter->flags &= ~IXGBE_FLAG_DCA_CAPABLE;
+#endif
+		break;
+	default:
+		break;
+	}
+
+#ifdef IXGBE_FCOE
+	/* FCoE support exists, always init the FCoE lock */
+	spin_lock_init(&adapter->fcoe.lock);
+
+#endif
+	/* n-tuple support exists, always init our spinlock */
+	spin_lock_init(&adapter->fdir_perfect_lock);
+
+	/* init spinlock to avoid concurrency of VF resources */
+	spin_lock_init(&adapter->vfs_lock);
+
+#ifdef CONFIG_IXGBE_DCB
+	ixgbe_init_dcb(adapter);
+#endif
+	ixgbe_init_ipsec_offload(adapter);
+
+	/* default flow control settings */
+	hw->fc.requested_mode = ixgbe_fc_full;
+	hw->fc.current_mode = ixgbe_fc_full;	/* init for ethtool output */
+	ixgbe_pbthresh_setup(adapter);
+	hw->fc.pause_time = IXGBE_DEFAULT_FCPAUSE;
+	hw->fc.send_xon = true;
+	hw->fc.disable_fc_autoneg = ixgbe_device_supports_autoneg_fc(hw);
+
+#ifdef CONFIG_PCI_IOV
+	if (max_vfs > 0)
+		e_dev_warn("Enabling SR-IOV VFs using the max_vfs module parameter is deprecated - please use the pci sysfs interface instead.\n");
+
+	/* assign number of SR-IOV VFs */
+	if (hw->mac.type != ixgbe_mac_82598EB) {
+		if (max_vfs > IXGBE_MAX_VFS_DRV_LIMIT) {
+			max_vfs = 0;
+			e_dev_warn("max_vfs parameter out of range. Not assigning any SR-IOV VFs\n");
+		}
+	}
+#endif /* CONFIG_PCI_IOV */
+
+	/* enable itr by default in dynamic mode */
+	adapter->rx_itr_setting = 1;
+	adapter->tx_itr_setting = 1;
+
+	/* set default ring sizes */
+	adapter->tx_ring_count = IXGBE_DEFAULT_TXD;
+	adapter->rx_ring_count = IXGBE_DEFAULT_RXD;
+
+	/* set default work limits */
+	adapter->tx_work_limit = IXGBE_DEFAULT_TX_WORK;
+
+	/* initialize eeprom parameters */
+	if (ixgbe_init_eeprom_params_generic(hw)) {
+		e_dev_err("EEPROM initialization failed\n");
+		return -EIO;
+	}
+
+	/* PF holds first pool slot */
+	set_bit(0, adapter->fwd_bitmask);
+	set_bit(__IXGBE_DOWN, &adapter->state);
+
+	/* enable locking for XDP_TX if we have more CPUs than queues */
+	if (nr_cpu_ids > IXGBE_MAX_XDP_QS)
+		static_branch_enable(&ixgbe_xdp_locking_key);
+
+	return 0;
+}
+
+/**
+ * ixgbe_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @tx_ring:    tx descriptor ring (for a specific queue) to setup
+ *
+ * Return 0 on success, negative on failure
+ **/
+int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring)
+{
+	struct device *dev = tx_ring->dev;
+	int orig_node = dev_to_node(dev);
+	int ring_node = NUMA_NO_NODE;
+	int size;
+
+	size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count;
+
+	if (tx_ring->q_vector)
+		ring_node = tx_ring->q_vector->numa_node;
+
+	tx_ring->tx_buffer_info = vmalloc_node(size, ring_node);
+	if (!tx_ring->tx_buffer_info)
+		tx_ring->tx_buffer_info = vmalloc(size);
+	if (!tx_ring->tx_buffer_info)
+		goto err;
+
+	/* round up to nearest 4K */
+	tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc);
+	tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+	set_dev_node(dev, ring_node);
+	tx_ring->desc = dma_alloc_coherent(dev,
+					   tx_ring->size,
+					   &tx_ring->dma,
+					   GFP_KERNEL);
+	set_dev_node(dev, orig_node);
+	if (!tx_ring->desc)
+		tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+						   &tx_ring->dma, GFP_KERNEL);
+	if (!tx_ring->desc)
+		goto err;
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+	return 0;
+
+err:
+	vfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+	dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * ixgbe_setup_all_tx_resources - allocate all queues Tx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
+{
+	int i, j = 0, err = 0;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		err = ixgbe_setup_tx_resources(adapter->tx_ring[i]);
+		if (!err)
+			continue;
+
+		e_err(probe, "Allocation for Tx Queue %u failed\n", i);
+		goto err_setup_tx;
+	}
+	for (j = 0; j < adapter->num_xdp_queues; j++) {
+		err = ixgbe_setup_tx_resources(adapter->xdp_ring[j]);
+		if (!err)
+			continue;
+
+		e_err(probe, "Allocation for Tx Queue %u failed\n", j);
+		goto err_setup_tx;
+	}
+
+	return 0;
+err_setup_tx:
+	/* rewind the index freeing the rings as we go */
+	while (j--)
+		ixgbe_free_tx_resources(adapter->xdp_ring[j]);
+	while (i--)
+		ixgbe_free_tx_resources(adapter->tx_ring[i]);
+	return err;
+}
+
+static int ixgbe_rx_napi_id(struct ixgbe_ring *rx_ring)
+{
+	struct ixgbe_q_vector *q_vector = rx_ring->q_vector;
+
+	return q_vector ? q_vector->napi.napi_id : 0;
+}
+
+/**
+ * ixgbe_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @adapter: pointer to ixgbe_adapter
+ * @rx_ring:    rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
+			     struct ixgbe_ring *rx_ring)
+{
+	struct device *dev = rx_ring->dev;
+	int orig_node = dev_to_node(dev);
+	int ring_node = NUMA_NO_NODE;
+	int size;
+
+	size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
+
+	if (rx_ring->q_vector)
+		ring_node = rx_ring->q_vector->numa_node;
+
+	rx_ring->rx_buffer_info = vmalloc_node(size, ring_node);
+	if (!rx_ring->rx_buffer_info)
+		rx_ring->rx_buffer_info = vmalloc(size);
+	if (!rx_ring->rx_buffer_info)
+		goto err;
+
+	/* Round up to nearest 4K */
+	rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc);
+	rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+	set_dev_node(dev, ring_node);
+	rx_ring->desc = dma_alloc_coherent(dev,
+					   rx_ring->size,
+					   &rx_ring->dma,
+					   GFP_KERNEL);
+	set_dev_node(dev, orig_node);
+	if (!rx_ring->desc)
+		rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+						   &rx_ring->dma, GFP_KERNEL);
+	if (!rx_ring->desc)
+		goto err;
+
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+
+	/* XDP RX-queue info */
+	if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev,
+			     rx_ring->queue_index, ixgbe_rx_napi_id(rx_ring)) < 0)
+		goto err;
+
+	WRITE_ONCE(rx_ring->xdp_prog, adapter->xdp_prog);
+
+	return 0;
+err:
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+	dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * ixgbe_setup_all_rx_resources - allocate all queues Rx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		err = ixgbe_setup_rx_resources(adapter, adapter->rx_ring[i]);
+		if (!err)
+			continue;
+
+		e_err(probe, "Allocation for Rx Queue %u failed\n", i);
+		goto err_setup_rx;
+	}
+
+#ifdef IXGBE_FCOE
+	err = ixgbe_setup_fcoe_ddp_resources(adapter);
+	if (!err)
+#endif
+		return 0;
+err_setup_rx:
+	/* rewind the index freeing the rings as we go */
+	while (i--)
+		ixgbe_free_rx_resources(adapter->rx_ring[i]);
+	return err;
+}
+
+/**
+ * ixgbe_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ **/
+void ixgbe_free_tx_resources(struct ixgbe_ring *tx_ring)
+{
+	ixgbe_clean_tx_ring(tx_ring);
+
+	vfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+
+	/* if not set, then don't free */
+	if (!tx_ring->desc)
+		return;
+
+	dma_free_coherent(tx_ring->dev, tx_ring->size,
+			  tx_ring->desc, tx_ring->dma);
+
+	tx_ring->desc = NULL;
+}
+
+/**
+ * ixgbe_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ **/
+static void ixgbe_free_all_tx_resources(struct ixgbe_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		if (adapter->tx_ring[i]->desc)
+			ixgbe_free_tx_resources(adapter->tx_ring[i]);
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		if (adapter->xdp_ring[i]->desc)
+			ixgbe_free_tx_resources(adapter->xdp_ring[i]);
+}
+
+/**
+ * ixgbe_free_rx_resources - Free Rx Resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
+{
+	ixgbe_clean_rx_ring(rx_ring);
+
+	rx_ring->xdp_prog = NULL;
+	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+
+	/* if not set, then don't free */
+	if (!rx_ring->desc)
+		return;
+
+	dma_free_coherent(rx_ring->dev, rx_ring->size,
+			  rx_ring->desc, rx_ring->dma);
+
+	rx_ring->desc = NULL;
+}
+
+/**
+ * ixgbe_free_all_rx_resources - Free Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ **/
+static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter)
+{
+	int i;
+
+#ifdef IXGBE_FCOE
+	ixgbe_free_fcoe_ddp_resources(adapter);
+
+#endif
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		if (adapter->rx_ring[i]->desc)
+			ixgbe_free_rx_resources(adapter->rx_ring[i]);
+}
+
+/**
+ * ixgbe_max_xdp_frame_size - returns the maximum allowed frame size for XDP
+ * @adapter: device handle, pointer to adapter
+ */
+static int ixgbe_max_xdp_frame_size(struct ixgbe_adapter *adapter)
+{
+	if (PAGE_SIZE >= 8192 || adapter->flags2 & IXGBE_FLAG2_RX_LEGACY)
+		return IXGBE_RXBUFFER_2K;
+	else
+		return IXGBE_RXBUFFER_3K;
+}
+
+/**
+ * ixgbe_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (ixgbe_enabled_xdp_adapter(adapter)) {
+		int new_frame_size = new_mtu + IXGBE_PKT_HDR_PAD;
+
+		if (new_frame_size > ixgbe_max_xdp_frame_size(adapter)) {
+			e_warn(probe, "Requested MTU size is not supported with XDP\n");
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * For 82599EB we cannot allow legacy VFs to enable their receive
+	 * paths when MTU greater than 1500 is configured.  So display a
+	 * warning that legacy VFs will be disabled.
+	 */
+	if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) &&
+	    (adapter->hw.mac.type == ixgbe_mac_82599EB) &&
+	    (new_mtu > ETH_DATA_LEN))
+		e_warn(probe, "Setting MTU > 1500 will disable legacy VFs\n");
+
+	netdev_dbg(netdev, "changing MTU from %d to %d\n",
+		   netdev->mtu, new_mtu);
+
+	/* must set new MTU before calling down or up */
+	netdev->mtu = new_mtu;
+
+	if (netif_running(netdev))
+		ixgbe_reinit_locked(adapter);
+
+	return 0;
+}
+
+/**
+ * ixgbe_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ **/
+int ixgbe_open(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	int err, queues;
+
+	/* disallow open during test */
+	if (test_bit(__IXGBE_TESTING, &adapter->state))
+		return -EBUSY;
+
+	netif_carrier_off(netdev);
+
+	/* allocate transmit descriptors */
+	err = ixgbe_setup_all_tx_resources(adapter);
+	if (err)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	err = ixgbe_setup_all_rx_resources(adapter);
+	if (err)
+		goto err_setup_rx;
+
+	ixgbe_configure(adapter);
+
+	err = ixgbe_request_irq(adapter);
+	if (err)
+		goto err_req_irq;
+
+	/* Notify the stack of the actual queue counts. */
+	queues = adapter->num_tx_queues;
+	err = netif_set_real_num_tx_queues(netdev, queues);
+	if (err)
+		goto err_set_queues;
+
+	queues = adapter->num_rx_queues;
+	err = netif_set_real_num_rx_queues(netdev, queues);
+	if (err)
+		goto err_set_queues;
+
+	ixgbe_ptp_init(adapter);
+
+	ixgbe_up_complete(adapter);
+
+	udp_tunnel_nic_reset_ntf(netdev);
+
+	return 0;
+
+err_set_queues:
+	ixgbe_free_irq(adapter);
+err_req_irq:
+	ixgbe_free_all_rx_resources(adapter);
+	if (hw->phy.ops.set_phy_power && !adapter->wol)
+		hw->phy.ops.set_phy_power(&adapter->hw, false);
+err_setup_rx:
+	ixgbe_free_all_tx_resources(adapter);
+err_setup_tx:
+	ixgbe_reset(adapter);
+
+	return err;
+}
+
+static void ixgbe_close_suspend(struct ixgbe_adapter *adapter)
+{
+	ixgbe_ptp_suspend(adapter);
+
+	if (adapter->hw.phy.ops.enter_lplu) {
+		adapter->hw.phy.reset_disable = true;
+		ixgbe_down(adapter);
+		adapter->hw.phy.ops.enter_lplu(&adapter->hw);
+		adapter->hw.phy.reset_disable = false;
+	} else {
+		ixgbe_down(adapter);
+	}
+
+	ixgbe_free_irq(adapter);
+
+	ixgbe_free_all_tx_resources(adapter);
+	ixgbe_free_all_rx_resources(adapter);
+}
+
+/**
+ * ixgbe_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the drivers control, but
+ * needs to be disabled.  A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ **/
+int ixgbe_close(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	ixgbe_ptp_stop(adapter);
+
+	if (netif_device_present(netdev))
+		ixgbe_close_suspend(adapter);
+
+	ixgbe_fdir_filter_exit(adapter);
+
+	ixgbe_release_hw_control(adapter);
+
+	return 0;
+}
+
+static int __maybe_unused ixgbe_resume(struct device *dev_d)
+{
+	struct pci_dev *pdev = to_pci_dev(dev_d);
+	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
+	struct net_device *netdev = adapter->netdev;
+	u32 err;
+
+	adapter->hw.hw_addr = adapter->io_addr;
+
+	err = pci_enable_device_mem(pdev);
+	if (err) {
+		e_dev_err("Cannot enable PCI device from suspend\n");
+		return err;
+	}
+	smp_mb__before_atomic();
+	clear_bit(__IXGBE_DISABLED, &adapter->state);
+	pci_set_master(pdev);
+
+	device_wakeup_disable(dev_d);
+
+	ixgbe_reset(adapter);
+
+	IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0);
+
+	rtnl_lock();
+	err = ixgbe_init_interrupt_scheme(adapter);
+	if (!err && netif_running(netdev))
+		err = ixgbe_open(netdev);
+
+
+	if (!err)
+		netif_device_attach(netdev);
+	rtnl_unlock();
+
+	return err;
+}
+
+static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake)
+{
+	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
+	struct net_device *netdev = adapter->netdev;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 ctrl;
+	u32 wufc = adapter->wol;
+
+	rtnl_lock();
+	netif_device_detach(netdev);
+
+	if (netif_running(netdev))
+		ixgbe_close_suspend(adapter);
+
+	ixgbe_clear_interrupt_scheme(adapter);
+	rtnl_unlock();
+
+	if (hw->mac.ops.stop_link_on_d3)
+		hw->mac.ops.stop_link_on_d3(hw);
+
+	if (wufc) {
+		u32 fctrl;
+
+		ixgbe_set_rx_mode(netdev);
+
+		/* enable the optics for 82599 SFP+ fiber as we can WoL */
+		if (hw->mac.ops.enable_tx_laser)
+			hw->mac.ops.enable_tx_laser(hw);
+
+		/* enable the reception of multicast packets */
+		fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+		fctrl |= IXGBE_FCTRL_MPE;
+		IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
+
+		ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
+		ctrl |= IXGBE_CTRL_GIO_DIS;
+		IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
+
+		IXGBE_WRITE_REG(hw, IXGBE_WUFC, wufc);
+	} else {
+		IXGBE_WRITE_REG(hw, IXGBE_WUC, 0);
+		IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0);
+	}
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		pci_wake_from_d3(pdev, false);
+		break;
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		pci_wake_from_d3(pdev, !!wufc);
+		break;
+	default:
+		break;
+	}
+
+	*enable_wake = !!wufc;
+	if (hw->phy.ops.set_phy_power && !*enable_wake)
+		hw->phy.ops.set_phy_power(hw, false);
+
+	ixgbe_release_hw_control(adapter);
+
+	if (!test_and_set_bit(__IXGBE_DISABLED, &adapter->state))
+		pci_disable_device(pdev);
+
+	return 0;
+}
+
+static int __maybe_unused ixgbe_suspend(struct device *dev_d)
+{
+	struct pci_dev *pdev = to_pci_dev(dev_d);
+	int retval;
+	bool wake;
+
+	retval = __ixgbe_shutdown(pdev, &wake);
+
+	device_set_wakeup_enable(dev_d, wake);
+
+	return retval;
+}
+
+static void ixgbe_shutdown(struct pci_dev *pdev)
+{
+	bool wake;
+
+	__ixgbe_shutdown(pdev, &wake);
+
+	if (system_state == SYSTEM_POWER_OFF) {
+		pci_wake_from_d3(pdev, wake);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
+}
+
+/**
+ * ixgbe_update_stats - Update the board statistics counters.
+ * @adapter: board private structure
+ **/
+void ixgbe_update_stats(struct ixgbe_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_hw_stats *hwstats = &adapter->stats;
+	u64 total_mpc = 0;
+	u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot;
+	u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0;
+	u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
+	u64 alloc_rx_page = 0;
+	u64 bytes = 0, packets = 0, hw_csum_rx_error = 0;
+
+	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
+	    test_bit(__IXGBE_RESETTING, &adapter->state))
+		return;
+
+	if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) {
+		u64 rsc_count = 0;
+		u64 rsc_flush = 0;
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			rsc_count += adapter->rx_ring[i]->rx_stats.rsc_count;
+			rsc_flush += adapter->rx_ring[i]->rx_stats.rsc_flush;
+		}
+		adapter->rsc_total_count = rsc_count;
+		adapter->rsc_total_flush = rsc_flush;
+	}
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct ixgbe_ring *rx_ring = READ_ONCE(adapter->rx_ring[i]);
+
+		if (!rx_ring)
+			continue;
+		non_eop_descs += rx_ring->rx_stats.non_eop_descs;
+		alloc_rx_page += rx_ring->rx_stats.alloc_rx_page;
+		alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed;
+		alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed;
+		hw_csum_rx_error += rx_ring->rx_stats.csum_err;
+		bytes += rx_ring->stats.bytes;
+		packets += rx_ring->stats.packets;
+	}
+	adapter->non_eop_descs = non_eop_descs;
+	adapter->alloc_rx_page = alloc_rx_page;
+	adapter->alloc_rx_page_failed = alloc_rx_page_failed;
+	adapter->alloc_rx_buff_failed = alloc_rx_buff_failed;
+	adapter->hw_csum_rx_error = hw_csum_rx_error;
+	netdev->stats.rx_bytes = bytes;
+	netdev->stats.rx_packets = packets;
+
+	bytes = 0;
+	packets = 0;
+	/* gather some stats to the adapter struct that are per queue */
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct ixgbe_ring *tx_ring = READ_ONCE(adapter->tx_ring[i]);
+
+		if (!tx_ring)
+			continue;
+		restart_queue += tx_ring->tx_stats.restart_queue;
+		tx_busy += tx_ring->tx_stats.tx_busy;
+		bytes += tx_ring->stats.bytes;
+		packets += tx_ring->stats.packets;
+	}
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		struct ixgbe_ring *xdp_ring = READ_ONCE(adapter->xdp_ring[i]);
+
+		if (!xdp_ring)
+			continue;
+		restart_queue += xdp_ring->tx_stats.restart_queue;
+		tx_busy += xdp_ring->tx_stats.tx_busy;
+		bytes += xdp_ring->stats.bytes;
+		packets += xdp_ring->stats.packets;
+	}
+	adapter->restart_queue = restart_queue;
+	adapter->tx_busy = tx_busy;
+	netdev->stats.tx_bytes = bytes;
+	netdev->stats.tx_packets = packets;
+
+	hwstats->crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
+
+	/* 8 register reads */
+	for (i = 0; i < 8; i++) {
+		/* for packet buffers not used, the register should read 0 */
+		mpc = IXGBE_READ_REG(hw, IXGBE_MPC(i));
+		missed_rx += mpc;
+		hwstats->mpc[i] += mpc;
+		total_mpc += hwstats->mpc[i];
+		hwstats->pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
+		hwstats->pxofftxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
+		switch (hw->mac.type) {
+		case ixgbe_mac_82598EB:
+			hwstats->rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i));
+			hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i));
+			hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i));
+			hwstats->pxonrxc[i] +=
+				IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
+			break;
+		case ixgbe_mac_82599EB:
+		case ixgbe_mac_X540:
+		case ixgbe_mac_X550:
+		case ixgbe_mac_X550EM_x:
+		case ixgbe_mac_x550em_a:
+			hwstats->pxonrxc[i] +=
+				IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
+			break;
+		default:
+			break;
+		}
+	}
+
+	/*16 register reads */
+	for (i = 0; i < 16; i++) {
+		hwstats->qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
+		hwstats->qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
+		if ((hw->mac.type == ixgbe_mac_82599EB) ||
+		    (hw->mac.type == ixgbe_mac_X540) ||
+		    (hw->mac.type == ixgbe_mac_X550) ||
+		    (hw->mac.type == ixgbe_mac_X550EM_x) ||
+		    (hw->mac.type == ixgbe_mac_x550em_a)) {
+			hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC_L(i));
+			IXGBE_READ_REG(hw, IXGBE_QBTC_H(i)); /* to clear */
+			hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC_L(i));
+			IXGBE_READ_REG(hw, IXGBE_QBRC_H(i)); /* to clear */
+		}
+	}
+
+	hwstats->gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
+	/* work around hardware counting issue */
+	hwstats->gprc -= missed_rx;
+
+	ixgbe_update_xoff_received(adapter);
+
+	/* 82598 hardware only has a 32 bit counter in the high register */
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
+		hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
+		hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
+		hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORH);
+		break;
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		/* OS2BMC stats are X540 and later */
+		hwstats->o2bgptc += IXGBE_READ_REG(hw, IXGBE_O2BGPTC);
+		hwstats->o2bspc += IXGBE_READ_REG(hw, IXGBE_O2BSPC);
+		hwstats->b2ospc += IXGBE_READ_REG(hw, IXGBE_B2OSPC);
+		hwstats->b2ogprc += IXGBE_READ_REG(hw, IXGBE_B2OGPRC);
+		fallthrough;
+	case ixgbe_mac_82599EB:
+		for (i = 0; i < 16; i++)
+			adapter->hw_rx_no_dma_resources +=
+					     IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
+		hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCL);
+		IXGBE_READ_REG(hw, IXGBE_GORCH); /* to clear */
+		hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL);
+		IXGBE_READ_REG(hw, IXGBE_GOTCH); /* to clear */
+		hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORL);
+		IXGBE_READ_REG(hw, IXGBE_TORH); /* to clear */
+		hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
+		hwstats->fdirmatch += IXGBE_READ_REG(hw, IXGBE_FDIRMATCH);
+		hwstats->fdirmiss += IXGBE_READ_REG(hw, IXGBE_FDIRMISS);
+#ifdef IXGBE_FCOE
+		hwstats->fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
+		hwstats->fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
+		hwstats->fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
+		hwstats->fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
+		hwstats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
+		hwstats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
+		/* Add up per cpu counters for total ddp aloc fail */
+		if (adapter->fcoe.ddp_pool) {
+			struct ixgbe_fcoe *fcoe = &adapter->fcoe;
+			struct ixgbe_fcoe_ddp_pool *ddp_pool;
+			unsigned int cpu;
+			u64 noddp = 0, noddp_ext_buff = 0;
+			for_each_possible_cpu(cpu) {
+				ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu);
+				noddp += ddp_pool->noddp;
+				noddp_ext_buff += ddp_pool->noddp_ext_buff;
+			}
+			hwstats->fcoe_noddp = noddp;
+			hwstats->fcoe_noddp_ext_buff = noddp_ext_buff;
+		}
+#endif /* IXGBE_FCOE */
+		break;
+	default:
+		break;
+	}
+	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
+	hwstats->bprc += bprc;
+	hwstats->mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		hwstats->mprc -= bprc;
+	hwstats->roc += IXGBE_READ_REG(hw, IXGBE_ROC);
+	hwstats->prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
+	hwstats->prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
+	hwstats->prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
+	hwstats->prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
+	hwstats->prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
+	hwstats->prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
+	hwstats->rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
+	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
+	hwstats->lxontxc += lxon;
+	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
+	hwstats->lxofftxc += lxoff;
+	hwstats->gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
+	hwstats->mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
+	/*
+	 * 82598 errata - tx of flow control packets is included in tx counters
+	 */
+	xon_off_tot = lxon + lxoff;
+	hwstats->gptc -= xon_off_tot;
+	hwstats->mptc -= xon_off_tot;
+	hwstats->gotc -= (xon_off_tot * (ETH_ZLEN + ETH_FCS_LEN));
+	hwstats->ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
+	hwstats->rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
+	hwstats->rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
+	hwstats->tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
+	hwstats->ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
+	hwstats->ptc64 -= xon_off_tot;
+	hwstats->ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
+	hwstats->ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
+	hwstats->ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
+	hwstats->ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
+	hwstats->ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
+	hwstats->bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
+
+	/* Fill out the OS statistics structure */
+	netdev->stats.multicast = hwstats->mprc;
+
+	/* Rx Errors */
+	netdev->stats.rx_errors = hwstats->crcerrs + hwstats->rlec;
+	netdev->stats.rx_dropped = 0;
+	netdev->stats.rx_length_errors = hwstats->rlec;
+	netdev->stats.rx_crc_errors = hwstats->crcerrs;
+	netdev->stats.rx_missed_errors = total_mpc;
+
+	/* VF Stats Collection - skip while resetting because these
+	 * are not clear on read and otherwise you'll sometimes get
+	 * crazy values.
+	 */
+	if (!test_bit(__IXGBE_RESETTING, &adapter->state)) {
+		for (i = 0; i < adapter->num_vfs; i++) {
+			UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPRC(i),
+						adapter->vfinfo[i].last_vfstats.gprc,
+						adapter->vfinfo[i].vfstats.gprc);
+			UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPTC(i),
+						adapter->vfinfo[i].last_vfstats.gptc,
+						adapter->vfinfo[i].vfstats.gptc);
+			UPDATE_VF_COUNTER_36bit(IXGBE_PVFGORC_LSB(i),
+						IXGBE_PVFGORC_MSB(i),
+						adapter->vfinfo[i].last_vfstats.gorc,
+						adapter->vfinfo[i].vfstats.gorc);
+			UPDATE_VF_COUNTER_36bit(IXGBE_PVFGOTC_LSB(i),
+						IXGBE_PVFGOTC_MSB(i),
+						adapter->vfinfo[i].last_vfstats.gotc,
+						adapter->vfinfo[i].vfstats.gotc);
+			UPDATE_VF_COUNTER_32bit(IXGBE_PVFMPRC(i),
+						adapter->vfinfo[i].last_vfstats.mprc,
+						adapter->vfinfo[i].vfstats.mprc);
+		}
+	}
+}
+
+/**
+ * ixgbe_fdir_reinit_subtask - worker thread to reinit FDIR filter table
+ * @adapter: pointer to the device adapter structure
+ **/
+static void ixgbe_fdir_reinit_subtask(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	if (!(adapter->flags2 & IXGBE_FLAG2_FDIR_REQUIRES_REINIT))
+		return;
+
+	adapter->flags2 &= ~IXGBE_FLAG2_FDIR_REQUIRES_REINIT;
+
+	/* if interface is down do nothing */
+	if (test_bit(__IXGBE_DOWN, &adapter->state))
+		return;
+
+	/* do nothing if we are not using signature filters */
+	if (!(adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE))
+		return;
+
+	adapter->fdir_overflow++;
+
+	if (ixgbe_reinit_fdir_tables_82599(hw) == 0) {
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			set_bit(__IXGBE_TX_FDIR_INIT_DONE,
+				&(adapter->tx_ring[i]->state));
+		for (i = 0; i < adapter->num_xdp_queues; i++)
+			set_bit(__IXGBE_TX_FDIR_INIT_DONE,
+				&adapter->xdp_ring[i]->state);
+		/* re-enable flow director interrupts */
+		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
+	} else {
+		e_err(probe, "failed to finish FDIR re-initialization, "
+		      "ignored adding FDIR ATR filters\n");
+	}
+}
+
+/**
+ * ixgbe_check_hang_subtask - check for hung queues and dropped interrupts
+ * @adapter: pointer to the device adapter structure
+ *
+ * This function serves two purposes.  First it strobes the interrupt lines
+ * in order to make certain interrupts are occurring.  Secondly it sets the
+ * bits needed to check for TX hangs.  As a result we should immediately
+ * determine if a hang has occurred.
+ */
+static void ixgbe_check_hang_subtask(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u64 eics = 0;
+	int i;
+
+	/* If we're down, removing or resetting, just bail */
+	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
+	    test_bit(__IXGBE_REMOVING, &adapter->state) ||
+	    test_bit(__IXGBE_RESETTING, &adapter->state))
+		return;
+
+	/* Force detection of hung controller */
+	if (netif_carrier_ok(adapter->netdev)) {
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			set_check_for_tx_hang(adapter->tx_ring[i]);
+		for (i = 0; i < adapter->num_xdp_queues; i++)
+			set_check_for_tx_hang(adapter->xdp_ring[i]);
+	}
+
+	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) {
+		/*
+		 * for legacy and MSI interrupts don't set any bits
+		 * that are enabled for EIAM, because this operation
+		 * would set *both* EIMS and EICS for any bit in EIAM
+		 */
+		IXGBE_WRITE_REG(hw, IXGBE_EICS,
+			(IXGBE_EICS_TCP_TIMER | IXGBE_EICS_OTHER));
+	} else {
+		/* get one bit for every active tx/rx interrupt vector */
+		for (i = 0; i < adapter->num_q_vectors; i++) {
+			struct ixgbe_q_vector *qv = adapter->q_vector[i];
+			if (qv->rx.ring || qv->tx.ring)
+				eics |= BIT_ULL(i);
+		}
+	}
+
+	/* Cause software interrupt to ensure rings are cleaned */
+	ixgbe_irq_rearm_queues(adapter, eics);
+}
+
+/**
+ * ixgbe_watchdog_update_link - update the link status
+ * @adapter: pointer to the device adapter structure
+ **/
+static void ixgbe_watchdog_update_link(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 link_speed = adapter->link_speed;
+	bool link_up = adapter->link_up;
+	bool pfc_en = adapter->dcb_cfg.pfc_mode_enable;
+
+	if (!(adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE))
+		return;
+
+	if (hw->mac.ops.check_link) {
+		hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
+	} else {
+		/* always assume link is up, if no check link function */
+		link_speed = IXGBE_LINK_SPEED_10GB_FULL;
+		link_up = true;
+	}
+
+	if (adapter->ixgbe_ieee_pfc)
+		pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
+
+	if (link_up && !((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && pfc_en)) {
+		hw->mac.ops.fc_enable(hw);
+		ixgbe_set_rx_drop_en(adapter);
+	}
+
+	if (link_up ||
+	    time_after(jiffies, (adapter->link_check_timeout +
+				 IXGBE_TRY_LINK_TIMEOUT))) {
+		adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
+		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMC_LSC);
+		IXGBE_WRITE_FLUSH(hw);
+	}
+
+	adapter->link_up = link_up;
+	adapter->link_speed = link_speed;
+}
+
+static void ixgbe_update_default_up(struct ixgbe_adapter *adapter)
+{
+#ifdef CONFIG_IXGBE_DCB
+	struct net_device *netdev = adapter->netdev;
+	struct dcb_app app = {
+			      .selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE,
+			      .protocol = 0,
+			     };
+	u8 up = 0;
+
+	if (adapter->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)
+		up = dcb_ieee_getapp_mask(netdev, &app);
+
+	adapter->default_up = (up > 1) ? (ffs(up) - 1) : 0;
+#endif
+}
+
+/**
+ * ixgbe_watchdog_link_is_up - update netif_carrier status and
+ *                             print link up message
+ * @adapter: pointer to the device adapter structure
+ **/
+static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 link_speed = adapter->link_speed;
+	const char *speed_str;
+	bool flow_rx, flow_tx;
+
+	/* only continue if link was previously down */
+	if (netif_carrier_ok(netdev))
+		return;
+
+	adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB: {
+		u32 frctl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+		u32 rmcs = IXGBE_READ_REG(hw, IXGBE_RMCS);
+		flow_rx = !!(frctl & IXGBE_FCTRL_RFCE);
+		flow_tx = !!(rmcs & IXGBE_RMCS_TFCE_802_3X);
+	}
+		break;
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+	case ixgbe_mac_82599EB: {
+		u32 mflcn = IXGBE_READ_REG(hw, IXGBE_MFLCN);
+		u32 fccfg = IXGBE_READ_REG(hw, IXGBE_FCCFG);
+		flow_rx = !!(mflcn & IXGBE_MFLCN_RFCE);
+		flow_tx = !!(fccfg & IXGBE_FCCFG_TFCE_802_3X);
+	}
+		break;
+	default:
+		flow_tx = false;
+		flow_rx = false;
+		break;
+	}
+
+	adapter->last_rx_ptp_check = jiffies;
+
+	if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state))
+		ixgbe_ptp_start_cyclecounter(adapter);
+
+	switch (link_speed) {
+	case IXGBE_LINK_SPEED_10GB_FULL:
+		speed_str = "10 Gbps";
+		break;
+	case IXGBE_LINK_SPEED_5GB_FULL:
+		speed_str = "5 Gbps";
+		break;
+	case IXGBE_LINK_SPEED_2_5GB_FULL:
+		speed_str = "2.5 Gbps";
+		break;
+	case IXGBE_LINK_SPEED_1GB_FULL:
+		speed_str = "1 Gbps";
+		break;
+	case IXGBE_LINK_SPEED_100_FULL:
+		speed_str = "100 Mbps";
+		break;
+	case IXGBE_LINK_SPEED_10_FULL:
+		speed_str = "10 Mbps";
+		break;
+	default:
+		speed_str = "unknown speed";
+		break;
+	}
+	e_info(drv, "NIC Link is Up %s, Flow Control: %s\n", speed_str,
+	       ((flow_rx && flow_tx) ? "RX/TX" :
+	       (flow_rx ? "RX" :
+	       (flow_tx ? "TX" : "None"))));
+
+	netif_carrier_on(netdev);
+	ixgbe_check_vf_rate_limit(adapter);
+
+	/* enable transmits */
+	netif_tx_wake_all_queues(adapter->netdev);
+
+	/* update the default user priority for VFs */
+	ixgbe_update_default_up(adapter);
+
+	/* ping all the active vfs to let them know link has changed */
+	ixgbe_ping_all_vfs(adapter);
+}
+
+/**
+ * ixgbe_watchdog_link_is_down - update netif_carrier status and
+ *                               print link down message
+ * @adapter: pointer to the adapter structure
+ **/
+static void ixgbe_watchdog_link_is_down(struct ixgbe_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	adapter->link_up = false;
+	adapter->link_speed = 0;
+
+	/* only continue if link was up previously */
+	if (!netif_carrier_ok(netdev))
+		return;
+
+	/* poll for SFP+ cable when link is down */
+	if (ixgbe_is_sfp(hw) && hw->mac.type == ixgbe_mac_82598EB)
+		adapter->flags2 |= IXGBE_FLAG2_SEARCH_FOR_SFP;
+
+	if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state))
+		ixgbe_ptp_start_cyclecounter(adapter);
+
+	e_info(drv, "NIC Link is Down\n");
+	netif_carrier_off(netdev);
+
+	/* ping all the active vfs to let them know link has changed */
+	ixgbe_ping_all_vfs(adapter);
+}
+
+static bool ixgbe_ring_tx_pending(struct ixgbe_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct ixgbe_ring *tx_ring = adapter->tx_ring[i];
+
+		if (tx_ring->next_to_use != tx_ring->next_to_clean)
+			return true;
+	}
+
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		struct ixgbe_ring *ring = adapter->xdp_ring[i];
+
+		if (ring->next_to_use != ring->next_to_clean)
+			return true;
+	}
+
+	return false;
+}
+
+static bool ixgbe_vf_tx_pending(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
+	u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
+
+	int i, j;
+
+	if (!adapter->num_vfs)
+		return false;
+
+	/* resetting the PF is only needed for MAC before X550 */
+	if (hw->mac.type >= ixgbe_mac_X550)
+		return false;
+
+	for (i = 0; i < adapter->num_vfs; i++) {
+		for (j = 0; j < q_per_pool; j++) {
+			u32 h, t;
+
+			h = IXGBE_READ_REG(hw, IXGBE_PVFTDHN(q_per_pool, i, j));
+			t = IXGBE_READ_REG(hw, IXGBE_PVFTDTN(q_per_pool, i, j));
+
+			if (h != t)
+				return true;
+		}
+	}
+
+	return false;
+}
+
+/**
+ * ixgbe_watchdog_flush_tx - flush queues on link down
+ * @adapter: pointer to the device adapter structure
+ **/
+static void ixgbe_watchdog_flush_tx(struct ixgbe_adapter *adapter)
+{
+	if (!netif_carrier_ok(adapter->netdev)) {
+		if (ixgbe_ring_tx_pending(adapter) ||
+		    ixgbe_vf_tx_pending(adapter)) {
+			/* We've lost link, so the controller stops DMA,
+			 * but we've got queued Tx work that's never going
+			 * to get done, so reset controller to flush Tx.
+			 * (Do the reset outside of interrupt context).
+			 */
+			e_warn(drv, "initiating reset to clear Tx work after link loss\n");
+			set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
+		}
+	}
+}
+
+#ifdef CONFIG_PCI_IOV
+static void ixgbe_bad_vf_abort(struct ixgbe_adapter *adapter, u32 vf)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	if (adapter->hw.mac.type == ixgbe_mac_82599EB &&
+	    adapter->flags2 & IXGBE_FLAG2_AUTO_DISABLE_VF) {
+		adapter->vfinfo[vf].primary_abort_count++;
+		if (adapter->vfinfo[vf].primary_abort_count ==
+		    IXGBE_PRIMARY_ABORT_LIMIT) {
+			ixgbe_set_vf_link_state(adapter, vf,
+						IFLA_VF_LINK_STATE_DISABLE);
+			adapter->vfinfo[vf].primary_abort_count = 0;
+
+			e_info(drv,
+			       "Malicious Driver Detection event detected on PF %d VF %d MAC: %pM mdd-disable-vf=on",
+			       hw->bus.func, vf,
+			       adapter->vfinfo[vf].vf_mac_addresses);
+		}
+	}
+}
+
+static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct pci_dev *pdev = adapter->pdev;
+	unsigned int vf;
+	u32 gpc;
+
+	if (!(netif_carrier_ok(adapter->netdev)))
+		return;
+
+	gpc = IXGBE_READ_REG(hw, IXGBE_TXDGPC);
+	if (gpc) /* If incrementing then no need for the check below */
+		return;
+	/* Check to see if a bad DMA write target from an errant or
+	 * malicious VF has caused a PCIe error.  If so then we can
+	 * issue a VFLR to the offending VF(s) and then resume without
+	 * requesting a full slot reset.
+	 */
+
+	if (!pdev)
+		return;
+
+	/* check status reg for all VFs owned by this PF */
+	for (vf = 0; vf < adapter->num_vfs; ++vf) {
+		struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev;
+		u16 status_reg;
+
+		if (!vfdev)
+			continue;
+		pci_read_config_word(vfdev, PCI_STATUS, &status_reg);
+		if (status_reg != IXGBE_FAILED_READ_CFG_WORD &&
+		    status_reg & PCI_STATUS_REC_MASTER_ABORT) {
+			ixgbe_bad_vf_abort(adapter, vf);
+			pcie_flr(vfdev);
+		}
+	}
+}
+
+static void ixgbe_spoof_check(struct ixgbe_adapter *adapter)
+{
+	u32 ssvpc;
+
+	/* Do not perform spoof check for 82598 or if not in IOV mode */
+	if (adapter->hw.mac.type == ixgbe_mac_82598EB ||
+	    adapter->num_vfs == 0)
+		return;
+
+	ssvpc = IXGBE_READ_REG(&adapter->hw, IXGBE_SSVPC);
+
+	/*
+	 * ssvpc register is cleared on read, if zero then no
+	 * spoofed packets in the last interval.
+	 */
+	if (!ssvpc)
+		return;
+
+	e_warn(drv, "%u Spoofed packets detected\n", ssvpc);
+}
+#else
+static void ixgbe_spoof_check(struct ixgbe_adapter __always_unused *adapter)
+{
+}
+
+static void
+ixgbe_check_for_bad_vf(struct ixgbe_adapter __always_unused *adapter)
+{
+}
+#endif /* CONFIG_PCI_IOV */
+
+
+/**
+ * ixgbe_watchdog_subtask - check and bring link up
+ * @adapter: pointer to the device adapter structure
+ **/
+static void ixgbe_watchdog_subtask(struct ixgbe_adapter *adapter)
+{
+	/* if interface is down, removing or resetting, do nothing */
+	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
+	    test_bit(__IXGBE_REMOVING, &adapter->state) ||
+	    test_bit(__IXGBE_RESETTING, &adapter->state))
+		return;
+
+	ixgbe_watchdog_update_link(adapter);
+
+	if (adapter->link_up)
+		ixgbe_watchdog_link_is_up(adapter);
+	else
+		ixgbe_watchdog_link_is_down(adapter);
+
+	ixgbe_check_for_bad_vf(adapter);
+	ixgbe_spoof_check(adapter);
+	ixgbe_update_stats(adapter);
+
+	ixgbe_watchdog_flush_tx(adapter);
+}
+
+/**
+ * ixgbe_sfp_detection_subtask - poll for SFP+ cable
+ * @adapter: the ixgbe adapter structure
+ **/
+static void ixgbe_sfp_detection_subtask(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	s32 err;
+
+	/* not searching for SFP so there is nothing to do here */
+	if (!(adapter->flags2 & IXGBE_FLAG2_SEARCH_FOR_SFP) &&
+	    !(adapter->flags2 & IXGBE_FLAG2_SFP_NEEDS_RESET))
+		return;
+
+	if (adapter->sfp_poll_time &&
+	    time_after(adapter->sfp_poll_time, jiffies))
+		return; /* If not yet time to poll for SFP */
+
+	/* someone else is in init, wait until next service event */
+	if (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state))
+		return;
+
+	adapter->sfp_poll_time = jiffies + IXGBE_SFP_POLL_JIFFIES - 1;
+
+	err = hw->phy.ops.identify_sfp(hw);
+	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED)
+		goto sfp_out;
+
+	if (err == IXGBE_ERR_SFP_NOT_PRESENT) {
+		/* If no cable is present, then we need to reset
+		 * the next time we find a good cable. */
+		adapter->flags2 |= IXGBE_FLAG2_SFP_NEEDS_RESET;
+	}
+
+	/* exit on error */
+	if (err)
+		goto sfp_out;
+
+	/* exit if reset not needed */
+	if (!(adapter->flags2 & IXGBE_FLAG2_SFP_NEEDS_RESET))
+		goto sfp_out;
+
+	adapter->flags2 &= ~IXGBE_FLAG2_SFP_NEEDS_RESET;
+
+	/*
+	 * A module may be identified correctly, but the EEPROM may not have
+	 * support for that module.  setup_sfp() will fail in that case, so
+	 * we should not allow that module to load.
+	 */
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		err = hw->phy.ops.reset(hw);
+	else
+		err = hw->mac.ops.setup_sfp(hw);
+
+	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED)
+		goto sfp_out;
+
+	adapter->flags |= IXGBE_FLAG_NEED_LINK_CONFIG;
+	e_info(probe, "detected SFP+: %d\n", hw->phy.sfp_type);
+
+sfp_out:
+	clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
+
+	if ((err == IXGBE_ERR_SFP_NOT_SUPPORTED) &&
+	    (adapter->netdev->reg_state == NETREG_REGISTERED)) {
+		e_dev_err("failed to initialize because an unsupported "
+			  "SFP+ module type was detected.\n");
+		e_dev_err("Reload the driver after installing a "
+			  "supported module.\n");
+		unregister_netdev(adapter->netdev);
+	}
+}
+
+/**
+ * ixgbe_sfp_link_config_subtask - set up link SFP after module install
+ * @adapter: the ixgbe adapter structure
+ **/
+static void ixgbe_sfp_link_config_subtask(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 cap_speed;
+	u32 speed;
+	bool autoneg = false;
+
+	if (!(adapter->flags & IXGBE_FLAG_NEED_LINK_CONFIG))
+		return;
+
+	/* someone else is in init, wait until next service event */
+	if (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state))
+		return;
+
+	adapter->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
+
+	hw->mac.ops.get_link_capabilities(hw, &cap_speed, &autoneg);
+
+	/* advertise highest capable link speed */
+	if (!autoneg && (cap_speed & IXGBE_LINK_SPEED_10GB_FULL))
+		speed = IXGBE_LINK_SPEED_10GB_FULL;
+	else
+		speed = cap_speed & (IXGBE_LINK_SPEED_10GB_FULL |
+				     IXGBE_LINK_SPEED_1GB_FULL);
+
+	if (hw->mac.ops.setup_link)
+		hw->mac.ops.setup_link(hw, speed, true);
+
+	adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
+	adapter->link_check_timeout = jiffies;
+	clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
+}
+
+/**
+ * ixgbe_service_timer - Timer Call-back
+ * @t: pointer to timer_list structure
+ **/
+static void ixgbe_service_timer(struct timer_list *t)
+{
+	struct ixgbe_adapter *adapter = from_timer(adapter, t, service_timer);
+	unsigned long next_event_offset;
+
+	/* poll faster when waiting for link */
+	if (adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE)
+		next_event_offset = HZ / 10;
+	else
+		next_event_offset = HZ * 2;
+
+	/* Reset the timer */
+	mod_timer(&adapter->service_timer, next_event_offset + jiffies);
+
+	ixgbe_service_event_schedule(adapter);
+}
+
+static void ixgbe_phy_interrupt_subtask(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 status;
+
+	if (!(adapter->flags2 & IXGBE_FLAG2_PHY_INTERRUPT))
+		return;
+
+	adapter->flags2 &= ~IXGBE_FLAG2_PHY_INTERRUPT;
+
+	if (!hw->phy.ops.handle_lasi)
+		return;
+
+	status = hw->phy.ops.handle_lasi(&adapter->hw);
+	if (status != IXGBE_ERR_OVERTEMP)
+		return;
+
+	e_crit(drv, "%s\n", ixgbe_overheat_msg);
+}
+
+static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter)
+{
+	if (!test_and_clear_bit(__IXGBE_RESET_REQUESTED, &adapter->state))
+		return;
+
+	rtnl_lock();
+	/* If we're already down, removing or resetting, just bail */
+	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
+	    test_bit(__IXGBE_REMOVING, &adapter->state) ||
+	    test_bit(__IXGBE_RESETTING, &adapter->state)) {
+		rtnl_unlock();
+		return;
+	}
+
+	ixgbe_dump(adapter);
+	netdev_err(adapter->netdev, "Reset adapter\n");
+	adapter->tx_timeout_count++;
+
+	ixgbe_reinit_locked(adapter);
+	rtnl_unlock();
+}
+
+/**
+ * ixgbe_check_fw_error - Check firmware for errors
+ * @adapter: the adapter private structure
+ *
+ * Check firmware errors in register FWSM
+ */
+static bool ixgbe_check_fw_error(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 fwsm;
+
+	/* read fwsm.ext_err_ind register and log errors */
+	fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw));
+
+	if (fwsm & IXGBE_FWSM_EXT_ERR_IND_MASK ||
+	    !(fwsm & IXGBE_FWSM_FW_VAL_BIT))
+		e_dev_warn("Warning firmware error detected FWSM: 0x%08X\n",
+			   fwsm);
+
+	if (hw->mac.ops.fw_recovery_mode && hw->mac.ops.fw_recovery_mode(hw)) {
+		e_dev_err("Firmware recovery mode detected. Limiting functionality. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * ixgbe_service_task - manages and runs subtasks
+ * @work: pointer to work_struct containing our data
+ **/
+static void ixgbe_service_task(struct work_struct *work)
+{
+	struct ixgbe_adapter *adapter = container_of(work,
+						     struct ixgbe_adapter,
+						     service_task);
+	if (ixgbe_removed(adapter->hw.hw_addr)) {
+		if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
+			rtnl_lock();
+			ixgbe_down(adapter);
+			rtnl_unlock();
+		}
+		ixgbe_service_event_complete(adapter);
+		return;
+	}
+	if (ixgbe_check_fw_error(adapter)) {
+		if (!test_bit(__IXGBE_DOWN, &adapter->state))
+			unregister_netdev(adapter->netdev);
+		ixgbe_service_event_complete(adapter);
+		return;
+	}
+	ixgbe_reset_subtask(adapter);
+	ixgbe_phy_interrupt_subtask(adapter);
+	ixgbe_sfp_detection_subtask(adapter);
+	ixgbe_sfp_link_config_subtask(adapter);
+	ixgbe_check_overtemp_subtask(adapter);
+	ixgbe_watchdog_subtask(adapter);
+	ixgbe_fdir_reinit_subtask(adapter);
+	ixgbe_check_hang_subtask(adapter);
+
+	if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) {
+		ixgbe_ptp_overflow_check(adapter);
+		if (adapter->flags & IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER)
+			ixgbe_ptp_rx_hang(adapter);
+		ixgbe_ptp_tx_hang(adapter);
+	}
+
+	ixgbe_service_event_complete(adapter);
+}
+
+static int ixgbe_tso(struct ixgbe_ring *tx_ring,
+		     struct ixgbe_tx_buffer *first,
+		     u8 *hdr_len,
+		     struct ixgbe_ipsec_tx_data *itd)
+{
+	u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
+	struct sk_buff *skb = first->skb;
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		struct udphdr *udp;
+		unsigned char *hdr;
+	} l4;
+	u32 paylen, l4_offset;
+	u32 fceof_saidx = 0;
+	int err;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	err = skb_cow_head(skb, 0);
+	if (err < 0)
+		return err;
+
+	if (eth_p_mpls(first->protocol))
+		ip.hdr = skb_inner_network_header(skb);
+	else
+		ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_checksum_start(skb);
+
+	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
+	type_tucmd = (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ?
+		      IXGBE_ADVTXD_TUCMD_L4T_UDP : IXGBE_ADVTXD_TUCMD_L4T_TCP;
+
+	/* initialize outer IP header fields */
+	if (ip.v4->version == 4) {
+		unsigned char *csum_start = skb_checksum_start(skb);
+		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
+		int len = csum_start - trans_start;
+
+		/* IP header will have to cancel out any data that
+		 * is not a part of the outer IP header, so set to
+		 * a reverse csum if needed, else init check to 0.
+		 */
+		ip.v4->check = (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) ?
+					   csum_fold(csum_partial(trans_start,
+								  len, 0)) : 0;
+		type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
+
+		ip.v4->tot_len = 0;
+		first->tx_flags |= IXGBE_TX_FLAGS_TSO |
+				   IXGBE_TX_FLAGS_CSUM |
+				   IXGBE_TX_FLAGS_IPV4;
+	} else {
+		ip.v6->payload_len = 0;
+		first->tx_flags |= IXGBE_TX_FLAGS_TSO |
+				   IXGBE_TX_FLAGS_CSUM;
+	}
+
+	/* determine offset of inner transport header */
+	l4_offset = l4.hdr - skb->data;
+
+	/* remove payload length from inner checksum */
+	paylen = skb->len - l4_offset;
+
+	if (type_tucmd & IXGBE_ADVTXD_TUCMD_L4T_TCP) {
+		/* compute length of segmentation header */
+		*hdr_len = (l4.tcp->doff * 4) + l4_offset;
+		csum_replace_by_diff(&l4.tcp->check,
+				     (__force __wsum)htonl(paylen));
+	} else {
+		/* compute length of segmentation header */
+		*hdr_len = sizeof(*l4.udp) + l4_offset;
+		csum_replace_by_diff(&l4.udp->check,
+				     (__force __wsum)htonl(paylen));
+	}
+
+	/* update gso size and bytecount with header size */
+	first->gso_segs = skb_shinfo(skb)->gso_segs;
+	first->bytecount += (first->gso_segs - 1) * *hdr_len;
+
+	/* mss_l4len_id: use 0 as index for TSO */
+	mss_l4len_idx = (*hdr_len - l4_offset) << IXGBE_ADVTXD_L4LEN_SHIFT;
+	mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT;
+
+	fceof_saidx |= itd->sa_idx;
+	type_tucmd |= itd->flags | itd->trailer_len;
+
+	/* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */
+	vlan_macip_lens = l4.hdr - ip.hdr;
+	vlan_macip_lens |= (ip.hdr - skb->data) << IXGBE_ADVTXD_MACLEN_SHIFT;
+	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
+
+	ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd,
+			  mss_l4len_idx);
+
+	return 1;
+}
+
+static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
+			  struct ixgbe_tx_buffer *first,
+			  struct ixgbe_ipsec_tx_data *itd)
+{
+	struct sk_buff *skb = first->skb;
+	u32 vlan_macip_lens = 0;
+	u32 fceof_saidx = 0;
+	u32 type_tucmd = 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+csum_failed:
+		if (!(first->tx_flags & (IXGBE_TX_FLAGS_HW_VLAN |
+					 IXGBE_TX_FLAGS_CC)))
+			return;
+		goto no_csum;
+	}
+
+	switch (skb->csum_offset) {
+	case offsetof(struct tcphdr, check):
+		type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_TCP;
+		fallthrough;
+	case offsetof(struct udphdr, check):
+		break;
+	case offsetof(struct sctphdr, checksum):
+		/* validate that this is actually an SCTP request */
+		if (skb_csum_is_sctp(skb)) {
+			type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_SCTP;
+			break;
+		}
+		fallthrough;
+	default:
+		skb_checksum_help(skb);
+		goto csum_failed;
+	}
+
+	/* update TX checksum flag */
+	first->tx_flags |= IXGBE_TX_FLAGS_CSUM;
+	vlan_macip_lens = skb_checksum_start_offset(skb) -
+			  skb_network_offset(skb);
+no_csum:
+	/* vlan_macip_lens: MACLEN, VLAN tag */
+	vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT;
+	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
+
+	fceof_saidx |= itd->sa_idx;
+	type_tucmd |= itd->flags | itd->trailer_len;
+
+	ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, 0);
+}
+
+#define IXGBE_SET_FLAG(_input, _flag, _result) \
+	((_flag <= _result) ? \
+	 ((u32)(_input & _flag) * (_result / _flag)) : \
+	 ((u32)(_input & _flag) / (_flag / _result)))
+
+static u32 ixgbe_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
+{
+	/* set type for advanced descriptor with frame checksum insertion */
+	u32 cmd_type = IXGBE_ADVTXD_DTYP_DATA |
+		       IXGBE_ADVTXD_DCMD_DEXT |
+		       IXGBE_ADVTXD_DCMD_IFCS;
+
+	/* set HW vlan bit if vlan is present */
+	cmd_type |= IXGBE_SET_FLAG(tx_flags, IXGBE_TX_FLAGS_HW_VLAN,
+				   IXGBE_ADVTXD_DCMD_VLE);
+
+	/* set segmentation enable bits for TSO/FSO */
+	cmd_type |= IXGBE_SET_FLAG(tx_flags, IXGBE_TX_FLAGS_TSO,
+				   IXGBE_ADVTXD_DCMD_TSE);
+
+	/* set timestamp bit if present */
+	cmd_type |= IXGBE_SET_FLAG(tx_flags, IXGBE_TX_FLAGS_TSTAMP,
+				   IXGBE_ADVTXD_MAC_TSTAMP);
+
+	/* insert frame checksum */
+	cmd_type ^= IXGBE_SET_FLAG(skb->no_fcs, 1, IXGBE_ADVTXD_DCMD_IFCS);
+
+	return cmd_type;
+}
+
+static void ixgbe_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc,
+				   u32 tx_flags, unsigned int paylen)
+{
+	u32 olinfo_status = paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
+
+	/* enable L4 checksum for TSO and TX checksum offload */
+	olinfo_status |= IXGBE_SET_FLAG(tx_flags,
+					IXGBE_TX_FLAGS_CSUM,
+					IXGBE_ADVTXD_POPTS_TXSM);
+
+	/* enable IPv4 checksum for TSO */
+	olinfo_status |= IXGBE_SET_FLAG(tx_flags,
+					IXGBE_TX_FLAGS_IPV4,
+					IXGBE_ADVTXD_POPTS_IXSM);
+
+	/* enable IPsec */
+	olinfo_status |= IXGBE_SET_FLAG(tx_flags,
+					IXGBE_TX_FLAGS_IPSEC,
+					IXGBE_ADVTXD_POPTS_IPSEC);
+
+	/*
+	 * Check Context must be set if Tx switch is enabled, which it
+	 * always is for case where virtual functions are running
+	 */
+	olinfo_status |= IXGBE_SET_FLAG(tx_flags,
+					IXGBE_TX_FLAGS_CC,
+					IXGBE_ADVTXD_CC);
+
+	tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+}
+
+static int __ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
+{
+	if (!netif_subqueue_try_stop(tx_ring->netdev, tx_ring->queue_index,
+				     ixgbe_desc_unused(tx_ring), size))
+		return -EBUSY;
+
+	++tx_ring->tx_stats.restart_queue;
+	return 0;
+}
+
+static inline int ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size)
+{
+	if (likely(ixgbe_desc_unused(tx_ring) >= size))
+		return 0;
+
+	return __ixgbe_maybe_stop_tx(tx_ring, size);
+}
+
+static int ixgbe_tx_map(struct ixgbe_ring *tx_ring,
+			struct ixgbe_tx_buffer *first,
+			const u8 hdr_len)
+{
+	struct sk_buff *skb = first->skb;
+	struct ixgbe_tx_buffer *tx_buffer;
+	union ixgbe_adv_tx_desc *tx_desc;
+	skb_frag_t *frag;
+	dma_addr_t dma;
+	unsigned int data_len, size;
+	u32 tx_flags = first->tx_flags;
+	u32 cmd_type = ixgbe_tx_cmd_type(skb, tx_flags);
+	u16 i = tx_ring->next_to_use;
+
+	tx_desc = IXGBE_TX_DESC(tx_ring, i);
+
+	ixgbe_tx_olinfo_status(tx_desc, tx_flags, skb->len - hdr_len);
+
+	size = skb_headlen(skb);
+	data_len = skb->data_len;
+
+#ifdef IXGBE_FCOE
+	if (tx_flags & IXGBE_TX_FLAGS_FCOE) {
+		if (data_len < sizeof(struct fcoe_crc_eof)) {
+			size -= sizeof(struct fcoe_crc_eof) - data_len;
+			data_len = 0;
+		} else {
+			data_len -= sizeof(struct fcoe_crc_eof);
+		}
+	}
+
+#endif
+	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+	tx_buffer = first;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_error;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buffer, len, size);
+		dma_unmap_addr_set(tx_buffer, dma, dma);
+
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+		while (unlikely(size > IXGBE_MAX_DATA_PER_TXD)) {
+			tx_desc->read.cmd_type_len =
+				cpu_to_le32(cmd_type ^ IXGBE_MAX_DATA_PER_TXD);
+
+			i++;
+			tx_desc++;
+			if (i == tx_ring->count) {
+				tx_desc = IXGBE_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+			tx_desc->read.olinfo_status = 0;
+
+			dma += IXGBE_MAX_DATA_PER_TXD;
+			size -= IXGBE_MAX_DATA_PER_TXD;
+
+			tx_desc->read.buffer_addr = cpu_to_le64(dma);
+		}
+
+		if (likely(!data_len))
+			break;
+
+		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
+
+		i++;
+		tx_desc++;
+		if (i == tx_ring->count) {
+			tx_desc = IXGBE_TX_DESC(tx_ring, 0);
+			i = 0;
+		}
+		tx_desc->read.olinfo_status = 0;
+
+#ifdef IXGBE_FCOE
+		size = min_t(unsigned int, data_len, skb_frag_size(frag));
+#else
+		size = skb_frag_size(frag);
+#endif
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+				       DMA_TO_DEVICE);
+
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+	}
+
+	/* write last descriptor with RS and EOP bits */
+	cmd_type |= size | IXGBE_TXD_CMD;
+	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
+
+	/* set the timestamp */
+	first->time_stamp = jiffies;
+
+	skb_tx_timestamp(skb);
+
+	/*
+	 * Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.  (Only applicable for weak-ordered
+	 * memory model archs, such as IA-64).
+	 *
+	 * We also need this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
+
+	tx_ring->next_to_use = i;
+
+	ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
+		writel(i, tx_ring->tail);
+	}
+
+	return 0;
+dma_error:
+	dev_err(tx_ring->dev, "TX DMA map failed\n");
+
+	/* clear dma mappings for failed tx_buffer_info map */
+	for (;;) {
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+		if (dma_unmap_len(tx_buffer, len))
+			dma_unmap_page(tx_ring->dev,
+				       dma_unmap_addr(tx_buffer, dma),
+				       dma_unmap_len(tx_buffer, len),
+				       DMA_TO_DEVICE);
+		dma_unmap_len_set(tx_buffer, len, 0);
+		if (tx_buffer == first)
+			break;
+		if (i == 0)
+			i += tx_ring->count;
+		i--;
+	}
+
+	dev_kfree_skb_any(first->skb);
+	first->skb = NULL;
+
+	tx_ring->next_to_use = i;
+
+	return -1;
+}
+
+static void ixgbe_atr(struct ixgbe_ring *ring,
+		      struct ixgbe_tx_buffer *first)
+{
+	struct ixgbe_q_vector *q_vector = ring->q_vector;
+	union ixgbe_atr_hash_dword input = { .dword = 0 };
+	union ixgbe_atr_hash_dword common = { .dword = 0 };
+	union {
+		unsigned char *network;
+		struct iphdr *ipv4;
+		struct ipv6hdr *ipv6;
+	} hdr;
+	struct tcphdr *th;
+	unsigned int hlen;
+	struct sk_buff *skb;
+	__be16 vlan_id;
+	int l4_proto;
+
+	/* if ring doesn't have a interrupt vector, cannot perform ATR */
+	if (!q_vector)
+		return;
+
+	/* do nothing if sampling is disabled */
+	if (!ring->atr_sample_rate)
+		return;
+
+	ring->atr_count++;
+
+	/* currently only IPv4/IPv6 with TCP is supported */
+	if ((first->protocol != htons(ETH_P_IP)) &&
+	    (first->protocol != htons(ETH_P_IPV6)))
+		return;
+
+	/* snag network header to get L4 type and address */
+	skb = first->skb;
+	hdr.network = skb_network_header(skb);
+	if (unlikely(hdr.network <= skb->data))
+		return;
+	if (skb->encapsulation &&
+	    first->protocol == htons(ETH_P_IP) &&
+	    hdr.ipv4->protocol == IPPROTO_UDP) {
+		struct ixgbe_adapter *adapter = q_vector->adapter;
+
+		if (unlikely(skb_tail_pointer(skb) < hdr.network +
+			     vxlan_headroom(0)))
+			return;
+
+		/* verify the port is recognized as VXLAN */
+		if (adapter->vxlan_port &&
+		    udp_hdr(skb)->dest == adapter->vxlan_port)
+			hdr.network = skb_inner_network_header(skb);
+
+		if (adapter->geneve_port &&
+		    udp_hdr(skb)->dest == adapter->geneve_port)
+			hdr.network = skb_inner_network_header(skb);
+	}
+
+	/* Make sure we have at least [minimum IPv4 header + TCP]
+	 * or [IPv6 header] bytes
+	 */
+	if (unlikely(skb_tail_pointer(skb) < hdr.network + 40))
+		return;
+
+	/* Currently only IPv4/IPv6 with TCP is supported */
+	switch (hdr.ipv4->version) {
+	case IPVERSION:
+		/* access ihl as u8 to avoid unaligned access on ia64 */
+		hlen = (hdr.network[0] & 0x0F) << 2;
+		l4_proto = hdr.ipv4->protocol;
+		break;
+	case 6:
+		hlen = hdr.network - skb->data;
+		l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL);
+		hlen -= hdr.network - skb->data;
+		break;
+	default:
+		return;
+	}
+
+	if (l4_proto != IPPROTO_TCP)
+		return;
+
+	if (unlikely(skb_tail_pointer(skb) < hdr.network +
+		     hlen + sizeof(struct tcphdr)))
+		return;
+
+	th = (struct tcphdr *)(hdr.network + hlen);
+
+	/* skip this packet since the socket is closing */
+	if (th->fin)
+		return;
+
+	/* sample on all syn packets or once every atr sample count */
+	if (!th->syn && (ring->atr_count < ring->atr_sample_rate))
+		return;
+
+	/* reset sample count */
+	ring->atr_count = 0;
+
+	vlan_id = htons(first->tx_flags >> IXGBE_TX_FLAGS_VLAN_SHIFT);
+
+	/*
+	 * src and dst are inverted, think how the receiver sees them
+	 *
+	 * The input is broken into two sections, a non-compressed section
+	 * containing vm_pool, vlan_id, and flow_type.  The rest of the data
+	 * is XORed together and stored in the compressed dword.
+	 */
+	input.formatted.vlan_id = vlan_id;
+
+	/*
+	 * since src port and flex bytes occupy the same word XOR them together
+	 * and write the value to source port portion of compressed dword
+	 */
+	if (first->tx_flags & (IXGBE_TX_FLAGS_SW_VLAN | IXGBE_TX_FLAGS_HW_VLAN))
+		common.port.src ^= th->dest ^ htons(ETH_P_8021Q);
+	else
+		common.port.src ^= th->dest ^ first->protocol;
+	common.port.dst ^= th->source;
+
+	switch (hdr.ipv4->version) {
+	case IPVERSION:
+		input.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4;
+		common.ip ^= hdr.ipv4->saddr ^ hdr.ipv4->daddr;
+		break;
+	case 6:
+		input.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_TCPV6;
+		common.ip ^= hdr.ipv6->saddr.s6_addr32[0] ^
+			     hdr.ipv6->saddr.s6_addr32[1] ^
+			     hdr.ipv6->saddr.s6_addr32[2] ^
+			     hdr.ipv6->saddr.s6_addr32[3] ^
+			     hdr.ipv6->daddr.s6_addr32[0] ^
+			     hdr.ipv6->daddr.s6_addr32[1] ^
+			     hdr.ipv6->daddr.s6_addr32[2] ^
+			     hdr.ipv6->daddr.s6_addr32[3];
+		break;
+	default:
+		break;
+	}
+
+	if (hdr.network != skb_network_header(skb))
+		input.formatted.flow_type |= IXGBE_ATR_L4TYPE_TUNNEL_MASK;
+
+	/* This assumes the Rx queue and Tx queue are bound to the same CPU */
+	ixgbe_fdir_add_signature_filter_82599(&q_vector->adapter->hw,
+					      input, common, ring->queue_index);
+}
+
+#ifdef IXGBE_FCOE
+static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
+			      struct net_device *sb_dev)
+{
+	struct ixgbe_adapter *adapter;
+	struct ixgbe_ring_feature *f;
+	int txq;
+
+	if (sb_dev) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+		struct net_device *vdev = sb_dev;
+
+		txq = vdev->tc_to_txq[tc].offset;
+		txq += reciprocal_scale(skb_get_hash(skb),
+					vdev->tc_to_txq[tc].count);
+
+		return txq;
+	}
+
+	/*
+	 * only execute the code below if protocol is FCoE
+	 * or FIP and we have FCoE enabled on the adapter
+	 */
+	switch (vlan_get_protocol(skb)) {
+	case htons(ETH_P_FCOE):
+	case htons(ETH_P_FIP):
+		adapter = netdev_priv(dev);
+
+		if (!sb_dev && (adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
+			break;
+		fallthrough;
+	default:
+		return netdev_pick_tx(dev, skb, sb_dev);
+	}
+
+	f = &adapter->ring_feature[RING_F_FCOE];
+
+	txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) :
+					   smp_processor_id();
+
+	while (txq >= f->indices)
+		txq -= f->indices;
+
+	return txq + f->offset;
+}
+
+#endif
+int ixgbe_xmit_xdp_ring(struct ixgbe_ring *ring,
+			struct xdp_frame *xdpf)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
+	u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? sinfo->nr_frags : 0;
+	u16 i = 0, index = ring->next_to_use;
+	struct ixgbe_tx_buffer *tx_head = &ring->tx_buffer_info[index];
+	struct ixgbe_tx_buffer *tx_buff = tx_head;
+	union ixgbe_adv_tx_desc *tx_desc = IXGBE_TX_DESC(ring, index);
+	u32 cmd_type, len = xdpf->len;
+	void *data = xdpf->data;
+
+	if (unlikely(ixgbe_desc_unused(ring) < 1 + nr_frags))
+		return IXGBE_XDP_CONSUMED;
+
+	tx_head->bytecount = xdp_get_frame_len(xdpf);
+	tx_head->gso_segs = 1;
+	tx_head->xdpf = xdpf;
+
+	tx_desc->read.olinfo_status =
+		cpu_to_le32(tx_head->bytecount << IXGBE_ADVTXD_PAYLEN_SHIFT);
+
+	for (;;) {
+		dma_addr_t dma;
+
+		dma = dma_map_single(ring->dev, data, len, DMA_TO_DEVICE);
+		if (dma_mapping_error(ring->dev, dma))
+			goto unmap;
+
+		dma_unmap_len_set(tx_buff, len, len);
+		dma_unmap_addr_set(tx_buff, dma, dma);
+
+		cmd_type = IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_DEXT |
+			   IXGBE_ADVTXD_DCMD_IFCS | len;
+		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+		tx_buff->protocol = 0;
+
+		if (++index == ring->count)
+			index = 0;
+
+		if (i == nr_frags)
+			break;
+
+		tx_buff = &ring->tx_buffer_info[index];
+		tx_desc = IXGBE_TX_DESC(ring, index);
+		tx_desc->read.olinfo_status = 0;
+
+		data = skb_frag_address(&sinfo->frags[i]);
+		len = skb_frag_size(&sinfo->frags[i]);
+		i++;
+	}
+	/* put descriptor type bits */
+	tx_desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD);
+
+	/* Avoid any potential race with xdp_xmit and cleanup */
+	smp_wmb();
+
+	tx_head->next_to_watch = tx_desc;
+	ring->next_to_use = index;
+
+	return IXGBE_XDP_TX;
+
+unmap:
+	for (;;) {
+		tx_buff = &ring->tx_buffer_info[index];
+		if (dma_unmap_len(tx_buff, len))
+			dma_unmap_page(ring->dev, dma_unmap_addr(tx_buff, dma),
+				       dma_unmap_len(tx_buff, len),
+				       DMA_TO_DEVICE);
+		dma_unmap_len_set(tx_buff, len, 0);
+		if (tx_buff == tx_head)
+			break;
+
+		if (!index)
+			index += ring->count;
+		index--;
+	}
+
+	return IXGBE_XDP_CONSUMED;
+}
+
+netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
+			  struct ixgbe_adapter *adapter,
+			  struct ixgbe_ring *tx_ring)
+{
+	struct ixgbe_tx_buffer *first;
+	int tso;
+	u32 tx_flags = 0;
+	unsigned short f;
+	u16 count = TXD_USE_COUNT(skb_headlen(skb));
+	struct ixgbe_ipsec_tx_data ipsec_tx = { 0 };
+	__be16 protocol = skb->protocol;
+	u8 hdr_len = 0;
+
+	/*
+	 * need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
+	 *       + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD,
+	 *       + 2 desc gap to keep tail from touching head,
+	 *       + 1 desc for context descriptor,
+	 * otherwise try next time
+	 */
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+		count += TXD_USE_COUNT(skb_frag_size(
+						&skb_shinfo(skb)->frags[f]));
+
+	if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) {
+		tx_ring->tx_stats.tx_busy++;
+		return NETDEV_TX_BUSY;
+	}
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+	first->skb = skb;
+	first->bytecount = skb->len;
+	first->gso_segs = 1;
+
+	/* if we have a HW VLAN tag being added default to the HW one */
+	if (skb_vlan_tag_present(skb)) {
+		tx_flags |= skb_vlan_tag_get(skb) << IXGBE_TX_FLAGS_VLAN_SHIFT;
+		tx_flags |= IXGBE_TX_FLAGS_HW_VLAN;
+	/* else if it is a SW VLAN check the next protocol and store the tag */
+	} else if (protocol == htons(ETH_P_8021Q)) {
+		struct vlan_hdr *vhdr, _vhdr;
+		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
+		if (!vhdr)
+			goto out_drop;
+
+		tx_flags |= ntohs(vhdr->h_vlan_TCI) <<
+				  IXGBE_TX_FLAGS_VLAN_SHIFT;
+		tx_flags |= IXGBE_TX_FLAGS_SW_VLAN;
+	}
+	protocol = vlan_get_protocol(skb);
+
+	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+	    adapter->ptp_clock) {
+		if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
+		    !test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS,
+					   &adapter->state)) {
+			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+			tx_flags |= IXGBE_TX_FLAGS_TSTAMP;
+
+			/* schedule check for Tx timestamp */
+			adapter->ptp_tx_skb = skb_get(skb);
+			adapter->ptp_tx_start = jiffies;
+			schedule_work(&adapter->ptp_tx_work);
+		} else {
+			adapter->tx_hwtstamp_skipped++;
+		}
+	}
+
+#ifdef CONFIG_PCI_IOV
+	/*
+	 * Use the l2switch_enable flag - would be false if the DMA
+	 * Tx switch had been disabled.
+	 */
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+		tx_flags |= IXGBE_TX_FLAGS_CC;
+
+#endif
+	/* DCB maps skb priorities 0-7 onto 3 bit PCP of VLAN tag. */
+	if ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
+	    ((tx_flags & (IXGBE_TX_FLAGS_HW_VLAN | IXGBE_TX_FLAGS_SW_VLAN)) ||
+	     (skb->priority != TC_PRIO_CONTROL))) {
+		tx_flags &= ~IXGBE_TX_FLAGS_VLAN_PRIO_MASK;
+		tx_flags |= (skb->priority & 0x7) <<
+					IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT;
+		if (tx_flags & IXGBE_TX_FLAGS_SW_VLAN) {
+			struct vlan_ethhdr *vhdr;
+
+			if (skb_cow_head(skb, 0))
+				goto out_drop;
+			vhdr = skb_vlan_eth_hdr(skb);
+			vhdr->h_vlan_TCI = htons(tx_flags >>
+						 IXGBE_TX_FLAGS_VLAN_SHIFT);
+		} else {
+			tx_flags |= IXGBE_TX_FLAGS_HW_VLAN;
+		}
+	}
+
+	/* record initial flags and protocol */
+	first->tx_flags = tx_flags;
+	first->protocol = protocol;
+
+#ifdef IXGBE_FCOE
+	/* setup tx offload for FCoE */
+	if ((protocol == htons(ETH_P_FCOE)) &&
+	    (tx_ring->netdev->features & (NETIF_F_FSO | NETIF_F_FCOE_CRC))) {
+		tso = ixgbe_fso(tx_ring, first, &hdr_len);
+		if (tso < 0)
+			goto out_drop;
+
+		goto xmit_fcoe;
+	}
+
+#endif /* IXGBE_FCOE */
+
+#ifdef CONFIG_IXGBE_IPSEC
+	if (xfrm_offload(skb) &&
+	    !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
+		goto out_drop;
+#endif
+	tso = ixgbe_tso(tx_ring, first, &hdr_len, &ipsec_tx);
+	if (tso < 0)
+		goto out_drop;
+	else if (!tso)
+		ixgbe_tx_csum(tx_ring, first, &ipsec_tx);
+
+	/* add the ATR filter if ATR is on */
+	if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state))
+		ixgbe_atr(tx_ring, first);
+
+#ifdef IXGBE_FCOE
+xmit_fcoe:
+#endif /* IXGBE_FCOE */
+	if (ixgbe_tx_map(tx_ring, first, hdr_len))
+		goto cleanup_tx_timestamp;
+
+	return NETDEV_TX_OK;
+
+out_drop:
+	dev_kfree_skb_any(first->skb);
+	first->skb = NULL;
+cleanup_tx_timestamp:
+	if (unlikely(tx_flags & IXGBE_TX_FLAGS_TSTAMP)) {
+		dev_kfree_skb_any(adapter->ptp_tx_skb);
+		adapter->ptp_tx_skb = NULL;
+		cancel_work_sync(&adapter->ptp_tx_work);
+		clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state);
+	}
+
+	return NETDEV_TX_OK;
+}
+
+static netdev_tx_t __ixgbe_xmit_frame(struct sk_buff *skb,
+				      struct net_device *netdev,
+				      struct ixgbe_ring *ring)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_ring *tx_ring;
+
+	/*
+	 * The minimum packet size for olinfo paylen is 17 so pad the skb
+	 * in order to meet this minimum size requirement.
+	 */
+	if (skb_put_padto(skb, 17))
+		return NETDEV_TX_OK;
+
+	tx_ring = ring ? ring : adapter->tx_ring[skb_get_queue_mapping(skb)];
+	if (unlikely(test_bit(__IXGBE_TX_DISABLED, &tx_ring->state)))
+		return NETDEV_TX_BUSY;
+
+	return ixgbe_xmit_frame_ring(skb, adapter, tx_ring);
+}
+
+static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb,
+				    struct net_device *netdev)
+{
+	return __ixgbe_xmit_frame(skb, netdev, NULL);
+}
+
+/**
+ * ixgbe_set_mac - Change the Ethernet Address of the NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int ixgbe_set_mac(struct net_device *netdev, void *p)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	eth_hw_addr_set(netdev, addr->sa_data);
+	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
+
+	ixgbe_mac_set_default_filter(adapter);
+
+	return 0;
+}
+
+static int
+ixgbe_mdio_read(struct net_device *netdev, int prtad, int devad, u16 addr)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u16 value;
+	int rc;
+
+	if (adapter->mii_bus) {
+		int regnum = addr;
+
+		if (devad != MDIO_DEVAD_NONE)
+			return mdiobus_c45_read(adapter->mii_bus, prtad,
+						devad, regnum);
+
+		return mdiobus_read(adapter->mii_bus, prtad, regnum);
+	}
+
+	if (prtad != hw->phy.mdio.prtad)
+		return -EINVAL;
+	rc = hw->phy.ops.read_reg(hw, addr, devad, &value);
+	if (!rc)
+		rc = value;
+	return rc;
+}
+
+static int ixgbe_mdio_write(struct net_device *netdev, int prtad, int devad,
+			    u16 addr, u16 value)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	if (adapter->mii_bus) {
+		int regnum = addr;
+
+		if (devad != MDIO_DEVAD_NONE)
+			return mdiobus_c45_write(adapter->mii_bus, prtad, devad,
+						 regnum, value);
+
+		return mdiobus_write(adapter->mii_bus, prtad, regnum, value);
+	}
+
+	if (prtad != hw->phy.mdio.prtad)
+		return -EINVAL;
+	return hw->phy.ops.write_reg(hw, addr, devad, value);
+}
+
+static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	switch (cmd) {
+	case SIOCSHWTSTAMP:
+		return ixgbe_ptp_set_ts_config(adapter, req);
+	case SIOCGHWTSTAMP:
+		return ixgbe_ptp_get_ts_config(adapter, req);
+	case SIOCGMIIPHY:
+		if (!adapter->hw.phy.ops.read_reg)
+			return -EOPNOTSUPP;
+		fallthrough;
+	default:
+		return mdio_mii_ioctl(&adapter->hw.phy.mdio, if_mii(req), cmd);
+	}
+}
+
+/**
+ * ixgbe_add_sanmac_netdev - Add the SAN MAC address to the corresponding
+ * netdev->dev_addrs
+ * @dev: network interface device structure
+ *
+ * Returns non-zero on failure
+ **/
+static int ixgbe_add_sanmac_netdev(struct net_device *dev)
+{
+	int err = 0;
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	if (is_valid_ether_addr(hw->mac.san_addr)) {
+		rtnl_lock();
+		err = dev_addr_add(dev, hw->mac.san_addr, NETDEV_HW_ADDR_T_SAN);
+		rtnl_unlock();
+
+		/* update SAN MAC vmdq pool selection */
+		hw->mac.ops.set_vmdq_san_mac(hw, VMDQ_P(0));
+	}
+	return err;
+}
+
+/**
+ * ixgbe_del_sanmac_netdev - Removes the SAN MAC address to the corresponding
+ * netdev->dev_addrs
+ * @dev: network interface device structure
+ *
+ * Returns non-zero on failure
+ **/
+static int ixgbe_del_sanmac_netdev(struct net_device *dev)
+{
+	int err = 0;
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_mac_info *mac = &adapter->hw.mac;
+
+	if (is_valid_ether_addr(mac->san_addr)) {
+		rtnl_lock();
+		err = dev_addr_del(dev, mac->san_addr, NETDEV_HW_ADDR_T_SAN);
+		rtnl_unlock();
+	}
+	return err;
+}
+
+static void ixgbe_get_ring_stats64(struct rtnl_link_stats64 *stats,
+				   struct ixgbe_ring *ring)
+{
+	u64 bytes, packets;
+	unsigned int start;
+
+	if (ring) {
+		do {
+			start = u64_stats_fetch_begin(&ring->syncp);
+			packets = ring->stats.packets;
+			bytes   = ring->stats.bytes;
+		} while (u64_stats_fetch_retry(&ring->syncp, start));
+		stats->tx_packets += packets;
+		stats->tx_bytes   += bytes;
+	}
+}
+
+static void ixgbe_get_stats64(struct net_device *netdev,
+			      struct rtnl_link_stats64 *stats)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	rcu_read_lock();
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct ixgbe_ring *ring = READ_ONCE(adapter->rx_ring[i]);
+		u64 bytes, packets;
+		unsigned int start;
+
+		if (ring) {
+			do {
+				start = u64_stats_fetch_begin(&ring->syncp);
+				packets = ring->stats.packets;
+				bytes   = ring->stats.bytes;
+			} while (u64_stats_fetch_retry(&ring->syncp, start));
+			stats->rx_packets += packets;
+			stats->rx_bytes   += bytes;
+		}
+	}
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct ixgbe_ring *ring = READ_ONCE(adapter->tx_ring[i]);
+
+		ixgbe_get_ring_stats64(stats, ring);
+	}
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		struct ixgbe_ring *ring = READ_ONCE(adapter->xdp_ring[i]);
+
+		ixgbe_get_ring_stats64(stats, ring);
+	}
+	rcu_read_unlock();
+
+	/* following stats updated by ixgbe_watchdog_task() */
+	stats->multicast	= netdev->stats.multicast;
+	stats->rx_errors	= netdev->stats.rx_errors;
+	stats->rx_length_errors	= netdev->stats.rx_length_errors;
+	stats->rx_crc_errors	= netdev->stats.rx_crc_errors;
+	stats->rx_missed_errors	= netdev->stats.rx_missed_errors;
+}
+
+static int ixgbe_ndo_get_vf_stats(struct net_device *netdev, int vf,
+				  struct ifla_vf_stats *vf_stats)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (vf < 0 || vf >= adapter->num_vfs)
+		return -EINVAL;
+
+	vf_stats->rx_packets = adapter->vfinfo[vf].vfstats.gprc;
+	vf_stats->rx_bytes   = adapter->vfinfo[vf].vfstats.gorc;
+	vf_stats->tx_packets = adapter->vfinfo[vf].vfstats.gptc;
+	vf_stats->tx_bytes   = adapter->vfinfo[vf].vfstats.gotc;
+	vf_stats->multicast  = adapter->vfinfo[vf].vfstats.mprc;
+
+	return 0;
+}
+
+#ifdef CONFIG_IXGBE_DCB
+/**
+ * ixgbe_validate_rtr - verify 802.1Qp to Rx packet buffer mapping is valid.
+ * @adapter: pointer to ixgbe_adapter
+ * @tc: number of traffic classes currently enabled
+ *
+ * Configure a valid 802.1Qp to Rx packet buffer mapping ie confirm
+ * 802.1Q priority maps to a packet buffer that exists.
+ */
+static void ixgbe_validate_rtr(struct ixgbe_adapter *adapter, u8 tc)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 reg, rsave;
+	int i;
+
+	/* 82598 have a static priority to TC mapping that can not
+	 * be changed so no validation is needed.
+	 */
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_RTRUP2TC);
+	rsave = reg;
+
+	for (i = 0; i < MAX_TRAFFIC_CLASS; i++) {
+		u8 up2tc = reg >> (i * IXGBE_RTRUP2TC_UP_SHIFT);
+
+		/* If up2tc is out of bounds default to zero */
+		if (up2tc > tc)
+			reg &= ~(0x7 << IXGBE_RTRUP2TC_UP_SHIFT);
+	}
+
+	if (reg != rsave)
+		IXGBE_WRITE_REG(hw, IXGBE_RTRUP2TC, reg);
+
+	return;
+}
+
+/**
+ * ixgbe_set_prio_tc_map - Configure netdev prio tc map
+ * @adapter: Pointer to adapter struct
+ *
+ * Populate the netdev user priority to tc map
+ */
+static void ixgbe_set_prio_tc_map(struct ixgbe_adapter *adapter)
+{
+	struct net_device *dev = adapter->netdev;
+	struct ixgbe_dcb_config *dcb_cfg = &adapter->dcb_cfg;
+	struct ieee_ets *ets = adapter->ixgbe_ieee_ets;
+	u8 prio;
+
+	for (prio = 0; prio < MAX_USER_PRIORITY; prio++) {
+		u8 tc = 0;
+
+		if (adapter->dcbx_cap & DCB_CAP_DCBX_VER_CEE)
+			tc = ixgbe_dcb_get_tc_from_up(dcb_cfg, 0, prio);
+		else if (ets)
+			tc = ets->prio_tc[prio];
+
+		netdev_set_prio_tc_map(dev, prio, tc);
+	}
+}
+
+#endif /* CONFIG_IXGBE_DCB */
+static int ixgbe_reassign_macvlan_pool(struct net_device *vdev,
+				       struct netdev_nested_priv *priv)
+{
+	struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)priv->data;
+	struct ixgbe_fwd_adapter *accel;
+	int pool;
+
+	/* we only care about macvlans... */
+	if (!netif_is_macvlan(vdev))
+		return 0;
+
+	/* that have hardware offload enabled... */
+	accel = macvlan_accel_priv(vdev);
+	if (!accel)
+		return 0;
+
+	/* If we can relocate to a different bit do so */
+	pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
+	if (pool < adapter->num_rx_pools) {
+		set_bit(pool, adapter->fwd_bitmask);
+		accel->pool = pool;
+		return 0;
+	}
+
+	/* if we cannot find a free pool then disable the offload */
+	netdev_err(vdev, "L2FW offload disabled due to lack of queue resources\n");
+	macvlan_release_l2fw_offload(vdev);
+
+	/* unbind the queues and drop the subordinate channel config */
+	netdev_unbind_sb_channel(adapter->netdev, vdev);
+	netdev_set_sb_channel(vdev, 0);
+
+	kfree(accel);
+
+	return 0;
+}
+
+static void ixgbe_defrag_macvlan_pools(struct net_device *dev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct netdev_nested_priv priv = {
+		.data = (void *)adapter,
+	};
+
+	/* flush any stale bits out of the fwd bitmask */
+	bitmap_clear(adapter->fwd_bitmask, 1, 63);
+
+	/* walk through upper devices reassigning pools */
+	netdev_walk_all_upper_dev_rcu(dev, ixgbe_reassign_macvlan_pool,
+				      &priv);
+}
+
+/**
+ * ixgbe_setup_tc - configure net_device for multiple traffic classes
+ *
+ * @dev: net device to configure
+ * @tc: number of traffic classes to enable
+ */
+int ixgbe_setup_tc(struct net_device *dev, u8 tc)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	/* Hardware supports up to 8 traffic classes */
+	if (tc > adapter->dcb_cfg.num_tcs.pg_tcs)
+		return -EINVAL;
+
+	if (hw->mac.type == ixgbe_mac_82598EB && tc && tc < MAX_TRAFFIC_CLASS)
+		return -EINVAL;
+
+	/* Hardware has to reinitialize queues and interrupts to
+	 * match packet buffer alignment. Unfortunately, the
+	 * hardware is not flexible enough to do this dynamically.
+	 */
+	if (netif_running(dev))
+		ixgbe_close(dev);
+	else
+		ixgbe_reset(adapter);
+
+	ixgbe_clear_interrupt_scheme(adapter);
+
+#ifdef CONFIG_IXGBE_DCB
+	if (tc) {
+		if (adapter->xdp_prog) {
+			e_warn(probe, "DCB is not supported with XDP\n");
+
+			ixgbe_init_interrupt_scheme(adapter);
+			if (netif_running(dev))
+				ixgbe_open(dev);
+			return -EINVAL;
+		}
+
+		netdev_set_num_tc(dev, tc);
+		ixgbe_set_prio_tc_map(adapter);
+
+		adapter->hw_tcs = tc;
+		adapter->flags |= IXGBE_FLAG_DCB_ENABLED;
+
+		if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
+			adapter->last_lfc_mode = adapter->hw.fc.requested_mode;
+			adapter->hw.fc.requested_mode = ixgbe_fc_none;
+		}
+	} else {
+		netdev_reset_tc(dev);
+
+		if (adapter->hw.mac.type == ixgbe_mac_82598EB)
+			adapter->hw.fc.requested_mode = adapter->last_lfc_mode;
+
+		adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED;
+		adapter->hw_tcs = tc;
+
+		adapter->temp_dcb_cfg.pfc_mode_enable = false;
+		adapter->dcb_cfg.pfc_mode_enable = false;
+	}
+
+	ixgbe_validate_rtr(adapter, tc);
+
+#endif /* CONFIG_IXGBE_DCB */
+	ixgbe_init_interrupt_scheme(adapter);
+
+	ixgbe_defrag_macvlan_pools(dev);
+
+	if (netif_running(dev))
+		return ixgbe_open(dev);
+
+	return 0;
+}
+
+static int ixgbe_delete_clsu32(struct ixgbe_adapter *adapter,
+			       struct tc_cls_u32_offload *cls)
+{
+	u32 hdl = cls->knode.handle;
+	u32 uhtid = TC_U32_USERHTID(cls->knode.handle);
+	u32 loc = cls->knode.handle & 0xfffff;
+	int err = 0, i, j;
+	struct ixgbe_jump_table *jump = NULL;
+
+	if (loc > IXGBE_MAX_HW_ENTRIES)
+		return -EINVAL;
+
+	if ((uhtid != 0x800) && (uhtid >= IXGBE_MAX_LINK_HANDLE))
+		return -EINVAL;
+
+	/* Clear this filter in the link data it is associated with */
+	if (uhtid != 0x800) {
+		jump = adapter->jump_tables[uhtid];
+		if (!jump)
+			return -EINVAL;
+		if (!test_bit(loc - 1, jump->child_loc_map))
+			return -EINVAL;
+		clear_bit(loc - 1, jump->child_loc_map);
+	}
+
+	/* Check if the filter being deleted is a link */
+	for (i = 1; i < IXGBE_MAX_LINK_HANDLE; i++) {
+		jump = adapter->jump_tables[i];
+		if (jump && jump->link_hdl == hdl) {
+			/* Delete filters in the hardware in the child hash
+			 * table associated with this link
+			 */
+			for (j = 0; j < IXGBE_MAX_HW_ENTRIES; j++) {
+				if (!test_bit(j, jump->child_loc_map))
+					continue;
+				spin_lock(&adapter->fdir_perfect_lock);
+				err = ixgbe_update_ethtool_fdir_entry(adapter,
+								      NULL,
+								      j + 1);
+				spin_unlock(&adapter->fdir_perfect_lock);
+				clear_bit(j, jump->child_loc_map);
+			}
+			/* Remove resources for this link */
+			kfree(jump->input);
+			kfree(jump->mask);
+			kfree(jump);
+			adapter->jump_tables[i] = NULL;
+			return err;
+		}
+	}
+
+	spin_lock(&adapter->fdir_perfect_lock);
+	err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, loc);
+	spin_unlock(&adapter->fdir_perfect_lock);
+	return err;
+}
+
+static int ixgbe_configure_clsu32_add_hnode(struct ixgbe_adapter *adapter,
+					    struct tc_cls_u32_offload *cls)
+{
+	u32 uhtid = TC_U32_USERHTID(cls->hnode.handle);
+
+	if (uhtid >= IXGBE_MAX_LINK_HANDLE)
+		return -EINVAL;
+
+	/* This ixgbe devices do not support hash tables at the moment
+	 * so abort when given hash tables.
+	 */
+	if (cls->hnode.divisor > 0)
+		return -EINVAL;
+
+	set_bit(uhtid - 1, &adapter->tables);
+	return 0;
+}
+
+static int ixgbe_configure_clsu32_del_hnode(struct ixgbe_adapter *adapter,
+					    struct tc_cls_u32_offload *cls)
+{
+	u32 uhtid = TC_U32_USERHTID(cls->hnode.handle);
+
+	if (uhtid >= IXGBE_MAX_LINK_HANDLE)
+		return -EINVAL;
+
+	clear_bit(uhtid - 1, &adapter->tables);
+	return 0;
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+struct upper_walk_data {
+	struct ixgbe_adapter *adapter;
+	u64 action;
+	int ifindex;
+	u8 queue;
+};
+
+static int get_macvlan_queue(struct net_device *upper,
+			     struct netdev_nested_priv *priv)
+{
+	if (netif_is_macvlan(upper)) {
+		struct ixgbe_fwd_adapter *vadapter = macvlan_accel_priv(upper);
+		struct ixgbe_adapter *adapter;
+		struct upper_walk_data *data;
+		int ifindex;
+
+		data = (struct upper_walk_data *)priv->data;
+		ifindex = data->ifindex;
+		adapter = data->adapter;
+		if (vadapter && upper->ifindex == ifindex) {
+			data->queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx;
+			data->action = data->queue;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex,
+				  u8 *queue, u64 *action)
+{
+	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
+	unsigned int num_vfs = adapter->num_vfs, vf;
+	struct netdev_nested_priv priv;
+	struct upper_walk_data data;
+	struct net_device *upper;
+
+	/* redirect to a SRIOV VF */
+	for (vf = 0; vf < num_vfs; ++vf) {
+		upper = pci_get_drvdata(adapter->vfinfo[vf].vfdev);
+		if (upper->ifindex == ifindex) {
+			*queue = vf * __ALIGN_MASK(1, ~vmdq->mask);
+			*action = vf + 1;
+			*action <<= ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
+			return 0;
+		}
+	}
+
+	/* redirect to a offloaded macvlan netdev */
+	data.adapter = adapter;
+	data.ifindex = ifindex;
+	data.action = 0;
+	data.queue = 0;
+	priv.data = (void *)&data;
+	if (netdev_walk_all_upper_dev_rcu(adapter->netdev,
+					  get_macvlan_queue, &priv)) {
+		*action = data.action;
+		*queue = data.queue;
+
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int parse_tc_actions(struct ixgbe_adapter *adapter,
+			    struct tcf_exts *exts, u64 *action, u8 *queue)
+{
+	const struct tc_action *a;
+	int i;
+
+	if (!tcf_exts_has_actions(exts))
+		return -EINVAL;
+
+	tcf_exts_for_each_action(i, a, exts) {
+		/* Drop action */
+		if (is_tcf_gact_shot(a)) {
+			*action = IXGBE_FDIR_DROP_QUEUE;
+			*queue = IXGBE_FDIR_DROP_QUEUE;
+			return 0;
+		}
+
+		/* Redirect to a VF or a offloaded macvlan */
+		if (is_tcf_mirred_egress_redirect(a)) {
+			struct net_device *dev = tcf_mirred_dev(a);
+
+			if (!dev)
+				return -EINVAL;
+			return handle_redirect_action(adapter, dev->ifindex,
+						      queue, action);
+		}
+
+		return -EINVAL;
+	}
+
+	return -EINVAL;
+}
+#else
+static int parse_tc_actions(struct ixgbe_adapter *adapter,
+			    struct tcf_exts *exts, u64 *action, u8 *queue)
+{
+	return -EINVAL;
+}
+#endif /* CONFIG_NET_CLS_ACT */
+
+static int ixgbe_clsu32_build_input(struct ixgbe_fdir_filter *input,
+				    union ixgbe_atr_input *mask,
+				    struct tc_cls_u32_offload *cls,
+				    struct ixgbe_mat_field *field_ptr,
+				    struct ixgbe_nexthdr *nexthdr)
+{
+	int i, j, off;
+	__be32 val, m;
+	bool found_entry = false, found_jump_field = false;
+
+	for (i = 0; i < cls->knode.sel->nkeys; i++) {
+		off = cls->knode.sel->keys[i].off;
+		val = cls->knode.sel->keys[i].val;
+		m = cls->knode.sel->keys[i].mask;
+
+		for (j = 0; field_ptr[j].val; j++) {
+			if (field_ptr[j].off == off) {
+				field_ptr[j].val(input, mask, (__force u32)val,
+						 (__force u32)m);
+				input->filter.formatted.flow_type |=
+					field_ptr[j].type;
+				found_entry = true;
+				break;
+			}
+		}
+		if (nexthdr) {
+			if (nexthdr->off == cls->knode.sel->keys[i].off &&
+			    nexthdr->val ==
+			    (__force u32)cls->knode.sel->keys[i].val &&
+			    nexthdr->mask ==
+			    (__force u32)cls->knode.sel->keys[i].mask)
+				found_jump_field = true;
+			else
+				continue;
+		}
+	}
+
+	if (nexthdr && !found_jump_field)
+		return -EINVAL;
+
+	if (!found_entry)
+		return 0;
+
+	mask->formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK |
+				    IXGBE_ATR_L4TYPE_MASK;
+
+	if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4)
+		mask->formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK;
+
+	return 0;
+}
+
+static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter,
+				  struct tc_cls_u32_offload *cls)
+{
+	__be16 protocol = cls->common.protocol;
+	u32 loc = cls->knode.handle & 0xfffff;
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_mat_field *field_ptr;
+	struct ixgbe_fdir_filter *input = NULL;
+	union ixgbe_atr_input *mask = NULL;
+	struct ixgbe_jump_table *jump = NULL;
+	int i, err = -EINVAL;
+	u8 queue;
+	u32 uhtid, link_uhtid;
+
+	uhtid = TC_U32_USERHTID(cls->knode.handle);
+	link_uhtid = TC_U32_USERHTID(cls->knode.link_handle);
+
+	/* At the moment cls_u32 jumps to network layer and skips past
+	 * L2 headers. The canonical method to match L2 frames is to use
+	 * negative values. However this is error prone at best but really
+	 * just broken because there is no way to "know" what sort of hdr
+	 * is in front of the network layer. Fix cls_u32 to support L2
+	 * headers when needed.
+	 */
+	if (protocol != htons(ETH_P_IP))
+		return err;
+
+	if (loc >= ((1024 << adapter->fdir_pballoc) - 2)) {
+		e_err(drv, "Location out of range\n");
+		return err;
+	}
+
+	/* cls u32 is a graph starting at root node 0x800. The driver tracks
+	 * links and also the fields used to advance the parser across each
+	 * link (e.g. nexthdr/eat parameters from 'tc'). This way we can map
+	 * the u32 graph onto the hardware parse graph denoted in ixgbe_model.h
+	 * To add support for new nodes update ixgbe_model.h parse structures
+	 * this function _should_ be generic try not to hardcode values here.
+	 */
+	if (uhtid == 0x800) {
+		field_ptr = (adapter->jump_tables[0])->mat;
+	} else {
+		if (uhtid >= IXGBE_MAX_LINK_HANDLE)
+			return err;
+		if (!adapter->jump_tables[uhtid])
+			return err;
+		field_ptr = (adapter->jump_tables[uhtid])->mat;
+	}
+
+	if (!field_ptr)
+		return err;
+
+	/* At this point we know the field_ptr is valid and need to either
+	 * build cls_u32 link or attach filter. Because adding a link to
+	 * a handle that does not exist is invalid and the same for adding
+	 * rules to handles that don't exist.
+	 */
+
+	if (link_uhtid) {
+		struct ixgbe_nexthdr *nexthdr = ixgbe_ipv4_jumps;
+
+		if (link_uhtid >= IXGBE_MAX_LINK_HANDLE)
+			return err;
+
+		if (!test_bit(link_uhtid - 1, &adapter->tables))
+			return err;
+
+		/* Multiple filters as links to the same hash table are not
+		 * supported. To add a new filter with the same next header
+		 * but different match/jump conditions, create a new hash table
+		 * and link to it.
+		 */
+		if (adapter->jump_tables[link_uhtid] &&
+		    (adapter->jump_tables[link_uhtid])->link_hdl) {
+			e_err(drv, "Link filter exists for link: %x\n",
+			      link_uhtid);
+			return err;
+		}
+
+		for (i = 0; nexthdr[i].jump; i++) {
+			if (nexthdr[i].o != cls->knode.sel->offoff ||
+			    nexthdr[i].s != cls->knode.sel->offshift ||
+			    nexthdr[i].m !=
+			    (__force u32)cls->knode.sel->offmask)
+				return err;
+
+			jump = kzalloc(sizeof(*jump), GFP_KERNEL);
+			if (!jump)
+				return -ENOMEM;
+			input = kzalloc(sizeof(*input), GFP_KERNEL);
+			if (!input) {
+				err = -ENOMEM;
+				goto free_jump;
+			}
+			mask = kzalloc(sizeof(*mask), GFP_KERNEL);
+			if (!mask) {
+				err = -ENOMEM;
+				goto free_input;
+			}
+			jump->input = input;
+			jump->mask = mask;
+			jump->link_hdl = cls->knode.handle;
+
+			err = ixgbe_clsu32_build_input(input, mask, cls,
+						       field_ptr, &nexthdr[i]);
+			if (!err) {
+				jump->mat = nexthdr[i].jump;
+				adapter->jump_tables[link_uhtid] = jump;
+				break;
+			} else {
+				kfree(mask);
+				kfree(input);
+				kfree(jump);
+			}
+		}
+		return 0;
+	}
+
+	input = kzalloc(sizeof(*input), GFP_KERNEL);
+	if (!input)
+		return -ENOMEM;
+	mask = kzalloc(sizeof(*mask), GFP_KERNEL);
+	if (!mask) {
+		err = -ENOMEM;
+		goto free_input;
+	}
+
+	if ((uhtid != 0x800) && (adapter->jump_tables[uhtid])) {
+		if ((adapter->jump_tables[uhtid])->input)
+			memcpy(input, (adapter->jump_tables[uhtid])->input,
+			       sizeof(*input));
+		if ((adapter->jump_tables[uhtid])->mask)
+			memcpy(mask, (adapter->jump_tables[uhtid])->mask,
+			       sizeof(*mask));
+
+		/* Lookup in all child hash tables if this location is already
+		 * filled with a filter
+		 */
+		for (i = 1; i < IXGBE_MAX_LINK_HANDLE; i++) {
+			struct ixgbe_jump_table *link = adapter->jump_tables[i];
+
+			if (link && (test_bit(loc - 1, link->child_loc_map))) {
+				e_err(drv, "Filter exists in location: %x\n",
+				      loc);
+				err = -EINVAL;
+				goto err_out;
+			}
+		}
+	}
+	err = ixgbe_clsu32_build_input(input, mask, cls, field_ptr, NULL);
+	if (err)
+		goto err_out;
+
+	err = parse_tc_actions(adapter, cls->knode.exts, &input->action,
+			       &queue);
+	if (err < 0)
+		goto err_out;
+
+	input->sw_idx = loc;
+
+	spin_lock(&adapter->fdir_perfect_lock);
+
+	if (hlist_empty(&adapter->fdir_filter_list)) {
+		memcpy(&adapter->fdir_mask, mask, sizeof(*mask));
+		err = ixgbe_fdir_set_input_mask_82599(hw, mask);
+		if (err)
+			goto err_out_w_lock;
+	} else if (memcmp(&adapter->fdir_mask, mask, sizeof(*mask))) {
+		err = -EINVAL;
+		goto err_out_w_lock;
+	}
+
+	ixgbe_atr_compute_perfect_hash_82599(&input->filter, mask);
+	err = ixgbe_fdir_write_perfect_filter_82599(hw, &input->filter,
+						    input->sw_idx, queue);
+	if (err)
+		goto err_out_w_lock;
+
+	ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx);
+	spin_unlock(&adapter->fdir_perfect_lock);
+
+	if ((uhtid != 0x800) && (adapter->jump_tables[uhtid]))
+		set_bit(loc - 1, (adapter->jump_tables[uhtid])->child_loc_map);
+
+	kfree(mask);
+	return err;
+err_out_w_lock:
+	spin_unlock(&adapter->fdir_perfect_lock);
+err_out:
+	kfree(mask);
+free_input:
+	kfree(input);
+free_jump:
+	kfree(jump);
+	return err;
+}
+
+static int ixgbe_setup_tc_cls_u32(struct ixgbe_adapter *adapter,
+				  struct tc_cls_u32_offload *cls_u32)
+{
+	switch (cls_u32->command) {
+	case TC_CLSU32_NEW_KNODE:
+	case TC_CLSU32_REPLACE_KNODE:
+		return ixgbe_configure_clsu32(adapter, cls_u32);
+	case TC_CLSU32_DELETE_KNODE:
+		return ixgbe_delete_clsu32(adapter, cls_u32);
+	case TC_CLSU32_NEW_HNODE:
+	case TC_CLSU32_REPLACE_HNODE:
+		return ixgbe_configure_clsu32_add_hnode(adapter, cls_u32);
+	case TC_CLSU32_DELETE_HNODE:
+		return ixgbe_configure_clsu32_del_hnode(adapter, cls_u32);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int ixgbe_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+				   void *cb_priv)
+{
+	struct ixgbe_adapter *adapter = cb_priv;
+
+	if (!tc_cls_can_offload_and_chain0(adapter->netdev, type_data))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSU32:
+		return ixgbe_setup_tc_cls_u32(adapter, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int ixgbe_setup_tc_mqprio(struct net_device *dev,
+				 struct tc_mqprio_qopt *mqprio)
+{
+	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+	return ixgbe_setup_tc(dev, mqprio->num_tc);
+}
+
+static LIST_HEAD(ixgbe_block_cb_list);
+
+static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			    void *type_data)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return flow_block_cb_setup_simple(type_data,
+						  &ixgbe_block_cb_list,
+						  ixgbe_setup_tc_block_cb,
+						  adapter, adapter, true);
+	case TC_SETUP_QDISC_MQPRIO:
+		return ixgbe_setup_tc_mqprio(dev, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+#ifdef CONFIG_PCI_IOV
+void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	rtnl_lock();
+	ixgbe_setup_tc(netdev, adapter->hw_tcs);
+	rtnl_unlock();
+}
+
+#endif
+void ixgbe_do_reset(struct net_device *netdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (netif_running(netdev))
+		ixgbe_reinit_locked(adapter);
+	else
+		ixgbe_reset(adapter);
+}
+
+static netdev_features_t ixgbe_fix_features(struct net_device *netdev,
+					    netdev_features_t features)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	/* If Rx checksum is disabled, then RSC/LRO should also be disabled */
+	if (!(features & NETIF_F_RXCSUM))
+		features &= ~NETIF_F_LRO;
+
+	/* Turn off LRO if not RSC capable */
+	if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE))
+		features &= ~NETIF_F_LRO;
+
+	if (adapter->xdp_prog && (features & NETIF_F_LRO)) {
+		e_dev_err("LRO is not supported with XDP\n");
+		features &= ~NETIF_F_LRO;
+	}
+
+	return features;
+}
+
+static void ixgbe_reset_l2fw_offload(struct ixgbe_adapter *adapter)
+{
+	int rss = min_t(int, ixgbe_max_rss_indices(adapter),
+			num_online_cpus());
+
+	/* go back to full RSS if we're not running SR-IOV */
+	if (!adapter->ring_feature[RING_F_VMDQ].offset)
+		adapter->flags &= ~(IXGBE_FLAG_VMDQ_ENABLED |
+				    IXGBE_FLAG_SRIOV_ENABLED);
+
+	adapter->ring_feature[RING_F_RSS].limit = rss;
+	adapter->ring_feature[RING_F_VMDQ].limit = 1;
+
+	ixgbe_setup_tc(adapter->netdev, adapter->hw_tcs);
+}
+
+static int ixgbe_set_features(struct net_device *netdev,
+			      netdev_features_t features)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	netdev_features_t changed = netdev->features ^ features;
+	bool need_reset = false;
+
+	/* Make sure RSC matches LRO, reset if change */
+	if (!(features & NETIF_F_LRO)) {
+		if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
+			need_reset = true;
+		adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
+	} else if ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) &&
+		   !(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) {
+		if (adapter->rx_itr_setting == 1 ||
+		    adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) {
+			adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED;
+			need_reset = true;
+		} else if ((changed ^ features) & NETIF_F_LRO) {
+			e_info(probe, "rx-usecs set too low, "
+			       "disabling RSC\n");
+		}
+	}
+
+	/*
+	 * Check if Flow Director n-tuple support or hw_tc support was
+	 * enabled or disabled.  If the state changed, we need to reset.
+	 */
+	if ((features & NETIF_F_NTUPLE) || (features & NETIF_F_HW_TC)) {
+		/* turn off ATR, enable perfect filters and reset */
+		if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
+			need_reset = true;
+
+		adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
+		adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
+	} else {
+		/* turn off perfect filters, enable ATR and reset */
+		if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)
+			need_reset = true;
+
+		adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
+
+		/* We cannot enable ATR if SR-IOV is enabled */
+		if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED ||
+		    /* We cannot enable ATR if we have 2 or more tcs */
+		    (adapter->hw_tcs > 1) ||
+		    /* We cannot enable ATR if RSS is disabled */
+		    (adapter->ring_feature[RING_F_RSS].limit <= 1) ||
+		    /* A sample rate of 0 indicates ATR disabled */
+		    (!adapter->atr_sample_rate))
+			; /* do nothing not supported */
+		else /* otherwise supported and set the flag */
+			adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
+	}
+
+	if (changed & NETIF_F_RXALL)
+		need_reset = true;
+
+	netdev->features = features;
+
+	if ((changed & NETIF_F_HW_L2FW_DOFFLOAD) && adapter->num_rx_pools > 1)
+		ixgbe_reset_l2fw_offload(adapter);
+	else if (need_reset)
+		ixgbe_do_reset(netdev);
+	else if (changed & (NETIF_F_HW_VLAN_CTAG_RX |
+			    NETIF_F_HW_VLAN_CTAG_FILTER))
+		ixgbe_set_rx_mode(netdev);
+
+	return 1;
+}
+
+static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
+			     struct net_device *dev,
+			     const unsigned char *addr, u16 vid,
+			     u16 flags,
+			     struct netlink_ext_ack *extack)
+{
+	/* guarantee we can provide a unique filter for the unicast address */
+	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
+		struct ixgbe_adapter *adapter = netdev_priv(dev);
+		u16 pool = VMDQ_P(0);
+
+		if (netdev_uc_count(dev) >= ixgbe_available_rars(adapter, pool))
+			return -ENOMEM;
+	}
+
+	return ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, flags);
+}
+
+/**
+ * ixgbe_configure_bridge_mode - set various bridge modes
+ * @adapter: the private structure
+ * @mode: requested bridge mode
+ *
+ * Configure some settings require for various bridge modes.
+ **/
+static int ixgbe_configure_bridge_mode(struct ixgbe_adapter *adapter,
+				       __u16 mode)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	unsigned int p, num_pools;
+	u32 vmdctl;
+
+	switch (mode) {
+	case BRIDGE_MODE_VEPA:
+		/* disable Tx loopback, rely on switch hairpin mode */
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_PFDTXGSWC, 0);
+
+		/* must enable Rx switching replication to allow multicast
+		 * packet reception on all VFs, and to enable source address
+		 * pruning.
+		 */
+		vmdctl = IXGBE_READ_REG(hw, IXGBE_VMD_CTL);
+		vmdctl |= IXGBE_VT_CTL_REPLEN;
+		IXGBE_WRITE_REG(hw, IXGBE_VMD_CTL, vmdctl);
+
+		/* enable Rx source address pruning. Note, this requires
+		 * replication to be enabled or else it does nothing.
+		 */
+		num_pools = adapter->num_vfs + adapter->num_rx_pools;
+		for (p = 0; p < num_pools; p++) {
+			if (hw->mac.ops.set_source_address_pruning)
+				hw->mac.ops.set_source_address_pruning(hw,
+								       true,
+								       p);
+		}
+		break;
+	case BRIDGE_MODE_VEB:
+		/* enable Tx loopback for internal VF/PF communication */
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_PFDTXGSWC,
+				IXGBE_PFDTXGSWC_VT_LBEN);
+
+		/* disable Rx switching replication unless we have SR-IOV
+		 * virtual functions
+		 */
+		vmdctl = IXGBE_READ_REG(hw, IXGBE_VMD_CTL);
+		if (!adapter->num_vfs)
+			vmdctl &= ~IXGBE_VT_CTL_REPLEN;
+		IXGBE_WRITE_REG(hw, IXGBE_VMD_CTL, vmdctl);
+
+		/* disable Rx source address pruning, since we don't expect to
+		 * be receiving external loopback of our transmitted frames.
+		 */
+		num_pools = adapter->num_vfs + adapter->num_rx_pools;
+		for (p = 0; p < num_pools; p++) {
+			if (hw->mac.ops.set_source_address_pruning)
+				hw->mac.ops.set_source_address_pruning(hw,
+								       false,
+								       p);
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	adapter->bridge_mode = mode;
+
+	e_info(drv, "enabling bridge mode: %s\n",
+	       mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
+
+	return 0;
+}
+
+static int ixgbe_ndo_bridge_setlink(struct net_device *dev,
+				    struct nlmsghdr *nlh, u16 flags,
+				    struct netlink_ext_ack *extack)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct nlattr *attr, *br_spec;
+	int rem;
+
+	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
+		return -EOPNOTSUPP;
+
+	br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+	if (!br_spec)
+		return -EINVAL;
+
+	nla_for_each_nested(attr, br_spec, rem) {
+		int status;
+		__u16 mode;
+
+		if (nla_type(attr) != IFLA_BRIDGE_MODE)
+			continue;
+
+		mode = nla_get_u16(attr);
+		status = ixgbe_configure_bridge_mode(adapter, mode);
+		if (status)
+			return status;
+
+		break;
+	}
+
+	return 0;
+}
+
+static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
+				    struct net_device *dev,
+				    u32 filter_mask, int nlflags)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+
+	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
+		return 0;
+
+	return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
+				       adapter->bridge_mode, 0, 0, nlflags,
+				       filter_mask, NULL);
+}
+
+static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(pdev);
+	struct ixgbe_fwd_adapter *accel;
+	int tcs = adapter->hw_tcs ? : 1;
+	int pool, err;
+
+	if (adapter->xdp_prog) {
+		e_warn(probe, "L2FW offload is not supported with XDP\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* The hardware supported by ixgbe only filters on the destination MAC
+	 * address. In order to avoid issues we only support offloading modes
+	 * where the hardware can actually provide the functionality.
+	 */
+	if (!macvlan_supports_dest_filter(vdev))
+		return ERR_PTR(-EMEDIUMTYPE);
+
+	/* We need to lock down the macvlan to be a single queue device so that
+	 * we can reuse the tc_to_txq field in the macvlan netdev to represent
+	 * the queue mapping to our netdev.
+	 */
+	if (netif_is_multiqueue(vdev))
+		return ERR_PTR(-ERANGE);
+
+	pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools);
+	if (pool == adapter->num_rx_pools) {
+		u16 used_pools = adapter->num_vfs + adapter->num_rx_pools;
+		u16 reserved_pools;
+
+		if (((adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
+		     adapter->num_rx_pools >= (MAX_TX_QUEUES / tcs)) ||
+		    adapter->num_rx_pools > IXGBE_MAX_MACVLANS)
+			return ERR_PTR(-EBUSY);
+
+		/* Hardware has a limited number of available pools. Each VF,
+		 * and the PF require a pool. Check to ensure we don't
+		 * attempt to use more then the available number of pools.
+		 */
+		if (used_pools >= IXGBE_MAX_VF_FUNCTIONS)
+			return ERR_PTR(-EBUSY);
+
+		/* Enable VMDq flag so device will be set in VM mode */
+		adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED |
+				  IXGBE_FLAG_SRIOV_ENABLED;
+
+		/* Try to reserve as many queues per pool as possible,
+		 * we start with the configurations that support 4 queues
+		 * per pools, followed by 2, and then by just 1 per pool.
+		 */
+		if (used_pools < 32 && adapter->num_rx_pools < 16)
+			reserved_pools = min_t(u16,
+					       32 - used_pools,
+					       16 - adapter->num_rx_pools);
+		else if (adapter->num_rx_pools < 32)
+			reserved_pools = min_t(u16,
+					       64 - used_pools,
+					       32 - adapter->num_rx_pools);
+		else
+			reserved_pools = 64 - used_pools;
+
+
+		if (!reserved_pools)
+			return ERR_PTR(-EBUSY);
+
+		adapter->ring_feature[RING_F_VMDQ].limit += reserved_pools;
+
+		/* Force reinit of ring allocation with VMDQ enabled */
+		err = ixgbe_setup_tc(pdev, adapter->hw_tcs);
+		if (err)
+			return ERR_PTR(err);
+
+		if (pool >= adapter->num_rx_pools)
+			return ERR_PTR(-ENOMEM);
+	}
+
+	accel = kzalloc(sizeof(*accel), GFP_KERNEL);
+	if (!accel)
+		return ERR_PTR(-ENOMEM);
+
+	set_bit(pool, adapter->fwd_bitmask);
+	netdev_set_sb_channel(vdev, pool);
+	accel->pool = pool;
+	accel->netdev = vdev;
+
+	if (!netif_running(pdev))
+		return accel;
+
+	err = ixgbe_fwd_ring_up(adapter, accel);
+	if (err)
+		return ERR_PTR(err);
+
+	return accel;
+}
+
+static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
+{
+	struct ixgbe_fwd_adapter *accel = priv;
+	struct ixgbe_adapter *adapter = netdev_priv(pdev);
+	unsigned int rxbase = accel->rx_base_queue;
+	unsigned int i;
+
+	/* delete unicast filter associated with offloaded interface */
+	ixgbe_del_mac_filter(adapter, accel->netdev->dev_addr,
+			     VMDQ_P(accel->pool));
+
+	/* Allow remaining Rx packets to get flushed out of the
+	 * Rx FIFO before we drop the netdev for the ring.
+	 */
+	usleep_range(10000, 20000);
+
+	for (i = 0; i < adapter->num_rx_queues_per_pool; i++) {
+		struct ixgbe_ring *ring = adapter->rx_ring[rxbase + i];
+		struct ixgbe_q_vector *qv = ring->q_vector;
+
+		/* Make sure we aren't processing any packets and clear
+		 * netdev to shut down the ring.
+		 */
+		if (netif_running(adapter->netdev))
+			napi_synchronize(&qv->napi);
+		ring->netdev = NULL;
+	}
+
+	/* unbind the queues and drop the subordinate channel config */
+	netdev_unbind_sb_channel(pdev, accel->netdev);
+	netdev_set_sb_channel(accel->netdev, 0);
+
+	clear_bit(accel->pool, adapter->fwd_bitmask);
+	kfree(accel);
+}
+
+#define IXGBE_MAX_MAC_HDR_LEN		127
+#define IXGBE_MAX_NETWORK_HDR_LEN	511
+
+static netdev_features_t
+ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
+		     netdev_features_t features)
+{
+	unsigned int network_hdr_len, mac_hdr_len;
+
+	/* Make certain the headers can be described by a context descriptor */
+	mac_hdr_len = skb_network_header(skb) - skb->data;
+	if (unlikely(mac_hdr_len > IXGBE_MAX_MAC_HDR_LEN))
+		return features & ~(NETIF_F_HW_CSUM |
+				    NETIF_F_SCTP_CRC |
+				    NETIF_F_GSO_UDP_L4 |
+				    NETIF_F_HW_VLAN_CTAG_TX |
+				    NETIF_F_TSO |
+				    NETIF_F_TSO6);
+
+	network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
+	if (unlikely(network_hdr_len >  IXGBE_MAX_NETWORK_HDR_LEN))
+		return features & ~(NETIF_F_HW_CSUM |
+				    NETIF_F_SCTP_CRC |
+				    NETIF_F_GSO_UDP_L4 |
+				    NETIF_F_TSO |
+				    NETIF_F_TSO6);
+
+	/* We can only support IPV4 TSO in tunnels if we can mangle the
+	 * inner IP ID field, so strip TSO if MANGLEID is not supported.
+	 * IPsec offoad sets skb->encapsulation but still can handle
+	 * the TSO, so it's the exception.
+	 */
+	if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) {
+#ifdef CONFIG_IXGBE_IPSEC
+		if (!secpath_exists(skb))
+#endif
+			features &= ~NETIF_F_TSO;
+	}
+
+	return features;
+}
+
+static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
+{
+	int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct bpf_prog *old_prog;
+	bool need_reset;
+	int num_queues;
+
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+		return -EINVAL;
+
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
+		return -EINVAL;
+
+	/* verify ixgbe ring attributes are sufficient for XDP */
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct ixgbe_ring *ring = adapter->rx_ring[i];
+
+		if (ring_is_rsc_enabled(ring))
+			return -EINVAL;
+
+		if (frame_size > ixgbe_rx_bufsz(ring))
+			return -EINVAL;
+	}
+
+	/* if the number of cpus is much larger than the maximum of queues,
+	 * we should stop it and then return with ENOMEM like before.
+	 */
+	if (nr_cpu_ids > IXGBE_MAX_XDP_QS * 2)
+		return -ENOMEM;
+
+	old_prog = xchg(&adapter->xdp_prog, prog);
+	need_reset = (!!prog != !!old_prog);
+
+	/* If transitioning XDP modes reconfigure rings */
+	if (need_reset) {
+		int err;
+
+		if (!prog)
+			/* Wait until ndo_xsk_wakeup completes. */
+			synchronize_rcu();
+		err = ixgbe_setup_tc(dev, adapter->hw_tcs);
+
+		if (err)
+			return -EINVAL;
+		if (!prog)
+			xdp_features_clear_redirect_target(dev);
+	} else {
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			WRITE_ONCE(adapter->rx_ring[i]->xdp_prog,
+				   adapter->xdp_prog);
+		}
+	}
+
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	/* Kick start the NAPI context if there is an AF_XDP socket open
+	 * on that queue id. This so that receiving will start.
+	 */
+	if (need_reset && prog) {
+		num_queues = min_t(int, adapter->num_rx_queues,
+				   adapter->num_xdp_queues);
+		for (i = 0; i < num_queues; i++)
+			if (adapter->xdp_ring[i]->xsk_pool)
+				(void)ixgbe_xsk_wakeup(adapter->netdev, i,
+						       XDP_WAKEUP_RX);
+		xdp_features_set_redirect_target(dev, true);
+	}
+
+	return 0;
+}
+
+static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return ixgbe_xdp_setup(dev, xdp->prog);
+	case XDP_SETUP_XSK_POOL:
+		return ixgbe_xsk_pool_setup(adapter, xdp->xsk.pool,
+					    xdp->xsk.queue_id);
+
+	default:
+		return -EINVAL;
+	}
+}
+
+void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring)
+{
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.
+	 */
+	wmb();
+	writel(ring->next_to_use, ring->tail);
+}
+
+void ixgbe_xdp_ring_update_tail_locked(struct ixgbe_ring *ring)
+{
+	if (static_branch_unlikely(&ixgbe_xdp_locking_key))
+		spin_lock(&ring->tx_lock);
+	ixgbe_xdp_ring_update_tail(ring);
+	if (static_branch_unlikely(&ixgbe_xdp_locking_key))
+		spin_unlock(&ring->tx_lock);
+}
+
+static int ixgbe_xdp_xmit(struct net_device *dev, int n,
+			  struct xdp_frame **frames, u32 flags)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_ring *ring;
+	int nxmit = 0;
+	int i;
+
+	if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
+		return -ENETDOWN;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	/* During program transitions its possible adapter->xdp_prog is assigned
+	 * but ring has not been configured yet. In this case simply abort xmit.
+	 */
+	ring = adapter->xdp_prog ? ixgbe_determine_xdp_ring(adapter) : NULL;
+	if (unlikely(!ring))
+		return -ENXIO;
+
+	if (unlikely(test_bit(__IXGBE_TX_DISABLED, &ring->state)))
+		return -ENXIO;
+
+	if (static_branch_unlikely(&ixgbe_xdp_locking_key))
+		spin_lock(&ring->tx_lock);
+
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		int err;
+
+		err = ixgbe_xmit_xdp_ring(ring, xdpf);
+		if (err != IXGBE_XDP_TX)
+			break;
+		nxmit++;
+	}
+
+	if (unlikely(flags & XDP_XMIT_FLUSH))
+		ixgbe_xdp_ring_update_tail(ring);
+
+	if (static_branch_unlikely(&ixgbe_xdp_locking_key))
+		spin_unlock(&ring->tx_lock);
+
+	return nxmit;
+}
+
+static const struct net_device_ops ixgbe_netdev_ops = {
+	.ndo_open		= ixgbe_open,
+	.ndo_stop		= ixgbe_close,
+	.ndo_start_xmit		= ixgbe_xmit_frame,
+	.ndo_set_rx_mode	= ixgbe_set_rx_mode,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_set_mac_address	= ixgbe_set_mac,
+	.ndo_change_mtu		= ixgbe_change_mtu,
+	.ndo_tx_timeout		= ixgbe_tx_timeout,
+	.ndo_set_tx_maxrate	= ixgbe_tx_maxrate,
+	.ndo_vlan_rx_add_vid	= ixgbe_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= ixgbe_vlan_rx_kill_vid,
+	.ndo_eth_ioctl		= ixgbe_ioctl,
+	.ndo_set_vf_mac		= ixgbe_ndo_set_vf_mac,
+	.ndo_set_vf_vlan	= ixgbe_ndo_set_vf_vlan,
+	.ndo_set_vf_rate	= ixgbe_ndo_set_vf_bw,
+	.ndo_set_vf_spoofchk	= ixgbe_ndo_set_vf_spoofchk,
+	.ndo_set_vf_link_state	= ixgbe_ndo_set_vf_link_state,
+	.ndo_set_vf_rss_query_en = ixgbe_ndo_set_vf_rss_query_en,
+	.ndo_set_vf_trust	= ixgbe_ndo_set_vf_trust,
+	.ndo_get_vf_config	= ixgbe_ndo_get_vf_config,
+	.ndo_get_vf_stats	= ixgbe_ndo_get_vf_stats,
+	.ndo_get_stats64	= ixgbe_get_stats64,
+	.ndo_setup_tc		= __ixgbe_setup_tc,
+#ifdef IXGBE_FCOE
+	.ndo_select_queue	= ixgbe_select_queue,
+	.ndo_fcoe_ddp_setup = ixgbe_fcoe_ddp_get,
+	.ndo_fcoe_ddp_target = ixgbe_fcoe_ddp_target,
+	.ndo_fcoe_ddp_done = ixgbe_fcoe_ddp_put,
+	.ndo_fcoe_enable = ixgbe_fcoe_enable,
+	.ndo_fcoe_disable = ixgbe_fcoe_disable,
+	.ndo_fcoe_get_wwn = ixgbe_fcoe_get_wwn,
+	.ndo_fcoe_get_hbainfo = ixgbe_fcoe_get_hbainfo,
+#endif /* IXGBE_FCOE */
+	.ndo_set_features = ixgbe_set_features,
+	.ndo_fix_features = ixgbe_fix_features,
+	.ndo_fdb_add		= ixgbe_ndo_fdb_add,
+	.ndo_bridge_setlink	= ixgbe_ndo_bridge_setlink,
+	.ndo_bridge_getlink	= ixgbe_ndo_bridge_getlink,
+	.ndo_dfwd_add_station	= ixgbe_fwd_add,
+	.ndo_dfwd_del_station	= ixgbe_fwd_del,
+	.ndo_features_check	= ixgbe_features_check,
+	.ndo_bpf		= ixgbe_xdp,
+	.ndo_xdp_xmit		= ixgbe_xdp_xmit,
+	.ndo_xsk_wakeup         = ixgbe_xsk_wakeup,
+};
+
+static void ixgbe_disable_txr_hw(struct ixgbe_adapter *adapter,
+				 struct ixgbe_ring *tx_ring)
+{
+	unsigned long wait_delay, delay_interval;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u8 reg_idx = tx_ring->reg_idx;
+	int wait_loop;
+	u32 txdctl;
+
+	IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
+
+	/* delay mechanism from ixgbe_disable_tx */
+	delay_interval = ixgbe_get_completion_timeout(adapter) / 100;
+
+	wait_loop = IXGBE_MAX_RX_DESC_POLL;
+	wait_delay = delay_interval;
+
+	while (wait_loop--) {
+		usleep_range(wait_delay, wait_delay + 10);
+		wait_delay += delay_interval * 2;
+		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx));
+
+		if (!(txdctl & IXGBE_TXDCTL_ENABLE))
+			return;
+	}
+
+	e_err(drv, "TXDCTL.ENABLE not cleared within the polling period\n");
+}
+
+static void ixgbe_disable_txr(struct ixgbe_adapter *adapter,
+			      struct ixgbe_ring *tx_ring)
+{
+	set_bit(__IXGBE_TX_DISABLED, &tx_ring->state);
+	ixgbe_disable_txr_hw(adapter, tx_ring);
+}
+
+static void ixgbe_disable_rxr_hw(struct ixgbe_adapter *adapter,
+				 struct ixgbe_ring *rx_ring)
+{
+	unsigned long wait_delay, delay_interval;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u8 reg_idx = rx_ring->reg_idx;
+	int wait_loop;
+	u32 rxdctl;
+
+	rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+	rxdctl &= ~IXGBE_RXDCTL_ENABLE;
+	rxdctl |= IXGBE_RXDCTL_SWFLSH;
+
+	/* write value back with RXDCTL.ENABLE bit cleared */
+	IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
+
+	/* RXDCTL.EN may not change on 82598 if link is down, so skip it */
+	if (hw->mac.type == ixgbe_mac_82598EB &&
+	    !(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP))
+		return;
+
+	/* delay mechanism from ixgbe_disable_rx */
+	delay_interval = ixgbe_get_completion_timeout(adapter) / 100;
+
+	wait_loop = IXGBE_MAX_RX_DESC_POLL;
+	wait_delay = delay_interval;
+
+	while (wait_loop--) {
+		usleep_range(wait_delay, wait_delay + 10);
+		wait_delay += delay_interval * 2;
+		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
+
+		if (!(rxdctl & IXGBE_RXDCTL_ENABLE))
+			return;
+	}
+
+	e_err(drv, "RXDCTL.ENABLE not cleared within the polling period\n");
+}
+
+static void ixgbe_reset_txr_stats(struct ixgbe_ring *tx_ring)
+{
+	memset(&tx_ring->stats, 0, sizeof(tx_ring->stats));
+	memset(&tx_ring->tx_stats, 0, sizeof(tx_ring->tx_stats));
+}
+
+static void ixgbe_reset_rxr_stats(struct ixgbe_ring *rx_ring)
+{
+	memset(&rx_ring->stats, 0, sizeof(rx_ring->stats));
+	memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats));
+}
+
+/**
+ * ixgbe_txrx_ring_disable - Disable Rx/Tx/XDP Tx rings
+ * @adapter: adapter structure
+ * @ring: ring index
+ *
+ * This function disables a certain Rx/Tx/XDP Tx ring. The function
+ * assumes that the netdev is running.
+ **/
+void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring)
+{
+	struct ixgbe_ring *rx_ring, *tx_ring, *xdp_ring;
+
+	rx_ring = adapter->rx_ring[ring];
+	tx_ring = adapter->tx_ring[ring];
+	xdp_ring = adapter->xdp_ring[ring];
+
+	ixgbe_disable_txr(adapter, tx_ring);
+	if (xdp_ring)
+		ixgbe_disable_txr(adapter, xdp_ring);
+	ixgbe_disable_rxr_hw(adapter, rx_ring);
+
+	if (xdp_ring)
+		synchronize_rcu();
+
+	/* Rx/Tx/XDP Tx share the same napi context. */
+	napi_disable(&rx_ring->q_vector->napi);
+
+	ixgbe_clean_tx_ring(tx_ring);
+	if (xdp_ring)
+		ixgbe_clean_tx_ring(xdp_ring);
+	ixgbe_clean_rx_ring(rx_ring);
+
+	ixgbe_reset_txr_stats(tx_ring);
+	if (xdp_ring)
+		ixgbe_reset_txr_stats(xdp_ring);
+	ixgbe_reset_rxr_stats(rx_ring);
+}
+
+/**
+ * ixgbe_txrx_ring_enable - Enable Rx/Tx/XDP Tx rings
+ * @adapter: adapter structure
+ * @ring: ring index
+ *
+ * This function enables a certain Rx/Tx/XDP Tx ring. The function
+ * assumes that the netdev is running.
+ **/
+void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring)
+{
+	struct ixgbe_ring *rx_ring, *tx_ring, *xdp_ring;
+
+	rx_ring = adapter->rx_ring[ring];
+	tx_ring = adapter->tx_ring[ring];
+	xdp_ring = adapter->xdp_ring[ring];
+
+	/* Rx/Tx/XDP Tx share the same napi context. */
+	napi_enable(&rx_ring->q_vector->napi);
+
+	ixgbe_configure_tx_ring(adapter, tx_ring);
+	if (xdp_ring)
+		ixgbe_configure_tx_ring(adapter, xdp_ring);
+	ixgbe_configure_rx_ring(adapter, rx_ring);
+
+	clear_bit(__IXGBE_TX_DISABLED, &tx_ring->state);
+	if (xdp_ring)
+		clear_bit(__IXGBE_TX_DISABLED, &xdp_ring->state);
+}
+
+/**
+ * ixgbe_enumerate_functions - Get the number of ports this device has
+ * @adapter: adapter structure
+ *
+ * This function enumerates the phsyical functions co-located on a single slot,
+ * in order to determine how many ports a device has. This is most useful in
+ * determining the required GT/s of PCIe bandwidth necessary for optimal
+ * performance.
+ **/
+static inline int ixgbe_enumerate_functions(struct ixgbe_adapter *adapter)
+{
+	struct pci_dev *entry, *pdev = adapter->pdev;
+	int physfns = 0;
+
+	/* Some cards can not use the generic count PCIe functions method,
+	 * because they are behind a parent switch, so we hardcode these with
+	 * the correct number of functions.
+	 */
+	if (ixgbe_pcie_from_parent(&adapter->hw))
+		physfns = 4;
+
+	list_for_each_entry(entry, &adapter->pdev->bus->devices, bus_list) {
+		/* don't count virtual functions */
+		if (entry->is_virtfn)
+			continue;
+
+		/* When the devices on the bus don't all match our device ID,
+		 * we can't reliably determine the correct number of
+		 * functions. This can occur if a function has been direct
+		 * attached to a virtual machine using VT-d, for example. In
+		 * this case, simply return -1 to indicate this.
+		 */
+		if ((entry->vendor != pdev->vendor) ||
+		    (entry->device != pdev->device))
+			return -1;
+
+		physfns++;
+	}
+
+	return physfns;
+}
+
+/**
+ * ixgbe_wol_supported - Check whether device supports WoL
+ * @adapter: the adapter private structure
+ * @device_id: the device ID
+ * @subdevice_id: the subsystem device ID
+ *
+ * This function is used by probe and ethtool to determine
+ * which devices have WoL support
+ *
+ **/
+bool ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id,
+			 u16 subdevice_id)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u16 wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK;
+
+	/* WOL not supported on 82598 */
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return false;
+
+	/* check eeprom to see if WOL is enabled for X540 and newer */
+	if (hw->mac.type >= ixgbe_mac_X540) {
+		if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) ||
+		    ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) &&
+		     (hw->bus.func == 0)))
+			return true;
+	}
+
+	/* WOL is determined based on device IDs for 82599 MACs */
+	switch (device_id) {
+	case IXGBE_DEV_ID_82599_SFP:
+		/* Only these subdevices could supports WOL */
+		switch (subdevice_id) {
+		case IXGBE_SUBDEV_ID_82599_560FLR:
+		case IXGBE_SUBDEV_ID_82599_LOM_SNAP6:
+		case IXGBE_SUBDEV_ID_82599_SFP_WOL0:
+		case IXGBE_SUBDEV_ID_82599_SFP_2OCP:
+			/* only support first port */
+			if (hw->bus.func != 0)
+				break;
+			fallthrough;
+		case IXGBE_SUBDEV_ID_82599_SP_560FLR:
+		case IXGBE_SUBDEV_ID_82599_SFP:
+		case IXGBE_SUBDEV_ID_82599_RNDC:
+		case IXGBE_SUBDEV_ID_82599_ECNA_DP:
+		case IXGBE_SUBDEV_ID_82599_SFP_1OCP:
+		case IXGBE_SUBDEV_ID_82599_SFP_LOM_OEM1:
+		case IXGBE_SUBDEV_ID_82599_SFP_LOM_OEM2:
+			return true;
+		}
+		break;
+	case IXGBE_DEV_ID_82599EN_SFP:
+		/* Only these subdevices support WOL */
+		switch (subdevice_id) {
+		case IXGBE_SUBDEV_ID_82599EN_SFP_OCP1:
+			return true;
+		}
+		break;
+	case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
+		/* All except this subdevice support WOL */
+		if (subdevice_id != IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ)
+			return true;
+		break;
+	case IXGBE_DEV_ID_82599_KX4:
+		return  true;
+	default:
+		break;
+	}
+
+	return false;
+}
+
+/**
+ * ixgbe_set_fw_version - Set FW version
+ * @adapter: the adapter private structure
+ *
+ * This function is used by probe and ethtool to determine the FW version to
+ * format to display. The FW version is taken from the EEPROM/NVM.
+ */
+static void ixgbe_set_fw_version(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_nvm_version nvm_ver;
+
+	ixgbe_get_oem_prod_version(hw, &nvm_ver);
+	if (nvm_ver.oem_valid) {
+		snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
+			 "%x.%x.%x", nvm_ver.oem_major, nvm_ver.oem_minor,
+			 nvm_ver.oem_release);
+		return;
+	}
+
+	ixgbe_get_etk_id(hw, &nvm_ver);
+	ixgbe_get_orom_version(hw, &nvm_ver);
+
+	if (nvm_ver.or_valid) {
+		snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
+			 "0x%08x, %d.%d.%d", nvm_ver.etk_id, nvm_ver.or_major,
+			 nvm_ver.or_build, nvm_ver.or_patch);
+		return;
+	}
+
+	/* Set ETrack ID format */
+	snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id),
+		 "0x%08x", nvm_ver.etk_id);
+}
+
+/**
+ * ixgbe_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in ixgbe_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * ixgbe_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct net_device *netdev;
+	struct ixgbe_adapter *adapter = NULL;
+	struct ixgbe_hw *hw;
+	const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data];
+	unsigned int indices = MAX_TX_QUEUES;
+	u8 part_str[IXGBE_PBANUM_LENGTH];
+	int i, err, expected_gts;
+	bool disable_dev = false;
+#ifdef IXGBE_FCOE
+	u16 device_caps;
+#endif
+	u32 eec;
+
+	/* Catch broken hardware that put the wrong VF device ID in
+	 * the PCIe SR-IOV capability.
+	 */
+	if (pdev->is_virtfn) {
+		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
+		     pci_name(pdev), pdev->vendor, pdev->device);
+		return -EINVAL;
+	}
+
+	err = pci_enable_device_mem(pdev);
+	if (err)
+		return err;
+
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_err(&pdev->dev,
+			"No usable DMA configuration, aborting\n");
+		goto err_dma;
+	}
+
+	err = pci_request_mem_regions(pdev, ixgbe_driver_name);
+	if (err) {
+		dev_err(&pdev->dev,
+			"pci_request_selected_regions failed 0x%x\n", err);
+		goto err_pci_reg;
+	}
+
+	pci_set_master(pdev);
+	pci_save_state(pdev);
+
+	if (ii->mac == ixgbe_mac_82598EB) {
+#ifdef CONFIG_IXGBE_DCB
+		/* 8 TC w/ 4 queues per TC */
+		indices = 4 * MAX_TRAFFIC_CLASS;
+#else
+		indices = IXGBE_MAX_RSS_INDICES;
+#endif
+	}
+
+	netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices);
+	if (!netdev) {
+		err = -ENOMEM;
+		goto err_alloc_etherdev;
+	}
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	adapter = netdev_priv(netdev);
+
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+	hw = &adapter->hw;
+	hw->back = adapter;
+	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+
+	hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
+			      pci_resource_len(pdev, 0));
+	adapter->io_addr = hw->hw_addr;
+	if (!hw->hw_addr) {
+		err = -EIO;
+		goto err_ioremap;
+	}
+
+	netdev->netdev_ops = &ixgbe_netdev_ops;
+	ixgbe_set_ethtool_ops(netdev);
+	netdev->watchdog_timeo = 5 * HZ;
+	strscpy(netdev->name, pci_name(pdev), sizeof(netdev->name));
+
+	/* Setup hw api */
+	hw->mac.ops   = *ii->mac_ops;
+	hw->mac.type  = ii->mac;
+	hw->mvals     = ii->mvals;
+	if (ii->link_ops)
+		hw->link.ops  = *ii->link_ops;
+
+	/* EEPROM */
+	hw->eeprom.ops = *ii->eeprom_ops;
+	eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+	if (ixgbe_removed(hw->hw_addr)) {
+		err = -EIO;
+		goto err_ioremap;
+	}
+	/* If EEPROM is valid (bit 8 = 1), use default otherwise use bit bang */
+	if (!(eec & BIT(8)))
+		hw->eeprom.ops.read = &ixgbe_read_eeprom_bit_bang_generic;
+
+	/* PHY */
+	hw->phy.ops = *ii->phy_ops;
+	hw->phy.sfp_type = ixgbe_sfp_type_unknown;
+	/* ixgbe_identify_phy_generic will set prtad and mmds properly */
+	hw->phy.mdio.prtad = MDIO_PRTAD_NONE;
+	hw->phy.mdio.mmds = 0;
+	hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22;
+	hw->phy.mdio.dev = netdev;
+	hw->phy.mdio.mdio_read = ixgbe_mdio_read;
+	hw->phy.mdio.mdio_write = ixgbe_mdio_write;
+
+	/* setup the private structure */
+	err = ixgbe_sw_init(adapter, ii);
+	if (err)
+		goto err_sw_init;
+
+	if (adapter->hw.mac.type == ixgbe_mac_82599EB)
+		adapter->flags2 |= IXGBE_FLAG2_AUTO_DISABLE_VF;
+
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+		netdev->udp_tunnel_nic_info = &ixgbe_udp_tunnels_x550;
+		break;
+	case ixgbe_mac_x550em_a:
+		netdev->udp_tunnel_nic_info = &ixgbe_udp_tunnels_x550em_a;
+		break;
+	default:
+		break;
+	}
+
+	/* Make sure the SWFW semaphore is in a valid state */
+	if (hw->mac.ops.init_swfw_sync)
+		hw->mac.ops.init_swfw_sync(hw);
+
+	/* Make it possible the adapter to be woken up via WOL */
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_82599EB:
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0);
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * If there is a fan on this device and it has failed log the
+	 * failure.
+	 */
+	if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) {
+		u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+		if (esdp & IXGBE_ESDP_SDP1)
+			e_crit(probe, "Fan has stopped, replace the adapter\n");
+	}
+
+	if (allow_unsupported_sfp)
+		hw->allow_unsupported_sfp = allow_unsupported_sfp;
+
+	/* reset_hw fills in the perm_addr as well */
+	hw->phy.reset_if_overtemp = true;
+	err = hw->mac.ops.reset_hw(hw);
+	hw->phy.reset_if_overtemp = false;
+	ixgbe_set_eee_capable(adapter);
+	if (err == IXGBE_ERR_SFP_NOT_PRESENT) {
+		err = 0;
+	} else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
+		e_dev_err("failed to load because an unsupported SFP+ or QSFP module type was detected.\n");
+		e_dev_err("Reload the driver after installing a supported module.\n");
+		goto err_sw_init;
+	} else if (err) {
+		e_dev_err("HW Init failed: %d\n", err);
+		goto err_sw_init;
+	}
+
+#ifdef CONFIG_PCI_IOV
+	/* SR-IOV not supported on the 82598 */
+	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
+		goto skip_sriov;
+	/* Mailbox */
+	ixgbe_init_mbx_params_pf(hw);
+	hw->mbx.ops = ii->mbx_ops;
+	pci_sriov_set_totalvfs(pdev, IXGBE_MAX_VFS_DRV_LIMIT);
+	ixgbe_enable_sriov(adapter, max_vfs);
+skip_sriov:
+
+#endif
+	netdev->features = NETIF_F_SG |
+			   NETIF_F_TSO |
+			   NETIF_F_TSO6 |
+			   NETIF_F_RXHASH |
+			   NETIF_F_RXCSUM |
+			   NETIF_F_HW_CSUM;
+
+#define IXGBE_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
+				    NETIF_F_GSO_GRE_CSUM | \
+				    NETIF_F_GSO_IPXIP4 | \
+				    NETIF_F_GSO_IPXIP6 | \
+				    NETIF_F_GSO_UDP_TUNNEL | \
+				    NETIF_F_GSO_UDP_TUNNEL_CSUM)
+
+	netdev->gso_partial_features = IXGBE_GSO_PARTIAL_FEATURES;
+	netdev->features |= NETIF_F_GSO_PARTIAL |
+			    IXGBE_GSO_PARTIAL_FEATURES;
+
+	if (hw->mac.type >= ixgbe_mac_82599EB)
+		netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_GSO_UDP_L4;
+
+#ifdef CONFIG_IXGBE_IPSEC
+#define IXGBE_ESP_FEATURES	(NETIF_F_HW_ESP | \
+				 NETIF_F_HW_ESP_TX_CSUM | \
+				 NETIF_F_GSO_ESP)
+
+	if (adapter->ipsec)
+		netdev->features |= IXGBE_ESP_FEATURES;
+#endif
+	/* copy netdev features into list of user selectable features */
+	netdev->hw_features |= netdev->features |
+			       NETIF_F_HW_VLAN_CTAG_FILTER |
+			       NETIF_F_HW_VLAN_CTAG_RX |
+			       NETIF_F_HW_VLAN_CTAG_TX |
+			       NETIF_F_RXALL |
+			       NETIF_F_HW_L2FW_DOFFLOAD;
+
+	if (hw->mac.type >= ixgbe_mac_82599EB)
+		netdev->hw_features |= NETIF_F_NTUPLE |
+				       NETIF_F_HW_TC;
+
+	netdev->features |= NETIF_F_HIGHDMA;
+
+	netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
+	netdev->hw_enc_features |= netdev->vlan_features;
+	netdev->mpls_features |= NETIF_F_SG |
+				 NETIF_F_TSO |
+				 NETIF_F_TSO6 |
+				 NETIF_F_HW_CSUM;
+	netdev->mpls_features |= IXGBE_GSO_PARTIAL_FEATURES;
+
+	/* set this bit last since it cannot be part of vlan_features */
+	netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
+			    NETIF_F_HW_VLAN_CTAG_RX |
+			    NETIF_F_HW_VLAN_CTAG_TX;
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+	netdev->priv_flags |= IFF_SUPP_NOFCS;
+
+	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+			       NETDEV_XDP_ACT_XSK_ZEROCOPY;
+
+	/* MTU range: 68 - 9710 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);
+
+#ifdef CONFIG_IXGBE_DCB
+	if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE)
+		netdev->dcbnl_ops = &ixgbe_dcbnl_ops;
+#endif
+
+#ifdef IXGBE_FCOE
+	if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) {
+		unsigned int fcoe_l;
+
+		if (hw->mac.ops.get_device_caps) {
+			hw->mac.ops.get_device_caps(hw, &device_caps);
+			if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS)
+				adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE;
+		}
+
+
+		fcoe_l = min_t(int, IXGBE_FCRETA_SIZE, num_online_cpus());
+		adapter->ring_feature[RING_F_FCOE].limit = fcoe_l;
+
+		netdev->features |= NETIF_F_FSO |
+				    NETIF_F_FCOE_CRC;
+
+		netdev->vlan_features |= NETIF_F_FSO |
+					 NETIF_F_FCOE_CRC |
+					 NETIF_F_FCOE_MTU;
+	}
+#endif /* IXGBE_FCOE */
+	if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)
+		netdev->hw_features |= NETIF_F_LRO;
+	if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
+		netdev->features |= NETIF_F_LRO;
+
+	if (ixgbe_check_fw_error(adapter)) {
+		err = -EIO;
+		goto err_sw_init;
+	}
+
+	/* make sure the EEPROM is good */
+	if (hw->eeprom.ops.validate_checksum(hw, NULL) < 0) {
+		e_dev_err("The EEPROM Checksum Is Not Valid\n");
+		err = -EIO;
+		goto err_sw_init;
+	}
+
+	eth_platform_get_mac_address(&adapter->pdev->dev,
+				     adapter->hw.mac.perm_addr);
+
+	eth_hw_addr_set(netdev, hw->mac.perm_addr);
+
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		e_dev_err("invalid MAC address\n");
+		err = -EIO;
+		goto err_sw_init;
+	}
+
+	/* Set hw->mac.addr to permanent MAC address */
+	ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
+	ixgbe_mac_set_default_filter(adapter);
+
+	timer_setup(&adapter->service_timer, ixgbe_service_timer, 0);
+
+	if (ixgbe_removed(hw->hw_addr)) {
+		err = -EIO;
+		goto err_sw_init;
+	}
+	INIT_WORK(&adapter->service_task, ixgbe_service_task);
+	set_bit(__IXGBE_SERVICE_INITED, &adapter->state);
+	clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state);
+
+	err = ixgbe_init_interrupt_scheme(adapter);
+	if (err)
+		goto err_sw_init;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		u64_stats_init(&adapter->rx_ring[i]->syncp);
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		u64_stats_init(&adapter->tx_ring[i]->syncp);
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		u64_stats_init(&adapter->xdp_ring[i]->syncp);
+
+	/* WOL not supported for all devices */
+	adapter->wol = 0;
+	hw->eeprom.ops.read(hw, 0x2c, &adapter->eeprom_cap);
+	hw->wol_enabled = ixgbe_wol_supported(adapter, pdev->device,
+						pdev->subsystem_device);
+	if (hw->wol_enabled)
+		adapter->wol = IXGBE_WUFC_MAG;
+
+	device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
+
+	/* save off EEPROM version number */
+	ixgbe_set_fw_version(adapter);
+
+	/* pick up the PCI bus settings for reporting later */
+	if (ixgbe_pcie_from_parent(hw))
+		ixgbe_get_parent_bus_info(adapter);
+	else
+		 hw->mac.ops.get_bus_info(hw);
+
+	/* calculate the expected PCIe bandwidth required for optimal
+	 * performance. Note that some older parts will never have enough
+	 * bandwidth due to being older generation PCIe parts. We clamp these
+	 * parts to ensure no warning is displayed if it can't be fixed.
+	 */
+	switch (hw->mac.type) {
+	case ixgbe_mac_82598EB:
+		expected_gts = min(ixgbe_enumerate_functions(adapter) * 10, 16);
+		break;
+	default:
+		expected_gts = ixgbe_enumerate_functions(adapter) * 10;
+		break;
+	}
+
+	/* don't check link if we failed to enumerate functions */
+	if (expected_gts > 0)
+		ixgbe_check_minimum_link(adapter, expected_gts);
+
+	err = ixgbe_read_pba_string_generic(hw, part_str, sizeof(part_str));
+	if (err)
+		strscpy(part_str, "Unknown", sizeof(part_str));
+	if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present)
+		e_dev_info("MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n",
+			   hw->mac.type, hw->phy.type, hw->phy.sfp_type,
+			   part_str);
+	else
+		e_dev_info("MAC: %d, PHY: %d, PBA No: %s\n",
+			   hw->mac.type, hw->phy.type, part_str);
+
+	e_dev_info("%pM\n", netdev->dev_addr);
+
+	/* reset the hardware with the new settings */
+	err = hw->mac.ops.start_hw(hw);
+	if (err == IXGBE_ERR_EEPROM_VERSION) {
+		/* We are running on a pre-production device, log a warning */
+		e_dev_warn("This device is a pre-production adapter/LOM. "
+			   "Please be aware there may be issues associated "
+			   "with your hardware.  If you are experiencing "
+			   "problems please contact your Intel or hardware "
+			   "representative who provided you with this "
+			   "hardware.\n");
+	}
+	strcpy(netdev->name, "eth%d");
+	pci_set_drvdata(pdev, adapter);
+	err = register_netdev(netdev);
+	if (err)
+		goto err_register;
+
+
+	/* power down the optics for 82599 SFP+ fiber */
+	if (hw->mac.ops.disable_tx_laser)
+		hw->mac.ops.disable_tx_laser(hw);
+
+	/* carrier off reporting is important to ethtool even BEFORE open */
+	netif_carrier_off(netdev);
+
+#ifdef CONFIG_IXGBE_DCA
+	if (dca_add_requester(&pdev->dev) == 0) {
+		adapter->flags |= IXGBE_FLAG_DCA_ENABLED;
+		ixgbe_setup_dca(adapter);
+	}
+#endif
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+		e_info(probe, "IOV is enabled with %d VFs\n", adapter->num_vfs);
+		for (i = 0; i < adapter->num_vfs; i++)
+			ixgbe_vf_configuration(pdev, (i | 0x10000000));
+	}
+
+	/* firmware requires driver version to be 0xFFFFFFFF
+	 * since os does not support feature
+	 */
+	if (hw->mac.ops.set_fw_drv_ver)
+		hw->mac.ops.set_fw_drv_ver(hw, 0xFF, 0xFF, 0xFF, 0xFF,
+					   sizeof(UTS_RELEASE) - 1,
+					   UTS_RELEASE);
+
+	/* add san mac addr to netdev */
+	ixgbe_add_sanmac_netdev(netdev);
+
+	e_dev_info("%s\n", ixgbe_default_device_descr);
+
+#ifdef CONFIG_IXGBE_HWMON
+	if (ixgbe_sysfs_init(adapter))
+		e_err(probe, "failed to allocate sysfs resources\n");
+#endif /* CONFIG_IXGBE_HWMON */
+
+	ixgbe_dbg_adapter_init(adapter);
+
+	/* setup link for SFP devices with MNG FW, else wait for IXGBE_UP */
+	if (ixgbe_mng_enabled(hw) && ixgbe_is_sfp(hw) && hw->mac.ops.setup_link)
+		hw->mac.ops.setup_link(hw,
+			IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL,
+			true);
+
+	err = ixgbe_mii_bus_init(hw);
+	if (err)
+		goto err_netdev;
+
+	return 0;
+
+err_netdev:
+	unregister_netdev(netdev);
+err_register:
+	ixgbe_release_hw_control(adapter);
+	ixgbe_clear_interrupt_scheme(adapter);
+err_sw_init:
+	ixgbe_disable_sriov(adapter);
+	adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP;
+	iounmap(adapter->io_addr);
+	kfree(adapter->jump_tables[0]);
+	kfree(adapter->mac_table);
+	kfree(adapter->rss_key);
+	bitmap_free(adapter->af_xdp_zc_qps);
+err_ioremap:
+	disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state);
+	free_netdev(netdev);
+err_alloc_etherdev:
+	pci_release_mem_regions(pdev);
+err_pci_reg:
+err_dma:
+	if (!adapter || disable_dev)
+		pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * ixgbe_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * ixgbe_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.  The could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void ixgbe_remove(struct pci_dev *pdev)
+{
+	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
+	struct net_device *netdev;
+	bool disable_dev;
+	int i;
+
+	/* if !adapter then we already cleaned up in probe */
+	if (!adapter)
+		return;
+
+	netdev  = adapter->netdev;
+	ixgbe_dbg_adapter_exit(adapter);
+
+	set_bit(__IXGBE_REMOVING, &adapter->state);
+	cancel_work_sync(&adapter->service_task);
+
+	if (adapter->mii_bus)
+		mdiobus_unregister(adapter->mii_bus);
+
+#ifdef CONFIG_IXGBE_DCA
+	if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) {
+		adapter->flags &= ~IXGBE_FLAG_DCA_ENABLED;
+		dca_remove_requester(&pdev->dev);
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
+				IXGBE_DCA_CTRL_DCA_DISABLE);
+	}
+
+#endif
+#ifdef CONFIG_IXGBE_HWMON
+	ixgbe_sysfs_exit(adapter);
+#endif /* CONFIG_IXGBE_HWMON */
+
+	/* remove the added san mac */
+	ixgbe_del_sanmac_netdev(netdev);
+
+#ifdef CONFIG_PCI_IOV
+	ixgbe_disable_sriov(adapter);
+#endif
+	if (netdev->reg_state == NETREG_REGISTERED)
+		unregister_netdev(netdev);
+
+	ixgbe_stop_ipsec_offload(adapter);
+	ixgbe_clear_interrupt_scheme(adapter);
+
+	ixgbe_release_hw_control(adapter);
+
+#ifdef CONFIG_DCB
+	kfree(adapter->ixgbe_ieee_pfc);
+	kfree(adapter->ixgbe_ieee_ets);
+
+#endif
+	iounmap(adapter->io_addr);
+	pci_release_mem_regions(pdev);
+
+	e_dev_info("complete\n");
+
+	for (i = 0; i < IXGBE_MAX_LINK_HANDLE; i++) {
+		if (adapter->jump_tables[i]) {
+			kfree(adapter->jump_tables[i]->input);
+			kfree(adapter->jump_tables[i]->mask);
+		}
+		kfree(adapter->jump_tables[i]);
+	}
+
+	kfree(adapter->mac_table);
+	kfree(adapter->rss_key);
+	bitmap_free(adapter->af_xdp_zc_qps);
+	disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state);
+	free_netdev(netdev);
+
+	if (disable_dev)
+		pci_disable_device(pdev);
+}
+
+/**
+ * ixgbe_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev,
+						pci_channel_state_t state)
+{
+	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
+	struct net_device *netdev = adapter->netdev;
+
+#ifdef CONFIG_PCI_IOV
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct pci_dev *bdev, *vfdev;
+	u32 dw0, dw1, dw2, dw3;
+	int vf, pos;
+	u16 req_id, pf_func;
+
+	if (adapter->hw.mac.type == ixgbe_mac_82598EB ||
+	    adapter->num_vfs == 0)
+		goto skip_bad_vf_detection;
+
+	bdev = pdev->bus->self;
+	while (bdev && (pci_pcie_type(bdev) != PCI_EXP_TYPE_ROOT_PORT))
+		bdev = bdev->bus->self;
+
+	if (!bdev)
+		goto skip_bad_vf_detection;
+
+	pos = pci_find_ext_capability(bdev, PCI_EXT_CAP_ID_ERR);
+	if (!pos)
+		goto skip_bad_vf_detection;
+
+	dw0 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG);
+	dw1 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG + 4);
+	dw2 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG + 8);
+	dw3 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG + 12);
+	if (ixgbe_removed(hw->hw_addr))
+		goto skip_bad_vf_detection;
+
+	req_id = dw1 >> 16;
+	/* On the 82599 if bit 7 of the requestor ID is set then it's a VF */
+	if (!(req_id & 0x0080))
+		goto skip_bad_vf_detection;
+
+	pf_func = req_id & 0x01;
+	if ((pf_func & 1) == (pdev->devfn & 1)) {
+		unsigned int device_id;
+
+		vf = (req_id & 0x7F) >> 1;
+		e_dev_err("VF %d has caused a PCIe error\n", vf);
+		e_dev_err("TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: "
+				"%8.8x\tdw3: %8.8x\n",
+		dw0, dw1, dw2, dw3);
+		switch (adapter->hw.mac.type) {
+		case ixgbe_mac_82599EB:
+			device_id = IXGBE_82599_VF_DEVICE_ID;
+			break;
+		case ixgbe_mac_X540:
+			device_id = IXGBE_X540_VF_DEVICE_ID;
+			break;
+		case ixgbe_mac_X550:
+			device_id = IXGBE_DEV_ID_X550_VF;
+			break;
+		case ixgbe_mac_X550EM_x:
+			device_id = IXGBE_DEV_ID_X550EM_X_VF;
+			break;
+		case ixgbe_mac_x550em_a:
+			device_id = IXGBE_DEV_ID_X550EM_A_VF;
+			break;
+		default:
+			device_id = 0;
+			break;
+		}
+
+		/* Find the pci device of the offending VF */
+		vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, device_id, NULL);
+		while (vfdev) {
+			if (vfdev->devfn == (req_id & 0xFF))
+				break;
+			vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+					       device_id, vfdev);
+		}
+		/*
+		 * There's a slim chance the VF could have been hot plugged,
+		 * so if it is no longer present we don't need to issue the
+		 * VFLR.  Just clean up the AER in that case.
+		 */
+		if (vfdev) {
+			pcie_flr(vfdev);
+			/* Free device reference count */
+			pci_dev_put(vfdev);
+		}
+	}
+
+	/*
+	 * Even though the error may have occurred on the other port
+	 * we still need to increment the vf error reference count for
+	 * both ports because the I/O resume function will be called
+	 * for both of them.
+	 */
+	adapter->vferr_refcount++;
+
+	return PCI_ERS_RESULT_RECOVERED;
+
+skip_bad_vf_detection:
+#endif /* CONFIG_PCI_IOV */
+	if (!test_bit(__IXGBE_SERVICE_INITED, &adapter->state))
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	if (!netif_device_present(netdev))
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	rtnl_lock();
+	netif_device_detach(netdev);
+
+	if (netif_running(netdev))
+		ixgbe_close_suspend(adapter);
+
+	if (state == pci_channel_io_perm_failure) {
+		rtnl_unlock();
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	if (!test_and_set_bit(__IXGBE_DISABLED, &adapter->state))
+		pci_disable_device(pdev);
+	rtnl_unlock();
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * ixgbe_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot.
+ */
+static pci_ers_result_t ixgbe_io_slot_reset(struct pci_dev *pdev)
+{
+	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
+	pci_ers_result_t result;
+
+	if (pci_enable_device_mem(pdev)) {
+		e_err(probe, "Cannot re-enable PCI device after reset.\n");
+		result = PCI_ERS_RESULT_DISCONNECT;
+	} else {
+		smp_mb__before_atomic();
+		clear_bit(__IXGBE_DISABLED, &adapter->state);
+		adapter->hw.hw_addr = adapter->io_addr;
+		pci_set_master(pdev);
+		pci_restore_state(pdev);
+		pci_save_state(pdev);
+
+		pci_wake_from_d3(pdev, false);
+
+		ixgbe_reset(adapter);
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0);
+		result = PCI_ERS_RESULT_RECOVERED;
+	}
+
+	return result;
+}
+
+/**
+ * ixgbe_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells us that
+ * its OK to resume normal operation.
+ */
+static void ixgbe_io_resume(struct pci_dev *pdev)
+{
+	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
+	struct net_device *netdev = adapter->netdev;
+
+#ifdef CONFIG_PCI_IOV
+	if (adapter->vferr_refcount) {
+		e_info(drv, "Resuming after VF err\n");
+		adapter->vferr_refcount--;
+		return;
+	}
+
+#endif
+	rtnl_lock();
+	if (netif_running(netdev))
+		ixgbe_open(netdev);
+
+	netif_device_attach(netdev);
+	rtnl_unlock();
+}
+
+static const struct pci_error_handlers ixgbe_err_handler = {
+	.error_detected = ixgbe_io_error_detected,
+	.slot_reset = ixgbe_io_slot_reset,
+	.resume = ixgbe_io_resume,
+};
+
+static SIMPLE_DEV_PM_OPS(ixgbe_pm_ops, ixgbe_suspend, ixgbe_resume);
+
+static struct pci_driver ixgbe_driver = {
+	.name      = ixgbe_driver_name,
+	.id_table  = ixgbe_pci_tbl,
+	.probe     = ixgbe_probe,
+	.remove    = ixgbe_remove,
+	.driver.pm = &ixgbe_pm_ops,
+	.shutdown  = ixgbe_shutdown,
+	.sriov_configure = ixgbe_pci_sriov_configure,
+	.err_handler = &ixgbe_err_handler
+};
+
+/**
+ * ixgbe_init_module - Driver Registration Routine
+ *
+ * ixgbe_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ **/
+static int __init ixgbe_init_module(void)
+{
+	int ret;
+	pr_info("%s\n", ixgbe_driver_string);
+	pr_info("%s\n", ixgbe_copyright);
+
+	ixgbe_wq = create_singlethread_workqueue(ixgbe_driver_name);
+	if (!ixgbe_wq) {
+		pr_err("%s: Failed to create workqueue\n", ixgbe_driver_name);
+		return -ENOMEM;
+	}
+
+	ixgbe_dbg_init();
+
+	ret = pci_register_driver(&ixgbe_driver);
+	if (ret) {
+		destroy_workqueue(ixgbe_wq);
+		ixgbe_dbg_exit();
+		return ret;
+	}
+
+#ifdef CONFIG_IXGBE_DCA
+	dca_register_notify(&dca_notifier);
+#endif
+
+	return 0;
+}
+
+module_init(ixgbe_init_module);
+
+/**
+ * ixgbe_exit_module - Driver Exit Cleanup Routine
+ *
+ * ixgbe_exit_module is called just before the driver is removed
+ * from memory.
+ **/
+static void __exit ixgbe_exit_module(void)
+{
+#ifdef CONFIG_IXGBE_DCA
+	dca_unregister_notify(&dca_notifier);
+#endif
+	pci_unregister_driver(&ixgbe_driver);
+
+	ixgbe_dbg_exit();
+	if (ixgbe_wq) {
+		destroy_workqueue(ixgbe_wq);
+		ixgbe_wq = NULL;
+	}
+}
+
+#ifdef CONFIG_IXGBE_DCA
+static int ixgbe_notify_dca(struct notifier_block *nb, unsigned long event,
+			    void *p)
+{
+	int ret_val;
+
+	ret_val = driver_for_each_device(&ixgbe_driver.driver, NULL, &event,
+					 __ixgbe_notify_dca);
+
+	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
+}
+
+#endif /* CONFIG_IXGBE_DCA */
+
+module_exit(ixgbe_exit_module);
+
+/* ixgbe_main.c */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c
new file mode 100644
index 0000000000..5679293e53
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c
@@ -0,0 +1,435 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include "ixgbe.h"
+#include "ixgbe_mbx.h"
+
+/**
+ *  ixgbe_read_mbx - Reads a message from the mailbox
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @mbx_id: id of mailbox to read
+ *
+ *  returns SUCCESS if it successfully read message from buffer
+ **/
+s32 ixgbe_read_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+
+	/* limit read to size of mailbox */
+	if (size > mbx->size)
+		size = mbx->size;
+
+	if (!mbx->ops)
+		return IXGBE_ERR_MBX;
+
+	return mbx->ops->read(hw, msg, size, mbx_id);
+}
+
+/**
+ *  ixgbe_write_mbx - Write a message to the mailbox
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @mbx_id: id of mailbox to write
+ *
+ *  returns SUCCESS if it successfully copied message into the buffer
+ **/
+s32 ixgbe_write_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size, u16 mbx_id)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+
+	if (size > mbx->size)
+		return IXGBE_ERR_MBX;
+
+	if (!mbx->ops)
+		return IXGBE_ERR_MBX;
+
+	return mbx->ops->write(hw, msg, size, mbx_id);
+}
+
+/**
+ *  ixgbe_check_for_msg - checks to see if someone sent us mail
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to check
+ *
+ *  returns SUCCESS if the Status bit was found or else ERR_MBX
+ **/
+s32 ixgbe_check_for_msg(struct ixgbe_hw *hw, u16 mbx_id)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+
+	if (!mbx->ops)
+		return IXGBE_ERR_MBX;
+
+	return mbx->ops->check_for_msg(hw, mbx_id);
+}
+
+/**
+ *  ixgbe_check_for_ack - checks to see if someone sent us ACK
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to check
+ *
+ *  returns SUCCESS if the Status bit was found or else ERR_MBX
+ **/
+s32 ixgbe_check_for_ack(struct ixgbe_hw *hw, u16 mbx_id)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+
+	if (!mbx->ops)
+		return IXGBE_ERR_MBX;
+
+	return mbx->ops->check_for_ack(hw, mbx_id);
+}
+
+/**
+ *  ixgbe_check_for_rst - checks to see if other side has reset
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to check
+ *
+ *  returns SUCCESS if the Status bit was found or else ERR_MBX
+ **/
+s32 ixgbe_check_for_rst(struct ixgbe_hw *hw, u16 mbx_id)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+
+	if (!mbx->ops)
+		return IXGBE_ERR_MBX;
+
+	return mbx->ops->check_for_rst(hw, mbx_id);
+}
+
+/**
+ *  ixgbe_poll_for_msg - Wait for message notification
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to write
+ *
+ *  returns SUCCESS if it successfully received a message notification
+ **/
+static s32 ixgbe_poll_for_msg(struct ixgbe_hw *hw, u16 mbx_id)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+	int countdown = mbx->timeout;
+
+	if (!countdown || !mbx->ops)
+		return IXGBE_ERR_MBX;
+
+	while (mbx->ops->check_for_msg(hw, mbx_id)) {
+		countdown--;
+		if (!countdown)
+			return IXGBE_ERR_MBX;
+		udelay(mbx->usec_delay);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_poll_for_ack - Wait for message acknowledgement
+ *  @hw: pointer to the HW structure
+ *  @mbx_id: id of mailbox to write
+ *
+ *  returns SUCCESS if it successfully received a message acknowledgement
+ **/
+static s32 ixgbe_poll_for_ack(struct ixgbe_hw *hw, u16 mbx_id)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+	int countdown = mbx->timeout;
+
+	if (!countdown || !mbx->ops)
+		return IXGBE_ERR_MBX;
+
+	while (mbx->ops->check_for_ack(hw, mbx_id)) {
+		countdown--;
+		if (!countdown)
+			return IXGBE_ERR_MBX;
+		udelay(mbx->usec_delay);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_posted_mbx - Wait for message notification and receive message
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @mbx_id: id of mailbox to write
+ *
+ *  returns SUCCESS if it successfully received a message notification and
+ *  copied it into the receive buffer.
+ **/
+static s32 ixgbe_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size,
+				 u16 mbx_id)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+	s32 ret_val;
+
+	if (!mbx->ops)
+		return IXGBE_ERR_MBX;
+
+	ret_val = ixgbe_poll_for_msg(hw, mbx_id);
+	if (ret_val)
+		return ret_val;
+
+	/* if ack received read message */
+	return mbx->ops->read(hw, msg, size, mbx_id);
+}
+
+/**
+ *  ixgbe_write_posted_mbx - Write a message to the mailbox, wait for ack
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @mbx_id: id of mailbox to write
+ *
+ *  returns SUCCESS if it successfully copied message into the buffer and
+ *  received an ack to that message within delay * timeout period
+ **/
+static s32 ixgbe_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size,
+			   u16 mbx_id)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+	s32 ret_val;
+
+	/* exit if either we can't write or there isn't a defined timeout */
+	if (!mbx->ops || !mbx->timeout)
+		return IXGBE_ERR_MBX;
+
+	/* send msg */
+	ret_val = mbx->ops->write(hw, msg, size, mbx_id);
+	if (ret_val)
+		return ret_val;
+
+	/* if msg sent wait until we receive an ack */
+	return ixgbe_poll_for_ack(hw, mbx_id);
+}
+
+static s32 ixgbe_check_for_bit_pf(struct ixgbe_hw *hw, u32 mask, s32 index)
+{
+	u32 mbvficr = IXGBE_READ_REG(hw, IXGBE_MBVFICR(index));
+
+	if (mbvficr & mask) {
+		IXGBE_WRITE_REG(hw, IXGBE_MBVFICR(index), mask);
+		return 0;
+	}
+
+	return IXGBE_ERR_MBX;
+}
+
+/**
+ *  ixgbe_check_for_msg_pf - checks to see if the VF has sent mail
+ *  @hw: pointer to the HW structure
+ *  @vf_number: the VF index
+ *
+ *  returns SUCCESS if the VF has set the Status bit or else ERR_MBX
+ **/
+static s32 ixgbe_check_for_msg_pf(struct ixgbe_hw *hw, u16 vf_number)
+{
+	s32 index = IXGBE_MBVFICR_INDEX(vf_number);
+	u32 vf_bit = vf_number % 16;
+
+	if (!ixgbe_check_for_bit_pf(hw, IXGBE_MBVFICR_VFREQ_VF1 << vf_bit,
+				    index)) {
+		hw->mbx.stats.reqs++;
+		return 0;
+	}
+
+	return IXGBE_ERR_MBX;
+}
+
+/**
+ *  ixgbe_check_for_ack_pf - checks to see if the VF has ACKed
+ *  @hw: pointer to the HW structure
+ *  @vf_number: the VF index
+ *
+ *  returns SUCCESS if the VF has set the Status bit or else ERR_MBX
+ **/
+static s32 ixgbe_check_for_ack_pf(struct ixgbe_hw *hw, u16 vf_number)
+{
+	s32 index = IXGBE_MBVFICR_INDEX(vf_number);
+	u32 vf_bit = vf_number % 16;
+
+	if (!ixgbe_check_for_bit_pf(hw, IXGBE_MBVFICR_VFACK_VF1 << vf_bit,
+				    index)) {
+		hw->mbx.stats.acks++;
+		return 0;
+	}
+
+	return IXGBE_ERR_MBX;
+}
+
+/**
+ *  ixgbe_check_for_rst_pf - checks to see if the VF has reset
+ *  @hw: pointer to the HW structure
+ *  @vf_number: the VF index
+ *
+ *  returns SUCCESS if the VF has set the Status bit or else ERR_MBX
+ **/
+static s32 ixgbe_check_for_rst_pf(struct ixgbe_hw *hw, u16 vf_number)
+{
+	u32 reg_offset = (vf_number < 32) ? 0 : 1;
+	u32 vf_shift = vf_number % 32;
+	u32 vflre = 0;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_82599EB:
+		vflre = IXGBE_READ_REG(hw, IXGBE_VFLRE(reg_offset));
+		break;
+	case ixgbe_mac_X540:
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		vflre = IXGBE_READ_REG(hw, IXGBE_VFLREC(reg_offset));
+		break;
+	default:
+		break;
+	}
+
+	if (vflre & BIT(vf_shift)) {
+		IXGBE_WRITE_REG(hw, IXGBE_VFLREC(reg_offset), BIT(vf_shift));
+		hw->mbx.stats.rsts++;
+		return 0;
+	}
+
+	return IXGBE_ERR_MBX;
+}
+
+/**
+ *  ixgbe_obtain_mbx_lock_pf - obtain mailbox lock
+ *  @hw: pointer to the HW structure
+ *  @vf_number: the VF index
+ *
+ *  return SUCCESS if we obtained the mailbox lock
+ **/
+static s32 ixgbe_obtain_mbx_lock_pf(struct ixgbe_hw *hw, u16 vf_number)
+{
+	u32 p2v_mailbox;
+
+	/* Take ownership of the buffer */
+	IXGBE_WRITE_REG(hw, IXGBE_PFMAILBOX(vf_number), IXGBE_PFMAILBOX_PFU);
+
+	/* reserve mailbox for vf use */
+	p2v_mailbox = IXGBE_READ_REG(hw, IXGBE_PFMAILBOX(vf_number));
+	if (p2v_mailbox & IXGBE_PFMAILBOX_PFU)
+		return 0;
+
+	return IXGBE_ERR_MBX;
+}
+
+/**
+ *  ixgbe_write_mbx_pf - Places a message in the mailbox
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @vf_number: the VF index
+ *
+ *  returns SUCCESS if it successfully copied message into the buffer
+ **/
+static s32 ixgbe_write_mbx_pf(struct ixgbe_hw *hw, u32 *msg, u16 size,
+			      u16 vf_number)
+{
+	s32 ret_val;
+	u16 i;
+
+	/* lock the mailbox to prevent pf/vf race condition */
+	ret_val = ixgbe_obtain_mbx_lock_pf(hw, vf_number);
+	if (ret_val)
+		return ret_val;
+
+	/* flush msg and acks as we are overwriting the message buffer */
+	ixgbe_check_for_msg_pf(hw, vf_number);
+	ixgbe_check_for_ack_pf(hw, vf_number);
+
+	/* copy the caller specified message to the mailbox memory buffer */
+	for (i = 0; i < size; i++)
+		IXGBE_WRITE_REG_ARRAY(hw, IXGBE_PFMBMEM(vf_number), i, msg[i]);
+
+	/* Interrupt VF to tell it a message has been sent and release buffer*/
+	IXGBE_WRITE_REG(hw, IXGBE_PFMAILBOX(vf_number), IXGBE_PFMAILBOX_STS);
+
+	/* update stats */
+	hw->mbx.stats.msgs_tx++;
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_mbx_pf - Read a message from the mailbox
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *  @vf_number: the VF index
+ *
+ *  This function copies a message from the mailbox buffer to the caller's
+ *  memory buffer.  The presumption is that the caller knows that there was
+ *  a message due to a VF request so no polling for message is needed.
+ **/
+static s32 ixgbe_read_mbx_pf(struct ixgbe_hw *hw, u32 *msg, u16 size,
+			     u16 vf_number)
+{
+	s32 ret_val;
+	u16 i;
+
+	/* lock the mailbox to prevent pf/vf race condition */
+	ret_val = ixgbe_obtain_mbx_lock_pf(hw, vf_number);
+	if (ret_val)
+		return ret_val;
+
+	/* copy the message to the mailbox memory buffer */
+	for (i = 0; i < size; i++)
+		msg[i] = IXGBE_READ_REG_ARRAY(hw, IXGBE_PFMBMEM(vf_number), i);
+
+	/* Acknowledge the message and release buffer */
+	IXGBE_WRITE_REG(hw, IXGBE_PFMAILBOX(vf_number), IXGBE_PFMAILBOX_ACK);
+
+	/* update stats */
+	hw->mbx.stats.msgs_rx++;
+
+	return 0;
+}
+
+#ifdef CONFIG_PCI_IOV
+/**
+ *  ixgbe_init_mbx_params_pf - set initial values for pf mailbox
+ *  @hw: pointer to the HW structure
+ *
+ *  Initializes the hw->mbx struct to correct values for pf mailbox
+ */
+void ixgbe_init_mbx_params_pf(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+
+	if (hw->mac.type != ixgbe_mac_82599EB &&
+	    hw->mac.type != ixgbe_mac_X550 &&
+	    hw->mac.type != ixgbe_mac_X550EM_x &&
+	    hw->mac.type != ixgbe_mac_x550em_a &&
+	    hw->mac.type != ixgbe_mac_X540)
+		return;
+
+	mbx->timeout = 0;
+	mbx->usec_delay = 0;
+
+	mbx->stats.msgs_tx = 0;
+	mbx->stats.msgs_rx = 0;
+	mbx->stats.reqs = 0;
+	mbx->stats.acks = 0;
+	mbx->stats.rsts = 0;
+
+	mbx->size = IXGBE_VFMAILBOX_SIZE;
+}
+#endif /* CONFIG_PCI_IOV */
+
+const struct ixgbe_mbx_operations mbx_ops_generic = {
+	.read                   = ixgbe_read_mbx_pf,
+	.write                  = ixgbe_write_mbx_pf,
+	.read_posted            = ixgbe_read_posted_mbx,
+	.write_posted           = ixgbe_write_posted_mbx,
+	.check_for_msg          = ixgbe_check_for_msg_pf,
+	.check_for_ack          = ixgbe_check_for_ack_pf,
+	.check_for_rst          = ixgbe_check_for_rst_pf,
+};
+
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
new file mode 100644
index 0000000000..8f4316b192
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _IXGBE_MBX_H_
+#define _IXGBE_MBX_H_
+
+#include "ixgbe_type.h"
+
+#define IXGBE_VFMAILBOX_SIZE        16 /* 16 32 bit words - 64 bytes */
+#define IXGBE_ERR_MBX               -100
+
+#define IXGBE_VFMAILBOX             0x002FC
+#define IXGBE_VFMBMEM               0x00200
+
+#define IXGBE_PFMAILBOX_STS   0x00000001 /* Initiate message send to VF */
+#define IXGBE_PFMAILBOX_ACK   0x00000002 /* Ack message recv'd from VF */
+#define IXGBE_PFMAILBOX_VFU   0x00000004 /* VF owns the mailbox buffer */
+#define IXGBE_PFMAILBOX_PFU   0x00000008 /* PF owns the mailbox buffer */
+#define IXGBE_PFMAILBOX_RVFU  0x00000010 /* Reset VFU - used when VF stuck */
+
+#define IXGBE_MBVFICR_VFREQ_MASK 0x0000FFFF /* bits for VF messages */
+#define IXGBE_MBVFICR_VFREQ_VF1  0x00000001 /* bit for VF 1 message */
+#define IXGBE_MBVFICR_VFACK_MASK 0xFFFF0000 /* bits for VF acks */
+#define IXGBE_MBVFICR_VFACK_VF1  0x00010000 /* bit for VF 1 ack */
+
+
+/* If it's a IXGBE_VF_* msg then it originates in the VF and is sent to the
+ * PF.  The reverse is true if it is IXGBE_PF_*.
+ * Message ACK's are the value or'd with 0xF0000000
+ */
+#define IXGBE_VT_MSGTYPE_ACK      0x80000000  /* Messages below or'd with
+					       * this are the ACK */
+#define IXGBE_VT_MSGTYPE_NACK     0x40000000  /* Messages below or'd with
+					       * this are the NACK */
+#define IXGBE_VT_MSGTYPE_CTS      0x20000000  /* Indicates that VF is still
+						 clear to send requests */
+#define IXGBE_VT_MSGINFO_SHIFT    16
+/* bits 23:16 are used for exra info for certain messages */
+#define IXGBE_VT_MSGINFO_MASK     (0xFF << IXGBE_VT_MSGINFO_SHIFT)
+
+/* definitions to support mailbox API version negotiation */
+
+/*
+ * Each element denotes a version of the API; existing numbers may not
+ * change; any additions must go at the end
+ */
+enum ixgbe_pfvf_api_rev {
+	ixgbe_mbox_api_10,	/* API version 1.0, linux/freebsd VF driver */
+	ixgbe_mbox_api_20,	/* API version 2.0, solaris Phase1 VF driver */
+	ixgbe_mbox_api_11,	/* API version 1.1, linux/freebsd VF driver */
+	ixgbe_mbox_api_12,	/* API version 1.2, linux/freebsd VF driver */
+	ixgbe_mbox_api_13,	/* API version 1.3, linux/freebsd VF driver */
+	ixgbe_mbox_api_14,	/* API version 1.4, linux/freebsd VF driver */
+	/* This value should always be last */
+	ixgbe_mbox_api_unknown,	/* indicates that API version is not known */
+};
+
+/* mailbox API, legacy requests */
+#define IXGBE_VF_RESET            0x01 /* VF requests reset */
+#define IXGBE_VF_SET_MAC_ADDR     0x02 /* VF requests PF to set MAC addr */
+#define IXGBE_VF_SET_MULTICAST    0x03 /* VF requests PF to set MC addr */
+#define IXGBE_VF_SET_VLAN         0x04 /* VF requests PF to set VLAN */
+
+/* mailbox API, version 1.0 VF requests */
+#define IXGBE_VF_SET_LPE	0x05 /* VF requests PF to set VMOLR.LPE */
+#define IXGBE_VF_SET_MACVLAN	0x06 /* VF requests PF for unicast filter */
+#define IXGBE_VF_API_NEGOTIATE	0x08 /* negotiate API version */
+
+/* mailbox API, version 1.1 VF requests */
+#define IXGBE_VF_GET_QUEUES	0x09 /* get queue configuration */
+
+/* GET_QUEUES return data indices within the mailbox */
+#define IXGBE_VF_TX_QUEUES	1	/* number of Tx queues supported */
+#define IXGBE_VF_RX_QUEUES	2	/* number of Rx queues supported */
+#define IXGBE_VF_TRANS_VLAN	3	/* Indication of port vlan */
+#define IXGBE_VF_DEF_QUEUE	4	/* Default queue offset */
+
+/* mailbox API, version 1.2 VF requests */
+#define IXGBE_VF_GET_RETA	0x0a	/* VF request for RETA */
+#define IXGBE_VF_GET_RSS_KEY	0x0b	/* get RSS key */
+
+#define IXGBE_VF_UPDATE_XCAST_MODE	0x0c
+
+/* mailbox API, version 1.4 VF requests */
+#define IXGBE_VF_IPSEC_ADD	0x0d
+#define IXGBE_VF_IPSEC_DEL	0x0e
+
+#define IXGBE_VF_GET_LINK_STATE 0x10 /* get vf link state */
+
+/* length of permanent address message returned from PF */
+#define IXGBE_VF_PERMADDR_MSG_LEN 4
+/* word in permanent address message with the current multicast type */
+#define IXGBE_VF_MC_TYPE_WORD     3
+
+#define IXGBE_PF_CONTROL_MSG      0x0100 /* PF control message */
+
+#define IXGBE_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */
+#define IXGBE_VF_MBX_INIT_DELAY   500  /* microseconds between retries */
+
+s32 ixgbe_read_mbx(struct ixgbe_hw *, u32 *, u16, u16);
+s32 ixgbe_write_mbx(struct ixgbe_hw *, u32 *, u16, u16);
+s32 ixgbe_check_for_msg(struct ixgbe_hw *, u16);
+s32 ixgbe_check_for_ack(struct ixgbe_hw *, u16);
+s32 ixgbe_check_for_rst(struct ixgbe_hw *, u16);
+#ifdef CONFIG_PCI_IOV
+void ixgbe_init_mbx_params_pf(struct ixgbe_hw *);
+#endif /* CONFIG_PCI_IOV */
+
+extern const struct ixgbe_mbx_operations mbx_ops_generic;
+
+#endif /* _IXGBE_MBX_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
new file mode 100644
index 0000000000..1e6cf220f5
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _IXGBE_MODEL_H_
+#define _IXGBE_MODEL_H_
+
+#include "ixgbe.h"
+#include "ixgbe_type.h"
+
+struct ixgbe_mat_field {
+	unsigned int off;
+	int (*val)(struct ixgbe_fdir_filter *input,
+		   union ixgbe_atr_input *mask,
+		   u32 val, u32 m);
+	unsigned int type;
+};
+
+struct ixgbe_jump_table {
+	struct ixgbe_mat_field *mat;
+	struct ixgbe_fdir_filter *input;
+	union ixgbe_atr_input *mask;
+	u32 link_hdl;
+	unsigned long child_loc_map[32];
+};
+
+#define IXGBE_MAX_HW_ENTRIES 2045
+
+static inline int ixgbe_mat_prgm_sip(struct ixgbe_fdir_filter *input,
+				     union ixgbe_atr_input *mask,
+				     u32 val, u32 m)
+{
+	input->filter.formatted.src_ip[0] = (__force __be32)val;
+	mask->formatted.src_ip[0] = (__force __be32)m;
+	return 0;
+}
+
+static inline int ixgbe_mat_prgm_dip(struct ixgbe_fdir_filter *input,
+				     union ixgbe_atr_input *mask,
+				     u32 val, u32 m)
+{
+	input->filter.formatted.dst_ip[0] = (__force __be32)val;
+	mask->formatted.dst_ip[0] = (__force __be32)m;
+	return 0;
+}
+
+static struct ixgbe_mat_field ixgbe_ipv4_fields[] = {
+	{ .off = 12, .val = ixgbe_mat_prgm_sip,
+	  .type = IXGBE_ATR_FLOW_TYPE_IPV4},
+	{ .off = 16, .val = ixgbe_mat_prgm_dip,
+	  .type = IXGBE_ATR_FLOW_TYPE_IPV4},
+	{ .val = NULL } /* terminal node */
+};
+
+static inline int ixgbe_mat_prgm_ports(struct ixgbe_fdir_filter *input,
+				       union ixgbe_atr_input *mask,
+				       u32 val, u32 m)
+{
+	input->filter.formatted.src_port = (__force __be16)(val & 0xffff);
+	mask->formatted.src_port = (__force __be16)(m & 0xffff);
+	input->filter.formatted.dst_port = (__force __be16)(val >> 16);
+	mask->formatted.dst_port = (__force __be16)(m >> 16);
+
+	return 0;
+};
+
+static struct ixgbe_mat_field ixgbe_tcp_fields[] = {
+	{.off = 0, .val = ixgbe_mat_prgm_ports,
+	 .type = IXGBE_ATR_FLOW_TYPE_TCPV4},
+	{ .val = NULL } /* terminal node */
+};
+
+static struct ixgbe_mat_field ixgbe_udp_fields[] = {
+	{.off = 0, .val = ixgbe_mat_prgm_ports,
+	 .type = IXGBE_ATR_FLOW_TYPE_UDPV4},
+	{ .val = NULL } /* terminal node */
+};
+
+struct ixgbe_nexthdr {
+	/* offset, shift, and mask of position to next header */
+	unsigned int o;
+	u32 s;
+	u32 m;
+	/* match criteria to make this jump*/
+	unsigned int off;
+	u32 val;
+	u32 mask;
+	/* location of jump to make */
+	struct ixgbe_mat_field *jump;
+};
+
+static struct ixgbe_nexthdr ixgbe_ipv4_jumps[] = {
+	{ .o = 0, .s = 6, .m = 0xf,
+	  .off = 8, .val = 0x600, .mask = 0xff00, .jump = ixgbe_tcp_fields},
+	{ .o = 0, .s = 6, .m = 0xf,
+	  .off = 8, .val = 0x1100, .mask = 0xff00, .jump = ixgbe_udp_fields},
+	{ .jump = NULL } /* terminal node */
+};
+#endif /* _IXGBE_MODEL_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
new file mode 100644
index 0000000000..689470c1e8
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -0,0 +1,2799 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/iopoll.h>
+#include <linux/sched.h>
+
+#include "ixgbe.h"
+#include "ixgbe_phy.h"
+
+static void ixgbe_i2c_start(struct ixgbe_hw *hw);
+static void ixgbe_i2c_stop(struct ixgbe_hw *hw);
+static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data);
+static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data);
+static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw);
+static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data);
+static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data);
+static void ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl);
+static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl);
+static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data);
+static bool ixgbe_get_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl);
+static void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw);
+static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id);
+static s32 ixgbe_get_phy_id(struct ixgbe_hw *hw);
+static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw);
+
+/**
+ *  ixgbe_out_i2c_byte_ack - Send I2C byte with ack
+ *  @hw: pointer to the hardware structure
+ *  @byte: byte to send
+ *
+ *  Returns an error code on error.
+ **/
+static s32 ixgbe_out_i2c_byte_ack(struct ixgbe_hw *hw, u8 byte)
+{
+	s32 status;
+
+	status = ixgbe_clock_out_i2c_byte(hw, byte);
+	if (status)
+		return status;
+	return ixgbe_get_i2c_ack(hw);
+}
+
+/**
+ *  ixgbe_in_i2c_byte_ack - Receive an I2C byte and send ack
+ *  @hw: pointer to the hardware structure
+ *  @byte: pointer to a u8 to receive the byte
+ *
+ *  Returns an error code on error.
+ **/
+static s32 ixgbe_in_i2c_byte_ack(struct ixgbe_hw *hw, u8 *byte)
+{
+	s32 status;
+
+	status = ixgbe_clock_in_i2c_byte(hw, byte);
+	if (status)
+		return status;
+	/* ACK */
+	return ixgbe_clock_out_i2c_bit(hw, false);
+}
+
+/**
+ *  ixgbe_ones_comp_byte_add - Perform one's complement addition
+ *  @add1: addend 1
+ *  @add2: addend 2
+ *
+ *  Returns one's complement 8-bit sum.
+ **/
+static u8 ixgbe_ones_comp_byte_add(u8 add1, u8 add2)
+{
+	u16 sum = add1 + add2;
+
+	sum = (sum & 0xFF) + (sum >> 8);
+	return sum & 0xFF;
+}
+
+/**
+ *  ixgbe_read_i2c_combined_generic_int - Perform I2C read combined operation
+ *  @hw: pointer to the hardware structure
+ *  @addr: I2C bus address to read from
+ *  @reg: I2C device register to read from
+ *  @val: pointer to location to receive read value
+ *  @lock: true if to take and release semaphore
+ *
+ *  Returns an error code on error.
+ */
+s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
+					u16 reg, u16 *val, bool lock)
+{
+	u32 swfw_mask = hw->phy.phy_semaphore_mask;
+	int max_retry = 3;
+	int retry = 0;
+	u8 csum_byte;
+	u8 high_bits;
+	u8 low_bits;
+	u8 reg_high;
+	u8 csum;
+
+	reg_high = ((reg >> 7) & 0xFE) | 1;     /* Indicate read combined */
+	csum = ixgbe_ones_comp_byte_add(reg_high, reg & 0xFF);
+	csum = ~csum;
+	do {
+		if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
+			return IXGBE_ERR_SWFW_SYNC;
+		ixgbe_i2c_start(hw);
+		/* Device Address and write indication */
+		if (ixgbe_out_i2c_byte_ack(hw, addr))
+			goto fail;
+		/* Write bits 14:8 */
+		if (ixgbe_out_i2c_byte_ack(hw, reg_high))
+			goto fail;
+		/* Write bits 7:0 */
+		if (ixgbe_out_i2c_byte_ack(hw, reg & 0xFF))
+			goto fail;
+		/* Write csum */
+		if (ixgbe_out_i2c_byte_ack(hw, csum))
+			goto fail;
+		/* Re-start condition */
+		ixgbe_i2c_start(hw);
+		/* Device Address and read indication */
+		if (ixgbe_out_i2c_byte_ack(hw, addr | 1))
+			goto fail;
+		/* Get upper bits */
+		if (ixgbe_in_i2c_byte_ack(hw, &high_bits))
+			goto fail;
+		/* Get low bits */
+		if (ixgbe_in_i2c_byte_ack(hw, &low_bits))
+			goto fail;
+		/* Get csum */
+		if (ixgbe_clock_in_i2c_byte(hw, &csum_byte))
+			goto fail;
+		/* NACK */
+		if (ixgbe_clock_out_i2c_bit(hw, false))
+			goto fail;
+		ixgbe_i2c_stop(hw);
+		if (lock)
+			hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+		*val = (high_bits << 8) | low_bits;
+		return 0;
+
+fail:
+		ixgbe_i2c_bus_clear(hw);
+		if (lock)
+			hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+		retry++;
+		if (retry < max_retry)
+			hw_dbg(hw, "I2C byte read combined error - Retry.\n");
+		else
+			hw_dbg(hw, "I2C byte read combined error.\n");
+	} while (retry < max_retry);
+
+	return IXGBE_ERR_I2C;
+}
+
+/**
+ *  ixgbe_write_i2c_combined_generic_int - Perform I2C write combined operation
+ *  @hw: pointer to the hardware structure
+ *  @addr: I2C bus address to write to
+ *  @reg: I2C device register to write to
+ *  @val: value to write
+ *  @lock: true if to take and release semaphore
+ *
+ *  Returns an error code on error.
+ */
+s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
+					 u16 reg, u16 val, bool lock)
+{
+	u32 swfw_mask = hw->phy.phy_semaphore_mask;
+	int max_retry = 1;
+	int retry = 0;
+	u8 reg_high;
+	u8 csum;
+
+	reg_high = (reg >> 7) & 0xFE;   /* Indicate write combined */
+	csum = ixgbe_ones_comp_byte_add(reg_high, reg & 0xFF);
+	csum = ixgbe_ones_comp_byte_add(csum, val >> 8);
+	csum = ixgbe_ones_comp_byte_add(csum, val & 0xFF);
+	csum = ~csum;
+	do {
+		if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
+			return IXGBE_ERR_SWFW_SYNC;
+		ixgbe_i2c_start(hw);
+		/* Device Address and write indication */
+		if (ixgbe_out_i2c_byte_ack(hw, addr))
+			goto fail;
+		/* Write bits 14:8 */
+		if (ixgbe_out_i2c_byte_ack(hw, reg_high))
+			goto fail;
+		/* Write bits 7:0 */
+		if (ixgbe_out_i2c_byte_ack(hw, reg & 0xFF))
+			goto fail;
+		/* Write data 15:8 */
+		if (ixgbe_out_i2c_byte_ack(hw, val >> 8))
+			goto fail;
+		/* Write data 7:0 */
+		if (ixgbe_out_i2c_byte_ack(hw, val & 0xFF))
+			goto fail;
+		/* Write csum */
+		if (ixgbe_out_i2c_byte_ack(hw, csum))
+			goto fail;
+		ixgbe_i2c_stop(hw);
+		if (lock)
+			hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+		return 0;
+
+fail:
+		ixgbe_i2c_bus_clear(hw);
+		if (lock)
+			hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+		retry++;
+		if (retry < max_retry)
+			hw_dbg(hw, "I2C byte write combined error - Retry.\n");
+		else
+			hw_dbg(hw, "I2C byte write combined error.\n");
+	} while (retry < max_retry);
+
+	return IXGBE_ERR_I2C;
+}
+
+/**
+ *  ixgbe_probe_phy - Probe a single address for a PHY
+ *  @hw: pointer to hardware structure
+ *  @phy_addr: PHY address to probe
+ *
+ *  Returns true if PHY found
+ **/
+static bool ixgbe_probe_phy(struct ixgbe_hw *hw, u16 phy_addr)
+{
+	u16 ext_ability = 0;
+
+	hw->phy.mdio.prtad = phy_addr;
+	if (mdio45_probe(&hw->phy.mdio, phy_addr) != 0)
+		return false;
+
+	if (ixgbe_get_phy_id(hw))
+		return false;
+
+	hw->phy.type = ixgbe_get_phy_type_from_id(hw->phy.id);
+
+	if (hw->phy.type == ixgbe_phy_unknown) {
+		hw->phy.ops.read_reg(hw,
+				     MDIO_PMA_EXTABLE,
+				     MDIO_MMD_PMAPMD,
+				     &ext_ability);
+		if (ext_ability &
+		    (MDIO_PMA_EXTABLE_10GBT |
+		     MDIO_PMA_EXTABLE_1000BT))
+			hw->phy.type = ixgbe_phy_cu_unknown;
+		else
+			hw->phy.type = ixgbe_phy_generic;
+	}
+
+	return true;
+}
+
+/**
+ *  ixgbe_identify_phy_generic - Get physical layer module
+ *  @hw: pointer to hardware structure
+ *
+ *  Determines the physical layer module found on the current adapter.
+ **/
+s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw)
+{
+	u32 phy_addr;
+	u32 status = IXGBE_ERR_PHY_ADDR_INVALID;
+
+	if (!hw->phy.phy_semaphore_mask) {
+		if (hw->bus.lan_id)
+			hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY1_SM;
+		else
+			hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM;
+	}
+
+	if (hw->phy.type != ixgbe_phy_unknown)
+		return 0;
+
+	if (hw->phy.nw_mng_if_sel) {
+		phy_addr = (hw->phy.nw_mng_if_sel &
+			    IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
+			   IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
+		if (ixgbe_probe_phy(hw, phy_addr))
+			return 0;
+		else
+			return IXGBE_ERR_PHY_ADDR_INVALID;
+	}
+
+	for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) {
+		if (ixgbe_probe_phy(hw, phy_addr)) {
+			status = 0;
+			break;
+		}
+	}
+
+	/* Certain media types do not have a phy so an address will not
+	 * be found and the code will take this path.  Caller has to
+	 * decide if it is an error or not.
+	 */
+	if (status)
+		hw->phy.mdio.prtad = MDIO_PRTAD_NONE;
+
+	return status;
+}
+
+/**
+ * ixgbe_check_reset_blocked - check status of MNG FW veto bit
+ * @hw: pointer to the hardware structure
+ *
+ * This function checks the MMNGC.MNG_VETO bit to see if there are
+ * any constraints on link from manageability.  For MAC's that don't
+ * have this bit just return false since the link can not be blocked
+ * via this method.
+ **/
+bool ixgbe_check_reset_blocked(struct ixgbe_hw *hw)
+{
+	u32 mmngc;
+
+	/* If we don't have this bit, it can't be blocking */
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return false;
+
+	mmngc = IXGBE_READ_REG(hw, IXGBE_MMNGC);
+	if (mmngc & IXGBE_MMNGC_MNG_VETO) {
+		hw_dbg(hw, "MNG_VETO bit detected.\n");
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ *  ixgbe_get_phy_id - Get the phy type
+ *  @hw: pointer to hardware structure
+ *
+ **/
+static s32 ixgbe_get_phy_id(struct ixgbe_hw *hw)
+{
+	s32 status;
+	u16 phy_id_high = 0;
+	u16 phy_id_low = 0;
+
+	status = hw->phy.ops.read_reg(hw, MDIO_DEVID1, MDIO_MMD_PMAPMD,
+				      &phy_id_high);
+
+	if (!status) {
+		hw->phy.id = (u32)(phy_id_high << 16);
+		status = hw->phy.ops.read_reg(hw, MDIO_DEVID2, MDIO_MMD_PMAPMD,
+					      &phy_id_low);
+		hw->phy.id |= (u32)(phy_id_low & IXGBE_PHY_REVISION_MASK);
+		hw->phy.revision = (u32)(phy_id_low & ~IXGBE_PHY_REVISION_MASK);
+	}
+	return status;
+}
+
+/**
+ *  ixgbe_get_phy_type_from_id - Get the phy type
+ *  @phy_id: hardware phy id
+ *
+ **/
+static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id)
+{
+	enum ixgbe_phy_type phy_type;
+
+	switch (phy_id) {
+	case TN1010_PHY_ID:
+		phy_type = ixgbe_phy_tn;
+		break;
+	case X550_PHY_ID2:
+	case X550_PHY_ID3:
+	case X540_PHY_ID:
+		phy_type = ixgbe_phy_aq;
+		break;
+	case QT2022_PHY_ID:
+		phy_type = ixgbe_phy_qt;
+		break;
+	case ATH_PHY_ID:
+		phy_type = ixgbe_phy_nl;
+		break;
+	case X557_PHY_ID:
+	case X557_PHY_ID2:
+		phy_type = ixgbe_phy_x550em_ext_t;
+		break;
+	case BCM54616S_E_PHY_ID:
+		phy_type = ixgbe_phy_ext_1g_t;
+		break;
+	default:
+		phy_type = ixgbe_phy_unknown;
+		break;
+	}
+
+	return phy_type;
+}
+
+/**
+ *  ixgbe_reset_phy_generic - Performs a PHY reset
+ *  @hw: pointer to hardware structure
+ **/
+s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw)
+{
+	u32 i;
+	u16 ctrl = 0;
+	s32 status = 0;
+
+	if (hw->phy.type == ixgbe_phy_unknown)
+		status = ixgbe_identify_phy_generic(hw);
+
+	if (status != 0 || hw->phy.type == ixgbe_phy_none)
+		return status;
+
+	/* Don't reset PHY if it's shut down due to overtemp. */
+	if (!hw->phy.reset_if_overtemp &&
+	    (IXGBE_ERR_OVERTEMP == hw->phy.ops.check_overtemp(hw)))
+		return 0;
+
+	/* Blocked by MNG FW so bail */
+	if (ixgbe_check_reset_blocked(hw))
+		return 0;
+
+	/*
+	 * Perform soft PHY reset to the PHY_XS.
+	 * This will cause a soft reset to the PHY
+	 */
+	hw->phy.ops.write_reg(hw, MDIO_CTRL1,
+			      MDIO_MMD_PHYXS,
+			      MDIO_CTRL1_RESET);
+
+	/*
+	 * Poll for reset bit to self-clear indicating reset is complete.
+	 * Some PHYs could take up to 3 seconds to complete and need about
+	 * 1.7 usec delay after the reset is complete.
+	 */
+	for (i = 0; i < 30; i++) {
+		msleep(100);
+		if (hw->phy.type == ixgbe_phy_x550em_ext_t) {
+			status = hw->phy.ops.read_reg(hw,
+						  IXGBE_MDIO_TX_VENDOR_ALARMS_3,
+						  MDIO_MMD_PMAPMD, &ctrl);
+			if (status)
+				return status;
+
+			if (ctrl & IXGBE_MDIO_TX_VENDOR_ALARMS_3_RST_MASK) {
+				udelay(2);
+				break;
+			}
+		} else {
+			status = hw->phy.ops.read_reg(hw, MDIO_CTRL1,
+						      MDIO_MMD_PHYXS, &ctrl);
+			if (status)
+				return status;
+
+			if (!(ctrl & MDIO_CTRL1_RESET)) {
+				udelay(2);
+				break;
+			}
+		}
+	}
+
+	if (ctrl & MDIO_CTRL1_RESET) {
+		hw_dbg(hw, "PHY reset polling failed to complete.\n");
+		return IXGBE_ERR_RESET_FAILED;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_phy_reg_mdi - read PHY register
+ *  @hw: pointer to hardware structure
+ *  @reg_addr: 32 bit address of PHY register to read
+ *  @device_type: 5 bit device type
+ *  @phy_data: Pointer to read data from PHY register
+ *
+ *  Reads a value from a specified PHY register without the SWFW lock
+ **/
+s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type,
+		       u16 *phy_data)
+{
+	u32 i, data, command;
+
+	/* Setup and write the address cycle command */
+	command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT)  |
+		   (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
+		   (hw->phy.mdio.prtad << IXGBE_MSCA_PHY_ADDR_SHIFT) |
+		   (IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND));
+
+	IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
+
+	/* Check every 10 usec to see if the address cycle completed.
+	 * The MDI Command bit will clear when the operation is
+	 * complete
+	 */
+	for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
+		udelay(10);
+
+		command = IXGBE_READ_REG(hw, IXGBE_MSCA);
+		if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
+				break;
+	}
+
+
+	if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
+		hw_dbg(hw, "PHY address command did not complete.\n");
+		return IXGBE_ERR_PHY;
+	}
+
+	/* Address cycle complete, setup and write the read
+	 * command
+	 */
+	command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT)  |
+		   (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
+		   (hw->phy.mdio.prtad << IXGBE_MSCA_PHY_ADDR_SHIFT) |
+		   (IXGBE_MSCA_READ | IXGBE_MSCA_MDI_COMMAND));
+
+	IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
+
+	/* Check every 10 usec to see if the address cycle
+	 * completed. The MDI Command bit will clear when the
+	 * operation is complete
+	 */
+	for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
+		udelay(10);
+
+		command = IXGBE_READ_REG(hw, IXGBE_MSCA);
+		if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
+			break;
+	}
+
+	if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
+		hw_dbg(hw, "PHY read command didn't complete\n");
+		return IXGBE_ERR_PHY;
+	}
+
+	/* Read operation is complete.  Get the data
+	 * from MSRWD
+	 */
+	data = IXGBE_READ_REG(hw, IXGBE_MSRWD);
+	data >>= IXGBE_MSRWD_READ_DATA_SHIFT;
+	*phy_data = (u16)(data);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_phy_reg_generic - Reads a value from a specified PHY register
+ *  using the SWFW lock - this function is needed in most cases
+ *  @hw: pointer to hardware structure
+ *  @reg_addr: 32 bit address of PHY register to read
+ *  @device_type: 5 bit device type
+ *  @phy_data: Pointer to read data from PHY register
+ **/
+s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+			       u32 device_type, u16 *phy_data)
+{
+	s32 status;
+	u32 gssr = hw->phy.phy_semaphore_mask;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, gssr) == 0) {
+		status = ixgbe_read_phy_reg_mdi(hw, reg_addr, device_type,
+						phy_data);
+		hw->mac.ops.release_swfw_sync(hw, gssr);
+	} else {
+		return IXGBE_ERR_SWFW_SYNC;
+	}
+
+	return status;
+}
+
+/**
+ *  ixgbe_write_phy_reg_mdi - Writes a value to specified PHY register
+ *  without SWFW lock
+ *  @hw: pointer to hardware structure
+ *  @reg_addr: 32 bit PHY register to write
+ *  @device_type: 5 bit device type
+ *  @phy_data: Data to write to the PHY register
+ **/
+s32 ixgbe_write_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr,
+				u32 device_type, u16 phy_data)
+{
+	u32 i, command;
+
+	/* Put the data in the MDI single read and write data register*/
+	IXGBE_WRITE_REG(hw, IXGBE_MSRWD, (u32)phy_data);
+
+	/* Setup and write the address cycle command */
+	command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT)  |
+		   (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
+		   (hw->phy.mdio.prtad << IXGBE_MSCA_PHY_ADDR_SHIFT) |
+		   (IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND));
+
+	IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
+
+	/*
+	 * Check every 10 usec to see if the address cycle completed.
+	 * The MDI Command bit will clear when the operation is
+	 * complete
+	 */
+	for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
+		udelay(10);
+
+		command = IXGBE_READ_REG(hw, IXGBE_MSCA);
+		if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
+			break;
+	}
+
+	if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
+		hw_dbg(hw, "PHY address cmd didn't complete\n");
+		return IXGBE_ERR_PHY;
+	}
+
+	/*
+	 * Address cycle complete, setup and write the write
+	 * command
+	 */
+	command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT)  |
+		   (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) |
+		   (hw->phy.mdio.prtad << IXGBE_MSCA_PHY_ADDR_SHIFT) |
+		   (IXGBE_MSCA_WRITE | IXGBE_MSCA_MDI_COMMAND));
+
+	IXGBE_WRITE_REG(hw, IXGBE_MSCA, command);
+
+	/* Check every 10 usec to see if the address cycle
+	 * completed. The MDI Command bit will clear when the
+	 * operation is complete
+	 */
+	for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
+		udelay(10);
+
+		command = IXGBE_READ_REG(hw, IXGBE_MSCA);
+		if ((command & IXGBE_MSCA_MDI_COMMAND) == 0)
+			break;
+	}
+
+	if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) {
+		hw_dbg(hw, "PHY write cmd didn't complete\n");
+		return IXGBE_ERR_PHY;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_write_phy_reg_generic - Writes a value to specified PHY register
+ *  using SWFW lock- this function is needed in most cases
+ *  @hw: pointer to hardware structure
+ *  @reg_addr: 32 bit PHY register to write
+ *  @device_type: 5 bit device type
+ *  @phy_data: Data to write to the PHY register
+ **/
+s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+				u32 device_type, u16 phy_data)
+{
+	s32 status;
+	u32 gssr = hw->phy.phy_semaphore_mask;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, gssr) == 0) {
+		status = ixgbe_write_phy_reg_mdi(hw, reg_addr, device_type,
+						 phy_data);
+		hw->mac.ops.release_swfw_sync(hw, gssr);
+	} else {
+		return IXGBE_ERR_SWFW_SYNC;
+	}
+
+	return status;
+}
+
+#define IXGBE_HW_READ_REG(addr) IXGBE_READ_REG(hw, addr)
+
+/**
+ *  ixgbe_msca_cmd - Write the command register and poll for completion/timeout
+ *  @hw: pointer to hardware structure
+ *  @cmd: command register value to write
+ **/
+static s32 ixgbe_msca_cmd(struct ixgbe_hw *hw, u32 cmd)
+{
+	IXGBE_WRITE_REG(hw, IXGBE_MSCA, cmd);
+
+	return readx_poll_timeout(IXGBE_HW_READ_REG, IXGBE_MSCA, cmd,
+				  !(cmd & IXGBE_MSCA_MDI_COMMAND), 10,
+				  10 * IXGBE_MDIO_COMMAND_TIMEOUT);
+}
+
+/**
+ *  ixgbe_mii_bus_read_generic_c22 - Read a clause 22 register with gssr flags
+ *  @hw: pointer to hardware structure
+ *  @addr: address
+ *  @regnum: register number
+ *  @gssr: semaphore flags to acquire
+ **/
+static s32 ixgbe_mii_bus_read_generic_c22(struct ixgbe_hw *hw, int addr,
+					  int regnum, u32 gssr)
+{
+	u32 hwaddr, cmd;
+	s32 data;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, gssr))
+		return -EBUSY;
+
+	hwaddr = addr << IXGBE_MSCA_PHY_ADDR_SHIFT;
+	hwaddr |= (regnum & GENMASK(5, 0)) << IXGBE_MSCA_DEV_TYPE_SHIFT;
+	cmd = hwaddr | IXGBE_MSCA_OLD_PROTOCOL |
+		IXGBE_MSCA_READ_AUTOINC | IXGBE_MSCA_MDI_COMMAND;
+
+	data = ixgbe_msca_cmd(hw, cmd);
+	if (data < 0)
+		goto mii_bus_read_done;
+
+	data = IXGBE_READ_REG(hw, IXGBE_MSRWD);
+	data = (data >> IXGBE_MSRWD_READ_DATA_SHIFT) & GENMASK(16, 0);
+
+mii_bus_read_done:
+	hw->mac.ops.release_swfw_sync(hw, gssr);
+	return data;
+}
+
+/**
+ *  ixgbe_mii_bus_read_generic_c45 - Read a clause 45 register with gssr flags
+ *  @hw: pointer to hardware structure
+ *  @addr: address
+ *  @devad: device address to read
+ *  @regnum: register number
+ *  @gssr: semaphore flags to acquire
+ **/
+static s32 ixgbe_mii_bus_read_generic_c45(struct ixgbe_hw *hw, int addr,
+					  int devad, int regnum, u32 gssr)
+{
+	u32 hwaddr, cmd;
+	s32 data;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, gssr))
+		return -EBUSY;
+
+	hwaddr = addr << IXGBE_MSCA_PHY_ADDR_SHIFT;
+	hwaddr |= devad << 16 | regnum;
+	cmd = hwaddr | IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND;
+
+	data = ixgbe_msca_cmd(hw, cmd);
+	if (data < 0)
+		goto mii_bus_read_done;
+
+	cmd = hwaddr | IXGBE_MSCA_READ | IXGBE_MSCA_MDI_COMMAND;
+	data = ixgbe_msca_cmd(hw, cmd);
+	if (data < 0)
+		goto mii_bus_read_done;
+
+	data = IXGBE_READ_REG(hw, IXGBE_MSRWD);
+	data = (data >> IXGBE_MSRWD_READ_DATA_SHIFT) & GENMASK(16, 0);
+
+mii_bus_read_done:
+	hw->mac.ops.release_swfw_sync(hw, gssr);
+	return data;
+}
+
+/**
+ *  ixgbe_mii_bus_write_generic_c22 - Write a clause 22 register with gssr flags
+ *  @hw: pointer to hardware structure
+ *  @addr: address
+ *  @regnum: register number
+ *  @val: value to write
+ *  @gssr: semaphore flags to acquire
+ **/
+static s32 ixgbe_mii_bus_write_generic_c22(struct ixgbe_hw *hw, int addr,
+					   int regnum, u16 val, u32 gssr)
+{
+	u32 hwaddr, cmd;
+	s32 err;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, gssr))
+		return -EBUSY;
+
+	IXGBE_WRITE_REG(hw, IXGBE_MSRWD, (u32)val);
+
+	hwaddr = addr << IXGBE_MSCA_PHY_ADDR_SHIFT;
+	hwaddr |= (regnum & GENMASK(5, 0)) << IXGBE_MSCA_DEV_TYPE_SHIFT;
+	cmd = hwaddr | IXGBE_MSCA_OLD_PROTOCOL | IXGBE_MSCA_WRITE |
+		IXGBE_MSCA_MDI_COMMAND;
+
+	err = ixgbe_msca_cmd(hw, cmd);
+
+	hw->mac.ops.release_swfw_sync(hw, gssr);
+	return err;
+}
+
+/**
+ *  ixgbe_mii_bus_write_generic_c45 - Write a clause 45 register with gssr flags
+ *  @hw: pointer to hardware structure
+ *  @addr: address
+ *  @devad: device address to read
+ *  @regnum: register number
+ *  @val: value to write
+ *  @gssr: semaphore flags to acquire
+ **/
+static s32 ixgbe_mii_bus_write_generic_c45(struct ixgbe_hw *hw, int addr,
+					   int devad, int regnum, u16 val,
+					   u32 gssr)
+{
+	u32 hwaddr, cmd;
+	s32 err;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, gssr))
+		return -EBUSY;
+
+	IXGBE_WRITE_REG(hw, IXGBE_MSRWD, (u32)val);
+
+	hwaddr = addr << IXGBE_MSCA_PHY_ADDR_SHIFT;
+	hwaddr |= devad << 16 | regnum;
+	cmd = hwaddr | IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND;
+
+	err = ixgbe_msca_cmd(hw, cmd);
+	if (err < 0)
+		goto mii_bus_write_done;
+
+	cmd = hwaddr | IXGBE_MSCA_WRITE | IXGBE_MSCA_MDI_COMMAND;
+	err = ixgbe_msca_cmd(hw, cmd);
+
+mii_bus_write_done:
+	hw->mac.ops.release_swfw_sync(hw, gssr);
+	return err;
+}
+
+/**
+ *  ixgbe_mii_bus_read_c22 - Read a clause 22 register
+ *  @bus: pointer to mii_bus structure which points to our driver private
+ *  @addr: address
+ *  @regnum: register number
+ **/
+static s32 ixgbe_mii_bus_read_c22(struct mii_bus *bus, int addr, int regnum)
+{
+	struct ixgbe_adapter *adapter = bus->priv;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 gssr = hw->phy.phy_semaphore_mask;
+
+	return ixgbe_mii_bus_read_generic_c22(hw, addr, regnum, gssr);
+}
+
+/**
+ *  ixgbe_mii_bus_read_c45 - Read a clause 45 register
+ *  @bus: pointer to mii_bus structure which points to our driver private
+ *  @devad: device address to read
+ *  @addr: address
+ *  @regnum: register number
+ **/
+static s32 ixgbe_mii_bus_read_c45(struct mii_bus *bus, int devad, int addr,
+				  int regnum)
+{
+	struct ixgbe_adapter *adapter = bus->priv;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 gssr = hw->phy.phy_semaphore_mask;
+
+	return ixgbe_mii_bus_read_generic_c45(hw, addr, devad, regnum, gssr);
+}
+
+/**
+ *  ixgbe_mii_bus_write_c22 - Write a clause 22 register
+ *  @bus: pointer to mii_bus structure which points to our driver private
+ *  @addr: address
+ *  @regnum: register number
+ *  @val: value to write
+ **/
+static s32 ixgbe_mii_bus_write_c22(struct mii_bus *bus, int addr, int regnum,
+				   u16 val)
+{
+	struct ixgbe_adapter *adapter = bus->priv;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 gssr = hw->phy.phy_semaphore_mask;
+
+	return ixgbe_mii_bus_write_generic_c22(hw, addr, regnum, val, gssr);
+}
+
+/**
+ *  ixgbe_mii_bus_write_c45 - Write a clause 45 register
+ *  @bus: pointer to mii_bus structure which points to our driver private
+ *  @addr: address
+ *  @devad: device address to read
+ *  @regnum: register number
+ *  @val: value to write
+ **/
+static s32 ixgbe_mii_bus_write_c45(struct mii_bus *bus, int addr, int devad,
+				   int regnum, u16 val)
+{
+	struct ixgbe_adapter *adapter = bus->priv;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 gssr = hw->phy.phy_semaphore_mask;
+
+	return ixgbe_mii_bus_write_generic_c45(hw, addr, devad, regnum, val,
+					       gssr);
+}
+
+/**
+ *  ixgbe_x550em_a_mii_bus_read_c22 - Read a clause 22 register on x550em_a
+ *  @bus: pointer to mii_bus structure which points to our driver private
+ *  @addr: address
+ *  @regnum: register number
+ **/
+static s32 ixgbe_x550em_a_mii_bus_read_c22(struct mii_bus *bus, int addr,
+					   int regnum)
+{
+	struct ixgbe_adapter *adapter = bus->priv;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 gssr = hw->phy.phy_semaphore_mask;
+
+	gssr |= IXGBE_GSSR_TOKEN_SM | IXGBE_GSSR_PHY0_SM;
+	return ixgbe_mii_bus_read_generic_c22(hw, addr, regnum, gssr);
+}
+
+/**
+ *  ixgbe_x550em_a_mii_bus_read_c45 - Read a clause 45 register on x550em_a
+ *  @bus: pointer to mii_bus structure which points to our driver private
+ *  @addr: address
+ *  @devad: device address to read
+ *  @regnum: register number
+ **/
+static s32 ixgbe_x550em_a_mii_bus_read_c45(struct mii_bus *bus, int addr,
+					   int devad, int regnum)
+{
+	struct ixgbe_adapter *adapter = bus->priv;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 gssr = hw->phy.phy_semaphore_mask;
+
+	gssr |= IXGBE_GSSR_TOKEN_SM | IXGBE_GSSR_PHY0_SM;
+	return ixgbe_mii_bus_read_generic_c45(hw, addr, devad, regnum, gssr);
+}
+
+/**
+ *  ixgbe_x550em_a_mii_bus_write_c22 - Write a clause 22 register on x550em_a
+ *  @bus: pointer to mii_bus structure which points to our driver private
+ *  @addr: address
+ *  @regnum: register number
+ *  @val: value to write
+ **/
+static s32 ixgbe_x550em_a_mii_bus_write_c22(struct mii_bus *bus, int addr,
+					    int regnum, u16 val)
+{
+	struct ixgbe_adapter *adapter = bus->priv;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 gssr = hw->phy.phy_semaphore_mask;
+
+	gssr |= IXGBE_GSSR_TOKEN_SM | IXGBE_GSSR_PHY0_SM;
+	return ixgbe_mii_bus_write_generic_c22(hw, addr, regnum, val, gssr);
+}
+
+/**
+ *  ixgbe_x550em_a_mii_bus_write_c45 - Write a clause 45 register on x550em_a
+ *  @bus: pointer to mii_bus structure which points to our driver private
+ *  @addr: address
+ *  @devad: device address to read
+ *  @regnum: register number
+ *  @val: value to write
+ **/
+static s32 ixgbe_x550em_a_mii_bus_write_c45(struct mii_bus *bus, int addr,
+					    int devad, int regnum, u16 val)
+{
+	struct ixgbe_adapter *adapter = bus->priv;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 gssr = hw->phy.phy_semaphore_mask;
+
+	gssr |= IXGBE_GSSR_TOKEN_SM | IXGBE_GSSR_PHY0_SM;
+	return ixgbe_mii_bus_write_generic_c45(hw, addr, devad, regnum, val,
+					       gssr);
+}
+
+/**
+ * ixgbe_get_first_secondary_devfn - get first device downstream of root port
+ * @devfn: PCI_DEVFN of root port on domain 0, bus 0
+ *
+ * Returns pci_dev pointer to PCI_DEVFN(0, 0) on subordinate side of root
+ * on domain 0, bus 0, devfn = 'devfn'
+ **/
+static struct pci_dev *ixgbe_get_first_secondary_devfn(unsigned int devfn)
+{
+	struct pci_dev *rp_pdev;
+	int bus;
+
+	rp_pdev = pci_get_domain_bus_and_slot(0, 0, devfn);
+	if (rp_pdev && rp_pdev->subordinate) {
+		bus = rp_pdev->subordinate->number;
+		pci_dev_put(rp_pdev);
+		return pci_get_domain_bus_and_slot(0, bus, 0);
+	}
+
+	pci_dev_put(rp_pdev);
+	return NULL;
+}
+
+/**
+ * ixgbe_x550em_a_has_mii - is this the first ixgbe x550em_a PCI function?
+ * @hw: pointer to hardware structure
+ *
+ * Returns true if hw points to lowest numbered PCI B:D.F x550_em_a device in
+ * the SoC.  There are up to 4 MACs sharing a single MDIO bus on the x550em_a,
+ * but we only want to register one MDIO bus.
+ **/
+static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw)
+{
+	struct ixgbe_adapter *adapter = hw->back;
+	struct pci_dev *pdev = adapter->pdev;
+	struct pci_dev *func0_pdev;
+	bool has_mii = false;
+
+	/* For the C3000 family of SoCs (x550em_a) the internal ixgbe devices
+	 * are always downstream of root ports @ 0000:00:16.0 & 0000:00:17.0
+	 * It's not valid for function 0 to be disabled and function 1 is up,
+	 * so the lowest numbered ixgbe dev will be device 0 function 0 on one
+	 * of those two root ports
+	 */
+	func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x16, 0));
+	if (func0_pdev) {
+		if (func0_pdev == pdev)
+			has_mii = true;
+		goto out;
+	}
+	func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x17, 0));
+	if (func0_pdev == pdev)
+		has_mii = true;
+
+out:
+	pci_dev_put(func0_pdev);
+	return has_mii;
+}
+
+/**
+ * ixgbe_mii_bus_init - mii_bus structure setup
+ * @hw: pointer to hardware structure
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * ixgbe_mii_bus_init initializes a mii_bus structure in adapter
+ **/
+s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw)
+{
+	s32 (*write_c22)(struct mii_bus *bus, int addr, int regnum, u16 val);
+	s32 (*read_c22)(struct mii_bus *bus, int addr, int regnum);
+	s32 (*write_c45)(struct mii_bus *bus, int addr, int devad, int regnum,
+			 u16 val);
+	s32 (*read_c45)(struct mii_bus *bus, int addr, int devad, int regnum);
+	struct ixgbe_adapter *adapter = hw->back;
+	struct pci_dev *pdev = adapter->pdev;
+	struct device *dev = &adapter->netdev->dev;
+	struct mii_bus *bus;
+
+	switch (hw->device_id) {
+	/* C3000 SoCs */
+	case IXGBE_DEV_ID_X550EM_A_KR:
+	case IXGBE_DEV_ID_X550EM_A_KR_L:
+	case IXGBE_DEV_ID_X550EM_A_SFP_N:
+	case IXGBE_DEV_ID_X550EM_A_SGMII:
+	case IXGBE_DEV_ID_X550EM_A_SGMII_L:
+	case IXGBE_DEV_ID_X550EM_A_10G_T:
+	case IXGBE_DEV_ID_X550EM_A_SFP:
+	case IXGBE_DEV_ID_X550EM_A_1G_T:
+	case IXGBE_DEV_ID_X550EM_A_1G_T_L:
+		if (!ixgbe_x550em_a_has_mii(hw))
+			return 0;
+		read_c22 = ixgbe_x550em_a_mii_bus_read_c22;
+		write_c22 = ixgbe_x550em_a_mii_bus_write_c22;
+		read_c45 = ixgbe_x550em_a_mii_bus_read_c45;
+		write_c45 = ixgbe_x550em_a_mii_bus_write_c45;
+		break;
+	default:
+		read_c22 = ixgbe_mii_bus_read_c22;
+		write_c22 = ixgbe_mii_bus_write_c22;
+		read_c45 = ixgbe_mii_bus_read_c45;
+		write_c45 = ixgbe_mii_bus_write_c45;
+		break;
+	}
+
+	bus = devm_mdiobus_alloc(dev);
+	if (!bus)
+		return -ENOMEM;
+
+	bus->read = read_c22;
+	bus->write = write_c22;
+	bus->read_c45 = read_c45;
+	bus->write_c45 = write_c45;
+
+	/* Use the position of the device in the PCI hierarchy as the id */
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mdio-%s", ixgbe_driver_name,
+		 pci_name(pdev));
+
+	bus->name = "ixgbe-mdio";
+	bus->priv = adapter;
+	bus->parent = dev;
+	bus->phy_mask = GENMASK(31, 0);
+
+	/* Support clause 22/45 natively.  ixgbe_probe() sets MDIO_EMULATE_C22
+	 * unfortunately that causes some clause 22 frames to be sent with
+	 * clause 45 addressing.  We don't want that.
+	 */
+	hw->phy.mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_SUPPORTS_C22;
+
+	adapter->mii_bus = bus;
+	return mdiobus_register(bus);
+}
+
+/**
+ *  ixgbe_setup_phy_link_generic - Set and restart autoneg
+ *  @hw: pointer to hardware structure
+ *
+ *  Restart autonegotiation and PHY and waits for completion.
+ **/
+s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+	u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
+	bool autoneg = false;
+	ixgbe_link_speed speed;
+
+	ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg);
+
+	/* Set or unset auto-negotiation 10G advertisement */
+	hw->phy.ops.read_reg(hw, MDIO_AN_10GBT_CTRL, MDIO_MMD_AN, &autoneg_reg);
+
+	autoneg_reg &= ~MDIO_AN_10GBT_CTRL_ADV10G;
+	if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) &&
+	    (speed & IXGBE_LINK_SPEED_10GB_FULL))
+		autoneg_reg |= MDIO_AN_10GBT_CTRL_ADV10G;
+
+	hw->phy.ops.write_reg(hw, MDIO_AN_10GBT_CTRL, MDIO_MMD_AN, autoneg_reg);
+
+	hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
+			     MDIO_MMD_AN, &autoneg_reg);
+
+	if (hw->mac.type == ixgbe_mac_X550) {
+		/* Set or unset auto-negotiation 5G advertisement */
+		autoneg_reg &= ~IXGBE_MII_5GBASE_T_ADVERTISE;
+		if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_5GB_FULL) &&
+		    (speed & IXGBE_LINK_SPEED_5GB_FULL))
+			autoneg_reg |= IXGBE_MII_5GBASE_T_ADVERTISE;
+
+		/* Set or unset auto-negotiation 2.5G advertisement */
+		autoneg_reg &= ~IXGBE_MII_2_5GBASE_T_ADVERTISE;
+		if ((hw->phy.autoneg_advertised &
+		     IXGBE_LINK_SPEED_2_5GB_FULL) &&
+		    (speed & IXGBE_LINK_SPEED_2_5GB_FULL))
+			autoneg_reg |= IXGBE_MII_2_5GBASE_T_ADVERTISE;
+	}
+
+	/* Set or unset auto-negotiation 1G advertisement */
+	autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE;
+	if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) &&
+	    (speed & IXGBE_LINK_SPEED_1GB_FULL))
+		autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE;
+
+	hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
+			      MDIO_MMD_AN, autoneg_reg);
+
+	/* Set or unset auto-negotiation 100M advertisement */
+	hw->phy.ops.read_reg(hw, MDIO_AN_ADVERTISE, MDIO_MMD_AN, &autoneg_reg);
+
+	autoneg_reg &= ~(ADVERTISE_100FULL | ADVERTISE_100HALF);
+	if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) &&
+	    (speed & IXGBE_LINK_SPEED_100_FULL))
+		autoneg_reg |= ADVERTISE_100FULL;
+
+	hw->phy.ops.write_reg(hw, MDIO_AN_ADVERTISE, MDIO_MMD_AN, autoneg_reg);
+
+	/* Blocked by MNG FW so don't reset PHY */
+	if (ixgbe_check_reset_blocked(hw))
+		return 0;
+
+	/* Restart PHY autonegotiation and wait for completion */
+	hw->phy.ops.read_reg(hw, MDIO_CTRL1,
+			     MDIO_MMD_AN, &autoneg_reg);
+
+	autoneg_reg |= MDIO_AN_CTRL1_RESTART;
+
+	hw->phy.ops.write_reg(hw, MDIO_CTRL1,
+			      MDIO_MMD_AN, autoneg_reg);
+
+	return status;
+}
+
+/**
+ *  ixgbe_setup_phy_link_speed_generic - Sets the auto advertised capabilities
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg_wait_to_complete: unused
+ **/
+s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
+				       ixgbe_link_speed speed,
+				       bool autoneg_wait_to_complete)
+{
+	/* Clear autoneg_advertised and set new values based on input link
+	 * speed.
+	 */
+	hw->phy.autoneg_advertised = 0;
+
+	if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL;
+
+	if (speed & IXGBE_LINK_SPEED_5GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_5GB_FULL;
+
+	if (speed & IXGBE_LINK_SPEED_2_5GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_2_5GB_FULL;
+
+	if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL;
+
+	if (speed & IXGBE_LINK_SPEED_100_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL;
+
+	if (speed & IXGBE_LINK_SPEED_10_FULL)
+		hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10_FULL;
+
+	/* Setup link based on the new speed settings */
+	if (hw->phy.ops.setup_link)
+		hw->phy.ops.setup_link(hw);
+
+	return 0;
+}
+
+/**
+ * ixgbe_get_copper_speeds_supported - Get copper link speed from phy
+ * @hw: pointer to hardware structure
+ *
+ * Determines the supported link capabilities by reading the PHY auto
+ * negotiation register.
+ */
+static s32 ixgbe_get_copper_speeds_supported(struct ixgbe_hw *hw)
+{
+	u16 speed_ability;
+	s32 status;
+
+	status = hw->phy.ops.read_reg(hw, MDIO_SPEED, MDIO_MMD_PMAPMD,
+				      &speed_ability);
+	if (status)
+		return status;
+
+	if (speed_ability & MDIO_SPEED_10G)
+		hw->phy.speeds_supported |= IXGBE_LINK_SPEED_10GB_FULL;
+	if (speed_ability & MDIO_PMA_SPEED_1000)
+		hw->phy.speeds_supported |= IXGBE_LINK_SPEED_1GB_FULL;
+	if (speed_ability & MDIO_PMA_SPEED_100)
+		hw->phy.speeds_supported |= IXGBE_LINK_SPEED_100_FULL;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_X550:
+		hw->phy.speeds_supported |= IXGBE_LINK_SPEED_2_5GB_FULL;
+		hw->phy.speeds_supported |= IXGBE_LINK_SPEED_5GB_FULL;
+		break;
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		hw->phy.speeds_supported &= ~IXGBE_LINK_SPEED_100_FULL;
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_get_copper_link_capabilities_generic - Determines link capabilities
+ * @hw: pointer to hardware structure
+ * @speed: pointer to link speed
+ * @autoneg: boolean auto-negotiation value
+ */
+s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
+					       ixgbe_link_speed *speed,
+					       bool *autoneg)
+{
+	s32 status = 0;
+
+	*autoneg = true;
+	if (!hw->phy.speeds_supported)
+		status = ixgbe_get_copper_speeds_supported(hw);
+
+	*speed = hw->phy.speeds_supported;
+	return status;
+}
+
+/**
+ *  ixgbe_check_phy_link_tnx - Determine link and speed status
+ *  @hw: pointer to hardware structure
+ *  @speed: link speed
+ *  @link_up: status of link
+ *
+ *  Reads the VS1 register to determine if link is up and the current speed for
+ *  the PHY.
+ **/
+s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
+			     bool *link_up)
+{
+	s32 status;
+	u32 time_out;
+	u32 max_time_out = 10;
+	u16 phy_link = 0;
+	u16 phy_speed = 0;
+	u16 phy_data = 0;
+
+	/* Initialize speed and link to default case */
+	*link_up = false;
+	*speed = IXGBE_LINK_SPEED_10GB_FULL;
+
+	/*
+	 * Check current speed and link status of the PHY register.
+	 * This is a vendor specific register and may have to
+	 * be changed for other copper PHYs.
+	 */
+	for (time_out = 0; time_out < max_time_out; time_out++) {
+		udelay(10);
+		status = hw->phy.ops.read_reg(hw,
+					      MDIO_STAT1,
+					      MDIO_MMD_VEND1,
+					      &phy_data);
+		phy_link = phy_data &
+			    IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS;
+		phy_speed = phy_data &
+			    IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS;
+		if (phy_link == IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS) {
+			*link_up = true;
+			if (phy_speed ==
+			    IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS)
+				*speed = IXGBE_LINK_SPEED_1GB_FULL;
+			break;
+		}
+	}
+
+	return status;
+}
+
+/**
+ *	ixgbe_setup_phy_link_tnx - Set and restart autoneg
+ *	@hw: pointer to hardware structure
+ *
+ *	Restart autonegotiation and PHY and waits for completion.
+ *      This function always returns success, this is nessary since
+ *	it is called via a function pointer that could call other
+ *	functions that could return an error.
+ **/
+s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw)
+{
+	u16 autoneg_reg = IXGBE_MII_AUTONEG_REG;
+	bool autoneg = false;
+	ixgbe_link_speed speed;
+
+	ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg);
+
+	if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
+		/* Set or unset auto-negotiation 10G advertisement */
+		hw->phy.ops.read_reg(hw, MDIO_AN_10GBT_CTRL,
+				     MDIO_MMD_AN,
+				     &autoneg_reg);
+
+		autoneg_reg &= ~MDIO_AN_10GBT_CTRL_ADV10G;
+		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL)
+			autoneg_reg |= MDIO_AN_10GBT_CTRL_ADV10G;
+
+		hw->phy.ops.write_reg(hw, MDIO_AN_10GBT_CTRL,
+				      MDIO_MMD_AN,
+				      autoneg_reg);
+	}
+
+	if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
+		/* Set or unset auto-negotiation 1G advertisement */
+		hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_XNP_TX_REG,
+				     MDIO_MMD_AN,
+				     &autoneg_reg);
+
+		autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX;
+		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL)
+			autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX;
+
+		hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_XNP_TX_REG,
+				      MDIO_MMD_AN,
+				      autoneg_reg);
+	}
+
+	if (speed & IXGBE_LINK_SPEED_100_FULL) {
+		/* Set or unset auto-negotiation 100M advertisement */
+		hw->phy.ops.read_reg(hw, MDIO_AN_ADVERTISE,
+				     MDIO_MMD_AN,
+				     &autoneg_reg);
+
+		autoneg_reg &= ~(ADVERTISE_100FULL |
+				 ADVERTISE_100HALF);
+		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL)
+			autoneg_reg |= ADVERTISE_100FULL;
+
+		hw->phy.ops.write_reg(hw, MDIO_AN_ADVERTISE,
+				      MDIO_MMD_AN,
+				      autoneg_reg);
+	}
+
+	/* Blocked by MNG FW so don't reset PHY */
+	if (ixgbe_check_reset_blocked(hw))
+		return 0;
+
+	/* Restart PHY autonegotiation and wait for completion */
+	hw->phy.ops.read_reg(hw, MDIO_CTRL1,
+			     MDIO_MMD_AN, &autoneg_reg);
+
+	autoneg_reg |= MDIO_AN_CTRL1_RESTART;
+
+	hw->phy.ops.write_reg(hw, MDIO_CTRL1,
+			      MDIO_MMD_AN, autoneg_reg);
+	return 0;
+}
+
+/**
+ *  ixgbe_reset_phy_nl - Performs a PHY reset
+ *  @hw: pointer to hardware structure
+ **/
+s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw)
+{
+	u16 phy_offset, control, eword, edata, block_crc;
+	bool end_data = false;
+	u16 list_offset, data_offset;
+	u16 phy_data = 0;
+	s32 ret_val;
+	u32 i;
+
+	/* Blocked by MNG FW so bail */
+	if (ixgbe_check_reset_blocked(hw))
+		return 0;
+
+	hw->phy.ops.read_reg(hw, MDIO_CTRL1, MDIO_MMD_PHYXS, &phy_data);
+
+	/* reset the PHY and poll for completion */
+	hw->phy.ops.write_reg(hw, MDIO_CTRL1, MDIO_MMD_PHYXS,
+			      (phy_data | MDIO_CTRL1_RESET));
+
+	for (i = 0; i < 100; i++) {
+		hw->phy.ops.read_reg(hw, MDIO_CTRL1, MDIO_MMD_PHYXS,
+				     &phy_data);
+		if ((phy_data & MDIO_CTRL1_RESET) == 0)
+			break;
+		usleep_range(10000, 20000);
+	}
+
+	if ((phy_data & MDIO_CTRL1_RESET) != 0) {
+		hw_dbg(hw, "PHY reset did not complete.\n");
+		return IXGBE_ERR_PHY;
+	}
+
+	/* Get init offsets */
+	ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset,
+						      &data_offset);
+	if (ret_val)
+		return ret_val;
+
+	ret_val = hw->eeprom.ops.read(hw, data_offset, &block_crc);
+	data_offset++;
+	while (!end_data) {
+		/*
+		 * Read control word from PHY init contents offset
+		 */
+		ret_val = hw->eeprom.ops.read(hw, data_offset, &eword);
+		if (ret_val)
+			goto err_eeprom;
+		control = (eword & IXGBE_CONTROL_MASK_NL) >>
+			   IXGBE_CONTROL_SHIFT_NL;
+		edata = eword & IXGBE_DATA_MASK_NL;
+		switch (control) {
+		case IXGBE_DELAY_NL:
+			data_offset++;
+			hw_dbg(hw, "DELAY: %d MS\n", edata);
+			usleep_range(edata * 1000, edata * 2000);
+			break;
+		case IXGBE_DATA_NL:
+			hw_dbg(hw, "DATA:\n");
+			data_offset++;
+			ret_val = hw->eeprom.ops.read(hw, data_offset++,
+						      &phy_offset);
+			if (ret_val)
+				goto err_eeprom;
+			for (i = 0; i < edata; i++) {
+				ret_val = hw->eeprom.ops.read(hw, data_offset,
+							      &eword);
+				if (ret_val)
+					goto err_eeprom;
+				hw->phy.ops.write_reg(hw, phy_offset,
+						      MDIO_MMD_PMAPMD, eword);
+				hw_dbg(hw, "Wrote %4.4x to %4.4x\n", eword,
+				       phy_offset);
+				data_offset++;
+				phy_offset++;
+			}
+			break;
+		case IXGBE_CONTROL_NL:
+			data_offset++;
+			hw_dbg(hw, "CONTROL:\n");
+			if (edata == IXGBE_CONTROL_EOL_NL) {
+				hw_dbg(hw, "EOL\n");
+				end_data = true;
+			} else if (edata == IXGBE_CONTROL_SOL_NL) {
+				hw_dbg(hw, "SOL\n");
+			} else {
+				hw_dbg(hw, "Bad control value\n");
+				return IXGBE_ERR_PHY;
+			}
+			break;
+		default:
+			hw_dbg(hw, "Bad control type\n");
+			return IXGBE_ERR_PHY;
+		}
+	}
+
+	return ret_val;
+
+err_eeprom:
+	hw_err(hw, "eeprom read at offset %d failed\n", data_offset);
+	return IXGBE_ERR_PHY;
+}
+
+/**
+ *  ixgbe_identify_module_generic - Identifies module type
+ *  @hw: pointer to hardware structure
+ *
+ *  Determines HW type and calls appropriate function.
+ **/
+s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw)
+{
+	switch (hw->mac.ops.get_media_type(hw)) {
+	case ixgbe_media_type_fiber:
+		return ixgbe_identify_sfp_module_generic(hw);
+	case ixgbe_media_type_fiber_qsfp:
+		return ixgbe_identify_qsfp_module_generic(hw);
+	default:
+		hw->phy.sfp_type = ixgbe_sfp_type_not_present;
+		return IXGBE_ERR_SFP_NOT_PRESENT;
+	}
+
+	return IXGBE_ERR_SFP_NOT_PRESENT;
+}
+
+/**
+ *  ixgbe_identify_sfp_module_generic - Identifies SFP modules
+ *  @hw: pointer to hardware structure
+ *
+ *  Searches for and identifies the SFP module and assigns appropriate PHY type.
+ **/
+s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw)
+{
+	struct ixgbe_adapter *adapter = hw->back;
+	s32 status;
+	u32 vendor_oui = 0;
+	enum ixgbe_sfp_type stored_sfp_type = hw->phy.sfp_type;
+	u8 identifier = 0;
+	u8 comp_codes_1g = 0;
+	u8 comp_codes_10g = 0;
+	u8 oui_bytes[3] = {0, 0, 0};
+	u8 cable_tech = 0;
+	u8 cable_spec = 0;
+	u16 enforce_sfp = 0;
+
+	if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber) {
+		hw->phy.sfp_type = ixgbe_sfp_type_not_present;
+		return IXGBE_ERR_SFP_NOT_PRESENT;
+	}
+
+	/* LAN ID is needed for sfp_type determination */
+	hw->mac.ops.set_lan_id(hw);
+
+	status = hw->phy.ops.read_i2c_eeprom(hw,
+					     IXGBE_SFF_IDENTIFIER,
+					     &identifier);
+
+	if (status)
+		goto err_read_i2c_eeprom;
+
+	if (identifier != IXGBE_SFF_IDENTIFIER_SFP) {
+		hw->phy.type = ixgbe_phy_sfp_unsupported;
+		return IXGBE_ERR_SFP_NOT_SUPPORTED;
+	}
+	status = hw->phy.ops.read_i2c_eeprom(hw,
+					     IXGBE_SFF_1GBE_COMP_CODES,
+					     &comp_codes_1g);
+
+	if (status)
+		goto err_read_i2c_eeprom;
+
+	status = hw->phy.ops.read_i2c_eeprom(hw,
+					     IXGBE_SFF_10GBE_COMP_CODES,
+					     &comp_codes_10g);
+
+	if (status)
+		goto err_read_i2c_eeprom;
+	status = hw->phy.ops.read_i2c_eeprom(hw,
+					     IXGBE_SFF_CABLE_TECHNOLOGY,
+					     &cable_tech);
+
+	if (status)
+		goto err_read_i2c_eeprom;
+
+	 /* ID Module
+	  * =========
+	  * 0   SFP_DA_CU
+	  * 1   SFP_SR
+	  * 2   SFP_LR
+	  * 3   SFP_DA_CORE0 - 82599-specific
+	  * 4   SFP_DA_CORE1 - 82599-specific
+	  * 5   SFP_SR/LR_CORE0 - 82599-specific
+	  * 6   SFP_SR/LR_CORE1 - 82599-specific
+	  * 7   SFP_act_lmt_DA_CORE0 - 82599-specific
+	  * 8   SFP_act_lmt_DA_CORE1 - 82599-specific
+	  * 9   SFP_1g_cu_CORE0 - 82599-specific
+	  * 10  SFP_1g_cu_CORE1 - 82599-specific
+	  * 11  SFP_1g_sx_CORE0 - 82599-specific
+	  * 12  SFP_1g_sx_CORE1 - 82599-specific
+	  */
+	if (hw->mac.type == ixgbe_mac_82598EB) {
+		if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
+			hw->phy.sfp_type = ixgbe_sfp_type_da_cu;
+		else if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)
+			hw->phy.sfp_type = ixgbe_sfp_type_sr;
+		else if (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE)
+			hw->phy.sfp_type = ixgbe_sfp_type_lr;
+		else
+			hw->phy.sfp_type = ixgbe_sfp_type_unknown;
+	} else {
+		if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) {
+			if (hw->bus.lan_id == 0)
+				hw->phy.sfp_type =
+					     ixgbe_sfp_type_da_cu_core0;
+			else
+				hw->phy.sfp_type =
+					     ixgbe_sfp_type_da_cu_core1;
+		} else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) {
+			hw->phy.ops.read_i2c_eeprom(
+					hw, IXGBE_SFF_CABLE_SPEC_COMP,
+					&cable_spec);
+			if (cable_spec &
+			    IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING) {
+				if (hw->bus.lan_id == 0)
+					hw->phy.sfp_type =
+					ixgbe_sfp_type_da_act_lmt_core0;
+				else
+					hw->phy.sfp_type =
+					ixgbe_sfp_type_da_act_lmt_core1;
+			} else {
+				hw->phy.sfp_type =
+						ixgbe_sfp_type_unknown;
+			}
+		} else if (comp_codes_10g &
+			   (IXGBE_SFF_10GBASESR_CAPABLE |
+			    IXGBE_SFF_10GBASELR_CAPABLE)) {
+			if (hw->bus.lan_id == 0)
+				hw->phy.sfp_type =
+					      ixgbe_sfp_type_srlr_core0;
+			else
+				hw->phy.sfp_type =
+					      ixgbe_sfp_type_srlr_core1;
+		} else if (comp_codes_1g & IXGBE_SFF_1GBASET_CAPABLE) {
+			if (hw->bus.lan_id == 0)
+				hw->phy.sfp_type =
+					ixgbe_sfp_type_1g_cu_core0;
+			else
+				hw->phy.sfp_type =
+					ixgbe_sfp_type_1g_cu_core1;
+		} else if (comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) {
+			if (hw->bus.lan_id == 0)
+				hw->phy.sfp_type =
+					ixgbe_sfp_type_1g_sx_core0;
+			else
+				hw->phy.sfp_type =
+					ixgbe_sfp_type_1g_sx_core1;
+		} else if (comp_codes_1g & IXGBE_SFF_1GBASELX_CAPABLE) {
+			if (hw->bus.lan_id == 0)
+				hw->phy.sfp_type =
+					ixgbe_sfp_type_1g_lx_core0;
+			else
+				hw->phy.sfp_type =
+					ixgbe_sfp_type_1g_lx_core1;
+		} else {
+			hw->phy.sfp_type = ixgbe_sfp_type_unknown;
+		}
+	}
+
+	if (hw->phy.sfp_type != stored_sfp_type)
+		hw->phy.sfp_setup_needed = true;
+
+	/* Determine if the SFP+ PHY is dual speed or not. */
+	hw->phy.multispeed_fiber = false;
+	if (((comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) &&
+	     (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)) ||
+	    ((comp_codes_1g & IXGBE_SFF_1GBASELX_CAPABLE) &&
+	     (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE)))
+		hw->phy.multispeed_fiber = true;
+
+	/* Determine PHY vendor */
+	if (hw->phy.type != ixgbe_phy_nl) {
+		hw->phy.id = identifier;
+		status = hw->phy.ops.read_i2c_eeprom(hw,
+					    IXGBE_SFF_VENDOR_OUI_BYTE0,
+					    &oui_bytes[0]);
+
+		if (status != 0)
+			goto err_read_i2c_eeprom;
+
+		status = hw->phy.ops.read_i2c_eeprom(hw,
+					    IXGBE_SFF_VENDOR_OUI_BYTE1,
+					    &oui_bytes[1]);
+
+		if (status != 0)
+			goto err_read_i2c_eeprom;
+
+		status = hw->phy.ops.read_i2c_eeprom(hw,
+					    IXGBE_SFF_VENDOR_OUI_BYTE2,
+					    &oui_bytes[2]);
+
+		if (status != 0)
+			goto err_read_i2c_eeprom;
+
+		vendor_oui =
+		  ((oui_bytes[0] << IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT) |
+		   (oui_bytes[1] << IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT) |
+		   (oui_bytes[2] << IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT));
+
+		switch (vendor_oui) {
+		case IXGBE_SFF_VENDOR_OUI_TYCO:
+			if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
+				hw->phy.type =
+					    ixgbe_phy_sfp_passive_tyco;
+			break;
+		case IXGBE_SFF_VENDOR_OUI_FTL:
+			if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE)
+				hw->phy.type = ixgbe_phy_sfp_ftl_active;
+			else
+				hw->phy.type = ixgbe_phy_sfp_ftl;
+			break;
+		case IXGBE_SFF_VENDOR_OUI_AVAGO:
+			hw->phy.type = ixgbe_phy_sfp_avago;
+			break;
+		case IXGBE_SFF_VENDOR_OUI_INTEL:
+			hw->phy.type = ixgbe_phy_sfp_intel;
+			break;
+		default:
+			if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE)
+				hw->phy.type =
+					 ixgbe_phy_sfp_passive_unknown;
+			else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE)
+				hw->phy.type =
+					ixgbe_phy_sfp_active_unknown;
+			else
+				hw->phy.type = ixgbe_phy_sfp_unknown;
+			break;
+		}
+	}
+
+	/* Allow any DA cable vendor */
+	if (cable_tech & (IXGBE_SFF_DA_PASSIVE_CABLE |
+	    IXGBE_SFF_DA_ACTIVE_CABLE))
+		return 0;
+
+	/* Verify supported 1G SFP modules */
+	if (comp_codes_10g == 0 &&
+	    !(hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
+	      hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
+	      hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
+	      hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
+	      hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
+	      hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) {
+		hw->phy.type = ixgbe_phy_sfp_unsupported;
+		return IXGBE_ERR_SFP_NOT_SUPPORTED;
+	}
+
+	/* Anything else 82598-based is supported */
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return 0;
+
+	hw->mac.ops.get_device_caps(hw, &enforce_sfp);
+	if (!(enforce_sfp & IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP) &&
+	    !(hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
+	      hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
+	      hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
+	      hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
+	      hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
+	      hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) {
+		/* Make sure we're a supported PHY type */
+		if (hw->phy.type == ixgbe_phy_sfp_intel)
+			return 0;
+		if (hw->allow_unsupported_sfp) {
+			e_warn(drv, "WARNING: Intel (R) Network Connections are quality tested using Intel (R) Ethernet Optics.  Using untested modules is not supported and may cause unstable operation or damage to the module or the adapter.  Intel Corporation is not responsible for any harm caused by using untested modules.\n");
+			return 0;
+		}
+		hw_dbg(hw, "SFP+ module not supported\n");
+		hw->phy.type = ixgbe_phy_sfp_unsupported;
+		return IXGBE_ERR_SFP_NOT_SUPPORTED;
+	}
+	return 0;
+
+err_read_i2c_eeprom:
+	hw->phy.sfp_type = ixgbe_sfp_type_not_present;
+	if (hw->phy.type != ixgbe_phy_nl) {
+		hw->phy.id = 0;
+		hw->phy.type = ixgbe_phy_unknown;
+	}
+	return IXGBE_ERR_SFP_NOT_PRESENT;
+}
+
+/**
+ * ixgbe_identify_qsfp_module_generic - Identifies QSFP modules
+ * @hw: pointer to hardware structure
+ *
+ * Searches for and identifies the QSFP module and assigns appropriate PHY type
+ **/
+static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw)
+{
+	struct ixgbe_adapter *adapter = hw->back;
+	s32 status;
+	u32 vendor_oui = 0;
+	enum ixgbe_sfp_type stored_sfp_type = hw->phy.sfp_type;
+	u8 identifier = 0;
+	u8 comp_codes_1g = 0;
+	u8 comp_codes_10g = 0;
+	u8 oui_bytes[3] = {0, 0, 0};
+	u16 enforce_sfp = 0;
+	u8 connector = 0;
+	u8 cable_length = 0;
+	u8 device_tech = 0;
+	bool active_cable = false;
+
+	if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber_qsfp) {
+		hw->phy.sfp_type = ixgbe_sfp_type_not_present;
+		return IXGBE_ERR_SFP_NOT_PRESENT;
+	}
+
+	/* LAN ID is needed for sfp_type determination */
+	hw->mac.ops.set_lan_id(hw);
+
+	status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_IDENTIFIER,
+					     &identifier);
+
+	if (status != 0)
+		goto err_read_i2c_eeprom;
+
+	if (identifier != IXGBE_SFF_IDENTIFIER_QSFP_PLUS) {
+		hw->phy.type = ixgbe_phy_sfp_unsupported;
+		return IXGBE_ERR_SFP_NOT_SUPPORTED;
+	}
+
+	hw->phy.id = identifier;
+
+	status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_QSFP_10GBE_COMP,
+					     &comp_codes_10g);
+
+	if (status != 0)
+		goto err_read_i2c_eeprom;
+
+	status = hw->phy.ops.read_i2c_eeprom(hw, IXGBE_SFF_QSFP_1GBE_COMP,
+					     &comp_codes_1g);
+
+	if (status != 0)
+		goto err_read_i2c_eeprom;
+
+	if (comp_codes_10g & IXGBE_SFF_QSFP_DA_PASSIVE_CABLE) {
+		hw->phy.type = ixgbe_phy_qsfp_passive_unknown;
+		if (hw->bus.lan_id == 0)
+			hw->phy.sfp_type = ixgbe_sfp_type_da_cu_core0;
+		else
+			hw->phy.sfp_type = ixgbe_sfp_type_da_cu_core1;
+	} else if (comp_codes_10g & (IXGBE_SFF_10GBASESR_CAPABLE |
+				     IXGBE_SFF_10GBASELR_CAPABLE)) {
+		if (hw->bus.lan_id == 0)
+			hw->phy.sfp_type = ixgbe_sfp_type_srlr_core0;
+		else
+			hw->phy.sfp_type = ixgbe_sfp_type_srlr_core1;
+	} else {
+		if (comp_codes_10g & IXGBE_SFF_QSFP_DA_ACTIVE_CABLE)
+			active_cable = true;
+
+		if (!active_cable) {
+			/* check for active DA cables that pre-date
+			 * SFF-8436 v3.6
+			 */
+			hw->phy.ops.read_i2c_eeprom(hw,
+					IXGBE_SFF_QSFP_CONNECTOR,
+					&connector);
+
+			hw->phy.ops.read_i2c_eeprom(hw,
+					IXGBE_SFF_QSFP_CABLE_LENGTH,
+					&cable_length);
+
+			hw->phy.ops.read_i2c_eeprom(hw,
+					IXGBE_SFF_QSFP_DEVICE_TECH,
+					&device_tech);
+
+			if ((connector ==
+				     IXGBE_SFF_QSFP_CONNECTOR_NOT_SEPARABLE) &&
+			    (cable_length > 0) &&
+			    ((device_tech >> 4) ==
+				     IXGBE_SFF_QSFP_TRANSMITER_850NM_VCSEL))
+				active_cable = true;
+		}
+
+		if (active_cable) {
+			hw->phy.type = ixgbe_phy_qsfp_active_unknown;
+			if (hw->bus.lan_id == 0)
+				hw->phy.sfp_type =
+						ixgbe_sfp_type_da_act_lmt_core0;
+			else
+				hw->phy.sfp_type =
+						ixgbe_sfp_type_da_act_lmt_core1;
+		} else {
+			/* unsupported module type */
+			hw->phy.type = ixgbe_phy_sfp_unsupported;
+			return IXGBE_ERR_SFP_NOT_SUPPORTED;
+		}
+	}
+
+	if (hw->phy.sfp_type != stored_sfp_type)
+		hw->phy.sfp_setup_needed = true;
+
+	/* Determine if the QSFP+ PHY is dual speed or not. */
+	hw->phy.multispeed_fiber = false;
+	if (((comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) &&
+	     (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)) ||
+	    ((comp_codes_1g & IXGBE_SFF_1GBASELX_CAPABLE) &&
+	     (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE)))
+		hw->phy.multispeed_fiber = true;
+
+	/* Determine PHY vendor for optical modules */
+	if (comp_codes_10g & (IXGBE_SFF_10GBASESR_CAPABLE |
+			      IXGBE_SFF_10GBASELR_CAPABLE)) {
+		status = hw->phy.ops.read_i2c_eeprom(hw,
+					IXGBE_SFF_QSFP_VENDOR_OUI_BYTE0,
+					&oui_bytes[0]);
+
+		if (status != 0)
+			goto err_read_i2c_eeprom;
+
+		status = hw->phy.ops.read_i2c_eeprom(hw,
+					IXGBE_SFF_QSFP_VENDOR_OUI_BYTE1,
+					&oui_bytes[1]);
+
+		if (status != 0)
+			goto err_read_i2c_eeprom;
+
+		status = hw->phy.ops.read_i2c_eeprom(hw,
+					IXGBE_SFF_QSFP_VENDOR_OUI_BYTE2,
+					&oui_bytes[2]);
+
+		if (status != 0)
+			goto err_read_i2c_eeprom;
+
+		vendor_oui =
+			((oui_bytes[0] << IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT) |
+			 (oui_bytes[1] << IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT) |
+			 (oui_bytes[2] << IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT));
+
+		if (vendor_oui == IXGBE_SFF_VENDOR_OUI_INTEL)
+			hw->phy.type = ixgbe_phy_qsfp_intel;
+		else
+			hw->phy.type = ixgbe_phy_qsfp_unknown;
+
+		hw->mac.ops.get_device_caps(hw, &enforce_sfp);
+		if (!(enforce_sfp & IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP)) {
+			/* Make sure we're a supported PHY type */
+			if (hw->phy.type == ixgbe_phy_qsfp_intel)
+				return 0;
+			if (hw->allow_unsupported_sfp) {
+				e_warn(drv, "WARNING: Intel (R) Network Connections are quality tested using Intel (R) Ethernet Optics. Using untested modules is not supported and may cause unstable operation or damage to the module or the adapter. Intel Corporation is not responsible for any harm caused by using untested modules.\n");
+				return 0;
+			}
+			hw_dbg(hw, "QSFP module not supported\n");
+			hw->phy.type = ixgbe_phy_sfp_unsupported;
+			return IXGBE_ERR_SFP_NOT_SUPPORTED;
+		}
+		return 0;
+	}
+	return 0;
+
+err_read_i2c_eeprom:
+	hw->phy.sfp_type = ixgbe_sfp_type_not_present;
+	hw->phy.id = 0;
+	hw->phy.type = ixgbe_phy_unknown;
+
+	return IXGBE_ERR_SFP_NOT_PRESENT;
+}
+
+/**
+ *  ixgbe_get_sfp_init_sequence_offsets - Provides offset of PHY init sequence
+ *  @hw: pointer to hardware structure
+ *  @list_offset: offset to the SFP ID list
+ *  @data_offset: offset to the SFP data block
+ *
+ *  Checks the MAC's EEPROM to see if it supports a given SFP+ module type, if
+ *  so it returns the offsets to the phy init sequence block.
+ **/
+s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
+					u16 *list_offset,
+					u16 *data_offset)
+{
+	u16 sfp_id;
+	u16 sfp_type = hw->phy.sfp_type;
+
+	if (hw->phy.sfp_type == ixgbe_sfp_type_unknown)
+		return IXGBE_ERR_SFP_NOT_SUPPORTED;
+
+	if (hw->phy.sfp_type == ixgbe_sfp_type_not_present)
+		return IXGBE_ERR_SFP_NOT_PRESENT;
+
+	if ((hw->device_id == IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) &&
+	    (hw->phy.sfp_type == ixgbe_sfp_type_da_cu))
+		return IXGBE_ERR_SFP_NOT_SUPPORTED;
+
+	/*
+	 * Limiting active cables and 1G Phys must be initialized as
+	 * SR modules
+	 */
+	if (sfp_type == ixgbe_sfp_type_da_act_lmt_core0 ||
+	    sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
+	    sfp_type == ixgbe_sfp_type_1g_cu_core0 ||
+	    sfp_type == ixgbe_sfp_type_1g_sx_core0)
+		sfp_type = ixgbe_sfp_type_srlr_core0;
+	else if (sfp_type == ixgbe_sfp_type_da_act_lmt_core1 ||
+		 sfp_type == ixgbe_sfp_type_1g_lx_core1 ||
+		 sfp_type == ixgbe_sfp_type_1g_cu_core1 ||
+		 sfp_type == ixgbe_sfp_type_1g_sx_core1)
+		sfp_type = ixgbe_sfp_type_srlr_core1;
+
+	/* Read offset to PHY init contents */
+	if (hw->eeprom.ops.read(hw, IXGBE_PHY_INIT_OFFSET_NL, list_offset)) {
+		hw_err(hw, "eeprom read at %d failed\n",
+		       IXGBE_PHY_INIT_OFFSET_NL);
+		return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT;
+	}
+
+	if ((!*list_offset) || (*list_offset == 0xFFFF))
+		return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT;
+
+	/* Shift offset to first ID word */
+	(*list_offset)++;
+
+	/*
+	 * Find the matching SFP ID in the EEPROM
+	 * and program the init sequence
+	 */
+	if (hw->eeprom.ops.read(hw, *list_offset, &sfp_id))
+		goto err_phy;
+
+	while (sfp_id != IXGBE_PHY_INIT_END_NL) {
+		if (sfp_id == sfp_type) {
+			(*list_offset)++;
+			if (hw->eeprom.ops.read(hw, *list_offset, data_offset))
+				goto err_phy;
+			if ((!*data_offset) || (*data_offset == 0xFFFF)) {
+				hw_dbg(hw, "SFP+ module not supported\n");
+				return IXGBE_ERR_SFP_NOT_SUPPORTED;
+			} else {
+				break;
+			}
+		} else {
+			(*list_offset) += 2;
+			if (hw->eeprom.ops.read(hw, *list_offset, &sfp_id))
+				goto err_phy;
+		}
+	}
+
+	if (sfp_id == IXGBE_PHY_INIT_END_NL) {
+		hw_dbg(hw, "No matching SFP+ module found\n");
+		return IXGBE_ERR_SFP_NOT_SUPPORTED;
+	}
+
+	return 0;
+
+err_phy:
+	hw_err(hw, "eeprom read at offset %d failed\n", *list_offset);
+	return IXGBE_ERR_PHY;
+}
+
+/**
+ *  ixgbe_read_i2c_eeprom_generic - Reads 8 bit EEPROM word over I2C interface
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: EEPROM byte offset to read
+ *  @eeprom_data: value read
+ *
+ *  Performs byte read operation to SFP module's EEPROM over I2C interface.
+ **/
+s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				  u8 *eeprom_data)
+{
+	return hw->phy.ops.read_i2c_byte(hw, byte_offset,
+					 IXGBE_I2C_EEPROM_DEV_ADDR,
+					 eeprom_data);
+}
+
+/**
+ *  ixgbe_read_i2c_sff8472_generic - Reads 8 bit word over I2C interface
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset at address 0xA2
+ *  @sff8472_data: value read
+ *
+ *  Performs byte read operation to SFP module's SFF-8472 data over I2C
+ **/
+s32 ixgbe_read_i2c_sff8472_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				   u8 *sff8472_data)
+{
+	return hw->phy.ops.read_i2c_byte(hw, byte_offset,
+					 IXGBE_I2C_EEPROM_DEV_ADDR2,
+					 sff8472_data);
+}
+
+/**
+ *  ixgbe_write_i2c_eeprom_generic - Writes 8 bit EEPROM word over I2C interface
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: EEPROM byte offset to write
+ *  @eeprom_data: value to write
+ *
+ *  Performs byte write operation to SFP module's EEPROM over I2C interface.
+ **/
+s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				   u8 eeprom_data)
+{
+	return hw->phy.ops.write_i2c_byte(hw, byte_offset,
+					  IXGBE_I2C_EEPROM_DEV_ADDR,
+					  eeprom_data);
+}
+
+/**
+ * ixgbe_is_sfp_probe - Returns true if SFP is being detected
+ * @hw: pointer to hardware structure
+ * @offset: eeprom offset to be read
+ * @addr: I2C address to be read
+ */
+static bool ixgbe_is_sfp_probe(struct ixgbe_hw *hw, u8 offset, u8 addr)
+{
+	if (addr == IXGBE_I2C_EEPROM_DEV_ADDR &&
+	    offset == IXGBE_SFF_IDENTIFIER &&
+	    hw->phy.sfp_type == ixgbe_sfp_type_not_present)
+		return true;
+	return false;
+}
+
+/**
+ *  ixgbe_read_i2c_byte_generic_int - Reads 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to read
+ *  @dev_addr: device address
+ *  @data: value read
+ *  @lock: true if to take and release semaphore
+ *
+ *  Performs byte read operation to SFP module's EEPROM over I2C interface at
+ *  a specified device address.
+ */
+static s32 ixgbe_read_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset,
+					   u8 dev_addr, u8 *data, bool lock)
+{
+	s32 status;
+	u32 max_retry = 10;
+	u32 retry = 0;
+	u32 swfw_mask = hw->phy.phy_semaphore_mask;
+	bool nack = true;
+
+	if (hw->mac.type >= ixgbe_mac_X550)
+		max_retry = 3;
+	if (ixgbe_is_sfp_probe(hw, byte_offset, dev_addr))
+		max_retry = IXGBE_SFP_DETECT_RETRIES;
+
+	*data = 0;
+
+	do {
+		if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
+			return IXGBE_ERR_SWFW_SYNC;
+
+		ixgbe_i2c_start(hw);
+
+		/* Device Address and write indication */
+		status = ixgbe_clock_out_i2c_byte(hw, dev_addr);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_get_i2c_ack(hw);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_clock_out_i2c_byte(hw, byte_offset);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_get_i2c_ack(hw);
+		if (status != 0)
+			goto fail;
+
+		ixgbe_i2c_start(hw);
+
+		/* Device Address and read indication */
+		status = ixgbe_clock_out_i2c_byte(hw, (dev_addr | 0x1));
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_get_i2c_ack(hw);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_clock_in_i2c_byte(hw, data);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_clock_out_i2c_bit(hw, nack);
+		if (status != 0)
+			goto fail;
+
+		ixgbe_i2c_stop(hw);
+		if (lock)
+			hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+		return 0;
+
+fail:
+		ixgbe_i2c_bus_clear(hw);
+		if (lock) {
+			hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+			msleep(100);
+		}
+		retry++;
+		if (retry < max_retry)
+			hw_dbg(hw, "I2C byte read error - Retrying.\n");
+		else
+			hw_dbg(hw, "I2C byte read error.\n");
+
+	} while (retry < max_retry);
+
+	return status;
+}
+
+/**
+ *  ixgbe_read_i2c_byte_generic - Reads 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to read
+ *  @dev_addr: device address
+ *  @data: value read
+ *
+ *  Performs byte read operation to SFP module's EEPROM over I2C interface at
+ *  a specified device address.
+ */
+s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				u8 dev_addr, u8 *data)
+{
+	return ixgbe_read_i2c_byte_generic_int(hw, byte_offset, dev_addr,
+					       data, true);
+}
+
+/**
+ *  ixgbe_read_i2c_byte_generic_unlocked - Reads 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to read
+ *  @dev_addr: device address
+ *  @data: value read
+ *
+ *  Performs byte read operation to SFP module's EEPROM over I2C interface at
+ *  a specified device address.
+ */
+s32 ixgbe_read_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+					 u8 dev_addr, u8 *data)
+{
+	return ixgbe_read_i2c_byte_generic_int(hw, byte_offset, dev_addr,
+					       data, false);
+}
+
+/**
+ *  ixgbe_write_i2c_byte_generic_int - Writes 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to write
+ *  @dev_addr: device address
+ *  @data: value to write
+ *  @lock: true if to take and release semaphore
+ *
+ *  Performs byte write operation to SFP module's EEPROM over I2C interface at
+ *  a specified device address.
+ */
+static s32 ixgbe_write_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset,
+					    u8 dev_addr, u8 data, bool lock)
+{
+	s32 status;
+	u32 max_retry = 1;
+	u32 retry = 0;
+	u32 swfw_mask = hw->phy.phy_semaphore_mask;
+
+	if (lock && hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
+		return IXGBE_ERR_SWFW_SYNC;
+
+	do {
+		ixgbe_i2c_start(hw);
+
+		status = ixgbe_clock_out_i2c_byte(hw, dev_addr);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_get_i2c_ack(hw);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_clock_out_i2c_byte(hw, byte_offset);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_get_i2c_ack(hw);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_clock_out_i2c_byte(hw, data);
+		if (status != 0)
+			goto fail;
+
+		status = ixgbe_get_i2c_ack(hw);
+		if (status != 0)
+			goto fail;
+
+		ixgbe_i2c_stop(hw);
+		if (lock)
+			hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+		return 0;
+
+fail:
+		ixgbe_i2c_bus_clear(hw);
+		retry++;
+		if (retry < max_retry)
+			hw_dbg(hw, "I2C byte write error - Retrying.\n");
+		else
+			hw_dbg(hw, "I2C byte write error.\n");
+	} while (retry < max_retry);
+
+	if (lock)
+		hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+
+	return status;
+}
+
+/**
+ *  ixgbe_write_i2c_byte_generic - Writes 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to write
+ *  @dev_addr: device address
+ *  @data: value to write
+ *
+ *  Performs byte write operation to SFP module's EEPROM over I2C interface at
+ *  a specified device address.
+ */
+s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				 u8 dev_addr, u8 data)
+{
+	return ixgbe_write_i2c_byte_generic_int(hw, byte_offset, dev_addr,
+						data, true);
+}
+
+/**
+ *  ixgbe_write_i2c_byte_generic_unlocked - Writes 8 bit word over I2C
+ *  @hw: pointer to hardware structure
+ *  @byte_offset: byte offset to write
+ *  @dev_addr: device address
+ *  @data: value to write
+ *
+ *  Performs byte write operation to SFP module's EEPROM over I2C interface at
+ *  a specified device address.
+ */
+s32 ixgbe_write_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+					  u8 dev_addr, u8 data)
+{
+	return ixgbe_write_i2c_byte_generic_int(hw, byte_offset, dev_addr,
+						data, false);
+}
+
+/**
+ *  ixgbe_i2c_start - Sets I2C start condition
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets I2C start condition (High -> Low on SDA while SCL is High)
+ *  Set bit-bang mode on X550 hardware.
+ **/
+static void ixgbe_i2c_start(struct ixgbe_hw *hw)
+{
+	u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw));
+
+	i2cctl |= IXGBE_I2C_BB_EN(hw);
+
+	/* Start condition must begin with data and clock high */
+	ixgbe_set_i2c_data(hw, &i2cctl, 1);
+	ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+	/* Setup time for start condition (4.7us) */
+	udelay(IXGBE_I2C_T_SU_STA);
+
+	ixgbe_set_i2c_data(hw, &i2cctl, 0);
+
+	/* Hold time for start condition (4us) */
+	udelay(IXGBE_I2C_T_HD_STA);
+
+	ixgbe_lower_i2c_clk(hw, &i2cctl);
+
+	/* Minimum low period of clock is 4.7 us */
+	udelay(IXGBE_I2C_T_LOW);
+
+}
+
+/**
+ *  ixgbe_i2c_stop - Sets I2C stop condition
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets I2C stop condition (Low -> High on SDA while SCL is High)
+ *  Disables bit-bang mode and negates data output enable on X550
+ *  hardware.
+ **/
+static void ixgbe_i2c_stop(struct ixgbe_hw *hw)
+{
+	u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw));
+	u32 data_oe_bit = IXGBE_I2C_DATA_OE_N_EN(hw);
+	u32 clk_oe_bit = IXGBE_I2C_CLK_OE_N_EN(hw);
+	u32 bb_en_bit = IXGBE_I2C_BB_EN(hw);
+
+	/* Stop condition must begin with data low and clock high */
+	ixgbe_set_i2c_data(hw, &i2cctl, 0);
+	ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+	/* Setup time for stop condition (4us) */
+	udelay(IXGBE_I2C_T_SU_STO);
+
+	ixgbe_set_i2c_data(hw, &i2cctl, 1);
+
+	/* bus free time between stop and start (4.7us)*/
+	udelay(IXGBE_I2C_T_BUF);
+
+	if (bb_en_bit || data_oe_bit || clk_oe_bit) {
+		i2cctl &= ~bb_en_bit;
+		i2cctl |= data_oe_bit | clk_oe_bit;
+		IXGBE_WRITE_REG(hw, IXGBE_I2CCTL(hw), i2cctl);
+		IXGBE_WRITE_FLUSH(hw);
+	}
+}
+
+/**
+ *  ixgbe_clock_in_i2c_byte - Clocks in one byte via I2C
+ *  @hw: pointer to hardware structure
+ *  @data: data byte to clock in
+ *
+ *  Clocks in one byte data via I2C data/clock
+ **/
+static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data)
+{
+	s32 i;
+	bool bit = false;
+
+	*data = 0;
+	for (i = 7; i >= 0; i--) {
+		ixgbe_clock_in_i2c_bit(hw, &bit);
+		*data |= bit << i;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_clock_out_i2c_byte - Clocks out one byte via I2C
+ *  @hw: pointer to hardware structure
+ *  @data: data byte clocked out
+ *
+ *  Clocks out one byte data via I2C data/clock
+ **/
+static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data)
+{
+	s32 status;
+	s32 i;
+	u32 i2cctl;
+	bool bit = false;
+
+	for (i = 7; i >= 0; i--) {
+		bit = (data >> i) & 0x1;
+		status = ixgbe_clock_out_i2c_bit(hw, bit);
+
+		if (status != 0)
+			break;
+	}
+
+	/* Release SDA line (set high) */
+	i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw));
+	i2cctl |= IXGBE_I2C_DATA_OUT(hw);
+	i2cctl |= IXGBE_I2C_DATA_OE_N_EN(hw);
+	IXGBE_WRITE_REG(hw, IXGBE_I2CCTL(hw), i2cctl);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return status;
+}
+
+/**
+ *  ixgbe_get_i2c_ack - Polls for I2C ACK
+ *  @hw: pointer to hardware structure
+ *
+ *  Clocks in/out one bit via I2C data/clock
+ **/
+static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw)
+{
+	u32 data_oe_bit = IXGBE_I2C_DATA_OE_N_EN(hw);
+	s32 status = 0;
+	u32 i = 0;
+	u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw));
+	u32 timeout = 10;
+	bool ack = true;
+
+	if (data_oe_bit) {
+		i2cctl |= IXGBE_I2C_DATA_OUT(hw);
+		i2cctl |= data_oe_bit;
+		IXGBE_WRITE_REG(hw, IXGBE_I2CCTL(hw), i2cctl);
+		IXGBE_WRITE_FLUSH(hw);
+	}
+	ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+	/* Minimum high period of clock is 4us */
+	udelay(IXGBE_I2C_T_HIGH);
+
+	/* Poll for ACK.  Note that ACK in I2C spec is
+	 * transition from 1 to 0 */
+	for (i = 0; i < timeout; i++) {
+		i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw));
+		ack = ixgbe_get_i2c_data(hw, &i2cctl);
+
+		udelay(1);
+		if (ack == 0)
+			break;
+	}
+
+	if (ack == 1) {
+		hw_dbg(hw, "I2C ack was not received.\n");
+		status = IXGBE_ERR_I2C;
+	}
+
+	ixgbe_lower_i2c_clk(hw, &i2cctl);
+
+	/* Minimum low period of clock is 4.7 us */
+	udelay(IXGBE_I2C_T_LOW);
+
+	return status;
+}
+
+/**
+ *  ixgbe_clock_in_i2c_bit - Clocks in one bit via I2C data/clock
+ *  @hw: pointer to hardware structure
+ *  @data: read data value
+ *
+ *  Clocks in one bit via I2C data/clock
+ **/
+static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data)
+{
+	u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw));
+	u32 data_oe_bit = IXGBE_I2C_DATA_OE_N_EN(hw);
+
+	if (data_oe_bit) {
+		i2cctl |= IXGBE_I2C_DATA_OUT(hw);
+		i2cctl |= data_oe_bit;
+		IXGBE_WRITE_REG(hw, IXGBE_I2CCTL(hw), i2cctl);
+		IXGBE_WRITE_FLUSH(hw);
+	}
+	ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+	/* Minimum high period of clock is 4us */
+	udelay(IXGBE_I2C_T_HIGH);
+
+	i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw));
+	*data = ixgbe_get_i2c_data(hw, &i2cctl);
+
+	ixgbe_lower_i2c_clk(hw, &i2cctl);
+
+	/* Minimum low period of clock is 4.7 us */
+	udelay(IXGBE_I2C_T_LOW);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_clock_out_i2c_bit - Clocks in/out one bit via I2C data/clock
+ *  @hw: pointer to hardware structure
+ *  @data: data value to write
+ *
+ *  Clocks out one bit via I2C data/clock
+ **/
+static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data)
+{
+	s32 status;
+	u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw));
+
+	status = ixgbe_set_i2c_data(hw, &i2cctl, data);
+	if (status == 0) {
+		ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+		/* Minimum high period of clock is 4us */
+		udelay(IXGBE_I2C_T_HIGH);
+
+		ixgbe_lower_i2c_clk(hw, &i2cctl);
+
+		/* Minimum low period of clock is 4.7 us.
+		 * This also takes care of the data hold time.
+		 */
+		udelay(IXGBE_I2C_T_LOW);
+	} else {
+		hw_dbg(hw, "I2C data was not set to %X\n", data);
+		return IXGBE_ERR_I2C;
+	}
+
+	return 0;
+}
+/**
+ *  ixgbe_raise_i2c_clk - Raises the I2C SCL clock
+ *  @hw: pointer to hardware structure
+ *  @i2cctl: Current value of I2CCTL register
+ *
+ *  Raises the I2C clock line '0'->'1'
+ *  Negates the I2C clock output enable on X550 hardware.
+ **/
+static void ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl)
+{
+	u32 clk_oe_bit = IXGBE_I2C_CLK_OE_N_EN(hw);
+	u32 i = 0;
+	u32 timeout = IXGBE_I2C_CLOCK_STRETCHING_TIMEOUT;
+	u32 i2cctl_r = 0;
+
+	if (clk_oe_bit) {
+		*i2cctl |= clk_oe_bit;
+		IXGBE_WRITE_REG(hw, IXGBE_I2CCTL(hw), *i2cctl);
+	}
+
+	for (i = 0; i < timeout; i++) {
+		*i2cctl |= IXGBE_I2C_CLK_OUT(hw);
+		IXGBE_WRITE_REG(hw, IXGBE_I2CCTL(hw), *i2cctl);
+		IXGBE_WRITE_FLUSH(hw);
+		/* SCL rise time (1000ns) */
+		udelay(IXGBE_I2C_T_RISE);
+
+		i2cctl_r = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw));
+		if (i2cctl_r & IXGBE_I2C_CLK_IN(hw))
+			break;
+	}
+}
+
+/**
+ *  ixgbe_lower_i2c_clk - Lowers the I2C SCL clock
+ *  @hw: pointer to hardware structure
+ *  @i2cctl: Current value of I2CCTL register
+ *
+ *  Lowers the I2C clock line '1'->'0'
+ *  Asserts the I2C clock output enable on X550 hardware.
+ **/
+static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl)
+{
+
+	*i2cctl &= ~IXGBE_I2C_CLK_OUT(hw);
+	*i2cctl &= ~IXGBE_I2C_CLK_OE_N_EN(hw);
+
+	IXGBE_WRITE_REG(hw, IXGBE_I2CCTL(hw), *i2cctl);
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* SCL fall time (300ns) */
+	udelay(IXGBE_I2C_T_FALL);
+}
+
+/**
+ *  ixgbe_set_i2c_data - Sets the I2C data bit
+ *  @hw: pointer to hardware structure
+ *  @i2cctl: Current value of I2CCTL register
+ *  @data: I2C data value (0 or 1) to set
+ *
+ *  Sets the I2C data bit
+ *  Asserts the I2C data output enable on X550 hardware.
+ **/
+static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data)
+{
+	u32 data_oe_bit = IXGBE_I2C_DATA_OE_N_EN(hw);
+
+	if (data)
+		*i2cctl |= IXGBE_I2C_DATA_OUT(hw);
+	else
+		*i2cctl &= ~IXGBE_I2C_DATA_OUT(hw);
+	*i2cctl &= ~data_oe_bit;
+
+	IXGBE_WRITE_REG(hw, IXGBE_I2CCTL(hw), *i2cctl);
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Data rise/fall (1000ns/300ns) and set-up time (250ns) */
+	udelay(IXGBE_I2C_T_RISE + IXGBE_I2C_T_FALL + IXGBE_I2C_T_SU_DATA);
+
+	if (!data)	/* Can't verify data in this case */
+		return 0;
+	if (data_oe_bit) {
+		*i2cctl |= data_oe_bit;
+		IXGBE_WRITE_REG(hw, IXGBE_I2CCTL(hw), *i2cctl);
+		IXGBE_WRITE_FLUSH(hw);
+	}
+
+	/* Verify data was set correctly */
+	*i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw));
+	if (data != ixgbe_get_i2c_data(hw, i2cctl)) {
+		hw_dbg(hw, "Error - I2C data was not set to %X.\n", data);
+		return IXGBE_ERR_I2C;
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_get_i2c_data - Reads the I2C SDA data bit
+ *  @hw: pointer to hardware structure
+ *  @i2cctl: Current value of I2CCTL register
+ *
+ *  Returns the I2C data bit value
+ *  Negates the I2C data output enable on X550 hardware.
+ **/
+static bool ixgbe_get_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl)
+{
+	u32 data_oe_bit = IXGBE_I2C_DATA_OE_N_EN(hw);
+
+	if (data_oe_bit) {
+		*i2cctl |= data_oe_bit;
+		IXGBE_WRITE_REG(hw, IXGBE_I2CCTL(hw), *i2cctl);
+		IXGBE_WRITE_FLUSH(hw);
+		udelay(IXGBE_I2C_T_FALL);
+	}
+
+	if (*i2cctl & IXGBE_I2C_DATA_IN(hw))
+		return true;
+	return false;
+}
+
+/**
+ *  ixgbe_i2c_bus_clear - Clears the I2C bus
+ *  @hw: pointer to hardware structure
+ *
+ *  Clears the I2C bus by sending nine clock pulses.
+ *  Used when data line is stuck low.
+ **/
+static void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw)
+{
+	u32 i2cctl;
+	u32 i;
+
+	ixgbe_i2c_start(hw);
+	i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL(hw));
+
+	ixgbe_set_i2c_data(hw, &i2cctl, 1);
+
+	for (i = 0; i < 9; i++) {
+		ixgbe_raise_i2c_clk(hw, &i2cctl);
+
+		/* Min high period of clock is 4us */
+		udelay(IXGBE_I2C_T_HIGH);
+
+		ixgbe_lower_i2c_clk(hw, &i2cctl);
+
+		/* Min low period of clock is 4.7us*/
+		udelay(IXGBE_I2C_T_LOW);
+	}
+
+	ixgbe_i2c_start(hw);
+
+	/* Put the i2c bus back to default state */
+	ixgbe_i2c_stop(hw);
+}
+
+/**
+ *  ixgbe_tn_check_overtemp - Checks if an overtemp occurred.
+ *  @hw: pointer to hardware structure
+ *
+ *  Checks if the LASI temp alarm status was triggered due to overtemp
+ **/
+s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw)
+{
+	u16 phy_data = 0;
+
+	if (hw->device_id != IXGBE_DEV_ID_82599_T3_LOM)
+		return 0;
+
+	/* Check that the LASI temp alarm status was triggered */
+	hw->phy.ops.read_reg(hw, IXGBE_TN_LASI_STATUS_REG,
+			     MDIO_MMD_PMAPMD, &phy_data);
+
+	if (!(phy_data & IXGBE_TN_LASI_STATUS_TEMP_ALARM))
+		return 0;
+
+	return IXGBE_ERR_OVERTEMP;
+}
+
+/** ixgbe_set_copper_phy_power - Control power for copper phy
+ *  @hw: pointer to hardware structure
+ *  @on: true for on, false for off
+ **/
+s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on)
+{
+	u32 status;
+	u16 reg;
+
+	/* Bail if we don't have copper phy */
+	if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
+		return 0;
+
+	if (!on && ixgbe_mng_present(hw))
+		return 0;
+
+	status = hw->phy.ops.read_reg(hw, MDIO_CTRL1, MDIO_MMD_VEND1, &reg);
+	if (status)
+		return status;
+
+	if (on) {
+		reg &= ~IXGBE_MDIO_PHY_SET_LOW_POWER_MODE;
+	} else {
+		if (ixgbe_check_reset_blocked(hw))
+			return 0;
+		reg |= IXGBE_MDIO_PHY_SET_LOW_POWER_MODE;
+	}
+
+	status = hw->phy.ops.write_reg(hw, MDIO_CTRL1, MDIO_MMD_VEND1, reg);
+	return status;
+}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
new file mode 100644
index 0000000000..6544c4539c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _IXGBE_PHY_H_
+#define _IXGBE_PHY_H_
+
+#include "ixgbe_type.h"
+#define IXGBE_I2C_EEPROM_DEV_ADDR    0xA0
+#define IXGBE_I2C_EEPROM_DEV_ADDR2   0xA2
+
+/* EEPROM byte offsets */
+#define IXGBE_SFF_IDENTIFIER		0x0
+#define IXGBE_SFF_IDENTIFIER_SFP	0x3
+#define IXGBE_SFF_VENDOR_OUI_BYTE0	0x25
+#define IXGBE_SFF_VENDOR_OUI_BYTE1	0x26
+#define IXGBE_SFF_VENDOR_OUI_BYTE2	0x27
+#define IXGBE_SFF_1GBE_COMP_CODES	0x6
+#define IXGBE_SFF_10GBE_COMP_CODES	0x3
+#define IXGBE_SFF_CABLE_TECHNOLOGY	0x8
+#define IXGBE_SFF_CABLE_SPEC_COMP	0x3C
+#define IXGBE_SFF_SFF_8472_SWAP		0x5C
+#define IXGBE_SFF_SFF_8472_COMP		0x5E
+#define IXGBE_SFF_SFF_8472_OSCB		0x6E
+#define IXGBE_SFF_SFF_8472_ESCB		0x76
+#define IXGBE_SFF_IDENTIFIER_QSFP_PLUS	0xD
+#define IXGBE_SFF_QSFP_VENDOR_OUI_BYTE0	0xA5
+#define IXGBE_SFF_QSFP_VENDOR_OUI_BYTE1	0xA6
+#define IXGBE_SFF_QSFP_VENDOR_OUI_BYTE2	0xA7
+#define IXGBE_SFF_QSFP_CONNECTOR	0x82
+#define IXGBE_SFF_QSFP_10GBE_COMP	0x83
+#define IXGBE_SFF_QSFP_1GBE_COMP	0x86
+#define IXGBE_SFF_QSFP_CABLE_LENGTH	0x92
+#define IXGBE_SFF_QSFP_DEVICE_TECH	0x93
+
+/* Bitmasks */
+#define IXGBE_SFF_DA_PASSIVE_CABLE		0x4
+#define IXGBE_SFF_DA_ACTIVE_CABLE		0x8
+#define IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING	0x4
+#define IXGBE_SFF_1GBASESX_CAPABLE		0x1
+#define IXGBE_SFF_1GBASELX_CAPABLE		0x2
+#define IXGBE_SFF_1GBASET_CAPABLE		0x8
+#define IXGBE_SFF_10GBASESR_CAPABLE		0x10
+#define IXGBE_SFF_10GBASELR_CAPABLE		0x20
+#define IXGBE_SFF_SOFT_RS_SELECT_MASK		0x8
+#define IXGBE_SFF_SOFT_RS_SELECT_10G		0x8
+#define IXGBE_SFF_SOFT_RS_SELECT_1G		0x0
+#define IXGBE_SFF_ADDRESSING_MODE		0x4
+#define IXGBE_SFF_DDM_IMPLEMENTED		0x40
+#define IXGBE_SFF_QSFP_DA_ACTIVE_CABLE		0x1
+#define IXGBE_SFF_QSFP_DA_PASSIVE_CABLE		0x8
+#define IXGBE_SFF_QSFP_CONNECTOR_NOT_SEPARABLE	0x23
+#define IXGBE_SFF_QSFP_TRANSMITER_850NM_VCSEL	0x0
+#define IXGBE_I2C_EEPROM_READ_MASK		0x100
+#define IXGBE_I2C_EEPROM_STATUS_MASK		0x3
+#define IXGBE_I2C_EEPROM_STATUS_NO_OPERATION	0x0
+#define IXGBE_I2C_EEPROM_STATUS_PASS		0x1
+#define IXGBE_I2C_EEPROM_STATUS_FAIL		0x2
+#define IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS	0x3
+#define IXGBE_CS4227				0xBE    /* CS4227 address */
+#define IXGBE_CS4227_GLOBAL_ID_LSB		0
+#define IXGBE_CS4227_GLOBAL_ID_MSB		1
+#define IXGBE_CS4227_SCRATCH			2
+#define IXGBE_CS4227_EFUSE_PDF_SKU		0x19F
+#define IXGBE_CS4223_SKU_ID			0x0010  /* Quad port */
+#define IXGBE_CS4227_SKU_ID			0x0014  /* Dual port */
+#define IXGBE_CS4227_RESET_PENDING		0x1357
+#define IXGBE_CS4227_RESET_COMPLETE		0x5AA5
+#define IXGBE_CS4227_RETRIES			15
+#define IXGBE_CS4227_EFUSE_STATUS		0x0181
+#define IXGBE_CS4227_LINE_SPARE22_MSB		0x12AD	/* Reg to set speed */
+#define IXGBE_CS4227_LINE_SPARE24_LSB		0x12B0	/* Reg to set EDC */
+#define IXGBE_CS4227_HOST_SPARE22_MSB		0x1AAD	/* Reg to set speed */
+#define IXGBE_CS4227_HOST_SPARE24_LSB		0x1AB0	/* Reg to program EDC */
+#define IXGBE_CS4227_EEPROM_STATUS		0x5001
+#define IXGBE_CS4227_EEPROM_LOAD_OK		0x0001
+#define IXGBE_CS4227_SPEED_1G			0x8000
+#define IXGBE_CS4227_SPEED_10G			0
+#define IXGBE_CS4227_EDC_MODE_CX1		0x0002
+#define IXGBE_CS4227_EDC_MODE_SR		0x0004
+#define IXGBE_CS4227_EDC_MODE_DIAG		0x0008
+#define IXGBE_CS4227_RESET_HOLD			500	/* microseconds */
+#define IXGBE_CS4227_RESET_DELAY		500	/* milliseconds */
+#define IXGBE_CS4227_CHECK_DELAY		30	/* milliseconds */
+#define IXGBE_PE				0xE0	/* Port expander addr */
+#define IXGBE_PE_OUTPUT				1	/* Output reg offset */
+#define IXGBE_PE_CONFIG				3	/* Config reg offset */
+#define IXGBE_PE_BIT1				BIT(1)
+
+/* Flow control defines */
+#define IXGBE_TAF_SYM_PAUSE                  0x400
+#define IXGBE_TAF_ASM_PAUSE                  0x800
+
+/* Bit-shift macros */
+#define IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT    24
+#define IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT    16
+#define IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT    8
+
+/* Vendor OUIs: format of OUI is 0x[byte0][byte1][byte2][00] */
+#define IXGBE_SFF_VENDOR_OUI_TYCO     0x00407600
+#define IXGBE_SFF_VENDOR_OUI_FTL      0x00906500
+#define IXGBE_SFF_VENDOR_OUI_AVAGO    0x00176A00
+#define IXGBE_SFF_VENDOR_OUI_INTEL    0x001B2100
+
+/* I2C SDA and SCL timing parameters for standard mode */
+#define IXGBE_I2C_T_HD_STA  4
+#define IXGBE_I2C_T_LOW     5
+#define IXGBE_I2C_T_HIGH    4
+#define IXGBE_I2C_T_SU_STA  5
+#define IXGBE_I2C_T_HD_DATA 5
+#define IXGBE_I2C_T_SU_DATA 1
+#define IXGBE_I2C_T_RISE    1
+#define IXGBE_I2C_T_FALL    1
+#define IXGBE_I2C_T_SU_STO  4
+#define IXGBE_I2C_T_BUF     5
+
+#define IXGBE_SFP_DETECT_RETRIES	2
+
+#define IXGBE_TN_LASI_STATUS_REG        0x9005
+#define IXGBE_TN_LASI_STATUS_TEMP_ALARM 0x0008
+
+/* SFP+ SFF-8472 Compliance code */
+#define IXGBE_SFF_SFF_8472_UNSUP      0x00
+
+s32 ixgbe_mii_bus_init(struct ixgbe_hw *hw);
+
+s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw);
+s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw);
+s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+			       u32 device_type, u16 *phy_data);
+s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr,
+				u32 device_type, u16 phy_data);
+s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr,
+			   u32 device_type, u16 *phy_data);
+s32 ixgbe_write_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr,
+			    u32 device_type, u16 phy_data);
+s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw);
+s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw,
+				       ixgbe_link_speed speed,
+				       bool autoneg_wait_to_complete);
+s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw,
+					       ixgbe_link_speed *speed,
+					       bool *autoneg);
+bool ixgbe_check_reset_blocked(struct ixgbe_hw *hw);
+
+/* PHY specific */
+s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw,
+			     ixgbe_link_speed *speed,
+			     bool *link_up);
+s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw);
+
+s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw);
+s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on);
+s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw);
+s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw);
+s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw,
+					u16 *list_offset,
+					u16 *data_offset);
+s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw);
+s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				u8 dev_addr, u8 *data);
+s32 ixgbe_read_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+					 u8 dev_addr, u8 *data);
+s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				 u8 dev_addr, u8 data);
+s32 ixgbe_write_i2c_byte_generic_unlocked(struct ixgbe_hw *hw, u8 byte_offset,
+					  u8 dev_addr, u8 data);
+s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				  u8 *eeprom_data);
+s32 ixgbe_read_i2c_sff8472_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				   u8 *sff8472_data);
+s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
+				   u8 eeprom_data);
+s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *, u8 addr, u16 reg,
+					u16 *val, bool lock);
+s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *, u8 addr, u16 reg,
+					 u16 val, bool lock);
+#endif /* _IXGBE_PHY_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
new file mode 100644
index 0000000000..9339edbd90
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -0,0 +1,1523 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "ixgbe.h"
+#include <linux/ptp_classify.h>
+#include <linux/clocksource.h>
+
+/*
+ * The 82599 and the X540 do not have true 64bit nanosecond scale
+ * counter registers. Instead, SYSTIME is defined by a fixed point
+ * system which allows the user to define the scale counter increment
+ * value at every level change of the oscillator driving the SYSTIME
+ * value. For both devices the TIMINCA:IV field defines this
+ * increment. On the X540 device, 31 bits are provided. However on the
+ * 82599 only provides 24 bits. The time unit is determined by the
+ * clock frequency of the oscillator in combination with the TIMINCA
+ * register. When these devices link at 10Gb the oscillator has a
+ * period of 6.4ns. In order to convert the scale counter into
+ * nanoseconds the cyclecounter and timecounter structures are
+ * used. The SYSTIME registers need to be converted to ns values by use
+ * of only a right shift (division by power of 2). The following math
+ * determines the largest incvalue that will fit into the available
+ * bits in the TIMINCA register.
+ *
+ * PeriodWidth: Number of bits to store the clock period
+ * MaxWidth: The maximum width value of the TIMINCA register
+ * Period: The clock period for the oscillator
+ * round(): discard the fractional portion of the calculation
+ *
+ * Period * [ 2 ^ ( MaxWidth - PeriodWidth ) ]
+ *
+ * For the X540, MaxWidth is 31 bits, and the base period is 6.4 ns
+ * For the 82599, MaxWidth is 24 bits, and the base period is 6.4 ns
+ *
+ * The period also changes based on the link speed:
+ * At 10Gb link or no link, the period remains the same.
+ * At 1Gb link, the period is multiplied by 10. (64ns)
+ * At 100Mb link, the period is multiplied by 100. (640ns)
+ *
+ * The calculated value allows us to right shift the SYSTIME register
+ * value in order to quickly convert it into a nanosecond clock,
+ * while allowing for the maximum possible adjustment value.
+ *
+ * These diagrams are only for the 10Gb link period
+ *
+ *           SYSTIMEH            SYSTIMEL
+ *       +--------------+  +--------------+
+ * X540  |      32      |  | 1 | 3 |  28  |
+ *       *--------------+  +--------------+
+ *        \________ 36 bits ______/  fract
+ *
+ *       +--------------+  +--------------+
+ * 82599 |      32      |  | 8 | 3 |  21  |
+ *       *--------------+  +--------------+
+ *        \________ 43 bits ______/  fract
+ *
+ * The 36 bit X540 SYSTIME overflows every
+ *   2^36 * 10^-9 / 60 = 1.14 minutes or 69 seconds
+ *
+ * The 43 bit 82599 SYSTIME overflows every
+ *   2^43 * 10^-9 / 3600 = 2.4 hours
+ */
+#define IXGBE_INCVAL_10GB 0x66666666
+#define IXGBE_INCVAL_1GB  0x40000000
+#define IXGBE_INCVAL_100  0x50000000
+
+#define IXGBE_INCVAL_SHIFT_10GB  28
+#define IXGBE_INCVAL_SHIFT_1GB   24
+#define IXGBE_INCVAL_SHIFT_100   21
+
+#define IXGBE_INCVAL_SHIFT_82599 7
+#define IXGBE_INCPER_SHIFT_82599 24
+
+#define IXGBE_OVERFLOW_PERIOD    (HZ * 30)
+#define IXGBE_PTP_TX_TIMEOUT     (HZ)
+
+/* We use our own definitions instead of NSEC_PER_SEC because we want to mark
+ * the value as a ULL to force precision when bit shifting.
+ */
+#define NS_PER_SEC      1000000000ULL
+#define NS_PER_HALF_SEC  500000000ULL
+
+/* In contrast, the X550 controller has two registers, SYSTIMEH and SYSTIMEL
+ * which contain measurements of seconds and nanoseconds respectively. This
+ * matches the standard linux representation of time in the kernel. In addition,
+ * the X550 also has a SYSTIMER register which represents residue, or
+ * subnanosecond overflow adjustments. To control clock adjustment, the TIMINCA
+ * register is used, but it is unlike the X540 and 82599 devices. TIMINCA
+ * represents units of 2^-32 nanoseconds, and uses 31 bits for this, with the
+ * high bit representing whether the adjustent is positive or negative. Every
+ * clock cycle, the X550 will add 12.5 ns + TIMINCA which can result in a range
+ * of 12 to 13 nanoseconds adjustment. Unlike the 82599 and X540 devices, the
+ * X550's clock for purposes of SYSTIME generation is constant and not dependent
+ * on the link speed.
+ *
+ *           SYSTIMEH           SYSTIMEL        SYSTIMER
+ *       +--------------+  +--------------+  +-------------+
+ * X550  |      32      |  |      32      |  |     32      |
+ *       *--------------+  +--------------+  +-------------+
+ *       \____seconds___/   \_nanoseconds_/  \__2^-32 ns__/
+ *
+ * This results in a full 96 bits to represent the clock, with 32 bits for
+ * seconds, 32 bits for nanoseconds (largest value is 0d999999999 or just under
+ * 1 second) and an additional 32 bits to measure sub nanosecond adjustments for
+ * underflow of adjustments.
+ *
+ * The 32 bits of seconds for the X550 overflows every
+ *   2^32 / ( 365.25 * 24 * 60 * 60 ) = ~136 years.
+ *
+ * In order to adjust the clock frequency for the X550, the TIMINCA register is
+ * provided. This register represents a + or minus nearly 0.5 ns adjustment to
+ * the base frequency. It is measured in 2^-32 ns units, with the high bit being
+ * the sign bit. This register enables software to calculate frequency
+ * adjustments and apply them directly to the clock rate.
+ *
+ * The math for converting scaled_ppm into TIMINCA values is fairly
+ * straightforward.
+ *
+ *   TIMINCA value = ( Base_Frequency * scaled_ppm ) / 1000000ULL << 16
+ *
+ * To avoid overflow, we simply use mul_u64_u64_div_u64.
+ *
+ * This assumes that scaled_ppm is never high enough to create a value bigger
+ * than TIMINCA's 31 bits can store. This is ensured by the stack, and is
+ * measured in parts per billion. Calculating this value is also simple.
+ *   Max ppb = ( Max Adjustment / Base Frequency ) / 1000000000ULL
+ *
+ * For the X550, the Max adjustment is +/- 0.5 ns, and the base frequency is
+ * 12.5 nanoseconds. This means that the Max ppb is 39999999
+ *   Note: We subtract one in order to ensure no overflow, because the TIMINCA
+ *         register can only hold slightly under 0.5 nanoseconds.
+ *
+ * Because TIMINCA is measured in 2^-32 ns units, we have to convert 12.5 ns
+ * into 2^-32 units, which is
+ *
+ *  12.5 * 2^32 = C80000000
+ *
+ * Some revisions of hardware have a faster base frequency than the registers
+ * were defined for. To fix this, we use a timecounter structure with the
+ * proper mult and shift to convert the cycles into nanoseconds of time.
+ */
+#define IXGBE_X550_BASE_PERIOD 0xC80000000ULL
+#define INCVALUE_MASK	0x7FFFFFFF
+#define ISGN		0x80000000
+
+/**
+ * ixgbe_ptp_setup_sdp_X540
+ * @adapter: private adapter structure
+ *
+ * this function enables or disables the clock out feature on SDP0 for
+ * the X540 device. It will create a 1 second periodic output that can
+ * be used as the PPS (via an interrupt).
+ *
+ * It calculates when the system time will be on an exact second, and then
+ * aligns the start of the PPS signal to that value.
+ *
+ * This works by using the cycle counter shift and mult values in reverse, and
+ * assumes that the values we're shifting will not overflow.
+ */
+static void ixgbe_ptp_setup_sdp_X540(struct ixgbe_adapter *adapter)
+{
+	struct cyclecounter *cc = &adapter->hw_cc;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 esdp, tsauxc, clktiml, clktimh, trgttiml, trgttimh, rem;
+	u64 ns = 0, clock_edge = 0, clock_period;
+	unsigned long flags;
+
+	/* disable the pin first */
+	IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0);
+	IXGBE_WRITE_FLUSH(hw);
+
+	if (!(adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED))
+		return;
+
+	esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+
+	/* enable the SDP0 pin as output, and connected to the
+	 * native function for Timesync (ClockOut)
+	 */
+	esdp |= IXGBE_ESDP_SDP0_DIR |
+		IXGBE_ESDP_SDP0_NATIVE;
+
+	/* enable the Clock Out feature on SDP0, and allow
+	 * interrupts to occur when the pin changes
+	 */
+	tsauxc = (IXGBE_TSAUXC_EN_CLK |
+		  IXGBE_TSAUXC_SYNCLK |
+		  IXGBE_TSAUXC_SDP0_INT);
+
+	/* Determine the clock time period to use. This assumes that the
+	 * cycle counter shift is small enough to avoid overflow.
+	 */
+	clock_period = div_u64((NS_PER_HALF_SEC << cc->shift), cc->mult);
+	clktiml = (u32)(clock_period);
+	clktimh = (u32)(clock_period >> 32);
+
+	/* Read the current clock time, and save the cycle counter value */
+	spin_lock_irqsave(&adapter->tmreg_lock, flags);
+	ns = timecounter_read(&adapter->hw_tc);
+	clock_edge = adapter->hw_tc.cycle_last;
+	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+	/* Figure out how many seconds to add in order to round up */
+	div_u64_rem(ns, NS_PER_SEC, &rem);
+
+	/* Figure out how many nanoseconds to add to round the clock edge up
+	 * to the next full second
+	 */
+	rem = (NS_PER_SEC - rem);
+
+	/* Adjust the clock edge to align with the next full second. */
+	clock_edge += div_u64(((u64)rem << cc->shift), cc->mult);
+	trgttiml = (u32)clock_edge;
+	trgttimh = (u32)(clock_edge >> 32);
+
+	IXGBE_WRITE_REG(hw, IXGBE_CLKTIML, clktiml);
+	IXGBE_WRITE_REG(hw, IXGBE_CLKTIMH, clktimh);
+	IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml);
+	IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh);
+
+	IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+	IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc);
+
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ptp_setup_sdp_X550
+ * @adapter: private adapter structure
+ *
+ * Enable or disable a clock output signal on SDP 0 for X550 hardware.
+ *
+ * Use the target time feature to align the output signal on the next full
+ * second.
+ *
+ * This works by using the cycle counter shift and mult values in reverse, and
+ * assumes that the values we're shifting will not overflow.
+ */
+static void ixgbe_ptp_setup_sdp_X550(struct ixgbe_adapter *adapter)
+{
+	u32 esdp, tsauxc, freqout, trgttiml, trgttimh, rem, tssdp;
+	struct cyclecounter *cc = &adapter->hw_cc;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u64 ns = 0, clock_edge = 0;
+	struct timespec64 ts;
+	unsigned long flags;
+
+	/* disable the pin first */
+	IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0);
+	IXGBE_WRITE_FLUSH(hw);
+
+	if (!(adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED))
+		return;
+
+	esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+
+	/* enable the SDP0 pin as output, and connected to the
+	 * native function for Timesync (ClockOut)
+	 */
+	esdp |= IXGBE_ESDP_SDP0_DIR |
+		IXGBE_ESDP_SDP0_NATIVE;
+
+	/* enable the Clock Out feature on SDP0, and use Target Time 0 to
+	 * enable generation of interrupts on the clock change.
+	 */
+#define IXGBE_TSAUXC_DIS_TS_CLEAR 0x40000000
+	tsauxc = (IXGBE_TSAUXC_EN_CLK | IXGBE_TSAUXC_ST0 |
+		  IXGBE_TSAUXC_EN_TT0 | IXGBE_TSAUXC_SDP0_INT |
+		  IXGBE_TSAUXC_DIS_TS_CLEAR);
+
+	tssdp = (IXGBE_TSSDP_TS_SDP0_EN |
+		 IXGBE_TSSDP_TS_SDP0_CLK0);
+
+	/* Determine the clock time period to use. This assumes that the
+	 * cycle counter shift is small enough to avoid overflowing a 32bit
+	 * value.
+	 */
+	freqout = div_u64(NS_PER_HALF_SEC << cc->shift,  cc->mult);
+
+	/* Read the current clock time, and save the cycle counter value */
+	spin_lock_irqsave(&adapter->tmreg_lock, flags);
+	ns = timecounter_read(&adapter->hw_tc);
+	clock_edge = adapter->hw_tc.cycle_last;
+	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+	/* Figure out how far past the next second we are */
+	div_u64_rem(ns, NS_PER_SEC, &rem);
+
+	/* Figure out how many nanoseconds to add to round the clock edge up
+	 * to the next full second
+	 */
+	rem = (NS_PER_SEC - rem);
+
+	/* Adjust the clock edge to align with the next full second. */
+	clock_edge += div_u64(((u64)rem << cc->shift), cc->mult);
+
+	/* X550 hardware stores the time in 32bits of 'billions of cycles' and
+	 * 32bits of 'cycles'. There's no guarantee that cycles represents
+	 * nanoseconds. However, we can use the math from a timespec64 to
+	 * convert into the hardware representation.
+	 *
+	 * See ixgbe_ptp_read_X550() for more details.
+	 */
+	ts = ns_to_timespec64(clock_edge);
+	trgttiml = (u32)ts.tv_nsec;
+	trgttimh = (u32)ts.tv_sec;
+
+	IXGBE_WRITE_REG(hw, IXGBE_FREQOUT0, freqout);
+	IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml);
+	IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh);
+
+	IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+	IXGBE_WRITE_REG(hw, IXGBE_TSSDP, tssdp);
+	IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc);
+
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ptp_read_X550 - read cycle counter value
+ * @cc: cyclecounter structure
+ *
+ * This function reads SYSTIME registers. It is called by the cyclecounter
+ * structure to convert from internal representation into nanoseconds. We need
+ * this for X550 since some skews do not have expected clock frequency and
+ * result of SYSTIME is 32bits of "billions of cycles" and 32 bits of
+ * "cycles", rather than seconds and nanoseconds.
+ */
+static u64 ixgbe_ptp_read_X550(const struct cyclecounter *cc)
+{
+	struct ixgbe_adapter *adapter =
+		container_of(cc, struct ixgbe_adapter, hw_cc);
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct timespec64 ts;
+
+	/* storage is 32 bits of 'billions of cycles' and 32 bits of 'cycles'.
+	 * Some revisions of hardware run at a higher frequency and so the
+	 * cycles are not guaranteed to be nanoseconds. The timespec64 created
+	 * here is used for its math/conversions but does not necessarily
+	 * represent nominal time.
+	 *
+	 * It should be noted that this cyclecounter will overflow at a
+	 * non-bitmask field since we have to convert our billions of cycles
+	 * into an actual cycles count. This results in some possible weird
+	 * situations at high cycle counter stamps. However given that 32 bits
+	 * of "seconds" is ~138 years this isn't a problem. Even at the
+	 * increased frequency of some revisions, this is still ~103 years.
+	 * Since the SYSTIME values start at 0 and we never write them, it is
+	 * highly unlikely for the cyclecounter to overflow in practice.
+	 */
+	IXGBE_READ_REG(hw, IXGBE_SYSTIMR);
+	ts.tv_nsec = IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+	ts.tv_sec = IXGBE_READ_REG(hw, IXGBE_SYSTIMH);
+
+	return (u64)timespec64_to_ns(&ts);
+}
+
+/**
+ * ixgbe_ptp_read_82599 - read raw cycle counter (to be used by time counter)
+ * @cc: the cyclecounter structure
+ *
+ * this function reads the cyclecounter registers and is called by the
+ * cyclecounter structure used to construct a ns counter from the
+ * arbitrary fixed point registers
+ */
+static u64 ixgbe_ptp_read_82599(const struct cyclecounter *cc)
+{
+	struct ixgbe_adapter *adapter =
+		container_of(cc, struct ixgbe_adapter, hw_cc);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u64 stamp = 0;
+
+	stamp |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+	stamp |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32;
+
+	return stamp;
+}
+
+/**
+ * ixgbe_ptp_convert_to_hwtstamp - convert register value to hw timestamp
+ * @adapter: private adapter structure
+ * @hwtstamp: stack timestamp structure
+ * @timestamp: unsigned 64bit system time value
+ *
+ * We need to convert the adapter's RX/TXSTMP registers into a hwtstamp value
+ * which can be used by the stack's ptp functions.
+ *
+ * The lock is used to protect consistency of the cyclecounter and the SYSTIME
+ * registers. However, it does not need to protect against the Rx or Tx
+ * timestamp registers, as there can't be a new timestamp until the old one is
+ * unlatched by reading.
+ *
+ * In addition to the timestamp in hardware, some controllers need a software
+ * overflow cyclecounter, and this function takes this into account as well.
+ **/
+static void ixgbe_ptp_convert_to_hwtstamp(struct ixgbe_adapter *adapter,
+					  struct skb_shared_hwtstamps *hwtstamp,
+					  u64 timestamp)
+{
+	unsigned long flags;
+	struct timespec64 systime;
+	u64 ns;
+
+	memset(hwtstamp, 0, sizeof(*hwtstamp));
+
+	switch (adapter->hw.mac.type) {
+	/* X550 and later hardware supposedly represent time using a seconds
+	 * and nanoseconds counter, instead of raw 64bits nanoseconds. We need
+	 * to convert the timestamp into cycles before it can be fed to the
+	 * cyclecounter. We need an actual cyclecounter because some revisions
+	 * of hardware run at a higher frequency and thus the counter does
+	 * not represent seconds/nanoseconds. Instead it can be thought of as
+	 * cycles and billions of cycles.
+	 */
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		/* Upper 32 bits represent billions of cycles, lower 32 bits
+		 * represent cycles. However, we use timespec64_to_ns for the
+		 * correct math even though the units haven't been corrected
+		 * yet.
+		 */
+		systime.tv_sec = timestamp >> 32;
+		systime.tv_nsec = timestamp & 0xFFFFFFFF;
+
+		timestamp = timespec64_to_ns(&systime);
+		break;
+	default:
+		break;
+	}
+
+	spin_lock_irqsave(&adapter->tmreg_lock, flags);
+	ns = timecounter_cyc2time(&adapter->hw_tc, timestamp);
+	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+	hwtstamp->hwtstamp = ns_to_ktime(ns);
+}
+
+/**
+ * ixgbe_ptp_adjfine_82599
+ * @ptp: the ptp clock structure
+ * @scaled_ppm: scaled parts per million adjustment from base
+ *
+ * Adjust the frequency of the ptp cycle counter by the
+ * indicated scaled_ppm from the base frequency.
+ *
+ * Scaled parts per million is ppm with a 16-bit binary fractional field.
+ */
+static int ixgbe_ptp_adjfine_82599(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct ixgbe_adapter *adapter =
+		container_of(ptp, struct ixgbe_adapter, ptp_caps);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u64 incval;
+
+	smp_mb();
+	incval = READ_ONCE(adapter->base_incval);
+	incval = adjust_by_scaled_ppm(incval, scaled_ppm);
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_X540:
+		if (incval > 0xFFFFFFFFULL)
+			e_dev_warn("PTP scaled_ppm adjusted SYSTIME rate overflowed!\n");
+		IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, (u32)incval);
+		break;
+	case ixgbe_mac_82599EB:
+		if (incval > 0x00FFFFFFULL)
+			e_dev_warn("PTP scaled_ppm adjusted SYSTIME rate overflowed!\n");
+		IXGBE_WRITE_REG(hw, IXGBE_TIMINCA,
+				BIT(IXGBE_INCPER_SHIFT_82599) |
+				((u32)incval & 0x00FFFFFFUL));
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_ptp_adjfine_X550
+ * @ptp: the ptp clock structure
+ * @scaled_ppm: scaled parts per million adjustment from base
+ *
+ * Adjust the frequency of the SYSTIME registers by the indicated scaled_ppm
+ * from base frequency.
+ *
+ * Scaled parts per million is ppm with a 16-bit binary fractional field.
+ */
+static int ixgbe_ptp_adjfine_X550(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct ixgbe_adapter *adapter =
+			container_of(ptp, struct ixgbe_adapter, ptp_caps);
+	struct ixgbe_hw *hw = &adapter->hw;
+	bool neg_adj;
+	u64 rate;
+	u32 inca;
+
+	neg_adj = diff_by_scaled_ppm(IXGBE_X550_BASE_PERIOD, scaled_ppm, &rate);
+
+	/* warn if rate is too large */
+	if (rate >= INCVALUE_MASK)
+		e_dev_warn("PTP scaled_ppm adjusted SYSTIME rate overflowed!\n");
+
+	inca = rate & INCVALUE_MASK;
+	if (neg_adj)
+		inca |= ISGN;
+
+	IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, inca);
+
+	return 0;
+}
+
+/**
+ * ixgbe_ptp_adjtime
+ * @ptp: the ptp clock structure
+ * @delta: offset to adjust the cycle counter by
+ *
+ * adjust the timer by resetting the timecounter structure.
+ */
+static int ixgbe_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct ixgbe_adapter *adapter =
+		container_of(ptp, struct ixgbe_adapter, ptp_caps);
+	unsigned long flags;
+
+	spin_lock_irqsave(&adapter->tmreg_lock, flags);
+	timecounter_adjtime(&adapter->hw_tc, delta);
+	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+	if (adapter->ptp_setup_sdp)
+		adapter->ptp_setup_sdp(adapter);
+
+	return 0;
+}
+
+/**
+ * ixgbe_ptp_gettimex
+ * @ptp: the ptp clock structure
+ * @ts: timespec to hold the PHC timestamp
+ * @sts: structure to hold the system time before and after reading the PHC
+ *
+ * read the timecounter and return the correct value on ns,
+ * after converting it into a struct timespec.
+ */
+static int ixgbe_ptp_gettimex(struct ptp_clock_info *ptp,
+			      struct timespec64 *ts,
+			      struct ptp_system_timestamp *sts)
+{
+	struct ixgbe_adapter *adapter =
+		container_of(ptp, struct ixgbe_adapter, ptp_caps);
+	struct ixgbe_hw *hw = &adapter->hw;
+	unsigned long flags;
+	u64 ns, stamp;
+
+	spin_lock_irqsave(&adapter->tmreg_lock, flags);
+
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		/* Upper 32 bits represent billions of cycles, lower 32 bits
+		 * represent cycles. However, we use timespec64_to_ns for the
+		 * correct math even though the units haven't been corrected
+		 * yet.
+		 */
+		ptp_read_system_prets(sts);
+		IXGBE_READ_REG(hw, IXGBE_SYSTIMR);
+		ptp_read_system_postts(sts);
+		ts->tv_nsec = IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+		ts->tv_sec = IXGBE_READ_REG(hw, IXGBE_SYSTIMH);
+		stamp = timespec64_to_ns(ts);
+		break;
+	default:
+		ptp_read_system_prets(sts);
+		stamp = IXGBE_READ_REG(hw, IXGBE_SYSTIML);
+		ptp_read_system_postts(sts);
+		stamp |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32;
+		break;
+	}
+
+	ns = timecounter_cyc2time(&adapter->hw_tc, stamp);
+
+	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+	*ts = ns_to_timespec64(ns);
+
+	return 0;
+}
+
+/**
+ * ixgbe_ptp_settime
+ * @ptp: the ptp clock structure
+ * @ts: the timespec containing the new time for the cycle counter
+ *
+ * reset the timecounter to use a new base value instead of the kernel
+ * wall timer value.
+ */
+static int ixgbe_ptp_settime(struct ptp_clock_info *ptp,
+			     const struct timespec64 *ts)
+{
+	struct ixgbe_adapter *adapter =
+		container_of(ptp, struct ixgbe_adapter, ptp_caps);
+	unsigned long flags;
+	u64 ns = timespec64_to_ns(ts);
+
+	/* reset the timecounter */
+	spin_lock_irqsave(&adapter->tmreg_lock, flags);
+	timecounter_init(&adapter->hw_tc, &adapter->hw_cc, ns);
+	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+	if (adapter->ptp_setup_sdp)
+		adapter->ptp_setup_sdp(adapter);
+	return 0;
+}
+
+/**
+ * ixgbe_ptp_feature_enable
+ * @ptp: the ptp clock structure
+ * @rq: the requested feature to change
+ * @on: whether to enable or disable the feature
+ *
+ * enable (or disable) ancillary features of the phc subsystem.
+ * our driver only supports the PPS feature on the X540
+ */
+static int ixgbe_ptp_feature_enable(struct ptp_clock_info *ptp,
+				    struct ptp_clock_request *rq, int on)
+{
+	struct ixgbe_adapter *adapter =
+		container_of(ptp, struct ixgbe_adapter, ptp_caps);
+
+	/**
+	 * When PPS is enabled, unmask the interrupt for the ClockOut
+	 * feature, so that the interrupt handler can send the PPS
+	 * event when the clock SDP triggers. Clear mask when PPS is
+	 * disabled
+	 */
+	if (rq->type != PTP_CLK_REQ_PPS || !adapter->ptp_setup_sdp)
+		return -ENOTSUPP;
+
+	if (on)
+		adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED;
+	else
+		adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED;
+
+	adapter->ptp_setup_sdp(adapter);
+	return 0;
+}
+
+/**
+ * ixgbe_ptp_check_pps_event
+ * @adapter: the private adapter structure
+ *
+ * This function is called by the interrupt routine when checking for
+ * interrupts. It will check and handle a pps event.
+ */
+void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ptp_clock_event event;
+
+	event.type = PTP_CLOCK_PPS;
+
+	/* this check is necessary in case the interrupt was enabled via some
+	 * alternative means (ex. debug_fs). Better to check here than
+	 * everywhere that calls this function.
+	 */
+	if (!adapter->ptp_clock)
+		return;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_X540:
+		ptp_clock_event(adapter->ptp_clock, &event);
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * ixgbe_ptp_overflow_check - watchdog task to detect SYSTIME overflow
+ * @adapter: private adapter struct
+ *
+ * this watchdog task periodically reads the timecounter
+ * in order to prevent missing when the system time registers wrap
+ * around. This needs to be run approximately twice a minute.
+ */
+void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter)
+{
+	bool timeout = time_is_before_jiffies(adapter->last_overflow_check +
+					     IXGBE_OVERFLOW_PERIOD);
+	unsigned long flags;
+
+	if (timeout) {
+		/* Update the timecounter */
+		spin_lock_irqsave(&adapter->tmreg_lock, flags);
+		timecounter_read(&adapter->hw_tc);
+		spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+		adapter->last_overflow_check = jiffies;
+	}
+}
+
+/**
+ * ixgbe_ptp_rx_hang - detect error case when Rx timestamp registers latched
+ * @adapter: private network adapter structure
+ *
+ * this watchdog task is scheduled to detect error case where hardware has
+ * dropped an Rx packet that was timestamped when the ring is full. The
+ * particular error is rare but leaves the device in a state unable to timestamp
+ * any future packets.
+ */
+void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL);
+	struct ixgbe_ring *rx_ring;
+	unsigned long rx_event;
+	int n;
+
+	/* if we don't have a valid timestamp in the registers, just update the
+	 * timeout counter and exit
+	 */
+	if (!(tsyncrxctl & IXGBE_TSYNCRXCTL_VALID)) {
+		adapter->last_rx_ptp_check = jiffies;
+		return;
+	}
+
+	/* determine the most recent watchdog or rx_timestamp event */
+	rx_event = adapter->last_rx_ptp_check;
+	for (n = 0; n < adapter->num_rx_queues; n++) {
+		rx_ring = adapter->rx_ring[n];
+		if (time_after(rx_ring->last_rx_timestamp, rx_event))
+			rx_event = rx_ring->last_rx_timestamp;
+	}
+
+	/* only need to read the high RXSTMP register to clear the lock */
+	if (time_is_before_jiffies(rx_event + 5 * HZ)) {
+		IXGBE_READ_REG(hw, IXGBE_RXSTMPH);
+		adapter->last_rx_ptp_check = jiffies;
+
+		adapter->rx_hwtstamp_cleared++;
+		e_warn(drv, "clearing RX Timestamp hang\n");
+	}
+}
+
+/**
+ * ixgbe_ptp_clear_tx_timestamp - utility function to clear Tx timestamp state
+ * @adapter: the private adapter structure
+ *
+ * This function should be called whenever the state related to a Tx timestamp
+ * needs to be cleared. This helps ensure that all related bits are reset for
+ * the next Tx timestamp event.
+ */
+static void ixgbe_ptp_clear_tx_timestamp(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	IXGBE_READ_REG(hw, IXGBE_TXSTMPH);
+	if (adapter->ptp_tx_skb) {
+		dev_kfree_skb_any(adapter->ptp_tx_skb);
+		adapter->ptp_tx_skb = NULL;
+	}
+	clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state);
+}
+
+/**
+ * ixgbe_ptp_tx_hang - detect error case where Tx timestamp never finishes
+ * @adapter: private network adapter structure
+ */
+void ixgbe_ptp_tx_hang(struct ixgbe_adapter *adapter)
+{
+	bool timeout = time_is_before_jiffies(adapter->ptp_tx_start +
+					      IXGBE_PTP_TX_TIMEOUT);
+
+	if (!adapter->ptp_tx_skb)
+		return;
+
+	if (!test_bit(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state))
+		return;
+
+	/* If we haven't received a timestamp within the timeout, it is
+	 * reasonable to assume that it will never occur, so we can unlock the
+	 * timestamp bit when this occurs.
+	 */
+	if (timeout) {
+		cancel_work_sync(&adapter->ptp_tx_work);
+		ixgbe_ptp_clear_tx_timestamp(adapter);
+		adapter->tx_hwtstamp_timeouts++;
+		e_warn(drv, "clearing Tx timestamp hang\n");
+	}
+}
+
+/**
+ * ixgbe_ptp_tx_hwtstamp - utility function which checks for TX time stamp
+ * @adapter: the private adapter struct
+ *
+ * if the timestamp is valid, we convert it into the timecounter ns
+ * value, then store that result into the shhwtstamps structure which
+ * is passed up the network stack
+ */
+static void ixgbe_ptp_tx_hwtstamp(struct ixgbe_adapter *adapter)
+{
+	struct sk_buff *skb = adapter->ptp_tx_skb;
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct skb_shared_hwtstamps shhwtstamps;
+	u64 regval = 0;
+
+	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPL);
+	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPH) << 32;
+	ixgbe_ptp_convert_to_hwtstamp(adapter, &shhwtstamps, regval);
+
+	/* Handle cleanup of the ptp_tx_skb ourselves, and unlock the state
+	 * bit prior to notifying the stack via skb_tstamp_tx(). This prevents
+	 * well behaved applications from attempting to timestamp again prior
+	 * to the lock bit being clear.
+	 */
+	adapter->ptp_tx_skb = NULL;
+	clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state);
+
+	/* Notify the stack and then free the skb after we've unlocked */
+	skb_tstamp_tx(skb, &shhwtstamps);
+	dev_kfree_skb_any(skb);
+}
+
+/**
+ * ixgbe_ptp_tx_hwtstamp_work
+ * @work: pointer to the work struct
+ *
+ * This work item polls TSYNCTXCTL valid bit to determine when a Tx hardware
+ * timestamp has been taken for the current skb. It is necessary, because the
+ * descriptor's "done" bit does not correlate with the timestamp event.
+ */
+static void ixgbe_ptp_tx_hwtstamp_work(struct work_struct *work)
+{
+	struct ixgbe_adapter *adapter = container_of(work, struct ixgbe_adapter,
+						     ptp_tx_work);
+	struct ixgbe_hw *hw = &adapter->hw;
+	bool timeout = time_is_before_jiffies(adapter->ptp_tx_start +
+					      IXGBE_PTP_TX_TIMEOUT);
+	u32 tsynctxctl;
+
+	/* we have to have a valid skb to poll for a timestamp */
+	if (!adapter->ptp_tx_skb) {
+		ixgbe_ptp_clear_tx_timestamp(adapter);
+		return;
+	}
+
+	/* stop polling once we have a valid timestamp */
+	tsynctxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL);
+	if (tsynctxctl & IXGBE_TSYNCTXCTL_VALID) {
+		ixgbe_ptp_tx_hwtstamp(adapter);
+		return;
+	}
+
+	if (timeout) {
+		ixgbe_ptp_clear_tx_timestamp(adapter);
+		adapter->tx_hwtstamp_timeouts++;
+		e_warn(drv, "clearing Tx Timestamp hang\n");
+	} else {
+		/* reschedule to keep checking if it's not available yet */
+		schedule_work(&adapter->ptp_tx_work);
+	}
+}
+
+/**
+ * ixgbe_ptp_rx_pktstamp - utility function to get RX time stamp from buffer
+ * @q_vector: structure containing interrupt and ring information
+ * @skb: the packet
+ *
+ * This function will be called by the Rx routine of the timestamp for this
+ * packet is stored in the buffer. The value is stored in little endian format
+ * starting at the end of the packet data.
+ */
+void ixgbe_ptp_rx_pktstamp(struct ixgbe_q_vector *q_vector,
+			   struct sk_buff *skb)
+{
+	__le64 regval;
+
+	/* copy the bits out of the skb, and then trim the skb length */
+	skb_copy_bits(skb, skb->len - IXGBE_TS_HDR_LEN, &regval,
+		      IXGBE_TS_HDR_LEN);
+	__pskb_trim(skb, skb->len - IXGBE_TS_HDR_LEN);
+
+	/* The timestamp is recorded in little endian format, and is stored at
+	 * the end of the packet.
+	 *
+	 * DWORD: N              N + 1      N + 2
+	 * Field: End of Packet  SYSTIMH    SYSTIML
+	 */
+	ixgbe_ptp_convert_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb),
+				      le64_to_cpu(regval));
+}
+
+/**
+ * ixgbe_ptp_rx_rgtstamp - utility function which checks for RX time stamp
+ * @q_vector: structure containing interrupt and ring information
+ * @skb: particular skb to send timestamp with
+ *
+ * if the timestamp is valid, we convert it into the timecounter ns
+ * value, then store that result into the shhwtstamps structure which
+ * is passed up the network stack
+ */
+void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *q_vector,
+			   struct sk_buff *skb)
+{
+	struct ixgbe_adapter *adapter;
+	struct ixgbe_hw *hw;
+	u64 regval = 0;
+	u32 tsyncrxctl;
+
+	/* we cannot process timestamps on a ring without a q_vector */
+	if (!q_vector || !q_vector->adapter)
+		return;
+
+	adapter = q_vector->adapter;
+	hw = &adapter->hw;
+
+	/* Read the tsyncrxctl register afterwards in order to prevent taking an
+	 * I/O hit on every packet.
+	 */
+
+	tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL);
+	if (!(tsyncrxctl & IXGBE_TSYNCRXCTL_VALID))
+		return;
+
+	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPL);
+	regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPH) << 32;
+
+	ixgbe_ptp_convert_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
+}
+
+/**
+ * ixgbe_ptp_get_ts_config - get current hardware timestamping configuration
+ * @adapter: pointer to adapter structure
+ * @ifr: ioctl data
+ *
+ * This function returns the current timestamping settings. Rather than
+ * attempt to deconstruct registers to fill in the values, simply keep a copy
+ * of the old settings around, and return a copy when requested.
+ */
+int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr)
+{
+	struct hwtstamp_config *config = &adapter->tstamp_config;
+
+	return copy_to_user(ifr->ifr_data, config,
+			    sizeof(*config)) ? -EFAULT : 0;
+}
+
+/**
+ * ixgbe_ptp_set_timestamp_mode - setup the hardware for the requested mode
+ * @adapter: the private ixgbe adapter structure
+ * @config: the hwtstamp configuration requested
+ *
+ * Outgoing time stamping can be enabled and disabled. Play nice and
+ * disable it when requested, although it shouldn't cause any overhead
+ * when no packet needs it. At most one packet in the queue may be
+ * marked for time stamping, otherwise it would be impossible to tell
+ * for sure to which packet the hardware time stamp belongs.
+ *
+ * Incoming time stamping has to be configured via the hardware
+ * filters. Not all combinations are supported, in particular event
+ * type has to be specified. Matching the kind of event packet is
+ * not supported, with the exception of "all V2 events regardless of
+ * level 2 or 4".
+ *
+ * Since hardware always timestamps Path delay packets when timestamping V2
+ * packets, regardless of the type specified in the register, only use V2
+ * Event mode. This more accurately tells the user what the hardware is going
+ * to do anyways.
+ *
+ * Note: this may modify the hwtstamp configuration towards a more general
+ * mode, if required to support the specifically requested mode.
+ */
+static int ixgbe_ptp_set_timestamp_mode(struct ixgbe_adapter *adapter,
+				 struct hwtstamp_config *config)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 tsync_tx_ctl = IXGBE_TSYNCTXCTL_ENABLED;
+	u32 tsync_rx_ctl = IXGBE_TSYNCRXCTL_ENABLED;
+	u32 tsync_rx_mtrl = PTP_EV_PORT << 16;
+	u32 aflags = adapter->flags;
+	bool is_l2 = false;
+	u32 regval;
+
+	switch (config->tx_type) {
+	case HWTSTAMP_TX_OFF:
+		tsync_tx_ctl = 0;
+		break;
+	case HWTSTAMP_TX_ON:
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (config->rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		tsync_rx_ctl = 0;
+		tsync_rx_mtrl = 0;
+		aflags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+			    IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1;
+		tsync_rx_mtrl |= IXGBE_RXMTRL_V1_SYNC_MSG;
+		aflags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+			   IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L4_V1;
+		tsync_rx_mtrl |= IXGBE_RXMTRL_V1_DELAY_REQ_MSG;
+		aflags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+			   IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_EVENT_V2;
+		is_l2 = true;
+		config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		aflags |= (IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+			   IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+		break;
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_NTP_ALL:
+	case HWTSTAMP_FILTER_ALL:
+		/* The X550 controller is capable of timestamping all packets,
+		 * which allows it to accept any filter.
+		 */
+		if (hw->mac.type >= ixgbe_mac_X550) {
+			tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_ALL;
+			config->rx_filter = HWTSTAMP_FILTER_ALL;
+			aflags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED;
+			break;
+		}
+		fallthrough;
+	default:
+		/*
+		 * register RXMTRL must be set in order to do V1 packets,
+		 * therefore it is not possible to time stamp both V1 Sync and
+		 * Delay_Req messages and hardware does not support
+		 * timestamping all packets => return error
+		 */
+		config->rx_filter = HWTSTAMP_FILTER_NONE;
+		return -ERANGE;
+	}
+
+	if (hw->mac.type == ixgbe_mac_82598EB) {
+		adapter->flags &= ~(IXGBE_FLAG_RX_HWTSTAMP_ENABLED |
+				    IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER);
+		if (tsync_rx_ctl | tsync_tx_ctl)
+			return -ERANGE;
+		return 0;
+	}
+
+	/* Per-packet timestamping only works if the filter is set to all
+	 * packets. Since this is desired, always timestamp all packets as long
+	 * as any Rx filter was configured.
+	 */
+	switch (hw->mac.type) {
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		/* enable timestamping all packets only if at least some
+		 * packets were requested. Otherwise, play nice and disable
+		 * timestamping
+		 */
+		if (config->rx_filter == HWTSTAMP_FILTER_NONE)
+			break;
+
+		tsync_rx_ctl = IXGBE_TSYNCRXCTL_ENABLED |
+			       IXGBE_TSYNCRXCTL_TYPE_ALL |
+			       IXGBE_TSYNCRXCTL_TSIP_UT_EN;
+		config->rx_filter = HWTSTAMP_FILTER_ALL;
+		aflags |= IXGBE_FLAG_RX_HWTSTAMP_ENABLED;
+		aflags &= ~IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER;
+		is_l2 = true;
+		break;
+	default:
+		break;
+	}
+
+	/* define ethertype filter for timestamping L2 packets */
+	if (is_l2)
+		IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_1588),
+				(IXGBE_ETQF_FILTER_EN | /* enable filter */
+				 IXGBE_ETQF_1588 | /* enable timestamping */
+				 ETH_P_1588));     /* 1588 eth protocol type */
+	else
+		IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_1588), 0);
+
+	/* enable/disable TX */
+	regval = IXGBE_READ_REG(hw, IXGBE_TSYNCTXCTL);
+	regval &= ~IXGBE_TSYNCTXCTL_ENABLED;
+	regval |= tsync_tx_ctl;
+	IXGBE_WRITE_REG(hw, IXGBE_TSYNCTXCTL, regval);
+
+	/* enable/disable RX */
+	regval = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL);
+	regval &= ~(IXGBE_TSYNCRXCTL_ENABLED | IXGBE_TSYNCRXCTL_TYPE_MASK);
+	regval |= tsync_rx_ctl;
+	IXGBE_WRITE_REG(hw, IXGBE_TSYNCRXCTL, regval);
+
+	/* define which PTP packets are time stamped */
+	IXGBE_WRITE_REG(hw, IXGBE_RXMTRL, tsync_rx_mtrl);
+
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* configure adapter flags only when HW is actually configured */
+	adapter->flags = aflags;
+
+	/* clear TX/RX time stamp registers, just to be sure */
+	ixgbe_ptp_clear_tx_timestamp(adapter);
+	IXGBE_READ_REG(hw, IXGBE_RXSTMPH);
+
+	return 0;
+}
+
+/**
+ * ixgbe_ptp_set_ts_config - user entry point for timestamp mode
+ * @adapter: pointer to adapter struct
+ * @ifr: ioctl data
+ *
+ * Set hardware to requested mode. If unsupported, return an error with no
+ * changes. Otherwise, store the mode for future reference.
+ */
+int ixgbe_ptp_set_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr)
+{
+	struct hwtstamp_config config;
+	int err;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	err = ixgbe_ptp_set_timestamp_mode(adapter, &config);
+	if (err)
+		return err;
+
+	/* save these settings for future reference */
+	memcpy(&adapter->tstamp_config, &config,
+	       sizeof(adapter->tstamp_config));
+
+	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+		-EFAULT : 0;
+}
+
+static void ixgbe_ptp_link_speed_adjust(struct ixgbe_adapter *adapter,
+					u32 *shift, u32 *incval)
+{
+	/**
+	 * Scale the NIC cycle counter by a large factor so that
+	 * relatively small corrections to the frequency can be added
+	 * or subtracted. The drawbacks of a large factor include
+	 * (a) the clock register overflows more quickly, (b) the cycle
+	 * counter structure must be able to convert the systime value
+	 * to nanoseconds using only a multiplier and a right-shift,
+	 * and (c) the value must fit within the timinca register space
+	 * => math based on internal DMA clock rate and available bits
+	 *
+	 * Note that when there is no link, internal DMA clock is same as when
+	 * link speed is 10Gb. Set the registers correctly even when link is
+	 * down to preserve the clock setting
+	 */
+	switch (adapter->link_speed) {
+	case IXGBE_LINK_SPEED_100_FULL:
+		*shift = IXGBE_INCVAL_SHIFT_100;
+		*incval = IXGBE_INCVAL_100;
+		break;
+	case IXGBE_LINK_SPEED_1GB_FULL:
+		*shift = IXGBE_INCVAL_SHIFT_1GB;
+		*incval = IXGBE_INCVAL_1GB;
+		break;
+	case IXGBE_LINK_SPEED_10GB_FULL:
+	default:
+		*shift = IXGBE_INCVAL_SHIFT_10GB;
+		*incval = IXGBE_INCVAL_10GB;
+		break;
+	}
+}
+
+/**
+ * ixgbe_ptp_start_cyclecounter - create the cycle counter from hw
+ * @adapter: pointer to the adapter structure
+ *
+ * This function should be called to set the proper values for the TIMINCA
+ * register and tell the cyclecounter structure what the tick rate of SYSTIME
+ * is. It does not directly modify SYSTIME registers or the timecounter
+ * structure. It should be called whenever a new TIMINCA value is necessary,
+ * such as during initialization or when the link speed changes.
+ */
+void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct cyclecounter cc;
+	unsigned long flags;
+	u32 incval = 0;
+	u32 fuse0 = 0;
+
+	/* For some of the boards below this mask is technically incorrect.
+	 * The timestamp mask overflows at approximately 61bits. However the
+	 * particular hardware does not overflow on an even bitmask value.
+	 * Instead, it overflows due to conversion of upper 32bits billions of
+	 * cycles. Timecounters are not really intended for this purpose so
+	 * they do not properly function if the overflow point isn't 2^N-1.
+	 * However, the actual SYSTIME values in question take ~138 years to
+	 * overflow. In practice this means they won't actually overflow. A
+	 * proper fix to this problem would require modification of the
+	 * timecounter delta calculations.
+	 */
+	cc.mask = CLOCKSOURCE_MASK(64);
+	cc.mult = 1;
+	cc.shift = 0;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_X550EM_x:
+		/* SYSTIME assumes X550EM_x board frequency is 300Mhz, and is
+		 * designed to represent seconds and nanoseconds when this is
+		 * the case. However, some revisions of hardware have a 400Mhz
+		 * clock and we have to compensate for this frequency
+		 * variation using corrected mult and shift values.
+		 */
+		fuse0 = IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0));
+		if (!(fuse0 & IXGBE_FUSES0_300MHZ)) {
+			cc.mult = 3;
+			cc.shift = 2;
+		}
+		fallthrough;
+	case ixgbe_mac_x550em_a:
+	case ixgbe_mac_X550:
+		cc.read = ixgbe_ptp_read_X550;
+		break;
+	case ixgbe_mac_X540:
+		cc.read = ixgbe_ptp_read_82599;
+
+		ixgbe_ptp_link_speed_adjust(adapter, &cc.shift, &incval);
+		IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, incval);
+		break;
+	case ixgbe_mac_82599EB:
+		cc.read = ixgbe_ptp_read_82599;
+
+		ixgbe_ptp_link_speed_adjust(adapter, &cc.shift, &incval);
+		incval >>= IXGBE_INCVAL_SHIFT_82599;
+		cc.shift -= IXGBE_INCVAL_SHIFT_82599;
+		IXGBE_WRITE_REG(hw, IXGBE_TIMINCA,
+				BIT(IXGBE_INCPER_SHIFT_82599) | incval);
+		break;
+	default:
+		/* other devices aren't supported */
+		return;
+	}
+
+	/* update the base incval used to calculate frequency adjustment */
+	WRITE_ONCE(adapter->base_incval, incval);
+	smp_mb();
+
+	/* need lock to prevent incorrect read while modifying cyclecounter */
+	spin_lock_irqsave(&adapter->tmreg_lock, flags);
+	memcpy(&adapter->hw_cc, &cc, sizeof(adapter->hw_cc));
+	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+}
+
+/**
+ * ixgbe_ptp_init_systime - Initialize SYSTIME registers
+ * @adapter: the ixgbe private board structure
+ *
+ * Initialize and start the SYSTIME registers.
+ */
+static void ixgbe_ptp_init_systime(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 tsauxc;
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+	case ixgbe_mac_X550:
+		tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC);
+
+		/* Reset SYSTIME registers to 0 */
+		IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0);
+		IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
+		IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
+
+		/* Reset interrupt settings */
+		IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS);
+		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC);
+
+		/* Activate the SYSTIME counter */
+		IXGBE_WRITE_REG(hw, IXGBE_TSAUXC,
+				tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME);
+		break;
+	case ixgbe_mac_X540:
+	case ixgbe_mac_82599EB:
+		/* Reset SYSTIME registers to 0 */
+		IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
+		IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
+		break;
+	default:
+		/* Other devices aren't supported */
+		return;
+	}
+
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ptp_reset
+ * @adapter: the ixgbe private board structure
+ *
+ * When the MAC resets, all the hardware bits for timesync are reset. This
+ * function is used to re-enable the device for PTP based on current settings.
+ * We do lose the current clock time, so just reset the cyclecounter to the
+ * system real clock time.
+ *
+ * This function will maintain hwtstamp_config settings, and resets the SDP
+ * output if it was enabled.
+ */
+void ixgbe_ptp_reset(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	unsigned long flags;
+
+	/* reset the hardware timestamping mode */
+	ixgbe_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config);
+
+	/* 82598 does not support PTP */
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return;
+
+	ixgbe_ptp_start_cyclecounter(adapter);
+
+	ixgbe_ptp_init_systime(adapter);
+
+	spin_lock_irqsave(&adapter->tmreg_lock, flags);
+	timecounter_init(&adapter->hw_tc, &adapter->hw_cc,
+			 ktime_to_ns(ktime_get_real()));
+	spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+	adapter->last_overflow_check = jiffies;
+
+	/* Now that the shift has been calculated and the systime
+	 * registers reset, (re-)enable the Clock out feature
+	 */
+	if (adapter->ptp_setup_sdp)
+		adapter->ptp_setup_sdp(adapter);
+}
+
+/**
+ * ixgbe_ptp_create_clock
+ * @adapter: the ixgbe private adapter structure
+ *
+ * This function performs setup of the user entry point function table and
+ * initializes the PTP clock device, which is used to access the clock-like
+ * features of the PTP core. It will be called by ixgbe_ptp_init, and may
+ * reuse a previously initialized clock (such as during a suspend/resume
+ * cycle).
+ */
+static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	long err;
+
+	/* do nothing if we already have a clock device */
+	if (!IS_ERR_OR_NULL(adapter->ptp_clock))
+		return 0;
+
+	switch (adapter->hw.mac.type) {
+	case ixgbe_mac_X540:
+		snprintf(adapter->ptp_caps.name,
+			 sizeof(adapter->ptp_caps.name),
+			 "%s", netdev->name);
+		adapter->ptp_caps.owner = THIS_MODULE;
+		adapter->ptp_caps.max_adj = 250000000;
+		adapter->ptp_caps.n_alarm = 0;
+		adapter->ptp_caps.n_ext_ts = 0;
+		adapter->ptp_caps.n_per_out = 0;
+		adapter->ptp_caps.pps = 1;
+		adapter->ptp_caps.adjfine = ixgbe_ptp_adjfine_82599;
+		adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
+		adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex;
+		adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
+		adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
+		adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_X540;
+		break;
+	case ixgbe_mac_82599EB:
+		snprintf(adapter->ptp_caps.name,
+			 sizeof(adapter->ptp_caps.name),
+			 "%s", netdev->name);
+		adapter->ptp_caps.owner = THIS_MODULE;
+		adapter->ptp_caps.max_adj = 250000000;
+		adapter->ptp_caps.n_alarm = 0;
+		adapter->ptp_caps.n_ext_ts = 0;
+		adapter->ptp_caps.n_per_out = 0;
+		adapter->ptp_caps.pps = 0;
+		adapter->ptp_caps.adjfine = ixgbe_ptp_adjfine_82599;
+		adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
+		adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex;
+		adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
+		adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
+		break;
+	case ixgbe_mac_X550:
+	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
+		snprintf(adapter->ptp_caps.name, 16, "%s", netdev->name);
+		adapter->ptp_caps.owner = THIS_MODULE;
+		adapter->ptp_caps.max_adj = 30000000;
+		adapter->ptp_caps.n_alarm = 0;
+		adapter->ptp_caps.n_ext_ts = 0;
+		adapter->ptp_caps.n_per_out = 0;
+		adapter->ptp_caps.pps = 1;
+		adapter->ptp_caps.adjfine = ixgbe_ptp_adjfine_X550;
+		adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime;
+		adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex;
+		adapter->ptp_caps.settime64 = ixgbe_ptp_settime;
+		adapter->ptp_caps.enable = ixgbe_ptp_feature_enable;
+		adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_X550;
+		break;
+	default:
+		adapter->ptp_clock = NULL;
+		adapter->ptp_setup_sdp = NULL;
+		return -EOPNOTSUPP;
+	}
+
+	adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
+						&adapter->pdev->dev);
+	if (IS_ERR(adapter->ptp_clock)) {
+		err = PTR_ERR(adapter->ptp_clock);
+		adapter->ptp_clock = NULL;
+		e_dev_err("ptp_clock_register failed\n");
+		return err;
+	} else if (adapter->ptp_clock)
+		e_dev_info("registered PHC device on %s\n", netdev->name);
+
+	/* set default timestamp mode to disabled here. We do this in
+	 * create_clock instead of init, because we don't want to override the
+	 * previous settings during a resume cycle.
+	 */
+	adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+	adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
+
+	return 0;
+}
+
+/**
+ * ixgbe_ptp_init
+ * @adapter: the ixgbe private adapter structure
+ *
+ * This function performs the required steps for enabling PTP
+ * support. If PTP support has already been loaded it simply calls the
+ * cyclecounter init routine and exits.
+ */
+void ixgbe_ptp_init(struct ixgbe_adapter *adapter)
+{
+	/* initialize the spin lock first since we can't control when a user
+	 * will call the entry functions once we have initialized the clock
+	 * device
+	 */
+	spin_lock_init(&adapter->tmreg_lock);
+
+	/* obtain a PTP device, or re-use an existing device */
+	if (ixgbe_ptp_create_clock(adapter))
+		return;
+
+	/* we have a clock so we can initialize work now */
+	INIT_WORK(&adapter->ptp_tx_work, ixgbe_ptp_tx_hwtstamp_work);
+
+	/* reset the PTP related hardware bits */
+	ixgbe_ptp_reset(adapter);
+
+	/* enter the IXGBE_PTP_RUNNING state */
+	set_bit(__IXGBE_PTP_RUNNING, &adapter->state);
+
+	return;
+}
+
+/**
+ * ixgbe_ptp_suspend - stop PTP work items
+ * @adapter: pointer to adapter struct
+ *
+ * this function suspends PTP activity, and prevents more PTP work from being
+ * generated, but does not destroy the PTP clock device.
+ */
+void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter)
+{
+	/* Leave the IXGBE_PTP_RUNNING state. */
+	if (!test_and_clear_bit(__IXGBE_PTP_RUNNING, &adapter->state))
+		return;
+
+	adapter->flags2 &= ~IXGBE_FLAG2_PTP_PPS_ENABLED;
+	if (adapter->ptp_setup_sdp)
+		adapter->ptp_setup_sdp(adapter);
+
+	/* ensure that we cancel any pending PTP Tx work item in progress */
+	cancel_work_sync(&adapter->ptp_tx_work);
+	ixgbe_ptp_clear_tx_timestamp(adapter);
+}
+
+/**
+ * ixgbe_ptp_stop - close the PTP device
+ * @adapter: pointer to adapter struct
+ *
+ * completely destroy the PTP device, should only be called when the device is
+ * being fully closed.
+ */
+void ixgbe_ptp_stop(struct ixgbe_adapter *adapter)
+{
+	/* first, suspend PTP activity */
+	ixgbe_ptp_suspend(adapter);
+
+	/* disable the PTP clock device */
+	if (adapter->ptp_clock) {
+		ptp_clock_unregister(adapter->ptp_clock);
+		adapter->ptp_clock = NULL;
+		e_dev_info("removed PHC on %s\n",
+			   adapter->netdev->name);
+	}
+}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
new file mode 100644
index 0000000000..ea88ac04ab
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -0,0 +1,1855 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/ipv6.h>
+#include <linux/if_bridge.h>
+#ifdef NETIF_F_HW_VLAN_CTAG_TX
+#include <linux/if_vlan.h>
+#endif
+
+#include "ixgbe.h"
+#include "ixgbe_type.h"
+#include "ixgbe_sriov.h"
+
+#ifdef CONFIG_PCI_IOV
+static inline void ixgbe_alloc_vf_macvlans(struct ixgbe_adapter *adapter,
+					   unsigned int num_vfs)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct vf_macvlans *mv_list;
+	int num_vf_macvlans, i;
+
+	/* Initialize list of VF macvlans */
+	INIT_LIST_HEAD(&adapter->vf_mvs.l);
+
+	num_vf_macvlans = hw->mac.num_rar_entries -
+			  (IXGBE_MAX_PF_MACVLANS + 1 + num_vfs);
+	if (!num_vf_macvlans)
+		return;
+
+	mv_list = kcalloc(num_vf_macvlans, sizeof(struct vf_macvlans),
+			  GFP_KERNEL);
+	if (mv_list) {
+		for (i = 0; i < num_vf_macvlans; i++) {
+			mv_list[i].vf = -1;
+			mv_list[i].free = true;
+			list_add(&mv_list[i].l, &adapter->vf_mvs.l);
+		}
+		adapter->mv_list = mv_list;
+	}
+}
+
+static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter,
+				unsigned int num_vfs)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	if (adapter->xdp_prog) {
+		e_warn(probe, "SRIOV is not supported with XDP\n");
+		return -EINVAL;
+	}
+
+	/* Enable VMDq flag so device will be set in VM mode */
+	adapter->flags |= IXGBE_FLAG_SRIOV_ENABLED |
+			  IXGBE_FLAG_VMDQ_ENABLED;
+
+	/* Allocate memory for per VF control structures */
+	adapter->vfinfo = kcalloc(num_vfs, sizeof(struct vf_data_storage),
+				  GFP_KERNEL);
+	if (!adapter->vfinfo)
+		return -ENOMEM;
+
+	adapter->num_vfs = num_vfs;
+
+	ixgbe_alloc_vf_macvlans(adapter, num_vfs);
+	adapter->ring_feature[RING_F_VMDQ].offset = num_vfs;
+
+	/* Initialize default switching mode VEB */
+	IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
+	adapter->bridge_mode = BRIDGE_MODE_VEB;
+
+	/* limit traffic classes based on VFs enabled */
+	if ((adapter->hw.mac.type == ixgbe_mac_82599EB) && (num_vfs < 16)) {
+		adapter->dcb_cfg.num_tcs.pg_tcs = MAX_TRAFFIC_CLASS;
+		adapter->dcb_cfg.num_tcs.pfc_tcs = MAX_TRAFFIC_CLASS;
+	} else if (num_vfs < 32) {
+		adapter->dcb_cfg.num_tcs.pg_tcs = 4;
+		adapter->dcb_cfg.num_tcs.pfc_tcs = 4;
+	} else {
+		adapter->dcb_cfg.num_tcs.pg_tcs = 1;
+		adapter->dcb_cfg.num_tcs.pfc_tcs = 1;
+	}
+
+	/* Disable RSC when in SR-IOV mode */
+	adapter->flags2 &= ~(IXGBE_FLAG2_RSC_CAPABLE |
+			     IXGBE_FLAG2_RSC_ENABLED);
+
+	for (i = 0; i < num_vfs; i++) {
+		/* enable spoof checking for all VFs */
+		adapter->vfinfo[i].spoofchk_enabled = true;
+		adapter->vfinfo[i].link_enable = true;
+
+		/* We support VF RSS querying only for 82599 and x540
+		 * devices at the moment. These devices share RSS
+		 * indirection table and RSS hash key with PF therefore
+		 * we want to disable the querying by default.
+		 */
+		adapter->vfinfo[i].rss_query_enabled = false;
+
+		/* Untrust all VFs */
+		adapter->vfinfo[i].trusted = false;
+
+		/* set the default xcast mode */
+		adapter->vfinfo[i].xcast_mode = IXGBEVF_XCAST_MODE_NONE;
+	}
+
+	e_info(probe, "SR-IOV enabled with %d VFs\n", num_vfs);
+	return 0;
+}
+
+/**
+ * ixgbe_get_vfs - Find and take references to all vf devices
+ * @adapter: Pointer to adapter struct
+ */
+static void ixgbe_get_vfs(struct ixgbe_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	u16 vendor = pdev->vendor;
+	struct pci_dev *vfdev;
+	int vf = 0;
+	u16 vf_id;
+	int pos;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+	if (!pos)
+		return;
+	pci_read_config_word(pdev, pos + PCI_SRIOV_VF_DID, &vf_id);
+
+	vfdev = pci_get_device(vendor, vf_id, NULL);
+	for (; vfdev; vfdev = pci_get_device(vendor, vf_id, vfdev)) {
+		if (!vfdev->is_virtfn)
+			continue;
+		if (vfdev->physfn != pdev)
+			continue;
+		if (vf >= adapter->num_vfs)
+			continue;
+		pci_dev_get(vfdev);
+		adapter->vfinfo[vf].vfdev = vfdev;
+		++vf;
+	}
+}
+
+/* Note this function is called when the user wants to enable SR-IOV
+ * VFs using the now deprecated module parameter
+ */
+void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs)
+{
+	int pre_existing_vfs = 0;
+	unsigned int num_vfs;
+
+	pre_existing_vfs = pci_num_vf(adapter->pdev);
+	if (!pre_existing_vfs && !max_vfs)
+		return;
+
+	/* If there are pre-existing VFs then we have to force
+	 * use of that many - over ride any module parameter value.
+	 * This may result from the user unloading the PF driver
+	 * while VFs were assigned to guest VMs or because the VFs
+	 * have been created via the new PCI SR-IOV sysfs interface.
+	 */
+	if (pre_existing_vfs) {
+		num_vfs = pre_existing_vfs;
+		dev_warn(&adapter->pdev->dev,
+			 "Virtual Functions already enabled for this device - Please reload all VF drivers to avoid spoofed packet errors\n");
+	} else {
+		int err;
+		/*
+		 * The 82599 supports up to 64 VFs per physical function
+		 * but this implementation limits allocation to 63 so that
+		 * basic networking resources are still available to the
+		 * physical function.  If the user requests greater than
+		 * 63 VFs then it is an error - reset to default of zero.
+		 */
+		num_vfs = min_t(unsigned int, max_vfs, IXGBE_MAX_VFS_DRV_LIMIT);
+
+		err = pci_enable_sriov(adapter->pdev, num_vfs);
+		if (err) {
+			e_err(probe, "Failed to enable PCI sriov: %d\n", err);
+			return;
+		}
+	}
+
+	if (!__ixgbe_enable_sriov(adapter, num_vfs)) {
+		ixgbe_get_vfs(adapter);
+		return;
+	}
+
+	/* If we have gotten to this point then there is no memory available
+	 * to manage the VF devices - print message and bail.
+	 */
+	e_err(probe, "Unable to allocate memory for VF Data Storage - "
+	      "SRIOV disabled\n");
+	ixgbe_disable_sriov(adapter);
+}
+
+#endif /* #ifdef CONFIG_PCI_IOV */
+int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
+{
+	unsigned int num_vfs = adapter->num_vfs, vf;
+	unsigned long flags;
+	int rss;
+
+	spin_lock_irqsave(&adapter->vfs_lock, flags);
+	/* set num VFs to 0 to prevent access to vfinfo */
+	adapter->num_vfs = 0;
+	spin_unlock_irqrestore(&adapter->vfs_lock, flags);
+
+	/* put the reference to all of the vf devices */
+	for (vf = 0; vf < num_vfs; ++vf) {
+		struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev;
+
+		if (!vfdev)
+			continue;
+		adapter->vfinfo[vf].vfdev = NULL;
+		pci_dev_put(vfdev);
+	}
+
+	/* free VF control structures */
+	kfree(adapter->vfinfo);
+	adapter->vfinfo = NULL;
+
+	/* free macvlan list */
+	kfree(adapter->mv_list);
+	adapter->mv_list = NULL;
+
+	/* if SR-IOV is already disabled then there is nothing to do */
+	if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
+		return 0;
+
+#ifdef CONFIG_PCI_IOV
+	/*
+	 * If our VFs are assigned we cannot shut down SR-IOV
+	 * without causing issues, so just leave the hardware
+	 * available but disabled
+	 */
+	if (pci_vfs_assigned(adapter->pdev)) {
+		e_dev_warn("Unloading driver while VFs are assigned - VFs will not be deallocated\n");
+		return -EPERM;
+	}
+	/* disable iov and allow time for transactions to clear */
+	pci_disable_sriov(adapter->pdev);
+#endif
+
+	/* Disable VMDq flag so device will be set in VM mode */
+	if (bitmap_weight(adapter->fwd_bitmask, adapter->num_rx_pools) == 1) {
+		adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED;
+		adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
+		rss = min_t(int, ixgbe_max_rss_indices(adapter),
+			    num_online_cpus());
+	} else {
+		rss = min_t(int, IXGBE_MAX_L2A_QUEUES, num_online_cpus());
+	}
+
+	adapter->ring_feature[RING_F_VMDQ].offset = 0;
+	adapter->ring_feature[RING_F_RSS].limit = rss;
+
+	/* take a breather then clean up driver data */
+	msleep(100);
+	return 0;
+}
+
+static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs)
+{
+#ifdef CONFIG_PCI_IOV
+	struct ixgbe_adapter *adapter = pci_get_drvdata(dev);
+	int pre_existing_vfs = pci_num_vf(dev);
+	int err = 0, num_rx_pools, i, limit;
+	u8 num_tc;
+
+	if (pre_existing_vfs && pre_existing_vfs != num_vfs)
+		err = ixgbe_disable_sriov(adapter);
+	else if (pre_existing_vfs && pre_existing_vfs == num_vfs)
+		return num_vfs;
+
+	if (err)
+		return err;
+
+	/* While the SR-IOV capability structure reports total VFs to be 64,
+	 * we limit the actual number allocated as below based on two factors.
+	 *    Num_TCs	MAX_VFs
+	 *	1	  63
+	 *	<=4	  31
+	 *	>4	  15
+	 * First, we reserve some transmit/receive resources for the PF.
+	 * Second, VMDQ also uses the same pools that SR-IOV does. We need to
+	 * account for this, so that we don't accidentally allocate more VFs
+	 * than we have available pools. The PCI bus driver already checks for
+	 * other values out of range.
+	 */
+	num_tc = adapter->hw_tcs;
+	num_rx_pools = bitmap_weight(adapter->fwd_bitmask,
+				     adapter->num_rx_pools);
+	limit = (num_tc > 4) ? IXGBE_MAX_VFS_8TC :
+		(num_tc > 1) ? IXGBE_MAX_VFS_4TC : IXGBE_MAX_VFS_1TC;
+
+	if (num_vfs > (limit - num_rx_pools)) {
+		e_dev_err("Currently configured with %d TCs, and %d offloaded macvlans. Creating more than %d VFs is not allowed\n",
+			  num_tc, num_rx_pools - 1, limit - num_rx_pools);
+		return -EPERM;
+	}
+
+	err = __ixgbe_enable_sriov(adapter, num_vfs);
+	if (err)
+		return  err;
+
+	for (i = 0; i < num_vfs; i++)
+		ixgbe_vf_configuration(dev, (i | 0x10000000));
+
+	/* reset before enabling SRIOV to avoid mailbox issues */
+	ixgbe_sriov_reinit(adapter);
+
+	err = pci_enable_sriov(dev, num_vfs);
+	if (err) {
+		e_dev_warn("Failed to enable PCI sriov: %d\n", err);
+		return err;
+	}
+	ixgbe_get_vfs(adapter);
+
+	return num_vfs;
+#else
+	return 0;
+#endif
+}
+
+static int ixgbe_pci_sriov_disable(struct pci_dev *dev)
+{
+	struct ixgbe_adapter *adapter = pci_get_drvdata(dev);
+	int err;
+#ifdef CONFIG_PCI_IOV
+	u32 current_flags = adapter->flags;
+	int prev_num_vf = pci_num_vf(dev);
+#endif
+
+	err = ixgbe_disable_sriov(adapter);
+
+	/* Only reinit if no error and state changed */
+#ifdef CONFIG_PCI_IOV
+	if (!err && (current_flags != adapter->flags ||
+		     prev_num_vf != pci_num_vf(dev)))
+		ixgbe_sriov_reinit(adapter);
+#endif
+
+	return err;
+}
+
+int ixgbe_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
+{
+	if (num_vfs == 0)
+		return ixgbe_pci_sriov_disable(dev);
+	else
+		return ixgbe_pci_sriov_enable(dev, num_vfs);
+}
+
+static int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter,
+				   u32 *msgbuf, u32 vf)
+{
+	int entries = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK)
+		       >> IXGBE_VT_MSGINFO_SHIFT;
+	u16 *hash_list = (u16 *)&msgbuf[1];
+	struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+	u32 vector_bit;
+	u32 vector_reg;
+	u32 mta_reg;
+	u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
+
+	/* only so many hash values supported */
+	entries = min(entries, IXGBE_MAX_VF_MC_ENTRIES);
+
+	/*
+	 * salt away the number of multi cast addresses assigned
+	 * to this VF for later use to restore when the PF multi cast
+	 * list changes
+	 */
+	vfinfo->num_vf_mc_hashes = entries;
+
+	/*
+	 * VFs are limited to using the MTA hash table for their multicast
+	 * addresses
+	 */
+	for (i = 0; i < entries; i++) {
+		vfinfo->vf_mc_hashes[i] = hash_list[i];
+	}
+
+	for (i = 0; i < vfinfo->num_vf_mc_hashes; i++) {
+		vector_reg = (vfinfo->vf_mc_hashes[i] >> 5) & 0x7F;
+		vector_bit = vfinfo->vf_mc_hashes[i] & 0x1F;
+		mta_reg = IXGBE_READ_REG(hw, IXGBE_MTA(vector_reg));
+		mta_reg |= BIT(vector_bit);
+		IXGBE_WRITE_REG(hw, IXGBE_MTA(vector_reg), mta_reg);
+	}
+	vmolr |= IXGBE_VMOLR_ROMPE;
+	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
+
+	return 0;
+}
+
+#ifdef CONFIG_PCI_IOV
+void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct vf_data_storage *vfinfo;
+	int i, j;
+	u32 vector_bit;
+	u32 vector_reg;
+	u32 mta_reg;
+
+	for (i = 0; i < adapter->num_vfs; i++) {
+		u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(i));
+		vfinfo = &adapter->vfinfo[i];
+		for (j = 0; j < vfinfo->num_vf_mc_hashes; j++) {
+			hw->addr_ctrl.mta_in_use++;
+			vector_reg = (vfinfo->vf_mc_hashes[j] >> 5) & 0x7F;
+			vector_bit = vfinfo->vf_mc_hashes[j] & 0x1F;
+			mta_reg = IXGBE_READ_REG(hw, IXGBE_MTA(vector_reg));
+			mta_reg |= BIT(vector_bit);
+			IXGBE_WRITE_REG(hw, IXGBE_MTA(vector_reg), mta_reg);
+		}
+
+		if (vfinfo->num_vf_mc_hashes)
+			vmolr |= IXGBE_VMOLR_ROMPE;
+		else
+			vmolr &= ~IXGBE_VMOLR_ROMPE;
+		IXGBE_WRITE_REG(hw, IXGBE_VMOLR(i), vmolr);
+	}
+
+	/* Restore any VF macvlans */
+	ixgbe_full_sync_mac_table(adapter);
+}
+#endif
+
+static int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid,
+			     u32 vf)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int err;
+
+	/* If VLAN overlaps with one the PF is currently monitoring make
+	 * sure that we are able to allocate a VLVF entry.  This may be
+	 * redundant but it guarantees PF will maintain visibility to
+	 * the VLAN.
+	 */
+	if (add && test_bit(vid, adapter->active_vlans)) {
+		err = hw->mac.ops.set_vfta(hw, vid, VMDQ_P(0), true, false);
+		if (err)
+			return err;
+	}
+
+	err = hw->mac.ops.set_vfta(hw, vid, vf, !!add, false);
+
+	if (add && !err)
+		return err;
+
+	/* If we failed to add the VF VLAN or we are removing the VF VLAN
+	 * we may need to drop the PF pool bit in order to allow us to free
+	 * up the VLVF resources.
+	 */
+	if (test_bit(vid, adapter->active_vlans) ||
+	    (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
+		ixgbe_update_pf_promisc_vlvf(adapter, vid);
+
+	return err;
+}
+
+static int ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 max_frame, u32 vf)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 max_frs;
+
+	if (max_frame < ETH_MIN_MTU || max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE) {
+		e_err(drv, "VF max_frame %d out of range\n", max_frame);
+		return -EINVAL;
+	}
+
+	/*
+	 * For 82599EB we have to keep all PFs and VFs operating with
+	 * the same max_frame value in order to avoid sending an oversize
+	 * frame to a VF.  In order to guarantee this is handled correctly
+	 * for all cases we have several special exceptions to take into
+	 * account before we can enable the VF for receive
+	 */
+	if (adapter->hw.mac.type == ixgbe_mac_82599EB) {
+		struct net_device *dev = adapter->netdev;
+		int pf_max_frame = dev->mtu + ETH_HLEN;
+		u32 reg_offset, vf_shift, vfre;
+		s32 err = 0;
+
+#ifdef CONFIG_FCOE
+		if (dev->features & NETIF_F_FCOE_MTU)
+			pf_max_frame = max_t(int, pf_max_frame,
+					     IXGBE_FCOE_JUMBO_FRAME_SIZE);
+
+#endif /* CONFIG_FCOE */
+		switch (adapter->vfinfo[vf].vf_api) {
+		case ixgbe_mbox_api_11:
+		case ixgbe_mbox_api_12:
+		case ixgbe_mbox_api_13:
+		case ixgbe_mbox_api_14:
+			/* Version 1.1 supports jumbo frames on VFs if PF has
+			 * jumbo frames enabled which means legacy VFs are
+			 * disabled
+			 */
+			if (pf_max_frame > ETH_FRAME_LEN)
+				break;
+			fallthrough;
+		default:
+			/* If the PF or VF are running w/ jumbo frames enabled
+			 * we need to shut down the VF Rx path as we cannot
+			 * support jumbo frames on legacy VFs
+			 */
+			if ((pf_max_frame > ETH_FRAME_LEN) ||
+			    (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN)))
+				err = -EINVAL;
+			break;
+		}
+
+		/* determine VF receive enable location */
+		vf_shift = vf % 32;
+		reg_offset = vf / 32;
+
+		/* enable or disable receive depending on error */
+		vfre = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset));
+		if (err)
+			vfre &= ~BIT(vf_shift);
+		else
+			vfre |= BIT(vf_shift);
+		IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), vfre);
+
+		if (err) {
+			e_err(drv, "VF max_frame %d out of range\n", max_frame);
+			return err;
+		}
+	}
+
+	/* pull current max frame size from hardware */
+	max_frs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
+	max_frs &= IXGBE_MHADD_MFS_MASK;
+	max_frs >>= IXGBE_MHADD_MFS_SHIFT;
+
+	if (max_frs < max_frame) {
+		max_frs = max_frame << IXGBE_MHADD_MFS_SHIFT;
+		IXGBE_WRITE_REG(hw, IXGBE_MAXFRS, max_frs);
+	}
+
+	e_info(hw, "VF requests change max MTU to %d\n", max_frame);
+
+	return 0;
+}
+
+static void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe)
+{
+	u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
+	vmolr |= IXGBE_VMOLR_BAM;
+	if (aupe)
+		vmolr |= IXGBE_VMOLR_AUPE;
+	else
+		vmolr &= ~IXGBE_VMOLR_AUPE;
+	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
+}
+
+static void ixgbe_clear_vmvir(struct ixgbe_adapter *adapter, u32 vf)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf), 0);
+}
+
+static void ixgbe_clear_vf_vlans(struct ixgbe_adapter *adapter, u32 vf)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vlvfb_mask, pool_mask, i;
+
+	/* create mask for VF and other pools */
+	pool_mask = ~BIT(VMDQ_P(0) % 32);
+	vlvfb_mask = BIT(vf % 32);
+
+	/* post increment loop, covers VLVF_ENTRIES - 1 to 0 */
+	for (i = IXGBE_VLVF_ENTRIES; i--;) {
+		u32 bits[2], vlvfb, vid, vfta, vlvf;
+		u32 word = i * 2 + vf / 32;
+		u32 mask;
+
+		vlvfb = IXGBE_READ_REG(hw, IXGBE_VLVFB(word));
+
+		/* if our bit isn't set we can skip it */
+		if (!(vlvfb & vlvfb_mask))
+			continue;
+
+		/* clear our bit from vlvfb */
+		vlvfb ^= vlvfb_mask;
+
+		/* create 64b mask to chedk to see if we should clear VLVF */
+		bits[word % 2] = vlvfb;
+		bits[~word % 2] = IXGBE_READ_REG(hw, IXGBE_VLVFB(word ^ 1));
+
+		/* if other pools are present, just remove ourselves */
+		if (bits[(VMDQ_P(0) / 32) ^ 1] ||
+		    (bits[VMDQ_P(0) / 32] & pool_mask))
+			goto update_vlvfb;
+
+		/* if PF is present, leave VFTA */
+		if (bits[0] || bits[1])
+			goto update_vlvf;
+
+		/* if we cannot determine VLAN just remove ourselves */
+		vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(i));
+		if (!vlvf)
+			goto update_vlvfb;
+
+		vid = vlvf & VLAN_VID_MASK;
+		mask = BIT(vid % 32);
+
+		/* clear bit from VFTA */
+		vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(vid / 32));
+		if (vfta & mask)
+			IXGBE_WRITE_REG(hw, IXGBE_VFTA(vid / 32), vfta ^ mask);
+update_vlvf:
+		/* clear POOL selection enable */
+		IXGBE_WRITE_REG(hw, IXGBE_VLVF(i), 0);
+
+		if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
+			vlvfb = 0;
+update_vlvfb:
+		/* clear pool bits */
+		IXGBE_WRITE_REG(hw, IXGBE_VLVFB(word), vlvfb);
+	}
+}
+
+static int ixgbe_set_vf_macvlan(struct ixgbe_adapter *adapter,
+				int vf, int index, unsigned char *mac_addr)
+{
+	struct vf_macvlans *entry;
+	struct list_head *pos;
+	int retval = 0;
+
+	if (index <= 1) {
+		list_for_each(pos, &adapter->vf_mvs.l) {
+			entry = list_entry(pos, struct vf_macvlans, l);
+			if (entry->vf == vf) {
+				entry->vf = -1;
+				entry->free = true;
+				entry->is_macvlan = false;
+				ixgbe_del_mac_filter(adapter,
+						     entry->vf_macvlan, vf);
+			}
+		}
+	}
+
+	/*
+	 * If index was zero then we were asked to clear the uc list
+	 * for the VF.  We're done.
+	 */
+	if (!index)
+		return 0;
+
+	entry = NULL;
+
+	list_for_each(pos, &adapter->vf_mvs.l) {
+		entry = list_entry(pos, struct vf_macvlans, l);
+		if (entry->free)
+			break;
+	}
+
+	/*
+	 * If we traversed the entire list and didn't find a free entry
+	 * then we're out of space on the RAR table.  Also entry may
+	 * be NULL because the original memory allocation for the list
+	 * failed, which is not fatal but does mean we can't support
+	 * VF requests for MACVLAN because we couldn't allocate
+	 * memory for the list management required.
+	 */
+	if (!entry || !entry->free)
+		return -ENOSPC;
+
+	retval = ixgbe_add_mac_filter(adapter, mac_addr, vf);
+	if (retval < 0)
+		return retval;
+
+	entry->free = false;
+	entry->is_macvlan = true;
+	entry->vf = vf;
+	memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN);
+
+	return 0;
+}
+
+static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
+	struct vf_data_storage *vfinfo = &adapter->vfinfo[vf];
+	u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
+	u8 num_tcs = adapter->hw_tcs;
+	u32 reg_val;
+	u32 queue;
+
+	/* remove VLAN filters beloning to this VF */
+	ixgbe_clear_vf_vlans(adapter, vf);
+
+	/* add back PF assigned VLAN or VLAN 0 */
+	ixgbe_set_vf_vlan(adapter, true, vfinfo->pf_vlan, vf);
+
+	/* reset offloads to defaults */
+	ixgbe_set_vmolr(hw, vf, !vfinfo->pf_vlan);
+
+	/* set outgoing tags for VFs */
+	if (!vfinfo->pf_vlan && !vfinfo->pf_qos && !num_tcs) {
+		ixgbe_clear_vmvir(adapter, vf);
+	} else {
+		if (vfinfo->pf_qos || !num_tcs)
+			ixgbe_set_vmvir(adapter, vfinfo->pf_vlan,
+					vfinfo->pf_qos, vf);
+		else
+			ixgbe_set_vmvir(adapter, vfinfo->pf_vlan,
+					adapter->default_up, vf);
+
+		if (vfinfo->spoofchk_enabled) {
+			hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf);
+			hw->mac.ops.set_mac_anti_spoofing(hw, true, vf);
+		}
+	}
+
+	/* reset multicast table array for vf */
+	adapter->vfinfo[vf].num_vf_mc_hashes = 0;
+
+	/* clear any ipsec table info */
+	ixgbe_ipsec_vf_clear(adapter, vf);
+
+	/* Flush and reset the mta with the new values */
+	ixgbe_set_rx_mode(adapter->netdev);
+
+	ixgbe_del_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf);
+	ixgbe_set_vf_macvlan(adapter, vf, 0, NULL);
+
+	/* reset VF api back to unknown */
+	adapter->vfinfo[vf].vf_api = ixgbe_mbox_api_10;
+
+	/* Restart each queue for given VF */
+	for (queue = 0; queue < q_per_pool; queue++) {
+		unsigned int reg_idx = (vf * q_per_pool) + queue;
+
+		reg_val = IXGBE_READ_REG(hw, IXGBE_PVFTXDCTL(reg_idx));
+
+		/* Re-enabling only configured queues */
+		if (reg_val) {
+			reg_val |= IXGBE_TXDCTL_ENABLE;
+			IXGBE_WRITE_REG(hw, IXGBE_PVFTXDCTL(reg_idx), reg_val);
+			reg_val &= ~IXGBE_TXDCTL_ENABLE;
+			IXGBE_WRITE_REG(hw, IXGBE_PVFTXDCTL(reg_idx), reg_val);
+		}
+	}
+
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+static void ixgbe_vf_clear_mbx(struct ixgbe_adapter *adapter, u32 vf)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 word;
+
+	/* Clear VF's mailbox memory */
+	for (word = 0; word < IXGBE_VFMAILBOX_SIZE; word++)
+		IXGBE_WRITE_REG_ARRAY(hw, IXGBE_PFMBMEM(vf), word, 0);
+
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+static int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter,
+			    int vf, unsigned char *mac_addr)
+{
+	s32 retval;
+
+	ixgbe_del_mac_filter(adapter, adapter->vfinfo[vf].vf_mac_addresses, vf);
+	retval = ixgbe_add_mac_filter(adapter, mac_addr, vf);
+	if (retval >= 0)
+		memcpy(adapter->vfinfo[vf].vf_mac_addresses, mac_addr,
+		       ETH_ALEN);
+	else
+		eth_zero_addr(adapter->vfinfo[vf].vf_mac_addresses);
+
+	return retval;
+}
+
+int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask)
+{
+	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
+	unsigned int vfn = (event_mask & 0x3f);
+
+	bool enable = ((event_mask & 0x10000000U) != 0);
+
+	if (enable)
+		eth_zero_addr(adapter->vfinfo[vfn].vf_mac_addresses);
+
+	return 0;
+}
+
+static inline void ixgbe_write_qde(struct ixgbe_adapter *adapter, u32 vf,
+				   u32 qde)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
+	u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
+	int i;
+
+	for (i = vf * q_per_pool; i < ((vf + 1) * q_per_pool); i++) {
+		u32 reg;
+
+		/* flush previous write */
+		IXGBE_WRITE_FLUSH(hw);
+
+		/* indicate to hardware that we want to set drop enable */
+		reg = IXGBE_QDE_WRITE | qde;
+		reg |= i <<  IXGBE_QDE_IDX_SHIFT;
+		IXGBE_WRITE_REG(hw, IXGBE_QDE, reg);
+	}
+}
+
+/**
+ * ixgbe_set_vf_rx_tx - Set VF rx tx
+ * @adapter: Pointer to adapter struct
+ * @vf: VF identifier
+ *
+ * Set or reset correct transmit and receive for vf
+ **/
+static void ixgbe_set_vf_rx_tx(struct ixgbe_adapter *adapter, int vf)
+{
+	u32 reg_cur_tx, reg_cur_rx, reg_req_tx, reg_req_rx;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 reg_offset, vf_shift;
+
+	vf_shift = vf % 32;
+	reg_offset = vf / 32;
+
+	reg_cur_tx = IXGBE_READ_REG(hw, IXGBE_VFTE(reg_offset));
+	reg_cur_rx = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset));
+
+	if (adapter->vfinfo[vf].link_enable) {
+		reg_req_tx = reg_cur_tx | 1 << vf_shift;
+		reg_req_rx = reg_cur_rx | 1 << vf_shift;
+	} else {
+		reg_req_tx = reg_cur_tx & ~(1 << vf_shift);
+		reg_req_rx = reg_cur_rx & ~(1 << vf_shift);
+	}
+
+	/* The 82599 cannot support a mix of jumbo and non-jumbo PF/VFs.
+	 * For more info take a look at ixgbe_set_vf_lpe
+	 */
+	if (adapter->hw.mac.type == ixgbe_mac_82599EB) {
+		struct net_device *dev = adapter->netdev;
+		int pf_max_frame = dev->mtu + ETH_HLEN;
+
+#if IS_ENABLED(CONFIG_FCOE)
+		if (dev->features & NETIF_F_FCOE_MTU)
+			pf_max_frame = max_t(int, pf_max_frame,
+					     IXGBE_FCOE_JUMBO_FRAME_SIZE);
+#endif /* CONFIG_FCOE */
+
+		if (pf_max_frame > ETH_FRAME_LEN)
+			reg_req_rx = reg_cur_rx & ~(1 << vf_shift);
+	}
+
+	/* Enable/Disable particular VF */
+	if (reg_cur_tx != reg_req_tx)
+		IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), reg_req_tx);
+	if (reg_cur_rx != reg_req_rx)
+		IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), reg_req_rx);
+}
+
+static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf)
+{
+	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
+	struct ixgbe_hw *hw = &adapter->hw;
+	unsigned char *vf_mac = adapter->vfinfo[vf].vf_mac_addresses;
+	u32 reg, reg_offset, vf_shift;
+	u32 msgbuf[4] = {0, 0, 0, 0};
+	u8 *addr = (u8 *)(&msgbuf[1]);
+	u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
+	int i;
+
+	e_info(probe, "VF Reset msg received from vf %d\n", vf);
+
+	/* reset the filters for the device */
+	ixgbe_vf_reset_event(adapter, vf);
+
+	ixgbe_vf_clear_mbx(adapter, vf);
+
+	/* set vf mac address */
+	if (!is_zero_ether_addr(vf_mac))
+		ixgbe_set_vf_mac(adapter, vf, vf_mac);
+
+	vf_shift = vf % 32;
+	reg_offset = vf / 32;
+
+	/* force drop enable for all VF Rx queues */
+	reg = IXGBE_QDE_ENABLE;
+	if (adapter->vfinfo[vf].pf_vlan)
+		reg |= IXGBE_QDE_HIDE_VLAN;
+
+	ixgbe_write_qde(adapter, vf, reg);
+
+	ixgbe_set_vf_rx_tx(adapter, vf);
+
+	/* enable VF mailbox for further messages */
+	adapter->vfinfo[vf].clear_to_send = true;
+
+	/* Enable counting of spoofed packets in the SSVPC register */
+	reg = IXGBE_READ_REG(hw, IXGBE_VMECM(reg_offset));
+	reg |= BIT(vf_shift);
+	IXGBE_WRITE_REG(hw, IXGBE_VMECM(reg_offset), reg);
+
+	/*
+	 * Reset the VFs TDWBAL and TDWBAH registers
+	 * which are not cleared by an FLR
+	 */
+	for (i = 0; i < q_per_pool; i++) {
+		IXGBE_WRITE_REG(hw, IXGBE_PVFTDWBAHn(q_per_pool, vf, i), 0);
+		IXGBE_WRITE_REG(hw, IXGBE_PVFTDWBALn(q_per_pool, vf, i), 0);
+	}
+
+	/* reply to reset with ack and vf mac address */
+	msgbuf[0] = IXGBE_VF_RESET;
+	if (!is_zero_ether_addr(vf_mac) && adapter->vfinfo[vf].pf_set_mac) {
+		msgbuf[0] |= IXGBE_VT_MSGTYPE_ACK;
+		memcpy(addr, vf_mac, ETH_ALEN);
+	} else {
+		msgbuf[0] |= IXGBE_VT_MSGTYPE_NACK;
+	}
+
+	/*
+	 * Piggyback the multicast filter type so VF can compute the
+	 * correct vectors
+	 */
+	msgbuf[3] = hw->mac.mc_filter_type;
+	ixgbe_write_mbx(hw, msgbuf, IXGBE_VF_PERMADDR_MSG_LEN, vf);
+
+	return 0;
+}
+
+static int ixgbe_set_vf_mac_addr(struct ixgbe_adapter *adapter,
+				 u32 *msgbuf, u32 vf)
+{
+	u8 *new_mac = ((u8 *)(&msgbuf[1]));
+
+	if (!is_valid_ether_addr(new_mac)) {
+		e_warn(drv, "VF %d attempted to set invalid mac\n", vf);
+		return -1;
+	}
+
+	if (adapter->vfinfo[vf].pf_set_mac && !adapter->vfinfo[vf].trusted &&
+	    !ether_addr_equal(adapter->vfinfo[vf].vf_mac_addresses, new_mac)) {
+		e_warn(drv,
+		       "VF %d attempted to override administratively set MAC address\n"
+		       "Reload the VF driver to resume operations\n",
+		       vf);
+		return -1;
+	}
+
+	return ixgbe_set_vf_mac(adapter, vf, new_mac) < 0;
+}
+
+static int ixgbe_set_vf_vlan_msg(struct ixgbe_adapter *adapter,
+				 u32 *msgbuf, u32 vf)
+{
+	u32 add = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >> IXGBE_VT_MSGINFO_SHIFT;
+	u32 vid = (msgbuf[1] & IXGBE_VLVF_VLANID_MASK);
+	u8 tcs = adapter->hw_tcs;
+
+	if (adapter->vfinfo[vf].pf_vlan || tcs) {
+		e_warn(drv,
+		       "VF %d attempted to override administratively set VLAN configuration\n"
+		       "Reload the VF driver to resume operations\n",
+		       vf);
+		return -1;
+	}
+
+	/* VLAN 0 is a special case, don't allow it to be removed */
+	if (!vid && !add)
+		return 0;
+
+	return ixgbe_set_vf_vlan(adapter, add, vid, vf);
+}
+
+static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter,
+				    u32 *msgbuf, u32 vf)
+{
+	u8 *new_mac = ((u8 *)(&msgbuf[1]));
+	int index = (msgbuf[0] & IXGBE_VT_MSGINFO_MASK) >>
+		    IXGBE_VT_MSGINFO_SHIFT;
+	int err;
+
+	if (adapter->vfinfo[vf].pf_set_mac && !adapter->vfinfo[vf].trusted &&
+	    index > 0) {
+		e_warn(drv,
+		       "VF %d requested MACVLAN filter but is administratively denied\n",
+		       vf);
+		return -1;
+	}
+
+	/* An non-zero index indicates the VF is setting a filter */
+	if (index) {
+		if (!is_valid_ether_addr(new_mac)) {
+			e_warn(drv, "VF %d attempted to set invalid mac\n", vf);
+			return -1;
+		}
+
+		/*
+		 * If the VF is allowed to set MAC filters then turn off
+		 * anti-spoofing to avoid false positives.
+		 */
+		if (adapter->vfinfo[vf].spoofchk_enabled) {
+			struct ixgbe_hw *hw = &adapter->hw;
+
+			hw->mac.ops.set_mac_anti_spoofing(hw, false, vf);
+			hw->mac.ops.set_vlan_anti_spoofing(hw, false, vf);
+		}
+	}
+
+	err = ixgbe_set_vf_macvlan(adapter, vf, index, new_mac);
+	if (err == -ENOSPC)
+		e_warn(drv,
+		       "VF %d has requested a MACVLAN filter but there is no space for it\n",
+		       vf);
+
+	return err < 0;
+}
+
+static int ixgbe_negotiate_vf_api(struct ixgbe_adapter *adapter,
+				  u32 *msgbuf, u32 vf)
+{
+	int api = msgbuf[1];
+
+	switch (api) {
+	case ixgbe_mbox_api_10:
+	case ixgbe_mbox_api_11:
+	case ixgbe_mbox_api_12:
+	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
+		adapter->vfinfo[vf].vf_api = api;
+		return 0;
+	default:
+		break;
+	}
+
+	e_info(drv, "VF %d requested invalid api version %u\n", vf, api);
+
+	return -1;
+}
+
+static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter,
+			       u32 *msgbuf, u32 vf)
+{
+	struct net_device *dev = adapter->netdev;
+	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
+	unsigned int default_tc = 0;
+	u8 num_tcs = adapter->hw_tcs;
+
+	/* verify the PF is supporting the correct APIs */
+	switch (adapter->vfinfo[vf].vf_api) {
+	case ixgbe_mbox_api_20:
+	case ixgbe_mbox_api_11:
+	case ixgbe_mbox_api_12:
+	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
+		break;
+	default:
+		return -1;
+	}
+
+	/* only allow 1 Tx queue for bandwidth limiting */
+	msgbuf[IXGBE_VF_TX_QUEUES] = __ALIGN_MASK(1, ~vmdq->mask);
+	msgbuf[IXGBE_VF_RX_QUEUES] = __ALIGN_MASK(1, ~vmdq->mask);
+
+	/* if TCs > 1 determine which TC belongs to default user priority */
+	if (num_tcs > 1)
+		default_tc = netdev_get_prio_tc_map(dev, adapter->default_up);
+
+	/* notify VF of need for VLAN tag stripping, and correct queue */
+	if (num_tcs)
+		msgbuf[IXGBE_VF_TRANS_VLAN] = num_tcs;
+	else if (adapter->vfinfo[vf].pf_vlan || adapter->vfinfo[vf].pf_qos)
+		msgbuf[IXGBE_VF_TRANS_VLAN] = 1;
+	else
+		msgbuf[IXGBE_VF_TRANS_VLAN] = 0;
+
+	/* notify VF of default queue */
+	msgbuf[IXGBE_VF_DEF_QUEUE] = default_tc;
+
+	return 0;
+}
+
+static int ixgbe_get_vf_reta(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
+{
+	u32 i, j;
+	u32 *out_buf = &msgbuf[1];
+	const u8 *reta = adapter->rss_indir_tbl;
+	u32 reta_size = ixgbe_rss_indir_tbl_entries(adapter);
+
+	/* Check if operation is permitted */
+	if (!adapter->vfinfo[vf].rss_query_enabled)
+		return -EPERM;
+
+	/* verify the PF is supporting the correct API */
+	switch (adapter->vfinfo[vf].vf_api) {
+	case ixgbe_mbox_api_14:
+	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_12:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	/* This mailbox command is supported (required) only for 82599 and x540
+	 * VFs which support up to 4 RSS queues. Therefore we will compress the
+	 * RETA by saving only 2 bits from each entry. This way we will be able
+	 * to transfer the whole RETA in a single mailbox operation.
+	 */
+	for (i = 0; i < reta_size / 16; i++) {
+		out_buf[i] = 0;
+		for (j = 0; j < 16; j++)
+			out_buf[i] |= (u32)(reta[16 * i + j] & 0x3) << (2 * j);
+	}
+
+	return 0;
+}
+
+static int ixgbe_get_vf_rss_key(struct ixgbe_adapter *adapter,
+				u32 *msgbuf, u32 vf)
+{
+	u32 *rss_key = &msgbuf[1];
+
+	/* Check if the operation is permitted */
+	if (!adapter->vfinfo[vf].rss_query_enabled)
+		return -EPERM;
+
+	/* verify the PF is supporting the correct API */
+	switch (adapter->vfinfo[vf].vf_api) {
+	case ixgbe_mbox_api_14:
+	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_12:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	memcpy(rss_key, adapter->rss_key, IXGBE_RSS_KEY_SIZE);
+
+	return 0;
+}
+
+static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter,
+				      u32 *msgbuf, u32 vf)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int xcast_mode = msgbuf[1];
+	u32 vmolr, fctrl, disable, enable;
+
+	/* verify the PF is supporting the correct APIs */
+	switch (adapter->vfinfo[vf].vf_api) {
+	case ixgbe_mbox_api_12:
+		/* promisc introduced in 1.3 version */
+		if (xcast_mode == IXGBEVF_XCAST_MODE_PROMISC)
+			return -EOPNOTSUPP;
+		fallthrough;
+	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (xcast_mode > IXGBEVF_XCAST_MODE_MULTI &&
+	    !adapter->vfinfo[vf].trusted) {
+		xcast_mode = IXGBEVF_XCAST_MODE_MULTI;
+	}
+
+	if (adapter->vfinfo[vf].xcast_mode == xcast_mode)
+		goto out;
+
+	switch (xcast_mode) {
+	case IXGBEVF_XCAST_MODE_NONE:
+		disable = IXGBE_VMOLR_ROMPE |
+			  IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE | IXGBE_VMOLR_VPE;
+		enable = IXGBE_VMOLR_BAM;
+		break;
+	case IXGBEVF_XCAST_MODE_MULTI:
+		disable = IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE | IXGBE_VMOLR_VPE;
+		enable = IXGBE_VMOLR_BAM | IXGBE_VMOLR_ROMPE;
+		break;
+	case IXGBEVF_XCAST_MODE_ALLMULTI:
+		disable = IXGBE_VMOLR_UPE | IXGBE_VMOLR_VPE;
+		enable = IXGBE_VMOLR_BAM | IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_MPE;
+		break;
+	case IXGBEVF_XCAST_MODE_PROMISC:
+		if (hw->mac.type <= ixgbe_mac_82599EB)
+			return -EOPNOTSUPP;
+
+		fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
+		if (!(fctrl & IXGBE_FCTRL_UPE)) {
+			/* VF promisc requires PF in promisc */
+			e_warn(drv,
+			       "Enabling VF promisc requires PF in promisc\n");
+			return -EPERM;
+		}
+
+		disable = IXGBE_VMOLR_VPE;
+		enable = IXGBE_VMOLR_BAM | IXGBE_VMOLR_ROMPE |
+			 IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf));
+	vmolr &= ~disable;
+	vmolr |= enable;
+	IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr);
+
+	adapter->vfinfo[vf].xcast_mode = xcast_mode;
+
+out:
+	msgbuf[1] = xcast_mode;
+
+	return 0;
+}
+
+static int ixgbe_get_vf_link_state(struct ixgbe_adapter *adapter,
+				   u32 *msgbuf, u32 vf)
+{
+	u32 *link_state = &msgbuf[1];
+
+	/* verify the PF is supporting the correct API */
+	switch (adapter->vfinfo[vf].vf_api) {
+	case ixgbe_mbox_api_12:
+	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	*link_state = adapter->vfinfo[vf].link_enable;
+
+	return 0;
+}
+
+static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf)
+{
+	u32 mbx_size = IXGBE_VFMAILBOX_SIZE;
+	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
+	struct ixgbe_hw *hw = &adapter->hw;
+	s32 retval;
+
+	retval = ixgbe_read_mbx(hw, msgbuf, mbx_size, vf);
+
+	if (retval) {
+		pr_err("Error receiving message from VF\n");
+		return retval;
+	}
+
+	/* this is a message we already processed, do nothing */
+	if (msgbuf[0] & (IXGBE_VT_MSGTYPE_ACK | IXGBE_VT_MSGTYPE_NACK))
+		return 0;
+
+	/* flush the ack before we write any messages back */
+	IXGBE_WRITE_FLUSH(hw);
+
+	if (msgbuf[0] == IXGBE_VF_RESET)
+		return ixgbe_vf_reset_msg(adapter, vf);
+
+	/*
+	 * until the vf completes a virtual function reset it should not be
+	 * allowed to start any configuration.
+	 */
+	if (!adapter->vfinfo[vf].clear_to_send) {
+		msgbuf[0] |= IXGBE_VT_MSGTYPE_NACK;
+		ixgbe_write_mbx(hw, msgbuf, 1, vf);
+		return 0;
+	}
+
+	switch ((msgbuf[0] & 0xFFFF)) {
+	case IXGBE_VF_SET_MAC_ADDR:
+		retval = ixgbe_set_vf_mac_addr(adapter, msgbuf, vf);
+		break;
+	case IXGBE_VF_SET_MULTICAST:
+		retval = ixgbe_set_vf_multicasts(adapter, msgbuf, vf);
+		break;
+	case IXGBE_VF_SET_VLAN:
+		retval = ixgbe_set_vf_vlan_msg(adapter, msgbuf, vf);
+		break;
+	case IXGBE_VF_SET_LPE:
+		retval = ixgbe_set_vf_lpe(adapter, msgbuf[1], vf);
+		break;
+	case IXGBE_VF_SET_MACVLAN:
+		retval = ixgbe_set_vf_macvlan_msg(adapter, msgbuf, vf);
+		break;
+	case IXGBE_VF_API_NEGOTIATE:
+		retval = ixgbe_negotiate_vf_api(adapter, msgbuf, vf);
+		break;
+	case IXGBE_VF_GET_QUEUES:
+		retval = ixgbe_get_vf_queues(adapter, msgbuf, vf);
+		break;
+	case IXGBE_VF_GET_RETA:
+		retval = ixgbe_get_vf_reta(adapter, msgbuf, vf);
+		break;
+	case IXGBE_VF_GET_RSS_KEY:
+		retval = ixgbe_get_vf_rss_key(adapter, msgbuf, vf);
+		break;
+	case IXGBE_VF_UPDATE_XCAST_MODE:
+		retval = ixgbe_update_vf_xcast_mode(adapter, msgbuf, vf);
+		break;
+	case IXGBE_VF_GET_LINK_STATE:
+		retval = ixgbe_get_vf_link_state(adapter, msgbuf, vf);
+		break;
+	case IXGBE_VF_IPSEC_ADD:
+		retval = ixgbe_ipsec_vf_add_sa(adapter, msgbuf, vf);
+		break;
+	case IXGBE_VF_IPSEC_DEL:
+		retval = ixgbe_ipsec_vf_del_sa(adapter, msgbuf, vf);
+		break;
+	default:
+		e_err(drv, "Unhandled Msg %8.8x\n", msgbuf[0]);
+		retval = IXGBE_ERR_MBX;
+		break;
+	}
+
+	/* notify the VF of the results of what it sent us */
+	if (retval)
+		msgbuf[0] |= IXGBE_VT_MSGTYPE_NACK;
+	else
+		msgbuf[0] |= IXGBE_VT_MSGTYPE_ACK;
+
+	msgbuf[0] |= IXGBE_VT_MSGTYPE_CTS;
+
+	ixgbe_write_mbx(hw, msgbuf, mbx_size, vf);
+
+	return retval;
+}
+
+static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 msg = IXGBE_VT_MSGTYPE_NACK;
+
+	/* if device isn't clear to send it shouldn't be reading either */
+	if (!adapter->vfinfo[vf].clear_to_send)
+		ixgbe_write_mbx(hw, &msg, 1, vf);
+}
+
+void ixgbe_msg_task(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	unsigned long flags;
+	u32 vf;
+
+	spin_lock_irqsave(&adapter->vfs_lock, flags);
+	for (vf = 0; vf < adapter->num_vfs; vf++) {
+		/* process any reset requests */
+		if (!ixgbe_check_for_rst(hw, vf))
+			ixgbe_vf_reset_event(adapter, vf);
+
+		/* process any messages pending */
+		if (!ixgbe_check_for_msg(hw, vf))
+			ixgbe_rcv_msg_from_vf(adapter, vf);
+
+		/* process any acks */
+		if (!ixgbe_check_for_ack(hw, vf))
+			ixgbe_rcv_ack_from_vf(adapter, vf);
+	}
+	spin_unlock_irqrestore(&adapter->vfs_lock, flags);
+}
+
+static inline void ixgbe_ping_vf(struct ixgbe_adapter *adapter, int vf)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 ping;
+
+	ping = IXGBE_PF_CONTROL_MSG;
+	if (adapter->vfinfo[vf].clear_to_send)
+		ping |= IXGBE_VT_MSGTYPE_CTS;
+	ixgbe_write_mbx(hw, &ping, 1, vf);
+}
+
+void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 ping;
+	int i;
+
+	for (i = 0 ; i < adapter->num_vfs; i++) {
+		ping = IXGBE_PF_CONTROL_MSG;
+		if (adapter->vfinfo[i].clear_to_send)
+			ping |= IXGBE_VT_MSGTYPE_CTS;
+		ixgbe_write_mbx(hw, &ping, 1, i);
+	}
+}
+
+/**
+ * ixgbe_set_all_vfs - update vfs queues
+ * @adapter: Pointer to adapter struct
+ *
+ * Update setting transmit and receive queues for all vfs
+ **/
+void ixgbe_set_all_vfs(struct ixgbe_adapter *adapter)
+{
+	int i;
+
+	for (i = 0 ; i < adapter->num_vfs; i++)
+		ixgbe_set_vf_link_state(adapter, i,
+					adapter->vfinfo[i].link_state);
+}
+
+int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	s32 retval;
+
+	if (vf >= adapter->num_vfs)
+		return -EINVAL;
+
+	if (is_valid_ether_addr(mac)) {
+		dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n",
+			 mac, vf);
+		dev_info(&adapter->pdev->dev, "Reload the VF driver to make this change effective.");
+
+		retval = ixgbe_set_vf_mac(adapter, vf, mac);
+		if (retval >= 0) {
+			adapter->vfinfo[vf].pf_set_mac = true;
+
+			if (test_bit(__IXGBE_DOWN, &adapter->state)) {
+				dev_warn(&adapter->pdev->dev, "The VF MAC address has been set, but the PF device is not up.\n");
+				dev_warn(&adapter->pdev->dev, "Bring the PF device up before attempting to use the VF device.\n");
+			}
+		} else {
+			dev_warn(&adapter->pdev->dev, "The VF MAC address was NOT set due to invalid or duplicate MAC address.\n");
+		}
+	} else if (is_zero_ether_addr(mac)) {
+		unsigned char *vf_mac_addr =
+					   adapter->vfinfo[vf].vf_mac_addresses;
+
+		/* nothing to do */
+		if (is_zero_ether_addr(vf_mac_addr))
+			return 0;
+
+		dev_info(&adapter->pdev->dev, "removing MAC on VF %d\n", vf);
+
+		retval = ixgbe_del_mac_filter(adapter, vf_mac_addr, vf);
+		if (retval >= 0) {
+			adapter->vfinfo[vf].pf_set_mac = false;
+			memcpy(vf_mac_addr, mac, ETH_ALEN);
+		} else {
+			dev_warn(&adapter->pdev->dev, "Could NOT remove the VF MAC address.\n");
+		}
+	} else {
+		retval = -EINVAL;
+	}
+
+	return retval;
+}
+
+static int ixgbe_enable_port_vlan(struct ixgbe_adapter *adapter, int vf,
+				  u16 vlan, u8 qos)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int err;
+
+	err = ixgbe_set_vf_vlan(adapter, true, vlan, vf);
+	if (err)
+		goto out;
+
+	/* Revoke tagless access via VLAN 0 */
+	ixgbe_set_vf_vlan(adapter, false, 0, vf);
+
+	ixgbe_set_vmvir(adapter, vlan, qos, vf);
+	ixgbe_set_vmolr(hw, vf, false);
+
+	/* enable hide vlan on X550 */
+	if (hw->mac.type >= ixgbe_mac_X550)
+		ixgbe_write_qde(adapter, vf, IXGBE_QDE_ENABLE |
+				IXGBE_QDE_HIDE_VLAN);
+
+	adapter->vfinfo[vf].pf_vlan = vlan;
+	adapter->vfinfo[vf].pf_qos = qos;
+	dev_info(&adapter->pdev->dev,
+		 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
+	if (test_bit(__IXGBE_DOWN, &adapter->state)) {
+		dev_warn(&adapter->pdev->dev,
+			 "The VF VLAN has been set, but the PF device is not up.\n");
+		dev_warn(&adapter->pdev->dev,
+			 "Bring the PF device up before attempting to use the VF device.\n");
+	}
+
+out:
+	return err;
+}
+
+static int ixgbe_disable_port_vlan(struct ixgbe_adapter *adapter, int vf)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int err;
+
+	err = ixgbe_set_vf_vlan(adapter, false,
+				adapter->vfinfo[vf].pf_vlan, vf);
+	/* Restore tagless access via VLAN 0 */
+	ixgbe_set_vf_vlan(adapter, true, 0, vf);
+	ixgbe_clear_vmvir(adapter, vf);
+	ixgbe_set_vmolr(hw, vf, true);
+
+	/* disable hide VLAN on X550 */
+	if (hw->mac.type >= ixgbe_mac_X550)
+		ixgbe_write_qde(adapter, vf, IXGBE_QDE_ENABLE);
+
+	adapter->vfinfo[vf].pf_vlan = 0;
+	adapter->vfinfo[vf].pf_qos = 0;
+
+	return err;
+}
+
+int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
+			  u8 qos, __be16 vlan_proto)
+{
+	int err = 0;
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if ((vf >= adapter->num_vfs) || (vlan > 4095) || (qos > 7))
+		return -EINVAL;
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+	if (vlan || qos) {
+		/* Check if there is already a port VLAN set, if so
+		 * we have to delete the old one first before we
+		 * can set the new one.  The usage model had
+		 * previously assumed the user would delete the
+		 * old port VLAN before setting a new one but this
+		 * is not necessarily the case.
+		 */
+		if (adapter->vfinfo[vf].pf_vlan)
+			err = ixgbe_disable_port_vlan(adapter, vf);
+		if (err)
+			goto out;
+		err = ixgbe_enable_port_vlan(adapter, vf, vlan, qos);
+	} else {
+		err = ixgbe_disable_port_vlan(adapter, vf);
+	}
+
+out:
+	return err;
+}
+
+int ixgbe_link_mbps(struct ixgbe_adapter *adapter)
+{
+	switch (adapter->link_speed) {
+	case IXGBE_LINK_SPEED_100_FULL:
+		return 100;
+	case IXGBE_LINK_SPEED_1GB_FULL:
+		return 1000;
+	case IXGBE_LINK_SPEED_10GB_FULL:
+		return 10000;
+	default:
+		return 0;
+	}
+}
+
+static void ixgbe_set_vf_rate_limit(struct ixgbe_adapter *adapter, int vf)
+{
+	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 bcnrc_val = 0;
+	u16 queue, queues_per_pool;
+	u16 tx_rate = adapter->vfinfo[vf].tx_rate;
+
+	if (tx_rate) {
+		/* start with base link speed value */
+		bcnrc_val = adapter->vf_rate_link_speed;
+
+		/* Calculate the rate factor values to set */
+		bcnrc_val <<= IXGBE_RTTBCNRC_RF_INT_SHIFT;
+		bcnrc_val /= tx_rate;
+
+		/* clear everything but the rate factor */
+		bcnrc_val &= IXGBE_RTTBCNRC_RF_INT_MASK |
+			     IXGBE_RTTBCNRC_RF_DEC_MASK;
+
+		/* enable the rate scheduler */
+		bcnrc_val |= IXGBE_RTTBCNRC_RS_ENA;
+	}
+
+	/*
+	 * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
+	 * register. Typically MMW_SIZE=0x014 if 9728-byte jumbo is supported
+	 * and 0x004 otherwise.
+	 */
+	switch (hw->mac.type) {
+	case ixgbe_mac_82599EB:
+		IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRM, 0x4);
+		break;
+	case ixgbe_mac_X540:
+		IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRM, 0x14);
+		break;
+	default:
+		break;
+	}
+
+	/* determine how many queues per pool based on VMDq mask */
+	queues_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
+
+	/* write value for all Tx queues belonging to VF */
+	for (queue = 0; queue < queues_per_pool; queue++) {
+		unsigned int reg_idx = (vf * queues_per_pool) + queue;
+
+		IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, reg_idx);
+		IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, bcnrc_val);
+	}
+}
+
+void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter)
+{
+	int i;
+
+	/* VF Tx rate limit was not set */
+	if (!adapter->vf_rate_link_speed)
+		return;
+
+	if (ixgbe_link_mbps(adapter) != adapter->vf_rate_link_speed) {
+		adapter->vf_rate_link_speed = 0;
+		dev_info(&adapter->pdev->dev,
+			 "Link speed has been changed. VF Transmit rate is disabled\n");
+	}
+
+	for (i = 0; i < adapter->num_vfs; i++) {
+		if (!adapter->vf_rate_link_speed)
+			adapter->vfinfo[i].tx_rate = 0;
+
+		ixgbe_set_vf_rate_limit(adapter, i);
+	}
+}
+
+int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
+			int max_tx_rate)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	int link_speed;
+
+	/* verify VF is active */
+	if (vf >= adapter->num_vfs)
+		return -EINVAL;
+
+	/* verify link is up */
+	if (!adapter->link_up)
+		return -EINVAL;
+
+	/* verify we are linked at 10Gbps */
+	link_speed = ixgbe_link_mbps(adapter);
+	if (link_speed != 10000)
+		return -EINVAL;
+
+	if (min_tx_rate)
+		return -EINVAL;
+
+	/* rate limit cannot be less than 10Mbs or greater than link speed */
+	if (max_tx_rate && ((max_tx_rate <= 10) || (max_tx_rate > link_speed)))
+		return -EINVAL;
+
+	/* store values */
+	adapter->vf_rate_link_speed = link_speed;
+	adapter->vfinfo[vf].tx_rate = max_tx_rate;
+
+	/* update hardware configuration */
+	ixgbe_set_vf_rate_limit(adapter, vf);
+
+	return 0;
+}
+
+int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	if (vf >= adapter->num_vfs)
+		return -EINVAL;
+
+	adapter->vfinfo[vf].spoofchk_enabled = setting;
+
+	/* configure MAC spoofing */
+	hw->mac.ops.set_mac_anti_spoofing(hw, setting, vf);
+
+	/* configure VLAN spoofing */
+	hw->mac.ops.set_vlan_anti_spoofing(hw, setting, vf);
+
+	/* Ensure LLDP and FC is set for Ethertype Antispoofing if we will be
+	 * calling set_ethertype_anti_spoofing for each VF in loop below
+	 */
+	if (hw->mac.ops.set_ethertype_anti_spoofing) {
+		IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_LLDP),
+				(IXGBE_ETQF_FILTER_EN    |
+				 IXGBE_ETQF_TX_ANTISPOOF |
+				 ETH_P_LLDP));
+
+		IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FC),
+				(IXGBE_ETQF_FILTER_EN |
+				 IXGBE_ETQF_TX_ANTISPOOF |
+				 ETH_P_PAUSE));
+
+		hw->mac.ops.set_ethertype_anti_spoofing(hw, setting, vf);
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_set_vf_link_state - Set link state
+ * @adapter: Pointer to adapter struct
+ * @vf: VF identifier
+ * @state: required link state
+ *
+ * Set a link force state on/off a single vf
+ **/
+void ixgbe_set_vf_link_state(struct ixgbe_adapter *adapter, int vf, int state)
+{
+	adapter->vfinfo[vf].link_state = state;
+
+	switch (state) {
+	case IFLA_VF_LINK_STATE_AUTO:
+		if (test_bit(__IXGBE_DOWN, &adapter->state))
+			adapter->vfinfo[vf].link_enable = false;
+		else
+			adapter->vfinfo[vf].link_enable = true;
+		break;
+	case IFLA_VF_LINK_STATE_ENABLE:
+		adapter->vfinfo[vf].link_enable = true;
+		break;
+	case IFLA_VF_LINK_STATE_DISABLE:
+		adapter->vfinfo[vf].link_enable = false;
+		break;
+	}
+
+	ixgbe_set_vf_rx_tx(adapter, vf);
+
+	/* restart the VF */
+	adapter->vfinfo[vf].clear_to_send = false;
+	ixgbe_ping_vf(adapter, vf);
+}
+
+/**
+ * ixgbe_ndo_set_vf_link_state - Set link state
+ * @netdev: network interface device structure
+ * @vf: VF identifier
+ * @state: required link state
+ *
+ * Set the link state of a specified VF, regardless of physical link state
+ **/
+int ixgbe_ndo_set_vf_link_state(struct net_device *netdev, int vf, int state)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	int ret = 0;
+
+	if (vf < 0 || vf >= adapter->num_vfs) {
+		dev_err(&adapter->pdev->dev,
+			"NDO set VF link - invalid VF identifier %d\n", vf);
+		return -EINVAL;
+	}
+
+	switch (state) {
+	case IFLA_VF_LINK_STATE_ENABLE:
+		dev_info(&adapter->pdev->dev,
+			 "NDO set VF %d link state %d - not supported\n",
+			vf, state);
+		break;
+	case IFLA_VF_LINK_STATE_DISABLE:
+		dev_info(&adapter->pdev->dev,
+			 "NDO set VF %d link state disable\n", vf);
+		ixgbe_set_vf_link_state(adapter, vf, state);
+		break;
+	case IFLA_VF_LINK_STATE_AUTO:
+		dev_info(&adapter->pdev->dev,
+			 "NDO set VF %d link state auto\n", vf);
+		ixgbe_set_vf_link_state(adapter, vf, state);
+		break;
+	default:
+		dev_err(&adapter->pdev->dev,
+			"NDO set VF %d - invalid link state %d\n", vf, state);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+int ixgbe_ndo_set_vf_rss_query_en(struct net_device *netdev, int vf,
+				  bool setting)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	/* This operation is currently supported only for 82599 and x540
+	 * devices.
+	 */
+	if (adapter->hw.mac.type < ixgbe_mac_82599EB ||
+	    adapter->hw.mac.type >= ixgbe_mac_X550)
+		return -EOPNOTSUPP;
+
+	if (vf >= adapter->num_vfs)
+		return -EINVAL;
+
+	adapter->vfinfo[vf].rss_query_enabled = setting;
+
+	return 0;
+}
+
+int ixgbe_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+
+	if (vf >= adapter->num_vfs)
+		return -EINVAL;
+
+	/* nothing to do */
+	if (adapter->vfinfo[vf].trusted == setting)
+		return 0;
+
+	adapter->vfinfo[vf].trusted = setting;
+
+	/* reset VF to reconfigure features */
+	adapter->vfinfo[vf].clear_to_send = false;
+	ixgbe_ping_vf(adapter, vf);
+
+	e_info(drv, "VF %u is %strusted\n", vf, setting ? "" : "not ");
+
+	return 0;
+}
+
+int ixgbe_ndo_get_vf_config(struct net_device *netdev,
+			    int vf, struct ifla_vf_info *ivi)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(netdev);
+	if (vf >= adapter->num_vfs)
+		return -EINVAL;
+	ivi->vf = vf;
+	memcpy(&ivi->mac, adapter->vfinfo[vf].vf_mac_addresses, ETH_ALEN);
+	ivi->max_tx_rate = adapter->vfinfo[vf].tx_rate;
+	ivi->min_tx_rate = 0;
+	ivi->vlan = adapter->vfinfo[vf].pf_vlan;
+	ivi->qos = adapter->vfinfo[vf].pf_qos;
+	ivi->spoofchk = adapter->vfinfo[vf].spoofchk_enabled;
+	ivi->rss_query_en = adapter->vfinfo[vf].rss_query_enabled;
+	ivi->trusted = adapter->vfinfo[vf].trusted;
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
new file mode 100644
index 0000000000..0690ecb8df
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _IXGBE_SRIOV_H_
+#define _IXGBE_SRIOV_H_
+
+/*  ixgbe driver limit the max number of VFs could be enabled to
+ *  63 (IXGBE_MAX_VF_FUNCTIONS - 1)
+ */
+#define IXGBE_MAX_VFS_DRV_LIMIT  (IXGBE_MAX_VF_FUNCTIONS - 1)
+#define IXGBE_MAX_VFS_1TC		IXGBE_MAX_VF_FUNCTIONS
+#define IXGBE_MAX_VFS_4TC		32
+#define IXGBE_MAX_VFS_8TC		16
+
+#ifdef CONFIG_PCI_IOV
+void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter);
+#endif
+void ixgbe_msg_task(struct ixgbe_adapter *adapter);
+int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask);
+void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter);
+void ixgbe_set_all_vfs(struct ixgbe_adapter *adapter);
+int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac);
+int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan,
+			   u8 qos, __be16 vlan_proto);
+int ixgbe_link_mbps(struct ixgbe_adapter *adapter);
+int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
+			int max_tx_rate);
+int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
+int ixgbe_ndo_set_vf_rss_query_en(struct net_device *netdev, int vf,
+				  bool setting);
+int ixgbe_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting);
+int ixgbe_ndo_get_vf_config(struct net_device *netdev,
+			    int vf, struct ifla_vf_info *ivi);
+int ixgbe_ndo_set_vf_link_state(struct net_device *netdev, int vf, int state);
+void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter);
+void ixgbe_set_vf_link_state(struct ixgbe_adapter *adapter, int vf, int state);
+int ixgbe_disable_sriov(struct ixgbe_adapter *adapter);
+#ifdef CONFIG_PCI_IOV
+void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs);
+#endif
+int ixgbe_pci_sriov_configure(struct pci_dev *dev, int num_vfs);
+
+static inline void ixgbe_set_vmvir(struct ixgbe_adapter *adapter,
+				   u16 vid, u16 qos, u32 vf)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vmvir = vid | (qos << VLAN_PRIO_SHIFT) | IXGBE_VMVIR_VLANA_DEFAULT;
+
+	IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf), vmvir);
+}
+
+#endif /* _IXGBE_SRIOV_H_ */
+
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c
new file mode 100644
index 0000000000..204844288c
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "ixgbe.h"
+#include "ixgbe_common.h"
+#include "ixgbe_type.h"
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sysfs.h>
+#include <linux/kobject.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/hwmon.h>
+
+/* hwmon callback functions */
+static ssize_t ixgbe_hwmon_show_location(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct hwmon_attr *ixgbe_attr = container_of(attr, struct hwmon_attr,
+						     dev_attr);
+	return sprintf(buf, "loc%u\n",
+		       ixgbe_attr->sensor->location);
+}
+
+static ssize_t ixgbe_hwmon_show_temp(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct hwmon_attr *ixgbe_attr = container_of(attr, struct hwmon_attr,
+						     dev_attr);
+	unsigned int value;
+
+	/* reset the temp field */
+	ixgbe_attr->hw->mac.ops.get_thermal_sensor_data(ixgbe_attr->hw);
+
+	value = ixgbe_attr->sensor->temp;
+
+	/* display millidegree */
+	value *= 1000;
+
+	return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t ixgbe_hwmon_show_cautionthresh(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct hwmon_attr *ixgbe_attr = container_of(attr, struct hwmon_attr,
+						     dev_attr);
+	unsigned int value = ixgbe_attr->sensor->caution_thresh;
+
+	/* display millidegree */
+	value *= 1000;
+
+	return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t ixgbe_hwmon_show_maxopthresh(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct hwmon_attr *ixgbe_attr = container_of(attr, struct hwmon_attr,
+						     dev_attr);
+	unsigned int value = ixgbe_attr->sensor->max_op_thresh;
+
+	/* display millidegree */
+	value *= 1000;
+
+	return sprintf(buf, "%u\n", value);
+}
+
+/**
+ * ixgbe_add_hwmon_attr - Create hwmon attr table for a hwmon sysfs file.
+ * @adapter: pointer to the adapter structure
+ * @offset: offset in the eeprom sensor data table
+ * @type: type of sensor data to display
+ *
+ * For each file we want in hwmon's sysfs interface we need a device_attribute
+ * This is included in our hwmon_attr struct that contains the references to
+ * the data structures we need to get the data to display.
+ */
+static int ixgbe_add_hwmon_attr(struct ixgbe_adapter *adapter,
+				unsigned int offset, int type) {
+	int rc;
+	unsigned int n_attr;
+	struct hwmon_attr *ixgbe_attr;
+
+	n_attr = adapter->ixgbe_hwmon_buff->n_hwmon;
+	ixgbe_attr = &adapter->ixgbe_hwmon_buff->hwmon_list[n_attr];
+
+	switch (type) {
+	case IXGBE_HWMON_TYPE_LOC:
+		ixgbe_attr->dev_attr.show = ixgbe_hwmon_show_location;
+		snprintf(ixgbe_attr->name, sizeof(ixgbe_attr->name),
+			 "temp%u_label", offset + 1);
+		break;
+	case IXGBE_HWMON_TYPE_TEMP:
+		ixgbe_attr->dev_attr.show = ixgbe_hwmon_show_temp;
+		snprintf(ixgbe_attr->name, sizeof(ixgbe_attr->name),
+			 "temp%u_input", offset + 1);
+		break;
+	case IXGBE_HWMON_TYPE_CAUTION:
+		ixgbe_attr->dev_attr.show = ixgbe_hwmon_show_cautionthresh;
+		snprintf(ixgbe_attr->name, sizeof(ixgbe_attr->name),
+			 "temp%u_max", offset + 1);
+		break;
+	case IXGBE_HWMON_TYPE_MAX:
+		ixgbe_attr->dev_attr.show = ixgbe_hwmon_show_maxopthresh;
+		snprintf(ixgbe_attr->name, sizeof(ixgbe_attr->name),
+			 "temp%u_crit", offset + 1);
+		break;
+	default:
+		rc = -EPERM;
+		return rc;
+	}
+
+	/* These always the same regardless of type */
+	ixgbe_attr->sensor =
+		&adapter->hw.mac.thermal_sensor_data.sensor[offset];
+	ixgbe_attr->hw = &adapter->hw;
+	ixgbe_attr->dev_attr.store = NULL;
+	ixgbe_attr->dev_attr.attr.mode = 0444;
+	ixgbe_attr->dev_attr.attr.name = ixgbe_attr->name;
+	sysfs_attr_init(&ixgbe_attr->dev_attr.attr);
+
+	adapter->ixgbe_hwmon_buff->attrs[n_attr] = &ixgbe_attr->dev_attr.attr;
+
+	++adapter->ixgbe_hwmon_buff->n_hwmon;
+
+	return 0;
+}
+
+static void ixgbe_sysfs_del_adapter(struct ixgbe_adapter *adapter)
+{
+}
+
+/* called from ixgbe_main.c */
+void ixgbe_sysfs_exit(struct ixgbe_adapter *adapter)
+{
+	ixgbe_sysfs_del_adapter(adapter);
+}
+
+/* called from ixgbe_main.c */
+int ixgbe_sysfs_init(struct ixgbe_adapter *adapter)
+{
+	struct hwmon_buff *ixgbe_hwmon;
+	struct device *hwmon_dev;
+	unsigned int i;
+	int rc = 0;
+
+	/* If this method isn't defined we don't support thermals */
+	if (adapter->hw.mac.ops.init_thermal_sensor_thresh == NULL) {
+		goto exit;
+	}
+
+	/* Don't create thermal hwmon interface if no sensors present */
+	if (adapter->hw.mac.ops.init_thermal_sensor_thresh(&adapter->hw))
+		goto exit;
+
+	ixgbe_hwmon = devm_kzalloc(&adapter->pdev->dev, sizeof(*ixgbe_hwmon),
+				   GFP_KERNEL);
+	if (ixgbe_hwmon == NULL) {
+		rc = -ENOMEM;
+		goto exit;
+	}
+	adapter->ixgbe_hwmon_buff = ixgbe_hwmon;
+
+	for (i = 0; i < IXGBE_MAX_SENSORS; i++) {
+		/*
+		 * Only create hwmon sysfs entries for sensors that have
+		 * meaningful data for.
+		 */
+		if (adapter->hw.mac.thermal_sensor_data.sensor[i].location == 0)
+			continue;
+
+		/* Bail if any hwmon attr struct fails to initialize */
+		rc = ixgbe_add_hwmon_attr(adapter, i, IXGBE_HWMON_TYPE_CAUTION);
+		if (rc)
+			goto exit;
+		rc = ixgbe_add_hwmon_attr(adapter, i, IXGBE_HWMON_TYPE_LOC);
+		if (rc)
+			goto exit;
+		rc = ixgbe_add_hwmon_attr(adapter, i, IXGBE_HWMON_TYPE_TEMP);
+		if (rc)
+			goto exit;
+		rc = ixgbe_add_hwmon_attr(adapter, i, IXGBE_HWMON_TYPE_MAX);
+		if (rc)
+			goto exit;
+	}
+
+	ixgbe_hwmon->groups[0] = &ixgbe_hwmon->group;
+	ixgbe_hwmon->group.attrs = ixgbe_hwmon->attrs;
+
+	hwmon_dev = devm_hwmon_device_register_with_groups(&adapter->pdev->dev,
+							   "ixgbe",
+							   ixgbe_hwmon,
+							   ixgbe_hwmon->groups);
+	if (IS_ERR(hwmon_dev))
+		rc = PTR_ERR(hwmon_dev);
+exit:
+	return rc;
+}
+
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
new file mode 100644
index 0000000000..f1f69ce674
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2018 Intel Corporation. */
+
+#ifndef _IXGBE_TXRX_COMMON_H_
+#define _IXGBE_TXRX_COMMON_H_
+
+#define IXGBE_XDP_PASS		0
+#define IXGBE_XDP_CONSUMED	BIT(0)
+#define IXGBE_XDP_TX		BIT(1)
+#define IXGBE_XDP_REDIR		BIT(2)
+#define IXGBE_XDP_EXIT		BIT(3)
+
+#define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \
+		       IXGBE_TXD_CMD_RS)
+
+int ixgbe_xmit_xdp_ring(struct ixgbe_ring *ring,
+			struct xdp_frame *xdpf);
+bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
+			   union ixgbe_adv_rx_desc *rx_desc,
+			   struct sk_buff *skb);
+void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
+			      union ixgbe_adv_rx_desc *rx_desc,
+			      struct sk_buff *skb);
+void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
+		  struct sk_buff *skb);
+void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring);
+void ixgbe_xdp_ring_update_tail_locked(struct ixgbe_ring *ring);
+void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter, u64 qmask);
+
+void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring);
+void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring);
+
+struct xsk_buff_pool *ixgbe_xsk_pool(struct ixgbe_adapter *adapter,
+				     struct ixgbe_ring *ring);
+int ixgbe_xsk_pool_setup(struct ixgbe_adapter *adapter,
+			 struct xsk_buff_pool *pool,
+			 u16 qid);
+
+bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count);
+int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
+			  struct ixgbe_ring *rx_ring,
+			  const int budget);
+void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring);
+bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
+			    struct ixgbe_ring *tx_ring, int napi_budget);
+int ixgbe_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
+void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring);
+
+#endif /* #define _IXGBE_TXRX_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
new file mode 100644
index 0000000000..2b00db92b0
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -0,0 +1,3816 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _IXGBE_TYPE_H_
+#define _IXGBE_TYPE_H_
+
+#include <linux/types.h>
+#include <linux/mdio.h>
+#include <linux/netdevice.h>
+
+/* Device IDs */
+#define IXGBE_DEV_ID_82598               0x10B6
+#define IXGBE_DEV_ID_82598_BX            0x1508
+#define IXGBE_DEV_ID_82598AF_DUAL_PORT   0x10C6
+#define IXGBE_DEV_ID_82598AF_SINGLE_PORT 0x10C7
+#define IXGBE_DEV_ID_82598EB_SFP_LOM     0x10DB
+#define IXGBE_DEV_ID_82598AT             0x10C8
+#define IXGBE_DEV_ID_82598AT2            0x150B
+#define IXGBE_DEV_ID_82598EB_CX4         0x10DD
+#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT 0x10EC
+#define IXGBE_DEV_ID_82598_DA_DUAL_PORT  0x10F1
+#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM      0x10E1
+#define IXGBE_DEV_ID_82598EB_XF_LR       0x10F4
+#define IXGBE_DEV_ID_82599_KX4           0x10F7
+#define IXGBE_DEV_ID_82599_KX4_MEZZ      0x1514
+#define IXGBE_DEV_ID_82599_KR            0x1517
+#define IXGBE_DEV_ID_82599_T3_LOM        0x151C
+#define IXGBE_DEV_ID_82599_CX4           0x10F9
+#define IXGBE_DEV_ID_82599_SFP           0x10FB
+#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE       0x152a
+#define IXGBE_DEV_ID_82599_SFP_FCOE      0x1529
+#define IXGBE_SUBDEV_ID_82599_SFP        0x11A9
+#define IXGBE_SUBDEV_ID_82599_SFP_WOL0   0x1071
+#define IXGBE_SUBDEV_ID_82599_RNDC       0x1F72
+#define IXGBE_SUBDEV_ID_82599_560FLR     0x17D0
+#define IXGBE_SUBDEV_ID_82599_SP_560FLR  0x211B
+#define IXGBE_SUBDEV_ID_82599_LOM_SNAP6		0x2159
+#define IXGBE_SUBDEV_ID_82599_SFP_1OCP		0x000D
+#define IXGBE_SUBDEV_ID_82599_SFP_2OCP		0x0008
+#define IXGBE_SUBDEV_ID_82599_SFP_LOM_OEM1	0x8976
+#define IXGBE_SUBDEV_ID_82599_SFP_LOM_OEM2	0x06EE
+#define IXGBE_SUBDEV_ID_82599_ECNA_DP    0x0470
+#define IXGBE_DEV_ID_82599_SFP_EM        0x1507
+#define IXGBE_DEV_ID_82599_SFP_SF2       0x154D
+#define IXGBE_DEV_ID_82599EN_SFP         0x1557
+#define IXGBE_SUBDEV_ID_82599EN_SFP_OCP1 0x0001
+#define IXGBE_DEV_ID_82599_XAUI_LOM      0x10FC
+#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8
+#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ  0x000C
+#define IXGBE_DEV_ID_82599_LS            0x154F
+#define IXGBE_DEV_ID_X540T               0x1528
+#define IXGBE_DEV_ID_82599_SFP_SF_QP     0x154A
+#define IXGBE_DEV_ID_82599_QSFP_SF_QP    0x1558
+#define IXGBE_DEV_ID_X540T1              0x1560
+
+#define IXGBE_DEV_ID_X550T		0x1563
+#define IXGBE_DEV_ID_X550T1		0x15D1
+#define IXGBE_DEV_ID_X550EM_X_KX4	0x15AA
+#define IXGBE_DEV_ID_X550EM_X_KR	0x15AB
+#define IXGBE_DEV_ID_X550EM_X_SFP	0x15AC
+#define IXGBE_DEV_ID_X550EM_X_10G_T	0x15AD
+#define IXGBE_DEV_ID_X550EM_X_1G_T	0x15AE
+#define IXGBE_DEV_ID_X550EM_X_XFI	0x15B0
+#define IXGBE_DEV_ID_X550EM_A_KR	0x15C2
+#define IXGBE_DEV_ID_X550EM_A_KR_L	0x15C3
+#define IXGBE_DEV_ID_X550EM_A_SFP_N	0x15C4
+#define IXGBE_DEV_ID_X550EM_A_SGMII	0x15C6
+#define IXGBE_DEV_ID_X550EM_A_SGMII_L	0x15C7
+#define IXGBE_DEV_ID_X550EM_A_10G_T	0x15C8
+#define IXGBE_DEV_ID_X550EM_A_SFP	0x15CE
+#define IXGBE_DEV_ID_X550EM_A_1G_T	0x15E4
+#define IXGBE_DEV_ID_X550EM_A_1G_T_L	0x15E5
+
+/* VF Device IDs */
+#define IXGBE_DEV_ID_82599_VF		0x10ED
+#define IXGBE_DEV_ID_X540_VF		0x1515
+#define IXGBE_DEV_ID_X550_VF		0x1565
+#define IXGBE_DEV_ID_X550EM_X_VF	0x15A8
+#define IXGBE_DEV_ID_X550EM_A_VF	0x15C5
+
+#define IXGBE_CAT(r, m)	IXGBE_##r##_##m
+
+#define IXGBE_BY_MAC(_hw, r)	((_hw)->mvals[IXGBE_CAT(r, IDX)])
+
+/* General Registers */
+#define IXGBE_CTRL      0x00000
+#define IXGBE_STATUS    0x00008
+#define IXGBE_CTRL_EXT  0x00018
+#define IXGBE_ESDP      0x00020
+#define IXGBE_EODSDP    0x00028
+
+#define IXGBE_I2CCTL_8259X	0x00028
+#define IXGBE_I2CCTL_X540	IXGBE_I2CCTL_8259X
+#define IXGBE_I2CCTL_X550	0x15F5C
+#define IXGBE_I2CCTL_X550EM_x	IXGBE_I2CCTL_X550
+#define IXGBE_I2CCTL_X550EM_a	IXGBE_I2CCTL_X550
+#define IXGBE_I2CCTL(_hw)	IXGBE_BY_MAC((_hw), I2CCTL)
+
+#define IXGBE_LEDCTL    0x00200
+#define IXGBE_FRTIMER   0x00048
+#define IXGBE_TCPTIMER  0x0004C
+#define IXGBE_CORESPARE 0x00600
+#define IXGBE_EXVET     0x05078
+
+/* NVM Registers */
+#define IXGBE_EEC_8259X		0x10010
+#define IXGBE_EEC_X540		IXGBE_EEC_8259X
+#define IXGBE_EEC_X550		IXGBE_EEC_8259X
+#define IXGBE_EEC_X550EM_x	IXGBE_EEC_8259X
+#define IXGBE_EEC_X550EM_a	0x15FF8
+#define IXGBE_EEC(_hw)		IXGBE_BY_MAC((_hw), EEC)
+#define IXGBE_EERD      0x10014
+#define IXGBE_EEWR      0x10018
+#define IXGBE_FLA_8259X		0x1001C
+#define IXGBE_FLA_X540		IXGBE_FLA_8259X
+#define IXGBE_FLA_X550		IXGBE_FLA_8259X
+#define IXGBE_FLA_X550EM_x	IXGBE_FLA_8259X
+#define IXGBE_FLA_X550EM_a	0x15F68
+#define IXGBE_FLA(_hw)		IXGBE_BY_MAC((_hw), FLA)
+#define IXGBE_EEMNGCTL  0x10110
+#define IXGBE_EEMNGDATA 0x10114
+#define IXGBE_FLMNGCTL  0x10118
+#define IXGBE_FLMNGDATA 0x1011C
+#define IXGBE_FLMNGCNT  0x10120
+#define IXGBE_FLOP      0x1013C
+#define IXGBE_GRC_8259X		0x10200
+#define IXGBE_GRC_X540		IXGBE_GRC_8259X
+#define IXGBE_GRC_X550		IXGBE_GRC_8259X
+#define IXGBE_GRC_X550EM_x	IXGBE_GRC_8259X
+#define IXGBE_GRC_X550EM_a	0x15F64
+#define IXGBE_GRC(_hw)		IXGBE_BY_MAC((_hw), GRC)
+
+/* General Receive Control */
+#define IXGBE_GRC_MNG  0x00000001 /* Manageability Enable */
+#define IXGBE_GRC_APME 0x00000002 /* APM enabled in EEPROM */
+
+#define IXGBE_VPDDIAG0  0x10204
+#define IXGBE_VPDDIAG1  0x10208
+
+/* I2CCTL Bit Masks */
+#define IXGBE_I2C_CLK_IN_8259X		0x00000001
+#define IXGBE_I2C_CLK_IN_X540		IXGBE_I2C_CLK_IN_8259X
+#define IXGBE_I2C_CLK_IN_X550		0x00004000
+#define IXGBE_I2C_CLK_IN_X550EM_x	IXGBE_I2C_CLK_IN_X550
+#define IXGBE_I2C_CLK_IN_X550EM_a	IXGBE_I2C_CLK_IN_X550
+#define IXGBE_I2C_CLK_IN(_hw)		IXGBE_BY_MAC((_hw), I2C_CLK_IN)
+
+#define IXGBE_I2C_CLK_OUT_8259X		0x00000002
+#define IXGBE_I2C_CLK_OUT_X540		IXGBE_I2C_CLK_OUT_8259X
+#define IXGBE_I2C_CLK_OUT_X550		0x00000200
+#define IXGBE_I2C_CLK_OUT_X550EM_x	IXGBE_I2C_CLK_OUT_X550
+#define IXGBE_I2C_CLK_OUT_X550EM_a	IXGBE_I2C_CLK_OUT_X550
+#define IXGBE_I2C_CLK_OUT(_hw)		IXGBE_BY_MAC((_hw), I2C_CLK_OUT)
+
+#define IXGBE_I2C_DATA_IN_8259X		0x00000004
+#define IXGBE_I2C_DATA_IN_X540		IXGBE_I2C_DATA_IN_8259X
+#define IXGBE_I2C_DATA_IN_X550		0x00001000
+#define IXGBE_I2C_DATA_IN_X550EM_x	IXGBE_I2C_DATA_IN_X550
+#define IXGBE_I2C_DATA_IN_X550EM_a	IXGBE_I2C_DATA_IN_X550
+#define IXGBE_I2C_DATA_IN(_hw)		IXGBE_BY_MAC((_hw), I2C_DATA_IN)
+
+#define IXGBE_I2C_DATA_OUT_8259X	0x00000008
+#define IXGBE_I2C_DATA_OUT_X540		IXGBE_I2C_DATA_OUT_8259X
+#define IXGBE_I2C_DATA_OUT_X550		0x00000400
+#define IXGBE_I2C_DATA_OUT_X550EM_x	IXGBE_I2C_DATA_OUT_X550
+#define IXGBE_I2C_DATA_OUT_X550EM_a	IXGBE_I2C_DATA_OUT_X550
+#define IXGBE_I2C_DATA_OUT(_hw)		IXGBE_BY_MAC((_hw), I2C_DATA_OUT)
+
+#define IXGBE_I2C_DATA_OE_N_EN_8259X	0
+#define IXGBE_I2C_DATA_OE_N_EN_X540	IXGBE_I2C_DATA_OE_N_EN_8259X
+#define IXGBE_I2C_DATA_OE_N_EN_X550	0x00000800
+#define IXGBE_I2C_DATA_OE_N_EN_X550EM_x	IXGBE_I2C_DATA_OE_N_EN_X550
+#define IXGBE_I2C_DATA_OE_N_EN_X550EM_a	IXGBE_I2C_DATA_OE_N_EN_X550
+#define IXGBE_I2C_DATA_OE_N_EN(_hw)	IXGBE_BY_MAC((_hw), I2C_DATA_OE_N_EN)
+
+#define IXGBE_I2C_BB_EN_8259X		0
+#define IXGBE_I2C_BB_EN_X540		IXGBE_I2C_BB_EN_8259X
+#define IXGBE_I2C_BB_EN_X550		0x00000100
+#define IXGBE_I2C_BB_EN_X550EM_x	IXGBE_I2C_BB_EN_X550
+#define IXGBE_I2C_BB_EN_X550EM_a	IXGBE_I2C_BB_EN_X550
+#define IXGBE_I2C_BB_EN(_hw)		IXGBE_BY_MAC((_hw), I2C_BB_EN)
+
+#define IXGBE_I2C_CLK_OE_N_EN_8259X	0
+#define IXGBE_I2C_CLK_OE_N_EN_X540	IXGBE_I2C_CLK_OE_N_EN_8259X
+#define IXGBE_I2C_CLK_OE_N_EN_X550	0x00002000
+#define IXGBE_I2C_CLK_OE_N_EN_X550EM_x	IXGBE_I2C_CLK_OE_N_EN_X550
+#define IXGBE_I2C_CLK_OE_N_EN_X550EM_a	IXGBE_I2C_CLK_OE_N_EN_X550
+#define IXGBE_I2C_CLK_OE_N_EN(_hw)	 IXGBE_BY_MAC((_hw), I2C_CLK_OE_N_EN)
+
+#define IXGBE_I2C_CLOCK_STRETCHING_TIMEOUT	500
+
+#define IXGBE_I2C_THERMAL_SENSOR_ADDR	0xF8
+#define IXGBE_EMC_INTERNAL_DATA		0x00
+#define IXGBE_EMC_INTERNAL_THERM_LIMIT	0x20
+#define IXGBE_EMC_DIODE1_DATA		0x01
+#define IXGBE_EMC_DIODE1_THERM_LIMIT	0x19
+#define IXGBE_EMC_DIODE2_DATA		0x23
+#define IXGBE_EMC_DIODE2_THERM_LIMIT	0x1A
+
+#define IXGBE_MAX_SENSORS		3
+
+struct ixgbe_thermal_diode_data {
+	u8 location;
+	u8 temp;
+	u8 caution_thresh;
+	u8 max_op_thresh;
+};
+
+struct ixgbe_thermal_sensor_data {
+	struct ixgbe_thermal_diode_data sensor[IXGBE_MAX_SENSORS];
+};
+
+#define NVM_OROM_OFFSET		0x17
+#define NVM_OROM_BLK_LOW	0x83
+#define NVM_OROM_BLK_HI		0x84
+#define NVM_OROM_PATCH_MASK	0xFF
+#define NVM_OROM_SHIFT		8
+
+#define NVM_VER_MASK		0x00FF	/* version mask */
+#define NVM_VER_SHIFT		8	/* version bit shift */
+#define NVM_OEM_PROD_VER_PTR	0x1B /* OEM Product version block pointer */
+#define NVM_OEM_PROD_VER_CAP_OFF 0x1 /* OEM Product version format offset */
+#define NVM_OEM_PROD_VER_OFF_L	0x2  /* OEM Product version offset low */
+#define NVM_OEM_PROD_VER_OFF_H	0x3  /* OEM Product version offset high */
+#define NVM_OEM_PROD_VER_CAP_MASK 0xF /* OEM Product version cap mask */
+#define NVM_OEM_PROD_VER_MOD_LEN 0x3 /* OEM Product version module length */
+#define NVM_ETK_OFF_LOW		0x2D /* version low order word */
+#define NVM_ETK_OFF_HI		0x2E /* version high order word */
+#define NVM_ETK_SHIFT		16   /* high version word shift */
+#define NVM_VER_INVALID		0xFFFF
+#define NVM_ETK_VALID		0x8000
+#define NVM_INVALID_PTR		0xFFFF
+#define NVM_VER_SIZE		32   /* version sting size */
+
+struct ixgbe_nvm_version {
+	u32 etk_id;
+	u8  nvm_major;
+	u16 nvm_minor;
+	u8  nvm_id;
+
+	bool oem_valid;
+	u8   oem_major;
+	u8   oem_minor;
+	u16  oem_release;
+
+	bool or_valid;
+	u8  or_major;
+	u16 or_build;
+	u8  or_patch;
+};
+
+/* Interrupt Registers */
+#define IXGBE_EICR      0x00800
+#define IXGBE_EICS      0x00808
+#define IXGBE_EIMS      0x00880
+#define IXGBE_EIMC      0x00888
+#define IXGBE_EIAC      0x00810
+#define IXGBE_EIAM      0x00890
+#define IXGBE_EICS_EX(_i)   (0x00A90 + (_i) * 4)
+#define IXGBE_EIMS_EX(_i)   (0x00AA0 + (_i) * 4)
+#define IXGBE_EIMC_EX(_i)   (0x00AB0 + (_i) * 4)
+#define IXGBE_EIAM_EX(_i)   (0x00AD0 + (_i) * 4)
+/*
+ * 82598 EITR is 16 bits but set the limits based on the max
+ * supported by all ixgbe hardware.  82599 EITR is only 12 bits,
+ * with the lower 3 always zero.
+ */
+#define IXGBE_MAX_INT_RATE 488281
+#define IXGBE_MIN_INT_RATE 956
+#define IXGBE_MAX_EITR     0x00000FF8
+#define IXGBE_MIN_EITR     8
+#define IXGBE_EITR(_i)  (((_i) <= 23) ? (0x00820 + ((_i) * 4)) : \
+			 (0x012300 + (((_i) - 24) * 4)))
+#define IXGBE_EITR_ITR_INT_MASK 0x00000FF8
+#define IXGBE_EITR_LLI_MOD      0x00008000
+#define IXGBE_EITR_CNT_WDIS     0x80000000
+#define IXGBE_IVAR(_i)  (0x00900 + ((_i) * 4)) /* 24 at 0x900-0x960 */
+#define IXGBE_IVAR_MISC 0x00A00 /* misc MSI-X interrupt causes */
+#define IXGBE_EITRSEL   0x00894
+#define IXGBE_MSIXT     0x00000 /* MSI-X Table. 0x0000 - 0x01C */
+#define IXGBE_MSIXPBA   0x02000 /* MSI-X Pending bit array */
+#define IXGBE_PBACL(_i) (((_i) == 0) ? (0x11068) : (0x110C0 + ((_i) * 4)))
+#define IXGBE_GPIE      0x00898
+
+/* Flow Control Registers */
+#define IXGBE_FCADBUL   0x03210
+#define IXGBE_FCADBUH   0x03214
+#define IXGBE_FCAMACL   0x04328
+#define IXGBE_FCAMACH   0x0432C
+#define IXGBE_FCRTH_82599(_i) (0x03260 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_FCRTL_82599(_i) (0x03220 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_PFCTOP    0x03008
+#define IXGBE_FCTTV(_i) (0x03200 + ((_i) * 4)) /* 4 of these (0-3) */
+#define IXGBE_FCRTL(_i) (0x03220 + ((_i) * 8)) /* 8 of these (0-7) */
+#define IXGBE_FCRTH(_i) (0x03260 + ((_i) * 8)) /* 8 of these (0-7) */
+#define IXGBE_FCRTV     0x032A0
+#define IXGBE_FCCFG     0x03D00
+#define IXGBE_TFCS      0x0CE00
+
+/* Receive DMA Registers */
+#define IXGBE_RDBAL(_i) (((_i) < 64) ? (0x01000 + ((_i) * 0x40)) : \
+			 (0x0D000 + (((_i) - 64) * 0x40)))
+#define IXGBE_RDBAH(_i) (((_i) < 64) ? (0x01004 + ((_i) * 0x40)) : \
+			 (0x0D004 + (((_i) - 64) * 0x40)))
+#define IXGBE_RDLEN(_i) (((_i) < 64) ? (0x01008 + ((_i) * 0x40)) : \
+			 (0x0D008 + (((_i) - 64) * 0x40)))
+#define IXGBE_RDH(_i)   (((_i) < 64) ? (0x01010 + ((_i) * 0x40)) : \
+			 (0x0D010 + (((_i) - 64) * 0x40)))
+#define IXGBE_RDT(_i)   (((_i) < 64) ? (0x01018 + ((_i) * 0x40)) : \
+			 (0x0D018 + (((_i) - 64) * 0x40)))
+#define IXGBE_RXDCTL(_i) (((_i) < 64) ? (0x01028 + ((_i) * 0x40)) : \
+			 (0x0D028 + (((_i) - 64) * 0x40)))
+#define IXGBE_RSCCTL(_i) (((_i) < 64) ? (0x0102C + ((_i) * 0x40)) : \
+			 (0x0D02C + (((_i) - 64) * 0x40)))
+#define IXGBE_RSCDBU     0x03028
+#define IXGBE_RDDCC      0x02F20
+#define IXGBE_RXMEMWRAP  0x03190
+#define IXGBE_STARCTRL   0x03024
+/*
+ * Split and Replication Receive Control Registers
+ * 00-15 : 0x02100 + n*4
+ * 16-64 : 0x01014 + n*0x40
+ * 64-127: 0x0D014 + (n-64)*0x40
+ */
+#define IXGBE_SRRCTL(_i) (((_i) <= 15) ? (0x02100 + ((_i) * 4)) : \
+			  (((_i) < 64) ? (0x01014 + ((_i) * 0x40)) : \
+			  (0x0D014 + (((_i) - 64) * 0x40))))
+/*
+ * Rx DCA Control Register:
+ * 00-15 : 0x02200 + n*4
+ * 16-64 : 0x0100C + n*0x40
+ * 64-127: 0x0D00C + (n-64)*0x40
+ */
+#define IXGBE_DCA_RXCTRL(_i)    (((_i) <= 15) ? (0x02200 + ((_i) * 4)) : \
+				 (((_i) < 64) ? (0x0100C + ((_i) * 0x40)) : \
+				 (0x0D00C + (((_i) - 64) * 0x40))))
+#define IXGBE_RDRXCTL           0x02F00
+#define IXGBE_RXPBSIZE(_i)      (0x03C00 + ((_i) * 4))
+					     /* 8 of these 0x03C00 - 0x03C1C */
+#define IXGBE_RXCTRL    0x03000
+#define IXGBE_DROPEN    0x03D04
+#define IXGBE_RXPBSIZE_SHIFT 10
+
+/* Receive Registers */
+#define IXGBE_RXCSUM    0x05000
+#define IXGBE_RFCTL     0x05008
+#define IXGBE_DRECCCTL  0x02F08
+#define IXGBE_DRECCCTL_DISABLE 0
+/* Multicast Table Array - 128 entries */
+#define IXGBE_MTA(_i)   (0x05200 + ((_i) * 4))
+#define IXGBE_RAL(_i)   (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \
+			 (0x0A200 + ((_i) * 8)))
+#define IXGBE_RAH(_i)   (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
+			 (0x0A204 + ((_i) * 8)))
+#define IXGBE_MPSAR_LO(_i) (0x0A600 + ((_i) * 8))
+#define IXGBE_MPSAR_HI(_i) (0x0A604 + ((_i) * 8))
+/* Packet split receive type */
+#define IXGBE_PSRTYPE(_i)    (((_i) <= 15) ? (0x05480 + ((_i) * 4)) : \
+			      (0x0EA00 + ((_i) * 4)))
+/* array of 4096 1-bit vlan filters */
+#define IXGBE_VFTA(_i)  (0x0A000 + ((_i) * 4))
+/*array of 4096 4-bit vlan vmdq indices */
+#define IXGBE_VFTAVIND(_j, _i)  (0x0A200 + ((_j) * 0x200) + ((_i) * 4))
+#define IXGBE_FCTRL     0x05080
+#define IXGBE_VLNCTRL   0x05088
+#define IXGBE_MCSTCTRL  0x05090
+#define IXGBE_MRQC      0x05818
+#define IXGBE_SAQF(_i)  (0x0E000 + ((_i) * 4)) /* Source Address Queue Filter */
+#define IXGBE_DAQF(_i)  (0x0E200 + ((_i) * 4)) /* Dest. Address Queue Filter */
+#define IXGBE_SDPQF(_i) (0x0E400 + ((_i) * 4)) /* Src Dest. Addr Queue Filter */
+#define IXGBE_FTQF(_i)  (0x0E600 + ((_i) * 4)) /* Five Tuple Queue Filter */
+#define IXGBE_ETQF(_i)  (0x05128 + ((_i) * 4)) /* EType Queue Filter */
+#define IXGBE_ETQS(_i)  (0x0EC00 + ((_i) * 4)) /* EType Queue Select */
+#define IXGBE_SYNQF     0x0EC30 /* SYN Packet Queue Filter */
+#define IXGBE_RQTC      0x0EC70
+#define IXGBE_MTQC      0x08120
+#define IXGBE_VLVF(_i)  (0x0F100 + ((_i) * 4))  /* 64 of these (0-63) */
+#define IXGBE_VLVFB(_i) (0x0F200 + ((_i) * 4))  /* 128 of these (0-127) */
+#define IXGBE_VMVIR(_i) (0x08000 + ((_i) * 4))  /* 64 of these (0-63) */
+#define IXGBE_PFFLPL	0x050B0
+#define IXGBE_PFFLPH	0x050B4
+#define IXGBE_VT_CTL         0x051B0
+#define IXGBE_PFMAILBOX(_i)  (0x04B00 + (4 * (_i))) /* 64 total */
+#define IXGBE_PFMBMEM(_i)    (0x13000 + (64 * (_i))) /* 64 Mailboxes, 16 DW each */
+#define IXGBE_PFMBICR(_i)    (0x00710 + (4 * (_i))) /* 4 total */
+#define IXGBE_PFMBIMR(_i)    (0x00720 + (4 * (_i))) /* 4 total */
+#define IXGBE_VFRE(_i)       (0x051E0 + ((_i) * 4))
+#define IXGBE_VFTE(_i)       (0x08110 + ((_i) * 4))
+#define IXGBE_VMECM(_i)      (0x08790 + ((_i) * 4))
+#define IXGBE_QDE            0x2F04
+#define IXGBE_VMTXSW(_i)     (0x05180 + ((_i) * 4)) /* 2 total */
+#define IXGBE_VMOLR(_i)      (0x0F000 + ((_i) * 4)) /* 64 total */
+#define IXGBE_UTA(_i)        (0x0F400 + ((_i) * 4))
+#define IXGBE_MRCTL(_i)      (0x0F600 + ((_i) * 4))
+#define IXGBE_VMRVLAN(_i)    (0x0F610 + ((_i) * 4))
+#define IXGBE_VMRVM(_i)      (0x0F630 + ((_i) * 4))
+#define IXGBE_WQBR_RX(_i)    (0x2FB0 + ((_i) * 4)) /* 4 total */
+#define IXGBE_WQBR_TX(_i)    (0x8130 + ((_i) * 4)) /* 4 total */
+#define IXGBE_L34T_IMIR(_i)  (0x0E800 + ((_i) * 4)) /*128 of these (0-127)*/
+#define IXGBE_RXFECCERR0         0x051B8
+#define IXGBE_LLITHRESH 0x0EC90
+#define IXGBE_IMIR(_i)  (0x05A80 + ((_i) * 4))  /* 8 of these (0-7) */
+#define IXGBE_IMIREXT(_i)       (0x05AA0 + ((_i) * 4))  /* 8 of these (0-7) */
+#define IXGBE_IMIRVP    0x05AC0
+#define IXGBE_VMD_CTL   0x0581C
+#define IXGBE_RETA(_i)  (0x05C00 + ((_i) * 4))  /* 32 of these (0-31) */
+#define IXGBE_ERETA(_i)	(0x0EE80 + ((_i) * 4))  /* 96 of these (0-95) */
+#define IXGBE_RSSRK(_i) (0x05C80 + ((_i) * 4))  /* 10 of these (0-9) */
+
+/* Registers for setting up RSS on X550 with SRIOV
+ * _p - pool number (0..63)
+ * _i - index (0..10 for PFVFRSSRK, 0..15 for PFVFRETA)
+ */
+#define IXGBE_PFVFMRQC(_p)	(0x03400 + ((_p) * 4))
+#define IXGBE_PFVFRSSRK(_i, _p)	(0x018000 + ((_i) * 4) + ((_p) * 0x40))
+#define IXGBE_PFVFRETA(_i, _p)	(0x019000 + ((_i) * 4) + ((_p) * 0x40))
+
+/* Flow Director registers */
+#define IXGBE_FDIRCTRL  0x0EE00
+#define IXGBE_FDIRHKEY  0x0EE68
+#define IXGBE_FDIRSKEY  0x0EE6C
+#define IXGBE_FDIRDIP4M 0x0EE3C
+#define IXGBE_FDIRSIP4M 0x0EE40
+#define IXGBE_FDIRTCPM  0x0EE44
+#define IXGBE_FDIRUDPM  0x0EE48
+#define IXGBE_FDIRSCTPM	0x0EE78
+#define IXGBE_FDIRIP6M  0x0EE74
+#define IXGBE_FDIRM     0x0EE70
+
+/* Flow Director Stats registers */
+#define IXGBE_FDIRFREE  0x0EE38
+#define IXGBE_FDIRLEN   0x0EE4C
+#define IXGBE_FDIRUSTAT 0x0EE50
+#define IXGBE_FDIRFSTAT 0x0EE54
+#define IXGBE_FDIRMATCH 0x0EE58
+#define IXGBE_FDIRMISS  0x0EE5C
+
+/* Flow Director Programming registers */
+#define IXGBE_FDIRSIPv6(_i) (0x0EE0C + ((_i) * 4)) /* 3 of these (0-2) */
+#define IXGBE_FDIRIPSA      0x0EE18
+#define IXGBE_FDIRIPDA      0x0EE1C
+#define IXGBE_FDIRPORT      0x0EE20
+#define IXGBE_FDIRVLAN      0x0EE24
+#define IXGBE_FDIRHASH      0x0EE28
+#define IXGBE_FDIRCMD       0x0EE2C
+
+/* Transmit DMA registers */
+#define IXGBE_TDBAL(_i) (0x06000 + ((_i) * 0x40)) /* 32 of these (0-31)*/
+#define IXGBE_TDBAH(_i) (0x06004 + ((_i) * 0x40))
+#define IXGBE_TDLEN(_i) (0x06008 + ((_i) * 0x40))
+#define IXGBE_TDH(_i)   (0x06010 + ((_i) * 0x40))
+#define IXGBE_TDT(_i)   (0x06018 + ((_i) * 0x40))
+#define IXGBE_TXDCTL(_i) (0x06028 + ((_i) * 0x40))
+#define IXGBE_TDWBAL(_i) (0x06038 + ((_i) * 0x40))
+#define IXGBE_TDWBAH(_i) (0x0603C + ((_i) * 0x40))
+#define IXGBE_DTXCTL    0x07E00
+
+#define IXGBE_DMATXCTL      0x04A80
+#define IXGBE_PFVFSPOOF(_i) (0x08200 + ((_i) * 4)) /* 8 of these 0 - 7 */
+#define IXGBE_PFDTXGSWC     0x08220
+#define IXGBE_DTXMXSZRQ     0x08100
+#define IXGBE_DTXTCPFLGL    0x04A88
+#define IXGBE_DTXTCPFLGH    0x04A8C
+#define IXGBE_LBDRPEN       0x0CA00
+#define IXGBE_TXPBTHRESH(_i) (0x04950 + ((_i) * 4)) /* 8 of these 0 - 7 */
+
+#define IXGBE_DMATXCTL_TE       0x1 /* Transmit Enable */
+#define IXGBE_DMATXCTL_NS       0x2 /* No Snoop LSO hdr buffer */
+#define IXGBE_DMATXCTL_GDV      0x8 /* Global Double VLAN */
+#define IXGBE_DMATXCTL_MDP_EN   0x20 /* Bit 5 */
+#define IXGBE_DMATXCTL_MBINTEN  0x40 /* Bit 6 */
+#define IXGBE_DMATXCTL_VT_SHIFT 16  /* VLAN EtherType */
+
+#define IXGBE_PFDTXGSWC_VT_LBEN 0x1 /* Local L2 VT switch enable */
+
+/* Anti-spoofing defines */
+#define IXGBE_SPOOF_MACAS_MASK          0xFF
+#define IXGBE_SPOOF_VLANAS_MASK         0xFF00
+#define IXGBE_SPOOF_VLANAS_SHIFT        8
+#define IXGBE_SPOOF_ETHERTYPEAS		0xFF000000
+#define IXGBE_SPOOF_ETHERTYPEAS_SHIFT	16
+#define IXGBE_PFVFSPOOF_REG_COUNT       8
+
+#define IXGBE_DCA_TXCTRL(_i)    (0x07200 + ((_i) * 4)) /* 16 of these (0-15) */
+/* Tx DCA Control register : 128 of these (0-127) */
+#define IXGBE_DCA_TXCTRL_82599(_i)  (0x0600C + ((_i) * 0x40))
+#define IXGBE_TIPG      0x0CB00
+#define IXGBE_TXPBSIZE(_i)      (0x0CC00 + ((_i) * 4)) /* 8 of these */
+#define IXGBE_MNGTXMAP  0x0CD10
+#define IXGBE_TIPG_FIBER_DEFAULT 3
+#define IXGBE_TXPBSIZE_SHIFT    10
+
+/* Wake up registers */
+#define IXGBE_WUC       0x05800
+#define IXGBE_WUFC      0x05808
+#define IXGBE_WUS       0x05810
+#define IXGBE_IPAV      0x05838
+#define IXGBE_IP4AT     0x05840 /* IPv4 table 0x5840-0x5858 */
+#define IXGBE_IP6AT     0x05880 /* IPv6 table 0x5880-0x588F */
+
+#define IXGBE_WUPL      0x05900
+#define IXGBE_WUPM      0x05A00 /* wake up pkt memory 0x5A00-0x5A7C */
+#define IXGBE_VXLANCTRL	0x0000507C /* Rx filter VXLAN UDPPORT Register */
+#define IXGBE_FHFT(_n)	(0x09000 + ((_n) * 0x100)) /* Flex host filter table */
+#define IXGBE_FHFT_EXT(_n)	(0x09800 + ((_n) * 0x100)) /* Ext Flexible Host
+							    * Filter Table */
+
+/* masks for accessing VXLAN and GENEVE UDP ports */
+#define IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK     0x0000ffff /* VXLAN port */
+#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK    0xffff0000 /* GENEVE port */
+#define IXGBE_VXLANCTRL_ALL_UDPPORT_MASK       0xffffffff /* GENEVE/VXLAN */
+
+#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT   16
+
+#define IXGBE_FLEXIBLE_FILTER_COUNT_MAX         4
+#define IXGBE_EXT_FLEXIBLE_FILTER_COUNT_MAX     2
+
+/* Each Flexible Filter is at most 128 (0x80) bytes in length */
+#define IXGBE_FLEXIBLE_FILTER_SIZE_MAX  128
+#define IXGBE_FHFT_LENGTH_OFFSET        0xFC  /* Length byte in FHFT */
+#define IXGBE_FHFT_LENGTH_MASK          0x0FF /* Length in lower byte */
+
+/* Definitions for power management and wakeup registers */
+/* Wake Up Control */
+#define IXGBE_WUC_PME_EN     0x00000002 /* PME Enable */
+#define IXGBE_WUC_PME_STATUS 0x00000004 /* PME Status */
+#define IXGBE_WUC_WKEN       0x00000010 /* Enable PE_WAKE_N pin assertion  */
+
+/* Wake Up Filter Control */
+#define IXGBE_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */
+#define IXGBE_WUFC_MAG  0x00000002 /* Magic Packet Wakeup Enable */
+#define IXGBE_WUFC_EX   0x00000004 /* Directed Exact Wakeup Enable */
+#define IXGBE_WUFC_MC   0x00000008 /* Directed Multicast Wakeup Enable */
+#define IXGBE_WUFC_BC   0x00000010 /* Broadcast Wakeup Enable */
+#define IXGBE_WUFC_ARP  0x00000020 /* ARP Request Packet Wakeup Enable */
+#define IXGBE_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */
+#define IXGBE_WUFC_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Enable */
+#define IXGBE_WUFC_MNG  0x00000100 /* Directed Mgmt Packet Wakeup Enable */
+
+#define IXGBE_WUFC_IGNORE_TCO   0x00008000 /* Ignore WakeOn TCO packets */
+#define IXGBE_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */
+#define IXGBE_WUFC_FLX1 0x00020000 /* Flexible Filter 1 Enable */
+#define IXGBE_WUFC_FLX2 0x00040000 /* Flexible Filter 2 Enable */
+#define IXGBE_WUFC_FLX3 0x00080000 /* Flexible Filter 3 Enable */
+#define IXGBE_WUFC_FLX4 0x00100000 /* Flexible Filter 4 Enable */
+#define IXGBE_WUFC_FLX5 0x00200000 /* Flexible Filter 5 Enable */
+#define IXGBE_WUFC_FLX_FILTERS     0x000F0000 /* Mask for 4 flex filters */
+#define IXGBE_WUFC_EXT_FLX_FILTERS 0x00300000 /* Mask for Ext. flex filters */
+#define IXGBE_WUFC_ALL_FILTERS     0x003F00FF /* Mask for all wakeup filters */
+#define IXGBE_WUFC_FLX_OFFSET      16 /* Offset to the Flexible Filters bits */
+
+/* Wake Up Status */
+#define IXGBE_WUS_LNKC  IXGBE_WUFC_LNKC
+#define IXGBE_WUS_MAG   IXGBE_WUFC_MAG
+#define IXGBE_WUS_EX    IXGBE_WUFC_EX
+#define IXGBE_WUS_MC    IXGBE_WUFC_MC
+#define IXGBE_WUS_BC    IXGBE_WUFC_BC
+#define IXGBE_WUS_ARP   IXGBE_WUFC_ARP
+#define IXGBE_WUS_IPV4  IXGBE_WUFC_IPV4
+#define IXGBE_WUS_IPV6  IXGBE_WUFC_IPV6
+#define IXGBE_WUS_MNG   IXGBE_WUFC_MNG
+#define IXGBE_WUS_FLX0  IXGBE_WUFC_FLX0
+#define IXGBE_WUS_FLX1  IXGBE_WUFC_FLX1
+#define IXGBE_WUS_FLX2  IXGBE_WUFC_FLX2
+#define IXGBE_WUS_FLX3  IXGBE_WUFC_FLX3
+#define IXGBE_WUS_FLX4  IXGBE_WUFC_FLX4
+#define IXGBE_WUS_FLX5  IXGBE_WUFC_FLX5
+#define IXGBE_WUS_FLX_FILTERS  IXGBE_WUFC_FLX_FILTERS
+
+/* Wake Up Packet Length */
+#define IXGBE_WUPL_LENGTH_MASK 0xFFFF
+
+/* DCB registers */
+#define MAX_TRAFFIC_CLASS        8
+#define X540_TRAFFIC_CLASS       4
+#define DEF_TRAFFIC_CLASS        1
+#define IXGBE_RMCS      0x03D00
+#define IXGBE_DPMCS     0x07F40
+#define IXGBE_PDPMCS    0x0CD00
+#define IXGBE_RUPPBMR   0x050A0
+#define IXGBE_RT2CR(_i) (0x03C20 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RT2SR(_i) (0x03C40 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_TDTQ2TCCR(_i)     (0x0602C + ((_i) * 0x40)) /* 8 of these (0-7) */
+#define IXGBE_TDTQ2TCSR(_i)     (0x0622C + ((_i) * 0x40)) /* 8 of these (0-7) */
+#define IXGBE_TDPT2TCCR(_i)     (0x0CD20 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_TDPT2TCSR(_i)     (0x0CD40 + ((_i) * 4)) /* 8 of these (0-7) */
+
+/* Security Control Registers */
+#define IXGBE_SECTXCTRL         0x08800
+#define IXGBE_SECTXSTAT         0x08804
+#define IXGBE_SECTXBUFFAF       0x08808
+#define IXGBE_SECTXMINIFG       0x08810
+#define IXGBE_SECRXCTRL         0x08D00
+#define IXGBE_SECRXSTAT         0x08D04
+
+/* Security Bit Fields and Masks */
+#define IXGBE_SECTXCTRL_SECTX_DIS       0x00000001
+#define IXGBE_SECTXCTRL_TX_DIS          0x00000002
+#define IXGBE_SECTXCTRL_STORE_FORWARD   0x00000004
+
+#define IXGBE_SECTXSTAT_SECTX_RDY       0x00000001
+#define IXGBE_SECTXSTAT_SECTX_OFF_DIS   0x00000002
+#define IXGBE_SECTXSTAT_ECC_TXERR       0x00000004
+
+#define IXGBE_SECRXCTRL_SECRX_DIS       0x00000001
+#define IXGBE_SECRXCTRL_RX_DIS          0x00000002
+
+#define IXGBE_SECRXSTAT_SECRX_RDY       0x00000001
+#define IXGBE_SECRXSTAT_SECRX_OFF_DIS   0x00000002
+#define IXGBE_SECRXSTAT_ECC_RXERR       0x00000004
+
+/* LinkSec (MacSec) Registers */
+#define IXGBE_LSECTXCAP         0x08A00
+#define IXGBE_LSECRXCAP         0x08F00
+#define IXGBE_LSECTXCTRL        0x08A04
+#define IXGBE_LSECTXSCL         0x08A08 /* SCI Low */
+#define IXGBE_LSECTXSCH         0x08A0C /* SCI High */
+#define IXGBE_LSECTXSA          0x08A10
+#define IXGBE_LSECTXPN0         0x08A14
+#define IXGBE_LSECTXPN1         0x08A18
+#define IXGBE_LSECTXKEY0(_n)    (0x08A1C + (4 * (_n))) /* 4 of these (0-3) */
+#define IXGBE_LSECTXKEY1(_n)    (0x08A2C + (4 * (_n))) /* 4 of these (0-3) */
+#define IXGBE_LSECRXCTRL        0x08F04
+#define IXGBE_LSECRXSCL         0x08F08
+#define IXGBE_LSECRXSCH         0x08F0C
+#define IXGBE_LSECRXSA(_i)      (0x08F10 + (4 * (_i))) /* 2 of these (0-1) */
+#define IXGBE_LSECRXPN(_i)      (0x08F18 + (4 * (_i))) /* 2 of these (0-1) */
+#define IXGBE_LSECRXKEY(_n, _m) (0x08F20 + ((0x10 * (_n)) + (4 * (_m))))
+#define IXGBE_LSECTXUT          0x08A3C /* OutPktsUntagged */
+#define IXGBE_LSECTXPKTE        0x08A40 /* OutPktsEncrypted */
+#define IXGBE_LSECTXPKTP        0x08A44 /* OutPktsProtected */
+#define IXGBE_LSECTXOCTE        0x08A48 /* OutOctetsEncrypted */
+#define IXGBE_LSECTXOCTP        0x08A4C /* OutOctetsProtected */
+#define IXGBE_LSECRXUT          0x08F40 /* InPktsUntagged/InPktsNoTag */
+#define IXGBE_LSECRXOCTD        0x08F44 /* InOctetsDecrypted */
+#define IXGBE_LSECRXOCTV        0x08F48 /* InOctetsValidated */
+#define IXGBE_LSECRXBAD         0x08F4C /* InPktsBadTag */
+#define IXGBE_LSECRXNOSCI       0x08F50 /* InPktsNoSci */
+#define IXGBE_LSECRXUNSCI       0x08F54 /* InPktsUnknownSci */
+#define IXGBE_LSECRXUNCH        0x08F58 /* InPktsUnchecked */
+#define IXGBE_LSECRXDELAY       0x08F5C /* InPktsDelayed */
+#define IXGBE_LSECRXLATE        0x08F60 /* InPktsLate */
+#define IXGBE_LSECRXOK(_n)      (0x08F64 + (0x04 * (_n))) /* InPktsOk */
+#define IXGBE_LSECRXINV(_n)     (0x08F6C + (0x04 * (_n))) /* InPktsInvalid */
+#define IXGBE_LSECRXNV(_n)      (0x08F74 + (0x04 * (_n))) /* InPktsNotValid */
+#define IXGBE_LSECRXUNSA        0x08F7C /* InPktsUnusedSa */
+#define IXGBE_LSECRXNUSA        0x08F80 /* InPktsNotUsingSa */
+
+/* LinkSec (MacSec) Bit Fields and Masks */
+#define IXGBE_LSECTXCAP_SUM_MASK        0x00FF0000
+#define IXGBE_LSECTXCAP_SUM_SHIFT       16
+#define IXGBE_LSECRXCAP_SUM_MASK        0x00FF0000
+#define IXGBE_LSECRXCAP_SUM_SHIFT       16
+
+#define IXGBE_LSECTXCTRL_EN_MASK        0x00000003
+#define IXGBE_LSECTXCTRL_DISABLE        0x0
+#define IXGBE_LSECTXCTRL_AUTH           0x1
+#define IXGBE_LSECTXCTRL_AUTH_ENCRYPT   0x2
+#define IXGBE_LSECTXCTRL_AISCI          0x00000020
+#define IXGBE_LSECTXCTRL_PNTHRSH_MASK   0xFFFFFF00
+#define IXGBE_LSECTXCTRL_RSV_MASK       0x000000D8
+
+#define IXGBE_LSECRXCTRL_EN_MASK        0x0000000C
+#define IXGBE_LSECRXCTRL_EN_SHIFT       2
+#define IXGBE_LSECRXCTRL_DISABLE        0x0
+#define IXGBE_LSECRXCTRL_CHECK          0x1
+#define IXGBE_LSECRXCTRL_STRICT         0x2
+#define IXGBE_LSECRXCTRL_DROP           0x3
+#define IXGBE_LSECRXCTRL_PLSH           0x00000040
+#define IXGBE_LSECRXCTRL_RP             0x00000080
+#define IXGBE_LSECRXCTRL_RSV_MASK       0xFFFFFF33
+
+/* IpSec Registers */
+#define IXGBE_IPSTXIDX          0x08900
+#define IXGBE_IPSTXSALT         0x08904
+#define IXGBE_IPSTXKEY(_i)      (0x08908 + (4 * (_i))) /* 4 of these (0-3) */
+#define IXGBE_IPSRXIDX          0x08E00
+#define IXGBE_IPSRXIPADDR(_i)   (0x08E04 + (4 * (_i))) /* 4 of these (0-3) */
+#define IXGBE_IPSRXSPI          0x08E14
+#define IXGBE_IPSRXIPIDX        0x08E18
+#define IXGBE_IPSRXKEY(_i)      (0x08E1C + (4 * (_i))) /* 4 of these (0-3) */
+#define IXGBE_IPSRXSALT         0x08E2C
+#define IXGBE_IPSRXMOD          0x08E30
+
+#define IXGBE_SECTXCTRL_STORE_FORWARD_ENABLE    0x4
+
+/* DCB registers */
+#define IXGBE_RTRPCS      0x02430
+#define IXGBE_RTTDCS      0x04900
+#define IXGBE_RTTDCS_ARBDIS     0x00000040 /* DCB arbiter disable */
+#define IXGBE_RTTPCS      0x0CD00
+#define IXGBE_RTRUP2TC    0x03020
+#define IXGBE_RTTUP2TC    0x0C800
+#define IXGBE_RTRPT4C(_i) (0x02140 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_TXLLQ(_i)   (0x082E0 + ((_i) * 4)) /* 4 of these (0-3) */
+#define IXGBE_RTRPT4S(_i) (0x02160 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RTTDT2C(_i) (0x04910 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RTTDT2S(_i) (0x04930 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RTTPT2C(_i) (0x0CD20 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RTTPT2S(_i) (0x0CD40 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_RTTDQSEL    0x04904
+#define IXGBE_RTTDT1C     0x04908
+#define IXGBE_RTTDT1S     0x0490C
+#define IXGBE_RTTQCNCR    0x08B00
+#define IXGBE_RTTQCNTG    0x04A90
+#define IXGBE_RTTBCNRD    0x0498C
+#define IXGBE_RTTQCNRR    0x0498C
+#define IXGBE_RTTDTECC    0x04990
+#define IXGBE_RTTDTECC_NO_BCN   0x00000100
+#define IXGBE_RTTBCNRC    0x04984
+#define IXGBE_RTTBCNRC_RS_ENA	0x80000000
+#define IXGBE_RTTBCNRC_RF_DEC_MASK	0x00003FFF
+#define IXGBE_RTTBCNRC_RF_INT_SHIFT	14
+#define IXGBE_RTTBCNRC_RF_INT_MASK	\
+	(IXGBE_RTTBCNRC_RF_DEC_MASK << IXGBE_RTTBCNRC_RF_INT_SHIFT)
+#define IXGBE_RTTBCNRM    0x04980
+#define IXGBE_RTTQCNRM    0x04980
+
+/* FCoE Direct DMA Context */
+#define IXGBE_FCDDC(_i, _j)	(0x20000 + ((_i) * 0x4) + ((_j) * 0x10))
+/* FCoE DMA Context Registers */
+#define IXGBE_FCPTRL    0x02410 /* FC User Desc. PTR Low */
+#define IXGBE_FCPTRH    0x02414 /* FC USer Desc. PTR High */
+#define IXGBE_FCBUFF    0x02418 /* FC Buffer Control */
+#define IXGBE_FCDMARW   0x02420 /* FC Receive DMA RW */
+#define IXGBE_FCINVST0  0x03FC0 /* FC Invalid DMA Context Status Reg 0 */
+#define IXGBE_FCINVST(_i)       (IXGBE_FCINVST0 + ((_i) * 4))
+#define IXGBE_FCBUFF_VALID      BIT(0)    /* DMA Context Valid */
+#define IXGBE_FCBUFF_BUFFSIZE   (3u << 3) /* User Buffer Size */
+#define IXGBE_FCBUFF_WRCONTX    BIT(7)    /* 0: Initiator, 1: Target */
+#define IXGBE_FCBUFF_BUFFCNT    0x0000ff00 /* Number of User Buffers */
+#define IXGBE_FCBUFF_OFFSET     0xffff0000 /* User Buffer Offset */
+#define IXGBE_FCBUFF_BUFFSIZE_SHIFT  3
+#define IXGBE_FCBUFF_BUFFCNT_SHIFT   8
+#define IXGBE_FCBUFF_OFFSET_SHIFT    16
+#define IXGBE_FCDMARW_WE        BIT(14)   /* Write enable */
+#define IXGBE_FCDMARW_RE        BIT(15)   /* Read enable */
+#define IXGBE_FCDMARW_FCOESEL   0x000001ff  /* FC X_ID: 11 bits */
+#define IXGBE_FCDMARW_LASTSIZE  0xffff0000  /* Last User Buffer Size */
+#define IXGBE_FCDMARW_LASTSIZE_SHIFT 16
+
+/* FCoE SOF/EOF */
+#define IXGBE_TEOFF     0x04A94 /* Tx FC EOF */
+#define IXGBE_TSOFF     0x04A98 /* Tx FC SOF */
+#define IXGBE_REOFF     0x05158 /* Rx FC EOF */
+#define IXGBE_RSOFF     0x051F8 /* Rx FC SOF */
+/* FCoE Direct Filter Context */
+#define IXGBE_FCDFC(_i, _j)	(0x28000 + ((_i) * 0x4) + ((_j) * 0x10))
+#define IXGBE_FCDFCD(_i)	(0x30000 + ((_i) * 0x4))
+/* FCoE Filter Context Registers */
+#define IXGBE_FCFLT     0x05108 /* FC FLT Context */
+#define IXGBE_FCFLTRW   0x05110 /* FC Filter RW Control */
+#define IXGBE_FCPARAM   0x051d8 /* FC Offset Parameter */
+#define IXGBE_FCFLT_VALID       BIT(0)   /* Filter Context Valid */
+#define IXGBE_FCFLT_FIRST       BIT(1)   /* Filter First */
+#define IXGBE_FCFLT_SEQID       0x00ff0000 /* Sequence ID */
+#define IXGBE_FCFLT_SEQCNT      0xff000000 /* Sequence Count */
+#define IXGBE_FCFLTRW_RVALDT    BIT(13)  /* Fast Re-Validation */
+#define IXGBE_FCFLTRW_WE        BIT(14)  /* Write Enable */
+#define IXGBE_FCFLTRW_RE        BIT(15)  /* Read Enable */
+/* FCoE Receive Control */
+#define IXGBE_FCRXCTRL  0x05100 /* FC Receive Control */
+#define IXGBE_FCRXCTRL_FCOELLI  BIT(0)   /* Low latency interrupt */
+#define IXGBE_FCRXCTRL_SAVBAD   BIT(1)   /* Save Bad Frames */
+#define IXGBE_FCRXCTRL_FRSTRDH  BIT(2)   /* EN 1st Read Header */
+#define IXGBE_FCRXCTRL_LASTSEQH BIT(3)   /* EN Last Header in Seq */
+#define IXGBE_FCRXCTRL_ALLH     BIT(4)   /* EN All Headers */
+#define IXGBE_FCRXCTRL_FRSTSEQH BIT(5)   /* EN 1st Seq. Header */
+#define IXGBE_FCRXCTRL_ICRC     BIT(6)   /* Ignore Bad FC CRC */
+#define IXGBE_FCRXCTRL_FCCRCBO  BIT(7)   /* FC CRC Byte Ordering */
+#define IXGBE_FCRXCTRL_FCOEVER  0x00000f00 /* FCoE Version: 4 bits */
+#define IXGBE_FCRXCTRL_FCOEVER_SHIFT 8
+/* FCoE Redirection */
+#define IXGBE_FCRECTL   0x0ED00 /* FC Redirection Control */
+#define IXGBE_FCRETA0   0x0ED10 /* FC Redirection Table 0 */
+#define IXGBE_FCRETA(_i)        (IXGBE_FCRETA0 + ((_i) * 4)) /* FCoE Redir */
+#define IXGBE_FCRECTL_ENA       0x1        /* FCoE Redir Table Enable */
+#define IXGBE_FCRETA_SIZE       8          /* Max entries in FCRETA */
+#define IXGBE_FCRETA_ENTRY_MASK 0x0000007f /* 7 bits for the queue index */
+#define IXGBE_FCRETA_SIZE_X550	32 /* Max entries in FCRETA */
+/* Higher 7 bits for the queue index */
+#define IXGBE_FCRETA_ENTRY_HIGH_MASK	0x007F0000
+#define IXGBE_FCRETA_ENTRY_HIGH_SHIFT	16
+
+/* Stats registers */
+#define IXGBE_CRCERRS   0x04000
+#define IXGBE_ILLERRC   0x04004
+#define IXGBE_ERRBC     0x04008
+#define IXGBE_MSPDC     0x04010
+#define IXGBE_MPC(_i)   (0x03FA0 + ((_i) * 4)) /* 8 of these 3FA0-3FBC*/
+#define IXGBE_MLFC      0x04034
+#define IXGBE_MRFC      0x04038
+#define IXGBE_RLEC      0x04040
+#define IXGBE_LXONTXC   0x03F60
+#define IXGBE_LXONRXC   0x0CF60
+#define IXGBE_LXOFFTXC  0x03F68
+#define IXGBE_LXOFFRXC  0x0CF68
+#define IXGBE_LXONRXCNT 0x041A4
+#define IXGBE_LXOFFRXCNT 0x041A8
+#define IXGBE_PXONRXCNT(_i)     (0x04140 + ((_i) * 4)) /* 8 of these */
+#define IXGBE_PXOFFRXCNT(_i)    (0x04160 + ((_i) * 4)) /* 8 of these */
+#define IXGBE_PXON2OFFCNT(_i)   (0x03240 + ((_i) * 4)) /* 8 of these */
+#define IXGBE_PXONTXC(_i)       (0x03F00 + ((_i) * 4)) /* 8 of these 3F00-3F1C*/
+#define IXGBE_PXONRXC(_i)       (0x0CF00 + ((_i) * 4)) /* 8 of these CF00-CF1C*/
+#define IXGBE_PXOFFTXC(_i)      (0x03F20 + ((_i) * 4)) /* 8 of these 3F20-3F3C*/
+#define IXGBE_PXOFFRXC(_i)      (0x0CF20 + ((_i) * 4)) /* 8 of these CF20-CF3C*/
+#define IXGBE_PRC64     0x0405C
+#define IXGBE_PRC127    0x04060
+#define IXGBE_PRC255    0x04064
+#define IXGBE_PRC511    0x04068
+#define IXGBE_PRC1023   0x0406C
+#define IXGBE_PRC1522   0x04070
+#define IXGBE_GPRC      0x04074
+#define IXGBE_BPRC      0x04078
+#define IXGBE_MPRC      0x0407C
+#define IXGBE_GPTC      0x04080
+#define IXGBE_GORCL     0x04088
+#define IXGBE_GORCH     0x0408C
+#define IXGBE_GOTCL     0x04090
+#define IXGBE_GOTCH     0x04094
+#define IXGBE_RNBC(_i)  (0x03FC0 + ((_i) * 4)) /* 8 of these 3FC0-3FDC*/
+#define IXGBE_RUC       0x040A4
+#define IXGBE_RFC       0x040A8
+#define IXGBE_ROC       0x040AC
+#define IXGBE_RJC       0x040B0
+#define IXGBE_MNGPRC    0x040B4
+#define IXGBE_MNGPDC    0x040B8
+#define IXGBE_MNGPTC    0x0CF90
+#define IXGBE_TORL      0x040C0
+#define IXGBE_TORH      0x040C4
+#define IXGBE_TPR       0x040D0
+#define IXGBE_TPT       0x040D4
+#define IXGBE_PTC64     0x040D8
+#define IXGBE_PTC127    0x040DC
+#define IXGBE_PTC255    0x040E0
+#define IXGBE_PTC511    0x040E4
+#define IXGBE_PTC1023   0x040E8
+#define IXGBE_PTC1522   0x040EC
+#define IXGBE_MPTC      0x040F0
+#define IXGBE_BPTC      0x040F4
+#define IXGBE_XEC       0x04120
+#define IXGBE_SSVPC     0x08780
+
+#define IXGBE_RQSMR(_i) (0x02300 + ((_i) * 4))
+#define IXGBE_TQSMR(_i) (((_i) <= 7) ? (0x07300 + ((_i) * 4)) : \
+			 (0x08600 + ((_i) * 4)))
+#define IXGBE_TQSM(_i)  (0x08600 + ((_i) * 4))
+
+#define IXGBE_QPRC(_i) (0x01030 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QPTC(_i) (0x06030 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QBRC(_i) (0x01034 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QBTC(_i) (0x06034 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QBRC_L(_i) (0x01034 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QBRC_H(_i) (0x01038 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QPRDC(_i) (0x01430 + ((_i) * 0x40)) /* 16 of these */
+#define IXGBE_QBTC_L(_i) (0x08700 + ((_i) * 0x8)) /* 16 of these */
+#define IXGBE_QBTC_H(_i) (0x08704 + ((_i) * 0x8)) /* 16 of these */
+#define IXGBE_FCCRC     0x05118 /* Count of Good Eth CRC w/ Bad FC CRC */
+#define IXGBE_FCOERPDC  0x0241C /* FCoE Rx Packets Dropped Count */
+#define IXGBE_FCLAST    0x02424 /* FCoE Last Error Count */
+#define IXGBE_FCOEPRC   0x02428 /* Number of FCoE Packets Received */
+#define IXGBE_FCOEDWRC  0x0242C /* Number of FCoE DWords Received */
+#define IXGBE_FCOEPTC   0x08784 /* Number of FCoE Packets Transmitted */
+#define IXGBE_FCOEDWTC  0x08788 /* Number of FCoE DWords Transmitted */
+#define IXGBE_O2BGPTC   0x041C4
+#define IXGBE_O2BSPC    0x087B0
+#define IXGBE_B2OSPC    0x041C0
+#define IXGBE_B2OGPRC   0x02F90
+#define IXGBE_PCRC8ECL  0x0E810
+#define IXGBE_PCRC8ECH  0x0E811
+#define IXGBE_PCRC8ECH_MASK     0x1F
+#define IXGBE_LDPCECL   0x0E820
+#define IXGBE_LDPCECH   0x0E821
+
+/* MII clause 22/28 definitions */
+#define IXGBE_MDIO_PHY_LOW_POWER_MODE	0x0800
+
+#define IXGBE_MDIO_XENPAK_LASI_STATUS	0x9005 /* XENPAK LASI Status register */
+#define IXGBE_XENPAK_LASI_LINK_STATUS_ALARM 0x1 /* Link Status Alarm change */
+
+#define IXGBE_MDIO_AUTO_NEG_LINK_STATUS	0x4 /* Indicates if link is up */
+
+#define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_MASK	0x7 /* Speed/Duplex Mask */
+#define IXGBE_MDIO_AUTO_NEG_VEN_STAT_SPEED_MASK	0x6 /* Speed Mask */
+#define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10M_HALF 0x0 /* 10Mb/s Half Duplex */
+#define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10M_FULL 0x1 /* 10Mb/s Full Duplex */
+#define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_100M_HALF 0x2 /* 100Mb/s H Duplex */
+#define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_100M_FULL 0x3 /* 100Mb/s F Duplex */
+#define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_1GB_HALF 0x4 /* 1Gb/s Half Duplex */
+#define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_1GB_FULL 0x5 /* 1Gb/s Full Duplex */
+#define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10GB_HALF 0x6 /* 10Gb/s Half Duplex */
+#define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10GB_FULL 0x7 /* 10Gb/s Full Duplex */
+#define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_1GB	0x4 /* 1Gb/s */
+#define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10GB	0x6 /* 10Gb/s */
+
+#define IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG 0xC400	/* 1G Provisioning 1 */
+#define IXGBE_MII_AUTONEG_XNP_TX_REG		0x17	/* 1G XNP Transmit */
+#define IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX	0x4000	/* full duplex, bit:14*/
+#define IXGBE_MII_1GBASE_T_ADVERTISE		0x8000	/* full duplex, bit:15*/
+#define IXGBE_MII_2_5GBASE_T_ADVERTISE		0x0400
+#define IXGBE_MII_5GBASE_T_ADVERTISE		0x0800
+#define IXGBE_MII_RESTART			0x200
+#define IXGBE_MII_AUTONEG_LINK_UP		0x04
+#define IXGBE_MII_AUTONEG_REG			0x0
+
+/* Management */
+#define IXGBE_MAVTV(_i) (0x05010 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_MFUTP(_i) (0x05030 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_MANC      0x05820
+#define IXGBE_MFVAL     0x05824
+#define IXGBE_MANC2H    0x05860
+#define IXGBE_MDEF(_i)  (0x05890 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_MIPAF     0x058B0
+#define IXGBE_MMAL(_i)  (0x05910 + ((_i) * 8)) /* 4 of these (0-3) */
+#define IXGBE_MMAH(_i)  (0x05914 + ((_i) * 8)) /* 4 of these (0-3) */
+#define IXGBE_FTFT      0x09400 /* 0x9400-0x97FC */
+#define IXGBE_METF(_i)  (0x05190 + ((_i) * 4)) /* 4 of these (0-3) */
+#define IXGBE_MDEF_EXT(_i) (0x05160 + ((_i) * 4)) /* 8 of these (0-7) */
+#define IXGBE_LSWFW     0x15014
+
+/* Management Bit Fields and Masks */
+#define IXGBE_MANC_RCV_TCO_EN	0x00020000 /* Rcv TCO packet enable */
+
+/* Firmware Semaphore Register */
+#define IXGBE_FWSM_MODE_MASK	0xE
+#define IXGBE_FWSM_FW_MODE_PT	0x4
+#define IXGBE_FWSM_FW_NVM_RECOVERY_MODE	BIT(5)
+#define IXGBE_FWSM_EXT_ERR_IND_MASK	0x01F80000
+#define IXGBE_FWSM_FW_VAL_BIT	BIT(15)
+
+/* ARC Subsystem registers */
+#define IXGBE_HICR      0x15F00
+#define IXGBE_FWSTS     0x15F0C
+#define IXGBE_HSMC0R    0x15F04
+#define IXGBE_HSMC1R    0x15F08
+#define IXGBE_SWSR      0x15F10
+#define IXGBE_HFDR      0x15FE8
+#define IXGBE_FLEX_MNG  0x15800 /* 0x15800 - 0x15EFC */
+
+#define IXGBE_HICR_EN              0x01  /* Enable bit - RO */
+/* Driver sets this bit when done to put command in RAM */
+#define IXGBE_HICR_C               0x02
+#define IXGBE_HICR_SV              0x04  /* Status Validity */
+#define IXGBE_HICR_FW_RESET_ENABLE 0x40
+#define IXGBE_HICR_FW_RESET        0x80
+
+/* PCI-E registers */
+#define IXGBE_GCR       0x11000
+#define IXGBE_GTV       0x11004
+#define IXGBE_FUNCTAG   0x11008
+#define IXGBE_GLT       0x1100C
+#define IXGBE_GSCL_1    0x11010
+#define IXGBE_GSCL_2    0x11014
+#define IXGBE_GSCL_3    0x11018
+#define IXGBE_GSCL_4    0x1101C
+#define IXGBE_GSCN_0    0x11020
+#define IXGBE_GSCN_1    0x11024
+#define IXGBE_GSCN_2    0x11028
+#define IXGBE_GSCN_3    0x1102C
+#define IXGBE_FACTPS_8259X	0x10150
+#define IXGBE_FACTPS_X540	IXGBE_FACTPS_8259X
+#define IXGBE_FACTPS_X550	IXGBE_FACTPS_8259X
+#define IXGBE_FACTPS_X550EM_x	IXGBE_FACTPS_8259X
+#define IXGBE_FACTPS_X550EM_a	0x15FEC
+#define IXGBE_FACTPS(_hw)	IXGBE_BY_MAC((_hw), FACTPS)
+
+#define IXGBE_PCIEANACTL  0x11040
+#define IXGBE_SWSM_8259X	0x10140
+#define IXGBE_SWSM_X540		IXGBE_SWSM_8259X
+#define IXGBE_SWSM_X550		IXGBE_SWSM_8259X
+#define IXGBE_SWSM_X550EM_x	IXGBE_SWSM_8259X
+#define IXGBE_SWSM_X550EM_a	0x15F70
+#define IXGBE_SWSM(_hw)		IXGBE_BY_MAC((_hw), SWSM)
+#define IXGBE_FWSM_8259X	0x10148
+#define IXGBE_FWSM_X540		IXGBE_FWSM_8259X
+#define IXGBE_FWSM_X550		IXGBE_FWSM_8259X
+#define IXGBE_FWSM_X550EM_x	IXGBE_FWSM_8259X
+#define IXGBE_FWSM_X550EM_a	0x15F74
+#define IXGBE_FWSM(_hw)		IXGBE_BY_MAC((_hw), FWSM)
+#define IXGBE_GSSR      0x10160
+#define IXGBE_MREVID    0x11064
+#define IXGBE_DCA_ID    0x11070
+#define IXGBE_DCA_CTRL  0x11074
+#define IXGBE_SWFW_SYNC_8259X		IXGBE_GSSR
+#define IXGBE_SWFW_SYNC_X540		IXGBE_SWFW_SYNC_8259X
+#define IXGBE_SWFW_SYNC_X550		IXGBE_SWFW_SYNC_8259X
+#define IXGBE_SWFW_SYNC_X550EM_x	IXGBE_SWFW_SYNC_8259X
+#define IXGBE_SWFW_SYNC_X550EM_a	0x15F78
+#define IXGBE_SWFW_SYNC(_hw)		IXGBE_BY_MAC((_hw), SWFW_SYNC)
+
+/* PCIe registers 82599-specific */
+#define IXGBE_GCR_EXT           0x11050
+#define IXGBE_GSCL_5_82599      0x11030
+#define IXGBE_GSCL_6_82599      0x11034
+#define IXGBE_GSCL_7_82599      0x11038
+#define IXGBE_GSCL_8_82599      0x1103C
+#define IXGBE_PHYADR_82599      0x11040
+#define IXGBE_PHYDAT_82599      0x11044
+#define IXGBE_PHYCTL_82599      0x11048
+#define IXGBE_PBACLR_82599      0x11068
+
+#define IXGBE_CIAA_8259X	0x11088
+#define IXGBE_CIAA_X540		IXGBE_CIAA_8259X
+#define IXGBE_CIAA_X550		0x11508
+#define IXGBE_CIAA_X550EM_x	IXGBE_CIAA_X550
+#define IXGBE_CIAA_X550EM_a	IXGBE_CIAA_X550
+#define IXGBE_CIAA(_hw)		IXGBE_BY_MAC((_hw), CIAA)
+
+#define IXGBE_CIAD_8259X	0x1108C
+#define IXGBE_CIAD_X540		IXGBE_CIAD_8259X
+#define IXGBE_CIAD_X550		0x11510
+#define IXGBE_CIAD_X550EM_x	IXGBE_CIAD_X550
+#define IXGBE_CIAD_X550EM_a	IXGBE_CIAD_X550
+#define IXGBE_CIAD(_hw)		IXGBE_BY_MAC((_hw), CIAD)
+
+#define IXGBE_PICAUSE           0x110B0
+#define IXGBE_PIENA             0x110B8
+#define IXGBE_CDQ_MBR_82599     0x110B4
+#define IXGBE_PCIESPARE         0x110BC
+#define IXGBE_MISC_REG_82599    0x110F0
+#define IXGBE_ECC_CTRL_0_82599  0x11100
+#define IXGBE_ECC_CTRL_1_82599  0x11104
+#define IXGBE_ECC_STATUS_82599  0x110E0
+#define IXGBE_BAR_CTRL_82599    0x110F4
+
+/* PCI Express Control */
+#define IXGBE_GCR_CMPL_TMOUT_MASK       0x0000F000
+#define IXGBE_GCR_CMPL_TMOUT_10ms       0x00001000
+#define IXGBE_GCR_CMPL_TMOUT_RESEND     0x00010000
+#define IXGBE_GCR_CAP_VER2              0x00040000
+
+#define IXGBE_GCR_EXT_MSIX_EN           0x80000000
+#define IXGBE_GCR_EXT_BUFFERS_CLEAR     0x40000000
+#define IXGBE_GCR_EXT_VT_MODE_16        0x00000001
+#define IXGBE_GCR_EXT_VT_MODE_32        0x00000002
+#define IXGBE_GCR_EXT_VT_MODE_64        0x00000003
+#define IXGBE_GCR_EXT_SRIOV             (IXGBE_GCR_EXT_MSIX_EN | \
+					 IXGBE_GCR_EXT_VT_MODE_64)
+
+/* Time Sync Registers */
+#define IXGBE_TSYNCRXCTL 0x05188 /* Rx Time Sync Control register - RW */
+#define IXGBE_TSYNCTXCTL 0x08C00 /* Tx Time Sync Control register - RW */
+#define IXGBE_RXSTMPL    0x051E8 /* Rx timestamp Low - RO */
+#define IXGBE_RXSTMPH    0x051A4 /* Rx timestamp High - RO */
+#define IXGBE_RXSATRL    0x051A0 /* Rx timestamp attribute low - RO */
+#define IXGBE_RXSATRH    0x051A8 /* Rx timestamp attribute high - RO */
+#define IXGBE_RXMTRL     0x05120 /* RX message type register low - RW */
+#define IXGBE_TXSTMPL    0x08C04 /* Tx timestamp value Low - RO */
+#define IXGBE_TXSTMPH    0x08C08 /* Tx timestamp value High - RO */
+#define IXGBE_SYSTIML    0x08C0C /* System time register Low - RO */
+#define IXGBE_SYSTIMH    0x08C10 /* System time register High - RO */
+#define IXGBE_SYSTIMR    0x08C58 /* System time register Residue - RO */
+#define IXGBE_TIMINCA    0x08C14 /* Increment attributes register - RW */
+#define IXGBE_TIMADJL    0x08C18 /* Time Adjustment Offset register Low - RW */
+#define IXGBE_TIMADJH    0x08C1C /* Time Adjustment Offset register High - RW */
+#define IXGBE_TSAUXC     0x08C20 /* TimeSync Auxiliary Control register - RW */
+#define IXGBE_TRGTTIML0  0x08C24 /* Target Time Register 0 Low - RW */
+#define IXGBE_TRGTTIMH0  0x08C28 /* Target Time Register 0 High - RW */
+#define IXGBE_TRGTTIML1  0x08C2C /* Target Time Register 1 Low - RW */
+#define IXGBE_TRGTTIMH1  0x08C30 /* Target Time Register 1 High - RW */
+#define IXGBE_CLKTIML    0x08C34 /* Clock Out Time Register Low - RW */
+#define IXGBE_CLKTIMH    0x08C38 /* Clock Out Time Register High - RW */
+#define IXGBE_FREQOUT0   0x08C34 /* Frequency Out 0 Control register - RW */
+#define IXGBE_FREQOUT1   0x08C38 /* Frequency Out 1 Control register - RW */
+#define IXGBE_AUXSTMPL0  0x08C3C /* Auxiliary Time Stamp 0 register Low - RO */
+#define IXGBE_AUXSTMPH0  0x08C40 /* Auxiliary Time Stamp 0 register High - RO */
+#define IXGBE_AUXSTMPL1  0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */
+#define IXGBE_AUXSTMPH1  0x08C48 /* Auxiliary Time Stamp 1 register High - RO */
+#define IXGBE_TSIM       0x08C68 /* TimeSync Interrupt Mask Register - RW */
+#define IXGBE_TSSDP      0x0003C /* TimeSync SDP Configuration Register - RW */
+
+/* Diagnostic Registers */
+#define IXGBE_RDSTATCTL   0x02C20
+#define IXGBE_RDSTAT(_i)  (0x02C00 + ((_i) * 4)) /* 0x02C00-0x02C1C */
+#define IXGBE_RDHMPN      0x02F08
+#define IXGBE_RIC_DW(_i)  (0x02F10 + ((_i) * 4))
+#define IXGBE_RDPROBE     0x02F20
+#define IXGBE_RDMAM       0x02F30
+#define IXGBE_RDMAD       0x02F34
+#define IXGBE_TDSTATCTL   0x07C20
+#define IXGBE_TDSTAT(_i)  (0x07C00 + ((_i) * 4)) /* 0x07C00 - 0x07C1C */
+#define IXGBE_TDHMPN      0x07F08
+#define IXGBE_TDHMPN2     0x082FC
+#define IXGBE_TXDESCIC    0x082CC
+#define IXGBE_TIC_DW(_i)  (0x07F10 + ((_i) * 4))
+#define IXGBE_TIC_DW2(_i) (0x082B0 + ((_i) * 4))
+#define IXGBE_TDPROBE     0x07F20
+#define IXGBE_TXBUFCTRL   0x0C600
+#define IXGBE_TXBUFDATA(_i) (0x0C610 + ((_i) * 4)) /* 4 of these (0-3) */
+#define IXGBE_RXBUFCTRL   0x03600
+#define IXGBE_RXBUFDATA(_i) (0x03610 + ((_i) * 4)) /* 4 of these (0-3) */
+#define IXGBE_PCIE_DIAG(_i)     (0x11090 + ((_i) * 4)) /* 8 of these */
+#define IXGBE_RFVAL     0x050A4
+#define IXGBE_MDFTC1    0x042B8
+#define IXGBE_MDFTC2    0x042C0
+#define IXGBE_MDFTFIFO1 0x042C4
+#define IXGBE_MDFTFIFO2 0x042C8
+#define IXGBE_MDFTS     0x042CC
+#define IXGBE_RXDATAWRPTR(_i)   (0x03700 + ((_i) * 4)) /* 8 of these 3700-370C*/
+#define IXGBE_RXDESCWRPTR(_i)   (0x03710 + ((_i) * 4)) /* 8 of these 3710-371C*/
+#define IXGBE_RXDATARDPTR(_i)   (0x03720 + ((_i) * 4)) /* 8 of these 3720-372C*/
+#define IXGBE_RXDESCRDPTR(_i)   (0x03730 + ((_i) * 4)) /* 8 of these 3730-373C*/
+#define IXGBE_TXDATAWRPTR(_i)   (0x0C700 + ((_i) * 4)) /* 8 of these C700-C70C*/
+#define IXGBE_TXDESCWRPTR(_i)   (0x0C710 + ((_i) * 4)) /* 8 of these C710-C71C*/
+#define IXGBE_TXDATARDPTR(_i)   (0x0C720 + ((_i) * 4)) /* 8 of these C720-C72C*/
+#define IXGBE_TXDESCRDPTR(_i)   (0x0C730 + ((_i) * 4)) /* 8 of these C730-C73C*/
+#define IXGBE_PCIEECCCTL 0x1106C
+#define IXGBE_RXWRPTR(_i)       (0x03100 + ((_i) * 4)) /* 8 of these 3100-310C*/
+#define IXGBE_RXUSED(_i)        (0x03120 + ((_i) * 4)) /* 8 of these 3120-312C*/
+#define IXGBE_RXRDPTR(_i)       (0x03140 + ((_i) * 4)) /* 8 of these 3140-314C*/
+#define IXGBE_RXRDWRPTR(_i)     (0x03160 + ((_i) * 4)) /* 8 of these 3160-310C*/
+#define IXGBE_TXWRPTR(_i)       (0x0C100 + ((_i) * 4)) /* 8 of these C100-C10C*/
+#define IXGBE_TXUSED(_i)        (0x0C120 + ((_i) * 4)) /* 8 of these C120-C12C*/
+#define IXGBE_TXRDPTR(_i)       (0x0C140 + ((_i) * 4)) /* 8 of these C140-C14C*/
+#define IXGBE_TXRDWRPTR(_i)     (0x0C160 + ((_i) * 4)) /* 8 of these C160-C10C*/
+#define IXGBE_PCIEECCCTL0 0x11100
+#define IXGBE_PCIEECCCTL1 0x11104
+#define IXGBE_RXDBUECC  0x03F70
+#define IXGBE_TXDBUECC  0x0CF70
+#define IXGBE_RXDBUEST 0x03F74
+#define IXGBE_TXDBUEST 0x0CF74
+#define IXGBE_PBTXECC   0x0C300
+#define IXGBE_PBRXECC   0x03300
+#define IXGBE_GHECCR    0x110B0
+
+/* MAC Registers */
+#define IXGBE_PCS1GCFIG 0x04200
+#define IXGBE_PCS1GLCTL 0x04208
+#define IXGBE_PCS1GLSTA 0x0420C
+#define IXGBE_PCS1GDBG0 0x04210
+#define IXGBE_PCS1GDBG1 0x04214
+#define IXGBE_PCS1GANA  0x04218
+#define IXGBE_PCS1GANLP 0x0421C
+#define IXGBE_PCS1GANNP 0x04220
+#define IXGBE_PCS1GANLPNP 0x04224
+#define IXGBE_HLREG0    0x04240
+#define IXGBE_HLREG1    0x04244
+#define IXGBE_PAP       0x04248
+#define IXGBE_MACA      0x0424C
+#define IXGBE_APAE      0x04250
+#define IXGBE_ARD       0x04254
+#define IXGBE_AIS       0x04258
+#define IXGBE_MSCA      0x0425C
+#define IXGBE_MSRWD     0x04260
+#define IXGBE_MLADD     0x04264
+#define IXGBE_MHADD     0x04268
+#define IXGBE_MAXFRS    0x04268
+#define IXGBE_TREG      0x0426C
+#define IXGBE_PCSS1     0x04288
+#define IXGBE_PCSS2     0x0428C
+#define IXGBE_XPCSS     0x04290
+#define IXGBE_MFLCN     0x04294
+#define IXGBE_SERDESC   0x04298
+#define IXGBE_MAC_SGMII_BUSY 0x04298
+#define IXGBE_MACS      0x0429C
+#define IXGBE_AUTOC     0x042A0
+#define IXGBE_LINKS     0x042A4
+#define IXGBE_LINKS2    0x04324
+#define IXGBE_AUTOC2    0x042A8
+#define IXGBE_AUTOC3    0x042AC
+#define IXGBE_ANLP1     0x042B0
+#define IXGBE_ANLP2     0x042B4
+#define IXGBE_MACC      0x04330
+#define IXGBE_ATLASCTL  0x04800
+#define IXGBE_MMNGC     0x042D0
+#define IXGBE_ANLPNP1   0x042D4
+#define IXGBE_ANLPNP2   0x042D8
+#define IXGBE_KRPCSFC   0x042E0
+#define IXGBE_KRPCSS    0x042E4
+#define IXGBE_FECS1     0x042E8
+#define IXGBE_FECS2     0x042EC
+#define IXGBE_SMADARCTL 0x14F10
+#define IXGBE_MPVC      0x04318
+#define IXGBE_SGMIIC    0x04314
+
+/* Statistics Registers */
+#define IXGBE_RXNFGPC      0x041B0
+#define IXGBE_RXNFGBCL     0x041B4
+#define IXGBE_RXNFGBCH     0x041B8
+#define IXGBE_RXDGPC       0x02F50
+#define IXGBE_RXDGBCL      0x02F54
+#define IXGBE_RXDGBCH      0x02F58
+#define IXGBE_RXDDGPC      0x02F5C
+#define IXGBE_RXDDGBCL     0x02F60
+#define IXGBE_RXDDGBCH     0x02F64
+#define IXGBE_RXLPBKGPC    0x02F68
+#define IXGBE_RXLPBKGBCL   0x02F6C
+#define IXGBE_RXLPBKGBCH   0x02F70
+#define IXGBE_RXDLPBKGPC   0x02F74
+#define IXGBE_RXDLPBKGBCL  0x02F78
+#define IXGBE_RXDLPBKGBCH  0x02F7C
+#define IXGBE_TXDGPC       0x087A0
+#define IXGBE_TXDGBCL      0x087A4
+#define IXGBE_TXDGBCH      0x087A8
+
+#define IXGBE_RXDSTATCTRL 0x02F40
+
+/* Copper Pond 2 link timeout */
+#define IXGBE_VALIDATE_LINK_READY_TIMEOUT 50
+
+/* Omer CORECTL */
+#define IXGBE_CORECTL           0x014F00
+/* BARCTRL */
+#define IXGBE_BARCTRL               0x110F4
+#define IXGBE_BARCTRL_FLSIZE        0x0700
+#define IXGBE_BARCTRL_FLSIZE_SHIFT  8
+#define IXGBE_BARCTRL_CSRSIZE       0x2000
+
+/* RSCCTL Bit Masks */
+#define IXGBE_RSCCTL_RSCEN          0x01
+#define IXGBE_RSCCTL_MAXDESC_1      0x00
+#define IXGBE_RSCCTL_MAXDESC_4      0x04
+#define IXGBE_RSCCTL_MAXDESC_8      0x08
+#define IXGBE_RSCCTL_MAXDESC_16     0x0C
+
+/* RSCDBU Bit Masks */
+#define IXGBE_RSCDBU_RSCSMALDIS_MASK    0x0000007F
+#define IXGBE_RSCDBU_RSCACKDIS          0x00000080
+
+/* RDRXCTL Bit Masks */
+#define IXGBE_RDRXCTL_RDMTS_1_2     0x00000000 /* Rx Desc Min Threshold Size */
+#define IXGBE_RDRXCTL_CRCSTRIP      0x00000002 /* CRC Strip */
+#define IXGBE_RDRXCTL_PSP           0x00000004 /* Pad small packet */
+#define IXGBE_RDRXCTL_MVMEN         0x00000020
+#define IXGBE_RDRXCTL_DMAIDONE      0x00000008 /* DMA init cycle done */
+#define IXGBE_RDRXCTL_AGGDIS        0x00010000 /* Aggregation disable */
+#define IXGBE_RDRXCTL_RSCFRSTSIZE   0x003E0000 /* RSC First packet size */
+#define IXGBE_RDRXCTL_RSCLLIDIS     0x00800000 /* Disable RSC compl on LLI */
+#define IXGBE_RDRXCTL_RSCACKC       0x02000000 /* must set 1 when RSC enabled */
+#define IXGBE_RDRXCTL_FCOE_WRFIX    0x04000000 /* must set 1 when RSC enabled */
+#define IXGBE_RDRXCTL_MBINTEN       0x10000000
+#define IXGBE_RDRXCTL_MDP_EN        0x20000000
+
+/* RQTC Bit Masks and Shifts */
+#define IXGBE_RQTC_SHIFT_TC(_i)     ((_i) * 4)
+#define IXGBE_RQTC_TC0_MASK         (0x7 << 0)
+#define IXGBE_RQTC_TC1_MASK         (0x7 << 4)
+#define IXGBE_RQTC_TC2_MASK         (0x7 << 8)
+#define IXGBE_RQTC_TC3_MASK         (0x7 << 12)
+#define IXGBE_RQTC_TC4_MASK         (0x7 << 16)
+#define IXGBE_RQTC_TC5_MASK         (0x7 << 20)
+#define IXGBE_RQTC_TC6_MASK         (0x7 << 24)
+#define IXGBE_RQTC_TC7_MASK         (0x7 << 28)
+
+/* PSRTYPE.RQPL Bit masks and shift */
+#define IXGBE_PSRTYPE_RQPL_MASK     0x7
+#define IXGBE_PSRTYPE_RQPL_SHIFT    29
+
+/* CTRL Bit Masks */
+#define IXGBE_CTRL_GIO_DIS      0x00000004 /* Global IO Primary Disable bit */
+#define IXGBE_CTRL_LNK_RST      0x00000008 /* Link Reset. Resets everything. */
+#define IXGBE_CTRL_RST          0x04000000 /* Reset (SW) */
+#define IXGBE_CTRL_RST_MASK     (IXGBE_CTRL_LNK_RST | IXGBE_CTRL_RST)
+
+/* FACTPS */
+#define IXGBE_FACTPS_MNGCG      0x20000000 /* Manageblility Clock Gated */
+#define IXGBE_FACTPS_LFS        0x40000000 /* LAN Function Select */
+
+/* MHADD Bit Masks */
+#define IXGBE_MHADD_MFS_MASK    0xFFFF0000
+#define IXGBE_MHADD_MFS_SHIFT   16
+
+/* Extended Device Control */
+#define IXGBE_CTRL_EXT_PFRSTD   0x00004000 /* Physical Function Reset Done */
+#define IXGBE_CTRL_EXT_NS_DIS   0x00010000 /* No Snoop disable */
+#define IXGBE_CTRL_EXT_RO_DIS   0x00020000 /* Relaxed Ordering disable */
+#define IXGBE_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */
+
+/* Direct Cache Access (DCA) definitions */
+#define IXGBE_DCA_CTRL_DCA_ENABLE  0x00000000 /* DCA Enable */
+#define IXGBE_DCA_CTRL_DCA_DISABLE 0x00000001 /* DCA Disable */
+
+#define IXGBE_DCA_CTRL_DCA_MODE_CB1 0x00 /* DCA Mode CB1 */
+#define IXGBE_DCA_CTRL_DCA_MODE_CB2 0x02 /* DCA Mode CB2 */
+
+#define IXGBE_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */
+#define IXGBE_DCA_RXCTRL_CPUID_MASK_82599  0xFF000000 /* Rx CPUID Mask */
+#define IXGBE_DCA_RXCTRL_CPUID_SHIFT_82599 24 /* Rx CPUID Shift */
+#define IXGBE_DCA_RXCTRL_DESC_DCA_EN BIT(5) /* DCA Rx Desc enable */
+#define IXGBE_DCA_RXCTRL_HEAD_DCA_EN BIT(6) /* DCA Rx Desc header enable */
+#define IXGBE_DCA_RXCTRL_DATA_DCA_EN BIT(7) /* DCA Rx Desc payload enable */
+#define IXGBE_DCA_RXCTRL_DESC_RRO_EN BIT(9) /* DCA Rx rd Desc Relax Order */
+#define IXGBE_DCA_RXCTRL_DATA_WRO_EN BIT(13) /* Rx wr data Relax Order */
+#define IXGBE_DCA_RXCTRL_HEAD_WRO_EN BIT(15) /* Rx wr header RO */
+
+#define IXGBE_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */
+#define IXGBE_DCA_TXCTRL_CPUID_MASK_82599  0xFF000000 /* Tx CPUID Mask */
+#define IXGBE_DCA_TXCTRL_CPUID_SHIFT_82599 24 /* Tx CPUID Shift */
+#define IXGBE_DCA_TXCTRL_DESC_DCA_EN BIT(5) /* DCA Tx Desc enable */
+#define IXGBE_DCA_TXCTRL_DESC_RRO_EN BIT(9) /* Tx rd Desc Relax Order */
+#define IXGBE_DCA_TXCTRL_DESC_WRO_EN BIT(11) /* Tx Desc writeback RO bit */
+#define IXGBE_DCA_TXCTRL_DATA_RRO_EN BIT(13) /* Tx rd data Relax Order */
+#define IXGBE_DCA_MAX_QUEUES_82598   16 /* DCA regs only on 16 queues */
+
+/* MSCA Bit Masks */
+#define IXGBE_MSCA_NP_ADDR_MASK      0x0000FFFF /* MDI Address (new protocol) */
+#define IXGBE_MSCA_NP_ADDR_SHIFT     0
+#define IXGBE_MSCA_DEV_TYPE_MASK     0x001F0000 /* Device Type (new protocol) */
+#define IXGBE_MSCA_DEV_TYPE_SHIFT    16 /* Register Address (old protocol */
+#define IXGBE_MSCA_PHY_ADDR_MASK     0x03E00000 /* PHY Address mask */
+#define IXGBE_MSCA_PHY_ADDR_SHIFT    21 /* PHY Address shift*/
+#define IXGBE_MSCA_OP_CODE_MASK      0x0C000000 /* OP CODE mask */
+#define IXGBE_MSCA_OP_CODE_SHIFT     26 /* OP CODE shift */
+#define IXGBE_MSCA_ADDR_CYCLE        0x00000000 /* OP CODE 00 (addr cycle) */
+#define IXGBE_MSCA_WRITE             0x04000000 /* OP CODE 01 (write) */
+#define IXGBE_MSCA_READ              0x0C000000 /* OP CODE 11 (read) */
+#define IXGBE_MSCA_READ_AUTOINC      0x08000000 /* OP CODE 10 (read, auto inc)*/
+#define IXGBE_MSCA_ST_CODE_MASK      0x30000000 /* ST Code mask */
+#define IXGBE_MSCA_ST_CODE_SHIFT     28 /* ST Code shift */
+#define IXGBE_MSCA_NEW_PROTOCOL      0x00000000 /* ST CODE 00 (new protocol) */
+#define IXGBE_MSCA_OLD_PROTOCOL      0x10000000 /* ST CODE 01 (old protocol) */
+#define IXGBE_MSCA_MDI_COMMAND       0x40000000 /* Initiate MDI command */
+#define IXGBE_MSCA_MDI_IN_PROG_EN    0x80000000 /* MDI in progress enable */
+
+/* MSRWD bit masks */
+#define IXGBE_MSRWD_WRITE_DATA_MASK     0x0000FFFF
+#define IXGBE_MSRWD_WRITE_DATA_SHIFT    0
+#define IXGBE_MSRWD_READ_DATA_MASK      0xFFFF0000
+#define IXGBE_MSRWD_READ_DATA_SHIFT     16
+
+/* Atlas registers */
+#define IXGBE_ATLAS_PDN_LPBK    0x24
+#define IXGBE_ATLAS_PDN_10G     0xB
+#define IXGBE_ATLAS_PDN_1G      0xC
+#define IXGBE_ATLAS_PDN_AN      0xD
+
+/* Atlas bit masks */
+#define IXGBE_ATLASCTL_WRITE_CMD        0x00010000
+#define IXGBE_ATLAS_PDN_TX_REG_EN       0x10
+#define IXGBE_ATLAS_PDN_TX_10G_QL_ALL   0xF0
+#define IXGBE_ATLAS_PDN_TX_1G_QL_ALL    0xF0
+#define IXGBE_ATLAS_PDN_TX_AN_QL_ALL    0xF0
+
+/* Omer bit masks */
+#define IXGBE_CORECTL_WRITE_CMD         0x00010000
+
+/* MDIO definitions */
+
+#define IXGBE_MDIO_ZERO_DEV_TYPE		0x0
+#define IXGBE_MDIO_PCS_DEV_TYPE		0x3
+#define IXGBE_TWINAX_DEV			1
+
+#define IXGBE_MDIO_COMMAND_TIMEOUT     100 /* PHY Timeout for 1 GB mode */
+
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS  0x0008 /* 1 = Link Up */
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS 0x0010 /* 0 - 10G, 1 - 1G */
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_10G_SPEED    0x0018
+#define IXGBE_MDIO_VENDOR_SPECIFIC_1_1G_SPEED     0x0010
+
+#define IXGBE_MDIO_AUTO_NEG_VENDOR_STAT	0xC800 /* AUTO_NEG Vendor Status Reg */
+#define IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM  0xCC00 /* AUTO_NEG Vendor TX Reg */
+#define IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM2 0xCC01 /* AUTO_NEG Vendor Tx Reg */
+#define IXGBE_MDIO_AUTO_NEG_VEN_LSC	0x1 /* AUTO_NEG Vendor Tx LSC */
+#define IXGBE_MDIO_AUTO_NEG_EEE_ADVT	0x3C /* AUTO_NEG EEE Advt Reg */
+
+#define IXGBE_MDIO_PHY_SET_LOW_POWER_MODE	 0x0800 /* Set low power mode */
+#define IXGBE_AUTO_NEG_LP_STATUS	0xE820 /* AUTO NEG Rx LP Status Reg */
+#define IXGBE_AUTO_NEG_LP_1000BASE_CAP	0x8000 /* AUTO NEG Rx LP 1000BaseT */
+#define IXGBE_MDIO_TX_VENDOR_ALARMS_3	0xCC02 /* Vendor Alarms 3 Reg */
+#define IXGBE_MDIO_TX_VENDOR_ALARMS_3_RST_MASK 0x3 /* PHY Reset Complete Mask */
+#define IXGBE_MDIO_GLOBAL_RES_PR_10 0xC479 /* Global Resv Provisioning 10 Reg */
+#define IXGBE_MDIO_POWER_UP_STALL	0x8000 /* Power Up Stall */
+#define IXGBE_MDIO_GLOBAL_INT_CHIP_STD_MASK	0xFF00 /* int std mask */
+#define IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG	0xFC00 /* chip std int flag */
+#define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK	0xFF01 /* int chip-wide mask */
+#define IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_FLAG	0xFC01 /* int chip-wide mask */
+#define IXGBE_MDIO_GLOBAL_ALARM_1		0xCC00 /* Global alarm 1 */
+#define IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT	0x0010 /* device fault */
+#define IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL	0x4000 /* high temp failure */
+#define IXGBE_MDIO_GLOBAL_FAULT_MSG		0xC850 /* global fault msg */
+#define IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP	0x8007 /* high temp failure */
+#define IXGBE_MDIO_GLOBAL_INT_MASK		0xD400 /* Global int mask */
+/* autoneg vendor alarm int enable */
+#define IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN	0x1000
+#define IXGBE_MDIO_GLOBAL_ALARM_1_INT		0x4 /* int in Global alarm 1 */
+#define IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN	0x1 /* vendor alarm int enable */
+#define IXGBE_MDIO_GLOBAL_STD_ALM2_INT		0x200 /* vendor alarm2 int mask */
+#define IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN	0x4000 /* int high temp enable */
+#define IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN	0x0010 /*int dev fault enable */
+
+#define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR	0xC30A /* PHY_XS SDA/SCL Addr Reg */
+#define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA	0xC30B /* PHY_XS SDA/SCL Data Reg */
+#define IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT	0xC30C /* PHY_XS SDA/SCL Stat Reg */
+#define IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK	0xD401 /* PHY TX Vendor LASI */
+#define IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN	0x1 /* PHY TX Vendor LASI enable */
+#define IXGBE_MDIO_PMD_STD_TX_DISABLE_CNTR	0x9 /* Standard Tx Dis Reg */
+#define IXGBE_MDIO_PMD_GLOBAL_TX_DISABLE	0x0001 /* PMD Global Tx Dis */
+
+/* MII clause 22/28 definitions */
+#define IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG 0xC400 /* 1G Provisioning 1 */
+#define IXGBE_MII_AUTONEG_XNP_TX_REG             0x17   /* 1G XNP Transmit */
+#define IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX      0x4000 /* full duplex, bit:14*/
+#define IXGBE_MII_1GBASE_T_ADVERTISE             0x8000 /* full duplex, bit:15*/
+#define IXGBE_MII_AUTONEG_REG                    0x0
+
+#define IXGBE_PHY_REVISION_MASK        0xFFFFFFF0
+#define IXGBE_MAX_PHY_ADDR             32
+
+/* PHY IDs*/
+#define TN1010_PHY_ID    0x00A19410
+#define TNX_FW_REV       0xB
+#define X540_PHY_ID      0x01540200
+#define X550_PHY_ID2	0x01540223
+#define X550_PHY_ID3	0x01540221
+#define X557_PHY_ID      0x01540240
+#define X557_PHY_ID2	0x01540250
+#define QT2022_PHY_ID    0x0043A400
+#define ATH_PHY_ID       0x03429050
+#define AQ_FW_REV        0x20
+#define BCM54616S_E_PHY_ID 0x03625D10
+
+/* Special PHY Init Routine */
+#define IXGBE_PHY_INIT_OFFSET_NL 0x002B
+#define IXGBE_PHY_INIT_END_NL    0xFFFF
+#define IXGBE_CONTROL_MASK_NL    0xF000
+#define IXGBE_DATA_MASK_NL       0x0FFF
+#define IXGBE_CONTROL_SHIFT_NL   12
+#define IXGBE_DELAY_NL           0
+#define IXGBE_DATA_NL            1
+#define IXGBE_CONTROL_NL         0x000F
+#define IXGBE_CONTROL_EOL_NL     0x0FFF
+#define IXGBE_CONTROL_SOL_NL     0x0000
+
+/* General purpose Interrupt Enable */
+#define IXGBE_SDP0_GPIEN_8259X		0x00000001 /* SDP0 */
+#define IXGBE_SDP1_GPIEN_8259X		0x00000002 /* SDP1 */
+#define IXGBE_SDP2_GPIEN_8259X		0x00000004 /* SDP2 */
+#define IXGBE_SDP0_GPIEN_X540		0x00000002 /* SDP0 on X540 and X550 */
+#define IXGBE_SDP1_GPIEN_X540		0x00000004 /* SDP1 on X540 and X550 */
+#define IXGBE_SDP2_GPIEN_X540		0x00000008 /* SDP2 on X540 and X550 */
+#define IXGBE_SDP0_GPIEN_X550		IXGBE_SDP0_GPIEN_X540
+#define IXGBE_SDP1_GPIEN_X550		IXGBE_SDP1_GPIEN_X540
+#define IXGBE_SDP2_GPIEN_X550		IXGBE_SDP2_GPIEN_X540
+#define IXGBE_SDP0_GPIEN_X550EM_x	IXGBE_SDP0_GPIEN_X540
+#define IXGBE_SDP1_GPIEN_X550EM_x	IXGBE_SDP1_GPIEN_X540
+#define IXGBE_SDP2_GPIEN_X550EM_x	IXGBE_SDP2_GPIEN_X540
+#define IXGBE_SDP0_GPIEN_X550EM_a	IXGBE_SDP0_GPIEN_X540
+#define IXGBE_SDP1_GPIEN_X550EM_a	IXGBE_SDP1_GPIEN_X540
+#define IXGBE_SDP2_GPIEN_X550EM_a	IXGBE_SDP2_GPIEN_X540
+#define IXGBE_SDP0_GPIEN(_hw)		IXGBE_BY_MAC((_hw), SDP0_GPIEN)
+#define IXGBE_SDP1_GPIEN(_hw)		IXGBE_BY_MAC((_hw), SDP1_GPIEN)
+#define IXGBE_SDP2_GPIEN(_hw)		IXGBE_BY_MAC((_hw), SDP2_GPIEN)
+
+#define IXGBE_GPIE_MSIX_MODE     0x00000010 /* MSI-X mode */
+#define IXGBE_GPIE_OCD           0x00000020 /* Other Clear Disable */
+#define IXGBE_GPIE_EIMEN         0x00000040 /* Immediate Interrupt Enable */
+#define IXGBE_GPIE_EIAME         0x40000000
+#define IXGBE_GPIE_PBA_SUPPORT   0x80000000
+#define IXGBE_GPIE_RSC_DELAY_SHIFT 11
+#define IXGBE_GPIE_VTMODE_MASK   0x0000C000 /* VT Mode Mask */
+#define IXGBE_GPIE_VTMODE_16     0x00004000 /* 16 VFs 8 queues per VF */
+#define IXGBE_GPIE_VTMODE_32     0x00008000 /* 32 VFs 4 queues per VF */
+#define IXGBE_GPIE_VTMODE_64     0x0000C000 /* 64 VFs 2 queues per VF */
+
+/* Packet Buffer Initialization */
+#define IXGBE_TXPBSIZE_20KB     0x00005000 /* 20KB Packet Buffer */
+#define IXGBE_TXPBSIZE_40KB     0x0000A000 /* 40KB Packet Buffer */
+#define IXGBE_RXPBSIZE_48KB     0x0000C000 /* 48KB Packet Buffer */
+#define IXGBE_RXPBSIZE_64KB     0x00010000 /* 64KB Packet Buffer */
+#define IXGBE_RXPBSIZE_80KB     0x00014000 /* 80KB Packet Buffer */
+#define IXGBE_RXPBSIZE_128KB    0x00020000 /* 128KB Packet Buffer */
+#define IXGBE_RXPBSIZE_MAX      0x00080000 /* 512KB Packet Buffer*/
+#define IXGBE_TXPBSIZE_MAX      0x00028000 /* 160KB Packet Buffer*/
+
+#define IXGBE_TXPKT_SIZE_MAX    0xA        /* Max Tx Packet size  */
+#define IXGBE_MAX_PB		8
+
+/* Packet buffer allocation strategies */
+enum {
+	PBA_STRATEGY_EQUAL	= 0,	/* Distribute PB space equally */
+#define PBA_STRATEGY_EQUAL	PBA_STRATEGY_EQUAL
+	PBA_STRATEGY_WEIGHTED	= 1,	/* Weight front half of TCs */
+#define PBA_STRATEGY_WEIGHTED	PBA_STRATEGY_WEIGHTED
+};
+
+/* Transmit Flow Control status */
+#define IXGBE_TFCS_TXOFF         0x00000001
+#define IXGBE_TFCS_TXOFF0        0x00000100
+#define IXGBE_TFCS_TXOFF1        0x00000200
+#define IXGBE_TFCS_TXOFF2        0x00000400
+#define IXGBE_TFCS_TXOFF3        0x00000800
+#define IXGBE_TFCS_TXOFF4        0x00001000
+#define IXGBE_TFCS_TXOFF5        0x00002000
+#define IXGBE_TFCS_TXOFF6        0x00004000
+#define IXGBE_TFCS_TXOFF7        0x00008000
+
+/* TCP Timer */
+#define IXGBE_TCPTIMER_KS            0x00000100
+#define IXGBE_TCPTIMER_COUNT_ENABLE  0x00000200
+#define IXGBE_TCPTIMER_COUNT_FINISH  0x00000400
+#define IXGBE_TCPTIMER_LOOP          0x00000800
+#define IXGBE_TCPTIMER_DURATION_MASK 0x000000FF
+
+/* HLREG0 Bit Masks */
+#define IXGBE_HLREG0_TXCRCEN      0x00000001   /* bit  0 */
+#define IXGBE_HLREG0_RXCRCSTRP    0x00000002   /* bit  1 */
+#define IXGBE_HLREG0_JUMBOEN      0x00000004   /* bit  2 */
+#define IXGBE_HLREG0_TXPADEN      0x00000400   /* bit 10 */
+#define IXGBE_HLREG0_TXPAUSEEN    0x00001000   /* bit 12 */
+#define IXGBE_HLREG0_RXPAUSEEN    0x00004000   /* bit 14 */
+#define IXGBE_HLREG0_LPBK         0x00008000   /* bit 15 */
+#define IXGBE_HLREG0_MDCSPD       0x00010000   /* bit 16 */
+#define IXGBE_HLREG0_CONTMDC      0x00020000   /* bit 17 */
+#define IXGBE_HLREG0_CTRLFLTR     0x00040000   /* bit 18 */
+#define IXGBE_HLREG0_PREPEND      0x00F00000   /* bits 20-23 */
+#define IXGBE_HLREG0_PRIPAUSEEN   0x01000000   /* bit 24 */
+#define IXGBE_HLREG0_RXPAUSERECDA 0x06000000   /* bits 25-26 */
+#define IXGBE_HLREG0_RXLNGTHERREN 0x08000000   /* bit 27 */
+#define IXGBE_HLREG0_RXPADSTRIPEN 0x10000000   /* bit 28 */
+
+/* VMD_CTL bitmasks */
+#define IXGBE_VMD_CTL_VMDQ_EN     0x00000001
+#define IXGBE_VMD_CTL_VMDQ_FILTER 0x00000002
+
+/* VT_CTL bitmasks */
+#define IXGBE_VT_CTL_DIS_DEFPL  0x20000000 /* disable default pool */
+#define IXGBE_VT_CTL_REPLEN     0x40000000 /* replication enabled */
+#define IXGBE_VT_CTL_VT_ENABLE  0x00000001  /* Enable VT Mode */
+#define IXGBE_VT_CTL_POOL_SHIFT 7
+#define IXGBE_VT_CTL_POOL_MASK  (0x3F << IXGBE_VT_CTL_POOL_SHIFT)
+
+/* VMOLR bitmasks */
+#define IXGBE_VMOLR_UPE		0x00400000 /* unicast promiscuous */
+#define IXGBE_VMOLR_VPE		0x00800000 /* VLAN promiscuous */
+#define IXGBE_VMOLR_AUPE        0x01000000 /* accept untagged packets */
+#define IXGBE_VMOLR_ROMPE       0x02000000 /* accept packets in MTA tbl */
+#define IXGBE_VMOLR_ROPE        0x04000000 /* accept packets in UC tbl */
+#define IXGBE_VMOLR_BAM         0x08000000 /* accept broadcast packets */
+#define IXGBE_VMOLR_MPE         0x10000000 /* multicast promiscuous */
+
+/* VFRE bitmask */
+#define IXGBE_VFRE_ENABLE_ALL   0xFFFFFFFF
+
+#define IXGBE_VF_INIT_TIMEOUT   200 /* Number of retries to clear RSTI */
+
+/* RDHMPN and TDHMPN bitmasks */
+#define IXGBE_RDHMPN_RDICADDR       0x007FF800
+#define IXGBE_RDHMPN_RDICRDREQ      0x00800000
+#define IXGBE_RDHMPN_RDICADDR_SHIFT 11
+#define IXGBE_TDHMPN_TDICADDR       0x003FF800
+#define IXGBE_TDHMPN_TDICRDREQ      0x00800000
+#define IXGBE_TDHMPN_TDICADDR_SHIFT 11
+
+#define IXGBE_RDMAM_MEM_SEL_SHIFT   13
+#define IXGBE_RDMAM_DWORD_SHIFT     9
+#define IXGBE_RDMAM_DESC_COMP_FIFO  1
+#define IXGBE_RDMAM_DFC_CMD_FIFO    2
+#define IXGBE_RDMAM_TCN_STATUS_RAM  4
+#define IXGBE_RDMAM_WB_COLL_FIFO    5
+#define IXGBE_RDMAM_QSC_CNT_RAM     6
+#define IXGBE_RDMAM_QSC_QUEUE_CNT   8
+#define IXGBE_RDMAM_QSC_QUEUE_RAM   0xA
+#define IXGBE_RDMAM_DESC_COM_FIFO_RANGE     135
+#define IXGBE_RDMAM_DESC_COM_FIFO_COUNT     4
+#define IXGBE_RDMAM_DFC_CMD_FIFO_RANGE      48
+#define IXGBE_RDMAM_DFC_CMD_FIFO_COUNT      7
+#define IXGBE_RDMAM_TCN_STATUS_RAM_RANGE    256
+#define IXGBE_RDMAM_TCN_STATUS_RAM_COUNT    9
+#define IXGBE_RDMAM_WB_COLL_FIFO_RANGE      8
+#define IXGBE_RDMAM_WB_COLL_FIFO_COUNT      4
+#define IXGBE_RDMAM_QSC_CNT_RAM_RANGE       64
+#define IXGBE_RDMAM_QSC_CNT_RAM_COUNT       4
+#define IXGBE_RDMAM_QSC_QUEUE_CNT_RANGE     32
+#define IXGBE_RDMAM_QSC_QUEUE_CNT_COUNT     4
+#define IXGBE_RDMAM_QSC_QUEUE_RAM_RANGE     128
+#define IXGBE_RDMAM_QSC_QUEUE_RAM_COUNT     8
+
+#define IXGBE_TXDESCIC_READY        0x80000000
+
+/* Receive Checksum Control */
+#define IXGBE_RXCSUM_IPPCSE     0x00001000   /* IP payload checksum enable */
+#define IXGBE_RXCSUM_PCSD       0x00002000   /* packet checksum disabled */
+
+/* FCRTL Bit Masks */
+#define IXGBE_FCRTL_XONE        0x80000000  /* XON enable */
+#define IXGBE_FCRTH_FCEN        0x80000000  /* Packet buffer fc enable */
+
+/* PAP bit masks*/
+#define IXGBE_PAP_TXPAUSECNT_MASK   0x0000FFFF /* Pause counter mask */
+
+/* RMCS Bit Masks */
+#define IXGBE_RMCS_RRM          0x00000002 /* Receive Recycle Mode enable */
+/* Receive Arbitration Control: 0 Round Robin, 1 DFP */
+#define IXGBE_RMCS_RAC          0x00000004
+#define IXGBE_RMCS_DFP          IXGBE_RMCS_RAC /* Deficit Fixed Priority ena */
+#define IXGBE_RMCS_TFCE_802_3X         0x00000008 /* Tx Priority FC ena */
+#define IXGBE_RMCS_TFCE_PRIORITY       0x00000010 /* Tx Priority FC ena */
+#define IXGBE_RMCS_ARBDIS       0x00000040 /* Arbitration disable bit */
+
+/* FCCFG Bit Masks */
+#define IXGBE_FCCFG_TFCE_802_3X         0x00000008 /* Tx link FC enable */
+#define IXGBE_FCCFG_TFCE_PRIORITY       0x00000010 /* Tx priority FC enable */
+
+/* Interrupt register bitmasks */
+
+/* Extended Interrupt Cause Read */
+#define IXGBE_EICR_RTX_QUEUE    0x0000FFFF /* RTx Queue Interrupt */
+#define IXGBE_EICR_FLOW_DIR     0x00010000 /* FDir Exception */
+#define IXGBE_EICR_RX_MISS      0x00020000 /* Packet Buffer Overrun */
+#define IXGBE_EICR_PCI          0x00040000 /* PCI Exception */
+#define IXGBE_EICR_MAILBOX      0x00080000 /* VF to PF Mailbox Interrupt */
+#define IXGBE_EICR_LSC          0x00100000 /* Link Status Change */
+#define IXGBE_EICR_LINKSEC      0x00200000 /* PN Threshold */
+#define IXGBE_EICR_MNG          0x00400000 /* Manageability Event Interrupt */
+#define IXGBE_EICR_TS           0x00800000 /* Thermal Sensor Event */
+#define IXGBE_EICR_TIMESYNC     0x01000000 /* Timesync Event */
+#define IXGBE_EICR_GPI_SDP0_8259X	0x01000000 /* Gen Purpose INT on SDP0 */
+#define IXGBE_EICR_GPI_SDP1_8259X	0x02000000 /* Gen Purpose INT on SDP1 */
+#define IXGBE_EICR_GPI_SDP2_8259X	0x04000000 /* Gen Purpose INT on SDP2 */
+#define IXGBE_EICR_GPI_SDP0_X540	0x02000000
+#define IXGBE_EICR_GPI_SDP1_X540	0x04000000
+#define IXGBE_EICR_GPI_SDP2_X540	0x08000000
+#define IXGBE_EICR_GPI_SDP0_X550	IXGBE_EICR_GPI_SDP0_X540
+#define IXGBE_EICR_GPI_SDP1_X550	IXGBE_EICR_GPI_SDP1_X540
+#define IXGBE_EICR_GPI_SDP2_X550	IXGBE_EICR_GPI_SDP2_X540
+#define IXGBE_EICR_GPI_SDP0_X550EM_x	IXGBE_EICR_GPI_SDP0_X540
+#define IXGBE_EICR_GPI_SDP1_X550EM_x	IXGBE_EICR_GPI_SDP1_X540
+#define IXGBE_EICR_GPI_SDP2_X550EM_x	IXGBE_EICR_GPI_SDP2_X540
+#define IXGBE_EICR_GPI_SDP0_X550EM_a	IXGBE_EICR_GPI_SDP0_X540
+#define IXGBE_EICR_GPI_SDP1_X550EM_a	IXGBE_EICR_GPI_SDP1_X540
+#define IXGBE_EICR_GPI_SDP2_X550EM_a	IXGBE_EICR_GPI_SDP2_X540
+#define IXGBE_EICR_GPI_SDP0(_hw)	IXGBE_BY_MAC((_hw), EICR_GPI_SDP0)
+#define IXGBE_EICR_GPI_SDP1(_hw)	IXGBE_BY_MAC((_hw), EICR_GPI_SDP1)
+#define IXGBE_EICR_GPI_SDP2(_hw)	IXGBE_BY_MAC((_hw), EICR_GPI_SDP2)
+
+#define IXGBE_EICR_ECC          0x10000000 /* ECC Error */
+#define IXGBE_EICR_PBUR         0x10000000 /* Packet Buffer Handler Error */
+#define IXGBE_EICR_DHER         0x20000000 /* Descriptor Handler Error */
+#define IXGBE_EICR_TCP_TIMER    0x40000000 /* TCP Timer */
+#define IXGBE_EICR_OTHER        0x80000000 /* Interrupt Cause Active */
+
+/* Extended Interrupt Cause Set */
+#define IXGBE_EICS_RTX_QUEUE    IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */
+#define IXGBE_EICS_FLOW_DIR     IXGBE_EICR_FLOW_DIR  /* FDir Exception */
+#define IXGBE_EICS_RX_MISS      IXGBE_EICR_RX_MISS   /* Pkt Buffer Overrun */
+#define IXGBE_EICS_PCI          IXGBE_EICR_PCI       /* PCI Exception */
+#define IXGBE_EICS_MAILBOX      IXGBE_EICR_MAILBOX   /* VF to PF Mailbox Int */
+#define IXGBE_EICS_LSC          IXGBE_EICR_LSC       /* Link Status Change */
+#define IXGBE_EICS_MNG          IXGBE_EICR_MNG       /* MNG Event Interrupt */
+#define IXGBE_EICS_TIMESYNC     IXGBE_EICR_TIMESYNC  /* Timesync Event */
+#define IXGBE_EICS_GPI_SDP0(_hw)	IXGBE_EICR_GPI_SDP0(_hw)
+#define IXGBE_EICS_GPI_SDP1(_hw)	IXGBE_EICR_GPI_SDP1(_hw)
+#define IXGBE_EICS_GPI_SDP2(_hw)	IXGBE_EICR_GPI_SDP2(_hw)
+#define IXGBE_EICS_ECC          IXGBE_EICR_ECC       /* ECC Error */
+#define IXGBE_EICS_PBUR         IXGBE_EICR_PBUR      /* Pkt Buf Handler Err */
+#define IXGBE_EICS_DHER         IXGBE_EICR_DHER      /* Desc Handler Error */
+#define IXGBE_EICS_TCP_TIMER    IXGBE_EICR_TCP_TIMER /* TCP Timer */
+#define IXGBE_EICS_OTHER        IXGBE_EICR_OTHER     /* INT Cause Active */
+
+/* Extended Interrupt Mask Set */
+#define IXGBE_EIMS_RTX_QUEUE    IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */
+#define IXGBE_EIMS_FLOW_DIR     IXGBE_EICR_FLOW_DIR  /* FDir Exception */
+#define IXGBE_EIMS_RX_MISS      IXGBE_EICR_RX_MISS   /* Packet Buffer Overrun */
+#define IXGBE_EIMS_PCI          IXGBE_EICR_PCI       /* PCI Exception */
+#define IXGBE_EIMS_MAILBOX      IXGBE_EICR_MAILBOX   /* VF to PF Mailbox Int */
+#define IXGBE_EIMS_LSC          IXGBE_EICR_LSC       /* Link Status Change */
+#define IXGBE_EIMS_MNG          IXGBE_EICR_MNG       /* MNG Event Interrupt */
+#define IXGBE_EIMS_TS           IXGBE_EICR_TS        /* Thermel Sensor Event */
+#define IXGBE_EIMS_TIMESYNC     IXGBE_EICR_TIMESYNC  /* Timesync Event */
+#define IXGBE_EIMS_GPI_SDP0(_hw)	IXGBE_EICR_GPI_SDP0(_hw)
+#define IXGBE_EIMS_GPI_SDP1(_hw)	IXGBE_EICR_GPI_SDP1(_hw)
+#define IXGBE_EIMS_GPI_SDP2(_hw)	IXGBE_EICR_GPI_SDP2(_hw)
+#define IXGBE_EIMS_ECC          IXGBE_EICR_ECC       /* ECC Error */
+#define IXGBE_EIMS_PBUR         IXGBE_EICR_PBUR      /* Pkt Buf Handler Err */
+#define IXGBE_EIMS_DHER         IXGBE_EICR_DHER      /* Descr Handler Error */
+#define IXGBE_EIMS_TCP_TIMER    IXGBE_EICR_TCP_TIMER /* TCP Timer */
+#define IXGBE_EIMS_OTHER        IXGBE_EICR_OTHER     /* INT Cause Active */
+
+/* Extended Interrupt Mask Clear */
+#define IXGBE_EIMC_RTX_QUEUE    IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */
+#define IXGBE_EIMC_FLOW_DIR     IXGBE_EICR_FLOW_DIR  /* FDir Exception */
+#define IXGBE_EIMC_RX_MISS      IXGBE_EICR_RX_MISS   /* Packet Buffer Overrun */
+#define IXGBE_EIMC_PCI          IXGBE_EICR_PCI       /* PCI Exception */
+#define IXGBE_EIMC_MAILBOX      IXGBE_EICR_MAILBOX   /* VF to PF Mailbox Int */
+#define IXGBE_EIMC_LSC          IXGBE_EICR_LSC       /* Link Status Change */
+#define IXGBE_EIMC_MNG          IXGBE_EICR_MNG       /* MNG Event Interrupt */
+#define IXGBE_EIMC_TIMESYNC     IXGBE_EICR_TIMESYNC  /* Timesync Event */
+#define IXGBE_EIMC_GPI_SDP0(_hw)	IXGBE_EICR_GPI_SDP0(_hw)
+#define IXGBE_EIMC_GPI_SDP1(_hw)	IXGBE_EICR_GPI_SDP1(_hw)
+#define IXGBE_EIMC_GPI_SDP2(_hw)	IXGBE_EICR_GPI_SDP2(_hw)
+#define IXGBE_EIMC_ECC          IXGBE_EICR_ECC       /* ECC Error */
+#define IXGBE_EIMC_PBUR         IXGBE_EICR_PBUR      /* Pkt Buf Handler Err */
+#define IXGBE_EIMC_DHER         IXGBE_EICR_DHER      /* Desc Handler Err */
+#define IXGBE_EIMC_TCP_TIMER    IXGBE_EICR_TCP_TIMER /* TCP Timer */
+#define IXGBE_EIMC_OTHER        IXGBE_EICR_OTHER     /* INT Cause Active */
+
+#define IXGBE_EIMS_ENABLE_MASK ( \
+				IXGBE_EIMS_RTX_QUEUE       | \
+				IXGBE_EIMS_LSC             | \
+				IXGBE_EIMS_TCP_TIMER       | \
+				IXGBE_EIMS_OTHER)
+
+/* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */
+#define IXGBE_IMIR_PORT_IM_EN     0x00010000  /* TCP port enable */
+#define IXGBE_IMIR_PORT_BP        0x00020000  /* TCP port check bypass */
+#define IXGBE_IMIREXT_SIZE_BP     0x00001000  /* Packet size bypass */
+#define IXGBE_IMIREXT_CTRL_URG    0x00002000  /* Check URG bit in header */
+#define IXGBE_IMIREXT_CTRL_ACK    0x00004000  /* Check ACK bit in header */
+#define IXGBE_IMIREXT_CTRL_PSH    0x00008000  /* Check PSH bit in header */
+#define IXGBE_IMIREXT_CTRL_RST    0x00010000  /* Check RST bit in header */
+#define IXGBE_IMIREXT_CTRL_SYN    0x00020000  /* Check SYN bit in header */
+#define IXGBE_IMIREXT_CTRL_FIN    0x00040000  /* Check FIN bit in header */
+#define IXGBE_IMIREXT_CTRL_BP     0x00080000  /* Bypass check of control bits */
+#define IXGBE_IMIR_SIZE_BP_82599  0x00001000 /* Packet size bypass */
+#define IXGBE_IMIR_CTRL_URG_82599 0x00002000 /* Check URG bit in header */
+#define IXGBE_IMIR_CTRL_ACK_82599 0x00004000 /* Check ACK bit in header */
+#define IXGBE_IMIR_CTRL_PSH_82599 0x00008000 /* Check PSH bit in header */
+#define IXGBE_IMIR_CTRL_RST_82599 0x00010000 /* Check RST bit in header */
+#define IXGBE_IMIR_CTRL_SYN_82599 0x00020000 /* Check SYN bit in header */
+#define IXGBE_IMIR_CTRL_FIN_82599 0x00040000 /* Check FIN bit in header */
+#define IXGBE_IMIR_CTRL_BP_82599  0x00080000 /* Bypass check of control bits */
+#define IXGBE_IMIR_LLI_EN_82599   0x00100000 /* Enables low latency Int */
+#define IXGBE_IMIR_RX_QUEUE_MASK_82599  0x0000007F /* Rx Queue Mask */
+#define IXGBE_IMIR_RX_QUEUE_SHIFT_82599 21 /* Rx Queue Shift */
+#define IXGBE_IMIRVP_PRIORITY_MASK      0x00000007 /* VLAN priority mask */
+#define IXGBE_IMIRVP_PRIORITY_EN        0x00000008 /* VLAN priority enable */
+
+#define IXGBE_MAX_FTQF_FILTERS          128
+#define IXGBE_FTQF_PROTOCOL_MASK        0x00000003
+#define IXGBE_FTQF_PROTOCOL_TCP         0x00000000
+#define IXGBE_FTQF_PROTOCOL_UDP         0x00000001
+#define IXGBE_FTQF_PROTOCOL_SCTP        2
+#define IXGBE_FTQF_PRIORITY_MASK        0x00000007
+#define IXGBE_FTQF_PRIORITY_SHIFT       2
+#define IXGBE_FTQF_POOL_MASK            0x0000003F
+#define IXGBE_FTQF_POOL_SHIFT           8
+#define IXGBE_FTQF_5TUPLE_MASK_MASK     0x0000001F
+#define IXGBE_FTQF_5TUPLE_MASK_SHIFT    25
+#define IXGBE_FTQF_SOURCE_ADDR_MASK     0x1E
+#define IXGBE_FTQF_DEST_ADDR_MASK       0x1D
+#define IXGBE_FTQF_SOURCE_PORT_MASK     0x1B
+#define IXGBE_FTQF_DEST_PORT_MASK       0x17
+#define IXGBE_FTQF_PROTOCOL_COMP_MASK   0x0F
+#define IXGBE_FTQF_POOL_MASK_EN         0x40000000
+#define IXGBE_FTQF_QUEUE_ENABLE         0x80000000
+
+/* Interrupt clear mask */
+#define IXGBE_IRQ_CLEAR_MASK    0xFFFFFFFF
+
+/* Interrupt Vector Allocation Registers */
+#define IXGBE_IVAR_REG_NUM      25
+#define IXGBE_IVAR_REG_NUM_82599       64
+#define IXGBE_IVAR_TXRX_ENTRY   96
+#define IXGBE_IVAR_RX_ENTRY     64
+#define IXGBE_IVAR_RX_QUEUE(_i)    (0 + (_i))
+#define IXGBE_IVAR_TX_QUEUE(_i)    (64 + (_i))
+#define IXGBE_IVAR_TX_ENTRY     32
+
+#define IXGBE_IVAR_TCP_TIMER_INDEX       96 /* 0 based index */
+#define IXGBE_IVAR_OTHER_CAUSES_INDEX    97 /* 0 based index */
+
+#define IXGBE_MSIX_VECTOR(_i)   (0 + (_i))
+
+#define IXGBE_IVAR_ALLOC_VAL    0x80 /* Interrupt Allocation valid */
+
+/* ETYPE Queue Filter/Select Bit Masks */
+#define IXGBE_MAX_ETQF_FILTERS  8
+#define IXGBE_ETQF_FCOE         0x08000000 /* bit 27 */
+#define IXGBE_ETQF_BCN          0x10000000 /* bit 28 */
+#define IXGBE_ETQF_TX_ANTISPOOF	0x20000000 /* bit 29 */
+#define IXGBE_ETQF_1588         0x40000000 /* bit 30 */
+#define IXGBE_ETQF_FILTER_EN    0x80000000 /* bit 31 */
+#define IXGBE_ETQF_POOL_ENABLE   BIT(26) /* bit 26 */
+#define IXGBE_ETQF_POOL_SHIFT		20
+
+#define IXGBE_ETQS_RX_QUEUE     0x007F0000 /* bits 22:16 */
+#define IXGBE_ETQS_RX_QUEUE_SHIFT       16
+#define IXGBE_ETQS_LLI          0x20000000 /* bit 29 */
+#define IXGBE_ETQS_QUEUE_EN     0x80000000 /* bit 31 */
+
+/*
+ * ETQF filter list: one static filter per filter consumer. This is
+ *                   to avoid filter collisions later. Add new filters
+ *                   here!!
+ *
+ * Current filters:
+ *    EAPOL 802.1x (0x888e): Filter 0
+ *    FCoE (0x8906):         Filter 2
+ *    1588 (0x88f7):         Filter 3
+ *    FIP  (0x8914):         Filter 4
+ *    LLDP (0x88CC):         Filter 5
+ *    LACP (0x8809):         Filter 6
+ *    FC   (0x8808):         Filter 7
+ */
+#define IXGBE_ETQF_FILTER_EAPOL          0
+#define IXGBE_ETQF_FILTER_FCOE           2
+#define IXGBE_ETQF_FILTER_1588           3
+#define IXGBE_ETQF_FILTER_FIP            4
+#define IXGBE_ETQF_FILTER_LLDP		 5
+#define IXGBE_ETQF_FILTER_LACP		 6
+#define IXGBE_ETQF_FILTER_FC		 7
+
+/* VLAN Control Bit Masks */
+#define IXGBE_VLNCTRL_VET       0x0000FFFF  /* bits 0-15 */
+#define IXGBE_VLNCTRL_CFI       0x10000000  /* bit 28 */
+#define IXGBE_VLNCTRL_CFIEN     0x20000000  /* bit 29 */
+#define IXGBE_VLNCTRL_VFE       0x40000000  /* bit 30 */
+#define IXGBE_VLNCTRL_VME       0x80000000  /* bit 31 */
+
+/* VLAN pool filtering masks */
+#define IXGBE_VLVF_VIEN         0x80000000  /* filter is valid */
+#define IXGBE_VLVF_ENTRIES      64
+#define IXGBE_VLVF_VLANID_MASK  0x00000FFF
+
+/* Per VF Port VLAN insertion rules */
+#define IXGBE_VMVIR_VLANA_DEFAULT 0x40000000 /* Always use default VLAN */
+#define IXGBE_VMVIR_VLANA_NEVER   0x80000000 /* Never insert VLAN tag */
+
+#define IXGBE_ETHERNET_IEEE_VLAN_TYPE 0x8100  /* 802.1q protocol */
+
+/* STATUS Bit Masks */
+#define IXGBE_STATUS_LAN_ID         0x0000000C /* LAN ID */
+#define IXGBE_STATUS_LAN_ID_SHIFT   2          /* LAN ID Shift*/
+#define IXGBE_STATUS_GIO            0x00080000 /* GIO Primary Enable Status */
+
+#define IXGBE_STATUS_LAN_ID_0   0x00000000 /* LAN ID 0 */
+#define IXGBE_STATUS_LAN_ID_1   0x00000004 /* LAN ID 1 */
+
+/* ESDP Bit Masks */
+#define IXGBE_ESDP_SDP0 0x00000001 /* SDP0 Data Value */
+#define IXGBE_ESDP_SDP1 0x00000002 /* SDP1 Data Value */
+#define IXGBE_ESDP_SDP2 0x00000004 /* SDP2 Data Value */
+#define IXGBE_ESDP_SDP3 0x00000008 /* SDP3 Data Value */
+#define IXGBE_ESDP_SDP4 0x00000010 /* SDP4 Data Value */
+#define IXGBE_ESDP_SDP5 0x00000020 /* SDP5 Data Value */
+#define IXGBE_ESDP_SDP6 0x00000040 /* SDP6 Data Value */
+#define IXGBE_ESDP_SDP0_DIR     0x00000100 /* SDP0 IO direction */
+#define IXGBE_ESDP_SDP1_DIR     0x00000200 /* SDP1 IO direction */
+#define IXGBE_ESDP_SDP4_DIR     0x00000004 /* SDP4 IO direction */
+#define IXGBE_ESDP_SDP5_DIR     0x00002000 /* SDP5 IO direction */
+#define IXGBE_ESDP_SDP0_NATIVE  0x00010000 /* SDP0 Native Function */
+#define IXGBE_ESDP_SDP1_NATIVE  0x00020000 /* SDP1 IO mode */
+
+/* LEDCTL Bit Masks */
+#define IXGBE_LED_IVRT_BASE      0x00000040
+#define IXGBE_LED_BLINK_BASE     0x00000080
+#define IXGBE_LED_MODE_MASK_BASE 0x0000000F
+#define IXGBE_LED_OFFSET(_base, _i) (_base << (8 * (_i)))
+#define IXGBE_LED_MODE_SHIFT(_i) (8 * (_i))
+#define IXGBE_LED_IVRT(_i)       IXGBE_LED_OFFSET(IXGBE_LED_IVRT_BASE, _i)
+#define IXGBE_LED_BLINK(_i)      IXGBE_LED_OFFSET(IXGBE_LED_BLINK_BASE, _i)
+#define IXGBE_LED_MODE_MASK(_i)  IXGBE_LED_OFFSET(IXGBE_LED_MODE_MASK_BASE, _i)
+#define IXGBE_X557_LED_MANUAL_SET_MASK	BIT(8)
+#define IXGBE_X557_MAX_LED_INDEX	3
+#define IXGBE_X557_LED_PROVISIONING	0xC430
+
+/* LED modes */
+#define IXGBE_LED_LINK_UP       0x0
+#define IXGBE_LED_LINK_10G      0x1
+#define IXGBE_LED_MAC           0x2
+#define IXGBE_LED_FILTER        0x3
+#define IXGBE_LED_LINK_ACTIVE   0x4
+#define IXGBE_LED_LINK_1G       0x5
+#define IXGBE_LED_ON            0xE
+#define IXGBE_LED_OFF           0xF
+
+/* AUTOC Bit Masks */
+#define IXGBE_AUTOC_KX4_KX_SUPP_MASK 0xC0000000
+#define IXGBE_AUTOC_KX4_SUPP    0x80000000
+#define IXGBE_AUTOC_KX_SUPP     0x40000000
+#define IXGBE_AUTOC_PAUSE       0x30000000
+#define IXGBE_AUTOC_ASM_PAUSE   0x20000000
+#define IXGBE_AUTOC_SYM_PAUSE   0x10000000
+#define IXGBE_AUTOC_RF          0x08000000
+#define IXGBE_AUTOC_PD_TMR      0x06000000
+#define IXGBE_AUTOC_AN_RX_LOOSE 0x01000000
+#define IXGBE_AUTOC_AN_RX_DRIFT 0x00800000
+#define IXGBE_AUTOC_AN_RX_ALIGN 0x007C0000
+#define IXGBE_AUTOC_FECA        0x00040000
+#define IXGBE_AUTOC_FECR        0x00020000
+#define IXGBE_AUTOC_KR_SUPP     0x00010000
+#define IXGBE_AUTOC_AN_RESTART  0x00001000
+#define IXGBE_AUTOC_FLU         0x00000001
+#define IXGBE_AUTOC_LMS_SHIFT   13
+#define IXGBE_AUTOC_LMS_10G_SERIAL      (0x3 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_KX4_KX_KR       (0x4 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_SGMII_1G_100M   (0x5 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN (0x6 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII (0x7 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_MASK            (0x7 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_1G_LINK_NO_AN   (0x0 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_10G_LINK_NO_AN  (0x1 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_1G_AN           (0x2 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_KX4_AN          (0x4 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_KX4_AN_1G_AN    (0x6 << IXGBE_AUTOC_LMS_SHIFT)
+#define IXGBE_AUTOC_LMS_ATTACH_TYPE     (0x7 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
+
+#define IXGBE_AUTOC_1G_PMA_PMD_MASK    0x00000200
+#define IXGBE_AUTOC_1G_PMA_PMD_SHIFT   9
+#define IXGBE_AUTOC_10G_PMA_PMD_MASK   0x00000180
+#define IXGBE_AUTOC_10G_PMA_PMD_SHIFT  7
+#define IXGBE_AUTOC_10G_XAUI   (0u << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_10G_KX4    (1u << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_10G_CX4    (2u << IXGBE_AUTOC_10G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_1G_BX      (0u << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_1G_KX      (1u << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_1G_SFI     (0u << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC_1G_KX_BX   (1u << IXGBE_AUTOC_1G_PMA_PMD_SHIFT)
+
+#define IXGBE_AUTOC2_UPPER_MASK  0xFFFF0000
+#define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK  0x00030000
+#define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT 16
+#define IXGBE_AUTOC2_10G_KR  (0u << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC2_10G_XFI (1u << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC2_10G_SFI (2u << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT)
+#define IXGBE_AUTOC2_LINK_DISABLE_ON_D3_MASK  0x50000000
+#define IXGBE_AUTOC2_LINK_DISABLE_MASK        0x70000000
+
+#define IXGBE_MACC_FLU       0x00000001
+#define IXGBE_MACC_FSV_10G   0x00030000
+#define IXGBE_MACC_FS        0x00040000
+#define IXGBE_MAC_RX2TX_LPBK 0x00000002
+
+/* Veto Bit definition */
+#define IXGBE_MMNGC_MNG_VETO  0x00000001
+
+/* LINKS Bit Masks */
+#define IXGBE_LINKS_KX_AN_COMP  0x80000000
+#define IXGBE_LINKS_UP          0x40000000
+#define IXGBE_LINKS_SPEED       0x20000000
+#define IXGBE_LINKS_MODE        0x18000000
+#define IXGBE_LINKS_RX_MODE     0x06000000
+#define IXGBE_LINKS_TX_MODE     0x01800000
+#define IXGBE_LINKS_XGXS_EN     0x00400000
+#define IXGBE_LINKS_SGMII_EN    0x02000000
+#define IXGBE_LINKS_PCS_1G_EN   0x00200000
+#define IXGBE_LINKS_1G_AN_EN    0x00100000
+#define IXGBE_LINKS_KX_AN_IDLE  0x00080000
+#define IXGBE_LINKS_1G_SYNC     0x00040000
+#define IXGBE_LINKS_10G_ALIGN   0x00020000
+#define IXGBE_LINKS_10G_LANE_SYNC 0x00017000
+#define IXGBE_LINKS_TL_FAULT    0x00001000
+#define IXGBE_LINKS_SIGNAL      0x00000F00
+
+#define IXGBE_LINKS_SPEED_NON_STD   0x08000000
+#define IXGBE_LINKS_SPEED_82599     0x30000000
+#define IXGBE_LINKS_SPEED_10G_82599 0x30000000
+#define IXGBE_LINKS_SPEED_1G_82599  0x20000000
+#define IXGBE_LINKS_SPEED_100_82599 0x10000000
+#define IXGBE_LINKS_SPEED_10_X550EM_A 0
+#define IXGBE_LINK_UP_TIME      90 /* 9.0 Seconds */
+#define IXGBE_AUTO_NEG_TIME     45 /* 4.5 Seconds */
+
+#define IXGBE_LINKS2_AN_SUPPORTED   0x00000040
+
+/* PCS1GLSTA Bit Masks */
+#define IXGBE_PCS1GLSTA_LINK_OK         1
+#define IXGBE_PCS1GLSTA_SYNK_OK         0x10
+#define IXGBE_PCS1GLSTA_AN_COMPLETE     0x10000
+#define IXGBE_PCS1GLSTA_AN_PAGE_RX      0x20000
+#define IXGBE_PCS1GLSTA_AN_TIMED_OUT    0x40000
+#define IXGBE_PCS1GLSTA_AN_REMOTE_FAULT 0x80000
+#define IXGBE_PCS1GLSTA_AN_ERROR_RWS    0x100000
+
+#define IXGBE_PCS1GANA_SYM_PAUSE        0x80
+#define IXGBE_PCS1GANA_ASM_PAUSE        0x100
+
+/* PCS1GLCTL Bit Masks */
+#define IXGBE_PCS1GLCTL_AN_1G_TIMEOUT_EN  0x00040000 /* PCS 1G autoneg to en */
+#define IXGBE_PCS1GLCTL_FLV_LINK_UP     1
+#define IXGBE_PCS1GLCTL_FORCE_LINK      0x20
+#define IXGBE_PCS1GLCTL_LOW_LINK_LATCH  0x40
+#define IXGBE_PCS1GLCTL_AN_ENABLE       0x10000
+#define IXGBE_PCS1GLCTL_AN_RESTART      0x20000
+
+/* ANLP1 Bit Masks */
+#define IXGBE_ANLP1_PAUSE               0x0C00
+#define IXGBE_ANLP1_SYM_PAUSE           0x0400
+#define IXGBE_ANLP1_ASM_PAUSE           0x0800
+#define IXGBE_ANLP1_AN_STATE_MASK       0x000f0000
+
+/* SW Semaphore Register bitmasks */
+#define IXGBE_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */
+#define IXGBE_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */
+#define IXGBE_SWSM_WMNG 0x00000004 /* Wake MNG Clock */
+#define IXGBE_SWFW_REGSMP 0x80000000 /* Register Semaphore bit 31 */
+
+/* SW_FW_SYNC/GSSR definitions */
+#define IXGBE_GSSR_EEP_SM		0x0001
+#define IXGBE_GSSR_PHY0_SM		0x0002
+#define IXGBE_GSSR_PHY1_SM		0x0004
+#define IXGBE_GSSR_MAC_CSR_SM		0x0008
+#define IXGBE_GSSR_FLASH_SM		0x0010
+#define IXGBE_GSSR_NVM_UPDATE_SM	0x0200
+#define IXGBE_GSSR_SW_MNG_SM		0x0400
+#define IXGBE_GSSR_TOKEN_SM	0x40000000 /* SW bit for shared access */
+#define IXGBE_GSSR_SHARED_I2C_SM	0x1806 /* Wait for both phys & I2Cs */
+#define IXGBE_GSSR_I2C_MASK		0x1800
+#define IXGBE_GSSR_NVM_PHY_MASK		0xF
+
+/* FW Status register bitmask */
+#define IXGBE_FWSTS_FWRI    0x00000200 /* Firmware Reset Indication */
+
+/* EEC Register */
+#define IXGBE_EEC_SK        0x00000001 /* EEPROM Clock */
+#define IXGBE_EEC_CS        0x00000002 /* EEPROM Chip Select */
+#define IXGBE_EEC_DI        0x00000004 /* EEPROM Data In */
+#define IXGBE_EEC_DO        0x00000008 /* EEPROM Data Out */
+#define IXGBE_EEC_FWE_MASK  0x00000030 /* FLASH Write Enable */
+#define IXGBE_EEC_FWE_DIS   0x00000010 /* Disable FLASH writes */
+#define IXGBE_EEC_FWE_EN    0x00000020 /* Enable FLASH writes */
+#define IXGBE_EEC_FWE_SHIFT 4
+#define IXGBE_EEC_REQ       0x00000040 /* EEPROM Access Request */
+#define IXGBE_EEC_GNT       0x00000080 /* EEPROM Access Grant */
+#define IXGBE_EEC_PRES      0x00000100 /* EEPROM Present */
+#define IXGBE_EEC_ARD       0x00000200 /* EEPROM Auto Read Done */
+#define IXGBE_EEC_FLUP      0x00800000 /* Flash update command */
+#define IXGBE_EEC_SEC1VAL   0x02000000 /* Sector 1 Valid */
+#define IXGBE_EEC_FLUDONE   0x04000000 /* Flash update done */
+/* EEPROM Addressing bits based on type (0-small, 1-large) */
+#define IXGBE_EEC_ADDR_SIZE 0x00000400
+#define IXGBE_EEC_SIZE      0x00007800 /* EEPROM Size */
+#define IXGBE_EERD_MAX_ADDR 0x00003FFF /* EERD alows 14 bits for addr. */
+
+#define IXGBE_EEC_SIZE_SHIFT          11
+#define IXGBE_EEPROM_WORD_SIZE_SHIFT  6
+#define IXGBE_EEPROM_OPCODE_BITS      8
+
+/* Part Number String Length */
+#define IXGBE_PBANUM_LENGTH 11
+
+/* Checksum and EEPROM pointers */
+#define IXGBE_PBANUM_PTR_GUARD		0xFAFA
+#define IXGBE_EEPROM_CHECKSUM		0x3F
+#define IXGBE_EEPROM_SUM		0xBABA
+#define IXGBE_EEPROM_CTRL_4		0x45
+#define IXGBE_EE_CTRL_4_INST_ID		0x10
+#define IXGBE_EE_CTRL_4_INST_ID_SHIFT	4
+#define IXGBE_PCIE_ANALOG_PTR		0x03
+#define IXGBE_ATLAS0_CONFIG_PTR		0x04
+#define IXGBE_PHY_PTR			0x04
+#define IXGBE_ATLAS1_CONFIG_PTR		0x05
+#define IXGBE_OPTION_ROM_PTR		0x05
+#define IXGBE_PCIE_GENERAL_PTR		0x06
+#define IXGBE_PCIE_CONFIG0_PTR		0x07
+#define IXGBE_PCIE_CONFIG1_PTR		0x08
+#define IXGBE_CORE0_PTR			0x09
+#define IXGBE_CORE1_PTR			0x0A
+#define IXGBE_MAC0_PTR			0x0B
+#define IXGBE_MAC1_PTR			0x0C
+#define IXGBE_CSR0_CONFIG_PTR		0x0D
+#define IXGBE_CSR1_CONFIG_PTR		0x0E
+#define IXGBE_PCIE_ANALOG_PTR_X550	0x02
+#define IXGBE_SHADOW_RAM_SIZE_X550	0x4000
+#define IXGBE_IXGBE_PCIE_GENERAL_SIZE	0x24
+#define IXGBE_PCIE_CONFIG_SIZE		0x08
+#define IXGBE_EEPROM_LAST_WORD		0x41
+#define IXGBE_FW_PTR			0x0F
+#define IXGBE_PBANUM0_PTR		0x15
+#define IXGBE_PBANUM1_PTR		0x16
+#define IXGBE_FREE_SPACE_PTR		0X3E
+
+/* External Thermal Sensor Config */
+#define IXGBE_ETS_CFG                   0x26
+#define IXGBE_ETS_LTHRES_DELTA_MASK     0x07C0
+#define IXGBE_ETS_LTHRES_DELTA_SHIFT    6
+#define IXGBE_ETS_TYPE_MASK             0x0038
+#define IXGBE_ETS_TYPE_SHIFT            3
+#define IXGBE_ETS_TYPE_EMC              0x000
+#define IXGBE_ETS_TYPE_EMC_SHIFTED      0x000
+#define IXGBE_ETS_NUM_SENSORS_MASK      0x0007
+#define IXGBE_ETS_DATA_LOC_MASK         0x3C00
+#define IXGBE_ETS_DATA_LOC_SHIFT        10
+#define IXGBE_ETS_DATA_INDEX_MASK       0x0300
+#define IXGBE_ETS_DATA_INDEX_SHIFT      8
+#define IXGBE_ETS_DATA_HTHRESH_MASK     0x00FF
+
+#define IXGBE_SAN_MAC_ADDR_PTR  0x28
+#define IXGBE_DEVICE_CAPS       0x2C
+#define IXGBE_SERIAL_NUMBER_MAC_ADDR 0x11
+#define IXGBE_PCIE_MSIX_82599_CAPS  0x72
+#define IXGBE_MAX_MSIX_VECTORS_82599	0x40
+#define IXGBE_PCIE_MSIX_82598_CAPS  0x62
+#define IXGBE_MAX_MSIX_VECTORS_82598	0x13
+
+/* MSI-X capability fields masks */
+#define IXGBE_PCIE_MSIX_TBL_SZ_MASK     0x7FF
+
+/* Legacy EEPROM word offsets */
+#define IXGBE_ISCSI_BOOT_CAPS           0x0033
+#define IXGBE_ISCSI_SETUP_PORT_0        0x0030
+#define IXGBE_ISCSI_SETUP_PORT_1        0x0034
+
+/* EEPROM Commands - SPI */
+#define IXGBE_EEPROM_MAX_RETRY_SPI      5000 /* Max wait 5ms for RDY signal */
+#define IXGBE_EEPROM_STATUS_RDY_SPI     0x01
+#define IXGBE_EEPROM_READ_OPCODE_SPI    0x03  /* EEPROM read opcode */
+#define IXGBE_EEPROM_WRITE_OPCODE_SPI   0x02  /* EEPROM write opcode */
+#define IXGBE_EEPROM_A8_OPCODE_SPI      0x08  /* opcode bit-3 = addr bit-8 */
+#define IXGBE_EEPROM_WREN_OPCODE_SPI    0x06  /* EEPROM set Write Ena latch */
+/* EEPROM reset Write Enable latch */
+#define IXGBE_EEPROM_WRDI_OPCODE_SPI    0x04
+#define IXGBE_EEPROM_RDSR_OPCODE_SPI    0x05  /* EEPROM read Status reg */
+#define IXGBE_EEPROM_WRSR_OPCODE_SPI    0x01  /* EEPROM write Status reg */
+#define IXGBE_EEPROM_ERASE4K_OPCODE_SPI 0x20  /* EEPROM ERASE 4KB */
+#define IXGBE_EEPROM_ERASE64K_OPCODE_SPI  0xD8  /* EEPROM ERASE 64KB */
+#define IXGBE_EEPROM_ERASE256_OPCODE_SPI  0xDB  /* EEPROM ERASE 256B */
+
+/* EEPROM Read Register */
+#define IXGBE_EEPROM_RW_REG_DATA   16 /* data offset in EEPROM read reg */
+#define IXGBE_EEPROM_RW_REG_DONE   2  /* Offset to READ done bit */
+#define IXGBE_EEPROM_RW_REG_START  1  /* First bit to start operation */
+#define IXGBE_EEPROM_RW_ADDR_SHIFT 2  /* Shift to the address bits */
+#define IXGBE_NVM_POLL_WRITE       1  /* Flag for polling for write complete */
+#define IXGBE_NVM_POLL_READ        0  /* Flag for polling for read complete */
+
+#define NVM_INIT_CTRL_3			0x38
+#define NVM_INIT_CTRL_3_LPLU		0x8
+#define NVM_INIT_CTRL_3_D10GMP_PORT0	0x40
+#define NVM_INIT_CTRL_3_D10GMP_PORT1	0x100
+
+#define IXGBE_EEPROM_PAGE_SIZE_MAX       128
+#define IXGBE_EEPROM_RD_BUFFER_MAX_COUNT 512 /* EEPROM words # read in burst */
+#define IXGBE_EEPROM_WR_BUFFER_MAX_COUNT 256 /* EEPROM words # wr in burst */
+
+#define IXGBE_EEPROM_CTRL_2	1 /* EEPROM CTRL word 2 */
+#define IXGBE_EEPROM_CCD_BIT	2 /* EEPROM Core Clock Disable bit */
+
+#ifndef IXGBE_EEPROM_GRANT_ATTEMPTS
+#define IXGBE_EEPROM_GRANT_ATTEMPTS 1000 /* EEPROM # attempts to gain grant */
+#endif
+
+#ifndef IXGBE_EERD_EEWR_ATTEMPTS
+/* Number of 5 microseconds we wait for EERD read and
+ * EERW write to complete */
+#define IXGBE_EERD_EEWR_ATTEMPTS 100000
+#endif
+
+#ifndef IXGBE_FLUDONE_ATTEMPTS
+/* # attempts we wait for flush update to complete */
+#define IXGBE_FLUDONE_ATTEMPTS 20000
+#endif
+
+#define IXGBE_PCIE_CTRL2                 0x5   /* PCIe Control 2 Offset */
+#define IXGBE_PCIE_CTRL2_DUMMY_ENABLE    0x8   /* Dummy Function Enable */
+#define IXGBE_PCIE_CTRL2_LAN_DISABLE     0x2   /* LAN PCI Disable */
+#define IXGBE_PCIE_CTRL2_DISABLE_SELECT  0x1   /* LAN Disable Select */
+
+#define IXGBE_SAN_MAC_ADDR_PORT0_OFFSET  0x0
+#define IXGBE_SAN_MAC_ADDR_PORT1_OFFSET  0x3
+#define IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP  0x1
+#define IXGBE_DEVICE_CAPS_FCOE_OFFLOADS  0x2
+#define IXGBE_DEVICE_CAPS_NO_CROSSTALK_WR	BIT(7)
+#define IXGBE_FW_LESM_PARAMETERS_PTR     0x2
+#define IXGBE_FW_LESM_STATE_1            0x1
+#define IXGBE_FW_LESM_STATE_ENABLED      0x8000 /* LESM Enable bit */
+#define IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR   0x4
+#define IXGBE_FW_PATCH_VERSION_4         0x7
+#define IXGBE_FCOE_IBA_CAPS_BLK_PTR         0x33 /* iSCSI/FCOE block */
+#define IXGBE_FCOE_IBA_CAPS_FCOE            0x20 /* FCOE flags */
+#define IXGBE_ISCSI_FCOE_BLK_PTR            0x17 /* iSCSI/FCOE block */
+#define IXGBE_ISCSI_FCOE_FLAGS_OFFSET       0x0  /* FCOE flags */
+#define IXGBE_ISCSI_FCOE_FLAGS_ENABLE       0x1  /* FCOE flags enable bit */
+#define IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR      0x27 /* Alt. SAN MAC block */
+#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET  0x0 /* Alt. SAN MAC capability */
+#define IXGBE_ALT_SAN_MAC_ADDR_PORT0_OFFSET 0x1 /* Alt. SAN MAC 0 offset */
+#define IXGBE_ALT_SAN_MAC_ADDR_PORT1_OFFSET 0x4 /* Alt. SAN MAC 1 offset */
+#define IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET  0x7 /* Alt. WWNN prefix offset */
+#define IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET  0x8 /* Alt. WWPN prefix offset */
+#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_SANMAC  0x0 /* Alt. SAN MAC exists */
+#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN  0x1 /* Alt. WWN base exists */
+
+#define IXGBE_DEVICE_CAPS_WOL_PORT0_1  0x4 /* WoL supported on ports 0 & 1 */
+#define IXGBE_DEVICE_CAPS_WOL_PORT0    0x8 /* WoL supported on port 0 */
+#define IXGBE_DEVICE_CAPS_WOL_MASK     0xC /* Mask for WoL capabilities */
+
+/* PCI Bus Info */
+#define IXGBE_PCI_DEVICE_STATUS   0xAA
+#define IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING   0x0020
+#define IXGBE_PCI_LINK_STATUS     0xB2
+#define IXGBE_PCI_DEVICE_CONTROL2 0xC8
+#define IXGBE_PCI_LINK_WIDTH      0x3F0
+#define IXGBE_PCI_LINK_WIDTH_1    0x10
+#define IXGBE_PCI_LINK_WIDTH_2    0x20
+#define IXGBE_PCI_LINK_WIDTH_4    0x40
+#define IXGBE_PCI_LINK_WIDTH_8    0x80
+#define IXGBE_PCI_LINK_SPEED      0xF
+#define IXGBE_PCI_LINK_SPEED_2500 0x1
+#define IXGBE_PCI_LINK_SPEED_5000 0x2
+#define IXGBE_PCI_LINK_SPEED_8000 0x3
+#define IXGBE_PCI_HEADER_TYPE_REGISTER  0x0E
+#define IXGBE_PCI_HEADER_TYPE_MULTIFUNC 0x80
+#define IXGBE_PCI_DEVICE_CONTROL2_16ms  0x0005
+
+#define IXGBE_PCIDEVCTRL2_TIMEO_MASK	0xf
+#define IXGBE_PCIDEVCTRL2_16_32ms_def	0x0
+#define IXGBE_PCIDEVCTRL2_50_100us	0x1
+#define IXGBE_PCIDEVCTRL2_1_2ms		0x2
+#define IXGBE_PCIDEVCTRL2_16_32ms	0x5
+#define IXGBE_PCIDEVCTRL2_65_130ms	0x6
+#define IXGBE_PCIDEVCTRL2_260_520ms	0x9
+#define IXGBE_PCIDEVCTRL2_1_2s		0xa
+#define IXGBE_PCIDEVCTRL2_4_8s		0xd
+#define IXGBE_PCIDEVCTRL2_17_34s	0xe
+
+/* Number of 100 microseconds we wait for PCI Express primary disable */
+#define IXGBE_PCI_PRIMARY_DISABLE_TIMEOUT	800
+
+/* RAH */
+#define IXGBE_RAH_VIND_MASK     0x003C0000
+#define IXGBE_RAH_VIND_SHIFT    18
+#define IXGBE_RAH_AV            0x80000000
+#define IXGBE_CLEAR_VMDQ_ALL    0xFFFFFFFF
+
+/* Header split receive */
+#define IXGBE_RFCTL_ISCSI_DIS       0x00000001
+#define IXGBE_RFCTL_ISCSI_DWC_MASK  0x0000003E
+#define IXGBE_RFCTL_ISCSI_DWC_SHIFT 1
+#define IXGBE_RFCTL_RSC_DIS		0x00000020
+#define IXGBE_RFCTL_NFSW_DIS        0x00000040
+#define IXGBE_RFCTL_NFSR_DIS        0x00000080
+#define IXGBE_RFCTL_NFS_VER_MASK    0x00000300
+#define IXGBE_RFCTL_NFS_VER_SHIFT   8
+#define IXGBE_RFCTL_NFS_VER_2       0
+#define IXGBE_RFCTL_NFS_VER_3       1
+#define IXGBE_RFCTL_NFS_VER_4       2
+#define IXGBE_RFCTL_IPV6_DIS        0x00000400
+#define IXGBE_RFCTL_IPV6_XSUM_DIS   0x00000800
+#define IXGBE_RFCTL_IPFRSP_DIS      0x00004000
+#define IXGBE_RFCTL_IPV6_EX_DIS     0x00010000
+#define IXGBE_RFCTL_NEW_IPV6_EXT_DIS 0x00020000
+
+/* Transmit Config masks */
+#define IXGBE_TXDCTL_ENABLE     0x02000000 /* Enable specific Tx Queue */
+#define IXGBE_TXDCTL_SWFLSH     0x04000000 /* Tx Desc. write-back flushing */
+#define IXGBE_TXDCTL_WTHRESH_SHIFT      16 /* shift to WTHRESH bits */
+/* Enable short packet padding to 64 bytes */
+#define IXGBE_TX_PAD_ENABLE     0x00000400
+#define IXGBE_JUMBO_FRAME_ENABLE 0x00000004  /* Allow jumbo frames */
+/* This allows for 16K packets + 4k for vlan */
+#define IXGBE_MAX_FRAME_SZ      0x40040000
+
+#define IXGBE_TDWBAL_HEAD_WB_ENABLE   0x1      /* Tx head write-back enable */
+#define IXGBE_TDWBAL_SEQNUM_WB_ENABLE 0x2      /* Tx seq# write-back enable */
+
+/* Receive Config masks */
+#define IXGBE_RXCTRL_RXEN       0x00000001  /* Enable Receiver */
+#define IXGBE_RXCTRL_DMBYPS     0x00000002  /* Descriptor Monitor Bypass */
+#define IXGBE_RXDCTL_ENABLE     0x02000000  /* Enable specific Rx Queue */
+#define IXGBE_RXDCTL_SWFLSH     0x04000000  /* Rx Desc. write-back flushing */
+#define IXGBE_RXDCTL_RLPMLMASK  0x00003FFF  /* Only supported on the X540 */
+#define IXGBE_RXDCTL_RLPML_EN   0x00008000
+#define IXGBE_RXDCTL_VME        0x40000000  /* VLAN mode enable */
+
+#define IXGBE_TSAUXC_EN_CLK		0x00000004
+#define IXGBE_TSAUXC_SYNCLK		0x00000008
+#define IXGBE_TSAUXC_SDP0_INT		0x00000040
+#define IXGBE_TSAUXC_EN_TT0		0x00000001
+#define IXGBE_TSAUXC_EN_TT1		0x00000002
+#define IXGBE_TSAUXC_ST0		0x00000010
+#define IXGBE_TSAUXC_DISABLE_SYSTIME	0x80000000
+
+#define IXGBE_TSSDP_TS_SDP0_SEL_MASK	0x000000C0
+#define IXGBE_TSSDP_TS_SDP0_CLK0	0x00000080
+#define IXGBE_TSSDP_TS_SDP0_EN		0x00000100
+
+#define IXGBE_TSYNCTXCTL_VALID		0x00000001 /* Tx timestamp valid */
+#define IXGBE_TSYNCTXCTL_ENABLED	0x00000010 /* Tx timestamping enabled */
+
+#define IXGBE_TSYNCRXCTL_VALID		0x00000001 /* Rx timestamp valid */
+#define IXGBE_TSYNCRXCTL_TYPE_MASK	0x0000000E /* Rx type mask */
+#define IXGBE_TSYNCRXCTL_TYPE_L2_V2	0x00
+#define IXGBE_TSYNCRXCTL_TYPE_L4_V1	0x02
+#define IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2	0x04
+#define IXGBE_TSYNCRXCTL_TYPE_ALL	0x08
+#define IXGBE_TSYNCRXCTL_TYPE_EVENT_V2	0x0A
+#define IXGBE_TSYNCRXCTL_ENABLED	0x00000010 /* Rx Timestamping enabled */
+#define IXGBE_TSYNCRXCTL_TSIP_UT_EN	0x00800000 /* Rx Timestamp in Packet */
+
+#define IXGBE_TSIM_TXTS			0x00000002
+
+#define IXGBE_RXMTRL_V1_CTRLT_MASK	0x000000FF
+#define IXGBE_RXMTRL_V1_SYNC_MSG	0x00
+#define IXGBE_RXMTRL_V1_DELAY_REQ_MSG	0x01
+#define IXGBE_RXMTRL_V1_FOLLOWUP_MSG	0x02
+#define IXGBE_RXMTRL_V1_DELAY_RESP_MSG	0x03
+#define IXGBE_RXMTRL_V1_MGMT_MSG	0x04
+
+#define IXGBE_RXMTRL_V2_MSGID_MASK		0x0000FF00
+#define IXGBE_RXMTRL_V2_SYNC_MSG		0x0000
+#define IXGBE_RXMTRL_V2_DELAY_REQ_MSG		0x0100
+#define IXGBE_RXMTRL_V2_PDELAY_REQ_MSG		0x0200
+#define IXGBE_RXMTRL_V2_PDELAY_RESP_MSG		0x0300
+#define IXGBE_RXMTRL_V2_FOLLOWUP_MSG		0x0800
+#define IXGBE_RXMTRL_V2_DELAY_RESP_MSG		0x0900
+#define IXGBE_RXMTRL_V2_PDELAY_FOLLOWUP_MSG	0x0A00
+#define IXGBE_RXMTRL_V2_ANNOUNCE_MSG		0x0B00
+#define IXGBE_RXMTRL_V2_SIGNALING_MSG		0x0C00
+#define IXGBE_RXMTRL_V2_MGMT_MSG		0x0D00
+
+#define IXGBE_FCTRL_SBP 0x00000002 /* Store Bad Packet */
+#define IXGBE_FCTRL_MPE 0x00000100 /* Multicast Promiscuous Ena*/
+#define IXGBE_FCTRL_UPE 0x00000200 /* Unicast Promiscuous Ena */
+#define IXGBE_FCTRL_BAM 0x00000400 /* Broadcast Accept Mode */
+#define IXGBE_FCTRL_PMCF 0x00001000 /* Pass MAC Control Frames */
+#define IXGBE_FCTRL_DPF 0x00002000 /* Discard Pause Frame */
+/* Receive Priority Flow Control Enable */
+#define IXGBE_FCTRL_RPFCE 0x00004000
+#define IXGBE_FCTRL_RFCE 0x00008000 /* Receive Flow Control Ena */
+#define IXGBE_MFLCN_PMCF        0x00000001 /* Pass MAC Control Frames */
+#define IXGBE_MFLCN_DPF         0x00000002 /* Discard Pause Frame */
+#define IXGBE_MFLCN_RPFCE       0x00000004 /* Receive Priority FC Enable */
+#define IXGBE_MFLCN_RFCE        0x00000008 /* Receive FC Enable */
+#define IXGBE_MFLCN_RPFCE_MASK	0x00000FF4 /* Receive FC Mask */
+
+#define IXGBE_MFLCN_RPFCE_SHIFT		 4
+
+/* Multiple Receive Queue Control */
+#define IXGBE_MRQC_RSSEN                 0x00000001  /* RSS Enable */
+#define IXGBE_MRQC_MRQE_MASK                    0xF /* Bits 3:0 */
+#define IXGBE_MRQC_RT8TCEN               0x00000002 /* 8 TC no RSS */
+#define IXGBE_MRQC_RT4TCEN               0x00000003 /* 4 TC no RSS */
+#define IXGBE_MRQC_RTRSS8TCEN            0x00000004 /* 8 TC w/ RSS */
+#define IXGBE_MRQC_RTRSS4TCEN            0x00000005 /* 4 TC w/ RSS */
+#define IXGBE_MRQC_VMDQEN                0x00000008 /* VMDq2 64 pools no RSS */
+#define IXGBE_MRQC_VMDQRSS32EN           0x0000000A /* VMDq2 32 pools w/ RSS */
+#define IXGBE_MRQC_VMDQRSS64EN           0x0000000B /* VMDq2 64 pools w/ RSS */
+#define IXGBE_MRQC_VMDQRT8TCEN           0x0000000C /* VMDq2/RT 16 pool 8 TC */
+#define IXGBE_MRQC_VMDQRT4TCEN           0x0000000D /* VMDq2/RT 32 pool 4 TC */
+#define IXGBE_MRQC_RSS_FIELD_MASK        0xFFFF0000
+#define IXGBE_MRQC_RSS_FIELD_IPV4_TCP    0x00010000
+#define IXGBE_MRQC_RSS_FIELD_IPV4        0x00020000
+#define IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP 0x00040000
+#define IXGBE_MRQC_RSS_FIELD_IPV6_EX     0x00080000
+#define IXGBE_MRQC_RSS_FIELD_IPV6        0x00100000
+#define IXGBE_MRQC_RSS_FIELD_IPV6_TCP    0x00200000
+#define IXGBE_MRQC_RSS_FIELD_IPV4_UDP    0x00400000
+#define IXGBE_MRQC_RSS_FIELD_IPV6_UDP    0x00800000
+#define IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP 0x01000000
+#define IXGBE_MRQC_MULTIPLE_RSS          0x00002000
+#define IXGBE_MRQC_L3L4TXSWEN            0x00008000
+
+#define IXGBE_FWSM_TS_ENABLED	0x1
+
+/* Queue Drop Enable */
+#define IXGBE_QDE_ENABLE	0x00000001
+#define IXGBE_QDE_HIDE_VLAN	0x00000002
+#define IXGBE_QDE_IDX_MASK	0x00007F00
+#define IXGBE_QDE_IDX_SHIFT	8
+#define IXGBE_QDE_WRITE		0x00010000
+
+#define IXGBE_TXD_POPTS_IXSM 0x01       /* Insert IP checksum */
+#define IXGBE_TXD_POPTS_TXSM 0x02       /* Insert TCP/UDP checksum */
+#define IXGBE_TXD_CMD_EOP    0x01000000 /* End of Packet */
+#define IXGBE_TXD_CMD_IFCS   0x02000000 /* Insert FCS (Ethernet CRC) */
+#define IXGBE_TXD_CMD_IC     0x04000000 /* Insert Checksum */
+#define IXGBE_TXD_CMD_RS     0x08000000 /* Report Status */
+#define IXGBE_TXD_CMD_DEXT   0x20000000 /* Descriptor extension (0 = legacy) */
+#define IXGBE_TXD_CMD_VLE    0x40000000 /* Add VLAN tag */
+#define IXGBE_TXD_STAT_DD    0x00000001 /* Descriptor Done */
+
+/* Multiple Transmit Queue Command Register */
+#define IXGBE_MTQC_RT_ENA       0x1 /* DCB Enable */
+#define IXGBE_MTQC_VT_ENA       0x2 /* VMDQ2 Enable */
+#define IXGBE_MTQC_64Q_1PB      0x0 /* 64 queues 1 pack buffer */
+#define IXGBE_MTQC_32VF         0x8 /* 4 TX Queues per pool w/32VF's */
+#define IXGBE_MTQC_64VF         0x4 /* 2 TX Queues per pool w/64VF's */
+#define IXGBE_MTQC_8TC_8TQ      0xC /* 8 TC if RT_ENA or 8 TQ if VT_ENA */
+#define IXGBE_MTQC_4TC_4TQ	0x8 /* 4 TC if RT_ENA or 4 TQ if VT_ENA */
+
+/* Receive Descriptor bit definitions */
+#define IXGBE_RXD_STAT_DD       0x01    /* Descriptor Done */
+#define IXGBE_RXD_STAT_EOP      0x02    /* End of Packet */
+#define IXGBE_RXD_STAT_FLM      0x04    /* FDir Match */
+#define IXGBE_RXD_STAT_VP       0x08    /* IEEE VLAN Packet */
+#define IXGBE_RXDADV_NEXTP_MASK   0x000FFFF0 /* Next Descriptor Index */
+#define IXGBE_RXDADV_NEXTP_SHIFT  0x00000004
+#define IXGBE_RXD_STAT_UDPCS    0x10    /* UDP xsum calculated */
+#define IXGBE_RXD_STAT_L4CS     0x20    /* L4 xsum calculated */
+#define IXGBE_RXD_STAT_IPCS     0x40    /* IP xsum calculated */
+#define IXGBE_RXD_STAT_PIF      0x80    /* passed in-exact filter */
+#define IXGBE_RXD_STAT_CRCV     0x100   /* Speculative CRC Valid */
+#define IXGBE_RXD_STAT_OUTERIPCS  0x100 /* Cloud IP xsum calculated */
+#define IXGBE_RXD_STAT_VEXT     0x200   /* 1st VLAN found */
+#define IXGBE_RXD_STAT_UDPV     0x400   /* Valid UDP checksum */
+#define IXGBE_RXD_STAT_DYNINT   0x800   /* Pkt caused INT via DYNINT */
+#define IXGBE_RXD_STAT_LLINT    0x800   /* Pkt caused Low Latency Interrupt */
+#define IXGBE_RXD_STAT_TSIP     0x08000 /* Time Stamp in packet buffer */
+#define IXGBE_RXD_STAT_TS       0x10000 /* Time Stamp */
+#define IXGBE_RXD_STAT_SECP     0x20000 /* Security Processing */
+#define IXGBE_RXD_STAT_LB       0x40000 /* Loopback Status */
+#define IXGBE_RXD_STAT_ACK      0x8000  /* ACK Packet indication */
+#define IXGBE_RXD_ERR_CE        0x01    /* CRC Error */
+#define IXGBE_RXD_ERR_LE        0x02    /* Length Error */
+#define IXGBE_RXD_ERR_PE        0x08    /* Packet Error */
+#define IXGBE_RXD_ERR_OSE       0x10    /* Oversize Error */
+#define IXGBE_RXD_ERR_USE       0x20    /* Undersize Error */
+#define IXGBE_RXD_ERR_TCPE      0x40    /* TCP/UDP Checksum Error */
+#define IXGBE_RXD_ERR_IPE       0x80    /* IP Checksum Error */
+#define IXGBE_RXDADV_ERR_MASK           0xfff00000 /* RDESC.ERRORS mask */
+#define IXGBE_RXDADV_ERR_SHIFT          20         /* RDESC.ERRORS shift */
+#define IXGBE_RXDADV_ERR_OUTERIPER	0x04000000 /* CRC IP Header error */
+#define IXGBE_RXDADV_ERR_FCEOFE         0x80000000 /* FCoEFe/IPE */
+#define IXGBE_RXDADV_ERR_FCERR          0x00700000 /* FCERR/FDIRERR */
+#define IXGBE_RXDADV_ERR_FDIR_LEN       0x00100000 /* FDIR Length error */
+#define IXGBE_RXDADV_ERR_FDIR_DROP      0x00200000 /* FDIR Drop error */
+#define IXGBE_RXDADV_ERR_FDIR_COLL      0x00400000 /* FDIR Collision error */
+#define IXGBE_RXDADV_ERR_HBO    0x00800000 /*Header Buffer Overflow */
+#define IXGBE_RXDADV_ERR_CE     0x01000000 /* CRC Error */
+#define IXGBE_RXDADV_ERR_LE     0x02000000 /* Length Error */
+#define IXGBE_RXDADV_ERR_PE     0x08000000 /* Packet Error */
+#define IXGBE_RXDADV_ERR_OSE    0x10000000 /* Oversize Error */
+#define IXGBE_RXDADV_ERR_IPSEC_INV_PROTOCOL  0x08000000 /* overlap ERR_PE  */
+#define IXGBE_RXDADV_ERR_IPSEC_INV_LENGTH    0x10000000 /* overlap ERR_OSE */
+#define IXGBE_RXDADV_ERR_IPSEC_AUTH_FAILED   0x18000000
+#define IXGBE_RXDADV_ERR_USE    0x20000000 /* Undersize Error */
+#define IXGBE_RXDADV_ERR_TCPE   0x40000000 /* TCP/UDP Checksum Error */
+#define IXGBE_RXDADV_ERR_IPE    0x80000000 /* IP Checksum Error */
+#define IXGBE_RXD_VLAN_ID_MASK  0x0FFF  /* VLAN ID is in lower 12 bits */
+#define IXGBE_RXD_PRI_MASK      0xE000  /* Priority is in upper 3 bits */
+#define IXGBE_RXD_PRI_SHIFT     13
+#define IXGBE_RXD_CFI_MASK      0x1000  /* CFI is bit 12 */
+#define IXGBE_RXD_CFI_SHIFT     12
+
+#define IXGBE_RXDADV_STAT_DD            IXGBE_RXD_STAT_DD  /* Done */
+#define IXGBE_RXDADV_STAT_EOP           IXGBE_RXD_STAT_EOP /* End of Packet */
+#define IXGBE_RXDADV_STAT_FLM           IXGBE_RXD_STAT_FLM /* FDir Match */
+#define IXGBE_RXDADV_STAT_VP            IXGBE_RXD_STAT_VP  /* IEEE VLAN Pkt */
+#define IXGBE_RXDADV_STAT_MASK          0x000fffff /* Stat/NEXTP: bit 0-19 */
+#define IXGBE_RXDADV_STAT_FCEOFS        0x00000040 /* FCoE EOF/SOF Stat */
+#define IXGBE_RXDADV_STAT_FCSTAT        0x00000030 /* FCoE Pkt Stat */
+#define IXGBE_RXDADV_STAT_FCSTAT_NOMTCH 0x00000000 /* 00: No Ctxt Match */
+#define IXGBE_RXDADV_STAT_FCSTAT_NODDP  0x00000010 /* 01: Ctxt w/o DDP */
+#define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP 0x00000020 /* 10: Recv. FCP_RSP */
+#define IXGBE_RXDADV_STAT_FCSTAT_DDP    0x00000030 /* 11: Ctxt w/ DDP */
+#define IXGBE_RXDADV_STAT_TS		0x00010000 /* IEEE 1588 Time Stamp */
+#define IXGBE_RXDADV_STAT_SECP		0x00020000 /* IPsec/MACsec pkt found */
+
+/* PSRTYPE bit definitions */
+#define IXGBE_PSRTYPE_TCPHDR    0x00000010
+#define IXGBE_PSRTYPE_UDPHDR    0x00000020
+#define IXGBE_PSRTYPE_IPV4HDR   0x00000100
+#define IXGBE_PSRTYPE_IPV6HDR   0x00000200
+#define IXGBE_PSRTYPE_L2HDR     0x00001000
+
+/* SRRCTL bit definitions */
+#define IXGBE_SRRCTL_BSIZEPKT_SHIFT     10     /* so many KBs */
+#define IXGBE_SRRCTL_RDMTS_SHIFT        22
+#define IXGBE_SRRCTL_RDMTS_MASK         0x01C00000
+#define IXGBE_SRRCTL_DROP_EN            0x10000000
+#define IXGBE_SRRCTL_BSIZEPKT_MASK      0x0000007F
+#define IXGBE_SRRCTL_BSIZEHDR_MASK      0x00003F00
+#define IXGBE_SRRCTL_DESCTYPE_LEGACY    0x00000000
+#define IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000
+#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT  0x04000000
+#define IXGBE_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000
+#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000
+#define IXGBE_SRRCTL_DESCTYPE_MASK      0x0E000000
+
+#define IXGBE_RXDPS_HDRSTAT_HDRSP       0x00008000
+#define IXGBE_RXDPS_HDRSTAT_HDRLEN_MASK 0x000003FF
+
+#define IXGBE_RXDADV_RSSTYPE_MASK       0x0000000F
+#define IXGBE_RXDADV_PKTTYPE_MASK       0x0000FFF0
+#define IXGBE_RXDADV_PKTTYPE_MASK_EX    0x0001FFF0
+#define IXGBE_RXDADV_HDRBUFLEN_MASK     0x00007FE0
+#define IXGBE_RXDADV_RSCCNT_MASK        0x001E0000
+#define IXGBE_RXDADV_RSCCNT_SHIFT       17
+#define IXGBE_RXDADV_HDRBUFLEN_SHIFT    5
+#define IXGBE_RXDADV_SPLITHEADER_EN     0x00001000
+#define IXGBE_RXDADV_SPH                0x8000
+
+/* RSS Hash results */
+#define IXGBE_RXDADV_RSSTYPE_NONE       0x00000000
+#define IXGBE_RXDADV_RSSTYPE_IPV4_TCP   0x00000001
+#define IXGBE_RXDADV_RSSTYPE_IPV4       0x00000002
+#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP   0x00000003
+#define IXGBE_RXDADV_RSSTYPE_IPV6_EX    0x00000004
+#define IXGBE_RXDADV_RSSTYPE_IPV6       0x00000005
+#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006
+#define IXGBE_RXDADV_RSSTYPE_IPV4_UDP   0x00000007
+#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP   0x00000008
+#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009
+
+/* RSS Packet Types as indicated in the receive descriptor. */
+#define IXGBE_RXDADV_PKTTYPE_NONE       0x00000000
+#define IXGBE_RXDADV_PKTTYPE_IPV4       0x00000010 /* IPv4 hdr present */
+#define IXGBE_RXDADV_PKTTYPE_IPV4_EX    0x00000020 /* IPv4 hdr + extensions */
+#define IXGBE_RXDADV_PKTTYPE_IPV6       0x00000040 /* IPv6 hdr present */
+#define IXGBE_RXDADV_PKTTYPE_IPV6_EX    0x00000080 /* IPv6 hdr + extensions */
+#define IXGBE_RXDADV_PKTTYPE_TCP        0x00000100 /* TCP hdr present */
+#define IXGBE_RXDADV_PKTTYPE_UDP        0x00000200 /* UDP hdr present */
+#define IXGBE_RXDADV_PKTTYPE_SCTP       0x00000400 /* SCTP hdr present */
+#define IXGBE_RXDADV_PKTTYPE_NFS        0x00000800 /* NFS hdr present */
+#define IXGBE_RXDADV_PKTTYPE_VXLAN	0x00000800 /* VXLAN hdr present */
+#define IXGBE_RXDADV_PKTTYPE_TUNNEL	0x00010000 /* Tunnel type */
+#define IXGBE_RXDADV_PKTTYPE_IPSEC_ESP  0x00001000 /* IPSec ESP */
+#define IXGBE_RXDADV_PKTTYPE_IPSEC_AH   0x00002000 /* IPSec AH */
+#define IXGBE_RXDADV_PKTTYPE_LINKSEC    0x00004000 /* LinkSec Encap */
+#define IXGBE_RXDADV_PKTTYPE_ETQF       0x00008000 /* PKTTYPE is ETQF index */
+#define IXGBE_RXDADV_PKTTYPE_ETQF_MASK  0x00000070 /* ETQF has 8 indices */
+#define IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT 4          /* Right-shift 4 bits */
+
+/* Masks to determine if packets should be dropped due to frame errors */
+#define IXGBE_RXD_ERR_FRAME_ERR_MASK ( \
+				      IXGBE_RXD_ERR_CE | \
+				      IXGBE_RXD_ERR_LE | \
+				      IXGBE_RXD_ERR_PE | \
+				      IXGBE_RXD_ERR_OSE | \
+				      IXGBE_RXD_ERR_USE)
+
+#define IXGBE_RXDADV_ERR_FRAME_ERR_MASK ( \
+				      IXGBE_RXDADV_ERR_CE | \
+				      IXGBE_RXDADV_ERR_LE | \
+				      IXGBE_RXDADV_ERR_PE | \
+				      IXGBE_RXDADV_ERR_OSE | \
+				      IXGBE_RXDADV_ERR_IPSEC_INV_PROTOCOL | \
+				      IXGBE_RXDADV_ERR_IPSEC_INV_LENGTH | \
+				      IXGBE_RXDADV_ERR_USE)
+
+/* Multicast bit mask */
+#define IXGBE_MCSTCTRL_MFE      0x4
+
+/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE  8
+#define IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE  8
+#define IXGBE_REQ_TX_BUFFER_GRANULARITY   1024
+
+/* Vlan-specific macros */
+#define IXGBE_RX_DESC_SPECIAL_VLAN_MASK  0x0FFF /* VLAN ID in lower 12 bits */
+#define IXGBE_RX_DESC_SPECIAL_PRI_MASK   0xE000 /* Priority in upper 3 bits */
+#define IXGBE_RX_DESC_SPECIAL_PRI_SHIFT  0x000D /* Priority in upper 3 of 16 */
+#define IXGBE_TX_DESC_SPECIAL_PRI_SHIFT  IXGBE_RX_DESC_SPECIAL_PRI_SHIFT
+
+/* SR-IOV specific macros */
+#define IXGBE_MBVFICR_INDEX(vf_number)   (vf_number >> 4)
+#define IXGBE_MBVFICR(_i)		(0x00710 + ((_i) * 4))
+#define IXGBE_VFLRE(_i)		((((_i) & 1) ? 0x001C0 : 0x00600))
+#define IXGBE_VFLREC(_i)		(0x00700 + ((_i) * 4))
+/* Translated register #defines */
+#define IXGBE_PVFTDH(P)		(0x06010 + (0x40 * (P)))
+#define IXGBE_PVFTDT(P)		(0x06018 + (0x40 * (P)))
+#define IXGBE_PVFTXDCTL(P)	(0x06028 + (0x40 * (P)))
+#define IXGBE_PVFTDWBAL(P)	(0x06038 + (0x40 * (P)))
+#define IXGBE_PVFTDWBAH(P)	(0x0603C + (0x40 * (P)))
+#define IXGBE_PVFGPRC(x)	(0x0101C + (0x40 * (x)))
+#define IXGBE_PVFGPTC(x)	(0x08300 + (0x04 * (x)))
+#define IXGBE_PVFGORC_LSB(x)	(0x01020 + (0x40 * (x)))
+#define IXGBE_PVFGORC_MSB(x)	(0x0D020 + (0x40 * (x)))
+#define IXGBE_PVFGOTC_LSB(x)	(0x08400 + (0x08 * (x)))
+#define IXGBE_PVFGOTC_MSB(x)	(0x08404 + (0x08 * (x)))
+#define IXGBE_PVFMPRC(x)	(0x0D01C + (0x40 * (x)))
+
+#define IXGBE_PVFTDWBALn(q_per_pool, vf_number, vf_q_index) \
+		(IXGBE_PVFTDWBAL((q_per_pool)*(vf_number) + (vf_q_index)))
+#define IXGBE_PVFTDWBAHn(q_per_pool, vf_number, vf_q_index) \
+		(IXGBE_PVFTDWBAH((q_per_pool)*(vf_number) + (vf_q_index)))
+
+#define IXGBE_PVFTDHN(q_per_pool, vf_number, vf_q_index) \
+		(IXGBE_PVFTDH((q_per_pool)*(vf_number) + (vf_q_index)))
+#define IXGBE_PVFTDTN(q_per_pool, vf_number, vf_q_index) \
+		(IXGBE_PVFTDT((q_per_pool)*(vf_number) + (vf_q_index)))
+
+enum ixgbe_fdir_pballoc_type {
+	IXGBE_FDIR_PBALLOC_NONE = 0,
+	IXGBE_FDIR_PBALLOC_64K  = 1,
+	IXGBE_FDIR_PBALLOC_128K = 2,
+	IXGBE_FDIR_PBALLOC_256K = 3,
+};
+#define IXGBE_FDIR_PBALLOC_SIZE_SHIFT           16
+
+/* Flow Director register values */
+#define IXGBE_FDIRCTRL_PBALLOC_64K              0x00000001
+#define IXGBE_FDIRCTRL_PBALLOC_128K             0x00000002
+#define IXGBE_FDIRCTRL_PBALLOC_256K             0x00000003
+#define IXGBE_FDIRCTRL_INIT_DONE                0x00000008
+#define IXGBE_FDIRCTRL_PERFECT_MATCH            0x00000010
+#define IXGBE_FDIRCTRL_REPORT_STATUS            0x00000020
+#define IXGBE_FDIRCTRL_REPORT_STATUS_ALWAYS     0x00000080
+#define IXGBE_FDIRCTRL_DROP_Q_SHIFT             8
+#define IXGBE_FDIRCTRL_FLEX_SHIFT               16
+#define IXGBE_FDIRCTRL_DROP_NO_MATCH		0x00008000
+#define IXGBE_FDIRCTRL_FILTERMODE_SHIFT		21
+#define IXGBE_FDIRCTRL_FILTERMODE_MACVLAN	0x0001 /* bit 23:21, 001b */
+#define IXGBE_FDIRCTRL_FILTERMODE_CLOUD		0x0002 /* bit 23:21, 010b */
+#define IXGBE_FDIRCTRL_SEARCHLIM                0x00800000
+#define IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT         24
+#define IXGBE_FDIRCTRL_FULL_THRESH_MASK         0xF0000000
+#define IXGBE_FDIRCTRL_FULL_THRESH_SHIFT        28
+
+#define IXGBE_FDIRTCPM_DPORTM_SHIFT             16
+#define IXGBE_FDIRUDPM_DPORTM_SHIFT             16
+#define IXGBE_FDIRIP6M_DIPM_SHIFT               16
+#define IXGBE_FDIRM_VLANID                      0x00000001
+#define IXGBE_FDIRM_VLANP                       0x00000002
+#define IXGBE_FDIRM_POOL                        0x00000004
+#define IXGBE_FDIRM_L4P                         0x00000008
+#define IXGBE_FDIRM_FLEX                        0x00000010
+#define IXGBE_FDIRM_DIPv6                       0x00000020
+
+#define IXGBE_FDIRFREE_FREE_MASK                0xFFFF
+#define IXGBE_FDIRFREE_FREE_SHIFT               0
+#define IXGBE_FDIRFREE_COLL_MASK                0x7FFF0000
+#define IXGBE_FDIRFREE_COLL_SHIFT               16
+#define IXGBE_FDIRLEN_MAXLEN_MASK               0x3F
+#define IXGBE_FDIRLEN_MAXLEN_SHIFT              0
+#define IXGBE_FDIRLEN_MAXHASH_MASK              0x7FFF0000
+#define IXGBE_FDIRLEN_MAXHASH_SHIFT             16
+#define IXGBE_FDIRUSTAT_ADD_MASK                0xFFFF
+#define IXGBE_FDIRUSTAT_ADD_SHIFT               0
+#define IXGBE_FDIRUSTAT_REMOVE_MASK             0xFFFF0000
+#define IXGBE_FDIRUSTAT_REMOVE_SHIFT            16
+#define IXGBE_FDIRFSTAT_FADD_MASK               0x00FF
+#define IXGBE_FDIRFSTAT_FADD_SHIFT              0
+#define IXGBE_FDIRFSTAT_FREMOVE_MASK            0xFF00
+#define IXGBE_FDIRFSTAT_FREMOVE_SHIFT           8
+#define IXGBE_FDIRPORT_DESTINATION_SHIFT        16
+#define IXGBE_FDIRVLAN_FLEX_SHIFT               16
+#define IXGBE_FDIRHASH_BUCKET_VALID_SHIFT       15
+#define IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT       16
+
+#define IXGBE_FDIRCMD_CMD_MASK                  0x00000003
+#define IXGBE_FDIRCMD_CMD_ADD_FLOW              0x00000001
+#define IXGBE_FDIRCMD_CMD_REMOVE_FLOW           0x00000002
+#define IXGBE_FDIRCMD_CMD_QUERY_REM_FILT        0x00000003
+#define IXGBE_FDIRCMD_FILTER_VALID              0x00000004
+#define IXGBE_FDIRCMD_FILTER_UPDATE             0x00000008
+#define IXGBE_FDIRCMD_IPv6DMATCH                0x00000010
+#define IXGBE_FDIRCMD_L4TYPE_UDP                0x00000020
+#define IXGBE_FDIRCMD_L4TYPE_TCP                0x00000040
+#define IXGBE_FDIRCMD_L4TYPE_SCTP               0x00000060
+#define IXGBE_FDIRCMD_IPV6                      0x00000080
+#define IXGBE_FDIRCMD_CLEARHT                   0x00000100
+#define IXGBE_FDIRCMD_DROP                      0x00000200
+#define IXGBE_FDIRCMD_INT                       0x00000400
+#define IXGBE_FDIRCMD_LAST                      0x00000800
+#define IXGBE_FDIRCMD_COLLISION                 0x00001000
+#define IXGBE_FDIRCMD_QUEUE_EN                  0x00008000
+#define IXGBE_FDIRCMD_FLOW_TYPE_SHIFT           5
+#define IXGBE_FDIRCMD_RX_QUEUE_SHIFT            16
+#define IXGBE_FDIRCMD_RX_TUNNEL_FILTER_SHIFT	23
+#define IXGBE_FDIRCMD_VT_POOL_SHIFT             24
+#define IXGBE_FDIR_INIT_DONE_POLL               10
+#define IXGBE_FDIRCMD_CMD_POLL                  10
+#define IXGBE_FDIRCMD_TUNNEL_FILTER		0x00800000
+
+#define IXGBE_FDIR_DROP_QUEUE                   127
+
+/* Manageablility Host Interface defines */
+#define IXGBE_HI_MAX_BLOCK_BYTE_LENGTH	1792 /* Num of bytes in range */
+#define IXGBE_HI_MAX_BLOCK_DWORD_LENGTH	448 /* Num of dwords in range */
+#define IXGBE_HI_COMMAND_TIMEOUT	500 /* Process HI command limit */
+#define IXGBE_HI_FLASH_ERASE_TIMEOUT	1000 /* Process Erase command limit */
+#define IXGBE_HI_FLASH_UPDATE_TIMEOUT	5000 /* Process Update command limit */
+#define IXGBE_HI_FLASH_APPLY_TIMEOUT	0 /* Process Apply command limit */
+
+/* CEM Support */
+#define FW_CEM_HDR_LEN			0x4
+#define FW_CEM_CMD_DRIVER_INFO		0xDD
+#define FW_CEM_CMD_DRIVER_INFO_LEN	0x5
+#define FW_CEM_CMD_RESERVED		0x0
+#define FW_CEM_UNUSED_VER		0x0
+#define FW_CEM_MAX_RETRIES		3
+#define FW_CEM_RESP_STATUS_SUCCESS	0x1
+#define FW_CEM_DRIVER_VERSION_SIZE	39 /* +9 would send 48 bytes to fw */
+#define FW_READ_SHADOW_RAM_CMD		0x31
+#define FW_READ_SHADOW_RAM_LEN		0x6
+#define FW_WRITE_SHADOW_RAM_CMD		0x33
+#define FW_WRITE_SHADOW_RAM_LEN		0xA /* 8 plus 1 WORD to write */
+#define FW_SHADOW_RAM_DUMP_CMD		0x36
+#define FW_SHADOW_RAM_DUMP_LEN		0
+#define FW_DEFAULT_CHECKSUM		0xFF /* checksum always 0xFF */
+#define FW_NVM_DATA_OFFSET		3
+#define FW_MAX_READ_BUFFER_SIZE		1024
+#define FW_DISABLE_RXEN_CMD		0xDE
+#define FW_DISABLE_RXEN_LEN		0x1
+#define FW_PHY_MGMT_REQ_CMD		0x20
+#define FW_PHY_TOKEN_REQ_CMD		0x0A
+#define FW_PHY_TOKEN_REQ_LEN		2
+#define FW_PHY_TOKEN_REQ		0
+#define FW_PHY_TOKEN_REL		1
+#define FW_PHY_TOKEN_OK			1
+#define FW_PHY_TOKEN_RETRY		0x80
+#define FW_PHY_TOKEN_DELAY		5	/* milliseconds */
+#define FW_PHY_TOKEN_WAIT		5	/* seconds */
+#define FW_PHY_TOKEN_RETRIES ((FW_PHY_TOKEN_WAIT * 1000) / FW_PHY_TOKEN_DELAY)
+#define FW_INT_PHY_REQ_CMD		0xB
+#define FW_INT_PHY_REQ_LEN		10
+#define FW_INT_PHY_REQ_READ		0
+#define FW_INT_PHY_REQ_WRITE		1
+#define FW_PHY_ACT_REQ_CMD		5
+#define FW_PHY_ACT_DATA_COUNT		4
+#define FW_PHY_ACT_REQ_LEN		(4 + 4 * FW_PHY_ACT_DATA_COUNT)
+#define FW_PHY_ACT_INIT_PHY		1
+#define FW_PHY_ACT_SETUP_LINK		2
+#define FW_PHY_ACT_LINK_SPEED_10	BIT(0)
+#define FW_PHY_ACT_LINK_SPEED_100	BIT(1)
+#define FW_PHY_ACT_LINK_SPEED_1G	BIT(2)
+#define FW_PHY_ACT_LINK_SPEED_2_5G	BIT(3)
+#define FW_PHY_ACT_LINK_SPEED_5G	BIT(4)
+#define FW_PHY_ACT_LINK_SPEED_10G	BIT(5)
+#define FW_PHY_ACT_LINK_SPEED_20G	BIT(6)
+#define FW_PHY_ACT_LINK_SPEED_25G	BIT(7)
+#define FW_PHY_ACT_LINK_SPEED_40G	BIT(8)
+#define FW_PHY_ACT_LINK_SPEED_50G	BIT(9)
+#define FW_PHY_ACT_LINK_SPEED_100G	BIT(10)
+#define FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT 16
+#define FW_PHY_ACT_SETUP_LINK_PAUSE_MASK (3 << \
+					  HW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT)
+#define FW_PHY_ACT_SETUP_LINK_PAUSE_NONE 0u
+#define FW_PHY_ACT_SETUP_LINK_PAUSE_TX	1u
+#define FW_PHY_ACT_SETUP_LINK_PAUSE_RX	2u
+#define FW_PHY_ACT_SETUP_LINK_PAUSE_RXTX 3u
+#define FW_PHY_ACT_SETUP_LINK_LP	BIT(18)
+#define FW_PHY_ACT_SETUP_LINK_HP	BIT(19)
+#define FW_PHY_ACT_SETUP_LINK_EEE	BIT(20)
+#define FW_PHY_ACT_SETUP_LINK_AN	BIT(22)
+#define FW_PHY_ACT_SETUP_LINK_RSP_DOWN	BIT(0)
+#define FW_PHY_ACT_GET_LINK_INFO	3
+#define FW_PHY_ACT_GET_LINK_INFO_EEE	BIT(19)
+#define FW_PHY_ACT_GET_LINK_INFO_FC_TX	BIT(20)
+#define FW_PHY_ACT_GET_LINK_INFO_FC_RX	BIT(21)
+#define FW_PHY_ACT_GET_LINK_INFO_POWER	BIT(22)
+#define FW_PHY_ACT_GET_LINK_INFO_AN_COMPLETE	BIT(24)
+#define FW_PHY_ACT_GET_LINK_INFO_TEMP	BIT(25)
+#define FW_PHY_ACT_GET_LINK_INFO_LP_FC_TX	BIT(28)
+#define FW_PHY_ACT_GET_LINK_INFO_LP_FC_RX	BIT(29)
+#define FW_PHY_ACT_FORCE_LINK_DOWN	4
+#define FW_PHY_ACT_FORCE_LINK_DOWN_OFF	BIT(0)
+#define FW_PHY_ACT_PHY_SW_RESET		5
+#define FW_PHY_ACT_PHY_HW_RESET		6
+#define FW_PHY_ACT_GET_PHY_INFO		7
+#define FW_PHY_ACT_UD_2			0x1002
+#define FW_PHY_ACT_UD_2_10G_KR_EEE	BIT(6)
+#define FW_PHY_ACT_UD_2_10G_KX4_EEE	BIT(5)
+#define FW_PHY_ACT_UD_2_1G_KX_EEE	BIT(4)
+#define FW_PHY_ACT_UD_2_10G_T_EEE	BIT(3)
+#define FW_PHY_ACT_UD_2_1G_T_EEE	BIT(2)
+#define FW_PHY_ACT_UD_2_100M_TX_EEE	BIT(1)
+#define FW_PHY_ACT_RETRIES		50
+#define FW_PHY_INFO_SPEED_MASK		0xFFFu
+#define FW_PHY_INFO_ID_HI_MASK		0xFFFF0000u
+#define FW_PHY_INFO_ID_LO_MASK		0x0000FFFFu
+
+/* Host Interface Command Structures */
+struct ixgbe_hic_hdr {
+	u8 cmd;
+	u8 buf_len;
+	union {
+		u8 cmd_resv;
+		u8 ret_status;
+	} cmd_or_resp;
+	u8 checksum;
+};
+
+struct ixgbe_hic_hdr2_req {
+	u8 cmd;
+	u8 buf_lenh;
+	u8 buf_lenl;
+	u8 checksum;
+};
+
+struct ixgbe_hic_hdr2_rsp {
+	u8 cmd;
+	u8 buf_lenl;
+	u8 buf_lenh_status;     /* 7-5: high bits of buf_len, 4-0: status */
+	u8 checksum;
+};
+
+union ixgbe_hic_hdr2 {
+	struct ixgbe_hic_hdr2_req req;
+	struct ixgbe_hic_hdr2_rsp rsp;
+};
+
+struct ixgbe_hic_drv_info {
+	struct ixgbe_hic_hdr hdr;
+	u8 port_num;
+	u8 ver_sub;
+	u8 ver_build;
+	u8 ver_min;
+	u8 ver_maj;
+	u8 pad; /* end spacing to ensure length is mult. of dword */
+	u16 pad2; /* end spacing to ensure length is mult. of dword2 */
+};
+
+struct ixgbe_hic_drv_info2 {
+	struct ixgbe_hic_hdr hdr;
+	u8 port_num;
+	u8 ver_sub;
+	u8 ver_build;
+	u8 ver_min;
+	u8 ver_maj;
+	char driver_string[FW_CEM_DRIVER_VERSION_SIZE];
+};
+
+/* These need to be dword aligned */
+struct ixgbe_hic_read_shadow_ram {
+	union ixgbe_hic_hdr2 hdr;
+	u32 address;
+	u16 length;
+	u16 pad2;
+	u16 data;
+	u16 pad3;
+};
+
+struct ixgbe_hic_write_shadow_ram {
+	union ixgbe_hic_hdr2 hdr;
+	__be32 address;
+	__be16 length;
+	u16 pad2;
+	u16 data;
+	u16 pad3;
+};
+
+struct ixgbe_hic_disable_rxen {
+	struct ixgbe_hic_hdr hdr;
+	u8  port_number;
+	u8  pad2;
+	u16 pad3;
+};
+
+struct ixgbe_hic_phy_token_req {
+	struct ixgbe_hic_hdr hdr;
+	u8 port_number;
+	u8 command_type;
+	u16 pad;
+};
+
+struct ixgbe_hic_internal_phy_req {
+	struct ixgbe_hic_hdr hdr;
+	u8 port_number;
+	u8 command_type;
+	__be16 address;
+	u16 rsv1;
+	__be32 write_data;
+	u16 pad;
+} __packed;
+
+struct ixgbe_hic_internal_phy_resp {
+	struct ixgbe_hic_hdr hdr;
+	__be32 read_data;
+};
+
+struct ixgbe_hic_phy_activity_req {
+	struct ixgbe_hic_hdr hdr;
+	u8 port_number;
+	u8 pad;
+	__le16 activity_id;
+	__be32 data[FW_PHY_ACT_DATA_COUNT];
+};
+
+struct ixgbe_hic_phy_activity_resp {
+	struct ixgbe_hic_hdr hdr;
+	__be32 data[FW_PHY_ACT_DATA_COUNT];
+};
+
+/* Transmit Descriptor - Advanced */
+union ixgbe_adv_tx_desc {
+	struct {
+		__le64 buffer_addr;      /* Address of descriptor's data buf */
+		__le32 cmd_type_len;
+		__le32 olinfo_status;
+	} read;
+	struct {
+		__le64 rsvd;       /* Reserved */
+		__le32 nxtseq_seed;
+		__le32 status;
+	} wb;
+};
+
+/* Receive Descriptor - Advanced */
+union ixgbe_adv_rx_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+	} read;
+	struct {
+		struct {
+			union {
+				__le32 data;
+				struct {
+					__le16 pkt_info; /* RSS, Pkt type */
+					__le16 hdr_info; /* Splithdr, hdrlen */
+				} hs_rss;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				struct {
+					__le16 ip_id; /* IP id */
+					__le16 csum; /* Packet Checksum */
+				} csum_ip;
+			} hi_dword;
+		} lower;
+		struct {
+			__le32 status_error; /* ext status/error */
+			__le16 length; /* Packet length */
+			__le16 vlan; /* VLAN tag */
+		} upper;
+	} wb;  /* writeback */
+};
+
+/* Context descriptors */
+struct ixgbe_adv_tx_context_desc {
+	__le32 vlan_macip_lens;
+	__le32 fceof_saidx;
+	__le32 type_tucmd_mlhl;
+	__le32 mss_l4len_idx;
+};
+
+/* Adv Transmit Descriptor Config Masks */
+#define IXGBE_ADVTXD_DTALEN_MASK      0x0000FFFF /* Data buf length(bytes) */
+#define IXGBE_ADVTXD_MAC_LINKSEC      0x00040000 /* Insert LinkSec */
+#define IXGBE_ADVTXD_MAC_TSTAMP	      0x00080000 /* IEEE 1588 Time Stamp */
+#define IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK   0x000003FF /* IPSec SA index */
+#define IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK    0x000001FF /* IPSec ESP length */
+#define IXGBE_ADVTXD_DTYP_MASK  0x00F00000 /* DTYP mask */
+#define IXGBE_ADVTXD_DTYP_CTXT  0x00200000 /* Advanced Context Desc */
+#define IXGBE_ADVTXD_DTYP_DATA  0x00300000 /* Advanced Data Descriptor */
+#define IXGBE_ADVTXD_DCMD_EOP   IXGBE_TXD_CMD_EOP  /* End of Packet */
+#define IXGBE_ADVTXD_DCMD_IFCS  IXGBE_TXD_CMD_IFCS /* Insert FCS */
+#define IXGBE_ADVTXD_DCMD_RS    IXGBE_TXD_CMD_RS   /* Report Status */
+#define IXGBE_ADVTXD_DCMD_DDTYP_ISCSI 0x10000000    /* DDP hdr type or iSCSI */
+#define IXGBE_ADVTXD_DCMD_DEXT  IXGBE_TXD_CMD_DEXT /* Desc ext (1=Adv) */
+#define IXGBE_ADVTXD_DCMD_VLE   IXGBE_TXD_CMD_VLE  /* VLAN pkt enable */
+#define IXGBE_ADVTXD_DCMD_TSE   0x80000000 /* TCP Seg enable */
+#define IXGBE_ADVTXD_STAT_DD    IXGBE_TXD_STAT_DD  /* Descriptor Done */
+#define IXGBE_ADVTXD_STAT_SN_CRC      0x00000002 /* NXTSEQ/SEED pres in WB */
+#define IXGBE_ADVTXD_STAT_RSV   0x0000000C /* STA Reserved */
+#define IXGBE_ADVTXD_IDX_SHIFT  4 /* Adv desc Index shift */
+#define IXGBE_ADVTXD_CC         0x00000080 /* Check Context */
+#define IXGBE_ADVTXD_POPTS_SHIFT      8  /* Adv desc POPTS shift */
+#define IXGBE_ADVTXD_POPTS_IXSM (IXGBE_TXD_POPTS_IXSM << \
+				 IXGBE_ADVTXD_POPTS_SHIFT)
+#define IXGBE_ADVTXD_POPTS_TXSM (IXGBE_TXD_POPTS_TXSM << \
+				 IXGBE_ADVTXD_POPTS_SHIFT)
+#define IXGBE_ADVTXD_POPTS_IPSEC     0x00000400 /* IPSec offload request */
+#define IXGBE_ADVTXD_POPTS_ISCO_1ST  0x00000000 /* 1st TSO of iSCSI PDU */
+#define IXGBE_ADVTXD_POPTS_ISCO_MDL  0x00000800 /* Middle TSO of iSCSI PDU */
+#define IXGBE_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */
+#define IXGBE_ADVTXD_POPTS_ISCO_FULL 0x00001800 /* 1st&Last TSO-full iSCSI PDU */
+#define IXGBE_ADVTXD_POPTS_RSV       0x00002000 /* POPTS Reserved */
+#define IXGBE_ADVTXD_PAYLEN_SHIFT    14 /* Adv desc PAYLEN shift */
+#define IXGBE_ADVTXD_MACLEN_SHIFT    9  /* Adv ctxt desc mac len shift */
+#define IXGBE_ADVTXD_VLAN_SHIFT      16  /* Adv ctxt vlan tag shift */
+#define IXGBE_ADVTXD_TUCMD_IPV4      0x00000400  /* IP Packet Type: 1=IPv4 */
+#define IXGBE_ADVTXD_TUCMD_IPV6      0x00000000  /* IP Packet Type: 0=IPv6 */
+#define IXGBE_ADVTXD_TUCMD_L4T_UDP   0x00000000  /* L4 Packet TYPE of UDP */
+#define IXGBE_ADVTXD_TUCMD_L4T_TCP   0x00000800  /* L4 Packet TYPE of TCP */
+#define IXGBE_ADVTXD_TUCMD_L4T_SCTP  0x00001000  /* L4 Packet TYPE of SCTP */
+#define IXGBE_ADVTXD_TUCMD_L4T_RSV     0x00001800 /* RSV L4 Packet TYPE */
+#define IXGBE_ADVTXD_TUCMD_MKRREQ    0x00002000 /*Req requires Markers and CRC*/
+#define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */
+#define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000/* ESP Encrypt Enable */
+#define IXGBE_ADVTXT_TUCMD_FCOE      0x00008000       /* FCoE Frame Type */
+#define IXGBE_ADVTXD_FCOEF_SOF       (BIT(2) << 10) /* FC SOF index */
+#define IXGBE_ADVTXD_FCOEF_PARINC    (BIT(3) << 10) /* Rel_Off in F_CTL */
+#define IXGBE_ADVTXD_FCOEF_ORIE      (BIT(4) << 10) /* Orientation: End */
+#define IXGBE_ADVTXD_FCOEF_ORIS      (BIT(5) << 10) /* Orientation: Start */
+#define IXGBE_ADVTXD_FCOEF_EOF_N     (0u << 10)  /* 00: EOFn */
+#define IXGBE_ADVTXD_FCOEF_EOF_T     (1u << 10)  /* 01: EOFt */
+#define IXGBE_ADVTXD_FCOEF_EOF_NI    (2u << 10)  /* 10: EOFni */
+#define IXGBE_ADVTXD_FCOEF_EOF_A     (3u << 10)  /* 11: EOFa */
+#define IXGBE_ADVTXD_FCOEF_EOF_MASK  (3u << 10)  /* FC EOF index */
+#define IXGBE_ADVTXD_L4LEN_SHIFT     8  /* Adv ctxt L4LEN shift */
+#define IXGBE_ADVTXD_MSS_SHIFT       16  /* Adv ctxt MSS shift */
+
+/* Autonegotiation advertised speeds */
+typedef u32 ixgbe_autoneg_advertised;
+/* Link speed */
+typedef u32 ixgbe_link_speed;
+#define IXGBE_LINK_SPEED_UNKNOWN	0
+#define IXGBE_LINK_SPEED_10_FULL	0x0002
+#define IXGBE_LINK_SPEED_100_FULL	0x0008
+#define IXGBE_LINK_SPEED_1GB_FULL	0x0020
+#define IXGBE_LINK_SPEED_2_5GB_FULL	0x0400
+#define IXGBE_LINK_SPEED_5GB_FULL	0x0800
+#define IXGBE_LINK_SPEED_10GB_FULL	0x0080
+#define IXGBE_LINK_SPEED_82598_AUTONEG (IXGBE_LINK_SPEED_1GB_FULL | \
+					IXGBE_LINK_SPEED_10GB_FULL)
+#define IXGBE_LINK_SPEED_82599_AUTONEG (IXGBE_LINK_SPEED_100_FULL | \
+					IXGBE_LINK_SPEED_1GB_FULL | \
+					IXGBE_LINK_SPEED_10GB_FULL)
+
+/* Flow Control Data Sheet defined values
+ * Calculation and defines taken from 802.1bb Annex O
+ */
+
+/* BitTimes (BT) conversion */
+#define IXGBE_BT2KB(BT) ((BT + (8 * 1024 - 1)) / (8 * 1024))
+#define IXGBE_B2BT(BT) (BT * 8)
+
+/* Calculate Delay to respond to PFC */
+#define IXGBE_PFC_D	672
+
+/* Calculate Cable Delay */
+#define IXGBE_CABLE_DC	5556 /* Delay Copper */
+#define IXGBE_CABLE_DO	5000 /* Delay Optical */
+
+/* Calculate Interface Delay X540 */
+#define IXGBE_PHY_DC	25600	/* Delay 10G BASET */
+#define IXGBE_MAC_DC	8192	/* Delay Copper XAUI interface */
+#define IXGBE_XAUI_DC	(2 * 2048) /* Delay Copper Phy */
+
+#define IXGBE_ID_X540	(IXGBE_MAC_DC + IXGBE_XAUI_DC + IXGBE_PHY_DC)
+
+/* Calculate Interface Delay 82598, 82599 */
+#define IXGBE_PHY_D	12800
+#define IXGBE_MAC_D	4096
+#define IXGBE_XAUI_D	(2 * 1024)
+
+#define IXGBE_ID	(IXGBE_MAC_D + IXGBE_XAUI_D + IXGBE_PHY_D)
+
+/* Calculate Delay incurred from higher layer */
+#define IXGBE_HD	6144
+
+/* Calculate PCI Bus delay for low thresholds */
+#define IXGBE_PCI_DELAY	10000
+
+/* Calculate X540 delay value in bit times */
+#define IXGBE_DV_X540(_max_frame_link, _max_frame_tc) \
+			((36 * \
+			  (IXGBE_B2BT(_max_frame_link) + \
+			   IXGBE_PFC_D + \
+			   (2 * IXGBE_CABLE_DC) + \
+			   (2 * IXGBE_ID_X540) + \
+			   IXGBE_HD) / 25 + 1) + \
+			 2 * IXGBE_B2BT(_max_frame_tc))
+
+/* Calculate 82599, 82598 delay value in bit times */
+#define IXGBE_DV(_max_frame_link, _max_frame_tc) \
+			((36 * \
+			  (IXGBE_B2BT(_max_frame_link) + \
+			   IXGBE_PFC_D + \
+			   (2 * IXGBE_CABLE_DC) + \
+			   (2 * IXGBE_ID) + \
+			   IXGBE_HD) / 25 + 1) + \
+			 2 * IXGBE_B2BT(_max_frame_tc))
+
+/* Calculate low threshold delay values */
+#define IXGBE_LOW_DV_X540(_max_frame_tc) \
+			(2 * IXGBE_B2BT(_max_frame_tc) + \
+			(36 * IXGBE_PCI_DELAY / 25) + 1)
+#define IXGBE_LOW_DV(_max_frame_tc) \
+			(2 * IXGBE_LOW_DV_X540(_max_frame_tc))
+
+/* Software ATR hash keys */
+#define IXGBE_ATR_BUCKET_HASH_KEY    0x3DAD14E2
+#define IXGBE_ATR_SIGNATURE_HASH_KEY 0x174D3614
+
+/* Software ATR input stream values and masks */
+#define IXGBE_ATR_HASH_MASK		0x7fff
+#define IXGBE_ATR_L4TYPE_MASK		0x3
+#define IXGBE_ATR_L4TYPE_UDP		0x1
+#define IXGBE_ATR_L4TYPE_TCP		0x2
+#define IXGBE_ATR_L4TYPE_SCTP		0x3
+#define IXGBE_ATR_L4TYPE_IPV6_MASK	0x4
+#define IXGBE_ATR_L4TYPE_TUNNEL_MASK	0x10
+enum ixgbe_atr_flow_type {
+	IXGBE_ATR_FLOW_TYPE_IPV4   = 0x0,
+	IXGBE_ATR_FLOW_TYPE_UDPV4  = 0x1,
+	IXGBE_ATR_FLOW_TYPE_TCPV4  = 0x2,
+	IXGBE_ATR_FLOW_TYPE_SCTPV4 = 0x3,
+	IXGBE_ATR_FLOW_TYPE_IPV6   = 0x4,
+	IXGBE_ATR_FLOW_TYPE_UDPV6  = 0x5,
+	IXGBE_ATR_FLOW_TYPE_TCPV6  = 0x6,
+	IXGBE_ATR_FLOW_TYPE_SCTPV6 = 0x7,
+};
+
+/* Flow Director ATR input struct. */
+union ixgbe_atr_input {
+	/*
+	 * Byte layout in order, all values with MSB first:
+	 *
+	 * vm_pool    - 1 byte
+	 * flow_type  - 1 byte
+	 * vlan_id    - 2 bytes
+	 * src_ip     - 16 bytes
+	 * dst_ip     - 16 bytes
+	 * src_port   - 2 bytes
+	 * dst_port   - 2 bytes
+	 * flex_bytes - 2 bytes
+	 * bkt_hash   - 2 bytes
+	 */
+	struct {
+		u8     vm_pool;
+		u8     flow_type;
+		__be16 vlan_id;
+		__be32 dst_ip[4];
+		__be32 src_ip[4];
+		__be16 src_port;
+		__be16 dst_port;
+		__be16 flex_bytes;
+		__be16 bkt_hash;
+	} formatted;
+	__be32 dword_stream[11];
+};
+
+/* Flow Director compressed ATR hash input struct */
+union ixgbe_atr_hash_dword {
+	struct {
+		u8 vm_pool;
+		u8 flow_type;
+		__be16 vlan_id;
+	} formatted;
+	__be32 ip;
+	struct {
+		__be16 src;
+		__be16 dst;
+	} port;
+	__be16 flex_bytes;
+	__be32 dword;
+};
+
+#define IXGBE_MVALS_INIT(m)		\
+	IXGBE_CAT(EEC, m),		\
+	IXGBE_CAT(FLA, m),		\
+	IXGBE_CAT(GRC, m),		\
+	IXGBE_CAT(FACTPS, m),		\
+	IXGBE_CAT(SWSM, m),		\
+	IXGBE_CAT(SWFW_SYNC, m),	\
+	IXGBE_CAT(FWSM, m),		\
+	IXGBE_CAT(SDP0_GPIEN, m),	\
+	IXGBE_CAT(SDP1_GPIEN, m),	\
+	IXGBE_CAT(SDP2_GPIEN, m),	\
+	IXGBE_CAT(EICR_GPI_SDP0, m),	\
+	IXGBE_CAT(EICR_GPI_SDP1, m),	\
+	IXGBE_CAT(EICR_GPI_SDP2, m),	\
+	IXGBE_CAT(CIAA, m),		\
+	IXGBE_CAT(CIAD, m),		\
+	IXGBE_CAT(I2C_CLK_IN, m),	\
+	IXGBE_CAT(I2C_CLK_OUT, m),	\
+	IXGBE_CAT(I2C_DATA_IN, m),	\
+	IXGBE_CAT(I2C_DATA_OUT, m),	\
+	IXGBE_CAT(I2C_DATA_OE_N_EN, m),	\
+	IXGBE_CAT(I2C_BB_EN, m),	\
+	IXGBE_CAT(I2C_CLK_OE_N_EN, m),	\
+	IXGBE_CAT(I2CCTL, m)
+
+enum ixgbe_mvals {
+	IXGBE_MVALS_INIT(IDX),
+	IXGBE_MVALS_IDX_LIMIT
+};
+
+enum ixgbe_eeprom_type {
+	ixgbe_eeprom_uninitialized = 0,
+	ixgbe_eeprom_spi,
+	ixgbe_flash,
+	ixgbe_eeprom_none /* No NVM support */
+};
+
+enum ixgbe_mac_type {
+	ixgbe_mac_unknown = 0,
+	ixgbe_mac_82598EB,
+	ixgbe_mac_82599EB,
+	ixgbe_mac_X540,
+	ixgbe_mac_X550,
+	ixgbe_mac_X550EM_x,
+	ixgbe_mac_x550em_a,
+	ixgbe_num_macs
+};
+
+enum ixgbe_phy_type {
+	ixgbe_phy_unknown = 0,
+	ixgbe_phy_none,
+	ixgbe_phy_tn,
+	ixgbe_phy_aq,
+	ixgbe_phy_x550em_kr,
+	ixgbe_phy_x550em_kx4,
+	ixgbe_phy_x550em_xfi,
+	ixgbe_phy_x550em_ext_t,
+	ixgbe_phy_ext_1g_t,
+	ixgbe_phy_cu_unknown,
+	ixgbe_phy_qt,
+	ixgbe_phy_xaui,
+	ixgbe_phy_nl,
+	ixgbe_phy_sfp_passive_tyco,
+	ixgbe_phy_sfp_passive_unknown,
+	ixgbe_phy_sfp_active_unknown,
+	ixgbe_phy_sfp_avago,
+	ixgbe_phy_sfp_ftl,
+	ixgbe_phy_sfp_ftl_active,
+	ixgbe_phy_sfp_unknown,
+	ixgbe_phy_sfp_intel,
+	ixgbe_phy_qsfp_passive_unknown,
+	ixgbe_phy_qsfp_active_unknown,
+	ixgbe_phy_qsfp_intel,
+	ixgbe_phy_qsfp_unknown,
+	ixgbe_phy_sfp_unsupported,
+	ixgbe_phy_sgmii,
+	ixgbe_phy_fw,
+	ixgbe_phy_generic
+};
+
+/*
+ * SFP+ module type IDs:
+ *
+ * ID   Module Type
+ * =============
+ * 0    SFP_DA_CU
+ * 1    SFP_SR
+ * 2    SFP_LR
+ * 3    SFP_DA_CU_CORE0 - 82599-specific
+ * 4    SFP_DA_CU_CORE1 - 82599-specific
+ * 5    SFP_SR/LR_CORE0 - 82599-specific
+ * 6    SFP_SR/LR_CORE1 - 82599-specific
+ */
+enum ixgbe_sfp_type {
+	ixgbe_sfp_type_da_cu = 0,
+	ixgbe_sfp_type_sr = 1,
+	ixgbe_sfp_type_lr = 2,
+	ixgbe_sfp_type_da_cu_core0 = 3,
+	ixgbe_sfp_type_da_cu_core1 = 4,
+	ixgbe_sfp_type_srlr_core0 = 5,
+	ixgbe_sfp_type_srlr_core1 = 6,
+	ixgbe_sfp_type_da_act_lmt_core0 = 7,
+	ixgbe_sfp_type_da_act_lmt_core1 = 8,
+	ixgbe_sfp_type_1g_cu_core0 = 9,
+	ixgbe_sfp_type_1g_cu_core1 = 10,
+	ixgbe_sfp_type_1g_sx_core0 = 11,
+	ixgbe_sfp_type_1g_sx_core1 = 12,
+	ixgbe_sfp_type_1g_lx_core0 = 13,
+	ixgbe_sfp_type_1g_lx_core1 = 14,
+	ixgbe_sfp_type_not_present = 0xFFFE,
+	ixgbe_sfp_type_unknown = 0xFFFF
+};
+
+enum ixgbe_media_type {
+	ixgbe_media_type_unknown = 0,
+	ixgbe_media_type_fiber,
+	ixgbe_media_type_fiber_qsfp,
+	ixgbe_media_type_fiber_lco,
+	ixgbe_media_type_copper,
+	ixgbe_media_type_backplane,
+	ixgbe_media_type_cx4,
+	ixgbe_media_type_virtual
+};
+
+/* Flow Control Settings */
+enum ixgbe_fc_mode {
+	ixgbe_fc_none = 0,
+	ixgbe_fc_rx_pause,
+	ixgbe_fc_tx_pause,
+	ixgbe_fc_full,
+	ixgbe_fc_default
+};
+
+/* Smart Speed Settings */
+#define IXGBE_SMARTSPEED_MAX_RETRIES	3
+enum ixgbe_smart_speed {
+	ixgbe_smart_speed_auto = 0,
+	ixgbe_smart_speed_on,
+	ixgbe_smart_speed_off
+};
+
+/* PCI bus types */
+enum ixgbe_bus_type {
+	ixgbe_bus_type_unknown = 0,
+	ixgbe_bus_type_pci_express,
+	ixgbe_bus_type_internal,
+	ixgbe_bus_type_reserved
+};
+
+/* PCI bus speeds */
+enum ixgbe_bus_speed {
+	ixgbe_bus_speed_unknown = 0,
+	ixgbe_bus_speed_33      = 33,
+	ixgbe_bus_speed_66      = 66,
+	ixgbe_bus_speed_100     = 100,
+	ixgbe_bus_speed_120     = 120,
+	ixgbe_bus_speed_133     = 133,
+	ixgbe_bus_speed_2500    = 2500,
+	ixgbe_bus_speed_5000    = 5000,
+	ixgbe_bus_speed_8000    = 8000,
+	ixgbe_bus_speed_reserved
+};
+
+/* PCI bus widths */
+enum ixgbe_bus_width {
+	ixgbe_bus_width_unknown = 0,
+	ixgbe_bus_width_pcie_x1 = 1,
+	ixgbe_bus_width_pcie_x2 = 2,
+	ixgbe_bus_width_pcie_x4 = 4,
+	ixgbe_bus_width_pcie_x8 = 8,
+	ixgbe_bus_width_32      = 32,
+	ixgbe_bus_width_64      = 64,
+	ixgbe_bus_width_reserved
+};
+
+struct ixgbe_addr_filter_info {
+	u32 num_mc_addrs;
+	u32 rar_used_count;
+	u32 mta_in_use;
+	u32 overflow_promisc;
+	bool uc_set_promisc;
+	bool user_set_promisc;
+};
+
+/* Bus parameters */
+struct ixgbe_bus_info {
+	enum ixgbe_bus_speed speed;
+	enum ixgbe_bus_width width;
+	enum ixgbe_bus_type type;
+
+	u8 func;
+	u8 lan_id;
+	u8 instance_id;
+};
+
+/* Flow control parameters */
+struct ixgbe_fc_info {
+	u32 high_water[MAX_TRAFFIC_CLASS]; /* Flow Control High-water */
+	u32 low_water[MAX_TRAFFIC_CLASS]; /* Flow Control Low-water */
+	u16 pause_time; /* Flow Control Pause timer */
+	bool send_xon; /* Flow control send XON */
+	bool strict_ieee; /* Strict IEEE mode */
+	bool disable_fc_autoneg; /* Do not autonegotiate FC */
+	bool fc_was_autonegged; /* Is current_mode the result of autonegging? */
+	enum ixgbe_fc_mode current_mode; /* FC mode in effect */
+	enum ixgbe_fc_mode requested_mode; /* FC mode requested by caller */
+};
+
+/* Statistics counters collected by the MAC */
+struct ixgbe_hw_stats {
+	u64 crcerrs;
+	u64 illerrc;
+	u64 errbc;
+	u64 mspdc;
+	u64 mpctotal;
+	u64 mpc[8];
+	u64 mlfc;
+	u64 mrfc;
+	u64 rlec;
+	u64 lxontxc;
+	u64 lxonrxc;
+	u64 lxofftxc;
+	u64 lxoffrxc;
+	u64 pxontxc[8];
+	u64 pxonrxc[8];
+	u64 pxofftxc[8];
+	u64 pxoffrxc[8];
+	u64 prc64;
+	u64 prc127;
+	u64 prc255;
+	u64 prc511;
+	u64 prc1023;
+	u64 prc1522;
+	u64 gprc;
+	u64 bprc;
+	u64 mprc;
+	u64 gptc;
+	u64 gorc;
+	u64 gotc;
+	u64 rnbc[8];
+	u64 ruc;
+	u64 rfc;
+	u64 roc;
+	u64 rjc;
+	u64 mngprc;
+	u64 mngpdc;
+	u64 mngptc;
+	u64 tor;
+	u64 tpr;
+	u64 tpt;
+	u64 ptc64;
+	u64 ptc127;
+	u64 ptc255;
+	u64 ptc511;
+	u64 ptc1023;
+	u64 ptc1522;
+	u64 mptc;
+	u64 bptc;
+	u64 xec;
+	u64 rqsmr[16];
+	u64 tqsmr[8];
+	u64 qprc[16];
+	u64 qptc[16];
+	u64 qbrc[16];
+	u64 qbtc[16];
+	u64 qprdc[16];
+	u64 pxon2offc[8];
+	u64 fdirustat_add;
+	u64 fdirustat_remove;
+	u64 fdirfstat_fadd;
+	u64 fdirfstat_fremove;
+	u64 fdirmatch;
+	u64 fdirmiss;
+	u64 fccrc;
+	u64 fcoerpdc;
+	u64 fcoeprc;
+	u64 fcoeptc;
+	u64 fcoedwrc;
+	u64 fcoedwtc;
+	u64 fcoe_noddp;
+	u64 fcoe_noddp_ext_buff;
+	u64 b2ospc;
+	u64 b2ogprc;
+	u64 o2bgptc;
+	u64 o2bspc;
+};
+
+/* forward declaration */
+struct ixgbe_hw;
+
+/* Function pointer table */
+struct ixgbe_eeprom_operations {
+	s32 (*init_params)(struct ixgbe_hw *);
+	s32 (*read)(struct ixgbe_hw *, u16, u16 *);
+	s32 (*read_buffer)(struct ixgbe_hw *, u16, u16, u16 *);
+	s32 (*write)(struct ixgbe_hw *, u16, u16);
+	s32 (*write_buffer)(struct ixgbe_hw *, u16, u16, u16 *);
+	s32 (*validate_checksum)(struct ixgbe_hw *, u16 *);
+	s32 (*update_checksum)(struct ixgbe_hw *);
+	s32 (*calc_checksum)(struct ixgbe_hw *);
+};
+
+struct ixgbe_mac_operations {
+	s32 (*init_hw)(struct ixgbe_hw *);
+	s32 (*reset_hw)(struct ixgbe_hw *);
+	s32 (*start_hw)(struct ixgbe_hw *);
+	s32 (*clear_hw_cntrs)(struct ixgbe_hw *);
+	enum ixgbe_media_type (*get_media_type)(struct ixgbe_hw *);
+	s32 (*get_mac_addr)(struct ixgbe_hw *, u8 *);
+	s32 (*get_san_mac_addr)(struct ixgbe_hw *, u8 *);
+	s32 (*get_device_caps)(struct ixgbe_hw *, u16 *);
+	s32 (*get_wwn_prefix)(struct ixgbe_hw *, u16 *, u16 *);
+	s32 (*stop_adapter)(struct ixgbe_hw *);
+	s32 (*get_bus_info)(struct ixgbe_hw *);
+	void (*set_lan_id)(struct ixgbe_hw *);
+	s32 (*read_analog_reg8)(struct ixgbe_hw*, u32, u8*);
+	s32 (*write_analog_reg8)(struct ixgbe_hw*, u32, u8);
+	s32 (*setup_sfp)(struct ixgbe_hw *);
+	s32 (*disable_rx_buff)(struct ixgbe_hw *);
+	s32 (*enable_rx_buff)(struct ixgbe_hw *);
+	s32 (*enable_rx_dma)(struct ixgbe_hw *, u32);
+	s32 (*acquire_swfw_sync)(struct ixgbe_hw *, u32);
+	void (*release_swfw_sync)(struct ixgbe_hw *, u32);
+	void (*init_swfw_sync)(struct ixgbe_hw *);
+	s32 (*prot_autoc_read)(struct ixgbe_hw *, bool *, u32 *);
+	s32 (*prot_autoc_write)(struct ixgbe_hw *, u32, bool);
+
+	/* Link */
+	void (*disable_tx_laser)(struct ixgbe_hw *);
+	void (*enable_tx_laser)(struct ixgbe_hw *);
+	void (*flap_tx_laser)(struct ixgbe_hw *);
+	void (*stop_link_on_d3)(struct ixgbe_hw *);
+	s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool);
+	s32 (*setup_mac_link)(struct ixgbe_hw *, ixgbe_link_speed, bool);
+	s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *, bool);
+	s32 (*get_link_capabilities)(struct ixgbe_hw *, ixgbe_link_speed *,
+				     bool *);
+	void (*set_rate_select_speed)(struct ixgbe_hw *, ixgbe_link_speed);
+
+	/* Packet Buffer Manipulation */
+	void (*set_rxpba)(struct ixgbe_hw *, int, u32, int);
+
+	/* LED */
+	s32 (*led_on)(struct ixgbe_hw *, u32);
+	s32 (*led_off)(struct ixgbe_hw *, u32);
+	s32 (*blink_led_start)(struct ixgbe_hw *, u32);
+	s32 (*blink_led_stop)(struct ixgbe_hw *, u32);
+	s32 (*init_led_link_act)(struct ixgbe_hw *);
+
+	/* RAR, Multicast, VLAN */
+	s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32);
+	s32 (*clear_rar)(struct ixgbe_hw *, u32);
+	s32 (*set_vmdq)(struct ixgbe_hw *, u32, u32);
+	s32 (*set_vmdq_san_mac)(struct ixgbe_hw *, u32);
+	s32 (*clear_vmdq)(struct ixgbe_hw *, u32, u32);
+	s32 (*init_rx_addrs)(struct ixgbe_hw *);
+	s32 (*update_mc_addr_list)(struct ixgbe_hw *, struct net_device *);
+	s32 (*enable_mc)(struct ixgbe_hw *);
+	s32 (*disable_mc)(struct ixgbe_hw *);
+	s32 (*clear_vfta)(struct ixgbe_hw *);
+	s32 (*set_vfta)(struct ixgbe_hw *, u32, u32, bool, bool);
+	s32 (*init_uta_tables)(struct ixgbe_hw *);
+	void (*set_mac_anti_spoofing)(struct ixgbe_hw *, bool, int);
+	void (*set_vlan_anti_spoofing)(struct ixgbe_hw *, bool, int);
+
+	/* Flow Control */
+	s32 (*fc_enable)(struct ixgbe_hw *);
+	s32 (*setup_fc)(struct ixgbe_hw *);
+	void (*fc_autoneg)(struct ixgbe_hw *);
+
+	/* Manageability interface */
+	s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8, u16,
+			      const char *);
+	s32 (*get_thermal_sensor_data)(struct ixgbe_hw *);
+	s32 (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw);
+	bool (*fw_recovery_mode)(struct ixgbe_hw *hw);
+	void (*disable_rx)(struct ixgbe_hw *hw);
+	void (*enable_rx)(struct ixgbe_hw *hw);
+	void (*set_source_address_pruning)(struct ixgbe_hw *, bool,
+					   unsigned int);
+	void (*set_ethertype_anti_spoofing)(struct ixgbe_hw *, bool, int);
+
+	/* DMA Coalescing */
+	s32 (*dmac_config)(struct ixgbe_hw *hw);
+	s32 (*dmac_update_tcs)(struct ixgbe_hw *hw);
+	s32 (*dmac_config_tcs)(struct ixgbe_hw *hw);
+	s32 (*read_iosf_sb_reg)(struct ixgbe_hw *, u32, u32, u32 *);
+	s32 (*write_iosf_sb_reg)(struct ixgbe_hw *, u32, u32, u32);
+};
+
+struct ixgbe_phy_operations {
+	s32 (*identify)(struct ixgbe_hw *);
+	s32 (*identify_sfp)(struct ixgbe_hw *);
+	s32 (*init)(struct ixgbe_hw *);
+	s32 (*reset)(struct ixgbe_hw *);
+	s32 (*read_reg)(struct ixgbe_hw *, u32, u32, u16 *);
+	s32 (*write_reg)(struct ixgbe_hw *, u32, u32, u16);
+	s32 (*read_reg_mdi)(struct ixgbe_hw *, u32, u32, u16 *);
+	s32 (*write_reg_mdi)(struct ixgbe_hw *, u32, u32, u16);
+	s32 (*setup_link)(struct ixgbe_hw *);
+	s32 (*setup_internal_link)(struct ixgbe_hw *);
+	s32 (*setup_link_speed)(struct ixgbe_hw *, ixgbe_link_speed, bool);
+	s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *);
+	s32 (*read_i2c_byte)(struct ixgbe_hw *, u8, u8, u8 *);
+	s32 (*write_i2c_byte)(struct ixgbe_hw *, u8, u8, u8);
+	s32 (*read_i2c_sff8472)(struct ixgbe_hw *, u8 , u8 *);
+	s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *);
+	s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8);
+	s32 (*check_overtemp)(struct ixgbe_hw *);
+	s32 (*set_phy_power)(struct ixgbe_hw *, bool on);
+	s32 (*enter_lplu)(struct ixgbe_hw *);
+	s32 (*handle_lasi)(struct ixgbe_hw *hw);
+	s32 (*read_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr,
+				      u8 *value);
+	s32 (*write_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr,
+				       u8 value);
+};
+
+struct ixgbe_link_operations {
+	s32 (*read_link)(struct ixgbe_hw *, u8 addr, u16 reg, u16 *val);
+	s32 (*read_link_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
+				  u16 *val);
+	s32 (*write_link)(struct ixgbe_hw *, u8 addr, u16 reg, u16 val);
+	s32 (*write_link_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
+				   u16 val);
+};
+
+struct ixgbe_link_info {
+	struct ixgbe_link_operations ops;
+	u8 addr;
+};
+
+struct ixgbe_eeprom_info {
+	struct ixgbe_eeprom_operations  ops;
+	enum ixgbe_eeprom_type          type;
+	u32                             semaphore_delay;
+	u16                             word_size;
+	u16                             address_bits;
+	u16                             word_page_size;
+	u16				ctrl_word_3;
+};
+
+#define IXGBE_FLAGS_DOUBLE_RESET_REQUIRED	0x01
+struct ixgbe_mac_info {
+	struct ixgbe_mac_operations     ops;
+	enum ixgbe_mac_type             type;
+	u8                              addr[ETH_ALEN];
+	u8                              perm_addr[ETH_ALEN];
+	u8                              san_addr[ETH_ALEN];
+	/* prefix for World Wide Node Name (WWNN) */
+	u16                             wwnn_prefix;
+	/* prefix for World Wide Port Name (WWPN) */
+	u16                             wwpn_prefix;
+	u16				max_msix_vectors;
+#define IXGBE_MAX_MTA			128
+	u32				mta_shadow[IXGBE_MAX_MTA];
+	s32                             mc_filter_type;
+	u32                             mcft_size;
+	u32                             vft_size;
+	u32                             num_rar_entries;
+	u32                             rar_highwater;
+	u32				rx_pb_size;
+	u32                             max_tx_queues;
+	u32                             max_rx_queues;
+	u32                             orig_autoc;
+	u32                             orig_autoc2;
+	bool                            orig_link_settings_stored;
+	bool                            autotry_restart;
+	u8                              flags;
+	u8				san_mac_rar_index;
+	struct ixgbe_thermal_sensor_data  thermal_sensor_data;
+	bool				set_lben;
+	u8				led_link_act;
+};
+
+struct ixgbe_phy_info {
+	struct ixgbe_phy_operations     ops;
+	struct mdio_if_info		mdio;
+	enum ixgbe_phy_type             type;
+	u32                             id;
+	enum ixgbe_sfp_type             sfp_type;
+	bool                            sfp_setup_needed;
+	u32                             revision;
+	enum ixgbe_media_type           media_type;
+	u32				phy_semaphore_mask;
+	bool                            reset_disable;
+	ixgbe_autoneg_advertised        autoneg_advertised;
+	ixgbe_link_speed		speeds_supported;
+	ixgbe_link_speed		eee_speeds_supported;
+	ixgbe_link_speed		eee_speeds_advertised;
+	enum ixgbe_smart_speed          smart_speed;
+	bool                            smart_speed_active;
+	bool                            multispeed_fiber;
+	bool                            reset_if_overtemp;
+	bool                            qsfp_shared_i2c_bus;
+	u32				nw_mng_if_sel;
+};
+
+#include "ixgbe_mbx.h"
+
+struct ixgbe_mbx_operations {
+	s32 (*init_params)(struct ixgbe_hw *hw);
+	s32 (*read)(struct ixgbe_hw *, u32 *, u16,  u16);
+	s32 (*write)(struct ixgbe_hw *, u32 *, u16, u16);
+	s32 (*read_posted)(struct ixgbe_hw *, u32 *, u16,  u16);
+	s32 (*write_posted)(struct ixgbe_hw *, u32 *, u16, u16);
+	s32 (*check_for_msg)(struct ixgbe_hw *, u16);
+	s32 (*check_for_ack)(struct ixgbe_hw *, u16);
+	s32 (*check_for_rst)(struct ixgbe_hw *, u16);
+};
+
+struct ixgbe_mbx_stats {
+	u32 msgs_tx;
+	u32 msgs_rx;
+
+	u32 acks;
+	u32 reqs;
+	u32 rsts;
+};
+
+struct ixgbe_mbx_info {
+	const struct ixgbe_mbx_operations *ops;
+	struct ixgbe_mbx_stats stats;
+	u32 timeout;
+	u32 usec_delay;
+	u32 v2p_mailbox;
+	u16 size;
+};
+
+struct ixgbe_hw {
+	u8 __iomem			*hw_addr;
+	void				*back;
+	struct ixgbe_mac_info		mac;
+	struct ixgbe_addr_filter_info	addr_ctrl;
+	struct ixgbe_fc_info		fc;
+	struct ixgbe_phy_info		phy;
+	struct ixgbe_link_info		link;
+	struct ixgbe_eeprom_info	eeprom;
+	struct ixgbe_bus_info		bus;
+	struct ixgbe_mbx_info		mbx;
+	const u32			*mvals;
+	u16				device_id;
+	u16				vendor_id;
+	u16				subsystem_device_id;
+	u16				subsystem_vendor_id;
+	u8				revision_id;
+	bool				adapter_stopped;
+	bool				force_full_reset;
+	bool				allow_unsupported_sfp;
+	bool				wol_enabled;
+	bool				need_crosstalk_fix;
+};
+
+struct ixgbe_info {
+	enum ixgbe_mac_type		mac;
+	s32 				(*get_invariants)(struct ixgbe_hw *);
+	const struct ixgbe_mac_operations	*mac_ops;
+	const struct ixgbe_eeprom_operations	*eeprom_ops;
+	const struct ixgbe_phy_operations	*phy_ops;
+	const struct ixgbe_mbx_operations	*mbx_ops;
+	const struct ixgbe_link_operations	*link_ops;
+	const u32			*mvals;
+};
+
+
+/* Error Codes */
+#define IXGBE_ERR_EEPROM                        -1
+#define IXGBE_ERR_EEPROM_CHECKSUM               -2
+#define IXGBE_ERR_PHY                           -3
+#define IXGBE_ERR_CONFIG                        -4
+#define IXGBE_ERR_PARAM                         -5
+#define IXGBE_ERR_MAC_TYPE                      -6
+#define IXGBE_ERR_UNKNOWN_PHY                   -7
+#define IXGBE_ERR_LINK_SETUP                    -8
+#define IXGBE_ERR_ADAPTER_STOPPED               -9
+#define IXGBE_ERR_INVALID_MAC_ADDR              -10
+#define IXGBE_ERR_DEVICE_NOT_SUPPORTED          -11
+#define IXGBE_ERR_PRIMARY_REQUESTS_PENDING      -12
+#define IXGBE_ERR_INVALID_LINK_SETTINGS         -13
+#define IXGBE_ERR_AUTONEG_NOT_COMPLETE          -14
+#define IXGBE_ERR_RESET_FAILED                  -15
+#define IXGBE_ERR_SWFW_SYNC                     -16
+#define IXGBE_ERR_PHY_ADDR_INVALID              -17
+#define IXGBE_ERR_I2C                           -18
+#define IXGBE_ERR_SFP_NOT_SUPPORTED             -19
+#define IXGBE_ERR_SFP_NOT_PRESENT               -20
+#define IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT       -21
+#define IXGBE_ERR_NO_SAN_ADDR_PTR               -22
+#define IXGBE_ERR_FDIR_REINIT_FAILED            -23
+#define IXGBE_ERR_EEPROM_VERSION                -24
+#define IXGBE_ERR_NO_SPACE                      -25
+#define IXGBE_ERR_OVERTEMP                      -26
+#define IXGBE_ERR_FC_NOT_NEGOTIATED             -27
+#define IXGBE_ERR_FC_NOT_SUPPORTED              -28
+#define IXGBE_ERR_SFP_SETUP_NOT_COMPLETE        -30
+#define IXGBE_ERR_PBA_SECTION                   -31
+#define IXGBE_ERR_INVALID_ARGUMENT              -32
+#define IXGBE_ERR_HOST_INTERFACE_COMMAND        -33
+#define IXGBE_ERR_FDIR_CMD_INCOMPLETE		-38
+#define IXGBE_ERR_FW_RESP_INVALID		-39
+#define IXGBE_ERR_TOKEN_RETRY			-40
+#define IXGBE_NOT_IMPLEMENTED                   0x7FFFFFFF
+
+#define IXGBE_FUSES0_GROUP(_i)		(0x11158 + ((_i) * 4))
+#define IXGBE_FUSES0_300MHZ		BIT(5)
+#define IXGBE_FUSES0_REV_MASK		(3u << 6)
+
+#define IXGBE_KRM_PORT_CAR_GEN_CTRL(P)	((P) ? 0x8010 : 0x4010)
+#define IXGBE_KRM_LINK_S1(P)		((P) ? 0x8200 : 0x4200)
+#define IXGBE_KRM_LINK_CTRL_1(P)	((P) ? 0x820C : 0x420C)
+#define IXGBE_KRM_AN_CNTL_1(P)		((P) ? 0x822C : 0x422C)
+#define IXGBE_KRM_AN_CNTL_4(P)		((P) ? 0x8238 : 0x4238)
+#define IXGBE_KRM_AN_CNTL_8(P)		((P) ? 0x8248 : 0x4248)
+#define IXGBE_KRM_PCS_KX_AN(P)		((P) ? 0x9918 : 0x5918)
+#define IXGBE_KRM_SGMII_CTRL(P)		((P) ? 0x82A0 : 0x42A0)
+#define IXGBE_KRM_LP_BASE_PAGE_HIGH(P)	((P) ? 0x836C : 0x436C)
+#define IXGBE_KRM_DSP_TXFFE_STATE_4(P)	((P) ? 0x8634 : 0x4634)
+#define IXGBE_KRM_DSP_TXFFE_STATE_5(P)	((P) ? 0x8638 : 0x4638)
+#define IXGBE_KRM_RX_TRN_LINKUP_CTRL(P)	((P) ? 0x8B00 : 0x4B00)
+#define IXGBE_KRM_PMD_DFX_BURNIN(P)	((P) ? 0x8E00 : 0x4E00)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20(P)	((P) ? 0x9054 : 0x5054)
+#define IXGBE_KRM_TX_COEFF_CTRL_1(P)	((P) ? 0x9520 : 0x5520)
+#define IXGBE_KRM_RX_ANA_CTL(P)		((P) ? 0x9A00 : 0x5A00)
+#define IXGBE_KRM_FLX_TMRS_CTRL_ST31(P)	((P) ? 0x9180 : 0x5180)
+
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA		~(0x3 << 20)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR		BIT(20)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_LR		(0x2 << 20)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN		BIT(25)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN		BIT(26)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN		BIT(27)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10M		~(0x7 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_100M		BIT(28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G		(0x2 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G		(0x3 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN		(0x4 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_2_5G		(0x7 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK		(0x7 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART	BIT(31)
+
+#define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_32B		BIT(9)
+#define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_KRPCS		BIT(11)
+
+#define IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK	(7u << 8)
+#define IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G	(2u << 8)
+#define IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G	(4u << 8)
+#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_SGMII_EN		BIT(12)
+#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_CLAUSE_37_EN	BIT(13)
+#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_FEC_REQ		BIT(14)
+#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_FEC		BIT(15)
+#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KX		BIT(16)
+#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KR		BIT(18)
+#define IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KX		BIT(24)
+#define IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KR		BIT(26)
+#define IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE		BIT(28)
+#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE		BIT(29)
+#define IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART		BIT(31)
+
+#define IXGBE_KRM_AN_CNTL_1_SYM_PAUSE			BIT(28)
+#define IXGBE_KRM_AN_CNTL_1_ASM_PAUSE			BIT(29)
+
+#define IXGBE_KRM_AN_CNTL_8_LINEAR			BIT(0)
+#define IXGBE_KRM_AN_CNTL_8_LIMITING			BIT(1)
+
+#define IXGBE_KRM_LP_BASE_PAGE_HIGH_SYM_PAUSE		BIT(10)
+#define IXGBE_KRM_LP_BASE_PAGE_HIGH_ASM_PAUSE		BIT(11)
+#define IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_100_D	BIT(12)
+#define IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_10_D		BIT(19)
+
+#define IXGBE_KRM_DSP_TXFFE_STATE_C0_EN			BIT(6)
+#define IXGBE_KRM_DSP_TXFFE_STATE_CP1_CN1_EN		BIT(15)
+#define IXGBE_KRM_DSP_TXFFE_STATE_CO_ADAPT_EN		BIT(16)
+
+#define IXGBE_KRM_RX_TRN_LINKUP_CTRL_CONV_WO_PROTOCOL	BIT(4)
+#define IXGBE_KRM_RX_TRN_LINKUP_CTRL_PROTOCOL_BYPASS	BIT(2)
+
+#define IXGBE_KRM_PMD_DFX_BURNIN_TX_RX_KR_LB_MASK	(3u << 16)
+
+#define IXGBE_KRM_TX_COEFF_CTRL_1_CMINUS1_OVRRD_EN	BIT(1)
+#define IXGBE_KRM_TX_COEFF_CTRL_1_CPLUS1_OVRRD_EN	BIT(2)
+#define IXGBE_KRM_TX_COEFF_CTRL_1_CZERO_EN		BIT(3)
+#define IXGBE_KRM_TX_COEFF_CTRL_1_OVRRD_EN		BIT(31)
+
+#define IXGBE_SB_IOSF_INDIRECT_CTRL		0x00011144
+#define IXGBE_SB_IOSF_INDIRECT_DATA		0x00011148
+
+#define IXGBE_SB_IOSF_CTRL_ADDR_SHIFT		0
+#define IXGBE_SB_IOSF_CTRL_ADDR_MASK		0xFF
+#define IXGBE_SB_IOSF_CTRL_RESP_STAT_SHIFT	18
+#define IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK \
+				(0x3 << IXGBE_SB_IOSF_CTRL_RESP_STAT_SHIFT)
+#define IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT	20
+#define IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK \
+				(0xFF << IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT)
+#define IXGBE_SB_IOSF_CTRL_TARGET_SELECT_SHIFT	28
+#define IXGBE_SB_IOSF_CTRL_TARGET_SELECT_MASK	0x7
+#define IXGBE_SB_IOSF_CTRL_BUSY_SHIFT		31
+#define IXGBE_SB_IOSF_CTRL_BUSY		BIT(IXGBE_SB_IOSF_CTRL_BUSY_SHIFT)
+#define IXGBE_SB_IOSF_TARGET_KR_PHY	0
+
+#define IXGBE_NW_MNG_IF_SEL		0x00011178
+#define IXGBE_NW_MNG_IF_SEL_MDIO_ACT		BIT(1)
+#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_10M	BIT(17)
+#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_100M	BIT(18)
+#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_1G	BIT(19)
+#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_2_5G	BIT(20)
+#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_10G	BIT(21)
+#define IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE	BIT(25)
+#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE	BIT(24) /* X552 only */
+#define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT	3
+#define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD	\
+				(0x1F << IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT)
+#endif /* _IXGBE_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
new file mode 100644
index 0000000000..d5cfb51ff6
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
@@ -0,0 +1,922 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+
+#include "ixgbe.h"
+#include "ixgbe_phy.h"
+#include "ixgbe_x540.h"
+
+#define IXGBE_X540_MAX_TX_QUEUES	128
+#define IXGBE_X540_MAX_RX_QUEUES	128
+#define IXGBE_X540_RAR_ENTRIES		128
+#define IXGBE_X540_MC_TBL_SIZE		128
+#define IXGBE_X540_VFT_TBL_SIZE		128
+#define IXGBE_X540_RX_PB_SIZE		384
+
+static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw);
+static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw);
+static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw);
+static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw);
+
+enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw)
+{
+	return ixgbe_media_type_copper;
+}
+
+s32 ixgbe_get_invariants_X540(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	struct ixgbe_phy_info *phy = &hw->phy;
+
+	/* set_phy_power was set by default to NULL */
+	phy->ops.set_phy_power = ixgbe_set_copper_phy_power;
+
+	mac->mcft_size = IXGBE_X540_MC_TBL_SIZE;
+	mac->vft_size = IXGBE_X540_VFT_TBL_SIZE;
+	mac->num_rar_entries = IXGBE_X540_RAR_ENTRIES;
+	mac->rx_pb_size = IXGBE_X540_RX_PB_SIZE;
+	mac->max_rx_queues = IXGBE_X540_MAX_RX_QUEUES;
+	mac->max_tx_queues = IXGBE_X540_MAX_TX_QUEUES;
+	mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_setup_mac_link_X540 - Set the auto advertised capabilitires
+ *  @hw: pointer to hardware structure
+ *  @speed: new link speed
+ *  @autoneg_wait_to_complete: true when waiting for completion is needed
+ **/
+s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+			      bool autoneg_wait_to_complete)
+{
+	return hw->phy.ops.setup_link_speed(hw, speed,
+					    autoneg_wait_to_complete);
+}
+
+/**
+ *  ixgbe_reset_hw_X540 - Perform hardware reset
+ *  @hw: pointer to hardware structure
+ *
+ *  Resets the hardware by resetting the transmit and receive units, masks
+ *  and clears all interrupts, perform a PHY reset, and perform a link (MAC)
+ *  reset.
+ **/
+s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw)
+{
+	s32 status;
+	u32 ctrl, i;
+	u32 swfw_mask = hw->phy.phy_semaphore_mask;
+
+	/* Call adapter stop to disable tx/rx and clear interrupts */
+	status = hw->mac.ops.stop_adapter(hw);
+	if (status)
+		return status;
+
+	/* flush pending Tx transactions */
+	ixgbe_clear_tx_pending(hw);
+
+mac_reset_top:
+	status = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask);
+	if (status) {
+		hw_dbg(hw, "semaphore failed with %d", status);
+		return IXGBE_ERR_SWFW_SYNC;
+	}
+
+	ctrl = IXGBE_CTRL_RST;
+	ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
+	IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
+	IXGBE_WRITE_FLUSH(hw);
+	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+	usleep_range(1000, 1200);
+
+	/* Poll for reset bit to self-clear indicating reset is complete */
+	for (i = 0; i < 10; i++) {
+		ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
+		if (!(ctrl & IXGBE_CTRL_RST_MASK))
+			break;
+		udelay(1);
+	}
+
+	if (ctrl & IXGBE_CTRL_RST_MASK) {
+		status = IXGBE_ERR_RESET_FAILED;
+		hw_dbg(hw, "Reset polling failed to complete.\n");
+	}
+	msleep(100);
+
+	/*
+	 * Double resets are required for recovery from certain error
+	 * conditions.  Between resets, it is necessary to stall to allow time
+	 * for any pending HW events to complete.
+	 */
+	if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) {
+		hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
+		goto mac_reset_top;
+	}
+
+	/* Set the Rx packet buffer size. */
+	IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(0), 384 << IXGBE_RXPBSIZE_SHIFT);
+
+	/* Store the permanent mac address */
+	hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
+
+	/*
+	 * Store MAC address from RAR0, clear receive address registers, and
+	 * clear the multicast table.  Also reset num_rar_entries to 128,
+	 * since we modify this value when programming the SAN MAC address.
+	 */
+	hw->mac.num_rar_entries = IXGBE_X540_MAX_TX_QUEUES;
+	hw->mac.ops.init_rx_addrs(hw);
+
+	/* Store the permanent SAN mac address */
+	hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr);
+
+	/* Add the SAN MAC address to the RAR only if it's a valid address */
+	if (is_valid_ether_addr(hw->mac.san_addr)) {
+		/* Save the SAN MAC RAR index */
+		hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1;
+
+		hw->mac.ops.set_rar(hw, hw->mac.san_mac_rar_index,
+				    hw->mac.san_addr, 0, IXGBE_RAH_AV);
+
+		/* clear VMDq pool/queue selection for this RAR */
+		hw->mac.ops.clear_vmdq(hw, hw->mac.san_mac_rar_index,
+				       IXGBE_CLEAR_VMDQ_ALL);
+
+		/* Reserve the last RAR for the SAN MAC address */
+		hw->mac.num_rar_entries--;
+	}
+
+	/* Store the alternative WWNN/WWPN prefix */
+	hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix,
+				   &hw->mac.wwpn_prefix);
+
+	return status;
+}
+
+/**
+ *  ixgbe_start_hw_X540 - Prepare hardware for Tx/Rx
+ *  @hw: pointer to hardware structure
+ *
+ *  Starts the hardware using the generic start_hw function
+ *  and the generation start_hw function.
+ *  Then performs revision-specific operations, if any.
+ **/
+s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw)
+{
+	s32 ret_val;
+
+	ret_val = ixgbe_start_hw_generic(hw);
+	if (ret_val)
+		return ret_val;
+
+	return ixgbe_start_hw_gen2(hw);
+}
+
+/**
+ *  ixgbe_init_eeprom_params_X540 - Initialize EEPROM params
+ *  @hw: pointer to hardware structure
+ *
+ *  Initializes the EEPROM parameters ixgbe_eeprom_info within the
+ *  ixgbe_hw struct in order to set up EEPROM access.
+ **/
+s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw)
+{
+	struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+	u32 eec;
+	u16 eeprom_size;
+
+	if (eeprom->type == ixgbe_eeprom_uninitialized) {
+		eeprom->semaphore_delay = 10;
+		eeprom->type = ixgbe_flash;
+
+		eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+		eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
+				    IXGBE_EEC_SIZE_SHIFT);
+		eeprom->word_size = BIT(eeprom_size +
+					IXGBE_EEPROM_WORD_SIZE_SHIFT);
+
+		hw_dbg(hw, "Eeprom params: type = %d, size = %d\n",
+		       eeprom->type, eeprom->word_size);
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbe_read_eerd_X540- Read EEPROM word using EERD
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to read
+ *  @data: word read from the EEPROM
+ *
+ *  Reads a 16 bit word from the EEPROM using the EERD register.
+ **/
+static s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data)
+{
+	s32 status;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM))
+		return IXGBE_ERR_SWFW_SYNC;
+
+	status = ixgbe_read_eerd_generic(hw, offset, data);
+
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+	return status;
+}
+
+/**
+ *  ixgbe_read_eerd_buffer_X540 - Read EEPROM word(s) using EERD
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to read
+ *  @words: number of words
+ *  @data: word(s) read from the EEPROM
+ *
+ *  Reads a 16 bit word(s) from the EEPROM using the EERD register.
+ **/
+static s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw,
+				       u16 offset, u16 words, u16 *data)
+{
+	s32 status;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM))
+		return IXGBE_ERR_SWFW_SYNC;
+
+	status = ixgbe_read_eerd_buffer_generic(hw, offset, words, data);
+
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+	return status;
+}
+
+/**
+ *  ixgbe_write_eewr_X540 - Write EEPROM word using EEWR
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to write
+ *  @data: word write to the EEPROM
+ *
+ *  Write a 16 bit word to the EEPROM using the EEWR register.
+ **/
+static s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data)
+{
+	s32 status;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM))
+		return IXGBE_ERR_SWFW_SYNC;
+
+	status = ixgbe_write_eewr_generic(hw, offset, data);
+
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+	return status;
+}
+
+/**
+ *  ixgbe_write_eewr_buffer_X540 - Write EEPROM word(s) using EEWR
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to write
+ *  @words: number of words
+ *  @data: word(s) write to the EEPROM
+ *
+ *  Write a 16 bit word(s) to the EEPROM using the EEWR register.
+ **/
+static s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw,
+					u16 offset, u16 words, u16 *data)
+{
+	s32 status;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM))
+		return IXGBE_ERR_SWFW_SYNC;
+
+	status = ixgbe_write_eewr_buffer_generic(hw, offset, words, data);
+
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+	return status;
+}
+
+/**
+ *  ixgbe_calc_eeprom_checksum_X540 - Calculates and returns the checksum
+ *
+ *  This function does not use synchronization for EERD and EEWR. It can
+ *  be used internally by function which utilize ixgbe_acquire_swfw_sync_X540.
+ *
+ *  @hw: pointer to hardware structure
+ **/
+static s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw)
+{
+	u16 i;
+	u16 j;
+	u16 checksum = 0;
+	u16 length = 0;
+	u16 pointer = 0;
+	u16 word = 0;
+	u16 checksum_last_word = IXGBE_EEPROM_CHECKSUM;
+	u16 ptr_start = IXGBE_PCIE_ANALOG_PTR;
+
+	/*
+	 * Do not use hw->eeprom.ops.read because we do not want to take
+	 * the synchronization semaphores here. Instead use
+	 * ixgbe_read_eerd_generic
+	 */
+
+	/* Include 0x0-0x3F in the checksum */
+	for (i = 0; i < checksum_last_word; i++) {
+		if (ixgbe_read_eerd_generic(hw, i, &word)) {
+			hw_dbg(hw, "EEPROM read failed\n");
+			return IXGBE_ERR_EEPROM;
+		}
+		checksum += word;
+	}
+
+	/*
+	 * Include all data from pointers 0x3, 0x6-0xE.  This excludes the
+	 * FW, PHY module, and PCIe Expansion/Option ROM pointers.
+	 */
+	for (i = ptr_start; i < IXGBE_FW_PTR; i++) {
+		if (i == IXGBE_PHY_PTR || i == IXGBE_OPTION_ROM_PTR)
+			continue;
+
+		if (ixgbe_read_eerd_generic(hw, i, &pointer)) {
+			hw_dbg(hw, "EEPROM read failed\n");
+			break;
+		}
+
+		/* Skip pointer section if the pointer is invalid. */
+		if (pointer == 0xFFFF || pointer == 0 ||
+		    pointer >= hw->eeprom.word_size)
+			continue;
+
+		if (ixgbe_read_eerd_generic(hw, pointer, &length)) {
+			hw_dbg(hw, "EEPROM read failed\n");
+			return IXGBE_ERR_EEPROM;
+		}
+
+		/* Skip pointer section if length is invalid. */
+		if (length == 0xFFFF || length == 0 ||
+		    (pointer + length) >= hw->eeprom.word_size)
+			continue;
+
+		for (j = pointer + 1; j <= pointer + length; j++) {
+			if (ixgbe_read_eerd_generic(hw, j, &word)) {
+				hw_dbg(hw, "EEPROM read failed\n");
+				return IXGBE_ERR_EEPROM;
+			}
+			checksum += word;
+		}
+	}
+
+	checksum = (u16)IXGBE_EEPROM_SUM - checksum;
+
+	return (s32)checksum;
+}
+
+/**
+ *  ixgbe_validate_eeprom_checksum_X540 - Validate EEPROM checksum
+ *  @hw: pointer to hardware structure
+ *  @checksum_val: calculated checksum
+ *
+ *  Performs checksum calculation and validates the EEPROM checksum.  If the
+ *  caller does not need checksum_val, the value can be NULL.
+ **/
+static s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw,
+					       u16 *checksum_val)
+{
+	s32 status;
+	u16 checksum;
+	u16 read_checksum = 0;
+
+	/* Read the first word from the EEPROM. If this times out or fails, do
+	 * not continue or we could be in for a very long wait while every
+	 * EEPROM read fails
+	 */
+	status = hw->eeprom.ops.read(hw, 0, &checksum);
+	if (status) {
+		hw_dbg(hw, "EEPROM read failed\n");
+		return status;
+	}
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM))
+		return IXGBE_ERR_SWFW_SYNC;
+
+	status = hw->eeprom.ops.calc_checksum(hw);
+	if (status < 0)
+		goto out;
+
+	checksum = (u16)(status & 0xffff);
+
+	/* Do not use hw->eeprom.ops.read because we do not want to take
+	 * the synchronization semaphores twice here.
+	 */
+	status = ixgbe_read_eerd_generic(hw, IXGBE_EEPROM_CHECKSUM,
+					 &read_checksum);
+	if (status)
+		goto out;
+
+	/* Verify read checksum from EEPROM is the same as
+	 * calculated checksum
+	 */
+	if (read_checksum != checksum) {
+		hw_dbg(hw, "Invalid EEPROM checksum");
+		status = IXGBE_ERR_EEPROM_CHECKSUM;
+	}
+
+	/* If the user cares, return the calculated checksum */
+	if (checksum_val)
+		*checksum_val = checksum;
+
+out:
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+
+	return status;
+}
+
+/**
+ * ixgbe_update_eeprom_checksum_X540 - Updates the EEPROM checksum and flash
+ * @hw: pointer to hardware structure
+ *
+ * After writing EEPROM to shadow RAM using EEWR register, software calculates
+ * checksum and updates the EEPROM and instructs the hardware to update
+ * the flash.
+ **/
+static s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw)
+{
+	s32 status;
+	u16 checksum;
+
+	/* Read the first word from the EEPROM. If this times out or fails, do
+	 * not continue or we could be in for a very long wait while every
+	 * EEPROM read fails
+	 */
+	status = hw->eeprom.ops.read(hw, 0, &checksum);
+	if (status) {
+		hw_dbg(hw, "EEPROM read failed\n");
+		return status;
+	}
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM))
+		return  IXGBE_ERR_SWFW_SYNC;
+
+	status = hw->eeprom.ops.calc_checksum(hw);
+	if (status < 0)
+		goto out;
+
+	checksum = (u16)(status & 0xffff);
+
+	/* Do not use hw->eeprom.ops.write because we do not want to
+	 * take the synchronization semaphores twice here.
+	 */
+	status = ixgbe_write_eewr_generic(hw, IXGBE_EEPROM_CHECKSUM, checksum);
+	if (status)
+		goto out;
+
+	status = ixgbe_update_flash_X540(hw);
+
+out:
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+	return status;
+}
+
+/**
+ * ixgbe_update_flash_X540 - Instruct HW to copy EEPROM to Flash device
+ * @hw: pointer to hardware structure
+ *
+ * Set FLUP (bit 23) of the EEC register to instruct Hardware to copy
+ * EEPROM from shadow RAM to the flash device.
+ **/
+static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw)
+{
+	u32 flup;
+	s32 status;
+
+	status = ixgbe_poll_flash_update_done_X540(hw);
+	if (status == IXGBE_ERR_EEPROM) {
+		hw_dbg(hw, "Flash update time out\n");
+		return status;
+	}
+
+	flup = IXGBE_READ_REG(hw, IXGBE_EEC(hw)) | IXGBE_EEC_FLUP;
+	IXGBE_WRITE_REG(hw, IXGBE_EEC(hw), flup);
+
+	status = ixgbe_poll_flash_update_done_X540(hw);
+	if (status == 0)
+		hw_dbg(hw, "Flash update complete\n");
+	else
+		hw_dbg(hw, "Flash update time out\n");
+
+	if (hw->revision_id == 0) {
+		flup = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+
+		if (flup & IXGBE_EEC_SEC1VAL) {
+			flup |= IXGBE_EEC_FLUP;
+			IXGBE_WRITE_REG(hw, IXGBE_EEC(hw), flup);
+		}
+
+		status = ixgbe_poll_flash_update_done_X540(hw);
+		if (status == 0)
+			hw_dbg(hw, "Flash update complete\n");
+		else
+			hw_dbg(hw, "Flash update time out\n");
+	}
+
+	return status;
+}
+
+/**
+ * ixgbe_poll_flash_update_done_X540 - Poll flash update status
+ * @hw: pointer to hardware structure
+ *
+ * Polls the FLUDONE (bit 26) of the EEC Register to determine when the
+ * flash update is done.
+ **/
+static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw)
+{
+	u32 i;
+	u32 reg;
+
+	for (i = 0; i < IXGBE_FLUDONE_ATTEMPTS; i++) {
+		reg = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+		if (reg & IXGBE_EEC_FLUDONE)
+			return 0;
+		udelay(5);
+	}
+	return IXGBE_ERR_EEPROM;
+}
+
+/**
+ * ixgbe_acquire_swfw_sync_X540 - Acquire SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to acquire
+ *
+ * Acquires the SWFW semaphore thought the SW_FW_SYNC register for
+ * the specified function (CSR, PHY0, PHY1, NVM, Flash)
+ **/
+s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask)
+{
+	u32 swmask = mask & IXGBE_GSSR_NVM_PHY_MASK;
+	u32 swi2c_mask = mask & IXGBE_GSSR_I2C_MASK;
+	u32 fwmask = swmask << 5;
+	u32 timeout = 200;
+	u32 hwmask = 0;
+	u32 swfw_sync;
+	u32 i;
+
+	if (swmask & IXGBE_GSSR_EEP_SM)
+		hwmask = IXGBE_GSSR_FLASH_SM;
+
+	/* SW only mask does not have FW bit pair */
+	if (mask & IXGBE_GSSR_SW_MNG_SM)
+		swmask |= IXGBE_GSSR_SW_MNG_SM;
+
+	swmask |= swi2c_mask;
+	fwmask |= swi2c_mask << 2;
+	for (i = 0; i < timeout; i++) {
+		/* SW NVM semaphore bit is used for access to all
+		 * SW_FW_SYNC bits (not just NVM)
+		 */
+		if (ixgbe_get_swfw_sync_semaphore(hw))
+			return IXGBE_ERR_SWFW_SYNC;
+
+		swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC(hw));
+		if (!(swfw_sync & (fwmask | swmask | hwmask))) {
+			swfw_sync |= swmask;
+			IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC(hw), swfw_sync);
+			ixgbe_release_swfw_sync_semaphore(hw);
+			usleep_range(5000, 6000);
+			return 0;
+		}
+		/* Firmware currently using resource (fwmask), hardware
+		 * currently using resource (hwmask), or other software
+		 * thread currently using resource (swmask)
+		 */
+		ixgbe_release_swfw_sync_semaphore(hw);
+		usleep_range(5000, 10000);
+	}
+
+	/* If the resource is not released by the FW/HW the SW can assume that
+	 * the FW/HW malfunctions. In that case the SW should set the SW bit(s)
+	 * of the requested resource(s) while ignoring the corresponding FW/HW
+	 * bits in the SW_FW_SYNC register.
+	 */
+	if (ixgbe_get_swfw_sync_semaphore(hw))
+		return IXGBE_ERR_SWFW_SYNC;
+	swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC(hw));
+	if (swfw_sync & (fwmask | hwmask)) {
+		swfw_sync |= swmask;
+		IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC(hw), swfw_sync);
+		ixgbe_release_swfw_sync_semaphore(hw);
+		usleep_range(5000, 6000);
+		return 0;
+	}
+	/* If the resource is not released by other SW the SW can assume that
+	 * the other SW malfunctions. In that case the SW should clear all SW
+	 * flags that it does not own and then repeat the whole process once
+	 * again.
+	 */
+	if (swfw_sync & swmask) {
+		u32 rmask = IXGBE_GSSR_EEP_SM | IXGBE_GSSR_PHY0_SM |
+			    IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_MAC_CSR_SM |
+			    IXGBE_GSSR_SW_MNG_SM;
+
+		if (swi2c_mask)
+			rmask |= IXGBE_GSSR_I2C_MASK;
+		ixgbe_release_swfw_sync_X540(hw, rmask);
+		ixgbe_release_swfw_sync_semaphore(hw);
+		return IXGBE_ERR_SWFW_SYNC;
+	}
+	ixgbe_release_swfw_sync_semaphore(hw);
+
+	return IXGBE_ERR_SWFW_SYNC;
+}
+
+/**
+ * ixgbe_release_swfw_sync_X540 - Release SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to release
+ *
+ * Releases the SWFW semaphore through the SW_FW_SYNC register
+ * for the specified function (CSR, PHY0, PHY1, EVM, Flash)
+ **/
+void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask)
+{
+	u32 swmask = mask & (IXGBE_GSSR_NVM_PHY_MASK | IXGBE_GSSR_SW_MNG_SM);
+	u32 swfw_sync;
+
+	if (mask & IXGBE_GSSR_I2C_MASK)
+		swmask |= mask & IXGBE_GSSR_I2C_MASK;
+	ixgbe_get_swfw_sync_semaphore(hw);
+
+	swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC(hw));
+	swfw_sync &= ~swmask;
+	IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC(hw), swfw_sync);
+
+	ixgbe_release_swfw_sync_semaphore(hw);
+	usleep_range(5000, 6000);
+}
+
+/**
+ * ixgbe_get_swfw_sync_semaphore - Get hardware semaphore
+ * @hw: pointer to hardware structure
+ *
+ * Sets the hardware semaphores so SW/FW can gain control of shared resources
+ */
+static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw)
+{
+	u32 timeout = 2000;
+	u32 i;
+	u32 swsm;
+
+	/* Get SMBI software semaphore between device drivers first */
+	for (i = 0; i < timeout; i++) {
+		/* If the SMBI bit is 0 when we read it, then the bit will be
+		 * set and we have the semaphore
+		 */
+		swsm = IXGBE_READ_REG(hw, IXGBE_SWSM(hw));
+		if (!(swsm & IXGBE_SWSM_SMBI))
+			break;
+		usleep_range(50, 100);
+	}
+
+	if (i == timeout) {
+		hw_dbg(hw,
+		       "Software semaphore SMBI between device drivers not granted.\n");
+		return IXGBE_ERR_EEPROM;
+	}
+
+	/* Now get the semaphore between SW/FW through the REGSMP bit */
+	for (i = 0; i < timeout; i++) {
+		swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC(hw));
+		if (!(swsm & IXGBE_SWFW_REGSMP))
+			return 0;
+
+		usleep_range(50, 100);
+	}
+
+	/* Release semaphores and return error if SW NVM semaphore
+	 * was not granted because we do not have access to the EEPROM
+	 */
+	hw_dbg(hw, "REGSMP Software NVM semaphore not granted\n");
+	ixgbe_release_swfw_sync_semaphore(hw);
+	return IXGBE_ERR_EEPROM;
+}
+
+/**
+ * ixgbe_release_swfw_sync_semaphore - Release hardware semaphore
+ * @hw: pointer to hardware structure
+ *
+ * This function clears hardware semaphore bits.
+ **/
+static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw)
+{
+	 u32 swsm;
+
+	/* Release both semaphores by writing 0 to the bits REGSMP and SMBI */
+
+	swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC(hw));
+	swsm &= ~IXGBE_SWFW_REGSMP;
+	IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC(hw), swsm);
+
+	swsm = IXGBE_READ_REG(hw, IXGBE_SWSM(hw));
+	swsm &= ~IXGBE_SWSM_SMBI;
+	IXGBE_WRITE_REG(hw, IXGBE_SWSM(hw), swsm);
+
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ *  ixgbe_init_swfw_sync_X540 - Release hardware semaphore
+ *  @hw: pointer to hardware structure
+ *
+ *  This function reset hardware semaphore bits for a semaphore that may
+ *  have be left locked due to a catastrophic failure.
+ **/
+void ixgbe_init_swfw_sync_X540(struct ixgbe_hw *hw)
+{
+	u32 rmask;
+
+	/* First try to grab the semaphore but we don't need to bother
+	 * looking to see whether we got the lock or not since we do
+	 * the same thing regardless of whether we got the lock or not.
+	 * We got the lock - we release it.
+	 * We timeout trying to get the lock - we force its release.
+	 */
+	ixgbe_get_swfw_sync_semaphore(hw);
+	ixgbe_release_swfw_sync_semaphore(hw);
+
+	/* Acquire and release all software resources. */
+	rmask = IXGBE_GSSR_EEP_SM | IXGBE_GSSR_PHY0_SM |
+		IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_MAC_CSR_SM |
+		IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_I2C_MASK;
+
+	ixgbe_acquire_swfw_sync_X540(hw, rmask);
+	ixgbe_release_swfw_sync_X540(hw, rmask);
+}
+
+/**
+ * ixgbe_blink_led_start_X540 - Blink LED based on index.
+ * @hw: pointer to hardware structure
+ * @index: led number to blink
+ *
+ * Devices that implement the version 2 interface:
+ *   X540
+ **/
+s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index)
+{
+	u32 macc_reg;
+	u32 ledctl_reg;
+	ixgbe_link_speed speed;
+	bool link_up;
+
+	if (index > 3)
+		return IXGBE_ERR_PARAM;
+
+	/* Link should be up in order for the blink bit in the LED control
+	 * register to work. Force link and speed in the MAC if link is down.
+	 * This will be reversed when we stop the blinking.
+	 */
+	hw->mac.ops.check_link(hw, &speed, &link_up, false);
+	if (!link_up) {
+		macc_reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+		macc_reg |= IXGBE_MACC_FLU | IXGBE_MACC_FSV_10G | IXGBE_MACC_FS;
+		IXGBE_WRITE_REG(hw, IXGBE_MACC, macc_reg);
+	}
+	/* Set the LED to LINK_UP + BLINK. */
+	ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+	ledctl_reg &= ~IXGBE_LED_MODE_MASK(index);
+	ledctl_reg |= IXGBE_LED_BLINK(index);
+	IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, ledctl_reg);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
+
+/**
+ * ixgbe_blink_led_stop_X540 - Stop blinking LED based on index.
+ * @hw: pointer to hardware structure
+ * @index: led number to stop blinking
+ *
+ * Devices that implement the version 2 interface:
+ *   X540
+ **/
+s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index)
+{
+	u32 macc_reg;
+	u32 ledctl_reg;
+
+	if (index > 3)
+		return IXGBE_ERR_PARAM;
+
+	/* Restore the LED to its default value. */
+	ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+	ledctl_reg &= ~IXGBE_LED_MODE_MASK(index);
+	ledctl_reg |= IXGBE_LED_LINK_ACTIVE << IXGBE_LED_MODE_SHIFT(index);
+	ledctl_reg &= ~IXGBE_LED_BLINK(index);
+	IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, ledctl_reg);
+
+	/* Unforce link and speed in the MAC. */
+	macc_reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+	macc_reg &= ~(IXGBE_MACC_FLU | IXGBE_MACC_FSV_10G | IXGBE_MACC_FS);
+	IXGBE_WRITE_REG(hw, IXGBE_MACC, macc_reg);
+	IXGBE_WRITE_FLUSH(hw);
+
+	return 0;
+}
+static const struct ixgbe_mac_operations mac_ops_X540 = {
+	.init_hw                = &ixgbe_init_hw_generic,
+	.reset_hw               = &ixgbe_reset_hw_X540,
+	.start_hw               = &ixgbe_start_hw_X540,
+	.clear_hw_cntrs         = &ixgbe_clear_hw_cntrs_generic,
+	.get_media_type         = &ixgbe_get_media_type_X540,
+	.enable_rx_dma          = &ixgbe_enable_rx_dma_generic,
+	.get_mac_addr           = &ixgbe_get_mac_addr_generic,
+	.get_san_mac_addr       = &ixgbe_get_san_mac_addr_generic,
+	.get_device_caps        = &ixgbe_get_device_caps_generic,
+	.get_wwn_prefix         = &ixgbe_get_wwn_prefix_generic,
+	.stop_adapter           = &ixgbe_stop_adapter_generic,
+	.get_bus_info           = &ixgbe_get_bus_info_generic,
+	.set_lan_id             = &ixgbe_set_lan_id_multi_port_pcie,
+	.read_analog_reg8       = NULL,
+	.write_analog_reg8      = NULL,
+	.setup_link             = &ixgbe_setup_mac_link_X540,
+	.set_rxpba		= &ixgbe_set_rxpba_generic,
+	.check_link             = &ixgbe_check_mac_link_generic,
+	.get_link_capabilities  = &ixgbe_get_copper_link_capabilities_generic,
+	.led_on                 = &ixgbe_led_on_generic,
+	.led_off                = &ixgbe_led_off_generic,
+	.init_led_link_act	= ixgbe_init_led_link_act_generic,
+	.blink_led_start        = &ixgbe_blink_led_start_X540,
+	.blink_led_stop         = &ixgbe_blink_led_stop_X540,
+	.set_rar                = &ixgbe_set_rar_generic,
+	.clear_rar              = &ixgbe_clear_rar_generic,
+	.set_vmdq               = &ixgbe_set_vmdq_generic,
+	.set_vmdq_san_mac	= &ixgbe_set_vmdq_san_mac_generic,
+	.clear_vmdq             = &ixgbe_clear_vmdq_generic,
+	.init_rx_addrs          = &ixgbe_init_rx_addrs_generic,
+	.update_mc_addr_list    = &ixgbe_update_mc_addr_list_generic,
+	.enable_mc              = &ixgbe_enable_mc_generic,
+	.disable_mc             = &ixgbe_disable_mc_generic,
+	.clear_vfta             = &ixgbe_clear_vfta_generic,
+	.set_vfta               = &ixgbe_set_vfta_generic,
+	.fc_enable              = &ixgbe_fc_enable_generic,
+	.setup_fc		= ixgbe_setup_fc_generic,
+	.fc_autoneg		= ixgbe_fc_autoneg,
+	.set_fw_drv_ver         = &ixgbe_set_fw_drv_ver_generic,
+	.init_uta_tables        = &ixgbe_init_uta_tables_generic,
+	.setup_sfp              = NULL,
+	.set_mac_anti_spoofing  = &ixgbe_set_mac_anti_spoofing,
+	.set_vlan_anti_spoofing = &ixgbe_set_vlan_anti_spoofing,
+	.acquire_swfw_sync      = &ixgbe_acquire_swfw_sync_X540,
+	.release_swfw_sync      = &ixgbe_release_swfw_sync_X540,
+	.init_swfw_sync		= &ixgbe_init_swfw_sync_X540,
+	.disable_rx_buff	= &ixgbe_disable_rx_buff_generic,
+	.enable_rx_buff		= &ixgbe_enable_rx_buff_generic,
+	.get_thermal_sensor_data = NULL,
+	.init_thermal_sensor_thresh = NULL,
+	.prot_autoc_read	= &prot_autoc_read_generic,
+	.prot_autoc_write	= &prot_autoc_write_generic,
+	.enable_rx		= &ixgbe_enable_rx_generic,
+	.disable_rx		= &ixgbe_disable_rx_generic,
+};
+
+static const struct ixgbe_eeprom_operations eeprom_ops_X540 = {
+	.init_params            = &ixgbe_init_eeprom_params_X540,
+	.read                   = &ixgbe_read_eerd_X540,
+	.read_buffer		= &ixgbe_read_eerd_buffer_X540,
+	.write                  = &ixgbe_write_eewr_X540,
+	.write_buffer		= &ixgbe_write_eewr_buffer_X540,
+	.calc_checksum		= &ixgbe_calc_eeprom_checksum_X540,
+	.validate_checksum      = &ixgbe_validate_eeprom_checksum_X540,
+	.update_checksum        = &ixgbe_update_eeprom_checksum_X540,
+};
+
+static const struct ixgbe_phy_operations phy_ops_X540 = {
+	.identify               = &ixgbe_identify_phy_generic,
+	.identify_sfp           = &ixgbe_identify_sfp_module_generic,
+	.init			= NULL,
+	.reset                  = NULL,
+	.read_reg               = &ixgbe_read_phy_reg_generic,
+	.write_reg              = &ixgbe_write_phy_reg_generic,
+	.setup_link             = &ixgbe_setup_phy_link_generic,
+	.setup_link_speed       = &ixgbe_setup_phy_link_speed_generic,
+	.read_i2c_byte          = &ixgbe_read_i2c_byte_generic,
+	.write_i2c_byte         = &ixgbe_write_i2c_byte_generic,
+	.read_i2c_sff8472	= &ixgbe_read_i2c_sff8472_generic,
+	.read_i2c_eeprom        = &ixgbe_read_i2c_eeprom_generic,
+	.write_i2c_eeprom       = &ixgbe_write_i2c_eeprom_generic,
+	.check_overtemp         = &ixgbe_tn_check_overtemp,
+	.set_phy_power          = &ixgbe_set_copper_phy_power,
+};
+
+static const u32 ixgbe_mvals_X540[IXGBE_MVALS_IDX_LIMIT] = {
+	IXGBE_MVALS_INIT(X540)
+};
+
+const struct ixgbe_info ixgbe_X540_info = {
+	.mac                    = ixgbe_mac_X540,
+	.get_invariants         = &ixgbe_get_invariants_X540,
+	.mac_ops                = &mac_ops_X540,
+	.eeprom_ops             = &eeprom_ops_X540,
+	.phy_ops                = &phy_ops_X540,
+	.mbx_ops                = &mbx_ops_generic,
+	.mvals			= ixgbe_mvals_X540,
+};
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
new file mode 100644
index 0000000000..e246c0d2a4
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "ixgbe_type.h"
+
+s32 ixgbe_get_invariants_X540(struct ixgbe_hw *hw);
+s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+			      bool autoneg_wait_to_complete);
+s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw);
+s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw);
+enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw);
+s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+			      bool autoneg_wait_to_complete);
+s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask);
+void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask);
+void ixgbe_init_swfw_sync_X540(struct ixgbe_hw *hw);
+s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
new file mode 100644
index 0000000000..aa4bf6c9a2
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -0,0 +1,4150 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "ixgbe_x540.h"
+#include "ixgbe_type.h"
+#include "ixgbe_common.h"
+#include "ixgbe_phy.h"
+
+static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *, ixgbe_link_speed);
+static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *);
+static void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *);
+static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *);
+static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *);
+
+static s32 ixgbe_get_invariants_X550_x(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	struct ixgbe_phy_info *phy = &hw->phy;
+	struct ixgbe_link_info *link = &hw->link;
+
+	/* Start with X540 invariants, since so simular */
+	ixgbe_get_invariants_X540(hw);
+
+	if (mac->ops.get_media_type(hw) != ixgbe_media_type_copper)
+		phy->ops.set_phy_power = NULL;
+
+	link->addr = IXGBE_CS4227;
+
+	return 0;
+}
+
+static s32 ixgbe_get_invariants_X550_x_fw(struct ixgbe_hw *hw)
+{
+	struct ixgbe_phy_info *phy = &hw->phy;
+
+	/* Start with X540 invariants, since so similar */
+	ixgbe_get_invariants_X540(hw);
+
+	phy->ops.set_phy_power = NULL;
+
+	return 0;
+}
+
+static s32 ixgbe_get_invariants_X550_a(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	struct ixgbe_phy_info *phy = &hw->phy;
+
+	/* Start with X540 invariants, since so simular */
+	ixgbe_get_invariants_X540(hw);
+
+	if (mac->ops.get_media_type(hw) != ixgbe_media_type_copper)
+		phy->ops.set_phy_power = NULL;
+
+	return 0;
+}
+
+static s32 ixgbe_get_invariants_X550_a_fw(struct ixgbe_hw *hw)
+{
+	struct ixgbe_phy_info *phy = &hw->phy;
+
+	/* Start with X540 invariants, since so similar */
+	ixgbe_get_invariants_X540(hw);
+
+	phy->ops.set_phy_power = NULL;
+
+	return 0;
+}
+
+/** ixgbe_setup_mux_ctl - Setup ESDP register for I2C mux control
+ *  @hw: pointer to hardware structure
+ **/
+static void ixgbe_setup_mux_ctl(struct ixgbe_hw *hw)
+{
+	u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+
+	if (hw->bus.lan_id) {
+		esdp &= ~(IXGBE_ESDP_SDP1_NATIVE | IXGBE_ESDP_SDP1);
+		esdp |= IXGBE_ESDP_SDP1_DIR;
+	}
+	esdp &= ~(IXGBE_ESDP_SDP0_NATIVE | IXGBE_ESDP_SDP0_DIR);
+	IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_read_cs4227 - Read CS4227 register
+ * @hw: pointer to hardware structure
+ * @reg: register number to write
+ * @value: pointer to receive value read
+ *
+ * Returns status code
+ */
+static s32 ixgbe_read_cs4227(struct ixgbe_hw *hw, u16 reg, u16 *value)
+{
+	return hw->link.ops.read_link_unlocked(hw, hw->link.addr, reg, value);
+}
+
+/**
+ * ixgbe_write_cs4227 - Write CS4227 register
+ * @hw: pointer to hardware structure
+ * @reg: register number to write
+ * @value: value to write to register
+ *
+ * Returns status code
+ */
+static s32 ixgbe_write_cs4227(struct ixgbe_hw *hw, u16 reg, u16 value)
+{
+	return hw->link.ops.write_link_unlocked(hw, hw->link.addr, reg, value);
+}
+
+/**
+ * ixgbe_read_pe - Read register from port expander
+ * @hw: pointer to hardware structure
+ * @reg: register number to read
+ * @value: pointer to receive read value
+ *
+ * Returns status code
+ */
+static s32 ixgbe_read_pe(struct ixgbe_hw *hw, u8 reg, u8 *value)
+{
+	s32 status;
+
+	status = ixgbe_read_i2c_byte_generic_unlocked(hw, reg, IXGBE_PE, value);
+	if (status)
+		hw_err(hw, "port expander access failed with %d\n", status);
+	return status;
+}
+
+/**
+ * ixgbe_write_pe - Write register to port expander
+ * @hw: pointer to hardware structure
+ * @reg: register number to write
+ * @value: value to write
+ *
+ * Returns status code
+ */
+static s32 ixgbe_write_pe(struct ixgbe_hw *hw, u8 reg, u8 value)
+{
+	s32 status;
+
+	status = ixgbe_write_i2c_byte_generic_unlocked(hw, reg, IXGBE_PE,
+						       value);
+	if (status)
+		hw_err(hw, "port expander access failed with %d\n", status);
+	return status;
+}
+
+/**
+ * ixgbe_reset_cs4227 - Reset CS4227 using port expander
+ * @hw: pointer to hardware structure
+ *
+ * This function assumes that the caller has acquired the proper semaphore.
+ * Returns error code
+ */
+static s32 ixgbe_reset_cs4227(struct ixgbe_hw *hw)
+{
+	s32 status;
+	u32 retry;
+	u16 value;
+	u8 reg;
+
+	/* Trigger hard reset. */
+	status = ixgbe_read_pe(hw, IXGBE_PE_OUTPUT, &reg);
+	if (status)
+		return status;
+	reg |= IXGBE_PE_BIT1;
+	status = ixgbe_write_pe(hw, IXGBE_PE_OUTPUT, reg);
+	if (status)
+		return status;
+
+	status = ixgbe_read_pe(hw, IXGBE_PE_CONFIG, &reg);
+	if (status)
+		return status;
+	reg &= ~IXGBE_PE_BIT1;
+	status = ixgbe_write_pe(hw, IXGBE_PE_CONFIG, reg);
+	if (status)
+		return status;
+
+	status = ixgbe_read_pe(hw, IXGBE_PE_OUTPUT, &reg);
+	if (status)
+		return status;
+	reg &= ~IXGBE_PE_BIT1;
+	status = ixgbe_write_pe(hw, IXGBE_PE_OUTPUT, reg);
+	if (status)
+		return status;
+
+	usleep_range(IXGBE_CS4227_RESET_HOLD, IXGBE_CS4227_RESET_HOLD + 100);
+
+	status = ixgbe_read_pe(hw, IXGBE_PE_OUTPUT, &reg);
+	if (status)
+		return status;
+	reg |= IXGBE_PE_BIT1;
+	status = ixgbe_write_pe(hw, IXGBE_PE_OUTPUT, reg);
+	if (status)
+		return status;
+
+	/* Wait for the reset to complete. */
+	msleep(IXGBE_CS4227_RESET_DELAY);
+	for (retry = 0; retry < IXGBE_CS4227_RETRIES; retry++) {
+		status = ixgbe_read_cs4227(hw, IXGBE_CS4227_EFUSE_STATUS,
+					   &value);
+		if (!status && value == IXGBE_CS4227_EEPROM_LOAD_OK)
+			break;
+		msleep(IXGBE_CS4227_CHECK_DELAY);
+	}
+	if (retry == IXGBE_CS4227_RETRIES) {
+		hw_err(hw, "CS4227 reset did not complete\n");
+		return IXGBE_ERR_PHY;
+	}
+
+	status = ixgbe_read_cs4227(hw, IXGBE_CS4227_EEPROM_STATUS, &value);
+	if (status || !(value & IXGBE_CS4227_EEPROM_LOAD_OK)) {
+		hw_err(hw, "CS4227 EEPROM did not load successfully\n");
+		return IXGBE_ERR_PHY;
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_check_cs4227 - Check CS4227 and reset as needed
+ * @hw: pointer to hardware structure
+ */
+static void ixgbe_check_cs4227(struct ixgbe_hw *hw)
+{
+	u32 swfw_mask = hw->phy.phy_semaphore_mask;
+	s32 status;
+	u16 value;
+	u8 retry;
+
+	for (retry = 0; retry < IXGBE_CS4227_RETRIES; retry++) {
+		status = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask);
+		if (status) {
+			hw_err(hw, "semaphore failed with %d\n", status);
+			msleep(IXGBE_CS4227_CHECK_DELAY);
+			continue;
+		}
+
+		/* Get status of reset flow. */
+		status = ixgbe_read_cs4227(hw, IXGBE_CS4227_SCRATCH, &value);
+		if (!status && value == IXGBE_CS4227_RESET_COMPLETE)
+			goto out;
+
+		if (status || value != IXGBE_CS4227_RESET_PENDING)
+			break;
+
+		/* Reset is pending. Wait and check again. */
+		hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+		msleep(IXGBE_CS4227_CHECK_DELAY);
+	}
+	/* If still pending, assume other instance failed. */
+	if (retry == IXGBE_CS4227_RETRIES) {
+		status = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask);
+		if (status) {
+			hw_err(hw, "semaphore failed with %d\n", status);
+			return;
+		}
+	}
+
+	/* Reset the CS4227. */
+	status = ixgbe_reset_cs4227(hw);
+	if (status) {
+		hw_err(hw, "CS4227 reset failed: %d", status);
+		goto out;
+	}
+
+	/* Reset takes so long, temporarily release semaphore in case the
+	 * other driver instance is waiting for the reset indication.
+	 */
+	ixgbe_write_cs4227(hw, IXGBE_CS4227_SCRATCH,
+			   IXGBE_CS4227_RESET_PENDING);
+	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+	usleep_range(10000, 12000);
+	status = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask);
+	if (status) {
+		hw_err(hw, "semaphore failed with %d", status);
+		return;
+	}
+
+	/* Record completion for next time. */
+	status = ixgbe_write_cs4227(hw, IXGBE_CS4227_SCRATCH,
+				    IXGBE_CS4227_RESET_COMPLETE);
+
+out:
+	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+	msleep(hw->eeprom.semaphore_delay);
+}
+
+/** ixgbe_identify_phy_x550em - Get PHY type based on device id
+ *  @hw: pointer to hardware structure
+ *
+ *  Returns error code
+ */
+static s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw)
+{
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_X550EM_A_SFP:
+		if (hw->bus.lan_id)
+			hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY1_SM;
+		else
+			hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM;
+		return ixgbe_identify_module_generic(hw);
+	case IXGBE_DEV_ID_X550EM_X_SFP:
+		/* set up for CS4227 usage */
+		hw->phy.phy_semaphore_mask = IXGBE_GSSR_SHARED_I2C_SM;
+		ixgbe_setup_mux_ctl(hw);
+		ixgbe_check_cs4227(hw);
+		fallthrough;
+	case IXGBE_DEV_ID_X550EM_A_SFP_N:
+		return ixgbe_identify_module_generic(hw);
+	case IXGBE_DEV_ID_X550EM_X_KX4:
+		hw->phy.type = ixgbe_phy_x550em_kx4;
+		break;
+	case IXGBE_DEV_ID_X550EM_X_XFI:
+		hw->phy.type = ixgbe_phy_x550em_xfi;
+		break;
+	case IXGBE_DEV_ID_X550EM_X_KR:
+	case IXGBE_DEV_ID_X550EM_A_KR:
+	case IXGBE_DEV_ID_X550EM_A_KR_L:
+		hw->phy.type = ixgbe_phy_x550em_kr;
+		break;
+	case IXGBE_DEV_ID_X550EM_A_10G_T:
+		if (hw->bus.lan_id)
+			hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY1_SM;
+		else
+			hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM;
+		fallthrough;
+	case IXGBE_DEV_ID_X550EM_X_10G_T:
+		return ixgbe_identify_phy_generic(hw);
+	case IXGBE_DEV_ID_X550EM_X_1G_T:
+		hw->phy.type = ixgbe_phy_ext_1g_t;
+		break;
+	case IXGBE_DEV_ID_X550EM_A_1G_T:
+	case IXGBE_DEV_ID_X550EM_A_1G_T_L:
+		hw->phy.type = ixgbe_phy_fw;
+		hw->phy.ops.read_reg = NULL;
+		hw->phy.ops.write_reg = NULL;
+		if (hw->bus.lan_id)
+			hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY1_SM;
+		else
+			hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY0_SM;
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static s32 ixgbe_read_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr,
+				     u32 device_type, u16 *phy_data)
+{
+	return IXGBE_NOT_IMPLEMENTED;
+}
+
+static s32 ixgbe_write_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr,
+				      u32 device_type, u16 phy_data)
+{
+	return IXGBE_NOT_IMPLEMENTED;
+}
+
+/**
+ * ixgbe_read_i2c_combined_generic - Perform I2C read combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to read from
+ * @reg: I2C device register to read from
+ * @val: pointer to location to receive read value
+ *
+ * Returns an error code on error.
+ **/
+static s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
+					   u16 reg, u16 *val)
+{
+	return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, true);
+}
+
+/**
+ * ixgbe_read_i2c_combined_generic_unlocked - Do I2C read combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to read from
+ * @reg: I2C device register to read from
+ * @val: pointer to location to receive read value
+ *
+ * Returns an error code on error.
+ **/
+static s32
+ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr,
+					 u16 reg, u16 *val)
+{
+	return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, false);
+}
+
+/**
+ * ixgbe_write_i2c_combined_generic - Perform I2C write combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to write to
+ * @reg: I2C device register to write to
+ * @val: value to write
+ *
+ * Returns an error code on error.
+ **/
+static s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw,
+					    u8 addr, u16 reg, u16 val)
+{
+	return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, true);
+}
+
+/**
+ * ixgbe_write_i2c_combined_generic_unlocked - Do I2C write combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to write to
+ * @reg: I2C device register to write to
+ * @val: value to write
+ *
+ * Returns an error code on error.
+ **/
+static s32
+ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw,
+					  u8 addr, u16 reg, u16 val)
+{
+	return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, false);
+}
+
+/**
+ * ixgbe_fw_phy_activity - Perform an activity on a PHY
+ * @hw: pointer to hardware structure
+ * @activity: activity to perform
+ * @data: Pointer to 4 32-bit words of data
+ */
+s32 ixgbe_fw_phy_activity(struct ixgbe_hw *hw, u16 activity,
+			  u32 (*data)[FW_PHY_ACT_DATA_COUNT])
+{
+	union {
+		struct ixgbe_hic_phy_activity_req cmd;
+		struct ixgbe_hic_phy_activity_resp rsp;
+	} hic;
+	u16 retries = FW_PHY_ACT_RETRIES;
+	s32 rc;
+	u32 i;
+
+	do {
+		memset(&hic, 0, sizeof(hic));
+		hic.cmd.hdr.cmd = FW_PHY_ACT_REQ_CMD;
+		hic.cmd.hdr.buf_len = FW_PHY_ACT_REQ_LEN;
+		hic.cmd.hdr.checksum = FW_DEFAULT_CHECKSUM;
+		hic.cmd.port_number = hw->bus.lan_id;
+		hic.cmd.activity_id = cpu_to_le16(activity);
+		for (i = 0; i < ARRAY_SIZE(hic.cmd.data); ++i)
+			hic.cmd.data[i] = cpu_to_be32((*data)[i]);
+
+		rc = ixgbe_host_interface_command(hw, &hic.cmd, sizeof(hic.cmd),
+						  IXGBE_HI_COMMAND_TIMEOUT,
+						  true);
+		if (rc)
+			return rc;
+		if (hic.rsp.hdr.cmd_or_resp.ret_status ==
+		    FW_CEM_RESP_STATUS_SUCCESS) {
+			for (i = 0; i < FW_PHY_ACT_DATA_COUNT; ++i)
+				(*data)[i] = be32_to_cpu(hic.rsp.data[i]);
+			return 0;
+		}
+		usleep_range(20, 30);
+		--retries;
+	} while (retries > 0);
+
+	return IXGBE_ERR_HOST_INTERFACE_COMMAND;
+}
+
+static const struct {
+	u16 fw_speed;
+	ixgbe_link_speed phy_speed;
+} ixgbe_fw_map[] = {
+	{ FW_PHY_ACT_LINK_SPEED_10, IXGBE_LINK_SPEED_10_FULL },
+	{ FW_PHY_ACT_LINK_SPEED_100, IXGBE_LINK_SPEED_100_FULL },
+	{ FW_PHY_ACT_LINK_SPEED_1G, IXGBE_LINK_SPEED_1GB_FULL },
+	{ FW_PHY_ACT_LINK_SPEED_2_5G, IXGBE_LINK_SPEED_2_5GB_FULL },
+	{ FW_PHY_ACT_LINK_SPEED_5G, IXGBE_LINK_SPEED_5GB_FULL },
+	{ FW_PHY_ACT_LINK_SPEED_10G, IXGBE_LINK_SPEED_10GB_FULL },
+};
+
+/**
+ * ixgbe_get_phy_id_fw - Get the phy ID via firmware command
+ * @hw: pointer to hardware structure
+ *
+ * Returns error code
+ */
+static s32 ixgbe_get_phy_id_fw(struct ixgbe_hw *hw)
+{
+	u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 };
+	u16 phy_speeds;
+	u16 phy_id_lo;
+	s32 rc;
+	u16 i;
+
+	if (hw->phy.id)
+		return 0;
+
+	rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_PHY_INFO, &info);
+	if (rc)
+		return rc;
+
+	hw->phy.speeds_supported = 0;
+	phy_speeds = info[0] & FW_PHY_INFO_SPEED_MASK;
+	for (i = 0; i < ARRAY_SIZE(ixgbe_fw_map); ++i) {
+		if (phy_speeds & ixgbe_fw_map[i].fw_speed)
+			hw->phy.speeds_supported |= ixgbe_fw_map[i].phy_speed;
+	}
+
+	hw->phy.id = info[0] & FW_PHY_INFO_ID_HI_MASK;
+	phy_id_lo = info[1] & FW_PHY_INFO_ID_LO_MASK;
+	hw->phy.id |= phy_id_lo & IXGBE_PHY_REVISION_MASK;
+	hw->phy.revision = phy_id_lo & ~IXGBE_PHY_REVISION_MASK;
+	if (!hw->phy.id || hw->phy.id == IXGBE_PHY_REVISION_MASK)
+		return IXGBE_ERR_PHY_ADDR_INVALID;
+
+	hw->phy.autoneg_advertised = hw->phy.speeds_supported;
+	hw->phy.eee_speeds_supported = IXGBE_LINK_SPEED_100_FULL |
+				       IXGBE_LINK_SPEED_1GB_FULL;
+	hw->phy.eee_speeds_advertised = hw->phy.eee_speeds_supported;
+	return 0;
+}
+
+/**
+ * ixgbe_identify_phy_fw - Get PHY type based on firmware command
+ * @hw: pointer to hardware structure
+ *
+ * Returns error code
+ */
+static s32 ixgbe_identify_phy_fw(struct ixgbe_hw *hw)
+{
+	if (hw->bus.lan_id)
+		hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY1_SM;
+	else
+		hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM;
+
+	hw->phy.type = ixgbe_phy_fw;
+	hw->phy.ops.read_reg = NULL;
+	hw->phy.ops.write_reg = NULL;
+	return ixgbe_get_phy_id_fw(hw);
+}
+
+/**
+ * ixgbe_shutdown_fw_phy - Shutdown a firmware-controlled PHY
+ * @hw: pointer to hardware structure
+ *
+ * Returns error code
+ */
+static s32 ixgbe_shutdown_fw_phy(struct ixgbe_hw *hw)
+{
+	u32 setup[FW_PHY_ACT_DATA_COUNT] = { 0 };
+
+	setup[0] = FW_PHY_ACT_FORCE_LINK_DOWN_OFF;
+	return ixgbe_fw_phy_activity(hw, FW_PHY_ACT_FORCE_LINK_DOWN, &setup);
+}
+
+/**
+ * ixgbe_setup_fw_link - Setup firmware-controlled PHYs
+ * @hw: pointer to hardware structure
+ */
+static s32 ixgbe_setup_fw_link(struct ixgbe_hw *hw)
+{
+	u32 setup[FW_PHY_ACT_DATA_COUNT] = { 0 };
+	s32 rc;
+	u16 i;
+
+	if (hw->phy.reset_disable || ixgbe_check_reset_blocked(hw))
+		return 0;
+
+	if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
+		hw_err(hw, "rx_pause not valid in strict IEEE mode\n");
+		return IXGBE_ERR_INVALID_LINK_SETTINGS;
+	}
+
+	switch (hw->fc.requested_mode) {
+	case ixgbe_fc_full:
+		setup[0] |= FW_PHY_ACT_SETUP_LINK_PAUSE_RXTX <<
+			    FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT;
+		break;
+	case ixgbe_fc_rx_pause:
+		setup[0] |= FW_PHY_ACT_SETUP_LINK_PAUSE_RX <<
+			    FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT;
+		break;
+	case ixgbe_fc_tx_pause:
+		setup[0] |= FW_PHY_ACT_SETUP_LINK_PAUSE_TX <<
+			    FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT;
+		break;
+	default:
+		break;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(ixgbe_fw_map); ++i) {
+		if (hw->phy.autoneg_advertised & ixgbe_fw_map[i].phy_speed)
+			setup[0] |= ixgbe_fw_map[i].fw_speed;
+	}
+	setup[0] |= FW_PHY_ACT_SETUP_LINK_HP | FW_PHY_ACT_SETUP_LINK_AN;
+
+	if (hw->phy.eee_speeds_advertised)
+		setup[0] |= FW_PHY_ACT_SETUP_LINK_EEE;
+
+	rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_SETUP_LINK, &setup);
+	if (rc)
+		return rc;
+	if (setup[0] == FW_PHY_ACT_SETUP_LINK_RSP_DOWN)
+		return IXGBE_ERR_OVERTEMP;
+	return 0;
+}
+
+/**
+ * ixgbe_fc_autoneg_fw - Set up flow control for FW-controlled PHYs
+ * @hw: pointer to hardware structure
+ *
+ * Called at init time to set up flow control.
+ */
+static s32 ixgbe_fc_autoneg_fw(struct ixgbe_hw *hw)
+{
+	if (hw->fc.requested_mode == ixgbe_fc_default)
+		hw->fc.requested_mode = ixgbe_fc_full;
+
+	return ixgbe_setup_fw_link(hw);
+}
+
+/** ixgbe_init_eeprom_params_X550 - Initialize EEPROM params
+ *  @hw: pointer to hardware structure
+ *
+ *  Initializes the EEPROM parameters ixgbe_eeprom_info within the
+ *  ixgbe_hw struct in order to set up EEPROM access.
+ **/
+static s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw)
+{
+	struct ixgbe_eeprom_info *eeprom = &hw->eeprom;
+	u32 eec;
+	u16 eeprom_size;
+
+	if (eeprom->type == ixgbe_eeprom_uninitialized) {
+		eeprom->semaphore_delay = 10;
+		eeprom->type = ixgbe_flash;
+
+		eec = IXGBE_READ_REG(hw, IXGBE_EEC(hw));
+		eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >>
+				    IXGBE_EEC_SIZE_SHIFT);
+		eeprom->word_size = BIT(eeprom_size +
+					IXGBE_EEPROM_WORD_SIZE_SHIFT);
+
+		hw_dbg(hw, "Eeprom params: type = %d, size = %d\n",
+		       eeprom->type, eeprom->word_size);
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_iosf_wait - Wait for IOSF command completion
+ * @hw: pointer to hardware structure
+ * @ctrl: pointer to location to receive final IOSF control value
+ *
+ * Return: failing status on timeout
+ *
+ * Note: ctrl can be NULL if the IOSF control register value is not needed
+ */
+static s32 ixgbe_iosf_wait(struct ixgbe_hw *hw, u32 *ctrl)
+{
+	u32 i, command;
+
+	/* Check every 10 usec to see if the address cycle completed.
+	 * The SB IOSF BUSY bit will clear when the operation is
+	 * complete.
+	 */
+	for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) {
+		command = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL);
+		if (!(command & IXGBE_SB_IOSF_CTRL_BUSY))
+			break;
+		udelay(10);
+	}
+	if (ctrl)
+		*ctrl = command;
+	if (i == IXGBE_MDIO_COMMAND_TIMEOUT) {
+		hw_dbg(hw, "IOSF wait timed out\n");
+		return IXGBE_ERR_PHY;
+	}
+
+	return 0;
+}
+
+/** ixgbe_read_iosf_sb_reg_x550 - Writes a value to specified register of the
+ *  IOSF device
+ *  @hw: pointer to hardware structure
+ *  @reg_addr: 32 bit PHY register to write
+ *  @device_type: 3 bit device type
+ *  @phy_data: Pointer to read data from the register
+ **/
+static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
+				       u32 device_type, u32 *data)
+{
+	u32 gssr = IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_PHY0_SM;
+	u32 command, error;
+	s32 ret;
+
+	ret = hw->mac.ops.acquire_swfw_sync(hw, gssr);
+	if (ret)
+		return ret;
+
+	ret = ixgbe_iosf_wait(hw, NULL);
+	if (ret)
+		goto out;
+
+	command = ((reg_addr << IXGBE_SB_IOSF_CTRL_ADDR_SHIFT) |
+		   (device_type << IXGBE_SB_IOSF_CTRL_TARGET_SELECT_SHIFT));
+
+	/* Write IOSF control register */
+	IXGBE_WRITE_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL, command);
+
+	ret = ixgbe_iosf_wait(hw, &command);
+
+	if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) {
+		error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >>
+			 IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT;
+		hw_dbg(hw, "Failed to read, error %x\n", error);
+		return IXGBE_ERR_PHY;
+	}
+
+	if (!ret)
+		*data = IXGBE_READ_REG(hw, IXGBE_SB_IOSF_INDIRECT_DATA);
+
+out:
+	hw->mac.ops.release_swfw_sync(hw, gssr);
+	return ret;
+}
+
+/**
+ * ixgbe_get_phy_token - Get the token for shared PHY access
+ * @hw: Pointer to hardware structure
+ */
+static s32 ixgbe_get_phy_token(struct ixgbe_hw *hw)
+{
+	struct ixgbe_hic_phy_token_req token_cmd;
+	s32 status;
+
+	token_cmd.hdr.cmd = FW_PHY_TOKEN_REQ_CMD;
+	token_cmd.hdr.buf_len = FW_PHY_TOKEN_REQ_LEN;
+	token_cmd.hdr.cmd_or_resp.cmd_resv = 0;
+	token_cmd.hdr.checksum = FW_DEFAULT_CHECKSUM;
+	token_cmd.port_number = hw->bus.lan_id;
+	token_cmd.command_type = FW_PHY_TOKEN_REQ;
+	token_cmd.pad = 0;
+	status = ixgbe_host_interface_command(hw, &token_cmd, sizeof(token_cmd),
+					      IXGBE_HI_COMMAND_TIMEOUT,
+					      true);
+	if (status)
+		return status;
+	if (token_cmd.hdr.cmd_or_resp.ret_status == FW_PHY_TOKEN_OK)
+		return 0;
+	if (token_cmd.hdr.cmd_or_resp.ret_status != FW_PHY_TOKEN_RETRY)
+		return IXGBE_ERR_FW_RESP_INVALID;
+
+	return IXGBE_ERR_TOKEN_RETRY;
+}
+
+/**
+ * ixgbe_put_phy_token - Put the token for shared PHY access
+ * @hw: Pointer to hardware structure
+ */
+static s32 ixgbe_put_phy_token(struct ixgbe_hw *hw)
+{
+	struct ixgbe_hic_phy_token_req token_cmd;
+	s32 status;
+
+	token_cmd.hdr.cmd = FW_PHY_TOKEN_REQ_CMD;
+	token_cmd.hdr.buf_len = FW_PHY_TOKEN_REQ_LEN;
+	token_cmd.hdr.cmd_or_resp.cmd_resv = 0;
+	token_cmd.hdr.checksum = FW_DEFAULT_CHECKSUM;
+	token_cmd.port_number = hw->bus.lan_id;
+	token_cmd.command_type = FW_PHY_TOKEN_REL;
+	token_cmd.pad = 0;
+	status = ixgbe_host_interface_command(hw, &token_cmd, sizeof(token_cmd),
+					      IXGBE_HI_COMMAND_TIMEOUT,
+					      true);
+	if (status)
+		return status;
+	if (token_cmd.hdr.cmd_or_resp.ret_status == FW_PHY_TOKEN_OK)
+		return 0;
+	return IXGBE_ERR_FW_RESP_INVALID;
+}
+
+/**
+ *  ixgbe_write_iosf_sb_reg_x550a - Write to IOSF PHY register
+ *  @hw: pointer to hardware structure
+ *  @reg_addr: 32 bit PHY register to write
+ *  @device_type: 3 bit device type
+ *  @data: Data to write to the register
+ **/
+static s32 ixgbe_write_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
+					 __always_unused u32 device_type,
+					 u32 data)
+{
+	struct ixgbe_hic_internal_phy_req write_cmd;
+
+	memset(&write_cmd, 0, sizeof(write_cmd));
+	write_cmd.hdr.cmd = FW_INT_PHY_REQ_CMD;
+	write_cmd.hdr.buf_len = FW_INT_PHY_REQ_LEN;
+	write_cmd.hdr.checksum = FW_DEFAULT_CHECKSUM;
+	write_cmd.port_number = hw->bus.lan_id;
+	write_cmd.command_type = FW_INT_PHY_REQ_WRITE;
+	write_cmd.address = cpu_to_be16(reg_addr);
+	write_cmd.write_data = cpu_to_be32(data);
+
+	return ixgbe_host_interface_command(hw, &write_cmd, sizeof(write_cmd),
+					    IXGBE_HI_COMMAND_TIMEOUT, false);
+}
+
+/**
+ *  ixgbe_read_iosf_sb_reg_x550a - Read from IOSF PHY register
+ *  @hw: pointer to hardware structure
+ *  @reg_addr: 32 bit PHY register to write
+ *  @device_type: 3 bit device type
+ *  @data: Pointer to read data from the register
+ **/
+static s32 ixgbe_read_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
+					__always_unused u32 device_type,
+					u32 *data)
+{
+	union {
+		struct ixgbe_hic_internal_phy_req cmd;
+		struct ixgbe_hic_internal_phy_resp rsp;
+	} hic;
+	s32 status;
+
+	memset(&hic, 0, sizeof(hic));
+	hic.cmd.hdr.cmd = FW_INT_PHY_REQ_CMD;
+	hic.cmd.hdr.buf_len = FW_INT_PHY_REQ_LEN;
+	hic.cmd.hdr.checksum = FW_DEFAULT_CHECKSUM;
+	hic.cmd.port_number = hw->bus.lan_id;
+	hic.cmd.command_type = FW_INT_PHY_REQ_READ;
+	hic.cmd.address = cpu_to_be16(reg_addr);
+
+	status = ixgbe_host_interface_command(hw, &hic.cmd, sizeof(hic.cmd),
+					      IXGBE_HI_COMMAND_TIMEOUT, true);
+
+	/* Extract the register value from the response. */
+	*data = be32_to_cpu(hic.rsp.read_data);
+
+	return status;
+}
+
+/** ixgbe_read_ee_hostif_buffer_X550- Read EEPROM word(s) using hostif
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to read
+ *  @words: number of words
+ *  @data: word(s) read from the EEPROM
+ *
+ *  Reads a 16 bit word(s) from the EEPROM using the hostif.
+ **/
+static s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
+					    u16 offset, u16 words, u16 *data)
+{
+	const u32 mask = IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_EEP_SM;
+	struct ixgbe_hic_read_shadow_ram buffer;
+	u32 current_word = 0;
+	u16 words_to_read;
+	s32 status;
+	u32 i;
+
+	/* Take semaphore for the entire operation. */
+	status = hw->mac.ops.acquire_swfw_sync(hw, mask);
+	if (status) {
+		hw_dbg(hw, "EEPROM read buffer - semaphore failed\n");
+		return status;
+	}
+
+	while (words) {
+		if (words > FW_MAX_READ_BUFFER_SIZE / 2)
+			words_to_read = FW_MAX_READ_BUFFER_SIZE / 2;
+		else
+			words_to_read = words;
+
+		buffer.hdr.req.cmd = FW_READ_SHADOW_RAM_CMD;
+		buffer.hdr.req.buf_lenh = 0;
+		buffer.hdr.req.buf_lenl = FW_READ_SHADOW_RAM_LEN;
+		buffer.hdr.req.checksum = FW_DEFAULT_CHECKSUM;
+
+		/* convert offset from words to bytes */
+		buffer.address = (__force u32)cpu_to_be32((offset +
+							   current_word) * 2);
+		buffer.length = (__force u16)cpu_to_be16(words_to_read * 2);
+		buffer.pad2 = 0;
+		buffer.pad3 = 0;
+
+		status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer),
+					    IXGBE_HI_COMMAND_TIMEOUT);
+		if (status) {
+			hw_dbg(hw, "Host interface command failed\n");
+			goto out;
+		}
+
+		for (i = 0; i < words_to_read; i++) {
+			u32 reg = IXGBE_FLEX_MNG + (FW_NVM_DATA_OFFSET << 2) +
+				  2 * i;
+			u32 value = IXGBE_READ_REG(hw, reg);
+
+			data[current_word] = (u16)(value & 0xffff);
+			current_word++;
+			i++;
+			if (i < words_to_read) {
+				value >>= 16;
+				data[current_word] = (u16)(value & 0xffff);
+				current_word++;
+			}
+		}
+		words -= words_to_read;
+	}
+
+out:
+	hw->mac.ops.release_swfw_sync(hw, mask);
+	return status;
+}
+
+/** ixgbe_checksum_ptr_x550 - Checksum one pointer region
+ *  @hw: pointer to hardware structure
+ *  @ptr: pointer offset in eeprom
+ *  @size: size of section pointed by ptr, if 0 first word will be used as size
+ *  @csum: address of checksum to update
+ *
+ *  Returns error status for any failure
+ **/
+static s32 ixgbe_checksum_ptr_x550(struct ixgbe_hw *hw, u16 ptr,
+				   u16 size, u16 *csum, u16 *buffer,
+				   u32 buffer_size)
+{
+	u16 buf[256];
+	s32 status;
+	u16 length, bufsz, i, start;
+	u16 *local_buffer;
+
+	bufsz = ARRAY_SIZE(buf);
+
+	/* Read a chunk at the pointer location */
+	if (!buffer) {
+		status = ixgbe_read_ee_hostif_buffer_X550(hw, ptr, bufsz, buf);
+		if (status) {
+			hw_dbg(hw, "Failed to read EEPROM image\n");
+			return status;
+		}
+		local_buffer = buf;
+	} else {
+		if (buffer_size < ptr)
+			return  IXGBE_ERR_PARAM;
+		local_buffer = &buffer[ptr];
+	}
+
+	if (size) {
+		start = 0;
+		length = size;
+	} else {
+		start = 1;
+		length = local_buffer[0];
+
+		/* Skip pointer section if length is invalid. */
+		if (length == 0xFFFF || length == 0 ||
+		    (ptr + length) >= hw->eeprom.word_size)
+			return 0;
+	}
+
+	if (buffer && ((u32)start + (u32)length > buffer_size))
+		return IXGBE_ERR_PARAM;
+
+	for (i = start; length; i++, length--) {
+		if (i == bufsz && !buffer) {
+			ptr += bufsz;
+			i = 0;
+			if (length < bufsz)
+				bufsz = length;
+
+			/* Read a chunk at the pointer location */
+			status = ixgbe_read_ee_hostif_buffer_X550(hw, ptr,
+								  bufsz, buf);
+			if (status) {
+				hw_dbg(hw, "Failed to read EEPROM image\n");
+				return status;
+			}
+		}
+		*csum += local_buffer[i];
+	}
+	return 0;
+}
+
+/** ixgbe_calc_checksum_X550 - Calculates and returns the checksum
+ *  @hw: pointer to hardware structure
+ *  @buffer: pointer to buffer containing calculated checksum
+ *  @buffer_size: size of buffer
+ *
+ *  Returns a negative error code on error, or the 16-bit checksum
+ **/
+static s32 ixgbe_calc_checksum_X550(struct ixgbe_hw *hw, u16 *buffer,
+				    u32 buffer_size)
+{
+	u16 eeprom_ptrs[IXGBE_EEPROM_LAST_WORD + 1];
+	u16 *local_buffer;
+	s32 status;
+	u16 checksum = 0;
+	u16 pointer, i, size;
+
+	hw->eeprom.ops.init_params(hw);
+
+	if (!buffer) {
+		/* Read pointer area */
+		status = ixgbe_read_ee_hostif_buffer_X550(hw, 0,
+						IXGBE_EEPROM_LAST_WORD + 1,
+						eeprom_ptrs);
+		if (status) {
+			hw_dbg(hw, "Failed to read EEPROM image\n");
+			return status;
+		}
+		local_buffer = eeprom_ptrs;
+	} else {
+		if (buffer_size < IXGBE_EEPROM_LAST_WORD)
+			return IXGBE_ERR_PARAM;
+		local_buffer = buffer;
+	}
+
+	/* For X550 hardware include 0x0-0x41 in the checksum, skip the
+	 * checksum word itself
+	 */
+	for (i = 0; i <= IXGBE_EEPROM_LAST_WORD; i++)
+		if (i != IXGBE_EEPROM_CHECKSUM)
+			checksum += local_buffer[i];
+
+	/* Include all data from pointers 0x3, 0x6-0xE.  This excludes the
+	 * FW, PHY module, and PCIe Expansion/Option ROM pointers.
+	 */
+	for (i = IXGBE_PCIE_ANALOG_PTR_X550; i < IXGBE_FW_PTR; i++) {
+		if (i == IXGBE_PHY_PTR || i == IXGBE_OPTION_ROM_PTR)
+			continue;
+
+		pointer = local_buffer[i];
+
+		/* Skip pointer section if the pointer is invalid. */
+		if (pointer == 0xFFFF || pointer == 0 ||
+		    pointer >= hw->eeprom.word_size)
+			continue;
+
+		switch (i) {
+		case IXGBE_PCIE_GENERAL_PTR:
+			size = IXGBE_IXGBE_PCIE_GENERAL_SIZE;
+			break;
+		case IXGBE_PCIE_CONFIG0_PTR:
+		case IXGBE_PCIE_CONFIG1_PTR:
+			size = IXGBE_PCIE_CONFIG_SIZE;
+			break;
+		default:
+			size = 0;
+			break;
+		}
+
+		status = ixgbe_checksum_ptr_x550(hw, pointer, size, &checksum,
+						 buffer, buffer_size);
+		if (status)
+			return status;
+	}
+
+	checksum = (u16)IXGBE_EEPROM_SUM - checksum;
+
+	return (s32)checksum;
+}
+
+/** ixgbe_calc_eeprom_checksum_X550 - Calculates and returns the checksum
+ *  @hw: pointer to hardware structure
+ *
+ *  Returns a negative error code on error, or the 16-bit checksum
+ **/
+static s32 ixgbe_calc_eeprom_checksum_X550(struct ixgbe_hw *hw)
+{
+	return ixgbe_calc_checksum_X550(hw, NULL, 0);
+}
+
+/** ixgbe_read_ee_hostif_X550 - Read EEPROM word using a host interface command
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to read
+ *  @data: word read from the EEPROM
+ *
+ *   Reads a 16 bit word from the EEPROM using the hostif.
+ **/
+static s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 *data)
+{
+	const u32 mask = IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_EEP_SM;
+	struct ixgbe_hic_read_shadow_ram buffer;
+	s32 status;
+
+	buffer.hdr.req.cmd = FW_READ_SHADOW_RAM_CMD;
+	buffer.hdr.req.buf_lenh = 0;
+	buffer.hdr.req.buf_lenl = FW_READ_SHADOW_RAM_LEN;
+	buffer.hdr.req.checksum = FW_DEFAULT_CHECKSUM;
+
+	/* convert offset from words to bytes */
+	buffer.address = (__force u32)cpu_to_be32(offset * 2);
+	/* one word */
+	buffer.length = (__force u16)cpu_to_be16(sizeof(u16));
+
+	status = hw->mac.ops.acquire_swfw_sync(hw, mask);
+	if (status)
+		return status;
+
+	status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer),
+				    IXGBE_HI_COMMAND_TIMEOUT);
+	if (!status) {
+		*data = (u16)IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG,
+						  FW_NVM_DATA_OFFSET);
+	}
+
+	hw->mac.ops.release_swfw_sync(hw, mask);
+	return status;
+}
+
+/** ixgbe_validate_eeprom_checksum_X550 - Validate EEPROM checksum
+ *  @hw: pointer to hardware structure
+ *  @checksum_val: calculated checksum
+ *
+ *  Performs checksum calculation and validates the EEPROM checksum.  If the
+ *  caller does not need checksum_val, the value can be NULL.
+ **/
+static s32 ixgbe_validate_eeprom_checksum_X550(struct ixgbe_hw *hw,
+					       u16 *checksum_val)
+{
+	s32 status;
+	u16 checksum;
+	u16 read_checksum = 0;
+
+	/* Read the first word from the EEPROM. If this times out or fails, do
+	 * not continue or we could be in for a very long wait while every
+	 * EEPROM read fails
+	 */
+	status = hw->eeprom.ops.read(hw, 0, &checksum);
+	if (status) {
+		hw_dbg(hw, "EEPROM read failed\n");
+		return status;
+	}
+
+	status = hw->eeprom.ops.calc_checksum(hw);
+	if (status < 0)
+		return status;
+
+	checksum = (u16)(status & 0xffff);
+
+	status = ixgbe_read_ee_hostif_X550(hw, IXGBE_EEPROM_CHECKSUM,
+					   &read_checksum);
+	if (status)
+		return status;
+
+	/* Verify read checksum from EEPROM is the same as
+	 * calculated checksum
+	 */
+	if (read_checksum != checksum) {
+		status = IXGBE_ERR_EEPROM_CHECKSUM;
+		hw_dbg(hw, "Invalid EEPROM checksum");
+	}
+
+	/* If the user cares, return the calculated checksum */
+	if (checksum_val)
+		*checksum_val = checksum;
+
+	return status;
+}
+
+/** ixgbe_write_ee_hostif_X550 - Write EEPROM word using hostif
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to write
+ *  @data: word write to the EEPROM
+ *
+ *  Write a 16 bit word to the EEPROM using the hostif.
+ **/
+static s32 ixgbe_write_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset,
+					   u16 data)
+{
+	s32 status;
+	struct ixgbe_hic_write_shadow_ram buffer;
+
+	buffer.hdr.req.cmd = FW_WRITE_SHADOW_RAM_CMD;
+	buffer.hdr.req.buf_lenh = 0;
+	buffer.hdr.req.buf_lenl = FW_WRITE_SHADOW_RAM_LEN;
+	buffer.hdr.req.checksum = FW_DEFAULT_CHECKSUM;
+
+	/* one word */
+	buffer.length = cpu_to_be16(sizeof(u16));
+	buffer.data = data;
+	buffer.address = cpu_to_be32(offset * 2);
+
+	status = ixgbe_host_interface_command(hw, &buffer, sizeof(buffer),
+					      IXGBE_HI_COMMAND_TIMEOUT, false);
+	return status;
+}
+
+/** ixgbe_write_ee_hostif_X550 - Write EEPROM word using hostif
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to write
+ *  @data: word write to the EEPROM
+ *
+ *  Write a 16 bit word to the EEPROM using the hostif.
+ **/
+static s32 ixgbe_write_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 data)
+{
+	s32 status = 0;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == 0) {
+		status = ixgbe_write_ee_hostif_data_X550(hw, offset, data);
+		hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+	} else {
+		hw_dbg(hw, "write ee hostif failed to get semaphore");
+		status = IXGBE_ERR_SWFW_SYNC;
+	}
+
+	return status;
+}
+
+/** ixgbe_update_flash_X550 - Instruct HW to copy EEPROM to Flash device
+ *  @hw: pointer to hardware structure
+ *
+ *  Issue a shadow RAM dump to FW to copy EEPROM from shadow RAM to the flash.
+ **/
+static s32 ixgbe_update_flash_X550(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+	union ixgbe_hic_hdr2 buffer;
+
+	buffer.req.cmd = FW_SHADOW_RAM_DUMP_CMD;
+	buffer.req.buf_lenh = 0;
+	buffer.req.buf_lenl = FW_SHADOW_RAM_DUMP_LEN;
+	buffer.req.checksum = FW_DEFAULT_CHECKSUM;
+
+	status = ixgbe_host_interface_command(hw, &buffer, sizeof(buffer),
+					      IXGBE_HI_COMMAND_TIMEOUT, false);
+	return status;
+}
+
+/**
+ * ixgbe_get_bus_info_X550em - Set PCI bus info
+ * @hw: pointer to hardware structure
+ *
+ * Sets bus link width and speed to unknown because X550em is
+ * not a PCI device.
+ **/
+static s32 ixgbe_get_bus_info_X550em(struct ixgbe_hw *hw)
+{
+	hw->bus.type  = ixgbe_bus_type_internal;
+	hw->bus.width = ixgbe_bus_width_unknown;
+	hw->bus.speed = ixgbe_bus_speed_unknown;
+
+	hw->mac.ops.set_lan_id(hw);
+
+	return 0;
+}
+
+/**
+ * ixgbe_fw_recovery_mode_X550 - Check FW NVM recovery mode
+ * @hw: pointer t hardware structure
+ *
+ * Returns true if in FW NVM recovery mode.
+ */
+static bool ixgbe_fw_recovery_mode_X550(struct ixgbe_hw *hw)
+{
+	u32 fwsm;
+
+	fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw));
+	return !!(fwsm & IXGBE_FWSM_FW_NVM_RECOVERY_MODE);
+}
+
+/** ixgbe_disable_rx_x550 - Disable RX unit
+ *
+ *  Enables the Rx DMA unit for x550
+ **/
+static void ixgbe_disable_rx_x550(struct ixgbe_hw *hw)
+{
+	u32 rxctrl, pfdtxgswc;
+	s32 status;
+	struct ixgbe_hic_disable_rxen fw_cmd;
+
+	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+	if (rxctrl & IXGBE_RXCTRL_RXEN) {
+		pfdtxgswc = IXGBE_READ_REG(hw, IXGBE_PFDTXGSWC);
+		if (pfdtxgswc & IXGBE_PFDTXGSWC_VT_LBEN) {
+			pfdtxgswc &= ~IXGBE_PFDTXGSWC_VT_LBEN;
+			IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, pfdtxgswc);
+			hw->mac.set_lben = true;
+		} else {
+			hw->mac.set_lben = false;
+		}
+
+		fw_cmd.hdr.cmd = FW_DISABLE_RXEN_CMD;
+		fw_cmd.hdr.buf_len = FW_DISABLE_RXEN_LEN;
+		fw_cmd.hdr.checksum = FW_DEFAULT_CHECKSUM;
+		fw_cmd.port_number = hw->bus.lan_id;
+
+		status = ixgbe_host_interface_command(hw, &fw_cmd,
+					sizeof(struct ixgbe_hic_disable_rxen),
+					IXGBE_HI_COMMAND_TIMEOUT, true);
+
+		/* If we fail - disable RX using register write */
+		if (status) {
+			rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
+			if (rxctrl & IXGBE_RXCTRL_RXEN) {
+				rxctrl &= ~IXGBE_RXCTRL_RXEN;
+				IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl);
+			}
+		}
+	}
+}
+
+/** ixgbe_update_eeprom_checksum_X550 - Updates the EEPROM checksum and flash
+ *  @hw: pointer to hardware structure
+ *
+ *  After writing EEPROM to shadow RAM using EEWR register, software calculates
+ *  checksum and updates the EEPROM and instructs the hardware to update
+ *  the flash.
+ **/
+static s32 ixgbe_update_eeprom_checksum_X550(struct ixgbe_hw *hw)
+{
+	s32 status;
+	u16 checksum = 0;
+
+	/* Read the first word from the EEPROM. If this times out or fails, do
+	 * not continue or we could be in for a very long wait while every
+	 * EEPROM read fails
+	 */
+	status = ixgbe_read_ee_hostif_X550(hw, 0, &checksum);
+	if (status) {
+		hw_dbg(hw, "EEPROM read failed\n");
+		return status;
+	}
+
+	status = ixgbe_calc_eeprom_checksum_X550(hw);
+	if (status < 0)
+		return status;
+
+	checksum = (u16)(status & 0xffff);
+
+	status = ixgbe_write_ee_hostif_X550(hw, IXGBE_EEPROM_CHECKSUM,
+					    checksum);
+	if (status)
+		return status;
+
+	status = ixgbe_update_flash_X550(hw);
+
+	return status;
+}
+
+/** ixgbe_write_ee_hostif_buffer_X550 - Write EEPROM word(s) using hostif
+ *  @hw: pointer to hardware structure
+ *  @offset: offset of  word in the EEPROM to write
+ *  @words: number of words
+ *  @data: word(s) write to the EEPROM
+ *
+ *
+ *  Write a 16 bit word(s) to the EEPROM using the hostif.
+ **/
+static s32 ixgbe_write_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
+					     u16 offset, u16 words,
+					     u16 *data)
+{
+	s32 status = 0;
+	u32 i = 0;
+
+	/* Take semaphore for the entire operation. */
+	status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+	if (status) {
+		hw_dbg(hw, "EEPROM write buffer - semaphore failed\n");
+		return status;
+	}
+
+	for (i = 0; i < words; i++) {
+		status = ixgbe_write_ee_hostif_data_X550(hw, offset + i,
+							 data[i]);
+		if (status) {
+			hw_dbg(hw, "Eeprom buffered write failed\n");
+			break;
+		}
+	}
+
+	hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM);
+
+	return status;
+}
+
+/** ixgbe_write_iosf_sb_reg_x550 - Writes a value to specified register of the
+ *  IOSF device
+ *
+ *  @hw: pointer to hardware structure
+ *  @reg_addr: 32 bit PHY register to write
+ *  @device_type: 3 bit device type
+ *  @data: Data to write to the register
+ **/
+static s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr,
+					u32 device_type, u32 data)
+{
+	u32 gssr = IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_PHY0_SM;
+	u32 command, error;
+	s32 ret;
+
+	ret = hw->mac.ops.acquire_swfw_sync(hw, gssr);
+	if (ret)
+		return ret;
+
+	ret = ixgbe_iosf_wait(hw, NULL);
+	if (ret)
+		goto out;
+
+	command = ((reg_addr << IXGBE_SB_IOSF_CTRL_ADDR_SHIFT) |
+		   (device_type << IXGBE_SB_IOSF_CTRL_TARGET_SELECT_SHIFT));
+
+	/* Write IOSF control register */
+	IXGBE_WRITE_REG(hw, IXGBE_SB_IOSF_INDIRECT_CTRL, command);
+
+	/* Write IOSF data register */
+	IXGBE_WRITE_REG(hw, IXGBE_SB_IOSF_INDIRECT_DATA, data);
+
+	ret = ixgbe_iosf_wait(hw, &command);
+
+	if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) {
+		error = (command & IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK) >>
+			 IXGBE_SB_IOSF_CTRL_CMPL_ERR_SHIFT;
+		hw_dbg(hw, "Failed to write, error %x\n", error);
+		return IXGBE_ERR_PHY;
+	}
+
+out:
+	hw->mac.ops.release_swfw_sync(hw, gssr);
+	return ret;
+}
+
+/**
+ *  ixgbe_setup_ixfi_x550em_x - MAC specific iXFI configuration
+ *  @hw: pointer to hardware structure
+ *
+ *  iXfI configuration needed for ixgbe_mac_X550EM_x devices.
+ **/
+static s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw)
+{
+	s32 status;
+	u32 reg_val;
+
+	/* Disable training protocol FSM. */
+	status = ixgbe_read_iosf_sb_reg_x550(hw,
+				IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+	if (status)
+		return status;
+
+	reg_val |= IXGBE_KRM_RX_TRN_LINKUP_CTRL_CONV_WO_PROTOCOL;
+	status = ixgbe_write_iosf_sb_reg_x550(hw,
+				IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+	if (status)
+		return status;
+
+	/* Disable Flex from training TXFFE. */
+	status = ixgbe_read_iosf_sb_reg_x550(hw,
+				IXGBE_KRM_DSP_TXFFE_STATE_4(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+	if (status)
+		return status;
+
+	reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_C0_EN;
+	reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CP1_CN1_EN;
+	reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CO_ADAPT_EN;
+	status = ixgbe_write_iosf_sb_reg_x550(hw,
+				IXGBE_KRM_DSP_TXFFE_STATE_4(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+	if (status)
+		return status;
+
+	status = ixgbe_read_iosf_sb_reg_x550(hw,
+				IXGBE_KRM_DSP_TXFFE_STATE_5(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+	if (status)
+		return status;
+
+	reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_C0_EN;
+	reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CP1_CN1_EN;
+	reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CO_ADAPT_EN;
+	status = ixgbe_write_iosf_sb_reg_x550(hw,
+				IXGBE_KRM_DSP_TXFFE_STATE_5(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+	if (status)
+		return status;
+
+	/* Enable override for coefficients. */
+	status = ixgbe_read_iosf_sb_reg_x550(hw,
+				IXGBE_KRM_TX_COEFF_CTRL_1(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+	if (status)
+		return status;
+
+	reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_OVRRD_EN;
+	reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_CZERO_EN;
+	reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_CPLUS1_OVRRD_EN;
+	reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_CMINUS1_OVRRD_EN;
+	status = ixgbe_write_iosf_sb_reg_x550(hw,
+				IXGBE_KRM_TX_COEFF_CTRL_1(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+	return status;
+}
+
+/**
+ *  ixgbe_restart_an_internal_phy_x550em - restart autonegotiation for the
+ *  internal PHY
+ *  @hw: pointer to hardware structure
+ **/
+static s32 ixgbe_restart_an_internal_phy_x550em(struct ixgbe_hw *hw)
+{
+	s32 status;
+	u32 link_ctrl;
+
+	/* Restart auto-negotiation. */
+	status = hw->mac.ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &link_ctrl);
+
+	if (status) {
+		hw_dbg(hw, "Auto-negotiation did not complete\n");
+		return status;
+	}
+
+	link_ctrl |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
+	status = hw->mac.ops.write_iosf_sb_reg(hw,
+				IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, link_ctrl);
+
+	if (hw->mac.type == ixgbe_mac_x550em_a) {
+		u32 flx_mask_st20;
+
+		/* Indicate to FW that AN restart has been asserted */
+		status = hw->mac.ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_mask_st20);
+
+		if (status) {
+			hw_dbg(hw, "Auto-negotiation did not complete\n");
+			return status;
+		}
+
+		flx_mask_st20 |= IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART;
+		status = hw->mac.ops.write_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, flx_mask_st20);
+	}
+
+	return status;
+}
+
+/** ixgbe_setup_ixfi_x550em - Configure the KR PHY for iXFI mode.
+ *  @hw: pointer to hardware structure
+ *  @speed: the link speed to force
+ *
+ *  Configures the integrated KR PHY to use iXFI mode. Used to connect an
+ *  internal and external PHY at a specific speed, without autonegotiation.
+ **/
+static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	s32 status;
+	u32 reg_val;
+
+	/* iXFI is only supported with X552 */
+	if (mac->type != ixgbe_mac_X550EM_x)
+		return IXGBE_ERR_LINK_SETUP;
+
+	/* Disable AN and force speed to 10G Serial. */
+	status = ixgbe_read_iosf_sb_reg_x550(hw,
+					IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+					IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+	if (status)
+		return status;
+
+	reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE;
+	reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK;
+
+	/* Select forced link speed for internal PHY. */
+	switch (*speed) {
+	case IXGBE_LINK_SPEED_10GB_FULL:
+		reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G;
+		break;
+	case IXGBE_LINK_SPEED_1GB_FULL:
+		reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G;
+		break;
+	default:
+		/* Other link speeds are not supported by internal KR PHY. */
+		return IXGBE_ERR_LINK_SETUP;
+	}
+
+	status = ixgbe_write_iosf_sb_reg_x550(hw,
+				IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+	if (status)
+		return status;
+
+	/* Additional configuration needed for x550em_x */
+	if (hw->mac.type == ixgbe_mac_X550EM_x) {
+		status = ixgbe_setup_ixfi_x550em_x(hw);
+		if (status)
+			return status;
+	}
+
+	/* Toggle port SW reset by AN reset. */
+	status = ixgbe_restart_an_internal_phy_x550em(hw);
+
+	return status;
+}
+
+/**
+ *  ixgbe_supported_sfp_modules_X550em - Check if SFP module type is supported
+ *  @hw: pointer to hardware structure
+ *  @linear: true if SFP module is linear
+ */
+static s32 ixgbe_supported_sfp_modules_X550em(struct ixgbe_hw *hw, bool *linear)
+{
+	switch (hw->phy.sfp_type) {
+	case ixgbe_sfp_type_not_present:
+		return IXGBE_ERR_SFP_NOT_PRESENT;
+	case ixgbe_sfp_type_da_cu_core0:
+	case ixgbe_sfp_type_da_cu_core1:
+		*linear = true;
+		break;
+	case ixgbe_sfp_type_srlr_core0:
+	case ixgbe_sfp_type_srlr_core1:
+	case ixgbe_sfp_type_da_act_lmt_core0:
+	case ixgbe_sfp_type_da_act_lmt_core1:
+	case ixgbe_sfp_type_1g_sx_core0:
+	case ixgbe_sfp_type_1g_sx_core1:
+	case ixgbe_sfp_type_1g_lx_core0:
+	case ixgbe_sfp_type_1g_lx_core1:
+		*linear = false;
+		break;
+	case ixgbe_sfp_type_unknown:
+	case ixgbe_sfp_type_1g_cu_core0:
+	case ixgbe_sfp_type_1g_cu_core1:
+	default:
+		return IXGBE_ERR_SFP_NOT_SUPPORTED;
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_setup_mac_link_sfp_x550em - Configure the KR PHY for SFP.
+ * @hw: pointer to hardware structure
+ * @speed: the link speed to force
+ * @autoneg_wait_to_complete: unused
+ *
+ * Configures the extern PHY and the integrated KR PHY for SFP support.
+ */
+static s32
+ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw,
+				ixgbe_link_speed speed,
+				__always_unused bool autoneg_wait_to_complete)
+{
+	s32 status;
+	u16 reg_slice, reg_val;
+	bool setup_linear = false;
+
+	/* Check if SFP module is supported and linear */
+	status = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear);
+
+	/* If no SFP module present, then return success. Return success since
+	 * there is no reason to configure CS4227 and SFP not present error is
+	 * not accepted in the setup MAC link flow.
+	 */
+	if (status == IXGBE_ERR_SFP_NOT_PRESENT)
+		return 0;
+
+	if (status)
+		return status;
+
+	/* Configure internal PHY for KR/KX. */
+	ixgbe_setup_kr_speed_x550em(hw, speed);
+
+	/* Configure CS4227 LINE side to proper mode. */
+	reg_slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12);
+	if (setup_linear)
+		reg_val = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 0x1;
+	else
+		reg_val = (IXGBE_CS4227_EDC_MODE_SR << 1) | 0x1;
+
+	status = hw->link.ops.write_link(hw, hw->link.addr, reg_slice,
+					 reg_val);
+
+	return status;
+}
+
+/**
+ * ixgbe_setup_sfi_x550a - Configure the internal PHY for native SFI mode
+ * @hw: pointer to hardware structure
+ * @speed: the link speed to force
+ *
+ * Configures the integrated PHY for native SFI mode. Used to connect the
+ * internal PHY directly to an SFP cage, without autonegotiation.
+ **/
+static s32 ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	s32 status;
+	u32 reg_val;
+
+	/* Disable all AN and force speed to 10G Serial. */
+	status = mac->ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+	if (status)
+		return status;
+
+	reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN;
+	reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN;
+	reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN;
+	reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK;
+
+	/* Select forced link speed for internal PHY. */
+	switch (*speed) {
+	case IXGBE_LINK_SPEED_10GB_FULL:
+		reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G;
+		break;
+	case IXGBE_LINK_SPEED_1GB_FULL:
+		reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G;
+		break;
+	default:
+		/* Other link speeds are not supported by internal PHY. */
+		return IXGBE_ERR_LINK_SETUP;
+	}
+
+	(void)mac->ops.write_iosf_sb_reg(hw,
+			IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+			IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+	/* change mode enforcement rules to hybrid */
+	(void)mac->ops.read_iosf_sb_reg(hw,
+			IXGBE_KRM_FLX_TMRS_CTRL_ST31(hw->bus.lan_id),
+			IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+	reg_val |= 0x0400;
+
+	(void)mac->ops.write_iosf_sb_reg(hw,
+			IXGBE_KRM_FLX_TMRS_CTRL_ST31(hw->bus.lan_id),
+			IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+	/* manually control the config */
+	(void)mac->ops.read_iosf_sb_reg(hw,
+			IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+			IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+	reg_val |= 0x20002240;
+
+	(void)mac->ops.write_iosf_sb_reg(hw,
+			IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+			IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+	/* move the AN base page values */
+	(void)mac->ops.read_iosf_sb_reg(hw,
+			IXGBE_KRM_PCS_KX_AN(hw->bus.lan_id),
+			IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+	reg_val |= 0x1;
+
+	(void)mac->ops.write_iosf_sb_reg(hw,
+			IXGBE_KRM_PCS_KX_AN(hw->bus.lan_id),
+			IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+	/* set the AN37 over CB mode */
+	(void)mac->ops.read_iosf_sb_reg(hw,
+			IXGBE_KRM_AN_CNTL_4(hw->bus.lan_id),
+			IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+	reg_val |= 0x20000000;
+
+	(void)mac->ops.write_iosf_sb_reg(hw,
+			IXGBE_KRM_AN_CNTL_4(hw->bus.lan_id),
+			IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+	/* restart AN manually */
+	(void)mac->ops.read_iosf_sb_reg(hw,
+			IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+			IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+	reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
+
+	(void)mac->ops.write_iosf_sb_reg(hw,
+			IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+			IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+	/* Toggle port SW reset by AN reset. */
+	status = ixgbe_restart_an_internal_phy_x550em(hw);
+
+	return status;
+}
+
+/**
+ * ixgbe_setup_mac_link_sfp_n - Setup internal PHY for native SFP
+ * @hw: pointer to hardware structure
+ * @speed: link speed
+ * @autoneg_wait_to_complete: unused
+ *
+ * Configure the integrated PHY for native SFP support.
+ */
+static s32
+ixgbe_setup_mac_link_sfp_n(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+			   __always_unused bool autoneg_wait_to_complete)
+{
+	bool setup_linear = false;
+	u32 reg_phy_int;
+	s32 ret_val;
+
+	/* Check if SFP module is supported and linear */
+	ret_val = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear);
+
+	/* If no SFP module present, then return success. Return success since
+	 * SFP not present error is not excepted in the setup MAC link flow.
+	 */
+	if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT)
+		return 0;
+
+	if (ret_val)
+		return ret_val;
+
+	/* Configure internal PHY for native SFI based on module type */
+	ret_val = hw->mac.ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_phy_int);
+	if (ret_val)
+		return ret_val;
+
+	reg_phy_int &= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA;
+	if (!setup_linear)
+		reg_phy_int |= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR;
+
+	ret_val = hw->mac.ops.write_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, reg_phy_int);
+	if (ret_val)
+		return ret_val;
+
+	/* Setup SFI internal link. */
+	return ixgbe_setup_sfi_x550a(hw, &speed);
+}
+
+/**
+ * ixgbe_setup_mac_link_sfp_x550a - Setup internal PHY for SFP
+ * @hw: pointer to hardware structure
+ * @speed: link speed
+ * @autoneg_wait_to_complete: unused
+ *
+ * Configure the integrated PHY for SFP support.
+ */
+static s32
+ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+			       __always_unused bool autoneg_wait_to_complete)
+{
+	u32 reg_slice, slice_offset;
+	bool setup_linear = false;
+	u16 reg_phy_ext;
+	s32 ret_val;
+
+	/* Check if SFP module is supported and linear */
+	ret_val = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear);
+
+	/* If no SFP module present, then return success. Return success since
+	 * SFP not present error is not excepted in the setup MAC link flow.
+	 */
+	if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT)
+		return 0;
+
+	if (ret_val)
+		return ret_val;
+
+	/* Configure internal PHY for KR/KX. */
+	ixgbe_setup_kr_speed_x550em(hw, speed);
+
+	if (hw->phy.mdio.prtad == MDIO_PRTAD_NONE)
+		return IXGBE_ERR_PHY_ADDR_INVALID;
+
+	/* Get external PHY SKU id */
+	ret_val = hw->phy.ops.read_reg(hw, IXGBE_CS4227_EFUSE_PDF_SKU,
+				       IXGBE_MDIO_ZERO_DEV_TYPE, &reg_phy_ext);
+	if (ret_val)
+		return ret_val;
+
+	/* When configuring quad port CS4223, the MAC instance is part
+	 * of the slice offset.
+	 */
+	if (reg_phy_ext == IXGBE_CS4223_SKU_ID)
+		slice_offset = (hw->bus.lan_id +
+				(hw->bus.instance_id << 1)) << 12;
+	else
+		slice_offset = hw->bus.lan_id << 12;
+
+	/* Configure CS4227/CS4223 LINE side to proper mode. */
+	reg_slice = IXGBE_CS4227_LINE_SPARE24_LSB + slice_offset;
+
+	ret_val = hw->phy.ops.read_reg(hw, reg_slice,
+				       IXGBE_MDIO_ZERO_DEV_TYPE, &reg_phy_ext);
+	if (ret_val)
+		return ret_val;
+
+	reg_phy_ext &= ~((IXGBE_CS4227_EDC_MODE_CX1 << 1) |
+			 (IXGBE_CS4227_EDC_MODE_SR << 1));
+
+	if (setup_linear)
+		reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
+	else
+		reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
+
+	ret_val = hw->phy.ops.write_reg(hw, reg_slice,
+					IXGBE_MDIO_ZERO_DEV_TYPE, reg_phy_ext);
+	if (ret_val)
+		return ret_val;
+
+	/* Flush previous write with a read */
+	return hw->phy.ops.read_reg(hw, reg_slice,
+				    IXGBE_MDIO_ZERO_DEV_TYPE, &reg_phy_ext);
+}
+
+/**
+ * ixgbe_setup_mac_link_t_X550em - Sets the auto advertised link speed
+ * @hw: pointer to hardware structure
+ * @speed: new link speed
+ * @autoneg_wait: true when waiting for completion is needed
+ *
+ * Setup internal/external PHY link speed based on link speed, then set
+ * external PHY auto advertised link speed.
+ *
+ * Returns error status for any failure
+ **/
+static s32 ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw *hw,
+					 ixgbe_link_speed speed,
+					 bool autoneg_wait)
+{
+	s32 status;
+	ixgbe_link_speed force_speed;
+
+	/* Setup internal/external PHY link speed to iXFI (10G), unless
+	 * only 1G is auto advertised then setup KX link.
+	 */
+	if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+		force_speed = IXGBE_LINK_SPEED_10GB_FULL;
+	else
+		force_speed = IXGBE_LINK_SPEED_1GB_FULL;
+
+	/* If X552 and internal link mode is XFI, then setup XFI internal link.
+	 */
+	if (hw->mac.type == ixgbe_mac_X550EM_x &&
+	    !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
+		status = ixgbe_setup_ixfi_x550em(hw, &force_speed);
+
+		if (status)
+			return status;
+	}
+
+	return hw->phy.ops.setup_link_speed(hw, speed, autoneg_wait);
+}
+
+/** ixgbe_check_link_t_X550em - Determine link and speed status
+  * @hw: pointer to hardware structure
+  * @speed: pointer to link speed
+  * @link_up: true when link is up
+  * @link_up_wait_to_complete: bool used to wait for link up or not
+  *
+  * Check that both the MAC and X557 external PHY have link.
+  **/
+static s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw,
+				     ixgbe_link_speed *speed,
+				     bool *link_up,
+				     bool link_up_wait_to_complete)
+{
+	u32 status;
+	u16 i, autoneg_status;
+
+	if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
+		return IXGBE_ERR_CONFIG;
+
+	status = ixgbe_check_mac_link_generic(hw, speed, link_up,
+					      link_up_wait_to_complete);
+
+	/* If check link fails or MAC link is not up, then return */
+	if (status || !(*link_up))
+		return status;
+
+	/* MAC link is up, so check external PHY link.
+	 * Link status is latching low, and can only be used to detect link
+	 * drop, and not the current status of the link without performing
+	 * back-to-back reads.
+	 */
+	for (i = 0; i < 2; i++) {
+		status = hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN,
+					      &autoneg_status);
+
+		if (status)
+			return status;
+	}
+
+	/* If external PHY link is not up, then indicate link not up */
+	if (!(autoneg_status & IXGBE_MDIO_AUTO_NEG_LINK_STATUS))
+		*link_up = false;
+
+	return 0;
+}
+
+/**
+ * ixgbe_setup_sgmii - Set up link for sgmii
+ * @hw: pointer to hardware structure
+ * @speed: unused
+ * @autoneg_wait_to_complete: unused
+ */
+static s32
+ixgbe_setup_sgmii(struct ixgbe_hw *hw, __always_unused ixgbe_link_speed speed,
+		  __always_unused bool autoneg_wait_to_complete)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	u32 lval, sval, flx_val;
+	s32 rc;
+
+	rc = mac->ops.read_iosf_sb_reg(hw,
+				       IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+				       IXGBE_SB_IOSF_TARGET_KR_PHY, &lval);
+	if (rc)
+		return rc;
+
+	lval &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE;
+	lval &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK;
+	lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_SGMII_EN;
+	lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CLAUSE_37_EN;
+	lval |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G;
+	rc = mac->ops.write_iosf_sb_reg(hw,
+					IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+					IXGBE_SB_IOSF_TARGET_KR_PHY, lval);
+	if (rc)
+		return rc;
+
+	rc = mac->ops.read_iosf_sb_reg(hw,
+				       IXGBE_KRM_SGMII_CTRL(hw->bus.lan_id),
+				       IXGBE_SB_IOSF_TARGET_KR_PHY, &sval);
+	if (rc)
+		return rc;
+
+	sval |= IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_10_D;
+	sval |= IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_100_D;
+	rc = mac->ops.write_iosf_sb_reg(hw,
+					IXGBE_KRM_SGMII_CTRL(hw->bus.lan_id),
+					IXGBE_SB_IOSF_TARGET_KR_PHY, sval);
+	if (rc)
+		return rc;
+
+	rc = mac->ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val);
+	if (rc)
+		return rc;
+
+	rc = mac->ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val);
+	if (rc)
+		return rc;
+
+	flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK;
+	flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G;
+	flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN;
+	flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN;
+	flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN;
+
+	rc = mac->ops.write_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, flx_val);
+	if (rc)
+		return rc;
+
+	rc = ixgbe_restart_an_internal_phy_x550em(hw);
+	return rc;
+}
+
+/**
+ * ixgbe_setup_sgmii_fw - Set up link for sgmii with firmware-controlled PHYs
+ * @hw: pointer to hardware structure
+ * @speed: the link speed to force
+ * @autoneg_wait: true when waiting for completion is needed
+ */
+static s32 ixgbe_setup_sgmii_fw(struct ixgbe_hw *hw, ixgbe_link_speed speed,
+				bool autoneg_wait)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	u32 lval, sval, flx_val;
+	s32 rc;
+
+	rc = mac->ops.read_iosf_sb_reg(hw,
+				       IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+				       IXGBE_SB_IOSF_TARGET_KR_PHY, &lval);
+	if (rc)
+		return rc;
+
+	lval &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE;
+	lval &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK;
+	lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_SGMII_EN;
+	lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CLAUSE_37_EN;
+	lval &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G;
+	rc = mac->ops.write_iosf_sb_reg(hw,
+					IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+					IXGBE_SB_IOSF_TARGET_KR_PHY, lval);
+	if (rc)
+		return rc;
+
+	rc = mac->ops.read_iosf_sb_reg(hw,
+				       IXGBE_KRM_SGMII_CTRL(hw->bus.lan_id),
+				       IXGBE_SB_IOSF_TARGET_KR_PHY, &sval);
+	if (rc)
+		return rc;
+
+	sval &= ~IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_10_D;
+	sval &= ~IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_100_D;
+	rc = mac->ops.write_iosf_sb_reg(hw,
+					IXGBE_KRM_SGMII_CTRL(hw->bus.lan_id),
+					IXGBE_SB_IOSF_TARGET_KR_PHY, sval);
+	if (rc)
+		return rc;
+
+	rc = mac->ops.write_iosf_sb_reg(hw,
+					IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+					IXGBE_SB_IOSF_TARGET_KR_PHY, lval);
+	if (rc)
+		return rc;
+
+	rc = mac->ops.read_iosf_sb_reg(hw,
+				    IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				    IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val);
+	if (rc)
+		return rc;
+
+	flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK;
+	flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN;
+	flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN;
+	flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN;
+	flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN;
+
+	rc = mac->ops.write_iosf_sb_reg(hw,
+				    IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				    IXGBE_SB_IOSF_TARGET_KR_PHY, flx_val);
+	if (rc)
+		return rc;
+
+	ixgbe_restart_an_internal_phy_x550em(hw);
+
+	return hw->phy.ops.setup_link_speed(hw, speed, autoneg_wait);
+}
+
+/**
+ * ixgbe_fc_autoneg_sgmii_x550em_a - Enable flow control IEEE clause 37
+ * @hw: pointer to hardware structure
+ *
+ * Enable flow control according to IEEE clause 37.
+ */
+static void ixgbe_fc_autoneg_sgmii_x550em_a(struct ixgbe_hw *hw)
+{
+	s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED;
+	u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 };
+	ixgbe_link_speed speed;
+	bool link_up;
+
+	/* AN should have completed when the cable was plugged in.
+	 * Look for reasons to bail out.  Bail out if:
+	 * - FC autoneg is disabled, or if
+	 * - link is not up.
+	 */
+	if (hw->fc.disable_fc_autoneg)
+		goto out;
+
+	hw->mac.ops.check_link(hw, &speed, &link_up, false);
+	if (!link_up)
+		goto out;
+
+	/* Check if auto-negotiation has completed */
+	status = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_LINK_INFO, &info);
+	if (status || !(info[0] & FW_PHY_ACT_GET_LINK_INFO_AN_COMPLETE)) {
+		status = IXGBE_ERR_FC_NOT_NEGOTIATED;
+		goto out;
+	}
+
+	/* Negotiate the flow control */
+	status = ixgbe_negotiate_fc(hw, info[0], info[0],
+				    FW_PHY_ACT_GET_LINK_INFO_FC_RX,
+				    FW_PHY_ACT_GET_LINK_INFO_FC_TX,
+				    FW_PHY_ACT_GET_LINK_INFO_LP_FC_RX,
+				    FW_PHY_ACT_GET_LINK_INFO_LP_FC_TX);
+
+out:
+	if (!status) {
+		hw->fc.fc_was_autonegged = true;
+	} else {
+		hw->fc.fc_was_autonegged = false;
+		hw->fc.current_mode = hw->fc.requested_mode;
+	}
+}
+
+/** ixgbe_init_mac_link_ops_X550em_a - Init mac link function pointers
+ *  @hw: pointer to hardware structure
+ **/
+static void ixgbe_init_mac_link_ops_X550em_a(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+
+	switch (mac->ops.get_media_type(hw)) {
+	case ixgbe_media_type_fiber:
+		mac->ops.setup_fc = NULL;
+		mac->ops.fc_autoneg = ixgbe_fc_autoneg_fiber_x550em_a;
+		break;
+	case ixgbe_media_type_copper:
+		if (hw->device_id != IXGBE_DEV_ID_X550EM_A_1G_T &&
+		    hw->device_id != IXGBE_DEV_ID_X550EM_A_1G_T_L) {
+			mac->ops.setup_link = ixgbe_setup_mac_link_t_X550em;
+			break;
+		}
+		mac->ops.fc_autoneg = ixgbe_fc_autoneg_sgmii_x550em_a;
+		mac->ops.setup_fc = ixgbe_fc_autoneg_fw;
+		mac->ops.setup_link = ixgbe_setup_sgmii_fw;
+		mac->ops.check_link = ixgbe_check_mac_link_generic;
+		break;
+	case ixgbe_media_type_backplane:
+		mac->ops.fc_autoneg = ixgbe_fc_autoneg_backplane_x550em_a;
+		mac->ops.setup_fc = ixgbe_setup_fc_backplane_x550em_a;
+		break;
+	default:
+		break;
+	}
+}
+
+/** ixgbe_init_mac_link_ops_X550em - init mac link function pointers
+ *  @hw: pointer to hardware structure
+ **/
+static void ixgbe_init_mac_link_ops_X550em(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+
+	mac->ops.setup_fc = ixgbe_setup_fc_x550em;
+
+	switch (mac->ops.get_media_type(hw)) {
+	case ixgbe_media_type_fiber:
+		/* CS4227 does not support autoneg, so disable the laser control
+		 * functions for SFP+ fiber
+		 */
+		mac->ops.disable_tx_laser = NULL;
+		mac->ops.enable_tx_laser = NULL;
+		mac->ops.flap_tx_laser = NULL;
+		mac->ops.setup_link = ixgbe_setup_mac_link_multispeed_fiber;
+		switch (hw->device_id) {
+		case IXGBE_DEV_ID_X550EM_A_SFP_N:
+			mac->ops.setup_mac_link = ixgbe_setup_mac_link_sfp_n;
+			break;
+		case IXGBE_DEV_ID_X550EM_A_SFP:
+			mac->ops.setup_mac_link =
+						ixgbe_setup_mac_link_sfp_x550a;
+			break;
+		default:
+			mac->ops.setup_mac_link =
+						ixgbe_setup_mac_link_sfp_x550em;
+			break;
+		}
+		mac->ops.set_rate_select_speed =
+					ixgbe_set_soft_rate_select_speed;
+		break;
+	case ixgbe_media_type_copper:
+		if (hw->device_id == IXGBE_DEV_ID_X550EM_X_1G_T)
+			break;
+		mac->ops.setup_link = ixgbe_setup_mac_link_t_X550em;
+		mac->ops.setup_fc = ixgbe_setup_fc_generic;
+		mac->ops.check_link = ixgbe_check_link_t_X550em;
+		break;
+	case ixgbe_media_type_backplane:
+		if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SGMII ||
+		    hw->device_id == IXGBE_DEV_ID_X550EM_A_SGMII_L)
+			mac->ops.setup_link = ixgbe_setup_sgmii;
+		break;
+	default:
+		break;
+	}
+
+	/* Additional modification for X550em_a devices */
+	if (hw->mac.type == ixgbe_mac_x550em_a)
+		ixgbe_init_mac_link_ops_X550em_a(hw);
+}
+
+/** ixgbe_setup_sfp_modules_X550em - Setup SFP module
+ * @hw: pointer to hardware structure
+ */
+static s32 ixgbe_setup_sfp_modules_X550em(struct ixgbe_hw *hw)
+{
+	s32 status;
+	bool linear;
+
+	/* Check if SFP module is supported */
+	status = ixgbe_supported_sfp_modules_X550em(hw, &linear);
+	if (status)
+		return status;
+
+	ixgbe_init_mac_link_ops_X550em(hw);
+	hw->phy.ops.reset = NULL;
+
+	return 0;
+}
+
+/** ixgbe_get_link_capabilities_x550em - Determines link capabilities
+ * @hw: pointer to hardware structure
+ * @speed: pointer to link speed
+ * @autoneg: true when autoneg or autotry is enabled
+ **/
+static s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw,
+					      ixgbe_link_speed *speed,
+					      bool *autoneg)
+{
+	if (hw->phy.type == ixgbe_phy_fw) {
+		*autoneg = true;
+		*speed = hw->phy.speeds_supported;
+		return 0;
+	}
+
+	/* SFP */
+	if (hw->phy.media_type == ixgbe_media_type_fiber) {
+		/* CS4227 SFP must not enable auto-negotiation */
+		*autoneg = false;
+
+		if (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
+		    hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 ||
+		    hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
+		    hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1) {
+			*speed = IXGBE_LINK_SPEED_1GB_FULL;
+			return 0;
+		}
+
+		/* Link capabilities are based on SFP */
+		if (hw->phy.multispeed_fiber)
+			*speed = IXGBE_LINK_SPEED_10GB_FULL |
+				 IXGBE_LINK_SPEED_1GB_FULL;
+		else
+			*speed = IXGBE_LINK_SPEED_10GB_FULL;
+	} else {
+		switch (hw->phy.type) {
+		case ixgbe_phy_x550em_kx4:
+			*speed = IXGBE_LINK_SPEED_1GB_FULL |
+				 IXGBE_LINK_SPEED_2_5GB_FULL |
+				 IXGBE_LINK_SPEED_10GB_FULL;
+			break;
+		case ixgbe_phy_x550em_xfi:
+			*speed = IXGBE_LINK_SPEED_1GB_FULL |
+				 IXGBE_LINK_SPEED_10GB_FULL;
+			break;
+		case ixgbe_phy_ext_1g_t:
+		case ixgbe_phy_sgmii:
+			*speed = IXGBE_LINK_SPEED_1GB_FULL;
+			break;
+		case ixgbe_phy_x550em_kr:
+			if (hw->mac.type == ixgbe_mac_x550em_a) {
+				/* check different backplane modes */
+				if (hw->phy.nw_mng_if_sel &
+				    IXGBE_NW_MNG_IF_SEL_PHY_SPEED_2_5G) {
+					*speed = IXGBE_LINK_SPEED_2_5GB_FULL;
+					break;
+				} else if (hw->device_id ==
+					   IXGBE_DEV_ID_X550EM_A_KR_L) {
+					*speed = IXGBE_LINK_SPEED_1GB_FULL;
+					break;
+				}
+			}
+			fallthrough;
+		default:
+			*speed = IXGBE_LINK_SPEED_10GB_FULL |
+				 IXGBE_LINK_SPEED_1GB_FULL;
+			break;
+		}
+		*autoneg = true;
+	}
+	return 0;
+}
+
+/**
+ * ixgbe_get_lasi_ext_t_x550em - Determime external Base T PHY interrupt cause
+ * @hw: pointer to hardware structure
+ * @lsc: pointer to boolean flag which indicates whether external Base T
+ *	 PHY interrupt is lsc
+ *
+ * Determime if external Base T PHY interrupt cause is high temperature
+ * failure alarm or link status change.
+ *
+ * Return IXGBE_ERR_OVERTEMP if interrupt is high temperature
+ * failure alarm, else return PHY access status.
+ **/
+static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
+{
+	u32 status;
+	u16 reg;
+
+	*lsc = false;
+
+	/* Vendor alarm triggered */
+	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG,
+				      MDIO_MMD_VEND1,
+				      &reg);
+
+	if (status || !(reg & IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN))
+		return status;
+
+	/* Vendor Auto-Neg alarm triggered or Global alarm 1 triggered */
+	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_FLAG,
+				      MDIO_MMD_VEND1,
+				      &reg);
+
+	if (status || !(reg & (IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN |
+				IXGBE_MDIO_GLOBAL_ALARM_1_INT)))
+		return status;
+
+	/* Global alarm triggered */
+	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_ALARM_1,
+				      MDIO_MMD_VEND1,
+				      &reg);
+
+	if (status)
+		return status;
+
+	/* If high temperature failure, then return over temp error and exit */
+	if (reg & IXGBE_MDIO_GLOBAL_ALM_1_HI_TMP_FAIL) {
+		/* power down the PHY in case the PHY FW didn't already */
+		ixgbe_set_copper_phy_power(hw, false);
+		return IXGBE_ERR_OVERTEMP;
+	}
+	if (reg & IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT) {
+		/*  device fault alarm triggered */
+		status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_FAULT_MSG,
+					  MDIO_MMD_VEND1,
+					  &reg);
+		if (status)
+			return status;
+
+		/* if device fault was due to high temp alarm handle and exit */
+		if (reg == IXGBE_MDIO_GLOBAL_FAULT_MSG_HI_TMP) {
+			/* power down the PHY in case the PHY FW didn't */
+			ixgbe_set_copper_phy_power(hw, false);
+			return IXGBE_ERR_OVERTEMP;
+		}
+	}
+
+	/* Vendor alarm 2 triggered */
+	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG,
+				      MDIO_MMD_AN, &reg);
+
+	if (status || !(reg & IXGBE_MDIO_GLOBAL_STD_ALM2_INT))
+		return status;
+
+	/* link connect/disconnect event occurred */
+	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM2,
+				      MDIO_MMD_AN, &reg);
+
+	if (status)
+		return status;
+
+	/* Indicate LSC */
+	if (reg & IXGBE_MDIO_AUTO_NEG_VEN_LSC)
+		*lsc = true;
+
+	return 0;
+}
+
+/**
+ * ixgbe_enable_lasi_ext_t_x550em - Enable external Base T PHY interrupts
+ * @hw: pointer to hardware structure
+ *
+ * Enable link status change and temperature failure alarm for the external
+ * Base T PHY
+ *
+ * Returns PHY access status
+ **/
+static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
+{
+	u32 status;
+	u16 reg;
+	bool lsc;
+
+	/* Clear interrupt flags */
+	status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc);
+
+	/* Enable link status change alarm */
+
+	/* Enable the LASI interrupts on X552 devices to receive notifications
+	 * of the link configurations of the external PHY and correspondingly
+	 * support the configuration of the internal iXFI link, since iXFI does
+	 * not support auto-negotiation. This is not required for X553 devices
+	 * having KR support, which performs auto-negotiations and which is used
+	 * as the internal link to the external PHY. Hence adding a check here
+	 * to avoid enabling LASI interrupts for X553 devices.
+	 */
+	if (hw->mac.type != ixgbe_mac_x550em_a) {
+		status = hw->phy.ops.read_reg(hw,
+					    IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
+					    MDIO_MMD_AN, &reg);
+		if (status)
+			return status;
+
+		reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN;
+
+		status = hw->phy.ops.write_reg(hw,
+					    IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
+					    MDIO_MMD_AN, reg);
+		if (status)
+			return status;
+	}
+
+	/* Enable high temperature failure and global fault alarms */
+	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
+				      MDIO_MMD_VEND1,
+				      &reg);
+	if (status)
+		return status;
+
+	reg |= (IXGBE_MDIO_GLOBAL_INT_HI_TEMP_EN |
+		IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN);
+
+	status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
+				       MDIO_MMD_VEND1,
+				       reg);
+	if (status)
+		return status;
+
+	/* Enable vendor Auto-Neg alarm and Global Interrupt Mask 1 alarm */
+	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK,
+				      MDIO_MMD_VEND1,
+				      &reg);
+	if (status)
+		return status;
+
+	reg |= (IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN |
+		IXGBE_MDIO_GLOBAL_ALARM_1_INT);
+
+	status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK,
+				       MDIO_MMD_VEND1,
+				       reg);
+	if (status)
+		return status;
+
+	/* Enable chip-wide vendor alarm */
+	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_STD_MASK,
+				      MDIO_MMD_VEND1,
+				      &reg);
+	if (status)
+		return status;
+
+	reg |= IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN;
+
+	status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_STD_MASK,
+				       MDIO_MMD_VEND1,
+				       reg);
+
+	return status;
+}
+
+/**
+ * ixgbe_handle_lasi_ext_t_x550em - Handle external Base T PHY interrupt
+ * @hw: pointer to hardware structure
+ *
+ * Handle external Base T PHY interrupt. If high temperature
+ * failure alarm then return error, else if link status change
+ * then setup internal/external PHY link
+ *
+ * Return IXGBE_ERR_OVERTEMP if interrupt is high temperature
+ * failure alarm, else return PHY access status.
+ **/
+static s32 ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw)
+{
+	struct ixgbe_phy_info *phy = &hw->phy;
+	bool lsc;
+	u32 status;
+
+	status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc);
+	if (status)
+		return status;
+
+	if (lsc && phy->ops.setup_internal_link)
+		return phy->ops.setup_internal_link(hw);
+
+	return 0;
+}
+
+/**
+ * ixgbe_setup_kr_speed_x550em - Configure the KR PHY for link speed.
+ * @hw: pointer to hardware structure
+ * @speed: link speed
+ *
+ * Configures the integrated KR PHY.
+ **/
+static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw,
+				       ixgbe_link_speed speed)
+{
+	s32 status;
+	u32 reg_val;
+
+	status = hw->mac.ops.read_iosf_sb_reg(hw,
+					IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+					IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+	if (status)
+		return status;
+
+	reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE;
+	reg_val &= ~(IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KR |
+		     IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KX);
+
+	/* Advertise 10G support. */
+	if (speed & IXGBE_LINK_SPEED_10GB_FULL)
+		reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KR;
+
+	/* Advertise 1G support. */
+	if (speed & IXGBE_LINK_SPEED_1GB_FULL)
+		reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KX;
+
+	status = hw->mac.ops.write_iosf_sb_reg(hw,
+					IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+					IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+	if (hw->mac.type == ixgbe_mac_x550em_a) {
+		/* Set lane mode  to KR auto negotiation */
+		status = hw->mac.ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+
+		if (status)
+			return status;
+
+		reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK;
+		reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN;
+		reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN;
+		reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN;
+		reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN;
+
+		status = hw->mac.ops.write_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+	}
+
+	return ixgbe_restart_an_internal_phy_x550em(hw);
+}
+
+/**
+ * ixgbe_setup_kr_x550em - Configure the KR PHY
+ * @hw: pointer to hardware structure
+ **/
+static s32 ixgbe_setup_kr_x550em(struct ixgbe_hw *hw)
+{
+	/* leave link alone for 2.5G */
+	if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_2_5GB_FULL)
+		return 0;
+
+	if (ixgbe_check_reset_blocked(hw))
+		return 0;
+
+	return ixgbe_setup_kr_speed_x550em(hw, hw->phy.autoneg_advertised);
+}
+
+/** ixgbe_ext_phy_t_x550em_get_link - Get ext phy link status
+ *  @hw: address of hardware structure
+ *  @link_up: address of boolean to indicate link status
+ *
+ *  Returns error code if unable to get link status.
+ **/
+static s32 ixgbe_ext_phy_t_x550em_get_link(struct ixgbe_hw *hw, bool *link_up)
+{
+	u32 ret;
+	u16 autoneg_status;
+
+	*link_up = false;
+
+	/* read this twice back to back to indicate current status */
+	ret = hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN,
+				   &autoneg_status);
+	if (ret)
+		return ret;
+
+	ret = hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN,
+				   &autoneg_status);
+	if (ret)
+		return ret;
+
+	*link_up = !!(autoneg_status & IXGBE_MDIO_AUTO_NEG_LINK_STATUS);
+
+	return 0;
+}
+
+/** ixgbe_setup_internal_phy_t_x550em - Configure KR PHY to X557 link
+ *  @hw: point to hardware structure
+ *
+ *  Configures the link between the integrated KR PHY and the external X557 PHY
+ *  The driver will call this function when it gets a link status change
+ *  interrupt from the X557 PHY. This function configures the link speed
+ *  between the PHYs to match the link speed of the BASE-T link.
+ *
+ * A return of a non-zero value indicates an error, and the base driver should
+ * not report link up.
+ **/
+static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw)
+{
+	ixgbe_link_speed force_speed;
+	bool link_up;
+	u32 status;
+	u16 speed;
+
+	if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
+		return IXGBE_ERR_CONFIG;
+
+	if (!(hw->mac.type == ixgbe_mac_X550EM_x &&
+	      !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE))) {
+		speed = IXGBE_LINK_SPEED_10GB_FULL |
+			IXGBE_LINK_SPEED_1GB_FULL;
+		return ixgbe_setup_kr_speed_x550em(hw, speed);
+	}
+
+	/* If link is not up, then there is no setup necessary so return  */
+	status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up);
+	if (status)
+		return status;
+
+	if (!link_up)
+		return 0;
+
+	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_STAT,
+				      MDIO_MMD_AN,
+				      &speed);
+	if (status)
+		return status;
+
+	/* If link is not still up, then no setup is necessary so return */
+	status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up);
+	if (status)
+		return status;
+
+	if (!link_up)
+		return 0;
+
+	/* clear everything but the speed and duplex bits */
+	speed &= IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_MASK;
+
+	switch (speed) {
+	case IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10GB_FULL:
+		force_speed = IXGBE_LINK_SPEED_10GB_FULL;
+		break;
+	case IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_1GB_FULL:
+		force_speed = IXGBE_LINK_SPEED_1GB_FULL;
+		break;
+	default:
+		/* Internal PHY does not support anything else */
+		return IXGBE_ERR_INVALID_LINK_SETTINGS;
+	}
+
+	return ixgbe_setup_ixfi_x550em(hw, &force_speed);
+}
+
+/** ixgbe_reset_phy_t_X550em - Performs X557 PHY reset and enables LASI
+ *  @hw: pointer to hardware structure
+ **/
+static s32 ixgbe_reset_phy_t_X550em(struct ixgbe_hw *hw)
+{
+	s32 status;
+
+	status = ixgbe_reset_phy_generic(hw);
+
+	if (status)
+		return status;
+
+	/* Configure Link Status Alarm and Temperature Threshold interrupts */
+	return ixgbe_enable_lasi_ext_t_x550em(hw);
+}
+
+/**
+ *  ixgbe_led_on_t_x550em - Turns on the software controllable LEDs.
+ *  @hw: pointer to hardware structure
+ *  @led_idx: led number to turn on
+ **/
+static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
+{
+	u16 phy_data;
+
+	if (led_idx >= IXGBE_X557_MAX_LED_INDEX)
+		return IXGBE_ERR_PARAM;
+
+	/* To turn on the LED, set mode to ON. */
+	hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+			     MDIO_MMD_VEND1, &phy_data);
+	phy_data |= IXGBE_X557_LED_MANUAL_SET_MASK;
+	hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+			      MDIO_MMD_VEND1, phy_data);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_led_off_t_x550em - Turns off the software controllable LEDs.
+ *  @hw: pointer to hardware structure
+ *  @led_idx: led number to turn off
+ **/
+static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
+{
+	u16 phy_data;
+
+	if (led_idx >= IXGBE_X557_MAX_LED_INDEX)
+		return IXGBE_ERR_PARAM;
+
+	/* To turn on the LED, set mode to ON. */
+	hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+			     MDIO_MMD_VEND1, &phy_data);
+	phy_data &= ~IXGBE_X557_LED_MANUAL_SET_MASK;
+	hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+			      MDIO_MMD_VEND1, phy_data);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_set_fw_drv_ver_x550 - Sends driver version to firmware
+ *  @hw: pointer to the HW structure
+ *  @maj: driver version major number
+ *  @min: driver version minor number
+ *  @build: driver version build number
+ *  @sub: driver version sub build number
+ *  @len: length of driver_ver string
+ *  @driver_ver: driver string
+ *
+ *  Sends driver version number to firmware through the manageability
+ *  block.  On success return 0
+ *  else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring
+ *  semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails.
+ **/
+static s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min,
+				     u8 build, u8 sub, u16 len,
+				     const char *driver_ver)
+{
+	struct ixgbe_hic_drv_info2 fw_cmd;
+	s32 ret_val;
+	int i;
+
+	if (!len || !driver_ver || (len > sizeof(fw_cmd.driver_string)))
+		return IXGBE_ERR_INVALID_ARGUMENT;
+
+	fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO;
+	fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN + len;
+	fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED;
+	fw_cmd.port_num = (u8)hw->bus.func;
+	fw_cmd.ver_maj = maj;
+	fw_cmd.ver_min = min;
+	fw_cmd.ver_build = build;
+	fw_cmd.ver_sub = sub;
+	fw_cmd.hdr.checksum = 0;
+	memcpy(fw_cmd.driver_string, driver_ver, len);
+	fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
+			      (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
+
+	for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
+		ret_val = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd,
+						       sizeof(fw_cmd),
+						       IXGBE_HI_COMMAND_TIMEOUT,
+						       true);
+		if (ret_val)
+			continue;
+
+		if (fw_cmd.hdr.cmd_or_resp.ret_status !=
+		    FW_CEM_RESP_STATUS_SUCCESS)
+			return IXGBE_ERR_HOST_INTERFACE_COMMAND;
+		return 0;
+	}
+
+	return ret_val;
+}
+
+/** ixgbe_get_lcd_x550em - Determine lowest common denominator
+ *  @hw: pointer to hardware structure
+ *  @lcd_speed: pointer to lowest common link speed
+ *
+ *  Determine lowest common link speed with link partner.
+ **/
+static s32 ixgbe_get_lcd_t_x550em(struct ixgbe_hw *hw,
+				  ixgbe_link_speed *lcd_speed)
+{
+	u16 an_lp_status;
+	s32 status;
+	u16 word = hw->eeprom.ctrl_word_3;
+
+	*lcd_speed = IXGBE_LINK_SPEED_UNKNOWN;
+
+	status = hw->phy.ops.read_reg(hw, IXGBE_AUTO_NEG_LP_STATUS,
+				      MDIO_MMD_AN,
+				      &an_lp_status);
+	if (status)
+		return status;
+
+	/* If link partner advertised 1G, return 1G */
+	if (an_lp_status & IXGBE_AUTO_NEG_LP_1000BASE_CAP) {
+		*lcd_speed = IXGBE_LINK_SPEED_1GB_FULL;
+		return status;
+	}
+
+	/* If 10G disabled for LPLU via NVM D10GMP, then return no valid LCD */
+	if ((hw->bus.lan_id && (word & NVM_INIT_CTRL_3_D10GMP_PORT1)) ||
+	    (word & NVM_INIT_CTRL_3_D10GMP_PORT0))
+		return status;
+
+	/* Link partner not capable of lower speeds, return 10G */
+	*lcd_speed = IXGBE_LINK_SPEED_10GB_FULL;
+	return status;
+}
+
+/**
+ * ixgbe_setup_fc_x550em - Set up flow control
+ * @hw: pointer to hardware structure
+ */
+static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw)
+{
+	bool pause, asm_dir;
+	u32 reg_val;
+	s32 rc = 0;
+
+	/* Validate the requested mode */
+	if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
+		hw_err(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n");
+		return IXGBE_ERR_INVALID_LINK_SETTINGS;
+	}
+
+	/* 10gig parts do not have a word in the EEPROM to determine the
+	 * default flow control setting, so we explicitly set it to full.
+	 */
+	if (hw->fc.requested_mode == ixgbe_fc_default)
+		hw->fc.requested_mode = ixgbe_fc_full;
+
+	/* Determine PAUSE and ASM_DIR bits. */
+	switch (hw->fc.requested_mode) {
+	case ixgbe_fc_none:
+		pause = false;
+		asm_dir = false;
+		break;
+	case ixgbe_fc_tx_pause:
+		pause = false;
+		asm_dir = true;
+		break;
+	case ixgbe_fc_rx_pause:
+		/* Rx Flow control is enabled and Tx Flow control is
+		 * disabled by software override. Since there really
+		 * isn't a way to advertise that we are capable of RX
+		 * Pause ONLY, we will advertise that we support both
+		 * symmetric and asymmetric Rx PAUSE, as such we fall
+		 * through to the fc_full statement.  Later, we will
+		 * disable the adapter's ability to send PAUSE frames.
+		 */
+		fallthrough;
+	case ixgbe_fc_full:
+		pause = true;
+		asm_dir = true;
+		break;
+	default:
+		hw_err(hw, "Flow control param set incorrectly\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_X550EM_X_KR:
+	case IXGBE_DEV_ID_X550EM_A_KR:
+	case IXGBE_DEV_ID_X550EM_A_KR_L:
+		rc = hw->mac.ops.read_iosf_sb_reg(hw,
+					    IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+					    IXGBE_SB_IOSF_TARGET_KR_PHY,
+					    &reg_val);
+		if (rc)
+			return rc;
+
+		reg_val &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
+			     IXGBE_KRM_AN_CNTL_1_ASM_PAUSE);
+		if (pause)
+			reg_val |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE;
+		if (asm_dir)
+			reg_val |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
+		rc = hw->mac.ops.write_iosf_sb_reg(hw,
+					    IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+					    IXGBE_SB_IOSF_TARGET_KR_PHY,
+					    reg_val);
+
+		/* This device does not fully support AN. */
+		hw->fc.disable_fc_autoneg = true;
+		break;
+	case IXGBE_DEV_ID_X550EM_X_XFI:
+		hw->fc.disable_fc_autoneg = true;
+		break;
+	default:
+		break;
+	}
+	return rc;
+}
+
+/**
+ *  ixgbe_fc_autoneg_backplane_x550em_a - Enable flow control IEEE clause 37
+ *  @hw: pointer to hardware structure
+ **/
+static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw)
+{
+	u32 link_s1, lp_an_page_low, an_cntl_1;
+	s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED;
+	ixgbe_link_speed speed;
+	bool link_up;
+
+	/* AN should have completed when the cable was plugged in.
+	 * Look for reasons to bail out.  Bail out if:
+	 * - FC autoneg is disabled, or if
+	 * - link is not up.
+	 */
+	if (hw->fc.disable_fc_autoneg) {
+		hw_err(hw, "Flow control autoneg is disabled");
+		goto out;
+	}
+
+	hw->mac.ops.check_link(hw, &speed, &link_up, false);
+	if (!link_up) {
+		hw_err(hw, "The link is down");
+		goto out;
+	}
+
+	/* Check at auto-negotiation has completed */
+	status = hw->mac.ops.read_iosf_sb_reg(hw,
+					IXGBE_KRM_LINK_S1(hw->bus.lan_id),
+					IXGBE_SB_IOSF_TARGET_KR_PHY, &link_s1);
+
+	if (status || (link_s1 & IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE) == 0) {
+		hw_dbg(hw, "Auto-Negotiation did not complete\n");
+		status = IXGBE_ERR_FC_NOT_NEGOTIATED;
+		goto out;
+	}
+
+	/* Read the 10g AN autoc and LP ability registers and resolve
+	 * local flow control settings accordingly
+	 */
+	status = hw->mac.ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl_1);
+
+	if (status) {
+		hw_dbg(hw, "Auto-Negotiation did not complete\n");
+		goto out;
+	}
+
+	status = hw->mac.ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_LP_BASE_PAGE_HIGH(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &lp_an_page_low);
+
+	if (status) {
+		hw_dbg(hw, "Auto-Negotiation did not complete\n");
+		goto out;
+	}
+
+	status = ixgbe_negotiate_fc(hw, an_cntl_1, lp_an_page_low,
+				    IXGBE_KRM_AN_CNTL_1_SYM_PAUSE,
+				    IXGBE_KRM_AN_CNTL_1_ASM_PAUSE,
+				    IXGBE_KRM_LP_BASE_PAGE_HIGH_SYM_PAUSE,
+				    IXGBE_KRM_LP_BASE_PAGE_HIGH_ASM_PAUSE);
+
+out:
+	if (!status) {
+		hw->fc.fc_was_autonegged = true;
+	} else {
+		hw->fc.fc_was_autonegged = false;
+		hw->fc.current_mode = hw->fc.requested_mode;
+	}
+}
+
+/**
+ *  ixgbe_fc_autoneg_fiber_x550em_a - passthrough FC settings
+ *  @hw: pointer to hardware structure
+ **/
+static void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *hw)
+{
+	hw->fc.fc_was_autonegged = false;
+	hw->fc.current_mode = hw->fc.requested_mode;
+}
+
+/** ixgbe_enter_lplu_x550em - Transition to low power states
+ *  @hw: pointer to hardware structure
+ *
+ *  Configures Low Power Link Up on transition to low power states
+ *  (from D0 to non-D0). Link is required to enter LPLU so avoid resetting
+ *  the X557 PHY immediately prior to entering LPLU.
+ **/
+static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw)
+{
+	u16 an_10g_cntl_reg, autoneg_reg, speed;
+	s32 status;
+	ixgbe_link_speed lcd_speed;
+	u32 save_autoneg;
+	bool link_up;
+
+	/* If blocked by MNG FW, then don't restart AN */
+	if (ixgbe_check_reset_blocked(hw))
+		return 0;
+
+	status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up);
+	if (status)
+		return status;
+
+	status = hw->eeprom.ops.read(hw, NVM_INIT_CTRL_3,
+				     &hw->eeprom.ctrl_word_3);
+	if (status)
+		return status;
+
+	/* If link is down, LPLU disabled in NVM, WoL disabled, or
+	 * manageability disabled, then force link down by entering
+	 * low power mode.
+	 */
+	if (!link_up || !(hw->eeprom.ctrl_word_3 & NVM_INIT_CTRL_3_LPLU) ||
+	    !(hw->wol_enabled || ixgbe_mng_present(hw)))
+		return ixgbe_set_copper_phy_power(hw, false);
+
+	/* Determine LCD */
+	status = ixgbe_get_lcd_t_x550em(hw, &lcd_speed);
+	if (status)
+		return status;
+
+	/* If no valid LCD link speed, then force link down and exit. */
+	if (lcd_speed == IXGBE_LINK_SPEED_UNKNOWN)
+		return ixgbe_set_copper_phy_power(hw, false);
+
+	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_STAT,
+				      MDIO_MMD_AN,
+				      &speed);
+	if (status)
+		return status;
+
+	/* If no link now, speed is invalid so take link down */
+	status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up);
+	if (status)
+		return ixgbe_set_copper_phy_power(hw, false);
+
+	/* clear everything but the speed bits */
+	speed &= IXGBE_MDIO_AUTO_NEG_VEN_STAT_SPEED_MASK;
+
+	/* If current speed is already LCD, then exit. */
+	if (((speed == IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_1GB) &&
+	     (lcd_speed == IXGBE_LINK_SPEED_1GB_FULL)) ||
+	    ((speed == IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10GB) &&
+	     (lcd_speed == IXGBE_LINK_SPEED_10GB_FULL)))
+		return status;
+
+	/* Clear AN completed indication */
+	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM,
+				      MDIO_MMD_AN,
+				      &autoneg_reg);
+	if (status)
+		return status;
+
+	status = hw->phy.ops.read_reg(hw, MDIO_AN_10GBT_CTRL,
+				      MDIO_MMD_AN,
+				      &an_10g_cntl_reg);
+	if (status)
+		return status;
+
+	status = hw->phy.ops.read_reg(hw,
+				      IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
+				      MDIO_MMD_AN,
+				      &autoneg_reg);
+	if (status)
+		return status;
+
+	save_autoneg = hw->phy.autoneg_advertised;
+
+	/* Setup link at least common link speed */
+	status = hw->mac.ops.setup_link(hw, lcd_speed, false);
+
+	/* restore autoneg from before setting lplu speed */
+	hw->phy.autoneg_advertised = save_autoneg;
+
+	return status;
+}
+
+/**
+ * ixgbe_reset_phy_fw - Reset firmware-controlled PHYs
+ * @hw: pointer to hardware structure
+ */
+static s32 ixgbe_reset_phy_fw(struct ixgbe_hw *hw)
+{
+	u32 store[FW_PHY_ACT_DATA_COUNT] = { 0 };
+	s32 rc;
+
+	if (hw->phy.reset_disable || ixgbe_check_reset_blocked(hw))
+		return 0;
+
+	rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_PHY_SW_RESET, &store);
+	if (rc)
+		return rc;
+	memset(store, 0, sizeof(store));
+
+	rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_INIT_PHY, &store);
+	if (rc)
+		return rc;
+
+	return ixgbe_setup_fw_link(hw);
+}
+
+/**
+ * ixgbe_check_overtemp_fw - Check firmware-controlled PHYs for overtemp
+ * @hw: pointer to hardware structure
+ */
+static s32 ixgbe_check_overtemp_fw(struct ixgbe_hw *hw)
+{
+	u32 store[FW_PHY_ACT_DATA_COUNT] = { 0 };
+	s32 rc;
+
+	rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_LINK_INFO, &store);
+	if (rc)
+		return rc;
+
+	if (store[0] & FW_PHY_ACT_GET_LINK_INFO_TEMP) {
+		ixgbe_shutdown_fw_phy(hw);
+		return IXGBE_ERR_OVERTEMP;
+	}
+	return 0;
+}
+
+/**
+ * ixgbe_read_mng_if_sel_x550em - Read NW_MNG_IF_SEL register
+ * @hw: pointer to hardware structure
+ *
+ * Read NW_MNG_IF_SEL register and save field values.
+ */
+static void ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw)
+{
+	/* Save NW management interface connected on board. This is used
+	 * to determine internal PHY mode.
+	 */
+	hw->phy.nw_mng_if_sel = IXGBE_READ_REG(hw, IXGBE_NW_MNG_IF_SEL);
+
+	/* If X552 (X550EM_a) and MDIO is connected to external PHY, then set
+	 * PHY address. This register field was has only been used for X552.
+	 */
+	if (hw->mac.type == ixgbe_mac_x550em_a &&
+	    hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_MDIO_ACT) {
+		hw->phy.mdio.prtad = (hw->phy.nw_mng_if_sel &
+				      IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
+				     IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
+	}
+}
+
+/** ixgbe_init_phy_ops_X550em - PHY/SFP specific init
+ *  @hw: pointer to hardware structure
+ *
+ *  Initialize any function pointers that were not able to be
+ *  set during init_shared_code because the PHY/SFP type was
+ *  not known.  Perform the SFP init if necessary.
+ **/
+static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
+{
+	struct ixgbe_phy_info *phy = &hw->phy;
+	s32 ret_val;
+
+	hw->mac.ops.set_lan_id(hw);
+
+	ixgbe_read_mng_if_sel_x550em(hw);
+
+	if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) {
+		phy->phy_semaphore_mask = IXGBE_GSSR_SHARED_I2C_SM;
+		ixgbe_setup_mux_ctl(hw);
+	}
+
+	/* Identify the PHY or SFP module */
+	ret_val = phy->ops.identify(hw);
+	if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED ||
+	    ret_val == IXGBE_ERR_PHY_ADDR_INVALID)
+		return ret_val;
+
+	/* Setup function pointers based on detected hardware */
+	ixgbe_init_mac_link_ops_X550em(hw);
+	if (phy->sfp_type != ixgbe_sfp_type_unknown)
+		phy->ops.reset = NULL;
+
+	/* Set functions pointers based on phy type */
+	switch (hw->phy.type) {
+	case ixgbe_phy_x550em_kx4:
+		phy->ops.setup_link = NULL;
+		phy->ops.read_reg = ixgbe_read_phy_reg_x550em;
+		phy->ops.write_reg = ixgbe_write_phy_reg_x550em;
+		break;
+	case ixgbe_phy_x550em_kr:
+		phy->ops.setup_link = ixgbe_setup_kr_x550em;
+		phy->ops.read_reg = ixgbe_read_phy_reg_x550em;
+		phy->ops.write_reg = ixgbe_write_phy_reg_x550em;
+		break;
+	case ixgbe_phy_x550em_xfi:
+		/* link is managed by HW */
+		phy->ops.setup_link = NULL;
+		phy->ops.read_reg = ixgbe_read_phy_reg_x550em;
+		phy->ops.write_reg = ixgbe_write_phy_reg_x550em;
+		break;
+	case ixgbe_phy_x550em_ext_t:
+		/* Save NW management interface connected on board. This is used
+		 * to determine internal PHY mode
+		 */
+		phy->nw_mng_if_sel = IXGBE_READ_REG(hw, IXGBE_NW_MNG_IF_SEL);
+
+		/* If internal link mode is XFI, then setup iXFI internal link,
+		 * else setup KR now.
+		 */
+		phy->ops.setup_internal_link =
+					      ixgbe_setup_internal_phy_t_x550em;
+
+		/* setup SW LPLU only for first revision */
+		if (hw->mac.type == ixgbe_mac_X550EM_x &&
+		    !(IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0)) &
+		      IXGBE_FUSES0_REV_MASK))
+			phy->ops.enter_lplu = ixgbe_enter_lplu_t_x550em;
+
+		phy->ops.handle_lasi = ixgbe_handle_lasi_ext_t_x550em;
+		phy->ops.reset = ixgbe_reset_phy_t_X550em;
+		break;
+	case ixgbe_phy_sgmii:
+		phy->ops.setup_link = NULL;
+		break;
+	case ixgbe_phy_fw:
+		phy->ops.setup_link = ixgbe_setup_fw_link;
+		phy->ops.reset = ixgbe_reset_phy_fw;
+		break;
+	case ixgbe_phy_ext_1g_t:
+		phy->ops.setup_link = NULL;
+		phy->ops.read_reg = NULL;
+		phy->ops.write_reg = NULL;
+		phy->ops.reset = NULL;
+		break;
+	default:
+		break;
+	}
+
+	return ret_val;
+}
+
+/** ixgbe_get_media_type_X550em - Get media type
+ *  @hw: pointer to hardware structure
+ *
+ *  Returns the media type (fiber, copper, backplane)
+ *
+ */
+static enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw)
+{
+	enum ixgbe_media_type media_type;
+
+	/* Detect if there is a copper PHY attached. */
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_X550EM_A_SGMII:
+	case IXGBE_DEV_ID_X550EM_A_SGMII_L:
+		hw->phy.type = ixgbe_phy_sgmii;
+		fallthrough;
+	case IXGBE_DEV_ID_X550EM_X_KR:
+	case IXGBE_DEV_ID_X550EM_X_KX4:
+	case IXGBE_DEV_ID_X550EM_X_XFI:
+	case IXGBE_DEV_ID_X550EM_A_KR:
+	case IXGBE_DEV_ID_X550EM_A_KR_L:
+		media_type = ixgbe_media_type_backplane;
+		break;
+	case IXGBE_DEV_ID_X550EM_X_SFP:
+	case IXGBE_DEV_ID_X550EM_A_SFP:
+	case IXGBE_DEV_ID_X550EM_A_SFP_N:
+		media_type = ixgbe_media_type_fiber;
+		break;
+	case IXGBE_DEV_ID_X550EM_X_1G_T:
+	case IXGBE_DEV_ID_X550EM_X_10G_T:
+	case IXGBE_DEV_ID_X550EM_A_10G_T:
+	case IXGBE_DEV_ID_X550EM_A_1G_T:
+	case IXGBE_DEV_ID_X550EM_A_1G_T_L:
+		media_type = ixgbe_media_type_copper;
+		break;
+	default:
+		media_type = ixgbe_media_type_unknown;
+		break;
+	}
+	return media_type;
+}
+
+/** ixgbe_init_ext_t_x550em - Start (unstall) the external Base T PHY.
+ ** @hw: pointer to hardware structure
+ **/
+static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw)
+{
+	s32 status;
+	u16 reg;
+
+	status = hw->phy.ops.read_reg(hw,
+				      IXGBE_MDIO_TX_VENDOR_ALARMS_3,
+				      MDIO_MMD_PMAPMD,
+				      &reg);
+	if (status)
+		return status;
+
+	/* If PHY FW reset completed bit is set then this is the first
+	 * SW instance after a power on so the PHY FW must be un-stalled.
+	 */
+	if (reg & IXGBE_MDIO_TX_VENDOR_ALARMS_3_RST_MASK) {
+		status = hw->phy.ops.read_reg(hw,
+					IXGBE_MDIO_GLOBAL_RES_PR_10,
+					MDIO_MMD_VEND1,
+					&reg);
+		if (status)
+			return status;
+
+		reg &= ~IXGBE_MDIO_POWER_UP_STALL;
+
+		status = hw->phy.ops.write_reg(hw,
+					IXGBE_MDIO_GLOBAL_RES_PR_10,
+					MDIO_MMD_VEND1,
+					reg);
+		if (status)
+			return status;
+	}
+
+	return status;
+}
+
+/**
+ * ixgbe_set_mdio_speed - Set MDIO clock speed
+ * @hw: pointer to hardware structure
+ */
+static void ixgbe_set_mdio_speed(struct ixgbe_hw *hw)
+{
+	u32 hlreg0;
+
+	switch (hw->device_id) {
+	case IXGBE_DEV_ID_X550EM_X_10G_T:
+	case IXGBE_DEV_ID_X550EM_A_SGMII:
+	case IXGBE_DEV_ID_X550EM_A_SGMII_L:
+	case IXGBE_DEV_ID_X550EM_A_10G_T:
+	case IXGBE_DEV_ID_X550EM_A_SFP:
+		/* Config MDIO clock speed before the first MDIO PHY access */
+		hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+		hlreg0 &= ~IXGBE_HLREG0_MDCSPD;
+		IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
+		break;
+	case IXGBE_DEV_ID_X550EM_A_1G_T:
+	case IXGBE_DEV_ID_X550EM_A_1G_T_L:
+		/* Select fast MDIO clock speed for these devices */
+		hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+		hlreg0 |= IXGBE_HLREG0_MDCSPD;
+		IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
+		break;
+	default:
+		break;
+	}
+}
+
+/**  ixgbe_reset_hw_X550em - Perform hardware reset
+ **  @hw: pointer to hardware structure
+ **
+ **  Resets the hardware by resetting the transmit and receive units, masks
+ **  and clears all interrupts, perform a PHY reset, and perform a link (MAC)
+ **  reset.
+ **/
+static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw)
+{
+	ixgbe_link_speed link_speed;
+	s32 status;
+	u32 ctrl = 0;
+	u32 i;
+	bool link_up = false;
+	u32 swfw_mask = hw->phy.phy_semaphore_mask;
+
+	/* Call adapter stop to disable Tx/Rx and clear interrupts */
+	status = hw->mac.ops.stop_adapter(hw);
+	if (status)
+		return status;
+
+	/* flush pending Tx transactions */
+	ixgbe_clear_tx_pending(hw);
+
+	/* set MDIO speed before talking to the PHY in case it's the 1st time */
+	ixgbe_set_mdio_speed(hw);
+
+	/* PHY ops must be identified and initialized prior to reset */
+	status = hw->phy.ops.init(hw);
+	if (status == IXGBE_ERR_SFP_NOT_SUPPORTED ||
+	    status == IXGBE_ERR_PHY_ADDR_INVALID)
+		return status;
+
+	/* start the external PHY */
+	if (hw->phy.type == ixgbe_phy_x550em_ext_t) {
+		status = ixgbe_init_ext_t_x550em(hw);
+		if (status)
+			return status;
+	}
+
+	/* Setup SFP module if there is one present. */
+	if (hw->phy.sfp_setup_needed) {
+		status = hw->mac.ops.setup_sfp(hw);
+		hw->phy.sfp_setup_needed = false;
+	}
+
+	if (status == IXGBE_ERR_SFP_NOT_SUPPORTED)
+		return status;
+
+	/* Reset PHY */
+	if (!hw->phy.reset_disable && hw->phy.ops.reset)
+		hw->phy.ops.reset(hw);
+
+mac_reset_top:
+	/* Issue global reset to the MAC.  Needs to be SW reset if link is up.
+	 * If link reset is used when link is up, it might reset the PHY when
+	 * mng is using it.  If link is down or the flag to force full link
+	 * reset is set, then perform link reset.
+	 */
+	ctrl = IXGBE_CTRL_LNK_RST;
+
+	if (!hw->force_full_reset) {
+		hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
+		if (link_up)
+			ctrl = IXGBE_CTRL_RST;
+	}
+
+	status = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask);
+	if (status) {
+		hw_dbg(hw, "semaphore failed with %d", status);
+		return IXGBE_ERR_SWFW_SYNC;
+	}
+
+	ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL);
+	IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl);
+	IXGBE_WRITE_FLUSH(hw);
+	hw->mac.ops.release_swfw_sync(hw, swfw_mask);
+	usleep_range(1000, 1200);
+
+	/* Poll for reset bit to self-clear meaning reset is complete */
+	for (i = 0; i < 10; i++) {
+		ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL);
+		if (!(ctrl & IXGBE_CTRL_RST_MASK))
+			break;
+		udelay(1);
+	}
+
+	if (ctrl & IXGBE_CTRL_RST_MASK) {
+		status = IXGBE_ERR_RESET_FAILED;
+		hw_dbg(hw, "Reset polling failed to complete.\n");
+	}
+
+	msleep(50);
+
+	/* Double resets are required for recovery from certain error
+	 * clear the multicast table.  Also reset num_rar_entries to 128,
+	 * since we modify this value when programming the SAN MAC address.
+	 */
+	if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) {
+		hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED;
+		goto mac_reset_top;
+	}
+
+	/* Store the permanent mac address */
+	hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr);
+
+	/* Store MAC address from RAR0, clear receive address registers, and
+	 * clear the multicast table.  Also reset num_rar_entries to 128,
+	 * since we modify this value when programming the SAN MAC address.
+	 */
+	hw->mac.num_rar_entries = 128;
+	hw->mac.ops.init_rx_addrs(hw);
+
+	ixgbe_set_mdio_speed(hw);
+
+	if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP)
+		ixgbe_setup_mux_ctl(hw);
+
+	return status;
+}
+
+/** ixgbe_set_ethertype_anti_spoofing_X550 - Enable/Disable Ethertype
+ *	anti-spoofing
+ *  @hw:  pointer to hardware structure
+ *  @enable: enable or disable switch for Ethertype anti-spoofing
+ *  @vf: Virtual Function pool - VF Pool to set for Ethertype anti-spoofing
+ **/
+static void ixgbe_set_ethertype_anti_spoofing_X550(struct ixgbe_hw *hw,
+						   bool enable, int vf)
+{
+	int vf_target_reg = vf >> 3;
+	int vf_target_shift = vf % 8 + IXGBE_SPOOF_ETHERTYPEAS_SHIFT;
+	u32 pfvfspoof;
+
+	pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg));
+	if (enable)
+		pfvfspoof |= BIT(vf_target_shift);
+	else
+		pfvfspoof &= ~BIT(vf_target_shift);
+
+	IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), pfvfspoof);
+}
+
+/** ixgbe_set_source_address_pruning_X550 - Enable/Disbale src address pruning
+ *  @hw: pointer to hardware structure
+ *  @enable: enable or disable source address pruning
+ *  @pool: Rx pool to set source address pruning for
+ **/
+static void ixgbe_set_source_address_pruning_X550(struct ixgbe_hw *hw,
+						  bool enable,
+						  unsigned int pool)
+{
+	u64 pfflp;
+
+	/* max rx pool is 63 */
+	if (pool > 63)
+		return;
+
+	pfflp = (u64)IXGBE_READ_REG(hw, IXGBE_PFFLPL);
+	pfflp |= (u64)IXGBE_READ_REG(hw, IXGBE_PFFLPH) << 32;
+
+	if (enable)
+		pfflp |= (1ULL << pool);
+	else
+		pfflp &= ~(1ULL << pool);
+
+	IXGBE_WRITE_REG(hw, IXGBE_PFFLPL, (u32)pfflp);
+	IXGBE_WRITE_REG(hw, IXGBE_PFFLPH, (u32)(pfflp >> 32));
+}
+
+/**
+ *  ixgbe_setup_fc_backplane_x550em_a - Set up flow control
+ *  @hw: pointer to hardware structure
+ *
+ *  Called at init time to set up flow control.
+ **/
+static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+	u32 an_cntl = 0;
+
+	/* Validate the requested mode */
+	if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
+		hw_err(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n");
+		return IXGBE_ERR_INVALID_LINK_SETTINGS;
+	}
+
+	if (hw->fc.requested_mode == ixgbe_fc_default)
+		hw->fc.requested_mode = ixgbe_fc_full;
+
+	/* Set up the 1G and 10G flow control advertisement registers so the
+	 * HW will be able to do FC autoneg once the cable is plugged in.  If
+	 * we link at 10G, the 1G advertisement is harmless and vice versa.
+	 */
+	status = hw->mac.ops.read_iosf_sb_reg(hw,
+					IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+					IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl);
+
+	if (status) {
+		hw_dbg(hw, "Auto-Negotiation did not complete\n");
+		return status;
+	}
+
+	/* The possible values of fc.requested_mode are:
+	 * 0: Flow control is completely disabled
+	 * 1: Rx flow control is enabled (we can receive pause frames,
+	 *    but not send pause frames).
+	 * 2: Tx flow control is enabled (we can send pause frames but
+	 *    we do not support receiving pause frames).
+	 * 3: Both Rx and Tx flow control (symmetric) are enabled.
+	 * other: Invalid.
+	 */
+	switch (hw->fc.requested_mode) {
+	case ixgbe_fc_none:
+		/* Flow control completely disabled by software override. */
+		an_cntl &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
+			     IXGBE_KRM_AN_CNTL_1_ASM_PAUSE);
+		break;
+	case ixgbe_fc_tx_pause:
+		/* Tx Flow control is enabled, and Rx Flow control is
+		 * disabled by software override.
+		 */
+		an_cntl |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
+		an_cntl &= ~IXGBE_KRM_AN_CNTL_1_SYM_PAUSE;
+		break;
+	case ixgbe_fc_rx_pause:
+		/* Rx Flow control is enabled and Tx Flow control is
+		 * disabled by software override. Since there really
+		 * isn't a way to advertise that we are capable of RX
+		 * Pause ONLY, we will advertise that we support both
+		 * symmetric and asymmetric Rx PAUSE, as such we fall
+		 * through to the fc_full statement.  Later, we will
+		 * disable the adapter's ability to send PAUSE frames.
+		 */
+	case ixgbe_fc_full:
+		/* Flow control (both Rx and Tx) is enabled by SW override. */
+		an_cntl |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
+			   IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
+		break;
+	default:
+		hw_err(hw, "Flow control param set incorrectly\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	status = hw->mac.ops.write_iosf_sb_reg(hw,
+					IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+					IXGBE_SB_IOSF_TARGET_KR_PHY, an_cntl);
+
+	/* Restart auto-negotiation. */
+	status = ixgbe_restart_an_internal_phy_x550em(hw);
+
+	return status;
+}
+
+/**
+ * ixgbe_set_mux - Set mux for port 1 access with CS4227
+ * @hw: pointer to hardware structure
+ * @state: set mux if 1, clear if 0
+ */
+static void ixgbe_set_mux(struct ixgbe_hw *hw, u8 state)
+{
+	u32 esdp;
+
+	if (!hw->bus.lan_id)
+		return;
+	esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
+	if (state)
+		esdp |= IXGBE_ESDP_SDP1;
+	else
+		esdp &= ~IXGBE_ESDP_SDP1;
+	IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp);
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_acquire_swfw_sync_X550em - Acquire SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to acquire
+ *
+ * Acquires the SWFW semaphore and sets the I2C MUX
+ */
+static s32 ixgbe_acquire_swfw_sync_X550em(struct ixgbe_hw *hw, u32 mask)
+{
+	s32 status;
+
+	status = ixgbe_acquire_swfw_sync_X540(hw, mask);
+	if (status)
+		return status;
+
+	if (mask & IXGBE_GSSR_I2C_MASK)
+		ixgbe_set_mux(hw, 1);
+
+	return 0;
+}
+
+/**
+ * ixgbe_release_swfw_sync_X550em - Release SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to release
+ *
+ * Releases the SWFW semaphore and sets the I2C MUX
+ */
+static void ixgbe_release_swfw_sync_X550em(struct ixgbe_hw *hw, u32 mask)
+{
+	if (mask & IXGBE_GSSR_I2C_MASK)
+		ixgbe_set_mux(hw, 0);
+
+	ixgbe_release_swfw_sync_X540(hw, mask);
+}
+
+/**
+ * ixgbe_acquire_swfw_sync_x550em_a - Acquire SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to acquire
+ *
+ * Acquires the SWFW semaphore and get the shared PHY token as needed
+ */
+static s32 ixgbe_acquire_swfw_sync_x550em_a(struct ixgbe_hw *hw, u32 mask)
+{
+	u32 hmask = mask & ~IXGBE_GSSR_TOKEN_SM;
+	int retries = FW_PHY_TOKEN_RETRIES;
+	s32 status;
+
+	while (--retries) {
+		status = 0;
+		if (hmask)
+			status = ixgbe_acquire_swfw_sync_X540(hw, hmask);
+		if (status)
+			return status;
+		if (!(mask & IXGBE_GSSR_TOKEN_SM))
+			return 0;
+
+		status = ixgbe_get_phy_token(hw);
+		if (!status)
+			return 0;
+		if (hmask)
+			ixgbe_release_swfw_sync_X540(hw, hmask);
+		if (status != IXGBE_ERR_TOKEN_RETRY)
+			return status;
+		msleep(FW_PHY_TOKEN_DELAY);
+	}
+
+	return status;
+}
+
+/**
+ * ixgbe_release_swfw_sync_x550em_a - Release SWFW semaphore
+ * @hw: pointer to hardware structure
+ * @mask: Mask to specify which semaphore to release
+ *
+ * Release the SWFW semaphore and puts the shared PHY token as needed
+ */
+static void ixgbe_release_swfw_sync_x550em_a(struct ixgbe_hw *hw, u32 mask)
+{
+	u32 hmask = mask & ~IXGBE_GSSR_TOKEN_SM;
+
+	if (mask & IXGBE_GSSR_TOKEN_SM)
+		ixgbe_put_phy_token(hw);
+
+	if (hmask)
+		ixgbe_release_swfw_sync_X540(hw, hmask);
+}
+
+/**
+ * ixgbe_read_phy_reg_x550a - Reads specified PHY register
+ * @hw: pointer to hardware structure
+ * @reg_addr: 32 bit address of PHY register to read
+ * @device_type: 5 bit device type
+ * @phy_data: Pointer to read data from PHY register
+ *
+ * Reads a value from a specified PHY register using the SWFW lock and PHY
+ * Token. The PHY Token is needed since the MDIO is shared between to MAC
+ * instances.
+ */
+static s32 ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
+				    u32 device_type, u16 *phy_data)
+{
+	u32 mask = hw->phy.phy_semaphore_mask | IXGBE_GSSR_TOKEN_SM;
+	s32 status;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, mask))
+		return IXGBE_ERR_SWFW_SYNC;
+
+	status = hw->phy.ops.read_reg_mdi(hw, reg_addr, device_type, phy_data);
+
+	hw->mac.ops.release_swfw_sync(hw, mask);
+
+	return status;
+}
+
+/**
+ * ixgbe_write_phy_reg_x550a - Writes specified PHY register
+ * @hw: pointer to hardware structure
+ * @reg_addr: 32 bit PHY register to write
+ * @device_type: 5 bit device type
+ * @phy_data: Data to write to the PHY register
+ *
+ * Writes a value to specified PHY register using the SWFW lock and PHY Token.
+ * The PHY Token is needed since the MDIO is shared between to MAC instances.
+ */
+static s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
+				     u32 device_type, u16 phy_data)
+{
+	u32 mask = hw->phy.phy_semaphore_mask | IXGBE_GSSR_TOKEN_SM;
+	s32 status;
+
+	if (hw->mac.ops.acquire_swfw_sync(hw, mask))
+		return IXGBE_ERR_SWFW_SYNC;
+
+	status = ixgbe_write_phy_reg_mdi(hw, reg_addr, device_type, phy_data);
+	hw->mac.ops.release_swfw_sync(hw, mask);
+
+	return status;
+}
+
+#define X550_COMMON_MAC \
+	.init_hw			= &ixgbe_init_hw_generic, \
+	.start_hw			= &ixgbe_start_hw_X540, \
+	.clear_hw_cntrs			= &ixgbe_clear_hw_cntrs_generic, \
+	.enable_rx_dma			= &ixgbe_enable_rx_dma_generic, \
+	.get_mac_addr			= &ixgbe_get_mac_addr_generic, \
+	.get_device_caps		= &ixgbe_get_device_caps_generic, \
+	.stop_adapter			= &ixgbe_stop_adapter_generic, \
+	.set_lan_id			= &ixgbe_set_lan_id_multi_port_pcie, \
+	.read_analog_reg8		= NULL, \
+	.write_analog_reg8		= NULL, \
+	.set_rxpba			= &ixgbe_set_rxpba_generic, \
+	.check_link			= &ixgbe_check_mac_link_generic, \
+	.blink_led_start		= &ixgbe_blink_led_start_X540, \
+	.blink_led_stop			= &ixgbe_blink_led_stop_X540, \
+	.set_rar			= &ixgbe_set_rar_generic, \
+	.clear_rar			= &ixgbe_clear_rar_generic, \
+	.set_vmdq			= &ixgbe_set_vmdq_generic, \
+	.set_vmdq_san_mac		= &ixgbe_set_vmdq_san_mac_generic, \
+	.clear_vmdq			= &ixgbe_clear_vmdq_generic, \
+	.init_rx_addrs			= &ixgbe_init_rx_addrs_generic, \
+	.update_mc_addr_list		= &ixgbe_update_mc_addr_list_generic, \
+	.enable_mc			= &ixgbe_enable_mc_generic, \
+	.disable_mc			= &ixgbe_disable_mc_generic, \
+	.clear_vfta			= &ixgbe_clear_vfta_generic, \
+	.set_vfta			= &ixgbe_set_vfta_generic, \
+	.fc_enable			= &ixgbe_fc_enable_generic, \
+	.set_fw_drv_ver			= &ixgbe_set_fw_drv_ver_x550, \
+	.init_uta_tables		= &ixgbe_init_uta_tables_generic, \
+	.set_mac_anti_spoofing		= &ixgbe_set_mac_anti_spoofing, \
+	.set_vlan_anti_spoofing		= &ixgbe_set_vlan_anti_spoofing, \
+	.set_source_address_pruning	= \
+				&ixgbe_set_source_address_pruning_X550, \
+	.set_ethertype_anti_spoofing	= \
+				&ixgbe_set_ethertype_anti_spoofing_X550, \
+	.disable_rx_buff		= &ixgbe_disable_rx_buff_generic, \
+	.enable_rx_buff			= &ixgbe_enable_rx_buff_generic, \
+	.get_thermal_sensor_data	= NULL, \
+	.init_thermal_sensor_thresh	= NULL, \
+	.fw_recovery_mode		= &ixgbe_fw_recovery_mode_X550, \
+	.enable_rx			= &ixgbe_enable_rx_generic, \
+	.disable_rx			= &ixgbe_disable_rx_x550, \
+
+static const struct ixgbe_mac_operations mac_ops_X550 = {
+	X550_COMMON_MAC
+	.led_on			= ixgbe_led_on_generic,
+	.led_off		= ixgbe_led_off_generic,
+	.init_led_link_act	= ixgbe_init_led_link_act_generic,
+	.reset_hw		= &ixgbe_reset_hw_X540,
+	.get_media_type		= &ixgbe_get_media_type_X540,
+	.get_san_mac_addr	= &ixgbe_get_san_mac_addr_generic,
+	.get_wwn_prefix		= &ixgbe_get_wwn_prefix_generic,
+	.setup_link		= &ixgbe_setup_mac_link_X540,
+	.get_link_capabilities	= &ixgbe_get_copper_link_capabilities_generic,
+	.get_bus_info		= &ixgbe_get_bus_info_generic,
+	.setup_sfp		= NULL,
+	.acquire_swfw_sync	= &ixgbe_acquire_swfw_sync_X540,
+	.release_swfw_sync	= &ixgbe_release_swfw_sync_X540,
+	.init_swfw_sync		= &ixgbe_init_swfw_sync_X540,
+	.prot_autoc_read	= prot_autoc_read_generic,
+	.prot_autoc_write	= prot_autoc_write_generic,
+	.setup_fc		= ixgbe_setup_fc_generic,
+	.fc_autoneg		= ixgbe_fc_autoneg,
+};
+
+static const struct ixgbe_mac_operations mac_ops_X550EM_x = {
+	X550_COMMON_MAC
+	.led_on			= ixgbe_led_on_t_x550em,
+	.led_off		= ixgbe_led_off_t_x550em,
+	.init_led_link_act	= ixgbe_init_led_link_act_generic,
+	.reset_hw		= &ixgbe_reset_hw_X550em,
+	.get_media_type		= &ixgbe_get_media_type_X550em,
+	.get_san_mac_addr	= NULL,
+	.get_wwn_prefix		= NULL,
+	.setup_link		= &ixgbe_setup_mac_link_X540,
+	.get_link_capabilities	= &ixgbe_get_link_capabilities_X550em,
+	.get_bus_info		= &ixgbe_get_bus_info_X550em,
+	.setup_sfp		= ixgbe_setup_sfp_modules_X550em,
+	.acquire_swfw_sync	= &ixgbe_acquire_swfw_sync_X550em,
+	.release_swfw_sync	= &ixgbe_release_swfw_sync_X550em,
+	.init_swfw_sync		= &ixgbe_init_swfw_sync_X540,
+	.setup_fc		= NULL, /* defined later */
+	.fc_autoneg		= ixgbe_fc_autoneg,
+	.read_iosf_sb_reg	= ixgbe_read_iosf_sb_reg_x550,
+	.write_iosf_sb_reg	= ixgbe_write_iosf_sb_reg_x550,
+};
+
+static const struct ixgbe_mac_operations mac_ops_X550EM_x_fw = {
+	X550_COMMON_MAC
+	.led_on			= NULL,
+	.led_off		= NULL,
+	.init_led_link_act	= NULL,
+	.reset_hw		= &ixgbe_reset_hw_X550em,
+	.get_media_type		= &ixgbe_get_media_type_X550em,
+	.get_san_mac_addr	= NULL,
+	.get_wwn_prefix		= NULL,
+	.setup_link		= &ixgbe_setup_mac_link_X540,
+	.get_link_capabilities	= &ixgbe_get_link_capabilities_X550em,
+	.get_bus_info		= &ixgbe_get_bus_info_X550em,
+	.setup_sfp		= ixgbe_setup_sfp_modules_X550em,
+	.acquire_swfw_sync	= &ixgbe_acquire_swfw_sync_X550em,
+	.release_swfw_sync	= &ixgbe_release_swfw_sync_X550em,
+	.init_swfw_sync		= &ixgbe_init_swfw_sync_X540,
+	.setup_fc		= NULL,
+	.fc_autoneg		= ixgbe_fc_autoneg,
+	.read_iosf_sb_reg	= ixgbe_read_iosf_sb_reg_x550,
+	.write_iosf_sb_reg	= ixgbe_write_iosf_sb_reg_x550,
+};
+
+static const struct ixgbe_mac_operations mac_ops_x550em_a = {
+	X550_COMMON_MAC
+	.led_on			= ixgbe_led_on_t_x550em,
+	.led_off		= ixgbe_led_off_t_x550em,
+	.init_led_link_act	= ixgbe_init_led_link_act_generic,
+	.reset_hw		= ixgbe_reset_hw_X550em,
+	.get_media_type		= ixgbe_get_media_type_X550em,
+	.get_san_mac_addr	= NULL,
+	.get_wwn_prefix		= NULL,
+	.setup_link		= &ixgbe_setup_mac_link_X540,
+	.get_link_capabilities	= ixgbe_get_link_capabilities_X550em,
+	.get_bus_info		= ixgbe_get_bus_info_X550em,
+	.setup_sfp		= ixgbe_setup_sfp_modules_X550em,
+	.acquire_swfw_sync	= ixgbe_acquire_swfw_sync_x550em_a,
+	.release_swfw_sync	= ixgbe_release_swfw_sync_x550em_a,
+	.setup_fc		= ixgbe_setup_fc_x550em,
+	.fc_autoneg		= ixgbe_fc_autoneg,
+	.read_iosf_sb_reg	= ixgbe_read_iosf_sb_reg_x550a,
+	.write_iosf_sb_reg	= ixgbe_write_iosf_sb_reg_x550a,
+};
+
+static const struct ixgbe_mac_operations mac_ops_x550em_a_fw = {
+	X550_COMMON_MAC
+	.led_on			= ixgbe_led_on_generic,
+	.led_off		= ixgbe_led_off_generic,
+	.init_led_link_act	= ixgbe_init_led_link_act_generic,
+	.reset_hw		= ixgbe_reset_hw_X550em,
+	.get_media_type		= ixgbe_get_media_type_X550em,
+	.get_san_mac_addr	= NULL,
+	.get_wwn_prefix		= NULL,
+	.setup_link		= NULL, /* defined later */
+	.get_link_capabilities	= ixgbe_get_link_capabilities_X550em,
+	.get_bus_info		= ixgbe_get_bus_info_X550em,
+	.setup_sfp		= ixgbe_setup_sfp_modules_X550em,
+	.acquire_swfw_sync	= ixgbe_acquire_swfw_sync_x550em_a,
+	.release_swfw_sync	= ixgbe_release_swfw_sync_x550em_a,
+	.setup_fc		= ixgbe_setup_fc_x550em,
+	.fc_autoneg		= ixgbe_fc_autoneg,
+	.read_iosf_sb_reg	= ixgbe_read_iosf_sb_reg_x550a,
+	.write_iosf_sb_reg	= ixgbe_write_iosf_sb_reg_x550a,
+};
+
+#define X550_COMMON_EEP \
+	.read			= &ixgbe_read_ee_hostif_X550, \
+	.read_buffer		= &ixgbe_read_ee_hostif_buffer_X550, \
+	.write			= &ixgbe_write_ee_hostif_X550, \
+	.write_buffer		= &ixgbe_write_ee_hostif_buffer_X550, \
+	.validate_checksum	= &ixgbe_validate_eeprom_checksum_X550, \
+	.update_checksum	= &ixgbe_update_eeprom_checksum_X550, \
+	.calc_checksum		= &ixgbe_calc_eeprom_checksum_X550, \
+
+static const struct ixgbe_eeprom_operations eeprom_ops_X550 = {
+	X550_COMMON_EEP
+	.init_params		= &ixgbe_init_eeprom_params_X550,
+};
+
+static const struct ixgbe_eeprom_operations eeprom_ops_X550EM_x = {
+	X550_COMMON_EEP
+	.init_params		= &ixgbe_init_eeprom_params_X540,
+};
+
+#define X550_COMMON_PHY	\
+	.identify_sfp		= &ixgbe_identify_module_generic, \
+	.reset			= NULL, \
+	.setup_link_speed	= &ixgbe_setup_phy_link_speed_generic, \
+	.read_i2c_byte		= &ixgbe_read_i2c_byte_generic, \
+	.write_i2c_byte		= &ixgbe_write_i2c_byte_generic, \
+	.read_i2c_sff8472	= &ixgbe_read_i2c_sff8472_generic, \
+	.read_i2c_eeprom	= &ixgbe_read_i2c_eeprom_generic, \
+	.write_i2c_eeprom	= &ixgbe_write_i2c_eeprom_generic, \
+	.setup_link		= &ixgbe_setup_phy_link_generic, \
+	.set_phy_power		= NULL,
+
+static const struct ixgbe_phy_operations phy_ops_X550 = {
+	X550_COMMON_PHY
+	.check_overtemp		= &ixgbe_tn_check_overtemp,
+	.init			= NULL,
+	.identify		= &ixgbe_identify_phy_generic,
+	.read_reg		= &ixgbe_read_phy_reg_generic,
+	.write_reg		= &ixgbe_write_phy_reg_generic,
+};
+
+static const struct ixgbe_phy_operations phy_ops_X550EM_x = {
+	X550_COMMON_PHY
+	.check_overtemp		= &ixgbe_tn_check_overtemp,
+	.init			= &ixgbe_init_phy_ops_X550em,
+	.identify		= &ixgbe_identify_phy_x550em,
+	.read_reg		= &ixgbe_read_phy_reg_generic,
+	.write_reg		= &ixgbe_write_phy_reg_generic,
+};
+
+static const struct ixgbe_phy_operations phy_ops_x550em_x_fw = {
+	X550_COMMON_PHY
+	.check_overtemp		= NULL,
+	.init			= ixgbe_init_phy_ops_X550em,
+	.identify		= ixgbe_identify_phy_x550em,
+	.read_reg		= NULL,
+	.write_reg		= NULL,
+	.read_reg_mdi		= NULL,
+	.write_reg_mdi		= NULL,
+};
+
+static const struct ixgbe_phy_operations phy_ops_x550em_a = {
+	X550_COMMON_PHY
+	.check_overtemp		= &ixgbe_tn_check_overtemp,
+	.init			= &ixgbe_init_phy_ops_X550em,
+	.identify		= &ixgbe_identify_phy_x550em,
+	.read_reg		= &ixgbe_read_phy_reg_x550a,
+	.write_reg		= &ixgbe_write_phy_reg_x550a,
+	.read_reg_mdi		= &ixgbe_read_phy_reg_mdi,
+	.write_reg_mdi		= &ixgbe_write_phy_reg_mdi,
+};
+
+static const struct ixgbe_phy_operations phy_ops_x550em_a_fw = {
+	X550_COMMON_PHY
+	.check_overtemp		= ixgbe_check_overtemp_fw,
+	.init			= ixgbe_init_phy_ops_X550em,
+	.identify		= ixgbe_identify_phy_fw,
+	.read_reg		= NULL,
+	.write_reg		= NULL,
+	.read_reg_mdi		= NULL,
+	.write_reg_mdi		= NULL,
+};
+
+static const struct ixgbe_link_operations link_ops_x550em_x = {
+	.read_link		= &ixgbe_read_i2c_combined_generic,
+	.read_link_unlocked	= &ixgbe_read_i2c_combined_generic_unlocked,
+	.write_link		= &ixgbe_write_i2c_combined_generic,
+	.write_link_unlocked	= &ixgbe_write_i2c_combined_generic_unlocked,
+};
+
+static const u32 ixgbe_mvals_X550[IXGBE_MVALS_IDX_LIMIT] = {
+	IXGBE_MVALS_INIT(X550)
+};
+
+static const u32 ixgbe_mvals_X550EM_x[IXGBE_MVALS_IDX_LIMIT] = {
+	IXGBE_MVALS_INIT(X550EM_x)
+};
+
+static const u32 ixgbe_mvals_x550em_a[IXGBE_MVALS_IDX_LIMIT] = {
+	IXGBE_MVALS_INIT(X550EM_a)
+};
+
+const struct ixgbe_info ixgbe_X550_info = {
+	.mac			= ixgbe_mac_X550,
+	.get_invariants		= &ixgbe_get_invariants_X540,
+	.mac_ops		= &mac_ops_X550,
+	.eeprom_ops		= &eeprom_ops_X550,
+	.phy_ops		= &phy_ops_X550,
+	.mbx_ops		= &mbx_ops_generic,
+	.mvals			= ixgbe_mvals_X550,
+};
+
+const struct ixgbe_info ixgbe_X550EM_x_info = {
+	.mac			= ixgbe_mac_X550EM_x,
+	.get_invariants		= &ixgbe_get_invariants_X550_x,
+	.mac_ops		= &mac_ops_X550EM_x,
+	.eeprom_ops		= &eeprom_ops_X550EM_x,
+	.phy_ops		= &phy_ops_X550EM_x,
+	.mbx_ops		= &mbx_ops_generic,
+	.mvals			= ixgbe_mvals_X550EM_x,
+	.link_ops		= &link_ops_x550em_x,
+};
+
+const struct ixgbe_info ixgbe_x550em_x_fw_info = {
+	.mac			= ixgbe_mac_X550EM_x,
+	.get_invariants		= ixgbe_get_invariants_X550_x_fw,
+	.mac_ops		= &mac_ops_X550EM_x_fw,
+	.eeprom_ops		= &eeprom_ops_X550EM_x,
+	.phy_ops		= &phy_ops_x550em_x_fw,
+	.mbx_ops		= &mbx_ops_generic,
+	.mvals			= ixgbe_mvals_X550EM_x,
+};
+
+const struct ixgbe_info ixgbe_x550em_a_info = {
+	.mac			= ixgbe_mac_x550em_a,
+	.get_invariants		= &ixgbe_get_invariants_X550_a,
+	.mac_ops		= &mac_ops_x550em_a,
+	.eeprom_ops		= &eeprom_ops_X550EM_x,
+	.phy_ops		= &phy_ops_x550em_a,
+	.mbx_ops		= &mbx_ops_generic,
+	.mvals			= ixgbe_mvals_x550em_a,
+};
+
+const struct ixgbe_info ixgbe_x550em_a_fw_info = {
+	.mac			= ixgbe_mac_x550em_a,
+	.get_invariants		= ixgbe_get_invariants_X550_a_fw,
+	.mac_ops		= &mac_ops_x550em_a_fw,
+	.eeprom_ops		= &eeprom_ops_X550EM_x,
+	.phy_ops		= &phy_ops_x550em_a_fw,
+	.mbx_ops		= &mbx_ops_generic,
+	.mvals			= ixgbe_mvals_x550em_a,
+};
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
new file mode 100644
index 0000000000..1703c640a4
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -0,0 +1,574 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock_drv.h>
+#include <net/xdp.h>
+
+#include "ixgbe.h"
+#include "ixgbe_txrx_common.h"
+
+struct xsk_buff_pool *ixgbe_xsk_pool(struct ixgbe_adapter *adapter,
+				     struct ixgbe_ring *ring)
+{
+	bool xdp_on = READ_ONCE(adapter->xdp_prog);
+	int qid = ring->ring_idx;
+
+	if (!xdp_on || !test_bit(qid, adapter->af_xdp_zc_qps))
+		return NULL;
+
+	return xsk_get_pool_from_qid(adapter->netdev, qid);
+}
+
+static int ixgbe_xsk_pool_enable(struct ixgbe_adapter *adapter,
+				 struct xsk_buff_pool *pool,
+				 u16 qid)
+{
+	struct net_device *netdev = adapter->netdev;
+	bool if_running;
+	int err;
+
+	if (qid >= adapter->num_rx_queues)
+		return -EINVAL;
+
+	if (qid >= netdev->real_num_rx_queues ||
+	    qid >= netdev->real_num_tx_queues)
+		return -EINVAL;
+
+	err = xsk_pool_dma_map(pool, &adapter->pdev->dev, IXGBE_RX_DMA_ATTR);
+	if (err)
+		return err;
+
+	if_running = netif_running(adapter->netdev) &&
+		     ixgbe_enabled_xdp_adapter(adapter);
+
+	if (if_running)
+		ixgbe_txrx_ring_disable(adapter, qid);
+
+	set_bit(qid, adapter->af_xdp_zc_qps);
+
+	if (if_running) {
+		ixgbe_txrx_ring_enable(adapter, qid);
+
+		/* Kick start the NAPI context so that receiving will start */
+		err = ixgbe_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
+		if (err) {
+			clear_bit(qid, adapter->af_xdp_zc_qps);
+			xsk_pool_dma_unmap(pool, IXGBE_RX_DMA_ATTR);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int ixgbe_xsk_pool_disable(struct ixgbe_adapter *adapter, u16 qid)
+{
+	struct xsk_buff_pool *pool;
+	bool if_running;
+
+	pool = xsk_get_pool_from_qid(adapter->netdev, qid);
+	if (!pool)
+		return -EINVAL;
+
+	if_running = netif_running(adapter->netdev) &&
+		     ixgbe_enabled_xdp_adapter(adapter);
+
+	if (if_running)
+		ixgbe_txrx_ring_disable(adapter, qid);
+
+	clear_bit(qid, adapter->af_xdp_zc_qps);
+	xsk_pool_dma_unmap(pool, IXGBE_RX_DMA_ATTR);
+
+	if (if_running)
+		ixgbe_txrx_ring_enable(adapter, qid);
+
+	return 0;
+}
+
+int ixgbe_xsk_pool_setup(struct ixgbe_adapter *adapter,
+			 struct xsk_buff_pool *pool,
+			 u16 qid)
+{
+	return pool ? ixgbe_xsk_pool_enable(adapter, pool, qid) :
+		ixgbe_xsk_pool_disable(adapter, qid);
+}
+
+static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
+			    struct ixgbe_ring *rx_ring,
+			    struct xdp_buff *xdp)
+{
+	int err, result = IXGBE_XDP_PASS;
+	struct bpf_prog *xdp_prog;
+	struct ixgbe_ring *ring;
+	struct xdp_frame *xdpf;
+	u32 act;
+
+	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+
+	if (likely(act == XDP_REDIRECT)) {
+		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+		if (!err)
+			return IXGBE_XDP_REDIR;
+		if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
+			result = IXGBE_XDP_EXIT;
+		else
+			result = IXGBE_XDP_CONSUMED;
+		goto out_failure;
+	}
+
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		xdpf = xdp_convert_buff_to_frame(xdp);
+		if (unlikely(!xdpf))
+			goto out_failure;
+		ring = ixgbe_determine_xdp_ring(adapter);
+		if (static_branch_unlikely(&ixgbe_xdp_locking_key))
+			spin_lock(&ring->tx_lock);
+		result = ixgbe_xmit_xdp_ring(ring, xdpf);
+		if (static_branch_unlikely(&ixgbe_xdp_locking_key))
+			spin_unlock(&ring->tx_lock);
+		if (result == IXGBE_XDP_CONSUMED)
+			goto out_failure;
+		break;
+	case XDP_DROP:
+		result = IXGBE_XDP_CONSUMED;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
+		fallthrough;
+	case XDP_ABORTED:
+		result = IXGBE_XDP_CONSUMED;
+out_failure:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+	}
+	return result;
+}
+
+bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count)
+{
+	union ixgbe_adv_rx_desc *rx_desc;
+	struct ixgbe_rx_buffer *bi;
+	u16 i = rx_ring->next_to_use;
+	dma_addr_t dma;
+	bool ok = true;
+
+	/* nothing to do */
+	if (!count)
+		return true;
+
+	rx_desc = IXGBE_RX_DESC(rx_ring, i);
+	bi = &rx_ring->rx_buffer_info[i];
+	i -= rx_ring->count;
+
+	do {
+		bi->xdp = xsk_buff_alloc(rx_ring->xsk_pool);
+		if (!bi->xdp) {
+			ok = false;
+			break;
+		}
+
+		dma = xsk_buff_xdp_get_dma(bi->xdp);
+
+		/* Refresh the desc even if buffer_addrs didn't change
+		 * because each write-back erases this info.
+		 */
+		rx_desc->read.pkt_addr = cpu_to_le64(dma);
+
+		rx_desc++;
+		bi++;
+		i++;
+		if (unlikely(!i)) {
+			rx_desc = IXGBE_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_buffer_info;
+			i -= rx_ring->count;
+		}
+
+		/* clear the length for the next_to_use descriptor */
+		rx_desc->wb.upper.length = 0;
+
+		count--;
+	} while (count);
+
+	i += rx_ring->count;
+
+	if (rx_ring->next_to_use != i) {
+		rx_ring->next_to_use = i;
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(i, rx_ring->tail);
+	}
+
+	return ok;
+}
+
+static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring,
+					      const struct xdp_buff *xdp)
+{
+	unsigned int totalsize = xdp->data_end - xdp->data_meta;
+	unsigned int metasize = xdp->data - xdp->data_meta;
+	struct sk_buff *skb;
+
+	net_prefetch(xdp->data_meta);
+
+	/* allocate a skb to store the frags */
+	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, totalsize,
+			       GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!skb))
+		return NULL;
+
+	memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+	       ALIGN(totalsize, sizeof(long)));
+
+	if (metasize) {
+		skb_metadata_set(skb, metasize);
+		__skb_pull(skb, metasize);
+	}
+
+	return skb;
+}
+
+static void ixgbe_inc_ntc(struct ixgbe_ring *rx_ring)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+	prefetch(IXGBE_RX_DESC(rx_ring, ntc));
+}
+
+int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
+			  struct ixgbe_ring *rx_ring,
+			  const int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+	struct ixgbe_adapter *adapter = q_vector->adapter;
+	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
+	unsigned int xdp_res, xdp_xmit = 0;
+	bool failure = false;
+	struct sk_buff *skb;
+
+	while (likely(total_rx_packets < budget)) {
+		union ixgbe_adv_rx_desc *rx_desc;
+		struct ixgbe_rx_buffer *bi;
+		unsigned int size;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
+			failure = failure ||
+				  !ixgbe_alloc_rx_buffers_zc(rx_ring,
+							     cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
+		size = le16_to_cpu(rx_desc->wb.upper.length);
+		if (!size)
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * descriptor has been written back
+		 */
+		dma_rmb();
+
+		bi = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+
+		if (unlikely(!ixgbe_test_staterr(rx_desc,
+						 IXGBE_RXD_STAT_EOP))) {
+			struct ixgbe_rx_buffer *next_bi;
+
+			xsk_buff_free(bi->xdp);
+			bi->xdp = NULL;
+			ixgbe_inc_ntc(rx_ring);
+			next_bi =
+			       &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+			next_bi->discard = true;
+			continue;
+		}
+
+		if (unlikely(bi->discard)) {
+			xsk_buff_free(bi->xdp);
+			bi->xdp = NULL;
+			bi->discard = false;
+			ixgbe_inc_ntc(rx_ring);
+			continue;
+		}
+
+		bi->xdp->data_end = bi->xdp->data + size;
+		xsk_buff_dma_sync_for_cpu(bi->xdp, rx_ring->xsk_pool);
+		xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, bi->xdp);
+
+		if (likely(xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR))) {
+			xdp_xmit |= xdp_res;
+		} else if (xdp_res == IXGBE_XDP_EXIT) {
+			failure = true;
+			break;
+		} else if (xdp_res == IXGBE_XDP_CONSUMED) {
+			xsk_buff_free(bi->xdp);
+		} else if (xdp_res == IXGBE_XDP_PASS) {
+			goto construct_skb;
+		}
+
+		bi->xdp = NULL;
+		total_rx_packets++;
+		total_rx_bytes += size;
+
+		cleaned_count++;
+		ixgbe_inc_ntc(rx_ring);
+		continue;
+
+construct_skb:
+		/* XDP_PASS path */
+		skb = ixgbe_construct_skb_zc(rx_ring, bi->xdp);
+		if (!skb) {
+			rx_ring->rx_stats.alloc_rx_buff_failed++;
+			break;
+		}
+
+		xsk_buff_free(bi->xdp);
+		bi->xdp = NULL;
+
+		cleaned_count++;
+		ixgbe_inc_ntc(rx_ring);
+
+		if (eth_skb_pad(skb))
+			continue;
+
+		total_rx_bytes += skb->len;
+		total_rx_packets++;
+
+		ixgbe_process_skb_fields(rx_ring, rx_desc, skb);
+		ixgbe_rx_skb(q_vector, skb);
+	}
+
+	if (xdp_xmit & IXGBE_XDP_REDIR)
+		xdp_do_flush_map();
+
+	if (xdp_xmit & IXGBE_XDP_TX) {
+		struct ixgbe_ring *ring = ixgbe_determine_xdp_ring(adapter);
+
+		ixgbe_xdp_ring_update_tail_locked(ring);
+	}
+
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->stats.packets += total_rx_packets;
+	rx_ring->stats.bytes += total_rx_bytes;
+	u64_stats_update_end(&rx_ring->syncp);
+	q_vector->rx.total_packets += total_rx_packets;
+	q_vector->rx.total_bytes += total_rx_bytes;
+
+	if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
+		if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
+			xsk_set_rx_need_wakeup(rx_ring->xsk_pool);
+		else
+			xsk_clear_rx_need_wakeup(rx_ring->xsk_pool);
+
+		return (int)total_rx_packets;
+	}
+	return failure ? budget : (int)total_rx_packets;
+}
+
+void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring)
+{
+	struct ixgbe_rx_buffer *bi;
+	u16 i;
+
+	for (i = 0; i < rx_ring->count; i++) {
+		bi = &rx_ring->rx_buffer_info[i];
+
+		if (!bi->xdp)
+			continue;
+
+		xsk_buff_free(bi->xdp);
+		bi->xdp = NULL;
+	}
+}
+
+static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
+{
+	struct xsk_buff_pool *pool = xdp_ring->xsk_pool;
+	union ixgbe_adv_tx_desc *tx_desc = NULL;
+	struct ixgbe_tx_buffer *tx_bi;
+	bool work_done = true;
+	struct xdp_desc desc;
+	dma_addr_t dma;
+	u32 cmd_type;
+
+	while (budget-- > 0) {
+		if (unlikely(!ixgbe_desc_unused(xdp_ring))) {
+			work_done = false;
+			break;
+		}
+
+		if (!netif_carrier_ok(xdp_ring->netdev))
+			break;
+
+		if (!xsk_tx_peek_desc(pool, &desc))
+			break;
+
+		dma = xsk_buff_raw_get_dma(pool, desc.addr);
+		xsk_buff_raw_dma_sync_for_device(pool, dma, desc.len);
+
+		tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use];
+		tx_bi->bytecount = desc.len;
+		tx_bi->xdpf = NULL;
+		tx_bi->gso_segs = 1;
+
+		tx_desc = IXGBE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+		/* put descriptor type bits */
+		cmd_type = IXGBE_ADVTXD_DTYP_DATA |
+			   IXGBE_ADVTXD_DCMD_DEXT |
+			   IXGBE_ADVTXD_DCMD_IFCS;
+		cmd_type |= desc.len | IXGBE_TXD_CMD;
+		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+		tx_desc->read.olinfo_status =
+			cpu_to_le32(desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+
+		xdp_ring->next_to_use++;
+		if (xdp_ring->next_to_use == xdp_ring->count)
+			xdp_ring->next_to_use = 0;
+	}
+
+	if (tx_desc) {
+		ixgbe_xdp_ring_update_tail(xdp_ring);
+		xsk_tx_release(pool);
+	}
+
+	return !!budget && work_done;
+}
+
+static void ixgbe_clean_xdp_tx_buffer(struct ixgbe_ring *tx_ring,
+				      struct ixgbe_tx_buffer *tx_bi)
+{
+	xdp_return_frame(tx_bi->xdpf);
+	dma_unmap_single(tx_ring->dev,
+			 dma_unmap_addr(tx_bi, dma),
+			 dma_unmap_len(tx_bi, len), DMA_TO_DEVICE);
+	dma_unmap_len_set(tx_bi, len, 0);
+}
+
+bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
+			    struct ixgbe_ring *tx_ring, int napi_budget)
+{
+	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
+	unsigned int total_packets = 0, total_bytes = 0;
+	struct xsk_buff_pool *pool = tx_ring->xsk_pool;
+	union ixgbe_adv_tx_desc *tx_desc;
+	struct ixgbe_tx_buffer *tx_bi;
+	u32 xsk_frames = 0;
+
+	tx_bi = &tx_ring->tx_buffer_info[ntc];
+	tx_desc = IXGBE_TX_DESC(tx_ring, ntc);
+
+	while (ntc != ntu) {
+		if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
+			break;
+
+		total_bytes += tx_bi->bytecount;
+		total_packets += tx_bi->gso_segs;
+
+		if (tx_bi->xdpf)
+			ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
+		else
+			xsk_frames++;
+
+		tx_bi->xdpf = NULL;
+
+		tx_bi++;
+		tx_desc++;
+		ntc++;
+		if (unlikely(ntc == tx_ring->count)) {
+			ntc = 0;
+			tx_bi = tx_ring->tx_buffer_info;
+			tx_desc = IXGBE_TX_DESC(tx_ring, 0);
+		}
+
+		/* issue prefetch for next Tx descriptor */
+		prefetch(tx_desc);
+	}
+
+	tx_ring->next_to_clean = ntc;
+
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->syncp);
+	q_vector->tx.total_bytes += total_bytes;
+	q_vector->tx.total_packets += total_packets;
+
+	if (xsk_frames)
+		xsk_tx_completed(pool, xsk_frames);
+
+	if (xsk_uses_need_wakeup(pool))
+		xsk_set_tx_need_wakeup(pool);
+
+	return ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
+}
+
+int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_ring *ring;
+
+	if (test_bit(__IXGBE_DOWN, &adapter->state))
+		return -ENETDOWN;
+
+	if (!READ_ONCE(adapter->xdp_prog))
+		return -EINVAL;
+
+	if (qid >= adapter->num_xdp_queues)
+		return -EINVAL;
+
+	ring = adapter->xdp_ring[qid];
+
+	if (test_bit(__IXGBE_TX_DISABLED, &ring->state))
+		return -ENETDOWN;
+
+	if (!ring->xsk_pool)
+		return -EINVAL;
+
+	if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
+		u64 eics = BIT_ULL(ring->q_vector->v_idx);
+
+		ixgbe_irq_rearm_queues(adapter, eics);
+	}
+
+	return 0;
+}
+
+void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring)
+{
+	u16 ntc = tx_ring->next_to_clean, ntu = tx_ring->next_to_use;
+	struct xsk_buff_pool *pool = tx_ring->xsk_pool;
+	struct ixgbe_tx_buffer *tx_bi;
+	u32 xsk_frames = 0;
+
+	while (ntc != ntu) {
+		tx_bi = &tx_ring->tx_buffer_info[ntc];
+
+		if (tx_bi->xdpf)
+			ixgbe_clean_xdp_tx_buffer(tx_ring, tx_bi);
+		else
+			xsk_frames++;
+
+		tx_bi->xdpf = NULL;
+
+		ntc++;
+		if (ntc == tx_ring->count)
+			ntc = 0;
+	}
+
+	if (xsk_frames)
+		xsk_tx_completed(pool, xsk_frames);
+}
diff --git a/drivers/net/ethernet/intel/ixgbevf/Makefile b/drivers/net/ethernet/intel/ixgbevf/Makefile
new file mode 100644
index 0000000000..186a4bb24f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 1999 - 2018 Intel Corporation.
+#
+# Makefile for the Intel(R) 82599 VF ethernet driver
+#
+
+obj-$(CONFIG_IXGBEVF) += ixgbevf.o
+
+ixgbevf-objs := vf.o \
+                mbx.o \
+                ethtool.o \
+                ixgbevf_main.o
+ixgbevf-$(CONFIG_IXGBEVF_IPSEC) += ipsec.o
+
diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h
new file mode 100644
index 0000000000..5f08779c0e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/defines.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _IXGBEVF_DEFINES_H_
+#define _IXGBEVF_DEFINES_H_
+
+/* Device IDs */
+#define IXGBE_DEV_ID_82599_VF		0x10ED
+#define IXGBE_DEV_ID_X540_VF		0x1515
+#define IXGBE_DEV_ID_X550_VF		0x1565
+#define IXGBE_DEV_ID_X550EM_X_VF	0x15A8
+#define IXGBE_DEV_ID_X550EM_A_VF	0x15C5
+
+#define IXGBE_DEV_ID_82599_VF_HV	0x152E
+#define IXGBE_DEV_ID_X540_VF_HV		0x1530
+#define IXGBE_DEV_ID_X550_VF_HV		0x1564
+#define IXGBE_DEV_ID_X550EM_X_VF_HV	0x15A9
+
+#define IXGBE_VF_IRQ_CLEAR_MASK		7
+#define IXGBE_VF_MAX_TX_QUEUES		8
+#define IXGBE_VF_MAX_RX_QUEUES		8
+
+/* DCB define */
+#define IXGBE_VF_MAX_TRAFFIC_CLASS	8
+
+/* Link speed */
+typedef u32 ixgbe_link_speed;
+#define IXGBE_LINK_SPEED_1GB_FULL	0x0020
+#define IXGBE_LINK_SPEED_10GB_FULL	0x0080
+#define IXGBE_LINK_SPEED_100_FULL	0x0008
+
+#define IXGBE_CTRL_RST		0x04000000 /* Reset (SW) */
+#define IXGBE_RXDCTL_ENABLE	0x02000000 /* Enable specific Rx Queue */
+#define IXGBE_TXDCTL_ENABLE	0x02000000 /* Enable specific Tx Queue */
+#define IXGBE_LINKS_UP		0x40000000
+#define IXGBE_LINKS_SPEED_82599		0x30000000
+#define IXGBE_LINKS_SPEED_10G_82599	0x30000000
+#define IXGBE_LINKS_SPEED_1G_82599	0x20000000
+#define IXGBE_LINKS_SPEED_100_82599	0x10000000
+
+/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
+#define IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE	8
+#define IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE	8
+#define IXGBE_REQ_TX_BUFFER_GRANULARITY		1024
+
+/* Interrupt Vector Allocation Registers */
+#define IXGBE_IVAR_ALLOC_VAL	0x80 /* Interrupt Allocation valid */
+
+#define IXGBE_VF_INIT_TIMEOUT	200 /* Number of retries to clear RSTI */
+
+/* Receive Config masks */
+#define IXGBE_RXCTRL_RXEN	0x00000001  /* Enable Receiver */
+#define IXGBE_RXCTRL_DMBYPS	0x00000002  /* Descriptor Monitor Bypass */
+#define IXGBE_RXDCTL_ENABLE	0x02000000  /* Enable specific Rx Queue */
+#define IXGBE_RXDCTL_VME	0x40000000  /* VLAN mode enable */
+#define IXGBE_RXDCTL_RLPMLMASK	0x00003FFF  /* Only supported on the X540 */
+#define IXGBE_RXDCTL_RLPML_EN	0x00008000
+
+/* DCA Control */
+#define IXGBE_DCA_TXCTRL_TX_WB_RO_EN BIT(11) /* Tx Desc writeback RO bit */
+
+/* PSRTYPE bit definitions */
+#define IXGBE_PSRTYPE_TCPHDR	0x00000010
+#define IXGBE_PSRTYPE_UDPHDR	0x00000020
+#define IXGBE_PSRTYPE_IPV4HDR	0x00000100
+#define IXGBE_PSRTYPE_IPV6HDR	0x00000200
+#define IXGBE_PSRTYPE_L2HDR	0x00001000
+
+/* SRRCTL bit definitions */
+#define IXGBE_SRRCTL_BSIZEPKT_SHIFT	10     /* so many KBs */
+#define IXGBE_SRRCTL_RDMTS_SHIFT	22
+#define IXGBE_SRRCTL_RDMTS_MASK		0x01C00000
+#define IXGBE_SRRCTL_DROP_EN		0x10000000
+#define IXGBE_SRRCTL_BSIZEPKT_MASK	0x0000007F
+#define IXGBE_SRRCTL_BSIZEHDR_MASK	0x00003F00
+#define IXGBE_SRRCTL_DESCTYPE_LEGACY	0x00000000
+#define IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000
+#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT	0x04000000
+#define IXGBE_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000
+#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000
+#define IXGBE_SRRCTL_DESCTYPE_MASK	0x0E000000
+
+/* Receive Descriptor bit definitions */
+#define IXGBE_RXD_STAT_DD	0x01    /* Descriptor Done */
+#define IXGBE_RXD_STAT_EOP	0x02    /* End of Packet */
+#define IXGBE_RXD_STAT_FLM	0x04    /* FDir Match */
+#define IXGBE_RXD_STAT_VP	0x08    /* IEEE VLAN Packet */
+#define IXGBE_RXDADV_NEXTP_MASK	0x000FFFF0 /* Next Descriptor Index */
+#define IXGBE_RXDADV_NEXTP_SHIFT	0x00000004
+#define IXGBE_RXD_STAT_UDPCS	0x10    /* UDP xsum calculated */
+#define IXGBE_RXD_STAT_L4CS	0x20    /* L4 xsum calculated */
+#define IXGBE_RXD_STAT_IPCS	0x40    /* IP xsum calculated */
+#define IXGBE_RXD_STAT_PIF	0x80    /* passed in-exact filter */
+#define IXGBE_RXD_STAT_CRCV	0x100   /* Speculative CRC Valid */
+#define IXGBE_RXD_STAT_VEXT	0x200   /* 1st VLAN found */
+#define IXGBE_RXD_STAT_UDPV	0x400   /* Valid UDP checksum */
+#define IXGBE_RXD_STAT_DYNINT	0x800   /* Pkt caused INT via DYNINT */
+#define IXGBE_RXD_STAT_TS	0x10000 /* Time Stamp */
+#define IXGBE_RXD_STAT_SECP	0x20000 /* Security Processing */
+#define IXGBE_RXD_STAT_LB	0x40000 /* Loopback Status */
+#define IXGBE_RXD_STAT_ACK	0x8000  /* ACK Packet indication */
+#define IXGBE_RXD_ERR_CE	0x01    /* CRC Error */
+#define IXGBE_RXD_ERR_LE	0x02    /* Length Error */
+#define IXGBE_RXD_ERR_PE	0x08    /* Packet Error */
+#define IXGBE_RXD_ERR_OSE	0x10    /* Oversize Error */
+#define IXGBE_RXD_ERR_USE	0x20    /* Undersize Error */
+#define IXGBE_RXD_ERR_TCPE	0x40    /* TCP/UDP Checksum Error */
+#define IXGBE_RXD_ERR_IPE	0x80    /* IP Checksum Error */
+#define IXGBE_RXDADV_ERR_MASK	0xFFF00000 /* RDESC.ERRORS mask */
+#define IXGBE_RXDADV_ERR_SHIFT	20         /* RDESC.ERRORS shift */
+#define IXGBE_RXDADV_ERR_HBO	0x00800000 /*Header Buffer Overflow */
+#define IXGBE_RXDADV_ERR_CE	0x01000000 /* CRC Error */
+#define IXGBE_RXDADV_ERR_LE	0x02000000 /* Length Error */
+#define IXGBE_RXDADV_ERR_PE	0x08000000 /* Packet Error */
+#define IXGBE_RXDADV_ERR_OSE	0x10000000 /* Oversize Error */
+#define IXGBE_RXDADV_ERR_USE	0x20000000 /* Undersize Error */
+#define IXGBE_RXDADV_ERR_TCPE	0x40000000 /* TCP/UDP Checksum Error */
+#define IXGBE_RXDADV_ERR_IPE	0x80000000 /* IP Checksum Error */
+#define IXGBE_RXD_VLAN_ID_MASK	0x0FFF  /* VLAN ID is in lower 12 bits */
+#define IXGBE_RXD_PRI_MASK	0xE000  /* Priority is in upper 3 bits */
+#define IXGBE_RXD_PRI_SHIFT	13
+#define IXGBE_RXD_CFI_MASK	0x1000  /* CFI is bit 12 */
+#define IXGBE_RXD_CFI_SHIFT	12
+
+#define IXGBE_RXDADV_STAT_DD		IXGBE_RXD_STAT_DD  /* Done */
+#define IXGBE_RXDADV_STAT_EOP		IXGBE_RXD_STAT_EOP /* End of Packet */
+#define IXGBE_RXDADV_STAT_FLM		IXGBE_RXD_STAT_FLM /* FDir Match */
+#define IXGBE_RXDADV_STAT_VP		IXGBE_RXD_STAT_VP  /* IEEE VLAN Pkt */
+#define IXGBE_RXDADV_STAT_MASK		0x000FFFFF /* Stat/NEXTP: bit 0-19 */
+#define IXGBE_RXDADV_STAT_FCEOFS	0x00000040 /* FCoE EOF/SOF Stat */
+#define IXGBE_RXDADV_STAT_FCSTAT	0x00000030 /* FCoE Pkt Stat */
+#define IXGBE_RXDADV_STAT_FCSTAT_NOMTCH	0x00000000 /* 00: No Ctxt Match */
+#define IXGBE_RXDADV_STAT_FCSTAT_NODDP	0x00000010 /* 01: Ctxt w/o DDP */
+#define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP	0x00000020 /* 10: Recv. FCP_RSP */
+#define IXGBE_RXDADV_STAT_FCSTAT_DDP	0x00000030 /* 11: Ctxt w/ DDP */
+#define IXGBE_RXDADV_STAT_SECP		0x00020000 /* IPsec/MACsec pkt found */
+
+#define IXGBE_RXDADV_RSSTYPE_MASK	0x0000000F
+#define IXGBE_RXDADV_PKTTYPE_MASK	0x0000FFF0
+#define IXGBE_RXDADV_PKTTYPE_IPV4	0x00000010 /* IPv4 hdr present */
+#define IXGBE_RXDADV_PKTTYPE_IPV6	0x00000040 /* IPv6 hdr present */
+#define IXGBE_RXDADV_PKTTYPE_IPSEC_ESP	0x00001000 /* IPSec ESP */
+#define IXGBE_RXDADV_PKTTYPE_IPSEC_AH	0x00002000 /* IPSec AH */
+#define IXGBE_RXDADV_PKTTYPE_MASK_EX	0x0001FFF0
+#define IXGBE_RXDADV_HDRBUFLEN_MASK	0x00007FE0
+#define IXGBE_RXDADV_RSCCNT_MASK	0x001E0000
+#define IXGBE_RXDADV_RSCCNT_SHIFT	17
+#define IXGBE_RXDADV_HDRBUFLEN_SHIFT	5
+#define IXGBE_RXDADV_SPLITHEADER_EN	0x00001000
+#define IXGBE_RXDADV_SPH		0x8000
+
+/* RSS Hash results */
+#define IXGBE_RXDADV_RSSTYPE_NONE		0x00000000
+#define IXGBE_RXDADV_RSSTYPE_IPV4_TCP		0x00000001
+#define IXGBE_RXDADV_RSSTYPE_IPV4		0x00000002
+#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP		0x00000003
+#define IXGBE_RXDADV_RSSTYPE_IPV6_EX		0x00000004
+#define IXGBE_RXDADV_RSSTYPE_IPV6		0x00000005
+#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX	0x00000006
+#define IXGBE_RXDADV_RSSTYPE_IPV4_UDP		0x00000007
+#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP		0x00000008
+#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX	0x00000009
+
+#define IXGBE_RXD_ERR_FRAME_ERR_MASK ( \
+				      IXGBE_RXD_ERR_CE |  \
+				      IXGBE_RXD_ERR_LE |  \
+				      IXGBE_RXD_ERR_PE |  \
+				      IXGBE_RXD_ERR_OSE | \
+				      IXGBE_RXD_ERR_USE)
+
+#define IXGBE_RXDADV_ERR_FRAME_ERR_MASK ( \
+					 IXGBE_RXDADV_ERR_CE |  \
+					 IXGBE_RXDADV_ERR_LE |  \
+					 IXGBE_RXDADV_ERR_PE |  \
+					 IXGBE_RXDADV_ERR_OSE | \
+					 IXGBE_RXDADV_ERR_USE)
+
+#define IXGBE_TXD_POPTS_IXSM	0x01       /* Insert IP checksum */
+#define IXGBE_TXD_POPTS_TXSM	0x02       /* Insert TCP/UDP checksum */
+#define IXGBE_TXD_CMD_EOP	0x01000000 /* End of Packet */
+#define IXGBE_TXD_CMD_IFCS	0x02000000 /* Insert FCS (Ethernet CRC) */
+#define IXGBE_TXD_CMD_IC	0x04000000 /* Insert Checksum */
+#define IXGBE_TXD_CMD_RS	0x08000000 /* Report Status */
+#define IXGBE_TXD_CMD_DEXT	0x20000000 /* Descriptor ext (0 = legacy) */
+#define IXGBE_TXD_CMD_VLE	0x40000000 /* Add VLAN tag */
+#define IXGBE_TXD_STAT_DD	0x00000001 /* Descriptor Done */
+#define IXGBE_TXD_CMD		(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS)
+
+/* Transmit Descriptor - Advanced */
+union ixgbe_adv_tx_desc {
+	struct {
+		__le64 buffer_addr;      /* Address of descriptor's data buf */
+		__le32 cmd_type_len;
+		__le32 olinfo_status;
+	} read;
+	struct {
+		__le64 rsvd;       /* Reserved */
+		__le32 nxtseq_seed;
+		__le32 status;
+	} wb;
+};
+
+/* Receive Descriptor - Advanced */
+union ixgbe_adv_rx_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+	} read;
+	struct {
+		struct {
+			union {
+				__le32 data;
+				struct {
+					__le16 pkt_info; /* RSS, Pkt type */
+					__le16 hdr_info; /* Splithdr, hdrlen */
+				} hs_rss;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				struct {
+					__le16 ip_id; /* IP id */
+					__le16 csum; /* Packet Checksum */
+				} csum_ip;
+			} hi_dword;
+		} lower;
+		struct {
+			__le32 status_error; /* ext status/error */
+			__le16 length; /* Packet length */
+			__le16 vlan; /* VLAN tag */
+		} upper;
+	} wb;  /* writeback */
+};
+
+/* Context descriptors */
+struct ixgbe_adv_tx_context_desc {
+	__le32 vlan_macip_lens;
+	__le32 fceof_saidx;
+	__le32 type_tucmd_mlhl;
+	__le32 mss_l4len_idx;
+};
+
+/* Adv Transmit Descriptor Config Masks */
+#define IXGBE_ADVTXD_DTYP_MASK	0x00F00000 /* DTYP mask */
+#define IXGBE_ADVTXD_DTYP_CTXT	0x00200000 /* Advanced Context Desc */
+#define IXGBE_ADVTXD_DTYP_DATA	0x00300000 /* Advanced Data Descriptor */
+#define IXGBE_ADVTXD_DCMD_EOP	IXGBE_TXD_CMD_EOP  /* End of Packet */
+#define IXGBE_ADVTXD_DCMD_IFCS	IXGBE_TXD_CMD_IFCS /* Insert FCS */
+#define IXGBE_ADVTXD_DCMD_RS	IXGBE_TXD_CMD_RS   /* Report Status */
+#define IXGBE_ADVTXD_DCMD_DEXT	IXGBE_TXD_CMD_DEXT /* Desc ext (1=Adv) */
+#define IXGBE_ADVTXD_DCMD_VLE	IXGBE_TXD_CMD_VLE  /* VLAN pkt enable */
+#define IXGBE_ADVTXD_DCMD_TSE	0x80000000 /* TCP Seg enable */
+#define IXGBE_ADVTXD_STAT_DD	IXGBE_TXD_STAT_DD  /* Descriptor Done */
+#define IXGBE_ADVTXD_TUCMD_IPV4	0x00000400  /* IP Packet Type: 1=IPv4 */
+#define IXGBE_ADVTXD_TUCMD_IPV6	0x00000000  /* IP Packet Type: 0=IPv6 */
+#define IXGBE_ADVTXD_TUCMD_L4T_UDP	0x00000000  /* L4 Packet TYPE of UDP */
+#define IXGBE_ADVTXD_TUCMD_L4T_TCP	0x00000800  /* L4 Packet TYPE of TCP */
+#define IXGBE_ADVTXD_TUCMD_L4T_SCTP	0x00001000  /* L4 Packet TYPE of SCTP */
+#define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP   0x00002000 /* IPSec Type ESP */
+#define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000 /* ESP Encrypt Enable */
+#define IXGBE_ADVTXD_IDX_SHIFT	4 /* Adv desc Index shift */
+#define IXGBE_ADVTXD_CC		0x00000080 /* Check Context */
+#define IXGBE_ADVTXD_POPTS_SHIFT	8  /* Adv desc POPTS shift */
+#define IXGBE_ADVTXD_POPTS_IPSEC	0x00000400 /* IPSec offload request */
+#define IXGBE_ADVTXD_POPTS_IXSM	(IXGBE_TXD_POPTS_IXSM << \
+				 IXGBE_ADVTXD_POPTS_SHIFT)
+#define IXGBE_ADVTXD_POPTS_TXSM	(IXGBE_TXD_POPTS_TXSM << \
+				 IXGBE_ADVTXD_POPTS_SHIFT)
+#define IXGBE_ADVTXD_PAYLEN_SHIFT	14 /* Adv desc PAYLEN shift */
+#define IXGBE_ADVTXD_MACLEN_SHIFT	9  /* Adv ctxt desc mac len shift */
+#define IXGBE_ADVTXD_VLAN_SHIFT		16 /* Adv ctxt vlan tag shift */
+#define IXGBE_ADVTXD_L4LEN_SHIFT	8  /* Adv ctxt L4LEN shift */
+#define IXGBE_ADVTXD_MSS_SHIFT		16 /* Adv ctxt MSS shift */
+
+/* Interrupt register bitmasks */
+
+#define IXGBE_EITR_CNT_WDIS	0x80000000
+#define IXGBE_MAX_EITR		0x00000FF8
+#define IXGBE_MIN_EITR		8
+
+/* Error Codes */
+#define IXGBE_ERR_INVALID_MAC_ADDR	-1
+#define IXGBE_ERR_RESET_FAILED		-2
+#define IXGBE_ERR_INVALID_ARGUMENT	-3
+#define IXGBE_ERR_CONFIG		-4
+#define IXGBE_ERR_MBX			-5
+#define IXGBE_ERR_TIMEOUT		-6
+#define IXGBE_ERR_PARAM			-7
+
+/* Transmit Config masks */
+#define IXGBE_TXDCTL_ENABLE		0x02000000 /* Ena specific Tx Queue */
+#define IXGBE_TXDCTL_SWFLSH		0x04000000 /* Tx Desc. wr-bk flushing */
+#define IXGBE_TXDCTL_WTHRESH_SHIFT	16	   /* shift to WTHRESH bits */
+
+#define IXGBE_DCA_RXCTRL_DESC_DCA_EN	BIT(5)  /* Rx Desc enable */
+#define IXGBE_DCA_RXCTRL_HEAD_DCA_EN	BIT(6)  /* Rx Desc header ena */
+#define IXGBE_DCA_RXCTRL_DATA_DCA_EN	BIT(7)  /* Rx Desc payload ena */
+#define IXGBE_DCA_RXCTRL_DESC_RRO_EN	BIT(9)  /* Rx rd Desc Relax Order */
+#define IXGBE_DCA_RXCTRL_DATA_WRO_EN	BIT(13) /* Rx wr data Relax Order */
+#define IXGBE_DCA_RXCTRL_HEAD_WRO_EN	BIT(15) /* Rx wr header RO */
+
+#define IXGBE_DCA_TXCTRL_DESC_DCA_EN	BIT(5)  /* DCA Tx Desc enable */
+#define IXGBE_DCA_TXCTRL_DESC_RRO_EN	BIT(9)  /* Tx rd Desc Relax Order */
+#define IXGBE_DCA_TXCTRL_DESC_WRO_EN	BIT(11) /* Tx Desc writeback RO bit */
+#define IXGBE_DCA_TXCTRL_DATA_RRO_EN	BIT(13) /* Tx rd data Relax Order */
+
+#endif /* _IXGBEVF_DEFINES_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
new file mode 100644
index 0000000000..296915414a
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -0,0 +1,1001 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+/* ethtool support for ixgbevf */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/vmalloc.h>
+#include <linux/if_vlan.h>
+#include <linux/uaccess.h>
+
+#include "ixgbevf.h"
+
+enum {NETDEV_STATS, IXGBEVF_STATS};
+
+struct ixgbe_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int type;
+	int sizeof_stat;
+	int stat_offset;
+};
+
+#define IXGBEVF_STAT(_name, _stat) { \
+	.stat_string = _name, \
+	.type = IXGBEVF_STATS, \
+	.sizeof_stat = sizeof_field(struct ixgbevf_adapter, _stat), \
+	.stat_offset = offsetof(struct ixgbevf_adapter, _stat) \
+}
+
+#define IXGBEVF_NETDEV_STAT(_net_stat) { \
+	.stat_string = #_net_stat, \
+	.type = NETDEV_STATS, \
+	.sizeof_stat = sizeof_field(struct net_device_stats, _net_stat), \
+	.stat_offset = offsetof(struct net_device_stats, _net_stat) \
+}
+
+static struct ixgbe_stats ixgbevf_gstrings_stats[] = {
+	IXGBEVF_NETDEV_STAT(rx_packets),
+	IXGBEVF_NETDEV_STAT(tx_packets),
+	IXGBEVF_NETDEV_STAT(rx_bytes),
+	IXGBEVF_NETDEV_STAT(tx_bytes),
+	IXGBEVF_STAT("tx_busy", tx_busy),
+	IXGBEVF_STAT("tx_restart_queue", restart_queue),
+	IXGBEVF_STAT("tx_timeout_count", tx_timeout_count),
+	IXGBEVF_NETDEV_STAT(multicast),
+	IXGBEVF_STAT("rx_csum_offload_errors", hw_csum_rx_error),
+	IXGBEVF_STAT("alloc_rx_page", alloc_rx_page),
+	IXGBEVF_STAT("alloc_rx_page_failed", alloc_rx_page_failed),
+	IXGBEVF_STAT("alloc_rx_buff_failed", alloc_rx_buff_failed),
+	IXGBEVF_STAT("tx_ipsec", tx_ipsec),
+	IXGBEVF_STAT("rx_ipsec", rx_ipsec),
+};
+
+#define IXGBEVF_QUEUE_STATS_LEN ( \
+	(((struct ixgbevf_adapter *)netdev_priv(netdev))->num_tx_queues + \
+	 ((struct ixgbevf_adapter *)netdev_priv(netdev))->num_xdp_queues + \
+	 ((struct ixgbevf_adapter *)netdev_priv(netdev))->num_rx_queues) * \
+	 (sizeof(struct ixgbevf_stats) / sizeof(u64)))
+#define IXGBEVF_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbevf_gstrings_stats)
+
+#define IXGBEVF_STATS_LEN (IXGBEVF_GLOBAL_STATS_LEN + IXGBEVF_QUEUE_STATS_LEN)
+static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
+	"Register test  (offline)",
+	"Link test   (on/offline)"
+};
+
+#define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN)
+
+static const char ixgbevf_priv_flags_strings[][ETH_GSTRING_LEN] = {
+#define IXGBEVF_PRIV_FLAGS_LEGACY_RX	BIT(0)
+	"legacy-rx",
+};
+
+#define IXGBEVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbevf_priv_flags_strings)
+
+static int ixgbevf_get_link_ksettings(struct net_device *netdev,
+				      struct ethtool_link_ksettings *cmd)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+
+	ethtool_link_ksettings_zero_link_mode(cmd, supported);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, 10000baseT_Full);
+	cmd->base.autoneg = AUTONEG_DISABLE;
+	cmd->base.port = -1;
+
+	if (adapter->link_up) {
+		__u32 speed = SPEED_10000;
+
+		switch (adapter->link_speed) {
+		case IXGBE_LINK_SPEED_10GB_FULL:
+			speed = SPEED_10000;
+			break;
+		case IXGBE_LINK_SPEED_1GB_FULL:
+			speed = SPEED_1000;
+			break;
+		case IXGBE_LINK_SPEED_100_FULL:
+			speed = SPEED_100;
+			break;
+		}
+
+		cmd->base.speed = speed;
+		cmd->base.duplex = DUPLEX_FULL;
+	} else {
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
+	}
+
+	return 0;
+}
+
+static u32 ixgbevf_get_msglevel(struct net_device *netdev)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->msg_enable;
+}
+
+static void ixgbevf_set_msglevel(struct net_device *netdev, u32 data)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+
+	adapter->msg_enable = data;
+}
+
+static int ixgbevf_get_regs_len(struct net_device *netdev)
+{
+#define IXGBE_REGS_LEN 45
+	return IXGBE_REGS_LEN * sizeof(u32);
+}
+
+static void ixgbevf_get_regs(struct net_device *netdev,
+			     struct ethtool_regs *regs,
+			     void *p)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 *regs_buff = p;
+	u32 regs_len = ixgbevf_get_regs_len(netdev);
+	u8 i;
+
+	memset(p, 0, regs_len);
+
+	/* generate a number suitable for ethtool's register version */
+	regs->version = (1u << 24) | (hw->revision_id << 16) | hw->device_id;
+
+	/* General Registers */
+	regs_buff[0] = IXGBE_READ_REG(hw, IXGBE_VFCTRL);
+	regs_buff[1] = IXGBE_READ_REG(hw, IXGBE_VFSTATUS);
+	regs_buff[2] = IXGBE_READ_REG(hw, IXGBE_VFLINKS);
+	regs_buff[3] = IXGBE_READ_REG(hw, IXGBE_VFRXMEMWRAP);
+	regs_buff[4] = IXGBE_READ_REG(hw, IXGBE_VFFRTIMER);
+
+	/* Interrupt */
+	/* don't read EICR because it can clear interrupt causes, instead
+	 * read EICS which is a shadow but doesn't clear EICR
+	 */
+	regs_buff[5] = IXGBE_READ_REG(hw, IXGBE_VTEICS);
+	regs_buff[6] = IXGBE_READ_REG(hw, IXGBE_VTEICS);
+	regs_buff[7] = IXGBE_READ_REG(hw, IXGBE_VTEIMS);
+	regs_buff[8] = IXGBE_READ_REG(hw, IXGBE_VTEIMC);
+	regs_buff[9] = IXGBE_READ_REG(hw, IXGBE_VTEIAC);
+	regs_buff[10] = IXGBE_READ_REG(hw, IXGBE_VTEIAM);
+	regs_buff[11] = IXGBE_READ_REG(hw, IXGBE_VTEITR(0));
+	regs_buff[12] = IXGBE_READ_REG(hw, IXGBE_VTIVAR(0));
+	regs_buff[13] = IXGBE_READ_REG(hw, IXGBE_VTIVAR_MISC);
+
+	/* Receive DMA */
+	for (i = 0; i < 2; i++)
+		regs_buff[14 + i] = IXGBE_READ_REG(hw, IXGBE_VFRDBAL(i));
+	for (i = 0; i < 2; i++)
+		regs_buff[16 + i] = IXGBE_READ_REG(hw, IXGBE_VFRDBAH(i));
+	for (i = 0; i < 2; i++)
+		regs_buff[18 + i] = IXGBE_READ_REG(hw, IXGBE_VFRDLEN(i));
+	for (i = 0; i < 2; i++)
+		regs_buff[20 + i] = IXGBE_READ_REG(hw, IXGBE_VFRDH(i));
+	for (i = 0; i < 2; i++)
+		regs_buff[22 + i] = IXGBE_READ_REG(hw, IXGBE_VFRDT(i));
+	for (i = 0; i < 2; i++)
+		regs_buff[24 + i] = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
+	for (i = 0; i < 2; i++)
+		regs_buff[26 + i] = IXGBE_READ_REG(hw, IXGBE_VFSRRCTL(i));
+
+	/* Receive */
+	regs_buff[28] = IXGBE_READ_REG(hw, IXGBE_VFPSRTYPE);
+
+	/* Transmit */
+	for (i = 0; i < 2; i++)
+		regs_buff[29 + i] = IXGBE_READ_REG(hw, IXGBE_VFTDBAL(i));
+	for (i = 0; i < 2; i++)
+		regs_buff[31 + i] = IXGBE_READ_REG(hw, IXGBE_VFTDBAH(i));
+	for (i = 0; i < 2; i++)
+		regs_buff[33 + i] = IXGBE_READ_REG(hw, IXGBE_VFTDLEN(i));
+	for (i = 0; i < 2; i++)
+		regs_buff[35 + i] = IXGBE_READ_REG(hw, IXGBE_VFTDH(i));
+	for (i = 0; i < 2; i++)
+		regs_buff[37 + i] = IXGBE_READ_REG(hw, IXGBE_VFTDT(i));
+	for (i = 0; i < 2; i++)
+		regs_buff[39 + i] = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
+	for (i = 0; i < 2; i++)
+		regs_buff[41 + i] = IXGBE_READ_REG(hw, IXGBE_VFTDWBAL(i));
+	for (i = 0; i < 2; i++)
+		regs_buff[43 + i] = IXGBE_READ_REG(hw, IXGBE_VFTDWBAH(i));
+}
+
+static void ixgbevf_get_drvinfo(struct net_device *netdev,
+				struct ethtool_drvinfo *drvinfo)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+
+	strscpy(drvinfo->driver, ixgbevf_driver_name, sizeof(drvinfo->driver));
+	strscpy(drvinfo->bus_info, pci_name(adapter->pdev),
+		sizeof(drvinfo->bus_info));
+
+	drvinfo->n_priv_flags = IXGBEVF_PRIV_FLAGS_STR_LEN;
+}
+
+static void ixgbevf_get_ringparam(struct net_device *netdev,
+				  struct ethtool_ringparam *ring,
+				  struct kernel_ethtool_ringparam *kernel_ring,
+				  struct netlink_ext_ack *extack)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+
+	ring->rx_max_pending = IXGBEVF_MAX_RXD;
+	ring->tx_max_pending = IXGBEVF_MAX_TXD;
+	ring->rx_pending = adapter->rx_ring_count;
+	ring->tx_pending = adapter->tx_ring_count;
+}
+
+static int ixgbevf_set_ringparam(struct net_device *netdev,
+				 struct ethtool_ringparam *ring,
+				 struct kernel_ethtool_ringparam *kernel_ring,
+				 struct netlink_ext_ack *extack)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	struct ixgbevf_ring *tx_ring = NULL, *rx_ring = NULL;
+	u32 new_rx_count, new_tx_count;
+	int i, j, err = 0;
+
+	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+		return -EINVAL;
+
+	new_tx_count = max_t(u32, ring->tx_pending, IXGBEVF_MIN_TXD);
+	new_tx_count = min_t(u32, new_tx_count, IXGBEVF_MAX_TXD);
+	new_tx_count = ALIGN(new_tx_count, IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE);
+
+	new_rx_count = max_t(u32, ring->rx_pending, IXGBEVF_MIN_RXD);
+	new_rx_count = min_t(u32, new_rx_count, IXGBEVF_MAX_RXD);
+	new_rx_count = ALIGN(new_rx_count, IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE);
+
+	/* if nothing to do return success */
+	if ((new_tx_count == adapter->tx_ring_count) &&
+	    (new_rx_count == adapter->rx_ring_count))
+		return 0;
+
+	while (test_and_set_bit(__IXGBEVF_RESETTING, &adapter->state))
+		usleep_range(1000, 2000);
+
+	if (!netif_running(adapter->netdev)) {
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			adapter->tx_ring[i]->count = new_tx_count;
+		for (i = 0; i < adapter->num_xdp_queues; i++)
+			adapter->xdp_ring[i]->count = new_tx_count;
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			adapter->rx_ring[i]->count = new_rx_count;
+		adapter->tx_ring_count = new_tx_count;
+		adapter->xdp_ring_count = new_tx_count;
+		adapter->rx_ring_count = new_rx_count;
+		goto clear_reset;
+	}
+
+	if (new_tx_count != adapter->tx_ring_count) {
+		tx_ring = vmalloc(array_size(sizeof(*tx_ring),
+					     adapter->num_tx_queues +
+						adapter->num_xdp_queues));
+		if (!tx_ring) {
+			err = -ENOMEM;
+			goto clear_reset;
+		}
+
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			/* clone ring and setup updated count */
+			tx_ring[i] = *adapter->tx_ring[i];
+			tx_ring[i].count = new_tx_count;
+			err = ixgbevf_setup_tx_resources(&tx_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					ixgbevf_free_tx_resources(&tx_ring[i]);
+				}
+
+				vfree(tx_ring);
+				tx_ring = NULL;
+
+				goto clear_reset;
+			}
+		}
+
+		for (j = 0; j < adapter->num_xdp_queues; i++, j++) {
+			/* clone ring and setup updated count */
+			tx_ring[i] = *adapter->xdp_ring[j];
+			tx_ring[i].count = new_tx_count;
+			err = ixgbevf_setup_tx_resources(&tx_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					ixgbevf_free_tx_resources(&tx_ring[i]);
+				}
+
+				vfree(tx_ring);
+				tx_ring = NULL;
+
+				goto clear_reset;
+			}
+		}
+	}
+
+	if (new_rx_count != adapter->rx_ring_count) {
+		rx_ring = vmalloc(array_size(sizeof(*rx_ring),
+					     adapter->num_rx_queues));
+		if (!rx_ring) {
+			err = -ENOMEM;
+			goto clear_reset;
+		}
+
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			/* clone ring and setup updated count */
+			rx_ring[i] = *adapter->rx_ring[i];
+
+			/* Clear copied XDP RX-queue info */
+			memset(&rx_ring[i].xdp_rxq, 0,
+			       sizeof(rx_ring[i].xdp_rxq));
+
+			rx_ring[i].count = new_rx_count;
+			err = ixgbevf_setup_rx_resources(adapter, &rx_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					ixgbevf_free_rx_resources(&rx_ring[i]);
+				}
+
+				vfree(rx_ring);
+				rx_ring = NULL;
+
+				goto clear_reset;
+			}
+		}
+	}
+
+	/* bring interface down to prepare for update */
+	ixgbevf_down(adapter);
+
+	/* Tx */
+	if (tx_ring) {
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			ixgbevf_free_tx_resources(adapter->tx_ring[i]);
+			*adapter->tx_ring[i] = tx_ring[i];
+		}
+		adapter->tx_ring_count = new_tx_count;
+
+		for (j = 0; j < adapter->num_xdp_queues; i++, j++) {
+			ixgbevf_free_tx_resources(adapter->xdp_ring[j]);
+			*adapter->xdp_ring[j] = tx_ring[i];
+		}
+		adapter->xdp_ring_count = new_tx_count;
+
+		vfree(tx_ring);
+		tx_ring = NULL;
+	}
+
+	/* Rx */
+	if (rx_ring) {
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			ixgbevf_free_rx_resources(adapter->rx_ring[i]);
+			*adapter->rx_ring[i] = rx_ring[i];
+		}
+		adapter->rx_ring_count = new_rx_count;
+
+		vfree(rx_ring);
+		rx_ring = NULL;
+	}
+
+	/* restore interface using new values */
+	ixgbevf_up(adapter);
+
+clear_reset:
+	/* free Tx resources if Rx error is encountered */
+	if (tx_ring) {
+		for (i = 0;
+		     i < adapter->num_tx_queues + adapter->num_xdp_queues; i++)
+			ixgbevf_free_tx_resources(&tx_ring[i]);
+		vfree(tx_ring);
+	}
+
+	clear_bit(__IXGBEVF_RESETTING, &adapter->state);
+	return err;
+}
+
+static int ixgbevf_get_sset_count(struct net_device *netdev, int stringset)
+{
+	switch (stringset) {
+	case ETH_SS_TEST:
+		return IXGBEVF_TEST_LEN;
+	case ETH_SS_STATS:
+		return IXGBEVF_STATS_LEN;
+	case ETH_SS_PRIV_FLAGS:
+		return IXGBEVF_PRIV_FLAGS_STR_LEN;
+	default:
+		return -EINVAL;
+	}
+}
+
+static void ixgbevf_get_ethtool_stats(struct net_device *netdev,
+				      struct ethtool_stats *stats, u64 *data)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	struct rtnl_link_stats64 temp;
+	const struct rtnl_link_stats64 *net_stats;
+	unsigned int start;
+	struct ixgbevf_ring *ring;
+	int i, j;
+	char *p;
+
+	ixgbevf_update_stats(adapter);
+	net_stats = dev_get_stats(netdev, &temp);
+	for (i = 0; i < IXGBEVF_GLOBAL_STATS_LEN; i++) {
+		switch (ixgbevf_gstrings_stats[i].type) {
+		case NETDEV_STATS:
+			p = (char *)net_stats +
+					ixgbevf_gstrings_stats[i].stat_offset;
+			break;
+		case IXGBEVF_STATS:
+			p = (char *)adapter +
+					ixgbevf_gstrings_stats[i].stat_offset;
+			break;
+		default:
+			data[i] = 0;
+			continue;
+		}
+
+		data[i] = (ixgbevf_gstrings_stats[i].sizeof_stat ==
+			   sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+
+	/* populate Tx queue data */
+	for (j = 0; j < adapter->num_tx_queues; j++) {
+		ring = adapter->tx_ring[j];
+		if (!ring) {
+			data[i++] = 0;
+			data[i++] = 0;
+			continue;
+		}
+
+		do {
+			start = u64_stats_fetch_begin(&ring->syncp);
+			data[i]   = ring->stats.packets;
+			data[i + 1] = ring->stats.bytes;
+		} while (u64_stats_fetch_retry(&ring->syncp, start));
+		i += 2;
+	}
+
+	/* populate XDP queue data */
+	for (j = 0; j < adapter->num_xdp_queues; j++) {
+		ring = adapter->xdp_ring[j];
+		if (!ring) {
+			data[i++] = 0;
+			data[i++] = 0;
+			continue;
+		}
+
+		do {
+			start = u64_stats_fetch_begin(&ring->syncp);
+			data[i] = ring->stats.packets;
+			data[i + 1] = ring->stats.bytes;
+		} while (u64_stats_fetch_retry(&ring->syncp, start));
+		i += 2;
+	}
+
+	/* populate Rx queue data */
+	for (j = 0; j < adapter->num_rx_queues; j++) {
+		ring = adapter->rx_ring[j];
+		if (!ring) {
+			data[i++] = 0;
+			data[i++] = 0;
+			continue;
+		}
+
+		do {
+			start = u64_stats_fetch_begin(&ring->syncp);
+			data[i]   = ring->stats.packets;
+			data[i + 1] = ring->stats.bytes;
+		} while (u64_stats_fetch_retry(&ring->syncp, start));
+		i += 2;
+	}
+}
+
+static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset,
+				u8 *data)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	char *p = (char *)data;
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_TEST:
+		memcpy(data, *ixgbe_gstrings_test,
+		       IXGBEVF_TEST_LEN * ETH_GSTRING_LEN);
+		break;
+	case ETH_SS_STATS:
+		for (i = 0; i < IXGBEVF_GLOBAL_STATS_LEN; i++) {
+			memcpy(p, ixgbevf_gstrings_stats[i].stat_string,
+			       ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+
+		for (i = 0; i < adapter->num_tx_queues; i++) {
+			sprintf(p, "tx_queue_%u_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "tx_queue_%u_bytes", i);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < adapter->num_xdp_queues; i++) {
+			sprintf(p, "xdp_queue_%u_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "xdp_queue_%u_bytes", i);
+			p += ETH_GSTRING_LEN;
+		}
+		for (i = 0; i < adapter->num_rx_queues; i++) {
+			sprintf(p, "rx_queue_%u_packets", i);
+			p += ETH_GSTRING_LEN;
+			sprintf(p, "rx_queue_%u_bytes", i);
+			p += ETH_GSTRING_LEN;
+		}
+		break;
+	case ETH_SS_PRIV_FLAGS:
+		memcpy(data, ixgbevf_priv_flags_strings,
+		       IXGBEVF_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+		break;
+	}
+}
+
+static int ixgbevf_link_test(struct ixgbevf_adapter *adapter, u64 *data)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	bool link_up;
+	u32 link_speed = 0;
+	*data = 0;
+
+	hw->mac.ops.check_link(hw, &link_speed, &link_up, true);
+	if (!link_up)
+		*data = 1;
+
+	return *data;
+}
+
+/* ethtool register test data */
+struct ixgbevf_reg_test {
+	u16 reg;
+	u8  array_len;
+	u8  test_type;
+	u32 mask;
+	u32 write;
+};
+
+/* In the hardware, registers are laid out either singly, in arrays
+ * spaced 0x40 bytes apart, or in contiguous tables.  We assume
+ * most tests take place on arrays or single registers (handled
+ * as a single-element array) and special-case the tables.
+ * Table tests are always pattern tests.
+ *
+ * We also make provision for some required setup steps by specifying
+ * registers to be written without any read-back testing.
+ */
+
+#define PATTERN_TEST	1
+#define SET_READ_TEST	2
+#define WRITE_NO_TEST	3
+#define TABLE32_TEST	4
+#define TABLE64_TEST_LO	5
+#define TABLE64_TEST_HI	6
+
+/* default VF register test */
+static const struct ixgbevf_reg_test reg_test_vf[] = {
+	{ IXGBE_VFRDBAL(0), 2, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFF80 },
+	{ IXGBE_VFRDBAH(0), 2, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_VFRDLEN(0), 2, PATTERN_TEST, 0x000FFF80, 0x000FFFFF },
+	{ IXGBE_VFRXDCTL(0), 2, WRITE_NO_TEST, 0, IXGBE_RXDCTL_ENABLE },
+	{ IXGBE_VFRDT(0), 2, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF },
+	{ IXGBE_VFRXDCTL(0), 2, WRITE_NO_TEST, 0, 0 },
+	{ IXGBE_VFTDBAL(0), 2, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF },
+	{ IXGBE_VFTDBAH(0), 2, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF },
+	{ IXGBE_VFTDLEN(0), 2, PATTERN_TEST, 0x000FFF80, 0x000FFF80 },
+	{ .reg = 0 }
+};
+
+static const u32 register_test_patterns[] = {
+	0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF
+};
+
+static bool reg_pattern_test(struct ixgbevf_adapter *adapter, u64 *data,
+			     int reg, u32 mask, u32 write)
+{
+	u32 pat, val, before;
+
+	if (IXGBE_REMOVED(adapter->hw.hw_addr)) {
+		*data = 1;
+		return true;
+	}
+	for (pat = 0; pat < ARRAY_SIZE(register_test_patterns); pat++) {
+		before = ixgbevf_read_reg(&adapter->hw, reg);
+		ixgbe_write_reg(&adapter->hw, reg,
+				register_test_patterns[pat] & write);
+		val = ixgbevf_read_reg(&adapter->hw, reg);
+		if (val != (register_test_patterns[pat] & write & mask)) {
+			hw_dbg(&adapter->hw,
+			       "pattern test reg %04X failed: got 0x%08X expected 0x%08X\n",
+			       reg, val,
+			       register_test_patterns[pat] & write & mask);
+			*data = reg;
+			ixgbe_write_reg(&adapter->hw, reg, before);
+			return true;
+		}
+		ixgbe_write_reg(&adapter->hw, reg, before);
+	}
+	return false;
+}
+
+static bool reg_set_and_check(struct ixgbevf_adapter *adapter, u64 *data,
+			      int reg, u32 mask, u32 write)
+{
+	u32 val, before;
+
+	if (IXGBE_REMOVED(adapter->hw.hw_addr)) {
+		*data = 1;
+		return true;
+	}
+	before = ixgbevf_read_reg(&adapter->hw, reg);
+	ixgbe_write_reg(&adapter->hw, reg, write & mask);
+	val = ixgbevf_read_reg(&adapter->hw, reg);
+	if ((write & mask) != (val & mask)) {
+		pr_err("set/check reg %04X test failed: got 0x%08X expected 0x%08X\n",
+		       reg, (val & mask), write & mask);
+		*data = reg;
+		ixgbe_write_reg(&adapter->hw, reg, before);
+		return true;
+	}
+	ixgbe_write_reg(&adapter->hw, reg, before);
+	return false;
+}
+
+static int ixgbevf_reg_test(struct ixgbevf_adapter *adapter, u64 *data)
+{
+	const struct ixgbevf_reg_test *test;
+	u32 i;
+
+	if (IXGBE_REMOVED(adapter->hw.hw_addr)) {
+		dev_err(&adapter->pdev->dev,
+			"Adapter removed - register test blocked\n");
+		*data = 1;
+		return 1;
+	}
+	test = reg_test_vf;
+
+	/* Perform the register test, looping through the test table
+	 * until we either fail or reach the null entry.
+	 */
+	while (test->reg) {
+		for (i = 0; i < test->array_len; i++) {
+			bool b = false;
+
+			switch (test->test_type) {
+			case PATTERN_TEST:
+				b = reg_pattern_test(adapter, data,
+						     test->reg + (i * 0x40),
+						     test->mask,
+						     test->write);
+				break;
+			case SET_READ_TEST:
+				b = reg_set_and_check(adapter, data,
+						      test->reg + (i * 0x40),
+						      test->mask,
+						      test->write);
+				break;
+			case WRITE_NO_TEST:
+				ixgbe_write_reg(&adapter->hw,
+						test->reg + (i * 0x40),
+						test->write);
+				break;
+			case TABLE32_TEST:
+				b = reg_pattern_test(adapter, data,
+						     test->reg + (i * 4),
+						     test->mask,
+						     test->write);
+				break;
+			case TABLE64_TEST_LO:
+				b = reg_pattern_test(adapter, data,
+						     test->reg + (i * 8),
+						     test->mask,
+						     test->write);
+				break;
+			case TABLE64_TEST_HI:
+				b = reg_pattern_test(adapter, data,
+						     test->reg + 4 + (i * 8),
+						     test->mask,
+						     test->write);
+				break;
+			}
+			if (b)
+				return 1;
+		}
+		test++;
+	}
+
+	*data = 0;
+	return *data;
+}
+
+static void ixgbevf_diag_test(struct net_device *netdev,
+			      struct ethtool_test *eth_test, u64 *data)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	bool if_running = netif_running(netdev);
+
+	if (IXGBE_REMOVED(adapter->hw.hw_addr)) {
+		dev_err(&adapter->pdev->dev,
+			"Adapter removed - test blocked\n");
+		data[0] = 1;
+		data[1] = 1;
+		eth_test->flags |= ETH_TEST_FL_FAILED;
+		return;
+	}
+	set_bit(__IXGBEVF_TESTING, &adapter->state);
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		/* Offline tests */
+
+		hw_dbg(&adapter->hw, "offline testing starting\n");
+
+		/* Link test performed before hardware reset so autoneg doesn't
+		 * interfere with test result
+		 */
+		if (ixgbevf_link_test(adapter, &data[1]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		if (if_running)
+			/* indicate we're in test mode */
+			ixgbevf_close(netdev);
+		else
+			ixgbevf_reset(adapter);
+
+		hw_dbg(&adapter->hw, "register testing starting\n");
+		if (ixgbevf_reg_test(adapter, &data[0]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		ixgbevf_reset(adapter);
+
+		clear_bit(__IXGBEVF_TESTING, &adapter->state);
+		if (if_running)
+			ixgbevf_open(netdev);
+	} else {
+		hw_dbg(&adapter->hw, "online testing starting\n");
+		/* Online tests */
+		if (ixgbevf_link_test(adapter, &data[1]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* Online tests aren't run; pass by default */
+		data[0] = 0;
+
+		clear_bit(__IXGBEVF_TESTING, &adapter->state);
+	}
+	msleep_interruptible(4 * 1000);
+}
+
+static int ixgbevf_nway_reset(struct net_device *netdev)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+
+	if (netif_running(netdev))
+		ixgbevf_reinit_locked(adapter);
+
+	return 0;
+}
+
+static int ixgbevf_get_coalesce(struct net_device *netdev,
+				struct ethtool_coalesce *ec,
+				struct kernel_ethtool_coalesce *kernel_coal,
+				struct netlink_ext_ack *extack)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+
+	/* only valid if in constant ITR mode */
+	if (adapter->rx_itr_setting <= 1)
+		ec->rx_coalesce_usecs = adapter->rx_itr_setting;
+	else
+		ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2;
+
+	/* if in mixed Tx/Rx queues per vector mode, report only Rx settings */
+	if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count)
+		return 0;
+
+	/* only valid if in constant ITR mode */
+	if (adapter->tx_itr_setting <= 1)
+		ec->tx_coalesce_usecs = adapter->tx_itr_setting;
+	else
+		ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2;
+
+	return 0;
+}
+
+static int ixgbevf_set_coalesce(struct net_device *netdev,
+				struct ethtool_coalesce *ec,
+				struct kernel_ethtool_coalesce *kernel_coal,
+				struct netlink_ext_ack *extack)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	struct ixgbevf_q_vector *q_vector;
+	int num_vectors, i;
+	u16 tx_itr_param, rx_itr_param;
+
+	/* don't accept Tx specific changes if we've got mixed RxTx vectors */
+	if (adapter->q_vector[0]->tx.count &&
+	    adapter->q_vector[0]->rx.count && ec->tx_coalesce_usecs)
+		return -EINVAL;
+
+	if ((ec->rx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)) ||
+	    (ec->tx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)))
+		return -EINVAL;
+
+	if (ec->rx_coalesce_usecs > 1)
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2;
+	else
+		adapter->rx_itr_setting = ec->rx_coalesce_usecs;
+
+	if (adapter->rx_itr_setting == 1)
+		rx_itr_param = IXGBE_20K_ITR;
+	else
+		rx_itr_param = adapter->rx_itr_setting;
+
+	if (ec->tx_coalesce_usecs > 1)
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2;
+	else
+		adapter->tx_itr_setting = ec->tx_coalesce_usecs;
+
+	if (adapter->tx_itr_setting == 1)
+		tx_itr_param = IXGBE_12K_ITR;
+	else
+		tx_itr_param = adapter->tx_itr_setting;
+
+	num_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+
+	for (i = 0; i < num_vectors; i++) {
+		q_vector = adapter->q_vector[i];
+		if (q_vector->tx.count && !q_vector->rx.count)
+			/* Tx only */
+			q_vector->itr = tx_itr_param;
+		else
+			/* Rx only or mixed */
+			q_vector->itr = rx_itr_param;
+		ixgbevf_write_eitr(q_vector);
+	}
+
+	return 0;
+}
+
+static int ixgbevf_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
+			     u32 *rules __always_unused)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(dev);
+
+	switch (info->cmd) {
+	case ETHTOOL_GRXRINGS:
+		info->data = adapter->num_rx_queues;
+		return 0;
+	default:
+		hw_dbg(&adapter->hw, "Command parameters not supported\n");
+		return -EOPNOTSUPP;
+	}
+}
+
+static u32 ixgbevf_get_rxfh_indir_size(struct net_device *netdev)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+
+	if (adapter->hw.mac.type >= ixgbe_mac_X550_vf)
+		return IXGBEVF_X550_VFRETA_SIZE;
+
+	return IXGBEVF_82599_RETA_SIZE;
+}
+
+static u32 ixgbevf_get_rxfh_key_size(struct net_device *netdev)
+{
+	return IXGBEVF_RSS_HASH_KEY_SIZE;
+}
+
+static int ixgbevf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+			    u8 *hfunc)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	int err = 0;
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	if (adapter->hw.mac.type >= ixgbe_mac_X550_vf) {
+		if (key)
+			memcpy(key, adapter->rss_key,
+			       ixgbevf_get_rxfh_key_size(netdev));
+
+		if (indir) {
+			int i;
+
+			for (i = 0; i < IXGBEVF_X550_VFRETA_SIZE; i++)
+				indir[i] = adapter->rss_indir_tbl[i];
+		}
+	} else {
+		/* If neither indirection table nor hash key was requested
+		 *  - just return a success avoiding taking any locks.
+		 */
+		if (!indir && !key)
+			return 0;
+
+		spin_lock_bh(&adapter->mbx_lock);
+		if (indir)
+			err = ixgbevf_get_reta_locked(&adapter->hw, indir,
+						      adapter->num_rx_queues);
+
+		if (!err && key)
+			err = ixgbevf_get_rss_key_locked(&adapter->hw, key);
+
+		spin_unlock_bh(&adapter->mbx_lock);
+	}
+
+	return err;
+}
+
+static u32 ixgbevf_get_priv_flags(struct net_device *netdev)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	u32 priv_flags = 0;
+
+	if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+		priv_flags |= IXGBEVF_PRIV_FLAGS_LEGACY_RX;
+
+	return priv_flags;
+}
+
+static int ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	unsigned int flags = adapter->flags;
+
+	flags &= ~IXGBEVF_FLAGS_LEGACY_RX;
+	if (priv_flags & IXGBEVF_PRIV_FLAGS_LEGACY_RX)
+		flags |= IXGBEVF_FLAGS_LEGACY_RX;
+
+	if (flags != adapter->flags) {
+		adapter->flags = flags;
+
+		/* reset interface to repopulate queues */
+		if (netif_running(netdev))
+			ixgbevf_reinit_locked(adapter);
+	}
+
+	return 0;
+}
+
+static const struct ethtool_ops ixgbevf_ethtool_ops = {
+	.supported_coalesce_params = ETHTOOL_COALESCE_USECS,
+	.get_drvinfo		= ixgbevf_get_drvinfo,
+	.get_regs_len		= ixgbevf_get_regs_len,
+	.get_regs		= ixgbevf_get_regs,
+	.nway_reset		= ixgbevf_nway_reset,
+	.get_link		= ethtool_op_get_link,
+	.get_ringparam		= ixgbevf_get_ringparam,
+	.set_ringparam		= ixgbevf_set_ringparam,
+	.get_msglevel		= ixgbevf_get_msglevel,
+	.set_msglevel		= ixgbevf_set_msglevel,
+	.self_test		= ixgbevf_diag_test,
+	.get_sset_count		= ixgbevf_get_sset_count,
+	.get_strings		= ixgbevf_get_strings,
+	.get_ethtool_stats	= ixgbevf_get_ethtool_stats,
+	.get_coalesce		= ixgbevf_get_coalesce,
+	.set_coalesce		= ixgbevf_set_coalesce,
+	.get_rxnfc		= ixgbevf_get_rxnfc,
+	.get_rxfh_indir_size	= ixgbevf_get_rxfh_indir_size,
+	.get_rxfh_key_size	= ixgbevf_get_rxfh_key_size,
+	.get_rxfh		= ixgbevf_get_rxfh,
+	.get_link_ksettings	= ixgbevf_get_link_ksettings,
+	.get_priv_flags		= ixgbevf_get_priv_flags,
+	.set_priv_flags		= ixgbevf_set_priv_flags,
+};
+
+void ixgbevf_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &ixgbevf_ethtool_ops;
+}
diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.c b/drivers/net/ethernet/intel/ixgbevf/ipsec.c
new file mode 100644
index 0000000000..66cf17f194
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c
@@ -0,0 +1,691 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Oracle and/or its affiliates. All rights reserved. */
+
+#include "ixgbevf.h"
+#include <net/xfrm.h>
+#include <crypto/aead.h>
+
+#define IXGBE_IPSEC_KEY_BITS  160
+static const char aes_gcm_name[] = "rfc4106(gcm(aes))";
+
+/**
+ * ixgbevf_ipsec_set_pf_sa - ask the PF to set up an SA
+ * @adapter: board private structure
+ * @xs: xfrm info to be sent to the PF
+ *
+ * Returns: positive offload handle from the PF, or negative error code
+ **/
+static int ixgbevf_ipsec_set_pf_sa(struct ixgbevf_adapter *adapter,
+				   struct xfrm_state *xs)
+{
+	u32 msgbuf[IXGBE_VFMAILBOX_SIZE] = { 0 };
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct sa_mbx_msg *sam;
+	int ret;
+
+	/* send the important bits to the PF */
+	sam = (struct sa_mbx_msg *)(&msgbuf[1]);
+	sam->dir = xs->xso.dir;
+	sam->spi = xs->id.spi;
+	sam->proto = xs->id.proto;
+	sam->family = xs->props.family;
+
+	if (xs->props.family == AF_INET6)
+		memcpy(sam->addr, &xs->id.daddr.a6, sizeof(xs->id.daddr.a6));
+	else
+		memcpy(sam->addr, &xs->id.daddr.a4, sizeof(xs->id.daddr.a4));
+	memcpy(sam->key, xs->aead->alg_key, sizeof(sam->key));
+
+	msgbuf[0] = IXGBE_VF_IPSEC_ADD;
+
+	spin_lock_bh(&adapter->mbx_lock);
+
+	ret = ixgbevf_write_mbx(hw, msgbuf, IXGBE_VFMAILBOX_SIZE);
+	if (ret)
+		goto out;
+
+	ret = ixgbevf_poll_mbx(hw, msgbuf, 2);
+	if (ret)
+		goto out;
+
+	ret = (int)msgbuf[1];
+	if (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE && ret >= 0)
+		ret = -1;
+
+out:
+	spin_unlock_bh(&adapter->mbx_lock);
+
+	return ret;
+}
+
+/**
+ * ixgbevf_ipsec_del_pf_sa - ask the PF to delete an SA
+ * @adapter: board private structure
+ * @pfsa: sa index returned from PF when created, -1 for all
+ *
+ * Returns: 0 on success, or negative error code
+ **/
+static int ixgbevf_ipsec_del_pf_sa(struct ixgbevf_adapter *adapter, int pfsa)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 msgbuf[2];
+	int err;
+
+	memset(msgbuf, 0, sizeof(msgbuf));
+	msgbuf[0] = IXGBE_VF_IPSEC_DEL;
+	msgbuf[1] = (u32)pfsa;
+
+	spin_lock_bh(&adapter->mbx_lock);
+
+	err = ixgbevf_write_mbx(hw, msgbuf, 2);
+	if (err)
+		goto out;
+
+	err = ixgbevf_poll_mbx(hw, msgbuf, 2);
+	if (err)
+		goto out;
+
+out:
+	spin_unlock_bh(&adapter->mbx_lock);
+	return err;
+}
+
+/**
+ * ixgbevf_ipsec_restore - restore the IPsec HW settings after a reset
+ * @adapter: board private structure
+ *
+ * Reload the HW tables from the SW tables after they've been bashed
+ * by a chip reset.  While we're here, make sure any stale VF data is
+ * removed, since we go through reset when num_vfs changes.
+ **/
+void ixgbevf_ipsec_restore(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+	struct net_device *netdev = adapter->netdev;
+	int i;
+
+	if (!(adapter->netdev->features & NETIF_F_HW_ESP))
+		return;
+
+	/* reload the Rx and Tx keys */
+	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+		struct rx_sa *r = &ipsec->rx_tbl[i];
+		struct tx_sa *t = &ipsec->tx_tbl[i];
+		int ret;
+
+		if (r->used) {
+			ret = ixgbevf_ipsec_set_pf_sa(adapter, r->xs);
+			if (ret < 0)
+				netdev_err(netdev, "reload rx_tbl[%d] failed = %d\n",
+					   i, ret);
+		}
+
+		if (t->used) {
+			ret = ixgbevf_ipsec_set_pf_sa(adapter, t->xs);
+			if (ret < 0)
+				netdev_err(netdev, "reload tx_tbl[%d] failed = %d\n",
+					   i, ret);
+		}
+	}
+}
+
+/**
+ * ixgbevf_ipsec_find_empty_idx - find the first unused security parameter index
+ * @ipsec: pointer to IPsec struct
+ * @rxtable: true if we need to look in the Rx table
+ *
+ * Returns the first unused index in either the Rx or Tx SA table
+ **/
+static
+int ixgbevf_ipsec_find_empty_idx(struct ixgbevf_ipsec *ipsec, bool rxtable)
+{
+	u32 i;
+
+	if (rxtable) {
+		if (ipsec->num_rx_sa == IXGBE_IPSEC_MAX_SA_COUNT)
+			return -ENOSPC;
+
+		/* search rx sa table */
+		for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+			if (!ipsec->rx_tbl[i].used)
+				return i;
+		}
+	} else {
+		if (ipsec->num_tx_sa == IXGBE_IPSEC_MAX_SA_COUNT)
+			return -ENOSPC;
+
+		/* search tx sa table */
+		for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+			if (!ipsec->tx_tbl[i].used)
+				return i;
+		}
+	}
+
+	return -ENOSPC;
+}
+
+/**
+ * ixgbevf_ipsec_find_rx_state - find the state that matches
+ * @ipsec: pointer to IPsec struct
+ * @daddr: inbound address to match
+ * @proto: protocol to match
+ * @spi: SPI to match
+ * @ip4: true if using an IPv4 address
+ *
+ * Returns a pointer to the matching SA state information
+ **/
+static
+struct xfrm_state *ixgbevf_ipsec_find_rx_state(struct ixgbevf_ipsec *ipsec,
+					       __be32 *daddr, u8 proto,
+					       __be32 spi, bool ip4)
+{
+	struct xfrm_state *ret = NULL;
+	struct rx_sa *rsa;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(ipsec->rx_sa_list, rsa, hlist,
+				   (__force u32)spi) {
+		if (spi == rsa->xs->id.spi &&
+		    ((ip4 && *daddr == rsa->xs->id.daddr.a4) ||
+		      (!ip4 && !memcmp(daddr, &rsa->xs->id.daddr.a6,
+				       sizeof(rsa->xs->id.daddr.a6)))) &&
+		    proto == rsa->xs->id.proto) {
+			ret = rsa->xs;
+			xfrm_state_hold(ret);
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
+ * ixgbevf_ipsec_parse_proto_keys - find the key and salt based on the protocol
+ * @xs: pointer to xfrm_state struct
+ * @mykey: pointer to key array to populate
+ * @mysalt: pointer to salt value to populate
+ *
+ * This copies the protocol keys and salt to our own data tables.  The
+ * 82599 family only supports the one algorithm.
+ **/
+static int ixgbevf_ipsec_parse_proto_keys(struct xfrm_state *xs,
+					  u32 *mykey, u32 *mysalt)
+{
+	struct net_device *dev = xs->xso.real_dev;
+	unsigned char *key_data;
+	char *alg_name = NULL;
+	int key_len;
+
+	if (!xs->aead) {
+		netdev_err(dev, "Unsupported IPsec algorithm\n");
+		return -EINVAL;
+	}
+
+	if (xs->aead->alg_icv_len != IXGBE_IPSEC_AUTH_BITS) {
+		netdev_err(dev, "IPsec offload requires %d bit authentication\n",
+			   IXGBE_IPSEC_AUTH_BITS);
+		return -EINVAL;
+	}
+
+	key_data = &xs->aead->alg_key[0];
+	key_len = xs->aead->alg_key_len;
+	alg_name = xs->aead->alg_name;
+
+	if (strcmp(alg_name, aes_gcm_name)) {
+		netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n",
+			   aes_gcm_name);
+		return -EINVAL;
+	}
+
+	/* The key bytes come down in a big endian array of bytes, so
+	 * we don't need to do any byte swapping.
+	 * 160 accounts for 16 byte key and 4 byte salt
+	 */
+	if (key_len > IXGBE_IPSEC_KEY_BITS) {
+		*mysalt = ((u32 *)key_data)[4];
+	} else if (key_len == IXGBE_IPSEC_KEY_BITS) {
+		*mysalt = 0;
+	} else {
+		netdev_err(dev, "IPsec hw offload only supports keys up to 128 bits with a 32 bit salt\n");
+		return -EINVAL;
+	}
+	memcpy(mykey, key_data, 16);
+
+	return 0;
+}
+
+/**
+ * ixgbevf_ipsec_add_sa - program device with a security association
+ * @xs: pointer to transformer state struct
+ * @extack: extack point to fill failure reason
+ **/
+static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs,
+				struct netlink_ext_ack *extack)
+{
+	struct net_device *dev = xs->xso.real_dev;
+	struct ixgbevf_adapter *adapter;
+	struct ixgbevf_ipsec *ipsec;
+	u16 sa_idx;
+	int ret;
+
+	adapter = netdev_priv(dev);
+	ipsec = adapter->ipsec;
+
+	if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) {
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol for IPsec offload");
+		return -EINVAL;
+	}
+
+	if (xs->props.mode != XFRM_MODE_TRANSPORT) {
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported mode for ipsec offload");
+		return -EINVAL;
+	}
+
+	if (xs->xso.type != XFRM_DEV_OFFLOAD_CRYPTO) {
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported ipsec offload type");
+		return -EINVAL;
+	}
+
+	if (xs->xso.dir == XFRM_DEV_OFFLOAD_IN) {
+		struct rx_sa rsa;
+
+		if (xs->calg) {
+			NL_SET_ERR_MSG_MOD(extack, "Compression offload not supported");
+			return -EINVAL;
+		}
+
+		/* find the first unused index */
+		ret = ixgbevf_ipsec_find_empty_idx(ipsec, true);
+		if (ret < 0) {
+			NL_SET_ERR_MSG_MOD(extack, "No space for SA in Rx table!");
+			return ret;
+		}
+		sa_idx = (u16)ret;
+
+		memset(&rsa, 0, sizeof(rsa));
+		rsa.used = true;
+		rsa.xs = xs;
+
+		if (rsa.xs->id.proto & IPPROTO_ESP)
+			rsa.decrypt = xs->ealg || xs->aead;
+
+		/* get the key and salt */
+		ret = ixgbevf_ipsec_parse_proto_keys(xs, rsa.key, &rsa.salt);
+		if (ret) {
+			NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for Rx SA table");
+			return ret;
+		}
+
+		/* get ip for rx sa table */
+		if (xs->props.family == AF_INET6)
+			memcpy(rsa.ipaddr, &xs->id.daddr.a6, 16);
+		else
+			memcpy(&rsa.ipaddr[3], &xs->id.daddr.a4, 4);
+
+		rsa.mode = IXGBE_RXMOD_VALID;
+		if (rsa.xs->id.proto & IPPROTO_ESP)
+			rsa.mode |= IXGBE_RXMOD_PROTO_ESP;
+		if (rsa.decrypt)
+			rsa.mode |= IXGBE_RXMOD_DECRYPT;
+		if (rsa.xs->props.family == AF_INET6)
+			rsa.mode |= IXGBE_RXMOD_IPV6;
+
+		ret = ixgbevf_ipsec_set_pf_sa(adapter, xs);
+		if (ret < 0)
+			return ret;
+		rsa.pfsa = ret;
+
+		/* the preparations worked, so save the info */
+		memcpy(&ipsec->rx_tbl[sa_idx], &rsa, sizeof(rsa));
+
+		xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_RX_INDEX;
+
+		ipsec->num_rx_sa++;
+
+		/* hash the new entry for faster search in Rx path */
+		hash_add_rcu(ipsec->rx_sa_list, &ipsec->rx_tbl[sa_idx].hlist,
+			     (__force u32)rsa.xs->id.spi);
+	} else {
+		struct tx_sa tsa;
+
+		/* find the first unused index */
+		ret = ixgbevf_ipsec_find_empty_idx(ipsec, false);
+		if (ret < 0) {
+			NL_SET_ERR_MSG_MOD(extack, "No space for SA in Tx table");
+			return ret;
+		}
+		sa_idx = (u16)ret;
+
+		memset(&tsa, 0, sizeof(tsa));
+		tsa.used = true;
+		tsa.xs = xs;
+
+		if (xs->id.proto & IPPROTO_ESP)
+			tsa.encrypt = xs->ealg || xs->aead;
+
+		ret = ixgbevf_ipsec_parse_proto_keys(xs, tsa.key, &tsa.salt);
+		if (ret) {
+			NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for Tx SA table");
+			memset(&tsa, 0, sizeof(tsa));
+			return ret;
+		}
+
+		ret = ixgbevf_ipsec_set_pf_sa(adapter, xs);
+		if (ret < 0)
+			return ret;
+		tsa.pfsa = ret;
+
+		/* the preparations worked, so save the info */
+		memcpy(&ipsec->tx_tbl[sa_idx], &tsa, sizeof(tsa));
+
+		xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_TX_INDEX;
+
+		ipsec->num_tx_sa++;
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbevf_ipsec_del_sa - clear out this specific SA
+ * @xs: pointer to transformer state struct
+ **/
+static void ixgbevf_ipsec_del_sa(struct xfrm_state *xs)
+{
+	struct net_device *dev = xs->xso.real_dev;
+	struct ixgbevf_adapter *adapter;
+	struct ixgbevf_ipsec *ipsec;
+	u16 sa_idx;
+
+	adapter = netdev_priv(dev);
+	ipsec = adapter->ipsec;
+
+	if (xs->xso.dir == XFRM_DEV_OFFLOAD_IN) {
+		sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_RX_INDEX;
+
+		if (!ipsec->rx_tbl[sa_idx].used) {
+			netdev_err(dev, "Invalid Rx SA selected sa_idx=%d offload_handle=%lu\n",
+				   sa_idx, xs->xso.offload_handle);
+			return;
+		}
+
+		ixgbevf_ipsec_del_pf_sa(adapter, ipsec->rx_tbl[sa_idx].pfsa);
+		hash_del_rcu(&ipsec->rx_tbl[sa_idx].hlist);
+		memset(&ipsec->rx_tbl[sa_idx], 0, sizeof(struct rx_sa));
+		ipsec->num_rx_sa--;
+	} else {
+		sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
+
+		if (!ipsec->tx_tbl[sa_idx].used) {
+			netdev_err(dev, "Invalid Tx SA selected sa_idx=%d offload_handle=%lu\n",
+				   sa_idx, xs->xso.offload_handle);
+			return;
+		}
+
+		ixgbevf_ipsec_del_pf_sa(adapter, ipsec->tx_tbl[sa_idx].pfsa);
+		memset(&ipsec->tx_tbl[sa_idx], 0, sizeof(struct tx_sa));
+		ipsec->num_tx_sa--;
+	}
+}
+
+/**
+ * ixgbevf_ipsec_offload_ok - can this packet use the xfrm hw offload
+ * @skb: current data packet
+ * @xs: pointer to transformer state struct
+ **/
+static bool ixgbevf_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
+{
+	if (xs->props.family == AF_INET) {
+		/* Offload with IPv4 options is not supported yet */
+		if (ip_hdr(skb)->ihl != 5)
+			return false;
+	} else {
+		/* Offload with IPv6 extension headers is not support yet */
+		if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
+			return false;
+	}
+
+	return true;
+}
+
+static const struct xfrmdev_ops ixgbevf_xfrmdev_ops = {
+	.xdo_dev_state_add = ixgbevf_ipsec_add_sa,
+	.xdo_dev_state_delete = ixgbevf_ipsec_del_sa,
+	.xdo_dev_offload_ok = ixgbevf_ipsec_offload_ok,
+};
+
+/**
+ * ixgbevf_ipsec_tx - setup Tx flags for IPsec offload
+ * @tx_ring: outgoing context
+ * @first: current data packet
+ * @itd: ipsec Tx data for later use in building context descriptor
+ **/
+int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring,
+		     struct ixgbevf_tx_buffer *first,
+		     struct ixgbevf_ipsec_tx_data *itd)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev);
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_state *xs;
+	struct sec_path *sp;
+	struct tx_sa *tsa;
+	u16 sa_idx;
+
+	sp = skb_sec_path(first->skb);
+	if (unlikely(!sp->len)) {
+		netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n",
+			   __func__, sp->len);
+		return 0;
+	}
+
+	xs = xfrm_input_state(first->skb);
+	if (unlikely(!xs)) {
+		netdev_err(tx_ring->netdev, "%s: no xfrm_input_state() xs = %p\n",
+			   __func__, xs);
+		return 0;
+	}
+
+	sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
+	if (unlikely(sa_idx >= IXGBE_IPSEC_MAX_SA_COUNT)) {
+		netdev_err(tx_ring->netdev, "%s: bad sa_idx=%d handle=%lu\n",
+			   __func__, sa_idx, xs->xso.offload_handle);
+		return 0;
+	}
+
+	tsa = &ipsec->tx_tbl[sa_idx];
+	if (unlikely(!tsa->used)) {
+		netdev_err(tx_ring->netdev, "%s: unused sa_idx=%d\n",
+			   __func__, sa_idx);
+		return 0;
+	}
+
+	itd->pfsa = tsa->pfsa - IXGBE_IPSEC_BASE_TX_INDEX;
+
+	first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CSUM;
+
+	if (xs->id.proto == IPPROTO_ESP) {
+		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
+			      IXGBE_ADVTXD_TUCMD_L4T_TCP;
+		if (first->protocol == htons(ETH_P_IP))
+			itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4;
+
+		/* The actual trailer length is authlen (16 bytes) plus
+		 * 2 bytes for the proto and the padlen values, plus
+		 * padlen bytes of padding.  This ends up not the same
+		 * as the static value found in xs->props.trailer_len (21).
+		 *
+		 * ... but if we're doing GSO, don't bother as the stack
+		 * doesn't add a trailer for those.
+		 */
+		if (!skb_is_gso(first->skb)) {
+			/* The "correct" way to get the auth length would be
+			 * to use
+			 *    authlen = crypto_aead_authsize(xs->data);
+			 * but since we know we only have one size to worry
+			 * about * we can let the compiler use the constant
+			 * and save us a few CPU cycles.
+			 */
+			const int authlen = IXGBE_IPSEC_AUTH_BITS / 8;
+			struct sk_buff *skb = first->skb;
+			u8 padlen;
+			int ret;
+
+			ret = skb_copy_bits(skb, skb->len - (authlen + 2),
+					    &padlen, 1);
+			if (unlikely(ret))
+				return 0;
+			itd->trailer_len = authlen + 2 + padlen;
+		}
+	}
+	if (tsa->encrypt)
+		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;
+
+	return 1;
+}
+
+/**
+ * ixgbevf_ipsec_rx - decode IPsec bits from Rx descriptor
+ * @rx_ring: receiving ring
+ * @rx_desc: receive data descriptor
+ * @skb: current data packet
+ *
+ * Determine if there was an IPsec encapsulation noticed, and if so set up
+ * the resulting status for later in the receive stack.
+ **/
+void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring,
+		      union ixgbe_adv_rx_desc *rx_desc,
+		      struct sk_buff *skb)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(rx_ring->netdev);
+	__le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
+	__le16 ipsec_pkt_types = cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH |
+					     IXGBE_RXDADV_PKTTYPE_IPSEC_ESP);
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_offload *xo = NULL;
+	struct xfrm_state *xs = NULL;
+	struct ipv6hdr *ip6 = NULL;
+	struct iphdr *ip4 = NULL;
+	struct sec_path *sp;
+	void *daddr;
+	__be32 spi;
+	u8 *c_hdr;
+	u8 proto;
+
+	/* Find the IP and crypto headers in the data.
+	 * We can assume no VLAN header in the way, b/c the
+	 * hw won't recognize the IPsec packet and anyway the
+	 * currently VLAN device doesn't support xfrm offload.
+	 */
+	if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPV4)) {
+		ip4 = (struct iphdr *)(skb->data + ETH_HLEN);
+		daddr = &ip4->daddr;
+		c_hdr = (u8 *)ip4 + ip4->ihl * 4;
+	} else if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPV6)) {
+		ip6 = (struct ipv6hdr *)(skb->data + ETH_HLEN);
+		daddr = &ip6->daddr;
+		c_hdr = (u8 *)ip6 + sizeof(struct ipv6hdr);
+	} else {
+		return;
+	}
+
+	switch (pkt_info & ipsec_pkt_types) {
+	case cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH):
+		spi = ((struct ip_auth_hdr *)c_hdr)->spi;
+		proto = IPPROTO_AH;
+		break;
+	case cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_ESP):
+		spi = ((struct ip_esp_hdr *)c_hdr)->spi;
+		proto = IPPROTO_ESP;
+		break;
+	default:
+		return;
+	}
+
+	xs = ixgbevf_ipsec_find_rx_state(ipsec, daddr, proto, spi, !!ip4);
+	if (unlikely(!xs))
+		return;
+
+	sp = secpath_set(skb);
+	if (unlikely(!sp))
+		return;
+
+	sp->xvec[sp->len++] = xs;
+	sp->olen++;
+	xo = xfrm_offload(skb);
+	xo->flags = CRYPTO_DONE;
+	xo->status = CRYPTO_SUCCESS;
+
+	adapter->rx_ipsec++;
+}
+
+/**
+ * ixgbevf_init_ipsec_offload - initialize registers for IPsec operation
+ * @adapter: board private structure
+ **/
+void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbevf_ipsec *ipsec;
+	size_t size;
+
+	switch (adapter->hw.api_version) {
+	case ixgbe_mbox_api_14:
+	case ixgbe_mbox_api_15:
+		break;
+	default:
+		return;
+	}
+
+	ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
+	if (!ipsec)
+		goto err1;
+	hash_init(ipsec->rx_sa_list);
+
+	size = sizeof(struct rx_sa) * IXGBE_IPSEC_MAX_SA_COUNT;
+	ipsec->rx_tbl = kzalloc(size, GFP_KERNEL);
+	if (!ipsec->rx_tbl)
+		goto err2;
+
+	size = sizeof(struct tx_sa) * IXGBE_IPSEC_MAX_SA_COUNT;
+	ipsec->tx_tbl = kzalloc(size, GFP_KERNEL);
+	if (!ipsec->tx_tbl)
+		goto err2;
+
+	ipsec->num_rx_sa = 0;
+	ipsec->num_tx_sa = 0;
+
+	adapter->ipsec = ipsec;
+
+	adapter->netdev->xfrmdev_ops = &ixgbevf_xfrmdev_ops;
+
+#define IXGBEVF_ESP_FEATURES	(NETIF_F_HW_ESP | \
+				 NETIF_F_HW_ESP_TX_CSUM | \
+				 NETIF_F_GSO_ESP)
+
+	adapter->netdev->features |= IXGBEVF_ESP_FEATURES;
+	adapter->netdev->hw_enc_features |= IXGBEVF_ESP_FEATURES;
+
+	return;
+
+err2:
+	kfree(ipsec->rx_tbl);
+	kfree(ipsec->tx_tbl);
+	kfree(ipsec);
+err1:
+	netdev_err(adapter->netdev, "Unable to allocate memory for SA tables");
+}
+
+/**
+ * ixgbevf_stop_ipsec_offload - tear down the IPsec offload
+ * @adapter: board private structure
+ **/
+void ixgbevf_stop_ipsec_offload(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+
+	adapter->ipsec = NULL;
+	if (ipsec) {
+		kfree(ipsec->rx_tbl);
+		kfree(ipsec->tx_tbl);
+		kfree(ipsec);
+	}
+}
diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.h b/drivers/net/ethernet/intel/ixgbevf/ipsec.h
new file mode 100644
index 0000000000..d229901653
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2018 Oracle and/or its affiliates. All rights reserved. */
+
+#ifndef _IXGBEVF_IPSEC_H_
+#define _IXGBEVF_IPSEC_H_
+
+#define IXGBE_IPSEC_MAX_SA_COUNT	1024
+#define IXGBE_IPSEC_BASE_RX_INDEX	0
+#define IXGBE_IPSEC_BASE_TX_INDEX	IXGBE_IPSEC_MAX_SA_COUNT
+#define IXGBE_IPSEC_AUTH_BITS		128
+
+#define IXGBE_RXMOD_VALID		0x00000001
+#define IXGBE_RXMOD_PROTO_ESP		0x00000004
+#define IXGBE_RXMOD_DECRYPT		0x00000008
+#define IXGBE_RXMOD_IPV6		0x00000010
+
+struct rx_sa {
+	struct hlist_node hlist;
+	struct xfrm_state *xs;
+	__be32 ipaddr[4];
+	u32 key[4];
+	u32 salt;
+	u32 mode;
+	u32 pfsa;
+	bool used;
+	bool decrypt;
+};
+
+struct rx_ip_sa {
+	__be32 ipaddr[4];
+	u32 ref_cnt;
+	bool used;
+};
+
+struct tx_sa {
+	struct xfrm_state *xs;
+	u32 key[4];
+	u32 salt;
+	u32 pfsa;
+	bool encrypt;
+	bool used;
+};
+
+struct ixgbevf_ipsec_tx_data {
+	u32 flags;
+	u16 trailer_len;
+	u16 pfsa;
+};
+
+struct ixgbevf_ipsec {
+	u16 num_rx_sa;
+	u16 num_tx_sa;
+	struct rx_sa *rx_tbl;
+	struct tx_sa *tx_tbl;
+	DECLARE_HASHTABLE(rx_sa_list, 10);
+};
+
+struct sa_mbx_msg {
+	__be32 spi;
+	u8 dir;
+	u8 proto;
+	u16 family;
+	__be32 addr[4];
+	u32 key[5];
+};
+#endif /* _IXGBEVF_IPSEC_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
new file mode 100644
index 0000000000..130cb86877
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -0,0 +1,498 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _IXGBEVF_H_
+#define _IXGBEVF_H_
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/timer.h>
+#include <linux/io.h>
+#include <linux/netdevice.h>
+#include <linux/if_vlan.h>
+#include <linux/u64_stats_sync.h>
+#include <net/xdp.h>
+
+#include "vf.h"
+#include "ipsec.h"
+
+#define IXGBE_MAX_TXD_PWR	14
+#define IXGBE_MAX_DATA_PER_TXD	BIT(IXGBE_MAX_TXD_PWR)
+
+/* Tx Descriptors needed, worst case */
+#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IXGBE_MAX_DATA_PER_TXD)
+#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer
+ */
+struct ixgbevf_tx_buffer {
+	union ixgbe_adv_tx_desc *next_to_watch;
+	unsigned long time_stamp;
+	union {
+		struct sk_buff *skb;
+		/* XDP uses address ptr on irq_clean */
+		void *data;
+	};
+	unsigned int bytecount;
+	unsigned short gso_segs;
+	__be16 protocol;
+	DEFINE_DMA_UNMAP_ADDR(dma);
+	DEFINE_DMA_UNMAP_LEN(len);
+	u32 tx_flags;
+};
+
+struct ixgbevf_rx_buffer {
+	dma_addr_t dma;
+	struct page *page;
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+	__u32 page_offset;
+#else
+	__u16 page_offset;
+#endif
+	__u16 pagecnt_bias;
+};
+
+struct ixgbevf_stats {
+	u64 packets;
+	u64 bytes;
+};
+
+struct ixgbevf_tx_queue_stats {
+	u64 restart_queue;
+	u64 tx_busy;
+	u64 tx_done_old;
+};
+
+struct ixgbevf_rx_queue_stats {
+	u64 alloc_rx_page_failed;
+	u64 alloc_rx_buff_failed;
+	u64 alloc_rx_page;
+	u64 csum_err;
+};
+
+enum ixgbevf_ring_state_t {
+	__IXGBEVF_RX_3K_BUFFER,
+	__IXGBEVF_RX_BUILD_SKB_ENABLED,
+	__IXGBEVF_TX_DETECT_HANG,
+	__IXGBEVF_HANG_CHECK_ARMED,
+	__IXGBEVF_TX_XDP_RING,
+	__IXGBEVF_TX_XDP_RING_PRIMED,
+};
+
+#define ring_is_xdp(ring) \
+		test_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state)
+#define set_ring_xdp(ring) \
+		set_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state)
+#define clear_ring_xdp(ring) \
+		clear_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state)
+
+struct ixgbevf_ring {
+	struct ixgbevf_ring *next;
+	struct ixgbevf_q_vector *q_vector;	/* backpointer to q_vector */
+	struct net_device *netdev;
+	struct bpf_prog *xdp_prog;
+	struct device *dev;
+	void *desc;			/* descriptor ring memory */
+	dma_addr_t dma;			/* phys. address of descriptor ring */
+	unsigned int size;		/* length in bytes */
+	u16 count;			/* amount of descriptors */
+	u16 next_to_use;
+	u16 next_to_clean;
+	u16 next_to_alloc;
+
+	union {
+		struct ixgbevf_tx_buffer *tx_buffer_info;
+		struct ixgbevf_rx_buffer *rx_buffer_info;
+	};
+	unsigned long state;
+	struct ixgbevf_stats stats;
+	struct u64_stats_sync syncp;
+	union {
+		struct ixgbevf_tx_queue_stats tx_stats;
+		struct ixgbevf_rx_queue_stats rx_stats;
+	};
+	struct xdp_rxq_info xdp_rxq;
+	u64 hw_csum_rx_error;
+	u8 __iomem *tail;
+	struct sk_buff *skb;
+
+	/* holds the special value that gets the hardware register offset
+	 * associated with this ring, which is different for DCB and RSS modes
+	 */
+	u16 reg_idx;
+	int queue_index; /* needed for multiqueue queue management */
+} ____cacheline_internodealigned_in_smp;
+
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define IXGBEVF_RX_BUFFER_WRITE	16	/* Must be power of 2 */
+
+#define MAX_RX_QUEUES IXGBE_VF_MAX_RX_QUEUES
+#define MAX_TX_QUEUES IXGBE_VF_MAX_TX_QUEUES
+#define MAX_XDP_QUEUES IXGBE_VF_MAX_TX_QUEUES
+#define IXGBEVF_MAX_RSS_QUEUES		2
+#define IXGBEVF_82599_RETA_SIZE		128	/* 128 entries */
+#define IXGBEVF_X550_VFRETA_SIZE	64	/* 64 entries */
+#define IXGBEVF_RSS_HASH_KEY_SIZE	40
+#define IXGBEVF_VFRSSRK_REGS		10	/* 10 registers for RSS key */
+
+#define IXGBEVF_DEFAULT_TXD	1024
+#define IXGBEVF_DEFAULT_RXD	512
+#define IXGBEVF_MAX_TXD		4096
+#define IXGBEVF_MIN_TXD		64
+#define IXGBEVF_MAX_RXD		4096
+#define IXGBEVF_MIN_RXD		64
+
+/* Supported Rx Buffer Sizes */
+#define IXGBEVF_RXBUFFER_256	256    /* Used for packet split */
+#define IXGBEVF_RXBUFFER_2048	2048
+#define IXGBEVF_RXBUFFER_3072	3072
+
+#define IXGBEVF_RX_HDR_SIZE	IXGBEVF_RXBUFFER_256
+
+#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
+
+#define IXGBEVF_SKB_PAD		(NET_SKB_PAD + NET_IP_ALIGN)
+#if (PAGE_SIZE < 8192)
+#define IXGBEVF_MAX_FRAME_BUILD_SKB \
+	(SKB_WITH_OVERHEAD(IXGBEVF_RXBUFFER_2048) - IXGBEVF_SKB_PAD)
+#else
+#define IXGBEVF_MAX_FRAME_BUILD_SKB	IXGBEVF_RXBUFFER_2048
+#endif
+
+#define IXGBE_TX_FLAGS_CSUM		BIT(0)
+#define IXGBE_TX_FLAGS_VLAN		BIT(1)
+#define IXGBE_TX_FLAGS_TSO		BIT(2)
+#define IXGBE_TX_FLAGS_IPV4		BIT(3)
+#define IXGBE_TX_FLAGS_IPSEC		BIT(4)
+#define IXGBE_TX_FLAGS_VLAN_MASK	0xffff0000
+#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK	0x0000e000
+#define IXGBE_TX_FLAGS_VLAN_SHIFT	16
+
+#define ring_uses_large_buffer(ring) \
+	test_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define set_ring_uses_large_buffer(ring) \
+	set_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+#define clear_ring_uses_large_buffer(ring) \
+	clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state)
+
+#define ring_uses_build_skb(ring) \
+	test_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define set_ring_build_skb_enabled(ring) \
+	set_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+#define clear_ring_build_skb_enabled(ring) \
+	clear_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state)
+
+static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring_uses_large_buffer(ring))
+		return IXGBEVF_RXBUFFER_3072;
+
+	if (ring_uses_build_skb(ring))
+		return IXGBEVF_MAX_FRAME_BUILD_SKB;
+#endif
+	return IXGBEVF_RXBUFFER_2048;
+}
+
+static inline unsigned int ixgbevf_rx_pg_order(struct ixgbevf_ring *ring)
+{
+#if (PAGE_SIZE < 8192)
+	if (ring_uses_large_buffer(ring))
+		return 1;
+#endif
+	return 0;
+}
+
+#define ixgbevf_rx_pg_size(_ring) (PAGE_SIZE << ixgbevf_rx_pg_order(_ring))
+
+#define check_for_tx_hang(ring) \
+	test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define set_check_for_tx_hang(ring) \
+	set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+#define clear_check_for_tx_hang(ring) \
+	clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state)
+
+struct ixgbevf_ring_container {
+	struct ixgbevf_ring *ring;	/* pointer to linked list of rings */
+	unsigned int total_bytes;	/* total bytes processed this int */
+	unsigned int total_packets;	/* total packets processed this int */
+	u8 count;			/* total number of rings in vector */
+	u8 itr;				/* current ITR setting for ring */
+};
+
+/* iterator for handling rings in ring container */
+#define ixgbevf_for_each_ring(pos, head) \
+	for (pos = (head).ring; pos != NULL; pos = pos->next)
+
+/* MAX_MSIX_Q_VECTORS of these are allocated,
+ * but we only use one per queue-specific vector.
+ */
+struct ixgbevf_q_vector {
+	struct ixgbevf_adapter *adapter;
+	/* index of q_vector within array, also used for finding the bit in
+	 * EICR and friends that represents the vector for this ring
+	 */
+	u16 v_idx;
+	u16 itr; /* Interrupt throttle rate written to EITR */
+	struct napi_struct napi;
+	struct ixgbevf_ring_container rx, tx;
+	struct rcu_head rcu;    /* to avoid race with update stats on free */
+	char name[IFNAMSIZ + 9];
+
+	/* for dynamic allocation of rings associated with this q_vector */
+	struct ixgbevf_ring ring[0] ____cacheline_internodealigned_in_smp;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	unsigned int state;
+#define IXGBEVF_QV_STATE_IDLE		0
+#define IXGBEVF_QV_STATE_NAPI		1    /* NAPI owns this QV */
+#define IXGBEVF_QV_STATE_POLL		2    /* poll owns this QV */
+#define IXGBEVF_QV_STATE_DISABLED	4    /* QV is disabled */
+#define IXGBEVF_QV_OWNED	(IXGBEVF_QV_STATE_NAPI | IXGBEVF_QV_STATE_POLL)
+#define IXGBEVF_QV_LOCKED	(IXGBEVF_QV_OWNED | IXGBEVF_QV_STATE_DISABLED)
+#define IXGBEVF_QV_STATE_NAPI_YIELD	8    /* NAPI yielded this QV */
+#define IXGBEVF_QV_STATE_POLL_YIELD	16   /* poll yielded this QV */
+#define IXGBEVF_QV_YIELD	(IXGBEVF_QV_STATE_NAPI_YIELD | \
+				 IXGBEVF_QV_STATE_POLL_YIELD)
+#define IXGBEVF_QV_USER_PEND	(IXGBEVF_QV_STATE_POLL | \
+				 IXGBEVF_QV_STATE_POLL_YIELD)
+	spinlock_t lock;
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+};
+
+/* microsecond values for various ITR rates shifted by 2 to fit itr register
+ * with the first 3 bits reserved 0
+ */
+#define IXGBE_MIN_RSC_ITR	24
+#define IXGBE_100K_ITR		40
+#define IXGBE_20K_ITR		200
+#define IXGBE_12K_ITR		336
+
+/* Helper macros to switch between ints/sec and what the register uses.
+ * And yes, it's the same math going both ways.  The lowest value
+ * supported by all of the ixgbe hardware is 8.
+ */
+#define EITR_INTS_PER_SEC_TO_REG(_eitr) \
+	((_eitr) ? (1000000000 / ((_eitr) * 256)) : 8)
+#define EITR_REG_TO_INTS_PER_SEC EITR_INTS_PER_SEC_TO_REG
+
+/* ixgbevf_test_staterr - tests bits in Rx descriptor status and error fields */
+static inline __le32 ixgbevf_test_staterr(union ixgbe_adv_rx_desc *rx_desc,
+					  const u32 stat_err_bits)
+{
+	return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
+}
+
+static inline u16 ixgbevf_desc_unused(struct ixgbevf_ring *ring)
+{
+	u16 ntc = ring->next_to_clean;
+	u16 ntu = ring->next_to_use;
+
+	return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
+}
+
+static inline void ixgbevf_write_tail(struct ixgbevf_ring *ring, u32 value)
+{
+	writel(value, ring->tail);
+}
+
+#define IXGBEVF_RX_DESC(R, i)	\
+	(&(((union ixgbe_adv_rx_desc *)((R)->desc))[i]))
+#define IXGBEVF_TX_DESC(R, i)	\
+	(&(((union ixgbe_adv_tx_desc *)((R)->desc))[i]))
+#define IXGBEVF_TX_CTXTDESC(R, i)	\
+	(&(((struct ixgbe_adv_tx_context_desc *)((R)->desc))[i]))
+
+#define IXGBE_MAX_JUMBO_FRAME_SIZE	9728 /* Maximum Supported Size 9.5KB */
+
+#define OTHER_VECTOR	1
+#define NON_Q_VECTORS	(OTHER_VECTOR)
+
+#define MAX_MSIX_Q_VECTORS	2
+
+#define MIN_MSIX_Q_VECTORS	1
+#define MIN_MSIX_COUNT		(MIN_MSIX_Q_VECTORS + NON_Q_VECTORS)
+
+#define IXGBEVF_RX_DMA_ATTR \
+	(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
+/* board specific private data structure */
+struct ixgbevf_adapter {
+	/* this field must be first, see ixgbevf_process_skb_fields */
+	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+
+	struct ixgbevf_q_vector *q_vector[MAX_MSIX_Q_VECTORS];
+
+	/* Interrupt Throttle Rate */
+	u16 rx_itr_setting;
+	u16 tx_itr_setting;
+
+	/* interrupt masks */
+	u32 eims_enable_mask;
+	u32 eims_other;
+
+	/* XDP */
+	int num_xdp_queues;
+	struct ixgbevf_ring *xdp_ring[MAX_XDP_QUEUES];
+
+	/* TX */
+	int num_tx_queues;
+	struct ixgbevf_ring *tx_ring[MAX_TX_QUEUES]; /* One per active queue */
+	u64 restart_queue;
+	u32 tx_timeout_count;
+	u64 tx_ipsec;
+
+	/* RX */
+	int num_rx_queues;
+	struct ixgbevf_ring *rx_ring[MAX_TX_QUEUES]; /* One per active queue */
+	u64 hw_csum_rx_error;
+	u64 hw_rx_no_dma_resources;
+	int num_msix_vectors;
+	u64 alloc_rx_page_failed;
+	u64 alloc_rx_buff_failed;
+	u64 alloc_rx_page;
+	u64 rx_ipsec;
+
+	struct msix_entry *msix_entries;
+
+	/* OS defined structs */
+	struct net_device *netdev;
+	struct bpf_prog *xdp_prog;
+	struct pci_dev *pdev;
+
+	/* structs defined in ixgbe_vf.h */
+	struct ixgbe_hw hw;
+	u16 msg_enable;
+	/* Interrupt Throttle Rate */
+	u32 eitr_param;
+
+	struct ixgbevf_hw_stats stats;
+
+	unsigned long state;
+	u64 tx_busy;
+	unsigned int tx_ring_count;
+	unsigned int xdp_ring_count;
+	unsigned int rx_ring_count;
+
+	u8 __iomem *io_addr; /* Mainly for iounmap use */
+	u32 link_speed;
+	bool link_up;
+
+	struct timer_list service_timer;
+	struct work_struct service_task;
+
+	spinlock_t mbx_lock;
+	unsigned long last_reset;
+
+	u32 *rss_key;
+	u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE];
+	u32 flags;
+	bool link_state;
+
+#define IXGBEVF_FLAGS_LEGACY_RX		BIT(1)
+
+#ifdef CONFIG_XFRM
+	struct ixgbevf_ipsec *ipsec;
+#endif /* CONFIG_XFRM */
+};
+
+enum ixbgevf_state_t {
+	__IXGBEVF_TESTING,
+	__IXGBEVF_RESETTING,
+	__IXGBEVF_DOWN,
+	__IXGBEVF_DISABLED,
+	__IXGBEVF_REMOVING,
+	__IXGBEVF_SERVICE_SCHED,
+	__IXGBEVF_SERVICE_INITED,
+	__IXGBEVF_RESET_REQUESTED,
+	__IXGBEVF_QUEUE_RESET_REQUESTED,
+};
+
+enum ixgbevf_boards {
+	board_82599_vf,
+	board_82599_vf_hv,
+	board_X540_vf,
+	board_X540_vf_hv,
+	board_X550_vf,
+	board_X550_vf_hv,
+	board_X550EM_x_vf,
+	board_X550EM_x_vf_hv,
+	board_x550em_a_vf,
+};
+
+enum ixgbevf_xcast_modes {
+	IXGBEVF_XCAST_MODE_NONE = 0,
+	IXGBEVF_XCAST_MODE_MULTI,
+	IXGBEVF_XCAST_MODE_ALLMULTI,
+	IXGBEVF_XCAST_MODE_PROMISC,
+};
+
+extern const struct ixgbevf_info ixgbevf_82599_vf_info;
+extern const struct ixgbevf_info ixgbevf_X540_vf_info;
+extern const struct ixgbevf_info ixgbevf_X550_vf_info;
+extern const struct ixgbevf_info ixgbevf_X550EM_x_vf_info;
+extern const struct ixgbe_mbx_operations ixgbevf_mbx_ops;
+extern const struct ixgbe_mbx_operations ixgbevf_mbx_ops_legacy;
+extern const struct ixgbevf_info ixgbevf_x550em_a_vf_info;
+
+extern const struct ixgbevf_info ixgbevf_82599_vf_hv_info;
+extern const struct ixgbevf_info ixgbevf_X540_vf_hv_info;
+extern const struct ixgbevf_info ixgbevf_X550_vf_hv_info;
+extern const struct ixgbevf_info ixgbevf_X550EM_x_vf_hv_info;
+extern const struct ixgbe_mbx_operations ixgbevf_hv_mbx_ops;
+
+/* needed by ethtool.c */
+extern const char ixgbevf_driver_name[];
+
+int ixgbevf_open(struct net_device *netdev);
+int ixgbevf_close(struct net_device *netdev);
+void ixgbevf_up(struct ixgbevf_adapter *adapter);
+void ixgbevf_down(struct ixgbevf_adapter *adapter);
+void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter);
+void ixgbevf_reset(struct ixgbevf_adapter *adapter);
+void ixgbevf_set_ethtool_ops(struct net_device *netdev);
+int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,
+			       struct ixgbevf_ring *rx_ring);
+int ixgbevf_setup_tx_resources(struct ixgbevf_ring *);
+void ixgbevf_free_rx_resources(struct ixgbevf_ring *);
+void ixgbevf_free_tx_resources(struct ixgbevf_ring *);
+void ixgbevf_update_stats(struct ixgbevf_adapter *adapter);
+int ethtool_ioctl(struct ifreq *ifr);
+
+extern void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector);
+
+#ifdef CONFIG_IXGBEVF_IPSEC
+void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter);
+void ixgbevf_stop_ipsec_offload(struct ixgbevf_adapter *adapter);
+void ixgbevf_ipsec_restore(struct ixgbevf_adapter *adapter);
+void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring,
+		      union ixgbe_adv_rx_desc *rx_desc,
+		      struct sk_buff *skb);
+int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring,
+		     struct ixgbevf_tx_buffer *first,
+		     struct ixgbevf_ipsec_tx_data *itd);
+#else
+static inline void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter)
+{ }
+static inline void ixgbevf_stop_ipsec_offload(struct ixgbevf_adapter *adapter)
+{ }
+static inline void ixgbevf_ipsec_restore(struct ixgbevf_adapter *adapter) { }
+static inline void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring,
+				    union ixgbe_adv_rx_desc *rx_desc,
+				    struct sk_buff *skb) { }
+static inline int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring,
+				   struct ixgbevf_tx_buffer *first,
+				   struct ixgbevf_ipsec_tx_data *itd)
+{ return 0; }
+#endif /* CONFIG_IXGBEVF_IPSEC */
+
+#define ixgbevf_hw_to_netdev(hw) \
+	(((struct ixgbevf_adapter *)(hw)->back)->netdev)
+
+#define hw_dbg(hw, format, arg...) \
+	netdev_dbg(ixgbevf_hw_to_netdev(hw), format, ## arg)
+
+s32 ixgbevf_poll_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size);
+s32 ixgbevf_write_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size);
+
+#endif /* _IXGBEVF_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
new file mode 100644
index 0000000000..a44e4bd561
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -0,0 +1,4932 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+/******************************************************************************
+ Copyright (c)2006 - 2007 Myricom, Inc. for some LRO specific code
+******************************************************************************/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/sctp.h>
+#include <linux/ipv6.h>
+#include <linux/slab.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <linux/ethtool.h>
+#include <linux/if.h>
+#include <linux/if_vlan.h>
+#include <linux/prefetch.h>
+#include <net/mpls.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <linux/atomic.h>
+#include <net/xfrm.h>
+
+#include "ixgbevf.h"
+
+const char ixgbevf_driver_name[] = "ixgbevf";
+static const char ixgbevf_driver_string[] =
+	"Intel(R) 10 Gigabit PCI Express Virtual Function Network Driver";
+
+static char ixgbevf_copyright[] =
+	"Copyright (c) 2009 - 2018 Intel Corporation.";
+
+static const struct ixgbevf_info *ixgbevf_info_tbl[] = {
+	[board_82599_vf]	= &ixgbevf_82599_vf_info,
+	[board_82599_vf_hv]	= &ixgbevf_82599_vf_hv_info,
+	[board_X540_vf]		= &ixgbevf_X540_vf_info,
+	[board_X540_vf_hv]	= &ixgbevf_X540_vf_hv_info,
+	[board_X550_vf]		= &ixgbevf_X550_vf_info,
+	[board_X550_vf_hv]	= &ixgbevf_X550_vf_hv_info,
+	[board_X550EM_x_vf]	= &ixgbevf_X550EM_x_vf_info,
+	[board_X550EM_x_vf_hv]	= &ixgbevf_X550EM_x_vf_hv_info,
+	[board_x550em_a_vf]	= &ixgbevf_x550em_a_vf_info,
+};
+
+/* ixgbevf_pci_tbl - PCI Device ID Table
+ *
+ * Wildcard entries (PCI_ANY_ID) should come last
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ *   Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id ixgbevf_pci_tbl[] = {
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_VF), board_82599_vf },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_VF_HV), board_82599_vf_hv },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540_VF), board_X540_vf },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540_VF_HV), board_X540_vf_hv },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550_VF), board_X550_vf },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550_VF_HV), board_X550_vf_hv },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_VF), board_X550EM_x_vf },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV), board_X550EM_x_vf_hv},
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_VF), board_x550em_a_vf },
+	/* required last entry */
+	{0, }
+};
+MODULE_DEVICE_TABLE(pci, ixgbevf_pci_tbl);
+
+MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
+MODULE_DESCRIPTION("Intel(R) 10 Gigabit Virtual Function Network Driver");
+MODULE_LICENSE("GPL v2");
+
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+static struct workqueue_struct *ixgbevf_wq;
+
+static void ixgbevf_service_event_schedule(struct ixgbevf_adapter *adapter)
+{
+	if (!test_bit(__IXGBEVF_DOWN, &adapter->state) &&
+	    !test_bit(__IXGBEVF_REMOVING, &adapter->state) &&
+	    !test_and_set_bit(__IXGBEVF_SERVICE_SCHED, &adapter->state))
+		queue_work(ixgbevf_wq, &adapter->service_task);
+}
+
+static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter)
+{
+	BUG_ON(!test_bit(__IXGBEVF_SERVICE_SCHED, &adapter->state));
+
+	/* flush memory to make sure state is correct before next watchdog */
+	smp_mb__before_atomic();
+	clear_bit(__IXGBEVF_SERVICE_SCHED, &adapter->state);
+}
+
+/* forward decls */
+static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter);
+static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector);
+static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter);
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer);
+static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring,
+				  struct ixgbevf_rx_buffer *old_buff);
+
+static void ixgbevf_remove_adapter(struct ixgbe_hw *hw)
+{
+	struct ixgbevf_adapter *adapter = hw->back;
+
+	if (!hw->hw_addr)
+		return;
+	hw->hw_addr = NULL;
+	dev_err(&adapter->pdev->dev, "Adapter removed\n");
+	if (test_bit(__IXGBEVF_SERVICE_INITED, &adapter->state))
+		ixgbevf_service_event_schedule(adapter);
+}
+
+static void ixgbevf_check_remove(struct ixgbe_hw *hw, u32 reg)
+{
+	u32 value;
+
+	/* The following check not only optimizes a bit by not
+	 * performing a read on the status register when the
+	 * register just read was a status register read that
+	 * returned IXGBE_FAILED_READ_REG. It also blocks any
+	 * potential recursion.
+	 */
+	if (reg == IXGBE_VFSTATUS) {
+		ixgbevf_remove_adapter(hw);
+		return;
+	}
+	value = ixgbevf_read_reg(hw, IXGBE_VFSTATUS);
+	if (value == IXGBE_FAILED_READ_REG)
+		ixgbevf_remove_adapter(hw);
+}
+
+u32 ixgbevf_read_reg(struct ixgbe_hw *hw, u32 reg)
+{
+	u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr);
+	u32 value;
+
+	if (IXGBE_REMOVED(reg_addr))
+		return IXGBE_FAILED_READ_REG;
+	value = readl(reg_addr + reg);
+	if (unlikely(value == IXGBE_FAILED_READ_REG))
+		ixgbevf_check_remove(hw, reg);
+	return value;
+}
+
+/**
+ * ixgbevf_set_ivar - set IVAR registers - maps interrupt causes to vectors
+ * @adapter: pointer to adapter struct
+ * @direction: 0 for Rx, 1 for Tx, -1 for other causes
+ * @queue: queue to map the corresponding interrupt to
+ * @msix_vector: the vector to map to the corresponding queue
+ **/
+static void ixgbevf_set_ivar(struct ixgbevf_adapter *adapter, s8 direction,
+			     u8 queue, u8 msix_vector)
+{
+	u32 ivar, index;
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	if (direction == -1) {
+		/* other causes */
+		msix_vector |= IXGBE_IVAR_ALLOC_VAL;
+		ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR_MISC);
+		ivar &= ~0xFF;
+		ivar |= msix_vector;
+		IXGBE_WRITE_REG(hw, IXGBE_VTIVAR_MISC, ivar);
+	} else {
+		/* Tx or Rx causes */
+		msix_vector |= IXGBE_IVAR_ALLOC_VAL;
+		index = ((16 * (queue & 1)) + (8 * direction));
+		ivar = IXGBE_READ_REG(hw, IXGBE_VTIVAR(queue >> 1));
+		ivar &= ~(0xFF << index);
+		ivar |= (msix_vector << index);
+		IXGBE_WRITE_REG(hw, IXGBE_VTIVAR(queue >> 1), ivar);
+	}
+}
+
+static u64 ixgbevf_get_tx_completed(struct ixgbevf_ring *ring)
+{
+	return ring->stats.packets;
+}
+
+static u32 ixgbevf_get_tx_pending(struct ixgbevf_ring *ring)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(ring->netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	u32 head = IXGBE_READ_REG(hw, IXGBE_VFTDH(ring->reg_idx));
+	u32 tail = IXGBE_READ_REG(hw, IXGBE_VFTDT(ring->reg_idx));
+
+	if (head != tail)
+		return (head < tail) ?
+			tail - head : (tail + ring->count - head);
+
+	return 0;
+}
+
+static inline bool ixgbevf_check_tx_hang(struct ixgbevf_ring *tx_ring)
+{
+	u32 tx_done = ixgbevf_get_tx_completed(tx_ring);
+	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
+	u32 tx_pending = ixgbevf_get_tx_pending(tx_ring);
+
+	clear_check_for_tx_hang(tx_ring);
+
+	/* Check for a hung queue, but be thorough. This verifies
+	 * that a transmit has been completed since the previous
+	 * check AND there is at least one packet pending. The
+	 * ARMED bit is set to indicate a potential hang.
+	 */
+	if ((tx_done_old == tx_done) && tx_pending) {
+		/* make sure it is true for two checks in a row */
+		return test_and_set_bit(__IXGBEVF_HANG_CHECK_ARMED,
+					&tx_ring->state);
+	}
+	/* reset the countdown */
+	clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &tx_ring->state);
+
+	/* update completed stats and continue */
+	tx_ring->tx_stats.tx_done_old = tx_done;
+
+	return false;
+}
+
+static void ixgbevf_tx_timeout_reset(struct ixgbevf_adapter *adapter)
+{
+	/* Do the reset outside of interrupt context */
+	if (!test_bit(__IXGBEVF_DOWN, &adapter->state)) {
+		set_bit(__IXGBEVF_RESET_REQUESTED, &adapter->state);
+		ixgbevf_service_event_schedule(adapter);
+	}
+}
+
+/**
+ * ixgbevf_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ * @txqueue: transmit queue hanging (unused)
+ **/
+static void ixgbevf_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+
+	ixgbevf_tx_timeout_reset(adapter);
+}
+
+/**
+ * ixgbevf_clean_tx_irq - Reclaim resources after transmit completes
+ * @q_vector: board private structure
+ * @tx_ring: tx ring to clean
+ * @napi_budget: Used to determine if we are in netpoll
+ **/
+static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
+				 struct ixgbevf_ring *tx_ring, int napi_budget)
+{
+	struct ixgbevf_adapter *adapter = q_vector->adapter;
+	struct ixgbevf_tx_buffer *tx_buffer;
+	union ixgbe_adv_tx_desc *tx_desc;
+	unsigned int total_bytes = 0, total_packets = 0, total_ipsec = 0;
+	unsigned int budget = tx_ring->count / 2;
+	unsigned int i = tx_ring->next_to_clean;
+
+	if (test_bit(__IXGBEVF_DOWN, &adapter->state))
+		return true;
+
+	tx_buffer = &tx_ring->tx_buffer_info[i];
+	tx_desc = IXGBEVF_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		union ixgbe_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buffer->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buffer->bytecount;
+		total_packets += tx_buffer->gso_segs;
+		if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_IPSEC)
+			total_ipsec++;
+
+		/* free the skb */
+		if (ring_is_xdp(tx_ring))
+			page_frag_free(tx_buffer->data);
+		else
+			napi_consume_skb(tx_buffer->skb, napi_budget);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+
+		/* clear tx_buffer data */
+		dma_unmap_len_set(tx_buffer, len, 0);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(!i)) {
+				i -= tx_ring->count;
+				tx_buffer = tx_ring->tx_buffer_info;
+				tx_desc = IXGBEVF_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len)) {
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buffer, dma),
+					       dma_unmap_len(tx_buffer, len),
+					       DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buffer, len, 0);
+			}
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buffer = tx_ring->tx_buffer_info;
+			tx_desc = IXGBEVF_TX_DESC(tx_ring, 0);
+		}
+
+		/* issue prefetch for next Tx descriptor */
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->syncp);
+	q_vector->tx.total_bytes += total_bytes;
+	q_vector->tx.total_packets += total_packets;
+	adapter->tx_ipsec += total_ipsec;
+
+	if (check_for_tx_hang(tx_ring) && ixgbevf_check_tx_hang(tx_ring)) {
+		struct ixgbe_hw *hw = &adapter->hw;
+		union ixgbe_adv_tx_desc *eop_desc;
+
+		eop_desc = tx_ring->tx_buffer_info[i].next_to_watch;
+
+		pr_err("Detected Tx Unit Hang%s\n"
+		       "  Tx Queue             <%d>\n"
+		       "  TDH, TDT             <%x>, <%x>\n"
+		       "  next_to_use          <%x>\n"
+		       "  next_to_clean        <%x>\n"
+		       "tx_buffer_info[next_to_clean]\n"
+		       "  next_to_watch        <%p>\n"
+		       "  eop_desc->wb.status  <%x>\n"
+		       "  time_stamp           <%lx>\n"
+		       "  jiffies              <%lx>\n",
+		       ring_is_xdp(tx_ring) ? " XDP" : "",
+		       tx_ring->queue_index,
+		       IXGBE_READ_REG(hw, IXGBE_VFTDH(tx_ring->reg_idx)),
+		       IXGBE_READ_REG(hw, IXGBE_VFTDT(tx_ring->reg_idx)),
+		       tx_ring->next_to_use, i,
+		       eop_desc, (eop_desc ? eop_desc->wb.status : 0),
+		       tx_ring->tx_buffer_info[i].time_stamp, jiffies);
+
+		if (!ring_is_xdp(tx_ring))
+			netif_stop_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+
+		/* schedule immediate reset if we believe we hung */
+		ixgbevf_tx_timeout_reset(adapter);
+
+		return true;
+	}
+
+	if (ring_is_xdp(tx_ring))
+		return !!budget;
+
+#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
+		     (ixgbevf_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+
+		if (__netif_subqueue_stopped(tx_ring->netdev,
+					     tx_ring->queue_index) &&
+		    !test_bit(__IXGBEVF_DOWN, &adapter->state)) {
+			netif_wake_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+			++tx_ring->tx_stats.restart_queue;
+		}
+	}
+
+	return !!budget;
+}
+
+/**
+ * ixgbevf_rx_skb - Helper function to determine proper Rx method
+ * @q_vector: structure containing interrupt and ring information
+ * @skb: packet to send up
+ **/
+static void ixgbevf_rx_skb(struct ixgbevf_q_vector *q_vector,
+			   struct sk_buff *skb)
+{
+	napi_gro_receive(&q_vector->napi, skb);
+}
+
+#define IXGBE_RSS_L4_TYPES_MASK \
+	((1ul << IXGBE_RXDADV_RSSTYPE_IPV4_TCP) | \
+	 (1ul << IXGBE_RXDADV_RSSTYPE_IPV4_UDP) | \
+	 (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_TCP) | \
+	 (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_UDP))
+
+static inline void ixgbevf_rx_hash(struct ixgbevf_ring *ring,
+				   union ixgbe_adv_rx_desc *rx_desc,
+				   struct sk_buff *skb)
+{
+	u16 rss_type;
+
+	if (!(ring->netdev->features & NETIF_F_RXHASH))
+		return;
+
+	rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) &
+		   IXGBE_RXDADV_RSSTYPE_MASK;
+
+	if (!rss_type)
+		return;
+
+	skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
+		     (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ?
+		     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
+}
+
+/**
+ * ixgbevf_rx_checksum - indicate in skb if hw indicated a good cksum
+ * @ring: structure containig ring specific data
+ * @rx_desc: current Rx descriptor being processed
+ * @skb: skb currently being received and modified
+ **/
+static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring,
+				       union ixgbe_adv_rx_desc *rx_desc,
+				       struct sk_buff *skb)
+{
+	skb_checksum_none_assert(skb);
+
+	/* Rx csum disabled */
+	if (!(ring->netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	/* if IP and error */
+	if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_IPCS) &&
+	    ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_IPE)) {
+		ring->rx_stats.csum_err++;
+		return;
+	}
+
+	if (!ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_L4CS))
+		return;
+
+	if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) {
+		ring->rx_stats.csum_err++;
+		return;
+	}
+
+	/* It must be a TCP or UDP packet with a valid checksum */
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+}
+
+/**
+ * ixgbevf_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ *
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the checksum, VLAN, protocol, and other fields within
+ * the skb.
+ **/
+static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring,
+				       union ixgbe_adv_rx_desc *rx_desc,
+				       struct sk_buff *skb)
+{
+	ixgbevf_rx_hash(rx_ring, rx_desc, skb);
+	ixgbevf_rx_checksum(rx_ring, rx_desc, skb);
+
+	if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) {
+		u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
+		unsigned long *active_vlans = netdev_priv(rx_ring->netdev);
+
+		if (test_bit(vid & VLAN_VID_MASK, active_vlans))
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
+	}
+
+	if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP))
+		ixgbevf_ipsec_rx(rx_ring, rx_desc, skb);
+
+	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+}
+
+static
+struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring,
+						const unsigned int size)
+{
+	struct ixgbevf_rx_buffer *rx_buffer;
+
+	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+	prefetchw(rx_buffer->page);
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      rx_buffer->dma,
+				      rx_buffer->page_offset,
+				      size,
+				      DMA_FROM_DEVICE);
+
+	rx_buffer->pagecnt_bias--;
+
+	return rx_buffer;
+}
+
+static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring,
+				  struct ixgbevf_rx_buffer *rx_buffer,
+				  struct sk_buff *skb)
+{
+	if (ixgbevf_can_reuse_rx_page(rx_buffer)) {
+		/* hand second half of page back to the ring */
+		ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
+	} else {
+		if (IS_ERR(skb))
+			/* We are not reusing the buffer so unmap it and free
+			 * any references we are holding to it
+			 */
+			dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+					     ixgbevf_rx_pg_size(rx_ring),
+					     DMA_FROM_DEVICE,
+					     IXGBEVF_RX_DMA_ATTR);
+		__page_frag_cache_drain(rx_buffer->page,
+					rx_buffer->pagecnt_bias);
+	}
+
+	/* clear contents of rx_buffer */
+	rx_buffer->page = NULL;
+}
+
+/**
+ * ixgbevf_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ *
+ * This function updates next to clean.  If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ **/
+static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring,
+			       union ixgbe_adv_rx_desc *rx_desc)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
+	prefetch(IXGBEVF_RX_DESC(rx_ring, ntc));
+
+	if (likely(ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)))
+		return false;
+
+	return true;
+}
+
+static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring)
+{
+	return ring_uses_build_skb(rx_ring) ? IXGBEVF_SKB_PAD : 0;
+}
+
+static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
+				      struct ixgbevf_rx_buffer *bi)
+{
+	struct page *page = bi->page;
+	dma_addr_t dma;
+
+	/* since we are recycling buffers we should seldom need to alloc */
+	if (likely(page))
+		return true;
+
+	/* alloc new page for storage */
+	page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring));
+	if (unlikely(!page)) {
+		rx_ring->rx_stats.alloc_rx_page_failed++;
+		return false;
+	}
+
+	/* map page for use */
+	dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+				 ixgbevf_rx_pg_size(rx_ring),
+				 DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR);
+
+	/* if mapping failed free memory back to system since
+	 * there isn't much point in holding memory we can't use
+	 */
+	if (dma_mapping_error(rx_ring->dev, dma)) {
+		__free_pages(page, ixgbevf_rx_pg_order(rx_ring));
+
+		rx_ring->rx_stats.alloc_rx_page_failed++;
+		return false;
+	}
+
+	bi->dma = dma;
+	bi->page = page;
+	bi->page_offset = ixgbevf_rx_offset(rx_ring);
+	bi->pagecnt_bias = 1;
+	rx_ring->rx_stats.alloc_rx_page++;
+
+	return true;
+}
+
+/**
+ * ixgbevf_alloc_rx_buffers - Replace used receive buffers; packet split
+ * @rx_ring: rx descriptor ring (for a specific queue) to setup buffers on
+ * @cleaned_count: number of buffers to replace
+ **/
+static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
+				     u16 cleaned_count)
+{
+	union ixgbe_adv_rx_desc *rx_desc;
+	struct ixgbevf_rx_buffer *bi;
+	unsigned int i = rx_ring->next_to_use;
+
+	/* nothing to do or no valid netdev defined */
+	if (!cleaned_count || !rx_ring->netdev)
+		return;
+
+	rx_desc = IXGBEVF_RX_DESC(rx_ring, i);
+	bi = &rx_ring->rx_buffer_info[i];
+	i -= rx_ring->count;
+
+	do {
+		if (!ixgbevf_alloc_mapped_page(rx_ring, bi))
+			break;
+
+		/* sync the buffer for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+						 bi->page_offset,
+						 ixgbevf_rx_bufsz(rx_ring),
+						 DMA_FROM_DEVICE);
+
+		/* Refresh the desc even if pkt_addr didn't change
+		 * because each write-back erases this info.
+		 */
+		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
+
+		rx_desc++;
+		bi++;
+		i++;
+		if (unlikely(!i)) {
+			rx_desc = IXGBEVF_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_buffer_info;
+			i -= rx_ring->count;
+		}
+
+		/* clear the length for the next_to_use descriptor */
+		rx_desc->wb.upper.length = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	i += rx_ring->count;
+
+	if (rx_ring->next_to_use != i) {
+		/* record the next descriptor to use */
+		rx_ring->next_to_use = i;
+
+		/* update next to alloc since we have filled the ring */
+		rx_ring->next_to_alloc = i;
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		ixgbevf_write_tail(rx_ring, i);
+	}
+}
+
+/**
+ * ixgbevf_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Check for corrupted packet headers caused by senders on the local L2
+ * embedded NIC switch not setting up their Tx Descriptors right.  These
+ * should be very rare.
+ *
+ * Also address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring,
+				    union ixgbe_adv_rx_desc *rx_desc,
+				    struct sk_buff *skb)
+{
+	/* XDP packets use error pointer so abort at this point */
+	if (IS_ERR(skb))
+		return true;
+
+	/* verify that the packet does not have any known errors */
+	if (unlikely(ixgbevf_test_staterr(rx_desc,
+					  IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) {
+		struct net_device *netdev = rx_ring->netdev;
+
+		if (!(netdev->features & NETIF_F_RXALL)) {
+			dev_kfree_skb_any(skb);
+			return true;
+		}
+	}
+
+	/* if eth_skb_pad returns an error the skb was freed */
+	if (eth_skb_pad(skb))
+		return true;
+
+	return false;
+}
+
+/**
+ * ixgbevf_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ **/
+static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring,
+				  struct ixgbevf_rx_buffer *old_buff)
+{
+	struct ixgbevf_rx_buffer *new_buff;
+	u16 nta = rx_ring->next_to_alloc;
+
+	new_buff = &rx_ring->rx_buffer_info[nta];
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* transfer page from old buffer to new buffer */
+	new_buff->page = old_buff->page;
+	new_buff->dma = old_buff->dma;
+	new_buff->page_offset = old_buff->page_offset;
+	new_buff->pagecnt_bias = old_buff->pagecnt_bias;
+}
+
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer)
+{
+	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+	struct page *page = rx_buffer->page;
+
+	/* avoid re-using remote and pfmemalloc pages */
+	if (!dev_page_is_reusable(page))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
+		return false;
+#else
+#define IXGBEVF_LAST_OFFSET \
+	(SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048)
+
+	if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET)
+		return false;
+
+#endif
+
+	/* If we have drained the page fragment pool we need to update
+	 * the pagecnt_bias and page count so that we fully restock the
+	 * number of references the driver holds.
+	 */
+	if (unlikely(!pagecnt_bias)) {
+		page_ref_add(page, USHRT_MAX);
+		rx_buffer->pagecnt_bias = USHRT_MAX;
+	}
+
+	return true;
+}
+
+/**
+ * ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @skb: sk_buff to place the data into
+ * @size: size of buffer to be added
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ **/
+static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
+				struct ixgbevf_rx_buffer *rx_buffer,
+				struct sk_buff *skb,
+				unsigned int size)
+{
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = ring_uses_build_skb(rx_ring) ?
+				SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
+				SKB_DATA_ALIGN(size);
+#endif
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+			rx_buffer->page_offset, size, truesize);
+#if (PAGE_SIZE < 8192)
+	rx_buffer->page_offset ^= truesize;
+#else
+	rx_buffer->page_offset += truesize;
+#endif
+}
+
+static
+struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring,
+				      struct ixgbevf_rx_buffer *rx_buffer,
+				      struct xdp_buff *xdp,
+				      union ixgbe_adv_rx_desc *rx_desc)
+{
+	unsigned int size = xdp->data_end - xdp->data;
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end -
+					       xdp->data_hard_start);
+#endif
+	unsigned int headlen;
+	struct sk_buff *skb;
+
+	/* prefetch first cache line of first page */
+	net_prefetch(xdp->data);
+
+	/* Note, we get here by enabling legacy-rx via:
+	 *
+	 *    ethtool --set-priv-flags <dev> legacy-rx on
+	 *
+	 * In this mode, we currently get 0 extra XDP headroom as
+	 * opposed to having legacy-rx off, where we process XDP
+	 * packets going to stack via ixgbevf_build_skb().
+	 *
+	 * For ixgbevf_construct_skb() mode it means that the
+	 * xdp->data_meta will always point to xdp->data, since
+	 * the helper cannot expand the head. Should this ever
+	 * changed in future for legacy-rx mode on, then lets also
+	 * add xdp->data_meta handling here.
+	 */
+
+	/* allocate a skb to store the frags */
+	skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* Determine available headroom for copy */
+	headlen = size;
+	if (headlen > IXGBEVF_RX_HDR_SIZE)
+		headlen = eth_get_headlen(skb->dev, xdp->data,
+					  IXGBEVF_RX_HDR_SIZE);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	memcpy(__skb_put(skb, headlen), xdp->data,
+	       ALIGN(headlen, sizeof(long)));
+
+	/* update all of the pointers */
+	size -= headlen;
+	if (size) {
+		skb_add_rx_frag(skb, 0, rx_buffer->page,
+				(xdp->data + headlen) -
+					page_address(rx_buffer->page),
+				size, truesize);
+#if (PAGE_SIZE < 8192)
+		rx_buffer->page_offset ^= truesize;
+#else
+		rx_buffer->page_offset += truesize;
+#endif
+	} else {
+		rx_buffer->pagecnt_bias++;
+	}
+
+	return skb;
+}
+
+static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter,
+					     u32 qmask)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask);
+}
+
+static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring,
+					 struct ixgbevf_rx_buffer *rx_buffer,
+					 struct xdp_buff *xdp,
+					 union ixgbe_adv_rx_desc *rx_desc)
+{
+	unsigned int metasize = xdp->data - xdp->data_meta;
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+#else
+	unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+				SKB_DATA_ALIGN(xdp->data_end -
+					       xdp->data_hard_start);
+#endif
+	struct sk_buff *skb;
+
+	/* Prefetch first cache line of first page. If xdp->data_meta
+	 * is unused, this points to xdp->data, otherwise, we likely
+	 * have a consumer accessing first few bytes of meta data,
+	 * and then actual data.
+	 */
+	net_prefetch(xdp->data_meta);
+
+	/* build an skb around the page buffer */
+	skb = napi_build_skb(xdp->data_hard_start, truesize);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* update pointers within the skb to store the data */
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	__skb_put(skb, xdp->data_end - xdp->data);
+	if (metasize)
+		skb_metadata_set(skb, metasize);
+
+	/* update buffer offset */
+#if (PAGE_SIZE < 8192)
+	rx_buffer->page_offset ^= truesize;
+#else
+	rx_buffer->page_offset += truesize;
+#endif
+
+	return skb;
+}
+
+#define IXGBEVF_XDP_PASS 0
+#define IXGBEVF_XDP_CONSUMED 1
+#define IXGBEVF_XDP_TX 2
+
+static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring,
+				 struct xdp_buff *xdp)
+{
+	struct ixgbevf_tx_buffer *tx_buffer;
+	union ixgbe_adv_tx_desc *tx_desc;
+	u32 len, cmd_type;
+	dma_addr_t dma;
+	u16 i;
+
+	len = xdp->data_end - xdp->data;
+
+	if (unlikely(!ixgbevf_desc_unused(ring)))
+		return IXGBEVF_XDP_CONSUMED;
+
+	dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE);
+	if (dma_mapping_error(ring->dev, dma))
+		return IXGBEVF_XDP_CONSUMED;
+
+	/* record the location of the first descriptor for this packet */
+	i = ring->next_to_use;
+	tx_buffer = &ring->tx_buffer_info[i];
+
+	dma_unmap_len_set(tx_buffer, len, len);
+	dma_unmap_addr_set(tx_buffer, dma, dma);
+	tx_buffer->data = xdp->data;
+	tx_buffer->bytecount = len;
+	tx_buffer->gso_segs = 1;
+	tx_buffer->protocol = 0;
+
+	/* Populate minimal context descriptor that will provide for the
+	 * fact that we are expected to process Ethernet frames.
+	 */
+	if (!test_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state)) {
+		struct ixgbe_adv_tx_context_desc *context_desc;
+
+		set_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state);
+
+		context_desc = IXGBEVF_TX_CTXTDESC(ring, 0);
+		context_desc->vlan_macip_lens	=
+			cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT);
+		context_desc->fceof_saidx	= 0;
+		context_desc->type_tucmd_mlhl	=
+			cpu_to_le32(IXGBE_TXD_CMD_DEXT |
+				    IXGBE_ADVTXD_DTYP_CTXT);
+		context_desc->mss_l4len_idx	= 0;
+
+		i = 1;
+	}
+
+	/* put descriptor type bits */
+	cmd_type = IXGBE_ADVTXD_DTYP_DATA |
+		   IXGBE_ADVTXD_DCMD_DEXT |
+		   IXGBE_ADVTXD_DCMD_IFCS;
+	cmd_type |= len | IXGBE_TXD_CMD;
+
+	tx_desc = IXGBEVF_TX_DESC(ring, i);
+	tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+	tx_desc->read.olinfo_status =
+			cpu_to_le32((len << IXGBE_ADVTXD_PAYLEN_SHIFT) |
+				    IXGBE_ADVTXD_CC);
+
+	/* Avoid any potential race with cleanup */
+	smp_wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	i++;
+	if (i == ring->count)
+		i = 0;
+
+	tx_buffer->next_to_watch = tx_desc;
+	ring->next_to_use = i;
+
+	return IXGBEVF_XDP_TX;
+}
+
+static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
+				       struct ixgbevf_ring  *rx_ring,
+				       struct xdp_buff *xdp)
+{
+	int result = IXGBEVF_XDP_PASS;
+	struct ixgbevf_ring *xdp_ring;
+	struct bpf_prog *xdp_prog;
+	u32 act;
+
+	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
+	if (!xdp_prog)
+		goto xdp_out;
+
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	switch (act) {
+	case XDP_PASS:
+		break;
+	case XDP_TX:
+		xdp_ring = adapter->xdp_ring[rx_ring->queue_index];
+		result = ixgbevf_xmit_xdp_ring(xdp_ring, xdp);
+		if (result == IXGBEVF_XDP_CONSUMED)
+			goto out_failure;
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
+		fallthrough;
+	case XDP_ABORTED:
+out_failure:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		fallthrough; /* handle aborts by dropping packet */
+	case XDP_DROP:
+		result = IXGBEVF_XDP_CONSUMED;
+		break;
+	}
+xdp_out:
+	return ERR_PTR(-result);
+}
+
+static unsigned int ixgbevf_rx_frame_truesize(struct ixgbevf_ring *rx_ring,
+					      unsigned int size)
+{
+	unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+	truesize = ixgbevf_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
+#else
+	truesize = ring_uses_build_skb(rx_ring) ?
+		SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) +
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
+		SKB_DATA_ALIGN(size);
+#endif
+	return truesize;
+}
+
+static void ixgbevf_rx_buffer_flip(struct ixgbevf_ring *rx_ring,
+				   struct ixgbevf_rx_buffer *rx_buffer,
+				   unsigned int size)
+{
+	unsigned int truesize = ixgbevf_rx_frame_truesize(rx_ring, size);
+
+#if (PAGE_SIZE < 8192)
+	rx_buffer->page_offset ^= truesize;
+#else
+	rx_buffer->page_offset += truesize;
+#endif
+}
+
+static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
+				struct ixgbevf_ring *rx_ring,
+				int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0;
+	struct ixgbevf_adapter *adapter = q_vector->adapter;
+	u16 cleaned_count = ixgbevf_desc_unused(rx_ring);
+	struct sk_buff *skb = rx_ring->skb;
+	bool xdp_xmit = false;
+	struct xdp_buff xdp;
+
+	/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
+#if (PAGE_SIZE < 8192)
+	frame_sz = ixgbevf_rx_frame_truesize(rx_ring, 0);
+#endif
+	xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
+
+	while (likely(total_rx_packets < budget)) {
+		struct ixgbevf_rx_buffer *rx_buffer;
+		union ixgbe_adv_rx_desc *rx_desc;
+		unsigned int size;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) {
+			ixgbevf_alloc_rx_buffers(rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
+		size = le16_to_cpu(rx_desc->wb.upper.length);
+		if (!size)
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * RXD_STAT_DD bit is set
+		 */
+		rmb();
+
+		rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size);
+
+		/* retrieve a buffer from the ring */
+		if (!skb) {
+			unsigned int offset = ixgbevf_rx_offset(rx_ring);
+			unsigned char *hard_start;
+
+			hard_start = page_address(rx_buffer->page) +
+				     rx_buffer->page_offset - offset;
+			xdp_prepare_buff(&xdp, hard_start, offset, size, true);
+#if (PAGE_SIZE > 4096)
+			/* At larger PAGE_SIZE, frame_sz depend on len size */
+			xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, size);
+#endif
+			skb = ixgbevf_run_xdp(adapter, rx_ring, &xdp);
+		}
+
+		if (IS_ERR(skb)) {
+			if (PTR_ERR(skb) == -IXGBEVF_XDP_TX) {
+				xdp_xmit = true;
+				ixgbevf_rx_buffer_flip(rx_ring, rx_buffer,
+						       size);
+			} else {
+				rx_buffer->pagecnt_bias++;
+			}
+			total_rx_packets++;
+			total_rx_bytes += size;
+		} else if (skb) {
+			ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size);
+		} else if (ring_uses_build_skb(rx_ring)) {
+			skb = ixgbevf_build_skb(rx_ring, rx_buffer,
+						&xdp, rx_desc);
+		} else {
+			skb = ixgbevf_construct_skb(rx_ring, rx_buffer,
+						    &xdp, rx_desc);
+		}
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb) {
+			rx_ring->rx_stats.alloc_rx_buff_failed++;
+			rx_buffer->pagecnt_bias++;
+			break;
+		}
+
+		ixgbevf_put_rx_buffer(rx_ring, rx_buffer, skb);
+		cleaned_count++;
+
+		/* fetch next buffer in frame if non-eop */
+		if (ixgbevf_is_non_eop(rx_ring, rx_desc))
+			continue;
+
+		/* verify the packet layout is correct */
+		if (ixgbevf_cleanup_headers(rx_ring, rx_desc, skb)) {
+			skb = NULL;
+			continue;
+		}
+
+		/* probably a little skewed due to removing CRC */
+		total_rx_bytes += skb->len;
+
+		/* Workaround hardware that can't do proper VEPA multicast
+		 * source pruning.
+		 */
+		if ((skb->pkt_type == PACKET_BROADCAST ||
+		     skb->pkt_type == PACKET_MULTICAST) &&
+		    ether_addr_equal(rx_ring->netdev->dev_addr,
+				     eth_hdr(skb)->h_source)) {
+			dev_kfree_skb_irq(skb);
+			continue;
+		}
+
+		/* populate checksum, VLAN, and protocol */
+		ixgbevf_process_skb_fields(rx_ring, rx_desc, skb);
+
+		ixgbevf_rx_skb(q_vector, skb);
+
+		/* reset skb pointer */
+		skb = NULL;
+
+		/* update budget accounting */
+		total_rx_packets++;
+	}
+
+	/* place incomplete frames back on ring for completion */
+	rx_ring->skb = skb;
+
+	if (xdp_xmit) {
+		struct ixgbevf_ring *xdp_ring =
+			adapter->xdp_ring[rx_ring->queue_index];
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.
+		 */
+		wmb();
+		ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use);
+	}
+
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->stats.packets += total_rx_packets;
+	rx_ring->stats.bytes += total_rx_bytes;
+	u64_stats_update_end(&rx_ring->syncp);
+	q_vector->rx.total_packets += total_rx_packets;
+	q_vector->rx.total_bytes += total_rx_bytes;
+
+	return total_rx_packets;
+}
+
+/**
+ * ixgbevf_poll - NAPI polling calback
+ * @napi: napi struct with our devices info in it
+ * @budget: amount of work driver is allowed to do this pass, in packets
+ *
+ * This function will clean more than one or more rings associated with a
+ * q_vector.
+ **/
+static int ixgbevf_poll(struct napi_struct *napi, int budget)
+{
+	struct ixgbevf_q_vector *q_vector =
+		container_of(napi, struct ixgbevf_q_vector, napi);
+	struct ixgbevf_adapter *adapter = q_vector->adapter;
+	struct ixgbevf_ring *ring;
+	int per_ring_budget, work_done = 0;
+	bool clean_complete = true;
+
+	ixgbevf_for_each_ring(ring, q_vector->tx) {
+		if (!ixgbevf_clean_tx_irq(q_vector, ring, budget))
+			clean_complete = false;
+	}
+
+	if (budget <= 0)
+		return budget;
+
+	/* attempt to distribute budget to each queue fairly, but don't allow
+	 * the budget to go below 1 because we'll exit polling
+	 */
+	if (q_vector->rx.count > 1)
+		per_ring_budget = max(budget/q_vector->rx.count, 1);
+	else
+		per_ring_budget = budget;
+
+	ixgbevf_for_each_ring(ring, q_vector->rx) {
+		int cleaned = ixgbevf_clean_rx_irq(q_vector, ring,
+						   per_ring_budget);
+		work_done += cleaned;
+		if (cleaned >= per_ring_budget)
+			clean_complete = false;
+	}
+
+	/* If all work not completed, return budget and keep polling */
+	if (!clean_complete)
+		return budget;
+
+	/* Exit the polling mode, but don't re-enable interrupts if stack might
+	 * poll us due to busy-polling
+	 */
+	if (likely(napi_complete_done(napi, work_done))) {
+		if (adapter->rx_itr_setting == 1)
+			ixgbevf_set_itr(q_vector);
+		if (!test_bit(__IXGBEVF_DOWN, &adapter->state) &&
+		    !test_bit(__IXGBEVF_REMOVING, &adapter->state))
+			ixgbevf_irq_enable_queues(adapter,
+						  BIT(q_vector->v_idx));
+	}
+
+	return min(work_done, budget - 1);
+}
+
+/**
+ * ixgbevf_write_eitr - write VTEITR register in hardware specific way
+ * @q_vector: structure containing interrupt and ring information
+ **/
+void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector)
+{
+	struct ixgbevf_adapter *adapter = q_vector->adapter;
+	struct ixgbe_hw *hw = &adapter->hw;
+	int v_idx = q_vector->v_idx;
+	u32 itr_reg = q_vector->itr & IXGBE_MAX_EITR;
+
+	/* set the WDIS bit to not clear the timer bits and cause an
+	 * immediate assertion of the interrupt
+	 */
+	itr_reg |= IXGBE_EITR_CNT_WDIS;
+
+	IXGBE_WRITE_REG(hw, IXGBE_VTEITR(v_idx), itr_reg);
+}
+
+/**
+ * ixgbevf_configure_msix - Configure MSI-X hardware
+ * @adapter: board private structure
+ *
+ * ixgbevf_configure_msix sets up the hardware to properly generate MSI-X
+ * interrupts.
+ **/
+static void ixgbevf_configure_msix(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbevf_q_vector *q_vector;
+	int q_vectors, v_idx;
+
+	q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+	adapter->eims_enable_mask = 0;
+
+	/* Populate the IVAR table and set the ITR values to the
+	 * corresponding register.
+	 */
+	for (v_idx = 0; v_idx < q_vectors; v_idx++) {
+		struct ixgbevf_ring *ring;
+
+		q_vector = adapter->q_vector[v_idx];
+
+		ixgbevf_for_each_ring(ring, q_vector->rx)
+			ixgbevf_set_ivar(adapter, 0, ring->reg_idx, v_idx);
+
+		ixgbevf_for_each_ring(ring, q_vector->tx)
+			ixgbevf_set_ivar(adapter, 1, ring->reg_idx, v_idx);
+
+		if (q_vector->tx.ring && !q_vector->rx.ring) {
+			/* Tx only vector */
+			if (adapter->tx_itr_setting == 1)
+				q_vector->itr = IXGBE_12K_ITR;
+			else
+				q_vector->itr = adapter->tx_itr_setting;
+		} else {
+			/* Rx or Rx/Tx vector */
+			if (adapter->rx_itr_setting == 1)
+				q_vector->itr = IXGBE_20K_ITR;
+			else
+				q_vector->itr = adapter->rx_itr_setting;
+		}
+
+		/* add q_vector eims value to global eims_enable_mask */
+		adapter->eims_enable_mask |= BIT(v_idx);
+
+		ixgbevf_write_eitr(q_vector);
+	}
+
+	ixgbevf_set_ivar(adapter, -1, 1, v_idx);
+	/* setup eims_other and add value to global eims_enable_mask */
+	adapter->eims_other = BIT(v_idx);
+	adapter->eims_enable_mask |= adapter->eims_other;
+}
+
+enum latency_range {
+	lowest_latency = 0,
+	low_latency = 1,
+	bulk_latency = 2,
+	latency_invalid = 255
+};
+
+/**
+ * ixgbevf_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: structure containing interrupt and ring information
+ * @ring_container: structure containing ring performance data
+ *
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt.  The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern.  Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
+ * while increasing bulk throughput.
+ **/
+static void ixgbevf_update_itr(struct ixgbevf_q_vector *q_vector,
+			       struct ixgbevf_ring_container *ring_container)
+{
+	int bytes = ring_container->total_bytes;
+	int packets = ring_container->total_packets;
+	u32 timepassed_us;
+	u64 bytes_perint;
+	u8 itr_setting = ring_container->itr;
+
+	if (packets == 0)
+		return;
+
+	/* simple throttle rate management
+	 *    0-20MB/s lowest (100000 ints/s)
+	 *   20-100MB/s low   (20000 ints/s)
+	 *  100-1249MB/s bulk (12000 ints/s)
+	 */
+	/* what was last interrupt timeslice? */
+	timepassed_us = q_vector->itr >> 2;
+	if (timepassed_us == 0)
+		return;
+
+	bytes_perint = bytes / timepassed_us; /* bytes/usec */
+
+	switch (itr_setting) {
+	case lowest_latency:
+		if (bytes_perint > 10)
+			itr_setting = low_latency;
+		break;
+	case low_latency:
+		if (bytes_perint > 20)
+			itr_setting = bulk_latency;
+		else if (bytes_perint <= 10)
+			itr_setting = lowest_latency;
+		break;
+	case bulk_latency:
+		if (bytes_perint <= 20)
+			itr_setting = low_latency;
+		break;
+	}
+
+	/* clear work counters since we have the values we need */
+	ring_container->total_bytes = 0;
+	ring_container->total_packets = 0;
+
+	/* write updated itr to ring container */
+	ring_container->itr = itr_setting;
+}
+
+static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector)
+{
+	u32 new_itr = q_vector->itr;
+	u8 current_itr;
+
+	ixgbevf_update_itr(q_vector, &q_vector->tx);
+	ixgbevf_update_itr(q_vector, &q_vector->rx);
+
+	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
+
+	switch (current_itr) {
+	/* counts and packets in update_itr are dependent on these numbers */
+	case lowest_latency:
+		new_itr = IXGBE_100K_ITR;
+		break;
+	case low_latency:
+		new_itr = IXGBE_20K_ITR;
+		break;
+	case bulk_latency:
+		new_itr = IXGBE_12K_ITR;
+		break;
+	default:
+		break;
+	}
+
+	if (new_itr != q_vector->itr) {
+		/* do an exponential smoothing */
+		new_itr = (10 * new_itr * q_vector->itr) /
+			  ((9 * new_itr) + q_vector->itr);
+
+		/* save the algorithm value here */
+		q_vector->itr = new_itr;
+
+		ixgbevf_write_eitr(q_vector);
+	}
+}
+
+static irqreturn_t ixgbevf_msix_other(int irq, void *data)
+{
+	struct ixgbevf_adapter *adapter = data;
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	hw->mac.get_link_status = 1;
+
+	ixgbevf_service_event_schedule(adapter);
+
+	IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, adapter->eims_other);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ixgbevf_msix_clean_rings - single unshared vector rx clean (all queues)
+ * @irq: unused
+ * @data: pointer to our q_vector struct for this interrupt vector
+ **/
+static irqreturn_t ixgbevf_msix_clean_rings(int irq, void *data)
+{
+	struct ixgbevf_q_vector *q_vector = data;
+
+	/* EIAM disabled interrupts (on this vector) for us */
+	if (q_vector->rx.ring || q_vector->tx.ring)
+		napi_schedule_irqoff(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ixgbevf_request_msix_irqs - Initialize MSI-X interrupts
+ * @adapter: board private structure
+ *
+ * ixgbevf_request_msix_irqs allocates MSI-X vectors and requests
+ * interrupts from the kernel.
+ **/
+static int ixgbevf_request_msix_irqs(struct ixgbevf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+	unsigned int ri = 0, ti = 0;
+	int vector, err;
+
+	for (vector = 0; vector < q_vectors; vector++) {
+		struct ixgbevf_q_vector *q_vector = adapter->q_vector[vector];
+		struct msix_entry *entry = &adapter->msix_entries[vector];
+
+		if (q_vector->tx.ring && q_vector->rx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "%s-TxRx-%u", netdev->name, ri++);
+			ti++;
+		} else if (q_vector->rx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "%s-rx-%u", netdev->name, ri++);
+		} else if (q_vector->tx.ring) {
+			snprintf(q_vector->name, sizeof(q_vector->name),
+				 "%s-tx-%u", netdev->name, ti++);
+		} else {
+			/* skip this unused q_vector */
+			continue;
+		}
+		err = request_irq(entry->vector, &ixgbevf_msix_clean_rings, 0,
+				  q_vector->name, q_vector);
+		if (err) {
+			hw_dbg(&adapter->hw,
+			       "request_irq failed for MSIX interrupt Error: %d\n",
+			       err);
+			goto free_queue_irqs;
+		}
+	}
+
+	err = request_irq(adapter->msix_entries[vector].vector,
+			  &ixgbevf_msix_other, 0, netdev->name, adapter);
+	if (err) {
+		hw_dbg(&adapter->hw, "request_irq for msix_other failed: %d\n",
+		       err);
+		goto free_queue_irqs;
+	}
+
+	return 0;
+
+free_queue_irqs:
+	while (vector) {
+		vector--;
+		free_irq(adapter->msix_entries[vector].vector,
+			 adapter->q_vector[vector]);
+	}
+	/* This failure is non-recoverable - it indicates the system is
+	 * out of MSIX vector resources and the VF driver cannot run
+	 * without them.  Set the number of msix vectors to zero
+	 * indicating that not enough can be allocated.  The error
+	 * will be returned to the user indicating device open failed.
+	 * Any further attempts to force the driver to open will also
+	 * fail.  The only way to recover is to unload the driver and
+	 * reload it again.  If the system has recovered some MSIX
+	 * vectors then it may succeed.
+	 */
+	adapter->num_msix_vectors = 0;
+	return err;
+}
+
+/**
+ * ixgbevf_request_irq - initialize interrupts
+ * @adapter: board private structure
+ *
+ * Attempts to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static int ixgbevf_request_irq(struct ixgbevf_adapter *adapter)
+{
+	int err = ixgbevf_request_msix_irqs(adapter);
+
+	if (err)
+		hw_dbg(&adapter->hw, "request_irq failed, Error %d\n", err);
+
+	return err;
+}
+
+static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter)
+{
+	int i, q_vectors;
+
+	if (!adapter->msix_entries)
+		return;
+
+	q_vectors = adapter->num_msix_vectors;
+	i = q_vectors - 1;
+
+	free_irq(adapter->msix_entries[i].vector, adapter);
+	i--;
+
+	for (; i >= 0; i--) {
+		/* free only the irqs that were actually requested */
+		if (!adapter->q_vector[i]->rx.ring &&
+		    !adapter->q_vector[i]->tx.ring)
+			continue;
+
+		free_irq(adapter->msix_entries[i].vector,
+			 adapter->q_vector[i]);
+	}
+}
+
+/**
+ * ixgbevf_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ **/
+static inline void ixgbevf_irq_disable(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, 0);
+	IXGBE_WRITE_REG(hw, IXGBE_VTEIMC, ~0);
+	IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, 0);
+
+	IXGBE_WRITE_FLUSH(hw);
+
+	for (i = 0; i < adapter->num_msix_vectors; i++)
+		synchronize_irq(adapter->msix_entries[i].vector);
+}
+
+/**
+ * ixgbevf_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ **/
+static inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, adapter->eims_enable_mask);
+	IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, adapter->eims_enable_mask);
+	IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, adapter->eims_enable_mask);
+}
+
+/**
+ * ixgbevf_configure_tx_ring - Configure 82599 VF Tx ring after Reset
+ * @adapter: board private structure
+ * @ring: structure containing ring specific data
+ *
+ * Configure the Tx descriptor ring after a reset.
+ **/
+static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter,
+				      struct ixgbevf_ring *ring)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u64 tdba = ring->dma;
+	int wait_loop = 10;
+	u32 txdctl = IXGBE_TXDCTL_ENABLE;
+	u8 reg_idx = ring->reg_idx;
+
+	/* disable queue to avoid issues while updating state */
+	IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
+	IXGBE_WRITE_FLUSH(hw);
+
+	IXGBE_WRITE_REG(hw, IXGBE_VFTDBAL(reg_idx), tdba & DMA_BIT_MASK(32));
+	IXGBE_WRITE_REG(hw, IXGBE_VFTDBAH(reg_idx), tdba >> 32);
+	IXGBE_WRITE_REG(hw, IXGBE_VFTDLEN(reg_idx),
+			ring->count * sizeof(union ixgbe_adv_tx_desc));
+
+	/* disable head writeback */
+	IXGBE_WRITE_REG(hw, IXGBE_VFTDWBAH(reg_idx), 0);
+	IXGBE_WRITE_REG(hw, IXGBE_VFTDWBAL(reg_idx), 0);
+
+	/* enable relaxed ordering */
+	IXGBE_WRITE_REG(hw, IXGBE_VFDCA_TXCTRL(reg_idx),
+			(IXGBE_DCA_TXCTRL_DESC_RRO_EN |
+			 IXGBE_DCA_TXCTRL_DATA_RRO_EN));
+
+	/* reset head and tail pointers */
+	IXGBE_WRITE_REG(hw, IXGBE_VFTDH(reg_idx), 0);
+	IXGBE_WRITE_REG(hw, IXGBE_VFTDT(reg_idx), 0);
+	ring->tail = adapter->io_addr + IXGBE_VFTDT(reg_idx);
+
+	/* reset ntu and ntc to place SW in sync with hardwdare */
+	ring->next_to_clean = 0;
+	ring->next_to_use = 0;
+
+	/* In order to avoid issues WTHRESH + PTHRESH should always be equal
+	 * to or less than the number of on chip descriptors, which is
+	 * currently 40.
+	 */
+	txdctl |= (8 << 16);    /* WTHRESH = 8 */
+
+	/* Setting PTHRESH to 32 both improves performance */
+	txdctl |= (1u << 8) |    /* HTHRESH = 1 */
+		   32;           /* PTHRESH = 32 */
+
+	/* reinitialize tx_buffer_info */
+	memset(ring->tx_buffer_info, 0,
+	       sizeof(struct ixgbevf_tx_buffer) * ring->count);
+
+	clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state);
+	clear_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state);
+
+	IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), txdctl);
+
+	/* poll to verify queue is enabled */
+	do {
+		usleep_range(1000, 2000);
+		txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(reg_idx));
+	}  while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
+	if (!wait_loop)
+		hw_dbg(hw, "Could not enable Tx Queue %d\n", reg_idx);
+}
+
+/**
+ * ixgbevf_configure_tx - Configure 82599 VF Transmit Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter)
+{
+	u32 i;
+
+	/* Setup the HW Tx Head and Tail descriptor pointers */
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		ixgbevf_configure_tx_ring(adapter, adapter->tx_ring[i]);
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		ixgbevf_configure_tx_ring(adapter, adapter->xdp_ring[i]);
+}
+
+#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT	2
+
+static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter,
+				     struct ixgbevf_ring *ring, int index)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 srrctl;
+
+	srrctl = IXGBE_SRRCTL_DROP_EN;
+
+	srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT;
+	if (ring_uses_large_buffer(ring))
+		srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+	else
+		srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
+	srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
+
+	IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl);
+}
+
+static void ixgbevf_setup_psrtype(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	/* PSRTYPE must be initialized in 82599 */
+	u32 psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR |
+		      IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR |
+		      IXGBE_PSRTYPE_L2HDR;
+
+	if (adapter->num_rx_queues > 1)
+		psrtype |= BIT(29);
+
+	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
+}
+
+#define IXGBEVF_MAX_RX_DESC_POLL 10
+static void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter,
+				     struct ixgbevf_ring *ring)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int wait_loop = IXGBEVF_MAX_RX_DESC_POLL;
+	u32 rxdctl;
+	u8 reg_idx = ring->reg_idx;
+
+	if (IXGBE_REMOVED(hw->hw_addr))
+		return;
+	rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(reg_idx));
+	rxdctl &= ~IXGBE_RXDCTL_ENABLE;
+
+	/* write value back with RXDCTL.ENABLE bit cleared */
+	IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl);
+
+	/* the hardware may take up to 100us to really disable the Rx queue */
+	do {
+		udelay(10);
+		rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(reg_idx));
+	} while (--wait_loop && (rxdctl & IXGBE_RXDCTL_ENABLE));
+
+	if (!wait_loop)
+		pr_err("RXDCTL.ENABLE queue %d not cleared while polling\n",
+		       reg_idx);
+}
+
+static void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter,
+					 struct ixgbevf_ring *ring)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	int wait_loop = IXGBEVF_MAX_RX_DESC_POLL;
+	u32 rxdctl;
+	u8 reg_idx = ring->reg_idx;
+
+	if (IXGBE_REMOVED(hw->hw_addr))
+		return;
+	do {
+		usleep_range(1000, 2000);
+		rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(reg_idx));
+	} while (--wait_loop && !(rxdctl & IXGBE_RXDCTL_ENABLE));
+
+	if (!wait_loop)
+		pr_err("RXDCTL.ENABLE queue %d not set while polling\n",
+		       reg_idx);
+}
+
+/**
+ * ixgbevf_init_rss_key - Initialize adapter RSS key
+ * @adapter: device handle
+ *
+ * Allocates and initializes the RSS key if it is not allocated.
+ **/
+static inline int ixgbevf_init_rss_key(struct ixgbevf_adapter *adapter)
+{
+	u32 *rss_key;
+
+	if (!adapter->rss_key) {
+		rss_key = kzalloc(IXGBEVF_RSS_HASH_KEY_SIZE, GFP_KERNEL);
+		if (unlikely(!rss_key))
+			return -ENOMEM;
+
+		netdev_rss_key_fill(rss_key, IXGBEVF_RSS_HASH_KEY_SIZE);
+		adapter->rss_key = rss_key;
+	}
+
+	return 0;
+}
+
+static void ixgbevf_setup_vfmrqc(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vfmrqc = 0, vfreta = 0;
+	u16 rss_i = adapter->num_rx_queues;
+	u8 i, j;
+
+	/* Fill out hash function seeds */
+	for (i = 0; i < IXGBEVF_VFRSSRK_REGS; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_VFRSSRK(i), *(adapter->rss_key + i));
+
+	for (i = 0, j = 0; i < IXGBEVF_X550_VFRETA_SIZE; i++, j++) {
+		if (j == rss_i)
+			j = 0;
+
+		adapter->rss_indir_tbl[i] = j;
+
+		vfreta |= j << (i & 0x3) * 8;
+		if ((i & 3) == 3) {
+			IXGBE_WRITE_REG(hw, IXGBE_VFRETA(i >> 2), vfreta);
+			vfreta = 0;
+		}
+	}
+
+	/* Perform hash on these packet types */
+	vfmrqc |= IXGBE_VFMRQC_RSS_FIELD_IPV4 |
+		IXGBE_VFMRQC_RSS_FIELD_IPV4_TCP |
+		IXGBE_VFMRQC_RSS_FIELD_IPV6 |
+		IXGBE_VFMRQC_RSS_FIELD_IPV6_TCP;
+
+	vfmrqc |= IXGBE_VFMRQC_RSSEN;
+
+	IXGBE_WRITE_REG(hw, IXGBE_VFMRQC, vfmrqc);
+}
+
+static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
+				      struct ixgbevf_ring *ring)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	union ixgbe_adv_rx_desc *rx_desc;
+	u64 rdba = ring->dma;
+	u32 rxdctl;
+	u8 reg_idx = ring->reg_idx;
+
+	/* disable queue to avoid issues while updating state */
+	rxdctl = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(reg_idx));
+	ixgbevf_disable_rx_queue(adapter, ring);
+
+	IXGBE_WRITE_REG(hw, IXGBE_VFRDBAL(reg_idx), rdba & DMA_BIT_MASK(32));
+	IXGBE_WRITE_REG(hw, IXGBE_VFRDBAH(reg_idx), rdba >> 32);
+	IXGBE_WRITE_REG(hw, IXGBE_VFRDLEN(reg_idx),
+			ring->count * sizeof(union ixgbe_adv_rx_desc));
+
+#ifndef CONFIG_SPARC
+	/* enable relaxed ordering */
+	IXGBE_WRITE_REG(hw, IXGBE_VFDCA_RXCTRL(reg_idx),
+			IXGBE_DCA_RXCTRL_DESC_RRO_EN);
+#else
+	IXGBE_WRITE_REG(hw, IXGBE_VFDCA_RXCTRL(reg_idx),
+			IXGBE_DCA_RXCTRL_DESC_RRO_EN |
+			IXGBE_DCA_RXCTRL_DATA_WRO_EN);
+#endif
+
+	/* reset head and tail pointers */
+	IXGBE_WRITE_REG(hw, IXGBE_VFRDH(reg_idx), 0);
+	IXGBE_WRITE_REG(hw, IXGBE_VFRDT(reg_idx), 0);
+	ring->tail = adapter->io_addr + IXGBE_VFRDT(reg_idx);
+
+	/* initialize rx_buffer_info */
+	memset(ring->rx_buffer_info, 0,
+	       sizeof(struct ixgbevf_rx_buffer) * ring->count);
+
+	/* initialize Rx descriptor 0 */
+	rx_desc = IXGBEVF_RX_DESC(ring, 0);
+	rx_desc->wb.upper.length = 0;
+
+	/* reset ntu and ntc to place SW in sync with hardwdare */
+	ring->next_to_clean = 0;
+	ring->next_to_use = 0;
+	ring->next_to_alloc = 0;
+
+	ixgbevf_configure_srrctl(adapter, ring, reg_idx);
+
+	/* RXDCTL.RLPML does not work on 82599 */
+	if (adapter->hw.mac.type != ixgbe_mac_82599_vf) {
+		rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
+			    IXGBE_RXDCTL_RLPML_EN);
+
+#if (PAGE_SIZE < 8192)
+		/* Limit the maximum frame size so we don't overrun the skb */
+		if (ring_uses_build_skb(ring) &&
+		    !ring_uses_large_buffer(ring))
+			rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB |
+				  IXGBE_RXDCTL_RLPML_EN;
+#endif
+	}
+
+	rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME;
+	IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl);
+
+	ixgbevf_rx_desc_queue_enable(adapter, ring);
+	ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring));
+}
+
+static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter,
+				      struct ixgbevf_ring *rx_ring)
+{
+	struct net_device *netdev = adapter->netdev;
+	unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+
+	/* set build_skb and buffer size flags */
+	clear_ring_build_skb_enabled(rx_ring);
+	clear_ring_uses_large_buffer(rx_ring);
+
+	if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
+		return;
+
+	if (PAGE_SIZE < 8192)
+		if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB)
+			set_ring_uses_large_buffer(rx_ring);
+
+	/* 82599 can't rely on RXDCTL.RLPML to restrict the size of the frame */
+	if (adapter->hw.mac.type == ixgbe_mac_82599_vf && !ring_uses_large_buffer(rx_ring))
+		return;
+
+	set_ring_build_skb_enabled(rx_ring);
+}
+
+/**
+ * ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset
+ * @adapter: board private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+	int i, ret;
+
+	ixgbevf_setup_psrtype(adapter);
+	if (hw->mac.type >= ixgbe_mac_X550_vf)
+		ixgbevf_setup_vfmrqc(adapter);
+
+	spin_lock_bh(&adapter->mbx_lock);
+	/* notify the PF of our intent to use this size of frame */
+	ret = hw->mac.ops.set_rlpml(hw, netdev->mtu + ETH_HLEN + ETH_FCS_LEN);
+	spin_unlock_bh(&adapter->mbx_lock);
+	if (ret)
+		dev_err(&adapter->pdev->dev,
+			"Failed to set MTU at %d\n", netdev->mtu);
+
+	/* Setup the HW Rx Head and Tail Descriptor Pointers and
+	 * the Base and Length of the Rx Descriptor Ring
+	 */
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct ixgbevf_ring *rx_ring = adapter->rx_ring[i];
+
+		ixgbevf_set_rx_buffer_len(adapter, rx_ring);
+		ixgbevf_configure_rx_ring(adapter, rx_ring);
+	}
+}
+
+static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev,
+				   __be16 proto, u16 vid)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	int err;
+
+	spin_lock_bh(&adapter->mbx_lock);
+
+	/* add VID to filter table */
+	err = hw->mac.ops.set_vfta(hw, vid, 0, true);
+
+	spin_unlock_bh(&adapter->mbx_lock);
+
+	if (err) {
+		netdev_err(netdev, "VF could not set VLAN %d\n", vid);
+
+		/* translate error return types so error makes sense */
+		if (err == IXGBE_ERR_MBX)
+			return -EIO;
+
+		if (err == IXGBE_ERR_INVALID_ARGUMENT)
+			return -EACCES;
+	}
+
+	set_bit(vid, adapter->active_vlans);
+
+	return err;
+}
+
+static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev,
+				    __be16 proto, u16 vid)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	int err;
+
+	spin_lock_bh(&adapter->mbx_lock);
+
+	/* remove VID from filter table */
+	err = hw->mac.ops.set_vfta(hw, vid, 0, false);
+
+	spin_unlock_bh(&adapter->mbx_lock);
+
+	if (err)
+		netdev_err(netdev, "Could not remove VLAN %d\n", vid);
+
+	clear_bit(vid, adapter->active_vlans);
+
+	return err;
+}
+
+static void ixgbevf_restore_vlan(struct ixgbevf_adapter *adapter)
+{
+	u16 vid;
+
+	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
+		ixgbevf_vlan_rx_add_vid(adapter->netdev,
+					htons(ETH_P_8021Q), vid);
+}
+
+static int ixgbevf_write_uc_addr_list(struct net_device *netdev)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	int count = 0;
+
+	if (!netdev_uc_empty(netdev)) {
+		struct netdev_hw_addr *ha;
+
+		netdev_for_each_uc_addr(ha, netdev) {
+			hw->mac.ops.set_uc_addr(hw, ++count, ha->addr);
+			udelay(200);
+		}
+	} else {
+		/* If the list is empty then send message to PF driver to
+		 * clear all MAC VLANs on this VF.
+		 */
+		hw->mac.ops.set_uc_addr(hw, 0, NULL);
+	}
+
+	return count;
+}
+
+/**
+ * ixgbevf_set_rx_mode - Multicast and unicast set
+ * @netdev: network interface device structure
+ *
+ * The set_rx_method entry point is called whenever the multicast address
+ * list, unicast address list or the network interface flags are updated.
+ * This routine is responsible for configuring the hardware for proper
+ * multicast mode and configuring requested unicast filters.
+ **/
+static void ixgbevf_set_rx_mode(struct net_device *netdev)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	unsigned int flags = netdev->flags;
+	int xcast_mode;
+
+	/* request the most inclusive mode we need */
+	if (flags & IFF_PROMISC)
+		xcast_mode = IXGBEVF_XCAST_MODE_PROMISC;
+	else if (flags & IFF_ALLMULTI)
+		xcast_mode = IXGBEVF_XCAST_MODE_ALLMULTI;
+	else if (flags & (IFF_BROADCAST | IFF_MULTICAST))
+		xcast_mode = IXGBEVF_XCAST_MODE_MULTI;
+	else
+		xcast_mode = IXGBEVF_XCAST_MODE_NONE;
+
+	spin_lock_bh(&adapter->mbx_lock);
+
+	hw->mac.ops.update_xcast_mode(hw, xcast_mode);
+
+	/* reprogram multicast list */
+	hw->mac.ops.update_mc_addr_list(hw, netdev);
+
+	ixgbevf_write_uc_addr_list(netdev);
+
+	spin_unlock_bh(&adapter->mbx_lock);
+}
+
+static void ixgbevf_napi_enable_all(struct ixgbevf_adapter *adapter)
+{
+	int q_idx;
+	struct ixgbevf_q_vector *q_vector;
+	int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+
+	for (q_idx = 0; q_idx < q_vectors; q_idx++) {
+		q_vector = adapter->q_vector[q_idx];
+		napi_enable(&q_vector->napi);
+	}
+}
+
+static void ixgbevf_napi_disable_all(struct ixgbevf_adapter *adapter)
+{
+	int q_idx;
+	struct ixgbevf_q_vector *q_vector;
+	int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+
+	for (q_idx = 0; q_idx < q_vectors; q_idx++) {
+		q_vector = adapter->q_vector[q_idx];
+		napi_disable(&q_vector->napi);
+	}
+}
+
+static int ixgbevf_configure_dcb(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	unsigned int def_q = 0;
+	unsigned int num_tcs = 0;
+	unsigned int num_rx_queues = adapter->num_rx_queues;
+	unsigned int num_tx_queues = adapter->num_tx_queues;
+	int err;
+
+	spin_lock_bh(&adapter->mbx_lock);
+
+	/* fetch queue configuration from the PF */
+	err = ixgbevf_get_queues(hw, &num_tcs, &def_q);
+
+	spin_unlock_bh(&adapter->mbx_lock);
+
+	if (err)
+		return err;
+
+	if (num_tcs > 1) {
+		/* we need only one Tx queue */
+		num_tx_queues = 1;
+
+		/* update default Tx ring register index */
+		adapter->tx_ring[0]->reg_idx = def_q;
+
+		/* we need as many queues as traffic classes */
+		num_rx_queues = num_tcs;
+	}
+
+	/* if we have a bad config abort request queue reset */
+	if ((adapter->num_rx_queues != num_rx_queues) ||
+	    (adapter->num_tx_queues != num_tx_queues)) {
+		/* force mailbox timeout to prevent further messages */
+		hw->mbx.timeout = 0;
+
+		/* wait for watchdog to come around and bail us out */
+		set_bit(__IXGBEVF_QUEUE_RESET_REQUESTED, &adapter->state);
+	}
+
+	return 0;
+}
+
+static void ixgbevf_configure(struct ixgbevf_adapter *adapter)
+{
+	ixgbevf_configure_dcb(adapter);
+
+	ixgbevf_set_rx_mode(adapter->netdev);
+
+	ixgbevf_restore_vlan(adapter);
+	ixgbevf_ipsec_restore(adapter);
+
+	ixgbevf_configure_tx(adapter);
+	ixgbevf_configure_rx(adapter);
+}
+
+static void ixgbevf_save_reset_stats(struct ixgbevf_adapter *adapter)
+{
+	/* Only save pre-reset stats if there are some */
+	if (adapter->stats.vfgprc || adapter->stats.vfgptc) {
+		adapter->stats.saved_reset_vfgprc += adapter->stats.vfgprc -
+			adapter->stats.base_vfgprc;
+		adapter->stats.saved_reset_vfgptc += adapter->stats.vfgptc -
+			adapter->stats.base_vfgptc;
+		adapter->stats.saved_reset_vfgorc += adapter->stats.vfgorc -
+			adapter->stats.base_vfgorc;
+		adapter->stats.saved_reset_vfgotc += adapter->stats.vfgotc -
+			adapter->stats.base_vfgotc;
+		adapter->stats.saved_reset_vfmprc += adapter->stats.vfmprc -
+			adapter->stats.base_vfmprc;
+	}
+}
+
+static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	adapter->stats.last_vfgprc = IXGBE_READ_REG(hw, IXGBE_VFGPRC);
+	adapter->stats.last_vfgorc = IXGBE_READ_REG(hw, IXGBE_VFGORC_LSB);
+	adapter->stats.last_vfgorc |=
+		(((u64)(IXGBE_READ_REG(hw, IXGBE_VFGORC_MSB))) << 32);
+	adapter->stats.last_vfgptc = IXGBE_READ_REG(hw, IXGBE_VFGPTC);
+	adapter->stats.last_vfgotc = IXGBE_READ_REG(hw, IXGBE_VFGOTC_LSB);
+	adapter->stats.last_vfgotc |=
+		(((u64)(IXGBE_READ_REG(hw, IXGBE_VFGOTC_MSB))) << 32);
+	adapter->stats.last_vfmprc = IXGBE_READ_REG(hw, IXGBE_VFMPRC);
+
+	adapter->stats.base_vfgprc = adapter->stats.last_vfgprc;
+	adapter->stats.base_vfgorc = adapter->stats.last_vfgorc;
+	adapter->stats.base_vfgptc = adapter->stats.last_vfgptc;
+	adapter->stats.base_vfgotc = adapter->stats.last_vfgotc;
+	adapter->stats.base_vfmprc = adapter->stats.last_vfmprc;
+}
+
+static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	static const int api[] = {
+		ixgbe_mbox_api_15,
+		ixgbe_mbox_api_14,
+		ixgbe_mbox_api_13,
+		ixgbe_mbox_api_12,
+		ixgbe_mbox_api_11,
+		ixgbe_mbox_api_10,
+		ixgbe_mbox_api_unknown
+	};
+	int err, idx = 0;
+
+	spin_lock_bh(&adapter->mbx_lock);
+
+	while (api[idx] != ixgbe_mbox_api_unknown) {
+		err = hw->mac.ops.negotiate_api_version(hw, api[idx]);
+		if (!err)
+			break;
+		idx++;
+	}
+
+	if (hw->api_version >= ixgbe_mbox_api_15) {
+		hw->mbx.ops.init_params(hw);
+		memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops,
+		       sizeof(struct ixgbe_mbx_operations));
+	}
+
+	spin_unlock_bh(&adapter->mbx_lock);
+}
+
+static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	struct ixgbe_hw *hw = &adapter->hw;
+	bool state;
+
+	ixgbevf_configure_msix(adapter);
+
+	spin_lock_bh(&adapter->mbx_lock);
+
+	if (is_valid_ether_addr(hw->mac.addr))
+		hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0);
+	else
+		hw->mac.ops.set_rar(hw, 0, hw->mac.perm_addr, 0);
+
+	spin_unlock_bh(&adapter->mbx_lock);
+
+	state = adapter->link_state;
+	hw->mac.ops.get_link_state(hw, &adapter->link_state);
+	if (state && state != adapter->link_state)
+		dev_info(&pdev->dev, "VF is administratively disabled\n");
+
+	smp_mb__before_atomic();
+	clear_bit(__IXGBEVF_DOWN, &adapter->state);
+	ixgbevf_napi_enable_all(adapter);
+
+	/* clear any pending interrupts, may auto mask */
+	IXGBE_READ_REG(hw, IXGBE_VTEICR);
+	ixgbevf_irq_enable(adapter);
+
+	/* enable transmits */
+	netif_tx_start_all_queues(netdev);
+
+	ixgbevf_save_reset_stats(adapter);
+	ixgbevf_init_last_counter_stats(adapter);
+
+	hw->mac.get_link_status = 1;
+	mod_timer(&adapter->service_timer, jiffies);
+}
+
+void ixgbevf_up(struct ixgbevf_adapter *adapter)
+{
+	ixgbevf_configure(adapter);
+
+	ixgbevf_up_complete(adapter);
+}
+
+/**
+ * ixgbevf_clean_rx_ring - Free Rx Buffers per Queue
+ * @rx_ring: ring to free buffers from
+ **/
+static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
+{
+	u16 i = rx_ring->next_to_clean;
+
+	/* Free Rx ring sk_buff */
+	if (rx_ring->skb) {
+		dev_kfree_skb(rx_ring->skb);
+		rx_ring->skb = NULL;
+	}
+
+	/* Free all the Rx ring pages */
+	while (i != rx_ring->next_to_alloc) {
+		struct ixgbevf_rx_buffer *rx_buffer;
+
+		rx_buffer = &rx_ring->rx_buffer_info[i];
+
+		/* Invalidate cache lines that may have been written to by
+		 * device so that we avoid corrupting memory.
+		 */
+		dma_sync_single_range_for_cpu(rx_ring->dev,
+					      rx_buffer->dma,
+					      rx_buffer->page_offset,
+					      ixgbevf_rx_bufsz(rx_ring),
+					      DMA_FROM_DEVICE);
+
+		/* free resources associated with mapping */
+		dma_unmap_page_attrs(rx_ring->dev,
+				     rx_buffer->dma,
+				     ixgbevf_rx_pg_size(rx_ring),
+				     DMA_FROM_DEVICE,
+				     IXGBEVF_RX_DMA_ATTR);
+
+		__page_frag_cache_drain(rx_buffer->page,
+					rx_buffer->pagecnt_bias);
+
+		i++;
+		if (i == rx_ring->count)
+			i = 0;
+	}
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+}
+
+/**
+ * ixgbevf_clean_tx_ring - Free Tx Buffers
+ * @tx_ring: ring to be cleaned
+ **/
+static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring)
+{
+	u16 i = tx_ring->next_to_clean;
+	struct ixgbevf_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
+
+	while (i != tx_ring->next_to_use) {
+		union ixgbe_adv_tx_desc *eop_desc, *tx_desc;
+
+		/* Free all the Tx ring sk_buffs */
+		if (ring_is_xdp(tx_ring))
+			page_frag_free(tx_buffer->data);
+		else
+			dev_kfree_skb_any(tx_buffer->skb);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+
+		/* check for eop_desc to determine the end of the packet */
+		eop_desc = tx_buffer->next_to_watch;
+		tx_desc = IXGBEVF_TX_DESC(tx_ring, i);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(i == tx_ring->count)) {
+				i = 0;
+				tx_buffer = tx_ring->tx_buffer_info;
+				tx_desc = IXGBEVF_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len))
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buffer, dma),
+					       dma_unmap_len(tx_buffer, len),
+					       DMA_TO_DEVICE);
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		i++;
+		if (unlikely(i == tx_ring->count)) {
+			i = 0;
+			tx_buffer = tx_ring->tx_buffer_info;
+		}
+	}
+
+	/* reset next_to_use and next_to_clean */
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+}
+
+/**
+ * ixgbevf_clean_all_rx_rings - Free Rx Buffers for all queues
+ * @adapter: board private structure
+ **/
+static void ixgbevf_clean_all_rx_rings(struct ixgbevf_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		ixgbevf_clean_rx_ring(adapter->rx_ring[i]);
+}
+
+/**
+ * ixgbevf_clean_all_tx_rings - Free Tx Buffers for all queues
+ * @adapter: board private structure
+ **/
+static void ixgbevf_clean_all_tx_rings(struct ixgbevf_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		ixgbevf_clean_tx_ring(adapter->tx_ring[i]);
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		ixgbevf_clean_tx_ring(adapter->xdp_ring[i]);
+}
+
+void ixgbevf_down(struct ixgbevf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	/* signal that we are down to the interrupt handler */
+	if (test_and_set_bit(__IXGBEVF_DOWN, &adapter->state))
+		return; /* do nothing if already down */
+
+	/* disable all enabled Rx queues */
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		ixgbevf_disable_rx_queue(adapter, adapter->rx_ring[i]);
+
+	usleep_range(10000, 20000);
+
+	netif_tx_stop_all_queues(netdev);
+
+	/* call carrier off first to avoid false dev_watchdog timeouts */
+	netif_carrier_off(netdev);
+	netif_tx_disable(netdev);
+
+	ixgbevf_irq_disable(adapter);
+
+	ixgbevf_napi_disable_all(adapter);
+
+	del_timer_sync(&adapter->service_timer);
+
+	/* disable transmits in the hardware now that interrupts are off */
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		u8 reg_idx = adapter->tx_ring[i]->reg_idx;
+
+		IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx),
+				IXGBE_TXDCTL_SWFLSH);
+	}
+
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		u8 reg_idx = adapter->xdp_ring[i]->reg_idx;
+
+		IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx),
+				IXGBE_TXDCTL_SWFLSH);
+	}
+
+	if (!pci_channel_offline(adapter->pdev))
+		ixgbevf_reset(adapter);
+
+	ixgbevf_clean_all_tx_rings(adapter);
+	ixgbevf_clean_all_rx_rings(adapter);
+}
+
+void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter)
+{
+	while (test_and_set_bit(__IXGBEVF_RESETTING, &adapter->state))
+		msleep(1);
+
+	ixgbevf_down(adapter);
+	pci_set_master(adapter->pdev);
+	ixgbevf_up(adapter);
+
+	clear_bit(__IXGBEVF_RESETTING, &adapter->state);
+}
+
+void ixgbevf_reset(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct net_device *netdev = adapter->netdev;
+
+	if (hw->mac.ops.reset_hw(hw)) {
+		hw_dbg(hw, "PF still resetting\n");
+	} else {
+		hw->mac.ops.init_hw(hw);
+		ixgbevf_negotiate_api(adapter);
+	}
+
+	if (is_valid_ether_addr(adapter->hw.mac.addr)) {
+		eth_hw_addr_set(netdev, adapter->hw.mac.addr);
+		ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr);
+	}
+
+	adapter->last_reset = jiffies;
+}
+
+static int ixgbevf_acquire_msix_vectors(struct ixgbevf_adapter *adapter,
+					int vectors)
+{
+	int vector_threshold;
+
+	/* We'll want at least 2 (vector_threshold):
+	 * 1) TxQ[0] + RxQ[0] handler
+	 * 2) Other (Link Status Change, etc.)
+	 */
+	vector_threshold = MIN_MSIX_COUNT;
+
+	/* The more we get, the more we will assign to Tx/Rx Cleanup
+	 * for the separate queues...where Rx Cleanup >= Tx Cleanup.
+	 * Right now, we simply care about how many we'll get; we'll
+	 * set them up later while requesting irq's.
+	 */
+	vectors = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
+					vector_threshold, vectors);
+
+	if (vectors < 0) {
+		dev_err(&adapter->pdev->dev,
+			"Unable to allocate MSI-X interrupts\n");
+		kfree(adapter->msix_entries);
+		adapter->msix_entries = NULL;
+		return vectors;
+	}
+
+	/* Adjust for only the vectors we'll use, which is minimum
+	 * of max_msix_q_vectors + NON_Q_VECTORS, or the number of
+	 * vectors we were allocated.
+	 */
+	adapter->num_msix_vectors = vectors;
+
+	return 0;
+}
+
+/**
+ * ixgbevf_set_num_queues - Allocate queues for device, feature dependent
+ * @adapter: board private structure to initialize
+ *
+ * This is the top level queue allocation routine.  The order here is very
+ * important, starting with the "most" number of features turned on at once,
+ * and ending with the smallest set of features.  This way large combinations
+ * can be allocated if they're turned on, and smaller combinations are the
+ * fall through conditions.
+ *
+ **/
+static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	unsigned int def_q = 0;
+	unsigned int num_tcs = 0;
+	int err;
+
+	/* Start with base case */
+	adapter->num_rx_queues = 1;
+	adapter->num_tx_queues = 1;
+	adapter->num_xdp_queues = 0;
+
+	spin_lock_bh(&adapter->mbx_lock);
+
+	/* fetch queue configuration from the PF */
+	err = ixgbevf_get_queues(hw, &num_tcs, &def_q);
+
+	spin_unlock_bh(&adapter->mbx_lock);
+
+	if (err)
+		return;
+
+	/* we need as many queues as traffic classes */
+	if (num_tcs > 1) {
+		adapter->num_rx_queues = num_tcs;
+	} else {
+		u16 rss = min_t(u16, num_online_cpus(), IXGBEVF_MAX_RSS_QUEUES);
+
+		switch (hw->api_version) {
+		case ixgbe_mbox_api_11:
+		case ixgbe_mbox_api_12:
+		case ixgbe_mbox_api_13:
+		case ixgbe_mbox_api_14:
+		case ixgbe_mbox_api_15:
+			if (adapter->xdp_prog &&
+			    hw->mac.max_tx_queues == rss)
+				rss = rss > 3 ? 2 : 1;
+
+			adapter->num_rx_queues = rss;
+			adapter->num_tx_queues = rss;
+			adapter->num_xdp_queues = adapter->xdp_prog ? rss : 0;
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/**
+ * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported
+ * @adapter: board private structure to initialize
+ *
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
+ **/
+static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter)
+{
+	int vector, v_budget;
+
+	/* It's easy to be greedy for MSI-X vectors, but it really
+	 * doesn't do us much good if we have a lot more vectors
+	 * than CPU's.  So let's be conservative and only ask for
+	 * (roughly) the same number of vectors as there are CPU's.
+	 * The default is to use pairs of vectors.
+	 */
+	v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues);
+	v_budget = min_t(int, v_budget, num_online_cpus());
+	v_budget += NON_Q_VECTORS;
+
+	adapter->msix_entries = kcalloc(v_budget,
+					sizeof(struct msix_entry), GFP_KERNEL);
+	if (!adapter->msix_entries)
+		return -ENOMEM;
+
+	for (vector = 0; vector < v_budget; vector++)
+		adapter->msix_entries[vector].entry = vector;
+
+	/* A failure in MSI-X entry allocation isn't fatal, but the VF driver
+	 * does not support any other modes, so we will simply fail here. Note
+	 * that we clean up the msix_entries pointer else-where.
+	 */
+	return ixgbevf_acquire_msix_vectors(adapter, v_budget);
+}
+
+static void ixgbevf_add_ring(struct ixgbevf_ring *ring,
+			     struct ixgbevf_ring_container *head)
+{
+	ring->next = head->ring;
+	head->ring = ring;
+	head->count++;
+}
+
+/**
+ * ixgbevf_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: number of Tx rings for q vector
+ * @txr_idx: index of first Tx ring to assign
+ * @xdp_count: total number of XDP rings to allocate
+ * @xdp_idx: index of first XDP ring to allocate
+ * @rxr_count: number of Rx rings for q vector
+ * @rxr_idx: index of first Rx ring to assign
+ *
+ * We allocate one q_vector.  If allocation fails we return -ENOMEM.
+ **/
+static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx,
+				  int txr_count, int txr_idx,
+				  int xdp_count, int xdp_idx,
+				  int rxr_count, int rxr_idx)
+{
+	struct ixgbevf_q_vector *q_vector;
+	int reg_idx = txr_idx + xdp_idx;
+	struct ixgbevf_ring *ring;
+	int ring_count, size;
+
+	ring_count = txr_count + xdp_count + rxr_count;
+	size = sizeof(*q_vector) + (sizeof(*ring) * ring_count);
+
+	/* allocate q_vector and rings */
+	q_vector = kzalloc(size, GFP_KERNEL);
+	if (!q_vector)
+		return -ENOMEM;
+
+	/* initialize NAPI */
+	netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll);
+
+	/* tie q_vector and adapter together */
+	adapter->q_vector[v_idx] = q_vector;
+	q_vector->adapter = adapter;
+	q_vector->v_idx = v_idx;
+
+	/* initialize pointer to rings */
+	ring = q_vector->ring;
+
+	while (txr_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Tx values */
+		ixgbevf_add_ring(ring, &q_vector->tx);
+
+		/* apply Tx specific ring traits */
+		ring->count = adapter->tx_ring_count;
+		ring->queue_index = txr_idx;
+		ring->reg_idx = reg_idx;
+
+		/* assign ring to adapter */
+		adapter->tx_ring[txr_idx] = ring;
+
+		/* update count and index */
+		txr_count--;
+		txr_idx++;
+		reg_idx++;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	while (xdp_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Tx values */
+		ixgbevf_add_ring(ring, &q_vector->tx);
+
+		/* apply Tx specific ring traits */
+		ring->count = adapter->tx_ring_count;
+		ring->queue_index = xdp_idx;
+		ring->reg_idx = reg_idx;
+		set_ring_xdp(ring);
+
+		/* assign ring to adapter */
+		adapter->xdp_ring[xdp_idx] = ring;
+
+		/* update count and index */
+		xdp_count--;
+		xdp_idx++;
+		reg_idx++;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	while (rxr_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Rx values */
+		ixgbevf_add_ring(ring, &q_vector->rx);
+
+		/* apply Rx specific ring traits */
+		ring->count = adapter->rx_ring_count;
+		ring->queue_index = rxr_idx;
+		ring->reg_idx = rxr_idx;
+
+		/* assign ring to adapter */
+		adapter->rx_ring[rxr_idx] = ring;
+
+		/* update count and index */
+		rxr_count--;
+		rxr_idx++;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbevf_free_q_vector - Free memory allocated for specific interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: index of vector in adapter struct
+ *
+ * This function frees the memory allocated to the q_vector.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void ixgbevf_free_q_vector(struct ixgbevf_adapter *adapter, int v_idx)
+{
+	struct ixgbevf_q_vector *q_vector = adapter->q_vector[v_idx];
+	struct ixgbevf_ring *ring;
+
+	ixgbevf_for_each_ring(ring, q_vector->tx) {
+		if (ring_is_xdp(ring))
+			adapter->xdp_ring[ring->queue_index] = NULL;
+		else
+			adapter->tx_ring[ring->queue_index] = NULL;
+	}
+
+	ixgbevf_for_each_ring(ring, q_vector->rx)
+		adapter->rx_ring[ring->queue_index] = NULL;
+
+	adapter->q_vector[v_idx] = NULL;
+	netif_napi_del(&q_vector->napi);
+
+	/* ixgbevf_get_stats() might access the rings on this vector,
+	 * we must wait a grace period before freeing it.
+	 */
+	kfree_rcu(q_vector, rcu);
+}
+
+/**
+ * ixgbevf_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one q_vector per queue interrupt.  If allocation fails we
+ * return -ENOMEM.
+ **/
+static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter)
+{
+	int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+	int rxr_remaining = adapter->num_rx_queues;
+	int txr_remaining = adapter->num_tx_queues;
+	int xdp_remaining = adapter->num_xdp_queues;
+	int rxr_idx = 0, txr_idx = 0, xdp_idx = 0, v_idx = 0;
+	int err;
+
+	if (q_vectors >= (rxr_remaining + txr_remaining + xdp_remaining)) {
+		for (; rxr_remaining; v_idx++, q_vectors--) {
+			int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
+
+			err = ixgbevf_alloc_q_vector(adapter, v_idx,
+						     0, 0, 0, 0, rqpv, rxr_idx);
+			if (err)
+				goto err_out;
+
+			/* update counts and index */
+			rxr_remaining -= rqpv;
+			rxr_idx += rqpv;
+		}
+	}
+
+	for (; q_vectors; v_idx++, q_vectors--) {
+		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors);
+		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors);
+		int xqpv = DIV_ROUND_UP(xdp_remaining, q_vectors);
+
+		err = ixgbevf_alloc_q_vector(adapter, v_idx,
+					     tqpv, txr_idx,
+					     xqpv, xdp_idx,
+					     rqpv, rxr_idx);
+
+		if (err)
+			goto err_out;
+
+		/* update counts and index */
+		rxr_remaining -= rqpv;
+		rxr_idx += rqpv;
+		txr_remaining -= tqpv;
+		txr_idx += tqpv;
+		xdp_remaining -= xqpv;
+		xdp_idx += xqpv;
+	}
+
+	return 0;
+
+err_out:
+	while (v_idx) {
+		v_idx--;
+		ixgbevf_free_q_vector(adapter, v_idx);
+	}
+
+	return -ENOMEM;
+}
+
+/**
+ * ixgbevf_free_q_vectors - Free memory allocated for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * This function frees the memory allocated to the q_vectors.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter)
+{
+	int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS;
+
+	while (q_vectors) {
+		q_vectors--;
+		ixgbevf_free_q_vector(adapter, q_vectors);
+	}
+}
+
+/**
+ * ixgbevf_reset_interrupt_capability - Reset MSIX setup
+ * @adapter: board private structure
+ *
+ **/
+static void ixgbevf_reset_interrupt_capability(struct ixgbevf_adapter *adapter)
+{
+	if (!adapter->msix_entries)
+		return;
+
+	pci_disable_msix(adapter->pdev);
+	kfree(adapter->msix_entries);
+	adapter->msix_entries = NULL;
+}
+
+/**
+ * ixgbevf_init_interrupt_scheme - Determine if MSIX is supported and init
+ * @adapter: board private structure to initialize
+ *
+ **/
+static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter)
+{
+	int err;
+
+	/* Number of supported queues */
+	ixgbevf_set_num_queues(adapter);
+
+	err = ixgbevf_set_interrupt_capability(adapter);
+	if (err) {
+		hw_dbg(&adapter->hw,
+		       "Unable to setup interrupt capabilities\n");
+		goto err_set_interrupt;
+	}
+
+	err = ixgbevf_alloc_q_vectors(adapter);
+	if (err) {
+		hw_dbg(&adapter->hw, "Unable to allocate memory for queue vectors\n");
+		goto err_alloc_q_vectors;
+	}
+
+	hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u XDP Queue count %u\n",
+	       (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled",
+	       adapter->num_rx_queues, adapter->num_tx_queues,
+	       adapter->num_xdp_queues);
+
+	set_bit(__IXGBEVF_DOWN, &adapter->state);
+
+	return 0;
+err_alloc_q_vectors:
+	ixgbevf_reset_interrupt_capability(adapter);
+err_set_interrupt:
+	return err;
+}
+
+/**
+ * ixgbevf_clear_interrupt_scheme - Clear the current interrupt scheme settings
+ * @adapter: board private structure to clear interrupt scheme on
+ *
+ * We go through and clear interrupt specific resources and reset the structure
+ * to pre-load conditions
+ **/
+static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter)
+{
+	adapter->num_tx_queues = 0;
+	adapter->num_xdp_queues = 0;
+	adapter->num_rx_queues = 0;
+
+	ixgbevf_free_q_vectors(adapter);
+	ixgbevf_reset_interrupt_capability(adapter);
+}
+
+/**
+ * ixgbevf_sw_init - Initialize general software structures
+ * @adapter: board private structure to initialize
+ *
+ * ixgbevf_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ **/
+static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct pci_dev *pdev = adapter->pdev;
+	struct net_device *netdev = adapter->netdev;
+	int err;
+
+	/* PCI config space info */
+	hw->vendor_id = pdev->vendor;
+	hw->device_id = pdev->device;
+	hw->revision_id = pdev->revision;
+	hw->subsystem_vendor_id = pdev->subsystem_vendor;
+	hw->subsystem_device_id = pdev->subsystem_device;
+
+	hw->mbx.ops.init_params(hw);
+
+	if (hw->mac.type >= ixgbe_mac_X550_vf) {
+		err = ixgbevf_init_rss_key(adapter);
+		if (err)
+			goto out;
+	}
+
+	/* assume legacy case in which PF would only give VF 2 queues */
+	hw->mac.max_tx_queues = 2;
+	hw->mac.max_rx_queues = 2;
+
+	/* lock to protect mailbox accesses */
+	spin_lock_init(&adapter->mbx_lock);
+
+	err = hw->mac.ops.reset_hw(hw);
+	if (err) {
+		dev_info(&pdev->dev,
+			 "PF still in reset state.  Is the PF interface up?\n");
+	} else {
+		err = hw->mac.ops.init_hw(hw);
+		if (err) {
+			pr_err("init_shared_code failed: %d\n", err);
+			goto out;
+		}
+		ixgbevf_negotiate_api(adapter);
+		err = hw->mac.ops.get_mac_addr(hw, hw->mac.addr);
+		if (err)
+			dev_info(&pdev->dev, "Error reading MAC address\n");
+		else if (is_zero_ether_addr(adapter->hw.mac.addr))
+			dev_info(&pdev->dev,
+				 "MAC address not assigned by administrator.\n");
+		eth_hw_addr_set(netdev, hw->mac.addr);
+	}
+
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		dev_info(&pdev->dev, "Assigning random MAC address\n");
+		eth_hw_addr_random(netdev);
+		ether_addr_copy(hw->mac.addr, netdev->dev_addr);
+		ether_addr_copy(hw->mac.perm_addr, netdev->dev_addr);
+	}
+
+	/* Enable dynamic interrupt throttling rates */
+	adapter->rx_itr_setting = 1;
+	adapter->tx_itr_setting = 1;
+
+	/* set default ring sizes */
+	adapter->tx_ring_count = IXGBEVF_DEFAULT_TXD;
+	adapter->rx_ring_count = IXGBEVF_DEFAULT_RXD;
+
+	adapter->link_state = true;
+
+	set_bit(__IXGBEVF_DOWN, &adapter->state);
+	return 0;
+
+out:
+	return err;
+}
+
+#define UPDATE_VF_COUNTER_32bit(reg, last_counter, counter)	\
+	{							\
+		u32 current_counter = IXGBE_READ_REG(hw, reg);	\
+		if (current_counter < last_counter)		\
+			counter += 0x100000000LL;		\
+		last_counter = current_counter;			\
+		counter &= 0xFFFFFFFF00000000LL;		\
+		counter |= current_counter;			\
+	}
+
+#define UPDATE_VF_COUNTER_36bit(reg_lsb, reg_msb, last_counter, counter) \
+	{								 \
+		u64 current_counter_lsb = IXGBE_READ_REG(hw, reg_lsb);	 \
+		u64 current_counter_msb = IXGBE_READ_REG(hw, reg_msb);	 \
+		u64 current_counter = (current_counter_msb << 32) |	 \
+			current_counter_lsb;				 \
+		if (current_counter < last_counter)			 \
+			counter += 0x1000000000LL;			 \
+		last_counter = current_counter;				 \
+		counter &= 0xFFFFFFF000000000LL;			 \
+		counter |= current_counter;				 \
+	}
+/**
+ * ixgbevf_update_stats - Update the board statistics counters.
+ * @adapter: board private structure
+ **/
+void ixgbevf_update_stats(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
+	u64 alloc_rx_page = 0, hw_csum_rx_error = 0;
+	int i;
+
+	if (test_bit(__IXGBEVF_DOWN, &adapter->state) ||
+	    test_bit(__IXGBEVF_RESETTING, &adapter->state))
+		return;
+
+	UPDATE_VF_COUNTER_32bit(IXGBE_VFGPRC, adapter->stats.last_vfgprc,
+				adapter->stats.vfgprc);
+	UPDATE_VF_COUNTER_32bit(IXGBE_VFGPTC, adapter->stats.last_vfgptc,
+				adapter->stats.vfgptc);
+	UPDATE_VF_COUNTER_36bit(IXGBE_VFGORC_LSB, IXGBE_VFGORC_MSB,
+				adapter->stats.last_vfgorc,
+				adapter->stats.vfgorc);
+	UPDATE_VF_COUNTER_36bit(IXGBE_VFGOTC_LSB, IXGBE_VFGOTC_MSB,
+				adapter->stats.last_vfgotc,
+				adapter->stats.vfgotc);
+	UPDATE_VF_COUNTER_32bit(IXGBE_VFMPRC, adapter->stats.last_vfmprc,
+				adapter->stats.vfmprc);
+
+	for (i = 0;  i  < adapter->num_rx_queues;  i++) {
+		struct ixgbevf_ring *rx_ring = adapter->rx_ring[i];
+
+		hw_csum_rx_error += rx_ring->rx_stats.csum_err;
+		alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed;
+		alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed;
+		alloc_rx_page += rx_ring->rx_stats.alloc_rx_page;
+	}
+
+	adapter->hw_csum_rx_error = hw_csum_rx_error;
+	adapter->alloc_rx_page_failed = alloc_rx_page_failed;
+	adapter->alloc_rx_buff_failed = alloc_rx_buff_failed;
+	adapter->alloc_rx_page = alloc_rx_page;
+}
+
+/**
+ * ixgbevf_service_timer - Timer Call-back
+ * @t: pointer to timer_list struct
+ **/
+static void ixgbevf_service_timer(struct timer_list *t)
+{
+	struct ixgbevf_adapter *adapter = from_timer(adapter, t,
+						     service_timer);
+
+	/* Reset the timer */
+	mod_timer(&adapter->service_timer, (HZ * 2) + jiffies);
+
+	ixgbevf_service_event_schedule(adapter);
+}
+
+static void ixgbevf_reset_subtask(struct ixgbevf_adapter *adapter)
+{
+	if (!test_and_clear_bit(__IXGBEVF_RESET_REQUESTED, &adapter->state))
+		return;
+
+	rtnl_lock();
+	/* If we're already down or resetting, just bail */
+	if (test_bit(__IXGBEVF_DOWN, &adapter->state) ||
+	    test_bit(__IXGBEVF_REMOVING, &adapter->state) ||
+	    test_bit(__IXGBEVF_RESETTING, &adapter->state)) {
+		rtnl_unlock();
+		return;
+	}
+
+	adapter->tx_timeout_count++;
+
+	ixgbevf_reinit_locked(adapter);
+	rtnl_unlock();
+}
+
+/**
+ * ixgbevf_check_hang_subtask - check for hung queues and dropped interrupts
+ * @adapter: pointer to the device adapter structure
+ *
+ * This function serves two purposes.  First it strobes the interrupt lines
+ * in order to make certain interrupts are occurring.  Secondly it sets the
+ * bits needed to check for TX hangs.  As a result we should immediately
+ * determine if a hang has occurred.
+ **/
+static void ixgbevf_check_hang_subtask(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 eics = 0;
+	int i;
+
+	/* If we're down or resetting, just bail */
+	if (test_bit(__IXGBEVF_DOWN, &adapter->state) ||
+	    test_bit(__IXGBEVF_RESETTING, &adapter->state))
+		return;
+
+	/* Force detection of hung controller */
+	if (netif_carrier_ok(adapter->netdev)) {
+		for (i = 0; i < adapter->num_tx_queues; i++)
+			set_check_for_tx_hang(adapter->tx_ring[i]);
+		for (i = 0; i < adapter->num_xdp_queues; i++)
+			set_check_for_tx_hang(adapter->xdp_ring[i]);
+	}
+
+	/* get one bit for every active Tx/Rx interrupt vector */
+	for (i = 0; i < adapter->num_msix_vectors - NON_Q_VECTORS; i++) {
+		struct ixgbevf_q_vector *qv = adapter->q_vector[i];
+
+		if (qv->rx.ring || qv->tx.ring)
+			eics |= BIT(i);
+	}
+
+	/* Cause software interrupt to ensure rings are cleaned */
+	IXGBE_WRITE_REG(hw, IXGBE_VTEICS, eics);
+}
+
+/**
+ * ixgbevf_watchdog_update_link - update the link status
+ * @adapter: pointer to the device adapter structure
+ **/
+static void ixgbevf_watchdog_update_link(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 link_speed = adapter->link_speed;
+	bool link_up = adapter->link_up;
+	s32 err;
+
+	spin_lock_bh(&adapter->mbx_lock);
+
+	err = hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
+
+	spin_unlock_bh(&adapter->mbx_lock);
+
+	/* if check for link returns error we will need to reset */
+	if (err && time_after(jiffies, adapter->last_reset + (10 * HZ))) {
+		set_bit(__IXGBEVF_RESET_REQUESTED, &adapter->state);
+		link_up = false;
+	}
+
+	adapter->link_up = link_up;
+	adapter->link_speed = link_speed;
+}
+
+/**
+ * ixgbevf_watchdog_link_is_up - update netif_carrier status and
+ *				 print link up message
+ * @adapter: pointer to the device adapter structure
+ **/
+static void ixgbevf_watchdog_link_is_up(struct ixgbevf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	/* only continue if link was previously down */
+	if (netif_carrier_ok(netdev))
+		return;
+
+	dev_info(&adapter->pdev->dev, "NIC Link is Up %s\n",
+		 (adapter->link_speed == IXGBE_LINK_SPEED_10GB_FULL) ?
+		 "10 Gbps" :
+		 (adapter->link_speed == IXGBE_LINK_SPEED_1GB_FULL) ?
+		 "1 Gbps" :
+		 (adapter->link_speed == IXGBE_LINK_SPEED_100_FULL) ?
+		 "100 Mbps" :
+		 "unknown speed");
+
+	netif_carrier_on(netdev);
+}
+
+/**
+ * ixgbevf_watchdog_link_is_down - update netif_carrier status and
+ *				   print link down message
+ * @adapter: pointer to the adapter structure
+ **/
+static void ixgbevf_watchdog_link_is_down(struct ixgbevf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	adapter->link_speed = 0;
+
+	/* only continue if link was up previously */
+	if (!netif_carrier_ok(netdev))
+		return;
+
+	dev_info(&adapter->pdev->dev, "NIC Link is Down\n");
+
+	netif_carrier_off(netdev);
+}
+
+/**
+ * ixgbevf_watchdog_subtask - worker thread to bring link up
+ * @adapter: board private structure
+ **/
+static void ixgbevf_watchdog_subtask(struct ixgbevf_adapter *adapter)
+{
+	/* if interface is down do nothing */
+	if (test_bit(__IXGBEVF_DOWN, &adapter->state) ||
+	    test_bit(__IXGBEVF_RESETTING, &adapter->state))
+		return;
+
+	ixgbevf_watchdog_update_link(adapter);
+
+	if (adapter->link_up && adapter->link_state)
+		ixgbevf_watchdog_link_is_up(adapter);
+	else
+		ixgbevf_watchdog_link_is_down(adapter);
+
+	ixgbevf_update_stats(adapter);
+}
+
+/**
+ * ixgbevf_service_task - manages and runs subtasks
+ * @work: pointer to work_struct containing our data
+ **/
+static void ixgbevf_service_task(struct work_struct *work)
+{
+	struct ixgbevf_adapter *adapter = container_of(work,
+						       struct ixgbevf_adapter,
+						       service_task);
+	struct ixgbe_hw *hw = &adapter->hw;
+
+	if (IXGBE_REMOVED(hw->hw_addr)) {
+		if (!test_bit(__IXGBEVF_DOWN, &adapter->state)) {
+			rtnl_lock();
+			ixgbevf_down(adapter);
+			rtnl_unlock();
+		}
+		return;
+	}
+
+	ixgbevf_queue_reset_subtask(adapter);
+	ixgbevf_reset_subtask(adapter);
+	ixgbevf_watchdog_subtask(adapter);
+	ixgbevf_check_hang_subtask(adapter);
+
+	ixgbevf_service_event_complete(adapter);
+}
+
+/**
+ * ixgbevf_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ **/
+void ixgbevf_free_tx_resources(struct ixgbevf_ring *tx_ring)
+{
+	ixgbevf_clean_tx_ring(tx_ring);
+
+	vfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+
+	/* if not set, then don't free */
+	if (!tx_ring->desc)
+		return;
+
+	dma_free_coherent(tx_ring->dev, tx_ring->size, tx_ring->desc,
+			  tx_ring->dma);
+
+	tx_ring->desc = NULL;
+}
+
+/**
+ * ixgbevf_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ **/
+static void ixgbevf_free_all_tx_resources(struct ixgbevf_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++)
+		if (adapter->tx_ring[i]->desc)
+			ixgbevf_free_tx_resources(adapter->tx_ring[i]);
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		if (adapter->xdp_ring[i]->desc)
+			ixgbevf_free_tx_resources(adapter->xdp_ring[i]);
+}
+
+/**
+ * ixgbevf_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @tx_ring: Tx descriptor ring (for a specific queue) to setup
+ *
+ * Return 0 on success, negative on failure
+ **/
+int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev);
+	int size;
+
+	size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count;
+	tx_ring->tx_buffer_info = vmalloc(size);
+	if (!tx_ring->tx_buffer_info)
+		goto err;
+
+	u64_stats_init(&tx_ring->syncp);
+
+	/* round up to nearest 4K */
+	tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc);
+	tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+	tx_ring->desc = dma_alloc_coherent(tx_ring->dev, tx_ring->size,
+					   &tx_ring->dma, GFP_KERNEL);
+	if (!tx_ring->desc)
+		goto err;
+
+	return 0;
+
+err:
+	vfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+	hw_dbg(&adapter->hw, "Unable to allocate memory for the transmit descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * ixgbevf_setup_all_tx_resources - allocate all queues Tx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter)
+{
+	int i, j = 0, err = 0;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		err = ixgbevf_setup_tx_resources(adapter->tx_ring[i]);
+		if (!err)
+			continue;
+		hw_dbg(&adapter->hw, "Allocation for Tx Queue %u failed\n", i);
+		goto err_setup_tx;
+	}
+
+	for (j = 0; j < adapter->num_xdp_queues; j++) {
+		err = ixgbevf_setup_tx_resources(adapter->xdp_ring[j]);
+		if (!err)
+			continue;
+		hw_dbg(&adapter->hw, "Allocation for XDP Queue %u failed\n", j);
+		goto err_setup_tx;
+	}
+
+	return 0;
+err_setup_tx:
+	/* rewind the index freeing the rings as we go */
+	while (j--)
+		ixgbevf_free_tx_resources(adapter->xdp_ring[j]);
+	while (i--)
+		ixgbevf_free_tx_resources(adapter->tx_ring[i]);
+
+	return err;
+}
+
+/**
+ * ixgbevf_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @adapter: board private structure
+ * @rx_ring: Rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,
+			       struct ixgbevf_ring *rx_ring)
+{
+	int size;
+
+	size = sizeof(struct ixgbevf_rx_buffer) * rx_ring->count;
+	rx_ring->rx_buffer_info = vmalloc(size);
+	if (!rx_ring->rx_buffer_info)
+		goto err;
+
+	u64_stats_init(&rx_ring->syncp);
+
+	/* Round up to nearest 4K */
+	rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc);
+	rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+	rx_ring->desc = dma_alloc_coherent(rx_ring->dev, rx_ring->size,
+					   &rx_ring->dma, GFP_KERNEL);
+
+	if (!rx_ring->desc)
+		goto err;
+
+	/* XDP RX-queue info */
+	if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev,
+			     rx_ring->queue_index, 0) < 0)
+		goto err;
+
+	rx_ring->xdp_prog = adapter->xdp_prog;
+
+	return 0;
+err:
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+	dev_err(rx_ring->dev, "Unable to allocate memory for the Rx descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * ixgbevf_setup_all_rx_resources - allocate all queues Rx resources
+ * @adapter: board private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter)
+{
+	int i, err = 0;
+
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		err = ixgbevf_setup_rx_resources(adapter, adapter->rx_ring[i]);
+		if (!err)
+			continue;
+		hw_dbg(&adapter->hw, "Allocation for Rx Queue %u failed\n", i);
+		goto err_setup_rx;
+	}
+
+	return 0;
+err_setup_rx:
+	/* rewind the index freeing the rings as we go */
+	while (i--)
+		ixgbevf_free_rx_resources(adapter->rx_ring[i]);
+	return err;
+}
+
+/**
+ * ixgbevf_free_rx_resources - Free Rx Resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring)
+{
+	ixgbevf_clean_rx_ring(rx_ring);
+
+	rx_ring->xdp_prog = NULL;
+	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+
+	dma_free_coherent(rx_ring->dev, rx_ring->size, rx_ring->desc,
+			  rx_ring->dma);
+
+	rx_ring->desc = NULL;
+}
+
+/**
+ * ixgbevf_free_all_rx_resources - Free Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ **/
+static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		if (adapter->rx_ring[i]->desc)
+			ixgbevf_free_rx_resources(adapter->rx_ring[i]);
+}
+
+/**
+ * ixgbevf_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ **/
+int ixgbevf_open(struct net_device *netdev)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	int err;
+
+	/* A previous failure to open the device because of a lack of
+	 * available MSIX vector resources may have reset the number
+	 * of msix vectors variable to zero.  The only way to recover
+	 * is to unload/reload the driver and hope that the system has
+	 * been able to recover some MSIX vector resources.
+	 */
+	if (!adapter->num_msix_vectors)
+		return -ENOMEM;
+
+	if (hw->adapter_stopped) {
+		ixgbevf_reset(adapter);
+		/* if adapter is still stopped then PF isn't up and
+		 * the VF can't start.
+		 */
+		if (hw->adapter_stopped) {
+			err = IXGBE_ERR_MBX;
+			pr_err("Unable to start - perhaps the PF Driver isn't up yet\n");
+			goto err_setup_reset;
+		}
+	}
+
+	/* disallow open during test */
+	if (test_bit(__IXGBEVF_TESTING, &adapter->state))
+		return -EBUSY;
+
+	netif_carrier_off(netdev);
+
+	/* allocate transmit descriptors */
+	err = ixgbevf_setup_all_tx_resources(adapter);
+	if (err)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	err = ixgbevf_setup_all_rx_resources(adapter);
+	if (err)
+		goto err_setup_rx;
+
+	ixgbevf_configure(adapter);
+
+	err = ixgbevf_request_irq(adapter);
+	if (err)
+		goto err_req_irq;
+
+	/* Notify the stack of the actual queue counts. */
+	err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
+	if (err)
+		goto err_set_queues;
+
+	err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
+	if (err)
+		goto err_set_queues;
+
+	ixgbevf_up_complete(adapter);
+
+	return 0;
+
+err_set_queues:
+	ixgbevf_free_irq(adapter);
+err_req_irq:
+	ixgbevf_free_all_rx_resources(adapter);
+err_setup_rx:
+	ixgbevf_free_all_tx_resources(adapter);
+err_setup_tx:
+	ixgbevf_reset(adapter);
+err_setup_reset:
+
+	return err;
+}
+
+/**
+ * ixgbevf_close_suspend - actions necessary to both suspend and close flows
+ * @adapter: the private adapter struct
+ *
+ * This function should contain the necessary work common to both suspending
+ * and closing of the device.
+ */
+static void ixgbevf_close_suspend(struct ixgbevf_adapter *adapter)
+{
+	ixgbevf_down(adapter);
+	ixgbevf_free_irq(adapter);
+	ixgbevf_free_all_tx_resources(adapter);
+	ixgbevf_free_all_rx_resources(adapter);
+}
+
+/**
+ * ixgbevf_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the drivers control, but
+ * needs to be disabled.  A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ **/
+int ixgbevf_close(struct net_device *netdev)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+
+	if (netif_device_present(netdev))
+		ixgbevf_close_suspend(adapter);
+
+	return 0;
+}
+
+static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter)
+{
+	struct net_device *dev = adapter->netdev;
+
+	if (!test_and_clear_bit(__IXGBEVF_QUEUE_RESET_REQUESTED,
+				&adapter->state))
+		return;
+
+	/* if interface is down do nothing */
+	if (test_bit(__IXGBEVF_DOWN, &adapter->state) ||
+	    test_bit(__IXGBEVF_RESETTING, &adapter->state))
+		return;
+
+	/* Hardware has to reinitialize queues and interrupts to
+	 * match packet buffer alignment. Unfortunately, the
+	 * hardware is not flexible enough to do this dynamically.
+	 */
+	rtnl_lock();
+
+	if (netif_running(dev))
+		ixgbevf_close(dev);
+
+	ixgbevf_clear_interrupt_scheme(adapter);
+	ixgbevf_init_interrupt_scheme(adapter);
+
+	if (netif_running(dev))
+		ixgbevf_open(dev);
+
+	rtnl_unlock();
+}
+
+static void ixgbevf_tx_ctxtdesc(struct ixgbevf_ring *tx_ring,
+				u32 vlan_macip_lens, u32 fceof_saidx,
+				u32 type_tucmd, u32 mss_l4len_idx)
+{
+	struct ixgbe_adv_tx_context_desc *context_desc;
+	u16 i = tx_ring->next_to_use;
+
+	context_desc = IXGBEVF_TX_CTXTDESC(tx_ring, i);
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	/* set bits to identify this as an advanced context descriptor */
+	type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
+
+	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
+	context_desc->fceof_saidx	= cpu_to_le32(fceof_saidx);
+	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
+	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
+}
+
+static int ixgbevf_tso(struct ixgbevf_ring *tx_ring,
+		       struct ixgbevf_tx_buffer *first,
+		       u8 *hdr_len,
+		       struct ixgbevf_ipsec_tx_data *itd)
+{
+	u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
+	struct sk_buff *skb = first->skb;
+	union {
+		struct iphdr *v4;
+		struct ipv6hdr *v6;
+		unsigned char *hdr;
+	} ip;
+	union {
+		struct tcphdr *tcp;
+		unsigned char *hdr;
+	} l4;
+	u32 paylen, l4_offset;
+	u32 fceof_saidx = 0;
+	int err;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	err = skb_cow_head(skb, 0);
+	if (err < 0)
+		return err;
+
+	if (eth_p_mpls(first->protocol))
+		ip.hdr = skb_inner_network_header(skb);
+	else
+		ip.hdr = skb_network_header(skb);
+	l4.hdr = skb_checksum_start(skb);
+
+	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
+	type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_TCP;
+
+	/* initialize outer IP header fields */
+	if (ip.v4->version == 4) {
+		unsigned char *csum_start = skb_checksum_start(skb);
+		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
+		int len = csum_start - trans_start;
+
+		/* IP header will have to cancel out any data that
+		 * is not a part of the outer IP header, so set to
+		 * a reverse csum if needed, else init check to 0.
+		 */
+		ip.v4->check = (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) ?
+					   csum_fold(csum_partial(trans_start,
+								  len, 0)) : 0;
+		type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
+
+		ip.v4->tot_len = 0;
+		first->tx_flags |= IXGBE_TX_FLAGS_TSO |
+				   IXGBE_TX_FLAGS_CSUM |
+				   IXGBE_TX_FLAGS_IPV4;
+	} else {
+		ip.v6->payload_len = 0;
+		first->tx_flags |= IXGBE_TX_FLAGS_TSO |
+				   IXGBE_TX_FLAGS_CSUM;
+	}
+
+	/* determine offset of inner transport header */
+	l4_offset = l4.hdr - skb->data;
+
+	/* compute length of segmentation header */
+	*hdr_len = (l4.tcp->doff * 4) + l4_offset;
+
+	/* remove payload length from inner checksum */
+	paylen = skb->len - l4_offset;
+	csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
+
+	/* update gso size and bytecount with header size */
+	first->gso_segs = skb_shinfo(skb)->gso_segs;
+	first->bytecount += (first->gso_segs - 1) * *hdr_len;
+
+	/* mss_l4len_id: use 1 as index for TSO */
+	mss_l4len_idx = (*hdr_len - l4_offset) << IXGBE_ADVTXD_L4LEN_SHIFT;
+	mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT;
+	mss_l4len_idx |= (1u << IXGBE_ADVTXD_IDX_SHIFT);
+
+	fceof_saidx |= itd->pfsa;
+	type_tucmd |= itd->flags | itd->trailer_len;
+
+	/* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */
+	vlan_macip_lens = l4.hdr - ip.hdr;
+	vlan_macip_lens |= (ip.hdr - skb->data) << IXGBE_ADVTXD_MACLEN_SHIFT;
+	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
+
+	ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd,
+			    mss_l4len_idx);
+
+	return 1;
+}
+
+static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring,
+			    struct ixgbevf_tx_buffer *first,
+			    struct ixgbevf_ipsec_tx_data *itd)
+{
+	struct sk_buff *skb = first->skb;
+	u32 vlan_macip_lens = 0;
+	u32 fceof_saidx = 0;
+	u32 type_tucmd = 0;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		goto no_csum;
+
+	switch (skb->csum_offset) {
+	case offsetof(struct tcphdr, check):
+		type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_TCP;
+		fallthrough;
+	case offsetof(struct udphdr, check):
+		break;
+	case offsetof(struct sctphdr, checksum):
+		/* validate that this is actually an SCTP request */
+		if (skb_csum_is_sctp(skb)) {
+			type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_SCTP;
+			break;
+		}
+		fallthrough;
+	default:
+		skb_checksum_help(skb);
+		goto no_csum;
+	}
+
+	if (first->protocol == htons(ETH_P_IP))
+		type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
+
+	/* update TX checksum flag */
+	first->tx_flags |= IXGBE_TX_FLAGS_CSUM;
+	vlan_macip_lens = skb_checksum_start_offset(skb) -
+			  skb_network_offset(skb);
+no_csum:
+	/* vlan_macip_lens: MACLEN, VLAN tag */
+	vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT;
+	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
+
+	fceof_saidx |= itd->pfsa;
+	type_tucmd |= itd->flags | itd->trailer_len;
+
+	ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens,
+			    fceof_saidx, type_tucmd, 0);
+}
+
+static __le32 ixgbevf_tx_cmd_type(u32 tx_flags)
+{
+	/* set type for advanced descriptor with frame checksum insertion */
+	__le32 cmd_type = cpu_to_le32(IXGBE_ADVTXD_DTYP_DATA |
+				      IXGBE_ADVTXD_DCMD_IFCS |
+				      IXGBE_ADVTXD_DCMD_DEXT);
+
+	/* set HW VLAN bit if VLAN is present */
+	if (tx_flags & IXGBE_TX_FLAGS_VLAN)
+		cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_VLE);
+
+	/* set segmentation enable bits for TSO/FSO */
+	if (tx_flags & IXGBE_TX_FLAGS_TSO)
+		cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_TSE);
+
+	return cmd_type;
+}
+
+static void ixgbevf_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc,
+				     u32 tx_flags, unsigned int paylen)
+{
+	__le32 olinfo_status = cpu_to_le32(paylen << IXGBE_ADVTXD_PAYLEN_SHIFT);
+
+	/* enable L4 checksum for TSO and TX checksum offload */
+	if (tx_flags & IXGBE_TX_FLAGS_CSUM)
+		olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_TXSM);
+
+	/* enble IPv4 checksum for TSO */
+	if (tx_flags & IXGBE_TX_FLAGS_IPV4)
+		olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_IXSM);
+
+	/* enable IPsec */
+	if (tx_flags & IXGBE_TX_FLAGS_IPSEC)
+		olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_IPSEC);
+
+	/* use index 1 context for TSO/FSO/FCOE/IPSEC */
+	if (tx_flags & (IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_IPSEC))
+		olinfo_status |= cpu_to_le32(1u << IXGBE_ADVTXD_IDX_SHIFT);
+
+	/* Check Context must be set if Tx switch is enabled, which it
+	 * always is for case where virtual functions are running
+	 */
+	olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_CC);
+
+	tx_desc->read.olinfo_status = olinfo_status;
+}
+
+static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
+			   struct ixgbevf_tx_buffer *first,
+			   const u8 hdr_len)
+{
+	struct sk_buff *skb = first->skb;
+	struct ixgbevf_tx_buffer *tx_buffer;
+	union ixgbe_adv_tx_desc *tx_desc;
+	skb_frag_t *frag;
+	dma_addr_t dma;
+	unsigned int data_len, size;
+	u32 tx_flags = first->tx_flags;
+	__le32 cmd_type = ixgbevf_tx_cmd_type(tx_flags);
+	u16 i = tx_ring->next_to_use;
+
+	tx_desc = IXGBEVF_TX_DESC(tx_ring, i);
+
+	ixgbevf_tx_olinfo_status(tx_desc, tx_flags, skb->len - hdr_len);
+
+	size = skb_headlen(skb);
+	data_len = skb->data_len;
+
+	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+	tx_buffer = first;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_error;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buffer, len, size);
+		dma_unmap_addr_set(tx_buffer, dma, dma);
+
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+		while (unlikely(size > IXGBE_MAX_DATA_PER_TXD)) {
+			tx_desc->read.cmd_type_len =
+				cmd_type | cpu_to_le32(IXGBE_MAX_DATA_PER_TXD);
+
+			i++;
+			tx_desc++;
+			if (i == tx_ring->count) {
+				tx_desc = IXGBEVF_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+			tx_desc->read.olinfo_status = 0;
+
+			dma += IXGBE_MAX_DATA_PER_TXD;
+			size -= IXGBE_MAX_DATA_PER_TXD;
+
+			tx_desc->read.buffer_addr = cpu_to_le64(dma);
+		}
+
+		if (likely(!data_len))
+			break;
+
+		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
+
+		i++;
+		tx_desc++;
+		if (i == tx_ring->count) {
+			tx_desc = IXGBEVF_TX_DESC(tx_ring, 0);
+			i = 0;
+		}
+		tx_desc->read.olinfo_status = 0;
+
+		size = skb_frag_size(frag);
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+				       DMA_TO_DEVICE);
+
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+	}
+
+	/* write last descriptor with RS and EOP bits */
+	cmd_type |= cpu_to_le32(size) | cpu_to_le32(IXGBE_TXD_CMD);
+	tx_desc->read.cmd_type_len = cmd_type;
+
+	/* set the timestamp */
+	first->time_stamp = jiffies;
+
+	skb_tx_timestamp(skb);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.  (Only applicable for weak-ordered
+	 * memory model archs, such as IA-64).
+	 *
+	 * We also need this memory barrier (wmb) to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
+
+	tx_ring->next_to_use = i;
+
+	/* notify HW of packet */
+	ixgbevf_write_tail(tx_ring, i);
+
+	return;
+dma_error:
+	dev_err(tx_ring->dev, "TX DMA map failed\n");
+	tx_buffer = &tx_ring->tx_buffer_info[i];
+
+	/* clear dma mappings for failed tx_buffer_info map */
+	while (tx_buffer != first) {
+		if (dma_unmap_len(tx_buffer, len))
+			dma_unmap_page(tx_ring->dev,
+				       dma_unmap_addr(tx_buffer, dma),
+				       dma_unmap_len(tx_buffer, len),
+				       DMA_TO_DEVICE);
+		dma_unmap_len_set(tx_buffer, len, 0);
+
+		if (i-- == 0)
+			i += tx_ring->count;
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+	}
+
+	if (dma_unmap_len(tx_buffer, len))
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+	dma_unmap_len_set(tx_buffer, len, 0);
+
+	dev_kfree_skb_any(tx_buffer->skb);
+	tx_buffer->skb = NULL;
+
+	tx_ring->next_to_use = i;
+}
+
+static int __ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size)
+{
+	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+	/* Herbert's original patch had:
+	 *  smp_mb__after_netif_stop_queue();
+	 * but since that doesn't exist yet, just open code it.
+	 */
+	smp_mb();
+
+	/* We need to check again in a case another CPU has just
+	 * made room available.
+	 */
+	if (likely(ixgbevf_desc_unused(tx_ring) < size))
+		return -EBUSY;
+
+	/* A reprieve! - use start_queue because it doesn't call schedule */
+	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
+	++tx_ring->tx_stats.restart_queue;
+
+	return 0;
+}
+
+static int ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size)
+{
+	if (likely(ixgbevf_desc_unused(tx_ring) >= size))
+		return 0;
+	return __ixgbevf_maybe_stop_tx(tx_ring, size);
+}
+
+static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
+				   struct ixgbevf_ring *tx_ring)
+{
+	struct ixgbevf_tx_buffer *first;
+	int tso;
+	u32 tx_flags = 0;
+	u16 count = TXD_USE_COUNT(skb_headlen(skb));
+	struct ixgbevf_ipsec_tx_data ipsec_tx = { 0 };
+#if PAGE_SIZE > IXGBE_MAX_DATA_PER_TXD
+	unsigned short f;
+#endif
+	u8 hdr_len = 0;
+	u8 *dst_mac = skb_header_pointer(skb, 0, 0, NULL);
+
+	if (!dst_mac || is_link_local_ether_addr(dst_mac)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
+	 *       + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD,
+	 *       + 2 desc gap to keep tail from touching head,
+	 *       + 1 desc for context descriptor,
+	 * otherwise try next time
+	 */
+#if PAGE_SIZE > IXGBE_MAX_DATA_PER_TXD
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
+
+		count += TXD_USE_COUNT(skb_frag_size(frag));
+	}
+#else
+	count += skb_shinfo(skb)->nr_frags;
+#endif
+	if (ixgbevf_maybe_stop_tx(tx_ring, count + 3)) {
+		tx_ring->tx_stats.tx_busy++;
+		return NETDEV_TX_BUSY;
+	}
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+	first->skb = skb;
+	first->bytecount = skb->len;
+	first->gso_segs = 1;
+
+	if (skb_vlan_tag_present(skb)) {
+		tx_flags |= skb_vlan_tag_get(skb);
+		tx_flags <<= IXGBE_TX_FLAGS_VLAN_SHIFT;
+		tx_flags |= IXGBE_TX_FLAGS_VLAN;
+	}
+
+	/* record initial flags and protocol */
+	first->tx_flags = tx_flags;
+	first->protocol = vlan_get_protocol(skb);
+
+#ifdef CONFIG_IXGBEVF_IPSEC
+	if (xfrm_offload(skb) && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx))
+		goto out_drop;
+#endif
+	tso = ixgbevf_tso(tx_ring, first, &hdr_len, &ipsec_tx);
+	if (tso < 0)
+		goto out_drop;
+	else if (!tso)
+		ixgbevf_tx_csum(tx_ring, first, &ipsec_tx);
+
+	ixgbevf_tx_map(tx_ring, first, hdr_len);
+
+	ixgbevf_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	return NETDEV_TX_OK;
+
+out_drop:
+	dev_kfree_skb_any(first->skb);
+	first->skb = NULL;
+
+	return NETDEV_TX_OK;
+}
+
+static netdev_tx_t ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	struct ixgbevf_ring *tx_ring;
+
+	if (skb->len <= 0) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* The minimum packet size for olinfo paylen is 17 so pad the skb
+	 * in order to meet this minimum size requirement.
+	 */
+	if (skb->len < 17) {
+		if (skb_padto(skb, 17))
+			return NETDEV_TX_OK;
+		skb->len = 17;
+	}
+
+	tx_ring = adapter->tx_ring[skb->queue_mapping];
+	return ixgbevf_xmit_frame_ring(skb, tx_ring);
+}
+
+/**
+ * ixgbevf_set_mac - Change the Ethernet Address of the NIC
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int ixgbevf_set_mac(struct net_device *netdev, void *p)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct sockaddr *addr = p;
+	int err;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	spin_lock_bh(&adapter->mbx_lock);
+
+	err = hw->mac.ops.set_rar(hw, 0, addr->sa_data, 0);
+
+	spin_unlock_bh(&adapter->mbx_lock);
+
+	if (err)
+		return -EPERM;
+
+	ether_addr_copy(hw->mac.addr, addr->sa_data);
+	ether_addr_copy(hw->mac.perm_addr, addr->sa_data);
+	eth_hw_addr_set(netdev, addr->sa_data);
+
+	return 0;
+}
+
+/**
+ * ixgbevf_change_mtu - Change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	struct ixgbe_hw *hw = &adapter->hw;
+	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
+	int ret;
+
+	/* prevent MTU being changed to a size unsupported by XDP */
+	if (adapter->xdp_prog) {
+		dev_warn(&adapter->pdev->dev, "MTU cannot be changed while XDP program is loaded\n");
+		return -EPERM;
+	}
+
+	spin_lock_bh(&adapter->mbx_lock);
+	/* notify the PF of our intent to use this size of frame */
+	ret = hw->mac.ops.set_rlpml(hw, max_frame);
+	spin_unlock_bh(&adapter->mbx_lock);
+	if (ret)
+		return -EINVAL;
+
+	hw_dbg(hw, "changing MTU from %d to %d\n",
+	       netdev->mtu, new_mtu);
+
+	/* must set new MTU before calling down or up */
+	netdev->mtu = new_mtu;
+
+	if (netif_running(netdev))
+		ixgbevf_reinit_locked(adapter);
+
+	return 0;
+}
+
+static int __maybe_unused ixgbevf_suspend(struct device *dev_d)
+{
+	struct net_device *netdev = dev_get_drvdata(dev_d);
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+
+	rtnl_lock();
+	netif_device_detach(netdev);
+
+	if (netif_running(netdev))
+		ixgbevf_close_suspend(adapter);
+
+	ixgbevf_clear_interrupt_scheme(adapter);
+	rtnl_unlock();
+
+	return 0;
+}
+
+static int __maybe_unused ixgbevf_resume(struct device *dev_d)
+{
+	struct pci_dev *pdev = to_pci_dev(dev_d);
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	u32 err;
+
+	adapter->hw.hw_addr = adapter->io_addr;
+	smp_mb__before_atomic();
+	clear_bit(__IXGBEVF_DISABLED, &adapter->state);
+	pci_set_master(pdev);
+
+	ixgbevf_reset(adapter);
+
+	rtnl_lock();
+	err = ixgbevf_init_interrupt_scheme(adapter);
+	if (!err && netif_running(netdev))
+		err = ixgbevf_open(netdev);
+	rtnl_unlock();
+	if (err)
+		return err;
+
+	netif_device_attach(netdev);
+
+	return err;
+}
+
+static void ixgbevf_shutdown(struct pci_dev *pdev)
+{
+	ixgbevf_suspend(&pdev->dev);
+}
+
+static void ixgbevf_get_tx_ring_stats(struct rtnl_link_stats64 *stats,
+				      const struct ixgbevf_ring *ring)
+{
+	u64 bytes, packets;
+	unsigned int start;
+
+	if (ring) {
+		do {
+			start = u64_stats_fetch_begin(&ring->syncp);
+			bytes = ring->stats.bytes;
+			packets = ring->stats.packets;
+		} while (u64_stats_fetch_retry(&ring->syncp, start));
+		stats->tx_bytes += bytes;
+		stats->tx_packets += packets;
+	}
+}
+
+static void ixgbevf_get_stats(struct net_device *netdev,
+			      struct rtnl_link_stats64 *stats)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+	unsigned int start;
+	u64 bytes, packets;
+	const struct ixgbevf_ring *ring;
+	int i;
+
+	ixgbevf_update_stats(adapter);
+
+	stats->multicast = adapter->stats.vfmprc - adapter->stats.base_vfmprc;
+
+	rcu_read_lock();
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		ring = adapter->rx_ring[i];
+		do {
+			start = u64_stats_fetch_begin(&ring->syncp);
+			bytes = ring->stats.bytes;
+			packets = ring->stats.packets;
+		} while (u64_stats_fetch_retry(&ring->syncp, start));
+		stats->rx_bytes += bytes;
+		stats->rx_packets += packets;
+	}
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		ring = adapter->tx_ring[i];
+		ixgbevf_get_tx_ring_stats(stats, ring);
+	}
+
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		ring = adapter->xdp_ring[i];
+		ixgbevf_get_tx_ring_stats(stats, ring);
+	}
+	rcu_read_unlock();
+}
+
+#define IXGBEVF_MAX_MAC_HDR_LEN		127
+#define IXGBEVF_MAX_NETWORK_HDR_LEN	511
+
+static netdev_features_t
+ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev,
+		       netdev_features_t features)
+{
+	unsigned int network_hdr_len, mac_hdr_len;
+
+	/* Make certain the headers can be described by a context descriptor */
+	mac_hdr_len = skb_network_header(skb) - skb->data;
+	if (unlikely(mac_hdr_len > IXGBEVF_MAX_MAC_HDR_LEN))
+		return features & ~(NETIF_F_HW_CSUM |
+				    NETIF_F_SCTP_CRC |
+				    NETIF_F_HW_VLAN_CTAG_TX |
+				    NETIF_F_TSO |
+				    NETIF_F_TSO6);
+
+	network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
+	if (unlikely(network_hdr_len >  IXGBEVF_MAX_NETWORK_HDR_LEN))
+		return features & ~(NETIF_F_HW_CSUM |
+				    NETIF_F_SCTP_CRC |
+				    NETIF_F_TSO |
+				    NETIF_F_TSO6);
+
+	/* We can only support IPV4 TSO in tunnels if we can mangle the
+	 * inner IP ID field, so strip TSO if MANGLEID is not supported.
+	 */
+	if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
+		features &= ~NETIF_F_TSO;
+
+	return features;
+}
+
+static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
+{
+	int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+	struct ixgbevf_adapter *adapter = netdev_priv(dev);
+	struct bpf_prog *old_prog;
+
+	/* verify ixgbevf ring attributes are sufficient for XDP */
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct ixgbevf_ring *ring = adapter->rx_ring[i];
+
+		if (frame_size > ixgbevf_rx_bufsz(ring))
+			return -EINVAL;
+	}
+
+	old_prog = xchg(&adapter->xdp_prog, prog);
+
+	/* If transitioning XDP modes reconfigure rings */
+	if (!!prog != !!old_prog) {
+		/* Hardware has to reinitialize queues and interrupts to
+		 * match packet buffer alignment. Unfortunately, the
+		 * hardware is not flexible enough to do this dynamically.
+		 */
+		if (netif_running(dev))
+			ixgbevf_close(dev);
+
+		ixgbevf_clear_interrupt_scheme(adapter);
+		ixgbevf_init_interrupt_scheme(adapter);
+
+		if (netif_running(dev))
+			ixgbevf_open(dev);
+	} else {
+		for (i = 0; i < adapter->num_rx_queues; i++)
+			xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
+	}
+
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	return 0;
+}
+
+static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return ixgbevf_xdp_setup(dev, xdp->prog);
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct net_device_ops ixgbevf_netdev_ops = {
+	.ndo_open		= ixgbevf_open,
+	.ndo_stop		= ixgbevf_close,
+	.ndo_start_xmit		= ixgbevf_xmit_frame,
+	.ndo_set_rx_mode	= ixgbevf_set_rx_mode,
+	.ndo_get_stats64	= ixgbevf_get_stats,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_set_mac_address	= ixgbevf_set_mac,
+	.ndo_change_mtu		= ixgbevf_change_mtu,
+	.ndo_tx_timeout		= ixgbevf_tx_timeout,
+	.ndo_vlan_rx_add_vid	= ixgbevf_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= ixgbevf_vlan_rx_kill_vid,
+	.ndo_features_check	= ixgbevf_features_check,
+	.ndo_bpf		= ixgbevf_xdp,
+};
+
+static void ixgbevf_assign_netdev_ops(struct net_device *dev)
+{
+	dev->netdev_ops = &ixgbevf_netdev_ops;
+	ixgbevf_set_ethtool_ops(dev);
+	dev->watchdog_timeo = 5 * HZ;
+}
+
+/**
+ * ixgbevf_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in ixgbevf_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * ixgbevf_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ **/
+static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct net_device *netdev;
+	struct ixgbevf_adapter *adapter = NULL;
+	struct ixgbe_hw *hw = NULL;
+	const struct ixgbevf_info *ii = ixgbevf_info_tbl[ent->driver_data];
+	bool disable_dev = false;
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (err) {
+		dev_err(&pdev->dev, "No usable DMA configuration, aborting\n");
+		goto err_dma;
+	}
+
+	err = pci_request_regions(pdev, ixgbevf_driver_name);
+	if (err) {
+		dev_err(&pdev->dev, "pci_request_regions failed 0x%x\n", err);
+		goto err_pci_reg;
+	}
+
+	pci_set_master(pdev);
+
+	netdev = alloc_etherdev_mq(sizeof(struct ixgbevf_adapter),
+				   MAX_TX_QUEUES);
+	if (!netdev) {
+		err = -ENOMEM;
+		goto err_alloc_etherdev;
+	}
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	adapter = netdev_priv(netdev);
+
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+	hw = &adapter->hw;
+	hw->back = adapter;
+	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+
+	/* call save state here in standalone driver because it relies on
+	 * adapter struct to exist, and needs to call netdev_priv
+	 */
+	pci_save_state(pdev);
+
+	hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
+			      pci_resource_len(pdev, 0));
+	adapter->io_addr = hw->hw_addr;
+	if (!hw->hw_addr) {
+		err = -EIO;
+		goto err_ioremap;
+	}
+
+	ixgbevf_assign_netdev_ops(netdev);
+
+	/* Setup HW API */
+	memcpy(&hw->mac.ops, ii->mac_ops, sizeof(hw->mac.ops));
+	hw->mac.type  = ii->mac;
+
+	memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops_legacy,
+	       sizeof(struct ixgbe_mbx_operations));
+
+	/* setup the private structure */
+	err = ixgbevf_sw_init(adapter);
+	if (err)
+		goto err_sw_init;
+
+	/* The HW MAC address was set and/or determined in sw_init */
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		pr_err("invalid MAC address\n");
+		err = -EIO;
+		goto err_sw_init;
+	}
+
+	netdev->hw_features = NETIF_F_SG |
+			      NETIF_F_TSO |
+			      NETIF_F_TSO6 |
+			      NETIF_F_RXCSUM |
+			      NETIF_F_HW_CSUM |
+			      NETIF_F_SCTP_CRC;
+
+#define IXGBEVF_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
+				      NETIF_F_GSO_GRE_CSUM | \
+				      NETIF_F_GSO_IPXIP4 | \
+				      NETIF_F_GSO_IPXIP6 | \
+				      NETIF_F_GSO_UDP_TUNNEL | \
+				      NETIF_F_GSO_UDP_TUNNEL_CSUM)
+
+	netdev->gso_partial_features = IXGBEVF_GSO_PARTIAL_FEATURES;
+	netdev->hw_features |= NETIF_F_GSO_PARTIAL |
+			       IXGBEVF_GSO_PARTIAL_FEATURES;
+
+	netdev->features = netdev->hw_features | NETIF_F_HIGHDMA;
+
+	netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
+	netdev->mpls_features |= NETIF_F_SG |
+				 NETIF_F_TSO |
+				 NETIF_F_TSO6 |
+				 NETIF_F_HW_CSUM;
+	netdev->mpls_features |= IXGBEVF_GSO_PARTIAL_FEATURES;
+	netdev->hw_enc_features |= netdev->vlan_features;
+
+	/* set this bit last since it cannot be part of vlan_features */
+	netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
+			    NETIF_F_HW_VLAN_CTAG_RX |
+			    NETIF_F_HW_VLAN_CTAG_TX;
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+	netdev->xdp_features = NETDEV_XDP_ACT_BASIC;
+
+	/* MTU range: 68 - 1504 or 9710 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	switch (adapter->hw.api_version) {
+	case ixgbe_mbox_api_11:
+	case ixgbe_mbox_api_12:
+	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
+	case ixgbe_mbox_api_15:
+		netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE -
+				  (ETH_HLEN + ETH_FCS_LEN);
+		break;
+	default:
+		if (adapter->hw.mac.type != ixgbe_mac_82599_vf)
+			netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE -
+					  (ETH_HLEN + ETH_FCS_LEN);
+		else
+			netdev->max_mtu = ETH_DATA_LEN + ETH_FCS_LEN;
+		break;
+	}
+
+	if (IXGBE_REMOVED(hw->hw_addr)) {
+		err = -EIO;
+		goto err_sw_init;
+	}
+
+	timer_setup(&adapter->service_timer, ixgbevf_service_timer, 0);
+
+	INIT_WORK(&adapter->service_task, ixgbevf_service_task);
+	set_bit(__IXGBEVF_SERVICE_INITED, &adapter->state);
+	clear_bit(__IXGBEVF_SERVICE_SCHED, &adapter->state);
+
+	err = ixgbevf_init_interrupt_scheme(adapter);
+	if (err)
+		goto err_sw_init;
+
+	strcpy(netdev->name, "eth%d");
+
+	err = register_netdev(netdev);
+	if (err)
+		goto err_register;
+
+	pci_set_drvdata(pdev, netdev);
+	netif_carrier_off(netdev);
+	ixgbevf_init_ipsec_offload(adapter);
+
+	ixgbevf_init_last_counter_stats(adapter);
+
+	/* print the VF info */
+	dev_info(&pdev->dev, "%pM\n", netdev->dev_addr);
+	dev_info(&pdev->dev, "MAC: %d\n", hw->mac.type);
+
+	switch (hw->mac.type) {
+	case ixgbe_mac_X550_vf:
+		dev_info(&pdev->dev, "Intel(R) X550 Virtual Function\n");
+		break;
+	case ixgbe_mac_X540_vf:
+		dev_info(&pdev->dev, "Intel(R) X540 Virtual Function\n");
+		break;
+	case ixgbe_mac_82599_vf:
+	default:
+		dev_info(&pdev->dev, "Intel(R) 82599 Virtual Function\n");
+		break;
+	}
+
+	return 0;
+
+err_register:
+	ixgbevf_clear_interrupt_scheme(adapter);
+err_sw_init:
+	ixgbevf_reset_interrupt_capability(adapter);
+	iounmap(adapter->io_addr);
+	kfree(adapter->rss_key);
+err_ioremap:
+	disable_dev = !test_and_set_bit(__IXGBEVF_DISABLED, &adapter->state);
+	free_netdev(netdev);
+err_alloc_etherdev:
+	pci_release_regions(pdev);
+err_pci_reg:
+err_dma:
+	if (!adapter || disable_dev)
+		pci_disable_device(pdev);
+	return err;
+}
+
+/**
+ * ixgbevf_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * ixgbevf_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.  The could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ **/
+static void ixgbevf_remove(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct ixgbevf_adapter *adapter;
+	bool disable_dev;
+
+	if (!netdev)
+		return;
+
+	adapter = netdev_priv(netdev);
+
+	set_bit(__IXGBEVF_REMOVING, &adapter->state);
+	cancel_work_sync(&adapter->service_task);
+
+	if (netdev->reg_state == NETREG_REGISTERED)
+		unregister_netdev(netdev);
+
+	ixgbevf_stop_ipsec_offload(adapter);
+	ixgbevf_clear_interrupt_scheme(adapter);
+	ixgbevf_reset_interrupt_capability(adapter);
+
+	iounmap(adapter->io_addr);
+	pci_release_regions(pdev);
+
+	hw_dbg(&adapter->hw, "Remove complete\n");
+
+	kfree(adapter->rss_key);
+	disable_dev = !test_and_set_bit(__IXGBEVF_DISABLED, &adapter->state);
+	free_netdev(netdev);
+
+	if (disable_dev)
+		pci_disable_device(pdev);
+}
+
+/**
+ * ixgbevf_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ **/
+static pci_ers_result_t ixgbevf_io_error_detected(struct pci_dev *pdev,
+						  pci_channel_state_t state)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+
+	if (!test_bit(__IXGBEVF_SERVICE_INITED, &adapter->state))
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	rtnl_lock();
+	netif_device_detach(netdev);
+
+	if (netif_running(netdev))
+		ixgbevf_close_suspend(adapter);
+
+	if (state == pci_channel_io_perm_failure) {
+		rtnl_unlock();
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	if (!test_and_set_bit(__IXGBEVF_DISABLED, &adapter->state))
+		pci_disable_device(pdev);
+	rtnl_unlock();
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * ixgbevf_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot. Implementation
+ * resembles the first-half of the ixgbevf_resume routine.
+ **/
+static pci_ers_result_t ixgbevf_io_slot_reset(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
+
+	if (pci_enable_device_mem(pdev)) {
+		dev_err(&pdev->dev,
+			"Cannot re-enable PCI device after reset.\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	adapter->hw.hw_addr = adapter->io_addr;
+	smp_mb__before_atomic();
+	clear_bit(__IXGBEVF_DISABLED, &adapter->state);
+	pci_set_master(pdev);
+
+	ixgbevf_reset(adapter);
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * ixgbevf_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells us that
+ * its OK to resume normal operation. Implementation resembles the
+ * second-half of the ixgbevf_resume routine.
+ **/
+static void ixgbevf_io_resume(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+
+	rtnl_lock();
+	if (netif_running(netdev))
+		ixgbevf_open(netdev);
+
+	netif_device_attach(netdev);
+	rtnl_unlock();
+}
+
+/* PCI Error Recovery (ERS) */
+static const struct pci_error_handlers ixgbevf_err_handler = {
+	.error_detected = ixgbevf_io_error_detected,
+	.slot_reset = ixgbevf_io_slot_reset,
+	.resume = ixgbevf_io_resume,
+};
+
+static SIMPLE_DEV_PM_OPS(ixgbevf_pm_ops, ixgbevf_suspend, ixgbevf_resume);
+
+static struct pci_driver ixgbevf_driver = {
+	.name		= ixgbevf_driver_name,
+	.id_table	= ixgbevf_pci_tbl,
+	.probe		= ixgbevf_probe,
+	.remove		= ixgbevf_remove,
+
+	/* Power Management Hooks */
+	.driver.pm	= &ixgbevf_pm_ops,
+
+	.shutdown	= ixgbevf_shutdown,
+	.err_handler	= &ixgbevf_err_handler
+};
+
+/**
+ * ixgbevf_init_module - Driver Registration Routine
+ *
+ * ixgbevf_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ **/
+static int __init ixgbevf_init_module(void)
+{
+	int err;
+
+	pr_info("%s\n", ixgbevf_driver_string);
+	pr_info("%s\n", ixgbevf_copyright);
+	ixgbevf_wq = create_singlethread_workqueue(ixgbevf_driver_name);
+	if (!ixgbevf_wq) {
+		pr_err("%s: Failed to create workqueue\n", ixgbevf_driver_name);
+		return -ENOMEM;
+	}
+
+	err = pci_register_driver(&ixgbevf_driver);
+	if (err) {
+		destroy_workqueue(ixgbevf_wq);
+		return err;
+	}
+
+	return 0;
+}
+
+module_init(ixgbevf_init_module);
+
+/**
+ * ixgbevf_exit_module - Driver Exit Cleanup Routine
+ *
+ * ixgbevf_exit_module is called just before the driver is removed
+ * from memory.
+ **/
+static void __exit ixgbevf_exit_module(void)
+{
+	pci_unregister_driver(&ixgbevf_driver);
+	if (ixgbevf_wq) {
+		destroy_workqueue(ixgbevf_wq);
+		ixgbevf_wq = NULL;
+	}
+}
+
+#ifdef DEBUG
+/**
+ * ixgbevf_get_hw_dev_name - return device name string
+ * used by hardware layer to print debugging information
+ * @hw: pointer to private hardware struct
+ **/
+char *ixgbevf_get_hw_dev_name(struct ixgbe_hw *hw)
+{
+	struct ixgbevf_adapter *adapter = hw->back;
+
+	return adapter->netdev->name;
+}
+
+#endif
+module_exit(ixgbevf_exit_module);
+
+/* ixgbevf_main.c */
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.c b/drivers/net/ethernet/intel/ixgbevf/mbx.c
new file mode 100644
index 0000000000..a55dd978f7
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "mbx.h"
+#include "ixgbevf.h"
+
+/**
+ *  ixgbevf_poll_for_msg - Wait for message notification
+ *  @hw: pointer to the HW structure
+ *
+ *  returns 0 if it successfully received a message notification
+ **/
+static s32 ixgbevf_poll_for_msg(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+	int countdown = mbx->timeout;
+
+	if (!countdown || !mbx->ops.check_for_msg)
+		return IXGBE_ERR_CONFIG;
+
+	while (countdown && mbx->ops.check_for_msg(hw)) {
+		countdown--;
+		udelay(mbx->udelay);
+	}
+
+	return countdown ? 0 : IXGBE_ERR_TIMEOUT;
+}
+
+/**
+ *  ixgbevf_poll_for_ack - Wait for message acknowledgment
+ *  @hw: pointer to the HW structure
+ *
+ *  returns 0 if it successfully received a message acknowledgment
+ **/
+static s32 ixgbevf_poll_for_ack(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+	int countdown = mbx->timeout;
+
+	if (!countdown || !mbx->ops.check_for_ack)
+		return IXGBE_ERR_CONFIG;
+
+	while (countdown && mbx->ops.check_for_ack(hw)) {
+		countdown--;
+		udelay(mbx->udelay);
+	}
+
+	return countdown ? 0 : IXGBE_ERR_TIMEOUT;
+}
+
+/**
+ * ixgbevf_read_mailbox_vf - read VF's mailbox register
+ * @hw: pointer to the HW structure
+ *
+ * This function is used to read the mailbox register dedicated for VF without
+ * losing the read to clear status bits.
+ **/
+static u32 ixgbevf_read_mailbox_vf(struct ixgbe_hw *hw)
+{
+	u32 vf_mailbox = IXGBE_READ_REG(hw, IXGBE_VFMAILBOX);
+
+	vf_mailbox |= hw->mbx.vf_mailbox;
+	hw->mbx.vf_mailbox |= vf_mailbox & IXGBE_VFMAILBOX_R2C_BITS;
+
+	return vf_mailbox;
+}
+
+/**
+ * ixgbevf_clear_msg_vf - clear PF status bit
+ * @hw: pointer to the HW structure
+ *
+ * This function is used to clear PFSTS bit in the VFMAILBOX register
+ **/
+static void ixgbevf_clear_msg_vf(struct ixgbe_hw *hw)
+{
+	u32 vf_mailbox = ixgbevf_read_mailbox_vf(hw);
+
+	if (vf_mailbox & IXGBE_VFMAILBOX_PFSTS) {
+		hw->mbx.stats.reqs++;
+		hw->mbx.vf_mailbox &= ~IXGBE_VFMAILBOX_PFSTS;
+	}
+}
+
+/**
+ * ixgbevf_clear_ack_vf - clear PF ACK bit
+ * @hw: pointer to the HW structure
+ *
+ * This function is used to clear PFACK bit in the VFMAILBOX register
+ **/
+static void ixgbevf_clear_ack_vf(struct ixgbe_hw *hw)
+{
+	u32 vf_mailbox = ixgbevf_read_mailbox_vf(hw);
+
+	if (vf_mailbox & IXGBE_VFMAILBOX_PFACK) {
+		hw->mbx.stats.acks++;
+		hw->mbx.vf_mailbox &= ~IXGBE_VFMAILBOX_PFACK;
+	}
+}
+
+/**
+ * ixgbevf_clear_rst_vf - clear PF reset bit
+ * @hw: pointer to the HW structure
+ *
+ * This function is used to clear reset indication and reset done bit in
+ * VFMAILBOX register after reset the shared resources and the reset sequence.
+ **/
+static void ixgbevf_clear_rst_vf(struct ixgbe_hw *hw)
+{
+	u32 vf_mailbox = ixgbevf_read_mailbox_vf(hw);
+
+	if (vf_mailbox & (IXGBE_VFMAILBOX_RSTI | IXGBE_VFMAILBOX_RSTD)) {
+		hw->mbx.stats.rsts++;
+		hw->mbx.vf_mailbox &= ~(IXGBE_VFMAILBOX_RSTI |
+					IXGBE_VFMAILBOX_RSTD);
+	}
+}
+
+/**
+ *  ixgbevf_check_for_bit_vf - Determine if a status bit was set
+ *  @hw: pointer to the HW structure
+ *  @mask: bitmask for bits to be tested and cleared
+ *
+ *  This function is used to check for the read to clear bits within
+ *  the V2P mailbox.
+ **/
+static s32 ixgbevf_check_for_bit_vf(struct ixgbe_hw *hw, u32 mask)
+{
+	u32 vf_mailbox = ixgbevf_read_mailbox_vf(hw);
+	s32 ret_val = IXGBE_ERR_MBX;
+
+	if (vf_mailbox & mask)
+		ret_val = 0;
+
+	return ret_val;
+}
+
+/**
+ *  ixgbevf_check_for_msg_vf - checks to see if the PF has sent mail
+ *  @hw: pointer to the HW structure
+ *
+ *  returns 0 if the PF has set the Status bit or else ERR_MBX
+ **/
+static s32 ixgbevf_check_for_msg_vf(struct ixgbe_hw *hw)
+{
+	s32 ret_val = IXGBE_ERR_MBX;
+
+	if (!ixgbevf_check_for_bit_vf(hw, IXGBE_VFMAILBOX_PFSTS)) {
+		ret_val = 0;
+		hw->mbx.stats.reqs++;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  ixgbevf_check_for_ack_vf - checks to see if the PF has ACK'd
+ *  @hw: pointer to the HW structure
+ *
+ *  returns 0 if the PF has set the ACK bit or else ERR_MBX
+ **/
+static s32 ixgbevf_check_for_ack_vf(struct ixgbe_hw *hw)
+{
+	s32 ret_val = IXGBE_ERR_MBX;
+
+	if (!ixgbevf_check_for_bit_vf(hw, IXGBE_VFMAILBOX_PFACK)) {
+		ret_val = 0;
+		ixgbevf_clear_ack_vf(hw);
+		hw->mbx.stats.acks++;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  ixgbevf_check_for_rst_vf - checks to see if the PF has reset
+ *  @hw: pointer to the HW structure
+ *
+ *  returns true if the PF has set the reset done bit or else false
+ **/
+static s32 ixgbevf_check_for_rst_vf(struct ixgbe_hw *hw)
+{
+	s32 ret_val = IXGBE_ERR_MBX;
+
+	if (!ixgbevf_check_for_bit_vf(hw, (IXGBE_VFMAILBOX_RSTD |
+					   IXGBE_VFMAILBOX_RSTI))) {
+		ret_val = 0;
+		ixgbevf_clear_rst_vf(hw);
+		hw->mbx.stats.rsts++;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  ixgbevf_obtain_mbx_lock_vf - obtain mailbox lock
+ *  @hw: pointer to the HW structure
+ *
+ *  return 0 if we obtained the mailbox lock
+ **/
+static s32 ixgbevf_obtain_mbx_lock_vf(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = IXGBE_ERR_CONFIG;
+	int countdown = mbx->timeout;
+	u32 vf_mailbox;
+
+	if (!mbx->timeout)
+		return ret_val;
+
+	while (countdown--) {
+		/* Reserve mailbox for VF use */
+		vf_mailbox = ixgbevf_read_mailbox_vf(hw);
+		vf_mailbox |= IXGBE_VFMAILBOX_VFU;
+		IXGBE_WRITE_REG(hw, IXGBE_VFMAILBOX, vf_mailbox);
+
+		/* Verify that VF is the owner of the lock */
+		if (ixgbevf_read_mailbox_vf(hw) & IXGBE_VFMAILBOX_VFU) {
+			ret_val = 0;
+			break;
+		}
+
+		/* Wait a bit before trying again */
+		udelay(mbx->udelay);
+	}
+
+	if (ret_val)
+		ret_val = IXGBE_ERR_TIMEOUT;
+
+	return ret_val;
+}
+
+/**
+ * ixgbevf_release_mbx_lock_vf - release mailbox lock
+ * @hw: pointer to the HW structure
+ **/
+static void ixgbevf_release_mbx_lock_vf(struct ixgbe_hw *hw)
+{
+	u32 vf_mailbox;
+
+	/* Return ownership of the buffer */
+	vf_mailbox = ixgbevf_read_mailbox_vf(hw);
+	vf_mailbox &= ~IXGBE_VFMAILBOX_VFU;
+	IXGBE_WRITE_REG(hw, IXGBE_VFMAILBOX, vf_mailbox);
+}
+
+/**
+ * ixgbevf_release_mbx_lock_vf_legacy - release mailbox lock
+ * @hw: pointer to the HW structure
+ **/
+static void ixgbevf_release_mbx_lock_vf_legacy(struct ixgbe_hw *__always_unused hw)
+{
+}
+
+/**
+ *  ixgbevf_write_mbx_vf - Write a message to the mailbox
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *
+ *  returns 0 if it successfully copied message into the buffer
+ **/
+static s32 ixgbevf_write_mbx_vf(struct ixgbe_hw *hw, u32 *msg, u16 size)
+{
+	u32 vf_mailbox;
+	s32 ret_val;
+	u16 i;
+
+	/* lock the mailbox to prevent PF/VF race condition */
+	ret_val = ixgbevf_obtain_mbx_lock_vf(hw);
+	if (ret_val)
+		goto out_no_write;
+
+	/* flush msg and acks as we are overwriting the message buffer */
+	ixgbevf_clear_msg_vf(hw);
+	ixgbevf_clear_ack_vf(hw);
+
+	/* copy the caller specified message to the mailbox memory buffer */
+	for (i = 0; i < size; i++)
+		IXGBE_WRITE_REG_ARRAY(hw, IXGBE_VFMBMEM, i, msg[i]);
+
+	/* update stats */
+	hw->mbx.stats.msgs_tx++;
+
+	/* interrupt the PF to tell it a message has been sent */
+	vf_mailbox = ixgbevf_read_mailbox_vf(hw);
+	vf_mailbox |= IXGBE_VFMAILBOX_REQ;
+	IXGBE_WRITE_REG(hw, IXGBE_VFMAILBOX, vf_mailbox);
+
+	/* if msg sent wait until we receive an ack */
+	ret_val = ixgbevf_poll_for_ack(hw);
+
+out_no_write:
+	hw->mbx.ops.release(hw);
+
+	return ret_val;
+}
+
+/**
+ *  ixgbevf_write_mbx_vf_legacy - Write a message to the mailbox
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *
+ *  returns 0 if it successfully copied message into the buffer
+ **/
+static s32 ixgbevf_write_mbx_vf_legacy(struct ixgbe_hw *hw, u32 *msg, u16 size)
+{
+	s32 ret_val;
+	u16 i;
+
+	/* lock the mailbox to prevent PF/VF race condition */
+	ret_val = ixgbevf_obtain_mbx_lock_vf(hw);
+	if (ret_val)
+		goto out_no_write;
+
+	/* flush msg and acks as we are overwriting the message buffer */
+	ixgbevf_check_for_msg_vf(hw);
+	ixgbevf_clear_msg_vf(hw);
+	ixgbevf_check_for_ack_vf(hw);
+	ixgbevf_clear_ack_vf(hw);
+
+	/* copy the caller specified message to the mailbox memory buffer */
+	for (i = 0; i < size; i++)
+		IXGBE_WRITE_REG_ARRAY(hw, IXGBE_VFMBMEM, i, msg[i]);
+
+	/* update stats */
+	hw->mbx.stats.msgs_tx++;
+
+	/* Drop VFU and interrupt the PF to tell it a message has been sent */
+	IXGBE_WRITE_REG(hw, IXGBE_VFMAILBOX, IXGBE_VFMAILBOX_REQ);
+
+out_no_write:
+	return ret_val;
+}
+
+/**
+ *  ixgbevf_read_mbx_vf - Reads a message from the inbox intended for VF
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *
+ *  returns 0 if it successfully read message from buffer
+ **/
+static s32 ixgbevf_read_mbx_vf(struct ixgbe_hw *hw, u32 *msg, u16 size)
+{
+	u32 vf_mailbox;
+	s32 ret_val;
+	u16 i;
+
+	/* check if there is a message from PF */
+	ret_val = ixgbevf_check_for_msg_vf(hw);
+	if (ret_val)
+		return ret_val;
+
+	ixgbevf_clear_msg_vf(hw);
+
+	/* copy the message from the mailbox memory buffer */
+	for (i = 0; i < size; i++)
+		msg[i] = IXGBE_READ_REG_ARRAY(hw, IXGBE_VFMBMEM, i);
+
+	/* Acknowledge receipt */
+	vf_mailbox = ixgbevf_read_mailbox_vf(hw);
+	vf_mailbox |= IXGBE_VFMAILBOX_ACK;
+	IXGBE_WRITE_REG(hw, IXGBE_VFMAILBOX, vf_mailbox);
+
+	/* update stats */
+	hw->mbx.stats.msgs_rx++;
+
+	return ret_val;
+}
+
+/**
+ *  ixgbevf_read_mbx_vf_legacy - Reads a message from the inbox intended for VF
+ *  @hw: pointer to the HW structure
+ *  @msg: The message buffer
+ *  @size: Length of buffer
+ *
+ *  returns 0 if it successfully read message from buffer
+ **/
+static s32 ixgbevf_read_mbx_vf_legacy(struct ixgbe_hw *hw, u32 *msg, u16 size)
+{
+	s32 ret_val = 0;
+	u16 i;
+
+	/* lock the mailbox to prevent PF/VF race condition */
+	ret_val = ixgbevf_obtain_mbx_lock_vf(hw);
+	if (ret_val)
+		goto out_no_read;
+
+	/* copy the message from the mailbox memory buffer */
+	for (i = 0; i < size; i++)
+		msg[i] = IXGBE_READ_REG_ARRAY(hw, IXGBE_VFMBMEM, i);
+
+	/* Acknowledge receipt and release mailbox, then we're done */
+	IXGBE_WRITE_REG(hw, IXGBE_VFMAILBOX, IXGBE_VFMAILBOX_ACK);
+
+	/* update stats */
+	hw->mbx.stats.msgs_rx++;
+
+out_no_read:
+	return ret_val;
+}
+
+/**
+ *  ixgbevf_init_mbx_params_vf - set initial values for VF mailbox
+ *  @hw: pointer to the HW structure
+ *
+ *  Initializes the hw->mbx struct to correct values for VF mailbox
+ */
+static s32 ixgbevf_init_mbx_params_vf(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+
+	/* start mailbox as timed out and let the reset_hw call set the timeout
+	 * value to begin communications
+	 */
+	mbx->timeout = IXGBE_VF_MBX_INIT_TIMEOUT;
+	mbx->udelay = IXGBE_VF_MBX_INIT_DELAY;
+
+	mbx->size = IXGBE_VFMAILBOX_SIZE;
+
+	mbx->stats.msgs_tx = 0;
+	mbx->stats.msgs_rx = 0;
+	mbx->stats.reqs = 0;
+	mbx->stats.acks = 0;
+	mbx->stats.rsts = 0;
+
+	return 0;
+}
+
+/**
+ * ixgbevf_poll_mbx - Wait for message and read it from the mailbox
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ *
+ * returns 0 if it successfully read message from buffer
+ **/
+s32 ixgbevf_poll_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = IXGBE_ERR_CONFIG;
+
+	if (!mbx->ops.read || !mbx->ops.check_for_msg || !mbx->timeout)
+		return ret_val;
+
+	/* limit read to size of mailbox */
+	if (size > mbx->size)
+		size = mbx->size;
+
+	ret_val = ixgbevf_poll_for_msg(hw);
+	/* if ack received read message, otherwise we timed out */
+	if (!ret_val)
+		ret_val = mbx->ops.read(hw, msg, size);
+
+	return ret_val;
+}
+
+/**
+ * ixgbevf_write_mbx - Write a message to the mailbox and wait for ACK
+ * @hw: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ *
+ * returns 0 if it successfully copied message into the buffer and
+ * received an ACK to that message within specified period
+ **/
+s32 ixgbevf_write_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+	s32 ret_val = IXGBE_ERR_CONFIG;
+
+	/**
+	 * exit if either we can't write, release
+	 * or there is no timeout defined
+	 */
+	if (!mbx->ops.write || !mbx->ops.check_for_ack || !mbx->ops.release ||
+	    !mbx->timeout)
+		return ret_val;
+
+	if (size > mbx->size)
+		ret_val = IXGBE_ERR_PARAM;
+	else
+		ret_val = mbx->ops.write(hw, msg, size);
+
+	return ret_val;
+}
+
+const struct ixgbe_mbx_operations ixgbevf_mbx_ops = {
+	.init_params	= ixgbevf_init_mbx_params_vf,
+	.release	= ixgbevf_release_mbx_lock_vf,
+	.read		= ixgbevf_read_mbx_vf,
+	.write		= ixgbevf_write_mbx_vf,
+	.check_for_msg	= ixgbevf_check_for_msg_vf,
+	.check_for_ack	= ixgbevf_check_for_ack_vf,
+	.check_for_rst	= ixgbevf_check_for_rst_vf,
+};
+
+const struct ixgbe_mbx_operations ixgbevf_mbx_ops_legacy = {
+	.init_params	= ixgbevf_init_mbx_params_vf,
+	.release	= ixgbevf_release_mbx_lock_vf_legacy,
+	.read		= ixgbevf_read_mbx_vf_legacy,
+	.write		= ixgbevf_write_mbx_vf_legacy,
+	.check_for_msg	= ixgbevf_check_for_msg_vf,
+	.check_for_ack	= ixgbevf_check_for_ack_vf,
+	.check_for_rst	= ixgbevf_check_for_rst_vf,
+};
+
+/* Mailbox operations when running on Hyper-V.
+ * On Hyper-V, PF/VF communication is not through the
+ * hardware mailbox; this communication is through
+ * a software mediated path.
+ * Most mail box operations are noop while running on
+ * Hyper-V.
+ */
+const struct ixgbe_mbx_operations ixgbevf_hv_mbx_ops = {
+	.init_params	= ixgbevf_init_mbx_params_vf,
+	.check_for_rst	= ixgbevf_check_for_rst_vf,
+};
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h
new file mode 100644
index 0000000000..835bbcc5cc
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _IXGBE_MBX_H_
+#define _IXGBE_MBX_H_
+
+#include "vf.h"
+
+#define IXGBE_VFMAILBOX_SIZE	16 /* 16 32 bit words - 64 bytes */
+
+#define IXGBE_VFMAILBOX		0x002FC
+#define IXGBE_VFMBMEM		0x00200
+
+/* Define mailbox register bits */
+#define IXGBE_VFMAILBOX_REQ	0x00000001 /* Request for PF Ready bit */
+#define IXGBE_VFMAILBOX_ACK	0x00000002 /* Ack PF message received */
+#define IXGBE_VFMAILBOX_VFU	0x00000004 /* VF owns the mailbox buffer */
+#define IXGBE_VFMAILBOX_PFU	0x00000008 /* PF owns the mailbox buffer */
+#define IXGBE_VFMAILBOX_PFSTS	0x00000010 /* PF wrote a message in the MB */
+#define IXGBE_VFMAILBOX_PFACK	0x00000020 /* PF ack the previous VF msg */
+#define IXGBE_VFMAILBOX_RSTI	0x00000040 /* PF has reset indication */
+#define IXGBE_VFMAILBOX_RSTD	0x00000080 /* PF has indicated reset done */
+#define IXGBE_VFMAILBOX_R2C_BITS 0x000000B0 /* All read to clear bits */
+
+#define IXGBE_PFMAILBOX(x)	(0x04B00 + (4 * (x)))
+#define IXGBE_PFMBMEM(vfn)	(0x13000 + (64 * (vfn)))
+
+#define IXGBE_PFMAILBOX_STS	0x00000001 /* Initiate message send to VF */
+#define IXGBE_PFMAILBOX_ACK	0x00000002 /* Ack message recv'd from VF */
+#define IXGBE_PFMAILBOX_VFU	0x00000004 /* VF owns the mailbox buffer */
+#define IXGBE_PFMAILBOX_PFU	0x00000008 /* PF owns the mailbox buffer */
+#define IXGBE_PFMAILBOX_RVFU	0x00000010 /* Reset VFU - used when VF stuck */
+
+#define IXGBE_MBVFICR_VFREQ_MASK 0x0000FFFF /* bits for VF messages */
+#define IXGBE_MBVFICR_VFREQ_VF1	0x00000001 /* bit for VF 1 message */
+#define IXGBE_MBVFICR_VFACK_MASK 0xFFFF0000 /* bits for VF acks */
+#define IXGBE_MBVFICR_VFACK_VF1	0x00010000 /* bit for VF 1 ack */
+
+/* If it's a IXGBE_VF_* msg then it originates in the VF and is sent to the
+ * PF.  The reverse is true if it is IXGBE_PF_*.
+ * Message results are the value or'd with 0xF0000000
+ */
+#define IXGBE_VT_MSGTYPE_SUCCESS	0x80000000 /* Messages or'd with this
+						    * have succeeded
+						    */
+#define IXGBE_VT_MSGTYPE_FAILURE	0x40000000 /* Messages or'd with this
+						    * have failed
+						    */
+#define IXGBE_VT_MSGTYPE_CTS		0x20000000 /* Indicates that VF is still
+						    * clear to send requests
+						    */
+#define IXGBE_VT_MSGINFO_SHIFT	16
+/* bits 23:16 are used for exra info for certain messages */
+#define IXGBE_VT_MSGINFO_MASK	(0xFF << IXGBE_VT_MSGINFO_SHIFT)
+
+/* definitions to support mailbox API version negotiation */
+
+/* each element denotes a version of the API; existing numbers may not
+ * change; any additions must go at the end
+ */
+enum ixgbe_pfvf_api_rev {
+	ixgbe_mbox_api_10,	/* API version 1.0, linux/freebsd VF driver */
+	ixgbe_mbox_api_20,	/* API version 2.0, solaris Phase1 VF driver */
+	ixgbe_mbox_api_11,	/* API version 1.1, linux/freebsd VF driver */
+	ixgbe_mbox_api_12,	/* API version 1.2, linux/freebsd VF driver */
+	ixgbe_mbox_api_13,	/* API version 1.3, linux/freebsd VF driver */
+	ixgbe_mbox_api_14,	/* API version 1.4, linux/freebsd VF driver */
+	ixgbe_mbox_api_15,	/* API version 1.5, linux/freebsd VF driver */
+	/* This value should always be last */
+	ixgbe_mbox_api_unknown,	/* indicates that API version is not known */
+};
+
+/* mailbox API, legacy requests */
+#define IXGBE_VF_RESET		0x01 /* VF requests reset */
+#define IXGBE_VF_SET_MAC_ADDR	0x02 /* VF requests PF to set MAC addr */
+#define IXGBE_VF_SET_MULTICAST	0x03 /* VF requests PF to set MC addr */
+#define IXGBE_VF_SET_VLAN	0x04 /* VF requests PF to set VLAN */
+
+/* mailbox API, version 1.0 VF requests */
+#define IXGBE_VF_SET_LPE	0x05 /* VF requests PF to set VMOLR.LPE */
+#define IXGBE_VF_SET_MACVLAN	0x06 /* VF requests PF for unicast filter */
+#define IXGBE_VF_API_NEGOTIATE	0x08 /* negotiate API version */
+
+/* mailbox API, version 1.1 VF requests */
+#define IXGBE_VF_GET_QUEUE	0x09 /* get queue configuration */
+
+/* GET_QUEUES return data indices within the mailbox */
+#define IXGBE_VF_TX_QUEUES	1 /* number of Tx queues supported */
+#define IXGBE_VF_RX_QUEUES	2 /* number of Rx queues supported */
+#define IXGBE_VF_TRANS_VLAN	3 /* Indication of port VLAN */
+#define IXGBE_VF_DEF_QUEUE	4 /* Default queue offset */
+
+/* mailbox API, version 1.2 VF requests */
+#define IXGBE_VF_GET_RETA	0x0a	/* VF request for RETA */
+#define IXGBE_VF_GET_RSS_KEY	0x0b	/* get RSS hash key */
+
+#define IXGBE_VF_UPDATE_XCAST_MODE	0x0c
+
+/* mailbox API, version 1.4 VF requests */
+#define IXGBE_VF_IPSEC_ADD	0x0d
+#define IXGBE_VF_IPSEC_DEL	0x0e
+
+#define IXGBE_VF_GET_LINK_STATE 0x10 /* get vf link state */
+
+/* length of permanent address message returned from PF */
+#define IXGBE_VF_PERMADDR_MSG_LEN	4
+/* word in permanent address message with the current multicast type */
+#define IXGBE_VF_MC_TYPE_WORD		3
+
+#define IXGBE_PF_CONTROL_MSG		0x0100 /* PF control message */
+
+#define IXGBE_VF_MBX_INIT_TIMEOUT	2000 /* number of retries on mailbox */
+#define IXGBE_VF_MBX_INIT_DELAY		500  /* microseconds between retries */
+
+/* forward declaration of the HW struct */
+struct ixgbe_hw;
+
+#endif /* _IXGBE_MBX_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbevf/regs.h b/drivers/net/ethernet/intel/ixgbevf/regs.h
new file mode 100644
index 0000000000..68d16ae5b6
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/regs.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef _IXGBEVF_REGS_H_
+#define _IXGBEVF_REGS_H_
+
+#define IXGBE_VFCTRL		0x00000
+#define IXGBE_VFSTATUS		0x00008
+#define IXGBE_VFLINKS		0x00010
+#define IXGBE_VFFRTIMER		0x00048
+#define IXGBE_VFRXMEMWRAP	0x03190
+#define IXGBE_VTEICR		0x00100
+#define IXGBE_VTEICS		0x00104
+#define IXGBE_VTEIMS		0x00108
+#define IXGBE_VTEIMC		0x0010C
+#define IXGBE_VTEIAC		0x00110
+#define IXGBE_VTEIAM		0x00114
+#define IXGBE_VTEITR(x)		(0x00820 + (4 * (x)))
+#define IXGBE_VTIVAR(x)		(0x00120 + (4 * (x)))
+#define IXGBE_VTIVAR_MISC	0x00140
+#define IXGBE_VTRSCINT(x)	(0x00180 + (4 * (x)))
+#define IXGBE_VFRDBAL(x)	(0x01000 + (0x40 * (x)))
+#define IXGBE_VFRDBAH(x)	(0x01004 + (0x40 * (x)))
+#define IXGBE_VFRDLEN(x)	(0x01008 + (0x40 * (x)))
+#define IXGBE_VFRDH(x)		(0x01010 + (0x40 * (x)))
+#define IXGBE_VFRDT(x)		(0x01018 + (0x40 * (x)))
+#define IXGBE_VFRXDCTL(x)	(0x01028 + (0x40 * (x)))
+#define IXGBE_VFSRRCTL(x)	(0x01014 + (0x40 * (x)))
+#define IXGBE_VFRSCCTL(x)	(0x0102C + (0x40 * (x)))
+#define IXGBE_VFPSRTYPE		0x00300
+#define IXGBE_VFTDBAL(x)	(0x02000 + (0x40 * (x)))
+#define IXGBE_VFTDBAH(x)	(0x02004 + (0x40 * (x)))
+#define IXGBE_VFTDLEN(x)	(0x02008 + (0x40 * (x)))
+#define IXGBE_VFTDH(x)		(0x02010 + (0x40 * (x)))
+#define IXGBE_VFTDT(x)		(0x02018 + (0x40 * (x)))
+#define IXGBE_VFTXDCTL(x)	(0x02028 + (0x40 * (x)))
+#define IXGBE_VFTDWBAL(x)	(0x02038 + (0x40 * (x)))
+#define IXGBE_VFTDWBAH(x)	(0x0203C + (0x40 * (x)))
+#define IXGBE_VFDCA_RXCTRL(x)	(0x0100C + (0x40 * (x)))
+#define IXGBE_VFDCA_TXCTRL(x)	(0x0200c + (0x40 * (x)))
+#define IXGBE_VFGPRC		0x0101C
+#define IXGBE_VFGPTC		0x0201C
+#define IXGBE_VFGORC_LSB	0x01020
+#define IXGBE_VFGORC_MSB	0x01024
+#define IXGBE_VFGOTC_LSB	0x02020
+#define IXGBE_VFGOTC_MSB	0x02024
+#define IXGBE_VFMPRC		0x01034
+#define IXGBE_VFMRQC		0x3000
+#define IXGBE_VFRSSRK(x)	(0x3100 + ((x) * 4))
+#define IXGBE_VFRETA(x)		(0x3200 + ((x) * 4))
+
+/* VFMRQC bits */
+#define IXGBE_VFMRQC_RSSEN		0x00000001  /* RSS Enable */
+#define IXGBE_VFMRQC_RSS_FIELD_IPV4_TCP	0x00010000
+#define IXGBE_VFMRQC_RSS_FIELD_IPV4	0x00020000
+#define IXGBE_VFMRQC_RSS_FIELD_IPV6	0x00100000
+#define IXGBE_VFMRQC_RSS_FIELD_IPV6_TCP	0x00200000
+
+#define IXGBE_WRITE_FLUSH(a)	(IXGBE_READ_REG(a, IXGBE_VFSTATUS))
+
+#endif /* _IXGBEVF_REGS_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
new file mode 100644
index 0000000000..1641d00d8e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -0,0 +1,1078 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#include "vf.h"
+#include "ixgbevf.h"
+
+/* On Hyper-V, to reset, we need to read from this offset
+ * from the PCI config space. This is the mechanism used on
+ * Hyper-V to support PF/VF communication.
+ */
+#define IXGBE_HV_RESET_OFFSET           0x201
+
+static inline s32 ixgbevf_write_msg_read_ack(struct ixgbe_hw *hw, u32 *msg,
+					     u32 *retmsg, u16 size)
+{
+	s32 retval = ixgbevf_write_mbx(hw, msg, size);
+
+	if (retval)
+		return retval;
+
+	return ixgbevf_poll_mbx(hw, retmsg, size);
+}
+
+/**
+ *  ixgbevf_start_hw_vf - Prepare hardware for Tx/Rx
+ *  @hw: pointer to hardware structure
+ *
+ *  Starts the hardware by filling the bus info structure and media type, clears
+ *  all on chip counters, initializes receive address registers, multicast
+ *  table, VLAN filter table, calls routine to set up link and flow control
+ *  settings, and leaves transmit and receive units disabled and uninitialized
+ **/
+static s32 ixgbevf_start_hw_vf(struct ixgbe_hw *hw)
+{
+	/* Clear adapter stopped flag */
+	hw->adapter_stopped = false;
+
+	return 0;
+}
+
+/**
+ *  ixgbevf_init_hw_vf - virtual function hardware initialization
+ *  @hw: pointer to hardware structure
+ *
+ *  Initialize the hardware by resetting the hardware and then starting
+ *  the hardware
+ **/
+static s32 ixgbevf_init_hw_vf(struct ixgbe_hw *hw)
+{
+	s32 status = hw->mac.ops.start_hw(hw);
+
+	hw->mac.ops.get_mac_addr(hw, hw->mac.addr);
+
+	return status;
+}
+
+/**
+ *  ixgbevf_reset_hw_vf - Performs hardware reset
+ *  @hw: pointer to hardware structure
+ *
+ *  Resets the hardware by resetting the transmit and receive units, masks and
+ *  clears all interrupts.
+ **/
+static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+	u32 timeout = IXGBE_VF_INIT_TIMEOUT;
+	u32 msgbuf[IXGBE_VF_PERMADDR_MSG_LEN];
+	u8 *addr = (u8 *)(&msgbuf[1]);
+	s32 ret_val;
+
+	/* Call adapter stop to disable tx/rx and clear interrupts */
+	hw->mac.ops.stop_adapter(hw);
+
+	/* reset the api version */
+	hw->api_version = ixgbe_mbox_api_10;
+	hw->mbx.ops.init_params(hw);
+	memcpy(&hw->mbx.ops, &ixgbevf_mbx_ops_legacy,
+	       sizeof(struct ixgbe_mbx_operations));
+
+	IXGBE_WRITE_REG(hw, IXGBE_VFCTRL, IXGBE_CTRL_RST);
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* we cannot reset while the RSTI / RSTD bits are asserted */
+	while (!mbx->ops.check_for_rst(hw) && timeout) {
+		timeout--;
+		udelay(5);
+	}
+
+	if (!timeout)
+		return IXGBE_ERR_RESET_FAILED;
+
+	/* mailbox timeout can now become active */
+	mbx->timeout = IXGBE_VF_MBX_INIT_TIMEOUT;
+
+	msgbuf[0] = IXGBE_VF_RESET;
+	ixgbevf_write_mbx(hw, msgbuf, 1);
+
+	mdelay(10);
+
+	/* set our "perm_addr" based on info provided by PF
+	 * also set up the mc_filter_type which is piggy backed
+	 * on the mac address in word 3
+	 */
+	ret_val = ixgbevf_poll_mbx(hw, msgbuf, IXGBE_VF_PERMADDR_MSG_LEN);
+	if (ret_val)
+		return ret_val;
+
+	/* New versions of the PF may NACK the reset return message
+	 * to indicate that no MAC address has yet been assigned for
+	 * the VF.
+	 */
+	if (msgbuf[0] != (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_SUCCESS) &&
+	    msgbuf[0] != (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_FAILURE))
+		return IXGBE_ERR_INVALID_MAC_ADDR;
+
+	if (msgbuf[0] == (IXGBE_VF_RESET | IXGBE_VT_MSGTYPE_SUCCESS))
+		ether_addr_copy(hw->mac.perm_addr, addr);
+
+	hw->mac.mc_filter_type = msgbuf[IXGBE_VF_MC_TYPE_WORD];
+
+	return 0;
+}
+
+/**
+ * ixgbevf_hv_reset_hw_vf - reset via Hyper-V
+ * @hw: pointer to private hardware struct
+ *
+ * Hyper-V variant; the VF/PF communication is through the PCI
+ * config space.
+ */
+static s32 ixgbevf_hv_reset_hw_vf(struct ixgbe_hw *hw)
+{
+#if IS_ENABLED(CONFIG_PCI_MMCONFIG)
+	struct ixgbevf_adapter *adapter = hw->back;
+	int i;
+
+	for (i = 0; i < 6; i++)
+		pci_read_config_byte(adapter->pdev,
+				     (i + IXGBE_HV_RESET_OFFSET),
+				     &hw->mac.perm_addr[i]);
+	return 0;
+#else
+	pr_err("PCI_MMCONFIG needs to be enabled for Hyper-V\n");
+	return -EOPNOTSUPP;
+#endif
+}
+
+/**
+ *  ixgbevf_stop_hw_vf - Generic stop Tx/Rx units
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets the adapter_stopped flag within ixgbe_hw struct. Clears interrupts,
+ *  disables transmit and receive units. The adapter_stopped flag is used by
+ *  the shared code and drivers to determine if the adapter is in a stopped
+ *  state and should not touch the hardware.
+ **/
+static s32 ixgbevf_stop_hw_vf(struct ixgbe_hw *hw)
+{
+	u32 number_of_queues;
+	u32 reg_val;
+	u16 i;
+
+	/* Set the adapter_stopped flag so other driver functions stop touching
+	 * the hardware
+	 */
+	hw->adapter_stopped = true;
+
+	/* Disable the receive unit by stopped each queue */
+	number_of_queues = hw->mac.max_rx_queues;
+	for (i = 0; i < number_of_queues; i++) {
+		reg_val = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(i));
+		if (reg_val & IXGBE_RXDCTL_ENABLE) {
+			reg_val &= ~IXGBE_RXDCTL_ENABLE;
+			IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(i), reg_val);
+		}
+	}
+
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* Clear interrupt mask to stop from interrupts being generated */
+	IXGBE_WRITE_REG(hw, IXGBE_VTEIMC, IXGBE_VF_IRQ_CLEAR_MASK);
+
+	/* Clear any pending interrupts */
+	IXGBE_READ_REG(hw, IXGBE_VTEICR);
+
+	/* Disable the transmit unit.  Each queue must be disabled. */
+	number_of_queues = hw->mac.max_tx_queues;
+	for (i = 0; i < number_of_queues; i++) {
+		reg_val = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(i));
+		if (reg_val & IXGBE_TXDCTL_ENABLE) {
+			reg_val &= ~IXGBE_TXDCTL_ENABLE;
+			IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(i), reg_val);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ *  ixgbevf_mta_vector - Determines bit-vector in multicast table to set
+ *  @hw: pointer to hardware structure
+ *  @mc_addr: the multicast address
+ *
+ *  Extracts the 12 bits, from a multicast address, to determine which
+ *  bit-vector to set in the multicast table. The hardware uses 12 bits, from
+ *  incoming Rx multicast addresses, to determine the bit-vector to check in
+ *  the MTA. Which of the 4 combination, of 12-bits, the hardware uses is set
+ *  by the MO field of the MCSTCTRL. The MO field is set during initialization
+ *  to mc_filter_type.
+ **/
+static s32 ixgbevf_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr)
+{
+	u32 vector = 0;
+
+	switch (hw->mac.mc_filter_type) {
+	case 0:   /* use bits [47:36] of the address */
+		vector = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4));
+		break;
+	case 1:   /* use bits [46:35] of the address */
+		vector = ((mc_addr[4] >> 3) | (((u16)mc_addr[5]) << 5));
+		break;
+	case 2:   /* use bits [45:34] of the address */
+		vector = ((mc_addr[4] >> 2) | (((u16)mc_addr[5]) << 6));
+		break;
+	case 3:   /* use bits [43:32] of the address */
+		vector = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8));
+		break;
+	default:  /* Invalid mc_filter_type */
+		break;
+	}
+
+	/* vector can only be 12-bits or boundary will be exceeded */
+	vector &= 0xFFF;
+	return vector;
+}
+
+/**
+ *  ixgbevf_get_mac_addr_vf - Read device MAC address
+ *  @hw: pointer to the HW structure
+ *  @mac_addr: pointer to storage for retrieved MAC address
+ **/
+static s32 ixgbevf_get_mac_addr_vf(struct ixgbe_hw *hw, u8 *mac_addr)
+{
+	ether_addr_copy(mac_addr, hw->mac.perm_addr);
+
+	return 0;
+}
+
+static s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr)
+{
+	u32 msgbuf[3], msgbuf_chk;
+	u8 *msg_addr = (u8 *)(&msgbuf[1]);
+	s32 ret_val;
+
+	memset(msgbuf, 0, sizeof(msgbuf));
+	/* If index is one then this is the start of a new list and needs
+	 * indication to the PF so it can do it's own list management.
+	 * If it is zero then that tells the PF to just clear all of
+	 * this VF's macvlans and there is no new list.
+	 */
+	msgbuf[0] |= index << IXGBE_VT_MSGINFO_SHIFT;
+	msgbuf[0] |= IXGBE_VF_SET_MACVLAN;
+	msgbuf_chk = msgbuf[0];
+
+	if (addr)
+		ether_addr_copy(msg_addr, addr);
+
+	ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+					     ARRAY_SIZE(msgbuf));
+	if (!ret_val) {
+		msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
+
+		if (msgbuf[0] == (msgbuf_chk | IXGBE_VT_MSGTYPE_FAILURE))
+			return -ENOMEM;
+	}
+
+	return ret_val;
+}
+
+static s32 ixgbevf_hv_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
+ * ixgbevf_get_reta_locked - get the RSS redirection table (RETA) contents.
+ * @hw: pointer to hardware structure
+ * @reta: buffer to fill with RETA contents.
+ * @num_rx_queues: Number of Rx queues configured for this port
+ *
+ * The "reta" buffer should be big enough to contain 32 registers.
+ *
+ * Returns: 0 on success.
+ *          if API doesn't support this operation - (-EOPNOTSUPP).
+ */
+int ixgbevf_get_reta_locked(struct ixgbe_hw *hw, u32 *reta, int num_rx_queues)
+{
+	int err, i, j;
+	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
+	u32 *hw_reta = &msgbuf[1];
+	u32 mask = 0;
+
+	/* We have to use a mailbox for 82599 and x540 devices only.
+	 * For these devices RETA has 128 entries.
+	 * Also these VFs support up to 4 RSS queues. Therefore PF will compress
+	 * 16 RETA entries in each DWORD giving 2 bits to each entry.
+	 */
+	int dwords = IXGBEVF_82599_RETA_SIZE / 16;
+
+	/* We support the RSS querying for 82599 and x540 devices only.
+	 * Thus return an error if API doesn't support RETA querying or querying
+	 * is not supported for this device type.
+	 */
+	switch (hw->api_version) {
+	case ixgbe_mbox_api_15:
+	case ixgbe_mbox_api_14:
+	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_12:
+		if (hw->mac.type < ixgbe_mac_X550_vf)
+			break;
+		fallthrough;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	msgbuf[0] = IXGBE_VF_GET_RETA;
+
+	err = ixgbevf_write_mbx(hw, msgbuf, 1);
+
+	if (err)
+		return err;
+
+	err = ixgbevf_poll_mbx(hw, msgbuf, dwords + 1);
+
+	if (err)
+		return err;
+
+	msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
+
+	/* If the operation has been refused by a PF return -EPERM */
+	if (msgbuf[0] == (IXGBE_VF_GET_RETA | IXGBE_VT_MSGTYPE_FAILURE))
+		return -EPERM;
+
+	/* If we didn't get an ACK there must have been
+	 * some sort of mailbox error so we should treat it
+	 * as such.
+	 */
+	if (msgbuf[0] != (IXGBE_VF_GET_RETA | IXGBE_VT_MSGTYPE_SUCCESS))
+		return IXGBE_ERR_MBX;
+
+	/* ixgbevf doesn't support more than 2 queues at the moment */
+	if (num_rx_queues > 1)
+		mask = 0x1;
+
+	for (i = 0; i < dwords; i++)
+		for (j = 0; j < 16; j++)
+			reta[i * 16 + j] = (hw_reta[i] >> (2 * j)) & mask;
+
+	return 0;
+}
+
+/**
+ * ixgbevf_get_rss_key_locked - get the RSS Random Key
+ * @hw: pointer to the HW structure
+ * @rss_key: buffer to fill with RSS Hash Key contents.
+ *
+ * The "rss_key" buffer should be big enough to contain 10 registers.
+ *
+ * Returns: 0 on success.
+ *          if API doesn't support this operation - (-EOPNOTSUPP).
+ */
+int ixgbevf_get_rss_key_locked(struct ixgbe_hw *hw, u8 *rss_key)
+{
+	int err;
+	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
+
+	/* We currently support the RSS Random Key retrieval for 82599 and x540
+	 * devices only.
+	 *
+	 * Thus return an error if API doesn't support RSS Random Key retrieval
+	 * or if the operation is not supported for this device type.
+	 */
+	switch (hw->api_version) {
+	case ixgbe_mbox_api_15:
+	case ixgbe_mbox_api_14:
+	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_12:
+		if (hw->mac.type < ixgbe_mac_X550_vf)
+			break;
+		fallthrough;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	msgbuf[0] = IXGBE_VF_GET_RSS_KEY;
+	err = ixgbevf_write_mbx(hw, msgbuf, 1);
+
+	if (err)
+		return err;
+
+	err = ixgbevf_poll_mbx(hw, msgbuf, 11);
+
+	if (err)
+		return err;
+
+	msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
+
+	/* If the operation has been refused by a PF return -EPERM */
+	if (msgbuf[0] == (IXGBE_VF_GET_RSS_KEY | IXGBE_VT_MSGTYPE_FAILURE))
+		return -EPERM;
+
+	/* If we didn't get an ACK there must have been
+	 * some sort of mailbox error so we should treat it
+	 * as such.
+	 */
+	if (msgbuf[0] != (IXGBE_VF_GET_RSS_KEY | IXGBE_VT_MSGTYPE_SUCCESS))
+		return IXGBE_ERR_MBX;
+
+	memcpy(rss_key, msgbuf + 1, IXGBEVF_RSS_HASH_KEY_SIZE);
+
+	return 0;
+}
+
+/**
+ *  ixgbevf_set_rar_vf - set device MAC address
+ *  @hw: pointer to hardware structure
+ *  @index: Receive address register to write
+ *  @addr: Address to put into receive address register
+ *  @vmdq: Unused in this implementation
+ **/
+static s32 ixgbevf_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr,
+			      u32 vmdq)
+{
+	u32 msgbuf[3];
+	u8 *msg_addr = (u8 *)(&msgbuf[1]);
+	s32 ret_val;
+
+	memset(msgbuf, 0, sizeof(msgbuf));
+	msgbuf[0] = IXGBE_VF_SET_MAC_ADDR;
+	ether_addr_copy(msg_addr, addr);
+
+	ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+					     ARRAY_SIZE(msgbuf));
+	msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
+
+	/* if nacked the address was rejected, use "perm_addr" */
+	if (!ret_val &&
+	    (msgbuf[0] == (IXGBE_VF_SET_MAC_ADDR | IXGBE_VT_MSGTYPE_FAILURE))) {
+		ixgbevf_get_mac_addr_vf(hw, hw->mac.addr);
+		return IXGBE_ERR_MBX;
+	}
+
+	return ret_val;
+}
+
+/**
+ *  ixgbevf_hv_set_rar_vf - set device MAC address Hyper-V variant
+ *  @hw: pointer to hardware structure
+ *  @index: Receive address register to write
+ *  @addr: Address to put into receive address register
+ *  @vmdq: Unused in this implementation
+ *
+ * We don't really allow setting the device MAC address. However,
+ * if the address being set is the permanent MAC address we will
+ * permit that.
+ **/
+static s32 ixgbevf_hv_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr,
+				 u32 vmdq)
+{
+	if (ether_addr_equal(addr, hw->mac.perm_addr))
+		return 0;
+
+	return -EOPNOTSUPP;
+}
+
+/**
+ *  ixgbevf_update_mc_addr_list_vf - Update Multicast addresses
+ *  @hw: pointer to the HW structure
+ *  @netdev: pointer to net device structure
+ *
+ *  Updates the Multicast Table Array.
+ **/
+static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
+					  struct net_device *netdev)
+{
+	struct netdev_hw_addr *ha;
+	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
+	u16 *vector_list = (u16 *)&msgbuf[1];
+	u32 cnt, i;
+
+	/* Each entry in the list uses 1 16 bit word.  We have 30
+	 * 16 bit words available in our HW msg buffer (minus 1 for the
+	 * msg type).  That's 30 hash values if we pack 'em right.  If
+	 * there are more than 30 MC addresses to add then punt the
+	 * extras for now and then add code to handle more than 30 later.
+	 * It would be unusual for a server to request that many multi-cast
+	 * addresses except for in large enterprise network environments.
+	 */
+
+	cnt = netdev_mc_count(netdev);
+	if (cnt > 30)
+		cnt = 30;
+	msgbuf[0] = IXGBE_VF_SET_MULTICAST;
+	msgbuf[0] |= cnt << IXGBE_VT_MSGINFO_SHIFT;
+
+	i = 0;
+	netdev_for_each_mc_addr(ha, netdev) {
+		if (i == cnt)
+			break;
+		if (is_link_local_ether_addr(ha->addr))
+			continue;
+
+		vector_list[i++] = ixgbevf_mta_vector(hw, ha->addr);
+	}
+
+	return ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+			IXGBE_VFMAILBOX_SIZE);
+}
+
+/**
+ * ixgbevf_hv_update_mc_addr_list_vf - stub
+ * @hw: unused
+ * @netdev: unused
+ *
+ * Hyper-V variant - just a stub.
+ */
+static s32 ixgbevf_hv_update_mc_addr_list_vf(struct ixgbe_hw *hw,
+					     struct net_device *netdev)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
+ *  ixgbevf_update_xcast_mode - Update Multicast mode
+ *  @hw: pointer to the HW structure
+ *  @xcast_mode: new multicast mode
+ *
+ *  Updates the Multicast Mode of VF.
+ **/
+static s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode)
+{
+	u32 msgbuf[2];
+	s32 err;
+
+	switch (hw->api_version) {
+	case ixgbe_mbox_api_12:
+		/* promisc introduced in 1.3 version */
+		if (xcast_mode == IXGBEVF_XCAST_MODE_PROMISC)
+			return -EOPNOTSUPP;
+		fallthrough;
+	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
+	case ixgbe_mbox_api_15:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	msgbuf[0] = IXGBE_VF_UPDATE_XCAST_MODE;
+	msgbuf[1] = xcast_mode;
+
+	err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+					 ARRAY_SIZE(msgbuf));
+	if (err)
+		return err;
+
+	msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
+	if (msgbuf[0] == (IXGBE_VF_UPDATE_XCAST_MODE | IXGBE_VT_MSGTYPE_FAILURE))
+		return -EPERM;
+
+	return 0;
+}
+
+/**
+ * ixgbevf_hv_update_xcast_mode - stub
+ * @hw: unused
+ * @xcast_mode: unused
+ *
+ * Hyper-V variant - just a stub.
+ */
+static s32 ixgbevf_hv_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
+ * ixgbevf_get_link_state_vf - Get VF link state from PF
+ * @hw: pointer to the HW structure
+ * @link_state: link state storage
+ *
+ * Returns state of the operation error or success.
+ */
+static s32 ixgbevf_get_link_state_vf(struct ixgbe_hw *hw, bool *link_state)
+{
+	u32 msgbuf[2];
+	s32 ret_val;
+	s32 err;
+
+	msgbuf[0] = IXGBE_VF_GET_LINK_STATE;
+	msgbuf[1] = 0x0;
+
+	err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2);
+
+	if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE)) {
+		ret_val = IXGBE_ERR_MBX;
+	} else {
+		ret_val = 0;
+		*link_state = msgbuf[1];
+	}
+
+	return ret_val;
+}
+
+/**
+ * ixgbevf_hv_get_link_state_vf - * Hyper-V variant - just a stub.
+ * @hw: unused
+ * @link_state: unused
+ *
+ * Hyper-V variant; there is no mailbox communication.
+ */
+static s32 ixgbevf_hv_get_link_state_vf(struct ixgbe_hw *hw, bool *link_state)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
+ *  ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address
+ *  @hw: pointer to the HW structure
+ *  @vlan: 12 bit VLAN ID
+ *  @vind: unused by VF drivers
+ *  @vlan_on: if true then set bit, else clear bit
+ **/
+static s32 ixgbevf_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+			       bool vlan_on)
+{
+	u32 msgbuf[2];
+	s32 err;
+
+	msgbuf[0] = IXGBE_VF_SET_VLAN;
+	msgbuf[1] = vlan;
+	/* Setting the 8 bit field MSG INFO to TRUE indicates "add" */
+	msgbuf[0] |= vlan_on << IXGBE_VT_MSGINFO_SHIFT;
+
+	err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+					 ARRAY_SIZE(msgbuf));
+	if (err)
+		goto mbx_err;
+
+	/* remove extra bits from the message */
+	msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
+	msgbuf[0] &= ~(0xFF << IXGBE_VT_MSGINFO_SHIFT);
+
+	if (msgbuf[0] != (IXGBE_VF_SET_VLAN | IXGBE_VT_MSGTYPE_SUCCESS))
+		err = IXGBE_ERR_INVALID_ARGUMENT;
+
+mbx_err:
+	return err;
+}
+
+/**
+ * ixgbevf_hv_set_vfta_vf - * Hyper-V variant - just a stub.
+ * @hw: unused
+ * @vlan: unused
+ * @vind: unused
+ * @vlan_on: unused
+ */
+static s32 ixgbevf_hv_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind,
+				  bool vlan_on)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
+ *  ixgbevf_setup_mac_link_vf - Setup MAC link settings
+ *  @hw: pointer to hardware structure
+ *  @speed: Unused in this implementation
+ *  @autoneg: Unused in this implementation
+ *  @autoneg_wait_to_complete: Unused in this implementation
+ *
+ *  Do nothing and return success.  VF drivers are not allowed to change
+ *  global settings.  Maintained for driver compatibility.
+ **/
+static s32 ixgbevf_setup_mac_link_vf(struct ixgbe_hw *hw,
+				     ixgbe_link_speed speed, bool autoneg,
+				     bool autoneg_wait_to_complete)
+{
+	return 0;
+}
+
+/**
+ *  ixgbevf_check_mac_link_vf - Get link/speed status
+ *  @hw: pointer to hardware structure
+ *  @speed: pointer to link speed
+ *  @link_up: true is link is up, false otherwise
+ *  @autoneg_wait_to_complete: unused
+ *
+ *  Reads the links register to determine if link is up and the current speed
+ **/
+static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw,
+				     ixgbe_link_speed *speed,
+				     bool *link_up,
+				     bool autoneg_wait_to_complete)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+	struct ixgbe_mac_info *mac = &hw->mac;
+	s32 ret_val = 0;
+	u32 links_reg;
+	u32 in_msg = 0;
+
+	/* If we were hit with a reset drop the link */
+	if (!mbx->ops.check_for_rst(hw) || !mbx->timeout)
+		mac->get_link_status = true;
+
+	if (!mac->get_link_status)
+		goto out;
+
+	/* if link status is down no point in checking to see if pf is up */
+	links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS);
+	if (!(links_reg & IXGBE_LINKS_UP))
+		goto out;
+
+	/* for SFP+ modules and DA cables on 82599 it can take up to 500usecs
+	 * before the link status is correct
+	 */
+	if (mac->type == ixgbe_mac_82599_vf) {
+		int i;
+
+		for (i = 0; i < 5; i++) {
+			udelay(100);
+			links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS);
+
+			if (!(links_reg & IXGBE_LINKS_UP))
+				goto out;
+		}
+	}
+
+	switch (links_reg & IXGBE_LINKS_SPEED_82599) {
+	case IXGBE_LINKS_SPEED_10G_82599:
+		*speed = IXGBE_LINK_SPEED_10GB_FULL;
+		break;
+	case IXGBE_LINKS_SPEED_1G_82599:
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+		break;
+	case IXGBE_LINKS_SPEED_100_82599:
+		*speed = IXGBE_LINK_SPEED_100_FULL;
+		break;
+	}
+
+	/* if the read failed it could just be a mailbox collision, best wait
+	 * until we are called again and don't report an error
+	 */
+	if (mbx->ops.read(hw, &in_msg, 1)) {
+		if (hw->api_version >= ixgbe_mbox_api_15)
+			mac->get_link_status = false;
+		goto out;
+	}
+
+	if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
+		/* msg is not CTS and is NACK we must have lost CTS status */
+		if (in_msg & IXGBE_VT_MSGTYPE_FAILURE)
+			ret_val = -1;
+		goto out;
+	}
+
+	/* the pf is talking, if we timed out in the past we reinit */
+	if (!mbx->timeout) {
+		ret_val = -1;
+		goto out;
+	}
+
+	/* if we passed all the tests above then the link is up and we no
+	 * longer need to check for link
+	 */
+	mac->get_link_status = false;
+
+out:
+	*link_up = !mac->get_link_status;
+	return ret_val;
+}
+
+/**
+ * ixgbevf_hv_check_mac_link_vf - check link
+ * @hw: pointer to private hardware struct
+ * @speed: pointer to link speed
+ * @link_up: true is link is up, false otherwise
+ * @autoneg_wait_to_complete: unused
+ *
+ * Hyper-V variant; there is no mailbox communication.
+ */
+static s32 ixgbevf_hv_check_mac_link_vf(struct ixgbe_hw *hw,
+					ixgbe_link_speed *speed,
+					bool *link_up,
+					bool autoneg_wait_to_complete)
+{
+	struct ixgbe_mbx_info *mbx = &hw->mbx;
+	struct ixgbe_mac_info *mac = &hw->mac;
+	u32 links_reg;
+
+	/* If we were hit with a reset drop the link */
+	if (!mbx->ops.check_for_rst(hw) || !mbx->timeout)
+		mac->get_link_status = true;
+
+	if (!mac->get_link_status)
+		goto out;
+
+	/* if link status is down no point in checking to see if pf is up */
+	links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS);
+	if (!(links_reg & IXGBE_LINKS_UP))
+		goto out;
+
+	/* for SFP+ modules and DA cables on 82599 it can take up to 500usecs
+	 * before the link status is correct
+	 */
+	if (mac->type == ixgbe_mac_82599_vf) {
+		int i;
+
+		for (i = 0; i < 5; i++) {
+			udelay(100);
+			links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS);
+
+			if (!(links_reg & IXGBE_LINKS_UP))
+				goto out;
+		}
+	}
+
+	switch (links_reg & IXGBE_LINKS_SPEED_82599) {
+	case IXGBE_LINKS_SPEED_10G_82599:
+		*speed = IXGBE_LINK_SPEED_10GB_FULL;
+		break;
+	case IXGBE_LINKS_SPEED_1G_82599:
+		*speed = IXGBE_LINK_SPEED_1GB_FULL;
+		break;
+	case IXGBE_LINKS_SPEED_100_82599:
+		*speed = IXGBE_LINK_SPEED_100_FULL;
+		break;
+	}
+
+	/* if we passed all the tests above then the link is up and we no
+	 * longer need to check for link
+	 */
+	mac->get_link_status = false;
+
+out:
+	*link_up = !mac->get_link_status;
+	return 0;
+}
+
+/**
+ *  ixgbevf_set_rlpml_vf - Set the maximum receive packet length
+ *  @hw: pointer to the HW structure
+ *  @max_size: value to assign to max frame size
+ **/
+static s32 ixgbevf_set_rlpml_vf(struct ixgbe_hw *hw, u16 max_size)
+{
+	u32 msgbuf[2];
+	s32 ret_val;
+
+	msgbuf[0] = IXGBE_VF_SET_LPE;
+	msgbuf[1] = max_size;
+
+	ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+					     ARRAY_SIZE(msgbuf));
+	if (ret_val)
+		return ret_val;
+	if ((msgbuf[0] & IXGBE_VF_SET_LPE) &&
+	    (msgbuf[0] & IXGBE_VT_MSGTYPE_FAILURE))
+		return IXGBE_ERR_MBX;
+
+	return 0;
+}
+
+/**
+ * ixgbevf_hv_set_rlpml_vf - Set the maximum receive packet length
+ * @hw: pointer to the HW structure
+ * @max_size: value to assign to max frame size
+ * Hyper-V variant.
+ **/
+static s32 ixgbevf_hv_set_rlpml_vf(struct ixgbe_hw *hw, u16 max_size)
+{
+	u32 reg;
+
+	/* If we are on Hyper-V, we implement this functionality
+	 * differently.
+	 */
+	reg =  IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(0));
+	/* CRC == 4 */
+	reg |= ((max_size + 4) | IXGBE_RXDCTL_RLPML_EN);
+	IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(0), reg);
+
+	return 0;
+}
+
+/**
+ *  ixgbevf_negotiate_api_version_vf - Negotiate supported API version
+ *  @hw: pointer to the HW structure
+ *  @api: integer containing requested API version
+ **/
+static int ixgbevf_negotiate_api_version_vf(struct ixgbe_hw *hw, int api)
+{
+	int err;
+	u32 msg[3];
+
+	/* Negotiate the mailbox API version */
+	msg[0] = IXGBE_VF_API_NEGOTIATE;
+	msg[1] = api;
+	msg[2] = 0;
+
+	err = ixgbevf_write_msg_read_ack(hw, msg, msg, ARRAY_SIZE(msg));
+	if (!err) {
+		msg[0] &= ~IXGBE_VT_MSGTYPE_CTS;
+
+		/* Store value and return 0 on success */
+		if (msg[0] == (IXGBE_VF_API_NEGOTIATE |
+			      IXGBE_VT_MSGTYPE_SUCCESS)) {
+			hw->api_version = api;
+			return 0;
+		}
+
+		err = IXGBE_ERR_INVALID_ARGUMENT;
+	}
+
+	return err;
+}
+
+/**
+ *  ixgbevf_hv_negotiate_api_version_vf - Negotiate supported API version
+ *  @hw: pointer to the HW structure
+ *  @api: integer containing requested API version
+ *  Hyper-V version - only ixgbe_mbox_api_10 supported.
+ **/
+static int ixgbevf_hv_negotiate_api_version_vf(struct ixgbe_hw *hw, int api)
+{
+	/* Hyper-V only supports api version ixgbe_mbox_api_10 */
+	if (api != ixgbe_mbox_api_10)
+		return IXGBE_ERR_INVALID_ARGUMENT;
+
+	return 0;
+}
+
+int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
+		       unsigned int *default_tc)
+{
+	int err;
+	u32 msg[5];
+
+	/* do nothing if API doesn't support ixgbevf_get_queues */
+	switch (hw->api_version) {
+	case ixgbe_mbox_api_11:
+	case ixgbe_mbox_api_12:
+	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
+	case ixgbe_mbox_api_15:
+		break;
+	default:
+		return 0;
+	}
+
+	/* Fetch queue configuration from the PF */
+	msg[0] = IXGBE_VF_GET_QUEUE;
+	msg[1] = msg[2] = msg[3] = msg[4] = 0;
+
+	err = ixgbevf_write_msg_read_ack(hw, msg, msg, ARRAY_SIZE(msg));
+	if (!err) {
+		msg[0] &= ~IXGBE_VT_MSGTYPE_CTS;
+
+		/* if we didn't get an ACK there must have been
+		 * some sort of mailbox error so we should treat it
+		 * as such
+		 */
+		if (msg[0] != (IXGBE_VF_GET_QUEUE | IXGBE_VT_MSGTYPE_SUCCESS))
+			return IXGBE_ERR_MBX;
+
+		/* record and validate values from message */
+		hw->mac.max_tx_queues = msg[IXGBE_VF_TX_QUEUES];
+		if (hw->mac.max_tx_queues == 0 ||
+		    hw->mac.max_tx_queues > IXGBE_VF_MAX_TX_QUEUES)
+			hw->mac.max_tx_queues = IXGBE_VF_MAX_TX_QUEUES;
+
+		hw->mac.max_rx_queues = msg[IXGBE_VF_RX_QUEUES];
+		if (hw->mac.max_rx_queues == 0 ||
+		    hw->mac.max_rx_queues > IXGBE_VF_MAX_RX_QUEUES)
+			hw->mac.max_rx_queues = IXGBE_VF_MAX_RX_QUEUES;
+
+		*num_tcs = msg[IXGBE_VF_TRANS_VLAN];
+		/* in case of unknown state assume we cannot tag frames */
+		if (*num_tcs > hw->mac.max_rx_queues)
+			*num_tcs = 1;
+
+		*default_tc = msg[IXGBE_VF_DEF_QUEUE];
+		/* default to queue 0 on out-of-bounds queue number */
+		if (*default_tc >= hw->mac.max_tx_queues)
+			*default_tc = 0;
+	}
+
+	return err;
+}
+
+static const struct ixgbe_mac_operations ixgbevf_mac_ops = {
+	.init_hw		= ixgbevf_init_hw_vf,
+	.reset_hw		= ixgbevf_reset_hw_vf,
+	.start_hw		= ixgbevf_start_hw_vf,
+	.get_mac_addr		= ixgbevf_get_mac_addr_vf,
+	.stop_adapter		= ixgbevf_stop_hw_vf,
+	.setup_link		= ixgbevf_setup_mac_link_vf,
+	.check_link		= ixgbevf_check_mac_link_vf,
+	.negotiate_api_version	= ixgbevf_negotiate_api_version_vf,
+	.set_rar		= ixgbevf_set_rar_vf,
+	.update_mc_addr_list	= ixgbevf_update_mc_addr_list_vf,
+	.update_xcast_mode	= ixgbevf_update_xcast_mode,
+	.get_link_state		= ixgbevf_get_link_state_vf,
+	.set_uc_addr		= ixgbevf_set_uc_addr_vf,
+	.set_vfta		= ixgbevf_set_vfta_vf,
+	.set_rlpml		= ixgbevf_set_rlpml_vf,
+};
+
+static const struct ixgbe_mac_operations ixgbevf_hv_mac_ops = {
+	.init_hw		= ixgbevf_init_hw_vf,
+	.reset_hw		= ixgbevf_hv_reset_hw_vf,
+	.start_hw		= ixgbevf_start_hw_vf,
+	.get_mac_addr		= ixgbevf_get_mac_addr_vf,
+	.stop_adapter		= ixgbevf_stop_hw_vf,
+	.setup_link		= ixgbevf_setup_mac_link_vf,
+	.check_link		= ixgbevf_hv_check_mac_link_vf,
+	.negotiate_api_version	= ixgbevf_hv_negotiate_api_version_vf,
+	.set_rar		= ixgbevf_hv_set_rar_vf,
+	.update_mc_addr_list	= ixgbevf_hv_update_mc_addr_list_vf,
+	.update_xcast_mode	= ixgbevf_hv_update_xcast_mode,
+	.get_link_state		= ixgbevf_hv_get_link_state_vf,
+	.set_uc_addr		= ixgbevf_hv_set_uc_addr_vf,
+	.set_vfta		= ixgbevf_hv_set_vfta_vf,
+	.set_rlpml		= ixgbevf_hv_set_rlpml_vf,
+};
+
+const struct ixgbevf_info ixgbevf_82599_vf_info = {
+	.mac = ixgbe_mac_82599_vf,
+	.mac_ops = &ixgbevf_mac_ops,
+};
+
+const struct ixgbevf_info ixgbevf_82599_vf_hv_info = {
+	.mac = ixgbe_mac_82599_vf,
+	.mac_ops = &ixgbevf_hv_mac_ops,
+};
+
+const struct ixgbevf_info ixgbevf_X540_vf_info = {
+	.mac = ixgbe_mac_X540_vf,
+	.mac_ops = &ixgbevf_mac_ops,
+};
+
+const struct ixgbevf_info ixgbevf_X540_vf_hv_info = {
+	.mac = ixgbe_mac_X540_vf,
+	.mac_ops = &ixgbevf_hv_mac_ops,
+};
+
+const struct ixgbevf_info ixgbevf_X550_vf_info = {
+	.mac = ixgbe_mac_X550_vf,
+	.mac_ops = &ixgbevf_mac_ops,
+};
+
+const struct ixgbevf_info ixgbevf_X550_vf_hv_info = {
+	.mac = ixgbe_mac_X550_vf,
+	.mac_ops = &ixgbevf_hv_mac_ops,
+};
+
+const struct ixgbevf_info ixgbevf_X550EM_x_vf_info = {
+	.mac = ixgbe_mac_X550EM_x_vf,
+	.mac_ops = &ixgbevf_mac_ops,
+};
+
+const struct ixgbevf_info ixgbevf_X550EM_x_vf_hv_info = {
+	.mac = ixgbe_mac_X550EM_x_vf,
+	.mac_ops = &ixgbevf_hv_mac_ops,
+};
+
+const struct ixgbevf_info ixgbevf_x550em_a_vf_info = {
+	.mac = ixgbe_mac_x550em_a_vf,
+	.mac_ops = &ixgbevf_mac_ops,
+};
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h
new file mode 100644
index 0000000000..b4eef5b6c1
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.h
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 1999 - 2018 Intel Corporation. */
+
+#ifndef __IXGBE_VF_H__
+#define __IXGBE_VF_H__
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+
+#include "defines.h"
+#include "regs.h"
+#include "mbx.h"
+
+struct ixgbe_hw;
+
+struct ixgbe_mac_operations {
+	s32 (*init_hw)(struct ixgbe_hw *);
+	s32 (*reset_hw)(struct ixgbe_hw *);
+	s32 (*start_hw)(struct ixgbe_hw *);
+	s32 (*clear_hw_cntrs)(struct ixgbe_hw *);
+	enum ixgbe_media_type (*get_media_type)(struct ixgbe_hw *);
+	s32 (*get_mac_addr)(struct ixgbe_hw *, u8 *);
+	s32 (*stop_adapter)(struct ixgbe_hw *);
+	s32 (*get_bus_info)(struct ixgbe_hw *);
+	s32 (*negotiate_api_version)(struct ixgbe_hw *hw, int api);
+
+	/* Link */
+	s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool, bool);
+	s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *, bool);
+	s32 (*get_link_capabilities)(struct ixgbe_hw *, ixgbe_link_speed *,
+				     bool *);
+
+	/* RAR, Multicast, VLAN */
+	s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32);
+	s32 (*set_uc_addr)(struct ixgbe_hw *, u32, u8 *);
+	s32 (*init_rx_addrs)(struct ixgbe_hw *);
+	s32 (*update_mc_addr_list)(struct ixgbe_hw *, struct net_device *);
+	s32 (*update_xcast_mode)(struct ixgbe_hw *, int);
+	s32 (*get_link_state)(struct ixgbe_hw *hw, bool *link_state);
+	s32 (*enable_mc)(struct ixgbe_hw *);
+	s32 (*disable_mc)(struct ixgbe_hw *);
+	s32 (*clear_vfta)(struct ixgbe_hw *);
+	s32 (*set_vfta)(struct ixgbe_hw *, u32, u32, bool);
+	s32 (*set_rlpml)(struct ixgbe_hw *, u16);
+};
+
+enum ixgbe_mac_type {
+	ixgbe_mac_unknown = 0,
+	ixgbe_mac_82599_vf,
+	ixgbe_mac_X540_vf,
+	ixgbe_mac_X550_vf,
+	ixgbe_mac_X550EM_x_vf,
+	ixgbe_mac_x550em_a_vf,
+	ixgbe_num_macs
+};
+
+struct ixgbe_mac_info {
+	struct ixgbe_mac_operations ops;
+	u8 addr[6];
+	u8 perm_addr[6];
+
+	enum ixgbe_mac_type type;
+
+	s32  mc_filter_type;
+
+	bool get_link_status;
+	u32  max_tx_queues;
+	u32  max_rx_queues;
+	u32  max_msix_vectors;
+};
+
+struct ixgbe_mbx_operations {
+	s32 (*init_params)(struct ixgbe_hw *hw);
+	void (*release)(struct ixgbe_hw *hw);
+	s32 (*read)(struct ixgbe_hw *, u32 *, u16);
+	s32 (*write)(struct ixgbe_hw *, u32 *, u16);
+	s32 (*check_for_msg)(struct ixgbe_hw *);
+	s32 (*check_for_ack)(struct ixgbe_hw *);
+	s32 (*check_for_rst)(struct ixgbe_hw *);
+};
+
+struct ixgbe_mbx_stats {
+	u32 msgs_tx;
+	u32 msgs_rx;
+
+	u32 acks;
+	u32 reqs;
+	u32 rsts;
+};
+
+struct ixgbe_mbx_info {
+	struct ixgbe_mbx_operations ops;
+	struct ixgbe_mbx_stats stats;
+	u32 timeout;
+	u32 udelay;
+	u32 vf_mailbox;
+	u16 size;
+};
+
+struct ixgbe_hw {
+	void *back;
+
+	u8 __iomem *hw_addr;
+
+	struct ixgbe_mac_info mac;
+	struct ixgbe_mbx_info mbx;
+
+	u16 device_id;
+	u16 subsystem_vendor_id;
+	u16 subsystem_device_id;
+	u16 vendor_id;
+
+	u8  revision_id;
+	bool adapter_stopped;
+
+	int api_version;
+};
+
+struct ixgbevf_hw_stats {
+	u64 base_vfgprc;
+	u64 base_vfgptc;
+	u64 base_vfgorc;
+	u64 base_vfgotc;
+	u64 base_vfmprc;
+
+	u64 last_vfgprc;
+	u64 last_vfgptc;
+	u64 last_vfgorc;
+	u64 last_vfgotc;
+	u64 last_vfmprc;
+
+	u64 vfgprc;
+	u64 vfgptc;
+	u64 vfgorc;
+	u64 vfgotc;
+	u64 vfmprc;
+
+	u64 saved_reset_vfgprc;
+	u64 saved_reset_vfgptc;
+	u64 saved_reset_vfgorc;
+	u64 saved_reset_vfgotc;
+	u64 saved_reset_vfmprc;
+};
+
+struct ixgbevf_info {
+	enum ixgbe_mac_type mac;
+	const struct ixgbe_mac_operations *mac_ops;
+};
+
+#define IXGBE_FAILED_READ_REG 0xffffffffU
+
+#define IXGBE_REMOVED(a) unlikely(!(a))
+
+static inline void ixgbe_write_reg(struct ixgbe_hw *hw, u32 reg, u32 value)
+{
+	u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr);
+
+	if (IXGBE_REMOVED(reg_addr))
+		return;
+	writel(value, reg_addr + reg);
+}
+
+#define IXGBE_WRITE_REG(h, r, v) ixgbe_write_reg(h, r, v)
+
+u32 ixgbevf_read_reg(struct ixgbe_hw *hw, u32 reg);
+#define IXGBE_READ_REG(h, r) ixgbevf_read_reg(h, r)
+
+static inline void ixgbe_write_reg_array(struct ixgbe_hw *hw, u32 reg,
+					 u32 offset, u32 value)
+{
+	ixgbe_write_reg(hw, reg + (offset << 2), value);
+}
+
+#define IXGBE_WRITE_REG_ARRAY(h, r, o, v) ixgbe_write_reg_array(h, r, o, v)
+
+static inline u32 ixgbe_read_reg_array(struct ixgbe_hw *hw, u32 reg,
+				       u32 offset)
+{
+	return ixgbevf_read_reg(hw, reg + (offset << 2));
+}
+
+#define IXGBE_READ_REG_ARRAY(h, r, o) ixgbe_read_reg_array(h, r, o)
+
+int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
+		       unsigned int *default_tc);
+int ixgbevf_get_reta_locked(struct ixgbe_hw *hw, u32 *reta, int num_rx_queues);
+int ixgbevf_get_rss_key_locked(struct ixgbe_hw *hw, u8 *rss_key);
+#endif /* __IXGBE_VF_H__ */
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
commit	ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree	b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/net/ethernet/intel
parent	Initial commit. (diff)
download	linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip